summaryrefslogtreecommitdiff
path: root/thirdparty
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty')
-rw-r--r--thirdparty/README.md62
-rw-r--r--thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp9
-rw-r--r--thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3File.cpp6
-rw-r--r--thirdparty/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp10
-rw-r--r--thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionObject.h11
-rw-r--r--thirdparty/bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp8
-rw-r--r--thirdparty/bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp18
-rw-r--r--thirdparty/bullet/BulletCollision/CollisionShapes/btCollisionShape.h13
-rw-r--r--thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp6
-rw-r--r--thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h10
-rw-r--r--thirdparty/bullet/BulletCollision/CollisionShapes/btOptimizedBvh.cpp9
-rw-r--r--thirdparty/bullet/BulletCollision/CollisionShapes/btSdfCollisionShape.cpp5
-rw-r--r--thirdparty/bullet/BulletCollision/Gimpact/btGImpactShape.h10
-rw-r--r--thirdparty/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp3
-rw-r--r--thirdparty/bullet/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp2
-rw-r--r--thirdparty/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h6
-rw-r--r--thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.h10
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.cpp563
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.h84
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.cpp3
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.h23
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.cpp14
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.h2
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyFixedConstraint.cpp4
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyGearConstraint.cpp2
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.cpp2
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.h16
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointMotor.cpp4
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLink.h3
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLinkCollider.h17
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyPoint2Point.cpp4
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySliderConstraint.cpp4
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySphericalJointMotor.cpp2
-rw-r--r--thirdparty/bullet/BulletSoftBody/DeformableBodyInplaceSolverIslandCallback.h9
-rw-r--r--thirdparty/bullet/BulletSoftBody/btCGProjection.h146
-rw-r--r--thirdparty/bullet/BulletSoftBody/btConjugateGradient.h229
-rw-r--r--thirdparty/bullet/BulletSoftBody/btConjugateResidual.h256
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.cpp380
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.h299
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.cpp757
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.h250
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.cpp979
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.h386
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.cpp761
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.h101
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableCorotatedForce.h189
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableGravityForce.h160
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableLagrangianForce.h675
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableLinearElasticityForce.h748
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableMassSpringForce.h544
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableMousePickingForce.h255
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.cpp207
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.h46
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.cpp1070
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.h505
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableNeoHookeanForce.h703
-rw-r--r--thirdparty/bullet/BulletSoftBody/btKrylovSolver.h107
-rw-r--r--thirdparty/bullet/BulletSoftBody/btPreconditioner.h471
-rw-r--r--thirdparty/bullet/BulletSoftBody/btSoftBody.cpp1413
-rw-r--r--thirdparty/bullet/BulletSoftBody/btSoftBody.h418
-rw-r--r--thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.cpp729
-rw-r--r--thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.h30
-rw-r--r--thirdparty/bullet/BulletSoftBody/btSoftBodyInternals.h2074
-rw-r--r--thirdparty/bullet/BulletSoftBody/btSoftBodySolvers.h2
-rw-r--r--thirdparty/bullet/BulletSoftBody/btSoftMultiBodyDynamicsWorld.cpp5
-rw-r--r--thirdparty/bullet/BulletSoftBody/btSparseSDF.h68
-rw-r--r--thirdparty/bullet/BulletSoftBody/poly34.cpp742
-rw-r--r--thirdparty/bullet/BulletSoftBody/poly34.h18
-rw-r--r--thirdparty/bullet/LinearMath/btAlignedAllocator.cpp4
-rw-r--r--thirdparty/bullet/LinearMath/btAlignedAllocator.h4
-rw-r--r--thirdparty/bullet/LinearMath/btConvexHullComputer.cpp6
-rw-r--r--thirdparty/bullet/LinearMath/btConvexHullComputer.h3
-rw-r--r--thirdparty/bullet/LinearMath/btReducedVector.h2
-rw-r--r--thirdparty/bullet/LinearMath/btScalar.h2
-rw-r--r--thirdparty/bullet/LinearMath/btSerializer.h6
-rw-r--r--thirdparty/fonts/NotoSansBengali_Regular.ttfbin0 -> 125440 bytes
-rw-r--r--thirdparty/fonts/NotoSansGeorgian_Regular.ttfbin0 -> 26940 bytes
-rw-r--r--thirdparty/fonts/NotoSansMalayalamUI_Regular.ttfbin0 -> 61264 bytes
-rw-r--r--thirdparty/fonts/NotoSansOriyaUI_Regular.ttfbin0 -> 96996 bytes
-rw-r--r--thirdparty/fonts/NotoSansSinhalaUI_Regular.ttfbin0 -> 188112 bytes
-rw-r--r--thirdparty/fonts/NotoSansTamilUI_Regular.ttfbin0 -> 44820 bytes
-rw-r--r--thirdparty/fonts/NotoSansTeluguUI_Regular.ttfbin0 -> 127912 bytes
-rw-r--r--thirdparty/fonts/Tamsyn10x20.pngbin0 -> 270 bytes
-rw-r--r--thirdparty/fonts/Tamsyn5x9.pngbin0 -> 175 bytes
-rw-r--r--thirdparty/graphite/COPYING26
-rw-r--r--thirdparty/graphite/ChangeLog238
-rw-r--r--thirdparty/graphite/include/graphite2/Font.h389
-rw-r--r--thirdparty/graphite/include/graphite2/Log.h85
-rw-r--r--thirdparty/graphite/include/graphite2/Segment.h461
-rw-r--r--thirdparty/graphite/include/graphite2/Types.h79
-rw-r--r--thirdparty/graphite/src/CmapCache.cpp155
-rw-r--r--thirdparty/graphite/src/Code.cpp782
-rw-r--r--thirdparty/graphite/src/Collider.cpp1115
-rw-r--r--thirdparty/graphite/src/Decompressor.cpp125
-rw-r--r--thirdparty/graphite/src/Face.cpp366
-rw-r--r--thirdparty/graphite/src/FeatureMap.cpp293
-rw-r--r--thirdparty/graphite/src/FileFace.cpp115
-rw-r--r--thirdparty/graphite/src/Font.cpp58
-rw-r--r--thirdparty/graphite/src/GlyphCache.cpp492
-rw-r--r--thirdparty/graphite/src/GlyphFace.cpp48
-rw-r--r--thirdparty/graphite/src/Intervals.cpp298
-rw-r--r--thirdparty/graphite/src/Justifier.cpp282
-rw-r--r--thirdparty/graphite/src/NameTable.cpp254
-rw-r--r--thirdparty/graphite/src/Pass.cpp1107
-rw-r--r--thirdparty/graphite/src/Position.cpp97
-rw-r--r--thirdparty/graphite/src/Segment.cpp423
-rw-r--r--thirdparty/graphite/src/Silf.cpp439
-rw-r--r--thirdparty/graphite/src/Slot.cpp529
-rw-r--r--thirdparty/graphite/src/Sparse.cpp62
-rw-r--r--thirdparty/graphite/src/TtfUtil.cpp2053
-rw-r--r--thirdparty/graphite/src/UtfCodec.cpp45
-rw-r--r--thirdparty/graphite/src/call_machine.cpp138
-rw-r--r--thirdparty/graphite/src/direct_machine.cpp140
-rw-r--r--thirdparty/graphite/src/gr_char_info.cpp65
-rw-r--r--thirdparty/graphite/src/gr_face.cpp267
-rw-r--r--thirdparty/graphite/src/gr_features.cpp138
-rw-r--r--thirdparty/graphite/src/gr_font.cpp74
-rw-r--r--thirdparty/graphite/src/gr_logging.cpp267
-rw-r--r--thirdparty/graphite/src/gr_segment.cpp175
-rw-r--r--thirdparty/graphite/src/gr_slot.cpp173
-rw-r--r--thirdparty/graphite/src/inc/CharInfo.h66
-rw-r--r--thirdparty/graphite/src/inc/CmapCache.h82
-rw-r--r--thirdparty/graphite/src/inc/Code.h171
-rw-r--r--thirdparty/graphite/src/inc/Collider.h245
-rw-r--r--thirdparty/graphite/src/inc/Compression.h104
-rw-r--r--thirdparty/graphite/src/inc/Decompressor.h54
-rw-r--r--thirdparty/graphite/src/inc/Endian.h111
-rw-r--r--thirdparty/graphite/src/inc/Error.h134
-rw-r--r--thirdparty/graphite/src/inc/Face.h225
-rw-r--r--thirdparty/graphite/src/inc/FeatureMap.h198
-rw-r--r--thirdparty/graphite/src/inc/FeatureVal.h68
-rw-r--r--thirdparty/graphite/src/inc/FileFace.h80
-rw-r--r--thirdparty/graphite/src/inc/Font.h90
-rw-r--r--thirdparty/graphite/src/inc/GlyphCache.h223
-rw-r--r--thirdparty/graphite/src/inc/GlyphFace.h83
-rw-r--r--thirdparty/graphite/src/inc/Intervals.h234
-rw-r--r--thirdparty/graphite/src/inc/List.h168
-rw-r--r--thirdparty/graphite/src/inc/Machine.h207
-rw-r--r--thirdparty/graphite/src/inc/Main.h199
-rw-r--r--thirdparty/graphite/src/inc/NameTable.h65
-rw-r--r--thirdparty/graphite/src/inc/Pass.h118
-rw-r--r--thirdparty/graphite/src/inc/Position.h68
-rw-r--r--thirdparty/graphite/src/inc/Rule.h305
-rw-r--r--thirdparty/graphite/src/inc/Segment.h236
-rw-r--r--thirdparty/graphite/src/inc/Silf.h128
-rw-r--r--thirdparty/graphite/src/inc/Slot.h170
-rw-r--r--thirdparty/graphite/src/inc/Sparse.h168
-rw-r--r--thirdparty/graphite/src/inc/TtfTypes.h419
-rw-r--r--thirdparty/graphite/src/inc/TtfUtil.h208
-rw-r--r--thirdparty/graphite/src/inc/UtfCodec.h251
-rw-r--r--thirdparty/graphite/src/inc/bits.h150
-rw-r--r--thirdparty/graphite/src/inc/debug.h89
-rw-r--r--thirdparty/graphite/src/inc/json.h178
-rw-r--r--thirdparty/graphite/src/inc/locale2lcid.h450
-rw-r--r--thirdparty/graphite/src/inc/opcode_table.h124
-rw-r--r--thirdparty/graphite/src/inc/opcodes.h691
-rw-r--r--thirdparty/graphite/src/json.cpp147
-rw-r--r--thirdparty/harfbuzz/AUTHORS14
-rw-r--r--thirdparty/harfbuzz/COPYING38
-rw-r--r--thirdparty/harfbuzz/NEWS2412
-rw-r--r--thirdparty/harfbuzz/THANKS7
-rw-r--r--thirdparty/harfbuzz/src/hb-aat-layout-ankr-table.hh98
-rw-r--r--thirdparty/harfbuzz/src/hb-aat-layout-bsln-table.hh158
-rw-r--r--thirdparty/harfbuzz/src/hb-aat-layout-common.hh840
-rw-r--r--thirdparty/harfbuzz/src/hb-aat-layout-feat-table.hh222
-rw-r--r--thirdparty/harfbuzz/src/hb-aat-layout-just-table.hh417
-rw-r--r--thirdparty/harfbuzz/src/hb-aat-layout-kerx-table.hh999
-rw-r--r--thirdparty/harfbuzz/src/hb-aat-layout-morx-table.hh1157
-rw-r--r--thirdparty/harfbuzz/src/hb-aat-layout-opbd-table.hh173
-rw-r--r--thirdparty/harfbuzz/src/hb-aat-layout-trak-table.hh230
-rw-r--r--thirdparty/harfbuzz/src/hb-aat-layout.cc382
-rw-r--r--thirdparty/harfbuzz/src/hb-aat-layout.h486
-rw-r--r--thirdparty/harfbuzz/src/hb-aat-layout.hh75
-rw-r--r--thirdparty/harfbuzz/src/hb-aat-ltag-table.hh92
-rw-r--r--thirdparty/harfbuzz/src/hb-aat-map.cc102
-rw-r--r--thirdparty/harfbuzz/src/hb-aat-map.hh96
-rw-r--r--thirdparty/harfbuzz/src/hb-aat.h38
-rw-r--r--thirdparty/harfbuzz/src/hb-algs.hh1127
-rw-r--r--thirdparty/harfbuzz/src/hb-array.hh408
-rw-r--r--thirdparty/harfbuzz/src/hb-atomic.hh295
-rw-r--r--thirdparty/harfbuzz/src/hb-bimap.hh166
-rw-r--r--thirdparty/harfbuzz/src/hb-blob.cc717
-rw-r--r--thirdparty/harfbuzz/src/hb-blob.h131
-rw-r--r--thirdparty/harfbuzz/src/hb-blob.hh97
-rw-r--r--thirdparty/harfbuzz/src/hb-buffer-deserialize-json.hh643
-rw-r--r--thirdparty/harfbuzz/src/hb-buffer-deserialize-text.hh571
-rw-r--r--thirdparty/harfbuzz/src/hb-buffer-serialize.cc474
-rw-r--r--thirdparty/harfbuzz/src/hb-buffer.cc2004
-rw-r--r--thirdparty/harfbuzz/src/hb-buffer.h586
-rw-r--r--thirdparty/harfbuzz/src/hb-buffer.hh451
-rw-r--r--thirdparty/harfbuzz/src/hb-cache.hh80
-rw-r--r--thirdparty/harfbuzz/src/hb-cff-interp-common.hh688
-rw-r--r--thirdparty/harfbuzz/src/hb-cff-interp-cs-common.hh911
-rw-r--r--thirdparty/harfbuzz/src/hb-cff-interp-dict-common.hh201
-rw-r--r--thirdparty/harfbuzz/src/hb-cff1-interp-cs.hh161
-rw-r--r--thirdparty/harfbuzz/src/hb-cff2-interp-cs.hh272
-rw-r--r--thirdparty/harfbuzz/src/hb-common.cc1098
-rw-r--r--thirdparty/harfbuzz/src/hb-common.h513
-rw-r--r--thirdparty/harfbuzz/src/hb-config.hh163
-rw-r--r--thirdparty/harfbuzz/src/hb-coretext.cc1194
-rw-r--r--thirdparty/harfbuzz/src/hb-coretext.h96
-rw-r--r--thirdparty/harfbuzz/src/hb-debug.hh459
-rw-r--r--thirdparty/harfbuzz/src/hb-deprecated.h195
-rw-r--r--thirdparty/harfbuzz/src/hb-directwrite.cc979
-rw-r--r--thirdparty/harfbuzz/src/hb-directwrite.h40
-rw-r--r--thirdparty/harfbuzz/src/hb-dispatch.hh61
-rw-r--r--thirdparty/harfbuzz/src/hb-draw.cc261
-rw-r--r--thirdparty/harfbuzz/src/hb-draw.h98
-rw-r--r--thirdparty/harfbuzz/src/hb-draw.hh139
-rw-r--r--thirdparty/harfbuzz/src/hb-face.cc733
-rw-r--r--thirdparty/harfbuzz/src/hb-face.h158
-rw-r--r--thirdparty/harfbuzz/src/hb-face.hh109
-rw-r--r--thirdparty/harfbuzz/src/hb-fallback-shape.cc125
-rw-r--r--thirdparty/harfbuzz/src/hb-font.cc2186
-rw-r--r--thirdparty/harfbuzz/src/hb-font.h735
-rw-r--r--thirdparty/harfbuzz/src/hb-font.hh632
-rw-r--r--thirdparty/harfbuzz/src/hb-ft.cc1042
-rw-r--r--thirdparty/harfbuzz/src/hb-ft.h138
-rw-r--r--thirdparty/harfbuzz/src/hb-gdi.cc73
-rw-r--r--thirdparty/harfbuzz/src/hb-gdi.h39
-rw-r--r--thirdparty/harfbuzz/src/hb-glib.cc307
-rw-r--r--thirdparty/harfbuzz/src/hb-glib.h56
-rw-r--r--thirdparty/harfbuzz/src/hb-gobject-structs.cc110
-rw-r--r--thirdparty/harfbuzz/src/hb-gobject-structs.h142
-rw-r--r--thirdparty/harfbuzz/src/hb-gobject.h40
-rw-r--r--thirdparty/harfbuzz/src/hb-graphite2.cc442
-rw-r--r--thirdparty/harfbuzz/src/hb-graphite2.h60
-rw-r--r--thirdparty/harfbuzz/src/hb-icu.cc296
-rw-r--r--thirdparty/harfbuzz/src/hb-icu.h52
-rw-r--r--thirdparty/harfbuzz/src/hb-iter.hh939
-rw-r--r--thirdparty/harfbuzz/src/hb-kern.hh138
-rw-r--r--thirdparty/harfbuzz/src/hb-machinery.hh307
-rw-r--r--thirdparty/harfbuzz/src/hb-map.cc268
-rw-r--r--thirdparty/harfbuzz/src/hb-map.h104
-rw-r--r--thirdparty/harfbuzz/src/hb-map.hh326
-rw-r--r--thirdparty/harfbuzz/src/hb-meta.hh410
-rw-r--r--thirdparty/harfbuzz/src/hb-mutex.hh133
-rw-r--r--thirdparty/harfbuzz/src/hb-null.hh184
-rw-r--r--thirdparty/harfbuzz/src/hb-number-parser.hh237
-rw-r--r--thirdparty/harfbuzz/src/hb-number.cc80
-rw-r--r--thirdparty/harfbuzz/src/hb-number.hh41
-rw-r--r--thirdparty/harfbuzz/src/hb-object.hh342
-rw-r--r--thirdparty/harfbuzz/src/hb-open-file.hh521
-rw-r--r--thirdparty/harfbuzz/src/hb-open-type.hh1078
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-cff-common.hh622
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-cff1-std-str.hh425
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-cff1-table.cc620
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-cff1-table.hh1403
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-cff2-table.cc215
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-cff2-table.hh531
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-cmap-table.hh1711
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-color-cbdt-table.hh985
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-color-colr-table.hh278
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-color-cpal-table.hh190
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-color-sbix-table.hh414
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-color-svg-table.hh124
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-color.cc321
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-color.h139
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-deprecated.h111
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-face-table-list.hh138
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-face.cc58
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-face.hh74
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-font.cc336
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-font.h45
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-gasp-table.hh84
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-glyf-table.hh1261
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-hdmx-table.hh177
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-head-table.hh179
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-hhea-table.hh104
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-hmtx-table.hh340
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-kern-table.hh359
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-layout-base-table.hh509
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-layout-common.hh3178
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-layout-gdef-table.hh725
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-layout-gpos-table.hh2740
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-layout-gsub-table.hh1627
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh3422
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-layout-jstf-table.hh235
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-layout.cc1993
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-layout.h462
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-layout.hh627
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-map.cc342
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-map.hh284
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-math-table.hh728
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-math.cc293
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-math.h230
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-maxp-table.hh142
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-meta-table.hh127
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-meta.cc77
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-meta.h71
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-metrics.cc231
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-metrics.h122
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-metrics.hh35
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-name-language-static.hh456
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-name-language.hh40
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-name-table.hh376
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-name.cc228
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-name.h129
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-os2-table.hh316
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-os2-unicode-ranges.hh231
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-post-macroman.hh294
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-post-table.hh298
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-fallback.hh348
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-joining-list.hh46
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-table.hh433
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-win1256.hh323
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.cc716
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.hh50
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-default.cc73
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-hangul.cc439
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-hebrew.cc185
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-indic-machine.hh574
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-indic-table.cc501
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.cc1615
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.hh436
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer-machine.hh372
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer-machine.rl113
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.cc457
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.hh113
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar-machine.hh430
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar.cc387
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar.hh171
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-thai.cc394
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-use-machine.hh562
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-use-table.cc873
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-use.cc569
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-use.hh105
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.cc464
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.hh39
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-complex.hh402
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-fallback.cc596
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-fallback.hh54
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-normalize.cc478
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape-normalize.hh70
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape.cc1223
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape.h53
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-shape.hh170
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-stat-table.hh404
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-tag-table.hh2176
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-tag.cc567
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-var-avar-table.hh169
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-var-fvar-table.hh327
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-var-gvar-table.hh701
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-var-hvar-table.hh488
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-var-mvar-table.hh119
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-var.cc220
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-var.h146
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-vorg-table.hh136
-rw-r--r--thirdparty/harfbuzz/src/hb-ot.h49
-rw-r--r--thirdparty/harfbuzz/src/hb-pool.hh100
-rw-r--r--thirdparty/harfbuzz/src/hb-sanitize.hh412
-rw-r--r--thirdparty/harfbuzz/src/hb-serialize.hh553
-rw-r--r--thirdparty/harfbuzz/src/hb-set-digest.hh174
-rw-r--r--thirdparty/harfbuzz/src/hb-set.cc541
-rw-r--r--thirdparty/harfbuzz/src/hb-set.h167
-rw-r--r--thirdparty/harfbuzz/src/hb-set.hh884
-rw-r--r--thirdparty/harfbuzz/src/hb-shape-plan.cc513
-rw-r--r--thirdparty/harfbuzz/src/hb-shape-plan.h108
-rw-r--r--thirdparty/harfbuzz/src/hb-shape-plan.hh76
-rw-r--r--thirdparty/harfbuzz/src/hb-shape.cc172
-rw-r--r--thirdparty/harfbuzz/src/hb-shape.h62
-rw-r--r--thirdparty/harfbuzz/src/hb-shaper-impl.hh38
-rw-r--r--thirdparty/harfbuzz/src/hb-shaper-list.hh60
-rw-r--r--thirdparty/harfbuzz/src/hb-shaper.cc108
-rw-r--r--thirdparty/harfbuzz/src/hb-shaper.hh134
-rw-r--r--thirdparty/harfbuzz/src/hb-static.cc112
-rw-r--r--thirdparty/harfbuzz/src/hb-string-array.hh85
-rw-r--r--thirdparty/harfbuzz/src/hb-style.cc135
-rw-r--r--thirdparty/harfbuzz/src/hb-style.h43
-rw-r--r--thirdparty/harfbuzz/src/hb-subset-cff-common.cc227
-rw-r--r--thirdparty/harfbuzz/src/hb-subset-cff-common.hh989
-rw-r--r--thirdparty/harfbuzz/src/hb-subset-cff1.cc940
-rw-r--r--thirdparty/harfbuzz/src/hb-subset-cff1.hh37
-rw-r--r--thirdparty/harfbuzz/src/hb-subset-cff2.cc488
-rw-r--r--thirdparty/harfbuzz/src/hb-subset-cff2.hh37
-rw-r--r--thirdparty/harfbuzz/src/hb-subset-input.cc229
-rw-r--r--thirdparty/harfbuzz/src/hb-subset-input.hh61
-rw-r--r--thirdparty/harfbuzz/src/hb-subset-plan.cc395
-rw-r--r--thirdparty/harfbuzz/src/hb-subset-plan.hh194
-rw-r--r--thirdparty/harfbuzz/src/hb-subset.cc269
-rw-r--r--thirdparty/harfbuzz/src/hb-subset.h97
-rw-r--r--thirdparty/harfbuzz/src/hb-subset.hh73
-rw-r--r--thirdparty/harfbuzz/src/hb-ucd-table.hh6780
-rw-r--r--thirdparty/harfbuzz/src/hb-ucd.cc248
-rw-r--r--thirdparty/harfbuzz/src/hb-unicode-emoji-table.hh78
-rw-r--r--thirdparty/harfbuzz/src/hb-unicode.cc586
-rw-r--r--thirdparty/harfbuzz/src/hb-unicode.h404
-rw-r--r--thirdparty/harfbuzz/src/hb-unicode.hh398
-rw-r--r--thirdparty/harfbuzz/src/hb-uniscribe.cc1047
-rw-r--r--thirdparty/harfbuzz/src/hb-uniscribe.h46
-rw-r--r--thirdparty/harfbuzz/src/hb-utf.hh453
-rw-r--r--thirdparty/harfbuzz/src/hb-vector.hh313
-rw-r--r--thirdparty/harfbuzz/src/hb-version.h66
-rw-r--r--thirdparty/harfbuzz/src/hb.h50
-rw-r--r--thirdparty/harfbuzz/src/hb.hh634
-rw-r--r--thirdparty/icu4c/APIChangeReport.md380
-rw-r--r--thirdparty/icu4c/LICENSE414
-rw-r--r--thirdparty/icu4c/common/appendable.cpp74
-rw-r--r--thirdparty/icu4c/common/bmpset.cpp741
-rw-r--r--thirdparty/icu4c/common/bmpset.h164
-rw-r--r--thirdparty/icu4c/common/brkeng.cpp284
-rw-r--r--thirdparty/icu4c/common/brkeng.h271
-rw-r--r--thirdparty/icu4c/common/brkiter.cpp527
-rw-r--r--thirdparty/icu4c/common/bytesinkutil.cpp161
-rw-r--r--thirdparty/icu4c/common/bytesinkutil.h83
-rw-r--r--thirdparty/icu4c/common/bytestream.cpp85
-rw-r--r--thirdparty/icu4c/common/bytestrie.cpp441
-rw-r--r--thirdparty/icu4c/common/bytestriebuilder.cpp504
-rw-r--r--thirdparty/icu4c/common/bytestrieiterator.cpp214
-rw-r--r--thirdparty/icu4c/common/caniter.cpp586
-rw-r--r--thirdparty/icu4c/common/capi_helper.h97
-rw-r--r--thirdparty/icu4c/common/characterproperties.cpp383
-rw-r--r--thirdparty/icu4c/common/chariter.cpp100
-rw-r--r--thirdparty/icu4c/common/charstr.cpp239
-rw-r--r--thirdparty/icu4c/common/charstr.h190
-rw-r--r--thirdparty/icu4c/common/charstrmap.h55
-rw-r--r--thirdparty/icu4c/common/cmemory.cpp138
-rw-r--r--thirdparty/icu4c/common/cmemory.h849
-rw-r--r--thirdparty/icu4c/common/cpputils.h97
-rw-r--r--thirdparty/icu4c/common/cstr.cpp54
-rw-r--r--thirdparty/icu4c/common/cstr.h60
-rw-r--r--thirdparty/icu4c/common/cstring.cpp341
-rw-r--r--thirdparty/icu4c/common/cstring.h126
-rw-r--r--thirdparty/icu4c/common/cwchar.cpp55
-rw-r--r--thirdparty/icu4c/common/cwchar.h58
-rw-r--r--thirdparty/icu4c/common/dictbe.cpp1410
-rw-r--r--thirdparty/icu4c/common/dictbe.h402
-rw-r--r--thirdparty/icu4c/common/dictionarydata.cpp242
-rw-r--r--thirdparty/icu4c/common/dictionarydata.h191
-rw-r--r--thirdparty/icu4c/common/dtintrv.cpp63
-rw-r--r--thirdparty/icu4c/common/edits.cpp803
-rw-r--r--thirdparty/icu4c/common/errorcode.cpp42
-rw-r--r--thirdparty/icu4c/common/filteredbrk.cpp710
-rw-r--r--thirdparty/icu4c/common/filterednormalizer2.cpp363
-rw-r--r--thirdparty/icu4c/common/hash.h248
-rw-r--r--thirdparty/icu4c/common/icudataver.cpp31
-rw-r--r--thirdparty/icu4c/common/icuplug.cpp884
-rw-r--r--thirdparty/icu4c/common/icuplugimp.h93
-rw-r--r--thirdparty/icu4c/common/loadednormalizer2impl.cpp418
-rw-r--r--thirdparty/icu4c/common/localebuilder.cpp468
-rw-r--r--thirdparty/icu4c/common/localematcher.cpp846
-rw-r--r--thirdparty/icu4c/common/localeprioritylist.cpp239
-rw-r--r--thirdparty/icu4c/common/localeprioritylist.h115
-rw-r--r--thirdparty/icu4c/common/localsvc.h27
-rw-r--r--thirdparty/icu4c/common/locavailable.cpp270
-rw-r--r--thirdparty/icu4c/common/locbased.cpp55
-rw-r--r--thirdparty/icu4c/common/locbased.h107
-rw-r--r--thirdparty/icu4c/common/locdispnames.cpp890
-rw-r--r--thirdparty/icu4c/common/locdistance.cpp415
-rw-r--r--thirdparty/icu4c/common/locdistance.h151
-rw-r--r--thirdparty/icu4c/common/locdspnm.cpp1110
-rw-r--r--thirdparty/icu4c/common/locid.cpp2536
-rw-r--r--thirdparty/icu4c/common/loclikely.cpp1410
-rw-r--r--thirdparty/icu4c/common/loclikelysubtags.cpp682
-rw-r--r--thirdparty/icu4c/common/loclikelysubtags.h121
-rw-r--r--thirdparty/icu4c/common/locmap.cpp1315
-rw-r--r--thirdparty/icu4c/common/locmap.h40
-rw-r--r--thirdparty/icu4c/common/locresdata.cpp220
-rw-r--r--thirdparty/icu4c/common/locutil.cpp275
-rw-r--r--thirdparty/icu4c/common/locutil.h39
-rw-r--r--thirdparty/icu4c/common/lsr.cpp114
-rw-r--r--thirdparty/icu4c/common/lsr.h82
-rw-r--r--thirdparty/icu4c/common/messageimpl.h65
-rw-r--r--thirdparty/icu4c/common/messagepattern.cpp1233
-rw-r--r--thirdparty/icu4c/common/msvcres.h25
-rw-r--r--thirdparty/icu4c/common/mutex.h77
-rw-r--r--thirdparty/icu4c/common/norm2_nfc_data.h1149
-rw-r--r--thirdparty/icu4c/common/norm2allmodes.h369
-rw-r--r--thirdparty/icu4c/common/normalizer2.cpp572
-rw-r--r--thirdparty/icu4c/common/normalizer2impl.cpp2669
-rw-r--r--thirdparty/icu4c/common/normalizer2impl.h978
-rw-r--r--thirdparty/icu4c/common/normlzr.cpp529
-rw-r--r--thirdparty/icu4c/common/parsepos.cpp23
-rw-r--r--thirdparty/icu4c/common/patternprops.cpp230
-rw-r--r--thirdparty/icu4c/common/patternprops.h98
-rw-r--r--thirdparty/icu4c/common/pluralmap.cpp44
-rw-r--r--thirdparty/icu4c/common/pluralmap.h292
-rw-r--r--thirdparty/icu4c/common/propname.cpp328
-rw-r--r--thirdparty/icu4c/common/propname.h212
-rw-r--r--thirdparty/icu4c/common/propname_data.h1919
-rw-r--r--thirdparty/icu4c/common/propsvec.cpp529
-rw-r--r--thirdparty/icu4c/common/propsvec.h178
-rw-r--r--thirdparty/icu4c/common/punycode.cpp590
-rw-r--r--thirdparty/icu4c/common/punycode.h120
-rw-r--r--thirdparty/icu4c/common/putil.cpp2482
-rw-r--r--thirdparty/icu4c/common/putilimp.h615
-rw-r--r--thirdparty/icu4c/common/rbbi.cpp1301
-rw-r--r--thirdparty/icu4c/common/rbbi_cache.cpp655
-rw-r--r--thirdparty/icu4c/common/rbbi_cache.h203
-rw-r--r--thirdparty/icu4c/common/rbbidata.cpp476
-rw-r--r--thirdparty/icu4c/common/rbbidata.h212
-rw-r--r--thirdparty/icu4c/common/rbbinode.cpp372
-rw-r--r--thirdparty/icu4c/common/rbbinode.h127
-rw-r--r--thirdparty/icu4c/common/rbbirb.cpp361
-rw-r--r--thirdparty/icu4c/common/rbbirb.h237
-rw-r--r--thirdparty/icu4c/common/rbbirpt.h296
-rw-r--r--thirdparty/icu4c/common/rbbiscan.cpp1281
-rw-r--r--thirdparty/icu4c/common/rbbiscan.h167
-rw-r--r--thirdparty/icu4c/common/rbbisetb.cpp694
-rw-r--r--thirdparty/icu4c/common/rbbisetb.h147
-rw-r--r--thirdparty/icu4c/common/rbbistbl.cpp270
-rw-r--r--thirdparty/icu4c/common/rbbitblb.cpp1793
-rw-r--r--thirdparty/icu4c/common/rbbitblb.h232
-rw-r--r--thirdparty/icu4c/common/resbund.cpp399
-rw-r--r--thirdparty/icu4c/common/resbund_cnv.cpp57
-rw-r--r--thirdparty/icu4c/common/resource.cpp22
-rw-r--r--thirdparty/icu4c/common/resource.h293
-rw-r--r--thirdparty/icu4c/common/restrace.cpp130
-rw-r--r--thirdparty/icu4c/common/restrace.h147
-rw-r--r--thirdparty/icu4c/common/ruleiter.cpp162
-rw-r--r--thirdparty/icu4c/common/ruleiter.h233
-rw-r--r--thirdparty/icu4c/common/schriter.cpp119
-rw-r--r--thirdparty/icu4c/common/serv.cpp982
-rw-r--r--thirdparty/icu4c/common/serv.h996
-rw-r--r--thirdparty/icu4c/common/servlk.cpp188
-rw-r--r--thirdparty/icu4c/common/servlkf.cpp152
-rw-r--r--thirdparty/icu4c/common/servloc.h551
-rw-r--r--thirdparty/icu4c/common/servls.cpp295
-rw-r--r--thirdparty/icu4c/common/servnotf.cpp120
-rw-r--r--thirdparty/icu4c/common/servnotf.h125
-rw-r--r--thirdparty/icu4c/common/servrbf.cpp96
-rw-r--r--thirdparty/icu4c/common/servslkf.cpp123
-rw-r--r--thirdparty/icu4c/common/sharedobject.cpp62
-rw-r--r--thirdparty/icu4c/common/sharedobject.h184
-rw-r--r--thirdparty/icu4c/common/simpleformatter.cpp325
-rw-r--r--thirdparty/icu4c/common/sprpimpl.h130
-rw-r--r--thirdparty/icu4c/common/static_unicode_sets.cpp245
-rw-r--r--thirdparty/icu4c/common/static_unicode_sets.h140
-rw-r--r--thirdparty/icu4c/common/stringpiece.cpp116
-rw-r--r--thirdparty/icu4c/common/stringtriebuilder.cpp618
-rw-r--r--thirdparty/icu4c/common/uarrsort.cpp274
-rw-r--r--thirdparty/icu4c/common/uarrsort.h103
-rw-r--r--thirdparty/icu4c/common/uassert.h51
-rw-r--r--thirdparty/icu4c/common/ubidi.cpp3036
-rw-r--r--thirdparty/icu4c/common/ubidi_props.cpp254
-rw-r--r--thirdparty/icu4c/common/ubidi_props.h148
-rw-r--r--thirdparty/icu4c/common/ubidi_props_data.h922
-rw-r--r--thirdparty/icu4c/common/ubidiimp.h484
-rw-r--r--thirdparty/icu4c/common/ubidiln.cpp1347
-rw-r--r--thirdparty/icu4c/common/ubiditransform.cpp530
-rw-r--r--thirdparty/icu4c/common/ubidiwrt.cpp650
-rw-r--r--thirdparty/icu4c/common/ubrk.cpp357
-rw-r--r--thirdparty/icu4c/common/ubrkimpl.h15
-rw-r--r--thirdparty/icu4c/common/ucase.cpp1608
-rw-r--r--thirdparty/icu4c/common/ucase.h445
-rw-r--r--thirdparty/icu4c/common/ucase_props_data.h951
-rw-r--r--thirdparty/icu4c/common/ucasemap.cpp953
-rw-r--r--thirdparty/icu4c/common/ucasemap_imp.h282
-rw-r--r--thirdparty/icu4c/common/ucasemap_titlecase_brkiter.cpp134
-rw-r--r--thirdparty/icu4c/common/ucat.cpp78
-rw-r--r--thirdparty/icu4c/common/uchar.cpp730
-rw-r--r--thirdparty/icu4c/common/uchar_props_data.h3860
-rw-r--r--thirdparty/icu4c/common/ucharstrie.cpp414
-rw-r--r--thirdparty/icu4c/common/ucharstriebuilder.cpp443
-rw-r--r--thirdparty/icu4c/common/ucharstrieiterator.cpp215
-rw-r--r--thirdparty/icu4c/common/uchriter.cpp367
-rw-r--r--thirdparty/icu4c/common/ucln.h91
-rw-r--r--thirdparty/icu4c/common/ucln_cmn.cpp124
-rw-r--r--thirdparty/icu4c/common/ucln_cmn.h77
-rw-r--r--thirdparty/icu4c/common/ucln_imp.h182
-rw-r--r--thirdparty/icu4c/common/ucmndata.cpp393
-rw-r--r--thirdparty/icu4c/common/ucmndata.h117
-rw-r--r--thirdparty/icu4c/common/ucnv.cpp2910
-rw-r--r--thirdparty/icu4c/common/ucnv2022.cpp3973
-rw-r--r--thirdparty/icu4c/common/ucnv_bld.cpp1689
-rw-r--r--thirdparty/icu4c/common/ucnv_bld.h296
-rw-r--r--thirdparty/icu4c/common/ucnv_cb.cpp261
-rw-r--r--thirdparty/icu4c/common/ucnv_cnv.cpp182
-rw-r--r--thirdparty/icu4c/common/ucnv_cnv.h323
-rw-r--r--thirdparty/icu4c/common/ucnv_ct.cpp646
-rw-r--r--thirdparty/icu4c/common/ucnv_err.cpp486
-rw-r--r--thirdparty/icu4c/common/ucnv_ext.cpp1143
-rw-r--r--thirdparty/icu4c/common/ucnv_ext.h481
-rw-r--r--thirdparty/icu4c/common/ucnv_imp.h139
-rw-r--r--thirdparty/icu4c/common/ucnv_io.cpp1360
-rw-r--r--thirdparty/icu4c/common/ucnv_io.h127
-rw-r--r--thirdparty/icu4c/common/ucnv_lmb.cpp1388
-rw-r--r--thirdparty/icu4c/common/ucnv_set.cpp70
-rw-r--r--thirdparty/icu4c/common/ucnv_u16.cpp1579
-rw-r--r--thirdparty/icu4c/common/ucnv_u32.cpp1253
-rw-r--r--thirdparty/icu4c/common/ucnv_u7.cpp1491
-rw-r--r--thirdparty/icu4c/common/ucnv_u8.cpp944
-rw-r--r--thirdparty/icu4c/common/ucnvbocu.cpp1413
-rw-r--r--thirdparty/icu4c/common/ucnvdisp.cpp88
-rw-r--r--thirdparty/icu4c/common/ucnvhz.cpp625
-rw-r--r--thirdparty/icu4c/common/ucnvisci.cpp1635
-rw-r--r--thirdparty/icu4c/common/ucnvlat1.cpp756
-rw-r--r--thirdparty/icu4c/common/ucnvmbcs.cpp5723
-rw-r--r--thirdparty/icu4c/common/ucnvmbcs.h605
-rw-r--r--thirdparty/icu4c/common/ucnvscsu.cpp2045
-rw-r--r--thirdparty/icu4c/common/ucnvsel.cpp823
-rw-r--r--thirdparty/icu4c/common/ucol_data.h89
-rw-r--r--thirdparty/icu4c/common/ucol_swp.cpp615
-rw-r--r--thirdparty/icu4c/common/ucol_swp.h58
-rw-r--r--thirdparty/icu4c/common/ucptrie.cpp601
-rw-r--r--thirdparty/icu4c/common/ucptrie_impl.h289
-rw-r--r--thirdparty/icu4c/common/ucurr.cpp2701
-rw-r--r--thirdparty/icu4c/common/ucurrimp.h78
-rw-r--r--thirdparty/icu4c/common/udata.cpp1460
-rw-r--r--thirdparty/icu4c/common/udatamem.cpp161
-rw-r--r--thirdparty/icu4c/common/udatamem.h61
-rw-r--r--thirdparty/icu4c/common/udataswp.cpp473
-rw-r--r--thirdparty/icu4c/common/udataswp.h404
-rw-r--r--thirdparty/icu4c/common/uelement.h91
-rw-r--r--thirdparty/icu4c/common/uenum.cpp189
-rw-r--r--thirdparty/icu4c/common/uenumimp.h155
-rw-r--r--thirdparty/icu4c/common/uhash.cpp991
-rw-r--r--thirdparty/icu4c/common/uhash.h718
-rw-r--r--thirdparty/icu4c/common/uhash_us.cpp26
-rw-r--r--thirdparty/icu4c/common/uidna.cpp921
-rw-r--r--thirdparty/icu4c/common/uinit.cpp74
-rw-r--r--thirdparty/icu4c/common/uinvchar.cpp627
-rw-r--r--thirdparty/icu4c/common/uinvchar.h219
-rw-r--r--thirdparty/icu4c/common/uiter.cpp1108
-rw-r--r--thirdparty/icu4c/common/ulayout_props.h46
-rw-r--r--thirdparty/icu4c/common/ulist.cpp270
-rw-r--r--thirdparty/icu4c/common/ulist.h50
-rw-r--r--thirdparty/icu4c/common/uloc.cpp2176
-rw-r--r--thirdparty/icu4c/common/uloc_keytype.cpp534
-rw-r--r--thirdparty/icu4c/common/uloc_tag.cpp2844
-rw-r--r--thirdparty/icu4c/common/ulocimp.h307
-rw-r--r--thirdparty/icu4c/common/umapfile.cpp530
-rw-r--r--thirdparty/icu4c/common/umapfile.h57
-rw-r--r--thirdparty/icu4c/common/umath.cpp26
-rw-r--r--thirdparty/icu4c/common/umutablecptrie.cpp1852
-rw-r--r--thirdparty/icu4c/common/umutex.cpp204
-rw-r--r--thirdparty/icu4c/common/umutex.h277
-rw-r--r--thirdparty/icu4c/common/unames.cpp2108
-rw-r--r--thirdparty/icu4c/common/unicode/appendable.h239
-rw-r--r--thirdparty/icu4c/common/unicode/brkiter.h670
-rw-r--r--thirdparty/icu4c/common/unicode/bytestream.h309
-rw-r--r--thirdparty/icu4c/common/unicode/bytestrie.h565
-rw-r--r--thirdparty/icu4c/common/unicode/bytestriebuilder.h188
-rw-r--r--thirdparty/icu4c/common/unicode/caniter.h214
-rw-r--r--thirdparty/icu4c/common/unicode/casemap.h497
-rw-r--r--thirdparty/icu4c/common/unicode/char16ptr.h313
-rw-r--r--thirdparty/icu4c/common/unicode/chariter.h734
-rw-r--r--thirdparty/icu4c/common/unicode/dbbi.h48
-rw-r--r--thirdparty/icu4c/common/unicode/docmain.h232
-rw-r--r--thirdparty/icu4c/common/unicode/dtintrv.h164
-rw-r--r--thirdparty/icu4c/common/unicode/edits.h531
-rw-r--r--thirdparty/icu4c/common/unicode/enumset.h69
-rw-r--r--thirdparty/icu4c/common/unicode/errorcode.h144
-rw-r--r--thirdparty/icu4c/common/unicode/filteredbrk.h152
-rw-r--r--thirdparty/icu4c/common/unicode/icudataver.h43
-rw-r--r--thirdparty/icu4c/common/unicode/icuplug.h388
-rw-r--r--thirdparty/icu4c/common/unicode/idna.h330
-rw-r--r--thirdparty/icu4c/common/unicode/localebuilder.h311
-rw-r--r--thirdparty/icu4c/common/unicode/localematcher.h720
-rw-r--r--thirdparty/icu4c/common/unicode/localpointer.h595
-rw-r--r--thirdparty/icu4c/common/unicode/locdspnm.h211
-rw-r--r--thirdparty/icu4c/common/unicode/locid.h1274
-rw-r--r--thirdparty/icu4c/common/unicode/messagepattern.h949
-rw-r--r--thirdparty/icu4c/common/unicode/normalizer2.h779
-rw-r--r--thirdparty/icu4c/common/unicode/normlzr.h816
-rw-r--r--thirdparty/icu4c/common/unicode/parseerr.h94
-rw-r--r--thirdparty/icu4c/common/unicode/parsepos.h237
-rw-r--r--thirdparty/icu4c/common/unicode/platform.h885
-rw-r--r--thirdparty/icu4c/common/unicode/ptypes.h130
-rw-r--r--thirdparty/icu4c/common/unicode/putil.h183
-rw-r--r--thirdparty/icu4c/common/unicode/rbbi.h732
-rw-r--r--thirdparty/icu4c/common/unicode/rep.h266
-rw-r--r--thirdparty/icu4c/common/unicode/resbund.h498
-rw-r--r--thirdparty/icu4c/common/unicode/schriter.h195
-rw-r--r--thirdparty/icu4c/common/unicode/simpleformatter.h341
-rw-r--r--thirdparty/icu4c/common/unicode/std_string.h41
-rw-r--r--thirdparty/icu4c/common/unicode/strenum.h281
-rw-r--r--thirdparty/icu4c/common/unicode/stringoptions.h190
-rw-r--r--thirdparty/icu4c/common/unicode/stringpiece.h353
-rw-r--r--thirdparty/icu4c/common/unicode/stringtriebuilder.h426
-rw-r--r--thirdparty/icu4c/common/unicode/symtable.h119
-rw-r--r--thirdparty/icu4c/common/unicode/ubidi.h2210
-rw-r--r--thirdparty/icu4c/common/unicode/ubiditransform.h326
-rw-r--r--thirdparty/icu4c/common/unicode/ubrk.h631
-rw-r--r--thirdparty/icu4c/common/unicode/ucasemap.h388
-rw-r--r--thirdparty/icu4c/common/unicode/ucat.h160
-rw-r--r--thirdparty/icu4c/common/unicode/uchar.h4056
-rw-r--r--thirdparty/icu4c/common/unicode/ucharstrie.h623
-rw-r--r--thirdparty/icu4c/common/unicode/ucharstriebuilder.h193
-rw-r--r--thirdparty/icu4c/common/unicode/uchriter.h393
-rw-r--r--thirdparty/icu4c/common/unicode/uclean.h262
-rw-r--r--thirdparty/icu4c/common/unicode/ucnv.h2045
-rw-r--r--thirdparty/icu4c/common/unicode/ucnv_cb.h164
-rw-r--r--thirdparty/icu4c/common/unicode/ucnv_err.h465
-rw-r--r--thirdparty/icu4c/common/unicode/ucnvsel.h192
-rw-r--r--thirdparty/icu4c/common/unicode/uconfig.h456
-rw-r--r--thirdparty/icu4c/common/unicode/ucpmap.h159
-rw-r--r--thirdparty/icu4c/common/unicode/ucptrie.h646
-rw-r--r--thirdparty/icu4c/common/unicode/ucurr.h468
-rw-r--r--thirdparty/icu4c/common/unicode/udata.h440
-rw-r--r--thirdparty/icu4c/common/unicode/udisplaycontext.h173
-rw-r--r--thirdparty/icu4c/common/unicode/uenum.h209
-rw-r--r--thirdparty/icu4c/common/unicode/uidna.h776
-rw-r--r--thirdparty/icu4c/common/unicode/uiter.h709
-rw-r--r--thirdparty/icu4c/common/unicode/uldnames.h307
-rw-r--r--thirdparty/icu4c/common/unicode/uloc.h1393
-rw-r--r--thirdparty/icu4c/common/unicode/umachine.h491
-rw-r--r--thirdparty/icu4c/common/unicode/umisc.h62
-rw-r--r--thirdparty/icu4c/common/unicode/umutablecptrie.h241
-rw-r--r--thirdparty/icu4c/common/unicode/unifilt.h136
-rw-r--r--thirdparty/icu4c/common/unicode/unifunct.h132
-rw-r--r--thirdparty/icu4c/common/unicode/unimatch.h168
-rw-r--r--thirdparty/icu4c/common/unicode/uniset.h1744
-rw-r--r--thirdparty/icu4c/common/unicode/unistr.h4757
-rw-r--r--thirdparty/icu4c/common/unicode/unorm.h476
-rw-r--r--thirdparty/icu4c/common/unicode/unorm2.h606
-rw-r--r--thirdparty/icu4c/common/unicode/uobject.h324
-rw-r--r--thirdparty/icu4c/common/unicode/urename.h1922
-rw-r--r--thirdparty/icu4c/common/unicode/urep.h157
-rw-r--r--thirdparty/icu4c/common/unicode/ures.h911
-rw-r--r--thirdparty/icu4c/common/unicode/uscript.h708
-rw-r--r--thirdparty/icu4c/common/unicode/uset.h1137
-rw-r--r--thirdparty/icu4c/common/unicode/usetiter.h325
-rw-r--r--thirdparty/icu4c/common/unicode/ushape.h476
-rw-r--r--thirdparty/icu4c/common/unicode/usprep.h274
-rw-r--r--thirdparty/icu4c/common/unicode/ustring.h1689
-rw-r--r--thirdparty/icu4c/common/unicode/ustringtrie.h97
-rw-r--r--thirdparty/icu4c/common/unicode/utext.h1603
-rw-r--r--thirdparty/icu4c/common/unicode/utf.h225
-rw-r--r--thirdparty/icu4c/common/unicode/utf16.h734
-rw-r--r--thirdparty/icu4c/common/unicode/utf32.h25
-rw-r--r--thirdparty/icu4c/common/unicode/utf8.h882
-rw-r--r--thirdparty/icu4c/common/unicode/utf_old.h1201
-rw-r--r--thirdparty/icu4c/common/unicode/utrace.h509
-rw-r--r--thirdparty/icu4c/common/unicode/utypes.h732
-rw-r--r--thirdparty/icu4c/common/unicode/uvernum.h198
-rw-r--r--thirdparty/icu4c/common/unicode/uversion.h187
-rw-r--r--thirdparty/icu4c/common/unifiedcache.cpp522
-rw-r--r--thirdparty/icu4c/common/unifiedcache.h556
-rw-r--r--thirdparty/icu4c/common/unifilt.cpp71
-rw-r--r--thirdparty/icu4c/common/unifunct.cpp28
-rw-r--r--thirdparty/icu4c/common/uniquecharstr.h98
-rw-r--r--thirdparty/icu4c/common/uniset.cpp2356
-rw-r--r--thirdparty/icu4c/common/uniset_closure.cpp250
-rw-r--r--thirdparty/icu4c/common/uniset_props.cpp1174
-rw-r--r--thirdparty/icu4c/common/unisetspan.cpp1509
-rw-r--r--thirdparty/icu4c/common/unisetspan.h157
-rw-r--r--thirdparty/icu4c/common/unistr.cpp1982
-rw-r--r--thirdparty/icu4c/common/unistr_case.cpp250
-rw-r--r--thirdparty/icu4c/common/unistr_case_locale.cpp56
-rw-r--r--thirdparty/icu4c/common/unistr_cnv.cpp417
-rw-r--r--thirdparty/icu4c/common/unistr_props.cpp77
-rw-r--r--thirdparty/icu4c/common/unistr_titlecase_brkiter.cpp57
-rw-r--r--thirdparty/icu4c/common/unistrappender.h90
-rw-r--r--thirdparty/icu4c/common/unorm.cpp280
-rw-r--r--thirdparty/icu4c/common/unormcmp.cpp640
-rw-r--r--thirdparty/icu4c/common/unormimp.h488
-rw-r--r--thirdparty/icu4c/common/uobject.cpp105
-rw-r--r--thirdparty/icu4c/common/uposixdefs.h77
-rw-r--r--thirdparty/icu4c/common/uprops.cpp797
-rw-r--r--thirdparty/icu4c/common/uprops.h504
-rw-r--r--thirdparty/icu4c/common/ures_cnv.cpp78
-rw-r--r--thirdparty/icu4c/common/uresbund.cpp3090
-rw-r--r--thirdparty/icu4c/common/uresdata.cpp1518
-rw-r--r--thirdparty/icu4c/common/uresdata.h565
-rw-r--r--thirdparty/icu4c/common/uresimp.h364
-rw-r--r--thirdparty/icu4c/common/ureslocs.h27
-rw-r--r--thirdparty/icu4c/common/usc_impl.cpp361
-rw-r--r--thirdparty/icu4c/common/usc_impl.h139
-rw-r--r--thirdparty/icu4c/common/uscript.cpp149
-rw-r--r--thirdparty/icu4c/common/uscript_props.cpp302
-rw-r--r--thirdparty/icu4c/common/uset.cpp641
-rw-r--r--thirdparty/icu4c/common/uset_imp.h62
-rw-r--r--thirdparty/icu4c/common/uset_props.cpp143
-rw-r--r--thirdparty/icu4c/common/usetiter.cpp152
-rw-r--r--thirdparty/icu4c/common/ushape.cpp1728
-rw-r--r--thirdparty/icu4c/common/usprep.cpp871
-rw-r--r--thirdparty/icu4c/common/ustack.cpp63
-rw-r--r--thirdparty/icu4c/common/ustr_cnv.cpp256
-rw-r--r--thirdparty/icu4c/common/ustr_cnv.h51
-rw-r--r--thirdparty/icu4c/common/ustr_imp.h155
-rw-r--r--thirdparty/icu4c/common/ustr_titlecase_brkiter.cpp237
-rw-r--r--thirdparty/icu4c/common/ustr_wcs.cpp535
-rw-r--r--thirdparty/icu4c/common/ustrcase.cpp1818
-rw-r--r--thirdparty/icu4c/common/ustrcase_locale.cpp94
-rw-r--r--thirdparty/icu4c/common/ustrenum.cpp398
-rw-r--r--thirdparty/icu4c/common/ustrenum.h87
-rw-r--r--thirdparty/icu4c/common/ustrfmt.cpp59
-rw-r--r--thirdparty/icu4c/common/ustrfmt.h19
-rw-r--r--thirdparty/icu4c/common/ustring.cpp1537
-rw-r--r--thirdparty/icu4c/common/ustrtrns.cpp1451
-rw-r--r--thirdparty/icu4c/common/utext.cpp2877
-rw-r--r--thirdparty/icu4c/common/utf_impl.cpp329
-rw-r--r--thirdparty/icu4c/common/util.cpp421
-rw-r--r--thirdparty/icu4c/common/util.h257
-rw-r--r--thirdparty/icu4c/common/util_props.cpp217
-rw-r--r--thirdparty/icu4c/common/utrace.cpp504
-rw-r--r--thirdparty/icu4c/common/utracimp.h391
-rw-r--r--thirdparty/icu4c/common/utrie.cpp1234
-rw-r--r--thirdparty/icu4c/common/utrie.h793
-rw-r--r--thirdparty/icu4c/common/utrie2.cpp663
-rw-r--r--thirdparty/icu4c/common/utrie2.h955
-rw-r--r--thirdparty/icu4c/common/utrie2_builder.cpp1483
-rw-r--r--thirdparty/icu4c/common/utrie2_impl.h175
-rw-r--r--thirdparty/icu4c/common/utrie_swap.cpp348
-rw-r--r--thirdparty/icu4c/common/uts46.cpp1494
-rw-r--r--thirdparty/icu4c/common/utypeinfo.h32
-rw-r--r--thirdparty/icu4c/common/utypes.cpp227
-rw-r--r--thirdparty/icu4c/common/uvector.cpp567
-rw-r--r--thirdparty/icu4c/common/uvector.h415
-rw-r--r--thirdparty/icu4c/common/uvectr32.cpp335
-rw-r--r--thirdparty/icu4c/common/uvectr32.h306
-rw-r--r--thirdparty/icu4c/common/uvectr64.cpp214
-rw-r--r--thirdparty/icu4c/common/uvectr64.h279
-rw-r--r--thirdparty/icu4c/common/wintz.cpp147
-rw-r--r--thirdparty/icu4c/common/wintz.h36
-rw-r--r--thirdparty/icu4c/godot_data.json9
-rw-r--r--thirdparty/icu4c/icudt68l.datbin0 -> 3846608 bytes
-rw-r--r--thirdparty/meshoptimizer/LICENSE.md21
-rw-r--r--thirdparty/meshoptimizer/allocator.cpp8
-rw-r--r--thirdparty/meshoptimizer/clusterizer.cpp351
-rw-r--r--thirdparty/meshoptimizer/indexcodec.cpp752
-rw-r--r--thirdparty/meshoptimizer/indexgenerator.cpp347
-rw-r--r--thirdparty/meshoptimizer/meshoptimizer.h951
-rw-r--r--thirdparty/meshoptimizer/overdrawanalyzer.cpp230
-rw-r--r--thirdparty/meshoptimizer/overdrawoptimizer.cpp333
-rw-r--r--thirdparty/meshoptimizer/patches/simplifier_get_resulting_error.patch96
-rw-r--r--thirdparty/meshoptimizer/simplifier.cpp1562
-rw-r--r--thirdparty/meshoptimizer/spatialorder.cpp194
-rw-r--r--thirdparty/meshoptimizer/stripifier.cpp295
-rw-r--r--thirdparty/meshoptimizer/vcacheanalyzer.cpp73
-rw-r--r--thirdparty/meshoptimizer/vcacheoptimizer.cpp473
-rw-r--r--thirdparty/meshoptimizer/vertexcodec.cpp1265
-rw-r--r--thirdparty/meshoptimizer/vertexfilter.cpp825
-rw-r--r--thirdparty/meshoptimizer/vfetchanalyzer.cpp58
-rw-r--r--thirdparty/meshoptimizer/vfetchoptimizer.cpp74
-rw-r--r--thirdparty/minimp3/LICENSE117
-rw-r--r--thirdparty/minimp3/minimp3.h1855
-rw-r--r--thirdparty/minimp3/minimp3_ex.h1394
-rw-r--r--thirdparty/misc/easing_equations.cpp35
-rw-r--r--thirdparty/misc/open-simplex-noise.c20
-rw-r--r--thirdparty/misc/open-simplex-noise.h6
-rw-r--r--thirdparty/rvo2/API.h (renamed from thirdparty/rvo2/src/API.h)0
-rw-r--r--thirdparty/rvo2/Agent.cpp (renamed from thirdparty/rvo2/src/Agent.cpp)0
-rw-r--r--thirdparty/rvo2/Agent.h (renamed from thirdparty/rvo2/src/Agent.h)0
-rw-r--r--thirdparty/rvo2/Definitions.h (renamed from thirdparty/rvo2/src/Definitions.h)0
-rw-r--r--thirdparty/rvo2/KdTree.cpp (renamed from thirdparty/rvo2/src/KdTree.cpp)0
-rw-r--r--thirdparty/rvo2/KdTree.h (renamed from thirdparty/rvo2/src/KdTree.h)0
-rw-r--r--thirdparty/rvo2/Vector3.h (renamed from thirdparty/rvo2/src/Vector3.h)0
-rw-r--r--thirdparty/xatlas/xatlas.cpp5775
-rw-r--r--thirdparty/xatlas/xatlas.h170
841 files changed, 383528 insertions, 11143 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md
index f4f3aad0fc..1db7f5d583 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -40,11 +40,9 @@ Files extracted from upstream source:
## bullet
- Upstream: https://github.com/bulletphysics/bullet3
-- Version: git pre-2.90 (cd8cf7521cbb8b7808126a6adebd47bb83ea166a, 2020)
+- Version: 3.07 (e32fc59c88a3908876949c6f2665e8d091d987fa, 2020)
- License: zlib
-Important: Synced with a pre-release version of bullet 2.90 from the master branch.
-
Files extracted from upstream source:
- src/* apart from CMakeLists.txt and premake4.lua files
@@ -143,6 +141,12 @@ Use UI font variant if available, because it has tight vertical metrics and good
- Version: ? (pre-2014 commit when DroidSansJapanese.ttf was obsoleted)
- License: Apache 2.0
+### Tamsyn
+- Upstream: http://www.fial.com/~scott/tamsyn-font/
+- Version: 1.11
+- License: Tamsyn
+
+Extracted "0..9,A..F" characters for hex code printing.
## freetype
@@ -178,6 +182,39 @@ Files extracted from upstream source:
Patches in the `patches` directory should be re-applied after updates.
+## Graphite engine
+
+- Upstream: https://github.com/silnrsi/graphite
+- Version: 1.3.14
+- License: MPL-2.0
+
+Files extracted from upstream source:
+- the `include` folder
+- the `src` folder
+- `COPYING`, `ChangeLog`
+
+## HarfBuzz
+
+- Upstream: https://github.com/harfbuzz/harfbuzz
+- Version: 2.7.2
+- License: HarfBuzz
+
+Files extracted from upstream source:
+- the `src` folder
+- `AUTHORS`, `COPYING`, `NEWS`, `THANKS`
+
+## International Components for Unicode
+
+- Upstream: https://github.com/unicode-org/icu
+- Version: 68.1
+- License: Unicode
+
+Files extracted from upstream source:
+- the `common` folder
+- `APIChangeReport.md`, `LICENSE`
+
+Files generated from upstream source:
+- the `icudt68l.dat` built with the provided `godot_data.json` config file (see https://github.com/unicode-org/icu/blob/master/docs/userguide/icu_data/buildtool.md for instructions)
## jpeg-compressor
@@ -319,6 +356,21 @@ File extracted from upstream release tarball:
for light bundling with core.
+## meshoptimizer
+
+- Upstream: https://github.com/zeux/meshoptimizer
+- Version: 0.15 (2020)
+- License: MIT
+
+File extracted from upstream release tarball:
+
+- All files in `src/`.
+
+Important: Some files have Godot-made changes.
+They can be applied with the patch in the `patches` folder, but are meant to be superseded
+by upstream API changes.
+
+
## miniupnpc
- Upstream: https://github.com/miniupnp/miniupnp/tree/master/miniupnpc
@@ -374,7 +426,7 @@ Collection of single-file libraries used in Godot components.
* License: Apache 2.0
- `open-simplex-noise.{c,h}`
* Upstream: https://github.com/smcameron/open-simplex-noise-in-c
- * Version: git (0fef0dbedd76f767da7e3c894822729d0f07e54d, 2020) + custom changes
+ * Version: git (826f1dd1724e6fb3ff45f58e48c0fbae864c3403, 2020) + custom changes
* License: Unlicense
- `pcg.{cpp,h}`
* Upstream: http://www.pcg-random.org
@@ -632,7 +684,7 @@ File extracted from upstream release tarball:
## xatlas
- Upstream: https://github.com/jpcy/xatlas
-- Version: git (470576d3516f7e6d8b4554e7c941194a935969fd, 2020)
+- Version: git (5571fc7ef0d06832947c0a935ccdcf083f7a9264, 2020)
- License: MIT
Files extracted from upstream source:
diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp
index 6f2c5251a0..4938fa17af 100644
--- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp
+++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp
@@ -285,7 +285,6 @@ void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface, int
meshInterface->getLockedReadOnlyVertexIndexBase(&vertexbase, numverts, type, stride, &indexbase, indexstride, numfaces, indicestype, nodeSubPart);
curNodeSubPart = nodeSubPart;
- b3Assert(indicestype == PHY_INTEGER || indicestype == PHY_SHORT);
}
//triangles->getLockedReadOnlyVertexIndexBase(vertexBase,numVerts,
@@ -293,7 +292,13 @@ void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface, int
for (int j = 2; j >= 0; j--)
{
- int graphicsindex = indicestype == PHY_SHORT ? ((unsigned short*)gfxbase)[j] : gfxbase[j];
+ int graphicsindex;
+ switch (indicestype) {
+ case PHY_INTEGER: graphicsindex = gfxbase[j]; break;
+ case PHY_SHORT: graphicsindex = ((unsigned short*)gfxbase)[j]; break;
+ case PHY_UCHAR: graphicsindex = ((unsigned char*)gfxbase)[j]; break;
+ default: b3Assert(0);
+ }
if (type == PHY_FLOAT)
{
float* graphicsbase = (float*)(vertexbase + graphicsindex * stride);
diff --git a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3File.cpp b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3File.cpp
index 145de62db3..f6c779a919 100644
--- a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3File.cpp
+++ b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3File.cpp
@@ -851,12 +851,12 @@ void bFile::swapData(char *data, short type, int arraySize, bool ignoreEndianFla
void bFile::safeSwapPtr(char *dst, const char *src)
{
+ if (!src || !dst)
+ return;
+
int ptrFile = mFileDNA->getPointerSize();
int ptrMem = mMemoryDNA->getPointerSize();
- if (!src && !dst)
- return;
-
if (ptrFile == ptrMem)
{
memcpy(dst, src, ptrMem);
diff --git a/thirdparty/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp b/thirdparty/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp
index 4954e773e2..19f1737b73 100644
--- a/thirdparty/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp
+++ b/thirdparty/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp
@@ -346,8 +346,6 @@ void btQuantizedBvh::reportAabbOverlappingNodex(btNodeOverlapCallback* nodeCallb
}
}
-int maxIterations = 0;
-
void btQuantizedBvh::walkStacklessTree(btNodeOverlapCallback* nodeCallback, const btVector3& aabbMin, const btVector3& aabbMax) const
{
btAssert(!m_useQuantization);
@@ -387,8 +385,6 @@ void btQuantizedBvh::walkStacklessTree(btNodeOverlapCallback* nodeCallback, cons
curIndex += escapeIndex;
}
}
- if (maxIterations < walkIterations)
- maxIterations = walkIterations;
}
/*
@@ -529,8 +525,6 @@ void btQuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCall
curIndex += escapeIndex;
}
}
- if (maxIterations < walkIterations)
- maxIterations = walkIterations;
}
void btQuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex, int endNodeIndex) const
@@ -654,8 +648,6 @@ void btQuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback*
curIndex += escapeIndex;
}
}
- if (maxIterations < walkIterations)
- maxIterations = walkIterations;
}
void btQuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax, int startNodeIndex, int endNodeIndex) const
@@ -718,8 +710,6 @@ void btQuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallb
curIndex += escapeIndex;
}
}
- if (maxIterations < walkIterations)
- maxIterations = walkIterations;
}
//This traversal can be called from Playstation 3 SPU
diff --git a/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionObject.h b/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionObject.h
index 85dc488c8c..e085c40892 100644
--- a/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionObject.h
+++ b/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionObject.h
@@ -127,6 +127,7 @@ public:
enum CollisionFlags
{
+ CF_DYNAMIC_OBJECT = 0,
CF_STATIC_OBJECT = 1,
CF_KINEMATIC_OBJECT = 2,
CF_NO_CONTACT_RESPONSE = 4,
@@ -251,6 +252,16 @@ public:
m_checkCollideWith = m_objectsWithoutCollisionCheck.size() > 0;
}
+ int getNumObjectsWithoutCollision() const
+ {
+ return m_objectsWithoutCollisionCheck.size();
+ }
+
+ const btCollisionObject* getObjectWithoutCollision(int index)
+ {
+ return m_objectsWithoutCollisionCheck[index];
+ }
+
virtual bool checkCollideWithOverride(const btCollisionObject* co) const
{
int index = m_objectsWithoutCollisionCheck.findLinearSearch(co);
diff --git a/thirdparty/bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp b/thirdparty/bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp
index a4252c296a..a71700f58a 100644
--- a/thirdparty/bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp
+++ b/thirdparty/bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp
@@ -361,7 +361,13 @@ void btGenerateInternalEdgeInfo(btBvhTriangleMeshShape* trimeshShape, btTriangle
for (int j = 2; j >= 0; j--)
{
- int graphicsindex = indicestype == PHY_SHORT ? ((unsigned short*)gfxbase)[j] : gfxbase[j];
+ int graphicsindex;
+ switch (indicestype) {
+ case PHY_INTEGER: graphicsindex = gfxbase[j]; break;
+ case PHY_SHORT: graphicsindex = ((unsigned short*)gfxbase)[j]; break;
+ case PHY_UCHAR: graphicsindex = ((unsigned char*)gfxbase)[j]; break;
+ default: btAssert(0);
+ }
if (type == PHY_FLOAT)
{
float* graphicsbase = (float*)(vertexbase + graphicsindex * stride);
diff --git a/thirdparty/bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp b/thirdparty/bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp
index d663b3d6d6..c66ce58e3e 100644
--- a/thirdparty/bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp
+++ b/thirdparty/bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp
@@ -124,12 +124,17 @@ void btBvhTriangleMeshShape::performRaycast(btTriangleCallback* callback, const
nodeSubPart);
unsigned int* gfxbase = (unsigned int*)(indexbase + nodeTriangleIndex * indexstride);
- btAssert(indicestype == PHY_INTEGER || indicestype == PHY_SHORT);
const btVector3& meshScaling = m_meshInterface->getScaling();
for (int j = 2; j >= 0; j--)
{
- int graphicsindex = indicestype == PHY_SHORT ? ((unsigned short*)gfxbase)[j] : gfxbase[j];
+ int graphicsindex;
+ switch (indicestype) {
+ case PHY_INTEGER: graphicsindex = gfxbase[j]; break;
+ case PHY_SHORT: graphicsindex = ((unsigned short*)gfxbase)[j]; break;
+ case PHY_UCHAR: graphicsindex = ((unsigned char*)gfxbase)[j]; break;
+ default: btAssert(0);
+ }
if (type == PHY_FLOAT)
{
@@ -193,12 +198,17 @@ void btBvhTriangleMeshShape::performConvexcast(btTriangleCallback* callback, con
nodeSubPart);
unsigned int* gfxbase = (unsigned int*)(indexbase + nodeTriangleIndex * indexstride);
- btAssert(indicestype == PHY_INTEGER || indicestype == PHY_SHORT);
const btVector3& meshScaling = m_meshInterface->getScaling();
for (int j = 2; j >= 0; j--)
{
- int graphicsindex = indicestype == PHY_SHORT ? ((unsigned short*)gfxbase)[j] : gfxbase[j];
+ int graphicsindex;
+ switch (indicestype) {
+ case PHY_INTEGER: graphicsindex = gfxbase[j]; break;
+ case PHY_SHORT: graphicsindex = ((unsigned short*)gfxbase)[j]; break;
+ case PHY_UCHAR: graphicsindex = ((unsigned char*)gfxbase)[j]; break;
+ default: btAssert(0);
+ }
if (type == PHY_FLOAT)
{
diff --git a/thirdparty/bullet/BulletCollision/CollisionShapes/btCollisionShape.h b/thirdparty/bullet/BulletCollision/CollisionShapes/btCollisionShape.h
index c80e105a4d..16f9e0c77a 100644
--- a/thirdparty/bullet/BulletCollision/CollisionShapes/btCollisionShape.h
+++ b/thirdparty/bullet/BulletCollision/CollisionShapes/btCollisionShape.h
@@ -30,11 +30,12 @@ protected:
int m_shapeType;
void* m_userPointer;
int m_userIndex;
+ int m_userIndex2;
public:
BT_DECLARE_ALIGNED_ALLOCATOR();
- btCollisionShape() : m_shapeType(INVALID_SHAPE_PROXYTYPE), m_userPointer(0), m_userIndex(-1)
+ btCollisionShape() : m_shapeType(INVALID_SHAPE_PROXYTYPE), m_userPointer(0), m_userIndex(-1), m_userIndex2(-1)
{
}
@@ -137,6 +138,16 @@ public:
return m_userIndex;
}
+ void setUserIndex2(int index)
+ {
+ m_userIndex2 = index;
+ }
+
+ int getUserIndex2() const
+ {
+ return m_userIndex2;
+ }
+
virtual int calculateSerializeBufferSize() const;
///fills the dataBuffer and returns the struct name (and 0 on failure)
diff --git a/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp b/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp
index 34e7926f17..cab6980b65 100644
--- a/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp
+++ b/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp
@@ -21,8 +21,7 @@ btHeightfieldTerrainShape::btHeightfieldTerrainShape(
int heightStickWidth, int heightStickLength, const void* heightfieldData,
btScalar heightScale, btScalar minHeight, btScalar maxHeight, int upAxis,
PHY_ScalarType hdt, bool flipQuadEdges)
- :m_userIndex2(-1),
- m_userValue3(0),
+ :m_userValue3(0),
m_triangleInfoMap(0)
{
initialize(heightStickWidth, heightStickLength, heightfieldData,
@@ -31,8 +30,7 @@ btHeightfieldTerrainShape::btHeightfieldTerrainShape(
}
btHeightfieldTerrainShape::btHeightfieldTerrainShape(int heightStickWidth, int heightStickLength, const void* heightfieldData, btScalar maxHeight, int upAxis, bool useFloatData, bool flipQuadEdges)
- :m_userIndex2(-1),
- m_userValue3(0),
+ : m_userValue3(0),
m_triangleInfoMap(0)
{
// legacy constructor: support only float or unsigned char,
diff --git a/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h b/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h
index 8dea98fc6b..2cf3c00721 100644
--- a/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h
+++ b/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h
@@ -114,7 +114,7 @@ protected:
int m_vboundsGridLength;
int m_vboundsChunkSize;
- int m_userIndex2;
+
btScalar m_userValue3;
struct btTriangleInfoMap* m_triangleInfoMap;
@@ -192,14 +192,6 @@ public:
virtual const char* getName() const { return "HEIGHTFIELD"; }
- void setUserIndex2(int index)
- {
- m_userIndex2 = index;
- }
- int getUserIndex2() const
- {
- return m_userIndex2;
- }
void setUserValue3(btScalar value)
{
m_userValue3 = value;
diff --git a/thirdparty/bullet/BulletCollision/CollisionShapes/btOptimizedBvh.cpp b/thirdparty/bullet/BulletCollision/CollisionShapes/btOptimizedBvh.cpp
index 687399e0a9..863ea6d6ac 100644
--- a/thirdparty/bullet/BulletCollision/CollisionShapes/btOptimizedBvh.cpp
+++ b/thirdparty/bullet/BulletCollision/CollisionShapes/btOptimizedBvh.cpp
@@ -286,7 +286,6 @@ void btOptimizedBvh::updateBvhNodes(btStridingMeshInterface* meshInterface, int
meshInterface->getLockedReadOnlyVertexIndexBase(&vertexbase, numverts, type, stride, &indexbase, indexstride, numfaces, indicestype, nodeSubPart);
curNodeSubPart = nodeSubPart;
- btAssert(indicestype == PHY_INTEGER || indicestype == PHY_SHORT);
}
//triangles->getLockedReadOnlyVertexIndexBase(vertexBase,numVerts,
@@ -294,7 +293,13 @@ void btOptimizedBvh::updateBvhNodes(btStridingMeshInterface* meshInterface, int
for (int j = 2; j >= 0; j--)
{
- int graphicsindex = indicestype == PHY_SHORT ? ((unsigned short*)gfxbase)[j] : gfxbase[j];
+ int graphicsindex;
+ switch (indicestype) {
+ case PHY_INTEGER: graphicsindex = gfxbase[j]; break;
+ case PHY_SHORT: graphicsindex = ((unsigned short*)gfxbase)[j]; break;
+ case PHY_UCHAR: graphicsindex = ((unsigned char*)gfxbase)[j]; break;
+ default: btAssert(0);
+ }
if (type == PHY_FLOAT)
{
float* graphicsbase = (float*)(vertexbase + graphicsindex * stride);
diff --git a/thirdparty/bullet/BulletCollision/CollisionShapes/btSdfCollisionShape.cpp b/thirdparty/bullet/BulletCollision/CollisionShapes/btSdfCollisionShape.cpp
index 4a95dbea4f..23c95ad3ff 100644
--- a/thirdparty/bullet/BulletCollision/CollisionShapes/btSdfCollisionShape.cpp
+++ b/thirdparty/bullet/BulletCollision/CollisionShapes/btSdfCollisionShape.cpp
@@ -2,8 +2,11 @@
#include "btMiniSDF.h"
#include "LinearMath/btAabbUtil2.h"
-struct btSdfCollisionShapeInternalData
+ATTRIBUTE_ALIGNED16(struct)
+btSdfCollisionShapeInternalData
{
+ BT_DECLARE_ALIGNED_ALLOCATOR();
+
btVector3 m_localScaling;
btScalar m_margin;
btMiniSDF m_sdf;
diff --git a/thirdparty/bullet/BulletCollision/Gimpact/btGImpactShape.h b/thirdparty/bullet/BulletCollision/Gimpact/btGImpactShape.h
index 5b85e87041..cc91079579 100644
--- a/thirdparty/bullet/BulletCollision/Gimpact/btGImpactShape.h
+++ b/thirdparty/bullet/BulletCollision/Gimpact/btGImpactShape.h
@@ -623,13 +623,21 @@ public:
i1 = s_indices[1];
i2 = s_indices[2];
}
- else
+ else if (indicestype == PHY_INTEGER)
{
unsigned int* i_indices = (unsigned int*)(indexbase + face_index * indexstride);
i0 = i_indices[0];
i1 = i_indices[1];
i2 = i_indices[2];
}
+ else
+ {
+ btAssert(indicestype == PHY_UCHAR);
+ unsigned char* i_indices = (unsigned char*)(indexbase + face_index * indexstride);
+ i0 = i_indices[0];
+ i1 = i_indices[1];
+ i2 = i_indices[2];
+ }
}
SIMD_FORCE_INLINE void get_vertex(unsigned int vertex_index, btVector3& vertex) const
diff --git a/thirdparty/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp b/thirdparty/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp
index 45d1817135..7d53f8624a 100644
--- a/thirdparty/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp
+++ b/thirdparty/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp
@@ -1049,7 +1049,8 @@ btScalar btGjkEpaSolver2::SignedDistance(const btVector3& position,
const btScalar length = delta.length();
results.normal = delta / length;
results.witnesses[0] += results.normal * margin;
- return (length - margin);
+ results.distance = length - margin;
+ return results.distance;
}
else
{
diff --git a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp
index 27f76b8425..0f5ed1c2ce 100644
--- a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp
+++ b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp
@@ -852,7 +852,7 @@ static void setupSpatialGridBatchesMt(
memHelper.addChunk((void**)&constraintRowBatchIds, sizeof(int) * numConstraintRows);
size_t scratchSize = memHelper.getSizeToAllocate();
// if we need to reallocate
- if (scratchMemory->capacity() < scratchSize)
+ if (static_cast<size_t>(scratchMemory->capacity()) < scratchSize)
{
// allocate 6.25% extra to avoid repeated reallocs
scratchMemory->reserve(scratchSize + scratchSize / 16);
diff --git a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h
index 4356c12abf..3316403a87 100644
--- a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h
+++ b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h
@@ -47,6 +47,8 @@ struct btContactSolverInfoData
btScalar m_erp; //error reduction for non-contact constraints
btScalar m_erp2; //error reduction for contact constraints
btScalar m_deformable_erp; //error reduction for deformable constraints
+ btScalar m_deformable_cfm; //constraint force mixing for deformable constraints
+ btScalar m_deformable_maxErrorReduction; // maxErrorReduction for deformable contact
btScalar m_globalCfm; //constraint force mixing for contacts and non-contacts
btScalar m_frictionERP; //error reduction for friction constraints
btScalar m_frictionCFM; //constraint force mixing for friction constraints
@@ -83,7 +85,9 @@ struct btContactSolverInfo : public btContactSolverInfoData
m_numIterations = 10;
m_erp = btScalar(0.2);
m_erp2 = btScalar(0.2);
- m_deformable_erp = btScalar(0.1);
+ m_deformable_erp = btScalar(0.06);
+ m_deformable_cfm = btScalar(0.01);
+ m_deformable_maxErrorReduction = btScalar(0.1);
m_globalCfm = btScalar(0.);
m_frictionERP = btScalar(0.2); //positional friction 'anchors' are disabled by default
m_frictionCFM = btScalar(0.);
diff --git a/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.h b/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.h
index 943d724cce..7442dd1e6a 100644
--- a/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.h
+++ b/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.h
@@ -356,12 +356,12 @@ public:
}
}
- btVector3 getPushVelocity()
+ btVector3 getPushVelocity() const
{
return m_pushVelocity;
}
- btVector3 getTurnVelocity()
+ btVector3 getTurnVelocity() const
{
return m_turnVelocity;
}
@@ -465,6 +465,12 @@ public:
//for kinematic objects, we could also use use:
// return (m_worldTransform(rel_pos) - m_interpolationWorldTransform(rel_pos)) / m_kinematicTimeStep;
}
+
+ btVector3 getPushVelocityInLocalPoint(const btVector3& rel_pos) const
+ {
+ //we also calculate lin/ang velocity for kinematic objects
+ return m_pushVelocity + m_turnVelocity.cross(rel_pos);
+ }
void translate(const btVector3& v)
{
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.cpp
index a1d5bb9ca8..bec8c6530d 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.cpp
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.cpp
@@ -344,6 +344,8 @@ void btMultiBody::finalizeMultiDof()
{
m_deltaV.resize(0);
m_deltaV.resize(6 + m_dofCount);
+ m_splitV.resize(0);
+ m_splitV.resize(6 + m_dofCount);
m_realBuf.resize(6 + m_dofCount + m_dofCount * m_dofCount + 6 + m_dofCount); //m_dofCount for joint-space vels + m_dofCount^2 for "D" matrices + delta-pos vector (6 base "vels" + joint "vels")
m_vectorBuf.resize(2 * m_dofCount); //two 3-vectors (i.e. one six-vector) for each system dof ("h" matrices)
m_matrixBuf.resize(m_links.size() + 1);
@@ -671,6 +673,30 @@ btScalar *btMultiBody::getJointTorqueMultiDof(int i)
return &m_links[i].m_jointTorque[0];
}
+bool btMultiBody::hasFixedBase() const
+{
+ return m_fixedBase || (getBaseCollider() && getBaseCollider()->isStaticObject());
+}
+
+bool btMultiBody::isBaseStaticOrKinematic() const
+{
+ return m_fixedBase || (getBaseCollider() && getBaseCollider()->isStaticOrKinematicObject());
+}
+
+bool btMultiBody::isBaseKinematic() const
+{
+ return getBaseCollider() && getBaseCollider()->isKinematicObject();
+}
+
+void btMultiBody::setBaseDynamicType(int dynamicType)
+{
+ if(getBaseCollider()) {
+ int oldFlags = getBaseCollider()->getCollisionFlags();
+ oldFlags &= ~(btCollisionObject::CF_STATIC_OBJECT | btCollisionObject::CF_KINEMATIC_OBJECT);
+ getBaseCollider()->setCollisionFlags(oldFlags | dynamicType);
+ }
+}
+
inline btMatrix3x3 outerProduct(const btVector3 &v0, const btVector3 &v1) //renamed it from vecMulVecTranspose (http://en.wikipedia.org/wiki/Outer_product); maybe it should be moved to btVector3 like dot and cross?
{
btVector3 row0 = btVector3(
@@ -796,7 +822,7 @@ void btMultiBody::computeAccelerationsArticulatedBodyAlgorithmMultiDof(btScalar
//create the vector of spatial velocity of the base by transforming global-coor linear and angular velocities into base-local coordinates
spatVel[0].setVector(rot_from_parent[0] * base_omega, rot_from_parent[0] * base_vel);
- if (m_fixedBase)
+ if (isBaseStaticOrKinematic())
{
zeroAccSpatFrc[0].setZero();
}
@@ -872,31 +898,53 @@ void btMultiBody::computeAccelerationsArticulatedBodyAlgorithmMultiDof(btScalar
// calculate zhat_i^A
//
- //external forces
- btVector3 linkAppliedForce = isConstraintPass ? m_links[i].m_appliedConstraintForce : m_links[i].m_appliedForce;
- btVector3 linkAppliedTorque = isConstraintPass ? m_links[i].m_appliedConstraintTorque : m_links[i].m_appliedTorque;
+ if (isLinkAndAllAncestorsKinematic(i))
+ {
+ zeroAccSpatFrc[i].setZero();
+ }
+ else{
+ //external forces
+ btVector3 linkAppliedForce = isConstraintPass ? m_links[i].m_appliedConstraintForce : m_links[i].m_appliedForce;
+ btVector3 linkAppliedTorque = isConstraintPass ? m_links[i].m_appliedConstraintTorque : m_links[i].m_appliedTorque;
- zeroAccSpatFrc[i + 1].setVector(-(rot_from_world[i + 1] * linkAppliedTorque), -(rot_from_world[i + 1] * linkAppliedForce));
+ zeroAccSpatFrc[i + 1].setVector(-(rot_from_world[i + 1] * linkAppliedTorque), -(rot_from_world[i + 1] * linkAppliedForce));
#if 0
- {
+ {
- b3Printf("stepVelocitiesMultiDof zeroAccSpatFrc[%d] linear:%f,%f,%f, angular:%f,%f,%f",
- i+1,
- zeroAccSpatFrc[i+1].m_topVec[0],
- zeroAccSpatFrc[i+1].m_topVec[1],
- zeroAccSpatFrc[i+1].m_topVec[2],
+ b3Printf("stepVelocitiesMultiDof zeroAccSpatFrc[%d] linear:%f,%f,%f, angular:%f,%f,%f",
+ i+1,
+ zeroAccSpatFrc[i+1].m_topVec[0],
+ zeroAccSpatFrc[i+1].m_topVec[1],
+ zeroAccSpatFrc[i+1].m_topVec[2],
- zeroAccSpatFrc[i+1].m_bottomVec[0],
- zeroAccSpatFrc[i+1].m_bottomVec[1],
- zeroAccSpatFrc[i+1].m_bottomVec[2]);
- }
+ zeroAccSpatFrc[i+1].m_bottomVec[0],
+ zeroAccSpatFrc[i+1].m_bottomVec[1],
+ zeroAccSpatFrc[i+1].m_bottomVec[2]);
+ }
#endif
- //
- //adding damping terms (only)
- btScalar linDampMult = 1., angDampMult = 1.;
- zeroAccSpatFrc[i + 1].addVector(angDampMult * m_links[i].m_inertiaLocal * spatVel[i + 1].getAngular() * (DAMPING_K1_ANGULAR + DAMPING_K2_ANGULAR * spatVel[i + 1].getAngular().safeNorm()),
- linDampMult * m_links[i].m_mass * spatVel[i + 1].getLinear() * (DAMPING_K1_LINEAR + DAMPING_K2_LINEAR * spatVel[i + 1].getLinear().safeNorm()));
+ //
+ //adding damping terms (only)
+ btScalar linDampMult = 1., angDampMult = 1.;
+ zeroAccSpatFrc[i + 1].addVector(angDampMult * m_links[i].m_inertiaLocal * spatVel[i + 1].getAngular() * (DAMPING_K1_ANGULAR + DAMPING_K2_ANGULAR * spatVel[i + 1].getAngular().safeNorm()),
+ linDampMult * m_links[i].m_mass * spatVel[i + 1].getLinear() * (DAMPING_K1_LINEAR + DAMPING_K2_LINEAR * spatVel[i + 1].getLinear().safeNorm()));
+ //p += vhat x Ihat vhat - done in a simpler way
+ if (m_useGyroTerm)
+ zeroAccSpatFrc[i + 1].addAngular(spatVel[i + 1].getAngular().cross(m_links[i].m_inertiaLocal * spatVel[i + 1].getAngular()));
+ //
+ zeroAccSpatFrc[i + 1].addLinear(m_links[i].m_mass * spatVel[i + 1].getAngular().cross(spatVel[i + 1].getLinear()));
+ //
+ //btVector3 temp = m_links[i].m_mass * spatVel[i+1].getAngular().cross(spatVel[i+1].getLinear());
+ ////clamp parent's omega
+ //btScalar parOmegaMod = temp.length();
+ //btScalar parOmegaModMax = 1000;
+ //if(parOmegaMod > parOmegaModMax)
+ // temp *= parOmegaModMax / parOmegaMod;
+ //zeroAccSpatFrc[i+1].addLinear(temp);
+ //printf("|zeroAccSpatFrc[%d]| = %.4f\n", i+1, temp.length());
+ //temp = spatCoriolisAcc[i].getLinear();
+ //printf("|spatCoriolisAcc[%d]| = %.4f\n", i+1, temp.length());
+ }
// calculate Ihat_i^A
//init the spatial AB inertia (it has the simple form thanks to choosing local body frames origins at their COMs)
@@ -909,22 +957,6 @@ void btMultiBody::computeAccelerationsArticulatedBodyAlgorithmMultiDof(btScalar
btMatrix3x3(m_links[i].m_inertiaLocal[0], 0, 0,
0, m_links[i].m_inertiaLocal[1], 0,
0, 0, m_links[i].m_inertiaLocal[2]));
- //
- //p += vhat x Ihat vhat - done in a simpler way
- if (m_useGyroTerm)
- zeroAccSpatFrc[i + 1].addAngular(spatVel[i + 1].getAngular().cross(m_links[i].m_inertiaLocal * spatVel[i + 1].getAngular()));
- //
- zeroAccSpatFrc[i + 1].addLinear(m_links[i].m_mass * spatVel[i + 1].getAngular().cross(spatVel[i + 1].getLinear()));
- //btVector3 temp = m_links[i].m_mass * spatVel[i+1].getAngular().cross(spatVel[i+1].getLinear());
- ////clamp parent's omega
- //btScalar parOmegaMod = temp.length();
- //btScalar parOmegaModMax = 1000;
- //if(parOmegaMod > parOmegaModMax)
- // temp *= parOmegaModMax / parOmegaMod;
- //zeroAccSpatFrc[i+1].addLinear(temp);
- //printf("|zeroAccSpatFrc[%d]| = %.4f\n", i+1, temp.length());
- //temp = spatCoriolisAcc[i].getLinear();
- //printf("|spatCoriolisAcc[%d]| = %.4f\n", i+1, temp.length());
//printf("w[%d] = [%.4f %.4f %.4f]\n", i, vel_top_angular[i+1].x(), vel_top_angular[i+1].y(), vel_top_angular[i+1].z());
//printf("v[%d] = [%.4f %.4f %.4f]\n", i, vel_bottom_linear[i+1].x(), vel_bottom_linear[i+1].y(), vel_bottom_linear[i+1].z());
@@ -935,6 +967,8 @@ void btMultiBody::computeAccelerationsArticulatedBodyAlgorithmMultiDof(btScalar
// (part of TreeForwardDynamics in Mirtich.)
for (int i = num_links - 1; i >= 0; --i)
{
+ if(isLinkAndAllAncestorsKinematic(i))
+ continue;
const int parent = m_links[i].m_parent;
fromParent.m_rotMat = rot_from_parent[i + 1];
fromParent.m_trnVec = m_links[i].m_cachedRVector;
@@ -1047,7 +1081,7 @@ void btMultiBody::computeAccelerationsArticulatedBodyAlgorithmMultiDof(btScalar
// Second 'upward' loop
// (part of TreeForwardDynamics in Mirtich)
- if (m_fixedBase)
+ if (isBaseStaticOrKinematic())
{
spatAcc[0].setZero();
}
@@ -1081,21 +1115,23 @@ void btMultiBody::computeAccelerationsArticulatedBodyAlgorithmMultiDof(btScalar
fromParent.transform(spatAcc[parent + 1], spatAcc[i + 1]);
- for (int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+ if(!isLinkAndAllAncestorsKinematic(i))
{
- const btSpatialForceVector &hDof = h[m_links[i].m_dofOffset + dof];
- //
- Y_minus_hT_a[dof] = Y[m_links[i].m_dofOffset + dof] - spatAcc[i + 1].dot(hDof);
- }
-
- btScalar *invDi = &invD[m_links[i].m_dofOffset * m_links[i].m_dofOffset];
- //D^{-1} * (Y - h^{T}*apar)
- mulMatrix(invDi, Y_minus_hT_a, m_links[i].m_dofCount, m_links[i].m_dofCount, m_links[i].m_dofCount, 1, &joint_accel[m_links[i].m_dofOffset]);
+ for (int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+ {
+ const btSpatialForceVector &hDof = h[m_links[i].m_dofOffset + dof];
+ //
+ Y_minus_hT_a[dof] = Y[m_links[i].m_dofOffset + dof] - spatAcc[i + 1].dot(hDof);
+ }
+ btScalar *invDi = &invD[m_links[i].m_dofOffset * m_links[i].m_dofOffset];
+ //D^{-1} * (Y - h^{T}*apar)
+ mulMatrix(invDi, Y_minus_hT_a, m_links[i].m_dofCount, m_links[i].m_dofCount, m_links[i].m_dofCount, 1, &joint_accel[m_links[i].m_dofOffset]);
- spatAcc[i + 1] += spatCoriolisAcc[i];
+ spatAcc[i + 1] += spatCoriolisAcc[i];
- for (int dof = 0; dof < m_links[i].m_dofCount; ++dof)
- spatAcc[i + 1] += m_links[i].m_axes[dof] * joint_accel[m_links[i].m_dofOffset + dof];
+ for (int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+ spatAcc[i + 1] += m_links[i].m_axes[dof] * joint_accel[m_links[i].m_dofOffset + dof];
+ }
if (m_links[i].m_jointFeedback)
{
@@ -1432,7 +1468,7 @@ void btMultiBody::calcAccelerationDeltasMultiDof(const btScalar *force, btScalar
// Fill in zero_acc
// -- set to force/torque on the base, zero otherwise
- if (m_fixedBase)
+ if (isBaseStaticOrKinematic())
{
zeroAccSpatFrc[0].setZero();
}
@@ -1451,6 +1487,8 @@ void btMultiBody::calcAccelerationDeltasMultiDof(const btScalar *force, btScalar
// (part of TreeForwardDynamics in Mirtich.)
for (int i = num_links - 1; i >= 0; --i)
{
+ if(isLinkAndAllAncestorsKinematic(i))
+ continue;
const int parent = m_links[i].m_parent;
fromParent.m_rotMat = rot_from_parent[i + 1];
fromParent.m_trnVec = m_links[i].m_cachedRVector;
@@ -1494,7 +1532,7 @@ void btMultiBody::calcAccelerationDeltasMultiDof(const btScalar *force, btScalar
// Second 'upward' loop
// (part of TreeForwardDynamics in Mirtich)
- if (m_fixedBase)
+ if (isBaseStaticOrKinematic())
{
spatAcc[0].setZero();
}
@@ -1507,6 +1545,8 @@ void btMultiBody::calcAccelerationDeltasMultiDof(const btScalar *force, btScalar
// now do the loop over the m_links
for (int i = 0; i < num_links; ++i)
{
+ if(isLinkAndAllAncestorsKinematic(i))
+ continue;
const int parent = m_links[i].m_parent;
fromParent.m_rotMat = rot_from_parent[i + 1];
fromParent.m_trnVec = m_links[i].m_cachedRVector;
@@ -1550,23 +1590,26 @@ void btMultiBody::calcAccelerationDeltasMultiDof(const btScalar *force, btScalar
void btMultiBody::predictPositionsMultiDof(btScalar dt)
{
int num_links = getNumLinks();
- // step position by adding dt * velocity
- //btVector3 v = getBaseVel();
- //m_basePos += dt * v;
- //
- btScalar *pBasePos;
- btScalar *pBaseVel = &m_realBuf[3]; //note: the !pqd case assumes m_realBuf holds with base velocity at 3,4,5 (should be wrapped for safety)
-
- // reset to current position
- for (int i = 0; i < 3; ++i)
- {
- m_basePos_interpolate[i] = m_basePos[i];
- }
- pBasePos = m_basePos_interpolate;
+ if(!isBaseKinematic())
+ {
+ // step position by adding dt * velocity
+ //btVector3 v = getBaseVel();
+ //m_basePos += dt * v;
+ //
+ btScalar *pBasePos;
+ btScalar *pBaseVel = &m_realBuf[3]; //note: the !pqd case assumes m_realBuf holds with base velocity at 3,4,5 (should be wrapped for safety)
- pBasePos[0] += dt * pBaseVel[0];
- pBasePos[1] += dt * pBaseVel[1];
- pBasePos[2] += dt * pBaseVel[2];
+ // reset to current position
+ for (int i = 0; i < 3; ++i)
+ {
+ m_basePos_interpolate[i] = m_basePos[i];
+ }
+ pBasePos = m_basePos_interpolate;
+
+ pBasePos[0] += dt * pBaseVel[0];
+ pBasePos[1] += dt * pBaseVel[1];
+ pBasePos[2] += dt * pBaseVel[2];
+ }
///////////////////////////////
//local functor for quaternion integration (to avoid error prone redundancy)
@@ -1617,26 +1660,29 @@ void btMultiBody::predictPositionsMultiDof(btScalar dt)
//pQuatUpdateFun(getBaseOmega(), m_baseQuat, true, dt);
//
- btScalar *pBaseQuat;
-
- // reset to current orientation
- for (int i = 0; i < 4; ++i)
- {
- m_baseQuat_interpolate[i] = m_baseQuat[i];
- }
- pBaseQuat = m_baseQuat_interpolate;
+ if(!isBaseKinematic())
+ {
+ btScalar *pBaseQuat;
- btScalar *pBaseOmega = &m_realBuf[0]; //note: the !pqd case assumes m_realBuf starts with base omega (should be wrapped for safety)
- //
- btQuaternion baseQuat;
- baseQuat.setValue(pBaseQuat[0], pBaseQuat[1], pBaseQuat[2], pBaseQuat[3]);
- btVector3 baseOmega;
- baseOmega.setValue(pBaseOmega[0], pBaseOmega[1], pBaseOmega[2]);
- pQuatUpdateFun(baseOmega, baseQuat, true, dt);
- pBaseQuat[0] = baseQuat.x();
- pBaseQuat[1] = baseQuat.y();
- pBaseQuat[2] = baseQuat.z();
- pBaseQuat[3] = baseQuat.w();
+ // reset to current orientation
+ for (int i = 0; i < 4; ++i)
+ {
+ m_baseQuat_interpolate[i] = m_baseQuat[i];
+ }
+ pBaseQuat = m_baseQuat_interpolate;
+
+ btScalar *pBaseOmega = &m_realBuf[0]; //note: the !pqd case assumes m_realBuf starts with base omega (should be wrapped for safety)
+ //
+ btQuaternion baseQuat;
+ baseQuat.setValue(pBaseQuat[0], pBaseQuat[1], pBaseQuat[2], pBaseQuat[3]);
+ btVector3 baseOmega;
+ baseOmega.setValue(pBaseOmega[0], pBaseOmega[1], pBaseOmega[2]);
+ pQuatUpdateFun(baseOmega, baseQuat, true, dt);
+ pBaseQuat[0] = baseQuat.x();
+ pBaseQuat[1] = baseQuat.y();
+ pBaseQuat[2] = baseQuat.z();
+ pBaseQuat[3] = baseQuat.w();
+ }
// Finally we can update m_jointPos for each of the m_links
for (int i = 0; i < num_links; ++i)
@@ -1644,55 +1690,88 @@ void btMultiBody::predictPositionsMultiDof(btScalar dt)
btScalar *pJointPos;
pJointPos = &m_links[i].m_jointPos_interpolate[0];
- btScalar *pJointVel = getJointVelMultiDof(i);
-
- switch (m_links[i].m_jointType)
- {
- case btMultibodyLink::ePrismatic:
- case btMultibodyLink::eRevolute:
- {
- //reset to current pos
- pJointPos[0] = m_links[i].m_jointPos[0];
- btScalar jointVel = pJointVel[0];
- pJointPos[0] += dt * jointVel;
- break;
- }
- case btMultibodyLink::eSpherical:
- {
- //reset to current pos
-
- for (int j = 0; j < 4; ++j)
+ if (m_links[i].m_collider && m_links[i].m_collider->isStaticOrKinematic())
+ {
+ switch (m_links[i].m_jointType)
+ {
+ case btMultibodyLink::ePrismatic:
+ case btMultibodyLink::eRevolute:
{
- pJointPos[j] = m_links[i].m_jointPos[j];
+ pJointPos[0] = m_links[i].m_jointPos[0];
+ break;
}
-
- btVector3 jointVel;
- jointVel.setValue(pJointVel[0], pJointVel[1], pJointVel[2]);
- btQuaternion jointOri;
- jointOri.setValue(pJointPos[0], pJointPos[1], pJointPos[2], pJointPos[3]);
- pQuatUpdateFun(jointVel, jointOri, false, dt);
- pJointPos[0] = jointOri.x();
- pJointPos[1] = jointOri.y();
- pJointPos[2] = jointOri.z();
- pJointPos[3] = jointOri.w();
- break;
- }
- case btMultibodyLink::ePlanar:
- {
- for (int j = 0; j < 3; ++j)
+ case btMultibodyLink::eSpherical:
{
- pJointPos[j] = m_links[i].m_jointPos[j];
+ for (int j = 0; j < 4; ++j)
+ {
+ pJointPos[j] = m_links[i].m_jointPos[j];
+ }
+ break;
}
- pJointPos[0] += dt * getJointVelMultiDof(i)[0];
-
- btVector3 q0_coors_qd1qd2 = getJointVelMultiDof(i)[1] * m_links[i].getAxisBottom(1) + getJointVelMultiDof(i)[2] * m_links[i].getAxisBottom(2);
- btVector3 no_q0_coors_qd1qd2 = quatRotate(btQuaternion(m_links[i].getAxisTop(0), pJointPos[0]), q0_coors_qd1qd2);
- pJointPos[1] += m_links[i].getAxisBottom(1).dot(no_q0_coors_qd1qd2) * dt;
- pJointPos[2] += m_links[i].getAxisBottom(2).dot(no_q0_coors_qd1qd2) * dt;
- break;
+ case btMultibodyLink::ePlanar:
+ {
+ for (int j = 0; j < 3; ++j)
+ {
+ pJointPos[j] = m_links[i].m_jointPos[j];
+ }
+ break;
+ }
+ default:
+ break;
}
- default:
+ }
+ else
+ {
+ btScalar *pJointVel = getJointVelMultiDof(i);
+
+ switch (m_links[i].m_jointType)
{
+ case btMultibodyLink::ePrismatic:
+ case btMultibodyLink::eRevolute:
+ {
+ //reset to current pos
+ pJointPos[0] = m_links[i].m_jointPos[0];
+ btScalar jointVel = pJointVel[0];
+ pJointPos[0] += dt * jointVel;
+ break;
+ }
+ case btMultibodyLink::eSpherical:
+ {
+ //reset to current pos
+
+ for (int j = 0; j < 4; ++j)
+ {
+ pJointPos[j] = m_links[i].m_jointPos[j];
+ }
+
+ btVector3 jointVel;
+ jointVel.setValue(pJointVel[0], pJointVel[1], pJointVel[2]);
+ btQuaternion jointOri;
+ jointOri.setValue(pJointPos[0], pJointPos[1], pJointPos[2], pJointPos[3]);
+ pQuatUpdateFun(jointVel, jointOri, false, dt);
+ pJointPos[0] = jointOri.x();
+ pJointPos[1] = jointOri.y();
+ pJointPos[2] = jointOri.z();
+ pJointPos[3] = jointOri.w();
+ break;
+ }
+ case btMultibodyLink::ePlanar:
+ {
+ for (int j = 0; j < 3; ++j)
+ {
+ pJointPos[j] = m_links[i].m_jointPos[j];
+ }
+ pJointPos[0] += dt * getJointVelMultiDof(i)[0];
+
+ btVector3 q0_coors_qd1qd2 = getJointVelMultiDof(i)[1] * m_links[i].getAxisBottom(1) + getJointVelMultiDof(i)[2] * m_links[i].getAxisBottom(2);
+ btVector3 no_q0_coors_qd1qd2 = quatRotate(btQuaternion(m_links[i].getAxisTop(0), pJointPos[0]), q0_coors_qd1qd2);
+ pJointPos[1] += m_links[i].getAxisBottom(1).dot(no_q0_coors_qd1qd2) * dt;
+ pJointPos[2] += m_links[i].getAxisBottom(2).dot(no_q0_coors_qd1qd2) * dt;
+ break;
+ }
+ default:
+ {
+ }
}
}
@@ -1703,16 +1782,19 @@ void btMultiBody::predictPositionsMultiDof(btScalar dt)
void btMultiBody::stepPositionsMultiDof(btScalar dt, btScalar *pq, btScalar *pqd)
{
int num_links = getNumLinks();
- // step position by adding dt * velocity
- //btVector3 v = getBaseVel();
- //m_basePos += dt * v;
- //
- btScalar *pBasePos = (pq ? &pq[4] : m_basePos);
- btScalar *pBaseVel = (pqd ? &pqd[3] : &m_realBuf[3]); //note: the !pqd case assumes m_realBuf holds with base velocity at 3,4,5 (should be wrapped for safety)
-
- pBasePos[0] += dt * pBaseVel[0];
- pBasePos[1] += dt * pBaseVel[1];
- pBasePos[2] += dt * pBaseVel[2];
+ if(!isBaseKinematic())
+ {
+ // step position by adding dt * velocity
+ //btVector3 v = getBaseVel();
+ //m_basePos += dt * v;
+ //
+ btScalar *pBasePos = (pq ? &pq[4] : m_basePos);
+ btScalar *pBaseVel = (pqd ? &pqd[3] : &m_realBuf[3]); //note: the !pqd case assumes m_realBuf holds with base velocity at 3,4,5 (should be wrapped for safety)
+
+ pBasePos[0] += dt * pBaseVel[0];
+ pBasePos[1] += dt * pBaseVel[1];
+ pBasePos[2] += dt * pBaseVel[2];
+ }
///////////////////////////////
//local functor for quaternion integration (to avoid error prone redundancy)
@@ -1763,22 +1845,25 @@ void btMultiBody::stepPositionsMultiDof(btScalar dt, btScalar *pq, btScalar *pqd
//pQuatUpdateFun(getBaseOmega(), m_baseQuat, true, dt);
//
- btScalar *pBaseQuat = pq ? pq : m_baseQuat;
- btScalar *pBaseOmega = pqd ? pqd : &m_realBuf[0]; //note: the !pqd case assumes m_realBuf starts with base omega (should be wrapped for safety)
- //
- btQuaternion baseQuat;
- baseQuat.setValue(pBaseQuat[0], pBaseQuat[1], pBaseQuat[2], pBaseQuat[3]);
- btVector3 baseOmega;
- baseOmega.setValue(pBaseOmega[0], pBaseOmega[1], pBaseOmega[2]);
- pQuatUpdateFun(baseOmega, baseQuat, true, dt);
- pBaseQuat[0] = baseQuat.x();
- pBaseQuat[1] = baseQuat.y();
- pBaseQuat[2] = baseQuat.z();
- pBaseQuat[3] = baseQuat.w();
-
- //printf("pBaseOmega = %.4f %.4f %.4f\n", pBaseOmega->x(), pBaseOmega->y(), pBaseOmega->z());
- //printf("pBaseVel = %.4f %.4f %.4f\n", pBaseVel->x(), pBaseVel->y(), pBaseVel->z());
- //printf("baseQuat = %.4f %.4f %.4f %.4f\n", pBaseQuat->x(), pBaseQuat->y(), pBaseQuat->z(), pBaseQuat->w());
+ if(!isBaseKinematic())
+ {
+ btScalar *pBaseQuat = pq ? pq : m_baseQuat;
+ btScalar *pBaseOmega = pqd ? pqd : &m_realBuf[0]; //note: the !pqd case assumes m_realBuf starts with base omega (should be wrapped for safety)
+ //
+ btQuaternion baseQuat;
+ baseQuat.setValue(pBaseQuat[0], pBaseQuat[1], pBaseQuat[2], pBaseQuat[3]);
+ btVector3 baseOmega;
+ baseOmega.setValue(pBaseOmega[0], pBaseOmega[1], pBaseOmega[2]);
+ pQuatUpdateFun(baseOmega, baseQuat, true, dt);
+ pBaseQuat[0] = baseQuat.x();
+ pBaseQuat[1] = baseQuat.y();
+ pBaseQuat[2] = baseQuat.z();
+ pBaseQuat[3] = baseQuat.w();
+
+ //printf("pBaseOmega = %.4f %.4f %.4f\n", pBaseOmega->x(), pBaseOmega->y(), pBaseOmega->z());
+ //printf("pBaseVel = %.4f %.4f %.4f\n", pBaseVel->x(), pBaseVel->y(), pBaseVel->z());
+ //printf("baseQuat = %.4f %.4f %.4f %.4f\n", pBaseQuat->x(), pBaseQuat->y(), pBaseQuat->z(), pBaseQuat->w());
+ }
if (pq)
pq += 7;
@@ -1788,48 +1873,51 @@ void btMultiBody::stepPositionsMultiDof(btScalar dt, btScalar *pq, btScalar *pqd
// Finally we can update m_jointPos for each of the m_links
for (int i = 0; i < num_links; ++i)
{
- btScalar *pJointPos;
- pJointPos= (pq ? pq : &m_links[i].m_jointPos[0]);
-
- btScalar *pJointVel = (pqd ? pqd : getJointVelMultiDof(i));
-
- switch (m_links[i].m_jointType)
+ if (!(m_links[i].m_collider && m_links[i].m_collider->isStaticOrKinematic()))
{
- case btMultibodyLink::ePrismatic:
- case btMultibodyLink::eRevolute:
- {
- //reset to current pos
- btScalar jointVel = pJointVel[0];
- pJointPos[0] += dt * jointVel;
- break;
- }
- case btMultibodyLink::eSpherical:
- {
- //reset to current pos
- btVector3 jointVel;
- jointVel.setValue(pJointVel[0], pJointVel[1], pJointVel[2]);
- btQuaternion jointOri;
- jointOri.setValue(pJointPos[0], pJointPos[1], pJointPos[2], pJointPos[3]);
- pQuatUpdateFun(jointVel, jointOri, false, dt);
- pJointPos[0] = jointOri.x();
- pJointPos[1] = jointOri.y();
- pJointPos[2] = jointOri.z();
- pJointPos[3] = jointOri.w();
- break;
- }
- case btMultibodyLink::ePlanar:
+ btScalar *pJointPos;
+ pJointPos= (pq ? pq : &m_links[i].m_jointPos[0]);
+
+ btScalar *pJointVel = (pqd ? pqd : getJointVelMultiDof(i));
+
+ switch (m_links[i].m_jointType)
{
- pJointPos[0] += dt * getJointVelMultiDof(i)[0];
+ case btMultibodyLink::ePrismatic:
+ case btMultibodyLink::eRevolute:
+ {
+ //reset to current pos
+ btScalar jointVel = pJointVel[0];
+ pJointPos[0] += dt * jointVel;
+ break;
+ }
+ case btMultibodyLink::eSpherical:
+ {
+ //reset to current pos
+ btVector3 jointVel;
+ jointVel.setValue(pJointVel[0], pJointVel[1], pJointVel[2]);
+ btQuaternion jointOri;
+ jointOri.setValue(pJointPos[0], pJointPos[1], pJointPos[2], pJointPos[3]);
+ pQuatUpdateFun(jointVel, jointOri, false, dt);
+ pJointPos[0] = jointOri.x();
+ pJointPos[1] = jointOri.y();
+ pJointPos[2] = jointOri.z();
+ pJointPos[3] = jointOri.w();
+ break;
+ }
+ case btMultibodyLink::ePlanar:
+ {
+ pJointPos[0] += dt * getJointVelMultiDof(i)[0];
- btVector3 q0_coors_qd1qd2 = getJointVelMultiDof(i)[1] * m_links[i].getAxisBottom(1) + getJointVelMultiDof(i)[2] * m_links[i].getAxisBottom(2);
- btVector3 no_q0_coors_qd1qd2 = quatRotate(btQuaternion(m_links[i].getAxisTop(0), pJointPos[0]), q0_coors_qd1qd2);
- pJointPos[1] += m_links[i].getAxisBottom(1).dot(no_q0_coors_qd1qd2) * dt;
- pJointPos[2] += m_links[i].getAxisBottom(2).dot(no_q0_coors_qd1qd2) * dt;
+ btVector3 q0_coors_qd1qd2 = getJointVelMultiDof(i)[1] * m_links[i].getAxisBottom(1) + getJointVelMultiDof(i)[2] * m_links[i].getAxisBottom(2);
+ btVector3 no_q0_coors_qd1qd2 = quatRotate(btQuaternion(m_links[i].getAxisTop(0), pJointPos[0]), q0_coors_qd1qd2);
+ pJointPos[1] += m_links[i].getAxisBottom(1).dot(no_q0_coors_qd1qd2) * dt;
+ pJointPos[2] += m_links[i].getAxisBottom(2).dot(no_q0_coors_qd1qd2) * dt;
- break;
- }
- default:
- {
+ break;
+ }
+ default:
+ {
+ }
}
}
@@ -2135,8 +2223,15 @@ void btMultiBody::updateCollisionObjectInterpolationWorldTransforms(btAlignedObj
world_to_local.resize(getNumLinks() + 1);
local_origin.resize(getNumLinks() + 1);
- world_to_local[0] = getInterpolateWorldToBaseRot();
- local_origin[0] = getInterpolateBasePos();
+ if(isBaseKinematic()){
+ world_to_local[0] = getWorldToBaseRot();
+ local_origin[0] = getBasePos();
+ }
+ else
+ {
+ world_to_local[0] = getInterpolateWorldToBaseRot();
+ local_origin[0] = getInterpolateBasePos();
+ }
if (getBaseCollider())
{
@@ -2282,3 +2377,81 @@ const char *btMultiBody::serialize(void *dataBuffer, class btSerializer *seriali
return btMultiBodyDataName;
}
+
+void btMultiBody::saveKinematicState(btScalar timeStep)
+{
+ //todo: clamp to some (user definable) safe minimum timestep, to limit maximum angular/linear velocities
+ if (timeStep != btScalar(0.))
+ {
+ btVector3 linearVelocity, angularVelocity;
+ btTransformUtil::calculateVelocity(getInterpolateBaseWorldTransform(), getBaseWorldTransform(), timeStep, linearVelocity, angularVelocity);
+ setBaseVel(linearVelocity);
+ setBaseOmega(angularVelocity);
+ setInterpolateBaseWorldTransform(getBaseWorldTransform());
+ }
+}
+
+void btMultiBody::setLinkDynamicType(const int i, int type)
+{
+ if (i == -1)
+ {
+ setBaseDynamicType(type);
+ }
+ else if (i >= 0 && i < getNumLinks())
+ {
+ if (m_links[i].m_collider)
+ {
+ m_links[i].m_collider->setDynamicType(type);
+ }
+ }
+}
+
+bool btMultiBody::isLinkStaticOrKinematic(const int i) const
+{
+ if (i == -1)
+ {
+ return isBaseStaticOrKinematic();
+ }
+ else
+ {
+ if (m_links[i].m_collider)
+ return m_links[i].m_collider->isStaticOrKinematic();
+ }
+ return false;
+}
+
+bool btMultiBody::isLinkKinematic(const int i) const
+{
+ if (i == -1)
+ {
+ return isBaseKinematic();
+ }
+ else
+ {
+ if (m_links[i].m_collider)
+ return m_links[i].m_collider->isKinematic();
+ }
+ return false;
+}
+
+bool btMultiBody::isLinkAndAllAncestorsStaticOrKinematic(const int i) const
+{
+ int link = i;
+ while (link != -1) {
+ if (!isLinkStaticOrKinematic(link))
+ return false;
+ link = m_links[link].m_parent;
+ }
+ return isBaseStaticOrKinematic();
+}
+
+bool btMultiBody::isLinkAndAllAncestorsKinematic(const int i) const
+{
+ int link = i;
+ while (link != -1) {
+ if (!isLinkKinematic(link))
+ return false;
+ link = m_links[link].m_parent;
+ }
+ return isBaseKinematic();
+}
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.h b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.h
index be795633fd..25112a6805 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.h
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.h
@@ -210,7 +210,13 @@ public:
void setBasePos(const btVector3 &pos)
{
m_basePos = pos;
- m_basePos_interpolate = pos;
+ if(!isBaseKinematic())
+ m_basePos_interpolate = pos;
+ }
+
+ void setInterpolateBasePos(const btVector3 &pos)
+ {
+ m_basePos_interpolate = pos;
}
void setBaseWorldTransform(const btTransform &tr)
@@ -227,17 +233,39 @@ public:
return tr;
}
+ void setInterpolateBaseWorldTransform(const btTransform &tr)
+ {
+ setInterpolateBasePos(tr.getOrigin());
+ setInterpolateWorldToBaseRot(tr.getRotation().inverse());
+ }
+
+ btTransform getInterpolateBaseWorldTransform() const
+ {
+ btTransform tr;
+ tr.setOrigin(getInterpolateBasePos());
+ tr.setRotation(getInterpolateWorldToBaseRot().inverse());
+ return tr;
+ }
+
void setBaseVel(const btVector3 &vel)
{
m_realBuf[3] = vel[0];
m_realBuf[4] = vel[1];
m_realBuf[5] = vel[2];
}
+
void setWorldToBaseRot(const btQuaternion &rot)
{
m_baseQuat = rot; //m_baseQuat asumed to ba alias!?
- m_baseQuat_interpolate = rot;
+ if(!isBaseKinematic())
+ m_baseQuat_interpolate = rot;
+ }
+
+ void setInterpolateWorldToBaseRot(const btQuaternion &rot)
+ {
+ m_baseQuat_interpolate = rot;
}
+
void setBaseOmega(const btVector3 &omega)
{
m_realBuf[0] = omega[0];
@@ -245,6 +273,8 @@ public:
m_realBuf[2] = omega[2];
}
+ void saveKinematicState(btScalar timeStep);
+
//
// get/set pos/vel for child m_links (i = 0 to num_links-1)
//
@@ -278,6 +308,11 @@ public:
{
return &m_deltaV[0];
}
+
+ const btScalar *getSplitVelocityVector() const
+ {
+ return &m_splitV[0];
+ }
/* btScalar * getVelocityVector()
{
return &real_buf[0];
@@ -397,6 +432,26 @@ public:
m_deltaV[dof] += delta_vee[dof] * multiplier;
}
}
+ void applyDeltaSplitVeeMultiDof(const btScalar *delta_vee, btScalar multiplier)
+ {
+ for (int dof = 0; dof < 6 + getNumDofs(); ++dof)
+ {
+ m_splitV[dof] += delta_vee[dof] * multiplier;
+ }
+ }
+ void addSplitV()
+ {
+ applyDeltaVeeMultiDof(&m_splitV[0], 1);
+ }
+ void substractSplitV()
+ {
+ applyDeltaVeeMultiDof(&m_splitV[0], -1);
+
+ for (int dof = 0; dof < 6 + getNumDofs(); ++dof)
+ {
+ m_splitV[dof] = 0.f;
+ }
+ }
void processDeltaVeeMultiDof2()
{
applyDeltaVeeMultiDof(&m_deltaV[0], 1);
@@ -495,14 +550,22 @@ public:
void goToSleep();
void checkMotionAndSleepIfRequired(btScalar timestep);
- bool hasFixedBase() const
- {
- return m_fixedBase;
- }
+ bool hasFixedBase() const;
+
+ bool isBaseKinematic() const;
+
+ bool isBaseStaticOrKinematic() const;
+
+ // set the dynamic type in the base's collision flags.
+ void setBaseDynamicType(int dynamicType);
void setFixedBase(bool fixedBase)
{
m_fixedBase = fixedBase;
+ if(m_fixedBase)
+ setBaseDynamicType(btCollisionObject::CF_STATIC_OBJECT);
+ else
+ setBaseDynamicType(btCollisionObject::CF_DYNAMIC_OBJECT);
}
int getCompanionId() const
@@ -653,7 +716,15 @@ public:
btVector3 &top_out, // top part of output vector
btVector3 &bottom_out); // bottom part of output vector
+ void setLinkDynamicType(const int i, int type);
+
+ bool isLinkStaticOrKinematic(const int i) const;
+
+ bool isLinkKinematic(const int i) const;
+
+ bool isLinkAndAllAncestorsStaticOrKinematic(const int i) const;
+ bool isLinkAndAllAncestorsKinematic(const int i) const;
private:
btMultiBody(const btMultiBody &); // not implemented
@@ -711,6 +782,7 @@ private:
// offset size array
// 0 num_links+1 rot_from_parent
//
+ btAlignedObjectArray<btScalar> m_splitV;
btAlignedObjectArray<btScalar> m_deltaV;
btAlignedObjectArray<btScalar> m_realBuf;
btAlignedObjectArray<btVector3> m_vectorBuf;
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.cpp
index d7ed05ce57..1ba5861145 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.cpp
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.cpp
@@ -2,11 +2,12 @@
#include "BulletDynamics/Dynamics/btRigidBody.h"
#include "btMultiBodyPoint2Point.h" //for testing (BTMBP2PCONSTRAINT_BLOCK_ANGULAR_MOTION_TEST macro)
-btMultiBodyConstraint::btMultiBodyConstraint(btMultiBody* bodyA, btMultiBody* bodyB, int linkA, int linkB, int numRows, bool isUnilateral)
+btMultiBodyConstraint::btMultiBodyConstraint(btMultiBody* bodyA, btMultiBody* bodyB, int linkA, int linkB, int numRows, bool isUnilateral, int type)
: m_bodyA(bodyA),
m_bodyB(bodyB),
m_linkA(linkA),
m_linkB(linkB),
+ m_type(type),
m_numRows(numRows),
m_jacSizeA(0),
m_jacSizeBoth(0),
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.h b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.h
index 5c15f3e851..4a6007ee3e 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.h
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.h
@@ -20,6 +20,21 @@ subject to the following restrictions:
#include "LinearMath/btAlignedObjectArray.h"
#include "btMultiBody.h"
+
+//Don't change any of the existing enum values, so add enum types at the end for serialization compatibility
+enum btTypedMultiBodyConstraintType
+{
+ MULTIBODY_CONSTRAINT_LIMIT=3,
+ MULTIBODY_CONSTRAINT_1DOF_JOINT_MOTOR,
+ MULTIBODY_CONSTRAINT_GEAR,
+ MULTIBODY_CONSTRAINT_POINT_TO_POINT,
+ MULTIBODY_CONSTRAINT_SLIDER,
+ MULTIBODY_CONSTRAINT_SPHERICAL_MOTOR,
+ MULTIBODY_CONSTRAINT_FIXED,
+
+ MAX_MULTIBODY_CONSTRAINT_TYPE,
+};
+
class btMultiBody;
struct btSolverInfo;
@@ -46,6 +61,8 @@ protected:
int m_linkA;
int m_linkB;
+ int m_type; //btTypedMultiBodyConstraintType
+
int m_numRows;
int m_jacSizeA;
int m_jacSizeBoth;
@@ -82,12 +99,16 @@ protected:
public:
BT_DECLARE_ALIGNED_ALLOCATOR();
- btMultiBodyConstraint(btMultiBody * bodyA, btMultiBody * bodyB, int linkA, int linkB, int numRows, bool isUnilateral);
+ btMultiBodyConstraint(btMultiBody * bodyA, btMultiBody * bodyB, int linkA, int linkB, int numRows, bool isUnilateral, int type);
virtual ~btMultiBodyConstraint();
void updateJacobianSizes();
void allocateJacobiansMultiDof();
+ int getConstraintType() const
+ {
+ return m_type;
+ }
//many constraints have setFrameInB/setPivotInB. Will use 'getConstraintType' later.
virtual void setFrameInB(const btMatrix3x3& frameInB) {}
virtual void setPivotInB(const btVector3& pivotInB) {}
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.cpp
index cd1bad089e..fef95f0c4e 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.cpp
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.cpp
@@ -592,6 +592,7 @@ void btMultiBodyDynamicsWorld::integrateMultiBodyTransforms(btScalar timeStep)
if (!isSleeping)
{
+ bod->addSplitV();
int nLinks = bod->getNumLinks();
///base + num m_links
@@ -610,6 +611,7 @@ void btMultiBodyDynamicsWorld::integrateMultiBodyTransforms(btScalar timeStep)
m_scratch_world_to_local.resize(nLinks + 1);
m_scratch_local_origin.resize(nLinks + 1);
bod->updateCollisionObjectWorldTransforms(m_scratch_world_to_local, m_scratch_local_origin);
+ bod->substractSplitV();
}
else
{
@@ -867,6 +869,18 @@ void btMultiBodyDynamicsWorld::serializeMultiBodies(btSerializer* serializer)
}
}
}
+
+void btMultiBodyDynamicsWorld::saveKinematicState(btScalar timeStep)
+{
+ btDiscreteDynamicsWorld::saveKinematicState(timeStep);
+ for(int i = 0; i < m_multiBodies.size(); i++)
+ {
+ btMultiBody* body = m_multiBodies[i];
+ if(body->isBaseKinematic())
+ body->saveKinematicState(timeStep);
+ }
+}
+
//
//void btMultiBodyDynamicsWorld::setSplitIslands(bool split)
//{
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.h b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.h
index 9ac46f4b64..d2d76c8b92 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.h
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.h
@@ -120,5 +120,7 @@ public:
virtual void solveExternalForces(btContactSolverInfo& solverInfo);
virtual void solveInternalConstraints(btContactSolverInfo& solverInfo);
void buildIslands();
+
+ virtual void saveKinematicState(btScalar timeStep);
};
#endif //BT_MULTIBODY_DYNAMICS_WORLD_H
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyFixedConstraint.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyFixedConstraint.cpp
index 5ef9444c2f..df2abbe97a 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyFixedConstraint.cpp
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyFixedConstraint.cpp
@@ -24,7 +24,7 @@ subject to the following restrictions:
#define BTMBFIXEDCONSTRAINT_DIM 6
btMultiBodyFixedConstraint::btMultiBodyFixedConstraint(btMultiBody* body, int link, btRigidBody* bodyB, const btVector3& pivotInA, const btVector3& pivotInB, const btMatrix3x3& frameInA, const btMatrix3x3& frameInB)
- : btMultiBodyConstraint(body, 0, link, -1, BTMBFIXEDCONSTRAINT_DIM, false),
+ : btMultiBodyConstraint(body, 0, link, -1, BTMBFIXEDCONSTRAINT_DIM, false, MULTIBODY_CONSTRAINT_FIXED),
m_rigidBodyA(0),
m_rigidBodyB(bodyB),
m_pivotInA(pivotInA),
@@ -36,7 +36,7 @@ btMultiBodyFixedConstraint::btMultiBodyFixedConstraint(btMultiBody* body, int li
}
btMultiBodyFixedConstraint::btMultiBodyFixedConstraint(btMultiBody* bodyA, int linkA, btMultiBody* bodyB, int linkB, const btVector3& pivotInA, const btVector3& pivotInB, const btMatrix3x3& frameInA, const btMatrix3x3& frameInB)
- : btMultiBodyConstraint(bodyA, bodyB, linkA, linkB, BTMBFIXEDCONSTRAINT_DIM, false),
+ : btMultiBodyConstraint(bodyA, bodyB, linkA, linkB, BTMBFIXEDCONSTRAINT_DIM, false, MULTIBODY_CONSTRAINT_FIXED),
m_rigidBodyA(0),
m_rigidBodyB(0),
m_pivotInA(pivotInA),
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyGearConstraint.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyGearConstraint.cpp
index bf6b811d26..ee02cf9b07 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyGearConstraint.cpp
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyGearConstraint.cpp
@@ -21,7 +21,7 @@ subject to the following restrictions:
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
btMultiBodyGearConstraint::btMultiBodyGearConstraint(btMultiBody* bodyA, int linkA, btMultiBody* bodyB, int linkB, const btVector3& pivotInA, const btVector3& pivotInB, const btMatrix3x3& frameInA, const btMatrix3x3& frameInB)
- : btMultiBodyConstraint(bodyA, bodyB, linkA, linkB, 1, false),
+ : btMultiBodyConstraint(bodyA, bodyB, linkA, linkB, 1, false, MULTIBODY_CONSTRAINT_GEAR),
m_gearRatio(1),
m_gearAuxLink(-1),
m_erp(0),
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.cpp
index 8791ad2868..94b36ac108 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.cpp
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.cpp
@@ -22,7 +22,7 @@ subject to the following restrictions:
btMultiBodyJointLimitConstraint::btMultiBodyJointLimitConstraint(btMultiBody* body, int link, btScalar lower, btScalar upper)
//:btMultiBodyConstraint(body,0,link,-1,2,true),
- : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 2, true),
+ : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 2, true, MULTIBODY_CONSTRAINT_LIMIT),
m_lowerBound(lower),
m_upperBound(upper)
{
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.h b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.h
index 6716ba490f..b810692b4c 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.h
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.h
@@ -42,6 +42,22 @@ public:
{
//todo(erwincoumans)
}
+ btScalar getLowerBound() const
+ {
+ return m_lowerBound;
+ }
+ btScalar getUpperBound() const
+ {
+ return m_upperBound;
+ }
+ void setLowerBound(btScalar lower)
+ {
+ m_lowerBound = lower;
+ }
+ void setUpperBound(btScalar upper)
+ {
+ m_upperBound = upper;
+ }
};
#endif //BT_MULTIBODY_JOINT_LIMIT_CONSTRAINT_H
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointMotor.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointMotor.cpp
index 5c816c4987..fec9b03213 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointMotor.cpp
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointMotor.cpp
@@ -21,7 +21,7 @@ subject to the following restrictions:
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
btMultiBodyJointMotor::btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse)
- : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 1, true),
+ : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 1, true, MULTIBODY_CONSTRAINT_1DOF_JOINT_MOTOR),
m_desiredVelocity(desiredVelocity),
m_desiredPosition(0),
m_kd(1.),
@@ -51,7 +51,7 @@ void btMultiBodyJointMotor::finalizeMultiDof()
btMultiBodyJointMotor::btMultiBodyJointMotor(btMultiBody* body, int link, int linkDoF, btScalar desiredVelocity, btScalar maxMotorImpulse)
//:btMultiBodyConstraint(body,0,link,-1,1,true),
- : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 1, true),
+ : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 1, true, MULTIBODY_CONSTRAINT_1DOF_JOINT_MOTOR),
m_desiredVelocity(desiredVelocity),
m_desiredPosition(0),
m_kd(1.),
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLink.h b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLink.h
index 01d5583c2f..5a1429340f 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLink.h
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLink.h
@@ -295,6 +295,9 @@ struct btMultibodyLink
}
}
}
+
+
+
};
#endif //BT_MULTIBODY_LINK_H
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLinkCollider.h b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLinkCollider.h
index bc909990c2..3dc35a5814 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLinkCollider.h
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLinkCollider.h
@@ -130,6 +130,23 @@ public:
return true;
}
+ bool isStaticOrKinematic() const
+ {
+ return isStaticOrKinematicObject();
+ }
+
+ bool isKinematic() const
+ {
+ return isKinematicObject();
+ }
+
+ void setDynamicType(int dynamicType)
+ {
+ int oldFlags = getCollisionFlags();
+ oldFlags &= ~(btCollisionObject::CF_STATIC_OBJECT | btCollisionObject::CF_KINEMATIC_OBJECT);
+ setCollisionFlags(oldFlags | dynamicType);
+ }
+
virtual int calculateSerializeBufferSize() const;
///fills the dataBuffer and returns the struct name (and 0 on failure)
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyPoint2Point.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyPoint2Point.cpp
index 37d3aede37..f51e69deb1 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyPoint2Point.cpp
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyPoint2Point.cpp
@@ -27,7 +27,7 @@ subject to the following restrictions:
#endif
btMultiBodyPoint2Point::btMultiBodyPoint2Point(btMultiBody* body, int link, btRigidBody* bodyB, const btVector3& pivotInA, const btVector3& pivotInB)
- : btMultiBodyConstraint(body, 0, link, -1, BTMBP2PCONSTRAINT_DIM, false),
+ : btMultiBodyConstraint(body, 0, link, -1, BTMBP2PCONSTRAINT_DIM, false, MULTIBODY_CONSTRAINT_POINT_TO_POINT),
m_rigidBodyA(0),
m_rigidBodyB(bodyB),
m_pivotInA(pivotInA),
@@ -37,7 +37,7 @@ btMultiBodyPoint2Point::btMultiBodyPoint2Point(btMultiBody* body, int link, btRi
}
btMultiBodyPoint2Point::btMultiBodyPoint2Point(btMultiBody* bodyA, int linkA, btMultiBody* bodyB, int linkB, const btVector3& pivotInA, const btVector3& pivotInB)
- : btMultiBodyConstraint(bodyA, bodyB, linkA, linkB, BTMBP2PCONSTRAINT_DIM, false),
+ : btMultiBodyConstraint(bodyA, bodyB, linkA, linkB, BTMBP2PCONSTRAINT_DIM, false, MULTIBODY_CONSTRAINT_POINT_TO_POINT),
m_rigidBodyA(0),
m_rigidBodyB(0),
m_pivotInA(pivotInA),
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySliderConstraint.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySliderConstraint.cpp
index e025302ce6..48ec1d5af2 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySliderConstraint.cpp
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySliderConstraint.cpp
@@ -25,7 +25,7 @@ subject to the following restrictions:
#define EPSILON 0.000001
btMultiBodySliderConstraint::btMultiBodySliderConstraint(btMultiBody* body, int link, btRigidBody* bodyB, const btVector3& pivotInA, const btVector3& pivotInB, const btMatrix3x3& frameInA, const btMatrix3x3& frameInB, const btVector3& jointAxis)
- : btMultiBodyConstraint(body, 0, link, -1, BTMBSLIDERCONSTRAINT_DIM, false),
+ : btMultiBodyConstraint(body, 0, link, -1, BTMBSLIDERCONSTRAINT_DIM, false, MULTIBODY_CONSTRAINT_SLIDER),
m_rigidBodyA(0),
m_rigidBodyB(bodyB),
m_pivotInA(pivotInA),
@@ -38,7 +38,7 @@ btMultiBodySliderConstraint::btMultiBodySliderConstraint(btMultiBody* body, int
}
btMultiBodySliderConstraint::btMultiBodySliderConstraint(btMultiBody* bodyA, int linkA, btMultiBody* bodyB, int linkB, const btVector3& pivotInA, const btVector3& pivotInB, const btMatrix3x3& frameInA, const btMatrix3x3& frameInB, const btVector3& jointAxis)
- : btMultiBodyConstraint(bodyA, bodyB, linkA, linkB, BTMBSLIDERCONSTRAINT_DIM, false),
+ : btMultiBodyConstraint(bodyA, bodyB, linkA, linkB, BTMBSLIDERCONSTRAINT_DIM, false, MULTIBODY_CONSTRAINT_SLIDER),
m_rigidBodyA(0),
m_rigidBodyB(0),
m_pivotInA(pivotInA),
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySphericalJointMotor.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySphericalJointMotor.cpp
index 3e5aa30f28..25ddd539bf 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySphericalJointMotor.cpp
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySphericalJointMotor.cpp
@@ -23,7 +23,7 @@ subject to the following restrictions:
#include "BulletDynamics/ConstraintSolver/btGeneric6DofSpring2Constraint.h"
btMultiBodySphericalJointMotor::btMultiBodySphericalJointMotor(btMultiBody* body, int link, btScalar maxMotorImpulse)
- : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 3, true),
+ : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 3, true, MULTIBODY_CONSTRAINT_SPHERICAL_MOTOR),
m_desiredVelocity(0, 0, 0),
m_desiredPosition(0,0,0,1),
m_kd(1.),
diff --git a/thirdparty/bullet/BulletSoftBody/DeformableBodyInplaceSolverIslandCallback.h b/thirdparty/bullet/BulletSoftBody/DeformableBodyInplaceSolverIslandCallback.h
index 7b225701f6..01c7e93a1b 100644
--- a/thirdparty/bullet/BulletSoftBody/DeformableBodyInplaceSolverIslandCallback.h
+++ b/thirdparty/bullet/BulletSoftBody/DeformableBodyInplaceSolverIslandCallback.h
@@ -13,13 +13,12 @@ struct DeformableBodyInplaceSolverIslandCallback : public MultiBodyInplaceSolver
btDeformableMultiBodyConstraintSolver* m_deformableSolver;
DeformableBodyInplaceSolverIslandCallback(btDeformableMultiBodyConstraintSolver* solver,
- btDispatcher* dispatcher)
- : MultiBodyInplaceSolverIslandCallback(solver, dispatcher), m_deformableSolver(solver)
+ btDispatcher* dispatcher)
+ : MultiBodyInplaceSolverIslandCallback(solver, dispatcher), m_deformableSolver(solver)
{
}
-
- virtual void processConstraints(int islandId=-1)
+ virtual void processConstraints(int islandId = -1)
{
btCollisionObject** bodies = m_bodies.size() ? &m_bodies[0] : 0;
btCollisionObject** softBodies = m_softBodies.size() ? &m_softBodies[0] : 0;
@@ -30,7 +29,7 @@ struct DeformableBodyInplaceSolverIslandCallback : public MultiBodyInplaceSolver
//printf("mb contacts = %d, mb constraints = %d\n", mbContacts, m_multiBodyConstraints.size());
m_deformableSolver->solveDeformableBodyGroup(bodies, m_bodies.size(), softBodies, m_softBodies.size(), manifold, m_manifolds.size(), constraints, m_constraints.size(), multiBodyConstraints, m_multiBodyConstraints.size(), *m_solverInfo, m_debugDrawer, m_dispatcher);
- if (m_bodies.size() && (m_solverInfo->m_reportSolverAnalytics&1))
+ if (m_bodies.size() && (m_solverInfo->m_reportSolverAnalytics & 1))
{
m_deformableSolver->m_analyticsData.m_islandId = islandId;
m_islandAnalyticsData.push_back(m_solver->m_analyticsData);
diff --git a/thirdparty/bullet/BulletSoftBody/btCGProjection.h b/thirdparty/bullet/BulletSoftBody/btCGProjection.h
index d047e6d3d9..e05970664c 100644
--- a/thirdparty/bullet/BulletSoftBody/btCGProjection.h
+++ b/thirdparty/bullet/BulletSoftBody/btCGProjection.h
@@ -22,85 +22,83 @@
struct DeformableContactConstraint
{
- const btSoftBody::Node* m_node;
- btAlignedObjectArray<const btSoftBody::RContact*> m_contact;
- btAlignedObjectArray<btVector3> m_total_normal_dv;
- btAlignedObjectArray<btVector3> m_total_tangent_dv;
- btAlignedObjectArray<bool> m_static;
- btAlignedObjectArray<bool> m_can_be_dynamic;
-
- DeformableContactConstraint(const btSoftBody::RContact& rcontact): m_node(rcontact.m_node)
- {
- append(rcontact);
- }
-
- DeformableContactConstraint(): m_node(NULL)
- {
- m_contact.push_back(NULL);
- }
-
- void append(const btSoftBody::RContact& rcontact)
- {
- m_contact.push_back(&rcontact);
- m_total_normal_dv.push_back(btVector3(0,0,0));
- m_total_tangent_dv.push_back(btVector3(0,0,0));
- m_static.push_back(false);
- m_can_be_dynamic.push_back(true);
- }
-
- void replace(const btSoftBody::RContact& rcontact)
- {
- m_contact.clear();
- m_total_normal_dv.clear();
- m_total_tangent_dv.clear();
- m_static.clear();
- m_can_be_dynamic.clear();
- append(rcontact);
- }
-
- ~DeformableContactConstraint()
- {
- }
+ const btSoftBody::Node* m_node;
+ btAlignedObjectArray<const btSoftBody::RContact*> m_contact;
+ btAlignedObjectArray<btVector3> m_total_normal_dv;
+ btAlignedObjectArray<btVector3> m_total_tangent_dv;
+ btAlignedObjectArray<bool> m_static;
+ btAlignedObjectArray<bool> m_can_be_dynamic;
+
+ DeformableContactConstraint(const btSoftBody::RContact& rcontact) : m_node(rcontact.m_node)
+ {
+ append(rcontact);
+ }
+
+ DeformableContactConstraint() : m_node(NULL)
+ {
+ m_contact.push_back(NULL);
+ }
+
+ void append(const btSoftBody::RContact& rcontact)
+ {
+ m_contact.push_back(&rcontact);
+ m_total_normal_dv.push_back(btVector3(0, 0, 0));
+ m_total_tangent_dv.push_back(btVector3(0, 0, 0));
+ m_static.push_back(false);
+ m_can_be_dynamic.push_back(true);
+ }
+
+ void replace(const btSoftBody::RContact& rcontact)
+ {
+ m_contact.clear();
+ m_total_normal_dv.clear();
+ m_total_tangent_dv.clear();
+ m_static.clear();
+ m_can_be_dynamic.clear();
+ append(rcontact);
+ }
+
+ ~DeformableContactConstraint()
+ {
+ }
};
class btCGProjection
{
public:
- typedef btAlignedObjectArray<btVector3> TVStack;
- typedef btAlignedObjectArray<btAlignedObjectArray<btVector3> > TVArrayStack;
- typedef btAlignedObjectArray<btAlignedObjectArray<btScalar> > TArrayStack;
- btAlignedObjectArray<btSoftBody *>& m_softBodies;
- const btScalar& m_dt;
- // map from node indices to node pointers
- const btAlignedObjectArray<btSoftBody::Node*>* m_nodes;
-
- btCGProjection(btAlignedObjectArray<btSoftBody *>& softBodies, const btScalar& dt)
- : m_softBodies(softBodies)
- , m_dt(dt)
- {
- }
-
- virtual ~btCGProjection()
- {
- }
-
- // apply the constraints
- virtual void project(TVStack& x) = 0;
-
- virtual void setConstraints() = 0;
-
- // update the constraints
- virtual btScalar update() = 0;
-
- virtual void reinitialize(bool nodeUpdated)
- {
- }
-
- virtual void setIndices(const btAlignedObjectArray<btSoftBody::Node*>* nodes)
- {
- m_nodes = nodes;
- }
-};
+ typedef btAlignedObjectArray<btVector3> TVStack;
+ typedef btAlignedObjectArray<btAlignedObjectArray<btVector3> > TVArrayStack;
+ typedef btAlignedObjectArray<btAlignedObjectArray<btScalar> > TArrayStack;
+ btAlignedObjectArray<btSoftBody*>& m_softBodies;
+ const btScalar& m_dt;
+ // map from node indices to node pointers
+ const btAlignedObjectArray<btSoftBody::Node*>* m_nodes;
+
+ btCGProjection(btAlignedObjectArray<btSoftBody*>& softBodies, const btScalar& dt)
+ : m_softBodies(softBodies), m_dt(dt)
+ {
+ }
+ virtual ~btCGProjection()
+ {
+ }
+
+ // apply the constraints
+ virtual void project(TVStack& x) = 0;
+
+ virtual void setConstraints() = 0;
+
+ // update the constraints
+ virtual btScalar update() = 0;
+
+ virtual void reinitialize(bool nodeUpdated)
+ {
+ }
+
+ virtual void setIndices(const btAlignedObjectArray<btSoftBody::Node*>* nodes)
+ {
+ m_nodes = nodes;
+ }
+};
#endif /* btCGProjection_h */
diff --git a/thirdparty/bullet/BulletSoftBody/btConjugateGradient.h b/thirdparty/bullet/BulletSoftBody/btConjugateGradient.h
index bd51e584b9..bcd5e6b519 100644
--- a/thirdparty/bullet/BulletSoftBody/btConjugateGradient.h
+++ b/thirdparty/bullet/BulletSoftBody/btConjugateGradient.h
@@ -15,144 +15,103 @@
#ifndef BT_CONJUGATE_GRADIENT_H
#define BT_CONJUGATE_GRADIENT_H
-#include <iostream>
-#include <cmath>
-#include <limits>
-#include <LinearMath/btAlignedObjectArray.h>
-#include <LinearMath/btVector3.h>
-#include "LinearMath/btQuickprof.h"
+#include "btKrylovSolver.h"
template <class MatrixX>
-class btConjugateGradient
+class btConjugateGradient : public btKrylovSolver<MatrixX>
{
- typedef btAlignedObjectArray<btVector3> TVStack;
- TVStack r,p,z,temp;
- int max_iterations;
- btScalar tolerance_squared;
+ typedef btAlignedObjectArray<btVector3> TVStack;
+ typedef btKrylovSolver<MatrixX> Base;
+ TVStack r, p, z, temp;
+
public:
- btConjugateGradient(const int max_it_in)
- : max_iterations(max_it_in)
- {
- tolerance_squared = 1e-5;
- }
-
- virtual ~btConjugateGradient(){}
-
- // return the number of iterations taken
- int solve(MatrixX& A, TVStack& x, const TVStack& b, bool verbose = false)
- {
- BT_PROFILE("CGSolve");
- btAssert(x.size() == b.size());
- reinitialize(b);
- // r = b - A * x --with assigned dof zeroed out
- A.multiply(x, temp);
- r = sub(b, temp);
- A.project(r);
- // z = M^(-1) * r
- A.precondition(r, z);
- A.project(z);
- btScalar r_dot_z = dot(z,r);
- if (r_dot_z <= tolerance_squared) {
- if (verbose)
- {
- std::cout << "Iteration = 0" << std::endl;
- std::cout << "Two norm of the residual = " << r_dot_z << std::endl;
- }
- return 0;
- }
- p = z;
- btScalar r_dot_z_new = r_dot_z;
- for (int k = 1; k <= max_iterations; k++) {
- // temp = A*p
- A.multiply(p, temp);
- A.project(temp);
- if (dot(p,temp) < SIMD_EPSILON)
- {
- if (verbose)
- std::cout << "Encountered negative direction in CG!" << std::endl;
- if (k == 1)
- {
- x = b;
- }
- return k;
- }
- // alpha = r^T * z / (p^T * A * p)
- btScalar alpha = r_dot_z_new / dot(p, temp);
- // x += alpha * p;
- multAndAddTo(alpha, p, x);
- // r -= alpha * temp;
- multAndAddTo(-alpha, temp, r);
- // z = M^(-1) * r
- A.precondition(r, z);
- r_dot_z = r_dot_z_new;
- r_dot_z_new = dot(r,z);
- if (r_dot_z_new < tolerance_squared) {
- if (verbose)
- {
- std::cout << "ConjugateGradient iterations " << k << std::endl;
- }
- return k;
- }
+ btConjugateGradient(const int max_it_in)
+ : btKrylovSolver<MatrixX>(max_it_in, SIMD_EPSILON)
+ {
+ }
+
+ virtual ~btConjugateGradient() {}
+
+ // return the number of iterations taken
+ int solve(MatrixX& A, TVStack& x, const TVStack& b, bool verbose = false)
+ {
+ BT_PROFILE("CGSolve");
+ btAssert(x.size() == b.size());
+ reinitialize(b);
+ temp = b;
+ A.project(temp);
+ p = temp;
+ A.precondition(p, z);
+ btScalar d0 = this->dot(z, temp);
+ d0 = btMin(btScalar(1), d0);
+ // r = b - A * x --with assigned dof zeroed out
+ A.multiply(x, temp);
+ r = this->sub(b, temp);
+ A.project(r);
+ // z = M^(-1) * r
+ A.precondition(r, z);
+ A.project(z);
+ btScalar r_dot_z = this->dot(z, r);
+ if (r_dot_z <= Base::m_tolerance * d0)
+ {
+ if (verbose)
+ {
+ std::cout << "Iteration = 0" << std::endl;
+ std::cout << "Two norm of the residual = " << r_dot_z << std::endl;
+ }
+ return 0;
+ }
+ p = z;
+ btScalar r_dot_z_new = r_dot_z;
+ for (int k = 1; k <= Base::m_maxIterations; k++)
+ {
+ // temp = A*p
+ A.multiply(p, temp);
+ A.project(temp);
+ if (this->dot(p, temp) < 0)
+ {
+ if (verbose)
+ std::cout << "Encountered negative direction in CG!" << std::endl;
+ if (k == 1)
+ {
+ x = b;
+ }
+ return k;
+ }
+ // alpha = r^T * z / (p^T * A * p)
+ btScalar alpha = r_dot_z_new / this->dot(p, temp);
+ // x += alpha * p;
+ this->multAndAddTo(alpha, p, x);
+ // r -= alpha * temp;
+ this->multAndAddTo(-alpha, temp, r);
+ // z = M^(-1) * r
+ A.precondition(r, z);
+ r_dot_z = r_dot_z_new;
+ r_dot_z_new = this->dot(r, z);
+ if (r_dot_z_new < Base::m_tolerance * d0)
+ {
+ if (verbose)
+ {
+ std::cout << "ConjugateGradient iterations " << k << " residual = " << r_dot_z_new << std::endl;
+ }
+ return k;
+ }
+
+ btScalar beta = r_dot_z_new / r_dot_z;
+ p = this->multAndAdd(beta, p, z);
+ }
+ if (verbose)
+ {
+ std::cout << "ConjugateGradient max iterations reached " << Base::m_maxIterations << " error = " << r_dot_z_new << std::endl;
+ }
+ return Base::m_maxIterations;
+ }
- btScalar beta = r_dot_z_new/r_dot_z;
- p = multAndAdd(beta, p, z);
- }
- if (verbose)
- {
- std::cout << "ConjugateGradient max iterations reached " << max_iterations << std::endl;
- }
- return max_iterations;
- }
-
- void reinitialize(const TVStack& b)
- {
- r.resize(b.size());
- p.resize(b.size());
- z.resize(b.size());
- temp.resize(b.size());
- }
-
- TVStack sub(const TVStack& a, const TVStack& b)
- {
- // c = a-b
- btAssert(a.size() == b.size());
- TVStack c;
- c.resize(a.size());
- for (int i = 0; i < a.size(); ++i)
- {
- c[i] = a[i] - b[i];
- }
- return c;
- }
-
- btScalar squaredNorm(const TVStack& a)
- {
- return dot(a,a);
- }
-
- btScalar dot(const TVStack& a, const TVStack& b)
- {
- btScalar ans(0);
- for (int i = 0; i < a.size(); ++i)
- ans += a[i].dot(b[i]);
- return ans;
- }
-
- void multAndAddTo(btScalar s, const TVStack& a, TVStack& result)
- {
-// result += s*a
- btAssert(a.size() == result.size());
- for (int i = 0; i < a.size(); ++i)
- result[i] += s * a[i];
- }
-
- TVStack multAndAdd(btScalar s, const TVStack& a, const TVStack& b)
- {
- // result = a*s + b
- TVStack result;
- result.resize(a.size());
- for (int i = 0; i < a.size(); ++i)
- result[i] = s * a[i] + b[i];
- return result;
- }
+ void reinitialize(const TVStack& b)
+ {
+ r.resize(b.size());
+ p.resize(b.size());
+ z.resize(b.size());
+ temp.resize(b.size());
+ }
};
#endif /* btConjugateGradient_h */
diff --git a/thirdparty/bullet/BulletSoftBody/btConjugateResidual.h b/thirdparty/bullet/BulletSoftBody/btConjugateResidual.h
index 7b211c4172..6146120365 100644
--- a/thirdparty/bullet/BulletSoftBody/btConjugateResidual.h
+++ b/thirdparty/bullet/BulletSoftBody/btConjugateResidual.h
@@ -15,174 +15,98 @@
#ifndef BT_CONJUGATE_RESIDUAL_H
#define BT_CONJUGATE_RESIDUAL_H
-#include <iostream>
-#include <cmath>
-#include <limits>
-#include <LinearMath/btAlignedObjectArray.h>
-#include <LinearMath/btVector3.h>
-#include <LinearMath/btScalar.h>
-#include "LinearMath/btQuickprof.h"
+#include "btKrylovSolver.h"
+
template <class MatrixX>
-class btConjugateResidual
+class btConjugateResidual : public btKrylovSolver<MatrixX>
{
- typedef btAlignedObjectArray<btVector3> TVStack;
- TVStack r,p,z,temp_p, temp_r, best_x;
- // temp_r = A*r
- // temp_p = A*p
- // z = M^(-1) * temp_p = M^(-1) * A * p
- int max_iterations;
- btScalar tolerance_squared, best_r;
+ typedef btAlignedObjectArray<btVector3> TVStack;
+ typedef btKrylovSolver<MatrixX> Base;
+ TVStack r, p, z, temp_p, temp_r, best_x;
+ // temp_r = A*r
+ // temp_p = A*p
+ // z = M^(-1) * temp_p = M^(-1) * A * p
+ btScalar best_r;
+
public:
- btConjugateResidual(const int max_it_in)
- : max_iterations(max_it_in)
- {
- tolerance_squared = 1e-2;
- }
-
- virtual ~btConjugateResidual(){}
-
- // return the number of iterations taken
- int solve(MatrixX& A, TVStack& x, const TVStack& b, bool verbose = false)
- {
- BT_PROFILE("CRSolve");
- btAssert(x.size() == b.size());
- reinitialize(b);
- // r = b - A * x --with assigned dof zeroed out
- A.multiply(x, temp_r); // borrow temp_r here to store A*x
- r = sub(b, temp_r);
- // z = M^(-1) * r
- A.precondition(r, z); // borrow z to store preconditioned r
- r = z;
- btScalar residual_norm = norm(r);
- if (residual_norm <= tolerance_squared) {
- if (verbose)
- {
- std::cout << "Iteration = 0" << std::endl;
- std::cout << "Two norm of the residual = " << residual_norm << std::endl;
- }
- return 0;
- }
- p = r;
- btScalar r_dot_Ar, r_dot_Ar_new;
- // temp_p = A*p
- A.multiply(p, temp_p);
- // temp_r = A*r
- temp_r = temp_p;
- r_dot_Ar = dot(r, temp_r);
- for (int k = 1; k <= max_iterations; k++) {
- // z = M^(-1) * Ap
- A.precondition(temp_p, z);
- // alpha = r^T * A * r / (Ap)^T * M^-1 * Ap)
- btScalar alpha = r_dot_Ar / dot(temp_p, z);
- // x += alpha * p;
- multAndAddTo(alpha, p, x);
- // r -= alpha * z;
- multAndAddTo(-alpha, z, r);
- btScalar norm_r = norm(r);
- if (norm_r < best_r)
- {
- best_x = x;
- best_r = norm_r;
- if (norm_r < tolerance_squared) {
- if (verbose)
- {
- std::cout << "ConjugateResidual iterations " << k << std::endl;
- }
- return k;
- }
- else
- {
- if (verbose)
- {
- std::cout << "ConjugateResidual iterations " << k << " has residual "<< norm_r << std::endl;
- }
- }
- }
- // temp_r = A * r;
- A.multiply(r, temp_r);
- r_dot_Ar_new = dot(r, temp_r);
- btScalar beta = r_dot_Ar_new/r_dot_Ar;
- r_dot_Ar = r_dot_Ar_new;
- // p = beta*p + r;
- p = multAndAdd(beta, p, r);
- // temp_p = beta*temp_p + temp_r;
- temp_p = multAndAdd(beta, temp_p, temp_r);
- }
- if (verbose)
- {
- std::cout << "ConjugateResidual max iterations reached " << max_iterations << std::endl;
- }
- x = best_x;
- return max_iterations;
- }
-
- void reinitialize(const TVStack& b)
- {
- r.resize(b.size());
- p.resize(b.size());
- z.resize(b.size());
- temp_p.resize(b.size());
- temp_r.resize(b.size());
- best_x.resize(b.size());
- best_r = SIMD_INFINITY;
- }
-
- TVStack sub(const TVStack& a, const TVStack& b)
- {
- // c = a-b
- btAssert(a.size() == b.size());
- TVStack c;
- c.resize(a.size());
- for (int i = 0; i < a.size(); ++i)
- {
- c[i] = a[i] - b[i];
- }
- return c;
- }
-
- btScalar squaredNorm(const TVStack& a)
- {
- return dot(a,a);
- }
-
- btScalar norm(const TVStack& a)
- {
- btScalar ret = 0;
- for (int i = 0; i < a.size(); ++i)
- {
- for (int d = 0; d < 3; ++d)
- {
- ret = btMax(ret, btFabs(a[i][d]));
- }
- }
- return ret;
- }
-
- btScalar dot(const TVStack& a, const TVStack& b)
- {
- btScalar ans(0);
- for (int i = 0; i < a.size(); ++i)
- ans += a[i].dot(b[i]);
- return ans;
- }
-
- void multAndAddTo(btScalar s, const TVStack& a, TVStack& result)
- {
- // result += s*a
- btAssert(a.size() == result.size());
- for (int i = 0; i < a.size(); ++i)
- result[i] += s * a[i];
- }
-
- TVStack multAndAdd(btScalar s, const TVStack& a, const TVStack& b)
- {
- // result = a*s + b
- TVStack result;
- result.resize(a.size());
- for (int i = 0; i < a.size(); ++i)
- result[i] = s * a[i] + b[i];
- return result;
- }
+ btConjugateResidual(const int max_it_in)
+ : Base(max_it_in, 1e-8)
+ {
+ }
+
+ virtual ~btConjugateResidual() {}
+
+ // return the number of iterations taken
+ int solve(MatrixX& A, TVStack& x, const TVStack& b, bool verbose = false)
+ {
+ BT_PROFILE("CRSolve");
+ btAssert(x.size() == b.size());
+ reinitialize(b);
+ // r = b - A * x --with assigned dof zeroed out
+ A.multiply(x, temp_r); // borrow temp_r here to store A*x
+ r = this->sub(b, temp_r);
+ // z = M^(-1) * r
+ A.precondition(r, z); // borrow z to store preconditioned r
+ r = z;
+ btScalar residual_norm = this->norm(r);
+ if (residual_norm <= Base::m_tolerance)
+ {
+ return 0;
+ }
+ p = r;
+ btScalar r_dot_Ar, r_dot_Ar_new;
+ // temp_p = A*p
+ A.multiply(p, temp_p);
+ // temp_r = A*r
+ temp_r = temp_p;
+ r_dot_Ar = this->dot(r, temp_r);
+ for (int k = 1; k <= Base::m_maxIterations; k++)
+ {
+ // z = M^(-1) * Ap
+ A.precondition(temp_p, z);
+ // alpha = r^T * A * r / (Ap)^T * M^-1 * Ap)
+ btScalar alpha = r_dot_Ar / this->dot(temp_p, z);
+ // x += alpha * p;
+ this->multAndAddTo(alpha, p, x);
+ // r -= alpha * z;
+ this->multAndAddTo(-alpha, z, r);
+ btScalar norm_r = this->norm(r);
+ if (norm_r < best_r)
+ {
+ best_x = x;
+ best_r = norm_r;
+ if (norm_r < Base::m_tolerance)
+ {
+ return k;
+ }
+ }
+ // temp_r = A * r;
+ A.multiply(r, temp_r);
+ r_dot_Ar_new = this->dot(r, temp_r);
+ btScalar beta = r_dot_Ar_new / r_dot_Ar;
+ r_dot_Ar = r_dot_Ar_new;
+ // p = beta*p + r;
+ p = this->multAndAdd(beta, p, r);
+ // temp_p = beta*temp_p + temp_r;
+ temp_p = this->multAndAdd(beta, temp_p, temp_r);
+ }
+ if (verbose)
+ {
+ std::cout << "ConjugateResidual max iterations reached, residual = " << best_r << std::endl;
+ }
+ x = best_x;
+ return Base::m_maxIterations;
+ }
+
+ void reinitialize(const TVStack& b)
+ {
+ r.resize(b.size());
+ p.resize(b.size());
+ z.resize(b.size());
+ temp_p.resize(b.size());
+ temp_r.resize(b.size());
+ best_x.resize(b.size());
+ best_r = SIMD_INFINITY;
+ }
};
#endif /* btConjugateResidual_h */
-
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.cpp
index 5381ee6265..2455ed2138 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.cpp
@@ -17,211 +17,283 @@
#include "btPreconditioner.h"
#include "LinearMath/btQuickprof.h"
-btDeformableBackwardEulerObjective::btDeformableBackwardEulerObjective(btAlignedObjectArray<btSoftBody *>& softBodies, const TVStack& backup_v)
-: m_softBodies(softBodies)
-, m_projection(softBodies)
-, m_backupVelocity(backup_v)
-, m_implicit(false)
+btDeformableBackwardEulerObjective::btDeformableBackwardEulerObjective(btAlignedObjectArray<btSoftBody*>& softBodies, const TVStack& backup_v)
+ : m_softBodies(softBodies), m_projection(softBodies), m_backupVelocity(backup_v), m_implicit(false)
{
- m_massPreconditioner = new MassPreconditioner(m_softBodies);
- m_KKTPreconditioner = new KKTPreconditioner(m_softBodies, m_projection, m_lf, m_dt, m_implicit);
- m_preconditioner = m_KKTPreconditioner;
+ m_massPreconditioner = new MassPreconditioner(m_softBodies);
+ m_KKTPreconditioner = new KKTPreconditioner(m_softBodies, m_projection, m_lf, m_dt, m_implicit);
+ m_preconditioner = m_KKTPreconditioner;
}
btDeformableBackwardEulerObjective::~btDeformableBackwardEulerObjective()
{
- delete m_KKTPreconditioner;
- delete m_massPreconditioner;
+ delete m_KKTPreconditioner;
+ delete m_massPreconditioner;
}
void btDeformableBackwardEulerObjective::reinitialize(bool nodeUpdated, btScalar dt)
{
- BT_PROFILE("reinitialize");
- if (dt > 0)
- {
- setDt(dt);
- }
- if(nodeUpdated)
- {
- updateId();
- }
- for (int i = 0; i < m_lf.size(); ++i)
- {
- m_lf[i]->reinitialize(nodeUpdated);
- }
- m_projection.reinitialize(nodeUpdated);
-// m_preconditioner->reinitialize(nodeUpdated);
+ BT_PROFILE("reinitialize");
+ if (dt > 0)
+ {
+ setDt(dt);
+ }
+ if (nodeUpdated)
+ {
+ updateId();
+ }
+ for (int i = 0; i < m_lf.size(); ++i)
+ {
+ m_lf[i]->reinitialize(nodeUpdated);
+ }
+ btMatrix3x3 I;
+ I.setIdentity();
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ if (psb->m_nodes[j].m_im > 0)
+ psb->m_nodes[j].m_effectiveMass = I * (1.0 / psb->m_nodes[j].m_im);
+ }
+ }
+ m_projection.reinitialize(nodeUpdated);
+ // m_preconditioner->reinitialize(nodeUpdated);
}
void btDeformableBackwardEulerObjective::setDt(btScalar dt)
{
- m_dt = dt;
+ m_dt = dt;
}
void btDeformableBackwardEulerObjective::multiply(const TVStack& x, TVStack& b) const
{
- BT_PROFILE("multiply");
- // add in the mass term
- size_t counter = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- const btSoftBody::Node& node = psb->m_nodes[j];
- b[counter] = (node.m_im == 0) ? btVector3(0,0,0) : x[counter] / node.m_im;
- ++counter;
- }
- }
-
- for (int i = 0; i < m_lf.size(); ++i)
- {
- // add damping matrix
- m_lf[i]->addScaledDampingForceDifferential(-m_dt, x, b);
- if (m_implicit)
- {
- m_lf[i]->addScaledElasticForceDifferential(-m_dt*m_dt, x, b);
- }
- }
- int offset = m_nodes.size();
- for (int i = offset; i < b.size(); ++i)
- {
- b[i].setZero();
- }
- // add in the lagrange multiplier terms
-
- for (int c = 0; c < m_projection.m_lagrangeMultipliers.size(); ++c)
- {
- // C^T * lambda
- const LagrangeMultiplier& lm = m_projection.m_lagrangeMultipliers[c];
- for (int i = 0; i < lm.m_num_nodes; ++i)
- {
- for (int j = 0; j < lm.m_num_constraints; ++j)
- {
- b[lm.m_indices[i]] += x[offset+c][j] * lm.m_weights[i] * lm.m_dirs[j];
- }
- }
- // C * x
- for (int d = 0; d < lm.m_num_constraints; ++d)
- {
- for (int i = 0; i < lm.m_num_nodes; ++i)
- {
- b[offset+c][d] += lm.m_weights[i] * x[lm.m_indices[i]].dot(lm.m_dirs[d]);
- }
- }
- }
+ BT_PROFILE("multiply");
+ // add in the mass term
+ size_t counter = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ const btSoftBody::Node& node = psb->m_nodes[j];
+ b[counter] = (node.m_im == 0) ? btVector3(0, 0, 0) : x[counter] / node.m_im;
+ ++counter;
+ }
+ }
+
+ for (int i = 0; i < m_lf.size(); ++i)
+ {
+ // add damping matrix
+ m_lf[i]->addScaledDampingForceDifferential(-m_dt, x, b);
+ // Always integrate picking force implicitly for stability.
+ if (m_implicit || m_lf[i]->getForceType() == BT_MOUSE_PICKING_FORCE)
+ {
+ m_lf[i]->addScaledElasticForceDifferential(-m_dt * m_dt, x, b);
+ }
+ }
+ int offset = m_nodes.size();
+ for (int i = offset; i < b.size(); ++i)
+ {
+ b[i].setZero();
+ }
+ // add in the lagrange multiplier terms
+
+ for (int c = 0; c < m_projection.m_lagrangeMultipliers.size(); ++c)
+ {
+ // C^T * lambda
+ const LagrangeMultiplier& lm = m_projection.m_lagrangeMultipliers[c];
+ for (int i = 0; i < lm.m_num_nodes; ++i)
+ {
+ for (int j = 0; j < lm.m_num_constraints; ++j)
+ {
+ b[lm.m_indices[i]] += x[offset + c][j] * lm.m_weights[i] * lm.m_dirs[j];
+ }
+ }
+ // C * x
+ for (int d = 0; d < lm.m_num_constraints; ++d)
+ {
+ for (int i = 0; i < lm.m_num_nodes; ++i)
+ {
+ b[offset + c][d] += lm.m_weights[i] * x[lm.m_indices[i]].dot(lm.m_dirs[d]);
+ }
+ }
+ }
}
void btDeformableBackwardEulerObjective::updateVelocity(const TVStack& dv)
{
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- btSoftBody::Node& node = psb->m_nodes[j];
- node.m_v = m_backupVelocity[node.index] + dv[node.index];
- }
- }
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ btSoftBody::Node& node = psb->m_nodes[j];
+ node.m_v = m_backupVelocity[node.index] + dv[node.index];
+ }
+ }
}
void btDeformableBackwardEulerObjective::applyForce(TVStack& force, bool setZero)
{
- size_t counter = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- counter += psb->m_nodes.size();
- continue;
- }
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- btScalar one_over_mass = (psb->m_nodes[j].m_im == 0) ? 0 : psb->m_nodes[j].m_im;
- psb->m_nodes[j].m_v += one_over_mass * force[counter++];
- }
- }
- if (setZero)
- {
- for (int i = 0; i < force.size(); ++i)
- force[i].setZero();
- }
+ size_t counter = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ counter += psb->m_nodes.size();
+ continue;
+ }
+ if (m_implicit)
+ {
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ if (psb->m_nodes[j].m_im != 0)
+ {
+ psb->m_nodes[j].m_v += psb->m_nodes[j].m_effectiveMass_inv * force[counter++];
+ }
+ }
+ }
+ else
+ {
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ btScalar one_over_mass = (psb->m_nodes[j].m_im == 0) ? 0 : psb->m_nodes[j].m_im;
+ psb->m_nodes[j].m_v += one_over_mass * force[counter++];
+ }
+ }
+ }
+ if (setZero)
+ {
+ for (int i = 0; i < force.size(); ++i)
+ force[i].setZero();
+ }
}
-void btDeformableBackwardEulerObjective::computeResidual(btScalar dt, TVStack &residual)
+void btDeformableBackwardEulerObjective::computeResidual(btScalar dt, TVStack& residual)
{
- BT_PROFILE("computeResidual");
- // add implicit force
- for (int i = 0; i < m_lf.size(); ++i)
- {
- if (m_implicit)
- {
- m_lf[i]->addScaledForces(dt, residual);
- }
- else
- {
- m_lf[i]->addScaledDampingForce(dt, residual);
- }
- }
-// m_projection.project(residual);
+ BT_PROFILE("computeResidual");
+ // add implicit force
+ for (int i = 0; i < m_lf.size(); ++i)
+ {
+ // Always integrate picking force implicitly for stability.
+ if (m_implicit || m_lf[i]->getForceType() == BT_MOUSE_PICKING_FORCE)
+ {
+ m_lf[i]->addScaledForces(dt, residual);
+ }
+ else
+ {
+ m_lf[i]->addScaledDampingForce(dt, residual);
+ }
+ }
+ // m_projection.project(residual);
}
btScalar btDeformableBackwardEulerObjective::computeNorm(const TVStack& residual) const
{
- btScalar mag = 0;
- for (int i = 0; i < residual.size(); ++i)
- {
- mag += residual[i].length2();
- }
- return std::sqrt(mag);
+ btScalar mag = 0;
+ for (int i = 0; i < residual.size(); ++i)
+ {
+ mag += residual[i].length2();
+ }
+ return std::sqrt(mag);
}
btScalar btDeformableBackwardEulerObjective::totalEnergy(btScalar dt)
{
- btScalar e = 0;
- for (int i = 0; i < m_lf.size(); ++i)
- {
- e += m_lf[i]->totalEnergy(dt);
- }
- return e;
+ btScalar e = 0;
+ for (int i = 0; i < m_lf.size(); ++i)
+ {
+ e += m_lf[i]->totalEnergy(dt);
+ }
+ return e;
}
void btDeformableBackwardEulerObjective::applyExplicitForce(TVStack& force)
{
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- m_softBodies[i]->advanceDeformation();
- }
-
- for (int i = 0; i < m_lf.size(); ++i)
- {
- m_lf[i]->addScaledExplicitForce(m_dt, force);
- }
- applyForce(force, true);
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ m_softBodies[i]->advanceDeformation();
+ }
+ if (m_implicit)
+ {
+ // apply forces except gravity force
+ btVector3 gravity;
+ for (int i = 0; i < m_lf.size(); ++i)
+ {
+ if (m_lf[i]->getForceType() == BT_GRAVITY_FORCE)
+ {
+ gravity = static_cast<btDeformableGravityForce*>(m_lf[i])->m_gravity;
+ }
+ else
+ {
+ m_lf[i]->addScaledForces(m_dt, force);
+ }
+ }
+ for (int i = 0; i < m_lf.size(); ++i)
+ {
+ m_lf[i]->addScaledHessian(m_dt);
+ }
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (psb->isActive())
+ {
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ // add gravity explicitly
+ psb->m_nodes[j].m_v += m_dt * psb->m_gravityFactor * gravity;
+ }
+ }
+ }
+ }
+ else
+ {
+ for (int i = 0; i < m_lf.size(); ++i)
+ {
+ m_lf[i]->addScaledExplicitForce(m_dt, force);
+ }
+ }
+ // calculate inverse mass matrix for all nodes
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (psb->isActive())
+ {
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ if (psb->m_nodes[j].m_im > 0)
+ {
+ psb->m_nodes[j].m_effectiveMass_inv = psb->m_nodes[j].m_effectiveMass.inverse();
+ }
+ }
+ }
+ }
+ applyForce(force, true);
}
void btDeformableBackwardEulerObjective::initialGuess(TVStack& dv, const TVStack& residual)
{
- size_t counter = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- dv[counter] = psb->m_nodes[j].m_im * residual[counter];
- ++counter;
- }
- }
+ size_t counter = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ dv[counter] = psb->m_nodes[j].m_im * residual[counter];
+ ++counter;
+ }
+ }
}
//set constraints as projections
void btDeformableBackwardEulerObjective::setConstraints(const btContactSolverInfo& infoGlobal)
{
- m_projection.setConstraints(infoGlobal);
+ m_projection.setConstraints(infoGlobal);
}
void btDeformableBackwardEulerObjective::applyDynamicFriction(TVStack& r)
{
- m_projection.applyDynamicFriction(r);
+ m_projection.applyDynamicFriction(r);
}
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.h b/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.h
index 86579e71ac..eb05b9f010 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.h
@@ -31,143 +31,168 @@
class btDeformableBackwardEulerObjective
{
public:
- typedef btAlignedObjectArray<btVector3> TVStack;
- btScalar m_dt;
- btAlignedObjectArray<btDeformableLagrangianForce*> m_lf;
- btAlignedObjectArray<btSoftBody *>& m_softBodies;
- Preconditioner* m_preconditioner;
- btDeformableContactProjection m_projection;
- const TVStack& m_backupVelocity;
- btAlignedObjectArray<btSoftBody::Node* > m_nodes;
- bool m_implicit;
- MassPreconditioner* m_massPreconditioner;
- KKTPreconditioner* m_KKTPreconditioner;
-
- btDeformableBackwardEulerObjective(btAlignedObjectArray<btSoftBody *>& softBodies, const TVStack& backup_v);
-
- virtual ~btDeformableBackwardEulerObjective();
-
- void initialize(){}
-
- // compute the rhs for CG solve, i.e, add the dt scaled implicit force to residual
- void computeResidual(btScalar dt, TVStack& residual);
-
- // add explicit force to the velocity
- void applyExplicitForce(TVStack& force);
-
- // apply force to velocity and optionally reset the force to zero
- void applyForce(TVStack& force, bool setZero);
-
- // compute the norm of the residual
- btScalar computeNorm(const TVStack& residual) const;
-
- // compute one step of the solve (there is only one solve if the system is linear)
- void computeStep(TVStack& dv, const TVStack& residual, const btScalar& dt);
-
- // perform A*x = b
- void multiply(const TVStack& x, TVStack& b) const;
-
- // set initial guess for CG solve
- void initialGuess(TVStack& dv, const TVStack& residual);
-
- // reset data structure and reset dt
- void reinitialize(bool nodeUpdated, btScalar dt);
-
- void setDt(btScalar dt);
-
- // add friction force to residual
- void applyDynamicFriction(TVStack& r);
-
- // add dv to velocity
- void updateVelocity(const TVStack& dv);
-
- //set constraints as projections
- void setConstraints(const btContactSolverInfo& infoGlobal);
-
- // update the projections and project the residual
- void project(TVStack& r)
- {
- BT_PROFILE("project");
- m_projection.project(r);
- }
-
- // perform precondition M^(-1) x = b
- void precondition(const TVStack& x, TVStack& b)
- {
- m_preconditioner->operator()(x,b);
- }
-
- // reindex all the vertices
- virtual void updateId()
- {
- size_t node_id = 0;
- size_t face_id = 0;
- m_nodes.clear();
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- psb->m_nodes[j].index = node_id;
- m_nodes.push_back(&psb->m_nodes[j]);
- ++node_id;
- }
- for (int j = 0; j < psb->m_faces.size(); ++j)
- {
- psb->m_faces[j].m_index = face_id;
- ++face_id;
- }
- }
- }
-
- const btAlignedObjectArray<btSoftBody::Node*>* getIndices() const
- {
- return &m_nodes;
- }
-
- void setImplicit(bool implicit)
- {
- m_implicit = implicit;
- }
-
- // Calculate the total potential energy in the system
- btScalar totalEnergy(btScalar dt);
-
- void addLagrangeMultiplier(const TVStack& vec, TVStack& extended_vec)
- {
- extended_vec.resize(vec.size() + m_projection.m_lagrangeMultipliers.size());
- for (int i = 0; i < vec.size(); ++i)
- {
- extended_vec[i] = vec[i];
- }
- int offset = vec.size();
- for (int i = 0; i < m_projection.m_lagrangeMultipliers.size(); ++i)
- {
- extended_vec[offset + i].setZero();
- }
- }
-
- void addLagrangeMultiplierRHS(const TVStack& residual, const TVStack& m_dv, TVStack& extended_residual)
- {
- extended_residual.resize(residual.size() + m_projection.m_lagrangeMultipliers.size());
- for (int i = 0; i < residual.size(); ++i)
- {
- extended_residual[i] = residual[i];
- }
- int offset = residual.size();
- for (int i = 0; i < m_projection.m_lagrangeMultipliers.size(); ++i)
- {
- const LagrangeMultiplier& lm = m_projection.m_lagrangeMultipliers[i];
- extended_residual[offset + i].setZero();
- for (int d = 0; d < lm.m_num_constraints; ++d)
- {
- for (int n = 0; n < lm.m_num_nodes; ++n)
- {
- extended_residual[offset + i][d] += lm.m_weights[n] * m_dv[lm.m_indices[n]].dot(lm.m_dirs[d]);
- }
- }
- }
- }
+ typedef btAlignedObjectArray<btVector3> TVStack;
+ btScalar m_dt;
+ btAlignedObjectArray<btDeformableLagrangianForce*> m_lf;
+ btAlignedObjectArray<btSoftBody*>& m_softBodies;
+ Preconditioner* m_preconditioner;
+ btDeformableContactProjection m_projection;
+ const TVStack& m_backupVelocity;
+ btAlignedObjectArray<btSoftBody::Node*> m_nodes;
+ bool m_implicit;
+ MassPreconditioner* m_massPreconditioner;
+ KKTPreconditioner* m_KKTPreconditioner;
+
+ btDeformableBackwardEulerObjective(btAlignedObjectArray<btSoftBody*>& softBodies, const TVStack& backup_v);
+
+ virtual ~btDeformableBackwardEulerObjective();
+
+ void initialize() {}
+
+ // compute the rhs for CG solve, i.e, add the dt scaled implicit force to residual
+ void computeResidual(btScalar dt, TVStack& residual);
+
+ // add explicit force to the velocity
+ void applyExplicitForce(TVStack& force);
+
+ // apply force to velocity and optionally reset the force to zero
+ void applyForce(TVStack& force, bool setZero);
+
+ // compute the norm of the residual
+ btScalar computeNorm(const TVStack& residual) const;
+
+ // compute one step of the solve (there is only one solve if the system is linear)
+ void computeStep(TVStack& dv, const TVStack& residual, const btScalar& dt);
+
+ // perform A*x = b
+ void multiply(const TVStack& x, TVStack& b) const;
+
+ // set initial guess for CG solve
+ void initialGuess(TVStack& dv, const TVStack& residual);
+
+ // reset data structure and reset dt
+ void reinitialize(bool nodeUpdated, btScalar dt);
+
+ void setDt(btScalar dt);
+
+ // add friction force to residual
+ void applyDynamicFriction(TVStack& r);
+
+ // add dv to velocity
+ void updateVelocity(const TVStack& dv);
+
+ //set constraints as projections
+ void setConstraints(const btContactSolverInfo& infoGlobal);
+
+ // update the projections and project the residual
+ void project(TVStack& r)
+ {
+ BT_PROFILE("project");
+ m_projection.project(r);
+ }
+
+ // perform precondition M^(-1) x = b
+ void precondition(const TVStack& x, TVStack& b)
+ {
+ m_preconditioner->operator()(x, b);
+ }
+
+ // reindex all the vertices
+ virtual void updateId()
+ {
+ size_t node_id = 0;
+ size_t face_id = 0;
+ m_nodes.clear();
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ psb->m_nodes[j].index = node_id;
+ m_nodes.push_back(&psb->m_nodes[j]);
+ ++node_id;
+ }
+ for (int j = 0; j < psb->m_faces.size(); ++j)
+ {
+ psb->m_faces[j].m_index = face_id;
+ ++face_id;
+ }
+ }
+ }
+
+ const btAlignedObjectArray<btSoftBody::Node*>* getIndices() const
+ {
+ return &m_nodes;
+ }
+
+ void setImplicit(bool implicit)
+ {
+ m_implicit = implicit;
+ }
+
+ // Calculate the total potential energy in the system
+ btScalar totalEnergy(btScalar dt);
+
+ void addLagrangeMultiplier(const TVStack& vec, TVStack& extended_vec)
+ {
+ extended_vec.resize(vec.size() + m_projection.m_lagrangeMultipliers.size());
+ for (int i = 0; i < vec.size(); ++i)
+ {
+ extended_vec[i] = vec[i];
+ }
+ int offset = vec.size();
+ for (int i = 0; i < m_projection.m_lagrangeMultipliers.size(); ++i)
+ {
+ extended_vec[offset + i].setZero();
+ }
+ }
+
+ void addLagrangeMultiplierRHS(const TVStack& residual, const TVStack& m_dv, TVStack& extended_residual)
+ {
+ extended_residual.resize(residual.size() + m_projection.m_lagrangeMultipliers.size());
+ for (int i = 0; i < residual.size(); ++i)
+ {
+ extended_residual[i] = residual[i];
+ }
+ int offset = residual.size();
+ for (int i = 0; i < m_projection.m_lagrangeMultipliers.size(); ++i)
+ {
+ const LagrangeMultiplier& lm = m_projection.m_lagrangeMultipliers[i];
+ extended_residual[offset + i].setZero();
+ for (int d = 0; d < lm.m_num_constraints; ++d)
+ {
+ for (int n = 0; n < lm.m_num_nodes; ++n)
+ {
+ extended_residual[offset + i][d] += lm.m_weights[n] * m_dv[lm.m_indices[n]].dot(lm.m_dirs[d]);
+ }
+ }
+ }
+ }
+
+ void calculateContactForce(const TVStack& dv, const TVStack& rhs, TVStack& f)
+ {
+ size_t counter = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ const btSoftBody::Node& node = psb->m_nodes[j];
+ f[counter] = (node.m_im == 0) ? btVector3(0, 0, 0) : dv[counter] / node.m_im;
+ ++counter;
+ }
+ }
+ for (int i = 0; i < m_lf.size(); ++i)
+ {
+ // add damping matrix
+ m_lf[i]->addScaledDampingForceDifferential(-m_dt, dv, f);
+ }
+ counter = 0;
+ for (; counter < f.size(); ++counter)
+ {
+ f[counter] = rhs[counter] - f[counter];
+ }
+ }
};
#endif /* btBackwardEulerObjective_h */
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.cpp
index 132699c54f..4b11fccecb 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.cpp
@@ -18,468 +18,489 @@
#include "btDeformableBodySolver.h"
#include "btSoftBodyInternals.h"
#include "LinearMath/btQuickprof.h"
-static const int kMaxConjugateGradientIterations = 50;
+static const int kMaxConjugateGradientIterations = 300;
btDeformableBodySolver::btDeformableBodySolver()
-: m_numNodes(0)
-, m_cg(kMaxConjugateGradientIterations)
-, m_cr(kMaxConjugateGradientIterations)
-, m_maxNewtonIterations(5)
-, m_newtonTolerance(1e-4)
-, m_lineSearch(false)
-, m_useProjection(false)
+ : m_numNodes(0), m_cg(kMaxConjugateGradientIterations), m_cr(kMaxConjugateGradientIterations), m_maxNewtonIterations(1), m_newtonTolerance(1e-4), m_lineSearch(false), m_useProjection(false)
{
- m_objective = new btDeformableBackwardEulerObjective(m_softBodies, m_backupVelocity);
+ m_objective = new btDeformableBackwardEulerObjective(m_softBodies, m_backupVelocity);
}
btDeformableBodySolver::~btDeformableBodySolver()
{
- delete m_objective;
+ delete m_objective;
}
void btDeformableBodySolver::solveDeformableConstraints(btScalar solverdt)
{
- BT_PROFILE("solveDeformableConstraints");
- if (!m_implicit)
- {
- m_objective->computeResidual(solverdt, m_residual);
- m_objective->applyDynamicFriction(m_residual);
- if (m_useProjection)
- {
- computeStep(m_dv, m_residual);
- }
- else
- {
- TVStack rhs, x;
- m_objective->addLagrangeMultiplierRHS(m_residual, m_dv, rhs);
- m_objective->addLagrangeMultiplier(m_dv, x);
- m_objective->m_preconditioner->reinitialize(true);
- computeStep(x, rhs);
- for (int i = 0; i<m_dv.size(); ++i)
- {
- m_dv[i] = x[i];
- }
- }
- updateVelocity();
- }
- else
- {
- for (int i = 0; i < m_maxNewtonIterations; ++i)
- {
- updateState();
- // add the inertia term in the residual
- int counter = 0;
- for (int k = 0; k < m_softBodies.size(); ++k)
- {
- btSoftBody* psb = m_softBodies[k];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- if (psb->m_nodes[j].m_im > 0)
- {
- m_residual[counter] = (-1./psb->m_nodes[j].m_im) * m_dv[counter];
- }
- ++counter;
- }
- }
-
- m_objective->computeResidual(solverdt, m_residual);
- if (m_objective->computeNorm(m_residual) < m_newtonTolerance && i > 0)
- {
- break;
- }
- // todo xuchenhan@: this really only needs to be calculated once
- m_objective->applyDynamicFriction(m_residual);
- if (m_lineSearch)
- {
- btScalar inner_product = computeDescentStep(m_ddv,m_residual);
- btScalar alpha = 0.01, beta = 0.5; // Boyd & Vandenberghe suggested alpha between 0.01 and 0.3, beta between 0.1 to 0.8
- btScalar scale = 2;
- btScalar f0 = m_objective->totalEnergy(solverdt)+kineticEnergy(), f1, f2;
- backupDv();
- do {
- scale *= beta;
- if (scale < 1e-8) {
- return;
- }
- updateEnergy(scale);
- f1 = m_objective->totalEnergy(solverdt)+kineticEnergy();
- f2 = f0 - alpha * scale * inner_product;
- } while (!(f1 < f2+SIMD_EPSILON)); // if anything here is nan then the search continues
- revertDv();
- updateDv(scale);
- }
- else
- {
- computeStep(m_ddv, m_residual);
- updateDv();
- }
- for (int j = 0; j < m_numNodes; ++j)
- {
- m_ddv[j].setZero();
- m_residual[j].setZero();
- }
- }
- updateVelocity();
- }
+ BT_PROFILE("solveDeformableConstraints");
+ if (!m_implicit)
+ {
+ m_objective->computeResidual(solverdt, m_residual);
+ m_objective->applyDynamicFriction(m_residual);
+ if (m_useProjection)
+ {
+ computeStep(m_dv, m_residual);
+ }
+ else
+ {
+ TVStack rhs, x;
+ m_objective->addLagrangeMultiplierRHS(m_residual, m_dv, rhs);
+ m_objective->addLagrangeMultiplier(m_dv, x);
+ m_objective->m_preconditioner->reinitialize(true);
+ computeStep(x, rhs);
+ for (int i = 0; i < m_dv.size(); ++i)
+ {
+ m_dv[i] = x[i];
+ }
+ }
+ updateVelocity();
+ }
+ else
+ {
+ for (int i = 0; i < m_maxNewtonIterations; ++i)
+ {
+ updateState();
+ // add the inertia term in the residual
+ int counter = 0;
+ for (int k = 0; k < m_softBodies.size(); ++k)
+ {
+ btSoftBody* psb = m_softBodies[k];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ if (psb->m_nodes[j].m_im > 0)
+ {
+ m_residual[counter] = (-1. / psb->m_nodes[j].m_im) * m_dv[counter];
+ }
+ ++counter;
+ }
+ }
+
+ m_objective->computeResidual(solverdt, m_residual);
+ if (m_objective->computeNorm(m_residual) < m_newtonTolerance && i > 0)
+ {
+ break;
+ }
+ // todo xuchenhan@: this really only needs to be calculated once
+ m_objective->applyDynamicFriction(m_residual);
+ if (m_lineSearch)
+ {
+ btScalar inner_product = computeDescentStep(m_ddv, m_residual);
+ btScalar alpha = 0.01, beta = 0.5; // Boyd & Vandenberghe suggested alpha between 0.01 and 0.3, beta between 0.1 to 0.8
+ btScalar scale = 2;
+ btScalar f0 = m_objective->totalEnergy(solverdt) + kineticEnergy(), f1, f2;
+ backupDv();
+ do
+ {
+ scale *= beta;
+ if (scale < 1e-8)
+ {
+ return;
+ }
+ updateEnergy(scale);
+ f1 = m_objective->totalEnergy(solverdt) + kineticEnergy();
+ f2 = f0 - alpha * scale * inner_product;
+ } while (!(f1 < f2 + SIMD_EPSILON)); // if anything here is nan then the search continues
+ revertDv();
+ updateDv(scale);
+ }
+ else
+ {
+ computeStep(m_ddv, m_residual);
+ updateDv();
+ }
+ for (int j = 0; j < m_numNodes; ++j)
+ {
+ m_ddv[j].setZero();
+ m_residual[j].setZero();
+ }
+ }
+ updateVelocity();
+ }
}
btScalar btDeformableBodySolver::kineticEnergy()
{
- btScalar ke = 0;
- for (int i = 0; i < m_softBodies.size();++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size();++j)
- {
- btSoftBody::Node& node = psb->m_nodes[j];
- if (node.m_im > 0)
- {
- ke += m_dv[node.index].length2() * 0.5 / node.m_im;
- }
- }
- }
- return ke;
+ btScalar ke = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ btSoftBody::Node& node = psb->m_nodes[j];
+ if (node.m_im > 0)
+ {
+ ke += m_dv[node.index].length2() * 0.5 / node.m_im;
+ }
+ }
+ }
+ return ke;
}
void btDeformableBodySolver::backupDv()
{
- m_backup_dv.resize(m_dv.size());
- for (int i = 0; i<m_backup_dv.size(); ++i)
- {
- m_backup_dv[i] = m_dv[i];
- }
+ m_backup_dv.resize(m_dv.size());
+ for (int i = 0; i < m_backup_dv.size(); ++i)
+ {
+ m_backup_dv[i] = m_dv[i];
+ }
}
void btDeformableBodySolver::revertDv()
{
- for (int i = 0; i<m_backup_dv.size(); ++i)
- {
- m_dv[i] = m_backup_dv[i];
- }
+ for (int i = 0; i < m_backup_dv.size(); ++i)
+ {
+ m_dv[i] = m_backup_dv[i];
+ }
}
void btDeformableBodySolver::updateEnergy(btScalar scale)
{
- for (int i = 0; i<m_dv.size(); ++i)
- {
- m_dv[i] = m_backup_dv[i] + scale * m_ddv[i];
- }
- updateState();
+ for (int i = 0; i < m_dv.size(); ++i)
+ {
+ m_dv[i] = m_backup_dv[i] + scale * m_ddv[i];
+ }
+ updateState();
}
-
btScalar btDeformableBodySolver::computeDescentStep(TVStack& ddv, const TVStack& residual, bool verbose)
{
- m_cg.solve(*m_objective, ddv, residual, false);
- btScalar inner_product = m_cg.dot(residual, m_ddv);
- btScalar res_norm = m_objective->computeNorm(residual);
- btScalar tol = 1e-5 * res_norm * m_objective->computeNorm(m_ddv);
- if (inner_product < -tol)
- {
- if (verbose)
- {
- std::cout << "Looking backwards!" << std::endl;
- }
- for (int i = 0; i < m_ddv.size();++i)
- {
- m_ddv[i] = -m_ddv[i];
- }
- inner_product = -inner_product;
- }
- else if (std::abs(inner_product) < tol)
- {
- if (verbose)
- {
- std::cout << "Gradient Descent!" << std::endl;
- }
- btScalar scale = m_objective->computeNorm(m_ddv) / res_norm;
- for (int i = 0; i < m_ddv.size();++i)
- {
- m_ddv[i] = scale * residual[i];
- }
- inner_product = scale * res_norm * res_norm;
- }
- return inner_product;
+ m_cg.solve(*m_objective, ddv, residual, false);
+ btScalar inner_product = m_cg.dot(residual, m_ddv);
+ btScalar res_norm = m_objective->computeNorm(residual);
+ btScalar tol = 1e-5 * res_norm * m_objective->computeNorm(m_ddv);
+ if (inner_product < -tol)
+ {
+ if (verbose)
+ {
+ std::cout << "Looking backwards!" << std::endl;
+ }
+ for (int i = 0; i < m_ddv.size(); ++i)
+ {
+ m_ddv[i] = -m_ddv[i];
+ }
+ inner_product = -inner_product;
+ }
+ else if (std::abs(inner_product) < tol)
+ {
+ if (verbose)
+ {
+ std::cout << "Gradient Descent!" << std::endl;
+ }
+ btScalar scale = m_objective->computeNorm(m_ddv) / res_norm;
+ for (int i = 0; i < m_ddv.size(); ++i)
+ {
+ m_ddv[i] = scale * residual[i];
+ }
+ inner_product = scale * res_norm * res_norm;
+ }
+ return inner_product;
}
void btDeformableBodySolver::updateState()
{
- updateVelocity();
- updateTempPosition();
+ updateVelocity();
+ updateTempPosition();
}
void btDeformableBodySolver::updateDv(btScalar scale)
{
- for (int i = 0; i < m_numNodes; ++i)
- {
- m_dv[i] += scale * m_ddv[i];
- }
+ for (int i = 0; i < m_numNodes; ++i)
+ {
+ m_dv[i] += scale * m_ddv[i];
+ }
}
void btDeformableBodySolver::computeStep(TVStack& ddv, const TVStack& residual)
{
- if (m_useProjection)
- m_cg.solve(*m_objective, ddv, residual, false);
- else
- m_cr.solve(*m_objective, ddv, residual, false);
+ if (m_useProjection)
+ m_cg.solve(*m_objective, ddv, residual, false);
+ else
+ m_cr.solve(*m_objective, ddv, residual, false);
}
-void btDeformableBodySolver::reinitialize(const btAlignedObjectArray<btSoftBody *>& softBodies, btScalar dt)
+void btDeformableBodySolver::reinitialize(const btAlignedObjectArray<btSoftBody*>& softBodies, btScalar dt)
{
- m_softBodies.copyFromArray(softBodies);
- bool nodeUpdated = updateNodes();
-
- if (nodeUpdated)
- {
- m_dv.resize(m_numNodes, btVector3(0,0,0));
- m_ddv.resize(m_numNodes, btVector3(0,0,0));
- m_residual.resize(m_numNodes, btVector3(0,0,0));
- m_backupVelocity.resize(m_numNodes, btVector3(0,0,0));
- }
-
- // need to setZero here as resize only set value for newly allocated items
- for (int i = 0; i < m_numNodes; ++i)
- {
- m_dv[i].setZero();
- m_ddv[i].setZero();
- m_residual[i].setZero();
- }
-
- m_dt = dt;
- m_objective->reinitialize(nodeUpdated, dt);
- updateSoftBodies();
-}
+ m_softBodies.copyFromArray(softBodies);
+ bool nodeUpdated = updateNodes();
-void btDeformableBodySolver::setConstraints(const btContactSolverInfo& infoGlobal)
-{
- BT_PROFILE("setConstraint");
- m_objective->setConstraints(infoGlobal);
+ if (nodeUpdated)
+ {
+ m_dv.resize(m_numNodes, btVector3(0, 0, 0));
+ m_ddv.resize(m_numNodes, btVector3(0, 0, 0));
+ m_residual.resize(m_numNodes, btVector3(0, 0, 0));
+ m_backupVelocity.resize(m_numNodes, btVector3(0, 0, 0));
+ }
+
+ // need to setZero here as resize only set value for newly allocated items
+ for (int i = 0; i < m_numNodes; ++i)
+ {
+ m_dv[i].setZero();
+ m_ddv[i].setZero();
+ m_residual[i].setZero();
+ }
+
+ if (dt > 0)
+ {
+ m_dt = dt;
+ }
+ m_objective->reinitialize(nodeUpdated, dt);
+ updateSoftBodies();
}
-btScalar btDeformableBodySolver::solveContactConstraints(btCollisionObject** deformableBodies,int numDeformableBodies, const btContactSolverInfo& infoGlobal)
+void btDeformableBodySolver::setConstraints(const btContactSolverInfo& infoGlobal)
{
- BT_PROFILE("solveContactConstraints");
- btScalar maxSquaredResidual = m_objective->m_projection.update(deformableBodies,numDeformableBodies, infoGlobal);
- return maxSquaredResidual;
+ BT_PROFILE("setConstraint");
+ m_objective->setConstraints(infoGlobal);
}
-void btDeformableBodySolver::splitImpulseSetup(const btContactSolverInfo& infoGlobal)
+btScalar btDeformableBodySolver::solveContactConstraints(btCollisionObject** deformableBodies, int numDeformableBodies, const btContactSolverInfo& infoGlobal)
{
- m_objective->m_projection.splitImpulseSetup(infoGlobal);
+ BT_PROFILE("solveContactConstraints");
+ btScalar maxSquaredResidual = m_objective->m_projection.update(deformableBodies, numDeformableBodies, infoGlobal);
+ return maxSquaredResidual;
}
// Writes the node velocities of every active soft body: m_v = m_backupVelocity + m_dv, minus the node's
// m_splitv when m_implicit is false. Also resets each body's m_maxSpeedSquared to 0 and, for active
// bodies, raises it to the largest squared node speed seen.
void btDeformableBodySolver::updateVelocity()
{
- int counter = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- psb->m_maxSpeedSquared = 0;
- if (!psb->isActive())
- {
- counter += psb->m_nodes.size();
- continue;
- }
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- // set NaN to zero;
- if (m_dv[counter] != m_dv[counter])
- {
- m_dv[counter].setZero();
- }
- psb->m_nodes[j].m_v = m_backupVelocity[counter]+m_dv[counter];
- psb->m_maxSpeedSquared = btMax(psb->m_maxSpeedSquared, psb->m_nodes[j].m_v.length2());
- ++counter;
- }
- }
+ int counter = 0;  // flat index into m_dv / m_backupVelocity, running across all bodies
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ psb->m_maxSpeedSquared = 0;
+ if (!psb->isActive())
+ {
+ counter += psb->m_nodes.size();  // skip this body's slots so later bodies keep their indices
+ continue;
+ }
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ // set NaN to zero; the self-inequality below is used as the NaN test
+ if (m_dv[counter] != m_dv[counter])
+ {
+ m_dv[counter].setZero();
+ }
+ if (m_implicit)
+ {
+ psb->m_nodes[j].m_v = m_backupVelocity[counter] + m_dv[counter];
+ }
+ else
+ {
+ psb->m_nodes[j].m_v = m_backupVelocity[counter] + m_dv[counter] - psb->m_nodes[j].m_splitv;  // explicit: split velocity is removed from m_v
+ }
+ psb->m_maxSpeedSquared = btMax(psb->m_maxSpeedSquared, psb->m_nodes[j].m_v.length2());
+ ++counter;
+ }
+ }
}
// For every active soft body, sets each node's temporary position m_q = m_x + m_dt * (m_v + m_splitv)
// and then calls updateDeformation() on the body.
void btDeformableBodySolver::updateTempPosition()
{
- int counter = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- counter += psb->m_nodes.size();
- continue;
- }
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- psb->m_nodes[j].m_q = psb->m_nodes[j].m_x + m_dt * psb->m_nodes[j].m_v;
- ++counter;
- }
- psb->updateDeformation();
- }
+ int counter = 0;  // kept in step with the node order, but never used as an index in this function
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ counter += psb->m_nodes.size();
+ continue;
+ }
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ psb->m_nodes[j].m_q = psb->m_nodes[j].m_x + m_dt * (psb->m_nodes[j].m_v + psb->m_nodes[j].m_splitv);  // split velocity now contributes to the temporary position
+ ++counter;
+ }
+ psb->updateDeformation();
+ }
}
// Copies every node's m_v into m_backupVelocity in body-then-node order. Inactive bodies are included
// (there is no isActive() check here).
void btDeformableBodySolver::backupVelocity()
{
- int counter = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- m_backupVelocity[counter++] = psb->m_nodes[j].m_v;
- }
- }
+ int counter = 0;  // flat index into m_backupVelocity
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ m_backupVelocity[counter++] = psb->m_nodes[j].m_v;
+ }
+ }
}
// Prepares m_dv and m_backupVelocity for the momentum solve and then resets each active node's m_v to
// the backed-up value. `implicit` selects which of the two initialisations is used.
void btDeformableBodySolver::setupDeformableSolve(bool implicit)
{
- int counter = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- counter += psb->m_nodes.size();
- continue;
- }
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- if (implicit)
- {
- if ((psb->m_nodes[j].m_v - m_backupVelocity[counter]).norm() < SIMD_EPSILON)
- m_dv[counter] = psb->m_nodes[j].m_v - m_backupVelocity[counter];
- else
- m_dv[counter] = psb->m_nodes[j].m_v - psb->m_nodes[j].m_vn;
- m_backupVelocity[counter] = psb->m_nodes[j].m_vn;
- }
- else
- {
- m_dv[counter] = psb->m_nodes[j].m_v - m_backupVelocity[counter];
- }
- psb->m_nodes[j].m_v = m_backupVelocity[counter];
- ++counter;
- }
- }
+ int counter = 0;  // flat index into m_dv / m_backupVelocity, running across all bodies
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ counter += psb->m_nodes.size();  // skip this body's slots so later bodies keep their indices
+ continue;
+ }
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ if (implicit)
+ {
+ // setting the initial guess for newton, need m_dv = v_{n+1} - v_n for dofs that are in constraint.
+ if (psb->m_nodes[j].m_v == m_backupVelocity[counter])  // exact comparison: velocity unchanged since the backup
+ m_dv[counter].setZero();
+ else
+ m_dv[counter] = psb->m_nodes[j].m_v - psb->m_nodes[j].m_vn;
+ m_backupVelocity[counter] = psb->m_nodes[j].m_vn;  // from here on the backup holds m_vn
+ }
+ else
+ {
+ m_dv[counter] = psb->m_nodes[j].m_v + psb->m_nodes[j].m_splitv - m_backupVelocity[counter];  // explicit: split velocity is folded into dv
+ }
+ psb->m_nodes[j].m_v = m_backupVelocity[counter];  // m_v restarts from the backup (m_vn in the implicit branch)
+ ++counter;
+ }
+ }
}
// Restores every node's m_v from m_backupVelocity, walking bodies and nodes in the same order that
// backupVelocity() uses. Inactive bodies are included.
void btDeformableBodySolver::revertVelocity()
{
- int counter = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- psb->m_nodes[j].m_v = m_backupVelocity[counter++];
- }
- }
+ int counter = 0;  // flat index into m_backupVelocity
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ psb->m_nodes[j].m_v = m_backupVelocity[counter++];
+ }
+ }
}
// Recounts the nodes of all soft bodies. Returns true, and stores the new total in m_numNodes, only
// when the total differs from the stored value; otherwise returns false and leaves m_numNodes alone.
bool btDeformableBodySolver::updateNodes()
{
- int numNodes = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- numNodes += m_softBodies[i]->m_nodes.size();
- if (numNodes != m_numNodes)
- {
- m_numNodes = numNodes;
- return true;
- }
- return false;
+ int numNodes = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ numNodes += m_softBodies[i]->m_nodes.size();
+ if (numNodes != m_numNodes)
+ {
+ m_numNodes = numNodes;
+ return true;
+ }
+ return false;
}
-
// Applies the objective's explicit forces and then runs predictDeformableMotion() on every active
// soft body. Under the implicit scheme, each active node's m_q is first set to m_x + m_v * solverdt.
void btDeformableBodySolver::predictMotion(btScalar solverdt)
{
- // apply explicit forces to velocity
- m_objective->applyExplicitForce(m_residual);
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody *psb = m_softBodies[i];
-
- if (psb->isActive())
- {
- // predict motion for collision detection
- predictDeformableMotion(psb, solverdt);
- }
- }
+ // implicit scheme only: pre-set m_q = m_x + m_v * solverdt before the explicit forces are applied
+ if (m_implicit)
+ {
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (psb->isActive())
+ {
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ psb->m_nodes[j].m_q = psb->m_nodes[j].m_x + psb->m_nodes[j].m_v * solverdt;
+ }
+ }
+ }
+ }
+ m_objective->applyExplicitForce(m_residual);  // apply explicit forces to velocity
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+
+ if (psb->isActive())
+ {
+ // predict motion for collision detection
+ predictDeformableMotion(psb, solverdt);
+ }
+ }
}
// Per-body prediction step. In order: refreshes the body's constants (and face tree, when SDF_RD
// collisions are enabled) if m_bUpdateRtCst is set; fills the m_sst timing and margin fields; updates
// the bounds; applies drag; sets each node's predicted position m_q; resets m_splitv and
// m_constrained; refreshes the node/face trees; and clears the three contact lists.
void btDeformableBodySolver::predictDeformableMotion(btSoftBody* psb, btScalar dt)
{
- BT_PROFILE("btDeformableBodySolver::predictDeformableMotion");
- int i, ni;
-
- /* Update */
- if (psb->m_bUpdateRtCst)
- {
- psb->m_bUpdateRtCst = false;
- psb->updateConstants();
- psb->m_fdbvt.clear();
- if (psb->m_cfg.collisions & btSoftBody::fCollision::SDF_RD)
- {
- psb->initializeFaceTree();
- }
- }
-
- /* Prepare */
- psb->m_sst.sdt = dt * psb->m_cfg.timescale;
- psb->m_sst.isdt = 1 / psb->m_sst.sdt;
- psb->m_sst.velmrg = psb->m_sst.sdt * 3;
- psb->m_sst.radmrg = psb->getCollisionShape()->getMargin();
- psb->m_sst.updmrg = psb->m_sst.radmrg * (btScalar)0.25;
- /* Bounds */
- psb->updateBounds();
-
- /* Integrate */
- // do not allow particles to move more than the bounding box size
- btScalar max_v = (psb->m_bounds[1]-psb->m_bounds[0]).norm() / dt;
- for (i = 0, ni = psb->m_nodes.size(); i < ni; ++i)
- {
- btSoftBody::Node& n = psb->m_nodes[i];
- // apply drag
- n.m_v *= (1 - psb->m_cfg.drag);
- // scale velocity back
- if (n.m_v.norm() > max_v)
- {
- n.m_v.safeNormalize();
- n.m_v *= max_v;
- }
- n.m_q = n.m_x + n.m_v * dt;
- n.m_penetration = 0;
- }
-
- /* Nodes */
- psb->updateNodeTree(true, true);
- if (!psb->m_fdbvt.empty())
- {
- psb->updateFaceTree(true, true);
- }
- /* Clear contacts */
- psb->m_nodeRigidContacts.resize(0);
- psb->m_faceRigidContacts.resize(0);
- psb->m_faceNodeContacts.resize(0);
- /* Optimize dbvt's */
-// psb->m_ndbvt.optimizeIncremental(1);
-// psb->m_fdbvt.optimizeIncremental(1);
-}
+ BT_PROFILE("btDeformableBodySolver::predictDeformableMotion");
+ int i, ni;
+
+ /* Update */
+ if (psb->m_bUpdateRtCst)
+ {
+ psb->m_bUpdateRtCst = false;
+ psb->updateConstants();
+ psb->m_fdbvt.clear();
+ if (psb->m_cfg.collisions & btSoftBody::fCollision::SDF_RD)
+ {
+ psb->initializeFaceTree();
+ }
+ }
+ /* Prepare */
+ psb->m_sst.sdt = dt * psb->m_cfg.timescale;
+ psb->m_sst.isdt = 1 / psb->m_sst.sdt;  // NOTE(review): assumes sdt != 0 - confirm callers never pass dt == 0
+ psb->m_sst.velmrg = psb->m_sst.sdt * 3;
+ psb->m_sst.radmrg = psb->getCollisionShape()->getMargin();
+ psb->m_sst.updmrg = psb->m_sst.radmrg * (btScalar)0.25;
+ /* Bounds */
+ psb->updateBounds();
+
+ /* Integrate */
+ // do not allow particles to move more than the bounding box size
+ btScalar max_v = (psb->m_bounds[1] - psb->m_bounds[0]).norm() / dt;  // NOTE(review): divides by dt - same dt != 0 assumption
+ for (i = 0, ni = psb->m_nodes.size(); i < ni; ++i)
+ {
+ btSoftBody::Node& n = psb->m_nodes[i];
+ // apply drag
+ n.m_v *= (1 - psb->m_cfg.drag);
+ // implicit: m_q stays at m_x (no clamping); explicit: scale velocity back to max_v, then advance m_q
+ if (m_implicit)
+ {
+ n.m_q = n.m_x;
+ }
+ else
+ {
+ if (n.m_v.norm() > max_v)
+ {
+ n.m_v.safeNormalize();
+ n.m_v *= max_v;
+ }
+ n.m_q = n.m_x + n.m_v * dt;
+ }
+ n.m_splitv.setZero();  // split velocity is reset on every call
+ n.m_constrained = false;  // constrained flag is cleared on every call
+ }
+
+ /* Nodes */
+ psb->updateNodeTree(true, true);
+ if (!psb->m_fdbvt.empty())
+ {
+ psb->updateFaceTree(true, true);
+ }
+ /* Clear contacts */
+ psb->m_nodeRigidContacts.resize(0);
+ psb->m_faceRigidContacts.resize(0);
+ psb->m_faceNodeContacts.resize(0);
+ /* Optimize dbvt's */
+ // psb->m_ndbvt.optimizeIncremental(1);
+ // psb->m_fdbvt.optimizeIncremental(1);
+}
// Calls updateNormals() on every active soft body; inactive bodies are left untouched.
void btDeformableBodySolver::updateSoftBodies()
{
- BT_PROFILE("updateSoftBodies");
- for (int i = 0; i < m_softBodies.size(); i++)
- {
- btSoftBody *psb = (btSoftBody *)m_softBodies[i];
- if (psb->isActive())
- {
- psb->updateNormals();
- }
- }
+ BT_PROFILE("updateSoftBodies");
+ for (int i = 0; i < m_softBodies.size(); i++)
+ {
+ btSoftBody* psb = (btSoftBody*)m_softBodies[i];
+ if (psb->isActive())
+ {
+ psb->updateNormals();
+ }
+ }
}
// Stores the implicit/explicit scheme flag in the solver and forwards the same value to m_objective.
void btDeformableBodySolver::setImplicit(bool implicit)
{
- m_implicit = implicit;
- m_objective->setImplicit(implicit);
+ m_implicit = implicit;
+ m_objective->setImplicit(implicit);  // keep the objective's flag in sync with the solver's
}
// Stores the line-search flag in m_lineSearch; nothing else is touched.
void btDeformableBodySolver::setLineSearch(bool lineSearch)
{
- m_lineSearch = lineSearch;
+ m_lineSearch = lineSearch;
}
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.h b/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.h
index d4e5f4c603..ae674d6e89 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.h
@@ -16,7 +16,6 @@
#ifndef BT_DEFORMABLE_BODY_SOLVERS_H
#define BT_DEFORMABLE_BODY_SOLVERS_H
-
#include "btSoftBodySolvers.h"
#include "btDeformableBackwardEulerObjective.h"
#include "btDeformableMultiBodyDynamicsWorld.h"
@@ -30,133 +29,132 @@ class btDeformableMultiBodyDynamicsWorld;
// Soft-body solver for deformable bodies. Holds the per-node work vectors (dv, ddv, residual, backed-up
// velocity) for all registered soft bodies and a pointer to the backward-Euler objective it works with.
class btDeformableBodySolver : public btSoftBodySolver
{
- typedef btAlignedObjectArray<btVector3> TVStack;
+ typedef btAlignedObjectArray<btVector3> TVStack;
+
protected:
- int m_numNodes; // total number of deformable body nodes
- TVStack m_dv; // v_{n+1} - v_n
- TVStack m_backup_dv; // backed up dv
- TVStack m_ddv; // incremental dv
- TVStack m_residual; // rhs of the linear solve
- btAlignedObjectArray<btSoftBody *> m_softBodies; // all deformable bodies
- TVStack m_backupVelocity; // backed up v, equals v_n for implicit, equals v_{n+1}^* for explicit
- btScalar m_dt; // dt
- btConjugateGradient<btDeformableBackwardEulerObjective> m_cg; // CG solver
- btConjugateResidual<btDeformableBackwardEulerObjective> m_cr; // CR solver
- bool m_implicit; // use implicit scheme if true, explicit scheme if false
- int m_maxNewtonIterations; // max number of newton iterations
- btScalar m_newtonTolerance; // stop newton iterations if f(x) < m_newtonTolerance
- bool m_lineSearch; // If true, use newton's method with line search under implicit scheme
+ int m_numNodes; // total number of deformable body nodes
+ TVStack m_dv; // v_{n+1} - v_n
+ TVStack m_backup_dv; // backed up dv
+ TVStack m_ddv; // incremental dv
+ TVStack m_residual; // rhs of the linear solve
+ btAlignedObjectArray<btSoftBody*> m_softBodies; // all deformable bodies
+ TVStack m_backupVelocity; // backed up v, equals v_n for implicit, equals v_{n+1}^* for explicit
+ btScalar m_dt; // dt
+ btConjugateGradient<btDeformableBackwardEulerObjective> m_cg; // CG solver
+ btConjugateResidual<btDeformableBackwardEulerObjective> m_cr; // CR solver
+ bool m_implicit; // use implicit scheme if true, explicit scheme if false
+ int m_maxNewtonIterations; // max number of newton iterations
+ btScalar m_newtonTolerance; // stop newton iterations if f(x) < m_newtonTolerance
+ bool m_lineSearch; // If true, use newton's method with line search under implicit scheme
public:
- // handles data related to objective function
- btDeformableBackwardEulerObjective* m_objective;
- bool m_useProjection;
-
- btDeformableBodySolver();
-
- virtual ~btDeformableBodySolver();
-
- virtual SolverTypes getSolverType() const
- {
- return DEFORMABLE_SOLVER;
- }
-
- // update soft body normals
- virtual void updateSoftBodies();
-
- virtual btScalar solveContactConstraints(btCollisionObject** deformableBodies,int numDeformableBodies, const btContactSolverInfo& infoGlobal);
-
- // solve the momentum equation
- virtual void solveDeformableConstraints(btScalar solverdt);
-
- // set up the position error in split impulse
- void splitImpulseSetup(const btContactSolverInfo& infoGlobal);
-
- // resize/clear data structures
- void reinitialize(const btAlignedObjectArray<btSoftBody *>& softBodies, btScalar dt);
-
- // set up contact constraints
- void setConstraints(const btContactSolverInfo& infoGlobal);
-
- // add in elastic forces and gravity to obtain v_{n+1}^* and calls predictDeformableMotion
- virtual void predictMotion(btScalar solverdt);
-
- // move to temporary position x_{n+1}^* = x_n + dt * v_{n+1}^*
- // x_{n+1}^* is stored in m_q
- void predictDeformableMotion(btSoftBody* psb, btScalar dt);
-
- // save the current velocity to m_backupVelocity
- void backupVelocity();
-
- // set m_dv and m_backupVelocity to desired value to prepare for momentum solve
- void setupDeformableSolve(bool implicit);
-
- // set the current velocity to that backed up in m_backupVelocity
- void revertVelocity();
-
- // set velocity to m_dv + m_backupVelocity
- void updateVelocity();
-
- // update the node count
- bool updateNodes();
-
- // calculate the change in dv resulting from the momentum solve
- void computeStep(TVStack& ddv, const TVStack& residual);
-
- // calculate the change in dv resulting from the momentum solve when line search is turned on
- btScalar computeDescentStep(TVStack& ddv, const TVStack& residual, bool verbose=false);
-
- virtual void copySoftBodyToVertexBuffer(const btSoftBody *const softBody, btVertexBufferDescriptor *vertexBuffer) {}
-
- // process collision between deformable and rigid
- virtual void processCollision(btSoftBody * softBody, const btCollisionObjectWrapper * collisionObjectWrap)
- {
- softBody->defaultCollisionHandler(collisionObjectWrap);
- }
-
- // process collision between deformable and deformable
- virtual void processCollision(btSoftBody * softBody, btSoftBody * otherSoftBody) {
- softBody->defaultCollisionHandler(otherSoftBody);
- }
-
- // If true, implicit time stepping scheme is used.
- // Otherwise, explicit time stepping scheme is used
- void setImplicit(bool implicit);
-
- // If true, newton's method with line search is used when implicit time stepping scheme is turned on
- void setLineSearch(bool lineSearch);
-
- // set temporary position x^* = x_n + dt * v
- // update the deformation gradient at position x^*
- void updateState();
-
- // set dv = dv + scale * ddv
- void updateDv(btScalar scale = 1);
-
- // set temporary position x^* = x_n + dt * v^*
- void updateTempPosition();
-
- // save the current dv to m_backup_dv;
- void backupDv();
-
- // set dv to the backed-up value
- void revertDv();
-
- // set dv = dv + scale * ddv
- // set v^* = v_n + dv
- // set temporary position x^* = x_n + dt * v^*
- // update the deformation gradient at position x^*
- void updateEnergy(btScalar scale);
-
- // calculates the appropriately scaled kinetic energy in the system, which is
- // 1/2 * dv^T * M * dv
- // used in line search
- btScalar kineticEnergy();
-
- // unused functions
- virtual void optimize(btAlignedObjectArray<btSoftBody *> &softBodies, bool forceUpdate = false){}
- virtual void solveConstraints(btScalar dt){}
- virtual bool checkInitialized(){return true;}
- virtual void copyBackToSoftBodies(bool bMove = true) {}
+ // handles data related to objective function
+ btDeformableBackwardEulerObjective* m_objective;
+ bool m_useProjection;
+
+ btDeformableBodySolver();
+
+ virtual ~btDeformableBodySolver();
+
+ virtual SolverTypes getSolverType() const
+ {
+ return DEFORMABLE_SOLVER;
+ }
+
+ // update soft body normals
+ virtual void updateSoftBodies();
+
+ virtual btScalar solveContactConstraints(btCollisionObject** deformableBodies, int numDeformableBodies, const btContactSolverInfo& infoGlobal);  // returns the result of m_objective->m_projection.update()
+
+ // solve the momentum equation
+ virtual void solveDeformableConstraints(btScalar solverdt);
+
+ // resize/clear data structures
+ void reinitialize(const btAlignedObjectArray<btSoftBody*>& softBodies, btScalar dt);
+
+ // set up contact constraints
+ void setConstraints(const btContactSolverInfo& infoGlobal);
+
+ // add in elastic forces and gravity to obtain v_{n+1}^* and calls predictDeformableMotion
+ virtual void predictMotion(btScalar solverdt);
+
+ // move to temporary position x_{n+1}^* = x_n + dt * v_{n+1}^*
+ // x_{n+1}^* is stored in m_q (under the implicit scheme m_q is set to x_n instead)
+ void predictDeformableMotion(btSoftBody* psb, btScalar dt);
+
+ // save the current velocity to m_backupVelocity
+ void backupVelocity();
+
+ // set m_dv and m_backupVelocity to desired value to prepare for momentum solve
+ void setupDeformableSolve(bool implicit);
+
+ // set the current velocity to that backed up in m_backupVelocity
+ void revertVelocity();
+
+ // set velocity to m_dv + m_backupVelocity (minus the node's split velocity under the explicit scheme)
+ void updateVelocity();
+
+ // update the node count; returns true if the count changed
+ bool updateNodes();
+
+ // calculate the change in dv resulting from the momentum solve
+ void computeStep(TVStack& ddv, const TVStack& residual);
+
+ // calculate the change in dv resulting from the momentum solve when line search is turned on
+ btScalar computeDescentStep(TVStack& ddv, const TVStack& residual, bool verbose = false);
+
+ virtual void copySoftBodyToVertexBuffer(const btSoftBody* const softBody, btVertexBufferDescriptor* vertexBuffer) {}
+
+ // process collision between deformable and rigid
+ virtual void processCollision(btSoftBody* softBody, const btCollisionObjectWrapper* collisionObjectWrap)
+ {
+ softBody->defaultCollisionHandler(collisionObjectWrap);
+ }
+
+ // process collision between deformable and deformable
+ virtual void processCollision(btSoftBody* softBody, btSoftBody* otherSoftBody)
+ {
+ softBody->defaultCollisionHandler(otherSoftBody);
+ }
+
+ // If true, implicit time stepping scheme is used.
+ // Otherwise, explicit time stepping scheme is used
+ void setImplicit(bool implicit);
+
+ // If true, newton's method with line search is used when implicit time stepping scheme is turned on
+ void setLineSearch(bool lineSearch);
+
+ // set temporary position x^* = x_n + dt * v
+ // update the deformation gradient at position x^*
+ void updateState();
+
+ // set dv = dv + scale * ddv
+ void updateDv(btScalar scale = 1);
+
+ // set temporary position x^* = x_n + dt * (v^* + split velocity)
+ void updateTempPosition();
+
+ // save the current dv to m_backup_dv;
+ void backupDv();
+
+ // set dv to the backed-up value
+ void revertDv();
+
+ // set dv = dv + scale * ddv
+ // set v^* = v_n + dv
+ // set temporary position x^* = x_n + dt * v^*
+ // update the deformation gradient at position x^*
+ void updateEnergy(btScalar scale);
+
+ // calculates the appropriately scaled kinetic energy in the system, which is
+ // 1/2 * dv^T * M * dv
+ // used in line search
+ btScalar kineticEnergy();
+
+ // unused functions
+ virtual void optimize(btAlignedObjectArray<btSoftBody*>& softBodies, bool forceUpdate = false) {}
+ virtual void solveConstraints(btScalar dt) {}
+ virtual bool checkInitialized() { return true; }
+ virtual void copyBackToSoftBodies(bool bMove = true) {}
};
#endif /* btDeformableBodySolver_h */
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.cpp
index 2864446de6..09398d79a5 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.cpp
@@ -16,387 +16,503 @@
#include "btDeformableContactConstraint.h"
/* ================ Deformable Node Anchor =================== */
// Builds an anchor constraint from a node/rigid anchor. Only the anchor's address is stored (not a
// copy), so the anchor must outlive this object.
btDeformableNodeAnchorConstraint::btDeformableNodeAnchorConstraint(const btSoftBody::DeformableNodeRigidAnchor& a, const btContactSolverInfo& infoGlobal)
-: m_anchor(&a)
-, btDeformableContactConstraint(a.m_cti.m_normal, infoGlobal)
+ : m_anchor(&a), btDeformableContactConstraint(a.m_cti.m_normal, infoGlobal)  // NOTE(review): btDeformableContactConstraint is presumably the base class; C++ initializes bases before members regardless of list order
{
}
// Copy constructor: the copy points at the same anchor object as `other` (the pointer is copied, not the anchor).
btDeformableNodeAnchorConstraint::btDeformableNodeAnchorConstraint(const btDeformableNodeAnchorConstraint& other)
-: m_anchor(other.m_anchor)
-, btDeformableContactConstraint(other)
+ : m_anchor(other.m_anchor), btDeformableContactConstraint(other)
{
}
// Returns the velocity of the anchored collision object at the anchor point.
// The result stays (0, 0, 0) when the object has no contact response or is neither a rigid body nor a
// Featherstone link. For a rigid body it is getVelocityInLocalPoint(m_c1); for a Featherstone link it
// is rebuilt from the normal, t1 and t2 Jacobians applied to the multibody's velocity vectors.
btVector3 btDeformableNodeAnchorConstraint::getVa() const
{
- const btSoftBody::sCti& cti = m_anchor->m_cti;
- btVector3 va(0, 0, 0);
- if (cti.m_colObj->hasContactResponse())
- {
- btRigidBody* rigidCol = 0;
- btMultiBodyLinkCollider* multibodyLinkCol = 0;
-
- // grab the velocity of the rigid body
- if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
- {
- rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj);
- va = rigidCol ? (rigidCol->getVelocityInLocalPoint(m_anchor->m_c1)) : btVector3(0, 0, 0);
- }
- else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
- {
- multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj);
- if (multibodyLinkCol)
- {
- const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6;
- const btScalar* J_n = &m_anchor->jacobianData_normal.m_jacobians[0];
- const btScalar* J_t1 = &m_anchor->jacobianData_t1.m_jacobians[0];
- const btScalar* J_t2 = &m_anchor->jacobianData_t2.m_jacobians[0];
- const btScalar* local_v = multibodyLinkCol->m_multiBody->getVelocityVector();
- const btScalar* local_dv = multibodyLinkCol->m_multiBody->getDeltaVelocityVector();
- // add in the normal component of the va
- btScalar vel = 0.0;
- for (int k = 0; k < ndof; ++k)
- {
- vel += (local_v[k]+local_dv[k]) * J_n[k];
- }
- va = cti.m_normal * vel;
- // add in the tangential components of the va
- vel = 0.0;
- for (int k = 0; k < ndof; ++k)
- {
- vel += (local_v[k]+local_dv[k]) * J_t1[k];
- }
- va += m_anchor->t1 * vel;
- vel = 0.0;
- for (int k = 0; k < ndof; ++k)
- {
- vel += (local_v[k]+local_dv[k]) * J_t2[k];
- }
- va += m_anchor->t2 * vel;
- }
- }
- }
- return va;
+ const btSoftBody::sCti& cti = m_anchor->m_cti;
+ btVector3 va(0, 0, 0);
+ if (cti.m_colObj->hasContactResponse())
+ {
+ btRigidBody* rigidCol = 0;
+ btMultiBodyLinkCollider* multibodyLinkCol = 0;
+
+ // grab the velocity of the rigid body
+ if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
+ {
+ rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj);
+ va = rigidCol ? (rigidCol->getVelocityInLocalPoint(m_anchor->m_c1)) : btVector3(0, 0, 0);
+ }
+ else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
+ {
+ multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj);
+ if (multibodyLinkCol)
+ {
+ const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6;  // +6: presumably the base body's 6 DOFs - TODO confirm
+ const btScalar* J_n = &m_anchor->jacobianData_normal.m_jacobians[0];
+ const btScalar* J_t1 = &m_anchor->jacobianData_t1.m_jacobians[0];
+ const btScalar* J_t2 = &m_anchor->jacobianData_t2.m_jacobians[0];
+ const btScalar* local_v = multibodyLinkCol->m_multiBody->getVelocityVector();
+ const btScalar* local_dv = multibodyLinkCol->m_multiBody->getDeltaVelocityVector();
+ // add in the normal component of the va
+ btScalar vel = 0.0;
+ for (int k = 0; k < ndof; ++k)
+ {
+ vel += (local_v[k] + local_dv[k]) * J_n[k];  // velocity vector plus delta-velocity vector, projected by the Jacobian row
+ }
+ va = cti.m_normal * vel;
+ // add in the tangential components of the va
+ vel = 0.0;
+ for (int k = 0; k < ndof; ++k)
+ {
+ vel += (local_v[k] + local_dv[k]) * J_t1[k];
+ }
+ va += m_anchor->t1 * vel;
+ vel = 0.0;
+ for (int k = 0; k < ndof; ++k)
+ {
+ vel += (local_v[k] + local_dv[k]) * J_t2[k];
+ }
+ va += m_anchor->t2 * vel;
+ }
+ }
+ }
+ return va;
}
// Computes the relative velocity vr = vb - va between the anchored node and the collision object,
// applies the impulse m_c0 * vr to the node (via applyImpulse) and to the rigid body or multibody link,
// and returns a residual estimate derived from vr. infoGlobal is not read in this function.
btScalar btDeformableNodeAnchorConstraint::solveConstraint(const btContactSolverInfo& infoGlobal)
{
- const btSoftBody::sCti& cti = m_anchor->m_cti;
- btVector3 va = getVa();
- btVector3 vb = getVb();
- btVector3 vr = (vb - va);
- // + (m_anchor->m_node->m_x - cti.m_colObj->getWorldTransform() * m_anchor->m_local) * 10.0
- const btScalar dn = btDot(vr, vr);
- // dn is the normal component of velocity diffrerence. Approximates the residual. // todo xuchenhan@: this prob needs to be scaled by dt
- btScalar residualSquare = dn*dn;
- btVector3 impulse = m_anchor->m_c0 * vr;
- // apply impulse to deformable nodes involved and change their velocities
- applyImpulse(impulse);
-
- // apply impulse to the rigid/multibodies involved and change their velocities
- if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
- {
- btRigidBody* rigidCol = 0;
- rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj);
- if (rigidCol)
- {
- rigidCol->applyImpulse(impulse, m_anchor->m_c1);
- }
- }
- else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
- {
- btMultiBodyLinkCollider* multibodyLinkCol = 0;
- multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj);
- if (multibodyLinkCol)
- {
- const btScalar* deltaV_normal = &m_anchor->jacobianData_normal.m_deltaVelocitiesUnitImpulse[0];
- // apply normal component of the impulse
- multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_normal, impulse.dot(cti.m_normal));
- // apply tangential component of the impulse
- const btScalar* deltaV_t1 = &m_anchor->jacobianData_t1.m_deltaVelocitiesUnitImpulse[0];
- multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t1, impulse.dot(m_anchor->t1));
- const btScalar* deltaV_t2 = &m_anchor->jacobianData_t2.m_deltaVelocitiesUnitImpulse[0];
- multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t2, impulse.dot(m_anchor->t2));
- }
- }
- return residualSquare;
+ const btSoftBody::sCti& cti = m_anchor->m_cti;
+ btVector3 va = getVa();
+ btVector3 vb = getVb();
+ btVector3 vr = (vb - va);
+ // + (m_anchor->m_node->m_x - cti.m_colObj->getWorldTransform() * m_anchor->m_local) * 10.0
+ const btScalar dn = btDot(vr, vr);
+ // dn is the squared magnitude of the velocity difference (dot(vr, vr)), not a normal component; residualSquare below is its square. Approximates the residual. // todo xuchenhan@: this prob needs to be scaled by dt
+ btScalar residualSquare = dn * dn;
+ btVector3 impulse = m_anchor->m_c0 * vr;
+ // apply impulse to deformable nodes involved and change their velocities
+ applyImpulse(impulse);
+
+ // apply impulse to the rigid/multibodies involved and change their velocities
+ if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
+ {
+ btRigidBody* rigidCol = 0;
+ rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj);
+ if (rigidCol)
+ {
+ rigidCol->applyImpulse(impulse, m_anchor->m_c1);
+ }
+ }
+ else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
+ {
+ btMultiBodyLinkCollider* multibodyLinkCol = 0;
+ multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj);
+ if (multibodyLinkCol)
+ {
+ const btScalar* deltaV_normal = &m_anchor->jacobianData_normal.m_deltaVelocitiesUnitImpulse[0];
+ // apply normal component of the impulse
+ multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_normal, impulse.dot(cti.m_normal));
+ // apply tangential component of the impulse
+ const btScalar* deltaV_t1 = &m_anchor->jacobianData_t1.m_deltaVelocitiesUnitImpulse[0];
+ multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t1, impulse.dot(m_anchor->t1));
+ const btScalar* deltaV_t2 = &m_anchor->jacobianData_t2.m_deltaVelocitiesUnitImpulse[0];
+ multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t2, impulse.dot(m_anchor->t2));
+ }
+ }
+ return residualSquare;
}
// Returns the current velocity (m_v) of the soft-body node the anchor is attached to.
btVector3 btDeformableNodeAnchorConstraint::getVb() const
{
- return m_anchor->m_node->m_v;
+ return m_anchor->m_node->m_v;
}
void btDeformableNodeAnchorConstraint::applyImpulse(const btVector3& impulse)
{
- btVector3 dv = impulse * m_anchor->m_c2;
- m_anchor->m_node->m_v -= dv;
+ btVector3 dv = impulse * m_anchor->m_c2;
+ m_anchor->m_node->m_v -= dv;
}
/* ================ Deformable vs. Rigid =================== */
btDeformableRigidContactConstraint::btDeformableRigidContactConstraint(const btSoftBody::DeformableRigidContact& c, const btContactSolverInfo& infoGlobal)
-: m_contact(&c)
-, btDeformableContactConstraint(c.m_cti.m_normal, infoGlobal)
+ : m_contact(&c), btDeformableContactConstraint(c.m_cti.m_normal, infoGlobal)
{
- m_total_normal_dv.setZero();
- m_total_tangent_dv.setZero();
- // The magnitude of penetration is the depth of penetration.
- m_penetration = c.m_cti.m_offset;
-// m_penetration = btMin(btScalar(0),c.m_cti.m_offset);
+ m_total_normal_dv.setZero();
+ m_total_tangent_dv.setZero();
+ // The magnitude of penetration is the depth of penetration.
+ m_penetration = c.m_cti.m_offset;
+ m_total_split_impulse = 0;
+ m_binding = false;
}
btDeformableRigidContactConstraint::btDeformableRigidContactConstraint(const btDeformableRigidContactConstraint& other)
-: m_contact(other.m_contact)
-, btDeformableContactConstraint(other)
-, m_penetration(other.m_penetration)
+ : m_contact(other.m_contact), btDeformableContactConstraint(other), m_penetration(other.m_penetration), m_total_split_impulse(other.m_total_split_impulse), m_binding(other.m_binding)
{
- m_total_normal_dv = other.m_total_normal_dv;
- m_total_tangent_dv = other.m_total_tangent_dv;
+ m_total_normal_dv = other.m_total_normal_dv;
+ m_total_tangent_dv = other.m_total_tangent_dv;
}
-
btVector3 btDeformableRigidContactConstraint::getVa() const
{
- const btSoftBody::sCti& cti = m_contact->m_cti;
- btVector3 va(0, 0, 0);
- if (cti.m_colObj->hasContactResponse())
- {
- btRigidBody* rigidCol = 0;
- btMultiBodyLinkCollider* multibodyLinkCol = 0;
-
- // grab the velocity of the rigid body
- if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
- {
- rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj);
- va = rigidCol ? (rigidCol->getVelocityInLocalPoint(m_contact->m_c1)) : btVector3(0, 0, 0);
- }
- else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
- {
- multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj);
- if (multibodyLinkCol)
- {
- const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6;
- const btScalar* J_n = &m_contact->jacobianData_normal.m_jacobians[0];
- const btScalar* J_t1 = &m_contact->jacobianData_t1.m_jacobians[0];
- const btScalar* J_t2 = &m_contact->jacobianData_t2.m_jacobians[0];
- const btScalar* local_v = multibodyLinkCol->m_multiBody->getVelocityVector();
- const btScalar* local_dv = multibodyLinkCol->m_multiBody->getDeltaVelocityVector();
- // add in the normal component of the va
- btScalar vel = 0.0;
- for (int k = 0; k < ndof; ++k)
- {
- vel += (local_v[k]+local_dv[k]) * J_n[k];
- }
- va = cti.m_normal * vel;
- // add in the tangential components of the va
- vel = 0.0;
- for (int k = 0; k < ndof; ++k)
- {
- vel += (local_v[k]+local_dv[k]) * J_t1[k];
- }
- va += m_contact->t1 * vel;
- vel = 0.0;
- for (int k = 0; k < ndof; ++k)
- {
- vel += (local_v[k]+local_dv[k]) * J_t2[k];
- }
- va += m_contact->t2 * vel;
- }
- }
- }
- return va;
+ const btSoftBody::sCti& cti = m_contact->m_cti;
+ btVector3 va(0, 0, 0);
+ if (cti.m_colObj->hasContactResponse())
+ {
+ btRigidBody* rigidCol = 0;
+ btMultiBodyLinkCollider* multibodyLinkCol = 0;
+
+ // grab the velocity of the rigid body
+ if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
+ {
+ rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj);
+ va = rigidCol ? (rigidCol->getVelocityInLocalPoint(m_contact->m_c1)) : btVector3(0, 0, 0);
+ }
+ else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
+ {
+ multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj);
+ if (multibodyLinkCol)
+ {
+ const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6;
+ const btScalar* J_n = &m_contact->jacobianData_normal.m_jacobians[0];
+ const btScalar* J_t1 = &m_contact->jacobianData_t1.m_jacobians[0];
+ const btScalar* J_t2 = &m_contact->jacobianData_t2.m_jacobians[0];
+ const btScalar* local_v = multibodyLinkCol->m_multiBody->getVelocityVector();
+ const btScalar* local_dv = multibodyLinkCol->m_multiBody->getDeltaVelocityVector();
+ // add in the normal component of the va
+ btScalar vel = 0.0;
+ for (int k = 0; k < ndof; ++k)
+ {
+ vel += (local_v[k] + local_dv[k]) * J_n[k];
+ }
+ va = cti.m_normal * vel;
+ // add in the tangential components of the va
+ vel = 0.0;
+ for (int k = 0; k < ndof; ++k)
+ {
+ vel += (local_v[k] + local_dv[k]) * J_t1[k];
+ }
+ va += m_contact->t1 * vel;
+ vel = 0.0;
+ for (int k = 0; k < ndof; ++k)
+ {
+ vel += (local_v[k] + local_dv[k]) * J_t2[k];
+ }
+ va += m_contact->t2 * vel;
+ }
+ }
+ }
+ return va;
+}
+
+btVector3 btDeformableRigidContactConstraint::getSplitVa() const
+{
+ const btSoftBody::sCti& cti = m_contact->m_cti;
+ btVector3 va(0, 0, 0);
+ if (cti.m_colObj->hasContactResponse())
+ {
+ btRigidBody* rigidCol = 0;
+ btMultiBodyLinkCollider* multibodyLinkCol = 0;
+
+ // grab the velocity of the rigid body
+ if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
+ {
+ rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj);
+ va = rigidCol ? (rigidCol->getPushVelocityInLocalPoint(m_contact->m_c1)) : btVector3(0, 0, 0);
+ }
+ else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
+ {
+ multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj);
+ if (multibodyLinkCol)
+ {
+ const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6;
+ const btScalar* J_n = &m_contact->jacobianData_normal.m_jacobians[0];
+ const btScalar* J_t1 = &m_contact->jacobianData_t1.m_jacobians[0];
+ const btScalar* J_t2 = &m_contact->jacobianData_t2.m_jacobians[0];
+ const btScalar* local_split_v = multibodyLinkCol->m_multiBody->getSplitVelocityVector();
+ // add in the normal component of the va
+ btScalar vel = 0.0;
+ for (int k = 0; k < ndof; ++k)
+ {
+ vel += local_split_v[k] * J_n[k];
+ }
+ va = cti.m_normal * vel;
+ // add in the tangential components of the va
+ vel = 0.0;
+ for (int k = 0; k < ndof; ++k)
+ {
+ vel += local_split_v[k] * J_t1[k];
+ }
+ va += m_contact->t1 * vel;
+ vel = 0.0;
+ for (int k = 0; k < ndof; ++k)
+ {
+ vel += local_split_v[k] * J_t2[k];
+ }
+ va += m_contact->t2 * vel;
+ }
+ }
+ }
+ return va;
}
btScalar btDeformableRigidContactConstraint::solveConstraint(const btContactSolverInfo& infoGlobal)
{
- const btSoftBody::sCti& cti = m_contact->m_cti;
- btVector3 va = getVa();
- btVector3 vb = getVb();
- btVector3 vr = vb - va;
- btScalar dn = btDot(vr, cti.m_normal) + m_penetration * infoGlobal.m_deformable_erp / infoGlobal.m_timeStep;
- // dn is the normal component of velocity diffrerence. Approximates the residual. // todo xuchenhan@: this prob needs to be scaled by dt
- btScalar residualSquare = dn*dn;
- btVector3 impulse = m_contact->m_c0 * (vr + m_penetration * infoGlobal.m_deformable_erp / infoGlobal.m_timeStep * cti.m_normal) ;
- const btVector3 impulse_normal = m_contact->m_c0 * (cti.m_normal * dn);
- btVector3 impulse_tangent = impulse - impulse_normal;
- btVector3 old_total_tangent_dv = m_total_tangent_dv;
- // m_c2 is the inverse mass of the deformable node/face
- m_total_normal_dv -= impulse_normal * m_contact->m_c2;
- m_total_tangent_dv -= impulse_tangent * m_contact->m_c2;
-
- if (m_total_normal_dv.dot(cti.m_normal) < 0)
- {
- // separating in the normal direction
- m_static = false;
- m_total_tangent_dv = btVector3(0,0,0);
- impulse_tangent.setZero();
- }
- else
- {
- if (m_total_normal_dv.norm() * m_contact->m_c3 < m_total_tangent_dv.norm())
- {
- // dynamic friction
- // with dynamic friction, the impulse are still applied to the two objects colliding, however, it does not pose a constraint in the cg solve, hence the change to dv merely serves to update velocity in the contact iterations.
- m_static = false;
- if (m_total_tangent_dv.safeNorm() < SIMD_EPSILON)
- {
- m_total_tangent_dv = btVector3(0,0,0);
- }
- else
- {
- m_total_tangent_dv = m_total_tangent_dv.normalized() * m_total_normal_dv.safeNorm() * m_contact->m_c3;
- }
- impulse_tangent = -btScalar(1)/m_contact->m_c2 * (m_total_tangent_dv - old_total_tangent_dv);
- }
- else
- {
- // static friction
- m_static = true;
- }
- }
- impulse = impulse_normal + impulse_tangent;
- // apply impulse to deformable nodes involved and change their velocities
- applyImpulse(impulse);
- if (residualSquare < 1e-7)
- return residualSquare;
- // apply impulse to the rigid/multibodies involved and change their velocities
- if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
- {
- btRigidBody* rigidCol = 0;
- rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj);
- if (rigidCol)
- {
- rigidCol->applyImpulse(impulse, m_contact->m_c1);
- }
- }
- else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
- {
- btMultiBodyLinkCollider* multibodyLinkCol = 0;
- multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj);
- if (multibodyLinkCol)
- {
- const btScalar* deltaV_normal = &m_contact->jacobianData_normal.m_deltaVelocitiesUnitImpulse[0];
- // apply normal component of the impulse
- multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_normal, impulse.dot(cti.m_normal));
- if (impulse_tangent.norm() > SIMD_EPSILON)
- {
- // apply tangential component of the impulse
- const btScalar* deltaV_t1 = &m_contact->jacobianData_t1.m_deltaVelocitiesUnitImpulse[0];
- multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t1, impulse.dot(m_contact->t1));
- const btScalar* deltaV_t2 = &m_contact->jacobianData_t2.m_deltaVelocitiesUnitImpulse[0];
- multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t2, impulse.dot(m_contact->t2));
- }
- }
- }
-// va = getVa();
-// vb = getVb();
-// vr = vb - va;
-// btScalar dn1 = btDot(vr, cti.m_normal) / 150;
-// m_penetration += dn1;
- return residualSquare;
+ const btSoftBody::sCti& cti = m_contact->m_cti;
+ btVector3 va = getVa();
+ btVector3 vb = getVb();
+ btVector3 vr = vb - va;
+ btScalar dn = btDot(vr, cti.m_normal) + m_total_normal_dv.dot(cti.m_normal) * infoGlobal.m_deformable_cfm;
+ if (m_penetration > 0)
+ {
+ dn += m_penetration / infoGlobal.m_timeStep;
+ }
+ if (!infoGlobal.m_splitImpulse)
+ {
+ dn += m_penetration * infoGlobal.m_deformable_erp / infoGlobal.m_timeStep;
+ }
+ // dn is the normal component of velocity difference. Approximates the residual. // todo xuchenhan@: this prob needs to be scaled by dt
+ btVector3 impulse = m_contact->m_c0 * (vr + m_total_normal_dv * infoGlobal.m_deformable_cfm + ((m_penetration > 0) ? m_penetration / infoGlobal.m_timeStep * cti.m_normal : btVector3(0, 0, 0)));
+ if (!infoGlobal.m_splitImpulse)
+ {
+ impulse += m_contact->m_c0 * (m_penetration * infoGlobal.m_deformable_erp / infoGlobal.m_timeStep * cti.m_normal);
+ }
+ btVector3 impulse_normal = m_contact->m_c0 * (cti.m_normal * dn);
+ btVector3 impulse_tangent = impulse - impulse_normal;
+ if (dn > 0)
+ {
+ return 0;
+ }
+ m_binding = true;
+ btScalar residualSquare = dn * dn;
+ btVector3 old_total_tangent_dv = m_total_tangent_dv;
+ // m_c5 is the inverse mass of the deformable node/face
+ m_total_normal_dv -= m_contact->m_c5 * impulse_normal;
+ m_total_tangent_dv -= m_contact->m_c5 * impulse_tangent;
+
+ if (m_total_normal_dv.dot(cti.m_normal) < 0)
+ {
+ // separating in the normal direction
+ m_binding = false;
+ m_static = false;
+ impulse_tangent.setZero();
+ }
+ else
+ {
+ if (m_total_normal_dv.norm() * m_contact->m_c3 < m_total_tangent_dv.norm())
+ {
+ // dynamic friction
+ // with dynamic friction, the impulse are still applied to the two objects colliding, however, it does not pose a constraint in the cg solve, hence the change to dv merely serves to update velocity in the contact iterations.
+ m_static = false;
+ if (m_total_tangent_dv.safeNorm() < SIMD_EPSILON)
+ {
+ m_total_tangent_dv = btVector3(0, 0, 0);
+ }
+ else
+ {
+ m_total_tangent_dv = m_total_tangent_dv.normalized() * m_total_normal_dv.safeNorm() * m_contact->m_c3;
+ }
+ // impulse_tangent = -btScalar(1)/m_contact->m_c2 * (m_total_tangent_dv - old_total_tangent_dv);
+ impulse_tangent = m_contact->m_c5.inverse() * (old_total_tangent_dv - m_total_tangent_dv);
+ }
+ else
+ {
+ // static friction
+ m_static = true;
+ }
+ }
+ impulse = impulse_normal + impulse_tangent;
+ // apply impulse to deformable nodes involved and change their velocities
+ applyImpulse(impulse);
+ // apply impulse to the rigid/multibodies involved and change their velocities
+ if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
+ {
+ btRigidBody* rigidCol = 0;
+ rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj);
+ if (rigidCol)
+ {
+ rigidCol->applyImpulse(impulse, m_contact->m_c1);
+ }
+ }
+ else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
+ {
+ btMultiBodyLinkCollider* multibodyLinkCol = 0;
+ multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj);
+ if (multibodyLinkCol)
+ {
+ const btScalar* deltaV_normal = &m_contact->jacobianData_normal.m_deltaVelocitiesUnitImpulse[0];
+ // apply normal component of the impulse
+ multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_normal, impulse.dot(cti.m_normal));
+ if (impulse_tangent.norm() > SIMD_EPSILON)
+ {
+ // apply tangential component of the impulse
+ const btScalar* deltaV_t1 = &m_contact->jacobianData_t1.m_deltaVelocitiesUnitImpulse[0];
+ multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t1, impulse.dot(m_contact->t1));
+ const btScalar* deltaV_t2 = &m_contact->jacobianData_t2.m_deltaVelocitiesUnitImpulse[0];
+ multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t2, impulse.dot(m_contact->t2));
+ }
+ }
+ }
+ return residualSquare;
+}
+
+btScalar btDeformableRigidContactConstraint::solveSplitImpulse(const btContactSolverInfo& infoGlobal)
+{
+ btScalar MAX_PENETRATION_CORRECTION = infoGlobal.m_deformable_maxErrorReduction;
+ const btSoftBody::sCti& cti = m_contact->m_cti;
+ btVector3 vb = getSplitVb();
+ btVector3 va = getSplitVa();
+ btScalar p = m_penetration;
+ if (p > 0)
+ {
+ return 0;
+ }
+ btVector3 vr = vb - va;
+ btScalar dn = btDot(vr, cti.m_normal) + p * infoGlobal.m_deformable_erp / infoGlobal.m_timeStep;
+ if (dn > 0)
+ {
+ return 0;
+ }
+ if (m_total_split_impulse + dn > MAX_PENETRATION_CORRECTION)
+ {
+ dn = MAX_PENETRATION_CORRECTION - m_total_split_impulse;
+ }
+ if (m_total_split_impulse + dn < -MAX_PENETRATION_CORRECTION)
+ {
+ dn = -MAX_PENETRATION_CORRECTION - m_total_split_impulse;
+ }
+ m_total_split_impulse += dn;
+
+ btScalar residualSquare = dn * dn;
+ const btVector3 impulse = m_contact->m_c0 * (cti.m_normal * dn);
+ applySplitImpulse(impulse);
+
+ // apply split impulse to the rigid/multibodies involved and change their velocities
+ if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
+ {
+ btRigidBody* rigidCol = 0;
+ rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj);
+ if (rigidCol)
+ {
+ rigidCol->applyPushImpulse(impulse, m_contact->m_c1);
+ }
+ }
+ else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
+ {
+ btMultiBodyLinkCollider* multibodyLinkCol = 0;
+ multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj);
+ if (multibodyLinkCol)
+ {
+ const btScalar* deltaV_normal = &m_contact->jacobianData_normal.m_deltaVelocitiesUnitImpulse[0];
+ // apply normal component of the impulse
+ multibodyLinkCol->m_multiBody->applyDeltaSplitVeeMultiDof(deltaV_normal, impulse.dot(cti.m_normal));
+ }
+ }
+ return residualSquare;
}
/* ================ Node vs. Rigid =================== */
btDeformableNodeRigidContactConstraint::btDeformableNodeRigidContactConstraint(const btSoftBody::DeformableNodeRigidContact& contact, const btContactSolverInfo& infoGlobal)
- : m_node(contact.m_node)
- , btDeformableRigidContactConstraint(contact, infoGlobal)
- {
- }
+ : m_node(contact.m_node), btDeformableRigidContactConstraint(contact, infoGlobal)
+{
+}
btDeformableNodeRigidContactConstraint::btDeformableNodeRigidContactConstraint(const btDeformableNodeRigidContactConstraint& other)
-: m_node(other.m_node)
-, btDeformableRigidContactConstraint(other)
+ : m_node(other.m_node), btDeformableRigidContactConstraint(other)
{
}
btVector3 btDeformableNodeRigidContactConstraint::getVb() const
{
- return m_node->m_v;
+ return m_node->m_v;
}
+btVector3 btDeformableNodeRigidContactConstraint::getSplitVb() const
+{
+ return m_node->m_splitv;
+}
btVector3 btDeformableNodeRigidContactConstraint::getDv(const btSoftBody::Node* node) const
{
- return m_total_normal_dv + m_total_tangent_dv;
+ return m_total_normal_dv + m_total_tangent_dv;
}
void btDeformableNodeRigidContactConstraint::applyImpulse(const btVector3& impulse)
{
- const btSoftBody::DeformableNodeRigidContact* contact = getContact();
- btVector3 dv = impulse * contact->m_c2;
- contact->m_node->m_v -= dv;
+ const btSoftBody::DeformableNodeRigidContact* contact = getContact();
+ btVector3 dv = contact->m_c5 * impulse;
+ contact->m_node->m_v -= dv;
+}
+
+void btDeformableNodeRigidContactConstraint::applySplitImpulse(const btVector3& impulse)
+{
+ const btSoftBody::DeformableNodeRigidContact* contact = getContact();
+ btVector3 dv = contact->m_c5 * impulse;
+ contact->m_node->m_splitv -= dv;
}
/* ================ Face vs. Rigid =================== */
btDeformableFaceRigidContactConstraint::btDeformableFaceRigidContactConstraint(const btSoftBody::DeformableFaceRigidContact& contact, const btContactSolverInfo& infoGlobal, bool useStrainLimiting)
-: m_face(contact.m_face)
-, m_useStrainLimiting(useStrainLimiting)
-, btDeformableRigidContactConstraint(contact, infoGlobal)
+ : m_face(contact.m_face), m_useStrainLimiting(useStrainLimiting), btDeformableRigidContactConstraint(contact, infoGlobal)
{
}
btDeformableFaceRigidContactConstraint::btDeformableFaceRigidContactConstraint(const btDeformableFaceRigidContactConstraint& other)
-: m_face(other.m_face)
-, m_useStrainLimiting(other.m_useStrainLimiting)
-, btDeformableRigidContactConstraint(other)
+ : m_face(other.m_face), m_useStrainLimiting(other.m_useStrainLimiting), btDeformableRigidContactConstraint(other)
{
}
btVector3 btDeformableFaceRigidContactConstraint::getVb() const
{
- const btSoftBody::DeformableFaceRigidContact* contact = getContact();
- btVector3 vb = m_face->m_n[0]->m_v * contact->m_bary[0] + m_face->m_n[1]->m_v * contact->m_bary[1] + m_face->m_n[2]->m_v * contact->m_bary[2];
- return vb;
+ const btSoftBody::DeformableFaceRigidContact* contact = getContact();
+ btVector3 vb = m_face->m_n[0]->m_v * contact->m_bary[0] + m_face->m_n[1]->m_v * contact->m_bary[1] + m_face->m_n[2]->m_v * contact->m_bary[2];
+ return vb;
}
-
btVector3 btDeformableFaceRigidContactConstraint::getDv(const btSoftBody::Node* node) const
{
- btVector3 face_dv = m_total_normal_dv + m_total_tangent_dv;
- const btSoftBody::DeformableFaceRigidContact* contact = getContact();
- if (m_face->m_n[0] == node)
- {
- return face_dv * contact->m_weights[0];
- }
- if (m_face->m_n[1] == node)
- {
- return face_dv * contact->m_weights[1];
- }
- btAssert(node == m_face->m_n[2]);
- return face_dv * contact->m_weights[2];
+ btVector3 face_dv = m_total_normal_dv + m_total_tangent_dv;
+ const btSoftBody::DeformableFaceRigidContact* contact = getContact();
+ if (m_face->m_n[0] == node)
+ {
+ return face_dv * contact->m_weights[0];
+ }
+ if (m_face->m_n[1] == node)
+ {
+ return face_dv * contact->m_weights[1];
+ }
+ btAssert(node == m_face->m_n[2]);
+ return face_dv * contact->m_weights[2];
}
void btDeformableFaceRigidContactConstraint::applyImpulse(const btVector3& impulse)
{
- const btSoftBody::DeformableFaceRigidContact* contact = getContact();
- btVector3 dv = impulse * contact->m_c2;
- btSoftBody::Face* face = contact->m_face;
-
- btVector3& v0 = face->m_n[0]->m_v;
- btVector3& v1 = face->m_n[1]->m_v;
- btVector3& v2 = face->m_n[2]->m_v;
- const btScalar& im0 = face->m_n[0]->m_im;
- const btScalar& im1 = face->m_n[1]->m_im;
- const btScalar& im2 = face->m_n[2]->m_im;
- if (im0 > 0)
- v0 -= dv * contact->m_weights[0];
- if (im1 > 0)
- v1 -= dv * contact->m_weights[1];
- if (im2 > 0)
- v2 -= dv * contact->m_weights[2];
+ const btSoftBody::DeformableFaceRigidContact* contact = getContact();
+ btVector3 dv = impulse * contact->m_c2;
+ btSoftBody::Face* face = contact->m_face;
+
+ btVector3& v0 = face->m_n[0]->m_v;
+ btVector3& v1 = face->m_n[1]->m_v;
+ btVector3& v2 = face->m_n[2]->m_v;
+ const btScalar& im0 = face->m_n[0]->m_im;
+ const btScalar& im1 = face->m_n[1]->m_im;
+ const btScalar& im2 = face->m_n[2]->m_im;
+ if (im0 > 0)
+ v0 -= dv * contact->m_weights[0];
+ if (im1 > 0)
+ v1 -= dv * contact->m_weights[1];
+ if (im2 > 0)
+ v2 -= dv * contact->m_weights[2];
if (m_useStrainLimiting)
{
- btScalar relaxation = 1./btScalar(m_infoGlobal->m_numIterations);
- btScalar m01 = (relaxation/(im0 + im1));
- btScalar m02 = (relaxation/(im0 + im2));
- btScalar m12 = (relaxation/(im1 + im2));
- #ifdef USE_STRAIN_RATE_LIMITING
+ btScalar relaxation = 1. / btScalar(m_infoGlobal->m_numIterations);
+ btScalar m01 = (relaxation / (im0 + im1));
+ btScalar m02 = (relaxation / (im0 + im2));
+ btScalar m12 = (relaxation / (im1 + im2));
+#ifdef USE_STRAIN_RATE_LIMITING
// apply strain limiting to prevent the new velocity to change the current length of the edge by more than 1%.
btScalar p = 0.01;
btVector3& x0 = face->m_n[0]->m_x;
btVector3& x1 = face->m_n[1]->m_x;
btVector3& x2 = face->m_n[2]->m_x;
- const btVector3 x_diff[3] = {x1-x0, x2-x0, x2-x1};
- const btVector3 v_diff[3] = {v1-v0, v2-v0, v2-v1};
+ const btVector3 x_diff[3] = {x1 - x0, x2 - x0, x2 - x1};
+ const btVector3 v_diff[3] = {v1 - v0, v2 - v0, v2 - v1};
btVector3 u[3];
btScalar x_diff_dot_u, dn[3];
btScalar dt = m_infoGlobal->m_timeStep;
@@ -404,172 +520,201 @@ void btDeformableFaceRigidContactConstraint::applyImpulse(const btVector3& impul
{
btScalar x_diff_norm = x_diff[i].safeNorm();
btScalar x_diff_norm_new = (x_diff[i] + v_diff[i] * dt).safeNorm();
- btScalar strainRate = x_diff_norm_new/x_diff_norm;
+ btScalar strainRate = x_diff_norm_new / x_diff_norm;
u[i] = v_diff[i];
u[i].safeNormalize();
- if (x_diff_norm == 0 || (1-p <= strainRate && strainRate <= 1+p))
+ if (x_diff_norm == 0 || (1 - p <= strainRate && strainRate <= 1 + p))
{
dn[i] = 0;
continue;
}
x_diff_dot_u = btDot(x_diff[i], u[i]);
btScalar s;
- if (1-p > strainRate)
+ if (1 - p > strainRate)
{
- s = 1/dt * (-x_diff_dot_u - btSqrt(x_diff_dot_u*x_diff_dot_u + (p*p-2*p) * x_diff_norm * x_diff_norm));
+ s = 1 / dt * (-x_diff_dot_u - btSqrt(x_diff_dot_u * x_diff_dot_u + (p * p - 2 * p) * x_diff_norm * x_diff_norm));
}
else
{
- s = 1/dt * (-x_diff_dot_u + btSqrt(x_diff_dot_u*x_diff_dot_u + (p*p+2*p) * x_diff_norm * x_diff_norm));
+ s = 1 / dt * (-x_diff_dot_u + btSqrt(x_diff_dot_u * x_diff_dot_u + (p * p + 2 * p) * x_diff_norm * x_diff_norm));
}
// x_diff_norm_new = (x_diff[i] + s * u[i] * dt).safeNorm();
// strainRate = x_diff_norm_new/x_diff_norm;
dn[i] = s - v_diff[i].safeNorm();
}
- btVector3 dv0 = im0 * (m01 * u[0]*(-dn[0]) + m02 * u[1]*-(dn[1]));
- btVector3 dv1 = im1 * (m01 * u[0]*(dn[0]) + m12 * u[2]*(-dn[2]));
- btVector3 dv2 = im2 * (m12 * u[2]*(dn[2]) + m02 * u[1]*(dn[1]));
- #else
+ btVector3 dv0 = im0 * (m01 * u[0] * (-dn[0]) + m02 * u[1] * -(dn[1]));
+ btVector3 dv1 = im1 * (m01 * u[0] * (dn[0]) + m12 * u[2] * (-dn[2]));
+ btVector3 dv2 = im2 * (m12 * u[2] * (dn[2]) + m02 * u[1] * (dn[1]));
+#else
// apply strain limiting to prevent undamped modes
- btVector3 dv0 = im0 * (m01 * (v1-v0) + m02 * (v2-v0));
- btVector3 dv1 = im1 * (m01 * (v0-v1) + m12 * (v2-v1));
- btVector3 dv2 = im2 * (m12 * (v1-v2) + m02 * (v0-v2));
- #endif
+ btVector3 dv0 = im0 * (m01 * (v1 - v0) + m02 * (v2 - v0));
+ btVector3 dv1 = im1 * (m01 * (v0 - v1) + m12 * (v2 - v1));
+ btVector3 dv2 = im2 * (m12 * (v1 - v2) + m02 * (v0 - v2));
+#endif
v0 += dv0;
v1 += dv1;
v2 += dv2;
}
}
+btVector3 btDeformableFaceRigidContactConstraint::getSplitVb() const
+{
+ const btSoftBody::DeformableFaceRigidContact* contact = getContact();
+ btVector3 vb = (m_face->m_n[0]->m_splitv) * contact->m_bary[0] + (m_face->m_n[1]->m_splitv) * contact->m_bary[1] + (m_face->m_n[2]->m_splitv) * contact->m_bary[2];
+ return vb;
+}
+
+void btDeformableFaceRigidContactConstraint::applySplitImpulse(const btVector3& impulse)
+{
+ const btSoftBody::DeformableFaceRigidContact* contact = getContact();
+ btVector3 dv = impulse * contact->m_c2;
+ btSoftBody::Face* face = contact->m_face;
+ btVector3& v0 = face->m_n[0]->m_splitv;
+ btVector3& v1 = face->m_n[1]->m_splitv;
+ btVector3& v2 = face->m_n[2]->m_splitv;
+ const btScalar& im0 = face->m_n[0]->m_im;
+ const btScalar& im1 = face->m_n[1]->m_im;
+ const btScalar& im2 = face->m_n[2]->m_im;
+ if (im0 > 0)
+ {
+ v0 -= dv * contact->m_weights[0];
+ }
+ if (im1 > 0)
+ {
+ v1 -= dv * contact->m_weights[1];
+ }
+ if (im2 > 0)
+ {
+ v2 -= dv * contact->m_weights[2];
+ }
+}
+
/* ================ Face vs. Node =================== */
btDeformableFaceNodeContactConstraint::btDeformableFaceNodeContactConstraint(const btSoftBody::DeformableFaceNodeContact& contact, const btContactSolverInfo& infoGlobal)
-: m_node(contact.m_node)
-, m_face(contact.m_face)
-, m_contact(&contact)
-, btDeformableContactConstraint(contact.m_normal, infoGlobal)
+ : m_node(contact.m_node), m_face(contact.m_face), m_contact(&contact), btDeformableContactConstraint(contact.m_normal, infoGlobal)
{
- m_total_normal_dv.setZero();
- m_total_tangent_dv.setZero();
+ m_total_normal_dv.setZero();
+ m_total_tangent_dv.setZero();
}
btVector3 btDeformableFaceNodeContactConstraint::getVa() const
{
- return m_node->m_v;
+ return m_node->m_v;
}
btVector3 btDeformableFaceNodeContactConstraint::getVb() const
{
- const btSoftBody::DeformableFaceNodeContact* contact = getContact();
- btVector3 vb = m_face->m_n[0]->m_v * contact->m_bary[0] + m_face->m_n[1]->m_v * contact->m_bary[1] + m_face->m_n[2]->m_v * contact->m_bary[2];
- return vb;
+ const btSoftBody::DeformableFaceNodeContact* contact = getContact();
+ btVector3 vb = m_face->m_n[0]->m_v * contact->m_bary[0] + m_face->m_n[1]->m_v * contact->m_bary[1] + m_face->m_n[2]->m_v * contact->m_bary[2];
+ return vb;
}
btVector3 btDeformableFaceNodeContactConstraint::getDv(const btSoftBody::Node* n) const
{
- btVector3 dv = m_total_normal_dv + m_total_tangent_dv;
- if (n == m_node)
- return dv;
- const btSoftBody::DeformableFaceNodeContact* contact = getContact();
- if (m_face->m_n[0] == n)
- {
- return dv * contact->m_weights[0];
- }
- if (m_face->m_n[1] == n)
- {
- return dv * contact->m_weights[1];
- }
- btAssert(n == m_face->m_n[2]);
- return dv * contact->m_weights[2];
+ btVector3 dv = m_total_normal_dv + m_total_tangent_dv;
+ if (n == m_node)
+ return dv;
+ const btSoftBody::DeformableFaceNodeContact* contact = getContact();
+ if (m_face->m_n[0] == n)
+ {
+ return dv * contact->m_weights[0];
+ }
+ if (m_face->m_n[1] == n)
+ {
+ return dv * contact->m_weights[1];
+ }
+ btAssert(n == m_face->m_n[2]);
+ return dv * contact->m_weights[2];
}
btScalar btDeformableFaceNodeContactConstraint::solveConstraint(const btContactSolverInfo& infoGlobal)
{
- btVector3 va = getVa();
- btVector3 vb = getVb();
- btVector3 vr = vb - va;
- const btScalar dn = btDot(vr, m_contact->m_normal);
- // dn is the normal component of velocity diffrerence. Approximates the residual. // todo xuchenhan@: this prob needs to be scaled by dt
- btScalar residualSquare = dn*dn;
- btVector3 impulse = m_contact->m_c0 * vr;
- const btVector3 impulse_normal = m_contact->m_c0 * (m_contact->m_normal * dn);
- btVector3 impulse_tangent = impulse - impulse_normal;
-
- btVector3 old_total_tangent_dv = m_total_tangent_dv;
- // m_c2 is the inverse mass of the deformable node/face
- if (m_node->m_im > 0)
- {
- m_total_normal_dv -= impulse_normal * m_node->m_im;
- m_total_tangent_dv -= impulse_tangent * m_node->m_im;
- }
- else
- {
- m_total_normal_dv -= impulse_normal * m_contact->m_imf;
- m_total_tangent_dv -= impulse_tangent * m_contact->m_imf;
- }
-
- if (m_total_normal_dv.dot(m_contact->m_normal) > 0)
- {
- // separating in the normal direction
- m_static = false;
- m_total_tangent_dv = btVector3(0,0,0);
- impulse_tangent.setZero();
- }
- else
- {
- if (m_total_normal_dv.norm() * m_contact->m_friction < m_total_tangent_dv.norm())
- {
- // dynamic friction
- // with dynamic friction, the impulse are still applied to the two objects colliding, however, it does not pose a constraint in the cg solve, hence the change to dv merely serves to update velocity in the contact iterations.
- m_static = false;
- if (m_total_tangent_dv.safeNorm() < SIMD_EPSILON)
- {
- m_total_tangent_dv = btVector3(0,0,0);
- }
- else
- {
- m_total_tangent_dv = m_total_tangent_dv.normalized() * m_total_normal_dv.safeNorm() * m_contact->m_friction;
- }
- impulse_tangent = -btScalar(1)/m_node->m_im * (m_total_tangent_dv - old_total_tangent_dv);
- }
- else
- {
- // static friction
- m_static = true;
- }
- }
- impulse = impulse_normal + impulse_tangent;
- // apply impulse to deformable nodes involved and change their velocities
- applyImpulse(impulse);
- return residualSquare;
+ btVector3 va = getVa();
+ btVector3 vb = getVb();
+ btVector3 vr = vb - va;
+ const btScalar dn = btDot(vr, m_contact->m_normal);
+ // dn is the normal component of velocity difference. Approximates the residual. // todo xuchenhan@: this prob needs to be scaled by dt
+ btScalar residualSquare = dn * dn;
+ btVector3 impulse = m_contact->m_c0 * vr;
+ const btVector3 impulse_normal = m_contact->m_c0 * (m_contact->m_normal * dn);
+ btVector3 impulse_tangent = impulse - impulse_normal;
+
+ btVector3 old_total_tangent_dv = m_total_tangent_dv;
+ // m_c2 is the inverse mass of the deformable node/face
+ if (m_node->m_im > 0)
+ {
+ m_total_normal_dv -= impulse_normal * m_node->m_im;
+ m_total_tangent_dv -= impulse_tangent * m_node->m_im;
+ }
+ else
+ {
+ m_total_normal_dv -= impulse_normal * m_contact->m_imf;
+ m_total_tangent_dv -= impulse_tangent * m_contact->m_imf;
+ }
+
+ if (m_total_normal_dv.dot(m_contact->m_normal) > 0)
+ {
+ // separating in the normal direction
+ m_static = false;
+ m_total_tangent_dv = btVector3(0, 0, 0);
+ impulse_tangent.setZero();
+ }
+ else
+ {
+ if (m_total_normal_dv.norm() * m_contact->m_friction < m_total_tangent_dv.norm())
+ {
+ // dynamic friction
+ // with dynamic friction, the impulse are still applied to the two objects colliding, however, it does not pose a constraint in the cg solve, hence the change to dv merely serves to update velocity in the contact iterations.
+ m_static = false;
+ if (m_total_tangent_dv.safeNorm() < SIMD_EPSILON)
+ {
+ m_total_tangent_dv = btVector3(0, 0, 0);
+ }
+ else
+ {
+ m_total_tangent_dv = m_total_tangent_dv.normalized() * m_total_normal_dv.safeNorm() * m_contact->m_friction;
+ }
+ impulse_tangent = -btScalar(1) / m_node->m_im * (m_total_tangent_dv - old_total_tangent_dv);
+ }
+ else
+ {
+ // static friction
+ m_static = true;
+ }
+ }
+ impulse = impulse_normal + impulse_tangent;
+ // apply impulse to deformable nodes involved and change their velocities
+ applyImpulse(impulse);
+ return residualSquare;
}
void btDeformableFaceNodeContactConstraint::applyImpulse(const btVector3& impulse)
{
- const btSoftBody::DeformableFaceNodeContact* contact = getContact();
- btVector3 dva = impulse * contact->m_node->m_im;
- btVector3 dvb = impulse * contact->m_imf;
- if (contact->m_node->m_im > 0)
- {
- contact->m_node->m_v += dva;
- }
-
- btSoftBody::Face* face = contact->m_face;
- btVector3& v0 = face->m_n[0]->m_v;
- btVector3& v1 = face->m_n[1]->m_v;
- btVector3& v2 = face->m_n[2]->m_v;
- const btScalar& im0 = face->m_n[0]->m_im;
- const btScalar& im1 = face->m_n[1]->m_im;
- const btScalar& im2 = face->m_n[2]->m_im;
- if (im0 > 0)
- {
- v0 -= dvb * contact->m_weights[0];
- }
- if (im1 > 0)
- {
- v1 -= dvb * contact->m_weights[1];
- }
- if (im2 > 0)
- {
- v2 -= dvb * contact->m_weights[2];
- }
+ const btSoftBody::DeformableFaceNodeContact* contact = getContact();
+ btVector3 dva = impulse * contact->m_node->m_im;
+ btVector3 dvb = impulse * contact->m_imf;
+ if (contact->m_node->m_im > 0)
+ {
+ contact->m_node->m_v += dva;
+ }
+
+ btSoftBody::Face* face = contact->m_face;
+ btVector3& v0 = face->m_n[0]->m_v;
+ btVector3& v1 = face->m_n[1]->m_v;
+ btVector3& v2 = face->m_n[2]->m_v;
+ const btScalar& im0 = face->m_n[0]->m_im;
+ const btScalar& im1 = face->m_n[1]->m_im;
+ const btScalar& im2 = face->m_n[2]->m_im;
+ if (im0 > 0)
+ {
+ v0 -= dvb * contact->m_weights[0];
+ }
+ if (im1 > 0)
+ {
+ v1 -= dvb * contact->m_weights[1];
+ }
+ if (im2 > 0)
+ {
+ v2 -= dvb * contact->m_weights[2];
+ }
}
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.h b/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.h
index 9f9d5bf0a3..1e2c9f5bce 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.h
@@ -21,51 +21,49 @@
class btDeformableContactConstraint
{
public:
- // True if the friction is static
- // False if the friction is dynamic
- bool m_static;
+ // True if the friction is static
+ // False if the friction is dynamic
+ bool m_static;
const btContactSolverInfo* m_infoGlobal;
// normal of the contact
btVector3 m_normal;
- btDeformableContactConstraint(const btVector3& normal, const btContactSolverInfo& infoGlobal): m_static(false), m_normal(normal), m_infoGlobal(&infoGlobal)
+ btDeformableContactConstraint(const btVector3& normal, const btContactSolverInfo& infoGlobal) : m_static(false), m_normal(normal), m_infoGlobal(&infoGlobal)
{
}
- btDeformableContactConstraint(bool isStatic, const btVector3& normal, const btContactSolverInfo& infoGlobal): m_static(isStatic), m_normal(normal), m_infoGlobal(&infoGlobal)
+ btDeformableContactConstraint(bool isStatic, const btVector3& normal, const btContactSolverInfo& infoGlobal) : m_static(isStatic), m_normal(normal), m_infoGlobal(&infoGlobal)
{
}
-
- btDeformableContactConstraint(){}
+
+ btDeformableContactConstraint() {}
btDeformableContactConstraint(const btDeformableContactConstraint& other)
- : m_static(other.m_static)
- , m_normal(other.m_normal)
- , m_infoGlobal(other.m_infoGlobal)
+ : m_static(other.m_static), m_normal(other.m_normal), m_infoGlobal(other.m_infoGlobal)
{
}
- virtual ~btDeformableContactConstraint(){}
-
- // solve the constraint with inelastic impulse and return the error, which is the square of normal component of velocity diffrerence
- // the constraint is solved by calculating the impulse between object A and B in the contact and apply the impulse to both objects involved in the contact
- virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal) = 0;
-
- // get the velocity of the object A in the contact
- virtual btVector3 getVa() const = 0;
-
- // get the velocity of the object B in the contact
- virtual btVector3 getVb() const = 0;
-
- // get the velocity change of the soft body node in the constraint
- virtual btVector3 getDv(const btSoftBody::Node*) const = 0;
-
- // apply impulse to the soft body node and/or face involved
- virtual void applyImpulse(const btVector3& impulse) = 0;
-
- // scale the penetration depth by erp
- virtual void setPenetrationScale(btScalar scale) = 0;
+ virtual ~btDeformableContactConstraint() {}
+
+ // solve the constraint with inelastic impulse and return the error, which is the square of the normal component of the velocity difference
+ // the constraint is solved by calculating the impulse between objects A and B in the contact and applying the impulse to both objects involved in the contact
+ virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal) = 0;
+
+ // get the velocity of the object A in the contact
+ virtual btVector3 getVa() const = 0;
+
+ // get the velocity of the object B in the contact
+ virtual btVector3 getVb() const = 0;
+
+ // get the velocity change of the soft body node in the constraint
+ virtual btVector3 getDv(const btSoftBody::Node*) const = 0;
+
+ // apply impulse to the soft body node and/or face involved
+ virtual void applyImpulse(const btVector3& impulse) = 0;
+
+ // scale the penetration depth by erp
+ virtual void setPenetrationScale(btScalar scale) = 0;
};
//
@@ -73,42 +71,41 @@ public:
class btDeformableStaticConstraint : public btDeformableContactConstraint
{
public:
- btSoftBody::Node* m_node;
-
- btDeformableStaticConstraint(btSoftBody::Node* node, const btContactSolverInfo& infoGlobal): m_node(node), btDeformableContactConstraint(false, btVector3(0,0,0), infoGlobal)
- {
- }
- btDeformableStaticConstraint(){}
- btDeformableStaticConstraint(const btDeformableStaticConstraint& other)
- : m_node(other.m_node)
- , btDeformableContactConstraint(other)
- {
- }
-
- virtual ~btDeformableStaticConstraint(){}
-
- virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal)
- {
- return 0;
- }
-
- virtual btVector3 getVa() const
- {
- return btVector3(0,0,0);
- }
-
- virtual btVector3 getVb() const
- {
- return btVector3(0,0,0);
- }
-
- virtual btVector3 getDv(const btSoftBody::Node* n) const
- {
- return btVector3(0,0,0);
- }
-
- virtual void applyImpulse(const btVector3& impulse){}
- virtual void setPenetrationScale(btScalar scale){}
+ btSoftBody::Node* m_node;
+
+ btDeformableStaticConstraint(btSoftBody::Node* node, const btContactSolverInfo& infoGlobal) : m_node(node), btDeformableContactConstraint(false, btVector3(0, 0, 0), infoGlobal)
+ {
+ }
+ btDeformableStaticConstraint() {}
+ btDeformableStaticConstraint(const btDeformableStaticConstraint& other)
+ : m_node(other.m_node), btDeformableContactConstraint(other)
+ {
+ }
+
+ virtual ~btDeformableStaticConstraint() {}
+
+ virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal)
+ {
+ return 0;
+ }
+
+ virtual btVector3 getVa() const
+ {
+ return btVector3(0, 0, 0);
+ }
+
+ virtual btVector3 getVb() const
+ {
+ return btVector3(0, 0, 0);
+ }
+
+ virtual btVector3 getDv(const btSoftBody::Node* n) const
+ {
+ return btVector3(0, 0, 0);
+ }
+
+ virtual void applyImpulse(const btVector3& impulse) {}
+ virtual void setPenetrationScale(btScalar scale) {}
};
//
@@ -116,56 +113,67 @@ public:
class btDeformableNodeAnchorConstraint : public btDeformableContactConstraint
{
public:
- const btSoftBody::DeformableNodeRigidAnchor* m_anchor;
-
- btDeformableNodeAnchorConstraint(const btSoftBody::DeformableNodeRigidAnchor& c, const btContactSolverInfo& infoGlobal);
- btDeformableNodeAnchorConstraint(const btDeformableNodeAnchorConstraint& other);
- btDeformableNodeAnchorConstraint(){}
- virtual ~btDeformableNodeAnchorConstraint()
- {
- }
- virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal);
-
- // object A is the rigid/multi body, and object B is the deformable node/face
- virtual btVector3 getVa() const;
- // get the velocity of the deformable node in contact
- virtual btVector3 getVb() const;
- virtual btVector3 getDv(const btSoftBody::Node* n) const
- {
- return btVector3(0,0,0);
- }
- virtual void applyImpulse(const btVector3& impulse);
-
- virtual void setPenetrationScale(btScalar scale){}
-};
+ const btSoftBody::DeformableNodeRigidAnchor* m_anchor;
+ btDeformableNodeAnchorConstraint(const btSoftBody::DeformableNodeRigidAnchor& c, const btContactSolverInfo& infoGlobal);
+ btDeformableNodeAnchorConstraint(const btDeformableNodeAnchorConstraint& other);
+ btDeformableNodeAnchorConstraint() {}
+ virtual ~btDeformableNodeAnchorConstraint()
+ {
+ }
+ virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal);
+
+ // object A is the rigid/multi body, and object B is the deformable node/face
+ virtual btVector3 getVa() const;
+ // get the velocity of the deformable node in contact
+ virtual btVector3 getVb() const;
+ virtual btVector3 getDv(const btSoftBody::Node* n) const
+ {
+ return btVector3(0, 0, 0);
+ }
+ virtual void applyImpulse(const btVector3& impulse);
+
+ virtual void setPenetrationScale(btScalar scale) {}
+};
//
// Constraint between rigid/multi body and deformable objects
class btDeformableRigidContactConstraint : public btDeformableContactConstraint
{
public:
- btVector3 m_total_normal_dv;
- btVector3 m_total_tangent_dv;
- btScalar m_penetration;
- const btSoftBody::DeformableRigidContact* m_contact;
-
- btDeformableRigidContactConstraint(const btSoftBody::DeformableRigidContact& c, const btContactSolverInfo& infoGlobal);
- btDeformableRigidContactConstraint(const btDeformableRigidContactConstraint& other);
- btDeformableRigidContactConstraint(){}
- virtual ~btDeformableRigidContactConstraint()
- {
- }
-
- // object A is the rigid/multi body, and object B is the deformable node/face
- virtual btVector3 getVa() const;
-
- virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal);
-
- virtual void setPenetrationScale(btScalar scale)
- {
- m_penetration *= scale;
- }
+ btVector3 m_total_normal_dv;
+ btVector3 m_total_tangent_dv;
+ btScalar m_penetration;
+ btScalar m_total_split_impulse;
+ bool m_binding;
+ const btSoftBody::DeformableRigidContact* m_contact;
+
+ btDeformableRigidContactConstraint(const btSoftBody::DeformableRigidContact& c, const btContactSolverInfo& infoGlobal);
+ btDeformableRigidContactConstraint(const btDeformableRigidContactConstraint& other);
+ btDeformableRigidContactConstraint() {}
+ virtual ~btDeformableRigidContactConstraint()
+ {
+ }
+
+ // object A is the rigid/multi body, and object B is the deformable node/face
+ virtual btVector3 getVa() const;
+
+ // get the split impulse velocity of the deformable node/face at the contact point
+ virtual btVector3 getSplitVb() const = 0;
+
+ // get the split impulse velocity of the rigid/multibody at the contact
+ virtual btVector3 getSplitVa() const;
+
+ virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal);
+
+ virtual void setPenetrationScale(btScalar scale)
+ {
+ m_penetration *= scale;
+ }
+
+ btScalar solveSplitImpulse(const btContactSolverInfo& infoGlobal);
+
+ virtual void applySplitImpulse(const btVector3& impulse) = 0;
};
//
@@ -173,29 +181,34 @@ public:
class btDeformableNodeRigidContactConstraint : public btDeformableRigidContactConstraint
{
public:
- // the deformable node in contact
- btSoftBody::Node* m_node;
-
- btDeformableNodeRigidContactConstraint(const btSoftBody::DeformableNodeRigidContact& contact, const btContactSolverInfo& infoGlobal);
- btDeformableNodeRigidContactConstraint(const btDeformableNodeRigidContactConstraint& other);
- btDeformableNodeRigidContactConstraint(){}
- virtual ~btDeformableNodeRigidContactConstraint()
- {
- }
-
- // get the velocity of the deformable node in contact
- virtual btVector3 getVb() const;
-
- // get the velocity change of the input soft body node in the constraint
- virtual btVector3 getDv(const btSoftBody::Node*) const;
-
- // cast the contact to the desired type
- const btSoftBody::DeformableNodeRigidContact* getContact() const
- {
- return static_cast<const btSoftBody::DeformableNodeRigidContact*>(m_contact);
- }
-
- virtual void applyImpulse(const btVector3& impulse);
+ // the deformable node in contact
+ btSoftBody::Node* m_node;
+
+ btDeformableNodeRigidContactConstraint(const btSoftBody::DeformableNodeRigidContact& contact, const btContactSolverInfo& infoGlobal);
+ btDeformableNodeRigidContactConstraint(const btDeformableNodeRigidContactConstraint& other);
+ btDeformableNodeRigidContactConstraint() {}
+ virtual ~btDeformableNodeRigidContactConstraint()
+ {
+ }
+
+ // get the velocity of the deformable node in contact
+ virtual btVector3 getVb() const;
+
+ // get the split impulse velocity of the deformable node in contact
+ virtual btVector3 getSplitVb() const;
+
+ // get the velocity change of the input soft body node in the constraint
+ virtual btVector3 getDv(const btSoftBody::Node*) const;
+
+ // cast the contact to the desired type
+ const btSoftBody::DeformableNodeRigidContact* getContact() const
+ {
+ return static_cast<const btSoftBody::DeformableNodeRigidContact*>(m_contact);
+ }
+
+ virtual void applyImpulse(const btVector3& impulse);
+
+ virtual void applySplitImpulse(const btVector3& impulse);
};
//
@@ -203,28 +216,33 @@ public:
class btDeformableFaceRigidContactConstraint : public btDeformableRigidContactConstraint
{
public:
- const btSoftBody::Face* m_face;
- bool m_useStrainLimiting;
- btDeformableFaceRigidContactConstraint(const btSoftBody::DeformableFaceRigidContact& contact, const btContactSolverInfo& infoGlobal, bool useStrainLimiting);
- btDeformableFaceRigidContactConstraint(const btDeformableFaceRigidContactConstraint& other);
- btDeformableFaceRigidContactConstraint(): m_useStrainLimiting(false) {}
- virtual ~btDeformableFaceRigidContactConstraint()
- {
- }
-
- // get the velocity of the deformable face at the contact point
- virtual btVector3 getVb() const;
-
- // get the velocity change of the input soft body node in the constraint
- virtual btVector3 getDv(const btSoftBody::Node*) const;
-
- // cast the contact to the desired type
- const btSoftBody::DeformableFaceRigidContact* getContact() const
- {
- return static_cast<const btSoftBody::DeformableFaceRigidContact*>(m_contact);
- }
-
- virtual void applyImpulse(const btVector3& impulse);
+ btSoftBody::Face* m_face;
+ bool m_useStrainLimiting;
+ btDeformableFaceRigidContactConstraint(const btSoftBody::DeformableFaceRigidContact& contact, const btContactSolverInfo& infoGlobal, bool useStrainLimiting);
+ btDeformableFaceRigidContactConstraint(const btDeformableFaceRigidContactConstraint& other);
+ btDeformableFaceRigidContactConstraint() : m_useStrainLimiting(false) {}
+ virtual ~btDeformableFaceRigidContactConstraint()
+ {
+ }
+
+ // get the velocity of the deformable face at the contact point
+ virtual btVector3 getVb() const;
+
+ // get the split impulse velocity of the deformable face at the contact point
+ virtual btVector3 getSplitVb() const;
+
+ // get the velocity change of the input soft body node in the constraint
+ virtual btVector3 getDv(const btSoftBody::Node*) const;
+
+ // cast the contact to the desired type
+ const btSoftBody::DeformableFaceRigidContact* getContact() const
+ {
+ return static_cast<const btSoftBody::DeformableFaceRigidContact*>(m_contact);
+ }
+
+ virtual void applyImpulse(const btVector3& impulse);
+
+ virtual void applySplitImpulse(const btVector3& impulse);
};
//
@@ -232,35 +250,35 @@ public:
class btDeformableFaceNodeContactConstraint : public btDeformableContactConstraint
{
public:
- btSoftBody::Node* m_node;
- btSoftBody::Face* m_face;
- const btSoftBody::DeformableFaceNodeContact* m_contact;
- btVector3 m_total_normal_dv;
- btVector3 m_total_tangent_dv;
-
- btDeformableFaceNodeContactConstraint(const btSoftBody::DeformableFaceNodeContact& contact, const btContactSolverInfo& infoGlobal);
- btDeformableFaceNodeContactConstraint(){}
- virtual ~btDeformableFaceNodeContactConstraint(){}
-
- virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal);
-
- // get the velocity of the object A in the contact
- virtual btVector3 getVa() const;
-
- // get the velocity of the object B in the contact
- virtual btVector3 getVb() const;
-
- // get the velocity change of the input soft body node in the constraint
- virtual btVector3 getDv(const btSoftBody::Node*) const;
-
- // cast the contact to the desired type
- const btSoftBody::DeformableFaceNodeContact* getContact() const
- {
- return static_cast<const btSoftBody::DeformableFaceNodeContact*>(m_contact);
- }
-
- virtual void applyImpulse(const btVector3& impulse);
-
- virtual void setPenetrationScale(btScalar scale){}
+ btSoftBody::Node* m_node;
+ btSoftBody::Face* m_face;
+ const btSoftBody::DeformableFaceNodeContact* m_contact;
+ btVector3 m_total_normal_dv;
+ btVector3 m_total_tangent_dv;
+
+ btDeformableFaceNodeContactConstraint(const btSoftBody::DeformableFaceNodeContact& contact, const btContactSolverInfo& infoGlobal);
+ btDeformableFaceNodeContactConstraint() {}
+ virtual ~btDeformableFaceNodeContactConstraint() {}
+
+ virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal);
+
+ // get the velocity of the object A in the contact
+ virtual btVector3 getVa() const;
+
+ // get the velocity of the object B in the contact
+ virtual btVector3 getVb() const;
+
+ // get the velocity change of the input soft body node in the constraint
+ virtual btVector3 getDv(const btSoftBody::Node*) const;
+
+ // cast the contact to the desired type
+ const btSoftBody::DeformableFaceNodeContact* getContact() const
+ {
+ return static_cast<const btSoftBody::DeformableFaceNodeContact*>(m_contact);
+ }
+
+ virtual void applyImpulse(const btVector3& impulse);
+
+ virtual void setPenetrationScale(btScalar scale) {}
};
#endif /* BT_DEFORMABLE_CONTACT_CONSTRAINT_H */
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.cpp
index 22ca8bf582..7f67260ce6 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.cpp
@@ -17,7 +17,7 @@
#include "btDeformableMultiBodyDynamicsWorld.h"
#include <algorithm>
#include <cmath>
-btScalar btDeformableContactProjection::update(btCollisionObject** deformableBodies,int numDeformableBodies, const btContactSolverInfo& infoGlobal)
+btScalar btDeformableContactProjection::update(btCollisionObject** deformableBodies, int numDeformableBodies, const btContactSolverInfo& infoGlobal)
{
btScalar residualSquare = 0;
for (int i = 0; i < numDeformableBodies; ++i)
@@ -58,27 +58,37 @@ btScalar btDeformableContactProjection::update(btCollisionObject** deformableBod
return residualSquare;
}
-void btDeformableContactProjection::splitImpulseSetup(const btContactSolverInfo& infoGlobal)
+btScalar btDeformableContactProjection::solveSplitImpulse(btCollisionObject** deformableBodies, int numDeformableBodies, const btContactSolverInfo& infoGlobal)
{
- for (int i = 0; i < m_softBodies.size(); ++i)
+ btScalar residualSquare = 0;
+ for (int i = 0; i < numDeformableBodies; ++i)
{
- // node constraints
- for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j)
- {
- btDeformableNodeRigidContactConstraint& constraint = m_nodeRigidConstraints[i][j];
- constraint.setPenetrationScale(infoGlobal.m_deformable_erp);
- }
- // face constraints
- for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j)
+ for (int j = 0; j < m_softBodies.size(); ++j)
{
- btDeformableFaceRigidContactConstraint& constraint = m_faceRigidConstraints[i][j];
- constraint.setPenetrationScale(infoGlobal.m_deformable_erp);
+ btCollisionObject* psb = m_softBodies[j];
+ if (psb != deformableBodies[i])
+ {
+ continue;
+ }
+ for (int k = 0; k < m_nodeRigidConstraints[j].size(); ++k)
+ {
+ btDeformableNodeRigidContactConstraint& constraint = m_nodeRigidConstraints[j][k];
+ btScalar localResidualSquare = constraint.solveSplitImpulse(infoGlobal);
+ residualSquare = btMax(residualSquare, localResidualSquare);
+ }
+ for (int k = 0; k < m_faceRigidConstraints[j].size(); ++k)
+ {
+ btDeformableFaceRigidContactConstraint& constraint = m_faceRigidConstraints[j][k];
+ btScalar localResidualSquare = constraint.solveSplitImpulse(infoGlobal);
+ residualSquare = btMax(residualSquare, localResidualSquare);
+ }
}
}
+ return residualSquare;
}
void btDeformableContactProjection::setConstraints(const btContactSolverInfo& infoGlobal)
-{
+{
BT_PROFILE("setConstraints");
for (int i = 0; i < m_softBodies.size(); ++i)
{
@@ -97,7 +107,7 @@ void btDeformableContactProjection::setConstraints(const btContactSolverInfo& in
m_staticConstraints[i].push_back(static_constraint);
}
}
-
+
// set up deformable anchors
for (int j = 0; j < psb->m_deformableAnchors.size(); ++j)
{
@@ -111,7 +121,7 @@ void btDeformableContactProjection::setConstraints(const btContactSolverInfo& in
btDeformableNodeAnchorConstraint constraint(anchor, infoGlobal);
m_nodeAnchorConstraints[i].push_back(constraint);
}
-
+
// set Deformable Node vs. Rigid constraint
for (int j = 0; j < psb->m_nodeRigidContacts.size(); ++j)
{
@@ -122,17 +132,9 @@ void btDeformableContactProjection::setConstraints(const btContactSolverInfo& in
continue;
}
btDeformableNodeRigidContactConstraint constraint(contact, infoGlobal);
- btVector3 va = constraint.getVa();
- btVector3 vb = constraint.getVb();
- const btVector3 vr = vb - va;
- const btSoftBody::sCti& cti = contact.m_cti;
- const btScalar dn = btDot(vr, cti.m_normal);
- if (dn < SIMD_EPSILON)
- {
- m_nodeRigidConstraints[i].push_back(constraint);
- }
+ m_nodeRigidConstraints[i].push_back(constraint);
}
-
+
// set Deformable Face vs. Rigid constraint
for (int j = 0; j < psb->m_faceRigidContacts.size(); ++j)
{
@@ -143,15 +145,7 @@ void btDeformableContactProjection::setConstraints(const btContactSolverInfo& in
continue;
}
btDeformableFaceRigidContactConstraint constraint(contact, infoGlobal, m_useStrainLimiting);
- btVector3 va = constraint.getVa();
- btVector3 vb = constraint.getVb();
- const btVector3 vr = vb - va;
- const btSoftBody::sCti& cti = contact.m_cti;
- const btScalar dn = btDot(vr, cti.m_normal);
- if (dn < SIMD_EPSILON)
- {
- m_faceRigidConstraints[i].push_back(constraint);
- }
+ m_faceRigidConstraints[i].push_back(constraint);
}
}
}
@@ -159,267 +153,269 @@ void btDeformableContactProjection::setConstraints(const btContactSolverInfo& in
void btDeformableContactProjection::project(TVStack& x)
{
#ifndef USE_MGS
- const int dim = 3;
- for (int index = 0; index < m_projectionsDict.size(); ++index)
- {
- btAlignedObjectArray<btVector3>& projectionDirs = *m_projectionsDict.getAtIndex(index);
- size_t i = m_projectionsDict.getKeyAtIndex(index).getUid1();
- if (projectionDirs.size() >= dim)
- {
- // static node
- x[i].setZero();
- continue;
- }
- else if (projectionDirs.size() == 2)
- {
- btVector3 dir0 = projectionDirs[0];
- btVector3 dir1 = projectionDirs[1];
- btVector3 free_dir = btCross(dir0, dir1);
- if (free_dir.safeNorm() < SIMD_EPSILON)
- {
- x[i] -= x[i].dot(dir0) * dir0;
- x[i] -= x[i].dot(dir1) * dir1;
- }
- else
- {
- free_dir.normalize();
- x[i] = x[i].dot(free_dir) * free_dir;
- }
- }
- else
- {
- btAssert(projectionDirs.size() == 1);
- btVector3 dir0 = projectionDirs[0];
- x[i] -= x[i].dot(dir0) * dir0;
- }
- }
+ const int dim = 3;
+ for (int index = 0; index < m_projectionsDict.size(); ++index)
+ {
+ btAlignedObjectArray<btVector3>& projectionDirs = *m_projectionsDict.getAtIndex(index);
+ size_t i = m_projectionsDict.getKeyAtIndex(index).getUid1();
+ if (projectionDirs.size() >= dim)
+ {
+ // static node
+ x[i].setZero();
+ continue;
+ }
+ else if (projectionDirs.size() == 2)
+ {
+ btVector3 dir0 = projectionDirs[0];
+ btVector3 dir1 = projectionDirs[1];
+ btVector3 free_dir = btCross(dir0, dir1);
+ if (free_dir.safeNorm() < SIMD_EPSILON)
+ {
+ x[i] -= x[i].dot(dir0) * dir0;
+ }
+ else
+ {
+ free_dir.normalize();
+ x[i] = x[i].dot(free_dir) * free_dir;
+ }
+ }
+ else
+ {
+ btAssert(projectionDirs.size() == 1);
+ btVector3 dir0 = projectionDirs[0];
+ x[i] -= x[i].dot(dir0) * dir0;
+ }
+ }
#else
- btReducedVector p(x.size());
- for (int i = 0; i < m_projections.size(); ++i)
- {
- p += (m_projections[i].dot(x) * m_projections[i]);
- }
- for (int i = 0; i < p.m_indices.size(); ++i)
- {
- x[p.m_indices[i]] -= p.m_vecs[i];
- }
+ btReducedVector p(x.size());
+ for (int i = 0; i < m_projections.size(); ++i)
+ {
+ p += (m_projections[i].dot(x) * m_projections[i]);
+ }
+ for (int i = 0; i < p.m_indices.size(); ++i)
+ {
+ x[p.m_indices[i]] -= p.m_vecs[i];
+ }
#endif
}
void btDeformableContactProjection::setProjection()
{
#ifndef USE_MGS
- BT_PROFILE("btDeformableContactProjection::setProjection");
- btAlignedObjectArray<btVector3> units;
- units.push_back(btVector3(1,0,0));
- units.push_back(btVector3(0,1,0));
- units.push_back(btVector3(0,0,1));
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < m_staticConstraints[i].size(); ++j)
- {
- int index = m_staticConstraints[i][j].m_node->index;
- m_staticConstraints[i][j].m_node->m_penetration = SIMD_INFINITY;
- if (m_projectionsDict.find(index) == NULL)
- {
- m_projectionsDict.insert(index, units);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
- for (int k = 0; k < 3; ++k)
- {
- projections.push_back(units[k]);
- }
- }
- }
- for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j)
- {
- int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index;
- m_nodeAnchorConstraints[i][j].m_anchor->m_node->m_penetration = SIMD_INFINITY;
- if (m_projectionsDict.find(index) == NULL)
- {
- m_projectionsDict.insert(index, units);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
- for (int k = 0; k < 3; ++k)
- {
- projections.push_back(units[k]);
- }
- }
- }
- for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j)
- {
- int index = m_nodeRigidConstraints[i][j].m_node->index;
- m_nodeRigidConstraints[i][j].m_node->m_penetration = -m_nodeRigidConstraints[i][j].getContact()->m_cti.m_offset;
- if (m_nodeRigidConstraints[i][j].m_static)
- {
- if (m_projectionsDict.find(index) == NULL)
- {
- m_projectionsDict.insert(index, units);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
- for (int k = 0; k < 3; ++k)
- {
- projections.push_back(units[k]);
- }
- }
- }
- else
- {
- if (m_projectionsDict.find(index) == NULL)
- {
- btAlignedObjectArray<btVector3> projections;
- projections.push_back(m_nodeRigidConstraints[i][j].m_normal);
- m_projectionsDict.insert(index, projections);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
- projections.push_back(m_nodeRigidConstraints[i][j].m_normal);
- }
- }
- }
- for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j)
- {
- const btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face;
- btScalar penetration = -m_faceRigidConstraints[i][j].getContact()->m_cti.m_offset;
- for (int k = 0; k < 3; ++k)
- {
- face->m_n[k]->m_penetration = btMax(face->m_n[k]->m_penetration, penetration);
- }
- for (int k = 0; k < 3; ++k)
- {
- btSoftBody::Node* node = face->m_n[k];
- node->m_penetration = true;
- int index = node->index;
- if (m_faceRigidConstraints[i][j].m_static)
- {
- if (m_projectionsDict.find(index) == NULL)
- {
- m_projectionsDict.insert(index, units);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
- for (int k = 0; k < 3; ++k)
- {
- projections.push_back(units[k]);
- }
- }
- }
- else
- {
- if (m_projectionsDict.find(index) == NULL)
- {
- btAlignedObjectArray<btVector3> projections;
- projections.push_back(m_faceRigidConstraints[i][j].m_normal);
- m_projectionsDict.insert(index, projections);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
- projections.push_back(m_faceRigidConstraints[i][j].m_normal);
- }
- }
- }
- }
- }
+ BT_PROFILE("btDeformableContactProjection::setProjection");
+ btAlignedObjectArray<btVector3> units;
+ units.push_back(btVector3(1, 0, 0));
+ units.push_back(btVector3(0, 1, 0));
+ units.push_back(btVector3(0, 0, 1));
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < m_staticConstraints[i].size(); ++j)
+ {
+ int index = m_staticConstraints[i][j].m_node->index;
+ m_staticConstraints[i][j].m_node->m_constrained = true;
+ if (m_projectionsDict.find(index) == NULL)
+ {
+ m_projectionsDict.insert(index, units);
+ }
+ else
+ {
+ btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
+ for (int k = 0; k < 3; ++k)
+ {
+ projections.push_back(units[k]);
+ }
+ }
+ }
+ for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j)
+ {
+ int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index;
+ m_nodeAnchorConstraints[i][j].m_anchor->m_node->m_constrained = true;
+ if (m_projectionsDict.find(index) == NULL)
+ {
+ m_projectionsDict.insert(index, units);
+ }
+ else
+ {
+ btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
+ for (int k = 0; k < 3; ++k)
+ {
+ projections.push_back(units[k]);
+ }
+ }
+ }
+ for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j)
+ {
+ int index = m_nodeRigidConstraints[i][j].m_node->index;
+ m_nodeRigidConstraints[i][j].m_node->m_constrained = true;
+ if (m_nodeRigidConstraints[i][j].m_binding)
+ {
+ if (m_nodeRigidConstraints[i][j].m_static)
+ {
+ if (m_projectionsDict.find(index) == NULL)
+ {
+ m_projectionsDict.insert(index, units);
+ }
+ else
+ {
+ btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
+ for (int k = 0; k < 3; ++k)
+ {
+ projections.push_back(units[k]);
+ }
+ }
+ }
+ else
+ {
+ if (m_projectionsDict.find(index) == NULL)
+ {
+ btAlignedObjectArray<btVector3> projections;
+ projections.push_back(m_nodeRigidConstraints[i][j].m_normal);
+ m_projectionsDict.insert(index, projections);
+ }
+ else
+ {
+ btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
+ projections.push_back(m_nodeRigidConstraints[i][j].m_normal);
+ }
+ }
+ }
+ }
+ for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j)
+ {
+ const btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face;
+ if (m_faceRigidConstraints[i][j].m_binding)
+ {
+ for (int k = 0; k < 3; ++k)
+ {
+ face->m_n[k]->m_constrained = true;
+ }
+ }
+ for (int k = 0; k < 3; ++k)
+ {
+ btSoftBody::Node* node = face->m_n[k];
+ int index = node->index;
+ if (m_faceRigidConstraints[i][j].m_static)
+ {
+ if (m_projectionsDict.find(index) == NULL)
+ {
+ m_projectionsDict.insert(index, units);
+ }
+ else
+ {
+ btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
+ for (int l = 0; l < 3; ++l)
+ {
+ projections.push_back(units[l]);
+ }
+ }
+ }
+ else
+ {
+ if (m_projectionsDict.find(index) == NULL)
+ {
+ btAlignedObjectArray<btVector3> projections;
+ projections.push_back(m_faceRigidConstraints[i][j].m_normal);
+ m_projectionsDict.insert(index, projections);
+ }
+ else
+ {
+ btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
+ projections.push_back(m_faceRigidConstraints[i][j].m_normal);
+ }
+ }
+ }
+ }
+ }
#else
- int dof = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- dof += m_softBodies[i]->m_nodes.size();
- }
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < m_staticConstraints[i].size(); ++j)
- {
- int index = m_staticConstraints[i][j].m_node->index;
- m_staticConstraints[i][j].m_node->m_penetration = SIMD_INFINITY;
- btAlignedObjectArray<int> indices;
- btAlignedObjectArray<btVector3> vecs1,vecs2,vecs3;
- indices.push_back(index);
- vecs1.push_back(btVector3(1,0,0));
- vecs2.push_back(btVector3(0,1,0));
- vecs3.push_back(btVector3(0,0,1));
- m_projections.push_back(btReducedVector(dof, indices, vecs1));
- m_projections.push_back(btReducedVector(dof, indices, vecs2));
- m_projections.push_back(btReducedVector(dof, indices, vecs3));
- }
-
- for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j)
- {
- int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index;
- m_nodeAnchorConstraints[i][j].m_anchor->m_node->m_penetration = SIMD_INFINITY;
- btAlignedObjectArray<int> indices;
- btAlignedObjectArray<btVector3> vecs1,vecs2,vecs3;
- indices.push_back(index);
- vecs1.push_back(btVector3(1,0,0));
- vecs2.push_back(btVector3(0,1,0));
- vecs3.push_back(btVector3(0,0,1));
- m_projections.push_back(btReducedVector(dof, indices, vecs1));
- m_projections.push_back(btReducedVector(dof, indices, vecs2));
- m_projections.push_back(btReducedVector(dof, indices, vecs3));
- }
- for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j)
- {
- int index = m_nodeRigidConstraints[i][j].m_node->index;
- m_nodeRigidConstraints[i][j].m_node->m_penetration = -m_nodeRigidConstraints[i][j].getContact()->m_cti.m_offset;
- btAlignedObjectArray<int> indices;
- indices.push_back(index);
- btAlignedObjectArray<btVector3> vecs1,vecs2,vecs3;
- if (m_nodeRigidConstraints[i][j].m_static)
- {
- vecs1.push_back(btVector3(1,0,0));
- vecs2.push_back(btVector3(0,1,0));
- vecs3.push_back(btVector3(0,0,1));
- m_projections.push_back(btReducedVector(dof, indices, vecs1));
- m_projections.push_back(btReducedVector(dof, indices, vecs2));
- m_projections.push_back(btReducedVector(dof, indices, vecs3));
- }
- else
- {
- vecs1.push_back(m_nodeRigidConstraints[i][j].m_normal);
- m_projections.push_back(btReducedVector(dof, indices, vecs1));
- }
- }
- for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j)
- {
- const btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face;
+ int dof = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ dof += m_softBodies[i]->m_nodes.size();
+ }
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < m_staticConstraints[i].size(); ++j)
+ {
+ int index = m_staticConstraints[i][j].m_node->index;
+ m_staticConstraints[i][j].m_node->m_penetration = SIMD_INFINITY;
+ btAlignedObjectArray<int> indices;
+ btAlignedObjectArray<btVector3> vecs1, vecs2, vecs3;
+ indices.push_back(index);
+ vecs1.push_back(btVector3(1, 0, 0));
+ vecs2.push_back(btVector3(0, 1, 0));
+ vecs3.push_back(btVector3(0, 0, 1));
+ m_projections.push_back(btReducedVector(dof, indices, vecs1));
+ m_projections.push_back(btReducedVector(dof, indices, vecs2));
+ m_projections.push_back(btReducedVector(dof, indices, vecs3));
+ }
+
+ for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j)
+ {
+ int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index;
+ m_nodeAnchorConstraints[i][j].m_anchor->m_node->m_penetration = SIMD_INFINITY;
+ btAlignedObjectArray<int> indices;
+ btAlignedObjectArray<btVector3> vecs1, vecs2, vecs3;
+ indices.push_back(index);
+ vecs1.push_back(btVector3(1, 0, 0));
+ vecs2.push_back(btVector3(0, 1, 0));
+ vecs3.push_back(btVector3(0, 0, 1));
+ m_projections.push_back(btReducedVector(dof, indices, vecs1));
+ m_projections.push_back(btReducedVector(dof, indices, vecs2));
+ m_projections.push_back(btReducedVector(dof, indices, vecs3));
+ }
+ for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j)
+ {
+ int index = m_nodeRigidConstraints[i][j].m_node->index;
+ m_nodeRigidConstraints[i][j].m_node->m_penetration = -m_nodeRigidConstraints[i][j].getContact()->m_cti.m_offset;
+ btAlignedObjectArray<int> indices;
+ indices.push_back(index);
+ btAlignedObjectArray<btVector3> vecs1, vecs2, vecs3;
+ if (m_nodeRigidConstraints[i][j].m_static)
+ {
+ vecs1.push_back(btVector3(1, 0, 0));
+ vecs2.push_back(btVector3(0, 1, 0));
+ vecs3.push_back(btVector3(0, 0, 1));
+ m_projections.push_back(btReducedVector(dof, indices, vecs1));
+ m_projections.push_back(btReducedVector(dof, indices, vecs2));
+ m_projections.push_back(btReducedVector(dof, indices, vecs3));
+ }
+ else
+ {
+ vecs1.push_back(m_nodeRigidConstraints[i][j].m_normal);
+ m_projections.push_back(btReducedVector(dof, indices, vecs1));
+ }
+ }
+ for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j)
+ {
+ const btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face;
btVector3 bary = m_faceRigidConstraints[i][j].getContact()->m_bary;
- btScalar penetration = -m_faceRigidConstraints[i][j].getContact()->m_cti.m_offset;
- for (int k = 0; k < 3; ++k)
- {
- face->m_n[k]->m_penetration = btMax(face->m_n[k]->m_penetration, penetration);
- }
+ btScalar penetration = -m_faceRigidConstraints[i][j].getContact()->m_cti.m_offset;
+ for (int k = 0; k < 3; ++k)
+ {
+ face->m_n[k]->m_penetration = btMax(face->m_n[k]->m_penetration, penetration);
+ }
if (m_faceRigidConstraints[i][j].m_static)
{
for (int l = 0; l < 3; ++l)
{
-
btReducedVector rv(dof);
for (int k = 0; k < 3; ++k)
{
rv.m_indices.push_back(face->m_n[k]->index);
- btVector3 v(0,0,0);
+ btVector3 v(0, 0, 0);
v[l] = bary[k];
rv.m_vecs.push_back(v);
- rv.sort();
+ rv.sort();
}
m_projections.push_back(rv);
}
@@ -431,121 +427,134 @@ void btDeformableContactProjection::setProjection()
{
rv.m_indices.push_back(face->m_n[k]->index);
rv.m_vecs.push_back(bary[k] * m_faceRigidConstraints[i][j].m_normal);
- rv.sort();
+ rv.sort();
}
m_projections.push_back(rv);
}
}
- }
- btModifiedGramSchmidt<btReducedVector> mgs(m_projections);
- mgs.solve();
- m_projections = mgs.m_out;
+ }
+ btModifiedGramSchmidt<btReducedVector> mgs(m_projections);
+ mgs.solve();
+ m_projections = mgs.m_out;
#endif
}
void btDeformableContactProjection::checkConstraints(const TVStack& x)
{
- for (int i = 0; i < m_lagrangeMultipliers.size(); ++i)
- {
- btVector3 d(0,0,0);
- const LagrangeMultiplier& lm = m_lagrangeMultipliers[i];
- for (int j = 0; j < lm.m_num_constraints; ++j)
- {
- for (int k = 0; k < lm.m_num_nodes; ++k)
- {
- d[j] += lm.m_weights[k] * x[lm.m_indices[k]].dot(lm.m_dirs[j]);
- }
- }
- printf("d = %f, %f, %f\n",d[0],d[1],d[2]);
- }
+ for (int i = 0; i < m_lagrangeMultipliers.size(); ++i)
+ {
+ btVector3 d(0, 0, 0);
+ const LagrangeMultiplier& lm = m_lagrangeMultipliers[i];
+ for (int j = 0; j < lm.m_num_constraints; ++j)
+ {
+ for (int k = 0; k < lm.m_num_nodes; ++k)
+ {
+ d[j] += lm.m_weights[k] * x[lm.m_indices[k]].dot(lm.m_dirs[j]);
+ }
+ }
+ // printf("d = %f, %f, %f\n", d[0], d[1], d[2]);
+ // printf("val = %f, %f, %f\n", lm.m_vals[0], lm.m_vals[1], lm.m_vals[2]);
+ }
}
void btDeformableContactProjection::setLagrangeMultiplier()
{
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < m_staticConstraints[i].size(); ++j)
- {
- int index = m_staticConstraints[i][j].m_node->index;
- m_staticConstraints[i][j].m_node->m_penetration = SIMD_INFINITY;
- LagrangeMultiplier lm;
- lm.m_num_nodes = 1;
- lm.m_indices[0] = index;
- lm.m_weights[0] = 1.0;
- lm.m_num_constraints = 3;
- lm.m_dirs[0] = btVector3(1,0,0);
- lm.m_dirs[1] = btVector3(0,1,0);
- lm.m_dirs[2] = btVector3(0,0,1);
- m_lagrangeMultipliers.push_back(lm);
- }
- for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j)
- {
- int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index;
- m_nodeAnchorConstraints[i][j].m_anchor->m_node->m_penetration = SIMD_INFINITY;
- LagrangeMultiplier lm;
- lm.m_num_nodes = 1;
- lm.m_indices[0] = index;
- lm.m_weights[0] = 1.0;
- lm.m_num_constraints = 3;
- lm.m_dirs[0] = btVector3(1,0,0);
- lm.m_dirs[1] = btVector3(0,1,0);
- lm.m_dirs[2] = btVector3(0,0,1);
- m_lagrangeMultipliers.push_back(lm);
- }
- for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j)
- {
- int index = m_nodeRigidConstraints[i][j].m_node->index;
- m_nodeRigidConstraints[i][j].m_node->m_penetration = -m_nodeRigidConstraints[i][j].getContact()->m_cti.m_offset;
- LagrangeMultiplier lm;
- lm.m_num_nodes = 1;
- lm.m_indices[0] = index;
- lm.m_weights[0] = 1.0;
- if (m_nodeRigidConstraints[i][j].m_static)
- {
- lm.m_num_constraints = 3;
- lm.m_dirs[0] = btVector3(1,0,0);
- lm.m_dirs[1] = btVector3(0,1,0);
- lm.m_dirs[2] = btVector3(0,0,1);
- }
- else
- {
- lm.m_num_constraints = 1;
- lm.m_dirs[0] = m_nodeRigidConstraints[i][j].m_normal;
- }
- m_lagrangeMultipliers.push_back(lm);
- }
- for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j)
- {
- const btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face;
-
- btVector3 bary = m_faceRigidConstraints[i][j].getContact()->m_bary;
- btScalar penetration = -m_faceRigidConstraints[i][j].getContact()->m_cti.m_offset;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < m_staticConstraints[i].size(); ++j)
+ {
+ int index = m_staticConstraints[i][j].m_node->index;
+ m_staticConstraints[i][j].m_node->m_constrained = true;
+ LagrangeMultiplier lm;
+ lm.m_num_nodes = 1;
+ lm.m_indices[0] = index;
+ lm.m_weights[0] = 1.0;
+ lm.m_num_constraints = 3;
+ lm.m_dirs[0] = btVector3(1, 0, 0);
+ lm.m_dirs[1] = btVector3(0, 1, 0);
+ lm.m_dirs[2] = btVector3(0, 0, 1);
+ m_lagrangeMultipliers.push_back(lm);
+ }
+ for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j)
+ {
+ int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index;
+ m_nodeAnchorConstraints[i][j].m_anchor->m_node->m_constrained = true;
+ LagrangeMultiplier lm;
+ lm.m_num_nodes = 1;
+ lm.m_indices[0] = index;
+ lm.m_weights[0] = 1.0;
+ lm.m_num_constraints = 3;
+ lm.m_dirs[0] = btVector3(1, 0, 0);
+ lm.m_dirs[1] = btVector3(0, 1, 0);
+ lm.m_dirs[2] = btVector3(0, 0, 1);
+ m_lagrangeMultipliers.push_back(lm);
+ }
+
+ for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j)
+ {
+ if (!m_nodeRigidConstraints[i][j].m_binding)
+ {
+ continue;
+ }
+ int index = m_nodeRigidConstraints[i][j].m_node->index;
+ m_nodeRigidConstraints[i][j].m_node->m_constrained = true;
+ LagrangeMultiplier lm;
+ lm.m_num_nodes = 1;
+ lm.m_indices[0] = index;
+ lm.m_weights[0] = 1.0;
+ if (m_nodeRigidConstraints[i][j].m_static)
+ {
+ lm.m_num_constraints = 3;
+ lm.m_dirs[0] = btVector3(1, 0, 0);
+ lm.m_dirs[1] = btVector3(0, 1, 0);
+ lm.m_dirs[2] = btVector3(0, 0, 1);
+ }
+ else
+ {
+ lm.m_num_constraints = 1;
+ lm.m_dirs[0] = m_nodeRigidConstraints[i][j].m_normal;
+ }
+ m_lagrangeMultipliers.push_back(lm);
+ }
+
+ for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j)
+ {
+ if (!m_faceRigidConstraints[i][j].m_binding)
+ {
+ continue;
+ }
+ btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face;
+
+ btVector3 bary = m_faceRigidConstraints[i][j].getContact()->m_bary;
LagrangeMultiplier lm;
lm.m_num_nodes = 3;
- for (int k = 0; k<3; ++k)
+
+ for (int k = 0; k < 3; ++k)
{
- face->m_n[k]->m_penetration = btMax(face->m_n[k]->m_penetration, penetration);
+ face->m_n[k]->m_constrained = true;
lm.m_indices[k] = face->m_n[k]->index;
lm.m_weights[k] = bary[k];
}
- if (m_faceRigidConstraints[i][j].m_static)
- {
+ if (m_faceRigidConstraints[i][j].m_static)
+ {
+ face->m_pcontact[3] = 1;
lm.m_num_constraints = 3;
- lm.m_dirs[0] = btVector3(1,0,0);
- lm.m_dirs[1] = btVector3(0,1,0);
- lm.m_dirs[2] = btVector3(0,0,1);
+ lm.m_dirs[0] = btVector3(1, 0, 0);
+ lm.m_dirs[1] = btVector3(0, 1, 0);
+ lm.m_dirs[2] = btVector3(0, 0, 1);
}
else
{
+ face->m_pcontact[3] = 0;
lm.m_num_constraints = 1;
lm.m_dirs[0] = m_faceRigidConstraints[i][j].m_normal;
}
- m_lagrangeMultipliers.push_back(lm);
+ m_lagrangeMultipliers.push_back(lm);
}
}
}
@@ -562,7 +571,7 @@ void btDeformableContactProjection::applyDynamicFriction(TVStack& f)
if (node->m_im != 0)
{
int index = node->index;
- f[index] += constraint.getDv(node)* (1./node->m_im);
+ f[index] += constraint.getDv(node) * (1. / node->m_im);
}
}
for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j)
@@ -575,7 +584,7 @@ void btDeformableContactProjection::applyDynamicFriction(TVStack& f)
if (node->m_im != 0)
{
int index = node->index;
- f[index] += constraint.getDv(node)* (1./node->m_im);
+ f[index] += constraint.getDv(node) * (1. / node->m_im);
}
}
}
@@ -587,7 +596,7 @@ void btDeformableContactProjection::applyDynamicFriction(TVStack& f)
if (node->m_im != 0)
{
int index = node->index;
- f[index] += constraint.getDv(node)* (1./node->m_im);
+ f[index] += constraint.getDv(node) * (1. / node->m_im);
}
for (int k = 0; k < 3; ++k)
{
@@ -595,7 +604,7 @@ void btDeformableContactProjection::applyDynamicFriction(TVStack& f)
if (node->m_im != 0)
{
int index = node->index;
- f[index] += constraint.getDv(node)* (1./node->m_im);
+ f[index] += constraint.getDv(node) * (1. / node->m_im);
}
}
}
@@ -612,9 +621,8 @@ void btDeformableContactProjection::reinitialize(bool nodeUpdated)
m_nodeRigidConstraints.resize(N);
m_faceRigidConstraints.resize(N);
m_deformableConstraints.resize(N);
-
}
- for (int i = 0 ; i < N; ++i)
+ for (int i = 0; i < N; ++i)
{
m_staticConstraints[i].clear();
m_nodeAnchorConstraints[i].clear();
@@ -623,12 +631,9 @@ void btDeformableContactProjection::reinitialize(bool nodeUpdated)
m_deformableConstraints[i].clear();
}
#ifndef USE_MGS
- m_projectionsDict.clear();
+ m_projectionsDict.clear();
#else
- m_projections.clear();
+ m_projections.clear();
#endif
- m_lagrangeMultipliers.clear();
+ m_lagrangeMultipliers.clear();
}
-
-
-
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.h b/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.h
index 8d7e94d4fb..4964eaf990 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.h
@@ -27,31 +27,30 @@
struct LagrangeMultiplier
{
- int m_num_constraints; // Number of constraints
- int m_num_nodes; // Number of nodes in these constraints
- btScalar m_weights[3]; // weights of the nodes involved, same size as m_num_nodes
- btVector3 m_dirs[3]; // Constraint directions, same size of m_num_constraints;
- int m_indices[3]; // indices of the nodes involved, same size as m_num_nodes;
+ int m_num_constraints; // Number of constraints
+ int m_num_nodes; // Number of nodes in these constraints
+ btScalar m_weights[3]; // weights of the nodes involved, same size as m_num_nodes
+ btVector3 m_dirs[3]; // Constraint directions, same size of m_num_constraints;
+ int m_indices[3]; // indices of the nodes involved, same size as m_num_nodes;
};
-
class btDeformableContactProjection
{
public:
- typedef btAlignedObjectArray<btVector3> TVStack;
- btAlignedObjectArray<btSoftBody *>& m_softBodies;
-
- // all constraints involving face
- btAlignedObjectArray<btDeformableContactConstraint*> m_allFaceConstraints;
+ typedef btAlignedObjectArray<btVector3> TVStack;
+ btAlignedObjectArray<btSoftBody*>& m_softBodies;
+
+ // all constraints involving face
+ btAlignedObjectArray<btDeformableContactConstraint*> m_allFaceConstraints;
#ifndef USE_MGS
- // map from node index to projection directions
- btHashMap<btHashInt, btAlignedObjectArray<btVector3> > m_projectionsDict;
+ // map from node index to projection directions
+ btHashMap<btHashInt, btAlignedObjectArray<btVector3> > m_projectionsDict;
#else
- btAlignedObjectArray<btReducedVector> m_projections;
+ btAlignedObjectArray<btReducedVector> m_projections;
#endif
-
- btAlignedObjectArray<LagrangeMultiplier> m_lagrangeMultipliers;
-
+
+ btAlignedObjectArray<LagrangeMultiplier> m_lagrangeMultipliers;
+
// map from node index to static constraint
btAlignedObjectArray<btAlignedObjectArray<btDeformableStaticConstraint> > m_staticConstraints;
// map from node index to node rigid constraint
@@ -62,39 +61,39 @@ public:
btAlignedObjectArray<btAlignedObjectArray<btDeformableFaceNodeContactConstraint> > m_deformableConstraints;
// map from node index to node anchor constraint
btAlignedObjectArray<btAlignedObjectArray<btDeformableNodeAnchorConstraint> > m_nodeAnchorConstraints;
-
- bool m_useStrainLimiting;
-
- btDeformableContactProjection(btAlignedObjectArray<btSoftBody *>& softBodies)
- : m_softBodies(softBodies)
- {
- }
-
- virtual ~btDeformableContactProjection()
- {
- }
-
- // apply the constraints to the rhs of the linear solve
- virtual void project(TVStack& x);
-
- // add friction force to the rhs of the linear solve
- virtual void applyDynamicFriction(TVStack& f);
-
- // update and solve the constraints
- virtual btScalar update(btCollisionObject** deformableBodies,int numDeformableBodies, const btContactSolverInfo& infoGlobal);
-
- // Add constraints to m_constraints. In addition, the constraints that each vertex own are recorded in m_constraintsDict.
- virtual void setConstraints(const btContactSolverInfo& infoGlobal);
-
- // Set up projections for each vertex by adding the projection direction to
- virtual void setProjection();
-
- virtual void reinitialize(bool nodeUpdated);
-
- virtual void splitImpulseSetup(const btContactSolverInfo& infoGlobal);
-
- virtual void setLagrangeMultiplier();
-
- void checkConstraints(const TVStack& x);
+
+ bool m_useStrainLimiting;
+
+ btDeformableContactProjection(btAlignedObjectArray<btSoftBody*>& softBodies)
+ : m_softBodies(softBodies)
+ {
+ }
+
+ virtual ~btDeformableContactProjection()
+ {
+ }
+
+ // apply the constraints to the rhs of the linear solve
+ virtual void project(TVStack& x);
+
+ // add friction force to the rhs of the linear solve
+ virtual void applyDynamicFriction(TVStack& f);
+
+ // update and solve the constraints
+ virtual btScalar update(btCollisionObject** deformableBodies, int numDeformableBodies, const btContactSolverInfo& infoGlobal);
+
+ // Add constraints to m_constraints. In addition, the constraints that each vertex own are recorded in m_constraintsDict.
+ virtual void setConstraints(const btContactSolverInfo& infoGlobal);
+
+ // Set up projections for each vertex by adding the projection direction to
+ virtual void setProjection();
+
+ virtual void reinitialize(bool nodeUpdated);
+
+ btScalar solveSplitImpulse(btCollisionObject** deformableBodies, int numDeformableBodies, const btContactSolverInfo& infoGlobal);
+
+ virtual void setLagrangeMultiplier();
+
+ void checkConstraints(const TVStack& x);
};
#endif /* btDeformableContactProjection_h */
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableCorotatedForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableCorotatedForce.h
index 2d042df729..dfd85523bc 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableCorotatedForce.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableCorotatedForce.h
@@ -21,107 +21,104 @@
static inline int PolarDecomposition(const btMatrix3x3& m, btMatrix3x3& q, btMatrix3x3& s)
{
- static const btPolarDecomposition polar;
- return polar.decompose(m, q, s);
+ static const btPolarDecomposition polar;
+ return polar.decompose(m, q, s);
}
class btDeformableCorotatedForce : public btDeformableLagrangianForce
{
public:
- typedef btAlignedObjectArray<btVector3> TVStack;
- btScalar m_mu, m_lambda;
- btDeformableCorotatedForce(): m_mu(1), m_lambda(1)
- {
-
- }
-
- btDeformableCorotatedForce(btScalar mu, btScalar lambda): m_mu(mu), m_lambda(lambda)
- {
- }
-
- virtual void addScaledForces(btScalar scale, TVStack& force)
- {
- addScaledElasticForce(scale, force);
- }
-
- virtual void addScaledExplicitForce(btScalar scale, TVStack& force)
- {
- addScaledElasticForce(scale, force);
- }
-
- virtual void addScaledDampingForce(btScalar scale, TVStack& force)
- {
- }
-
- virtual void addScaledElasticForce(btScalar scale, TVStack& force)
- {
- int numNodes = getNumNodes();
- btAssert(numNodes <= force.size());
- btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1);
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_tetras.size(); ++j)
- {
- btSoftBody::Tetra& tetra = psb->m_tetras[j];
- btMatrix3x3 P;
- firstPiola(tetra.m_F,P);
- btVector3 force_on_node0 = P * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col);
- btMatrix3x3 force_on_node123 = P * tetra.m_Dm_inverse.transpose();
-
- btSoftBody::Node* node0 = tetra.m_n[0];
- btSoftBody::Node* node1 = tetra.m_n[1];
- btSoftBody::Node* node2 = tetra.m_n[2];
- btSoftBody::Node* node3 = tetra.m_n[3];
- size_t id0 = node0->index;
- size_t id1 = node1->index;
- size_t id2 = node2->index;
- size_t id3 = node3->index;
-
- // elastic force
- // explicit elastic force
- btScalar scale1 = scale * tetra.m_element_measure;
- force[id0] -= scale1 * force_on_node0;
- force[id1] -= scale1 * force_on_node123.getColumn(0);
- force[id2] -= scale1 * force_on_node123.getColumn(1);
- force[id3] -= scale1 * force_on_node123.getColumn(2);
- }
- }
- }
-
- void firstPiola(const btMatrix3x3& F, btMatrix3x3& P)
- {
- // btMatrix3x3 JFinvT = F.adjoint();
- btScalar J = F.determinant();
- P = F.adjoint().transpose() * (m_lambda * (J-1));
- if (m_mu > SIMD_EPSILON)
- {
- btMatrix3x3 R,S;
- if (J < 1024 * SIMD_EPSILON)
- R.setIdentity();
- else
- PolarDecomposition(F, R, S); // this QR is not robust, consider using implicit shift svd
- /*https://fuchuyuan.github.io/research/svd/paper.pdf*/
- P += (F-R) * 2 * m_mu;
- }
- }
-
- virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df)
- {
- }
-
- virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df)
- {
- }
-
- virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA){}
-
- virtual btDeformableLagrangianForceType getForceType()
- {
- return BT_COROTATED_FORCE;
- }
-
-};
+ typedef btAlignedObjectArray<btVector3> TVStack;
+ btScalar m_mu, m_lambda;
+ btDeformableCorotatedForce() : m_mu(1), m_lambda(1)
+ {
+ }
+
+ btDeformableCorotatedForce(btScalar mu, btScalar lambda) : m_mu(mu), m_lambda(lambda)
+ {
+ }
+
+ virtual void addScaledForces(btScalar scale, TVStack& force)
+ {
+ addScaledElasticForce(scale, force);
+ }
+
+ virtual void addScaledExplicitForce(btScalar scale, TVStack& force)
+ {
+ addScaledElasticForce(scale, force);
+ }
+
+ virtual void addScaledDampingForce(btScalar scale, TVStack& force)
+ {
+ }
+
+ virtual void addScaledElasticForce(btScalar scale, TVStack& force)
+ {
+ int numNodes = getNumNodes();
+ btAssert(numNodes <= force.size());
+ btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1);
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_tetras.size(); ++j)
+ {
+ btSoftBody::Tetra& tetra = psb->m_tetras[j];
+ btMatrix3x3 P;
+ firstPiola(tetra.m_F, P);
+ btVector3 force_on_node0 = P * (tetra.m_Dm_inverse.transpose() * grad_N_hat_1st_col);
+ btMatrix3x3 force_on_node123 = P * tetra.m_Dm_inverse.transpose();
+
+ btSoftBody::Node* node0 = tetra.m_n[0];
+ btSoftBody::Node* node1 = tetra.m_n[1];
+ btSoftBody::Node* node2 = tetra.m_n[2];
+ btSoftBody::Node* node3 = tetra.m_n[3];
+ size_t id0 = node0->index;
+ size_t id1 = node1->index;
+ size_t id2 = node2->index;
+ size_t id3 = node3->index;
+ // elastic force
+ // explicit elastic force
+ btScalar scale1 = scale * tetra.m_element_measure;
+ force[id0] -= scale1 * force_on_node0;
+ force[id1] -= scale1 * force_on_node123.getColumn(0);
+ force[id2] -= scale1 * force_on_node123.getColumn(1);
+ force[id3] -= scale1 * force_on_node123.getColumn(2);
+ }
+ }
+ }
+
+ void firstPiola(const btMatrix3x3& F, btMatrix3x3& P)
+ {
+ // btMatrix3x3 JFinvT = F.adjoint();
+ btScalar J = F.determinant();
+ P = F.adjoint().transpose() * (m_lambda * (J - 1));
+ if (m_mu > SIMD_EPSILON)
+ {
+ btMatrix3x3 R, S;
+ if (J < 1024 * SIMD_EPSILON)
+ R.setIdentity();
+ else
+ PolarDecomposition(F, R, S); // this QR is not robust, consider using implicit shift svd
+ /*https://fuchuyuan.github.io/research/svd/paper.pdf*/
+ P += (F - R) * 2 * m_mu;
+ }
+ }
+
+ virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df)
+ {
+ }
+
+ virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df)
+ {
+ }
+
+ virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) {}
+
+ virtual btDeformableLagrangianForceType getForceType()
+ {
+ return BT_COROTATED_FORCE;
+ }
+};
#endif /* btCorotated_h */
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableGravityForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableGravityForce.h
index 13ee3eacb6..d91867f457 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableGravityForce.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableGravityForce.h
@@ -21,87 +21,85 @@
class btDeformableGravityForce : public btDeformableLagrangianForce
{
public:
- typedef btAlignedObjectArray<btVector3> TVStack;
- btVector3 m_gravity;
-
- btDeformableGravityForce(const btVector3& g) : m_gravity(g)
- {
- }
-
- virtual void addScaledForces(btScalar scale, TVStack& force)
- {
- addScaledGravityForce(scale, force);
- }
-
- virtual void addScaledExplicitForce(btScalar scale, TVStack& force)
- {
- addScaledGravityForce(scale, force);
- }
-
- virtual void addScaledDampingForce(btScalar scale, TVStack& force)
- {
- }
-
- virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df)
- {
- }
-
- virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df)
- {
- }
-
- virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA){}
-
- virtual void addScaledGravityForce(btScalar scale, TVStack& force)
- {
- int numNodes = getNumNodes();
- btAssert(numNodes <= force.size());
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- btSoftBody::Node& n = psb->m_nodes[j];
- size_t id = n.index;
- btScalar mass = (n.m_im == 0) ? 0 : 1. / n.m_im;
- btVector3 scaled_force = scale * m_gravity * mass;
- force[id] += scaled_force;
- }
- }
- }
-
- virtual btDeformableLagrangianForceType getForceType()
- {
- return BT_GRAVITY_FORCE;
- }
+ typedef btAlignedObjectArray<btVector3> TVStack;
+ btVector3 m_gravity;
- // the gravitational potential energy
- virtual double totalEnergy(btScalar dt)
- {
- double e = 0;
- for (int i = 0; i<m_softBodies.size();++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- const btSoftBody::Node& node = psb->m_nodes[j];
- if (node.m_im > 0)
- {
- e -= m_gravity.dot(node.m_q)/node.m_im;
- }
- }
- }
- return e;
- }
-
-
+ btDeformableGravityForce(const btVector3& g) : m_gravity(g)
+ {
+ }
+
+ virtual void addScaledForces(btScalar scale, TVStack& force)
+ {
+ addScaledGravityForce(scale, force);
+ }
+
+ virtual void addScaledExplicitForce(btScalar scale, TVStack& force)
+ {
+ addScaledGravityForce(scale, force);
+ }
+
+ virtual void addScaledDampingForce(btScalar scale, TVStack& force)
+ {
+ }
+
+ virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df)
+ {
+ }
+
+ virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df)
+ {
+ }
+
+ virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) {}
+
+ virtual void addScaledGravityForce(btScalar scale, TVStack& force)
+ {
+ int numNodes = getNumNodes();
+ btAssert(numNodes <= force.size());
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ btSoftBody::Node& n = psb->m_nodes[j];
+ size_t id = n.index;
+ btScalar mass = (n.m_im == 0) ? 0 : 1. / n.m_im;
+ btVector3 scaled_force = scale * m_gravity * mass * m_softBodies[i]->m_gravityFactor;
+ force[id] += scaled_force;
+ }
+ }
+ }
+
+ virtual btDeformableLagrangianForceType getForceType()
+ {
+ return BT_GRAVITY_FORCE;
+ }
+
+ // the gravitational potential energy
+ virtual double totalEnergy(btScalar dt)
+ {
+ double e = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ const btSoftBody::Node& node = psb->m_nodes[j];
+ if (node.m_im > 0)
+ {
+ e -= m_gravity.dot(node.m_q) / node.m_im;
+ }
+ }
+ }
+ return e;
+ }
};
#endif /* BT_DEFORMABLE_GRAVITY_FORCE_H */
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableLagrangianForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableLagrangianForce.h
index 0b6447442d..d58d825d1c 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableLagrangianForce.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableLagrangianForce.h
@@ -22,352 +22,351 @@
enum btDeformableLagrangianForceType
{
- BT_GRAVITY_FORCE = 1,
- BT_MASSSPRING_FORCE = 2,
- BT_COROTATED_FORCE = 3,
- BT_NEOHOOKEAN_FORCE = 4,
- BT_LINEAR_ELASTICITY_FORCE = 5,
- BT_MOUSE_PICKING_FORCE = 6
+ BT_GRAVITY_FORCE = 1,
+ BT_MASSSPRING_FORCE = 2,
+ BT_COROTATED_FORCE = 3,
+ BT_NEOHOOKEAN_FORCE = 4,
+ BT_LINEAR_ELASTICITY_FORCE = 5,
+ BT_MOUSE_PICKING_FORCE = 6
};
static inline double randomDouble(double low, double high)
{
- return low + static_cast<double>(rand()) / RAND_MAX * (high - low);
+ return low + static_cast<double>(rand()) / RAND_MAX * (high - low);
}
class btDeformableLagrangianForce
{
public:
- typedef btAlignedObjectArray<btVector3> TVStack;
- btAlignedObjectArray<btSoftBody *> m_softBodies;
- const btAlignedObjectArray<btSoftBody::Node*>* m_nodes;
-
- btDeformableLagrangianForce()
- {
- }
-
- virtual ~btDeformableLagrangianForce(){}
-
- // add all forces
- virtual void addScaledForces(btScalar scale, TVStack& force) = 0;
-
- // add damping df
- virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) = 0;
-
- // build diagonal of A matrix
- virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) = 0;
-
- // add elastic df
- virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) = 0;
-
- // add all forces that are explicit in explicit solve
- virtual void addScaledExplicitForce(btScalar scale, TVStack& force) = 0;
-
- // add all damping forces
- virtual void addScaledDampingForce(btScalar scale, TVStack& force) = 0;
-
- virtual btDeformableLagrangianForceType getForceType() = 0;
-
- virtual void reinitialize(bool nodeUpdated)
- {
- }
-
- // get number of nodes that have the force
- virtual int getNumNodes()
- {
- int numNodes = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- numNodes += m_softBodies[i]->m_nodes.size();
- }
- return numNodes;
- }
-
- // add a soft body to be affected by the particular lagrangian force
- virtual void addSoftBody(btSoftBody* psb)
- {
- m_softBodies.push_back(psb);
- }
-
- virtual void removeSoftBody(btSoftBody* psb)
- {
- m_softBodies.remove(psb);
- }
-
- virtual void setIndices(const btAlignedObjectArray<btSoftBody::Node*>* nodes)
- {
- m_nodes = nodes;
- }
-
- // Calculate the incremental deformable generated from the input dx
- virtual btMatrix3x3 Ds(int id0, int id1, int id2, int id3, const TVStack& dx)
- {
- btVector3 c1 = dx[id1] - dx[id0];
- btVector3 c2 = dx[id2] - dx[id0];
- btVector3 c3 = dx[id3] - dx[id0];
- return btMatrix3x3(c1,c2,c3).transpose();
- }
-
- // Calculate the incremental deformable generated from the current velocity
- virtual btMatrix3x3 DsFromVelocity(const btSoftBody::Node* n0, const btSoftBody::Node* n1, const btSoftBody::Node* n2, const btSoftBody::Node* n3)
- {
- btVector3 c1 = n1->m_v - n0->m_v;
- btVector3 c2 = n2->m_v - n0->m_v;
- btVector3 c3 = n3->m_v - n0->m_v;
- return btMatrix3x3(c1,c2,c3).transpose();
- }
-
- // test for addScaledElasticForce function
- virtual void testDerivative()
- {
- for (int i = 0; i<m_softBodies.size();++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- psb->m_nodes[j].m_q += btVector3(randomDouble(-.1, .1), randomDouble(-.1, .1), randomDouble(-.1, .1));
- }
- psb->updateDeformation();
- }
-
- TVStack dx;
- dx.resize(getNumNodes());
- TVStack dphi_dx;
- dphi_dx.resize(dx.size());
- for (int i =0; i < dphi_dx.size();++i)
- {
- dphi_dx[i].setZero();
- }
- addScaledForces(-1, dphi_dx);
-
- // write down the current position
- TVStack x;
- x.resize(dx.size());
- int counter = 0;
- for (int i = 0; i<m_softBodies.size();++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- x[counter] = psb->m_nodes[j].m_q;
- counter++;
- }
- }
- counter = 0;
-
- // populate dx with random vectors
- for (int i = 0; i < dx.size(); ++i)
- {
- dx[i].setX(randomDouble(-1, 1));
- dx[i].setY(randomDouble(-1, 1));
- dx[i].setZ(randomDouble(-1, 1));
- }
-
- btAlignedObjectArray<double> errors;
- for (int it = 0; it < 10; ++it)
- {
- for (int i = 0; i < dx.size(); ++i)
- {
- dx[i] *= 0.5;
- }
-
- // get dphi/dx * dx
- double dphi = 0;
- for (int i = 0; i < dx.size(); ++i)
- {
- dphi += dphi_dx[i].dot(dx[i]);
- }
-
-
- for (int i = 0; i<m_softBodies.size();++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- psb->m_nodes[j].m_q = x[counter] + dx[counter];
- counter++;
- }
- psb->updateDeformation();
- }
- counter = 0;
- double f1 = totalElasticEnergy(0);
-
- for (int i = 0; i<m_softBodies.size();++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- psb->m_nodes[j].m_q = x[counter] - dx[counter];
- counter++;
- }
- psb->updateDeformation();
- }
- counter = 0;
-
- double f2 = totalElasticEnergy(0);
-
- //restore m_q
- for (int i = 0; i<m_softBodies.size();++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- psb->m_nodes[j].m_q = x[counter];
- counter++;
- }
- psb->updateDeformation();
- }
- counter = 0;
- double error = f1-f2-2*dphi;
- errors.push_back(error);
- std::cout << "Iteration = " << it <<", f1 = " << f1 << ", f2 = " << f2 << ", error = " << error << std::endl;
- }
- for (int i = 1; i < errors.size(); ++i)
- {
- std::cout << "Iteration = " << i << ", ratio = " << errors[i-1]/errors[i] << std::endl;
- }
- }
-
- // test for addScaledElasticForce function
- virtual void testHessian()
- {
- for (int i = 0; i<m_softBodies.size();++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- psb->m_nodes[j].m_q += btVector3(randomDouble(-.1, .1), randomDouble(-.1, .1), randomDouble(-.1, .1));
- }
- psb->updateDeformation();
- }
-
-
- TVStack dx;
- dx.resize(getNumNodes());
- TVStack df;
- df.resize(dx.size());
- TVStack f1;
- f1.resize(dx.size());
- TVStack f2;
- f2.resize(dx.size());
-
-
- // write down the current position
- TVStack x;
- x.resize(dx.size());
- int counter = 0;
- for (int i = 0; i<m_softBodies.size();++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- x[counter] = psb->m_nodes[j].m_q;
- counter++;
- }
- }
- counter = 0;
-
- // populate dx with random vectors
- for (int i = 0; i < dx.size(); ++i)
- {
- dx[i].setX(randomDouble(-1, 1));
- dx[i].setY(randomDouble(-1, 1));
- dx[i].setZ(randomDouble(-1, 1));
- }
-
- btAlignedObjectArray<double> errors;
- for (int it = 0; it < 10; ++it)
- {
- for (int i = 0; i < dx.size(); ++i)
- {
- dx[i] *= 0.5;
- }
-
- // get df
- for (int i =0; i < df.size();++i)
- {
- df[i].setZero();
- f1[i].setZero();
- f2[i].setZero();
- }
-
- //set df
- addScaledElasticForceDifferential(-1, dx, df);
-
- for (int i = 0; i<m_softBodies.size();++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- psb->m_nodes[j].m_q = x[counter] + dx[counter];
- counter++;
- }
- psb->updateDeformation();
- }
- counter = 0;
-
- //set f1
- addScaledForces(-1, f1);
-
- for (int i = 0; i<m_softBodies.size();++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- psb->m_nodes[j].m_q = x[counter] - dx[counter];
- counter++;
- }
- psb->updateDeformation();
- }
- counter = 0;
-
- //set f2
- addScaledForces(-1, f2);
-
- //restore m_q
- for (int i = 0; i<m_softBodies.size();++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- psb->m_nodes[j].m_q = x[counter];
- counter++;
- }
- psb->updateDeformation();
- }
- counter = 0;
- double error = 0;
- for (int i = 0; i < df.size();++i)
- {
- btVector3 error_vector = f1[i]-f2[i]-2*df[i];
- error += error_vector.length2();
- }
- error = btSqrt(error);
- errors.push_back(error);
- std::cout << "Iteration = " << it << ", error = " << error << std::endl;
- }
- for (int i = 1; i < errors.size(); ++i)
- {
- std::cout << "Iteration = " << i << ", ratio = " << errors[i-1]/errors[i] << std::endl;
- }
- }
-
- //
- virtual double totalElasticEnergy(btScalar dt)
- {
- return 0;
- }
-
- //
- virtual double totalDampingEnergy(btScalar dt)
- {
- return 0;
- }
-
- // total Energy takes dt as input because certain energies depend on dt
- virtual double totalEnergy(btScalar dt)
- {
- return totalElasticEnergy(dt) + totalDampingEnergy(dt);
- }
+ typedef btAlignedObjectArray<btVector3> TVStack;  // array of btVector3, used for the force / dx / dv / df arguments below
+ btAlignedObjectArray<btSoftBody*> m_softBodies;  // soft bodies attached through addSoftBody()
+ const btAlignedObjectArray<btSoftBody::Node*>* m_nodes;  // pointer stored by setIndices(); this class never frees it
+
+ // Creates a force with no soft bodies attached. m_nodes is set to null explicitly so that it
+ // holds a well-defined value until setIndices() supplies the node list.
+ btDeformableLagrangianForce() : m_nodes(0)
+ {
+ }
+
+ virtual ~btDeformableLagrangianForce() {}  // virtual: derived forces may be destroyed through a base pointer; nothing is released here
+
+ // add all forces of this type, multiplied by `scale`, into `force`
+ virtual void addScaledForces(btScalar scale, TVStack& force) = 0;
+
+ // add damping df: the damping force differential for the velocity change `dv`, multiplied by `scale`, goes into `df`
+ virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) = 0;
+
+ // build diagonal of A matrix: the contribution of this force, multiplied by `scale`, goes into `diagA` (presumably A is the implicit-solve system matrix - TODO confirm)
+ virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) = 0;
+
+ // add elastic df: the elastic force differential for the displacement `dx`, multiplied by `scale`, goes into `df`
+ virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) = 0;
+
+ // add all forces that are explicit in explicit solve, multiplied by `scale`, into `force`
+ virtual void addScaledExplicitForce(btScalar scale, TVStack& force) = 0;
+
+ // add all damping forces, multiplied by `scale`, into `force`
+ virtual void addScaledDampingForce(btScalar scale, TVStack& force) = 0;
+
+ virtual void addScaledHessian(btScalar scale) {}  // optional hook: does nothing unless a subclass overrides it
+
+ virtual btDeformableLagrangianForceType getForceType() = 0;  // type tag identifying the concrete force
+
+ virtual void reinitialize(bool nodeUpdated)  // hook for subclasses; the base implementation ignores nodeUpdated and does nothing
+ {
+ }
+
+ // Returns the combined node count of every attached soft body, active or not.
+ virtual int getNumNodes()
+ {
+ int total = 0;
+ int remaining = m_softBodies.size();
+ while (remaining-- > 0)
+ {
+ total += m_softBodies[remaining]->m_nodes.size();
+ }
+ return total;
+ }
+
+ // add a soft body to be affected by the particular lagrangian force (appended to m_softBodies; no duplicate check is made here)
+ virtual void addSoftBody(btSoftBody* psb)
+ {
+ m_softBodies.push_back(psb);
+ }
+
+ virtual void removeSoftBody(btSoftBody* psb)  // detaches psb from this force by removing it from m_softBodies
+ {
+ m_softBodies.remove(psb);
+ }
+
+ virtual void setIndices(const btAlignedObjectArray<btSoftBody::Node*>* nodes)  // stores the pointer only; the array is not copied
+ {
+ m_nodes = nodes;
+ }
+
+ // Calculate the incremental deformable generated from the input dx:
+ // the three differences dx[id1..id3] - dx[id0] are handed to the btMatrix3x3 constructor
+ // and the transpose of that matrix is returned.
+ virtual btMatrix3x3 Ds(int id0, int id1, int id2, int id3, const TVStack& dx)
+ {
+ const btVector3& origin = dx[id0];
+ btMatrix3x3 edges(dx[id1] - origin, dx[id2] - origin, dx[id3] - origin);
+ return edges.transpose();
+ }
+
+ // Calculate the incremental deformable generated from the current velocity:
+ // same construction as Ds(), but built from the node velocities m_v relative to n0.
+ virtual btMatrix3x3 DsFromVelocity(const btSoftBody::Node* n0, const btSoftBody::Node* n1, const btSoftBody::Node* n2, const btSoftBody::Node* n3)
+ {
+ const btVector3& baseVelocity = n0->m_v;
+ btMatrix3x3 relativeVelocities(n1->m_v - baseVelocity, n2->m_v - baseVelocity, n3->m_v - baseVelocity);
+ return relativeVelocities.transpose();
+ }
+
+ // test for addScaledElasticForce function: central-difference check that compares totalElasticEnergy at x + dx minus totalElasticEnergy at x - dx against 2 * dphi, where dphi is the dot product of the output of addScaledForces(-1, ...) with dx; dx is halved on each of 10 iterations and the errors plus the ratios of consecutive errors are printed to std::cout
+ virtual void testDerivative()
+ {
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ psb->m_nodes[j].m_q += btVector3(randomDouble(-.1, .1), randomDouble(-.1, .1), randomDouble(-.1, .1));  // jitter every node position by up to 0.1 per axis before testing
+ }
+ psb->updateDeformation();
+ }
+
+ TVStack dx;
+ dx.resize(getNumNodes());
+ TVStack dphi_dx;
+ dphi_dx.resize(dx.size());
+ for (int i = 0; i < dphi_dx.size(); ++i)
+ {
+ dphi_dx[i].setZero();
+ }
+ addScaledForces(-1, dphi_dx);  // negated force, used below as the energy gradient; NOTE(review): subclasses may fold damping into addScaledForces - confirm this matches totalElasticEnergy
+
+ // write down the current position (taken after the jitter above, so the later restore returns to the jittered state)
+ TVStack x;
+ x.resize(dx.size());
+ int counter = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ x[counter] = psb->m_nodes[j].m_q;
+ counter++;
+ }
+ }
+ counter = 0;
+
+ // populate dx with random vectors, each component in [-1, 1]
+ for (int i = 0; i < dx.size(); ++i)
+ {
+ dx[i].setX(randomDouble(-1, 1));
+ dx[i].setY(randomDouble(-1, 1));
+ dx[i].setZ(randomDouble(-1, 1));
+ }
+
+ btAlignedObjectArray<double> errors;
+ for (int it = 0; it < 10; ++it)
+ {
+ for (int i = 0; i < dx.size(); ++i)
+ {
+ dx[i] *= 0.5;  // halve the step on every iteration
+ }
+
+ // get dphi/dx * dx
+ double dphi = 0;
+ for (int i = 0; i < dx.size(); ++i)
+ {
+ dphi += dphi_dx[i].dot(dx[i]);
+ }
+
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ psb->m_nodes[j].m_q = x[counter] + dx[counter];
+ counter++;
+ }
+ psb->updateDeformation();
+ }
+ counter = 0;
+ double f1 = totalElasticEnergy(0);  // energy at x + dx (dt passed as 0)
+
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ psb->m_nodes[j].m_q = x[counter] - dx[counter];
+ counter++;
+ }
+ psb->updateDeformation();
+ }
+ counter = 0;
+
+ double f2 = totalElasticEnergy(0);  // energy at x - dx
+
+ //restore m_q to the recorded positions x
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ psb->m_nodes[j].m_q = x[counter];
+ counter++;
+ }
+ psb->updateDeformation();
+ }
+ counter = 0;
+ double error = f1 - f2 - 2 * dphi;  // central difference minus the predicted change
+ errors.push_back(error);
+ std::cout << "Iteration = " << it << ", f1 = " << f1 << ", f2 = " << f2 << ", error = " << error << std::endl;
+ }
+ for (int i = 1; i < errors.size(); ++i)
+ {
+ std::cout << "Iteration = " << i << ", ratio = " << errors[i - 1] / errors[i] << std::endl;  // ratio of consecutive errors as the step is halved
+ }
+ }
+
+ // test for addScaledElasticForceDifferential function: central-difference check that compares addScaledForces(-1, ...) at x + dx minus the same at x - dx against 2 * df, where df comes from addScaledElasticForceDifferential(-1, dx, df); dx is halved on each of 10 iterations and the error norms plus the ratios of consecutive errors are printed to std::cout
+ virtual void testHessian()
+ {
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ psb->m_nodes[j].m_q += btVector3(randomDouble(-.1, .1), randomDouble(-.1, .1), randomDouble(-.1, .1));  // jitter every node position by up to 0.1 per axis before testing
+ }
+ psb->updateDeformation();
+ }
+
+ TVStack dx;
+ dx.resize(getNumNodes());
+ TVStack df;
+ df.resize(dx.size());
+ TVStack f1;
+ f1.resize(dx.size());
+ TVStack f2;
+ f2.resize(dx.size());
+
+ // write down the current position (taken after the jitter above, so the later restore returns to the jittered state)
+ TVStack x;
+ x.resize(dx.size());
+ int counter = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ x[counter] = psb->m_nodes[j].m_q;
+ counter++;
+ }
+ }
+ counter = 0;
+
+ // populate dx with random vectors, each component in [-1, 1]
+ for (int i = 0; i < dx.size(); ++i)
+ {
+ dx[i].setX(randomDouble(-1, 1));
+ dx[i].setY(randomDouble(-1, 1));
+ dx[i].setZ(randomDouble(-1, 1));
+ }
+
+ btAlignedObjectArray<double> errors;
+ for (int it = 0; it < 10; ++it)
+ {
+ for (int i = 0; i < dx.size(); ++i)
+ {
+ dx[i] *= 0.5;  // halve the step on every iteration
+ }
+
+ // clear df, f1 and f2 before they are accumulated into
+ for (int i = 0; i < df.size(); ++i)
+ {
+ df[i].setZero();
+ f1[i].setZero();
+ f2[i].setZero();
+ }
+
+ //set df
+ addScaledElasticForceDifferential(-1, dx, df);
+
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ psb->m_nodes[j].m_q = x[counter] + dx[counter];
+ counter++;
+ }
+ psb->updateDeformation();
+ }
+ counter = 0;
+
+ //set f1 (negated forces at x + dx)
+ addScaledForces(-1, f1);
+
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ psb->m_nodes[j].m_q = x[counter] - dx[counter];
+ counter++;
+ }
+ psb->updateDeformation();
+ }
+ counter = 0;
+
+ //set f2 (negated forces at x - dx)
+ addScaledForces(-1, f2);
+
+ //restore m_q to the recorded positions x
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ psb->m_nodes[j].m_q = x[counter];
+ counter++;
+ }
+ psb->updateDeformation();
+ }
+ counter = 0;
+ double error = 0;
+ for (int i = 0; i < df.size(); ++i)
+ {
+ btVector3 error_vector = f1[i] - f2[i] - 2 * df[i];  // central difference minus the predicted change, per node
+ error += error_vector.length2();
+ }
+ error = btSqrt(error);  // Euclidean norm over all nodes
+ errors.push_back(error);
+ std::cout << "Iteration = " << it << ", error = " << error << std::endl;
+ }
+ for (int i = 1; i < errors.size(); ++i)
+ {
+ std::cout << "Iteration = " << i << ", ratio = " << errors[i - 1] / errors[i] << std::endl;  // ratio of consecutive errors as the step is halved
+ }
+ }
+
+ // elastic energy of this force; the base implementation ignores dt and reports 0
+ virtual double totalElasticEnergy(btScalar dt)
+ {
+ return 0;
+ }
+
+ // damping energy of this force; the base implementation ignores dt and reports 0
+ virtual double totalDampingEnergy(btScalar dt)
+ {
+ return 0;
+ }
+
+ // total Energy takes dt as input because certain energies depend on dt
+ // Returns the sum of the elastic and the damping energy, both evaluated with the same dt.
+ virtual double totalEnergy(btScalar dt)
+ {
+ const double elastic = totalElasticEnergy(dt);
+ const double damping = totalDampingEnergy(dt);
+ return elastic + damping;
+ }
};
#endif /* BT_DEFORMABLE_LAGRANGIAN_FORCE */
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableLinearElasticityForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableLinearElasticityForce.h
index 106dc10ad6..971192050b 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableLinearElasticityForce.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableLinearElasticityForce.h
@@ -18,323 +18,445 @@
#include "btDeformableLagrangianForce.h"
#include "LinearMath/btQuickprof.h"
+#include "btSoftBodyInternals.h"
+#define TETRA_FLAT_THRESHOLD 0.01
class btDeformableLinearElasticityForce : public btDeformableLagrangianForce
{
public:
- typedef btAlignedObjectArray<btVector3> TVStack;
- btScalar m_mu, m_lambda;
- btScalar m_mu_damp, m_lambda_damp;
- btDeformableLinearElasticityForce(): m_mu(1), m_lambda(1)
- {
- btScalar damping = 0.05;
- m_mu_damp = damping * m_mu;
- m_lambda_damp = damping * m_lambda;
- }
-
- btDeformableLinearElasticityForce(btScalar mu, btScalar lambda, btScalar damping = 0.05): m_mu(mu), m_lambda(lambda)
- {
- m_mu_damp = damping * m_mu;
- m_lambda_damp = damping * m_lambda;
- }
-
- virtual void addScaledForces(btScalar scale, TVStack& force)
- {
- addScaledDampingForce(scale, force);
- addScaledElasticForce(scale, force);
- }
-
- virtual void addScaledExplicitForce(btScalar scale, TVStack& force)
- {
- addScaledElasticForce(scale, force);
- }
-
- // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search
- virtual void addScaledDampingForce(btScalar scale, TVStack& force)
- {
- if (m_mu_damp == 0 && m_lambda_damp == 0)
- return;
- int numNodes = getNumNodes();
- btAssert(numNodes <= force.size());
- btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1);
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_tetras.size(); ++j)
- {
- btSoftBody::Tetra& tetra = psb->m_tetras[j];
- btSoftBody::Node* node0 = tetra.m_n[0];
- btSoftBody::Node* node1 = tetra.m_n[1];
- btSoftBody::Node* node2 = tetra.m_n[2];
- btSoftBody::Node* node3 = tetra.m_n[3];
- size_t id0 = node0->index;
- size_t id1 = node1->index;
- size_t id2 = node2->index;
- size_t id3 = node3->index;
- btMatrix3x3 dF = DsFromVelocity(node0, node1, node2, node3) * tetra.m_Dm_inverse;
- btMatrix3x3 I;
- I.setIdentity();
- btMatrix3x3 dP = (dF + dF.transpose()) * m_mu_damp + I * (dF[0][0]+dF[1][1]+dF[2][2]) * m_lambda_damp;
- // firstPiolaDampingDifferential(psb->m_tetraScratchesTn[j], dF, dP);
- btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col);
- btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose();
-
- // damping force differential
- btScalar scale1 = scale * tetra.m_element_measure;
- force[id0] -= scale1 * df_on_node0;
- force[id1] -= scale1 * df_on_node123.getColumn(0);
- force[id2] -= scale1 * df_on_node123.getColumn(1);
- force[id3] -= scale1 * df_on_node123.getColumn(2);
- }
- }
- }
-
- virtual double totalElasticEnergy(btScalar dt)
- {
- double energy = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_tetraScratches.size(); ++j)
- {
- btSoftBody::Tetra& tetra = psb->m_tetras[j];
- btSoftBody::TetraScratch& s = psb->m_tetraScratches[j];
- energy += tetra.m_element_measure * elasticEnergyDensity(s);
- }
- }
- return energy;
- }
-
- // The damping energy is formulated as in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search
- virtual double totalDampingEnergy(btScalar dt)
- {
- double energy = 0;
- int sz = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- sz = btMax(sz, psb->m_nodes[j].index);
- }
- }
- TVStack dampingForce;
- dampingForce.resize(sz+1);
- for (int i = 0; i < dampingForce.size(); ++i)
- dampingForce[i].setZero();
- addScaledDampingForce(0.5, dampingForce);
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- const btSoftBody::Node& node = psb->m_nodes[j];
- energy -= dampingForce[node.index].dot(node.m_v) / dt;
- }
- }
- return energy;
- }
-
- double elasticEnergyDensity(const btSoftBody::TetraScratch& s)
- {
- double density = 0;
- btMatrix3x3 epsilon = (s.m_F + s.m_F.transpose()) * 0.5 - btMatrix3x3::getIdentity();
- btScalar trace = epsilon[0][0] + epsilon[1][1] + epsilon[2][2];
- density += m_mu * (epsilon[0].length2() + epsilon[1].length2() + epsilon[2].length2());
- density += m_lambda * trace * trace * 0.5;
- return density;
- }
-
- virtual void addScaledElasticForce(btScalar scale, TVStack& force)
- {
- int numNodes = getNumNodes();
- btAssert(numNodes <= force.size());
- btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1);
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- btScalar max_p = psb->m_cfg.m_maxStress;
- for (int j = 0; j < psb->m_tetras.size(); ++j)
- {
- btSoftBody::Tetra& tetra = psb->m_tetras[j];
- btMatrix3x3 P;
- firstPiola(psb->m_tetraScratches[j],P);
+ typedef btAlignedObjectArray<btVector3> TVStack;  // array of btVector3, used for the force arguments below
+ btScalar m_mu, m_lambda;  // Lame parameters; kept in sync with m_E / m_nu by the update*() helpers
+ btScalar m_E, m_nu; // Young's modulus and Poisson ratio
+ btScalar m_damping_alpha, m_damping_beta;  // alpha scales the mass * velocity damping term, beta scales the Lame parameters used for damping
+ btDeformableLinearElasticityForce() : m_mu(1), m_lambda(1), m_damping_alpha(0.01), m_damping_beta(0.01)  // defaults: mu = lambda = 1, both damping coefficients 0.01
+ {
+ updateYoungsModulusAndPoissonRatio();  // derive m_E and m_nu from the Lame parameters just set
+ }
+
+ btDeformableLinearElasticityForce(btScalar mu, btScalar lambda, btScalar damping_alpha = 0.01, btScalar damping_beta = 0.01) : m_mu(mu), m_lambda(lambda), m_damping_alpha(damping_alpha), m_damping_beta(damping_beta)  // caller-supplied Lame parameters; damping coefficients default to 0.01
+ {
+ updateYoungsModulusAndPoissonRatio();  // derive m_E and m_nu from the Lame parameters just set
+ }
+
+ // Recomputes m_E and m_nu from the current Lame parameters:
+ //   E  = mu * (3 * lambda + 2 * mu) / (lambda + mu)
+ //   nu = lambda / (2 * (lambda + mu))
+ // https://en.wikipedia.org/wiki/Lam%C3%A9_parameters
+ // NOTE(review): lambda + mu == 0 makes both divisions divide by zero; no guard is applied here.
+ void updateYoungsModulusAndPoissonRatio()
+ {
+ const btScalar lameSum = m_lambda + m_mu;
+ m_E = m_mu * (3 * m_lambda + 2 * m_mu) / lameSum;
+ m_nu = m_lambda * 0.5 / lameSum;
+ }
+
+ // Recomputes m_mu and m_lambda from the current Young's modulus and Poisson ratio:
+ //   mu     = E / (2 * (1 + nu))
+ //   lambda = E * nu / ((1 + nu) * (1 - 2 * nu))
+ // https://en.wikipedia.org/wiki/Lam%C3%A9_parameters
+ void updateLameParameters()
+ {
+ const btScalar onePlusNu = 1 + m_nu;
+ m_mu = m_E * 0.5 / onePlusNu;
+ m_lambda = m_E * m_nu / (onePlusNu * (1 - 2 * m_nu));
+ }
+
+ void setYoungsModulus(btScalar E)  // sets m_E and refreshes m_mu / m_lambda, keeping the current m_nu
+ {
+ m_E = E;
+ updateLameParameters();
+ }
+
+ void setPoissonRatio(btScalar nu)  // sets m_nu and refreshes m_mu / m_lambda; nu = 0.5 or nu = -1 zeroes a denominator in updateLameParameters()
+ {
+ m_nu = nu;
+ updateLameParameters();
+ }
+
+ void setDamping(btScalar damping_alpha, btScalar damping_beta)  // replaces both damping coefficients; nothing else is recomputed
+ {
+ m_damping_alpha = damping_alpha;
+ m_damping_beta = damping_beta;
+ }
+
+ void setLameParameters(btScalar mu, btScalar lambda)  // sets both Lame parameters and refreshes m_E / m_nu to match
+ {
+ m_mu = mu;
+ m_lambda = lambda;
+ updateYoungsModulusAndPoissonRatio();
+ }
+
+ virtual void addScaledForces(btScalar scale, TVStack& force)  // accumulates the damping force and then the elastic force, both multiplied by scale
+ {
+ addScaledDampingForce(scale, force);
+ addScaledElasticForce(scale, force);
+ }
+
+ virtual void addScaledExplicitForce(btScalar scale, TVStack& force)  // only the elastic force is added here; damping is left out
+ {
+ addScaledElasticForce(scale, force);
+ }
+
+ // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search. Two terms are subtracted from `force`, both multiplied by `scale`: a per-tetra term built from the node velocities with the Lame parameters scaled by m_damping_beta, and a per-node term m_damping_alpha * mass * velocity. Inactive soft bodies are skipped.
+ virtual void addScaledDampingForce(btScalar scale, TVStack& force)
+ {
+ if (m_damping_alpha == 0 && m_damping_beta == 0)  // nothing to add when both coefficients are zero
+ return;
+ btScalar mu_damp = m_damping_beta * m_mu;
+ btScalar lambda_damp = m_damping_beta * m_lambda;
+ int numNodes = getNumNodes();
+ btAssert(numNodes <= force.size());
+ btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1);
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_tetras.size(); ++j)
+ {
+ bool close_to_flat = (psb->m_tetraScratches[j].m_J < TETRA_FLAT_THRESHOLD);  // when m_J is below the threshold the corotation is not applied to this tetra
+ btSoftBody::Tetra& tetra = psb->m_tetras[j];
+ btSoftBody::Node* node0 = tetra.m_n[0];
+ btSoftBody::Node* node1 = tetra.m_n[1];
+ btSoftBody::Node* node2 = tetra.m_n[2];
+ btSoftBody::Node* node3 = tetra.m_n[3];
+ size_t id0 = node0->index;
+ size_t id1 = node1->index;
+ size_t id2 = node2->index;
+ size_t id3 = node3->index;
+ btMatrix3x3 dF = DsFromVelocity(node0, node1, node2, node3) * tetra.m_Dm_inverse;  // velocity-based differential of the deformation gradient
+ if (!close_to_flat)
+ {
+ dF = psb->m_tetraScratches[j].m_corotation.transpose() * dF;  // express dF in the frame given by the stored corotation
+ }
+ btMatrix3x3 I;
+ I.setIdentity();
+ btMatrix3x3 dP = (dF + dF.transpose()) * mu_damp + I * ((dF[0][0] + dF[1][1] + dF[2][2]) * lambda_damp);  // mu_damp * (dF + dF^T) + lambda_damp * trace(dF) * I
+ btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose();
+ if (!close_to_flat)
+ {
+ df_on_node123 = psb->m_tetraScratches[j].m_corotation * df_on_node123;  // apply the corotation again to the result
+ }
+ btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col;  // product with (-1, -1, -1): the negated sum of the three columns
+ // damping force differential
+ btScalar scale1 = scale * tetra.m_element_measure;
+ force[id0] -= scale1 * df_on_node0;
+ force[id1] -= scale1 * df_on_node123.getColumn(0);
+ force[id2] -= scale1 * df_on_node123.getColumn(1);
+ force[id3] -= scale1 * df_on_node123.getColumn(2);
+ }
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ const btSoftBody::Node& node = psb->m_nodes[j];
+ size_t id = node.index;
+ if (node.m_im > 0)
+ {
+ force[id] -= scale * node.m_v / node.m_im * m_damping_alpha;  // alpha * mass * velocity, with mass = 1 / m_im
+ }
+ }
+ }
+ }
+
+ // Elastic energy of all active soft bodies: for each tetra scratch entry, the tetra's
+ // m_element_measure times elasticEnergyDensity() of that entry. `dt` is not used.
+ virtual double totalElasticEnergy(btScalar dt)
+ {
+ double sum = 0;
+ const int bodyCount = m_softBodies.size();
+ for (int b = 0; b < bodyCount; ++b)
+ {
+ btSoftBody* body = m_softBodies[b];
+ if (body->isActive())
+ {
+ // the loop bound is the scratch array; m_tetras is read at the same index
+ for (int t = 0; t < body->m_tetraScratches.size(); ++t)
+ {
+ sum += body->m_tetras[t].m_element_measure * elasticEnergyDensity(body->m_tetraScratches[t]);
+ }
+ }
+ }
+ return sum;
+ }
+
+ // The damping energy is formulated as in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search
+ // Evaluates the damping force at scale 0.5 into a scratch buffer and returns
+ // -sum(dampingForce[node.index] . node.m_v) / dt over the nodes of all active soft bodies.
+ // dt is used as a divisor, so it must be non-zero.
+ virtual double totalDampingEnergy(btScalar dt)
+ {
+ double energy = 0;
+ int sz = 0;
+ // the scratch buffer is sized from the largest node index among the active bodies
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ sz = btMax(sz, psb->m_nodes[j].index);
+ }
+ }
+ TVStack dampingForce;
+ dampingForce.resize(sz + 1);
+ for (int i = 0; i < dampingForce.size(); ++i)
+ dampingForce[i].setZero();
+ addScaledDampingForce(0.5, dampingForce);
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ // Inactive bodies were left out when sizing dampingForce and receive no damping force,
+ // so indexing with their node indices could run past the end of the buffer.
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ const btSoftBody::Node& node = psb->m_nodes[j];
+ energy -= dampingForce[node.index].dot(node.m_v) / dt;
+ }
+ }
+ return energy;
+ }
+
+ // Energy density for one tetra scratch entry, with strain = (F + F^T) / 2 - I taken from s.m_F:
+ //   m_mu * (sum of squared strain entries) + m_lambda / 2 * trace(strain)^2
+ double elasticEnergyDensity(const btSoftBody::TetraScratch& s)
+ {
+ btMatrix3x3 strain = (s.m_F + s.m_F.transpose()) * 0.5 - btMatrix3x3::getIdentity();
+ btScalar strainTrace = strain[0][0] + strain[1][1] + strain[2][2];
+ // squared lengths of the three rows add up to the sum of all squared entries
+ double shearPart = m_mu * (strain[0].length2() + strain[1].length2() + strain[2].length2());
+ double volumePart = m_lambda * strainTrace * strainTrace * 0.5;
+ return shearPart + volumePart;
+ }
+
+ virtual void addScaledElasticForce(btScalar scale, TVStack& force)
+ {
+ int numNodes = getNumNodes();
+ btAssert(numNodes <= force.size());
+ btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1);
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ btScalar max_p = psb->m_cfg.m_maxStress;
+ for (int j = 0; j < psb->m_tetras.size(); ++j)
+ {
+ btSoftBody::Tetra& tetra = psb->m_tetras[j];
+ btMatrix3x3 P;
+ firstPiola(psb->m_tetraScratches[j], P);
#if USE_SVD
- if (max_p > 0)
- {
- // since we want to clamp the principal stress to max_p, we only need to
- // calculate SVD when sigma_0^2 + sigma_1^2 + sigma_2^2 > max_p * max_p
- btScalar trPTP = (P[0].length2() + P[1].length2() + P[2].length2());
- if (trPTP > max_p * max_p)
- {
- btMatrix3x3 U, V;
- btVector3 sigma;
- singularValueDecomposition(P, U, sigma, V);
- sigma[0] = btMin(sigma[0], max_p);
- sigma[1] = btMin(sigma[1], max_p);
- sigma[2] = btMin(sigma[2], max_p);
- sigma[0] = btMax(sigma[0], -max_p);
- sigma[1] = btMax(sigma[1], -max_p);
- sigma[2] = btMax(sigma[2], -max_p);
- btMatrix3x3 Sigma;
- Sigma.setIdentity();
- Sigma[0][0] = sigma[0];
- Sigma[1][1] = sigma[1];
- Sigma[2][2] = sigma[2];
- P = U * Sigma * V.transpose();
- }
- }
+ if (max_p > 0)
+ {
+ // since we want to clamp the principal stress to max_p, we only need to
+ // calculate SVD when sigma_0^2 + sigma_1^2 + sigma_2^2 > max_p * max_p
+ btScalar trPTP = (P[0].length2() + P[1].length2() + P[2].length2());
+ if (trPTP > max_p * max_p)
+ {
+ btMatrix3x3 U, V;
+ btVector3 sigma;
+ singularValueDecomposition(P, U, sigma, V);
+ sigma[0] = btMin(sigma[0], max_p);
+ sigma[1] = btMin(sigma[1], max_p);
+ sigma[2] = btMin(sigma[2], max_p);
+ sigma[0] = btMax(sigma[0], -max_p);
+ sigma[1] = btMax(sigma[1], -max_p);
+ sigma[2] = btMax(sigma[2], -max_p);
+ btMatrix3x3 Sigma;
+ Sigma.setIdentity();
+ Sigma[0][0] = sigma[0];
+ Sigma[1][1] = sigma[1];
+ Sigma[2][2] = sigma[2];
+ P = U * Sigma * V.transpose();
+ }
+ }
#endif
- // btVector3 force_on_node0 = P * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col);
- btMatrix3x3 force_on_node123 = P * tetra.m_Dm_inverse.transpose();
- btVector3 force_on_node0 = force_on_node123 * grad_N_hat_1st_col;
-
- btSoftBody::Node* node0 = tetra.m_n[0];
- btSoftBody::Node* node1 = tetra.m_n[1];
- btSoftBody::Node* node2 = tetra.m_n[2];
- btSoftBody::Node* node3 = tetra.m_n[3];
- size_t id0 = node0->index;
- size_t id1 = node1->index;
- size_t id2 = node2->index;
- size_t id3 = node3->index;
-
- // elastic force
- btScalar scale1 = scale * tetra.m_element_measure;
- force[id0] -= scale1 * force_on_node0;
- force[id1] -= scale1 * force_on_node123.getColumn(0);
- force[id2] -= scale1 * force_on_node123.getColumn(1);
- force[id3] -= scale1 * force_on_node123.getColumn(2);
- }
- }
- }
-
- // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search
- virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df)
- {
- if (m_mu_damp == 0 && m_lambda_damp == 0)
- return;
- int numNodes = getNumNodes();
- btAssert(numNodes <= df.size());
- btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1);
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_tetras.size(); ++j)
- {
- btSoftBody::Tetra& tetra = psb->m_tetras[j];
- btSoftBody::Node* node0 = tetra.m_n[0];
- btSoftBody::Node* node1 = tetra.m_n[1];
- btSoftBody::Node* node2 = tetra.m_n[2];
- btSoftBody::Node* node3 = tetra.m_n[3];
- size_t id0 = node0->index;
- size_t id1 = node1->index;
- size_t id2 = node2->index;
- size_t id3 = node3->index;
- btMatrix3x3 dF = Ds(id0, id1, id2, id3, dv) * tetra.m_Dm_inverse;
- btMatrix3x3 I;
- I.setIdentity();
- btMatrix3x3 dP = (dF + dF.transpose()) * m_mu_damp + I * (dF[0][0]+dF[1][1]+dF[2][2]) * m_lambda_damp;
- // firstPiolaDampingDifferential(psb->m_tetraScratchesTn[j], dF, dP);
- // btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col);
- btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose();
- btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col;
-
- // damping force differential
- btScalar scale1 = scale * tetra.m_element_measure;
- df[id0] -= scale1 * df_on_node0;
- df[id1] -= scale1 * df_on_node123.getColumn(0);
- df[id2] -= scale1 * df_on_node123.getColumn(1);
- df[id3] -= scale1 * df_on_node123.getColumn(2);
- }
- }
- }
-
- virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df)
- {
- int numNodes = getNumNodes();
- btAssert(numNodes <= df.size());
- btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1);
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_tetras.size(); ++j)
- {
- btSoftBody::Tetra& tetra = psb->m_tetras[j];
- btSoftBody::Node* node0 = tetra.m_n[0];
- btSoftBody::Node* node1 = tetra.m_n[1];
- btSoftBody::Node* node2 = tetra.m_n[2];
- btSoftBody::Node* node3 = tetra.m_n[3];
- size_t id0 = node0->index;
- size_t id1 = node1->index;
- size_t id2 = node2->index;
- size_t id3 = node3->index;
- btMatrix3x3 dF = Ds(id0, id1, id2, id3, dx) * tetra.m_Dm_inverse;
- btMatrix3x3 dP;
- firstPiolaDifferential(psb->m_tetraScratches[j], dF, dP);
- // btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col);
- btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose();
- btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col;
-
- // elastic force differential
- btScalar scale1 = scale * tetra.m_element_measure;
- df[id0] -= scale1 * df_on_node0;
- df[id1] -= scale1 * df_on_node123.getColumn(0);
- df[id2] -= scale1 * df_on_node123.getColumn(1);
- df[id3] -= scale1 * df_on_node123.getColumn(2);
- }
- }
- }
-
- void firstPiola(const btSoftBody::TetraScratch& s, btMatrix3x3& P)
- {
- btMatrix3x3 epsilon = (s.m_F + s.m_F.transpose()) * 0.5 - btMatrix3x3::getIdentity();
- btScalar trace = epsilon[0][0] + epsilon[1][1] + epsilon[2][2];
- P = epsilon * btScalar(2) * m_mu + btMatrix3x3::getIdentity() * m_lambda * trace;
- }
-
- // Let P be the first piola stress.
- // This function calculates the dP = dP/dF * dF
- void firstPiolaDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP)
- {
- btScalar trace = (dF[0][0] + dF[1][1] + dF[2][2]);
- dP = (dF + dF.transpose()) * m_mu + btMatrix3x3::getIdentity() * m_lambda * trace;
- }
-
- // Let Q be the damping stress.
- // This function calculates the dP = dQ/dF * dF
- void firstPiolaDampingDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP)
- {
- btScalar trace = (dF[0][0] + dF[1][1] + dF[2][2]);
- dP = (dF + dF.transpose()) * m_mu_damp + btMatrix3x3::getIdentity() * m_lambda_damp * trace;
- }
-
- virtual btDeformableLagrangianForceType getForceType()
- {
- return BT_LINEAR_ELASTICITY_FORCE;
- }
-
+ // btVector3 force_on_node0 = P * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col);
+ btMatrix3x3 force_on_node123 = psb->m_tetraScratches[j].m_corotation * P * tetra.m_Dm_inverse.transpose();
+ btVector3 force_on_node0 = force_on_node123 * grad_N_hat_1st_col;
+
+ btSoftBody::Node* node0 = tetra.m_n[0];
+ btSoftBody::Node* node1 = tetra.m_n[1];
+ btSoftBody::Node* node2 = tetra.m_n[2];
+ btSoftBody::Node* node3 = tetra.m_n[3];
+ size_t id0 = node0->index;
+ size_t id1 = node1->index;
+ size_t id2 = node2->index;
+ size_t id3 = node3->index;
+
+ // elastic force
+ btScalar scale1 = scale * tetra.m_element_measure;
+ force[id0] -= scale1 * force_on_node0;
+ force[id1] -= scale1 * force_on_node123.getColumn(0);
+ force[id2] -= scale1 * force_on_node123.getColumn(1);
+ force[id3] -= scale1 * force_on_node123.getColumn(2);
+ }
+ }
+ }
+
+ virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) {}  // empty body: this force leaves diagA untouched; scale is unused
+
+ // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search
+ virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df)  // accumulates (by subtraction) into df the damping-force differential for dv, weighted by scale
+ {
+ if (m_damping_alpha == 0 && m_damping_beta == 0)  // both damping coefficients are zero: nothing to accumulate
+ return;
+ btScalar mu_damp = m_damping_beta * m_mu;  // damping moduli are the elastic moduli scaled by m_damping_beta
+ btScalar lambda_damp = m_damping_beta * m_lambda;
+ int numNodes = getNumNodes();
+ btAssert(numNodes <= df.size());  // df is indexed by node->index below
+ btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1);
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())  // inactive soft bodies contribute nothing
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_tetras.size(); ++j)  // per-tetra term built from mu_damp / lambda_damp
+ {
+ bool close_to_flat = (psb->m_tetraScratches[j].m_J < TETRA_FLAT_THRESHOLD);  // corotation is skipped below when m_J is under the threshold
+ btSoftBody::Tetra& tetra = psb->m_tetras[j];
+ btSoftBody::Node* node0 = tetra.m_n[0];
+ btSoftBody::Node* node1 = tetra.m_n[1];
+ btSoftBody::Node* node2 = tetra.m_n[2];
+ btSoftBody::Node* node3 = tetra.m_n[3];
+ size_t id0 = node0->index;
+ size_t id1 = node1->index;
+ size_t id2 = node2->index;
+ size_t id3 = node3->index;
+ btMatrix3x3 dF = Ds(id0, id1, id2, id3, dv) * tetra.m_Dm_inverse;
+ if (!close_to_flat)
+ {
+ dF = psb->m_tetraScratches[j].m_corotation.transpose() * dF;  // pre-multiply by the transposed corotation
+ }
+ btMatrix3x3 I;
+ I.setIdentity();
+ btMatrix3x3 dP = (dF + dF.transpose()) * mu_damp + I * ((dF[0][0] + dF[1][1] + dF[2][2]) * lambda_damp);  // same expression as firstPiolaDampingDifferential, written inline
+ btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose();
+ if (!close_to_flat)
+ {
+ df_on_node123 = psb->m_tetraScratches[j].m_corotation * df_on_node123;  // pre-multiply the result by the corotation again
+ }
+ btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col;  // with (-1,-1,-1) this is the negated sum of the three columns (assuming the usual matrix-vector product)
+
+ // damping force differential
+ btScalar scale1 = scale * tetra.m_element_measure;  // weight each tetra by its m_element_measure
+ df[id0] -= scale1 * df_on_node0;
+ df[id1] -= scale1 * df_on_node123.getColumn(0);
+ df[id2] -= scale1 * df_on_node123.getColumn(1);
+ df[id3] -= scale1 * df_on_node123.getColumn(2);
+ }
+ for (int j = 0; j < psb->m_nodes.size(); ++j)  // per-node term weighted by m_damping_alpha
+ {
+ const btSoftBody::Node& node = psb->m_nodes[j];
+ size_t id = node.index;
+ if (node.m_im > 0)  // also guards the division by m_im on the next line
+ {
+ df[id] -= scale * dv[id] / node.m_im * m_damping_alpha;
+ }
+ }
+ }
+ }
+
+ virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df)  // accumulates (by subtraction) into df the elastic-force differential for displacement dx, weighted by scale
+ {
+ int numNodes = getNumNodes();
+ btAssert(numNodes <= df.size());  // df is indexed by node->index below
+ btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1);
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())  // inactive soft bodies contribute nothing
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_tetras.size(); ++j)
+ {
+ btSoftBody::Tetra& tetra = psb->m_tetras[j];
+ btSoftBody::Node* node0 = tetra.m_n[0];
+ btSoftBody::Node* node1 = tetra.m_n[1];
+ btSoftBody::Node* node2 = tetra.m_n[2];
+ btSoftBody::Node* node3 = tetra.m_n[3];
+ size_t id0 = node0->index;
+ size_t id1 = node1->index;
+ size_t id2 = node2->index;
+ size_t id3 = node3->index;
+ btMatrix3x3 dF = psb->m_tetraScratches[j].m_corotation.transpose() * Ds(id0, id1, id2, id3, dx) * tetra.m_Dm_inverse;  // always corotated here (no flat-tetra check as in the damping differential)
+ btMatrix3x3 dP;
+ firstPiolaDifferential(psb->m_tetraScratches[j], dF, dP);  // dP is the output argument
+ // btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col);
+ btMatrix3x3 df_on_node123 = psb->m_tetraScratches[j].m_corotation * dP * tetra.m_Dm_inverse.transpose();
+ btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col;  // with (-1,-1,-1) this is the negated sum of the three columns (assuming the usual matrix-vector product)
+
+ // elastic force differential
+ btScalar scale1 = scale * tetra.m_element_measure;  // weight each tetra by its m_element_measure
+ df[id0] -= scale1 * df_on_node0;
+ df[id1] -= scale1 * df_on_node123.getColumn(0);
+ df[id2] -= scale1 * df_on_node123.getColumn(1);
+ df[id3] -= scale1 * df_on_node123.getColumn(2);
+ }
+ }
+ }
+
+ void firstPiola(const btSoftBody::TetraScratch& s, btMatrix3x3& P)  // writes into P the stress built from s.m_F and s.m_corotation using m_mu and m_lambda
+ {
+ btMatrix3x3 corotated_F = s.m_corotation.transpose() * s.m_F;  // remove the stored corotation from F before measuring strain
+
+ btMatrix3x3 epsilon = (corotated_F + corotated_F.transpose()) * 0.5 - btMatrix3x3::getIdentity();  // symmetric part of corotated_F minus identity
+ btScalar trace = epsilon[0][0] + epsilon[1][1] + epsilon[2][2];
+ P = epsilon * btScalar(2) * m_mu + btMatrix3x3::getIdentity() * m_lambda * trace;  // P = 2 * m_mu * epsilon + m_lambda * tr(epsilon) * I
+ }
+
+ // Let P be the first piola stress.
+ // This function calculates the dP = dP/dF * dF
+ void firstPiolaDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP)  // dP is the output; s is not read in this body
+ {
+ btScalar trace = (dF[0][0] + dF[1][1] + dF[2][2]);
+ dP = (dF + dF.transpose()) * m_mu + btMatrix3x3::getIdentity() * m_lambda * trace;  // dP = m_mu * (dF + dF^T) + m_lambda * tr(dF) * I
+ }
+
+ // Let Q be the damping stress.
+ // This function calculates the dP = dQ/dF * dF
+ void firstPiolaDampingDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP)  // dP is the output; s is not read in this body
+ {
+ btScalar mu_damp = m_damping_beta * m_mu;  // damping moduli are the elastic moduli scaled by m_damping_beta
+ btScalar lambda_damp = m_damping_beta * m_lambda;
+ btScalar trace = (dF[0][0] + dF[1][1] + dF[2][2]);
+ dP = (dF + dF.transpose()) * mu_damp + btMatrix3x3::getIdentity() * lambda_damp * trace;  // same form as firstPiolaDifferential with the damping moduli
+ }
+
+ virtual void addScaledHessian(btScalar scale)  // adds per-node 3x3 contributions to each node's m_effectiveMass
+ {
+ btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1);
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())  // inactive soft bodies contribute nothing
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_tetras.size(); ++j)
+ {
+ btSoftBody::Tetra& tetra = psb->m_tetras[j];
+ btMatrix3x3 P;
+ firstPiola(psb->m_tetraScratches[j], P); // make sure scratch is evaluated at x_n + dt * vn
+ btMatrix3x3 force_on_node123 = psb->m_tetraScratches[j].m_corotation * P * tetra.m_Dm_inverse.transpose();  // same per-node force expression as in addScaledElasticForce
+ btVector3 force_on_node0 = force_on_node123 * grad_N_hat_1st_col;
+ btSoftBody::Node* node0 = tetra.m_n[0];
+ btSoftBody::Node* node1 = tetra.m_n[1];
+ btSoftBody::Node* node2 = tetra.m_n[2];
+ btSoftBody::Node* node3 = tetra.m_n[3];
+ btScalar scale1 = scale * (scale + m_damping_beta) * tetra.m_element_measure; // stiff and stiffness-damping terms;
+ node0->m_effectiveMass += OuterProduct(force_on_node0, force_on_node0) * scale1;  // each node receives the outer product of its own force vector with itself
+ node1->m_effectiveMass += OuterProduct(force_on_node123.getColumn(0), force_on_node123.getColumn(0)) * scale1;
+ node2->m_effectiveMass += OuterProduct(force_on_node123.getColumn(1), force_on_node123.getColumn(1)) * scale1;
+ node3->m_effectiveMass += OuterProduct(force_on_node123.getColumn(2), force_on_node123.getColumn(2)) * scale1;
+ }
+ for (int j = 0; j < psb->m_nodes.size(); ++j)  // per-node term weighted by m_damping_alpha
+ {
+ btSoftBody::Node& node = psb->m_nodes[j];
+ if (node.m_im > 0)  // also guards the 1.0 / m_im below
+ {
+ btMatrix3x3 I;
+ I.setIdentity();
+ node.m_effectiveMass += I * (scale * (1.0 / node.m_im) * m_damping_alpha);  // isotropic (identity-scaled) contribution
+ }
+ }
+ }
+ }
+
+ virtual btDeformableLagrangianForceType getForceType()  // type tag for this force
+ {
+ return BT_LINEAR_ELASTICITY_FORCE;
+ }
};
#endif /* BT_LINEAR_ELASTICITY_H */
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMassSpringForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableMassSpringForce.h
index b128df92cc..8c97bd1ba8 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableMassSpringForce.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableMassSpringForce.h
@@ -20,282 +20,282 @@
class btDeformableMassSpringForce : public btDeformableLagrangianForce
{
- // If true, the damping force will be in the direction of the spring
- // If false, the damping force will be in the direction of the velocity
- bool m_momentum_conserving;
- btScalar m_elasticStiffness, m_dampingStiffness, m_bendingStiffness;
+ // If true, the damping force will be in the direction of the spring
+ // If false, the damping force will be in the direction of the velocity
+ bool m_momentum_conserving;
+ btScalar m_elasticStiffness, m_dampingStiffness, m_bendingStiffness;
+
public:
- typedef btAlignedObjectArray<btVector3> TVStack;
- btDeformableMassSpringForce() : m_momentum_conserving(false), m_elasticStiffness(1), m_dampingStiffness(0.05)
- {
- }
- btDeformableMassSpringForce(btScalar k, btScalar d, bool conserve_angular = true, double bending_k = -1) : m_momentum_conserving(conserve_angular), m_elasticStiffness(k), m_dampingStiffness(d), m_bendingStiffness(bending_k)
- {
- if (m_bendingStiffness < btScalar(0))
- {
- m_bendingStiffness = m_elasticStiffness;
- }
- }
-
- virtual void addScaledForces(btScalar scale, TVStack& force)
- {
- addScaledDampingForce(scale, force);
- addScaledElasticForce(scale, force);
- }
-
- virtual void addScaledExplicitForce(btScalar scale, TVStack& force)
- {
- addScaledElasticForce(scale, force);
- }
-
- virtual void addScaledDampingForce(btScalar scale, TVStack& force)
- {
- int numNodes = getNumNodes();
- btAssert(numNodes <= force.size());
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- const btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_links.size(); ++j)
- {
- const btSoftBody::Link& link = psb->m_links[j];
- btSoftBody::Node* node1 = link.m_n[0];
- btSoftBody::Node* node2 = link.m_n[1];
- size_t id1 = node1->index;
- size_t id2 = node2->index;
-
- // damping force
- btVector3 v_diff = (node2->m_v - node1->m_v);
- btVector3 scaled_force = scale * m_dampingStiffness * v_diff;
- if (m_momentum_conserving)
- {
- if ((node2->m_x - node1->m_x).norm() > SIMD_EPSILON)
- {
- btVector3 dir = (node2->m_x - node1->m_x).normalized();
- scaled_force = scale * m_dampingStiffness * v_diff.dot(dir) * dir;
- }
- }
- force[id1] += scaled_force;
- force[id2] -= scaled_force;
- }
- }
- }
-
- virtual void addScaledElasticForce(btScalar scale, TVStack& force)
- {
- int numNodes = getNumNodes();
- btAssert(numNodes <= force.size());
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- const btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_links.size(); ++j)
- {
- const btSoftBody::Link& link = psb->m_links[j];
- btSoftBody::Node* node1 = link.m_n[0];
- btSoftBody::Node* node2 = link.m_n[1];
- btScalar r = link.m_rl;
- size_t id1 = node1->index;
- size_t id2 = node2->index;
-
- // elastic force
- btVector3 dir = (node2->m_q - node1->m_q);
- btVector3 dir_normalized = (dir.norm() > SIMD_EPSILON) ? dir.normalized() : btVector3(0,0,0);
- btScalar scaled_stiffness = scale * (link.m_bbending ? m_bendingStiffness : m_elasticStiffness);
- btVector3 scaled_force = scaled_stiffness * (dir - dir_normalized * r);
- force[id1] += scaled_force;
- force[id2] -= scaled_force;
- }
- }
- }
-
- virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df)
- {
- // implicit damping force differential
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- btScalar scaled_k_damp = m_dampingStiffness * scale;
- for (int j = 0; j < psb->m_links.size(); ++j)
- {
- const btSoftBody::Link& link = psb->m_links[j];
- btSoftBody::Node* node1 = link.m_n[0];
- btSoftBody::Node* node2 = link.m_n[1];
- size_t id1 = node1->index;
- size_t id2 = node2->index;
+ typedef btAlignedObjectArray<btVector3> TVStack;
+ btDeformableMassSpringForce() : m_momentum_conserving(false), m_elasticStiffness(1), m_dampingStiffness(0.05)  // NOTE(review): m_bendingStiffness is not in this initializer list, yet addScaledElasticForce reads it for m_bbending links - confirm it is set elsewhere
+ {
+ }
+ btDeformableMassSpringForce(btScalar k, btScalar d, bool conserve_angular = true, double bending_k = -1) : m_momentum_conserving(conserve_angular), m_elasticStiffness(k), m_dampingStiffness(d), m_bendingStiffness(bending_k)  // k: elastic stiffness, d: damping stiffness, bending_k: stiffness for m_bbending links
+ {
+ if (m_bendingStiffness < btScalar(0))  // a negative bending_k (including the default -1) falls back to the elastic stiffness
+ {
+ m_bendingStiffness = m_elasticStiffness;
+ }
+ }
+
+ virtual void addScaledForces(btScalar scale, TVStack& force)  // accumulates both the damping and the elastic force, each weighted by scale
+ {
+ addScaledDampingForce(scale, force);
+ addScaledElasticForce(scale, force);
+ }
+
+ virtual void addScaledExplicitForce(btScalar scale, TVStack& force)  // only the elastic force is added here; damping is not
+ {
+ addScaledElasticForce(scale, force);
+ }
+
+ virtual void addScaledDampingForce(btScalar scale, TVStack& force)  // accumulates, per link, equal and opposite damping forces on the two end nodes
+ {
+ int numNodes = getNumNodes();
+ btAssert(numNodes <= force.size());  // force is indexed by node->index below
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ const btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())  // inactive soft bodies contribute nothing
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_links.size(); ++j)
+ {
+ const btSoftBody::Link& link = psb->m_links[j];
+ btSoftBody::Node* node1 = link.m_n[0];
+ btSoftBody::Node* node2 = link.m_n[1];
+ size_t id1 = node1->index;
+ size_t id2 = node2->index;
+
+ // damping force
+ btVector3 v_diff = (node2->m_v - node1->m_v);
+ btVector3 scaled_force = scale * m_dampingStiffness * v_diff;  // default: proportional to the full velocity difference
+ if (m_momentum_conserving)
+ {
+ if ((node2->m_x - node1->m_x).norm() > SIMD_EPSILON)  // only when the end points are not coincident, so the direction is well defined
+ {
+ btVector3 dir = (node2->m_x - node1->m_x).normalized();
+ scaled_force = scale * m_dampingStiffness * v_diff.dot(dir) * dir;  // keep only the component of v_diff along the link direction
+ }
+ }
+ force[id1] += scaled_force;  // equal and opposite contributions
+ force[id2] -= scaled_force;
+ }
+ }
+ }
+
+ virtual void addScaledElasticForce(btScalar scale, TVStack& force)  // accumulates, per link, equal and opposite spring forces computed from the nodes' m_q positions
+ {
+ int numNodes = getNumNodes();
+ btAssert(numNodes <= force.size());  // force is indexed by node->index below
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ const btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())  // inactive soft bodies contribute nothing
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_links.size(); ++j)
+ {
+ const btSoftBody::Link& link = psb->m_links[j];
+ btSoftBody::Node* node1 = link.m_n[0];
+ btSoftBody::Node* node2 = link.m_n[1];
+ btScalar r = link.m_rl;  // compared against the current link length below
+ size_t id1 = node1->index;
+ size_t id2 = node2->index;
+
+ // elastic force
+ btVector3 dir = (node2->m_q - node1->m_q);
+ btVector3 dir_normalized = (dir.norm() > SIMD_EPSILON) ? dir.normalized() : btVector3(0, 0, 0);  // zero direction for (near-)coincident end points
+ btScalar scaled_stiffness = scale * (link.m_bbending ? m_bendingStiffness : m_elasticStiffness);  // m_bbending links use the bending stiffness
+ btVector3 scaled_force = scaled_stiffness * (dir - dir_normalized * r);  // stiffness * (length - r) along the link when the direction is defined
+ force[id1] += scaled_force;  // equal and opposite contributions
+ force[id2] -= scaled_force;
+ }
+ }
+ }
+
+ virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df)  // accumulates into df the per-link damping-force differential for velocity change dv
+ {
+ // implicit damping force differential
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())  // inactive soft bodies contribute nothing
+ {
+ continue;
+ }
+ btScalar scaled_k_damp = m_dampingStiffness * scale;
+ for (int j = 0; j < psb->m_links.size(); ++j)
+ {
+ const btSoftBody::Link& link = psb->m_links[j];
+ btSoftBody::Node* node1 = link.m_n[0];
+ btSoftBody::Node* node2 = link.m_n[1];
+ size_t id1 = node1->index;
+ size_t id2 = node2->index;
+
+ btVector3 local_scaled_df = scaled_k_damp * (dv[id2] - dv[id1]);  // default: proportional to the full dv difference
+ if (m_momentum_conserving)
+ {
+ if ((node2->m_x - node1->m_x).norm() > SIMD_EPSILON)  // only when the end points are not coincident
+ {
+ btVector3 dir = (node2->m_x - node1->m_x).normalized();
+ local_scaled_df = scaled_k_damp * (dv[id2] - dv[id1]).dot(dir) * dir;  // keep only the component along the link direction
+ }
+ }
+ df[id1] += local_scaled_df;  // equal and opposite contributions
+ df[id2] -= local_scaled_df;
+ }
+ }
+ }
+
+ virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA)  // subtracts each link's per-axis damping contribution from the diagA entries of its two end nodes
+ {
+ // implicit damping force differential
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())  // inactive soft bodies contribute nothing
+ {
+ continue;
+ }
+ btScalar scaled_k_damp = m_dampingStiffness * scale;
+ for (int j = 0; j < psb->m_links.size(); ++j)
+ {
+ const btSoftBody::Link& link = psb->m_links[j];
+ btSoftBody::Node* node1 = link.m_n[0];
+ btSoftBody::Node* node2 = link.m_n[1];
+ size_t id1 = node1->index;
+ size_t id2 = node2->index;
+ if (m_momentum_conserving)
+ {
+ if ((node2->m_x - node1->m_x).norm() > SIMD_EPSILON)  // coincident end points add nothing in this branch
+ {
+ btVector3 dir = (node2->m_x - node1->m_x).normalized();
+ for (int d = 0; d < 3; ++d)
+ {
+ if (node1->m_im > 0)  // nodes with non-positive m_im are left untouched
+ diagA[id1][d] -= scaled_k_damp * dir[d] * dir[d];  // per-axis weight is the squared direction component
+ if (node2->m_im > 0)
+ diagA[id2][d] -= scaled_k_damp * dir[d] * dir[d];
+ }
+ }
+ }
+ else
+ {
+ for (int d = 0; d < 3; ++d)  // non-projected damping: the same value on every axis
+ {
+ if (node1->m_im > 0)
+ diagA[id1][d] -= scaled_k_damp;
+ if (node2->m_im > 0)
+ diagA[id2][d] -= scaled_k_damp;
+ }
+ }
+ }
+ }
+ }
+
+ virtual double totalElasticEnergy(btScalar dt)  // returns the summed spring energy over all links of active soft bodies; dt is not read in this body
+ {
+ double energy = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ const btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())  // inactive soft bodies contribute nothing
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_links.size(); ++j)
+ {
+ const btSoftBody::Link& link = psb->m_links[j];
+ btSoftBody::Node* node1 = link.m_n[0];
+ btSoftBody::Node* node2 = link.m_n[1];
+ btScalar r = link.m_rl;
+
+ // elastic force
+ btVector3 dir = (node2->m_q - node1->m_q);
+ energy += 0.5 * m_elasticStiffness * (dir.norm() - r) * (dir.norm() - r);  // 0.5 * k * (length - r)^2. NOTE(review): m_elasticStiffness is used for every link, whereas addScaledElasticForce uses m_bendingStiffness for m_bbending links - confirm this is intended
+ }
+ }
+ return energy;
+ }
+
+ virtual double totalDampingEnergy(btScalar dt)  // returns minus the sum over nodes of (0.5-scaled damping force . m_v) / dt
+ {
+ double energy = 0;
+ int sz = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)  // find the largest node index among active soft bodies
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ sz = btMax(sz, psb->m_nodes[j].index);
+ }
+ }
+ TVStack dampingForce;
+ dampingForce.resize(sz + 1);  // one slot per node index up to the largest one found
+ for (int i = 0; i < dampingForce.size(); ++i)
+ dampingForce[i].setZero();
+ addScaledDampingForce(0.5, dampingForce);  // accumulate the damping force with scale 0.5
+ for (int i = 0; i < m_softBodies.size(); ++i)  // NOTE(review): unlike the sizing loop above, inactive soft bodies are not skipped here - confirm their node indices stay within dampingForce
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ const btSoftBody::Node& node = psb->m_nodes[j];
+ energy -= dampingForce[node.index].dot(node.m_v) / dt;  // divides by dt: callers must pass a non-zero dt
+ }
+ }
+ return energy;
+ }
+
+ virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df)
+ {
+ // implicit elastic force differential
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ const btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_links.size(); ++j)
+ {
+ const btSoftBody::Link& link = psb->m_links[j];
+ btSoftBody::Node* node1 = link.m_n[0];
+ btSoftBody::Node* node2 = link.m_n[1];
+ size_t id1 = node1->index;
+ size_t id2 = node2->index;
+ btScalar r = link.m_rl;
- btVector3 local_scaled_df = scaled_k_damp * (dv[id2] - dv[id1]);
- if (m_momentum_conserving)
- {
- if ((node2->m_x - node1->m_x).norm() > SIMD_EPSILON)
- {
- btVector3 dir = (node2->m_x - node1->m_x).normalized();
- local_scaled_df= scaled_k_damp * (dv[id2] - dv[id1]).dot(dir) * dir;
- }
- }
- df[id1] += local_scaled_df;
- df[id2] -= local_scaled_df;
- }
- }
- }
-
- virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA)
- {
- // implicit damping force differential
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- btScalar scaled_k_damp = m_dampingStiffness * scale;
- for (int j = 0; j < psb->m_links.size(); ++j)
- {
- const btSoftBody::Link& link = psb->m_links[j];
- btSoftBody::Node* node1 = link.m_n[0];
- btSoftBody::Node* node2 = link.m_n[1];
- size_t id1 = node1->index;
- size_t id2 = node2->index;
- if (m_momentum_conserving)
- {
- if ((node2->m_x - node1->m_x).norm() > SIMD_EPSILON)
- {
- btVector3 dir = (node2->m_x - node1->m_x).normalized();
- for (int d = 0; d < 3; ++d)
- {
- if (node1->m_im > 0)
- diagA[id1][d] -= scaled_k_damp * dir[d] * dir[d];
- if (node2->m_im > 0)
- diagA[id2][d] -= scaled_k_damp * dir[d] * dir[d];
- }
- }
- }
- else
- {
- for (int d = 0; d < 3; ++d)
- {
- if (node1->m_im > 0)
- diagA[id1][d] -= scaled_k_damp;
- if (node2->m_im > 0)
- diagA[id2][d] -= scaled_k_damp;
- }
- }
- }
- }
- }
-
- virtual double totalElasticEnergy(btScalar dt)
- {
- double energy = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- const btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_links.size(); ++j)
- {
- const btSoftBody::Link& link = psb->m_links[j];
- btSoftBody::Node* node1 = link.m_n[0];
- btSoftBody::Node* node2 = link.m_n[1];
- btScalar r = link.m_rl;
+ btVector3 dir = (node1->m_q - node2->m_q);
+ btScalar dir_norm = dir.norm();
+ btVector3 dir_normalized = (dir_norm > SIMD_EPSILON) ? dir.normalized() : btVector3(0, 0, 0);
+ btVector3 dx_diff = dx[id1] - dx[id2];
+ btVector3 scaled_df = btVector3(0, 0, 0);
+ btScalar scaled_k = scale * (link.m_bbending ? m_bendingStiffness : m_elasticStiffness);
+ if (dir_norm > SIMD_EPSILON)
+ {
+ scaled_df -= scaled_k * dir_normalized.dot(dx_diff) * dir_normalized;
+ scaled_df += scaled_k * dir_normalized.dot(dx_diff) * ((dir_norm - r) / dir_norm) * dir_normalized;
+ scaled_df -= scaled_k * ((dir_norm - r) / dir_norm) * dx_diff;
+ }
- // elastic force
- btVector3 dir = (node2->m_q - node1->m_q);
- energy += 0.5 * m_elasticStiffness * (dir.norm() - r) * (dir.norm() -r);
- }
- }
- return energy;
- }
-
- virtual double totalDampingEnergy(btScalar dt)
- {
- double energy = 0;
- int sz = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- sz = btMax(sz, psb->m_nodes[j].index);
- }
- }
- TVStack dampingForce;
- dampingForce.resize(sz+1);
- for (int i = 0; i < dampingForce.size(); ++i)
- dampingForce[i].setZero();
- addScaledDampingForce(0.5, dampingForce);
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- const btSoftBody::Node& node = psb->m_nodes[j];
- energy -= dampingForce[node.index].dot(node.m_v) / dt;
- }
- }
- return energy;
- }
-
- virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df)
- {
- // implicit damping force differential
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- const btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_links.size(); ++j)
- {
- const btSoftBody::Link& link = psb->m_links[j];
- btSoftBody::Node* node1 = link.m_n[0];
- btSoftBody::Node* node2 = link.m_n[1];
- size_t id1 = node1->index;
- size_t id2 = node2->index;
- btScalar r = link.m_rl;
+ df[id1] += scaled_df;
+ df[id2] -= scaled_df;
+ }
+ }
+ }
- btVector3 dir = (node1->m_q - node2->m_q);
- btScalar dir_norm = dir.norm();
- btVector3 dir_normalized = (dir_norm > SIMD_EPSILON) ? dir.normalized() : btVector3(0,0,0);
- btVector3 dx_diff = dx[id1] - dx[id2];
- btVector3 scaled_df = btVector3(0,0,0);
- btScalar scaled_k = scale * (link.m_bbending ? m_bendingStiffness : m_elasticStiffness);
- if (dir_norm > SIMD_EPSILON)
- {
- scaled_df -= scaled_k * dir_normalized.dot(dx_diff) * dir_normalized;
- scaled_df += scaled_k * dir_normalized.dot(dx_diff) * ((dir_norm-r)/dir_norm) * dir_normalized;
- scaled_df -= scaled_k * ((dir_norm-r)/dir_norm) * dx_diff;
- }
-
- df[id1] += scaled_df;
- df[id2] -= scaled_df;
- }
- }
- }
-
- virtual btDeformableLagrangianForceType getForceType()
- {
- return BT_MASSSPRING_FORCE;
- }
-
+ virtual btDeformableLagrangianForceType getForceType()  // type tag for this force
+ {
+ return BT_MASSSPRING_FORCE;
+ }
};
#endif /* btMassSpring_h */
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMousePickingForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableMousePickingForce.h
index 07c10935f4..d218d96214 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableMousePickingForce.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableMousePickingForce.h
@@ -20,126 +20,143 @@
class btDeformableMousePickingForce : public btDeformableLagrangianForce
{
- // If true, the damping force will be in the direction of the spring
- // If false, the damping force will be in the direction of the velocity
- btScalar m_elasticStiffness, m_dampingStiffness;
- const btSoftBody::Face& m_face;
- btVector3 m_mouse_pos;
- btScalar m_maxForce;
+ // If true, the damping force will be in the direction of the spring
+ // If false, the damping force will be in the direction of the velocity
+ btScalar m_elasticStiffness, m_dampingStiffness;
+ const btSoftBody::Face& m_face;
+ btVector3 m_mouse_pos;
+ btScalar m_maxForce;
+
public:
- typedef btAlignedObjectArray<btVector3> TVStack;
- btDeformableMousePickingForce(btScalar k, btScalar d, const btSoftBody::Face& face, btVector3 mouse_pos, btScalar maxForce = 0.3) : m_elasticStiffness(k), m_dampingStiffness(d), m_face(face), m_mouse_pos(mouse_pos), m_maxForce(maxForce)
- {
- }
-
- virtual void addScaledForces(btScalar scale, TVStack& force)
- {
- addScaledDampingForce(scale, force);
- addScaledElasticForce(scale, force);
- }
-
- virtual void addScaledExplicitForce(btScalar scale, TVStack& force)
- {
- addScaledElasticForce(scale, force);
- }
-
- virtual void addScaledDampingForce(btScalar scale, TVStack& force)
- {
- for (int i = 0; i < 3; ++i)
- {
- btVector3 v_diff = m_face.m_n[i]->m_v;
- btVector3 scaled_force = scale * m_dampingStiffness * v_diff;
- if ((m_face.m_n[i]->m_x - m_mouse_pos).norm() > SIMD_EPSILON)
- {
- btVector3 dir = (m_face.m_n[i]->m_x - m_mouse_pos).normalized();
- scaled_force = scale * m_dampingStiffness * v_diff.dot(dir) * dir;
- }
- force[m_face.m_n[i]->index] -= scaled_force;
- }
- }
-
- virtual void addScaledElasticForce(btScalar scale, TVStack& force)
- {
- btScalar scaled_stiffness = scale * m_elasticStiffness;
- for (int i = 0; i < 3; ++i)
- {
- btVector3 dir = (m_face.m_n[i]->m_q - m_mouse_pos);
- btVector3 scaled_force = scaled_stiffness * dir;
- if (scaled_force.safeNorm() > m_maxForce)
- {
- scaled_force.safeNormalize();
- scaled_force *= m_maxForce;
- }
- force[m_face.m_n[i]->index] -= scaled_force;
- }
- }
-
- virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df)
- {
- btScalar scaled_k_damp = m_dampingStiffness * scale;
- for (int i = 0; i < 3; ++i)
- {
- btVector3 local_scaled_df = scaled_k_damp * dv[m_face.m_n[i]->index];
- if ((m_face.m_n[i]->m_x - m_mouse_pos).norm() > SIMD_EPSILON)
- {
- btVector3 dir = (m_face.m_n[i]->m_x - m_mouse_pos).normalized();
- local_scaled_df= scaled_k_damp * dv[m_face.m_n[i]->index].dot(dir) * dir;
- }
- df[m_face.m_n[i]->index] -= local_scaled_df;
- }
- }
-
- virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA){}
-
- virtual double totalElasticEnergy(btScalar dt)
- {
- double energy = 0;
- for (int i = 0; i < 3; ++i)
- {
- btVector3 dir = (m_face.m_n[i]->m_q - m_mouse_pos);
- btVector3 scaled_force = m_elasticStiffness * dir;
- if (scaled_force.safeNorm() > m_maxForce)
- {
- scaled_force.safeNormalize();
- scaled_force *= m_maxForce;
- }
- energy += 0.5 * scaled_force.dot(dir);
- }
- return energy;
- }
-
- virtual double totalDampingEnergy(btScalar dt)
- {
- double energy = 0;
- for (int i = 0; i < 3; ++i)
- {
- btVector3 v_diff = m_face.m_n[i]->m_v;
- btVector3 scaled_force = m_dampingStiffness * v_diff;
- if ((m_face.m_n[i]->m_x - m_mouse_pos).norm() > SIMD_EPSILON)
- {
- btVector3 dir = (m_face.m_n[i]->m_x - m_mouse_pos).normalized();
- scaled_force = m_dampingStiffness * v_diff.dot(dir) * dir;
- }
- energy -= scaled_force.dot(m_face.m_n[i]->m_v) / dt;
- }
- return energy;
- }
-
- virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df)
- {
- //TODO
- }
-
- void setMousePos(const btVector3& p)
- {
- m_mouse_pos = p;
- }
-
- virtual btDeformableLagrangianForceType getForceType()
- {
- return BT_MOUSE_PICKING_FORCE;
- }
-
+ typedef btAlignedObjectArray<btVector3> TVStack;
+ btDeformableMousePickingForce(btScalar k, btScalar d, const btSoftBody::Face& face, btVector3 mouse_pos, btScalar maxForce = 0.3) : m_elasticStiffness(k), m_dampingStiffness(d), m_face(face), m_mouse_pos(mouse_pos), m_maxForce(maxForce)
+ {
+ }
+
+ virtual void addScaledForces(btScalar scale, TVStack& force)
+ {
+ addScaledDampingForce(scale, force);
+ addScaledElasticForce(scale, force);
+ }
+
+ virtual void addScaledExplicitForce(btScalar scale, TVStack& force)
+ {
+ addScaledElasticForce(scale, force);
+ }
+
+ virtual void addScaledDampingForce(btScalar scale, TVStack& force)
+ {
+ for (int i = 0; i < 3; ++i)
+ {
+ btVector3 v_diff = m_face.m_n[i]->m_v;
+ btVector3 scaled_force = scale * m_dampingStiffness * v_diff;
+ if ((m_face.m_n[i]->m_x - m_mouse_pos).norm() > SIMD_EPSILON)
+ {
+ btVector3 dir = (m_face.m_n[i]->m_x - m_mouse_pos).normalized();
+ scaled_force = scale * m_dampingStiffness * v_diff.dot(dir) * dir;
+ }
+ force[m_face.m_n[i]->index] -= scaled_force;
+ }
+ }
+
+ virtual void addScaledElasticForce(btScalar scale, TVStack& force)
+ {
+ btScalar scaled_stiffness = scale * m_elasticStiffness;
+ for (int i = 0; i < 3; ++i)
+ {
+ btVector3 dir = (m_face.m_n[i]->m_q - m_mouse_pos);
+ btVector3 scaled_force = scaled_stiffness * dir;
+ if (scaled_force.safeNorm() > m_maxForce)
+ {
+ scaled_force.safeNormalize();
+ scaled_force *= m_maxForce;
+ }
+ force[m_face.m_n[i]->index] -= scaled_force;
+ }
+ }
+
+ virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df)
+ {
+ btScalar scaled_k_damp = m_dampingStiffness * scale;
+ for (int i = 0; i < 3; ++i)
+ {
+ btVector3 local_scaled_df = scaled_k_damp * dv[m_face.m_n[i]->index];
+ if ((m_face.m_n[i]->m_x - m_mouse_pos).norm() > SIMD_EPSILON)
+ {
+ btVector3 dir = (m_face.m_n[i]->m_x - m_mouse_pos).normalized();
+ local_scaled_df = scaled_k_damp * dv[m_face.m_n[i]->index].dot(dir) * dir;
+ }
+ df[m_face.m_n[i]->index] -= local_scaled_df;
+ }
+ }
+
+ virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) {}
+
+ virtual double totalElasticEnergy(btScalar dt)
+ {
+ double energy = 0;
+ for (int i = 0; i < 3; ++i)
+ {
+ btVector3 dir = (m_face.m_n[i]->m_q - m_mouse_pos);
+ btVector3 scaled_force = m_elasticStiffness * dir;
+ if (scaled_force.safeNorm() > m_maxForce)
+ {
+ scaled_force.safeNormalize();
+ scaled_force *= m_maxForce;
+ }
+ energy += 0.5 * scaled_force.dot(dir);
+ }
+ return energy;
+ }
+
+ virtual double totalDampingEnergy(btScalar dt)
+ {
+ double energy = 0;
+ for (int i = 0; i < 3; ++i)
+ {
+ btVector3 v_diff = m_face.m_n[i]->m_v;
+ btVector3 scaled_force = m_dampingStiffness * v_diff;
+ if ((m_face.m_n[i]->m_x - m_mouse_pos).norm() > SIMD_EPSILON)
+ {
+ btVector3 dir = (m_face.m_n[i]->m_x - m_mouse_pos).normalized();
+ scaled_force = m_dampingStiffness * v_diff.dot(dir) * dir;
+ }
+ energy -= scaled_force.dot(m_face.m_n[i]->m_v) / dt;
+ }
+ return energy;
+ }
+
+ virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df)
+ {
+ btScalar scaled_stiffness = scale * m_elasticStiffness;
+ for (int i = 0; i < 3; ++i)
+ {
+ btVector3 dir = (m_face.m_n[i]->m_q - m_mouse_pos);
+ btScalar dir_norm = dir.norm();
+ btVector3 dir_normalized = (dir_norm > SIMD_EPSILON) ? dir.normalized() : btVector3(0, 0, 0);
+ int id = m_face.m_n[i]->index;
+ btVector3 dx_diff = dx[id];
+ btScalar r = 0; // rest length is 0 for picking spring
+ btVector3 scaled_df = btVector3(0, 0, 0);
+ if (dir_norm > SIMD_EPSILON)
+ {
+ scaled_df -= scaled_stiffness * dir_normalized.dot(dx_diff) * dir_normalized;
+ scaled_df += scaled_stiffness * dir_normalized.dot(dx_diff) * ((dir_norm - r) / dir_norm) * dir_normalized;
+ scaled_df -= scaled_stiffness * ((dir_norm - r) / dir_norm) * dx_diff;
+ }
+ df[id] += scaled_df;
+ }
+ }
+
+ void setMousePos(const btVector3& p)
+ {
+ m_mouse_pos = p;
+ }
+
+ virtual btDeformableLagrangianForceType getForceType()
+ {
+ return BT_MOUSE_PICKING_FORCE;
+ }
};
#endif /* btMassSpring_h */
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.cpp
index c8cc47923e..631fd5fbed 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.cpp
@@ -13,131 +13,132 @@
3. This notice may not be removed or altered from any source distribution.
*/
-
#include "btDeformableMultiBodyConstraintSolver.h"
#include <iostream>
// override the iterations method to include deformable/multibody contact
-btScalar btDeformableMultiBodyConstraintSolver::solveDeformableGroupIterations(btCollisionObject** bodies,int numBodies,btCollisionObject** deformableBodies,int numDeformableBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer)
+btScalar btDeformableMultiBodyConstraintSolver::solveDeformableGroupIterations(btCollisionObject** bodies, int numBodies, btCollisionObject** deformableBodies, int numDeformableBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer)
{
- {
- ///this is a special step to resolve penetrations (just for contacts)
- solveGroupCacheFriendlySplitImpulseIterations(bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer);
+ {
+ ///this is a special step to resolve penetrations (just for contacts)
+ solveGroupCacheFriendlySplitImpulseIterations(bodies, numBodies, deformableBodies, numDeformableBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer);
+
+ int maxIterations = m_maxOverrideNumSolverIterations > infoGlobal.m_numIterations ? m_maxOverrideNumSolverIterations : infoGlobal.m_numIterations;
+ for (int iteration = 0; iteration < maxIterations; iteration++)
+ {
+ // rigid bodies are solved using solver body velocity, but rigid/deformable contact directly uses the velocity of the actual rigid body. So we have to do the following: Solve one iteration of the rigid/rigid contact, get the updated velocity in the solver body and update the velocity of the underlying rigid body. Then solve the rigid/deformable contact. Finally, grab the (once again) updated rigid velocity and update the velocity of the wrapping solver body
- int maxIterations = m_maxOverrideNumSolverIterations > infoGlobal.m_numIterations ? m_maxOverrideNumSolverIterations : infoGlobal.m_numIterations;
- for (int iteration = 0; iteration < maxIterations; iteration++)
- {
- // rigid bodies are solved using solver body velocity, but rigid/deformable contact directly uses the velocity of the actual rigid body. So we have to do the following: Solve one iteration of the rigid/rigid contact, get the updated velocity in the solver body and update the velocity of the underlying rigid body. Then solve the rigid/deformable contact. Finally, grab the (once again) updated rigid velocity and update the velocity of the wrapping solver body
-
- // solve rigid/rigid in solver body
- m_leastSquaresResidual = solveSingleIteration(iteration, bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer);
- // solver body velocity -> rigid body velocity
- solverBodyWriteBack(infoGlobal);
- btScalar deformableResidual = m_deformableSolver->solveContactConstraints(deformableBodies,numDeformableBodies, infoGlobal);
- // update rigid body velocity in rigid/deformable contact
- m_leastSquaresResidual = btMax(m_leastSquaresResidual, deformableResidual);
- // solver body velocity <- rigid body velocity
- writeToSolverBody(bodies, numBodies, infoGlobal);
-
- if (m_leastSquaresResidual <= infoGlobal.m_leastSquaresResidualThreshold || (iteration >= (maxIterations - 1)))
- {
+ // solve rigid/rigid in solver body
+ m_leastSquaresResidual = solveSingleIteration(iteration, bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer);
+ // solver body velocity -> rigid body velocity
+ solverBodyWriteBack(infoGlobal);
+ btScalar deformableResidual = m_deformableSolver->solveContactConstraints(deformableBodies, numDeformableBodies, infoGlobal);
+ // update rigid body velocity in rigid/deformable contact
+ m_leastSquaresResidual = btMax(m_leastSquaresResidual, deformableResidual);
+ // solver body velocity <- rigid body velocity
+ writeToSolverBody(bodies, numBodies, infoGlobal);
+
+ if (m_leastSquaresResidual <= infoGlobal.m_leastSquaresResidualThreshold || (iteration >= (maxIterations - 1)))
+ {
#ifdef VERBOSE_RESIDUAL_PRINTF
- printf("residual = %f at iteration #%d\n", m_leastSquaresResidual, iteration);
+ if (iteration >= (maxIterations - 1))
+ printf("residual = %f at iteration #%d\n", m_leastSquaresResidual, iteration);
#endif
- m_analyticsData.m_numSolverCalls++;
- m_analyticsData.m_numIterationsUsed = iteration+1;
- m_analyticsData.m_islandId = -2;
- if (numBodies>0)
- m_analyticsData.m_islandId = bodies[0]->getCompanionId();
- m_analyticsData.m_numBodies = numBodies;
- m_analyticsData.m_numContactManifolds = numManifolds;
- m_analyticsData.m_remainingLeastSquaresResidual = m_leastSquaresResidual;
- break;
- }
- }
- }
- return 0.f;
+ m_analyticsData.m_numSolverCalls++;
+ m_analyticsData.m_numIterationsUsed = iteration + 1;
+ m_analyticsData.m_islandId = -2;
+ if (numBodies > 0)
+ m_analyticsData.m_islandId = bodies[0]->getCompanionId();
+ m_analyticsData.m_numBodies = numBodies;
+ m_analyticsData.m_numContactManifolds = numManifolds;
+ m_analyticsData.m_remainingLeastSquaresResidual = m_leastSquaresResidual;
+ break;
+ }
+ }
+ }
+ return 0.f;
}
-void btDeformableMultiBodyConstraintSolver::solveDeformableBodyGroup(btCollisionObject * *bodies, int numBodies, btCollisionObject * *deformableBodies, int numDeformableBodies, btPersistentManifold** manifold, int numManifolds, btTypedConstraint** constraints, int numConstraints, btMultiBodyConstraint** multiBodyConstraints, int numMultiBodyConstraints, const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btDispatcher* dispatcher)
+void btDeformableMultiBodyConstraintSolver::solveDeformableBodyGroup(btCollisionObject** bodies, int numBodies, btCollisionObject** deformableBodies, int numDeformableBodies, btPersistentManifold** manifold, int numManifolds, btTypedConstraint** constraints, int numConstraints, btMultiBodyConstraint** multiBodyConstraints, int numMultiBodyConstraints, const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btDispatcher* dispatcher)
{
- m_tmpMultiBodyConstraints = multiBodyConstraints;
- m_tmpNumMultiBodyConstraints = numMultiBodyConstraints;
-
- // inherited from MultiBodyConstraintSolver
- solveGroupCacheFriendlySetup(bodies, numBodies, manifold, numManifolds, constraints, numConstraints, info, debugDrawer);
-
- // overriden
- solveDeformableGroupIterations(bodies, numBodies, deformableBodies, numDeformableBodies, manifold, numManifolds, constraints, numConstraints, info, debugDrawer);
-
- // inherited from MultiBodyConstraintSolver
- solveGroupCacheFriendlyFinish(bodies, numBodies, info);
-
- m_tmpMultiBodyConstraints = 0;
- m_tmpNumMultiBodyConstraints = 0;
+ m_tmpMultiBodyConstraints = multiBodyConstraints;
+ m_tmpNumMultiBodyConstraints = numMultiBodyConstraints;
+
+ // inherited from MultiBodyConstraintSolver
+ solveGroupCacheFriendlySetup(bodies, numBodies, manifold, numManifolds, constraints, numConstraints, info, debugDrawer);
+
+ // overriden
+ solveDeformableGroupIterations(bodies, numBodies, deformableBodies, numDeformableBodies, manifold, numManifolds, constraints, numConstraints, info, debugDrawer);
+
+ // inherited from MultiBodyConstraintSolver
+ solveGroupCacheFriendlyFinish(bodies, numBodies, info);
+
+ m_tmpMultiBodyConstraints = 0;
+ m_tmpNumMultiBodyConstraints = 0;
}
void btDeformableMultiBodyConstraintSolver::writeToSolverBody(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal)
{
- for (int i = 0; i < numBodies; i++)
- {
- int bodyId = getOrInitSolverBody(*bodies[i], infoGlobal.m_timeStep);
+ for (int i = 0; i < numBodies; i++)
+ {
+ int bodyId = getOrInitSolverBody(*bodies[i], infoGlobal.m_timeStep);
- btRigidBody* body = btRigidBody::upcast(bodies[i]);
- if (body && body->getInvMass())
- {
- btSolverBody& solverBody = m_tmpSolverBodyPool[bodyId];
- solverBody.m_linearVelocity = body->getLinearVelocity() - solverBody.m_deltaLinearVelocity;
- solverBody.m_angularVelocity = body->getAngularVelocity() - solverBody.m_deltaAngularVelocity;
- }
- }
+ btRigidBody* body = btRigidBody::upcast(bodies[i]);
+ if (body && body->getInvMass())
+ {
+ btSolverBody& solverBody = m_tmpSolverBodyPool[bodyId];
+ solverBody.m_linearVelocity = body->getLinearVelocity() - solverBody.m_deltaLinearVelocity;
+ solverBody.m_angularVelocity = body->getAngularVelocity() - solverBody.m_deltaAngularVelocity;
+ }
+ }
}
void btDeformableMultiBodyConstraintSolver::solverBodyWriteBack(const btContactSolverInfo& infoGlobal)
{
- for (int i = 0; i < m_tmpSolverBodyPool.size(); i++)
- {
- btRigidBody* body = m_tmpSolverBodyPool[i].m_originalBody;
- if (body)
- {
- m_tmpSolverBodyPool[i].m_originalBody->setLinearVelocity(m_tmpSolverBodyPool[i].m_linearVelocity + m_tmpSolverBodyPool[i].m_deltaLinearVelocity);
- m_tmpSolverBodyPool[i].m_originalBody->setAngularVelocity(m_tmpSolverBodyPool[i].m_angularVelocity+m_tmpSolverBodyPool[i].m_deltaAngularVelocity);
- }
- }
+ for (int i = 0; i < m_tmpSolverBodyPool.size(); i++)
+ {
+ btRigidBody* body = m_tmpSolverBodyPool[i].m_originalBody;
+ if (body)
+ {
+ m_tmpSolverBodyPool[i].m_originalBody->setLinearVelocity(m_tmpSolverBodyPool[i].m_linearVelocity + m_tmpSolverBodyPool[i].m_deltaLinearVelocity);
+ m_tmpSolverBodyPool[i].m_originalBody->setAngularVelocity(m_tmpSolverBodyPool[i].m_angularVelocity + m_tmpSolverBodyPool[i].m_deltaAngularVelocity);
+ }
+ }
}
-void btDeformableMultiBodyConstraintSolver::solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer)
+void btDeformableMultiBodyConstraintSolver::solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies, int numBodies, btCollisionObject** deformableBodies, int numDeformableBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer)
{
- BT_PROFILE("solveGroupCacheFriendlySplitImpulseIterations");
- int iteration;
- if (infoGlobal.m_splitImpulse)
- {
- {
-// m_deformableSolver->splitImpulseSetup(infoGlobal);
- for (iteration = 0; iteration < infoGlobal.m_numIterations; iteration++)
- {
- btScalar leastSquaresResidual = 0.f;
- {
- int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
- int j;
- for (j = 0; j < numPoolConstraints; j++)
- {
- const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]];
-
- btScalar residual = resolveSplitPenetrationImpulse(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold);
- leastSquaresResidual = btMax(leastSquaresResidual, residual * residual);
- }
- // solve the position correction between deformable and rigid/multibody
-// btScalar residual = m_deformableSolver->solveSplitImpulse(infoGlobal);
-// leastSquaresResidual = btMax(leastSquaresResidual, residual * residual);
- }
- if (leastSquaresResidual <= infoGlobal.m_leastSquaresResidualThreshold || iteration >= (infoGlobal.m_numIterations - 1))
- {
+ BT_PROFILE("solveGroupCacheFriendlySplitImpulseIterations");
+ int iteration;
+ if (infoGlobal.m_splitImpulse)
+ {
+ {
+ for (iteration = 0; iteration < infoGlobal.m_numIterations; iteration++)
+ {
+ btScalar leastSquaresResidual = 0.f;
+ {
+ int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
+ int j;
+ for (j = 0; j < numPoolConstraints; j++)
+ {
+ const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]];
+
+ btScalar residual = resolveSplitPenetrationImpulse(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold);
+ leastSquaresResidual = btMax(leastSquaresResidual, residual * residual);
+ }
+ // solve the position correction between deformable and rigid/multibody
+ // btScalar residual = m_deformableSolver->solveSplitImpulse(infoGlobal);
+ btScalar residual = m_deformableSolver->m_objective->m_projection.solveSplitImpulse(deformableBodies, numDeformableBodies, infoGlobal);
+ leastSquaresResidual = btMax(leastSquaresResidual, residual * residual);
+ }
+ if (leastSquaresResidual <= infoGlobal.m_leastSquaresResidualThreshold || iteration >= (infoGlobal.m_numIterations - 1))
+ {
#ifdef VERBOSE_RESIDUAL_PRINTF
- printf("residual = %f at iteration #%d\n", leastSquaresResidual, iteration);
+ if (iteration >= (infoGlobal.m_numIterations - 1))
+ printf("split impulse residual = %f at iteration #%d\n", leastSquaresResidual, iteration);
#endif
- break;
- }
- }
- }
- }
+ break;
+ }
+ }
+ }
+ }
}
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.h b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.h
index 0c7cc26a83..94aabce838 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.h
@@ -13,7 +13,6 @@
3. This notice may not be removed or altered from any source distribution.
*/
-
#ifndef BT_DEFORMABLE_MULTIBODY_CONSTRAINT_SOLVER_H
#define BT_DEFORMABLE_MULTIBODY_CONSTRAINT_SOLVER_H
@@ -32,30 +31,31 @@ class btDeformableBodySolver;
ATTRIBUTE_ALIGNED16(class)
btDeformableMultiBodyConstraintSolver : public btMultiBodyConstraintSolver
{
- btDeformableBodySolver* m_deformableSolver;
-
+ btDeformableBodySolver* m_deformableSolver;
+
protected:
- // override the iterations method to include deformable/multibody contact
-// virtual btScalar solveGroupCacheFriendlyIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer);
-
- // write the velocity of the the solver body to the underlying rigid body
- void solverBodyWriteBack(const btContactSolverInfo& infoGlobal);
-
- // write the velocity of the underlying rigid body to the the the solver body
- void writeToSolverBody(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal);
-
- virtual void solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer);
-
- virtual btScalar solveDeformableGroupIterations(btCollisionObject** bodies,int numBodies,btCollisionObject** deformableBodies,int numDeformableBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer);
+ // override the iterations method to include deformable/multibody contact
+ // virtual btScalar solveGroupCacheFriendlyIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer);
+
+ // write the velocity of the the solver body to the underlying rigid body
+ void solverBodyWriteBack(const btContactSolverInfo& infoGlobal);
+
+ // write the velocity of the underlying rigid body to the the the solver body
+ void writeToSolverBody(btCollisionObject * *bodies, int numBodies, const btContactSolverInfo& infoGlobal);
+
+ virtual void solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject * *bodies, int numBodies, btCollisionObject** deformableBodies, int numDeformableBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer);
+
+ virtual btScalar solveDeformableGroupIterations(btCollisionObject * *bodies, int numBodies, btCollisionObject** deformableBodies, int numDeformableBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer);
+
public:
- BT_DECLARE_ALIGNED_ALLOCATOR();
-
- void setDeformableSolver(btDeformableBodySolver* deformableSolver)
- {
- m_deformableSolver = deformableSolver;
- }
-
- virtual void solveDeformableBodyGroup(btCollisionObject * *bodies, int numBodies, btCollisionObject * *deformableBodies, int numDeformableBodies, btPersistentManifold** manifold, int numManifolds, btTypedConstraint** constraints, int numConstraints, btMultiBodyConstraint** multiBodyConstraints, int numMultiBodyConstraints, const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btDispatcher* dispatcher);
+ BT_DECLARE_ALIGNED_ALLOCATOR();
+
+ void setDeformableSolver(btDeformableBodySolver * deformableSolver)
+ {
+ m_deformableSolver = deformableSolver;
+ }
+
+ virtual void solveDeformableBodyGroup(btCollisionObject * *bodies, int numBodies, btCollisionObject** deformableBodies, int numDeformableBodies, btPersistentManifold** manifold, int numManifolds, btTypedConstraint** constraints, int numConstraints, btMultiBodyConstraint** multiBodyConstraints, int numMultiBodyConstraints, const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btDispatcher* dispatcher);
};
#endif /* BT_DEFORMABLE_MULTIBODY_CONSTRAINT_SOLVER_H */
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.cpp
index 6b742978ef..983e622b5f 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.cpp
@@ -40,8 +40,9 @@ The algorithm also closely resembles the one in http://physbam.stanford.edu/~fed
#include "LinearMath/btQuickprof.h"
#include "btSoftBodyInternals.h"
btDeformableMultiBodyDynamicsWorld::btDeformableMultiBodyDynamicsWorld(btDispatcher* dispatcher, btBroadphaseInterface* pairCache, btDeformableMultiBodyConstraintSolver* constraintSolver, btCollisionConfiguration* collisionConfiguration, btDeformableBodySolver* deformableBodySolver)
-: btMultiBodyDynamicsWorld(dispatcher, pairCache, (btMultiBodyConstraintSolver*)constraintSolver, collisionConfiguration),
-m_deformableBodySolver(deformableBodySolver), m_solverCallback(0)
+ : btMultiBodyDynamicsWorld(dispatcher, pairCache, (btMultiBodyConstraintSolver*)constraintSolver, collisionConfiguration),
+ m_deformableBodySolver(deformableBodySolver),
+ m_solverCallback(0)
{
m_drawFlags = fDrawFlags::Std;
m_drawNodeTree = true;
@@ -52,7 +53,7 @@ m_deformableBodySolver(deformableBodySolver), m_solverCallback(0)
m_sbi.m_sparsesdf.Initialize();
m_sbi.m_sparsesdf.setDefaultVoxelsz(0.005);
m_sbi.m_sparsesdf.Reset();
-
+
m_sbi.air_density = (btScalar)1.2;
m_sbi.water_density = 0;
m_sbi.water_offset = 0;
@@ -61,57 +62,57 @@ m_deformableBodySolver(deformableBodySolver), m_solverCallback(0)
m_internalTime = 0.0;
m_implicit = false;
m_lineSearch = false;
- m_useProjection = true;
+ m_useProjection = false;
m_ccdIterations = 5;
m_solverDeformableBodyIslandCallback = new DeformableBodyInplaceSolverIslandCallback(constraintSolver, dispatcher);
}
btDeformableMultiBodyDynamicsWorld::~btDeformableMultiBodyDynamicsWorld()
{
- delete m_solverDeformableBodyIslandCallback;
+ delete m_solverDeformableBodyIslandCallback;
}
void btDeformableMultiBodyDynamicsWorld::internalSingleStepSimulation(btScalar timeStep)
{
- BT_PROFILE("internalSingleStepSimulation");
- if (0 != m_internalPreTickCallback)
- {
- (*m_internalPreTickCallback)(this, timeStep);
- }
- reinitialize(timeStep);
-
- // add gravity to velocity of rigid and multi bodys
- applyRigidBodyGravity(timeStep);
-
- ///apply gravity and explicit force to velocity, predict motion
- predictUnconstraintMotion(timeStep);
-
- ///perform collision detection that involves rigid/multi bodies
- btMultiBodyDynamicsWorld::performDiscreteCollisionDetection();
-
- btMultiBodyDynamicsWorld::calculateSimulationIslands();
-
- beforeSolverCallbacks(timeStep);
-
- ///solve contact constraints and then deformable bodies momemtum equation
- solveConstraints(timeStep);
-
- afterSolverCallbacks(timeStep);
+ BT_PROFILE("internalSingleStepSimulation");
+ if (0 != m_internalPreTickCallback)
+ {
+ (*m_internalPreTickCallback)(this, timeStep);
+ }
+ reinitialize(timeStep);
+
+ // add gravity to velocity of rigid and multi bodys
+ applyRigidBodyGravity(timeStep);
+
+ ///apply gravity and explicit force to velocity, predict motion
+ predictUnconstraintMotion(timeStep);
+
+ ///perform collision detection that involves rigid/multi bodies
+ btMultiBodyDynamicsWorld::performDiscreteCollisionDetection();
+
+ btMultiBodyDynamicsWorld::calculateSimulationIslands();
+
+ beforeSolverCallbacks(timeStep);
+
+ ///solve contact constraints and then deformable bodies momemtum equation
+ solveConstraints(timeStep);
+
+ afterSolverCallbacks(timeStep);
performDeformableCollisionDetection();
- applyRepulsionForce(timeStep);
+ applyRepulsionForce(timeStep);
+
+ performGeometricCollisions(timeStep);
+
+ integrateTransforms(timeStep);
- performGeometricCollisions(timeStep);
+ ///update vehicle simulation
+ btMultiBodyDynamicsWorld::updateActions(timeStep);
- integrateTransforms(timeStep);
-
- ///update vehicle simulation
- btMultiBodyDynamicsWorld::updateActions(timeStep);
-
- updateActivationState(timeStep);
- // End solver-wise simulation step
- // ///////////////////////////////
+ updateActivationState(timeStep);
+ // End solver-wise simulation step
+ // ///////////////////////////////
}
void btDeformableMultiBodyDynamicsWorld::performDeformableCollisionDetection()
@@ -120,7 +121,7 @@ void btDeformableMultiBodyDynamicsWorld::performDeformableCollisionDetection()
{
m_softBodies[i]->m_softSoftCollision = true;
}
-
+
for (int i = 0; i < m_softBodies.size(); ++i)
{
for (int j = i; j < m_softBodies.size(); ++j)
@@ -128,7 +129,7 @@ void btDeformableMultiBodyDynamicsWorld::performDeformableCollisionDetection()
m_softBodies[i]->defaultCollisionHandler(m_softBodies[j]);
}
}
-
+
for (int i = 0; i < m_softBodies.size(); ++i)
{
m_softBodies[i]->m_softSoftCollision = false;
@@ -137,45 +138,45 @@ void btDeformableMultiBodyDynamicsWorld::performDeformableCollisionDetection()
void btDeformableMultiBodyDynamicsWorld::updateActivationState(btScalar timeStep)
{
- for (int i = 0; i < m_softBodies.size(); i++)
- {
- btSoftBody* psb = m_softBodies[i];
- psb->updateDeactivation(timeStep);
- if (psb->wantsSleeping())
- {
- if (psb->getActivationState() == ACTIVE_TAG)
- psb->setActivationState(WANTS_DEACTIVATION);
- if (psb->getActivationState() == ISLAND_SLEEPING)
- {
- psb->setZeroVelocity();
- }
- }
- else
- {
- if (psb->getActivationState() != DISABLE_DEACTIVATION)
- psb->setActivationState(ACTIVE_TAG);
- }
- }
- btMultiBodyDynamicsWorld::updateActivationState(timeStep);
+ for (int i = 0; i < m_softBodies.size(); i++)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ psb->updateDeactivation(timeStep);
+ if (psb->wantsSleeping())
+ {
+ if (psb->getActivationState() == ACTIVE_TAG)
+ psb->setActivationState(WANTS_DEACTIVATION);
+ if (psb->getActivationState() == ISLAND_SLEEPING)
+ {
+ psb->setZeroVelocity();
+ }
+ }
+ else
+ {
+ if (psb->getActivationState() != DISABLE_DEACTIVATION)
+ psb->setActivationState(ACTIVE_TAG);
+ }
+ }
+ btMultiBodyDynamicsWorld::updateActivationState(timeStep);
}
void btDeformableMultiBodyDynamicsWorld::applyRepulsionForce(btScalar timeStep)
{
- BT_PROFILE("btDeformableMultiBodyDynamicsWorld::applyRepulsionForce");
- for (int i = 0; i < m_softBodies.size(); i++)
- {
- btSoftBody* psb = m_softBodies[i];
- if (psb->isActive())
- {
+ BT_PROFILE("btDeformableMultiBodyDynamicsWorld::applyRepulsionForce");
+ for (int i = 0; i < m_softBodies.size(); i++)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (psb->isActive())
+ {
psb->applyRepulsionForce(timeStep, true);
- }
- }
+ }
+ }
}
void btDeformableMultiBodyDynamicsWorld::performGeometricCollisions(btScalar timeStep)
{
BT_PROFILE("btDeformableMultiBodyDynamicsWorld::performGeometricCollisions");
- // refit the BVH tree for CCD
+ // refit the BVH tree for CCD
for (int i = 0; i < m_softBodies.size(); ++i)
{
btSoftBody* psb = m_softBodies[i];
@@ -214,7 +215,7 @@ void btDeformableMultiBodyDynamicsWorld::performGeometricCollisions(btScalar tim
f.m_vn = (f.m_n[1]->m_v - f.m_n[0]->m_v).cross(f.m_n[2]->m_v - f.m_n[0]->m_v) * timeStep * timeStep;
}
}
- }
+ }
// apply CCD to register new contact points
for (int i = 0; i < m_softBodies.size(); ++i)
@@ -228,7 +229,7 @@ void btDeformableMultiBodyDynamicsWorld::performGeometricCollisions(btScalar tim
m_softBodies[i]->geometricCollisionHandler(m_softBodies[j]);
}
}
- }
+ }
int penetration_count = 0;
for (int i = 0; i < m_softBodies.size(); ++i)
@@ -258,294 +259,292 @@ void btDeformableMultiBodyDynamicsWorld::performGeometricCollisions(btScalar tim
void btDeformableMultiBodyDynamicsWorld::softBodySelfCollision()
{
- BT_PROFILE("btDeformableMultiBodyDynamicsWorld::softBodySelfCollision");
- for (int i = 0; i < m_softBodies.size(); i++)
- {
- btSoftBody* psb = m_softBodies[i];
- if (psb->isActive())
- {
- psb->defaultCollisionHandler(psb);
- }
- }
+ BT_PROFILE("btDeformableMultiBodyDynamicsWorld::softBodySelfCollision");
+ for (int i = 0; i < m_softBodies.size(); i++)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (psb->isActive())
+ {
+ psb->defaultCollisionHandler(psb);
+ }
+ }
}
void btDeformableMultiBodyDynamicsWorld::positionCorrection(btScalar timeStep)
{
- // correct the position of rigid bodies with temporary velocity generated from split impulse
- btContactSolverInfo infoGlobal;
- btVector3 zero(0,0,0);
- for (int i = 0; i < m_nonStaticRigidBodies.size(); ++i)
- {
- btRigidBody* rb = m_nonStaticRigidBodies[i];
- //correct the position/orientation based on push/turn recovery
- btTransform newTransform;
- btVector3 pushVelocity = rb->getPushVelocity();
- btVector3 turnVelocity = rb->getTurnVelocity();
- if (pushVelocity[0] != 0.f || pushVelocity[1] != 0 || pushVelocity[2] != 0 || turnVelocity[0] != 0.f || turnVelocity[1] != 0 || turnVelocity[2] != 0)
- {
- btTransformUtil::integrateTransform(rb->getWorldTransform(), pushVelocity, turnVelocity * infoGlobal.m_splitImpulseTurnErp, timeStep, newTransform);
- rb->setWorldTransform(newTransform);
- rb->setPushVelocity(zero);
- rb->setTurnVelocity(zero);
- }
- }
+ // correct the position of rigid bodies with temporary velocity generated from split impulse
+ btContactSolverInfo infoGlobal;
+ btVector3 zero(0, 0, 0);
+ for (int i = 0; i < m_nonStaticRigidBodies.size(); ++i)
+ {
+ btRigidBody* rb = m_nonStaticRigidBodies[i];
+ //correct the position/orientation based on push/turn recovery
+ btTransform newTransform;
+ btVector3 pushVelocity = rb->getPushVelocity();
+ btVector3 turnVelocity = rb->getTurnVelocity();
+ if (pushVelocity[0] != 0.f || pushVelocity[1] != 0 || pushVelocity[2] != 0 || turnVelocity[0] != 0.f || turnVelocity[1] != 0 || turnVelocity[2] != 0)
+ {
+ btTransformUtil::integrateTransform(rb->getWorldTransform(), pushVelocity, turnVelocity * infoGlobal.m_splitImpulseTurnErp, timeStep, newTransform);
+ rb->setWorldTransform(newTransform);
+ rb->setPushVelocity(zero);
+ rb->setTurnVelocity(zero);
+ }
+ }
}
void btDeformableMultiBodyDynamicsWorld::integrateTransforms(btScalar timeStep)
{
- BT_PROFILE("integrateTransforms");
- positionCorrection(timeStep);
- btMultiBodyDynamicsWorld::integrateTransforms(timeStep);
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- btSoftBody::Node& node = psb->m_nodes[j];
- btScalar maxDisplacement = psb->getWorldInfo()->m_maxDisplacement;
- btScalar clampDeltaV = maxDisplacement / timeStep;
- for (int c = 0; c < 3; c++)
- {
- if (node.m_v[c] > clampDeltaV)
- {
- node.m_v[c] = clampDeltaV;
- }
- if (node.m_v[c] < -clampDeltaV)
- {
- node.m_v[c] = -clampDeltaV;
- }
- }
- node.m_x = node.m_x + timeStep * node.m_v;
- node.m_q = node.m_x;
- node.m_vn = node.m_v;
- }
- // enforce anchor constraints
- for (int j = 0; j < psb->m_deformableAnchors.size();++j)
- {
- btSoftBody::DeformableNodeRigidAnchor& a = psb->m_deformableAnchors[j];
- btSoftBody::Node* n = a.m_node;
- n->m_x = a.m_cti.m_colObj->getWorldTransform() * a.m_local;
-
- // update multibody anchor info
- if (a.m_cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
- {
- btMultiBodyLinkCollider* multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(a.m_cti.m_colObj);
- if (multibodyLinkCol)
- {
- btVector3 nrm;
- const btCollisionShape* shp = multibodyLinkCol->getCollisionShape();
- const btTransform& wtr = multibodyLinkCol->getWorldTransform();
- psb->m_worldInfo->m_sparsesdf.Evaluate(
- wtr.invXform(n->m_x),
- shp,
- nrm,
- 0);
- a.m_cti.m_normal = wtr.getBasis() * nrm;
- btVector3 normal = a.m_cti.m_normal;
- btVector3 t1 = generateUnitOrthogonalVector(normal);
- btVector3 t2 = btCross(normal, t1);
- btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2;
- findJacobian(multibodyLinkCol, jacobianData_normal, a.m_node->m_x, normal);
- findJacobian(multibodyLinkCol, jacobianData_t1, a.m_node->m_x, t1);
- findJacobian(multibodyLinkCol, jacobianData_t2, a.m_node->m_x, t2);
-
- btScalar* J_n = &jacobianData_normal.m_jacobians[0];
- btScalar* J_t1 = &jacobianData_t1.m_jacobians[0];
- btScalar* J_t2 = &jacobianData_t2.m_jacobians[0];
-
- btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0];
- btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0];
- btScalar* u_t2 = &jacobianData_t2.m_deltaVelocitiesUnitImpulse[0];
-
- btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(),
- t1.getX(), t1.getY(), t1.getZ(),
- t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame
- const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6;
- btMatrix3x3 local_impulse_matrix = (Diagonal(n->m_im) + OuterProduct(J_n, J_t1, J_t2, u_n, u_t1, u_t2, ndof)).inverse();
- a.m_c0 = rot.transpose() * local_impulse_matrix * rot;
- a.jacobianData_normal = jacobianData_normal;
- a.jacobianData_t1 = jacobianData_t1;
- a.jacobianData_t2 = jacobianData_t2;
- a.t1 = t1;
- a.t2 = t2;
- }
- }
- }
- psb->interpolateRenderMesh();
- }
+ BT_PROFILE("integrateTransforms");
+ positionCorrection(timeStep);
+ btMultiBodyDynamicsWorld::integrateTransforms(timeStep);
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ btSoftBody::Node& node = psb->m_nodes[j];
+ btScalar maxDisplacement = psb->getWorldInfo()->m_maxDisplacement;
+ btScalar clampDeltaV = maxDisplacement / timeStep;
+ for (int c = 0; c < 3; c++)
+ {
+ if (node.m_v[c] > clampDeltaV)
+ {
+ node.m_v[c] = clampDeltaV;
+ }
+ if (node.m_v[c] < -clampDeltaV)
+ {
+ node.m_v[c] = -clampDeltaV;
+ }
+ }
+ node.m_x = node.m_x + timeStep * (node.m_v + node.m_splitv);
+ node.m_q = node.m_x;
+ node.m_vn = node.m_v;
+ }
+ // enforce anchor constraints
+ for (int j = 0; j < psb->m_deformableAnchors.size(); ++j)
+ {
+ btSoftBody::DeformableNodeRigidAnchor& a = psb->m_deformableAnchors[j];
+ btSoftBody::Node* n = a.m_node;
+ n->m_x = a.m_cti.m_colObj->getWorldTransform() * a.m_local;
+
+ // update multibody anchor info
+ if (a.m_cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
+ {
+ btMultiBodyLinkCollider* multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(a.m_cti.m_colObj);
+ if (multibodyLinkCol)
+ {
+ btVector3 nrm;
+ const btCollisionShape* shp = multibodyLinkCol->getCollisionShape();
+ const btTransform& wtr = multibodyLinkCol->getWorldTransform();
+ psb->m_worldInfo->m_sparsesdf.Evaluate(
+ wtr.invXform(n->m_x),
+ shp,
+ nrm,
+ 0);
+ a.m_cti.m_normal = wtr.getBasis() * nrm;
+ btVector3 normal = a.m_cti.m_normal;
+ btVector3 t1 = generateUnitOrthogonalVector(normal);
+ btVector3 t2 = btCross(normal, t1);
+ btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2;
+ findJacobian(multibodyLinkCol, jacobianData_normal, a.m_node->m_x, normal);
+ findJacobian(multibodyLinkCol, jacobianData_t1, a.m_node->m_x, t1);
+ findJacobian(multibodyLinkCol, jacobianData_t2, a.m_node->m_x, t2);
+
+ btScalar* J_n = &jacobianData_normal.m_jacobians[0];
+ btScalar* J_t1 = &jacobianData_t1.m_jacobians[0];
+ btScalar* J_t2 = &jacobianData_t2.m_jacobians[0];
+
+ btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0];
+ btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0];
+ btScalar* u_t2 = &jacobianData_t2.m_deltaVelocitiesUnitImpulse[0];
+
+ btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(),
+ t1.getX(), t1.getY(), t1.getZ(),
+ t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame
+ const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6;
+ btMatrix3x3 local_impulse_matrix = (Diagonal(n->m_im) + OuterProduct(J_n, J_t1, J_t2, u_n, u_t1, u_t2, ndof)).inverse();
+ a.m_c0 = rot.transpose() * local_impulse_matrix * rot;
+ a.jacobianData_normal = jacobianData_normal;
+ a.jacobianData_t1 = jacobianData_t1;
+ a.jacobianData_t2 = jacobianData_t2;
+ a.t1 = t1;
+ a.t2 = t2;
+ }
+ }
+ }
+ psb->interpolateRenderMesh();
+ }
}
void btDeformableMultiBodyDynamicsWorld::solveConstraints(btScalar timeStep)
{
- BT_PROFILE("btDeformableMultiBodyDynamicsWorld::solveConstraints");
- // save v_{n+1}^* velocity after explicit forces
- m_deformableBodySolver->backupVelocity();
-
- // set up constraints among multibodies and between multibodies and deformable bodies
- setupConstraints();
-
- // solve contact constraints
- solveContactConstraints();
-
- // set up the directions in which the velocity does not change in the momentum solve
- if (m_useProjection)
- m_deformableBodySolver->m_objective->m_projection.setProjection();
- else
- m_deformableBodySolver->m_objective->m_projection.setLagrangeMultiplier();
-
- // for explicit scheme, m_backupVelocity = v_{n+1}^*
- // for implicit scheme, m_backupVelocity = v_n
- // Here, set dv = v_{n+1} - v_n for nodes in contact
- m_deformableBodySolver->setupDeformableSolve(m_implicit);
-
- // At this point, dv should be golden for nodes in contact
- // proceed to solve deformable momentum equation
- m_deformableBodySolver->solveDeformableConstraints(timeStep);
+ BT_PROFILE("btDeformableMultiBodyDynamicsWorld::solveConstraints");
+ // save v_{n+1}^* velocity after explicit forces
+ m_deformableBodySolver->backupVelocity();
+
+ // set up constraints among multibodies and between multibodies and deformable bodies
+ setupConstraints();
+
+ // solve contact constraints
+ solveContactConstraints();
+
+ // set up the directions in which the velocity does not change in the momentum solve
+ if (m_useProjection)
+ m_deformableBodySolver->m_objective->m_projection.setProjection();
+ else
+ m_deformableBodySolver->m_objective->m_projection.setLagrangeMultiplier();
+
+ // for explicit scheme, m_backupVelocity = v_{n+1}^*
+ // for implicit scheme, m_backupVelocity = v_n
+ // Here, set dv = v_{n+1} - v_n for nodes in contact
+ m_deformableBodySolver->setupDeformableSolve(m_implicit);
+
+ // At this point, dv should be golden for nodes in contact
+ // proceed to solve deformable momentum equation
+ m_deformableBodySolver->solveDeformableConstraints(timeStep);
}
void btDeformableMultiBodyDynamicsWorld::setupConstraints()
{
- // set up constraints between multibody and deformable bodies
- m_deformableBodySolver->setConstraints(m_solverInfo);
-
- // set up constraints among multibodies
- {
- sortConstraints();
- // setup the solver callback
- btMultiBodyConstraint** sortedMultiBodyConstraints = m_sortedMultiBodyConstraints.size() ? &m_sortedMultiBodyConstraints[0] : 0;
- btTypedConstraint** constraintsPtr = getNumConstraints() ? &m_sortedConstraints[0] : 0;
- m_solverDeformableBodyIslandCallback->setup(&m_solverInfo, constraintsPtr, m_sortedConstraints.size(), sortedMultiBodyConstraints, m_sortedMultiBodyConstraints.size(), getDebugDrawer());
-
- // build islands
- m_islandManager->buildIslands(getCollisionWorld()->getDispatcher(), getCollisionWorld());
- }
+ // set up constraints between multibody and deformable bodies
+ m_deformableBodySolver->setConstraints(m_solverInfo);
+
+ // set up constraints among multibodies
+ {
+ sortConstraints();
+ // setup the solver callback
+ btMultiBodyConstraint** sortedMultiBodyConstraints = m_sortedMultiBodyConstraints.size() ? &m_sortedMultiBodyConstraints[0] : 0;
+ btTypedConstraint** constraintsPtr = getNumConstraints() ? &m_sortedConstraints[0] : 0;
+ m_solverDeformableBodyIslandCallback->setup(&m_solverInfo, constraintsPtr, m_sortedConstraints.size(), sortedMultiBodyConstraints, m_sortedMultiBodyConstraints.size(), getDebugDrawer());
+
+ // build islands
+ m_islandManager->buildIslands(getCollisionWorld()->getDispatcher(), getCollisionWorld());
+ }
}
void btDeformableMultiBodyDynamicsWorld::sortConstraints()
{
- m_sortedConstraints.resize(m_constraints.size());
- int i;
- for (i = 0; i < getNumConstraints(); i++)
- {
- m_sortedConstraints[i] = m_constraints[i];
- }
- m_sortedConstraints.quickSort(btSortConstraintOnIslandPredicate2());
-
- m_sortedMultiBodyConstraints.resize(m_multiBodyConstraints.size());
- for (i = 0; i < m_multiBodyConstraints.size(); i++)
- {
- m_sortedMultiBodyConstraints[i] = m_multiBodyConstraints[i];
- }
- m_sortedMultiBodyConstraints.quickSort(btSortMultiBodyConstraintOnIslandPredicate());
+ m_sortedConstraints.resize(m_constraints.size());
+ int i;
+ for (i = 0; i < getNumConstraints(); i++)
+ {
+ m_sortedConstraints[i] = m_constraints[i];
+ }
+ m_sortedConstraints.quickSort(btSortConstraintOnIslandPredicate2());
+
+ m_sortedMultiBodyConstraints.resize(m_multiBodyConstraints.size());
+ for (i = 0; i < m_multiBodyConstraints.size(); i++)
+ {
+ m_sortedMultiBodyConstraints[i] = m_multiBodyConstraints[i];
+ }
+ m_sortedMultiBodyConstraints.quickSort(btSortMultiBodyConstraintOnIslandPredicate());
}
-
-
+
void btDeformableMultiBodyDynamicsWorld::solveContactConstraints()
{
- // process constraints on each island
- m_islandManager->processIslands(getCollisionWorld()->getDispatcher(), getCollisionWorld(), m_solverDeformableBodyIslandCallback);
-
- // process deferred
- m_solverDeformableBodyIslandCallback->processConstraints();
- m_constraintSolver->allSolved(m_solverInfo, m_debugDrawer);
-
- // write joint feedback
- {
- for (int i = 0; i < this->m_multiBodies.size(); i++)
- {
- btMultiBody* bod = m_multiBodies[i];
-
- bool isSleeping = false;
-
- if (bod->getBaseCollider() && bod->getBaseCollider()->getActivationState() == ISLAND_SLEEPING)
- {
- isSleeping = true;
- }
- for (int b = 0; b < bod->getNumLinks(); b++)
- {
- if (bod->getLink(b).m_collider && bod->getLink(b).m_collider->getActivationState() == ISLAND_SLEEPING)
- isSleeping = true;
- }
-
- if (!isSleeping)
- {
- //useless? they get resized in stepVelocities once again (AND DIFFERENTLY)
- m_scratch_r.resize(bod->getNumLinks() + 1); //multidof? ("Y"s use it and it is used to store qdd)
- m_scratch_v.resize(bod->getNumLinks() + 1);
- m_scratch_m.resize(bod->getNumLinks() + 1);
-
- if (bod->internalNeedsJointFeedback())
- {
- if (!bod->isUsingRK4Integration())
- {
- if (bod->internalNeedsJointFeedback())
- {
- bool isConstraintPass = true;
- bod->computeAccelerationsArticulatedBodyAlgorithmMultiDof(m_solverInfo.m_timeStep, m_scratch_r, m_scratch_v, m_scratch_m, isConstraintPass,
- getSolverInfo().m_jointFeedbackInWorldSpace,
- getSolverInfo().m_jointFeedbackInJointFrame);
- }
- }
- }
- }
- }
- }
-
- for (int i = 0; i < this->m_multiBodies.size(); i++)
- {
- btMultiBody* bod = m_multiBodies[i];
- bod->processDeltaVeeMultiDof2();
- }
+ // process constraints on each island
+ m_islandManager->processIslands(getCollisionWorld()->getDispatcher(), getCollisionWorld(), m_solverDeformableBodyIslandCallback);
+
+ // process deferred
+ m_solverDeformableBodyIslandCallback->processConstraints();
+ m_constraintSolver->allSolved(m_solverInfo, m_debugDrawer);
+
+ // write joint feedback
+ {
+ for (int i = 0; i < this->m_multiBodies.size(); i++)
+ {
+ btMultiBody* bod = m_multiBodies[i];
+
+ bool isSleeping = false;
+
+ if (bod->getBaseCollider() && bod->getBaseCollider()->getActivationState() == ISLAND_SLEEPING)
+ {
+ isSleeping = true;
+ }
+ for (int b = 0; b < bod->getNumLinks(); b++)
+ {
+ if (bod->getLink(b).m_collider && bod->getLink(b).m_collider->getActivationState() == ISLAND_SLEEPING)
+ isSleeping = true;
+ }
+
+ if (!isSleeping)
+ {
+ //useless? they get resized in stepVelocities once again (AND DIFFERENTLY)
+ m_scratch_r.resize(bod->getNumLinks() + 1); //multidof? ("Y"s use it and it is used to store qdd)
+ m_scratch_v.resize(bod->getNumLinks() + 1);
+ m_scratch_m.resize(bod->getNumLinks() + 1);
+
+ if (bod->internalNeedsJointFeedback())
+ {
+ if (!bod->isUsingRK4Integration())
+ {
+ if (bod->internalNeedsJointFeedback())
+ {
+ bool isConstraintPass = true;
+ bod->computeAccelerationsArticulatedBodyAlgorithmMultiDof(m_solverInfo.m_timeStep, m_scratch_r, m_scratch_v, m_scratch_m, isConstraintPass,
+ getSolverInfo().m_jointFeedbackInWorldSpace,
+ getSolverInfo().m_jointFeedbackInJointFrame);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ for (int i = 0; i < this->m_multiBodies.size(); i++)
+ {
+ btMultiBody* bod = m_multiBodies[i];
+ bod->processDeltaVeeMultiDof2();
+ }
}
void btDeformableMultiBodyDynamicsWorld::addSoftBody(btSoftBody* body, int collisionFilterGroup, int collisionFilterMask)
{
- m_softBodies.push_back(body);
-
- // Set the soft body solver that will deal with this body
- // to be the world's solver
- body->setSoftBodySolver(m_deformableBodySolver);
-
- btCollisionWorld::addCollisionObject(body,
- collisionFilterGroup,
- collisionFilterMask);
+ m_softBodies.push_back(body);
+
+ // Set the soft body solver that will deal with this body
+ // to be the world's solver
+ body->setSoftBodySolver(m_deformableBodySolver);
+
+ btCollisionWorld::addCollisionObject(body,
+ collisionFilterGroup,
+ collisionFilterMask);
}
void btDeformableMultiBodyDynamicsWorld::predictUnconstraintMotion(btScalar timeStep)
{
- BT_PROFILE("predictUnconstraintMotion");
- btMultiBodyDynamicsWorld::predictUnconstraintMotion(timeStep);
- m_deformableBodySolver->predictMotion(timeStep);
+ BT_PROFILE("predictUnconstraintMotion");
+ btMultiBodyDynamicsWorld::predictUnconstraintMotion(timeStep);
+ m_deformableBodySolver->predictMotion(timeStep);
}
void btDeformableMultiBodyDynamicsWorld::reinitialize(btScalar timeStep)
{
- m_internalTime += timeStep;
- m_deformableBodySolver->setImplicit(m_implicit);
- m_deformableBodySolver->setLineSearch(m_lineSearch);
- m_deformableBodySolver->reinitialize(m_softBodies, timeStep);
- btDispatcherInfo& dispatchInfo = btMultiBodyDynamicsWorld::getDispatchInfo();
- dispatchInfo.m_timeStep = timeStep;
- dispatchInfo.m_stepCount = 0;
- dispatchInfo.m_debugDraw = btMultiBodyDynamicsWorld::getDebugDrawer();
- btMultiBodyDynamicsWorld::getSolverInfo().m_timeStep = timeStep;
- if (m_useProjection)
- {
- m_deformableBodySolver->m_useProjection = true;
-// m_deformableBodySolver->m_objective->m_projection.m_useStrainLimiting = true;
- m_deformableBodySolver->m_objective->m_preconditioner = m_deformableBodySolver->m_objective->m_massPreconditioner;
- }
- else
- {
- m_deformableBodySolver->m_objective->m_preconditioner = m_deformableBodySolver->m_objective->m_KKTPreconditioner;
- }
-
+ m_internalTime += timeStep;
+ m_deformableBodySolver->setImplicit(m_implicit);
+ m_deformableBodySolver->setLineSearch(m_lineSearch);
+ m_deformableBodySolver->reinitialize(m_softBodies, timeStep);
+ btDispatcherInfo& dispatchInfo = btMultiBodyDynamicsWorld::getDispatchInfo();
+ dispatchInfo.m_timeStep = timeStep;
+ dispatchInfo.m_stepCount = 0;
+ dispatchInfo.m_debugDraw = btMultiBodyDynamicsWorld::getDebugDrawer();
+ btMultiBodyDynamicsWorld::getSolverInfo().m_timeStep = timeStep;
+ if (m_useProjection)
+ {
+ m_deformableBodySolver->m_useProjection = true;
+ m_deformableBodySolver->m_objective->m_projection.m_useStrainLimiting = true;
+ m_deformableBodySolver->m_objective->m_preconditioner = m_deformableBodySolver->m_objective->m_massPreconditioner;
+ }
+ else
+ {
+ m_deformableBodySolver->m_useProjection = false;
+ m_deformableBodySolver->m_objective->m_projection.m_useStrainLimiting = false;
+ m_deformableBodySolver->m_objective->m_preconditioner = m_deformableBodySolver->m_objective->m_KKTPreconditioner;
+ }
}
-
void btDeformableMultiBodyDynamicsWorld::debugDrawWorld()
{
-
btMultiBodyDynamicsWorld::debugDrawWorld();
for (int i = 0; i < getSoftBodyArray().size(); i++)
@@ -556,253 +555,260 @@ void btDeformableMultiBodyDynamicsWorld::debugDrawWorld()
btSoftBodyHelpers::Draw(psb, getDebugDrawer(), getDrawFlags());
}
}
-
-
}
void btDeformableMultiBodyDynamicsWorld::applyRigidBodyGravity(btScalar timeStep)
{
- // Gravity is applied in stepSimulation and then cleared here and then applied here and then cleared here again
- // so that 1) gravity is applied to velocity before constraint solve and 2) gravity is applied in each substep
- // when there are multiple substeps
- btMultiBodyDynamicsWorld::applyGravity();
- // integrate rigid body gravity
- for (int i = 0; i < m_nonStaticRigidBodies.size(); ++i)
- {
- btRigidBody* rb = m_nonStaticRigidBodies[i];
- rb->integrateVelocities(timeStep);
- }
-
- // integrate multibody gravity
- {
- forwardKinematics();
- clearMultiBodyConstraintForces();
- {
- for (int i = 0; i < this->m_multiBodies.size(); i++)
- {
- btMultiBody* bod = m_multiBodies[i];
-
- bool isSleeping = false;
-
- if (bod->getBaseCollider() && bod->getBaseCollider()->getActivationState() == ISLAND_SLEEPING)
- {
- isSleeping = true;
- }
- for (int b = 0; b < bod->getNumLinks(); b++)
- {
- if (bod->getLink(b).m_collider && bod->getLink(b).m_collider->getActivationState() == ISLAND_SLEEPING)
- isSleeping = true;
- }
-
- if (!isSleeping)
- {
- m_scratch_r.resize(bod->getNumLinks() + 1);
- m_scratch_v.resize(bod->getNumLinks() + 1);
- m_scratch_m.resize(bod->getNumLinks() + 1);
- bool isConstraintPass = false;
- {
- if (!bod->isUsingRK4Integration())
- {
- bod->computeAccelerationsArticulatedBodyAlgorithmMultiDof(m_solverInfo.m_timeStep,
- m_scratch_r, m_scratch_v, m_scratch_m,isConstraintPass,
- getSolverInfo().m_jointFeedbackInWorldSpace,
- getSolverInfo().m_jointFeedbackInJointFrame);
- }
- else
- {
- btAssert(" RK4Integration is not supported" );
- }
- }
- }
- }
- }
- }
- clearGravity();
+ // Gravity is applied in stepSimulation and then cleared here and then applied here and then cleared here again
+ // so that 1) gravity is applied to velocity before constraint solve and 2) gravity is applied in each substep
+ // when there are multiple substeps
+ btMultiBodyDynamicsWorld::applyGravity();
+ // integrate rigid body gravity
+ for (int i = 0; i < m_nonStaticRigidBodies.size(); ++i)
+ {
+ btRigidBody* rb = m_nonStaticRigidBodies[i];
+ rb->integrateVelocities(timeStep);
+ }
+
+ // integrate multibody gravity
+ {
+ forwardKinematics();
+ clearMultiBodyConstraintForces();
+ {
+ for (int i = 0; i < this->m_multiBodies.size(); i++)
+ {
+ btMultiBody* bod = m_multiBodies[i];
+
+ bool isSleeping = false;
+
+ if (bod->getBaseCollider() && bod->getBaseCollider()->getActivationState() == ISLAND_SLEEPING)
+ {
+ isSleeping = true;
+ }
+ for (int b = 0; b < bod->getNumLinks(); b++)
+ {
+ if (bod->getLink(b).m_collider && bod->getLink(b).m_collider->getActivationState() == ISLAND_SLEEPING)
+ isSleeping = true;
+ }
+
+ if (!isSleeping)
+ {
+ m_scratch_r.resize(bod->getNumLinks() + 1);
+ m_scratch_v.resize(bod->getNumLinks() + 1);
+ m_scratch_m.resize(bod->getNumLinks() + 1);
+ bool isConstraintPass = false;
+ {
+ if (!bod->isUsingRK4Integration())
+ {
+ bod->computeAccelerationsArticulatedBodyAlgorithmMultiDof(m_solverInfo.m_timeStep,
+ m_scratch_r, m_scratch_v, m_scratch_m, isConstraintPass,
+ getSolverInfo().m_jointFeedbackInWorldSpace,
+ getSolverInfo().m_jointFeedbackInJointFrame);
+ }
+ else
+ {
+ btAssert(" RK4Integration is not supported");
+ }
+ }
+ }
+ }
+ }
+ }
+ clearGravity();
}
void btDeformableMultiBodyDynamicsWorld::clearGravity()
{
- BT_PROFILE("btMultiBody clearGravity");
- // clear rigid body gravity
- for (int i = 0; i < m_nonStaticRigidBodies.size(); i++)
- {
- btRigidBody* body = m_nonStaticRigidBodies[i];
- if (body->isActive())
- {
- body->clearGravity();
- }
- }
- // clear multibody gravity
- for (int i = 0; i < this->m_multiBodies.size(); i++)
- {
- btMultiBody* bod = m_multiBodies[i];
-
- bool isSleeping = false;
-
- if (bod->getBaseCollider() && bod->getBaseCollider()->getActivationState() == ISLAND_SLEEPING)
- {
- isSleeping = true;
- }
- for (int b = 0; b < bod->getNumLinks(); b++)
- {
- if (bod->getLink(b).m_collider && bod->getLink(b).m_collider->getActivationState() == ISLAND_SLEEPING)
- isSleeping = true;
- }
-
- if (!isSleeping)
- {
- bod->addBaseForce(-m_gravity * bod->getBaseMass());
-
- for (int j = 0; j < bod->getNumLinks(); ++j)
- {
- bod->addLinkForce(j, -m_gravity * bod->getLinkMass(j));
- }
- }
- }
+ BT_PROFILE("btMultiBody clearGravity");
+ // clear rigid body gravity
+ for (int i = 0; i < m_nonStaticRigidBodies.size(); i++)
+ {
+ btRigidBody* body = m_nonStaticRigidBodies[i];
+ if (body->isActive())
+ {
+ body->clearGravity();
+ }
+ }
+ // clear multibody gravity
+ for (int i = 0; i < this->m_multiBodies.size(); i++)
+ {
+ btMultiBody* bod = m_multiBodies[i];
+
+ bool isSleeping = false;
+
+ if (bod->getBaseCollider() && bod->getBaseCollider()->getActivationState() == ISLAND_SLEEPING)
+ {
+ isSleeping = true;
+ }
+ for (int b = 0; b < bod->getNumLinks(); b++)
+ {
+ if (bod->getLink(b).m_collider && bod->getLink(b).m_collider->getActivationState() == ISLAND_SLEEPING)
+ isSleeping = true;
+ }
+
+ if (!isSleeping)
+ {
+ bod->addBaseForce(-m_gravity * bod->getBaseMass());
+
+ for (int j = 0; j < bod->getNumLinks(); ++j)
+ {
+ bod->addLinkForce(j, -m_gravity * bod->getLinkMass(j));
+ }
+ }
+ }
}
void btDeformableMultiBodyDynamicsWorld::beforeSolverCallbacks(btScalar timeStep)
{
- if (0 != m_internalTickCallback)
- {
- (*m_internalTickCallback)(this, timeStep);
- }
-
- if (0 != m_solverCallback)
- {
- (*m_solverCallback)(m_internalTime, this);
- }
+ if (0 != m_internalTickCallback)
+ {
+ (*m_internalTickCallback)(this, timeStep);
+ }
+
+ if (0 != m_solverCallback)
+ {
+ (*m_solverCallback)(m_internalTime, this);
+ }
}
void btDeformableMultiBodyDynamicsWorld::afterSolverCallbacks(btScalar timeStep)
{
- if (0 != m_solverCallback)
- {
- (*m_solverCallback)(m_internalTime, this);
- }
+ if (0 != m_solverCallback)
+ {
+ (*m_solverCallback)(m_internalTime, this);
+ }
}
void btDeformableMultiBodyDynamicsWorld::addForce(btSoftBody* psb, btDeformableLagrangianForce* force)
{
- btAlignedObjectArray<btDeformableLagrangianForce*>& forces = m_deformableBodySolver->m_objective->m_lf;
- bool added = false;
- for (int i = 0; i < forces.size(); ++i)
- {
- if (forces[i]->getForceType() == force->getForceType())
- {
- forces[i]->addSoftBody(psb);
- added = true;
- break;
- }
- }
- if (!added)
- {
- force->addSoftBody(psb);
- force->setIndices(m_deformableBodySolver->m_objective->getIndices());
- forces.push_back(force);
- }
+ btAlignedObjectArray<btDeformableLagrangianForce*>& forces = m_deformableBodySolver->m_objective->m_lf;
+ bool added = false;
+ for (int i = 0; i < forces.size(); ++i)
+ {
+ if (forces[i]->getForceType() == force->getForceType())
+ {
+ forces[i]->addSoftBody(psb);
+ added = true;
+ break;
+ }
+ }
+ if (!added)
+ {
+ force->addSoftBody(psb);
+ force->setIndices(m_deformableBodySolver->m_objective->getIndices());
+ forces.push_back(force);
+ }
}
void btDeformableMultiBodyDynamicsWorld::removeForce(btSoftBody* psb, btDeformableLagrangianForce* force)
{
- btAlignedObjectArray<btDeformableLagrangianForce*>& forces = m_deformableBodySolver->m_objective->m_lf;
- int removed_index = -1;
- for (int i = 0; i < forces.size(); ++i)
- {
- if (forces[i]->getForceType() == force->getForceType())
- {
- forces[i]->removeSoftBody(psb);
- if (forces[i]->m_softBodies.size() == 0)
- removed_index = i;
- break;
- }
- }
- if (removed_index >= 0)
- forces.removeAtIndex(removed_index);
+ btAlignedObjectArray<btDeformableLagrangianForce*>& forces = m_deformableBodySolver->m_objective->m_lf;
+ int removed_index = -1;
+ for (int i = 0; i < forces.size(); ++i)
+ {
+ if (forces[i]->getForceType() == force->getForceType())
+ {
+ forces[i]->removeSoftBody(psb);
+ if (forces[i]->m_softBodies.size() == 0)
+ removed_index = i;
+ break;
+ }
+ }
+ if (removed_index >= 0)
+ forces.removeAtIndex(removed_index);
+}
+
+void btDeformableMultiBodyDynamicsWorld::removeSoftBodyForce(btSoftBody* psb)
+{
+ btAlignedObjectArray<btDeformableLagrangianForce*>& forces = m_deformableBodySolver->m_objective->m_lf;
+ for (int i = 0; i < forces.size(); ++i)
+ {
+ forces[i]->removeSoftBody(psb);
+ }
}
void btDeformableMultiBodyDynamicsWorld::removeSoftBody(btSoftBody* body)
{
- m_softBodies.remove(body);
- btCollisionWorld::removeCollisionObject(body);
- // force a reinitialize so that node indices get updated.
- m_deformableBodySolver->reinitialize(m_softBodies, btScalar(-1));
+ removeSoftBodyForce(body);
+ m_softBodies.remove(body);
+ btCollisionWorld::removeCollisionObject(body);
+ // force a reinitialize so that node indices get updated.
+ m_deformableBodySolver->reinitialize(m_softBodies, btScalar(-1));
}
void btDeformableMultiBodyDynamicsWorld::removeCollisionObject(btCollisionObject* collisionObject)
{
- btSoftBody* body = btSoftBody::upcast(collisionObject);
- if (body)
- removeSoftBody(body);
- else
- btDiscreteDynamicsWorld::removeCollisionObject(collisionObject);
+ btSoftBody* body = btSoftBody::upcast(collisionObject);
+ if (body)
+ removeSoftBody(body);
+ else
+ btDiscreteDynamicsWorld::removeCollisionObject(collisionObject);
}
-
int btDeformableMultiBodyDynamicsWorld::stepSimulation(btScalar timeStep, int maxSubSteps, btScalar fixedTimeStep)
{
- startProfiling(timeStep);
-
- int numSimulationSubSteps = 0;
-
- if (maxSubSteps)
- {
- //fixed timestep with interpolation
- m_fixedTimeStep = fixedTimeStep;
- m_localTime += timeStep;
- if (m_localTime >= fixedTimeStep)
- {
- numSimulationSubSteps = int(m_localTime / fixedTimeStep);
- m_localTime -= numSimulationSubSteps * fixedTimeStep;
- }
- }
- else
- {
- //variable timestep
- fixedTimeStep = timeStep;
- m_localTime = m_latencyMotionStateInterpolation ? 0 : timeStep;
- m_fixedTimeStep = 0;
- if (btFuzzyZero(timeStep))
- {
- numSimulationSubSteps = 0;
- maxSubSteps = 0;
- }
- else
- {
- numSimulationSubSteps = 1;
- maxSubSteps = 1;
- }
- }
-
- //process some debugging flags
- if (getDebugDrawer())
- {
- btIDebugDraw* debugDrawer = getDebugDrawer();
- gDisableDeactivation = (debugDrawer->getDebugMode() & btIDebugDraw::DBG_NoDeactivation) != 0;
- }
- if (numSimulationSubSteps)
- {
- //clamp the number of substeps, to prevent simulation grinding spiralling down to a halt
- int clampedSimulationSteps = (numSimulationSubSteps > maxSubSteps) ? maxSubSteps : numSimulationSubSteps;
-
- saveKinematicState(fixedTimeStep * clampedSimulationSteps);
-
- for (int i = 0; i < clampedSimulationSteps; i++)
- {
- internalSingleStepSimulation(fixedTimeStep);
- synchronizeMotionStates();
- }
- }
- else
- {
- synchronizeMotionStates();
- }
-
- clearForces();
-
+ startProfiling(timeStep);
+
+ int numSimulationSubSteps = 0;
+
+ if (maxSubSteps)
+ {
+ //fixed timestep with interpolation
+ m_fixedTimeStep = fixedTimeStep;
+ m_localTime += timeStep;
+ if (m_localTime >= fixedTimeStep)
+ {
+ numSimulationSubSteps = int(m_localTime / fixedTimeStep);
+ m_localTime -= numSimulationSubSteps * fixedTimeStep;
+ }
+ }
+ else
+ {
+ //variable timestep
+ fixedTimeStep = timeStep;
+ m_localTime = m_latencyMotionStateInterpolation ? 0 : timeStep;
+ m_fixedTimeStep = 0;
+ if (btFuzzyZero(timeStep))
+ {
+ numSimulationSubSteps = 0;
+ maxSubSteps = 0;
+ }
+ else
+ {
+ numSimulationSubSteps = 1;
+ maxSubSteps = 1;
+ }
+ }
+
+ //process some debugging flags
+ if (getDebugDrawer())
+ {
+ btIDebugDraw* debugDrawer = getDebugDrawer();
+ gDisableDeactivation = (debugDrawer->getDebugMode() & btIDebugDraw::DBG_NoDeactivation) != 0;
+ }
+ if (numSimulationSubSteps)
+ {
+ //clamp the number of substeps, to prevent simulation grinding spiralling down to a halt
+ int clampedSimulationSteps = (numSimulationSubSteps > maxSubSteps) ? maxSubSteps : numSimulationSubSteps;
+
+ saveKinematicState(fixedTimeStep * clampedSimulationSteps);
+
+ for (int i = 0; i < clampedSimulationSteps; i++)
+ {
+ internalSingleStepSimulation(fixedTimeStep);
+ synchronizeMotionStates();
+ }
+ }
+ else
+ {
+ synchronizeMotionStates();
+ }
+
+ clearForces();
+
#ifndef BT_NO_PROFILE
- CProfileManager::Increment_Frame_Counter();
+ CProfileManager::Increment_Frame_Counter();
#endif //BT_NO_PROFILE
-
- return numSimulationSubSteps;
+
+ return numSimulationSubSteps;
}
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.h b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.h
index 76b58a0378..4b7069aac7 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.h
@@ -36,185 +36,192 @@ typedef btAlignedObjectArray<btSoftBody*> btSoftBodyArray;
class btDeformableMultiBodyDynamicsWorld : public btMultiBodyDynamicsWorld
{
- typedef btAlignedObjectArray<btVector3> TVStack;
- ///Solver classes that encapsulate multiple deformable bodies for solving
- btDeformableBodySolver* m_deformableBodySolver;
- btSoftBodyArray m_softBodies;
- int m_drawFlags;
- bool m_drawNodeTree;
- bool m_drawFaceTree;
- bool m_drawClusterTree;
- btSoftBodyWorldInfo m_sbi;
- btScalar m_internalTime;
- int m_ccdIterations;
- bool m_implicit;
- bool m_lineSearch;
- bool m_useProjection;
- DeformableBodyInplaceSolverIslandCallback* m_solverDeformableBodyIslandCallback;
-
- typedef void (*btSolverCallback)(btScalar time, btDeformableMultiBodyDynamicsWorld* world);
- btSolverCallback m_solverCallback;
-
+ typedef btAlignedObjectArray<btVector3> TVStack;
+ ///Solver classes that encapsulate multiple deformable bodies for solving
+ btDeformableBodySolver* m_deformableBodySolver;
+ btSoftBodyArray m_softBodies;
+ int m_drawFlags;
+ bool m_drawNodeTree;
+ bool m_drawFaceTree;
+ bool m_drawClusterTree;
+ btSoftBodyWorldInfo m_sbi;
+ btScalar m_internalTime;
+ int m_ccdIterations;
+ bool m_implicit;
+ bool m_lineSearch;
+ bool m_useProjection;
+ DeformableBodyInplaceSolverIslandCallback* m_solverDeformableBodyIslandCallback;
+
+ typedef void (*btSolverCallback)(btScalar time, btDeformableMultiBodyDynamicsWorld* world);
+ btSolverCallback m_solverCallback;
+
protected:
- virtual void internalSingleStepSimulation(btScalar timeStep);
-
- virtual void integrateTransforms(btScalar timeStep);
-
- void positionCorrection(btScalar timeStep);
-
- void solveConstraints(btScalar timeStep);
-
- void updateActivationState(btScalar timeStep);
-
- void clearGravity();
-
+ virtual void internalSingleStepSimulation(btScalar timeStep);
+
+ virtual void integrateTransforms(btScalar timeStep);
+
+ void positionCorrection(btScalar timeStep);
+
+ void solveConstraints(btScalar timeStep);
+
+ void updateActivationState(btScalar timeStep);
+
+ void clearGravity();
+
public:
btDeformableMultiBodyDynamicsWorld(btDispatcher* dispatcher, btBroadphaseInterface* pairCache, btDeformableMultiBodyConstraintSolver* constraintSolver, btCollisionConfiguration* collisionConfiguration, btDeformableBodySolver* deformableBodySolver = 0);
- virtual int stepSimulation(btScalar timeStep, int maxSubSteps = 1, btScalar fixedTimeStep = btScalar(1.) / btScalar(60.));
+ virtual int stepSimulation(btScalar timeStep, int maxSubSteps = 1, btScalar fixedTimeStep = btScalar(1.) / btScalar(60.));
virtual void debugDrawWorld();
- void setSolverCallback(btSolverCallback cb)
- {
- m_solverCallback = cb;
- }
-
- virtual ~btDeformableMultiBodyDynamicsWorld();
-
- virtual btMultiBodyDynamicsWorld* getMultiBodyDynamicsWorld()
- {
- return (btMultiBodyDynamicsWorld*)(this);
- }
-
- virtual const btMultiBodyDynamicsWorld* getMultiBodyDynamicsWorld() const
- {
- return (const btMultiBodyDynamicsWorld*)(this);
- }
-
- virtual btDynamicsWorldType getWorldType() const
- {
- return BT_DEFORMABLE_MULTIBODY_DYNAMICS_WORLD;
- }
-
- virtual void predictUnconstraintMotion(btScalar timeStep);
-
- virtual void addSoftBody(btSoftBody* body, int collisionFilterGroup = btBroadphaseProxy::DefaultFilter, int collisionFilterMask = btBroadphaseProxy::AllFilter);
-
- btSoftBodyArray& getSoftBodyArray()
- {
- return m_softBodies;
- }
-
- const btSoftBodyArray& getSoftBodyArray() const
- {
- return m_softBodies;
- }
-
- btSoftBodyWorldInfo& getWorldInfo()
- {
- return m_sbi;
- }
-
- const btSoftBodyWorldInfo& getWorldInfo() const
- {
- return m_sbi;
- }
-
- void reinitialize(btScalar timeStep);
-
- void applyRigidBodyGravity(btScalar timeStep);
-
- void beforeSolverCallbacks(btScalar timeStep);
-
- void afterSolverCallbacks(btScalar timeStep);
-
- void addForce(btSoftBody* psb, btDeformableLagrangianForce* force);
-
- void removeForce(btSoftBody* psb, btDeformableLagrangianForce* force);
-
- void removeSoftBody(btSoftBody* body);
-
- void removeCollisionObject(btCollisionObject* collisionObject);
-
- int getDrawFlags() const { return (m_drawFlags); }
- void setDrawFlags(int f) { m_drawFlags = f; }
-
- void setupConstraints();
-
- void performDeformableCollisionDetection();
-
- void solveMultiBodyConstraints();
-
- void solveContactConstraints();
-
- void sortConstraints();
-
- void softBodySelfCollision();
-
- void setImplicit(bool implicit)
- {
- m_implicit = implicit;
- }
-
- void setLineSearch(bool lineSearch)
- {
- m_lineSearch = lineSearch;
- }
-
- void applyRepulsionForce(btScalar timeStep);
-
- void performGeometricCollisions(btScalar timeStep);
-
- struct btDeformableSingleRayCallback : public btBroadphaseRayCallback
- {
- btVector3 m_rayFromWorld;
- btVector3 m_rayToWorld;
- btTransform m_rayFromTrans;
- btTransform m_rayToTrans;
- btVector3 m_hitNormal;
-
- const btDeformableMultiBodyDynamicsWorld* m_world;
- btCollisionWorld::RayResultCallback& m_resultCallback;
-
- btDeformableSingleRayCallback(const btVector3& rayFromWorld, const btVector3& rayToWorld, const btDeformableMultiBodyDynamicsWorld* world, btCollisionWorld::RayResultCallback& resultCallback)
- : m_rayFromWorld(rayFromWorld),
- m_rayToWorld(rayToWorld),
- m_world(world),
- m_resultCallback(resultCallback)
- {
- m_rayFromTrans.setIdentity();
- m_rayFromTrans.setOrigin(m_rayFromWorld);
- m_rayToTrans.setIdentity();
- m_rayToTrans.setOrigin(m_rayToWorld);
-
- btVector3 rayDir = (rayToWorld - rayFromWorld);
-
- rayDir.normalize();
- ///what about division by zero? --> just set rayDirection[i] to INF/1e30
- m_rayDirectionInverse[0] = rayDir[0] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[0];
- m_rayDirectionInverse[1] = rayDir[1] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[1];
- m_rayDirectionInverse[2] = rayDir[2] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[2];
- m_signs[0] = m_rayDirectionInverse[0] < 0.0;
- m_signs[1] = m_rayDirectionInverse[1] < 0.0;
- m_signs[2] = m_rayDirectionInverse[2] < 0.0;
-
- m_lambda_max = rayDir.dot(m_rayToWorld - m_rayFromWorld);
- }
-
- virtual bool process(const btBroadphaseProxy* proxy)
- {
- ///terminate further ray tests, once the closestHitFraction reached zero
- if (m_resultCallback.m_closestHitFraction == btScalar(0.f))
- return false;
-
- btCollisionObject* collisionObject = (btCollisionObject*)proxy->m_clientObject;
-
- //only perform raycast if filterMask matches
- if (m_resultCallback.needsCollision(collisionObject->getBroadphaseHandle()))
- {
- //RigidcollisionObject* collisionObject = ctrl->GetRigidcollisionObject();
- //btVector3 collisionObjectAabbMin,collisionObjectAabbMax;
+ void setSolverCallback(btSolverCallback cb)
+ {
+ m_solverCallback = cb;
+ }
+
+ virtual ~btDeformableMultiBodyDynamicsWorld();
+
+ virtual btMultiBodyDynamicsWorld* getMultiBodyDynamicsWorld()
+ {
+ return (btMultiBodyDynamicsWorld*)(this);
+ }
+
+ virtual const btMultiBodyDynamicsWorld* getMultiBodyDynamicsWorld() const
+ {
+ return (const btMultiBodyDynamicsWorld*)(this);
+ }
+
+ virtual btDynamicsWorldType getWorldType() const
+ {
+ return BT_DEFORMABLE_MULTIBODY_DYNAMICS_WORLD;
+ }
+
+ virtual void predictUnconstraintMotion(btScalar timeStep);
+
+ virtual void addSoftBody(btSoftBody* body, int collisionFilterGroup = btBroadphaseProxy::DefaultFilter, int collisionFilterMask = btBroadphaseProxy::AllFilter);
+
+ btSoftBodyArray& getSoftBodyArray()
+ {
+ return m_softBodies;
+ }
+
+ const btSoftBodyArray& getSoftBodyArray() const
+ {
+ return m_softBodies;
+ }
+
+ btSoftBodyWorldInfo& getWorldInfo()
+ {
+ return m_sbi;
+ }
+
+ const btSoftBodyWorldInfo& getWorldInfo() const
+ {
+ return m_sbi;
+ }
+
+ void reinitialize(btScalar timeStep);
+
+ void applyRigidBodyGravity(btScalar timeStep);
+
+ void beforeSolverCallbacks(btScalar timeStep);
+
+ void afterSolverCallbacks(btScalar timeStep);
+
+ void addForce(btSoftBody* psb, btDeformableLagrangianForce* force);
+
+ void removeForce(btSoftBody* psb, btDeformableLagrangianForce* force);
+
+ void removeSoftBodyForce(btSoftBody* psb);
+
+ void removeSoftBody(btSoftBody* body);
+
+ void removeCollisionObject(btCollisionObject* collisionObject);
+
+ int getDrawFlags() const { return (m_drawFlags); }
+ void setDrawFlags(int f) { m_drawFlags = f; }
+
+ void setupConstraints();
+
+ void performDeformableCollisionDetection();
+
+ void solveMultiBodyConstraints();
+
+ void solveContactConstraints();
+
+ void sortConstraints();
+
+ void softBodySelfCollision();
+
+ void setImplicit(bool implicit)
+ {
+ m_implicit = implicit;
+ }
+
+ void setLineSearch(bool lineSearch)
+ {
+ m_lineSearch = lineSearch;
+ }
+
+ void setUseProjection(bool useProjection)
+ {
+ m_useProjection = useProjection;
+ }
+
+ void applyRepulsionForce(btScalar timeStep);
+
+ void performGeometricCollisions(btScalar timeStep);
+
+ struct btDeformableSingleRayCallback : public btBroadphaseRayCallback
+ {
+ btVector3 m_rayFromWorld;
+ btVector3 m_rayToWorld;
+ btTransform m_rayFromTrans;
+ btTransform m_rayToTrans;
+ btVector3 m_hitNormal;
+
+ const btDeformableMultiBodyDynamicsWorld* m_world;
+ btCollisionWorld::RayResultCallback& m_resultCallback;
+
+ btDeformableSingleRayCallback(const btVector3& rayFromWorld, const btVector3& rayToWorld, const btDeformableMultiBodyDynamicsWorld* world, btCollisionWorld::RayResultCallback& resultCallback)
+ : m_rayFromWorld(rayFromWorld),
+ m_rayToWorld(rayToWorld),
+ m_world(world),
+ m_resultCallback(resultCallback)
+ {
+ m_rayFromTrans.setIdentity();
+ m_rayFromTrans.setOrigin(m_rayFromWorld);
+ m_rayToTrans.setIdentity();
+ m_rayToTrans.setOrigin(m_rayToWorld);
+
+ btVector3 rayDir = (rayToWorld - rayFromWorld);
+
+ rayDir.normalize();
+ ///what about division by zero? --> just set rayDirection[i] to INF/1e30
+ m_rayDirectionInverse[0] = rayDir[0] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[0];
+ m_rayDirectionInverse[1] = rayDir[1] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[1];
+ m_rayDirectionInverse[2] = rayDir[2] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[2];
+ m_signs[0] = m_rayDirectionInverse[0] < 0.0;
+ m_signs[1] = m_rayDirectionInverse[1] < 0.0;
+ m_signs[2] = m_rayDirectionInverse[2] < 0.0;
+
+ m_lambda_max = rayDir.dot(m_rayToWorld - m_rayFromWorld);
+ }
+
+ virtual bool process(const btBroadphaseProxy* proxy)
+ {
+ ///terminate further ray tests, once the closestHitFraction reached zero
+ if (m_resultCallback.m_closestHitFraction == btScalar(0.f))
+ return false;
+
+ btCollisionObject* collisionObject = (btCollisionObject*)proxy->m_clientObject;
+
+ //only perform raycast if filterMask matches
+ if (m_resultCallback.needsCollision(collisionObject->getBroadphaseHandle()))
+ {
+ //RigidcollisionObject* collisionObject = ctrl->GetRigidcollisionObject();
+ //btVector3 collisionObjectAabbMin,collisionObjectAabbMax;
#if 0
#ifdef RECALCULATE_AABB
btVector3 collisionObjectAabbMin,collisionObjectAabbMax;
@@ -225,87 +232,85 @@ public:
const btVector3& collisionObjectAabbMax = collisionObject->getBroadphaseHandle()->m_aabbMax;
#endif
#endif
- //btScalar hitLambda = m_resultCallback.m_closestHitFraction;
- //culling already done by broadphase
- //if (btRayAabb(m_rayFromWorld,m_rayToWorld,collisionObjectAabbMin,collisionObjectAabbMax,hitLambda,m_hitNormal))
- {
- m_world->rayTestSingle(m_rayFromTrans, m_rayToTrans,
- collisionObject,
- collisionObject->getCollisionShape(),
- collisionObject->getWorldTransform(),
- m_resultCallback);
- }
- }
- return true;
- }
- };
-
-
-
- void rayTest(const btVector3& rayFromWorld, const btVector3& rayToWorld, RayResultCallback& resultCallback) const
- {
- BT_PROFILE("rayTest");
- /// use the broadphase to accelerate the search for objects, based on their aabb
- /// and for each object with ray-aabb overlap, perform an exact ray test
- btDeformableSingleRayCallback rayCB(rayFromWorld, rayToWorld, this, resultCallback);
-
+ //btScalar hitLambda = m_resultCallback.m_closestHitFraction;
+ //culling already done by broadphase
+ //if (btRayAabb(m_rayFromWorld,m_rayToWorld,collisionObjectAabbMin,collisionObjectAabbMax,hitLambda,m_hitNormal))
+ {
+ m_world->rayTestSingle(m_rayFromTrans, m_rayToTrans,
+ collisionObject,
+ collisionObject->getCollisionShape(),
+ collisionObject->getWorldTransform(),
+ m_resultCallback);
+ }
+ }
+ return true;
+ }
+ };
+
+ void rayTest(const btVector3& rayFromWorld, const btVector3& rayToWorld, RayResultCallback& resultCallback) const
+ {
+ BT_PROFILE("rayTest");
+ /// use the broadphase to accelerate the search for objects, based on their aabb
+ /// and for each object with ray-aabb overlap, perform an exact ray test
+ btDeformableSingleRayCallback rayCB(rayFromWorld, rayToWorld, this, resultCallback);
+
#ifndef USE_BRUTEFORCE_RAYBROADPHASE
- m_broadphasePairCache->rayTest(rayFromWorld, rayToWorld, rayCB);
+ m_broadphasePairCache->rayTest(rayFromWorld, rayToWorld, rayCB);
#else
- for (int i = 0; i < this->getNumCollisionObjects(); i++)
- {
- rayCB.process(m_collisionObjects[i]->getBroadphaseHandle());
- }
+ for (int i = 0; i < this->getNumCollisionObjects(); i++)
+ {
+ rayCB.process(m_collisionObjects[i]->getBroadphaseHandle());
+ }
#endif //USE_BRUTEFORCE_RAYBROADPHASE
- }
-
- void rayTestSingle(const btTransform& rayFromTrans, const btTransform& rayToTrans,
- btCollisionObject* collisionObject,
- const btCollisionShape* collisionShape,
- const btTransform& colObjWorldTransform,
- RayResultCallback& resultCallback) const
- {
- if (collisionShape->isSoftBody())
- {
- btSoftBody* softBody = btSoftBody::upcast(collisionObject);
- if (softBody)
- {
- btSoftBody::sRayCast softResult;
- if (softBody->rayFaceTest(rayFromTrans.getOrigin(), rayToTrans.getOrigin(), softResult))
- {
- if (softResult.fraction <= resultCallback.m_closestHitFraction)
- {
- btCollisionWorld::LocalShapeInfo shapeInfo;
- shapeInfo.m_shapePart = 0;
- shapeInfo.m_triangleIndex = softResult.index;
- // get the normal
- btVector3 rayDir = rayToTrans.getOrigin() - rayFromTrans.getOrigin();
- btVector3 normal = -rayDir;
- normal.normalize();
- {
- normal = softBody->m_faces[softResult.index].m_normal;
- if (normal.dot(rayDir) > 0)
- {
- // normal always point toward origin of the ray
- normal = -normal;
- }
- }
-
- btCollisionWorld::LocalRayResult rayResult(collisionObject,
- &shapeInfo,
- normal,
- softResult.fraction);
- bool normalInWorldSpace = true;
- resultCallback.addSingleResult(rayResult, normalInWorldSpace);
- }
- }
- }
- }
- else
- {
- btCollisionWorld::rayTestSingle(rayFromTrans, rayToTrans, collisionObject, collisionShape, colObjWorldTransform, resultCallback);
- }
- }
+ }
+
+ void rayTestSingle(const btTransform& rayFromTrans, const btTransform& rayToTrans,
+ btCollisionObject* collisionObject,
+ const btCollisionShape* collisionShape,
+ const btTransform& colObjWorldTransform,
+ RayResultCallback& resultCallback) const
+ {
+ if (collisionShape->isSoftBody())
+ {
+ btSoftBody* softBody = btSoftBody::upcast(collisionObject);
+ if (softBody)
+ {
+ btSoftBody::sRayCast softResult;
+ if (softBody->rayFaceTest(rayFromTrans.getOrigin(), rayToTrans.getOrigin(), softResult))
+ {
+ if (softResult.fraction <= resultCallback.m_closestHitFraction)
+ {
+ btCollisionWorld::LocalShapeInfo shapeInfo;
+ shapeInfo.m_shapePart = 0;
+ shapeInfo.m_triangleIndex = softResult.index;
+ // get the normal
+ btVector3 rayDir = rayToTrans.getOrigin() - rayFromTrans.getOrigin();
+ btVector3 normal = -rayDir;
+ normal.normalize();
+ {
+ normal = softBody->m_faces[softResult.index].m_normal;
+ if (normal.dot(rayDir) > 0)
+ {
+ // normal always point toward origin of the ray
+ normal = -normal;
+ }
+ }
+
+ btCollisionWorld::LocalRayResult rayResult(collisionObject,
+ &shapeInfo,
+ normal,
+ softResult.fraction);
+ bool normalInWorldSpace = true;
+ resultCallback.addSingleResult(rayResult, normalInWorldSpace);
+ }
+ }
+ }
+ }
+ else
+ {
+ btCollisionWorld::rayTestSingle(rayFromTrans, rayToTrans, collisionObject, collisionShape, colObjWorldTransform, resultCallback);
+ }
+ }
};
#endif //BT_DEFORMABLE_MULTIBODY_DYNAMICS_WORLD_H
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableNeoHookeanForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableNeoHookeanForce.h
index d89bc4aca4..60798c5bcd 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableNeoHookeanForce.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableNeoHookeanForce.h
@@ -23,30 +23,30 @@ subject to the following restrictions:
class btDeformableNeoHookeanForce : public btDeformableLagrangianForce
{
public:
- typedef btAlignedObjectArray<btVector3> TVStack;
- btScalar m_mu, m_lambda; // Lame Parameters
- btScalar m_E, m_nu; // Young's modulus and Poisson ratio
- btScalar m_mu_damp, m_lambda_damp;
- btDeformableNeoHookeanForce(): m_mu(1), m_lambda(1)
- {
- btScalar damping = 0.05;
- m_mu_damp = damping * m_mu;
- m_lambda_damp = damping * m_lambda;
+ typedef btAlignedObjectArray<btVector3> TVStack;
+ btScalar m_mu, m_lambda; // Lame Parameters
+ btScalar m_E, m_nu; // Young's modulus and Poisson ratio
+ btScalar m_mu_damp, m_lambda_damp;
+ btDeformableNeoHookeanForce() : m_mu(1), m_lambda(1)
+ {
+ btScalar damping = 0.05;
+ m_mu_damp = damping * m_mu;
+ m_lambda_damp = damping * m_lambda;
updateYoungsModulusAndPoissonRatio();
- }
-
- btDeformableNeoHookeanForce(btScalar mu, btScalar lambda, btScalar damping = 0.05): m_mu(mu), m_lambda(lambda)
- {
- m_mu_damp = damping * m_mu;
- m_lambda_damp = damping * m_lambda;
+ }
+
+ btDeformableNeoHookeanForce(btScalar mu, btScalar lambda, btScalar damping = 0.05) : m_mu(mu), m_lambda(lambda)
+ {
+ m_mu_damp = damping * m_mu;
+ m_lambda_damp = damping * m_lambda;
updateYoungsModulusAndPoissonRatio();
- }
+ }
void updateYoungsModulusAndPoissonRatio()
{
// conversion from Lame Parameters to Young's modulus and Poisson ratio
// https://en.wikipedia.org/wiki/Lam%C3%A9_parameters
- m_E = m_mu * (3*m_lambda + 2*m_mu)/(m_lambda + m_mu);
+ m_E = m_mu * (3 * m_lambda + 2 * m_mu) / (m_lambda + m_mu);
m_nu = m_lambda * 0.5 / (m_mu + m_lambda);
}
@@ -55,21 +55,21 @@ public:
// conversion from Young's modulus and Poisson ratio to Lame Parameters
// https://en.wikipedia.org/wiki/Lam%C3%A9_parameters
m_mu = m_E * 0.5 / (1 + m_nu);
- m_lambda = m_E * m_nu / ((1 + m_nu) * (1- 2*m_nu));
+ m_lambda = m_E * m_nu / ((1 + m_nu) * (1 - 2 * m_nu));
}
- void setYoungsModulus(btScalar E)
- {
+ void setYoungsModulus(btScalar E)
+ {
m_E = E;
updateLameParameters();
- }
+ }
void setPoissonRatio(btScalar nu)
{
m_nu = nu;
updateLameParameters();
}
-
+
void setDamping(btScalar damping)
{
m_mu_damp = damping * m_mu;
@@ -83,339 +83,338 @@ public:
updateYoungsModulusAndPoissonRatio();
}
- virtual void addScaledForces(btScalar scale, TVStack& force)
- {
- addScaledDampingForce(scale, force);
- addScaledElasticForce(scale, force);
- }
-
- virtual void addScaledExplicitForce(btScalar scale, TVStack& force)
- {
- addScaledElasticForce(scale, force);
- }
-
- // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search
- virtual void addScaledDampingForce(btScalar scale, TVStack& force)
- {
- if (m_mu_damp == 0 && m_lambda_damp == 0)
- return;
- int numNodes = getNumNodes();
- btAssert(numNodes <= force.size());
- btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1);
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_tetras.size(); ++j)
- {
- btSoftBody::Tetra& tetra = psb->m_tetras[j];
- btSoftBody::Node* node0 = tetra.m_n[0];
- btSoftBody::Node* node1 = tetra.m_n[1];
- btSoftBody::Node* node2 = tetra.m_n[2];
- btSoftBody::Node* node3 = tetra.m_n[3];
- size_t id0 = node0->index;
- size_t id1 = node1->index;
- size_t id2 = node2->index;
- size_t id3 = node3->index;
- btMatrix3x3 dF = DsFromVelocity(node0, node1, node2, node3) * tetra.m_Dm_inverse;
- btMatrix3x3 I;
- I.setIdentity();
- btMatrix3x3 dP = (dF + dF.transpose()) * m_mu_damp + I * (dF[0][0]+dF[1][1]+dF[2][2]) * m_lambda_damp;
-// firstPiolaDampingDifferential(psb->m_tetraScratchesTn[j], dF, dP);
- btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col);
- btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose();
+ virtual void addScaledForces(btScalar scale, TVStack& force)
+ {
+ addScaledDampingForce(scale, force);
+ addScaledElasticForce(scale, force);
+ }
+
+ virtual void addScaledExplicitForce(btScalar scale, TVStack& force)
+ {
+ addScaledElasticForce(scale, force);
+ }
+
+ // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search
+ virtual void addScaledDampingForce(btScalar scale, TVStack& force)
+ {
+ if (m_mu_damp == 0 && m_lambda_damp == 0)
+ return;
+ int numNodes = getNumNodes();
+ btAssert(numNodes <= force.size());
+ btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1);
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_tetras.size(); ++j)
+ {
+ btSoftBody::Tetra& tetra = psb->m_tetras[j];
+ btSoftBody::Node* node0 = tetra.m_n[0];
+ btSoftBody::Node* node1 = tetra.m_n[1];
+ btSoftBody::Node* node2 = tetra.m_n[2];
+ btSoftBody::Node* node3 = tetra.m_n[3];
+ size_t id0 = node0->index;
+ size_t id1 = node1->index;
+ size_t id2 = node2->index;
+ size_t id3 = node3->index;
+ btMatrix3x3 dF = DsFromVelocity(node0, node1, node2, node3) * tetra.m_Dm_inverse;
+ btMatrix3x3 I;
+ I.setIdentity();
+ btMatrix3x3 dP = (dF + dF.transpose()) * m_mu_damp + I * (dF[0][0] + dF[1][1] + dF[2][2]) * m_lambda_damp;
+ // firstPiolaDampingDifferential(psb->m_tetraScratchesTn[j], dF, dP);
+ btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose() * grad_N_hat_1st_col);
+ btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose();
+
+ // damping force differential
+ btScalar scale1 = scale * tetra.m_element_measure;
+ force[id0] -= scale1 * df_on_node0;
+ force[id1] -= scale1 * df_on_node123.getColumn(0);
+ force[id2] -= scale1 * df_on_node123.getColumn(1);
+ force[id3] -= scale1 * df_on_node123.getColumn(2);
+ }
+ }
+ }
+
+ virtual double totalElasticEnergy(btScalar dt)
+ {
+ double energy = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_tetraScratches.size(); ++j)
+ {
+ btSoftBody::Tetra& tetra = psb->m_tetras[j];
+ btSoftBody::TetraScratch& s = psb->m_tetraScratches[j];
+ energy += tetra.m_element_measure * elasticEnergyDensity(s);
+ }
+ }
+ return energy;
+ }
+
+ // The damping energy is formulated as in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search
+ virtual double totalDampingEnergy(btScalar dt)
+ {
+ double energy = 0;
+ int sz = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ sz = btMax(sz, psb->m_nodes[j].index);
+ }
+ }
+ TVStack dampingForce;
+ dampingForce.resize(sz + 1);
+ for (int i = 0; i < dampingForce.size(); ++i)
+ dampingForce[i].setZero();
+ addScaledDampingForce(0.5, dampingForce);
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ const btSoftBody::Node& node = psb->m_nodes[j];
+ energy -= dampingForce[node.index].dot(node.m_v) / dt;
+ }
+ }
+ return energy;
+ }
+
+ double elasticEnergyDensity(const btSoftBody::TetraScratch& s)
+ {
+ double density = 0;
+ density += m_mu * 0.5 * (s.m_trace - 3.);
+ density += m_lambda * 0.5 * (s.m_J - 1. - 0.75 * m_mu / m_lambda) * (s.m_J - 1. - 0.75 * m_mu / m_lambda);
+ density -= m_mu * 0.5 * log(s.m_trace + 1);
+ return density;
+ }
- // damping force differential
- btScalar scale1 = scale * tetra.m_element_measure;
- force[id0] -= scale1 * df_on_node0;
- force[id1] -= scale1 * df_on_node123.getColumn(0);
- force[id2] -= scale1 * df_on_node123.getColumn(1);
- force[id3] -= scale1 * df_on_node123.getColumn(2);
- }
- }
- }
-
- virtual double totalElasticEnergy(btScalar dt)
- {
- double energy = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_tetraScratches.size(); ++j)
- {
- btSoftBody::Tetra& tetra = psb->m_tetras[j];
- btSoftBody::TetraScratch& s = psb->m_tetraScratches[j];
- energy += tetra.m_element_measure * elasticEnergyDensity(s);
- }
- }
- return energy;
- }
-
- // The damping energy is formulated as in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search
- virtual double totalDampingEnergy(btScalar dt)
- {
- double energy = 0;
- int sz = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- sz = btMax(sz, psb->m_nodes[j].index);
- }
- }
- TVStack dampingForce;
- dampingForce.resize(sz+1);
- for (int i = 0; i < dampingForce.size(); ++i)
- dampingForce[i].setZero();
- addScaledDampingForce(0.5, dampingForce);
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- const btSoftBody::Node& node = psb->m_nodes[j];
- energy -= dampingForce[node.index].dot(node.m_v) / dt;
- }
- }
- return energy;
- }
-
- double elasticEnergyDensity(const btSoftBody::TetraScratch& s)
- {
- double density = 0;
- density += m_mu * 0.5 * (s.m_trace - 3.);
- density += m_lambda * 0.5 * (s.m_J - 1. - 0.75 * m_mu / m_lambda)* (s.m_J - 1. - 0.75 * m_mu / m_lambda);
- density -= m_mu * 0.5 * log(s.m_trace+1);
- return density;
- }
-
- virtual void addScaledElasticForce(btScalar scale, TVStack& force)
- {
- int numNodes = getNumNodes();
- btAssert(numNodes <= force.size());
- btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1);
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- btScalar max_p = psb->m_cfg.m_maxStress;
- for (int j = 0; j < psb->m_tetras.size(); ++j)
- {
- btSoftBody::Tetra& tetra = psb->m_tetras[j];
- btMatrix3x3 P;
- firstPiola(psb->m_tetraScratches[j],P);
+ virtual void addScaledElasticForce(btScalar scale, TVStack& force)  // for every tetra of every active soft body: force[node->index] -= scale * m_element_measure * (per-node elastic force term)
+ {
+ int numNodes = getNumNodes();
+ btAssert(numNodes <= force.size());  // force must hold at least one entry per node
+ btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1);
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())  // inactive bodies contribute nothing
+ {
+ continue;
+ }
+ btScalar max_p = psb->m_cfg.m_maxStress;  // clamp bound; only read inside the USE_SVD section and only applied when > 0
+ for (int j = 0; j < psb->m_tetras.size(); ++j)
+ {
+ btSoftBody::Tetra& tetra = psb->m_tetras[j];
+ btMatrix3x3 P;
+ firstPiola(psb->m_tetraScratches[j], P);  // P is filled from the cached per-tetra scratch data
#ifdef USE_SVD
- if (max_p > 0)
- {
- // since we want to clamp the principal stress to max_p, we only need to
- // calculate SVD when sigma_0^2 + sigma_1^2 + sigma_2^2 > max_p * max_p
- btScalar trPTP = (P[0].length2() + P[1].length2() + P[2].length2());
- if (trPTP > max_p * max_p)
- {
- btMatrix3x3 U, V;
- btVector3 sigma;
- singularValueDecomposition(P, U, sigma, V);
- sigma[0] = btMin(sigma[0], max_p);
- sigma[1] = btMin(sigma[1], max_p);
- sigma[2] = btMin(sigma[2], max_p);
- sigma[0] = btMax(sigma[0], -max_p);
- sigma[1] = btMax(sigma[1], -max_p);
- sigma[2] = btMax(sigma[2], -max_p);
- btMatrix3x3 Sigma;
- Sigma.setIdentity();
- Sigma[0][0] = sigma[0];
- Sigma[1][1] = sigma[1];
- Sigma[2][2] = sigma[2];
- P = U * Sigma * V.transpose();
- }
- }
+ if (max_p > 0)
+ {
+ // since we want to clamp the principal stress to max_p, we only need to
+ // calculate SVD when sigma_0^2 + sigma_1^2 + sigma_2^2 > max_p * max_p
+ btScalar trPTP = (P[0].length2() + P[1].length2() + P[2].length2());  // sum of the squared entries of P
+ if (trPTP > max_p * max_p)
+ {
+ btMatrix3x3 U, V;
+ btVector3 sigma;
+ singularValueDecomposition(P, U, sigma, V);
+ sigma[0] = btMin(sigma[0], max_p);  // clamp each singular value to [-max_p, max_p]
+ sigma[1] = btMin(sigma[1], max_p);
+ sigma[2] = btMin(sigma[2], max_p);
+ sigma[0] = btMax(sigma[0], -max_p);
+ sigma[1] = btMax(sigma[1], -max_p);
+ sigma[2] = btMax(sigma[2], -max_p);
+ btMatrix3x3 Sigma;
+ Sigma.setIdentity();
+ Sigma[0][0] = sigma[0];
+ Sigma[1][1] = sigma[1];
+ Sigma[2][2] = sigma[2];
+ P = U * Sigma * V.transpose();  // rebuild P from the clamped singular values
+ }
+ }
#endif
-// btVector3 force_on_node0 = P * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col);
- btMatrix3x3 force_on_node123 = P * tetra.m_Dm_inverse.transpose();
- btVector3 force_on_node0 = force_on_node123 * grad_N_hat_1st_col;
-
- btSoftBody::Node* node0 = tetra.m_n[0];
- btSoftBody::Node* node1 = tetra.m_n[1];
- btSoftBody::Node* node2 = tetra.m_n[2];
- btSoftBody::Node* node3 = tetra.m_n[3];
- size_t id0 = node0->index;
- size_t id1 = node1->index;
- size_t id2 = node2->index;
- size_t id3 = node3->index;
-
- // elastic force
- btScalar scale1 = scale * tetra.m_element_measure;
- force[id0] -= scale1 * force_on_node0;
- force[id1] -= scale1 * force_on_node123.getColumn(0);
- force[id2] -= scale1 * force_on_node123.getColumn(1);
- force[id3] -= scale1 * force_on_node123.getColumn(2);
- }
- }
- }
-
- // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search
- virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df)
- {
- if (m_mu_damp == 0 && m_lambda_damp == 0)
- return;
- int numNodes = getNumNodes();
- btAssert(numNodes <= df.size());
- btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1);
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_tetras.size(); ++j)
- {
- btSoftBody::Tetra& tetra = psb->m_tetras[j];
- btSoftBody::Node* node0 = tetra.m_n[0];
- btSoftBody::Node* node1 = tetra.m_n[1];
- btSoftBody::Node* node2 = tetra.m_n[2];
- btSoftBody::Node* node3 = tetra.m_n[3];
- size_t id0 = node0->index;
- size_t id1 = node1->index;
- size_t id2 = node2->index;
- size_t id3 = node3->index;
- btMatrix3x3 dF = Ds(id0, id1, id2, id3, dv) * tetra.m_Dm_inverse;
- btMatrix3x3 I;
- I.setIdentity();
- btMatrix3x3 dP = (dF + dF.transpose()) * m_mu_damp + I * (dF[0][0]+dF[1][1]+dF[2][2]) * m_lambda_damp;
-// firstPiolaDampingDifferential(psb->m_tetraScratchesTn[j], dF, dP);
-// btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col);
- btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose();
- btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col;
+ // btVector3 force_on_node0 = P * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col);
+ btMatrix3x3 force_on_node123 = P * tetra.m_Dm_inverse.transpose();  // columns 0..2 are applied to nodes 1..3 below
+ btVector3 force_on_node0 = force_on_node123 * grad_N_hat_1st_col;  // times (-1, -1, -1): minus the sum of the three columns
+
+ btSoftBody::Node* node0 = tetra.m_n[0];
+ btSoftBody::Node* node1 = tetra.m_n[1];
+ btSoftBody::Node* node2 = tetra.m_n[2];
+ btSoftBody::Node* node3 = tetra.m_n[3];
+ size_t id0 = node0->index;  // Node::index is used directly as the position in force
+ size_t id1 = node1->index;
+ size_t id2 = node2->index;
+ size_t id3 = node3->index;
+
+ // elastic force
+ btScalar scale1 = scale * tetra.m_element_measure;
+ force[id0] -= scale1 * force_on_node0;
+ force[id1] -= scale1 * force_on_node123.getColumn(0);
+ force[id2] -= scale1 * force_on_node123.getColumn(1);
+ force[id3] -= scale1 * force_on_node123.getColumn(2);
+ }
+ }
+ }
+
+ // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search
+ virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df)  // for every tetra of every active soft body: df[node->index] -= scale * m_element_measure * (per-node damping differential for dv)
+ {
+ if (m_mu_damp == 0 && m_lambda_damp == 0)  // no damping configured: df is left untouched
+ return;
+ int numNodes = getNumNodes();
+ btAssert(numNodes <= df.size());  // df must hold at least one entry per node
+ btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1);
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())  // inactive bodies contribute nothing
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_tetras.size(); ++j)
+ {
+ btSoftBody::Tetra& tetra = psb->m_tetras[j];
+ btSoftBody::Node* node0 = tetra.m_n[0];
+ btSoftBody::Node* node1 = tetra.m_n[1];
+ btSoftBody::Node* node2 = tetra.m_n[2];
+ btSoftBody::Node* node3 = tetra.m_n[3];
+ size_t id0 = node0->index;
+ size_t id1 = node1->index;
+ size_t id2 = node2->index;
+ size_t id3 = node3->index;
+ btMatrix3x3 dF = Ds(id0, id1, id2, id3, dv) * tetra.m_Dm_inverse;  // dF is built from dv at the four tetra nodes via Ds()
+ btMatrix3x3 I;
+ I.setIdentity();
+ btMatrix3x3 dP = (dF + dF.transpose()) * m_mu_damp + I * (dF[0][0] + dF[1][1] + dF[2][2]) * m_lambda_damp;  // dP = m_mu_damp * (dF + dF^T) + m_lambda_damp * trace(dF) * I
+ // firstPiolaDampingDifferential(psb->m_tetraScratchesTn[j], dF, dP);
+ // btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col);
+ btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose();  // columns 0..2 are applied to nodes 1..3 below
+ btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col;  // times (-1, -1, -1): minus the sum of the three columns
+
+ // damping force differential
+ btScalar scale1 = scale * tetra.m_element_measure;
+ df[id0] -= scale1 * df_on_node0;
+ df[id1] -= scale1 * df_on_node123.getColumn(0);
+ df[id2] -= scale1 * df_on_node123.getColumn(1);
+ df[id3] -= scale1 * df_on_node123.getColumn(2);
+ }
+ }
+ }
+
+ virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) {}  // no-op: the body is empty, so diagA is left unchanged and scale is ignored
+
+ virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df)  // for every tetra of every active soft body: df[node->index] -= scale * m_element_measure * (per-node elastic differential for dx)
+ {
+ int numNodes = getNumNodes();
+ btAssert(numNodes <= df.size());  // df must hold at least one entry per node
+ btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1);
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())  // inactive bodies contribute nothing
+ {
+ continue;
+ }
+ for (int j = 0; j < psb->m_tetras.size(); ++j)
+ {
+ btSoftBody::Tetra& tetra = psb->m_tetras[j];
+ btSoftBody::Node* node0 = tetra.m_n[0];
+ btSoftBody::Node* node1 = tetra.m_n[1];
+ btSoftBody::Node* node2 = tetra.m_n[2];
+ btSoftBody::Node* node3 = tetra.m_n[3];
+ size_t id0 = node0->index;
+ size_t id1 = node1->index;
+ size_t id2 = node2->index;
+ size_t id3 = node3->index;
+ btMatrix3x3 dF = Ds(id0, id1, id2, id3, dx) * tetra.m_Dm_inverse;  // dF is built from dx at the four tetra nodes via Ds()
+ btMatrix3x3 dP;
+ firstPiolaDifferential(psb->m_tetraScratches[j], dF, dP);  // dP is filled from dF and the cached per-tetra scratch data
+ // btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col);
+ btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose();  // columns 0..2 are applied to nodes 1..3 below
+ btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col;  // times (-1, -1, -1): minus the sum of the three columns
+
+ // elastic force differential
+ btScalar scale1 = scale * tetra.m_element_measure;
+ df[id0] -= scale1 * df_on_node0;
+ df[id1] -= scale1 * df_on_node123.getColumn(0);
+ df[id2] -= scale1 * df_on_node123.getColumn(1);
+ df[id3] -= scale1 * df_on_node123.getColumn(2);
+ }
+ }
+ }
+
+ void firstPiola(const btSoftBody::TetraScratch& s, btMatrix3x3& P)  // writes the first Piola stress for scratch state s into the output parameter P
+ {
+ btScalar c1 = (m_mu * (1. - 1. / (s.m_trace + 1.)));  // coefficient of s.m_F
+ btScalar c2 = (m_lambda * (s.m_J - 1.) - 0.75 * m_mu);  // coefficient of s.m_cofF
+ P = s.m_F * c1 + s.m_cofF * c2;  // P overwrites any previous content
+ }
+
+ // Let P be the first piola stress.
+ // This function calculates the dP = dP/dF * dF
+ void firstPiolaDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP)  // dP is an output parameter and is overwritten
+ {
+ btScalar c1 = m_mu * (1. - 1. / (s.m_trace + 1.));  // same coefficient of F as in firstPiola
+ btScalar c2 = (2. * m_mu) * DotProduct(s.m_F, dF) * (1. / ((1. + s.m_trace) * (1. + s.m_trace)));
+ btScalar c3 = (m_lambda * DotProduct(s.m_cofF, dF));
+ dP = dF * c1 + s.m_F * c2;  // terms along dF and F
+ addScaledCofactorMatrixDifferential(s.m_F, dF, m_lambda * (s.m_J - 1.) - 0.75 * m_mu, dP);  // adds the cofactor-differential term, scaled by firstPiola's cofactor coefficient
+ dP += s.m_cofF * c3;  // term along cof(F)
+ }
- // damping force differential
- btScalar scale1 = scale * tetra.m_element_measure;
- df[id0] -= scale1 * df_on_node0;
- df[id1] -= scale1 * df_on_node123.getColumn(0);
- df[id2] -= scale1 * df_on_node123.getColumn(1);
- df[id3] -= scale1 * df_on_node123.getColumn(2);
- }
- }
- }
-
- virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA){}
-
- virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df)
- {
- int numNodes = getNumNodes();
- btAssert(numNodes <= df.size());
- btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1);
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < psb->m_tetras.size(); ++j)
- {
- btSoftBody::Tetra& tetra = psb->m_tetras[j];
- btSoftBody::Node* node0 = tetra.m_n[0];
- btSoftBody::Node* node1 = tetra.m_n[1];
- btSoftBody::Node* node2 = tetra.m_n[2];
- btSoftBody::Node* node3 = tetra.m_n[3];
- size_t id0 = node0->index;
- size_t id1 = node1->index;
- size_t id2 = node2->index;
- size_t id3 = node3->index;
- btMatrix3x3 dF = Ds(id0, id1, id2, id3, dx) * tetra.m_Dm_inverse;
- btMatrix3x3 dP;
- firstPiolaDifferential(psb->m_tetraScratches[j], dF, dP);
-// btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col);
- btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose();
- btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col;
-
- // elastic force differential
- btScalar scale1 = scale * tetra.m_element_measure;
- df[id0] -= scale1 * df_on_node0;
- df[id1] -= scale1 * df_on_node123.getColumn(0);
- df[id2] -= scale1 * df_on_node123.getColumn(1);
- df[id3] -= scale1 * df_on_node123.getColumn(2);
- }
- }
- }
-
- void firstPiola(const btSoftBody::TetraScratch& s, btMatrix3x3& P)
- {
- btScalar c1 = (m_mu * ( 1. - 1. / (s.m_trace + 1.)));
- btScalar c2 = (m_lambda * (s.m_J - 1.) - 0.75 * m_mu);
- P = s.m_F * c1 + s.m_cofF * c2;
- }
-
- // Let P be the first piola stress.
- // This function calculates the dP = dP/dF * dF
- void firstPiolaDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP)
- {
- btScalar c1 = m_mu * ( 1. - 1. / (s.m_trace + 1.));
- btScalar c2 = (2.*m_mu) * DotProduct(s.m_F, dF) * (1./((1.+s.m_trace)*(1.+s.m_trace)));
- btScalar c3 = (m_lambda * DotProduct(s.m_cofF, dF));
- dP = dF * c1 + s.m_F * c2;
- addScaledCofactorMatrixDifferential(s.m_F, dF, m_lambda*(s.m_J-1.) - 0.75*m_mu, dP);
- dP += s.m_cofF * c3;
- }
-
- // Let Q be the damping stress.
- // This function calculates the dP = dQ/dF * dF
- void firstPiolaDampingDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP)
- {
- btScalar c1 = (m_mu_damp * ( 1. - 1. / (s.m_trace + 1.)));
- btScalar c2 = ((2.*m_mu_damp) * DotProduct(s.m_F, dF) *(1./((1.+s.m_trace)*(1.+s.m_trace))));
- btScalar c3 = (m_lambda_damp * DotProduct(s.m_cofF, dF));
- dP = dF * c1 + s.m_F * c2;
- addScaledCofactorMatrixDifferential(s.m_F, dF, m_lambda_damp*(s.m_J-1.) - 0.75*m_mu_damp, dP);
- dP += s.m_cofF * c3;
- }
-
- btScalar DotProduct(const btMatrix3x3& A, const btMatrix3x3& B)
- {
- btScalar ans = 0;
- for (int i = 0; i < 3; ++i)
- {
- ans += A[i].dot(B[i]);
- }
- return ans;
- }
-
- // Let C(A) be the cofactor of the matrix A
- // Let H = the derivative of C(A) with respect to A evaluated at F = A
- // This function calculates H*dF
- void addScaledCofactorMatrixDifferential(const btMatrix3x3& F, const btMatrix3x3& dF, btScalar scale, btMatrix3x3& M)
- {
- M[0][0] += scale * (dF[1][1] * F[2][2] + F[1][1] * dF[2][2] - dF[2][1] * F[1][2] - F[2][1] * dF[1][2]);
- M[1][0] += scale * (dF[2][1] * F[0][2] + F[2][1] * dF[0][2] - dF[0][1] * F[2][2] - F[0][1] * dF[2][2]);
- M[2][0] += scale * (dF[0][1] * F[1][2] + F[0][1] * dF[1][2] - dF[1][1] * F[0][2] - F[1][1] * dF[0][2]);
- M[0][1] += scale * (dF[2][0] * F[1][2] + F[2][0] * dF[1][2] - dF[1][0] * F[2][2] - F[1][0] * dF[2][2]);
- M[1][1] += scale * (dF[0][0] * F[2][2] + F[0][0] * dF[2][2] - dF[2][0] * F[0][2] - F[2][0] * dF[0][2]);
- M[2][1] += scale * (dF[1][0] * F[0][2] + F[1][0] * dF[0][2] - dF[0][0] * F[1][2] - F[0][0] * dF[1][2]);
- M[0][2] += scale * (dF[1][0] * F[2][1] + F[1][0] * dF[2][1] - dF[2][0] * F[1][1] - F[2][0] * dF[1][1]);
- M[1][2] += scale * (dF[2][0] * F[0][1] + F[2][0] * dF[0][1] - dF[0][0] * F[2][1] - F[0][0] * dF[2][1]);
- M[2][2] += scale * (dF[0][0] * F[1][1] + F[0][0] * dF[1][1] - dF[1][0] * F[0][1] - F[1][0] * dF[0][1]);
- }
-
- virtual btDeformableLagrangianForceType getForceType()
- {
- return BT_NEOHOOKEAN_FORCE;
- }
-
+ // Let Q be the damping stress.
+ // This function calculates the dP = dQ/dF * dF
+ void firstPiolaDampingDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP)  // same form as firstPiolaDifferential with m_mu_damp / m_lambda_damp; NOTE(review): the only reference visible here is a commented-out call in addScaledDampingForceDifferential - confirm it is still needed
+ {
+ btScalar c1 = (m_mu_damp * (1. - 1. / (s.m_trace + 1.)));
+ btScalar c2 = ((2. * m_mu_damp) * DotProduct(s.m_F, dF) * (1. / ((1. + s.m_trace) * (1. + s.m_trace))));
+ btScalar c3 = (m_lambda_damp * DotProduct(s.m_cofF, dF));
+ dP = dF * c1 + s.m_F * c2;  // dP is overwritten here; terms along dF and F
+ addScaledCofactorMatrixDifferential(s.m_F, dF, m_lambda_damp * (s.m_J - 1.) - 0.75 * m_mu_damp, dP);  // adds the scaled cofactor-differential term
+ dP += s.m_cofF * c3;  // term along cof(F)
+ }
+
+ btScalar DotProduct(const btMatrix3x3& A, const btMatrix3x3& B)  // returns the sum of the element-wise products of A and B
+ {
+ btScalar ans = 0;
+ for (int i = 0; i < 3; ++i)
+ {
+ ans += A[i].dot(B[i]);  // accumulates A[i] . B[i] for i = 0..2
+ }
+ return ans;
+ }
+
+ // Let C(A) be the cofactor of the matrix A
+ // Let H = the derivative of C(A) with respect to A evaluated at F = A
+ // This function calculates H*dF
+ void addScaledCofactorMatrixDifferential(const btMatrix3x3& F, const btMatrix3x3& dF, btScalar scale, btMatrix3x3& M)  // accumulates: M += scale * (H*dF); M is not cleared first
+ {
+ M[0][0] += scale * (dF[1][1] * F[2][2] + F[1][1] * dF[2][2] - dF[2][1] * F[1][2] - F[2][1] * dF[1][2]);  // every entry is a sum of products of one dF element with one F element
+ M[1][0] += scale * (dF[2][1] * F[0][2] + F[2][1] * dF[0][2] - dF[0][1] * F[2][2] - F[0][1] * dF[2][2]);
+ M[2][0] += scale * (dF[0][1] * F[1][2] + F[0][1] * dF[1][2] - dF[1][1] * F[0][2] - F[1][1] * dF[0][2]);
+ M[0][1] += scale * (dF[2][0] * F[1][2] + F[2][0] * dF[1][2] - dF[1][0] * F[2][2] - F[1][0] * dF[2][2]);
+ M[1][1] += scale * (dF[0][0] * F[2][2] + F[0][0] * dF[2][2] - dF[2][0] * F[0][2] - F[2][0] * dF[0][2]);
+ M[2][1] += scale * (dF[1][0] * F[0][2] + F[1][0] * dF[0][2] - dF[0][0] * F[1][2] - F[0][0] * dF[1][2]);
+ M[0][2] += scale * (dF[1][0] * F[2][1] + F[1][0] * dF[2][1] - dF[2][0] * F[1][1] - F[2][0] * dF[1][1]);
+ M[1][2] += scale * (dF[2][0] * F[0][1] + F[2][0] * dF[0][1] - dF[0][0] * F[2][1] - F[0][0] * dF[2][1]);
+ M[2][2] += scale * (dF[0][0] * F[1][1] + F[0][0] * dF[1][1] - dF[1][0] * F[0][1] - F[1][0] * dF[0][1]);
+ }
+
+ virtual btDeformableLagrangianForceType getForceType()  // reports the type tag of this force
+ {
+ return BT_NEOHOOKEAN_FORCE;  // constant: the same value is returned on every call
+ }
};
#endif /* BT_NEOHOOKEAN_H */
diff --git a/thirdparty/bullet/BulletSoftBody/btKrylovSolver.h b/thirdparty/bullet/BulletSoftBody/btKrylovSolver.h
new file mode 100644
index 0000000000..59126b47ae
--- /dev/null
+++ b/thirdparty/bullet/BulletSoftBody/btKrylovSolver.h
@@ -0,0 +1,107 @@
+/*
+ Written by Xuchen Han <xuchenhan2015@u.northwestern.edu>
+
+ Bullet Continuous Collision Detection and Physics Library
+ Copyright (c) 2019 Google Inc. http://bulletphysics.org
+ This software is provided 'as-is', without any express or implied warranty.
+ In no event will the authors be held liable for any damages arising from the use of this software.
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it freely,
+ subject to the following restrictions:
+ 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+ */
+
+#ifndef BT_KRYLOV_SOLVER_H
+#define BT_KRYLOV_SOLVER_H
+#include <iostream>
+#include <cmath>
+#include <limits>
+#include <LinearMath/btAlignedObjectArray.h>
+#include <LinearMath/btVector3.h>
+#include <LinearMath/btScalar.h>
+#include "LinearMath/btQuickprof.h"
+
+template <class MatrixX>  // MatrixX: type of the operator A passed to solve(); not used anywhere else in this class
+class btKrylovSolver  // abstract base: solve() and reinitialize() are pure virtual, the remaining members are TVStack helpers
+{
+ typedef btAlignedObjectArray<btVector3> TVStack;  // a stack of btVector3 entries
+
+public:
+ int m_maxIterations;  // stored by the constructor; not read by this base class
+ btScalar m_tolerance;  // stored by the constructor and setTolerance(); not read by this base class
+ btKrylovSolver(int maxIterations, btScalar tolerance)
+ : m_maxIterations(maxIterations), m_tolerance(tolerance)
+ {
+ }
+
+ virtual ~btKrylovSolver() {}
+
+ virtual int solve(MatrixX& A, TVStack& x, const TVStack& b, bool verbose = false) = 0;  // pure virtual; NOTE(review): the meaning of the int result is defined by subclasses (presumably an iteration count) - confirm
+
+ virtual void reinitialize(const TVStack& b) = 0;  // pure virtual; what is rebuilt from b is up to the subclass
+
+ virtual SIMD_FORCE_INLINE TVStack sub(const TVStack& a, const TVStack& b)  // returns a new stack holding the element-wise difference
+ {
+ // c = a-b
+ btAssert(a.size() == b.size());
+ TVStack c;
+ c.resize(a.size());
+ for (int i = 0; i < a.size(); ++i)
+ {
+ c[i] = a[i] - b[i];
+ }
+ return c;
+ }
+
+ virtual SIMD_FORCE_INLINE btScalar squaredNorm(const TVStack& a)  // dot(a, a): sum of all squared components
+ {
+ return dot(a, a);
+ }
+
+ virtual SIMD_FORCE_INLINE btScalar norm(const TVStack& a)  // max-norm: largest absolute component over all entries; NOT the square root of squaredNorm(a)
+ {
+ btScalar ret = 0;  // result for an empty stack
+ for (int i = 0; i < a.size(); ++i)
+ {
+ for (int d = 0; d < 3; ++d)
+ {
+ ret = btMax(ret, btFabs(a[i][d]));
+ }
+ }
+ return ret;
+ }
+
+ virtual SIMD_FORCE_INLINE btScalar dot(const TVStack& a, const TVStack& b)  // sum of a[i].dot(b[i]) over a.size() entries; NOTE(review): unlike sub() and multAndAddTo(), the sizes are not asserted equal
+ {
+ btScalar ans(0);
+ for (int i = 0; i < a.size(); ++i)
+ ans += a[i].dot(b[i]);
+ return ans;
+ }
+
+ virtual SIMD_FORCE_INLINE void multAndAddTo(btScalar s, const TVStack& a, TVStack& result)  // in-place update of result
+ {
+ // result += s*a
+ btAssert(a.size() == result.size());
+ for (int i = 0; i < a.size(); ++i)
+ result[i] += s * a[i];
+ }
+
+ virtual SIMD_FORCE_INLINE TVStack multAndAdd(btScalar s, const TVStack& a, const TVStack& b)  // returns a new stack; NOTE(review): b is indexed up to a.size() without a size assert
+ {
+ // result = a*s + b
+ TVStack result;
+ result.resize(a.size());
+ for (int i = 0; i < a.size(); ++i)
+ result[i] = s * a[i] + b[i];
+ return result;
+ }
+
+ virtual SIMD_FORCE_INLINE void setTolerance(btScalar tolerance)  // overwrites m_tolerance
+ {
+ m_tolerance = tolerance;
+ }
+};
+#endif /* BT_KRYLOV_SOLVER_H */
diff --git a/thirdparty/bullet/BulletSoftBody/btPreconditioner.h b/thirdparty/bullet/BulletSoftBody/btPreconditioner.h
index c2db448ef8..21c1106a42 100644
--- a/thirdparty/bullet/BulletSoftBody/btPreconditioner.h
+++ b/thirdparty/bullet/BulletSoftBody/btPreconditioner.h
@@ -19,269 +19,266 @@
class Preconditioner
{
public:
- typedef btAlignedObjectArray<btVector3> TVStack;
- virtual void operator()(const TVStack& x, TVStack& b) = 0;
- virtual void reinitialize(bool nodeUpdated) = 0;
- virtual ~Preconditioner(){}
+ typedef btAlignedObjectArray<btVector3> TVStack;  // stacked btVector3 data the preconditioner operates on
+ virtual void operator()(const TVStack& x, TVStack& b) = 0;  // apply the preconditioner: reads x, writes the result to b
+ virtual void reinitialize(bool nodeUpdated) = 0;  // refresh cached data; how nodeUpdated is used is up to the implementation
+ virtual ~Preconditioner() {}  // virtual so implementations can be destroyed through a Preconditioner pointer
};
class DefaultPreconditioner : public Preconditioner
{
public:
- virtual void operator()(const TVStack& x, TVStack& b)
- {
- btAssert(b.size() == x.size());
- for (int i = 0; i < b.size(); ++i)
- b[i] = x[i];
- }
- virtual void reinitialize(bool nodeUpdated)
- {
- }
-
- virtual ~DefaultPreconditioner(){}
+ virtual void operator()(const TVStack& x, TVStack& b)  // identity preconditioner: copies x into b
+ {
+ btAssert(b.size() == x.size());  // b must already be sized like x; it is not resized here
+ for (int i = 0; i < b.size(); ++i)
+ b[i] = x[i];
+ }
+ virtual void reinitialize(bool nodeUpdated)  // nothing is cached, so there is nothing to rebuild
+ {
+ }
+
+ virtual ~DefaultPreconditioner() {}
};
class MassPreconditioner : public Preconditioner
{
- btAlignedObjectArray<btScalar> m_inv_mass;
- const btAlignedObjectArray<btSoftBody *>& m_softBodies;
+ btAlignedObjectArray<btScalar> m_inv_mass;  // Node::m_im of every node, soft bodies concatenated in array order
+ const btAlignedObjectArray<btSoftBody*>& m_softBodies;  // reference member: the array must outlive this object
+
public:
- MassPreconditioner(const btAlignedObjectArray<btSoftBody *>& softBodies)
- : m_softBodies(softBodies)
- {
- }
-
- virtual void reinitialize(bool nodeUpdated)
- {
- if (nodeUpdated)
- {
- m_inv_mass.clear();
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- m_inv_mass.push_back(psb->m_nodes[j].m_im);
- }
- }
- }
-
- virtual void operator()(const TVStack& x, TVStack& b)
- {
- btAssert(b.size() == x.size());
- btAssert(m_inv_mass.size() <= x.size());
- for (int i = 0; i < m_inv_mass.size(); ++i)
- {
- b[i] = x[i] * m_inv_mass[i];
- }
- for (int i = m_inv_mass.size(); i < b.size(); ++i)
- {
- b[i] = x[i];
- }
- }
-};
+ MassPreconditioner(const btAlignedObjectArray<btSoftBody*>& softBodies)  // only binds the reference; m_inv_mass stays empty until reinitialize(true)
+ : m_softBodies(softBodies)
+ {
+ }
+ virtual void reinitialize(bool nodeUpdated)  // rebuilds m_inv_mass only when nodeUpdated is true
+ {
+ if (nodeUpdated)
+ {
+ m_inv_mass.clear();
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ m_inv_mass.push_back(psb->m_nodes[j].m_im);
+ }
+ }
+ }
+
+ virtual void operator()(const TVStack& x, TVStack& b)  // b[i] = x[i] * m_inv_mass[i] for the node entries, plain copy for the rest
+ {
+ btAssert(b.size() == x.size());
+ btAssert(m_inv_mass.size() <= x.size());  // x may carry extra entries after the node entries
+ for (int i = 0; i < m_inv_mass.size(); ++i)
+ {
+ b[i] = x[i] * m_inv_mass[i];
+ }
+ for (int i = m_inv_mass.size(); i < b.size(); ++i)  // entries past the node range pass through unchanged
+ {
+ b[i] = x[i];
+ }
+ }
+};
class KKTPreconditioner : public Preconditioner
{
- const btAlignedObjectArray<btSoftBody *>& m_softBodies;
- const btDeformableContactProjection& m_projections;
- const btAlignedObjectArray<btDeformableLagrangianForce*>& m_lf;
- TVStack m_inv_A, m_inv_S;
- const btScalar& m_dt;
- const bool& m_implicit;
+ const btAlignedObjectArray<btSoftBody*>& m_softBodies;
+ const btDeformableContactProjection& m_projections;
+ const btAlignedObjectArray<btDeformableLagrangianForce*>& m_lf;
+ TVStack m_inv_A, m_inv_S;
+ const btScalar& m_dt;
+ const bool& m_implicit;
+
public:
- KKTPreconditioner(const btAlignedObjectArray<btSoftBody *>& softBodies, const btDeformableContactProjection& projections, const btAlignedObjectArray<btDeformableLagrangianForce*>& lf, const btScalar& dt, const bool& implicit)
- : m_softBodies(softBodies)
- , m_projections(projections)
- , m_lf(lf)
- , m_dt(dt)
- , m_implicit(implicit)
- {
- }
-
- virtual void reinitialize(bool nodeUpdated)
- {
- if (nodeUpdated)
- {
- int num_nodes = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- num_nodes += psb->m_nodes.size();
- }
- m_inv_A.resize(num_nodes);
- }
- buildDiagonalA(m_inv_A);
- for (int i = 0; i < m_inv_A.size(); ++i)
- {
-// printf("A[%d] = %f, %f, %f \n", i, m_inv_A[i][0], m_inv_A[i][1], m_inv_A[i][2]);
- for (int d = 0; d < 3; ++d)
- {
- m_inv_A[i][d] = (m_inv_A[i][d] == 0) ? 0.0 : 1.0/ m_inv_A[i][d];
- }
- }
- m_inv_S.resize(m_projections.m_lagrangeMultipliers.size());
-// printf("S.size() = %d \n", m_inv_S.size());
- buildDiagonalS(m_inv_A, m_inv_S);
- for (int i = 0; i < m_inv_S.size(); ++i)
- {
-// printf("S[%d] = %f, %f, %f \n", i, m_inv_S[i][0], m_inv_S[i][1], m_inv_S[i][2]);
- for (int d = 0; d < 3; ++d)
- {
- m_inv_S[i][d] = (m_inv_S[i][d] == 0) ? 0.0 : 1.0/ m_inv_S[i][d];
- }
- }
- }
-
- void buildDiagonalA(TVStack& diagA) const
- {
- size_t counter = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- for (int j = 0; j < psb->m_nodes.size(); ++j)
- {
- const btSoftBody::Node& node = psb->m_nodes[j];
- diagA[counter] = (node.m_im == 0) ? btVector3(0,0,0) : btVector3(1.0/node.m_im, 1.0 / node.m_im, 1.0 / node.m_im);
- ++counter;
- }
- }
- if (m_implicit)
- {
- printf("implicit not implemented\n");
- btAssert(false);
- }
- for (int i = 0; i < m_lf.size(); ++i)
- {
- // add damping matrix
- m_lf[i]->buildDampingForceDifferentialDiagonal(-m_dt, diagA);
- }
- }
-
- void buildDiagonalS(const TVStack& inv_A, TVStack& diagS)
- {
- for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c)
- {
- // S[k,k] = e_k^T * C A_d^-1 C^T * e_k
- const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c];
- btVector3& t = diagS[c];
- t.setZero();
- for (int j = 0; j < lm.m_num_constraints; ++j)
- {
- for (int i = 0; i < lm.m_num_nodes; ++i)
- {
- for (int d = 0; d < 3; ++d)
- {
- t[j] += inv_A[lm.m_indices[i]][d] * lm.m_dirs[j][d] * lm.m_dirs[j][d] * lm.m_weights[i] * lm.m_weights[i];
- }
- }
- }
- }
- }
-#define USE_FULL_PRECONDITIONER
+ KKTPreconditioner(const btAlignedObjectArray<btSoftBody*>& softBodies, const btDeformableContactProjection& projections, const btAlignedObjectArray<btDeformableLagrangianForce*>& lf, const btScalar& dt, const bool& implicit)  // every argument, dt and implicit included, is bound to a reference member, so all of them must outlive this object
+ : m_softBodies(softBodies), m_projections(projections), m_lf(lf), m_dt(dt), m_implicit(implicit)
+ {
+ }
+
+ virtual void reinitialize(bool nodeUpdated)  // rebuilds m_inv_A and m_inv_S; both end up holding element-wise reciprocals of the diagonals
+ {
+ if (nodeUpdated)  // recount the nodes and resize m_inv_A only when the node set changed
+ {
+ int num_nodes = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ num_nodes += psb->m_nodes.size();
+ }
+ m_inv_A.resize(num_nodes);
+ }
+ buildDiagonalA(m_inv_A);  // m_inv_A temporarily holds the diagonal itself
+ for (int i = 0; i < m_inv_A.size(); ++i)
+ {
+ // printf("A[%d] = %f, %f, %f \n", i, m_inv_A[i][0], m_inv_A[i][1], m_inv_A[i][2]);
+ for (int d = 0; d < 3; ++d)
+ {
+ m_inv_A[i][d] = (m_inv_A[i][d] == 0) ? 0.0 : 1.0 / m_inv_A[i][d];  // invert in place; exact zeros stay zero instead of dividing by zero
+ }
+ }
+ m_inv_S.resize(m_projections.m_lagrangeMultipliers.size());  // one btVector3 per Lagrange multiplier
+ // printf("S.size() = %d \n", m_inv_S.size());
+ buildDiagonalS(m_inv_A, m_inv_S);  // m_inv_S temporarily holds the diagonal itself
+ for (int i = 0; i < m_inv_S.size(); ++i)
+ {
+ // printf("S[%d] = %f, %f, %f \n", i, m_inv_S[i][0], m_inv_S[i][1], m_inv_S[i][2]);
+ for (int d = 0; d < 3; ++d)
+ {
+ m_inv_S[i][d] = (m_inv_S[i][d] == 0) ? 0.0 : 1.0 / m_inv_S[i][d];  // same in-place inversion as for m_inv_A
+ }
+ }
+ }
+
+ void buildDiagonalA(TVStack& diagA) const  // fills diagA with 1 / m_im per node (zero vector when m_im == 0), then lets every force add its damping diagonal
+ {
+ size_t counter = 0;  // running index into diagA across all soft bodies
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ const btSoftBody::Node& node = psb->m_nodes[j];
+ diagA[counter] = (node.m_im == 0) ? btVector3(0, 0, 0) : btVector3(1.0 / node.m_im, 1.0 / node.m_im, 1.0 / node.m_im);  // diagA must already have one entry per node; it is not resized here
+ ++counter;
+ }
+ }
+ if (m_implicit)  // implicit mode is not supported here: prints a message and asserts
+ {
+ printf("implicit not implemented\n");
+ btAssert(false);
+ }
+ for (int i = 0; i < m_lf.size(); ++i)
+ {
+ // add damping matrix
+ m_lf[i]->buildDampingForceDifferentialDiagonal(-m_dt, diagA);  // each force is called with scale -m_dt
+ }
+ }
+
+ void buildDiagonalS(const TVStack& inv_A, TVStack& diagS)  // diagS[c][j] = sum over nodes i and axes d of inv_A[m_indices[i]][d] * m_dirs[j][d]^2 * m_weights[i]^2
+ {
+ for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c)
+ {
+ // S[k,k] = e_k^T * C A_d^-1 C^T * e_k
+ const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c];
+ btVector3& t = diagS[c];  // diagS must already have one entry per multiplier; it is not resized here
+ t.setZero();  // components j >= m_num_constraints stay zero
+ for (int j = 0; j < lm.m_num_constraints; ++j)
+ {
+ for (int i = 0; i < lm.m_num_nodes; ++i)
+ {
+ for (int d = 0; d < 3; ++d)
+ {
+ t[j] += inv_A[lm.m_indices[i]][d] * lm.m_dirs[j][d] * lm.m_dirs[j][d] * lm.m_weights[i] * lm.m_weights[i];
+ }
+ }
+ }
+ }
+ }
+//#define USE_FULL_PRECONDITIONER
#ifndef USE_FULL_PRECONDITIONER
- virtual void operator()(const TVStack& x, TVStack& b)
- {
- btAssert(b.size() == x.size());
- for (int i = 0; i < m_inv_A.size(); ++i)
- {
- b[i] = x[i] * m_inv_A[i];
- }
- int offset = m_inv_A.size();
- for (int i = 0; i < m_inv_S.size(); ++i)
- {
- b[i+offset] = x[i+offset] * m_inv_S[i];
- }
- }
+ virtual void operator()(const TVStack& x, TVStack& b)  // diagonal version, compiled when USE_FULL_PRECONDITIONER is not defined
+ {
+ btAssert(b.size() == x.size());
+ for (int i = 0; i < m_inv_A.size(); ++i)
+ {
+ b[i] = x[i] * m_inv_A[i];  // node entries: scaled element-wise by m_inv_A
+ }
+ int offset = m_inv_A.size();  // multiplier entries follow the node entries
+ for (int i = 0; i < m_inv_S.size(); ++i)
+ {
+ b[i + offset] = x[i + offset] * m_inv_S[i];  // multiplier entries: scaled element-wise by m_inv_S
+ }
+ }
#else
- virtual void operator()(const TVStack& x, TVStack& b)
- {
- btAssert(b.size() == x.size());
- int offset = m_inv_A.size();
+ virtual void operator()(const TVStack& x, TVStack& b)  // full version, compiled only when USE_FULL_PRECONDITIONER is defined
+ {
+ btAssert(b.size() == x.size());
+ int offset = m_inv_A.size();  // multiplier entries start right after the node entries
- for (int i = 0; i < m_inv_A.size(); ++i)
- {
- b[i] = x[i] * m_inv_A[i];
- }
+ for (int i = 0; i < m_inv_A.size(); ++i)
+ {
+ b[i] = x[i] * m_inv_A[i];  // node part: x scaled element-wise by m_inv_A
+ }
- for (int i = 0; i < m_inv_S.size(); ++i)
- {
- b[i+offset].setZero();
- }
+ for (int i = 0; i < m_inv_S.size(); ++i)
+ {
+ b[i + offset].setZero();  // clear the multiplier part before accumulating into it
+ }
- for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c)
- {
- const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c];
- // C * x
- for (int d = 0; d < lm.m_num_constraints; ++d)
- {
- for (int i = 0; i < lm.m_num_nodes; ++i)
- {
- b[offset+c][d] += lm.m_weights[i] * b[lm.m_indices[i]].dot(lm.m_dirs[d]);
- }
- }
- }
+ for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c)
+ {
+ const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c];
+ // C applied to the node part of b, which already holds x * m_inv_A
+ for (int d = 0; d < lm.m_num_constraints; ++d)
+ {
+ for (int i = 0; i < lm.m_num_nodes; ++i)
+ {
+ b[offset + c][d] += lm.m_weights[i] * b[lm.m_indices[i]].dot(lm.m_dirs[d]);
+ }
+ }
+ }
- for (int i = 0; i < m_inv_S.size(); ++i)
- {
- b[i+offset] = b[i+offset] * m_inv_S[i];
- }
+ for (int i = 0; i < m_inv_S.size(); ++i)
+ {
+ b[i + offset] = b[i + offset] * m_inv_S[i];  // scale the multiplier part by m_inv_S
+ }
- for (int i = 0; i < m_inv_A.size(); ++i)
- {
- b[i].setZero();
- }
+ for (int i = 0; i < m_inv_A.size(); ++i)
+ {
+ b[i].setZero();  // the node part is reused as the accumulator for C^T * lambda
+ }
- for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c)
- {
- // C^T * lambda
- const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c];
- for (int i = 0; i < lm.m_num_nodes; ++i)
- {
- for (int j = 0; j < lm.m_num_constraints; ++j)
- {
- b[lm.m_indices[i]] += b[offset+c][j] * lm.m_weights[i] * lm.m_dirs[j];
- }
- }
- }
+ for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c)
+ {
+ // C^T * lambda
+ const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c];
+ for (int i = 0; i < lm.m_num_nodes; ++i)
+ {
+ for (int j = 0; j < lm.m_num_constraints; ++j)
+ {
+ b[lm.m_indices[i]] += b[offset + c][j] * lm.m_weights[i] * lm.m_dirs[j];  // lambda is the multiplier part of b computed above
+ }
+ }
+ }
- for (int i = 0; i < m_inv_A.size(); ++i)
- {
- b[i] = (x[i] - b[i]) * m_inv_A[i];
- }
+ for (int i = 0; i < m_inv_A.size(); ++i)
+ {
+ b[i] = (x[i] - b[i]) * m_inv_A[i];  // node part: (x - C^T * lambda) scaled by m_inv_A
+ }
- TVStack t;
- t.resize(b.size());
- for (int i = 0; i < m_inv_S.size(); ++i)
- {
- t[i+offset] = x[i+offset] * m_inv_S[i];
- }
- for (int i = 0; i < m_inv_A.size(); ++i)
- {
- t[i].setZero();
- }
- for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c)
- {
- // C^T * lambda
- const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c];
- for (int i = 0; i < lm.m_num_nodes; ++i)
- {
- for (int j = 0; j < lm.m_num_constraints; ++j)
- {
- t[lm.m_indices[i]] += t[offset+c][j] * lm.m_weights[i] * lm.m_dirs[j];
- }
- }
- }
- for (int i = 0; i < m_inv_A.size(); ++i)
- {
- b[i] += t[i] * m_inv_A[i];
- }
+ TVStack t;  // scratch stack with the same layout as b
+ t.resize(b.size());
+ for (int i = 0; i < m_inv_S.size(); ++i)
+ {
+ t[i + offset] = x[i + offset] * m_inv_S[i];  // multiplier part of t: x scaled by m_inv_S
+ }
+ for (int i = 0; i < m_inv_A.size(); ++i)
+ {
+ t[i].setZero();
+ }
+ for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c)
+ {
+ // C^T * lambda
+ const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c];
+ for (int i = 0; i < lm.m_num_nodes; ++i)
+ {
+ for (int j = 0; j < lm.m_num_constraints; ++j)
+ {
+ t[lm.m_indices[i]] += t[offset + c][j] * lm.m_weights[i] * lm.m_dirs[j];  // here lambda is the multiplier part of t
+ }
+ }
+ }
+ for (int i = 0; i < m_inv_A.size(); ++i)
+ {
+ b[i] += t[i] * m_inv_A[i];  // add the node part of t, scaled by m_inv_A
+ }
- for (int i = 0; i < m_inv_S.size(); ++i)
- {
- b[i+offset] -= x[i+offset] * m_inv_S[i];
- }
- }
+ for (int i = 0; i < m_inv_S.size(); ++i)
+ {
+ b[i + offset] -= x[i + offset] * m_inv_S[i];  // multiplier part: subtract x scaled by m_inv_S
+ }
+ }
#endif
};
diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBody.cpp b/thirdparty/bullet/BulletSoftBody/btSoftBody.cpp
index 81b846d7f8..d1980ea6c5 100644
--- a/thirdparty/bullet/BulletSoftBody/btSoftBody.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btSoftBody.cpp
@@ -37,12 +37,12 @@ static inline btDbvtNode* buildTreeBottomUp(btAlignedObjectArray<btDbvtNode*>& l
{
btAlignedObjectArray<bool> marked;
btAlignedObjectArray<btDbvtNode*> newLeafNodes;
- btAlignedObjectArray<std::pair<int,int> > childIds;
+ btAlignedObjectArray<std::pair<int, int> > childIds;
btAlignedObjectArray<btAlignedObjectArray<int> > newAdj;
marked.resize(N);
for (int i = 0; i < N; ++i)
marked[i] = false;
-
+
// pair adjacent nodes into new(parent) node
for (int i = 0; i < N; ++i)
{
@@ -61,7 +61,7 @@ static inline btDbvtNode* buildTreeBottomUp(btAlignedObjectArray<btDbvtNode*>& l
leafNodes[i]->parent = node;
leafNodes[n]->parent = node;
newLeafNodes.push_back(node);
- childIds.push_back(std::make_pair(i,n));
+ childIds.push_back(std::make_pair(i, n));
merged = true;
marked[n] = true;
break;
@@ -70,7 +70,7 @@ static inline btDbvtNode* buildTreeBottomUp(btAlignedObjectArray<btDbvtNode*>& l
if (!merged)
{
newLeafNodes.push_back(leafNodes[i]);
- childIds.push_back(std::make_pair(i,-1));
+ childIds.push_back(std::make_pair(i, -1));
}
marked[i] = true;
}
@@ -78,7 +78,7 @@ static inline btDbvtNode* buildTreeBottomUp(btAlignedObjectArray<btDbvtNode*>& l
newAdj.resize(newLeafNodes.size());
for (int i = 0; i < newLeafNodes.size(); ++i)
{
- for (int j = i+1; j < newLeafNodes.size(); ++j)
+ for (int j = i + 1; j < newLeafNodes.size(); ++j)
{
bool neighbor = false;
const btAlignedObjectArray<int>& leftChildNeighbors = adj[childIds[i].first];
@@ -143,7 +143,7 @@ btSoftBody::btSoftBody(btSoftBodyWorldInfo* worldInfo, int node_count, const btV
/* Nodes */
const btScalar margin = getCollisionShape()->getMargin();
m_nodes.resize(node_count);
- m_X.resize(node_count);
+ m_X.resize(node_count);
for (int i = 0, ni = node_count; i < ni; ++i)
{
Node& n = m_nodes[i];
@@ -154,7 +154,7 @@ btSoftBody::btSoftBody(btSoftBodyWorldInfo* worldInfo, int node_count, const btV
n.m_im = n.m_im > 0 ? 1 / n.m_im : 0;
n.m_leaf = m_ndbvt.insert(btDbvtVolume::FromCR(n.m_x, margin), &n);
n.m_material = pm;
- m_X[i] = n.m_x;
+ m_X[i] = n.m_x;
}
updateBounds();
setCollisionQuadrature(3);
@@ -195,8 +195,8 @@ void btSoftBody::initDefaults()
m_cfg.piterations = 1;
m_cfg.diterations = 0;
m_cfg.citerations = 4;
- m_cfg.drag = 0;
- m_cfg.m_maxStress = 0;
+ m_cfg.drag = 0;
+ m_cfg.m_maxStress = 0;
m_cfg.collisions = fCollision::Default;
m_pose.m_bvolume = false;
m_pose.m_bframe = false;
@@ -222,12 +222,14 @@ void btSoftBody::initDefaults()
m_windVelocity = btVector3(0, 0, 0);
m_restLengthScale = btScalar(1.0);
m_dampingCoefficient = 1.0;
- m_sleepingThreshold = .4;
+ m_sleepingThreshold = .04;
m_useSelfCollision = false;
m_collisionFlags = 0;
m_softSoftCollision = false;
m_maxSpeedSquared = 0;
m_repulsionStiffness = 0.5;
+ m_gravityFactor = 1;
+ m_cacheBarycenter = false;
m_fdbvnt = 0;
}
@@ -436,7 +438,7 @@ void btSoftBody::appendFace(int model, Material* mat)
ZeroInitialize(f);
f.m_material = mat ? mat : m_materials[0];
}
- m_faces.push_back(f);
+ m_faces.push_back(f);
}
//
@@ -525,94 +527,111 @@ void btSoftBody::appendAnchor(int node, btRigidBody* body, const btVector3& loca
//
void btSoftBody::appendDeformableAnchor(int node, btRigidBody* body)
{
- DeformableNodeRigidAnchor c;
- btSoftBody::Node& n = m_nodes[node];
- const btScalar ima = n.m_im;
- const btScalar imb = body->getInvMass();
- btVector3 nrm;
- const btCollisionShape* shp = body->getCollisionShape();
- const btTransform& wtr = body->getWorldTransform();
- btScalar dst =
- m_worldInfo->m_sparsesdf.Evaluate(
- wtr.invXform(m_nodes[node].m_x),
- shp,
- nrm,
- 0);
-
- c.m_cti.m_colObj = body;
- c.m_cti.m_normal = wtr.getBasis() * nrm;
- c.m_cti.m_offset = dst;
- c.m_node = &m_nodes[node];
- const btScalar fc = m_cfg.kDF * body->getFriction();
- c.m_c2 = ima;
- c.m_c3 = fc;
- c.m_c4 = body->isStaticOrKinematicObject() ? m_cfg.kKHR : m_cfg.kCHR;
- static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0);
- const btMatrix3x3& iwi = body->getInvInertiaTensorWorld();
- const btVector3 ra = n.m_x - wtr.getOrigin();
-
- c.m_c0 = ImpulseMatrix(1, ima, imb, iwi, ra);
- c.m_c1 = ra;
- c.m_local = body->getWorldTransform().inverse() * m_nodes[node].m_x;
- c.m_node->m_battach = 1;
- m_deformableAnchors.push_back(c);
+ DeformableNodeRigidAnchor c;
+ btSoftBody::Node& n = m_nodes[node];
+ const btScalar ima = n.m_im;
+ const btScalar imb = body->getInvMass();
+ btVector3 nrm;
+ const btCollisionShape* shp = body->getCollisionShape();
+ const btTransform& wtr = body->getWorldTransform();
+ btScalar dst =
+ m_worldInfo->m_sparsesdf.Evaluate(
+ wtr.invXform(m_nodes[node].m_x),
+ shp,
+ nrm,
+ 0);
+
+ c.m_cti.m_colObj = body;
+ c.m_cti.m_normal = wtr.getBasis() * nrm;
+ c.m_cti.m_offset = dst;
+ c.m_node = &m_nodes[node];
+ const btScalar fc = m_cfg.kDF * body->getFriction();
+ c.m_c2 = ima;
+ c.m_c3 = fc;
+ c.m_c4 = body->isStaticOrKinematicObject() ? m_cfg.kKHR : m_cfg.kCHR;
+ static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0);
+ const btMatrix3x3& iwi = body->getInvInertiaTensorWorld();
+ const btVector3 ra = n.m_x - wtr.getOrigin();
+
+ c.m_c0 = ImpulseMatrix(1, ima, imb, iwi, ra);
+ c.m_c1 = ra;
+ c.m_local = body->getWorldTransform().inverse() * m_nodes[node].m_x;
+ c.m_node->m_battach = 1;
+ m_deformableAnchors.push_back(c);
+}
+
+void btSoftBody::removeAnchor(int node)
+{
+ const btSoftBody::Node& n = m_nodes[node];
+ for (int i = 0; i < m_deformableAnchors.size();)
+ {
+ const DeformableNodeRigidAnchor& c = m_deformableAnchors[i];
+ if (c.m_node == &n)
+ {
+ m_deformableAnchors.removeAtIndex(i);
+ }
+ else
+ {
+ i++;
+ }
+ }
}
//
void btSoftBody::appendDeformableAnchor(int node, btMultiBodyLinkCollider* link)
{
- DeformableNodeRigidAnchor c;
- btSoftBody::Node& n = m_nodes[node];
- const btScalar ima = n.m_im;
- btVector3 nrm;
- const btCollisionShape* shp = link->getCollisionShape();
- const btTransform& wtr = link->getWorldTransform();
- btScalar dst =
- m_worldInfo->m_sparsesdf.Evaluate(
- wtr.invXform(m_nodes[node].m_x),
- shp,
- nrm,
- 0);
- c.m_cti.m_colObj = link;
- c.m_cti.m_normal = wtr.getBasis() * nrm;
- c.m_cti.m_offset = dst;
- c.m_node = &m_nodes[node];
- const btScalar fc = m_cfg.kDF * link->getFriction();
- c.m_c2 = ima;
- c.m_c3 = fc;
- c.m_c4 = link->isStaticOrKinematicObject() ? m_cfg.kKHR : m_cfg.kCHR;
- btVector3 normal = c.m_cti.m_normal;
- btVector3 t1 = generateUnitOrthogonalVector(normal);
- btVector3 t2 = btCross(normal, t1);
- btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2;
- findJacobian(link, jacobianData_normal, c.m_node->m_x, normal);
- findJacobian(link, jacobianData_t1, c.m_node->m_x, t1);
- findJacobian(link, jacobianData_t2, c.m_node->m_x, t2);
-
- btScalar* J_n = &jacobianData_normal.m_jacobians[0];
- btScalar* J_t1 = &jacobianData_t1.m_jacobians[0];
- btScalar* J_t2 = &jacobianData_t2.m_jacobians[0];
-
- btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0];
- btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0];
- btScalar* u_t2 = &jacobianData_t2.m_deltaVelocitiesUnitImpulse[0];
-
- btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(),
- t1.getX(), t1.getY(), t1.getZ(),
- t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame
- const int ndof = link->m_multiBody->getNumDofs() + 6;
- btMatrix3x3 local_impulse_matrix = (Diagonal(n.m_im) + OuterProduct(J_n, J_t1, J_t2, u_n, u_t1, u_t2, ndof)).inverse();
- c.m_c0 = rot.transpose() * local_impulse_matrix * rot;
- c.jacobianData_normal = jacobianData_normal;
- c.jacobianData_t1 = jacobianData_t1;
- c.jacobianData_t2 = jacobianData_t2;
- c.t1 = t1;
- c.t2 = t2;
- const btVector3 ra = n.m_x - wtr.getOrigin();
- c.m_c1 = ra;
- c.m_local = link->getWorldTransform().inverse() * m_nodes[node].m_x;
- c.m_node->m_battach = 1;
- m_deformableAnchors.push_back(c);
+ DeformableNodeRigidAnchor c;
+ btSoftBody::Node& n = m_nodes[node];
+ const btScalar ima = n.m_im;
+ btVector3 nrm;
+ const btCollisionShape* shp = link->getCollisionShape();
+ const btTransform& wtr = link->getWorldTransform();
+ btScalar dst =
+ m_worldInfo->m_sparsesdf.Evaluate(
+ wtr.invXform(m_nodes[node].m_x),
+ shp,
+ nrm,
+ 0);
+ c.m_cti.m_colObj = link;
+ c.m_cti.m_normal = wtr.getBasis() * nrm;
+ c.m_cti.m_offset = dst;
+ c.m_node = &m_nodes[node];
+ const btScalar fc = m_cfg.kDF * link->getFriction();
+ c.m_c2 = ima;
+ c.m_c3 = fc;
+ c.m_c4 = link->isStaticOrKinematicObject() ? m_cfg.kKHR : m_cfg.kCHR;
+ btVector3 normal = c.m_cti.m_normal;
+ btVector3 t1 = generateUnitOrthogonalVector(normal);
+ btVector3 t2 = btCross(normal, t1);
+ btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2;
+ findJacobian(link, jacobianData_normal, c.m_node->m_x, normal);
+ findJacobian(link, jacobianData_t1, c.m_node->m_x, t1);
+ findJacobian(link, jacobianData_t2, c.m_node->m_x, t2);
+
+ btScalar* J_n = &jacobianData_normal.m_jacobians[0];
+ btScalar* J_t1 = &jacobianData_t1.m_jacobians[0];
+ btScalar* J_t2 = &jacobianData_t2.m_jacobians[0];
+
+ btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0];
+ btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0];
+ btScalar* u_t2 = &jacobianData_t2.m_deltaVelocitiesUnitImpulse[0];
+
+ btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(),
+ t1.getX(), t1.getY(), t1.getZ(),
+ t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame
+ const int ndof = link->m_multiBody->getNumDofs() + 6;
+ btMatrix3x3 local_impulse_matrix = (Diagonal(n.m_im) + OuterProduct(J_n, J_t1, J_t2, u_n, u_t1, u_t2, ndof)).inverse();
+ c.m_c0 = rot.transpose() * local_impulse_matrix * rot;
+ c.jacobianData_normal = jacobianData_normal;
+ c.jacobianData_t1 = jacobianData_t1;
+ c.jacobianData_t2 = jacobianData_t2;
+ c.t1 = t1;
+ c.t2 = t2;
+ const btVector3 ra = n.m_x - wtr.getOrigin();
+ c.m_c1 = ra;
+ c.m_local = link->getWorldTransform().inverse() * m_nodes[node].m_x;
+ c.m_node->m_battach = 1;
+ m_deformableAnchors.push_back(c);
}
//
void btSoftBody::appendLinearJoint(const LJoint::Specs& specs, Cluster* body0, Body body1)
@@ -731,7 +750,7 @@ void btSoftBody::addAeroForceToNode(const btVector3& windVelocity, int nodeIndex
fDrag = 0.5f * kDG * medium.m_density * rel_v2 * tri_area * n_dot_v * (-rel_v_nrm);
// Check angle of attack
- // cos(10º) = 0.98480
+ // cos(10º) = 0.98480
if (0 < n_dot_v && n_dot_v < 0.98480f)
fLift = 0.5f * kLF * medium.m_density * rel_v_len * tri_area * btSqrt(1.0f - n_dot_v * n_dot_v) * (nrm.cross(rel_v_nrm).cross(rel_v_nrm));
@@ -817,7 +836,7 @@ void btSoftBody::addAeroForceToFace(const btVector3& windVelocity, int faceIndex
fDrag = 0.5f * kDG * medium.m_density * rel_v2 * tri_area * n_dot_v * (-rel_v_nrm);
// Check angle of attack
- // cos(10º) = 0.98480
+ // cos(10º) = 0.98480
if (0 < n_dot_v && n_dot_v < 0.98480f)
fLift = 0.5f * kLF * medium.m_density * rel_v_len * tri_area * btSqrt(1.0f - n_dot_v * n_dot_v) * (nrm.cross(rel_v_nrm).cross(rel_v_nrm));
@@ -882,6 +901,7 @@ void btSoftBody::setVelocity(const btVector3& velocity)
if (n.m_im > 0)
{
n.m_v = velocity;
+ n.m_vn = velocity;
}
}
}
@@ -1010,66 +1030,70 @@ void btSoftBody::setVolumeDensity(btScalar density)
//
btVector3 btSoftBody::getLinearVelocity()
{
- btVector3 total_momentum = btVector3(0,0,0);
- for (int i = 0; i < m_nodes.size(); ++i)
- {
- btScalar mass = m_nodes[i].m_im == 0 ? 0 : 1.0/m_nodes[i].m_im;
- total_momentum += mass * m_nodes[i].m_v;
- }
- btScalar total_mass = getTotalMass();
- return total_mass == 0 ? total_momentum : total_momentum / total_mass;
+ btVector3 total_momentum = btVector3(0, 0, 0);
+ for (int i = 0; i < m_nodes.size(); ++i)
+ {
+ btScalar mass = m_nodes[i].m_im == 0 ? 0 : 1.0 / m_nodes[i].m_im;
+ total_momentum += mass * m_nodes[i].m_v;
+ }
+ btScalar total_mass = getTotalMass();
+ return total_mass == 0 ? total_momentum : total_momentum / total_mass;
}
//
void btSoftBody::setLinearVelocity(const btVector3& linVel)
{
- btVector3 old_vel = getLinearVelocity();
- btVector3 diff = linVel - old_vel;
- for (int i = 0; i < m_nodes.size(); ++i)
- m_nodes[i].m_v += diff;
+ btVector3 old_vel = getLinearVelocity();
+ btVector3 diff = linVel - old_vel;
+ for (int i = 0; i < m_nodes.size(); ++i)
+ m_nodes[i].m_v += diff;
}
//
void btSoftBody::setAngularVelocity(const btVector3& angVel)
{
- btVector3 old_vel = getLinearVelocity();
- btVector3 com = getCenterOfMass();
- for (int i = 0; i < m_nodes.size(); ++i)
- {
- m_nodes[i].m_v = angVel.cross(m_nodes[i].m_x - com) + old_vel;
- }
+ btVector3 old_vel = getLinearVelocity();
+ btVector3 com = getCenterOfMass();
+ for (int i = 0; i < m_nodes.size(); ++i)
+ {
+ m_nodes[i].m_v = angVel.cross(m_nodes[i].m_x - com) + old_vel;
+ }
}
//
btTransform btSoftBody::getRigidTransform()
{
- btVector3 t = getCenterOfMass();
- btMatrix3x3 S;
- S.setZero();
- // get rotation that minimizes L2 difference: \sum_i || RX_i + t - x_i ||
- for (int i = 0; i < m_nodes.size(); ++i)
- {
- S += OuterProduct(m_X[i], t-m_nodes[i].m_x);
- }
- btVector3 sigma;
- btMatrix3x3 U,V;
- singularValueDecomposition(S,U,sigma,V);
- btMatrix3x3 R = V * U.transpose();
- btTransform trs;
- trs.setIdentity();
- trs.setOrigin(t);
- trs.setBasis(R);
- return trs;
+ btVector3 t = getCenterOfMass();
+ btMatrix3x3 S;
+ S.setZero();
+ // Get rotation that minimizes L2 difference: \sum_i || RX_i + t - x_i ||
+ // It's important to make sure that S has the correct signs.
+ // SVD is only unique up to the ordering of singular values.
+ // SVD will manipulate U and V to ensure the ordering of singular values. If all three singular
+ // values are negative, SVD will permute columns of U to make two of them positive.
+ for (int i = 0; i < m_nodes.size(); ++i)
+ {
+ S -= OuterProduct(m_X[i], t - m_nodes[i].m_x);
+ }
+ btVector3 sigma;
+ btMatrix3x3 U, V;
+ singularValueDecomposition(S, U, sigma, V);
+ btMatrix3x3 R = V * U.transpose();
+ btTransform trs;
+ trs.setIdentity();
+ trs.setOrigin(t);
+ trs.setBasis(R);
+ return trs;
}
//
void btSoftBody::transformTo(const btTransform& trs)
{
- // get the current best rigid fit
- btTransform current_transform = getRigidTransform();
- // apply transform in material space
- btTransform new_transform = trs * current_transform.inverse();
- transform(new_transform);
+ // get the current best rigid fit
+ btTransform current_transform = getRigidTransform();
+ // apply transform in material space
+ btTransform new_transform = trs * current_transform.inverse();
+ transform(new_transform);
}
//
@@ -1130,7 +1154,7 @@ void btSoftBody::scale(const btVector3& scl)
updateNormals();
updateBounds();
updateConstants();
- initializeDmInverse();
+ initializeDmInverse();
}
//
@@ -2010,22 +2034,22 @@ bool btSoftBody::rayTest(const btVector3& rayFrom,
}
bool btSoftBody::rayFaceTest(const btVector3& rayFrom,
- const btVector3& rayTo,
- sRayCast& results)
+ const btVector3& rayTo,
+ sRayCast& results)
{
if (m_faces.size() == 0)
return false;
else
{
- if (m_fdbvt.empty())
- initializeFaceTree();
+ if (m_fdbvt.empty())
+ initializeFaceTree();
}
-
- results.body = this;
- results.fraction = 1.f;
- results.index = -1;
-
- return (rayFaceTest(rayFrom, rayTo, results.fraction, results.index) != 0);
+
+ results.body = this;
+ results.fraction = 1.f;
+ results.index = -1;
+
+ return (rayFaceTest(rayFrom, rayTo, results.fraction, results.index) != 0);
}
//
@@ -2056,112 +2080,111 @@ void btSoftBody::setSolver(eSolverPresets::_ preset)
void btSoftBody::predictMotion(btScalar dt)
{
- int i, ni;
-
- /* Update */
- if (m_bUpdateRtCst)
- {
- m_bUpdateRtCst = false;
- updateConstants();
- m_fdbvt.clear();
- if (m_cfg.collisions & fCollision::VF_SS)
- {
- initializeFaceTree();
- }
- }
-
- /* Prepare */
- m_sst.sdt = dt * m_cfg.timescale;
- m_sst.isdt = 1 / m_sst.sdt;
- m_sst.velmrg = m_sst.sdt * 3;
- m_sst.radmrg = getCollisionShape()->getMargin();
- m_sst.updmrg = m_sst.radmrg * (btScalar)0.25;
- /* Forces */
- addVelocity(m_worldInfo->m_gravity * m_sst.sdt);
- applyForces();
- /* Integrate */
- for (i = 0, ni = m_nodes.size(); i < ni; ++i)
- {
- Node& n = m_nodes[i];
- n.m_q = n.m_x;
- btVector3 deltaV = n.m_f * n.m_im * m_sst.sdt;
- {
- btScalar maxDisplacement = m_worldInfo->m_maxDisplacement;
- btScalar clampDeltaV = maxDisplacement / m_sst.sdt;
- for (int c = 0; c < 3; c++)
- {
- if (deltaV[c] > clampDeltaV)
- {
- deltaV[c] = clampDeltaV;
- }
- if (deltaV[c] < -clampDeltaV)
- {
- deltaV[c] = -clampDeltaV;
- }
- }
- }
- n.m_v += deltaV;
- n.m_x += n.m_v * m_sst.sdt;
- n.m_f = btVector3(0, 0, 0);
- }
- /* Clusters */
- updateClusters();
- /* Bounds */
- updateBounds();
- /* Nodes */
- ATTRIBUTE_ALIGNED16(btDbvtVolume)
- vol;
- for (i = 0, ni = m_nodes.size(); i < ni; ++i)
- {
- Node& n = m_nodes[i];
- vol = btDbvtVolume::FromCR(n.m_x, m_sst.radmrg);
- m_ndbvt.update(n.m_leaf,
- vol,
- n.m_v * m_sst.velmrg,
- m_sst.updmrg);
- }
- /* Faces */
- if (!m_fdbvt.empty())
- {
- for (int i = 0; i < m_faces.size(); ++i)
- {
- Face& f = m_faces[i];
- const btVector3 v = (f.m_n[0]->m_v +
- f.m_n[1]->m_v +
- f.m_n[2]->m_v) /
- 3;
- vol = VolumeOf(f, m_sst.radmrg);
- m_fdbvt.update(f.m_leaf,
- vol,
- v * m_sst.velmrg,
- m_sst.updmrg);
- }
- }
- /* Pose */
- updatePose();
- /* Match */
- if (m_pose.m_bframe && (m_cfg.kMT > 0))
- {
- const btMatrix3x3 posetrs = m_pose.m_rot;
- for (int i = 0, ni = m_nodes.size(); i < ni; ++i)
- {
- Node& n = m_nodes[i];
- if (n.m_im > 0)
- {
- const btVector3 x = posetrs * m_pose.m_pos[i] + m_pose.m_com;
- n.m_x = Lerp(n.m_x, x, m_cfg.kMT);
- }
- }
- }
- /* Clear contacts */
- m_rcontacts.resize(0);
- m_scontacts.resize(0);
- /* Optimize dbvt's */
- m_ndbvt.optimizeIncremental(1);
- m_fdbvt.optimizeIncremental(1);
- m_cdbvt.optimizeIncremental(1);
-}
+ int i, ni;
+ /* Update */
+ if (m_bUpdateRtCst)
+ {
+ m_bUpdateRtCst = false;
+ updateConstants();
+ m_fdbvt.clear();
+ if (m_cfg.collisions & fCollision::VF_SS)
+ {
+ initializeFaceTree();
+ }
+ }
+
+ /* Prepare */
+ m_sst.sdt = dt * m_cfg.timescale;
+ m_sst.isdt = 1 / m_sst.sdt;
+ m_sst.velmrg = m_sst.sdt * 3;
+ m_sst.radmrg = getCollisionShape()->getMargin();
+ m_sst.updmrg = m_sst.radmrg * (btScalar)0.25;
+ /* Forces */
+ addVelocity(m_worldInfo->m_gravity * m_sst.sdt);
+ applyForces();
+ /* Integrate */
+ for (i = 0, ni = m_nodes.size(); i < ni; ++i)
+ {
+ Node& n = m_nodes[i];
+ n.m_q = n.m_x;
+ btVector3 deltaV = n.m_f * n.m_im * m_sst.sdt;
+ {
+ btScalar maxDisplacement = m_worldInfo->m_maxDisplacement;
+ btScalar clampDeltaV = maxDisplacement / m_sst.sdt;
+ for (int c = 0; c < 3; c++)
+ {
+ if (deltaV[c] > clampDeltaV)
+ {
+ deltaV[c] = clampDeltaV;
+ }
+ if (deltaV[c] < -clampDeltaV)
+ {
+ deltaV[c] = -clampDeltaV;
+ }
+ }
+ }
+ n.m_v += deltaV;
+ n.m_x += n.m_v * m_sst.sdt;
+ n.m_f = btVector3(0, 0, 0);
+ }
+ /* Clusters */
+ updateClusters();
+ /* Bounds */
+ updateBounds();
+ /* Nodes */
+ ATTRIBUTE_ALIGNED16(btDbvtVolume)
+ vol;
+ for (i = 0, ni = m_nodes.size(); i < ni; ++i)
+ {
+ Node& n = m_nodes[i];
+ vol = btDbvtVolume::FromCR(n.m_x, m_sst.radmrg);
+ m_ndbvt.update(n.m_leaf,
+ vol,
+ n.m_v * m_sst.velmrg,
+ m_sst.updmrg);
+ }
+ /* Faces */
+ if (!m_fdbvt.empty())
+ {
+ for (int i = 0; i < m_faces.size(); ++i)
+ {
+ Face& f = m_faces[i];
+ const btVector3 v = (f.m_n[0]->m_v +
+ f.m_n[1]->m_v +
+ f.m_n[2]->m_v) /
+ 3;
+ vol = VolumeOf(f, m_sst.radmrg);
+ m_fdbvt.update(f.m_leaf,
+ vol,
+ v * m_sst.velmrg,
+ m_sst.updmrg);
+ }
+ }
+ /* Pose */
+ updatePose();
+ /* Match */
+ if (m_pose.m_bframe && (m_cfg.kMT > 0))
+ {
+ const btMatrix3x3 posetrs = m_pose.m_rot;
+ for (int i = 0, ni = m_nodes.size(); i < ni; ++i)
+ {
+ Node& n = m_nodes[i];
+ if (n.m_im > 0)
+ {
+ const btVector3 x = posetrs * m_pose.m_pos[i] + m_pose.m_com;
+ n.m_x = Lerp(n.m_x, x, m_cfg.kMT);
+ }
+ }
+ }
+ /* Clear contacts */
+ m_rcontacts.resize(0);
+ m_scontacts.resize(0);
+ /* Optimize dbvt's */
+ m_ndbvt.optimizeIncremental(1);
+ m_fdbvt.optimizeIncremental(1);
+ m_cdbvt.optimizeIncremental(1);
+}
//
void btSoftBody::solveConstraints()
@@ -2534,12 +2557,12 @@ int btSoftBody::rayTest(const btVector3& rayFrom, const btVector3& rayTo,
}
int btSoftBody::rayFaceTest(const btVector3& rayFrom, const btVector3& rayTo,
- btScalar& mint, int& index) const
+ btScalar& mint, int& index) const
{
int cnt = 0;
{ /* Use dbvt */
RayFromToCaster collider(rayFrom, rayTo, mint);
-
+
btDbvt::rayTest(m_fdbvt.m_root, rayFrom, rayTo, collider);
if (collider.m_face)
{
@@ -2551,7 +2574,6 @@ int btSoftBody::rayFaceTest(const btVector3& rayFrom, const btVector3& rayTo,
return (cnt);
}
-
//
static inline btDbvntNode* copyToDbvnt(const btDbvtNode* n)
{
@@ -2580,7 +2602,7 @@ static inline void calculateNormalCone(btDbvntNode* root)
}
else
{
- btVector3 n0(0,0,0), n1(0,0,0);
+ btVector3 n0(0, 0, 0), n1(0, 0, 0);
btScalar a0 = 0, a1 = 0;
if (root->childs[0])
{
@@ -2594,8 +2616,8 @@ static inline void calculateNormalCone(btDbvntNode* root)
n1 = root->childs[1]->normal;
a1 = root->childs[1]->angle;
}
- root->normal = (n0+n1).safeNormalize();
- root->angle = btMax(a0,a1) + btAngle(n0, n1)*0.5;
+ root->normal = (n0 + n1).safeNormalize();
+ root->angle = btMax(a0, a1) + btAngle(n0, n1) * 0.5;
}
}
@@ -2609,7 +2631,8 @@ void btSoftBody::initializeFaceTree()
for (int i = 0; i < m_faces.size(); ++i)
{
Face& f = m_faces[i];
- ATTRIBUTE_ALIGNED16(btDbvtVolume) vol = VolumeOf(f, 0);
+ ATTRIBUTE_ALIGNED16(btDbvtVolume)
+ vol = VolumeOf(f, 0);
btDbvtNode* node = new (btAlignedAlloc(sizeof(btDbvtNode), 16)) btDbvtNode();
node->parent = NULL;
node->data = &f;
@@ -2623,7 +2646,7 @@ void btSoftBody::initializeFaceTree()
// construct the adjacency list for triangles
for (int i = 0; i < adj.size(); ++i)
{
- for (int j = i+1; j < adj.size(); ++j)
+ for (int j = i + 1; j < adj.size(); ++j)
{
int dup = 0;
for (int k = 0; k < 3; ++k)
@@ -2661,7 +2684,8 @@ void btSoftBody::rebuildNodeTree()
for (int i = 0; i < m_nodes.size(); ++i)
{
Node& n = m_nodes[i];
- ATTRIBUTE_ALIGNED16(btDbvtVolume) vol = btDbvtVolume::FromCR(n.m_x, 0);
+ ATTRIBUTE_ALIGNED16(btDbvtVolume)
+ vol = btDbvtVolume::FromCR(n.m_x, 0);
btDbvtNode* node = new (btAlignedAlloc(sizeof(btDbvtNode), 16)) btDbvtNode();
node->parent = NULL;
node->data = &n;
@@ -2704,61 +2728,61 @@ btVector3 btSoftBody::evaluateCom() const
}
bool btSoftBody::checkContact(const btCollisionObjectWrapper* colObjWrap,
- const btVector3& x,
- btScalar margin,
- btSoftBody::sCti& cti) const
-{
- btVector3 nrm;
- const btCollisionShape* shp = colObjWrap->getCollisionShape();
- // const btRigidBody *tmpRigid = btRigidBody::upcast(colObjWrap->getCollisionObject());
- //const btTransform &wtr = tmpRigid ? tmpRigid->getWorldTransform() : colObjWrap->getWorldTransform();
- const btTransform& wtr = colObjWrap->getWorldTransform();
- //todo: check which transform is needed here
-
- btScalar dst =
- m_worldInfo->m_sparsesdf.Evaluate(
- wtr.invXform(x),
- shp,
- nrm,
- margin);
- if (dst < 0)
- {
- cti.m_colObj = colObjWrap->getCollisionObject();
- cti.m_normal = wtr.getBasis() * nrm;
- cti.m_offset = -btDot(cti.m_normal, x - cti.m_normal * dst);
- return (true);
- }
- return (false);
+ const btVector3& x,
+ btScalar margin,
+ btSoftBody::sCti& cti) const
+{
+ btVector3 nrm;
+ const btCollisionShape* shp = colObjWrap->getCollisionShape();
+ // const btRigidBody *tmpRigid = btRigidBody::upcast(colObjWrap->getCollisionObject());
+ //const btTransform &wtr = tmpRigid ? tmpRigid->getWorldTransform() : colObjWrap->getWorldTransform();
+ const btTransform& wtr = colObjWrap->getWorldTransform();
+ //todo: check which transform is needed here
+
+ btScalar dst =
+ m_worldInfo->m_sparsesdf.Evaluate(
+ wtr.invXform(x),
+ shp,
+ nrm,
+ margin);
+ if (dst < 0)
+ {
+ cti.m_colObj = colObjWrap->getCollisionObject();
+ cti.m_normal = wtr.getBasis() * nrm;
+ cti.m_offset = -btDot(cti.m_normal, x - cti.m_normal * dst);
+ return (true);
+ }
+ return (false);
}
//
bool btSoftBody::checkDeformableContact(const btCollisionObjectWrapper* colObjWrap,
- const btVector3& x,
- btScalar margin,
- btSoftBody::sCti& cti, bool predict) const
+ const btVector3& x,
+ btScalar margin,
+ btSoftBody::sCti& cti, bool predict) const
{
btVector3 nrm;
const btCollisionShape* shp = colObjWrap->getCollisionShape();
- const btCollisionObject* tmpCollisionObj = colObjWrap->getCollisionObject();
- // use the position x_{n+1}^* = x_n + dt * v_{n+1}^* where v_{n+1}^* = v_n + dtg for collision detect
- // but resolve contact at x_n
- btTransform wtr = (predict) ?
- (colObjWrap->m_preTransform != NULL ? tmpCollisionObj->getInterpolationWorldTransform()*(*colObjWrap->m_preTransform) : tmpCollisionObj->getInterpolationWorldTransform())
- : colObjWrap->getWorldTransform();
+ const btCollisionObject* tmpCollisionObj = colObjWrap->getCollisionObject();
+ // use the position x_{n+1}^* = x_n + dt * v_{n+1}^* where v_{n+1}^* = v_n + dtg for collision detect
+ // but resolve contact at x_n
+ btTransform wtr = (predict) ? (colObjWrap->m_preTransform != NULL ? tmpCollisionObj->getInterpolationWorldTransform() * (*colObjWrap->m_preTransform) : tmpCollisionObj->getInterpolationWorldTransform())
+ : colObjWrap->getWorldTransform();
btScalar dst =
m_worldInfo->m_sparsesdf.Evaluate(
wtr.invXform(x),
shp,
nrm,
margin);
+
if (!predict)
{
cti.m_colObj = colObjWrap->getCollisionObject();
cti.m_normal = wtr.getBasis() * nrm;
- cti.m_offset = dst;
+ cti.m_offset = dst;
}
- if (dst < 0)
- return true;
+ if (dst < 0)
+ return true;
return (false);
}
@@ -2767,175 +2791,131 @@ bool btSoftBody::checkDeformableContact(const btCollisionObjectWrapper* colObjWr
// point p with respect to triangle (a, b, c)
static void getBarycentric(const btVector3& p, btVector3& a, btVector3& b, btVector3& c, btVector3& bary)
{
- btVector3 v0 = b - a, v1 = c - a, v2 = p - a;
- btScalar d00 = v0.dot(v0);
- btScalar d01 = v0.dot(v1);
- btScalar d11 = v1.dot(v1);
- btScalar d20 = v2.dot(v0);
- btScalar d21 = v2.dot(v1);
- btScalar denom = d00 * d11 - d01 * d01;
- bary.setY((d11 * d20 - d01 * d21) / denom);
- bary.setZ((d00 * d21 - d01 * d20) / denom);
- bary.setX(btScalar(1) - bary.getY() - bary.getZ());
+ btVector3 v0 = b - a, v1 = c - a, v2 = p - a;
+ btScalar d00 = v0.dot(v0);
+ btScalar d01 = v0.dot(v1);
+ btScalar d11 = v1.dot(v1);
+ btScalar d20 = v2.dot(v0);
+ btScalar d21 = v2.dot(v1);
+ btScalar denom = d00 * d11 - d01 * d01;
+ bary.setY((d11 * d20 - d01 * d21) / denom);
+ bary.setZ((d00 * d21 - d01 * d20) / denom);
+ bary.setX(btScalar(1) - bary.getY() - bary.getZ());
}
//
bool btSoftBody::checkDeformableFaceContact(const btCollisionObjectWrapper* colObjWrap,
- Face& f,
- btVector3& contact_point,
- btVector3& bary,
- btScalar margin,
- btSoftBody::sCti& cti, bool predict) const
-{
- btVector3 nrm;
- const btCollisionShape* shp = colObjWrap->getCollisionShape();
- const btCollisionObject* tmpCollisionObj = colObjWrap->getCollisionObject();
- // use the position x_{n+1}^* = x_n + dt * v_{n+1}^* where v_{n+1}^* = v_n + dtg for collision detect
- // but resolve contact at x_n
- btTransform wtr = (predict) ?
- (colObjWrap->m_preTransform != NULL ? tmpCollisionObj->getInterpolationWorldTransform()*(*colObjWrap->m_preTransform) : tmpCollisionObj->getInterpolationWorldTransform())
- : colObjWrap->getWorldTransform();
- btScalar dst;
-
-//#define USE_QUADRATURE 1
-//#define CACHE_PREV_COLLISION
-
- // use the contact position of the previous collision
-#ifdef CACHE_PREV_COLLISION
- if (f.m_pcontact[3] != 0)
- {
- for (int i = 0; i < 3; ++i)
- bary[i] = f.m_pcontact[i];
- contact_point = BaryEval(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary);
- dst = m_worldInfo->m_sparsesdf.Evaluate(
- wtr.invXform(contact_point),
- shp,
- nrm,
- margin);
- nrm = wtr.getBasis() * nrm;
- cti.m_colObj = colObjWrap->getCollisionObject();
- // use cached contact point
- }
- else
- {
- btGjkEpaSolver2::sResults results;
- btTransform triangle_transform;
- triangle_transform.setIdentity();
- triangle_transform.setOrigin(f.m_n[0]->m_x);
- btTriangleShape triangle(btVector3(0,0,0), f.m_n[1]->m_x-f.m_n[0]->m_x, f.m_n[2]->m_x-f.m_n[0]->m_x);
- btVector3 guess(0,0,0);
- const btConvexShape* csh = static_cast<const btConvexShape*>(shp);
- btGjkEpaSolver2::SignedDistance(&triangle, triangle_transform, csh, wtr, guess, results);
- dst = results.distance - margin;
- contact_point = results.witnesses[0];
- getBarycentric(contact_point, f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary);
- nrm = results.normal;
- cti.m_colObj = colObjWrap->getCollisionObject();
- for (int i = 0; i < 3; ++i)
- f.m_pcontact[i] = bary[i];
- }
- return (dst < 0);
-#endif
+ Face& f,
+ btVector3& contact_point,
+ btVector3& bary,
+ btScalar margin,
+ btSoftBody::sCti& cti, bool predict) const
+{
+ btVector3 nrm;
+ const btCollisionShape* shp = colObjWrap->getCollisionShape();
+ const btCollisionObject* tmpCollisionObj = colObjWrap->getCollisionObject();
+ // use the position x_{n+1}^* = x_n + dt * v_{n+1}^* where v_{n+1}^* = v_n + dtg for collision detect
+ // but resolve contact at x_n
+ btTransform wtr = (predict) ? (colObjWrap->m_preTransform != NULL ? tmpCollisionObj->getInterpolationWorldTransform() * (*colObjWrap->m_preTransform) : tmpCollisionObj->getInterpolationWorldTransform())
+ : colObjWrap->getWorldTransform();
+ btScalar dst;
+ btGjkEpaSolver2::sResults results;
+
+// #define USE_QUADRATURE 1
- // use collision quadrature point
+ // use collision quadrature point
#ifdef USE_QUADRATURE
- {
- dst = SIMD_INFINITY;
- btVector3 local_nrm;
- for (int q = 0; q < m_quads.size(); ++q)
- {
- btVector3 p;
- if (predict)
- p = BaryEval(f.m_n[0]->m_q, f.m_n[1]->m_q, f.m_n[2]->m_q, m_quads[q]);
- else
- p = BaryEval(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, m_quads[q]);
- btScalar local_dst = m_worldInfo->m_sparsesdf.Evaluate(
- wtr.invXform(p),
- shp,
- local_nrm,
- margin);
- if (local_dst < dst)
- {
- if (local_dst < 0 && predict)
- return true;
- dst = local_dst;
- contact_point = p;
- bary = m_quads[q];
- nrm = local_nrm;
- }
- if (!predict)
- {
- cti.m_colObj = colObjWrap->getCollisionObject();
- cti.m_normal = wtr.getBasis() * nrm;
- cti.m_offset = dst;
- }
- }
- return (dst < 0);
- }
+ {
+ dst = SIMD_INFINITY;
+ btVector3 local_nrm;
+ for (int q = 0; q < m_quads.size(); ++q)
+ {
+ btVector3 p;
+ if (predict)
+ p = BaryEval(f.m_n[0]->m_q, f.m_n[1]->m_q, f.m_n[2]->m_q, m_quads[q]);
+ else
+ p = BaryEval(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, m_quads[q]);
+ btScalar local_dst = m_worldInfo->m_sparsesdf.Evaluate(
+ wtr.invXform(p),
+ shp,
+ local_nrm,
+ margin);
+ if (local_dst < dst)
+ {
+ if (local_dst < 0 && predict)
+ return true;
+ dst = local_dst;
+ contact_point = p;
+ bary = m_quads[q];
+ nrm = local_nrm;
+ }
+ if (!predict)
+ {
+ cti.m_colObj = colObjWrap->getCollisionObject();
+ cti.m_normal = wtr.getBasis() * nrm;
+ cti.m_offset = dst;
+ }
+ }
+ return (dst < 0);
+ }
#endif
-
-// // regular face contact
-// {
-// btGjkEpaSolver2::sResults results;
-// btTransform triangle_transform;
-// triangle_transform.setIdentity();
-// triangle_transform.setOrigin(f.m_n[0]->m_x);
-// btTriangleShape triangle(btVector3(0,0,0), f.m_n[1]->m_x-f.m_n[0]->m_x, f.m_n[2]->m_x-f.m_n[0]->m_x);
-// btVector3 guess(0,0,0);
-// if (predict)
-// {
-// triangle_transform.setOrigin(f.m_n[0]->m_q);
-// triangle = btTriangleShape(btVector3(0,0,0), f.m_n[1]->m_q-f.m_n[0]->m_q, f.m_n[2]->m_q-f.m_n[0]->m_q);
-// }
-// const btConvexShape* csh = static_cast<const btConvexShape*>(shp);
-//// btGjkEpaSolver2::SignedDistance(&triangle, triangle_transform, csh, wtr, guess, results);
-//// dst = results.distance - margin;
-//// contact_point = results.witnesses[0];
-// btGjkEpaSolver2::Penetration(&triangle, triangle_transform, csh, wtr, guess, results);
-// if (results.status == btGjkEpaSolver2::sResults::Separated)
-// return false;
-// dst = results.distance - margin;
-// contact_point = results.witnesses[1];
-// getBarycentric(contact_point, f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary);
-// nrm = results.normal;
-// for (int i = 0; i < 3; ++i)
-// f.m_pcontact[i] = bary[i];
-// }
-//
-// if (!predict)
-// {
-// cti.m_colObj = colObjWrap->getCollisionObject();
-// cti.m_normal = nrm;
-// cti.m_offset = dst;
-// }
-//
-
- // regular face contact
- {
- btGjkEpaSolver2::sResults results;
- btTransform triangle_transform;
- triangle_transform.setIdentity();
- triangle_transform.setOrigin(f.m_n[0]->m_q);
- btTriangleShape triangle(btVector3(0,0,0), f.m_n[1]->m_q-f.m_n[0]->m_q, f.m_n[2]->m_q-f.m_n[0]->m_q);
- btVector3 guess(0,0,0);
- const btConvexShape* csh = static_cast<const btConvexShape*>(shp);
- btGjkEpaSolver2::SignedDistance(&triangle, triangle_transform, csh, wtr, guess, results);
- dst = results.distance-csh->getMargin();
- dst -= margin;
- if (dst >= 0)
- return false;
- contact_point = results.witnesses[0];
- getBarycentric(contact_point, f.m_n[0]->m_q, f.m_n[1]->m_q, f.m_n[2]->m_q, bary);
- btVector3 curr = BaryEval(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary);
- nrm = results.normal;
- cti.m_colObj = colObjWrap->getCollisionObject();
- cti.m_normal = nrm;
- cti.m_offset = dst + (curr - contact_point).dot(nrm);
- }
- return (dst < 0);
+
+ // collision detection using x*
+ btTransform triangle_transform;
+ triangle_transform.setIdentity();
+ triangle_transform.setOrigin(f.m_n[0]->m_q);
+ btTriangleShape triangle(btVector3(0, 0, 0), f.m_n[1]->m_q - f.m_n[0]->m_q, f.m_n[2]->m_q - f.m_n[0]->m_q);
+ btVector3 guess(0, 0, 0);
+ const btConvexShape* csh = static_cast<const btConvexShape*>(shp);
+ btGjkEpaSolver2::SignedDistance(&triangle, triangle_transform, csh, wtr, guess, results);
+ dst = results.distance - 2.0 * csh->getMargin() - margin; // margin padding so that the distance = the actual distance between face and rigid - margin of rigid - margin of deformable
+ if (dst >= 0)
+ return false;
+
+ // Use consistent barycenter to recalculate distance.
+ if (this->m_cacheBarycenter)
+ {
+ if (f.m_pcontact[3] != 0)
+ {
+ for (int i = 0; i < 3; ++i)
+ bary[i] = f.m_pcontact[i];
+ contact_point = BaryEval(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary);
+ const btConvexShape* csh = static_cast<const btConvexShape*>(shp);
+ btGjkEpaSolver2::SignedDistance(contact_point, margin, csh, wtr, results);
+ cti.m_colObj = colObjWrap->getCollisionObject();
+ dst = results.distance;
+ cti.m_normal = results.normal;
+ cti.m_offset = dst;
+
+ //point-convex CD
+ wtr = colObjWrap->getWorldTransform();
+ btTriangleShape triangle2(btVector3(0, 0, 0), f.m_n[1]->m_x - f.m_n[0]->m_x, f.m_n[2]->m_x - f.m_n[0]->m_x);
+ triangle_transform.setOrigin(f.m_n[0]->m_x);
+ btGjkEpaSolver2::SignedDistance(&triangle2, triangle_transform, csh, wtr, guess, results);
+
+ dst = results.distance - csh->getMargin() - margin;
+ return true;
+ }
+ }
+
+ // Use triangle-convex CD.
+ wtr = colObjWrap->getWorldTransform();
+ btTriangleShape triangle2(btVector3(0, 0, 0), f.m_n[1]->m_x - f.m_n[0]->m_x, f.m_n[2]->m_x - f.m_n[0]->m_x);
+ triangle_transform.setOrigin(f.m_n[0]->m_x);
+ btGjkEpaSolver2::SignedDistance(&triangle2, triangle_transform, csh, wtr, guess, results);
+ contact_point = results.witnesses[0];
+ getBarycentric(contact_point, f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary);
+
+ for (int i = 0; i < 3; ++i)
+ f.m_pcontact[i] = bary[i];
+
+ dst = results.distance - csh->getMargin() - margin;
+ cti.m_colObj = colObjWrap->getCollisionObject();
+ cti.m_normal = results.normal;
+ cti.m_offset = dst;
+ return true;
}
-//
void btSoftBody::updateNormals()
{
const btVector3 zv(0, 0, 0);
@@ -2979,63 +2959,63 @@ void btSoftBody::updateBounds()
m_bounds[1] = btVector3(1000, 1000, 1000);
} else {*/
-// if (m_ndbvt.m_root)
-// {
-// const btVector3& mins = m_ndbvt.m_root->volume.Mins();
-// const btVector3& maxs = m_ndbvt.m_root->volume.Maxs();
-// const btScalar csm = getCollisionShape()->getMargin();
-// const btVector3 mrg = btVector3(csm,
-// csm,
-// csm) *
-// 1; // ??? to investigate...
-// m_bounds[0] = mins - mrg;
-// m_bounds[1] = maxs + mrg;
-// if (0 != getBroadphaseHandle())
-// {
-// m_worldInfo->m_broadphase->setAabb(getBroadphaseHandle(),
-// m_bounds[0],
-// m_bounds[1],
-// m_worldInfo->m_dispatcher);
-// }
-// }
-// else
-// {
-// m_bounds[0] =
-// m_bounds[1] = btVector3(0, 0, 0);
-// }
- if (m_nodes.size())
- {
- btVector3 mins = m_nodes[0].m_x;
- btVector3 maxs = m_nodes[0].m_x;
- for (int i = 1; i < m_nodes.size(); ++i)
- {
- for (int d = 0; d < 3; ++d)
- {
- if (m_nodes[i].m_x[d] > maxs[d])
- maxs[d] = m_nodes[i].m_x[d];
- if (m_nodes[i].m_x[d] < mins[d])
- mins[d] = m_nodes[i].m_x[d];
- }
- }
- const btScalar csm = getCollisionShape()->getMargin();
- const btVector3 mrg = btVector3(csm,
- csm,
- csm);
- m_bounds[0] = mins - mrg;
- m_bounds[1] = maxs + mrg;
- if (0 != getBroadphaseHandle())
- {
- m_worldInfo->m_broadphase->setAabb(getBroadphaseHandle(),
- m_bounds[0],
- m_bounds[1],
- m_worldInfo->m_dispatcher);
- }
- }
- else
- {
- m_bounds[0] =
- m_bounds[1] = btVector3(0, 0, 0);
- }
+ // if (m_ndbvt.m_root)
+ // {
+ // const btVector3& mins = m_ndbvt.m_root->volume.Mins();
+ // const btVector3& maxs = m_ndbvt.m_root->volume.Maxs();
+ // const btScalar csm = getCollisionShape()->getMargin();
+ // const btVector3 mrg = btVector3(csm,
+ // csm,
+ // csm) *
+ // 1; // ??? to investigate...
+ // m_bounds[0] = mins - mrg;
+ // m_bounds[1] = maxs + mrg;
+ // if (0 != getBroadphaseHandle())
+ // {
+ // m_worldInfo->m_broadphase->setAabb(getBroadphaseHandle(),
+ // m_bounds[0],
+ // m_bounds[1],
+ // m_worldInfo->m_dispatcher);
+ // }
+ // }
+ // else
+ // {
+ // m_bounds[0] =
+ // m_bounds[1] = btVector3(0, 0, 0);
+ // }
+ if (m_nodes.size())
+ {
+ btVector3 mins = m_nodes[0].m_x;
+ btVector3 maxs = m_nodes[0].m_x;
+ for (int i = 1; i < m_nodes.size(); ++i)
+ {
+ for (int d = 0; d < 3; ++d)
+ {
+ if (m_nodes[i].m_x[d] > maxs[d])
+ maxs[d] = m_nodes[i].m_x[d];
+ if (m_nodes[i].m_x[d] < mins[d])
+ mins[d] = m_nodes[i].m_x[d];
+ }
+ }
+ const btScalar csm = getCollisionShape()->getMargin();
+ const btVector3 mrg = btVector3(csm,
+ csm,
+ csm);
+ m_bounds[0] = mins - mrg;
+ m_bounds[1] = maxs + mrg;
+ if (0 != getBroadphaseHandle())
+ {
+ m_worldInfo->m_broadphase->setAabb(getBroadphaseHandle(),
+ m_bounds[0],
+ m_bounds[1],
+ m_worldInfo->m_dispatcher);
+ }
+ }
+ else
+ {
+ m_bounds[0] =
+ m_bounds[1] = btVector3(0, 0, 0);
+ }
}
//
@@ -3454,60 +3434,120 @@ void btSoftBody::dampClusters()
void btSoftBody::setSpringStiffness(btScalar k)
{
- for (int i = 0; i < m_links.size(); ++i)
- {
- m_links[i].Feature::m_material->m_kLST = k;
- }
- m_repulsionStiffness = k;
+ for (int i = 0; i < m_links.size(); ++i)
+ {
+ m_links[i].Feature::m_material->m_kLST = k;
+ }
+ m_repulsionStiffness = k;
+}
+
+void btSoftBody::setGravityFactor(btScalar gravFactor)
+{
+ m_gravityFactor = gravFactor;
+}
+
+void btSoftBody::setCacheBarycenter(bool cacheBarycenter)
+{
+ m_cacheBarycenter = cacheBarycenter;
}
void btSoftBody::initializeDmInverse()
{
- btScalar unit_simplex_measure = 1./6.;
-
- for (int i = 0; i < m_tetras.size(); ++i)
- {
- Tetra &t = m_tetras[i];
- btVector3 c1 = t.m_n[1]->m_x - t.m_n[0]->m_x;
- btVector3 c2 = t.m_n[2]->m_x - t.m_n[0]->m_x;
- btVector3 c3 = t.m_n[3]->m_x - t.m_n[0]->m_x;
- btMatrix3x3 Dm(c1.getX(), c2.getX(), c3.getX(),
- c1.getY(), c2.getY(), c3.getY(),
- c1.getZ(), c2.getZ(), c3.getZ());
- t.m_element_measure = Dm.determinant() * unit_simplex_measure;
- t.m_Dm_inverse = Dm.inverse();
- }
+ btScalar unit_simplex_measure = 1. / 6.;
+
+ for (int i = 0; i < m_tetras.size(); ++i)
+ {
+ Tetra& t = m_tetras[i];
+ btVector3 c1 = t.m_n[1]->m_x - t.m_n[0]->m_x;
+ btVector3 c2 = t.m_n[2]->m_x - t.m_n[0]->m_x;
+ btVector3 c3 = t.m_n[3]->m_x - t.m_n[0]->m_x;
+ btMatrix3x3 Dm(c1.getX(), c2.getX(), c3.getX(),
+ c1.getY(), c2.getY(), c3.getY(),
+ c1.getZ(), c2.getZ(), c3.getZ());
+ t.m_element_measure = Dm.determinant() * unit_simplex_measure;
+ t.m_Dm_inverse = Dm.inverse();
+
+ // calculate the first three columns of P^{-1}
+ btVector3 a = t.m_n[0]->m_x;
+ btVector3 b = t.m_n[1]->m_x;
+ btVector3 c = t.m_n[2]->m_x;
+ btVector3 d = t.m_n[3]->m_x;
+
+ btScalar det = 1 / (a[0] * b[1] * c[2] - a[0] * b[1] * d[2] - a[0] * b[2] * c[1] + a[0] * b[2] * d[1] + a[0] * c[1] * d[2] - a[0] * c[2] * d[1] + a[1] * (-b[0] * c[2] + b[0] * d[2] + b[2] * c[0] - b[2] * d[0] - c[0] * d[2] + c[2] * d[0]) + a[2] * (b[0] * c[1] - b[0] * d[1] + b[1] * (d[0] - c[0]) + c[0] * d[1] - c[1] * d[0]) - b[0] * c[1] * d[2] + b[0] * c[2] * d[1] + b[1] * c[0] * d[2] - b[1] * c[2] * d[0] - b[2] * c[0] * d[1] + b[2] * c[1] * d[0]);
+
+ btScalar P11 = -b[2] * c[1] + d[2] * c[1] + b[1] * c[2] + b[2] * d[1] - c[2] * d[1] - b[1] * d[2];
+ btScalar P12 = b[2] * c[0] - d[2] * c[0] - b[0] * c[2] - b[2] * d[0] + c[2] * d[0] + b[0] * d[2];
+ btScalar P13 = -b[1] * c[0] + d[1] * c[0] + b[0] * c[1] + b[1] * d[0] - c[1] * d[0] - b[0] * d[1];
+ btScalar P21 = a[2] * c[1] - d[2] * c[1] - a[1] * c[2] - a[2] * d[1] + c[2] * d[1] + a[1] * d[2];
+ btScalar P22 = -a[2] * c[0] + d[2] * c[0] + a[0] * c[2] + a[2] * d[0] - c[2] * d[0] - a[0] * d[2];
+ btScalar P23 = a[1] * c[0] - d[1] * c[0] - a[0] * c[1] - a[1] * d[0] + c[1] * d[0] + a[0] * d[1];
+ btScalar P31 = -a[2] * b[1] + d[2] * b[1] + a[1] * b[2] + a[2] * d[1] - b[2] * d[1] - a[1] * d[2];
+ btScalar P32 = a[2] * b[0] - d[2] * b[0] - a[0] * b[2] - a[2] * d[0] + b[2] * d[0] + a[0] * d[2];
+ btScalar P33 = -a[1] * b[0] + d[1] * b[0] + a[0] * b[1] + a[1] * d[0] - b[1] * d[0] - a[0] * d[1];
+ btScalar P41 = a[2] * b[1] - c[2] * b[1] - a[1] * b[2] - a[2] * c[1] + b[2] * c[1] + a[1] * c[2];
+ btScalar P42 = -a[2] * b[0] + c[2] * b[0] + a[0] * b[2] + a[2] * c[0] - b[2] * c[0] - a[0] * c[2];
+ btScalar P43 = a[1] * b[0] - c[1] * b[0] - a[0] * b[1] - a[1] * c[0] + b[1] * c[0] + a[0] * c[1];
+
+ btVector4 p1(P11 * det, P21 * det, P31 * det, P41 * det);
+ btVector4 p2(P12 * det, P22 * det, P32 * det, P42 * det);
+ btVector4 p3(P13 * det, P23 * det, P33 * det, P43 * det);
+
+ t.m_P_inv[0] = p1;
+ t.m_P_inv[1] = p2;
+ t.m_P_inv[2] = p3;
+ }
+}
+
+static btScalar Dot4(const btVector4& a, const btVector4& b)
+{
+ return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
}
void btSoftBody::updateDeformation()
{
- for (int i = 0; i < m_tetras.size(); ++i)
- {
- btSoftBody::Tetra& t = m_tetras[i];
- btVector3 c1 = t.m_n[1]->m_q - t.m_n[0]->m_q;
- btVector3 c2 = t.m_n[2]->m_q - t.m_n[0]->m_q;
- btVector3 c3 = t.m_n[3]->m_q - t.m_n[0]->m_q;
- btMatrix3x3 Ds(c1.getX(), c2.getX(), c3.getX(),
- c1.getY(), c2.getY(), c3.getY(),
- c1.getZ(), c2.getZ(), c3.getZ());
- t.m_F = Ds * t.m_Dm_inverse;
-
- btSoftBody::TetraScratch& s = m_tetraScratches[i];
- s.m_F = t.m_F;
- s.m_J = t.m_F.determinant();
- btMatrix3x3 C = t.m_F.transpose()*t.m_F;
- s.m_trace = C[0].getX() + C[1].getY() + C[2].getZ();
- s.m_cofF = t.m_F.adjoint().transpose();
- }
+ btQuaternion q;
+ for (int i = 0; i < m_tetras.size(); ++i)
+ {
+ btSoftBody::Tetra& t = m_tetras[i];
+ btVector3 c1 = t.m_n[1]->m_q - t.m_n[0]->m_q;
+ btVector3 c2 = t.m_n[2]->m_q - t.m_n[0]->m_q;
+ btVector3 c3 = t.m_n[3]->m_q - t.m_n[0]->m_q;
+ btMatrix3x3 Ds(c1.getX(), c2.getX(), c3.getX(),
+ c1.getY(), c2.getY(), c3.getY(),
+ c1.getZ(), c2.getZ(), c3.getZ());
+ t.m_F = Ds * t.m_Dm_inverse;
+
+ btSoftBody::TetraScratch& s = m_tetraScratches[i];
+ s.m_F = t.m_F;
+ s.m_J = t.m_F.determinant();
+ btMatrix3x3 C = t.m_F.transpose() * t.m_F;
+ s.m_trace = C[0].getX() + C[1].getY() + C[2].getZ();
+ s.m_cofF = t.m_F.adjoint().transpose();
+
+ btVector3 a = t.m_n[0]->m_q;
+ btVector3 b = t.m_n[1]->m_q;
+ btVector3 c = t.m_n[2]->m_q;
+ btVector3 d = t.m_n[3]->m_q;
+ btVector4 q1(a[0], b[0], c[0], d[0]);
+ btVector4 q2(a[1], b[1], c[1], d[1]);
+ btVector4 q3(a[2], b[2], c[2], d[2]);
+ btMatrix3x3 B(Dot4(q1, t.m_P_inv[0]), Dot4(q1, t.m_P_inv[1]), Dot4(q1, t.m_P_inv[2]),
+ Dot4(q2, t.m_P_inv[0]), Dot4(q2, t.m_P_inv[1]), Dot4(q2, t.m_P_inv[2]),
+ Dot4(q3, t.m_P_inv[0]), Dot4(q3, t.m_P_inv[1]), Dot4(q3, t.m_P_inv[2]));
+ q.setRotation(btVector3(0, 0, 1), 0);
+ B.extractRotation(q, 0.01); // precision of the rotation is not very important for visual correctness.
+ btMatrix3x3 Q(q);
+ s.m_corotation = Q;
+ }
}
void btSoftBody::advanceDeformation()
{
- updateDeformation();
- for (int i = 0; i < m_tetras.size(); ++i)
- {
- m_tetraScratchesTn[i] = m_tetraScratches[i];
- }
+ updateDeformation();
+ for (int i = 0; i < m_tetras.size(); ++i)
+ {
+ m_tetraScratchesTn[i] = m_tetraScratches[i];
+ }
}
//
void btSoftBody::Joint::Prepare(btScalar dt, int)
@@ -3750,7 +3790,7 @@ void btSoftBody::applyForces()
//
void btSoftBody::setMaxStress(btScalar maxStress)
{
- m_cfg.m_maxStress = maxStress;
+ m_cfg.m_maxStress = maxStress;
}
//
@@ -3765,7 +3805,7 @@ void btSoftBody::interpolateRenderMesh()
const Node* p2 = m_renderNodesParents[i][2];
btVector3 normal = btCross(p1->m_x - p0->m_x, p2->m_x - p0->m_x);
btVector3 unit_normal = normal.normalized();
- Node& n = m_renderNodes[i];
+ RenderNode& n = m_renderNodes[i];
n.m_x.setZero();
for (int j = 0; j < 3; ++j)
{
@@ -3778,7 +3818,7 @@ void btSoftBody::interpolateRenderMesh()
{
for (int i = 0; i < m_renderNodes.size(); ++i)
{
- Node& n = m_renderNodes[i];
+ RenderNode& n = m_renderNodes[i];
n.m_x.setZero();
for (int j = 0; j < 4; ++j)
{
@@ -3793,13 +3833,13 @@ void btSoftBody::interpolateRenderMesh()
void btSoftBody::setCollisionQuadrature(int N)
{
- for (int i = 0; i <= N; ++i)
- {
- for (int j = 0; i+j <= N; ++j)
- {
- m_quads.push_back(btVector3(btScalar(i)/btScalar(N), btScalar(j)/btScalar(N), btScalar(N-i-j)/btScalar(N)));
- }
- }
+ for (int i = 0; i <= N; ++i)
+ {
+ for (int j = 0; i + j <= N; ++j)
+ {
+ m_quads.push_back(btVector3(btScalar(i) / btScalar(N), btScalar(j) / btScalar(N), btScalar(N - i - j) / btScalar(N)));
+ }
+ }
}
//
@@ -4006,12 +4046,12 @@ btSoftBody::vsolver_t btSoftBody::getSolver(eVSolver::_ solver)
void btSoftBody::setSelfCollision(bool useSelfCollision)
{
- m_useSelfCollision = useSelfCollision;
+ m_useSelfCollision = useSelfCollision;
}
bool btSoftBody::useSelfCollision()
{
- return m_useSelfCollision;
+ return m_useSelfCollision;
}
//
@@ -4052,23 +4092,23 @@ void btSoftBody::defaultCollisionHandler(const btCollisionObjectWrapper* pcoWrap
collider.ProcessColObj(this, pcoWrap);
}
break;
- case fCollision::SDF_RD:
- {
- btRigidBody* prb1 = (btRigidBody*)btRigidBody::upcast(pcoWrap->getCollisionObject());
- if (pcoWrap->getCollisionObject()->isActive() || this->isActive())
- {
- const btTransform wtr = pcoWrap->getWorldTransform();
- const btScalar timemargin = 0;
- const btScalar basemargin = getCollisionShape()->getMargin();
- btVector3 mins;
- btVector3 maxs;
- ATTRIBUTE_ALIGNED16(btDbvtVolume)
- volume;
- pcoWrap->getCollisionShape()->getAabb(wtr,
- mins,
- maxs);
- volume = btDbvtVolume::FromMM(mins, maxs);
- volume.Expand(btVector3(basemargin, basemargin, basemargin));
+ case fCollision::SDF_RD:
+ {
+ btRigidBody* prb1 = (btRigidBody*)btRigidBody::upcast(pcoWrap->getCollisionObject());
+ if (pcoWrap->getCollisionObject()->isActive() || this->isActive())
+ {
+ const btTransform wtr = pcoWrap->getWorldTransform();
+ const btScalar timemargin = 0;
+ const btScalar basemargin = getCollisionShape()->getMargin();
+ btVector3 mins;
+ btVector3 maxs;
+ ATTRIBUTE_ALIGNED16(btDbvtVolume)
+ volume;
+ pcoWrap->getCollisionShape()->getAabb(wtr,
+ mins,
+ maxs);
+ volume = btDbvtVolume::FromMM(mins, maxs);
+ volume.Expand(btVector3(basemargin, basemargin, basemargin));
if (m_cfg.collisions & fCollision::SDF_RDN)
{
btSoftColliders::CollideSDF_RD docollideNode;
@@ -4080,26 +4120,26 @@ void btSoftBody::defaultCollisionHandler(const btCollisionObjectWrapper* pcoWrap
m_ndbvt.collideTV(m_ndbvt.m_root, volume, docollideNode);
}
- if (((pcoWrap->getCollisionObject()->getInternalType() == CO_RIGID_BODY) && (m_cfg.collisions & fCollision::SDF_RDF)) || ((pcoWrap->getCollisionObject()->getInternalType() == CO_FEATHERSTONE_LINK) && (m_cfg.collisions & fCollision::SDF_MDF)))
- {
- btSoftColliders::CollideSDF_RDF docollideFace;
- docollideFace.psb = this;
- docollideFace.m_colObj1Wrap = pcoWrap;
- docollideFace.m_rigidBody = prb1;
+ if (((pcoWrap->getCollisionObject()->getInternalType() == CO_RIGID_BODY) && (m_cfg.collisions & fCollision::SDF_RDF)) || ((pcoWrap->getCollisionObject()->getInternalType() == CO_FEATHERSTONE_LINK) && (m_cfg.collisions & fCollision::SDF_MDF)))
+ {
+ btSoftColliders::CollideSDF_RDF docollideFace;
+ docollideFace.psb = this;
+ docollideFace.m_colObj1Wrap = pcoWrap;
+ docollideFace.m_rigidBody = prb1;
docollideFace.dynmargin = basemargin + timemargin;
docollideFace.stamargin = basemargin;
- m_fdbvt.collideTV(m_fdbvt.m_root, volume, docollideFace);
- }
- }
- }
- break;
+ m_fdbvt.collideTV(m_fdbvt.m_root, volume, docollideFace);
+ }
+ }
+ }
+ break;
}
}
//
void btSoftBody::defaultCollisionHandler(btSoftBody* psb)
{
- BT_PROFILE("Deformable Collision");
+ BT_PROFILE("Deformable Collision");
const int cf = m_cfg.collisions & psb->m_cfg.collisions;
switch (cf & fCollision::SVSmask)
{
@@ -4137,60 +4177,60 @@ void btSoftBody::defaultCollisionHandler(btSoftBody* psb)
}
}
break;
- case fCollision::VF_DD:
- {
- if (!psb->m_softSoftCollision)
- return;
- if (psb->isActive() || this->isActive())
- {
- if (this != psb)
- {
- btSoftColliders::CollideVF_DD docollide;
- /* common */
- docollide.mrg = getCollisionShape()->getMargin() +
- psb->getCollisionShape()->getMargin();
- /* psb0 nodes vs psb1 faces */
- if (psb->m_tetras.size() > 0)
- docollide.useFaceNormal = true;
- else
- docollide.useFaceNormal = false;
- docollide.psb[0] = this;
- docollide.psb[1] = psb;
- docollide.psb[0]->m_ndbvt.collideTT(docollide.psb[0]->m_ndbvt.m_root,
- docollide.psb[1]->m_fdbvt.m_root,
- docollide);
-
- /* psb1 nodes vs psb0 faces */
- if (this->m_tetras.size() > 0)
- docollide.useFaceNormal = true;
- else
- docollide.useFaceNormal = false;
- docollide.psb[0] = psb;
- docollide.psb[1] = this;
- docollide.psb[0]->m_ndbvt.collideTT(docollide.psb[0]->m_ndbvt.m_root,
- docollide.psb[1]->m_fdbvt.m_root,
- docollide);
- }
- else
- {
- if (psb->useSelfCollision())
- {
- btSoftColliders::CollideFF_DD docollide;
- docollide.mrg = 2*getCollisionShape()->getMargin();
- docollide.psb[0] = this;
- docollide.psb[1] = psb;
- if (this->m_tetras.size() > 0)
- docollide.useFaceNormal = true;
- else
- docollide.useFaceNormal = false;
- /* psb0 faces vs psb0 faces */
- calculateNormalCone(this->m_fdbvnt);
- this->m_fdbvt.selfCollideT(m_fdbvnt,docollide);
- }
- }
- }
- }
- break;
+ case fCollision::VF_DD:
+ {
+ if (!psb->m_softSoftCollision)
+ return;
+ if (psb->isActive() || this->isActive())
+ {
+ if (this != psb)
+ {
+ btSoftColliders::CollideVF_DD docollide;
+ /* common */
+ docollide.mrg = getCollisionShape()->getMargin() +
+ psb->getCollisionShape()->getMargin();
+ /* psb0 nodes vs psb1 faces */
+ if (psb->m_tetras.size() > 0)
+ docollide.useFaceNormal = true;
+ else
+ docollide.useFaceNormal = false;
+ docollide.psb[0] = this;
+ docollide.psb[1] = psb;
+ docollide.psb[0]->m_ndbvt.collideTT(docollide.psb[0]->m_ndbvt.m_root,
+ docollide.psb[1]->m_fdbvt.m_root,
+ docollide);
+
+ /* psb1 nodes vs psb0 faces */
+ if (this->m_tetras.size() > 0)
+ docollide.useFaceNormal = true;
+ else
+ docollide.useFaceNormal = false;
+ docollide.psb[0] = psb;
+ docollide.psb[1] = this;
+ docollide.psb[0]->m_ndbvt.collideTT(docollide.psb[0]->m_ndbvt.m_root,
+ docollide.psb[1]->m_fdbvt.m_root,
+ docollide);
+ }
+ else
+ {
+ if (psb->useSelfCollision())
+ {
+ btSoftColliders::CollideFF_DD docollide;
+ docollide.mrg = 2 * getCollisionShape()->getMargin();
+ docollide.psb[0] = this;
+ docollide.psb[1] = psb;
+ if (this->m_tetras.size() > 0)
+ docollide.useFaceNormal = true;
+ else
+ docollide.useFaceNormal = false;
+ /* psb0 faces vs psb0 faces */
+ calculateNormalCone(this->m_fdbvnt);
+ this->m_fdbvt.selfCollideT(m_fdbvnt, docollide);
+ }
+ }
+ }
+ }
+ break;
default:
{
}
@@ -4205,7 +4245,7 @@ void btSoftBody::geometricCollisionHandler(btSoftBody* psb)
{
btSoftColliders::CollideCCD docollide;
/* common */
- docollide.mrg = SAFE_EPSILON; // for rounding error instead of actual margin
+ docollide.mrg = SAFE_EPSILON; // for rounding error instead of actual margin
docollide.dt = psb->m_sst.sdt;
/* psb0 nodes vs psb1 faces */
if (psb->m_tetras.size() > 0)
@@ -4215,8 +4255,8 @@ void btSoftBody::geometricCollisionHandler(btSoftBody* psb)
docollide.psb[0] = this;
docollide.psb[1] = psb;
docollide.psb[0]->m_ndbvt.collideTT(docollide.psb[0]->m_ndbvt.m_root,
- docollide.psb[1]->m_fdbvt.m_root,
- docollide);
+ docollide.psb[1]->m_fdbvt.m_root,
+ docollide);
/* psb1 nodes vs psb0 faces */
if (this->m_tetras.size() > 0)
docollide.useFaceNormal = true;
@@ -4225,8 +4265,8 @@ void btSoftBody::geometricCollisionHandler(btSoftBody* psb)
docollide.psb[0] = psb;
docollide.psb[1] = this;
docollide.psb[0]->m_ndbvt.collideTT(docollide.psb[0]->m_ndbvt.m_root,
- docollide.psb[1]->m_fdbvt.m_root,
- docollide);
+ docollide.psb[1]->m_fdbvt.m_root,
+ docollide);
}
else
{
@@ -4236,14 +4276,14 @@ void btSoftBody::geometricCollisionHandler(btSoftBody* psb)
docollide.mrg = SAFE_EPSILON;
docollide.psb[0] = this;
docollide.psb[1] = psb;
- docollide.dt = psb->m_sst.sdt;
+ docollide.dt = psb->m_sst.sdt;
if (this->m_tetras.size() > 0)
docollide.useFaceNormal = true;
else
docollide.useFaceNormal = false;
/* psb0 faces vs psb0 faces */
calculateNormalCone(this->m_fdbvnt); // should compute this outside of this scope
- this->m_fdbvt.selfCollideT(m_fdbvnt,docollide);
+ this->m_fdbvt.selfCollideT(m_fdbvnt, docollide);
}
}
}
@@ -4648,44 +4688,43 @@ const char* btSoftBody::serialize(void* dataBuffer, class btSerializer* serializ
void btSoftBody::updateDeactivation(btScalar timeStep)
{
- if ((getActivationState() == ISLAND_SLEEPING) || (getActivationState() == DISABLE_DEACTIVATION))
- return;
+ if ((getActivationState() == ISLAND_SLEEPING) || (getActivationState() == DISABLE_DEACTIVATION))
+ return;
- if (m_maxSpeedSquared < m_sleepingThreshold * m_sleepingThreshold)
- {
- m_deactivationTime += timeStep;
- }
- else
- {
- m_deactivationTime = btScalar(0.);
- setActivationState(0);
- }
+ if (m_maxSpeedSquared < m_sleepingThreshold * m_sleepingThreshold)
+ {
+ m_deactivationTime += timeStep;
+ }
+ else
+ {
+ m_deactivationTime = btScalar(0.);
+ setActivationState(0);
+ }
}
-
void btSoftBody::setZeroVelocity()
{
- for (int i = 0; i < m_nodes.size(); ++i)
- {
- m_nodes[i].m_v.setZero();
- }
+ for (int i = 0; i < m_nodes.size(); ++i)
+ {
+ m_nodes[i].m_v.setZero();
+ }
}
bool btSoftBody::wantsSleeping()
{
- if (getActivationState() == DISABLE_DEACTIVATION)
- return false;
+ if (getActivationState() == DISABLE_DEACTIVATION)
+ return false;
- //disable deactivation
- if (gDisableDeactivation || (gDeactivationTime == btScalar(0.)))
- return false;
+ //disable deactivation
+ if (gDisableDeactivation || (gDeactivationTime == btScalar(0.)))
+ return false;
- if ((getActivationState() == ISLAND_SLEEPING) || (getActivationState() == WANTS_DEACTIVATION))
- return true;
+ if ((getActivationState() == ISLAND_SLEEPING) || (getActivationState() == WANTS_DEACTIVATION))
+ return true;
- if (m_deactivationTime > gDeactivationTime)
- {
- return true;
- }
- return false;
+ if (m_deactivationTime > gDeactivationTime)
+ {
+ return true;
+ }
+ return false;
}
diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBody.h b/thirdparty/bullet/BulletSoftBody/btSoftBody.h
index 6a55eccbd2..f578487b8c 100644
--- a/thirdparty/bullet/BulletSoftBody/btSoftBody.h
+++ b/thirdparty/bullet/BulletSoftBody/btSoftBody.h
@@ -35,7 +35,7 @@ subject to the following restrictions:
//#else
#define btSoftBodyData btSoftBodyFloatData
#define btSoftBodyDataName "btSoftBodyFloatData"
-static const btScalar OVERLAP_REDUCTION_FACTOR = 0.1;
+static const btScalar OVERLAP_REDUCTION_FACTOR = 0.1;
static unsigned long seed = 243703;
//#endif //BT_USE_DOUBLE_PRECISION
@@ -171,10 +171,10 @@ public:
CL_SELF = 0x0040, ///Cluster soft body self collision
VF_DD = 0x0080, ///Vertex vs face soft vs soft handling
- RVDFmask = 0x0f00, /// Rigid versus deformable face mask
- SDF_RDF = 0x0100, /// GJK based Rigid vs. deformable face
- SDF_MDF = 0x0200, /// GJK based Multibody vs. deformable face
- SDF_RDN = 0x0400, /// SDF based Rigid vs. deformable node
+ RVDFmask = 0x0f00, /// Rigid versus deformable face mask
+ SDF_RDF = 0x0100, /// GJK based Rigid vs. deformable face
+ SDF_MDF = 0x0200, /// GJK based Multibody vs. deformable face
+ SDF_RDN = 0x0400, /// SDF based Rigid vs. deformable node
/* presets */
Default = SDF_RS,
END
@@ -226,7 +226,7 @@ public:
const btCollisionObject* m_colObj; /* Rigid body */
btVector3 m_normal; /* Outward normal */
btScalar m_offset; /* Offset from origin */
- btVector3 m_bary; /* Barycentric weights for faces */
+ btVector3 m_bary; /* Barycentric weights for faces */
};
/* sMedium */
@@ -258,20 +258,29 @@ public:
Material* m_material; // Material
};
/* Node */
+ struct RenderNode
+ {
+ btVector3 m_x;
+ btVector3 m_uv1;
+ btVector3 m_normal;
+ };
struct Node : Feature
{
btVector3 m_x; // Position
btVector3 m_q; // Previous step position/Test position
btVector3 m_v; // Velocity
- btVector3 m_vn; // Previous step velocity
+ btVector3 m_vn; // Previous step velocity
btVector3 m_f; // Force accumulator
btVector3 m_n; // Normal
btScalar m_im; // 1/mass
btScalar m_area; // Area
btDbvtNode* m_leaf; // Leaf data
- btScalar m_penetration; // depth of penetration
+ int m_constrained; // constraint marker for this node (presumably a flag/count; TODO confirm) -- no longer a penetration depth
int m_battach : 1; // Attached
- int index;
+ int index;
+ btVector3 m_splitv; // velocity associated with split impulse
+ btMatrix3x3 m_effectiveMass; // effective mass in contact
+ btMatrix3x3 m_effectiveMass_inv; // inverse of effective mass
};
/* Link */
ATTRIBUTE_ALIGNED16(struct)
@@ -287,40 +296,47 @@ public:
BT_DECLARE_ALIGNED_ALLOCATOR();
};
+ struct RenderFace
+ {
+ RenderNode* m_n[3]; // Node pointers
+ };
+
/* Face */
struct Face : Feature
{
- Node* m_n[3]; // Node pointers
- btVector3 m_normal; // Normal
- btScalar m_ra; // Rest area
- btDbvtNode* m_leaf; // Leaf data
- btVector4 m_pcontact; // barycentric weights of the persistent contact
- btVector3 m_n0, m_n1, m_vn;
- int m_index;
+ Node* m_n[3]; // Node pointers
+ btVector3 m_normal; // Normal
+ btScalar m_ra; // Rest area
+ btDbvtNode* m_leaf; // Leaf data
+ btVector4 m_pcontact; // barycentric weights of the persistent contact
+ btVector3 m_n0, m_n1, m_vn;
+ int m_index;
};
/* Tetra */
struct Tetra : Feature
{
- Node* m_n[4]; // Node pointers
- btScalar m_rv; // Rest volume
- btDbvtNode* m_leaf; // Leaf data
- btVector3 m_c0[4]; // gradients
- btScalar m_c1; // (4*kVST)/(im0+im1+im2+im3)
- btScalar m_c2; // m_c1/sum(|g0..3|^2)
- btMatrix3x3 m_Dm_inverse; // rest Dm^-1
- btMatrix3x3 m_F;
- btScalar m_element_measure;
+ Node* m_n[4]; // Node pointers
+ btScalar m_rv; // Rest volume
+ btDbvtNode* m_leaf; // Leaf data
+ btVector3 m_c0[4]; // gradients
+ btScalar m_c1; // (4*kVST)/(im0+im1+im2+im3)
+ btScalar m_c2; // m_c1/sum(|g0..3|^2)
+ btMatrix3x3 m_Dm_inverse; // rest Dm^-1
+ btMatrix3x3 m_F;
+ btScalar m_element_measure;
+ btVector4 m_P_inv[3]; // first three columns of P_inv matrix
+ };
+
+ /* TetraScratch */
+ struct TetraScratch
+ {
+ btMatrix3x3 m_F; // deformation gradient F
+ btScalar m_trace; // trace of F^T * F
+ btScalar m_J; // det(F)
+ btMatrix3x3 m_cofF; // cofactor of F
+ btMatrix3x3 m_corotation; // corotation of the tetra
};
-
- /* TetraScratch */
- struct TetraScratch
- {
- btMatrix3x3 m_F; // deformation gradient F
- btScalar m_trace; // trace of F^T * F
- btScalar m_J; // det(F)
- btMatrix3x3 m_cofF; // cofactor of F
- };
-
+
/* RContact */
struct RContact
{
@@ -331,67 +347,68 @@ public:
btScalar m_c2; // ima*dt
btScalar m_c3; // Friction
btScalar m_c4; // Hardness
-
- // jacobians and unit impulse responses for multibody
- btMultiBodyJacobianData jacobianData_normal;
- btMultiBodyJacobianData jacobianData_t1;
- btMultiBodyJacobianData jacobianData_t2;
- btVector3 t1;
- btVector3 t2;
+
+ // jacobians and unit impulse responses for multibody
+ btMultiBodyJacobianData jacobianData_normal;
+ btMultiBodyJacobianData jacobianData_t1;
+ btMultiBodyJacobianData jacobianData_t2;
+ btVector3 t1;
+ btVector3 t2;
};
-
- class DeformableRigidContact
- {
- public:
- sCti m_cti; // Contact infos
- btMatrix3x3 m_c0; // Impulse matrix
- btVector3 m_c1; // Relative anchor
- btScalar m_c2; // inverse mass of node/face
- btScalar m_c3; // Friction
- btScalar m_c4; // Hardness
-
- // jacobians and unit impulse responses for multibody
- btMultiBodyJacobianData jacobianData_normal;
- btMultiBodyJacobianData jacobianData_t1;
- btMultiBodyJacobianData jacobianData_t2;
- btVector3 t1;
- btVector3 t2;
- };
-
- class DeformableNodeRigidContact : public DeformableRigidContact
- {
- public:
- Node* m_node; // Owner node
- };
-
- class DeformableNodeRigidAnchor : public DeformableNodeRigidContact
- {
- public:
- btVector3 m_local; // Anchor position in body space
- };
-
- class DeformableFaceRigidContact : public DeformableRigidContact
- {
- public:
- Face* m_face; // Owner face
- btVector3 m_contactPoint; // Contact point
- btVector3 m_bary; // Barycentric weights
- btVector3 m_weights; // v_contactPoint * m_weights[i] = m_face->m_node[i]->m_v;
- };
-
- struct DeformableFaceNodeContact
- {
- Node* m_node; // Node
- Face* m_face; // Face
- btVector3 m_bary; // Barycentric weights
- btVector3 m_weights; // v_contactPoint * m_weights[i] = m_face->m_node[i]->m_v;
- btVector3 m_normal; // Normal
- btScalar m_margin; // Margin
- btScalar m_friction; // Friction
- btScalar m_imf; // inverse mass of the face at contact point
- btScalar m_c0; // scale of the impulse matrix;
- };
-
+
+ class DeformableRigidContact
+ {
+ public:
+ sCti m_cti; // Contact infos
+ btMatrix3x3 m_c0; // Impulse matrix
+ btVector3 m_c1; // Relative anchor
+ btScalar m_c2; // inverse mass of node/face
+ btScalar m_c3; // Friction
+ btScalar m_c4; // Hardness
+ btMatrix3x3 m_c5; // inverse effective mass
+
+ // jacobians and unit impulse responses for multibody
+ btMultiBodyJacobianData jacobianData_normal;
+ btMultiBodyJacobianData jacobianData_t1;
+ btMultiBodyJacobianData jacobianData_t2;
+ btVector3 t1;
+ btVector3 t2;
+ };
+
+ class DeformableNodeRigidContact : public DeformableRigidContact
+ {
+ public:
+ Node* m_node; // Owner node
+ };
+
+ class DeformableNodeRigidAnchor : public DeformableNodeRigidContact
+ {
+ public:
+ btVector3 m_local; // Anchor position in body space
+ };
+
+ class DeformableFaceRigidContact : public DeformableRigidContact
+ {
+ public:
+ Face* m_face; // Owner face
+ btVector3 m_contactPoint; // Contact point
+ btVector3 m_bary; // Barycentric weights
+ btVector3 m_weights; // v_contactPoint * m_weights[i] = m_face->m_node[i]->m_v;
+ };
+
+ struct DeformableFaceNodeContact
+ {
+ Node* m_node; // Node
+ Face* m_face; // Face
+ btVector3 m_bary; // Barycentric weights
+ btVector3 m_weights; // v_contactPoint * m_weights[i] = m_face->m_node[i]->m_v;
+ btVector3 m_normal; // Normal
+ btScalar m_margin; // Margin
+ btScalar m_friction; // Friction
+ btScalar m_imf; // inverse mass of the face at contact point
+ btScalar m_c0; // scale of the impulse matrix;
+ };
+
/* SContact */
struct SContact
{
@@ -718,19 +735,19 @@ public:
tVSolverArray m_vsequence; // Velocity solvers sequence
tPSolverArray m_psequence; // Position solvers sequence
tPSolverArray m_dsequence; // Drift solvers sequence
- btScalar drag; // deformable air drag
- btScalar m_maxStress; // Maximum principle first Piola stress
+ btScalar drag; // deformable air drag
+ btScalar m_maxStress; // Maximum principal first Piola stress
};
/* SolverState */
struct SolverState
{
//if you add new variables, always initialize them!
SolverState()
- :sdt(0),
- isdt(0),
- velmrg(0),
- radmrg(0),
- updmrg(0)
+ : sdt(0),
+ isdt(0),
+ velmrg(0),
+ radmrg(0),
+ updmrg(0)
{
}
btScalar sdt; // dt*timescale
@@ -769,9 +786,11 @@ public:
typedef btAlignedObjectArray<Cluster*> tClusterArray;
typedef btAlignedObjectArray<Note> tNoteArray;
typedef btAlignedObjectArray<Node> tNodeArray;
+ typedef btAlignedObjectArray< RenderNode> tRenderNodeArray;
typedef btAlignedObjectArray<btDbvtNode*> tLeafArray;
typedef btAlignedObjectArray<Link> tLinkArray;
typedef btAlignedObjectArray<Face> tFaceArray;
+ typedef btAlignedObjectArray<RenderFace> tRenderFaceArray;
typedef btAlignedObjectArray<Tetra> tTetraArray;
typedef btAlignedObjectArray<Anchor> tAnchorArray;
typedef btAlignedObjectArray<RContact> tRContactArray;
@@ -791,40 +810,42 @@ public:
btSoftBodyWorldInfo* m_worldInfo; // World info
tNoteArray m_notes; // Notes
tNodeArray m_nodes; // Nodes
- tNodeArray m_renderNodes; // Nodes
+ tRenderNodeArray m_renderNodes; // Render Nodes
tLinkArray m_links; // Links
tFaceArray m_faces; // Faces
- tFaceArray m_renderFaces; // Faces
+ tRenderFaceArray m_renderFaces; // Render Faces
tTetraArray m_tetras; // Tetras
- btAlignedObjectArray<TetraScratch> m_tetraScratches;
- btAlignedObjectArray<TetraScratch> m_tetraScratchesTn;
- tAnchorArray m_anchors; // Anchors
- btAlignedObjectArray<DeformableNodeRigidAnchor> m_deformableAnchors;
- tRContactArray m_rcontacts; // Rigid contacts
- btAlignedObjectArray<DeformableNodeRigidContact> m_nodeRigidContacts;
- btAlignedObjectArray<DeformableFaceNodeContact> m_faceNodeContacts;
- btAlignedObjectArray<DeformableFaceRigidContact> m_faceRigidContacts;
- tSContactArray m_scontacts; // Soft contacts
- tJointArray m_joints; // Joints
- tMaterialArray m_materials; // Materials
- btScalar m_timeacc; // Time accumulator
- btVector3 m_bounds[2]; // Spatial bounds
- bool m_bUpdateRtCst; // Update runtime constants
- btDbvt m_ndbvt; // Nodes tree
- btDbvt m_fdbvt; // Faces tree
- btDbvntNode* m_fdbvnt; // Faces tree with normals
- btDbvt m_cdbvt; // Clusters tree
- tClusterArray m_clusters; // Clusters
- btScalar m_dampingCoefficient; // Damping Coefficient
+ btAlignedObjectArray<TetraScratch> m_tetraScratches;
+ btAlignedObjectArray<TetraScratch> m_tetraScratchesTn;
+ tAnchorArray m_anchors; // Anchors
+ btAlignedObjectArray<DeformableNodeRigidAnchor> m_deformableAnchors;
+ tRContactArray m_rcontacts; // Rigid contacts
+ btAlignedObjectArray<DeformableNodeRigidContact> m_nodeRigidContacts;
+ btAlignedObjectArray<DeformableFaceNodeContact> m_faceNodeContacts;
+ btAlignedObjectArray<DeformableFaceRigidContact> m_faceRigidContacts;
+ tSContactArray m_scontacts; // Soft contacts
+ tJointArray m_joints; // Joints
+ tMaterialArray m_materials; // Materials
+ btScalar m_timeacc; // Time accumulator
+ btVector3 m_bounds[2]; // Spatial bounds
+ bool m_bUpdateRtCst; // Update runtime constants
+ btDbvt m_ndbvt; // Nodes tree
+ btDbvt m_fdbvt; // Faces tree
+ btDbvntNode* m_fdbvnt; // Faces tree with normals
+ btDbvt m_cdbvt; // Clusters tree
+ tClusterArray m_clusters; // Clusters
+ btScalar m_dampingCoefficient; // Damping Coefficient
btScalar m_sleepingThreshold;
btScalar m_maxSpeedSquared;
- btAlignedObjectArray<btVector3> m_quads; // quadrature points for collision detection
+ btAlignedObjectArray<btVector3> m_quads; // quadrature points for collision detection
btScalar m_repulsionStiffness;
- btAlignedObjectArray<btVector3> m_X; // initial positions
+ btScalar m_gravityFactor;
+ bool m_cacheBarycenter;
+ btAlignedObjectArray<btVector3> m_X; // initial positions
btAlignedObjectArray<btVector4> m_renderNodesInterpolationWeights;
btAlignedObjectArray<btAlignedObjectArray<const btSoftBody::Node*> > m_renderNodesParents;
- btAlignedObjectArray<btScalar> m_z; // vertical distance used in extrapolation
+ btAlignedObjectArray<btScalar> m_z; // vertical distance used in extrapolation
bool m_useSelfCollision;
bool m_softSoftCollision;
@@ -856,11 +877,11 @@ public:
{
return m_worldInfo;
}
-
- void setDampingCoefficient(btScalar damping_coeff)
- {
- m_dampingCoefficient = damping_coeff;
- }
+
+ void setDampingCoefficient(btScalar damping_coeff)
+ {
+ m_dampingCoefficient = damping_coeff;
+ }
///@todo: avoid internal softbody shape hack and move collision code to collision library
virtual void setCollisionShape(btCollisionShape* collisionShape)
@@ -921,11 +942,12 @@ public:
Material* mat = 0);
/* Append anchor */
- void appendDeformableAnchor(int node, btRigidBody* body);
- void appendDeformableAnchor(int node, btMultiBodyLinkCollider* link);
- void appendAnchor(int node,
+ void appendDeformableAnchor(int node, btRigidBody* body);
+ void appendDeformableAnchor(int node, btMultiBodyLinkCollider* link);
+ void appendAnchor(int node,
btRigidBody* body, bool disableCollisionBetweenLinkedBodies = false, btScalar influence = 1);
void appendAnchor(int node, btRigidBody* body, const btVector3& localPivot, bool disableCollisionBetweenLinkedBodies = false, btScalar influence = 1);
+ void removeAnchor(int node);
/* Append linear joint */
void appendLinearJoint(const LJoint::Specs& specs, Cluster* body0, Body body1);
void appendLinearJoint(const LJoint::Specs& specs, Body body = Body());
@@ -976,10 +998,10 @@ public:
void setLinearVelocity(const btVector3& linVel);
/* Set the angular velocity of the center of mass */
void setAngularVelocity(const btVector3& angVel);
- /* Get best fit rigid transform */
- btTransform getRigidTransform();
- /* Transform to given pose */
- void transformTo(const btTransform& trs);
+ /* Get best fit rigid transform */
+ btTransform getRigidTransform();
+ /* Transform to given pose */
+ void transformTo(const btTransform& trs);
/* Transform */
void transform(const btTransform& trs);
/* Translate */
@@ -1068,11 +1090,11 @@ public:
/* defaultCollisionHandlers */
void defaultCollisionHandler(const btCollisionObjectWrapper* pcoWrap);
void defaultCollisionHandler(btSoftBody* psb);
- void setSelfCollision(bool useSelfCollision);
- bool useSelfCollision();
- void updateDeactivation(btScalar timeStep);
- void setZeroVelocity();
- bool wantsSleeping();
+ void setSelfCollision(bool useSelfCollision);
+ bool useSelfCollision();
+ void updateDeactivation(btScalar timeStep);
+ void setZeroVelocity();
+ bool wantsSleeping();
//
// Functionality to deal with new accelerated solvers.
@@ -1151,8 +1173,8 @@ public:
void rebuildNodeTree();
btVector3 evaluateCom() const;
bool checkDeformableContact(const btCollisionObjectWrapper* colObjWrap, const btVector3& x, btScalar margin, btSoftBody::sCti& cti, bool predict = false) const;
- bool checkDeformableFaceContact(const btCollisionObjectWrapper* colObjWrap, Face& f, btVector3& contact_point, btVector3& bary, btScalar margin, btSoftBody::sCti& cti, bool predict = false) const;
- bool checkContact(const btCollisionObjectWrapper* colObjWrap, const btVector3& x, btScalar margin, btSoftBody::sCti& cti) const;
+ bool checkDeformableFaceContact(const btCollisionObjectWrapper* colObjWrap, Face& f, btVector3& contact_point, btVector3& bary, btScalar margin, btSoftBody::sCti& cti, bool predict = false) const;
+ bool checkContact(const btCollisionObjectWrapper* colObjWrap, const btVector3& x, btScalar margin, btSoftBody::sCti& cti) const;
void updateNormals();
void updateBounds();
void updatePose();
@@ -1166,14 +1188,16 @@ public:
void solveClusters(btScalar sor);
void applyClusters(bool drift);
void dampClusters();
- void setSpringStiffness(btScalar k);
- void initializeDmInverse();
- void updateDeformation();
- void advanceDeformation();
+ void setSpringStiffness(btScalar k);
+ void setGravityFactor(btScalar gravFactor);
+ void setCacheBarycenter(bool cacheBarycenter);
+ void initializeDmInverse();
+ void updateDeformation();
+ void advanceDeformation();
void applyForces();
- void setMaxStress(btScalar maxStress);
- void interpolateRenderMesh();
- void setCollisionQuadrature(int N);
+ void setMaxStress(btScalar maxStress);
+ void interpolateRenderMesh();
+ void setCollisionQuadrature(int N);
static void PSolve_Anchors(btSoftBody* psb, btScalar kst, btScalar ti);
static void PSolve_RContacts(btSoftBody* psb, btScalar kst, btScalar ti);
static void PSolve_SContacts(btSoftBody* psb, btScalar, btScalar ti);
@@ -1182,14 +1206,15 @@ public:
static psolver_t getSolver(ePSolver::_ solver);
static vsolver_t getSolver(eVSolver::_ solver);
void geometricCollisionHandler(btSoftBody* psb);
-#define SAFE_EPSILON SIMD_EPSILON*100.0
+#define SAFE_EPSILON SIMD_EPSILON * 100.0
void updateNode(btDbvtNode* node, bool use_velocity, bool margin)
{
if (node->isleaf())
{
btSoftBody::Node* n = (btSoftBody::Node*)(node->data);
- ATTRIBUTE_ALIGNED16(btDbvtVolume) vol;
- btScalar pad = margin ? m_sst.radmrg : SAFE_EPSILON; // use user defined margin or margin for floating point precision
+ ATTRIBUTE_ALIGNED16(btDbvtVolume)
+ vol;
+ btScalar pad = margin ? m_sst.radmrg : SAFE_EPSILON; // use user defined margin or margin for floating point precision
if (use_velocity)
{
btVector3 points[2] = {n->m_x, n->m_x + m_sst.sdt * n->m_v};
@@ -1207,38 +1232,40 @@ public:
{
updateNode(node->childs[0], use_velocity, margin);
updateNode(node->childs[1], use_velocity, margin);
- ATTRIBUTE_ALIGNED16(btDbvtVolume) vol;
+ ATTRIBUTE_ALIGNED16(btDbvtVolume)
+ vol;
Merge(node->childs[0]->volume, node->childs[1]->volume, vol);
node->volume = vol;
}
}
-
- void updateNodeTree(bool use_velocity, bool margin)
+
+ void updateNodeTree(bool use_velocity, bool margin)
{
if (m_ndbvt.m_root)
updateNode(m_ndbvt.m_root, use_velocity, margin);
}
- template <class DBVTNODE> // btDbvtNode or btDbvntNode
+ template <class DBVTNODE> // btDbvtNode or btDbvntNode
void updateFace(DBVTNODE* node, bool use_velocity, bool margin)
{
if (node->isleaf())
{
btSoftBody::Face* f = (btSoftBody::Face*)(node->data);
- btScalar pad = margin ? m_sst.radmrg : SAFE_EPSILON; // use user defined margin or margin for floating point precision
- ATTRIBUTE_ALIGNED16(btDbvtVolume) vol;
+ btScalar pad = margin ? m_sst.radmrg : SAFE_EPSILON; // use user defined margin or margin for floating point precision
+ ATTRIBUTE_ALIGNED16(btDbvtVolume)
+ vol;
if (use_velocity)
{
btVector3 points[6] = {f->m_n[0]->m_x, f->m_n[0]->m_x + m_sst.sdt * f->m_n[0]->m_v,
- f->m_n[1]->m_x, f->m_n[1]->m_x + m_sst.sdt * f->m_n[1]->m_v,
- f->m_n[2]->m_x, f->m_n[2]->m_x + m_sst.sdt * f->m_n[2]->m_v};
+ f->m_n[1]->m_x, f->m_n[1]->m_x + m_sst.sdt * f->m_n[1]->m_v,
+ f->m_n[2]->m_x, f->m_n[2]->m_x + m_sst.sdt * f->m_n[2]->m_v};
vol = btDbvtVolume::FromPoints(points, 6);
}
else
{
btVector3 points[3] = {f->m_n[0]->m_x,
- f->m_n[1]->m_x,
- f->m_n[2]->m_x};
+ f->m_n[1]->m_x,
+ f->m_n[2]->m_x};
vol = btDbvtVolume::FromPoints(points, 3);
}
vol.Expand(btVector3(pad, pad, pad));
@@ -1249,7 +1276,8 @@ public:
{
updateFace(node->childs[0], use_velocity, margin);
updateFace(node->childs[1], use_velocity, margin);
- ATTRIBUTE_ALIGNED16(btDbvtVolume) vol;
+ ATTRIBUTE_ALIGNED16(btDbvtVolume)
+ vol;
Merge(node->childs[0]->volume, node->childs[1]->volume, vol);
node->volume = vol;
}
@@ -1271,7 +1299,7 @@ public:
return (a * coord.x() + b * coord.y() + c * coord.z());
}
- void applyRepulsionForce(btScalar timeStep, bool applySpringForce)
+ void applyRepulsionForce(btScalar timeStep, bool applySpringForce)
{
btAlignedObjectArray<int> indices;
{
@@ -1297,58 +1325,60 @@ public:
const btVector3& n = c.m_normal;
btVector3 l = node->m_x - BaryEval(face->m_n[0]->m_x, face->m_n[1]->m_x, face->m_n[2]->m_x, w);
btScalar d = c.m_margin - n.dot(l);
- d = btMax(btScalar(0),d);
-
+ d = btMax(btScalar(0), d);
+
const btVector3& va = node->m_v;
btVector3 vb = BaryEval(face->m_n[0]->m_v, face->m_n[1]->m_v, face->m_n[2]->m_v, w);
btVector3 vr = va - vb;
- const btScalar vn = btDot(vr, n); // dn < 0 <==> opposing
+ const btScalar vn = btDot(vr, n); // vn < 0 <==> opposing
if (vn > OVERLAP_REDUCTION_FACTOR * d / timeStep)
continue;
- btVector3 vt = vr - vn*n;
+ btVector3 vt = vr - vn * n;
btScalar I = 0;
- btScalar mass = node->m_im == 0 ? 0 : btScalar(1)/node->m_im;
+ btScalar mass = node->m_im == 0 ? 0 : btScalar(1) / node->m_im;
if (applySpringForce)
I = -btMin(m_repulsionStiffness * timeStep * d, mass * (OVERLAP_REDUCTION_FACTOR * d / timeStep - vn));
if (vn < 0)
I += 0.5 * mass * vn;
- btScalar face_penetration = 0, node_penetration = node->m_penetration;
+ int face_penetration = 0, node_penetration = node->m_constrained;
for (int i = 0; i < 3; ++i)
- face_penetration = btMax(face_penetration, face->m_n[i]->m_penetration);
- btScalar I_tilde = .5 *I /(1.0+w.length2());
-
-// double the impulse if node or face is constrained.
- if (face_penetration > 0 || node_penetration > 0)
- I_tilde *= 2.0;
- if (face_penetration <= node_penetration)
+ face_penetration |= face->m_n[i]->m_constrained;
+ btScalar I_tilde = 2.0 * I / (1.0 + w.length2());
+
+ // double the impulse if node or face is constrained.
+ if (face_penetration > 0 || node_penetration > 0)
+ {
+ I_tilde *= 2.0;
+ }
+ if (face_penetration <= 0)
{
for (int j = 0; j < 3; ++j)
- face->m_n[j]->m_v += w[j]*n*I_tilde*node->m_im;
+ face->m_n[j]->m_v += w[j] * n * I_tilde * node->m_im;
}
- if (face_penetration >= node_penetration)
+ if (node_penetration <= 0)
{
- node->m_v -= I_tilde*node->m_im*n;
+ node->m_v -= I_tilde * node->m_im * n;
}
-
+
// apply frictional impulse
btScalar vt_norm = vt.safeNorm();
if (vt_norm > SIMD_EPSILON)
{
btScalar delta_vn = -2 * I * node->m_im;
btScalar mu = c.m_friction;
- btScalar vt_new = btMax(btScalar(1) - mu * delta_vn / (vt_norm + SIMD_EPSILON), btScalar(0))*vt_norm;
- I = 0.5 * mass * (vt_norm-vt_new);
+ btScalar vt_new = btMax(btScalar(1) - mu * delta_vn / (vt_norm + SIMD_EPSILON), btScalar(0)) * vt_norm;
+ I = 0.5 * mass * (vt_norm - vt_new);
vt.safeNormalize();
- I_tilde = .5 *I /(1.0+w.length2());
-// double the impulse if node or face is constrained.
-// if (face_penetration > 0 || node_penetration > 0)
-// I_tilde *= 2.0;
- if (face_penetration <= node_penetration)
+ I_tilde = 2.0 * I / (1.0 + w.length2());
+ // double the impulse if node or face is constrained.
+ if (face_penetration > 0 || node_penetration > 0)
+ I_tilde *= 2.0;
+ if (face_penetration <= 0)
{
for (int j = 0; j < 3; ++j)
face->m_n[j]->m_v += w[j] * vt * I_tilde * (face->m_n[j])->m_im;
}
- if (face_penetration >= node_penetration)
+ if (node_penetration <= 0)
{
node->m_v -= I_tilde * node->m_im * vt;
}
@@ -1356,7 +1386,7 @@ public:
}
}
virtual int calculateSerializeBufferSize() const;
-
+
///fills the dataBuffer and returns the struct name (and 0 on failure)
virtual const char* serialize(void* dataBuffer, class btSerializer* serializer) const;
};
diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.cpp b/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.cpp
index c1a87c7d57..f63e48f9a5 100644
--- a/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.cpp
@@ -727,7 +727,7 @@ btSoftBody* btSoftBodyHelpers::CreatePatch(btSoftBodyWorldInfo& worldInfo, const
int resy,
int fixeds,
bool gendiags,
- btScalar perturbation)
+ btScalar perturbation)
{
#define IDX(_x_, _y_) ((_y_)*rx + (_x_))
/* Create nodes */
@@ -747,12 +747,12 @@ btSoftBody* btSoftBodyHelpers::CreatePatch(btSoftBodyWorldInfo& worldInfo, const
for (int ix = 0; ix < rx; ++ix)
{
const btScalar tx = ix / (btScalar)(rx - 1);
- btScalar pert = perturbation * btScalar(rand())/RAND_MAX;
- btVector3 temp1 = py1;
- temp1.setY(py1.getY() + pert);
- btVector3 temp = py0;
- pert = perturbation * btScalar(rand())/RAND_MAX;
- temp.setY(py0.getY() + pert);
+ btScalar pert = perturbation * btScalar(rand()) / RAND_MAX;
+ btVector3 temp1 = py1;
+ temp1.setY(py1.getY() + pert);
+ btVector3 temp = py0;
+ pert = perturbation * btScalar(rand()) / RAND_MAX;
+ temp.setY(py0.getY() + pert);
x[IDX(ix, iy)] = lerp(temp, temp1, tx);
m[IDX(ix, iy)] = 1;
}
@@ -1233,9 +1233,9 @@ if(face&&face[0])
}
}
}
- psb->initializeDmInverse();
- psb->m_tetraScratches.resize(psb->m_tetras.size());
- psb->m_tetraScratchesTn.resize(psb->m_tetras.size());
+ psb->initializeDmInverse();
+ psb->m_tetraScratches.resize(psb->m_tetras.size());
+ psb->m_tetraScratchesTn.resize(psb->m_tetras.size());
printf("Nodes: %u\r\n", psb->m_nodes.size());
printf("Links: %u\r\n", psb->m_links.size());
printf("Faces: %u\r\n", psb->m_faces.size());
@@ -1245,61 +1245,62 @@ if(face&&face[0])
btSoftBody* btSoftBodyHelpers::CreateFromVtkFile(btSoftBodyWorldInfo& worldInfo, const char* vtk_file)
{
- std::ifstream fs;
- fs.open(vtk_file);
- btAssert(fs);
-
- typedef btAlignedObjectArray<int> Index;
- std::string line;
- btAlignedObjectArray<btVector3> X;
- btVector3 position;
- btAlignedObjectArray<Index> indices;
- bool reading_points = false;
- bool reading_tets = false;
- size_t n_points = 0;
- size_t n_tets = 0;
- size_t x_count = 0;
- size_t indices_count = 0;
- while (std::getline(fs, line))
- {
- std::stringstream ss(line);
- if (line.size() == (size_t)(0))
- {
- }
- else if (line.substr(0, 6) == "POINTS")
- {
- reading_points = true;
- reading_tets = false;
- ss.ignore(128, ' '); // ignore "POINTS"
- ss >> n_points;
- X.resize(n_points);
- }
- else if (line.substr(0, 5) == "CELLS")
- {
- reading_points = false;
- reading_tets = true;
- ss.ignore(128, ' '); // ignore "CELLS"
- ss >> n_tets;
- indices.resize(n_tets);
- }
- else if (line.substr(0, 10) == "CELL_TYPES")
- {
- reading_points = false;
- reading_tets = false;
- }
- else if (reading_points)
- {
- btScalar p;
- ss >> p;
- position.setX(p);
- ss >> p;
- position.setY(p);
- ss >> p;
- position.setZ(p);
- X[x_count++] = position;
- }
- else if (reading_tets)
- {
+ std::ifstream fs;
+ fs.open(vtk_file);
+ btAssert(fs);
+
+ typedef btAlignedObjectArray<int> Index;
+ std::string line;
+ btAlignedObjectArray<btVector3> X;
+ btVector3 position;
+ btAlignedObjectArray<Index> indices;
+ bool reading_points = false;
+ bool reading_tets = false;
+ size_t n_points = 0;
+ size_t n_tets = 0;
+ size_t x_count = 0;
+ size_t indices_count = 0;
+ while (std::getline(fs, line))
+ {
+ std::stringstream ss(line);
+ if (line.size() == (size_t)(0))
+ {
+ }
+ else if (line.substr(0, 6) == "POINTS")
+ {
+ reading_points = true;
+ reading_tets = false;
+ ss.ignore(128, ' '); // ignore "POINTS"
+ ss >> n_points;
+ X.resize(n_points);
+ }
+ else if (line.substr(0, 5) == "CELLS")
+ {
+ reading_points = false;
+ reading_tets = true;
+ ss.ignore(128, ' '); // ignore "CELLS"
+ ss >> n_tets;
+ indices.resize(n_tets);
+ }
+ else if (line.substr(0, 10) == "CELL_TYPES")
+ {
+ reading_points = false;
+ reading_tets = false;
+ }
+ else if (reading_points)
+ {
+ btScalar p;
+ ss >> p;
+ position.setX(p);
+ ss >> p;
+ position.setY(p);
+ ss >> p;
+ position.setZ(p);
+ //printf("v %f %f %f\n", position.getX(), position.getY(), position.getZ());
+ X[x_count++] = position;
+ }
+ else if (reading_tets)
+ {
int d;
ss >> d;
if (d != 4)
@@ -1308,317 +1309,355 @@ btSoftBody* btSoftBodyHelpers::CreateFromVtkFile(btSoftBodyWorldInfo& worldInfo,
fs.close();
return 0;
}
- ss.ignore(128, ' '); // ignore "4"
- Index tet;
- tet.resize(4);
- for (size_t i = 0; i < 4; i++)
- {
- ss >> tet[i];
- printf("%d ", tet[i]);
- }
- printf("\n");
- indices[indices_count++] = tet;
- }
- }
- btSoftBody* psb = new btSoftBody(&worldInfo, n_points, &X[0], 0);
-
- for (int i = 0; i < n_tets; ++i)
- {
- const Index& ni = indices[i];
- psb->appendTetra(ni[0], ni[1], ni[2], ni[3]);
- {
- psb->appendLink(ni[0], ni[1], 0, true);
- psb->appendLink(ni[1], ni[2], 0, true);
- psb->appendLink(ni[2], ni[0], 0, true);
- psb->appendLink(ni[0], ni[3], 0, true);
- psb->appendLink(ni[1], ni[3], 0, true);
- psb->appendLink(ni[2], ni[3], 0, true);
- }
- }
-
-
- generateBoundaryFaces(psb);
- psb->initializeDmInverse();
- psb->m_tetraScratches.resize(psb->m_tetras.size());
- psb->m_tetraScratchesTn.resize(psb->m_tetras.size());
- printf("Nodes: %u\r\n", psb->m_nodes.size());
- printf("Links: %u\r\n", psb->m_links.size());
- printf("Faces: %u\r\n", psb->m_faces.size());
- printf("Tetras: %u\r\n", psb->m_tetras.size());
-
- fs.close();
- return psb;
+ ss.ignore(128, ' '); // ignore "4"
+ Index tet;
+ tet.resize(4);
+ for (size_t i = 0; i < 4; i++)
+ {
+ ss >> tet[i];
+ //printf("%d ", tet[i]);
+ }
+ //printf("\n");
+ indices[indices_count++] = tet;
+ }
+ }
+ btSoftBody* psb = new btSoftBody(&worldInfo, n_points, &X[0], 0);
+
+ for (int i = 0; i < n_tets; ++i)
+ {
+ const Index& ni = indices[i];
+ psb->appendTetra(ni[0], ni[1], ni[2], ni[3]);
+ {
+ psb->appendLink(ni[0], ni[1], 0, true);
+ psb->appendLink(ni[1], ni[2], 0, true);
+ psb->appendLink(ni[2], ni[0], 0, true);
+ psb->appendLink(ni[0], ni[3], 0, true);
+ psb->appendLink(ni[1], ni[3], 0, true);
+ psb->appendLink(ni[2], ni[3], 0, true);
+ }
+ }
+
+ generateBoundaryFaces(psb);
+ psb->initializeDmInverse();
+ psb->m_tetraScratches.resize(psb->m_tetras.size());
+ psb->m_tetraScratchesTn.resize(psb->m_tetras.size());
+ printf("Nodes: %u\r\n", psb->m_nodes.size());
+ printf("Links: %u\r\n", psb->m_links.size());
+ printf("Faces: %u\r\n", psb->m_faces.size());
+ printf("Tetras: %u\r\n", psb->m_tetras.size());
+
+ fs.close();
+ return psb;
}
void btSoftBodyHelpers::generateBoundaryFaces(btSoftBody* psb)
{
- int counter = 0;
- for (int i = 0; i < psb->m_nodes.size(); ++i)
- {
- psb->m_nodes[i].index = counter++;
- }
- typedef btAlignedObjectArray<int> Index;
- btAlignedObjectArray<Index> indices;
- indices.resize(psb->m_tetras.size());
- for (int i = 0; i < indices.size(); ++i)
- {
- Index index;
- index.push_back(psb->m_tetras[i].m_n[0]->index);
- index.push_back(psb->m_tetras[i].m_n[1]->index);
- index.push_back(psb->m_tetras[i].m_n[2]->index);
- index.push_back(psb->m_tetras[i].m_n[3]->index);
- indices[i] = index;
- }
-
- std::map<std::vector<int>, std::vector<int> > dict;
- for (int i = 0; i < indices.size(); ++i)
- {
- for (int j = 0; j < 4; ++j)
- {
- std::vector<int> f;
- if (j == 0)
- {
- f.push_back(indices[i][1]);
- f.push_back(indices[i][0]);
- f.push_back(indices[i][2]);
- }
- if (j == 1)
- {
- f.push_back(indices[i][3]);
- f.push_back(indices[i][0]);
- f.push_back(indices[i][1]);
- }
- if (j == 2)
- {
- f.push_back(indices[i][3]);
- f.push_back(indices[i][1]);
- f.push_back(indices[i][2]);
- }
- if (j == 3)
- {
- f.push_back(indices[i][2]);
- f.push_back(indices[i][0]);
- f.push_back(indices[i][3]);
- }
- std::vector<int> f_sorted = f;
- std::sort(f_sorted.begin(), f_sorted.end());
- if (dict.find(f_sorted) != dict.end())
- {
- dict.erase(f_sorted);
- }
- else
- {
- dict.insert(std::make_pair(f_sorted, f));
- }
- }
- }
-
- for (std::map<std::vector<int>, std::vector<int> >::iterator it = dict.begin(); it != dict.end(); ++it)
- {
- std::vector<int> f = it->second;
- psb->appendFace(f[0], f[1], f[2]);
- }
+ int counter = 0;
+ for (int i = 0; i < psb->m_nodes.size(); ++i)
+ {
+ psb->m_nodes[i].index = counter++;
+ }
+ typedef btAlignedObjectArray<int> Index;
+ btAlignedObjectArray<Index> indices;
+ indices.resize(psb->m_tetras.size());
+ for (int i = 0; i < indices.size(); ++i)
+ {
+ Index index;
+ index.push_back(psb->m_tetras[i].m_n[0]->index);
+ index.push_back(psb->m_tetras[i].m_n[1]->index);
+ index.push_back(psb->m_tetras[i].m_n[2]->index);
+ index.push_back(psb->m_tetras[i].m_n[3]->index);
+ indices[i] = index;
+ }
+
+ std::map<std::vector<int>, std::vector<int> > dict;
+ for (int i = 0; i < indices.size(); ++i)
+ {
+ for (int j = 0; j < 4; ++j)
+ {
+ std::vector<int> f;
+ if (j == 0)
+ {
+ f.push_back(indices[i][1]);
+ f.push_back(indices[i][0]);
+ f.push_back(indices[i][2]);
+ }
+ if (j == 1)
+ {
+ f.push_back(indices[i][3]);
+ f.push_back(indices[i][0]);
+ f.push_back(indices[i][1]);
+ }
+ if (j == 2)
+ {
+ f.push_back(indices[i][3]);
+ f.push_back(indices[i][1]);
+ f.push_back(indices[i][2]);
+ }
+ if (j == 3)
+ {
+ f.push_back(indices[i][2]);
+ f.push_back(indices[i][0]);
+ f.push_back(indices[i][3]);
+ }
+ std::vector<int> f_sorted = f;
+ std::sort(f_sorted.begin(), f_sorted.end());
+ if (dict.find(f_sorted) != dict.end())
+ {
+ dict.erase(f_sorted);
+ }
+ else
+ {
+ dict.insert(std::make_pair(f_sorted, f));
+ }
+ }
+ }
+
+ for (std::map<std::vector<int>, std::vector<int> >::iterator it = dict.begin(); it != dict.end(); ++it)
+ {
+ std::vector<int> f = it->second;
+ psb->appendFace(f[0], f[1], f[2]);
+ //printf("f %d %d %d\n", f[0] + 1, f[1] + 1, f[2] + 1);
+ }
}
+//Write the surface mesh to an obj file.
void btSoftBodyHelpers::writeObj(const char* filename, const btSoftBody* psb)
{
- std::ofstream fs;
- fs.open(filename);
- btAssert(fs);
- for (int i = 0; i < psb->m_nodes.size(); ++i)
- {
- fs << "v";
- for (int d = 0; d < 3; d++)
- {
- fs << " " << psb->m_nodes[i].m_x[d];
- }
- fs << "\n";
- }
-
- for (int i = 0; i < psb->m_faces.size(); ++i)
- {
- fs << "f";
- for (int n = 0; n < 3; n++)
- {
- fs << " " << psb->m_faces[i].m_n[n]->index + 1;
- }
- fs << "\n";
- }
- fs.close();
+ std::ofstream fs;
+ fs.open(filename);
+ btAssert(fs);
+
+ if (psb->m_tetras.size() > 0)
+ {
+ // For a tetrahedron mesh, the surface mesh must be re-indexed before it is written to the obj file.
+ std::map<int, int> dict;
+ for (int i = 0; i < psb->m_faces.size(); i++)
+ {
+ for (int d = 0; d < 3; d++)
+ {
+ int index = psb->m_faces[i].m_n[d]->index;
+ if (dict.find(index) == dict.end())
+ {
+ int dict_size = dict.size();
+ dict[index] = dict_size;
+ fs << "v";
+ for (int k = 0; k < 3; k++)
+ {
+ fs << " " << psb->m_nodes[index].m_x[k];
+ }
+ fs << "\n";
+ }
+ }
+ }
+ // Write surface mesh.
+ for (int i = 0; i < psb->m_faces.size(); ++i)
+ {
+ fs << "f";
+ for (int n = 0; n < 3; n++)
+ {
+ fs << " " << dict[psb->m_faces[i].m_n[n]->index] + 1;
+ }
+ fs << "\n";
+ }
+ }
+ else
+ {
+ // For trimesh, directly write out all the nodes and faces.
+ for (int i = 0; i < psb->m_nodes.size(); ++i)
+ {
+ fs << "v";
+ for (int d = 0; d < 3; d++)
+ {
+ fs << " " << psb->m_nodes[i].m_x[d];
+ }
+ fs << "\n";
+ }
+
+ for (int i = 0; i < psb->m_faces.size(); ++i)
+ {
+ fs << "f";
+ for (int n = 0; n < 3; n++)
+ {
+ fs << " " << psb->m_faces[i].m_n[n]->index + 1;
+ }
+ fs << "\n";
+ }
+ }
+ fs.close();
}
void btSoftBodyHelpers::duplicateFaces(const char* filename, const btSoftBody* psb)
{
- std::ifstream fs_read;
- fs_read.open(filename);
- std::string line;
- btVector3 pos;
- btAlignedObjectArray<btAlignedObjectArray<int> > additional_faces;
- while (std::getline(fs_read, line))
- {
- std::stringstream ss(line);
- if (line[0] == 'v')
- {
- }
- else if (line[0] == 'f')
- {
- ss.ignore();
- int id0, id1, id2;
- ss >> id0;
- ss >> id1;
- ss >> id2;
- btAlignedObjectArray<int> new_face;
- new_face.push_back(id1);
- new_face.push_back(id0);
- new_face.push_back(id2);
- additional_faces.push_back(new_face);
- }
- }
- fs_read.close();
-
- std::ofstream fs_write;
- fs_write.open(filename, std::ios_base::app);
- for (int i = 0; i < additional_faces.size(); ++i)
- {
- fs_write << "f";
- for (int n = 0; n < 3; n++)
- {
- fs_write << " " << additional_faces[i][n];
- }
- fs_write << "\n";
- }
- fs_write.close();
+ std::ifstream fs_read;
+ fs_read.open(filename);
+ std::string line;
+ btVector3 pos;
+ btAlignedObjectArray<btAlignedObjectArray<int> > additional_faces;
+ while (std::getline(fs_read, line))
+ {
+ std::stringstream ss(line);
+ if (line[0] == 'v')
+ {
+ }
+ else if (line[0] == 'f')
+ {
+ ss.ignore();
+ int id0, id1, id2;
+ ss >> id0;
+ ss >> id1;
+ ss >> id2;
+ btAlignedObjectArray<int> new_face;
+ new_face.push_back(id1);
+ new_face.push_back(id0);
+ new_face.push_back(id2);
+ additional_faces.push_back(new_face);
+ }
+ }
+ fs_read.close();
+
+ std::ofstream fs_write;
+ fs_write.open(filename, std::ios_base::app);
+ for (int i = 0; i < additional_faces.size(); ++i)
+ {
+ fs_write << "f";
+ for (int n = 0; n < 3; n++)
+ {
+ fs_write << " " << additional_faces[i][n];
+ }
+ fs_write << "\n";
+ }
+ fs_write.close();
}
// Given a simplex with vertices a,b,c,d, find the barycentric weights of p in this simplex
void btSoftBodyHelpers::getBarycentricWeights(const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& d, const btVector3& p, btVector4& bary)
{
- btVector3 vap = p - a;
- btVector3 vbp = p - b;
-
- btVector3 vab = b - a;
- btVector3 vac = c - a;
- btVector3 vad = d - a;
-
- btVector3 vbc = c - b;
- btVector3 vbd = d - b;
- btScalar va6 = (vbp.cross(vbd)).dot(vbc);
- btScalar vb6 = (vap.cross(vac)).dot(vad);
- btScalar vc6 = (vap.cross(vad)).dot(vab);
- btScalar vd6 = (vap.cross(vab)).dot(vac);
- btScalar v6 = btScalar(1) / (vab.cross(vac).dot(vad));
- bary = btVector4(va6*v6, vb6*v6, vc6*v6, vd6*v6);
+ btVector3 vap = p - a;
+ btVector3 vbp = p - b;
+
+ btVector3 vab = b - a;
+ btVector3 vac = c - a;
+ btVector3 vad = d - a;
+
+ btVector3 vbc = c - b;
+ btVector3 vbd = d - b;
+ btScalar va6 = (vbp.cross(vbd)).dot(vbc);
+ btScalar vb6 = (vap.cross(vac)).dot(vad);
+ btScalar vc6 = (vap.cross(vad)).dot(vab);
+ btScalar vd6 = (vap.cross(vab)).dot(vac);
+ btScalar v6 = btScalar(1) / (vab.cross(vac).dot(vad));
+ bary = btVector4(va6 * v6, vb6 * v6, vc6 * v6, vd6 * v6);
}
// Given a simplex with vertices a,b,c, find the barycentric weights of p in this simplex. bary[3] = 0.
void btSoftBodyHelpers::getBarycentricWeights(const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& p, btVector4& bary)
{
- btVector3 v0 = b - a, v1 = c - a, v2 = p - a;
- btScalar d00 = btDot(v0, v0);
- btScalar d01 = btDot(v0, v1);
- btScalar d11 = btDot(v1, v1);
- btScalar d20 = btDot(v2, v0);
- btScalar d21 = btDot(v2, v1);
- btScalar invDenom = 1.0 / (d00 * d11 - d01 * d01);
- bary[1] = (d11 * d20 - d01 * d21) * invDenom;
- bary[2] = (d00 * d21 - d01 * d20) * invDenom;
- bary[0] = 1.0 - bary[1] - bary[2];
- bary[3] = 0;
+ btVector3 v0 = b - a, v1 = c - a, v2 = p - a;
+ btScalar d00 = btDot(v0, v0);
+ btScalar d01 = btDot(v0, v1);
+ btScalar d11 = btDot(v1, v1);
+ btScalar d20 = btDot(v2, v0);
+ btScalar d21 = btDot(v2, v1);
+ btScalar invDenom = 1.0 / (d00 * d11 - d01 * d01);
+ bary[1] = (d11 * d20 - d01 * d21) * invDenom;
+ bary[2] = (d00 * d21 - d01 * d20) * invDenom;
+ bary[0] = 1.0 - bary[1] - bary[2];
+ bary[3] = 0;
}
// Iterate through all render nodes to find the simulation tetrahedron that contains the render node and record the barycentric weights
// If the node is not inside any tetrahedron, assign it to the tetrahedron in which the node has the least negative barycentric weight
void btSoftBodyHelpers::interpolateBarycentricWeights(btSoftBody* psb)
{
- psb->m_z.resize(0);
- psb->m_renderNodesInterpolationWeights.resize(psb->m_renderNodes.size());
- psb->m_renderNodesParents.resize(psb->m_renderNodes.size());
- for (int i = 0; i < psb->m_renderNodes.size(); ++i)
- {
- const btVector3& p = psb->m_renderNodes[i].m_x;
- btVector4 bary;
- btVector4 optimal_bary;
- btScalar min_bary_weight = -1e3;
- btAlignedObjectArray<const btSoftBody::Node*> optimal_parents;
- for (int j = 0; j < psb->m_tetras.size(); ++j)
- {
- const btSoftBody::Tetra& t = psb->m_tetras[j];
- getBarycentricWeights(t.m_n[0]->m_x, t.m_n[1]->m_x, t.m_n[2]->m_x, t.m_n[3]->m_x, p, bary);
- btScalar new_min_bary_weight = bary[0];
- for (int k = 1; k < 4; ++k)
- {
- new_min_bary_weight = btMin(new_min_bary_weight, bary[k]);
- }
- if (new_min_bary_weight > min_bary_weight)
- {
- btAlignedObjectArray<const btSoftBody::Node*> parents;
- parents.push_back(t.m_n[0]);
- parents.push_back(t.m_n[1]);
- parents.push_back(t.m_n[2]);
- parents.push_back(t.m_n[3]);
- optimal_parents = parents;
- optimal_bary = bary;
- min_bary_weight = new_min_bary_weight;
- // stop searching if p is inside the tetrahedron at hand
- if (bary[0]>=0. && bary[1]>=0. && bary[2]>=0. && bary[3]>=0.)
- {
- break;
- }
- }
- }
- psb->m_renderNodesInterpolationWeights[i] = optimal_bary;
- psb->m_renderNodesParents[i] = optimal_parents;
- }
+ psb->m_z.resize(0);
+ psb->m_renderNodesInterpolationWeights.resize(psb->m_renderNodes.size());
+ psb->m_renderNodesParents.resize(psb->m_renderNodes.size());
+ for (int i = 0; i < psb->m_renderNodes.size(); ++i)
+ {
+ const btVector3& p = psb->m_renderNodes[i].m_x;
+ btVector4 bary;
+ btVector4 optimal_bary;
+ btScalar min_bary_weight = -1e3;
+ btAlignedObjectArray<const btSoftBody::Node*> optimal_parents;
+ for (int j = 0; j < psb->m_tetras.size(); ++j)
+ {
+ const btSoftBody::Tetra& t = psb->m_tetras[j];
+ getBarycentricWeights(t.m_n[0]->m_x, t.m_n[1]->m_x, t.m_n[2]->m_x, t.m_n[3]->m_x, p, bary);
+ btScalar new_min_bary_weight = bary[0];
+ for (int k = 1; k < 4; ++k)
+ {
+ new_min_bary_weight = btMin(new_min_bary_weight, bary[k]);
+ }
+ if (new_min_bary_weight > min_bary_weight)
+ {
+ btAlignedObjectArray<const btSoftBody::Node*> parents;
+ parents.push_back(t.m_n[0]);
+ parents.push_back(t.m_n[1]);
+ parents.push_back(t.m_n[2]);
+ parents.push_back(t.m_n[3]);
+ optimal_parents = parents;
+ optimal_bary = bary;
+ min_bary_weight = new_min_bary_weight;
+ // stop searching if p is inside the tetrahedron at hand
+ if (bary[0] >= 0. && bary[1] >= 0. && bary[2] >= 0. && bary[3] >= 0.)
+ {
+ break;
+ }
+ }
+ }
+ psb->m_renderNodesInterpolationWeights[i] = optimal_bary;
+ psb->m_renderNodesParents[i] = optimal_parents;
+ }
}
-
// Iterate through all render nodes to find the simulation triangle that's closest to the node in the barycentric sense.
void btSoftBodyHelpers::extrapolateBarycentricWeights(btSoftBody* psb)
{
- psb->m_renderNodesInterpolationWeights.resize(psb->m_renderNodes.size());
- psb->m_renderNodesParents.resize(psb->m_renderNodes.size());
- psb->m_z.resize(psb->m_renderNodes.size());
- for (int i = 0; i < psb->m_renderNodes.size(); ++i)
- {
- const btVector3& p = psb->m_renderNodes[i].m_x;
- btVector4 bary;
- btVector4 optimal_bary;
- btScalar min_bary_weight = -SIMD_INFINITY;
- btAlignedObjectArray<const btSoftBody::Node*> optimal_parents;
- btScalar dist = 0, optimal_dist = 0;
- for (int j = 0; j < psb->m_faces.size(); ++j)
- {
- const btSoftBody::Face& f = psb->m_faces[j];
- btVector3 n = btCross(f.m_n[1]->m_x - f.m_n[0]->m_x, f.m_n[2]->m_x - f.m_n[0]->m_x);
- btVector3 unit_n = n.normalized();
- dist = (p-f.m_n[0]->m_x).dot(unit_n);
- btVector3 proj_p = p - dist*unit_n;
- getBarycentricWeights(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, proj_p, bary);
- btScalar new_min_bary_weight = bary[0];
- for (int k = 1; k < 3; ++k)
- {
- new_min_bary_weight = btMin(new_min_bary_weight, bary[k]);
- }
-
- // p is out of the current best triangle, we found a traingle that's better
- bool better_than_closest_outisde = (new_min_bary_weight > min_bary_weight && min_bary_weight<0.);
- // p is inside of the current best triangle, we found a triangle that's better
- bool better_than_best_inside = (new_min_bary_weight>=0 && min_bary_weight>=0 && btFabs(dist)<btFabs(optimal_dist));
-
- if (better_than_closest_outisde || better_than_best_inside)
- {
- btAlignedObjectArray<const btSoftBody::Node*> parents;
- parents.push_back(f.m_n[0]);
- parents.push_back(f.m_n[1]);
- parents.push_back(f.m_n[2]);
- optimal_parents = parents;
- optimal_bary = bary;
- optimal_dist = dist;
- min_bary_weight = new_min_bary_weight;
- }
- }
- psb->m_renderNodesInterpolationWeights[i] = optimal_bary;
- psb->m_renderNodesParents[i] = optimal_parents;
- psb->m_z[i] = optimal_dist;
- }
+ psb->m_renderNodesInterpolationWeights.resize(psb->m_renderNodes.size());
+ psb->m_renderNodesParents.resize(psb->m_renderNodes.size());
+ psb->m_z.resize(psb->m_renderNodes.size());
+ for (int i = 0; i < psb->m_renderNodes.size(); ++i)
+ {
+ const btVector3& p = psb->m_renderNodes[i].m_x;
+ btVector4 bary;
+ btVector4 optimal_bary;
+ btScalar min_bary_weight = -SIMD_INFINITY;
+ btAlignedObjectArray<const btSoftBody::Node*> optimal_parents;
+ btScalar dist = 0, optimal_dist = 0;
+ for (int j = 0; j < psb->m_faces.size(); ++j)
+ {
+ const btSoftBody::Face& f = psb->m_faces[j];
+ btVector3 n = btCross(f.m_n[1]->m_x - f.m_n[0]->m_x, f.m_n[2]->m_x - f.m_n[0]->m_x);
+ btVector3 unit_n = n.normalized();
+ dist = (p - f.m_n[0]->m_x).dot(unit_n);
+ btVector3 proj_p = p - dist * unit_n;
+ getBarycentricWeights(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, proj_p, bary);
+ btScalar new_min_bary_weight = bary[0];
+ for (int k = 1; k < 3; ++k)
+ {
+ new_min_bary_weight = btMin(new_min_bary_weight, bary[k]);
+ }
+
+ // p is out of the current best triangle, we found a triangle that's better
+ bool better_than_closest_outisde = (new_min_bary_weight > min_bary_weight && min_bary_weight < 0.);
+ // p is inside of the current best triangle, we found a triangle that's better
+ bool better_than_best_inside = (new_min_bary_weight >= 0 && min_bary_weight >= 0 && btFabs(dist) < btFabs(optimal_dist));
+
+ if (better_than_closest_outisde || better_than_best_inside)
+ {
+ btAlignedObjectArray<const btSoftBody::Node*> parents;
+ parents.push_back(f.m_n[0]);
+ parents.push_back(f.m_n[1]);
+ parents.push_back(f.m_n[2]);
+ optimal_parents = parents;
+ optimal_bary = bary;
+ optimal_dist = dist;
+ min_bary_weight = new_min_bary_weight;
+ }
+ }
+ psb->m_renderNodesInterpolationWeights[i] = optimal_bary;
+ psb->m_renderNodesParents[i] = optimal_parents;
+ psb->m_z[i] = optimal_dist;
+ }
}
diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.h b/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.h
index abe1870890..237d29761d 100644
--- a/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.h
+++ b/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.h
@@ -93,7 +93,7 @@ struct btSoftBodyHelpers
int resy,
int fixeds,
bool gendiags,
- btScalar perturbation = 0.);
+ btScalar perturbation = 0.);
/* Create a patch with UV Texture Coordinates */
static btSoftBody* CreatePatchUV(btSoftBodyWorldInfo& worldInfo,
const btVector3& corner00,
@@ -142,21 +142,21 @@ struct btSoftBodyHelpers
bool bfacelinks,
bool btetralinks,
bool bfacesfromtetras);
- static btSoftBody* CreateFromVtkFile(btSoftBodyWorldInfo& worldInfo, const char* vtk_file);
+ static btSoftBody* CreateFromVtkFile(btSoftBodyWorldInfo& worldInfo, const char* vtk_file);
- static void writeObj(const char* file, const btSoftBody* psb);
-
- static void getBarycentricWeights(const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& d, const btVector3& p, btVector4& bary);
-
- static void getBarycentricWeights(const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& p, btVector4& bary);
-
- static void interpolateBarycentricWeights(btSoftBody* psb);
-
- static void extrapolateBarycentricWeights(btSoftBody* psb);
-
- static void generateBoundaryFaces(btSoftBody* psb);
-
- static void duplicateFaces(const char* filename, const btSoftBody* psb);
+ static void writeObj(const char* file, const btSoftBody* psb);
+
+ static void getBarycentricWeights(const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& d, const btVector3& p, btVector4& bary);
+
+ static void getBarycentricWeights(const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& p, btVector4& bary);
+
+ static void interpolateBarycentricWeights(btSoftBody* psb);
+
+ static void extrapolateBarycentricWeights(btSoftBody* psb);
+
+ static void generateBoundaryFaces(btSoftBody* psb);
+
+ static void duplicateFaces(const char* filename, const btSoftBody* psb);
/// Sort the list of links to move link calculations that are dependent upon earlier
/// ones as far as possible away from the calculation of those values
/// This tends to make adjacent loop iterations not dependent upon one another,
diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBodyInternals.h b/thirdparty/bullet/BulletSoftBody/btSoftBodyInternals.h
index b9ebc95b6b..c17bbb5cd4 100644
--- a/thirdparty/bullet/BulletSoftBody/btSoftBodyInternals.h
+++ b/thirdparty/bullet/BulletSoftBody/btSoftBodyInternals.h
@@ -32,86 +32,85 @@ subject to the following restrictions:
// Given a multibody link, a contact point and a contact direction, fill in the jacobian data needed to calculate the velocity change given an impulse in the contact direction
static SIMD_FORCE_INLINE void findJacobian(const btMultiBodyLinkCollider* multibodyLinkCol,
- btMultiBodyJacobianData& jacobianData,
- const btVector3& contact_point,
- const btVector3& dir)
-{
- const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6;
- jacobianData.m_jacobians.resize(ndof);
- jacobianData.m_deltaVelocitiesUnitImpulse.resize(ndof);
- btScalar* jac = &jacobianData.m_jacobians[0];
-
- multibodyLinkCol->m_multiBody->fillContactJacobianMultiDof(multibodyLinkCol->m_link, contact_point, dir, jac, jacobianData.scratch_r, jacobianData.scratch_v, jacobianData.scratch_m);
- multibodyLinkCol->m_multiBody->calcAccelerationDeltasMultiDof(&jacobianData.m_jacobians[0], &jacobianData.m_deltaVelocitiesUnitImpulse[0], jacobianData.scratch_r, jacobianData.scratch_v);
+ btMultiBodyJacobianData& jacobianData,
+ const btVector3& contact_point,
+ const btVector3& dir)
+{
+ const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6;
+ jacobianData.m_jacobians.resize(ndof);
+ jacobianData.m_deltaVelocitiesUnitImpulse.resize(ndof);
+ btScalar* jac = &jacobianData.m_jacobians[0];
+
+ multibodyLinkCol->m_multiBody->fillContactJacobianMultiDof(multibodyLinkCol->m_link, contact_point, dir, jac, jacobianData.scratch_r, jacobianData.scratch_v, jacobianData.scratch_m);
+ multibodyLinkCol->m_multiBody->calcAccelerationDeltasMultiDof(&jacobianData.m_jacobians[0], &jacobianData.m_deltaVelocitiesUnitImpulse[0], jacobianData.scratch_r, jacobianData.scratch_v);
}
static SIMD_FORCE_INLINE btVector3 generateUnitOrthogonalVector(const btVector3& u)
{
- btScalar ux = u.getX();
- btScalar uy = u.getY();
- btScalar uz = u.getZ();
- btScalar ax = std::abs(ux);
- btScalar ay = std::abs(uy);
- btScalar az = std::abs(uz);
- btVector3 v;
- if (ax <= ay && ax <= az)
- v = btVector3(0, -uz, uy);
- else if (ay <= ax && ay <= az)
- v = btVector3(-uz, 0, ux);
- else
- v = btVector3(-uy, ux, 0);
- v.normalize();
- return v;
+ btScalar ux = u.getX();
+ btScalar uy = u.getY();
+ btScalar uz = u.getZ();
+ btScalar ax = std::abs(ux);
+ btScalar ay = std::abs(uy);
+ btScalar az = std::abs(uz);
+ btVector3 v;
+ if (ax <= ay && ax <= az)
+ v = btVector3(0, -uz, uy);
+ else if (ay <= ax && ay <= az)
+ v = btVector3(-uz, 0, ux);
+ else
+ v = btVector3(-uy, ux, 0);
+ v.normalize();
+ return v;
}
static SIMD_FORCE_INLINE bool proximityTest(const btVector3& x1, const btVector3& x2, const btVector3& x3, const btVector3& x4, const btVector3& normal, const btScalar& mrg, btVector3& bary)
{
- btVector3 x43 = x4-x3;
- if (std::abs(x43.dot(normal)) > mrg)
- return false;
- btVector3 x13 = x1-x3;
- btVector3 x23 = x2-x3;
- btScalar a11 = x13.length2();
- btScalar a22 = x23.length2();
- btScalar a12 = x13.dot(x23);
- btScalar b1 = x13.dot(x43);
- btScalar b2 = x23.dot(x43);
- btScalar det = a11*a22 - a12*a12;
- if (det < SIMD_EPSILON)
- return false;
- btScalar w1 = (b1*a22-b2*a12)/det;
- btScalar w2 = (b2*a11-b1*a12)/det;
- btScalar w3 = 1-w1-w2;
- btScalar delta = mrg / std::sqrt(0.5*std::abs(x13.cross(x23).safeNorm()));
- bary = btVector3(w1,w2,w3);
- for (int i = 0; i < 3; ++i)
- {
- if (bary[i] < -delta || bary[i] > 1+delta)
- return false;
- }
- return true;
+ btVector3 x43 = x4 - x3;
+ if (std::abs(x43.dot(normal)) > mrg)
+ return false;
+ btVector3 x13 = x1 - x3;
+ btVector3 x23 = x2 - x3;
+ btScalar a11 = x13.length2();
+ btScalar a22 = x23.length2();
+ btScalar a12 = x13.dot(x23);
+ btScalar b1 = x13.dot(x43);
+ btScalar b2 = x23.dot(x43);
+ btScalar det = a11 * a22 - a12 * a12;
+ if (det < SIMD_EPSILON)
+ return false;
+ btScalar w1 = (b1 * a22 - b2 * a12) / det;
+ btScalar w2 = (b2 * a11 - b1 * a12) / det;
+ btScalar w3 = 1 - w1 - w2;
+ btScalar delta = mrg / std::sqrt(0.5 * std::abs(x13.cross(x23).safeNorm()));
+ bary = btVector3(w1, w2, w3);
+ for (int i = 0; i < 3; ++i)
+ {
+ if (bary[i] < -delta || bary[i] > 1 + delta)
+ return false;
+ }
+ return true;
}
static const int KDOP_COUNT = 13;
-static btVector3 dop[KDOP_COUNT]={btVector3(1,0,0),
- btVector3(0,1,0),
- btVector3(0,0,1),
- btVector3(1,1,0),
- btVector3(1,0,1),
- btVector3(0,1,1),
- btVector3(1,-1,0),
- btVector3(1,0,-1),
- btVector3(0,1,-1),
- btVector3(1,1,1),
- btVector3(1,-1,1),
- btVector3(1,1,-1),
- btVector3(1,-1,-1)
-};
+static btVector3 dop[KDOP_COUNT] = {btVector3(1, 0, 0),
+ btVector3(0, 1, 0),
+ btVector3(0, 0, 1),
+ btVector3(1, 1, 0),
+ btVector3(1, 0, 1),
+ btVector3(0, 1, 1),
+ btVector3(1, -1, 0),
+ btVector3(1, 0, -1),
+ btVector3(0, 1, -1),
+ btVector3(1, 1, 1),
+ btVector3(1, -1, 1),
+ btVector3(1, 1, -1),
+ btVector3(1, -1, -1)};
static inline int getSign(const btVector3& n, const btVector3& x)
{
btScalar d = n.dot(x);
- if (d>SIMD_EPSILON)
+ if (d > SIMD_EPSILON)
return 1;
- if (d<-SIMD_EPSILON)
+ if (d < -SIMD_EPSILON)
return -1;
return 0;
}
@@ -119,13 +118,12 @@ static inline int getSign(const btVector3& n, const btVector3& x)
static SIMD_FORCE_INLINE bool hasSeparatingPlane(const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt)
{
btVector3 hex[6] = {face->m_n[0]->m_x - node->m_x,
- face->m_n[1]->m_x - node->m_x,
- face->m_n[2]->m_x - node->m_x,
- face->m_n[0]->m_x + dt*face->m_n[0]->m_v - node->m_x,
- face->m_n[1]->m_x + dt*face->m_n[1]->m_v - node->m_x,
- face->m_n[2]->m_x + dt*face->m_n[2]->m_v - node->m_x
- };
- btVector3 segment = dt*node->m_v;
+ face->m_n[1]->m_x - node->m_x,
+ face->m_n[2]->m_x - node->m_x,
+ face->m_n[0]->m_x + dt * face->m_n[0]->m_v - node->m_x,
+ face->m_n[1]->m_x + dt * face->m_n[1]->m_v - node->m_x,
+ face->m_n[2]->m_x + dt * face->m_n[2]->m_v - node->m_x};
+ btVector3 segment = dt * node->m_v;
for (int i = 0; i < KDOP_COUNT; ++i)
{
int s = getSign(dop[i], segment);
@@ -143,488 +141,494 @@ static SIMD_FORCE_INLINE bool hasSeparatingPlane(const btSoftBody::Face* face, c
static SIMD_FORCE_INLINE bool nearZero(const btScalar& a)
{
- return (a>-SAFE_EPSILON && a<SAFE_EPSILON);
+ return (a > -SAFE_EPSILON && a < SAFE_EPSILON);
}
static SIMD_FORCE_INLINE bool sameSign(const btScalar& a, const btScalar& b)
{
- return (nearZero(a) || nearZero(b) || (a>SAFE_EPSILON && b>SAFE_EPSILON) || (a<-SAFE_EPSILON && b<-SAFE_EPSILON));
+ return (nearZero(a) || nearZero(b) || (a > SAFE_EPSILON && b > SAFE_EPSILON) || (a < -SAFE_EPSILON && b < -SAFE_EPSILON));
}
static SIMD_FORCE_INLINE bool diffSign(const btScalar& a, const btScalar& b)
{
- return !sameSign(a, b);
-}
-inline btScalar evaluateBezier2(const btScalar &p0, const btScalar &p1, const btScalar &p2, const btScalar &t, const btScalar &s)
-{
- btScalar s2 = s*s;
- btScalar t2 = t*t;
-
- return p0*s2+p1*btScalar(2.0)*s*t+p2*t2;
-}
-inline btScalar evaluateBezier(const btScalar &p0, const btScalar &p1, const btScalar &p2, const btScalar &p3, const btScalar &t, const btScalar &s)
-{
- btScalar s2 = s*s;
- btScalar s3 = s2*s;
- btScalar t2 = t*t;
- btScalar t3 = t2*t;
-
- return p0*s3+p1*btScalar(3.0)*s2*t+p2*btScalar(3.0)*s*t2+p3*t3;
-}
-static SIMD_FORCE_INLINE bool getSigns(bool type_c, const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& t0, const btScalar& t1, btScalar &lt0, btScalar &lt1)
-{
- if (sameSign(t0, t1)) {
- lt0 = t0;
- lt1 = t0;
- return true;
- }
-
- if (type_c || diffSign(k0, k3)) {
- btScalar ft = evaluateBezier(k0, k1, k2, k3, t0, -t1);
- if (t0<-0)
- ft = -ft;
-
- if (sameSign(ft, k0)) {
- lt0 = t1;
- lt1 = t1;
- }
- else {
- lt0 = t0;
- lt1 = t0;
- }
- return true;
- }
-
- if (!type_c) {
- btScalar ft = evaluateBezier(k0, k1, k2, k3, t0, -t1);
- if (t0<-0)
- ft = -ft;
-
- if (diffSign(ft, k0)) {
- lt0 = t0;
- lt1 = t1;
- return true;
- }
-
- btScalar fk = evaluateBezier2(k1-k0, k2-k1, k3-k2, t0, -t1);
-
- if (sameSign(fk, k1-k0))
- lt0 = lt1 = t1;
- else
- lt0 = lt1 = t0;
-
- return true;
- }
- return false;
+ return !sameSign(a, b);
+}
+inline btScalar evaluateBezier2(const btScalar& p0, const btScalar& p1, const btScalar& p2, const btScalar& t, const btScalar& s)
+{
+ btScalar s2 = s * s;
+ btScalar t2 = t * t;
+
+ return p0 * s2 + p1 * btScalar(2.0) * s * t + p2 * t2;
+}
+inline btScalar evaluateBezier(const btScalar& p0, const btScalar& p1, const btScalar& p2, const btScalar& p3, const btScalar& t, const btScalar& s)
+{
+ btScalar s2 = s * s;
+ btScalar s3 = s2 * s;
+ btScalar t2 = t * t;
+ btScalar t3 = t2 * t;
+
+ return p0 * s3 + p1 * btScalar(3.0) * s2 * t + p2 * btScalar(3.0) * s * t2 + p3 * t3;
+}
+static SIMD_FORCE_INLINE bool getSigns(bool type_c, const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& t0, const btScalar& t1, btScalar& lt0, btScalar& lt1)
+{
+ if (sameSign(t0, t1))
+ {
+ lt0 = t0;
+ lt1 = t0;
+ return true;
+ }
+
+ if (type_c || diffSign(k0, k3))
+ {
+ btScalar ft = evaluateBezier(k0, k1, k2, k3, t0, -t1);
+ if (t0 < -0)
+ ft = -ft;
+
+ if (sameSign(ft, k0))
+ {
+ lt0 = t1;
+ lt1 = t1;
+ }
+ else
+ {
+ lt0 = t0;
+ lt1 = t0;
+ }
+ return true;
+ }
+
+ if (!type_c)
+ {
+ btScalar ft = evaluateBezier(k0, k1, k2, k3, t0, -t1);
+ if (t0 < -0)
+ ft = -ft;
+
+ if (diffSign(ft, k0))
+ {
+ lt0 = t0;
+ lt1 = t1;
+ return true;
+ }
+
+ btScalar fk = evaluateBezier2(k1 - k0, k2 - k1, k3 - k2, t0, -t1);
+
+ if (sameSign(fk, k1 - k0))
+ lt0 = lt1 = t1;
+ else
+ lt0 = lt1 = t0;
+
+ return true;
+ }
+ return false;
}
static SIMD_FORCE_INLINE void getBernsteinCoeff(const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt, btScalar& k0, btScalar& k1, btScalar& k2, btScalar& k3)
{
- const btVector3& n0 = face->m_n0;
- const btVector3& n1 = face->m_n1;
- btVector3 n_hat = n0 + n1 - face->m_vn;
- btVector3 p0ma0 = node->m_x - face->m_n[0]->m_x;
- btVector3 p1ma1 = node->m_q - face->m_n[0]->m_q;
- k0 = (p0ma0).dot(n0) * 3.0;
- k1 = (p0ma0).dot(n_hat) + (p1ma1).dot(n0);
- k2 = (p1ma1).dot(n_hat) + (p0ma0).dot(n1);
- k3 = (p1ma1).dot(n1) * 3.0;
+ const btVector3& n0 = face->m_n0;
+ const btVector3& n1 = face->m_n1;
+ btVector3 n_hat = n0 + n1 - face->m_vn;
+ btVector3 p0ma0 = node->m_x - face->m_n[0]->m_x;
+ btVector3 p1ma1 = node->m_q - face->m_n[0]->m_q;
+ k0 = (p0ma0).dot(n0) * 3.0;
+ k1 = (p0ma0).dot(n_hat) + (p1ma1).dot(n0);
+ k2 = (p1ma1).dot(n_hat) + (p0ma0).dot(n1);
+ k3 = (p1ma1).dot(n1) * 3.0;
}
static SIMD_FORCE_INLINE void polyDecomposition(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& j0, const btScalar& j1, const btScalar& j2, btScalar& u0, btScalar& u1, btScalar& v0, btScalar& v1)
{
- btScalar denom = 4.0 * (j1-j2) * (j1-j0) + (j2-j0) * (j2-j0);
- u0 = (2.0*(j1-j2)*(3.0*k1-2.0*k0-k3) - (j0-j2)*(3.0*k2-2.0*k3-k0)) / denom;
- u1 = (2.0*(j1-j0)*(3.0*k2-2.0*k3-k0) - (j2-j0)*(3.0*k1-2.0*k0-k3)) / denom;
- v0 = k0-u0*j0;
- v1 = k3-u1*j2;
+ btScalar denom = 4.0 * (j1 - j2) * (j1 - j0) + (j2 - j0) * (j2 - j0);
+ u0 = (2.0 * (j1 - j2) * (3.0 * k1 - 2.0 * k0 - k3) - (j0 - j2) * (3.0 * k2 - 2.0 * k3 - k0)) / denom;
+ u1 = (2.0 * (j1 - j0) * (3.0 * k2 - 2.0 * k3 - k0) - (j2 - j0) * (3.0 * k1 - 2.0 * k0 - k3)) / denom;
+ v0 = k0 - u0 * j0;
+ v1 = k3 - u1 * j2;
}
static SIMD_FORCE_INLINE bool rootFindingLemma(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3)
{
- btScalar u0, u1, v0, v1;
- btScalar j0 = 3.0*(k1-k0);
- btScalar j1 = 3.0*(k2-k1);
- btScalar j2 = 3.0*(k3-k2);
- polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1);
- if (sameSign(v0, v1))
- {
- btScalar Ypa = j0*(1.0-v0)*(1.0-v0) + 2.0*j1*v0*(1.0-v0) + j2*v0*v0; // Y'(v0)
- if (sameSign(Ypa, j0))
- {
- return (diffSign(k0,v1));
- }
- }
- return diffSign(k0,v0);
-}
-
-static SIMD_FORCE_INLINE void getJs(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btSoftBody::Node* a, const btSoftBody::Node* b, const btSoftBody::Node* c, const btSoftBody::Node* p, const btScalar& dt, btScalar& j0, btScalar& j1, btScalar& j2)
-{
- const btVector3& a0 = a->m_x;
- const btVector3& b0 = b->m_x;
- const btVector3& c0 = c->m_x;
- const btVector3& va = a->m_v;
- const btVector3& vb = b->m_v;
- const btVector3& vc = c->m_v;
- const btVector3 a1 = a0 + dt*va;
- const btVector3 b1 = b0 + dt*vb;
- const btVector3 c1 = c0 + dt*vc;
- btVector3 n0 = (b0-a0).cross(c0-a0);
- btVector3 n1 = (b1-a1).cross(c1-a1);
- btVector3 n_hat = n0+n1 - dt*dt*(vb-va).cross(vc-va);
- const btVector3& p0 = p->m_x;
- const btVector3& vp = p->m_v;
- btVector3 p1 = p0 + dt*vp;
- btVector3 m0 = (b0-p0).cross(c0-p0);
- btVector3 m1 = (b1-p1).cross(c1-p1);
- btVector3 m_hat = m0+m1 - dt*dt*(vb-vp).cross(vc-vp);
- btScalar l0 = m0.dot(n0);
- btScalar l1 = 0.25 * (m0.dot(n_hat) + m_hat.dot(n0));
- btScalar l2 = btScalar(1)/btScalar(6)*(m0.dot(n1) + m_hat.dot(n_hat) + m1.dot(n0));
- btScalar l3 = 0.25 * (m_hat.dot(n1) + m1.dot(n_hat));
- btScalar l4 = m1.dot(n1);
-
- btScalar k1p = 0.25 * k0 + 0.75 * k1;
- btScalar k2p = 0.5 * k1 + 0.5 * k2;
- btScalar k3p = 0.75 * k2 + 0.25 * k3;
-
- btScalar s0 = (l1 * k0 - l0 * k1p)*4.0;
- btScalar s1 = (l2 * k0 - l0 * k2p)*2.0;
- btScalar s2 = (l3 * k0 - l0 * k3p)*btScalar(4)/btScalar(3);
- btScalar s3 = l4 * k0 - l0 * k3;
-
- j0 = (s1*k0 - s0*k1) * 3.0;
- j1 = (s2*k0 - s0*k2) * 1.5;
- j2 = (s3*k0 - s0*k3);
+ btScalar u0, u1, v0, v1;
+ btScalar j0 = 3.0 * (k1 - k0);
+ btScalar j1 = 3.0 * (k2 - k1);
+ btScalar j2 = 3.0 * (k3 - k2);
+ polyDecomposition(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1);
+ if (sameSign(v0, v1))
+ {
+ btScalar Ypa = j0 * (1.0 - v0) * (1.0 - v0) + 2.0 * j1 * v0 * (1.0 - v0) + j2 * v0 * v0; // Y'(v0)
+ if (sameSign(Ypa, j0))
+ {
+ return (diffSign(k0, v1));
+ }
+ }
+ return diffSign(k0, v0);
+}
+
+static SIMD_FORCE_INLINE void getJs(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btSoftBody::Node* a, const btSoftBody::Node* b, const btSoftBody::Node* c, const btSoftBody::Node* p, const btScalar& dt, btScalar& j0, btScalar& j1, btScalar& j2)
+{
+ const btVector3& a0 = a->m_x;
+ const btVector3& b0 = b->m_x;
+ const btVector3& c0 = c->m_x;
+ const btVector3& va = a->m_v;
+ const btVector3& vb = b->m_v;
+ const btVector3& vc = c->m_v;
+ const btVector3 a1 = a0 + dt * va;
+ const btVector3 b1 = b0 + dt * vb;
+ const btVector3 c1 = c0 + dt * vc;
+ btVector3 n0 = (b0 - a0).cross(c0 - a0);
+ btVector3 n1 = (b1 - a1).cross(c1 - a1);
+ btVector3 n_hat = n0 + n1 - dt * dt * (vb - va).cross(vc - va);
+ const btVector3& p0 = p->m_x;
+ const btVector3& vp = p->m_v;
+ btVector3 p1 = p0 + dt * vp;
+ btVector3 m0 = (b0 - p0).cross(c0 - p0);
+ btVector3 m1 = (b1 - p1).cross(c1 - p1);
+ btVector3 m_hat = m0 + m1 - dt * dt * (vb - vp).cross(vc - vp);
+ btScalar l0 = m0.dot(n0);
+ btScalar l1 = 0.25 * (m0.dot(n_hat) + m_hat.dot(n0));
+ btScalar l2 = btScalar(1) / btScalar(6) * (m0.dot(n1) + m_hat.dot(n_hat) + m1.dot(n0));
+ btScalar l3 = 0.25 * (m_hat.dot(n1) + m1.dot(n_hat));
+ btScalar l4 = m1.dot(n1);
+
+ btScalar k1p = 0.25 * k0 + 0.75 * k1;
+ btScalar k2p = 0.5 * k1 + 0.5 * k2;
+ btScalar k3p = 0.75 * k2 + 0.25 * k3;
+
+ btScalar s0 = (l1 * k0 - l0 * k1p) * 4.0;
+ btScalar s1 = (l2 * k0 - l0 * k2p) * 2.0;
+ btScalar s2 = (l3 * k0 - l0 * k3p) * btScalar(4) / btScalar(3);
+ btScalar s3 = l4 * k0 - l0 * k3;
+
+ j0 = (s1 * k0 - s0 * k1) * 3.0;
+ j1 = (s2 * k0 - s0 * k2) * 1.5;
+ j2 = (s3 * k0 - s0 * k3);
}
static SIMD_FORCE_INLINE bool signDetermination1Internal(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& u0, const btScalar& u1, const btScalar& v0, const btScalar& v1)
{
- btScalar Yu0 = k0*(1.0-u0)*(1.0-u0)*(1.0-u0) + 3.0*k1*u0*(1.0-u0)*(1.0-u0) + 3.0*k2*u0*u0*(1.0-u0) + k3*u0*u0*u0; // Y(u0)
- btScalar Yv0 = k0*(1.0-v0)*(1.0-v0)*(1.0-v0) + 3.0*k1*v0*(1.0-v0)*(1.0-v0) + 3.0*k2*v0*v0*(1.0-v0) + k3*v0*v0*v0; // Y(v0)
+ btScalar Yu0 = k0 * (1.0 - u0) * (1.0 - u0) * (1.0 - u0) + 3.0 * k1 * u0 * (1.0 - u0) * (1.0 - u0) + 3.0 * k2 * u0 * u0 * (1.0 - u0) + k3 * u0 * u0 * u0; // Y(u0)
+ btScalar Yv0 = k0 * (1.0 - v0) * (1.0 - v0) * (1.0 - v0) + 3.0 * k1 * v0 * (1.0 - v0) * (1.0 - v0) + 3.0 * k2 * v0 * v0 * (1.0 - v0) + k3 * v0 * v0 * v0; // Y(v0)
- btScalar sign_Ytp = (u0 > u1) ? Yu0 : -Yu0;
- btScalar L = sameSign(sign_Ytp, k0) ? u1 : u0;
- sign_Ytp = (v0 > v1) ? Yv0 : -Yv0;
- btScalar K = (sameSign(sign_Ytp,k0)) ? v1 : v0;
- return diffSign(L,K);
+ btScalar sign_Ytp = (u0 > u1) ? Yu0 : -Yu0;
+ btScalar L = sameSign(sign_Ytp, k0) ? u1 : u0;
+ sign_Ytp = (v0 > v1) ? Yv0 : -Yv0;
+ btScalar K = (sameSign(sign_Ytp, k0)) ? v1 : v0;
+ return diffSign(L, K);
}
static SIMD_FORCE_INLINE bool signDetermination2Internal(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& j0, const btScalar& j1, const btScalar& j2, const btScalar& u0, const btScalar& u1, const btScalar& v0, const btScalar& v1)
{
- btScalar Yu0 = k0*(1.0-u0)*(1.0-u0)*(1.0-u0) + 3.0*k1*u0*(1.0-u0)*(1.0-u0) + 3.0*k2*u0*u0*(1.0-u0) + k3*u0*u0*u0; // Y(u0)
- btScalar sign_Ytp = (u0 > u1) ? Yu0 : -Yu0, L1, L2;
- if (diffSign(sign_Ytp,k0))
- {
- L1 = u0;
- L2 = u1;
- }
- else
- {
- btScalar Yp_u0 = j0*(1.0-u0)*(1.0-u0) + 2.0*j1*(1.0-u0)*u0 + j2*u0*u0;
- if (sameSign(Yp_u0,j0))
- {
- L1 = u1;
- L2 = u1;
- }
- else
- {
- L1 = u0;
- L2 = u0;
- }
- }
- btScalar Yv0 = k0*(1.0-v0)*(1.0-v0)*(1.0-v0) + 3.0*k1*v0*(1.0-v0)*(1.0-v0) + 3.0*k2*v0*v0*(1.0-v0) + k3*v0*v0*v0; // Y(uv0)
- sign_Ytp = (v0 > v1) ? Yv0 : -Yv0;
- btScalar K1, K2;
- if (diffSign(sign_Ytp,k0))
- {
- K1 = v0;
- K2 = v1;
- }
- else
- {
- btScalar Yp_v0 = j0*(1.0-v0)*(1.0-v0) + 2.0*j1*(1.0-v0)*v0 + j2*v0*v0;
- if (sameSign(Yp_v0,j0))
- {
- K1 = v1;
- K2 = v1;
- }
- else
- {
- K1 = v0;
- K2 = v0;
- }
- }
- return (diffSign(K1, L1) || diffSign(L2, K2));
+ btScalar Yu0 = k0 * (1.0 - u0) * (1.0 - u0) * (1.0 - u0) + 3.0 * k1 * u0 * (1.0 - u0) * (1.0 - u0) + 3.0 * k2 * u0 * u0 * (1.0 - u0) + k3 * u0 * u0 * u0; // Y(u0)
+ btScalar sign_Ytp = (u0 > u1) ? Yu0 : -Yu0, L1, L2;
+ if (diffSign(sign_Ytp, k0))
+ {
+ L1 = u0;
+ L2 = u1;
+ }
+ else
+ {
+ btScalar Yp_u0 = j0 * (1.0 - u0) * (1.0 - u0) + 2.0 * j1 * (1.0 - u0) * u0 + j2 * u0 * u0;
+ if (sameSign(Yp_u0, j0))
+ {
+ L1 = u1;
+ L2 = u1;
+ }
+ else
+ {
+ L1 = u0;
+ L2 = u0;
+ }
+ }
+ btScalar Yv0 = k0 * (1.0 - v0) * (1.0 - v0) * (1.0 - v0) + 3.0 * k1 * v0 * (1.0 - v0) * (1.0 - v0) + 3.0 * k2 * v0 * v0 * (1.0 - v0) + k3 * v0 * v0 * v0; // Y(uv0)
+ sign_Ytp = (v0 > v1) ? Yv0 : -Yv0;
+ btScalar K1, K2;
+ if (diffSign(sign_Ytp, k0))
+ {
+ K1 = v0;
+ K2 = v1;
+ }
+ else
+ {
+ btScalar Yp_v0 = j0 * (1.0 - v0) * (1.0 - v0) + 2.0 * j1 * (1.0 - v0) * v0 + j2 * v0 * v0;
+ if (sameSign(Yp_v0, j0))
+ {
+ K1 = v1;
+ K2 = v1;
+ }
+ else
+ {
+ K1 = v0;
+ K2 = v0;
+ }
+ }
+ return (diffSign(K1, L1) || diffSign(L2, K2));
}
static SIMD_FORCE_INLINE bool signDetermination1(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt)
{
- btScalar j0, j1, j2, u0, u1, v0, v1;
- // p1
- getJs(k0,k1,k2,k3,face->m_n[0], face->m_n[1], face->m_n[2], node, dt, j0, j1, j2);
- if (nearZero(j0+j2-j1*2.0))
- {
- btScalar lt0, lt1;
- getSigns(true, k0, k1, k2, k3, j0, j2, lt0, lt1);
- if (lt0 < -SAFE_EPSILON)
- return false;
- }
- else
- {
- polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1);
- if (!signDetermination1Internal(k0,k1,k2,k3,u0,u1,v0,v1))
- return false;
- }
- // p2
- getJs(k0,k1,k2,k3,face->m_n[1], face->m_n[2], face->m_n[0], node, dt, j0, j1, j2);
- if (nearZero(j0+j2-j1*2.0))
- {
- btScalar lt0, lt1;
- getSigns(true, k0, k1, k2, k3, j0, j2, lt0, lt1);
- if (lt0 < -SAFE_EPSILON)
- return false;
- }
- else
- {
- polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1);
- if (!signDetermination1Internal(k0,k1,k2,k3,u0,u1,v0,v1))
- return false;
- }
- // p3
- getJs(k0,k1,k2,k3,face->m_n[2], face->m_n[0], face->m_n[1], node, dt, j0, j1, j2);
- if (nearZero(j0+j2-j1*2.0))
- {
- btScalar lt0, lt1;
- getSigns(true, k0, k1, k2, k3, j0, j2, lt0, lt1);
- if (lt0 < -SAFE_EPSILON)
- return false;
- }
- else
- {
- polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1);
- if (!signDetermination1Internal(k0,k1,k2,k3,u0,u1,v0,v1))
- return false;
- }
- return true;
+ btScalar j0, j1, j2, u0, u1, v0, v1;
+ // p1
+ getJs(k0, k1, k2, k3, face->m_n[0], face->m_n[1], face->m_n[2], node, dt, j0, j1, j2);
+ if (nearZero(j0 + j2 - j1 * 2.0))
+ {
+ btScalar lt0, lt1;
+ getSigns(true, k0, k1, k2, k3, j0, j2, lt0, lt1);
+ if (lt0 < -SAFE_EPSILON)
+ return false;
+ }
+ else
+ {
+ polyDecomposition(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1);
+ if (!signDetermination1Internal(k0, k1, k2, k3, u0, u1, v0, v1))
+ return false;
+ }
+ // p2
+ getJs(k0, k1, k2, k3, face->m_n[1], face->m_n[2], face->m_n[0], node, dt, j0, j1, j2);
+ if (nearZero(j0 + j2 - j1 * 2.0))
+ {
+ btScalar lt0, lt1;
+ getSigns(true, k0, k1, k2, k3, j0, j2, lt0, lt1);
+ if (lt0 < -SAFE_EPSILON)
+ return false;
+ }
+ else
+ {
+ polyDecomposition(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1);
+ if (!signDetermination1Internal(k0, k1, k2, k3, u0, u1, v0, v1))
+ return false;
+ }
+ // p3
+ getJs(k0, k1, k2, k3, face->m_n[2], face->m_n[0], face->m_n[1], node, dt, j0, j1, j2);
+ if (nearZero(j0 + j2 - j1 * 2.0))
+ {
+ btScalar lt0, lt1;
+ getSigns(true, k0, k1, k2, k3, j0, j2, lt0, lt1);
+ if (lt0 < -SAFE_EPSILON)
+ return false;
+ }
+ else
+ {
+ polyDecomposition(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1);
+ if (!signDetermination1Internal(k0, k1, k2, k3, u0, u1, v0, v1))
+ return false;
+ }
+ return true;
}
static SIMD_FORCE_INLINE bool signDetermination2(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt)
{
- btScalar j0, j1, j2, u0, u1, v0, v1;
- // p1
- getJs(k0,k1,k2,k3,face->m_n[0], face->m_n[1], face->m_n[2], node, dt, j0, j1, j2);
- if (nearZero(j0+j2-j1*2.0))
- {
- btScalar lt0, lt1;
- bool bt0 = true, bt1=true;
- getSigns(false, k0, k1, k2, k3, j0, j2, lt0, lt1);
- if (lt0 < -SAFE_EPSILON)
- bt0 = false;
- if (lt1 < -SAFE_EPSILON)
- bt1 = false;
- if (!bt0 && !bt1)
- return false;
- }
- else
- {
- polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1);
- if (!signDetermination2Internal(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1))
- return false;
- }
- // p2
- getJs(k0,k1,k2,k3,face->m_n[1], face->m_n[2], face->m_n[0], node, dt, j0, j1, j2);
- if (nearZero(j0+j2-j1*2.0))
- {
- btScalar lt0, lt1;
- bool bt0=true, bt1=true;
- getSigns(false, k0, k1, k2, k3, j0, j2, lt0, lt1);
- if (lt0 < -SAFE_EPSILON)
- bt0 = false;
- if (lt1 < -SAFE_EPSILON)
- bt1 = false;
- if (!bt0 && !bt1)
- return false;
- }
- else
- {
- polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1);
- if (!signDetermination2Internal(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1))
- return false;
- }
- // p3
- getJs(k0,k1,k2,k3,face->m_n[2], face->m_n[0], face->m_n[1], node, dt, j0, j1, j2);
- if (nearZero(j0+j2-j1*2.0))
- {
- btScalar lt0, lt1;
- bool bt0=true, bt1=true;
- getSigns(false, k0, k1, k2, k3, j0, j2, lt0, lt1);
- if (lt0 < -SAFE_EPSILON)
- bt0 = false;
- if (lt1 < -SAFE_EPSILON)
- bt1 = false;
- if (!bt0 && !bt1)
- return false;
- }
- else
- {
- polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1);
- if (!signDetermination2Internal(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1))
- return false;
- }
- return true;
+ btScalar j0, j1, j2, u0, u1, v0, v1;
+ // p1
+ getJs(k0, k1, k2, k3, face->m_n[0], face->m_n[1], face->m_n[2], node, dt, j0, j1, j2);
+ if (nearZero(j0 + j2 - j1 * 2.0))
+ {
+ btScalar lt0, lt1;
+ bool bt0 = true, bt1 = true;
+ getSigns(false, k0, k1, k2, k3, j0, j2, lt0, lt1);
+ if (lt0 < -SAFE_EPSILON)
+ bt0 = false;
+ if (lt1 < -SAFE_EPSILON)
+ bt1 = false;
+ if (!bt0 && !bt1)
+ return false;
+ }
+ else
+ {
+ polyDecomposition(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1);
+ if (!signDetermination2Internal(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1))
+ return false;
+ }
+ // p2
+ getJs(k0, k1, k2, k3, face->m_n[1], face->m_n[2], face->m_n[0], node, dt, j0, j1, j2);
+ if (nearZero(j0 + j2 - j1 * 2.0))
+ {
+ btScalar lt0, lt1;
+ bool bt0 = true, bt1 = true;
+ getSigns(false, k0, k1, k2, k3, j0, j2, lt0, lt1);
+ if (lt0 < -SAFE_EPSILON)
+ bt0 = false;
+ if (lt1 < -SAFE_EPSILON)
+ bt1 = false;
+ if (!bt0 && !bt1)
+ return false;
+ }
+ else
+ {
+ polyDecomposition(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1);
+ if (!signDetermination2Internal(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1))
+ return false;
+ }
+ // p3
+ getJs(k0, k1, k2, k3, face->m_n[2], face->m_n[0], face->m_n[1], node, dt, j0, j1, j2);
+ if (nearZero(j0 + j2 - j1 * 2.0))
+ {
+ btScalar lt0, lt1;
+ bool bt0 = true, bt1 = true;
+ getSigns(false, k0, k1, k2, k3, j0, j2, lt0, lt1);
+ if (lt0 < -SAFE_EPSILON)
+ bt0 = false;
+ if (lt1 < -SAFE_EPSILON)
+ bt1 = false;
+ if (!bt0 && !bt1)
+ return false;
+ }
+ else
+ {
+ polyDecomposition(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1);
+ if (!signDetermination2Internal(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1))
+ return false;
+ }
+ return true;
}
static SIMD_FORCE_INLINE bool coplanarAndInsideTest(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt)
{
- // Coplanar test
- if (diffSign(k1-k0, k3-k2))
- {
- // Case b:
- if (sameSign(k0, k3) && !rootFindingLemma(k0,k1,k2,k3))
- return false;
- // inside test
- return signDetermination2(k0, k1, k2, k3, face, node, dt);
- }
- else
- {
- // Case c:
- if (sameSign(k0, k3))
- return false;
- // inside test
- return signDetermination1(k0, k1, k2, k3, face, node, dt);
- }
- return false;
+ // Coplanar test
+ if (diffSign(k1 - k0, k3 - k2))
+ {
+ // Case b:
+ if (sameSign(k0, k3) && !rootFindingLemma(k0, k1, k2, k3))
+ return false;
+ // inside test
+ return signDetermination2(k0, k1, k2, k3, face, node, dt);
+ }
+ else
+ {
+ // Case c:
+ if (sameSign(k0, k3))
+ return false;
+ // inside test
+ return signDetermination1(k0, k1, k2, k3, face, node, dt);
+ }
+ return false;
}
static SIMD_FORCE_INLINE bool conservativeCulling(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& mrg)
{
- if (k0 > mrg && k1 > mrg && k2 > mrg && k3 > mrg)
- return true;
- if (k0 < -mrg && k1 < -mrg && k2 < -mrg && k3 < -mrg)
- return true;
- return false;
+ if (k0 > mrg && k1 > mrg && k2 > mrg && k3 > mrg)
+ return true;
+ if (k0 < -mrg && k1 < -mrg && k2 < -mrg && k3 < -mrg)
+ return true;
+ return false;
}
static SIMD_FORCE_INLINE bool bernsteinVFTest(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& mrg, const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt)
{
- if (conservativeCulling(k0, k1, k2, k3, mrg))
- return false;
- return coplanarAndInsideTest(k0, k1, k2, k3, face, node, dt);
+ if (conservativeCulling(k0, k1, k2, k3, mrg))
+ return false;
+ return coplanarAndInsideTest(k0, k1, k2, k3, face, node, dt);
}
static SIMD_FORCE_INLINE void deCasteljau(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& t0, btScalar& k10, btScalar& k20, btScalar& k30, btScalar& k21, btScalar& k12)
{
- k10 = k0*(1.0-t0) + k1*t0;
- btScalar k11 = k1*(1.0-t0) + k2*t0;
- k12 = k2*(1.0-t0) + k3*t0;
- k20 = k10*(1.0-t0) + k11*t0;
- k21 = k11*(1.0-t0) + k12*t0;
- k30 = k20*(1.0-t0) + k21*t0;
+ k10 = k0 * (1.0 - t0) + k1 * t0;
+ btScalar k11 = k1 * (1.0 - t0) + k2 * t0;
+ k12 = k2 * (1.0 - t0) + k3 * t0;
+ k20 = k10 * (1.0 - t0) + k11 * t0;
+ k21 = k11 * (1.0 - t0) + k12 * t0;
+ k30 = k20 * (1.0 - t0) + k21 * t0;
}
static SIMD_FORCE_INLINE bool bernsteinVFTest(const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt, const btScalar& mrg)
{
- btScalar k0, k1, k2, k3;
- getBernsteinCoeff(face, node, dt, k0, k1, k2, k3);
- if (conservativeCulling(k0, k1, k2, k3, mrg))
- return false;
- return true;
- if (diffSign(k2-2.0*k1+k0, k3-2.0*k2+k1))
- {
- btScalar k10, k20, k30, k21, k12;
- btScalar t0 = (k2-2.0*k1+k0)/(k0-3.0*k1+3.0*k2-k3);
- deCasteljau(k0, k1, k2, k3, t0, k10, k20, k30, k21, k12);
- return bernsteinVFTest(k0, k10, k20, k30, mrg, face, node, dt) || bernsteinVFTest(k30, k21, k12, k3, mrg, face, node, dt);
- }
- return coplanarAndInsideTest(k0, k1, k2, k3, face, node, dt);
+ btScalar k0, k1, k2, k3;
+ getBernsteinCoeff(face, node, dt, k0, k1, k2, k3);
+ if (conservativeCulling(k0, k1, k2, k3, mrg))
+ return false;
+ return true;
+ if (diffSign(k2 - 2.0 * k1 + k0, k3 - 2.0 * k2 + k1))
+ {
+ btScalar k10, k20, k30, k21, k12;
+ btScalar t0 = (k2 - 2.0 * k1 + k0) / (k0 - 3.0 * k1 + 3.0 * k2 - k3);
+ deCasteljau(k0, k1, k2, k3, t0, k10, k20, k30, k21, k12);
+ return bernsteinVFTest(k0, k10, k20, k30, mrg, face, node, dt) || bernsteinVFTest(k30, k21, k12, k3, mrg, face, node, dt);
+ }
+ return coplanarAndInsideTest(k0, k1, k2, k3, face, node, dt);
}
static SIMD_FORCE_INLINE bool continuousCollisionDetection(const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt, const btScalar& mrg, btVector3& bary)
{
- if (hasSeparatingPlane(face, node, dt))
- return false;
- btVector3 x21 = face->m_n[1]->m_x - face->m_n[0]->m_x;
- btVector3 x31 = face->m_n[2]->m_x - face->m_n[0]->m_x;
- btVector3 x41 = node->m_x - face->m_n[0]->m_x;
- btVector3 v21 = face->m_n[1]->m_v - face->m_n[0]->m_v;
- btVector3 v31 = face->m_n[2]->m_v - face->m_n[0]->m_v;
- btVector3 v41 = node->m_v - face->m_n[0]->m_v;
- btVector3 a = x21.cross(x31);
- btVector3 b = x21.cross(v31) + v21.cross(x31);
- btVector3 c = v21.cross(v31);
- btVector3 d = x41;
- btVector3 e = v41;
- btScalar a0 = a.dot(d);
- btScalar a1 = a.dot(e) + b.dot(d);
- btScalar a2 = c.dot(d) + b.dot(e);
- btScalar a3 = c.dot(e);
- btScalar eps = SAFE_EPSILON;
- int num_roots = 0;
- btScalar roots[3];
- if (std::abs(a3) < eps)
- {
- // cubic term is zero
- if (std::abs(a2) < eps)
- {
- if (std::abs(a1) < eps)
- {
- if (std::abs(a0) < eps)
- {
- num_roots = 2;
- roots[0] = 0;
- roots[1] = dt;
- }
- }
- else
- {
- num_roots = 1;
- roots[0] = -a0/a1;
- }
- }
- else
- {
- num_roots = SolveP2(roots, a1/a2, a0/a2);
- }
- }
- else
- {
- num_roots = SolveP3(roots, a2/a3, a1/a3, a0/a3);
- }
-// std::sort(roots, roots+num_roots);
- if (num_roots > 1)
- {
- if (roots[0] > roots[1])
- btSwap(roots[0], roots[1]);
- }
- if (num_roots > 2)
- {
- if (roots[0] > roots[2])
- btSwap(roots[0], roots[2]);
- if (roots[1] > roots[2])
- btSwap(roots[1], roots[2]);
- }
- for (int r = 0; r < num_roots; ++r)
- {
- double root = roots[r];
- if (root <= 0)
- continue;
- if (root > dt + SIMD_EPSILON)
- return false;
- btVector3 x1 = face->m_n[0]->m_x + root * face->m_n[0]->m_v;
- btVector3 x2 = face->m_n[1]->m_x + root * face->m_n[1]->m_v;
- btVector3 x3 = face->m_n[2]->m_x + root * face->m_n[2]->m_v;
- btVector3 x4 = node->m_x + root * node->m_v;
- btVector3 normal = (x2-x1).cross(x3-x1);
- normal.safeNormalize();
- if (proximityTest(x1, x2, x3, x4, normal, mrg, bary))
- return true;
- }
- return false;
+ if (hasSeparatingPlane(face, node, dt))
+ return false;
+ btVector3 x21 = face->m_n[1]->m_x - face->m_n[0]->m_x;
+ btVector3 x31 = face->m_n[2]->m_x - face->m_n[0]->m_x;
+ btVector3 x41 = node->m_x - face->m_n[0]->m_x;
+ btVector3 v21 = face->m_n[1]->m_v - face->m_n[0]->m_v;
+ btVector3 v31 = face->m_n[2]->m_v - face->m_n[0]->m_v;
+ btVector3 v41 = node->m_v - face->m_n[0]->m_v;
+ btVector3 a = x21.cross(x31);
+ btVector3 b = x21.cross(v31) + v21.cross(x31);
+ btVector3 c = v21.cross(v31);
+ btVector3 d = x41;
+ btVector3 e = v41;
+ btScalar a0 = a.dot(d);
+ btScalar a1 = a.dot(e) + b.dot(d);
+ btScalar a2 = c.dot(d) + b.dot(e);
+ btScalar a3 = c.dot(e);
+ btScalar eps = SAFE_EPSILON;
+ int num_roots = 0;
+ btScalar roots[3];
+ if (std::abs(a3) < eps)
+ {
+ // cubic term is zero
+ if (std::abs(a2) < eps)
+ {
+ if (std::abs(a1) < eps)
+ {
+ if (std::abs(a0) < eps)
+ {
+ num_roots = 2;
+ roots[0] = 0;
+ roots[1] = dt;
+ }
+ }
+ else
+ {
+ num_roots = 1;
+ roots[0] = -a0 / a1;
+ }
+ }
+ else
+ {
+ num_roots = SolveP2(roots, a1 / a2, a0 / a2);
+ }
+ }
+ else
+ {
+ num_roots = SolveP3(roots, a2 / a3, a1 / a3, a0 / a3);
+ }
+ // std::sort(roots, roots+num_roots);
+ if (num_roots > 1)
+ {
+ if (roots[0] > roots[1])
+ btSwap(roots[0], roots[1]);
+ }
+ if (num_roots > 2)
+ {
+ if (roots[0] > roots[2])
+ btSwap(roots[0], roots[2]);
+ if (roots[1] > roots[2])
+ btSwap(roots[1], roots[2]);
+ }
+ for (int r = 0; r < num_roots; ++r)
+ {
+ double root = roots[r];
+ if (root <= 0)
+ continue;
+ if (root > dt + SIMD_EPSILON)
+ return false;
+ btVector3 x1 = face->m_n[0]->m_x + root * face->m_n[0]->m_v;
+ btVector3 x2 = face->m_n[1]->m_x + root * face->m_n[1]->m_v;
+ btVector3 x3 = face->m_n[2]->m_x + root * face->m_n[2]->m_v;
+ btVector3 x4 = node->m_x + root * node->m_v;
+ btVector3 normal = (x2 - x1).cross(x3 - x1);
+ normal.safeNormalize();
+ if (proximityTest(x1, x2, x3, x4, normal, mrg, bary))
+ return true;
+ }
+ return false;
}
static SIMD_FORCE_INLINE bool bernsteinCCD(const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt, const btScalar& mrg, btVector3& bary)
{
- if (!bernsteinVFTest(face, node, dt, mrg))
- return false;
- if (!continuousCollisionDetection(face, node, dt, 1e-6, bary))
- return false;
- return true;
+ if (!bernsteinVFTest(face, node, dt, mrg))
+ return false;
+ if (!continuousCollisionDetection(face, node, dt, 1e-6, bary))
+ return false;
+ return true;
}
//
@@ -902,62 +906,61 @@ static inline btMatrix3x3 Diagonal(btScalar x)
static inline btMatrix3x3 Diagonal(const btVector3& v)
{
- btMatrix3x3 m;
- m[0] = btVector3(v.getX(), 0, 0);
- m[1] = btVector3(0, v.getY(), 0);
- m[2] = btVector3(0, 0, v.getZ());
- return (m);
-}
-
-static inline btScalar Dot(const btScalar* a,const btScalar* b, int ndof)
-{
- btScalar result = 0;
- for (int i = 0; i < ndof; ++i)
- result += a[i] * b[i];
- return result;
-}
-
-static inline btMatrix3x3 OuterProduct(const btScalar* v1,const btScalar* v2,const btScalar* v3,
- const btScalar* u1, const btScalar* u2, const btScalar* u3, int ndof)
-{
- btMatrix3x3 m;
- btScalar a11 = Dot(v1,u1,ndof);
- btScalar a12 = Dot(v1,u2,ndof);
- btScalar a13 = Dot(v1,u3,ndof);
-
- btScalar a21 = Dot(v2,u1,ndof);
- btScalar a22 = Dot(v2,u2,ndof);
- btScalar a23 = Dot(v2,u3,ndof);
-
- btScalar a31 = Dot(v3,u1,ndof);
- btScalar a32 = Dot(v3,u2,ndof);
- btScalar a33 = Dot(v3,u3,ndof);
- m[0] = btVector3(a11, a12, a13);
- m[1] = btVector3(a21, a22, a23);
- m[2] = btVector3(a31, a32, a33);
- return (m);
-}
-
-static inline btMatrix3x3 OuterProduct(const btVector3& v1,const btVector3& v2)
-{
- btMatrix3x3 m;
- btScalar a11 = v1[0] * v2[0];
- btScalar a12 = v1[0] * v2[1];
- btScalar a13 = v1[0] * v2[2];
-
- btScalar a21 = v1[1] * v2[0];
- btScalar a22 = v1[1] * v2[1];
- btScalar a23 = v1[1] * v2[2];
-
- btScalar a31 = v1[2] * v2[0];
- btScalar a32 = v1[2] * v2[1];
- btScalar a33 = v1[2] * v2[2];
- m[0] = btVector3(a11, a12, a13);
- m[1] = btVector3(a21, a22, a23);
- m[2] = btVector3(a31, a32, a33);
- return (m);
+ btMatrix3x3 m;
+ m[0] = btVector3(v.getX(), 0, 0);
+ m[1] = btVector3(0, v.getY(), 0);
+ m[2] = btVector3(0, 0, v.getZ());
+ return (m);
+}
+
+static inline btScalar Dot(const btScalar* a, const btScalar* b, int ndof)
+{
+ btScalar result = 0;
+ for (int i = 0; i < ndof; ++i)
+ result += a[i] * b[i];
+ return result;
}
+static inline btMatrix3x3 OuterProduct(const btScalar* v1, const btScalar* v2, const btScalar* v3,
+ const btScalar* u1, const btScalar* u2, const btScalar* u3, int ndof)
+{
+ btMatrix3x3 m;
+ btScalar a11 = Dot(v1, u1, ndof);
+ btScalar a12 = Dot(v1, u2, ndof);
+ btScalar a13 = Dot(v1, u3, ndof);
+
+ btScalar a21 = Dot(v2, u1, ndof);
+ btScalar a22 = Dot(v2, u2, ndof);
+ btScalar a23 = Dot(v2, u3, ndof);
+
+ btScalar a31 = Dot(v3, u1, ndof);
+ btScalar a32 = Dot(v3, u2, ndof);
+ btScalar a33 = Dot(v3, u3, ndof);
+ m[0] = btVector3(a11, a12, a13);
+ m[1] = btVector3(a21, a22, a23);
+ m[2] = btVector3(a31, a32, a33);
+ return (m);
+}
+
+static inline btMatrix3x3 OuterProduct(const btVector3& v1, const btVector3& v2)
+{
+ btMatrix3x3 m;
+ btScalar a11 = v1[0] * v2[0];
+ btScalar a12 = v1[0] * v2[1];
+ btScalar a13 = v1[0] * v2[2];
+
+ btScalar a21 = v1[1] * v2[0];
+ btScalar a22 = v1[1] * v2[1];
+ btScalar a23 = v1[1] * v2[2];
+
+ btScalar a31 = v1[2] * v2[0];
+ btScalar a32 = v1[2] * v2[1];
+ btScalar a33 = v1[2] * v2[2];
+ m[0] = btVector3(a11, a12, a13);
+ m[1] = btVector3(a21, a22, a23);
+ m[2] = btVector3(a31, a32, a33);
+ return (m);
+}
//
static inline btMatrix3x3 Add(const btMatrix3x3& a,
@@ -1008,6 +1011,20 @@ static inline btMatrix3x3 ImpulseMatrix(btScalar dt,
}
//
+static inline btMatrix3x3 ImpulseMatrix(btScalar dt,
+ const btMatrix3x3& effective_mass_inv,
+ btScalar imb,
+ const btMatrix3x3& iwi,
+ const btVector3& r)
+{
+ return (Diagonal(1 / dt) * Add(effective_mass_inv, MassMatrix(imb, iwi, r)).inverse());
+ // btMatrix3x3 iimb = MassMatrix(imb, iwi, r);
+ // if (iimb.determinant() == 0)
+ // return effective_mass_inv.inverse();
+ // return effective_mass_inv.inverse() * Add(effective_mass_inv.inverse(), iimb.inverse()).inverse() * iimb.inverse();
+}
+
+//
static inline btMatrix3x3 ImpulseMatrix(btScalar ima, const btMatrix3x3& iia, const btVector3& ra,
btScalar imb, const btMatrix3x3& iib, const btVector3& rb)
{
@@ -1091,73 +1108,70 @@ static inline void ProjectOrigin(const btVector3& a,
//
static inline bool rayIntersectsTriangle(const btVector3& origin, const btVector3& dir, const btVector3& v0, const btVector3& v1, const btVector3& v2, btScalar& t)
{
- btScalar a, f, u, v;
-
- btVector3 e1 = v1 - v0;
- btVector3 e2 = v2 - v0;
- btVector3 h = dir.cross(e2);
- a = e1.dot(h);
-
- if (a > -0.00001 && a < 0.00001)
- return (false);
-
- f = btScalar(1) / a;
- btVector3 s = origin - v0;
- u = f * s.dot(h);
-
- if (u < 0.0 || u > 1.0)
- return (false);
-
- btVector3 q = s.cross(e1);
- v = f * dir.dot(q);
- if (v < 0.0 || u + v > 1.0)
- return (false);
- // at this stage we can compute t to find out where
- // the intersection point is on the line
- t = f * e2.dot(q);
- if (t > 0) // ray intersection
- return (true);
- else // this means that there is a line intersection
- // but not a ray intersection
- return (false);
+ btScalar a, f, u, v;
+
+ btVector3 e1 = v1 - v0;
+ btVector3 e2 = v2 - v0;
+ btVector3 h = dir.cross(e2);
+ a = e1.dot(h);
+
+ if (a > -0.00001 && a < 0.00001)
+ return (false);
+
+ f = btScalar(1) / a;
+ btVector3 s = origin - v0;
+ u = f * s.dot(h);
+
+ if (u < 0.0 || u > 1.0)
+ return (false);
+
+ btVector3 q = s.cross(e1);
+ v = f * dir.dot(q);
+ if (v < 0.0 || u + v > 1.0)
+ return (false);
+ // at this stage we can compute t to find out where
+ // the intersection point is on the line
+ t = f * e2.dot(q);
+ if (t > 0) // ray intersection
+ return (true);
+ else // this means that there is a line intersection
+ // but not a ray intersection
+ return (false);
}
static inline bool lineIntersectsTriangle(const btVector3& rayStart, const btVector3& rayEnd, const btVector3& p1, const btVector3& p2, const btVector3& p3, btVector3& sect, btVector3& normal)
{
- btVector3 dir = rayEnd - rayStart;
- btScalar dir_norm = dir.norm();
- if (dir_norm < SIMD_EPSILON)
- return false;
- dir.normalize();
-
- btScalar t;
-
- bool ret = rayIntersectsTriangle(rayStart, dir, p1, p2, p3, t);
-
- if (ret)
- {
- if (t <= dir_norm)
- {
- sect = rayStart + dir * t;
- }
- else
- {
- ret = false;
- }
- }
-
- if (ret)
- {
- btVector3 n = (p3-p1).cross(p2-p1);
- n.safeNormalize();
- if (n.dot(dir) < 0)
- normal = n;
- else
- normal = -n;
- }
- return ret;
-}
+ btVector3 dir = rayEnd - rayStart;
+ btScalar dir_norm = dir.norm();
+ if (dir_norm < SIMD_EPSILON)
+ return false;
+ dir.normalize();
+ btScalar t;
+ bool ret = rayIntersectsTriangle(rayStart, dir, p1, p2, p3, t);
+
+ if (ret)
+ {
+ if (t <= dir_norm)
+ {
+ sect = rayStart + dir * t;
+ }
+ else
+ {
+ ret = false;
+ }
+ }
+ if (ret)
+ {
+ btVector3 n = (p3 - p1).cross(p2 - p1);
+ n.safeNormalize();
+ if (n.dot(dir) < 0)
+ normal = n;
+ else
+ normal = -n;
+ }
+ return ret;
+}
//
template <typename T>
@@ -1586,57 +1600,57 @@ struct btSoftColliders
psa->m_cdbvt.collideTT(psa->m_cdbvt.m_root, psb->m_cdbvt.m_root, *this);
}
};
- //
- // CollideSDF_RS
- //
- struct CollideSDF_RS : btDbvt::ICollide
- {
- void Process(const btDbvtNode* leaf)
- {
- btSoftBody::Node* node = (btSoftBody::Node*)leaf->data;
- DoNode(*node);
- }
- void DoNode(btSoftBody::Node& n) const
- {
- const btScalar m = n.m_im > 0 ? dynmargin : stamargin;
- btSoftBody::RContact c;
-
- if ((!n.m_battach) &&
- psb->checkContact(m_colObj1Wrap, n.m_x, m, c.m_cti))
- {
- const btScalar ima = n.m_im;
- const btScalar imb = m_rigidBody ? m_rigidBody->getInvMass() : 0.f;
- const btScalar ms = ima + imb;
- if (ms > 0)
- {
- const btTransform& wtr = m_rigidBody ? m_rigidBody->getWorldTransform() : m_colObj1Wrap->getCollisionObject()->getWorldTransform();
- static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0);
- const btMatrix3x3& iwi = m_rigidBody ? m_rigidBody->getInvInertiaTensorWorld() : iwiStatic;
- const btVector3 ra = n.m_x - wtr.getOrigin();
- const btVector3 va = m_rigidBody ? m_rigidBody->getVelocityInLocalPoint(ra) * psb->m_sst.sdt : btVector3(0, 0, 0);
- const btVector3 vb = n.m_x - n.m_q;
- const btVector3 vr = vb - va;
- const btScalar dn = btDot(vr, c.m_cti.m_normal);
- const btVector3 fv = vr - c.m_cti.m_normal * dn;
- const btScalar fc = psb->m_cfg.kDF * m_colObj1Wrap->getCollisionObject()->getFriction();
- c.m_node = &n;
- c.m_c0 = ImpulseMatrix(psb->m_sst.sdt, ima, imb, iwi, ra);
- c.m_c1 = ra;
- c.m_c2 = ima * psb->m_sst.sdt;
- c.m_c3 = fv.length2() < (dn * fc * dn * fc) ? 0 : 1 - fc;
- c.m_c4 = m_colObj1Wrap->getCollisionObject()->isStaticOrKinematicObject() ? psb->m_cfg.kKHR : psb->m_cfg.kCHR;
- psb->m_rcontacts.push_back(c);
- if (m_rigidBody)
- m_rigidBody->activate();
- }
- }
- }
- btSoftBody* psb;
- const btCollisionObjectWrapper* m_colObj1Wrap;
- btRigidBody* m_rigidBody;
- btScalar dynmargin;
- btScalar stamargin;
- };
+ //
+ // CollideSDF_RS
+ //
+ struct CollideSDF_RS : btDbvt::ICollide
+ {
+ void Process(const btDbvtNode* leaf)
+ {
+ btSoftBody::Node* node = (btSoftBody::Node*)leaf->data;
+ DoNode(*node);
+ }
+ void DoNode(btSoftBody::Node& n) const
+ {
+ const btScalar m = n.m_im > 0 ? dynmargin : stamargin;
+ btSoftBody::RContact c;
+
+ if ((!n.m_battach) &&
+ psb->checkContact(m_colObj1Wrap, n.m_x, m, c.m_cti))
+ {
+ const btScalar ima = n.m_im;
+ const btScalar imb = m_rigidBody ? m_rigidBody->getInvMass() : 0.f;
+ const btScalar ms = ima + imb;
+ if (ms > 0)
+ {
+ const btTransform& wtr = m_rigidBody ? m_rigidBody->getWorldTransform() : m_colObj1Wrap->getCollisionObject()->getWorldTransform();
+ static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0);
+ const btMatrix3x3& iwi = m_rigidBody ? m_rigidBody->getInvInertiaTensorWorld() : iwiStatic;
+ const btVector3 ra = n.m_x - wtr.getOrigin();
+ const btVector3 va = m_rigidBody ? m_rigidBody->getVelocityInLocalPoint(ra) * psb->m_sst.sdt : btVector3(0, 0, 0);
+ const btVector3 vb = n.m_x - n.m_q;
+ const btVector3 vr = vb - va;
+ const btScalar dn = btDot(vr, c.m_cti.m_normal);
+ const btVector3 fv = vr - c.m_cti.m_normal * dn;
+ const btScalar fc = psb->m_cfg.kDF * m_colObj1Wrap->getCollisionObject()->getFriction();
+ c.m_node = &n;
+ c.m_c0 = ImpulseMatrix(psb->m_sst.sdt, ima, imb, iwi, ra);
+ c.m_c1 = ra;
+ c.m_c2 = ima * psb->m_sst.sdt;
+ c.m_c3 = fv.length2() < (dn * fc * dn * fc) ? 0 : 1 - fc;
+ c.m_c4 = m_colObj1Wrap->getCollisionObject()->isStaticOrKinematicObject() ? psb->m_cfg.kKHR : psb->m_cfg.kCHR;
+ psb->m_rcontacts.push_back(c);
+ if (m_rigidBody)
+ m_rigidBody->activate();
+ }
+ }
+ }
+ btSoftBody* psb;
+ const btCollisionObjectWrapper* m_colObj1Wrap;
+ btRigidBody* m_rigidBody;
+ btScalar dynmargin;
+ btScalar stamargin;
+ };
//
// CollideSDF_RD
@@ -1654,72 +1668,74 @@ struct btSoftColliders
btSoftBody::DeformableNodeRigidContact c;
if (!n.m_battach)
- {
+ {
// check for collision at x_{n+1}^*
if (psb->checkDeformableContact(m_colObj1Wrap, n.m_q, m, c.m_cti, /*predict = */ true))
- {
- const btScalar ima = n.m_im;
- // todo: collision between multibody and fixed deformable node will be missed.
- const btScalar imb = m_rigidBody ? m_rigidBody->getInvMass() : 0.f;
- const btScalar ms = ima + imb;
- if (ms > 0)
- {
- // resolve contact at x_n
- psb->checkDeformableContact(m_colObj1Wrap, n.m_x, m, c.m_cti, /*predict = */ false);
- btSoftBody::sCti& cti = c.m_cti;
- c.m_node = &n;
- const btScalar fc = psb->m_cfg.kDF * m_colObj1Wrap->getCollisionObject()->getFriction();
- c.m_c2 = ima;
- c.m_c3 = fc;
- c.m_c4 = m_colObj1Wrap->getCollisionObject()->isStaticOrKinematicObject() ? psb->m_cfg.kKHR : psb->m_cfg.kCHR;
-
- if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
- {
- const btTransform& wtr = m_rigidBody ? m_rigidBody->getWorldTransform() : m_colObj1Wrap->getCollisionObject()->getWorldTransform();
- static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0);
- const btMatrix3x3& iwi = m_rigidBody ? m_rigidBody->getInvInertiaTensorWorld() : iwiStatic;
- const btVector3 ra = n.m_x - wtr.getOrigin();
-
- c.m_c0 = ImpulseMatrix(1, ima, imb, iwi, ra);
- c.m_c1 = ra;
- }
- else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
- {
- btMultiBodyLinkCollider* multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj);
- if (multibodyLinkCol)
- {
- btVector3 normal = cti.m_normal;
- btVector3 t1 = generateUnitOrthogonalVector(normal);
- btVector3 t2 = btCross(normal, t1);
- btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2;
- findJacobian(multibodyLinkCol, jacobianData_normal, c.m_node->m_x, normal);
- findJacobian(multibodyLinkCol, jacobianData_t1, c.m_node->m_x, t1);
- findJacobian(multibodyLinkCol, jacobianData_t2, c.m_node->m_x, t2);
-
- btScalar* J_n = &jacobianData_normal.m_jacobians[0];
- btScalar* J_t1 = &jacobianData_t1.m_jacobians[0];
- btScalar* J_t2 = &jacobianData_t2.m_jacobians[0];
-
- btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0];
- btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0];
- btScalar* u_t2 = &jacobianData_t2.m_deltaVelocitiesUnitImpulse[0];
-
- btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(),
- t1.getX(), t1.getY(), t1.getZ(),
- t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame
- const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6;
- btMatrix3x3 local_impulse_matrix = (Diagonal(n.m_im) + OuterProduct(J_n, J_t1, J_t2, u_n, u_t1, u_t2, ndof)).inverse();
- c.m_c0 = rot.transpose() * local_impulse_matrix * rot;
- c.jacobianData_normal = jacobianData_normal;
- c.jacobianData_t1 = jacobianData_t1;
- c.jacobianData_t2 = jacobianData_t2;
- c.t1 = t1;
- c.t2 = t2;
- }
- }
- psb->m_nodeRigidContacts.push_back(c);
- }
- }
+ {
+ const btScalar ima = n.m_im;
+ // todo: collision between multibody and fixed deformable node will be missed.
+ const btScalar imb = m_rigidBody ? m_rigidBody->getInvMass() : 0.f;
+ const btScalar ms = ima + imb;
+ if (ms > 0)
+ {
+ // resolve contact at x_n
+ psb->checkDeformableContact(m_colObj1Wrap, n.m_x, m, c.m_cti, /*predict = */ false);
+ btSoftBody::sCti& cti = c.m_cti;
+ c.m_node = &n;
+ const btScalar fc = psb->m_cfg.kDF * m_colObj1Wrap->getCollisionObject()->getFriction();
+ c.m_c2 = ima;
+ c.m_c3 = fc;
+ c.m_c4 = m_colObj1Wrap->getCollisionObject()->isStaticOrKinematicObject() ? psb->m_cfg.kKHR : psb->m_cfg.kCHR;
+ c.m_c5 = n.m_effectiveMass_inv;
+
+ if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
+ {
+ const btTransform& wtr = m_rigidBody ? m_rigidBody->getWorldTransform() : m_colObj1Wrap->getCollisionObject()->getWorldTransform();
+ static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0);
+ const btMatrix3x3& iwi = m_rigidBody ? m_rigidBody->getInvInertiaTensorWorld() : iwiStatic;
+ const btVector3 ra = n.m_x - wtr.getOrigin();
+
+ c.m_c0 = ImpulseMatrix(1, n.m_effectiveMass_inv, imb, iwi, ra);
+ // c.m_c0 = ImpulseMatrix(1, ima, imb, iwi, ra);
+ c.m_c1 = ra;
+ }
+ else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
+ {
+ btMultiBodyLinkCollider* multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj);
+ if (multibodyLinkCol)
+ {
+ btVector3 normal = cti.m_normal;
+ btVector3 t1 = generateUnitOrthogonalVector(normal);
+ btVector3 t2 = btCross(normal, t1);
+ btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2;
+ findJacobian(multibodyLinkCol, jacobianData_normal, c.m_node->m_x, normal);
+ findJacobian(multibodyLinkCol, jacobianData_t1, c.m_node->m_x, t1);
+ findJacobian(multibodyLinkCol, jacobianData_t2, c.m_node->m_x, t2);
+
+ btScalar* J_n = &jacobianData_normal.m_jacobians[0];
+ btScalar* J_t1 = &jacobianData_t1.m_jacobians[0];
+ btScalar* J_t2 = &jacobianData_t2.m_jacobians[0];
+
+ btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0];
+ btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0];
+ btScalar* u_t2 = &jacobianData_t2.m_deltaVelocitiesUnitImpulse[0];
+
+ btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(),
+ t1.getX(), t1.getY(), t1.getZ(),
+ t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame
+ const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6;
+ btMatrix3x3 local_impulse_matrix = (n.m_effectiveMass_inv + OuterProduct(J_n, J_t1, J_t2, u_n, u_t1, u_t2, ndof)).inverse();
+ c.m_c0 = rot.transpose() * local_impulse_matrix * rot;
+ c.jacobianData_normal = jacobianData_normal;
+ c.jacobianData_t1 = jacobianData_t1;
+ c.jacobianData_t2 = jacobianData_t2;
+ c.t1 = t1;
+ c.t2 = t2;
+ }
+ }
+ psb->m_nodeRigidContacts.push_back(c);
+ }
+ }
}
}
btSoftBody* psb;
@@ -1728,112 +1744,111 @@ struct btSoftColliders
btScalar dynmargin;
btScalar stamargin;
};
-
- //
- // CollideSDF_RDF
- //
- struct CollideSDF_RDF : btDbvt::ICollide
- {
- void Process(const btDbvtNode* leaf)
- {
- btSoftBody::Face* face = (btSoftBody::Face*)leaf->data;
- DoNode(*face);
- }
- void DoNode(btSoftBody::Face& f) const
- {
- btSoftBody::Node* n0 = f.m_n[0];
- btSoftBody::Node* n1 = f.m_n[1];
- btSoftBody::Node* n2 = f.m_n[2];
- const btScalar m = (n0->m_im > 0 && n1->m_im > 0 && n2->m_im > 0 )? dynmargin : stamargin;
- btSoftBody::DeformableFaceRigidContact c;
- btVector3 contact_point;
- btVector3 bary;
- if (psb->checkDeformableFaceContact(m_colObj1Wrap, f, contact_point, bary, m, c.m_cti, true))
- {
- f.m_pcontact[3] = 1;
- btScalar ima = n0->m_im + n1->m_im + n2->m_im;
- const btScalar imb = m_rigidBody ? m_rigidBody->getInvMass() : 0.f;
- // todo: collision between multibody and fixed deformable face will be missed.
- const btScalar ms = ima + imb;
- if (ms > 0)
- {
- // resolve contact at x_n
-// psb->checkDeformableFaceContact(m_colObj1Wrap, f, contact_point, bary, m, c.m_cti, /*predict = */ false);
- btSoftBody::sCti& cti = c.m_cti;
- c.m_contactPoint = contact_point;
- c.m_bary = bary;
- // todo xuchenhan@: this is assuming mass of all vertices are the same. Need to modify if mass are different for distinct vertices
- c.m_weights = btScalar(2)/(btScalar(1) + bary.length2()) * bary;
- c.m_face = &f;
+
+ //
+ // CollideSDF_RDF
+ //
+ struct CollideSDF_RDF : btDbvt::ICollide
+ {
+ void Process(const btDbvtNode* leaf)
+ {
+ btSoftBody::Face* face = (btSoftBody::Face*)leaf->data;
+ DoNode(*face);
+ }
+ void DoNode(btSoftBody::Face& f) const
+ {
+ btSoftBody::Node* n0 = f.m_n[0];
+ btSoftBody::Node* n1 = f.m_n[1];
+ btSoftBody::Node* n2 = f.m_n[2];
+ const btScalar m = (n0->m_im > 0 && n1->m_im > 0 && n2->m_im > 0) ? dynmargin : stamargin;
+ btSoftBody::DeformableFaceRigidContact c;
+ btVector3 contact_point;
+ btVector3 bary;
+ if (psb->checkDeformableFaceContact(m_colObj1Wrap, f, contact_point, bary, m, c.m_cti, true))
+ {
+ btScalar ima = n0->m_im + n1->m_im + n2->m_im;
+ const btScalar imb = m_rigidBody ? m_rigidBody->getInvMass() : 0.f;
+ // todo: collision between multibody and fixed deformable face will be missed.
+ const btScalar ms = ima + imb;
+ if (ms > 0)
+ {
+ // resolve contact at x_n
+ // psb->checkDeformableFaceContact(m_colObj1Wrap, f, contact_point, bary, m, c.m_cti, /*predict = */ false);
+ btSoftBody::sCti& cti = c.m_cti;
+ c.m_contactPoint = contact_point;
+ c.m_bary = bary;
+ // todo xuchenhan@: this is assuming mass of all vertices are the same. Need to modify if mass are different for distinct vertices
+ c.m_weights = btScalar(2) / (btScalar(1) + bary.length2()) * bary;
+ c.m_face = &f;
// friction is handled by the nodes to prevent sticking
-// const btScalar fc = 0;
- const btScalar fc = psb->m_cfg.kDF * m_colObj1Wrap->getCollisionObject()->getFriction();
-
- // the effective inverse mass of the face as in https://graphics.stanford.edu/papers/cloth-sig02/cloth.pdf
- ima = bary.getX()*c.m_weights.getX() * n0->m_im + bary.getY()*c.m_weights.getY() * n1->m_im + bary.getZ()*c.m_weights.getZ() * n2->m_im;
- c.m_c2 = ima;
- c.m_c3 = fc;
- c.m_c4 = m_colObj1Wrap->getCollisionObject()->isStaticOrKinematicObject() ? psb->m_cfg.kKHR : psb->m_cfg.kCHR;
- if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
- {
- const btTransform& wtr = m_rigidBody ? m_rigidBody->getWorldTransform() : m_colObj1Wrap->getCollisionObject()->getWorldTransform();
- static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0);
- const btMatrix3x3& iwi = m_rigidBody ? m_rigidBody->getInvInertiaTensorWorld() : iwiStatic;
- const btVector3 ra = contact_point - wtr.getOrigin();
-
- // we do not scale the impulse matrix by dt
- c.m_c0 = ImpulseMatrix(1, ima, imb, iwi, ra);
- c.m_c1 = ra;
- }
- else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
- {
- btMultiBodyLinkCollider* multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj);
- if (multibodyLinkCol)
- {
- btVector3 normal = cti.m_normal;
- btVector3 t1 = generateUnitOrthogonalVector(normal);
- btVector3 t2 = btCross(normal, t1);
- btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2;
- findJacobian(multibodyLinkCol, jacobianData_normal, contact_point, normal);
- findJacobian(multibodyLinkCol, jacobianData_t1, contact_point, t1);
- findJacobian(multibodyLinkCol, jacobianData_t2, contact_point, t2);
-
- btScalar* J_n = &jacobianData_normal.m_jacobians[0];
- btScalar* J_t1 = &jacobianData_t1.m_jacobians[0];
- btScalar* J_t2 = &jacobianData_t2.m_jacobians[0];
-
- btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0];
- btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0];
- btScalar* u_t2 = &jacobianData_t2.m_deltaVelocitiesUnitImpulse[0];
-
- btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(),
- t1.getX(), t1.getY(), t1.getZ(),
- t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame
- const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6;
- btMatrix3x3 local_impulse_matrix = (Diagonal(ima) + OuterProduct(J_n, J_t1, J_t2, u_n, u_t1, u_t2, ndof)).inverse();
- c.m_c0 = rot.transpose() * local_impulse_matrix * rot;
- c.jacobianData_normal = jacobianData_normal;
- c.jacobianData_t1 = jacobianData_t1;
- c.jacobianData_t2 = jacobianData_t2;
- c.t1 = t1;
- c.t2 = t2;
- }
- }
- psb->m_faceRigidContacts.push_back(c);
- }
- }
- else
- {
- f.m_pcontact[3] = 0;
- }
- }
- btSoftBody* psb;
- const btCollisionObjectWrapper* m_colObj1Wrap;
- btRigidBody* m_rigidBody;
- btScalar dynmargin;
- btScalar stamargin;
- };
-
+ // const btScalar fc = 0;
+ const btScalar fc = psb->m_cfg.kDF * m_colObj1Wrap->getCollisionObject()->getFriction();
+
+ // the effective inverse mass of the face as in https://graphics.stanford.edu/papers/cloth-sig02/cloth.pdf
+ ima = bary.getX() * c.m_weights.getX() * n0->m_im + bary.getY() * c.m_weights.getY() * n1->m_im + bary.getZ() * c.m_weights.getZ() * n2->m_im;
+ c.m_c2 = ima;
+ c.m_c3 = fc;
+ c.m_c4 = m_colObj1Wrap->getCollisionObject()->isStaticOrKinematicObject() ? psb->m_cfg.kKHR : psb->m_cfg.kCHR;
+ c.m_c5 = Diagonal(ima);
+ if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
+ {
+ const btTransform& wtr = m_rigidBody ? m_rigidBody->getWorldTransform() : m_colObj1Wrap->getCollisionObject()->getWorldTransform();
+ static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0);
+ const btMatrix3x3& iwi = m_rigidBody ? m_rigidBody->getInvInertiaTensorWorld() : iwiStatic;
+ const btVector3 ra = contact_point - wtr.getOrigin();
+
+ // we do not scale the impulse matrix by dt
+ c.m_c0 = ImpulseMatrix(1, ima, imb, iwi, ra);
+ c.m_c1 = ra;
+ }
+ else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
+ {
+ btMultiBodyLinkCollider* multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj);
+ if (multibodyLinkCol)
+ {
+ btVector3 normal = cti.m_normal;
+ btVector3 t1 = generateUnitOrthogonalVector(normal);
+ btVector3 t2 = btCross(normal, t1);
+ btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2;
+ findJacobian(multibodyLinkCol, jacobianData_normal, contact_point, normal);
+ findJacobian(multibodyLinkCol, jacobianData_t1, contact_point, t1);
+ findJacobian(multibodyLinkCol, jacobianData_t2, contact_point, t2);
+
+ btScalar* J_n = &jacobianData_normal.m_jacobians[0];
+ btScalar* J_t1 = &jacobianData_t1.m_jacobians[0];
+ btScalar* J_t2 = &jacobianData_t2.m_jacobians[0];
+
+ btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0];
+ btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0];
+ btScalar* u_t2 = &jacobianData_t2.m_deltaVelocitiesUnitImpulse[0];
+
+ btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(),
+ t1.getX(), t1.getY(), t1.getZ(),
+ t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame
+ const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6;
+ btMatrix3x3 local_impulse_matrix = (Diagonal(ima) + OuterProduct(J_n, J_t1, J_t2, u_n, u_t1, u_t2, ndof)).inverse();
+ c.m_c0 = rot.transpose() * local_impulse_matrix * rot;
+ c.jacobianData_normal = jacobianData_normal;
+ c.jacobianData_t1 = jacobianData_t1;
+ c.jacobianData_t2 = jacobianData_t2;
+ c.t1 = t1;
+ c.t2 = t2;
+ }
+ }
+ psb->m_faceRigidContacts.push_back(c);
+ }
+ }
+ // Set caching barycenters to be false after collision detection.
+ // Only turn on when contact is static.
+ f.m_pcontact[3] = 0;
+ }
+ btSoftBody* psb;
+ const btCollisionObjectWrapper* m_colObj1Wrap;
+ btRigidBody* m_rigidBody;
+ btScalar dynmargin;
+ btScalar stamargin;
+ };
+
//
// CollideVF_SS
//
@@ -1844,12 +1859,12 @@ struct btSoftColliders
{
btSoftBody::Node* node = (btSoftBody::Node*)lnode->data;
btSoftBody::Face* face = (btSoftBody::Face*)lface->data;
- for (int i = 0; i < 3; ++i)
- {
- if (face->m_n[i] == node)
- continue;
- }
-
+ for (int i = 0; i < 3; ++i)
+ {
+ if (face->m_n[i] == node)
+ continue;
+ }
+
btVector3 o = node->m_x;
btVector3 p;
btScalar d = SIMD_INFINITY;
@@ -1879,7 +1894,7 @@ struct btSoftColliders
c.m_node = node;
c.m_face = face;
c.m_weights = w;
- c.m_friction = btMax (psb[0]->m_cfg.kDF, psb[1]->m_cfg.kDF);
+ c.m_friction = btMax(psb[0]->m_cfg.kDF, psb[1]->m_cfg.kDF);
c.m_cfm[0] = ma / ms * psb[0]->m_cfg.kSHR;
c.m_cfm[1] = mb / ms * psb[1]->m_cfg.kSHR;
psb[0]->m_scontacts.push_back(c);
@@ -1889,206 +1904,205 @@ struct btSoftColliders
btSoftBody* psb[2];
btScalar mrg;
};
-
-
- //
- // CollideVF_DD
- //
- struct CollideVF_DD : btDbvt::ICollide
- {
- void Process(const btDbvtNode* lnode,
- const btDbvtNode* lface)
- {
- btSoftBody::Node* node = (btSoftBody::Node*)lnode->data;
- btSoftBody::Face* face = (btSoftBody::Face*)lface->data;
- btVector3 bary;
- if (proximityTest(face->m_n[0]->m_x, face->m_n[1]->m_x, face->m_n[2]->m_x, node->m_x, face->m_normal, mrg, bary))
- {
- const btSoftBody::Node* n[] = {face->m_n[0], face->m_n[1], face->m_n[2]};
- const btVector3 w = bary;
- const btScalar ma = node->m_im;
- btScalar mb = BaryEval(n[0]->m_im, n[1]->m_im, n[2]->m_im, w);
- if ((n[0]->m_im <= 0) ||
- (n[1]->m_im <= 0) ||
- (n[2]->m_im <= 0))
- {
- mb = 0;
- }
- const btScalar ms = ma + mb;
- if (ms > 0)
- {
- btSoftBody::DeformableFaceNodeContact c;
- c.m_normal = face->m_normal;
- if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0)
- c.m_normal = -face->m_normal;
- c.m_margin = mrg;
- c.m_node = node;
- c.m_face = face;
- c.m_bary = w;
- c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF;
- psb[0]->m_faceNodeContacts.push_back(c);
- }
- }
- }
- btSoftBody* psb[2];
- btScalar mrg;
- bool useFaceNormal;
- };
-
- //
- // CollideFF_DD
- //
- struct CollideFF_DD : btDbvt::ICollide
- {
- void Process(const btDbvntNode* lface1,
- const btDbvntNode* lface2)
- {
- btSoftBody::Face* f1 = (btSoftBody::Face*)lface1->data;
- btSoftBody::Face* f2 = (btSoftBody::Face*)lface2->data;
- if (f1 != f2)
- {
- Repel(f1, f2);
- Repel(f2, f1);
- }
- }
- void Repel(btSoftBody::Face* f1, btSoftBody::Face* f2)
- {
- //#define REPEL_NEIGHBOR 1
+
+ //
+ // CollideVF_DD
+ //
+ struct CollideVF_DD : btDbvt::ICollide
+ {
+ void Process(const btDbvtNode* lnode,
+ const btDbvtNode* lface)
+ {
+ btSoftBody::Node* node = (btSoftBody::Node*)lnode->data;
+ btSoftBody::Face* face = (btSoftBody::Face*)lface->data;
+ btVector3 bary;
+ if (proximityTest(face->m_n[0]->m_x, face->m_n[1]->m_x, face->m_n[2]->m_x, node->m_x, face->m_normal, mrg, bary))
+ {
+ const btSoftBody::Node* n[] = {face->m_n[0], face->m_n[1], face->m_n[2]};
+ const btVector3 w = bary;
+ const btScalar ma = node->m_im;
+ btScalar mb = BaryEval(n[0]->m_im, n[1]->m_im, n[2]->m_im, w);
+ if ((n[0]->m_im <= 0) ||
+ (n[1]->m_im <= 0) ||
+ (n[2]->m_im <= 0))
+ {
+ mb = 0;
+ }
+ const btScalar ms = ma + mb;
+ if (ms > 0)
+ {
+ btSoftBody::DeformableFaceNodeContact c;
+ c.m_normal = face->m_normal;
+ if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0)
+ c.m_normal = -face->m_normal;
+ c.m_margin = mrg;
+ c.m_node = node;
+ c.m_face = face;
+ c.m_bary = w;
+ c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF;
+ psb[0]->m_faceNodeContacts.push_back(c);
+ }
+ }
+ }
+ btSoftBody* psb[2];
+ btScalar mrg;
+ bool useFaceNormal;
+ };
+
+ //
+ // CollideFF_DD
+ //
+ struct CollideFF_DD : btDbvt::ICollide
+ {
+ void Process(const btDbvntNode* lface1,
+ const btDbvntNode* lface2)
+ {
+ btSoftBody::Face* f1 = (btSoftBody::Face*)lface1->data;
+ btSoftBody::Face* f2 = (btSoftBody::Face*)lface2->data;
+ if (f1 != f2)
+ {
+ Repel(f1, f2);
+ Repel(f2, f1);
+ }
+ }
+ void Repel(btSoftBody::Face* f1, btSoftBody::Face* f2)
+ {
+ //#define REPEL_NEIGHBOR 1
#ifndef REPEL_NEIGHBOR
- for (int node_id = 0; node_id < 3; ++node_id)
- {
- btSoftBody::Node* node = f1->m_n[node_id];
- for (int i = 0; i < 3; ++i)
- {
- if (f2->m_n[i] == node)
- return;
- }
- }
+ for (int node_id = 0; node_id < 3; ++node_id)
+ {
+ btSoftBody::Node* node = f1->m_n[node_id];
+ for (int i = 0; i < 3; ++i)
+ {
+ if (f2->m_n[i] == node)
+ return;
+ }
+ }
#endif
- bool skip = false;
- for (int node_id = 0; node_id < 3; ++node_id)
- {
- btSoftBody::Node* node = f1->m_n[node_id];
+ bool skip = false;
+ for (int node_id = 0; node_id < 3; ++node_id)
+ {
+ btSoftBody::Node* node = f1->m_n[node_id];
#ifdef REPEL_NEIGHBOR
- for (int i = 0; i < 3; ++i)
- {
- if (f2->m_n[i] == node)
- {
- skip = true;
- break;
- }
- }
- if (skip)
- {
- skip = false;
- continue;
- }
+ for (int i = 0; i < 3; ++i)
+ {
+ if (f2->m_n[i] == node)
+ {
+ skip = true;
+ break;
+ }
+ }
+ if (skip)
+ {
+ skip = false;
+ continue;
+ }
#endif
- btSoftBody::Face* face = f2;
- btVector3 bary;
- if (!proximityTest(face->m_n[0]->m_x, face->m_n[1]->m_x, face->m_n[2]->m_x, node->m_x, face->m_normal, mrg, bary))
- continue;
- btSoftBody::DeformableFaceNodeContact c;
- c.m_normal = face->m_normal;
- if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0)
- c.m_normal = -face->m_normal;
- c.m_margin = mrg;
- c.m_node = node;
- c.m_face = face;
- c.m_bary = bary;
- c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF;
- psb[0]->m_faceNodeContacts.push_back(c);
- }
- }
- btSoftBody* psb[2];
- btScalar mrg;
- bool useFaceNormal;
- };
-
- struct CollideCCD : btDbvt::ICollide
- {
- void Process(const btDbvtNode* lnode,
- const btDbvtNode* lface)
- {
- btSoftBody::Node* node = (btSoftBody::Node*)lnode->data;
- btSoftBody::Face* face = (btSoftBody::Face*)lface->data;
- btVector3 bary;
- if (bernsteinCCD(face, node, dt, SAFE_EPSILON, bary))
- {
- btSoftBody::DeformableFaceNodeContact c;
- c.m_normal = face->m_normal;
- if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0)
- c.m_normal = -face->m_normal;
- c.m_node = node;
- c.m_face = face;
- c.m_bary = bary;
- c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF;
- psb[0]->m_faceNodeContacts.push_back(c);
- }
- }
- void Process(const btDbvntNode* lface1,
- const btDbvntNode* lface2)
- {
- btSoftBody::Face* f1 = (btSoftBody::Face*)lface1->data;
- btSoftBody::Face* f2 = (btSoftBody::Face*)lface2->data;
- if (f1 != f2)
- {
- Repel(f1, f2);
- Repel(f2, f1);
- }
- }
- void Repel(btSoftBody::Face* f1, btSoftBody::Face* f2)
- {
- //#define REPEL_NEIGHBOR 1
+ btSoftBody::Face* face = f2;
+ btVector3 bary;
+ if (!proximityTest(face->m_n[0]->m_x, face->m_n[1]->m_x, face->m_n[2]->m_x, node->m_x, face->m_normal, mrg, bary))
+ continue;
+ btSoftBody::DeformableFaceNodeContact c;
+ c.m_normal = face->m_normal;
+ if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0)
+ c.m_normal = -face->m_normal;
+ c.m_margin = mrg;
+ c.m_node = node;
+ c.m_face = face;
+ c.m_bary = bary;
+ c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF;
+ psb[0]->m_faceNodeContacts.push_back(c);
+ }
+ }
+ btSoftBody* psb[2];
+ btScalar mrg;
+ bool useFaceNormal;
+ };
+
+ struct CollideCCD : btDbvt::ICollide
+ {
+ void Process(const btDbvtNode* lnode,
+ const btDbvtNode* lface)
+ {
+ btSoftBody::Node* node = (btSoftBody::Node*)lnode->data;
+ btSoftBody::Face* face = (btSoftBody::Face*)lface->data;
+ btVector3 bary;
+ if (bernsteinCCD(face, node, dt, SAFE_EPSILON, bary))
+ {
+ btSoftBody::DeformableFaceNodeContact c;
+ c.m_normal = face->m_normal;
+ if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0)
+ c.m_normal = -face->m_normal;
+ c.m_node = node;
+ c.m_face = face;
+ c.m_bary = bary;
+ c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF;
+ psb[0]->m_faceNodeContacts.push_back(c);
+ }
+ }
+ void Process(const btDbvntNode* lface1,
+ const btDbvntNode* lface2)
+ {
+ btSoftBody::Face* f1 = (btSoftBody::Face*)lface1->data;
+ btSoftBody::Face* f2 = (btSoftBody::Face*)lface2->data;
+ if (f1 != f2)
+ {
+ Repel(f1, f2);
+ Repel(f2, f1);
+ }
+ }
+ void Repel(btSoftBody::Face* f1, btSoftBody::Face* f2)
+ {
+ //#define REPEL_NEIGHBOR 1
#ifndef REPEL_NEIGHBOR
- for (int node_id = 0; node_id < 3; ++node_id)
- {
- btSoftBody::Node* node = f1->m_n[node_id];
- for (int i = 0; i < 3; ++i)
- {
- if (f2->m_n[i] == node)
- return;
- }
- }
+ for (int node_id = 0; node_id < 3; ++node_id)
+ {
+ btSoftBody::Node* node = f1->m_n[node_id];
+ for (int i = 0; i < 3; ++i)
+ {
+ if (f2->m_n[i] == node)
+ return;
+ }
+ }
#endif
- bool skip = false;
- for (int node_id = 0; node_id < 3; ++node_id)
- {
- btSoftBody::Node* node = f1->m_n[node_id];
+ bool skip = false;
+ for (int node_id = 0; node_id < 3; ++node_id)
+ {
+ btSoftBody::Node* node = f1->m_n[node_id];
#ifdef REPEL_NEIGHBOR
- for (int i = 0; i < 3; ++i)
- {
- if (f2->m_n[i] == node)
- {
- skip = true;
- break;
- }
- }
- if (skip)
- {
- skip = false;
- continue;
- }
+ for (int i = 0; i < 3; ++i)
+ {
+ if (f2->m_n[i] == node)
+ {
+ skip = true;
+ break;
+ }
+ }
+ if (skip)
+ {
+ skip = false;
+ continue;
+ }
#endif
- btSoftBody::Face* face = f2;
- btVector3 bary;
+ btSoftBody::Face* face = f2;
+ btVector3 bary;
if (bernsteinCCD(face, node, dt, SAFE_EPSILON, bary))
- {
- btSoftBody::DeformableFaceNodeContact c;
- c.m_normal = face->m_normal;
- if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0)
- c.m_normal = -face->m_normal;
- c.m_node = node;
- c.m_face = face;
- c.m_bary = bary;
- c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF;
- psb[0]->m_faceNodeContacts.push_back(c);
- }
- }
- }
- btSoftBody* psb[2];
- btScalar dt, mrg;
- bool useFaceNormal;
- };
+ {
+ btSoftBody::DeformableFaceNodeContact c;
+ c.m_normal = face->m_normal;
+ if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0)
+ c.m_normal = -face->m_normal;
+ c.m_node = node;
+ c.m_face = face;
+ c.m_bary = bary;
+ c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF;
+ psb[0]->m_faceNodeContacts.push_back(c);
+ }
+ }
+ }
+ btSoftBody* psb[2];
+ btScalar dt, mrg;
+ bool useFaceNormal;
+ };
};
#endif //_BT_SOFT_BODY_INTERNALS_H
diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBodySolvers.h b/thirdparty/bullet/BulletSoftBody/btSoftBodySolvers.h
index c4ac4141aa..dbb2624eee 100644
--- a/thirdparty/bullet/BulletSoftBody/btSoftBodySolvers.h
+++ b/thirdparty/bullet/BulletSoftBody/btSoftBodySolvers.h
@@ -36,7 +36,7 @@ public:
CL_SIMD_SOLVER,
DX_SOLVER,
DX_SIMD_SOLVER,
- DEFORMABLE_SOLVER
+ DEFORMABLE_SOLVER
};
protected:
diff --git a/thirdparty/bullet/BulletSoftBody/btSoftMultiBodyDynamicsWorld.cpp b/thirdparty/bullet/BulletSoftBody/btSoftMultiBodyDynamicsWorld.cpp
index 282dbf75f0..329bd19d71 100644
--- a/thirdparty/bullet/BulletSoftBody/btSoftMultiBodyDynamicsWorld.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btSoftMultiBodyDynamicsWorld.cpp
@@ -100,6 +100,11 @@ void btSoftMultiBodyDynamicsWorld::internalSingleStepSimulation(btScalar timeSte
///update soft bodies
m_softBodySolver->updateSoftBodies();
+ for (int i = 0; i < m_softBodies.size(); i++)
+ {
+ btSoftBody* psb = (btSoftBody*)m_softBodies[i];
+ psb->interpolateRenderMesh();
+ }
// End solver-wise simulation step
// ///////////////////////////////
}
diff --git a/thirdparty/bullet/BulletSoftBody/btSparseSDF.h b/thirdparty/bullet/BulletSoftBody/btSparseSDF.h
index eb290a1dbd..d611726bcd 100644
--- a/thirdparty/bullet/BulletSoftBody/btSparseSDF.h
+++ b/thirdparty/bullet/BulletSoftBody/btSparseSDF.h
@@ -22,36 +22,36 @@ subject to the following restrictions:
// Fast Hash
-#if !defined (get16bits)
-#define get16bits(d) ((((unsigned int)(((const unsigned char *)(d))[1])) << 8)\
-+(unsigned int)(((const unsigned char *)(d))[0]) )
+#if !defined(get16bits)
+#define get16bits(d) ((((unsigned int)(((const unsigned char*)(d))[1])) << 8) + (unsigned int)(((const unsigned char*)(d))[0]))
#endif
//
// super hash function by Paul Hsieh
//
-inline unsigned int HsiehHash (const char * data, int len) {
- unsigned int hash = len, tmp;
- len>>=2;
-
- /* Main loop */
- for (;len > 0; len--) {
- hash += get16bits (data);
- tmp = (get16bits (data+2) << 11) ^ hash;
- hash = (hash << 16) ^ tmp;
- data += 2*sizeof (unsigned short);
- hash += hash >> 11;
- }
+inline unsigned int HsiehHash(const char* data, int len)
+{
+ unsigned int hash = len, tmp;
+ len >>= 2;
+ /* Main loop */
+ for (; len > 0; len--)
+ {
+ hash += get16bits(data);
+ tmp = (get16bits(data + 2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 2 * sizeof(unsigned short);
+ hash += hash >> 11;
+ }
- /* Force "avalanching" of final 127 bits */
- hash ^= hash << 3;
- hash += hash >> 5;
- hash ^= hash << 4;
- hash += hash >> 17;
- hash ^= hash << 25;
- hash += hash >> 6;
+ /* Force "avalanching" of final 127 bits */
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
- return hash;
+ return hash;
}
template <const int CELLSIZE>
@@ -81,7 +81,7 @@ struct btSparseSdf
btAlignedObjectArray<Cell*> cells;
btScalar voxelsz;
- btScalar m_defaultVoxelsz;
+ btScalar m_defaultVoxelsz;
int puid;
int ncells;
int m_clampCells;
@@ -103,16 +103,16 @@ struct btSparseSdf
//if this limit is reached, the SDF is reset (at the cost of some performance during the reset)
m_clampCells = clampCells;
cells.resize(hashsize, 0);
- m_defaultVoxelsz = 0.25;
+ m_defaultVoxelsz = 0.25;
Reset();
}
//
-
- void setDefaultVoxelsz(btScalar sz)
- {
- m_defaultVoxelsz = sz;
- }
-
+
+ void setDefaultVoxelsz(btScalar sz)
+ {
+ m_defaultVoxelsz = sz;
+ }
+
void Reset()
{
for (int i = 0, ni = cells.size(); i < ni; ++i)
@@ -162,7 +162,7 @@ struct btSparseSdf
nqueries = 1;
nprobes = 1;
++puid; ///@todo: Reset puid's when int range limit is reached */
- /* else setup a priority list... */
+ /* else setup a priority list... */
}
//
int RemoveReferences(btCollisionShape* pcs)
@@ -221,7 +221,7 @@ struct btSparseSdf
else
{
// printf("c->hash/c[0][1][2]=%d,%d,%d,%d\n", c->hash, c->c[0], c->c[1],c->c[2]);
- //printf("h,ixb,iyb,izb=%d,%d,%d,%d\n", h,ix.b, iy.b, iz.b);
+ //printf("h,ixb,iyb,izb=%d,%d,%d,%d\n", h,ix.b, iy.b, iz.b);
c = c->next;
}
@@ -363,7 +363,7 @@ struct btSparseSdf
myset.p = (void*)shape;
const char* ptr = (const char*)&myset;
- unsigned int result = HsiehHash(ptr, sizeof(btS) );
+ unsigned int result = HsiehHash(ptr, sizeof(btS));
return result;
}
diff --git a/thirdparty/bullet/BulletSoftBody/poly34.cpp b/thirdparty/bullet/BulletSoftBody/poly34.cpp
index 819d0c79f7..ec7549c8e8 100644
--- a/thirdparty/bullet/BulletSoftBody/poly34.cpp
+++ b/thirdparty/bullet/BulletSoftBody/poly34.cpp
@@ -6,7 +6,7 @@
//
#include <math.h>
-#include "poly34.h" // solution of cubic and quartic equation
+#include "poly34.h" // solution of cubic and quartic equation
#define TwoPi 6.28318530717958648
const btScalar eps = SIMD_EPSILON;
@@ -15,50 +15,53 @@ const btScalar eps = SIMD_EPSILON;
//=============================================================================
static SIMD_FORCE_INLINE btScalar _root3(btScalar x)
{
- btScalar s = 1.;
- while (x < 1.) {
- x *= 8.;
- s *= 0.5;
- }
- while (x > 8.) {
- x *= 0.125;
- s *= 2.;
- }
- btScalar r = 1.5;
- r -= 1. / 3. * (r - x / (r * r));
- r -= 1. / 3. * (r - x / (r * r));
- r -= 1. / 3. * (r - x / (r * r));
- r -= 1. / 3. * (r - x / (r * r));
- r -= 1. / 3. * (r - x / (r * r));
- r -= 1. / 3. * (r - x / (r * r));
- return r * s;
+ btScalar s = 1.;
+ while (x < 1.)
+ {
+ x *= 8.;
+ s *= 0.5;
+ }
+ while (x > 8.)
+ {
+ x *= 0.125;
+ s *= 2.;
+ }
+ btScalar r = 1.5;
+ r -= 1. / 3. * (r - x / (r * r));
+ r -= 1. / 3. * (r - x / (r * r));
+ r -= 1. / 3. * (r - x / (r * r));
+ r -= 1. / 3. * (r - x / (r * r));
+ r -= 1. / 3. * (r - x / (r * r));
+ r -= 1. / 3. * (r - x / (r * r));
+ return r * s;
}
btScalar SIMD_FORCE_INLINE root3(btScalar x)
{
- if (x > 0)
- return _root3(x);
- else if (x < 0)
- return -_root3(-x);
- else
- return 0.;
+ if (x > 0)
+ return _root3(x);
+ else if (x < 0)
+ return -_root3(-x);
+ else
+ return 0.;
}
// x - array of size 2
// return 2: 2 real roots x[0], x[1]
// return 0: pair of complex roots: x[0]i*x[1]
int SolveP2(btScalar* x, btScalar a, btScalar b)
-{ // solve equation x^2 + a*x + b = 0
- btScalar D = 0.25 * a * a - b;
- if (D >= 0) {
- D = sqrt(D);
- x[0] = -0.5 * a + D;
- x[1] = -0.5 * a - D;
- return 2;
- }
- x[0] = -0.5 * a;
- x[1] = sqrt(-D);
- return 0;
+{ // solve equation x^2 + a*x + b = 0
+ btScalar D = 0.25 * a * a - b;
+ if (D >= 0)
+ {
+ D = sqrt(D);
+ x[0] = -0.5 * a + D;
+ x[1] = -0.5 * a - D;
+ return 2;
+ }
+ x[0] = -0.5 * a;
+ x[1] = sqrt(-D);
+ return 0;
}
//---------------------------------------------------------------------------
// x - array of size 3
@@ -66,217 +69,228 @@ int SolveP2(btScalar* x, btScalar a, btScalar b)
// 2 real roots: x[0], x[1], return 2
// 1 real root : x[0], x[1] i*x[2], return 1
int SolveP3(btScalar* x, btScalar a, btScalar b, btScalar c)
-{ // solve cubic equation x^3 + a*x^2 + b*x + c = 0
- btScalar a2 = a * a;
- btScalar q = (a2 - 3 * b) / 9;
- if (q < 0)
- q = eps;
- btScalar r = (a * (2 * a2 - 9 * b) + 27 * c) / 54;
- // equation x^3 + q*x + r = 0
- btScalar r2 = r * r;
- btScalar q3 = q * q * q;
- btScalar A, B;
- if (r2 <= (q3 + eps)) { //<<-- FIXED!
- btScalar t = r / sqrt(q3);
- if (t < -1)
- t = -1;
- if (t > 1)
- t = 1;
- t = acos(t);
- a /= 3;
- q = -2 * sqrt(q);
- x[0] = q * cos(t / 3) - a;
- x[1] = q * cos((t + TwoPi) / 3) - a;
- x[2] = q * cos((t - TwoPi) / 3) - a;
- return (3);
- }
- else {
- //A =-pow(fabs(r)+sqrt(r2-q3),1./3);
- A = -root3(fabs(r) + sqrt(r2 - q3));
- if (r < 0)
- A = -A;
- B = (A == 0 ? 0 : q / A);
-
- a /= 3;
- x[0] = (A + B) - a;
- x[1] = -0.5 * (A + B) - a;
- x[2] = 0.5 * sqrt(3.) * (A - B);
- if (fabs(x[2]) < eps) {
- x[2] = x[1];
- return (2);
- }
- return (1);
- }
-} // SolveP3(btScalar *x,btScalar a,btScalar b,btScalar c) {
+{ // solve cubic equation x^3 + a*x^2 + b*x + c = 0
+ btScalar a2 = a * a;
+ btScalar q = (a2 - 3 * b) / 9;
+ if (q < 0)
+ q = eps;
+ btScalar r = (a * (2 * a2 - 9 * b) + 27 * c) / 54;
+ // equation x^3 + q*x + r = 0
+ btScalar r2 = r * r;
+ btScalar q3 = q * q * q;
+ btScalar A, B;
+ if (r2 <= (q3 + eps))
+ { //<<-- FIXED!
+ btScalar t = r / sqrt(q3);
+ if (t < -1)
+ t = -1;
+ if (t > 1)
+ t = 1;
+ t = acos(t);
+ a /= 3;
+ q = -2 * sqrt(q);
+ x[0] = q * cos(t / 3) - a;
+ x[1] = q * cos((t + TwoPi) / 3) - a;
+ x[2] = q * cos((t - TwoPi) / 3) - a;
+ return (3);
+ }
+ else
+ {
+ //A =-pow(fabs(r)+sqrt(r2-q3),1./3);
+ A = -root3(fabs(r) + sqrt(r2 - q3));
+ if (r < 0)
+ A = -A;
+ B = (A == 0 ? 0 : q / A);
+
+ a /= 3;
+ x[0] = (A + B) - a;
+ x[1] = -0.5 * (A + B) - a;
+ x[2] = 0.5 * sqrt(3.) * (A - B);
+ if (fabs(x[2]) < eps)
+ {
+ x[2] = x[1];
+ return (2);
+ }
+ return (1);
+ }
+} // SolveP3(btScalar *x,btScalar a,btScalar b,btScalar c) {
//---------------------------------------------------------------------------
// a>=0!
-void CSqrt(btScalar x, btScalar y, btScalar& a, btScalar& b) // returns: a+i*s = sqrt(x+i*y)
+void CSqrt(btScalar x, btScalar y, btScalar& a, btScalar& b) // returns: a+i*b = sqrt(x+i*y)
{
- btScalar r = sqrt(x * x + y * y);
- if (y == 0) {
- r = sqrt(r);
- if (x >= 0) {
- a = r;
- b = 0;
- }
- else {
- a = 0;
- b = r;
- }
- }
- else { // y != 0
- a = sqrt(0.5 * (x + r));
- b = 0.5 * y / a;
- }
+ btScalar r = sqrt(x * x + y * y);
+ if (y == 0)
+ {
+ r = sqrt(r);
+ if (x >= 0)
+ {
+ a = r;
+ b = 0;
+ }
+ else
+ {
+ a = 0;
+ b = r;
+ }
+ }
+ else
+ { // y != 0
+ a = sqrt(0.5 * (x + r));
+ b = 0.5 * y / a;
+ }
}
//---------------------------------------------------------------------------
-int SolveP4Bi(btScalar* x, btScalar b, btScalar d) // solve equation x^4 + b*x^2 + d = 0
+int SolveP4Bi(btScalar* x, btScalar b, btScalar d) // solve equation x^4 + b*x^2 + d = 0
{
- btScalar D = b * b - 4 * d;
- if (D >= 0) {
- btScalar sD = sqrt(D);
- btScalar x1 = (-b + sD) / 2;
- btScalar x2 = (-b - sD) / 2; // x2 <= x1
- if (x2 >= 0) // 0 <= x2 <= x1, 4 real roots
- {
- btScalar sx1 = sqrt(x1);
- btScalar sx2 = sqrt(x2);
- x[0] = -sx1;
- x[1] = sx1;
- x[2] = -sx2;
- x[3] = sx2;
- return 4;
- }
- if (x1 < 0) // x2 <= x1 < 0, two pair of imaginary roots
- {
- btScalar sx1 = sqrt(-x1);
- btScalar sx2 = sqrt(-x2);
- x[0] = 0;
- x[1] = sx1;
- x[2] = 0;
- x[3] = sx2;
- return 0;
- }
- // now x2 < 0 <= x1 , two real roots and one pair of imginary root
- btScalar sx1 = sqrt(x1);
- btScalar sx2 = sqrt(-x2);
- x[0] = -sx1;
- x[1] = sx1;
- x[2] = 0;
- x[3] = sx2;
- return 2;
- }
- else { // if( D < 0 ), two pair of compex roots
- btScalar sD2 = 0.5 * sqrt(-D);
- CSqrt(-0.5 * b, sD2, x[0], x[1]);
- CSqrt(-0.5 * b, -sD2, x[2], x[3]);
- return 0;
- } // if( D>=0 )
-} // SolveP4Bi(btScalar *x, btScalar b, btScalar d) // solve equation x^4 + b*x^2 d
+ btScalar D = b * b - 4 * d;
+ if (D >= 0)
+ {
+ btScalar sD = sqrt(D);
+ btScalar x1 = (-b + sD) / 2;
+ btScalar x2 = (-b - sD) / 2; // x2 <= x1
+ if (x2 >= 0) // 0 <= x2 <= x1, 4 real roots
+ {
+ btScalar sx1 = sqrt(x1);
+ btScalar sx2 = sqrt(x2);
+ x[0] = -sx1;
+ x[1] = sx1;
+ x[2] = -sx2;
+ x[3] = sx2;
+ return 4;
+ }
+ if (x1 < 0) // x2 <= x1 < 0, two pair of imaginary roots
+ {
+ btScalar sx1 = sqrt(-x1);
+ btScalar sx2 = sqrt(-x2);
+ x[0] = 0;
+ x[1] = sx1;
+ x[2] = 0;
+ x[3] = sx2;
+ return 0;
+ }
+ // now x2 < 0 <= x1, two real roots and one pair of imaginary roots
+ btScalar sx1 = sqrt(x1);
+ btScalar sx2 = sqrt(-x2);
+ x[0] = -sx1;
+ x[1] = sx1;
+ x[2] = 0;
+ x[3] = sx2;
+ return 2;
+ }
+ else
+ { // if( D < 0 ), two pairs of complex roots
+ btScalar sD2 = 0.5 * sqrt(-D);
+ CSqrt(-0.5 * b, sD2, x[0], x[1]);
+ CSqrt(-0.5 * b, -sD2, x[2], x[3]);
+ return 0;
+ } // if( D>=0 )
+} // SolveP4Bi(btScalar *x, btScalar b, btScalar d) // solve equation x^4 + b*x^2 + d = 0
//---------------------------------------------------------------------------
#define SWAP(a, b) \
-{ \
-t = b; \
-b = a; \
-a = t; \
-}
-static void dblSort3(btScalar& a, btScalar& b, btScalar& c) // make: a <= b <= c
+ { \
+ t = b; \
+ b = a; \
+ a = t; \
+ }
+static void dblSort3(btScalar& a, btScalar& b, btScalar& c) // make: a <= b <= c
{
- btScalar t;
- if (a > b)
- SWAP(a, b); // now a<=b
- if (c < b) {
- SWAP(b, c); // now a<=b, b<=c
- if (a > b)
- SWAP(a, b); // now a<=b
- }
+ btScalar t;
+ if (a > b)
+ SWAP(a, b); // now a<=b
+ if (c < b)
+ {
+ SWAP(b, c); // now a<=b, b<=c
+ if (a > b)
+ SWAP(a, b); // now a<=b
+ }
}
//---------------------------------------------------------------------------
-int SolveP4De(btScalar* x, btScalar b, btScalar c, btScalar d) // solve equation x^4 + b*x^2 + c*x + d
+int SolveP4De(btScalar* x, btScalar b, btScalar c, btScalar d) // solve equation x^4 + b*x^2 + c*x + d
{
- //if( c==0 ) return SolveP4Bi(x,b,d); // After that, c!=0
- if (fabs(c) < 1e-14 * (fabs(b) + fabs(d)))
- return SolveP4Bi(x, b, d); // After that, c!=0
-
- int res3 = SolveP3(x, 2 * b, b * b - 4 * d, -c * c); // solve resolvent
- // by Viet theorem: x1*x2*x3=-c*c not equals to 0, so x1!=0, x2!=0, x3!=0
- if (res3 > 1) // 3 real roots,
- {
- dblSort3(x[0], x[1], x[2]); // sort roots to x[0] <= x[1] <= x[2]
- // Note: x[0]*x[1]*x[2]= c*c > 0
- if (x[0] > 0) // all roots are positive
- {
- btScalar sz1 = sqrt(x[0]);
- btScalar sz2 = sqrt(x[1]);
- btScalar sz3 = sqrt(x[2]);
- // Note: sz1*sz2*sz3= -c (and not equal to 0)
- if (c > 0) {
- x[0] = (-sz1 - sz2 - sz3) / 2;
- x[1] = (-sz1 + sz2 + sz3) / 2;
- x[2] = (+sz1 - sz2 + sz3) / 2;
- x[3] = (+sz1 + sz2 - sz3) / 2;
- return 4;
- }
- // now: c<0
- x[0] = (-sz1 - sz2 + sz3) / 2;
- x[1] = (-sz1 + sz2 - sz3) / 2;
- x[2] = (+sz1 - sz2 - sz3) / 2;
- x[3] = (+sz1 + sz2 + sz3) / 2;
- return 4;
- } // if( x[0] > 0) // all roots are positive
- // now x[0] <= x[1] < 0, x[2] > 0
- // two pair of comlex roots
- btScalar sz1 = sqrt(-x[0]);
- btScalar sz2 = sqrt(-x[1]);
- btScalar sz3 = sqrt(x[2]);
-
- if (c > 0) // sign = -1
- {
- x[0] = -sz3 / 2;
- x[1] = (sz1 - sz2) / 2; // x[0]i*x[1]
- x[2] = sz3 / 2;
- x[3] = (-sz1 - sz2) / 2; // x[2]i*x[3]
- return 0;
- }
- // now: c<0 , sign = +1
- x[0] = sz3 / 2;
- x[1] = (-sz1 + sz2) / 2;
- x[2] = -sz3 / 2;
- x[3] = (sz1 + sz2) / 2;
- return 0;
- } // if( res3>1 ) // 3 real roots,
- // now resoventa have 1 real and pair of compex roots
- // x[0] - real root, and x[0]>0,
- // x[1]i*x[2] - complex roots,
- // x[0] must be >=0. But one times x[0]=~ 1e-17, so:
- if (x[0] < 0)
- x[0] = 0;
- btScalar sz1 = sqrt(x[0]);
- btScalar szr, szi;
- CSqrt(x[1], x[2], szr, szi); // (szr+i*szi)^2 = x[1]+i*x[2]
- if (c > 0) // sign = -1
- {
- x[0] = -sz1 / 2 - szr; // 1st real root
- x[1] = -sz1 / 2 + szr; // 2nd real root
- x[2] = sz1 / 2;
- x[3] = szi;
- return 2;
- }
- // now: c<0 , sign = +1
- x[0] = sz1 / 2 - szr; // 1st real root
- x[1] = sz1 / 2 + szr; // 2nd real root
- x[2] = -sz1 / 2;
- x[3] = szi;
- return 2;
-} // SolveP4De(btScalar *x, btScalar b, btScalar c, btScalar d) // solve equation x^4 + b*x^2 + c*x + d
+ //if( c==0 ) return SolveP4Bi(x,b,d); // After that, c!=0
+ if (fabs(c) < 1e-14 * (fabs(b) + fabs(d)))
+ return SolveP4Bi(x, b, d); // After that, c!=0
+
+ int res3 = SolveP3(x, 2 * b, b * b - 4 * d, -c * c); // solve resolvent
+ // by Vieta's theorem: x1*x2*x3 = c*c, which is not equal to 0, so x1!=0, x2!=0, x3!=0
+ if (res3 > 1) // 3 real roots,
+ {
+ dblSort3(x[0], x[1], x[2]); // sort roots to x[0] <= x[1] <= x[2]
+ // Note: x[0]*x[1]*x[2]= c*c > 0
+ if (x[0] > 0) // all roots are positive
+ {
+ btScalar sz1 = sqrt(x[0]);
+ btScalar sz2 = sqrt(x[1]);
+ btScalar sz3 = sqrt(x[2]);
+ // Note: sz1*sz2*sz3= -c (and not equal to 0)
+ if (c > 0)
+ {
+ x[0] = (-sz1 - sz2 - sz3) / 2;
+ x[1] = (-sz1 + sz2 + sz3) / 2;
+ x[2] = (+sz1 - sz2 + sz3) / 2;
+ x[3] = (+sz1 + sz2 - sz3) / 2;
+ return 4;
+ }
+ // now: c<0
+ x[0] = (-sz1 - sz2 + sz3) / 2;
+ x[1] = (-sz1 + sz2 - sz3) / 2;
+ x[2] = (+sz1 - sz2 - sz3) / 2;
+ x[3] = (+sz1 + sz2 + sz3) / 2;
+ return 4;
+ } // if( x[0] > 0) // all roots are positive
+ // now x[0] <= x[1] < 0, x[2] > 0
+ // two pairs of complex roots
+ btScalar sz1 = sqrt(-x[0]);
+ btScalar sz2 = sqrt(-x[1]);
+ btScalar sz3 = sqrt(x[2]);
+
+ if (c > 0) // sign = -1
+ {
+ x[0] = -sz3 / 2;
+ x[1] = (sz1 - sz2) / 2; // x[0]i*x[1]
+ x[2] = sz3 / 2;
+ x[3] = (-sz1 - sz2) / 2; // x[2]i*x[3]
+ return 0;
+ }
+ // now: c<0 , sign = +1
+ x[0] = sz3 / 2;
+ x[1] = (-sz1 + sz2) / 2;
+ x[2] = -sz3 / 2;
+ x[3] = (sz1 + sz2) / 2;
+ return 0;
+ } // if( res3>1 ) // 3 real roots,
+ // now the resolvent has 1 real root and a pair of complex roots
+ // x[0] - real root, and x[0]>0,
+ // x[1] +/- i*x[2] - complex roots,
+ // x[0] must be >=0, but it is sometimes computed as a tiny negative value (magnitude ~1e-17), so:
+ if (x[0] < 0)
+ x[0] = 0;
+ btScalar sz1 = sqrt(x[0]);
+ btScalar szr, szi;
+ CSqrt(x[1], x[2], szr, szi); // (szr+i*szi)^2 = x[1]+i*x[2]
+ if (c > 0) // sign = -1
+ {
+ x[0] = -sz1 / 2 - szr; // 1st real root
+ x[1] = -sz1 / 2 + szr; // 2nd real root
+ x[2] = sz1 / 2;
+ x[3] = szi;
+ return 2;
+ }
+ // now: c<0 , sign = +1
+ x[0] = sz1 / 2 - szr; // 1st real root
+ x[1] = sz1 / 2 + szr; // 2nd real root
+ x[2] = -sz1 / 2;
+ x[3] = szi;
+ return 2;
+} // SolveP4De(btScalar *x, btScalar b, btScalar c, btScalar d) // solve equation x^4 + b*x^2 + c*x + d
//-----------------------------------------------------------------------------
-btScalar N4Step(btScalar x, btScalar a, btScalar b, btScalar c, btScalar d) // one Newton step for x^4 + a*x^3 + b*x^2 + c*x + d
+btScalar N4Step(btScalar x, btScalar a, btScalar b, btScalar c, btScalar d) // one Newton step for x^4 + a*x^3 + b*x^2 + c*x + d
{
- btScalar fxs = ((4 * x + 3 * a) * x + 2 * b) * x + c; // f'(x)
- if (fxs == 0)
- return x; //return 1e99; <<-- FIXED!
- btScalar fx = (((x + a) * x + b) * x + c) * x + d; // f(x)
- return x - fx / fxs;
+ btScalar fxs = ((4 * x + 3 * a) * x + 2 * b) * x + c; // f'(x)
+ if (fxs == 0)
+ return x; //return 1e99; <<-- FIXED!
+ btScalar fx = (((x + a) * x + b) * x + c) * x + d; // f(x)
+ return x - fx / fxs;
}
//-----------------------------------------------------------------------------
// x - array of size 4
@@ -284,136 +298,150 @@ btScalar N4Step(btScalar x, btScalar a, btScalar b, btScalar c, btScalar d) // o
// return 2: 2 real roots x[0], x[1] and complex x[2]i*x[3],
// return 0: two pair of complex roots: x[0]i*x[1], x[2]i*x[3],
int SolveP4(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d)
-{ // solve equation x^4 + a*x^3 + b*x^2 + c*x + d by Dekart-Euler method
- // move to a=0:
- btScalar d1 = d + 0.25 * a * (0.25 * b * a - 3. / 64 * a * a * a - c);
- btScalar c1 = c + 0.5 * a * (0.25 * a * a - b);
- btScalar b1 = b - 0.375 * a * a;
- int res = SolveP4De(x, b1, c1, d1);
- if (res == 4) {
- x[0] -= a / 4;
- x[1] -= a / 4;
- x[2] -= a / 4;
- x[3] -= a / 4;
- }
- else if (res == 2) {
- x[0] -= a / 4;
- x[1] -= a / 4;
- x[2] -= a / 4;
- }
- else {
- x[0] -= a / 4;
- x[2] -= a / 4;
- }
- // one Newton step for each real root:
- if (res > 0) {
- x[0] = N4Step(x[0], a, b, c, d);
- x[1] = N4Step(x[1], a, b, c, d);
- }
- if (res > 2) {
- x[2] = N4Step(x[2], a, b, c, d);
- x[3] = N4Step(x[3], a, b, c, d);
- }
- return res;
+{ // solve equation x^4 + a*x^3 + b*x^2 + c*x + d = 0 by the Descartes-Euler method
+ // move to a=0:
+ btScalar d1 = d + 0.25 * a * (0.25 * b * a - 3. / 64 * a * a * a - c);
+ btScalar c1 = c + 0.5 * a * (0.25 * a * a - b);
+ btScalar b1 = b - 0.375 * a * a;
+ int res = SolveP4De(x, b1, c1, d1);
+ if (res == 4)
+ {
+ x[0] -= a / 4;
+ x[1] -= a / 4;
+ x[2] -= a / 4;
+ x[3] -= a / 4;
+ }
+ else if (res == 2)
+ {
+ x[0] -= a / 4;
+ x[1] -= a / 4;
+ x[2] -= a / 4;
+ }
+ else
+ {
+ x[0] -= a / 4;
+ x[2] -= a / 4;
+ }
+ // one Newton step for each real root:
+ if (res > 0)
+ {
+ x[0] = N4Step(x[0], a, b, c, d);
+ x[1] = N4Step(x[1], a, b, c, d);
+ }
+ if (res > 2)
+ {
+ x[2] = N4Step(x[2], a, b, c, d);
+ x[3] = N4Step(x[3], a, b, c, d);
+ }
+ return res;
}
//-----------------------------------------------------------------------------
#define F5(t) (((((t + a) * t + b) * t + c) * t + d) * t + e)
//-----------------------------------------------------------------------------
-btScalar SolveP5_1(btScalar a, btScalar b, btScalar c, btScalar d, btScalar e) // return real root of x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
+btScalar SolveP5_1(btScalar a, btScalar b, btScalar c, btScalar d, btScalar e) // return real root of x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
{
- int cnt;
- if (fabs(e) < eps)
- return 0;
-
- btScalar brd = fabs(a); // brd - border of real roots
- if (fabs(b) > brd)
- brd = fabs(b);
- if (fabs(c) > brd)
- brd = fabs(c);
- if (fabs(d) > brd)
- brd = fabs(d);
- if (fabs(e) > brd)
- brd = fabs(e);
- brd++; // brd - border of real roots
-
- btScalar x0, f0; // less than root
- btScalar x1, f1; // greater than root
- btScalar x2, f2, f2s; // next values, f(x2), f'(x2)
- btScalar dx = 0;
-
- if (e < 0) {
- x0 = 0;
- x1 = brd;
- f0 = e;
- f1 = F5(x1);
- x2 = 0.01 * brd;
- } // positive root
- else {
- x0 = -brd;
- x1 = 0;
- f0 = F5(x0);
- f1 = e;
- x2 = -0.01 * brd;
- } // negative root
-
- if (fabs(f0) < eps)
- return x0;
- if (fabs(f1) < eps)
- return x1;
-
- // now x0<x1, f(x0)<0, f(x1)>0
- // Firstly 10 bisections
- for (cnt = 0; cnt < 10; cnt++) {
- x2 = (x0 + x1) / 2; // next point
- //x2 = x0 - f0*(x1 - x0) / (f1 - f0); // next point
- f2 = F5(x2); // f(x2)
- if (fabs(f2) < eps)
- return x2;
- if (f2 > 0) {
- x1 = x2;
- f1 = f2;
- }
- else {
- x0 = x2;
- f0 = f2;
- }
- }
-
- // At each step:
- // x0<x1, f(x0)<0, f(x1)>0.
- // x2 - next value
- // we hope that x0 < x2 < x1, but not necessarily
- do {
- if (cnt++ > 50)
- break;
- if (x2 <= x0 || x2 >= x1)
- x2 = (x0 + x1) / 2; // now x0 < x2 < x1
- f2 = F5(x2); // f(x2)
- if (fabs(f2) < eps)
- return x2;
- if (f2 > 0) {
- x1 = x2;
- f1 = f2;
- }
- else {
- x0 = x2;
- f0 = f2;
- }
- f2s = (((5 * x2 + 4 * a) * x2 + 3 * b) * x2 + 2 * c) * x2 + d; // f'(x2)
- if (fabs(f2s) < eps) {
- x2 = 1e99;
- continue;
- }
- dx = f2 / f2s;
- x2 -= dx;
- } while (fabs(dx) > eps);
- return x2;
-} // SolveP5_1(btScalar a,btScalar b,btScalar c,btScalar d,btScalar e) // return real root of x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
+ int cnt;
+ if (fabs(e) < eps)
+ return 0;
+
+ btScalar brd = fabs(a); // brd - border of real roots
+ if (fabs(b) > brd)
+ brd = fabs(b);
+ if (fabs(c) > brd)
+ brd = fabs(c);
+ if (fabs(d) > brd)
+ brd = fabs(d);
+ if (fabs(e) > brd)
+ brd = fabs(e);
+ brd++; // brd - border of real roots
+
+ btScalar x0, f0; // less than root
+ btScalar x1, f1; // greater than root
+ btScalar x2, f2, f2s; // next values, f(x2), f'(x2)
+ btScalar dx = 0;
+
+ if (e < 0)
+ {
+ x0 = 0;
+ x1 = brd;
+ f0 = e;
+ f1 = F5(x1);
+ x2 = 0.01 * brd;
+ } // positive root
+ else
+ {
+ x0 = -brd;
+ x1 = 0;
+ f0 = F5(x0);
+ f1 = e;
+ x2 = -0.01 * brd;
+ } // negative root
+
+ if (fabs(f0) < eps)
+ return x0;
+ if (fabs(f1) < eps)
+ return x1;
+
+ // now x0<x1, f(x0)<0, f(x1)>0
+ // Firstly 10 bisections
+ for (cnt = 0; cnt < 10; cnt++)
+ {
+ x2 = (x0 + x1) / 2; // next point
+ //x2 = x0 - f0*(x1 - x0) / (f1 - f0); // next point
+ f2 = F5(x2); // f(x2)
+ if (fabs(f2) < eps)
+ return x2;
+ if (f2 > 0)
+ {
+ x1 = x2;
+ f1 = f2;
+ }
+ else
+ {
+ x0 = x2;
+ f0 = f2;
+ }
+ }
+
+ // At each step:
+ // x0<x1, f(x0)<0, f(x1)>0.
+ // x2 - next value
+ // we hope that x0 < x2 < x1, but not necessarily
+ do
+ {
+ if (cnt++ > 50)
+ break;
+ if (x2 <= x0 || x2 >= x1)
+ x2 = (x0 + x1) / 2; // now x0 < x2 < x1
+ f2 = F5(x2); // f(x2)
+ if (fabs(f2) < eps)
+ return x2;
+ if (f2 > 0)
+ {
+ x1 = x2;
+ f1 = f2;
+ }
+ else
+ {
+ x0 = x2;
+ f0 = f2;
+ }
+ f2s = (((5 * x2 + 4 * a) * x2 + 3 * b) * x2 + 2 * c) * x2 + d; // f'(x2)
+ if (fabs(f2s) < eps)
+ {
+ x2 = 1e99;
+ continue;
+ }
+ dx = f2 / f2s;
+ x2 -= dx;
+ } while (fabs(dx) > eps);
+ return x2;
+} // SolveP5_1(btScalar a,btScalar b,btScalar c,btScalar d,btScalar e) // return real root of x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
//-----------------------------------------------------------------------------
-int SolveP5(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d, btScalar e) // solve equation x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
+int SolveP5(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d, btScalar e) // solve equation x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
{
- btScalar r = x[0] = SolveP5_1(a, b, c, d, e);
- btScalar a1 = a + r, b1 = b + r * a1, c1 = c + r * b1, d1 = d + r * c1;
- return 1 + SolveP4(x + 1, a1, b1, c1, d1);
-} // SolveP5(btScalar *x,btScalar a,btScalar b,btScalar c,btScalar d,btScalar e) // solve equation x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
+ btScalar r = x[0] = SolveP5_1(a, b, c, d, e);
+ btScalar a1 = a + r, b1 = b + r * a1, c1 = c + r * b1, d1 = d + r * c1;
+ return 1 + SolveP4(x + 1, a1, b1, c1, d1);
+} // SolveP5(btScalar *x,btScalar a,btScalar b,btScalar c,btScalar d,btScalar e) // solve equation x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
//-----------------------------------------------------------------------------
diff --git a/thirdparty/bullet/BulletSoftBody/poly34.h b/thirdparty/bullet/BulletSoftBody/poly34.h
index 32ad5d7da5..35a52c5fec 100644
--- a/thirdparty/bullet/BulletSoftBody/poly34.h
+++ b/thirdparty/bullet/BulletSoftBody/poly34.h
@@ -8,31 +8,31 @@
// x - array of size 2
// return 2: 2 real roots x[0], x[1]
// return 0: pair of complex roots: x[0]i*x[1]
-int SolveP2(btScalar* x, btScalar a, btScalar b); // solve equation x^2 + a*x + b = 0
+int SolveP2(btScalar* x, btScalar a, btScalar b); // solve equation x^2 + a*x + b = 0
// x - array of size 3
// return 3: 3 real roots x[0], x[1], x[2]
// return 1: 1 real root x[0] and pair of complex roots: x[1]i*x[2]
-int SolveP3(btScalar* x, btScalar a, btScalar b, btScalar c); // solve cubic equation x^3 + a*x^2 + b*x + c = 0
+int SolveP3(btScalar* x, btScalar a, btScalar b, btScalar c); // solve cubic equation x^3 + a*x^2 + b*x + c = 0
// x - array of size 4
// return 4: 4 real roots x[0], x[1], x[2], x[3], possible multiple roots
// return 2: 2 real roots x[0], x[1] and complex x[2]i*x[3],
// return 0: two pair of complex roots: x[0]i*x[1], x[2]i*x[3],
-int SolveP4(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d); // solve equation x^4 + a*x^3 + b*x^2 + c*x + d = 0 by Dekart-Euler method
+int SolveP4(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d); // solve equation x^4 + a*x^3 + b*x^2 + c*x + d = 0 by Dekart-Euler method
// x - array of size 5
// return 5: 5 real roots x[0], x[1], x[2], x[3], x[4], possible multiple roots
// return 3: 3 real roots x[0], x[1], x[2] and complex x[3]i*x[4],
// return 1: 1 real root x[0] and two pair of complex roots: x[1]i*x[2], x[3]i*x[4],
-int SolveP5(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d, btScalar e); // solve equation x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
+int SolveP5(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d, btScalar e); // solve equation x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
//-----------------------------------------------------------------------------
// And some additional functions for internal use.
// Your may remove this definitions from here
-int SolveP4Bi(btScalar* x, btScalar b, btScalar d); // solve equation x^4 + b*x^2 + d = 0
-int SolveP4De(btScalar* x, btScalar b, btScalar c, btScalar d); // solve equation x^4 + b*x^2 + c*x + d = 0
-void CSqrt(btScalar x, btScalar y, btScalar& a, btScalar& b); // returns as a+i*s, sqrt(x+i*y)
-btScalar N4Step(btScalar x, btScalar a, btScalar b, btScalar c, btScalar d); // one Newton step for x^4 + a*x^3 + b*x^2 + c*x + d
-btScalar SolveP5_1(btScalar a, btScalar b, btScalar c, btScalar d, btScalar e); // return real root of x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
+int SolveP4Bi(btScalar* x, btScalar b, btScalar d); // solve equation x^4 + b*x^2 + d = 0
+int SolveP4De(btScalar* x, btScalar b, btScalar c, btScalar d); // solve equation x^4 + b*x^2 + c*x + d = 0
+void CSqrt(btScalar x, btScalar y, btScalar& a, btScalar& b); // returns as a+i*s, sqrt(x+i*y)
+btScalar N4Step(btScalar x, btScalar a, btScalar b, btScalar c, btScalar d); // one Newton step for x^4 + a*x^3 + b*x^2 + c*x + d
+btScalar SolveP5_1(btScalar a, btScalar b, btScalar c, btScalar d, btScalar e); // return real root of x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
#endif
diff --git a/thirdparty/bullet/LinearMath/btAlignedAllocator.cpp b/thirdparty/bullet/LinearMath/btAlignedAllocator.cpp
index 39b302b600..be8f8aa6d0 100644
--- a/thirdparty/bullet/LinearMath/btAlignedAllocator.cpp
+++ b/thirdparty/bullet/LinearMath/btAlignedAllocator.cpp
@@ -138,7 +138,7 @@ struct btDebugPtrMagic
};
};
-void *btAlignedAllocInternal(size_t size, int alignment, int line, char *filename)
+void *btAlignedAllocInternal(size_t size, int alignment, int line, const char *filename)
{
if (size == 0)
{
@@ -195,7 +195,7 @@ void *btAlignedAllocInternal(size_t size, int alignment, int line, char *filenam
return (ret);
}
-void btAlignedFreeInternal(void *ptr, int line, char *filename)
+void btAlignedFreeInternal(void *ptr, int line, const char *filename)
{
void *real;
diff --git a/thirdparty/bullet/LinearMath/btAlignedAllocator.h b/thirdparty/bullet/LinearMath/btAlignedAllocator.h
index ce4d3585f1..971f62bfb0 100644
--- a/thirdparty/bullet/LinearMath/btAlignedAllocator.h
+++ b/thirdparty/bullet/LinearMath/btAlignedAllocator.h
@@ -35,9 +35,9 @@ int btDumpMemoryLeaks();
#define btAlignedFree(ptr) \
btAlignedFreeInternal(ptr, __LINE__, __FILE__)
-void* btAlignedAllocInternal(size_t size, int alignment, int line, char* filename);
+void* btAlignedAllocInternal(size_t size, int alignment, int line, const char* filename);
-void btAlignedFreeInternal(void* ptr, int line, char* filename);
+void btAlignedFreeInternal(void* ptr, int line, const char* filename);
#else
void* btAlignedAllocInternal(size_t size, int alignment);
diff --git a/thirdparty/bullet/LinearMath/btConvexHullComputer.cpp b/thirdparty/bullet/LinearMath/btConvexHullComputer.cpp
index 8bbfdc5f25..12125fd2de 100644
--- a/thirdparty/bullet/LinearMath/btConvexHullComputer.cpp
+++ b/thirdparty/bullet/LinearMath/btConvexHullComputer.cpp
@@ -105,7 +105,7 @@ public:
Point64 cross(const Point32& b) const
{
- return Point64(y * b.z - z * b.y, z * b.x - x * b.z, x * b.y - y * b.x);
+ return Point64(((int64_t)y) * b.z - ((int64_t)z) * b.y, ((int64_t)z) * b.x - ((int64_t)x) * b.z, ((int64_t)x) * b.y - ((int64_t)y) * b.x);
}
Point64 cross(const Point64& b) const
@@ -115,7 +115,7 @@ public:
int64_t dot(const Point32& b) const
{
- return x * b.x + y * b.y + z * b.z;
+ return ((int64_t)x) * b.x + ((int64_t)y) * b.y + ((int64_t)z) * b.z;
}
int64_t dot(const Point64& b) const
@@ -2673,6 +2673,7 @@ btScalar btConvexHullComputer::compute(const void* coords, bool doubleCoords, in
}
vertices.resize(0);
+ original_vertex_index.resize(0);
edges.resize(0);
faces.resize(0);
@@ -2683,6 +2684,7 @@ btScalar btConvexHullComputer::compute(const void* coords, bool doubleCoords, in
{
btConvexHullInternal::Vertex* v = oldVertices[copied];
vertices.push_back(hull.getCoordinates(v));
+ original_vertex_index.push_back(v->point.index);
btConvexHullInternal::Edge* firstEdge = v->edges;
if (firstEdge)
{
diff --git a/thirdparty/bullet/LinearMath/btConvexHullComputer.h b/thirdparty/bullet/LinearMath/btConvexHullComputer.h
index cba684f2dc..18b26eea9a 100644
--- a/thirdparty/bullet/LinearMath/btConvexHullComputer.h
+++ b/thirdparty/bullet/LinearMath/btConvexHullComputer.h
@@ -66,6 +66,9 @@ public:
// Vertices of the output hull
btAlignedObjectArray<btVector3> vertices;
+ // The original vertex index in the input coords array
+ btAlignedObjectArray<int> original_vertex_index;
+
// Edges of the output hull
btAlignedObjectArray<Edge> edges;
diff --git a/thirdparty/bullet/LinearMath/btReducedVector.h b/thirdparty/bullet/LinearMath/btReducedVector.h
index 83b5e581e5..313a4271f0 100644
--- a/thirdparty/bullet/LinearMath/btReducedVector.h
+++ b/thirdparty/bullet/LinearMath/btReducedVector.h
@@ -267,7 +267,7 @@ public:
std::sort(tuples.begin(), tuples.end());
btAlignedObjectArray<int> new_indices;
btAlignedObjectArray<btVector3> new_vecs;
- for (int i = 0; i < tuples.size(); ++i)
+ for (size_t i = 0; i < tuples.size(); ++i)
{
new_indices.push_back(tuples[i].b);
new_vecs.push_back(m_vecs[tuples[i].a]);
diff --git a/thirdparty/bullet/LinearMath/btScalar.h b/thirdparty/bullet/LinearMath/btScalar.h
index 86d94e8974..36b90cc944 100644
--- a/thirdparty/bullet/LinearMath/btScalar.h
+++ b/thirdparty/bullet/LinearMath/btScalar.h
@@ -25,7 +25,7 @@ subject to the following restrictions:
#include <float.h>
/* SVN $Revision$ on $Date$ from http://bullet.googlecode.com*/
-#define BT_BULLET_VERSION 289
+#define BT_BULLET_VERSION 307
inline int btGetVersion()
{
diff --git a/thirdparty/bullet/LinearMath/btSerializer.h b/thirdparty/bullet/LinearMath/btSerializer.h
index 2ee712047f..9abcf031d0 100644
--- a/thirdparty/bullet/LinearMath/btSerializer.h
+++ b/thirdparty/bullet/LinearMath/btSerializer.h
@@ -479,9 +479,9 @@ public:
buffer[8] = 'V';
}
- buffer[9] = '2';
- buffer[10] = '8';
- buffer[11] = '9';
+ buffer[9] = '3';
+ buffer[10] = '0';
+ buffer[11] = '7';
}
virtual void startSerialization()
diff --git a/thirdparty/fonts/NotoSansBengali_Regular.ttf b/thirdparty/fonts/NotoSansBengali_Regular.ttf
new file mode 100644
index 0000000000..daeabcf817
--- /dev/null
+++ b/thirdparty/fonts/NotoSansBengali_Regular.ttf
Binary files differ
diff --git a/thirdparty/fonts/NotoSansGeorgian_Regular.ttf b/thirdparty/fonts/NotoSansGeorgian_Regular.ttf
new file mode 100644
index 0000000000..9bfc8d9675
--- /dev/null
+++ b/thirdparty/fonts/NotoSansGeorgian_Regular.ttf
Binary files differ
diff --git a/thirdparty/fonts/NotoSansMalayalamUI_Regular.ttf b/thirdparty/fonts/NotoSansMalayalamUI_Regular.ttf
new file mode 100644
index 0000000000..37f3591706
--- /dev/null
+++ b/thirdparty/fonts/NotoSansMalayalamUI_Regular.ttf
Binary files differ
diff --git a/thirdparty/fonts/NotoSansOriyaUI_Regular.ttf b/thirdparty/fonts/NotoSansOriyaUI_Regular.ttf
new file mode 100644
index 0000000000..7b50a71620
--- /dev/null
+++ b/thirdparty/fonts/NotoSansOriyaUI_Regular.ttf
Binary files differ
diff --git a/thirdparty/fonts/NotoSansSinhalaUI_Regular.ttf b/thirdparty/fonts/NotoSansSinhalaUI_Regular.ttf
new file mode 100644
index 0000000000..a4b297d691
--- /dev/null
+++ b/thirdparty/fonts/NotoSansSinhalaUI_Regular.ttf
Binary files differ
diff --git a/thirdparty/fonts/NotoSansTamilUI_Regular.ttf b/thirdparty/fonts/NotoSansTamilUI_Regular.ttf
new file mode 100644
index 0000000000..e65aeb8d0b
--- /dev/null
+++ b/thirdparty/fonts/NotoSansTamilUI_Regular.ttf
Binary files differ
diff --git a/thirdparty/fonts/NotoSansTeluguUI_Regular.ttf b/thirdparty/fonts/NotoSansTeluguUI_Regular.ttf
new file mode 100644
index 0000000000..5394a28cfe
--- /dev/null
+++ b/thirdparty/fonts/NotoSansTeluguUI_Regular.ttf
Binary files differ
diff --git a/thirdparty/fonts/Tamsyn10x20.png b/thirdparty/fonts/Tamsyn10x20.png
new file mode 100644
index 0000000000..b2d3b5cb5c
--- /dev/null
+++ b/thirdparty/fonts/Tamsyn10x20.png
Binary files differ
diff --git a/thirdparty/fonts/Tamsyn5x9.png b/thirdparty/fonts/Tamsyn5x9.png
new file mode 100644
index 0000000000..ac42b32641
--- /dev/null
+++ b/thirdparty/fonts/Tamsyn5x9.png
Binary files differ
diff --git a/thirdparty/graphite/COPYING b/thirdparty/graphite/COPYING
new file mode 100644
index 0000000000..f6630af533
--- /dev/null
+++ b/thirdparty/graphite/COPYING
@@ -0,0 +1,26 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+ Alternatively, you may use this library under the terms of the Mozilla
+ Public License (http://mozilla.org/MPL) or under the GNU General Public
+ License, as published by the Free Sofware Foundation; either version
+ 2 of the license or (at your option) any later version.
+*/
diff --git a/thirdparty/graphite/ChangeLog b/thirdparty/graphite/ChangeLog
new file mode 100644
index 0000000000..e36110e1c1
--- /dev/null
+++ b/thirdparty/graphite/ChangeLog
@@ -0,0 +1,238 @@
+1.3.14
+ . Bug fixes
+ . Allow features to be hidden (for aliases)
+ . Move to python3
+ . Rename doc files from .txt to .asc
+
+1.3.13
+ . Resolve minor spacing issue in rtl non-overlap kerning
+ . python3 for graphite.py
+ . Better fuzzing
+ . Better building on windows
+
+1.3.12
+ . Graphite no longer does dumb rendering for fonts with no smarts
+ . Segment caching code removed. Anything attempting to use the segment cache gets given a regular face instead
+ . Add libfuzzer support
+ . Builds now require C++11
+ . Improvements to Windows 64 bit builds
+ . Support different versions of python including 32 bit and python 3
+ . Various minor bug fixes
+
+1.3.11
+ . Fixes due to security review
+ . Minor collision avoidance fixes
+ . Fix LZ4 decompressor against high compression
+
+1.3.10
+ . Address floating point build parameters to give consistent positioning results across platforms
+ . Various bug fixes
+
+1.3.9
+ . Add Collision COLL_ISSPACE to allow for visible spaces in collision avoidance
+ . Add segment and pass direction information to tracing output
+ . Bug fix rule length testing in 32-bit
+ . Increase slanted margin distances for collision avoidance
+ . Change kerning algorithm to simple outline expansion. Seems to make no visible difference.
+ . Add trace2svg to test tools
+
+1.3.8
+ . Various bug fixes arising from fuzzing
+ . Fix regression that stopped piglatin from working
+ . Make collision avoidance kerning give more regular results
+ . Minor modification to clustering algorithm to handle variable width chars
+
+1.3.7
+ . Bug fixes
+ . Start to deprecate SegCache. This will be going away in a later release.
+
+1.3.6
+ . Bug fixes
+
+1.3.5
+ . Bug fixes
+ . Security bug fix
+ . Fix ARM misalignment problem
+ . Track latest cmake
+
+1.3.4
+ . Transition from Mercurial to Git
+ . Bug fixes
+ . Fix Collision Kerning ignoring some diacritics
+ . Handle pass bits 16-31 to speed up fonts with > 16 passes
+ . Various minor fuzz bug fixes
+ . Make Coverity happy
+ . Add GR_FALLTHROUGH macro for clang c++11
+
+1.3.3
+ . Slight speed up in Collision Avoidance
+ . Remove dead bidi code
+ . Bug fixes
+ . Between pass bidi reorderings and at the end
+ . Decompressor fuzz bugs
+ . Other fuzz bugs
+
+1.3.2
+ . Remove full bidi. All segments are assumed to be single directioned.
+ . Bug fixes:
+ . Decompressor corner cases
+ . Various fuzz bugs
+
+1.3.1
+ . Deprecation warning: Full bidi support is about to be deprecated. Make contact
+ if this impacts you.
+ . Change compression block format slightly to conform to LZ4
+ . Bug fixes:
+ . Handle mono direction text with diacritics consistently. Fonts
+ now see the direction they expect consistently and bidi now
+ gives expected results.
+ . Fixed lots of fuzz bugs
+ . Coverity cleanups
+ . Build now works for clang and/or asan and/or afl etc.
+
+1.3.0
+ . Add collision avoidance
+ . Shift Collider
+ . Kern Collider
+ . Octabox outlines and subboxes
+ . Add compressed Silf and Glat table support
+ . Bug fixes:
+ . Stop loops forming in the child, sibling tree
+ . Handle bidi mirroring correctly if no bidi occurring
+
+1.2.4
+ . Face failure now has error code reporting via debug logging
+ . can now call gr_start_logging(NULL, fname)
+ . gr2fonttest --alltrace added
+ . Format 14 table support
+ . Not done. To be handled entirely in the compiler
+ . Bidi support for Unicode 6.3 Isolating direction controls
+ . Fonts no longer require a glyf/loca table. In such cases the bounding box is always 0.
+ . Clang ASAN build support added for testing.
+ . Handle out of memory sanely.
+ . Documentation improvements
+ . Bug fixes:
+ . Enforce fonts having to store glyph attributes by monotonically increasing attribute number
+ . zeropadding was not getting called on feature tags
+ . automatic associations for unassociated characters
+ . use direct engine on Mac
+ . various extreme case reading 1 past the end errors fixed
+ . remove tabs from sources so that it becomes readable again
+
+1.2.3
+ . Bug fixes only:
+ . fix byte swapping when testing cmap subtable lengths
+ . work around armel compilation problems with conditional operators
+ . fix pseudoglyph support for advance and bbox
+
+1.2.2
+ . Add support for passKeySlot (makes Charis 2x faster) up to 32 passes
+ . Add telemetry output to json if enabled in build GRAPHITE2_TELEMETRY
+ . Shrink font memory footprint particularly in the fsm
+ . Add -S to comparerenderer
+ . Bug fixes:
+ . Fix shift.x being reversed for rtl text
+ . Fix faulty fallback justification
+ . Fix bad cmap handling
+ . Support compiling on old Solaris where bidi attributes clash with register names
+ . Follow the crowd in using Windows.h
+
+1.2.1
+ . Bug fixes:
+ . Allow glyph reattachment
+ . Allow signed glyph attributes
+ . Various build problems with MacOS, old gcc versions, etc.
+ . Various overrun read errors fixed
+
+1.2.0
+ . API Changes:
+ . Added Windows friendly gr_start_logging and gr_stop_logging, now per face
+ . Added gr_make_face_with_ops, gr_make_face_with_seg_cache_and_ops
+ . Added gr_make_font_with_ops
+ . Added gr_face_is_char_supported
+ . Added gr_face_info to give info to apps about face capabilities
+ . Deprecated gr_make_face, gr_make_face_with_seg_cache, gr_make_font_with_advance_fn
+ . Deprecated graphite_start_logging and graphite_stop_logging
+ . These functions are stubbed now and do nothing, but do compile and link.
+ . Bump API version to 3
+ . Add C# wrapper to contrib
+ . Handle justification information in a font and do something useful with it
+ . Builds clang clean (has done for a while)
+ . Bug fixes
+ . Windows build and bug fixes
+ . Add extra information to json debug output
+ . Added windows build documentation
+ . Added freetype sample code and test
+
+1.1.3
+ . Default build has GRAPHITE2_COMPARE_RENDERER to OFF to reduce dependencies
+ . Builds on Mac with clang
+ . Debug output improvements
+ . Tidy up perl wrappers
+ . Fuzz tester improvements
+ . Various bug fixes for bad font handling
+
+1.1.2
+ . Support feature ids < 4 chars when space padded for inclusion in FF 14.
+ . More fuzztesting and removal of causes of valgrind bad reads and sigabrts
+ . Remove contrib/android into its own repo (http://hg.palaso.org/grandroid)
+ . Update comparerenderer to latest harfbuzzng api
+
+1.1.1
+ . Missing Log.h included
+ . perl wrappers updated
+
+1.1.0
+ . Refactored debug output to use json
+ . Renamed VM_MACHINE_TYPE to GRAPHITE2_VM_TYPE
+ . Renamed DISABLE_SEGCACHE to GRAPHITE2_NSEGCACE
+ . Renamed DISABLE_FILE_FACE to GRAPHITE2_NFILEFACE
+ . Renamed ENABLE_COMPARE_RENDERER to GRAPHITE2_COMPARE_RENDERER
+ . Renamed DOXYGEN_CONFIG to GRAPHITE2_DOXYGEN_CONFIG
+ . Renamed GR2_CUSTOM_HEADER to GRAPHITE2_CUSTOM_HEADER
+ . Renamed GR2_EXPORTING to GRAPHITE2_EXPORTING
+ . Added GRAPHITE2_STATIC for static only builds
+ . Added GRAPHITE2_NTRACING to compile out tracing code
+ . Documented GRAPHITE2_{EXPORTING,STATIC,NTRACING} in hacking.txt
+ . Bump libtool version to 2.1.0
+ . dumb font rendering works
+ . slot user attributes are now signed rather than unsigned
+ . add support for long class maps
+ . Rename perl library to avoid nameclash on Windows
+ . Various robustness fixes
+ . Moved internal .h files into src/inc
+ . Parallelise fuzztest
+ . General build improvements, particularly on Windows
+
+1.0.3
+ . Fix UTF16 surrogate support
+ . script and lang tags may be space padded or null padded
+ . Remove need for WORDS_BIGENDIAN, do it all automatically
+ . Remove all #include <new>. Use CLASS_NEW_DELETE instead.
+ . Fix comparerenderer to work with current hbng
+ . Add valgrind to fuzztest to ensure good memory use at all times
+ . Fix new fuzztest exposed bugs.
+ . Fix bugs exposed by Mozilla security review
+ . Add continuous integration build on Windows support
+
+1.0.2
+ . Fix Windows build
+ . Comparerenderer uses hbng enforcing ot rendering
+ . Add Bidi .hasChar support and refactor mirroring code
+ . Make cmake default Release rather than debug
+ . Don't compile in a boat load of TtfUtil that isn't used, saving 15% of binary
+ . Chase the FSF around its latest office moves
+ . WORDS_BIGENDIAN is set at the top so tests now pass on ppc, etc.
+ . More words in the manual
+
+1.0.1
+ . Release is the default build in cmake now.
+ . Refactor cmake build to not rebuild things so much.
+ . Include a missing file
+ . Remove -nostdlibs, making gcc happy everywhere
+ . Update comparerenderer to latest hbng interface
+ . Add changelog
+
+1.0.0
+ . First major release of perfect code!
+
diff --git a/thirdparty/graphite/include/graphite2/Font.h b/thirdparty/graphite/include/graphite2/Font.h
new file mode 100644
index 0000000000..fe569295a5
--- /dev/null
+++ b/thirdparty/graphite/include/graphite2/Font.h
@@ -0,0 +1,389 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+ Alternatively, the contents of this file may be used under the terms
+ of the Mozilla Public License (http://mozilla.org/MPL) or the GNU
+ General Public License, as published by the Free Software Foundation,
+ either version 2 of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include "graphite2/Types.h"
+
+#define GR2_VERSION_MAJOR 1
+#define GR2_VERSION_MINOR 3
+#define GR2_VERSION_BUGFIX 14
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+typedef struct gr_face gr_face;
+typedef struct gr_font gr_font;
+typedef struct gr_feature_ref gr_feature_ref;
+typedef struct gr_feature_val gr_feature_val;
+
+/**
+* Returns version information on this engine
+*/
+GR2_API void gr_engine_version(int *nMajor, int *nMinor, int *nBugFix);
+
+/**
+* The Face Options allow the application to require that certain tables are
+* read during face construction. This may be of concern if the appFaceHandle
+* used in the gr_get_table_fn may change.
+* The values can be combined
+*/
+enum gr_face_options {
+ /** No preload, no cmap caching, fail if the graphite tables are invalid */
+ gr_face_default = 0,
+ /** Dumb rendering will be enabled if the graphite tables are invalid. @deprecated Since 1.311 */
+ gr_face_dumbRendering = 1,
+ /** preload glyphs at construction time */
+ gr_face_preloadGlyphs = 2,
+ /** Cache the lookup from code point to glyph ID at construction time */
+ gr_face_cacheCmap = 4,
+ /** Preload everything */
+ gr_face_preloadAll = gr_face_preloadGlyphs | gr_face_cacheCmap
+};
+
+/** Holds information about a particular Graphite silf table that has been loaded */
+struct gr_faceinfo {
+ gr_uint16 extra_ascent; /**< The extra_ascent in the GDL, in design units */
+ gr_uint16 extra_descent; /**< The extra_descent in the GDL, in design units */
+ gr_uint16 upem; /**< The design units for the font */
+ enum gr_space_contextuals {
+ gr_space_unknown = 0, /**< no information is known. */
+ gr_space_none = 1, /**< the space character never occurs in any rules. */
+ gr_space_left_only = 2, /**< the space character only occurs as the first element in a rule. */
+ gr_space_right_only = 3, /**< the space character only occurs as the last element in a rule. */
+ gr_space_either_only = 4, /**< the space character only occurs as the only element in a rule. */
+ gr_space_both = 5, /**< the space character may occur as the first or last element of a rule. */
+ gr_space_cross = 6 /**< the space character occurs in a rule not as a first or last element. */
+ } space_contextuals;
+ unsigned int has_bidi_pass : 1; /**< the table specifies that a bidirectional pass should run */
+ unsigned int line_ends : 1; /**< there are line end contextuals somewhere */
+ unsigned int justifies : 1; /**< there are .justify properties set somewhere on some glyphs */
+};
+
+typedef struct gr_faceinfo gr_faceinfo;
+
+/** type describing function to retrieve font table information
+ *
+ * @return a pointer to the table in memory. The pointed to memory must exist as
+ * long as the gr_face which makes the call.
+ * @param appFaceHandle is the unique information passed to gr_make_face()
+ * @param name is a 32bit tag to the table name.
+ * @param len returned by this function to say how long the table is in memory.
+ */
+typedef const void *(*gr_get_table_fn)(const void* appFaceHandle, unsigned int name, size_t *len);
+
+/** type describing function to release any resources allocated by the above get_table table function
+ *
+ * @param appFaceHandle is the unique information passed to gr_make_face()
+ * @param table_buffer pointer to table memory returned by get_table.
+ */
+typedef void (*gr_release_table_fn)(const void* appFaceHandle, const void *table_buffer);
+
+/** struct housing function pointers to manage font table buffers for the graphite engine. */
+struct gr_face_ops
+{
+ /** size in bytes of this structure */
+ size_t size;
+ /** a pointer to a function to request a table from the client. */
+ gr_get_table_fn get_table;
+ /** is a pointer to a function to notify the client that a table can be released.
+ * This can be NULL to signify that the client does not wish to do any release handling. */
+ gr_release_table_fn release_table;
+};
+typedef struct gr_face_ops gr_face_ops;
+
+/** Create a gr_face object given application information and table functions.
+ *
+ * @return gr_face or NULL if the font fails to load for some reason.
+ * @param appFaceHandle This is application specific information that is passed
+ * to the getTable function. The appFaceHandle must stay
+ * alive as long as the gr_face is alive.
+ * @param face_ops Pointer to face specific callback structure for table
+ * management. Must stay alive for the duration of the
+ * call only.
+ * @param faceOptions Bitfield describing various options. See enum gr_face_options for details.
+ */
+GR2_API gr_face* gr_make_face_with_ops(const void* appFaceHandle/*non-NULL*/, const gr_face_ops *face_ops, unsigned int faceOptions);
+
+/** @deprecated Since v1.2.0 in favour of gr_make_face_with_ops.
+ * Create a gr_face object given application information and a getTable function.
+ *
+ * @return gr_face or NULL if the font fails to load for some reason.
+ * @param appFaceHandle This is application specific information that is passed
+ * to the getTable function. The appFaceHandle must stay
+ * alive as long as the gr_face is alive.
+ * @param getTable Callback function to get table data.
+ * @param faceOptions Bitfield describing various options. See enum gr_face_options for details.
+ */
+GR2_DEPRECATED_API gr_face* gr_make_face(const void* appFaceHandle/*non-NULL*/, gr_get_table_fn getTable, unsigned int faceOptions);
+
+/** @deprecated Since 1.3.7 this function is now an alias for gr_make_face_with_ops().
+ *
+ * Create a gr_face object given application information, with subsegmental caching support
+ *
+ * @return gr_face or NULL if the font fails to load.
+ * @param appFaceHandle is a pointer to application specific information that is passed to getTable.
+ * This may not be NULL and must stay alive as long as the gr_face is alive.
+ * @param face_ops Pointer to face specific callback structure for table management. Must stay
+ * alive for the duration of the call only.
+ * @param segCacheMaxSize Unused.
+ * @param faceOptions Bitfield of values from enum gr_face_options
+ */
+GR2_DEPRECATED_API gr_face* gr_make_face_with_seg_cache_and_ops(const void* appFaceHandle, const gr_face_ops *face_ops, unsigned int segCacheMaxSize, unsigned int faceOptions);
+
+/** @deprecated Since 1.3.7 this function is now an alias for gr_make_face().
+ *
+ * Create a gr_face object given application information, with subsegmental caching support.
+ * This function is deprecated as of v1.2.0 in favour of gr_make_face_with_seg_cache_and_ops.
+ *
+ * @return gr_face or NULL if the font fails to load.
+ * @param appFaceHandle is a pointer to application specific information that is passed to getTable.
+ * This may not be NULL and must stay alive as long as the gr_face is alive.
+ * @param getTable The function graphite calls to access font table data
+ * @param segCacheMaxSize How large the segment cache is.
+ * @param faceOptions Bitfield of values from enum gr_face_options
+ */
+GR2_DEPRECATED_API gr_face* gr_make_face_with_seg_cache(const void* appFaceHandle, gr_get_table_fn getTable, unsigned int segCacheMaxSize, unsigned int faceOptions);
+
+/** Convert a tag in a string into a gr_uint32
+ *
+ * @return gr_uint32 tag, zero padded
+ * @param str a nul terminated string of which at most the first 4 characters are read
+ */
+GR2_API gr_uint32 gr_str_to_tag(const char *str);
+
+/** Convert a gr_uint32 tag into a string
+ *
+ * @param tag contains the tag to convert
+ * @param str is a pointer to a char array of at least size 4 bytes. The first 4 bytes of this array
+ * will be overwritten by this function. No nul is appended.
+ */
+GR2_API void gr_tag_to_str(gr_uint32 tag, char *str);
+
+/** Get feature values for a given language or default
+ *
+ * @return a copy of the default feature values for a given language. The application must call
+ * gr_featureval_destroy() to free this object when done.
+ * @param pFace The font face to get feature values from
+ * @param langname The language tag to get feature values for. If there is no such language or
+ * langname is 0, the default feature values for the font are returned.
+ * langname is right 0 padded and assumes lowercase. Thus the en language
+ * would be 0x656E0000. Langname may also be space padded, thus 0x656E2020.
+ */
+GR2_API gr_feature_val* gr_face_featureval_for_lang(const gr_face* pFace, gr_uint32 langname);
+
+/** Get feature reference for a given feature id from a face
+ *
+ * @return a feature reference corresponding to the given id. This data is part of the gr_face and
+ * will be freed when the face is destroyed.
+ * @param pFace Font face to get information on.
+ * @param featId Feature id tag to get reference to.
+ */
+GR2_API const gr_feature_ref* gr_face_find_fref(const gr_face* pFace, gr_uint32 featId);
+
+/** Returns number of feature references in a face **/
+GR2_API gr_uint16 gr_face_n_fref(const gr_face* pFace);
+
+/** Returns feature reference at given index in face **/
+GR2_API const gr_feature_ref* gr_face_fref(const gr_face* pFace, gr_uint16 i);
+
+/** Return number of languages the face knows about **/
+GR2_API unsigned short gr_face_n_languages(const gr_face* pFace);
+
+/** Returns a language id corresponding to a language of given index in the face **/
+GR2_API gr_uint32 gr_face_lang_by_index(const gr_face* pFace, gr_uint16 i);
+
+/** Destroy the given face and free its memory **/
+GR2_API void gr_face_destroy(gr_face *face);
+
+/** Returns the number of glyphs in the face **/
+GR2_API unsigned short gr_face_n_glyphs(const gr_face* pFace);
+
+/** Returns a faceinfo for the face and script **/
+GR2_API const gr_faceinfo *gr_face_info(const gr_face *pFace, gr_uint32 script);
+
+/** Returns whether the font supports a given Unicode character
+ *
+ * @return true if the character is supported.
+ * @param pFace face to test within
+ * @param usv Unicode Scalar Value of character to test
+ * @param script Tag of script for selecting which set of pseudo glyphs to test. May be 0.
+ */
+GR2_API int gr_face_is_char_supported(const gr_face *pFace, gr_uint32 usv, gr_uint32 script);
+
+#ifndef GRAPHITE2_NFILEFACE
+/** Create gr_face from a font file
+ *
+ * @return gr_face that accesses a font file directly. Returns NULL on failure.
+ * @param filename Full path and filename to font file
+ * @param faceOptions Bitfield from enum gr_face_options to control face options.
+ */
+GR2_API gr_face* gr_make_file_face(const char *filename, unsigned int faceOptions);
+
+/** @deprecated Since 1.3.7. This function is now an alias for gr_make_file_face().
+ *
+ * Create gr_face from a font file, with subsegment caching support.
+ *
+ * @return gr_face that accesses a font file directly. Returns NULL on failure.
+ * @param filename Full path and filename to font file
+ * @param segCacheMaxSize Specifies how big to make the cache in segments.
+ * @param faceOptions Bitfield from enum gr_face_options to control face options.
+ */
+GR2_DEPRECATED_API gr_face* gr_make_file_face_with_seg_cache(const char *filename, unsigned int segCacheMaxSize, unsigned int faceOptions);
+#endif // !GRAPHITE2_NFILEFACE
+
+/** Create a font from a face
+ *
+ * @return gr_font Call gr_font_destroy() to free this font
+ * @param ppm Resolution of the font in pixels per em
+ * @param face Face this font corresponds to. This must stay alive as long as the font is alive.
+ */
+GR2_API gr_font* gr_make_font(float ppm, const gr_face *face);
+
+/** query function to find the hinted advance of a glyph
+ *
+ * @param appFontHandle is the unique information passed to gr_make_font_with_advance()
+ * @param glyphid is the glyph to retrieve the hinted advance for.
+ */
+typedef float (*gr_advance_fn)(const void* appFontHandle, gr_uint16 glyphid);
+
+/** struct housing function pointers to manage font hinted metrics for the
+ * graphite engine. */
+struct gr_font_ops
+{
+ /** size of the structure in bytes to allow for future extensibility */
+ size_t size;
+ /** a pointer to a function to retrieve the hinted
+ * advance width of a glyph which the font cannot
+ * provide without client assistance. This can be
+ * NULL to signify no horizontal hinted metrics are necessary. */
+ gr_advance_fn glyph_advance_x;
+ /** a pointer to a function to retrieve the hinted
+ * advance height of a glyph which the font cannot
+ * provide without client assistance. This can be
+ * NULL to signify no vertical hinted metrics are necessary. */
+ gr_advance_fn glyph_advance_y;
+};
+typedef struct gr_font_ops gr_font_ops;
+
+/** Creates a font with hinted advance width query functions
+ *
+ * @return gr_font to be destroyed via gr_font_destroy()
+ * @param ppm size of font in pixels per em
+ * @param appFontHandle font specific information that must stay alive as long
+ * as the font does
+ * @param font_ops pointer font specific callback structure for hinted metrics.
+ * Need only stay alive for the duration of the call.
+ * @param face the face this font corresponds to. Must stay alive as long as
+ * the font does.
+ */
+GR2_API gr_font* gr_make_font_with_ops(float ppm, const void* appFontHandle, const gr_font_ops * font_ops, const gr_face *face);
+
+/** Creates a font with hinted advance width query function.
+ * This function is deprecated. Use gr_make_font_with_ops instead.
+ *
+ * @return gr_font to be destroyed via gr_font_destroy()
+ * @param ppm size of font in pixels per em
+ * @param appFontHandle font specific information that must stay alive as long
+ * as the font does
+ * @param getAdvance callback function reference that returns horizontal advance in pixels for a glyph.
+ * @param face the face this font corresponds to. Must stay alive as long as
+ * the font does.
+ */
+GR2_API gr_font* gr_make_font_with_advance_fn(float ppm, const void* appFontHandle, gr_advance_fn getAdvance, const gr_face *face);
+
+/** Free a font **/
+GR2_API void gr_font_destroy(gr_font *font);
+
+/** get a feature value
+ *
+ * @return value of specific feature or 0 if any problems.
+ * @param pfeatureref gr_feature_ref to the feature
+ * @param feats gr_feature_val containing all the values
+ */
+GR2_API gr_uint16 gr_fref_feature_value(const gr_feature_ref* pfeatureref, const gr_feature_val* feats);
+
+/** set a feature value
+ *
+ * @return false if there were any problems (value out of range, etc.)
+ * @param pfeatureref gr_feature_ref to the feature
+ * @param val value to set the feature to
+ * @param pDest the gr_feature_val containing all the values for all the features
+ */
+GR2_API int gr_fref_set_feature_value(const gr_feature_ref* pfeatureref, gr_uint16 val, gr_feature_val* pDest);
+
+/** Returns the id tag for a gr_feature_ref **/
+GR2_API gr_uint32 gr_fref_id(const gr_feature_ref* pfeatureref);
+
+/** Returns number of values a feature may take, given a gr_feature_ref **/
+GR2_API gr_uint16 gr_fref_n_values(const gr_feature_ref* pfeatureref);
+
+/** Returns the value associated with a particular value in a feature
+ *
+ * @return value
+ * @param pfeatureref gr_feature_ref of the feature of interest
+ * @param settingno Index up to the return value of gr_fref_n_values() of the value
+ */
+GR2_API gr_int16 gr_fref_value(const gr_feature_ref* pfeatureref, gr_uint16 settingno);
+
+/** Returns a string of the UI name of a feature
+ *
+ * @return string of the UI name, in the encoding form requested. Call gr_label_destroy() after use.
+ * @param pfeatureref gr_feature_ref of the feature
+ * @param langId This is a pointer since the face may not support a string in the requested
+ * language. The actual language of the string is returned in langId
+ * @param utf Encoding form for the string
+ * @param length Used to return the length of the string returned in bytes.
+ */
+GR2_API void* gr_fref_label(const gr_feature_ref* pfeatureref, gr_uint16 *langId, enum gr_encform utf, gr_uint32 *length);
+
+/** Return a UI string for a possible value of a feature
+ *
+ * @return string of the UI name, in the encoding form requested. nul terminated. Call gr_label_destroy()
+ * after use.
+ * @param pfeatureref gr_feature_ref of the feature
+ * @param settingno Value setting index
+ * @param langId This is a pointer to the requested language. The requested language id is
+ * replaced by the actual language id of the string returned.
+ * @param utf Encoding form for the string
+ * @param length Returns the length of the string returned in bytes.
+ */
+GR2_API void* gr_fref_value_label(const gr_feature_ref* pfeatureref, gr_uint16 settingno/*rather than a value*/, gr_uint16 *langId, enum gr_encform utf, gr_uint32 *length);
+
+/** Destroy a previously returned label string **/
+GR2_API void gr_label_destroy(void * label);
+
+/** Copies a gr_feature_val **/
+GR2_API gr_feature_val* gr_featureval_clone(const gr_feature_val* pfeatures);
+
+/** Destroys a gr_feature_val **/
+GR2_API void gr_featureval_destroy(gr_feature_val *pfeatures);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/thirdparty/graphite/include/graphite2/Log.h b/thirdparty/graphite/include/graphite2/Log.h
new file mode 100644
index 0000000000..a5a6947fab
--- /dev/null
+++ b/thirdparty/graphite/include/graphite2/Log.h
@@ -0,0 +1,85 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+ Alternatively, the contents of this file may be used under the terms
+ of the Mozilla Public License (http://mozilla.org/MPL) or the GNU
+ General Public License, as published by the Free Software Foundation,
+ either version 2 of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include <graphite2/Types.h>
+#include <graphite2/Font.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/** Deprecated logging mask; it no longer has any effect (graphite_start_logging() ignores it). */
+typedef enum {
+ GRLOG_NONE = 0x0,
+ GRLOG_FACE = 0x01,
+ GRLOG_SEGMENT = 0x02,
+ GRLOG_PASS = 0x04,
+ GRLOG_CACHE = 0x08,
+
+ GRLOG_OPCODE = 0x80,
+ GRLOG_ALL = 0xFF
+} GrLogMask;
+
+/** Start logging all segment creation and updates on the provided face. This
+ * is logged to a JSON file, see "Segment JSON Schema.txt" for a precise
+ * definition of the file
+ *
+ * @return true if the file was successfully created and logging is correctly
+ * initialised.
+ * @param face the gr_face whose segments you want to log to the given file
+ * @param log_path a utf8 encoded file name and path to log to.
+ */
+GR2_API bool gr_start_logging(gr_face * face, const char *log_path);
+
+
+/** Stop logging on the given face. This will close the log file created by
+ * gr_start_logging.
+ *
+ * @param face the gr_face whose segments you want to stop logging
+ */
+GR2_API void gr_stop_logging(gr_face * face);
+
+/** Start logging to a FILE object.
+ * This function is deprecated as of 1.2.0, use the _face versions instead.
+ *
+ * @return True on success
+ * @param logFile FILE reference to output logging to
+ * @param mask What aspects of logging to report (ignored)
+ */
+GR2_API bool graphite_start_logging(FILE * logFile, GrLogMask mask); //may not do anything if disabled in the implementation of the engine.
+
+/** Stop logging to a FILE object.
+ * This function is deprecated as of 1.2.0, use the _face versions instead.
+ */
+GR2_API void graphite_stop_logging();
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/thirdparty/graphite/include/graphite2/Segment.h b/thirdparty/graphite/include/graphite2/Segment.h
new file mode 100644
index 0000000000..0e24f5d795
--- /dev/null
+++ b/thirdparty/graphite/include/graphite2/Segment.h
@@ -0,0 +1,461 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+ Alternatively, the contents of this file may be used under the terms
+ of the Mozilla Public License (http://mozilla.org/MPL) or the GNU
+ General Public License, as published by the Free Software Foundation,
+ either version 2 of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include "graphite2/Types.h"
+#include "graphite2/Font.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+enum gr_break_weight {
+ gr_breakNone = 0,
+ /* after break weights: positive values contribute to the breakweight after the character */
+ gr_breakWhitespace = 10,
+ gr_breakWord = 15,
+ gr_breakIntra = 20,
+ gr_breakLetter = 30,
+ gr_breakClip = 40,
+ /* before break weights: negative values; the absolute value contributes to the breakweight before the character */
+ gr_breakBeforeWhitespace = -10,
+ gr_breakBeforeWord = -15,
+ gr_breakBeforeIntra = -20,
+ gr_breakBeforeLetter = -30,
+ gr_breakBeforeClip = -40
+};
+
+enum gr_justFlags {
+ /// Indicates that this segment is a complete line (the default for gr_seg_justify())
+ gr_justCompleteLine = 0,
+ /// Indicates that the start of the slot list is not at the start of a line
+ gr_justStartInline = 1,
+ /// Indicates that the end of the slot list is not at the end of a line
+ gr_justEndInline = 2
+};
+
+/** Used for looking up slot attributes. Most are already available in other functions **/
+enum gr_attrCode {
+ /// adjusted glyph advance in x direction in design units
+ gr_slatAdvX = 0,
+ /// adjusted glyph advance in y direction (usually 0) in design units
+ gr_slatAdvY,
+ /// returns 0. Deprecated.
+ gr_slatAttTo,
+ /// This slot attaches to its parent at the given design units in the x direction
+ gr_slatAttX,
+ /// This slot attaches to its parent at the given design units in the y direction
+ gr_slatAttY,
+ /// This slot attaches to its parent at the given glyph point (not implemented)
+ gr_slatAttGpt,
+ /// x-direction adjustment from the given glyph point (not implemented)
+ gr_slatAttXOff,
+ /// y-direction adjustment from the given glyph point (not implemented)
+ gr_slatAttYOff,
+ /// Where on this glyph should align with the attachment point on the parent glyph in the x-direction.
+ gr_slatAttWithX,
+ /// Where on this glyph should align with the attachment point on the parent glyph in the y-direction
+ gr_slatAttWithY,
+ /// Which glyph point on this glyph should align with the attachment point on the parent glyph (not implemented).
+ gr_slatWithGpt,
+ /// Adjustment to gr_slatWithGpt in x-direction (not implemented)
+ gr_slatAttWithXOff,
+ /// Adjustment to gr_slatWithGpt in y-direction (not implemented)
+ gr_slatAttWithYOff,
+ /// Attach at given nesting level (not implemented)
+ gr_slatAttLevel,
+ /// Line break breakweight for this glyph
+ gr_slatBreak,
+ /// Ligature component reference (not implemented)
+ gr_slatCompRef,
+ /// bidi directionality of this glyph (not implemented)
+ gr_slatDir,
+ /// Whether insertion is allowed before this glyph
+ gr_slatInsert,
+ /// Final positioned position of this glyph relative to its parent in x-direction in pixels
+ gr_slatPosX,
+ /// Final positioned position of this glyph relative to its parent in y-direction in pixels
+ gr_slatPosY,
+ /// Amount to shift glyph by in x-direction design units
+ gr_slatShiftX,
+ /// Amount to shift glyph by in y-direction design units
+ gr_slatShiftY,
+ /// attribute user1
+ gr_slatUserDefnV1,
+ /// not implemented
+ gr_slatMeasureSol,
+ /// not implemented
+ gr_slatMeasureEol,
+ /// Amount this slot can stretch (not implemented)
+ gr_slatJStretch,
+ /// Amount this slot can shrink (not implemented)
+ gr_slatJShrink,
+ /// Granularity by which this slot can stretch or shrink (not implemented)
+ gr_slatJStep,
+ /// Justification weight for this glyph (not implemented)
+ gr_slatJWeight,
+ /// Amount this slot must shrink or stretch in design units
+ gr_slatJWidth = 29,
+ /// SubSegment split point (evaluates to 54, since gr_slatJStretch is 25)
+ gr_slatSegSplit = gr_slatJStretch + 29,
+ /// User defined attribute, see subattr for user attr number
+ gr_slatUserDefn,
+ /// Bidi level
+ gr_slatBidiLevel = 56,
+ /// Collision flags
+ gr_slatColFlags,
+ /// Collision constraint rectangle left (bl.x)
+ gr_slatColLimitblx,
+ /// Collision constraint rectangle lower (bl.y)
+ gr_slatColLimitbly,
+ /// Collision constraint rectangle right (tr.x)
+ gr_slatColLimittrx,
+ /// Collision constraint rectangle upper (tr.y)
+ gr_slatColLimittry,
+ /// Collision shift x
+ gr_slatColShiftx,
+ /// Collision shift y
+ gr_slatColShifty,
+ /// Collision margin
+ gr_slatColMargin,
+ /// Margin cost weight
+ gr_slatColMarginWt,
+ // Additional glyph that excludes movement near this one:
+ gr_slatColExclGlyph,
+ gr_slatColExclOffx,
+ gr_slatColExclOffy,
+ // Collision sequence enforcing attributes:
+ gr_slatSeqClass,
+ gr_slatSeqProxClass,
+ gr_slatSeqOrder,
+ gr_slatSeqAboveXoff,
+ gr_slatSeqAboveWt,
+ gr_slatSeqBelowXlim,
+ gr_slatSeqBelowWt,
+ gr_slatSeqValignHt,
+ gr_slatSeqValignWt,
+
+ /// not implemented
+ gr_slatMax,
+ /// not implemented
+ gr_slatNoEffect = gr_slatMax + 1
+};
+
+enum gr_bidirtl {
+ /// Underlying paragraph direction is RTL (these flags are passed in the dir argument of gr_make_seg())
+ gr_rtl = 1,
+ /// Set this to not run the bidi pass internally, even if the font asks for it.
+ /// This presumes that the segment is in a single direction. Most of the time
+ /// this bit should be set unless you know you are passing full paragraphs of text.
+ gr_nobidi = 2,
+ /// Disable auto mirroring for rtl text
+ gr_nomirror = 4
+};
+
+typedef struct gr_char_info gr_char_info;
+typedef struct gr_segment gr_segment;
+typedef struct gr_slot gr_slot;
+
+/** Returns Unicode character for a charinfo.
+ *
+ * @param p Pointer to charinfo to return information on.
+ */
+GR2_API unsigned int gr_cinfo_unicode_char(const gr_char_info* p/*not NULL*/);
+
+/** Returns breakweight for a charinfo.
+ *
+ * @return Breakweight is a number between -50 and 50 indicating the cost of a
+ * break before or after this character. If the value < 0, the absolute value
+ * is this character's contribution to the overall breakweight before it. If the value
+ * > 0, then the value is this character's contribution to the overall breakweight after it.
+ * The overall breakweight between two characters is the maximum of the breakweight
+ * contributions from the characters either side of it. If a character makes no
+ * contribution to the breakweight on one side of it, the contribution is considered
+ * to be 0.
+ * @param p Pointer to charinfo to return information on.
+ */
+GR2_API int gr_cinfo_break_weight(const gr_char_info* p/*not NULL*/);
+
+/** Returns the index of the slot that a cursor after this character is after in the slot stream
+ *
+ * In effect each character is associated with a set of slots and this returns
+ * the index of the last slot in the segment this character is associated with.
+ *
+ * @return after slot index between 0 and gr_seg_n_slots()
+ * @param p Pointer to charinfo to return information on.
+ */
+GR2_API int gr_cinfo_after(const gr_char_info* p/*not NULL*/);
+
+/** Returns the index of the slot that a cursor before this character is before in the slot stream
+ *
+ * In effect each character is associated with a set of slots and this returns
+ * the index of the first slot in the segment this character is associated with.
+ *
+ * @return before slot index between 0 and gr_seg_n_slots()
+ * @param p Pointer to charinfo to return information on.
+ */
+GR2_API int gr_cinfo_before(const gr_char_info* p/*not NULL*/);
+
+/** Returns the code unit index of this character in the input string
+ *
+ * @return code unit index between 0 and the end of the string
+ * @param p Pointer to charinfo to return information on.
+ */
+GR2_API size_t gr_cinfo_base(const gr_char_info* p/*not NULL*/);
+
+/** Returns the number of unicode characters in a string.
+ *
+ * @return number of characters in the string
+ * @param enc Specifies the type of data in the string: utf8, utf16, utf32
+ * @param buffer_begin The start of the string
+ * @param buffer_end Measure up to the first nul or when end is reached, whichever is earliest.
+ * This parameter may be NULL.
+ * @param pError If there is a structural fault in the string, the location is returned
+ * in this variable. If no error occurs, pError will contain NULL. NULL
+ * may be passed for pError if no such information is required.
+ */
+GR2_API size_t gr_count_unicode_characters(enum gr_encform enc, const void* buffer_begin, const void* buffer_end, const void** pError);
+
+/** Creates and returns a segment.
+ *
+ * @return a segment that needs gr_seg_destroy() called on it. May return NULL if bad problems
+ * in segment processing.
+ * @param font Gives the size of the font in pixels per em for final positioning. If
+ * NULL, positions are returned in design units, i.e. at a ppm of the upem
+ * of the face.
+ * @param face The face containing all the non-size dependent information.
+ * @param script This is a tag containing a script identifier that is used to choose
+ * which graphite table within the font to use. May be 0. Tag may be 4 chars
+ * NULL padded in LSBs or space padded in LSBs.
+ * @param pFeats Pointer to a feature values to be used for the segment. Only one
+ * feature values may be used for a segment. If NULL the default features
+ * for the font will be used.
+ * @param enc Specifies what encoding form the string is in (utf8, utf16, utf32)
+ * @param pStart Start of the string
+ * @param nChars Number of unicode characters to process in the string. The string will
+ * be processed either up to the first NULL or until nChars have been
+ * processed. nChars is also used to initialise the internal memory
+ * allocations of the segment. So it is wise not to make nChars too much
+ * greater than the actual number of characters being processed.
+ * @param dir Specifies whether the segment is processed right to left (1) or left to
+ * right (0) and whether to run the internal bidi pass, if a font requests it.
+ * See enum gr_bidirtl for details.
+ */
+GR2_API gr_segment* gr_make_seg(const gr_font* font, const gr_face* face, gr_uint32 script, const gr_feature_val* pFeats, enum gr_encform enc, const void* pStart, size_t nChars, int dir);
+
+/** Destroys a segment, freeing the memory.
+ *
+ * @param p The segment to destroy
+ */
+GR2_API void gr_seg_destroy(gr_segment* p);
+
+/** Returns the advance for the whole segment.
+ *
+ * Returns the width of the segment up to the next glyph origin after the segment
+ */
+GR2_API float gr_seg_advance_X(const gr_segment* pSeg/*not NULL*/);
+
+/** Returns the height advance for the segment. **/
+GR2_API float gr_seg_advance_Y(const gr_segment* pSeg/*not NULL*/);
+
+/** Returns the number of gr_char_infos in the segment. **/
+GR2_API unsigned int gr_seg_n_cinfo(const gr_segment* pSeg/*not NULL*/);
+
+/** Returns a gr_char_info at a given index in the segment. **/
+GR2_API const gr_char_info* gr_seg_cinfo(const gr_segment* pSeg/*not NULL*/, unsigned int index/*must be <number_of_CharInfo*/);
+
+/** Returns the number of glyph gr_slots in the segment. **/
+GR2_API unsigned int gr_seg_n_slots(const gr_segment* pSeg/*not NULL*/); //one slot per glyph
+
+/** Returns the first gr_slot in the segment.
+ *
+ * The first slot in a segment has a gr_slot_prev_in_segment() of NULL. Slots are owned
+ * by their segment and are destroyed along with the segment.
+ */
+GR2_API const gr_slot* gr_seg_first_slot(gr_segment* pSeg/*not NULL*/); //may give a base slot or a slot which is attached to another
+
+/** Returns the last gr_slot in the segment.
+ *
+ * The last slot in a segment has a gr_slot_next_in_segment() of NULL
+ */
+GR2_API const gr_slot* gr_seg_last_slot(gr_segment* pSeg/*not NULL*/); //may give a base slot or a slot which is attached to another
+
+/** Justifies a linked list of slots for a line to a given width
+ *
+ * Passed a pointer to the start of a linked list of slots corresponding to a line, as
+ * set up by gr_slot_linebreak_before, this function will position the glyphs in the line
+ * to take up the given width. It is possible to specify a subrange within the line to process.
+ * This allows skipping of line initial or final whitespace, for example. While this will ensure
+ * that the subrange fits width, the line will still be positioned with the first glyph of the
+ * line at 0. So the resulting positions may be beyond width.
+ *
+ * @return float The resulting width of the range of slots justified.
+ * @param pSeg Pointer to the segment
+ * @param pStart Pointer to the start of the line linked list (including skipped characters)
+ * @param pFont Font to use for positioning
+ * @param width Width in pixels in which to fit the line. If < 0. don't adjust natural width, just run justification passes
+ * to handle line end contextuals, if there are any.
+ * @param flags Indicates line ending types. Default is linked list is a full line
+ * @param pFirst If not NULL, the first slot in the list to be considered part of the line (so can skip)
+ * @param pLast If not NULL, the last slot to process in the line (allow say trailing whitespace to be skipped)
+ */
+GR2_API float gr_seg_justify(gr_segment* pSeg/*not NULL*/, const gr_slot* pStart/*not NULL*/, const gr_font *pFont, double width, enum gr_justFlags flags, const gr_slot* pFirst, const gr_slot* pLast);
+
+/** Returns the next slot along in the segment.
+ *
+ * Slots are held in a linked list. This returns the next in the linked list. The slot
+ * may or may not be attached to another slot. Returns NULL at the end of the segment.
+ */
+GR2_API const gr_slot* gr_slot_next_in_segment(const gr_slot* p);
+
+/** Returns the previous slot along in the segment.
+ *
+ * Slots are held in a doubly linked list. This returns the previous slot in the linked
+ * list. This slot may or may not be attached to it. Returns NULL at the start of the
+ * segment.
+ */
+GR2_API const gr_slot* gr_slot_prev_in_segment(const gr_slot* p);
+
+/** Returns the attachment parent slot of this slot.
+ *
+ * Attached slots form a tree. This returns the parent of this slot in that tree. A
+ * base glyph which is not attached to another glyph, always returns NULL.
+ */
+GR2_API const gr_slot* gr_slot_attached_to(const gr_slot* p);
+
+/** Returns the first slot attached to this slot.
+ *
+ * Attached slots form a singly linked list from the parent. This returns the first
+ * slot in that list. Note that this is a reference to another slot that is also in
+ * the main segment doubly linked list.
+ *
+ * if gr_slot_first_attachment(p) != NULL then gr_slot_attached_to(gr_slot_first_attachment(p)) == p.
+ */
+GR2_API const gr_slot* gr_slot_first_attachment(const gr_slot* p);
+
+/** Returns the next slot attached to our attachment parent.
+ *
+ * This returns the next slot in the singly linked list of slots attached to this
+ * slot's parent. If there are no more such slots, NULL is returned. If there is
+ * no parent, i.e. the passed slot is a cluster base, then the next cluster base
+ * in graphical order (ltr, even for rtl text) is returned.
+ *
+ * if gr_slot_next_sibling_attachment(p) != NULL then gr_slot_attached_to(gr_slot_next_sibling_attachment(p)) == gr_slot_attached_to(p).
+ */
+GR2_API const gr_slot* gr_slot_next_sibling_attachment(const gr_slot* p);
+
+
+/** Returns glyph id of the slot
+ *
+ * Each slot has a glyphid which is rendered at the position given by the slot. This
+ * glyphid is the real glyph to be rendered and never a pseudo glyph.
+ */
+GR2_API unsigned short gr_slot_gid(const gr_slot* p);
+
+/** Returns X offset of glyph from start of segment **/
+GR2_API float gr_slot_origin_X(const gr_slot* p);
+
+/** Returns Y offset of glyph from start of segment **/
+GR2_API float gr_slot_origin_Y(const gr_slot* p);
+
+/** Returns the glyph advance for this glyph as adjusted for kerning
+ *
+ * @param p Slot to give results for
+ * @param face gr_face of the glyphs. May be NULL if unhinted advances used
+ * @param font gr_font to scale for pixel results. If NULL returns design
+ * units advance. If not NULL then returns pixel advance based
+ * on hinted or scaled glyph advances in the font. face must be
+ * passed for hinted advances to be used.
+ */
+GR2_API float gr_slot_advance_X(const gr_slot* p, const gr_face* face, const gr_font *font);
+
+/** Returns the vertical advance for the glyph in the slot adjusted for kerning
+ *
+ * Returns design units unless font is not NULL in which case the pixel value
+ * is returned scaled for the given font
+ */
+GR2_API float gr_slot_advance_Y(const gr_slot* p, const gr_face* face, const gr_font *font);
+
+/** Returns the gr_char_info index before us
+ *
+ * Returns the index of the gr_char_info that a cursor before this slot, would put
+ * an underlying cursor before. This may also be interpreted as each slot holding
+ * a set of char_infos that it is associated with and this function returning the
+ * index of the char_info with lowest index, from this set.
+ */
+GR2_API int gr_slot_before(const gr_slot* p/*not NULL*/);
+
+/** Returns the gr_char_info index after us
+ *
+ * Returns the index of the gr_char_info that a cursor after this slot would put an
+ * underlying cursor after. This may also be interpreted as each slot holding a set
+ * of char_infos that it is associated with and this function returning the index of
+ * the char_info with the highest index, from this set.
+ */
+GR2_API int gr_slot_after(const gr_slot* p/*not NULL*/);
+
+/** Returns the index of this slot in the segment
+ *
+ * Returns the index given to this slot during final positioning. This corresponds
+ * to the value returned by gr_cinfo_before() and gr_cinfo_after()
+ */
+GR2_API unsigned int gr_slot_index(const gr_slot* p/*not NULL*/);
+
+/** Return a slot attribute value
+ *
+ * Given a slot and an attribute along with a possible subattribute, return the
+ * corresponding value in the slot. See enum gr_attrCode for details of each attribute.
+ */
+GR2_API int gr_slot_attr(const gr_slot* p/*not NULL*/, const gr_segment* pSeg/*not NULL*/, enum gr_attrCode index, gr_uint8 subindex); //tbd - do we need to expose this?
+
+/** Returns whether text may be inserted before this glyph.
+ *
+ * This indicates whether a cursor can be put before this slot. It applies to
+ * base glyphs that have no parent as well as attached glyphs that have the
+ * .insert attribute explicitly set to true. This is the primary mechanism
+ * for identifying contiguous sequences of base plus diacritics.
+ */
+GR2_API int gr_slot_can_insert_before(const gr_slot* p);
+
+/** Returns the original gr_char_info index this slot refers to.
+ *
+ * Each Slot has a gr_char_info that it originates from. This is that gr_char_info.
+ * The index is passed to gr_seg_cinfo(). This information is useful for testing.
+ */
+GR2_API int gr_slot_original(const gr_slot* p/*not NULL*/);
+
+/** Breaks a segment into lines.
+ *
+ * Breaks the slot linked list at the given point in the linked list. It is up
+ * to the application to keep track of the first slot on each line.
+ */
+GR2_API void gr_slot_linebreak_before(gr_slot *p/*not NULL*/);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/thirdparty/graphite/include/graphite2/Types.h b/thirdparty/graphite/include/graphite2/Types.h
new file mode 100644
index 0000000000..916c91191b
--- /dev/null
+++ b/thirdparty/graphite/include/graphite2/Types.h
@@ -0,0 +1,79 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+ Alternatively, the contents of this file may be used under the terms
+ of the Mozilla Public License (http://mozilla.org/MPL) or the GNU
+ General Public License, as published by the Free Software Foundation,
+ either version 2 of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include <stddef.h>
+
+typedef unsigned char gr_uint8; // fixed-width integer aliases used throughout the public API
+typedef gr_uint8 gr_byte;
+typedef signed char gr_int8;
+typedef unsigned short gr_uint16;
+typedef short gr_int16;
+typedef unsigned int gr_uint32;
+typedef int gr_int32;
+
+enum gr_encform { // text encoding forms; each enumerator's value is the size in bytes of one code unit
+ gr_utf8 = 1/*sizeof(uint8)*/, gr_utf16 = 2/*sizeof(uint16)*/, gr_utf32 = 4/*sizeof(uint32)*/
+};
+
+
+// Define API function declspec/attributes and how each supported compiler or OS
+// allows us to specify them.
+#if defined __GNUC__
+ #define _gr2_and ,
+ #define _gr2_tag_fn(a) __attribute__((a))
+ #define _gr2_deprecated_flag deprecated
+ #define _gr2_export_flag visibility("default")
+ #define _gr2_import_flag visibility("default")
+ #define _gr2_static_flag visibility("hidden")
+#endif
+
+#if defined _WIN32 || defined __CYGWIN__
+ #if defined __GNUC__ // These three will be redefined for Windows
+ #undef _gr2_export_flag
+ #undef _gr2_import_flag
+ #undef _gr2_static_flag
+ #else // How MSVC specifies function level attributes and deprecation
+ #define _gr2_and
+ #define _gr2_tag_fn(a) __declspec(a)
+ #define _gr2_deprecated_flag deprecated
+ #endif
+ #define _gr2_export_flag dllexport
+ #define _gr2_import_flag dllimport
+ #define _gr2_static_flag
+#endif
+
+#if defined GRAPHITE2_STATIC // static build: tag the API with the static flag
+ #define GR2_API _gr2_tag_fn(_gr2_static_flag)
+ #define GR2_DEPRECATED_API _gr2_tag_fn(_gr2_deprecated_flag _gr2_and _gr2_static_flag)
+#elif defined GRAPHITE2_EXPORTING // tag the API with the export flag
+ #define GR2_API _gr2_tag_fn(_gr2_export_flag)
+ #define GR2_DEPRECATED_API _gr2_tag_fn(_gr2_deprecated_flag _gr2_and _gr2_export_flag)
+#else // neither macro defined: tag the API with the import flag
+ #define GR2_API _gr2_tag_fn(_gr2_import_flag)
+ #define GR2_DEPRECATED_API _gr2_tag_fn(_gr2_deprecated_flag _gr2_and _gr2_import_flag)
+#endif
diff --git a/thirdparty/graphite/src/CmapCache.cpp b/thirdparty/graphite/src/CmapCache.cpp
new file mode 100644
index 0000000000..d070019a34
--- /dev/null
+++ b/thirdparty/graphite/src/CmapCache.cpp
@@ -0,0 +1,155 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+
+#include "inc/Main.h"
+#include "inc/CmapCache.h"
+#include "inc/Face.h"
+#include "inc/TtfTypes.h"
+#include "inc/TtfUtil.h"
+
+
+using namespace graphite2;
+
+// Locate a cmap subtable that passes the format 4 check. The (platform,
+// encoding) pairs are tried in a fixed order of preference and the first
+// candidate that validates wins. Returns 0 for an empty table or when no
+// candidate validates.
+const void * bmp_subtable(const Face::Table & cmap)
+{
+    static const int candidates[][2] = { {3, 1}, {0, 3}, {0, 2}, {0, 1}, {0, 0} };
+
+    if (!cmap.size()) return 0;
+    for (size_t i = 0; i != sizeof candidates / sizeof candidates[0]; ++i)
+    {
+        const void * const stbl = TtfUtil::FindCmapSubtable(cmap, candidates[i][0], candidates[i][1], cmap.size());
+        if (TtfUtil::CheckCmapSubtable4(stbl, cmap + cmap.size()))
+            return stbl;
+    }
+    return 0;
+}
+
+// Locate a cmap subtable that passes the format 12 check, preferring the
+// (3, 10) platform/encoding pair over (0, 4). Returns 0 for an empty table
+// or when neither candidate validates.
+const void * smp_subtable(const Face::Table & cmap)
+{
+    static const int candidates[][2] = { {3, 10}, {0, 4} };
+
+    if (!cmap.size()) return 0;
+    for (size_t i = 0; i != sizeof candidates / sizeof candidates[0]; ++i)
+    {
+        const void * const stbl = TtfUtil::FindCmapSubtable(cmap, candidates[i][0], candidates[i][1], cmap.size());
+        if (TtfUtil::CheckCmapSubtable12(stbl, cmap + cmap.size()))
+            return stbl;
+    }
+    return 0;
+}
+
+// Walk every code point below `limit` that the subtable `cst` enumerates and
+// store its looked-up value in a two level table: blocks[cp >> 8][cp & 0xFF].
+// Second level blocks of 0x100 entries are allocated zero-filled on first use.
+// Returns false only when such an allocation fails.
+template <unsigned int (*NextCodePoint)(const void *, unsigned int, int *),
+          uint16 (*LookupCodePoint)(const void *, unsigned int, int)>
+bool cache_subtable(uint16 * blocks[], const void * cst, const unsigned int limit)
+{
+    int rangeKey = 0;
+    uint32 last = 0;
+    for (uint32 cp = NextCodePoint(cst, 0, &rangeKey); cp < limit; cp = NextCodePoint(cst, cp, &rangeKey))
+    {
+        uint16 * & page = blocks[cp >> 8];
+        if (!page)
+        {
+            page = grzeroalloc<uint16>(0x100);
+            if (!page)
+                return false;
+        }
+        page[cp & 0xFF] = LookupCodePoint(cst, cp, rangeKey);
+
+        // Guard against a subtable whose enumeration does not advance:
+        // always resume from a code point beyond the previous one.
+        if (cp <= last)
+            cp = last + 1;
+        last = cp;
+    }
+    return true;
+}
+
+
+// Build the two level lookup cache from the face's cmap table. The object is
+// left without a block table (and so tests false) when the face has no cmap
+// or the top level table cannot be allocated.
+CachedCmap::CachedCmap(const Face & face)
+: m_isBmpOnly(true),
+  m_blocks(0)
+{
+    const Face::Table cmap(face, Tag::cmap);
+    if (!cmap) return;
+
+    const void * const bmp = bmp_subtable(cmap);
+    const void * const smp = smp_subtable(cmap);
+    m_isBmpOnly = !smp;
+
+    // 0x100 blocks cover U+0000..U+FFFF, 0x1100 cover up to U+10FFFF.
+    m_blocks = grzeroalloc<uint16 *>(m_isBmpOnly ? 0x100 : 0x1100);
+    if (!m_blocks) return;
+
+    // The format 12 data is cached first; the format 4 data is cached
+    // afterwards and so overwrites any entries the two have in common.
+    if (smp
+        && !cache_subtable<TtfUtil::CmapSubtable12NextCodepoint, TtfUtil::CmapSubtable12Lookup>(m_blocks, smp, 0x10FFFF))
+        return;
+
+    if (bmp
+        && !cache_subtable<TtfUtil::CmapSubtable4NextCodepoint, TtfUtil::CmapSubtable4Lookup>(m_blocks, bmp, 0xFFFF))
+        return;
+}
+
+// Release every second level block and then the top level table itself.
+CachedCmap::~CachedCmap() throw()
+{
+    if (!m_blocks) return;
+    // The table holds 0x100 entries for a BMP-only cache and 0x1100 otherwise;
+    // entries that were never populated are null, which free() accepts.
+    unsigned int remaining = m_isBmpOnly ? 0x100 : 0x1100;
+    while (remaining--)
+        free(m_blocks[remaining]);
+    free(m_blocks);
+}
+
+// Look up the value cached for the code point usv.
+// Returns 0 when the cache was never built (no block table), when usv lies
+// outside the cached range, or when nothing was cached for it.
+uint16 CachedCmap::operator [] (const uint32 usv) const throw()
+{
+    // A cache that failed to construct has no block table; answer "unmapped"
+    // instead of dereferencing the null table.
+    if (!m_blocks || (m_isBmpOnly && usv > 0xFFFF) || (usv > 0x10FFFF))
+        return 0;
+    const uint32 block = 0xFFFF & (usv >> 8);
+    if (m_blocks[block])
+        return m_blocks[block][usv & 0xFF];
+    return 0;
+}
+
+// The cache is usable only once its top level block table exists.
+CachedCmap::operator bool() const throw()
+{
+    return m_blocks ? true : false;
+}
+
+
+DirectCmap::DirectCmap(const Face & face)
+: _cmap(face, Tag::cmap), // the face's cmap table; both lookups below read from it
+ _smp(smp_subtable(_cmap)), // format 12 subtable, or 0 when none validates
+ _bmp(bmp_subtable(_cmap)) // format 4 subtable, or 0 when none validates
+{
+}
+
+// Look up the code point usv directly in the font's cmap subtables.
+// Code points above 0xFFFF go through the format 12 subtable, all others
+// through the format 4 subtable. A subtable that was not found yields 0
+// for every code point rather than being passed on as a null pointer.
+uint16 DirectCmap::operator [] (const uint32 usv) const throw()
+{
+    if (usv > 0xFFFF)
+        return _smp ? TtfUtil::CmapSubtable12Lookup(_smp, usv, 0) : 0;
+    return _bmp ? TtfUtil::CmapSubtable4Lookup(_bmp, usv, 0) : 0;
+}
+
+// Usable when the cmap table loaded and a format 4 subtable was found.
+DirectCmap::operator bool () const throw()
+{
+    if (!_cmap) return false;
+    return _bmp != 0;
+}
+
diff --git a/thirdparty/graphite/src/Code.cpp b/thirdparty/graphite/src/Code.cpp
new file mode 100644
index 0000000000..ec5ab298ca
--- /dev/null
+++ b/thirdparty/graphite/src/Code.cpp
@@ -0,0 +1,782 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+// This class represents loaded graphite stack machine code. It performs
+// basic sanity checks on the incoming code to prevent the more obvious
+// problems from crashing graphite.
+// Author: Tim Eves
+
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+#include "graphite2/Segment.h"
+#include "inc/Code.h"
+#include "inc/Face.h"
+#include "inc/GlyphFace.h"
+#include "inc/GlyphCache.h"
+#include "inc/Machine.h"
+#include "inc/Rule.h"
+#include "inc/Silf.h"
+
+#include <cstdio>
+
+#ifdef NDEBUG
+#ifdef __GNUC__
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+#endif
+
+
+using namespace graphite2;
+using namespace vm;
+
+namespace {
+
+// True when i is the first implementation entry of one of the three
+// returning opcodes: POP_RET, RET_ZERO or RET_TRUE.
+inline bool is_return(const instr i) {
+    const opcode_t * const table = Machine::getOpcodeTable();
+    if (i == *table[POP_RET].impl)  return true;
+    if (i == *table[RET_ZERO].impl) return true;
+    return i == *table[RET_TRUE].impl;
+}
+
+// Per-slot bookkeeping gathered while decoding: whether the slot is changed
+// and/or referenced, plus the instruction index recorded for it.
+struct context
+{
+    context(uint8 ref=0) : codeRef(ref) {flags.changed=false; flags.referenced=false;}
+    struct {
+        uint8 changed:1,
+              referenced:1;
+    } flags;
+    uint8 codeRef;
+};
+
+} // end namespace
+
+
+class Machine::Code::decoder // Validates bytecode and translates it into the instruction and data buffers of a Code object.
+{
+public:
+ struct limits;
+ static const int NUMCONTEXTS = 256; // number of entries in the _contexts table
+
+ decoder(limits & lims, Code &code, enum passtype pt) throw();
+
+ bool load(const byte * bc_begin, const byte * bc_end); // decodes [bc_begin, bc_end); false on failure
+ void apply_analysis(instr * const code, instr * code_end); // inserts TEMP_COPY instructions for slots that are both changed and referenced
+ byte max_ref() { return _max_ref; }
+ int out_index() const { return _out_index; }
+
+private:
+ void set_ref(int index) throw();
+ void set_noref(int index) throw();
+ void set_changed(int index) throw();
+ opcode fetch_opcode(const byte * bc);
+ void analyse_opcode(const opcode, const int8 * const dp) throw();
+ bool emit_opcode(opcode opc, const byte * & bc);
+ bool validate_opcode(const byte opc, const byte * const bc);
+ bool valid_upto(const uint16 limit, const uint16 x) const throw();
+ bool test_context() const throw();
+ bool test_ref(int8 index) const throw();
+ bool test_attr(attrCode attr) const throw();
+ void failure(const status_t s) const throw() { _code.failure(s); }
+
+ Code & _code; // the Code object being filled in
+ int _out_index;
+ uint16 _out_length;
+ instr * _instr; // write cursor into the instruction buffer
+ byte * _data; // write cursor into the parameter data buffer
+ limits & _max; // bounds that operands are validated against
+ enum passtype _passtype;
+ int _stack_depth; // running stack depth tracked by fetch_opcode
+ bool _in_ctxt_item; // true while the body of a CNTXT_ITEM is being decoded
+ int16 _slotref; // current slot offset; adjusted by NEXT/COPY_NEXT/INSERT and CNTXT_ITEM
+ context _contexts[NUMCONTEXTS]; // per-slot changed/referenced flags
+ byte _max_ref; // highest slot index recorded by set_ref/set_noref/set_changed
+};
+
+
+struct Machine::Code::decoder::limits // Upper bounds the decoder validates bytecode operands against.
+{
+ const byte * bytecode; // end of the bytecode range currently being decoded (set by load)
+ const uint8 pre_context;
+ const uint16 rule_length,
+ classes,
+ glyf_attrs,
+ features;
+ const byte attrid[gr_slatMax]; // per attribute: exclusive limit for the sub-index operand of the indexed attribute opcodes
+};
+
+inline Machine::Code::decoder::decoder(limits & lims, Code &code, enum passtype pt) throw()
+: _code(code),
+ _out_index(code._constraint ? 0 : lims.pre_context), // constraint code starts at output index 0
+ _out_length(code._constraint ? 1 : lims.rule_length), // and is given an output length of 1
+ _instr(code._code), _data(code._data), _max(lims), _passtype(pt), // write cursors start at the Code object's buffers
+ _stack_depth(0),
+ _in_ctxt_item(false),
+ _slotref(0),
+ _max_ref(0)
+{ }
+
+
+
+Machine::Code::Code(bool is_constraint, const byte * bytecode_begin, const byte * const bytecode_end,
+ uint8 pre_context, uint16 rule_length, const Silf & silf, const Face & face,
+ enum passtype pt, byte * * const _out) // when _out is non-null the code is built in *_out, which is then advanced past it
+ : _code(0), _data(0), _data_size(0), _instr_count(0), _max_ref(0), _status(loaded),
+ _constraint(is_constraint), _modify(false), _delete(false), _own(_out==0) // buffers are owned only when allocated here
+{
+#ifdef GRAPHITE2_TELEMETRY
+ telemetry::category _code_cat(face.tele.code);
+#endif
+ assert(bytecode_begin != 0);
+ if (bytecode_begin == bytecode_end) // empty bytecode: leave the object in its initial state
+ {
+ // ::new (this) Code();
+ return;
+ }
+ assert(bytecode_end > bytecode_begin);
+ const opcode_t * op_to_fn = Machine::getOpcodeTable();
+
+ // Allocate code and data target buffers, these sizes are a worst case
+ // estimate. Once we know their real sizes then we'll shrink them.
+ if (_out) _code = reinterpret_cast<instr *>(*_out);
+ else _code = static_cast<instr *>(malloc(estimateCodeDataOut(bytecode_end-bytecode_begin, 1, is_constraint ? 0 : rule_length)));
+ _data = reinterpret_cast<byte *>(_code + (bytecode_end - bytecode_begin)); // data initially sits after room for one instruction per bytecode byte
+
+ if (!_code || !_data) {
+ failure(alloc_failed);
+ return;
+ }
+
+ decoder::limits lims = {
+ bytecode_end,
+ pre_context,
+ rule_length,
+ silf.numClasses(),
+ face.glyphs().numAttrs(),
+ face.numFeatures(),
+ {1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,255,
+ 1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,0,0,
+ 0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0, silf.numUser()}
+ };
+
+ decoder dec(lims, *this, pt);
+ if(!dec.load(bytecode_begin, bytecode_end))
+ return;
+
+ // Is this an empty program?
+ if (_instr_count == 0)
+ {
+ release_buffers();
+ ::new (this) Code();
+ return;
+ }
+
+ // When we reach the end check we've terminated it correctly
+ if (!is_return(_code[_instr_count-1])) {
+ failure(missing_return);
+ return;
+ }
+
+ assert((_constraint && immutable()) || !_constraint);
+ dec.apply_analysis(_code, _code + _instr_count);
+ _max_ref = dec.max_ref();
+
+ // Now we know exactly how much code and data the program really needs
+ // realloc the buffers to exactly the right size so we don't waste any
+ // memory.
+ assert((bytecode_end - bytecode_begin) >= ptrdiff_t(_instr_count));
+ assert((bytecode_end - bytecode_begin) >= ptrdiff_t(_data_size));
+ memmove(_code + (_instr_count+1), _data, _data_size*sizeof(byte)); // pack the data right after the instructions plus one slot for the trailing RET_ZERO
+ size_t const total_sz = ((_instr_count+1) + (_data_size + sizeof(instr)-1)/sizeof(instr))*sizeof(instr); // data size rounded up to whole instr units
+ if (_out)
+ *_out += total_sz;
+ else
+ {
+ instr * const old_code = _code;
+ _code = static_cast<instr *>(realloc(_code, total_sz));
+ if (!_code) free(old_code); // a failed realloc leaves the old block allocated, so release it here
+ }
+ _data = reinterpret_cast<byte *>(_code + (_instr_count+1));
+
+ if (!_code)
+ {
+ failure(alloc_failed);
+ return;
+ }
+
+ // Make this RET_ZERO, we should never reach this but just in case ...
+ _code[_instr_count] = op_to_fn[RET_ZERO].impl[_constraint];
+
+#ifdef GRAPHITE2_TELEMETRY
+ telemetry::count_bytes(_data_size + (_instr_count+1)*sizeof(instr));
+#endif
+}
+
+// Buffers that were handed in by the caller are not released here.
+Machine::Code::~Code() throw ()
+{
+    if (!_own) return;
+    release_buffers();
+}
+
+
+// Decode the bytecode in [bc, bc_end) one opcode at a time: validate it,
+// record its analysis data, then emit its instruction and parameters.
+// Returns false as soon as any step fails, otherwise whether the Code
+// object still tests true once the whole range has been consumed.
+bool Machine::Code::decoder::load(const byte * bc, const byte * bc_end)
+{
+    _max.bytecode = bc_end;
+    while (bc < bc_end)
+    {
+        const byte * const op_pos = bc++;
+        const opcode opc = fetch_opcode(op_pos);
+        if (opc == vm::MAX_OPCODE)
+            return false;
+
+        // bc now addresses the opcode's first parameter byte.
+        analyse_opcode(opc, reinterpret_cast<const int8 *>(bc));
+
+        // emit_opcode advances bc past the parameters it consumed.
+        if (!emit_opcode(opc, bc))
+            return false;
+    }
+
+    return bool(_code);
+}
+
+// Validation check and fixups: checks the opcode at bc and as many of its
+// operands as can be checked while loading, tracking stack depth and output index.
+// Returns the opcode while the Code object still tests true, otherwise MAX_OPCODE.
+opcode Machine::Code::decoder::fetch_opcode(const byte * bc)
+{
+ const byte opc = *bc++; // from here on bc addresses the first operand byte
+
+ // Do some basic sanity checks based on what we know about the opcode
+ if (!validate_opcode(opc, bc)) return MAX_OPCODE;
+
+ // And check its arguments as far as possible
+ switch (opcode(opc))
+ {
+ case NOP :
+ break;
+ case PUSH_BYTE :
+ case PUSH_BYTEU :
+ case PUSH_SHORT :
+ case PUSH_SHORTU :
+ case PUSH_LONG :
+ ++_stack_depth;
+ break;
+ case ADD :
+ case SUB :
+ case MUL :
+ case DIV :
+ case MIN_ :
+ case MAX_ :
+ case AND :
+ case OR :
+ case EQUAL :
+ case NOT_EQ :
+ case LESS :
+ case GTR :
+ case LESS_EQ :
+ case GTR_EQ :
+ case BITOR :
+ case BITAND :
+ if (--_stack_depth <= 0) // depth drops by one and must stay positive
+ failure(underfull_stack);
+ break;
+ case NEG :
+ case TRUNC8 :
+ case TRUNC16 :
+ case NOT :
+ case BITNOT :
+ case BITSET :
+ if (_stack_depth <= 0) // depth unchanged, but a value must be present
+ failure(underfull_stack);
+ break;
+ case COND :
+ _stack_depth -= 2;
+ if (_stack_depth <= 0)
+ failure(underfull_stack);
+ break;
+ case NEXT_N : // runtime checked
+ break;
+ case NEXT :
+ case COPY_NEXT :
+ ++_out_index;
+ if (_out_index < -1 || _out_index > _out_length || _slotref > _max.rule_length)
+ failure(out_of_range_data);
+ break;
+ case PUT_GLYPH_8BIT_OBS :
+ valid_upto(_max.classes, bc[0]);
+ test_context();
+ break;
+ case PUT_SUBS_8BIT_OBS :
+ test_ref(int8(bc[0]));
+ valid_upto(_max.classes, bc[1]);
+ valid_upto(_max.classes, bc[2]);
+ test_context();
+ break;
+ case PUT_COPY :
+ test_ref(int8(bc[0]));
+ test_context();
+ break;
+ case INSERT :
+ if (_passtype >= PASS_TYPE_POSITIONING)
+ failure(invalid_opcode);
+ ++_out_length;
+ if (_out_index < 0) ++_out_index;
+ if (_out_index < -1 || _out_index >= _out_length)
+ failure(out_of_range_data);
+ break;
+ case DELETE :
+ if (_passtype >= PASS_TYPE_POSITIONING)
+ failure(invalid_opcode);
+ if (_out_index < _max.pre_context)
+ failure(out_of_range_data);
+ --_out_index;
+ --_out_length;
+ if (_out_index < -1 || _out_index > _out_length)
+ failure(out_of_range_data);
+ break;
+ case ASSOC :
+ if (bc[0] == 0) // bc[0] is the count of slot references that follow
+ failure(out_of_range_data);
+ for (uint8 num = bc[0]; num; --num)
+ test_ref(int8(bc[num]));
+ test_context();
+ break;
+ case CNTXT_ITEM :
+ valid_upto(_max.rule_length, _max.pre_context + int8(bc[0]));
+ if (bc + 2 + bc[1] >= _max.bytecode) failure(jump_past_end); // bc[1] is the length of the context item body
+ if (_in_ctxt_item) failure(nested_context_item);
+ break;
+ case ATTR_SET :
+ case ATTR_ADD :
+ case ATTR_SUB :
+ case ATTR_SET_SLOT :
+ if (--_stack_depth < 0)
+ failure(underfull_stack);
+ valid_upto(gr_slatMax, bc[0]);
+ if (attrCode(bc[0]) == gr_slatUserDefn) // use IATTR for user attributes
+ failure(out_of_range_data);
+ test_attr(attrCode(bc[0]));
+ test_context();
+ break;
+ case IATTR_SET_SLOT :
+ if (--_stack_depth < 0)
+ failure(underfull_stack);
+ if (valid_upto(gr_slatMax, bc[0])) // only index attrid[] with an attribute code known to be in range
+ valid_upto(_max.attrid[bc[0]], bc[1]);
+ test_attr(attrCode(bc[0]));
+ test_context();
+ break;
+ case PUSH_SLOT_ATTR :
+ ++_stack_depth;
+ valid_upto(gr_slatMax, bc[0]);
+ test_ref(int8(bc[1]));
+ if (attrCode(bc[0]) == gr_slatUserDefn) // use IATTR for user attributes
+ failure(out_of_range_data);
+ test_attr(attrCode(bc[0]));
+ break;
+ case PUSH_GLYPH_ATTR_OBS :
+ case PUSH_ATT_TO_GATTR_OBS :
+ ++_stack_depth;
+ valid_upto(_max.glyf_attrs, bc[0]);
+ test_ref(int8(bc[1]));
+ break;
+ case PUSH_ATT_TO_GLYPH_METRIC :
+ case PUSH_GLYPH_METRIC :
+ ++_stack_depth;
+ valid_upto(kgmetDescent, bc[0]);
+ test_ref(int8(bc[1]));
+ // level: dp[2] no check necessary
+ break;
+ case PUSH_FEAT :
+ ++_stack_depth;
+ valid_upto(_max.features, bc[0]);
+ test_ref(int8(bc[1]));
+ break;
+ case PUSH_ISLOT_ATTR :
+ ++_stack_depth;
+ if (valid_upto(gr_slatMax, bc[0]))
+ {
+ test_ref(int8(bc[1]));
+ valid_upto(_max.attrid[bc[0]], bc[2]);
+ }
+ test_attr(attrCode(bc[0]));
+ break;
+ case PUSH_IGLYPH_ATTR :// not implemented
+ ++_stack_depth;
+ break;
+ case POP_RET :
+ if (--_stack_depth < 0)
+ failure(underfull_stack);
+ GR_FALLTHROUGH;
+ // no break
+ case RET_ZERO :
+ case RET_TRUE :
+ break;
+ case IATTR_SET :
+ case IATTR_ADD :
+ case IATTR_SUB :
+ if (--_stack_depth < 0)
+ failure(underfull_stack);
+ if (valid_upto(gr_slatMax, bc[0]))
+ valid_upto(_max.attrid[bc[0]], bc[1]);
+ test_attr(attrCode(bc[0]));
+ test_context();
+ break;
+ case PUSH_PROC_STATE : // dummy: dp[0] no check necessary
+ case PUSH_VERSION :
+ ++_stack_depth;
+ break;
+ case PUT_SUBS :
+ test_ref(int8(bc[0]));
+ valid_upto(_max.classes, uint16(bc[1]<< 8) | bc[2]); // 16-bit operands are stored high byte first
+ valid_upto(_max.classes, uint16(bc[3]<< 8) | bc[4]);
+ test_context();
+ break;
+ case PUT_SUBS2 : // not implemented
+ case PUT_SUBS3 : // not implemented
+ break;
+ case PUT_GLYPH :
+ valid_upto(_max.classes, uint16(bc[0]<< 8) | bc[1]);
+ test_context();
+ break;
+ case PUSH_GLYPH_ATTR :
+ case PUSH_ATT_TO_GLYPH_ATTR :
+ ++_stack_depth;
+ valid_upto(_max.glyf_attrs, uint16(bc[0]<< 8) | bc[1]);
+ test_ref(int8(bc[2]));
+ break;
+ case SET_FEAT :
+ valid_upto(_max.features, bc[0]);
+ test_ref(int8(bc[1]));
+ break;
+ default:
+ failure(invalid_opcode);
+ break;
+ }
+
+ return bool(_code) ? opcode(opc) : MAX_OPCODE;
+}
+
+
+void Machine::Code::decoder::analyse_opcode(const opcode opc, const int8 * arg) throw() // records slot usage and the _modify/_delete flags; arg points at the opcode's operands
+{
+ switch (opc)
+ {
+ case DELETE :
+ _code._delete = true;
+ break;
+ case ASSOC :
+ set_changed(0);
+// for (uint8 num = arg[0]; num; --num)
+// _analysis.set_noref(num);
+ break;
+ case PUT_GLYPH_8BIT_OBS :
+ case PUT_GLYPH :
+ _code._modify = true;
+ set_changed(0);
+ break;
+ case ATTR_SET :
+ case ATTR_ADD :
+ case ATTR_SUB :
+ case ATTR_SET_SLOT :
+ case IATTR_SET_SLOT :
+ case IATTR_SET :
+ case IATTR_ADD :
+ case IATTR_SUB :
+ set_noref(0); // only raises _max_ref; no flag is set
+ break;
+ case NEXT :
+ case COPY_NEXT :
+ ++_slotref;
+ _contexts[_slotref] = context(uint8(_code._instr_count+1)); // remember the instruction index that follows this opcode
+ // if (_analysis.slotref > _analysis.max_ref) _analysis.max_ref = _analysis.slotref;
+ break;
+ case INSERT :
+ if (_slotref >= 0) --_slotref;
+ _code._modify = true;
+ break;
+ case PUT_SUBS_8BIT_OBS : // slotref on 1st parameter
+ case PUT_SUBS :
+ _code._modify = true;
+ set_changed(0);
+ GR_FALLTHROUGH;
+ // no break
+ case PUT_COPY :
+ if (arg[0] != 0) { set_changed(0); _code._modify = true; }
+ set_ref(arg[0]);
+ break;
+ case PUSH_GLYPH_ATTR_OBS :
+ case PUSH_SLOT_ATTR :
+ case PUSH_GLYPH_METRIC :
+ case PUSH_ATT_TO_GATTR_OBS :
+ case PUSH_ATT_TO_GLYPH_METRIC :
+ case PUSH_ISLOT_ATTR :
+ case PUSH_FEAT :
+ case SET_FEAT :
+ set_ref(arg[1]); // slot reference is the second operand byte
+ break;
+ case PUSH_ATT_TO_GLYPH_ATTR :
+ case PUSH_GLYPH_ATTR :
+ set_ref(arg[2]); // slot reference follows the two byte attribute number
+ break;
+ default:
+ break;
+ }
+}
+
+
+bool Machine::Code::decoder::emit_opcode(opcode opc, const byte * & bc) // appends the instruction and its parameters; advances bc past what was consumed
+{
+ const opcode_t * op_to_fn = Machine::getOpcodeTable();
+ const opcode_t & op = op_to_fn[opc];
+ if (op.impl[_code._constraint] == 0)
+ {
+ failure(unimplemented_opcode_used);
+ return false;
+ }
+
+ const size_t param_sz = op.param_sz == VARARGS ? bc[0] + 1 : op.param_sz; // VARARGS: a count byte followed by that many bytes
+
+ // Add this instruction
+ *_instr++ = op.impl[_code._constraint];
+ ++_code._instr_count;
+
+ // Grab the parameters
+ if (param_sz) {
+ memcpy(_data, bc, param_sz * sizeof(byte));
+ bc += param_sz;
+ _data += param_sz;
+ _code._data_size += param_sz;
+ }
+
+ // recursively decode a context item so we can split the skip into
+ // instruction and data portions.
+ if (opc == CNTXT_ITEM)
+ {
+ assert(_out_index == 0);
+ _in_ctxt_item = true;
+ _out_index = _max.pre_context + int8(_data[-2]);
+ _slotref = int8(_data[-2]);
+ _out_length = _max.rule_length;
+
+ const size_t ctxt_start = _code._instr_count;
+ byte & instr_skip = _data[-1]; // skip length copied from the bytecode; rewritten below as an instruction count
+ byte & data_skip = *_data++; // extra data byte reserved for the data portion of the skip
+ ++_code._data_size;
+ const byte *curr_end = _max.bytecode; // load() overwrites _max.bytecode, so keep the outer end to restore it
+
+ if (load(bc, bc + instr_skip))
+ {
+ bc += instr_skip;
+ data_skip = instr_skip - byte(_code._instr_count - ctxt_start); // bytes of the body that were not emitted as instructions
+ instr_skip = byte(_code._instr_count - ctxt_start);
+ _max.bytecode = curr_end;
+
+ _out_length = 1;
+ _out_index = 0;
+ _slotref = 0;
+ _in_ctxt_item = false;
+ }
+ else
+ {
+ _out_index = 0;
+ _slotref = 0;
+ return false;
+ }
+ }
+
+ return bool(_code);
+}
+
+
+void Machine::Code::decoder::apply_analysis(instr * const code, instr * code_end) // code..code_end is the emitted instruction range; updates _code._instr_count
+{
+ // insert TEMP_COPY commands for slots that need them (that change and are referenced later)
+ int tempcount = 0; // number of TEMP_COPY instructions inserted so far
+ if (_code._constraint) return;
+
+ const instr temp_copy = Machine::getOpcodeTable()[TEMP_COPY].impl[0];
+ for (const context * c = _contexts, * const ce = c + _slotref; c < ce; ++c)
+ {
+ if (!c->flags.referenced || !c->flags.changed) continue;
+
+ instr * const tip = code + c->codeRef + tempcount; // recorded index shifted by the insertions already made
+ memmove(tip+1, tip, (code_end - tip) * sizeof(instr)); // open a one instruction gap at tip
+ *tip = temp_copy;
+ ++code_end;
+ ++tempcount;
+ _code._delete = true;
+ }
+
+ _code._instr_count = code_end - code;
+}
+
+
+// Basic opcode sanity checks: the opcode number is known, an implementation
+// exists for this kind of code (rule or constraint), and all of its
+// parameter bytes lie before the end of the bytecode. bc addresses the
+// first parameter byte. Records the failure and returns false otherwise.
+inline
+bool Machine::Code::decoder::validate_opcode(const byte opc, const byte * const bc)
+{
+    if (opc >= MAX_OPCODE)
+    {
+        failure(invalid_opcode);
+        return false;
+    }
+
+    const opcode_t & op = Machine::getOpcodeTable()[opc];
+    if (!op.impl[_code._constraint])
+    {
+        failure(unimplemented_opcode_used);
+        return false;
+    }
+
+    const bool varargs = op.param_sz == VARARGS;
+    // The count byte of a variable length opcode must itself be in range
+    // before it may be read.
+    bool exhausted = varargs && bc >= _max.bytecode;
+    if (!exhausted)
+    {
+        const size_t param_sz = varargs ? bc[0] + 1 : op.param_sz;
+        exhausted = bc - 1 + param_sz >= _max.bytecode;
+    }
+    if (exhausted)
+    {
+        failure(arguments_exhausted);
+        return false;
+    }
+    return true;
+}
+
+
+// Accept x only when limit is non-zero and x lies below it; otherwise
+// record out_of_range_data and return false.
+bool Machine::Code::decoder::valid_upto(const uint16 limit, const uint16 x) const throw()
+{
+    if (limit != 0 && x < limit)
+        return true;
+    failure(out_of_range_data);
+    return false;
+}
+
+// Check a relative slot reference. Constraint code outside a context item
+// may only look backwards, by at most pre_context slots. Everything else
+// must land inside [0, rule_length) once the current slot offset and the
+// pre-context are added. Records out_of_range_data on failure.
+inline
+bool Machine::Code::decoder::test_ref(int8 index) const throw()
+{
+    bool ok;
+    if (_code._constraint && !_in_ctxt_item)
+    {
+        ok = index <= 0 && -index <= _max.pre_context;
+    }
+    else
+    {
+        const int slot = _slotref + _max.pre_context + index;
+        ok = _max.rule_length != 0 && slot < _max.rule_length && slot >= 0;
+    }
+
+    if (!ok)
+        failure(out_of_range_data);
+    return ok;
+}
+
+// The output index must lie inside [0, _out_length) and the slot offset
+// must stay below the last entry of the context table; otherwise record
+// out_of_range_data.
+bool Machine::Code::decoder::test_context() const throw()
+{
+    const bool ok = _out_index >= 0
+                 && _out_index < _out_length
+                 && _slotref < NUMCONTEXTS - 1;
+    if (!ok)
+        failure(out_of_range_data);
+    return ok;
+}
+
+bool Machine::Code::decoder::test_attr(attrCode) const throw() // currently accepts every attribute: the check below is compiled out
+{
+#if 0 // This code is coming but causes backward compatibility problems.
+ if (_passtype < PASS_TYPE_POSITIONING)
+ {
+ if (attr != gr_slatBreak && attr != gr_slatDir && attr != gr_slatUserDefn
+ && attr != gr_slatCompRef)
+ {
+ failure(out_of_range_data);
+ return false;
+ }
+ }
+#endif
+ return true;
+}
+
+// Record why loading failed and drop whatever buffers are held so far.
+inline
+void Machine::Code::failure(const status_t s) throw() {
+    _status = s;
+    release_buffers();
+}
+
+
+// Flag the slot at `index` relative to the current slot as referenced and
+// raise _max_ref to cover it. Offsets outside the context table are ignored.
+inline
+void Machine::Code::decoder::set_ref(int index) throw() {
+    const int slot = index + _slotref;
+    if (slot >= 0 && slot < NUMCONTEXTS)
+    {
+        _contexts[slot].flags.referenced = true;
+        if (slot > _max_ref) _max_ref = slot;
+    }
+}
+
+
+// Raise _max_ref to cover the slot at `index` relative to the current slot
+// without flagging it. Offsets outside the context table are ignored.
+inline
+void Machine::Code::decoder::set_noref(int index) throw() {
+    const int slot = index + _slotref;
+    if (slot >= 0 && slot < NUMCONTEXTS)
+    {
+        if (slot > _max_ref) _max_ref = slot;
+    }
+}
+
+
+// Flag the slot at `index` relative to the current slot as changed and
+// raise _max_ref to cover it. Offsets outside the context table are ignored.
+inline
+void Machine::Code::decoder::set_changed(int index) throw() {
+    const int slot = index + _slotref;
+    if (slot >= 0 && slot < NUMCONTEXTS)
+    {
+        _contexts[slot].flags.changed = true;
+        if (slot > _max_ref) _max_ref = slot;
+    }
+}
+
+
+// Forget both buffers and give up ownership. The instruction buffer is
+// freed only when this object owned it.
+void Machine::Code::release_buffers() throw()
+{
+    instr * const owned = _own ? _code : 0;
+    _own  = false;
+    _code = 0;
+    _data = 0;
+    free(owned);    // free(0) is a no-op
+}
+
+
+// Execute this program on machine m. Before running, the highest slot the
+// program was seen to reference (offset by the slot map's context) must be
+// inside the slot map and non-null; otherwise the machine status is set to
+// slot_offset_out_bounds and 1 is returned without running anything.
+int32 Machine::Code::run(Machine & m, slotref * & map) const
+{
+    assert(*this);          // Check we are actually runnable
+
+    const bool reachable = m.slotMap().size() > size_t(_max_ref + m.slotMap().context())
+                        && m.slotMap()[_max_ref + m.slotMap().context()] != 0;
+    if (reachable)
+        return m.run(_code, _data, map);
+
+    m._status = Machine::slot_offset_out_bounds;
+    return 1;
+}
diff --git a/thirdparty/graphite/src/Collider.cpp b/thirdparty/graphite/src/Collider.cpp
new file mode 100644
index 0000000000..1929b39a58
--- /dev/null
+++ b/thirdparty/graphite/src/Collider.cpp
@@ -0,0 +1,1115 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include <algorithm>
+#include <limits>
+#include <cmath>
+#include <string>
+#include <functional>
+#include "inc/Collider.h"
+#include "inc/Segment.h"
+#include "inc/Slot.h"
+#include "inc/GlyphCache.h"
+#include "inc/Sparse.h"
+
+#define ISQRT2 0.707106781f
+
+// Possible rounding error for subbox boundaries: 0.016 = 1/64 = 1/256 * 4
+// (values in font range from 0..256)
+// #define SUBBOX_RND_ERR 0.016
+
+using namespace graphite2;
+
+//// SHIFT-COLLIDER ////
+
+// Initialize the Collider to hold the basic movement limits for the
+// target slot, the one we are focusing on fixing. Returns false, before any member is written, when gc.check() rejects the slot's glyph id; true otherwise.
+bool ShiftCollider::initSlot(Segment *seg, Slot *aSlot, const Rect &limit, float margin, float marginWeight,
+ const Position &currShift, const Position &currOffset, int dir, GR_MAYBE_UNUSED json * const dbgout)
+{
+ int i;
+ float mx, mn;
+ float a, shift;
+ const GlyphCache &gc = seg->getFace()->glyphs();
+ unsigned short gid = aSlot->gid();
+ if (!gc.check(gid))  // bail out before the bounding boxes are fetched
+ return false;
+ const BBox &bb = gc.getBoundingBBox(gid);
+ const SlantBox &sb = gc.getBoundingSlantBox(gid);
+ //float sx = aSlot->origin().x + currShift.x;
+ //float sy = aSlot->origin().y + currShift.y;
+ if (currOffset.x != 0.f || currOffset.y != 0.f)  // store the limit rectangle translated by -currOffset
+ _limit = Rect(limit.bl - currOffset, limit.tr - currOffset);
+ else
+ _limit = limit;
+ // For a ShiftCollider, these indices indicate which vector we are moving by:
+ // each _ranges represents absolute space with respect to the origin of the slot. Thus take into account true origins but subtract the vmin for the slot
+ for (i = 0; i < 4; ++i)  // one _ranges/_len entry per axis: 0 = x, 1 = y, 2 = sum, 3 = diff
+ {
+ switch (i) {
+ case 0 : // x direction
+ mn = _limit.bl.x + currOffset.x;
+ mx = _limit.tr.x + currOffset.x;
+ _len[i] = bb.xa - bb.xi;  // glyph extent along this axis
+ a = currOffset.y + currShift.y;
+ _ranges[i].initialise<XY>(mn, mx, margin, marginWeight, a);
+ break;
+ case 1 : // y direction
+ mn = _limit.bl.y + currOffset.y;
+ mx = _limit.tr.y + currOffset.y;
+ _len[i] = bb.ya - bb.yi;
+ a = currOffset.x + currShift.x;
+ _ranges[i].initialise<XY>(mn, mx, margin, marginWeight, a);
+ break;
+ case 2 : // sum (negatively sloped diagonal boundaries)
+ // pick closest x,y limit boundaries in s direction
+ shift = currOffset.x + currOffset.y + currShift.x + currShift.y;
+ mn = -2 * min(currShift.x - _limit.bl.x, currShift.y - _limit.bl.y) + shift;
+ mx = 2 * min(_limit.tr.x - currShift.x, _limit.tr.y - currShift.y) + shift;
+ _len[i] = sb.sa - sb.si;
+ a = currOffset.x - currOffset.y + currShift.x - currShift.y;
+ _ranges[i].initialise<SD>(mn, mx, margin / ISQRT2, marginWeight, a);  // diagonal axes pass margin / ISQRT2 instead of margin
+ break;
+ case 3 : // diff (positively sloped diagonal boundaries)
+ // pick closest x,y limit boundaries in d direction
+ shift = currOffset.x - currOffset.y + currShift.x - currShift.y;
+ mn = -2 * min(currShift.x - _limit.bl.x, _limit.tr.y - currShift.y) + shift;
+ mx = 2 * min(_limit.tr.x - currShift.x, currShift.y - _limit.bl.y) + shift;
+ _len[i] = sb.da - sb.di;
+ a = currOffset.x + currOffset.y + currShift.x + currShift.y;
+ _ranges[i].initialise<SD>(mn, mx, margin / ISQRT2, marginWeight, a);
+ break;
+ }
+ }
+
+ _target = aSlot;
+ if ((dir & 1) == 0)  // NOTE(review): runs after the loop above, so the ranges were built from the unmodified _limit
+ {
+ // For LTR, switch and negate x limits.
+ _limit.bl.x = -1 * limit.tr.x;  // NOTE(review): only bl.x is overwritten; the matching tr.x assignment below is commented out - confirm intended
+ //_limit.tr.x = -1 * limit.bl.x;
+ }
+ _currOffset = currOffset;
+ _currShift = currShift;
+ _origin = aSlot->origin() - currOffset; // the original anchor position of the glyph
+
+ _margin = margin;
+ _marginWt = marginWeight;
+
+ SlotCollision *c = seg->collisionInfo(aSlot);  // the sequence class, proximity class and order are copied from the slot's collision record
+ _seqClass = c->seqClass();
+ _seqProxClass = c->seqProxClass();
+ _seqOrder = c->seqOrder();
+ return true;
+}
+
+// Pick one of three candidate values built from (vi, va) and (mx, my), using
+// the comparison op to decide between them:
+//   2*mx - vi   unless op(2*mx - vi, vi + 2*my) holds, then
+//   va + 2*my   unless op(va + 2*my, 2*mx - va) holds, then
+//   mx + my.
+template <class O>
+float sdm(float vi, float va, float mx, float my, O op)
+{
+    const float first = 2 * mx - vi;
+    if (!op(first, vi + 2 * my))
+        return first;
+
+    const float second = va + 2 * my;
+    if (!op(second, 2 * mx - va))
+        return second;
+
+    return mx + my;
+}
+
+// Mark an area with a cost that can vary along the x or y axis. The region is expressed in terms of the centre of the target glyph in each axis. axis picks the _ranges entry (0 = x, 1 = y, 2 and 3 = the two diagonal axes); any other value does nothing.
+void ShiftCollider::addBox_slope(bool isx, const Rect &box, const BBox &bb, const SlantBox &sb, const Position &org, float weight, float m, bool minright, int axis)
+{
+ float a, c;
+ switch (axis) {
+ case 0 :
+ if (box.bl.y < org.y + bb.ya && box.tr.y > org.y + bb.yi && box.width() > 0)  // box must overlap the glyph's y extent at org and have positive width
+ {
+ a = org.y + 0.5f * (bb.yi + bb.ya);  // y centre of the glyph placed at org
+ c = 0.5f * (bb.xi + bb.xa);  // x centre of the glyph box, subtracted from the box edges below
+ if (isx)
+ _ranges[axis].weighted<XY>(box.bl.x - c, box.tr.x - c, weight, a, m,
+ (minright ? box.tr.x : box.bl.x) - c, a, 0, false);  // minright selects the right (tr) rather than left (bl) box edge
+ else
+ _ranges[axis].weighted<XY>(box.bl.x - c, box.tr.x - c, weight, a, 0, 0, org.y,
+ m * (a * a + sqr((minright ? box.tr.y : box.bl.y) - 0.5f * (bb.yi + bb.ya))), false);
+ }
+ break;
+ case 1 :
+ if (box.bl.x < org.x + bb.xa && box.tr.x > org.x + bb.xi && box.height() > 0)  // box must overlap the glyph's x extent at org and have positive height
+ {
+ a = org.x + 0.5f * (bb.xi + bb.xa);  // x centre of the glyph placed at org
+ c = 0.5f * (bb.yi + bb.ya);  // y centre of the glyph box
+ if (isx)
+ _ranges[axis].weighted<XY>(box.bl.y - c, box.tr.y - c, weight, a, 0, 0, org.x,
+ m * (a * a + sqr((minright ? box.tr.x : box.bl.x) - 0.5f * (bb.xi + bb.xa))), false);
+ else
+ _ranges[axis].weighted<XY>(box.bl.y - c, box.tr.y - c, weight, a, m,
+ (minright ? box.tr.y : box.bl.y) - c, a, 0, false);
+ }
+ break;
+ case 2 :
+ if (box.bl.x - box.tr.y < org.x - org.y + sb.da && box.tr.x - box.bl.y > org.x - org.y + sb.di)  // box's (x - y) span must overlap the glyph's d extent
+ {
+ float d = org.x - org.y + 0.5f * (sb.di + sb.da);  // d (x - y) centre of the glyph placed at org
+ c = 0.5f * (sb.si + sb.sa);  // s (x + y) centre of the slant box
+ float smax = min(2 * box.tr.x - d, 2 * box.tr.y + d);
+ float smin = max(2 * box.bl.x - d, 2 * box.bl.y + d);
+ if (smin > smax) return;  // empty interval: nothing to mark
+ float si;
+ a = d;
+ if (isx)
+ si = 2 * (minright ? box.tr.x : box.bl.x) - a;
+ else
+ si = 2 * (minright ? box.tr.y : box.bl.y) + a;
+ _ranges[axis].weighted<SD>(smin - c, smax - c, weight / 2, a, m / 2, si, 0, 0, isx);  // diagonal axes pass half of weight and m
+ }
+ break;
+ case 3 :
+ if (box.bl.x + box.bl.y < org.x + org.y + sb.sa && box.tr.x + box.tr.y > org.x + org.y + sb.si)  // box's (x + y) span must overlap the glyph's s extent
+ {
+ float s = org.x + org.y + 0.5f * (sb.si + sb.sa);  // s (x + y) centre of the glyph placed at org
+ c = 0.5f * (sb.di + sb.da);  // d (x - y) centre of the slant box
+ float dmax = min(2 * box.tr.x - s, s - 2 * box.bl.y);
+ float dmin = max(2 * box.bl.x - s, s - 2 * box.tr.y);
+ if (dmin > dmax) return;  // empty interval: nothing to mark
+ float di;
+ a = s;
+ if (isx)
+ di = 2 * (minright ? box.tr.x : box.bl.x) - a;
+ else
+ di = 2 * (minright ? box.tr.y : box.bl.y) + a;
+ _ranges[axis].weighted<SD>(dmin - c, dmax - c, weight / 2, a, m / 2, di, 0, 0, !isx);  // note the last argument is !isx here but isx in case 2
+ }
+ break;
+ default :
+ break;
+ }
+ return;
+}
+
+// Mark an area with an absolute cost, making it completely inaccessible. axis picks the _ranges entry to exclude from (0 = x, 1 = y, 2 and 3 = the two diagonal axes); any other value does nothing.
+inline void ShiftCollider::removeBox(const Rect &box, const BBox &bb, const SlantBox &sb, const Position &org, int axis)
+{
+ float c;
+ switch (axis) {
+ case 0 :
+ if (box.bl.y < org.y + bb.ya && box.tr.y > org.y + bb.yi && box.width() > 0)  // box must overlap the glyph's y extent at org and have positive width
+ {
+ c = 0.5f * (bb.xi + bb.xa);  // x centre of the glyph box
+ _ranges[axis].exclude(box.bl.x - c, box.tr.x - c);
+ }
+ break;
+ case 1 :
+ if (box.bl.x < org.x + bb.xa && box.tr.x > org.x + bb.xi && box.height() > 0)  // box must overlap the glyph's x extent at org and have positive height
+ {
+ c = 0.5f * (bb.yi + bb.ya);  // y centre of the glyph box
+ _ranges[axis].exclude(box.bl.y - c, box.tr.y - c);
+ }
+ break;
+ case 2 :
+ if (box.bl.x - box.tr.y < org.x - org.y + sb.da && box.tr.x - box.bl.y > org.x - org.y + sb.di
+ && box.width() > 0 && box.height() > 0)  // unlike addBox_slope, the diagonal cases also require a non-empty box
+ {
+ float di = org.x - org.y + sb.di;  // glyph's d (x - y) extent placed at org
+ float da = org.x - org.y + sb.da;
+ float smax = sdm(di, da, box.tr.x, box.tr.y, std::greater<float>());
+ float smin = sdm(da, di, box.bl.x, box.bl.y, std::less<float>());
+ c = 0.5f * (sb.si + sb.sa);  // s (x + y) centre of the slant box
+ _ranges[axis].exclude(smin - c, smax - c);
+ }
+ break;
+ case 3 :
+ if (box.bl.x + box.bl.y < org.x + org.y + sb.sa && box.tr.x + box.tr.y > org.x + org.y + sb.si
+ && box.width() > 0 && box.height() > 0)
+ {
+ float si = org.x + org.y + sb.si;  // glyph's s (x + y) extent placed at org
+ float sa = org.x + org.y + sb.sa;
+ float dmax = sdm(si, sa, box.tr.x, -box.bl.y, std::greater<float>());  // y limits are passed negated for the diff axis
+ float dmin = sdm(sa, si, box.bl.x, -box.tr.y, std::less<float>());
+ c = 0.5f * (sb.di + sb.da);  // d (x - y) centre of the slant box
+ _ranges[axis].exclude(dmin - c, dmax - c);
+ }
+ break;
+ default :
+ break;
+ }
+ return;
+}
+
+// Adjust the movement limits for the target to avoid having it collide
+// with the given neighbor slot. Also determine if there is in fact a collision
+// between the target and the given slot.
+// Returns false if the glyph cache rejects the neighbor's glyph id (nothing is
+// changed in that case), otherwise true and-ed with the result of the nested call
+// made for the neighbor's exclusion glyph, if it has one. hasCol is or-ed with
+// whether a collision was recorded on any axis; it is never cleared here.
+bool ShiftCollider::mergeSlot(Segment *seg, Slot *slot, const SlotCollision *cslot, const Position &currShift,
+ bool isAfter, // slot is logically after _target
+ bool sameCluster, bool &hasCol, bool isExclusion,
+ GR_MAYBE_UNUSED json * const dbgout )
+{
+ bool isCol = false;
+ const float sx = slot->origin().x - _origin.x + currShift.x;  // neighbor position relative to the target's anchor
+ const float sy = slot->origin().y - _origin.y + currShift.y;
+ const float sd = sx - sy;
+ const float ss = sx + sy;
+ float vmin, vmax;
+ float omin, omax, otmin, otmax;
+ float cmin, cmax; // target limits
+ float torg;
+ const GlyphCache &gc = seg->getFace()->glyphs();
+ const unsigned short gid = slot->gid();
+ if (!gc.check(gid))
+ return false;
+ const BBox &bb = gc.getBoundingBBox(gid);
+
+ // SlotCollision * cslot = seg->collisionInfo(slot);
+ int orderFlags = 0;
+ bool sameClass = _seqProxClass == 0 && cslot->seqClass() == _seqClass;
+ if (sameCluster && _seqClass
+ && (sameClass || (_seqProxClass != 0 && cslot->seqClass() == _seqProxClass)))
+ // Force the target glyph to be in the specified direction from the slot we're testing.
+ orderFlags = _seqOrder;
+
+ // short circuit if only interested in direct collision and we are out of range
+ if (orderFlags || (sx + bb.xa + _margin >= _limit.bl.x && sx + bb.xi - _margin <= _limit.tr.x)
+ || (sy + bb.ya + _margin >= _limit.bl.y && sy + bb.yi - _margin <= _limit.tr.y))
+
+ {
+ const float tx = _currOffset.x + _currShift.x;  // current target displacement
+ const float ty = _currOffset.y + _currShift.y;
+ const float td = tx - ty;
+ const float ts = tx + ty;
+ const SlantBox &sb = gc.getBoundingSlantBox(gid);
+ const unsigned short tgid = _target->gid();
+ const BBox &tbb = gc.getBoundingBBox(tgid);
+ const SlantBox &tsb = gc.getBoundingSlantBox(tgid);
+ float seq_above_wt = cslot->seqAboveWt();
+ float seq_below_wt = cslot->seqBelowWt();
+ float seq_valign_wt = cslot->seqValignWt();
+ float lmargin;
+ // if isAfter, invert orderFlags for diagonal orders.
+ if (isAfter)
+ {
+ // invert appropriate bits
+ orderFlags ^= (sameClass ? 0x3F : 0x3);
+ // consider 2 bits at a time, non overlapping. If both bits set, clear them
+ orderFlags = orderFlags ^ ((((orderFlags >> 1) & orderFlags) & 0x15) * 3);
+ }
+
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout)
+ dbgout->setenv(0, slot);
+#endif
+
+ // Process main bounding octabox.
+ for (int i = 0; i < 4; ++i)  // i: 0 = x, 1 = y, 2 = sum, 3 = diff
+ {
+ switch (i) {
+ case 0 : // x direction
+ vmin = max(max(bb.xi - tbb.xa + sx, sb.di - tsb.da + ty + sd), sb.si - tsb.sa - ty + ss);
+ vmax = min(min(bb.xa - tbb.xi + sx, sb.da - tsb.di + ty + sd), sb.sa - tsb.si - ty + ss);
+ otmin = tbb.yi + ty;
+ otmax = tbb.ya + ty;
+ omin = bb.yi + sy;
+ omax = bb.ya + sy;
+ torg = _currOffset.x;
+ cmin = _limit.bl.x + torg;
+ cmax = _limit.tr.x - tbb.xi + tbb.xa + torg;
+ lmargin = _margin;
+ break;
+ case 1 : // y direction
+ vmin = max(max(bb.yi - tbb.ya + sy, tsb.di - sb.da + tx - sd), sb.si - tsb.sa - tx + ss);
+ vmax = min(min(bb.ya - tbb.yi + sy, tsb.da - sb.di + tx - sd), sb.sa - tsb.si - tx + ss);
+ otmin = tbb.xi + tx;
+ otmax = tbb.xa + tx;
+ omin = bb.xi + sx;
+ omax = bb.xa + sx;
+ torg = _currOffset.y;
+ cmin = _limit.bl.y + torg;
+ cmax = _limit.tr.y - tbb.yi + tbb.ya + torg;
+ lmargin = _margin;
+ break;
+ case 2 : // sum - moving along the positively-sloped vector, so the boundaries are the
+ // negatively-sloped boundaries.
+ vmin = max(max(sb.si - tsb.sa + ss, 2 * (bb.yi - tbb.ya + sy) + td), 2 * (bb.xi - tbb.xa + sx) - td);
+ vmax = min(min(sb.sa - tsb.si + ss, 2 * (bb.ya - tbb.yi + sy) + td), 2 * (bb.xa - tbb.xi + sx) - td);
+ otmin = tsb.di + td;
+ otmax = tsb.da + td;
+ omin = sb.di + sd;
+ omax = sb.da + sd;
+ torg = _currOffset.x + _currOffset.y;
+ cmin = _limit.bl.x + _limit.bl.y + torg;
+ cmax = _limit.tr.x + _limit.tr.y - tsb.si + tsb.sa + torg;
+ lmargin = _margin / ISQRT2;
+ break;
+ case 3 : // diff - moving along the negatively-sloped vector, so the boundaries are the
+ // positively-sloped boundaries.
+ vmin = max(max(sb.di - tsb.da + sd, 2 * (bb.xi - tbb.xa + sx) - ts), -2 * (bb.ya - tbb.yi + sy) + ts);
+ vmax = min(min(sb.da - tsb.di + sd, 2 * (bb.xa - tbb.xi + sx) - ts), -2 * (bb.yi - tbb.ya + sy) + ts);
+ otmin = tsb.si + ts;
+ otmax = tsb.sa + ts;
+ omin = sb.si + ss;
+ omax = sb.sa + ss;
+ torg = _currOffset.x - _currOffset.y;
+ cmin = _limit.bl.x - _limit.tr.y + torg;
+ cmax = _limit.tr.x - _limit.bl.y - tsb.di + tsb.da + torg;
+ lmargin = _margin / ISQRT2;
+ break;
+ default :
+ continue;
+ }
+
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout)
+ dbgout->setenv(1, reinterpret_cast<void *>(-1));
+#define DBGTAG(x) if (dbgout) dbgout->setenv(1, reinterpret_cast<void *>(-x));
+#else
+#define DBGTAG(x)
+#endif
+
+ if (orderFlags)  // sequence ordering requested: add the ordering regions for this axis
+ {
+ Position org(tx, ty);
+ float xminf = _limit.bl.x + _currOffset.x + tbb.xi;
+ float xpinf = _limit.tr.x + _currOffset.x + tbb.xa;
+ float ypinf = _limit.tr.y + _currOffset.y + tbb.ya;
+ float yminf = _limit.bl.y + _currOffset.y + tbb.yi;
+ switch (orderFlags) {
+ case SlotCollision::SEQ_ORDER_RIGHTUP :
+ {
+ float r1Xedge = cslot->seqAboveXoff() + 0.5f * (bb.xi + bb.xa) + sx;
+ float r3Xedge = cslot->seqBelowXlim() + bb.xa + sx + 0.5f * (tbb.xa - tbb.xi);
+ float r2Yedge = 0.5f * (bb.yi + bb.ya) + sy;
+
+ // DBGTAG(1x) means the regions are up and right
+ // region 1
+ DBGTAG(11)
+ addBox_slope(true, Rect(Position(xminf, r2Yedge), Position(r1Xedge, ypinf)),
+ tbb, tsb, org, 0, seq_above_wt, true, i);
+ // region 2
+ DBGTAG(12)
+ removeBox(Rect(Position(xminf, yminf), Position(r3Xedge, r2Yedge)), tbb, tsb, org, i);
+ // region 3, which end is zero is irrelevant since m weight is 0
+ DBGTAG(13)
+ addBox_slope(true, Rect(Position(r3Xedge, yminf), Position(xpinf, r2Yedge - cslot->seqValignHt())),
+ tbb, tsb, org, seq_below_wt, 0, true, i);
+ // region 4
+ DBGTAG(14)
+ addBox_slope(false, Rect(Position(sx + bb.xi, r2Yedge), Position(xpinf, r2Yedge + cslot->seqValignHt())),
+ tbb, tsb, org, 0, seq_valign_wt, true, i);
+ // region 5
+ DBGTAG(15)
+ addBox_slope(false, Rect(Position(sx + bb.xi, r2Yedge - cslot->seqValignHt()), Position(xpinf, r2Yedge)),
+ tbb, tsb, org, seq_below_wt, seq_valign_wt, false, i);
+ break;
+ }
+ case SlotCollision::SEQ_ORDER_LEFTDOWN :
+ {
+ float r1Xedge = 0.5f * (bb.xi + bb.xa) + cslot->seqAboveXoff() + sx;
+ float r3Xedge = bb.xi - cslot->seqBelowXlim() + sx - 0.5f * (tbb.xa - tbb.xi);
+ float r2Yedge = 0.5f * (bb.yi + bb.ya) + sy;
+ // DBGTAG(2x) means the regions are down and left
+ // region 1
+ DBGTAG(21)
+ addBox_slope(true, Rect(Position(r1Xedge, yminf), Position(xpinf, r2Yedge)),
+ tbb, tsb, org, 0, seq_above_wt, false, i);
+ // region 2
+ DBGTAG(22)
+ removeBox(Rect(Position(r3Xedge, r2Yedge), Position(xpinf, ypinf)), tbb, tsb, org, i);
+ // region 3
+ DBGTAG(23)
+ addBox_slope(true, Rect(Position(xminf, r2Yedge - cslot->seqValignHt()), Position(r3Xedge, ypinf)),
+ tbb, tsb, org, seq_below_wt, 0, false, i);
+ // region 4
+ DBGTAG(24)
+ addBox_slope(false, Rect(Position(xminf, r2Yedge), Position(sx + bb.xa, r2Yedge + cslot->seqValignHt())),
+ tbb, tsb, org, 0, seq_valign_wt, true, i);
+ // region 5
+ DBGTAG(25)
+ addBox_slope(false, Rect(Position(xminf, r2Yedge - cslot->seqValignHt()),
+ Position(sx + bb.xa, r2Yedge)), tbb, tsb, org, seq_below_wt, seq_valign_wt, false, i);
+ break;
+ }
+ case SlotCollision::SEQ_ORDER_NOABOVE : // enforce neighboring glyph being above
+ DBGTAG(31);
+ removeBox(Rect(Position(bb.xi - tbb.xa + sx, sy + bb.ya),
+ Position(bb.xa - tbb.xi + sx, ypinf)), tbb, tsb, org, i);
+ break;
+ case SlotCollision::SEQ_ORDER_NOBELOW : // enforce neighboring glyph being below
+ DBGTAG(32);
+ removeBox(Rect(Position(bb.xi - tbb.xa + sx, yminf),
+ Position(bb.xa - tbb.xi + sx, sy + bb.yi)), tbb, tsb, org, i);
+ break;
+ case SlotCollision::SEQ_ORDER_NOLEFT : // enforce neighboring glyph being to the left
+ DBGTAG(33)
+ removeBox(Rect(Position(xminf, bb.yi - tbb.ya + sy),
+ Position(bb.xi - tbb.xa + sx, bb.ya - tbb.yi + sy)), tbb, tsb, org, i);
+ break;
+ case SlotCollision::SEQ_ORDER_NORIGHT : // enforce neighboring glyph being to the right
+ DBGTAG(34)
+ removeBox(Rect(Position(bb.xa - tbb.xi + sx, bb.yi - tbb.ya + sy),
+ Position(xpinf, bb.ya - tbb.yi + sy)), tbb, tsb, org, i);
+ break;
+ default :
+ break;
+ }
+ }
+
+ if (vmax < cmin - lmargin || vmin > cmax + lmargin || omax < otmin - lmargin || omin > otmax + lmargin)  // no overlap on this axis, even allowing for the margin
+ continue;
+
+ // Process sub-boxes that are defined for this glyph.
+ // We only need to do this if there was in fact a collision with the main octabox.
+ uint8 numsub = gc.numSubBounds(gid);
+ if (numsub > 0)
+ {
+ bool anyhits = false;
+ for (int j = 0; j < numsub; ++j)
+ {
+ const BBox &sbb = gc.getSubBoundingBBox(gid, j);
+ const SlantBox &ssb = gc.getSubBoundingSlantBox(gid, j);
+ switch (i) {  // same formulas as for the main box, with the sub-box in place of the neighbor's box
+ case 0 : // x
+ vmin = max(max(sbb.xi-tbb.xa+sx, ssb.di-tsb.da+sd+ty), ssb.si-tsb.sa+ss-ty);
+ vmax = min(min(sbb.xa-tbb.xi+sx, ssb.da-tsb.di+sd+ty), ssb.sa-tsb.si+ss-ty);
+ omin = sbb.yi + sy;
+ omax = sbb.ya + sy;
+ break;
+ case 1 : // y
+ vmin = max(max(sbb.yi-tbb.ya+sy, tsb.di-ssb.da-sd+tx), ssb.si-tsb.sa+ss-tx);
+ vmax = min(min(sbb.ya-tbb.yi+sy, tsb.da-ssb.di-sd+tx), ssb.sa-tsb.si+ss-tx);
+ omin = sbb.xi + sx;
+ omax = sbb.xa + sx;
+ break;
+ case 2 : // sum
+ vmin = max(max(ssb.si-tsb.sa+ss, 2*(sbb.yi-tbb.ya+sy)+td), 2*(sbb.xi-tbb.xa+sx)-td);
+ vmax = min(min(ssb.sa-tsb.si+ss, 2*(sbb.ya-tbb.yi+sy)+td), 2*(sbb.xa-tbb.xi+sx)-td);
+ omin = ssb.di + sd;
+ omax = ssb.da + sd;
+ break;
+ case 3 : // diff
+ vmin = max(max(ssb.di-tsb.da+sd, 2*(sbb.xi-tbb.xa+sx)-ts), -2*(sbb.ya-tbb.yi+sy)+ts);
+ vmax = min(min(ssb.da-tsb.di+sd, 2*(sbb.xa-tbb.xi+sx)-ts), -2*(sbb.yi-tbb.ya+sy)+ts);
+ omin = ssb.si + ss;
+ omax = ssb.sa + ss;
+ break;
+ }
+ if (vmax < cmin - lmargin || vmin > cmax + lmargin || omax < otmin - lmargin || omin > otmax + lmargin)
+ continue;
+
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout)
+ dbgout->setenv(1, reinterpret_cast<void *>(j));
+#endif
+ if (omin > otmax)  // separated in the other axis, but by less than the margin: weighted cost rather than exclusion
+ _ranges[i].weightedAxis(i, vmin - lmargin, vmax + lmargin, 0, 0, 0, 0, 0,
+ sqr(lmargin - omin + otmax) * _marginWt, false);
+ else if (omax < otmin)
+ _ranges[i].weightedAxis(i, vmin - lmargin, vmax + lmargin, 0, 0, 0, 0, 0,
+ sqr(lmargin - otmin + omax) * _marginWt, false);
+ else
+ _ranges[i].exclude_with_margins(vmin, vmax, i);
+ anyhits = true;
+ }
+ if (anyhits)
+ isCol = true;
+ }
+ else // no sub-boxes
+ {
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout)
+ dbgout->setenv(1, reinterpret_cast<void *>(-1));
+#endif
+ isCol = true;
+ if (omin > otmax)
+ _ranges[i].weightedAxis(i, vmin - lmargin, vmax + lmargin, 0, 0, 0, 0, 0,
+ sqr(lmargin - omin + otmax) * _marginWt, false);
+ else if (omax < otmin)
+ _ranges[i].weightedAxis(i, vmin - lmargin, vmax + lmargin, 0, 0, 0, 0, 0,
+ sqr(lmargin - otmin + omax) * _marginWt, false);
+ else
+ _ranges[i].exclude_with_margins(vmin, vmax, i);
+
+ }
+ }
+ }
+ bool res = true;
+ if (cslot->exclGlyph() > 0 && gc.check(cslot->exclGlyph()) && !isExclusion)  // !isExclusion keeps the nested call below from recursing further
+ {
+ // Set up the bogus slot representing the exclusion glyph.
+ Slot *exclSlot = seg->newSlot();
+ // If no slot could be obtained, skip the exclusion glyph but still fall through,
+ // so that a collision already recorded above is reported through hasCol.
+ if (exclSlot)
+ {
+ exclSlot->setGlyph(seg, cslot->exclGlyph());
+ Position exclOrigin(slot->origin() + cslot->exclOffset());
+ exclSlot->origin(exclOrigin);
+ SlotCollision exclInfo(seg, exclSlot);
+ res &= mergeSlot(seg, exclSlot, &exclInfo, currShift, isAfter, sameCluster, isCol, true, dbgout );
+ seg->freeSlot(exclSlot);
+ }
+ }
+ hasCol |= isCol;
+ return res;
+
+} // end of ShiftCollider::mergeSlot
+
+
+// Work out the shift to apply to the target glyph. Each of the four axes is
+// asked for its closest acceptable position; among the axes that report a
+// non-negative cost, the first one that beats the running best by more than
+// 0.01 wins. isCol comes back true only when no axis had a non-negative cost,
+// in which case the returned shift is (0, 0).
+Position ShiftCollider::resolve(GR_MAYBE_UNUSED Segment *seg, bool &isCol, GR_MAYBE_UNUSED json * const dbgout)
+{
+    // Current offset of the target projected onto each axis.
+    const float axisBase[4] = {
+        _currOffset.x,                  // x direction
+        _currOffset.y,                  // y direction
+        _currOffset.x + _currOffset.y,  // sum (negatively-sloped diagonals)
+        _currOffset.x - _currOffset.y   // diff (positively-sloped diagonals)
+    };
+    float    lowestCost = (float)(std::numeric_limits<float>::max() / 2);
+    Position chosen     = Position(0, 0);
+#if !defined GRAPHITE2_NTRACING
+    int bestAxis = -1;
+    if (dbgout)
+    {
+        outputJsonDbgStartSlot(dbgout, seg);
+        *dbgout << "vectors" << json::array;
+    }
+#endif
+    isCol = true;
+    for (int axis = 0; axis != 4; ++axis)
+    {
+        float cost = -1;
+        // Best position on this axis, made relative to the current offset.
+        const float pos = _ranges[axis].closest(0, cost) - axisBase[axis];
+#if !defined GRAPHITE2_NTRACING
+        if (dbgout)
+            outputJsonDbgOneVector(dbgout, seg, axis, axisBase[axis], cost, pos);
+#endif
+        if (!(cost >= 0.0f))
+            continue;       // this axis has nothing to offer
+
+        isCol = false;
+        Position candidate;
+        if (axis == 0)
+            candidate = Position(pos, _currShift.y);
+        else if (axis == 1)
+            candidate = Position(_currShift.x, pos);
+        else if (axis == 2)
+            candidate = Position(0.5f * (_currShift.x - _currShift.y + pos),
+                                 0.5f * (_currShift.y - _currShift.x + pos));
+        else
+            candidate = Position(0.5f * (_currShift.x + _currShift.y + pos),
+                                 0.5f * (_currShift.x + _currShift.y - pos));
+
+        if (cost < lowestCost - 0.01f)
+        {
+            lowestCost = cost;
+            chosen = candidate;
+#if !defined GRAPHITE2_NTRACING
+            bestAxis = axis;
+#endif
+        }
+    }
+
+#if !defined GRAPHITE2_NTRACING
+    if (dbgout)
+        outputJsonDbgEndSlot(dbgout, chosen, bestAxis, isCol);
+#endif
+
+    return chosen;
+
+} // end of ShiftCollider::resolve
+
+
+#if !defined GRAPHITE2_NTRACING
+
+// Write the zone ranges to the debug stream. A negative axis requests the full
+// dump: a header describing the target followed by a "ranges" array holding
+// all four axes. Otherwise only the entry for the given axis is written.
+void ShiftCollider::outputJsonDbg(json * const dbgout, Segment *seg, int axis)
+{
+    const bool everyAxis = axis < 0;
+    const int  firstAxis = everyAxis ? 0 : axis;
+    const int  lastAxis  = everyAxis ? 3 : axis;
+
+    if (everyAxis)
+    {
+        *dbgout << "gid" << _target->gid()
+                << "limit" << _limit
+                << "target" << json::object
+                    << "origin" << _target->origin()
+                    << "margin" << _margin
+                    << "bbox" << seg->theGlyphBBoxTemporary(_target->gid())
+                    << "slantbox" << seg->getFace()->glyphs().slant(_target->gid())
+                << json::close;     // target object
+        *dbgout << "ranges" << json::array;
+    }
+
+    for (int a = firstAxis; a <= lastAxis; ++a)
+    {
+        *dbgout << json::flat << json::array << _ranges[a].position();
+        Zones::const_iterator zone = _ranges[a].begin();
+        const Zones::const_iterator stop = _ranges[a].end();
+        while (zone != stop)
+        {
+            *dbgout << json::flat << json::array
+                        << Position(zone->x, zone->xm) << zone->sm << zone->smx << zone->c
+                    << json::close;
+            ++zone;
+        }
+        *dbgout << json::close;
+    }
+
+    if (everyAxis)
+        *dbgout << json::close;     // ranges array opened above
+}
+
+// Open the per-slot debug object and write the target's details into it. The
+// slot object is deliberately left open; outputJsonDbgEndSlot closes it.
+void ShiftCollider::outputJsonDbgStartSlot(json * const dbgout, Segment *seg)
+{
+    const unsigned short targetGid = _target->gid();
+
+    *dbgout << json::object     // slot - not closed till the end of the caller method
+            << "slot" << objectid(dslot(seg, _target))
+            << "gid" << _target->gid()
+            << "limit" << _limit
+            << "target" << json::object
+                << "origin" << _origin
+                << "currShift" << _currShift
+                << "currOffset" << seg->collisionInfo(_target)->offset()
+                << "bbox" << seg->theGlyphBBoxTemporary(targetGid)
+                << "slantBox" << seg->getFace()->glyphs().slant(targetGid)
+                << "fix" << "shift";
+
+    // Close the target object only.
+    *dbgout << json::close;
+}
+
+// Finish the per-slot debug output: close the "vectors" array, write the
+// outcome of resolve(), then close the slot object itself.
+void ShiftCollider::outputJsonDbgEndSlot(GR_MAYBE_UNUSED json * const dbgout,
+        Position resultPos, int bestAxis, bool isCol)
+{
+    *dbgout << json::close;     // vectors array
+
+    // (a "scraping" entry, _scraping[bestAxis], used to be written after "result")
+    *dbgout << "result" << resultPos
+            << "bestAxis" << bestAxis
+            << "stillBad" << isCol
+            << json::close;     // slot object
+}
+
+// Write the debug object for one axis: its label, the target minimum, the
+// removals and ranges for that axis, and the best cost/value that were found.
+// bestVal is written with tleft added back on.
+void ShiftCollider::outputJsonDbgOneVector(json * const dbgout, Segment *seg, int axis,
+        float tleft, float bestCost, float bestVal)
+{
+    static const char * const axisLabels[4] = { "x", "y", "sum (NE-SW)", "diff (NW-SE)" };
+    const char * label = (axis >= 0 && axis < 4) ? axisLabels[axis] : "???";
+
+    *dbgout << json::object     // vector
+            << "direction" << label
+            << "targetMin" << tleft;
+
+    outputJsonDbgRemovals(dbgout, axis, seg);
+
+    *dbgout << "ranges";
+    outputJsonDbg(dbgout, seg, axis);
+
+    *dbgout << "bestCost" << bestCost
+            << "bestVal" << bestVal + tleft
+            << json::close;     // vectors object
+}
+
+// Write the "removals" array for one axis. The array is opened and closed
+// here; _ranges[axis].jsonDbgOut(seg) is called in between.
+void ShiftCollider::outputJsonDbgRemovals(json * const dbgout, int axis, Segment *seg)
+{
+    json & out = *dbgout;
+
+    out << "removals" << json::array;
+    _ranges[axis].jsonDbgOut(seg);
+    out << json::close;     // removals array
+}
+
+#endif // !defined GRAPHITE2_NTRACING
+
+
+//// KERN-COLLIDER ////
+
+// Cap x from above using the intervals [al, au] and [bl, bu]:
+//   a starts and ends below b      -> the smaller of au and x
+//   a starts and ends not below b  -> the smaller of bl and x
+//   anything else                  -> x unchanged
+inline
+static float localmax (float al, float au, float bl, float bu, float x)
+{
+    const bool aStartsLower = al < bl;
+
+    if (aStartsLower && au < bu)
+        return au < x ? au : x;
+    if (!aStartsLower && au > bu)
+        return bl < x ? bl : x;
+    return x;
+}
+
+// Cap x from below using the intervals [al, au] and [bl, bu]:
+//   b starts and ends above a            -> the larger of bl and x
+//   b does not start above a, a ends higher -> the larger of al and x
+//   anything else                        -> x unchanged
+inline
+static float localmin(float al, float au, float bl, float bu, float x)
+{
+    const bool bStartsHigher = bl > al;
+
+    if (bStartsHigher && bu > au)
+        return bl > x ? bl : x;
+    if (!bStartsHigher && au > bu)
+        return al > x ? al : x;
+    return x;
+}
+
+// Return the given edge of the glyph at height y, taking any slant box into account. isRight selects the right edge (pushed out by +margin) rather than the left (pushed out by -margin); y and width give the centre and height of the horizontal slice examined.
+static float get_edge(Segment *seg, const Slot *s, const Position &shift, float y, float width, float margin, bool isRight)
+{
+ const GlyphCache &gc = seg->getFace()->glyphs();
+ unsigned short gid = s->gid();
+ float sx = s->origin().x + shift.x;
+ float sy = s->origin().y + shift.y;
+ uint8 numsub = gc.numSubBounds(gid);
+ float res = isRight ? (float)-1e38 : (float)1e38;  // sentinel, returned unchanged if no box overlaps the slice
+
+ if (numsub > 0)  // glyph has sub-boxes: take the most extreme edge over those that overlap the slice
+ {
+ for (int i = 0; i < numsub; ++i)
+ {
+ const BBox &sbb = gc.getSubBoundingBBox(gid, i);
+ const SlantBox &ssb = gc.getSubBoundingSlantBox(gid, i);
+ if (sy + sbb.yi - margin > y + width / 2 || sy + sbb.ya + margin < y - width / 2)  // sub-box, grown by margin, misses the slice
+ continue;
+ if (isRight)
+ {
+ float x = sx + sbb.xa + margin;
+ if (x > res)  // only refine with the slant box when the bbox edge could improve on res
+ {
+ float td = sx - sy + ssb.da + margin + y;
+ float ts = sx + sy + ssb.sa + margin - y;
+ x = localmax(td - width / 2, td + width / 2, ts - width / 2, ts + width / 2, x);
+ if (x > res)
+ res = x;
+ }
+ }
+ else
+ {
+ float x = sx + sbb.xi - margin;
+ if (x < res)
+ {
+ float td = sx - sy + ssb.di - margin + y;
+ float ts = sx + sy + ssb.si - margin - y;
+ x = localmin(td - width / 2, td + width / 2, ts - width / 2, ts + width / 2, x);
+ if (x < res)
+ res = x;
+ }
+ }
+ }
+ }
+ else  // no sub-boxes: use the glyph's overall bounding box and slant box
+ {
+ const BBox &bb = gc.getBoundingBBox(gid);
+ const SlantBox &sb = gc.getBoundingSlantBox(gid);
+ if (sy + bb.yi - margin > y + width / 2 || sy + bb.ya + margin < y - width / 2)  // whole glyph misses the slice: return the sentinel
+ return res;
+ float td = sx - sy + y;
+ float ts = sx + sy - y;
+ if (isRight)
+ res = localmax(td + sb.da - width / 2, td + sb.da + width / 2, ts + sb.sa - width / 2, ts + sb.sa + width / 2, sx + bb.xa) + margin;  // here margin is applied after the clamp, not before as in the sub-box branch
+ else
+ res = localmin(td + sb.di - width / 2, td + sb.di + width / 2, ts + sb.si - width / 2, ts + sb.si + width / 2, sx + bb.xi) - margin;
+ }
+ return res;
+}
+
+
+bool KernCollider::initSlot(Segment *seg, Slot *aSlot, const Rect &limit, float margin,
+ const Position &currShift, const Position &offsetPrev, int dir,
+ float ymin, float ymax, GR_MAYBE_UNUSED json * const dbgout)  // Prepares the per-slice edge table (_edges) for the cluster containing aSlot; returns false if a cluster glyph fails gc.check() during the edge scan, true otherwise.
+{
+ const GlyphCache &gc = seg->getFace()->glyphs();
+ const Slot *base = aSlot;
+ // const Slot *last = aSlot;
+ const Slot *s;
+ int numSlices;
+ while (base->attachedTo())  // walk up to the root of the attachment chain
+ base = base->attachedTo();
+ if (margin < 10) margin = 10;  // margin is clamped to at least 10
+
+ _limit = limit;
+ _offsetPrev = offsetPrev; // kern from a previous pass
+
+ // Calculate the height of the glyph and how many horizontal slices to use.
+ if (_maxy >= 1e37f)  // NOTE(review): presumably _maxy >= 1e37f marks a collider with no slices set up yet - confirm where _maxy is reset
+ {
+ _sliceWidth = margin / 1.5f;
+ _maxy = ymax + margin;
+ _miny = ymin - margin;
+ numSlices = int((_maxy - _miny + 2) / (_sliceWidth / 1.5f) + 1.f); // +2 helps with rounding errors
+ _edges.clear();
+ _edges.insert(_edges.begin(), numSlices, (dir & 1) ? 1e38f : -1e38f);  // every slice starts at the "no edge" fill value
+ _xbound = (dir & 1) ? (float)1e38f : (float)-1e38f;
+ }
+ else if (_maxy != ymax || _miny != ymin)  // vertical range differs from the stored one: resize _edges, then skip the edge scan via goto done
+ {
+ if (_miny != ymin)
+ {
+ numSlices = int((ymin - margin - _miny) / _sliceWidth - 1);  // whole slices to add (negative) or drop (positive) at the bottom
+ _miny += numSlices * _sliceWidth;
+ if (numSlices < 0)
+ _edges.insert(_edges.begin(), -numSlices, (dir & 1) ? 1e38f : -1e38f);
+ else if ((unsigned)numSlices < _edges.size()) // this shouldn't fire since we always grow the range
+ {
+ Vector<float>::iterator e = _edges.begin();
+ while (numSlices--)
+ ++e;
+ _edges.erase(_edges.begin(), e);
+ }
+ }
+ if (_maxy != ymax)
+ {
+ numSlices = int((ymax + margin - _miny) / _sliceWidth + 1);  // slice count needed to reach the new top
+ _maxy = numSlices * _sliceWidth + _miny;
+ if (numSlices > (int)_edges.size())
+ _edges.insert(_edges.end(), numSlices - _edges.size(), (dir & 1) ? 1e38f : -1e38f);
+ else if (numSlices < (int)_edges.size()) // this shouldn't fire since we always grow the range
+ {
+ while ((int)_edges.size() > numSlices)
+ _edges.pop_back();
+ }
+ }
+ goto done;
+ }
+ numSlices = int(_edges.size());
+
+#if !defined GRAPHITE2_NTRACING
+ // Debugging
+ _seg = seg;
+ _slotNear.clear();
+ _slotNear.insert(_slotNear.begin(), numSlices, NULL);
+ _nearEdges.clear();
+ _nearEdges.insert(_nearEdges.begin(), numSlices, (dir & 1) ? -1e38f : +1e38f);
+#endif
+
+ // Determine the trailing edge of each slice (ie, left edge for a RTL glyph).
+ for (s = base; s; s = s->nextInCluster(s))  // every slot in the cluster rooted at base
+ {
+ SlotCollision *c = seg->collisionInfo(s);
+ if (!gc.check(s->gid()))
+ return false;
+ const BBox &bs = gc.getBoundingBBox(s->gid());
+ float x = s->origin().x + c->shift().x + ((dir & 1) ? bs.xi : bs.xa);  // bbox edge used as a cheap pre-test before calling get_edge
+ // Loop over slices.
+ // Note smin might not be zero if glyph s is not at the bottom of the cluster; similarly for smax.
+ float toffset = c->shift().y - _miny + 1 + s->origin().y;
+ int smin = max(0, int((bs.yi + toffset) / _sliceWidth));  // slice indices are clamped to [0, numSlices - 1]
+ int smax = min(numSlices - 1, int((bs.ya + toffset) / _sliceWidth + 1));
+ for (int i = smin; i <= smax; ++i)
+ {
+ float t;
+ float y = _miny - 1 + (i + .5f) * _sliceWidth; // vertical center of slice
+ if ((dir & 1) && x < _edges[i])  // low bit of dir set: keep the smallest left edge per slice
+ {
+ t = get_edge(seg, s, c->shift(), y, _sliceWidth, margin, false);
+ if (t < _edges[i])
+ {
+ _edges[i] = t;
+ if (t < _xbound)
+ _xbound = t;
+ }
+ }
+ else if (!(dir & 1) && x > _edges[i])  // low bit of dir clear: keep the largest right edge per slice
+ {
+ t = get_edge(seg, s, c->shift(), y, _sliceWidth, margin, true);
+ if (t > _edges[i])
+ {
+ _edges[i] = t;
+ if (t > _xbound)
+ _xbound = t;
+ }
+ }
+ }
+ }
+ done:
+ _mingap = (float)1e37; // less than 1e38 s.t. 1e38-_mingap is really big
+ _target = aSlot;
+ _margin = margin;  // stores the clamped margin
+ _currShift = currShift;
+ return true;
+} // end of KernCollider::initSlot
+
+
+// Determine how much the target slot needs to kern away from the given slot.
+// In other words, merge information from given slot's position with what the target slot knows
+// about how it can kern.
+// Return false if we know there is no collision, true if we think there might be one. Also returns false when the glyph cache rejects the slot's glyph id.
+bool KernCollider::mergeSlot(Segment *seg, Slot *slot, const Position &currShift, float currSpace, int dir, GR_MAYBE_UNUSED json * const dbgout)
+{
+ int rtl = (dir & 1) * 2 - 1;  // +1 when the low bit of dir is set, -1 otherwise
+ if (!seg->getFace()->glyphs().check(slot->gid()))
+ return false;
+ const Rect &bb = seg->theGlyphBBoxTemporary(slot->gid());
+ const float sx = slot->origin().x + currShift.x;
+ float x = (sx + (rtl > 0 ? bb.tr.x : bb.bl.x)) * rtl;  // facing bbox edge, multiplied by rtl so both directions compare the same way
+ // this isn't going to reduce _mingap so skip
+ if (_hit && x < rtl * (_xbound - _mingap - currSpace))
+ return false;
+
+ const float sy = slot->origin().y + currShift.y;
+ int smin = max(1, int((bb.bl.y + (1 - _miny + sy)) / _sliceWidth + 1)) - 1;  // slice range covered by the bbox, clamped so indices stay within [0, _edges.size() - 1]
+ int smax = min((int)_edges.size() - 2, int((bb.tr.y + (1 - _miny + sy)) / _sliceWidth + 1)) + 1;
+ if (smin > smax)
+ return false;
+ bool collides = false;
+ bool nooverlap = true;
+
+ for (int i = smin; i <= smax; ++i)
+ {
+ float here = _edges[i] * rtl;
+ if (here > (float)9e37)  // slice still holds the +/-1e38 fill value written by initSlot
+ continue;
+ if (!_hit || x > here - _mingap - currSpace)
+ {
+ float y = (float)(_miny - 1 + (i + .5f) * _sliceWidth); // vertical center of slice
+ // 2 * currSpace to account for the space that is already separating them and the space we want to add
+ float m = get_edge(seg, slot, currShift, y, _sliceWidth, 0., rtl > 0) * rtl + 2 * currSpace;
+ if (m < (float)-8e37) // only true if the glyph has a gap in it
+ continue;
+ nooverlap = false;
+ float t = here - m;  // gap between the stored edge and this slot's edge in slice i
+ // _mingap is positive to shrink
+ if (t < _mingap || (!_hit && !collides))  // take a smaller gap, or the first gap measured when nothing has been hit yet
+ {
+ _mingap = t;
+ collides = true;
+ }
+#if !defined GRAPHITE2_NTRACING
+ // Debugging - remember the closest neighboring edge for this slice.
+ if (m > rtl * _nearEdges[i])
+ {
+ _slotNear[i] = slot;
+ _nearEdges[i] = m * rtl;
+ }
+#endif
+ }
+ else
+ nooverlap = false;
+ }
+ if (nooverlap)  // no slice gave a usable edge: fall back to a gap derived from _xbound and the bbox edge
+ _mingap = max(_mingap, _xbound - rtl * (currSpace + _margin + x));
+ if (collides && !nooverlap)
+ _hit = true;
+ return collides | nooverlap; // note that true is not a necessarily reliable value
+
+} // end of KernCollider::mergeSlot
+
+
+// Return the amount to kern by.
+Position KernCollider::resolve(GR_MAYBE_UNUSED Segment *seg, GR_MAYBE_UNUSED Slot *slot,
+ int dir, GR_MAYBE_UNUSED json * const dbgout)
+{
+ float resultNeeded = (1 - 2 * (dir & 1)) * _mingap; // _mingap, negated when the low bit of dir is set
+ // float resultNeeded = (1 - 2 * (dir & 1)) * (_mingap - margin);
+ float result = min(_limit.tr.x - _offsetPrev.x, max(resultNeeded, _limit.bl.x - _offsetPrev.x)); // clamp between _limit.bl.x and _limit.tr.x, both taken relative to _offsetPrev.x
+
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout) // tracing builds only: dump the collider state to the JSON log
+ {
+ *dbgout << json::object // slot
+ << "slot" << objectid(dslot(seg, _target))
+ << "gid" << _target->gid()
+ << "limit" << _limit
+ << "miny" << _miny
+ << "maxy" << _maxy
+ << "slicewidth" << _sliceWidth
+ << "target" << json::object
+ << "origin" << _target->origin()
+ //<< "currShift" << _currShift
+ << "offsetPrev" << _offsetPrev
+ << "bbox" << seg->theGlyphBBoxTemporary(_target->gid())
+ << "slantBox" << seg->getFace()->glyphs().slant(_target->gid())
+ << "fix" << "kern"
+ << json::close; // target object
+
+ *dbgout << "slices" << json::array;
+ for (int is = 0; is < (int)_edges.size(); is++) // one record per slice: target edge plus the nearest neighbour seen by mergeSlot
+ {
+ *dbgout << json::flat << json::object
+ << "i" << is
+ << "targetEdge" << _edges[is]
+ << "neighbor" << objectid(dslot(seg, _slotNear[is]))
+ << "nearEdge" << _nearEdges[is]
+ << json::close;
+ }
+ *dbgout << json::close; // slices array
+
+ *dbgout
+ << "xbound" << _xbound
+ << "minGap" << _mingap
+ << "needed" << resultNeeded
+ << "result" << result
+ << "stillBad" << (result != resultNeeded) // true when the clamp above changed the value
+ << json::close; // slot object
+ }
+#endif
+
+ return Position(result, 0.); // only the x component is ever non-zero
+
+} // end of KernCollider::resolve
+
+void KernCollider::shift(const Position &mv, int dir)
+{
+    const int sign = (dir & 1) ? -1 : 1;    // same value as 1 - 2 * (dir & 1)
+    for (int i = 0, n = (int)_edges.size(); i != n; ++i) _edges[i] += mv.x;   // move every slice edge by mv.x
+    _xbound += sign * mv.x;
+}
+
+//// SLOT-COLLISION ////
+
+// Initialize the collision attributes for the given slot.
+SlotCollision::SlotCollision(Segment *seg, Slot *slot)
+{
+ initFromSlot(seg, slot); // all attribute loading is delegated to initFromSlot()
+}
+
+void SlotCollision::initFromSlot(Segment *seg, Slot *slot)
+{
+ // Initialize slot attributes from glyph attributes.
+ // The order here must match the order in the grcompiler code,
+ // GrcSymbolTable::AssignInternalGlyphAttrIDs.
+ uint16 gid = slot->gid();
+ uint16 aCol = seg->silf()->aCollision(); // flags attr ID
+ const GlyphFace * glyphFace = seg->getFace()->glyphs().glyphSafe(gid);
+ if (!glyphFace)
+ return; // NOTE(review): no member is assigned on this path - confirm callers tolerate that
+ const sparse &p = glyphFace->attrs();
+ _flags = p[aCol]; // the collision attributes occupy 16 consecutive ids starting at aCol
+ _limit = Rect(Position(int16(p[aCol+1]), int16(p[aCol+2])), // ids +1..+4 are reinterpreted as signed 16-bit values
+ Position(int16(p[aCol+3]), int16(p[aCol+4])));
+ _margin = p[aCol+5];
+ _marginWt = p[aCol+6];
+ // Sequence-related attributes, ids aCol+7 .. aCol+15.
+ _seqClass = p[aCol+7];
+ _seqProxClass = p[aCol+8];
+ _seqOrder = p[aCol+9];
+ _seqAboveXoff = p[aCol+10];
+ _seqAboveWt = p[aCol+11];
+ _seqBelowXlim = p[aCol+12];
+ _seqBelowWt = p[aCol+13];
+ _seqValignHt = p[aCol+14];
+ _seqValignWt = p[aCol+15];
+
+ // These attributes do not have corresponding glyph attribute:
+ _exclGlyph = 0;
+ _exclOffset = Position(0, 0);
+}
+
+float SlotCollision::getKern(int dir) const
+{
+    // Only slots flagged COLL_KERN report a kern; everything else yields 0.
+    if (!(_flags & SlotCollision::COLL_KERN))
+        return 0;
+    return float(_shift.x * ((dir & 1) ? -1 : 1));   // negated when the low bit of dir is set
+}
+
+bool SlotCollision::ignore() const
+{
+    return (flags() & (SlotCollision::COLL_IGNORE | SlotCollision::COLL_ISSPACE)) != 0;   // either flag is enough
+}
diff --git a/thirdparty/graphite/src/Decompressor.cpp b/thirdparty/graphite/src/Decompressor.cpp
new file mode 100644
index 0000000000..42dc9113e5
--- /dev/null
+++ b/thirdparty/graphite/src/Decompressor.cpp
@@ -0,0 +1,125 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2015, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include <cassert>
+
+#include "inc/Decompressor.h"
+#include "inc/Compression.h"
+
+using namespace lz4;
+
+namespace {
+
+inline
+u32 read_literal(u8 const * &s, u8 const * const e, u32 l) {
+    // A length of 15 continues in the following bytes: each one is added
+    // to l, and the run stops after a byte other than 0xff or when s hits e.
+    if (l != 15 || s == e) return l;
+    u8 ext;
+    do { ext = *s++; l += ext; } while (ext == 0xff && s != e);
+    return l;
+}
+
+bool read_sequence(u8 const * &src, u8 const * const end, u8 const * &literal,
+ u32 & literal_len, u32 & match_len, u32 & match_dist)
+{
+ u8 const token = *src++; // high nibble seeds the literal length, low nibble the match length
+
+ literal_len = read_literal(src, end, token >> 4);
+ literal = src; // literal bytes start here; the caller copies them
+ src += literal_len;
+
+ // Normal exit for end of stream, wrap-around check and partial match check.
+ if (src > end - sizeof(u16) || src < literal)
+ return false;
+
+ match_dist = *src++; // two-byte match distance, low byte first
+ match_dist |= *src++ << 8;
+ match_len = read_literal(src, end, token & 0xf) + MINMATCH;
+
+ // Malformed stream check.
+ return src <= end-MINCODA;
+}
+
+}
+
+int lz4::decompress(void const *in, size_t in_size, void *out, size_t out_size)
+{
+ if (out_size <= in_size || in_size < MINSRCSIZE) // output must be strictly larger than input; input must reach the minimum size
+ return -1;
+
+ u8 const * src = static_cast<u8 const *>(in),
+ * literal = 0,
+ * const src_end = src + in_size;
+
+ u8 * dst = static_cast<u8*>(out),
+ * const dst_end = dst + out_size;
+
+ // Check the in and out size hasn't wrapped around.
+ if (src >= src_end || dst >= dst_end)
+ return -1;
+
+ u32 literal_len = 0,
+ match_len = 0,
+ match_dist = 0;
+
+ while (read_sequence(src, src_end, literal, literal_len, match_len,
+ match_dist)) // false on the final (literal-only) sequence and on malformed input
+ {
+ if (literal_len != 0)
+ {
+ // Copy in literal. At this point a minimal literal + minimal
+ // match plus the coda (1 + 2 + 5) must be 8 bytes or more, allowing
+ // us to remain within the src buffer for an overrun_copy on
+ // machines up to 64 bits.
+ if (align(literal_len) > out_size)
+ return -1;
+ dst = overrun_copy(dst, literal, literal_len);
+ out_size -= literal_len; // out_size tracks the space still free in the output
+ }
+
+ // Copy, possibly repeating, match from earlier in the
+ // decoded output.
+ u8 const * const pcpy = dst - match_dist;
+ if (pcpy < static_cast<u8*>(out) // match would start before the output buffer
+ || match_len > unsigned(out_size - LASTLITERALS)
+ // Wrap around checks:
+ || out_size < LASTLITERALS || pcpy >= dst)
+ return -1;
+ if (dst > pcpy+sizeof(unsigned long)
+ && align(match_len) <= out_size)
+ dst = overrun_copy(dst, pcpy, match_len);
+ else
+ dst = safe_copy(dst, pcpy, match_len);
+ out_size -= match_len;
+ }
+
+ if (literal > src_end - literal_len || literal_len > out_size) // trailing literal must fit inside both buffers
+ return -1;
+ dst = fast_copy(dst, literal, literal_len);
+
+ return int(dst - (u8*)out); // number of bytes written; every failure path above returns -1
+}
diff --git a/thirdparty/graphite/src/Face.cpp b/thirdparty/graphite/src/Face.cpp
new file mode 100644
index 0000000000..3e106050d7
--- /dev/null
+++ b/thirdparty/graphite/src/Face.cpp
@@ -0,0 +1,366 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include <cstring>
+#include "graphite2/Segment.h"
+#include "inc/CmapCache.h"
+#include "inc/debug.h"
+#include "inc/Decompressor.h"
+#include "inc/Endian.h"
+#include "inc/Face.h"
+#include "inc/FileFace.h"
+#include "inc/GlyphFace.h"
+#include "inc/json.h"
+#include "inc/Segment.h"
+#include "inc/NameTable.h"
+#include "inc/Error.h"
+
+using namespace graphite2;
+
+namespace
+{
+enum compression // scheme id; Face::Table::decompress() casts (hdr >> 27) to this type
+{
+ NONE,
+ LZ4
+};
+
+}
+
+Face::Face(const void* appFaceHandle/*non-NULL*/, const gr_face_ops & ops)
+: m_appFaceHandle(appFaceHandle),
+ m_pFileFace(NULL),
+ m_pGlyphFaceCache(NULL),
+ m_cmap(NULL),
+ m_pNames(NULL),
+ m_logger(NULL),
+ m_error(0), m_errcntxt(0),
+ m_silfs(NULL),
+ m_numSilf(0),
+ m_ascent(0),
+ m_descent(0)
+{
+ memset(&m_ops, 0, sizeof m_ops); // zero first so any part of m_ops not covered by the copy below stays 0
+ memcpy(&m_ops, &ops, min(sizeof m_ops, ops.size)); // copy no more than the caller's declared struct size
+}
+
+
+Face::~Face()
+{
+ setLogger(0); // deletes the json logger in tracing builds
+ delete m_pGlyphFaceCache;
+ delete m_cmap;
+ delete[] m_silfs; // allocated with new[] in readGraphite()
+#ifndef GRAPHITE2_NFILEFACE
+ delete m_pFileFace; // owned once handed over through takeFileFace()
+#endif
+ delete m_pNames;
+}
+
+float Face::default_glyph_advance(const void* font_ptr, gr_uint16 glyphid)
+{
+    // The opaque handle is a Font: the result is the glyph's stored x advance times the font's scale.
+    const Font * const fnt = static_cast<const Font *>(font_ptr);
+    return fnt->face().glyphs().glyph(glyphid)->theAdvance().x * fnt->scale();
+}
+
+bool Face::readGlyphs(uint32 faceOptions)
+{
+ Error e;
+#ifdef GRAPHITE2_TELEMETRY
+ telemetry::category _glyph_cat(tele.glyph);
+#endif
+ error_context(EC_READGLYPHS);
+ m_pGlyphFaceCache = new GlyphCache(*this, faceOptions);
+ // Reject the face when the cache is missing, has no glyphs or reports 0 units per em.
+ if (e.test(!m_pGlyphFaceCache, E_OUTOFMEM)
+ || e.test(m_pGlyphFaceCache->numGlyphs() == 0, E_NOGLYPHS)
+ || e.test(m_pGlyphFaceCache->unitsPerEm() == 0, E_BADUPEM))
+ {
+ return error(e);
+ }
+ // The gr_face_cacheCmap option selects the cached cmap implementation.
+ if (faceOptions & gr_face_cacheCmap)
+ m_cmap = new CachedCmap(*this);
+ else
+ m_cmap = new DirectCmap(*this);
+ if (e.test(!m_cmap, E_OUTOFMEM) || e.test(!*m_cmap, E_BADCMAP))
+ return error(e);
+
+ if (faceOptions & gr_face_preloadGlyphs)
+ nameTable(); // preload the name table along with the glyphs.
+
+ return true;
+}
+
+bool Face::readGraphite(const Table & silf)
+{
+#ifdef GRAPHITE2_TELEMETRY
+ telemetry::category _silf_cat(tele.silf);
+#endif
+ Error e;
+ error_context(EC_READSILF);
+ const byte * p = silf;
+ if (e.test(!p, E_NOSILF) || e.test(silf.size() < 20, E_BADSIZE)) return error(e); // table must exist and be at least 20 bytes
+
+ const uint32 version = be::read<uint32>(p);
+ if (e.test(version < 0x00020000, E_TOOOLD)) return error(e); // versions below 0x00020000 are rejected
+ if (version >= 0x00030000)
+ be::skip<uint32>(p); // compilerVersion
+ m_numSilf = be::read<uint16>(p);
+
+ be::skip<uint16>(p); // reserved
+
+ bool havePasses = false; // set when at least one sub-table reports passes
+ m_silfs = new Silf[m_numSilf];
+ if (e.test(!m_silfs, E_OUTOFMEM)) return error(e);
+ for (int i = 0; i < m_numSilf; i++)
+ {
+ error_context(EC_ASILF + (i << 8));
+ const uint32 offset = be::read<uint32>(p), // start of sub-table i
+ next = i == m_numSilf - 1 ? uint32(silf.size()) : be::peek<uint32>(p); // its end: the next offset, or the table size for the last one
+ if (e.test(next > silf.size() || offset >= next, E_BADSIZE))
+ return error(e);
+
+ if (!m_silfs[i].readGraphite(silf + offset, next - offset, *this, version))
+ return false;
+
+ if (m_silfs[i].numPasses())
+ havePasses = true;
+ }
+
+ return havePasses; // false when no sub-table has any passes
+}
+
+bool Face::readFeatures()
+{
+ return m_Sill.readFace(*this); // feature and language loading is delegated to m_Sill
+}
+
+bool Face::runGraphite(Segment *seg, const Silf *aSilf) const
+{
+#if !defined GRAPHITE2_NTRACING
+ json * dbgout = logger();
+ if (dbgout) // tracing: open the segment object and its "passes" array
+ {
+ *dbgout << json::object
+ << "id" << objectid(seg)
+ << "passes" << json::array;
+ }
+#endif
+
+// if ((seg->dir() & 1) != aSilf->dir())
+// seg->reverseSlots();
+ if ((seg->dir() & 3) == 3 && aSilf->bidiPass() == 0xFF) // both low direction bits set and bidiPass() is 0xFF
+ seg->doMirror(aSilf->aMirror());
+ bool res = aSilf->runGraphite(seg, 0, aSilf->positionPass(), true); // first run: passes 0 up to positionPass()
+ if (res)
+ {
+ seg->associateChars(0, seg->charInfoCount());
+ if (aSilf->flags() & 0x20) // collision setup only when Silf flag bit 0x20 is set
+ res &= seg->initCollisions();
+ if (res)
+ res &= aSilf->runGraphite(seg, aSilf->positionPass(), aSilf->numPasses(), false); // second run: positionPass() up to numPasses()
+ }
+
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout) // tracing: close "passes", then dump output slots, advance and chars
+{
+ seg->positionSlots(0, 0, 0, seg->currdir());
+ *dbgout << json::item
+ << json::close // Close up the passes array
+ << "outputdir" << (seg->currdir() ? "rtl" : "ltr")
+ << "output" << json::array;
+ for(Slot * s = seg->first(); s; s = s->next())
+ *dbgout << dslot(seg, s);
+ *dbgout << json::close
+ << "advance" << seg->advance()
+ << "chars" << json::array;
+ for(size_t i = 0, n = seg->charInfoCount(); i != n; ++i)
+ *dbgout << json::flat << *seg->charinfo(int(i));
+ *dbgout << json::close // Close up the chars array
+ << json::close; // Close up the segment object
+ }
+#endif
+
+ return res;
+}
+
+void Face::setLogger(FILE * log_file GR_MAYBE_UNUSED)
+{
+#if !defined GRAPHITE2_NTRACING
+ delete m_logger; // drop any previous logger before installing the new one
+ m_logger = log_file ? new json(log_file) : 0; // a null log_file just clears the logger
+#endif
+}
+
+const Silf *Face::chooseSilf(uint32 script) const
+{
+    // Script-based selection is not implemented yet: the three cases (no
+    // tables / one table or script 0 / anything else) come down to
+    // "NULL when there are no Silf tables, otherwise the first one".
+    (void)script;
+    const bool haveSilf = (m_numSilf != 0);
+    return haveSilf ? m_silfs : NULL;
+}
+
+uint16 Face::findPseudo(uint32 uid) const
+{
+    return m_numSilf != 0 ? m_silfs->findPseudo(uid) : 0;   // only the first Silf table is consulted; 0 when there is none
+}
+
+int32 Face::getGlyphMetric(uint16 gid, uint8 metric) const
+{
+    // Ascent and descent are stored on the face; every other metric is looked up on the glyph.
+    switch (metrics(metric))
+    {
+        case kgmetAscent :  return m_ascent;
+        case kgmetDescent : return m_descent;
+        default:            break;
+    }
+    return gid < glyphs().numGlyphs() ? glyphs().glyph(gid)->getMetric(metric) : 0;   // out-of-range gid yields 0
+}
+
+void Face::takeFileFace(FileFace* pFileFace GR_MAYBE_UNUSED/*takes ownership*/)
+{
+#ifndef GRAPHITE2_NFILEFACE
+ if (m_pFileFace==pFileFace) // same object passed again: nothing to do (and it must not be deleted)
+ return;
+
+ delete m_pFileFace; // release the previously owned FileFace, if any
+ m_pFileFace = pFileFace;
+#endif
+}
+
+NameTable * Face::nameTable() const
+{
+    if (!m_pNames) {   // built on first use; stays NULL (and is retried next call) if the table is unusable
+        const Table nameTbl(*this, Tag::name);
+        if (nameTbl) m_pNames = new NameTable(nameTbl, nameTbl.size());
+    }
+    return m_pNames;
+}
+
+uint16 Face::languageForLocale(const char * locale) const
+{
+    // nameTable() loads the name table on demand and hands back NULL when
+    // it could not be loaded; in that case 0 is reported.
+    NameTable * const names = nameTable();
+    return names ? names->getLanguageId(locale) : 0;
+}
+
+
+
+Face::Table::Table(const Face & face, const Tag n, uint32 version) throw()
+: _f(&face), _sz(0), _compressed(false)
+{
+ _p = static_cast<const byte *>((*_f->m_ops.get_table)(_f->m_appFaceHandle, n, &_sz)); // fetch the raw table through the application's get_table callback
+
+ if (!TtfUtil::CheckTable(n, _p, _sz))
+ {
+ release(); // Make sure we release the table buffer even if the table failed its checks
+ return;
+ }
+
+ if (be::peek<uint32>(_p) >= version) // leading uint32 at or above the given threshold: try to decompress
+ decompress();
+}
+
+void Face::Table::release()
+{
+ if (_compressed) // buffer was installed by decompress(): it is ours to free()
+ free(const_cast<byte *>(_p));
+ else if (_p && _f->m_ops.release_table) // otherwise hand it back through the optional release_table callback
+ (*_f->m_ops.release_table)(_f->m_appFaceHandle, _p);
+ _p = 0; _sz = 0;
+}
+
+Face::Table & Face::Table::operator = (const Table && rhs) throw()
+{
+ if (this == &rhs) return *this; // self-assignment: nothing to do
+ release(); // give up the buffer currently held
+ new (this) Table(std::move(rhs)); // re-construct in place from rhs (NOTE(review): rhs is const, so which constructor is selected depends on the class declaration - confirm)
+ return *this;
+}
+
+Error Face::Table::decompress()
+{
+ Error e;
+ if (e.test(_sz < 5 * sizeof(uint32), E_BADSIZE)) // need at least five 32-bit words
+ return e;
+ byte * uncompressed_table = 0;
+ size_t uncompressed_size = 0;
+
+ const byte * p = _p;
+ const uint32 version = be::read<uint32>(p); // Table version number.
+
+ // The scheme is in the top 5 bits of the uint32 that follows the version.
+ const uint32 hdr = be::read<uint32>(p);
+ switch(compression(hdr >> 27))
+ {
+ case NONE: return e; // not compressed: the table is left exactly as it is
+
+ case LZ4:
+ {
+ uncompressed_size = hdr & 0x07ffffff; // low 27 bits: size after decompression
+ uncompressed_table = gralloc<byte>(uncompressed_size);
+ if (!e.test(!uncompressed_table || uncompressed_size < 4, E_OUTOFMEM))
+ {
+ memset(uncompressed_table, 0, 4); // make sure version number is initialised
+ // coverity[forward_null : FALSE] - uncompressed_table has been checked so can't be null
+ // coverity[checked_return : FALSE] - we test e later
+ e.test(lz4::decompress(p, _sz - 2*sizeof(uint32), uncompressed_table, uncompressed_size) != signed(uncompressed_size), E_SHRINKERFAILED);
+ }
+ break;
+ }
+
+ default:
+ e.error(E_BADSCHEME);
+ };
+
+ // Check the uncompressed version number against the original.
+ if (!e)
+ // coverity[forward_null : FALSE] - uncompressed_table has already been tested so can't be null
+ // coverity[checked_return : FALSE] - we test e later
+ e.test(be::peek<uint32>(uncompressed_table) != version, E_SHRINKERFAILED);
+
+ // Tell the provider to release the compressed form since we're replacing
+ // it anyway.
+ release();
+
+ if (e)
+ {
+ free(uncompressed_table);
+ uncompressed_table = 0;
+ uncompressed_size = 0;
+ }
+
+ _p = uncompressed_table; // on failure this leaves an empty table (_p == 0, _sz == 0)
+ _sz = uncompressed_size;
+ _compressed = true; // from now on release() uses free() on _p
+
+ return e;
+}
diff --git a/thirdparty/graphite/src/FeatureMap.cpp b/thirdparty/graphite/src/FeatureMap.cpp
new file mode 100644
index 0000000000..014a88fd08
--- /dev/null
+++ b/thirdparty/graphite/src/FeatureMap.cpp
@@ -0,0 +1,293 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include <cstring>
+
+#include "inc/Main.h"
+#include "inc/bits.h"
+#include "inc/Endian.h"
+#include "inc/FeatureMap.h"
+#include "inc/FeatureVal.h"
+#include "graphite2/Font.h"
+#include "inc/TtfUtil.h"
+#include <cstdlib>
+#include "inc/Face.h"
+
+
+using namespace graphite2;
+
+namespace
+{
+    static int cmpNameAndFeatures(const void *ap, const void *bp)
+    {
+        const NameAndFeatureRef * const lhs = static_cast<const NameAndFeatureRef *>(ap);
+        const NameAndFeatureRef * const rhs = static_cast<const NameAndFeatureRef *>(bp);
+        return (*lhs < *rhs) ? -1 : ((*rhs < *lhs) ? 1 : 0);   // qsort comparator built on operator<
+    }
+
+    // Byte sizes used when bounds-checking the Feat table.
+    const size_t FEAT_HEADER          = sizeof(uint32) + 2*sizeof(uint16) + sizeof(uint32);
+    const size_t FEATURE_SIZE         = sizeof(uint32) + 2*sizeof(uint16) + sizeof(uint32) + 2*sizeof(uint16);
+    const size_t FEATURE_SETTING_SIZE = sizeof(int16) + sizeof(uint16);
+
+    // Constructs num_settings FeatureSetting objects in s from the big-endian
+    // (int16 value, uint16 label) pairs at p; returns the largest value seen as uint16.
+    uint16 readFeatureSettings(const byte * p, FeatureSetting * s, size_t num_settings)
+    {
+        uint16 largest = 0;
+        for (size_t i = 0; i != num_settings; ++i)
+        {
+            const int16  value = be::read<int16>(p);
+            const uint16 label = be::read<uint16>(p);
+            ::new (s + i) FeatureSetting(value, label);
+            if (uint16(value) > largest) largest = value;
+        }
+        return largest;
+    }
+}
+
+FeatureRef::FeatureRef(const Face & face,
+ unsigned short & bits_offset, uint32 max_val,
+ uint32 name, uint16 uiName, flags_t flags,
+ FeatureSetting *settings, uint16 num_set) throw()
+: m_face(&face),
+ m_nameValues(settings),
+ m_mask(mask_over_val(max_val)),
+ m_max(max_val),
+ m_id(name),
+ m_nameid(uiName),
+ m_numSet(num_set),
+ m_flags(flags)
+{
+ const uint8 need_bits = bit_set_count(m_mask); // number of bits this feature's value occupies
+ m_index = (bits_offset + need_bits) / SIZEOF_CHUNK; // chunk in which the field would end
+ if (m_index > bits_offset / SIZEOF_CHUNK) // field would leave the current chunk: start it at the next chunk boundary
+ bits_offset = m_index*SIZEOF_CHUNK;
+ m_bits = bits_offset % SIZEOF_CHUNK; // bit position of the field inside its chunk
+ bits_offset += need_bits; // advance the caller's running offset past this field
+ m_mask <<= m_bits; // move the mask into position
+}
+
+FeatureRef::~FeatureRef() throw()
+{
+ free(m_nameValues); // the settings array passed to the constructor is released with free()
+}
+
+bool FeatureMap::readFeats(const Face & face)
+{
+    const Face::Table feat(face, TtfUtil::Tag::Feat);
+    const byte * p = feat;
+    if (!p) return true;                            // no Feat table: nothing to load, not an error
+    if (feat.size() < FEAT_HEADER) return false;
+    // Loads the Feat table: one FeatureRef per feature, plus the default feature values.
+    const byte *const feat_start = p,
+               *const feat_end = p + feat.size();
+
+    const uint32 version = be::read<uint32>(p);
+    m_numFeats = be::read<uint16>(p);
+    be::skip<uint16>(p);
+    be::skip<uint32>(p);
+
+    // Sanity checks
+    if (m_numFeats == 0) return true;
+    if (version < 0x00010000 ||
+        p + m_numFeats*FEATURE_SIZE > feat_end)
+    {   //defensive
+        m_numFeats = 0;
+        return false;
+    }
+
+    m_feats = new FeatureRef [m_numFeats];
+    uint16 * const defVals = gralloc<uint16>(m_numFeats);
+    if (!defVals || !m_feats) { free(defVals); return false; }   // free(NULL) is a no-op; do not leak defVals when only m_feats failed
+    unsigned short bits = 0;     //to cause overflow on first Feature
+
+    for (int i = 0, ie = m_numFeats; i != ie; i++)
+    {
+        const uint32    label   = version < 0x00020000 ? be::read<uint16>(p) : be::read<uint32>(p);   // label is 16 bits before version 0x00020000
+        const uint16    num_settings = be::read<uint16>(p);
+        if (version >= 0x00020000)
+            be::skip<uint16>(p);
+        const uint32    settings_offset = be::read<uint32>(p);
+        const uint16    flags  = be::read<uint16>(p),
+                        uiName = be::read<uint16>(p);
+        // The settings array must lie completely inside the table.
+        if (settings_offset > size_t(feat_end - feat_start)
+                || settings_offset + num_settings * FEATURE_SETTING_SIZE > size_t(feat_end - feat_start))
+        {
+            free(defVals);
+            return false;
+        }
+
+        FeatureSetting *uiSet;
+        uint32 maxVal;
+        if (num_settings != 0)
+        {
+            uiSet = gralloc<FeatureSetting>(num_settings);
+            if (!uiSet)
+            {
+                free(defVals);
+                return false;
+            }
+            maxVal = readFeatureSettings(feat_start + settings_offset, uiSet, num_settings);
+            defVals[i] = uiSet[0].value();          // the first listed setting supplies the default value
+        }
+        else
+        {
+            uiSet = 0;
+            maxVal = 0xffffffff;                    // no settings listed: allow any 32-bit value
+            defVals[i] = 0;
+        }
+
+        ::new (m_feats + i) FeatureRef (face, bits, maxVal,
+                                       label, uiName,
+                                       FeatureRef::flags_t(flags),
+                                       uiSet, num_settings);
+    }
+    new (&m_defaultFeatures) Features(bits/(sizeof(uint32)*8) + 1, *this);
+    m_pNamedFeats = new NameAndFeatureRef[m_numFeats];
+    if (!m_pNamedFeats)
+    {
+        free(defVals);
+        return false;
+    }
+    for (int i = 0; i < m_numFeats; ++i)
+    {
+        m_feats[i].applyValToFeature(defVals[i], m_defaultFeatures);
+        m_pNamedFeats[i] = m_feats[i];
+    }
+
+    free(defVals);
+    // Sort the name table with cmpNameAndFeatures.
+    qsort(m_pNamedFeats, m_numFeats, sizeof(NameAndFeatureRef), &cmpNameAndFeatures);
+
+    return true;
+}
+
+bool SillMap::readFace(const Face & face)
+{
+    // Features first, then the language table; && short-circuits, so
+    // readSill() is not attempted when readFeats() fails.
+    return m_FeatureMap.readFeats(face) && readSill(face);
+}
+
+
+bool SillMap::readSill(const Face & face)
+{
+ const Face::Table sill(face, TtfUtil::Tag::Sill);
+ const byte *p = sill;
+
+ if (!p) return true; // no Sill table: nothing to load, not an error
+ if (sill.size() < 12) return false;
+ if (be::read<uint32>(p) != 0x00010000UL) return false; // only version 0x00010000 is accepted
+ m_numLanguages = be::read<uint16>(p);
+ m_langFeats = new LangFeaturePair[m_numLanguages];
+ if (!m_langFeats || !m_FeatureMap.m_numFeats) { m_numLanguages = 0; return true; } //defensive
+
+ p += 6; // skip the fast search
+ if (sill.size() < m_numLanguages * 8U + 12) return false; // 12 header bytes plus 8 bytes per language entry
+
+ for (int i = 0; i < m_numLanguages; i++)
+ {
+ uint32 langid = be::read<uint32>(p);
+ uint16 numSettings = be::read<uint16>(p);
+ uint16 offset = be::read<uint16>(p);
+ if (offset + 8U * numSettings > sill.size() && numSettings > 0) return false; // settings block must fit inside the table
+ Features* feats = new Features(m_FeatureMap.m_defaultFeatures); // start from a copy of the defaults
+ if (!feats) return false;
+ const byte *pLSet = sill + offset;
+
+ // Apply language specific settings
+ for (int j = 0; j < numSettings; j++)
+ {
+ uint32 name = be::read<uint32>(pLSet);
+ uint16 val = be::read<uint16>(pLSet);
+ pLSet += 2; // each setting record is 8 bytes; the last 2 are not used here
+ const FeatureRef* pRef = m_FeatureMap.findFeatureRef(name);
+ if (pRef) pRef->applyValToFeature(val, *feats); // settings naming an unknown feature are skipped
+ }
+ // Add the language id feature which is always feature id 1
+ const FeatureRef* pRef = m_FeatureMap.findFeatureRef(1);
+ if (pRef) pRef->applyValToFeature(langid, *feats);
+
+ m_langFeats[i].m_lang = langid;
+ m_langFeats[i].m_pFeatures = feats;
+ }
+ return true;
+}
+
+
+Features* SillMap::cloneFeatures(uint32 langname/*0 means default*/) const
+{
+    // Start from the face-wide defaults and switch to a language's own
+    // feature set when langname is non-zero and matches an entry.
+    // A linear scan is fine here: the number of languages in a font is
+    // usually small (e.g. 8 in Doulos).
+    // The caller receives a freshly allocated copy in either case.
+    const Features * chosen = &m_FeatureMap.m_defaultFeatures;
+    for (uint16 i = 0; langname && i < m_numLanguages; ++i)
+    {
+        if (m_langFeats[i].m_lang == langname) { chosen = m_langFeats[i].m_pFeatures; break; }
+    }
+    return new Features(*chosen);
+}
+
+
+
+const FeatureRef *FeatureMap::findFeatureRef(uint32 name) const
+{
+    // Linear scan of the name -> feature table; the first entry whose
+    // m_name equals name wins, and NULL is returned when none does.
+    for (int i = 0; i < m_numFeats; ++i)
+        if (m_pNamedFeats[i].m_name == name)
+            return m_pNamedFeats[i].m_pFRef;
+    return NULL;
+}
+
+bool FeatureRef::applyValToFeature(uint32 val, Features & pDest) const
+{
+ if (val>maxVal() || !m_face) // value out of range, or this ref is not attached to a face
+ return false;
+ if (pDest.m_pMap==NULL) // destination not bound to a feature map yet: bind it to ours
+ pDest.m_pMap = &m_face->theSill().theFeatureMap();
+ else
+ if (pDest.m_pMap!=&m_face->theSill().theFeatureMap())
+ return false; //incompatible
+ if (m_index >= pDest.size()) // grow the destination so that chunk m_index exists
+ pDest.resize(m_index+1);
+ pDest[m_index] &= ~m_mask; // clear this feature's bits ...
+ pDest[m_index] |= (uint32(val) << m_bits); // ... then store the new value at its bit position
+ return true;
+}
+
+uint32 FeatureRef::getFeatureVal(const Features& feats) const
+{
+    // 0 is reported unless feats holds our chunk, this ref has a face,
+    // and feats is bound to that face's feature map.
+    if (m_index >= feats.size() || !m_face) return 0;
+    if (&m_face->theSill().theFeatureMap() != feats.m_pMap) return 0;
+    return (feats[m_index] & m_mask) >> m_bits;
+}
diff --git a/thirdparty/graphite/src/FileFace.cpp b/thirdparty/graphite/src/FileFace.cpp
new file mode 100644
index 0000000000..7e663876a7
--- /dev/null
+++ b/thirdparty/graphite/src/FileFace.cpp
@@ -0,0 +1,115 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2012, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include <cstring>
+#include "inc/FileFace.h"
+
+
+#ifndef GRAPHITE2_NFILEFACE
+
+using namespace graphite2;
+
+FileFace::FileFace(const char *filename)
+: _file(fopen(filename, "rb")),
+  _file_len(0),
+  _header_tbl(NULL),
+  _table_dir(NULL)
+{
+    if (!_file) return;     // on any failure the constructor simply returns; members not reached keep their NULL/0 initial values
+
+    if (fseek(_file, 0, SEEK_END)) return;
+    _file_len = ftell(_file);               // file length = offset of the end of the file
+    if (fseek(_file, 0, SEEK_SET)) return;
+
+    size_t tbl_offset, tbl_len;
+
+    // Get the header.
+    if (!TtfUtil::GetHeaderInfo(tbl_offset, tbl_len)) return;
+    if (fseek(_file, long(tbl_offset), SEEK_SET)) return;
+    _header_tbl = (TtfUtil::Sfnt::OffsetSubTable*)gralloc<char>(tbl_len);
+    // Stop here if the allocation failed: the table directory lookup below
+    // takes _header_tbl as its input and must not be handed a null header.
+    if (!_header_tbl) return;
+    if (fread(_header_tbl, 1, tbl_len, _file) != tbl_len) return;
+    if (!TtfUtil::CheckHeader(_header_tbl)) return;
+
+    // Get the table directory
+    if (!TtfUtil::GetTableDirInfo(_header_tbl, tbl_offset, tbl_len)) return;
+    _table_dir = (TtfUtil::Sfnt::OffsetSubTable::Entry*)gralloc<char>(tbl_len);
+    if (fseek(_file, long(tbl_offset), SEEK_SET)) return;
+    if (_table_dir && fread(_table_dir, 1, tbl_len, _file) != tbl_len)
+    {
+        free(_table_dir);       // short read: drop the partial directory so it is never used
+        _table_dir = NULL;
+    }
+    return;
+}
+
+FileFace::~FileFace()
+{
+ free(_table_dir); // both buffers are released with free(); either may still be NULL
+ free(_header_tbl);
+ if (_file) // fopen() in the constructor may have failed
+ fclose(_file);
+}
+
+
+const void *FileFace::get_table_fn(const void* appFaceHandle, unsigned int name, size_t *len)
+{
+ if (appFaceHandle == 0) return 0;
+ const FileFace & file_face = *static_cast<const FileFace *>(appFaceHandle); // the handle is the FileFace itself
+
+ void *tbl;
+ size_t tbl_offset, tbl_len;
+ if (!TtfUtil::GetTableInfo(name, file_face._header_tbl, file_face._table_dir, tbl_offset, tbl_len))
+ return 0;
+ // The table must lie inside the file; the subtraction form avoids overflowing tbl_offset + tbl_len.
+ if (tbl_offset > file_face._file_len || tbl_len > file_face._file_len - tbl_offset
+ || fseek(file_face._file, long(tbl_offset), SEEK_SET) != 0)
+ return 0;
+
+ tbl = malloc(tbl_len); // released again by rel_table_fn()
+ if (!tbl || fread(tbl, 1, tbl_len, file_face._file) != tbl_len)
+ {
+ free(tbl);
+ return 0;
+ }
+
+ if (len) *len = tbl_len; // len is optional
+ return tbl;
+}
+
+void FileFace::rel_table_fn(const void* appFaceHandle, const void *table_buffer)
+{
+    // Buffers handed out by get_table_fn() come from malloc(); without a
+    // face handle nothing is released.
+    if (appFaceHandle) free(const_cast<void *>(table_buffer));
+}
+
+const gr_face_ops FileFace::ops = { sizeof FileFace::ops, &FileFace::get_table_fn, &FileFace::rel_table_fn }; // presumably {size, get_table, release_table} - confirm against the gr_face_ops declaration
+
+
+#endif //!GRAPHITE2_NFILEFACE
diff --git a/thirdparty/graphite/src/Font.cpp b/thirdparty/graphite/src/Font.cpp
new file mode 100644
index 0000000000..faf3715f9d
--- /dev/null
+++ b/thirdparty/graphite/src/Font.cpp
@@ -0,0 +1,58 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include "inc/Face.h"
+#include "inc/Font.h"
+#include "inc/GlyphCache.h"
+
+using namespace graphite2;
+
+// Creates a font for face `f` at `ppm` pixels per em.
+//   appFontHandle - opaque handle stored for the callbacks; when null the Font
+//                   itself is stored instead.
+//   ops           - optional advance callbacks. They are only used (and the font
+//                   only counts as hinted) when a handle is supplied and at least
+//                   one advance callback is set; otherwise the face's default
+//                   advance function is installed.
+// The per-glyph advance cache is allocated here and filled with
+// INVALID_ADVANCE; allocation failure leaves m_advances null.
+Font::Font(float ppm, const Face & f, const void * appFontHandle, const gr_font_ops * ops)
+: m_appFontHandle(appFontHandle ? appFontHandle : this),
+  m_face(f),
+  m_scale(ppm / f.glyphs().unitsPerEm()),
+  m_hinted(appFontHandle && ops && (ops->glyph_advance_x || ops->glyph_advance_y))
+{
+    memset(&m_ops, 0, sizeof m_ops);
+    if (!(m_hinted && ops))
+        m_ops.glyph_advance_x = &Face::default_glyph_advance;
+    else
+        // Copy no more than the caller's declared struct size.
+        memcpy(&m_ops, ops, min(sizeof m_ops, ops->size));
+
+    const size_t count = f.glyphs().numGlyphs();
+    m_advances = gralloc<float>(count);
+    if (!m_advances)
+        return;
+    for (size_t i = 0; i != count; ++i)
+        m_advances[i] = INVALID_ADVANCE;
+}
+
+
+// Frees the advance cache allocated by the constructor (may be null if that
+// allocation failed; free() accepts null).
+/*virtual*/ Font::~Font()
+{
+ free(m_advances);
+}
diff --git a/thirdparty/graphite/src/GlyphCache.cpp b/thirdparty/graphite/src/GlyphCache.cpp
new file mode 100644
index 0000000000..282bdc18fd
--- /dev/null
+++ b/thirdparty/graphite/src/GlyphCache.cpp
@@ -0,0 +1,492 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2012, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include "graphite2/Font.h"
+
+#include "inc/Main.h"
+#include "inc/Face.h" //for the tags
+#include "inc/GlyphCache.h"
+#include "inc/GlyphFace.h"
+#include "inc/Endian.h"
+#include "inc/bits.h"
+
+using namespace graphite2;
+
+namespace
+{
+ // Iterator over version 1 or 2 glat entries which consist of a series of
+ // +-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+
+ // v1 |k|n|v1 |v2 |...|vN | or v2 | k | n |v1 |v2 |...|vN |
+ // +-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+
+ // variable length structures.
+ //
+ // W is the width of the k and n fields (uint8 for the v1 typedef below,
+ // uint16 for the v2 typedef); the values themselves are always read as
+ // big-endian uint16. Dereferencing yields a (key, value) pair.
+
+ template<typename W>
+ class _glat_iterator : public std::iterator<std::input_iterator_tag, std::pair<sparse::key_type, sparse::mapped_type> >
+ {
+ // Key of the current value: the entry's first key k plus the index within the run.
+ unsigned short key() const { return uint16(be::peek<W>(_e) + _n); }
+ // Number of values in the current entry (the n field).
+ unsigned int run() const { return be::peek<W>(_e+sizeof(W)); }
+ // Step to the next entry: its header starts where the value cursor stopped.
+ void advance_entry() { _n = 0; _e = _v; be::skip<W>(_v,2); }
+ public:
+ _glat_iterator(const void * glat=0) : _e(reinterpret_cast<const byte *>(glat)), _v(_e+2*sizeof(W)), _n(0) {}
+
+ // Move to the next value, rolling over to the next entry once the run is exhausted.
+ _glat_iterator<W> & operator ++ () {
+ ++_n; be::skip<uint16>(_v);
+ if (_n == run()) advance_entry();
+ return *this;
+ }
+ _glat_iterator<W> operator ++ (int) { _glat_iterator<W> tmp(*this); operator++(); return tmp; }
+
+ // This is strictly a >= operator. A true == operator could be
+ // implemented that tests for overlap but it would be a more expensive
+ // test.
+ // Note it compares this iterator's value cursor against the other
+ // iterator's entry pointer, so it is only meaningful as "have we reached
+ // the end iterator yet".
+ bool operator == (const _glat_iterator<W> & rhs) { return _v >= rhs._e - 1; }
+ bool operator != (const _glat_iterator<W> & rhs) { return !operator==(rhs); }
+
+ value_type operator * () const {
+ return value_type(key(), be::peek<uint16>(_v));
+ }
+
+ protected:
+ // _e: start of the current entry header; _v: current value; _n: index of
+ // the current value within the entry's run.
+ const byte * _e, * _v;
+ size_t _n;
+ };
+
+ typedef _glat_iterator<uint8> glat_iterator;
+ typedef _glat_iterator<uint16> glat2_iterator;
+}
+
+// Shared all-zero SlantBox constant.
+const SlantBox SlantBox::empty = {0,0,0,0};
+
+
+// Helper used by GlyphCache to decode glyphs: it holds the font tables it
+// reads from (head, hhea, hmtx, glyf, loca, Glat, Gloc) plus the counts and
+// format flags extracted from them.
+class GlyphCache::Loader
+{
+public:
+ Loader(const Face & face); // Test the object with operator bool() after construction. Do not use if that is false.
+
+ // True when the required tables loaded and validated.
+ operator bool () const throw();
+ unsigned short int units_per_em() const throw();
+ // Larger of the outline glyph count and the attributed glyph count.
+ unsigned short int num_glyphs() const throw();
+ unsigned short int num_attrs() const throw();
+ bool has_boxes() const throw();
+
+ // Fill the caller-provided GlyphFace for gid; returns it, or 0 on bad data.
+ // *numsubs (if given) is incremented by the glyph's sub-box count.
+ const GlyphFace * read_glyph(unsigned short gid, GlyphFace &, int *numsubs) const throw();
+ // Construct gid's GlyphBox at curr; returns the address just past it, or 0.
+ GlyphBox * read_box(uint16 gid, GlyphBox *curr, const GlyphFace & face) const throw();
+
+ CLASS_NEW_DELETE;
+private:
+ Face::Table _head,
+ _hhea,
+ _hmtx,
+ _glyf,
+ _loca,
+ m_pGlat,
+ m_pGloc;
+
+ bool _long_fmt; // Gloc offsets are uint32 rather than uint16
+ bool _has_boxes;
+ unsigned short _num_glyphs_graphics, //i.e. boundary box and advance
+ _num_glyphs_attributes,
+ _num_attrs; // number of glyph attributes per glyph
+};
+
+
+
+// Builds the glyph cache for `face`.
+//
+// A Loader is created to decode glyphs. If it validates and reports a non-zero
+// glyph count, zeroed lookup arrays are allocated for glyphs (and for collision
+// boxes when the loader reports them).
+//
+// With gr_face_preloadGlyphs set in face_options every glyph is decoded up
+// front into one contiguous array (and every box into one contiguous buffer),
+// after which the loader is deleted; a null loader is what later tells the
+// destructor that entry 0 owns the whole allocation. Without the flag the
+// loader is kept and glyph() decodes entries on demand.
+//
+// If glyph 0 cannot be obtained the cache is emptied (arrays freed, counts 0).
+GlyphCache::GlyphCache(const Face & face, const uint32 face_options)
+: _glyph_loader(new Loader(face)),
+  _glyphs(_glyph_loader && *_glyph_loader && _glyph_loader->num_glyphs()
+        ? grzeroalloc<const GlyphFace *>(_glyph_loader->num_glyphs()) : 0),
+  _boxes(_glyph_loader && _glyph_loader->has_boxes() && _glyph_loader->num_glyphs()
+        ? grzeroalloc<GlyphBox *>(_glyph_loader->num_glyphs()) : 0),
+  _num_glyphs(_glyphs ? _glyph_loader->num_glyphs() : 0),
+  _num_attrs(_glyphs ? _glyph_loader->num_attrs() : 0),
+  _upem(_glyphs ? _glyph_loader->units_per_em() : 0)
+{
+    if ((face_options & gr_face_preloadGlyphs) && _glyph_loader && _glyphs)
+    {
+        int numsubs = 0;
+        GlyphFace * const glyphs = new GlyphFace [_num_glyphs];
+        if (!glyphs)
+            return;
+
+        // The 0 glyph is definitely required.
+        _glyphs[0] = _glyph_loader->read_glyph(0, glyphs[0], &numsubs);
+
+        // glyphs[0] has the same address as the glyphs array just allocated,
+        // thus assigning the &glyphs[0] to _glyphs[0] means _glyphs[0] points
+        // to the entire array.
+        const GlyphFace * loaded = _glyphs[0];
+        for (uint16 gid = 1; loaded && gid != _num_glyphs; ++gid)
+            _glyphs[gid] = loaded = _glyph_loader->read_glyph(gid, glyphs[gid], &numsubs);
+
+        if (!loaded)
+        {
+            _glyphs[0] = 0;
+            delete [] glyphs;
+        }
+        else if (numsubs > 0 && _boxes)
+        {
+            // One buffer for all boxes: a GlyphBox per glyph plus 8 floats
+            // (two Rects) for every sub-box counted while reading the glyphs.
+            GlyphBox * boxes = (GlyphBox *)gralloc<char>(_num_glyphs * sizeof(GlyphBox) + numsubs * 8 * sizeof(float));
+            GlyphBox * currbox = boxes;
+
+            for (uint16 gid = 0; currbox && gid != _num_glyphs; ++gid)
+            {
+                _boxes[gid] = currbox;
+                currbox = _glyph_loader->read_box(gid, currbox, *_glyphs[gid]);
+            }
+            if (!currbox)
+            {
+                // Every entry written so far points into the buffer being
+                // released, so clear them all rather than just entry 0;
+                // otherwise later lookups would hand out dangling pointers.
+                free(boxes);
+                for (uint16 gid = 0; gid != _num_glyphs; ++gid)
+                    _boxes[gid] = 0;
+            }
+        }
+        delete _glyph_loader;
+        _glyph_loader = 0;
+        // coverity[leaked_storage : FALSE] - calling read_glyph on index 0 saved
+        // glyphs as _glyphs[0]. Setting _glyph_loader to nullptr here flags that
+        // the dtor needs to call delete[] on _glyphs[0] to release what was allocated
+        // as glyphs
+    }
+
+    if (_glyphs && glyph(0) == 0)
+    {
+        free(_glyphs);
+        _glyphs = 0;
+        if (_boxes)
+        {
+            free(_boxes);
+            _boxes = 0;
+        }
+        _num_glyphs = _num_attrs = _upem = 0;
+    }
+}
+
+
+// Releases cached glyphs and boxes. How they are released depends on whether
+// the loader still exists: with a loader each entry was allocated separately;
+// without one, entry 0 owns a single allocation covering all entries.
+GlyphCache::~GlyphCache()
+{
+    const bool per_entry = _glyph_loader != 0;
+
+    if (_glyphs)
+    {
+        if (!per_entry)
+            delete [] _glyphs[0];
+        else
+            for (unsigned short i = 0; i != _num_glyphs; ++i)
+                delete _glyphs[i];
+        free(_glyphs);
+    }
+
+    if (_boxes)
+    {
+        if (!per_entry)
+            free(_boxes[0]);
+        else
+            for (uint16 i = 0; i != _num_glyphs; ++i)
+                free(_boxes[i]);
+        free(_boxes);
+    }
+
+    delete _glyph_loader;
+}
+
+// Returns the cached glyph for glyphid, decoding it on first use while a
+// loader is still attached. Out-of-range ids, and ids whose data cannot be
+// decoded, yield glyph 0. When boxes are enabled the glyph's collision box is
+// decoded at the same time; a box that cannot be allocated or decoded is left
+// null without failing the glyph lookup.
+const GlyphFace *GlyphCache::glyph(unsigned short glyphid) const      //result may be changed by subsequent call with a different glyphid
+{
+    if (glyphid >= numGlyphs())
+        return _glyphs[0];
+    const GlyphFace * & p = _glyphs[glyphid];
+    if (p == 0 && _glyph_loader)
+    {
+        int numsubs = 0;
+        GlyphFace * g = new GlyphFace();
+        if (g)  p = _glyph_loader->read_glyph(glyphid, *g, &numsubs);
+        if (!p)
+        {
+            delete g;
+            return *_glyphs;
+        }
+        if (_boxes)
+        {
+            _boxes[glyphid] = (GlyphBox *)gralloc<char>(sizeof(GlyphBox) + 8 * numsubs * sizeof(float));
+            // read_box constructs the GlyphBox in place, so it must never be
+            // handed a null buffer: skip it when the allocation failed.
+            if (_boxes[glyphid]
+                && !_glyph_loader->read_box(glyphid, _boxes[glyphid], *_glyphs[glyphid]))
+            {
+                free(_boxes[glyphid]);
+                _boxes[glyphid] = 0;
+            }
+        }
+    }
+    return p;
+}
+
+
+
+// Loads and validates the tables needed to decode glyphs from `face`.
+// Failure is signalled by resetting _head to an empty table, which makes
+// operator bool() return false; callers must test the object before use.
+GlyphCache::Loader::Loader(const Face & face)
+: _head(face, Tag::head),
+ _hhea(face, Tag::hhea),
+ _hmtx(face, Tag::hmtx),
+ _glyf(face, Tag::glyf),
+ _loca(face, Tag::loca),
+ _long_fmt(false),
+ _has_boxes(false),
+ _num_glyphs_graphics(0),
+ _num_glyphs_attributes(0),
+ _num_attrs(0)
+{
+ // head, hhea and hmtx are mandatory; glyf and loca must be both present or both absent.
+ if (!operator bool())
+ return;
+
+ const Face::Table maxp = Face::Table(face, Tag::maxp);
+ if (!maxp) { _head = Face::Table(); return; }
+
+ _num_glyphs_graphics = static_cast<unsigned short>(TtfUtil::GlyphCount(maxp));
+ // This will fail if the number of glyphs is wildly out of range.
+ // NOTE(review): a glyph count of 0 passes -1 as the glyph id here - confirm
+ // LocaLookup rejects it.
+ if (_glyf && TtfUtil::LocaLookup(_num_glyphs_graphics-1, _loca, _loca.size(), _head) == size_t(-2))
+ {
+ _head = Face::Table();
+ return;
+ }
+
+ // Both Graphite tables are required, and Gloc must at least hold its
+ // 8-byte header (uint32 version, uint16 flags, uint16 attribute count).
+ if ((m_pGlat = Face::Table(face, Tag::Glat, 0x00030000)) == NULL
+ || (m_pGloc = Face::Table(face, Tag::Gloc)) == NULL
+ || m_pGloc.size() < 8)
+ {
+ _head = Face::Table();
+ return;
+ }
+ const byte * p = m_pGloc;
+ int version = be::read<uint32>(p);
+ const uint16 flags = be::read<uint16>(p);
+ _num_attrs = be::read<uint16>(p);
+ // We can accurately calculate the number of attributed glyphs by
+ // subtracting the length of the attribids array (numAttribs long if present)
+ // and dividing by either 2 or 4 depending on short or long format.
+ // The trailing -1 accounts for the offsets array having one more entry
+ // than there are glyphs (each glyph uses its own offset and the next one).
+ _long_fmt = flags & 1;
+ ptrdiff_t tmpnumgattrs = (m_pGloc.size()
+ - (p - m_pGloc)
+ - sizeof(uint16)*(flags & 0x2 ? _num_attrs : 0))
+ / (_long_fmt ? sizeof(uint32) : sizeof(uint16)) - 1;
+
+ // Reject Gloc versions that are too new, impossible glyph or attribute
+ // counts, fewer attributed glyphs than outline glyphs, and a Glat table too
+ // small to hold its version field.
+ if (version >= 0x00020000 || tmpnumgattrs < 0 || tmpnumgattrs > 65535
+ || _num_attrs == 0 || _num_attrs > 0x3000 // is this hard limit appropriate?
+ || _num_glyphs_graphics > tmpnumgattrs
+ || m_pGlat.size() < 4)
+ {
+ _head = Face::Table();
+ return;
+ }
+
+ _num_glyphs_attributes = static_cast<unsigned short>(tmpnumgattrs);
+ p = m_pGlat;
+ version = be::read<uint32>(p);
+ if (version >= 0x00040000 || (version >= 0x00030000 && m_pGlat.size() < 8)) // reject Glat tables that are too new
+ {
+ _head = Face::Table();
+ return;
+ }
+ else if (version >= 0x00030000)
+ {
+ // Version 3 tables carry a flags word after the version.
+ unsigned int glatflags = be::read<uint32>(p);
+ _has_boxes = glatflags & 1;
+ // delete this once the compiler is fixed
+ // (until then boxes are assumed present for every version 3 table)
+ _has_boxes = true;
+ }
+}
+
+// A loader is usable when head, hhea and hmtx are all present and glyf/loca
+// are either both present or both absent.
+inline
+GlyphCache::Loader::operator bool () const throw()
+{
+    if (!(_head && _hhea && _hmtx))
+        return false;
+    return bool(_glyf) == bool(_loca);
+}
+
+// Design units per em taken from the head table, or 0 when head is missing.
+inline
+unsigned short int GlyphCache::Loader::units_per_em() const throw()
+{
+    if (!_head)
+        return 0;
+    return TtfUtil::DesignUnits(_head);
+}
+
+// Number of glyph slots needed: the larger of the outline glyph count and the
+// attributed glyph count.
+inline
+unsigned short int GlyphCache::Loader::num_glyphs() const throw()
+{
+    return _num_glyphs_graphics < _num_glyphs_attributes
+            ? _num_glyphs_attributes
+            : _num_glyphs_graphics;
+}
+
+// Attribute count read from the Gloc header by the constructor.
+inline
+unsigned short int GlyphCache::Loader::num_attrs() const throw()
+{
+ return _num_attrs;
+}
+
+// Whether the constructor decided the Glat table carries collision boxes.
+inline
+bool GlyphCache::Loader::has_boxes () const throw()
+{
+ return _has_boxes;
+}
+
+// Decodes glyph `glyphid` into the caller-provided `glyph` object.
+// glyphid - glyph to decode.
+// glyph - storage that is re-constructed in place with the decoded data.
+// numsubs - optional running total; incremented by this glyph's sub-box
+// count when the Glat table is version 3 or later.
+// Returns &glyph on success, or 0 when the font data is inconsistent. A glyph
+// id beyond both glyph counts is returned untouched.
+const GlyphFace * GlyphCache::Loader::read_glyph(unsigned short glyphid, GlyphFace & glyph, int *numsubs) const throw()
+{
+ Rect bbox;
+ Position advance;
+
+ // Outline-derived metrics: bounding box from glyf/loca, advance from hmtx.
+ if (glyphid < _num_glyphs_graphics)
+ {
+ int nLsb;
+ unsigned int nAdvWid;
+ if (_glyf)
+ {
+ int xMin, yMin, xMax, yMax;
+ size_t locidx = TtfUtil::LocaLookup(glyphid, _loca, _loca.size(), _head);
+ void *pGlyph = TtfUtil::GlyfLookup(_glyf, locidx, _glyf.size());
+
+ if (pGlyph && TtfUtil::GlyfBox(pGlyph, xMin, yMin, xMax, yMax))
+ {
+ // An inverted box means corrupt data.
+ if ((xMin > xMax) || (yMin > yMax))
+ return 0;
+ bbox = Rect(Position(static_cast<float>(xMin), static_cast<float>(yMin)),
+ Position(static_cast<float>(xMax), static_cast<float>(yMax)));
+ }
+ }
+ if (TtfUtil::HorMetrics(glyphid, _hmtx, _hmtx.size(), _hhea, nLsb, nAdvWid))
+ advance = Position(static_cast<float>(nAdvWid), 0);
+ }
+
+ // Graphite attributes: Gloc gives the [glocs, gloce) byte range of this
+ // glyph's record inside Glat.
+ if (glyphid < _num_glyphs_attributes)
+ {
+ const byte * gloc = m_pGloc;
+ size_t glocs = 0, gloce = 0;
+
+ // Skip the 8-byte Gloc header (uint32 version, two uint16 fields).
+ be::skip<uint32>(gloc);
+ be::skip<uint16>(gloc,2);
+ if (_long_fmt)
+ {
+ if (8 + glyphid * sizeof(uint32) > m_pGloc.size())
+ return 0;
+ be::skip<uint32>(gloc, glyphid);
+ glocs = be::read<uint32>(gloc);
+ gloce = be::peek<uint32>(gloc);
+ }
+ else
+ {
+ if (8 + glyphid * sizeof(uint16) > m_pGloc.size())
+ return 0;
+ be::skip<uint16>(gloc, glyphid);
+ glocs = be::read<uint16>(gloc);
+ gloce = be::peek<uint16>(gloc);
+ }
+
+ // Both ends of the record must lie inside the Glat table.
+ if (glocs >= m_pGlat.size() - 1 || gloce > m_pGlat.size())
+ return 0;
+
+ const uint32 glat_version = be::peek<uint32>(m_pGlat);
+ if (glat_version >= 0x00030000)
+ {
+ // Version 3 records start with box data: a uint16 bitmap whose set
+ // bits count the sub-boxes. Step over 6 + 8 * num bytes of box data
+ // to reach the attribute runs.
+ if (glocs >= gloce)
+ return 0;
+ const byte * p = m_pGlat + glocs;
+ uint16 bmap = be::read<uint16>(p);
+ int num = bit_set_count((uint32)bmap);
+ if (numsubs) *numsubs += num;
+ glocs += 6 + 8 * num;
+ if (glocs > gloce)
+ return 0;
+ }
+ if (glat_version < 0x00020000)
+ {
+ // Version 1 entries: byte key, byte count, then uint16 values. The
+ // record must hold at least one minimal entry and no more than
+ // _num_attrs of them.
+ if (gloce - glocs < 2*sizeof(byte)+sizeof(uint16)
+ || gloce - glocs > _num_attrs*(2*sizeof(byte)+sizeof(uint16)))
+ return 0;
+ new (&glyph) GlyphFace(bbox, advance, glat_iterator(m_pGlat + glocs), glat_iterator(m_pGlat + gloce));
+ }
+ else
+ {
+ // Version 2 and later entries: uint16 key, uint16 count, uint16 values.
+ if (gloce - glocs < 3*sizeof(uint16) // can a glyph have no attributes? why not?
+ || gloce - glocs > _num_attrs*3*sizeof(uint16)
+ || glocs > m_pGlat.size() - 2*sizeof(uint16))
+ return 0;
+ new (&glyph) GlyphFace(bbox, advance, glat2_iterator(m_pGlat + glocs), glat2_iterator(m_pGlat + gloce));
+ }
+ // Reject a glyph whose attribute set failed to build or is larger than
+ // the declared attribute count.
+ if (!glyph.attrs() || glyph.attrs().capacity() > _num_attrs)
+ return 0;
+ }
+ return &glyph;
+}
+
+// Maps t in 0..255 linearly onto the range [zmin, zmax]
+// (0 gives zmin, 255 gives zmax).
+inline float scale_to(uint8 t, float zmin, float zmax)
+{
+    const float span = zmax - zmin;
+    return zmin + t * span / 255;
+}
+
+// Builds a rectangle inside `b` from four byte-quantised coordinates: each
+// byte is scaled from 0..255 onto the matching axis range of b.
+Rect readbox(Rect &b, uint8 zxmin, uint8 zymin, uint8 zxmax, uint8 zymax)
+{
+    const Position bottom_left(scale_to(zxmin, b.bl.x, b.tr.x),
+                               scale_to(zymin, b.bl.y, b.tr.y));
+    const Position top_right(scale_to(zxmax, b.bl.x, b.tr.x),
+                             scale_to(zymax, b.bl.y, b.tr.y));
+    return Rect(bottom_left, top_right);
+}
+
+// Decodes the collision box record of glyph `gid` from the Glat table and
+// constructs a GlyphBox in the storage at `curr`.
+// gid - glyph whose box is wanted.
+// curr - destination storage; must be large enough for the GlyphBox plus two
+// Rects per sub-box. It is written to without a null check.
+// glyph - the already-decoded glyph, used for its bounding box.
+// Returns the address immediately after the data written (where the next
+// glyph's box can be placed), or 0 when the glyph has no usable box record.
+GlyphBox * GlyphCache::Loader::read_box(uint16 gid, GlyphBox *curr, const GlyphFace & glyph) const throw()
+{
+ if (gid >= _num_glyphs_attributes) return 0;
+
+ const byte * gloc = m_pGloc;
+ size_t glocs = 0, gloce = 0;
+
+ // Skip the 8-byte Gloc header, then read this glyph's [glocs, gloce) range.
+ be::skip<uint32>(gloc);
+ be::skip<uint16>(gloc,2);
+ if (_long_fmt)
+ {
+ be::skip<uint32>(gloc, gid);
+ glocs = be::read<uint32>(gloc);
+ gloce = be::peek<uint32>(gloc);
+ }
+ else
+ {
+ be::skip<uint16>(gloc, gid);
+ glocs = be::read<uint16>(gloc);
+ gloce = be::peek<uint16>(gloc);
+ }
+
+ // The record must fit in Glat and be longer than the 6-byte box header.
+ if (gloce > m_pGlat.size() || glocs + 6 >= gloce)
+ return 0;
+
+ const byte * p = m_pGlat + glocs;
+ uint16 bmap = be::read<uint16>(p);
+ int num = bit_set_count((uint32)bmap);
+
+ // Bounds of the glyph along the two diagonals (x+y and x-y), derived from
+ // the corners of its bounding box.
+ Rect bbox = glyph.theBBox();
+ Rect diamax(Position(bbox.bl.x + bbox.bl.y, bbox.bl.x - bbox.tr.y),
+ Position(bbox.tr.x + bbox.tr.y, bbox.tr.x - bbox.bl.y));
+ Rect diabound = readbox(diamax, p[0], p[2], p[1], p[3]);
+ // Note the GlyphBox is constructed before the sub-box length check below,
+ // so `curr` is written even when 0 is returned.
+ ::new (curr) GlyphBox(num, bmap, &diabound);
+ be::skip<uint8>(p, 4);
+ if (glocs + 6 + num * 8 >= gloce)
+ return 0;
+
+ // Each sub-box contributes two 4-byte rectangles: even i is scaled within
+ // the bounding box, odd i within the diagonal bounds.
+ for (int i = 0; i < num * 2; ++i)
+ {
+ Rect box = readbox((i & 1) ? diamax : bbox, p[0], p[2], p[1], p[3]);
+ curr->addSubBox(i >> 1, i & 1, &box);
+ be::skip<uint8>(p, 4);
+ }
+ return (GlyphBox *)((char *)(curr) + sizeof(GlyphBox) + 2 * num * sizeof(Rect));
+}
diff --git a/thirdparty/graphite/src/GlyphFace.cpp b/thirdparty/graphite/src/GlyphFace.cpp
new file mode 100644
index 0000000000..bc5e63a9f0
--- /dev/null
+++ b/thirdparty/graphite/src/GlyphFace.cpp
@@ -0,0 +1,48 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include "inc/GlyphFace.h"
+
+
+using namespace graphite2;
+
+// Returns the requested glyph metric, computed from the glyph's bounding box
+// and advance and truncated to an integer. Unknown metric codes yield 0.
+int32 GlyphFace::getMetric(uint8 metric) const
+{
+    switch (metrics(metric))
+    {
+        // Left side bearing and bounding-box left are the same value.
+        case kgmetLsb :
+        case kgmetBbLeft :
+            return int32(m_bbox.bl.x);
+
+        case kgmetRsb :
+            return int32(m_advance.x - m_bbox.tr.x);
+
+        case kgmetBbTop :
+            return int32(m_bbox.tr.y);
+        case kgmetBbBottom :
+            return int32(m_bbox.bl.y);
+        case kgmetBbRight :
+            return int32(m_bbox.tr.x);
+
+        case kgmetBbHeight :
+            return int32(m_bbox.tr.y - m_bbox.bl.y);
+        case kgmetBbWidth :
+            return int32(m_bbox.tr.x - m_bbox.bl.x);
+
+        case kgmetAdvWidth :
+            return int32(m_advance.x);
+        case kgmetAdvHeight :
+            return int32(m_advance.y);
+
+        default :
+            return 0;
+    }
+}
diff --git a/thirdparty/graphite/src/Intervals.cpp b/thirdparty/graphite/src/Intervals.cpp
new file mode 100644
index 0000000000..0fe99a127a
--- /dev/null
+++ b/thirdparty/graphite/src/Intervals.cpp
@@ -0,0 +1,298 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include <algorithm>
+#include <cmath>
+#include <limits>
+
+#include "inc/Intervals.h"
+#include "inc/Segment.h"
+#include "inc/Slot.h"
+#include "inc/debug.h"
+#include "inc/bits.h"
+
+using namespace graphite2;
+
+#include <cmath>
+
+// Splits this exclusion at p: returns a copy that now ends at p, while this
+// object is shortened to start at p.
+inline
+Zones::Exclusion Zones::Exclusion::split_at(float p) {
+    Exclusion left_part(*this);
+    x = p;
+    left_part.xm = x;
+    return left_part;
+}
+
+// Moves the start of this exclusion to p; the end (xm) is unchanged.
+inline
+void Zones::Exclusion::left_trim(float p) {
+ x = p;
+}
+
+// Adds the cost coefficients of rhs to this exclusion and marks the result as
+// not open. The extent (x, xm) is left untouched.
+inline
+Zones::Exclusion & Zones::Exclusion::operator += (Exclusion const & rhs) {
+    c   += rhs.c;
+    sm  += rhs.sm;
+    smx += rhs.smx;
+    open = false;
+    return *this;
+}
+
+// Classifies val against the half-open range [x, xm) with zero tolerance:
+//   bit 0 set - val lies before x
+//   bit 1 set - val lies at or beyond xm
+// so 0 means val is inside the exclusion.
+inline
+uint8 Zones::Exclusion::outcode(float val) const {
+    const bool at_or_past_end = val - xm >= 0.f;
+    const bool before_start   = x - val > 0.f;
+    return (at_or_past_end << 1) | before_start;
+}
+
+// Removes [xmin, xmax] from the available zones, then adds a weighted margin
+// of width _margin_len on each side of the removed range.
+void Zones::exclude_with_margins(float xmin, float xmax, int axis) {
+    const float left_edge  = xmin - _margin_len;
+    const float right_edge = xmax + _margin_len;
+
+    remove(xmin, xmax);
+    weightedAxis(axis, left_edge, xmin, 0, 0, _margin_weight, left_edge, 0, 0, false);
+    weightedAxis(axis, xmax, right_edge, 0, 0, _margin_weight, right_edge, 0, 0, false);
+}
+
+namespace
+{
+
+// True when two boundary positions differ. The comparison is exact on purpose:
+// tolerance-based variants were tried by the original author (an epsilon test,
+// and a 0.5 threshold that was noted as failing existing 64-bit tests).
+inline
+bool separated(float a, float b) {
+    return !(a == b);
+}
+
+}
+
+// Merges exclusion e into the ordered list of exclusions. e is first clipped
+// to [_pos, _posm]; it is then walked across the existing exclusions, which are
+// split where e's ends fall inside them, and e's cost terms are added to every
+// piece it overlaps. e is consumed from the left as the walk proceeds.
+void Zones::insert(Exclusion e)
+{
+#if !defined GRAPHITE2_NTRACING
+ addDebug(&e);
+#endif
+ e.x = max(e.x, _pos);
+ e.xm = min(e.xm, _posm);
+ if (e.x >= e.xm) return;
+
+ for (iterator i = _exclusions.begin(), ie = _exclusions.end(); i != ie && e.x < e.xm; ++i)
+ {
+ // Outcodes of i's two ends relative to e; a shared bit means i lies
+ // wholly to one side of e, so there is no overlap.
+ const uint8 oca = e.outcode(i->x),
+ ocb = e.outcode(i->xm);
+ if ((oca & ocb) != 0) continue;
+
+ switch (oca ^ ocb) // What kind of overlap?
+ {
+ case 0: // e completely covers i
+ // split e at i.x into e1,e2
+ // split e2 at i.xm into e2,e3
+ // drop e1 ,i+e2, e=e3
+ *i += e;
+ e.left_trim(i->xm);
+ break;
+ case 1: // e overlaps on the rhs of i
+ // split i at e.x into i1,i2
+ // split e at i.xm into e1,e2
+ // trim i1, insert i2+e1, e=e2
+ if (!separated(i->xm, e.x)) break;
+ if (separated(i->x,e.x)) { i = _exclusions.insert(i,i->split_at(e.x)); ++i; }
+ *i += e;
+ e.left_trim(i->xm);
+ break;
+ case 2: // e overlaps on the lhs of i
+ // split e at i.x into e1,e2
+ // split i at e.xm into i1,i2
+ // drop e1, insert e2+i1, trim i2
+ if (!separated(e.xm, i->x)) return;
+ if (separated(e.xm, i->xm)) i = _exclusions.insert(i,i->split_at(e.xm));
+ *i += e;
+ return;
+ case 3: // i completely covers e
+ // split i at e.x into i1,i2
+ // split i2 at e.xm into i2,i3
+ // insert i1, insert e+i2
+ if (separated(e.xm, i->xm)) i = _exclusions.insert(i,i->split_at(e.xm));
+ i = _exclusions.insert(i, i->split_at(e.x));
+ *++i += e;
+ return;
+ }
+
+ // Insertions above may have moved the end of the container.
+ ie = _exclusions.end();
+ }
+}
+
+
+// Removes the range [x, xm) (clipped to [_pos, _posm]) from the list of
+// exclusions: exclusions inside the range are erased, those straddling an end
+// are trimmed, and one that contains the whole range is split in two.
+// In the case labels below "e" is the range being removed and "i" the existing
+// exclusion being examined.
+void Zones::remove(float x, float xm)
+{
+#if !defined GRAPHITE2_NTRACING
+ removeDebug(x, xm);
+#endif
+ x = max(x, _pos);
+ xm = min(xm, _posm);
+ if (x >= xm) return;
+
+ for (iterator i = _exclusions.begin(), ie = _exclusions.end(); i != ie; ++i)
+ {
+ // Outcodes of the range's two ends relative to i; a shared bit means
+ // the range lies wholly to one side of i.
+ const uint8 oca = i->outcode(x),
+ ocb = i->outcode(xm);
+ if ((oca & ocb) != 0) continue;
+
+ switch (oca ^ ocb) // What kind of overlap?
+ {
+ case 0: // i completely covers e
+ if (separated(i->x, x)) { i = _exclusions.insert(i,i->split_at(x)); ++i; }
+ GR_FALLTHROUGH;
+ // no break
+ case 1: // i overlaps on the rhs of e
+ i->left_trim(xm);
+ return;
+ case 2: // i overlaps on the lhs of e
+ i->xm = x;
+ if (separated(i->x, i->xm)) break;
+ GR_FALLTHROUGH;
+ // no break
+ case 3: // e completely covers i
+ // Step back so the loop's ++i lands on the element that followed the
+ // erased one.
+ // NOTE(review): when the first element is erased this decrements an
+ // iterator equal to begin() - confirm the container tolerates that.
+ i = _exclusions.erase(i);
+ --i;
+ break;
+ }
+
+ // Insert and erase may have moved the end of the container.
+ ie = _exclusions.end();
+ }
+}
+
+
+// Binary search over the ordered exclusions for the one containing x. If none
+// contains it, the result is the position where the search ended (possibly
+// the end of the list).
+Zones::const_iterator Zones::find_exclusion_under(float x) const
+{
+    size_t lo = 0;
+    size_t hi = _exclusions.size();
+
+    while (lo < hi)
+    {
+        const size_t mid = (lo + hi) >> 1;
+        const uint8 where = _exclusions[mid].outcode(x);
+
+        if (where == 0)                 // x is inside this exclusion
+            return _exclusions.begin() + mid;
+        if (where == 1)                 // x is to the left: search the lower half
+            hi = mid;
+        else                            // x is to the right: search the upper half
+            lo = mid + 1;
+    }
+
+    return _exclusions.begin() + lo;
+}
+
+
+// Finds the lowest-cost position near `origin`.
+//   origin - the preferred position.
+//   cost   - out-parameter: cost of the returned position, or -1 when no
+//            exclusion produced a candidate.
+// Returns the best position found (0 when there was none).
+//
+// The search starts at the exclusion under origin and scans outwards in both
+// directions; each scan stops as soon as track_cost reports that going
+// further cannot improve on the best cost so far.
+float Zones::closest(float origin, float & cost) const
+{
+    float best_c = std::numeric_limits<float>::max(),
+          best_x = 0;
+
+    const const_iterator start = find_exclusion_under(origin);
+
+    // Forward scan looking for lowest cost
+    for (const_iterator i = start, ie = _exclusions.end(); i != ie; ++i)
+        if (i->track_cost(best_c, best_x, origin)) break;
+
+    // Backward scan looking for lowest cost
+    // We start from the exclusion to the immediate left of start since we've
+    // already tested start with the right most scan above.
+    // The iterator is decremented inside the loop, after the comparison with
+    // begin(), so that no iterator before the start of the sequence is ever
+    // formed (which would be undefined, notably for an empty list).
+    for (const_iterator i = start, ib = _exclusions.begin(); i != ib; )
+    {
+        --i;
+        if (i->track_cost(best_c, best_x, origin)) break;
+    }
+
+    cost = (best_c == std::numeric_limits<float>::max() ? -1 : best_c);
+    return best_x;
+}
+
+
+// Cost and test position functions
+
+// Evaluates this exclusion's best position for `origin` and, if it is cheaper
+// than best_cost, records it in best_cost / best_pos.
+// Returns true only when this exclusion is open and its cost is worse than the
+// best so far, which callers use as the signal to stop scanning.
+bool Zones::Exclusion::track_cost(float & best_cost, float & best_pos, float origin) const {
+    const float candidate      = test_position(origin);
+    const float candidate_cost = cost(candidate - origin);
+
+    if (candidate_cost < best_cost)
+    {
+        best_pos  = candidate;
+        best_cost = candidate_cost;
+        return false;
+    }
+    return open && candidate_cost > best_cost;
+}
+
+// Quadratic cost of an offset p: sm*p^2 - 2*smx*p + c, evaluated in
+// Horner form.
+inline
+float Zones::Exclusion::cost(float p) const {
+ return (sm * p - 2 * smx) * p + c;
+}
+
+
+// Picks the position within [x, xm] to test for minimum cost.
+//
+// When sm is not negative the turning point of the cost curve (offset smx / sm
+// from origin) is used, clamped to the exclusion's range. When sm is negative
+// the candidates are the two ends and, if it lies strictly inside, origin
+// itself; the cheapest of those is returned.
+float Zones::Exclusion::test_position(float origin) const {
+    if (!(sm < 0))
+    {
+        const float zerox = smx / sm + origin;
+        if (zerox < x)
+            return x;
+        if (zerox > xm)
+            return xm;
+        return zerox;
+    }
+
+    // sigh, test both ends and perhaps the middle too!
+    float best      = x;
+    float best_cost = cost(x);
+    if (x < origin && xm > origin)
+    {
+        const float origin_cost = cost(origin);
+        if (origin_cost < best_cost)
+        {
+            best_cost = origin_cost;
+            best      = origin;
+        }
+    }
+    const float right_cost = cost(xm);
+    return best_cost > right_cost ? xm : best;
+}
+
+
+#if !defined GRAPHITE2_NTRACING
+
+// Writes every recorded debug entry to the JSON debug stream, one flat array
+// per entry: the slot id, the second environment value, then either "remove"
+// with the removed range or "exclude" with the exclusion's parameters.
+// Does nothing when no debug stream is attached.
+void Zones::jsonDbgOut(Segment *seg) const {
+    if (!_dbg)
+        return;
+
+    for (Zones::idebugs s = dbgs_begin(), e = dbgs_end(); s != e; ++s)
+    {
+        *_dbg << json::flat << json::array
+              << objectid(dslot(seg, (Slot *)(s->_env[0])))
+              << reinterpret_cast<ptrdiff_t>(s->_env[1]);
+        if (!s->_isdel)
+            *_dbg << "exclude" << json::flat << json::array
+                  << s->_excl.x << s->_excl.xm
+                  << s->_excl.sm << s->_excl.smx << s->_excl.c
+                  << json::close;
+        else
+            *_dbg << "remove" << Position(s->_excl.x, s->_excl.xm);
+        *_dbg << json::close;
+    }
+}
+
+#endif
diff --git a/thirdparty/graphite/src/Justifier.cpp b/thirdparty/graphite/src/Justifier.cpp
new file mode 100644
index 0000000000..78c11e6a51
--- /dev/null
+++ b/thirdparty/graphite/src/Justifier.cpp
@@ -0,0 +1,282 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2012, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+
+#include "inc/Segment.h"
+#include "graphite2/Font.h"
+#include "inc/debug.h"
+#include "inc/CharInfo.h"
+#include "inc/Slot.h"
+#include "inc/Main.h"
+#include <cmath>
+
+using namespace graphite2;
+
+// Running totals of the justification values of the slots at one
+// justification level. Only the weight total is exposed.
+class JustifyTotal {
+public:
+    JustifyTotal()
+    : m_numGlyphs(0),
+      m_tStretch(0),
+      m_tShrink(0),
+      m_tStep(0),
+      m_tWeight(0)
+    {}
+
+    // Add slot s's values for the given level to the totals.
+    void accumulate(Slot *s, Segment *seg, int level);
+
+    int weight() const { return m_tWeight; }
+
+    CLASS_NEW_DELETE
+
+private:
+    int m_numGlyphs;
+    int m_tStretch;
+    int m_tShrink;
+    int m_tStep;
+    int m_tWeight;
+};
+
+// Adds one slot to the totals: justify subvalues 0, 1, 2 and 3 of the slot at
+// `level` are summed into stretch, shrink, step and weight respectively.
+void JustifyTotal::accumulate(Slot *s, Segment *seg, int level)
+{
+    m_numGlyphs += 1;
+    m_tStretch  += s->getJustify(seg, level, 0);
+    m_tShrink   += s->getJustify(seg, level, 1);
+    m_tStep     += s->getJustify(seg, level, 2);
+    m_tWeight   += s->getJustify(seg, level, 3);
+}
+
+// Justifies the run of slots starting at pSlot to the requested width.
+// pSlot - first slot of the line.
+// font - font supplying the scale; a null font means a scale of 1.
+// width - target width; divided by the scale before use. A negative
+// width skips the distribution loop below.
+// jflags - gr_justEndInline suppresses the trailing-glyph trimming.
+// pFirst - optional first slot to justify (defaults to pSlot).
+// pLast - optional last slot to justify (defaults to the segment's last).
+// Returns the x component of the position produced by the final
+// positionSlots call, `width` unchanged when it is negative and the silf has
+// no flags set, or -1 if a line-end slot could not be added.
+float Segment::justify(Slot *pSlot, const Font *font, float width, GR_MAYBE_UNUSED justFlags jflags, Slot *pFirst, Slot *pLast)
+{
+ Slot *end = last();
+ float currWidth = 0.0;
+ const float scale = font ? font->scale() : 1.0f;
+ Position res;
+
+ if (width < 0 && !(silf()->flags()))
+ return width;
+
+ // Put the slots into the silf's direction if they are currently reversed;
+ // this is undone at the end of the function.
+ if ((m_dir & 1) != m_silf->dir() && m_silf->bidiPass() != m_silf->numPasses())
+ {
+ reverseSlots();
+ std::swap(pFirst, pLast);
+ }
+ // Resolve both ends to base slots (following attachment parents).
+ if (!pFirst) pFirst = pSlot;
+ while (!pFirst->isBase()) pFirst = pFirst->attachedTo();
+ if (!pLast) pLast = last();
+ while (!pLast->isBase()) pLast = pLast->attachedTo();
+ const float base = pFirst->origin().x / scale;
+ width = width / scale;
+ if ((jflags & gr_justEndInline) == 0)
+ {
+ // Walk pLast backwards over trailing glyphs, stopping at the first one
+ // that satisfies the test below.
+ // NOTE(review): the last comparison is `== 0.f` while the other three
+ // are `!= 0.f`, so an all-zero box also stops the walk - confirm this
+ // is intended.
+ while (pLast != pFirst && pLast)
+ {
+ Rect bbox = theGlyphBBoxTemporary(pLast->glyph());
+ if (bbox.bl.x != 0.f || bbox.bl.y != 0.f || bbox.tr.x != 0.f || bbox.tr.y == 0.f)
+ break;
+ pLast = pLast->prev();
+ }
+ }
+
+ if (pLast)
+ end = pLast->nextSibling();
+ if (pFirst)
+ pFirst = pFirst->nextSibling();
+
+ int icount = 0;
+ int numLevels = silf()->numJustLevels();
+ if (!numLevels)
+ {
+ // The font defines no justification levels: synthesise one level that
+ // gives whitespace slots weight 1, step 1 and stretch -1.
+ for (Slot *s = pSlot; s && s != end; s = s->nextSibling())
+ {
+ CharInfo *c = charinfo(s->before());
+ if (isWhitespace(c->unicodeChar()))
+ {
+ s->setJustify(this, 0, 3, 1);
+ s->setJustify(this, 0, 2, 1);
+ s->setJustify(this, 0, 0, -1);
+ ++icount;
+ }
+ }
+ // No whitespace at all: give every slot those values instead.
+ if (!icount)
+ {
+ for (Slot *s = pSlot; s && s != end; s = s->nextSibling())
+ {
+ s->setJustify(this, 0, 3, 1);
+ s->setJustify(this, 0, 2, 1);
+ s->setJustify(this, 0, 0, -1);
+ }
+ }
+ ++numLevels;
+ }
+
+ // Measure the current width and total the justify values per level, and
+ // reset each slot's justification amount.
+ Vector<JustifyTotal> stats(numLevels);
+ for (Slot *s = pFirst; s && s != end; s = s->nextSibling())
+ {
+ float w = s->origin().x / scale + s->advance() - base;
+ if (w > currWidth) currWidth = w;
+ for (int j = 0; j < numLevels; ++j)
+ stats[j].accumulate(s, this, j);
+ s->just(0);
+ }
+
+ // Distribute the width difference, highest level first. A negative width
+ // starts i at -1 so the loop body never runs.
+ for (int i = (width < 0.0f) ? -1 : numLevels - 1; i >= 0; --i)
+ {
+ float diff;
+ float error = 0.;
+ float diffpw;
+ int tWeight = stats[i].weight();
+ if (tWeight == 0) continue;
+
+ do {
+ error = 0.;
+ diff = width - currWidth;
+ diffpw = diff / tWeight;
+ tWeight = 0;
+ for (Slot *s = pFirst; s && s != end; s = s->nextSibling()) // don't include final glyph
+ {
+ int w = s->getJustify(this, i, 3);
+ // Preferred change for this slot: its weighted share plus the
+ // rounding error carried over from earlier slots.
+ float pref = diffpw * w + error;
+ int step = s->getJustify(this, i, 2);
+ if (!step) step = 1; // handle lazy font developers
+ if (pref > 0)
+ {
+ // Stretching: clamp to the slot's stretch limit. Only slots
+ // that were not clamped contribute weight to another round.
+ float max = uint16(s->getJustify(this, i, 0));
+ if (i == 0) max -= s->just();
+ if (pref > max) pref = max;
+ else tWeight += w;
+ }
+ else
+ {
+ // Shrinking: clamp to the slot's shrink limit.
+ float max = uint16(s->getJustify(this, i, 1));
+ if (i == 0) max += s->just();
+ if (-pref > max) pref = -max;
+ else tWeight += w;
+ }
+ // Round towards zero to a whole number of steps.
+ int actual = int(pref / step) * step;
+
+ if (actual)
+ {
+ error += diffpw * w - actual;
+ if (i == 0)
+ s->just(s->just() + actual);
+ else
+ s->setJustify(this, i, 4, actual);
+ }
+ }
+ currWidth += diff - error;
+ } while (i == 0 && int(std::abs(error)) > 0 && tWeight);
+ }
+
+ // Temporarily narrow the segment to the justified line, adding line-end
+ // marker slots when silf flag bit 0 is set.
+ Slot *oldFirst = m_first;
+ Slot *oldLast = m_last;
+ if (silf()->flags() & 1)
+ {
+ m_first = pSlot = addLineEnd(pSlot);
+ m_last = pLast = addLineEnd(end);
+ // NOTE(review): this early return leaves m_first/m_last changed and
+ // skips the direction restore at the end of the function - confirm.
+ if (!m_first || !m_last) return -1.0;
+ }
+ else
+ {
+ m_first = pSlot;
+ m_last = pLast;
+ }
+
+ // run justification passes here
+#if !defined GRAPHITE2_NTRACING
+ json * const dbgout = m_face->logger();
+ if (dbgout)
+ *dbgout << json::object
+ << "justifies" << objectid(this)
+ << "passes" << json::array;
+#endif
+
+ if (m_silf->justificationPass() != m_silf->positionPass() && (width >= 0.f || (silf()->flags() & 1)))
+ m_silf->runGraphite(this, m_silf->justificationPass(), m_silf->positionPass());
+
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout)
+ {
+ *dbgout << json::item << json::close; // Close up the passes array
+ positionSlots(NULL, pSlot, pLast, m_dir);
+ Slot *lEnd = pLast->nextSibling();
+ *dbgout << "output" << json::array;
+ for(Slot * t = pSlot; t != lEnd; t = t->next())
+ *dbgout << dslot(this, t);
+ *dbgout << json::close << json::close;
+ }
+#endif
+
+ res = positionSlots(font, pSlot, pLast, m_dir);
+
+ // Remove the temporary line-end slots and restore the segment's real ends.
+ if (silf()->flags() & 1)
+ {
+ if (m_first)
+ delLineEnd(m_first);
+ if (m_last)
+ delLineEnd(m_last);
+ }
+ m_first = oldFirst;
+ m_last = oldLast;
+
+ // Undo the reversal applied at the top of the function.
+ if ((m_dir & 1) != m_silf->dir() && m_silf->bidiPass() != m_silf->numPasses())
+ reverseSlots();
+ return res.x;
+}
+
+// Creates a slot holding the silf's end-of-line glyph and links it into the
+// slot list: before nSlot when nSlot is given, otherwise after the segment's
+// current last slot. Returns the new slot, or NULL if none could be allocated.
+Slot *Segment::addLineEnd(Slot *nSlot)
+{
+ Slot *eSlot = newSlot();
+ if (!eSlot) return NULL;
+ const uint16 gid = silf()->endLineGlyphid();
+ const GlyphFace * theGlyph = m_face->glyphs().glyphSafe(gid);
+ eSlot->setGlyph(this, gid, theGlyph);
+ if (nSlot)
+ {
+ // Insert in front of nSlot. Only nSlot's prev link and the new slot's
+ // own links are set here; the previous slot's next link is not touched.
+ eSlot->next(nSlot);
+ eSlot->prev(nSlot->prev());
+ nSlot->prev(eSlot);
+ eSlot->before(nSlot->before());
+ if (eSlot->prev())
+ eSlot->after(eSlot->prev()->after());
+ else
+ eSlot->after(nSlot->before());
+ }
+ else
+ {
+ // Append after the current last slot (m_last is used without a null check).
+ nSlot = m_last;
+ eSlot->prev(nSlot);
+ nSlot->next(eSlot);
+ eSlot->after(eSlot->prev()->after());
+ eSlot->before(nSlot->after());
+ }
+ return eSlot;
+}
+
+// Unlinks slot s from the slot list and returns it to the segment's free pool.
+// When s has a following slot the neighbours are joined to each other;
+// otherwise the preceding slot becomes the end of the list.
+void Segment::delLineEnd(Slot *s)
+{
+    Slot * const following = s->next();
+    if (!following)
+    {
+        s->prev()->next(NULL);
+    }
+    else
+    {
+        following->prev(s->prev());
+        Slot * const preceding = s->prev();
+        if (preceding)
+            preceding->next(following);
+    }
+    freeSlot(s);
+}
diff --git a/thirdparty/graphite/src/NameTable.cpp b/thirdparty/graphite/src/NameTable.cpp
new file mode 100644
index 0000000000..d42b7f95bd
--- /dev/null
+++ b/thirdparty/graphite/src/NameTable.cpp
@@ -0,0 +1,254 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include "inc/Main.h"
+#include "inc/Endian.h"
+
+#include "inc/NameTable.h"
+#include "inc/UtfCodec.h"
+
+using namespace graphite2;
+
+NameTable::NameTable(const void* data, size_t length, uint16 platformId, uint16 encodingID) // Takes a private copy of the length bytes at data and validates them as an sfnt name table; on any failure m_table ends up NULL.
+ : m_platformId(0), m_encodingId(0), m_languageCount(0),
+ m_platformOffset(0), m_platformLastRecord(0), m_nameDataLength(0),
+ m_table(0), m_nameData(NULL)
+{
+ void *pdata = gralloc<byte>(length);
+ if (!pdata) return; // allocation failed: object keeps the empty state set by the initialisers
+ memcpy(pdata, data, length);
+ m_table = reinterpret_cast<const TtfUtil::Sfnt::FontNames*>(pdata);
+
+ if ((length > sizeof(TtfUtil::Sfnt::FontNames)) && // header must fit before count is read (short-circuit)
+ (length > sizeof(TtfUtil::Sfnt::FontNames) +
+ sizeof(TtfUtil::Sfnt::NameRecord) * ( be::swap<uint16>(m_table->count) - 1))) // count - 1: presumably FontNames already embeds one NameRecord - confirm against TtfUtil
+ {
+ uint16 offset = be::swap<uint16>(m_table->string_offset);
+ if (offset < length)
+ {
+ m_nameData = reinterpret_cast<const uint8*>(pdata) + offset; // string storage lives inside the same copied buffer
+ setPlatformEncoding(platformId, encodingID);
+ m_nameDataLength = uint16(length - offset); // NOTE(review): narrowing cast; the value is truncated if more than 65535 bytes follow the string offset
+ return;
+ }
+ }
+ free(const_cast<TtfUtil::Sfnt::FontNames*>(m_table)); // validation failed: release the copy
+ m_table = NULL;
+}
+
+uint16 NameTable::setPlatformEncoding(uint16 platformId, uint16 encodingID) // Records the first run of consecutive name records matching (platformId, encodingID) in m_platformOffset..m_platformLastRecord; always returns 0.
+{
+ if (!m_nameData) return 0; // table failed to load: nothing to select
+ uint16 i = 0;
+ uint16 count = be::swap<uint16>(m_table->count);
+ for (; i < count; i++) // find the first matching record
+ {
+ if (be::swap<uint16>(m_table->name_record[i].platform_id) == platformId &&
+ be::swap<uint16>(m_table->name_record[i].platform_specific_id) == encodingID)
+ {
+ m_platformOffset = m_platformLastRecord = i; // a lone match is both first and last of its run; previously last kept a stale value and getName() scanned an empty range
+ break;
+ }
+ }
+ while ((++i < count) && // extend the run over the directly following matching records
+ (be::swap<uint16>(m_table->name_record[i].platform_id) == platformId) &&
+ (be::swap<uint16>(m_table->name_record[i].platform_specific_id) == encodingID))
+ {
+ m_platformLastRecord = i;
+ }
+ m_encodingId = encodingID; // stored even when no record matched; the offsets then keep their previous values
+ m_platformId = platformId;
+ return 0;
+}
+
+void* NameTable::getName(uint16& languageId, uint16 nameId, gr_encform enc, uint32& length) // Finds nameId among the selected platform records and returns a newly allocated, 0-terminated copy in encoding enc; length gets the code-unit count and languageId the language actually used. Every failure returns NULL with languageId = 0 and length = 0.
+{
+ uint16 anyLang = 0; // record indices; NOTE(review): 0 doubles as "none found", so a match at record index 0 is treated as no match
+ uint16 enUSLang = 0;
+ uint16 bestLang = 0;
+ if (!m_table)
+ {
+ languageId = 0;
+ length = 0;
+ return NULL;
+ }
+ for (uint16 i = m_platformOffset; i <= m_platformLastRecord; i++) // only the run chosen by setPlatformEncoding() is searched
+ {
+ if (be::swap<uint16>(m_table->name_record[i].name_id) == nameId)
+ {
+ uint16 langId = be::swap<uint16>(m_table->name_record[i].language_id);
+ if (langId == languageId)
+ { // exact language match wins immediately
+ bestLang = i;
+ break;
+ }
+ // MS language tags have the language in the lower byte, region in the higher
+ else if ((langId & 0xFF) == (languageId & 0xFF))
+ { // same language, different region: remember it but keep looking for an exact match
+ bestLang = i;
+ }
+ else if (langId == 0x409)
+ { // first fallback candidate (language id 0x409, named enUS here)
+ enUSLang = i;
+ }
+ else
+ { // last-resort fallback: any language at all
+ anyLang = i;
+ }
+ }
+ }
+ if (!bestLang)
+ { // fallback order: 0x409 record, then any record, else fail
+ if (enUSLang) bestLang = enUSLang;
+ else
+ {
+ bestLang = anyLang;
+ if (!anyLang)
+ {
+ languageId = 0;
+ length = 0;
+ return NULL;
+ }
+ }
+ }
+ const TtfUtil::Sfnt::NameRecord & nameRecord = m_table->name_record[bestLang];
+ languageId = be::swap<uint16>(nameRecord.language_id); // report the language that was actually selected
+ uint16 utf16Length = be::swap<uint16>(nameRecord.length); // still in bytes at this point
+ uint16 offset = be::swap<uint16>(nameRecord.offset);
+ if(offset + utf16Length > m_nameDataLength) // string must lie inside the copied string storage
+ {
+ languageId = 0;
+ length = 0;
+ return NULL;
+ }
+ utf16Length >>= 1; // in utf16 units
+ utf16::codeunit_t * utf16Name = gralloc<utf16::codeunit_t>(utf16Length + 1); // +1 for the terminator
+ if (!utf16Name)
+ {
+ languageId = 0;
+ length = 0;
+ return NULL;
+ }
+ const uint8* pName = m_nameData + offset;
+ for (size_t i = 0; i < utf16Length; i++)
+ {
+ utf16Name[i] = be::read<uint16>(pName); // big-endian read; be::read advances pName
+ }
+ utf16Name[utf16Length] = 0;
+ if (!utf16::validate(utf16Name, utf16Name + utf16Length))
+ { // reject data that utf16::validate does not accept
+ free(utf16Name);
+ languageId = 0;
+ length = 0;
+ return NULL;
+ }
+ switch (enc)
+ {
+ case gr_utf8:
+ {
+ utf8::codeunit_t* uniBuffer = gralloc<utf8::codeunit_t>(3 * utf16Length + 1); // presumably sized for the worst case of 3 UTF-8 bytes per UTF-16 unit - confirm
+ if (!uniBuffer)
+ {
+ free(utf16Name);
+ languageId = 0;
+ length = 0;
+ return NULL;
+ }
+ utf8::iterator d = uniBuffer;
+ for (utf16::const_iterator s = utf16Name, e = utf16Name + utf16Length; s != e; ++s, ++d)
+ *d = *s; // presumably the codec iterators transcode one code point per step - see UtfCodec.h
+ length = uint32(d - uniBuffer);
+ uniBuffer[length] = 0;
+ free(utf16Name);
+ return uniBuffer;
+ }
+ case gr_utf16:
+ length = utf16Length; // the intermediate buffer is already the result
+ return utf16Name;
+ case gr_utf32:
+ {
+ utf32::codeunit_t * uniBuffer = gralloc<utf32::codeunit_t>(utf16Length + 1);
+ if (!uniBuffer)
+ {
+ free(utf16Name);
+ languageId = 0;
+ length = 0;
+ return NULL;
+ }
+ utf32::iterator d = uniBuffer;
+ for (utf16::const_iterator s = utf16Name, e = utf16Name + utf16Length; s != e; ++s, ++d)
+ *d = *s;
+ length = uint32(d - uniBuffer);
+ uniBuffer[length] = 0;
+ free(utf16Name);
+ return uniBuffer;
+ }
+ }
+ free(utf16Name); // enc matched none of the cases above
+ languageId = 0;
+ length = 0;
+ return NULL;
+}
+
+uint16 NameTable::getLanguageId(const char * bcp47Locale) // Maps a locale string to a language id: 0x8000 + index of a matching language-tag record when the table is format 1, otherwise the value from m_locale2Lang.getMsId().
+{
+ size_t localeLength = strlen(bcp47Locale);
+ uint16 localeId = m_locale2Lang.getMsId(bcp47Locale); // fallback result
+ if (m_table && (be::swap<uint16>(m_table->format) == 1))
+ {
+ const uint8 * pLangEntries = reinterpret_cast<const uint8*>(m_table) +
+ sizeof(TtfUtil::Sfnt::FontNames)
+ + sizeof(TtfUtil::Sfnt::NameRecord) * ( be::swap<uint16>(m_table->count) - 1); // first byte after the name record array
+ uint16 numLangEntries = be::read<uint16>(pLangEntries); // be::read advances pLangEntries past the count
+ const TtfUtil::Sfnt::LangTagRecord * langTag =
+ reinterpret_cast<const TtfUtil::Sfnt::LangTagRecord*>(pLangEntries);
+ if (pLangEntries + numLangEntries * sizeof(TtfUtil::Sfnt::LangTagRecord) <= m_nameData) // tag records must end before the string storage begins
+ {
+ for (uint16 i = 0; i < numLangEntries; i++)
+ {
+ uint16 offset = be::swap<uint16>(langTag[i].offset);
+ uint16 length = be::swap<uint16>(langTag[i].length);
+ if ((offset + length <= m_nameDataLength) && (length == 2 * localeLength)) // in bounds, and two bytes per locale character
+ {
+ const uint8* pName = m_nameData + offset;
+ bool match = true;
+ for (size_t j = 0; j < localeLength; j++)
+ {
+ uint16 code = be::read<uint16>(pName);
+ if ((code > 0x7F) || (code != bcp47Locale[j])) // only 7-bit code units can match; comparison is exact (case-sensitive)
+ {
+ match = false;
+ break;
+ }
+ }
+ if (match)
+ return 0x8000 + i;
+ }
+ }
+ }
+ }
+ return localeId;
+}
diff --git a/thirdparty/graphite/src/Pass.cpp b/thirdparty/graphite/src/Pass.cpp
new file mode 100644
index 0000000000..db31c22d46
--- /dev/null
+++ b/thirdparty/graphite/src/Pass.cpp
@@ -0,0 +1,1107 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include "inc/Main.h"
+#include "inc/debug.h"
+#include "inc/Endian.h"
+#include "inc/Pass.h"
+#include <cstring>
+#include <cstdlib>
+#include <cassert>
+#include <cmath>
+#include "inc/Segment.h"
+#include "inc/Code.h"
+#include "inc/Rule.h"
+#include "inc/Error.h"
+#include "inc/Collider.h"
+
+using namespace graphite2;
+using vm::Machine;
+typedef Machine::Code Code;
+
+enum KernCollison // NOTE(review): not referenced in the visible code; presumably names the values of the 2-bit m_kernColls field read in readPass() - confirm
+{
+ None = 0,
+ CrossSpace = 1,
+ InWord = 2,
+ reserved = 3
+};
+
+Pass::Pass() // Zero-initialises every listed member, so the destructor is safe on a pass that was never loaded; readPass() fills in the table-derived fields.
+: m_silf(0),
+ m_cols(0),
+ m_rules(0),
+ m_ruleMap(0),
+ m_startStates(0),
+ m_transitions(0),
+ m_states(0),
+ m_codes(0),
+ m_progs(0),
+ m_numCollRuns(0),
+ m_kernColls(0),
+ m_iMaxLoop(0),
+ m_numGlyphs(0),
+ m_numRules(0),
+ m_numStates(0),
+ m_numTransition(0),
+ m_numSuccess(0),
+ m_successStart(0),
+ m_numColumns(0),
+ m_minPreCtxt(0),
+ m_maxPreCtxt(0),
+ m_colThreshold(0),
+ m_isReverseDir(false)
+{
+}
+
+Pass::~Pass() // Releases everything the read* functions allocated: free() for gralloc'd buffers, delete[] for the new[]-allocated rule and code arrays.
+{
+ free(m_cols); // gralloc'd in readRanges()
+ free(m_startStates); // these three are gralloc'd in readStates()
+ free(m_transitions);
+ free(m_states);
+ free(m_ruleMap); // gralloc'd in readRules()
+
+ if (m_rules) delete [] m_rules; // new[] in readRules()
+ if (m_codes) delete [] m_codes; // new[] in readRules()
+ free(m_progs); // program pool shared by all Code objects, gralloc'd in readRules()
+}
+
+bool Pass::readPass(const byte * const pass_start, size_t pass_length, size_t subtable_base, // Parses one pass from pass_length bytes at pass_start: header, section offsets, sanity checks, then rules, ranges and states. Returns true on success; failures are recorded in e and reported through face.error(e).
+ GR_MAYBE_UNUSED Face & face, passtype pt, GR_MAYBE_UNUSED uint32 version, Error &e)
+{
+ const byte * p = pass_start,
+ * const pass_end = p + pass_length;
+ size_t numRanges;
+
+ if (e.test(pass_length < 40, E_BADPASSLENGTH)) return face.error(e); // the fixed header read below is 40 bytes (see the assert)
+ // Read in basic values
+ const byte flags = be::read<byte>(p);
+ if (e.test((flags & 0x1f) && // collision bits are only accepted on positioning passes of a face with collision support
+ (pt < PASS_TYPE_POSITIONING || !m_silf->aCollision() || !face.glyphs().hasBoxes() || !(m_silf->flags() & 0x20)),
+ E_BADCOLLISIONPASS))
+ return face.error(e);
+ m_numCollRuns = flags & 0x7; // bits 0-2
+ m_kernColls = (flags >> 3) & 0x3; // bits 3-4
+ m_isReverseDir = (flags >> 5) & 0x1; // bit 5
+ m_iMaxLoop = be::read<byte>(p);
+ if (m_iMaxLoop < 1) m_iMaxLoop = 1; // a loop limit of 0 is treated as 1
+ be::skip<byte>(p,2); // skip maxContext & maxBackup
+ m_numRules = be::read<uint16>(p);
+ if (e.test(!m_numRules && m_numCollRuns == 0, E_BADEMPTYPASS)) return face.error(e); // a pass needs rules or collision runs
+ be::skip<uint16>(p); // fsmOffset - not sure why we would want this
+ const byte * const pcCode = pass_start + be::read<uint32>(p) - subtable_base, // stored offsets are rebased by subtable_base onto pass_start
+ * const rcCode = pass_start + be::read<uint32>(p) - subtable_base,
+ * const aCode = pass_start + be::read<uint32>(p) - subtable_base;
+ be::skip<uint32>(p);
+ m_numStates = be::read<uint16>(p);
+ m_numTransition = be::read<uint16>(p);
+ m_numSuccess = be::read<uint16>(p);
+ m_numColumns = be::read<uint16>(p);
+ numRanges = be::read<uint16>(p);
+ be::skip<uint16>(p, 3); // skip searchRange, entrySelector & rangeShift.
+ assert(p - pass_start == 40);
+ // Perform some sanity checks.
+ if ( e.test(m_numTransition > m_numStates, E_BADNUMTRANS)
+ || e.test(m_numSuccess > m_numStates, E_BADNUMSUCCESS)
+ || e.test(m_numSuccess + m_numTransition < m_numStates, E_BADNUMSTATES)
+ || e.test(m_numRules && numRanges == 0, E_NORANGES)
+ || e.test(m_numColumns > 0x7FFF, E_BADNUMCOLUMNS))
+ return face.error(e);
+
+ m_successStart = m_numStates - m_numSuccess; // success states are the last m_numSuccess states
+ // test for beyond end - 1 to account for reading uint16
+ if (e.test(p + numRanges * 6 - 2 > pass_end, E_BADPASSLENGTH)) return face.error(e);
+ m_numGlyphs = be::peek<uint16>(p + numRanges * 6 - 4) + 1; // second field of the final range record (its last glyph id, per readRanges) + 1
+ // Calculate the start of various arrays.
+ const byte * const ranges = p;
+ be::skip<uint16>(p, numRanges*3); // three uint16 per range
+ const byte * const o_rule_map = p;
+ be::skip<uint16>(p, m_numSuccess + 1); // one offset per success state plus a closing offset
+
+ // More sanity checks
+ if (e.test(reinterpret_cast<const byte *>(o_rule_map + m_numSuccess*sizeof(uint16)) > pass_end
+ || p > pass_end, E_BADRULEMAPLEN))
+ return face.error(e);
+ const size_t numEntries = be::peek<uint16>(o_rule_map + m_numSuccess*sizeof(uint16)); // the closing offset is the rule map entry count
+ const byte * const rule_map = p;
+ be::skip<uint16>(p, numEntries);
+
+ if (e.test(p + 2*sizeof(uint8) > pass_end, E_BADPASSLENGTH)) return face.error(e);
+ m_minPreCtxt = be::read<uint8>(p);
+ m_maxPreCtxt = be::read<uint8>(p);
+ if (e.test(m_minPreCtxt > m_maxPreCtxt, E_BADCTXTLENBOUNDS)) return face.error(e);
+ const byte * const start_states = p;
+ be::skip<int16>(p, m_maxPreCtxt - m_minPreCtxt + 1); // one start state per pre-context length
+ const uint16 * const sort_keys = reinterpret_cast<const uint16 *>(p);
+ be::skip<uint16>(p, m_numRules);
+ const byte * const precontext = p;
+ be::skip<byte>(p, m_numRules);
+
+ if (e.test(p + sizeof(uint16) + sizeof(uint8) > pass_end, E_BADCTXTLENS)) return face.error(e);
+ m_colThreshold = be::read<uint8>(p);
+ if (m_colThreshold == 0) m_colThreshold = 10; // A default
+ const size_t pass_constraint_len = be::read<uint16>(p);
+
+ const uint16 * const o_constraint = reinterpret_cast<const uint16 *>(p);
+ be::skip<uint16>(p, m_numRules + 1);
+ const uint16 * const o_actions = reinterpret_cast<const uint16 *>(p);
+ be::skip<uint16>(p, m_numRules + 1);
+ const byte * const states = p;
+ if (e.test(2u*m_numTransition*m_numColumns >= (unsigned)(pass_end - p), E_BADPASSLENGTH) // transition table (2 bytes per cell) must fit
+ || e.test(p >= pass_end, E_BADPASSLENGTH))
+ return face.error(e);
+ be::skip<int16>(p, m_numTransition*m_numColumns);
+ be::skip<uint8>(p);
+ if (e.test(p != pcCode, E_BADPASSCCODEPTR)) return face.error(e); // each code section must start exactly where the previous data ends
+ be::skip<byte>(p, pass_constraint_len);
+ if (e.test(p != rcCode, E_BADRULECCODEPTR)
+ || e.test(size_t(rcCode - pcCode) != pass_constraint_len, E_BADCCODELEN)) return face.error(e);
+ be::skip<byte>(p, be::peek<uint16>(o_constraint + m_numRules)); // the closing offset gives the total constraint code length
+ if (e.test(p != aCode, E_BADACTIONCODEPTR)) return face.error(e);
+ be::skip<byte>(p, be::peek<uint16>(o_actions + m_numRules)); // likewise for the action code
+
+ // We should be at the end or within the pass
+ if (e.test(p > pass_end, E_BADPASSLENGTH)) return face.error(e);
+
+ // Load the pass constraint if there is one.
+ if (pass_constraint_len)
+ {
+ face.error_context(face.error_context() + 1);
+ m_cPConstraint = vm::Machine::Code(true, pcCode, pcCode + pass_constraint_len,
+ precontext[0], be::peek<uint16>(sort_keys), *m_silf, face, PASS_TYPE_UNKNOWN);
+ if (e.test(!m_cPConstraint, E_OUTOFMEM)
+ || e.test(m_cPConstraint.status() != Code::loaded, m_cPConstraint.status() + E_CODEFAILURE))
+ return face.error(e);
+ face.error_context(face.error_context() - 1);
+ }
+ if (m_numRules)
+ {
+ if (!readRanges(ranges, numRanges, e)) return face.error(e);
+ if (!readRules(rule_map, numEntries, precontext, sort_keys,
+ o_constraint, rcCode, o_actions, aCode, face, pt, e)) return false; // plain false here; face.error(e) is not called on this path
+ }
+#ifdef GRAPHITE2_TELEMETRY
+ telemetry::category _states_cat(face.tele.states);
+#endif
+ return m_numRules ? readStates(start_states, states, o_rule_map, face, e) : true; // a rule-less (collision-only) pass has no state machine to load
+}
+
+
+bool Pass::readRules(const byte * rule_map, const size_t num_entries, // Builds m_rules, the per-rule action/constraint Code objects (sharing the m_progs pool) and m_ruleMap. Returns true on success.
+ const byte *precontext, const uint16 * sort_key,
+ const uint16 * o_constraint, const byte *rc_data,
+ const uint16 * o_action, const byte * ac_data,
+ Face & face, passtype pt, Error &e)
+{
+ const byte * const ac_data_end = ac_data + be::peek<uint16>(o_action + m_numRules); // closing offsets mark the end of each code area
+ const byte * const rc_data_end = rc_data + be::peek<uint16>(o_constraint + m_numRules);
+
+ precontext += m_numRules; // all per-rule arrays are walked backwards, so start one past their last element
+ sort_key += m_numRules;
+ o_constraint += m_numRules;
+ o_action += m_numRules;
+
+ // Load rules.
+ const byte * ac_begin = 0, * rc_begin = 0,
+ * ac_end = ac_data + be::peek<uint16>(o_action),
+ * rc_end = rc_data + be::peek<uint16>(o_constraint);
+
+ // Allocate pools
+ m_rules = new Rule [m_numRules];
+ m_codes = new Code [m_numRules*2]; // two Code objects per rule: action and constraint
+ int totalSlots = 0;
+ const uint16 *tsort = sort_key;
+ for (int i = 0; i < m_numRules; ++i)
+ totalSlots += be::peek<uint16>(--tsort); // sum of all sort keys, passed to the pool size estimate
+ const size_t prog_pool_sz = vm::Machine::Code::estimateCodeDataOut(ac_end - ac_data + rc_end - rc_data, 2 * m_numRules, totalSlots);
+ m_progs = gralloc<byte>(prog_pool_sz);
+ byte * prog_pool_free = m_progs,
+ * prog_pool_end = m_progs + prog_pool_sz;
+ if (e.test(!(m_rules && m_codes && m_progs), E_OUTOFMEM)) return face.error(e);
+
+ Rule * r = m_rules + m_numRules - 1;
+ for (size_t n = m_numRules; r >= m_rules; --n, --r, ac_end = ac_begin, rc_end = rc_begin) // last rule first; each rule's code ends where the following rule's begins
+ {
+ face.error_context((face.error_context() & 0xFFFF00) + EC_ARULE + int((n - 1) << 24));
+ r->preContext = *--precontext;
+ r->sort = be::peek<uint16>(--sort_key);
+#ifndef NDEBUG
+ r->rule_idx = uint16(n - 1);
+#endif
+ if (r->sort > 63 || r->preContext >= r->sort || r->preContext > m_maxPreCtxt || r->preContext < m_minPreCtxt)
+ return false; // NOTE(review): fails without recording an error code in e
+ ac_begin = ac_data + be::peek<uint16>(--o_action);
+ --o_constraint;
+ rc_begin = be::peek<uint16>(o_constraint) ? rc_data + be::peek<uint16>(o_constraint) : rc_end; // a zero offset yields an empty constraint range
+
+ if (ac_begin > ac_end || ac_begin > ac_data_end || ac_end > ac_data_end
+ || rc_begin > rc_end || rc_begin > rc_data_end || rc_end > rc_data_end
+ || vm::Machine::Code::estimateCodeDataOut(ac_end - ac_begin + rc_end - rc_begin, 2, r->sort) > size_t(prog_pool_end - prog_pool_free)) // the remaining pool must cover this rule's estimate
+ return false;
+ r->action = new (m_codes+n*2-2) vm::Machine::Code(false, ac_begin, ac_end, r->preContext, r->sort, *m_silf, face, pt, &prog_pool_free); // placement-new into the m_codes array
+ r->constraint = new (m_codes+n*2-1) vm::Machine::Code(true, rc_begin, rc_end, r->preContext, r->sort, *m_silf, face, pt, &prog_pool_free);
+
+ if (e.test(!r->action || !r->constraint, E_OUTOFMEM)
+ || e.test(r->action->status() != Code::loaded, r->action->status() + E_CODEFAILURE)
+ || e.test(r->constraint->status() != Code::loaded, r->constraint->status() + E_CODEFAILURE)
+ || e.test(!r->constraint->immutable(), E_MUTABLECCODE))
+ return face.error(e);
+ }
+
+ byte * const moved_progs = prog_pool_free > m_progs ? static_cast<byte *>(realloc(m_progs, prog_pool_free - m_progs)) : 0; // shrink the pool to the bytes actually used
+ if (e.test(!moved_progs, E_OUTOFMEM))
+ {
+ free(m_progs);
+ m_progs = 0;
+ return face.error(e);
+ }
+
+ if (moved_progs != m_progs)
+ { // realloc relocated the pool: tell every Code object by how much it moved
+ for (Code * c = m_codes, * const ce = c + m_numRules*2; c != ce; ++c)
+ {
+ c->externalProgramMoved(moved_progs - m_progs);
+ }
+ m_progs = moved_progs;
+ }
+
+ // Load the rule entries map
+ face.error_context((face.error_context() & 0xFFFF00) + EC_APASS);
+ //TODO: Coverity: 1315804: FORWARD_NULL
+ RuleEntry * re = m_ruleMap = gralloc<RuleEntry>(num_entries);
+ if (e.test(!re, E_OUTOFMEM)) return face.error(e);
+ for (size_t n = num_entries; n; --n, ++re)
+ {
+ const ptrdiff_t rn = be::read<uint16>(rule_map);
+ if (e.test(rn >= m_numRules, E_BADRULENUM)) return face.error(e); // every entry must index an existing rule
+ re->rule = m_rules + rn;
+ }
+
+ return true;
+}
+
+static int cmpRuleEntry(const void *a, const void *b) { return (*(RuleEntry *)a < *(RuleEntry *)b ? -1 : // qsort comparator for RuleEntry, built on RuleEntry's operator<
+ (*(RuleEntry *)b < *(RuleEntry *)a ? 1 : 0)); } // returns 0 when neither entry orders before the other
+
+bool Pass::readStates(const byte * starts, const byte *states, const byte * o_rule_map, GR_MAYBE_UNUSED Face & face, Error &e) // Loads the start-state list, the transition table and each state's slice of m_ruleMap; returns true on success, otherwise the result of face.error(e).
+{
+#ifdef GRAPHITE2_TELEMETRY
+ telemetry::category _states_cat(face.tele.starts);
+#endif
+ m_startStates = gralloc<uint16>(m_maxPreCtxt - m_minPreCtxt + 1);
+#ifdef GRAPHITE2_TELEMETRY
+ telemetry::set_category(face.tele.states);
+#endif
+ m_states = gralloc<State>(m_numStates);
+#ifdef GRAPHITE2_TELEMETRY
+ telemetry::set_category(face.tele.transitions);
+#endif
+ m_transitions = gralloc<uint16>(m_numTransition * m_numColumns);
+
+ if (e.test(!m_startStates || !m_states || !m_transitions, E_OUTOFMEM)) return face.error(e);
+ // load start states
+ for (uint16 * s = m_startStates,
+ * const s_end = s + m_maxPreCtxt - m_minPreCtxt + 1; s != s_end; ++s)
+ {
+ *s = be::read<uint16>(starts);
+ if (e.test(*s >= m_numStates, E_BADSTATE)) // a start state must name an existing state
+ {
+ face.error_context((face.error_context() & 0xFFFF00) + EC_ASTARTS + int((s - m_startStates) << 24));
+ return face.error(e); // true;
+ }
+ }
+
+ // load state transition table.
+ for (uint16 * t = m_transitions,
+ * const t_end = t + m_numTransition*m_numColumns; t != t_end; ++t)
+ {
+ *t = be::read<uint16>(states);
+ if (e.test(*t >= m_numStates, E_BADSTATE)) // every transition target must be an existing state
+ {
+ face.error_context((face.error_context() & 0xFFFF00) + EC_ATRANS + int(((t - m_transitions) / m_numColumns) << 8));
+ return face.error(e);
+ }
+ }
+
+ State * s = m_states,
+ * const success_begin = m_states + m_numStates - m_numSuccess; // only states from here on carry rules
+ const RuleEntry * rule_map_end = m_ruleMap + be::peek<uint16>(o_rule_map + m_numSuccess*sizeof(uint16));
+ for (size_t n = m_numStates; n; --n, ++s)
+ {
+ RuleEntry * const begin = s < success_begin ? 0 : m_ruleMap + be::read<uint16>(o_rule_map), // read advances o_rule_map, so the peek on the next line sees the following offset
+ * const end = s < success_begin ? 0 : m_ruleMap + be::peek<uint16>(o_rule_map);
+
+ if (e.test(begin >= rule_map_end || end > rule_map_end || begin > end, E_BADRULEMAPPING))
+ {
+ face.error_context((face.error_context() & 0xFFFF00) + EC_ARULEMAP + int(n << 24));
+ return face.error(e);
+ }
+ s->rules = begin;
+ s->rules_end = (end - begin <= FiniteStateMachine::MAX_RULES)? end : // cap the number of rules recorded for one state at MAX_RULES
+ begin + FiniteStateMachine::MAX_RULES;
+ if (begin) // keep UBSan happy can't call qsort with null begin
+ qsort(begin, end - begin, sizeof(RuleEntry), &cmpRuleEntry); // sorts the full slice, including entries beyond the cap
+ }
+
+ return true;
+}
+
+bool Pass::readRanges(const byte * ranges, size_t num_ranges, Error &e) // Builds m_cols, the glyph-id to column table, from num_ranges (first glyph, last glyph, column) triples; returns false with e set on bad or overlapping ranges.
+{
+ m_cols = gralloc<uint16>(m_numGlyphs);
+ if (e.test(!m_cols, E_OUTOFMEM)) return false;
+ memset(m_cols, 0xFF, m_numGlyphs * sizeof(uint16)); // 0xffff marks a glyph with no column
+ for (size_t n = num_ranges; n; --n)
+ {
+ uint16 * ci = m_cols + be::read<uint16>(ranges), // first glyph of the range
+ * ci_end = m_cols + be::read<uint16>(ranges) + 1, // one past the last glyph of the range
+ col = be::read<uint16>(ranges);
+
+ if (e.test(ci >= ci_end || ci_end > m_cols+m_numGlyphs || col >= m_numColumns, E_BADRANGE))
+ return false;
+
+ // A glyph must only belong to one column at a time
+ while (ci != ci_end && *ci == 0xffff)
+ *ci++ = col;
+
+ if (e.test(ci != ci_end, E_BADRANGE)) // the fill stopped early: some glyph was already assigned by another range
+ return false;
+ }
+ return true;
+}
+
+
+bool Pass::runGraphite(vm::Machine & m, FiniteStateMachine & fsm, bool reverse) const // Runs this pass over the machine's segment: rule matching first, then the collision phases if the pass requests them. Returns false when the VM or a collision step fails.
+{
+ Slot *s = m.slotMap().segment.first();
+ if (!s || !testPassConstraint(m)) return true; // empty segment or failed pass constraint: nothing to do, not an error
+ if (reverse)
+ {
+ m.slotMap().segment.reverseSlots();
+ s = m.slotMap().segment.first();
+ }
+ if (m_numRules)
+ {
+ Slot *currHigh = s->next();
+
+#if !defined GRAPHITE2_NTRACING
+ if (fsm.dbgout) *fsm.dbgout << "rules" << json::array;
+ json::closer rules_array_closer(fsm.dbgout);
+#endif
+
+ m.slotMap().highwater(currHigh);
+ int lc = m_iMaxLoop; // remaining iterations allowed before the cursor is forced to the high-water slot
+ do
+ {
+ findNDoRule(s, m, fsm);
+ if (m.status() != Machine::finished) return false;
+ if (s && (s == m.slotMap().highwater() || m.slotMap().highpassed() || --lc == 0)) {
+ if (!lc)
+ s = m.slotMap().highwater(); // loop budget exhausted: jump to the high-water slot
+ lc = m_iMaxLoop;
+ if (s)
+ m.slotMap().highwater(s->next()); // move the high-water mark one slot beyond the cursor
+ }
+ } while (s);
+ }
+ //TODO: Use enums for flags
+ const bool collisions = m_numCollRuns || m_kernColls;
+
+ if (!collisions || !m.slotMap().segment.hasCollisionInfo())
+ return true;
+
+ if (m_numCollRuns)
+ {
+ if (!(m.slotMap().segment.flags() & Segment::SEG_INITCOLLISIONS))
+ {
+ m.slotMap().segment.positionSlots(0, 0, 0, m.slotMap().dir(), true);
+// m.slotMap().segment.flags(m.slotMap().segment.flags() | Segment::SEG_INITCOLLISIONS);
+ }
+ if (!collisionShift(&m.slotMap().segment, m.slotMap().dir(), fsm.dbgout))
+ return false;
+ }
+ if ((m_kernColls) && !collisionKern(&m.slotMap().segment, m.slotMap().dir(), fsm.dbgout))
+ return false;
+ if (collisions && !collisionFinish(&m.slotMap().segment, fsm.dbgout)) // collisions is always true here because of the early return above
+ return false;
+ return true;
+}
+
+bool Pass::runFSM(FiniteStateMachine& fsm, Slot * slot) const // Drives the state machine from slot, pushing visited slots into fsm.slots and accumulating the rules of every success state reached. Returns false when there is too little pre-context or the slot budget runs out.
+{
+ fsm.reset(slot, m_maxPreCtxt);
+ if (fsm.slots.context() < m_minPreCtxt)
+ return false;
+
+ uint16 state = m_startStates[m_maxPreCtxt - fsm.slots.context()]; // start state depends on how much pre-context is available
+ uint8 free_slots = SlotMap::MAX_SLOTS;
+ do
+ {
+ fsm.slots.pushSlot(slot);
+ if (slot->gid() >= m_numGlyphs // glyph outside the column table
+ || m_cols[slot->gid()] == 0xffffU // glyph has no column in this pass
+ || --free_slots == 0 // slot budget used up
+ || state >= m_numTransition) // state has no row in the transition table
+ return free_slots != 0; // only running out of slots counts as failure
+
+ const uint16 * transitions = m_transitions + state*m_numColumns;
+ state = transitions[m_cols[slot->gid()]];
+ if (state >= m_successStart)
+ fsm.rules.accumulate_rules(m_states[state]);
+
+ slot = slot->next();
+ } while (state != 0 && slot); // stop on state 0 or at the end of the slot list
+
+ fsm.slots.pushSlot(slot); // also record the slot after the match; may be NULL
+ return true;
+}
+
+#if !defined GRAPHITE2_NTRACING
+
+inline
+Slot * input_slot(const SlotMap & slots, const int n) // Tracing helper: returns the slot at position n relative to the context offset in slots, or a slot reached via its neighbours when the mapped slot is flagged as copied.
+{
+ Slot * s = slots[slots.context() + n];
+ if (!s->isCopied()) return s;
+
+ return s->prev() ? s->prev()->next() : (s->next() ? s->next()->prev() : slots.segment.last()); // go through a neighbour's link instead of returning s itself
+}
+
+inline
+Slot * output_slot(const SlotMap & slots, const int n) // Tracing helper: returns the slot following map entry (context + n - 1), or the segment's first slot when that entry is NULL.
+{
+ Slot * s = slots[slots.context() + n - 1];
+ return s ? s->next() : slots.segment.first();
+}
+
+#endif //!defined GRAPHITE2_NTRACING
+
+void Pass::findNDoRule(Slot * & slot, Machine &m, FiniteStateMachine & fsm) const // Matches rules at slot, runs the action of the first rule whose constraint passes and repositions slot; if nothing fires, slot advances to slot->next().
+{
+ assert(slot);
+
+ if (runFSM(fsm, slot))
+ {
+ // Search for the first rule which passes the constraint
+ const RuleEntry * r = fsm.rules.begin(),
+ * const re = fsm.rules.end();
+ while (r != re && !testConstraint(*r->rule, m))
+ {
+ ++r;
+ if (m.status() != Machine::finished)
+ return; // VM failure during a constraint: leave slot untouched; runGraphite() checks m.status() after this call
+ }
+
+#if !defined GRAPHITE2_NTRACING
+ if (fsm.dbgout)
+ {
+ if (fsm.rules.size() != 0)
+ {
+ *fsm.dbgout << json::item << json::object;
+ dumpRuleEventConsidered(fsm, *r); // NOTE(review): r may equal re here; the callee only takes its address as a loop bound
+ if (r != re)
+ {
+ const int adv = doAction(r->rule->action, slot, m); // NOTE(review): unlike the non-tracing branch below, m.status() is not checked after doAction
+ dumpRuleEventOutput(fsm, *r->rule, slot);
+ if (r->rule->action->deletes()) fsm.slots.collectGarbage(slot);
+ adjustSlot(adv, slot, fsm.slots);
+ *fsm.dbgout << "cursor" << objectid(dslot(&fsm.slots.segment, slot))
+ << json::close; // Close RuleEvent object
+
+ return;
+ }
+ else
+ {
+ *fsm.dbgout << json::close // close "considered" array
+ << "output" << json::null
+ << "cursor" << objectid(dslot(&fsm.slots.segment, slot->next()))
+ << json::close;
+ }
+ }
+ }
+ else
+#endif
+ {
+ if (r != re)
+ {
+ const int adv = doAction(r->rule->action, slot, m);
+ if (m.status() != Machine::finished) return;
+ if (r->rule->action->deletes()) fsm.slots.collectGarbage(slot); // free slots the action marked deleted or copied
+ adjustSlot(adv, slot, fsm.slots); // move the cursor by the amount the action returned
+ return;
+ }
+ }
+ }
+
+ slot = slot->next(); // no rule fired: step to the next slot
+ return;
+}
+
+#if !defined GRAPHITE2_NTRACING
+
+void Pass::dumpRuleEventConsidered(const FiniteStateMachine & fsm, const RuleEntry & re) const // Tracing: opens the "considered" array and writes one failed-rule object for each accumulated rule that precedes re. The array is left open for the caller to close.
+{
+ *fsm.dbgout << "considered" << json::array;
+ for (const RuleEntry *r = fsm.rules.begin(); r != &re; ++r) // re is used only as an end marker and is never dereferenced
+ {
+ if (r->rule->preContext > fsm.slots.context())
+ continue; // rule needs more pre-context than is available: not reported
+ *fsm.dbgout << json::flat << json::object
+ << "id" << r->rule - m_rules
+ << "failed" << true
+ << "input" << json::flat << json::object
+ << "start" << objectid(dslot(&fsm.slots.segment, input_slot(fsm.slots, -r->rule->preContext)))
+ << "length" << r->rule->sort
+ << json::close // close "input"
+ << json::close; // close Rule object
+ }
+}
+
+
+void Pass::dumpRuleEventOutput(const FiniteStateMachine & fsm, const Rule & r, Slot * const last_slot) const // Tracing: writes the rule that fired, closes the "considered" array, then writes the "output" object with the slots up to last_slot and the resulting "postshift".
+{
+ *fsm.dbgout << json::item << json::flat << json::object
+ << "id" << &r - m_rules
+ << "failed" << false
+ << "input" << json::flat << json::object
+ << "start" << objectid(dslot(&fsm.slots.segment, input_slot(fsm.slots, 0)))
+ << "length" << r.sort - r.preContext
+ << json::close // close "input"
+ << json::close // close Rule object
+ << json::close // close considered array
+ << "output" << json::object
+ << "range" << json::flat << json::object
+ << "start" << objectid(dslot(&fsm.slots.segment, input_slot(fsm.slots, 0)))
+ << "end" << objectid(dslot(&fsm.slots.segment, last_slot))
+ << json::close // close "range"
+ << "slots" << json::array;
+ const Position rsb_prepos = last_slot ? last_slot->origin() : fsm.slots.segment.advance(); // reference position taken before positionSlots() runs
+ fsm.slots.segment.positionSlots(0, 0, 0, fsm.slots.segment.currdir());
+
+ for(Slot * slot = output_slot(fsm.slots, 0); slot != last_slot; slot = slot->next())
+ *fsm.dbgout << dslot(&fsm.slots.segment, slot);
+ *fsm.dbgout << json::close // close "slots"
+ << "postshift" << (last_slot ? last_slot->origin() : fsm.slots.segment.advance()) - rsb_prepos // change in that reference position across positionSlots()
+ << json::close; // close "output" object
+
+}
+
+#endif
+
+
+inline
+bool Pass::testPassConstraint(Machine & m) const // Runs the pass-level constraint code, if any, against the first slot of the segment; true when there is no constraint, or when it returns non-zero and the VM finished cleanly.
+{
+ if (!m_cPConstraint) return true;
+
+ assert(m_cPConstraint.constraint());
+
+ m.slotMap().reset(*m.slotMap().segment.first(), 0); // slot map holding just the first slot, with zero context
+ m.slotMap().pushSlot(m.slotMap().segment.first());
+ vm::slotref * map = m.slotMap().begin();
+ const uint32 ret = m_cPConstraint.run(m, map);
+
+#if !defined GRAPHITE2_NTRACING
+ json * const dbgout = m.slotMap().segment.getFace()->logger();
+ if (dbgout)
+ *dbgout << "constraint" << (ret && m.status() == Machine::finished);
+#endif
+
+ return ret && m.status() == Machine::finished;
+}
+
+
+bool Pass::testConstraint(const Rule & r, Machine & m) const // Returns true when rule r fits in the current slot map and its constraint code, if any, returns non-zero for every non-NULL slot the rule covers.
+{
+ const uint16 curr_context = m.slotMap().context();
+ if (unsigned(r.sort + curr_context - r.preContext) > m.slotMap().size() // rule would extend past the mapped slots
+ || curr_context - r.preContext < 0) return false; // rule needs more pre-context than is available
+
+ vm::slotref * map = m.slotMap().begin() + curr_context - r.preContext; // first slot covered by the rule
+ if (map[r.sort - 1] == 0)
+ return false; // the last slot the rule covers is missing
+
+ if (!*r.constraint) return true; // no constraint code attached to this rule
+ assert(r.constraint->constraint());
+ for (int n = r.sort; n && map; --n, ++map)
+ {
+ if (!*map) continue;
+ const int32 ret = r.constraint->run(m, map);
+ if (!ret || m.status() != Machine::finished)
+ return false;
+ }
+
+ return true;
+}
+
+
+void SlotMap::collectGarbage(Slot * &aSlot) // Frees every mapped slot flagged deleted or copied; if aSlot is one of them it is first redirected to a neighbouring slot.
+{
+ for(Slot **s = begin(), *const *const se = end() - 1; s != se; ++s) { // the final map entry is not examined
+ Slot *& slot = *s;
+ if(slot && (slot->isDeleted() || slot->isCopied()))
+ {
+ if (slot == aSlot)
+ aSlot = slot->prev() ? slot->prev() : slot->next(); // prefer the predecessor, else the successor
+ segment.freeSlot(slot);
+ }
+ }
+}
+
+
+
+int Pass::doAction(const Code *codeptr, Slot * & slot_out, vm::Machine & m) const // Runs action code at the context position of the slot map and returns the code's result; slot_out receives the slot the map pointer ends on, or NULL on VM failure.
+{
+ assert(codeptr);
+ if (!*codeptr) return 0; // no action code: slot_out is left unchanged
+ SlotMap & smap = m.slotMap();
+ vm::slotref * map = &smap[smap.context()];
+ smap.highpassed(false);
+
+ int32 ret = codeptr->run(m, map);
+
+ if (m.status() != Machine::finished)
+ { // VM failure: clear the cursor and the high-water mark
+ slot_out = NULL;
+ smap.highwater(0);
+ return 0;
+ }
+
+ slot_out = *map;
+ return ret;
+}
+
+
+void Pass::adjustSlot(int delta, Slot * & slot_out, SlotMap & smap) const // Moves slot_out by delta slots (negative = backwards), keeping the slot map's highpassed flag in step with crossings of the high-water slot.
+{
+ if (!slot_out)
+ { // no cursor: re-anchor at one end of the segment and compensate delta by one
+ if (smap.highpassed() || slot_out == smap.highwater())
+ {
+ slot_out = smap.segment.last();
+ ++delta;
+ if (!smap.highwater() || smap.highwater() == slot_out)
+ smap.highpassed(false);
+ }
+ else
+ {
+ slot_out = smap.segment.first();
+ --delta;
+ }
+ }
+ if (delta < 0)
+ {
+ while (++delta <= 0 && slot_out)
+ {
+ slot_out = slot_out->prev();
+ if (smap.highpassed() && smap.highwater() == slot_out)
+ smap.highpassed(false); // stepped back onto the high-water slot
+ }
+ }
+ else if (delta > 0)
+ {
+ while (--delta >= 0 && slot_out)
+ {
+ if (slot_out == smap.highwater() && slot_out)
+ smap.highpassed(true); // about to step past the high-water slot
+ slot_out = slot_out->next();
+ }
+ }
+}
+
+// Run the shift-based collision fixing over the segment, one collision range at a time.
+// For each range (from `start` up to the first later slot flagged COLL_END), phase 1 calls
+// resolveCollisions() on the slots that have COLL_FIX set but not COLL_KERN. Phase 2 then
+// repeats a backward pass (2a) and a forward pass (2b) up to m_numCollRuns - 1 times, for as
+// long as the previous pass reported remaining collisions or movement. The next range begins
+// at the next slot flagged COLL_START.
+// Returns false as soon as a resolveCollisions() call fails, true otherwise.
+bool Pass::collisionShift(Segment *seg, int dir, json * const dbgout) const
+{
+ ShiftCollider shiftcoll(dbgout);
+ // bool isfirst = true;
+ bool hasCollisions = false;
+ Slot *start = seg->first(); // turn on collision fixing for the first slot
+ Slot *end = NULL;
+ bool moved = false;
+
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout)
+ *dbgout << "collisions" << json::array
+ << json::flat << json::object << "num-loops" << m_numCollRuns << json::close;
+#endif
+
+ // One iteration per collision range; `start` is the first slot of the current range.
+ while (start)
+ {
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout) *dbgout << json::object << "phase" << "1" << "moves" << json::array;
+#endif
+ hasCollisions = false;
+ end = NULL;
+ // phase 1 : position shiftable glyphs, ignoring kernable glyphs
+ for (Slot *s = start; s; s = s->next())
+ {
+ const SlotCollision * c = seg->collisionInfo(s);
+ if (start && (c->flags() & (SlotCollision::COLL_FIX | SlotCollision::COLL_KERN)) == SlotCollision::COLL_FIX
+ && !resolveCollisions(seg, s, start, shiftcoll, false, dir, moved, hasCollisions, dbgout))
+ return false;
+ // A COLL_END flag on any slot after the first closes the range; `end` is one past it.
+ if (s != start && (c->flags() & SlotCollision::COLL_END))
+ {
+ end = s->next();
+ break;
+ }
+ }
+
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout)
+ *dbgout << json::close << json::close; // phase-1
+#endif
+
+ // phase 2 : loop until happy.
+ for (int i = 0; i < m_numCollRuns - 1; ++i)
+ {
+ if (hasCollisions || moved)
+ {
+
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout)
+ *dbgout << json::object << "phase" << "2a" << "loop" << i << "moves" << json::array;
+#endif
+ // phase 2a : if any shiftable glyphs are in collision, iterate backwards,
+ // fixing them and ignoring other non-collided glyphs. Note that this handles ONLY
+ // glyphs that are actually in collision from phases 1 or 2b, and working backwards
+ // has the intended effect of breaking logjams.
+ if (hasCollisions)
+ {
+ hasCollisions = false;
+ #if 0
+ moved = true;
+ for (Slot *s = start; s != end; s = s->next())
+ {
+ SlotCollision * c = seg->collisionInfo(s);
+ c->setShift(Position(0, 0));
+ }
+ #endif
+ // Walk from the last slot of the range back to (and including) start.
+ Slot *lend = end ? end->prev() : seg->last();
+ Slot *lstart = start->prev();
+ for (Slot *s = lend; s != lstart; s = s->prev())
+ {
+ SlotCollision * c = seg->collisionInfo(s);
+ if (start && (c->flags() & (SlotCollision::COLL_FIX | SlotCollision::COLL_KERN | SlotCollision::COLL_ISCOL))
+ == (SlotCollision::COLL_FIX | SlotCollision::COLL_ISCOL)) // ONLY if this glyph is still colliding
+ {
+ if (!resolveCollisions(seg, s, lend, shiftcoll, true, dir, moved, hasCollisions, dbgout))
+ return false;
+ // Mark the slot so that phase 2b below skips it once and then clears the mark.
+ c->setFlags(c->flags() | SlotCollision::COLL_TEMPLOCK);
+ }
+ }
+ }
+
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout)
+ *dbgout << json::close << json::close // phase 2a
+ << json::object << "phase" << "2b" << "loop" << i << "moves" << json::array;
+#endif
+
+ // phase 2b : redo basic diacritic positioning pass for ALL glyphs. Each successive loop adjusts
+ // glyphs from their current adjusted position, which has the effect of gradually minimizing the
+ // resulting adjustment; ie, the final result will be gradually closer to the original location.
+ // Also it allows more flexibility in the final adjustment, since it is moving along the
+ // possible 8 vectors from successively different starting locations.
+ if (moved)
+ {
+ moved = false;
+ for (Slot *s = start; s != end; s = s->next())
+ {
+ SlotCollision * c = seg->collisionInfo(s);
+ if (start && (c->flags() & (SlotCollision::COLL_FIX | SlotCollision::COLL_TEMPLOCK
+ | SlotCollision::COLL_KERN)) == SlotCollision::COLL_FIX
+ && !resolveCollisions(seg, s, start, shiftcoll, false, dir, moved, hasCollisions, dbgout))
+ return false;
+ else if (c->flags() & SlotCollision::COLL_TEMPLOCK)
+ c->setFlags(c->flags() & ~SlotCollision::COLL_TEMPLOCK);
+ }
+ }
+ // if (!hasCollisions) // no, don't leave yet because phase 2b will continue to improve things
+ // break;
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout)
+ *dbgout << json::close << json::close; // phase 2
+#endif
+ }
+ }
+ // No COLL_END was found, so the range ran to the end of the segment: nothing left to do.
+ if (!end)
+ break;
+ // Find the start of the next range, searching from the slot that ended this one.
+ start = NULL;
+ for (Slot *s = end->prev(); s; s = s->next())
+ {
+ if (seg->collisionInfo(s)->flags() & SlotCollision::COLL_START)
+ {
+ start = s;
+ break;
+ }
+ }
+ }
+ return true;
+}
+
+// Phase 3 of collision handling: kern clusters.
+// Walks every slot of the segment, maintaining the running vertical extent (ymin/ymax) of the
+// glyphs not flagged COLL_ISSPACE, and calls resolveKern() for each slot that has both
+// COLL_KERN and COLL_FIX set while a collision range is open (`start` is non-null).
+// Returns false if a slot's glyph id fails GlyphCache::check(), true otherwise.
+bool Pass::collisionKern(Segment *seg, int dir, json * const dbgout) const
+{
+ Slot *start = seg->first();
+ // Sentinels so that the first real bounding box replaces both extremes.
+ float ymin = 1e38f;
+ float ymax = -1e38f;
+ const GlyphCache &gc = seg->getFace()->glyphs();
+
+ // phase 3 : handle kerning of clusters
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout)
+ *dbgout << json::object << "phase" << "3" << "moves" << json::array;
+#endif
+
+ for (Slot *s = seg->first(); s; s = s->next())
+ {
+ // NOTE(review): this early return leaves the json object opened above unclosed when tracing.
+ if (!gc.check(s->gid()))
+ return false;
+ const SlotCollision * c = seg->collisionInfo(s);
+ const Rect &bbox = seg->theGlyphBBoxTemporary(s->gid());
+ float y = s->origin().y + c->shift().y;
+ if (!(c->flags() & SlotCollision::COLL_ISSPACE))
+ {
+ ymax = max(y + bbox.tr.y, ymax);
+ ymin = min(y + bbox.bl.y, ymin);
+ }
+ if (start && (c->flags() & (SlotCollision::COLL_KERN | SlotCollision::COLL_FIX))
+ == (SlotCollision::COLL_KERN | SlotCollision::COLL_FIX))
+ resolveKern(seg, s, start, dir, ymin, ymax, dbgout);
+ // COLL_END is tested before COLL_START, so a slot carrying both flags leaves a range open.
+ if (c->flags() & SlotCollision::COLL_END)
+ start = NULL;
+ if (c->flags() & SlotCollision::COLL_START)
+ start = s;
+ }
+
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout)
+ *dbgout << json::close << json::close; // phase 3
+#endif
+ return true;
+}
+
+// Commit the collision results: for every slot with a non-zero pending shift, add the
+// shift to the slot's stored offset and reset the shift to (0, 0). When tracing is
+// compiled in and dbgout is set, one json element is closed. Always returns true.
+bool Pass::collisionFinish(Segment *seg, GR_MAYBE_UNUSED json * const dbgout) const
+{
+    Slot *slot = seg->first();
+    while (slot)
+    {
+        SlotCollision *info = seg->collisionInfo(slot);
+        const bool hasPendingShift = info->shift().x != 0 || info->shift().y != 0;
+        if (hasPendingShift)
+        {
+            const Position pending = info->shift();
+            const Position cleared(0, 0);
+            info->setOffset(pending + info->offset());
+            info->setShift(cleared);
+        }
+        slot = slot->next();
+    }
+// seg->positionSlots();
+
+#if !defined GRAPHITE2_NTRACING
+    if (dbgout)
+        *dbgout << json::close;
+#endif
+    return true;
+}
+
+// Returns true when slot s, or any slot reached by following attachedTo() upwards from it,
+// has the COLL_KERN collision flag set. COLL_FIX is not consulted.
+static bool inKernCluster(Segment *seg, Slot *s)
+{
+    for (Slot *node = s; ; node = node->attachedTo())
+    {
+        if (seg->collisionInfo(node)->flags() & SlotCollision::COLL_KERN)
+            return true;
+        // Reached the top of the attachment chain without finding a kernable slot.
+        if (!node->attachedTo())
+            return false;
+    }
+}
+
+// Fix collisions for the given slot (slotFix) against its neighbours, scanning from `start`.
+// Returns false only if the collider's initSlot() or mergeSlot() fails; otherwise returns true.
+// Whether a collision still remains is reported through hasCol (OR-ed in) and through the
+// COLL_ISCOL flag on slotFix; `moved` is set when the new shift differs from the old one by
+// at least m_colThreshold.
+// isRev means we are processing backwards (neighbours are visited via prev()).
+bool Pass::resolveCollisions(Segment *seg, Slot *slotFix, Slot *start,
+ ShiftCollider &coll, GR_MAYBE_UNUSED bool isRev, int dir, bool &moved, bool &hasCol,
+ json * const dbgout) const
+{
+ Slot * nbor; // neighboring slot
+ SlotCollision *cFix = seg->collisionInfo(slotFix);
+ if (!coll.initSlot(seg, slotFix, cFix->limit(), cFix->margin(), cFix->marginWt(),
+ cFix->shift(), cFix->offset(), dir, dbgout))
+ return false;
+ bool collides = false;
+ // When we're processing forward, ignore kernable glyphs that precede the target glyph.
+ // When processing backward, don't ignore these until we pass slotFix.
+ bool ignoreForKern = !isRev;
+ bool rtl = dir & 1;
+ // base is the root of slotFix's attachment chain.
+ Slot *base = slotFix;
+ while (base->attachedTo())
+ base = base->attachedTo();
+ Position zero(0., 0.);
+
+ // Look for collisions with the neighboring glyphs.
+ for (nbor = start; nbor; nbor = isRev ? nbor->prev() : nbor->next())
+ {
+ SlotCollision *cNbor = seg->collisionInfo(nbor);
+ bool sameCluster = nbor->isChildOf(base);
+ if (nbor != slotFix // don't process if this is the slot of interest
+ && !(cNbor->ignore()) // don't process if ignoring
+ && (nbor == base || sameCluster // process if in the same cluster as slotFix
+ || !inKernCluster(seg, nbor)) // or this cluster is not to be kerned
+// || (rtl ^ ignoreForKern)) // or it comes before(ltr) or after(rtl)
+ && (!isRev // if processing forwards then good to merge otherwise only:
+ || !(cNbor->flags() & SlotCollision::COLL_FIX) // merge in immovable stuff
+ || ((cNbor->flags() & SlotCollision::COLL_KERN) && !sameCluster) // ignore other kernable clusters
+ || (cNbor->flags() & SlotCollision::COLL_ISCOL)) // test against other collided glyphs
+ && !coll.mergeSlot(seg, nbor, cNbor, cNbor->shift(), !ignoreForKern, sameCluster, collides, false, dbgout))
+ return false;
+ else if (nbor == slotFix)
+ // Switching sides of this glyph - if we were ignoring kernable stuff before, don't anymore.
+ ignoreForKern = !ignoreForKern;
+
+ // Stop at the boundary of the collision range in the direction of travel.
+ if (nbor != start && (cNbor->flags() & (isRev ? SlotCollision::COLL_START : SlotCollision::COLL_END)))
+ break;
+ }
+ bool isCol = false;
+ // Resolve if a neighbour collided, or if the slot already carries a non-zero shift.
+ if (collides || cFix->shift().x != 0.f || cFix->shift().y != 0.f)
+ {
+ Position shift = coll.resolve(seg, isCol, dbgout);
+ // isCol has been set to true if a collision remains.
+ // Shifts with a component of magnitude 1e38 or more are discarded.
+ if (std::fabs(shift.x) < 1e38f && std::fabs(shift.y) < 1e38f)
+ {
+ if (sqr(shift.x-cFix->shift().x) + sqr(shift.y-cFix->shift().y) >= m_colThreshold * m_colThreshold)
+ moved = true;
+ cFix->setShift(shift);
+ // Re-finalise the slots attached to slotFix from its shifted origin.
+ if (slotFix->firstChild())
+ {
+ Rect bbox;
+ Position here = slotFix->origin() + shift;
+ float clusterMin = here.x;
+ slotFix->firstChild()->finalise(seg, NULL, here, bbox, 0, clusterMin, rtl, false);
+ }
+ }
+ }
+ else
+ {
+ // This glyph is not colliding with anything.
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout)
+ {
+ *dbgout << json::object
+ << "missed" << objectid(dslot(seg, slotFix));
+ coll.outputJsonDbg(dbgout, seg, -1);
+ *dbgout << json::close;
+ }
+#endif
+ }
+
+ // Set the is-collision flag bit. COLL_KNOWN is set in both cases.
+ if (isCol)
+ { cFix->setFlags(cFix->flags() | SlotCollision::COLL_ISCOL | SlotCollision::COLL_KNOWN); }
+ else
+ { cFix->setFlags((cFix->flags() & ~SlotCollision::COLL_ISCOL) | SlotCollision::COLL_KNOWN); }
+ hasCol |= isCol;
+ return true;
+}
+
+// Kern the cluster based at slotFix against the glyphs that follow it.
+// If slotFix is attached to another slot, the root of its attachment chain is flagged
+// COLL_KERN | COLL_FIX and 0 is returned. Otherwise the following slots outside this cluster
+// are merged into a KernCollider; if any mergeSlot() call returns true, the resolved move is
+// applied to slotFix's advance and stored as its shift.
+// ymin/ymax are widened in place to include slotFix's bounding box.
+// Returns the x component of the applied move, or 0 when nothing was moved.
+float Pass::resolveKern(Segment *seg, Slot *slotFix, GR_MAYBE_UNUSED Slot *start, int dir,
+ float &ymin, float &ymax, json *const dbgout) const
+{
+ Slot *nbor; // neighboring slot
+ float currSpace = 0.;
+ bool collides = false;
+ unsigned int space_count = 0;
+ // base is the root of slotFix's attachment chain; cFix is the collision info of that root.
+ Slot *base = slotFix;
+ while (base->attachedTo())
+ base = base->attachedTo();
+ SlotCollision *cFix = seg->collisionInfo(base);
+ const GlyphCache &gc = seg->getFace()->glyphs();
+ const Rect &bbb = seg->theGlyphBBoxTemporary(slotFix->gid());
+ const float by = slotFix->origin().y + cFix->shift().y;
+
+ if (base != slotFix)
+ {
+ cFix->setFlags(cFix->flags() | SlotCollision::COLL_KERN | SlotCollision::COLL_FIX);
+ return 0;
+ }
+ bool seenEnd = (cFix->flags() & SlotCollision::COLL_END) != 0;
+ bool isInit = false;
+ KernCollider coll(dbgout);
+
+ ymax = max(by + bbb.tr.y, ymax);
+ ymin = min(by + bbb.bl.y, ymin);
+ for (nbor = slotFix->next(); nbor; nbor = nbor->next())
+ {
+ // Skip slots that belong to the cluster being kerned.
+ if (nbor->isChildOf(base))
+ continue;
+ if (!gc.check(nbor->gid()))
+ return 0.;
+ const Rect &bb = seg->theGlyphBBoxTemporary(nbor->gid());
+ SlotCollision *cNbor = seg->collisionInfo(nbor);
+ // A glyph with zero vertical extent, or one flagged COLL_ISSPACE, is treated as a space.
+ if ((bb.bl.y == 0.f && bb.tr.y == 0.f) || (cNbor->flags() & SlotCollision::COLL_ISSPACE))
+ {
+ if (m_kernColls == InWord)
+ break;
+ // Add space for a space glyph.
+ currSpace += nbor->advance();
+ ++space_count;
+ }
+ else
+ {
+ space_count = 0;
+ // NOTE(review): nbor starts at slotFix->next(), so the nbor != slotFix test looks redundant.
+ if (nbor != slotFix && !cNbor->ignore())
+ {
+ seenEnd = true;
+ // The collider is initialised lazily, on the first neighbour actually merged.
+ if (!isInit)
+ {
+ if (!coll.initSlot(seg, slotFix, cFix->limit(), cFix->margin(),
+ cFix->shift(), cFix->offset(), dir, ymin, ymax, dbgout))
+ return 0.;
+ isInit = true;
+ }
+ collides |= coll.mergeSlot(seg, nbor, cNbor->shift(), currSpace, dir, dbgout);
+ }
+ }
+ // Stop at a COLL_END slot once an end has already been seen and fewer than two
+ // consecutive spaces precede it; otherwise just remember that an end was seen.
+ if (cNbor->flags() & SlotCollision::COLL_END)
+ {
+ if (seenEnd && space_count < 2)
+ break;
+ else
+ seenEnd = true;
+ }
+ }
+ if (collides)
+ {
+ Position mv = coll.resolve(seg, slotFix, dir, dbgout);
+ coll.shift(mv, dir);
+ // Replace the previously applied shift in the advance with the new move.
+ Position delta = slotFix->advancePos() + mv - cFix->shift();
+ slotFix->advance(delta);
+ cFix->setShift(mv);
+ return mv.x;
+ }
+ return 0.;
+}
diff --git a/thirdparty/graphite/src/Position.cpp b/thirdparty/graphite/src/Position.cpp
new file mode 100644
index 0000000000..d2fdbd4e7c
--- /dev/null
+++ b/thirdparty/graphite/src/Position.cpp
@@ -0,0 +1,97 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include "inc/Position.h"
+#include <cmath>
+
+using namespace graphite2;
+
+// Returns true unless this rectangle lies strictly beyond `other` on the x or the y axis.
+// Rectangles whose edges merely touch count as a hit.
+bool Rect::hitTest(Rect &other)
+{
+    const bool apartInX = bl.x > other.tr.x || tr.x < other.bl.x;
+    const bool apartInY = bl.y > other.tr.y || tr.y < other.bl.y;
+    return !(apartInX || apartInY);
+}
+
+// For this rectangle placed at `offset` and `other` placed at `othero`, computes per axis the
+// two signed edge-to-edge distances (our low edge minus their high edge, and their low edge
+// minus our high edge) and returns the larger of the two for x and for y.
+Position Rect::overlap(Position &offset, Rect &other, Position &othero)
+{
+    const float lowToHighX = (bl.x + offset.x) - (other.tr.x + othero.x);
+    const float lowToHighY = (bl.y + offset.y) - (other.tr.y + othero.y);
+    const float highToLowX = (other.bl.x + othero.x) - (tr.x + offset.x);
+    const float highToLowY = (other.bl.y + othero.y) - (tr.y + offset.y);
+
+    float resX = highToLowX;
+    if (lowToHighX > highToLowX)
+        resX = lowToHighX;
+
+    float resY = highToLowY;
+    if (lowToHighY > highToLowY)
+        resY = lowToHighY;
+
+    return Position(resX, resY);
+}
+
+// Bounds `move` from above by the two limits. If move is below both limits it is returned
+// unchanged and error is set to 0; otherwise the smaller limit is returned (lim2 on a tie)
+// and error is set to the absolute distance between move and that limit.
+float boundmin(float move, float lim1, float lim2, float &error)
+{
+    const bool belowBoth = move < lim1 && move < lim2;
+    if (belowBoth)
+    {
+        error = 0.;
+        return move;
+    }
+
+    // error is always positive for easy comparison
+    const float bound = (lim1 < lim2) ? lim1 : lim2;
+    error = std::fabs(move - bound);
+    return bound;
+}
+
+// Disabled code: everything up to the matching #endif is excluded from compilation.
+// It picks, among eight candidate moves (+x, +y, -x, -y and the four diagonals), the one
+// whose boundmin() error is smallest.
+// NOTE(review): as written this would not compile against boundmin() above, which takes
+// `float &error` but is passed `&eax` etc. here; INF, SQRT2 and ISQRT2 are not defined in
+// this file.
+#if 0
+Position Rect::constrainedAvoid(Position &offset, Rect &box, Rect &sdbox, Position &other, Rect &obox, Rect &osdbox)
+{
+ // a = max, i = min, s = sum, d = diff
+ float eax, eay, eix, eiy, eas, eis, ead, eid;
+ float beste = INF;
+ Position res;
+ // calculate the movements in each direction and the error (amount of remaining overlap)
+ // first param is movement, second and third are movement over the constraining box
+ float ax = boundmin(obox.tr.x + other.x - box.bl.x - offset.x + 1, tr.x - offset.x, INF, &eax);
+ float ay = boundmin(obox.tr.y + other.y - box.bl.y - offset.y + 1, tr.y - offset.y, INF, &eay);
+ float ix = boundmin(obox.bl.x + other.x - box.tr.x - offset.x + 1, bl.x - offset.x, INF, &eix);
+ float iy = boundmin(obox.bl.y + other.y - box.tr.y - offset.y + 1, bl.y - offset.y, INF, &eiy);
+ float as = boundmin(ISQRT2 * (osdbox.tr.x + other.x + other.y - sdbox.bl.x - offset.x - offset.y) + 1, tr.x - offset.x, tr.y - offset.y, &eas);
+ float is = boundmin(ISQRT2 * (osdbox.bl.x + other.x + other.y - sdbox.tr.x - offset.x - offset.y) + 1, bl.x - offset.x, bl.y - offset.y, &eis);
+ float ad = boundmin(ISQRT2 * (osdbox.tr.y + other.x - other.y - sdbox.bl.y - offset.x + offset.y) + 1, tr.y - offset.y, tr.x - offset.x, &ead);
+ float id = boundmin(ISQRT2 * (osdbox.bl.y + other.x - other.y - sdbox.tr.y - offset.x + offset.y) + 1, bl.y - offset.y, bl.x - offset.x, &eid);
+
+ // Keep the candidate with the smallest error; diagonal errors are scaled by SQRT2.
+ if (eax < beste)
+ { res = Position(ax, 0); beste = eax; }
+ if (eay < beste)
+ { res = Position(0, ay); beste = eay; }
+ if (eix < beste)
+ { res = Position(ix, 0); beste = eix; }
+ if (eiy < beste)
+ { res = Position(0, iy); beste = eiy; }
+ // NOTE(review): this case uses (as, ad) while the other diagonal cases repeat one value - confirm intent.
+ if (SQRT2 * (eas) < beste)
+ { res = Position(as, ad); beste = SQRT2 * (eas); }
+ if (SQRT2 * (eis) < beste)
+ { res = Position(is, is); beste = SQRT2 * (eis); }
+ if (SQRT2 * (ead) < beste)
+ { res = Position(ad, ad); beste = SQRT2 * (ead); }
+ if (SQRT2 * (eid) < beste)
+ { res = Position(id, id); beste = SQRT2 * (eid); }
+ return res;
+}
+#endif
diff --git a/thirdparty/graphite/src/Segment.cpp b/thirdparty/graphite/src/Segment.cpp
new file mode 100644
index 0000000000..62edd4250f
--- /dev/null
+++ b/thirdparty/graphite/src/Segment.cpp
@@ -0,0 +1,423 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include "inc/UtfCodec.h"
+#include <cstring>
+#include <cstdlib>
+
+#include "inc/bits.h"
+#include "inc/Segment.h"
+#include "graphite2/Font.h"
+#include "inc/CharInfo.h"
+#include "inc/debug.h"
+#include "inc/Slot.h"
+#include "inc/Main.h"
+#include "inc/CmapCache.h"
+#include "inc/Collider.h"
+#include "graphite2/Segment.h"
+
+
+using namespace graphite2;
+
+// Construct a segment for numchars characters of the given face.
+// Allocates the CharInfo array, selects the Silf for `script` via face->chooseSilf(), and
+// starts with no slots. m_flags gets bit 1 set when the silf's flag 0x20 is set; m_passBits
+// starts as all ones when the silf has a pass-bits attribute and as 0 otherwise.
+// NOTE(review): m_silf is dereferenced in the initializer list, so chooseSilf() is assumed to
+// return non-null here - confirm.
+Segment::Segment(size_t numchars, const Face* face, uint32 script, int textDir)
+: m_freeSlots(NULL),
+ m_freeJustifies(NULL),
+ m_charinfo(new CharInfo[numchars]),
+ m_collisions(NULL),
+ m_face(face),
+ m_silf(face->chooseSilf(script)),
+ m_first(NULL),
+ m_last(NULL),
+ m_bufSize(numchars + 10),
+ m_numGlyphs(numchars),
+ m_numCharinfo(numchars),
+ m_defaultOriginal(0),
+ m_dir(textDir),
+ m_flags(((m_silf->flags() & 0x20) != 0) << 1),
+ m_passBits(m_silf->aPassBits() ? -1 : 0)
+{
+ // Prime the slot pool: newSlot() allocates a first block of numchars + 10 slots (the
+ // initial m_bufSize) and the slot it returns is handed straight back to the free list.
+ freeSlot(newSlot());
+ // Any later block allocated by newSlot()/newJustify() uses this smaller size.
+ m_bufSize = log_binary(numchars)+1;
+}
+
+// Frees every block recorded in the slot, user-attribute and justify ropes, then the
+// CharInfo array and the collision array.
+Segment::~Segment()
+{
+    SlotRope::iterator slotBlock = m_slots.begin();
+    while (slotBlock != m_slots.end())
+    {
+        free(*slotBlock);
+        ++slotBlock;
+    }
+
+    AttributeRope::iterator attrBlock = m_userAttrs.begin();
+    while (attrBlock != m_userAttrs.end())
+    {
+        free(*attrBlock);
+        ++attrBlock;
+    }
+
+    JustifyRope::iterator justBlock = m_justifies.begin();
+    while (justBlock != m_justifies.end())
+    {
+        free(*justBlock);
+        ++justBlock;
+    }
+
+    delete[] m_charinfo;
+    free(m_collisions);
+}
+
+// Append a new slot for character `id` to the end of the slot list.
+// id: index into m_charinfo, also used as the slot's originate/before/after value.
+// cid: character value stored in the CharInfo. gid: glyph id for the slot.
+// iFeats: feature index stored in the CharInfo. coffset: base offset stored in the CharInfo.
+// Does nothing if no slot can be obtained from newSlot().
+// NOTE(review): id is not range-checked against the m_charinfo allocation here.
+void Segment::appendSlot(int id, int cid, int gid, int iFeats, size_t coffset)
+{
+ Slot *aSlot = newSlot();
+
+ if (!aSlot) return;
+ m_charinfo[id].init(cid);
+ m_charinfo[id].feats(iFeats);
+ m_charinfo[id].base(coffset);
+ // theGlyph may be null; the break weight then defaults to 0.
+ const GlyphFace * theGlyph = m_face->glyphs().glyphSafe(gid);
+ m_charinfo[id].breakWeight(theGlyph ? theGlyph->attrs()[m_silf->aBreak()] : 0);
+
+ aSlot->child(NULL);
+ aSlot->setGlyph(this, gid, theGlyph);
+ aSlot->originate(id);
+ aSlot->before(id);
+ aSlot->after(id);
+ // Link the slot in as the new tail of the doubly linked list.
+ if (m_last) m_last->next(aSlot);
+ aSlot->prev(m_last);
+ m_last = aSlot;
+ if (!m_first) m_first = aSlot;
+ // Narrow m_passBits by this glyph's pass-bits attribute; a second attribute supplies
+ // the upper 16 bits when the silf has more than 16 passes.
+ if (theGlyph && m_silf->aPassBits())
+ m_passBits &= theGlyph->attrs()[m_silf->aPassBits()]
+ | (m_silf->numPasses() > 16 ? (theGlyph->attrs()[m_silf->aPassBits() + 1] << 16) : 0);
+}
+
+// Hand out a slot from the free list, allocating a new block of m_bufSize slots (plus their
+// user-attribute storage) when the list is empty.
+// Returns NULL if the glyph count already exceeds m_numCharinfo * MAX_SEG_GROWTH_FACTOR or
+// if either allocation fails. The returned slot has a null next pointer.
+Slot *Segment::newSlot()
+{
+ if (!m_freeSlots)
+ {
+ // check that the segment doesn't grow indefinitely
+ if (m_numGlyphs > m_numCharinfo * MAX_SEG_GROWTH_FACTOR)
+ return NULL;
+ int numUser = m_silf->numUser();
+#if !defined GRAPHITE2_NTRACING
+ // One extra attribute per slot when logging; freeSlot() uses it as a generation counter.
+ if (m_face->logger()) ++numUser;
+#endif
+ Slot *newSlots = grzeroalloc<Slot>(m_bufSize);
+ int16 *newAttrs = grzeroalloc<int16>(m_bufSize * numUser);
+ if (!newSlots || !newAttrs)
+ {
+ free(newSlots);
+ free(newAttrs);
+ return NULL;
+ }
+ // Construct each slot in place and chain it to its successor. The last iteration
+ // links one past the end of the block; that link is reset to NULL right after the loop.
+ for (size_t i = 0; i < m_bufSize; i++)
+ {
+ ::new (newSlots + i) Slot(newAttrs + i * numUser);
+ newSlots[i].next(newSlots + i + 1);
+ }
+ newSlots[m_bufSize - 1].next(NULL);
+ // Slot 0 is the one returned, so it is detached from the chain.
+ newSlots[0].next(NULL);
+ // Remember both blocks so that the destructor can free them.
+ m_slots.push_back(newSlots);
+ m_userAttrs.push_back(newAttrs);
+ m_freeSlots = (m_bufSize > 1)? newSlots + 1 : NULL;
+ return newSlots;
+ }
+ // Pop the head of the free list.
+ Slot *res = m_freeSlots;
+ m_freeSlots = m_freeSlots->next();
+ res->next(NULL);
+ return res;
+}
+
+// Return aSlot to the free list. A null pointer is ignored.
+// Updates m_first/m_last if they point at the slot, detaches it from its attachment parent
+// and from its children, resets it to a freshly constructed state (keeping its user-attribute
+// storage, which is zeroed) and pushes it on the front of the free list.
+// The prev/next links of neighbouring slots are not modified here.
+void Segment::freeSlot(Slot *aSlot)
+{
+ if (aSlot == nullptr) return;
+ if (m_last == aSlot) m_last = aSlot->prev();
+ if (m_first == aSlot) m_first = aSlot->next();
+ if (aSlot->attachedTo())
+ aSlot->attachedTo()->removeChild(aSlot);
+ // Detach every child; a first child that does not point back at aSlot is simply dropped.
+ while (aSlot->firstChild())
+ {
+ if (aSlot->firstChild()->attachedTo() == aSlot)
+ {
+ aSlot->firstChild()->attachTo(nullptr);
+ aSlot->removeChild(aSlot->firstChild());
+ }
+ else
+ aSlot->firstChild(nullptr);
+ }
+ // reset the slot in case it is reused
+ ::new (aSlot) Slot(aSlot->userAttrs());
+ memset(aSlot->userAttrs(), 0, m_silf->numUser() * sizeof(int16));
+ // Update generation counter for debug
+#if !defined GRAPHITE2_NTRACING
+ // The counter is the extra attribute just past the numUser() entries zeroed above.
+ if (m_face->logger())
+ ++aSlot->userAttrs()[m_silf->numUser()];
+#endif
+ // update next pointer
+ if (!m_freeSlots)
+ aSlot->next(nullptr);
+ else
+ aSlot->next(m_freeSlots);
+ m_freeSlots = aSlot;
+}
+
+// Hand out a SlotJustify record from the free list, allocating a new block of m_bufSize
+// records when the list is empty. Each record occupies
+// SlotJustify::size_of(numJustLevels()) bytes. Returns NULL if the allocation fails.
+// The returned record has a null next pointer.
+SlotJustify *Segment::newJustify()
+{
+ if (!m_freeJustifies)
+ {
+ const size_t justSize = SlotJustify::size_of(m_silf->numJustLevels());
+ byte *justs = grzeroalloc<byte>(justSize * m_bufSize);
+ if (!justs) return NULL;
+ // Chain record i to record i + 1, working from the back. The last record's next is
+ // not written here - presumably it is left null by grzeroalloc's zero fill.
+ for (ptrdiff_t i = m_bufSize - 2; i >= 0; --i)
+ {
+ SlotJustify *p = reinterpret_cast<SlotJustify *>(justs + justSize * i);
+ SlotJustify *next = reinterpret_cast<SlotJustify *>(justs + justSize * (i + 1));
+ p->next = next;
+ }
+ m_freeJustifies = (SlotJustify *)justs;
+ // Remember the block so that the destructor can free it.
+ m_justifies.push_back(m_freeJustifies);
+ }
+ // Pop the head of the free list.
+ SlotJustify *res = m_freeJustifies;
+ m_freeJustifies = m_freeJustifies->next;
+ res->next = NULL;
+ return res;
+}
+
+// Push aJustify back on the front of the justify free list and zero its values for
+// numJustLevels() levels (treated as one level when the silf reports none).
+void Segment::freeJustify(SlotJustify *aJustify)
+{
+    const int levels = m_silf->numJustLevels();
+    const int numJust = (levels <= 0) ? 1 : levels;
+
+    aJustify->next = m_freeJustifies;
+    memset(aJustify->values, 0, numJust*SlotJustify::NUMJUSTPARAMS*sizeof(int16));
+    m_freeJustifies = aJustify;
+}
+
+// reverse the slots but keep diacritics in their same position after their bases
+// Slots whose bidi class is 16 are the ones treated as diacritics here (presumably the
+// non-spacing-mark class - confirm). A leading run of such slots is left where it is.
+// Also toggles bit 64 of m_dir.
+void Segment::reverseSlots()
+{
+ m_dir = m_dir ^ 64; // invert the reverse flag
+ if (m_first == m_last) return; // skip 0 or 1 glyph runs
+
+ Slot *t = 0; // next slot to visit
+ Slot *curr = m_first;
+ Slot *tlast; // slot that will become m_last
+ Slot *tfirst; // last slot of the untouched leading run, or null
+ Slot *out = 0; // head of the reversed list built so far
+
+ // Skip the leading diacritics; they keep their position.
+ while (curr && getSlotBidiClass(curr) == 16)
+ curr = curr->next();
+ if (!curr) return;
+ tfirst = curr->prev();
+ tlast = curr;
+
+ while (curr)
+ {
+ if (getSlotBidiClass(curr) == 16)
+ {
+ // Find the end of this run of diacritics: d becomes its last slot.
+ Slot *d = curr->next();
+ while (d && getSlotBidiClass(d) == 16)
+ d = d->next();
+
+ d = d ? d->prev() : m_last;
+ // Splice the run [curr, d] in directly after `out`, the base it followed.
+ Slot *p = out->next(); // one after the diacritics. out can't be null
+ if (p)
+ p->prev(d);
+ else
+ tlast = d;
+ t = d->next();
+ d->next(p);
+ curr->prev(out);
+ out->next(curr);
+ }
+ else // will always fire first time round the loop
+ {
+ // Push curr on the front of the reversed list.
+ if (out)
+ out->prev(curr);
+ t = curr->next();
+ curr->next(out);
+ out = curr;
+ }
+ curr = t;
+ }
+ // Reconnect the reversed list to the untouched leading run, if there is one.
+ out->prev(tfirst);
+ if (tfirst)
+ tfirst->next(out);
+ else
+ m_first = out;
+ m_last = tlast;
+}
+
+// Chain the base slots found in [s, end] together through their sibling links.
+// When bit 0 of m_dir is set each base is given the previous base as sibling; otherwise
+// the previous base is given the current one. Non-base slots are skipped.
+void Segment::linkClusters(Slot *s, Slot * end)
+{
+    Slot * const stop = end->next();
+
+    // Advance to the first base slot in the range (or to stop if there is none).
+    while (s != stop && !s->isBase())
+        s = s->next();
+
+    const bool linkToPrevious = (m_dir & 1) != 0;
+    Slot *prevBase = s;
+    for (; s != stop; s = s->next())
+    {
+        if (!s->isBase())
+            continue;
+
+        // On the first pass s and prevBase are the same slot.
+        if (linkToPrevious)
+            s->sibling(prevBase);
+        else
+            prevBase->sibling(s);
+        prevBase = s;
+    }
+}
+
+// Position the base slots between iStart and iEnd (each defaulting to the segment's first /
+// last slot when null) by calling Slot::finalise() on each in turn, feeding every call the
+// position returned by the previous one.
+// If currdir() differs from isRtl the slot order is reversed for the duration of the call
+// and iStart/iEnd are swapped.
+// Returns the position returned by the last finalise() call, or (0, 0) when there is
+// nothing to position.
+Position Segment::positionSlots(const Font *font, Slot * iStart, Slot * iEnd, bool isRtl, bool isFinal)
+{
+ Position currpos(0., 0.);
+ float clusterMin = 0.;
+ Rect bbox;
+ bool reorder = (currdir() != isRtl);
+
+ if (reorder)
+ {
+ Slot *temp;
+ reverseSlots();
+ temp = iStart;
+ iStart = iEnd;
+ iEnd = temp;
+ }
+ if (!iStart) iStart = m_first;
+ if (!iEnd) iEnd = m_last;
+
+ if (!iStart || !iEnd) // only true for empty segments
+ return currpos;
+
+ if (isRtl)
+ {
+ // Walk from iEnd back to iStart inclusive.
+ for (Slot * s = iEnd, * const end = iStart->prev(); s && s != end; s = s->prev())
+ {
+ if (s->isBase())
+ currpos = s->finalise(this, font, currpos, bbox, 0, clusterMin = currpos.x, isRtl, isFinal);
+ }
+ }
+ else
+ {
+ // Walk from iStart forward to iEnd inclusive.
+ for (Slot * s = iStart, * const end = iEnd->next(); s && s != end; s = s->next())
+ {
+ if (s->isBase())
+ currpos = s->finalise(this, font, currpos, bbox, 0, clusterMin = currpos.x, isRtl, isFinal);
+ }
+ }
+ // Restore the original slot order.
+ if (reorder)
+ reverseSlots();
+ return currpos;
+}
+
+
+// Rebuild the association between characters [offset, offset + numChars) and slots.
+// First the before/after slot indices of those CharInfo entries are reset to -1. Then every
+// slot is numbered in list order and, for each character in the slot's [before, after] range,
+// the character's before/after indices are widened to include that slot. Finally each slot's
+// before/after range is extended over neighbouring characters that still have no association.
+void Segment::associateChars(int offset, size_t numChars)
+{
+ int i = 0, j = 0;
+ CharInfo *c, *cend;
+ for (c = m_charinfo + offset, cend = m_charinfo + offset + numChars; c != cend; ++c)
+ {
+ c->before(-1);
+ c->after(-1);
+ }
+ // The slot's index is assigned in the loop's increment expression, so it is also set
+ // for slots that take the `continue` below; inside the body, i is the index the slot
+ // is about to receive.
+ for (Slot * s = m_first; s; s->index(i++), s = s->next())
+ {
+ j = s->before();
+ if (j < 0) continue;
+
+ for (const int after = s->after(); j <= after; ++j)
+ {
+ c = charinfo(j);
+ if (c->before() == -1 || i < c->before()) c->before(i);
+ if (c->after() < i) c->after(i);
+ }
+ }
+ for (Slot *s = m_first; s; s = s->next())
+ {
+ int a;
+ // Claim the unassociated characters directly after the slot's range.
+ for (a = s->after() + 1; a < offset + int(numChars) && charinfo(a)->after() < 0; ++a)
+ { charinfo(a)->after(s->index()); }
+ --a;
+ s->after(a);
+
+ // Claim the unassociated characters directly before the slot's range.
+ for (a = s->before() - 1; a >= offset && charinfo(a)->before() < 0; --a)
+ { charinfo(a)->before(s->index()); }
+ ++a;
+ s->before(a);
+ }
+}
+
+
+// Decode n_chars characters through the UTF iterator c and append one slot per character
+// to seg. The glyph id comes from the face's cmap, falling back to face.findPseudo() when
+// the cmap yields 0. Each slot records the character's code-unit offset from the start.
+template <typename utf_iter>
+inline void process_utf_data(Segment & seg, const Face & face, const int fid, utf_iter c, size_t n_chars)
+{
+    const Cmap & cmap = face.cmap();
+    const typename utf_iter::codeunit_type * const base = c;
+
+    int slotid = 0;
+    while (n_chars)
+    {
+        const uint32 usv = *c;
+        uint16 gid = cmap[usv];
+        if (gid == 0)
+            gid = face.findPseudo(usv);
+        seg.appendSlot(slotid, usv, gid, fid, c - base);
+
+        --n_chars;
+        ++c;
+        ++slotid;
+    }
+}
+
+
+// Fill the segment with one slot per character of the text at pStart (nChars characters in
+// encoding enc), registering pFeats via addFeatures() for them.
+// Returns false only when the CharInfo array is missing; an enc value other than the three
+// handled below appends nothing and still returns true.
+bool Segment::read_text(const Face *face, const Features* pFeats/*must not be NULL*/, gr_encform enc, const void* pStart, size_t nChars)
+{
+    assert(face);
+    assert(pFeats);
+    if (m_charinfo == NULL)
+        return false;
+
+    // utf iterator is self recovering so we don't care about the error state of the iterator.
+    if (enc == gr_utf8)
+        process_utf_data(*this, *face, addFeatures(*pFeats), utf8::const_iterator(pStart), nChars);
+    else if (enc == gr_utf16)
+        process_utf_data(*this, *face, addFeatures(*pFeats), utf16::const_iterator(pStart), nChars);
+    else if (enc == gr_utf32)
+        process_utf_data(*this, *face, addFeatures(*pFeats), utf32::const_iterator(pStart), nChars);
+
+    return true;
+}
+
+// Replace each slot's glyph with the glyph named by its aMirror attribute, when that
+// attribute is non-zero. When bit 4 of dir() is set, a slot is left alone if its
+// attribute aMirror + 1 is non-zero.
+void Segment::doMirror(uint16 aMirror)
+{
+    for (Slot *slot = m_first; slot != NULL; slot = slot->next())
+    {
+        const unsigned short mirrored = glyphAttr(slot->gid(), aMirror);
+        if (!mirrored)
+            continue;
+        if ((dir() & 4) && glyphAttr(slot->gid(), aMirror + 1))
+            continue;
+        slot->setGlyph(this, mirrored);
+    }
+}
+
+// Allocate the per-slot collision array (slotCount() entries) and construct a SlotCollision
+// for every slot in the list. Returns false if the allocation fails or if a slot's index is
+// not below slotCount(); in the latter case the slots visited so far stay initialised.
+bool Segment::initCollisions()
+{
+    m_collisions = grzeroalloc<SlotCollision>(slotCount());
+    if (!m_collisions)
+        return false;
+
+    Slot *slot = m_first;
+    while (slot)
+    {
+        if (slot->index() >= slotCount())
+            return false;
+        ::new (collisionInfo(slot)) SlotCollision(this, slot);
+        slot = slot->next();
+    }
+    return true;
+}
diff --git a/thirdparty/graphite/src/Silf.cpp b/thirdparty/graphite/src/Silf.cpp
new file mode 100644
index 0000000000..44d3c96171
--- /dev/null
+++ b/thirdparty/graphite/src/Silf.cpp
@@ -0,0 +1,439 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include <cstdlib>
+#include "graphite2/Segment.h"
+#include "inc/debug.h"
+#include "inc/Endian.h"
+#include "inc/Silf.h"
+#include "inc/Segment.h"
+#include "inc/Rule.h"
+#include "inc/Error.h"
+
+
+using namespace graphite2;
+
+namespace { static const uint32 ERROROFFSET = 0xFFFFFFFF; }
+
+Silf::Silf() throw()
+: m_passes(0),
+ m_pseudos(0),
+ m_classOffsets(0),
+ m_classData(0),
+ m_justs(0),
+ m_numPasses(0),
+ m_numJusts(0),
+ m_sPass(0),
+ m_pPass(0),
+ m_jPass(0),
+ m_bPass(0),
+ m_flags(0),
+ m_dir(0),
+ m_aPseudo(0),
+ m_aBreak(0),
+ m_aUser(0),
+ m_aBidi(0),
+ m_aMirror(0),
+ m_aPassBits(0),
+ m_iMaxComp(0),
+ m_aCollision(0),
+ m_aLig(0),
+ m_numPseudo(0),
+ m_nClass(0),
+ m_nLinear(0),
+ m_gEndLine(0)
+{ // Every buffer pointer, counter and attribute index starts at zero; the info struct is zero-filled as well.
+ memset(&m_silfinfo, 0, sizeof m_silfinfo);
+}
+
+Silf::~Silf() throw()
+{ // All clean-up is delegated to releaseBuffers().
+ releaseBuffers();
+}
+
+// Releases the pass, pseudo-glyph, class and justification buffers and resets
+// the corresponding pointers to null so the call can safely be repeated.
+// m_passes and m_pseudos are released with delete[]; the other three with free().
+void Silf::releaseBuffers() throw()
+{
+    delete [] m_passes;
+    m_passes = 0;
+
+    delete [] m_pseudos;
+    m_pseudos = 0;
+
+    free(m_classOffsets);
+    m_classOffsets = 0;
+
+    free(m_classData);
+    m_classData = 0;
+
+    free(m_justs);
+    m_justs = 0;
+}
+
+
+bool Silf::readGraphite(const byte * const silf_start, size_t lSilf, Face& face, uint32 version)
+{ /* Parses one Silf sub-table of lSilf bytes beginning at silf_start into this object.
+ * Header fields are consumed big-endian in file order, then the justification
+ * levels, the pseudo-glyph map, the class map (readClassMap) and finally each
+ * pass (Pass::readPass).  Validation failures are recorded in e, buffers are
+ * released, and the result of face.error(e) is returned; a pass that fails to
+ * load returns false directly.  Returns true once m_silfinfo has been filled in. */
+ const byte * p = silf_start,
+ * const silf_end = p + lSilf;
+ Error e;
+
+ if (e.test(version >= 0x00060000, E_BADSILFVERSION)) // versions 6.0 and above are rejected
+ {
+ releaseBuffers(); return face.error(e);
+ }
+ if (version >= 0x00030000) // v3+ headers carry extra leading fields and must be at least 28 bytes
+ {
+ if (e.test(lSilf < 28, E_BADSIZE)) { releaseBuffers(); return face.error(e); }
+ be::skip<int32>(p); // ruleVersion
+ be::skip<uint16>(p,2); // passOffset & pseudosOffset
+ }
+ else if (e.test(lSilf < 20, E_BADSIZE)) { releaseBuffers(); return face.error(e); } // older headers must be at least 20 bytes
+ const uint16 maxGlyph = be::read<uint16>(p);
+ m_silfinfo.extra_ascent = be::read<uint16>(p);
+ m_silfinfo.extra_descent = be::read<uint16>(p);
+ m_numPasses = be::read<uint8>(p);
+ m_sPass = be::read<uint8>(p);
+ m_pPass = be::read<uint8>(p);
+ m_jPass = be::read<uint8>(p);
+ m_bPass = be::read<uint8>(p);
+ m_flags = be::read<uint8>(p);
+ be::skip<uint8>(p,2); // max{Pre,Post}Context.
+ m_aPseudo = be::read<uint8>(p);
+ m_aBreak = be::read<uint8>(p);
+ m_aBidi = be::read<uint8>(p);
+ m_aMirror = be::read<uint8>(p);
+ m_aPassBits = be::read<uint8>(p);
+
+ // Read Justification levels.
+ m_numJusts = be::read<uint8>(p);
+ if (e.test(maxGlyph >= face.glyphs().numGlyphs(), E_BADMAXGLYPH)
+ || e.test(p + m_numJusts * 8 >= silf_end, E_BADNUMJUSTS)) // each justification record occupies 8 bytes
+ {
+ releaseBuffers(); return face.error(e);
+ }
+
+ if (m_numJusts)
+ {
+ m_justs = gralloc<Justinfo>(m_numJusts);
+ if (e.test(!m_justs, E_OUTOFMEM)) return face.error(e); // no buffer has been allocated yet at this point
+
+ for (uint8 i = 0; i < m_numJusts; i++)
+ {
+ ::new(m_justs + i) Justinfo(p[0], p[1], p[2], p[3]); // only the first four bytes of each record are used
+ be::skip<byte>(p,8);
+ }
+ }
+
+ if (e.test(p + sizeof(uint16) + sizeof(uint8)*8 >= silf_end, E_BADENDJUSTS)) { releaseBuffers(); return face.error(e); }
+ m_aLig = be::read<uint16>(p);
+ m_aUser = be::read<uint8>(p);
+ m_iMaxComp = be::read<uint8>(p);
+ m_dir = be::read<uint8>(p) - 1; // the stored byte is one greater than the value kept in m_dir
+ m_aCollision = be::read<uint8>(p);
+ be::skip<byte>(p,3);
+ be::skip<uint16>(p, be::read<uint8>(p)); // don't need critical features yet
+ be::skip<byte>(p); // reserved
+ if (e.test(p >= silf_end, E_BADCRITFEATURES)) { releaseBuffers(); return face.error(e); }
+ be::skip<uint32>(p, be::read<uint8>(p)); // don't use scriptTag array.
+ if (e.test(p + sizeof(uint16) + sizeof(uint32) >= silf_end, E_BADSCRIPTTAGS)) { releaseBuffers(); return face.error(e); }
+ m_gEndLine = be::read<uint16>(p); // lbGID
+ const byte * o_passes = p; // remembered so the pass offset table can be re-read in the pass loop
+ uint32 passes_start = be::read<uint32>(p);
+
+ const size_t num_attrs = face.glyphs().numAttrs();
+ if (e.test(m_aPseudo >= num_attrs, E_BADAPSEUDO)
+ || e.test(m_aBreak >= num_attrs, E_BADABREAK)
+ || e.test(m_aBidi >= num_attrs, E_BADABIDI)
+ || e.test(m_aMirror>= num_attrs, E_BADAMIRROR)
+ || e.test(m_aCollision && m_aCollision >= num_attrs - 5, E_BADACOLLISION)
+ || e.test(m_numPasses > 128, E_BADNUMPASSES) || e.test(passes_start >= lSilf, E_BADPASSESSTART)
+ || e.test(m_pPass < m_sPass, E_BADPASSBOUND) || e.test(m_pPass > m_numPasses, E_BADPPASS) || e.test(m_sPass > m_numPasses, E_BADSPASS)
+ || e.test(m_jPass < m_pPass, E_BADJPASSBOUND) || e.test(m_jPass > m_numPasses, E_BADJPASS)
+ || e.test((m_bPass != 0xFF && (m_bPass < m_jPass || m_bPass > m_numPasses)), E_BADBPASS) // 0xFF means no bidi pass
+ || e.test(m_aLig > 127, E_BADALIG))
+ {
+ releaseBuffers();
+ return face.error(e);
+ }
+ be::skip<uint32>(p, m_numPasses); // step over the remaining pass offsets; they are read via o_passes below
+ if (e.test(unsigned(p - silf_start) + sizeof(uint16) >= passes_start, E_BADPASSESSTART)) { releaseBuffers(); return face.error(e); }
+ m_numPseudo = be::read<uint16>(p);
+ be::skip<uint16>(p, 3); // searchPseudo, pseudoSelector, pseudoShift
+ m_pseudos = new Pseudo[m_numPseudo];
+ if (e.test(unsigned(p - silf_start) + m_numPseudo*(sizeof(uint32) + sizeof(uint16)) >= passes_start, E_BADNUMPSEUDO) // each entry is a uint32 uid plus a uint16 gid
+ || e.test(!m_pseudos, E_OUTOFMEM))
+ {
+ releaseBuffers(); return face.error(e);
+ }
+ for (int i = 0; i < m_numPseudo; i++)
+ {
+ m_pseudos[i].uid = be::read<uint32>(p);
+ m_pseudos[i].gid = be::read<uint16>(p);
+ }
+
+ const size_t clen = readClassMap(p, passes_start + silf_start - p, version, e); // the class map may use the bytes up to the first pass
+ m_passes = new Pass[m_numPasses];
+ if (e || e.test(clen > unsigned(passes_start + silf_start - p), E_BADPASSESSTART)
+ || e.test(!m_passes, E_OUTOFMEM))
+ { releaseBuffers(); return face.error(e); }
+
+ for (size_t i = 0; i < m_numPasses; ++i)
+ {
+ uint32 pass_start = be::read<uint32>(o_passes);
+ uint32 pass_end = be::peek<uint32>(o_passes); // the next table entry marks where this pass ends
+ face.error_context((face.error_context() & 0xFF00) + EC_ASILF + unsigned(i << 16)); // pass index goes into the bits above bit 15 of the error context
+ if (e.test(pass_start > pass_end, E_BADPASSSTART)
+ || e.test(pass_start < passes_start, E_BADPASSSTART)
+ || e.test(pass_end > lSilf, E_BADPASSEND)) {
+ releaseBuffers(); return face.error(e);
+ }
+
+ enum passtype pt = PASS_TYPE_UNKNOWN; // pass type is decided purely by where i falls among the pass-index thresholds
+ if (i >= m_jPass) pt = PASS_TYPE_JUSTIFICATION;
+ else if (i >= m_pPass) pt = PASS_TYPE_POSITIONING;
+ else if (i >= m_sPass) pt = PASS_TYPE_SUBSTITUTE;
+ else pt = PASS_TYPE_LINEBREAK;
+
+ m_passes[i].init(this);
+ if (!m_passes[i].readPass(silf_start + pass_start, pass_end - pass_start, pass_start, face, pt,
+ version, e))
+ {
+ releaseBuffers();
+ return false; // NOTE(review): presumably readPass has already reported the error on face - confirm
+ }
+ }
+
+ // fill in gr_faceinfo
+ m_silfinfo.upem = face.glyphs().unitsPerEm();
+ m_silfinfo.has_bidi_pass = (m_bPass != 0xFF);
+ m_silfinfo.justifies = (m_numJusts != 0) || (m_jPass < m_pPass); // NOTE(review): m_jPass < m_pPass was already rejected above (E_BADJPASSBOUND), so the second term looks always false here
+ m_silfinfo.line_ends = (m_flags & 1);
+ m_silfinfo.space_contextuals = gr_faceinfo::gr_space_contextuals((m_flags >> 2) & 0x7);
+ return true;
+}
+
+// Reads the m_nClass+1 class offsets, each stored as a big-endian T, into a
+// freshly allocated m_classOffsets array.  Stored offsets are byte offsets;
+// they are rebased past the offset table and converted to uint16 units.
+// Returns the final (end) offset in uint16 units, or ERROROFFSET with e set
+// when the table is misaligned, an offset is too large, or allocation fails.
+template<typename T> inline uint32 Silf::readClassOffsets(const byte *&p, size_t data_len, Error &e)
+{
+    // Size in bytes of the class map header (two uint16s) plus the offset table.
+    const T      cls_off = 2*sizeof(uint16) + sizeof(T)*(m_nClass+1);
+    const uint32 max_off = (be::peek<T>(p + sizeof(T)*m_nClass) - cls_off)/sizeof(uint16);
+
+    // The first class must start right after the table ...
+    if (e.test(be::peek<T>(p) != cls_off, E_MISALIGNEDCLASSES))
+        return ERROROFFSET;
+    // ... and the end offset must not exceed the class map length.
+    if (e.test(max_off > (data_len - cls_off)/sizeof(uint16), E_HIGHCLASSOFFSET))
+        return ERROROFFSET;
+
+    m_classOffsets = gralloc<uint32>(m_nClass+1);
+    if (e.test(!m_classOffsets, E_OUTOFMEM))
+        return ERROROFFSET;
+
+    const size_t n_offsets = size_t(m_nClass) + 1;
+    for (size_t i = 0; i != n_offsets; ++i)
+    {
+        uint32 & off = m_classOffsets[i];
+        off = (be::read<T>(p) - cls_off)/sizeof(uint16);
+        if (e.test(off > max_off, E_HIGHCLASSOFFSET))
+            return ERROROFFSET;
+    }
+    return max_off;
+}
+
+size_t Silf::readClassMap(const byte *p, size_t data_len, uint32 version, Error &e)
+{ /* Reads the class map found in the data_len bytes at p: the class and linear-class
+ * counts, the offset table (uint32 entries for version >= 4.0, uint16 otherwise)
+ * and the class data, which is decoded into m_classData as host-order uint16s.
+ * Returns the class data length in uint16 units, or ERROROFFSET with e set when
+ * a check or an allocation fails. */
+ if (e.test(data_len < sizeof(uint16)*2, E_BADCLASSSIZE)) return ERROROFFSET;
+
+ m_nClass = be::read<uint16>(p);
+ m_nLinear = be::read<uint16>(p);
+
+ // Check that numLinear < numClass,
+ // that there is at least enough data for numClasses offsets.
+ if (e.test(m_nLinear > m_nClass, E_TOOMANYLINEAR)
+ || e.test((m_nClass + 1) * (version >= 0x00040000 ? sizeof(uint32) : sizeof(uint16)) > (data_len - 4), E_CLASSESTOOBIG))
+ return ERROROFFSET;
+
+ uint32 max_off;
+ if (version >= 0x00040000)
+ max_off = readClassOffsets<uint32>(p, data_len, e);
+ else
+ max_off = readClassOffsets<uint16>(p, data_len, e);
+
+ if (max_off == ERROROFFSET) return ERROROFFSET;
+
+ if (e.test((int)max_off < m_nLinear + (m_nClass - m_nLinear) * 6, E_CLASSESTOOBIG)) // requires room for 6 uint16s per non-linear class on top of m_nLinear
+ return ERROROFFSET;
+
+ // Check the linear offsets are sane, these must be monotonically increasing.
+ assert(m_nClass >= m_nLinear);
+ for (const uint32 *o = m_classOffsets, * const o_end = o + m_nLinear; o != o_end; ++o)
+ if (e.test(o[0] > o[1], E_BADCLASSOFFSET))
+ return ERROROFFSET;
+
+ // Fortunately the class data is all uint16s so we can decode these now
+ m_classData = gralloc<uint16>(max_off);
+ if (e.test(!m_classData, E_OUTOFMEM)) return ERROROFFSET;
+ for (uint16 *d = m_classData, * const d_end = d + max_off; d != d_end; ++d)
+ *d = be::read<uint16>(p);
+
+ // Check the lookup class invariants for each non-linear class
+ for (const uint32 *o = m_classOffsets + m_nLinear, * const o_end = m_classOffsets + m_nClass; o != o_end; ++o)
+ {
+ const uint16 * lookup = m_classData + *o; // lookup[0..3] form the class header; the first test below guards reading them
+ if (e.test(*o + 4 > max_off, E_HIGHCLASSOFFSET) // LookupClass doesn't stretch over max_off
+ || e.test(lookup[0] == 0 // A LookupClass with no looks is a suspicious thing ...
+ || lookup[0] * 2 + *o + 4 > max_off // numIDs lookup pairs fits within (start of LookupClass' lookups array, max_off]
+ || lookup[3] + lookup[1] != lookup[0], E_BADCLASSLOOKUPINFO) // rangeShift: numIDs - searchRange
+ || e.test(((o[1] - *o) & 1) != 0, ERROROFFSET)) // glyphs are in pairs so difference must be even. NOTE(review): ERROROFFSET is passed as the error code here where an E_* code is used elsewhere - confirm intended
+ return ERROROFFSET;
+ }
+
+ return max_off;
+}
+
+// Linear search of the pseudo-glyph table: returns the glyph id mapped to
+// code point uid, or 0 when no entry matches.
+uint16 Silf::findPseudo(uint32 uid) const
+{
+    int idx = 0;
+    while (idx < m_numPseudo)
+    {
+        if (m_pseudos[idx].uid == uid)
+            return m_pseudos[idx].gid;
+        ++idx;
+    }
+    return 0;
+}
+
+// Returns the index of glyph `gid` within class `cid`, or uint16(-1) when the
+// class id is not a valid class or the glyph is not a member of it.
+// Linear classes (cid < m_nLinear) are scanned sequentially; lookup classes
+// are binary-searched over their (glyph, index) pairs.
+uint16 Silf::findClassIndex(uint16 cid, uint16 gid) const
+{
+    // m_classOffsets has m_nClass+1 entries and the last one is only the end
+    // marker, so valid class ids stop at m_nClass-1.  Letting cid == m_nClass
+    // through would make the lookup branch below read cls[0] at the end offset,
+    // i.e. past the class data.
+    if (cid >= m_nClass) return -1;
+
+    const uint16 * cls = m_classData + m_classOffsets[cid];
+    if (cid < m_nLinear)        // output class being used for input, shouldn't happen
+    {
+        for (unsigned int i = 0, n = m_classOffsets[cid + 1] - m_classOffsets[cid]; i < n; ++i, ++cls)
+            if (*cls == gid) return i;
+        return -1;
+    }
+    else
+    {
+        const uint16 * min = cls + 4,           // lookups array
+                     * max = min + cls[0]*2;    // lookups array is numIDs (cls[0]) uint16 pairs long
+        do
+        {
+            // Masking with -2 keeps the midpoint offset even, so p always
+            // lands on the first element of a pair.
+            const uint16 * p = min + (-2 & ((max-min)/2));
+            if (p[0] > gid) max = p;
+            else            min = p;
+        }
+        while (max - min > 2);
+        return min[0] == gid ? min[1] : -1;
+    }
+}
+
+// Returns the glyph stored at position `index` of class `cid`, or 0 when the
+// class id is invalid or the class has no glyph for that index.
+uint16 Silf::getClassGlyph(uint16 cid, unsigned int index) const
+{
+    // Valid class ids are 0 .. m_nClass-1.  With cid == m_nClass the code
+    // below would read m_classOffsets[cid + 1], one entry past the m_nClass+1
+    // offsets that were allocated, so that value must be rejected as well.
+    if (cid >= m_nClass) return 0;
+
+    uint32 loc = m_classOffsets[cid];
+    if (cid < m_nLinear)
+    {
+        // Linear class: glyphs are stored consecutively, indexed directly.
+        if (index < m_classOffsets[cid + 1] - loc)
+            return m_classData[index + loc];
+    }
+    else        // input class being used for output. Shouldn't happen
+    {
+        // Lookup class: skip the 4-word header, then scan (glyph, index) pairs.
+        for (unsigned int i = loc + 4; i < m_classOffsets[cid + 1]; i += 2)
+            if (m_classData[i + 1] == index) return m_classData[i];
+    }
+    return 0;
+}
+
+
+bool Silf::runGraphite(Segment *seg, uint8 firstPass, uint8 lastPass, int dobidi) const
+{ /* Runs passes [firstPass, lastPass) over seg, inserting one extra loop iteration
+ * for the bidi/mirroring step when the bidi pass index falls inside that range.
+ * A lastPass of 0 means "up to m_numPasses".  Returns false when a pass fails,
+ * the machine does not finish, or the segment grows beyond maxSize; true otherwise. */
+ assert(seg != 0);
+ size_t maxSize = seg->slotCount() * MAX_SEG_GROWTH_FACTOR; // upper bound on slot count, checked after every pass
+ SlotMap map(*seg, m_dir, maxSize);
+ FiniteStateMachine fsm(map, seg->getFace()->logger());
+ vm::Machine m(map);
+ uint8 lbidi = m_bPass; // 0xFF means there is no bidi step to perform
+#if !defined GRAPHITE2_NTRACING
+ json * const dbgout = seg->getFace()->logger();
+#endif
+
+ if (lastPass == 0)
+ {
+ if (firstPass == lastPass && lbidi == 0xFF)
+ return true;
+ lastPass = m_numPasses;
+ }
+ if ((firstPass < lbidi || (dobidi && firstPass == lbidi)) && (lastPass >= lbidi || (dobidi && lastPass + 1 == lbidi)))
+ lastPass++; // make room for the extra bidi iteration
+ else
+ lbidi = 0xFF;
+
+ for (size_t i = firstPass; i < lastPass; ++i)
+ {
+ // bidi and mirroring
+ if (i == lbidi)
+ {
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout)
+ {
+ *dbgout << json::item << json::object
+// << "pindex" << i // for debugging
+ << "id" << -1
+ << "slotsdir" << (seg->currdir() ? "rtl" : "ltr")
+ << "passdir" << (m_dir & 1 ? "rtl" : "ltr")
+ << "slots" << json::array;
+ seg->positionSlots(0, 0, 0, seg->currdir());
+ for(Slot * s = seg->first(); s; s = s->next())
+ *dbgout << dslot(seg, s);
+ *dbgout << json::close
+ << "rules" << json::array << json::close
+ << json::close;
+ }
+#endif
+ if (seg->currdir() != (m_dir & 1))
+ seg->reverseSlots();
+ if (m_aMirror && (seg->dir() & 3) == 3)
+ seg->doMirror(m_aMirror);
+ --i; // the loop's ++i brings i back, so the real pass at this index still runs
+ lbidi = lastPass; // i never reaches this value, so the bidi step is not repeated
+ --lastPass; // undo the extra iteration added before the loop
+ continue;
+ }
+
+#if !defined GRAPHITE2_NTRACING
+ if (dbgout)
+ {
+ *dbgout << json::item << json::object
+// << "pindex" << i // for debugging
+ << "id" << i+1
+ << "slotsdir" << (seg->currdir() ? "rtl" : "ltr")
+ << "passdir" << ((m_dir & 1) ^ m_passes[i].reverseDir() ? "rtl" : "ltr")
+ << "slots" << json::array;
+ seg->positionSlots(0, 0, 0, seg->currdir());
+ for(Slot * s = seg->first(); s; s = s->next())
+ *dbgout << dslot(seg, s);
+ *dbgout << json::close;
+ }
+#endif
+
+ // test whether to reorder, prepare for positioning
+ bool reverse = (lbidi == 0xFF) && (seg->currdir() != ((m_dir & 1) ^ m_passes[i].reverseDir()));
+ if ((i >= 32 || (seg->passBits() & (1 << i)) == 0 || m_passes[i].collisionLoops()) // a pass whose bit is set in passBits() is skipped unless i >= 32 or it has collision loops
+ && !m_passes[i].runGraphite(m, fsm, reverse))
+ return false;
+ // only substitution passes can change segment length, cached subsegments are short for their text
+ if (m.status() != vm::Machine::finished
+ || (seg->slotCount() && seg->slotCount() > maxSize))
+ return false;
+ }
+ return true;
+}
diff --git a/thirdparty/graphite/src/Slot.cpp b/thirdparty/graphite/src/Slot.cpp
new file mode 100644
index 0000000000..0fdb098952
--- /dev/null
+++ b/thirdparty/graphite/src/Slot.cpp
@@ -0,0 +1,529 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include "inc/Segment.h"
+#include "inc/Slot.h"
+#include "inc/Silf.h"
+#include "inc/CharInfo.h"
+#include "inc/Rule.h"
+#include "inc/Collider.h"
+
+
+using namespace graphite2;
+
+Slot::Slot(int16 *user_attrs) :
+ m_next(NULL), m_prev(NULL),
+ m_glyphid(0), m_realglyphid(0), m_original(0), m_before(0), m_after(0),
+ m_index(0), m_parent(NULL), m_child(NULL), m_sibling(NULL),
+ m_position(0, 0), m_shift(0, 0), m_advance(0, 0),
+ m_attach(0, 0), m_with(0, 0), m_just(0.),
+ m_flags(0), m_attLevel(0), m_bidiCls(-1), m_bidiLevel(0),
+ m_userAttr(user_attrs), m_justs(NULL)
+{ // Starts unlinked and zeroed; the caller-supplied user_attrs pointer is stored as given, and m_bidiCls starts at -1.
+}
+
+// Copies the value state of orig into this slot, shifting its character
+// references by charOffset.  Take care: none of the slot pointer fields are
+// copied - m_next and m_prev are left as they are and the attachment pointers
+// (parent, child, sibling) are reset to NULL.
+void Slot::set(const Slot & orig, int charOffset, size_t sizeAttr, size_t justLevels, size_t numChars)
+{
+    // Glyph identity.
+    m_glyphid = orig.m_glyphid;
+    m_realglyphid = orig.m_realglyphid;
+
+    // Character references: shifted, with m_before floored at 0 and m_after
+    // capped at the last character when shifting backwards.
+    m_original = orig.m_original + charOffset;
+    if (charOffset + int(orig.m_before) >= 0)
+        m_before = orig.m_before + charOffset;
+    else
+        m_before = 0;
+    if (charOffset > 0 || orig.m_after + charOffset < numChars)
+        m_after = orig.m_after + charOffset;
+    else
+        m_after = int(numChars) - 1;
+
+    // Attachment tree links are deliberately dropped.
+    m_parent = NULL;
+    m_child = NULL;
+    m_sibling = NULL;
+
+    // Geometry.
+    m_position = orig.m_position;
+    m_shift = orig.m_shift;
+    m_advance = orig.m_advance;
+    m_attach = orig.m_attach;
+    m_with = orig.m_with;
+
+    // Flags and levels.
+    m_flags = orig.m_flags;
+    m_attLevel = orig.m_attLevel;
+    m_bidiCls = orig.m_bidiCls;
+    m_bidiLevel = orig.m_bidiLevel;
+
+    // Attribute and justification arrays are copied only when both sides have storage.
+    if (m_userAttr && orig.m_userAttr)
+        memcpy(m_userAttr, orig.m_userAttr, sizeAttr * sizeof(*m_userAttr));
+    if (m_justs && orig.m_justs)
+        memcpy(m_justs, orig.m_justs, SlotJustify::size_of(justLevels));
+}
+
+// Shifts the slot: both character references move by numCharInfo and the
+// position moves by relpos.  The first parameter is unused.
+void Slot::update(int /*numGrSlots*/, int numCharInfo, Position &relpos)
+{
+    m_position = m_position + relpos;
+    m_after += numCharInfo;
+    m_before += numCharInfo;
+}
+
+Position Slot::finalise(const Segment *seg, const Font *font, Position & base, Rect & bbox, uint8 attrLevel, float & clusterMin, bool rtl, bool isFinal, int depth)
+{ /* Computes m_position for this slot relative to base, then recurses into its
+ * child and (for attached slots) its sibling.  bbox is widened by each glyph's
+ * box and clusterMin tracks the smallest x seen in the cluster.  Returns the
+ * advance position reached; returns (0, 0) without touching anything when depth
+ * exceeds 100 or when attrLevel is non-zero and this slot's level is above it. */
+ SlotCollision *coll = NULL;
+ if (depth > 100 || (attrLevel && m_attLevel > attrLevel)) return Position(0, 0);
+ float scale = font ? font->scale() : 1.0f;
+ Position shift(m_shift.x * (rtl * -2 + 1) + m_just, m_shift.y); // (rtl * -2 + 1) is -1 for rtl and +1 otherwise, flipping the x shift
+ float tAdvance = m_advance.x + m_just;
+ if (isFinal && (coll = seg->collisionInfo(this)))
+ {
+ const Position &collshift = coll->offset();
+ if (!(coll->flags() & SlotCollision::COLL_KERN) || rtl)
+ shift = shift + collshift;
+ }
+ const GlyphFace * glyphFace = seg->getFace()->glyphs().glyphSafe(glyph());
+ if (font)
+ {
+ scale = font->scale();
+ shift *= scale;
+ if (font->isHinted() && glyphFace)
+ tAdvance = (m_advance.x - glyphFace->theAdvance().x + m_just) * scale + font->advance(glyph()); // scale only the adjustment and add the font's own advance for the glyph
+ else
+ tAdvance *= scale;
+ }
+ Position res;
+
+ m_position = base + shift;
+ if (!m_parent)
+ {
+ res = base + Position(tAdvance, m_advance.y * scale);
+ clusterMin = m_position.x; // an unattached slot restarts the cluster minimum
+ }
+ else
+ {
+ float tAdv;
+ m_position += (m_attach - m_with) * scale;
+ tAdv = m_advance.x >= 0.5f ? m_position.x + tAdvance - shift.x : 0.f; // attached slots with an advance below 0.5 contribute no advance
+ res = Position(tAdv, 0);
+ if ((m_advance.x >= 0.5f || m_position.x < 0) && m_position.x < clusterMin) clusterMin = m_position.x;
+ }
+
+ if (glyphFace)
+ {
+ Rect ourBbox = glyphFace->theBBox() * scale + m_position;
+ bbox = bbox.widen(ourBbox);
+ }
+
+ if (m_child && m_child != this && m_child->attachedTo() == this) // guards against self-links and inconsistent parent pointers
+ {
+ Position tRes = m_child->finalise(seg, font, m_position, bbox, attrLevel, clusterMin, rtl, isFinal, depth + 1);
+ if ((!m_parent || m_advance.x >= 0.5f) && tRes.x > res.x) res = tRes;
+ }
+
+ if (m_parent && m_sibling && m_sibling != this && m_sibling->attachedTo() == m_parent)
+ {
+ Position tRes = m_sibling->finalise(seg, font, base, bbox, attrLevel, clusterMin, rtl, isFinal, depth + 1); // siblings share this slot's base
+ if (tRes.x > res.x) res = tRes;
+ }
+
+ if (!m_parent && clusterMin < base.x) // part of the cluster lies left of base: shift the whole cluster right
+ {
+ Position adj = Position(m_position.x - clusterMin, 0.);
+ res += adj;
+ m_position += adj;
+ if (m_child) m_child->floodShift(adj);
+ }
+ return res;
+}
+
+// Measures the cluster rooted at this slot by running finalise() without a
+// font and returns the requested metric truncated to int32.  Returns 0 for an
+// out-of-range glyph id or an unrecognised metric.
+int32 Slot::clusterMetric(const Segment *seg, uint8 metric, uint8 attrLevel, bool rtl)
+{
+    if (glyph() >= seg->getFace()->glyphs().numGlyphs())
+        return 0;
+
+    Position base;
+    Rect bbox = seg->theGlyphBBoxTemporary(glyph());
+    float clusterMin = 0.;
+    const Position res = finalise(seg, NULL, base, bbox, attrLevel, clusterMin, rtl, false);
+
+    switch (metrics(metric))
+    {
+    case kgmetLsb :
+    case kgmetBbLeft :
+        return int32(bbox.bl.x);
+    case kgmetBbRight :
+        return int32(bbox.tr.x);
+    case kgmetBbBottom :
+        return int32(bbox.bl.y);
+    case kgmetBbTop :
+        return int32(bbox.tr.y);
+    case kgmetBbWidth :
+        return int32(bbox.tr.x - bbox.bl.x);
+    case kgmetBbHeight :
+        return int32(bbox.tr.y - bbox.bl.y);
+    case kgmetRsb :
+        return int32(res.x - bbox.tr.x);
+    case kgmetAdvWidth :
+        return int32(res.x);
+    case kgmetAdvHeight :
+        return int32(res.y);
+    default :
+        return 0;
+    }
+}
+
+#define SLOTGETCOLATTR(x) { SlotCollision *c = seg->collisionInfo(this); return c ? int(c-> x) : 0; }
+
+int Slot::getAttr(const Segment *seg, attrCode ind, uint8 subindex) const
+{ /* Returns the value of slot attribute ind as an int.  Justification attributes
+ * are forwarded to getJustify(); collision attributes are read through
+ * SLOTGETCOLATTR, which yields 0 when the slot has no collision record.
+ * subindex is only used for user-defined attributes; codes not listed return 0. */
+ if (ind >= gr_slatJStretch && ind < gr_slatJStretch + 20 && ind != gr_slatJWidth)
+ {
+ int indx = ind - gr_slatJStretch;
+ return getJustify(seg, indx / 5, indx % 5); // five consecutive codes per level: level = indx / 5, parameter = indx % 5
+ }
+
+ switch (ind)
+ {
+ case gr_slatAdvX : return int(m_advance.x);
+ case gr_slatAdvY : return int(m_advance.y);
+ case gr_slatAttTo : return m_parent ? 1 : 0;
+ case gr_slatAttX : return int(m_attach.x);
+ case gr_slatAttY : return int(m_attach.y);
+ case gr_slatAttXOff :
+ case gr_slatAttYOff : return 0;
+ case gr_slatAttWithX : return int(m_with.x);
+ case gr_slatAttWithY : return int(m_with.y);
+ case gr_slatAttWithXOff:
+ case gr_slatAttWithYOff:return 0;
+ case gr_slatAttLevel : return m_attLevel;
+ case gr_slatBreak : return seg->charinfo(m_original)->breakWeight();
+ case gr_slatCompRef : return 0;
+ case gr_slatDir : return seg->dir() & 1;
+ case gr_slatInsert : return isInsertBefore();
+ case gr_slatPosX : return int(m_position.x); // but need to calculate it
+ case gr_slatPosY : return int(m_position.y);
+ case gr_slatShiftX : return int(m_shift.x);
+ case gr_slatShiftY : return int(m_shift.y);
+ case gr_slatMeasureSol: return -1; // err what's this?
+ case gr_slatMeasureEol: return -1;
+ case gr_slatJWidth: return int(m_just);
+ case gr_slatUserDefnV1: subindex = 0; GR_FALLTHROUGH;
+ // no break
+ case gr_slatUserDefn : return subindex < seg->numAttrs() ? m_userAttr[subindex] : 0; // out-of-range subindex reads as 0
+ case gr_slatSegSplit : return seg->charinfo(m_original)->flags() & 3;
+ case gr_slatBidiLevel: return m_bidiLevel;
+ case gr_slatColFlags : { SlotCollision *c = seg->collisionInfo(this); return c ? c->flags() : 0; }
+ case gr_slatColLimitblx:SLOTGETCOLATTR(limit().bl.x)
+ case gr_slatColLimitbly:SLOTGETCOLATTR(limit().bl.y)
+ case gr_slatColLimittrx:SLOTGETCOLATTR(limit().tr.x)
+ case gr_slatColLimittry:SLOTGETCOLATTR(limit().tr.y)
+ case gr_slatColShiftx : SLOTGETCOLATTR(offset().x)
+ case gr_slatColShifty : SLOTGETCOLATTR(offset().y)
+ case gr_slatColMargin : SLOTGETCOLATTR(margin())
+ case gr_slatColMarginWt:SLOTGETCOLATTR(marginWt())
+ case gr_slatColExclGlyph:SLOTGETCOLATTR(exclGlyph())
+ case gr_slatColExclOffx:SLOTGETCOLATTR(exclOffset().x)
+ case gr_slatColExclOffy:SLOTGETCOLATTR(exclOffset().y)
+ case gr_slatSeqClass : SLOTGETCOLATTR(seqClass())
+ case gr_slatSeqProxClass:SLOTGETCOLATTR(seqProxClass())
+ case gr_slatSeqOrder : SLOTGETCOLATTR(seqOrder())
+ case gr_slatSeqAboveXoff:SLOTGETCOLATTR(seqAboveXoff())
+ case gr_slatSeqAboveWt: SLOTGETCOLATTR(seqAboveWt())
+ case gr_slatSeqBelowXlim:SLOTGETCOLATTR(seqBelowXlim())
+ case gr_slatSeqBelowWt: SLOTGETCOLATTR(seqBelowWt())
+ case gr_slatSeqValignHt:SLOTGETCOLATTR(seqValignHt())
+ case gr_slatSeqValignWt:SLOTGETCOLATTR(seqValignWt())
+ default : return 0;
+ }
+}
+
+// Helpers for the collision cases of Slot::setAttr: apply one setter on the
+// slot's collision record (if it has one), clear COLL_KNOWN, and leave the
+// switch.  The COMPLEX form first binds the current value of accessor y to `s`
+// so the setter expression can replace a single component of it.
+#define SLOTCOLSETATTR(x) { \
+        SlotCollision *c = seg->collisionInfo(this); \
+        if (c) { c-> x ; c->setFlags(c->flags() & ~SlotCollision::COLL_KNOWN); } \
+        break; }
+#define SLOTCOLSETCOMPLEXATTR(t, y, x) { \
+        SlotCollision *c = seg->collisionInfo(this); \
+        if (c) { \
+        const t &s = c-> y; \
+        c-> x ; c->setFlags(c->flags() & ~SlotCollision::COLL_KNOWN); } \
+        break; }
+
+// Sets slot attribute ind to value.  Justification attributes are forwarded
+// to setJustify(); gr_slatAttTo attaches this slot to the slot found at
+// map[value]; read-only or unsupported codes are silently ignored.  subindex
+// selects the user-defined attribute and is also compared with the target
+// index when attaching.
+void Slot::setAttr(Segment *seg, attrCode ind, uint8 subindex, int16 value, const SlotMap & map)
+{
+    if (ind == gr_slatUserDefnV1)
+    {
+        ind = gr_slatUserDefn;
+        subindex = 0;
+        if (seg->numAttrs() == 0)
+            return;
+    }
+    else if (ind >= gr_slatJStretch && ind < gr_slatJStretch + 20 && ind != gr_slatJWidth)
+    {
+        // Five consecutive codes per level: level = indx / 5, parameter = indx % 5.
+        int indx = ind - gr_slatJStretch;
+        return setJustify(seg, indx / 5, indx % 5, value);
+    }
+
+    switch (ind)
+    {
+    case gr_slatAdvX :  m_advance.x = value; break;
+    case gr_slatAdvY :  m_advance.y = value; break;
+    case gr_slatAttTo :
+    {
+        const uint16 idx = uint16(value);
+        if (idx < map.size() && map[idx])
+        {
+            Slot *other = map[idx];
+            if (other == this || other == m_parent || other->isCopied()) break;
+            if (m_parent) { m_parent->removeChild(this); attachTo(NULL); }
+            // Walk up from the target to measure the chain and to detect a
+            // cycle (this slot already being an ancestor of the target).
+            Slot *pOther = other;
+            int count = 0;
+            bool foundOther = false;
+            while (pOther)
+            {
+                ++count;
+                if (pOther == this) foundOther = true;
+                pOther = pOther->attachedTo();
+            }
+            for (pOther = m_child; pOther; pOther = pOther->m_child)
+                ++count;
+            for (pOther = m_sibling; pOther; pOther = pOther->m_sibling)
+                ++count;
+            // Only attach when the combined chain stays under 100 links.
+            if (count < 100 && !foundOther && other->child(this))
+            {
+                attachTo(other);
+                if ((map.dir() != 0) ^ (idx > subindex))
+                    m_with = Position(advance(), 0);
+                else        // normal match to previous root
+                    m_attach = Position(other->advance(), 0);
+            }
+        }
+        break;
+    }
+    case gr_slatAttX :          m_attach.x = value; break;
+    case gr_slatAttY :          m_attach.y = value; break;
+    case gr_slatAttXOff :
+    case gr_slatAttYOff :       break;
+    case gr_slatAttWithX :      m_with.x = value; break;
+    case gr_slatAttWithY :      m_with.y = value; break;
+    case gr_slatAttWithXOff :
+    case gr_slatAttWithYOff :   break;
+    case gr_slatAttLevel :
+        m_attLevel = byte(value);
+        break;
+    case gr_slatBreak :
+        seg->charinfo(m_original)->breakWeight(value);
+        break;
+    case gr_slatCompRef :   break;      // not sure what to do here
+    case gr_slatDir :       break;
+    case gr_slatInsert :
+        markInsertBefore(value? true : false);
+        break;
+    case gr_slatPosX :      break;      // can't set these here
+    case gr_slatPosY :      break;
+    case gr_slatShiftX :    m_shift.x = value; break;
+    case gr_slatShiftY :    m_shift.y = value; break;
+    case gr_slatMeasureSol :    break;
+    case gr_slatMeasureEol :    break;
+    case gr_slatJWidth :    just(value); break;
+    case gr_slatSegSplit :  seg->charinfo(m_original)->addflags(value & 3); break;
+    case gr_slatUserDefn :
+        // Same bound as getAttr() applies when reading: never write outside
+        // the seg->numAttrs() user attributes of this slot.
+        if (subindex < seg->numAttrs())
+            m_userAttr[subindex] = value;
+        break;
+    case gr_slatColFlags :  {
+        SlotCollision *c = seg->collisionInfo(this);
+        if (c)
+            c->setFlags(value);
+        break; }
+    case gr_slatColLimitblx :   SLOTCOLSETCOMPLEXATTR(Rect, limit(), setLimit(Rect(Position(value, s.bl.y), s.tr)))
+    case gr_slatColLimitbly :   SLOTCOLSETCOMPLEXATTR(Rect, limit(), setLimit(Rect(Position(s.bl.x, value), s.tr)))
+    case gr_slatColLimittrx :   SLOTCOLSETCOMPLEXATTR(Rect, limit(), setLimit(Rect(s.bl, Position(value, s.tr.y))))
+    case gr_slatColLimittry :   SLOTCOLSETCOMPLEXATTR(Rect, limit(), setLimit(Rect(s.bl, Position(s.tr.x, value))))
+    case gr_slatColMargin :     SLOTCOLSETATTR(setMargin(value))
+    case gr_slatColMarginWt :   SLOTCOLSETATTR(setMarginWt(value))
+    case gr_slatColExclGlyph :  SLOTCOLSETATTR(setExclGlyph(value))
+    case gr_slatColExclOffx :   SLOTCOLSETCOMPLEXATTR(Position, exclOffset(), setExclOffset(Position(value, s.y)))
+    case gr_slatColExclOffy :   SLOTCOLSETCOMPLEXATTR(Position, exclOffset(), setExclOffset(Position(s.x, value)))
+    case gr_slatSeqClass :      SLOTCOLSETATTR(setSeqClass(value))
+    case gr_slatSeqProxClass :  SLOTCOLSETATTR(setSeqProxClass(value))
+    case gr_slatSeqOrder :      SLOTCOLSETATTR(setSeqOrder(value))
+    case gr_slatSeqAboveXoff :  SLOTCOLSETATTR(setSeqAboveXoff(value))
+    case gr_slatSeqAboveWt :    SLOTCOLSETATTR(setSeqAboveWt(value))
+    case gr_slatSeqBelowXlim :  SLOTCOLSETATTR(setSeqBelowXlim(value))
+    case gr_slatSeqBelowWt :    SLOTCOLSETATTR(setSeqBelowWt(value))
+    case gr_slatSeqValignHt :   SLOTCOLSETATTR(setSeqValignHt(value))
+    case gr_slatSeqValignWt :   SLOTCOLSETATTR(setSeqValignWt(value))
+    default :
+        break;
+    }
+}
+
+// Returns justification parameter subindex for the given level.  A slot that
+// has its own justification record answers from it; otherwise parameters 0-3
+// are read from the glyph attributes named by the Silf's Justinfo for that
+// level, and anything else is 0.  Out-of-range levels yield 0.
+int Slot::getJustify(const Segment *seg, uint8 level, uint8 subindex) const
+{
+    if (level != 0 && level >= seg->silf()->numJustLevels())
+        return 0;
+
+    if (m_justs)
+        return m_justs->values[level * SlotJustify::NUMJUSTPARAMS + subindex];
+
+    // Level 0 passes the first test even when there are no levels at all.
+    if (level >= seg->silf()->numJustLevels())
+        return 0;
+
+    Justinfo * const info = seg->silf()->justAttrs() + level;
+    if (subindex == 0) return seg->glyphAttr(gid(), info->attrStretch());
+    if (subindex == 1) return seg->glyphAttr(gid(), info->attrShrink());
+    if (subindex == 2) return seg->glyphAttr(gid(), info->attrStep());
+    if (subindex == 3) return seg->glyphAttr(gid(), info->attrWeight());
+    return 0;   // subindex 4 has not been set yet, so clearly 0; others are unknown
+}
+
+// Stores value as justification parameter subindex of the given level,
+// creating (and pre-loading) this slot's justification record on first use.
+// Does nothing for an out-of-range non-zero level or when no record can be
+// obtained from the segment.
+void Slot::setJustify(Segment *seg, uint8 level, uint8 subindex, int16 value)
+{
+    if (level != 0 && level >= seg->silf()->numJustLevels())
+        return;
+
+    if (m_justs == NULL)
+    {
+        SlotJustify * const fresh = seg->newJustify();
+        if (fresh == NULL)
+            return;
+        fresh->LoadSlot(this, seg);
+        m_justs = fresh;
+    }
+
+    int16 & cell = m_justs->values[level * SlotJustify::NUMJUSTPARAMS + subindex];
+    cell = value;
+}
+
+// Makes ap a child of this slot: it becomes the first child when there is
+// none, otherwise it is handed to the first child's sibling chain.  Returns
+// false when ap is this slot itself (or the sibling chain rejects it).
+bool Slot::child(Slot *ap)
+{
+    if (this == ap)
+        return false;
+    if (ap == m_child)
+        return true;
+    if (m_child)
+        return m_child->sibling(ap);
+
+    m_child = ap;
+    return true;
+}
+
+// Appends ap to the sibling chain starting at this slot.  Returns false if ap
+// is already a node on the chain reached before its own link, true if it is
+// already linked or has just been appended.  A NULL ap cuts the chain after
+// this slot when this slot's sibling link is non-NULL.
+bool Slot::sibling(Slot *ap)
+{
+    for (Slot *cur = this; ; cur = cur->m_sibling)
+    {
+        if (cur == ap)
+            return false;
+        if (ap == cur->m_sibling)
+            return true;
+        if (!cur->m_sibling || !ap)
+        {
+            cur->m_sibling = ap;
+            return true;
+        }
+    }
+}
+
+// Unlinks ap from this slot's list of children and clears ap's sibling link.
+// Returns true when ap was found and removed, false otherwise (including when
+// ap is NULL, is this slot, or this slot has no children).
+bool Slot::removeChild(Slot *ap)
+{
+    if (!ap || !m_child || this == ap)
+        return false;
+
+    if (m_child == ap)
+    {
+        // Removing the head: the next sibling becomes the first child.
+        Slot * const successor = m_child->nextSibling();
+        m_child->nextSibling(NULL);
+        m_child = successor;
+        return true;
+    }
+
+    Slot *prev = m_child;
+    while (prev)
+    {
+        if (prev->m_sibling == ap)      // ap is non-NULL, so the link is too
+        {
+            prev->m_sibling = ap->m_sibling;
+            ap->nextSibling(NULL);
+            return true;
+        }
+        prev = prev->m_sibling;
+    }
+    return false;
+}
+
+// Assigns glyphid to this slot and refreshes everything derived from it: the
+// bidi class is reset to -1, the real glyph id is taken from the glyph's
+// pseudo attribute, the x advance comes from the real glyph when one can be
+// found (else from the glyph itself), and the glyph's pass bits are merged
+// into the segment.  theGlyph may be NULL, in which case it is looked up; if
+// that fails the slot gets a zero real glyph id and zero advance.
+void Slot::setGlyph(Segment *seg, uint16 glyphid, const GlyphFace * theGlyph)
+{
+    m_glyphid = glyphid;
+    m_bidiCls = -1;
+
+    if (!theGlyph)
+        theGlyph = seg->getFace()->glyphs().glyphSafe(glyphid);
+    if (!theGlyph)
+    {
+        m_realglyphid = 0;
+        m_advance = Position(0.,0.);
+        return;
+    }
+
+    m_realglyphid = theGlyph->attrs()[seg->silf()->aPseudo()];
+    if (m_realglyphid > seg->getFace()->glyphs().numGlyphs())
+        m_realglyphid = 0;
+
+    // Prefer the real glyph's metrics, falling back to the glyph itself.
+    const GlyphFace *aGlyph = m_realglyphid
+                                ? seg->getFace()->glyphs().glyphSafe(m_realglyphid)
+                                : theGlyph;
+    if (!aGlyph)
+        aGlyph = theGlyph;
+    m_advance = Position(aGlyph->theAdvance().x, 0.);
+
+    if (seg->silf()->aPassBits())
+    {
+        seg->mergePassBits(uint8(theGlyph->attrs()[seg->silf()->aPassBits()]));
+        // With more than 16 passes the following attribute supplies the upper bits.
+        if (seg->silf()->numPasses() > 16)
+            seg->mergePassBits(theGlyph->attrs()[seg->silf()->aPassBits()+1] << 16);
+    }
+}
+
+// Moves this slot and, recursively, its child and sibling slots by adj.
+// Recursion stops once depth exceeds 100.
+void Slot::floodShift(Position adj, int depth)
+{
+    if (depth <= 100)
+    {
+        m_position += adj;
+        if (m_child)
+            m_child->floodShift(adj, depth + 1);
+        if (m_sibling)
+            m_sibling->floodShift(adj, depth + 1);
+    }
+}
+
+// Pre-loads the first four parameters (stretch, shrink, step, weight) of
+// every justification level from the glyph attributes of slot s.  Levels are
+// filled from the highest down to level 0.
+void SlotJustify::LoadSlot(const Slot *s, const Segment *seg)
+{
+    int level = seg->silf()->numJustLevels();
+    while (--level >= 0)
+    {
+        Justinfo * const info = seg->silf()->justAttrs() + level;
+        int16 * const v = values + level * NUMJUSTPARAMS;
+        v[0] = seg->glyphAttr(s->gid(), info->attrStretch());
+        v[1] = seg->glyphAttr(s->gid(), info->attrShrink());
+        v[2] = seg->glyphAttr(s->gid(), info->attrStep());
+        v[3] = seg->glyphAttr(s->gid(), info->attrWeight());
+    }
+}
+
+// Returns the slot that follows s in a walk of its attachment cluster: the
+// first child if there is one, else the next sibling, else the next sibling
+// of the nearest ancestor that has one.  Returns NULL when the walk is done.
+Slot * Slot::nextInCluster(const Slot *s) const
+{
+    if (s->firstChild())
+        return s->firstChild();
+    if (s->nextSibling())
+        return s->nextSibling();
+
+    // Climb towards the root looking for an ancestor with a sibling.
+    // (An earlier variant also required base->firstChild() == s.)
+    for (Slot *base = s->attachedTo(); base; base = base->attachedTo())
+    {
+        if (base->nextSibling())
+            return base->nextSibling();
+    }
+    return NULL;
+}
+
+// Reports whether base appears anywhere on this slot's chain of parents.
+bool Slot::isChildOf(const Slot *base) const
+{
+    Slot *ancestor = m_parent;
+    while (ancestor)
+    {
+        if (ancestor == base)
+            return true;
+        ancestor = ancestor->m_parent;
+    }
+    return false;
+}
diff --git a/thirdparty/graphite/src/Sparse.cpp b/thirdparty/graphite/src/Sparse.cpp
new file mode 100644
index 0000000000..aa43113669
--- /dev/null
+++ b/thirdparty/graphite/src/Sparse.cpp
@@ -0,0 +1,62 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2011, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include <cassert>
+#include "inc/Sparse.h"
+#include "inc/bits.h"
+
+using namespace graphite2;
+
+// Shared sentinel chunk with both fields zero. The destructor compares m_array.map
+// against its address and frees nothing when they match.
+const sparse::chunk sparse::empty_chunk = {0,0};
+
+sparse::~sparse() throw()
+{
+    // Nothing is released while the map still points at the shared empty_chunk.
+    if (m_array.map != &empty_chunk)
+        free(m_array.values);
+}
+
+
+// Branch-free lookup of the value stored for key k; yields 0 when the lookup is
+// masked out (see the comments on g below).
+sparse::mapped_type sparse::operator [] (const key_type k) const throw()
+{
+ // g is the top bit of (chunk index of k - m_nchunks) after truncation to key_type.
+ // NOTE(review): assuming key_type is unsigned, g is 1 when the chunk index is
+ // below m_nchunks and 0 otherwise - confirm against inc/Sparse.h.
+ mapped_type g = key_type(k/SIZEOF_CHUNK - m_nchunks) >> (sizeof k*8 - 1);
+ // The index is (g*k)/SIZEOF_CHUNK: k's own chunk when g is 1, chunk 0 when g is 0.
+ const chunk & c = m_array.map[g*k/SIZEOF_CHUNK];
+ // Shift the chunk mask so that the bit belonging to k ends up in bit 0.
+ const mask_t m = c.mask >> (SIZEOF_CHUNK - 1 - (k%SIZEOF_CHUNK));
+ // Clear g when k's bit is not set in the mask.
+ g *= m & 1;
+
+ // With g == 0 this reads values[0] and multiplies it by 0, so the result is 0.
+ // With g == 1 the index is the chunk offset plus the count of set bits in m >> 1.
+ return g*m_array.values[g*(c.offset + bit_set_count(m >> 1))];
+}
+
+
+size_t sparse::capacity() const throw()
+{
+    // Sum the number of set mask bits over all m_nchunks chunks.
+    size_t total = 0;
+    for (size_t i = 0; i != m_nchunks; ++i)
+        total += bit_set_count(m_array.map[i].mask);
+    return total;
+}
diff --git a/thirdparty/graphite/src/TtfUtil.cpp b/thirdparty/graphite/src/TtfUtil.cpp
new file mode 100644
index 0000000000..2eb46a11fb
--- /dev/null
+++ b/thirdparty/graphite/src/TtfUtil.cpp
@@ -0,0 +1,2053 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+/*--------------------------------------------------------------------*//*:Ignore this sentence.
+
+File: TtfUtil.cpp
+Responsibility: Alan Ward
+Last reviewed: Not yet.
+
+Description
+ Implements the methods for TtfUtil class. This file should remain portable to any C++
+ environment by only using standard C++ and the TTF structures defined in Tt.h.
+-------------------------------------------------------------------------------*//*:End Ignore*/
+
+
+/***********************************************************************************************
+ Include files
+***********************************************************************************************/
+// Language headers
+//#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <climits>
+#include <cwchar>
+//#include <stdexcept>
+// Platform headers
+// Module headers
+#include "inc/TtfUtil.h"
+#include "inc/TtfTypes.h"
+#include "inc/Endian.h"
+
+/***********************************************************************************************
+ Forward declarations
+***********************************************************************************************/
+
+/***********************************************************************************************
+ Local Constants and static variables
+***********************************************************************************************/
+namespace
+{
+#ifdef ALL_TTFUTILS
+ // max number of components allowed in composite glyphs
+ // NOTE(review): only compiled when ALL_TTFUTILS is defined; its users lie outside this part of the file.
+ const int kMaxGlyphComponents = 8;
+#endif
+
+    // Convert a fixed-point value with R fractional bits to float, i.e. divide by
+    // two to the power R. The divisor is built with a shift because `2^R` in C++
+    // is bitwise XOR (2^14 == 12), not exponentiation.
+    template <int R, typename T>
+    inline float fixed_to_float(const T f) {
+        return float(f)/float(1UL << R);
+    }
+
+/*----------------------------------------------------------------------------------------------
+ Table of standard Postscript glyph names. From Martin Hosken. Disagrees with ttfdump.exe
+---------------------------------------------------------------------------------------------*/
+#ifdef ALL_TTFUTILS
+ // Number of entries in rgPostName. PostLookup adds this value to a font specific
+ // name's position to obtain the index it searches for in a format 2 post table.
+ const int kcPostNames = 258;
+
+ // Standard glyph names; PostLookup compares the requested name against each entry
+ // and uses the matching array index as the standard name index.
+ const char * rgPostName[kcPostNames] = {
+ ".notdef", ".null", "nonmarkingreturn", "space", "exclam", "quotedbl", "numbersign",
+ "dollar", "percent", "ampersand", "quotesingle", "parenleft",
+ "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash",
+ "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
+ "nine", "colon", "semicolon", "less", "equal", "greater", "question",
+ "at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
+ "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
+ "bracketleft", "backslash", "bracketright", "asciicircum",
+ "underscore", "grave", "a", "b", "c", "d", "e", "f", "g", "h", "i",
+ "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
+ "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde",
+ "Adieresis", "Aring", "Ccedilla", "Eacute", "Ntilde", "Odieresis",
+ "Udieresis", "aacute", "agrave", "acircumflex", "adieresis", "atilde",
+ "aring", "ccedilla", "eacute", "egrave", "ecircumflex", "edieresis",
+ "iacute", "igrave", "icircumflex", "idieresis", "ntilde", "oacute",
+ "ograve", "ocircumflex", "odieresis", "otilde", "uacute", "ugrave",
+ "ucircumflex", "udieresis", "dagger", "degree", "cent", "sterling",
+ "section", "bullet", "paragraph", "germandbls", "registered",
+ "copyright", "trademark", "acute", "dieresis", "notequal", "AE",
+ "Oslash", "infinity", "plusminus", "lessequal", "greaterequal", "yen",
+ "mu", "partialdiff", "summation", "product", "pi", "integral",
+ "ordfeminine", "ordmasculine", "Omega", "ae", "oslash", "questiondown",
+ "exclamdown", "logicalnot", "radical", "florin", "approxequal",
+ "Delta", "guillemotleft", "guillemotright", "ellipsis", "nonbreakingspace",
+ "Agrave", "Atilde", "Otilde", "OE", "oe", "endash", "emdash",
+ "quotedblleft", "quotedblright", "quoteleft", "quoteright", "divide",
+ "lozenge", "ydieresis", "Ydieresis", "fraction", "currency",
+ "guilsinglleft", "guilsinglright", "fi", "fl", "daggerdbl", "periodcentered",
+ "quotesinglbase", "quotedblbase", "perthousand", "Acircumflex",
+ "Ecircumflex", "Aacute", "Edieresis", "Egrave", "Iacute",
+ "Icircumflex", "Idieresis", "Igrave", "Oacute", "Ocircumflex",
+ "apple", "Ograve", "Uacute", "Ucircumflex", "Ugrave", "dotlessi",
+ "circumflex", "tilde", "macron", "breve", "dotaccent", "ring",
+ "cedilla", "hungarumlaut", "ogonek", "caron", "Lslash", "lslash",
+ "Scaron", "scaron", "Zcaron", "zcaron", "brokenbar", "Eth", "eth",
+ "Yacute", "yacute", "Thorn", "thorn", "minus", "multiply",
+ "onesuperior", "twosuperior", "threesuperior", "onehalf", "onequarter",
+ "threequarters", "franc", "Gbreve", "gbreve", "Idotaccent", "Scedilla",
+ "scedilla", "Cacute", "cacute", "Ccaron", "ccaron",
+ "dcroat" };
+#endif
+
+} // end of namespace
+
+/***********************************************************************************************
+ Methods
+***********************************************************************************************/
+
+/* Note on error processing: The code guards against bad glyph ids being used to look up data
+in open ended tables (loca, hmtx). If the glyph id comes from a cmap this shouldn't happen
+but it seems prudent to check for user errors here. The code does assume that data obtained
+from the TTF file is valid otherwise (though the CheckTable method seeks to check for
+obvious problems that might accompany a change in table versions). For example an invalid
+offset in the loca table which could exceed the size of the glyf table is NOT trapped.
+Likewise if numberOf_LongHorMetrics in the hhea table is wrong, this will NOT be trapped,
+which could cause a lookup in the hmtx table to exceed the table length. Of course, TTF tables
+that are completely corrupt will cause unpredictable results. */
+
+/* Note on composite glyphs: Glyphs that have components that are themselves composites
+are not supported. IsDeepComposite can be used to test for this. False is returned from many
+of the methods in these cases. It is unclear how to build composite glyphs in some cases,
+so this code represents my best guess until test cases can be found. See notes on the high-
+level GlyfPoints method. */
+namespace graphite2
+{
+namespace TtfUtil
+{
+
+
+/*----------------------------------------------------------------------------------------------
+ Get offset and size of the offset table needed to find table directory.
+ Return true if success, false otherwise.
+ lSize excludes any table directory entries.
+----------------------------------------------------------------------------------------------*/
+bool GetHeaderInfo(size_t & lOffset, size_t & lSize)
+{
+    // The header is the part of the offset sub-table that precedes the table directory.
+    const size_t cbHeader = offsetof(Sfnt::OffsetSubTable, table_directory);
+    assert(cbHeader == sizeof(uint32) + 4*sizeof (uint16));
+
+    lOffset = 0;
+    lSize = cbHeader;
+    return true;
+}
+
+/*----------------------------------------------------------------------------------------------
+ Check the offset table for expected data.
+ Return true if success, false otherwise.
+----------------------------------------------------------------------------------------------*/
+bool CheckHeader(const void * pHdr)
+{
+    // A null header can never be valid.
+    if (!pHdr)
+        return false;
+
+    // Otherwise the scaler type must be the TrueTypeWin constant.
+    const Sfnt::OffsetSubTable * const pSubTable
+        = static_cast<const Sfnt::OffsetSubTable *>(pHdr);
+    return be::swap(pSubTable->scaler_type) == Sfnt::OffsetSubTable::TrueTypeWin;
+}
+
+/*----------------------------------------------------------------------------------------------
+ Get offset and size of the table directory.
+ Return true if successful, false otherwise.
+----------------------------------------------------------------------------------------------*/
+bool GetTableDirInfo(const void * pHdr, size_t & lOffset, size_t & lSize)
+{
+    const Sfnt::OffsetSubTable * const pSubTable
+        = static_cast<const Sfnt::OffsetSubTable *>(pHdr);
+
+    // The directory starts at the table_directory member and holds num_tables entries.
+    lOffset = offsetof(Sfnt::OffsetSubTable, table_directory);
+    lSize = sizeof(Sfnt::OffsetSubTable::Entry) * be::swap(pSubTable->num_tables);
+
+    return true;
+}
+
+
+/*----------------------------------------------------------------------------------------------
+ Get offset and size of the specified table.
+ Return true if successful, false otherwise. On false, offset and size will be 0.
+----------------------------------------------------------------------------------------------*/
+bool GetTableInfo(const Tag TableTag, const void * pHdr, const void * pTableDir,
+                  size_t & lOffset, size_t & lSize)
+{
+    // Documented contract: on failure both outputs are 0, so clear them up front.
+    lOffset = 0;
+    lSize = 0;
+
+    const Sfnt::OffsetSubTable * pOffsetTable
+        = reinterpret_cast<const Sfnt::OffsetSubTable *>(pHdr);
+    const size_t num_tables = be::swap(pOffsetTable->num_tables);
+
+    // 40 - safe guard. Checked before any end pointer is formed from the
+    // (untrusted) table count.
+    if (num_tables > 40)
+        return false;
+
+    const Sfnt::OffsetSubTable::Entry
+        * entry_itr = reinterpret_cast<const Sfnt::OffsetSubTable::Entry *>(
+            pTableDir),
+        * const dir_end = entry_itr + num_tables;
+
+    // Linear scan of the directory for the first entry carrying the requested tag.
+    for (; entry_itr != dir_end; ++entry_itr)
+    {
+        if (be::swap(entry_itr->tag) == TableTag)
+        {
+            lOffset = be::swap(entry_itr->offset);
+            lSize = be::swap(entry_itr->length);
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/*----------------------------------------------------------------------------------------------
+ Check the specified table. Tests depend on the table type.
+ Return true if successful, false otherwise.
+----------------------------------------------------------------------------------------------*/
+bool CheckTable(const Tag TableId, const void * pTable, size_t lTableSize)
+{
+    using namespace Sfnt;
+
+    // Reject null tables and anything shorter than 4 bytes, whatever the tag.
+    if (pTable == 0 || lTableSize < 4) return false;
+
+    switch(TableId)
+    {
+    case Tag::cmap: // cmap
+    {
+        if (lTableSize < sizeof(Sfnt::CharacterCodeMap))
+            return false;
+        const Sfnt::CharacterCodeMap * const pCmap
+            = reinterpret_cast<const Sfnt::CharacterCodeMap *>(pTable);
+        return be::swap(pCmap->version) == 0;
+    }
+
+    case Tag::head: // head
+    {
+        if (lTableSize < sizeof(Sfnt::FontHeader))
+            return false;
+        const Sfnt::FontHeader * const pHead
+            = reinterpret_cast<const Sfnt::FontHeader *>(pTable);
+        // Version, magic number and glyph data format must match the expected
+        // constants, and the loca format must be one of the two known values.
+        bool r = be::swap(pHead->version) == OneFix
+            && be::swap(pHead->magic_number) == FontHeader::MagicNumber
+            && be::swap(pHead->glyph_data_format)
+                    == FontHeader::GlypDataFormat
+            && (be::swap(pHead->index_to_loc_format)
+                    == FontHeader::ShortIndexLocFormat
+                || be::swap(pHead->index_to_loc_format)
+                    == FontHeader::LongIndexLocFormat);
+        return r;
+    }
+
+    case Tag::post: // post
+    {
+        if (lTableSize < sizeof(Sfnt::PostScriptGlyphName))
+            return false;
+        const Sfnt::PostScriptGlyphName * const pPost
+            = reinterpret_cast<const Sfnt::PostScriptGlyphName *>(pTable);
+        const fixed format = be::swap(pPost->format);
+        bool r = format == PostScriptGlyphName::Format1
+            || format == PostScriptGlyphName::Format2
+            || format == PostScriptGlyphName::Format3
+            || format == PostScriptGlyphName::Format25;
+        return r;
+    }
+
+    case Tag::hhea: // hhea
+    {
+        if (lTableSize < sizeof(Sfnt::HorizontalHeader))
+            return false;
+        const Sfnt::HorizontalHeader * pHhea =
+            reinterpret_cast<const Sfnt::HorizontalHeader *>(pTable);
+        bool r = be::swap(pHhea->version) == OneFix
+            && be::swap(pHhea->metric_data_format) == 0;
+        return r;
+    }
+
+    case Tag::maxp: // maxp
+    {
+        if (lTableSize < sizeof(Sfnt::MaximumProfile))
+            return false;
+        const Sfnt::MaximumProfile * pMaxp =
+            reinterpret_cast<const Sfnt::MaximumProfile *>(pTable);
+        return be::swap(pMaxp->version) == OneFix;
+    }
+
+    case Tag::OS_2: // OS/2
+    {
+        const Sfnt::Compatibility * pOs2
+            = reinterpret_cast<const Sfnt::Compatibility *>(pTable);
+        // Each known version has its own minimum size. A table that is too short
+        // for its declared version is rejected (previously it fell out of the
+        // switch and was reported as valid).
+        switch (be::swap(pOs2->version))
+        {
+        case 0:
+            return sizeof(Sfnt::Compatibility0) <= lTableSize;
+        case 1:
+            return sizeof(Sfnt::Compatibility1) <= lTableSize;
+        case 2:
+            return sizeof(Sfnt::Compatibility2) <= lTableSize;
+        case 3:
+        case 4: // version 4 changed the meaning of some fields which we don't use
+            return sizeof(Sfnt::Compatibility3) <= lTableSize;
+        default:
+            return false;
+        }
+    }
+
+    case Tag::name:
+    {
+        if (lTableSize < sizeof(Sfnt::FontNames))
+            return false;
+        const Sfnt::FontNames * pName
+            = reinterpret_cast<const Sfnt::FontNames *>(pTable);
+        return be::swap(pName->format) == 0;
+    }
+
+    case Tag::glyf:
+    {
+        return (lTableSize >= sizeof(Sfnt::Glyph));
+    }
+
+    default:
+        break;
+    }
+
+    // Tables without a specific test only need to pass the generic check above.
+    return true;
+}
+
+/*----------------------------------------------------------------------------------------------
+ Return the number of glyphs in the font. Should never be less than zero.
+
+ Note: this method is not currently used by the Graphite engine.
+----------------------------------------------------------------------------------------------*/
+size_t GlyphCount(const void * pMaxp)
+{
+    // Byte-swapped num_glyphs field of the maxp table.
+    const Sfnt::MaximumProfile * const pProfile
+        = static_cast<const Sfnt::MaximumProfile *>(pMaxp);
+    return be::swap(pProfile->num_glyphs);
+}
+
+#ifdef ALL_TTFUTILS
+/*----------------------------------------------------------------------------------------------
+ Return the maximum number of components for any composite glyph in the font.
+
+ Note: this method is not currently used by the Graphite engine.
+----------------------------------------------------------------------------------------------*/
+size_t MaxCompositeComponentCount(const void * pMaxp)
+{
+    // Byte-swapped max_component_elements field of the maxp table.
+    const Sfnt::MaximumProfile * const pProfile
+        = static_cast<const Sfnt::MaximumProfile *>(pMaxp);
+    return be::swap(pProfile->max_component_elements);
+}
+
+/*----------------------------------------------------------------------------------------------
+ Composite glyphs can be composed of glyphs that are themselves composites.
+ This method returns the maximum number of levels like this for any glyph in the font.
+ A non-composite glyph has a level of 1.
+
+ Note: this method is not currently used by the Graphite engine.
+----------------------------------------------------------------------------------------------*/
+size_t MaxCompositeLevelCount(const void * pMaxp)
+{
+    // Byte-swapped max_component_depth field of the maxp table.
+    const Sfnt::MaximumProfile * const pProfile
+        = static_cast<const Sfnt::MaximumProfile *>(pMaxp);
+    return be::swap(pProfile->max_component_depth);
+}
+
+/*----------------------------------------------------------------------------------------------
+ Return the number of glyphs in the font according to a different source.
+ Should never be less than zero. Return -1 on failure.
+
+ Note: this method is not currently used by the Graphite engine.
+----------------------------------------------------------------------------------------------*/
+size_t LocaGlyphCount(size_t lLocaSize, const void * pHead) //throw(std::domain_error)
+{
+    const Sfnt::FontHeader * const pHeader
+        = static_cast<const Sfnt::FontHeader *>(pHead);
+    const int nLocFormat = be::swap(pHeader->index_to_loc_format);
+
+    // Short format: loca entries are two bytes each (and hold offsets divided by two).
+    if (nLocFormat == Sfnt::FontHeader::ShortIndexLocFormat)
+        return (lLocaSize >> 1) - 1;
+
+    // Long format: loca entries are four bytes each.
+    if (nLocFormat == Sfnt::FontHeader::LongIndexLocFormat)
+        return (lLocaSize >> 2) - 1;
+
+    // Unknown format: the -1 failure value wraps to the largest size_t.
+    return -1;
+    //throw std::domain_error("head table in inconsistent state. The font may be corrupted");
+}
+#endif
+
+/*----------------------------------------------------------------------------------------------
+ Return the design units the font is designed with
+----------------------------------------------------------------------------------------------*/
+int DesignUnits(const void * pHead)
+{
+    // Byte-swapped units_per_em field of the head table.
+    const Sfnt::FontHeader * const pHeader
+        = static_cast<const Sfnt::FontHeader *>(pHead);
+    return be::swap(pHeader->units_per_em);
+}
+
+#ifdef ALL_TTFUTILS
+/*----------------------------------------------------------------------------------------------
+ Return the checksum from the head table, which serves as a unique identifier for the font.
+----------------------------------------------------------------------------------------------*/
+int HeadTableCheckSum(const void * pHead)
+{
+    // Byte-swapped check_sum_adjustment field of the head table.
+    const Sfnt::FontHeader * const pHeader
+        = static_cast<const Sfnt::FontHeader *>(pHead);
+    return be::swap(pHeader->check_sum_adjustment);
+}
+
+/*----------------------------------------------------------------------------------------------
+ Return the create time from the head table. This consists of a 64-bit integer, which
+ we return here as two 32-bit integers.
+
+ Note: this method is not currently used by the Graphite engine.
+----------------------------------------------------------------------------------------------*/
+void HeadTableCreateTime(const void * pHead,
+    unsigned int * pnDateBC, unsigned int * pnDateAD)
+{
+    const Sfnt::FontHeader * const pHeader
+        = static_cast<const Sfnt::FontHeader *>(pHead);
+
+    // created[] holds the two halves of the timestamp; each is byte-swapped separately.
+    *pnDateBC = be::swap(pHeader->created[0]);
+    *pnDateAD = be::swap(pHeader->created[1]);
+}
+
+/*----------------------------------------------------------------------------------------------
+ Return the modify time from the head table. This consists of a 64-bit integer, which
+ we return here as two 32-bit integers.
+
+ Note: this method is not currently used by the Graphite engine.
+----------------------------------------------------------------------------------------------*/
+void HeadTableModifyTime(const void * pHead,
+    unsigned int * pnDateBC, unsigned int *pnDateAD)
+{
+    const Sfnt::FontHeader * const pHeader
+        = static_cast<const Sfnt::FontHeader *>(pHead);
+
+    // modified[] holds the two halves of the timestamp; each is byte-swapped separately.
+    *pnDateBC = be::swap(pHeader->modified[0]);
+    *pnDateAD = be::swap(pHeader->modified[1]);
+}
+
+/*----------------------------------------------------------------------------------------------
+ Return true if the font is italic.
+----------------------------------------------------------------------------------------------*/
+bool IsItalic(const void * pHead)
+{
+    const Sfnt::FontHeader * const pHeader
+        = static_cast<const Sfnt::FontHeader *>(pHead);
+
+    // The font counts as italic when bit 0x2 of mac_style is set.
+    const int nMacStyle = be::swap(pHeader->mac_style);
+    return (nMacStyle & 0x00000002) != 0;
+}
+
+/*----------------------------------------------------------------------------------------------
+ Return the ascent for the font
+----------------------------------------------------------------------------------------------*/
+int FontAscent(const void * pOs2)
+{
+    // Byte-swapped win_ascent field of the OS/2 table.
+    const Sfnt::Compatibility * const pCompat
+        = static_cast<const Sfnt::Compatibility *>(pOs2);
+    return be::swap(pCompat->win_ascent);
+}
+
+/*----------------------------------------------------------------------------------------------
+ Return the descent for the font
+----------------------------------------------------------------------------------------------*/
+int FontDescent(const void * pOs2)
+{
+    // Byte-swapped win_descent field of the OS/2 table.
+    const Sfnt::Compatibility * const pCompat
+        = static_cast<const Sfnt::Compatibility *>(pOs2);
+    return be::swap(pCompat->win_descent);
+}
+
+/*----------------------------------------------------------------------------------------------
+ Get the bold and italic style bits.
+ Return true if successful. false otherwise.
+ In addition to checking the OS/2 table, one could also check
+ the head table's macStyle field (overridden by the OS/2 table on Win)
+ the sub-family name in the name table (though this can contain oblique, dark, etc too)
+----------------------------------------------------------------------------------------------*/
+bool FontOs2Style(const void *pOs2, bool & fBold, bool & fItalic)
+{
+    const Sfnt::Compatibility * const pCompat
+        = static_cast<const Sfnt::Compatibility *>(pOs2);
+
+    // Read fs_selection once and test the Bold and Italic bits against it.
+    const int nSelection = be::swap(pCompat->fs_selection);
+    fBold = (nSelection & Sfnt::Compatibility::Bold) != 0;
+    fItalic = (nSelection & Sfnt::Compatibility::Italic) != 0;
+
+    return true;
+}
+#endif
+
+/*----------------------------------------------------------------------------------------------
+ Method for searching name table.
+----------------------------------------------------------------------------------------------*/
+bool GetNameInfo(const void * pName, int nPlatformId, int nEncodingId,
+        int nLangId, int nNameId, size_t & lOffset, size_t & lSize)
+{
+    // Outputs stay 0 unless a matching record is found.
+    lOffset = 0;
+    lSize = 0;
+
+    const Sfnt::FontNames * pTable = reinterpret_cast<const Sfnt::FontNames *>(pName);
+    const uint16 cRecord = be::swap(pTable->count);
+    const uint16 nRecordOffset = be::swap(pTable->string_offset);
+
+    // The name records follow the FontNames header directly.
+    const Sfnt::NameRecord * const pFirst
+        = reinterpret_cast<const Sfnt::NameRecord *>(pTable + 1);
+    const Sfnt::NameRecord * const pEnd = pFirst + cRecord;
+
+    for (const Sfnt::NameRecord * pRecord = pFirst; pRecord != pEnd; ++pRecord)
+    {
+        // All four identifiers must match.
+        if (be::swap(pRecord->platform_id) != nPlatformId)
+            continue;
+        if (be::swap(pRecord->platform_specific_id) != nEncodingId)
+            continue;
+        if (be::swap(pRecord->language_id) != nLangId)
+            continue;
+        if (be::swap(pRecord->name_id) != nNameId)
+            continue;
+
+        // The record offset is relative to the string storage, so add string_offset.
+        lOffset = be::swap(pRecord->offset) + nRecordOffset;
+        lSize = be::swap(pRecord->length);
+        return true;
+    }
+
+    return false;
+}
+
+#ifdef ALL_TTFUTILS
+/*----------------------------------------------------------------------------------------------
+ Return all the lang-IDs that have data for the given name-IDs. Assume that there is room
+ in the return array (langIdList) for 128 items. The purpose of this method is to return
+ a list of all possible lang-IDs.
+----------------------------------------------------------------------------------------------*/
+int GetLangsForNames(const void * pName, int nPlatformId, int nEncodingId,
+        int * nameIdList, int cNameIds, short * langIdList)
+{
+    const Sfnt::FontNames * pTable = reinterpret_cast<const Sfnt::FontNames *>(pName);
+    const uint16 cRecord = be::swap(pTable->count);
+    int cLangIds = 0;
+
+    // Tables with more than 127 records are not scanned at all.
+    if (cRecord > 127)
+        return cLangIds;
+
+    const Sfnt::NameRecord * pRecord = reinterpret_cast<const Sfnt::NameRecord *>(pTable + 1);
+    for (int iRecord = 0; iRecord < cRecord; ++iRecord, ++pRecord)
+    {
+        // Only records for the requested platform and encoding are of interest.
+        if (be::swap(pRecord->platform_id) != nPlatformId
+            || be::swap(pRecord->platform_specific_id) != nEncodingId)
+            continue;
+
+        const int nLangId = be::swap(pRecord->language_id);
+        const int nNameId = be::swap(pRecord->name_id);
+
+        // Is this record's name id one of the requested ones?
+        int iName = 0;
+        while (iName < cNameIds && nameIdList[iName] != nNameId)
+            ++iName;
+        if (iName >= cNameIds)
+            continue;
+
+        // Append the language id unless it is already in the list.
+        int iLang = 0;
+        while (iLang < cLangIds && langIdList[iLang] != nLangId)
+            ++iLang;
+        if (iLang >= cLangIds)
+            langIdList[cLangIds++] = short(nLangId);
+
+        // The output array is assumed to have room for 128 items; stop when full.
+        if (cLangIds == 128)
+            return cLangIds;
+    }
+
+    return cLangIds;
+}
+
+/*----------------------------------------------------------------------------------------------
+ Get the offset and size of the font family name in English for the MS Platform with Unicode
+ writing system. The offset is within the pName data. The string is double byte with MSB
+ first.
+----------------------------------------------------------------------------------------------*/
+bool Get31EngFamilyInfo(const void * pName, size_t & lOffset, size_t & lSize)
+{
+    // Microsoft platform, encoding id 1, language id 1033, family name record.
+    const int nEncodingId = 1;
+    const int nLangId = 1033;
+    return GetNameInfo(pName, Sfnt::NameRecord::Microsoft, nEncodingId, nLangId,
+                       Sfnt::NameRecord::Family, lOffset, lSize);
+}
+
+/*----------------------------------------------------------------------------------------------
+ Get the offset and size of the full font name in English for the MS Platform with Unicode
+ writing system. The offset is within the pName data. The string is double byte with MSB
+ first.
+
+ Note: this method is not currently used by the Graphite engine.
+----------------------------------------------------------------------------------------------*/
+bool Get31EngFullFontInfo(const void * pName, size_t & lOffset, size_t & lSize)
+{
+    // Microsoft platform, encoding id 1, language id 1033, full name record.
+    const int nEncodingId = 1;
+    const int nLangId = 1033;
+    return GetNameInfo(pName, Sfnt::NameRecord::Microsoft, nEncodingId, nLangId,
+                       Sfnt::NameRecord::Fullname, lOffset, lSize);
+}
+
+/*----------------------------------------------------------------------------------------------
+ Get the offset and size of the font family name in English for the MS Platform with Symbol
+ writing system. The offset is within the pName data. The string is double byte with MSB
+ first.
+----------------------------------------------------------------------------------------------*/
+bool Get30EngFamilyInfo(const void * pName, size_t & lOffset, size_t & lSize)
+{
+    // Microsoft platform, encoding id 0, language id 1033, family name record.
+    const int nEncodingId = 0;
+    const int nLangId = 1033;
+    return GetNameInfo(pName, Sfnt::NameRecord::Microsoft, nEncodingId, nLangId,
+                       Sfnt::NameRecord::Family, lOffset, lSize);
+}
+
+/*----------------------------------------------------------------------------------------------
+ Get the offset and size of the full font name in English for the MS Platform with Symbol
+ writing system. The offset is within the pName data. The string is double byte with MSB
+ first.
+
+ Note: this method is not currently used by the Graphite engine.
+----------------------------------------------------------------------------------------------*/
+bool Get30EngFullFontInfo(const void * pName, size_t & lOffset, size_t & lSize)
+{
+    // Microsoft platform, encoding id 0, language id 1033, full name record.
+    const int nEncodingId = 0;
+    const int nLangId = 1033;
+    return GetNameInfo(pName, Sfnt::NameRecord::Microsoft, nEncodingId, nLangId,
+                       Sfnt::NameRecord::Fullname, lOffset, lSize);
+}
+
+/*----------------------------------------------------------------------------------------------
+ Return the Glyph ID for a given Postscript name. This method finds the first glyph which
+ matches the requested Postscript name. Ideally every glyph should have a unique Postscript
+ name (except for special names such as .notdef), but this is not always true.
+ On failure return value less than zero.
+ -1 - table search failed
+ -2 - format 3 table (no Postscript glyph info)
+ -3 - other failures
+
+ Note: this method is not currently used by the Graphite engine.
+----------------------------------------------------------------------------------------------*/
+int PostLookup(const void * pPost, size_t lPostSize, const void * pMaxp,
+        const char * pPostName)
+{
+    using namespace Sfnt;
+
+    const Sfnt::PostScriptGlyphName * pTable
+        = reinterpret_cast<const Sfnt::PostScriptGlyphName *>(pPost);
+    fixed format = be::swap(pTable->format);
+
+    if (format == PostScriptGlyphName::Format3)
+    { // format 3 - no Postscript glyph info in font
+        return -2;
+    }
+
+    // search for given Postscript name among the standard names
+    int iPostName = -1; // index in standard names
+    for (int i = 0; i < kcPostNames; i++)
+    {
+        if (!strcmp(pPostName, rgPostName[i]))
+        {
+            iPostName = i;
+            break;
+        }
+    }
+
+    if (format == PostScriptGlyphName::Format1)
+    { // format 1 - use standard Postscript names
+        return iPostName;
+    }
+
+    if (format == PostScriptGlyphName::Format25)
+    {
+        if (iPostName == -1)
+            return -1;
+
+        const PostScriptGlyphName25 * pTable25
+            = static_cast<const PostScriptGlyphName25 *>(pTable);
+        int cnGlyphs = GlyphCount(pMaxp);
+        for (gid16 nGlyphId = 0; nGlyphId < cnGlyphs && nGlyphId < kcPostNames;
+                nGlyphId++)
+        { // glyph_name_index25 contains bytes so no byte swapping needed
+          // search for first glyph id that uses the standard name
+            if (nGlyphId + pTable25->offset[nGlyphId] == iPostName)
+                return nGlyphId;
+        }
+        // not found: falls through to the final "other failures" return
+    }
+
+    if (format == PostScriptGlyphName::Format2)
+    { // format 2
+        const PostScriptGlyphName2 * pTable2
+            = static_cast<const PostScriptGlyphName2 *>(pTable);
+
+        int cnGlyphs = be::swap(pTable2->number_of_glyphs);
+
+        if (iPostName != -1)
+        { // did match a standard name, look for first glyph id mapped to that name
+            for (gid16 nGlyphId = 0; nGlyphId < cnGlyphs; nGlyphId++)
+            {
+                if (be::swap(pTable2->glyph_name_index[nGlyphId]) == iPostName)
+                    return nGlyphId;
+            }
+        }
+
+        { // no glyph found through a standard name, search font specific names
+            size_t nStrSizeGoal = strlen(pPostName);
+            const char * pFirstGlyphName = reinterpret_cast<const char *>(
+                &pTable2->glyph_name_index[0] + cnGlyphs);
+            const char * pGlyphName = pFirstGlyphName;
+            int iInNames = 0; // index in font specific names
+            bool fFound = false;
+            const char * const endOfTable
+                = reinterpret_cast<const char *>(pTable2) + lPostSize;
+            while (pGlyphName < endOfTable && !fFound)
+            { // search Pascal strings for first matching name
+                // The length byte is unsigned (0..255); reading it through a plain
+                // char would sign-extend lengths above 127 into a huge size_t.
+                const size_t nStringSize
+                    = static_cast<unsigned char>(*pGlyphName);
+                // Length byte plus string must fit inside the table; otherwise the
+                // data is malformed and the search stops.
+                if (nStringSize >= size_t(endOfTable - pGlyphName))
+                    break;
+                if (nStrSizeGoal != nStringSize ||
+                    strncmp(pGlyphName + 1, pPostName, nStringSize))
+                { // did not match
+                    ++iInNames;
+                    pGlyphName += nStringSize + 1;
+                }
+                else
+                { // did match
+                    fFound = true;
+                }
+            }
+            if (!fFound)
+                return -1; // no font specific name matches request
+
+            // font specific names are indexed after the standard names
+            iInNames += kcPostNames;
+            for (gid16 nGlyphId = 0; nGlyphId < cnGlyphs; nGlyphId++)
+            { // search for first glyph id that maps to the found string index
+                if (be::swap(pTable2->glyph_name_index[nGlyphId]) == iInNames)
+                    return nGlyphId;
+            }
+            return -1; // no glyph mapped to this index (very strange)
+        }
+    }
+
+    return -3;
+}
+
+/*----------------------------------------------------------------------------------------------
+ Convert a Unicode character string from big endian (MSB first, Motorola) format to little
+ endian (LSB first, Intel) format.
+ nSize is the number of Unicode characters in the string. It should not include any
+ terminating null. If nSize is 0, it is assumed the string is null terminated. nSize
+ defaults to 0.
+ Returns nothing. A null pWStr is silently ignored.
+----------------------------------------------------------------------------------------------*/
+ void SwapWString(void * pWStr, size_t nSize /* = 0 */) //throw (std::invalid_argument)
+ {
+     // Byte-swap, in place, a string of 16-bit code units.
+     //   pWStr - buffer of 16-bit units; a null pointer is ignored
+     //   nSize - number of units to swap; 0 means "scan for a zero terminator"
+     if (pWStr == 0)
+     {
+ //      throw std::invalid_argument("null pointer given");
+         return;
+     }
+
+     uint16 * pStr = reinterpret_cast<uint16 *>(pWStr);
+
+     // Measure the string in 16-bit units ourselves. wcslen() is not usable here:
+     // wchar_t is not guaranteed to be 16 bits wide, so it would step through the
+     // buffer with the wrong stride and report the wrong length (or run off the end).
+     // A zero unit is zero in either byte order, so the scan works before swapping.
+     size_t cch = nSize;
+     if (cch == 0)
+     {
+         while (pStr[cch] != 0)
+             ++cch;
+     }
+
+     uint16 * const pStrEnd = pStr + cch;
+     for (; pStr != pStrEnd; ++pStr)
+         *pStr = be::swap(*pStr);
+ }
+#endif
+
+/*----------------------------------------------------------------------------------------------
+ Get the left-side bearing and advance width based on the given tables and Glyph ID.
+ Return true if successful, false otherwise. On false the outputs must not be relied upon:
+ one or both of them may have been left unmodified.
+----------------------------------------------------------------------------------------------*/
+ bool HorMetrics(gid16 nGlyphId, const void * pHmtx, size_t lHmtxSize, const void * pHhea,
+         int & nLsb, unsigned int & nAdvWid)
+ {
+     // Fetch the advance width and left-side bearing of nGlyphId.
+     //   pHmtx / lHmtxSize - hmtx table data and its size in bytes
+     //   pHhea             - hhea table; supplies num_long_hor_metrics
+     //   nLsb, nAdvWid     - outputs; only guaranteed to be set when true is returned
+     // Returns false when the entry for nGlyphId does not fit inside lHmtxSize bytes.
+     const Sfnt::HorizontalMetric * phmtx =
+         reinterpret_cast<const Sfnt::HorizontalMetric *>(pHmtx);
+
+     const Sfnt::HorizontalHeader * phhea =
+         reinterpret_cast<const Sfnt::HorizontalHeader *>(pHhea);
+
+     size_t cLongHorMetrics = be::swap(phhea->num_long_hor_metrics);
+     if (nGlyphId < cLongHorMetrics)
+     {   // the glyph has a full (advance, lsb) record of its own
+         if ((nGlyphId + 1) * sizeof(Sfnt::HorizontalMetric) > lHmtxSize) return false;
+         nAdvWid = be::swap(phmtx[nGlyphId].advance_width);
+         nLsb = be::swap(phmtx[nGlyphId].left_side_bearing);
+     }
+     else
+     {
+         // The glyph only has a bare lsb stored after the full records; it shares
+         // the advance width of the last full record.
+         size_t lLsbOffset = sizeof(Sfnt::HorizontalMetric) * cLongHorMetrics +
+             sizeof(int16) * (nGlyphId - cLongHorMetrics); // offset in bytes
+         // lHmtxSize is unsigned: test it against sizeof(int16) first, otherwise the
+         // subtraction below wraps for a table shorter than two bytes and the bounds
+         // check is silently defeated.
+         // NOTE(review): the >= comparison is kept exactly as before; it also rejects an
+         // lsb that occupies the final two bytes of the table - confirm that is intended.
+         if (cLongHorMetrics == 0
+             || lHmtxSize < sizeof(int16)
+             || lLsbOffset >= lHmtxSize - sizeof(int16))
+         {
+             nLsb = 0;
+             return false;
+         }
+         nAdvWid = be::swap(phmtx[cLongHorMetrics - 1].advance_width);
+         nLsb = be::peek<int16>(reinterpret_cast<const byte *>(phmtx) + lLsbOffset);
+     }
+
+     return true;
+ }
+
+/*----------------------------------------------------------------------------------------------
+ Return a pointer to the requested cmap subtable. By default find the Microsoft Unicode
+ subtable. Pass nEncodingId as -1 to find first table that matches only nPlatformId.
+ Return NULL if the subtable cannot be found.
+----------------------------------------------------------------------------------------------*/
+ const void * FindCmapSubtable(const void * pCmap, int nPlatformId, /* =3 */ int nEncodingId, /* = 1 */ size_t length)
+ {
+ // pCmap       - start of the cmap table
+ // nPlatformId - platform id an encoding record must carry to match
+ // nEncodingId - platform specific id to match, or -1 to accept any
+ // length      - size of the cmap table in bytes; 0 switches every bounds check below off
+ // Returns pCmap + offset of the first matching encoding record, or NULL/0 when there
+ // is no match or a bounds check fails.
+ const Sfnt::CharacterCodeMap * pTable = reinterpret_cast<const Sfnt::CharacterCodeMap *>(pCmap);
+ uint16 csuPlatforms = be::swap(pTable->num_subtables);
+ // Reject a table too short to hold the header plus all encoding records.
+ // (8 is presumably the size of one encoding record, one of which is already
+ // counted in sizeof(Sfnt::CharacterCodeMap) - confirm against the struct.)
+ if (length && (sizeof(Sfnt::CharacterCodeMap) + 8 * (csuPlatforms - 1) > length))
+ return NULL;
+ for (int i = 0; i < csuPlatforms; i++)
+ {
+ if (be::swap(pTable->encoding[i].platform_id) == nPlatformId &&
+ (nEncodingId == -1 || be::swap(pTable->encoding[i].platform_specific_id) == nEncodingId))
+ {
+ uint32 offset = be::swap(pTable->encoding[i].offset);
+ const uint8 * pRtn = reinterpret_cast<const uint8 *>(pCmap) + offset;
+ if (length)
+ {
+ // the subtable's 16-bit format field must lie inside the table
+ if (offset > length - 2) return NULL;
+ // NOTE(review): be::read (unlike be::peek) appears to advance pRtn past the
+ // value it returns; the format 4 length read below relies on that. Confirm.
+ uint16 format = be::read<uint16>(pRtn);
+ if (format == 4)
+ {
+ if (offset > length - 4) return NULL;
+ uint16 subTableLength = be::peek<uint16>(pRtn);
+ // Last record: the subtable must fit in what remains of the cmap table.
+ // Otherwise its length is compared with the next record's offset value.
+ if (i + 1 == csuPlatforms)
+ {
+ if (subTableLength > length - offset)
+ return NULL;
+ }
+ else if (subTableLength > be::swap(pTable->encoding[i+1].offset))
+ return NULL;
+ }
+ if (format == 12)
+ {
+ if (offset > length - 6) return NULL;
+ // NOTE(review): if pRtn now sits 2 bytes into the subtable, this 32-bit read
+ // starts at byte 2, whereas a format 12 header presumably keeps its length at
+ // byte 4 (after a reserved word). Verify before relying on this check.
+ uint32 subTableLength = be::peek<uint32>(pRtn);
+ if (i + 1 == csuPlatforms)
+ {
+ if (subTableLength > length - offset)
+ return NULL;
+ }
+ else if (subTableLength > be::swap(pTable->encoding[i+1].offset))
+ return NULL;
+ }
+ }
+ // recomputed from pCmap because pRtn may have been moved above
+ return reinterpret_cast<const uint8 *>(pCmap) + offset;
+ }
+ }
+
+ return 0;
+ }
+
+/*----------------------------------------------------------------------------------------------
+ Check the Microsoft Unicode subtable for expected values
+----------------------------------------------------------------------------------------------*/
+ bool CheckCmapSubtable4(const void * pCmapSubtable4, const void * pCmapEnd /*, unsigned int maxgid*/)
+ {
+     // Sanity-check a format 4 cmap subtable that must end at or before pCmapEnd.
+     // Returns true only if every header check below passes.
+     const size_t cbAvail = (const byte *)pCmapEnd - (const byte *)pCmapSubtable4;
+     if (!pCmapSubtable4)
+         return false;
+
+     // Bob H say some freeware TT fonts have version 1 (eg, CALIGULA.TTF)
+     // so don't check subtable version. 21 Mar 2002 spec changes version to language.
+     const Sfnt::CmapSubTable * pHeader = reinterpret_cast<const Sfnt::CmapSubTable *>(pCmapSubtable4);
+     if (cbAvail < sizeof(*pHeader))
+         return false;
+     if (be::swap(pHeader->format) != 4)
+         return false;
+
+     const Sfnt::CmapSubTableFormat4 * pFmt4 = reinterpret_cast<const Sfnt::CmapSubTableFormat4 *>(pCmapSubtable4);
+     if (cbAvail < sizeof(*pFmt4))
+         return false;
+
+     // The declared length has to fit the available bytes and cover the fixed header.
+     uint16 cbDeclared = be::swap(pFmt4->length);
+     if (cbDeclared > cbAvail)
+         return false;
+     if (cbDeclared < sizeof(Sfnt::CmapSubTableFormat4))
+         return false;
+
+     // There must be at least one segment, and room for the four per-segment arrays.
+     uint16 cSegments = be::swap(pFmt4->seg_count_x2) >> 1;
+     if (!cSegments)
+         return false;
+     if (cbDeclared < sizeof(Sfnt::CmapSubTableFormat4) + 4 * cSegments * sizeof(uint16))
+         return false;
+
+     // The final segment has to end at 0xFFFF.
+     uint16 chLastEnd = be::peek<uint16>(pFmt4->end_code + cSegments - 1);
+     if (chLastEnd != 0xFFFF)
+         return false;
+ #if 0
+     int lastend = -1;
+     for (int i = 0; i < cSegments; ++i)
+     {
+         uint16 end = be::peek<uint16>(pFmt4->end_code + i);
+         uint16 start = be::peek<uint16>(pFmt4->end_code + cSegments + 1 + i);
+         int16 delta = be::peek<int16>(pFmt4->end_code + 2*cSegments + 1 + i);
+         uint16 offset = be::peek<uint16>(pFmt4->end_code + 3*cSegments + 1 + i);
+         if (lastend >= end || lastend >= start)
+             return false;
+         if (offset)
+         {
+             const uint16 *gstart = pFmt4->end_code + 3*cSegments + 1 + i + (offset >> 1);
+             const uint16 *gend = gstart + end - start;
+             if ((char *)gend >= (char *)pCmapSubtable4 + cbDeclared)
+                 return false;
+             while (gstart <= gend)
+             {
+                 uint16 g = be::peek<uint16>(gstart++);
+                 if (g && ((g + delta) & 0xFFFF) > maxgid)
+                     return false;
+             }
+         }
+         else if (((delta + end) & 0xFFFF) > maxgid)
+             return false;
+         lastend = end;
+     }
+ #endif
+     return true;
+ }
+
+/*----------------------------------------------------------------------------------------------
+ Return the Glyph ID for the given Unicode ID in the Microsoft Unicode subtable.
+ (Actually this code only depends on subtable being format 4.)
+ Return 0 if the Unicode ID is not in the subtable.
+----------------------------------------------------------------------------------------------*/
+ gid16 CmapSubtable4Lookup(const void * pCmapSubtabel4, unsigned int nUnicodeId, int rangeKey)
+ {
+ // pCmapSubtabel4 - start of a format 4 cmap subtable
+ // nUnicodeId     - code point to map
+ // rangeKey       - if non-zero, used directly as the index of the segment to try, with
+ //                  no binary search and no range check on the index itself; 0 means
+ //                  "search", so segment 0 is always found through the search
+ // Returns the glyph id, or 0 when the code point is not covered.
+ const Sfnt::CmapSubTableFormat4 * pTable = reinterpret_cast<const Sfnt::CmapSubTableFormat4 *>(pCmapSubtabel4);
+
+ // number of segments (the table stores twice the count)
+ uint16 nSeg = be::swap(pTable->seg_count_x2) >> 1;
+
+ uint16 n;
+ const uint16 * pLeft, * pMid;
+ uint16 cMid, chStart, chEnd;
+
+ if (rangeKey)
+ {
+ pMid = &(pTable->end_code[rangeKey]);
+ chEnd = be::peek<uint16>(pMid);
+ }
+ else
+ {
+ // Binary search of the endCode[] array
+ // pLeft is the start of the window still under consideration, n its size.
+ pLeft = &(pTable->end_code[0]);
+ n = nSeg;
+ while (n > 0)
+ {
+ cMid = n >> 1; // Pick an element in the middle
+ pMid = pLeft + cMid;
+ chEnd = be::peek<uint16>(pMid);
+ if (nUnicodeId <= chEnd)
+ {
+ if (cMid == 0 || nUnicodeId > be::peek<uint16>(pMid -1))
+ break; // Must be this seg or none!
+ n = cMid; // Continue on left side, omitting mid point
+ }
+ else
+ {
+ pLeft = pMid + 1; // Continue on right side, omitting mid point
+ n -= (cMid + 1);
+ }
+ }
+
+ // the window emptied without a break: no segment ends at or after nUnicodeId
+ // (this also covers nSeg == 0, where pMid was never assigned)
+ if (!n)
+ return 0;
+ }
+
+ // Ok, we're down to one segment and pMid points to the endCode element
+ // Either this is it or none is.
+
+ // Step from endCode[i] to the matching start code: nSeg entries plus one extra word.
+ chStart = be::peek<uint16>(pMid += nSeg + 1);
+ if (chEnd >= nUnicodeId && nUnicodeId >= chStart)
+ {
+ // Found correct segment. Find Glyph Id
+ // Each further step of nSeg entries lands on the same segment's slot in the next array.
+ int16 idDelta = be::peek<uint16>(pMid += nSeg);
+ uint16 idRangeOffset = be::peek<uint16>(pMid += nSeg);
+
+ if (idRangeOffset == 0)
+ return (uint16)(idDelta + nUnicodeId); // must use modulus 2^16
+
+ // Look up value in glyphIdArray
+ // offset is counted in 16-bit words from the start of the subtable: the position of
+ // this segment's idRangeOffset slot, plus half the byte offset stored there, plus the
+ // code point's distance from the segment start.
+ const ptrdiff_t offset = (nUnicodeId - chStart) + (idRangeOffset >> 1) +
+ (pMid - reinterpret_cast<const uint16 *>(pTable));
+ // refuse to read a word that would end beyond the subtable's declared length
+ if (offset * 2 + 1 >= be::swap<uint16>(pTable->length))
+ return 0;
+ gid16 nGlyphId = be::peek<uint16>(reinterpret_cast<const uint16 *>(pTable)+offset);
+ // If this value is 0, return 0. Else add the idDelta
+ return nGlyphId ? nGlyphId + idDelta : 0;
+ }
+
+ return 0;
+ }
+
+/*----------------------------------------------------------------------------------------------
+ Return the next Unicode value in the cmap. Pass 0 to obtain the first item.
+ Returns 0xFFFF as the last item.
+ pRangeKey is an optional key that is used to optimize the search; its value is the range
+ in which the character is found.
+----------------------------------------------------------------------------------------------*/
+ unsigned int CmapSubtable4NextCodepoint(const void *pCmap31, unsigned int nUnicodeId, int * pRangeKey)
+ {
+     // Step to the code point that follows nUnicodeId in a format 4 subtable.
+     // *pRangeKey (optional) is read as a hint for the segment to start from and is
+     // updated with the segment of the value returned.
+     const Sfnt::CmapSubTableFormat4 * pFmt4 = reinterpret_cast<const Sfnt::CmapSubTableFormat4 *>(pCmap31);
+
+     uint16 cSegs = be::swap(pFmt4->seg_count_x2) >> 1;
+     uint32 uPrev = (uint32)nUnicodeId;
+
+     // start codes follow the end code array and one reserved word
+     const uint16 * pStarts = &(pFmt4->end_code[0]) + cSegs + 1;
+
+     if (uPrev == 0)
+     {
+         // caller wants the very first code point
+         if (pRangeKey)
+             *pRangeKey = 0;
+         return be::peek<uint16>(pStarts);
+     }
+     if (uPrev >= 0xFFFF)
+     {
+         // already at (or beyond) the terminating value
+         if (pRangeKey)
+             *pRangeKey = cSegs - 1;
+         return 0xFFFF;
+     }
+
+     // Begin at the hinted segment and correct it in either direction in case the
+     // hint was wrong.
+     int iSeg = (pRangeKey) ? *pRangeKey : 0;
+     for (; iSeg > 0; --iSeg)
+     {
+         if (be::peek<uint16>(pStarts + iSeg) <= uPrev)
+             break;
+     }
+     for (; iSeg < cSegs - 1; ++iSeg)
+     {
+         if (be::peek<uint16>(pFmt4->end_code + iSeg) >= uPrev)
+             break;
+     }
+
+     // iSeg is now the segment taken to contain uPrev.
+     unsigned int uSegStart = be::peek<uint16>(pStarts + iSeg);
+     unsigned int uSegEnd = be::peek<uint16>(pFmt4->end_code + iSeg);
+
+     // uPrev fell into a gap before this segment: pretend it sits just before the
+     // segment so that the segment's first code point is returned.
+     if (uSegStart > uPrev)
+         uPrev = uSegStart - 1;
+
+     const bool fSameSegment = uSegEnd > uPrev;
+     if (pRangeKey)
+         *pRangeKey = fSameSegment ? iSeg : iSeg + 1;
+
+     if (fSameSegment)
+         return uPrev + 1;   // simply the next successive code point
+
+     // Otherwise take the first code point of the following segment; fall back to
+     // 0xFFFF when there is none.
+     if (iSeg + 1 >= cSegs)
+         return 0xFFFF;
+     return be::peek<uint16>(pStarts + iSeg + 1);
+ }
+
+/*----------------------------------------------------------------------------------------------
+ Check the Microsoft UCS-4 subtable for expected values.
+----------------------------------------------------------------------------------------------*/
+ bool CheckCmapSubtable12(const void *pCmapSubtable12, const void *pCmapEnd /*, unsigned int maxgid*/)
+ {
+     // Sanity-check a format 12 cmap subtable that must end at or before pCmapEnd.
+     // Returns true only if every header check below passes.
+     const size_t cbAvail = (const byte *)pCmapEnd - (const byte *)pCmapSubtable12;
+     if (!pCmapSubtable12)
+         return false;
+
+     const Sfnt::CmapSubTable * pHeader = reinterpret_cast<const Sfnt::CmapSubTable *>(pCmapSubtable12);
+     if (cbAvail < sizeof(*pHeader))
+         return false;
+     if (be::swap(pHeader->format) != 12)
+         return false;
+
+     const Sfnt::CmapSubTableFormat12 * pFmt12 = reinterpret_cast<const Sfnt::CmapSubTableFormat12 *>(pCmapSubtable12);
+     if (cbAvail < sizeof(*pFmt12))
+         return false;
+
+     // The declared length has to fit the available bytes and cover the fixed header.
+     uint32 cbDeclared = be::swap(pFmt12->length);
+     if (cbDeclared > cbAvail)
+         return false;
+     if (cbDeclared < sizeof(Sfnt::CmapSubTableFormat12))
+         return false;
+
+     // The group count is capped, and the declared length must be exactly the
+     // structure size plus three 32-bit words for every group after the first.
+     uint32 cGroups = be::swap(pFmt12->num_groups);
+     if (cGroups > 0x10000000)
+         return false;
+     if (cbDeclared != (sizeof(Sfnt::CmapSubTableFormat12) + (cGroups - 1) * sizeof(uint32) * 3))
+         return false;
+ #if 0
+     for (unsigned int i = 0; i < cGroups; ++i)
+     {
+         if (be::swap(pFmt12->group[i].end_char_code)  - be::swap(pFmt12->group[i].start_char_code) + be::swap(pFmt12->group[i].start_glyph_id) > maxgid)
+             return false;
+         if (i > 0 && be::swap(pFmt12->group[i].start_char_code) <= be::swap(pFmt12->group[i-1].end_char_code))
+             return false;
+     }
+ #endif
+     return true;
+ }
+
+/*----------------------------------------------------------------------------------------------
+ Return the Glyph ID for the given Unicode ID in the Microsoft UCS-4 subtable.
+ (Actually this code only depends on subtable being format 12.)
+ Return 0 if the Unicode ID is not in the subtable.
+----------------------------------------------------------------------------------------------*/
+ gid16 CmapSubtable12Lookup(const void * pCmap310, unsigned int uUnicodeId, int rangeKey)
+ {
+     // Linear scan of the format 12 groups, starting at group index rangeKey, for
+     // the one that covers uUnicodeId. Returns 0 when no group from there on does.
+     const Sfnt::CmapSubTableFormat12 * pFmt12 = reinterpret_cast<const Sfnt::CmapSubTableFormat12 *>(pCmap310);
+
+     //uint32 uLength = be::swap(pFmt12->length); //could use to test for premature end of table
+     const uint32 cGroups = be::swap(pFmt12->num_groups);
+
+     unsigned int iGroup = rangeKey;
+     while (iGroup < cGroups)
+     {
+         const uint32 uFirst = be::swap(pFmt12->group[iGroup].start_char_code);
+         const uint32 uLast = be::swap(pFmt12->group[iGroup].end_char_code);
+         if (uUnicodeId < uFirst || uUnicodeId > uLast)
+         {
+             ++iGroup;
+             continue;
+         }
+
+         // glyph ids run consecutively from the group's first glyph id
+         const uint32 uFirstGid = be::swap(pFmt12->group[iGroup].start_glyph_id);
+         const uint32 uDelta = uUnicodeId - uFirst;
+         return static_cast<gid16>(uFirstGid + uDelta);
+     }
+
+     return 0;
+ }
+
+/*----------------------------------------------------------------------------------------------
+ Return the next Unicode value in the cmap. Pass 0 to obtain the first item.
+ Returns 0x10FFFF as the last item.
+ pRangeKey is an optional key that is used to optimize the search; its value is the range
+ in which the character is found.
+----------------------------------------------------------------------------------------------*/
+ unsigned int CmapSubtable12NextCodepoint(const void *pCmap310, unsigned int nUnicodeId, int * pRangeKey)
+ {
+     // Step to the code point that follows nUnicodeId in a format 12 subtable.
+     // *pRangeKey (optional) is read as a hint for the group to start from and is
+     // updated with the group of the value returned.
+     const Sfnt::CmapSubTableFormat12 * pFmt12 = reinterpret_cast<const Sfnt::CmapSubTableFormat12 *>(pCmap310);
+
+     int cGroups = be::swap(pFmt12->num_groups);
+     uint32 uPrev = (uint32)nUnicodeId;
+
+     if (uPrev == 0)
+     {
+         // caller wants the very first code point
+         if (pRangeKey)
+             *pRangeKey = 0;
+         return be::swap(pFmt12->group[0].start_char_code);
+     }
+     if (uPrev >= 0x10FFFF)
+     {
+         // already at (or beyond) the terminating value
+         if (pRangeKey)
+             *pRangeKey = cGroups;
+         return 0x10FFFF;
+     }
+
+     // Begin at the hinted group and correct it in either direction in case the
+     // hint was wrong.
+     int iGroup = (pRangeKey) ? *pRangeKey : 0;
+     for (; iGroup > 0; --iGroup)
+     {
+         if (be::swap(pFmt12->group[iGroup].start_char_code) <= uPrev)
+             break;
+     }
+     for (; iGroup < cGroups - 1; ++iGroup)
+     {
+         if (be::swap(pFmt12->group[iGroup].end_char_code) >= uPrev)
+             break;
+     }
+
+     // iGroup is now the group taken to contain uPrev.
+     unsigned int uGroupStart = be::swap(pFmt12->group[iGroup].start_char_code);
+     unsigned int uGroupEnd = be::swap(pFmt12->group[iGroup].end_char_code);
+
+     // uPrev fell into a gap before this group: pretend it sits just before the
+     // group so that the group's first code point is returned.
+     if (uGroupStart > uPrev)
+         uPrev = uGroupStart - 1;
+
+     const bool fSameGroup = uGroupEnd > uPrev;
+     if (pRangeKey)
+         *pRangeKey = fSameGroup ? iGroup : iGroup + 1;
+
+     if (fSameGroup)
+         return uPrev + 1;   // simply the next successive code point
+
+     // Otherwise the first code point of the following group, or 10FFFF if we're done.
+     if (iGroup + 1 >= cGroups)
+         return 0x10FFFF;
+     return be::swap(pFmt12->group[iGroup + 1].start_char_code);
+ }
+
+/*----------------------------------------------------------------------------------------------
+ Return the offset stored in the loca table for the given Glyph ID.
+ (This offset is into the glyf table.)
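+ Return size_t(-1) if the glyph has no data (its offset equals the next entry's offset), or
+ size_t(-2) if the Glyph ID is out of range or the loca format is not recognised.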
+ Technically this method should return an unsigned long but it is unlikely the offset will
+ exceed 2^31.
+----------------------------------------------------------------------------------------------*/
+ size_t LocaLookup(gid16 nGlyphId,
+         const void * pLoca, size_t lLocaSize,
+         const void * pHead) // throw (std::out_of_range)
+ {
+     // Result conventions:
+     //   size_t(-2)  nGlyphId (or the entry after it) is outside the loca table, or the
+     //               head table's index_to_loc_format is neither short nor long
+     //   size_t(-1)  the entry equals the following entry, i.e. the glyph is empty
+     //   otherwise   the glyph's byte offset into the glyf table
+     const Sfnt::FontHeader * pFontHead = reinterpret_cast<const Sfnt::FontHeader *>(pHead);
+     const size_t kOutOfRange = static_cast<size_t>(-2);
+     const size_t kEmptyGlyph = static_cast<size_t>(-1);
+
+     // CheckTable verifies the index_to_loc_format is valid
+     if (be::swap(pFontHead->index_to_loc_format) == Sfnt::FontHeader::ShortIndexLocFormat)
+     {   // two byte entries holding the offset divided by two
+         // the entry after nGlyphId (possibly the sentinel) must exist as well
+         if (!(lLocaSize > 1 && nGlyphId + 1u < lLocaSize >> 1))
+             return kOutOfRange;
+
+         const uint16 * pShortEntries = reinterpret_cast<const uint16 *>(pLoca);
+         const size_t ibThis = be::peek<uint16>(pShortEntries + nGlyphId) << 1;
+         const size_t ibNext = static_cast<size_t>(be::peek<uint16>(pShortEntries + nGlyphId + 1) << 1);
+         return (ibThis == ibNext) ? kEmptyGlyph : ibThis;
+     }
+
+     if (be::swap(pFontHead->index_to_loc_format) == Sfnt::FontHeader::LongIndexLocFormat)
+     {   // four byte entries holding the offset itself
+         if (!(lLocaSize > 3 && nGlyphId + 1u < lLocaSize >> 2))
+             return kOutOfRange;
+
+         const uint32 * pLongEntries = reinterpret_cast<const uint32 *>(pLoca);
+         const size_t ibThis = be::peek<uint32>(pLongEntries + nGlyphId);
+         const size_t ibNext = static_cast<size_t>(be::peek<uint32>(pLongEntries + nGlyphId + 1));
+         return (ibThis == ibNext) ? kEmptyGlyph : ibThis;
+     }
+
+     // unrecognised loca format
+     return kOutOfRange;
+     //throw std::out_of_range("glyph id out of range for font");
+ }
+
+/*----------------------------------------------------------------------------------------------
+ Return a pointer into the glyf table based on the given offset (from LocaLookup).
+ Return NULL on error.
+----------------------------------------------------------------------------------------------*/
+ void * GlyfLookup(const void * pGlyf, size_t nGlyfOffset, size_t nTableLen)
+ {
+     // pGlyf       - start of the glyf table
+     // nGlyfOffset - byte offset of the glyph, as produced by LocaLookup (its error
+     //               values are very large numbers and are rejected below)
+     // nTableLen   - size of the glyf table in bytes
+     // Returns a pointer to the glyph, or NULL if the offset is not acceptable.
+     const uint8 * pByte = reinterpret_cast<const uint8 *>(pGlyf);
+
+     // A table too small for even one glyph header can hold no glyph. Testing this
+     // first also keeps the unsigned subtraction below from wrapping around, which
+     // would otherwise let any offset through.
+     if (nTableLen < sizeof(Sfnt::Glyph))
+         return NULL;
+
+     if (OVERFLOW_OFFSET_CHECK(pByte, nGlyfOffset) || nGlyfOffset >= nTableLen - sizeof(Sfnt::Glyph))
+         return NULL;
+     return const_cast<uint8 *>(pByte + nGlyfOffset);
+ }
+
+/*----------------------------------------------------------------------------------------------
+ Get the bounding box coordinates for a simple glyf entry (non-composite).
+ Return true if successful, false otherwise.
+----------------------------------------------------------------------------------------------*/
+ bool GlyfBox(const void * pSimpleGlyf, int & xMin, int & yMin,
+         int & xMax, int & yMax)
+ {
+     // Copy the four bounding box fields out of the glyph header, converting them
+     // with be::swap. No validation is performed; the result is always true.
+     const Sfnt::Glyph * pHeader = reinterpret_cast<const Sfnt::Glyph *>(pSimpleGlyf);
+
+     // horizontal extent
+     xMin = be::swap(pHeader->x_min);
+     xMax = be::swap(pHeader->x_max);
+
+     // vertical extent
+     yMin = be::swap(pHeader->y_min);
+     yMax = be::swap(pHeader->y_max);
+
+     return true;
+ }
+
+#ifdef ALL_TTFUTILS
+/*----------------------------------------------------------------------------------------------
+ Return the number of contours for a simple glyf entry (non-composite)
+ Returning -1 means this is a composite glyph
+----------------------------------------------------------------------------------------------*/
+ int GlyfContourCount(const void * pSimpleGlyf)
+ {
+     // The contour count is read straight from the glyph header; a value of -1
+     // marks a composite glyph.
+     const int cContours =
+         be::swap(reinterpret_cast<const Sfnt::Glyph *>(pSimpleGlyf)->number_of_contours);
+     return cContours;
+ }
+
+/*----------------------------------------------------------------------------------------------
+ Get the point numbers for the end points of the glyph contours for a simple
+ glyf entry (non-composite).
+ cnPointsTotal - count of contours from GlyfContourCount(); (same as number of end points)
+ prgnContourEndPoints - should point to a buffer large enough to hold cnPoints integers
+ cnPoints - count of points placed in above range
+ Return true if successful, false otherwise.
+ False could indicate a multi-level composite glyphs.
+----------------------------------------------------------------------------------------------*/
+ bool GlyfContourEndPoints(const void * pSimpleGlyf, int * prgnContourEndPoint,
+         int cnPointsTotal, int & cnPoints)
+ {
+     // Copies at most cnPointsTotal contour end points into prgnContourEndPoint.
+     // cnPoints receives the glyph's full contour count, even when fewer entries
+     // were stored because the buffer was smaller. Returns false, leaving cnPoints
+     // untouched, for a negative contour count (composite glyph).
+     const Sfnt::SimpleGlyph * pSimple = reinterpret_cast<const Sfnt::SimpleGlyph *>(pSimpleGlyf);
+
+     const int cContours = be::swap(pSimple->number_of_contours);
+     if (cContours < 0)
+         return false; // this method isn't supposed handle composite glyphs
+
+     // never store more entries than the caller has room for
+     const int cToStore = (cContours < cnPointsTotal) ? cContours : cnPointsTotal;
+     for (int iContour = 0; iContour < cToStore; ++iContour)
+         prgnContourEndPoint[iContour] = be::swap(pSimple->end_pts_of_contours[iContour]);
+
+     cnPoints = cContours;
+     return true;
+ }
+
+/*----------------------------------------------------------------------------------------------
+ Get the points for a simple glyf entry (non-composite)
+ cnPointsTotal - count of points from largest end point obtained from GlyfContourEndPoints
+ prgnX & prgnY - should point to buffers large enough to hold cnPointsTotal integers
+ The ranges are parallel so that coordinates for point(n) are found at offset n in both
+ ranges. This is raw point data with relative coordinates.
+ prgbFlag - should point to a buffer a large enough to hold cnPointsTotal bytes
+ This range is parallel to the prgnX & prgnY
+ cnPoints - count of points placed in above ranges
+ Return true if successful, false otherwise.
+ False could indicate a composite glyph
+----------------------------------------------------------------------------------------------*/
+ bool GlyfPoints(const void * pSimpleGlyf, int * prgnX, int * prgnY,
+         char * prgbFlag, int cnPointsTotal, int & cnPoints)
+ {
+     // Decode the flags and the raw (relative) coordinates of a simple glyph.
+     //   prgnX, prgnY, prgbFlag - parallel output arrays with room for cnPointsTotal entries
+     //   cnPoints               - receives the number of points decoded (set on success only)
+     // Returns false for a glyph without contours or a composite glyph, when the glyph
+     // has more points than cnPointsTotal, or when the flag data describes more points
+     // than the glyph has.
+     // Note: the glyph's size is not passed in, so reads from the glyph data itself
+     // cannot be bounds-checked here.
+     using namespace Sfnt;
+
+     const Sfnt::SimpleGlyph * pGlyph = reinterpret_cast<const Sfnt::SimpleGlyph *>(pSimpleGlyf);
+     int cContours = be::swap(pGlyph->number_of_contours);
+     // return false for composite glyph
+     if (cContours <= 0)
+         return false;
+     // the last contour's end point is the highest point index
+     int cPts = be::swap(pGlyph->end_pts_of_contours[cContours - 1]) + 1;
+     if (cPts > cnPointsTotal)
+         return false;
+
+     // skip over bounding box data & point to byte count of instructions (hints)
+     const uint8 * pbGlyph = reinterpret_cast<const uint8 *>
+         (&pGlyph->end_pts_of_contours[cContours]);
+
+     // skip over hints & point to first flag
+     // (be::peek reads byte-wise, so it stays valid wherever pbGlyph ends up pointing)
+     int cbHints = be::peek<uint16>(pbGlyph);
+     pbGlyph += sizeof(uint16);
+     pbGlyph += cbHints;
+
+     // load flags & point to first x coordinate
+     int iFlag = 0;
+     while (iFlag < cPts)
+     {
+         const uint8 bFlag = *pbGlyph++;
+         prgbFlag[iFlag++] = (char)bFlag;
+
+         if (bFlag & SimpleGlyph::Repeat)
+         {   // flag is repeated; count of extra copies specified by next byte
+             const int cFlags = (int)*pbGlyph++;
+             // Refuse a repeat run that would go past the glyph's point count
+             // before writing anything: the caller's buffer is only guaranteed
+             // to hold cnPointsTotal (>= cPts) entries.
+             if (cFlags > cPts - iFlag)
+                 return false;
+             for (int i = 0; i < cFlags; i++)
+                 prgbFlag[iFlag + i] = (char)bFlag;
+             iFlag += cFlags;
+         }
+     }
+     // here iFlag == cPts: exactly one flag has been stored per point
+
+     // load x coordinates
+     for (int iPt = 0; iPt < cPts; ++iPt)
+     {
+         const char chFlag = prgbFlag[iPt];
+         if (chFlag & SimpleGlyph::XShort)
+         {   // one byte magnitude; the XIsPos flag supplies the sign
+             const int nMagnitude = *pbGlyph++;
+             prgnX[iPt] = (chFlag & SimpleGlyph::XIsPos) ? nMagnitude : -nMagnitude;
+         }
+         else if (chFlag & SimpleGlyph::XIsSame)
+         {   // no data stored - do NOT increment pbGlyph
+             prgnX[iPt] = 0;
+         }
+         else
+         {   // signed two byte value
+             prgnX[iPt] = be::peek<int16>(pbGlyph);
+             pbGlyph += sizeof(int16);
+         }
+     }
+
+     // load y coordinates
+     for (int iPt = 0; iPt < cPts; ++iPt)
+     {
+         const char chFlag = prgbFlag[iPt];
+         if (chFlag & SimpleGlyph::YShort)
+         {   // one byte magnitude; the YIsPos flag supplies the sign
+             const int nMagnitude = *pbGlyph++;
+             prgnY[iPt] = (chFlag & SimpleGlyph::YIsPos) ? nMagnitude : -nMagnitude;
+         }
+         else if (chFlag & SimpleGlyph::YIsSame)
+         {   // no data stored - do NOT increment pbGlyph
+             prgnY[iPt] = 0;
+         }
+         else
+         {   // signed two byte value
+             prgnY[iPt] = be::peek<int16>(pbGlyph);
+             pbGlyph += sizeof(int16);
+         }
+     }
+
+     cnPoints = cPts;
+     return true;
+ }
+
+/*----------------------------------------------------------------------------------------------
+ Fill prgnCompId with the component Glyph IDs from pSimpleGlyf.
+ Client must allocate space before calling.
+ pSimpleGlyf - assumed to point to a composite glyph
+ cCompIdTotal - the number of elements in prgnCompId
+ cCompId - the total number of Glyph IDs stored in prgnCompId
+ Return true if successful, false otherwise
+ False could indicate a non-composite glyph or the input array was not big enough
+----------------------------------------------------------------------------------------------*/
+ bool GetComponentGlyphIds(const void * pSimpleGlyf, int * prgnCompId,
+         size_t cnCompIdTotal, size_t & cnCompId)
+ {
+     // Collect the glyph ids of every component of a composite glyph.
+     //   prgnCompId / cnCompIdTotal - caller's array and its capacity
+     //   cnCompId                   - receives the number of ids stored (set on success only)
+     // Returns false for a non-composite glyph, or when the glyph has more components
+     // than the array can hold.
+     using namespace Sfnt;
+
+     if (GlyfContourCount(pSimpleGlyf) >= 0)
+         return false;
+
+     const Sfnt::SimpleGlyph * pGlyph = reinterpret_cast<const Sfnt::SimpleGlyph *>(pSimpleGlyf);
+     // for a composite glyph, the special data begins here
+     const uint8 * pbGlyph = reinterpret_cast<const uint8 *>(&pGlyph->end_pts_of_contours[0]);
+
+     uint16 GlyphFlags;
+     size_t iCurrentComp = 0;
+     do
+     {
+         // Check capacity BEFORE storing: this keeps a zero-length array from being
+         // written to, and lets a glyph whose components exactly fill the array succeed.
+         if (iCurrentComp >= cnCompIdTotal)
+             return false;
+
+         // each record starts with a flags word followed by the component's glyph id
+         GlyphFlags = be::peek<uint16>(pbGlyph);
+         pbGlyph += sizeof(uint16);
+         prgnCompId[iCurrentComp++] = be::peek<uint16>(pbGlyph);
+         pbGlyph += sizeof(uint16);
+
+         // step over the placement arguments and the optional transform
+         int nOffset = 0;
+         nOffset += GlyphFlags & CompoundGlyph::Arg1Arg2Words ? 4 : 2;
+         nOffset += GlyphFlags & CompoundGlyph::HaveScale ? 2 : 0;
+         nOffset += GlyphFlags & CompoundGlyph::HaveXAndYScale ? 4 : 0;
+         nOffset += GlyphFlags & CompoundGlyph::HaveTwoByTwo ? 8 : 0;
+         pbGlyph += nOffset;
+     } while (GlyphFlags & CompoundGlyph::MoreComponents);
+
+     cnCompId = iCurrentComp;
+
+     return true;
+ }
+
+/*----------------------------------------------------------------------------------------------
+ Return info on how a component glyph is to be placed
+ pSimpleGlyph - assumed to point to a composite glyph
+ nCompId - glyph id for component of interest
+ bOffset - if true, a & b are the x & y offsets for this component
+ if false, b is the point on this component that is attaching to point a on the
+ preceding glyph
+ Return true if successful, false otherwise
+ False could indicate a non-composite glyph or that component wasn't found
+----------------------------------------------------------------------------------------------*/
+ bool GetComponentPlacement(const void * pSimpleGlyf, int nCompId,
+         bool fOffset, int & a, int & b)
+ {
+     // Find the component record for glyph nCompId inside a composite glyph and
+     // return its two placement arguments in a and b.
+     // Returns false (with a = b = 0) for a non-composite glyph or when no record
+     // refers to nCompId.
+     // Note: fOffset is passed by value, so the assignments to it below are not
+     // visible to the caller; they are kept so the signature stays unchanged.
+     using namespace Sfnt;
+
+     if (GlyfContourCount(pSimpleGlyf) >= 0)
+         return false;
+
+     const Sfnt::SimpleGlyph * pGlyph = reinterpret_cast<const Sfnt::SimpleGlyph *>(pSimpleGlyf);
+     // for a composite glyph, the special data begins here
+     const uint8 * pbGlyph = reinterpret_cast<const uint8 *>(&pGlyph->end_pts_of_contours[0]);
+
+     uint16 GlyphFlags;
+     do
+     {
+         // each record starts with a flags word followed by the component's glyph id
+         GlyphFlags = be::peek<uint16>(pbGlyph);
+         pbGlyph += sizeof(uint16);
+         const bool fWanted = be::peek<uint16>(pbGlyph) == nCompId;
+         pbGlyph += sizeof(uint16); // skip over glyph id of component
+
+         if (fWanted)
+         {
+             const bool fArgsAreXY =
+                 (GlyphFlags & CompoundGlyph::ArgsAreXYValues) == CompoundGlyph::ArgsAreXYValues;
+             fOffset = fArgsAreXY;
+
+             if (GlyphFlags & CompoundGlyph::Arg1Arg2Words)
+             {   // args are 16-bit words
+                 a = be::peek<int16>(pbGlyph);
+                 b = be::peek<int16>(pbGlyph + sizeof(int16));
+             }
+             else if (fArgsAreXY)
+             {   // args are signed bytes: go through signed char so that negative
+                 // x/y offsets keep their sign (pbGlyph itself points at unsigned bytes)
+                 a = static_cast<signed char>(pbGlyph[0]);
+                 b = static_cast<signed char>(pbGlyph[1]);
+             }
+             else
+             {   // args are point numbers: plain unsigned bytes
+                 a = pbGlyph[0];
+                 b = pbGlyph[1];
+             }
+             return true;
+         }
+
+         // not the one: step over the placement arguments and the optional transform
+         int nOffset = 0;
+         nOffset += GlyphFlags & CompoundGlyph::Arg1Arg2Words ? 4 : 2;
+         nOffset += GlyphFlags & CompoundGlyph::HaveScale ? 2 : 0;
+         nOffset += GlyphFlags & CompoundGlyph::HaveXAndYScale ? 4 : 0;
+         nOffset += GlyphFlags & CompoundGlyph::HaveTwoByTwo ? 8 : 0;
+         pbGlyph += nOffset;
+     } while (GlyphFlags & CompoundGlyph::MoreComponents);
+
+     // didn't find requested component
+     fOffset = true;
+     a = 0;
+     b = 0;
+     return false;
+ }
+
+/*----------------------------------------------------------------------------------------------
+ Return info on how a component glyph is to be transformed
+ pSimpleGlyph - assumed to point to a composite glyph
+ nCompId - glyph id for component of interest
+ flt11, flt12, flt21, flt22 - a 2x2 matrix giving the transform
+ bTransOffset - whether to transform the offset from above method
+ The spec is unclear about the meaning of this flag
+ Currently - initialize to true for MS rasterizer and false for Mac rasterizer, then
+ on return it will indicate whether transform should apply to offset (MSDN CD 10/99)
+ Return true if successful, false otherwise
+ False could indicate a non-composite glyph or that component wasn't found
+----------------------------------------------------------------------------------------------*/
+ bool GetComponentTransform(const void * pSimpleGlyf, int nCompId,
+         float & flt11, float & flt12, float & flt21, float & flt22,
+         bool & fTransOffset)
+ {
+     // Find the component record for glyph nCompId inside a composite glyph and
+     // report its 2x2 transform. On entry fTransOffset selects which flag is
+     // consulted (true: UnscaledOffset, false: ScaledOffset); on return it says
+     // whether the transform applies to the offset.
+     // Returns false, with an identity matrix and fTransOffset == false, when the
+     // component is not found; returns false without touching the outputs for a
+     // non-composite glyph.
+     using namespace Sfnt;
+
+     if (GlyfContourCount(pSimpleGlyf) >= 0)
+         return false;
+
+     const Sfnt::SimpleGlyph * pComposite = reinterpret_cast<const Sfnt::SimpleGlyph *>(pSimpleGlyf);
+     // for a composite glyph, the special data begins here
+     const uint8 * pbRec = reinterpret_cast<const uint8 *>(&pComposite->end_pts_of_contours[0]);
+
+     for (;;)
+     {
+         // each record starts with a flags word followed by the component's glyph id
+         const uint16 wFlags = be::swap(*((uint16 *)pbRec));
+         pbRec += sizeof(uint16);
+         const bool fWanted = be::swap(*((uint16 *)pbRec)) == nCompId;
+         pbRec += sizeof(uint16); // skip over glyph id of component
+
+         // size of the placement data that follows the glyph id
+         const int cbArgs = wFlags & CompoundGlyph::Arg1Arg2Words ? 4 : 2;
+
+         if (fWanted)
+         {
+             pbRec += cbArgs; // skip over placement data
+
+             fTransOffset = fTransOffset
+                 ? !(wFlags & CompoundGlyph::UnscaledOffset)     // MS rasterizer
+                 : (wFlags & CompoundGlyph::ScaledOffset) != 0;  // Apple rasterizer
+
+             if (wFlags & CompoundGlyph::HaveScale)
+             {   // one scale shared by both axes
+                 flt11 = fixed_to_float<14>(be::swap(*(uint16 *)pbRec));
+                 pbRec += sizeof(uint16);
+                 flt22 = flt11;
+                 flt12 = 0;
+                 flt21 = 0;
+             }
+             else if (wFlags & CompoundGlyph::HaveXAndYScale)
+             {   // separate x and y scales
+                 flt11 = fixed_to_float<14>(be::swap(*(uint16 *)pbRec));
+                 pbRec += sizeof(uint16);
+                 flt22 = fixed_to_float<14>(be::swap(*(uint16 *)pbRec));
+                 pbRec += sizeof(uint16);
+                 flt12 = 0;
+                 flt21 = 0;
+             }
+             else if (wFlags & CompoundGlyph::HaveTwoByTwo)
+             {   // full matrix, stored in the order 11, 12, 21, 22
+                 flt11 = fixed_to_float<14>(be::swap(*(uint16 *)pbRec));
+                 pbRec += sizeof(uint16);
+                 flt12 = fixed_to_float<14>(be::swap(*(uint16 *)pbRec));
+                 pbRec += sizeof(uint16);
+                 flt21 = fixed_to_float<14>(be::swap(*(uint16 *)pbRec));
+                 pbRec += sizeof(uint16);
+                 flt22 = fixed_to_float<14>(be::swap(*(uint16 *)pbRec));
+                 pbRec += sizeof(uint16);
+             }
+             else
+             {   // identity transform
+                 flt11 = 1.0;
+                 flt12 = 0.0;
+                 flt21 = 0.0;
+                 flt22 = 1.0;
+             }
+             return true;
+         }
+
+         // not the one: step over the placement data and the optional transform
+         int cbSkip = cbArgs;
+         cbSkip += wFlags & CompoundGlyph::HaveScale ? 2 : 0;
+         cbSkip += wFlags & CompoundGlyph::HaveXAndYScale ? 4 : 0;
+         cbSkip += wFlags & CompoundGlyph::HaveTwoByTwo ? 8 : 0;
+         pbRec += cbSkip;
+
+         if (!(wFlags & CompoundGlyph::MoreComponents))
+             break;
+     }
+
+     // didn't find requested component
+     fTransOffset = false;
+     flt11 = 1;
+     flt12 = 0;
+     flt21 = 0;
+     flt22 = 1;
+     return false;
+ }
+#endif
+
+/*----------------------------------------------------------------------------------------------
+    Return a pointer into the glyf table based on the given tables and Glyph ID
+    Since this method doesn't check for spaces, it is good to call IsSpace before using it.
+    nGlyphId must address a real loca entry: the final (sentinel) entry is rejected, and so
+    is every id when the loca table is too small to hold any entry at all.
+    Return NULL on error.
+----------------------------------------------------------------------------------------------*/
+void * GlyfLookup(gid16 nGlyphId, const void * pGlyf, const void * pLoca,
+                  size_t lGlyfSize, size_t lLocaSize, const void * pHead)
+{
+    // test for valid glyph id
+    // CheckTable verifies the index_to_loc_format is valid
+
+    const Sfnt::FontHeader * pTable
+        = reinterpret_cast<const Sfnt::FontHeader *>(pHead);
+
+    // The comparisons are written as "id + 1 >= entry count" rather than
+    // "id >= entry count - 1": with an entry count of zero the subtraction would wrap
+    // around to SIZE_MAX and let every glyph id through.
+    if (be::swap(pTable->index_to_loc_format) == Sfnt::FontHeader::ShortIndexLocFormat)
+    {   // loca entries are two bytes (and have been divided by two)
+        if (nGlyphId + 1u >= (lLocaSize >> 1)) // don't allow nGlyphId to access sentinel
+        {
+//          throw std::out_of_range("glyph id out of range for font");
+            return NULL;
+        }
+    }
+    if (be::swap(pTable->index_to_loc_format) == Sfnt::FontHeader::LongIndexLocFormat)
+    {   // loca entries are four bytes
+        if (nGlyphId + 1u >= (lLocaSize >> 2)) // don't allow nGlyphId to access sentinel
+        {
+//          throw std::out_of_range("glyph id out of range for font");
+            return NULL;
+        }
+    }
+
+    size_t lGlyfOffset = LocaLookup(nGlyphId, pLoca, lLocaSize, pHead);
+    void * pSimpleGlyf = GlyfLookup(pGlyf, lGlyfOffset, lGlyfSize); // invalid loca offset returns null
+    return pSimpleGlyf;
+}
+
+#ifdef ALL_TTFUTILS
+/*----------------------------------------------------------------------------------------------
+    Report whether a Glyph ID has no data in the glyf table, i.e. whether its loca entry and
+    the following loca entry resolve to the same offset. White space glyphs are like this:
+    they carry no outline, though they still have metric data in hmtx, etc.
+----------------------------------------------------------------------------------------------*/
+bool IsSpace(gid16 nGlyphId, const void * pLoca, size_t lLocaSize, const void * pHead)
+{
+    const size_t lThisOffset = LocaLookup(nGlyphId, pLoca, lLocaSize, pHead);
+
+    // looking up the next id should always work: the loca table ends with a sentinel entry
+    const size_t lFollowingOffset = LocaLookup(nGlyphId + 1, pLoca, lLocaSize, pHead);
+
+    return lFollowingOffset == lThisOffset;
+}
+
+/*----------------------------------------------------------------------------------------------
+    Determine if a particular Glyph ID is a multi-level composite, that is a composite glyph
+    with at least one component that is itself a composite.
+    White space glyphs, simple glyphs and any lookup failure all yield false.
+----------------------------------------------------------------------------------------------*/
+bool IsDeepComposite(gid16 nGlyphId, const void * pGlyf, const void * pLoca,
+                     size_t lGlyfSize, long lLocaSize, const void * pHead)
+{
+    if (IsSpace(nGlyphId, pLoca, lLocaSize, pHead))
+        return false;
+
+    void * pOuterGlyf = GlyfLookup(nGlyphId, pGlyf, pLoca, lGlyfSize, lLocaSize, pHead);
+    if (pOuterGlyf == NULL)
+        return false;   // a failed lookup cannot be reported separately to the caller
+
+    // a non-negative contour count means a simple glyph
+    if (GlyfContourCount(pOuterGlyf) >= 0)
+        return false;
+
+    int rgnComponent[kMaxGlyphComponents];  // only room for a limited number of components
+    size_t cCapacity = kMaxGlyphComponents;
+    size_t cComponent = 0;
+
+    if (!GetComponentGlyphIds(pOuterGlyf, rgnComponent, cCapacity, cComponent))
+        return false;
+
+    for (size_t iComp = 0; iComp != cComponent; ++iComp)
+    {
+        void * pInnerGlyf = GlyfLookup(static_cast<gid16>(rgnComponent[iComp]),
+                                       pGlyf, pLoca, lGlyfSize, lLocaSize, pHead);
+        if (pInnerGlyf == NULL)
+            return false;
+
+        // a component that is itself a composite makes the glyph "deep"
+        if (GlyfContourCount(pInnerGlyf) < 0)
+            return true;
+    }
+
+    return false;
+}
+
+/*----------------------------------------------------------------------------------------------
+    Fetch the bounding box of a glyph, located through the given tables and Glyph ID.
+    Works for simple and composite glyphs alike.
+    All four outputs are preset to INT_MIN, so they hold that value whenever the glyph
+    cannot be found. Returns false for a white space glyph or a failed lookup; otherwise
+    returns whatever the single-glyph GlyfBox reports.
+----------------------------------------------------------------------------------------------*/
+bool GlyfBox(gid16 nGlyphId, const void * pGlyf, const void * pLoca,
+             size_t lGlyfSize, size_t lLocaSize, const void * pHead, int & xMin, int & yMin, int & xMax, int & yMax)
+{
+    xMin = INT_MIN;
+    yMin = INT_MIN;
+    xMax = INT_MIN;
+    yMax = INT_MIN;
+
+    if (IsSpace(nGlyphId, pLoca, lLocaSize, pHead))
+        return false;
+
+    void * pGlyphData = GlyfLookup(nGlyphId, pGlyf, pLoca, lGlyfSize, lLocaSize, pHead);
+    if (pGlyphData == NULL)
+        return false;
+
+    return GlyfBox(pGlyphData, xMin, yMin, xMax, yMax);
+}
+
+/*----------------------------------------------------------------------------------------------
+    Count the contours of a glyph, located through the given tables and Glyph ID.
+    For a simple glyph this is its own contour count; for a composite it is the sum of the
+    contour counts of its components.
+    Return true if successful, false otherwise. On false, cnContours holds INT_MIN converted
+    to size_t.
+    False may indicate a white space glyph (or white space component), a failed lookup, or
+    a multi-level composite glyph.
+----------------------------------------------------------------------------------------------*/
+bool GlyfContourCount(gid16 nGlyphId, const void * pGlyf, const void * pLoca,
+                      size_t lGlyfSize, size_t lLocaSize, const void * pHead, size_t & cnContours)
+{
+    cnContours = static_cast<size_t>(INT_MIN);
+
+    if (IsSpace(nGlyphId, pLoca, lLocaSize, pHead))
+        return false;
+
+    void * pOuterGlyf = GlyfLookup(nGlyphId, pGlyf, pLoca, lGlyfSize, lLocaSize, pHead);
+    if (pOuterGlyf == NULL)
+        return false;
+
+    // simple glyph: the count stored in the glyph itself is the answer
+    const int cDirect = GlyfContourCount(pOuterGlyf);
+    if (cDirect >= 0)
+    {
+        cnContours = size_t(cDirect);
+        return true;
+    }
+
+    // composite glyph: add up the contours of each component
+    int rgnComponent[kMaxGlyphComponents];  // room for at most kMaxGlyphComponents components
+    size_t cCapacity = kMaxGlyphComponents;
+    size_t cComponent = 0;
+
+    if (!GetComponentGlyphIds(pOuterGlyf, rgnComponent, cCapacity, cComponent))
+        return false;
+
+    int cTotal = 0;
+    for (size_t iComp = 0; iComp < cComponent; ++iComp)
+    {
+        const gid16 nComponentId = static_cast<gid16>(rgnComponent[iComp]);
+        if (IsSpace(nComponentId, pLoca, lLocaSize, pHead))
+            return false;
+
+        void * pInnerGlyf = GlyfLookup(nComponentId, pGlyf, pLoca, lGlyfSize, lLocaSize, pHead);
+        if (pInnerGlyf == 0)
+            return false;
+
+        // a negative count marks a nested composite, which is not supported here
+        const int cInner = GlyfContourCount(pInnerGlyf);
+        if (cInner < 0)
+            return false;
+
+        cTotal += cInner;
+    }
+
+    cnContours = size_t(cTotal);
+    return true;
+}
+
+/*----------------------------------------------------------------------------------------------
+    Get the point numbers for the end points of the glyph contours based on the given tables
+    and Glyph ID
+    Handles both simple and composite glyphs.
+    cnPoints - count of contours from GlyfContourCount (same as number of end points)
+    prgnContourEndPoint - should point to a buffer large enough to hold cnPoints integers
+    Return true if successful, false otherwise. The buffer is first filled with 0xFF bytes,
+        so every end point that is not written reads back as -1 (not INT_MIN).
+    False may indicate a white space glyph or a multi-level composite glyph.
+----------------------------------------------------------------------------------------------*/
+bool GlyfContourEndPoints(gid16 nGlyphId, const void * pGlyf, const void * pLoca,
+                          size_t lGlyfSize, size_t lLocaSize, const void * pHead,
+                          int * prgnContourEndPoint, size_t cnPoints)
+{
+    memset(prgnContourEndPoint, 0xFF, cnPoints * sizeof(int));
+    // std::fill_n(prgnContourEndPoint, cnPoints, INT_MIN);
+
+    if (IsSpace(nGlyphId, pLoca, lLocaSize, pHead)) {return false;}
+
+    void * pSimpleGlyf = GlyfLookup(nGlyphId, pGlyf, pLoca, lGlyfSize, lLocaSize, pHead);
+    if (pSimpleGlyf == NULL) {return false;}
+
+    int cContours = GlyfContourCount(pSimpleGlyf);
+    int cActualPts = 0;
+    if (cContours > 0)
+        return GlyfContourEndPoints(pSimpleGlyf, prgnContourEndPoint, cnPoints, cActualPts);
+
+    // handle composite glyphs
+
+    int rgnCompId[kMaxGlyphComponents]; // room for at most kMaxGlyphComponents components
+    size_t cCompIdTotal = kMaxGlyphComponents;
+    size_t cCompId = 0;
+
+    if (!GetComponentGlyphIds(pSimpleGlyf, rgnCompId, cCompIdTotal, cCompId))
+        return false;
+
+    int * prgnCurrentEndPoint = prgnContourEndPoint;
+    int cCurrentPoints = static_cast<int>(cnPoints);
+    int nPrevPt = 0;    // first point number available to the next component
+    for (size_t i = 0; i < cCompId; i++)
+    {
+        if (IsSpace(static_cast<gid16>(rgnCompId[i]), pLoca, lLocaSize, pHead)) {return false;}
+        pSimpleGlyf = GlyfLookup(static_cast<gid16>(rgnCompId[i]), pGlyf, pLoca, lGlyfSize, lLocaSize, pHead);
+        if (pSimpleGlyf == NULL) {return false;}
+        // returns false on multi-level composite
+        if (!GlyfContourEndPoints(pSimpleGlyf, prgnCurrentEndPoint, cCurrentPoints, cActualPts))
+            return false;
+
+        // A component that produced no end points contributes no points: leave nPrevPt
+        // alone rather than reading prgnCurrentEndPoint[-1].
+        if (cActualPts > 0)
+        {
+            // points in composite are numbered sequentially as components are added
+            // must adjust end point numbers for new point numbers
+            for (int j = 0; j < cActualPts; j++)
+                prgnCurrentEndPoint[j] += nPrevPt;
+            nPrevPt = prgnCurrentEndPoint[cActualPts - 1] + 1;
+        }
+
+        prgnCurrentEndPoint += cActualPts;
+        cCurrentPoints -= cActualPts;
+    }
+
+    return true;
+}
+
+/*----------------------------------------------------------------------------------------------
+    Get the points for a glyph based on the given tables and Glyph ID
+    Handles both simple and composite glyphs.
+    cnPoints - count of points from largest end point obtained from GlyfContourEndPoints
+    prgnX & prgnY - should point to buffers large enough to hold cnPoints integers
+        The ranges are parallel so that coordinates for point(n) are found at offset n in
+        both ranges. These points are in absolute coordinates.
+    prgfOnCurve - should point to a buffer large enough to hold cnPoints bytes (bool)
+        This range is parallel to the prgnX & prgnY
+    Return true if successful, false otherwise. Both coordinate buffers are first filled
+        with 0x7F bytes, so any point that is not written reads back as 0x7F7F7F7F for a
+        4-byte int (not INT_MIN).
+    False may indicate a white space glyph, a multi-level composite, or a corrupt font
+        (this includes attachment point numbers that lie outside the points read so far).
+    It's not clear from the TTF spec when the transforms should be applied. Should the
+        transform be done before or after attachment point calcs? (current code - before)
+        Should the transform be applied to other offsets? (currently - no; however commented
+        out code is in place so that if CompoundGlyph::UnscaledOffset on the MS rasterizer is
+        clear (typical) then yes, and if CompoundGlyph::ScaledOffset on the Apple rasterizer is
+        clear (typical?) then no). See GetComponentTransform.
+    It's also unclear where point numbering with attachment points starts
+        (currently - first point number is relative to whole glyph, second point number is
+        relative to current glyph).
+----------------------------------------------------------------------------------------------*/
+bool GlyfPoints(gid16 nGlyphId, const void * pGlyf,
+                const void * pLoca, size_t lGlyfSize, size_t lLocaSize, const void * pHead,
+                const int * /*prgnContourEndPoint*/, size_t /*cnEndPoints*/,
+                int * prgnX, int * prgnY, bool * prgfOnCurve, size_t cnPoints)
+{
+    memset(prgnX, 0x7F, cnPoints * sizeof(int));
+    memset(prgnY, 0x7F, cnPoints * sizeof(int));
+
+    if (IsSpace(nGlyphId, pLoca, lLocaSize, pHead))
+        return false;
+
+    void * pSimpleGlyf = GlyfLookup(nGlyphId, pGlyf, pLoca, lGlyfSize, lLocaSize, pHead);
+    if (pSimpleGlyf == NULL)
+        return false;
+
+    int cContours = GlyfContourCount(pSimpleGlyf);
+    int cActualPts = 0;
+    if (cContours > 0)
+    {
+        if (!GlyfPoints(pSimpleGlyf, prgnX, prgnY, (char *)prgfOnCurve, cnPoints, cActualPts))
+            return false;
+        // Only accumulate over the points that were actually read (as the composite path
+        // below does); running on to cnPoints would add into the 0x7F sentinel entries and
+        // overflow. NOTE(review): assumes the single-glyph GlyfPoints never reports more
+        // than cnPoints points - confirm.
+        CalcAbsolutePoints(prgnX, prgnY, cActualPts);
+        SimplifyFlags((char *)prgfOnCurve, cnPoints);
+        return true;
+    }
+
+    // handle composite glyphs
+    int rgnCompId[kMaxGlyphComponents]; // room for at most kMaxGlyphComponents components
+    size_t cCompIdTotal = kMaxGlyphComponents;
+    size_t cCompId = 0;
+
+    // this will fail if there are more components than there is room for
+    if (!GetComponentGlyphIds(pSimpleGlyf, rgnCompId, cCompIdTotal, cCompId))
+        return false;
+
+    int * prgnCurrentX = prgnX;
+    int * prgnCurrentY = prgnY;
+    char * prgbCurrentFlag = (char *)prgfOnCurve; // converting bool to char should be safe
+    int cCurrentPoints = static_cast<int>(cnPoints);
+    bool fOffset = true, fTransOff = true;
+    int a, b;
+    float flt11, flt12, flt21, flt22;
+    // int * prgnPrevX = prgnX; // in case first att pt number relative to preceding glyph
+    // int * prgnPrevY = prgnY;
+    for (size_t i = 0; i < cCompId; i++)
+    {
+        if (IsSpace(static_cast<gid16>(rgnCompId[i]), pLoca, lLocaSize, pHead)) {return false;}
+        void * pCompGlyf = GlyfLookup(static_cast<gid16>(rgnCompId[i]), pGlyf, pLoca, lGlyfSize, lLocaSize, pHead);
+        if (pCompGlyf == NULL) {return false;}
+        // returns false on multi-level composite
+        if (!GlyfPoints(pCompGlyf, prgnCurrentX, prgnCurrentY, prgbCurrentFlag,
+                cCurrentPoints, cActualPts))
+            return false;
+        if (!GetComponentPlacement(pSimpleGlyf, rgnCompId[i], fOffset, a, b))
+            return false;
+        if (!GetComponentTransform(pSimpleGlyf, rgnCompId[i],
+                flt11, flt12, flt21, flt22, fTransOff))
+            return false;
+        bool fIdTrans = flt11 == 1.0 && flt12 == 0.0 && flt21 == 0.0 && flt22 == 1.0;
+
+        // convert points to absolute coordinates
+        // do before transform and attachment point placement are applied
+        CalcAbsolutePoints(prgnCurrentX, prgnCurrentY, cActualPts);
+
+        // apply transform - see main method note above
+        // do before attachment point calcs
+        if (!fIdTrans)
+            for (int j = 0; j < cActualPts; j++)
+            {
+                int x = prgnCurrentX[j]; // store before transform applied
+                int y = prgnCurrentY[j];
+                prgnCurrentX[j] = (int)(x * flt11 + y * flt12);
+                prgnCurrentY[j] = (int)(x * flt21 + y * flt22);
+            }
+
+        // apply placement - see main method note above
+        int nXOff, nYOff;
+        if (fOffset) // explicit x & y offsets
+        {
+            /* ignore fTransOff for now
+            if (fTransOff && !fIdTrans)
+            {   // transform x & y offsets
+                nXOff = (int)(a * flt11 + b * flt12);
+                nYOff = (int)(a * flt21 + b * flt22);
+            }
+            else */
+            {   // don't transform offset
+                nXOff = a;
+                nYOff = b;
+            }
+        }
+        else // attachment points
+        {   // in case first point is relative to preceding glyph and second relative to current
+            // nXOff = prgnPrevX[a] - prgnCurrentX[b];
+            // nYOff = prgnPrevY[a] - prgnCurrentY[b];
+            // first point number relative to whole composite, second relative to current glyph
+
+            // The point numbers come from the font data, so reject any that fall outside
+            // the points read so far instead of indexing outside the buffers.
+            const long cPointsSoFar = static_cast<long>(prgnCurrentX - prgnX) + cActualPts;
+            if (a < 0 || a >= cPointsSoFar || b < 0 || b >= cActualPts)
+                return false;
+
+            nXOff = prgnX[a] - prgnCurrentX[b];
+            nYOff = prgnY[a] - prgnCurrentY[b];
+        }
+        for (int j = 0; j < cActualPts; j++)
+        {
+            prgnCurrentX[j] += nXOff;
+            prgnCurrentY[j] += nYOff;
+        }
+
+        // prgnPrevX = prgnCurrentX;
+        // prgnPrevY = prgnCurrentY;
+        prgnCurrentX += cActualPts;
+        prgnCurrentY += cActualPts;
+        prgbCurrentFlag += cActualPts;
+        cCurrentPoints -= cActualPts;
+    }
+
+    SimplifyFlags((char *)prgfOnCurve, cnPoints);
+
+    return true;
+}
+
+/*----------------------------------------------------------------------------------------------
+    Reduce each of the first cnPoints flag bytes to its Sfnt::SimpleGlyph::OnCurve bit, so a
+    flag afterwards only tells whether the point is on-curve or off-curve.
+    Always returns true.
+---------------------------------------------------------------------------------------------*/
+bool SimplifyFlags(char * prgbFlags, int cnPoints)
+{
+    int iPoint = 0;
+    while (iPoint < cnPoints)
+    {
+        char & bFlag = prgbFlags[iPoint++];
+        bFlag = static_cast<char>(bFlag & Sfnt::SimpleGlyph::OnCurve);
+    }
+    return true;
+}
+
+/*----------------------------------------------------------------------------------------------
+    Convert relative point coordinates to absolute coordinates
+    Points are stored in the font such that they are offsets from one another except for the
+    first point of a glyph.
+    prgnX & prgnY - parallel ranges of cnPoints coordinates, converted in place
+    Nothing is read or written when cnPoints is zero (or negative), so empty ranges are safe.
+    Always returns true.
+---------------------------------------------------------------------------------------------*/
+bool CalcAbsolutePoints(int * prgnX, int * prgnY, int cnPoints)
+{
+    // Each point becomes itself plus the (already absolute) point before it. Element 0 is
+    // only touched as a predecessor, i.e. when there are at least two points.
+    for (int i = 1; i < cnPoints; i++)
+    {
+        prgnX[i] += prgnX[i - 1];
+        prgnY[i] += prgnY[i - 1];
+    }
+
+    return true;
+}
+#endif
+
+/*----------------------------------------------------------------------------------------------
+ Return the length of the 'name' table in bytes.
+ Currently unused: the definition below is compiled out by the surrounding #if 0.
+ The result is the first offset read from the table header plus the largest
+ (string offset + string length) found in any record.
+---------------------------------------------------------------------------------------------*/
+#if 0
+size_t NameTableLength(const byte * pTable)
+{
+ // Every field below is read as two bytes, high byte first, advancing pb as it goes.
+ byte * pb = (const_cast<byte *>(pTable)) + 2; // skip format
+ size_t cRecords = *pb++ << 8; cRecords += *pb++;
+ int dbStringOffset0 = (*pb++) << 8; dbStringOffset0 += *pb++;
+ int dbMaxStringOffset = 0;
+ for (size_t irec = 0; irec < cRecords; irec++)
+ {
+ // The first four fields are read only to step pb past them; their values are unused.
+ int nPlatform = (*pb++) << 8; nPlatform += *pb++;
+ int nEncoding = (*pb++) << 8; nEncoding += *pb++;
+ int nLanguage = (*pb++) << 8; nLanguage += *pb++;
+ int nName = (*pb++) << 8; nName += *pb++;
+ int cbStringLen = (*pb++) << 8; cbStringLen += *pb++;
+ int dbStringOffset = (*pb++) << 8; dbStringOffset += *pb++;
+ // keep the furthest end (offset + length) reached by any record's string
+ if (dbMaxStringOffset < dbStringOffset + cbStringLen)
+ dbMaxStringOffset = dbStringOffset + cbStringLen;
+ }
+ return dbStringOffset0 + dbMaxStringOffset;
+}
+#endif
+
+} // end of namespace TtfUtil
+} // end of namespace graphite
diff --git a/thirdparty/graphite/src/UtfCodec.cpp b/thirdparty/graphite/src/UtfCodec.cpp
new file mode 100644
index 0000000000..a944bbf9d0
--- /dev/null
+++ b/thirdparty/graphite/src/UtfCodec.cpp
@@ -0,0 +1,45 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include "inc/UtfCodec.h"
+//using namespace graphite2;
+
+namespace graphite2 {
+
+}
+
+using namespace graphite2;
+
+// Size table for the 8-bit codec: 16 entries, grouped by the inline comments into sequence
+// lengths of 1 to 4 bytes, with 0 marking a trailing byte.
+// NOTE(review): presumably indexed by the top four bits of a byte - confirm in inc/UtfCodec.h.
+const int8 _utf_codec<8>::sz_lut[16] =
+{
+ 1,1,1,1,1,1,1,1, // 1 byte
+ 0,0,0,0, // trailing byte
+ 2,2, // 2 bytes
+ 3, // 3 bytes
+ 4 // 4 bytes
+};
+
+// Five bit masks for the 8-bit codec.
+// NOTE(review): presumably selected by the sizes in sz_lut (0..4) - confirm in inc/UtfCodec.h.
+const byte _utf_codec<8>::mask_lut[5] = {0x7f, 0xff, 0x3f, 0x1f, 0x0f};
diff --git a/thirdparty/graphite/src/call_machine.cpp b/thirdparty/graphite/src/call_machine.cpp
new file mode 100644
index 0000000000..fcd8a0c2c1
--- /dev/null
+++ b/thirdparty/graphite/src/call_machine.cpp
@@ -0,0 +1,138 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+// This is the call threaded interpreter implementation for machine.h
+// Author: Tim Eves
+
+// Build either this interpreter or the direct_machine implementation.
+// The call threaded interpreter is portable across compilers and
+// architectures as well as being useful to debug (you can set breakpoints on
+// opcodes) but is slower than the direct threaded interpreter by a factor of 2
+
+#include <cassert>
+#include <cstring>
+#include <graphite2/Segment.h>
+#include "inc/Machine.h"
+#include "inc/Segment.h"
+#include "inc/Slot.h"
+#include "inc/Rule.h"
+
+// Disable the unused parameter warning as the compiler is mistaken since dp
+// is always updated (even if by 0) on every opcode.
+#ifdef __GNUC__
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+
+// Parameter list shared by every opcode function: the data pointer and stack pointer
+// (both by reference), the stack base, and the bank holding the remaining registers.
+#define registers const byte * & dp, vm::Machine::stack_t * & sp, \
+ vm::Machine::stack_t * const sb, regbank & reg
+
+// These are required by opcodes.h and should not be changed
+// STARTOP declares and then opens the definition of one opcode function; ENDOP closes it.
+// ENDOP's integer division is zero only while the distance between sp and sb is smaller
+// than Machine::STACK_MAX, so an opcode returns false (stopping the run loop) once the
+// stack pointer has moved at least STACK_MAX entries away from its base.
+#define STARTOP(name) bool name(registers) REGPARM(4);\
+ bool name(registers) {
+#define ENDOP return (sp - sb)/Machine::STACK_MAX==0; \
+ }
+
+// Push the given status and return false from the current opcode function.
+#define EXIT(status) { push(status); return false; }
+
+// This is required by opcode_table.h
+#define do_(name) instr(name)
+
+
+using namespace graphite2;
+using namespace vm;
+
+// Registers passed to every opcode function through the `reg` parameter.
+// Machine::run fills this in with a positional brace initialiser, so the member order
+// must not be changed. The reference members alias state owned by the caller.
+struct regbank {
+ slotref is; // starts as *map; written back through the map pointer when run() ends
+ slotref * map; // current position in the slot map; handed back to run()'s caller
+ SlotMap & smap; // the machine's slot map
+ slotref * const map_base; // smap.begin() + smap.context()
+ const instr * & ip; // aliases run()'s instruction pointer
+ uint8 direction; // initialised from the slot map's dir()
+ int8 flags; // starts at 0
+ Machine::status_t & status; // aliases the machine's status
+};
+
+typedef bool (* ip_t)(registers);
+
+// Pull in the opcode definitions
+// We pull these into a private namespace so these otherwise common names dont
+// pollute the toplevel namespace.
+// The macros below let the opcode bodies in opcodes.h refer to the virtual machine
+// registers by short names while they actually live in the regbank `reg`.
+namespace {
+#define smap reg.smap
+#define seg smap.segment
+#define is reg.is
+#define ip reg.ip
+#define map reg.map
+#define mapb reg.map_base
+#define flags reg.flags
+#define dir reg.direction
+#define status reg.status
+
+#include "inc/opcodes.h"
+
+// NOTE(review): `status` is defined above but has no matching #undef here, so it stays
+// defined for the rest of this file - confirm whether that is intentional.
+#undef smap
+#undef seg
+#undef is
+#undef ip
+#undef map
+#undef mapb
+#undef flags
+#undef dir
+}
+
+// Run a program with the call threaded interpreter.
+// program - instruction stream; each entry is called as an opcode function (must not be null)
+// data - operand bytes, read through the dp register
+// map - in/out: current slot map position; updated from the register bank on return
+// Returns the value on top of the stack when exactly one value was left on it, else 0.
+Machine::stack_t Machine::run(const instr * program,
+ const byte * data,
+ slotref * & map)
+
+{
+ assert(program != 0);
+
+ // Declare virtual machine registers
+ // ip starts one before the program because the loop below pre-increments it
+ const instr * ip = program-1;
+ const byte * dp = data;
+ stack_t * sp = _stack + Machine::STACK_GUARD,
+ * const sb = sp;
+ // positional initialiser: the values must follow regbank's member order
+ regbank reg = {*map, map, _map, _map.begin()+_map.context(), ip, _map.dir(), 0, _status};
+
+ // Run the program
+ // each opcode function returns false to stop the loop
+ while ((reinterpret_cast<ip_t>(*++ip))(dp, sp, sb, reg)) {}
+ // pop the result only if exactly one value sits above the guard area
+ const stack_t ret = sp == _stack+STACK_GUARD+1 ? *sp-- : 0;
+
+ check_final_stack(sp);
+ // hand the final map position and slot back to the caller
+ map = reg.map;
+ *map = reg.is;
+ return ret;
+}
+
+// Pull in the opcode table
+// (kept in an unnamed namespace, so the table is private to this file)
+namespace {
+#include "inc/opcode_table.h"
+}
+
+// Return the opcode_table pulled in from inc/opcode_table.h above.
+const opcode_t * Machine::getOpcodeTable() throw()
+{
+ return opcode_table;
+}
diff --git a/thirdparty/graphite/src/direct_machine.cpp b/thirdparty/graphite/src/direct_machine.cpp
new file mode 100644
index 0000000000..86206cfe37
--- /dev/null
+++ b/thirdparty/graphite/src/direct_machine.cpp
@@ -0,0 +1,140 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+// This is the direct threaded interpreter implementation for machine.h
+// Author: Tim Eves
+
+// Build either this interpreter or the call_machine implementation.
+// The direct threaded interpreter relies upon a gcc feature called
+// labels-as-values so is only portable to compilers that support the
+// extension (gcc only as far as I know) however it should build on any
+// architecture gcc supports.
+// This is twice as fast as the call threaded model and is likely faster on
+// in-order processors with short pipelines and little branch prediction such
+// as the ARM and possibly Atom chips.
+
+
+#include <cassert>
+#include <cstring>
+#include "inc/Machine.h"
+#include "inc/Segment.h"
+#include "inc/Slot.h"
+#include "inc/Rule.h"
+
+// Each opcode body becomes a labelled block inside direct_run. ENDOP jumps to the `end`
+// label once the stack pointer has moved at least Machine::STACK_MAX entries away from
+// its base (the integer division is then non-zero); otherwise it jumps to the address
+// stored in the next instruction.
+#define STARTOP(name) name: {
+#define ENDOP }; goto *((sp - sb)/Machine::STACK_MAX ? &&end : *++ip);
+// Push the given status and leave the interpreter through the `end` label.
+#define EXIT(status) { push(status); goto end; }
+
+// Used by opcode_table.h: an opcode's entry is the address of its label.
+#define do_(name) &&name
+
+
+using namespace graphite2;
+using namespace vm;
+
+namespace {
+
+// The GCC manual has this to say about labels as values:
+// The &&foo expressions for the same label might have different values
+// if the containing function is inlined or cloned. If a program relies
+// on them being always the same, __attribute__((__noinline__,__noclone__))
+// should be used to prevent inlining and cloning.
+//
+// is_return in Code.cpp relies on being able to do comparisons, so it needs
+// them to be always the same.
+//
+// The GCC manual further adds:
+// If &&foo is used in a static variable initializer, inlining and
+// cloning is forbidden.
+//
+// In this file, &&foo *is* used in a static variable initializer, and it's not
+// entirely clear whether this should prevent inlining of the function or not.
+// In practice, though, clang 7 can end up inlining the function with ThinLTO,
+// which breaks at least is_return. https://bugs.llvm.org/show_bug.cgi?id=39241
+// So all in all, we need at least the __noinline__ attribute. __noclone__
+// is not supported by clang.
+// Direct threaded interpreter body; it also owns the opcode table.
+// get_table_mode - when true, return the opcode table immediately; no other argument is read
+// program, data - instruction stream and operand bytes
+// stack - base of the machine stack; sp starts Machine::STACK_GUARD entries in
+// __map - in/out: current slot map position; rewritten at the `end` label
+// _dir, status - initial direction and the machine's status, made available to the opcodes
+// __smap - slot map; dereferenced whenever get_table_mode is false
+// Returns the opcode table in table mode, otherwise the final stack pointer.
+__attribute__((__noinline__))
+const void * direct_run(const bool get_table_mode,
+ const instr * program,
+ const byte * data,
+ Machine::stack_t * stack,
+ slotref * & __map,
+ uint8 _dir,
+ Machine::status_t & status,
+ SlotMap * __smap=0)
+{
+ // We need to define and return the opcode table from within this function
+ // in order to take the addresses of the instruction bodies.
+ #include "inc/opcode_table.h"
+ if (get_table_mode)
+ return opcode_table;
+
+ // Declare virtual machine registers
+ // (the names match the ones the opcode bodies in opcodes.h are written against)
+ const instr * ip = program;
+ const byte * dp = data;
+ Machine::stack_t * sp = stack + Machine::STACK_GUARD,
+ * const sb = sp;
+ SlotMap & smap = *__smap;
+ Segment & seg = smap.segment;
+ slotref is = *__map,
+ * map = __map,
+ * const mapb = smap.begin()+smap.context();
+ uint8 dir = _dir;
+ int8 flags = 0;
+
+ // start the program
+ // jump to the label address stored in the first instruction
+ goto **ip;
+
+ // Pull in the opcode definitions
+ #include "inc/opcodes.h"
+
+ // reached from ENDOP or EXIT: publish the final map position and slot
+ end:
+ __map = map;
+ *__map = is;
+ return sp;
+}
+
+}
+
+// Fetch the opcode table, which lives inside direct_run because its entries are label
+// addresses. direct_run returns the table before reading any of the other arguments,
+// so they are all placeholders.
+const opcode_t * Machine::getOpcodeTable() throw()
+{
+    slotref * dummy = 0;    // never dereferenced in table mode; initialised so that no
+                            // indeterminate pointer is handed over by reference
+    Machine::status_t dumstat = Machine::finished;
+    return static_cast<const opcode_t *>(direct_run(true, 0, 0, 0, dummy, 0, dumstat));
+}
+
+
+// Run a program with the direct threaded interpreter.
+// program must not be null. `is` is the in/out slot map position that direct_run updates.
+// Returns the value on top of the stack when exactly one value was left on it, else 0.
+Machine::stack_t Machine::run(const instr * program,
+                              const byte * data,
+                              slotref * & is)
+{
+    assert(program != 0);
+
+    // outside table mode direct_run hands back the final stack pointer
+    const void * const pFinalStack
+        = direct_run(false, program, data, _stack, is, _map.dir(), _status, &_map);
+    const stack_t * sp = static_cast<const stack_t *>(pFinalStack);
+
+    // pop the result only if exactly one value sits above the guard area
+    stack_t ret = 0;
+    if (sp == _stack + STACK_GUARD + 1)
+        ret = *sp--;
+
+    check_final_stack(sp);
+    return ret;
+}
diff --git a/thirdparty/graphite/src/gr_char_info.cpp b/thirdparty/graphite/src/gr_char_info.cpp
new file mode 100644
index 0000000000..612f9ba694
--- /dev/null
+++ b/thirdparty/graphite/src/gr_char_info.cpp
@@ -0,0 +1,65 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include <cassert>
+#include "graphite2/Segment.h"
+#include "inc/CharInfo.h"
+
+// C API accessors for gr_char_info. Each one asserts that p is non-null and then forwards
+// to the member function of the same meaning; none of them modifies the object.
+extern "C"
+{
+
+// Returns p->unicodeChar().
+unsigned int gr_cinfo_unicode_char(const gr_char_info* p/*not NULL*/)
+{
+ assert(p);
+ return p->unicodeChar();
+}
+
+
+// Returns p->breakWeight().
+int gr_cinfo_break_weight(const gr_char_info* p/*not NULL*/)
+{
+ assert(p);
+ return p->breakWeight();
+}
+
+// Returns p->after().
+int gr_cinfo_after(const gr_char_info *p/*not NULL*/)
+{
+ assert(p);
+ return p->after();
+}
+
+// Returns p->before().
+int gr_cinfo_before(const gr_char_info *p/*not NULL*/)
+{
+ assert(p);
+ return p->before();
+}
+
+// Returns p->base().
+size_t gr_cinfo_base(const gr_char_info *p/*not NULL*/)
+{
+ assert(p);
+ return p->base();
+}
+
+} // extern "C"
diff --git a/thirdparty/graphite/src/gr_face.cpp b/thirdparty/graphite/src/gr_face.cpp
new file mode 100644
index 0000000000..baa469727b
--- /dev/null
+++ b/thirdparty/graphite/src/gr_face.cpp
@@ -0,0 +1,267 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include "graphite2/Font.h"
+#include "inc/Face.h"
+#include "inc/FileFace.h"
+#include "inc/GlyphCache.h"
+#include "inc/CmapCache.h"
+#include "inc/Silf.h"
+#include "inc/json.h"
+
+using namespace graphite2;
+
+#if !defined GRAPHITE2_NTRACING
+extern json *global_log;
+#endif
+
+namespace
+{
+    // Loads the Graphite data of `face`.
+    // Returns true only when the Silf table is present and the glyphs,
+    // features and Graphite rules all load; `options` is handed to
+    // Face::readGlyphs(). When tracing is compiled in and a global log is
+    // active, a feature/rule load failure is reported as a "fontload" record.
+    bool load_face(Face & face, unsigned int options)
+    {
+#ifdef GRAPHITE2_TELEMETRY
+        telemetry::category _misc_cat(face.tele.misc);
+#endif
+        Face::Table silf(face, Tag::Silf, 0x00050000);
+        if (!silf)
+            return false;
+
+        if (!face.readGlyphs(options))
+            return false;
+
+        // silf is known to be valid from here on, so it is not tested again.
+        if (face.readFeatures() && face.readGraphite(silf))
+            return true;
+
+#if !defined GRAPHITE2_NTRACING
+        if (global_log)
+        {
+            *global_log << json::object
+                        << "type" << "fontload"
+                        << "failure" << face.error()
+                        << "context" << face.error_context()
+                        << json::close;
+        }
+#endif
+        return false;
+    }
+
+    // Turns the trailing space (0x20) padding bytes of a 4-byte tag into
+    // zero bytes, e.g. "ab  " -> "ab\0\0"; an all-space tag becomes 0.
+    // Bytes are examined from the least significant end and clearing stops
+    // at the first byte that is not a space.
+    inline
+    uint32 zeropad(const uint32 x)
+    {
+        uint32 keep = 0xFFFFFFFFu;
+        for (unsigned shift = 0; shift < 32 && ((x >> shift) & 0xFFu) == 0x20u; shift += 8)
+            keep &= ~(uint32(0xFFu) << shift);
+        return x & keep;
+    }
+}
+
+extern "C" {
+
+// Creates a face that reads its tables through `ops` and loads it with
+// load_face(). Returns 0 when `ops` is NULL or loading fails.
+gr_face* gr_make_face_with_ops(const void* appFaceHandle/*non-NULL*/, const gr_face_ops *ops, unsigned int faceOptions)
+ //the appFaceHandle must stay alive all the time when the gr_face is alive. When finished with the gr_face, call destroy_face
+{
+    if (!ops)
+        return 0;
+
+    Face * const face = new Face(appFaceHandle, *ops);
+    const bool loaded = face && load_face(*face, faceOptions);
+    if (!loaded)
+    {
+        delete face;
+        return 0;
+    }
+    return static_cast<gr_face *>(face);
+}
+
+// Convenience wrapper: builds a gr_face_ops holding only the table getter
+// (third member left NULL) and defers to gr_make_face_with_ops().
+gr_face* gr_make_face(const void* appFaceHandle/*non-NULL*/, gr_get_table_fn tablefn, unsigned int faceOptions)
+{
+    const gr_face_ops table_ops = {sizeof(gr_face_ops), tablefn, NULL};
+    gr_face * const face = gr_make_face_with_ops(appFaceHandle, &table_ops, faceOptions);
+    return face;
+}
+
+
+// The unnamed third argument is ignored; the call is otherwise identical
+// to gr_make_face_with_ops().
+gr_face* gr_make_face_with_seg_cache_and_ops(const void* appFaceHandle/*non-NULL*/, const gr_face_ops *ops, unsigned int , unsigned int faceOptions)
+{
+    gr_face * const face = gr_make_face_with_ops(appFaceHandle, ops, faceOptions);
+    return face;
+}
+
+// The unnamed third argument is ignored; builds a gr_face_ops holding only
+// the table getter and defers to gr_make_face_with_ops().
+gr_face* gr_make_face_with_seg_cache(const void* appFaceHandle/*non-NULL*/, gr_get_table_fn tablefn, unsigned int, unsigned int faceOptions)
+{
+    const gr_face_ops table_ops = {sizeof(gr_face_ops), tablefn, NULL};
+    gr_face * const face = gr_make_face_with_ops(appFaceHandle, &table_ops, faceOptions);
+    return face;
+}
+
+// Packs up to the first four characters of `str` into a 32-bit tag, first
+// character in the most significant byte. Shorter strings leave the low
+// bytes zero; a NULL or empty string yields 0.
+gr_uint32 gr_str_to_tag(const char *str)
+{
+    uint32 res = 0;
+    if (!str) return res;
+
+    // Count at most four bytes and stop at the terminator, so a short string
+    // is never read past its NUL and a long string uses its first four bytes.
+    size_t len = 0;
+    while (len < 4 && str[len] != '\0')
+        ++len;
+
+    // Go through unsigned char so bytes >= 0x80 are not sign-extended
+    // into the higher bytes of the tag.
+    for (size_t i = 0; i < len; ++i)
+        res |= uint32(static_cast<unsigned char>(str[i])) << (24 - 8 * i);
+    return res;
+}
+
+// Writes the four bytes of `tag`, most significant first, followed by a
+// terminating NUL. `str` must have room for 5 chars; NULL is ignored.
+void gr_tag_to_str(gr_uint32 tag, char *str)
+{
+    if (str == 0)
+        return;
+
+    for (int shift = 24; shift >= 0; shift -= 8)
+        *str++ = char(tag >> shift);
+    *str = '\0';
+}
+
+// Clones the feature values for `langname` after normalising its space
+// padding with zeropad().
+gr_feature_val* gr_face_featureval_for_lang(const gr_face* pFace, gr_uint32 langname/*0 means clone default*/) //clones the features. if none for language, clones the default
+{
+    assert(pFace);
+    const gr_uint32 lang_tag = zeropad(langname);
+    return static_cast<gr_feature_val *>(pFace->theSill().cloneFeatures(lang_tag));
+}
+
+
+// Looks up a feature by id, after normalising the id's space padding with
+// zeropad(). Returns whatever featureById() yields.
+const gr_feature_ref* gr_face_find_fref(const gr_face* pFace, gr_uint32 featId) //When finished with the FeatureRef, call destroy_FeatureRef
+{
+    assert(pFace);
+    const gr_uint32 feat_tag = zeropad(featId);
+    return static_cast<const gr_feature_ref*>(pFace->featureById(feat_tag));
+}
+
+// Counts the features of the face that do not carry the HIDDEN flag.
+unsigned short gr_face_n_fref(const gr_face* pFace)
+{
+    assert(pFace);
+    int visible = 0;
+    for (int idx = 0; idx < pFace->numFeatures(); ++idx)
+    {
+        const bool hidden = (pFace->feature(idx)->getFlags() & FeatureRef::HIDDEN) != 0;
+        if (!hidden)
+            ++visible;
+    }
+    return visible;
+}
+
+// Returns the i-th feature that is not flagged HIDDEN (same numbering as
+// gr_face_n_fref), or 0 when there are not that many visible features.
+const gr_feature_ref* gr_face_fref(const gr_face* pFace, gr_uint16 i) //When finished with the FeatureRef, call destroy_FeatureRef
+{
+    assert(pFace);
+    int visible_seen = 0;
+    for (int idx = 0; idx < pFace->numFeatures(); ++idx)
+    {
+        const FeatureRef* candidate = pFace->feature(idx);
+        if (candidate->getFlags() & FeatureRef::HIDDEN)
+            continue;               // hidden features do not consume an index
+        if (visible_seen++ == i)
+            return static_cast<const gr_feature_ref*>(candidate);
+    }
+    return 0;
+}
+
+// Forwards to numLanguages() on the face's feature/language table.
+unsigned short gr_face_n_languages(const gr_face* pFace)
+{
+    assert(pFace);
+    const unsigned short count = pFace->theSill().numLanguages();
+    return count;
+}
+
+// Forwards to getLangName(i) on the face's feature/language table.
+gr_uint32 gr_face_lang_by_index(const gr_face* pFace, gr_uint16 i)
+{
+    assert(pFace);
+    const gr_uint32 lang_tag = pFace->theSill().getLangName(i);
+    return lang_tag;
+}
+
+
+// Destroys a face created by one of the gr_make_*face* functions.
+void gr_face_destroy(gr_face *face)
+{
+    Face * const impl = static_cast<Face*>(face);
+    delete impl;
+}
+
+
+// Forwards to languageForLocale(locale); a NULL face yields 0.
+gr_uint16 gr_face_name_lang_for_locale(gr_face *face, const char * locale)
+{
+    if (!face)
+        return 0;
+    return face->languageForLocale(locale);
+}
+
+// Returns the number of glyphs held in the face's glyph cache.
+// pFace must not be NULL; this is checked with assert like the other
+// face accessors in this file.
+unsigned short gr_face_n_glyphs(const gr_face* pFace)
+{
+    assert(pFace);
+    return pFace->glyphs().numGlyphs();
+}
+
+// Returns the silfInfo() of the Silf chosen for `script`, or 0 when the
+// face is NULL or no Silf is chosen.
+const gr_faceinfo *gr_face_info(const gr_face *pFace, gr_uint32 script)
+{
+    const Silf * const chosen = pFace ? pFace->chooseSilf(script) : 0;
+    if (!chosen)
+        return 0;
+    return chosen->silfInfo();
+}
+
+// Reports whether `usv` maps to a glyph: first through the face's cmap and,
+// failing that, through the pseudo-glyph lookup of the Silf chosen for
+// `script`. Returns 1 when a non-zero glyph id is found, otherwise 0.
+int gr_face_is_char_supported(const gr_face* pFace, gr_uint32 usv, gr_uint32 script)
+{
+    const Cmap & cmap = pFace->cmap();
+    gr_uint16 gid = cmap[usv];
+    if (!gid)
+    {
+        // chooseSilf() is treated as nullable by gr_face_info(), so only
+        // consult the pseudo glyphs when a Silf was actually chosen.
+        const Silf * silf = pFace->chooseSilf(script);
+        if (silf)
+            gid = silf->findPseudo(usv);
+    }
+    return (gid != 0);
+}
+
+#ifndef GRAPHITE2_NFILEFACE
+// Opens `filename` as a FileFace and builds a gr_face on top of it.
+// On success the gr_face takes ownership of the FileFace; on any failure
+// the FileFace is deleted and NULL is returned.
+gr_face* gr_make_file_face(const char *filename, unsigned int faceOptions)
+{
+    FileFace* const file_face = new FileFace(filename);
+
+    gr_face* face = 0;
+    if (*file_face)
+        face = gr_make_face_with_ops(file_face, &FileFace::ops, faceOptions);
+
+    if (!face)
+    {
+        //error when loading
+        delete file_face;
+        return NULL;
+    }
+
+    face->takeFileFace(file_face); //takes ownership
+    return face;
+}
+
+// The unnamed second argument is ignored; otherwise identical to
+// gr_make_file_face().
+gr_face* gr_make_file_face_with_seg_cache(const char* filename, unsigned int, unsigned int faceOptions) //returns NULL on failure. //TBD better error handling
+ //when finished with, call destroy_face
+{
+    gr_face * const face = gr_make_file_face(filename, faceOptions);
+    return face;
+}
+#endif //!GRAPHITE2_NFILEFACE
+
+} // extern "C"
diff --git a/thirdparty/graphite/src/gr_features.cpp b/thirdparty/graphite/src/gr_features.cpp
new file mode 100644
index 0000000000..a560e053f2
--- /dev/null
+++ b/thirdparty/graphite/src/gr_features.cpp
@@ -0,0 +1,138 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include "graphite2/Font.h"
+#include "inc/Face.h"
+#include "inc/FeatureMap.h"
+#include "inc/FeatureVal.h"
+#include "inc/NameTable.h"
+
+using namespace graphite2;
+
+extern "C" {
+
+
+// Reads this feature's value out of `feats` via getFeatureVal().
+gr_uint16 gr_fref_feature_value(const gr_feature_ref* pfeatureref, const gr_feature_val* feats) //returns 0 if either pointer is NULL
+{
+    if (!(pfeatureref && feats))
+        return 0;
+
+    return pfeatureref->getFeatureVal(*feats);
+}
+
+
+// Stores `val` for this feature into `pDest` via applyValToFeature() and
+// returns its result; returns 0 when either pointer is NULL.
+int gr_fref_set_feature_value(const gr_feature_ref* pfeatureref, gr_uint16 val, gr_feature_val* pDest)
+{
+    if (!(pfeatureref && pDest))
+        return 0;
+
+    return pfeatureref->applyValToFeature(val, *pDest);
+}
+
+
+// Forwards to getId() on the feature reference.
+gr_uint32 gr_fref_id(const gr_feature_ref* pfeatureref) //returns 0 if pointer is NULL
+{
+    if (pfeatureref)
+        return pfeatureref->getId();
+
+    return 0;
+}
+
+
+// Forwards to getNumSettings(); a NULL feature reference yields 0.
+gr_uint16 gr_fref_n_values(const gr_feature_ref* pfeatureref)
+{
+    if (pfeatureref)
+        return pfeatureref->getNumSettings();
+    return 0;
+}
+
+
+// Returns the value of setting number `settingno`, or 0 when the feature
+// reference is NULL or the setting number is out of range.
+gr_int16 gr_fref_value(const gr_feature_ref* pfeatureref, gr_uint16 settingno)
+{
+    if (!pfeatureref)
+        return 0;
+    if (settingno >= pfeatureref->getNumSettings())
+        return 0;
+
+    return pfeatureref->getSettingValue(settingno);
+}
+
+
+// Fetches the label of a feature from the face's name table.
+//   langId - in: language passed to the name lookup; out: value left by it
+//   utf    - encoding passed to the name lookup
+//   length - out: length reported by the name lookup
+// Returns the buffer produced by NameTable::getName(); the file provides
+// gr_label_destroy() for releasing labels. On failure (NULL feature, NULL
+// out-pointer or no name table) NULL is returned and the outputs that were
+// supplied are set to 0.
+void* gr_fref_label(const gr_feature_ref* pfeatureref, gr_uint16 *langId, gr_encform utf, gr_uint32 *length)
+{
+    if (!pfeatureref || !langId || !length)
+    {
+        // Zero what the caller can observe, not the local pointer copies.
+        if (langId) *langId = 0;
+        if (length) *length = 0;
+        return NULL;
+    }
+    uint16 label = pfeatureref->getNameId();
+    NameTable * names = pfeatureref->getFace().nameTable();
+    if (!names)
+    {
+        *langId = 0;
+        *length = 0;
+        return NULL;
+    }
+    return names->getName(*langId, label, utf, *length);
+}
+
+
+// Fetches the label of one setting of a feature from the face's name table.
+//   setting - index of the setting; must be below getNumSettings()
+//   langId  - in: language passed to the name lookup; out: value left by it
+//   utf     - encoding passed to the name lookup
+//   length  - out: length reported by the name lookup
+// Returns the buffer produced by NameTable::getName(); the file provides
+// gr_label_destroy() for releasing labels. On failure (NULL feature,
+// out-of-range setting, NULL out-pointer or no name table) NULL is returned
+// and the outputs that were supplied are set to 0.
+void* gr_fref_value_label(const gr_feature_ref*pfeatureref, gr_uint16 setting,
+ gr_uint16 *langId, gr_encform utf, gr_uint32 *length)
+{
+    if (!pfeatureref || (setting >= pfeatureref->getNumSettings()) || !langId || !length)
+    {
+        // Zero what the caller can observe, not the local pointer copies.
+        if (langId) *langId = 0;
+        if (length) *length = 0;
+        return NULL;
+    }
+    uint16 label = pfeatureref->getSettingName(setting);
+    NameTable * names = pfeatureref->getFace().nameTable();
+    if (!names)
+    {
+        *langId = 0;
+        *length = 0;
+        return NULL;
+    }
+    return names->getName(*langId, label, utf, *length);
+}
+
+
+// Releases a label buffer by passing it to free().
+// NOTE(review): presumably meant for the buffers returned by gr_fref_label /
+// gr_fref_value_label - confirm that NameTable::getName allocates with malloc.
+void gr_label_destroy(void * label)
+{
+ free(label);
+}
+
+// Copies `pfeatures`, or default-constructs a Features object when it is NULL.
+gr_feature_val* gr_featureval_clone(const gr_feature_val* pfeatures/*may be NULL*/)
+{ //When finished with the Features, call features_destroy
+    Features * copy;
+    if (pfeatures)
+        copy = new Features(*pfeatures);
+    else
+        copy = new Features;
+    return static_cast<gr_feature_val*>(copy);
+}
+
+// Destroys a feature value set obtained from the clone functions.
+void gr_featureval_destroy(gr_feature_val *p)
+{
+    Features * const impl = static_cast<Features*>(p);
+    delete impl;
+}
+
+
+} // extern "C"
diff --git a/thirdparty/graphite/src/gr_font.cpp b/thirdparty/graphite/src/gr_font.cpp
new file mode 100644
index 0000000000..724cc83c13
--- /dev/null
+++ b/thirdparty/graphite/src/gr_font.cpp
@@ -0,0 +1,74 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include "graphite2/Font.h"
+#include "inc/Font.h"
+
+
+using namespace graphite2;
+
+extern "C" {
+
+// Writes the compiled-in GR2_VERSION_* numbers to whichever of the three
+// output pointers are non-NULL.
+void gr_engine_version(int *nMajor, int *nMinor, int *nBugFix)
+{
+    if (nMajor != 0)
+    {
+        *nMajor = GR2_VERSION_MAJOR;
+    }
+    if (nMinor != 0)
+    {
+        *nMinor = GR2_VERSION_MINOR;
+    }
+    if (nBugFix != 0)
+    {
+        *nBugFix = GR2_VERSION_BUGFIX;
+    }
+}
+
+// Creates a font with no application handle and no advance callback.
+gr_font* gr_make_font(float ppm/*pixels per em*/, const gr_face *face)
+{
+    gr_font * const font = gr_make_font_with_advance_fn(ppm, 0, 0, face);
+    return font;
+}
+
+
+// Creates a font of `ppm` pixels per em for `face`, using `font_ops` for
+// application-supplied metrics. Returns 0 when `face` is NULL, `ppm` is
+// not positive, or the constructed Font tests false.
+gr_font* gr_make_font_with_ops(float ppm/*pixels per em*/, const void* appFontHandle/*non-NULL*/, const gr_font_ops * font_ops, const gr_face * face/*needed for scaling*/)
+{ //the appFontHandle must stay alive all the time when the gr_font is alive. When finished with the gr_font, call destroy_gr_font
+    if (face == 0 || ppm <= 0) return 0;
+
+    Font * const font = new Font(ppm, *face, appFontHandle, font_ops);
+    const bool usable = *font ? true : false;
+    if (!usable)
+    {
+        delete font;
+        return 0;
+    }
+    return static_cast<gr_font*>(font);
+}
+
+// Convenience wrapper: builds a gr_font_ops holding only the advance
+// callback (third member left NULL) and defers to gr_make_font_with_ops().
+gr_font* gr_make_font_with_advance_fn(float ppm/*pixels per em*/, const void* appFontHandle/*non-NULL*/, gr_advance_fn getAdvance, const gr_face * face/*needed for scaling*/)
+{
+    const gr_font_ops advance_ops = {sizeof(gr_font_ops), getAdvance, NULL};
+    gr_font * const font = gr_make_font_with_ops(ppm, appFontHandle, &advance_ops, face);
+    return font;
+}
+
+// Destroys a font created by one of the gr_make_font* functions.
+void gr_font_destroy(gr_font *font)
+{
+    Font * const impl = static_cast<Font*>(font);
+    delete impl;
+}
+
+
+} // extern "C"
diff --git a/thirdparty/graphite/src/gr_logging.cpp b/thirdparty/graphite/src/gr_logging.cpp
new file mode 100644
index 0000000000..8f1e675e62
--- /dev/null
+++ b/thirdparty/graphite/src/gr_logging.cpp
@@ -0,0 +1,267 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include <cstdio>
+
+#include "graphite2/Log.h"
+#include "inc/debug.h"
+#include "inc/CharInfo.h"
+#include "inc/Slot.h"
+#include "inc/Segment.h"
+#include "inc/json.h"
+#include "inc/Collider.h"
+
+#if defined _WIN32
+#include "windows.h"
+#endif
+
+using namespace graphite2;
+
+#if !defined GRAPHITE2_NTRACING
+json *global_log = 0;
+#endif
+
+extern "C" {
+
+// Starts JSON trace logging to the file at `log_path` (UTF-8).
+//   face     - face to attach the logger to; when NULL the process-wide
+//              global_log is used instead
+//   log_path - file opened for writing in text mode
+// Any logger already active for the same target is stopped first.
+// Returns true once the log's opening array has been written; false when
+// the path is NULL or unusable, the file cannot be opened, the logger cannot
+// be created, or tracing is compiled out (GRAPHITE2_NTRACING).
+bool gr_start_logging(GR_MAYBE_UNUSED gr_face * face, const char *log_path)
+{
+    if (!log_path) return false;
+
+#if !defined GRAPHITE2_NTRACING
+    gr_stop_logging(face);
+#if defined _WIN32
+    int n = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, log_path, -1, 0, 0);
+    if (n == 0 || n > MAX_PATH - 12) return false;
+
+    LPWSTR wlog_path = gralloc<WCHAR>(n);
+    if (!wlog_path) return false;
+    FILE *log = 0;
+    if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, log_path, -1, wlog_path, n))
+        log = _wfopen(wlog_path, L"wt");
+
+    free(wlog_path);
+#else // _WIN32
+    FILE *log = fopen(log_path, "wt");
+#endif // _WIN32
+    if (!log) return false;
+
+    if (face)
+    {
+        face->setLogger(log);
+        if (!face->logger())
+        {
+            // No logger adopted the stream, so close it here instead of
+            // leaking the handle (gr_stop_logging closes it otherwise).
+            fclose(log);
+            return false;
+        }
+
+        *face->logger() << json::array;
+#ifdef GRAPHITE2_TELEMETRY
+        *face->logger() << face->tele;
+#endif
+    }
+    else
+    {
+        global_log = new json(log);
+        if (!global_log)
+        {
+            fclose(log);
+            return false;
+        }
+        *global_log << json::array;
+    }
+
+    return true;
+#else // GRAPHITE2_NTRACING
+    return false;
+#endif // GRAPHITE2_NTRACING
+}
+
+// Stub entry point: both arguments are ignored and false is always
+// returned. The earlier implementation is kept below, commented out.
+bool graphite_start_logging(FILE * /* log */, GrLogMask /* mask */)
+{
+//#if !defined GRAPHITE2_NTRACING
+// graphite_stop_logging();
+//
+// if (!log) return false;
+//
+// dbgout = new json(log);
+// if (!dbgout) return false;
+//
+// *dbgout << json::array;
+// return true;
+//#else
+ return false;
+//#endif
+}
+
+// Stops logging for `face`, or for the global log when `face` is NULL.
+// The stream is fetched first, then the logger is destroyed, and only then
+// is the file closed. Does nothing when no logger is active or when tracing
+// is compiled out.
+void gr_stop_logging(GR_MAYBE_UNUSED gr_face * face)
+{
+#if !defined GRAPHITE2_NTRACING
+    if (face)
+    {
+        if (face->logger())
+        {
+            FILE * const stream = face->logger()->stream();
+            face->setLogger(0);
+            fclose(stream);
+        }
+    }
+    else if (global_log)
+    {
+        FILE * const stream = global_log->stream();
+        delete global_log;
+        global_log = 0;
+        fclose(stream);
+    }
+#endif
+}
+
+// Stub entry point: does nothing. The earlier implementation is kept
+// below, commented out.
+void graphite_stop_logging()
+{
+// if (dbgout) delete dbgout;
+// dbgout = 0;
+}
+
+} // extern "C"
+
+#ifdef GRAPHITE2_TELEMETRY
+size_t * graphite2::telemetry::_category = 0UL;
+#endif
+
+#if !defined GRAPHITE2_NTRACING
+
+#ifdef GRAPHITE2_TELEMETRY
+
+// Writes a "telemetry" JSON object containing each counter of `t` and a
+// "total" that is the sum of the seven counters listed above it.
+json & graphite2::operator << (json & j, const telemetry & t) throw()
+{
+ j << json::object
+ << "type" << "telemetry"
+ << "silf" << t.silf
+ << "states" << t.states
+ << "starts" << t.starts
+ << "transitions" << t.transitions
+ << "glyphs" << t.glyph
+ << "code" << t.code
+ << "misc" << t.misc
+ << "total" << (t.silf + t.states + t.starts + t.transitions + t.glyph + t.code + t.misc)
+ << json::close;
+ return j;
+}
+#else
+// Telemetry disabled (GRAPHITE2_TELEMETRY not defined): writes nothing and
+// returns the stream unchanged.
+json & graphite2::operator << (json & j, const telemetry &) throw()
+{
+ return j;
+}
+#endif
+
+
+// Writes one CharInfo as a JSON object: offset, unicode, break and flags,
+// followed by a flat nested "slot" object holding before/after.
+json & graphite2::operator << (json & j, const CharInfo & ci) throw()
+{
+ return j << json::object
+ << "offset" << ci.base()
+ << "unicode" << ci.unicodeChar()
+ << "break" << ci.breakWeight()
+ << "flags" << ci.flags()
+ << "slot" << json::flat << json::object
+ << "before" << ci.before()
+ << "after" << ci.after()
+ << json::close
+ << json::close;
+}
+
+
+// Writes one slot as a JSON object. `ds` pairs the owning segment (first)
+// with the slot (second); both must be non-NULL (asserted).
+// Always emitted: id, gid, charinfo, origin, shift, advance, insert, break
+// and the "user" attribute array. Emitted only when applicable:
+// justification, bidi, parent, children and collision.
+json & graphite2::operator << (json & j, const dslot & ds) throw()
+{
+ assert(ds.first);
+ assert(ds.second);
+ const Segment & seg = *ds.first;
+ const Slot & s = *ds.second;
+ const SlotCollision *cslot = seg.collisionInfo(ds.second);
+
+ j << json::object
+ << "id" << objectid(ds)
+ << "gid" << s.gid()
+ << "charinfo" << json::flat << json::object
+ << "original" << s.original()
+ << "before" << s.before()
+ << "after" << s.after()
+ << json::close
+ << "origin" << s.origin()
+ << "shift" << Position(float(s.getAttr(0, gr_slatShiftX, 0)),
+ float(s.getAttr(0, gr_slatShiftY, 0)))
+ << "advance" << s.advancePos()
+ << "insert" << s.isInsertBefore()
+ << "break" << s.getAttr(&seg, gr_slatBreak, 0);
+ // Optional members: only written when they carry information.
+ if (s.just() > 0)
+ j << "justification" << s.just();
+ if (s.getBidiLevel() > 0)
+ j << "bidi" << s.getBidiLevel();
+ // Attached (non-base) slots record the slot they hang off.
+ if (!s.isBase())
+ j << "parent" << json::flat << json::object
+ << "id" << objectid(dslot(&seg, s.attachedTo()))
+ << "level" << s.getAttr(0, gr_slatAttLevel, 0)
+ << "offset" << s.attachOffset()
+ << json::close;
+ // One array entry per user attribute, seg.numAttrs() in total.
+ j << "user" << json::flat << json::array;
+ for (int n = 0; n!= seg.numAttrs(); ++n)
+ j << s.userAttrs()[n];
+ j << json::close;
+ // Children are listed by object id, walking the sibling chain.
+ if (s.firstChild())
+ {
+ j << "children" << json::flat << json::array;
+ for (const Slot *c = s.firstChild(); c; c = c->nextSibling())
+ j << objectid(dslot(&seg, c));
+ j << json::close;
+ }
+ if (cslot)
+ {
+ // Note: the reason for using Positions to lump together related attributes is to make the
+ // JSON output slightly more compact.
+ j << "collision" << json::flat << json::object
+// << "shift" << cslot->shift() -- not used pass level, only within the collision routine itself
+ << "offset" << cslot->offset()
+ << "limit" << cslot->limit()
+ << "flags" << cslot->flags()
+ << "margin" << Position(cslot->margin(), cslot->marginWt())
+ << "exclude" << cslot->exclGlyph()
+ << "excludeoffset" << cslot->exclOffset();
+ // Sequence attributes are written only when a sequence order is set.
+ if (cslot->seqOrder() != 0)
+ {
+ j << "seqclass" << Position(cslot->seqClass(), cslot->seqProxClass())
+ << "seqorder" << cslot->seqOrder()
+ << "seqabove" << Position(cslot->seqAboveXoff(), cslot->seqAboveWt())
+ << "seqbelow" << Position(cslot->seqBelowXlim(), cslot->seqBelowWt())
+ << "seqvalign" << Position(cslot->seqValignHt(), cslot->seqValignWt());
+ }
+ j << json::close;
+ }
+ return j << json::close;
+}
+
+
+// Builds the textual id of a slot: the high and low 16 bits of the slot's
+// address (truncated to 32 bits) around the user attribute stored at index
+// silf()->numUser(), or 0 for a NULL slot.
+graphite2::objectid::objectid(const dslot & ds) throw()
+{
+    const Slot * const p = ds.second;
+    uint32 s = uint32(reinterpret_cast<size_t>(p));
+    // snprintf keeps the write inside `name` whatever the field widths expand to.
+    snprintf(name, sizeof name, "%.4x-%.2x-%.4hx", uint16(s >> 16), uint16(p ? p->userAttrs()[ds.first->silf()->numUser()] : 0), uint16(s));
+    name[sizeof name-1] = 0;
+}
+
+// Builds the textual id of a segment: the high and low 16 bits of the
+// segment's address (truncated to 32 bits) around a zero middle field.
+graphite2::objectid::objectid(const Segment * const p) throw()
+{
+    uint32 s = uint32(reinterpret_cast<size_t>(p));
+    // snprintf keeps the write inside `name` whatever the field widths expand to.
+    snprintf(name, sizeof name, "%.4x-%.2x-%.4hx", uint16(s >> 16), 0, uint16(s));
+    name[sizeof name-1] = 0;
+}
+
+#endif
diff --git a/thirdparty/graphite/src/gr_segment.cpp b/thirdparty/graphite/src/gr_segment.cpp
new file mode 100644
index 0000000000..7a27e9c562
--- /dev/null
+++ b/thirdparty/graphite/src/gr_segment.cpp
@@ -0,0 +1,175 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include "graphite2/Segment.h"
+#include "inc/UtfCodec.h"
+#include "inc/Segment.h"
+
+using namespace graphite2;
+
+namespace
+{
+
+    // Creates a Segment for `nChars` characters of text, reads the text,
+    // runs the Graphite rules and finalises positions with `font`.
+    // Returns NULL (after deleting the segment) when reading the text or
+    // running Graphite fails.
+    gr_segment* makeAndInitialize(const Font *font, const Face *face, uint32 script, const Features* pFeats/*must not be NULL*/, gr_encform enc, const void* pStart, size_t nChars, int dir)
+    {
+        // Turn trailing space (0x20) padding bytes of the script tag into
+        // zero bytes, starting from the least significant byte.
+        for (unsigned shift = 0; shift < 32 && ((script >> shift) & 0xFFu) == 0x20u; shift += 8)
+            script &= ~(uint32(0xFFu) << shift);
+        // if (!font) return NULL;
+        Segment* const seg = new Segment(nChars, face, script, dir);
+
+        const bool shaped = seg->read_text(face, pFeats, enc, pStart, nChars) && seg->runGraphite();
+        if (!shaped)
+        {
+            delete seg;
+            return NULL;
+        }
+
+        seg->finalise(font, true);
+        return static_cast<gr_segment*>(seg);
+    }
+
+ // Counts the characters decoded from `first`.
+ // With a non-NULL `last` the range is validated first and counting stops
+ // at `last`, at a NUL character or at a decoding error; with a NULL `last`
+ // counting stops at a NUL character or a decoding error.
+ // When `error` is non-NULL it receives `last - 1` if validation fails (the
+ // count is then 0), otherwise the iterator position if decoding failed, or 0.
+ template <typename utf_iter>
+ inline size_t count_unicode_chars(utf_iter first, const utf_iter last, const void **error)
+ {
+ size_t n_chars = 0;
+ uint32 usv = 0;
+
+ if (last)
+ {
+ if (!first.validate(last))
+ {
+ if (error) *error = last - 1;
+ return 0;
+ }
+ for (;first != last; ++first, ++n_chars)
+ if ((usv = *first) == 0 || first.error()) break;
+ }
+ else
+ {
+ while ((usv = *first) != 0 && !first.error())
+ {
+ ++first;
+ ++n_chars;
+ }
+ }
+
+ if (error) *error = first.error() ? first : 0;
+ return n_chars;
+ }
+}
+
+
+extern "C" {
+
+// Counts the characters in a UTF-8/16/32 buffer by dispatching to
+// count_unicode_chars with the iterator type matching `enc`.
+// An unrecognised encoding yields 0.
+size_t gr_count_unicode_characters(gr_encform enc, const void* buffer_begin, const void* buffer_end/*don't go on or past end, If NULL then ignored*/, const void** pError) //Also stops on nul. Any nul is not in the count
+{
+    assert(buffer_begin);
+
+    if (enc == gr_utf8)
+        return count_unicode_chars<utf8::const_iterator>(buffer_begin, buffer_end, pError);
+    if (enc == gr_utf16)
+        return count_unicode_chars<utf16::const_iterator>(buffer_begin, buffer_end, pError);
+    if (enc == gr_utf32)
+        return count_unicode_chars<utf32::const_iterator>(buffer_begin, buffer_end, pError);
+    return 0;
+}
+
+
+// Builds a shaped segment for the given text. When `pFeats` is NULL a
+// temporary clone of the face's default features (cloneFeatures(0)) is used
+// and deleted again before returning. Returns nullptr when `face` is NULL,
+// otherwise the result of makeAndInitialize().
+gr_segment* gr_make_seg(const gr_font *font, const gr_face *face, gr_uint32 script, const gr_feature_val* pFeats, gr_encform enc, const void* pStart, size_t nChars, int dir)
+{
+    if (!face) return nullptr;
+
+    const gr_feature_val * owned_defaults = 0;
+    if (!pFeats)
+    {
+        owned_defaults = static_cast<const gr_feature_val*>(face->theSill().cloneFeatures(0));
+        pFeats = owned_defaults;
+    }
+
+    gr_segment * const seg = makeAndInitialize(font, face, script, pFeats, enc, pStart, nChars, dir);
+    delete static_cast<const FeatureVal*>(owned_defaults);
+    return seg;
+}
+
+
+// Destroys a segment created by gr_make_seg.
+void gr_seg_destroy(gr_segment* p)
+{
+    Segment * const impl = static_cast<Segment*>(p);
+    delete impl;
+}
+
+
+// Returns the x component of the segment's advance().
+float gr_seg_advance_X(const gr_segment* pSeg/*not NULL*/)
+{
+    assert(pSeg);
+    const float advance_x = pSeg->advance().x;
+    return advance_x;
+}
+
+
+// Returns the y component of the segment's advance().
+float gr_seg_advance_Y(const gr_segment* pSeg/*not NULL*/)
+{
+    assert(pSeg);
+    const float advance_y = pSeg->advance().y;
+    return advance_y;
+}
+
+
+// Returns charInfoCount() of the segment as an unsigned int.
+unsigned int gr_seg_n_cinfo(const gr_segment* pSeg/*not NULL*/)
+{
+    assert(pSeg);
+    const unsigned int count = static_cast<unsigned int>(pSeg->charInfoCount());
+    return count;
+}
+
+
+// Returns the character info at `index` via charinfo(index); the index is
+// not range-checked here.
+const gr_char_info* gr_seg_cinfo(const gr_segment* pSeg/*not NULL*/, unsigned int index/*must be <number_of_CharInfo*/)
+{
+    assert(pSeg);
+    const gr_char_info* const info = static_cast<const gr_char_info*>(pSeg->charinfo(index));
+    return info;
+}
+
+// Returns slotCount() of the segment as an unsigned int.
+unsigned int gr_seg_n_slots(const gr_segment* pSeg/*not NULL*/)
+{
+    assert(pSeg);
+    const unsigned int count = static_cast<unsigned int>(pSeg->slotCount());
+    return count;
+}
+
+// Returns the segment's first() slot.
+const gr_slot* gr_seg_first_slot(gr_segment* pSeg/*not NULL*/)
+{
+    assert(pSeg);
+    const gr_slot* const head = static_cast<const gr_slot*>(pSeg->first());
+    return head;
+}
+
+// Returns the segment's last() slot.
+const gr_slot* gr_seg_last_slot(gr_segment* pSeg/*not NULL*/)
+{
+    assert(pSeg);
+    const gr_slot* const tail = static_cast<const gr_slot*>(pSeg->last());
+    return tail;
+}
+
+// Forwards to Segment::justify(), stripping const from the slot pointers
+// and narrowing `width` to float. Returns what justify() returns.
+float gr_seg_justify(gr_segment* pSeg/*not NULL*/, const gr_slot* pSlot/*not NULL*/, const gr_font *pFont, double width, enum gr_justFlags flags, const gr_slot *pFirst, const gr_slot *pLast)
+{
+    assert(pSeg);
+    assert(pSlot);
+    gr_slot * const start = const_cast<gr_slot *>(pSlot);
+    gr_slot * const first = const_cast<gr_slot *>(pFirst);
+    gr_slot * const last = const_cast<gr_slot *>(pLast);
+    return pSeg->justify(start, pFont, float(width), justFlags(flags), first, last);
+}
+
+} // extern "C"
diff --git a/thirdparty/graphite/src/gr_slot.cpp b/thirdparty/graphite/src/gr_slot.cpp
new file mode 100644
index 0000000000..a3c6b46a7f
--- /dev/null
+++ b/thirdparty/graphite/src/gr_slot.cpp
@@ -0,0 +1,173 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#include "graphite2/Segment.h"
+#include "inc/Segment.h"
+#include "inc/Slot.h"
+#include "inc/Font.h"
+
+
+extern "C" {
+
+
+// Returns the slot's next() neighbour.
+const gr_slot* gr_slot_next_in_segment(const gr_slot* p/*not NULL*/)
+{
+    assert(p);
+    const gr_slot* const following = static_cast<const gr_slot*>(p->next());
+    return following;
+}
+
+// Returns the slot's prev() neighbour.
+const gr_slot* gr_slot_prev_in_segment(const gr_slot* p/*not NULL*/)
+{
+    assert(p);
+    const gr_slot* const preceding = static_cast<const gr_slot*>(p->prev());
+    return preceding;
+}
+
+// Returns the slot this one is attached to, via attachedTo().
+const gr_slot* gr_slot_attached_to(const gr_slot* p/*not NULL*/) //returns NULL iff base. If called repeatedly on result, will get to a base
+{
+    assert(p);
+    const gr_slot* const parent = static_cast<const gr_slot*>(p->attachedTo());
+    return parent;
+}
+
+
+// Returns the first slot attached to this one, via firstChild().
+const gr_slot* gr_slot_first_attachment(const gr_slot* p/*not NULL*/) //returns NULL iff no attachments.
+{ //if slot_first_attachment(p) is not NULL, then slot_attached_to(slot_first_attachment(p))==p.
+    assert(p);
+    const gr_slot* const child = static_cast<const gr_slot*>(p->firstChild());
+    return child;
+}
+
+
+// Returns the next slot attached to the same parent, via nextSibling().
+const gr_slot* gr_slot_next_sibling_attachment(const gr_slot* p/*not NULL*/) //returns NULL iff no more attachments.
+{ //if slot_next_sibling_attachment(p) is not NULL, then slot_attached_to(slot_next_sibling_attachment(p))==slot_attached_to(p).
+    assert(p);
+    const gr_slot* const sibling = static_cast<const gr_slot*>(p->nextSibling());
+    return sibling;
+}
+
+
+// Returns the slot's glyph() id.
+unsigned short gr_slot_gid(const gr_slot* p/*not NULL*/)
+{
+    assert(p);
+    const unsigned short glyph_id = p->glyph();
+    return glyph_id;
+}
+
+
+// Returns the x component of the slot's origin().
+float gr_slot_origin_X(const gr_slot* p/*not NULL*/)
+{
+    assert(p);
+    const float origin_x = p->origin().x;
+    return origin_x;
+}
+
+
+// Returns the y component of the slot's origin().
+float gr_slot_origin_Y(const gr_slot* p/*not NULL*/)
+{
+    assert(p);
+    const float origin_y = p->origin().y;
+    return origin_y;
+}
+
+
+// Returns the slot's horizontal advance.
+// Without a font the unscaled advance() is returned. With a font the
+// advance is scaled by font->scale(); if in addition a face is given, the
+// font is hinted and the glyph id is within the face's glyph count, the
+// glyph's design advance is replaced by font->advance(gid).
+float gr_slot_advance_X(const gr_slot* p/*not NULL*/, const gr_face *face, const gr_font *font)
+{
+    assert(p);
+    float res = p->advance();
+    if (!font)
+        return res;
+
+    const float scale = font->scale();
+    const int gid = p->glyph();
+    const bool use_hinted = face && font->isHinted() && gid < face->glyphs().numGlyphs();
+    if (use_hinted)
+        res = (res - face->glyphs().glyph(gid)->theAdvance().x) * scale + font->advance(gid);
+    else
+        res = res * scale;
+    return res;
+}
+
+// Returns the y component of the slot's advancePos(), multiplied by
+// font->scale() when a font is given. `face` is unused.
+float gr_slot_advance_Y(const gr_slot *p/*not NULL*/, GR_MAYBE_UNUSED const gr_face *face, const gr_font *font)
+{
+    assert(p);
+    const float res = p->advancePos().y;
+    return font ? res * font->scale() : res;
+}
+
+// Forwards to before() on the slot.
+int gr_slot_before(const gr_slot* p/*not NULL*/)
+{
+    assert(p);
+    const int char_before = p->before();
+    return char_before;
+}
+
+
+// Forwards to after() on the slot.
+int gr_slot_after(const gr_slot* p/*not NULL*/)
+{
+    assert(p);
+    const int char_after = p->after();
+    return char_after;
+}
+
+// Forwards to index() on the slot.
+unsigned int gr_slot_index(const gr_slot *p/*not NULL*/)
+{
+    assert(p);
+    const unsigned int position = p->index();
+    return position;
+}
+
+// Reads slot attribute `index` (with `subindex`) through getAttr(); only
+// `p` is asserted, `pSeg` is passed on unchecked.
+int gr_slot_attr(const gr_slot* p/*not NULL*/, const gr_segment* pSeg/*not NULL*/, gr_attrCode index, gr_uint8 subindex)
+{
+    assert(p);
+    const int value = p->getAttr(pSeg, index, subindex);
+    return value;
+}
+
+
+// Returns 1 when isInsertBefore() is true for the slot, otherwise 0.
+int gr_slot_can_insert_before(const gr_slot* p/*not NULL*/)
+{
+    assert(p);
+    if (p->isInsertBefore())
+        return 1;
+    return 0;
+}
+
+
+// Forwards to original() on the slot.
+int gr_slot_original(const gr_slot* p/*not NULL*/)
+{
+    assert(p);
+    const int original_index = p->original();
+    return original_index;
+}
+
+// Cuts the slot chain immediately before `p`: the previous slot loses its
+// sibling and next links and `p` loses its prev link.
+// When `p` has no previous slot there is nothing to cut and the call is a
+// no-op.
+void gr_slot_linebreak_before(gr_slot* p/*not NULL*/)
+{
+    assert(p);
+    gr_slot *prev = (gr_slot *)p->prev();
+    // NOTE(review): prev() is presumably NULL for the first slot of a
+    // segment; guard so that case does not dereference NULL.
+    if (!prev) return;
+    prev->sibling(NULL);
+    prev->next(NULL);
+    p->prev(NULL);
+}
+
+// Disabled accessor: compiled out with #if 0 and therefore not part of the
+// exported C API.
+#if 0 //what should this be
+size_t id(const gr_slot* p/*not NULL*/)
+{
+ return (size_t)p->id();
+}
+#endif
+
+
+} // extern "C"
diff --git a/thirdparty/graphite/src/inc/CharInfo.h b/thirdparty/graphite/src/inc/CharInfo.h
new file mode 100644
index 0000000000..01e7e31ac9
--- /dev/null
+++ b/thirdparty/graphite/src/inc/CharInfo.h
@@ -0,0 +1,66 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+#include "inc/Main.h"
+
+
+namespace graphite2 {
+
+class CharInfo
+{
+
+public:
+    // A new record has character 0, before/after both -1 and every other
+    // field zero.
+    CharInfo()
+    : m_char(0),
+      m_before(-1),
+      m_after(-1),
+      m_base(0),
+      m_featureid(0),
+      m_break(0),
+      m_flags(0)
+    {}
+
+    // Character code.
+    void init(int cid)                  { m_char = cid; }
+    unsigned int unicodeChar() const    { return m_char; }
+
+    // Offset into the input string.
+    size_t base() const                 { return m_base; }
+    void base(size_t offset)            { m_base = offset; }
+
+    // Slot indices bracketing this character.
+    int before() const                  { return m_before; }
+    void before(int val)                { m_before = val; }
+    int after() const                   { return m_after; }
+    void after(int val)                 { m_after = val; }
+
+    // Feature index; held in a uint8, so the int passed in is narrowed.
+    void feats(int offset)              { m_featureid = offset; }
+    int fid() const                     { return m_featureid; }
+
+    // Break weight; held in an int8, so the int passed in is narrowed.
+    int breakWeight() const             { return m_break; }
+    void breakWeight(int val)           { m_break = val; }
+
+    // Flag bits; addflags only ORs bits in.
+    void addflags(uint8 val)            { m_flags |= val; }
+    uint8 flags() const                 { return m_flags; }
+
+    CLASS_NEW_DELETE
+private:
+    int m_char; // Unicode character from character stream
+    int m_before; // slot index before us, comes before
+    int m_after; // slot index after us, comes after
+    size_t m_base; // offset into input string corresponding to this charinfo
+    uint8 m_featureid; // index into features list in the segment
+    int8 m_break; // breakweight coming from lb table
+    uint8 m_flags; // 0,1 segment split.
+};
+
+} // namespace graphite2
+
+struct gr_char_info : public graphite2::CharInfo {}; // empty public subclass: gives CharInfo a global-namespace name (presumably the handle type used by the C API)
diff --git a/thirdparty/graphite/src/inc/CmapCache.h b/thirdparty/graphite/src/inc/CmapCache.h
new file mode 100644
index 0000000000..7820c958b0
--- /dev/null
+++ b/thirdparty/graphite/src/inc/CmapCache.h
@@ -0,0 +1,82 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include "inc/Main.h"
+#include "inc/Face.h"
+
+namespace graphite2 {
+
+class Face;
+
+class Cmap // polymorphic base for the cmap classes below; its own lookups return 0 and it converts to false
+{
+public:
+
+ virtual ~Cmap() throw() {}
+
+ virtual uint16 operator [] (const uint32) const throw() { return 0; } // base lookup: ignores its argument and returns 0
+
+ virtual operator bool () const throw() { return false; } // base object always tests false
+
+ CLASS_NEW_DELETE;
+};
+
+class DirectCmap : public Cmap // Cmap built from a Face; keeps a Face::Table and two raw pointers (_smp, _bmp)
+{
+ DirectCmap(const DirectCmap &); // private declaration: copy construction is unavailable to users of the class
+ DirectCmap & operator = (const DirectCmap &); // private declaration: assignment is unavailable to users of the class
+
+public:
+ DirectCmap(const Face &);
+ virtual uint16 operator [] (const uint32 usv) const throw(); // overrides Cmap's lookup; defined outside this header
+ virtual operator bool () const throw(); // overrides Cmap's validity test; defined outside this header
+
+ CLASS_NEW_DELETE;
+private:
+ const Face::Table _cmap;
+ const void * _smp, // NOTE(review): presumably the supplementary-plane subtable -- confirm in the implementation
+ * _bmp; // NOTE(review): presumably the basic-multilingual-plane subtable -- confirm in the implementation
+};
+
+class CachedCmap : public Cmap // Cmap built from a Face; keeps a table of uint16 blocks (m_blocks) and a BMP-only flag
+{
+ CachedCmap(const CachedCmap &); // private declaration: copy construction is unavailable to users of the class
+ CachedCmap & operator = (const CachedCmap &); // private declaration: assignment is unavailable to users of the class
+
+public:
+ CachedCmap(const Face &);
+ virtual ~CachedCmap() throw(); // user-declared destructor, defined outside this header
+ virtual uint16 operator [] (const uint32 usv) const throw(); // overrides Cmap's lookup; defined outside this header
+ virtual operator bool () const throw(); // overrides Cmap's validity test; defined outside this header
+ CLASS_NEW_DELETE;
+private:
+ bool m_isBmpOnly;
+ uint16 ** m_blocks; // pointer to pointers to uint16; allocation and layout are set up in the implementation
+};
+
+} // namespace graphite2
diff --git a/thirdparty/graphite/src/inc/Code.h b/thirdparty/graphite/src/inc/Code.h
new file mode 100644
index 0000000000..3cee67c81d
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Code.h
@@ -0,0 +1,171 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+// This class represents loaded graphite stack machine code. It performs
+// basic sanity checks on the incoming code to prevent the more obvious
+// problems from crashing graphite.
+// Author: Tim Eves
+
+#pragma once
+
+#include <cassert>
+#include <graphite2/Types.h>
+#include "inc/Main.h"
+#include "inc/Machine.h"
+
+namespace graphite2 {
+
+class Silf;
+class Face;
+
+enum passtype { // kind of pass handed to the Machine::Code constructor; values run consecutively from 0
+ PASS_TYPE_UNKNOWN = 0,
+ PASS_TYPE_LINEBREAK, // 1
+ PASS_TYPE_SUBSTITUTE, // 2
+ PASS_TYPE_POSITIONING, // 3
+ PASS_TYPE_JUSTIFICATION // 4
+};
+
+namespace vm {
+
+class Machine::Code // a loaded program: instruction array, data array, their sizes, a load status and a few property flags
+{
+public:
+ enum status_t // load result; enumerators are numbered consecutively from 0
+ {
+ loaded, // 0: the only status for which operator bool can be true
+ alloc_failed,
+ invalid_opcode,
+ unimplemented_opcode_used,
+ out_of_range_data,
+ jump_past_end,
+ arguments_exhausted,
+ missing_return,
+ nested_context_item,
+ underfull_stack // 9
+ };
+
+private:
+ class decoder;
+
+ instr * _code; // instruction array; null in a default-constructed object
+ byte * _data; // data array accompanying _code
+ size_t _data_size,
+ _instr_count;
+ byte _max_ref;
+ mutable status_t _status;
+ bool _constraint,
+ _modify,
+ _delete;
+ mutable bool _own; // mutable so the copy constructor and operator= can clear it on their const source
+
+ void release_buffers() throw ();
+ void failure(const status_t) throw();
+
+public:
+ static size_t estimateCodeDataOut(size_t num_bytecodes, int nRules, int nSlots); // byte-size estimate, defined inline below the class
+
+ Code() throw(); // empty program: null buffers, zero sizes, status loaded
+ Code(bool is_constraint, const byte * bytecode_begin, const byte * const bytecode_end,
+ uint8 pre_context, uint16 rule_length, const Silf &, const Face &,
+ enum passtype pt, byte * * const _out = 0);
+ Code(const Machine::Code &) throw(); // copies all fields, then clears _own on the source
+ ~Code() throw();
+
+ Code & operator=(const Code &rhs) throw();
+ operator bool () const throw() { return _code && status() == loaded; } // true only with a non-null _code and status loaded
+ status_t status() const throw() { return _status; }
+ bool constraint() const throw() { return _constraint; }
+ size_t dataSize() const throw() { return _data_size; }
+ size_t instructionCount() const throw() { return _instr_count; }
+ bool immutable() const throw() { return !(_delete || _modify); } // true when neither _delete nor _modify is set
+ bool deletes() const throw() { return _delete; }
+ size_t maxRef() const throw() { return _max_ref; }
+ void externalProgramMoved(ptrdiff_t) throw(); // shifts _code/_data by a byte distance when _own is false
+
+ int32 run(Machine &m, slotref * & map) const;
+
+ CLASS_NEW_DELETE;
+};
+
+inline
+size_t Machine::Code::estimateCodeDataOut(size_t n_bc, int nRules, int nSlots)
+{
+    // Upper bound on the bytes needed for code plus data:
+    //  - instructions: every bytecode could be an instruction, plus one
+    //    per rule, plus the maximum number of temporary copies
+    //  - data: at most one byte per bytecode
+    // The two areas are sized separately here and merged later.
+    const size_t max_instructions = n_bc + nRules + nSlots;
+    const size_t code_bytes = max_instructions * sizeof(instr);
+    const size_t data_bytes = n_bc * sizeof(byte);
+    return code_bytes + data_bytes;
+}
+
+
+// Default construction: no buffers, zero sizes, all flags false, status loaded.
+inline Machine::Code::Code() throw()
+: _code(0),
+  _data(0),
+  _data_size(0),
+  _instr_count(0),
+  _max_ref(0),
+  _status(loaded),
+  _constraint(false),
+  _modify(false),
+  _delete(false),
+  _own(false)
+{
+}
+
+// Copy construction duplicates every field of obj, then clears obj's _own
+// flag so that only the new object keeps the flag the source had.
+inline Machine::Code::Code(const Machine::Code &obj) throw ()
+: _code(obj._code), _data(obj._data),
+  _data_size(obj._data_size), _instr_count(obj._instr_count),
+  _max_ref(obj._max_ref), _status(obj._status),
+  _constraint(obj._constraint), _modify(obj._modify), _delete(obj._delete),
+  _own(obj._own)
+{
+    obj._own = false;
+}
+
+// Assignment releases this object's current buffers (when it has any
+// instructions), copies every field of rhs and then clears rhs's _own flag,
+// mirroring the copy constructor.
+inline Machine::Code & Machine::Code::operator=(const Machine::Code &rhs) throw() {
+    // Self-assignment must leave the object untouched: without this guard
+    // the buffers would be released and _own cleared on the same object.
+    if (this == &rhs)
+        return *this;
+
+    if (_instr_count > 0)
+        release_buffers();
+    _code = rhs._code;
+    _data = rhs._data;
+    _data_size = rhs._data_size;
+    _instr_count = rhs._instr_count;
+    _max_ref = rhs._max_ref;    // copied by the copy constructor as well; maxRef() must match rhs
+    _status = rhs._status;
+    _constraint = rhs._constraint;
+    _modify = rhs._modify;
+    _delete = rhs._delete;
+    _own = rhs._own;
+    rhs._own = false;
+    return *this;
+}
+
+// Re-bases _code and _data after the memory they point into has moved by
+// `dist` bytes. Does nothing when there is no code or when _own is set.
+inline void Machine::Code::externalProgramMoved(ptrdiff_t dist) throw()
+{
+    if (!_code || _own)
+        return;
+
+    // _data is a byte pointer, so it moves by dist directly; _code is an
+    // instr pointer, so the byte distance is converted to whole instr elements.
+    _data += dist;
+    _code += dist / signed(sizeof(instr));
+}
+
+} // namespace vm
+} // namespace graphite2
diff --git a/thirdparty/graphite/src/inc/Collider.h b/thirdparty/graphite/src/inc/Collider.h
new file mode 100644
index 0000000000..71e8400501
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Collider.h
@@ -0,0 +1,245 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include "inc/List.h"
+#include "inc/Position.h"
+#include "inc/Intervals.h"
+#include "inc/debug.h"
+
+namespace graphite2 {
+
+class json;
+class Slot;
+class Segment;
+
+#define SLOTCOLSETUINTPROP(x, y) uint16 x() const { return _ ##x; } void y (uint16 v) { _ ##x = v; } // expands to getter x() and setter y(v) over the uint16 member _x
+#define SLOTCOLSETINTPROP(x, y) int16 x() const { return _ ##x; } void y (int16 v) { _ ##x = v; } // same pair over an int16 member _x
+#define SLOTCOLSETPOSITIONPROP(x, y) const Position &x() const { return _ ##x; } void y (const Position &v) { _ ##x = v; } // same pair over a Position member _x; the getter returns a const reference
+
+// Slot attributes related to collision-fixing.
+// Holds a limit rectangle, three Position values and a set of 16-bit
+// attributes, each exposed through a getter/setter pair generated by the
+// SLOTCOLSET* macros.
+class SlotCollision
+{
+public:
+ enum { // single-bit values; NOTE(review): presumably combined in the flags() attribute -- confirm against callers
+ // COLL_TESTONLY = 0, // default - test other glyphs for collision with this one, but don't move this one
+ COLL_FIX = 1, // fix collisions involving this glyph
+ COLL_IGNORE = 2, // ignore this glyph altogether
+ COLL_START = 4, // start of range of possible collisions
+ COLL_END = 8, // end of range of possible collisions
+ COLL_KERN = 16, // collisions with this glyph are fixed by adding kerning space after it
+ COLL_ISCOL = 32, // this glyph has a collision
+ COLL_KNOWN = 64, // we've figured out what's happening with this glyph
+ COLL_ISSPACE = 128, // treat this glyph as a space with regard to kerning
+ COLL_TEMPLOCK = 256, // Lock glyphs that have been given priority positioning
+ ////COLL_JUMPABLE = 128, // moving glyphs may jump this stationary glyph in any direction - DELETE
+ ////COLL_OVERLAP = 256, // use maxoverlap to restrict - DELETE
+ };
+
+ // Behavior for the collision.order attribute. To GDL this is an enum, to us it's a bitfield, with only 1 bit set
+ // Allows for easier inversion.
+ enum {
+ SEQ_ORDER_LEFTDOWN = 1,
+ SEQ_ORDER_RIGHTUP = 2,
+ SEQ_ORDER_NOABOVE = 4,
+ SEQ_ORDER_NOBELOW = 8,
+ SEQ_ORDER_NOLEFT = 16,
+ SEQ_ORDER_NORIGHT = 32
+ };
+
+ SlotCollision(Segment *seg, Slot *slot);
+ void initFromSlot(Segment *seg, Slot *slot);
+
+ const Rect &limit() const { return _limit; }
+ void setLimit(const Rect &r) { _limit = r; }
+ SLOTCOLSETPOSITIONPROP(shift, setShift)
+ SLOTCOLSETPOSITIONPROP(offset, setOffset)
+ SLOTCOLSETPOSITIONPROP(exclOffset, setExclOffset)
+ SLOTCOLSETUINTPROP(margin, setMargin)
+ SLOTCOLSETUINTPROP(marginWt, setMarginWt)
+ SLOTCOLSETUINTPROP(flags, setFlags)
+ SLOTCOLSETUINTPROP(exclGlyph, setExclGlyph)
+ SLOTCOLSETUINTPROP(seqClass, setSeqClass)
+ SLOTCOLSETUINTPROP(seqProxClass, setSeqProxClass)
+ SLOTCOLSETUINTPROP(seqOrder, setSeqOrder)
+ SLOTCOLSETINTPROP(seqAboveXoff, setSeqAboveXoff)
+ SLOTCOLSETUINTPROP(seqAboveWt, setSeqAboveWt)
+ SLOTCOLSETINTPROP(seqBelowXlim, setSeqBelowXlim)
+ SLOTCOLSETUINTPROP(seqBelowWt, setSeqBelowWt)
+ SLOTCOLSETUINTPROP(seqValignHt, setSeqValignHt)
+ SLOTCOLSETUINTPROP(seqValignWt, setSeqValignWt)
+
+ float getKern(int dir) const;
+ bool ignore() const;
+
+private:
+ Rect _limit;
+ Position _shift; // adjustment within the given pass
+ Position _offset; // total adjustment for collisions
+ Position _exclOffset;
+ uint16 _margin;
+ uint16 _marginWt;
+ uint16 _flags;
+ uint16 _exclGlyph;
+ uint16 _seqClass;
+ uint16 _seqProxClass;
+ uint16 _seqOrder;
+ int16 _seqAboveXoff; // signed, unlike most of the attributes here
+ uint16 _seqAboveWt;
+ int16 _seqBelowXlim; // signed, unlike most of the attributes here
+ uint16 _seqBelowWt;
+ uint16 _seqValignHt;
+ uint16 _seqValignWt;
+
+}; // end of class SlotCollision
+
+struct BBox;
+struct SlantBox;
+
+class ShiftCollider // works on one target slot: initSlot() sets it up, mergeSlot() feeds in another slot, resolve() returns a Position
+{
+public:
+ typedef std::pair<float, float> fpair;
+ typedef Vector<fpair> vfpairs;
+ typedef vfpairs::iterator ivfpairs;
+
+ ShiftCollider(json *dbgout); // defined inline below the class
+ ~ShiftCollider() throw() { };
+
+ bool initSlot(Segment *seg, Slot *aSlot, const Rect &constraint,
+ float margin, float marginMin, const Position &currShift,
+ const Position &currOffset, int dir, GR_MAYBE_UNUSED json * const dbgout);
+ bool mergeSlot(Segment *seg, Slot *slot, const SlotCollision *cinfo, const Position &currShift, bool isAfter,
+ bool sameCluster, bool &hasCol, bool isExclusion, GR_MAYBE_UNUSED json * const dbgout);
+ Position resolve(Segment *seg, bool &isCol, GR_MAYBE_UNUSED json * const dbgout); // isCol is an output flag written through the reference
+ void addBox_slope(bool isx, const Rect &box, const BBox &bb, const SlantBox &sb, const Position &org, float weight, float m, bool minright, int mode);
+ void removeBox(const Rect &box, const BBox &bb, const SlantBox &sb, const Position &org, int mode);
+ const Position &origin() const { return _origin; }
+
+#if !defined GRAPHITE2_NTRACING
+ // JSON debug output helpers; declared only when tracing is compiled in.
+ void outputJsonDbg(json * const dbgout, Segment *seg, int axis);
+ void outputJsonDbgStartSlot(json * const dbgout, Segment *seg);
+ void outputJsonDbgEndSlot(json * const dbgout, Position resultPos, int bestAxis, bool isCol);
+ void outputJsonDbgOneVector(json * const dbgout, Segment *seg, int axis, float tleft, float bestCost, float bestVal);
+ void outputJsonDbgRawRanges(json * const dbgout, int axis);
+ void outputJsonDbgRemovals(json * const dbgout, int axis, Segment *seg);
+#endif
+
+ CLASS_NEW_DELETE;
+
+protected:
+ Zones _ranges[4]; // possible movements in 4 directions (horizontally, vertically, diagonally);
+ Slot * _target; // the glyph to fix
+ Rect _limit;
+ Position _currShift;
+ Position _currOffset;
+ Position _origin; // Base for all relative calculations
+ float _margin;
+ float _marginWt;
+ float _len[4]; // not set by the constructor
+ uint16 _seqClass;
+ uint16 _seqProxClass;
+ uint16 _seqOrder;
+
+ //bool _scraping[4];
+
+}; // end of class ShiftCollider
+
+// Starts with no target slot and zeroed margin and sequence attributes.
+// The _len array is not initialised here.
+inline
+ShiftCollider::ShiftCollider(GR_MAYBE_UNUSED json *dbgout)
+: _target(0), _margin(0.0), _marginWt(0.0),
+  _seqClass(0), _seqProxClass(0), _seqOrder(0)
+{
+#if !defined GRAPHITE2_NTRACING
+    // Tracing builds pass the debug sink to each of the four range sets.
+    for (Zones * zone = _ranges; zone != _ranges + 4; ++zone)
+        zone->setdebug(dbgout);
+#endif
+}
+
+class KernCollider // works on one target slot: initSlot() sets it up, mergeSlot() feeds in another slot, resolve() returns a Position
+{
+public:
+ KernCollider(json *dbg); // defined inline below the class
+ ~KernCollider() throw() { };
+ bool initSlot(Segment *seg, Slot *aSlot, const Rect &constraint, float margin,
+ const Position &currShift, const Position &offsetPrev, int dir,
+ float ymin, float ymax, json * const dbgout);
+ bool mergeSlot(Segment *seg, Slot *slot, const Position &currShift, float currSpace, int dir, json * const dbgout);
+ Position resolve(Segment *seg, Slot *slot, int dir, json * const dbgout);
+ void shift(const Position &mv, int dir);
+
+ CLASS_NEW_DELETE;
+
+private:
+ Slot * _target; // the glyph to fix
+ Rect _limit;
+ float _margin;
+ Position _offsetPrev; // kern from a previous pass
+ Position _currShift; // NOT USED??
+ float _miny; // y-coordinates offset by global slot position
+ float _maxy;
+ Vector<float> _edges; // edges of horizontal slices
+ float _sliceWidth; // width of each slice
+ float _mingap;
+ float _xbound; // max or min edge
+ bool _hit;
+
+#if !defined GRAPHITE2_NTRACING
+ // Debugging
+ Segment * _seg; // exists only in tracing builds; the constructor sets it to 0
+ Vector<float> _nearEdges; // closest potential collision in each slice
+ Vector<Slot*> _slotNear;
+#endif
+}; // end of class KernCollider
+
+
+// Returns x multiplied by itself.
+inline
+float sqr(float x) {
+    const float squared = x * x;
+    return squared;
+}
+
+// Starts with no target slot, zero margin/slice width/gap/bound, no hit, and
+// a y range of -1e38 .. 1e38.
+inline
+KernCollider::KernCollider(GR_MAYBE_UNUSED json *dbg)
+: _target(0), _margin(0.0f),
+  _miny(-1e38f), _maxy(1e38f),
+  _sliceWidth(0.0f), _mingap(0.0f),
+  _xbound(0.0), _hit(false)
+{
+#if !defined GRAPHITE2_NTRACING
+    // The segment pointer exists only in tracing builds.
+    _seg = 0;
+#endif
+}
+
+}; // end of namespace graphite2
diff --git a/thirdparty/graphite/src/inc/Compression.h b/thirdparty/graphite/src/inc/Compression.h
new file mode 100644
index 0000000000..9fe10e025d
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Compression.h
@@ -0,0 +1,104 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2015, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+
+#pragma once
+
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#if !defined(_MSC_VER)
+#include <stdint.h>
+#endif
+
+namespace
+{
+
+// Fixed-width unsigned aliases used by the copy helpers in this header.
+// MSVC builds use the compiler's __intN types; every other compiler uses the
+// <stdint.h> types.
+#if defined(_MSC_VER)
+typedef unsigned __int8 u8;
+typedef unsigned __int16 u16;
+typedef unsigned __int32 u32;
+typedef unsigned __int64 u64;
+#else
+// <stdint.h> is included at file scope with the other headers. Including it
+// here would declare its names inside this anonymous namespace whenever this
+// was the first inclusion, and its include guard would then keep them out of
+// the global namespace for the rest of the translation unit.
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+#endif
+
+ptrdiff_t const MINMATCH = 4, // NOTE(review): these names look like LZ4 block-format limits (this one the shortest match length) -- confirm against the decompressor
+ LASTLITERALS = 5,
+ MINCODA = LASTLITERALS+1, // derived value: 6
+ MINSRCSIZE = 13;
+
+// Copies exactly S bytes from s to d through memcpy, so neither pointer has
+// to meet any alignment requirement.
+template<int S>
+inline
+void unaligned_copy(void * d, void const * s) {
+    void * const dst = d;
+    void const * const src = s;
+    ::memcpy(dst, src, S);
+}
+
+// Rounds p up to the next multiple of sizeof(unsigned long); a value that is
+// already a multiple is returned unchanged.
+inline
+size_t align(size_t p) {
+    size_t const mask = sizeof(unsigned long) - 1;
+    return (p + mask) & ~mask;
+}
+
+// Copies n bytes one at a time from s to d, touching nothing outside the two
+// n-byte ranges. Returns the position just past the last byte written (d + n).
+inline
+u8 * safe_copy(u8 * d, u8 const * s, size_t n) {
+    for (u8 const * const stop = s + n; s != stop; ++s, ++d)
+        *d = *s;
+    return d;
+}
+
+// Copies n bytes from s to d in whole machine words. At least one word is
+// always copied, and the final word may extend past s + n and d + n, so both
+// buffers must have room for that overrun. Returns d + n.
+inline
+u8 * overrun_copy(u8 * d, u8 const * s, size_t n) {
+    size_t const word = sizeof(unsigned long);
+    u8 const * const src_end = s + n;
+    for (;;)
+    {
+        unaligned_copy<word>(d, s);
+        d += word;
+        s += word;
+        if (!(s < src_end))
+            break;
+    }
+    // Step the output pointer back by however far the last word overshot.
+    return d - (s - src_end);
+}
+
+
+// Copies n bytes from s to d: whole machine words first, then the remaining
+// bytes one at a time, so nothing outside the two n-byte ranges is touched.
+// Returns d + n.
+inline
+u8 * fast_copy(u8 * d, u8 const * s, size_t n) {
+    size_t const word = sizeof(unsigned long);
+    for (size_t words_left = n / word; words_left != 0; --words_left)
+    {
+        unaligned_copy<word>(d, s);
+        d += word;
+        s += word;
+    }
+    // The mask yields the remainder because the word size is a power of two.
+    return safe_copy(d, s, n & (word - 1));
+}
+
+
+} // end of anonymous namespace
diff --git a/thirdparty/graphite/src/inc/Decompressor.h b/thirdparty/graphite/src/inc/Decompressor.h
new file mode 100644
index 0000000000..10f21b7af1
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Decompressor.h
@@ -0,0 +1,54 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2015, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+
+#pragma once
+
+#include <cstddef>
+
+namespace lz4
+{
+
+// Decompress an LZ4 block.
+// Parameters:
+// @in - Input buffer containing an LZ4 block.
+// @in_size - Size of the input LZ4 block in bytes.
+// @out - Output buffer to hold decompressed results.
+// @out_size - The size of the buffer pointed to by @out.
+// Invariants:
+// @in - This buffer must be at least 1 machine word in length,
+// regardless of the actual LZ4 block size.
+// @in_size - This must be at least 4 and must not exceed the size of
+// the buffer allocated for @in.
+// @out - This must be bigger than the input buffer and at least
+// 13 bytes.
+// @out_size - Must always be big enough to hold the expected size.
+// Return:
+// -1 - Decompression failed.
+// size - Actual number of bytes decompressed.
+int decompress(void const *in, size_t in_size, void *out, size_t out_size);
+
+} // end of namespace lz4
diff --git a/thirdparty/graphite/src/inc/Endian.h b/thirdparty/graphite/src/inc/Endian.h
new file mode 100644
index 0000000000..56ecfd8667
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Endian.h
@@ -0,0 +1,111 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2011, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+
+/*
+Description:
+ A set of fast template based decoders for decoding values of any C integer
+ type up to long int size laid out with most significant byte first or least
+ significant byte first (aka big endian or little endian). These are CPU
+ byte order agnostic and will function the same regardless of the CPUs native
+ byte order.
+
+ Being template based means that if either the le or the be class is not
+ used, the template code of its unused functions will not be instantiated
+ by the compiler and thus shouldn't cause any overhead.
+*/
+
+#include <cstddef>
+
+#pragma once
+
+
+// Big-endian (most significant byte first) integer decoder. Results do not
+// depend on the byte order of the CPU running the code.
+class be
+{
+    // Decodes S bytes at p by splitting them into two halves: the half at
+    // the lower address supplies the high-order bits.
+    template<int S>
+    inline static unsigned long int _peek(const unsigned char * p) {
+        const unsigned long int upper = _peek<S/2>(p);
+        const unsigned long int lower = _peek<S/2>(p + S/2);
+        return (upper << ((S/2)*8)) | lower;
+    }
+public:
+    // Decodes a T from the bytes at p without moving anything.
+    template<typename T>
+    inline static T peek(const void * p) {
+        const unsigned char * const bytes = static_cast<const unsigned char *>(p);
+        return T(_peek<sizeof(T)>(bytes));
+    }
+
+    // Decodes a T from the bytes at p and advances p past them.
+    template<typename T>
+    inline static T read(const unsigned char * &p) {
+        const T value = peek<T>(p);
+        p += sizeof(T);
+        return value;
+    }
+
+    // Reinterprets the in-memory bytes of x as a big-endian T.
+    template<typename T>
+    inline static T swap(const T x) {
+        return peek<T>(&x);
+    }
+
+    // Advances p past n values of type T without decoding them.
+    template<typename T>
+    inline static void skip(const unsigned char * &p, size_t n=1) {
+        p += sizeof(T)*n;
+    }
+};
+
+// A single byte is its own value; this ends the halving in _peek<S>.
+template<>
+inline unsigned long int be::_peek<1>(const unsigned char * p) { return *p; }
+
+
+// Little-endian (least significant byte first) integer decoder. Results do
+// not depend on the byte order of the CPU running the code.
+class le
+{
+    // Decodes S bytes at p by splitting them into two halves: the half at
+    // the higher address supplies the high-order bits.
+    template<int S>
+    inline static unsigned long int _peek(const unsigned char * p) {
+        const unsigned long int lower = _peek<S/2>(p);
+        const unsigned long int upper = _peek<S/2>(p + S/2);
+        return lower | (upper << ((S/2)*8));
+    }
+public:
+    // Decodes a T from the bytes at p without moving anything.
+    template<typename T>
+    inline static T peek(const void * p) {
+        const unsigned char * const bytes = static_cast<const unsigned char *>(p);
+        return T(_peek<sizeof(T)>(bytes));
+    }
+
+    // Decodes a T from the bytes at p and advances p past them.
+    template<typename T>
+    inline static T read(const unsigned char * &p) {
+        const T value = peek<T>(p);
+        p += sizeof(T);
+        return value;
+    }
+
+    // Reinterprets the in-memory bytes of x as a little-endian T.
+    template<typename T>
+    inline static T swap(const T x) {
+        return peek<T>(&x);
+    }
+
+    // Advances p past n values of type T without decoding them.
+    template<typename T>
+    inline static void skip(const unsigned char * &p, size_t n=1) {
+        p += sizeof(T)*n;
+    }
+};
+
+// A single byte is its own value; this ends the halving in _peek<S>.
+template<>
+inline unsigned long int le::_peek<1>(const unsigned char * p) { return *p; }
diff --git a/thirdparty/graphite/src/inc/Error.h b/thirdparty/graphite/src/inc/Error.h
new file mode 100644
index 0000000000..2b7ab763a2
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Error.h
@@ -0,0 +1,134 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2013, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+// numbers are explicitly assigned for future proofing
+
+namespace graphite2
+{
+
+class Error
+{
+public:
+ Error() : _e(0) {};
+ operator bool() { return (_e != 0); }
+ int error() { return _e; }
+ void error(int e) { _e = e; }
+ bool test(bool pr, int err) { return (_e = int(pr) * err); }
+
+private:
+ int _e;
+};
+
+enum errcontext { // identifies where an error arose; the %d placeholders in the comments below are filled from the context
+ EC_READGLYPHS = 1, // while reading glyphs
+ EC_READSILF = 2, // in Silf table
+ EC_ASILF = 3, // in Silf %d
+ EC_APASS = 4, // in Silf %d, pass %d
+ EC_PASSCCODE = 5, // in pass constraint code for Silf %d, pass %d
+ EC_ARULE = 6, // in Silf %d, pass %d, rule %d
+ EC_ASTARTS = 7, // in Silf %d, pass %d, start state %d
+ EC_ATRANS = 8, // in Silf %d, pass %d, fsm state %d
+ EC_ARULEMAP = 9 // in Silf %d, pass %d, state %d
+};
+
+enum errors { // error codes; each value is assigned explicitly so that existing numbers never shift when codes are added
+ E_OUTOFMEM = 1, // Out of memory
+ E_NOGLYPHS = 2, // There are no glyphs in the font
+ E_BADUPEM = 3, // The units per em for the font is bad (0)
+ E_BADCMAP = 4, // The font does not contain any useful cmaps
+ E_NOSILF = 5, // Missing Silf table
+ E_TOOOLD = 6, // Silf table version is too old
+ E_BADSIZE = 7, // context object has the wrong structural size
+// Silf Subtable Errors take a Silf subtable number * 256 in the context
+ E_BADMAXGLYPH = 8, // Silf max glyph id is too high
+ E_BADNUMJUSTS = 9, // Number of Silf justification blocks is too high
+ E_BADENDJUSTS = 10, // Silf justification blocks take too much of the Silf table space
+ E_BADCRITFEATURES = 11, // Critical features section in a Silf table is too big
+ E_BADSCRIPTTAGS = 12, // Silf script tags area is too big
+ E_BADAPSEUDO = 13, // The pseudo glyph attribute number is too high
+ E_BADABREAK = 14, // The linebreak glyph attribute number is too high
+ E_BADABIDI = 15, // The bidi glyph attribute number is too high
+ E_BADAMIRROR = 16, // The mirrored glyph attribute number is too high
+ E_BADNUMPASSES = 17, // The number of passes is > 128
+ E_BADPASSESSTART = 18, // The Silf table is too small to hold any passes
+ E_BADPASSBOUND = 19, // The positioning pass number is too low or the substitution pass number is too high
+ E_BADPPASS = 20, // The positioning pass number is too high
+ E_BADSPASS = 21, // the substitution pass number is too high
+ E_BADJPASSBOUND = 22, // the justification pass must be higher than the positioning pass
+ E_BADJPASS = 23, // the justification pass is too high
+ E_BADALIG = 24, // the number of initial ligature component glyph attributes is too high
+ E_BADBPASS = 25, // the bidi pass number is specified and is either too high or too low
+ E_BADNUMPSEUDO = 26, // The number of pseudo glyphs is too high
+ E_BADCLASSSIZE = 27, // The size of the classes block is bad
+ E_TOOMANYLINEAR = 28, // The number of linear classes in the silf table is too high
+ E_CLASSESTOOBIG = 29, // There are too many classes for the space allocated in the Silf subtable
+ E_MISALIGNEDCLASSES = 30, // The class offsets in the class table don't line up with the number of classes
+ E_HIGHCLASSOFFSET = 31, // The class offsets point out of the class table
+ E_BADCLASSOFFSET = 32, // A class offset is less than one following it
+ E_BADCLASSLOOKUPINFO = 33, // The search header info for a non-linear class has wrong values in it
+// Pass subtable errors. Context has pass number * 65536
+ E_BADPASSSTART = 34, // The start offset for a particular pass is bad
+ E_BADPASSEND = 35, // The end offset for a particular pass is bad
+ E_BADPASSLENGTH = 36, // The length of the pass is too small
+ E_BADNUMTRANS = 37, // The number of transition states in the fsm is bad
+ E_BADNUMSUCCESS = 38, // The number of success states in the fsm is bad
+ E_BADNUMSTATES = 39, // The number of states in the fsm is bad
+ E_NORANGES = 40, // There are no columns in the fsm
+ E_BADRULEMAPLEN = 41, // The size of the success state to rule mapping is bad
+ E_BADCTXTLENBOUNDS = 42, // The precontext maximum is greater than its minimum
+ E_BADCTXTLENS = 43, // The lists of rule lengths or pre context lengths is bad
+ E_BADPASSCCODEPTR = 44, // The pass constraint code position does not align with where the forward reference says it should be
+ E_BADRULECCODEPTR = 45, // The rule constraint code position does not align with where the forward reference says it should be
+ E_BADCCODELEN = 46, // Bad rule/pass constraint code length
+ E_BADACTIONCODEPTR = 47, // The action code position does not align with where the forward reference says it should be
+ E_MUTABLECCODE = 48, // Constraint code edits slots. It shouldn't.
+ E_BADSTATE = 49, // Bad state transition referencing an illegal state
+ E_BADRULEMAPPING = 50, // The structure of the rule mapping is bad
+ E_BADRANGE = 51, // Bad column range structure including a glyph in more than one column
+ E_BADRULENUM = 52, // A reference to a rule is out of range (too high)
+ E_BADACOLLISION = 53, // Bad Silf table collision attribute number (too high)
+ E_BADEMPTYPASS = 54, // Can't have empty passes (no rules) except for collision passes
+ E_BADSILFVERSION = 55, // The Silf table has a bad version (probably too high)
+ E_BADCOLLISIONPASS = 56, // Collision flags set on a non positioning pass
+ E_BADNUMCOLUMNS = 57, // Arbitrarily limit number of columns in fsm
+// Code errors (58 and 59 are unassigned)
+ E_CODEFAILURE = 60, // Base code error. The following subcodes must align with Machine::Code::status_t in Code.h (code = E_CODEFAILURE + status value)
+ E_CODEALLOC = 61, // Out of memory
+ E_INVALIDOPCODE = 62, // Invalid op code
+ E_UNIMPOPCODE = 63, // Unimplemented op code encountered
+ E_OUTOFRANGECODE = 64, // Code argument out of range
+ E_BADJUMPCODE = 65, // Code jumps past end of op codes
+ E_CODEBADARGS = 66, // Code arguments exhausted
+ E_CODENORETURN = 67, // Missing return type op code at end of code
+ E_CODENESTEDCTXT = 68, // Nested context encountered in code
+// Compression errors
+ E_BADSCHEME = 69, // NOTE(review): Machine::Code::status_t also has underfull_stack (value 9), and E_CODEFAILURE + 9 is 69 as well -- confirm how that status is reported
+ E_SHRINKERFAILED = 70,
+};
+
+}
diff --git a/thirdparty/graphite/src/inc/Face.h b/thirdparty/graphite/src/inc/Face.h
new file mode 100644
index 0000000000..355c5aa0d3
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Face.h
@@ -0,0 +1,225 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include <cstdio>
+
+#include "graphite2/Font.h"
+
+#include "inc/Main.h"
+#include "inc/FeatureMap.h"
+#include "inc/TtfUtil.h"
+#include "inc/Silf.h"
+#include "inc/Error.h"
+
+namespace graphite2 {
+
+class Cmap;
+class FileFace;
+class GlyphCache;
+class NameTable;
+class json;
+class Font;
+
+
+using TtfUtil::Tag;
+
+// These are the actual tags, as distinct from the consecutive IDs in TtfUtil.h
+
+class Face
+{
+ // Prevent any kind of copying
+ Face(const Face&);
+ Face& operator=(const Face&);
+
+public:
+ class Table;
+ static float default_glyph_advance(const void* face_ptr, gr_uint16 glyphid);
+
+ Face(const void* appFaceHandle/*non-NULL*/, const gr_face_ops & ops);
+ virtual ~Face();
+
+ virtual bool runGraphite(Segment *seg, const Silf *silf) const;
+
+public:
+ bool readGlyphs(uint32 faceOptions);
+ bool readGraphite(const Table & silf);
+ bool readFeatures();
+ void takeFileFace(FileFace* pFileFace/*takes ownership*/);
+
+ const SillMap & theSill() const;
+ const GlyphCache & glyphs() const;
+ Cmap & cmap() const;
+ NameTable * nameTable() const;
+ void setLogger(FILE *log_file);
+ json * logger() const throw();
+
+ const Silf * chooseSilf(uint32 script) const;
+ uint16 languageForLocale(const char * locale) const;
+
+ // Features
+ uint16 numFeatures() const;
+ const FeatureRef * featureById(uint32 id) const;
+ const FeatureRef * feature(uint16 index) const;
+
+ // Glyph related
+ int32 getGlyphMetric(uint16 gid, uint8 metric) const;
+ uint16 findPseudo(uint32 uid) const;
+
+ // Errors
+ unsigned int error() const { return m_error; }
+ bool error(Error e) { m_error = e.error(); return false; }
+ unsigned int error_context() const { return m_error; }
+ void error_context(unsigned int errcntxt) { m_errcntxt = errcntxt; }
+
+ CLASS_NEW_DELETE;
+private:
+ SillMap m_Sill;
+ gr_face_ops m_ops;
+ const void * m_appFaceHandle; // non-NULL
+ FileFace * m_pFileFace; //owned
+ mutable GlyphCache * m_pGlyphFaceCache; // owned - never NULL
+ mutable Cmap * m_cmap; // cmap cache if available
+ mutable NameTable * m_pNames;
+ mutable json * m_logger;
+ unsigned int m_error;
+ unsigned int m_errcntxt;
+protected:
+ Silf * m_silfs; // silf subtables.
+ uint16 m_numSilf; // num silf subtables in the silf table
+private:
+ uint16 m_ascent,
+ m_descent;
+#ifdef GRAPHITE2_TELEMETRY
+public:
+ mutable telemetry tele;
+#endif
+};
+
+
+
+inline
+const SillMap & Face::theSill() const // Read-only access to the face's language/feature map.
+{
+ return m_Sill;
+}
+
+inline
+uint16 Face::numFeatures() const // Feature count, forwarded from the FeatureMap inside m_Sill.
+{
+ return m_Sill.theFeatureMap().numFeats();
+}
+
+inline
+const FeatureRef * Face::featureById(uint32 id) const // Forwards to FeatureMap::findFeatureRef(id) and returns its result unchanged.
+{
+ return m_Sill.theFeatureMap().findFeatureRef(id);
+}
+
+inline
+const FeatureRef *Face::feature(uint16 index) const // Forwards to FeatureMap::feature(index), which does not range-check index.
+{
+ return m_Sill.theFeatureMap().feature(index);
+}
+
+inline
+const GlyphCache & Face::glyphs() const // Dereferences m_pGlyphFaceCache, which the member comment documents as never NULL.
+{
+ return *m_pGlyphFaceCache;
+}
+
+inline
+Cmap & Face::cmap() const // Dereferences m_cmap unconditionally. NOTE(review): confirm callers only use this once the cmap is loaded.
+{
+ return *m_cmap;
+}
+
+inline
+json * Face::logger() const throw() // Returns the stored logger pointer as-is; no null check is done here.
+{
+ return m_logger;
+}
+
+
+
+class Face::Table // Handle to a table's bytes, constructed from a Face and a table Tag.
+{
+ const Face * _f; // face this table came from (0 for a default-constructed Table)
+ mutable const byte * _p; // table data; mutable so the const&& move constructor can null it in its source
+ size_t _sz; // value reported by size()
+ bool _compressed; // NOTE(review): presumably set when the data went through decompress() -- confirm in Face.cpp
+
+ Error decompress();
+
+ void release(); // invoked by the destructor
+
+public:
+ Table() throw();
+ Table(const Face & face, const Tag n, uint32 version=0xffffffff) throw();
+ ~Table() throw();
+ Table(const Table && rhs) throw(); // copies rhs's fields and nulls rhs._p
+
+ operator const byte * () const throw(); // yields _p
+
+ size_t size() const throw();
+ Table & operator = (const Table && rhs) throw();
+};
+
+inline
+Face::Table::Table() throw() // Empty table: no face, no data, zero size, not compressed.
+: _f(0), _p(0), _sz(0), _compressed(false)
+{
+}
+
+inline
+Face::Table::Table(const Table && rhs) throw() // Takes over rhs's fields; rhs is const, so only its mutable _p can be reset.
+: _f(rhs._f), _p(rhs._p), _sz(rhs._sz), _compressed(rhs._compressed)
+{
+ rhs._p = 0; // rhs keeps its _f/_sz/_compressed; presumably release() then has nothing to free for rhs -- TODO confirm
+}
+
+inline
+Face::Table::~Table() throw() // All cleanup is delegated to release().
+{
+ release();
+}
+
+inline
+Face::Table::operator const byte * () const throw() // Implicit view of the table bytes; null for a default-constructed or moved-from Table.
+{
+ return _p;
+}
+
+inline
+size_t Face::Table::size() const throw() // Returns _sz; the move constructor does not reset _sz in its source.
+{
+ return _sz;
+}
+
+} // namespace graphite2
+
+struct gr_face : public graphite2::Face {}; // global-namespace name for Face; presumably the type behind the C API handle -- confirm in graphite2/Font.h
diff --git a/thirdparty/graphite/src/inc/FeatureMap.h b/thirdparty/graphite/src/inc/FeatureMap.h
new file mode 100644
index 0000000000..0f05e941a2
--- /dev/null
+++ b/thirdparty/graphite/src/inc/FeatureMap.h
@@ -0,0 +1,198 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+#include "inc/Main.h"
+#include "inc/FeatureVal.h"
+
+namespace graphite2 {
+
+// Forward declarations for implementation types
+class FeatureMap;
+class Face;
+
+
+class FeatureSetting // One (value, label id) pair describing a selectable setting of a feature.
+{
+public:
+ FeatureSetting(int16 theValue, uint16 labelId) : m_label(labelId), m_value(theValue) {}; // note the argument order: value first, label second
+ uint16 label() const { return m_label; } // surfaced through FeatureRef::getSettingName()
+ int16 value() const { return m_value; } // surfaced through FeatureRef::getSettingValue()
+
+ CLASS_NEW_DELETE;
+private:
+ FeatureSetting(const FeatureSetting & fs) : m_label(fs.m_label), m_value(fs.m_value) {}; // private: outside code cannot copy
+
+ uint16 m_label;
+ int16 m_value;
+};
+
+class FeatureRef // Describes one feature: its id, name id, settings, and where its value lives inside a Features vector.
+{
+ typedef uint32 chunk_t; // element type of the Features vector (FeatureVal derives from Vector<uint32>)
+ static const uint8 SIZEOF_CHUNK = sizeof(chunk_t)*8; // number of bits in one chunk_t
+
+public:
+ enum flags_t : uint16 {
+ HIDDEN = 0x0800
+ };
+ FeatureRef() throw();
+ FeatureRef(const Face & face, unsigned short & bits_offset, uint32 max_val,
+ uint32 name, uint16 uiName, flags_t flags,
+ FeatureSetting *settings, uint16 num_set) throw();
+ ~FeatureRef() throw();
+
+ bool applyValToFeature(uint32 val, Features& pDest) const; //defined in GrFaceImp.h
+ void maskFeature(Features & pDest) const { // ORs this feature's mask into its chunk of pDest
+ if (m_index < pDest.size()) //defensive: do nothing when pDest has no chunk at m_index
+ pDest[m_index] |= m_mask;
+ }
+
+ uint32 getFeatureVal(const Features& feats) const; //defined in GrFaceImp.h
+
+ uint32 getId() const { return m_id; }
+ uint16 getNameId() const { return m_nameid; }
+ uint16 getNumSettings() const { return m_numSet; }
+ uint16 getSettingName(uint16 index) const { return m_nameValues[index].label(); } // no range check: caller keeps index < getNumSettings()
+ int16 getSettingValue(uint16 index) const { return m_nameValues[index].value(); } // no range check: caller keeps index < getNumSettings()
+ flags_t getFlags() const { return m_flags; }
+ uint32 maxVal() const { return m_max; }
+ const Face & getFace() const { assert(m_face); return *m_face;} // asserts: a default-constructed FeatureRef has no face
+ const FeatureMap* getFeatureMap() const;// { return m_pFace;}
+
+ CLASS_NEW_DELETE;
+private:
+ FeatureRef(const FeatureRef & rhs); // private: not copyable from outside
+
+ const Face * m_face;
+ FeatureSetting * m_nameValues; // array of name table ids for feature values
+ chunk_t m_mask, // bit mask to get the value from the vector
+ m_max; // max value the value can take
+ uint32 m_id; // feature identifier/name
+ uint16 m_nameid, // Name table id for feature name
+ m_numSet; // number of values (number of entries in m_nameValues)
+ flags_t m_flags; // feature flags see FeatureRef::flags_t.
+ byte m_bits, // how many bits to shift the value into place
+ m_index; // index into the array to find the ulong to mask
+
+private: //unimplemented
+ FeatureRef& operator=(const FeatureRef&);
+};
+
+inline
+FeatureRef::FeatureRef() throw() // Default state: no face, no settings, every numeric field zero.
+: m_face(0),
+ m_nameValues(0),
+ m_mask(0), m_max(0),
+ m_id(0), m_nameid(0), m_numSet(0),
+ m_flags(flags_t(0)),
+ m_bits(0), m_index(0)
+{
+}
+
+
+class NameAndFeatureRef // Pairs a feature id with a pointer to its FeatureRef; ordered by id.
+{
+ public:
+ NameAndFeatureRef(uint32 name = 0) : m_name(name) , m_pFRef(NULL){} // id only, no FeatureRef attached
+ NameAndFeatureRef(FeatureRef const & p) : m_name(p.getId()), m_pFRef(&p) {} // id is taken from p; p's address is stored
+
+ bool operator<(const NameAndFeatureRef& rhs) const //orders by m_name
+ { return m_name<rhs.m_name; }
+
+ CLASS_NEW_DELETE
+
+ uint32 m_name;
+ const FeatureRef* m_pFRef; // never deleted by this class
+};
+
+class FeatureMap // Owns a face's FeatureRef array plus a NameAndFeatureRef array over it.
+{
+public:
+ FeatureMap() : m_numFeats(0), m_feats(NULL), m_pNamedFeats(NULL) {}
+ ~FeatureMap() { delete[] m_feats; delete[] m_pNamedFeats; } // both arrays are released here
+
+ bool readFeats(const Face & face);
+ const FeatureRef *findFeatureRef(uint32 name) const;
+ FeatureRef *feature(uint16 index) const { return m_feats + index; } // unchecked: caller keeps index < numFeats()
+ //GrFeatureRef *featureRef(byte index) { return index < m_numFeats ? m_feats + index : NULL; }
+ const FeatureRef *featureRef(byte index) const { return index < m_numFeats ? m_feats + index : NULL; } // checked variant: NULL when out of range; index is a byte
+ FeatureVal* cloneFeatures(uint32 langname/*0 means default*/) const; //call destroy_Features when done.
+ uint16 numFeats() const { return m_numFeats; };
+ CLASS_NEW_DELETE
+private:
+friend class SillMap;
+ uint16 m_numFeats;
+
+ FeatureRef *m_feats; // deleted with delete[] in the destructor
+ NameAndFeatureRef* m_pNamedFeats; //owned
+ FeatureVal m_defaultFeatures; //owned
+
+private: //defensive on m_feats, m_pNamedFeats, and m_defaultFeatures
+ FeatureMap(const FeatureMap&);
+ FeatureMap& operator=(const FeatureMap&);
+};
+
+
+class SillMap // A face's FeatureMap together with an array of per-language feature sets.
+{
+private:
+ class LangFeaturePair // one language id and the Features object owned for it
+ {
+ LangFeaturePair(const LangFeaturePair &);
+ LangFeaturePair & operator = (const LangFeaturePair &);
+
+ public:
+ LangFeaturePair() : m_lang(0), m_pFeatures(0) {}
+ ~LangFeaturePair() { delete m_pFeatures; }
+
+ uint32 m_lang;
+ Features* m_pFeatures; //owns
+ CLASS_NEW_DELETE
+ };
+public:
+ SillMap() : m_langFeats(NULL), m_numLanguages(0) {}
+ ~SillMap() { delete[] m_langFeats; } // each element's destructor deletes its Features
+ bool readFace(const Face & face);
+ bool readSill(const Face & face);
+ FeatureVal* cloneFeatures(uint32 langname/*0 means default*/) const; //call destroy_Features when done.
+ uint16 numLanguages() const { return m_numLanguages; };
+ uint32 getLangName(uint16 index) const { return (index < m_numLanguages)? m_langFeats[index].m_lang : 0; }; // 0 when index is out of range
+
+ const FeatureMap & theFeatureMap() const { return m_FeatureMap; };
+private:
+ FeatureMap m_FeatureMap; //of face
+ LangFeaturePair * m_langFeats; // array of m_numLanguages entries, released with delete[]
+ uint16 m_numLanguages;
+
+private: //defensive on m_langFeats
+ SillMap(const SillMap&);
+ SillMap& operator=(const SillMap&);
+};
+
+} // namespace graphite2
+
+struct gr_feature_ref : public graphite2::FeatureRef {}; // global-namespace name for FeatureRef; presumably the C API handle type -- confirm in the public headers
diff --git a/thirdparty/graphite/src/inc/FeatureVal.h b/thirdparty/graphite/src/inc/FeatureVal.h
new file mode 100644
index 0000000000..cd3f93b2b5
--- /dev/null
+++ b/thirdparty/graphite/src/inc/FeatureVal.h
@@ -0,0 +1,68 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+#include <cstring>
+#include <cassert>
+#include "inc/Main.h"
+#include "inc/List.h"
+
+namespace graphite2 {
+
+class FeatureRef;
+class FeatureMap;
+
+class FeatureVal : public Vector<uint32> // A vector of uint32 feature chunks plus a pointer to the FeatureMap it relates to.
+{
+public:
+ FeatureVal() : m_pMap(0) { }
+ FeatureVal(int num, const FeatureMap & pMap) : Vector<uint32>(num), m_pMap(&pMap) {} // num is handed to the Vector constructor
+ FeatureVal(const FeatureVal & rhs) : Vector<uint32>(rhs), m_pMap(rhs.m_pMap) {}
+
+ FeatureVal & operator = (const FeatureVal & rhs) { Vector<uint32>::operator = (rhs); m_pMap = rhs.m_pMap; return *this; }
+
+ bool operator ==(const FeatureVal & b) const // compares element values only; m_pMap is not part of the comparison
+ {
+ size_t n = size();
+ if (n != b.size()) return false;
+
+ for(const_iterator l = begin(), r = b.begin(); n && *l == *r; --n, ++l, ++r); // empty body: n counts down while elements match
+
+ return n == 0; // n reaches 0 only if every element matched
+ }
+
+ CLASS_NEW_DELETE
+private:
+ friend class FeatureRef; //so that FeatureRefs can manipulate m_vec directly
+ const FeatureMap* m_pMap; // stored address only; this class never deletes it
+};
+
+typedef FeatureVal Features; // alias used in the FeatureRef and SillMap signatures
+
+} // namespace graphite2
+
+
+struct gr_feature_val : public graphite2::FeatureVal {}; // global-namespace name for FeatureVal; presumably the C API handle type -- confirm in the public headers
diff --git a/thirdparty/graphite/src/inc/FileFace.h b/thirdparty/graphite/src/inc/FileFace.h
new file mode 100644
index 0000000000..35927847f8
--- /dev/null
+++ b/thirdparty/graphite/src/inc/FileFace.h
@@ -0,0 +1,80 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2012, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+//#include "inc/FeatureMap.h"
+//#include "inc/GlyphsCache.h"
+//#include "inc/Silf.h"
+
+#ifndef GRAPHITE2_NFILEFACE
+
+#include <cstdio>
+#include <cassert>
+
+#include "graphite2/Font.h"
+
+#include "inc/Main.h"
+#include "inc/TtfTypes.h"
+#include "inc/TtfUtil.h"
+
+namespace graphite2 {
+
+
+class FileFace // Backs a face with a font file opened by name; only compiled when GRAPHITE2_NFILEFACE is not defined.
+{
+ static const void * get_table_fn(const void* appFaceHandle, unsigned int name, size_t *len); // same shape as rel_table_fn's counterpart; presumably wired into ops -- confirm in FileFace.cpp
+ static void rel_table_fn(const void* appFaceHandle, const void *table_buffer);
+
+public:
+ static const gr_face_ops ops;
+
+ FileFace(const char *filename);
+ ~FileFace();
+
+ operator bool () const throw(); // true only when the file, header table and table directory are all non-null
+ CLASS_NEW_DELETE;
+
+private: //defensive
+ FILE * _file;
+ size_t _file_len;
+
+ TtfUtil::Sfnt::OffsetSubTable * _header_tbl;
+ TtfUtil::Sfnt::OffsetSubTable::Entry * _table_dir;
+
+ FileFace(const FileFace&); // private: not copyable from outside
+ FileFace& operator=(const FileFace&);
+};
+
+inline
+FileFace::operator bool() const throw() // Usable only if all three pointers are set.
+{
+ return _file && _header_tbl && _table_dir;
+}
+
+} // namespace graphite2
+
+#endif //!GRAPHITE2_NFILEFACE
diff --git a/thirdparty/graphite/src/inc/Font.h b/thirdparty/graphite/src/inc/Font.h
new file mode 100644
index 0000000000..9bc9ffb510
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Font.h
@@ -0,0 +1,90 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+#include <cassert>
+#include "graphite2/Font.h"
+#include "inc/Main.h"
+#include "inc/Face.h"
+
+namespace graphite2 {
+
+#define INVALID_ADVANCE -1e38f // can't be a static const because non-integral
+
+class Font // A Face at a given scale, with a per-glyph cache of advances.
+{
+public:
+ Font(float ppm, const Face & face, const void * appFontHandle=0, const gr_font_ops * ops=0);
+ virtual ~Font();
+
+ float advance(unsigned short glyphid) const;
+ float scale() const;
+ bool isHinted() const;
+ const Face & face() const;
+ operator bool () const throw() { return m_advances; } // true when the advances array pointer is non-null
+
+ CLASS_NEW_DELETE;
+private:
+ gr_font_ops m_ops; // advance() calls m_ops.glyph_advance_x
+ const void * const m_appFontHandle; // passed back to the m_ops callbacks
+ float * m_advances; // One advance per glyph in pixels. INVALID_ADVANCE until advance() fetches the real value
+ const Face & m_face; // held by reference
+ float m_scale; // scales from design units to ppm
+ bool m_hinted;
+
+ Font(const Font&); // private: not copyable from outside
+ Font& operator=(const Font&);
+};
+
+inline
+float Font::advance(unsigned short glyphid) const // Cached lookup; glyphid is not range-checked here.
+{
+ if (m_advances[glyphid] == INVALID_ADVANCE) // sentinel: this glyph has not been fetched yet
+ m_advances[glyphid] = (*m_ops.glyph_advance_x)(m_appFontHandle, glyphid); // callback is called without a null check; result is cached
+ return m_advances[glyphid];
+}
+
+inline
+float Font::scale() const // Returns m_scale (documented on the member as design units to ppm).
+{
+ return m_scale;
+}
+
+inline
+bool Font::isHinted() const // Returns the stored m_hinted flag.
+{
+ return m_hinted;
+}
+
+inline
+const Face & Font::face() const // Returns the Face reference this Font was constructed with.
+{
+ return m_face;
+}
+
+} // namespace graphite2
+
+struct gr_font : public graphite2::Font {}; // global-namespace name for Font; presumably the C API handle type -- confirm in graphite2/Font.h
diff --git a/thirdparty/graphite/src/inc/GlyphCache.h b/thirdparty/graphite/src/inc/GlyphCache.h
new file mode 100644
index 0000000000..7d5324e522
--- /dev/null
+++ b/thirdparty/graphite/src/inc/GlyphCache.h
@@ -0,0 +1,223 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2012, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+
+#include "graphite2/Font.h"
+#include "inc/Main.h"
+#include "inc/Position.h"
+#include "inc/GlyphFace.h"
+
+namespace graphite2 {
+
+class Face;
+class FeatureVal;
+class Segment;
+
+
+struct SlantBox // Four-float box; GlyphCache casts Rect objects to this type, so the layout must match Rect's -- TODO confirm in Position.h
+{
+ static const SlantBox empty; // returned by GlyphCache::getBoundingSlantBox() when a glyph has no GlyphBox
+
+// SlantBox(float psi = 0., float pdi = 0., float psa = 0., float pda = 0.) : si(psi), di(pdi), sa(psa), da(pda) {};
+ float width() const { return sa - si; }
+ float height() const { return da - di; }
+ float si; // min
+ float di; // min
+ float sa; // max
+ float da; // max
+};
+
+
+struct BBox // Four-float min/max box; GlyphCache casts Rect objects to this type, so the layout must match Rect's -- TODO confirm in Position.h
+{
+ BBox(float pxi = 0, float pyi = 0., float pxa = 0., float pya = 0.) : xi(pxi), yi(pyi), xa(pxa), ya(pya) {}; // all-zero box by default
+ float width() const { return xa - xi; }
+ float height() const { return ya - yi; }
+ float xi; // min
+ float yi; // min
+ float xa; // max
+ float ya; // max
+};
+
+
+class GlyphBox // A glyph's slant box plus a run of sub-box Rects stored two per sub-index.
+{
+ GlyphBox(const GlyphBox &); // private: not copyable from outside
+ GlyphBox & operator = (const GlyphBox &);
+
+public:
+ GlyphBox(uint8 numsubs, unsigned short bitmap, Rect *slanted) : _num(numsubs), _bitmap(bitmap), _slant(*slanted) {}; // slanted is dereferenced, so it must be non-null
+
+ void addSubBox(int subindex, int boundary, Rect *val) { _subs[subindex * 2 + boundary] = *val; } // slot = subindex*2 + boundary; no range check
+ Rect &subVal(int subindex, int boundary) { return _subs[subindex * 2 + boundary]; } // same slot arithmetic as addSubBox
+ const Rect &slant() const { return _slant; }
+ uint8 num() const { return _num; }
+ const Rect *subs() const { return _subs; }
+
+private:
+ uint8 _num; // sub-box count given at construction
+ unsigned short _bitmap; // stored at construction; no accessor in this class
+ Rect _slant;
+ Rect _subs[1]; // indexed past [0] above; presumably the object is allocated with extra trailing storage -- TODO confirm at the allocation site
+};
+
+class GlyphCache // Per-face store of glyph data (GlyphFace) and optional per-glyph boxes (GlyphBox).
+{
+ class Loader;
+
+ GlyphCache(const GlyphCache&); // private: not copyable from outside
+ GlyphCache& operator=(const GlyphCache&);
+
+public:
+ GlyphCache(const Face & face, const uint32 face_options);
+ ~GlyphCache();
+
+ unsigned short numGlyphs() const throw();
+ unsigned short numAttrs() const throw();
+ unsigned short unitsPerEm() const throw();
+
+ const GlyphFace *glyph(unsigned short glyphid) const; //result may be changed by subsequent call with a different glyphid
+ const GlyphFace *glyphSafe(unsigned short glyphid) const; // range-checked glyph(): NULL when glyphid >= numGlyphs()
+ float getBoundingMetric(unsigned short glyphid, uint8 metric) const;
+ uint8 numSubBounds(unsigned short glyphid) const;
+ float getSubBoundingMetric(unsigned short glyphid, uint8 subindex, uint8 metric) const;
+ const Rect & slant(unsigned short glyphid) const { return _boxes[glyphid] ? _boxes[glyphid]->slant() : _empty_slant_box; } // no check on _boxes itself or on glyphid; see check()
+ const SlantBox & getBoundingSlantBox(unsigned short glyphid) const;
+ const BBox & getBoundingBBox(unsigned short glyphid) const;
+ const SlantBox & getSubBoundingSlantBox(unsigned short glyphid, uint8 subindex) const;
+ const BBox & getSubBoundingBBox(unsigned short glyphid, uint8 subindex) const;
+ bool check(unsigned short glyphid) const; // true when boxes exist and glyphid is in range
+ bool hasBoxes() const { return _boxes != 0; } // the box array pointer can be null
+
+ CLASS_NEW_DELETE;
+
+private:
+ const Rect _empty_slant_box; // returned by slant() for glyphs without a GlyphBox
+ const Loader * _glyph_loader;
+ const GlyphFace * * _glyphs;
+ GlyphBox * * _boxes; // per-glyph box pointers; the array pointer or any entry may be null
+ unsigned short _num_glyphs,
+ _num_attrs,
+ _upem;
+};
+
+inline
+unsigned short GlyphCache::numGlyphs() const throw() // Returns _num_glyphs, the bound used by the range checks in this class.
+{
+ return _num_glyphs;
+}
+
+inline
+unsigned short GlyphCache::numAttrs() const throw() // Returns the stored _num_attrs.
+{
+ return _num_attrs;
+}
+
+inline
+unsigned short GlyphCache::unitsPerEm() const throw() // Returns the stored _upem.
+{
+ return _upem;
+}
+
+inline
+bool GlyphCache::check(unsigned short glyphid) const // True when the box array exists and glyphid is below _num_glyphs.
+{
+ return _boxes && glyphid < _num_glyphs;
+}
+
+inline
+const GlyphFace *GlyphCache::glyphSafe(unsigned short glyphid) const // Range-checked wrapper around glyph().
+{
+ return glyphid < _num_glyphs ? glyph(glyphid) : NULL; // out-of-range ids yield NULL instead of calling glyph()
+}
+
+inline
+float GlyphCache::getBoundingMetric(unsigned short glyphid, uint8 metric) const // One bounding value of a glyph, selected by metric (0-7).
+{
+ if (glyphid >= _num_glyphs) return 0.; // out-of-range glyph: report 0
+ switch (metric) { // 0-3 read the glyph's bounding box; 4-7 read its slant box and index _boxes without testing _boxes itself -- NOTE(review): confirm callers use check()/hasBoxes() first
+ case 0: return (float)(glyph(glyphid)->theBBox().bl.x); // x_min
+ case 1: return (float)(glyph(glyphid)->theBBox().bl.y); // y_min
+ case 2: return (float)(glyph(glyphid)->theBBox().tr.x); // x_max
+ case 3: return (float)(glyph(glyphid)->theBBox().tr.y); // y_max
+ case 4: return (float)(_boxes[glyphid] ? _boxes[glyphid]->slant().bl.x : 0.f); // sum_min
+ case 5: return (float)(_boxes[glyphid] ? _boxes[glyphid]->slant().bl.y : 0.f); // diff_min
+ case 6: return (float)(_boxes[glyphid] ? _boxes[glyphid]->slant().tr.x : 0.f); // sum_max
+ case 7: return (float)(_boxes[glyphid] ? _boxes[glyphid]->slant().tr.y : 0.f); // diff_max
+ default: return 0.; // unknown selector
+ }
+}
+
+inline const SlantBox &GlyphCache::getBoundingSlantBox(unsigned short glyphid) const // No range check on glyphid and no null check on _boxes itself.
+{
+ return _boxes[glyphid] ? *(SlantBox *)(&(_boxes[glyphid]->slant())) : SlantBox::empty; // views the stored Rect as a SlantBox; relies on the two types sharing a layout
+}
+
+inline const BBox &GlyphCache::getBoundingBBox(unsigned short glyphid) const // No range check on glyphid; glyph()'s result is dereferenced directly.
+{
+ return *(BBox *)(&(glyph(glyphid)->theBBox())); // views the glyph's Rect as a BBox; relies on the two types sharing a layout
+}
+
+inline
+float GlyphCache::getSubBoundingMetric(unsigned short glyphid, uint8 subindex, uint8 metric) const // One value of a glyph's sub-box, selected by metric (0-7).
+{
+ GlyphBox *b = _boxes[glyphid]; // no range check on glyphid and no null check on _boxes itself
+ if (b == NULL || subindex >= b->num()) return 0; // glyph has no box, or subindex is past its sub-box count
+
+ switch (metric) { // 0-3 read the Rect at boundary slot 0, 4-7 the Rect at boundary slot 1
+ case 0: return b->subVal(subindex, 0).bl.x;
+ case 1: return b->subVal(subindex, 0).bl.y;
+ case 2: return b->subVal(subindex, 0).tr.x;
+ case 3: return b->subVal(subindex, 0).tr.y;
+ case 4: return b->subVal(subindex, 1).bl.x;
+ case 5: return b->subVal(subindex, 1).bl.y;
+ case 6: return b->subVal(subindex, 1).tr.x;
+ case 7: return b->subVal(subindex, 1).tr.y;
+ default: return 0.; // unknown selector
+ }
+}
+
+inline const SlantBox &GlyphCache::getSubBoundingSlantBox(unsigned short glyphid, uint8 subindex) const // Unchecked: b is dereferenced without a null test and subindex is not compared with b->num().
+{
+ GlyphBox *b = _boxes[glyphid];
+ return *(SlantBox *)(b->subs() + 2 * subindex + 1); // second Rect of the pair for subindex, viewed as a SlantBox
+}
+
+inline const BBox &GlyphCache::getSubBoundingBBox(unsigned short glyphid, uint8 subindex) const // Unchecked: b is dereferenced without a null test and subindex is not compared with b->num().
+{
+ GlyphBox *b = _boxes[glyphid];
+ return *(BBox *)(b->subs() + 2 * subindex); // first Rect of the pair for subindex, viewed as a BBox
+}
+
+inline
+uint8 GlyphCache::numSubBounds(unsigned short glyphid) const // Sub-box count of a glyph; glyphid and _boxes itself are not checked.
+{
+ return _boxes[glyphid] ? _boxes[glyphid]->num() : 0; // 0 when the glyph has no GlyphBox
+}
+
+} // namespace graphite2
diff --git a/thirdparty/graphite/src/inc/GlyphFace.h b/thirdparty/graphite/src/inc/GlyphFace.h
new file mode 100644
index 0000000000..fc29056146
--- /dev/null
+++ b/thirdparty/graphite/src/inc/GlyphFace.h
@@ -0,0 +1,83 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include "inc/Main.h"
+#include "inc/Position.h"
+#include "inc/Sparse.h"
+
+namespace graphite2 {
+
+enum metrics { // consecutive ids 0..11; presumably the selector taken by GlyphFace::getMetric() -- confirm in GlyphFace.cpp
+ kgmetLsb = 0, kgmetRsb, // 0, 1
+ kgmetBbTop, kgmetBbBottom, kgmetBbLeft, kgmetBbRight, // 2..5
+ kgmetBbHeight, kgmetBbWidth, // 6, 7
+ kgmetAdvWidth, kgmetAdvHeight, // 8, 9
+ kgmetAscent, kgmetDescent // 10, 11
+};
+
+
+class GlyphFace // Data held for one glyph: bounding box, advance and a sparse attribute set.
+{
+public:
+ GlyphFace();
+ template<typename I>
+ GlyphFace(const Rect & bbox, const Position & adv, I first, const I last); // [first, last) is handed to the sparse attribute constructor
+
+ const Position & theAdvance() const;
+ const Rect & theBBox() const { return m_bbox; }
+ const sparse & attrs() const { return m_attrs; }
+ int32 getMetric(uint8 metric) const;
+
+ CLASS_NEW_DELETE;
+private:
+ Rect m_bbox; // bounding box metrics in design units
+ Position m_advance; // Advance width and height in design units
+ sparse m_attrs; // built from the iterator range given to the constructor
+};
+
+
+// Inlines: class GlyphFace
+//
+inline
+GlyphFace::GlyphFace() // No initialiser list: all three members are default-initialised.
+{}
+
+template<typename I>
+GlyphFace::GlyphFace(const Rect & bbox, const Position & adv, I first, const I last) // Copies bbox and adv; builds the attributes from [first, last).
+: m_bbox(bbox),
+ m_advance(adv),
+ m_attrs(first, last)
+{
+}
+
+inline
+const Position & GlyphFace::theAdvance() const { // Read-only access to the stored advance.
+ return m_advance;
+}
+
+} // namespace graphite2
diff --git a/thirdparty/graphite/src/inc/Intervals.h b/thirdparty/graphite/src/inc/Intervals.h
new file mode 100644
index 0000000000..81d23187b6
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Intervals.h
@@ -0,0 +1,234 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include <utility>
+
+#include "inc/Main.h"
+#include "inc/List.h"
+#include "inc/json.h"
+#include "inc/Position.h"
+
+// An IntervalSet represents the possible movement of a given glyph in a given direction
+// (horizontally, vertically, or diagonally).
+// A vector is needed to represent disjoint ranges, eg, -300..-150, 20..200, 500..750.
+// Each pair represents the min/max of a sub-range.
+
+namespace graphite2 {
+
+class Segment;
+
+enum zones_t {SD, XY}; // selects the Exclusion::weighted<> specialisation; weightedAxis() uses XY for axis < 2 and SD otherwise
+
+class Zones // A list of Exclusion ranges plus the margins and bounds passed to initialise().
+{
+ struct Exclusion
+ {
+ template<zones_t O> // factory; explicit specialisations for XY and SD are defined after the class
+ static Exclusion weighted(float xmin, float xmax, float f, float a0,
+ float m, float xi, float ai, float c, bool nega);
+
+ float x, // x position
+ xm, // xmax position
+ c, // constant + sum(MiXi^2)
+ sm, // sum(Mi)
+ smx; // sum(MiXi)
+ bool open; // false after construction; initialise() sets it on the first exclusion
+
+ Exclusion(float x, float w, float smi, float smxi, float c);
+ Exclusion & operator += (Exclusion const & rhs);
+ uint8 outcode(float p) const;
+
+ Exclusion split_at(float p);
+ void left_trim(float p);
+
+ bool track_cost(float & cost, float & x, float origin) const;
+
+ private:
+ float test_position(float origin) const;
+ float cost(float x) const;
+ };
+
+ typedef Vector<Exclusion> exclusions;
+
+ typedef exclusions::iterator iterator;
+ typedef Exclusion * pointer;
+ typedef Exclusion & reference;
+ typedef std::reverse_iterator<iterator> reverse_iterator;
+
+public:
+ typedef exclusions::const_iterator const_iterator;
+ typedef Exclusion const * const_pointer;
+ typedef Exclusion const & const_reference;
+ typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+
+#if !defined GRAPHITE2_NTRACING
+ struct Debug // one trace record: a copy of an exclusion, an add/delete flag and the json environments
+ {
+ Exclusion _excl;
+ bool _isdel;
+ Vector<void *> _env;
+
+ Debug(Exclusion *e, bool isdel, json *dbg) : _excl(*e), _isdel(isdel), _env(dbg->getenvs()) { }; // dereferences both e and dbg
+ };
+
+ typedef Vector<Debug> debugs;
+ typedef debugs::const_iterator idebugs;
+ void addDebug(Exclusion *e);
+ void removeDebug(float pos, float posm);
+ void setdebug(json *dbgout) { _dbg = dbgout; } // addDebug()/removeDebug() record nothing while this is null
+ idebugs dbgs_begin() const { return _dbgs.begin(); }
+ idebugs dbgs_end() const { return _dbgs.end(); }
+ void jsonDbgOut(Segment *seg) const;
+ Position position() const { return Position(_pos, _posm); } // packs the (_pos, _posm) pair into a Position
+#endif
+
+ Zones();
+ template<zones_t O>
+ void initialise(float xmin, float xmax, float margin_len, float margin_weight, float ao);
+
+ void exclude(float xmin, float xmax); // forwards to remove()
+ void exclude_with_margins(float xmin, float xmax, int axis);
+
+ template<zones_t O>
+ void weighted(float xmin, float xmax, float f, float a0, float mi, float xi, float ai, float c, bool nega);
+ void weightedAxis(int axis, float xmin, float xmax, float f, float a0, float mi, float xi, float ai, float c, bool nega);
+
+ float closest( float origin, float &cost) const;
+
+ const_iterator begin() const { return _exclusions.begin(); }
+ const_iterator end() const { return _exclusions.end(); }
+
+private:
+ exclusions _exclusions;
+#if !defined GRAPHITE2_NTRACING
+ json * _dbg; // set to 0 by the constructor, replaced by setdebug()
+ debugs _dbgs;
+#endif
+ float _margin_len,
+ _margin_weight,
+ _pos, // xmin as passed to initialise()
+ _posm; // xmax as passed to initialise()
+
+ void insert(Exclusion e);
+ void remove(float x, float xm);
+ const_iterator find_exclusion_under(float x) const;
+};
+
+
+// Starts with zeroed margins and bounds, no debug sink, and room reserved for eight exclusions.
+inline Zones::Zones()
+: _margin_len(0), _margin_weight(0), _pos(0), _posm(0)
+{
+    _exclusions.reserve(8);
+#if !defined GRAPHITE2_NTRACING
+    _dbg = 0;
+#endif
+}
+
+// Stores the range ends and the three cost terms; a new exclusion is never open.
+inline Zones::Exclusion::Exclusion(float x_, float xm_, float smi, float smxi, float c_)
+: x(x_), xm(xm_), c(c_), sm(smi), smx(smxi), open(false)
+{}
+
+// Records the margins and bounds, then resets the list to one open exclusion spanning xmin..xmax.
+template<zones_t O>
+inline void Zones::initialise(float xmin, float xmax, float margin_len, float margin_weight, float a0)
+{
+    _margin_len = margin_len;
+    _margin_weight = margin_weight;
+    _pos = xmin;
+    _posm = xmax;
+#if !defined GRAPHITE2_NTRACING
+    _dbgs.clear();
+#endif
+    _exclusions.clear();
+    Exclusion whole = Exclusion::weighted<O>(xmin, xmax, 1, a0, 0, 0, 0, 0, false);
+    whole.open = true;
+    _exclusions.push_back(whole);
+}
+
+// Hands the range xmin..xmax straight to remove().
+inline void Zones::exclude(float xmin, float xmax) {
+    remove(xmin, xmax);
+}
+
+// Builds the weighted exclusion for zone type O and inserts it.
+template<zones_t O>
+inline void Zones::weighted(float xmin, float xmax, float f, float a0,
+                            float m, float xi, float ai, float c, bool nega) {
+    insert(Exclusion::weighted<O>(xmin, xmax, f, a0, m, xi, ai, c, nega));
+}
+
+// Runtime dispatch onto weighted<>: axis < 2 selects XY, anything else selects SD.
+inline void Zones::weightedAxis(int axis, float xmin, float xmax, float f, float a0,
+                                float m, float xi, float ai, float c, bool nega) {
+    if (axis >= 2)
+        weighted<SD>(xmin, xmax, f, a0, m, xi, ai, c, nega);
+    else
+        weighted<XY>(xmin, xmax, f, a0, m, xi, ai, c, nega);
+}
+
+#if !defined GRAPHITE2_NTRACING
+// Appends a non-deletion trace record for *e; does nothing while no debug sink is set.
+inline void Zones::addDebug(Exclusion *e) {
+    if (!_dbg) return;
+    _dbgs.push_back(Debug(e, false, _dbg));
+}
+
+// Appends a deletion trace record for the range pos..posm; does nothing while no debug sink is set.
+inline void Zones::removeDebug(float pos, float posm) {
+    if (!_dbg)
+        return;
+
+    Exclusion removed(pos, posm, 0, 0, 0);
+    _dbgs.push_back(Debug(&removed, true, _dbg));
+}
+#endif
+
+// XY specialisation; ai and nega take no part in the result.
+template<>
+inline Zones::Exclusion Zones::Exclusion::weighted<XY>(float xmin, float xmax, float f, float a0,
+        float m, float xi, GR_MAYBE_UNUSED float ai, float c, GR_MAYBE_UNUSED bool nega) {
+    const float sum_m  = m + f;
+    const float sum_mx = m * xi;
+    const float konst  = m * xi * xi + f * a0 * a0 + c;
+    return Exclusion(xmin, xmax, sum_m, sum_mx, konst);
+}
+
+// SD specialisation; xi is shifted by -ai when nega is set and by +ai otherwise.
+template<>
+inline Zones::Exclusion Zones::Exclusion::weighted<SD>(float xmin, float xmax, float f, float a0,
+        float m, float xi, float ai, float c, bool nega) {
+    const float xia    = nega ? xi - ai : xi + ai;
+    const float sum_m  = 0.25f * (m + 2.f * f);
+    const float sum_mx = 0.25f * m * xia;
+    const float konst  = 0.25f * (m * xia * xia + 2.f * f * a0 * a0) + c;
+    return Exclusion(xmin, xmax, sum_m, sum_mx, konst);
+}
+
+} // end of namespace graphite2
diff --git a/thirdparty/graphite/src/inc/List.h b/thirdparty/graphite/src/inc/List.h
new file mode 100644
index 0000000000..a3b7a77961
--- /dev/null
+++ b/thirdparty/graphite/src/inc/List.h
@@ -0,0 +1,168 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+
+// designed to have a limited subset of the std::vector api
+#pragma once
+
+#include <cstddef>
+#include <cassert>
+#include <cstring>
+#include <cstdlib>
+#include <new>
+
+#include "Main.h"
+
+namespace graphite2 {
+
+// Element count of the pointer range [first, last).
+template <typename T>
+inline ptrdiff_t distance(T* first, T* last) { const ptrdiff_t count = last - first; return count; }
+
+
+template <typename T>
+class Vector // Subset of the std::vector API over a single realloc()-grown buffer.
+{
+    T * m_first, *m_last, *m_end;   // buffer start, one past the last element, one past the buffer end
+public:
+    typedef T &         reference;
+    typedef const T &   const_reference;
+    typedef T *         iterator;
+    typedef const T *   const_iterator;
+    // Every constructor starts empty and fills itself through insert().
+    Vector() : m_first(0), m_last(0), m_end(0) {}
+    Vector(size_t n, const T& value = T()) : m_first(0), m_last(0), m_end(0) { insert(begin(), n, value); }
+    Vector(const Vector<T> &rhs) : m_first(0), m_last(0), m_end(0) { insert(begin(), rhs.begin(), rhs.end()); }
+    template <typename I>
+    Vector(I first, const I last) : m_first(0), m_last(0), m_end(0) { insert(begin(), first, last); }
+    ~Vector() { clear(); free(m_first); }   // destroys the elements, then releases the buffer
+
+    iterator            begin()         { return m_first; }
+    const_iterator      begin() const   { return m_first; }
+
+    iterator            end()           { return m_last; }
+    const_iterator      end() const     { return m_last; }
+
+    bool                empty() const   { return m_first == m_last; }
+    size_t              size() const    { return m_last - m_first; }
+    size_t              capacity() const{ return m_end - m_first; }
+
+    void                reserve(size_t n);
+    void                resize(size_t n, const T & v = T());
+    // front()/back()/operator[] only assert their preconditions; there is no release-mode check.
+    reference           front()         { assert(size() > 0); return *begin(); }
+    const_reference     front() const   { assert(size() > 0); return *begin(); }
+    reference           back()          { assert(size() > 0); return *(end()-1); }
+    const_reference     back() const    { assert(size() > 0); return *(end()-1); }
+    // Self-assignment is a no-op: assign() clears *this before it reads from rhs.
+    Vector<T>         & operator = (const Vector<T> & rhs) { if (this != &rhs) assign(rhs.begin(), rhs.end()); return *this; }
+    reference           operator [] (size_t n)          { assert(size() > n); return m_first[n]; }
+    const_reference     operator [] (size_t n) const    { assert(size() > n); return m_first[n]; }
+
+    void                assign(size_t n, const T& u)    { clear(); insert(begin(), n, u); }
+    void                assign(const_iterator first, const_iterator last)      { clear(); insert(begin(), first, last); }
+    iterator            insert(iterator p, const T & x) { p = _insert_default(p, 1); new (p) T(x); return p; }
+    void                insert(iterator p, size_t n, const T & x);
+    void                insert(iterator p, const_iterator first, const_iterator last);
+    void                pop_back()              { assert(size() > 0); (--m_last)->~T(); }   // runs the destructor, as erase() does
+    void                push_back(const T &v)   { if (m_last == m_end) reserve(size()+1); new (m_last++) T(v); }
+
+    void                clear()                 { erase(begin(), end()); }
+    iterator            erase(iterator p)       { return erase(p, p+1); }
+    iterator            erase(iterator first, iterator last);
+
+private:
+    iterator            _insert_default(iterator p, size_t n);   // opens an unconstructed gap of n elements at p
+};
+
+// Grows the buffer to a capacity of n elements; aborts on byte-count overflow or allocation failure.
+template <typename T>
+inline void Vector<T>::reserve(size_t n)
+{
+    if (n <= capacity()) return;
+
+    const ptrdiff_t used = size();
+    size_t bytes;
+    if (checked_mul(n, sizeof(T), bytes)) std::abort();
+    T * const block = static_cast<T*>(realloc(m_first, bytes));
+    if (!block) std::abort();
+    m_first = block;
+    m_last = block + used;
+    m_end = block + n;
+}
+
+// Makes size() equal n: erases the tail when shrinking, appends copies of v when growing.
+template <typename T>
+inline void Vector<T>::resize(size_t n, const T & v) {
+    const ptrdiff_t delta = n-size();
+    if (delta > 0)      insert(end(), delta, v);
+    else if (delta < 0) erase(end()+delta, end());
+}
+
+// Opens an unconstructed gap of n elements at p and returns the gap's start.
+template<typename T>
+inline typename Vector<T>::iterator Vector<T>::_insert_default(iterator p, size_t n)
+{
+    assert(begin() <= p && p <= end());
+    const ptrdiff_t offset = p - begin();   // reserve() may move the buffer, so keep an index
+    reserve(((size() + n + 7) >> 3) << 3);  // requested capacity is rounded up to a multiple of 8
+    iterator gap = begin() + offset;
+    const ptrdiff_t tail = distance(gap, end());
+    if (tail != 0) memmove(gap + n, gap, tail*sizeof(T));   // shift the tail up
+    m_last += n;
+    return gap;
+}
+
+// Inserts n copies of x before p.
+template<typename T>
+inline void Vector<T>::insert(iterator p, size_t n, const T & x)
+{
+    iterator slot = _insert_default(p, n);
+    // Copy-construct x into each slot of the gap
+    for (size_t i = 0; i != n; ++i) new (slot++) T(x);
+}
+
+// Inserts copies of the elements in [first, last) before p.
+template<typename T>
+inline void Vector<T>::insert(iterator p, const_iterator first, const_iterator last)
+{
+    iterator slot = _insert_default(p, distance(first, last));
+    // Copy-construct each source element into the gap
+    while (first != last) new (slot++) T(*first++);
+}
+
+// Destroys [first, last), moves the tail down over the hole and returns first.
+template<typename T>
+inline typename Vector<T>::iterator Vector<T>::erase(iterator first, iterator last)
+{
+    const size_t removed = distance(first, last);
+    for (iterator doomed = first; doomed != last; ++doomed) doomed->~T();
+    if (last != m_last) memmove(first, last, distance(last,end())*sizeof(T));
+    m_last -= removed;
+    return first;
+}
+
+} // namespace graphite2
diff --git a/thirdparty/graphite/src/inc/Machine.h b/thirdparty/graphite/src/inc/Machine.h
new file mode 100644
index 0000000000..b23819fb98
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Machine.h
@@ -0,0 +1,207 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+// The general interpreter interface.
+// Author: Tim Eves
+
+// Build one of direct_machine.cpp or call_machine.cpp to implement this
+// interface.
+
+#pragma once
+#include <cstring>
+#include <limits>
+#include <graphite2/Types.h>
+#include "inc/Main.h"
+
+#if defined(__GNUC__) // HOT and REGPARM(n) expand to GCC function attributes here and to nothing on other compilers
+#if defined(__clang__) || (__GNUC__ * 100 + __GNUC_MINOR__ * 10) < 430 // clang, or a GCC below the version threshold: no "hot"
+#define HOT
+#if defined(__x86_64)
+#define REGPARM(n) __attribute__((regparm(n)))
+#else
+#define REGPARM(n)
+#endif
+#else // newer GCC: "hot" is available
+#define HOT __attribute__((hot))
+#if defined(__x86_64)
+#define REGPARM(n) __attribute__((hot, regparm(n)))
+#else
+#define REGPARM(n)
+#endif
+#endif
+#else // not a GNU-compatible compiler
+#define HOT
+#define REGPARM(n)
+#endif
+
+#if defined(__MINGW32__)
+// MinGW's <limits> at some point includes winnt.h which #define's a
+// DELETE macro, which conflicts with enum opcode below, so we undefine
+// it here.
+#undef DELETE
+#endif
+
+namespace graphite2 {
+
+// Forward declarations
+class Segment;
+class Slot;
+class SlotMap;
+
+
+namespace vm
+{
+
+
+typedef void * instr; // opaque instruction handle; see opcode_t::impl and Machine::run()
+typedef Slot * slotref;
+
+enum {VARARGS = 0xff, MAX_NAME_LEN=32}; // MAX_NAME_LEN sizes opcode_t::name; VARARGS is presumably a param_sz marker -- TODO confirm
+
+enum opcode { // Interpreter opcodes, numbered consecutively from NOP = 0.
+ NOP = 0,
+
+ PUSH_BYTE, PUSH_BYTEU, PUSH_SHORT, PUSH_SHORTU, PUSH_LONG,
+
+ ADD, SUB, MUL, DIV,
+ MIN_, MAX_,
+ NEG,
+ TRUNC8, TRUNC16,
+
+ COND,
+
+ AND, OR, NOT,
+ EQUAL, NOT_EQ,
+ LESS, GTR, LESS_EQ, GTR_EQ,
+
+ NEXT, NEXT_N, COPY_NEXT,
+ PUT_GLYPH_8BIT_OBS, PUT_SUBS_8BIT_OBS, PUT_COPY,
+ INSERT, DELETE,
+ ASSOC,
+ CNTXT_ITEM,
+
+ ATTR_SET, ATTR_ADD, ATTR_SUB,
+ ATTR_SET_SLOT,
+ IATTR_SET_SLOT,
+ PUSH_SLOT_ATTR, PUSH_GLYPH_ATTR_OBS,
+ PUSH_GLYPH_METRIC, PUSH_FEAT,
+ PUSH_ATT_TO_GATTR_OBS, PUSH_ATT_TO_GLYPH_METRIC,
+ PUSH_ISLOT_ATTR,
+
+ PUSH_IGLYPH_ATTR, // not implemented
+
+ POP_RET, RET_ZERO, RET_TRUE,
+ IATTR_SET, IATTR_ADD, IATTR_SUB,
+ PUSH_PROC_STATE, PUSH_VERSION,
+ PUT_SUBS, PUT_SUBS2, PUT_SUBS3,
+ PUT_GLYPH, PUSH_GLYPH_ATTR, PUSH_ATT_TO_GLYPH_ATTR,
+ BITOR, BITAND, BITNOT,
+ BITSET, SET_FEAT,
+ MAX_OPCODE, // one past the last on-disk opcode
+ // private opcodes for internal use only, comes after all other on disk opcodes
+ TEMP_COPY = MAX_OPCODE
+};
+
+struct opcode_t // Element type of the table returned by Machine::getOpcodeTable().
+{
+ instr impl[2]; // two implementation handles per opcode; what distinguishes them is not visible here
+ uint8 param_sz; // presumably the parameter byte count, or VARARGS -- TODO confirm
+ char name[MAX_NAME_LEN];
+};
+
+
+class Machine // Interpreter state: a fixed-size value stack, a status code and a reference to a SlotMap.
+{
+public:
+ typedef int32 stack_t;
+ static size_t const STACK_ORDER = 10,
+ STACK_MAX = 1 << STACK_ORDER, // usable stack entries
+ STACK_GUARD = 2; // extra entries kept at each end of _stack
+
+ class Code;
+
+ enum status_t { // finished is the only non-error value; check_final_stack() sets the three stack_* values
+ finished = 0,
+ stack_underflow,
+ stack_not_empty,
+ stack_overflow,
+ slot_offset_out_bounds,
+ died_early
+ };
+
+ Machine(SlotMap &) throw();
+ static const opcode_t * getOpcodeTable() throw();
+
+ CLASS_NEW_DELETE;
+
+ SlotMap & slotMap() const throw();
+ status_t status() const throw();
+// operator bool () const throw();
+
+private:
+ void check_final_stack(const stack_t * const sp);
+ stack_t run(const instr * program, const byte * data,
+ slotref * & map) HOT; // not defined in this header
+
+ SlotMap & _map;
+ stack_t _stack[STACK_MAX + 2*STACK_GUARD];
+ status_t _status;
+};
+
+inline Machine::Machine(SlotMap & map) throw()
+: _map(map), _status(finished)
+{
+    // Zero the first STACK_GUARD + 1 stack entries.  The stack pointer points
+    // at the current top of stack, so the first push never writes entry 0.
+    // Clearing the guard space is not strictly needed; it is only done to
+    // keep valgrind happy during fuzz testing.  Hopefully loop unrolling
+    // will flatten this.
+    for (size_t i = 0; i <= STACK_GUARD; ++i) _stack[i] = 0;
+}
+
+// Returns the SlotMap this machine was constructed with.
+inline SlotMap& Machine::slotMap() const throw() {
+    return _map;
+}
+
+// Returns the current status code.
+inline Machine::status_t Machine::status() const throw() {
+    return _status;
+}
+
+// While the status is still "finished", flags a stack pointer that is not back at the stack base.
+inline void Machine::check_final_stack(const stack_t * const sp) {
+    if (_status != finished) return;   // keep an error that was already recorded
+    stack_t const * const base  = _stack + STACK_GUARD;
+    stack_t const * const limit = base + STACK_MAX;
+    if (sp == base) return;            // empty stack: nothing to report
+    if (sp < base)        _status = stack_underflow;  // This should be impossible now.
+    else if (sp >= limit) _status = stack_overflow;   // So should this.
+    else                  _status = stack_not_empty;
+}
+
+} // namespace vm
+} // namespace graphite2
diff --git a/thirdparty/graphite/src/inc/Main.h b/thirdparty/graphite/src/inc/Main.h
new file mode 100644
index 0000000000..ebf02dd553
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Main.h
@@ -0,0 +1,199 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include <cstdlib>
+#include "graphite2/Types.h"
+
+#ifdef GRAPHITE2_CUSTOM_HEADER
+#include GRAPHITE2_CUSTOM_HEADER
+#endif
+
+namespace graphite2 {
+
+typedef gr_uint8 uint8; // short aliases for the gr_* integer types (presumably declared in graphite2/Types.h)
+typedef gr_uint8 byte;
+typedef gr_uint16 uint16;
+typedef gr_uint32 uint32;
+typedef gr_int8 int8;
+typedef gr_int16 int16;
+typedef gr_int32 int32;
+typedef size_t uintptr; // note: an alias for size_t, not for uintptr_t
+
+#ifdef GRAPHITE2_TELEMETRY
+struct telemetry // Per-category byte counters, compiled in only when GRAPHITE2_TELEMETRY is defined.
+{
+ class category;
+
+ static size_t * _category; // counter currently receiving count_bytes(); null means counting is off
+ static void set_category(size_t & t) throw() { _category = &t; }
+ static void stop() throw() { _category = 0; }
+ static void count_bytes(size_t n) throw() { if (_category) *_category += n; }
+
+ size_t misc,
+ silf,
+ glyph,
+ code,
+ states,
+ starts,
+ transitions;
+
+ telemetry() : misc(0), silf(0), glyph(0), code(0), states(0), starts(0), transitions(0) {}
+};
+
+class telemetry::category // Scope guard: redirects counting to t and restores the previous target on destruction.
+{
+ size_t * _prev; // value of telemetry::_category when this guard was constructed
+public:
+ category(size_t & t) : _prev(_category) { _category = &t; }
+ ~category() { _category = _prev; }
+};
+
+#else
+struct telemetry {};
+#endif
+
+// Checked multiplication to catch overflow or underflow when allocating memory
+#if defined(__has_builtin)
+ #if __has_builtin(__builtin_mul_overflow)
+ #define HAVE_BUILTIN_OVERFLOW
+ #endif
+#elif defined(__GNUC__) && (__GNUC__ >= 5) && !defined(__INTEL_COMPILER)
+ #define HAVE_BUILTIN_OVERFLOW
+#endif
+#if defined(__has_include)
+ #if __has_include(<intsafe.h>) && !defined(__CYGWIN__)
+ #define HAVE_INTSAFE_H
+ #endif
+#elif defined(_WIN32)
+ #define HAVE_INTSAFE_H
+#endif
+
+// Need to import intsafe into the top level namespace
+#if defined(HAVE_INTSAFE_H)
+} // namespace graphite2
+
+#include <intsafe.h>
+
+namespace graphite2 {
+#endif
+
+#if defined(HAVE_BUILTIN_OVERFLOW) // checked_mul(a, b, t): returns true when a*b overflows size_t; otherwise t holds the product
+inline
+bool checked_mul(const size_t a, const size_t b, size_t & t) {
+    return __builtin_mul_overflow(a, b, &t);
+}
+#elif defined(HAVE_INTSAFE_H)
+inline
+bool checked_mul(const size_t a, const size_t b, size_t & t) {
+    return SizeTMult(a, b, &t) == INTSAFE_E_ARITHMETIC_OVERFLOW;
+}
+#else // portable fallback
+inline bool checked_mul(const size_t a, const size_t b, size_t & t) {
+    t = a*b;
+    // Overflow needs a bit in the upper half of an operand. a == 0 can never overflow and must not reach t / a.
+    return a != 0 && ((a | b) & (~size_t(0) << (sizeof(size_t) << 2))) && (t / a != b);
+}
+#endif
+
+// typesafe wrapper around malloc for simple types
+// use free(pointer) to deallocate
+
+template <typename T> T * gralloc(size_t n) {
+    size_t bytes;
+    if (checked_mul(n, sizeof(T), bytes))
+        return 0;   // the byte count does not fit in size_t
+#ifdef GRAPHITE2_TELEMETRY
+    telemetry::count_bytes(bytes);
+#endif
+    void * const raw = malloc(bytes);
+    return static_cast<T*>(raw);
+}
+
+template <typename T> T * grzeroalloc(size_t n) { // calloc-backed array of n T's; release with free()
+#ifdef GRAPHITE2_TELEMETRY
+    telemetry::count_bytes(sizeof(T) * n);
+#endif
+    void * const zeroed = calloc(n, sizeof(T));
+    return static_cast<T*>(zeroed);
+}
+
+template <typename T>
+inline T min(const T a, const T b) { // yields b unless a < b
+    if (a < b) return a;
+    return b;
+}
+
+template <typename T>
+inline T max(const T a, const T b) { // yields b unless a > b
+    if (a > b) return a;
+    return b;
+}
+
+} // namespace graphite2
+
+#define CLASS_NEW_DELETE /* malloc/free-backed class allocators; allocation failure yields a null pointer, so the allocating forms are declared non-throwing */ \
+ void * operator new (size_t size) throw() { return gralloc<byte>(size);} \
+ void * operator new (size_t, void * p) throw() { return p; } \
+ void * operator new[] (size_t size) throw() {return gralloc<byte>(size);} \
+ void * operator new[] (size_t, void * p) throw() { return p; } \
+ void operator delete (void * p) throw() { free(p);} \
+ void operator delete (void *, void *) throw() {} \
+ void operator delete[] (void * p)throw() { free(p); } \
+ void operator delete[] (void *, void *) throw() {}
+
+#if defined(__GNUC__) || defined(__clang__) // GR_MAYBE_UNUSED: marks a declaration as possibly unused on GCC/clang
+#define GR_MAYBE_UNUSED __attribute__((unused))
+#else // other compilers: expands to nothing
+#define GR_MAYBE_UNUSED
+#endif
+
+#ifndef __has_cpp_attribute // compilers without the feature test report every attribute as missing
+# define __has_cpp_attribute(x) 0
+#endif
+
+#if __has_cpp_attribute(clang::fallthrough) // GR_FALLTHROUGH: the first available fallthrough annotation wins
+# define GR_FALLTHROUGH [[clang::fallthrough]]
+#elif __has_cpp_attribute(gnu::fallthrough)
+# define GR_FALLTHROUGH [[gnu::fallthrough]]
+#elif defined(_MSC_VER)
+ /*
+ * MSVC's __fallthrough annotations are checked by /analyze (Code Analysis):
+ * https://msdn.microsoft.com/en-us/library/ms235402%28VS.80%29.aspx
+ */
+ #include <sal.h>
+ #define GR_FALLTHROUGH __fallthrough
+#elif __GNUC__ >= 7
+ #define GR_FALLTHROUGH __attribute__ ((fallthrough))
+#else // no annotation available: the macro expands to a comment only
+ #define GR_FALLTHROUGH /* fallthrough */
+#endif
+
+#ifdef _MSC_VER
+#pragma warning(disable: 4800)
+#pragma warning(disable: 4355)
+#endif
diff --git a/thirdparty/graphite/src/inc/NameTable.h b/thirdparty/graphite/src/inc/NameTable.h
new file mode 100644
index 0000000000..0fdbeb4d85
--- /dev/null
+++ b/thirdparty/graphite/src/inc/NameTable.h
@@ -0,0 +1,65 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include <graphite2/Segment.h>
+#include "inc/TtfTypes.h"
+#include "inc/locale2lcid.h"
+
+namespace graphite2 {
+
+class NameTable // Wrapper around a font name table with lookup by language id and name id.
+{
+ NameTable(const NameTable &); // copying is disabled: declared private, no definition in this header
+ NameTable & operator = (const NameTable &);
+
+public:
+ NameTable(const void * data, size_t length, uint16 platfromId=3, uint16 encodingID = 1);
+ ~NameTable() { free(const_cast<TtfUtil::Sfnt::FontNames *>(m_table)); } // m_table is owned and released with free()
+ enum eNameFallback {
+ eNoFallback = 0,
+ eEnUSFallbackOnly = 1,
+ eEnOrAnyFallback = 2
+ };
+ uint16 setPlatformEncoding(uint16 platfromId=3, uint16 encodingID = 1);
+ void * getName(uint16 & languageId, uint16 nameId, gr_encform enc, uint32 & length); // languageId and length are in/out references
+ uint16 getLanguageId(const char * bcp47Locale);
+
+ CLASS_NEW_DELETE
+private:
+ uint16 m_platformId;
+ uint16 m_encodingId;
+ uint16 m_languageCount;
+ uint16 m_platformOffset; // offset of first NameRecord for platform 3, encoding 1
+ uint16 m_platformLastRecord;
+ uint16 m_nameDataLength;
+ const TtfUtil::Sfnt::FontNames * m_table;
+ const uint8 * m_nameData;
+ Locale2Lang m_locale2Lang;
+};
+
+} // namespace graphite2
diff --git a/thirdparty/graphite/src/inc/Pass.h b/thirdparty/graphite/src/inc/Pass.h
new file mode 100644
index 0000000000..e687a87d8c
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Pass.h
@@ -0,0 +1,118 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include <cstdlib>
+#include "inc/Code.h"
+
+namespace graphite2 {
+
+class Segment;
+class Face;
+class Silf;
+struct Rule;
+struct RuleEntry;
+struct State;
+class FiniteStateMachine;
+class Error;
+class ShiftCollider;
+class KernCollider;
+class json;
+
+enum passtype;
+
+class Pass // One pass: rule, state-machine and code tables read by readPass() and executed by runGraphite().
+{
+public:
+ Pass();
+ ~Pass();
+
+ bool readPass(const byte * pPass, size_t pass_length, size_t subtable_base, Face & face,
+ enum passtype pt, uint32 version, Error &e);
+ bool runGraphite(vm::Machine & m, FiniteStateMachine & fsm, bool reverse) const;
+ void init(Silf *silf) { m_silf = silf; } // stores the pointer only
+ byte collisionLoops() const { return m_numCollRuns; }
+ bool reverseDir() const { return m_isReverseDir; }
+
+ CLASS_NEW_DELETE
+private:
+ void findNDoRule(Slot* & iSlot, vm::Machine &, FiniteStateMachine& fsm) const;
+ int doAction(const vm::Machine::Code* codeptr, Slot * & slot_out, vm::Machine &) const;
+ bool testPassConstraint(vm::Machine & m) const;
+ bool testConstraint(const Rule & r, vm::Machine &) const;
+ bool readRules(const byte * rule_map, const size_t num_entries,
+ const byte *precontext, const uint16 * sort_key,
+ const uint16 * o_constraint, const byte *constraint_data,
+ const uint16 * o_action, const byte * action_data,
+ Face &, enum passtype pt, Error &e);
+ bool readStates(const byte * starts, const byte * states, const byte * o_rule_map, Face &, Error &e);
+ bool readRanges(const byte * ranges, size_t num_ranges, Error &e);
+ uint16 glyphToCol(const uint16 gid) const;
+ bool runFSM(FiniteStateMachine & fsm, Slot * slot) const;
+ void dumpRuleEventConsidered(const FiniteStateMachine & fsm, const RuleEntry & re) const;
+ void dumpRuleEventOutput(const FiniteStateMachine & fsm, const Rule & r, Slot * os) const;
+ void adjustSlot(int delta, Slot * & slot_out, SlotMap &) const;
+ bool collisionShift(Segment *seg, int dir, json * const dbgout) const;
+ bool collisionKern(Segment *seg, int dir, json * const dbgout) const;
+ bool collisionFinish(Segment *seg, GR_MAYBE_UNUSED json * const dbgout) const;
+ bool resolveCollisions(Segment *seg, Slot *slot, Slot *start, ShiftCollider &coll, bool isRev,
+ int dir, bool &moved, bool &hasCol, json * const dbgout) const;
+ float resolveKern(Segment *seg, Slot *slot, Slot *start, int dir,
+ float &ymin, float &ymax, json *const dbgout) const;
+
+ const Silf * m_silf; // set by init()
+ uint16 * m_cols;
+ Rule * m_rules; // rules
+ RuleEntry * m_ruleMap;
+ uint16 * m_startStates; // prectxt length
+ uint16 * m_transitions;
+ State * m_states;
+ vm::Machine::Code * m_codes;
+ byte * m_progs;
+
+ byte m_numCollRuns; // returned by collisionLoops()
+ byte m_kernColls;
+ byte m_iMaxLoop;
+ uint16 m_numGlyphs;
+ uint16 m_numRules;
+ uint16 m_numStates;
+ uint16 m_numTransition;
+ uint16 m_numSuccess;
+ uint16 m_successStart;
+ uint16 m_numColumns;
+ byte m_minPreCtxt;
+ byte m_maxPreCtxt;
+ byte m_colThreshold;
+ bool m_isReverseDir; // returned by reverseDir()
+ vm::Machine::Code m_cPConstraint;
+
+private: //defensive
+ Pass(const Pass&); // copying is disabled: declared private
+ Pass& operator=(const Pass&);
+};
+
+} // namespace graphite2
diff --git a/thirdparty/graphite/src/inc/Position.h b/thirdparty/graphite/src/inc/Position.h
new file mode 100644
index 0000000000..510e4f4c41
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Position.h
@@ -0,0 +1,68 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+namespace graphite2 {
+
+// A 2D point / vector made of two single-precision coordinates.
+class Position
+{
+public:
+    // Default construction yields the origin (0, 0).
+    Position() : x(0), y(0) { }
+    // Construct from explicit coordinates.
+    Position(const float inx, const float iny) : x(inx), y(iny) {}
+
+    // Component-wise sum; the operands are left untouched.
+    Position operator + (const Position& a) const
+    {
+        Position sum(*this);
+        sum += a;
+        return sum;
+    }
+
+    // Component-wise difference; the operands are left untouched.
+    Position operator - (const Position& a) const
+    {
+        const float dx = x - a.x;
+        const float dy = y - a.y;
+        return Position(dx, dy);
+    }
+
+    // Both coordinates scaled by m; the operand is left untouched.
+    Position operator * (const float m) const
+    {
+        Position scaled(*this);
+        scaled *= m;
+        return scaled;
+    }
+
+    // In-place component-wise addition.
+    Position &operator += (const Position &a)
+    {
+        x += a.x;
+        y += a.y;
+        return *this;
+    }
+
+    // In-place scaling of both coordinates.
+    Position &operator *= (const float m)
+    {
+        x *= m;
+        y *= m;
+        return *this;
+    }
+
+    float x;
+    float y;
+};
+
+// Axis-aligned rectangle stored as its bottom-left (bl) and top-right (tr)
+// corners.
+class Rect
+{
+public :
+    // Both corners default-construct to (0, 0) via Position's default constructor.
+    Rect() {}
+    Rect(const Position& botLeft, const Position& topRight): bl(botLeft), tr(topRight) {}
+
+    // Returns the smallest rectangle covering both this rectangle and other.
+    // Neither operand is modified, so the method is const-qualified and can be
+    // called on const rectangles and const references.
+    Rect widen(const Rect& other) const
+    {
+        return Rect(Position(bl.x > other.bl.x ? other.bl.x : bl.x,
+                             bl.y > other.bl.y ? other.bl.y : bl.y),
+                    Position(tr.x > other.tr.x ? tr.x : other.tr.x,
+                             tr.y > other.tr.y ? tr.y : other.tr.y));
+    }
+
+    // Translate both corners by +a / -a.
+    Rect operator + (const Position &a) const { return Rect(Position(bl.x + a.x, bl.y + a.y), Position(tr.x + a.x, tr.y + a.y)); }
+    Rect operator - (const Position &a) const { return Rect(Position(bl.x - a.x, bl.y - a.y), Position(tr.x - a.x, tr.y - a.y)); }
+    // Scale both corners about the origin by m.
+    Rect operator * (float m) const { return Rect(bl * m, tr * m); }
+    float width() const { return tr.x - bl.x; }
+    float height() const { return tr.y - bl.y; }
+
+    bool hitTest(Rect &other);
+
+    // Returns Position(overlapx, overlapy), where each overlap is < 0 if
+    // overlapping, else positive.
+    Position overlap(Position &offset, Rect &other, Position &otherOffset);
+    //Position constrainedAvoid(Position &offset, Rect &box, Rect &sdbox, Position &other, Rect &obox, Rect &osdbox);
+
+    Position bl;
+    Position tr;
+};
+
+} // namespace graphite2
diff --git a/thirdparty/graphite/src/inc/Rule.h b/thirdparty/graphite/src/inc/Rule.h
new file mode 100644
index 0000000000..5964e003a6
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Rule.h
@@ -0,0 +1,305 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2011, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+
+#pragma once
+
+#include "inc/Code.h"
+#include "inc/Slot.h"
+
+namespace graphite2 {
+
+struct Rule { // One rule record: constraint and action code pointers plus ordering data.
+ const vm::Machine::Code * constraint, // code object for the constraint; the constructor sets it to 0
+ * action; // code object for the action; the constructor sets it to 0
+ unsigned short sort; // ordering key: RuleEntry::operator< puts larger values first
+ byte preContext; // NOTE(review): presumably the rule's pre-context length - confirm against the Pass loader
+#ifndef NDEBUG
+ uint16 rule_idx; // exists only in builds without NDEBUG
+#endif
+
+ Rule();
+ ~Rule() {}
+
+ CLASS_NEW_DELETE; // NOTE(review): macro defined outside this header; presumably supplies operator new/delete - confirm in inc/Main.h
+
+private:
+ Rule(const Rule &); // private and not defined in this header: copying is disabled
+ Rule & operator = (const Rule &);
+};
+
+// Builds an empty rule: null constraint and action code, zero sort key and
+// zero pre-context. Builds without NDEBUG also clear rule_idx.
+inline Rule::Rule()
+    : constraint(0), action(0), sort(0), preContext(0)
+{
+#ifndef NDEBUG
+    rule_idx = 0;
+#endif
+}
+
+
+// Thin handle onto a Rule, giving rules an ordering and an identity test.
+struct RuleEntry
+{
+    const Rule * rule;
+
+    // Strict ordering: a larger Rule::sort comes first; equal sort values
+    // are ordered by ascending Rule address.
+    inline
+    bool operator < (const RuleEntry &r) const
+    {
+        const unsigned short mine = rule->sort, theirs = r.rule->sort;
+        if (mine != theirs) return mine > theirs;
+        return rule < r.rule;
+    }
+
+    // Two entries are equal when they refer to the very same Rule object.
+    inline
+    bool operator == (const RuleEntry &r) const
+    {
+        return r.rule == rule;
+    }
+};
+
+
+// A [rules, rules_end) range of RuleEntry values.
+struct State
+{
+    const RuleEntry * rules,
+                    * rules_end;
+
+    bool empty() const;
+};
+
+// True when the range holds no entries, i.e. both pointers coincide.
+inline
+bool State::empty() const
+{
+    return rules == rules_end;
+}
+
+
+class SlotMap // Fixed-capacity array of Slot pointers plus high-water bookkeeping for one Segment.
+{
+public:
+ enum {MAX_SLOTS=64}; // m_slot_map holds MAX_SLOTS+1 pointers; element 0 is reserved (see reset())
+ SlotMap(Segment & seg, uint8 direction, size_t maxSize);
+
+ Slot * * begin(); // address of m_slot_map[1]
+ Slot * * end(); // m_slot_map + m_size + 1
+ size_t size() const; // number of slots pushed since the last reset()
+ unsigned short context() const; // value stored by the last reset()
+ void reset(Slot &, unsigned short); // empties the map, records the context and stores slot.prev() in element 0
+
+ Slot * const & operator[](int n) const; // returns m_slot_map[n + 1], so index -1 addresses element 0
+ Slot * & operator [] (int);
+ void pushSlot(Slot * const slot); // appends without a bounds check
+ void collectGarbage(Slot *& aSlot);
+
+ Slot * highwater() { return m_highwater; }
+ void highwater(Slot *s) { m_highwater = s; m_highpassed = false; } // setting a new high-water slot also clears the passed flag
+ bool highpassed() const { return m_highpassed; }
+ void highpassed(bool v) { m_highpassed = v; }
+
+ uint8 dir() const { return m_dir; }
+ int decMax() { return --m_maxSize; } // pre-decrements m_maxSize and returns the new value
+
+ Segment & segment;
+private:
+ Slot * m_slot_map[MAX_SLOTS+1];
+ unsigned short m_size;
+ unsigned short m_precontext;
+ Slot * m_highwater;
+ int m_maxSize;
+ uint8 m_dir;
+ bool m_highpassed;
+};
+
+
+class FiniteStateMachine // Bundles a SlotMap reference, an accumulated rule set and a debug logger pointer.
+{
+public:
+ enum {MAX_RULES=128}; // capacity of each half of Rules::m_rules
+
+private:
+ class Rules // Sorted set of RuleEntry values kept in a two-half buffer.
+ {
+ public:
+ Rules();
+ void clear(); // empties the set by setting m_end = m_begin
+ const RuleEntry * begin() const;
+ const RuleEntry * end() const;
+ size_t size() const; // m_end - m_begin
+
+ void accumulate_rules(const State &state); // merges the state's entries into the set, writing into the other half of m_rules
+
+ private:
+ RuleEntry * m_begin, // start of the current result range inside m_rules
+ * m_end, // one past the last entry of the current result range
+ m_rules[MAX_RULES*2]; // two MAX_RULES-sized halves used alternately by accumulate_rules
+ };
+
+public:
+ FiniteStateMachine(SlotMap & map, json * logger);
+ void reset(Slot * & slot, const short unsigned int max_pre_ctxt); // clears rules, steps slot back over up to max_pre_ctxt predecessors, then resets the slot map
+
+ Rules rules;
+ SlotMap & slots;
+ json * const dbgout;
+};
+
+
+// Binds the machine to its slot map and stores the debug logger pointer; the
+// rule set member is default-constructed.
+inline FiniteStateMachine::FiniteStateMachine(SlotMap& map, json * logger)
+    : slots(map), dbgout(logger)
+{}
+
+// Prepares the machine for a new run: drops the accumulated rules, walks
+// slot backwards over at most max_pre_ctxt predecessors (stopping early at
+// the first slot without a predecessor), then resets the slot map with the
+// slot reached and the number of steps actually taken.
+inline
+void FiniteStateMachine::reset(Slot * & slot, const short unsigned int max_pre_ctxt)
+{
+    rules.clear();
+
+    int ctxt = 0;
+    while (ctxt != max_pre_ctxt && slot->prev())
+    {
+        slot = slot->prev();
+        ++ctxt;
+    }
+    slots.reset(*slot, ctxt);
+}
+
+// Starts with an empty set positioned at the front of the buffer.
+inline FiniteStateMachine::Rules::Rules()
+    : m_begin(m_rules),
+      m_end(m_rules)
+{}
+
+// Discards all accumulated entries; the current buffer half is retained.
+inline void FiniteStateMachine::Rules::clear()
+{
+    m_end = m_begin;
+}
+
+// First entry of the accumulated set.
+inline const RuleEntry * FiniteStateMachine::Rules::begin() const
+{
+    return m_begin;
+}
+
+// One past the last entry of the accumulated set.
+inline const RuleEntry * FiniteStateMachine::Rules::end() const
+{
+    return m_end;
+}
+
+// Number of entries currently in the set.
+inline size_t FiniteStateMachine::Rules::size() const
+{
+    return end() - begin();
+}
+
+inline
+void FiniteStateMachine::Rules::accumulate_rules(const State &state)
+{
+ // Nothing to merge when the State object carries no rules.
+ if (state.empty()) return;
+
+ // Merge the state's sorted rule list into the current sorted result set, dropping duplicates.
+ const RuleEntry * lre = begin(), * rre = state.rules; // lre: cursor in the current set; rre: cursor in the state's rules
+ RuleEntry * out = m_rules + (m_begin == m_rules)*MAX_RULES; // write into whichever half of m_rules is not currently in use
+ const RuleEntry * const lrend = out + MAX_RULES, // output limit: at most MAX_RULES entries are kept
+ * const rrend = state.rules_end;
+ m_begin = out; // begin() now names the new half; end() still marks the end of the old set until m_end is updated
+ while (lre != end() && out != lrend)
+ {
+ if (*lre < *rre) *out++ = *lre++;
+ else if (*rre < *lre) { *out++ = *rre++; }
+ else { *out++ = *lre++; ++rre; } // neither orders before the other: same rule on both sides, emit it once
+
+ if (rre == rrend) // state's rules exhausted: copy what is left of the old set and finish
+ {
+ while (lre != end() && out != lrend) { *out++ = *lre++; }
+ m_end = out;
+ return;
+ }
+ }
+ while (rre != rrend && out != lrend) { *out++ = *rre++; } // old set exhausted (or output full): append remaining state rules while room remains
+ m_end = out;
+}
+
+// Creates an empty map over seg with the given direction and size budget.
+// The reserved element 0 of the slot array starts out null.
+inline
+SlotMap::SlotMap(Segment & seg, uint8 direction, size_t maxSize)
+    : segment(seg),
+      m_size(0),
+      m_precontext(0),
+      m_highwater(0),
+      m_maxSize(int(maxSize)),
+      m_dir(direction),
+      m_highpassed(false)
+{
+    m_slot_map[0] = 0;
+}
+
+// First mapped slot. It lives at element 1 so that the map may reach one
+// element before it when inserting at the start of the segment.
+inline
+Slot * * SlotMap::begin()
+{
+    return m_slot_map + 1;
+}
+
+// One past the last mapped slot.
+inline
+Slot * * SlotMap::end()
+{
+    return begin() + m_size;
+}
+
+// Number of mapped slots.
+inline
+size_t SlotMap::size() const
+{
+    return m_size;
+}
+
+// Context value recorded by the most recent reset().
+inline
+short unsigned int SlotMap::context() const
+{
+    return m_precontext;
+}
+
+// Empties the map, records ctxt and stores slot's predecessor in the
+// reserved element 0.
+inline
+void SlotMap::reset(Slot & slot, short unsigned int ctxt)
+{
+    m_precontext = ctxt;
+    m_size = 0;
+    m_slot_map[0] = slot.prev();
+}
+
+// Appends slot after the last mapped slot; no bounds check is performed.
+inline
+void SlotMap::pushSlot(Slot*const slot)
+{
+    ++m_size;
+    m_slot_map[m_size] = slot;
+}
+
+// Read access to mapped slot n; n == -1 addresses the reserved element 0.
+inline
+Slot * const & SlotMap::operator[](int n) const
+{
+    const int raw = n + 1;
+    return m_slot_map[raw];
+}
+
+// Write access to mapped slot n; n == -1 addresses the reserved element 0.
+inline
+Slot * & SlotMap::operator[](int n)
+{
+    const int raw = n + 1;
+    return m_slot_map[raw];
+}
+
+} // namespace graphite2
diff --git a/thirdparty/graphite/src/inc/Segment.h b/thirdparty/graphite/src/inc/Segment.h
new file mode 100644
index 0000000000..6cf83408d4
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Segment.h
@@ -0,0 +1,236 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include "inc/Main.h"
+
+#include <cassert>
+
+#include "inc/CharInfo.h"
+#include "inc/Face.h"
+#include "inc/FeatureVal.h"
+#include "inc/GlyphCache.h"
+#include "inc/GlyphFace.h"
+#include "inc/Slot.h"
+#include "inc/Position.h"
+#include "inc/List.h"
+#include "inc/Collider.h"
+
+#define MAX_SEG_GROWTH_FACTOR 64
+
+namespace graphite2 {
+
+typedef Vector<Features> FeatureList;
+typedef Vector<Slot *> SlotRope;
+typedef Vector<int16 *> AttributeRope;
+typedef Vector<SlotJustify *> JustifyRope;
+
+class Font;
+class Segment;
+class Silf;
+
+enum SpliceParam {
+/** Sub-Segments longer than this are not cached
+ * (length measured in Unicode code points). */
+ eMaxSpliceSize = 96
+};
+
+enum justFlags { // bit values; this is the flags parameter type of Segment::justify()
+ gr_justStartInline = 1, // bit 0 - NOTE(review): presumably "start of the slot list is not at a line start"; confirm against the public graphite2 header
+ gr_justEndInline = 2 // bit 1 - NOTE(review): presumably the matching flag for the end of the slot list; confirm
+};
+
+class SegmentScopeState // All members are private; only the friend class Segment can read or write them.
+{
+private:
+ friend class Segment;
+ Slot * realFirstSlot; // NOTE(review): field roles below are inferred from names only; no user of them is visible in this header
+ Slot * slotBeforeScope;
+ Slot * slotAfterScope;
+ Slot * realLastSlot;
+ size_t numGlyphsOutsideScope;
+};
+
+class Segment // Owns the slots, character info and feature settings of one shaped run for a Face/Silf pair.
+{
+ // Prevent copying of any kind.
+ Segment(const Segment&);
+ Segment& operator=(const Segment&);
+
+public:
+
+ enum { // bit values stored in m_flags
+ SEG_INITCOLLISIONS = 1,
+ SEG_HASCOLLISIONS = 2
+ };
+
+ size_t slotCount() const { return m_numGlyphs; } //one slot per glyph
+ void extendLength(ptrdiff_t num) { m_numGlyphs += num; } // num may be negative
+ Position advance() const { return m_advance; }
+ bool runGraphite() { if (m_silf) return m_face->runGraphite(this, m_silf); else return true;}; // succeeds trivially when no Silf is selected
+ void chooseSilf(uint32 script) { m_silf = m_face->chooseSilf(script); }
+ const Silf *silf() const { return m_silf; }
+ size_t charInfoCount() const { return m_numCharinfo; }
+ const CharInfo *charinfo(unsigned int index) const { return index < m_numCharinfo ? m_charinfo + index : NULL; } // NULL when index is out of range
+ CharInfo *charinfo(unsigned int index) { return index < m_numCharinfo ? m_charinfo + index : NULL; } // NULL when index is out of range
+
+ Segment(size_t numchars, const Face* face, uint32 script, int dir);
+ ~Segment();
+ uint8 flags() const { return m_flags; }
+ void flags(uint8 f) { m_flags = f; }
+ Slot *first() { return m_first; }
+ void first(Slot *p) { m_first = p; }
+ Slot *last() { return m_last; }
+ void last(Slot *p) { m_last = p; }
+ void appendSlot(int i, int cid, int gid, int fid, size_t coffset);
+ Slot *newSlot();
+ void freeSlot(Slot *);
+ SlotJustify *newJustify();
+ void freeJustify(SlotJustify *aJustify);
+ Position positionSlots(const Font *font=0, Slot *first=0, Slot *last=0, bool isRtl = false, bool isFinal = true);
+ void associateChars(int offset, size_t num);
+ void linkClusters(Slot *first, Slot *last);
+ uint16 getClassGlyph(uint16 cid, uint16 offset) const { return m_silf->getClassGlyph(cid, offset); } // dereferences m_silf without a null check
+ uint16 findClassIndex(uint16 cid, uint16 gid) const { return m_silf->findClassIndex(cid, gid); } // dereferences m_silf without a null check
+ int addFeatures(const Features& feats) { m_feats.push_back(feats); return int(m_feats.size()) - 1; } // returns the index of the appended entry
+ uint32 getFeature(int index, uint8 findex) const { const FeatureRef* pFR=m_face->theSill().theFeatureMap().featureRef(findex); if (!pFR) return 0; else return pFR->getFeatureVal(m_feats[index]); } // 0 when the face has no feature ref for findex
+ void setFeature(int index, uint8 findex, uint32 val) { // no-op when the face has no feature ref for findex
+ const FeatureRef* pFR=m_face->theSill().theFeatureMap().featureRef(findex);
+ if (pFR)
+ {
+ if (val > pFR->maxVal()) val = pFR->maxVal(); // clamp to the feature's maximum
+ pFR->applyValToFeature(val, m_feats[index]);
+ } }
+ int8 dir() const { return m_dir; }
+ void dir(int8 val) { m_dir = val; }
+ bool currdir() const { return ((m_dir >> 6) ^ m_dir) & 1; } // XOR of bit 6 and bit 0 of m_dir
+ uint8 passBits() const { return m_passBits; }
+ void mergePassBits(const uint8 val) { m_passBits &= val; } // bitwise AND: a bit stays set only if set in both
+ int16 glyphAttr(uint16 gid, uint16 gattr) const { const GlyphFace * p = m_face->glyphs().glyphSafe(gid); return p ? p->attrs()[gattr] : 0; } // 0 when glyphSafe() yields no glyph
+ int32 getGlyphMetric(Slot *iSlot, uint8 metric, uint8 attrLevel, bool rtl) const;
+ float glyphAdvance(uint16 gid) const { return m_face->glyphs().glyph(gid)->theAdvance().x; } // unlike glyphAttr(), the glyph pointer is not null-checked here
+ const Rect &theGlyphBBoxTemporary(uint16 gid) const { return m_face->glyphs().glyph(gid)->theBBox(); } //warning value may become invalid when another glyph is accessed
+ Slot *findRoot(Slot *is) const { return is->attachedTo() ? findRoot(is->attachedTo()) : is; } // follows attachedTo() links recursively until a slot without a parent is reached
+ int numAttrs() const { return m_silf->numUser(); }
+ int defaultOriginal() const { return m_defaultOriginal; }
+ const Face * getFace() const { return m_face; }
+ const Features & getFeatures(unsigned int /*charIndex*/) { assert(m_feats.size() == 1); return m_feats[0]; } // the index is ignored; asserts exactly one feature set exists
+ void bidiPass(int paradir, uint8 aMirror);
+ int8 getSlotBidiClass(Slot *s) const;
+ void doMirror(uint16 aMirror);
+ Slot *addLineEnd(Slot *nSlot);
+ void delLineEnd(Slot *s);
+ bool hasJustification() const { return m_justifies.size() != 0; }
+ void reverseSlots();
+
+ bool isWhitespace(const int cid) const;
+ bool hasCollisionInfo() const { return (m_flags & SEG_HASCOLLISIONS) && m_collisions; }
+ SlotCollision *collisionInfo(const Slot *s) const { return m_collisions ? m_collisions + s->index() : 0; } // 0 when no collision array exists; otherwise indexed by the slot's index()
+ CLASS_NEW_DELETE
+
+public: //only used by: GrSegment* makeAndInitialize(const GrFont *font, const GrFace *face, uint32 script, const FeaturesHandle& pFeats/*must not be IsNull*/, encform enc, const void* pStart, size_t nChars, int dir);
+ bool read_text(const Face *face, const Features* pFeats/*must not be NULL*/, gr_encform enc, const void*pStart, size_t nChars);
+ void finalise(const Font *font, bool reverse=false);
+ float justify(Slot *pSlot, const Font *font, float width, enum justFlags flags, Slot *pFirst, Slot *pLast);
+ bool initCollisions();
+
+private:
+ Position m_advance; // whole segment advance
+ SlotRope m_slots; // Vector of slot buffers
+ AttributeRope m_userAttrs; // Vector of userAttrs buffers
+ JustifyRope m_justifies; // Slot justification info buffers
+ FeatureList m_feats; // feature settings referenced by charinfos in this segment
+ Slot * m_freeSlots; // linked list of free slots
+ SlotJustify * m_freeJustifies; // Slot justification blocks free list
+ CharInfo * m_charinfo; // character info, one per input character
+ SlotCollision * m_collisions; // per-slot collision records indexed by Slot::index(); may be null
+ const Face * m_face; // GrFace
+ const Silf * m_silf; // may be null (runGraphite() checks for that)
+ Slot * m_first; // first slot in segment
+ Slot * m_last; // last slot in segment
+ size_t m_bufSize, // how big a buffer to create when need more slots
+ m_numGlyphs,
+ m_numCharinfo; // size of the array and number of input characters
+ int m_defaultOriginal; // number of whitespace chars in the string
+ int8 m_dir;
+ uint8 m_flags, // General purpose flags
+ m_passBits; // if bit set then skip pass
+};
+
+// Returns the slot's bidi class. A stored class of -1 means "not yet
+// determined": the class is then read from the glyph attribute selected by
+// the Silf's aBidi() index, stored back on the slot and returned.
+inline
+int8 Segment::getSlotBidiClass(Slot *s) const
+{
+    const int8 cached = s->getBidiClass();
+    if (cached == -1)
+    {
+        const int8 fromGlyph = int8(glyphAttr(s->gid(), m_silf->aBidi()));
+        s->setBidiClass(fromGlyph);
+        return fromGlyph;
+    }
+    return cached;
+}
+
+// Final positioning step. Does nothing unless the segment has both a first
+// and a last slot. Otherwise it positions all slots and records the total
+// advance, reverses the slot order when reverse is requested and currdir()
+// differs from bit 0 of m_dir, and finally links the clusters.
+inline
+void Segment::finalise(const Font *font, bool reverse)
+{
+    if (m_first && m_last)
+    {
+        m_advance = positionSlots(font, m_first, m_last, m_silf->dir(), true);
+        //associateChars(0, m_numCharinfo);
+        const bool needsReversal = reverse && currdir() != (m_dir & 1);
+        if (needsReversal)
+            reverseSlots();
+        linkClusters(m_first, m_last);
+    }
+}
+
+// Looks up a glyph metric. At attrLevel 0 the Face is asked for the metric
+// of the slot's own glyph; at any higher level the metric is computed for
+// the cluster rooted at the slot's attachment root.
+inline
+int32 Segment::getGlyphMetric(Slot *iSlot, uint8 metric, uint8 attrLevel, bool rtl) const {
+    if (attrLevel == 0)
+        return m_face->getGlyphMetric(iSlot->gid(), metric);
+
+    Slot * const root = findRoot(iSlot);
+    return root->clusterMetric(this, metric, attrLevel, rtl);
+}
+
+// Reports whether cid is one of the code points this class treats as
+// whitespace: the ranges 0x0009-0x000D and 0x2000-0x200A plus the
+// individual values listed in the switch below.
+inline
+bool Segment::isWhitespace(const int cid) const
+{
+    if (cid >= 0x0009 && cid <= 0x000D) return true;
+    if (cid >= 0x2000 && cid <= 0x200A) return true;
+
+    switch (cid)
+    {
+        case 0x0020:
+        case 0x0085:
+        case 0x00A0:
+        case 0x1680:
+        case 0x180E:
+        case 0x2028:
+        case 0x2029:
+        case 0x202F:
+        case 0x205F:
+        case 0x3000:
+            return true;
+        default:
+            return false;
+    }
+}
+
+} // namespace graphite2
+
+struct gr_segment : public graphite2::Segment {}; // empty global-namespace subclass, adds no members - NOTE(review): presumably the type behind the public C API handle; confirm
diff --git a/thirdparty/graphite/src/inc/Silf.h b/thirdparty/graphite/src/inc/Silf.h
new file mode 100644
index 0000000000..edc0c3a16d
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Silf.h
@@ -0,0 +1,128 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include "graphite2/Font.h"
+#include "inc/Main.h"
+#include "inc/Pass.h"
+
+namespace graphite2 {
+
+class Face;
+class Segment;
+class FeatureVal;
+class VMScratch;
+class Error;
+
+class Pseudo // Plain record pairing a uid with a gid.
+{
+public:
+ uint32 uid; // NOTE(review): presumably the Unicode value that Silf::findPseudo(uid) looks up - confirm
+ uint32 gid; // NOTE(review): presumably the glyph id associated with uid - confirm
+ CLASS_NEW_DELETE;
+};
+
+// Immutable bundle of four uint8 values (stretch, shrink, step, weight)
+// fixed at construction and exposed through read-only accessors.
+class Justinfo
+{
+public:
+    Justinfo(uint8 stretch, uint8 shrink, uint8 step, uint8 weight)
+    : m_astretch(stretch),
+      m_ashrink(shrink),
+      m_astep(step),
+      m_aweight(weight)
+    {}
+
+    uint8 attrStretch() const   { return m_astretch; }
+    uint8 attrShrink() const    { return m_ashrink; }
+    uint8 attrStep() const      { return m_astep; }
+    uint8 attrWeight() const    { return m_aweight; }
+
+private:
+    uint8 m_astretch;
+    uint8 m_ashrink;
+    uint8 m_astep;
+    uint8 m_aweight;
+};
+
+class Silf // Holds arrays of passes, pseudo entries, class data and justification info plus scalar settings exposed through the accessors below.
+{
+ // Prevent copying
+ Silf(const Silf&);
+ Silf& operator=(const Silf&);
+
+public:
+ Silf() throw(); // NOTE(review): throw() is a dynamic exception specification, removed from the language in C++20
+ ~Silf() throw();
+
+ bool readGraphite(const byte * const pSilf, size_t lSilf, Face &face, uint32 version);
+ bool runGraphite(Segment *seg, uint8 firstPass=0, uint8 lastPass=0, int dobidi = 0) const;
+ uint16 findClassIndex(uint16 cid, uint16 gid) const;
+ uint16 getClassGlyph(uint16 cid, unsigned int index) const;
+ uint16 findPseudo(uint32 uid) const;
+ uint8 numUser() const { return m_aUser; } // Segment::numAttrs() forwards to this
+ uint8 aPseudo() const { return m_aPseudo; }
+ uint8 aBreak() const { return m_aBreak; }
+ uint8 aMirror() const {return m_aMirror; }
+ uint8 aPassBits() const { return m_aPassBits; }
+ uint8 aBidi() const { return m_aBidi; } // glyph attribute index read by Segment::getSlotBidiClass()
+ uint8 aCollision() const { return m_aCollision; }
+ uint8 substitutionPass() const { return m_sPass; }
+ uint8 positionPass() const { return m_pPass; }
+ uint8 justificationPass() const { return m_jPass; }
+ uint8 bidiPass() const { return m_bPass; }
+ uint8 numPasses() const { return m_numPasses; }
+ uint8 maxCompPerLig() const { return m_iMaxComp; }
+ uint16 numClasses() const { return m_nClass; }
+ byte flags() const { return m_flags; }
+ byte dir() const { return m_dir; } // passed as the isRtl argument of positionSlots() by Segment::finalise()
+ uint8 numJustLevels() const { return m_numJusts; }
+ Justinfo *justAttrs() const { return m_justs; } // returns the internal array pointer itself, not a copy
+ uint16 endLineGlyphid() const { return m_gEndLine; }
+ const gr_faceinfo *silfInfo() const { return &m_silfinfo; } // address of a member: valid only while this Silf exists
+
+ CLASS_NEW_DELETE;
+
+private:
+ size_t readClassMap(const byte *p, size_t data_len, uint32 version, Error &e);
+ template<typename T> inline uint32 readClassOffsets(const byte *&p, size_t data_len, Error &e);
+
+ Pass * m_passes;
+ Pseudo * m_pseudos;
+ uint32 * m_classOffsets;
+ uint16 * m_classData;
+ Justinfo * m_justs;
+ uint8 m_numPasses;
+ uint8 m_numJusts;
+ uint8 m_sPass, m_pPass, m_jPass, m_bPass,
+ m_flags, m_dir;
+
+ uint8 m_aPseudo, m_aBreak, m_aUser, m_aBidi, m_aMirror, m_aPassBits,
+ m_iMaxComp, m_aCollision;
+ uint16 m_aLig, m_numPseudo, m_nClass, m_nLinear,
+ m_gEndLine;
+ gr_faceinfo m_silfinfo;
+
+ void releaseBuffers() throw();
+};
+
+} // namespace graphite2
diff --git a/thirdparty/graphite/src/inc/Slot.h b/thirdparty/graphite/src/inc/Slot.h
new file mode 100644
index 0000000000..df39d9a3bb
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Slot.h
@@ -0,0 +1,170 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include "graphite2/Types.h"
+#include "graphite2/Segment.h"
+#include "inc/Main.h"
+#include "inc/Font.h"
+#include "inc/Position.h"
+
+namespace graphite2 {
+
+typedef gr_attrCode attrCode;
+
+class GlyphFace;
+class Segment;
+
+// Variable-length justification record. values[] is declared with a single
+// element but is meant to be over-allocated: size_of() reports how many
+// bytes a record for a given number of levels needs.
+struct SlotJustify
+{
+    static const int NUMJUSTPARAMS = 5;
+
+    // Copy operations are declared but not defined in this header.
+    SlotJustify(const SlotJustify &);
+    SlotJustify & operator = (const SlotJustify &);
+
+public:
+    // Bytes required for a record holding NUMJUSTPARAMS int16 values per
+    // level; at least one level is always accounted for. One int16 is
+    // subtracted because values[1] is already part of sizeof(SlotJustify).
+    static size_t size_of(size_t levels)
+    {
+        const size_t effectiveLevels = levels > 1 ? levels : 1;
+        return sizeof(SlotJustify) + (effectiveLevels*NUMJUSTPARAMS - 1)*sizeof(int16);
+    }
+
+    void LoadSlot(const Slot *s, const Segment *seg);
+
+    SlotJustify *next;
+    int16 values[1];
+};
+
+class Slot // One node of a doubly linked slot list, with glyph ids, positions, attachment links and flags.
+{
+ enum Flag // bit values stored in m_flags
+ {
+ DELETED = 1,
+ INSERTED = 2,
+ COPIED = 4,
+ POSITIONED = 8,
+ ATTACHED = 16
+ };
+
+public:
+ struct iterator;
+
+ unsigned short gid() const { return m_glyphid; }
+ Position origin() const { return m_position; }
+ float advance() const { return m_advance.x; } // x component only; see advancePos() for the full vector
+ void advance(Position &val) { m_advance = val; }
+ Position advancePos() const { return m_advance; }
+ int before() const { return m_before; } // stored as uint32, returned as int
+ int after() const { return m_after; } // stored as uint32, returned as int
+ uint32 index() const { return m_index; }
+ void index(uint32 val) { m_index = val; }
+
+ Slot(int16 *m_userAttr = NULL);
+ void set(const Slot & slot, int charOffset, size_t numUserAttr, size_t justLevels, size_t numChars);
+ Slot *next() const { return m_next; }
+ void next(Slot *s) { m_next = s; }
+ Slot *prev() const { return m_prev; }
+ void prev(Slot *s) { m_prev = s; }
+ uint16 glyph() const { return m_realglyphid ? m_realglyphid : m_glyphid; } // the real glyph id when one is set (non-zero), else the glyph id
+ void setGlyph(Segment *seg, uint16 glyphid, const GlyphFace * theGlyph = NULL);
+ void setRealGid(uint16 realGid) { m_realglyphid = realGid; }
+ void adjKern(const Position &pos) { m_shift = m_shift + pos; m_advance = m_advance + pos; } // adds pos to both the shift and the advance
+ void origin(const Position &pos) { m_position = pos + m_shift; } // the stored position includes the current shift
+ void originate(int ind) { m_original = ind; }
+ int original() const { return m_original; }
+ void before(int ind) { m_before = ind; }
+ void after(int ind) { m_after = ind; }
+ bool isBase() const { return (!m_parent); } // true when the slot is not attached to a parent
+ void update(int numSlots, int numCharInfo, Position &relpos);
+ Position finalise(const Segment* seg, const Font* font, Position & base, Rect & bbox, uint8 attrLevel, float & clusterMin, bool rtl, bool isFinal, int depth = 0);
+ bool isDeleted() const { return (m_flags & DELETED) ? true : false; }
+ void markDeleted(bool state) { if (state) m_flags |= DELETED; else m_flags &= ~DELETED; }
+ bool isCopied() const { return (m_flags & COPIED) ? true : false; }
+ void markCopied(bool state) { if (state) m_flags |= COPIED; else m_flags &= ~COPIED; }
+ bool isPositioned() const { return (m_flags & POSITIONED) ? true : false; }
+ void markPositioned(bool state) { if (state) m_flags |= POSITIONED; else m_flags &= ~POSITIONED; }
+ bool isInsertBefore() const { return !(m_flags & INSERTED); } // inverted sense: true while the INSERTED bit is clear
+ uint8 getBidiLevel() const { return m_bidiLevel; }
+ void setBidiLevel(uint8 level) { m_bidiLevel = level; }
+ int8 getBidiClass(const Segment *seg);
+ int8 getBidiClass() const { return m_bidiCls; } // Segment::getSlotBidiClass() treats -1 as "not yet determined"
+ void setBidiClass(int8 cls) { m_bidiCls = cls; }
+ int16 *userAttrs() const { return m_userAttr; }
+ void userAttrs(int16 *p) { m_userAttr = p; }
+ void markInsertBefore(bool state) { if (!state) m_flags |= INSERTED; else m_flags &= ~INSERTED; } // inverted sense: state == false sets the INSERTED bit
+ void setAttr(Segment* seg, attrCode ind, uint8 subindex, int16 val, const SlotMap & map);
+ int getAttr(const Segment *seg, attrCode ind, uint8 subindex) const;
+ int getJustify(const Segment *seg, uint8 level, uint8 subindex) const;
+ void setJustify(Segment *seg, uint8 level, uint8 subindex, int16 value);
+ bool isLocalJustify() const { return m_justs != NULL; };
+ void attachTo(Slot *ap) { m_parent = ap; }
+ Slot *attachedTo() const { return m_parent; }
+ Position attachOffset() const { return m_attach - m_with; }
+ Slot* firstChild() const { return m_child; }
+ void firstChild(Slot *ap) { m_child = ap; }
+ bool child(Slot *ap);
+ Slot* nextSibling() const { return m_sibling; }
+ void nextSibling(Slot *ap) { m_sibling = ap; }
+ bool sibling(Slot *ap);
+ bool removeChild(Slot *ap);
+ int32 clusterMetric(const Segment* seg, uint8 metric, uint8 attrLevel, bool rtl);
+ void positionShift(Position a) { m_position += a; }
+ void floodShift(Position adj, int depth = 0);
+ float just() const { return m_just; }
+ void just(float j) { m_just = j; }
+ Slot *nextInCluster(const Slot *s) const;
+ bool isChildOf(const Slot *base) const;
+
+ CLASS_NEW_DELETE
+
+private:
+ Slot *m_next; // linked list of slots
+ Slot *m_prev;
+ unsigned short m_glyphid; // glyph id
+ uint16 m_realglyphid; // overrides m_glyphid in glyph() when non-zero
+ uint32 m_original; // charinfo that originated this slot (e.g. for feature values)
+ uint32 m_before; // charinfo index of before association
+ uint32 m_after; // charinfo index of after association
+ uint32 m_index; // slot index given to this slot during finalising
+ Slot *m_parent; // pointer to the parent slot we are attached to (null for a base slot)
+ Slot *m_child; // pointer to the first child slot that attaches to us
+ Slot *m_sibling; // pointer to the next child that attaches to our parent
+ Position m_position; // absolute position of glyph
+ Position m_shift; // .shift slot attribute
+ Position m_advance; // .advance slot attribute
+ Position m_attach; // attachment point on us
+ Position m_with; // attachment point position on parent
+ float m_just; // Justification inserted space
+ uint8 m_flags; // holds bit flags
+ byte m_attLevel; // attachment level
+ int8 m_bidiCls; // bidirectional class
+ byte m_bidiLevel; // bidirectional level
+ int16 *m_userAttr; // pointer to user attributes
+ SlotJustify *m_justs; // pointer to justification parameters
+
+ friend class Segment;
+};
+
+} // namespace graphite2
+
+struct gr_slot : public graphite2::Slot {}; // empty global-namespace subclass, adds no members - NOTE(review): presumably the type behind the public C API handle; confirm
diff --git a/thirdparty/graphite/src/inc/Sparse.h b/thirdparty/graphite/src/inc/Sparse.h
new file mode 100644
index 0000000000..fcda890171
--- /dev/null
+++ b/thirdparty/graphite/src/inc/Sparse.h
@@ -0,0 +1,168 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2011, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+#include <iterator>
+#include <utility>
+
+#include "inc/Main.h"
+
+namespace graphite2 {
+
+
+// A read-only packed fast sparse array of uint16 with uint16 keys.
+// Like most container classes this has capacity and size properties and these
+// refer to the number of stored entries and the number of addressable entries
+// as normal. However due to the sparse nature the capacity is always <= the
+// size.
+class sparse // read-only map from uint16 keys to uint16 values; one allocation holds the chunk headers followed by the stored values
+{
+public:
+ typedef uint16 key_type;
+ typedef uint16 mapped_type;
+ typedef std::pair<const key_type, mapped_type> value_type;
+
+private:
+ typedef unsigned long mask_t;
+
+ static const unsigned char SIZEOF_CHUNK = (sizeof(mask_t) - sizeof(key_type))*8; // keys covered by one chunk: the bits of a mask_t minus the bits of a key_type
+
+ struct chunk
+ {
+ mask_t mask:SIZEOF_CHUNK; // one presence bit per key of the chunk; the chunk's first key maps to the highest bit
+ key_type offset; // index into m_array.values of the first value stored for this chunk
+ };
+
+ static const chunk empty_chunk; // shared placeholder that empty arrays point m_array.map at (defined outside this header)
+ sparse(const sparse &); // copy construction and assignment are declared private; no definition is visible here
+ sparse & operator = (const sparse &);
+
+public:
+ template<typename I>
+ sparse(I first, const I last); // builds from a range of (key, value) pairs; see the definition below the class
+ sparse() throw(); // empty array
+ ~sparse() throw();
+
+ operator bool () const throw(); // false only when m_array.map is null
+ mapped_type operator [] (const key_type k) const throw();
+
+ size_t capacity() const throw();
+ size_t size() const throw(); // m_nchunks * SIZEOF_CHUNK
+
+ size_t _sizeof() const throw();
+
+ CLASS_NEW_DELETE;
+
+private:
+ union { // two views of the same pointer
+ chunk * map; // the allocation seen as the chunk header table
+ mapped_type * values; // the allocation seen as a flat array of mapped_type slots
+ } m_array;
+ key_type m_nchunks; // number of chunk headers
+};
+
+
+inline
+sparse::sparse() throw() : m_nchunks(0) // default constructor: an array with no chunks
+{
+ m_array.map = const_cast<graphite2::sparse::chunk *>(&empty_chunk); // point at the shared static chunk rather than allocating; const is cast away only to fit the pointer type
+}
+
+
+template <typename I>
+sparse::sparse(I attr, const I last) // builds the array from (key, value) pairs in [attr, last); keys must be strictly increasing
+: m_nchunks(0)
+{
+ m_array.map = 0; // stays null on every failure path, which is what operator bool tests
+
+ // Find the maximum extent of the key space.
+ size_t n_values=0;
+ long lastkey = -1;
+ for (I i = attr; i != last; ++i, ++n_values)
+ {
+ const typename std::iterator_traits<I>::value_type v = *i;
+ if (v.second == 0) { --n_values; continue; } // zero values are never stored
+ if (v.first <= lastkey) { m_nchunks = 0; return; } // duplicate or out-of-order key: fail with no chunks
+
+ lastkey = v.first;
+ const key_type k = v.first / SIZEOF_CHUNK;
+ if (k >= m_nchunks) m_nchunks = k+1;
+ }
+ if (m_nchunks == 0)
+ {
+ m_array.map=const_cast<graphite2::sparse::chunk *>(&empty_chunk);
+ return;
+ }
+
+ m_array.values = grzeroalloc<mapped_type>((m_nchunks*sizeof(chunk) + sizeof(mapped_type)-1)
+ / sizeof(mapped_type)
+ + n_values);
+
+ if (m_array.values == 0)
+ { m_nchunks = 0; return; } // allocation failed: drop the chunk count, as the bad-key path does, so size()/_sizeof() do not describe storage that was never allocated
+
+ // coverity[forward_null : FALSE] Since m_array is union and m_array.values is not NULL
+ chunk * ci = m_array.map;
+ ci->offset = (m_nchunks*sizeof(chunk) + sizeof(mapped_type)-1)/sizeof(mapped_type); // values start after the chunk headers, rounded up to whole mapped_type slots
+ mapped_type * vi = m_array.values + ci->offset;
+ for (; attr != last; ++attr, ++vi)
+ {
+ const typename std::iterator_traits<I>::value_type v = *attr;
+ if (v.second == 0) { --vi; continue; } // undo the loop's ++vi: skipped pairs take no slot
+
+ chunk * const ci_ = m_array.map + v.first/SIZEOF_CHUNK;
+
+ if (ci != ci_)
+ {
+ ci = ci_;
+ ci->offset = key_type(vi - m_array.values); // first value of a newly entered chunk
+ }
+
+ ci->mask |= 1UL << (SIZEOF_CHUNK - 1 - (v.first % SIZEOF_CHUNK)); // mark the key present; the chunk's first key is the top bit
+ *vi = v.second;
+ }
+}
+
+
+inline
+sparse::operator bool () const throw() // true unless the storage pointer is null (the range constructor leaves it null when it fails)
+{
+ return m_array.map != 0;
+}
+
+inline
+size_t sparse::size() const throw() // number of addressable keys: each chunk spans SIZEOF_CHUNK keys
+{
+ return m_nchunks*SIZEOF_CHUNK;
+}
+
+inline
+size_t sparse::_sizeof() const throw() // bytes for this object, plus one mapped_type per capacity() entry, plus the chunk headers
+{
+ return sizeof(sparse) + capacity()*sizeof(mapped_type) + m_nchunks*sizeof(chunk);
+}
+
+} // namespace graphite2
diff --git a/thirdparty/graphite/src/inc/TtfTypes.h b/thirdparty/graphite/src/inc/TtfTypes.h
new file mode 100644
index 0000000000..ae67915304
--- /dev/null
+++ b/thirdparty/graphite/src/inc/TtfTypes.h
@@ -0,0 +1,419 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+/*--------------------------------------------------------------------*//*:Ignore this sentence.
+
+File: TtfTypes.h
+Responsibility: Tim Eves
+Last reviewed: Not yet.
+
+Description:
+Provides types required to represent the TTF basic types.
+-------------------------------------------------------------------------------*//*:End Ignore*/
+
+
+//**********************************************************************************************
+// Include files
+//**********************************************************************************************
+namespace graphite2
+{
+namespace TtfUtil
+{
+//**********************************************************************************************
+// Forward declarations
+//**********************************************************************************************
+
+
+//**********************************************************************************************
+// Type declarations
+//**********************************************************************************************
+typedef unsigned char uint8; // sized integer aliases; the widths in the names assume the usual 8/16/32-bit char/short/int — TODO confirm for the target ABI
+typedef uint8 byte;
+typedef signed char int8;
+typedef unsigned short uint16;
+typedef short int16;
+typedef unsigned int uint32;
+typedef int int32;
+
+typedef int16 short_frac; // the aliases below name field kinds used by the table structs further down
+typedef int32 fixed; // NOTE(review): presumably 16.16 fixed point, given OneFix = 1<<16 — confirm
+typedef int16 fword;
+typedef uint16 ufword;
+typedef int16 f2dot14;
+typedef uint32 long_date_time[2]; // stored as two uint32 words
+
+//**********************************************************************************************
+// Constants and enum types
+//**********************************************************************************************/
+enum
+{
+ OneFix = 1<<16 // 65536; presumably the value 1.0 in the `fixed` type — confirm
+};
+
+//**********************************************************************************************
+// Table declarations
+//**********************************************************************************************
+namespace Sfnt
+{
+#pragma pack(push,1) // We need this or the structure members aren't aligned
+ // correctly. Fortunately this form of pragma is supposed
+ // to be recognised by VS C++ too (at least according to
+ // MSDN).
+
+ struct OffsetSubTable // header followed by a directory of tables
+ {
+ uint32 scaler_type; // compared against the ScalerType values below
+ uint16 num_tables,
+ search_range,
+ entry_selector,
+ range_shift;
+ struct Entry
+ {
+ uint32 tag,
+ checksum,
+ offset,
+ length;
+ } table_directory[1]; // declared with one element; presumably num_tables entries follow in the data — confirm
+
+ enum ScalerType
+ {
+ TrueTypeMac = 0x74727565U, // the bytes 't','r','u','e'
+ TrueTypeWin = 0x00010000U,
+ Type1 = 0x74797031U // the bytes 't','y','p','1'
+ };
+ };
+
+
+
+
+ struct CharacterCodeMap
+ {
+ uint16 version,
+ num_subtables;
+ struct
+ {
+ uint16 platform_id,
+ platform_specific_id;
+ uint32 offset;
+ } encoding[1]; // declared with one element; presumably num_subtables entries — confirm
+ };
+
+ struct CmapSubTable // common leading fields; CmapSubTableFormat4 derives from this
+ {
+ uint16 format,
+ length,
+ language;
+ };
+
+ struct CmapSubTableFormat4 : CmapSubTable
+ {
+ uint16 seg_count_x2,
+ search_range,
+ entry_selector,
+ range_shift,
+ end_code[1];
+ // There are arrays after this which need their
+ // start positions calculated since end_code is
+ // seg_count uint16s long.
+ };
+
+ struct CmapSubTableFormat12 // does not derive from CmapSubTable: format is a 32-bit field and length/language are uint32 here
+ {
+ fixed format;
+ uint32 length,
+ language,
+ num_groups;
+ struct
+ {
+ uint32 start_char_code,
+ end_char_code,
+ start_glyph_id;
+ } group[1]; // declared with one element; presumably num_groups entries — confirm
+ };
+
+
+
+ struct FontHeader
+ {
+ fixed version,
+ font_revision;
+ uint32 check_sum_adjustment,
+ magic_number;
+ uint16 flags,
+ units_per_em;
+ long_date_time created,
+ modified;
+ fword x_min,
+ y_min,
+ x_max,
+ y_max;
+ uint16 mac_style,
+ lowest_rec_ppem;
+ int16 font_direction_hint,
+ index_to_loc_format,
+ glyph_data_format;
+ enum
+ {
+ MagicNumber = 0x5F0F3CF5,
+ GlypDataFormat = 0 // NOTE(review): name looks like a misspelling of "GlyphDataFormat"; left unchanged because it is an identifier
+ };
+ enum {ShortIndexLocFormat, LongIndexLocFormat}; // values 0 and 1; presumably the meanings of index_to_loc_format — confirm
+ };
+
+
+
+
+ struct PostScriptGlyphName // common leading fields; the numbered variants below derive from this
+ {
+ fixed format,
+ italic_angle;
+ fword underline_position,
+ underline_thickness;
+ uint32 is_fixed_pitch,
+ min_mem_type42,
+ max_mem_type42,
+ min_mem_type1,
+ max_mem_type1;
+ enum
+ {
+ Format1 = 0x10000,
+ Format2 = 0x20000,
+ Format25 = 0x28000,
+ Format3 = 0x30000,
+ Format4 = 0x40000
+ };
+ };
+
+ struct PostScriptGlyphName2 : PostScriptGlyphName
+ {
+ uint16 number_of_glyphs,
+ glyph_name_index[1];
+ };
+
+ struct PostScriptGlyphName25 : PostScriptGlyphName
+ {
+ uint16 number_of_glyphs;
+ int8 offset[1];
+ };
+
+ struct PostScriptGlyphName3 : PostScriptGlyphName {}; // adds no fields
+
+ struct PostScriptGlyphName4 : PostScriptGlyphName
+ {
+ uint16 glyph_to_char_map[1];
+ };
+
+
+ struct HorizontalHeader
+ {
+ fixed version;
+ fword ascent,
+ descent,
+ line_gap;
+ ufword advance_width_max;
+ fword min_left_side_bearing,
+ max_left_side_bearing,
+ x_max_element;
+ int16 caret_slope_rise,
+ caret_slope_run;
+ fword caret_offset;
+ int16 reserved[4],
+ metric_data_format;
+ uint16 num_long_hor_metrics;
+ };
+
+ struct MaximumProfile
+ {
+ fixed version;
+ uint16 num_glyphs,
+ max_points,
+ max_contours,
+ max_component_points,
+ max_component_contours,
+ max_zones,
+ max_twilight_points,
+ max_storage,
+ max_function_defs,
+ max_instruction_defs,
+ max_stack_elements,
+ max_size_of_instructions,
+ max_component_elements,
+ max_component_depth;
+ };
+
+
+ typedef byte Panose[10];
+
+ struct Compatibility0 // base of the Compatibility1..3 chain below; each later version appends fields
+ {
+ uint16 version;
+ int16 x_avg_char_width;
+ uint16 weight_class,
+ width_class;
+ int16 fs_type,
+ y_subscript_x_size,
+ y_subscript_y_size,
+ y_subscript_x_offset,
+ y_subscript_y_offset,
+ y_superscript_x_size,
+ y_superscript_y_size,
+ y_superscript_x_offset,
+ y_superscript_y_offset,
+ y_strikeout_size,
+ y_strikeout_position,
+ family_class;
+ Panose panose;
+ uint32 unicode_range[4];
+ int8 ach_vend_id[4];
+ uint16 fs_selection,
+ fs_first_char_index,
+ fs_last_char_index, // According to Apple's spec this is where v0 should end
+ typo_ascender,
+ typo_descender,
+ type_linegap, // NOTE(review): name looks like a misspelling of "typo_linegap"; left unchanged because it is an identifier
+ win_ascent,
+ win_descent;
+
+ enum // single-bit flag values; presumably tested against fs_selection — confirm
+ {
+ Italic =0x01,
+ Underscore=0x02,
+ Negative =0x04,
+ Outlined =0x08,
+ StrikeOut =0x10,
+ Bold =0x20
+ };
+ };
+
+ struct Compatibility1 : Compatibility0
+ {
+ uint32 codepage_range[2];
+ };
+
+ struct Compatibility2 : Compatibility1
+ {
+ int16 x_height,
+ cap_height;
+ uint16 default_char,
+ break_char,
+ max_context;
+ };
+
+ struct Compatibility3 : Compatibility2 {}; // adds no fields
+
+ typedef Compatibility3 Compatibility; // unversioned name refers to the newest layout declared here
+
+
+ struct NameRecord
+ {
+ uint16 platform_id,
+ platform_specific_id,
+ language_id,
+ name_id,
+ length,
+ offset;
+ enum {Unicode, Mactintosh, Reserved, Microsoft}; // values 0..3; NOTE(review): "Mactintosh" looks like a misspelling of "Macintosh"; left unchanged because it is an identifier
+ enum // values 0..6
+ {
+ Copyright, Family, Subfamily, UniqueSubfamily,
+ Fullname, Version, PostScript
+ };
+ };
+
+ struct LangTagRecord
+ {
+ uint16 length,
+ offset;
+ };
+
+ struct FontNames
+ {
+ uint16 format,
+ count,
+ string_offset;
+ NameRecord name_record[1]; // declared with one element; presumably count records — confirm
+ };
+
+
+ struct HorizontalMetric
+ {
+ uint16 advance_width;
+ int16 left_side_bearing;
+ };
+
+
+ struct Glyph // common leading fields; SimpleGlyph and CompoundGlyph derive from this
+ {
+ int16 number_of_contours;
+ fword x_min,
+ y_min,
+ x_max,
+ y_max;
+ };
+
+ struct SimpleGlyph : Glyph
+ {
+ uint16 end_pts_of_contours[1];
+ enum
+ {
+ OnCurve = 0x01,
+ XShort = 0x02,
+ YShort = 0x04,
+ Repeat = 0x08,
+ XIsSame = 0x10, // same bit as XIsPos: one flag bit with two names
+ XIsPos = 0x10,
+ YIsSame = 0x20, // same bit as YIsPos
+ YIsPos = 0x20
+ };
+ };
+
+ struct CompoundGlyph : Glyph
+ {
+ uint16 flags,
+ glyph_index;
+ enum // bit values; 0x10 has no name here
+ {
+ Arg1Arg2Words = 0x01,
+ ArgsAreXYValues = 0x02,
+ RoundXYToGrid = 0x04,
+ HaveScale = 0x08,
+ MoreComponents = 0x20,
+ HaveXAndYScale = 0x40,
+ HaveTwoByTwo = 0x80,
+ HaveInstructions = 0x100,
+ UseMyMetrics = 0x200,
+ OverlapCompund = 0x400, // NOTE(review): name looks like a misspelling of "OverlapCompound"; left unchanged because it is an identifier
+ ScaledOffset = 0x800,
+ UnscaledOffset = 0x1000
+ };
+ };
+
+#pragma pack(pop)
+} // end of namespace Sfnt
+
+} // end of namespace TtfUtil
+} // end of namespace graphite2
diff --git a/thirdparty/graphite/src/inc/TtfUtil.h b/thirdparty/graphite/src/inc/TtfUtil.h
new file mode 100644
index 0000000000..3952bc06fb
--- /dev/null
+++ b/thirdparty/graphite/src/inc/TtfUtil.h
@@ -0,0 +1,208 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+/*--------------------------------------------------------------------*//*:Ignore this sentence.
+
+File: TtfUtil.h
+Responsibility: Alan Ward
+Last reviewed: Not yet.
+
+Description:
+ Utility class for handling TrueType font files.
+----------------------------------------------------------------------------------------------*/
+
+
+#include <cstddef>
+
+namespace graphite2
+{
+namespace TtfUtil
+{
+
+#define OVERFLOW_OFFSET_CHECK(p, o) ((o) + reinterpret_cast<size_t>(p) < reinterpret_cast<size_t>(p)) // true when adding offset o to the address of p wraps around; o is parenthesised so an expression argument binds as a whole
+
+typedef long fontTableId32;
+typedef unsigned short gid16; // glyph id type taken and returned by the lookup functions declared below
+
+#define TTF_TAG(a,b,c,d) (((a) << 24UL) + ((b) << 16UL) + ((c) << 8UL) + (d)) // packs four character values into one tag, first argument in the top byte; each argument is parenthesised so an expression argument binds as a whole
+
+// Enumeration used to specify a table in a TTF file
+class Tag // wraps one unsigned int table tag; converts implicitly from a name or a raw value and back to unsigned int
+{
+ unsigned int _v; // the packed tag value
+public:
+ Tag(const char n[5]) throw() : _v(TTF_TAG(n[0],n[1],n[2],n[3])) {} // from a character name; only n[0]..n[3] are read
+ Tag(const unsigned int tag) throw() : _v(tag) {} // from an already packed value
+
+ operator unsigned int () const throw () { return _v; }
+
+ enum // packed values of the table names this code refers to
+ {
+ Feat = TTF_TAG('F','e','a','t'),
+ Glat = TTF_TAG('G','l','a','t'),
+ Gloc = TTF_TAG('G','l','o','c'),
+ Sile = TTF_TAG('S','i','l','e'),
+ Silf = TTF_TAG('S','i','l','f'),
+ Sill = TTF_TAG('S','i','l','l'),
+ cmap = TTF_TAG('c','m','a','p'),
+ cvt = TTF_TAG('c','v','t',' '), // three letters padded with a trailing space
+ cryp = TTF_TAG('c','r','y','p'),
+ head = TTF_TAG('h','e','a','d'),
+ fpgm = TTF_TAG('f','p','g','m'),
+ gdir = TTF_TAG('g','d','i','r'),
+ glyf = TTF_TAG('g','l','y','f'),
+ hdmx = TTF_TAG('h','d','m','x'),
+ hhea = TTF_TAG('h','h','e','a'),
+ hmtx = TTF_TAG('h','m','t','x'),
+ loca = TTF_TAG('l','o','c','a'),
+ kern = TTF_TAG('k','e','r','n'),
+ LTSH = TTF_TAG('L','T','S','H'),
+ maxp = TTF_TAG('m','a','x','p'),
+ name = TTF_TAG('n','a','m','e'),
+ OS_2 = TTF_TAG('O','S','/','2'), // the enumerator is OS_2 because '/' cannot appear in an identifier
+ post = TTF_TAG('p','o','s','t'),
+ prep = TTF_TAG('p','r','e','p')
+ };
+};
+
+/*----------------------------------------------------------------------------------------------
+ Class providing utility methods to parse a TrueType font file (TTF).
+ Calling application handles all file input and memory allocation.
+ Assumes minimal knowledge of TTF file format.
+----------------------------------------------------------------------------------------------*/
+ ////////////////////////////////// tools to find & check TTF tables
+ bool GetHeaderInfo(size_t & lOffset, size_t & lSize);
+ bool CheckHeader(const void * pHdr);
+ bool GetTableDirInfo(const void * pHdr, size_t & lOffset, size_t & lSize);
+ bool GetTableInfo(const Tag TableTag, const void * pHdr, const void * pTableDir,
+ size_t & lOffset, size_t & lSize);
+ bool CheckTable(const Tag TableId, const void * pTable, size_t lTableSize);
+
+ ////////////////////////////////// simple font wide info
+ size_t GlyphCount(const void * pMaxp);
+#ifdef ALL_TTFUTILS // declarations inside these blocks exist only when ALL_TTFUTILS is defined
+ size_t MaxCompositeComponentCount(const void * pMaxp);
+ size_t MaxCompositeLevelCount(const void * pMaxp);
+ size_t LocaGlyphCount(size_t lLocaSize, const void * pHead); // throw (std::domain_error);
+#endif
+ int DesignUnits(const void * pHead);
+#ifdef ALL_TTFUTILS
+ int HeadTableCheckSum(const void * pHead);
+ void HeadTableCreateTime(const void * pHead, unsigned int * pnDateBC, unsigned int * pnDateAD);
+ void HeadTableModifyTime(const void * pHead, unsigned int * pnDateBC, unsigned int * pnDateAD);
+ bool IsItalic(const void * pHead);
+ int FontAscent(const void * pOs2);
+ int FontDescent(const void * pOs2);
+ bool FontOs2Style(const void *pOs2, bool & fBold, bool & fItalic);
+ bool Get31EngFamilyInfo(const void * pName, size_t & lOffset, size_t & lSize);
+ bool Get31EngFullFontInfo(const void * pName, size_t & lOffset, size_t & lSize);
+ bool Get30EngFamilyInfo(const void * pName, size_t & lOffset, size_t & lSize);
+ bool Get30EngFullFontInfo(const void * pName, size_t & lOffset, size_t & lSize);
+ int PostLookup(const void * pPost, size_t lPostSize, const void * pMaxp,
+ const char * pPostName);
+#endif
+
+ ////////////////////////////////// utility methods helpful for name table
+ bool GetNameInfo(const void * pName, int nPlatformId, int nEncodingId,
+ int nLangId, int nNameId, size_t & lOffset, size_t & lSize);
+ //size_t NameTableLength(const byte * pTable);
+#ifdef ALL_TTFUTILS
+ int GetLangsForNames(const void * pName, int nPlatformId, int nEncodingId,
+ int *nameIdList, int cNameIds, short *langIdList);
+ void SwapWString(void * pWStr, size_t nSize = 0); // throw (std::invalid_argument);
+#endif
+
+ ////////////////////////////////// cmap lookup tools
+ const void * FindCmapSubtable(const void * pCmap, int nPlatformId = 3,
+ int nEncodingId = 1, size_t length = 0);
+ bool CheckCmapSubtable4(const void * pCmap31, const void * pCmapEnd /*, unsigned int maxgid*/);
+ gid16 CmapSubtable4Lookup(const void * pCmapSubtabel4, unsigned int nUnicodeId, int rangeKey = 0);
+ unsigned int CmapSubtable4NextCodepoint(const void *pCmap31, unsigned int nUnicodeId,
+ int * pRangeKey = 0);
+ bool CheckCmapSubtable12(const void *pCmap310, const void * pCmapEnd /*, unsigned int maxgid*/);
+ gid16 CmapSubtable12Lookup(const void * pCmap310, unsigned int uUnicodeId, int rangeKey = 0);
+ unsigned int CmapSubtable12NextCodepoint(const void *pCmap310, unsigned int nUnicodeId,
+ int * pRangeKey = 0);
+
+ ///////////////////////////////// horizontal metric data for a glyph
+ bool HorMetrics(gid16 nGlyphId, const void * pHmtx, size_t lHmtxSize,
+ const void * pHhea, int & nLsb, unsigned int & nAdvWid);
+
+ ////////////////////////////////// primitives for loca and glyf lookup
+ size_t LocaLookup(gid16 nGlyphId, const void * pLoca, size_t lLocaSize,
+ const void * pHead); // throw (std::out_of_range);
+ void * GlyfLookup(const void * pGlyf, size_t lGlyfOffset, size_t lTableLen);
+
+ ////////////////////////////////// primitives for simple glyph data
+ bool GlyfBox(const void * pSimpleGlyf, int & xMin, int & yMin,
+ int & xMax, int & yMax);
+
+#ifdef ALL_TTFUTILS
+ int GlyfContourCount(const void * pSimpleGlyf);
+ bool GlyfContourEndPoints(const void * pSimpleGlyf, int * prgnContourEndPoint,
+ int cnPointsTotal, size_t & cnPoints);
+ bool GlyfPoints(const void * pSimpleGlyf, int * prgnX, int * prgnY,
+ char * prgbFlag, int cnPointsTotal, int & cnPoints);
+
+ // primitive to find the glyph ids in a composite glyph
+ bool GetComponentGlyphIds(const void * pSimpleGlyf, int * prgnCompId,
+ size_t cnCompIdTotal, size_t & cnCompId);
+ // primitive to find the placement data for a component in a composite glyph
+ bool GetComponentPlacement(const void * pSimpleGlyf, int nCompId,
+ bool fOffset, int & a, int & b);
+ // primitive to find the transform data for a component in a composite glyph
+ bool GetComponentTransform(const void * pSimpleGlyf, int nCompId,
+ float & flt11, float & flt12, float & flt21, float & flt22, bool & fTransOffset);
+#endif
+
+ ////////////////////////////////// operate on composite or simple glyph (auto glyf lookup)
+ void * GlyfLookup(gid16 nGlyphId, const void * pGlyf, const void * pLoca,
+ size_t lGlyfSize, size_t lLocaSize, const void * pHead); // primitive used by below methods
+
+#ifdef ALL_TTFUTILS
+ // below are primary user methods for handling glyf data
+ bool IsSpace(gid16 nGlyphId, const void * pLoca, size_t lLocaSize, const void * pHead);
+ bool IsDeepComposite(gid16 nGlyphId, const void * pGlyf, const void * pLoca,
+ size_t lGlyfSize, size_t lLocaSize, const void * pHead);
+
+ bool GlyfBox(gid16 nGlyphId, const void * pGlyf, const void * pLoca, size_t lGlyfSize, size_t lLocaSize,
+ const void * pHead, int & xMin, int & yMin, int & xMax, int & yMax);
+ bool GlyfContourCount(gid16 nGlyphId, const void * pGlyf, const void * pLoca,
+ size_t lGlyfSize, size_t lLocaSize, const void *pHead, size_t & cnContours);
+ bool GlyfContourEndPoints(gid16 nGlyphId, const void * pGlyf, const void * pLoca,
+ size_t lGlyfSize, size_t lLocaSize, const void * pHead, int * prgnContourEndPoint, size_t cnPoints);
+ bool GlyfPoints(gid16 nGlyphId, const void * pGlyf, const void * pLoca,
+ size_t lGlyfSize, size_t lLocaSize, const void * pHead, const int * prgnContourEndPoint, size_t cnEndPoints,
+ int * prgnX, int * prgnY, bool * prgfOnCurve, size_t cnPoints);
+
+ // utility method used by high-level GlyfPoints
+ bool SimplifyFlags(char * prgbFlags, int cnPoints);
+ bool CalcAbsolutePoints(int * prgnX, int * prgnY, int cnPoints);
+#endif
+
+} // end of namespace TtfUtil
+} // end of namespace graphite2
diff --git a/thirdparty/graphite/src/inc/UtfCodec.h b/thirdparty/graphite/src/inc/UtfCodec.h
new file mode 100644
index 0000000000..24a343d8d9
--- /dev/null
+++ b/thirdparty/graphite/src/inc/UtfCodec.h
@@ -0,0 +1,251 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2011, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include <cstdlib>
+#include "inc/Main.h"
+
+namespace graphite2 {
+
+typedef uint32 uchar_t; // a decoded character value: what the codecs' get() returns and put() accepts
+
+template <int N> // N is the code unit width in bits (the users below pass sizeof(C)*8)
+struct _utf_codec // primary template: members are only declared here; 8, 16 and 32 are explicitly specialised below
+{
+ typedef uchar_t codeunit_t;
+
+ static void put(codeunit_t * cp, const uchar_t , int8 & len) throw();
+ static uchar_t get(const codeunit_t * cp, int8 & len) throw();
+ static bool validate(const codeunit_t * s, const codeunit_t * const e) throw();
+};
+
+
+template <>
+struct _utf_codec<32> // one code unit per character
+{
+private:
+ static const uchar_t limit = 0x110000; // get() rejects values at or above this
+public:
+ typedef uint32 codeunit_t;
+
+ inline
+ static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw() // stores usv unchecked and reports a length of 1
+ {
+ *cp = usv; l = 1;
+ }
+
+ inline
+ static uchar_t get(const codeunit_t * cp, int8 & l) throw() // returns the unit with l = 1, or 0xFFFD with l = -1 when it is out of range
+ {
+ if (cp[0] < limit) { l = 1; return cp[0]; }
+ else { l = -1; return 0xFFFD; }
+ }
+
+ inline
+ static bool validate(const codeunit_t * s, const codeunit_t * const e) throw() // only checks that the start does not lie past the end
+ {
+ return s <= e;
+ }
+};
+
+
+template <>
+struct _utf_codec<16> // one or two 16-bit code units per character (surrogate pairs)
+{
+private:
+ static const int32 lead_offset = 0xD800 - (0x10000 >> 10); // added to (usv >> 10) to form the first unit of a pair
+ static const int32 surrogate_offset = 0x10000 - (0xD800 << 10) - 0xDC00; // added to (lead << 10) + trail to recover the character
+public:
+ typedef uint16 codeunit_t;
+
+ inline
+ static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw() // writes 1 unit below 0x10000, otherwise 2; l receives the count
+ {
+ if (usv < 0x10000) { l = 1; cp[0] = codeunit_t(usv); }
+ else
+ {
+ cp[0] = codeunit_t(lead_offset + (usv >> 10));
+ cp[1] = codeunit_t(0xDC00 + (usv & 0x3FF));
+ l = 2;
+ }
+ }
+
+ inline
+ static uchar_t get(const codeunit_t * cp, int8 & l) throw() // decodes one character; on a malformed pair returns 0xFFFD with l = -1
+ {
+ const uint32 uh = cp[0];
+ l = 1;
+
+ if (uh < 0xD800|| uh > 0xDFFF) { return uh; } // not a surrogate: the unit is the character
+ if (uh > 0xDBFF) { l = -1; return 0xFFFD; } // a trail surrogate in lead position
+ const uint32 ul = cp[1]; // reads the following unit with no bounds check here
+ if (ul < 0xDC00 || ul > 0xDFFF) { l = -1; return 0xFFFD; } // lead not followed by a trail surrogate
+ ++l;
+ return (uh<<10) + ul + surrogate_offset;
+ }
+
+ inline
+ static bool validate(const codeunit_t * s, const codeunit_t * const e) throw() // false when the range is inverted or its last unit is in 0xD800..0xDBFF (a pair cut off at the end)
+ {
+ const ptrdiff_t n = e-s;
+ if (n <= 0) return n == 0; // empty is valid, inverted is not
+ const uint32 u = *(e-1); // Get the last codepoint
+ return (u < 0xD800 || u > 0xDBFF);
+ }
+};
+
+
+template <>
+struct _utf_codec<8> // one to four 8-bit code units per character
+{
+private:
+ static const int8 sz_lut[16]; // indexed by the top 4 bits of the first byte to give the sequence length (values defined outside this header)
+ static const byte mask_lut[5]; // indexed by sequence length to mask the payload bits of the first byte (values defined outside this header)
+ static const uchar_t limit = 0x110000; // get() rejects decoded values at or above this
+
+public:
+ typedef uint8 codeunit_t;
+
+ inline
+ static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw() // writes 1 to 4 bytes depending on the magnitude of usv; l receives the count
+ {
+ if (usv < 0x80) {l = 1; cp[0] = usv; return; }
+ if (usv < 0x0800) {l = 2; cp[0] = 0xC0 + (usv >> 6); cp[1] = 0x80 + (usv & 0x3F); return; }
+ if (usv < 0x10000) {l = 3; cp[0] = 0xE0 + (usv >> 12); cp[1] = 0x80 + ((usv >> 6) & 0x3F); cp[2] = 0x80 + (usv & 0x3F); return; }
+ else {l = 4; cp[0] = 0xF0 + (usv >> 18); cp[1] = 0x80 + ((usv >> 12) & 0x3F); cp[2] = 0x80 + ((usv >> 6) & 0x3F); cp[3] = 0x80 + (usv & 0x3F); return; }
+ }
+
+ inline
+ static uchar_t get(const codeunit_t * cp, int8 & l) throw() // decodes one character; on error returns 0xFFFD and sets l negative
+ {
+ const int8 seq_sz = sz_lut[*cp >> 4];
+ uchar_t u = *cp & mask_lut[seq_sz];
+ l = 1; // counts the bytes accepted so far
+ bool toolong = false; // set when the accumulated value is below the threshold tested at that step
+
+ switch(seq_sz) { // cases fall through so a sequence of length k consumes k-1 following bytes; a byte whose top two bits are not 10 stops early
+ case 4: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong = (u < 0x10); GR_FALLTHROUGH;
+ // no break
+ case 3: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong |= (u < 0x20); GR_FALLTHROUGH;
+ // no break
+ case 2: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong |= (u < 0x80); GR_FALLTHROUGH;
+ // no break
+ case 1: break;
+ case 0: l = -1; return 0xFFFD; // first byte cannot start a sequence
+ }
+
+ if (l != seq_sz || toolong || u >= limit)
+ {
+ l = -l; // negated count of the bytes that were accepted
+ return 0xFFFD;
+ }
+ return u;
+ }
+
+ inline
+ static bool validate(const codeunit_t * s, const codeunit_t * const e) throw() // looks only at the last (up to three) bytes: false when the range is inverted or ends part-way through a multi-byte sequence
+ {
+ const ptrdiff_t n = e-s;
+ if (n <= 0) return n == 0; // empty is valid, inverted is not
+ s += (n-1); // start from the final byte and walk backwards
+ if (*s < 0x80) return true;
+ if (*s >= 0xC0) return false; // ends on a lead byte
+ if (n == 1) return true;
+ if (*--s < 0x80) return true;
+ if (*s >= 0xE0) return false; // 3- or 4-byte lead with only one byte after it
+ if (n == 2 || *s >= 0xC0) return true;
+ if (*--s < 0x80) return true;
+ if (*s >= 0xF0) return false; // 4-byte lead with only two bytes after it
+ return true;
+ }
+
+};
+
+
+template <typename C>
+class _utf_iterator // walks a buffer of code units of type C one character at a time, decoding through the codec matching sizeof(C)
+{
+ typedef _utf_codec<sizeof(C)*8> codec;
+
+ C * cp; // current position in the buffer
+ mutable int8 sl; // length reported by the last get()/put() at cp; negative after a decode error
+
+public:
+ typedef C codeunit_type;
+ typedef uchar_t value_type;
+ typedef uchar_t * pointer;
+
+ class reference // proxy returned by operator*: reading decodes at the iterator, assigning encodes there
+ {
+ const _utf_iterator & _i;
+
+ reference(const _utf_iterator & i): _i(i) {}
+ public:
+ operator value_type () const throw () { return codec::get(_i.cp, _i.sl); } // also updates the iterator's sl
+ reference & operator = (const value_type usv) throw() { codec::put(_i.cp, usv, _i.sl); return *this; }
+
+ friend class _utf_iterator;
+ };
+
+
+ _utf_iterator(const void * us=0) : cp(reinterpret_cast<C *>(const_cast<void *>(us))), sl(1) { }
+
+ _utf_iterator & operator ++ () { cp += abs(sl); return *this; } // advances by the magnitude of sl, so the step depends on the last dereference (sl starts at 1)
+ _utf_iterator operator ++ (int) { _utf_iterator tmp(*this); operator++(); return tmp; }
+
+ bool operator == (const _utf_iterator & rhs) const throw() { return cp >= rhs.cp; } // "equal" means at or past rhs, so a step that overshoots an end iterator still compares equal to it
+ bool operator != (const _utf_iterator & rhs) const throw() { return !operator==(rhs); }
+
+ reference operator * () const throw() { return *this; }
+ pointer operator ->() const throw() { return &operator *(); } // NOTE(review): takes the address of the temporary proxy, whose type is reference* rather than pointer; presumably never instantiated — confirm
+
+ operator codeunit_type * () const throw() { return cp; }
+
+ bool error() const throw() { return sl < 1; } // true after a decode that reported a negative length
+ bool validate(const _utf_iterator & e) { return codec::validate(cp, e.cp); }
+};
+
+template <typename C>
+struct utf // bundles the code unit type, iterator types and validate() for the encoding whose unit is sizeof(C)*8 bits
+{
+ typedef typename _utf_codec<sizeof(C)*8>::codeunit_t codeunit_t;
+
+ typedef _utf_iterator<C> iterator;
+ typedef _utf_iterator<const C> const_iterator;
+
+ inline
+ static bool validate(codeunit_t * s, codeunit_t * e) throw() { // forwards to the matching codec's validate()
+ return _utf_codec<sizeof(C)*8>::validate(s,e);
+ }
+};
+
+
+typedef utf<uint32> utf32; // selects _utf_codec<32>
+typedef utf<uint16> utf16; // selects _utf_codec<16>
+typedef utf<uint8> utf8; // selects _utf_codec<8>
+
+} // namespace graphite2
diff --git a/thirdparty/graphite/src/inc/bits.h b/thirdparty/graphite/src/inc/bits.h
new file mode 100644
index 0000000000..9365986a10
--- /dev/null
+++ b/thirdparty/graphite/src/inc/bits.h
@@ -0,0 +1,150 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2012, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+namespace graphite2
+{
+
+
+#if defined GRAPHITE2_BUILTINS && (defined __GNUC__ || defined __clang__)
+
+template<typename T> // Number of 1 bits in v, computed with the compiler's popcount builtin.
+inline unsigned int bit_set_count(T v)
+{
+ return __builtin_popcount(v);
+}
+ // int16/int8: cast to the same-width unsigned type first so a negative value is not sign-extended before counting
+template<>
+inline unsigned int bit_set_count(int16 v)
+{
+ return __builtin_popcount(static_cast<uint16>(v));
+}
+
+template<>
+inline unsigned int bit_set_count(int8 v)
+{
+ return __builtin_popcount(static_cast<uint8>(v));
+}
+ // long operands use the "l" variant of the builtin
+template<>
+inline unsigned int bit_set_count(unsigned long v)
+{
+ return __builtin_popcountl(v);
+}
+
+template<>
+inline unsigned int bit_set_count(signed long v)
+{
+ return __builtin_popcountl(v);
+}
+ // long long operands use the "ll" variant of the builtin
+template<>
+inline unsigned int bit_set_count(unsigned long long v)
+{
+ return __builtin_popcountll(v);
+}
+
+template<>
+inline unsigned int bit_set_count(signed long long v)
+{
+ return __builtin_popcountll(v);
+}
+
+#else
+
+template<typename T> // Portable bit count used when the builtin branch above is not compiled.
+inline unsigned int bit_set_count(T v)
+{
+ static size_t const ONES = ~0; // all bits set; ONES/3, ONES/15*3, ONES/255*15 and ONES/255 are the 0x55.., 0x33.., 0x0F.. and 0x01.. patterns
+ // sums bits per 2-bit pair, then per nibble, then per byte; the multiply gathers the byte sums into the top byte
+ v = v - ((v >> 1) & T(ONES/3)); // temp
+ v = (v & T(ONES/15*3)) + ((v >> 2) & T(ONES/15*3)); // temp
+ v = (v + (v >> 4)) & T(ONES/255*15); // temp
+ return (T)(v * T(ONES/255)) >> (sizeof(T)-1)*8; // count
+}
+
+#endif
+
+//TODO: Change these to uintmax_t when we go to C++11
+template<int S> // S is a width in bytes (the callers in this file pass sizeof(T))
+inline size_t _mask_over_val(size_t v)
+{
+ v = _mask_over_val<S/2>(v); // first apply the half-width step (recursion ends at the <1> specialisation)
+ v |= v >> S*4; // then OR in v shifted down by S*4 bits, i.e. half of S bytes
+ return v;
+}
+
+//TODO: Change these to uintmax_t when we go to C++11
+template<> // recursion base for S == 1
+inline size_t _mask_over_val<1>(size_t v)
+{
+ v |= v >> 1; // successive ORs with shifts of 1, 2 and 4 set the 7 bits below every set bit
+ v |= v >> 2;
+ v |= v >> 4;
+ return v;
+}
+
+template<typename T> // Returns v with every bit below a set bit also set, computed for the width of T.
+inline T mask_over_val(T v)
+{
+ return T(_mask_over_val<sizeof(T)>(v));
+}
+
+template<typename T> // For v >= 1: smallest power of two that is not less than v (e.g. 5 -> 8, 8 -> 8).
+inline unsigned long next_highest_power2(T v)
+{
+ return _mask_over_val<sizeof(T)>(v-1)+1; // fill all bits below the top bit of v-1, then add one
+}
+
+template<typename T> // Index of the highest set bit of v; for v == 0 the unsigned subtraction below wraps around.
+inline unsigned int log_binary(T v)
+{
+ return bit_set_count(mask_over_val(v))-1; // the filled mask has (top bit index + 1) bits set
+}
+
+template<typename T> // Zero-byte test: the result is non-zero when some byte of x is 0 (NOTE(review): assumes an unsigned T - confirm callers).
+inline T has_zero(const T x)
+{
+ return (x - T(~T(0)/255)) & ~x & T(~T(0)/255*128); // ~T(0)/255 is 0x01 in every byte; *128 gives 0x80 in every byte
+}
+
+template<typename T> // Flags the bytes of x that equal n; each flagged byte of the result holds n.
+inline T zero_bytes(const T x, unsigned char n)
+{
+ const T t = T(~T(0)/255*n); // n replicated into every byte
+ return T((has_zero(x^t) >> 7)*n); // x^t is zero where a byte matches; >>7 turns each 0x80 flag into 0x01, *n turns it into n
+}
+
+#if 0 // disabled: the function below is not compiled
+inline float float_round(float x, uint32 m)
+{
+ *reinterpret_cast<unsigned int *>(&x) &= m; // ANDs the float's bit pattern with m in place
+ return *reinterpret_cast<float *>(&x);
+}
+#endif
+
+}
diff --git a/thirdparty/graphite/src/inc/debug.h b/thirdparty/graphite/src/inc/debug.h
new file mode 100644
index 0000000000..97175eb2cc
--- /dev/null
+++ b/thirdparty/graphite/src/inc/debug.h
@@ -0,0 +1,89 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2011, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+// debug.h
+//
+// Created on: 22 Dec 2011
+// Author: tim
+
+#pragma once
+
+#if !defined GRAPHITE2_NTRACING
+
+#include <utility>
+#include "inc/json.h"
+#include "inc/Position.h"
+
+namespace graphite2
+{
+
+class CharInfo;
+class Segment;
+class Slot;
+
+typedef std::pair<const Segment * const, const Slot * const> dslot; // a (segment pointer, slot pointer) pair
+struct objectid // Fixed-size name buffer built from a dslot or a Segment; streamed to json as its name.
+{
+ char name[16];
+ objectid(const dslot &) throw();
+ objectid(const Segment * const p) throw();
+};
+
+
+json & operator << (json & j, const Position &) throw(); // json inserters for the debug-logged types; three are defined inline below
+json & operator << (json & j, const Rect &) throw();
+json & operator << (json & j, const CharInfo &) throw();
+json & operator << (json & j, const dslot &) throw();
+json & operator << (json & j, const objectid &) throw();
+json & operator << (json & j, const telemetry &) throw();
+
+
+
+inline // Writes a Position as a json array of its x and y, after applying json::flat.
+json & operator << (json & j, const Position & p) throw()
+{
+ return j << json::flat << json::array << p.x << p.y << json::close;
+}
+
+
+inline // Writes a Rect as a json array of bl.x, bl.y, tr.x, tr.y, after applying json::flat.
+json & operator << (json & j, const Rect & p) throw()
+{
+ return j << json::flat << json::array << p.bl.x << p.bl.y << p.tr.x << p.tr.y << json::close;
+}
+
+
+inline // Writes an objectid by streaming its name buffer.
+json & operator << (json & j, const objectid & sid) throw()
+{
+ return j << sid.name;
+}
+
+
+} // namespace graphite2
+
+#endif //!defined GRAPHITE2_NTRACING
+
diff --git a/thirdparty/graphite/src/inc/json.h b/thirdparty/graphite/src/inc/json.h
new file mode 100644
index 0000000000..554cd9a3d1
--- /dev/null
+++ b/thirdparty/graphite/src/inc/json.h
@@ -0,0 +1,178 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2011, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+// JSON pretty printer for graphite font debug output logging.
+// Created on: 15 Dec 2011
+// Author: Tim Eves
+
+#pragma once
+
+#include "inc/Main.h"
+#include <cassert>
+#include <cstdio>
+#include <cstdint>
+#include "inc/List.h"
+
+namespace graphite2 {
+
+class json // Writer that streams values and context manipulators (object/array/item/flat/close) to a FILE.
+{
+ // Prevent copying
+ json(const json &);
+ json & operator = (const json &);
+ // signature shared by the static manipulators below (flat, close, object, array, item)
+ typedef void (*_context_t)(json &);
+
+ FILE * const _stream;
+ char _contexts[128], // context stack
+ * _context, // current context (top of stack)
+ * _flatten; // if !0 points to context above which
+ // pretty printed output should occur.
+ Vector<void *> _env; // caller-defined slots, accessed through setenv/getenv/getenvs
+
+ void context(const char current) throw();
+ void indent(const int d=0) throw();
+ void push_context(const char, const char) throw();
+ void pop_context() throw();
+
+public:
+ class closer;
+ // value types accepted by the member operator<< overloads
+ using string = const char *;
+ using number = double;
+ enum class integer : std::intmax_t {};
+ enum class integer_u : std::uintmax_t {};
+ using boolean = bool;
+ static const std::nullptr_t null;
+ // setenv pads _env with null entries until slot `index` exists (index + 1 - size of them), then stores val there
+ void setenv(unsigned int index, void *val) { _env.reserve(index + 1); if (index >= _env.size()) _env.insert(_env.end(), index + 1 - _env.size(), 0); _env[index] = val; }
+ void *getenv(unsigned int index) const { return _env[index]; } // no bounds check: index must be a slot that exists
+ const Vector<void *> &getenvs() const { return _env; }
+ // manipulators: stream one of these with operator<< and it is invoked on the json object
+ static void flat(json &) throw();
+ static void close(json &) throw();
+ static void object(json &) throw();
+ static void array(json &) throw();
+ static void item(json &) throw();
+
+ json(FILE * stream) throw();
+ ~json() throw ();
+
+ FILE * stream() const throw();
+
+ json & operator << (string) throw();
+ json & operator << (number) throw();
+ json & operator << (integer) throw();
+ json & operator << (integer_u) throw();
+ json & operator << (boolean) throw();
+ json & operator << (std::nullptr_t) throw();
+ json & operator << (_context_t) throw();
+ // stream state queries; operator bool is the same as good()
+ operator bool() const throw();
+ bool good() const throw();
+ bool eof() const throw();
+
+ CLASS_NEW_DELETE;
+};
+
+class json::closer // Helper whose destructor streams json::close into the wrapped json, if the pointer is non-null.
+{
+ // Prevent copying.
+ closer(const closer &);
+ closer & operator = (const closer &);
+
+ json * const _j;
+public:
+ closer(json * const j) : _j(j) {} // j may be null, in which case the destructor does nothing
+ ~closer() throw() { if (_j) *_j << close; }
+};
+
+inline // Binds the writer to stream s with an empty context stack and no flatten point.
+json::json(FILE * s) throw()
+: _stream(s), _context(_contexts), _flatten(0)
+{
+ if (good()) // good() is false for a null stream, so fflush only ever sees a usable stream
+ fflush(s);
+}
+
+
+inline // Pops every context still on the stack.
+json::~json() throw ()
+{
+ while (_context > _contexts) pop_context(); // until _context is back at the base of _contexts
+}
+
+inline // Returns the FILE pointer given at construction.
+FILE * json::stream() const throw() { return _stream; }
+
+
+inline // Applies a manipulator (a function taking json &) to this object and returns it for chaining.
+json & json::operator << (json::_context_t ctxt) throw()
+{
+ ctxt(*this);
+ return *this;
+}
+
+inline // Built-in integer overloads: signed types are forwarded as json::integer, unsigned types as json::integer_u.
+json & operator << (json & j, signed char d) throw() { return j << json::integer(d); }
+
+inline
+json & operator << (json & j, unsigned char d) throw() { return j << json::integer_u(d); }
+
+inline
+json & operator << (json & j, short int d) throw() { return j << json::integer(d); }
+
+inline
+json & operator << (json & j, unsigned short int d) throw() { return j << json::integer_u(d); }
+
+inline
+json & operator << (json & j, int d) throw() { return j << json::integer(d); }
+
+inline
+json & operator << (json & j, unsigned int d) throw() { return j << json::integer_u(d); }
+
+inline
+json & operator << (json & j, long int d) throw() { return j << json::integer(d); }
+
+inline
+json & operator << (json & j, unsigned long int d) throw() { return j << json::integer_u(d); }
+
+inline
+json & operator << (json & j, long long int d) throw() { return j << json::integer(d); }
+
+inline
+json & operator << (json & j, unsigned long long int d) throw() { return j << json::integer_u(d); }
+
+inline // Same result as good().
+json::operator bool() const throw() { return good(); }
+
+inline // True when a stream is attached and ferror reports no error on it.
+bool json::good() const throw() { return _stream && ferror(_stream) == 0; }
+
+inline // True when feof reports end-of-file; unlike good(), _stream is not checked for null here.
+bool json::eof() const throw() { return feof(_stream) != 0; }
+
+} // namespace graphite2
diff --git a/thirdparty/graphite/src/inc/locale2lcid.h b/thirdparty/graphite/src/inc/locale2lcid.h
new file mode 100644
index 0000000000..25d5c0a3c8
--- /dev/null
+++ b/thirdparty/graphite/src/inc/locale2lcid.h
@@ -0,0 +1,450 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+#include <cstring>
+#include <cassert>
+
+#include "inc/Main.h"
+
+
+namespace graphite2 {
+
+struct IsoLangEntry // One row of the language table below.
+{
+ unsigned short mnLang; // Windows language ID
+ char maLangStr[4]; // language code: the table uses 2- or 3-letter codes, leaving room for the terminator
+ char maCountry[3]; // 2-letter country code (empty in one row) plus terminator
+};
+
+// Windows Language ID, Locale ISO-639 language, country code as used in
+// naming table of OpenType fonts. Keep rows sorted by language ID: findEntryById() binary-searches this array.
+const IsoLangEntry LANG_ENTRIES[] = {
+ { 0x0401, "ar","SA" }, // Arabic Saudi Arabia
+ { 0x0402, "bg","BG" }, // Bulgarian Bulgaria
+ { 0x0403, "ca","ES" }, // Catalan Catalan
+ { 0x0404, "zh","TW" }, // Chinese Taiwan
+ { 0x0405, "cs","CZ" }, // Czech Czech Republic
+ { 0x0406, "da","DK" }, // Danish Denmark
+ { 0x0407, "de","DE" }, // German Germany
+ { 0x0408, "el","GR" }, // Greek Greece
+ { 0x0409, "en","US" }, // English United States
+ { 0x040A, "es","ES" }, // Spanish (Traditional Sort) Spain
+ { 0x040B, "fi","FI" }, // Finnish Finland
+ { 0x040C, "fr","FR" }, // French France
+ { 0x040D, "he","IL" }, // Hebrew Israel
+ { 0x040E, "hu","HU" }, // Hungarian Hungary
+ { 0x040F, "is","IS" }, // Icelandic Iceland
+ { 0x0410, "it","IT" }, // Italian Italy
+ { 0x0411, "ja","JP" }, // Japanese Japan
+ { 0x0412, "ko","KR" }, // Korean Korea
+ { 0x0413, "nl","NL" }, // Dutch Netherlands
+ { 0x0414, "no","NO" }, // Norwegian (Bokmal) Norway
+ { 0x0415, "pl","PL" }, // Polish Poland
+ { 0x0416, "pt","BR" }, // Portuguese Brazil
+ { 0x0417, "rm","CH" }, // Romansh Switzerland
+ { 0x0418, "ro","RO" }, // Romanian Romania
+ { 0x0419, "ru","RU" }, // Russian Russia
+ { 0x041A, "hr","HR" }, // Croatian Croatia
+ { 0x041B, "sk","SK" }, // Slovak Slovakia
+ { 0x041C, "sq","AL" }, // Albanian Albania
+ { 0x041D, "sv","SE" }, // Swedish Sweden
+ { 0x041E, "th","TH" }, // Thai Thailand
+ { 0x041F, "tr","TR" }, // Turkish Turkey
+ { 0x0420, "ur","PK" }, // Urdu Islamic Republic of Pakistan
+ { 0x0421, "id","ID" }, // Indonesian Indonesia
+ { 0x0422, "uk","UA" }, // Ukrainian Ukraine
+ { 0x0423, "be","BY" }, // Belarusian Belarus
+ { 0x0424, "sl","SI" }, // Slovenian Slovenia
+ { 0x0425, "et","EE" }, // Estonian Estonia
+ { 0x0426, "lv","LV" }, // Latvian Latvia
+ { 0x0427, "lt","LT" }, // Lithuanian Lithuania
+ { 0x0428, "tg","TJ" }, // Tajik (Cyrillic) Tajikistan
+ { 0x042A, "vi","VN" }, // Vietnamese Vietnam
+ { 0x042B, "hy","AM" }, // Armenian Armenia
+ { 0x042C, "az","AZ" }, // Azeri (Latin) Azerbaijan
+ { 0x042D, "eu","" }, // Basque Basque
+ { 0x042E, "hsb","DE" }, // Upper Sorbian Germany
+ { 0x042F, "mk","MK" }, // Macedonian (FYROM) Former Yugoslav Republic of Macedonia
+ { 0x0432, "tn","ZA" }, // Setswana South Africa
+ { 0x0434, "xh","ZA" }, // isiXhosa South Africa
+ { 0x0435, "zu","ZA" }, // isiZulu South Africa
+ { 0x0436, "af","ZA" }, // Afrikaans South Africa
+ { 0x0437, "ka","GE" }, // Georgian Georgia
+ { 0x0438, "fo","FO" }, // Faroese Faroe Islands
+ { 0x0439, "hi","IN" }, // Hindi India
+ { 0x043A, "mt","MT" }, // Maltese Malta
+ { 0x043B, "se","NO" }, // Sami (Northern) Norway
+ { 0x043E, "ms","MY" }, // Malay Malaysia
+ { 0x043F, "kk","KZ" }, // Kazakh Kazakhstan
+ { 0x0440, "ky","KG" }, // Kyrgyz Kyrgyzstan
+ { 0x0441, "sw","KE" }, // Kiswahili Kenya
+ { 0x0442, "tk","TM" }, // Turkmen Turkmenistan
+ { 0x0443, "uz","UZ" }, // Uzbek (Latin) Uzbekistan
+ { 0x0444, "tt","RU" }, // Tatar Russia
+ { 0x0445, "bn","IN" }, // Bengali India
+ { 0x0446, "pa","IN" }, // Punjabi India
+ { 0x0447, "gu","IN" }, // Gujarati India
+ { 0x0448, "or","IN" }, // Oriya India
+ { 0x0449, "ta","IN" }, // Tamil India
+ { 0x044A, "te","IN" }, // Telugu India
+ { 0x044B, "kn","IN" }, // Kannada India
+ { 0x044C, "ml","IN" }, // Malayalam India
+ { 0x044D, "as","IN" }, // Assamese India
+ { 0x044E, "mr","IN" }, // Marathi India
+ { 0x044F, "sa","IN" }, // Sanskrit India
+ { 0x0450, "mn","MN" }, // Mongolian (Cyrillic) Mongolia
+ { 0x0451, "bo","CN" }, // Tibetan PRC
+ { 0x0452, "cy","GB" }, // Welsh United Kingdom
+ { 0x0453, "km","KH" }, // Khmer Cambodia
+ { 0x0454, "lo","LA" }, // Lao Lao P.D.R.
+ { 0x0455, "my","MM" }, // Burmese Myanmar - not listed in Microsoft docs anymore
+ { 0x0456, "gl","ES" }, // Galician Galician
+ { 0x0457, "kok","IN" }, // Konkani India
+ { 0x045A, "syr","SY" }, // Syriac Syria
+ { 0x045B, "si","LK" }, // Sinhala Sri Lanka
+ { 0x045D, "iu","CA" }, // Inuktitut Canada
+ { 0x045E, "am","ET" }, // Amharic Ethiopia
+ { 0x0461, "ne","NP" }, // Nepali Nepal
+ { 0x0462, "fy","NL" }, // Frisian Netherlands
+ { 0x0463, "ps","AF" }, // Pashto Afghanistan
+ { 0x0464, "fil","PH" }, // Filipino Philippines
+ { 0x0465, "dv","MV" }, // Divehi Maldives
+ { 0x0468, "ha","NG" }, // Hausa (Latin) Nigeria
+ { 0x046A, "yo","NG" }, // Yoruba Nigeria
+ { 0x046B, "qu","BO" }, // Quechua Bolivia
+ { 0x046C, "st","ZA" }, // Sesotho sa Leboa South Africa - NOTE(review): the ISO 639 code for Sesotho sa Leboa is "nso" ("st" is Southern Sotho); confirm before changing
+ { 0x046D, "ba","RU" }, // Bashkir Russia
+ { 0x046E, "lb","LU" }, // Luxembourgish Luxembourg
+ { 0x046F, "kl","GL" }, // Greenlandic Greenland
+ { 0x0470, "ig","NG" }, // Igbo Nigeria
+ { 0x0478, "ii","CN" }, // Yi PRC
+ { 0x047A, "arn","CL" }, // Mapudungun Chile
+ { 0x047C, "moh","CA" }, // Mohawk Mohawk
+ { 0x047E, "br","FR" }, // Breton France
+ { 0x0480, "ug","CN" }, // Uighur PRC
+ { 0x0481, "mi","NZ" }, // Maori New Zealand
+ { 0x0482, "oc","FR" }, // Occitan France
+ { 0x0483, "co","FR" }, // Corsican France
+ { 0x0484, "gsw","FR" }, // Alsatian France
+ { 0x0485, "sah","RU" }, // Yakut Russia
+ { 0x0486, "qut","GT" }, // K'iche Guatemala
+ { 0x0487, "rw","RW" }, // Kinyarwanda Rwanda
+ { 0x0488, "wo","SN" }, // Wolof Senegal
+ { 0x048C, "gbz","AF" }, // Dari Afghanistan
+ { 0x0801, "ar","IQ" }, // Arabic Iraq
+ { 0x0804, "zh","CN" }, // Chinese People's Republic of China
+ { 0x0807, "de","CH" }, // German Switzerland
+ { 0x0809, "en","GB" }, // English United Kingdom
+ { 0x080A, "es","MX" }, // Spanish Mexico
+ { 0x080C, "fr","BE" }, // French Belgium
+ { 0x0810, "it","CH" }, // Italian Switzerland
+ { 0x0813, "nl","BE" }, // Dutch Belgium
+ { 0x0814, "nn","NO" }, // Norwegian (Nynorsk) Norway
+ { 0x0816, "pt","PT" }, // Portuguese Portugal
+ { 0x081A, "sh","RS" }, // Serbian (Latin) Serbia
+ { 0x081D, "sv","FI" }, // Swedish Finland
+ { 0x082C, "az","AZ" }, // Azeri (Cyrillic) Azerbaijan
+ { 0x082E, "dsb","DE" }, // Lower Sorbian Germany
+ { 0x083B, "se","SE" }, // Sami (Northern) Sweden
+ { 0x083C, "ga","IE" }, // Irish Ireland
+ { 0x083E, "ms","BN" }, // Malay Brunei Darussalam
+ { 0x0843, "uz","UZ" }, // Uzbek (Cyrillic) Uzbekistan
+ { 0x0845, "bn","BD" }, // Bengali Bangladesh
+ { 0x0850, "mn","MN" }, // Mongolian (Traditional) People's Republic of China
+ { 0x085D, "iu","CA" }, // Inuktitut (Latin) Canada
+ { 0x085F, "ber","DZ" }, // Tamazight (Latin) Algeria
+ { 0x086B, "qu","EC" }, // Quechua Ecuador
+ { 0x0C01, "ar","EG" }, // Arabic Egypt
+ { 0x0C04, "zh","HK" }, // Chinese Hong Kong S.A.R.
+ { 0x0C07, "de","AT" }, // German Austria
+ { 0x0C09, "en","AU" }, // English Australia
+ { 0x0C0A, "es","ES" }, // Spanish (Modern Sort) Spain
+ { 0x0C0C, "fr","CA" }, // French Canada
+ { 0x0C1A, "sr","CS" }, // Serbian (Cyrillic) Serbia
+ { 0x0C3B, "se","FI" }, // Sami (Northern) Finland
+ { 0x0C6B, "qu","PE" }, // Quechua Peru
+ { 0x1001, "ar","LY" }, // Arabic Libya
+ { 0x1004, "zh","SG" }, // Chinese Singapore
+ { 0x1007, "de","LU" }, // German Luxembourg
+ { 0x1009, "en","CA" }, // English Canada
+ { 0x100A, "es","GT" }, // Spanish Guatemala
+ { 0x100C, "fr","CH" }, // French Switzerland
+ { 0x101A, "hr","BA" }, // Croatian (Latin) Bosnia and Herzegovina
+ { 0x103B, "smj","NO" }, // Sami (Lule) Norway
+ { 0x1401, "ar","DZ" }, // Arabic Algeria
+ { 0x1404, "zh","MO" }, // Chinese Macao S.A.R.
+ { 0x1407, "de","LI" }, // German Liechtenstein
+ { 0x1409, "en","NZ" }, // English New Zealand
+ { 0x140A, "es","CR" }, // Spanish Costa Rica
+ { 0x140C, "fr","LU" }, // French Luxembourg
+ { 0x141A, "bs","BA" }, // Bosnian (Latin) Bosnia and Herzegovina
+ { 0x143B, "smj","SE" }, // Sami (Lule) Sweden
+ { 0x1801, "ar","MA" }, // Arabic Morocco
+ { 0x1809, "en","IE" }, // English Ireland
+ { 0x180A, "es","PA" }, // Spanish Panama
+ { 0x180C, "fr","MC" }, // French Principality of Monaco
+ { 0x181A, "sh","BA" }, // Serbian (Latin) Bosnia and Herzegovina
+ { 0x183B, "sma","NO" }, // Sami (Southern) Norway
+ { 0x1C01, "ar","TN" }, // Arabic Tunisia
+ { 0x1C09, "en","ZA" }, // English South Africa
+ { 0x1C0A, "es","DO" }, // Spanish Dominican Republic
+ { 0x1C1A, "sr","BA" }, // Serbian (Cyrillic) Bosnia and Herzegovina
+ { 0x1C3B, "sma","SE" }, // Sami (Southern) Sweden
+ { 0x2001, "ar","OM" }, // Arabic Oman
+ { 0x2009, "en","JM" }, // English Jamaica
+ { 0x200A, "es","VE" }, // Spanish Venezuela
+ { 0x201A, "bs","BA" }, // Bosnian (Cyrillic) Bosnia and Herzegovina
+ { 0x203B, "sms","FI" }, // Sami (Skolt) Finland
+ { 0x2401, "ar","YE" }, // Arabic Yemen
+ { 0x2409, "en","BS" }, // English Caribbean
+ { 0x240A, "es","CO" }, // Spanish Colombia
+ { 0x243B, "smn","FI" }, // Sami (Inari) Finland
+ { 0x2801, "ar","SY" }, // Arabic Syria
+ { 0x2809, "en","BZ" }, // English Belize
+ { 0x280A, "es","PE" }, // Spanish Peru
+ { 0x2C01, "ar","JO" }, // Arabic Jordan
+ { 0x2C09, "en","TT" }, // English Trinidad and Tobago
+ { 0x2C0A, "es","AR" }, // Spanish Argentina
+ { 0x3001, "ar","LB" }, // Arabic Lebanon
+ { 0x3009, "en","ZW" }, // English Zimbabwe
+ { 0x300A, "es","EC" }, // Spanish Ecuador
+ { 0x3401, "ar","KW" }, // Arabic Kuwait
+ { 0x3409, "en","PH" }, // English Republic of the Philippines
+ { 0x340A, "es","CL" }, // Spanish Chile
+ { 0x3801, "ar","AE" }, // Arabic U.A.E.
+ { 0x380A, "es","UY" }, // Spanish Uruguay
+ { 0x3C01, "ar","BH" }, // Arabic Bahrain
+ { 0x3C0A, "es","PY" }, // Spanish Paraguay
+ { 0x4001, "ar","QA" }, // Arabic Qatar
+ { 0x4009, "en","IN" }, // English India
+ { 0x400A, "es","BO" }, // Spanish Bolivia
+ { 0x4409, "en","MY" }, // English Malaysia
+ { 0x440A, "es","SV" }, // Spanish El Salvador
+ { 0x4809, "en","SG" }, // English Singapore
+ { 0x480A, "es","HN" }, // Spanish Honduras
+ { 0x4C0A, "es","NI" }, // Spanish Nicaragua
+ { 0x500A, "es","PR" }, // Spanish Puerto Rico
+ { 0x540A, "es","US" } // Spanish United States
+};
+
+class Locale2Lang // Maps locale strings to Windows language IDs (getMsId) and language IDs back to table rows (findEntryById).
+{
+ Locale2Lang(const Locale2Lang &);
+ Locale2Lang & operator = (const Locale2Lang &);
+ // the copy operations above are private and have no definition in this class body
+public:
+ Locale2Lang() : mSeedPosition(128)
+ {
+ memset((void*)mLangLookup, 0, sizeof(mLangLookup));
+ // create a trie-style lookup on the first 2 letters of the language code
+ static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry);
+ for (int i = 0; i < maxIndex; i++)
+ {
+ size_t a = LANG_ENTRIES[i].maLangStr[0] - 'a';
+ size_t b = LANG_ENTRIES[i].maLangStr[1] - 'a';
+ if (mLangLookup[a][b])
+ {
+ const IsoLangEntry ** old = mLangLookup[a][b]; // bucket already exists: rebuild it one entry longer
+ int len = 1;
+ while (old[len]) len++; // len ends as the index of the NULL terminator, i.e. the entry count
+ len += 2; // room for the new entry plus the terminator
+ mLangLookup[a][b] = gralloc<const IsoLangEntry *>(len);
+ if (!mLangLookup[a][b])
+ {
+ mLangLookup[a][b] = old; // allocation failed: keep the old bucket and drop this entry
+ continue;
+ }
+ mLangLookup[a][b][--len] = NULL;
+ mLangLookup[a][b][--len] = &LANG_ENTRIES[i];
+ while (--len >= 0) // copy the existing entries in front of the new one
+ {
+ assert(len >= 0);
+ mLangLookup[a][b][len] = old[len];
+ }
+ free(old);
+ }
+ else
+ {
+ mLangLookup[a][b] = gralloc<const IsoLangEntry *>(2); // first entry for this letter pair: entry + terminator
+ if (!mLangLookup[a][b]) continue;
+ mLangLookup[a][b][1] = NULL;
+ mLangLookup[a][b][0] = &LANG_ENTRIES[i];
+ }
+ }
+ while (2 * mSeedPosition < maxIndex) // keep doubling the search seed while twice the seed is still below the table size
+ mSeedPosition *= 2;
+ };
+ ~Locale2Lang() // releases every bucket; empty slots are NULL, which free accepts
+ {
+ for (int i = 0; i != 26; ++i)
+ for (int j = 0; j != 26; ++j)
+ free(mLangLookup[i][j]);
+ }
+ unsigned short getMsId(const char * locale) const // Language ID for a '-'-separated locale (language, optional 4-letter script, optional region); 0x409 if nothing matches.
+ {
+ size_t length = strlen(locale);
+ size_t langLength = length;
+ const char * language = locale;
+ const char * script = NULL;
+ const char * region = NULL;
+ size_t regionLength = 0;
+ const char * dash = strchr(locale, '-');
+ if (dash && (dash != locale))
+ {
+ langLength = (dash - locale);
+ size_t nextPartLength = length - langLength - 1;
+ if (nextPartLength >= 2)
+ {
+ script = ++dash; // provisionally treat the second part as a script
+ dash = strchr(dash, '-');
+ if (dash)
+ {
+ nextPartLength = (dash - script);
+ region = ++dash;
+ }
+ if (nextPartLength == 2 && // two characters in 'A'..'Z': the second part is really the region
+ (locale[langLength+1] > 0x40) && (locale[langLength+1] < 0x5B) &&
+ (locale[langLength+2] > 0x40) && (locale[langLength+2] < 0x5B))
+ {
+ region = script;
+ regionLength = nextPartLength;
+ script = NULL;
+ }
+ else if (nextPartLength == 4) // 4-character script: the region, if any, is the third part
+ {
+ if (dash)
+ {
+ dash = strchr(dash, '-');
+ if (dash)
+ {
+ nextPartLength = (dash - region);
+ }
+ else
+ {
+ nextPartLength = length - (region - locale); // region runs to the end of the string, so measure against the full length
+ }
+ regionLength = nextPartLength;
+ }
+ }
+ }
+ }
+ size_t a = 'e' - 'a';
+ size_t b = 'n' - 'a';
+ unsigned short langId = 0;
+ int i = 0;
+ switch (langLength)
+ {
+ case 2:
+ {
+ a = language[0] - 'a';
+ b = language[1] - 'a';
+ if ((a < 26) && (b < 26) && mLangLookup[a][b])
+ {
+ while (mLangLookup[a][b][i])
+ {
+ if (mLangLookup[a][b][i]->maLangStr[2] != '\0') // skip 3-letter codes sharing this bucket
+ {
+ ++i;
+ continue;
+ }
+ if (region && (strncmp(mLangLookup[a][b][i]->maCountry, region, regionLength) == 0))
+ {
+ langId = mLangLookup[a][b][i]->mnLang;
+ break;
+ }
+ else if (langId == 0)
+ {
+ // possible fallback code
+ langId = mLangLookup[a][b][i]->mnLang;
+ }
+ ++i;
+ }
+ }
+ }
+ break;
+ case 3:
+ {
+ a = language[0] - 'a';
+ b = language[1] - 'a';
+ if ((a < 26) && (b < 26) && mLangLookup[a][b]) // non a-z letters wrap to large unsigned values and must not index the 26x26 table
+ {
+ while (mLangLookup[a][b][i])
+ {
+ if (mLangLookup[a][b][i]->maLangStr[2] != language[2]) // third letter must match as well
+ {
+ ++i;
+ continue;
+ }
+ if (region && (strncmp(mLangLookup[a][b][i]->maCountry, region, regionLength) == 0))
+ {
+ langId = mLangLookup[a][b][i]->mnLang;
+ break;
+ }
+ else if (langId == 0)
+ {
+ // possible fallback code
+ langId = mLangLookup[a][b][i]->mnLang;
+ }
+ ++i;
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ if (langId == 0) langId = 0x409; // default to the en-US row of LANG_ENTRIES
+ return langId;
+ }
+ const IsoLangEntry * findEntryById(unsigned short langId) const // Halving-window search of LANG_ENTRIES by language ID; NULL if the ID is absent.
+ {
+ static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry);
+ int window = mSeedPosition;
+ int guess = mSeedPosition - 1;
+ while (LANG_ENTRIES[guess].mnLang != langId)
+ {
+ window /= 2;
+ if (window == 0) return NULL; // step size exhausted without a match
+ guess += (LANG_ENTRIES[guess].mnLang > langId)? -window : window;
+ while (guess >= maxIndex) // stepped past the end of the table: back off with smaller steps
+ {
+ window /= 2;
+ guess -= window;
+ assert(window);
+ }
+ }
+ return &LANG_ENTRIES[guess];
+ }
+
+ CLASS_NEW_DELETE;
+
+private:
+ const IsoLangEntry ** mLangLookup[26][26]; // [first letter][second letter] -> NULL-terminated array of table rows, or NULL
+ int mSeedPosition; // starting window for findEntryById, set in the constructor
+};
+
+} // namespace graphite2
diff --git a/thirdparty/graphite/src/inc/opcode_table.h b/thirdparty/graphite/src/inc/opcode_table.h
new file mode 100644
index 0000000000..cb5acde9a4
--- /dev/null
+++ b/thirdparty/graphite/src/inc/opcode_table.h
@@ -0,0 +1,124 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+// This file will be pulled into and integrated into a machine implementation
+// DO NOT build directly
+#pragma once
+
+#define do2(n) do_(n) ,do_(n) // expands to the same do_(n) entry twice, filling both handler slots of a table row
+#define NILOP 0U // placeholder used below for a handler slot that has no handler
+
+// types of parameters are: (.. is inclusive)
+// number - any byte
+// output_class - 0 .. silf.m_nClass
+// input_class - 0 .. silf.m_nClass
+// sattrnum - 0 .. 29 (gr_slatJWidth) , 55 (gr_slatUserDefn)
+// attrid - 0 .. silf.numUser() where sattrnum == 55; 0..silf.m_iMaxComp where sattrnum == 15 otherwise 0
+// gattrnum - 0 .. face->getGlyphFaceCache->numAttrs()
+// gmetric - 0 .. 11 (kgmetDescent)
+// featidx - 0 .. face.numFeatures()
+// level - any byte
+static const opcode_t opcode_table[] = // row layout as written below: {{handler, handler}, parameter count or VARARGS, mnemonic}
+{
+ {{do2(nop)}, 0, "NOP"},
+ // PUSH_*: the count matches the number of "number" bytes listed in each row comment
+ {{do2(push_byte)}, 1, "PUSH_BYTE"}, // number
+ {{do2(push_byte_u)}, 1, "PUSH_BYTE_U"}, // number
+ {{do2(push_short)}, 2, "PUSH_SHORT"}, // number number
+ {{do2(push_short_u)}, 2, "PUSH_SHORT_U"}, // number number
+ {{do2(push_long)}, 4, "PUSH_LONG"}, // number number number number
+ // rows with no parameters that fill both handler slots via do2
+ {{do2(add)}, 0, "ADD"},
+ {{do2(sub)}, 0, "SUB"},
+ {{do2(mul)}, 0, "MUL"},
+ {{do2(div_)}, 0, "DIV"},
+ {{do2(min_)}, 0, "MIN"},
+ {{do2(max_)}, 0, "MAX"},
+ {{do2(neg)}, 0, "NEG"},
+ {{do2(trunc8)}, 0, "TRUNC8"},
+ {{do2(trunc16)}, 0, "TRUNC16"},
+
+ {{do2(cond)}, 0, "COND"},
+ {{do2(and_)}, 0, "AND"}, // 0x10
+ {{do2(or_)}, 0, "OR"},
+ {{do2(not_)}, 0, "NOT"},
+ {{do2(equal)}, 0, "EQUAL"},
+ {{do2(not_eq_)}, 0, "NOT_EQ"},
+ {{do2(less)}, 0, "LESS"},
+ {{do2(gtr)}, 0, "GTR"},
+ {{do2(less_eq)}, 0, "LESS_EQ"},
+ {{do2(gtr_eq)}, 0, "GTR_EQ"}, // 0x18
+ // the rows in this group fill only the first handler slot, except NEXT_N (neither) and CNTXT_ITEM (second only)
+ {{do_(next), NILOP}, 0, "NEXT"},
+ {{NILOP, NILOP}, 1, "NEXT_N"}, // number <= smap.end - map
+ {{do_(next), NILOP}, 0, "COPY_NEXT"},
+ {{do_(put_glyph_8bit_obs), NILOP}, 1, "PUT_GLYPH_8BIT_OBS"}, // output_class
+ {{do_(put_subs_8bit_obs), NILOP}, 3, "PUT_SUBS_8BIT_OBS"}, // slot input_class output_class
+ {{do_(put_copy), NILOP}, 1, "PUT_COPY"}, // slot
+ {{do_(insert), NILOP}, 0, "INSERT"},
+ {{do_(delete_), NILOP}, 0, "DELETE"}, // 0x20
+ {{do_(assoc), NILOP}, VARARGS, "ASSOC"},
+ {{NILOP ,do_(cntxt_item)}, 2, "CNTXT_ITEM"}, // slot offset
+ // NOTE(review): the two handler slots presumably serve two different kinds of code run by the machine - confirm against opcode_t
+ {{do_(attr_set), NILOP}, 1, "ATTR_SET"}, // sattrnum
+ {{do_(attr_add), NILOP}, 1, "ATTR_ADD"}, // sattrnum
+ {{do_(attr_sub), NILOP}, 1, "ATTR_SUB"}, // sattrnum
+ {{do_(attr_set_slot), NILOP}, 1, "ATTR_SET_SLOT"}, // sattrnum
+ {{do_(iattr_set_slot), NILOP}, 2, "IATTR_SET_SLOT"}, // sattrnum attrid
+ {{do2(push_slot_attr)}, 2, "PUSH_SLOT_ATTR"}, // sattrnum slot
+ {{do2(push_glyph_attr_obs)}, 2, "PUSH_GLYPH_ATTR_OBS"}, // gattrnum slot
+ {{do2(push_glyph_metric)}, 3, "PUSH_GLYPH_METRIC"}, // gmetric slot level
+ {{do2(push_feat)}, 2, "PUSH_FEAT"}, // featidx slot
+
+ {{do2(push_att_to_gattr_obs)}, 2, "PUSH_ATT_TO_GATTR_OBS"}, // gattrnum slot
+ {{do2(push_att_to_glyph_metric)}, 3, "PUSH_ATT_TO_GLYPH_METRIC"}, // gmetric slot level
+ {{do2(push_islot_attr)}, 3, "PUSH_ISLOT_ATTR"}, // sattrnum slot attrid
+ // PUSH_IGLYPH_ATTR has no handler in either slot
+ {{NILOP,NILOP}, 3, "PUSH_IGLYPH_ATTR"},
+
+ {{do2(pop_ret)}, 0, "POP_RET"}, // 0x30
+ {{do2(ret_zero)}, 0, "RET_ZERO"},
+ {{do2(ret_true)}, 0, "RET_TRUE"},
+
+ {{do_(iattr_set), NILOP}, 2, "IATTR_SET"}, // sattrnum attrid
+ {{do_(iattr_add), NILOP}, 2, "IATTR_ADD"}, // sattrnum attrid
+ {{do_(iattr_sub), NILOP}, 2, "IATTR_SUB"}, // sattrnum attrid
+ {{do2(push_proc_state)}, 1, "PUSH_PROC_STATE"}, // dummy
+ {{do2(push_version)}, 0, "PUSH_VERSION"},
+ {{do_(put_subs), NILOP}, 5, "PUT_SUBS"}, // slot input_class input_class output_class output_class
+ {{NILOP,NILOP}, 0, "PUT_SUBS2"},
+ {{NILOP,NILOP}, 0, "PUT_SUBS3"},
+ {{do_(put_glyph), NILOP}, 2, "PUT_GLYPH"}, // output_class output_class
+ {{do2(push_glyph_attr)}, 3, "PUSH_GLYPH_ATTR"}, // gattrnum gattrnum slot
+ {{do2(push_att_to_glyph_attr)}, 3, "PUSH_ATT_TO_GLYPH_ATTR"}, // gattrnum gattrnum slot
+ {{do2(bor)}, 0, "BITOR"},
+ {{do2(band)}, 0, "BITAND"},
+ {{do2(bnot)}, 0, "BITNOT"}, // 0x40
+ {{do2(setbits)}, 4, "BITSET"},
+ {{do_(set_feat), NILOP}, 2, "SET_FEAT"}, // featidx slot
+ // private opcodes for internal use only, comes after all other on disk opcodes.
+ {{do_(temp_copy), NILOP}, 0, "TEMP_COPY"}
+};
diff --git a/thirdparty/graphite/src/inc/opcodes.h b/thirdparty/graphite/src/inc/opcodes.h
new file mode 100644
index 0000000000..ff2f1741e2
--- /dev/null
+++ b/thirdparty/graphite/src/inc/opcodes.h
@@ -0,0 +1,691 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+// This file will be pulled into and integrated into a machine implementation
+// DO NOT build directly and under no circumstances ever #include headers in
+// here or you will break the direct_machine.
+//
+// Implementers' notes
+// ==================
+// You have access to a few primitives and the full C++ code:
+// declare_params(n) Tells the interpreter how many bytes of parameter
+// space this instruction uses and
+// initialises the param pointer. You *must* call this before the
+// first use of param.
+// use_params(n) Claim n extra bytes of param space beyond what was
+// claimed using declare_params.
+// param A const byte pointer for the parameter space claimed by
+// this instruction.
+// binop(op) Implement a binary operation on the stack using the
+// specified C++ operator.
+// NOT_IMPLEMENTED Any instruction body containing this will exit the
+// program with an assertion error. Instructions that are
+// not implemented should also be marked NILOP in the
+// opcodes tables; this will cause the code class to spot
+// them in a live code stream and throw a runtime_error
+// instead.
+// push(n) Push the value n onto the stack.
+// pop() Pop the top most value and return it.
+//
+// You have access to the following named fast 'registers':
+// sp = The pointer to the current top of stack, the last value
+// pushed.
+// seg = A reference to the Segment this code is running over.
+// is = The current slot index
+// isb = The original base slot index at the start of this rule
+// isf = The first positioned slot
+// isl = The last positioned slot
+// ip = The current instruction pointer
+// endPos = Position of advance of last cluster
+// dir = writing system directionality of the font
+
+
+// #define NOT_IMPLEMENTED assert(false)
+// #define NOT_IMPLEMENTED
+
+#define binop(op) const uint32 a = pop(); *sp = uint32(*sp) op a // pop the right operand into a, then combine it with the new top of stack as unsigned 32-bit
+#define sbinop(op) const int32 a = pop(); *sp = int32(*sp) op a // same shape as binop, but both operands are converted to int32
+#define use_params(n) dp += n // advance the data pointer dp past n parameter bytes
+
+#define declare_params(n) const byte * param = dp; \
+ use_params(n); // param keeps the address of the first parameter byte; dp has moved past all n
+
+#define push(n) { *++sp = n; } // pre-increment sp, then store n there
+#define pop() (*sp--) // yield the value at sp, then decrement sp
+#define slotat(x) (map[(x)]) // slot-map entry at offset x from the current map position
+#define DIE { is=seg.last(); status = Machine::died_early; EXIT(1); } // abort: point is at the segment's last slot, record died_early and EXIT(1)
+#define POSITIONED 1 // bit in flags that opcodes set after they have called seg.positionSlots()
+
+STARTOP(nop) // no-operation opcode
+ do {} while (0); // empty statement: reads and writes nothing
+ENDOP
+
+STARTOP(push_byte) // push a 1-byte parameter, converted through int8
+ declare_params(1);
+ push(int8(*param));
+ENDOP
+
+STARTOP(push_byte_u) // push a 1-byte parameter, converted through uint8
+ declare_params(1);
+ push(uint8(*param));
+ENDOP
+
+STARTOP(push_short) // push a 2-byte parameter assembled into an int16
+ declare_params(2);
+ const int16 r = int16(param[0]) << 8 // first parameter byte supplies the high 8 bits
+ | uint8(param[1]);
+ push(r);
+ENDOP
+
+STARTOP(push_short_u) // push a 2-byte parameter assembled into a uint16
+ declare_params(2);
+ const uint16 r = uint16(param[0]) << 8 // first parameter byte supplies the high 8 bits
+ | uint8(param[1]);
+ push(r);
+ENDOP
+
+STARTOP(push_long) // push a 4-byte parameter assembled into an int32
+ declare_params(4);
+ const int32 r = int32(param[0]) << 24 // bytes are combined most-significant first
+ | uint32(param[1]) << 16
+ | uint32(param[2]) << 8
+ | uint8(param[3]);
+ push(r);
+ENDOP
+
+STARTOP(add) // pop a; top of stack becomes uint32(top) + a
+ binop(+);
+ENDOP
+
+STARTOP(sub) // pop a; top of stack becomes uint32(top) - a
+ binop(-);
+ENDOP
+
+STARTOP(mul) // pop a; top of stack becomes uint32(top) * a
+ binop(*);
+ENDOP
+
+STARTOP(div_) // signed division: pop the divisor and divide the new top of stack by it
+ const int32 b = pop(); // divisor
+ const int32 a = int32(*sp); // dividend, read only for the overflow check below
+ if (b == 0 || (a == std::numeric_limits<int32>::min() && b == -1)) DIE; // abort on divide-by-zero and on min()/-1
+ *sp = int32(*sp) / b;
+ENDOP
+
+STARTOP(min_) // pop a and leave the smaller of a and the new top of stack (compared as int32)
+ const int32 a = pop(), b = *sp;
+ if (a < b) *sp = a; // top of stack is only rewritten when the popped value is smaller
+ENDOP
+
+STARTOP(max_) // pop a and leave the larger of a and the new top of stack (compared as int32)
+ const int32 a = pop(), b = *sp;
+ if (a > b) *sp = a; // top of stack is only rewritten when the popped value is larger
+ENDOP
+
+STARTOP(neg) // negate the top of stack in place
+ *sp = uint32(-int32(*sp)); // negated as int32, stored back through a uint32 conversion
+ENDOP
+
+STARTOP(trunc8) // replace the top of stack with its uint8 conversion
+ *sp = uint8(*sp);
+ENDOP
+
+STARTOP(trunc16) // replace the top of stack with its uint16 conversion
+ *sp = uint16(*sp);
+ENDOP
+
+STARTOP(cond) // ternary select over the three topmost stack values
+ const uint32 f = pop(), t = pop(), c = pop(); // popped in this order: false-value, true-value, condition
+ push(c ? t : f);
+ENDOP
+
+STARTOP(and_) // logical AND of the two topmost values; the result of && replaces the top of stack
+ binop(&&);
+ENDOP
+
+STARTOP(or_) // logical OR of the two topmost values; the result of || replaces the top of stack
+ binop(||);
+ENDOP
+
+STARTOP(not_) // logical negation of the top of stack, in place
+ *sp = !*sp;
+ENDOP
+
+STARTOP(equal) // pop a; top of stack becomes (top == a), operands taken as uint32
+ binop(==);
+ENDOP
+
+STARTOP(not_eq_) // pop a; top of stack becomes (top != a), operands taken as uint32
+ binop(!=);
+ENDOP
+
+STARTOP(less) // pop a; top of stack becomes (top < a), operands taken as int32
+ sbinop(<);
+ENDOP
+
+STARTOP(gtr) // pop a; top of stack becomes (top > a), operands taken as int32
+ sbinop(>);
+ENDOP
+
+STARTOP(less_eq) // pop a; top of stack becomes (top <= a), operands taken as int32
+ sbinop(<=);
+ENDOP
+
+STARTOP(gtr_eq) // pop a; top of stack becomes (top >= a), operands taken as int32
+ sbinop(>=);
+ENDOP
+
+STARTOP(next) // advance the current slot and the slot-map position by one
+ if (map - &smap[0] >= int(smap.size())) DIE // map is already at or beyond smap.size() entries: abort
+ if (is)
+ {
+ if (is == smap.highwater()) // leaving the highwater slot: flag that on smap
+ smap.highpassed(true);
+ is = is->next();
+ }
+ ++map; // the map position moves even when there is no current slot
+ENDOP
+
+//STARTOP(next_n)
+// use_params(1);
+// NOT_IMPLEMENTED;
+ //declare_params(1);
+ //const size_t num = uint8(*param);
+//ENDOP
+
+//STARTOP(copy_next)
+// if (is) is = is->next();
+// ++map;
+// ENDOP
+
+STARTOP(put_glyph_8bit_obs) // set the current slot's glyph from an output class given as a 1-byte parameter
+ declare_params(1);
+ const unsigned int output_class = uint8(*param);
+ is->setGlyph(&seg, seg.getClassGlyph(output_class, 0)); // always uses index 0 of the class; is is dereferenced without a null check
+ENDOP
+
+STARTOP(put_subs_8bit_obs) // class-to-class substitution with 1-byte class ids
+ declare_params(3);
+ const int slot_ref = int8(param[0]); // signed offset into the slot map
+ const unsigned int input_class = uint8(param[1]),
+ output_class = uint8(param[2]);
+ uint16 index; // only assigned and read inside the if below
+ slotref slot = slotat(slot_ref);
+ if (slot) // an empty map entry leaves the current slot untouched
+ {
+ index = seg.findClassIndex(input_class, slot->gid()); // index reported for the referenced slot's glyph in input_class
+ is->setGlyph(&seg, seg.getClassGlyph(output_class, index)); // current slot gets the output_class glyph at that index
+ }
+ENDOP
+
+STARTOP(put_copy) // overwrite the current slot with a copy of the slot referenced by a 1-byte signed parameter
+ declare_params(1);
+ const int slot_ref = int8(*param);
+ if (is && !is->isDeleted()) // nothing happens without a live current slot
+ {
+ slotref ref = slotat(slot_ref);
+ if (ref && ref != is) // no copy when the reference is empty or is the current slot itself
+ {
+ int16 *tempUserAttrs = is->userAttrs(); // keep is's own attribute buffer; the memcpy of the Slot below overwrites the pointer
+ if (is->attachedTo() || is->firstChild()) DIE // abort rather than overwrite a slot that is attached or has children
+ Slot *prev = is->prev();
+ Slot *next = is->next();
+ memcpy(tempUserAttrs, ref->userAttrs(), seg.numAttrs() * sizeof(uint16)); // copy attribute values into is's own buffer
+ memcpy(is, ref, sizeof(Slot)); // raw copy of the whole Slot object, links included
+ is->firstChild(NULL);
+ is->nextSibling(NULL);
+ is->userAttrs(tempUserAttrs); // restore the buffer pointer and list links saved above
+ is->next(next);
+ is->prev(prev);
+ if (is->attachedTo()) // attachment came across from ref: register is as a child of that slot
+ is->attachedTo()->child(is);
+ }
+ is->markCopied(false); // both flags are cleared even when no copy took place
+ is->markDeleted(false);
+ }
+ENDOP
+
+STARTOP(insert) // allocate a new slot, link it in before the current position and make it the current slot
+ if (smap.decMax() <= 0) DIE; // abort once smap.decMax() returns zero or less
+ Slot *newSlot = seg.newSlot();
+ if (!newSlot) DIE; // allocation failed
+ Slot *iss = is;
+ while (iss && iss->isDeleted()) iss = iss->next(); // iss = first non-deleted slot at or after is (may end up null)
+ if (!iss) // no live slot from here on: append at the end of the segment
+ {
+ if (seg.last())
+ {
+ seg.last()->next(newSlot);
+ newSlot->prev(seg.last());
+ newSlot->before(seg.last()->before());
+ seg.last(newSlot);
+ }
+ else // no last slot recorded: newSlot becomes both first and last
+ {
+ seg.first(newSlot);
+ seg.last(newSlot);
+ }
+ }
+ else if (iss->prev()) // link in after iss's predecessor
+ {
+ iss->prev()->next(newSlot);
+ newSlot->prev(iss->prev());
+ newSlot->before(iss->prev()->after());
+ }
+ else // iss has no predecessor: newSlot becomes the segment's first slot
+ {
+ newSlot->prev(NULL);
+ newSlot->before(iss->before());
+ seg.first(newSlot);
+ }
+ newSlot->next(iss);
+ if (iss) // take original() and the after value from the following slot
+ {
+ iss->prev(newSlot);
+ newSlot->originate(iss->original());
+ newSlot->after(iss->before());
+ }
+ else if (newSlot->prev()) // otherwise take them from the preceding slot
+ {
+ newSlot->originate(newSlot->prev()->original());
+ newSlot->after(newSlot->prev()->after());
+ }
+ else // no neighbours at all: fall back to the segment default
+ {
+ newSlot->originate(seg.defaultOriginal());
+ }
+ if (is == smap.highwater())
+ smap.highpassed(false);
+ is = newSlot;
+ seg.extendLength(1);
+ if (map != &smap[-1]) // step the map position back unless it is already at &smap[-1]
+ --map;
+ENDOP
+
+STARTOP(delete_) // mark the current slot deleted and unlink it from the segment's slot list
+ if (!is || is->isDeleted()) DIE // abort without a current slot or when it is already deleted
+ is->markDeleted(true);
+ if (is->prev()) // bypass is in the forward direction, or move the segment's first pointer
+ is->prev()->next(is->next());
+ else
+ seg.first(is->next());
+
+ if (is->next()) // bypass is in the backward direction, or move the segment's last pointer
+ is->next()->prev(is->prev());
+ else
+ seg.last(is->prev());
+
+
+ if (is == smap.highwater()) // highwater moves on to the following slot
+ smap.highwater(is->next());
+ if (is->prev()) // step back to the predecessor when there is one; otherwise is keeps pointing at the deleted slot
+ is = is->prev();
+ seg.extendLength(-1);
+ENDOP
+
+STARTOP(assoc) // set the current slot's before/after from a list of referenced slots
+ declare_params(1);
+ unsigned int num = uint8(*param); // first parameter byte is the number of slot references that follow
+ const int8 * assocs = reinterpret_cast<const int8 *>(param+1);
+ use_params(num); // claim the num reference bytes as well
+ int max = -1;
+ int min = -1;
+
+ while (num-- > 0) // track the smallest before() and largest after() over all non-empty referenced slots
+ {
+ int sr = *assocs++;
+ slotref ts = slotat(sr);
+ if (ts && (min == -1 || ts->before() < min)) min = ts->before();
+ if (ts && ts->after() > max) max = ts->after();
+ }
+ if (min > -1) // implies max > -1
+ {
+ is->before(min);
+ is->after(max);
+ }
+ENDOP
+
+STARTOP(cntxt_item) // conditional forward skip keyed on the current slot-map position
+ // It turns out this is a cunningly disguised condition forward jump.
+ declare_params(3);
+ const int is_arg = int8(param[0]); // signed offset from mapb that the following code applies to
+ const size_t iskip = uint8(param[1]), // amount added to ip when skipping
+ dskip = uint8(param[2]); // amount added to dp when skipping
+
+ if (mapb + is_arg != map) // not at the addressed position: jump over the code and data, leaving true on the stack
+ {
+ ip += iskip;
+ dp += dskip;
+ push(true);
+ }
+ENDOP
+
+STARTOP(attr_set) // set a slot attribute of the current slot to a value popped from the stack
+ declare_params(1);
+ const attrCode slat = attrCode(uint8(*param)); // attribute code comes from the 1-byte parameter
+ const int val = pop();
+ is->setAttr(&seg, slat, 0, val, smap); // index argument is fixed at 0
+ENDOP
+
+STARTOP(attr_add) // add a popped value to a slot attribute of the current slot
+ declare_params(1);
+ const attrCode slat = attrCode(uint8(*param));
+ const uint32_t val = pop();
+ if ((slat == gr_slatPosX || slat == gr_slatPosY) && (flags & POSITIONED) == 0) // position attributes are read below: run positionSlots first, once per POSITIONED flag
+ {
+ seg.positionSlots(0, *smap.begin(), *(smap.end()-1), seg.currdir());
+ flags |= POSITIONED;
+ }
+ uint32_t res = uint32_t(is->getAttr(&seg, slat, 0));
+ is->setAttr(&seg, slat, 0, int32_t(val + res), smap); // sum is formed in uint32_t and converted to int32_t for storing
+ENDOP
+
+STARTOP(attr_sub) // subtract a popped value from a slot attribute of the current slot
+ declare_params(1);
+ const attrCode slat = attrCode(uint8(*param));
+ const uint32_t val = pop();
+ if ((slat == gr_slatPosX || slat == gr_slatPosY) && (flags & POSITIONED) == 0) // position attributes are read below: run positionSlots first, once per POSITIONED flag
+ {
+ seg.positionSlots(0, *smap.begin(), *(smap.end()-1), seg.currdir());
+ flags |= POSITIONED;
+ }
+ uint32_t res = uint32_t(is->getAttr(&seg, slat, 0));
+ is->setAttr(&seg, slat, 0, int32_t(res - val), smap); // difference is formed in uint32_t and converted to int32_t for storing
+ENDOP
+
+STARTOP(attr_set_slot) // set a slot attribute whose popped value is adjusted by the map position for gr_slatAttTo
+ declare_params(1);
+ const attrCode slat = attrCode(uint8(*param));
+ const int offset = int(map - smap.begin())*int(slat == gr_slatAttTo); // current index within smap for gr_slatAttTo, 0 for every other attribute
+ const int val = pop() + offset;
+ is->setAttr(&seg, slat, offset, val, smap); // offset is passed as the index argument as well as being added to the value
+ENDOP
+
+STARTOP(iattr_set_slot) // indexed variant of attr_set_slot: the attribute index comes from a second parameter byte
+ declare_params(2);
+ const attrCode slat = attrCode(uint8(param[0]));
+ const uint8 idx = uint8(param[1]);
+ const int val = int(pop() + (map - smap.begin())*int(slat == gr_slatAttTo)); // popped value plus the current smap index, the latter only for gr_slatAttTo
+ is->setAttr(&seg, slat, idx, val, smap);
+ENDOP
+
+STARTOP(push_slot_attr) // push a slot attribute of a referenced slot
+ declare_params(2);
+ const attrCode slat = attrCode(uint8(param[0]));
+ const int slot_ref = int8(param[1]); // signed offset into the slot map
+ if ((slat == gr_slatPosX || slat == gr_slatPosY) && (flags & POSITIONED) == 0) // position attributes requested: run positionSlots first, once per POSITIONED flag
+ {
+ seg.positionSlots(0, *smap.begin(), *(smap.end()-1), seg.currdir());
+ flags |= POSITIONED;
+ }
+ slotref slot = slotat(slot_ref);
+ if (slot) // an empty map entry pushes nothing
+ {
+ int res = slot->getAttr(&seg, slat, 0);
+ push(res);
+ }
+ENDOP
+
+STARTOP(push_glyph_attr_obs) // push a glyph attribute (1-byte attribute number) of a referenced slot's glyph
+ declare_params(2);
+ const unsigned int glyph_attr = uint8(param[0]);
+ const int slot_ref = int8(param[1]); // signed offset into the slot map
+ slotref slot = slotat(slot_ref);
+ if (slot) // an empty map entry pushes nothing
+ push(int32(seg.glyphAttr(slot->gid(), glyph_attr)));
+ENDOP
+
+STARTOP(push_glyph_metric) // push a glyph metric of a referenced slot
+ declare_params(3);
+ const unsigned int glyph_attr = uint8(param[0]); // metric selector
+ const int slot_ref = int8(param[1]); // signed offset into the slot map
+ const signed int attr_level = uint8(param[2]); // read as an unsigned byte, held in a signed int
+ slotref slot = slotat(slot_ref);
+ if (slot) // an empty map entry pushes nothing
+ push(seg.getGlyphMetric(slot, glyph_attr, attr_level, dir));
+ENDOP
+
+STARTOP(push_feat) // push a feature value associated with a referenced slot
+ declare_params(2);
+ const unsigned int feat = uint8(param[0]);
+ const int slot_ref = int8(param[1]); // signed offset into the slot map
+ slotref slot = slotat(slot_ref);
+ if (slot) // an empty map entry pushes nothing
+ {
+ uint8 fid = seg.charinfo(slot->original())->fid(); // fid is taken from the charinfo at the slot's original() index
+ push(seg.getFeature(fid, feat));
+ }
+ENDOP
+
+STARTOP(push_att_to_gattr_obs) // push a glyph attribute (1-byte attribute number) of the slot a referenced slot is attached to
+ declare_params(2);
+ const unsigned int glyph_attr = uint8(param[0]);
+ const int slot_ref = int8(param[1]); // signed offset into the slot map
+ slotref slot = slotat(slot_ref);
+ if (slot) // an empty map entry pushes nothing
+ {
+ slotref att = slot->attachedTo();
+ if (att) slot = att; // an unattached slot falls back to its own glyph
+ push(int32(seg.glyphAttr(slot->gid(), glyph_attr)));
+ }
+ENDOP
+
+STARTOP(push_att_to_glyph_metric) // push a glyph metric of the slot a referenced slot is attached to
+ declare_params(3);
+ const unsigned int glyph_attr = uint8(param[0]); // metric selector
+ const int slot_ref = int8(param[1]); // signed offset into the slot map
+ const signed int attr_level = uint8(param[2]); // read as an unsigned byte, held in a signed int
+ slotref slot = slotat(slot_ref);
+ if (slot) // an empty map entry pushes nothing
+ {
+ slotref att = slot->attachedTo();
+ if (att) slot = att; // an unattached slot falls back to its own metric
+ push(int32(seg.getGlyphMetric(slot, glyph_attr, attr_level, dir)));
+ }
+ENDOP
+
+STARTOP(push_islot_attr) // indexed variant of push_slot_attr: the attribute index comes from a third parameter byte
+ declare_params(3);
+ const attrCode slat = attrCode(uint8(param[0]));
+ const int slot_ref = int8(param[1]), // signed offset into the slot map
+ idx = uint8(param[2]); // attribute index
+ if ((slat == gr_slatPosX || slat == gr_slatPosY) && (flags & POSITIONED) == 0) // position attributes requested: run positionSlots first, once per POSITIONED flag
+ {
+ seg.positionSlots(0, *smap.begin(), *(smap.end()-1), seg.currdir());
+ flags |= POSITIONED;
+ }
+ slotref slot = slotat(slot_ref);
+ if (slot) // an empty map entry pushes nothing
+ {
+ int res = slot->getAttr(&seg, slat, idx);
+ push(res);
+ }
+ENDOP
+
+#if 0
+STARTOP(push_iglyph_attr) // not implemented
+ NOT_IMPLEMENTED;
+ENDOP
+#endif
+
+STARTOP(pop_ret) // leave the program via EXIT with the value popped from the stack
+ const uint32 ret = pop();
+ EXIT(ret);
+ENDOP
+
+STARTOP(ret_zero) // leave the program via EXIT(0)
+ EXIT(0);
+ENDOP
+
+STARTOP(ret_true) // leave the program via EXIT(1)
+ EXIT(1);
+ENDOP
+
+STARTOP(iattr_set) // indexed variant of attr_set: set attribute slat at index idx to a popped value
+ declare_params(2);
+ const attrCode slat = attrCode(uint8(param[0]));
+ const uint8 idx = uint8(param[1]);
+ const int val = pop();
+ is->setAttr(&seg, slat, idx, val, smap);
+ENDOP
+
+STARTOP(iattr_add) // indexed variant of attr_add: add a popped value to attribute slat at index idx
+ declare_params(2);
+ const attrCode slat = attrCode(uint8(param[0]));
+ const uint8 idx = uint8(param[1]);
+ const uint32_t val = pop();
+ if ((slat == gr_slatPosX || slat == gr_slatPosY) && (flags & POSITIONED) == 0) // position attributes are read below: run positionSlots first, once per POSITIONED flag
+ {
+ seg.positionSlots(0, *smap.begin(), *(smap.end()-1), seg.currdir());
+ flags |= POSITIONED;
+ }
+ uint32_t res = uint32_t(is->getAttr(&seg, slat, idx));
+ is->setAttr(&seg, slat, idx, int32_t(val + res), smap); // sum is formed in uint32_t and converted to int32_t for storing
+ENDOP
+
+STARTOP(iattr_sub) // indexed variant of attr_sub: subtract a popped value from attribute slat at index idx
+ declare_params(2);
+ const attrCode slat = attrCode(uint8(param[0]));
+ const uint8 idx = uint8(param[1]);
+ const uint32_t val = pop();
+ if ((slat == gr_slatPosX || slat == gr_slatPosY) && (flags & POSITIONED) == 0) // position attributes are read below: run positionSlots first, once per POSITIONED flag
+ {
+ seg.positionSlots(0, *smap.begin(), *(smap.end()-1), seg.currdir());
+ flags |= POSITIONED;
+ }
+ uint32_t res = uint32_t(is->getAttr(&seg, slat, idx));
+ is->setAttr(&seg, slat, idx, int32_t(res - val), smap); // difference is formed in uint32_t and converted to int32_t for storing
+ENDOP
+
+STARTOP(push_proc_state) // push the constant 1
+ use_params(1); // the single parameter byte is skipped without being read
+ push(1);
+ENDOP
+
+STARTOP(push_version) // push the constant 0x00030000
+ push(0x00030000);
+ENDOP
+
+STARTOP(put_subs) // class-to-class substitution with 2-byte class ids
+ declare_params(5);
+ const int slot_ref = int8(param[0]); // signed offset into the slot map
+ const unsigned int input_class = uint8(param[1]) << 8 // 16-bit class id, high byte first
+ | uint8(param[2]);
+ const unsigned int output_class = uint8(param[3]) << 8 // 16-bit class id, high byte first
+ | uint8(param[4]);
+ slotref slot = slotat(slot_ref);
+ if (slot) // an empty map entry leaves the current slot untouched
+ {
+ int index = seg.findClassIndex(input_class, slot->gid()); // index reported for the referenced slot's glyph in input_class
+ is->setGlyph(&seg, seg.getClassGlyph(output_class, index)); // current slot gets the output_class glyph at that index
+ }
+ENDOP
+
+#if 0
+STARTOP(put_subs2) // not implemented
+ NOT_IMPLEMENTED;
+ENDOP
+
+STARTOP(put_subs3) // not implemented
+ NOT_IMPLEMENTED;
+ENDOP
+#endif
+
+STARTOP(put_glyph) // set the current slot's glyph from an output class given as a 2-byte parameter
+ declare_params(2);
+ const unsigned int output_class = uint8(param[0]) << 8 // 16-bit class id, high byte first
+ | uint8(param[1]);
+ is->setGlyph(&seg, seg.getClassGlyph(output_class, 0)); // always uses index 0 of the class; is is dereferenced without a null check
+ENDOP
+
+STARTOP(push_glyph_attr) // push a glyph attribute (2-byte attribute number) of a referenced slot's glyph
+ declare_params(3);
+ const unsigned int glyph_attr = uint8(param[0]) << 8 // 16-bit attribute number, high byte first
+ | uint8(param[1]);
+ const int slot_ref = int8(param[2]); // signed offset into the slot map
+ slotref slot = slotat(slot_ref);
+ if (slot) // an empty map entry pushes nothing
+ push(int32(seg.glyphAttr(slot->gid(), glyph_attr)));
+ENDOP
+
+STARTOP(push_att_to_glyph_attr) // push a glyph attribute (2-byte attribute number) of the slot a referenced slot is attached to
+ declare_params(3);
+ const unsigned int glyph_attr = uint8(param[0]) << 8 // 16-bit attribute number, high byte first
+ | uint8(param[1]);
+ const int slot_ref = int8(param[2]); // signed offset into the slot map
+ slotref slot = slotat(slot_ref);
+ if (slot) // an empty map entry pushes nothing
+ {
+ slotref att = slot->attachedTo();
+ if (att) slot = att; // an unattached slot falls back to its own glyph
+ push(int32(seg.glyphAttr(slot->gid(), glyph_attr)));
+ }
+ENDOP
+
+STARTOP(temp_copy) // duplicate the current slot into a newly allocated slot and store the copy in the current map entry
+ slotref newSlot = seg.newSlot();
+ if (!newSlot || !is) DIE; // abort when allocation failed or there is no current slot
+ int16 *tempUserAttrs = newSlot->userAttrs(); // keep the new slot's own attribute buffer; the memcpy below overwrites the pointer
+ memcpy(newSlot, is, sizeof(Slot)); // raw copy of the whole Slot object
+ memcpy(tempUserAttrs, is->userAttrs(), seg.numAttrs() * sizeof(uint16)); // copy attribute values into the new slot's buffer
+ newSlot->userAttrs(tempUserAttrs); // point the copy back at its own buffer
+ newSlot->markCopied(true);
+ *map = newSlot;
+ENDOP
+
+STARTOP(band) // pop a; top of stack becomes uint32(top) & a
+ binop(&);
+ENDOP
+
+STARTOP(bor) // pop a; top of stack becomes uint32(top) | a
+ binop(|);
+ENDOP
+
+STARTOP(bnot) // bitwise complement of the top of stack, in place
+ *sp = ~*sp;
+ENDOP
+
+STARTOP(setbits) // clear the bits of a 16-bit mask in the top of stack, then OR in a 16-bit value
+ declare_params(4);
+ const uint16 m = uint16(param[0]) << 8 // mask: parameter bytes 0-1, high byte first
+ | uint8(param[1]);
+ const uint16 v = uint16(param[2]) << 8 // value: parameter bytes 2-3, high byte first
+ | uint8(param[3]);
+ *sp = ((*sp) & ~m) | v;
+ENDOP
+
+STARTOP(set_feat) // set a feature value associated with a referenced slot to a popped value
+ declare_params(2);
+ const unsigned int feat = uint8(param[0]);
+ const int slot_ref = int8(param[1]); // signed offset into the slot map
+ slotref slot = slotat(slot_ref);
+ if (slot) // with an empty map entry nothing is set and nothing is popped
+ {
+ uint8 fid = seg.charinfo(slot->original())->fid(); // fid is taken from the charinfo at the slot's original() index
+ seg.setFeature(fid, feat, pop());
+ }
+ENDOP
diff --git a/thirdparty/graphite/src/json.cpp b/thirdparty/graphite/src/json.cpp
new file mode 100644
index 0000000000..25f2190f71
--- /dev/null
+++ b/thirdparty/graphite/src/json.cpp
@@ -0,0 +1,147 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2011, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+// JSON debug logging
+// Author: Tim Eves
+
+#if !defined GRAPHITE2_NTRACING
+
+#include <cstdio>
+#include <limits>
+#include "inc/json.h"
+
+#if defined(_MSC_VER)
+#define FORMAT_INTMAX "%lli"
+#define FORMAT_UINTMAX "%llu"
+#else
+#define FORMAT_INTMAX "%ji"
+#define FORMAT_UINTMAX "%ju"
+#endif
+
+using namespace graphite2;
+
+namespace
+{
+ enum // characters kept in the _contexts stack as the pending output for each nesting level
+ {
+ seq = ',', // a value has been written; the next one needs a separator
+ obj='}', member=':', empty_obj='{', // object closer, key/value separator, object opener
+ arr=']', empty_arr='[' // array closer, array opener
+ };
+}
+
+const std::nullptr_t json::null = nullptr; // out-of-class definition of json::null
+
+inline
+void json::context(const char current) throw() // flush the pending context character and replace it with current
+{
+ fprintf(_stream, "%c", *_context); // emit the character that was pending at this nesting level
+ indent();
+ *_context = current; // current becomes the character the next call will flush
+}
+
+
+void json::indent(const int d) throw() // emit the whitespace that separates the next token from the previous one
+{
+ if (*_context == member || (_flatten && _flatten < _context)) // pending character is member, or we are deeper than the level where flattening began
+ fputc(' ', _stream);
+ else // otherwise a newline followed by 4 spaces per nesting level, with the level adjusted by d
+ fprintf(_stream, "\n%*s", 4*int(_context - _contexts + d), "");
+}
+
+
+inline
+void json::push_context(const char prefix, const char suffix) throw() // open a nested level whose opener is prefix and closer is suffix
+{
+ assert(_context - _contexts < ptrdiff_t(sizeof _contexts)); // the level index must still lie inside _contexts
+
+ if (_context == _contexts) // outermost level: store the closer without flushing anything
+ *_context = suffix;
+ else // nested: flush the pending character, then leave the closer pending at the parent level
+ context(suffix);
+ *++_context = prefix; // the opener becomes the pending character of the new level
+}
+
+
+void json::pop_context() throw() // close the innermost nesting level
+{
+ assert(_context > _contexts); // there must be a level to close
+
+ if (*_context == seq) indent(-1); // a value was written at this level: emit whitespace for one level shallower
+ else fputc(*_context, _stream); // otherwise emit the character still pending here (e.g. the opener of an empty container)
+
+ fputc(*--_context, _stream); // step to the parent level and emit the closer that push_context stored there
+ if (_context == _contexts) fputc('\n', _stream); // back at the outermost level: end the line
+ fflush(_stream);
+
+ if (_flatten >= _context) _flatten = 0; // flattening began at this level or deeper: switch it off
+ *_context = seq; // the closed container counts as a written value at the parent level
+}
+
+
+// These four functions cannot be inlined as pointers to these
+// functions are needed for operator << (_context_t) to work.
+void json::flat(json & j) throw() { if (!j._flatten) j._flatten = j._context; } // start flattening at the current level unless already flattening
+void json::close(json & j) throw() { j.pop_context(); } // close the innermost level
+void json::object(json & j) throw() { j.push_context('{', '}'); } // open an object
+void json::array(json & j) throw() { j.push_context('[', ']'); } // open an array
+void json::item(json & j) throw() // close levels until the enclosing level is an array, or only one level is left
+{
+ while (j._context > j._contexts+1 && j._context[-1] != arr) // _context[-1] holds the closer of the enclosing container
+ j.pop_context();
+}
+
+
+json & json::operator << (json::string s) throw() // write s as a quoted string; inside an object it alternates between key and value
+{
+ const char ctxt = _context[-1] == obj ? *_context == member ? seq : member : seq; // enclosing level is an object and no ':' is pending: this string is a key, so ':' becomes pending
+ context(ctxt);
+ fprintf(_stream, "\"%s\"", s); // s is written verbatim between the quotes; no escaping is applied
+ if (ctxt == member) fputc(' ', _stream); // a key is followed by a space
+
+ return *this;
+}
+
+json & json::operator << (json::number f) throw() // write a number; infinities and NaN are written by name, everything else with %g
+{
+ context(seq);
+ if (std::numeric_limits<json::number>::infinity() == f)
+ fputs("Infinity", _stream);
+ else if (-std::numeric_limits<json::number>::infinity() == f)
+ fputs("-Infinity", _stream);
+ // NaN never compares equal to anything (itself included), so an == test against quiet_NaN() cannot match.
+ else if (f != f)
+ fputs("NaN", _stream);
+ else
+ fprintf(_stream, "%g", f);
+ return *this;
+}
+json & json::operator << (json::integer d) throw() { context(seq); fprintf(_stream, FORMAT_INTMAX, intmax_t(d)); return *this; } // signed integer, widened to intmax_t for printing
+json & json::operator << (json::integer_u d) throw() { context(seq); fprintf(_stream, FORMAT_UINTMAX, uintmax_t(d)); return *this; } // unsigned integer, widened to uintmax_t for printing
+json & json::operator << (json::boolean b) throw() { context(seq); fputs(b ? "true" : "false", _stream); return *this; } // boolean written as true or false
+json & json::operator << (std::nullptr_t) throw() { context(seq); fputs("null",_stream); return *this; } // nullptr written as null
+
+#endif
diff --git a/thirdparty/harfbuzz/AUTHORS b/thirdparty/harfbuzz/AUTHORS
new file mode 100644
index 0000000000..83c0c66f99
--- /dev/null
+++ b/thirdparty/harfbuzz/AUTHORS
@@ -0,0 +1,14 @@
+Behdad Esfahbod
+David Corbett
+David Turner
+Ebrahim Byagowi
+Garret Rieger
+Jonathan Kew
+Khaled Hosny
+Lars Knoll
+Martin Hosken
+Owen Taylor
+Roderick Sheeter
+Roozbeh Pournader
+Simon Hausmann
+Werner Lemberg
diff --git a/thirdparty/harfbuzz/COPYING b/thirdparty/harfbuzz/COPYING
new file mode 100644
index 0000000000..57343164f2
--- /dev/null
+++ b/thirdparty/harfbuzz/COPYING
@@ -0,0 +1,38 @@
+HarfBuzz is licensed under the so-called "Old MIT" license. Details follow.
+For parts of HarfBuzz that are licensed under different licenses see individual
+files names COPYING in subdirectories where applicable.
+
+Copyright © 2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020 Google, Inc.
+Copyright © 2018,2019,2020 Ebrahim Byagowi
+Copyright © 2019,2020 Facebook, Inc.
+Copyright © 2012 Mozilla Foundation
+Copyright © 2011 Codethink Limited
+Copyright © 2008,2010 Nokia Corporation and/or its subsidiary(-ies)
+Copyright © 2009 Keith Stribley
+Copyright © 2009 Martin Hosken and SIL International
+Copyright © 2007 Chris Wilson
+Copyright © 2006 Behdad Esfahbod
+Copyright © 2005 David Turner
+Copyright © 2004,2007,2008,2009,2010 Red Hat, Inc.
+Copyright © 1998-2004 David Turner and Werner Lemberg
+
+For full copyright notices consult the individual files in the package.
+
+
+Permission is hereby granted, without written agreement and without
+license or royalty fees, to use, copy, modify, and distribute this
+software and its documentation for any purpose, provided that the
+above copyright notice and the following two paragraphs appear in
+all copies of this software.
+
+IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+
+THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
diff --git a/thirdparty/harfbuzz/NEWS b/thirdparty/harfbuzz/NEWS
new file mode 100644
index 0000000000..f211a3781c
--- /dev/null
+++ b/thirdparty/harfbuzz/NEWS
@@ -0,0 +1,2412 @@
+Overview of changes leading to 2.7.2
+Saturday, August 29, 2020
+====================================
+- Fix a regression in the previous release that caused a crash with Kaithi.
+- More OOM fixes.
+
+
+Overview of changes leading to 2.7.1
+Thursday, August 13, 2020
+====================================
+- ot-funcs now handles variable empty glyphs better when hvar/vvar isn't present.
+- Reverted a GDEF processing regression.
+- A couple of fixes to handle OOM better.
+
+
+Overview of changes leading to 2.7.0
+Saturday, July 25, 2020
+====================================
+- Use an implementation for round that always rounds up; some minor fluctuations
+ are expected on var fonts, especially when the hb-ot callback is used.
+- Fix an AAT's `kerx` issue on broken rendering of Devanagari Sangam MN.
+- Remove AAT's `lcar` table support from _get_ligature_carets API, not even much
+ use on macOS installed fonts (only two files). GDEF support is the recommended
+ one and expected to work properly after issues fixed two releases ago.
+- Minor memory fixes to handle OOM better, especially in hb-ft.
+- Minor .so files versioning scheme change and remove stable/unstable scheme
+ differences, was never used in practice (always default to stable scheme).
+- We are now suggesting careful packaging of the library using meson,
+ https://github.com/harfbuzz/harfbuzz/wiki/Notes-on-migration-to-meson
+ for more information.
+- Distribution package URL is changed, either use GitHub generated tarballs,
+ `https://github.com/harfbuzz/harfbuzz/archive/$pkgver.tar.gz`
+ or, even more preferably use commit hash of the release and git checkouts like,
+ `git+https://github.com/harfbuzz/harfbuzz#commit=$commit`
+
+
+Overview of changes leading to 2.6.8
+Monday, June 22, 2020
+====================================
+- New API to fetch glyph alternates from GSUB table.
+- hb-coretext build fix for macOS < 10.10.
+- Meson build fixes, cmake port removal is postponed but please prepare for
+ it and give us feedback.
+ Autotools is still our main build system however please consider
+ experimenting with meson also for packaging the library.
+- New API:
++hb_ot_layout_lookup_get_glyph_alternates()
+
+
+Overview of changes leading to 2.6.7
+Wednesday, June 3, 2020
+====================================
+- Update to Unicode 13.0.0.
+- Fix hb_ot_layout_get_ligature_carets for fonts without lcar table, it was
+ completely broken for all the other fonts since 2.1.2.
+- As a part of our migration to meson, this release will be the last one
+ to provide cmake port files but autotools still is our main build system.
+ There is a possibility that the next version, or the one after, will be released
+ using meson.
+
+
+Overview of changes leading to 2.6.6
+Tuesday, May 12, 2020
+====================================
+- A fix in AAT kerning for Geeza Pro.
+- Better support for resource fork fonts on macOS.
+
+
+Overview of changes leading to 2.6.5
+Friday, April 17, 2020
+====================================
+- Add experimental meson build system. Autotools is still the primary
+ and supported build system.
+- AAT is now always preferred for horizontal scripts when both AAT and OT
+ layout tables exist at the same time.
+- Subsetter improvements.
+- New API:
++hb_ft_font_lock_face()
++hb_ft_font_unlock_face()
+
+
+Overview of changes leading to 2.6.4
+Monday, October 29, 2019
+====================================
+- Small bug fix.
+- Build fixes.
+
+
+Overview of changes leading to 2.6.3
+Monday, October 28, 2019
+====================================
+- Misc small fixes, mostly to build-related issues.
+- New API:
++hb_font_get_nominal_glyphs()
+
+
+Overview of changes leading to 2.6.2
+Monday, September 30, 2019
+====================================
+- Misc small fixes, mostly to build-related issues.
+
+
+Overview of changes leading to 2.6.1
+Thursday, August 22, 2019
+====================================
+- Fix regression with hb_font_create_sub_font scaling introduced in 2.6.0.
+- Change interpretation of font PTEM size / CoreText font size handling.
+ See https://github.com/harfbuzz/harfbuzz/pull/1484
+- hb-ot-font: Prefer symbol cmap subtable if present.
+- Apply 'dist'/'abvm'/'blwm' features to all scripts.
+- Drop experimental DirectWrite API.
+
+
+Overview of changes leading to 2.6.0
+Tuesday, August 13, 2019
+====================================
+- New OpenType metrics, baseline, and metadata table access APIs.
+- New API to set font variations to a named-instance.
+- New hb-gdi.h header and API for creating hb_face_t from HFONT.
+- Amalgam: Provide a single-file harfbuzz.cc file for easier alternate building.
+- More size-reduction configurable options, enabled by HB_TINY.
+- New API:
++hb_font_set_var_named_instance()
++hb_gdi_face_create()
++hb_ot_layout_baseline_tag_t
++hb_ot_layout_get_baseline()
++hb_ot_meta_tag_t
++hb_ot_meta_get_entry_tags()
++hb_ot_meta_reference_entry()
++hb_ot_metrics_tag_t
++hb_ot_metrics_get_position()
++hb_ot_metrics_get_variation()
++hb_ot_metrics_get_x_variation()
++hb_ot_metrics_get_y_variation()
+
+
+Overview of changes leading to 2.5.3
+Wednesday, June 26, 2019
+====================================
+- Fix UCD script data for Unicode 10+ scripts. This was broken since 2.5.0.
+- More optimizations for HB_TINY.
+
+
+Overview of changes leading to 2.5.2
+Thursday, June 20, 2019
+====================================
+- More hb-config.hh facilities to shrink library size, namely when built as
+ HB_TINY.
+- New documentation of custom configurations in CONFIG.md.
+- Fix build on gcc 4.8. That's supported again.
+- Universal Shaping Engine improvements thanks to David Corbett.
+- API Changes: Undeprecate some horizontal-kerning API and re-enable in hb-ft,
+ such that Type1 fonts will continue kerning.
+
+
+Overview of changes leading to 2.5.1
+Friday, May 31, 2019
+====================================
+- Fix build with various versions of Visual Studio.
+- Improved documentation, thanks to Nathan Willis.
+- Bugfix in subsetting glyf table.
+- Improved scripts for cross-compiling for Windows using mingw.
+- Rename HB_MATH_GLYPH_PART_FLAG_EXTENDER to HB_OT_MATH_GLYPH_PART_FLAG_EXTENDER.
+ A deprecated macro is added for backwards-compatibility.
+
+
+Overview of changes leading to 2.5.0
+Friday, May 24, 2019
+====================================
+- This release does not include many functional changes, but includes major internal
+ code-base changes. We now require C++11. Support for gcc 4.8 and earlier has been
+ dropped.
+- New hb-config.hh facility for compiling smaller library for embedded and web usecases.
+- New Unicode Character Database implementation that is half the size of previously-used
+ UCDN.
+- Subsetter improvements.
+- Improved documentation, thanks to Nathan Willis.
+- Misc shaping fixes.
+
+
+Overview of changes leading to 2.4.0
+Monday, March 25, 2019
+====================================
+- Unicode 12.
+- Misc fixes.
+- Subsetter improvements.
+- New API:
+HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE
+hb_directwrite_face_create()
+
+
+Overview of changes leading to 2.3.1
+Wednesday, January 30, 2019
+====================================
+- AAT bug fixes.
+- Misc internal housekeeping cleanup.
+
+
+Overview of changes leading to 2.3.0
+Thursday, December 20, 2018
+====================================
+- Fix regression on big-endian architectures. Ouch!
+- Misc bug and build fixes.
+- Fix subsetting of simple GSUB/GDEF.
+- Merge CFF / CFF2 support contributed by Adobe. This mostly involves
+ the subsetter, but also get_glyph_extents on CFF fonts.
+
+New API in hb-aat.h:
++hb_aat_layout_has_substitution()
++hb_aat_layout_has_positioning()
++hb_aat_layout_has_tracking()
+
+
+Overview of changes leading to 2.2.0
+Thursday, November 29, 2018
+====================================
+- Misc shaping bug fixes.
+- Add font variations named-instance API.
+- Deprecate font variations axis enumeration API and add replacement.
+- AAT shaping improvements:
+ o Fixed 'kern' table Format 2 implementation.
+ o Implement 'feat' table API for feature detection.
+ o Blacklist 'GSUB' table of fonts from 'MUTF' foundry that also have 'morx'.
+
+New API:
++hb_aat_layout_feature_type_t
++hb_aat_layout_feature_selector_t
++hb_aat_layout_get_feature_types()
++hb_aat_layout_feature_type_get_name_id
++hb_aat_layout_feature_selector_info_t
++HB_AAT_LAYOUT_NO_SELECTOR_INDEX
++hb_aat_layout_feature_type_get_selector_infos()
++hb_ot_var_axis_flags_t
++hb_ot_var_axis_info_t
++hb_ot_var_get_axis_infos()
++hb_ot_var_find_axis_info()
++hb_ot_var_get_named_instance_count()
++hb_ot_var_named_instance_get_subfamily_name_id()
++hb_ot_var_named_instance_get_postscript_name_id()
++hb_ot_var_named_instance_get_design_coords()
+
+Deprecated API:
++HB_OT_VAR_NO_AXIS_INDEX
++hb_ot_var_axis_t
++hb_ot_var_get_axes()
++hb_ot_var_find_axis()
+
+
+Overview of changes leading to 2.1.3
+Friday, November 16, 2018
+====================================
+- Fix AAT 'mort' shaping, which was broken in 2.1.2
+
+
+Overview of changes leading to 2.1.2
+Friday, November 16, 2018
+====================================
+- Various internal changes.
+- AAT shaping improvements:
+ o Implement kern table Format 1 state-machine-based kerning.
+ o Implement cross-stream kerning (cursive positioning, etc).
+ o Ignore emptyish GSUB tables (zero scripts) if morx present.
+ o Don't apply GPOS if morx is being applied. Matches Apple.
+
+
+Overview of changes leading to 2.1.1
+Monday, November 5, 2018
+====================================
+- AAT improvements:
+ o Implement 'mort' table.
+ o Implement 'kern' subtables Format 1 and Format 3.
+
+
+Overview of changes leading to 2.1.0
+Tuesday, October 30, 2018
+====================================
+- AAT shaping improvements:
+ o Allow user controlling AAT features, for whole buffer only currently.
+ o Several 'morx' fixes.
+ o Implement tuple-kerns in 'kerx'; Fixes kerning with Apple default
+ San Francisco fonts.
+- Support for color fonts:
+ o COLR/CPAL API to fetch color layers.
+ o SVG table to fetch SVG documents.
+ o CBDT/sbix API to fetch PNG images.
+- New 'name' table API.
+- hb-ot-font now uses 'VORG' table to correctly position CFF glyphs
+ in vertical layout.
+- Various fuzzer-found bug fixes.
+
+Changed API:
+
+A type and a macro added in 2.0.0 were renamed:
+
+hb_name_id_t -> hb_ot_name_id_t
+HB_NAME_ID_INVALID -> HB_OT_NAME_ID_INVALID
+
+New API:
+
++hb_color_t
++HB_COLOR
++hb_color_get_alpha()
++hb_color_get_red()
++hb_color_get_green()
++hb_color_get_blue()
++hb_ot_color_has_palettes()
++hb_ot_color_palette_get_count()
++hb_ot_color_palette_get_name_id()
++hb_ot_color_palette_color_get_name_id()
++hb_ot_color_palette_flags_t
++hb_ot_color_palette_get_flags()
++hb_ot_color_palette_get_colors()
++hb_ot_color_has_layers()
++hb_ot_color_layer_t
++hb_ot_color_glyph_get_layers()
++hb_ot_color_has_svg()
++hb_ot_color_glyph_reference_svg()
++hb_ot_color_has_png()
++hb_ot_color_glyph_reference_png()
+
++hb_ot_name_id_t
++HB_OT_NAME_ID_INVALID
++HB_OT_NAME_ID_COPYRIGHT
++HB_OT_NAME_ID_FONT_FAMILY
++HB_OT_NAME_ID_FONT_SUBFAMILY
++HB_OT_NAME_ID_UNIQUE_ID
++HB_OT_NAME_ID_FULL_NAME
++HB_OT_NAME_ID_VERSION_STRING
++HB_OT_NAME_ID_POSTSCRIPT_NAME
++HB_OT_NAME_ID_TRADEMARK
++HB_OT_NAME_ID_MANUFACTURER
++HB_OT_NAME_ID_DESIGNER
++HB_OT_NAME_ID_DESCRIPTION
++HB_OT_NAME_ID_VENDOR_URL
++HB_OT_NAME_ID_DESIGNER_URL
++HB_OT_NAME_ID_LICENSE
++HB_OT_NAME_ID_LICENSE_URL
++HB_OT_NAME_ID_TYPOGRAPHIC_FAMILY
++HB_OT_NAME_ID_TYPOGRAPHIC_SUBFAMILY
++HB_OT_NAME_ID_MAC_FULL_NAME
++HB_OT_NAME_ID_SAMPLE_TEXT
++HB_OT_NAME_ID_CID_FINDFONT_NAME
++HB_OT_NAME_ID_WWS_FAMILY
++HB_OT_NAME_ID_WWS_SUBFAMILY
++HB_OT_NAME_ID_LIGHT_BACKGROUND
++HB_OT_NAME_ID_DARK_BACKGROUND
++HB_OT_NAME_ID_VARIATIONS_PS_PREFIX
++hb_ot_name_entry_t
++hb_ot_name_list_names()
++hb_ot_name_get_utf8()
++hb_ot_name_get_utf16()
++hb_ot_name_get_utf32()
+
+
+Overview of changes leading to 2.0.2
+Saturday, October 20, 2018
+====================================
+- Fix two minor memory access issues in AAT tables.
+
+
+Overview of changes leading to 2.0.1
+Friday, October 19, 2018
+====================================
+- Fix hb-version.h reported release version that went wrong (1.8.0)
+ with previous release.
+- Fix extrapolation in 'trak' table.
+- Fix hb-font infinite-recursion issue with some font funcs and
+ subclassed fonts.
+- Implement variation-kerning format in kerx table, although without
+ variation.
+- Fix return value of hb_map_is_empty().
+
+
+Overview of changes leading to 2.0.0
+Thursday, October 18, 2018
+====================================
+- Added AAT shaping support (morx/kerx/trak).
+ Automatically used if GSUB/GPOS are not available respectively.
+ Set HB_OPTIONS=aat env var to have morx/kerx preferred over
+ GSUB/GPOS.
+- Apply TrueType kern table internally, instead of relying on
+ hb_font_t callbacks.
+- Khmer shaper significantly rewritten to better match Uniscribe.
+- Indic3 tags ('dev3', etc) are passed to USE shaper.
+- .dfont Mac font containers implemented.
+- Script- and language-mapping revamped to better use BCP 47.
+- Misc USE and Indic fixes.
+- Misc everything fixes.
+- Too many things to list. Biggest release since 0.9.1, with
+ over 500 commits in just over 5 weeks! Didn't intend it to
+ be a big release. Just happened to become.
+- hb-ft now locks underlying FT_Face during use.
+
+API changes:
+
+- Newly-created hb_font_t's now have our internal "hb-ot-font"
+ callbacks set on them, so they should work out of the box
+ without any callbacks set. If callbacks are set, everything
+ is back to what it was before, the fallback callbacks are
+ null. If you want to get the internal implementation modified,
+ sub_font it.
+
+- New hb_font_funcs_set_nominal_glyphs_func() allows speeding
+ up character to glyph mapping.
+
+New API:
++HB_FEATURE_GLOBAL_START
++HB_FEATURE_GLOBAL_END
++hb_buffer_set_invisible_glyph()
++hb_buffer_get_invisible_glyph()
++hb_font_funcs_set_nominal_glyphs_func()
++hb_ot_layout_table_select_script()
++hb_ot_layout_script_select_language()
++hb_ot_layout_feature_get_name_ids()
++hb_ot_layout_feature_get_characters()
++hb_name_id_t
++HB_NAME_ID_INVALID
++HB_OT_MAX_TAGS_PER_SCRIPT
++hb_ot_tags_from_script_and_language()
++hb_ot_tags_to_script_and_language()
+
+Deprecated API:
+-hb_font_funcs_set_glyph_func()
+-hb_unicode_eastasian_width_func_t
+-hb_unicode_funcs_set_eastasian_width_func()
+-hb_unicode_eastasian_width()
+-hb_unicode_decompose_compatibility_func_t
+-HB_UNICODE_MAX_DECOMPOSITION_LEN
+-hb_unicode_funcs_set_decompose_compatibility_func()
+-hb_unicode_decompose_compatibility()
+-hb_font_funcs_set_glyph_h_kerning_func()
+-hb_font_funcs_set_glyph_v_kerning_func()
+-hb_font_get_glyph_h_kerning()
+-hb_font_get_glyph_v_kerning()
+-hb_font_get_glyph_kerning_for_direction()
+-hb_ot_layout_table_choose_script()
+-hb_ot_layout_script_find_language()
+-hb_ot_tags_from_script()
+-hb_ot_tag_from_language()
+
+
+Overview of changes leading to 1.9.0
+Monday, September 10, 2018
+====================================
+- Added 'cmap' API to hb_face_t.
+- Face-builder API.
+- hb-ot-font re-creation should be much leaner now, as the
+ font tables it uses are cached on hb_face_t now.
+- Internal source header file name changes:
+ hb-*-private.hh is renamed to hb-*.hh.
+
+New API:
++HB_UNICODE_MAX
++hb_face_collect_unicodes()
++hb_face_collect_variation_selectors()
++hb_face_collect_variation_unicodes()
++hb_face_builder_create()
++hb_face_builder_add_table()
+
+
+Overview of changes leading to 1.8.8
+Tuesday, August 14, 2018
+====================================
+- Fix hb-icu crash on architectures where compare_exchange_weak() can
+ fail falsely. This bug was introduced in 1.8.4.
+ https://bugs.chromium.org/p/chromium/issues/detail?id=873568
+- More internal refactoring of atomic operations and singletons.
+- API changes:
+ The following functions do NOT reference their return value before
+ returning:
+ * hb_unicode_funcs_get_default()
+ * hb_glib_get_unicode_funcs()
+ * hb_icu_get_unicode_funcs()
+ This is consistent with their naming ("get", instead of "reference")
+ as well as how they are used in the wild (ie. no one calls destroy()
+ on their return value.)
+
+
+Overview of changes leading to 1.8.7
+Wednesday, August 8, 2018
+====================================
+- Fix assertion failure with GDEF-blacklisted fonts.
+
+
+Overview of changes leading to 1.8.6
+Tuesday, August 7, 2018
+====================================
+- Internal code shuffling.
+- New API to speed up getting advance widths for implementations
+ that have heavy overhead in get_h_advance callback:
++hb_font_funcs_set_glyph_h_advances_func
++hb_font_funcs_set_glyph_v_advances_func
++hb_font_get_glyph_advances_for_direction
++hb_font_get_glyph_h_advances
++hb_font_get_glyph_h_advances_func_t
++hb_font_get_glyph_v_advances
++hb_font_get_glyph_v_advances_func_t
+
+
+Overview of changes leading to 1.8.5
+Wednesday, August 1, 2018
+====================================
+- Major Khmer shaper improvements to better match Microsoft.
+- Indic bug fixes.
+- Internal improvements to atomic operations.
+
+
+Overview of changes leading to 1.8.4
+Tuesday, July 17, 2018
+====================================
+- Fix build on non-C++11.
+- Use C++-style GCC atomics and C++11 atomics.
+
+
+Overview of changes leading to 1.8.3
+Wednesday, July 11, 2018
+====================================
+- A couple of Indic / USE bug fixes.
+- Disable vectorization, as it was causing unaligned access bus error on
+ certain 32bit architectures.
+
+
+Overview of changes leading to 1.8.2
+Tuesday, July 3, 2018
+====================================
+- Fix infinite loop in Khmer shaper.
+- Improve hb_blob_create_from_file() for streams.
+
+
+Overview of changes leading to 1.8.1
+Tuesday, June 12, 2018
+====================================
+- Fix hb-version.h file generation; last two releases went out with wrong ones.
+- Fix correctness bug in hb_set_t operations, introduced in 1.7.7.
+- Remove HB_SUBSET_BUILTIN build option. Not necessary.
+
+
+Overview of changes leading to 1.8.0
+Tuesday, June 5, 2018
+====================================
+- Update to Unicode 11.0.0.
+
+
+Overview of changes leading to 1.7.7
+Tuesday, June 5, 2018
+====================================
+- Lots of internal changes, but not yet exposed externally.
+- All HarfBuzz objects are significantly smaller in size now.
+- Sinhala: Position repha on top of post-consonant, not base.
+ This better matches Windows 10 behavior, which was changed
+ from previous Windows versions.
+- New build options:
+ o New cpp macro HB_NO_ATEXIT
+ o New cpp macro HB_SUBSET_BUILTIN
+- Significant libharfbuzz-subset changes. API subject to change.
+- New API in libharfbuzz:
+
++hb_blob_create_from_file()
++hb_face_count()
+
+A hashmap implementation:
++hb-map.h
++HB_MAP_VALUE_INVALID
++hb_map_t
++hb_map_create()
++hb_map_get_empty()
++hb_map_reference()
++hb_map_destroy()
++hb_map_set_user_data()
++hb_map_get_user_data()
++hb_map_allocation_successful()
++hb_map_clear()
++hb_map_is_empty()
++hb_map_get_population()
++hb_map_set()
++hb_map_get()
++hb_map_del()
++hb_map_has()
+
+
+Overview of changes leading to 1.7.6
+Wednesday, March 7, 2018
+====================================
+
+- Fix to hb_set_t binary operations. Ouch.
+- New experimental harfbuzz-subset library. All of hb-subset.h
+ is experimental right now and API WILL change.
+
+- New API:
+hb_blob_copy_writable_or_fail()
+HB_OT_TAG_BASE
+hb_set_previous()
+hb_set_previous_range()
+
+
+Overview of changes leading to 1.7.5
+Tuesday, January 30, 2018
+====================================
+
+- Separate Khmer shaper from Indic.
+- First stab at AAT morx. Not hooked up.
+- Misc bug fixes.
+
+
+Overview of changes leading to 1.7.4
+Wednesday, December 20, 2017
+====================================
+
+- Fix collect_glyphs() regression caused by hb_set_t changes.
+
+
+Overview of changes leading to 1.7.3
+Monday, December 18, 2017
+====================================
+
+- hb_set_t performance tuning and optimizations.
+- Speed up collect_glyphs() and reject garbage data.
+- In hb_coretext_font_create() set font point-size (ptem).
+- Misc fixes.
+
+
+Overview of changes leading to 1.7.2
+Monday, December 4, 2017
+====================================
+
+- Optimize hb_set_add_range().
+- Misc fixes.
+- New API:
+hb_coretext_font_create()
+
+
+Overview of changes leading to 1.7.1
+Tuesday, November 14, 2017
+====================================
+
+- Fix atexit object destruction regression.
+- Fix minor integer-overflow.
+
+
+Overview of changes leading to 1.7.0
+Monday, November 13, 2017
+====================================
+
+- Minor Indic fixes.
+- Implement kerning and glyph names in hb-ot-font.
+- Various DSO optimization re .data and .bss sizes.
+- Make C++11 optional; build fixes.
+- Mark all other backends "unsafe-to-break".
+- Graphite fix.
+
+
+Overview of changes leading to 1.6.3
+Thursday, October 26th, 2017
+====================================
+
+- Fix hb_set_t some more. Should be solid now.
+- Implement get_glyph_name() for hb-ot-font.
+- Misc fixes.
+
+
+Overview of changes leading to 1.6.2
+Monday, October 23rd, 2017
+====================================
+
+- Yesterday's release had a bad crasher; don't use it. That's what
+ happens when one works on Sunday...
+ https://github.com/harfbuzz/harfbuzz/issues/578
+- Build fixes for FreeBSD and Chrome Android.
+
+
+Overview of changes leading to 1.6.1
+Sunday, October 22nd, 2017
+====================================
+
+- Don't skip over COMBINING GRAPHEME JOINER when ligating, etc.
+ To be refined: https://github.com/harfbuzz/harfbuzz/issues/554
+- Faster hb_set_t implementation.
+- Don't use deprecated ICU API.
+- Fix undefined-behavior in Myanmar shaper, introduced in 1.6.0
+- Deprecated API:
+ hb_set_invert()
+
+
+Overview of changes leading to 1.6.0
+Friday, October the 13th, 2017
+====================================
+
+- Update to Unicode 10.
+
+- Various Indic and Universal Shaping Engine fixes as a result of
+ HarfBuzz Hackfest with Jonathan Kew at Web Engines Hackfest at
+ the Igalia offices in A Coruña, Spain. Thanks Igalia for having
+ us!
+
+- Implement Unicode Arabic Mark Ordering Algorithm UTR#53.
+
+- Implement optical sizing / tracking in CoreText backend, using
+ new API hb_font_set_ptem().
+
+- Allow notifying hb_font_t that underlying FT_Face changed sizing,
+ using new API hb_ft_font_changed().
+
+- More Graphite backend RTL fixes.
+
+- Fix caching of variable font shaping plans.
+
+- hb-view / hb-shape now accept following new arguments:
+
+ o --unicodes: takes a list of hex numbers that represent Unicode
+ codepoints.
+
+New API:
++hb_face_get_table_tags()
++hb_font_set_ptem()
++hb_font_get_ptem()
++hb_ft_font_changed()
+
+
+Overview of changes leading to 1.5.1
+Tuesday, September 5, 2017
+====================================
+
+- Fix "unsafe-to-break" in fallback shaping and other corner cases.
+ All our tests pass with --verify now, meaning unsafe-to-break API
+ works as expected.
+- Add --unicodes to hb-view / hb-shape.
+- [indic] Treat Consonant_With_Stacker as consonant. This will need
+ further tweaking.
+- hb_buffer_diff() tweaks.
+
+
+Overview of changes leading to 1.5.0
+Wednesday, August 23, 2017
+====================================
+
+- Misc new API, for appending a buffer to another, and for comparing
+ contents of two buffers for types of differences.
+
+- New "unsafe-to-break" API. Can be used to speed up reshaping
+ in line-breaking situations. Essentially, after shaping, it returns
+ positions in the input string (some of the cluster boundaries) that
+ are "safe to break" in that if the text is segmented at that position
+ and two sides reshaped and concatenated, the shaping result is
+ exactly the same as shaping the text in one piece.
+
+ hb-view and hb-shape now take --verify, which verifies
+ the above property.
+
+ Some corner cases of the implementation are still not quite working.
+ Those will be fixed in subsequent releases.
+
+- New API:
+
+hb_buffer_append()
+
+hb_glyph_flags_t
+HB_GLYPH_FLAG_UNSAFE_TO_BREAK
+HB_GLYPH_FLAG_DEFINED
+hb_glyph_info_get_glyph_flags()
+
+HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS
+
+hb_buffer_diff_flags_t
+HB_BUFFER_DIFF_FLAG_EQUAL
+HB_BUFFER_DIFF_FLAG_CONTENT_TYPE_MISMATCH
+HB_BUFFER_DIFF_FLAG_LENGTH_MISMATCH
+HB_BUFFER_DIFF_FLAG_NOTDEF_PRESENT
+HB_BUFFER_DIFF_FLAG_DOTTED_CIRCLE_PRESENT
+HB_BUFFER_DIFF_FLAG_CODEPOINT_MISMATCH
+HB_BUFFER_DIFF_FLAG_CLUSTER_MISMATCH
+HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH
+HB_BUFFER_DIFF_FLAG_POSITION_MISMATCH
+hb_buffer_diff
+
+
+Overview of changes leading to 1.4.8
+Tuesday, August 8, 2017
+====================================
+
+- Major fix to avar table handling.
+- Rename hb-shape --show-message to --trace.
+- Build fixes.
+
+
+Overview of changes leading to 1.4.7
+Tuesday, July 18, 2017
+====================================
+
+- Multiple Indic, Tibetan, and Cham fixes.
+- CoreText: Allow disabling kerning.
+- Adjust Arabic feature order again.
+- Misc build fixes.
+
+
+Overview of changes leading to 1.4.6
+Sunday, April 23, 2017
+====================================
+
+- Graphite2: Fix RTL positioning issue.
+- Blacklist GDEF of more versions of Padauk and Tahoma.
+- New, experimental, cmake alternative build system.
+
+
+Overview of changes leading to 1.4.5
+Friday, March 10, 2017
+====================================
+
+- Revert "Fix Context lookup application when moving back after a glyph..."
+ This introduced memory access problems. To be fixed properly soon.
+
+
+Overview of changes leading to 1.4.4
+Sunday, March 5, 2017
+====================================
+
+- Fix Context lookup application when moving back after a glyph deletion.
+- Fix buffer-overrun in Bengali.
+
+
+Overview of changes leading to 1.4.3
+Saturday, February 25, 2017
+====================================
+
+- Route Adlam script to Arabic shaper.
+- Misc fixes.
+- New API:
+ hb_font_set_face()
+- Deprecate API:
+ hb_graphite2_font_get_gr_font()
+
+
+Overview of changes leading to 1.4.2
+Monday, January 23, 2017
+====================================
+
+- Implement OpenType Font Variation tables avar/fvar/HVAR/VVAR.
+- hb-shape and hb-view now accept --variations.
+- New API:
+
+hb_variation_t
+hb_variation_from_string()
+hb_variation_to_string()
+
+hb_font_set_variations()
+hb_font_set_var_coords_design()
+hb_font_get_var_coords_normalized()
+
+hb-ot-var.h:
+hb_ot_var_axis_t
+hb_ot_var_has_data()
+hb_ot_var_get_axis_count()
+hb_ot_var_get_axes()
+hb_ot_var_find_axis()
+hb_ot_var_normalize_variations()
+hb_ot_var_normalize_coords()
+
+- MVAR to be implemented later. Access to named instances to be
+ implemented later as well.
+
+- Misc fixes.
+
+
+Overview of changes leading to 1.4.1
+Thursday, January 5, 2017
+====================================
+
+- Always build and use UCDN for Unicode data by default.
+ Reduces dependence on version of Unicode data in glib,
+ specially in the Windows bundles we are shipping, which
+ have very old glib.
+
+
+Overview of changes leading to 1.4.0
+Thursday, January 5, 2017
+====================================
+
+- Merged "OpenType GX" branch which adds core of support for
+ OpenType 1.8 Font Variations. To that extent, the relevant
+ new API is:
+
+New API:
+hb_font_set_var_coords_normalized()
+
+ with supporting API:
+
+New API:
+HB_OT_LAYOUT_NO_VARIATIONS_INDEX
+hb_ot_layout_table_find_feature_variations()
+hb_ot_layout_feature_with_variations_get_lookups()
+hb_shape_plan_create2()
+hb_shape_plan_create_cached2()
+
+ Currently variations in GSUB/GPOS/GDEF are fully supported,
+ and no other tables are supported. In particular, fvar/avar
+ are NOT supported, hence the hb_font_set_var_coords_normalized()
+ taking normalized coordinates. API to take design coordinates
+ will be added in the future.
+
+ HVAR/VVAR/MVAR support will also be added to hb-ot-font in the
+ future.
+
+- Fix regression in GDEF glyph class processing.
+- Add decompositions for Chakma, Limbu, and Balinese in USE shaper.
+- Misc fixes.
+
+
+Overview of changes leading to 1.3.4
+Monday, December 5, 2016
+====================================
+
+- Fix vertical glyph origin in hb-ot-font.
+- Implement CBDT/CBLC color font glyph extents in hb-ot-font.
+
+
+Overview of changes leading to 1.3.3
+Wednesday, September 28, 2016
+====================================
+
+- Implement parsing of OpenType MATH table.
+New API:
+HB_OT_TAG_MATH
+HB_OT_MATH_SCRIPT
+hb_ot_math_constant_t
+hb_ot_math_kern_t
+hb_ot_math_glyph_variant_t
+hb_ot_math_glyph_part_flags_t
+hb_ot_math_glyph_part_t
+hb_ot_math_has_data
+hb_ot_math_get_constant
+hb_ot_math_get_glyph_italics_correction
+hb_ot_math_get_glyph_top_accent_attachment
+hb_ot_math_get_glyph_kerning
+hb_ot_math_is_glyph_extended_shape
+hb_ot_math_get_glyph_variants
+hb_ot_math_get_min_connector_overlap
+hb_ot_math_get_glyph_assembly
+
+
+Overview of changes leading to 1.3.2
+Wednesday, September 27, 2016
+====================================
+
+- Fix build of hb-coretext on older OS X versions.
+
+
+Overview of changes leading to 1.3.1
+Wednesday, September 7, 2016
+====================================
+
+- Blacklist bad GDEF of more fonts (Padauk).
+- More CoreText backend crash fixes with OS X 10.9.5.
+- Misc fixes.
+
+
+Overview of changes leading to 1.3.0
+Thursday, July 21, 2016
+====================================
+
+- Update to Unicode 9.0.0
+- Move Javanese from Indic shaper to Universal Shaping Engine.
+- Allow MultipleSubst to delete a glyph (matching Windows engine).
+- Update Universal Shaping Engine to latest draft from Microsoft.
+- DirectWrite backend improvements. Note: this backend is for testing ONLY.
+- CoreText backend improvements with unreachable fonts.
+- Implement symbol fonts (cmap 3.0.0) in hb-ft and hb-ot-font.
+- Blacklist bad GDEF of more fonts (Tahoma & others).
+- Misc fixes.
+
+
+Overview of changes leading to 1.2.7
+Monday, May 2, 2016
+====================================
+
+- Blacklist another version of Times New Roman (Bold) Italic from Windows 7.
+- Fix Mongolian Free Variation Selectors shaping with certain fonts.
+- Fix Tibetan shorthand contractions shaping.
+- Improved list of language tag mappings.
+- Unbreak build on Windows CE.
+- Make 'glyf' table loading lazy in hb-ot-font.
+
+
+Overview of changes leading to 1.2.6
+Friday, April 8, 2016
+====================================
+
+- Blacklist GDEF table of another set of Times New Roman (Bold) Italic.
+- DirectWrite backend improvements. Note: DirectWrite backend is
+ exclusively for our internal testing and should NOT be used in any
+ production system whatsoever.
+
+
+Overview of changes leading to 1.2.5
+Monday, April 4, 2016
+====================================
+
+- Fix GDEF mark-filtering-set, which was broken in 1.2.3.
+
+
+Overview of changes leading to 1.2.4
+Thursday, March 17, 2016
+====================================
+
+- Synthesize GDEF glyph class for any glyph that does not have one in GDEF.
+ I really hope we don't discover broken fonts that shape badly with this
+ change.
+- Misc build and other minor fixes.
+- API changes:
+ - Added HB_NDEBUG. It's fine for production systems to define this to
+ disable high-overhead debugging checks. However, I also reduced the
+ overhead of those checks, so it's a non-issue right now. You can
+ forget it. Just not defining anything at all is fine.
+
+
+Overview of changes leading to 1.2.3
+Thursday, February 25, 2016
+====================================
+
+- Blacklist GDEF table of certain versions of Times New Roman (Bold) Italic,
+ due to bug in glyph class of ASCII double-quote character. This should
+ address "regression" introduced in 1.2.0 when we switched mark zeroing
+ in most shapers from BY_UNICODE_LATE to BY_GDEF_LATE.
+ This fourth release in a week should finally stabilize things...
+
+- hb-ot-font's get_glyph() implementation saw some optimizations. Though,
+ might be really hard to measure in real-world situations.
+
+- Also, two rather small API changes:
+
+We now disable some time-consuming internal bookkeeping if built with NDEBUG
+defined. This is a first time that we use NDEBUG to disable debug code. If
+there exist production systems that do NOT want to enable NDEBUG, please let
+me know and I'll add HB_NDEBUG.
+
+Added get_nominal_glyph() and get_variation_glyph() instead of get_glyph()
+
+New API:
+- hb_font_get_nominal_glyph_func_t
+- hb_font_get_variation_glyph_func_t
+- hb_font_funcs_set_nominal_glyph_func()
+- hb_font_funcs_set_variation_glyph_func()
+- hb_font_get_nominal_glyph()
+- hb_font_get_variation_glyph()
+
+Deprecated API:
+- hb_font_get_glyph_func_t
+- hb_font_funcs_set_glyph_func()
+
+Clients that implement their own font-funcs are encouraged to replace
+their get_glyph() implementation with a get_nominal_glyph() and
+get_variation_glyph() pair. The variation version can assume that
+variation_selector argument is not zero. Old (deprecated) functions
+will continue working indefinitely using internal gymnastics; it is
+just more efficient to use the new functions.
+
+
+Overview of changes leading to 1.2.2
+Wednesday, February 24, 2016
+====================================
+
+- Fix regression with mark positioning with fonts that have
+ non-zero mark advances. This was introduced in 1.2.0 while
+ trying to make mark and cursive attachments to work together.
+ I have partially reverted that, so this version is much more
+ like what we had before. All clients who updated to 1.2.0
+ should update to this version.
+
+
+Overview of changes leading to 1.2.1
+Tuesday, February 23, 2016
+====================================
+
+- CoreText: Fix bug with wrong scale if font scale was changed later.
+ https://github.com/libass/libass/issues/212
+- CoreText: Drastically speed up font initialization.
+- CoreText: Fix tiny leak.
+- Group ZWJ/ZWNJ with previous syllable under cluster-level=0.
+ https://github.com/harfbuzz/harfbuzz/issues/217
+- Add test/shaping/README.md about how to add tests to the suite.
+
+
+Overview of changes leading to 1.2.0
+Friday, February 19, 2016
+====================================
+
+- Fix various issues (hangs mostly) in case of memory allocation failure.
+- Change mark zeroing types of most shapers from BY_UNICODE_LATE to
+ BY_GDEF_LATE. This seems to be what Uniscribe does.
+- Change mark zeroing of USE shaper from NONE to BY_GDEF_EARLY. That's
+ what Windows does.
+- Allow GPOS cursive connection on marks, and fix the interaction with
+ mark attachment. This work resulted in some changes to how mark
+ attachments work. See:
+ https://github.com/harfbuzz/harfbuzz/issues/211
+ https://github.com/harfbuzz/harfbuzz/commit/86c68c7a2c971efe8e35b1f1bd99401dc8b688d2
+- Graphite2 shaper: improved negative advance handling (eg. Nastaliq).
+- Add nmake-based build system for Windows.
+- Minor speedup.
+- Misc. improvements.
+
+
+Overview of changes leading to 1.1.3
+Monday, January 11, 2016
+====================================
+
+- Ported Indic shaper to Unicode 8.0 data.
+- Universal Shaping Engine fixes.
+- Speed up CoreText shaper when font fallback happens in CoreText.
+- Documentation improvements, thanks to Khaled Hosny.
+- Very rough directwrite shaper for testing, thanks to Ebrahim Byagowi.
+- Misc bug fixes.
+- New API:
+
+ * Font extents:
+ hb_font_extents_t
+ hb_font_get_font_extents_func_t
+ hb_font_get_font_h_extents_func_t
+ hb_font_get_font_v_extents_func_t
+ hb_font_funcs_set_font_h_extents_func
+ hb_font_funcs_set_font_v_extents_func
+ hb_font_get_h_extents
+ hb_font_get_v_extents
+ hb_font_get_extents_for_direction
+
+ * Buffer message (aka debug):
+ hb_buffer_message_func_t
+ hb_buffer_set_message_func()
+ Actual message protocol to be fleshed out later.
+
+
+Overview of changes leading to 1.1.2
+Wednesday, November 26, 2015
+====================================
+
+- Fix badly-broken fallback shaper that affected terminology.
+ https://github.com/harfbuzz/harfbuzz/issues/187
+- Fix y_scaling in Graphite shaper.
+- API changes:
+ * An unset glyph_h_origin() function in font-funcs now (sensibly)
+ implies horizontal origin at 0,0. Ie, the nil callback returns
+ true instead of false. As such, implementations that have a
+ glyph_h_origin() that simply returns true, can remove that function
+ with HarfBuzz >= 1.1.2. This results in a tiny speedup.
+
+
+Overview of changes leading to 1.1.1
+Wednesday, November 24, 2015
+====================================
+
+- Build fixes, specially for hb-coretext.
+
+
+Overview of changes leading to 1.1.0
+Wednesday, November 18, 2015
+====================================
+
+- Implement 'stch' stretch feature for Syriac Abbreviation Mark.
+ https://github.com/harfbuzz/harfbuzz/issues/141
+- Disable use of decompose_compatibility() callback.
+- Implement "shaping" of various Unicode space characters, even
+ if the font does not support them.
+ https://github.com/harfbuzz/harfbuzz/issues/153
+- If font does not support U+2011 NO-BREAK HYPHEN, fallback to
+ U+2010 HYPHEN.
+- Changes resulting from libFuzzer continuous fuzzing:
+ * Reject font tables that need more than 8 edits,
+ * Bound buffer growth during shaping to 32x,
+ * Fix assertions and other issues at OOM / buffer max-growth.
+- Misc fixes and optimizations.
+- API changes:
+ * All fonts created with hb_font_create() now inherit from
+ (ie. have parent) hb_font_get_empty().
+
+
+Overview of changes leading to 1.0.6
+Thursday, October 15, 2015
+====================================
+
+- Reduce max nesting level in OT lookups from 8 to 6.
+ Should not affect any real font as far as I know.
+- Fix memory access issue in ot-font.
+- Revert default load-flags of fonts created using hb_ft_font_create()
+ back to FT_LOAD_DEFAULT|FT_LOAD_NO_HINTING. This was changed in
+ last release (1.0.5), but caused major issues, so revert.
+ https://github.com/harfbuzz/harfbuzz/issues/143
+
+
+Overview of changes leading to 1.0.5
+Tuesday, October 13, 2015
+====================================
+
+- Fix multiple memory access bugs discovered using libFuzzer.
+ https://github.com/harfbuzz/harfbuzz/issues/139
+ Everyone should upgrade to this version as soon as possible.
+ We now have continuous fuzzing set up, to avoid issues like
+ these creeping in again.
+- Misc fixes.
+
+- New API:
+ * hb_font_set_parent().
+ * hb_ft_font_[sg]et_load_flags()
+ The default flags for fonts created using hb_ft_font_create()
+ has changed to default to FT_LOAD_DEFAULT now. Previously it
+ was defaulting to FT_LOAD_DEFAULT|FT_LOAD_NO_HINTING.
+
+- API changes:
+ * Fonts now default to units-per-EM as their scale, instead of 0.
+ * hb_font_create_sub_font() does NOT make parent font immutable
+ anymore. hb_font_make_immutable() does.
+
+
+Overview of changes leading to 1.0.4
+Wednesday, September 30, 2015
+====================================
+
+- Fix minor out-of-bounds read error.
+
+
+Overview of changes leading to 1.0.3
+Tuesday, September 1, 2015
+====================================
+
+- Start of user documentation, from Simon Cozens!
+- Implement glyph_extents() for TrueType fonts in hb-ot-font.
+- Improve GPOS cursive attachments with conflicting lookups.
+- More fixes for cluster-level = 1.
+- Uniscribe positioning fix.
+
+
+Overview of changes leading to 1.0.2
+Wednesday, August 19, 2015
+====================================
+
+- Fix shaping with cluster-level > 0.
+- Fix Uniscribe backend font-size scaling.
+- Declare dependencies in harfbuzz.pc.
+ FreeType is not declared though, to avoid bugs in pkg-config
+ 0.26 with recursive dependencies.
+- Slightly improved debug infrastructure. More to come later.
+- Misc build fixes.
+
+
+Overview of changes leading to 1.0.1
+Monday, July 27, 2015
+====================================
+
+- Fix out-of-bounds access in USE shaper.
+
+
+Overview of changes leading to 1.0.0
+Sunday, July 26, 2015
+====================================
+
+- Implement Universal Shaping Engine:
+ https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm
+ http://blogs.windows.com/bloggingwindows/2015/02/23/windows-shapes-the-worlds-languages/
+- Bump version to 1.0.0. The soname was NOT bumped.
+
+
+Overview of changes leading to 0.9.42
+Thursday, July 26, 2015
+=====================================
+
+- New API to allow for retrieving finer-grained cluster
+ mappings if the client desires to handle them. Default
+ behavior is unchanged.
+- Fix cluster merging when removing default-ignorables.
+- Update to Unicode 8.0
+- hb-graphite2 fixes.
+- Misc fixes.
+- Removed HB_NO_MERGE_CLUSTERS hack.
+- New API:
+ hb_buffer_cluster_level_t enum
+ hb_buffer_get_cluster_level()
+ hb_buffer_set_cluster_level()
+ hb-shape / hb-view --cluster-level
+
+
+Overview of changes leading to 0.9.41
+Thursday, June 18, 2015
+=====================================
+
+- Fix hb-coretext with trailing whitespace in right-to-left.
+- New API: hb_buffer_reverse_range().
+- Allow implementing atomic ops in config.h.
+- Fix hb_language_t in language bindings.
+- Misc fixes.
+
+
+Overview of changes leading to 0.9.40
+Friday, March 20, 2015
+=====================================
+
+- Another hb-coretext crasher fix. Ouch!
+- Happy Norouz!
+
+
+Overview of changes leading to 0.9.39
+Wednesday, March 4, 2015
+=====================================
+
+- Critical hb-coretext fixes.
+- Optimizations and refactoring; no functional change
+ expected.
+- Misc build fixes.
+
+
+Overview of changes leading to 0.9.38
+Friday, January 23, 2015
+=====================================
+
+- Fix minor out-of-bounds access in Indic shaper.
+- Change New Tai Lue shaping engine from South-East Asian to default,
+ reflecting change in Unicode encoding model.
+- Add hb-shape --font-size. Can take up to two numbers for separate
+ x / y size.
+- Fix CoreText and FreeType scale issues with negative scales.
+- Reject blobs larger than 2GB. This might break some icu-le-hb clients
+ that need security fixes. See:
+ http://www.icu-project.org/trac/ticket/11450
+- Avoid accessing font tables during face destruction, in case rogue
+ clients released face data already.
+- Fix up gobject-introspection a bit. Python bindings kinda working.
+ See README.python.
+- Misc fixes.
+- API additions:
+ hb_ft_face_create_referenced()
+ hb_ft_font_create_referenced()
+
+
+Overview of changes leading to 0.9.37
+Wednesday, December 17, 2014
+=====================================
+
+- Fix out-of-bounds access in Context lookup format 3.
+- Indic: Allow ZWJ/ZWNJ before syllable modifiers.
+
+
+Overview of changes leading to 0.9.36
+Thursday, November 20, 2014
+=====================================
+
+- First time that three months went by without a release since
+ 0.9.2 was released on August 10, 2012!
+- Fix performance bug in hb_ot_collect_glyphs():
+ https://bugzilla.mozilla.org/show_bug.cgi?id=1090869
+- Add basic vertical-text support to hb-ot-font.
+- Misc build fixes.
+
+
+Overview of changes leading to 0.9.35
+Saturday, August 13, 2014
+=====================================
+
+- Fix major shape-plan caching bug when more than one shaper were
+ provided to hb_shape_full() (as exercised by XeTeX).
+ http://www.mail-archive.com/debian-bugs-dist@lists.debian.org/msg1246370.html
+- Fix Arabic fallback shaping regression. This was broken in 0.9.32.
+- Major hb-coretext fixes. That backend is complete now, including
+ respecting buffer direction and language, down to vertical writing.
+- Build fixes for Windows CE. Should build fine now.
+- Misc fixes:
+ Use atexit() only if it's safe to call from shared library
+ https://bugs.freedesktop.org/show_bug.cgi?id=82246
+ Mandaic had errors in its Unicode Joining_Type
+ https://bugs.freedesktop.org/show_bug.cgi?id=82306
+- API changes:
+
+ * hb_buffer_clear_contents() does not reset buffer flags now.
+
+ After 763e5466c0a03a7c27020e1e2598e488612529a7, one doesn't
+ need to set flags for different pieces of text. The flags now
+ are something the client sets up once, depending on how it
+ actually uses the buffer. As such, don't clear it in
+ clear_contents().
+
+ I don't expect any changes to be needed to any existing client.
+
+
+Overview of changes leading to 0.9.34
+Saturday, August 2, 2014
+=====================================
+
+- hb_feature_from_string() now accepts CSS font-feature-settings format.
+- As a result, hb-shape / hb-view --features also accept CSS-style strings.
+ Eg, "'liga' off" is accepted now.
+- Add old-spec Myanmar shaper:
+ https://bugs.freedesktop.org/show_bug.cgi?id=81775
+- Don't apply 'calt' in Hangul shaper.
+- Fix mark advance zeroing for Hebrew shaper:
+ https://bugs.freedesktop.org/show_bug.cgi?id=76767
+- Implement Windows-1256 custom Arabic shaping. Only built on Windows,
+ and requires help from get_glyph(). Used by Firefox.
+ https://bugzilla.mozilla.org/show_bug.cgi?id=1045139
+- Disable 'liga' in vertical text.
+- Build fixes.
+- API changes:
+
+ * Make HB_BUFFER_FLAG_BOT/EOT easier to use.
+
+ Previously, we expected users to provide BOT/EOT flags when the
+ text *segment* was at paragraph boundaries. This meant that for
+ clients that provide full paragraph to HarfBuzz (eg. Pango), they
+ had code like this:
+
+ hb_buffer_set_flags (hb_buffer,
+ (item_offset == 0 ? HB_BUFFER_FLAG_BOT : 0) |
+ (item_offset + item_length == paragraph_length ?
+ HB_BUFFER_FLAG_EOT : 0));
+
+ hb_buffer_add_utf8 (hb_buffer,
+ paragraph_text, paragraph_length,
+ item_offset, item_length);
+
+ After this change such clients can simply say:
+
+ hb_buffer_set_flags (hb_buffer,
+ HB_BUFFER_FLAG_BOT | HB_BUFFER_FLAG_EOT);
+
+ hb_buffer_add_utf8 (hb_buffer,
+ paragraph_text, paragraph_length,
+ item_offset, item_length);
+
+ Ie, HarfBuzz itself checks whether the segment is at the beginning/end
+ of the paragraph. Clients that only pass item-at-a-time to HarfBuzz
+ continue not setting any flags whatsoever.
+
+ Another way to put it is: if there's pre-context text in the buffer,
+ HarfBuzz ignores the BOT flag. If there's post-context, it ignores
+ EOT flag.
+
+
+Overview of changes leading to 0.9.33
+Tuesday, July 22, 2014
+=====================================
+
+- Turn off Arabic 'cswh' feature that was accidentally turned on.
+- Add HB_TAG_MAX_SIGNED.
+- Make hb_face_make_immutable() really make face immutable!
+- Windows build fixes.
+
+
+Overview of changes leading to 0.9.32
+Thursday, July 17, 2014
+=====================================
+
+- Apply Arabic shaping features in spec order exactly.
+- Another fix for Mongolian free variation selectors.
+- For non-Arabic scripts in Arabic shaper apply 'rlig' and 'calt'
+ together.
+- Minor adjustment to U+FFFD logic.
+- Fix hb-coretext build.
+
+
+Overview of changes leading to 0.9.31
+Wednesday, July 16, 2014
+=====================================
+
+- Only accept valid UTF-8/16/32; we missed many cases before.
+- Better shaping of invalid UTF-8/16/32. Falls back to
+ U+FFFD REPLACEMENT CHARACTER now.
+- With all changes in this release, the buffer will contain fully
+ valid Unicode after hb_buffer_add_utf8/16/32 no matter how
+ broken the input is. This can be overridden though. See below.
+- Fix Mongolian Variation Selectors for fonts without GDEF.
+- Fix minor invalid buffer access.
+- Accept zh-Hant and zh-Hans language tags. hb_ot_tag_to_language()
+ now uses these instead of private tags.
+- Build fixes.
+- New API:
+ * hb_buffer_add_codepoints(). This does what hb_buffer_add_utf32()
+ used to do, ie. no validity check on the input at all. add_utf32
+ now replaces invalid Unicode codepoints with the replacement
+ character (see below).
+ * hb_buffer_set_replacement_codepoint()
+ * hb_buffer_get_replacement_codepoint()
+ Previously, in hb_buffer_add_utf8 and hb_buffer_add_utf16, when
+ we detected broken input, we replaced that with (hb_codepoint_t)-1.
+ This has changed to use U+FFFD now, but can be changed using these
+ new API.
+
+
+Overview of changes leading to 0.9.30
+Wednesday, July 9, 2014
+=====================================
+
+- Update to Unicode 7.0.0:
+ * New scripts Manichaean and Psalter Pahlavi are shaped using
+ Arabic shaper.
+ * All the other new scripts go through the generic shaper for
+ now.
+- Minor Indic improvements.
+- Fix graphite2 backend cluster mapping [crasher!]
+- API changes:
+ * New HB_SCRIPT_* values for Unicode 7.0 scripts.
+ * New function hb_ot_layout_language_get_required_feature().
+- Build fixes.
+
+
+Overview of changes leading to 0.9.29
+Thursday, May 29, 2014
+=====================================
+
+- Implement cmap in hb-ot-font.h. No variation-selectors yet.
+- Myanmar: Allow MedialYa+Asat.
+- Various Indic fixes:
+ * Support most characters in Extended Devanagari and Vedic
+ Unicode blocks.
+ * Allow digits and some punctuation as consonant placeholders.
+- Build fixes.
+
+
+Overview of changes leading to 0.9.28
+Monday, April 28, 2014
+=====================================
+
+- Unbreak old-spec Indic shaping. (bug 76705)
+- Fix shaping of U+17DD and U+0FC6.
+- Add HB_NO_MERGE_CLUSTERS build option. NOT to be enabled by default
+ for shipping libraries. It's an option for further experimentation
+ right now. When we are sure how to do it properly, we will add
+ public run-time API for the functionality.
+- Build fixes.
+
+
+Overview of changes leading to 0.9.27
+Tuesday, March 18, 2014
+=====================================
+
+- Don't use "register" storage class specifier
+- Wrap definition of free_langs() with HAVE_ATEXIT
+- Add coretext_aat shaper and hb_coretext_face_create() constructor
+- If HAVE_ICU_BUILTIN is defined, use hb-icu Unicode callbacks
+- Add Myanmar test case from OpenType Myanmar spec
+- Only do fallback Hebrew composition if no GPOS 'mark' available
+- Allow bootstrapping without gtk-doc
+- Use AM_MISSING_PROG for ragel and git
+- Typo in ucdn's Makefile.am
+- Improve MemoryBarrier() implementation
+
+
+Overview of changes leading to 0.9.26
+Thursday, January 30, 2014
+=====================================
+
+- Misc fixes.
+- Fix application of 'rtlm' feature.
+- Automatically apply frac/numr/dnom around U+2044 FRACTION SLASH.
+- New header: hb-ot-shape.h
+- Uniscribe: fix scratch-buffer accounting.
+- Reorder Tai Tham SAKOT to after tone-marks.
+- Add Hangul shaper.
+- New files:
+ hb-ot-shape-complex-hangul.cc
+ hb-ot-shape-complex-hebrew.cc
+ hb-ot-shape-complex-tibetan.cc
+- Disable 'cswh' feature in Arabic shaper.
+- Coretext: better handle surrogate pairs.
+- Add HB_TAG_MAX and _HB_SCRIPT_MAX_VALUE.
+
+
+Overview of changes leading to 0.9.25
+Wednesday, December 4, 2013
+=====================================
+
+- Myanmar shaper improvements.
+- Avoid font fallback in CoreText backend.
+- Additional OpenType language tag mappings.
+- More aggressive shape-plan caching.
+- Build with / require automake 1.13.
+- Build with libtool 2.4.2.418 alpha to support ppc64le.
+
+
+Overview of changes leading to 0.9.24
+Tuesday, November 13, 2013
+=====================================
+
+- Misc compiler warning fixes with clang.
+- No functional changes.
+
+
+Overview of changes leading to 0.9.23
+Monday, October 28, 2013
+=====================================
+
+- "Udupi HarfBuzz Hackfest", Paris, October 14..18 2013.
+- Fix (Chain)Context recursion with non-monotone lookup positions.
+- Misc Indic bug fixes.
+- New Javanese / Buginese shaping, similar to Windows 8.1.
+
+
+Overview of changes leading to 0.9.22
+Thursday, October 3, 2013
+=====================================
+
+- Fix use-after-end-of-scope in hb_language_from_string().
+- Fix hiding of default_ignorables if font doesn't have space glyph.
+- Protect against out-of-range lookup indices.
+
+- API Changes:
+
+ * Added hb_ot_layout_table_get_lookup_count()
+
+
+Overview of changes leading to 0.9.21
+Monday, September 16, 2013
+=====================================
+
+- Rename gobject-introspection library name from harfbuzz to HarfBuzz.
+- Remove (long disabled) hb-old and hb-icu-le test shapers.
+- Misc gtk-doc and gobject-introspection annotations.
+- Misc fixes.
+- API changes:
+
+ * Add HB_SET_VALUE_INVALID
+
+Overview of changes leading to 0.9.20
+Thursday, August 29, 2013
+=====================================
+
+General:
+- Misc substitute_closure() fixes.
+- Build fixes.
+
+Documentation:
+- gtk-doc boilerplate integrated. Docs are built now, but
+ contain no contents. By next release hopefully we have
+ some content in. Enable using --enable-gtk-doc.
+
+GObject and Introspection:
+- Added harfbuzz-gobject library (hb-gobject.h) that has type
+ bindings for all HarfBuzz objects and enums. Enable using
+ --with-gobject.
+- Added gobject-introspection boilerplate. Nothing useful
+ right now. Work in progress. Gets enabled automatically if
+ --with-gobject is used. Override with --disable-introspection.
+
+OpenType shaper:
+- Apply 'mark' in Myanmar shaper.
+- Don't apply 'dlig' by default.
+
+Uniscribe shaper:
+- Support user features.
+- Fix loading of fonts that are also installed on the system.
+- Fix shaping of Arabic Presentation Forms.
+- Fix build with wide chars.
+
+CoreText shaper:
+- Support user features.
+
+Source changes:
+- hb_face_t code moved to hb-face.h / hb-face.cc.
+- Added hb-deprecated.h.
+
+API changes:
+- Added HB_DISABLE_DEPRECATED.
+- Deprecated HB_SCRIPT_CANADIAN_ABORIGINAL; replaced by
+ HB_SCRIPT_CANADIAN_SYLLABICS.
+- Deprecated HB_BUFFER_FLAGS_DEFAULT; replaced by
+ HB_BUFFER_FLAG_DEFAULT.
+- Deprecated HB_BUFFER_SERIALIZE_FLAGS_DEFAULT; replaced by
+ HB_BUFFER_SERIALIZE_FLAG_DEFAULT.
+
+
+Overview of changes leading to 0.9.19
+Tuesday, July 16, 2013
+=====================================
+
+- Build fixes.
+- Better handling of multiple variation selectors in a row.
+- Pass on variation selector to GSUB if not consumed by cmap.
+- Fix undefined memory access.
+- Add Javanese config to Indic shaper.
+- Misc bug fixes.
+
+Overview of changes leading to 0.9.18
+Tuesday, May 28, 2013
+=====================================
+
+New build system:
+
+- All unneeded code is all disabled by default,
+
+- Uniscribe and CoreText shapers can be enabled with their --with options,
+
+- icu_le and old shapers cannot be enabled for now,
+
+- glib, freetype, and cairo will be detected automatically.
+ They can be force on/off'ed with their --with options,
+
+- icu and graphite2 are default off, can be enabled with their --with
+ options,
+
+Moreover, ICU support is now built into a separate library:
+libharfbuzz-icu.so, and a new harfbuzz-icu.pc is shipped for it.
+Distros can enable ICU now without every application on earth
+getting linked to it via libharfbuzz.so.
+
+For distros I recommend that they make sure they are building --with-glib
+--with-freetype --with-cairo, --with-icu, and optionally --with-graphite2;
+And package harfbuzz and harfbuzz-icu separately.
+
+
+Overview of changes leading to 0.9.17
+Monday, May 20, 2013
+=====================================
+
+- Build fixes.
+- Fix bug in hb_set_get_min().
+- Fix regression with Arabic mark positioning / width-zeroing.
+
+Overview of changes leading to 0.9.16
+Friday, April 19, 2013
+=====================================
+
+- Major speedup in OpenType lookup processing. With the Amiri
+ Arabic font, this release is over 3x faster than previous
+ release. All scripts / languages should see this speedup.
+
+- New --num-iterations option for hb-shape / hb-view; useful for
+ profiling.
+
+Overview of changes leading to 0.9.15
+Friday, April 05, 2013
+=====================================
+
+- Build fixes.
+- Fix crasher in graphite2 shaper.
+- Fix Arabic mark width zeroing regression.
+- Don't compose Hangul jamo into Unicode syllables.
+
+
+Overview of changes leading to 0.9.14
+Thursday, March 21, 2013
+=====================================
+
+- Build fixes.
+- Fix time-consuming sanitize with malicious fonts.
+- Implement hb_buffer_deserialize_glyphs() for both json and text.
+- Do not ignore Hangul filler characters.
+- Indic fixes:
+ * Fix Malayalam pre-base reordering interaction with post-forms.
+ * Further adjust ZWJ handling. Should fix known regressions from
+ 0.9.13.
+
+
+Overview of changes leading to 0.9.13
+Thursday, February 25, 2013
+=====================================
+
+- Build fixes.
+- Ngapi HarfBuzz Hackfest in London (February 2013):
+ * Fixed all known Indic bugs,
+ * New Win8-style Myanmar shaper,
+ * New South-East Asian shaper for Tai Tham, Cham, and New Tai Lue,
+ * Smartly ignore Default_Ignorable characters (joiners, etc) when
+ matching GSUB/GPOS lookups,
+ * Fix 'Phags-Pa U+A872 shaping,
+ * Fix partial disabling of default-on features,
+ * Allow disabling of TrueType kerning.
+- Fix possible crasher with broken fonts with overlapping tables.
+- Removed generated files from git again. So, one needs ragel to
+ bootstrap from the git tree.
+
+API changes:
+- hb_shape() and related APIs now abort if buffer direction is
+ HB_DIRECTION_INVALID. Previously, hb_shape() was calling
+ hb_buffer_guess_segment_properties() on the buffer before
+ shaping. The heuristics in that function are fragile. If the
+ user really wants the old behavior, they can call that function
+ right before calling hb_shape() to get the old behavior.
+- hb_blob_create_sub_blob() always creates sub-blob with
+ HB_MEMORY_MODE_READONLY. See comments for the reason.
+
+
+Overview of changes leading to 0.9.12
+Thursday, January 18, 2013
+=====================================
+
+- Build fixes for Sun compiler.
+- Minor bug fix.
+
+Overview of changes leading to 0.9.11
+Thursday, January 10, 2013
+=====================================
+
+- Build fixes.
+- Fix GPOS mark attachment with null Anchor offsets.
+- [Indic] Fix old-spec reordering of viramas if sequence ends in one.
+- Fix multi-threaded shaper data creation crash.
+- Add atomic ops for Solaris.
+
+API changes:
+- Rename hb_buffer_clear() to hb_buffer_clear_contents().
+
+
+Overview of changes leading to 0.9.10
+Thursday, January 3, 2013
+=====================================
+
+- [Indic] Fixed rendering of Malayalam dot-reph
+- Updated OT language tags.
+- Updated graphite2 backend.
+- Improved hb_ot_layout_get_size_params() logic.
+- Improve hb-shape/hb-view help output.
+- Fixed hb-set.h implementation to not crash.
+- Fixed various issues with hb_ot_layout_collect_lookups().
+- Various build fixes.
+
+New API:
+
+hb_graphite2_face_get_gr_face()
+hb_graphite2_font_get_gr_font()
+hb_coretext_face_get_cg_font()
+
+Modified API:
+
+hb_ot_layout_get_size_params()
+
+
+Overview of changes leading to 0.9.9
+Wednesday, December 5, 2012
+====================================
+
+- Fix build on Windows.
+- Minor improvements.
+
+
+Overview of changes leading to 0.9.8
+Tuesday, December 4, 2012
+====================================
+
+
+- Actually implement hb_shape_plan_get_shaper ().
+- Make UCDB data tables const.
+- Lots of internal refactoring in OTLayout tables.
+- Flesh out hb_ot_layout_lookup_collect_glyphs().
+
+New API:
+
+hb_ot_layout_collect_lookups()
+hb_ot_layout_get_size_params()
+
+
+Overview of changes leading to 0.9.7
+Sunday, November 21, 2012
+====================================
+
+
+HarfBuzz "All-You-Can-Eat-Sushi" (aka Vancouver) Hackfest and follow-on fixes.
+
+- Fix Arabic contextual joining using pre-context text.
+- Fix Sinhala "split matra" mess.
+- Fix Khmer shaping with broken fonts.
+- Implement Thai "PUA" shaping for old fonts.
+- Do NOT route Kharoshthi script through the Indic shaper.
+- Disable fallback positioning for Indic and Thai shapers.
+- Misc fixes.
+
+
+hb-shape / hb-view changes:
+
+- Add --text-before and --text-after
+- Add --bot / --eot / --preserve-default-ignorables
+- hb-shape --output-format=json
+
+
+New API:
+
+hb_buffer_clear()
+
+hb_buffer_flags_t
+
+HB_BUFFER_FLAGS_DEFAULT
+HB_BUFFER_FLAG_BOT
+HB_BUFFER_FLAG_EOT
+HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES
+
+hb_buffer_set_flags()
+hb_buffer_get_flags()
+
+HB_BUFFER_SERIALIZE_FLAGS
+hb_buffer_serialize_glyphs()
+hb_buffer_deserialize_glyphs()
+hb_buffer_serialize_list_formats()
+
+hb_set_add_range()
+hb_set_del_range()
+hb_set_get_population()
+hb_set_next_range()
+
+hb_face_[sg]et_glyph_count()
+
+hb_segment_properties_t
+HB_SEGMENT_PROPERTIES_DEFAULT
+hb_segment_properties_equal()
+hb_segment_properties_hash()
+
+hb_buffer_set_segment_properties()
+hb_buffer_get_segment_properties()
+
+hb_ot_layout_glyph_class_t
+hb_ot_layout_get_glyph_class()
+hb_ot_layout_get_glyphs_in_class()
+
+hb_shape_plan_t
+hb_shape_plan_create()
+hb_shape_plan_create_cached()
+hb_shape_plan_get_empty()
+hb_shape_plan_reference()
+hb_shape_plan_destroy()
+hb_shape_plan_set_user_data()
+hb_shape_plan_get_user_data()
+hb_shape_plan_execute()
+hb_shape_plan_get_shaper()
+
+hb_ot_shape_plan_collect_lookups()
+
+
+API changes:
+
+- Remove "mask" parameter from hb_buffer_add().
+- Rename hb_ot_layout_would_substitute_lookup() and hb_ot_layout_substitute_closure_lookup().
+- hb-set.h API const correction.
+- Renamed hb_set_min/max() to hb_set_get_min/max().
+- Rename hb_ot_layout_feature_get_lookup_indexes() to hb_ot_layout_feature_get_lookups().
+- Rename hb_buffer_guess_properties() to hb_buffer_guess_segment_properties().
+
+
+
+Overview of changes leading to 0.9.6
+Sunday, November 13, 2012
+====================================
+
+- Don't clear pre-context text if no new context is provided.
+- Fix ReverseChainingSubstLookup, which was totally borked.
+- Adjust output format of hb-shape a bit.
+- Include config.h.in in-tree. Makes it easier for alternate build systems.
+- Fix hb_buffer_set_length(buffer, 0) invalid memory allocation.
+- Use ICU LayoutEngine's C API instead of C++. Avoids much headache.
+- Drop glyphs for all of Unicode Default_Ignorable characters.
+- Misc build fixes.
+
+Arabic shaper:
+- Enable 'dlig' and 'mset' features in Arabic shaper.
+- Implement 'Phags-pa shaping, improve Mongolian.
+
+Indic shaper:
+- Decompose Sinhala split matras the way old HarfBuzz / Pango did.
+- Initial support for Consonant Medials.
+- Start adding new-style Myanmar shaping.
+- Make reph and 'pref' logic introspect the font.
+- Route Meetei-Mayek through the Indic shaper.
+- Don't apply 'liga' in Indic shaper.
+- Improve Malayalam pre-base reordering Ra interaction with Chillus.
+
+
+
+Overview of changes leading to 0.9.5
+Sunday, October 14, 2012
+====================================
+
+- Synthetic-GSUB Arabic fallback shaping.
+
+- Misc Indic improvements.
+
+- Add build system support for pthread.
+
+- Imported UCDN for in-tree Unicode callbacks implementation.
+
+- Context-aware Arabic joining.
+
+- Misc other fixes.
+
+- New API:
+
+ hb_feature_to/from-string()
+ hb_buffer_[sg]et_content_type()
+
+
+
+Overview of changes leading to 0.9.4
+Tuesday, Sep 03, 2012
+====================================
+
+- Indic improvements with old-spec Malayalam.
+
+- Better fallback glyph positioning, specially with Thai / Lao marks.
+
+- Implement dotted-circle insertion.
+
+- Better Arabic fallback shaping / ligation.
+
+- Added ICU LayoutEngine backend for testing. Call it by the 'icu_le' name.
+
+- Misc fixes.
+
+
+
+Overview of changes leading to 0.9.3
+Friday, Aug 18, 2012
+====================================
+
+- Fixed fallback mark positioning for left-to-right text.
+
+- Improve mark positioning for the remaining combining classes.
+
+- Unbreak Thai and fallback Arabic shaping.
+
+- Port Arabic shaper to shape-plan caching.
+
+- Use new ICU normalizer functions.
+
+
+
+Overview of changes leading to 0.9.2
+Friday, Aug 10, 2012
+====================================
+
+- Over a thousand commits! This is the first major release of HarfBuzz.
+
+- HarfBuzz is feature-complete now! It should be on par with, or better than,
+ both Pango's shapers and old HarfBuzz / Qt shapers.
+
+- New Indic shaper, supporting main Indic scripts, Sinhala, and Khmer.
+
+- Improved Arabic shaper, with fallback Arabic shaping, supporting Arabic,
+ Sinhala, N'ko, Mongolian, and Mandaic.
+
+- New Thai / Lao shaper.
+
+- Tibetan / Hangul support in the generic shaper.
+
+- Synthetic GDEF support for fonts without a GDEF table.
+
+- Fallback mark positioning for fonts without a GPOS table.
+
+- Unicode normalization shaping heuristic during glyph mapping.
+
+- New experimental Graphite2 backend.
+
+- New Uniscribe backend (primarily for testing).
+
+- New CoreText backend (primarily for testing).
+
+- Major optimization and speedup.
+
+- Test suites and testing infrastructure (work in progress).
+
+- Greatly improved hb-view cmdline tool.
+
+- hb-shape cmdline tool.
+
+- Unicode 6.1 support.
+
+Summary of API changes:
+
+o Changed API:
+
+ - Users are expected to only include main header files now (ie. hb.h,
+ hb-glib.h, hb-ft.h, ...)
+
+ - All struct tag names had their initial underscore removed.
+ Ie. "struct _hb_buffer_t" is "struct hb_buffer_t" now.
+
+ - All set_user_data() functions now take a "replace" boolean parameter.
+
+ - hb_buffer_create() takes zero arguments now.
+ Use hb_buffer_pre_allocate() to pre-allocate.
+
+ - hb_buffer_add_utf*() now accept -1 for length parameters,
+ meaning "nul-terminated".
+
+ - hb_direction_t enum values changed.
+
+ - All *_from_string() APIs now take a length parameter to allow for
+ non-nul-terminated strings. A -1 length means "nul-terminated".
+
+ - Typedef for hb_language_t changed.
+
+ - hb_get_table_func_t renamed to hb_reference_table_func_t.
+
+ - hb_ot_layout_table_choose_script()
+
+ - Various renames in hb-unicode.h.
+
+o New API:
+
+ - hb_buffer_guess_properties()
+ Automatically called by hb_shape().
+
+ - hb_buffer_normalize_glyphs()
+
+ - hb_tag_from_string()
+
+ - hb-coretext.h
+
+ - hb-uniscribe.h
+
+ - hb_face_reference_blob()
+ - hb_face_[sg]et_index()
+ - hb_face_set_upem()
+
+ - hb_font_get_glyph_name_func_t
+ hb_font_get_glyph_from_name_func_t
+ hb_font_funcs_set_glyph_name_func()
+ hb_font_funcs_set_glyph_from_name_func()
+ hb_font_get_glyph_name()
+ hb_font_get_glyph_from_name()
+ hb_font_glyph_to_string()
+ hb_font_glyph_from_string()
+
+ - hb_font_set_funcs_data()
+
+ - hb_ft_font_set_funcs()
+ - hb_ft_font_get_face()
+
+ - hb-gobject.h (work in progress)
+
+ - hb_ot_shape_glyphs_closure()
+ hb_ot_layout_substitute_closure_lookup()
+
+ - hb-set.h
+
+ - hb_shape_full()
+
+ - hb_unicode_combining_class_t
+
+ - hb_unicode_compose_func_t
+ hb_unicode_decompose_func_t
+ hb_unicode_decompose_compatibility_func_t
+ hb_unicode_funcs_set_compose_func()
+ hb_unicode_funcs_set_decompose_func()
+ hb_unicode_funcs_set_decompose_compatibility_func()
+ hb_unicode_compose()
+ hb_unicode_decompose()
+ hb_unicode_decompose_compatibility()
+
+o Removed API:
+
+ - hb_ft_get_font_funcs()
+
+ - hb_ot_layout_substitute_start()
+ hb_ot_layout_substitute_lookup()
+ hb_ot_layout_substitute_finish()
+ hb_ot_layout_position_start()
+ hb_ot_layout_position_lookup()
+ hb_ot_layout_position_finish()
+
+
+
+Overview of changes leading to 0.6.0
+Friday, May 27, 2011
+====================================
+
+- Vertical text support in GPOS
+- Almost all API entries have unit tests now, under test/
+- All thread-safety issues are fixed
+
+Summary of API changes follows.
+
+
+* Simple Types API:
+
+ o New API:
+ HB_LANGUAGE_INVALID
+ hb_language_get_default()
+ hb_direction_to_string()
+ hb_direction_from_string()
+ hb_script_get_horizontal_direction()
+ HB_UNTAG()
+
+ o Renamed API:
+ hb_category_t renamed to hb_unicode_general_category_t
+
+ o Changed API:
+ hb_language_t is a typed pointer now
+
+ o Removed API:
+ HB_TAG_STR()
+
+
+* Use ISO 15924 tags for hb_script_t:
+
+ o New API:
+ hb_script_from_iso15924_tag()
+ hb_script_to_iso15924_tag()
+ hb_script_from_string()
+
+ o Changed API:
+ HB_SCRIPT_* enum members changed value.
+
+
+* Buffer API streamlined:
+
+ o New API:
+ hb_buffer_reset()
+ hb_buffer_set_length()
+ hb_buffer_allocation_successful()
+
+ o Renamed API:
+ hb_buffer_ensure() renamed to hb_buffer_pre_allocate()
+ hb_buffer_add_glyph() renamed to hb_buffer_add()
+
+ o Removed API:
+ hb_buffer_clear()
+ hb_buffer_clear_positions()
+
+ o Changed API:
+ hb_buffer_get_glyph_infos() takes an out length parameter now
+ hb_buffer_get_glyph_positions() takes an out length parameter now
+
+
+* Blob API streamlined:
+
+ o New API:
+ hb_blob_get_data()
+ hb_blob_get_data_writable()
+
+ o Renamed API:
+ hb_blob_create_empty() renamed to hb_blob_get_empty()
+
+ o Removed API:
+ hb_blob_lock()
+ hb_blob_unlock()
+ hb_blob_is_writable()
+ hb_blob_try_writable()
+
+ o Changed API:
+ hb_blob_create() takes user_data before destroy now
+
+
+* Unicode functions API:
+
+ o Unicode function vectors can subclass other unicode function vectors now.
+ Unimplemented callbacks in the subclass automatically chainup to the parent.
+
+ o All hb_unicode_funcs_t callbacks take a user_data now. Their setters
+ take a user_data and its respective destroy callback.
+
+ o New API:
+ hb_unicode_funcs_get_empty()
+ hb_unicode_funcs_get_default()
+ hb_unicode_funcs_get_parent()
+
+ o Changed API:
+ hb_unicode_funcs_create() now takes a parent_funcs.
+
+ o Removed func getter functions:
+ hb_unicode_funcs_get_mirroring_func()
+ hb_unicode_funcs_get_general_category_func()
+ hb_unicode_funcs_get_script_func()
+ hb_unicode_funcs_get_combining_class_func()
+ hb_unicode_funcs_get_eastasian_width_func()
+
+
+* Face API:
+
+ o Renamed API:
+ hb_face_get_table() renamed to hb_face_reference_table()
+ hb_face_create_for_data() renamed to hb_face_create()
+
+ o Changed API:
+ hb_face_create_for_tables() takes user_data before destroy now
+ hb_face_reference_table() returns empty blob instead of NULL
+ hb_get_table_func_t accepts the face as first parameter now
+
+* Font API:
+
+ o Fonts can subclass other fonts now. Unimplemented callbacks in the
+ subclass automatically chainup to the parent. When chaining up,
+ scale is adjusted if the parent font has a different scale.
+
+ o All hb_font_funcs_t callbacks take a user_data now. Their setters
+ take a user_data and its respective destroy callback.
+
+ o New API:
+ hb_font_get_parent()
+ hb_font_funcs_get_empty()
+ hb_font_create_sub_font()
+
+ o Removed API:
+ hb_font_funcs_copy()
+ hb_font_unset_funcs()
+
+ o Removed func getter functions:
+ hb_font_funcs_get_glyph_func()
+ hb_font_funcs_get_glyph_advance_func()
+ hb_font_funcs_get_glyph_extents_func()
+ hb_font_funcs_get_contour_point_func()
+ hb_font_funcs_get_kerning_func()
+
+ o Changed API:
+ hb_font_create() takes a face and references it now
+ hb_font_set_funcs() takes user_data before destroy now
+ hb_font_set_scale() accepts signed integers now
+ hb_font_get_contour_point_func_t now takes glyph first, then point_index
+ hb_font_get_glyph_func_t returns a success boolean now
+
+
+* Changed object model:
+
+ o All object types have a _get_empty() now:
+ hb_blob_get_empty()
+ hb_buffer_get_empty()
+ hb_face_get_empty()
+ hb_font_get_empty()
+ hb_font_funcs_get_empty()
+ hb_unicode_funcs_get_empty()
+
+ o Added _set_user_data() and _get_user_data() for all object types:
+ hb_blob_get_user_data()
+ hb_blob_set_user_data()
+ hb_buffer_get_user_data()
+ hb_buffer_set_user_data()
+ hb_face_get_user_data()
+ hb_face_set_user_data()
+ hb_font_funcs_get_user_data()
+ hb_font_funcs_set_user_data()
+ hb_font_get_user_data()
+ hb_font_set_user_data()
+ hb_unicode_funcs_get_user_data()
+ hb_unicode_funcs_set_user_data()
+
+ o Removed the _get_reference_count() from all object types:
+ hb_blob_get_reference_count()
+ hb_buffer_get_reference_count()
+ hb_face_get_reference_count()
+ hb_font_funcs_get_reference_count()
+ hb_font_get_reference_count()
+ hb_unicode_funcs_get_reference_count()
+
+ o Added _make_immutable() and _is_immutable() for all object types except for buffer:
+ hb_blob_make_immutable()
+ hb_blob_is_immutable()
+ hb_face_make_immutable()
+ hb_face_is_immutable()
+
+
+* Changed API for vertical text support
+
+ o The following callbacks were removed:
+ hb_font_get_glyph_advance_func_t
+ hb_font_get_kerning_func_t
+
+ o The following new callbacks added instead:
+ hb_font_get_glyph_h_advance_func_t
+ hb_font_get_glyph_v_advance_func_t
+ hb_font_get_glyph_h_origin_func_t
+ hb_font_get_glyph_v_origin_func_t
+ hb_font_get_glyph_h_kerning_func_t
+ hb_font_get_glyph_v_kerning_func_t
+
+ o The following API removed as such:
+ hb_font_funcs_set_glyph_advance_func()
+ hb_font_funcs_set_kerning_func()
+ hb_font_get_glyph_advance()
+ hb_font_get_kerning()
+
+ o New API added instead:
+ hb_font_funcs_set_glyph_h_advance_func()
+ hb_font_funcs_set_glyph_v_advance_func()
+ hb_font_funcs_set_glyph_h_origin_func()
+ hb_font_funcs_set_glyph_v_origin_func()
+ hb_font_funcs_set_glyph_h_kerning_func()
+ hb_font_funcs_set_glyph_v_kerning_func()
+ hb_font_get_glyph_h_advance()
+ hb_font_get_glyph_v_advance()
+ hb_font_get_glyph_h_origin()
+ hb_font_get_glyph_v_origin()
+ hb_font_get_glyph_h_kerning()
+ hb_font_get_glyph_v_kerning()
+
+ o The following higher-level API added for convenience:
+ hb_font_get_glyph_advance_for_direction()
+ hb_font_get_glyph_origin_for_direction()
+ hb_font_add_glyph_origin_for_direction()
+ hb_font_subtract_glyph_origin_for_direction()
+ hb_font_get_glyph_kerning_for_direction()
+ hb_font_get_glyph_extents_for_origin()
+ hb_font_get_glyph_contour_point_for_origin()
+
+
+* OpenType Layout API:
+
+ o New API:
+ hb_ot_layout_position_start()
+ hb_ot_layout_substitute_start()
+ hb_ot_layout_substitute_finish()
+
+
+* Glue code:
+
+ o New API:
+ hb_glib_script_to_script()
+ hb_glib_script_from_script()
+ hb_icu_script_to_script()
+ hb_icu_script_from_script()
+
+
+* Version API added:
+
+ o New API:
+ HB_VERSION_MAJOR
+ HB_VERSION_MINOR
+ HB_VERSION_MICRO
+ HB_VERSION_STRING
+ HB_VERSION_CHECK()
+ hb_version()
+ hb_version_string()
+ hb_version_check()
+
+
diff --git a/thirdparty/harfbuzz/THANKS b/thirdparty/harfbuzz/THANKS
new file mode 100644
index 0000000000..88cb7e9ea1
--- /dev/null
+++ b/thirdparty/harfbuzz/THANKS
@@ -0,0 +1,7 @@
+Bradley Grainger
+Kenichi Ishibashi
+Ivan Kuckir <https://photopea.com/>
+Ryan Lortie
+Jeff Muizelaar
+suzuki toshiya
+Philip Withnall
diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-ankr-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-ankr-table.hh
new file mode 100644
index 0000000000..f2785a6f58
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat-layout-ankr-table.hh
@@ -0,0 +1,98 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_AAT_LAYOUT_ANKR_TABLE_HH
+#define HB_AAT_LAYOUT_ANKR_TABLE_HH
+
+#include "hb-aat-layout-common.hh"
+
+/*
+ * ankr -- Anchor Point
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6ankr.html
+ */
+#define HB_AAT_TAG_ankr HB_TAG('a','n','k','r')
+
+
+namespace AAT {
+
+using namespace OT;
+
+
+struct Anchor /* One anchor point: an (x, y) pair of FWORD coordinates. */
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this)); /* Fixed-size record; the struct bounds check is the only validation. */
+ }
+
+ public:
+ FWORD xCoordinate; /* X coordinate of the anchor point. */
+ FWORD yCoordinate; /* Y coordinate of the anchor point. */
+ public:
+ DEFINE_SIZE_STATIC (4); /* Two FWORD fields. */
+};
+
+typedef LArrayOf<Anchor> GlyphAnchors;
+
+struct ankr /* 'ankr' table: per-glyph lists of anchor points, reached through a lookup table. */
+{
+ static constexpr hb_tag_t tableTag = HB_AAT_TAG_ankr;
+
+ const Anchor &get_anchor (hb_codepoint_t glyph_id, /* Glyph whose anchor list is consulted. */
+ unsigned int i, /* Index of the wanted anchor within that list. */
+ unsigned int num_glyphs) const /* Glyph count, forwarded to the lookup. */
+ {
+ const NNOffsetTo<GlyphAnchors> *offset = (this+lookupTable).get_value (glyph_id, num_glyphs);
+ if (!offset) /* The lookup has no value for this glyph. */
+ return Null (Anchor);
+ const GlyphAnchors &anchors = &(this+anchorData) + *offset; /* The looked-up offset is resolved against the anchorData block, not the table start. */
+ return anchors[i]; /* NOTE(review): result for i past the end depends on LArrayOf::operator[], defined elsewhere. */
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ version == 0 && /* Only version 0 is accepted. */
+ c->check_range (this, anchorData) &&
+ lookupTable.sanitize (c, this, &(this+anchorData)))); /* Lookup values are validated with anchorData as their base. */
+ }
+
+ protected:
+ HBUINT16 version; /* Version number (set to zero) */
+ HBUINT16 flags; /* Flags (currently unused; set to zero) */
+ LOffsetTo<Lookup<NNOffsetTo<GlyphAnchors>>>
+ lookupTable; /* Offset to the table's lookup table */
+ LNNOffsetTo<HBUINT8>
+ anchorData; /* Offset to the glyph data table */
+
+ public:
+ DEFINE_SIZE_STATIC (12);
+};
+
+} /* namespace AAT */
+
+
+#endif /* HB_AAT_LAYOUT_ANKR_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-bsln-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-bsln-table.hh
new file mode 100644
index 0000000000..cd36fc8953
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat-layout-bsln-table.hh
@@ -0,0 +1,158 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_AAT_LAYOUT_BSLN_TABLE_HH
+#define HB_AAT_LAYOUT_BSLN_TABLE_HH
+
+#include "hb-aat-layout-common.hh"
+
+/*
+ * bsln -- Baseline
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6bsln.html
+ */
+#define HB_AAT_TAG_bsln HB_TAG('b','s','l','n')
+
+
+namespace AAT {
+
+
+struct BaselineTableFormat0Part /* Distance-based baseline data: 32 deltas, no per-glyph lookup. */
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this)));
+ }
+
+ protected:
+ // Roman, Ideographic centered, Ideographic low, Hanging and Math
+ // are the default defined ones, but any other may be accessed also.
+ HBINT16 deltas[32]; /* These are the FUnit distance deltas from
+ * the font's natural baseline to the other
+ * baselines used in the font. */
+ public:
+ DEFINE_SIZE_STATIC (64); /* 32 entries of 16 bits each. */
+};
+
+struct BaselineTableFormat1Part /* Distance-based baseline data followed by a per-glyph lookup table. */
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ lookupTable.sanitize (c))); /* The trailing lookup is validated on top of the struct check. */
+ }
+
+ protected:
+ HBINT16 deltas[32]; /* FUnit distance deltas, as in BaselineTableFormat0Part. */
+ Lookup<HBUINT16>
+ lookupTable; /* Lookup table that maps glyphs to their
+ * baseline values. */
+ public:
+ DEFINE_SIZE_MIN (66); /* 64 bytes of deltas plus at least the 2-byte lookup format field. */
+};
+
+struct BaselineTableFormat2Part /* Control point-based baseline data taken from one standard glyph. */
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this)));
+ }
+
+ protected:
+ HBGlyphID stdGlyph; /* The specific glyph index number in this
+ * font that is used to set the baseline values.
+ * This is the standard glyph.
+ * This glyph must contain a set of control points
+ * (whose numbers are contained in the ctlPoints field)
+ * that are used to determine baseline distances. */
+ HBUINT16 ctlPoints[32]; /* Set of control point numbers,
+ * associated with the standard glyph.
+ * A value of 0xFFFF means there is no corresponding
+ * control point in the standard glyph. */
+ public:
+ DEFINE_SIZE_STATIC (66); /* 2-byte glyph ID plus 32 control point numbers of 16 bits each. */
+};
+
+struct BaselineTableFormat3Part /* Control point-based baseline data followed by a per-glyph lookup table. */
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) && lookupTable.sanitize (c)));
+ }
+
+ protected:
+ HBGlyphID stdGlyph; /* Standard glyph, as in BaselineTableFormat2Part. */
+ HBUINT16 ctlPoints[32]; /* Control point numbers, as in BaselineTableFormat2Part. */
+ Lookup<HBUINT16>
+ lookupTable; /* Lookup table that maps glyphs to their
+ * baseline values. */
+ public:
+ DEFINE_SIZE_MIN (68); /* 66 bytes of fixed data plus at least the 2-byte lookup format field. */
+};
+
+struct bsln /* 'bsln' table: a fixed header followed by one of four format-specific parts. */
+{
+ static constexpr hb_tag_t tableTag = HB_AAT_TAG_bsln;
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!(c->check_struct (this) && defaultBaseline < 32))) /* Enforces the documented 0..31 range of defaultBaseline. */
+ return_trace (false);
+
+ switch (format) /* Validate only the union member selected by format. */
+ {
+ case 0: return_trace (parts.format0.sanitize (c));
+ case 1: return_trace (parts.format1.sanitize (c));
+ case 2: return_trace (parts.format2.sanitize (c));
+ case 3: return_trace (parts.format3.sanitize (c));
+ default:return_trace (true); /* Unknown formats are accepted without inspecting the union. */
+ }
+ }
+
+ protected:
+ FixedVersion<>version; /* Version number of the Baseline table. */
+ HBUINT16 format; /* Format of the baseline table. Only one baseline
+ * format may be selected for the font. */
+ HBUINT16 defaultBaseline;/* Default baseline value for all glyphs.
+ * This value can be from 0 through 31. */
+ union {
+ // Distance-Based Formats
+ BaselineTableFormat0Part format0;
+ BaselineTableFormat1Part format1;
+ // Control Point-based Formats
+ BaselineTableFormat2Part format2;
+ BaselineTableFormat3Part format3;
+ } parts;
+ public:
+ DEFINE_SIZE_MIN (8); /* Header only; the size of parts depends on format. */
+};
+
+} /* namespace AAT */
+
+
+#endif /* HB_AAT_LAYOUT_BSLN_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-common.hh b/thirdparty/harfbuzz/src/hb-aat-layout-common.hh
new file mode 100644
index 0000000000..75d523f5fc
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat-layout-common.hh
@@ -0,0 +1,840 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_AAT_LAYOUT_COMMON_HH
+#define HB_AAT_LAYOUT_COMMON_HH
+
+#include "hb-aat-layout.hh"
+#include "hb-open-type.hh"
+
+
+namespace AAT {
+
+using namespace OT;
+
+
+/*
+ * Lookup Table
+ */
+
+template <typename T> struct Lookup;
+
+template <typename T>
+struct LookupFormat0 /* Lookup stored as a plain array with one value per glyph ID. */
+{
+ friend struct Lookup<T>;
+
+ private:
+ const T* get_value (hb_codepoint_t glyph_id, unsigned int num_glyphs) const
+ {
+ if (unlikely (glyph_id >= num_glyphs)) return nullptr; /* The array stores no length; num_glyphs is used as its bound. */
+ return &arrayZ[glyph_id];
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (arrayZ.sanitize (c, c->get_num_glyphs ())); /* Element count comes from the sanitizer's glyph count. */
+ }
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const /* Variant that passes base along to arrayZ.sanitize. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (arrayZ.sanitize (c, c->get_num_glyphs (), base));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 0 */
+ UnsizedArrayOf<T>
+ arrayZ; /* Array of lookup values, indexed by glyph index. */
+ public:
+ DEFINE_SIZE_UNBOUNDED (2);
+};
+
+
+template <typename T>
+struct LookupSegmentSingle /* A glyph range [first, last] sharing a single lookup value. */
+{
+ static constexpr unsigned TerminationWordCount = 2u;
+
+ int cmp (hb_codepoint_t g) const /* -1 if g is below the range, 0 if inside it, +1 if above. */
+ { return g < first ? -1 : g <= last ? 0 : +1 ; }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) && value.sanitize (c));
+ }
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const /* Variant that passes base along to value.sanitize. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) && value.sanitize (c, base));
+ }
+
+ HBGlyphID last; /* Last GlyphID in this segment */
+ HBGlyphID first; /* First GlyphID in this segment */
+ T value; /* The lookup value (only one) */
+ public:
+ DEFINE_SIZE_STATIC (4 + T::static_size); /* Two glyph IDs followed by the value. */
+};
+
+template <typename T>
+struct LookupFormat2 /* Lookup stored as binary-searchable segments, one value per glyph range. */
+{
+ friend struct Lookup<T>;
+
+ private:
+ const T* get_value (hb_codepoint_t glyph_id) const
+ {
+ const LookupSegmentSingle<T> *v = segments.bsearch (glyph_id);
+ return v ? &v->value : nullptr; /* nullptr when the search found no segment for the glyph. */
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (segments.sanitize (c));
+ }
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const /* Variant that passes base along to segments.sanitize. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (segments.sanitize (c, base));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 2 */
+ VarSizedBinSearchArrayOf<LookupSegmentSingle<T>>
+ segments; /* The actual segments. These must already be sorted,
+ * according to the first word in each one (the last
+ * glyph in each segment). */
+ public:
+ DEFINE_SIZE_ARRAY (8, segments);
+};
+
+template <typename T>
+struct LookupSegmentArray /* A glyph range [first, last] with one value per glyph, stored behind an offset. */
+{
+ static constexpr unsigned TerminationWordCount = 2u;
+
+ const T* get_value (hb_codepoint_t glyph_id, const void *base) const
+ {
+ return first <= glyph_id && glyph_id <= last ? &(base+valuesZ)[glyph_id - first] : nullptr; /* Index is relative to first; nullptr outside the range. */
+ }
+
+ int cmp (hb_codepoint_t g) const /* -1 if g is below the range, 0 if inside it, +1 if above. */
+ { return g < first ? -1 : g <= last ? 0 : +1; }
+
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ first <= last && /* Reject inverted ranges before the count below is computed. */
+ valuesZ.sanitize (c, base, last - first + 1)); /* One value per glyph in [first, last]. */
+ }
+ template <typename ...Ts>
+ bool sanitize (hb_sanitize_context_t *c, const void *base, Ts&&... ds) const /* Same checks, passing the extra arguments along to valuesZ.sanitize. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ first <= last &&
+ valuesZ.sanitize (c, base, last - first + 1, hb_forward<Ts> (ds)...));
+ }
+
+ HBGlyphID last; /* Last GlyphID in this segment */
+ HBGlyphID first; /* First GlyphID in this segment */
+ NNOffsetTo<UnsizedArrayOf<T>>
+ valuesZ; /* A 16-bit offset from the start of
+ * the table to the data. */
+ public:
+ DEFINE_SIZE_STATIC (6);
+};
+
+template <typename T>
+struct LookupFormat4 /* Lookup stored as binary-searchable segments, each pointing at its own value array. */
+{
+ friend struct Lookup<T>;
+
+ private:
+ const T* get_value (hb_codepoint_t glyph_id) const
+ {
+ const LookupSegmentArray<T> *v = segments.bsearch (glyph_id);
+ return v ? v->get_value (glyph_id, this) : nullptr; /* Segment offsets are resolved against this lookup. */
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (segments.sanitize (c, this)); /* this is the base for each segment's valuesZ offset. */
+ }
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const /* Variant that additionally passes base along. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (segments.sanitize (c, this, base));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 4 */
+ VarSizedBinSearchArrayOf<LookupSegmentArray<T>>
+ segments; /* The actual segments. These must already be sorted,
+ * according to the first word in each one (the last
+ * glyph in each segment). */
+ public:
+ DEFINE_SIZE_ARRAY (8, segments);
+};
+
+template <typename T>
+struct LookupSingle /* A single glyph paired with its lookup value. */
+{
+ static constexpr unsigned TerminationWordCount = 1u;
+
+ int cmp (hb_codepoint_t g) const { return glyph.cmp (g); } /* Ordering is delegated to the glyph ID's own cmp. */
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) && value.sanitize (c));
+ }
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const /* Variant that passes base along to value.sanitize. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) && value.sanitize (c, base));
+ }
+
+ HBGlyphID glyph; /* GlyphID this entry applies to */
+ T value; /* The lookup value (only one) */
+ public:
+ DEFINE_SIZE_STATIC (2 + T::static_size); /* One glyph ID followed by the value. */
+};
+
+template <typename T>
+struct LookupFormat6 /* Lookup stored as binary-searchable (glyph, value) pairs. */
+{
+ friend struct Lookup<T>;
+
+ private:
+ const T* get_value (hb_codepoint_t glyph_id) const
+ {
+ const LookupSingle<T> *v = entries.bsearch (glyph_id);
+ return v ? &v->value : nullptr; /* nullptr when the search found no entry for the glyph. */
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (entries.sanitize (c));
+ }
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const /* Variant that passes base along to entries.sanitize. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (entries.sanitize (c, base));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 6 */
+ VarSizedBinSearchArrayOf<LookupSingle<T>>
+ entries; /* The actual entries, sorted by glyph index. */
+ public:
+ DEFINE_SIZE_ARRAY (8, entries);
+};
+
+template <typename T>
+struct LookupFormat8 /* Lookup stored as a trimmed array covering glyphCount glyphs from firstGlyph. */
+{
+ friend struct Lookup<T>;
+
+ private:
+ const T* get_value (hb_codepoint_t glyph_id) const
+ {
+ return firstGlyph <= glyph_id && glyph_id - firstGlyph < glyphCount ? /* Glyph must fall inside the trimmed range. */
+ &valueArrayZ[glyph_id - firstGlyph] : nullptr;
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) && valueArrayZ.sanitize (c, glyphCount));
+ }
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const /* Variant that passes base along to valueArrayZ.sanitize. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) && valueArrayZ.sanitize (c, glyphCount, base));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 8 */
+ HBGlyphID firstGlyph; /* First glyph index included in the trimmed array. */
+ HBUINT16 glyphCount; /* Total number of glyphs (equivalent to the last
+ * glyph minus the value of firstGlyph plus 1). */
+ UnsizedArrayOf<T>
+ valueArrayZ; /* The lookup values (indexed by the glyph index
+ * minus the value of firstGlyph). */
+ public:
+ DEFINE_SIZE_ARRAY (6, valueArrayZ);
+};
+
+template <typename T>
+struct LookupFormat10 /* Trimmed-array lookup whose values are valueSize bytes wide, so they are returned by value. */
+{
+ friend struct Lookup<T>;
+
+ private:
+ const typename T::type get_value_or_null (hb_codepoint_t glyph_id) const
+ {
+ if (!(firstGlyph <= glyph_id && glyph_id - firstGlyph < glyphCount)) /* Glyph outside the trimmed range. */
+ return Null (T);
+
+ const HBUINT8 *p = &valueArrayZ[(glyph_id - firstGlyph) * valueSize]; /* First byte of this glyph's value. */
+
+ unsigned int v = 0;
+ unsigned int count = valueSize;
+ for (unsigned int i = 0; i < count; i++)
+ v = (v << 8) | *p++; /* Accumulate the bytes most-significant first. */
+
+ return v;
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ valueSize <= 4 && /* get_value_or_null packs the value into an unsigned int. */
+ valueArrayZ.sanitize (c, glyphCount * valueSize)); /* Array length in bytes. */
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 10 */
+ HBUINT16 valueSize; /* Byte size of each value. */
+ HBGlyphID firstGlyph; /* First glyph index included in the trimmed array. */
+ HBUINT16 glyphCount; /* Total number of glyphs (equivalent to the last
+ * glyph minus the value of firstGlyph plus 1). */
+ UnsizedArrayOf<HBUINT8>
+ valueArrayZ; /* The lookup values (indexed by the glyph index
+ * minus the value of firstGlyph). */
+ public:
+ DEFINE_SIZE_ARRAY (8, valueArrayZ);
+};
+
+template <typename T>
+struct Lookup /* Format-dispatching wrapper over the LookupFormat* variants. */
+{
+ const T* get_value (hb_codepoint_t glyph_id, unsigned int num_glyphs) const /* Pointer to the glyph's value, or nullptr if none. */
+ {
+ switch (u.format) {
+ case 0: return u.format0.get_value (glyph_id, num_glyphs);
+ case 2: return u.format2.get_value (glyph_id);
+ case 4: return u.format4.get_value (glyph_id);
+ case 6: return u.format6.get_value (glyph_id);
+ case 8: return u.format8.get_value (glyph_id);
+ default:return nullptr; /* Also reached for format 10, which is only served by get_value_or_null (). */
+ }
+ }
+
+ const typename T::type get_value_or_null (hb_codepoint_t glyph_id, unsigned int num_glyphs) const /* The glyph's value by value, or Null (T) if none. */
+ {
+ switch (u.format) {
+ /* Format 10 cannot return a pointer. */
+ case 10: return u.format10.get_value_or_null (glyph_id);
+ default:
+ const T *v = get_value (glyph_id, num_glyphs);
+ return v ? *v : Null (T);
+ }
+ }
+
+ typename T::type get_class (hb_codepoint_t glyph_id,
+ unsigned int num_glyphs,
+ unsigned int outOfRange) const /* Value returned when the lookup has no entry for the glyph. */
+ {
+ const T *v = get_value (glyph_id, num_glyphs); /* Goes through get_value (), so a format 10 lookup always yields outOfRange. */
+ return v ? *v : outOfRange;
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (!u.format.sanitize (c)) return_trace (false); /* The format field must be readable before it is switched on. */
+ switch (u.format) {
+ case 0: return_trace (u.format0.sanitize (c));
+ case 2: return_trace (u.format2.sanitize (c));
+ case 4: return_trace (u.format4.sanitize (c));
+ case 6: return_trace (u.format6.sanitize (c));
+ case 8: return_trace (u.format8.sanitize (c));
+ case 10: return_trace (u.format10.sanitize (c));
+ default:return_trace (true); /* Unknown formats pass; the getters return no value for them. */
+ }
+ }
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const /* Variant that passes base along to the selected format. */
+ {
+ TRACE_SANITIZE (this);
+ if (!u.format.sanitize (c)) return_trace (false);
+ switch (u.format) {
+ case 0: return_trace (u.format0.sanitize (c, base));
+ case 2: return_trace (u.format2.sanitize (c, base));
+ case 4: return_trace (u.format4.sanitize (c, base));
+ case 6: return_trace (u.format6.sanitize (c, base));
+ case 8: return_trace (u.format8.sanitize (c, base));
+ case 10: return_trace (false); /* We don't support format10 here currently. */
+ default:return_trace (true);
+ }
+ }
+
+ protected:
+ union {
+ HBUINT16 format; /* Format identifier */
+ LookupFormat0<T> format0;
+ LookupFormat2<T> format2;
+ LookupFormat4<T> format4;
+ LookupFormat6<T> format6;
+ LookupFormat8<T> format8;
+ LookupFormat10<T> format10;
+ } u;
+ public:
+ DEFINE_SIZE_UNION (2, format);
+};
+/* Lookup 0 has unbounded size (dependent on num_glyphs). So we need to define
+ * special NULL objects for Lookup<> objects, but since it's a template our macros
+ * don't work. So we have to hand-code them here. UGLY. */
+} /* Close namespace. */
+/* Ugly hand-coded null objects for template Lookup<> :(. */
+extern HB_INTERNAL const unsigned char _hb_Null_AAT_Lookup[2]; /* Backing bytes for the null Lookup; defined elsewhere. */
+template <typename T>
+struct Null<AAT::Lookup<T>> { /* Specialization: every Lookup<T> shares the same 2-byte null object. */
+ static AAT::Lookup<T> const & get_null ()
+ { return *reinterpret_cast<const AAT::Lookup<T> *> (_hb_Null_AAT_Lookup); } /* Reinterprets the shared bytes as a Lookup<T>. */
+};
+namespace AAT {
+
+enum { DELETED_GLYPH = 0xFFFF }; /* Glyph ID that StateTable::get_class () maps to CLASS_DELETED_GLYPH. */
+
+/*
+ * (Extended) State Table
+ */
+
+template <typename T>
+struct Entry /* One state-table entry: next state, flags, and table-specific data of type T. */
+{
+ bool sanitize (hb_sanitize_context_t *c, unsigned int count) const /* count is not used in this body. */
+ {
+ TRACE_SANITIZE (this);
+ /* Note, we don't recurse-sanitize data because we don't access it.
+ * That said, in our DEFINE_SIZE_STATIC we access T::static_size,
+ * which ensures that data has a simple sanitize(). To be determined
+ * if I need to remove that as well.
+ *
+ * HOWEVER! Because we are a template, our DEFINE_SIZE_STATIC
+ * assertion wouldn't be checked, hence the line below. */
+ static_assert (T::static_size, "");
+
+ return_trace (c->check_struct (this));
+ }
+
+ public:
+ HBUINT16 newState; /* Byte offset from beginning of state table
+ * to the new state. Really?!?! Or just state
+ * number? The latter in morx for sure. */
+ HBUINT16 flags; /* Table specific. */
+ T data; /* Optional offsets to per-glyph tables. */
+ public:
+ DEFINE_SIZE_STATIC (4 + T::static_size); /* newState and flags, then the data payload. */
+};
+
+template <>
+struct Entry<void> /* Specialization for entries that carry no extra data: newState and flags only. */
+{
+ bool sanitize (hb_sanitize_context_t *c, unsigned int count /*XXX Unused?*/) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this)); /* Fixed-size record; the struct bounds check is the only validation. */
+ }
+
+ public:
+ HBUINT16 newState; /* Byte offset from beginning of state table to the new state. */
+ HBUINT16 flags; /* Table specific. */
+ public:
+ DEFINE_SIZE_STATIC (4); /* Two 16-bit fields. */
+};
+
+template <typename Types, typename Extra>
+struct StateTable /* State machine header: class table, state array and entry array, with widths chosen by Types. */
+{
+ typedef typename Types::HBUINT HBUINT; /* Width of the header fields. */
+ typedef typename Types::HBUSHORT HBUSHORT; /* Width of one state array cell. */
+ typedef typename Types::ClassTypeNarrow ClassType;
+
+ enum State
+ {
+ STATE_START_OF_TEXT = 0,
+ STATE_START_OF_LINE = 1,
+ };
+ enum Class
+ {
+ CLASS_END_OF_TEXT = 0,
+ CLASS_OUT_OF_BOUNDS = 1,
+ CLASS_DELETED_GLYPH = 2,
+ CLASS_END_OF_LINE = 3,
+ };
+
+ int new_state (unsigned int newState) const /* Converts an entry's newState field into a state index. */
+ { return Types::extended ? newState : ((int) newState - (int) stateArrayTable) / (int) nClasses; } /* Extended: used as is. Otherwise: offset relative to the state array, divided by the row length; may be negative. */
+
+ unsigned int get_class (hb_codepoint_t glyph_id, unsigned int num_glyphs) const
+ {
+ if (unlikely (glyph_id == DELETED_GLYPH)) return CLASS_DELETED_GLYPH;
+ return (this+classTable).get_class (glyph_id, num_glyphs, 1); /* 1 is CLASS_OUT_OF_BOUNDS, used when the class table has no entry. */
+ }
+
+ const Entry<Extra> *get_entries () const
+ { return (this+entryTable).arrayZ; }
+
+ const Entry<Extra> &get_entry (int state, unsigned int klass) const /* Entry selected by the state array cell at (state, klass). */
+ {
+ if (unlikely (klass >= nClasses))
+ klass = StateTable<Types, Entry<Extra>>::CLASS_OUT_OF_BOUNDS; /* NOTE(review): names StateTable<Types, Entry<Extra>> rather than this StateTable<Types, Extra>; only the enum constant is used. */
+
+ const HBUSHORT *states = (this+stateArrayTable).arrayZ;
+ const Entry<Extra> *entries = (this+entryTable).arrayZ;
+
+ unsigned int entry = states[state * nClasses + klass]; /* Each state is a row of nClasses cells; the cell holds an entry index. */
+ DEBUG_MSG (APPLY, nullptr, "e%u", entry);
+
+ return entries[entry];
+ }
+
+ bool sanitize (hb_sanitize_context_t *c,
+ unsigned int *num_entries_out = nullptr) const /* Optional output: number of entries found to be referenced. */
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!(c->check_struct (this) &&
+ nClasses >= 4 /* Ensure pre-defined classes fit. */ &&
+ classTable.sanitize (c, this)))) return_trace (false);
+
+ const HBUSHORT *states = (this+stateArrayTable).arrayZ;
+ const Entry<Extra> *entries = (this+entryTable).arrayZ;
+
+ unsigned int num_classes = nClasses;
+ if (unlikely (hb_unsigned_mul_overflows (num_classes, states[0].static_size)))
+ return_trace (false);
+ unsigned int row_stride = num_classes * states[0].static_size; /* Bytes per state row. */
+
+ /* Apple 'kern' table has this peculiarity:
+ *
+ * "Because the stateTableOffset in the state table header is (strictly
+ * speaking) redundant, some 'kern' tables use it to record an initial
+ * state where that should not be StartOfText. To determine if this is
+ * done, calculate what the stateTableOffset should be. If it's different
+ * from the actual stateTableOffset, use it as the initial state."
+ *
+ * We implement this by calling the initial state zero, but allow *negative*
+ * states if the start state indeed was not the first state. Since the code
+ * is shared, this will also apply to 'mort' table. The 'kerx' / 'morx'
+ * tables are not affected since those address states by index, not offset.
+ */
+
+ int min_state = 0; /* Lowest state index referenced by any swept entry. */
+ int max_state = 0; /* Highest state index referenced by any swept entry. */
+ unsigned int num_entries = 0; /* One past the highest entry index referenced by any swept state. */
+
+ int state_pos = 0; /* Positive states below this index have been swept. */
+ int state_neg = 0; /* Negative states at or above this index have been swept. */
+ unsigned int entry = 0; /* Entries below this index have been swept. */
+ while (min_state < state_neg || state_pos <= max_state) /* Repeat until no referenced state is left unswept. */
+ {
+ if (min_state < state_neg)
+ {
+ /* Negative states. */
+ if (unlikely (hb_unsigned_mul_overflows (min_state, num_classes))) /* NOTE(review): min_state is negative here while the helper's name says unsigned; confirm the intended conversion. */
+ return_trace (false);
+ if (unlikely (!c->check_range (&states[min_state * num_classes],
+ -min_state,
+ row_stride)))
+ return_trace (false);
+ if ((c->max_ops -= state_neg - min_state) <= 0) /* Charge the newly covered rows against the sanitizer's op budget. */
+ return_trace (false);
+ { /* Sweep new states. */
+ const HBUSHORT *stop = &states[min_state * num_classes];
+ if (unlikely (stop > states))
+ return_trace (false);
+ for (const HBUSHORT *p = states; stop < p; p--) /* Walks backwards from the start of state 0 down to stop. */
+ num_entries = hb_max (num_entries, *(p - 1) + 1);
+ state_neg = min_state;
+ }
+ }
+
+ if (state_pos <= max_state)
+ {
+ /* Positive states. */
+ if (unlikely (!c->check_range (states,
+ max_state + 1,
+ row_stride)))
+ return_trace (false);
+ if ((c->max_ops -= max_state - state_pos + 1) <= 0) /* Charge the newly covered rows against the sanitizer's op budget. */
+ return_trace (false);
+ { /* Sweep new states. */
+ if (unlikely (hb_unsigned_mul_overflows ((max_state + 1), num_classes)))
+ return_trace (false);
+ const HBUSHORT *stop = &states[(max_state + 1) * num_classes];
+ if (unlikely (stop < states))
+ return_trace (false);
+ for (const HBUSHORT *p = &states[state_pos * num_classes]; p < stop; p++) /* Only rows not swept before are visited. */
+ num_entries = hb_max (num_entries, *p + 1);
+ state_pos = max_state + 1;
+ }
+ }
+
+ if (unlikely (!c->check_array (entries, num_entries)))
+ return_trace (false);
+ if ((c->max_ops -= num_entries - entry) <= 0) /* Charge the newly covered entries against the sanitizer's op budget. */
+ return_trace (false);
+ { /* Sweep new entries. */
+ const Entry<Extra> *stop = &entries[num_entries];
+ for (const Entry<Extra> *p = &entries[entry]; p < stop; p++)
+ {
+ int newState = new_state (p->newState);
+ min_state = hb_min (min_state, newState); /* Widening either bound makes the outer loop sweep the new states. */
+ max_state = hb_max (max_state, newState);
+ }
+ entry = num_entries;
+ }
+ }
+
+ if (num_entries_out)
+ *num_entries_out = num_entries;
+
+ return_trace (true);
+ }
+
+ protected:
+ HBUINT nClasses; /* Number of classes, which is the number of indices
+ * in a single line in the state array. */
+ NNOffsetTo<ClassType, HBUINT>
+ classTable; /* Offset to the class table. */
+ NNOffsetTo<UnsizedArrayOf<HBUSHORT>, HBUINT>
+ stateArrayTable;/* Offset to the state array. */
+ NNOffsetTo<UnsizedArrayOf<Entry<Extra>>, HBUINT>
+ entryTable; /* Offset to the entry array. */
+
+ public:
+ DEFINE_SIZE_STATIC (4 * sizeof (HBUINT)); /* Four header fields of HBUINT width. */
+};
+
+template <typename HBUCHAR>
+struct ClassTable /* Class table stored as a trimmed array of class codes starting at firstGlyph. */
+{
+ unsigned int get_class (hb_codepoint_t glyph_id, unsigned int outOfRange) const
+ {
+ unsigned int i = glyph_id - firstGlyph; /* i is unsigned, so one bounds test below decides whether outOfRange is returned. */
+ return i >= classArray.len ? outOfRange : classArray.arrayZ[i];
+ }
+ unsigned int get_class (hb_codepoint_t glyph_id,
+ unsigned int num_glyphs HB_UNUSED,
+ unsigned int outOfRange) const /* Overload with the same argument list as Lookup::get_class (); num_glyphs is ignored. */
+ {
+ return get_class (glyph_id, outOfRange);
+ }
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) && classArray.sanitize (c));
+ }
+ protected:
+ HBGlyphID firstGlyph; /* First glyph index included in the trimmed array. */
+ ArrayOf<HBUCHAR> classArray; /* The class codes (indexed by glyph index minus
+ * firstGlyph). */
+ public:
+ DEFINE_SIZE_ARRAY (4, classArray);
+};
+
+struct ObsoleteTypes /* Type set for non-extended state tables: 16-bit header fields, 8-bit state array cells. */
+{
+ static constexpr bool extended = false;
+ typedef HBUINT16 HBUINT;
+ typedef HBUINT8 HBUSHORT;
+ typedef ClassTable<HBUINT8> ClassTypeNarrow;
+ typedef ClassTable<HBUINT16> ClassTypeWide;
+
+ template <typename T>
+ static unsigned int offsetToIndex (unsigned int offset, /* Byte offset measured from base. */
+ const void *base,
+ const T *array)
+ {
+ return (offset - ((const char *) array - (const char *) base)) / T::static_size; /* Rebase the offset onto array, then convert bytes to elements. */
+ }
+ template <typename T>
+ static unsigned int byteOffsetToIndex (unsigned int offset,
+ const void *base,
+ const T *array)
+ {
+ return offsetToIndex (offset, base, array); /* Offsets are already in bytes here. */
+ }
+ template <typename T>
+ static unsigned int wordOffsetToIndex (unsigned int offset,
+ const void *base,
+ const T *array)
+ {
+ return offsetToIndex (2 * offset, base, array); /* Doubles the offset before the byte-based conversion. */
+ }
+};
+struct ExtendedTypes /* Type set for extended state tables: 32-bit header fields, 16-bit state array cells. */
+{
+ static constexpr bool extended = true;
+ typedef HBUINT32 HBUINT;
+ typedef HBUINT16 HBUSHORT;
+ typedef Lookup<HBUINT16> ClassTypeNarrow;
+ typedef Lookup<HBUINT16> ClassTypeWide;
+
+ template <typename T>
+ static unsigned int offsetToIndex (unsigned int offset,
+ const void *base HB_UNUSED,
+ const T *array HB_UNUSED)
+ {
+ return offset; /* Returned unchanged; base and array are not consulted. */
+ }
+ template <typename T>
+ static unsigned int byteOffsetToIndex (unsigned int offset,
+ const void *base HB_UNUSED,
+ const T *array HB_UNUSED)
+ {
+ return offset / 2; /* Halves the offset; presumably the indexed elements are 2 bytes wide -- confirm at call sites. */
+ }
+ template <typename T>
+ static unsigned int wordOffsetToIndex (unsigned int offset,
+ const void *base HB_UNUSED,
+ const T *array HB_UNUSED)
+ {
+ return offset; /* Returned unchanged. */
+ }
+};
+
+template <typename Types, typename EntryData>
+struct StateTableDriver /* Runs a StateTable over a buffer, handing each selected entry to a context object. */
+{
+ StateTableDriver (const StateTable<Types, EntryData> &machine_,
+ hb_buffer_t *buffer_,
+ hb_face_t *face_) :
+ machine (machine_),
+ buffer (buffer_),
+ num_glyphs (face_->get_num_glyphs ()) {} /* The glyph count is read once from the face. */
+
+ template <typename context_t>
+ void drive (context_t *c) /* c supplies in_place, is_actionable (), transition () and DontAdvance. */
+ {
+ if (!c->in_place)
+ buffer->clear_output ();
+
+ int state = StateTable<Types, EntryData>::STATE_START_OF_TEXT;
+ for (buffer->idx = 0; buffer->successful;)
+ {
+ unsigned int klass = buffer->idx < buffer->len ?
+ machine.get_class (buffer->info[buffer->idx].codepoint, num_glyphs) :
+ (unsigned) StateTable<Types, EntryData>::CLASS_END_OF_TEXT; /* Past the last glyph the machine is fed end-of-text. */
+ DEBUG_MSG (APPLY, nullptr, "c%u at %u", klass, buffer->idx);
+ const Entry<EntryData> &entry = machine.get_entry (state, klass);
+
+ /* Unsafe-to-break before this if not in state 0, as things might
+ * go differently if we start from state 0 here.
+ *
+ * Ugh. The indexing here is ugly... */
+ if (state && buffer->backtrack_len () && buffer->idx < buffer->len)
+ {
+ /* If there's no action and we're just epsilon-transitioning to state 0,
+ * safe to break. */
+ if (c->is_actionable (this, entry) ||
+ !(entry.newState == StateTable<Types, EntryData>::STATE_START_OF_TEXT &&
+ entry.flags == context_t::DontAdvance))
+ buffer->unsafe_to_break_from_outbuffer (buffer->backtrack_len () - 1, buffer->idx + 1);
+ }
+
+ /* Unsafe-to-break if end-of-text would kick in here. */
+ if (buffer->idx + 2 <= buffer->len)
+ {
+ const Entry<EntryData> &end_entry = machine.get_entry (state, StateTable<Types, EntryData>::CLASS_END_OF_TEXT);
+ if (c->is_actionable (this, end_entry))
+ buffer->unsafe_to_break (buffer->idx, buffer->idx + 2);
+ }
+
+ c->transition (this, entry); /* The context performs the entry's action. */
+
+ state = machine.new_state (entry.newState);
+ DEBUG_MSG (APPLY, nullptr, "s%d", state);
+
+ if (buffer->idx == buffer->len) /* The end-of-text step has been processed. */
+ break;
+
+ if (!(entry.flags & context_t::DontAdvance) || buffer->max_ops-- <= 0) /* Advance unless DontAdvance is set, or anyway once buffer->max_ops has run out. */
+ buffer->next_glyph ();
+ }
+
+ if (!c->in_place)
+ {
+ for (; buffer->successful && buffer->idx < buffer->len;) /* Pass any glyphs left after the main loop through next_glyph (). */
+ buffer->next_glyph ();
+ buffer->swap_buffers ();
+ }
+ }
+
+ public:
+ const StateTable<Types, EntryData> &machine;
+ hb_buffer_t *buffer;
+ unsigned int num_glyphs;
+};
+
+
+struct ankr;
+
+struct hb_aat_apply_context_t : /* Dispatch context holding the objects used while applying AAT tables. */
+ hb_dispatch_context_t<hb_aat_apply_context_t, bool, HB_DEBUG_APPLY>
+{
+ const char *get_name () { return "APPLY"; }
+ template <typename T>
+ return_t dispatch (const T &obj) { return obj.apply (this); } /* Dispatching an object means calling its apply (). */
+ static return_t default_return_value () { return false; }
+ bool stop_sublookup_iteration (return_t r) const { return r; } /* Returns r itself, so a true result requests a stop. */
+
+ const hb_ot_shape_plan_t *plan;
+ hb_font_t *font;
+ hb_face_t *face;
+ hb_buffer_t *buffer;
+ hb_sanitize_context_t sanitizer;
+ const ankr *ankr_table; /* Presumably assigned by set_ankr_table (), which is defined elsewhere. */
+
+ /* Unused. For debug tracing only. */
+ unsigned int lookup_index;
+
+ HB_INTERNAL hb_aat_apply_context_t (const hb_ot_shape_plan_t *plan_,
+ hb_font_t *font_,
+ hb_buffer_t *buffer_,
+ hb_blob_t *blob = const_cast<hb_blob_t *> (&Null (hb_blob_t))); /* blob defaults to the null blob. */
+
+ HB_INTERNAL ~hb_aat_apply_context_t ();
+
+ HB_INTERNAL void set_ankr_table (const AAT::ankr *ankr_table_);
+
+ void set_lookup_index (unsigned int i) { lookup_index = i; }
+};
+
+
+} /* namespace AAT */
+
+
+#endif /* HB_AAT_LAYOUT_COMMON_HH */
diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-feat-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-feat-table.hh
new file mode 100644
index 0000000000..359e859cfc
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat-layout-feat-table.hh
@@ -0,0 +1,222 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_AAT_LAYOUT_FEAT_TABLE_HH
+#define HB_AAT_LAYOUT_FEAT_TABLE_HH
+
+#include "hb-aat-layout-common.hh"
+
+/*
+ * feat -- Feature Name
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6feat.html
+ */
+#define HB_AAT_TAG_feat HB_TAG('f','e','a','t')
+
+
+namespace AAT {
+
+
+struct SettingName /* One record of a feature's setting name array: a setting value plus its name id. */
+{
+ friend struct FeatureName;
+
+ int cmp (hb_aat_layout_feature_selector_t key) const /* <0, 0 or >0 as key is below, equal to or above `setting`. */
+ { return (int) key - (int) setting; }
+
+ hb_aat_layout_feature_selector_t get_selector () const
+ { return (hb_aat_layout_feature_selector_t) (unsigned) setting; }
+
+ hb_aat_layout_feature_selector_info_t get_info (hb_aat_layout_feature_selector_t default_selector) const
+ {
+ return {
+ nameIndex,
+ (hb_aat_layout_feature_selector_t) (unsigned int) setting,
+ default_selector == HB_AAT_LAYOUT_FEATURE_SELECTOR_INVALID
+ ? (hb_aat_layout_feature_selector_t) (setting + 1) /* No default supplied: use the next selector value. */
+ : default_selector,
+ 0
+ };
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this)));
+ }
+
+ protected:
+ HBUINT16 setting; /* The setting. */
+ NameID nameIndex; /* The name table index for the setting's name. */
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+DECLARE_NULL_NAMESPACE_BYTES (AAT, SettingName);
+
+struct feat;
+
+struct FeatureName /* One feature record: its type, flags, name id and the offset of its settings. */
+{
+ int cmp (hb_aat_layout_feature_type_t key) const /* <0, 0 or >0 as key is below, equal to or above `feature`. */
+ { return (int) key - (int) feature; }
+
+ enum {
+ Exclusive = 0x8000, /* If set, the feature settings are mutually exclusive. */
+ NotDefault = 0x4000, /* If clear, then the setting with an index of 0 in
+ * the setting name array for this feature should
+ * be taken as the default for the feature
+ * (if one is required). If set, then bits 0-15 of this
+ * featureFlags field contain the index of the setting
+ * which is to be taken as the default. */
+ IndexMask = 0x00FF /* If Exclusive and NotDefault are both set, then these bits
+ * indicate the index of the setting in the setting name
+ * array for this feature which should be taken
+ * as the default. NOTE(review): the mask keeps only the low 8 bits. */
+ };
+
+ unsigned int get_selector_infos (unsigned int start_offset,
+ unsigned int *selectors_count, /* IN/OUT. May be NULL. */
+ hb_aat_layout_feature_selector_info_t *selectors, /* OUT. May be NULL. */
+ unsigned int *pdefault_index, /* OUT. May be NULL. */
+ const void *base) const
+ {
+ hb_array_t< const SettingName> settings_table = (base+settingTableZ).as_array (nSettings);
+
+ static_assert (Index::NOT_FOUND_INDEX == HB_AAT_LAYOUT_NO_SELECTOR_INDEX, "");
+
+ hb_aat_layout_feature_selector_t default_selector = HB_AAT_LAYOUT_FEATURE_SELECTOR_INVALID;
+ unsigned int default_index = Index::NOT_FOUND_INDEX;
+ if (featureFlags & Exclusive) /* Only exclusive features get a default setting. */
+ {
+ default_index = (featureFlags & NotDefault) ? featureFlags & IndexMask : 0;
+ default_selector = settings_table[default_index].get_selector ();
+ }
+ if (pdefault_index)
+ *pdefault_index = default_index;
+
+ if (selectors_count)
+ {
+ + settings_table.sub_array (start_offset, selectors_count)
+ | hb_map ([=] (const SettingName& setting) { return setting.get_info (default_selector); })
+ | hb_sink (hb_array (selectors, *selectors_count))
+ ;
+ }
+ return settings_table.length; /* Total number of settings, whatever start_offset was. */
+ }
+
+ hb_aat_layout_feature_type_t get_feature_type () const
+ { return (hb_aat_layout_feature_type_t) (unsigned int) feature; }
+
+ hb_ot_name_id_t get_feature_name_id () const { return nameIndex; }
+
+ bool is_exclusive () const { return featureFlags & Exclusive; }
+
+ /* A FeatureName with no settings is meaningless */
+ bool has_data () const { return nSettings; }
+
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ (base+settingTableZ).sanitize (c, nSettings)));
+ }
+
+ protected:
+ HBUINT16 feature; /* Feature type. */
+ HBUINT16 nSettings; /* The number of records in the setting name array. */
+ LNNOffsetTo<UnsizedArrayOf<SettingName>>
+ settingTableZ; /* Offset in bytes from the beginning of this table to
+ * this feature's setting name array. The actual type of
+ * record this offset refers to will depend on the
+ * exclusivity value, as described below. */
+ HBUINT16 featureFlags; /* Single-bit flags associated with the feature type. */
+ HBINT16 nameIndex; /* The name table index for the feature's name.
+ * This index has values greater than 255 and
+ * less than 32768. */
+ public:
+ DEFINE_SIZE_STATIC (12);
+};
+
+struct feat /* The 'feat' table: a header followed by a sorted array of FeatureName records. */
+{
+ static constexpr hb_tag_t tableTag = HB_AAT_TAG_feat;
+
+ bool has_data () const { return version.to_int (); } /* True when the version field is non-zero. */
+
+ unsigned int get_feature_types (unsigned int start_offset,
+ unsigned int *count,
+ hb_aat_layout_feature_type_t *features) const
+ {
+ if (count) /* With a NULL count nothing is written; only the total is returned. */
+ {
+ + namesZ.as_array (featureNameCount).sub_array (start_offset, count)
+ | hb_map (&FeatureName::get_feature_type)
+ | hb_sink (hb_array (features, *count))
+ ;
+ }
+ return featureNameCount;
+ }
+
+ bool exposes_feature (hb_aat_layout_feature_type_t feature_type) const /* True when the record found has settings. */
+ { return get_feature (feature_type).has_data (); }
+
+ const FeatureName& get_feature (hb_aat_layout_feature_type_t feature_type) const /* Binary search by feature type. */
+ { return namesZ.bsearch (featureNameCount, feature_type); }
+
+ hb_ot_name_id_t get_feature_name_id (hb_aat_layout_feature_type_t feature) const
+ { return get_feature (feature).get_feature_name_id (); }
+
+ unsigned int get_selector_infos (hb_aat_layout_feature_type_t feature_type,
+ unsigned int start_offset,
+ unsigned int *selectors_count, /* IN/OUT. May be NULL. */
+ hb_aat_layout_feature_selector_info_t *selectors, /* OUT. May be NULL. */
+ unsigned int *default_index /* OUT. May be NULL. */) const
+ {
+ return get_feature (feature_type).get_selector_infos (start_offset, selectors_count, selectors,
+ default_index, this); /* Setting offsets are resolved against this table. */
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ version.major == 1 &&
+ namesZ.sanitize (c, featureNameCount, this)));
+ }
+
+ protected:
+ FixedVersion<>version; /* Version number of the feature name table
+ * (0x00010000 for the current version). */
+ HBUINT16 featureNameCount;
+ /* The number of entries in the feature name array. */
+ HBUINT16 reserved1; /* Reserved (set to zero). */
+ HBUINT32 reserved2; /* Reserved (set to zero). */
+ SortedUnsizedArrayOf<FeatureName>
+ namesZ; /* The feature name array. */
+ public:
+ DEFINE_SIZE_ARRAY (12, namesZ);
+};
+
+} /* namespace AAT */
+
+#endif /* HB_AAT_LAYOUT_FEAT_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-just-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-just-table.hh
new file mode 100644
index 0000000000..49506e9f5a
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat-layout-just-table.hh
@@ -0,0 +1,417 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_AAT_LAYOUT_JUST_TABLE_HH
+#define HB_AAT_LAYOUT_JUST_TABLE_HH
+
+#include "hb-aat-layout-common.hh"
+#include "hb-ot-layout.hh"
+#include "hb-open-type.hh"
+
+#include "hb-aat-layout-morx-table.hh"
+
+/*
+ * just -- Justification
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6just.html
+ */
+#define HB_AAT_TAG_just HB_TAG('j','u','s','t')
+
+
+namespace AAT {
+
+using namespace OT;
+
+
+struct ActionSubrecordHeader /* Common 6-byte prefix of every postcompensation action record. */
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this)));
+ }
+
+ HBUINT16 actionClass; /* The JustClass value associated with this
+ * ActionSubrecord. */
+ HBUINT16 actionType; /* The type of postcompensation action. */
+ HBUINT16 actionLength; /* Length of this ActionSubrecord record, which
+ * must be a multiple of 4. */
+ public:
+ DEFINE_SIZE_STATIC (6);
+};
+
+struct DecompositionAction /* Action record describing how a ligature is decomposed. */
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this))); /* NOTE(review): decomposedglyphs itself is not sanitized here. */
+ }
+
+ ActionSubrecordHeader
+ header;
+ HBFixed lowerLimit; /* If the distance factor is less than this value,
+ * then the ligature is decomposed. */
+ HBFixed upperLimit; /* If the distance factor is greater than this value,
+ * then the ligature is decomposed. */
+ HBUINT16 order; /* Numerical order in which this ligature will
+ * be decomposed; you may want infrequent ligatures
+ * to decompose before more frequent ones. The ligatures
+ * on the line of text will decompose in increasing
+ * value of this field. */
+ ArrayOf<HBUINT16>
+ decomposedglyphs;
+ /* Number of 16-bit glyph indexes that follow;
+ * the ligature will be decomposed into these glyphs.
+ *
+ * Array of decomposed glyphs. */
+ public:
+ DEFINE_SIZE_ARRAY (18, decomposedglyphs);
+};
+
+struct UnconditionalAddGlyphAction /* Action record naming a single glyph to add. */
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this)); /* Fixed-size record: the struct check is the whole check. */
+ }
+
+ protected:
+ ActionSubrecordHeader
+ header;
+ HBGlyphID addGlyph; /* Glyph that should be added if the distance factor
+ * is growing. */
+
+ public:
+ DEFINE_SIZE_STATIC (8);
+};
+
+struct ConditionalAddGlyphAction /* Action record with a threshold, a glyph to add and a substitute glyph. */
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this))); /* Fixed-size record: the struct check is the whole check. */
+ }
+
+ protected:
+ ActionSubrecordHeader
+ header;
+ HBFixed substThreshold; /* Distance growth factor (in ems) at which
+ * this glyph is replaced and the growth factor
+ * recalculated. */
+ HBGlyphID addGlyph; /* Glyph to be added as kashida. If this value is
+ * 0xFFFF, no extra glyph will be added. Note that
+ * generally when a glyph is added, justification
+ * will need to be redone. */
+ HBGlyphID substGlyph; /* Glyph to be substituted for this glyph if the
+ * growth factor equals or exceeds the value of
+ * substThreshold. */
+ public:
+ DEFINE_SIZE_STATIC (14);
+};
+
+struct DuctileGlyphAction /* Action record giving the limits of a ductility variation axis. */
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this)));
+ }
+
+ protected:
+ ActionSubrecordHeader
+ header;
+ HBUINT32 variationAxis; /* The 4-byte tag identifying the ductile axis.
+ * This would normally be 0x64756374 ('duct'),
+ * but you may use any axis the font contains. */
+ HBFixed minimumLimit; /* The lowest value for the ductility axis that
+ * still yields an acceptable appearance. Normally
+ * this will be 1.0. */
+ HBFixed noStretchValue; /* This is the default value that corresponds to
+ * no change in appearance. Normally, this will
+ * be 1.0. */
+ HBFixed maximumLimit; /* The highest value for the ductility axis that
+ * still yields an acceptable appearance. */
+ public:
+ DEFINE_SIZE_STATIC (22);
+};
+
+struct RepeatedAddGlyphAction /* Action record naming a glyph to add, plus a flags word. */
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this))); /* Fixed-size record: the struct check is the whole check. */
+ }
+
+ protected:
+ ActionSubrecordHeader
+ header;
+ HBUINT16 flags; /* Currently unused; set to 0. */
+ HBGlyphID glyph; /* Glyph that should be added if the distance factor
+ * is growing. */
+ public:
+ DEFINE_SIZE_STATIC (10);
+};
+
+struct ActionSubrecord /* One postcompensation action; header.actionType selects the union member in use. */
+{
+ unsigned int get_length () const { return u.header.actionLength; } /* Record length as stored in the header. */
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!c->check_struct (this))) /* The header must be readable before actionType is consulted. */
+ return_trace (false);
+
+ switch (u.header.actionType) /* Validate each action type against its own record layout. */
+ {
+ case 0: return_trace (u.decompositionAction.sanitize (c));
+ case 1: return_trace (u.unconditionalAddGlyphAction.sanitize (c));
+ case 2: return_trace (u.conditionalAddGlyphAction.sanitize (c));
+ // case 3: return_trace (u.stretchGlyphAction.sanitize (c));
+ case 4: return_trace (u.ductileGlyphAction.sanitize (c));
+ case 5: return_trace (u.repeatedAddGlyphAction.sanitize (c));
+ default: return_trace (true);
+ }
+ }
+
+ protected:
+ union {
+ ActionSubrecordHeader header;
+ DecompositionAction decompositionAction;
+ UnconditionalAddGlyphAction unconditionalAddGlyphAction;
+ ConditionalAddGlyphAction conditionalAddGlyphAction;
+ /* StretchGlyphAction stretchGlyphAction; -- Not supported by CoreText */
+ DuctileGlyphAction ductileGlyphAction;
+ RepeatedAddGlyphAction repeatedAddGlyphAction;
+ } u; /* Data. The format of this data depends on
+ * the value of the actionType field. */
+ public:
+ DEFINE_SIZE_UNION (6, header);
+};
+
+struct PostcompensationActionChain /* A 32-bit count followed by that many variable-length ActionSubrecords. */
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!c->check_struct (this)))
+ return_trace (false);
+
+ unsigned int offset = min_size; /* The first record starts right after the count field. */
+ for (unsigned int i = 0; i < count; i++)
+ {
+ const ActionSubrecord& subrecord = StructAtOffset<ActionSubrecord> (this, offset);
+ if (unlikely (!subrecord.sanitize (c))) return_trace (false);
+ offset += subrecord.get_length (); /* NOTE(review): a zero actionLength re-checks the same record each pass. */
+ }
+
+ return_trace (true);
+ }
+
+ protected:
+ HBUINT32 count;
+
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+
+struct JustWidthDeltaEntry /* Grow/shrink limits and flags for one justification class. */
+{
+ enum Flags
+ {
+ Reserved1 =0xE000,/* Reserved. You should set these bits to zero. */
+ UnlimiteGap =0x1000,/* The glyph can take unlimited gap. When this
+ * glyph participates in the justification process,
+ * it and any other glyphs on the line having this
+ * bit set absorb all the remaining gap. */
+ Reserved2 =0x0FF0,/* Reserved. You should set these bits to zero. */
+ Priority =0x000F /* The justification priority of the glyph. */
+ };
+
+ enum Priority
+ {
+ Kashida = 0, /* Kashida priority. This is the highest priority
+ * during justification. */
+ Whitespace = 1, /* Whitespace priority. Any whitespace glyphs (as
+ * identified in the glyph properties table) will
+ * get this priority. */
+ InterCharacter = 2, /* Inter-character priority. Give this to any
+ * remaining glyphs. */
+ NullPriority = 3 /* Null priority. You should set this priority for
+ * glyphs that only participate in justification
+ * after the above priorities. Normally all glyphs
+ * have one of the previous three values. If you
+ * don't want a glyph to participate in justification,
+ * and you don't want to set its factors to zero,
+ * you may instead assign it to the null priority. */
+ };
+
+ protected:
+ HBFixed beforeGrowLimit;/* The ratio by which the advance width of the
+ * glyph is permitted to grow on the left or top side. */
+ HBFixed beforeShrinkLimit;
+ /* The ratio by which the advance width of the
+ * glyph is permitted to shrink on the left or top side. */
+ HBFixed afterGrowLimit; /* The ratio by which the advance width of the glyph
+ * is permitted to grow on the right or bottom side. */
+ HBFixed afterShrinkLimit;
+ /* The ratio by which the advance width of the glyph
+ * is at most permitted to shrink on the right or
+ * bottom side. */
+ HBUINT16 growFlags; /* Flags controlling the grow case. */
+ HBUINT16 shrinkFlags; /* Flags controlling the shrink case. */
+
+ public:
+ DEFINE_SIZE_STATIC (20);
+};
+
+struct WidthDeltaPair /* Pairs a justification class with its width delta record. */
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this))); /* Fixed-size record: the struct check is the whole check. */
+ }
+
+ protected:
+ HBUINT32 justClass; /* The justification category associated
+ * with the wdRecord field. Only 7 bits of
+ * this field are used. (The other bits are
+ * used as padding to guarantee longword
+ * alignment of the following record). */
+ JustWidthDeltaEntry
+ wdRecord; /* The actual width delta record. */
+
+ public:
+ DEFINE_SIZE_STATIC (24);
+};
+
+typedef OT::LArrayOf<WidthDeltaPair> WidthDeltaCluster;
+
+struct JustificationCategory /* Justification category state table with its morph-style subtable header. */
+{
+ typedef void EntryData; /* State entries carry no per-entry payload. */
+
+ enum Flags
+ {
+ SetMark =0x8000,/* If set, make the current glyph the marked
+ * glyph. */
+ DontAdvance =0x4000,/* If set, don't advance to the next glyph before
+ * going to the new state. */
+ MarkCategory =0x3F80,/* The justification category for the marked
+ * glyph if nonzero. */
+ CurrentCategory =0x007F /* The justification category for the current
+ * glyph if nonzero. */
+ };
+
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const /* `base` is accepted but not used here. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ morphHeader.sanitize (c) &&
+ stHeader.sanitize (c)));
+ }
+
+ protected:
+ ChainSubtable<ObsoleteTypes>
+ morphHeader; /* Metamorphosis-style subtable header. */
+ StateTable<ObsoleteTypes, EntryData>
+ stHeader; /* The justification insertion state table header */
+ public:
+ DEFINE_SIZE_STATIC (30);
+};
+
+struct JustificationHeader /* Per-direction header: offsets to the class, width-delta and action subtables, then a lookup. */
+{
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const /* All offsets are resolved against `base`. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ justClassTable.sanitize (c, base, base) && /* Second `base` is presumably forwarded to the target's sanitize (). */
+ wdcTable.sanitize (c, base) &&
+ pcTable.sanitize (c, base) &&
+ lookupTable.sanitize (c, base)));
+ }
+
+ protected:
+ OffsetTo<JustificationCategory>
+ justClassTable; /* Offset to the justification category state table. */
+ OffsetTo<WidthDeltaCluster>
+ wdcTable; /* Offset from start of justification table to start
+ * of the subtable containing the width delta factors
+ * for the glyphs in your font.
+ *
+ * The width delta clusters table. */
+ OffsetTo<PostcompensationActionChain>
+ pcTable; /* Offset from start of justification table to start
+ * of postcompensation subtable (set to zero if none).
+ *
+ * The postcompensation subtable, if present in the font. */
+ Lookup<OffsetTo<WidthDeltaCluster>>
+ lookupTable; /* Lookup table associating glyphs with width delta
+ * clusters. See the description of Width Delta Clusters
+ * table for details on how to interpret the lookup values. */
+
+ public:
+ DEFINE_SIZE_MIN (8);
+};
+
+struct just /* The 'just' table: version, format and offsets to the horizontal and vertical headers. */
+{
+ static constexpr hb_tag_t tableTag = HB_AAT_TAG_just;
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+
+ return_trace (likely (c->check_struct (this) &&
+ version.major == 1 && /* Only major version 1 is accepted. */
+ horizData.sanitize (c, this, this) && /* Offsets are relative to the start of this table. */
+ vertData.sanitize (c, this, this)));
+ }
+
+ protected:
+ FixedVersion<>version; /* Version of the justification table
+ * (0x00010000u for version 1.0). */
+ HBUINT16 format; /* Format of the justification table (set to 0). */
+ OffsetTo<JustificationHeader>
+ horizData; /* Byte offset from the start of the justification table
+ * to the header for tables that contain justification
+ * information for horizontal text.
+ * If you are not including this information,
+ * store 0. */
+ OffsetTo<JustificationHeader>
+ vertData; /* ditto, vertical */
+
+ public:
+ DEFINE_SIZE_STATIC (10);
+};
+
+} /* namespace AAT */
+
+
+#endif /* HB_AAT_LAYOUT_JUST_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-kerx-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-kerx-table.hh
new file mode 100644
index 0000000000..1cd412164e
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat-layout-kerx-table.hh
@@ -0,0 +1,999 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_AAT_LAYOUT_KERX_TABLE_HH
+#define HB_AAT_LAYOUT_KERX_TABLE_HH
+
+#include "hb-kern.hh"
+#include "hb-aat-layout-ankr-table.hh"
+
+/*
+ * kerx -- Extended Kerning
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6kerx.html
+ */
+#define HB_AAT_TAG_kerx HB_TAG('k','e','r','x')
+
+
+namespace AAT {
+
+using namespace OT;
+
+
+static inline int /* Resolves a kerning value that may really be an offset to a tuple of values. */
+kerxTupleKern (int value,
+ unsigned int tupleCount,
+ const void *base,
+ hb_aat_apply_context_t *c)
+{
+ if (likely (!tupleCount || !c)) return value; /* No tuples, or no context to check with: value is used as is. */
+
+ unsigned int offset = value; /* Otherwise value is treated as a byte offset from base. */
+ const FWORD *pv = &StructAtOffset<FWORD> (base, offset);
+ if (unlikely (!c->sanitizer.check_array (pv, tupleCount))) return 0; /* Tuple array fails the sanitizer check. */
+ return *pv; /* Only the first value of the tuple is returned. */
+}
+
+
+struct hb_glyph_pair_t /* Search key compared against KernPair records by KernPair::cmp (). */
+{
+ hb_codepoint_t left;
+ hb_codepoint_t right;
+};
+
+struct KernPair /* One kerning record: left glyph, right glyph and value. */
+{
+ int get_kerning () const { return value; }
+
+ int cmp (const hb_glyph_pair_t &o) const /* Orders by left glyph first, then by right glyph. */
+ {
+ int ret = left.cmp (o.left);
+ if (ret) return ret;
+ return right.cmp (o.right);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ protected:
+ HBGlyphID left;
+ HBGlyphID right;
+ FWORD value;
+ public:
+ DEFINE_SIZE_STATIC (6);
+};
+
+template <typename KernSubTableHeader>
+struct KerxSubTableFormat0 /* Kerning subtable stored as a binary-searchable array of KernPair records. */
+{
+ int get_kerning (hb_codepoint_t left, hb_codepoint_t right,
+ hb_aat_apply_context_t *c = nullptr) const
+ {
+ hb_glyph_pair_t pair = {left, right};
+ int v = pairs.bsearch (pair).get_kerning ();
+ return kerxTupleKern (v, header.tuple_count (), this, c); /* With tuples, v is an offset rather than a value. */
+ }
+
+ bool apply (hb_aat_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+
+ if (!c->plan->requested_kerning)
+ return false;
+
+ if (header.coverage & header.Backwards) /* Backwards subtables are not applied by this format. */
+ return false;
+
+ accelerator_t accel (*this, c);
+ hb_kern_machine_t<accelerator_t> machine (accel, header.coverage & header.CrossStream);
+ machine.kern (c->font, c->buffer, c->plan->kern_mask);
+
+ return_trace (true);
+ }
+
+ struct accelerator_t /* Binds table and context so the kern machine can call get_kerning (left, right). */
+ {
+ const KerxSubTableFormat0 &table;
+ hb_aat_apply_context_t *c;
+
+ accelerator_t (const KerxSubTableFormat0 &table_,
+ hb_aat_apply_context_t *c_) :
+ table (table_), c (c_) {}
+
+ int get_kerning (hb_codepoint_t left, hb_codepoint_t right) const
+ { return table.get_kerning (left, right, c); }
+ };
+
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (pairs.sanitize (c)));
+ }
+
+ protected:
+ KernSubTableHeader header;
+ BinSearchArrayOf<KernPair, typename KernSubTableHeader::Types::HBUINT>
+ pairs; /* Sorted kern records. */
+ public:
+ DEFINE_SIZE_ARRAY (KernSubTableHeader::static_size + 16, pairs);
+};
+
+
+template <bool extended>
+struct Format1Entry;
+
+template <>
+struct Format1Entry<true> /* Variant whose entries carry an explicit kernActionIndex. */
+{
+ enum Flags
+ {
+ Push = 0x8000, /* If set, push this glyph on the kerning stack. */
+ DontAdvance = 0x4000, /* If set, don't advance to the next glyph
+ * before going to the new state. */
+ Reset = 0x2000, /* If set, reset the kerning data (clear the stack) */
+ Reserved = 0x1FFF, /* Not used; set to 0. */
+ };
+
+ struct EntryData
+ {
+ HBUINT16 kernActionIndex;/* Index into the kerning value array. If
+ * this index is 0xFFFF, then no kerning
+ * is to be performed. */
+ public:
+ DEFINE_SIZE_STATIC (2);
+ };
+
+ static bool performAction (const Entry<EntryData> &entry) /* False only for the 0xFFFF sentinel. */
+ { return entry.data.kernActionIndex != 0xFFFF; }
+
+ static unsigned int kernActionIndex (const Entry<EntryData> &entry)
+ { return entry.data.kernActionIndex; }
+};
+template <>
+struct Format1Entry<false> /* Variant with no entry payload; the action offset lives in the flags word. */
+{
+ enum Flags
+ {
+ Push = 0x8000, /* If set, push this glyph on the kerning stack. */
+ DontAdvance = 0x4000, /* If set, don't advance to the next glyph
+ * before going to the new state. */
+ Offset = 0x3FFF, /* Byte offset from beginning of subtable to the
+ * value table for the glyphs on the kerning stack. */
+
+ Reset = 0x0000, /* Not supported? */
+ };
+
+ typedef void EntryData;
+
+ static bool performAction (const Entry<EntryData> &entry) /* True when the Offset bits are non-zero. */
+ { return entry.flags & Offset; }
+
+ static unsigned int kernActionIndex (const Entry<EntryData> &entry) /* The Offset bits of the flags word. */
+ { return entry.flags & Offset; }
+};
+
+template <typename KernSubTableHeader>
+struct KerxSubTableFormat1 /* State-machine kerning: glyphs are pushed on a stack and kerned when an action fires. */
+{
+ typedef typename KernSubTableHeader::Types Types;
+ typedef typename Types::HBUINT HBUINT;
+
+ typedef Format1Entry<Types::extended> Format1EntryT;
+ typedef typename Format1EntryT::EntryData EntryData;
+
+ struct driver_context_t
+ {
+ static constexpr bool in_place = true; /* Positions are edited in place. */
+ enum
+ {
+ DontAdvance = Format1EntryT::DontAdvance,
+ };
+
+ driver_context_t (const KerxSubTableFormat1 *table_,
+ hb_aat_apply_context_t *c_) :
+ c (c_),
+ table (table_),
+ /* Apparently the offset kernAction is from the beginning of the state-machine,
+ * similar to offsets in morx table, NOT from beginning of this table, like
+ * other subtables in kerx. Discovered via testing. */
+ kernAction (&table->machine + table->kernAction),
+ depth (0),
+ crossStream (table->header.coverage & table->header.CrossStream) {}
+
+ bool is_actionable (StateTableDriver<Types, EntryData> *driver HB_UNUSED,
+ const Entry<EntryData> &entry)
+ { return Format1EntryT::performAction (entry); }
+ void transition (StateTableDriver<Types, EntryData> *driver,
+ const Entry<EntryData> &entry)
+ {
+ hb_buffer_t *buffer = driver->buffer;
+ unsigned int flags = entry.flags;
+
+ if (flags & Format1EntryT::Reset)
+ depth = 0;
+
+ if (flags & Format1EntryT::Push)
+ {
+ if (likely (depth < ARRAY_LENGTH (stack)))
+ stack[depth++] = buffer->idx; /* Remember the current buffer position. */
+ else
+ depth = 0; /* Probably not what CoreText does, but better? */
+ }
+
+ if (Format1EntryT::performAction (entry) && depth)
+ {
+ unsigned int tuple_count = hb_max (1u, table->header.tuple_count ()); /* Stride between values; at least 1. */
+
+ unsigned int kern_idx = Format1EntryT::kernActionIndex (entry);
+ kern_idx = Types::byteOffsetToIndex (kern_idx, &table->machine, kernAction.arrayZ);
+ const FWORD *actions = &kernAction[kern_idx];
+ if (!c->sanitizer.check_array (actions, depth, tuple_count)) /* Values for the whole stack must be readable. */
+ {
+ depth = 0;
+ return;
+ }
+
+ hb_mask_t kern_mask = c->plan->kern_mask;
+
+ /* From Apple 'kern' spec:
+ * "Each pops one glyph from the kerning stack and applies the kerning value to it.
+ * The end of the list is marked by an odd value... */
+ bool last = false;
+ while (!last && depth)
+ {
+ unsigned int idx = stack[--depth];
+ int v = *actions;
+ actions += tuple_count; /* Only the first value of each tuple is read. */
+ if (idx >= buffer->len) continue; /* Skipped before the end-of-list bit of v is examined. */
+
+ /* "The end of the list is marked by an odd value..." */
+ last = v & 1;
+ v &= ~1;
+
+ hb_glyph_position_t &o = buffer->pos[idx];
+
+ if (HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction))
+ {
+ if (crossStream)
+ {
+ /* The following flag is undocumented in the spec, but described
+ * in the 'kern' table example. */
+ if (v == -0x8000)
+ {
+ o.attach_type() = ATTACH_TYPE_NONE;
+ o.attach_chain() = 0;
+ o.y_offset = 0;
+ }
+ else if (o.attach_type())
+ {
+ o.y_offset += c->font->em_scale_y (v);
+ buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT;
+ }
+ }
+ else if (buffer->info[idx].mask & kern_mask) /* In-stream kerning only for glyphs with the kern mask. */
+ {
+ o.x_advance += c->font->em_scale_x (v);
+ o.x_offset += c->font->em_scale_x (v);
+ }
+ }
+ else
+ {
+ if (crossStream)
+ {
+ /* CoreText doesn't do crossStream kerning in vertical. We do. */
+ if (v == -0x8000)
+ {
+ o.attach_type() = ATTACH_TYPE_NONE;
+ o.attach_chain() = 0;
+ o.x_offset = 0;
+ }
+ else if (o.attach_type())
+ {
+ o.x_offset += c->font->em_scale_x (v);
+ buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT;
+ }
+ }
+ else if (buffer->info[idx].mask & kern_mask)
+ {
+ o.y_advance += c->font->em_scale_y (v);
+ o.y_offset += c->font->em_scale_y (v);
+ }
+ }
+ }
+ }
+ }
+
+ private:
+ hb_aat_apply_context_t *c;
+ const KerxSubTableFormat1 *table;
+ const UnsizedArrayOf<FWORD> &kernAction;
+ unsigned int stack[8]; /* Buffer indices of pushed glyphs. */
+ unsigned int depth; /* Number of live entries in stack. */
+ bool crossStream;
+ };
+
+ bool apply (hb_aat_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+
+ if (!c->plan->requested_kerning &&
+ !(header.coverage & header.CrossStream)) /* Cross-stream subtables run even when kerning was not requested. */
+ return false;
+
+ driver_context_t dc (this, c);
+
+ StateTableDriver<Types, EntryData> driver (machine, c->buffer, c->font->face);
+ driver.drive (&dc);
+
+ return_trace (true);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ /* The rest of array sanitizations are done at run-time. */
+ return_trace (likely (c->check_struct (this) &&
+ machine.sanitize (c)));
+ }
+
+ protected:
+ KernSubTableHeader header;
+ StateTable<Types, EntryData> machine;
+ NNOffsetTo<UnsizedArrayOf<FWORD>, HBUINT> kernAction;
+ public:
+ DEFINE_SIZE_STATIC (KernSubTableHeader::static_size + 5 * sizeof (HBUINT));
+};
+
+template <typename KernSubTableHeader>
+struct KerxSubTableFormat2 /* Class-based kerning: left and right class values select an entry of the kerning array. */
+{
+ typedef typename KernSubTableHeader::Types Types;
+ typedef typename Types::HBUINT HBUINT;
+
+ int get_kerning (hb_codepoint_t left, hb_codepoint_t right,
+ hb_aat_apply_context_t *c) const /* Unlike Format0, c is dereferenced and has no default. */
+ {
+ unsigned int num_glyphs = c->sanitizer.get_num_glyphs ();
+ unsigned int l = (this+leftClassTable).get_class (left, num_glyphs, 0);
+ unsigned int r = (this+rightClassTable).get_class (right, num_glyphs, 0);
+
+ const UnsizedArrayOf<FWORD> &arrayZ = this+array;
+ unsigned int kern_idx = l + r; /* Presumably both class values are offsets that simply add up -- TODO confirm. */
+ kern_idx = Types::offsetToIndex (kern_idx, this, arrayZ.arrayZ);
+ const FWORD *v = &arrayZ[kern_idx];
+ if (unlikely (!v->sanitize (&c->sanitizer))) return 0; /* Entry fails the sanitizer check: no kerning. */
+
+ return kerxTupleKern (*v, header.tuple_count (), this, c);
+ }
+
+ bool apply (hb_aat_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+
+ if (!c->plan->requested_kerning)
+ return false;
+
+ if (header.coverage & header.Backwards) /* Backwards subtables are not applied by this format. */
+ return false;
+
+ accelerator_t accel (*this, c);
+ hb_kern_machine_t<accelerator_t> machine (accel, header.coverage & header.CrossStream);
+ machine.kern (c->font, c->buffer, c->plan->kern_mask);
+
+ return_trace (true);
+ }
+
+ struct accelerator_t /* Binds table and context so the kern machine can call get_kerning (left, right). */
+ {
+ const KerxSubTableFormat2 &table;
+ hb_aat_apply_context_t *c;
+
+ accelerator_t (const KerxSubTableFormat2 &table_,
+ hb_aat_apply_context_t *c_) :
+ table (table_), c (c_) {}
+
+ int get_kerning (hb_codepoint_t left, hb_codepoint_t right) const
+ { return table.get_kerning (left, right, c); }
+ };
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ leftClassTable.sanitize (c, this) &&
+ rightClassTable.sanitize (c, this) &&
+ c->check_range (this, array))); /* Individual array entries are checked in get_kerning (). */
+ }
+
+ protected:
+ KernSubTableHeader header;
+ HBUINT rowWidth; /* The width, in bytes, of a row in the table. */
+ NNOffsetTo<typename Types::ClassTypeWide, HBUINT>
+ leftClassTable; /* Offset from beginning of this subtable to
+ * left-hand class table. */
+ NNOffsetTo<typename Types::ClassTypeWide, HBUINT>
+ rightClassTable;/* Offset from beginning of this subtable to
+ * right-hand class table. */
+ NNOffsetTo<UnsizedArrayOf<FWORD>, HBUINT>
+ array; /* Offset from beginning of this subtable to
+ * the start of the kerning array. */
+ public:
+ DEFINE_SIZE_STATIC (KernSubTableHeader::static_size + 4 * sizeof (HBUINT));
+};
+
+template <typename KernSubTableHeader>
+struct KerxSubTableFormat4 /* State-machine subtable that attaches the current glyph to a previously marked glyph. */
+{
+ typedef ExtendedTypes Types;
+
+ struct EntryData
+ {
+ HBUINT16 ankrActionIndex;/* Either 0xFFFF (for no action) or the index of
+ * the action to perform. */
+ public:
+ DEFINE_SIZE_STATIC (2);
+ };
+
+ struct driver_context_t /* Per-run state handed to StateTableDriver::drive(). */
+ {
+ static constexpr bool in_place = true;
+ enum Flags
+ {
+ Mark = 0x8000, /* If set, remember this glyph as the marked glyph. */
+ DontAdvance = 0x4000, /* If set, don't advance to the next glyph before
+ * going to the new state. */
+ Reserved = 0x3FFF, /* Not used; set to 0. */
+ };
+
+ enum SubTableFlags
+ {
+ ActionType = 0xC0000000, /* A two-bit field containing the action type. */
+ Unused = 0x3F000000, /* Unused - must be zero. */
+ Offset = 0x00FFFFFF, /* Masks the offset in bytes from the beginning
+ * of the subtable to the beginning of the control
+ * point table. */
+ };
+
+ driver_context_t (const KerxSubTableFormat4 *table,
+ hb_aat_apply_context_t *c_) :
+ c (c_),
+ action_type ((table->flags & ActionType) >> 30), /* Top two bits of flags select the switch case in transition(). */
+ ankrData ((HBUINT16 *) ((const char *) &table->machine + (table->flags & Offset))), /* Offset bits are applied relative to &table->machine. */
+ mark_set (false),
+ mark (0) {}
+
+ bool is_actionable (StateTableDriver<Types, EntryData> *driver HB_UNUSED,
+ const Entry<EntryData> &entry)
+ { return entry.data.ankrActionIndex != 0xFFFF; }
+ void transition (StateTableDriver<Types, EntryData> *driver, /* Applies the entry's action (if any), then updates the mark. */
+ const Entry<EntryData> &entry)
+ {
+ hb_buffer_t *buffer = driver->buffer;
+
+ if (mark_set && entry.data.ankrActionIndex != 0xFFFF && buffer->idx < buffer->len) /* Needs a mark, an action index and a current glyph. */
+ {
+ hb_glyph_position_t &o = buffer->cur_pos();
+ switch (action_type)
+ {
+ case 0: /* Control Point Actions.*/
+ {
+ /* Indexed into glyph outline. */
+ /* Each action (record in ankrData) contains two 16-bit fields, so we must
+ double the ankrActionIndex to get the correct offset here. */
+ const HBUINT16 *data = &ankrData[entry.data.ankrActionIndex * 2];
+ if (!c->sanitizer.check_array (data, 2)) return; /* Action record lies outside the sanitized range. */
+ unsigned int markControlPoint = *data++;
+ unsigned int currControlPoint = *data++;
+ hb_position_t markX = 0;
+ hb_position_t markY = 0;
+ hb_position_t currX = 0;
+ hb_position_t currY = 0;
+ if (!c->font->get_glyph_contour_point_for_origin (c->buffer->info[mark].codepoint,
+ markControlPoint,
+ HB_DIRECTION_LTR /*XXX*/,
+ &markX, &markY) ||
+ !c->font->get_glyph_contour_point_for_origin (c->buffer->cur ().codepoint,
+ currControlPoint,
+ HB_DIRECTION_LTR /*XXX*/,
+ &currX, &currY))
+ return; /* Either contour point is unavailable: leave the glyph untouched. */
+
+ o.x_offset = markX - currX;
+ o.y_offset = markY - currY;
+ }
+ break;
+
+ case 1: /* Anchor Point Actions. */
+ {
+ /* Indexed into 'ankr' table. */
+ /* Each action (record in ankrData) contains two 16-bit fields, so we must
+ double the ankrActionIndex to get the correct offset here. */
+ const HBUINT16 *data = &ankrData[entry.data.ankrActionIndex * 2];
+ if (!c->sanitizer.check_array (data, 2)) return; /* Action record lies outside the sanitized range. */
+ unsigned int markAnchorPoint = *data++;
+ unsigned int currAnchorPoint = *data++;
+ const Anchor &markAnchor = c->ankr_table->get_anchor (c->buffer->info[mark].codepoint,
+ markAnchorPoint,
+ c->sanitizer.get_num_glyphs ());
+ const Anchor &currAnchor = c->ankr_table->get_anchor (c->buffer->cur ().codepoint,
+ currAnchorPoint,
+ c->sanitizer.get_num_glyphs ());
+
+ o.x_offset = c->font->em_scale_x (markAnchor.xCoordinate) - c->font->em_scale_x (currAnchor.xCoordinate);
+ o.y_offset = c->font->em_scale_y (markAnchor.yCoordinate) - c->font->em_scale_y (currAnchor.yCoordinate);
+ }
+ break;
+
+ case 2: /* Control Point Coordinate Actions. */
+ {
+ /* Each action contains four 16-bit fields, so we multiply the ankrActionIndex
+ by 4 to get the correct offset for the given action. */
+ const FWORD *data = (const FWORD *) &ankrData[entry.data.ankrActionIndex * 4];
+ if (!c->sanitizer.check_array (data, 4)) return; /* Action record lies outside the sanitized range. */
+ int markX = *data++;
+ int markY = *data++;
+ int currX = *data++;
+ int currY = *data++;
+
+ o.x_offset = c->font->em_scale_x (markX) - c->font->em_scale_x (currX);
+ o.y_offset = c->font->em_scale_y (markY) - c->font->em_scale_y (currY);
+ }
+ break;
+ }
+ o.attach_type() = ATTACH_TYPE_MARK;
+ o.attach_chain() = (int) mark - (int) buffer->idx; /* Signed distance from the current glyph to the mark. */
+ buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT;
+ }
+
+ if (entry.flags & Mark) /* The mark is updated after the action, so the action above used the previous mark. */
+ {
+ mark_set = true;
+ mark = buffer->idx;
+ }
+ }
+
+ private:
+ hb_aat_apply_context_t *c;
+ unsigned int action_type;
+ const HBUINT16 *ankrData;
+ bool mark_set;
+ unsigned int mark;
+ };
+
+ bool apply (hb_aat_apply_context_t *c) const /* Drives the state machine over the buffer; always reports true. */
+ {
+ TRACE_APPLY (this);
+
+ driver_context_t dc (this, c);
+
+ StateTableDriver<Types, EntryData> driver (machine, c->buffer, c->font->face);
+ driver.drive (&dc);
+
+ return_trace (true);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ /* The rest of array sanitizations are done at run-time. */
+ return_trace (likely (c->check_struct (this) &&
+ machine.sanitize (c)));
+ }
+
+ protected:
+ KernSubTableHeader header;
+ StateTable<Types, EntryData> machine;
+ HBUINT32 flags; /* Decoded with the SubTableFlags masks in driver_context_t. */
+ public:
+ DEFINE_SIZE_STATIC (KernSubTableHeader::static_size + 20);
+};
+
+template <typename KernSubTableHeader>
+struct KerxSubTableFormat6 /* Kerning subtable indexed by row/column lookup tables, with 16-bit or 32-bit values. */
+{
+ enum Flags
+ {
+ ValuesAreLong = 0x00000001,
+ };
+
+ bool is_long () const { return flags & ValuesAreLong; } /* Selects the u.l layout over u.s. */
+
+ int get_kerning (hb_codepoint_t left, hb_codepoint_t right, /* Kerning for the (left, right) glyph pair. */
+ hb_aat_apply_context_t *c) const
+ {
+ unsigned int num_glyphs = c->sanitizer.get_num_glyphs ();
+ if (is_long ())
+ {
+ const typename U::Long &t = u.l;
+ unsigned int l = (this+t.rowIndexTable).get_value_or_null (left, num_glyphs);
+ unsigned int r = (this+t.columnIndexTable).get_value_or_null (right, num_glyphs);
+ unsigned int offset = l + r;
+ if (unlikely (offset < l)) return 0; /* Addition overflow. */
+ if (unlikely (hb_unsigned_mul_overflows (offset, sizeof (FWORD32)))) return 0;
+ const FWORD32 *v = &StructAtOffset<FWORD32> (&(this+t.array), offset * sizeof (FWORD32));
+ if (unlikely (!v->sanitize (&c->sanitizer))) return 0; /* Entry fails run-time sanitization: report no kerning. */
+ return kerxTupleKern (*v, header.tuple_count (), &(this+vector), c);
+ }
+ else
+ {
+ const typename U::Short &t = u.s;
+ unsigned int l = (this+t.rowIndexTable).get_value_or_null (left, num_glyphs);
+ unsigned int r = (this+t.columnIndexTable).get_value_or_null (right, num_glyphs);
+ unsigned int offset = l + r; /* No overflow guard here, unlike the long branch; operands come from Lookup<HBUINT16> tables. */
+ const FWORD *v = &StructAtOffset<FWORD> (&(this+t.array), offset * sizeof (FWORD));
+ if (unlikely (!v->sanitize (&c->sanitizer))) return 0; /* Entry fails run-time sanitization: report no kerning. */
+ return kerxTupleKern (*v, header.tuple_count (), &(this+vector), c);
+ }
+ }
+
+ bool apply (hb_aat_apply_context_t *c) const /* Runs pair kerning over the buffer using this subtable. */
+ {
+ TRACE_APPLY (this);
+
+ if (!c->plan->requested_kerning) /* Nothing to do unless the plan asked for kerning. */
+ return false;
+
+ if (header.coverage & header.Backwards) /* Subtables flagged Backwards are not applied here. */
+ return false;
+
+ accelerator_t accel (*this, c);
+ hb_kern_machine_t<accelerator_t> machine (accel, header.coverage & header.CrossStream);
+ machine.kern (c->font, c->buffer, c->plan->kern_mask);
+
+ return_trace (true);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const /* Validates whichever union arm is_long() selects; vector only when tuples are present. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ (is_long () ?
+ (
+ u.l.rowIndexTable.sanitize (c, this) &&
+ u.l.columnIndexTable.sanitize (c, this) &&
+ c->check_range (this, u.l.array)
+ ) : (
+ u.s.rowIndexTable.sanitize (c, this) &&
+ u.s.columnIndexTable.sanitize (c, this) &&
+ c->check_range (this, u.s.array)
+ )) &&
+ (header.tuple_count () == 0 ||
+ c->check_range (this, vector))));
+ }
+
+ struct accelerator_t /* Adapter binding the table and apply context into a two-argument get_kerning(). */
+ {
+ const KerxSubTableFormat6 &table;
+ hb_aat_apply_context_t *c;
+
+ accelerator_t (const KerxSubTableFormat6 &table_,
+ hb_aat_apply_context_t *c_) :
+ table (table_), c (c_) {}
+
+ int get_kerning (hb_codepoint_t left, hb_codepoint_t right) const
+ { return table.get_kerning (left, right, c); }
+ };
+
+ protected:
+ KernSubTableHeader header;
+ HBUINT32 flags; /* See enum Flags. */
+ HBUINT16 rowCount;
+ HBUINT16 columnCount;
+ union U /* Both arms hold three 32-bit offsets; only the pointed-to value widths differ. */
+ {
+ struct Long
+ {
+ LNNOffsetTo<Lookup<HBUINT32>> rowIndexTable;
+ LNNOffsetTo<Lookup<HBUINT32>> columnIndexTable;
+ LNNOffsetTo<UnsizedArrayOf<FWORD32>> array;
+ } l;
+ struct Short
+ {
+ LNNOffsetTo<Lookup<HBUINT16>> rowIndexTable;
+ LNNOffsetTo<Lookup<HBUINT16>> columnIndexTable;
+ LNNOffsetTo<UnsizedArrayOf<FWORD>> array;
+ } s;
+ } u;
+ LNNOffsetTo<UnsizedArrayOf<FWORD>> vector; /* Passed to kerxTupleKern() as the base for tuple lookups. */
+ public:
+ DEFINE_SIZE_STATIC (KernSubTableHeader::static_size + 24);
+};
+
+
+struct KerxSubTableHeader /* Fixed 12-byte header shared by every 'kerx' subtable format. */
+{
+ typedef ExtendedTypes Types;
+
+ unsigned tuple_count () const { return tupleCount; }
+ bool is_horizontal () const { return !(coverage & Vertical); }
+
+ enum Coverage
+ {
+ Vertical = 0x80000000u, /* Set if table has vertical kerning values. */
+ CrossStream = 0x40000000u, /* Set if table has cross-stream kerning values. */
+ Variation = 0x20000000u, /* Set if table has variation kerning values. */
+ Backwards = 0x10000000u, /* If clear, process the glyphs forwards, that
+ * is, from first to last in the glyph stream.
+ * If set, process them from last to first.
+ * This flag only applies to state-table based
+ * 'kerx' subtables (types 1 and 4). */
+ Reserved = 0x0FFFFF00u, /* Reserved, set to zero. */
+ SubtableType= 0x000000FFu, /* Subtable type. */
+ };
+
+ bool sanitize (hb_sanitize_context_t *c) const /* Only checks that the fixed-size header is in range. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this)));
+ }
+
+ public:
+ HBUINT32 length; /* Returned by KerxSubTable::get_size(). */
+ HBUINT32 coverage; /* Bit field; see enum Coverage. */
+ HBUINT32 tupleCount;
+ public:
+ DEFINE_SIZE_STATIC (12);
+};
+
+struct KerxSubTable /* Union over all supported 'kerx' subtable formats, selected by the header's type byte. */
+{
+ friend struct kerx;
+
+ unsigned int get_size () const { return u.header.length; }
+ unsigned int get_type () const { return u.header.coverage & u.header.SubtableType; }
+
+ template <typename context_t, typename ...Ts>
+ typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const /* Forwards to the format named by get_type(). */
+ {
+ unsigned int subtable_type = get_type ();
+ TRACE_DISPATCH (this, subtable_type);
+ switch (subtable_type) {
+ case 0: return_trace (c->dispatch (u.format0, hb_forward<Ts> (ds)...));
+ case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+ case 2: return_trace (c->dispatch (u.format2, hb_forward<Ts> (ds)...));
+ case 4: return_trace (c->dispatch (u.format4, hb_forward<Ts> (ds)...));
+ case 6: return_trace (c->dispatch (u.format6, hb_forward<Ts> (ds)...));
+ default: return_trace (c->default_return_value ()); /* Types without a case above get the context's default. */
+ }
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const /* Checks header and declared length, then the format-specific body. */
+ {
+ TRACE_SANITIZE (this);
+ if (!u.header.sanitize (c) ||
+ u.header.length <= u.header.static_size || /* Rejects a subtable whose length leaves no room after the header. */
+ !c->check_range (this, u.header.length))
+ return_trace (false);
+
+ return_trace (dispatch (c));
+ }
+
+ public:
+ union {
+ KerxSubTableHeader header;
+ KerxSubTableFormat0<KerxSubTableHeader> format0;
+ KerxSubTableFormat1<KerxSubTableHeader> format1;
+ KerxSubTableFormat2<KerxSubTableHeader> format2;
+ KerxSubTableFormat4<KerxSubTableHeader> format4;
+ KerxSubTableFormat6<KerxSubTableHeader> format6;
+ } u;
+ public:
+ DEFINE_SIZE_MIN (12);
+};
+
+
+/*
+ * The 'kerx' Table
+ */
+
+template <typename T>
+struct KerxTable
+{
+ /* CRTP downcast to the concrete table type T; see https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern */
+ const T* thiz () const { return static_cast<const T *> (this); }
+
+ bool has_state_machine () const
+ {
+ typedef typename T::SubTable SubTable;
+
+ /* Walk the packed subtables and report whether any of them has type 1. */
+ const SubTable *subtable = &thiz()->firstSubTable;
+ for (unsigned int remaining = thiz()->tableCount; remaining; remaining--)
+ {
+ if (subtable->get_type () == 1)
+ return true;
+ subtable = &StructAfter<SubTable> (*subtable);
+ }
+ return false;
+ }
+
+ bool has_cross_stream () const
+ {
+ typedef typename T::SubTable SubTable;
+
+ /* Walk the packed subtables and report whether any header has the CrossStream bit. */
+ const SubTable *subtable = &thiz()->firstSubTable;
+ for (unsigned int remaining = thiz()->tableCount; remaining; remaining--)
+ {
+ if (subtable->u.header.coverage & subtable->u.header.CrossStream)
+ return true;
+ subtable = &StructAfter<SubTable> (*subtable);
+ }
+ return false;
+ }
+
+ int get_h_kerning (hb_codepoint_t left, hb_codepoint_t right) const /* Sums the pair's kerning over all eligible subtables. */
+ {
+ typedef typename T::SubTable SubTable;
+
+ int v = 0;
+ const SubTable *st = &thiz()->firstSubTable;
+ unsigned int count = thiz()->tableCount;
+ for (unsigned int i = 0; i < count; i++)
+ {
+ /* Only horizontal subtables without the Variation or CrossStream bits contribute. */
+ if (!(st->u.header.coverage & (st->u.header.Variation | st->u.header.CrossStream)) &&
+ st->u.header.is_horizontal ())
+ v += st->get_kerning (left, right);
+ st = &StructAfter<SubTable> (*st); /* Always advance; skipping this on an ineligible subtable stalled the walk. */
+ }
+ return v;
+ }
+
+ bool apply (AAT::hb_aat_apply_context_t *c) const /* Applies every matching subtable in order; true if any dispatch returned true. */
+ {
+ typedef typename T::SubTable SubTable;
+
+ bool ret = false;
+ bool seenCrossStream = false;
+ c->set_lookup_index (0);
+ const SubTable *st = &thiz()->firstSubTable;
+ unsigned int count = thiz()->tableCount;
+ for (unsigned int i = 0; i < count; i++)
+ {
+ bool reverse;
+
+ if (!T::Types::extended && (st->u.header.coverage & st->u.header.Variation)) /* Variation subtables are skipped for non-extended tables. */
+ goto skip;
+
+ if (HB_DIRECTION_IS_HORIZONTAL (c->buffer->props.direction) != st->u.header.is_horizontal ()) /* Subtable orientation must match the buffer's. */
+ goto skip;
+
+ reverse = bool (st->u.header.coverage & st->u.header.Backwards) !=
+ HB_DIRECTION_IS_BACKWARD (c->buffer->props.direction);
+
+ if (!c->buffer->message (c->font, "start subtable %d", c->lookup_index)) /* A false return from the message hook skips this subtable. */
+ goto skip;
+
+ if (!seenCrossStream &&
+ (st->u.header.coverage & st->u.header.CrossStream))
+ {
+ /* Attach all glyphs into a chain. */
+ seenCrossStream = true;
+ hb_glyph_position_t *pos = c->buffer->pos;
+ unsigned int count = c->buffer->len; /* Shadows the outer count (and i below) only inside this block. */
+ for (unsigned int i = 0; i < count; i++)
+ {
+ pos[i].attach_type() = ATTACH_TYPE_CURSIVE;
+ pos[i].attach_chain() = HB_DIRECTION_IS_FORWARD (c->buffer->props.direction) ? -1 : +1;
+ /* We intentionally don't set HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT,
+ * since there needs to be a non-zero attachment for post-positioning to
+ * be needed. */
+ }
+ }
+
+ if (reverse)
+ c->buffer->reverse ();
+
+ {
+ /* See comment in sanitize() for conditional here. */
+ hb_sanitize_with_object_t with (&c->sanitizer, i < count - 1 ? st : (const SubTable *) nullptr);
+ ret |= st->dispatch (c);
+ }
+
+ if (reverse) /* Restore the original buffer order after a reversed pass. */
+ c->buffer->reverse ();
+
+ (void) c->buffer->message (c->font, "end subtable %d", c->lookup_index);
+
+ skip: /* Skipped and applied subtables both advance the pointer and the lookup index. */
+ st = &StructAfter<SubTable> (*st);
+ c->set_lookup_index (c->lookup_index + 1);
+ }
+
+ return ret;
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const /* Validates the table header, then every subtable in sequence. */
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!thiz()->version.sanitize (c) ||
+ (unsigned) thiz()->version < (unsigned) T::minVersion || /* Versions below T::minVersion are rejected. */
+ !thiz()->tableCount.sanitize (c)))
+ return_trace (false);
+
+ typedef typename T::SubTable SubTable;
+
+ const SubTable *st = &thiz()->firstSubTable;
+ unsigned int count = thiz()->tableCount;
+ for (unsigned int i = 0; i < count; i++)
+ {
+ if (unlikely (!st->u.header.sanitize (c))) /* The header must be readable before get_size() is used to advance. */
+ return_trace (false);
+ /* OpenType kern table has 2-byte subtable lengths. That's limiting.
+ * MS implementation also only supports one subtable, of format 0,
+ * anyway. Certain versions of some fonts, like Calibri, contain
+ * kern subtable that exceeds 64kb. Looks like, the subtable length
+ * is simply ignored. Which makes sense. It's only needed if you
+ * have multiple subtables. To handle such fonts, we just ignore
+ * the length for the last subtable. */
+ hb_sanitize_with_object_t with (c, i < count - 1 ? st : (const SubTable *) nullptr);
+
+ if (unlikely (!st->sanitize (c)))
+ return_trace (false);
+
+ st = &StructAfter<SubTable> (*st);
+ }
+
+ return_trace (true);
+ }
+};
+
+struct kerx : KerxTable<kerx> /* Concrete 'kerx' table; the traversal logic comes from the KerxTable base. */
+{
+ friend struct KerxTable<kerx>;
+
+ static constexpr hb_tag_t tableTag = HB_AAT_TAG_kerx;
+ static constexpr unsigned minVersion = 2u; /* KerxTable::sanitize() rejects lower versions. */
+
+ typedef KerxSubTableHeader SubTableHeader;
+ typedef SubTableHeader::Types Types;
+ typedef KerxSubTable SubTable;
+
+ bool has_data () const { return version; } /* A zero version field is treated as no data. */
+
+ protected:
+ HBUINT16 version; /* The version number of the extended kerning table
+ * (currently 2, 3, or 4). */
+ HBUINT16 unused; /* Set to 0. */
+ HBUINT32 tableCount; /* The number of subtables included in the extended kerning
+ * table. */
+ SubTable firstSubTable; /* Subtables. */
+/*subtableGlyphCoverageArray*/ /* Only if version >= 3. We don't use. */
+
+ public:
+ DEFINE_SIZE_MIN (8);
+};
+
+
+} /* namespace AAT */
+
+
+#endif /* HB_AAT_LAYOUT_KERX_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-morx-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-morx-table.hh
new file mode 100644
index 0000000000..04027a61be
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat-layout-morx-table.hh
@@ -0,0 +1,1157 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_AAT_LAYOUT_MORX_TABLE_HH
+#define HB_AAT_LAYOUT_MORX_TABLE_HH
+
+#include "hb-open-type.hh"
+#include "hb-aat-layout-common.hh"
+#include "hb-ot-layout-common.hh"
+#include "hb-aat-map.hh"
+
+/*
+ * morx -- Extended Glyph Metamorphosis
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6morx.html
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6mort.html
+ */
+#define HB_AAT_TAG_morx HB_TAG('m','o','r','x')
+#define HB_AAT_TAG_mort HB_TAG('m','o','r','t')
+
+
+namespace AAT {
+
+using namespace OT;
+
+template <typename Types>
+struct RearrangementSubtable /* State-machine subtable that reorders glyphs between a marked first and last position. */
+{
+ typedef typename Types::HBUINT HBUINT;
+
+ typedef void EntryData;
+
+ struct driver_context_t /* Per-run state handed to StateTableDriver::drive(). */
+ {
+ static constexpr bool in_place = true;
+ enum Flags
+ {
+ MarkFirst = 0x8000, /* If set, make the current glyph the first
+ * glyph to be rearranged. */
+ DontAdvance = 0x4000, /* If set, don't advance to the next glyph
+ * before going to the new state. This means
+ * that the glyph index doesn't change, even
+ * if the glyph at that index has changed. */
+ MarkLast = 0x2000, /* If set, make the current glyph the last
+ * glyph to be rearranged. */
+ Reserved = 0x1FF0, /* These bits are reserved and should be set to 0. */
+ Verb = 0x000F, /* The type of rearrangement specified. */
+ };
+
+ driver_context_t (const RearrangementSubtable *table HB_UNUSED) :
+ ret (false),
+ start (0), end (0) {}
+
+ bool is_actionable (StateTableDriver<Types, EntryData> *driver HB_UNUSED,
+ const Entry<EntryData> &entry)
+ {
+ return (entry.flags & Verb) && start < end;
+ }
+ void transition (StateTableDriver<Types, EntryData> *driver, /* Updates the marked range, then applies the entry's verb to it. */
+ const Entry<EntryData> &entry)
+ {
+ hb_buffer_t *buffer = driver->buffer;
+ unsigned int flags = entry.flags;
+
+ if (flags & MarkFirst)
+ start = buffer->idx;
+
+ if (flags & MarkLast)
+ end = hb_min (buffer->idx + 1, buffer->len); /* end is exclusive and clamped to the buffer length. */
+
+ if ((flags & Verb) && start < end)
+ {
+ /* The following map has two nibbles, for start-side
+ * and end-side. Values of 0,1,2 mean move that many
+ * to the other side. Value of 3 means move 2 and
+ * flip them. */
+ const unsigned char map[16] =
+ {
+ 0x00, /* 0 no change */
+ 0x10, /* 1 Ax => xA */
+ 0x01, /* 2 xD => Dx */
+ 0x11, /* 3 AxD => DxA */
+ 0x20, /* 4 ABx => xAB */
+ 0x30, /* 5 ABx => xBA */
+ 0x02, /* 6 xCD => CDx */
+ 0x03, /* 7 xCD => DCx */
+ 0x12, /* 8 AxCD => CDxA */
+ 0x13, /* 9 AxCD => DCxA */
+ 0x21, /* 10 ABxD => DxAB */
+ 0x31, /* 11 ABxD => DxBA */
+ 0x22, /* 12 ABxCD => CDxAB */
+ 0x32, /* 13 ABxCD => CDxBA */
+ 0x23, /* 14 ABxCD => DCxAB */
+ 0x33, /* 15 ABxCD => DCxBA */
+ };
+
+ unsigned int m = map[flags & Verb];
+ unsigned int l = hb_min (2u, m >> 4); /* Glyphs taken from the start side (at most 2). */
+ unsigned int r = hb_min (2u, m & 0x0F); /* Glyphs taken from the end side (at most 2). */
+ bool reverse_l = 3 == (m >> 4);
+ bool reverse_r = 3 == (m & 0x0F);
+
+ if (end - start >= l + r) /* The range must be long enough to hold both moved groups. */
+ {
+ buffer->merge_clusters (start, hb_min (buffer->idx + 1, buffer->len));
+ buffer->merge_clusters (start, end);
+
+ hb_glyph_info_t *info = buffer->info;
+ hb_glyph_info_t buf[4]; /* buf[0..1] holds the start group, buf[2..3] the end group. */
+
+ memcpy (buf, info + start, l * sizeof (buf[0]));
+ memcpy (buf + 2, info + end - r, r * sizeof (buf[0]));
+
+ if (l != r) /* The middle only shifts when the two groups differ in size. */
+ memmove (info + start + r, info + start + l, (end - start - l - r) * sizeof (buf[0]));
+
+ memcpy (info + start, buf + 2, r * sizeof (buf[0]));
+ memcpy (info + end - l, buf, l * sizeof (buf[0]));
+ if (reverse_l) /* The former start group now sits at the end; swap its two glyphs. */
+ {
+ buf[0] = info[end - 1];
+ info[end - 1] = info[end - 2];
+ info[end - 2] = buf[0];
+ }
+ if (reverse_r) /* The former end group now sits at the start; swap its two glyphs. */
+ {
+ buf[0] = info[start];
+ info[start] = info[start + 1];
+ info[start + 1] = buf[0];
+ }
+ }
+ }
+ }
+
+ public:
+ bool ret; /* NOTE(review): initialised to false and never assigned in this struct, so apply() returns false; confirm callers ignore it. */
+ private:
+ unsigned int start;
+ unsigned int end;
+ };
+
+ bool apply (hb_aat_apply_context_t *c) const /* Drives the state machine over the buffer and returns dc.ret. */
+ {
+ TRACE_APPLY (this);
+
+ driver_context_t dc (this);
+
+ StateTableDriver<Types, EntryData> driver (machine, c->buffer, c->face);
+ driver.drive (&dc);
+
+ return_trace (dc.ret);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (machine.sanitize (c));
+ }
+
+ protected:
+ StateTable<Types, EntryData> machine;
+ public:
+ DEFINE_SIZE_STATIC (16);
+};
+
+template <typename Types>
+struct ContextualSubtable /* State-machine subtable that substitutes the marked and/or current glyph. */
+{
+ typedef typename Types::HBUINT HBUINT;
+
+ struct EntryData
+ {
+ HBUINT16 markIndex; /* Index of the substitution table for the
+ * marked glyph (use 0xFFFF for none). */
+ HBUINT16 currentIndex; /* Index of the substitution table for the
+ * current glyph (use 0xFFFF for none). */
+ public:
+ DEFINE_SIZE_STATIC (4);
+ };
+
+ struct driver_context_t /* Per-run state handed to StateTableDriver::drive(). */
+ {
+ static constexpr bool in_place = true;
+ enum Flags
+ {
+ SetMark = 0x8000, /* If set, make the current glyph the marked glyph. */
+ DontAdvance = 0x4000, /* If set, don't advance to the next glyph before
+ * going to the new state. */
+ Reserved = 0x3FFF, /* These bits are reserved and should be set to 0. */
+ };
+
+ driver_context_t (const ContextualSubtable *table_,
+ hb_aat_apply_context_t *c_) :
+ ret (false),
+ c (c_),
+ mark_set (false),
+ mark (0),
+ table (table_),
+ subs (table+table->substitutionTables) {}
+
+ bool is_actionable (StateTableDriver<Types, EntryData> *driver,
+ const Entry<EntryData> &entry)
+ {
+ hb_buffer_t *buffer = driver->buffer;
+
+ if (buffer->idx == buffer->len && !mark_set) /* Mirrors the early return in transition(). */
+ return false;
+
+ return entry.data.markIndex != 0xFFFF || entry.data.currentIndex != 0xFFFF;
+ }
+ void transition (StateTableDriver<Types, EntryData> *driver, /* Substitutes the marked glyph, then the current glyph, then updates the mark. */
+ const Entry<EntryData> &entry)
+ {
+ hb_buffer_t *buffer = driver->buffer;
+
+ /* Looks like CoreText applies neither mark nor current substitution for
+ * end-of-text if mark was not explicitly set. */
+ if (buffer->idx == buffer->len && !mark_set)
+ return;
+
+ const HBGlyphID *replacement;
+
+ replacement = nullptr;
+ if (Types::extended)
+ {
+ if (entry.data.markIndex != 0xFFFF)
+ {
+ const Lookup<HBGlyphID> &lookup = subs[entry.data.markIndex];
+ replacement = lookup.get_value (buffer->info[mark].codepoint, driver->num_glyphs);
+ }
+ }
+ else
+ {
+ unsigned int offset = entry.data.markIndex + buffer->info[mark].codepoint; /* Non-extended form: index plus glyph id forms a word offset. */
+ const UnsizedArrayOf<HBGlyphID> &subs_old = (const UnsizedArrayOf<HBGlyphID> &) subs;
+ replacement = &subs_old[Types::wordOffsetToIndex (offset, table, subs_old.arrayZ)];
+ if (!replacement->sanitize (&c->sanitizer) || !*replacement) /* An out-of-range or zero entry means no substitution. */
+ replacement = nullptr;
+ }
+ if (replacement)
+ {
+ buffer->unsafe_to_break (mark, hb_min (buffer->idx + 1, buffer->len));
+ buffer->info[mark].codepoint = *replacement;
+ ret = true;
+ }
+
+ replacement = nullptr;
+ unsigned int idx = hb_min (buffer->idx, buffer->len - 1); /* Clamps to the last glyph when idx is at end of text. */
+ if (Types::extended)
+ {
+ if (entry.data.currentIndex != 0xFFFF)
+ {
+ const Lookup<HBGlyphID> &lookup = subs[entry.data.currentIndex];
+ replacement = lookup.get_value (buffer->info[idx].codepoint, driver->num_glyphs);
+ }
+ }
+ else
+ {
+ unsigned int offset = entry.data.currentIndex + buffer->info[idx].codepoint; /* Non-extended form: index plus glyph id forms a word offset. */
+ const UnsizedArrayOf<HBGlyphID> &subs_old = (const UnsizedArrayOf<HBGlyphID> &) subs;
+ replacement = &subs_old[Types::wordOffsetToIndex (offset, table, subs_old.arrayZ)];
+ if (!replacement->sanitize (&c->sanitizer) || !*replacement) /* An out-of-range or zero entry means no substitution. */
+ replacement = nullptr;
+ }
+ if (replacement)
+ {
+ buffer->info[idx].codepoint = *replacement;
+ ret = true;
+ }
+
+ if (entry.flags & SetMark) /* The mark is updated last, so the substitutions above used the previous mark. */
+ {
+ mark_set = true;
+ mark = buffer->idx;
+ }
+ }
+
+ public:
+ bool ret; /* Set to true once any glyph has been substituted. */
+ private:
+ hb_aat_apply_context_t *c;
+ bool mark_set;
+ unsigned int mark;
+ const ContextualSubtable *table;
+ const UnsizedOffsetListOf<Lookup<HBGlyphID>, HBUINT, false> &subs;
+ };
+
+ bool apply (hb_aat_apply_context_t *c) const /* Drives the state machine over the buffer and returns dc.ret. */
+ {
+ TRACE_APPLY (this);
+
+ driver_context_t dc (this, c);
+
+ StateTableDriver<Types, EntryData> driver (machine, c->buffer, c->face);
+ driver.drive (&dc);
+
+ return_trace (dc.ret);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const /* Validates the state table, then as many substitution lookups as entries reference. */
+ {
+ TRACE_SANITIZE (this);
+
+ unsigned int num_entries = 0;
+ if (unlikely (!machine.sanitize (c, &num_entries))) return_trace (false);
+
+ if (!Types::extended) /* Non-extended tables sanitize zero lookups here; entries are checked in transition(). */
+ return_trace (substitutionTables.sanitize (c, this, 0));
+
+ unsigned int num_lookups = 0;
+
+ const Entry<EntryData> *entries = machine.get_entries ();
+ for (unsigned int i = 0; i < num_entries; i++)
+ {
+ const EntryData &data = entries[i].data;
+
+ if (data.markIndex != 0xFFFF)
+ num_lookups = hb_max (num_lookups, 1 + data.markIndex); /* Highest referenced index plus one. */
+ if (data.currentIndex != 0xFFFF)
+ num_lookups = hb_max (num_lookups, 1 + data.currentIndex); /* Highest referenced index plus one. */
+ }
+
+ return_trace (substitutionTables.sanitize (c, this, num_lookups));
+ }
+
+ protected:
+ StateTable<Types, EntryData>
+ machine;
+ NNOffsetTo<UnsizedOffsetListOf<Lookup<HBGlyphID>, HBUINT, false>, HBUINT>
+ substitutionTables;
+ public:
+ DEFINE_SIZE_STATIC (20);
+};
+
+
+template <bool extended>
+struct LigatureEntry;
+
+template <>
+struct LigatureEntry<true> /* Extended form: the action index is stored in separate per-entry data. */
+{
+ enum Flags
+ {
+ SetComponent = 0x8000, /* Push this glyph onto the component stack for
+ * eventual processing. */
+ DontAdvance = 0x4000, /* Leave the glyph pointer at this glyph for the
+ next iteration. */
+ PerformAction = 0x2000, /* Use the ligActionIndex to process a ligature
+ * group. */
+ Reserved = 0x1FFF, /* These bits are reserved and should be set to 0. */
+ };
+
+ struct EntryData
+ {
+ HBUINT16 ligActionIndex; /* Index to the first ligActionTable entry
+ * for processing this group, if indicated
+ * by the flags. */
+ public:
+ DEFINE_SIZE_STATIC (2);
+ };
+
+ static bool performAction (const Entry<EntryData> &entry) /* True when the PerformAction flag bit is set. */
+ { return entry.flags & PerformAction; }
+
+ static unsigned int ligActionIndex (const Entry<EntryData> &entry) /* Reads the index from the entry data. */
+ { return entry.data.ligActionIndex; }
+};
+template <>
+struct LigatureEntry<false> /* Non-extended form: the action offset is packed into the low 14 flag bits. */
+{
+ enum Flags
+ {
+ SetComponent = 0x8000, /* Push this glyph onto the component stack for
+ * eventual processing. */
+ DontAdvance = 0x4000, /* Leave the glyph pointer at this glyph for the
+ next iteration. */
+ Offset = 0x3FFF, /* Byte offset from beginning of subtable to the
+ * ligature action list. This value must be a
+ * multiple of 4. */
+ };
+
+ typedef void EntryData;
+
+ static bool performAction (const Entry<EntryData> &entry) /* A non-zero offset doubles as the perform-action signal. */
+ { return entry.flags & Offset; }
+
+ static unsigned int ligActionIndex (const Entry<EntryData> &entry) /* Returns the masked offset bits. */
+ { return entry.flags & Offset; }
+};
+
+
+template <typename Types>
+struct LigatureSubtable
+{
+ typedef typename Types::HBUINT HBUINT;
+
+ typedef LigatureEntry<Types::extended> LigatureEntryT;
+ typedef typename LigatureEntryT::EntryData EntryData;
+
+ struct driver_context_t
+ {
+ static constexpr bool in_place = false;
+ enum
+ {
+ DontAdvance = LigatureEntryT::DontAdvance,
+ };
+ enum LigActionFlags
+ {
+ LigActionLast = 0x80000000, /* This is the last action in the list. This also
+ * implies storage. */
+ LigActionStore = 0x40000000, /* Store the ligature at the current cumulated index
+ * in the ligature table in place of the marked
+ * (i.e. currently-popped) glyph. */
+ LigActionOffset = 0x3FFFFFFF, /* A 30-bit value which is sign-extended to 32-bits
+ * and added to the glyph ID, resulting in an index
+ * into the component table. */
+ };
+
+ driver_context_t (const LigatureSubtable *table_,
+ hb_aat_apply_context_t *c_) :
+ ret (false),
+ c (c_),
+ table (table_),
+ ligAction (table+table->ligAction),
+ component (table+table->component),
+ ligature (table+table->ligature),
+ match_length (0) {}
+
+ bool is_actionable (StateTableDriver<Types, EntryData> *driver HB_UNUSED,
+ const Entry<EntryData> &entry)
+ {
+ return LigatureEntryT::performAction (entry);
+ }
+ void transition (StateTableDriver<Types, EntryData> *driver,
+ const Entry<EntryData> &entry)
+ {
+ hb_buffer_t *buffer = driver->buffer;
+
+ DEBUG_MSG (APPLY, nullptr, "Ligature transition at %u", buffer->idx);
+ if (entry.flags & LigatureEntryT::SetComponent)
+ {
+ /* Never mark same index twice, in case DontAdvance was used... */
+ if (match_length && match_positions[(match_length - 1u) % ARRAY_LENGTH (match_positions)] == buffer->out_len)
+ match_length--;
+
+ match_positions[match_length++ % ARRAY_LENGTH (match_positions)] = buffer->out_len;
+ DEBUG_MSG (APPLY, nullptr, "Set component at %u", buffer->out_len);
+ }
+
+ if (LigatureEntryT::performAction (entry))
+ {
+ DEBUG_MSG (APPLY, nullptr, "Perform action with %u", match_length);
+ unsigned int end = buffer->out_len;
+
+ if (unlikely (!match_length))
+ return;
+
+ if (buffer->idx >= buffer->len)
+ return; /* TODO Work on previous instead? */
+
+ unsigned int cursor = match_length;
+
+ unsigned int action_idx = LigatureEntryT::ligActionIndex (entry);
+ action_idx = Types::offsetToIndex (action_idx, table, ligAction.arrayZ);
+ const HBUINT32 *actionData = &ligAction[action_idx];
+
+ unsigned int ligature_idx = 0;
+ unsigned int action;
+ do
+ {
+ if (unlikely (!cursor))
+ {
+ /* Stack underflow. Clear the stack. */
+ DEBUG_MSG (APPLY, nullptr, "Stack underflow");
+ match_length = 0;
+ break;
+ }
+
+ DEBUG_MSG (APPLY, nullptr, "Moving to stack position %u", cursor - 1);
+ buffer->move_to (match_positions[--cursor % ARRAY_LENGTH (match_positions)]);
+
+ if (unlikely (!actionData->sanitize (&c->sanitizer))) break;
+ action = *actionData;
+
+ uint32_t uoffset = action & LigActionOffset;
+ if (uoffset & 0x20000000)
+ uoffset |= 0xC0000000; /* Sign-extend. */
+ int32_t offset = (int32_t) uoffset;
+ unsigned int component_idx = buffer->cur().codepoint + offset;
+ component_idx = Types::wordOffsetToIndex (component_idx, table, component.arrayZ);
+ const HBUINT16 &componentData = component[component_idx];
+ if (unlikely (!componentData.sanitize (&c->sanitizer))) break;
+ ligature_idx += componentData;
+
+ DEBUG_MSG (APPLY, nullptr, "Action store %u last %u",
+ bool (action & LigActionStore),
+ bool (action & LigActionLast));
+ if (action & (LigActionStore | LigActionLast))
+ {
+ ligature_idx = Types::offsetToIndex (ligature_idx, table, ligature.arrayZ);
+ const HBGlyphID &ligatureData = ligature[ligature_idx];
+ if (unlikely (!ligatureData.sanitize (&c->sanitizer))) break;
+ hb_codepoint_t lig = ligatureData;
+
+ DEBUG_MSG (APPLY, nullptr, "Produced ligature %u", lig);
+ buffer->replace_glyph (lig);
+
+ unsigned int lig_end = match_positions[(match_length - 1u) % ARRAY_LENGTH (match_positions)] + 1u;
+ /* Now go and delete all subsequent components. */
+ while (match_length - 1u > cursor)
+ {
+ DEBUG_MSG (APPLY, nullptr, "Skipping ligature component");
+ buffer->move_to (match_positions[--match_length % ARRAY_LENGTH (match_positions)]);
+ buffer->replace_glyph (DELETED_GLYPH);
+ }
+
+ buffer->move_to (lig_end);
+ buffer->merge_out_clusters (match_positions[cursor % ARRAY_LENGTH (match_positions)], buffer->out_len);
+ }
+
+ actionData++;
+ }
+ while (!(action & LigActionLast));
+ buffer->move_to (end);
+ }
+ }
+
+ public:
+ bool ret;
+ private:
+ hb_aat_apply_context_t *c;
+ const LigatureSubtable *table;
+ const UnsizedArrayOf<HBUINT32> &ligAction;
+ const UnsizedArrayOf<HBUINT16> &component;
+ const UnsizedArrayOf<HBGlyphID> &ligature;
+ unsigned int match_length;
+ unsigned int match_positions[HB_MAX_CONTEXT_LENGTH];
+ };
+
+ /* Drive the ligature state machine over the buffer owned by the apply
+  * context and report the `ret` flag of the per-run driver context. */
+ bool apply (hb_aat_apply_context_t *c) const
+ {
+   TRACE_APPLY (this);
+
+   /* Per-run state (match stack, table pointers) lives in the context. */
+   driver_context_t context (this, c);
+
+   StateTableDriver<Types, EntryData> state_machine (machine, c->buffer, c->face);
+   state_machine.drive (&context);
+
+   const bool applied = context.ret;
+   return_trace (applied);
+ }
+
+ /* Validate the fixed-size header, the state machine, and that all three
+  * offsets are non-zero.  The arrays those offsets point to are not
+  * checked here; their elements are sanitized at run-time. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+   TRACE_SANITIZE (this);
+
+   if (!c->check_struct (this)) return_trace (false);
+   if (!machine.sanitize (c)) return_trace (false);
+
+   const bool offsets_present = ligAction && component && ligature;
+   return_trace (offsets_present);
+ }
+
+ protected:
+ StateTable<Types, EntryData>
+ machine;
+ NNOffsetTo<UnsizedArrayOf<HBUINT32>, HBUINT>
+ ligAction; /* Offset to the ligature action table. */
+ NNOffsetTo<UnsizedArrayOf<HBUINT16>, HBUINT>
+ component; /* Offset to the component table. */
+ NNOffsetTo<UnsizedArrayOf<HBGlyphID>, HBUINT>
+ ligature; /* Offset to the actual ligature lists. */
+ public:
+ DEFINE_SIZE_STATIC (28);
+};
+
+template <typename Types>
+struct NoncontextualSubtable
+{
+  /* Walk the buffer and overwrite the codepoint of every glyph for which
+   * the `substitute` lookup yields a value.  Returns true when at least
+   * one glyph was replaced. */
+  bool apply (hb_aat_apply_context_t *c) const
+  {
+    TRACE_APPLY (this);
+
+    const unsigned int glyph_count = c->face->get_num_glyphs ();
+    hb_glyph_info_t *glyphs = c->buffer->info;
+    const unsigned int length = c->buffer->len;
+    bool changed = false;
+
+    for (unsigned int pos = 0; pos != length; ++pos)
+    {
+      hb_glyph_info_t &glyph = glyphs[pos];
+      const HBGlyphID *mapped = substitute.get_value (glyph.codepoint, glyph_count);
+      if (!mapped) continue; /* No entry for this glyph: leave it alone. */
+
+      glyph.codepoint = *mapped;
+      changed = true;
+    }
+
+    return_trace (changed);
+  }
+
+  /* The subtable consists solely of the lookup, so validating it is
+   * validating the lookup. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool lookup_ok = substitute.sanitize (c);
+    return_trace (lookup_ok);
+  }
+
+  protected:
+  Lookup<HBGlyphID> substitute; /* Glyph -> replacement glyph. */
+  public:
+  DEFINE_SIZE_MIN (2);
+};
+
+/* Glyph-insertion subtable: a state machine whose entries can insert runs of
+ * glyphs from the insertion glyph table at the current glyph and/or at a
+ * previously marked glyph. */
+template <typename Types>
+struct InsertionSubtable
+{
+ typedef typename Types::HBUINT HBUINT;
+
+ /* Per-entry payload of the state machine: where in the insertion glyph
+ * table the glyphs to insert start (the counts live in the entry flags). */
+ struct EntryData
+ {
+ HBUINT16 currentInsertIndex; /* Zero-based index into the insertion glyph table.
+ * The number of glyphs to be inserted is contained
+ * in the currentInsertCount field in the flags.
+ * A value of 0xFFFF indicates no insertion is to
+ * be done. */
+ HBUINT16 markedInsertIndex; /* Zero-based index into the insertion glyph table.
+ * The number of glyphs to be inserted is contained
+ * in the markedInsertCount field in the flags.
+ * A value of 0xFFFF indicates no insertion is to
+ * be done. */
+ public:
+ DEFINE_SIZE_STATIC (4);
+ };
+
+ /* Callback object handed to StateTableDriver::drive(); holds the marked
+ * position and performs the insertions for each state transition. */
+ struct driver_context_t
+ {
+ /* This subtable changes the number of glyphs, so it is not in-place. */
+ static constexpr bool in_place = false;
+ enum Flags
+ {
+ SetMark = 0x8000, /* If set, mark the current glyph. */
+ DontAdvance = 0x4000, /* If set, don't advance to the next glyph before
+ * going to the new state. This does not mean
+ * that the glyph pointed to is the same one as
+ * before. If you've made insertions immediately
+ * downstream of the current glyph, the next glyph
+ * processed would in fact be the first one
+ * inserted. */
+ CurrentIsKashidaLike= 0x2000, /* If set, and the currentInsertList is nonzero,
+ * then the specified glyph list will be inserted
+ * as a kashida-like insertion, either before or
+ * after the current glyph (depending on the state
+ * of the currentInsertBefore flag). If clear, and
+ * the currentInsertList is nonzero, then the
+ * specified glyph list will be inserted as a
+ * split-vowel-like insertion, either before or
+ * after the current glyph (depending on the state
+ * of the currentInsertBefore flag). */
+ MarkedIsKashidaLike= 0x1000, /* If set, and the markedInsertList is nonzero,
+ * then the specified glyph list will be inserted
+ * as a kashida-like insertion, either before or
+ * after the marked glyph (depending on the state
+ * of the markedInsertBefore flag). If clear, and
+ * the markedInsertList is nonzero, then the
+ * specified glyph list will be inserted as a
+ * split-vowel-like insertion, either before or
+ * after the marked glyph (depending on the state
+ * of the markedInsertBefore flag). */
+ CurrentInsertBefore= 0x0800, /* If set, specifies that insertions are to be made
+ * to the left of the current glyph. If clear,
+ * they're made to the right of the current glyph. */
+ MarkedInsertBefore= 0x0400, /* If set, specifies that insertions are to be
+ * made to the left of the marked glyph. If clear,
+ * they're made to the right of the marked glyph. */
+ CurrentInsertCount= 0x3E0, /* This 5-bit field is treated as a count of the
+ * number of glyphs to insert at the current
+ * position. Since zero means no insertions, the
+ * largest number of insertions at any given
+ * current location is 31 glyphs. */
+ MarkedInsertCount= 0x001F, /* This 5-bit field is treated as a count of the
+ * number of glyphs to insert at the marked
+ * position. Since zero means no insertions, the
+ * largest number of insertions at any given
+ * marked location is 31 glyphs. */
+ };
+
+ /* Start with no result, mark at position 0, and the insertion glyph
+ * table resolved relative to the subtable. */
+ driver_context_t (const InsertionSubtable *table,
+ hb_aat_apply_context_t *c_) :
+ ret (false),
+ c (c_),
+ mark (0),
+ insertionAction (table+table->insertionAction) {}
+
+ /* An entry does work only if some insert count is non-zero and at least
+ * one of the two insert indices is not the 0xFFFF "no insertion" value. */
+ bool is_actionable (StateTableDriver<Types, EntryData> *driver HB_UNUSED,
+ const Entry<EntryData> &entry)
+ {
+ return (entry.flags & (CurrentInsertCount | MarkedInsertCount)) &&
+ (entry.data.currentInsertIndex != 0xFFFF ||entry.data.markedInsertIndex != 0xFFFF);
+ }
+ /* Perform the marked-glyph insertion (if any), update the mark, then
+ * perform the current-glyph insertion (if any).
+ * NOTE(review): nothing visible in this struct sets `ret` after
+ * construction; confirm whether the driver or a caller is expected to. */
+ void transition (StateTableDriver<Types, EntryData> *driver,
+ const Entry<EntryData> &entry)
+ {
+ hb_buffer_t *buffer = driver->buffer;
+ unsigned int flags = entry.flags;
+
+ /* Output position before anything is inserted by this transition; this
+ * is the value SetMark stores below. */
+ unsigned mark_loc = buffer->out_len;
+
+ if (entry.data.markedInsertIndex != 0xFFFF)
+ {
+ unsigned int count = (flags & MarkedInsertCount);
+ /* Charge the insertion to the buffer's operation budget; give up on
+ * this transition once the budget is exhausted. */
+ if (unlikely ((buffer->max_ops -= count) <= 0)) return;
+ unsigned int start = entry.data.markedInsertIndex;
+ const HBGlyphID *glyphs = &insertionAction[start];
+ /* The glyph run is bounds-checked here, at run-time; if it is not
+ * within the table, insert nothing. */
+ if (unlikely (!c->sanitizer.check_array (glyphs, count))) count = 0;
+
+ bool before = flags & MarkedInsertBefore;
+
+ /* Remember where we were, then reposition at the marked glyph. */
+ unsigned int end = buffer->out_len;
+ buffer->move_to (mark);
+
+ /* When inserting after the marked glyph, the glyph itself is emitted
+ * first (copy_glyph) and stepped over afterwards (skip_glyph).
+ * NOTE(review): exact copy_glyph/skip_glyph semantics are defined in
+ * hb_buffer_t, outside this view — confirm. */
+ if (buffer->idx < buffer->len && !before)
+ buffer->copy_glyph ();
+ /* TODO We ignore KashidaLike setting. */
+ for (unsigned int i = 0; i < count; i++)
+ buffer->output_glyph (glyphs[i]);
+ if (buffer->idx < buffer->len && !before)
+ buffer->skip_glyph ();
+
+ /* Return to the saved position, shifted by the glyphs just inserted. */
+ buffer->move_to (end + count);
+
+ buffer->unsafe_to_break_from_outbuffer (mark, hb_min (buffer->idx + 1, buffer->len));
+ }
+
+ /* The mark is updated only after the marked insertion above, so that
+ * insertion used the previously stored mark. */
+ if (flags & SetMark)
+ mark = mark_loc;
+
+ if (entry.data.currentInsertIndex != 0xFFFF)
+ {
+ /* The current-insert count occupies bits 5..9 of the flags. */
+ unsigned int count = (flags & CurrentInsertCount) >> 5;
+ if (unlikely ((buffer->max_ops -= count) <= 0)) return;
+ unsigned int start = entry.data.currentInsertIndex;
+ const HBGlyphID *glyphs = &insertionAction[start];
+ /* As above: an out-of-bounds glyph run results in no insertion. */
+ if (unlikely (!c->sanitizer.check_array (glyphs, count))) count = 0;
+
+ bool before = flags & CurrentInsertBefore;
+
+ unsigned int end = buffer->out_len;
+
+ if (buffer->idx < buffer->len && !before)
+ buffer->copy_glyph ();
+ /* TODO We ignore KashidaLike setting. */
+ for (unsigned int i = 0; i < count; i++)
+ buffer->output_glyph (glyphs[i]);
+ if (buffer->idx < buffer->len && !before)
+ buffer->skip_glyph ();
+
+ /* Humm. Not sure where to move to. There's this wording under
+ * DontAdvance flag:
+ *
+ * "If set, don't update the glyph index before going to the new state.
+ * This does not mean that the glyph pointed to is the same one as
+ * before. If you've made insertions immediately downstream of the
+ * current glyph, the next glyph processed would in fact be the first
+ * one inserted."
+ *
+ * This suggests that if DontAdvance is NOT set, we should move to
+ * end+count. If it *was*, then move to end, such that newly inserted
+ * glyphs are now visible.
+ *
+ * https://github.com/harfbuzz/harfbuzz/issues/1224#issuecomment-427691417
+ */
+ buffer->move_to ((flags & DontAdvance) ? end : end + count);
+ }
+ }
+
+ public:
+ bool ret; /* Result reported by InsertionSubtable::apply(). */
+ private:
+ hb_aat_apply_context_t *c; /* Apply context; supplies the run-time sanitizer. */
+ unsigned int mark; /* Output-buffer position of the marked glyph. */
+ const UnsizedArrayOf<HBGlyphID> &insertionAction; /* Insertion glyph table. */
+ };
+
+ /* Run the insertion state machine over the context's buffer and return
+ * the driver context's `ret` flag. */
+ bool apply (hb_aat_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+
+ driver_context_t dc (this, c);
+
+ StateTableDriver<Types, EntryData> driver (machine, c->buffer, c->face);
+ driver.drive (&dc);
+
+ return_trace (dc.ret);
+ }
+
+ /* Validate the header and state machine and require a non-zero
+ * insertionAction offset; glyph runs are checked when they are used. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ /* The rest of array sanitizations are done at run-time. */
+ return_trace (c->check_struct (this) && machine.sanitize (c) &&
+ insertionAction);
+ }
+
+ protected:
+ StateTable<Types, EntryData>
+ machine;
+ NNOffsetTo<UnsizedArrayOf<HBGlyphID>, HBUINT>
+ insertionAction; /* Byte offset from stateHeader to the start of
+ * the insertion glyph table. */
+ public:
+ DEFINE_SIZE_STATIC (20);
+};
+
+
+struct Feature
+{
+  /* A feature record is a fixed-size struct with no offsets, so a bounds
+   * check of the struct itself is the whole validation. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool in_bounds = c->check_struct (this);
+    return_trace (in_bounds);
+  }
+
+  public:
+  HBUINT16 featureType;    /* Feature type. */
+  HBUINT16 featureSetting; /* Setting (aka selector) within that type. */
+  HBUINT32 enableFlags;    /* Flags turned on when this type/setting pair
+                            * is selected. */
+  HBUINT32 disableFlags;   /* Complement of the flags turned off when this
+                            * type/setting pair is selected. */
+
+  public:
+  DEFINE_SIZE_STATIC (12);
+};
+
+template <typename Types>
+struct ChainSubtable
+{
+  typedef typename Types::HBUINT HBUINT;
+
+  template <typename T>
+  friend struct Chain;
+
+  /* Total byte length of the subtable, as recorded in its header. */
+  unsigned int get_size () const { return length; }
+
+  /* Subtable type (see enum Type): the low byte of the coverage word. */
+  unsigned int get_type () const { return coverage & 0xFF; }
+
+  /* Coverage flags (see enum Coverage): the top byte of the coverage word. */
+  unsigned int get_coverage () const
+  {
+    const unsigned int top_byte_shift = sizeof (HBUINT) * 8 - 8;
+    return coverage >> top_byte_shift;
+  }
+
+  enum Coverage
+  {
+    Vertical      = 0x80, /* Set: apply to vertical text only.
+                           * Clear: apply to horizontal text only. */
+    Backwards     = 0x40, /* Set: process glyphs in descending order.
+                           * Clear: process glyphs in ascending order. */
+    AllDirections = 0x20, /* Set: apply to both horizontal and vertical
+                           * text; the Vertical bit is then ignored. */
+    Logical       = 0x10, /* Set: process glyphs in logical order (or
+                           * reverse logical order). */
+  };
+  enum Type
+  {
+    Rearrangement = 0,
+    Contextual    = 1,
+    Ligature      = 2,
+    Noncontextual = 4,
+    Insertion     = 5
+  };
+
+  /* Forward the context to the union member selected by the subtable type;
+   * unknown types yield the context's default return value. */
+  template <typename context_t, typename ...Ts>
+  typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+  {
+    unsigned int subtable_type = get_type ();
+    TRACE_DISPATCH (this, subtable_type);
+    switch (subtable_type)
+    {
+      case Rearrangement:
+        return_trace (c->dispatch (u.rearrangement, hb_forward<Ts> (ds)...));
+      case Contextual:
+        return_trace (c->dispatch (u.contextual, hb_forward<Ts> (ds)...));
+      case Ligature:
+        return_trace (c->dispatch (u.ligature, hb_forward<Ts> (ds)...));
+      case Noncontextual:
+        return_trace (c->dispatch (u.noncontextual, hb_forward<Ts> (ds)...));
+      case Insertion:
+        return_trace (c->dispatch (u.insertion, hb_forward<Ts> (ds)...));
+      default:
+        return_trace (c->default_return_value ());
+    }
+  }
+
+  /* Apply the concrete subtable, with the run-time sanitizer scoped to this
+   * subtable object for the duration of the call. */
+  bool apply (hb_aat_apply_context_t *c) const
+  {
+    TRACE_APPLY (this);
+    hb_sanitize_with_object_t scope (&c->sanitizer, this);
+    return_trace (dispatch (c));
+  }
+
+  /* Check that the recorded length is readable, larger than the minimum
+   * header size and within the blob, then sanitize the concrete subtable
+   * with the sanitizer scoped to this object. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+
+    const bool header_ok = length.sanitize (c) &&
+                           length > min_size &&
+                           c->check_range (this, length);
+    if (!header_ok)
+      return_trace (false);
+
+    hb_sanitize_with_object_t scope (c, this);
+    return_trace (dispatch (c));
+  }
+
+  protected:
+  HBUINT   length;          /* Total subtable length, including this header. */
+  HBUINT   coverage;        /* Coverage flags and subtable type. */
+  HBUINT32 subFeatureFlags; /* The 32-bit mask identifying which subtable this is. */
+  union {
+    RearrangementSubtable<Types> rearrangement;
+    ContextualSubtable<Types>    contextual;
+    LigatureSubtable<Types>      ligature;
+    NoncontextualSubtable<Types> noncontextual;
+    InsertionSubtable<Types>     insertion;
+  } u;
+  public:
+  DEFINE_SIZE_MIN (2 * sizeof (HBUINT) + 4);
+};
+
+/* One metamorphosis chain: a header, an array of Feature records, and the
+ * chain's subtables stored directly after the feature array. */
+template <typename Types>
+struct Chain
+{
+ typedef typename Types::HBUINT HBUINT;
+
+ /* Compute the subtable-selection mask for this chain.  Starts from
+ * defaultFlags; for every feature record whose type/setting pair is found
+ * in the map's feature list, the mask is ANDed with disableFlags and then
+ * ORed with enableFlags. */
+ hb_mask_t compile_flags (const hb_aat_map_builder_t *map) const
+ {
+ hb_mask_t flags = defaultFlags;
+ {
+ unsigned int count = featureCount;
+ for (unsigned i = 0; i < count; i++)
+ {
+ const Feature &feature = featureZ[i];
+ hb_aat_layout_feature_type_t type = (hb_aat_layout_feature_type_t) (unsigned int) feature.featureType;
+ hb_aat_layout_feature_selector_t setting = (hb_aat_layout_feature_selector_t) (unsigned int) feature.featureSetting;
+ retry:
+ // Check whether this type/setting pair was requested in the map, and if so, apply its flags.
+ // (The search here only looks at the type and setting fields of feature_info_t.)
+ hb_aat_map_builder_t::feature_info_t info = { type, setting, false, 0 };
+ if (map->features.bsearch (info))
+ {
+ flags &= feature.disableFlags;
+ flags |= feature.enableFlags;
+ }
+ else if (type == HB_AAT_LAYOUT_FEATURE_TYPE_LETTER_CASE && setting == HB_AAT_LAYOUT_FEATURE_SELECTOR_SMALL_CAPS)
+ {
+ /* Deprecated. https://github.com/harfbuzz/harfbuzz/issues/1342 */
+ /* Not requested under the deprecated letter-case pair: search once
+ * more under the lower-case small-caps pair.  The rewritten pair no
+ * longer matches this branch, so the retry cannot loop. */
+ type = HB_AAT_LAYOUT_FEATURE_TYPE_LOWER_CASE;
+ setting = HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_SMALL_CAPS;
+ goto retry;
+ }
+ }
+ }
+ return flags;
+ }
+
+ /* Apply every subtable of the chain that is selected by `flags` and
+ * matches the buffer direction.  The context's lookup index is advanced
+ * once per subtable, including skipped ones.  Returns early if the buffer
+ * is no longer in a successful state after a subtable. */
+ void apply (hb_aat_apply_context_t *c,
+ hb_mask_t flags) const
+ {
+ /* The first subtable starts right after the feature array. */
+ const ChainSubtable<Types> *subtable = &StructAfter<ChainSubtable<Types>> (featureZ.as_array (featureCount));
+ unsigned int count = subtableCount;
+ for (unsigned int i = 0; i < count; i++)
+ {
+ bool reverse;
+
+ /* Not selected by the compiled feature flags. */
+ if (!(subtable->subFeatureFlags & flags))
+ goto skip;
+
+ /* Unless the subtable covers all directions, its Vertical bit must
+ * agree with the buffer's direction. */
+ if (!(subtable->get_coverage() & ChainSubtable<Types>::AllDirections) &&
+ HB_DIRECTION_IS_VERTICAL (c->buffer->props.direction) !=
+ bool (subtable->get_coverage() & ChainSubtable<Types>::Vertical))
+ goto skip;
+
+ /* Buffer contents is always in logical direction. Determine if
+ * we need to reverse before applying this subtable. We reverse
+ * back after if we did reverse indeed.
+ *
+ * Quoting the spec:
+ * """
+ * Bits 28 and 30 of the coverage field control the order in which
+ * glyphs are processed when the subtable is run by the layout engine.
+ * Bit 28 is used to indicate if the glyph processing direction is
+ * the same as logical order or layout order. Bit 30 is used to
+ * indicate whether glyphs are processed forwards or backwards within
+ * that order.
+
+ Bit 30 Bit 28 Interpretation for Horizontal Text
+ 0 0 The subtable is processed in layout order
+ (the same order as the glyphs, which is
+ always left-to-right).
+ 1 0 The subtable is processed in reverse layout order
+ (the order opposite that of the glyphs, which is
+ always right-to-left).
+ 0 1 The subtable is processed in logical order
+ (the same order as the characters, which may be
+ left-to-right or right-to-left).
+ 1 1 The subtable is processed in reverse logical order
+ (the order opposite that of the characters, which
+ may be right-to-left or left-to-right).
+ */
+ /* Logical order: reverse iff Backwards is set.  Layout order: reverse
+ * iff Backwards differs from the buffer direction being backward. */
+ reverse = subtable->get_coverage () & ChainSubtable<Types>::Logical ?
+ bool (subtable->get_coverage () & ChainSubtable<Types>::Backwards) :
+ bool (subtable->get_coverage () & ChainSubtable<Types>::Backwards) !=
+ HB_DIRECTION_IS_BACKWARD (c->buffer->props.direction);
+
+ /* A false return from the message call skips this subtable. */
+ if (!c->buffer->message (c->font, "start chainsubtable %d", c->lookup_index))
+ goto skip;
+
+ if (reverse)
+ c->buffer->reverse ();
+
+ subtable->apply (c);
+
+ if (reverse)
+ c->buffer->reverse ();
+
+ (void) c->buffer->message (c->font, "end chainsubtable %d", c->lookup_index);
+
+ if (unlikely (!c->buffer->successful)) return;
+
+ skip:
+ /* Step to the next subtable and keep the lookup index in sync. */
+ subtable = &StructAfter<ChainSubtable<Types>> (*subtable);
+ c->set_lookup_index (c->lookup_index + 1);
+ }
+ }
+
+ /* Total byte length of the chain, as recorded in its header. */
+ unsigned int get_size () const { return length; }
+
+ /* Validate the chain header and length, the feature array, and each of
+ * the subtableCount subtables in sequence. */
+ bool sanitize (hb_sanitize_context_t *c, unsigned int version HB_UNUSED) const
+ {
+ TRACE_SANITIZE (this);
+ if (!length.sanitize (c) ||
+ length < min_size ||
+ !c->check_range (this, length))
+ return_trace (false);
+
+ if (!c->check_array (featureZ.arrayZ, featureCount))
+ return_trace (false);
+
+ const ChainSubtable<Types> *subtable = &StructAfter<ChainSubtable<Types>> (featureZ.as_array (featureCount));
+ unsigned int count = subtableCount;
+ for (unsigned int i = 0; i < count; i++)
+ {
+ if (!subtable->sanitize (c))
+ return_trace (false);
+ subtable = &StructAfter<ChainSubtable<Types>> (*subtable);
+ }
+
+ return_trace (true);
+ }
+
+ protected:
+ HBUINT32 defaultFlags; /* The default specification for subtables. */
+ HBUINT32 length; /* Total byte count, including this header. */
+ HBUINT featureCount; /* Number of feature subtable entries. */
+ HBUINT subtableCount; /* The number of subtables in the chain. */
+
+ UnsizedArrayOf<Feature> featureZ; /* Features. */
+/*ChainSubtable firstSubtable;*//* Subtables. */
+/*subtableGlyphCoverageArray*/ /* Only if version >= 3. We don't use. */
+
+ public:
+ DEFINE_SIZE_MIN (8 + 2 * sizeof (HBUINT));
+};
+
+
+/*
+ * The 'mort'/'morx' Table
+ */
+
+template <typename Types, hb_tag_t TAG>
+struct mortmorx
+{
+  static constexpr hb_tag_t tableTag = TAG;
+
+  /* The table counts as present when its version field is non-zero. */
+  bool has_data () const { return version != 0; }
+
+  /* Compile the feature flags of each chain, in chain order, and append
+   * them to map->chain_flags. */
+  void compile_flags (const hb_aat_map_builder_t *mapper,
+                      hb_aat_map_t *map) const
+  {
+    const Chain<Types> *cur = &firstChain;
+    const unsigned int total = chainCount;
+    unsigned int idx = 0;
+    while (idx < total)
+    {
+      map->chain_flags.push (cur->compile_flags (mapper));
+      cur = &StructAfter<Chain<Types>> (*cur);
+      idx++;
+    }
+  }
+
+  /* Apply every chain in order, using the per-chain flags stored in the
+   * shape plan.  Does nothing if the buffer is already in a failed state
+   * and stops as soon as it enters one. */
+  void apply (hb_aat_apply_context_t *c) const
+  {
+    if (unlikely (!c->buffer->successful)) return;
+
+    c->set_lookup_index (0);
+
+    const Chain<Types> *cur = &firstChain;
+    const unsigned int total = chainCount;
+    unsigned int idx = 0;
+    while (idx < total)
+    {
+      cur->apply (c, c->plan->aat_map.chain_flags[idx]);
+      if (unlikely (!c->buffer->successful)) return;
+      cur = &StructAfter<Chain<Types>> (*cur);
+      idx++;
+    }
+  }
+
+  /* Require a readable, non-zero version and a readable chain count, then
+   * sanitize each chain in sequence. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+
+    const bool header_ok = version.sanitize (c) &&
+                           version &&
+                           chainCount.sanitize (c);
+    if (!header_ok)
+      return_trace (false);
+
+    const Chain<Types> *cur = &firstChain;
+    const unsigned int total = chainCount;
+    for (unsigned int idx = 0; idx != total; ++idx)
+    {
+      if (!cur->sanitize (c, version))
+        return_trace (false);
+      cur = &StructAfter<Chain<Types>> (*cur);
+    }
+
+    return_trace (true);
+  }
+
+  protected:
+  HBUINT16     version;    /* Version number of the glyph metamorphosis
+                            * table: 1, 2, or 3. */
+  HBUINT16     unused;     /* Set to 0. */
+  HBUINT32     chainCount; /* Number of metamorphosis chains in this table. */
+  Chain<Types> firstChain; /* Chains. */
+
+  public:
+  DEFINE_SIZE_MIN (8);
+};
+
+/* Concrete table types: 'morx' instantiates the shared implementation with
+ * ExtendedTypes, 'mort' with ObsoleteTypes. */
+struct morx : mortmorx<ExtendedTypes, HB_AAT_TAG_morx> {};
+struct mort : mortmorx<ObsoleteTypes, HB_AAT_TAG_mort> {};
+
+
+} /* namespace AAT */
+
+
+#endif /* HB_AAT_LAYOUT_MORX_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-opbd-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-opbd-table.hh
new file mode 100644
index 0000000000..8c04a6482f
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat-layout-opbd-table.hh
@@ -0,0 +1,173 @@
+/*
+ * Copyright © 2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_AAT_LAYOUT_OPBD_TABLE_HH
+#define HB_AAT_LAYOUT_OPBD_TABLE_HH
+
+#include "hb-aat-layout-common.hh"
+#include "hb-open-type.hh"
+
+/*
+ * opbd -- Optical Bounds
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6opbd.html
+ */
+#define HB_AAT_TAG_opbd HB_TAG('o','p','b','d')
+
+
+namespace AAT {
+
+struct OpticalBounds
+{
+  /* Four fixed-size fields and no offsets: a bounds check of the struct
+   * is the whole validation. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool in_bounds = likely (c->check_struct (this));
+    return_trace (in_bounds);
+  }
+
+  FWORD leftSide;
+  FWORD topSide;
+  FWORD rightSide;
+  FWORD bottomSide;
+  public:
+  DEFINE_SIZE_STATIC (8);
+};
+
+struct opbdFormat0
+{
+  /* Look up the optical-bounds record for glyph_id and, when `extents` is
+   * non-null, fill it with the four values scaled by the font.  Returns
+   * false when the lookup has no entry for the glyph. */
+  bool get_bounds (hb_font_t *font, hb_codepoint_t glyph_id,
+                   hb_glyph_extents_t *extents, const void *base) const
+  {
+    const unsigned int glyph_count = font->face->get_num_glyphs ();
+    const OffsetTo<OpticalBounds> *entry = lookupTable.get_value (glyph_id, glyph_count);
+    if (!entry) return false;
+
+    /* Offsets in the lookup are relative to `base`. */
+    const OpticalBounds &bounds = base+*entry;
+
+    if (extents)
+    {
+      const hb_position_t left   = font->em_scale_x (bounds.leftSide);
+      const hb_position_t top    = font->em_scale_y (bounds.topSide);
+      const hb_position_t right  = font->em_scale_x (bounds.rightSide);
+      const hb_position_t bottom = font->em_scale_y (bounds.bottomSide);
+      *extents = {left, top, right, bottom};
+    }
+    return true;
+  }
+
+  /* Validate the lookup table and the records its offsets reference,
+   * resolved against `base`. */
+  bool sanitize (hb_sanitize_context_t *c, const void *base) const
+  {
+    TRACE_SANITIZE (this);
+    const bool ok = likely (c->check_struct (this) && lookupTable.sanitize (c, base));
+    return_trace (ok);
+  }
+
+  protected:
+  Lookup<OffsetTo<OpticalBounds>>
+        lookupTable; /* Maps a glyph to its record of four int16 values:
+                      * left-side, top-side, right-side and bottom-side
+                      * optical bounds. */
+  public:
+  DEFINE_SIZE_MIN (2);
+};
+
+struct opbdFormat1
+{
+  /* Look up the optical-bounds record for glyph_id.  In this format the
+   * four values are contour (control) point indices; each is resolved to a
+   * coordinate through the font.  When `extents` is non-null it receives
+   * {left, top, right, bottom}.
+   *
+   * Returns false when the lookup has no entry for the glyph or when none
+   * of the four points could be resolved, true otherwise. */
+  bool get_bounds (hb_font_t *font, hb_codepoint_t glyph_id,
+                   hb_glyph_extents_t *extents, const void *base) const
+  {
+    const OffsetTo<OpticalBounds> *bounds_offset = lookupTable.get_value (glyph_id, font->face->get_num_glyphs ());
+    if (!bounds_offset) return false;
+    const OpticalBounds &bounds = base+*bounds_offset;
+
+    hb_position_t left = 0, top = 0, right = 0, bottom = 0, ignore;
+
+    /* Query all four sides unconditionally.  Chaining the calls with `||`
+     * would stop at the first side that resolves and leave every later
+     * side at 0 even when the font supplies a point for it.
+     * NOTE(review): sides that fail to resolve are expected to stay 0 —
+     * confirm get_glyph_contour_point() does not write garbage on failure. */
+    const bool has_left   = font->get_glyph_contour_point (glyph_id, bounds.leftSide, &left, &ignore);
+    const bool has_top    = font->get_glyph_contour_point (glyph_id, bounds.topSide, &ignore, &top);
+    const bool has_right  = font->get_glyph_contour_point (glyph_id, bounds.rightSide, &right, &ignore);
+    const bool has_bottom = font->get_glyph_contour_point (glyph_id, bounds.bottomSide, &ignore, &bottom);
+
+    if (!(has_left || has_top || has_right || has_bottom))
+      return false;
+
+    if (extents)
+      *extents = {left, top, right, bottom};
+    return true;
+  }
+
+  /* Validate the lookup table and the records its offsets reference,
+   * resolved against `base`. */
+  bool sanitize (hb_sanitize_context_t *c, const void *base) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (likely (c->check_struct (this) && lookupTable.sanitize (c, base)));
+  }
+
+  protected:
+  Lookup<OffsetTo<OpticalBounds>>
+        lookupTable; /* Lookup table associating glyphs with the four
+                      * int16 values for the left-side, top-side,
+                      * right-side, and bottom-side optical bounds. */
+  public:
+  DEFINE_SIZE_MIN (2);
+};
+
+struct opbd
+{
+  static constexpr hb_tag_t tableTag = HB_AAT_TAG_opbd;
+
+  /* Fetch the optical bounds of a glyph through whichever format this
+   * table uses; unknown formats report no bounds. */
+  bool get_bounds (hb_font_t *font, hb_codepoint_t glyph_id,
+                   hb_glyph_extents_t *extents) const
+  {
+    const unsigned int fmt = format;
+    if (fmt == 0) return u.format0.get_bounds (font, glyph_id, extents, this);
+    if (fmt == 1) return u.format1.get_bounds (font, glyph_id, extents, this);
+    return false;
+  }
+
+  /* Require a readable header with major version 1, then validate the
+   * format-specific body.  Unknown formats are accepted as-is. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+
+    const bool header_ok = c->check_struct (this) && version.major == 1;
+    if (unlikely (!header_ok))
+      return_trace (false);
+
+    switch (format)
+    {
+      case 0:
+        return_trace (u.format0.sanitize (c, this));
+      case 1:
+        return_trace (u.format1.sanitize (c, this));
+      default:
+        return_trace (true);
+    }
+  }
+
+  protected:
+  FixedVersion<>version; /* Version number of the optical bounds table
+                          * (0x00010000 for the current version). */
+  HBUINT16      format;  /* Format of the optical bounds table: 0 means
+                          * distance, 1 means control point. */
+  union {
+    opbdFormat0 format0;
+    opbdFormat1 format1;
+  } u;
+  public:
+  DEFINE_SIZE_MIN (8);
+};
+
+} /* namespace AAT */
+
+
+#endif /* HB_AAT_LAYOUT_OPBD_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-trak-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-trak-table.hh
new file mode 100644
index 0000000000..baa1c72020
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat-layout-trak-table.hh
@@ -0,0 +1,230 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_AAT_LAYOUT_TRAK_TABLE_HH
+#define HB_AAT_LAYOUT_TRAK_TABLE_HH
+
+#include "hb-aat-layout-common.hh"
+#include "hb-ot-layout.hh"
+#include "hb-open-type.hh"
+
+/*
+ * trak -- Tracking
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6trak.html
+ */
+#define HB_AAT_TAG_trak HB_TAG('t','r','a','k')
+
+
+namespace AAT {
+
+
+struct TrackTableEntry
+{
+  friend struct TrackData;
+
+  /* Track value of this record, converted from fixed-point to float. */
+  float get_track_value () const
+  {
+    return track.to_float ();
+  }
+
+  /* Per-size tracking value at `index`; the values array is resolved
+   * relative to `base` and viewed as `table_size` elements. */
+  int get_value (const void *base, unsigned int index,
+                 unsigned int table_size) const
+  {
+    const UnsizedArrayOf<FWORD> &values = base+valuesZ;
+    return values.as_array (table_size)[index];
+  }
+
+  public:
+  /* Validate this record and its `table_size` per-size values. */
+  bool sanitize (hb_sanitize_context_t *c, const void *base,
+                 unsigned int table_size) const
+  {
+    TRACE_SANITIZE (this);
+    const bool ok = likely (c->check_struct (this) &&
+                            (valuesZ.sanitize (c, base, table_size)));
+    return_trace (ok);
+  }
+
+  protected:
+  HBFixed track;       /* Track value for this record. */
+  NameID  trackNameID; /* 'name' table index for this track (a short word
+                        * or phrase like "loose" or "very tight"). */
+  NNOffsetTo<UnsizedArrayOf<FWORD>>
+          valuesZ;     /* Offset from start of tracking table to the
+                        * per-size tracking values for this track. */
+
+  public:
+  DEFINE_SIZE_STATIC (8);
+};
+
+struct TrackData
+{
+  /* Linearly interpolate the track's value between size entries idx and
+   * idx + 1 at target_size.  When both sizes are equal the value at idx
+   * is used. */
+  float interpolate_at (unsigned int idx,
+                        float target_size,
+                        const TrackTableEntry &trackTableEntry,
+                        const void *base) const
+  {
+    const unsigned int size_count = nSizes;
+    hb_array_t<const HBFixed> size_table ((base+sizeTable).arrayZ, size_count);
+
+    const float lower = size_table[idx].to_float ();
+    const float upper = size_table[idx + 1].to_float ();
+    float t;
+    if (unlikely (lower == upper))
+      t = 0.f;
+    else
+      t = (target_size - lower) / (upper - lower);
+
+    return t * trackTableEntry.get_value (base, idx + 1, size_count) +
+           (1.f - t) * trackTableEntry.get_value (base, idx, size_count);
+  }
+
+  /* Tracking value for point size `ptem`, rounded to an integer.  Returns 0
+   * when there is no track with value zero or no size entries. */
+  int get_tracking (const void *base, float ptem) const
+  {
+    /*
+     * Pick the track.  Only a track whose value is exactly zero is used;
+     * the first such entry wins.  (Entries look sorted by value in the
+     * spec's example, but the spec does not state that explicitly.)
+     */
+    const TrackTableEntry *chosen = nullptr;
+    const unsigned int track_count = nTracks;
+    for (unsigned int t = 0; t < track_count && !chosen; t++)
+      if (trackTable[t].get_track_value () == 0.f)
+        chosen = &trackTable[t];
+    if (!chosen) return 0;
+
+    /*
+     * Pick the size.
+     */
+    const unsigned int size_count = nSizes;
+    if (!size_count) return 0;
+    if (size_count == 1) return chosen->get_value (base, 0, size_count);
+
+    hb_array_t<const HBFixed> size_table ((base+sizeTable).arrayZ, size_count);
+
+    /* Index of the first size that is >= ptem, capped at the last entry. */
+    unsigned int upper = 0;
+    while (upper < size_count - 1 &&
+           !(size_table[upper].to_float () >= ptem))
+      upper++;
+
+    /* Interpolate between that entry and the one before it. */
+    const unsigned int lower = upper ? upper - 1 : 0;
+    return roundf (interpolate_at (lower, ptem, *chosen, base));
+  }
+
+  /* Validate the header, the nSizes-long size table, and the nTracks track
+   * records together with their per-size value arrays. */
+  bool sanitize (hb_sanitize_context_t *c, const void *base) const
+  {
+    TRACE_SANITIZE (this);
+    const bool ok = likely (c->check_struct (this) &&
+                            sizeTable.sanitize (c, base, nSizes) &&
+                            trackTable.sanitize (c, nTracks, base, nSizes));
+    return_trace (ok);
+  }
+
+  protected:
+  HBUINT16 nTracks;    /* Number of separate tracks in this table. */
+  HBUINT16 nSizes;     /* Number of point sizes in this table. */
+  LNNOffsetTo<UnsizedArrayOf<HBFixed>>
+           sizeTable;  /* Offset from start of the tracking table to
+                        * Array[nSizes] of size values. */
+  UnsizedArrayOf<TrackTableEntry>
+           trackTable; /* Array[nTracks] of TrackTableEntry records. */
+
+  public:
+  DEFINE_SIZE_ARRAY (8, trackTable);
+};
+
+struct trak
+{
+  static constexpr hb_tag_t tableTag = HB_AAT_TAG_trak;
+
+  /* The table counts as present when its version is non-zero. */
+  bool has_data () const { return version.to_int (); }
+
+  /* Add tracking to the first glyph of every grapheme whose mask
+   * intersects the plan's trak mask: the full scaled tracking goes to the
+   * advance and the scaled half of it to the offset.  Horizontal buffers
+   * use horizData and the x axis; all others use vertData and the y axis.
+   * Returns false without touching the buffer when the font has no
+   * positive point size. */
+  bool apply (hb_aat_apply_context_t *c) const
+  {
+    TRACE_APPLY (this);
+
+    hb_mask_t trak_mask = c->plan->trak_mask;
+
+    const float ptem = c->font->ptem;
+    if (unlikely (ptem <= 0.f))
+      return_trace (false);
+
+    hb_buffer_t *buffer = c->buffer;
+    if (!HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction))
+    {
+      const TrackData &data = this+vertData;
+      int units = data.get_tracking (this, ptem);
+      hb_position_t half_shift = c->font->em_scalef_y (units / 2);
+      hb_position_t full_shift = c->font->em_scalef_y (units);
+      foreach_grapheme (buffer, start, end)
+      {
+        if (buffer->info[start].mask & trak_mask)
+        {
+          buffer->pos[start].y_advance += full_shift;
+          buffer->pos[start].y_offset += half_shift;
+        }
+      }
+    }
+    else
+    {
+      const TrackData &data = this+horizData;
+      int units = data.get_tracking (this, ptem);
+      hb_position_t half_shift = c->font->em_scalef_x (units / 2);
+      hb_position_t full_shift = c->font->em_scalef_x (units);
+      foreach_grapheme (buffer, start, end)
+      {
+        if (buffer->info[start].mask & trak_mask)
+        {
+          buffer->pos[start].x_advance += full_shift;
+          buffer->pos[start].x_offset += half_shift;
+        }
+      }
+    }
+
+    return_trace (true);
+  }
+
+  /* Require a readable header with major version 1 and valid horizontal
+   * and vertical TrackData, both resolved against the table start. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+
+    const bool ok = likely (c->check_struct (this) &&
+                            version.major == 1 &&
+                            horizData.sanitize (c, this, this) &&
+                            vertData.sanitize (c, this, this));
+    return_trace (ok);
+  }
+
+  protected:
+  FixedVersion<>version;  /* Version of the tracking table
+                           * (0x00010000u for version 1.0). */
+  HBUINT16      format;   /* Format of the tracking table (set to 0). */
+  OffsetTo<TrackData>
+                horizData;/* Offset from start of tracking table to
+                           * TrackData for horizontal text (or 0 if none). */
+  OffsetTo<TrackData>
+                vertData; /* Offset from start of tracking table to
+                           * TrackData for vertical text (or 0 if none). */
+  HBUINT16      reserved; /* Reserved. Set to 0. */
+
+  public:
+  DEFINE_SIZE_STATIC (12);
+};
+
+} /* namespace AAT */
+
+
+#endif /* HB_AAT_LAYOUT_TRAK_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-aat-layout.cc b/thirdparty/harfbuzz/src/hb-aat-layout.cc
new file mode 100644
index 0000000000..fac510e9e6
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat-layout.cc
@@ -0,0 +1,382 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#include "hb-aat-layout.hh"
+#include "hb-aat-layout-ankr-table.hh"
+#include "hb-aat-layout-bsln-table.hh" // Just so we compile it; unused otherwise.
+#include "hb-aat-layout-feat-table.hh"
+#include "hb-aat-layout-just-table.hh" // Just so we compile it; unused otherwise.
+#include "hb-aat-layout-kerx-table.hh"
+#include "hb-aat-layout-morx-table.hh"
+#include "hb-aat-layout-trak-table.hh"
+#include "hb-aat-ltag-table.hh"
+
+
+/*
+ * hb_aat_apply_context_t
+ */
+
+/* Note: This context is used for kerning, even without AAT, hence the condition. */
+#if !defined(HB_NO_AAT) || !defined(HB_NO_OT_KERN)
+
+AAT::hb_aat_apply_context_t::hb_aat_apply_context_t (const hb_ot_shape_plan_t *plan_,
+ hb_font_t *font_,
+ hb_buffer_t *buffer_,
+ hb_blob_t *blob) :
+ plan (plan_),
+ font (font_),
+ face (font->face), /* NOTE(review): reads the `font` member, so `font` must be declared before `face` -- confirm in the class declaration. */
+ buffer (buffer_),
+ sanitizer (),
+ ankr_table (&Null (AAT::ankr)), /* Starts out pointing at the Null ankr object; see set_ankr_table (). */
+ lookup_index (0)
+{ /* Set the sanitizer up on @blob, using the face's glyph count. */
+ sanitizer.init (blob);
+ sanitizer.set_num_glyphs (face->get_num_glyphs ());
+ sanitizer.start_processing (); /* Paired with end_processing () in the destructor. */
+ sanitizer.set_max_ops (HB_SANITIZE_MAX_OPS_MAX); /* Set after start_processing (); presumably overrides the op budget chosen there -- TODO confirm. */
+}
+
+/* Ends the sanitizer session; the matching start_processing () call is made
+ * in the constructor. */
+AAT::hb_aat_apply_context_t::~hb_aat_apply_context_t ()
+{
+  sanitizer.end_processing ();
+}
+
+/* Makes this context use @ankr_table_ as its `ankr` table.  Only the pointer
+ * is stored; nothing is copied. */
+void
+AAT::hb_aat_apply_context_t::set_ankr_table (const AAT::ankr *ankr_table_)
+{
+  this->ankr_table = ankr_table_;
+}
+
+#endif
+
+
+/**
+ * SECTION:hb-aat-layout
+ * @title: hb-aat-layout
+ * @short_description: Apple Advanced Typography Layout
+ * @include: hb-aat.h
+ *
+ * Functions for querying AAT Layout features in the font face.
+ **/
+
+
+#if !defined(HB_NO_AAT) || defined(HAVE_CORETEXT)
+
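+/* Maps feature tags to an AAT feature type plus two selectors per row.
+ * Table data courtesy of Apple. Converted from mnemonics to integers
+ * when moving to this file.
+ * Rows must stay sorted by tag: hb_aat_layout_find_feature_mapping ()
+ * looks them up with a binary search. */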
+static const hb_aat_feature_mapping_t feature_mappings[] =
+{
+ {HB_TAG ('a','f','r','c'), HB_AAT_LAYOUT_FEATURE_TYPE_FRACTIONS, HB_AAT_LAYOUT_FEATURE_SELECTOR_VERTICAL_FRACTIONS, HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_FRACTIONS},
+ {HB_TAG ('c','2','p','c'), HB_AAT_LAYOUT_FEATURE_TYPE_UPPER_CASE, HB_AAT_LAYOUT_FEATURE_SELECTOR_UPPER_CASE_PETITE_CAPS, HB_AAT_LAYOUT_FEATURE_SELECTOR_DEFAULT_UPPER_CASE},
+ {HB_TAG ('c','2','s','c'), HB_AAT_LAYOUT_FEATURE_TYPE_UPPER_CASE, HB_AAT_LAYOUT_FEATURE_SELECTOR_UPPER_CASE_SMALL_CAPS, HB_AAT_LAYOUT_FEATURE_SELECTOR_DEFAULT_UPPER_CASE},
+ {HB_TAG ('c','a','l','t'), HB_AAT_LAYOUT_FEATURE_TYPE_CONTEXTUAL_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_ALTERNATES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_ALTERNATES_OFF},
+ {HB_TAG ('c','a','s','e'), HB_AAT_LAYOUT_FEATURE_TYPE_CASE_SENSITIVE_LAYOUT, HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_LAYOUT_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_LAYOUT_OFF},
+ {HB_TAG ('c','l','i','g'), HB_AAT_LAYOUT_FEATURE_TYPE_LIGATURES, HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_LIGATURES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_LIGATURES_OFF},
+ {HB_TAG ('c','p','s','p'), HB_AAT_LAYOUT_FEATURE_TYPE_CASE_SENSITIVE_LAYOUT, HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_SPACING_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_SPACING_OFF},
+ {HB_TAG ('c','s','w','h'), HB_AAT_LAYOUT_FEATURE_TYPE_CONTEXTUAL_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_SWASH_ALTERNATES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_SWASH_ALTERNATES_OFF},
+ {HB_TAG ('d','l','i','g'), HB_AAT_LAYOUT_FEATURE_TYPE_LIGATURES, HB_AAT_LAYOUT_FEATURE_SELECTOR_RARE_LIGATURES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_RARE_LIGATURES_OFF},
+ {HB_TAG ('e','x','p','t'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_EXPERT_CHARACTERS, (hb_aat_layout_feature_selector_t) 16},
+ {HB_TAG ('f','r','a','c'), HB_AAT_LAYOUT_FEATURE_TYPE_FRACTIONS, HB_AAT_LAYOUT_FEATURE_SELECTOR_DIAGONAL_FRACTIONS, HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_FRACTIONS},
+ {HB_TAG ('f','w','i','d'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_MONOSPACED_TEXT, (hb_aat_layout_feature_selector_t) 7},
+ {HB_TAG ('h','a','l','t'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALT_HALF_WIDTH_TEXT, (hb_aat_layout_feature_selector_t) 7},
+ {HB_TAG ('h','i','s','t'), HB_AAT_LAYOUT_FEATURE_TYPE_LIGATURES, HB_AAT_LAYOUT_FEATURE_SELECTOR_HISTORICAL_LIGATURES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_HISTORICAL_LIGATURES_OFF},
+ {HB_TAG ('h','k','n','a'), HB_AAT_LAYOUT_FEATURE_TYPE_ALTERNATE_KANA, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_HORIZ_KANA_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_HORIZ_KANA_OFF},
+ {HB_TAG ('h','l','i','g'), HB_AAT_LAYOUT_FEATURE_TYPE_LIGATURES, HB_AAT_LAYOUT_FEATURE_SELECTOR_HISTORICAL_LIGATURES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_HISTORICAL_LIGATURES_OFF},
+ {HB_TAG ('h','n','g','l'), HB_AAT_LAYOUT_FEATURE_TYPE_TRANSLITERATION, HB_AAT_LAYOUT_FEATURE_SELECTOR_HANJA_TO_HANGUL, HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_TRANSLITERATION},
+ {HB_TAG ('h','o','j','o'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_HOJO_CHARACTERS, (hb_aat_layout_feature_selector_t) 16},
+ {HB_TAG ('h','w','i','d'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_HALF_WIDTH_TEXT, (hb_aat_layout_feature_selector_t) 7},
+ {HB_TAG ('i','t','a','l'), HB_AAT_LAYOUT_FEATURE_TYPE_ITALIC_CJK_ROMAN, HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_ITALIC_ROMAN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_ITALIC_ROMAN_OFF},
+ {HB_TAG ('j','p','0','4'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS2004_CHARACTERS, (hb_aat_layout_feature_selector_t) 16},
+ {HB_TAG ('j','p','7','8'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS1978_CHARACTERS, (hb_aat_layout_feature_selector_t) 16},
+ {HB_TAG ('j','p','8','3'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS1983_CHARACTERS, (hb_aat_layout_feature_selector_t) 16},
+ {HB_TAG ('j','p','9','0'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS1990_CHARACTERS, (hb_aat_layout_feature_selector_t) 16},
+ {HB_TAG ('l','i','g','a'), HB_AAT_LAYOUT_FEATURE_TYPE_LIGATURES, HB_AAT_LAYOUT_FEATURE_SELECTOR_COMMON_LIGATURES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_COMMON_LIGATURES_OFF},
+ {HB_TAG ('l','n','u','m'), HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_CASE, HB_AAT_LAYOUT_FEATURE_SELECTOR_UPPER_CASE_NUMBERS, (hb_aat_layout_feature_selector_t) 2},
+ {HB_TAG ('m','g','r','k'), HB_AAT_LAYOUT_FEATURE_TYPE_MATHEMATICAL_EXTRAS, HB_AAT_LAYOUT_FEATURE_SELECTOR_MATHEMATICAL_GREEK_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_MATHEMATICAL_GREEK_OFF},
+ {HB_TAG ('n','l','c','k'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_NLCCHARACTERS, (hb_aat_layout_feature_selector_t) 16},
+ {HB_TAG ('o','n','u','m'), HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_CASE, HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_NUMBERS, (hb_aat_layout_feature_selector_t) 2},
+ {HB_TAG ('o','r','d','n'), HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_POSITION, HB_AAT_LAYOUT_FEATURE_SELECTOR_ORDINALS, HB_AAT_LAYOUT_FEATURE_SELECTOR_NORMAL_POSITION},
+ {HB_TAG ('p','a','l','t'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALT_PROPORTIONAL_TEXT, (hb_aat_layout_feature_selector_t) 7},
+ {HB_TAG ('p','c','a','p'), HB_AAT_LAYOUT_FEATURE_TYPE_LOWER_CASE, HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_PETITE_CAPS, HB_AAT_LAYOUT_FEATURE_SELECTOR_DEFAULT_LOWER_CASE},
+ {HB_TAG ('p','k','n','a'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_TEXT, (hb_aat_layout_feature_selector_t) 7},
+ {HB_TAG ('p','n','u','m'), HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_NUMBERS, (hb_aat_layout_feature_selector_t) 4},
+ {HB_TAG ('p','w','i','d'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_TEXT, (hb_aat_layout_feature_selector_t) 7},
+ {HB_TAG ('q','w','i','d'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_QUARTER_WIDTH_TEXT, (hb_aat_layout_feature_selector_t) 7},
+ {HB_TAG ('r','u','b','y'), HB_AAT_LAYOUT_FEATURE_TYPE_RUBY_KANA, HB_AAT_LAYOUT_FEATURE_SELECTOR_RUBY_KANA_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_RUBY_KANA_OFF},
+ {HB_TAG ('s','i','n','f'), HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_POSITION, HB_AAT_LAYOUT_FEATURE_SELECTOR_SCIENTIFIC_INFERIORS, HB_AAT_LAYOUT_FEATURE_SELECTOR_NORMAL_POSITION},
+ {HB_TAG ('s','m','c','p'), HB_AAT_LAYOUT_FEATURE_TYPE_LOWER_CASE, HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_SMALL_CAPS, HB_AAT_LAYOUT_FEATURE_SELECTOR_DEFAULT_LOWER_CASE},
+ {HB_TAG ('s','m','p','l'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_SIMPLIFIED_CHARACTERS, (hb_aat_layout_feature_selector_t) 16},
+ {HB_TAG ('s','s','0','1'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ONE_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ONE_OFF},
+ {HB_TAG ('s','s','0','2'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWO_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWO_OFF},
+ {HB_TAG ('s','s','0','3'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THREE_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THREE_OFF},
+ {HB_TAG ('s','s','0','4'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOUR_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOUR_OFF},
+ {HB_TAG ('s','s','0','5'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIVE_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIVE_OFF},
+ {HB_TAG ('s','s','0','6'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIX_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIX_OFF},
+ {HB_TAG ('s','s','0','7'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVEN_OFF},
+ {HB_TAG ('s','s','0','8'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHT_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHT_OFF},
+ {HB_TAG ('s','s','0','9'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINE_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINE_OFF},
+ {HB_TAG ('s','s','1','0'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TEN_OFF},
+ {HB_TAG ('s','s','1','1'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ELEVEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ELEVEN_OFF},
+ {HB_TAG ('s','s','1','2'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWELVE_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWELVE_OFF},
+ {HB_TAG ('s','s','1','3'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THIRTEEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THIRTEEN_OFF},
+ {HB_TAG ('s','s','1','4'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOURTEEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOURTEEN_OFF},
+ {HB_TAG ('s','s','1','5'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIFTEEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIFTEEN_OFF},
+ {HB_TAG ('s','s','1','6'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIXTEEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIXTEEN_OFF},
+ {HB_TAG ('s','s','1','7'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVENTEEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVENTEEN_OFF},
+ {HB_TAG ('s','s','1','8'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHTEEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHTEEN_OFF},
+ {HB_TAG ('s','s','1','9'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINETEEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINETEEN_OFF},
+ {HB_TAG ('s','s','2','0'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWENTY_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWENTY_OFF},
+ {HB_TAG ('s','u','b','s'), HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_POSITION, HB_AAT_LAYOUT_FEATURE_SELECTOR_INFERIORS, HB_AAT_LAYOUT_FEATURE_SELECTOR_NORMAL_POSITION},
+ {HB_TAG ('s','u','p','s'), HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_POSITION, HB_AAT_LAYOUT_FEATURE_SELECTOR_SUPERIORS, HB_AAT_LAYOUT_FEATURE_SELECTOR_NORMAL_POSITION},
+ {HB_TAG ('s','w','s','h'), HB_AAT_LAYOUT_FEATURE_TYPE_CONTEXTUAL_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_SWASH_ALTERNATES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_SWASH_ALTERNATES_OFF},
+ {HB_TAG ('t','i','t','l'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLE_OPTIONS, HB_AAT_LAYOUT_FEATURE_SELECTOR_TITLING_CAPS, HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_STYLE_OPTIONS},
+ {HB_TAG ('t','n','a','m'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_NAMES_CHARACTERS, (hb_aat_layout_feature_selector_t) 16},
+ {HB_TAG ('t','n','u','m'), HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_MONOSPACED_NUMBERS, (hb_aat_layout_feature_selector_t) 4},
+ {HB_TAG ('t','r','a','d'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_CHARACTERS, (hb_aat_layout_feature_selector_t) 16},
+ {HB_TAG ('t','w','i','d'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_THIRD_WIDTH_TEXT, (hb_aat_layout_feature_selector_t) 7},
+ {HB_TAG ('u','n','i','c'), HB_AAT_LAYOUT_FEATURE_TYPE_LETTER_CASE, (hb_aat_layout_feature_selector_t) 14, (hb_aat_layout_feature_selector_t) 15},
+ {HB_TAG ('v','a','l','t'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALT_PROPORTIONAL_TEXT, (hb_aat_layout_feature_selector_t) 7},
+ {HB_TAG ('v','e','r','t'), HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_SUBSTITUTION, HB_AAT_LAYOUT_FEATURE_SELECTOR_SUBSTITUTE_VERTICAL_FORMS_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_SUBSTITUTE_VERTICAL_FORMS_OFF},
+ {HB_TAG ('v','h','a','l'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALT_HALF_WIDTH_TEXT, (hb_aat_layout_feature_selector_t) 7},
+ {HB_TAG ('v','k','n','a'), HB_AAT_LAYOUT_FEATURE_TYPE_ALTERNATE_KANA, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_VERT_KANA_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_VERT_KANA_OFF},
+ {HB_TAG ('v','p','a','l'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALT_PROPORTIONAL_TEXT, (hb_aat_layout_feature_selector_t) 7},
+ {HB_TAG ('v','r','t','2'), HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_SUBSTITUTION, HB_AAT_LAYOUT_FEATURE_SELECTOR_SUBSTITUTE_VERTICAL_FORMS_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_SUBSTITUTE_VERTICAL_FORMS_OFF},
+ {HB_TAG ('z','e','r','o'), HB_AAT_LAYOUT_FEATURE_TYPE_TYPOGRAPHIC_EXTRAS, HB_AAT_LAYOUT_FEATURE_SELECTOR_SLASHED_ZERO_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_SLASHED_ZERO_OFF},
+};
+
+const hb_aat_feature_mapping_t *
+hb_aat_layout_find_feature_mapping (hb_tag_t tag)
+{ /* Binary-searches feature_mappings[] for @tag, so the result is only
+   * meaningful while that table stays sorted by tag.
+   * NOTE(review): the value returned for an unknown tag is whatever
+   * bsearch () yields -- presumably NULL; confirm in the hb_sorted_array
+   * implementation. */
+ return hb_sorted_array (feature_mappings).bsearch (tag);
+}
+#endif
+
+
+#ifndef HB_NO_AAT
+
+/*
+ * mort/morx/kerx/trak
+ */
+
+
+/* Compiles feature flags from @mapper into @map.  The face's `morx` table
+ * is used when it has data; the `mort` table is looked at only otherwise.
+ * When neither table has data, @map is left untouched. */
+void
+hb_aat_layout_compile_map (const hb_aat_map_builder_t *mapper,
+                           hb_aat_map_t *map)
+{
+  const AAT::morx &extended = *mapper->face->table.morx;
+  if (!extended.has_data ())
+  {
+    /* No usable morx: fall back to mort. */
+    const AAT::mort &legacy = *mapper->face->table.mort;
+    if (legacy.has_data ())
+      legacy.compile_flags (mapper, map);
+    return;
+  }
+
+  extended.compile_flags (mapper, map);
+}
+
+
+/**
+ * hb_aat_layout_has_substitution:
+ * @face: a face object
+ *
+ * Tests whether @face has data in its `morx` table or, failing that, in
+ * its `mort` table.
+ *
+ * Return value: true if either table has data, false otherwise
+ *
+ * Since: 2.3.0
+ */
+hb_bool_t
+hb_aat_layout_has_substitution (hb_face_t *face)
+{
+  if (face->table.morx->has_data ())
+    return true;
+
+  if (face->table.mort->has_data ())
+    return true;
+
+  return false;
+}
+
+/* Runs AAT substitution on @buffer.  The `morx` table is applied when it
+ * has data; otherwise the `mort` table is applied if it has data; otherwise
+ * nothing is applied.  Whichever table is used gets an apply context built
+ * on that table's own blob. */
+void
+hb_aat_layout_substitute (const hb_ot_shape_plan_t *plan,
+                          hb_font_t *font,
+                          hb_buffer_t *buffer)
+{
+  hb_blob_t *extended_blob = font->face->table.morx.get_blob ();
+  const AAT::morx &extended = *extended_blob->as<AAT::morx> ();
+  if (!extended.has_data ())
+  {
+    /* No usable morx: try mort instead. */
+    hb_blob_t *legacy_blob = font->face->table.mort.get_blob ();
+    const AAT::mort &legacy = *legacy_blob->as<AAT::mort> ();
+    if (legacy.has_data ())
+    {
+      AAT::hb_aat_apply_context_t ctx (plan, font, buffer, legacy_blob);
+      legacy.apply (&ctx);
+    }
+    return;
+  }
+
+  AAT::hb_aat_apply_context_t ctx (plan, font, buffer, extended_blob);
+  extended.apply (&ctx);
+}
+
+/* Zeroes both advances and both offsets of every glyph in @buffer whose
+ * codepoint is AAT::DELETED_GLYPH.  Other glyphs are left as they are. */
+void
+hb_aat_layout_zero_width_deleted_glyphs (hb_buffer_t *buffer)
+{
+  const hb_glyph_info_t *glyph = buffer->info;
+  hb_glyph_position_t *position = buffer->pos;
+
+  for (unsigned int remaining = buffer->len; remaining; remaining--, glyph++, position++)
+  {
+    if (unlikely (glyph->codepoint == AAT::DELETED_GLYPH))
+    {
+      position->y_offset = 0;
+      position->x_offset = 0;
+      position->y_advance = 0;
+      position->x_advance = 0;
+    }
+  }
+}
+
+static bool
+is_deleted_glyph (const hb_glyph_info_t *info)
+{ /* Predicate: true when @info's codepoint equals AAT::DELETED_GLYPH. */
+ return info->codepoint == AAT::DELETED_GLYPH;
+}
+
+void
+hb_aat_layout_remove_deleted_glyphs (hb_buffer_t *buffer)
+{ /* Hands @buffer to hb_ot_layout_delete_glyphs_inplace () with
+   * is_deleted_glyph () as the filter, so the glyphs it acts on are those
+   * whose codepoint is AAT::DELETED_GLYPH.  Presumably they are dropped
+   * from the buffer in place -- confirm against that helper. */
+ hb_ot_layout_delete_glyphs_inplace (buffer, is_deleted_glyph);
+}
+
+/**
+ * hb_aat_layout_has_positioning:
+ * @face: a face object
+ *
+ * Tests whether @face's `kerx` table has data.
+ *
+ * Return value: true if the `kerx` table has data, false otherwise
+ *
+ * Since: 2.3.0
+ */
+hb_bool_t
+hb_aat_layout_has_positioning (hb_face_t *face)
+{
+ return face->table.kerx->has_data ();
+}
+
+void
+hb_aat_layout_position (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer)
+{ /* Applies the face's `kerx` table to @buffer.
+   * NOTE(review): has_data () is not checked here, unlike in
+   * hb_aat_layout_substitute (); presumably callers test
+   * hb_aat_layout_has_positioning () first -- confirm at the call site. */
+ hb_blob_t *kerx_blob = font->face->table.kerx.get_blob ();
+ const AAT::kerx& kerx = *kerx_blob->as<AAT::kerx> ();
+
+ AAT::hb_aat_apply_context_t c (plan, font, buffer, kerx_blob);
+ c.set_ankr_table (font->face->table.ankr.get ()); /* Must be set before apply () so the context does not keep its Null ankr default. */
+ kerx.apply (&c);
+}
+
+
+/**
+ * hb_aat_layout_has_tracking:
+ * @face: a face object
+ *
+ * Tests whether @face's `trak` table has data.
+ *
+ * Return value: true if the `trak` table has data, false otherwise
+ *
+ * Since: 2.3.0
+ */
+hb_bool_t
+hb_aat_layout_has_tracking (hb_face_t *face)
+{
+ return face->table.trak->has_data ();
+}
+
+void
+hb_aat_layout_track (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer)
+{ /* Applies the face's `trak` table to @buffer. */
+ const AAT::trak& trak = *font->face->table.trak;
+
+ AAT::hb_aat_apply_context_t c (plan, font, buffer); /* No blob is passed here, unlike the morx/mort/kerx paths; this relies on a default for the constructor's blob parameter declared outside this file. */
+ trak.apply (&c);
+}
+
+/**
+ * hb_aat_layout_get_feature_types:
+ * @face: a face object
+ * @start_offset: iteration's start offset
+ * @feature_count: (inout) (allow-none): buffer size as input, filled size as output
+ * @features: (out caller-allocates) (array length=feature_count): features buffer
+ *
+ * Fetches feature types from @face's `feat` table, starting at
+ * @start_offset.  The call is forwarded to that table unchanged.
+ *
+ * Return value: Number of all available feature types.
+ *
+ * Since: 2.2.0
+ */
+unsigned int
+hb_aat_layout_get_feature_types (hb_face_t *face,
+ unsigned int start_offset,
+ unsigned int *feature_count, /* IN/OUT. May be NULL. */
+ hb_aat_layout_feature_type_t *features /* OUT. May be NULL. */)
+{
+ return face->table.feat->get_feature_types (start_offset, feature_count, features);
+}
+
+/**
+ * hb_aat_layout_feature_type_get_name_id:
+ * @face: a face object
+ * @feature_type: the feature type to look up
+ *
+ * Asks @face's `feat` table for the name ID of @feature_type.
+ *
+ * Return value: Name ID index
+ *
+ * Since: 2.2.0
+ */
+hb_ot_name_id_t
+hb_aat_layout_feature_type_get_name_id (hb_face_t *face,
+ hb_aat_layout_feature_type_t feature_type)
+{
+ return face->table.feat->get_feature_name_id (feature_type);
+}
+
+/**
+ * hb_aat_layout_feature_type_get_selector_infos:
+ * @face: a face object
+ * @feature_type: the feature type whose selectors are requested
+ * @start_offset: iteration's start offset
+ * @selector_count: (inout) (allow-none): buffer size as input, filled size as output
+ * @selectors: (out caller-allocates) (array length=selector_count): settings buffer
+ * @default_index: (out) (allow-none): index of default selector if any
+ *
+ * Fetches selector infos for @feature_type from @face's `feat` table,
+ * starting at @start_offset.
+ *
+ * If upon return, @default_index is set to #HB_AAT_LAYOUT_NO_SELECTOR_INDEX, then
+ * the feature type is non-exclusive. Otherwise, @default_index is the index of
+ * the selector that is selected by default.
+ *
+ * Return value: Number of all available feature selectors.
+ *
+ * Since: 2.2.0
+ */
+unsigned int
+hb_aat_layout_feature_type_get_selector_infos (hb_face_t *face,
+ hb_aat_layout_feature_type_t feature_type,
+ unsigned int start_offset,
+ unsigned int *selector_count, /* IN/OUT. May be NULL. */
+ hb_aat_layout_feature_selector_info_t *selectors, /* OUT. May be NULL. */
+ unsigned int *default_index /* OUT. May be NULL. */)
+{
+ return face->table.feat->get_selector_infos (feature_type, start_offset, selector_count, selectors, default_index);
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-aat-layout.h b/thirdparty/harfbuzz/src/hb-aat-layout.h
new file mode 100644
index 0000000000..b617e8b703
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat-layout.h
@@ -0,0 +1,486 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_AAT_H_IN
+#error "Include <hb-aat.h> instead."
+#endif
+
+#ifndef HB_AAT_LAYOUT_H
+#define HB_AAT_LAYOUT_H
+
+#include "hb.h"
+
+#include "hb-ot.h"
+
+HB_BEGIN_DECLS
+
+/**
+ * hb_aat_layout_feature_type_t:
+ *
+ * AAT feature types.  The numeric values are not contiguous (7 and 12, for
+ * example, are not assigned here), so they must not be used as dense array
+ * indices.
+ *
+ * #HB_AAT_LAYOUT_FEATURE_TYPE_CURISVE_CONNECTION is a misspelling that is
+ * kept so existing callers keep compiling; new code should use the
+ * identically-valued #HB_AAT_LAYOUT_FEATURE_TYPE_CURSIVE_CONNECTION.
+ *
+ * Since: 2.2.0
+ */
+typedef enum
+{
+ HB_AAT_LAYOUT_FEATURE_TYPE_INVALID = 0xFFFF,
+
+ HB_AAT_LAYOUT_FEATURE_TYPE_ALL_TYPOGRAPHIC = 0,
+ HB_AAT_LAYOUT_FEATURE_TYPE_LIGATURES = 1,
+ HB_AAT_LAYOUT_FEATURE_TYPE_CURSIVE_CONNECTION = 2,
+ HB_AAT_LAYOUT_FEATURE_TYPE_CURISVE_CONNECTION = HB_AAT_LAYOUT_FEATURE_TYPE_CURSIVE_CONNECTION, /* misspelled alias, kept for compatibility */
+ HB_AAT_LAYOUT_FEATURE_TYPE_LETTER_CASE = 3,
+ HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_SUBSTITUTION = 4,
+ HB_AAT_LAYOUT_FEATURE_TYPE_LINGUISTIC_REARRANGEMENT = 5,
+ HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_SPACING = 6,
+ HB_AAT_LAYOUT_FEATURE_TYPE_SMART_SWASH_TYPE = 8,
+ HB_AAT_LAYOUT_FEATURE_TYPE_DIACRITICS_TYPE = 9,
+ HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_POSITION = 10,
+ HB_AAT_LAYOUT_FEATURE_TYPE_FRACTIONS = 11,
+ HB_AAT_LAYOUT_FEATURE_TYPE_OVERLAPPING_CHARACTERS_TYPE = 13,
+ HB_AAT_LAYOUT_FEATURE_TYPE_TYPOGRAPHIC_EXTRAS = 14,
+ HB_AAT_LAYOUT_FEATURE_TYPE_MATHEMATICAL_EXTRAS = 15,
+ HB_AAT_LAYOUT_FEATURE_TYPE_ORNAMENT_SETS_TYPE = 16,
+ HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_ALTERNATIVES = 17,
+ HB_AAT_LAYOUT_FEATURE_TYPE_DESIGN_COMPLEXITY_TYPE = 18,
+ HB_AAT_LAYOUT_FEATURE_TYPE_STYLE_OPTIONS = 19,
+ HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE = 20,
+ HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_CASE = 21,
+ HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING = 22,
+ HB_AAT_LAYOUT_FEATURE_TYPE_TRANSLITERATION = 23,
+ HB_AAT_LAYOUT_FEATURE_TYPE_ANNOTATION_TYPE = 24,
+ HB_AAT_LAYOUT_FEATURE_TYPE_KANA_SPACING_TYPE = 25,
+ HB_AAT_LAYOUT_FEATURE_TYPE_IDEOGRAPHIC_SPACING_TYPE = 26,
+ HB_AAT_LAYOUT_FEATURE_TYPE_UNICODE_DECOMPOSITION_TYPE = 27,
+ HB_AAT_LAYOUT_FEATURE_TYPE_RUBY_KANA = 28,
+ HB_AAT_LAYOUT_FEATURE_TYPE_CJK_SYMBOL_ALTERNATIVES_TYPE = 29,
+ HB_AAT_LAYOUT_FEATURE_TYPE_IDEOGRAPHIC_ALTERNATIVES_TYPE = 30,
+ HB_AAT_LAYOUT_FEATURE_TYPE_CJK_VERTICAL_ROMAN_PLACEMENT_TYPE = 31,
+ HB_AAT_LAYOUT_FEATURE_TYPE_ITALIC_CJK_ROMAN = 32,
+ HB_AAT_LAYOUT_FEATURE_TYPE_CASE_SENSITIVE_LAYOUT = 33,
+ HB_AAT_LAYOUT_FEATURE_TYPE_ALTERNATE_KANA = 34,
+ HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES = 35,
+ HB_AAT_LAYOUT_FEATURE_TYPE_CONTEXTUAL_ALTERNATIVES = 36,
+ HB_AAT_LAYOUT_FEATURE_TYPE_LOWER_CASE = 37,
+ HB_AAT_LAYOUT_FEATURE_TYPE_UPPER_CASE = 38,
+ HB_AAT_LAYOUT_FEATURE_TYPE_LANGUAGE_TAG_TYPE = 39,
+ HB_AAT_LAYOUT_FEATURE_TYPE_CJK_ROMAN_SPACING_TYPE = 103,
+
+ _HB_AAT_LAYOUT_FEATURE_TYPE_MAX_VALUE = HB_TAG_MAX_SIGNED /*< skip >*/
+} hb_aat_layout_feature_type_t;
+
+/**
+ * hb_aat_layout_feature_selector_t:
+ *
+ *
+ * Since: 2.2.0
+ */
+typedef enum
+{
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_INVALID = 0xFFFF,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_ALL_TYPOGRAPHIC */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ALL_TYPE_FEATURES_ON = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ALL_TYPE_FEATURES_OFF = 1,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_LIGATURES */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_REQUIRED_LIGATURES_ON = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_REQUIRED_LIGATURES_OFF = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_COMMON_LIGATURES_ON = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_COMMON_LIGATURES_OFF = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_RARE_LIGATURES_ON = 4,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_RARE_LIGATURES_OFF = 5,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_LOGOS_ON = 6,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_LOGOS_OFF = 7,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_REBUS_PICTURES_ON = 8,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_REBUS_PICTURES_OFF = 9,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DIPHTHONG_LIGATURES_ON = 10,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DIPHTHONG_LIGATURES_OFF = 11,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SQUARED_LIGATURES_ON = 12,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SQUARED_LIGATURES_OFF = 13,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ABBREV_SQUARED_LIGATURES_ON = 14,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ABBREV_SQUARED_LIGATURES_OFF = 15,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SYMBOL_LIGATURES_ON = 16,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SYMBOL_LIGATURES_OFF = 17,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_LIGATURES_ON = 18,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_LIGATURES_OFF = 19,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HISTORICAL_LIGATURES_ON = 20,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HISTORICAL_LIGATURES_OFF = 21,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_CURISVE_CONNECTION */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_UNCONNECTED = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_PARTIALLY_CONNECTED = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CURSIVE = 2,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_LETTER_CASE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_UPPER_AND_LOWER_CASE = 0, /* deprecated */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ALL_CAPS = 1, /* deprecated */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ALL_LOWER_CASE = 2, /* deprecated */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SMALL_CAPS = 3, /* deprecated */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_INITIAL_CAPS = 4, /* deprecated */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_INITIAL_CAPS_AND_SMALL_CAPS = 5, /* deprecated */
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_SUBSTITUTION */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SUBSTITUTE_VERTICAL_FORMS_ON = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SUBSTITUTE_VERTICAL_FORMS_OFF = 1,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_LINGUISTIC_REARRANGEMENT */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_LINGUISTIC_REARRANGEMENT_ON = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_LINGUISTIC_REARRANGEMENT_OFF = 1,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_SPACING */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_MONOSPACED_NUMBERS = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_NUMBERS = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_THIRD_WIDTH_NUMBERS = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_QUARTER_WIDTH_NUMBERS = 3,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_SMART_SWASH_TYPE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_WORD_INITIAL_SWASHES_ON = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_WORD_INITIAL_SWASHES_OFF = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_WORD_FINAL_SWASHES_ON = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_WORD_FINAL_SWASHES_OFF = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_LINE_INITIAL_SWASHES_ON = 4,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_LINE_INITIAL_SWASHES_OFF = 5,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_LINE_FINAL_SWASHES_ON = 6,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_LINE_FINAL_SWASHES_OFF = 7,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_NON_FINAL_SWASHES_ON = 8,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_NON_FINAL_SWASHES_OFF = 9,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_DIACRITICS_TYPE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SHOW_DIACRITICS = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HIDE_DIACRITICS = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DECOMPOSE_DIACRITICS = 2,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_POSITION */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_NORMAL_POSITION = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SUPERIORS = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_INFERIORS = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ORDINALS = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SCIENTIFIC_INFERIORS = 4,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_FRACTIONS */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_FRACTIONS = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_VERTICAL_FRACTIONS = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DIAGONAL_FRACTIONS = 2,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_OVERLAPPING_CHARACTERS_TYPE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_PREVENT_OVERLAP_ON = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_PREVENT_OVERLAP_OFF = 1,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_TYPOGRAPHIC_EXTRAS */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HYPHENS_TO_EM_DASH_ON = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HYPHENS_TO_EM_DASH_OFF = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HYPHEN_TO_EN_DASH_ON = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HYPHEN_TO_EN_DASH_OFF = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SLASHED_ZERO_ON = 4,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SLASHED_ZERO_OFF = 5,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_FORM_INTERROBANG_ON = 6,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_FORM_INTERROBANG_OFF = 7,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SMART_QUOTES_ON = 8,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SMART_QUOTES_OFF = 9,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_PERIODS_TO_ELLIPSIS_ON = 10,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_PERIODS_TO_ELLIPSIS_OFF = 11,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_MATHEMATICAL_EXTRAS */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HYPHEN_TO_MINUS_ON = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HYPHEN_TO_MINUS_OFF = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ASTERISK_TO_MULTIPLY_ON = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ASTERISK_TO_MULTIPLY_OFF = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SLASH_TO_DIVIDE_ON = 4,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SLASH_TO_DIVIDE_OFF = 5,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_INEQUALITY_LIGATURES_ON = 6,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_INEQUALITY_LIGATURES_OFF = 7,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_EXPONENTS_ON = 8,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_EXPONENTS_OFF = 9,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_MATHEMATICAL_GREEK_ON = 10,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_MATHEMATICAL_GREEK_OFF = 11,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_ORNAMENT_SETS_TYPE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_ORNAMENTS = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DINGBATS = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_PI_CHARACTERS = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_FLEURONS = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DECORATIVE_BORDERS = 4,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_INTERNATIONAL_SYMBOLS = 5,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_MATH_SYMBOLS = 6,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_ALTERNATIVES */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_ALTERNATES = 0,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_DESIGN_COMPLEXITY_TYPE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DESIGN_LEVEL1 = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DESIGN_LEVEL2 = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DESIGN_LEVEL3 = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DESIGN_LEVEL4 = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DESIGN_LEVEL5 = 4,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_STYLE_OPTIONS */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_STYLE_OPTIONS = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DISPLAY_TEXT = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ENGRAVED_TEXT = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ILLUMINATED_CAPS = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_TITLING_CAPS = 4,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_TALL_CAPS = 5,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_CHARACTERS = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SIMPLIFIED_CHARACTERS = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS1978_CHARACTERS = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS1983_CHARACTERS = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS1990_CHARACTERS = 4,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_ALT_ONE = 5,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_ALT_TWO = 6,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_ALT_THREE = 7,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_ALT_FOUR = 8,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_ALT_FIVE = 9,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_EXPERT_CHARACTERS = 10,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS2004_CHARACTERS = 11,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HOJO_CHARACTERS = 12,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_NLCCHARACTERS = 13,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_NAMES_CHARACTERS = 14,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_CASE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_NUMBERS = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_UPPER_CASE_NUMBERS = 1,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_TEXT = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_MONOSPACED_TEXT = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HALF_WIDTH_TEXT = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_THIRD_WIDTH_TEXT = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_QUARTER_WIDTH_TEXT = 4,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ALT_PROPORTIONAL_TEXT = 5,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ALT_HALF_WIDTH_TEXT = 6,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_TRANSLITERATION */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_TRANSLITERATION = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HANJA_TO_HANGUL = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HIRAGANA_TO_KATAKANA = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_KATAKANA_TO_HIRAGANA = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_KANA_TO_ROMANIZATION = 4,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ROMANIZATION_TO_HIRAGANA = 5,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ROMANIZATION_TO_KATAKANA = 6,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HANJA_TO_HANGUL_ALT_ONE = 7,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HANJA_TO_HANGUL_ALT_TWO = 8,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HANJA_TO_HANGUL_ALT_THREE = 9,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_ANNOTATION_TYPE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_ANNOTATION = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_BOX_ANNOTATION = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ROUNDED_BOX_ANNOTATION = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CIRCLE_ANNOTATION = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_INVERTED_CIRCLE_ANNOTATION = 4,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_PARENTHESIS_ANNOTATION = 5,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_PERIOD_ANNOTATION = 6,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ROMAN_NUMERAL_ANNOTATION = 7,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DIAMOND_ANNOTATION = 8,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_INVERTED_BOX_ANNOTATION = 9,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_INVERTED_ROUNDED_BOX_ANNOTATION= 10,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_KANA_SPACING_TYPE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_FULL_WIDTH_KANA = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_KANA = 1,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_IDEOGRAPHIC_SPACING_TYPE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_FULL_WIDTH_IDEOGRAPHS = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_IDEOGRAPHS = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HALF_WIDTH_IDEOGRAPHS = 2,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_UNICODE_DECOMPOSITION_TYPE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CANONICAL_COMPOSITION_ON = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CANONICAL_COMPOSITION_OFF = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_COMPATIBILITY_COMPOSITION_ON = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_COMPATIBILITY_COMPOSITION_OFF = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_TRANSCODING_COMPOSITION_ON = 4,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_TRANSCODING_COMPOSITION_OFF = 5,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_RUBY_KANA */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_RUBY_KANA = 0, /* deprecated - use HB_AAT_LAYOUT_FEATURE_SELECTOR_RUBY_KANA_OFF instead */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_RUBY_KANA = 1, /* deprecated - use HB_AAT_LAYOUT_FEATURE_SELECTOR_RUBY_KANA_ON instead */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_RUBY_KANA_ON = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_RUBY_KANA_OFF = 3,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_CJK_SYMBOL_ALTERNATIVES_TYPE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_CJK_SYMBOL_ALTERNATIVES = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_SYMBOL_ALT_ONE = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_SYMBOL_ALT_TWO = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_SYMBOL_ALT_THREE = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_SYMBOL_ALT_FOUR = 4,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_SYMBOL_ALT_FIVE = 5,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_IDEOGRAPHIC_ALTERNATIVES_TYPE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_IDEOGRAPHIC_ALTERNATIVES = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_IDEOGRAPHIC_ALT_ONE = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_IDEOGRAPHIC_ALT_TWO = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_IDEOGRAPHIC_ALT_THREE = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_IDEOGRAPHIC_ALT_FOUR = 4,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_IDEOGRAPHIC_ALT_FIVE = 5,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_CJK_VERTICAL_ROMAN_PLACEMENT_TYPE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_VERTICAL_ROMAN_CENTERED = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_VERTICAL_ROMAN_HBASELINE = 1,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_ITALIC_CJK_ROMAN */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_CJK_ITALIC_ROMAN = 0, /* deprecated - use HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_ITALIC_ROMAN_OFF instead */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_ITALIC_ROMAN = 1, /* deprecated - use HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_ITALIC_ROMAN_ON instead */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_ITALIC_ROMAN_ON = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_ITALIC_ROMAN_OFF = 3,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_CASE_SENSITIVE_LAYOUT */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_LAYOUT_ON = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_LAYOUT_OFF = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_SPACING_ON = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_SPACING_OFF = 3,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_ALTERNATE_KANA */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_HORIZ_KANA_ON = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_HORIZ_KANA_OFF = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_VERT_KANA_ON = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_VERT_KANA_OFF = 3,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_STYLISTIC_ALTERNATES = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ONE_ON = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ONE_OFF = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWO_ON = 4,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWO_OFF = 5,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THREE_ON = 6,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THREE_OFF = 7,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOUR_ON = 8,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOUR_OFF = 9,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIVE_ON = 10,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIVE_OFF = 11,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIX_ON = 12,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIX_OFF = 13,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVEN_ON = 14,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVEN_OFF = 15,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHT_ON = 16,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHT_OFF = 17,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINE_ON = 18,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINE_OFF = 19,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TEN_ON = 20,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TEN_OFF = 21,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ELEVEN_ON = 22,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ELEVEN_OFF = 23,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWELVE_ON = 24,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWELVE_OFF = 25,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THIRTEEN_ON = 26,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THIRTEEN_OFF = 27,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOURTEEN_ON = 28,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOURTEEN_OFF = 29,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIFTEEN_ON = 30,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIFTEEN_OFF = 31,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIXTEEN_ON = 32,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIXTEEN_OFF = 33,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVENTEEN_ON = 34,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVENTEEN_OFF = 35,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHTEEN_ON = 36,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHTEEN_OFF = 37,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINETEEN_ON = 38,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINETEEN_OFF = 39,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWENTY_ON = 40,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWENTY_OFF = 41,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_CONTEXTUAL_ALTERNATIVES */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_ALTERNATES_ON = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_ALTERNATES_OFF = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SWASH_ALTERNATES_ON = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_SWASH_ALTERNATES_OFF = 3,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_SWASH_ALTERNATES_ON = 4,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_SWASH_ALTERNATES_OFF= 5,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_LOWER_CASE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DEFAULT_LOWER_CASE = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_SMALL_CAPS = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_PETITE_CAPS = 2,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_UPPER_CASE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DEFAULT_UPPER_CASE = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_UPPER_CASE_SMALL_CAPS = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_UPPER_CASE_PETITE_CAPS = 2,
+
+ /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_CJK_ROMAN_SPACING_TYPE */
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_HALF_WIDTH_CJK_ROMAN = 0,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_CJK_ROMAN = 1,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_DEFAULT_CJK_ROMAN = 2,
+ HB_AAT_LAYOUT_FEATURE_SELECTOR_FULL_WIDTH_CJK_ROMAN = 3,
+
+ _HB_AAT_LAYOUT_FEATURE_SELECTOR_MAX_VALUE = HB_TAG_MAX_SIGNED /*< skip >*/
+} hb_aat_layout_feature_selector_t;
+
+HB_EXTERN unsigned int /* Declaration only. NOTE(review): the return value is presumably the total number of feature types in the face - confirm against the definition. */
+hb_aat_layout_get_feature_types (hb_face_t *face,
+ unsigned int start_offset, /* presumably the index of the first entry to report - TODO confirm */
+ unsigned int *feature_count, /* IN/OUT. May be NULL. */
+ hb_aat_layout_feature_type_t *features /* OUT. May be NULL. */);
+
+HB_EXTERN hb_ot_name_id_t /* Declaration only. NOTE(review): presumably the name ID that labels feature_type in this face - confirm against the definition. */
+hb_aat_layout_feature_type_get_name_id (hb_face_t *face,
+ hb_aat_layout_feature_type_t feature_type);
+
+typedef struct hb_aat_layout_feature_selector_info_t /* Element type of the OUT array `selectors` of hb_aat_layout_feature_type_get_selector_infos(). */
+{
+ hb_ot_name_id_t name_id; /* presumably the name ID labelling this selector - TODO confirm */
+ hb_aat_layout_feature_selector_t enable; /* presumably the selector value that turns the setting on - TODO confirm */
+ hb_aat_layout_feature_selector_t disable; /* presumably the selector value that turns the setting off - TODO confirm */
+ /*< private >*/
+ unsigned int reserved; /* falls under the private marker above; not meant for callers */
+} hb_aat_layout_feature_selector_info_t;
+
+#define HB_AAT_LAYOUT_NO_SELECTOR_INDEX 0xFFFFu /* NOTE(review): looks like the sentinel reported through default_index of hb_aat_layout_feature_type_get_selector_infos() when there is no default selector - confirm */
+
+HB_EXTERN unsigned int /* Declaration only. NOTE(review): the return value is presumably the total number of selectors of feature_type - confirm against the definition. */
+hb_aat_layout_feature_type_get_selector_infos (hb_face_t *face,
+ hb_aat_layout_feature_type_t feature_type,
+ unsigned int start_offset, /* presumably the index of the first selector to report - TODO confirm */
+ unsigned int *selector_count, /* IN/OUT. May be NULL. */
+ hb_aat_layout_feature_selector_info_t *selectors, /* OUT. May be NULL. */
+ unsigned int *default_index /* OUT. May be NULL. */);
+
+
+/*
+ * morx/mort
+ */
+
+HB_EXTERN hb_bool_t /* Boolean query on face, listed under the morx/mort heading; presumably true when either table carries data - TODO confirm */
+hb_aat_layout_has_substitution (hb_face_t *face);
+
+
+/*
+ * kerx
+ */
+
+HB_EXTERN hb_bool_t /* Boolean query on face, listed under the kerx heading; presumably true when that table carries data - TODO confirm */
+hb_aat_layout_has_positioning (hb_face_t *face);
+
+
+/*
+ * trak
+ */
+
+HB_EXTERN hb_bool_t /* Boolean query on face, listed under the trak heading; presumably true when that table carries data - TODO confirm */
+hb_aat_layout_has_tracking (hb_face_t *face);
+
+
+HB_END_DECLS
+
+#endif /* HB_AAT_LAYOUT_H */
diff --git a/thirdparty/harfbuzz/src/hb-aat-layout.hh b/thirdparty/harfbuzz/src/hb-aat-layout.hh
new file mode 100644
index 0000000000..5e4e3bda15
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat-layout.hh
@@ -0,0 +1,75 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_AAT_LAYOUT_HH
+#define HB_AAT_LAYOUT_HH
+
+#include "hb.hh"
+
+#include "hb-ot-shape.hh"
+#include "hb-aat-ltag-table.hh"
+
+struct hb_aat_feature_mapping_t /* Associates one OpenType feature tag with an AAT feature type and its enable/disable selectors. */
+{
+ hb_tag_t otFeatureTag; /* the key that cmp() compares against */
+ hb_aat_layout_feature_type_t aatFeatureType; /* copied into feature_info_t::type by hb_aat_map_builder_t::add_feature */
+ hb_aat_layout_feature_selector_t selectorToEnable; /* used by add_feature when the requested value is nonzero */
+ hb_aat_layout_feature_selector_t selectorToDisable; /* used by add_feature when the requested value is zero */
+
+ int cmp (hb_tag_t key) const /* three-way compare of key against otFeatureTag: -1 if key is smaller, 1 if larger, 0 if equal */
+ { return key < otFeatureTag ? -1 : key > otFeatureTag ? 1 : 0; }
+};
+
+HB_INTERNAL const hb_aat_feature_mapping_t * /* result may be null: hb_aat_map_builder_t::add_feature returns early when no mapping comes back for the tag */
+hb_aat_layout_find_feature_mapping (hb_tag_t tag);
+
+HB_INTERNAL void /* called as the last step of hb_aat_map_builder_t::compile, with the builder itself and the output map */
+hb_aat_layout_compile_map (const hb_aat_map_builder_t *mapper,
+ hb_aat_map_t *map);
+
+HB_INTERNAL void /* Declaration only. NOTE(review): presumably the substitution pass over buffer (cf. hb_aat_layout_has_substitution) - confirm against the definition. */
+hb_aat_layout_substitute (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+
+HB_INTERNAL void /* Declaration only. NOTE(review): judging by the name, zeroes the advance of glyphs marked deleted - confirm. */
+hb_aat_layout_zero_width_deleted_glyphs (hb_buffer_t *buffer);
+
+HB_INTERNAL void /* Declaration only. NOTE(review): judging by the name, drops glyphs marked deleted from buffer - confirm. */
+hb_aat_layout_remove_deleted_glyphs (hb_buffer_t *buffer);
+
+HB_INTERNAL void /* Declaration only. NOTE(review): presumably the positioning pass over buffer (cf. hb_aat_layout_has_positioning) - confirm. */
+hb_aat_layout_position (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+
+HB_INTERNAL void /* Declaration only. NOTE(review): presumably the tracking pass over buffer (cf. hb_aat_layout_has_tracking) - confirm. */
+hb_aat_layout_track (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+
+
+#endif /* HB_AAT_LAYOUT_HH */
diff --git a/thirdparty/harfbuzz/src/hb-aat-ltag-table.hh b/thirdparty/harfbuzz/src/hb-aat-ltag-table.hh
new file mode 100644
index 0000000000..711f9aa6c1
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat-ltag-table.hh
@@ -0,0 +1,92 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_AAT_LTAG_TABLE_HH
+#define HB_AAT_LTAG_TABLE_HH
+
+#include "hb-open-type.hh"
+
+/*
+ * ltag -- Language Tag
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6ltag.html
+ */
+#define HB_AAT_TAG_ltag HB_TAG('l','t','a','g')
+
+
+namespace AAT {
+
+using namespace OT;
+
+
+struct FTStringRange /* One entry of ltag::tagRanges: an offset plus a byte length locating a tag string. */
+{
+ friend struct ltag; /* ltag::get_language reads the protected tag and length fields */
+
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const /* base is the address that the tag offset is resolved against; ltag::sanitize passes the table itself */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) && (base+tag).sanitize (c, length)); /* first this fixed-size record, then the length bytes it points at */
+ }
+
+ protected:
+ NNOffsetTo<UnsizedArrayOf<HBUINT8>>
+ tag; /* Offset from the start of the table to
+ * the beginning of the string */
+ HBUINT16 length; /* String length (in bytes) */
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+
+struct ltag /* In-memory view of the ltag (Language Tag) table: a version/flags header followed by an array of string ranges. */
+{
+ static constexpr hb_tag_t tableTag = HB_AAT_TAG_ltag;
+
+ hb_language_t get_language (unsigned int i) const /* converts the i-th tag string of the table into an hb_language_t */
+ {
+ const FTStringRange &range = tagRanges[i];
+ return hb_language_from_string ((const char *) (this+range.tag).arrayZ, /* string bytes are addressed relative to this table */
+ range.length); /* explicit byte length is passed alongside the pointer */
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const /* passes only if the header checks out, version >= 1, and tagRanges sanitizes with this table as base */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ version >= 1 &&
+ tagRanges.sanitize (c, this)));
+ }
+
+ protected:
+ HBUINT32 version; /* Table version; currently 1 */
+ HBUINT32 flags; /* Table flags; currently none defined */
+ LArrayOf<FTStringRange>
+ tagRanges; /* Range for each tag's string */
+ public:
+ DEFINE_SIZE_ARRAY (12, tagRanges);
+};
+
+} /* namespace AAT */
+
+
+#endif /* HB_AAT_LTAG_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-aat-map.cc b/thirdparty/harfbuzz/src/hb-aat-map.cc
new file mode 100644
index 0000000000..2c38c35029
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat-map.cc
@@ -0,0 +1,102 @@
+/*
+ * Copyright © 2009,2010 Red Hat, Inc.
+ * Copyright © 2010,2011,2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_AAT_SHAPE
+
+#include "hb-aat-map.hh"
+
+#include "hb-aat-layout.hh"
+#include "hb-aat-layout-feat-table.hh"
+
+
+void hb_aat_map_builder_t::add_feature (hb_tag_t tag, unsigned value) /* Translates one requested feature (tag, value) into a feature_info_t pushed onto `features`; returns without adding anything when the face's feat table cannot honor it. */
+{
+ if (!face->table.feat->has_data ()) return; /* feat table reports no data: nothing to map against */
+
+ if (tag == HB_TAG ('a','a','l','t')) /* this tag is handled here and never reaches the mapping lookup below */
+ {
+ if (!face->table.feat->exposes_feature (HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_ALTERNATIVES))
+ return;
+ feature_info_t *info = features.push();
+ info->type = HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_ALTERNATIVES;
+ info->setting = (hb_aat_layout_feature_selector_t) value; /* the caller's value is used as the selector itself */
+ info->seq = features.length; /* read after the push() above; feature_info_t::cmp uses seq as its final tie-break */
+ info->is_exclusive = true;
+ return;
+ }
+
+ const hb_aat_feature_mapping_t *mapping = hb_aat_layout_find_feature_mapping (tag);
+ if (!mapping) return; /* no mapping found for this tag */
+
+ const AAT::FeatureName* feature = &face->table.feat->get_feature (mapping->aatFeatureType);
+ if (!feature->has_data ())
+ {
+ /* Special case: Chain::compile_flags will fall back to the deprecated version of
+ * small-caps if necessary, so we need to check for that possibility.
+ * https://github.com/harfbuzz/harfbuzz/issues/2307 */
+ if (mapping->aatFeatureType == HB_AAT_LAYOUT_FEATURE_TYPE_LOWER_CASE &&
+ mapping->selectorToEnable == HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_SMALL_CAPS)
+ {
+ feature = &face->table.feat->get_feature (HB_AAT_LAYOUT_FEATURE_TYPE_LETTER_CASE);
+ if (!feature->has_data ()) return; /* fallback feature is absent too: give up */
+ }
+ else return;
+ }
+
+ feature_info_t *info = features.push();
+ info->type = mapping->aatFeatureType; /* stays the mapped type even when `feature` came from the LETTER_CASE fallback */
+ info->setting = value ? mapping->selectorToEnable : mapping->selectorToDisable; /* nonzero value selects the enable selector, zero the disable selector */
+ info->seq = features.length; /* read after the push() above; feature_info_t::cmp uses seq as its final tie-break */
+ info->is_exclusive = feature->is_exclusive (); /* taken from whichever feat entry was found above */
+}
+
+void
+hb_aat_map_builder_t::compile (hb_aat_map_t &m) /* sorts and de-duplicates the collected features in place, then hands the builder and m to hb_aat_layout_compile_map() */
+{
+ /* Sort features and merge duplicates */
+ if (features.length)
+ {
+ features.qsort ();
+ unsigned int j = 0; /* index of the last entry kept so far; entry 0 is always kept */
+ for (unsigned int i = 1; i < features.length; i++)
+ if (features[i].type != features[j].type || /* a different type is never a duplicate */
+ /* Nonexclusive feature selectors come in even/odd pairs to turn a setting on/off
+ * respectively, so we mask out the low-order bit when checking for "duplicates"
+ * (selectors referring to the same feature setting) here. */
+ (!features[i].is_exclusive && ((features[i].setting & ~1) != (features[j].setting & ~1)))) /* same type and exclusive: features[i] is dropped whatever its setting */
+ features[++j] = features[i]; /* compact the kept entry forward */
+ features.shrink (j + 1); /* discard everything past the last kept entry */
+ }
+
+ hb_aat_layout_compile_map (this, &m); /* runs even when no features were collected */
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-aat-map.hh b/thirdparty/harfbuzz/src/hb-aat-map.hh
new file mode 100644
index 0000000000..5a0fa70544
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat-map.hh
@@ -0,0 +1,96 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_AAT_MAP_HH
+#define HB_AAT_MAP_HH
+
+#include "hb.hh"
+
+
+struct hb_aat_map_t /* Output object of hb_aat_map_builder_t::compile(); its only data member is the chain_flags vector. */
+{
+ friend struct hb_aat_map_builder_t;
+
+ public:
+
+ void init () /* manual two-step initialization: zero the whole object, then init the vector */
+ {
+ memset (this, 0, sizeof (*this));
+ chain_flags.init ();
+ }
+ void fini () { chain_flags.fini (); } /* counterpart of init() */
+
+ public:
+ hb_vector_t<hb_mask_t> chain_flags; /* NOTE(review): presumably one flag mask per chain, filled in by hb_aat_layout_compile_map - confirm */
+};
+
+struct hb_aat_map_builder_t /* Collects requested features for one face (add_feature) and turns them into an hb_aat_map_t (compile). */
+{
+ public:
+
+ HB_INTERNAL hb_aat_map_builder_t (hb_face_t *face_,
+ const hb_segment_properties_t *props_ HB_UNUSED) : /* props_ is accepted but not used */
+ face (face_) {}
+
+ HB_INTERNAL void add_feature (hb_tag_t tag, unsigned int value=1); /* value defaults to 1 when omitted */
+
+ HB_INTERNAL void compile (hb_aat_map_t &m);
+
+ public:
+ struct feature_info_t /* one collected feature request, as stored in `features` */
+ {
+ hb_aat_layout_feature_type_t type;
+ hb_aat_layout_feature_selector_t setting;
+ bool is_exclusive;
+ unsigned seq; /* For stable sorting only. */
+
+ HB_INTERNAL static int cmp (const void *pa, const void *pb) /* untyped-pointer comparator: orders by type, then by setting, then by seq */
+ {
+ const feature_info_t *a = (const feature_info_t *) pa;
+ const feature_info_t *b = (const feature_info_t *) pb;
+ if (a->type != b->type) return (a->type < b->type ? -1 : 1);
+ if (!a->is_exclusive && /* setting is only consulted when a is non-exclusive */
+ (a->setting & ~1) != (b->setting & ~1)) return (a->setting < b->setting ? -1 : 1); /* low bit masked off for the inequality test, full values used for the ordering */
+ return (a->seq < b->seq ? -1 : a->seq > b->seq ? 1 : 0); /* otherwise fall back to seq order */
+ }
+
+ /* compares type & setting only, not is_exclusive flag or seq number */
+ int cmp (const feature_info_t& f) const /* sign follows the argument: -1 when f is smaller than *this, 1 when larger, 0 when type and setting match */
+ {
+ return (f.type != type) ? (f.type < type ? -1 : 1) :
+ (f.setting != setting) ? (f.setting < setting ? -1 : 1) : 0;
+ }
+ };
+
+ public:
+ hb_face_t *face; /* set once by the constructor; add_feature reads its feat table */
+
+ public:
+ hb_sorted_vector_t<feature_info_t> features; /* appended to by add_feature; sorted and de-duplicated by compile */
+};
+
+
+#endif /* HB_AAT_MAP_HH */
diff --git a/thirdparty/harfbuzz/src/hb-aat.h b/thirdparty/harfbuzz/src/hb-aat.h
new file mode 100644
index 0000000000..c14313d1e2
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-aat.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_AAT_H
+#define HB_AAT_H
+#define HB_AAT_H_IN
+
+#include "hb.h"
+
+#include "hb-aat-layout.h"
+
+HB_BEGIN_DECLS
+
+HB_END_DECLS
+
+#undef HB_AAT_H_IN
+#endif /* HB_AAT_H */
diff --git a/thirdparty/harfbuzz/src/hb-algs.hh b/thirdparty/harfbuzz/src/hb-algs.hh
new file mode 100644
index 0000000000..30b5812e12
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-algs.hh
@@ -0,0 +1,1127 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ * Copyright © 2019 Facebook, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ * Facebook Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_ALGS_HH
+#define HB_ALGS_HH
+
+#include "hb.hh"
+#include "hb-meta.hh"
+#include "hb-null.hh"
+#include "hb-number.hh"
+
+
+/* Encodes three unsigned integers in one 64-bit number. If the inputs have more than 21 bits,
+ * values will be truncated / overlap, and might not decode exactly.
+ * Bit layout: x is shifted to bit 42 and up, y to bit 21 and up, z starts at bit 0. */
+#define HB_CODEPOINT_ENCODE3(x,y,z) (((uint64_t) (x) << 42) | ((uint64_t) (y) << 21) | (uint64_t) (z))
+/* Recover the first, second and third component of an HB_CODEPOINT_ENCODE3 value;
+ * the second and third are masked to 21 bits. */
+#define HB_CODEPOINT_DECODE3_1(v) ((hb_codepoint_t) ((v) >> 42))
+#define HB_CODEPOINT_DECODE3_2(v) ((hb_codepoint_t) ((v) >> 21) & 0x1FFFFFu)
+#define HB_CODEPOINT_DECODE3_3(v) ((hb_codepoint_t) (v) & 0x1FFFFFu)
+
+/* Custom encoding used by hb-ucd.
+ * Packs x (masked to 11 bits), y (masked to 7 bits) and z (masked to 14 bits)
+ * into one 32-bit value. Only the low 7 bits of y are stored; decoding the
+ * second component ORs 0x0300 back in. */
+#define HB_CODEPOINT_ENCODE3_11_7_14(x,y,z) (((uint32_t) ((x) & 0x07FFu) << 21) | (((uint32_t) (y) & 0x007Fu) << 14) | (uint32_t) ((z) & 0x3FFFu))
+#define HB_CODEPOINT_DECODE3_11_7_14_1(v) ((hb_codepoint_t) ((v) >> 21))
+#define HB_CODEPOINT_DECODE3_11_7_14_2(v) ((hb_codepoint_t) (((v) >> 14) & 0x007Fu) | 0x0300)
+#define HB_CODEPOINT_DECODE3_11_7_14_3(v) ((hb_codepoint_t) (v) & 0x3FFFu)
+
+struct
+{
+  /* Forwards its argument straight back to the caller.
+   * Caution: when handed an rvalue, what comes back is an rvalue-reference. */
+  template <typename Arg> constexpr auto
+  operator () (Arg&& arg) const HB_AUTO_RETURN ( hb_forward<Arg> (arg) )
+}
+HB_FUNCOBJ (hb_identity);
+struct
+{
+  /* Identity variant that keeps only lvalue-references:
+   * an lvalue is returned by reference, anything else is returned by value. */
+  template <typename Arg> constexpr Arg&
+  operator () (Arg& arg) const { return arg; }
+
+  template <typename Arg> constexpr hb_remove_reference<Arg>
+  operator () (Arg&& arg) const { return arg; }
+}
+HB_FUNCOBJ (hb_lidentity);
+struct
+{
+  /* Identity variant whose result is always a value, never a reference. */
+  template <typename Arg> constexpr hb_remove_reference<Arg>
+  operator () (Arg&& arg) const { return arg; }
+}
+HB_FUNCOBJ (hb_ridentity);
+
+struct
+{
+  /* Explicitly converts the forwarded argument to bool. */
+  template <typename Arg> constexpr bool
+  operator () (Arg&& arg) const { return bool (hb_forward<Arg> (arg)); }
+}
+HB_FUNCOBJ (hb_bool);
+
+struct
+{
+  private:
+
+  /* hb_priority<1> candidate: call the hash () method of the (dereferenced) key. */
+  template <typename Key> constexpr auto
+  impl (const Key& key, hb_priority<1>) const HB_RETURN (uint32_t, hb_deref (key).hash ())
+
+  /* hb_priority<0> candidate: integral keys only. */
+  template <typename Key,
+	    hb_enable_if (hb_is_integral (Key))> constexpr auto
+  impl (const Key& key, hb_priority<0>) const HB_AUTO_RETURN
+  (
+    /* Knuth's multiplicative method: */
+    (uint32_t) key * 2654435761u
+  )
+
+  public:
+
+  /* Hashes key to a uint32_t through whichever impl () candidate applies. */
+  template <typename Key> constexpr auto
+  operator () (const Key& key) const HB_RETURN (uint32_t, impl (key, hb_prioritize))
+}
+HB_FUNCOBJ (hb_hash);
+
+
+/* Generic "call this thing" function object. The callable is the first
+ * argument; the remaining arguments are forwarded to it. Three impl ()
+ * candidates are tagged with hb_priority<2>, <1> and <0>; operator () picks
+ * among them by passing hb_prioritize. */
+struct
+{
+ private:
+
+ /* Pointer-to-member-function: first extra argument is the object (or
+  * something hb_deref () turns into one), the rest are call arguments. */
+ template <typename Appl, typename T, typename ...Ts> auto
+ impl (Appl&& a, hb_priority<2>, T &&v, Ts&&... ds) const HB_AUTO_RETURN
+ ((hb_deref (hb_forward<T> (v)).*hb_forward<Appl> (a)) (hb_forward<Ts> (ds)...))
+
+ /* Pointer-to-member: exactly one extra argument, the object to read from. */
+ template <typename Appl, typename T> auto
+ impl (Appl&& a, hb_priority<1>, T &&v) const HB_AUTO_RETURN
+ ((hb_deref (hb_forward<T> (v))).*hb_forward<Appl> (a))
+
+ /* Operator(): the callable itself is passed through hb_deref () and called. */
+ template <typename Appl, typename ...Ts> auto
+ impl (Appl&& a, hb_priority<0>, Ts&&... ds) const HB_AUTO_RETURN
+ (hb_deref (hb_forward<Appl> (a)) (hb_forward<Ts> (ds)...))
+
+ public:
+
+ template <typename Appl, typename ...Ts> auto
+ operator () (Appl&& a, Ts&&... ds) const HB_AUTO_RETURN
+ (
+ impl (hb_forward<Appl> (a),
+ hb_prioritize,
+ hb_forward<Ts> (ds)...)
+ )
+}
+HB_FUNCOBJ (hb_invoke);
+
+/* Partial application: stores a callable `a` and one bound value `v`, and
+ * on call hands both to hb_invoke () with `v` inserted at argument position
+ * Pos (1-based). Only Pos == 1 and Pos == 2 have a call operator. */
+template <unsigned Pos, typename Appl, typename V>
+struct hb_partial_t
+{
+ hb_partial_t (Appl a, V v) : a (a), v (v) {}
+
+ static_assert (Pos > 0, "");
+
+ /* Pos == 1: the bound value becomes the first argument. */
+ template <typename ...Ts,
+ unsigned P = Pos,
+ hb_enable_if (P == 1)> auto
+ operator () (Ts&& ...ds) -> decltype (hb_invoke (hb_declval (Appl),
+ hb_declval (V),
+ hb_declval (Ts)...))
+ {
+ return hb_invoke (hb_forward<Appl> (a),
+ hb_forward<V> (v),
+ hb_forward<Ts> (ds)...);
+ }
+ /* Pos == 2: the bound value is inserted after the first call argument. */
+ template <typename T0, typename ...Ts,
+ unsigned P = Pos,
+ hb_enable_if (P == 2)> auto
+ operator () (T0&& d0, Ts&& ...ds) -> decltype (hb_invoke (hb_declval (Appl),
+ hb_declval (T0),
+ hb_declval (V),
+ hb_declval (Ts)...))
+ {
+ return hb_invoke (hb_forward<Appl> (a),
+ hb_forward<T0> (d0),
+ hb_forward<V> (v),
+ hb_forward<Ts> (ds)...);
+ }
+
+ private:
+ /* The callable is held through hb_reference_wrapper; the bound value is held as V. */
+ hb_reference_wrapper<Appl> a;
+ V v;
+};
+/* Builds an hb_partial_t that binds `bound` at argument position Pos
+ * (first position unless specified) of `func`. */
+template <unsigned Pos=1, typename Appl, typename V>
+auto hb_partial (Appl&& func, V&& bound) HB_AUTO_RETURN
+(( hb_partial_t<Pos, Appl, V> (func, bound) ))
+
+/* The following, HB_PARTIALIZE, macro uses a particular corner-case
+ * of C++11 that is not particularly well-supported by all compilers.
+ * What's happening is that it's using "this" in a trailing return-type
+ * via decltype(). Broken compilers deduce the type of "this" pointer
+ * in that context differently from what it resolves to in the body
+ * of the function.
+ *
+ * One probable cause of this is that at the time of trailing return
+ * type declaration, "this" points to an incomplete type, whereas in
+ * the function body the type is complete. That doesn't justify the
+ * error in any way, but is probably what's happening.
+ *
+ * In the case of MSVC, we get around this by using C++14 "decltype(auto)"
+ * which deduces the type from the actual return statement. For gcc 4.8
+ * we use "+this" instead of "this", which produces an rvalue that seems
+ * to be deduced as the same type with this particular compiler, and seems
+ * to be fine as the default code path as well.
+ */
+/* HB_PARTIALIZE(Pos) gives the enclosing function object a one-argument
+ * operator () that returns hb_partial<Pos> (this-pointer, argument).
+ * Both variants end in a semicolon-less static_assert so that the macro
+ * invocation can be followed by a semicolon. */
+#ifdef _MSC_VER
+/* https://github.com/harfbuzz/harfbuzz/issues/1730 */ \
+#define HB_PARTIALIZE(Pos) \
+ template <typename _T> \
+ decltype(auto) operator () (_T&& _v) const \
+ { return hb_partial<Pos> (this, hb_forward<_T> (_v)); } \
+ static_assert (true, "")
+#else
+/* https://github.com/harfbuzz/harfbuzz/issues/1724 */
+#define HB_PARTIALIZE(Pos) \
+ template <typename _T> \
+ auto operator () (_T&& _v) const HB_AUTO_RETURN \
+ (hb_partial<Pos> (+this, hb_forward<_T> (_v))) \
+ static_assert (true, "")
+#endif
+
+
+/* Membership / predicate test: evaluates to bool. */
+struct
+{
+  private:
+
+  /* hb_priority<1> candidate: the predicate object offers a has () method. */
+  template <typename Pred, typename Val> auto
+  impl (Pred&& pred, Val &&val, hb_priority<1>) const HB_AUTO_RETURN
+  (hb_deref (hb_forward<Pred> (pred)).has (hb_forward<Val> (val)))
+
+  /* hb_priority<0> candidate: hand predicate and value to hb_invoke (). */
+  template <typename Pred, typename Val> auto
+  impl (Pred&& pred, Val &&val, hb_priority<0>) const HB_AUTO_RETURN
+  (
+    hb_invoke (hb_forward<Pred> (pred),
+	       hb_forward<Val> (val))
+  )
+
+  public:
+
+  template <typename Pred, typename Val> auto
+  operator () (Pred&& pred, Val &&val) const HB_RETURN (bool,
+    impl (hb_forward<Pred> (pred),
+	  hb_forward<Val> (val),
+	  hb_prioritize)
+  )
+}
+HB_FUNCOBJ (hb_has);
+
+/* Matching test: evaluates to bool. */
+struct
+{
+  private:
+
+  /* hb_priority<1> candidate: defer to hb_has (). */
+  template <typename Pred, typename Val> auto
+  impl (Pred&& pred, Val &&val, hb_priority<1>) const HB_AUTO_RETURN
+  (
+    hb_has (hb_forward<Pred> (pred),
+	    hb_forward<Val> (val))
+  )
+
+  /* hb_priority<0> candidate: plain equality comparison. */
+  template <typename Pred, typename Val> auto
+  impl (Pred&& pred, Val &&val, hb_priority<0>) const HB_AUTO_RETURN
+  (
+    hb_forward<Pred> (pred) == hb_forward<Val> (val)
+  )
+
+  public:
+
+  template <typename Pred, typename Val> auto
+  operator () (Pred&& pred, Val &&val) const HB_RETURN (bool,
+    impl (hb_forward<Pred> (pred),
+	  hb_forward<Val> (val),
+	  hb_prioritize)
+  )
+}
+HB_FUNCOBJ (hb_match);
+
+/* Projection / lookup: maps a value through a projection object. */
+struct
+{
+  private:
+
+  /* hb_priority<2> candidate: the projection offers a get () method. */
+  template <typename Proj, typename Val> auto
+  impl (Proj&& proj, Val &&val, hb_priority<2>) const HB_AUTO_RETURN
+  (hb_deref (hb_forward<Proj> (proj)).get (hb_forward<Val> (val)))
+
+  /* hb_priority<1> candidate: hand projection and value to hb_invoke (). */
+  template <typename Proj, typename Val> auto
+  impl (Proj&& proj, Val &&val, hb_priority<1>) const HB_AUTO_RETURN
+  (
+    hb_invoke (hb_forward<Proj> (proj),
+	       hb_forward<Val> (val))
+  )
+
+  /* hb_priority<0> candidate: subscript the projection with the value. */
+  template <typename Proj, typename Val> auto
+  impl (Proj&& proj, Val &&val, hb_priority<0>) const HB_AUTO_RETURN
+  (
+    hb_forward<Proj> (proj)[hb_forward<Val> (val)]
+  )
+
+  public:
+
+  template <typename Proj, typename Val> auto
+  operator () (Proj&& proj, Val &&val) const HB_AUTO_RETURN
+  (
+    impl (hb_forward<Proj> (proj),
+	  hb_forward<Val> (val),
+	  hb_prioritize)
+  )
+}
+HB_FUNCOBJ (hb_get);
+
+
+/* Minimal two-member aggregate with lexicographic comparison. */
+template <typename T1, typename T2>
+struct hb_pair_t
+{
+  typedef T1 first_t;
+  typedef T2 second_t;
+  typedef hb_pair_t<T1, T2> pair_t;
+
+  hb_pair_t (T1 a, T2 b) : first (a), second (b) {}
+
+  /* Converting operator to a pair of other member types.  It is enabled only
+   * when *both* members convert: T1 to Q1 and T2 to Q2.  (The second check
+   * used to compare T2 against itself, which is always true and so never
+   * constrained anything.) */
+  template <typename Q1, typename Q2,
+	    hb_enable_if (hb_is_convertible (T1, Q1) &&
+			  hb_is_convertible (T2, Q2))>
+  operator hb_pair_t<Q1, Q2> () { return hb_pair_t<Q1, Q2> (first, second); }
+
+  /* Returns a pair of the same type with the two members exchanged; this
+   * requires T1 and T2 to be convertible to each other. */
+  hb_pair_t<T1, T2> reverse () const
+  { return hb_pair_t<T1, T2> (second, first); }
+
+  /* Comparisons order by `first`, then by `second`. */
+  bool operator == (const pair_t& o) const { return first == o.first && second == o.second; }
+  bool operator != (const pair_t& o) const { return !(*this == o); }
+  bool operator < (const pair_t& o) const { return first < o.first || (first == o.first && second < o.second); }
+  bool operator >= (const pair_t& o) const { return !(*this < o); }
+  bool operator > (const pair_t& o) const { return first > o.first || (first == o.first && second > o.second); }
+  bool operator <= (const pair_t& o) const { return !(*this > o); }
+
+  T1 first;
+  T2 second;
+};
+/* Function-like spelling of the pair type, hb_pair_t(A,B). */
+#define hb_pair_t(T1,T2) hb_pair_t<T1, T2>
+/* Builds an hb_pair_t whose member types are exactly the deduced T1 and T2. */
+template <typename T1, typename T2> static inline hb_pair_t<T1, T2>
+hb_pair (T1&& first, T2&& second)
+{
+  return hb_pair_t<T1, T2> (first, second);
+}
+
+struct
+{
+  /* Returns a copy of the `first` member of a pair-like object. */
+  template <typename P> constexpr typename P::first_t
+  operator () (const P& p) const { return p.first; }
+}
+HB_FUNCOBJ (hb_first);
+
+struct
+{
+  /* Returns a copy of the `second` member of a pair-like object. */
+  template <typename P> constexpr typename P::second_t
+  operator () (const P& p) const { return p.second; }
+}
+HB_FUNCOBJ (hb_second);
+
+/* Note on min/max: the second argument could have been declared as
+ * hb_type_identity<T>, but that would silently convert between integers of
+ * different signedness.  Taking two independent types instead lets the
+ * compiler complain about mixed-signedness comparisons. */
+struct
+{
+  /* Yields the first argument when it is <= the second, else the second. */
+  template <typename A, typename B> constexpr auto
+  operator () (A&& lhs, B&& rhs) const HB_AUTO_RETURN
+  (hb_forward<A> (lhs) <= hb_forward<B> (rhs) ? hb_forward<A> (lhs) : hb_forward<B> (rhs))
+}
+HB_FUNCOBJ (hb_min);
+struct
+{
+  /* Yields the first argument when it is >= the second, else the second. */
+  template <typename A, typename B> constexpr auto
+  operator () (A&& lhs, B&& rhs) const HB_AUTO_RETURN
+  (hb_forward<A> (lhs) >= hb_forward<B> (rhs) ? hb_forward<A> (lhs) : hb_forward<B> (rhs))
+}
+HB_FUNCOBJ (hb_max);
+struct
+{
+  /* Computes hb_min (hb_max (x, lo), hi). */
+  template <typename X, typename Lo, typename Hi> constexpr auto
+  operator () (X&& x, Lo&& lo, Hi&& hi) const HB_AUTO_RETURN
+  (hb_min (hb_max (hb_forward<X> (x), hb_forward<Lo> (lo)), hb_forward<Hi> (hi)))
+}
+HB_FUNCOBJ (hb_clamp);
+
+
+/*
+ * Bithacks.
+ */
+
+/* Return the number of 1 bits in v.
+ * Uses the GCC/clang builtins when available; otherwise falls back to a
+ * portable computation selected by sizeof (T). */
+template <typename T>
+static inline HB_CONST_FUNC unsigned int
+hb_popcount (T v)
+{
+#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__)
+ /* Pick the narrowest builtin whose operand type is wide enough for T. */
+ if (sizeof (T) <= sizeof (unsigned int))
+ return __builtin_popcount (v);
+
+ if (sizeof (T) <= sizeof (unsigned long))
+ return __builtin_popcountl (v);
+
+ if (sizeof (T) <= sizeof (unsigned long long))
+ return __builtin_popcountll (v);
+#endif
+
+ if (sizeof (T) <= 4)
+ {
+ /* "HACKMEM 169" */
+ uint32_t y;
+ y = (v >> 1) &033333333333;
+ y = v - y - ((y >>1) & 033333333333);
+ return (((y + (y >> 3)) & 030707070707) % 077);
+ }
+
+ if (sizeof (T) == 8)
+ {
+ /* Sum the counts of the low and high 32-bit halves. The shift amount
+  * is kept in a variable rather than written as a literal. */
+ unsigned int shift = 32;
+ return hb_popcount<uint32_t> ((uint32_t) v) + hb_popcount ((uint32_t) (v >> shift));
+ }
+
+ if (sizeof (T) == 16)
+ {
+ /* Sum the counts of the low and high 64-bit halves. */
+ unsigned int shift = 64;
+ return hb_popcount<uint64_t> ((uint64_t) v) + hb_popcount ((uint64_t) (v >> shift));
+ }
+
+ assert (0);
+ return 0; /* Shut up stupid compiler. */
+}
+
+/* Returns the number of bits needed to store number, i.e. one more than the
+ * index of the highest set bit; returns 0 for v == 0. */
+template <typename T>
+static inline HB_CONST_FUNC unsigned int
+hb_bit_storage (T v)
+{
+ /* Handle zero up front, before any of the paths below sees it. */
+ if (unlikely (!v)) return 0;
+
+#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__)
+ /* Width of the builtin's operand type minus the count of leading zeros. */
+ if (sizeof (T) <= sizeof (unsigned int))
+ return sizeof (unsigned int) * 8 - __builtin_clz (v);
+
+ if (sizeof (T) <= sizeof (unsigned long))
+ return sizeof (unsigned long) * 8 - __builtin_clzl (v);
+
+ if (sizeof (T) <= sizeof (unsigned long long))
+ return sizeof (unsigned long long) * 8 - __builtin_clzll (v);
+#endif
+
+#if (defined(_MSC_VER) && _MSC_VER >= 1500) || (defined(__MINGW32__) && (__GNUC__ < 4))
+ /* _BitScanReverse stores the index of the highest set bit in `where`. */
+ if (sizeof (T) <= sizeof (unsigned int))
+ {
+ unsigned long where;
+ _BitScanReverse (&where, v);
+ return 1 + where;
+ }
+# if defined(_WIN64)
+ if (sizeof (T) <= 8)
+ {
+ unsigned long where;
+ _BitScanReverse64 (&where, v);
+ return 1 + where;
+ }
+# endif
+#endif
+
+ if (sizeof (T) <= 4)
+ {
+ /* "bithacks" */
+ /* Binary search for the highest set bit: test the upper part of the
+  * remaining range, shift it down if populated, and accumulate the shift. */
+ const unsigned int b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
+ const unsigned int S[] = {1, 2, 4, 8, 16};
+ unsigned int r = 0;
+ for (int i = 4; i >= 0; i--)
+ if (v & b[i])
+ {
+ v >>= S[i];
+ r |= S[i];
+ }
+ return r + 1;
+ }
+ if (sizeof (T) <= 8)
+ {
+ /* "bithacks" */
+ /* Same scheme as above with one extra 32-bit step. */
+ const uint64_t b[] = {0x2ULL, 0xCULL, 0xF0ULL, 0xFF00ULL, 0xFFFF0000ULL, 0xFFFFFFFF00000000ULL};
+ const unsigned int S[] = {1, 2, 4, 8, 16, 32};
+ unsigned int r = 0;
+ for (int i = 5; i >= 0; i--)
+ if (v & b[i])
+ {
+ v >>= S[i];
+ r |= S[i];
+ }
+ return r + 1;
+ }
+ if (sizeof (T) == 16)
+ {
+ /* If the high 64 bits are populated the answer is their width plus 64;
+  * otherwise it is the width of the low 64 bits. */
+ unsigned int shift = 64;
+ return (v >> shift) ? hb_bit_storage<uint64_t> ((uint64_t) (v >> shift)) + shift :
+ hb_bit_storage<uint64_t> ((uint64_t) v);
+ }
+
+ assert (0);
+ return 0; /* Shut up stupid compiler. */
+}
+
+/* Returns the number of zero bits in the least significant side of v
+ * (the count of trailing zeros).  For v == 0 the result is the full bit
+ * width of T. */
+template <typename T>
+static inline HB_CONST_FUNC unsigned int
+hb_ctz (T v)
+{
+  /* Handle zero up front, before any of the paths below sees it. */
+  if (unlikely (!v)) return 8 * sizeof (T);
+
+#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__)
+  /* Pick the narrowest builtin whose operand type is wide enough for T. */
+  if (sizeof (T) <= sizeof (unsigned int))
+    return __builtin_ctz (v);
+
+  if (sizeof (T) <= sizeof (unsigned long))
+    return __builtin_ctzl (v);
+
+  if (sizeof (T) <= sizeof (unsigned long long))
+    return __builtin_ctzll (v);
+#endif
+
+#if (defined(_MSC_VER) && _MSC_VER >= 1500) || (defined(__MINGW32__) && (__GNUC__ < 4))
+  /* _BitScanForward stores the index of the lowest set bit in `where`. */
+  if (sizeof (T) <= sizeof (unsigned int))
+  {
+    unsigned long where;
+    _BitScanForward (&where, v);
+    return where;
+  }
+# if defined(_WIN64)
+  if (sizeof (T) <= 8)
+  {
+    unsigned long where;
+    _BitScanForward64 (&where, v);
+    return where;
+  }
+# endif
+#endif
+
+  if (sizeof (T) <= 4)
+  {
+    /* "bithacks" */
+    unsigned int c = 32;
+    v &= - (int32_t) v; /* Isolate the lowest set bit. */
+    if (v) c--;
+    if (v & 0x0000FFFF) c -= 16;
+    if (v & 0x00FF00FF) c -= 8;
+    if (v & 0x0F0F0F0F) c -= 4;
+    if (v & 0x33333333) c -= 2;
+    if (v & 0x55555555) c -= 1;
+    return c;
+  }
+  if (sizeof (T) <= 8)
+  {
+    /* "bithacks" */
+    unsigned int c = 64;
+    v &= - (int64_t) (v); /* Isolate the lowest set bit. */
+    if (v) c--;
+    if (v & 0x00000000FFFFFFFFULL) c -= 32;
+    if (v & 0x0000FFFF0000FFFFULL) c -= 16;
+    if (v & 0x00FF00FF00FF00FFULL) c -= 8;
+    if (v & 0x0F0F0F0F0F0F0F0FULL) c -= 4;
+    if (v & 0x3333333333333333ULL) c -= 2;
+    if (v & 0x5555555555555555ULL) c -= 1;
+    return c;
+  }
+  if (sizeof (T) == 16)
+  {
+    /* Count trailing zeros of the low 64 bits if any of them is set;
+     * otherwise count those of the high 64 bits and add 64.  This must
+     * recurse into hb_ctz (), not hb_bit_storage (), which computes the
+     * position of the *highest* set bit. */
+    unsigned int shift = 64;
+    return (uint64_t) v ? hb_ctz<uint64_t> ((uint64_t) v) :
+			  hb_ctz<uint64_t> ((uint64_t) (v >> shift)) + shift;
+  }
+
+  assert (0);
+  return 0; /* Shut up stupid compiler. */
+}
+
+
+/*
+ * Tiny stuff.
+ */
+
+/* ASCII tag/character handling.  All of these look at 7-bit ASCII only. */
+
+/* True for 'a'..'z' and 'A'..'Z'. */
+static inline bool ISALPHA (unsigned char c)
+{
+  /* Setting bit 0x20 maps 'A'..'Z' onto 'a'..'z' and no other byte into that range. */
+  const unsigned char lower = c | 0x20;
+  return lower >= 'a' && lower <= 'z';
+}
+/* True for ASCII letters and decimal digits. */
+static inline bool ISALNUM (unsigned char c)
+{
+  return ISALPHA (c) || (c >= '0' && c <= '9');
+}
+/* True for space, form feed, newline, carriage return, tab and vertical tab. */
+static inline bool ISSPACE (unsigned char c)
+{
+  switch (c)
+  {
+    case ' ': case '\f': case '\n': case '\r': case '\t': case '\v':
+      return true;
+    default:
+      return false;
+  }
+}
+/* Maps 'a'..'z' to 'A'..'Z'; every other byte is returned unchanged. */
+static inline unsigned char TOUPPER (unsigned char c)
+{
+  if (c < 'a' || c > 'z') return c;
+  return c - ('a' - 'A');
+}
+/* Maps 'A'..'Z' to 'a'..'z'; every other byte is returned unchanged. */
+static inline unsigned char TOLOWER (unsigned char c)
+{
+  if (c < 'A' || c > 'Z') return c;
+  return c + ('a' - 'A');
+}
+/* True for '0'..'9', 'a'..'f' and 'A'..'F'. */
+static inline bool ISHEX (unsigned char c)
+{
+  if (c >= '0' && c <= '9') return true;
+  const unsigned char lower = c | 0x20;
+  return lower >= 'a' && lower <= 'f';
+}
+/* Lower-case hexadecimal digit for the low nibble of c. */
+static inline unsigned char TOHEX (uint8_t c)
+{
+  static const char digits[] = "0123456789abcdef";
+  return digits[c & 0xF];
+}
+/* Value of a hexadecimal digit; c is expected to satisfy ISHEX (). */
+static inline uint8_t FROMHEX (unsigned char c)
+{
+  if (c >= '0' && c <= '9') return c - '0';
+  return TOLOWER (c) - 'a' + 10;
+}
+
+/* Returns a / b rounded up.  b must be non-zero.
+ * Written as quotient plus a remainder test rather than (a + b - 1) / b,
+ * so that the result stays correct when a is close to UINT_MAX (the sum
+ * in the latter form would wrap around). */
+static inline unsigned int DIV_CEIL (const unsigned int a, unsigned int b)
+{ return a / b + (a % b ? 1u : 0u); }
+
+
+#undef ARRAY_LENGTH
+/* Element count of a built-in array.  Taking the array by reference means a
+ * pointer argument fails to compile instead of giving a wrong answer. */
+template <typename Elem, unsigned int N>
+static inline unsigned int ARRAY_LENGTH (const Elem (&)[N])
+{
+  return N;
+}
+/* Constant-expression variant (signed result).  Unlike the function above it
+ * cannot detect being applied to a pointer by mistake. */
+#define ARRAY_LENGTH_CONST(__array) ((signed int) (sizeof (__array) / sizeof (__array[0])))
+
+
+/* memcmp () wrapper that is safe to call with len == 0. */
+static inline int
+hb_memcmp (const void *a, const void *b, unsigned int len)
+{
+  /* Passing NULL to memcmp () is illegal even for a zero length, so a
+   * zero-length compare is answered here without calling it.
+   * https://sourceware.org/bugzilla/show_bug.cgi?id=23878 */
+  return unlikely (!len) ? 0 : memcmp (a, b, len);
+}
+
+/* memset () wrapper that is safe to call with n == 0.
+ * Returns s in every case, matching memset ()'s own return value. */
+static inline void *
+hb_memset (void *s, int c, unsigned int n)
+{
+  /* It's illegal to pass NULL to memset(), even if n is zero.  Skip the
+   * call then, but still hand back the destination pointer rather than a
+   * null pointer, so callers see the same result as from memset (). */
+  if (unlikely (!n)) return s;
+  return memset (s, c, n);
+}
+
+/* Rounds v up to the next multiple of 4 (modulo 2^N for N-bit unsigned int);
+ * multiples of 4, including 0, are returned unchanged. */
+static inline unsigned int
+hb_ceil_to_4 (unsigned int v)
+{
+  const unsigned int mask = 3u;
+  return (v + mask) & ~mask;
+}
+
+/* True when lo <= u <= hi, computed with a single unsigned comparison.
+ * Only unsigned T is accepted. */
+template <typename T> static inline bool
+hb_in_range (T u, T lo, T hi)
+{
+  static_assert (!hb_is_signed<T>::value, "");
+
+  /* Both differences are narrowed back to T on purpose: when T is smaller
+   * than int the subtraction yields a signed int, which would break the
+   * wrap-around this comparison relies on. */
+  const T offset = (T) (u - lo);
+  const T width = (T) (hi - lo);
+  return offset <= width;
+}
+/* True when u lies in [lo1, hi1] or [lo2, hi2]. */
+template <typename T> static inline bool
+hb_in_ranges (T u, T lo1, T hi1, T lo2, T hi2)
+{
+  if (hb_in_range (u, lo1, hi1)) return true;
+  return hb_in_range (u, lo2, hi2);
+}
+/* True when u lies in [lo1, hi1], [lo2, hi2] or [lo3, hi3]. */
+template <typename T> static inline bool
+hb_in_ranges (T u, T lo1, T hi1, T lo2, T hi2, T lo3, T hi3)
+{
+  if (hb_in_range (u, lo1, hi1)) return true;
+  if (hb_in_range (u, lo2, hi2)) return true;
+  return hb_in_range (u, lo3, hi3);
+}
+
+
+/*
+ * Overflow checking.
+ */
+
+/* Consider __builtin_mul_overflow use here also */
+/* Reports whether count * size may overflow unsigned int.  The check is
+ * conservative: it also answers true when count equals UINT_MAX / size.
+ * A zero size never overflows. */
+static inline bool
+hb_unsigned_mul_overflows (unsigned int count, unsigned int size)
+{
+  if (size == 0)
+    return false;
+  const unsigned int limit = ((unsigned int) -1) / size;
+  return count >= limit;
+}
+
+
+/*
+ * Sort and search.
+ */
+
+/* Comparison callback in the (key, item, extra...) shape used by the search
+ * routines: recovers the typed key and item from the untyped pointers and
+ * returns item.cmp (key, extra...). */
+template <typename K, typename V, typename ...Ts>
+static int
+_hb_cmp_method (const void *pkey, const void *pval, Ts... ds)
+{
+  const K *key = (const K *) pkey;
+  const V *val = (const V *) pval;
+  return val->cmp (*key, ds...);
+}
+
+/* Binary search over nmemb items laid out `stride` bytes apart, starting at
+ * base. compar (key, item, ds...) must return <0, 0 or >0 when the key
+ * sorts before, at, or after the item.
+ * Returns true and stores the matching index in *pos when found; otherwise
+ * returns false and stores in *pos the index at which the search ended
+ * (the final lower bound, `min`). */
+template <typename V, typename K, typename ...Ts>
+static inline bool
+hb_bsearch_impl (unsigned *pos, /* Out */
+ const K& key,
+ V* base, size_t nmemb, size_t stride,
+ int (*compar)(const void *_key, const void *_item, Ts... _ds),
+ Ts... ds)
+{
+ /* This is our *only* bsearch implementation. */
+
+ /* Signed bounds so that max can drop to -1 and end the loop. */
+ int min = 0, max = (int) nmemb - 1;
+ while (min <= max)
+ {
+ /* Midpoint is computed in unsigned arithmetic. */
+ int mid = ((unsigned int) min + (unsigned int) max) / 2;
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-align"
+ V* p = (V*) (((const char *) base) + (mid * stride));
+#pragma GCC diagnostic pop
+ int c = compar ((const void *) hb_addressof (key), (const void *) p, ds...);
+ if (c < 0)
+ max = mid - 1;
+ else if (c > 0)
+ min = mid + 1;
+ else
+ {
+ *pos = mid;
+ return true;
+ }
+ }
+ *pos = min;
+ return false;
+}
+
+/* Binary search returning a pointer to the matching item, or nullptr when
+ * the key is not present.  By default items are sizeof (V) apart and are
+ * compared through their cmp () method. */
+template <typename V, typename K>
+static inline V*
+hb_bsearch (const K& key, V* base,
+	    size_t nmemb, size_t stride = sizeof (V),
+	    int (*compar)(const void *_key, const void *_item) = _hb_cmp_method<K, V>)
+{
+  unsigned idx;
+  if (!hb_bsearch_impl (&idx, key, base, nmemb, stride, compar))
+    return nullptr;
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-align"
+  return (V*) (((const char *) base) + (idx * stride));
+#pragma GCC diagnostic pop
+}
+/* Binary search with a comparison callback that takes extra trailing
+ * arguments; returns a pointer to the matching item, or nullptr when the
+ * key is not present. */
+template <typename V, typename K, typename ...Ts>
+static inline V*
+hb_bsearch (const K& key, V* base,
+	    size_t nmemb, size_t stride,
+	    int (*compar)(const void *_key, const void *_item, Ts... _ds),
+	    Ts... ds)
+{
+  unsigned idx;
+  if (!hb_bsearch_impl (&idx, key, base, nmemb, stride, compar, ds...))
+    return nullptr;
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-align"
+  return (V*) (((const char *) base) + (idx * stride));
+#pragma GCC diagnostic pop
+}
+
+
+/* From https://github.com/noporpoise/sort_r
+ Feb 5, 2019 (c8c65c1e)
+ Modified to support optional argument using templates */
+
+/* Isaac Turner 29 April 2014 Public Domain */
+
+/*
+hb_qsort function to be exported.
+Parameters:
+ base is the array to be sorted
+ nel is the number of elements in the array
+ width is the size in bytes of each element of the array
+ compar is the comparison function
+ arg (optional) is a pointer to be passed to the comparison function
+
+void hb_qsort(void *base, size_t nel, size_t width,
+ int (*compar)(const void *_a, const void *_b, [void *_arg]),
+ [void *arg]);
+*/
+
+#define SORT_R_SWAP(a,b,tmp) ((tmp) = (a), (a) = (b), (b) = (tmp))
+
+/* Exchange the w bytes at a with the w bytes at b. */
+/* The two regions must be distinct (they are declared __restrict). */
+static inline void sort_r_swap(char *__restrict a, char *__restrict b,
+                               size_t w)
+{
+  char tmp;
+  for(size_t i = 0; i < w; i++) { SORT_R_SWAP(a[i], b[i], tmp); }
+}
+
+/* Swap the w-byte items at a and b iff compar says a > b. */
+/* Returns 1 when a swap happened, 0 otherwise. */
+/* a and b must not be equal!  (__restrict is the widely supported spelling
+   of restrict.) */
+template <typename ...Ts>
+static inline int sort_r_cmpswap(char *__restrict a,
+                                 char *__restrict b, size_t w,
+                                 int (*compar)(const void *_a,
+                                               const void *_b,
+                                               Ts... _ds),
+                                 Ts... ds)
+{
+  if(compar(a, b, ds...) <= 0) { return 0; }
+  sort_r_swap(a, b, w);
+  return 1;
+}
+
+/*
+Exchange two adjacent byte blocks, of na and nb bytes, that start at ptr, so
+that they end up in the opposite order while moving as few bytes as possible.
+The order of bytes inside a block is not necessarily preserved, e.g.
+ 12345ab -> ab34512
+ 123abc -> abc123
+ 12abcde -> deabc12
+Nothing happens if either block is empty.
+*/
+static inline void sort_r_swap_blocks(char *ptr, size_t na, size_t nb)
+{
+  if(na == 0 || nb == 0) { return; }
+
+  /* Swap as many bytes as the shorter block has, between the start of the
+     range and the position one longer-block-length further on. */
+  const size_t shorter = (na > nb) ? nb : na;
+  const size_t longer = (na > nb) ? na : nb;
+  sort_r_swap(ptr, ptr+longer, shorter);
+}
+
+/* Implement recursive quicksort ourselves */
+/* Note: quicksort is not stable, equivalent values may be swapped */
+/* Sorts nel items of w bytes each, starting at base, in place.
+   compar(a, b, ds...) must return <0, 0 or >0; the extra arguments ds are
+   passed through to every comparison unchanged. */
+template <typename ...Ts>
+static inline void sort_r_simple(void *base, size_t nel, size_t w,
+ int (*compar)(const void *_a,
+ const void *_b,
+ Ts... _ds),
+ Ts... ds)
+{
+ char *b = (char *)base, *end = b + nel*w;
+
+ /* for(size_t i=0; i<nel; i++) {printf("%4i", *(int*)(b + i*sizeof(int)));}
+ printf("\n"); */
+
+ if(nel < 10) {
+ /* Insertion sort for arbitrarily small inputs */
+ char *pi, *pj;
+ for(pi = b+w; pi < end; pi += w) {
+ /* Bubble item pi towards the front until its predecessor is not greater. */
+ for(pj = pi; pj > b && sort_r_cmpswap(pj-w,pj,w,compar,ds...); pj -= w) {}
+ }
+ }
+ else
+ {
+ /* nel > 9; Quicksort */
+
+ int cmp;
+ char *pl, *ple, *pr, *pre, *pivot;
+ char *last = b+w*(nel-1), *tmp;
+
+ /*
+ Use median of second, middle and second-last items as pivot.
+ First and last may have been swapped with pivot and therefore be extreme
+ */
+ char *l[3];
+ l[0] = b + w;
+ l[1] = b+w*(nel/2);
+ l[2] = last - w;
+
+ /* printf("pivots: %i, %i, %i\n", *(int*)l[0], *(int*)l[1], *(int*)l[2]); */
+
+ /* Order the three candidate pointers (not the items) so l[1] is the median. */
+ if(compar(l[0],l[1],ds...) > 0) { SORT_R_SWAP(l[0], l[1], tmp); }
+ if(compar(l[1],l[2],ds...) > 0) {
+ SORT_R_SWAP(l[1], l[2], tmp);
+ if(compar(l[0],l[1],ds...) > 0) { SORT_R_SWAP(l[0], l[1], tmp); }
+ }
+
+ /* swap mid value (l[1]), and last element to put pivot as last element */
+ if(l[1] != last) { sort_r_swap(l[1], last, w); }
+
+ /*
+ pl is the next item on the left to be compared to the pivot
+ pr is the last item on the right that was compared to the pivot
+ ple is the left position to put the next item that equals the pivot
+ pre is the last right position where we put an item that equals the pivot
+ v- end (beyond the array)
+ EEEEEELLLLLLLLuuuuuuuuGGGGGGGEEEEEEEE.
+ ^- b ^- ple ^- pl ^- pr ^- pre ^- last (where the pivot is)
+ Pivot comparison key:
+ E = equal, L = less than, u = unknown, G = greater than, E = equal
+ */
+ pivot = last;
+ ple = pl = b;
+ pre = pr = last;
+
+ /*
+ Strategy:
+ Loop into the list from the left and right at the same time to find:
+ - an item on the left that is greater than the pivot
+ - an item on the right that is less than the pivot
+ Once found, they are swapped and the loop continues.
+ Meanwhile items that are equal to the pivot are moved to the edges of the
+ array.
+ */
+ while(pl < pr) {
+ /* Move left hand items which are equal to the pivot to the far left.
+ break when we find an item that is greater than the pivot */
+ for(; pl < pr; pl += w) {
+ cmp = compar(pl, pivot, ds...);
+ if(cmp > 0) { break; }
+ else if(cmp == 0) {
+ if(ple < pl) { sort_r_swap(ple, pl, w); }
+ ple += w;
+ }
+ }
+ /* break if last batch of left hand items were equal to pivot */
+ if(pl >= pr) { break; }
+ /* Move right hand items which are equal to the pivot to the far right.
+ break when we find an item that is less than the pivot */
+ for(; pl < pr; ) {
+ pr -= w; /* Move right pointer onto an unprocessed item */
+ cmp = compar(pr, pivot, ds...);
+ if(cmp == 0) {
+ pre -= w;
+ if(pr < pre) { sort_r_swap(pr, pre, w); }
+ }
+ else if(cmp < 0) {
+ if(pl < pr) { sort_r_swap(pl, pr, w); }
+ pl += w;
+ break;
+ }
+ }
+ }
+
+ pl = pr; /* pr may have gone below pl */
+
+ /*
+ Now we need to go from: EEELLLGGGGEEEE
+ to: LLLEEEEEEEGGGG
+ Pivot comparison key:
+ E = equal, L = less than, u = unknown, G = greater than, E = equal
+ */
+ sort_r_swap_blocks(b, ple-b, pl-ple);
+ sort_r_swap_blocks(pr, pre-pr, end-pre);
+
+ /*for(size_t i=0; i<nel; i++) {printf("%4i", *(int*)(b + i*sizeof(int)));}
+ printf("\n");*/
+
+ /* Recurse into the "less than" run at the front and the "greater than"
+ run at the back; the items equal to the pivot in between are final. */
+ sort_r_simple(b, (pl-ple)/w, w, compar, ds...);
+ sort_r_simple(end-(pre-pr), (pre-pr)/w, w, compar, ds...);
+ }
+}
+
+/* Sorts nel items of `width` bytes each, starting at base, using a
+ * two-argument comparison callback. Uses the C library qsort () when
+ * __OPTIMIZE_SIZE__ is defined and HB_USE_INTERNAL_QSORT is not; otherwise
+ * uses the sort_r_simple () implementation from this file. */
+static inline void
+hb_qsort (void *base, size_t nel, size_t width,
+ int (*compar)(const void *_a, const void *_b))
+{
+#if defined(__OPTIMIZE_SIZE__) && !defined(HB_USE_INTERNAL_QSORT)
+ qsort (base, nel, width, compar);
+#else
+ sort_r_simple (base, nel, width, compar);
+#endif
+}
+
+/* Sorts nel items of `width` bytes each, starting at base, using a
+ * comparison callback that receives `arg` as a third argument. Uses
+ * qsort_r () when HAVE_GNU_QSORT_R is defined; otherwise uses the
+ * sort_r_simple () implementation from this file. */
+static inline void
+hb_qsort (void *base, size_t nel, size_t width,
+ int (*compar)(const void *_a, const void *_b, void *_arg),
+ void *arg)
+{
+#ifdef HAVE_GNU_QSORT_R
+ qsort_r (base, nel, width, compar, arg);
+#else
+ sort_r_simple (base, nel, width, compar, arg);
+#endif
+}
+
+
+/* Stable insertion sort of array[0..len).  When array2 is non-null it is
+ * permuted in lock-step, so array2[k] keeps following array[k].
+ * Items are relocated with memmove (). */
+template <typename T, typename T2, typename T3> static inline void
+hb_stable_sort (T *array, unsigned int len, int(*compar)(const T2 *, const T2 *), T3 *array2)
+{
+  for (unsigned int cur = 1; cur < len; cur++)
+  {
+    /* Walk left past every item that sorts strictly after array[cur];
+     * stopping at equal items is what keeps the sort stable. */
+    unsigned int dst = cur;
+    for (; dst; dst--)
+      if (compar (&array[dst - 1], &array[cur]) <= 0)
+	break;
+    if (dst == cur)
+      continue; /* Already in place. */
+
+    /* Lift item cur out, slide [dst, cur) up by one slot, drop it at dst. */
+    const unsigned int shifted = cur - dst;
+    T held = array[cur];
+    memmove (&array[dst + 1], &array[dst], shifted * sizeof (T));
+    array[dst] = held;
+
+    if (!array2)
+      continue;
+    T3 held2 = array2[cur];
+    memmove (&array2[dst + 1], &array2[dst], shifted * sizeof (T3));
+    array2[dst] = held2;
+  }
+}
+
+/* Convenience overload without a companion array. */
+template <typename T> static inline void
+hb_stable_sort (T *array, unsigned int len, int(*compar)(const T *, const T *))
+{
+  int *no_companion = nullptr;
+  hb_stable_sort (array, len, compar, no_companion);
+}
+
+/* Parses the len bytes at s as an unsigned number in the given base via
+ * hb_parse_uint () (asked to consume the whole buffer).  On success stores
+ * the value in *out and returns true; on failure returns false and leaves
+ * *out untouched. */
+static inline hb_bool_t
+hb_codepoint_parse (const char *s, unsigned int len, int base, hb_codepoint_t *out)
+{
+  const char *cursor = s;
+  const char *end = cursor + len;
+  unsigned int value;
+
+  if (likely (hb_parse_uint (&cursor, end, &value, true/* whole buffer */, base)))
+  {
+    *out = value;
+    return true;
+  }
+  return false;
+}
+
+
+/* Operators. */
+
+/* Bitwise function objects.  The binary ones can also be called with a
+ * single argument (via HB_PARTIALIZE) and carry two boolean constants,
+ * passthru_left and passthru_right. */
+
+/* lhs & rhs */
+struct hb_bitwise_and
+{ HB_PARTIALIZE(2);
+  static constexpr bool passthru_left = false;
+  static constexpr bool passthru_right = false;
+  template <typename T> constexpr auto
+  operator () (const T &lhs, const T &rhs) const HB_AUTO_RETURN (lhs & rhs)
+}
+HB_FUNCOBJ (hb_bitwise_and);
+/* lhs | rhs */
+struct hb_bitwise_or
+{ HB_PARTIALIZE(2);
+  static constexpr bool passthru_left = true;
+  static constexpr bool passthru_right = true;
+  template <typename T> constexpr auto
+  operator () (const T &lhs, const T &rhs) const HB_AUTO_RETURN (lhs | rhs)
+}
+HB_FUNCOBJ (hb_bitwise_or);
+/* lhs ^ rhs */
+struct hb_bitwise_xor
+{ HB_PARTIALIZE(2);
+  static constexpr bool passthru_left = true;
+  static constexpr bool passthru_right = true;
+  template <typename T> constexpr auto
+  operator () (const T &lhs, const T &rhs) const HB_AUTO_RETURN (lhs ^ rhs)
+}
+HB_FUNCOBJ (hb_bitwise_xor);
+/* lhs & ~rhs, i.e. the bits of lhs that are not set in rhs */
+struct hb_bitwise_sub
+{ HB_PARTIALIZE(2);
+  static constexpr bool passthru_left = true;
+  static constexpr bool passthru_right = false;
+  template <typename T> constexpr auto
+  operator () (const T &lhs, const T &rhs) const HB_AUTO_RETURN (lhs & ~rhs)
+}
+HB_FUNCOBJ (hb_bitwise_sub);
+/* ~operand */
+struct
+{
+  template <typename T> constexpr auto
+  operator () (const T &operand) const HB_AUTO_RETURN (~operand)
+}
+HB_FUNCOBJ (hb_bitwise_neg);
+
+/* Binary arithmetic function objects.  Operand types may differ; each can
+ * also be called with a single argument (via HB_PARTIALIZE). */
+
+/* lhs + rhs */
+struct
+{ HB_PARTIALIZE(2);
+  template <typename L, typename R> constexpr auto
+  operator () (const L &lhs, const R &rhs) const HB_AUTO_RETURN (lhs + rhs)
+}
+HB_FUNCOBJ (hb_add);
+/* lhs - rhs */
+struct
+{ HB_PARTIALIZE(2);
+  template <typename L, typename R> constexpr auto
+  operator () (const L &lhs, const R &rhs) const HB_AUTO_RETURN (lhs - rhs)
+}
+HB_FUNCOBJ (hb_sub);
+/* lhs * rhs */
+struct
+{ HB_PARTIALIZE(2);
+  template <typename L, typename R> constexpr auto
+  operator () (const L &lhs, const R &rhs) const HB_AUTO_RETURN (lhs * rhs)
+}
+HB_FUNCOBJ (hb_mul);
+/* lhs / rhs */
+struct
+{ HB_PARTIALIZE(2);
+  template <typename L, typename R> constexpr auto
+  operator () (const L &lhs, const R &rhs) const HB_AUTO_RETURN (lhs / rhs)
+}
+HB_FUNCOBJ (hb_div);
+/* lhs % rhs */
+struct
+{ HB_PARTIALIZE(2);
+  template <typename L, typename R> constexpr auto
+  operator () (const L &lhs, const R &rhs) const HB_AUTO_RETURN (lhs % rhs)
+}
+HB_FUNCOBJ (hb_mod);
+/* Unary function objects. */
+
+/* +operand */
+struct
+{
+  template <typename T> constexpr auto
+  operator () (const T &operand) const HB_AUTO_RETURN (+operand)
+}
+HB_FUNCOBJ (hb_pos);
+/* -operand */
+struct
+{
+  template <typename T> constexpr auto
+  operator () (const T &operand) const HB_AUTO_RETURN (-operand)
+}
+HB_FUNCOBJ (hb_neg);
+/* ++target: pre-increments the argument in place */
+struct
+{
+  template <typename T> constexpr auto
+  operator () (T &target) const HB_AUTO_RETURN (++target)
+}
+HB_FUNCOBJ (hb_inc);
+/* --target: pre-decrements the argument in place */
+struct
+{
+  template <typename T> constexpr auto
+  operator () (T &target) const HB_AUTO_RETURN (--target)
+}
+HB_FUNCOBJ (hb_dec);
+
+
+/* Compiler-assisted vectorization. */
+
+/* A fixed-size bitset: byte_size bytes stored as an array of elt_t, behaving
+ * much like a variable declared with __attribute__((vector_size(...))).
+ * byte_size must be a multiple of sizeof (elt_t). */
+template <typename elt_t, unsigned int byte_size>
+struct hb_vector_size_t
+{
+  /* Unchecked element access. */
+  elt_t& operator [] (unsigned int i) { return v[i]; }
+  const elt_t& operator [] (unsigned int i) const { return v[i]; }
+
+  /* Fills every byte of the object with the given value (zero by default). */
+  void clear (unsigned char v = 0) { memset (this, v, sizeof (*this)); }
+
+  /* Applies a unary operation to each element and returns the results. */
+  template <typename Op>
+  hb_vector_size_t process (const Op& op) const
+  {
+    hb_vector_size_t out;
+    const unsigned int count = ARRAY_LENGTH (v);
+    for (unsigned int idx = 0; idx != count; idx++)
+      out.v[idx] = op (v[idx]);
+    return out;
+  }
+  /* Applies a binary operation to each pair of corresponding elements of
+   * this vector and o, and returns the results. */
+  template <typename Op>
+  hb_vector_size_t process (const Op& op, const hb_vector_size_t &o) const
+  {
+    hb_vector_size_t out;
+    const unsigned int count = ARRAY_LENGTH (v);
+    for (unsigned int idx = 0; idx != count; idx++)
+      out.v[idx] = op (v[idx], o.v[idx]);
+    return out;
+  }
+
+  /* Element-wise bitwise operators. */
+  hb_vector_size_t operator | (const hb_vector_size_t &o) const
+  { return process (hb_bitwise_or, o); }
+  hb_vector_size_t operator & (const hb_vector_size_t &o) const
+  { return process (hb_bitwise_and, o); }
+  hb_vector_size_t operator ^ (const hb_vector_size_t &o) const
+  { return process (hb_bitwise_xor, o); }
+  hb_vector_size_t operator ~ () const
+  { return process (hb_bitwise_neg); }
+
+  private:
+  static_assert (0 == byte_size % sizeof (elt_t), "");
+  elt_t v[byte_size / sizeof (elt_t)];
+};
+
+
+#endif /* HB_ALGS_HH */
diff --git a/thirdparty/harfbuzz/src/hb-array.hh b/thirdparty/harfbuzz/src/hb-array.hh
new file mode 100644
index 0000000000..568cd02c79
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-array.hh
@@ -0,0 +1,408 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_ARRAY_HH
+#define HB_ARRAY_HH
+
+#include "hb.hh"
+#include "hb-algs.hh"
+#include "hb-iter.hh"
+#include "hb-null.hh"
+
+
+template <typename Type>
+struct hb_sorted_array_t;
+
+/* View over a contiguous run of Type items, described by a pointer and a
+ * length, that can also be used as a random-access iterator.  The view has
+ * no destructor and never releases the memory on its own; see free (). */
+template <typename Type>
+struct hb_array_t : hb_iter_with_fallback_t<hb_array_t<Type>, Type&>
+{
+  /*
+   * Constructors.
+   */
+  hb_array_t () : arrayZ (nullptr), length (0), backwards_length (0) {}
+  hb_array_t (Type *array_, unsigned int length_) : arrayZ (array_), length (length_), backwards_length (0) {}
+  template <unsigned int length_>
+  hb_array_t (Type (&array_)[length_]) : arrayZ (array_), length (length_), backwards_length (0) {}
+
+  /* Conversion from a view over a compatible item type; all three fields are copied. */
+  template <typename U,
+            hb_enable_if (hb_is_cr_convertible(U, Type))>
+  hb_array_t (const hb_array_t<U> &o) :
+    hb_iter_with_fallback_t<hb_array_t, Type&> (),
+    arrayZ (o.arrayZ), length (o.length), backwards_length (o.backwards_length) {}
+  template <typename U,
+            hb_enable_if (hb_is_cr_convertible(U, Type))>
+  hb_array_t& operator = (const hb_array_t<U> &o)
+  { arrayZ = o.arrayZ; length = o.length; backwards_length = o.backwards_length; return *this; }
+
+  /*
+   * Iterator implementation.
+   */
+  typedef Type& __item_t__;
+  static constexpr bool is_random_access_iterator = true;
+  /* Bounds-checked access: an out-of-range index yields CrapOrNull (Type)
+   * instead of touching the array. */
+  Type& __item_at__ (unsigned i) const
+  {
+    if (unlikely (i >= length)) return CrapOrNull (Type);
+    return arrayZ[i];
+  }
+  /* Advance by up to n items (clamped to the remaining length), remembering
+   * in backwards_length how far the view has moved. */
+  void __forward__ (unsigned n)
+  {
+    if (unlikely (n > length))
+      n = length;
+    length -= n;
+    backwards_length += n;
+    arrayZ += n;
+  }
+  /* Step back by up to n items (clamped to backwards_length). */
+  void __rewind__ (unsigned n)
+  {
+    if (unlikely (n > backwards_length))
+      n = backwards_length;
+    length += n;
+    backwards_length -= n;
+    arrayZ -= n;
+  }
+  unsigned __len__ () const { return length; }
+  /* Ouch. The operator== compares the contents of the array. For range-based for loops,
+   * it's best if we can just compare arrayZ, though comparing contents is still fast,
+   * but also would require that Type has operator==. As such, we optimize this operator
+   * for range-based for loop and just compare arrayZ. No need to compare length, as we
+   * assume we're only compared to .end(). */
+  bool operator != (const hb_array_t& o) const
+  { return arrayZ != o.arrayZ; }
+
+  /* Extra operators.
+   */
+  Type * operator & () const { return arrayZ; }
+  /* Note: this conversion carries over arrayZ and length only. */
+  operator hb_array_t<const Type> () { return hb_array_t<const Type> (arrayZ, length); }
+  template <typename T> operator T * () const { return arrayZ; }
+
+  HB_INTERNAL bool operator == (const hb_array_t &o) const;
+
+  /* Combines the per-item hb_hash values with a multiply-by-31 accumulator. */
+  uint32_t hash () const {
+    uint32_t current = 0;
+    for (unsigned int i = 0; i < this->length; i++) {
+      current = current * 31 + hb_hash (this->arrayZ[i]);
+    }
+    return current;
+  }
+
+  /*
+   * Compare, Sort, and Search.
+   */
+
+  /* Note: our compare is NOT lexicographic; it also does NOT call Type::cmp.
+   * Lengths are compared first (result is a.length - length); only arrays of
+   * equal length are compared byte-wise, with a's bytes as the left operand. */
+  int cmp (const hb_array_t &a) const
+  {
+    if (length != a.length)
+      return (int) a.length - (int) length;
+    return hb_memcmp (a.arrayZ, arrayZ, get_size ());
+  }
+  /* qsort-style adapter: both pointers refer to hb_array_t objects. */
+  HB_INTERNAL static int cmp (const void *pa, const void *pb)
+  {
+    const hb_array_t *a = (const hb_array_t *) pa;
+    const hb_array_t *b = (const hb_array_t *) pb;
+    return b->cmp (*a);
+  }
+
+  /* Linear search: pointer to the first matching item, or not_found. */
+  template <typename T>
+  Type *lsearch (const T &x, Type *not_found = nullptr)
+  {
+    unsigned i;
+    return lfind (x, &i) ? &this->arrayZ[i] : not_found;
+  }
+  template <typename T>
+  const Type *lsearch (const T &x, const Type *not_found = nullptr) const
+  {
+    unsigned i;
+    return lfind (x, &i) ? &this->arrayZ[i] : not_found;
+  }
+  /* Linear search using Type::cmp (x) == 0 as the match test.  On success the
+   * index is stored in *pos (when pos is non-null); on failure *pos is left
+   * untouched. */
+  template <typename T>
+  bool lfind (const T &x, unsigned *pos = nullptr) const
+  {
+    for (unsigned i = 0; i < length; ++i)
+      if (!this->arrayZ[i].cmp (x))
+      {
+        if (pos)
+          *pos = i;
+        return true;
+      }
+
+    return false;
+  }
+
+  /* Sort in place with the given comparator and return a sorted view of the result. */
+  hb_sorted_array_t<Type> qsort (int (*cmp_)(const void*, const void*))
+  {
+    if (likely (length))
+      hb_qsort (arrayZ, length, this->get_item_size (), cmp_);
+    return hb_sorted_array_t<Type> (*this);
+  }
+  /* Same, using Type::cmp as the comparator. */
+  hb_sorted_array_t<Type> qsort ()
+  {
+    if (likely (length))
+      hb_qsort (arrayZ, length, this->get_item_size (), Type::cmp);
+    return hb_sorted_array_t<Type> (*this);
+  }
+  /* Sort only the items in [start, end); end is clamped to length. */
+  void qsort (unsigned int start, unsigned int end)
+  {
+    end = hb_min (end, length);
+    assert (start <= end);
+    if (likely (start < end))
+      hb_qsort (arrayZ + start, end - start, this->get_item_size (), Type::cmp);
+  }
+
+  /*
+   * Other methods.
+   */
+
+  /* Size of the viewed items: length times the item size. */
+  unsigned int get_size () const { return length * this->get_item_size (); }
+
+  /*
+   * Reverse the order of items in this array in the range [start, end).
+   */
+  void reverse (unsigned start = 0, unsigned end = -1)
+  {
+    start = hb_min (start, length);
+    end = hb_min (end, length);
+
+    /* Fewer than two items in the range: nothing to swap. */
+    if (end < start + 2)
+      return;
+
+    for (unsigned lhs = start, rhs = end - 1; lhs < rhs; lhs++, rhs--) {
+      Type temp = arrayZ[rhs];
+      arrayZ[rhs] = arrayZ[lhs];
+      arrayZ[lhs] = temp;
+    }
+  }
+
+  /* View starting start_offset items in.  When seg_count is given, the result
+   * holds at most *seg_count items and *seg_count is updated to the actual
+   * count.  A start_offset beyond the end yields a zero-length view. */
+  hb_array_t sub_array (unsigned int start_offset = 0, unsigned int *seg_count = nullptr /* IN/OUT */) const
+  {
+    if (!start_offset && !seg_count)
+      return *this;
+
+    unsigned int count = length;
+    if (unlikely (start_offset > count))
+      count = 0;
+    else
+      count -= start_offset;
+    if (seg_count)
+      count = *seg_count = hb_min (count, *seg_count);
+    return hb_array_t (arrayZ + start_offset, count);
+  }
+  hb_array_t sub_array (unsigned int start_offset, unsigned int seg_count) const
+  { return sub_array (start_offset, &seg_count); }
+
+  /* View of at most the first `length` items. */
+  hb_array_t truncate (unsigned length) const { return sub_array (0, length); }
+
+  /* Reinterpret the bytes as a T; available only for one-byte item types.
+   * Yields Null (T) when the view is shorter than hb_null_size (T). */
+  template <typename T,
+            unsigned P = sizeof (Type),
+            hb_enable_if (P == 1)>
+  const T *as () const
+  { return length < hb_null_size (T) ? &Null (T) : reinterpret_cast<const T *> (arrayZ); }
+
+  /* True when [p, p + size) lies entirely inside this view; available only
+   * for one-byte item types. */
+  template <typename T,
+            unsigned P = sizeof (Type),
+            hb_enable_if (P == 1)>
+  bool check_range (const T *p, unsigned int size = T::static_size) const
+  {
+    return arrayZ <= ((const char *) p)
+        && ((const char *) p) <= arrayZ + length
+        && (unsigned int) (arrayZ + length - (const char *) p) >= size;
+  }
+
+  /* Only call if you allocated the underlying array using malloc() or similar.
+   * Every field is reset, including backwards_length, so the emptied view
+   * cannot be rewound off the null pointer afterwards. */
+  void free ()
+  { ::free ((void *) arrayZ); arrayZ = nullptr; length = 0; backwards_length = 0; }
+
+  /* Copy the items into the serializer's buffer, item by item, and return a
+   * view of the copy; returns an empty view when the buffer cannot be extended. */
+  template <typename hb_serialize_context_t>
+  hb_array_t copy (hb_serialize_context_t *c) const
+  {
+    TRACE_SERIALIZE (this);
+    auto* out = c->start_embed (arrayZ);
+    if (unlikely (!c->extend_size (out, get_size ()))) return_trace (hb_array_t ());
+    for (unsigned i = 0; i < length; i++)
+      out[i] = arrayZ[i]; /* TODO: add version that calls c->copy() */
+    return_trace (hb_array_t (out, length));
+  }
+
+  template <typename hb_sanitize_context_t>
+  bool sanitize (hb_sanitize_context_t *c) const
+  { return c->check_array (arrayZ, length); }
+
+  /*
+   * Members
+   */
+
+  public:
+  Type *arrayZ;                  /* First item of the view. */
+  unsigned int length;           /* Number of items from arrayZ onwards. */
+  unsigned int backwards_length; /* Number of items the view can be rewound by. */
+};
+/* Helpers that build an hb_array_t while deducing the item type:
+ * one from a pointer plus length, one from a C array. */
+template <typename T> inline hb_array_t<T>
+hb_array (T *array, unsigned int length)
+{
+  hb_array_t<T> view (array, length);
+  return view;
+}
+template <typename T, unsigned int length_> inline hb_array_t<T>
+hb_array (T (&array_)[length_])
+{
+  hb_array_t<T> view (array_);
+  return view;
+}
+
+/* Selects what hb_sorted_array_t::bfind () writes to its output index when
+ * the searched key is not present. */
+enum hb_bfind_not_found_t
+{
+ /* Leave the output index untouched. */
+ HB_BFIND_NOT_FOUND_DONT_STORE,
+ /* Store the caller-supplied to_store value. */
+ HB_BFIND_NOT_FOUND_STORE,
+ /* Store the position reported by the binary search itself. */
+ HB_BFIND_NOT_FOUND_STORE_CLOSEST,
+};
+
+/* An hb_array_t that is flagged as a sorted iterator and adds binary search
+ * on top of the plain array view. */
+template <typename Type>
+struct hb_sorted_array_t :
+        hb_iter_t<hb_sorted_array_t<Type>, Type&>,
+        hb_array_t<Type>
+{
+  typedef hb_iter_t<hb_sorted_array_t, Type&> iter_base_t;
+  HB_ITER_USING (iter_base_t);
+  static constexpr bool is_random_access_iterator = true;
+  static constexpr bool is_sorted_iterator = true;
+
+  hb_sorted_array_t () : hb_array_t<Type> () {}
+  hb_sorted_array_t (Type *array_, unsigned int length_) : hb_array_t<Type> (array_, length_) {}
+  template <unsigned int length_>
+  hb_sorted_array_t (Type (&array_)[length_]) : hb_array_t<Type> (array_) {}
+
+  template <typename U,
+            hb_enable_if (hb_is_cr_convertible(U, Type))>
+  hb_sorted_array_t (const hb_array_t<U> &o) :
+    hb_iter_t<hb_sorted_array_t, Type&> (),
+    hb_array_t<Type> (o) {}
+  /* Assign through a reference to the hb_array_t base so that *this is the
+   * object being updated; a functional cast here would only modify a
+   * temporary copy. */
+  template <typename U,
+            hb_enable_if (hb_is_cr_convertible(U, Type))>
+  hb_sorted_array_t& operator = (const hb_array_t<U> &o)
+  { static_cast<hb_array_t<Type>&> (*this) = o; return *this; }
+
+  /* Iterator implementation. */
+  bool operator != (const hb_sorted_array_t& o) const
+  { return this->arrayZ != o.arrayZ || this->length != o.length; }
+
+  /* Same as the hb_array_t versions, but the result keeps the sorted type. */
+  hb_sorted_array_t sub_array (unsigned int start_offset, unsigned int *seg_count /* IN/OUT */) const
+  { return hb_sorted_array_t (((const hb_array_t<Type> *) (this))->sub_array (start_offset, seg_count)); }
+  hb_sorted_array_t sub_array (unsigned int start_offset, unsigned int seg_count) const
+  { return sub_array (start_offset, &seg_count); }
+
+  hb_sorted_array_t truncate (unsigned length) const { return sub_array (0, length); }
+
+  /* Binary search: pointer to the matching item, or not_found. */
+  template <typename T>
+  Type *bsearch (const T &x, Type *not_found = nullptr)
+  {
+    unsigned int i;
+    return bfind (x, &i) ? &this->arrayZ[i] : not_found;
+  }
+  template <typename T>
+  const Type *bsearch (const T &x, const Type *not_found = nullptr) const
+  {
+    unsigned int i;
+    return bfind (x, &i) ? &this->arrayZ[i] : not_found;
+  }
+  /* Binary search for x.  Returns true and stores the index in *i on a hit.
+   * On a miss returns false and, when i is non-null, updates *i according to
+   * not_found: left alone, set to to_store, or set to the position reported
+   * by the search. */
+  template <typename T>
+  bool bfind (const T &x, unsigned int *i = nullptr,
+              hb_bfind_not_found_t not_found = HB_BFIND_NOT_FOUND_DONT_STORE,
+              unsigned int to_store = (unsigned int) -1) const
+  {
+    unsigned pos;
+
+    if (bsearch_impl (x, &pos))
+    {
+      if (i)
+        *i = pos;
+      return true;
+    }
+
+    if (i)
+    {
+      switch (not_found)
+      {
+        case HB_BFIND_NOT_FOUND_DONT_STORE:
+          break;
+
+        case HB_BFIND_NOT_FOUND_STORE:
+          *i = to_store;
+          break;
+
+        case HB_BFIND_NOT_FOUND_STORE_CLOSEST:
+          *i = pos;
+          break;
+      }
+    }
+    return false;
+  }
+  /* Thin wrapper handing this view's storage to hb_bsearch_impl. */
+  template <typename T>
+  bool bsearch_impl (const T &x, unsigned *pos) const
+  {
+    return hb_bsearch_impl (pos,
+                            x,
+                            this->arrayZ,
+                            this->length,
+                            sizeof (Type),
+                            _hb_cmp_method<T, Type>);
+  }
+};
+/* Helpers that build an hb_sorted_array_t while deducing the item type:
+ * one from a pointer plus length, one from a C array. */
+template <typename T> inline hb_sorted_array_t<T>
+hb_sorted_array (T *array, unsigned int length)
+{
+  hb_sorted_array_t<T> view (array, length);
+  return view;
+}
+template <typename T, unsigned int length_> inline hb_sorted_array_t<T>
+hb_sorted_array (T (&array_)[length_])
+{
+  hb_sorted_array_t<T> view (array_);
+  return view;
+}
+
+/* Content equality: both views have the same length and no pair of items at
+ * the same index compares unequal with !=. */
+template <typename T>
+bool hb_array_t<T>::operator == (const hb_array_t<T> &o) const
+{
+  if (o.length != this->length)
+    return false;
+
+  unsigned int idx = 0;
+  while (idx < this->length)
+  {
+    if (this->arrayZ[idx] != o.arrayZ[idx])
+      return false;
+    idx++;
+  }
+  return true;
+}
+
+/* TODO Specialize operator== for hb_bytes_t and hb_ubytes_t. */
+
+/* Byte-array hash: each byte is multiplied by 2654435761u and folded into a
+ * multiply-by-31 accumulator. */
+template <>
+inline uint32_t hb_array_t<const char>::hash () const {
+  uint32_t accum = 0;
+  unsigned int idx = 0;
+  while (idx < this->length)
+  {
+    const uint32_t item_hash = (uint32_t) (this->arrayZ[idx] * 2654435761u);
+    accum = accum * 31 + item_hash;
+    idx++;
+  }
+  return accum;
+}
+
+/* Unsigned-byte-array hash: each byte is multiplied by 2654435761u and folded
+ * into a multiply-by-31 accumulator. */
+template <>
+inline uint32_t hb_array_t<const unsigned char>::hash () const {
+  uint32_t accum = 0;
+  unsigned int idx = 0;
+  while (idx < this->length)
+  {
+    const uint32_t item_hash = (uint32_t) (this->arrayZ[idx] * 2654435761u);
+    accum = accum * 31 + item_hash;
+    idx++;
+  }
+  return accum;
+}
+
+
+typedef hb_array_t<const char> hb_bytes_t;
+typedef hb_array_t<const unsigned char> hb_ubytes_t;
+
+
+
+#endif /* HB_ARRAY_HH */
diff --git a/thirdparty/harfbuzz/src/hb-atomic.hh b/thirdparty/harfbuzz/src/hb-atomic.hh
new file mode 100644
index 0000000000..b3fb296b4e
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-atomic.hh
@@ -0,0 +1,295 @@
+/*
+ * Copyright © 2007 Chris Wilson
+ * Copyright © 2009,2010 Red Hat, Inc.
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Contributor(s):
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_ATOMIC_HH
+#define HB_ATOMIC_HH
+
+#include "hb.hh"
+#include "hb-meta.hh"
+
+
+/*
+ * Atomic integers and pointers.
+ */
+
+
+/* We need external help for these */
+
+#if defined(hb_atomic_int_impl_add) \
+ && defined(hb_atomic_ptr_impl_get) \
+ && defined(hb_atomic_ptr_impl_cmpexch)
+
+/* Defined externally, i.e. in config.h. */
+
+
+#elif !defined(HB_NO_MT) && defined(__ATOMIC_ACQUIRE)
+
+/* C++11-style GCC primitives. */
+
+/* General barrier for this branch, built on the __sync_synchronize builtin. */
+#define _hb_memory_barrier() __sync_synchronize ()
+
+/* Integer operations on the __atomic builtins: add uses acquire-release
+ * ordering, set uses release, get uses acquire; the _relaxed variants use
+ * relaxed ordering. */
+#define hb_atomic_int_impl_add(AI, V) __atomic_fetch_add ((AI), (V), __ATOMIC_ACQ_REL)
+#define hb_atomic_int_impl_set_relaxed(AI, V) __atomic_store_n ((AI), (V), __ATOMIC_RELAXED)
+#define hb_atomic_int_impl_set(AI, V) __atomic_store_n ((AI), (V), __ATOMIC_RELEASE)
+#define hb_atomic_int_impl_get_relaxed(AI) __atomic_load_n ((AI), __ATOMIC_RELAXED)
+#define hb_atomic_int_impl_get(AI) __atomic_load_n ((AI), __ATOMIC_ACQUIRE)
+
+/* Pointer loads and stores, following the same ordering scheme. */
+#define hb_atomic_ptr_impl_set_relaxed(P, V) __atomic_store_n ((P), (V), __ATOMIC_RELAXED)
+#define hb_atomic_ptr_impl_get_relaxed(P) __atomic_load_n ((P), __ATOMIC_RELAXED)
+#define hb_atomic_ptr_impl_get(P) __atomic_load_n ((P), __ATOMIC_ACQUIRE)
+/* Pointer compare-and-exchange on the __atomic builtins: replaces *P with N
+ * when *P equals O_ and reports whether the exchange happened.  The weak
+ * form is requested, with acquire-release ordering on success and relaxed
+ * ordering on failure. */
+static inline bool
+_hb_atomic_ptr_impl_cmplexch (const void **P, const void *O_, const void *N)
+{
+  /* The builtin takes the expected value by address, so it needs an lvalue. */
+  const void *expected = O_;
+  const bool swapped = __atomic_compare_exchange_n ((void **) P, (void **) &expected, (void *) N, true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
+  return swapped;
+}
+#define hb_atomic_ptr_impl_cmpexch(P,O,N) _hb_atomic_ptr_impl_cmplexch ((const void **) (P), (O), (N))
+
+#elif !defined(HB_NO_MT) && __cplusplus >= 201103L
+
+/* C++11 atomics. */
+
+#include <atomic>
+
+/* Barriers for the <atomic> branch.  The general barrier is an
+ * acquire-release fence (the enumerator is spelled memory_order_acq_rel);
+ * the read and write barriers are acquire and release fences respectively. */
+#define _hb_memory_barrier() std::atomic_thread_fence(std::memory_order_acq_rel)
+#define _hb_memory_r_barrier() std::atomic_thread_fence(std::memory_order_acquire)
+#define _hb_memory_w_barrier() std::atomic_thread_fence(std::memory_order_release)
+
+/* Integer operations: the plain int storage is accessed through a
+ * reinterpret_cast to std::atomic<int>.  add uses acquire-release ordering,
+ * set uses release, get uses acquire; the _relaxed variants use relaxed
+ * ordering. */
+#define hb_atomic_int_impl_add(AI, V) (reinterpret_cast<std::atomic<int> *> (AI)->fetch_add ((V), std::memory_order_acq_rel))
+#define hb_atomic_int_impl_set_relaxed(AI, V) (reinterpret_cast<std::atomic<int> *> (AI)->store ((V), std::memory_order_relaxed))
+#define hb_atomic_int_impl_set(AI, V) (reinterpret_cast<std::atomic<int> *> (AI)->store ((V), std::memory_order_release))
+#define hb_atomic_int_impl_get_relaxed(AI) (reinterpret_cast<std::atomic<int> const *> (AI)->load (std::memory_order_relaxed))
+#define hb_atomic_int_impl_get(AI) (reinterpret_cast<std::atomic<int> const *> (AI)->load (std::memory_order_acquire))
+
+/* Pointer loads and stores, accessed the same way through std::atomic<void*>. */
+#define hb_atomic_ptr_impl_set_relaxed(P, V) (reinterpret_cast<std::atomic<void*> *> (P)->store ((V), std::memory_order_relaxed))
+#define hb_atomic_ptr_impl_get_relaxed(P) (reinterpret_cast<std::atomic<void*> const *> (P)->load (std::memory_order_relaxed))
+#define hb_atomic_ptr_impl_get(P) (reinterpret_cast<std::atomic<void*> *> (P)->load (std::memory_order_acquire))
+/* Pointer compare-and-exchange via std::atomic: replaces *P with N when *P
+ * equals O_ and reports whether the exchange happened.  Uses the weak form,
+ * with acquire-release ordering on success and relaxed ordering on failure. */
+static inline bool
+_hb_atomic_ptr_impl_cmplexch (const void **P, const void *O_, const void *N)
+{
+  /* compare_exchange_weak takes the expected value by reference, so it needs an lvalue. */
+  const void *expected = O_;
+  std::atomic<const void*> *slot = reinterpret_cast<std::atomic<const void*> *> (P);
+  return slot->compare_exchange_weak (expected, N, std::memory_order_acq_rel, std::memory_order_relaxed);
+}
+#define hb_atomic_ptr_impl_cmpexch(P,O,N) _hb_atomic_ptr_impl_cmplexch ((const void **) (P), (O), (N))
+
+
+#elif !defined(HB_NO_MT) && defined(_WIN32)
+
+#include <windows.h>
+
+/* Barrier for the Windows branch.  Calls MemoryBarrier () when that macro is
+ * defined or when building with MinGW; otherwise performs an
+ * InterlockedExchange on a dummy local instead. */
+static inline void _hb_memory_barrier ()
+{
+#if !defined(MemoryBarrier) && !defined(__MINGW32_VERSION)
+ /* MinGW has a convoluted history of supporting MemoryBarrier. */
+ LONG dummy = 0;
+ InterlockedExchange (&dummy, 1);
+#else
+ MemoryBarrier ();
+#endif
+}
+/* Macro of the same name that expands to a call of the function above. */
+#define _hb_memory_barrier() _hb_memory_barrier ()
+
+/* The int storage is passed to the Interlocked API as a LONG; the
+ * static_assert checks that both types have the same size. */
+#define hb_atomic_int_impl_add(AI, V) InterlockedExchangeAdd ((LONG *) (AI), (V))
+static_assert ((sizeof (LONG) == sizeof (int)), "");
+
+/* Reports success when the value returned by the exchange call equals O.
+ * NOTE(review): presumably the call returns the previous content of *P -
+ * confirm against the Win32 documentation. */
+#define hb_atomic_ptr_impl_cmpexch(P,O,N) (InterlockedCompareExchangePointer ((P), (N), (O)) == (O))
+
+
+#elif !defined(HB_NO_MT) && defined(HAVE_INTEL_ATOMIC_PRIMITIVES)
+
+/* This branch maps the barrier, the integer add and the pointer
+ * compare-and-exchange directly onto the __sync builtins. */
+#define _hb_memory_barrier() __sync_synchronize ()
+
+#define hb_atomic_int_impl_add(AI, V) __sync_fetch_and_add ((AI), (V))
+
+#define hb_atomic_ptr_impl_cmpexch(P,O,N) __sync_bool_compare_and_swap ((P), (O), (N))
+
+
+#elif !defined(HB_NO_MT) && defined(HAVE_SOLARIS_ATOMIC_OPS)
+
+#include <atomic.h>
+#include <mbarrier.h>
+
+/* Read, write and read-write barriers map onto the __machine_*_barrier calls. */
+#define _hb_memory_r_barrier() __machine_r_barrier ()
+#define _hb_memory_w_barrier() __machine_w_barrier ()
+#define _hb_memory_barrier() __machine_rw_barrier ()
+
+/* Adds V to *AI between a write barrier and a read barrier.  V is subtracted
+ * again from the value produced by atomic_add_int_nv before returning. */
+static inline int _hb_fetch_and_add (int *AI, int V)
+{
+  _hb_memory_w_barrier ();
+  const int previous = atomic_add_int_nv ((uint_t *) AI, V) - V;
+  _hb_memory_r_barrier ();
+  return previous;
+}
+/* Compare-and-swap of a pointer between a write barrier and a read barrier;
+ * reports success when atomic_cas_ptr hands back O. */
+static inline bool _hb_compare_and_swap_ptr (void **P, void *O, void *N)
+{
+  _hb_memory_w_barrier ();
+  const bool swapped = atomic_cas_ptr (P, O, N) == O;
+  _hb_memory_r_barrier ();
+  return swapped;
+}
+
+#define hb_atomic_int_impl_add(AI, V) _hb_fetch_and_add ((AI), (V))
+
+#define hb_atomic_ptr_impl_cmpexch(P,O,N) _hb_compare_and_swap_ptr ((P), (O), (N))
+
+
+#elif !defined(HB_NO_MT) && defined(__APPLE__)
+
+#include <libkern/OSAtomic.h>
+#ifdef __MAC_OS_X_MIN_REQUIRED
+#include <AvailabilityMacros.h>
+#elif defined(__IPHONE_OS_MIN_REQUIRED)
+#include <Availability.h>
+#endif
+
+/* Barrier and integer add for the Apple branch, built on the OSAtomic API. */
+#define _hb_memory_barrier() OSMemoryBarrier ()
+
+/* V is subtracted again from the result of OSAtomicAdd32Barrier.
+ * NOTE(review): presumably that call returns the updated value, making this
+ * macro yield the value before the addition - confirm against Apple's docs. */
+#define hb_atomic_int_impl_add(AI, V) (OSAtomicAdd32Barrier ((V), (AI)) - (V))
+
+/* Pointer compare-and-exchange: use the pointer-typed call when the minimum
+ * deployment target allows it, otherwise fall back to the 64-bit or 32-bit
+ * integer variant depending on the architecture. */
+#if (MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_4 || __IPHONE_VERSION_MIN_REQUIRED >= 20100)
+#define hb_atomic_ptr_impl_cmpexch(P,O,N) OSAtomicCompareAndSwapPtrBarrier ((O), (N), (P))
+#else
+#if __ppc64__ || __x86_64__ || __aarch64__
+#define hb_atomic_ptr_impl_cmpexch(P,O,N) OSAtomicCompareAndSwap64Barrier ((int64_t) (O), (int64_t) (N), (int64_t*) (P))
+#else
+#define hb_atomic_ptr_impl_cmpexch(P,O,N) OSAtomicCompareAndSwap32Barrier ((int32_t) (O), (int32_t) (N), (int32_t*) (P))
+#endif
+#endif
+
+
+#elif !defined(HB_NO_MT) && defined(_AIX) && (defined(__IBMCPP__) || defined(__ibmxl__))
+
+#include <builtins.h>
+
+/* Barrier for the AIX branch. */
+#define _hb_memory_barrier() __lwsync ()
+
+/* Integer add wrapped in a barrier on each side; returns whatever
+ * __fetch_and_add returns. */
+static inline int _hb_fetch_and_add (int *AI, int V)
+{
+  _hb_memory_barrier ();
+  const int fetched = __fetch_and_add (AI, V);
+  _hb_memory_barrier ();
+  return fetched;
+}
+/* Long-sized compare-and-swap wrapped in a barrier on each side; the
+ * expected value is passed by address. */
+static inline bool _hb_compare_and_swaplp (long *P, long O, long N)
+{
+  _hb_memory_barrier ();
+  const bool swapped = __compare_and_swaplp (P, &O, N);
+  _hb_memory_barrier ();
+  return swapped;
+}
+
+#define hb_atomic_int_impl_add(AI, V) _hb_fetch_and_add ((AI), (V))
+
+/* Pointers are handled as longs here; the static_assert checks the sizes match. */
+#define hb_atomic_ptr_impl_cmpexch(P,O,N) _hb_compare_and_swaplp ((long *) (P), (long) (O), (long) (N))
+static_assert ((sizeof (long) == sizeof (void *)), "");
+
+
+#elif defined(HB_NO_MT)
+
+/* HB_NO_MT branch: plain, non-atomic operations. */
+
+/* Adds V in place and yields the value from before the addition. */
+#define hb_atomic_int_impl_add(AI, V) ((*(AI) += (V)) - (V))
+
+/* The barrier is an empty statement. */
+#define _hb_memory_barrier() do {} while (0)
+
+/* Stores N and yields true when *P equals O; otherwise yields false and
+ * leaves *P alone. */
+#define hb_atomic_ptr_impl_cmpexch(P,O,N) (* (void **) (P) == (void *) (O) ? (* (void **) (P) = (void *) (N), true) : false)
+
+
+#else
+
+#error "Could not find any system to define atomic_int macros."
+#error "Check hb-atomic.hh for possible resolutions."
+
+#endif
+
+
+/* Defaults for anything the selected branch above did not define. */
+
+/* Read and write barriers fall back to the general barrier. */
+#ifndef _hb_memory_r_barrier
+#define _hb_memory_r_barrier() _hb_memory_barrier ()
+#endif
+#ifndef _hb_memory_w_barrier
+#define _hb_memory_w_barrier() _hb_memory_barrier ()
+#endif
+/* Relaxed integer accesses fall back to a plain store and a plain load. */
+#ifndef hb_atomic_int_impl_set_relaxed
+#define hb_atomic_int_impl_set_relaxed(AI, V) (*(AI) = (V))
+#endif
+#ifndef hb_atomic_int_impl_get_relaxed
+#define hb_atomic_int_impl_get_relaxed(AI) (*(AI))
+#endif
+
+/* Relaxed pointer accesses fall back to a plain store and a plain load. */
+#ifndef hb_atomic_ptr_impl_set_relaxed
+#define hb_atomic_ptr_impl_set_relaxed(P, V) (*(P) = (V))
+#endif
+#ifndef hb_atomic_ptr_impl_get_relaxed
+#define hb_atomic_ptr_impl_get_relaxed(P) (*(P))
+#endif
+/* Fallback store: write barrier first, then a plain store. */
+#ifndef hb_atomic_int_impl_set
+inline void hb_atomic_int_impl_set (int *AI, int v)
+{
+  _hb_memory_w_barrier ();
+  *AI = v;
+}
+#endif
+/* Fallback load: plain load first, then a read barrier. */
+#ifndef hb_atomic_int_impl_get
+inline int hb_atomic_int_impl_get (const int *AI)
+{
+  const int loaded = *AI;
+  _hb_memory_r_barrier ();
+  return loaded;
+}
+#endif
+/* Fallback pointer load: plain load first, then a read barrier. */
+#ifndef hb_atomic_ptr_impl_get
+inline void *hb_atomic_ptr_impl_get (void ** const P)
+{
+  void *loaded = *P;
+  _hb_memory_r_barrier ();
+  return loaded;
+}
+#endif
+
+
+#define HB_ATOMIC_INT_INIT(V) {V}
+/* Integer wrapper whose accessors go through the hb_atomic_int_impl_*
+ * primitives selected earlier in this header. */
+struct hb_atomic_int_t
+{
+  /* Stores. */
+  void set_relaxed (int v_)
+  {
+    hb_atomic_int_impl_set_relaxed (&v, v_);
+  }
+  void set (int v_)
+  {
+    hb_atomic_int_impl_set (&v, v_);
+  }
+
+  /* Loads. */
+  int get_relaxed () const
+  {
+    return hb_atomic_int_impl_get_relaxed (&v);
+  }
+  int get () const
+  {
+    return hb_atomic_int_impl_get (&v);
+  }
+
+  /* Add +1 / -1 and return whatever hb_atomic_int_impl_add yields. */
+  int inc ()
+  {
+    return hb_atomic_int_impl_add (&v, 1);
+  }
+  int dec ()
+  {
+    return hb_atomic_int_impl_add (&v, -1);
+  }
+
+  int v;
+};
+
+
+#define HB_ATOMIC_PTR_INIT(V) {V}
+/* Pointer wrapper whose accessors go through the hb_atomic_ptr_impl_*
+ * primitives selected earlier in this header.  P is the pointer type; T is
+ * that type with the pointer removed. */
+template <typename P>
+struct hb_atomic_ptr_t
+{
+  typedef hb_remove_pointer<P> T;
+
+  /* Initialise with a relaxed store (null by default). */
+  void init (T* v_ = nullptr)
+  {
+    set_relaxed (v_);
+  }
+  void set_relaxed (T* v_)
+  {
+    hb_atomic_ptr_impl_set_relaxed (&v, v_);
+  }
+  T *get_relaxed () const
+  {
+    return (T *) hb_atomic_ptr_impl_get_relaxed (&v);
+  }
+  T *get () const
+  {
+    return (T *) hb_atomic_ptr_impl_get ((void **) &v);
+  }
+  /* Compare-and-exchange from old to new_; the result is that of
+   * hb_atomic_ptr_impl_cmpexch. */
+  bool cmpexch (const T *old, T *new_) const
+  {
+    return hb_atomic_ptr_impl_cmpexch ((void **) &v, (void *) old, (void *) new_);
+  }
+
+  /* Both conversions below go through get (). */
+  T * operator -> () const
+  {
+    return get ();
+  }
+  template <typename C> operator C * () const
+  {
+    return get ();
+  }
+
+  T *v;
+};
+
+
+#endif /* HB_ATOMIC_HH */
diff --git a/thirdparty/harfbuzz/src/hb-bimap.hh b/thirdparty/harfbuzz/src/hb-bimap.hh
new file mode 100644
index 0000000000..e9f3a6a52d
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-bimap.hh
@@ -0,0 +1,166 @@
+/*
+ * Copyright © 2019 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+
+#ifndef HB_BIMAP_HH
+#define HB_BIMAP_HH
+
+#include "hb.hh"
+#include "hb-map.hh"
+
+/* Bi-directional map */
+struct hb_bimap_t
+{
+  hb_bimap_t () { init (); }
+  ~hb_bimap_t () { fini (); }
+
+  /* init / fini / reset / clear are applied to the forward map first and
+   * then to the backward map. */
+  void init ()
+  {
+    forw_map.init ();
+    back_map.init ();
+  }
+
+  void fini ()
+  {
+    forw_map.fini ();
+    back_map.fini ();
+  }
+
+  void reset ()
+  {
+    forw_map.reset ();
+    back_map.reset ();
+  }
+
+  /* True as soon as either underlying map reports an error. */
+  bool in_error () const
+  {
+    if (forw_map.in_error ())
+      return true;
+    return back_map.in_error ();
+  }
+
+  /* Record lhs <-> rhs in both directions.  An invalid lhs is ignored; an
+   * invalid rhs removes the mapping for lhs instead. */
+  void set (hb_codepoint_t lhs, hb_codepoint_t rhs)
+  {
+    if (unlikely (lhs == HB_MAP_VALUE_INVALID))
+      return;
+
+    if (unlikely (rhs == HB_MAP_VALUE_INVALID))
+    {
+      del (lhs);
+      return;
+    }
+
+    forw_map.set (lhs, rhs);
+    back_map.set (rhs, lhs);
+  }
+
+  /* Forward (lhs -> rhs) and backward (rhs -> lhs) lookups. */
+  hb_codepoint_t get (hb_codepoint_t lhs) const
+  {
+    return forw_map.get (lhs);
+  }
+  hb_codepoint_t backward (hb_codepoint_t rhs) const
+  {
+    return back_map.get (rhs);
+  }
+
+  hb_codepoint_t operator [] (hb_codepoint_t lhs) const
+  {
+    return forw_map.get (lhs);
+  }
+  bool has (hb_codepoint_t lhs, hb_codepoint_t *vp = nullptr) const
+  {
+    return forw_map.has (lhs, vp);
+  }
+
+  /* Remove the mapping for lhs from both directions. */
+  void del (hb_codepoint_t lhs)
+  {
+    const hb_codepoint_t mapped = forw_map.get (lhs);
+    back_map.del (mapped);
+    forw_map.del (lhs);
+  }
+
+  void clear ()
+  {
+    forw_map.clear ();
+    back_map.clear ();
+  }
+
+  bool is_empty () const
+  {
+    return !forw_map.get_population ();
+  }
+
+  /* The population is taken from the forward map. */
+  unsigned int get_population () const
+  {
+    return forw_map.get_population ();
+  }
+
+  protected:
+  hb_map_t forw_map;
+  hb_map_t back_map;
+};
+
+/* Incremental bimap: only lhs is given, rhs is incrementally assigned */
+struct hb_inc_bimap_t : hb_bimap_t
+{
+  hb_inc_bimap_t () { init (); }
+
+  /* Re-initialise both maps and restart rhs numbering at zero. */
+  void init ()
+  {
+    hb_bimap_t::init ();
+    next_value = 0;
+  }
+
+  /* Add a mapping from lhs to rhs with a unique value if lhs is unknown.
+   * Return the rhs value as the result.
+   */
+  hb_codepoint_t add (hb_codepoint_t lhs)
+  {
+    hb_codepoint_t rhs = forw_map[lhs];
+    if (rhs == HB_MAP_VALUE_INVALID)
+    {
+      rhs = next_value++;
+      set (lhs, rhs);
+    }
+    return rhs;
+  }
+
+  /* Consume the next rhs value without mapping anything to it. */
+  hb_codepoint_t skip ()
+  { return next_value++; }
+
+  hb_codepoint_t get_next_value () const
+  { return next_value; }
+
+  /* add () every member of the set, in the order hb_set_next yields them. */
+  void add_set (const hb_set_t *set)
+  {
+    hb_codepoint_t i = HB_SET_VALUE_INVALID;
+    while (hb_set_next (set, &i)) add (i);
+  }
+
+  /* Create an identity map. */
+  bool identity (unsigned int size)
+  {
+    clear ();
+    for (hb_codepoint_t i = 0; i < size; i++) set (i, i);
+    return !in_error ();
+  }
+
+  protected:
+  /* qsort comparator ordering hb_codepoint_t values ascending.  The values
+   * are compared as unsigned quantities rather than subtracted as ints, so
+   * large values order correctly and nothing can overflow. */
+  static int cmp_id (const void* a, const void* b)
+  {
+    const hb_codepoint_t lhs = *(const hb_codepoint_t *) a;
+    const hb_codepoint_t rhs = *(const hb_codepoint_t *) b;
+    return (lhs > rhs) - (lhs < rhs);
+  }
+
+  public:
+  /* Optional: after finished adding all mappings in a random order,
+   * reassign rhs to lhs so that they are in the same order. */
+  void sort ()
+  {
+    hb_codepoint_t count = get_population ();
+    hb_vector_t <hb_codepoint_t> work;
+    work.resize (count);
+
+    /* Collect the lhs value currently behind each rhs slot... */
+    for (hb_codepoint_t rhs = 0; rhs < count; rhs++)
+      work[rhs] = back_map[rhs];
+
+    work.qsort (cmp_id);
+
+    /* ...then rebuild the map with the sorted lhs values numbered 0..count-1. */
+    clear ();
+    for (hb_codepoint_t rhs = 0; rhs < count; rhs++)
+      set (work[rhs], rhs);
+  }
+
+  protected:
+  unsigned int next_value;
+};
+
+#endif /* HB_BIMAP_HH */
diff --git a/thirdparty/harfbuzz/src/hb-blob.cc b/thirdparty/harfbuzz/src/hb-blob.cc
new file mode 100644
index 0000000000..94ed50fd3c
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-blob.cc
@@ -0,0 +1,717 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+#include "hb-blob.hh"
+
+#ifdef HAVE_SYS_MMAN_H
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif /* HAVE_UNISTD_H */
+#include <sys/mman.h>
+#endif /* HAVE_SYS_MMAN_H */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+
+/**
+ * SECTION: hb-blob
+ * @title: hb-blob
+ * @short_description: Binary data containers
+ * @include: hb.h
+ *
+ * Blobs wrap a chunk of binary data to handle lifecycle management of data
+ * while it is passed between client and HarfBuzz. Blobs are primarily used
+ * to create font faces, but also to access font face tables, as well as
+ * pass around other binary data.
+ **/
+
+
+/**
+ * hb_blob_create: (skip)
+ * @data: Pointer to blob data.
+ * @length: Length of @data in bytes.
+ * @mode: Memory mode for @data.
+ * @user_data: Data parameter to pass to @destroy.
+ * @destroy: Callback to call when @data is not needed anymore.
+ *
+ * Creates a new "blob" object wrapping @data. The @mode parameter is used
+ * to negotiate ownership and lifecycle of @data.
+ *
+ * Return value: New blob, or the empty blob if something failed or if @length is
+ * zero. Destroy with hb_blob_destroy().
+ *
+ * Since: 0.9.2
+ **/
+hb_blob_t *
+hb_blob_create (const char        *data,
+                unsigned int       length,
+                hb_memory_mode_t   mode,
+                void              *user_data,
+                hb_destroy_func_t  destroy)
+{
+  /* Only attempt the allocation for a non-zero length below 2^31. */
+  hb_blob_t *blob = nullptr;
+  const bool acceptable = length && length < (1u << 31);
+  if (acceptable)
+    blob = hb_object_create<hb_blob_t> ();
+
+  if (!blob)
+  {
+    /* The caller's data is never adopted on this path, so release it now. */
+    if (destroy)
+      destroy (user_data);
+    return hb_blob_get_empty ();
+  }
+
+  blob->data = data;
+  blob->length = length;
+  blob->mode = mode;
+  blob->user_data = user_data;
+  blob->destroy = destroy;
+
+  if (mode != HB_MEMORY_MODE_DUPLICATE)
+    return blob;
+
+  /* DUPLICATE: treat the input as read-only, then ask for a writable blob. */
+  blob->mode = HB_MEMORY_MODE_READONLY;
+  if (blob->try_make_writable ())
+    return blob;
+
+  hb_blob_destroy (blob);
+  return hb_blob_get_empty ();
+}
+
+/* Destroy-callback adapter: casts @data back to hb_blob_t * and hands it
+ * to hb_blob_destroy (). hb_blob_create_sub_blob () passes it to
+ * hb_blob_create () together with a reference to the parent blob. */
+static void
+_hb_blob_destroy (void *data)
+{
+ hb_blob_destroy ((hb_blob_t *) data);
+}
+
+/**
+ * hb_blob_create_sub_blob:
+ * @parent: Parent blob.
+ * @offset: Start offset of sub-blob within @parent, in bytes.
+ * @length: Length of sub-blob.
+ *
+ * Returns a blob that represents a range of bytes in @parent. The new
+ * blob is always created with %HB_MEMORY_MODE_READONLY, meaning that it
+ * will never modify data in the parent blob. The parent data is not
+ * expected to be modified, and will result in undefined behavior if it
+ * is.
+ *
+ * Makes @parent immutable.
+ *
+ * Return value: New blob, or the empty blob if something failed or if
+ * @length is zero or @offset is beyond the end of @parent's data. Destroy
+ * with hb_blob_destroy().
+ *
+ * Since: 0.9.2
+ **/
+hb_blob_t *
+hb_blob_create_sub_blob (hb_blob_t    *parent,
+                         unsigned int  offset,
+                         unsigned int  length)
+{
+  /* Guard clauses: nothing to wrap, no parent, or offset past the data. */
+  if (!parent)
+    return hb_blob_get_empty ();
+  if (!length)
+    return hb_blob_get_empty ();
+  if (offset >= parent->length)
+    return hb_blob_get_empty ();
+
+  hb_blob_make_immutable (parent);
+
+  /* Clamp the requested length to what remains after offset. */
+  const unsigned int remaining = parent->length - offset;
+  const unsigned int sub_length = hb_min (length, remaining);
+
+  /* The sub-blob holds a reference on the parent; _hb_blob_destroy is the
+   * callback that releases it. */
+  return hb_blob_create (parent->data + offset,
+                         sub_length,
+                         HB_MEMORY_MODE_READONLY,
+                         hb_blob_reference (parent),
+                         _hb_blob_destroy);
+}
+
+/**
+ * hb_blob_copy_writable_or_fail:
+ * @blob: A blob.
+ *
+ * Makes a writable copy of @blob.
+ *
+ * Return value: New blob, or nullptr if the copy could not be made. That
+ * covers allocation failure as well as any other case in which
+ * hb_blob_create() hands back the empty blob (for example a zero-length
+ * @blob), because the empty blob is translated to nullptr here.
+ *
+ * Since: 1.8.0
+ **/
+hb_blob_t *
+hb_blob_copy_writable_or_fail (hb_blob_t *blob)
+{
+ blob = hb_blob_create (blob->data,
+ blob->length,
+ HB_MEMORY_MODE_DUPLICATE,
+ nullptr,
+ nullptr);
+
+ if (unlikely (blob == hb_blob_get_empty ()))
+ blob = nullptr;
+
+ return blob;
+}
+
+/**
+ * hb_blob_get_empty:
+ *
+ * Returns the singleton empty blob. The pointer refers to the shared
+ * Null (hb_blob_t) object with its constness cast away.
+ *
+ * See TODO:link object types for more information.
+ *
+ * Return value: (transfer full): the empty blob.
+ *
+ * Since: 0.9.2
+ **/
+hb_blob_t *
+hb_blob_get_empty ()
+{
+ return const_cast<hb_blob_t *> (&Null (hb_blob_t));
+}
+
+/**
+ * hb_blob_reference: (skip)
+ * @blob: a blob.
+ *
+ * Increases the reference count on @blob (delegates to
+ * hb_object_reference()).
+ *
+ * See TODO:link object types for more information.
+ *
+ * Return value: @blob.
+ *
+ * Since: 0.9.2
+ **/
+hb_blob_t *
+hb_blob_reference (hb_blob_t *blob)
+{
+ return hb_object_reference (blob);
+}
+
+/**
+ * hb_blob_destroy: (skip)
+ * @blob: a blob.
+ *
+ * Decreases the reference count on @blob, and if it reaches zero, destroys
+ * @blob, freeing all memory, possibly calling the destroy-callback the blob
+ * was created for if it has not been called already.
+ *
+ * See TODO:link object types for more information.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_blob_destroy (hb_blob_t *blob)
+{
+ /* Stop here unless hb_object_destroy () says the object must be torn down. */
+ if (!hb_object_destroy (blob)) return;
+
+ /* Invokes the destroy callback, if one is still set. */
+ blob->fini_shallow ();
+
+ free (blob);
+}
+
+/**
+ * hb_blob_set_user_data: (skip)
+ * @blob: a blob.
+ * @key: key for data to set.
+ * @data: data to set.
+ * @destroy: callback to call when @data is not needed anymore.
+ * @replace: whether to replace an existing data with the same key.
+ *
+ * Attaches @data to @blob under @key by forwarding every argument to
+ * hb_object_set_user_data().
+ *
+ * Return value: the result of hb_object_set_user_data(); presumably true
+ * on success (confirm against that helper).
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_blob_set_user_data (hb_blob_t *blob,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace)
+{
+ return hb_object_set_user_data (blob, key, data, destroy, replace);
+}
+
+/**
+ * hb_blob_get_user_data: (skip)
+ * @blob: a blob.
+ * @key: key for data to get.
+ *
+ * Looks up the user data stored on @blob under @key by forwarding to
+ * hb_object_get_user_data().
+ *
+ * Return value: (transfer none): the result of hb_object_get_user_data();
+ * presumably the pointer previously set for @key (confirm against that
+ * helper).
+ *
+ * Since: 0.9.2
+ **/
+void *
+hb_blob_get_user_data (hb_blob_t *blob,
+ hb_user_data_key_t *key)
+{
+ return hb_object_get_user_data (blob, key);
+}
+
+
+/**
+ * hb_blob_make_immutable:
+ * @blob: a blob.
+ *
+ * Marks @blob immutable via hb_object_make_immutable(); does nothing when
+ * hb_object_is_immutable() already reports it as immutable. An immutable
+ * blob can no longer be made writable: hb_blob_t::try_make_writable()
+ * refuses, so hb_blob_get_data_writable() returns NULL.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_blob_make_immutable (hb_blob_t *blob)
+{
+ if (hb_object_is_immutable (blob))
+ return;
+
+ hb_object_make_immutable (blob);
+}
+
+/**
+ * hb_blob_is_immutable:
+ * @blob: a blob.
+ *
+ * Tests whether @blob has been made immutable.
+ *
+ * Return value: the result of hb_object_is_immutable() for @blob.
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_blob_is_immutable (hb_blob_t *blob)
+{
+ return hb_object_is_immutable (blob);
+}
+
+
+/**
+ * hb_blob_get_length:
+ * @blob: a blob.
+ *
+ * Reads the length stored in @blob.
+ *
+ * Return value: the length of blob data in bytes.
+ *
+ * Since: 0.9.2
+ **/
+unsigned int
+hb_blob_get_length (hb_blob_t *blob)
+{
+ return blob->length;
+}
+
+/**
+ * hb_blob_get_data:
+ * @blob: a blob.
+ * @length: (out): if non-NULL, receives the length of the data in bytes.
+ *
+ * Gives read-only access to the blob's data; nothing is copied.
+ *
+ * Returns: (transfer none) (array length=length): the data pointer stored
+ * in @blob.
+ *
+ * Since: 0.9.2
+ **/
+const char *
+hb_blob_get_data (hb_blob_t *blob, unsigned int *length)
+{
+ if (length)
+ *length = blob->length;
+
+ return blob->data;
+}
+
+/**
+ * hb_blob_get_data_writable:
+ * @blob: a blob.
+ * @length: (out): output length of the writable data.
+ *
+ * Tries to make blob data writable (possibly copying it) and
+ * return pointer to data.
+ *
+ * Fails if blob has been made immutable, or if memory allocation
+ * fails.
+ *
+ * Returns: (transfer none) (array length=length): Writable blob data,
+ * or %NULL if failed.
+ *
+ * Since: 0.9.2
+ **/
+char *
+hb_blob_get_data_writable (hb_blob_t *blob, unsigned int *length)
+{
+  /* Decide once whether the data can be written, then report accordingly. */
+  const bool writable = blob->try_make_writable ();
+
+  if (length)
+    *length = writable ? blob->length : 0;
+
+  return writable ? const_cast<char *> (blob->data) : nullptr;
+}
+
+
+/* Tries to make the memory behind this->data writable in place with
+ * mprotect (). Returns true (and switches mode to HB_MEMORY_MODE_WRITABLE)
+ * on success; returns false if the page size cannot be determined, if
+ * mprotect () fails, or if built without HAVE_SYS_MMAN_H / HAVE_MPROTECT. */
+bool
+hb_blob_t::try_make_writable_inplace_unix ()
+{
+#if defined(HAVE_SYS_MMAN_H) && defined(HAVE_MPROTECT)
+ /* pagesize keeps its -1 sentinel when none of the queries below is
+ * compiled in; a query that itself returns -1 yields the same value. */
+ uintptr_t pagesize = -1, mask, length;
+ const char *addr;
+
+#if defined(HAVE_SYSCONF) && defined(_SC_PAGE_SIZE)
+ pagesize = (uintptr_t) sysconf (_SC_PAGE_SIZE);
+#elif defined(HAVE_SYSCONF) && defined(_SC_PAGESIZE)
+ pagesize = (uintptr_t) sysconf (_SC_PAGESIZE);
+#elif defined(HAVE_GETPAGESIZE)
+ pagesize = (uintptr_t) getpagesize ();
+#endif
+
+ if ((uintptr_t) -1L == pagesize) {
+ DEBUG_MSG_FUNC (BLOB, this, "failed to get pagesize: %s", strerror (errno));
+ return false;
+ }
+ DEBUG_MSG_FUNC (BLOB, this, "pagesize is %lu", (unsigned long) pagesize);
+
+ /* Widen [data, data+length) to whole pages: round the start down and the
+ * end up. The mask arithmetic assumes pagesize is a power of two. */
+ mask = ~(pagesize-1);
+ addr = (const char *) (((uintptr_t) this->data) & mask);
+ length = (const char *) (((uintptr_t) this->data + this->length + pagesize-1) & mask) - addr;
+ DEBUG_MSG_FUNC (BLOB, this,
+ "calling mprotect on [%p..%p] (%lu bytes)",
+ addr, addr+length, (unsigned long) length);
+ if (-1 == mprotect ((void *) addr, length, PROT_READ | PROT_WRITE)) {
+ DEBUG_MSG_FUNC (BLOB, this, "mprotect failed: %s", strerror (errno));
+ return false;
+ }
+
+ this->mode = HB_MEMORY_MODE_WRITABLE;
+
+ DEBUG_MSG_FUNC (BLOB, this,
+ "successfully made [%p..%p] (%lu bytes) writable\n",
+ addr, addr+length, (unsigned long) length);
+ return true;
+#else
+ return false;
+#endif
+}
+
+bool
+hb_blob_t::try_make_writable_inplace ()
+{
+  DEBUG_MSG_FUNC (BLOB, this, "making writable inplace\n");
+
+  const bool succeeded = this->try_make_writable_inplace_unix ();
+  if (!succeeded)
+  {
+    DEBUG_MSG_FUNC (BLOB, this, "making writable -> FAILED\n");
+
+    /* Record the failure by downgrading the blob to plain read-only. */
+    this->mode = HB_MEMORY_MODE_READONLY;
+  }
+
+  return succeeded;
+}
+
+/* Makes the blob's data writable, copying it if necessary.
+ *
+ * Returns false if the blob is immutable or the copy cannot be allocated.
+ * Returns true if the blob is already writable, if it could be made
+ * writable in place (only tried for READONLY_MAY_MAKE_WRITABLE), or after
+ * the data has been duplicated into a malloc ()ed buffer owned by the blob. */
+bool
+hb_blob_t::try_make_writable ()
+{
+  if (hb_object_is_immutable (this))
+    return false;
+
+  if (this->mode == HB_MEMORY_MODE_WRITABLE)
+    return true;
+
+  if (this->mode == HB_MEMORY_MODE_READONLY_MAY_MAKE_WRITABLE && this->try_make_writable_inplace ())
+    return true;
+
+  /* No second WRITABLE check is needed here: a failed in-place attempt
+   * leaves the mode at READONLY and a successful one has already returned. */
+
+  DEBUG_MSG_FUNC (BLOB, this, "current data is -> %p\n", this->data);
+
+  char *new_data = (char *) malloc (this->length);
+  if (unlikely (!new_data))
+    return false;
+
+  /* Report the new buffer, not the old pointer that was just printed above. */
+  DEBUG_MSG_FUNC (BLOB, this, "dupped successfully -> %p\n", new_data);
+
+  memcpy (new_data, this->data, this->length);
+  /* Release the previous owner's data before adopting the private copy. */
+  this->destroy_user_data ();
+  this->mode = HB_MEMORY_MODE_WRITABLE;
+  this->data = new_data;
+  this->user_data = new_data;
+  this->destroy = free;
+
+  return true;
+}
+
+/*
+ * Mmap
+ */
+
+#ifndef HB_NO_OPEN
+#ifdef HAVE_MMAP
+# if !defined(HB_NO_RESOURCE_FORK) && defined(__APPLE__)
+# include <sys/paths.h>
+# endif
+# include <sys/types.h>
+# include <sys/stat.h>
+# include <fcntl.h>
+#endif
+
+#ifdef _WIN32
+# include <windows.h>
+#else
+# ifndef O_BINARY
+# define O_BINARY 0
+# endif
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+/* Bookkeeping for a memory-mapped file; used as the user data of blobs
+ * created by hb_blob_create_from_file () and released by
+ * _hb_mapped_file_destroy (). */
+struct hb_mapped_file_t
+{
+ char *contents; /* Start of the mapped view. */
+ unsigned long length; /* Size of the file in bytes. */
+#ifdef _WIN32
+ HANDLE mapping; /* File-mapping object backing contents. */
+#endif
+};
+
+#if (defined(HAVE_MMAP) || defined(_WIN32)) && !defined(HB_NO_MMAP)
+/* Destroy callback for mapped-file blobs: unmaps the view (and, on Windows,
+ * closes the mapping handle), then frees the hb_mapped_file_t itself. */
+static void
+_hb_mapped_file_destroy (void *file_)
+{
+ hb_mapped_file_t *file = (hb_mapped_file_t *) file_;
+#ifdef HAVE_MMAP
+ munmap (file->contents, file->length);
+#elif defined(_WIN32)
+ UnmapViewOfFile (file->contents);
+ CloseHandle (file->mapping);
+#else
+ assert (0); // If we don't have mmap we shouldn't reach here
+#endif
+
+ free (file);
+}
+#endif
+
+#ifdef _PATH_RSRCFORKSPEC
+/* Opens the resource fork of @file_name, i.e. the path formed by appending
+ * _PATH_RSRCFORKSPEC, read-only.
+ *
+ * On success returns the open descriptor and stores the fork's size in
+ * @file->length. Returns -1 if the name cannot be allocated, or if open ()
+ * or fstat () fails; @file is left untouched in that case. */
+static int
+_open_resource_fork (const char *file_name, hb_mapped_file_t *file)
+{
+  size_t name_len = strlen (file_name);
+  /* sizeof counts the suffix's terminating NUL, so len covers the whole
+   * concatenated C string. */
+  size_t len = name_len + sizeof (_PATH_RSRCFORKSPEC);
+
+  char *rsrc_name = (char *) malloc (len);
+  if (unlikely (!rsrc_name)) return -1;
+
+  memcpy (rsrc_name, file_name, name_len);
+  /* Copy the suffix together with its NUL. Stopping one byte short, as two
+   * bounded strncpy () calls did, leaves the malloc ()ed buffer
+   * unterminated when it is handed to open (). */
+  memcpy (rsrc_name + name_len, _PATH_RSRCFORKSPEC,
+          sizeof (_PATH_RSRCFORKSPEC));
+
+  int fd = open (rsrc_name, O_RDONLY | O_BINARY, 0);
+  free (rsrc_name);
+
+  if (fd != -1)
+  {
+    struct stat st;
+    if (fstat (fd, &st) != -1)
+      file->length = (unsigned long) st.st_size;
+    else
+    {
+      close (fd);
+      fd = -1;
+    }
+  }
+
+  return fd;
+}
+#endif
+
+/**
+ * hb_blob_create_from_file:
+ * @file_name: font filename.
+ *
+ * Loads @file_name into a blob. Where the build supports it the file is
+ * memory-mapped (mmap() or the Win32 file-mapping calls); if that is not
+ * available or fails, the file is read with stdio into a malloc()ed
+ * buffer instead.
+ *
+ * Returns: A hb_blob_t pointer with the content of the file, or the empty
+ * blob on failure (this includes a zero-length result, for which
+ * hb_blob_create() returns the empty blob).
+ *
+ * Since: 1.7.7
+ **/
+hb_blob_t *
+hb_blob_create_from_file (const char *file_name)
+{
+  /* Adopted from glib's gmappedfile.c with Matthias Clasen and
+     Allison Lortie permission but changed a lot to suit our need. */
+#if defined(HAVE_MMAP) && !defined(HB_NO_MMAP)
+  hb_mapped_file_t *file = (hb_mapped_file_t *) calloc (1, sizeof (hb_mapped_file_t));
+  if (unlikely (!file)) return hb_blob_get_empty ();
+
+  int fd = open (file_name, O_RDONLY | O_BINARY, 0);
+  if (unlikely (fd == -1)) goto fail_without_close;
+
+  struct stat st;
+  if (unlikely (fstat (fd, &st) == -1)) goto fail;
+
+  file->length = (unsigned long) st.st_size;
+
+#ifdef _PATH_RSRCFORKSPEC
+  /* When the file itself is empty, try its resource fork instead. */
+  if (unlikely (file->length == 0))
+  {
+    int rfd = _open_resource_fork (file_name, file);
+    if (rfd != -1)
+    {
+      close (fd);
+      fd = rfd;
+    }
+  }
+#endif
+
+  file->contents = (char *) mmap (nullptr, file->length, PROT_READ,
+                                  MAP_PRIVATE | MAP_NORESERVE, fd, 0);
+
+  if (unlikely (file->contents == MAP_FAILED)) goto fail;
+
+  close (fd);
+
+  return hb_blob_create (file->contents, file->length,
+                         HB_MEMORY_MODE_READONLY_MAY_MAKE_WRITABLE, (void *) file,
+                         (hb_destroy_func_t) _hb_mapped_file_destroy);
+
+fail:
+  close (fd);
+fail_without_close:
+  free (file);
+
+#elif defined(_WIN32) && !defined(HB_NO_MMAP)
+  hb_mapped_file_t *file = (hb_mapped_file_t *) calloc (1, sizeof (hb_mapped_file_t));
+  if (unlikely (!file)) return hb_blob_get_empty ();
+
+  HANDLE fd;
+  unsigned int size = strlen (file_name) + 1;
+  wchar_t * wchar_file_name = (wchar_t *) malloc (sizeof (wchar_t) * size);
+  if (unlikely (!wchar_file_name)) goto fail_without_close;
+  mbstowcs (wchar_file_name, file_name, size);
+#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+  {
+    CREATEFILE2_EXTENDED_PARAMETERS ceparams = { 0 };
+    ceparams.dwSize = sizeof(CREATEFILE2_EXTENDED_PARAMETERS);
+    /* Split the combined CreateFileW-style flags word into the three
+     * CreateFile2 fields. The parentheses matter: `&` binds tighter than
+     * `|`, so without them only FILE_FLAG_OVERLAPPED would be masked. */
+    ceparams.dwFileAttributes = (FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED) & 0xFFFF;
+    ceparams.dwFileFlags = (FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED) & 0xFFF00000;
+    ceparams.dwSecurityQosFlags = (FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED) & 0x000F0000;
+    ceparams.lpSecurityAttributes = nullptr;
+    ceparams.hTemplateFile = nullptr;
+    fd = CreateFile2 (wchar_file_name, GENERIC_READ, FILE_SHARE_READ,
+                      OPEN_EXISTING, &ceparams);
+  }
+#else
+  fd = CreateFileW (wchar_file_name, GENERIC_READ, FILE_SHARE_READ, nullptr,
+                    OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL|FILE_FLAG_OVERLAPPED,
+                    nullptr);
+#endif
+  free (wchar_file_name);
+
+  if (unlikely (fd == INVALID_HANDLE_VALUE)) goto fail_without_close;
+
+#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+  {
+    LARGE_INTEGER length;
+    GetFileSizeEx (fd, &length);
+    file->length = length.LowPart;
+    file->mapping = CreateFileMappingFromApp (fd, nullptr, PAGE_READONLY, length.QuadPart, nullptr);
+  }
+#else
+  file->length = (unsigned long) GetFileSize (fd, nullptr);
+  file->mapping = CreateFileMapping (fd, nullptr, PAGE_READONLY, 0, 0, nullptr);
+#endif
+  if (unlikely (!file->mapping)) goto fail;
+
+#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+  file->contents = (char *) MapViewOfFileFromApp (file->mapping, FILE_MAP_READ, 0, 0);
+#else
+  file->contents = (char *) MapViewOfFile (file->mapping, FILE_MAP_READ, 0, 0, 0);
+#endif
+  if (unlikely (!file->contents)) goto fail;
+
+  CloseHandle (fd);
+  return hb_blob_create (file->contents, file->length,
+                         HB_MEMORY_MODE_READONLY_MAY_MAKE_WRITABLE, (void *) file,
+                         (hb_destroy_func_t) _hb_mapped_file_destroy);
+
+fail:
+  CloseHandle (fd);
+fail_without_close:
+  free (file);
+
+#endif
+
+  /* The following tries to read a file without knowing its size beforehand
+     It's used as a fallback for systems without mmap or to read from pipes */
+  unsigned long len = 0, allocated = BUFSIZ * 16;
+  char *data = (char *) malloc (allocated);
+  if (unlikely (!data)) return hb_blob_get_empty ();
+
+  FILE *fp = fopen (file_name, "rb");
+  if (unlikely (!fp)) goto fread_fail_without_close;
+
+  while (!feof (fp))
+  {
+    if (allocated - len < BUFSIZ)
+    {
+      allocated *= 2;
+      /* Don't allocate and go more than ~536MB, our mmap reader still
+         can cover files like that but lets limit our fallback reader */
+      if (unlikely (allocated > (2 << 28))) goto fread_fail;
+      char *new_data = (char *) realloc (data, allocated);
+      if (unlikely (!new_data)) goto fread_fail;
+      data = new_data;
+    }
+
+    unsigned long addition = fread (data + len, 1, allocated - len, fp);
+    /* Count the bytes that did arrive before looking at the error state,
+       so a retried read continues after them instead of overwriting them. */
+    len += addition;
+
+    /* ferror () only reports that an error happened; the reason is in
+       errno, so that is what has to be compared with EINTR. */
+    if (unlikely (ferror (fp)))
+    {
+#ifdef EINTR // armcc doesn't have it
+      if (errno == EINTR)
+      {
+        /* Interrupted read: reset the sticky error flag and try again. */
+        clearerr (fp);
+        continue;
+      }
+#endif
+      goto fread_fail;
+    }
+  }
+  fclose (fp);
+
+  return hb_blob_create (data, len, HB_MEMORY_MODE_WRITABLE, data,
+                         (hb_destroy_func_t) free);
+
+fread_fail:
+  fclose (fp);
+fread_fail_without_close:
+  free (data);
+  return hb_blob_get_empty ();
+}
+#endif /* !HB_NO_OPEN */
diff --git a/thirdparty/harfbuzz/src/hb-blob.h b/thirdparty/harfbuzz/src/hb-blob.h
new file mode 100644
index 0000000000..f80e9af2d9
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-blob.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_H_IN
+#error "Include <hb.h> instead."
+#endif
+
+#ifndef HB_BLOB_H
+#define HB_BLOB_H
+
+#include "hb-common.h"
+
+HB_BEGIN_DECLS
+
+
+/*
+ * Note re various memory-modes:
+ *
+ * - In no case shall the HarfBuzz client modify memory
+ * that is passed to HarfBuzz in a blob. If there is
+ * any such possibility, MODE_DUPLICATE should be used
+ * such that HarfBuzz makes a copy immediately,
+ *
+ * - Use MODE_READONLY otherwise, unless you really really
+ * really know what you are doing,
+ *
+ * - MODE_WRITABLE is appropriate if you really made a
+ * copy of data solely for the purpose of passing to
+ * HarfBuzz and doing that just once (no reuse!),
+ *
+ * - If the font is mmap()ed, it's ok to use
+ * READONLY_MAY_MAKE_WRITABLE, however, using that mode
+ * correctly is very tricky. Use MODE_READONLY instead.
+ */
+/* How a blob may treat the memory it is given. */
+typedef enum {
+ HB_MEMORY_MODE_DUPLICATE, /* HarfBuzz makes its own copy immediately. */
+ HB_MEMORY_MODE_READONLY, /* Used in place; copied before any write access. */
+ HB_MEMORY_MODE_WRITABLE, /* May be written in place. */
+ HB_MEMORY_MODE_READONLY_MAY_MAKE_WRITABLE /* Read-only for now; an in-place switch to writable is tried before copying. */
+} hb_memory_mode_t;
+
+typedef struct hb_blob_t hb_blob_t;
+
+HB_EXTERN hb_blob_t *
+hb_blob_create (const char *data,
+ unsigned int length,
+ hb_memory_mode_t mode,
+ void *user_data,
+ hb_destroy_func_t destroy);
+
+HB_EXTERN hb_blob_t *
+hb_blob_create_from_file (const char *file_name);
+
+/* Always creates with MEMORY_MODE_READONLY.
+ * Even if the parent blob is writable, we don't
+ * want the user of the sub-blob to be able to
+ * modify the parent data as that data may be
+ * shared among multiple sub-blobs.
+ */
+HB_EXTERN hb_blob_t *
+hb_blob_create_sub_blob (hb_blob_t *parent,
+ unsigned int offset,
+ unsigned int length);
+
+HB_EXTERN hb_blob_t *
+hb_blob_copy_writable_or_fail (hb_blob_t *blob);
+
+HB_EXTERN hb_blob_t *
+hb_blob_get_empty (void);
+
+HB_EXTERN hb_blob_t *
+hb_blob_reference (hb_blob_t *blob);
+
+HB_EXTERN void
+hb_blob_destroy (hb_blob_t *blob);
+
+HB_EXTERN hb_bool_t
+hb_blob_set_user_data (hb_blob_t *blob,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace);
+
+
+HB_EXTERN void *
+hb_blob_get_user_data (hb_blob_t *blob,
+ hb_user_data_key_t *key);
+
+
+HB_EXTERN void
+hb_blob_make_immutable (hb_blob_t *blob);
+
+HB_EXTERN hb_bool_t
+hb_blob_is_immutable (hb_blob_t *blob);
+
+
+HB_EXTERN unsigned int
+hb_blob_get_length (hb_blob_t *blob);
+
+HB_EXTERN const char *
+hb_blob_get_data (hb_blob_t *blob, unsigned int *length);
+
+HB_EXTERN char *
+hb_blob_get_data_writable (hb_blob_t *blob, unsigned int *length);
+
+HB_END_DECLS
+
+#endif /* HB_BLOB_H */
diff --git a/thirdparty/harfbuzz/src/hb-blob.hh b/thirdparty/harfbuzz/src/hb-blob.hh
new file mode 100644
index 0000000000..d85bd823b0
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-blob.hh
@@ -0,0 +1,97 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_BLOB_HH
+#define HB_BLOB_HH
+
+#include "hb.hh"
+
+
+/*
+ * hb_blob_t
+ */
+
+/* A block of bytes plus the information needed to release it: the data
+ * pointer and length, the memory mode, and an optional destroy callback
+ * with its user data. */
+struct hb_blob_t
+{
+ /* Releases what the blob owns; currently just the user data. */
+ void fini_shallow () { destroy_user_data (); }
+
+ /* Calls the destroy callback once, then clears both the callback and the
+ * user data so that a second call does nothing. */
+ void destroy_user_data ()
+ {
+ if (destroy)
+ {
+ destroy (user_data);
+ user_data = nullptr;
+ destroy = nullptr;
+ }
+ }
+
+ HB_INTERNAL bool try_make_writable ();
+ HB_INTERNAL bool try_make_writable_inplace ();
+ HB_INTERNAL bool try_make_writable_inplace_unix ();
+
+ /* View of the data as an hb_bytes_t covering [data, data + length). */
+ hb_bytes_t as_bytes () const { return hb_bytes_t (data, length); }
+ /* The same bytes exposed as a const Type *, via hb_bytes_t::as<Type> (). */
+ template <typename Type>
+ const Type* as () const { return as_bytes ().as<Type> (); }
+
+ public:
+ hb_object_header_t header;
+
+ const char *data; /* Start of the blob's bytes. */
+ unsigned int length; /* Number of bytes at data. */
+ hb_memory_mode_t mode; /* Current memory mode of data. */
+
+ void *user_data; /* Argument passed to destroy. */
+ hb_destroy_func_t destroy; /* Called when data is no longer needed; may be null. */
+};
+
+
+/*
+ * hb_blob_ptr_t
+ */
+
+/* Typed handle around an hb_blob_t *: get () exposes the blob's bytes as a
+ * const T * through hb_blob_t::as<T> (), where T is hb_remove_pointer<P>. */
+template <typename P>
+struct hb_blob_ptr_t
+{
+ typedef hb_remove_pointer<P> T;
+
+ hb_blob_ptr_t (hb_blob_t *b_ = nullptr) : b (b_) {}
+ hb_blob_t * operator = (hb_blob_t *b_) { return b = b_; }
+ const T * operator -> () const { return get (); }
+ const T & operator * () const { return *get (); }
+ template <typename C> operator const C * () const { return get (); }
+ operator const char * () const { return (const char *) get (); }
+ const T * get () const { return b->as<T> (); }
+ hb_blob_t * get_blob () const { return b.get_raw (); }
+ unsigned int get_length () const { return b.get ()->length; }
+ /* Destroys the held blob with hb_blob_destroy () and resets the handle. */
+ void destroy () { hb_blob_destroy (b.get ()); b = nullptr; }
+
+ /* NOTE(review): hb_nonnull_ptr_t is declared elsewhere; presumably get ()
+ * substitutes a Null object for nullptr while get_raw () returns the
+ * stored pointer as is. Confirm against its definition. */
+ hb_nonnull_ptr_t<hb_blob_t> b;
+};
+
+
+#endif /* HB_BLOB_HH */
diff --git a/thirdparty/harfbuzz/src/hb-buffer-deserialize-json.hh b/thirdparty/harfbuzz/src/hb-buffer-deserialize-json.hh
new file mode 100644
index 0000000000..1f9e2e91db
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-buffer-deserialize-json.hh
@@ -0,0 +1,643 @@
+
+#line 1 "hb-buffer-deserialize-json.rl"
+/*
+ * Copyright © 2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_BUFFER_DESERIALIZE_JSON_HH
+#define HB_BUFFER_DESERIALIZE_JSON_HH
+
+#include "hb.hh"
+
+
+#line 36 "hb-buffer-deserialize-json.hh"
+static const unsigned char _deserialize_json_trans_keys[] = { /* generated: consecutive (low, high) key bounds, presumably one pair per state, then a closing 0 */
+ 0u, 0u, 9u, 123u, 9u, 34u, 97u, 103u, 120u, 121u, 34u, 34u, 9u, 58u, 9u, 57u,
+ 48u, 57u, 9u, 125u, 9u, 125u, 9u, 125u, 34u, 34u, 9u, 58u, 9u, 57u, 48u, 57u,
+ 9u, 125u, 9u, 125u, 108u, 108u, 34u, 34u, 9u, 58u, 9u, 57u, 9u, 125u, 9u, 125u,
+ 120u, 121u, 34u, 34u, 9u, 58u, 9u, 57u, 48u, 57u, 9u, 125u, 9u, 125u, 34u, 34u,
+ 9u, 58u, 9u, 57u, 48u, 57u, 9u, 125u, 9u, 125u, 34u, 34u, 9u, 58u, 9u, 57u,
+ 65u, 122u, 34u, 122u, 9u, 125u, 9u, 125u, 9u, 93u, 9u, 123u, 0u, 0u, 0
+};
+
+static const char _deserialize_json_key_spans[] = { /* generated: width of each key range, high - low + 1 (e.g. 9..123 -> 115); 0 for the (0, 0) entries */
+ 0, 115, 26, 7, 2, 1, 50, 49,
+ 10, 117, 117, 117, 1, 50, 49, 10,
+ 117, 117, 1, 1, 50, 49, 117, 117,
+ 2, 1, 50, 49, 10, 117, 117, 1,
+ 50, 49, 10, 117, 117, 1, 50, 49,
+ 58, 89, 117, 117, 85, 115, 0
+};
+
+static const short _deserialize_json_index_offsets[] = { /* generated: running start offsets; after the first entry each one is the previous offset + previous key span + 1 */
+ 0, 0, 116, 143, 151, 154, 156, 207,
+ 257, 268, 386, 504, 622, 624, 675, 725,
+ 736, 854, 972, 974, 976, 1027, 1077, 1195,
+ 1313, 1316, 1318, 1369, 1419, 1430, 1548, 1666,
+ 1668, 1719, 1769, 1780, 1898, 2016, 2018, 2069,
+ 2119, 2178, 2268, 2386, 2504, 2590, 2706
+};
+
+static const char _deserialize_json_indicies[] = {
+ 0, 0, 0, 0, 0, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 2, 1, 3, 3, 3,
+ 3, 3, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 3, 1, 4, 1,
+ 5, 1, 6, 7, 1, 1, 8, 1,
+ 9, 10, 1, 11, 1, 11, 11, 11,
+ 11, 11, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 11, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 12, 1,
+ 12, 12, 12, 12, 12, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 12,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 13, 1, 1, 14,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 1, 16, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 1, 18, 18, 18,
+ 18, 18, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 18, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 19, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 20, 1, 21, 21, 21, 21, 21,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 21, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 3, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 22,
+ 1, 18, 18, 18, 18, 18, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 18, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 19, 1, 1, 1,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 20, 1, 23,
+ 1, 23, 23, 23, 23, 23, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 23, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 24, 1, 24, 24, 24, 24,
+ 24, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 24, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 25, 1, 1, 26, 27, 27, 27, 27,
+ 27, 27, 27, 27, 27, 1, 28, 29,
+ 29, 29, 29, 29, 29, 29, 29, 29,
+ 1, 30, 30, 30, 30, 30, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 30, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 31, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 32, 1, 30,
+ 30, 30, 30, 30, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 30, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 31, 1, 1, 1, 29, 29,
+ 29, 29, 29, 29, 29, 29, 29, 29,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 32, 1, 33, 1, 34,
+ 1, 34, 34, 34, 34, 34, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 34, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 35, 1, 35, 35, 35, 35,
+ 35, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 35, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 36, 37, 37, 37, 37,
+ 37, 37, 37, 37, 37, 1, 38, 38,
+ 38, 38, 38, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 38, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 39, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 40, 1, 38, 38, 38, 38,
+ 38, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 38, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 39,
+ 1, 1, 1, 41, 41, 41, 41, 41,
+ 41, 41, 41, 41, 41, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 40, 1, 42, 43, 1, 44, 1, 44,
+ 44, 44, 44, 44, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 44, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 45, 1, 45, 45, 45, 45, 45, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 45, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 46, 1,
+ 1, 47, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 1, 49, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 1, 51,
+ 51, 51, 51, 51, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 51, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 52, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 53, 1, 51, 51, 51,
+ 51, 51, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 51, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 52, 1, 1, 1, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 53, 1, 54, 1, 54, 54, 54,
+ 54, 54, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 54, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 55, 1,
+ 55, 55, 55, 55, 55, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 55,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 56, 1, 1, 57,
+ 58, 58, 58, 58, 58, 58, 58, 58,
+ 58, 1, 59, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 1, 61, 61, 61,
+ 61, 61, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 61, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 62, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 63, 1, 61, 61, 61, 61, 61,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 61, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 62, 1,
+ 1, 1, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 63,
+ 1, 64, 1, 64, 64, 64, 64, 64,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 64, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 65, 1, 65, 65,
+ 65, 65, 65, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 65, 1, 66,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 67, 68, 68,
+ 68, 68, 68, 68, 68, 68, 68, 1,
+ 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 1, 1, 1, 1, 1, 1,
+ 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 1, 70, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 71, 71,
+ 1, 71, 71, 71, 71, 71, 71, 71,
+ 71, 71, 71, 1, 1, 1, 1, 1,
+ 1, 1, 71, 71, 71, 71, 71, 71,
+ 71, 71, 71, 71, 71, 71, 71, 71,
+ 71, 71, 71, 71, 71, 71, 71, 71,
+ 71, 71, 71, 71, 1, 1, 1, 1,
+ 71, 1, 71, 71, 71, 71, 71, 71,
+ 71, 71, 71, 71, 71, 71, 71, 71,
+ 71, 71, 71, 71, 71, 71, 71, 71,
+ 71, 71, 71, 71, 1, 72, 72, 72,
+ 72, 72, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 72, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 73, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 74, 1, 72, 72, 72, 72, 72,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 72, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 73, 1,
+ 1, 1, 75, 75, 75, 75, 75, 75,
+ 75, 75, 75, 75, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 74,
+ 1, 76, 76, 76, 76, 76, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 76, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 77, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 78, 1, 0,
+ 0, 0, 0, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 0, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 1, 1, 0
+};
+
+static const char _deserialize_json_trans_targs[] = { /* transition number -> next state; the parser assigns the looked-up value to cs */
+ 1, 0, 2, 2, 3, 4, 18, 24,
+ 37, 5, 12, 6, 7, 8, 9, 11,
+ 9, 11, 10, 2, 44, 10, 44, 13,
+ 14, 15, 16, 17, 16, 17, 10, 2,
+ 44, 19, 20, 21, 22, 23, 10, 2,
+ 44, 23, 25, 31, 26, 27, 28, 29,
+ 30, 29, 30, 10, 2, 44, 32, 33,
+ 34, 35, 36, 35, 36, 10, 2, 44,
+ 38, 39, 40, 42, 43, 41, 10, 41,
+ 10, 2, 44, 43, 44, 45, 46
+};
+
+static const char _deserialize_json_trans_actions[] = { /* transition number -> action id dispatched by the parser's switch; 0 means no action */
+ 0, 0, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 2, 2, 2,
+ 0, 0, 3, 3, 4, 0, 5, 0,
+ 0, 2, 2, 2, 0, 0, 6, 6,
+ 7, 0, 0, 0, 2, 2, 8, 8,
+ 9, 0, 0, 0, 0, 0, 2, 2,
+ 2, 0, 0, 10, 10, 11, 0, 0,
+ 2, 2, 2, 0, 0, 12, 12, 13,
+ 0, 0, 0, 2, 2, 2, 14, 0,
+ 15, 15, 16, 0, 0, 0, 0
+};
+
+static const int deserialize_json_start = 1; /* initial state assigned to cs by the parser */
+static const int deserialize_json_first_final = 44; /* not referenced by the parser function that follows */
+static const int deserialize_json_error = 0; /* the parser stops when cs reaches this value (it tests cs == 0) */
+
+static const int deserialize_json_en_main = 1; /* not referenced by the parser function that follows */
+
+
+#line 97 "hb-buffer-deserialize-json.rl"
+
+/* Parses one chunk of JSON-serialized glyphs from [buf, buf + buf_len) and
+ * appends every completed glyph (info plus position) to @buffer.
+ *
+ * The scanner is a table-driven state machine; the #line directives point at
+ * hb-buffer-deserialize-json.rl as its source.
+ *
+ * *end_ptr is written unconditionally (so end_ptr must not be null): it is
+ * updated after each appended glyph and finally set to the byte where
+ * scanning stopped.  Returns false as soon as a glyph name or number fails to
+ * parse, or when the buffer reports failure after add_info(); otherwise
+ * returns true only if the whole input was consumed and its last byte is not
+ * ']'.
+ *
+ * NOTE(review): the final `*(p-1)` reads buf[-1] when buf_len is 0 (then
+ * p == pe == buf); presumably the caller rejects empty input -- confirm. */
+static hb_bool_t
+_hb_buffer_deserialize_glyphs_json (hb_buffer_t *buffer,
+ const char *buf,
+ unsigned int buf_len,
+ const char **end_ptr,
+ hb_font_t *font)
+{
+ const char *p = buf, *pe = buf + buf_len;
+
+ /* Ensure we have positions. */
+ (void) hb_buffer_get_glyph_positions (buffer, nullptr);
+
+ while (p < pe && ISSPACE (*p)) /* skip leading whitespace */
+ p++;
+ if (p < pe && *p == (buffer->len ? ',' : '[')) /* consume ',' when the buffer already holds glyphs, '[' when it is empty */
+ {
+ *end_ptr = ++p;
+ }
+
+ const char *tok = nullptr; /* start of the token currently being scanned */
+ int cs; /* current state of the state machine */
+ hb_glyph_info_t info = {0};
+ hb_glyph_position_t pos = {0};
+
+#line 466 "hb-buffer-deserialize-json.hh"
+ {
+ cs = deserialize_json_start;
+ }
+
+#line 471 "hb-buffer-deserialize-json.hh"
+ {
+ int _slen;
+ int _trans;
+ const unsigned char *_keys;
+ const char *_inds;
+ if ( p == pe )
+ goto _test_eof;
+ if ( cs == 0 )
+ goto _out;
+_resume:
+ _keys = _deserialize_json_trans_keys + (cs<<1); /* two key bytes (low, high) per state */
+ _inds = _deserialize_json_indicies + _deserialize_json_index_offsets[cs]; /* this state's slice of the index table */
+
+ _slen = _deserialize_json_key_spans[cs];
+ _trans = _inds[ _slen > 0 && _keys[0] <=(*p) &&
+ (*p) <= _keys[1] ?
+ (*p) - _keys[0] : _slen ]; /* byte inside [low, high] -> its own slot; anything else -> slot _slen */
+
+ cs = _deserialize_json_trans_targs[_trans]; /* move to the next state */
+
+ if ( _deserialize_json_trans_actions[_trans] == 0 )
+ goto _again;
+
+ switch ( _deserialize_json_trans_actions[_trans] ) {
+ case 1: /* reset info and pos */
+#line 38 "hb-buffer-deserialize-json.rl"
+ {
+ memset (&info, 0, sizeof (info));
+ memset (&pos , 0, sizeof (pos ));
+}
+ break;
+ case 5: /* append the glyph and its position to the buffer */
+#line 43 "hb-buffer-deserialize-json.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+ case 2: /* remember where the current token starts */
+#line 51 "hb-buffer-deserialize-json.rl"
+ {
+ tok = p;
+}
+ break;
+ case 14: /* token [tok, p) is a glyph name: resolve it through the font */
+#line 55 "hb-buffer-deserialize-json.rl"
+ {
+ if (!hb_font_glyph_from_string (font,
+ tok, p - tok,
+ &info.codepoint))
+ return false;
+}
+ break;
+ case 15: /* cases 15, 8, 10, 12, 3, 6: parse token [tok, p) into one numeric field */
+#line 62 "hb-buffer-deserialize-json.rl"
+ { if (!parse_uint (tok, p, &info.codepoint)) return false; }
+ break;
+ case 8:
+#line 63 "hb-buffer-deserialize-json.rl"
+ { if (!parse_uint (tok, p, &info.cluster )) return false; }
+ break;
+ case 10:
+#line 64 "hb-buffer-deserialize-json.rl"
+ { if (!parse_int (tok, p, &pos.x_offset )) return false; }
+ break;
+ case 12:
+#line 65 "hb-buffer-deserialize-json.rl"
+ { if (!parse_int (tok, p, &pos.y_offset )) return false; }
+ break;
+ case 3:
+#line 66 "hb-buffer-deserialize-json.rl"
+ { if (!parse_int (tok, p, &pos.x_advance)) return false; }
+ break;
+ case 6:
+#line 67 "hb-buffer-deserialize-json.rl"
+ { if (!parse_int (tok, p, &pos.y_advance)) return false; }
+ break;
+ case 16: /* cases 16, 9, 11, 13, 4, 7: parse one numeric field, then append the glyph */
+#line 62 "hb-buffer-deserialize-json.rl"
+ { if (!parse_uint (tok, p, &info.codepoint)) return false; }
+#line 43 "hb-buffer-deserialize-json.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+ case 9:
+#line 63 "hb-buffer-deserialize-json.rl"
+ { if (!parse_uint (tok, p, &info.cluster )) return false; }
+#line 43 "hb-buffer-deserialize-json.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+ case 11:
+#line 64 "hb-buffer-deserialize-json.rl"
+ { if (!parse_int (tok, p, &pos.x_offset )) return false; }
+#line 43 "hb-buffer-deserialize-json.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+ case 13:
+#line 65 "hb-buffer-deserialize-json.rl"
+ { if (!parse_int (tok, p, &pos.y_offset )) return false; }
+#line 43 "hb-buffer-deserialize-json.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+ case 4:
+#line 66 "hb-buffer-deserialize-json.rl"
+ { if (!parse_int (tok, p, &pos.x_advance)) return false; }
+#line 43 "hb-buffer-deserialize-json.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+ case 7:
+#line 67 "hb-buffer-deserialize-json.rl"
+ { if (!parse_int (tok, p, &pos.y_advance)) return false; }
+#line 43 "hb-buffer-deserialize-json.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+#line 624 "hb-buffer-deserialize-json.hh"
+ }
+
+_again:
+ if ( cs == 0 ) /* state 0 is deserialize_json_error: stop without advancing p */
+ goto _out;
+ if ( ++p != pe )
+ goto _resume;
+ _test_eof: {}
+ _out: {}
+ }
+
+#line 125 "hb-buffer-deserialize-json.rl"
+
+
+ *end_ptr = p;
+
+ return p == pe && *(p-1) != ']'; /* true only if all input was consumed and it does not end in ']' */
+}
+
+#endif /* HB_BUFFER_DESERIALIZE_JSON_HH */
diff --git a/thirdparty/harfbuzz/src/hb-buffer-deserialize-text.hh b/thirdparty/harfbuzz/src/hb-buffer-deserialize-text.hh
new file mode 100644
index 0000000000..67f0a1252f
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-buffer-deserialize-text.hh
@@ -0,0 +1,571 @@
+
+#line 1 "hb-buffer-deserialize-text.rl"
+/*
+ * Copyright © 2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_BUFFER_DESERIALIZE_TEXT_HH
+#define HB_BUFFER_DESERIALIZE_TEXT_HH
+
+#include "hb.hh"
+
+
+#line 36 "hb-buffer-deserialize-text.hh"
+static const unsigned char _deserialize_text_trans_keys[] = { /* per state: (lowest, highest) input byte with its own slot; the parser reads the pair at offset cs<<1 */
+ 0u, 0u, 9u, 122u, 45u, 57u, 48u, 57u, 45u, 57u, 48u, 57u, 48u, 57u, 45u, 57u,
+ 48u, 57u, 44u, 44u, 45u, 57u, 48u, 57u, 44u, 57u, 9u, 124u, 9u, 124u, 0u, 0u,
+ 9u, 122u, 9u, 124u, 9u, 124u, 9u, 124u, 9u, 124u, 9u, 124u, 9u, 124u, 9u, 124u,
+ 9u, 124u, 9u, 124u, 9u, 124u, 0
+};
+
+static const char _deserialize_text_key_spans[] = { /* per state: number of slots covered by its key range; the parser also uses it as the index of the fallback slot */
+ 0, 114, 13, 10, 13, 10, 10, 13,
+ 10, 1, 13, 10, 14, 116, 116, 0,
+ 114, 116, 116, 116, 116, 116, 116, 116,
+ 116, 116, 116
+};
+
+static const short _deserialize_text_index_offsets[] = { /* per state: where its slice starts inside _deserialize_text_indicies */
+ 0, 0, 115, 129, 140, 154, 165, 176,
+ 190, 201, 203, 217, 228, 243, 360, 477,
+ 478, 593, 710, 827, 944, 1061, 1178, 1295,
+ 1412, 1529, 1646
+};
+
+static const char _deserialize_text_indicies[] = { /* slot within a state's slice -> transition number, which indexes the trans_targs and trans_actions tables */
+ 0, 0, 0, 0, 0, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 1, 1, 1, 1, 1, 1,
+ 1, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 1, 1, 1, 1, 1,
+ 1, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 1, 5, 1, 1, 6,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 1, 8, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 1, 10, 1, 1,
+ 11, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 1, 13, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 1, 15, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 1, 17, 1, 1, 18, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 1, 20,
+ 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 1, 22, 1, 23, 1, 1, 24,
+ 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 1, 26, 27, 27, 27, 27, 27,
+ 27, 27, 27, 27, 1, 22, 1, 1,
+ 1, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 1, 28, 28, 28, 28,
+ 28, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 28, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 29, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 30, 1, 1, 31, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 32, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 33,
+ 1, 34, 34, 34, 34, 34, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 34, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 35, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 36, 1, 1, 0,
+ 0, 0, 0, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 0, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 2, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 1, 1, 1, 1, 1, 1, 1, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 1, 1, 1, 1, 1, 1, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 1, 28, 28, 28, 28, 28, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 28, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 29, 1, 1, 1,
+ 1, 37, 37, 37, 37, 37, 37, 37,
+ 37, 37, 37, 1, 1, 1, 30, 1,
+ 1, 31, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 32, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 33, 1, 38,
+ 38, 38, 38, 38, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 38, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 39, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 40, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 41, 1, 42, 42, 42, 42,
+ 42, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 42, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 43, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 44,
+ 1, 42, 42, 42, 42, 42, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 42, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 43, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 44, 1, 38, 38,
+ 38, 38, 38, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 38, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 39, 1, 1, 1, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 40, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 41, 1, 45, 45, 45, 45, 45,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 45, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 46, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 47, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 48,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 49, 1,
+ 50, 50, 50, 50, 50, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 50,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 51, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 52, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 53, 1, 50, 50, 50,
+ 50, 50, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 50, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 51,
+ 1, 1, 1, 1, 27, 27, 27, 27,
+ 27, 27, 27, 27, 27, 27, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 52, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 53, 1, 45, 45, 45, 45, 45, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 45, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 46, 1, 1, 1,
+ 1, 54, 54, 54, 54, 54, 54, 54,
+ 54, 54, 54, 1, 1, 1, 1, 1,
+ 1, 47, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 48, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 49, 1, 28,
+ 28, 28, 28, 28, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 28, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 29, 1, 55, 55, 1, 55, 55,
+ 55, 55, 55, 55, 55, 55, 55, 55,
+ 1, 1, 1, 30, 1, 1, 31, 55,
+ 55, 55, 55, 55, 55, 55, 55, 55,
+ 55, 55, 55, 55, 55, 55, 55, 55,
+ 55, 55, 55, 55, 55, 55, 55, 55,
+ 55, 1, 1, 32, 1, 55, 1, 55,
+ 55, 55, 55, 55, 55, 55, 55, 55,
+ 55, 55, 55, 55, 55, 55, 55, 55,
+ 55, 55, 55, 55, 55, 55, 55, 55,
+ 55, 1, 33, 1, 0
+};
+
+static const char _deserialize_text_trans_targs[] = { /* transition number -> next state; the parser assigns the looked-up value to cs */
+ 1, 0, 13, 17, 26, 3, 18, 21,
+ 18, 21, 5, 19, 20, 19, 20, 22,
+ 25, 8, 9, 12, 9, 12, 10, 11,
+ 23, 24, 23, 24, 14, 2, 6, 7,
+ 15, 16, 14, 15, 16, 17, 14, 4,
+ 15, 16, 14, 15, 16, 14, 2, 7,
+ 15, 16, 14, 2, 15, 16, 25, 26
+};
+
+static const char _deserialize_text_trans_actions[] = { /* transition number -> action id dispatched by the parser's switch; 0 means no action */
+ 0, 0, 1, 1, 1, 2, 2, 2,
+ 0, 0, 2, 2, 2, 0, 0, 2,
+ 2, 2, 2, 2, 0, 0, 3, 2,
+ 2, 2, 0, 0, 4, 5, 5, 5,
+ 4, 4, 0, 0, 0, 0, 6, 7,
+ 6, 6, 8, 8, 8, 9, 10, 10,
+ 9, 9, 11, 12, 11, 11, 0, 0
+};
+
+static const char _deserialize_text_eof_actions[] = { /* state -> action id the parser runs when input ends in that state; 0 matches no case */
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 4, 0, 0,
+ 0, 4, 6, 8, 8, 6, 9, 11,
+ 11, 9, 4
+};
+
+static const int deserialize_text_start = 1; /* initial state assigned to cs by the parser */
+static const int deserialize_text_first_final = 13; /* not referenced by the parser function that follows */
+static const int deserialize_text_error = 0; /* the parser stops when cs reaches this value (it tests cs == 0) */
+
+static const int deserialize_text_en_main = 1; /* not referenced by the parser function that follows */
+
+
+#line 91 "hb-buffer-deserialize-text.rl"
+
+/* Parses one chunk of text-serialized glyphs from [buf, buf + buf_len) and
+ * appends every completed glyph (info plus position) to @buffer.
+ *
+ * The scanner is a table-driven state machine; the #line directives point at
+ * hb-buffer-deserialize-text.rl as its source.  Unlike the transition
+ * actions, the end-of-input actions run once when the input is exhausted, so
+ * a glyph that is still pending at that point is flushed into the buffer.
+ *
+ * *end_ptr is written unconditionally (so end_ptr must not be null): it is
+ * updated after each appended glyph and finally set to the byte where
+ * scanning stopped.  Returns false as soon as a glyph name or number fails to
+ * parse, or when the buffer reports failure after add_info(); otherwise
+ * returns true only if the whole input was consumed and its last byte is not
+ * ']'.
+ *
+ * NOTE(review): the final `*(p-1)` reads buf[-1] when buf_len is 0 (then
+ * p == pe == buf); presumably the caller rejects empty input -- confirm. */
+static hb_bool_t
+_hb_buffer_deserialize_glyphs_text (hb_buffer_t *buffer,
+ const char *buf,
+ unsigned int buf_len,
+ const char **end_ptr,
+ hb_font_t *font)
+{
+ const char *p = buf, *pe = buf + buf_len;
+
+ /* Ensure we have positions. */
+ (void) hb_buffer_get_glyph_positions (buffer, nullptr);
+
+ while (p < pe && ISSPACE (*p)) /* skip leading whitespace */
+ p++;
+ if (p < pe && *p == (buffer->len ? '|' : '[')) /* consume '|' when the buffer already holds glyphs, '[' when it is empty */
+ {
+ *end_ptr = ++p;
+ }
+
+ const char *eof = pe, *tok = nullptr; /* eof: end of input; tok: start of the token currently being scanned */
+ int cs; /* current state of the state machine */
+ hb_glyph_info_t info = {0};
+ hb_glyph_position_t pos = {0};
+
+#line 343 "hb-buffer-deserialize-text.hh"
+ {
+ cs = deserialize_text_start;
+ }
+
+#line 348 "hb-buffer-deserialize-text.hh"
+ {
+ int _slen;
+ int _trans;
+ const unsigned char *_keys;
+ const char *_inds;
+ if ( p == pe )
+ goto _test_eof;
+ if ( cs == 0 )
+ goto _out;
+_resume:
+ _keys = _deserialize_text_trans_keys + (cs<<1); /* two key bytes (low, high) per state */
+ _inds = _deserialize_text_indicies + _deserialize_text_index_offsets[cs]; /* this state's slice of the index table */
+
+ _slen = _deserialize_text_key_spans[cs];
+ _trans = _inds[ _slen > 0 && _keys[0] <=(*p) &&
+ (*p) <= _keys[1] ?
+ (*p) - _keys[0] : _slen ]; /* byte inside [low, high] -> its own slot; anything else -> slot _slen */
+
+ cs = _deserialize_text_trans_targs[_trans]; /* move to the next state */
+
+ if ( _deserialize_text_trans_actions[_trans] == 0 )
+ goto _again;
+
+ switch ( _deserialize_text_trans_actions[_trans] ) {
+ case 2: /* remember where the current token starts */
+#line 51 "hb-buffer-deserialize-text.rl"
+ {
+ tok = p;
+}
+ break;
+ case 5: /* token [tok, p) is a glyph name: resolve it through the font */
+#line 55 "hb-buffer-deserialize-text.rl"
+ {
+ if (!hb_font_glyph_from_string (font,
+ tok, p - tok,
+ &info.codepoint))
+ return false;
+}
+ break;
+ case 10: /* cases 10, 3, 12, 7: parse token [tok, p) into one numeric field */
+#line 62 "hb-buffer-deserialize-text.rl"
+ { if (!parse_uint (tok, p, &info.cluster )) return false; }
+ break;
+ case 3:
+#line 63 "hb-buffer-deserialize-text.rl"
+ { if (!parse_int (tok, p, &pos.x_offset )) return false; }
+ break;
+ case 12:
+#line 64 "hb-buffer-deserialize-text.rl"
+ { if (!parse_int (tok, p, &pos.y_offset )) return false; }
+ break;
+ case 7:
+#line 65 "hb-buffer-deserialize-text.rl"
+ { if (!parse_int (tok, p, &pos.x_advance)) return false; }
+ break;
+ case 1: /* reset info and pos, then mark the token start */
+#line 38 "hb-buffer-deserialize-text.rl"
+ {
+ memset (&info, 0, sizeof (info));
+ memset (&pos , 0, sizeof (pos ));
+}
+#line 51 "hb-buffer-deserialize-text.rl"
+ {
+ tok = p;
+}
+ break;
+ case 4: /* cases 4, 9, 11, 6, 8: finish the pending field, then append the glyph */
+#line 55 "hb-buffer-deserialize-text.rl"
+ {
+ if (!hb_font_glyph_from_string (font,
+ tok, p - tok,
+ &info.codepoint))
+ return false;
+}
+#line 43 "hb-buffer-deserialize-text.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+ case 9:
+#line 62 "hb-buffer-deserialize-text.rl"
+ { if (!parse_uint (tok, p, &info.cluster )) return false; }
+#line 43 "hb-buffer-deserialize-text.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+ case 11:
+#line 64 "hb-buffer-deserialize-text.rl"
+ { if (!parse_int (tok, p, &pos.y_offset )) return false; }
+#line 43 "hb-buffer-deserialize-text.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+ case 6:
+#line 65 "hb-buffer-deserialize-text.rl"
+ { if (!parse_int (tok, p, &pos.x_advance)) return false; }
+#line 43 "hb-buffer-deserialize-text.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+ case 8:
+#line 66 "hb-buffer-deserialize-text.rl"
+ { if (!parse_int (tok, p, &pos.y_advance)) return false; }
+#line 43 "hb-buffer-deserialize-text.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+#line 480 "hb-buffer-deserialize-text.hh"
+ }
+
+_again:
+ if ( cs == 0 ) /* state 0 is deserialize_text_error: stop without advancing p */
+ goto _out;
+ if ( ++p != pe )
+ goto _resume;
+ _test_eof: {}
+ if ( p == eof ) /* input exhausted: run the action registered for the state we ended in */
+ {
+ switch ( _deserialize_text_eof_actions[cs] ) {
+ case 4:
+#line 55 "hb-buffer-deserialize-text.rl"
+ {
+ if (!hb_font_glyph_from_string (font,
+ tok, p - tok,
+ &info.codepoint))
+ return false;
+}
+#line 43 "hb-buffer-deserialize-text.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+ case 9:
+#line 62 "hb-buffer-deserialize-text.rl"
+ { if (!parse_uint (tok, p, &info.cluster )) return false; }
+#line 43 "hb-buffer-deserialize-text.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+ case 11:
+#line 64 "hb-buffer-deserialize-text.rl"
+ { if (!parse_int (tok, p, &pos.y_offset )) return false; }
+#line 43 "hb-buffer-deserialize-text.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+ case 6:
+#line 65 "hb-buffer-deserialize-text.rl"
+ { if (!parse_int (tok, p, &pos.x_advance)) return false; }
+#line 43 "hb-buffer-deserialize-text.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+ case 8:
+#line 66 "hb-buffer-deserialize-text.rl"
+ { if (!parse_int (tok, p, &pos.y_advance)) return false; }
+#line 43 "hb-buffer-deserialize-text.rl"
+ {
+ buffer->add_info (info);
+ if (unlikely (!buffer->successful))
+ return false;
+ buffer->pos[buffer->len - 1] = pos;
+ *end_ptr = p;
+}
+ break;
+#line 557 "hb-buffer-deserialize-text.hh"
+ }
+ }
+
+ _out: {}
+ }
+
+#line 119 "hb-buffer-deserialize-text.rl"
+
+
+ *end_ptr = p;
+
+ return p == pe && *(p-1) != ']'; /* true only if all input was consumed and it does not end in ']' */
+}
+
+#endif /* HB_BUFFER_DESERIALIZE_TEXT_HH */
diff --git a/thirdparty/harfbuzz/src/hb-buffer-serialize.cc b/thirdparty/harfbuzz/src/hb-buffer-serialize.cc
new file mode 100644
index 0000000000..bc6c978b38
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-buffer-serialize.cc
@@ -0,0 +1,474 @@
+/*
+ * Copyright © 2012,2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_BUFFER_SERIALIZE
+
+#include "hb-buffer.hh"
+
+/* Names of the supported serialization formats, terminated by nullptr.
+ * Order matters: hb_buffer_serialize_format_to_string() returns entry 0 for
+ * HB_BUFFER_SERIALIZE_FORMAT_TEXT and entry 1 for
+ * HB_BUFFER_SERIALIZE_FORMAT_JSON. */
+static const char *serialize_formats[] = {
+ "text",
+ "json",
+ nullptr
+};
+
+/**
+ * hb_buffer_serialize_list_formats:
+ *
+ * Lists the buffer serialization formats that are supported.
+ *
+ * Return value: (transfer none):
+ * A %NULL-terminated array of format-name strings. The array is owned by
+ * the library and must not be freed.
+ *
+ * Since: 0.9.7
+ **/
+const char **
+hb_buffer_serialize_list_formats ()
+{
+  /* Hand out the static table itself; nothing is copied. */
+  const char **supported = serialize_formats;
+  return supported;
+}
+
+/**
+ * hb_buffer_serialize_format_from_string:
+ * @str: (array length=len) (element-type uint8_t): a string to parse
+ * @len: length of @str, or -1 if string is NUL-terminated
+ *
+ * Converts a format name into an #hb_buffer_serialize_format_t by turning
+ * it into a tag and upper-casing that tag. No validation is performed: to
+ * find out which formats are actually supported, consult
+ * hb_buffer_serialize_list_formats().
+ *
+ * Return value:
+ * The parsed #hb_buffer_serialize_format_t.
+ *
+ * Since: 0.9.7
+ **/
+hb_buffer_serialize_format_t
+hb_buffer_serialize_format_from_string (const char *str, int len)
+{
+  const hb_tag_t tag = hb_tag_from_string (str, len);
+
+  /* Clearing bit 0x20 in each of the four bytes upper-cases ASCII letters. */
+  const hb_tag_t upper_cased = tag & ~0x20202020u;
+
+  return (hb_buffer_serialize_format_t) upper_cased;
+}
+
+/**
+ * hb_buffer_serialize_format_to_string:
+ * @format: an #hb_buffer_serialize_format_t to convert.
+ *
+ * Looks up the name of @format.
+ *
+ * Return value: (transfer none):
+ * The NUL-terminated name corresponding to @format, or %NULL if @format is
+ * not a valid #hb_buffer_serialize_format_t. Should not be freed.
+ *
+ * Since: 0.9.7
+ **/
+const char *
+hb_buffer_serialize_format_to_string (hb_buffer_serialize_format_t format)
+{
+  const unsigned requested = (unsigned) format;
+
+  if (requested == HB_BUFFER_SERIALIZE_FORMAT_TEXT)
+    return serialize_formats[0];
+  if (requested == HB_BUFFER_SERIALIZE_FORMAT_JSON)
+    return serialize_formats[1];
+
+  /* HB_BUFFER_SERIALIZE_FORMAT_INVALID and every unknown value. */
+  return nullptr;
+}
+
+/* Serializes glyphs [start, end) of @buffer into @buf as comma-separated
+ * JSON objects, one object per glyph.
+ *
+ * Each glyph is first formatted into a 1024-byte scratch array and copied to
+ * @buf only if it fits together with a terminating NUL; otherwise the
+ * function stops and returns the number of glyphs written so far.
+ * *buf_consumed receives the number of bytes written, not counting the NUL.
+ *
+ * @flags selects the emitted members: "g" (glyph name, or numeric id when
+ * names are disabled), "cl", "dx"/"dy", "ax"/"ay", "fl" and
+ * "xb"/"yb"/"w"/"h".  When positions are emitted but advances are not, the
+ * advances are accumulated into the offsets of the following glyphs.
+ *
+ * Returns the number of glyphs serialized. */
+static unsigned int
+_hb_buffer_serialize_glyphs_json (hb_buffer_t *buffer,
+                                  unsigned int start,
+                                  unsigned int end,
+                                  char *buf,
+                                  unsigned int buf_size,
+                                  unsigned int *buf_consumed,
+                                  hb_font_t *font,
+                                  hb_buffer_serialize_flags_t flags)
+{
+  hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, nullptr);
+  hb_glyph_position_t *pos = (flags & HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS) ?
+                             nullptr : hb_buffer_get_glyph_positions (buffer, nullptr);
+
+  *buf_consumed = 0;
+  hb_position_t x = 0, y = 0;
+  for (unsigned int i = start; i < end; i++)
+  {
+    char b[1024];
+    char *p = b;
+
+    /* In the following code, we know b is large enough that no overflow can happen. */
+
+#define APPEND(s) HB_STMT_START { strcpy (p, s); p += strlen (s); } HB_STMT_END
+
+    /* The separator depends on the absolute index, not on @start, so output
+     * produced by consecutive calls can be concatenated. */
+    if (i)
+      *p++ = ',';
+
+    *p++ = '{';
+
+    APPEND ("\"g\":");
+    if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES))
+    {
+      char g[128];
+      hb_font_glyph_to_string (font, info[i].codepoint, g, sizeof (g));
+      *p++ = '"';
+      for (char *q = g; *q; q++)
+      {
+        /* JSON strings require both the quotation mark and the backslash to
+         * be escaped; leaving a backslash bare yields invalid JSON.  Even if
+         * every byte of g were escaped the result still fits in b. */
+        if (*q == '"' || *q == '\\')
+          *p++ = '\\';
+        *p++ = *q;
+      }
+      *p++ = '"';
+    }
+    else
+      p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), "%u", info[i].codepoint));
+
+    if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS)) {
+      p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"cl\":%u", info[i].cluster));
+    }
+
+    if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS))
+    {
+      p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"dx\":%d,\"dy\":%d",
+                                x+pos[i].x_offset, y+pos[i].y_offset));
+      if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_ADVANCES))
+        p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"ax\":%d,\"ay\":%d",
+                                  pos[i].x_advance, pos[i].y_advance));
+    }
+
+    if (flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS)
+    {
+      if (info[i].mask & HB_GLYPH_FLAG_DEFINED)
+        p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"fl\":%u", info[i].mask & HB_GLYPH_FLAG_DEFINED));
+    }
+
+    if (flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS)
+    {
+      hb_glyph_extents_t extents;
+      hb_font_get_glyph_extents(font, info[i].codepoint, &extents);
+      p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"xb\":%d,\"yb\":%d",
+                                extents.x_bearing, extents.y_bearing));
+      p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"w\":%d,\"h\":%d",
+                                extents.width, extents.height));
+    }
+
+    *p++ = '}';
+
+    /* Commit the glyph only if it fits with room left for the NUL. */
+    unsigned int l = p - b;
+    if (buf_size > l)
+    {
+      memcpy (buf, b, l);
+      buf += l;
+      buf_size -= l;
+      *buf_consumed += l;
+      *buf = '\0';
+    } else
+      return i - start;
+
+    /* Advances are not printed: fold them into the running offset instead. */
+    if (pos && (flags & HB_BUFFER_SERIALIZE_FLAG_NO_ADVANCES))
+    {
+      x += pos[i].x_advance;
+      y += pos[i].y_advance;
+    }
+  }
+
+  return end - start;
+}
+
+/* Serializes glyphs [start, end) of @buffer into @buf in the "text" format.
+ *
+ * Glyphs are separated by '|'.  Depending on @flags each glyph is written as
+ * its name (or numeric id), followed by the optional parts "=cluster",
+ * "@dx,dy" (only when non-zero), "+ax[,ay]", "#flags" and "<xb,yb,w,h>".
+ *
+ * A glyph is committed to @buf only if it fits together with a terminating
+ * NUL; otherwise the number of glyphs written so far is returned.
+ * *buf_consumed receives the number of bytes written, not counting the NUL.
+ * When positions are emitted but advances are not, the advances are
+ * accumulated into the offsets of the following glyphs.
+ *
+ * Returns the number of glyphs serialized. */
+static unsigned int
+_hb_buffer_serialize_glyphs_text (hb_buffer_t *buffer,
+                                  unsigned int start,
+                                  unsigned int end,
+                                  char *buf,
+                                  unsigned int buf_size,
+                                  unsigned int *buf_consumed,
+                                  hb_font_t *font,
+                                  hb_buffer_serialize_flags_t flags)
+{
+  const bool want_names     = !(flags & HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES);
+  const bool want_clusters  = !(flags & HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS);
+  const bool want_positions = !(flags & HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS);
+  const bool want_advances  = !(flags & HB_BUFFER_SERIALIZE_FLAG_NO_ADVANCES);
+  const bool want_flags     = !!(flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS);
+  const bool want_extents   = !!(flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS);
+
+  hb_glyph_info_t *glyphs = hb_buffer_get_glyph_infos (buffer, nullptr);
+  hb_glyph_position_t *positions = nullptr;
+  if (want_positions)
+    positions = hb_buffer_get_glyph_positions (buffer, nullptr);
+
+  *buf_consumed = 0;
+  hb_position_t pen_x = 0, pen_y = 0;
+  for (unsigned int idx = start; idx < end; idx++)
+  {
+    /* Scratch space for one glyph; it is sized so that none of the pieces
+     * written below can overflow it. */
+    char scratch[1024];
+    char *cur = scratch;
+
+    if (idx != 0)
+      *cur++ = '|';
+
+    if (want_names)
+    {
+      hb_font_glyph_to_string (font, glyphs[idx].codepoint, cur, 128);
+      cur += strlen (cur);
+    }
+    else
+      cur += hb_max (0, snprintf (cur, ARRAY_LENGTH (scratch) - (cur - scratch), "%u", glyphs[idx].codepoint));
+
+    if (want_clusters)
+      cur += hb_max (0, snprintf (cur, ARRAY_LENGTH (scratch) - (cur - scratch), "=%u", glyphs[idx].cluster));
+
+    if (want_positions)
+    {
+      const hb_position_t dx = pen_x + positions[idx].x_offset;
+      const hb_position_t dy = pen_y + positions[idx].y_offset;
+      if (dx || dy)
+        cur += hb_max (0, snprintf (cur, ARRAY_LENGTH (scratch) - (cur - scratch), "@%d,%d", dx, dy));
+
+      if (want_advances)
+      {
+        *cur++ = '+';
+        cur += hb_max (0, snprintf (cur, ARRAY_LENGTH (scratch) - (cur - scratch), "%d", positions[idx].x_advance));
+        if (positions[idx].y_advance)
+          cur += hb_max (0, snprintf (cur, ARRAY_LENGTH (scratch) - (cur - scratch), ",%d", positions[idx].y_advance));
+      }
+    }
+
+    if (want_flags)
+    {
+      const unsigned int defined_bits = glyphs[idx].mask & HB_GLYPH_FLAG_DEFINED;
+      if (defined_bits)
+        cur += hb_max (0, snprintf (cur, ARRAY_LENGTH (scratch) - (cur - scratch), "#%X", defined_bits));
+    }
+
+    if (want_extents)
+    {
+      hb_glyph_extents_t extents;
+      hb_font_get_glyph_extents(font, glyphs[idx].codepoint, &extents);
+      cur += hb_max (0, snprintf (cur, ARRAY_LENGTH (scratch) - (cur - scratch), "<%d,%d,%d,%d>", extents.x_bearing, extents.y_bearing, extents.width, extents.height));
+    }
+
+    /* Stop unless the glyph fits with room left for the terminating NUL. */
+    const unsigned int written = cur - scratch;
+    if (buf_size <= written)
+      return idx - start;
+
+    memcpy (buf, scratch, written);
+    buf += written;
+    buf_size -= written;
+    *buf_consumed += written;
+    *buf = '\0';
+
+    /* Advances are not printed: fold them into the running offset instead. */
+    if (positions && !want_advances)
+    {
+      pen_x += positions[idx].x_advance;
+      pen_y += positions[idx].y_advance;
+    }
+  }
+
+  return end - start;
+}
+
+/**
+ * hb_buffer_serialize_glyphs:
+ * @buffer: an #hb_buffer_t buffer.
+ * @start: the first item in @buffer to serialize.
+ * @end: one past the last item in @buffer to serialize (the range is [@start, @end)).
+ * @buf: (out) (array length=buf_size) (element-type uint8_t): output string to
+ * write serialized buffer into.
+ * @buf_size: the size of @buf.
+ * @buf_consumed: (out) (allow-none): if not %NULL, will be set to the number of bytes written into @buf.
+ * @font: (allow-none): the #hb_font_t used to shape this buffer, needed to
+ * read glyph names and extents. If %NULL, an empty font will be used.
+ * @format: the #hb_buffer_serialize_format_t to use for formatting the output.
+ * @flags: the #hb_buffer_serialize_flags_t that control what glyph properties
+ * to serialize.
+ *
+ * Serializes @buffer into a textual representation of its glyph content,
+ * useful for showing the contents of the buffer, for example during debugging.
+ * There are currently two supported serialization formats:
+ *
+ * ## text
+ * A human-readable, plain text format.
+ * The serialized glyphs will look something like:
+ *
+ * ```
+ * [uni0651=0@518,0+0|uni0628=0+1897]
+ * ```
+ * - The serialized glyphs are delimited with `[` and `]`.
+ * - Glyphs are separated with `|`
+ * - Each glyph starts with glyph name, or glyph index if
+ * #HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES flag is set. Then,
+ * - If #HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS is not set, `=` then #hb_glyph_info_t.cluster.
+ * - If #HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS is not set, the #hb_glyph_position_t in the format:
+ * - If #hb_glyph_position_t.x_offset and #hb_glyph_position_t.y_offset are not both 0, `@x_offset,y_offset`. Then,
+ * - `+x_advance`, then `,y_advance` if #hb_glyph_position_t.y_advance is not 0. Then,
+ * - If #HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS is set, the
+ * #hb_glyph_extents_t in the format
+ * `&lt;x_bearing,y_bearing,width,height&gt;`
+ *
+ * ## json
+ * TODO.
+ *
+ * Return value:
+ * The number of serialized items.
+ *
+ * Since: 0.9.7
+ **/
+unsigned int
+hb_buffer_serialize_glyphs (hb_buffer_t *buffer,
+                            unsigned int start,
+                            unsigned int end,
+                            char *buf,
+                            unsigned int buf_size,
+                            unsigned int *buf_consumed,
+                            hb_font_t *font,
+                            hb_buffer_serialize_format_t format,
+                            hb_buffer_serialize_flags_t flags)
+{
+  /* The requested range must lie inside the buffer. */
+  assert (start <= end && end <= buffer->len);
+
+  /* Callers may pass NULL for @buf_consumed; count into a throwaway then. */
+  unsigned int ignored_count;
+  unsigned int *consumed = buf_consumed ? buf_consumed : &ignored_count;
+  *consumed = 0;
+
+  /* Leave an empty string behind even if nothing gets serialized. */
+  if (buf_size != 0)
+    buf[0] = '\0';
+
+  assert ((buffer->content_type == HB_BUFFER_CONTENT_TYPE_GLYPHS) ||
+          (!buffer->len && (buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID)));
+
+  /* Without positions there is nothing to print for them. */
+  if (!buffer->have_positions)
+    flags |= HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS;
+
+  if (unlikely (start == end))
+    return 0;
+
+  hb_font_t *effective_font = font ? font : hb_font_get_empty ();
+
+  if (format == HB_BUFFER_SERIALIZE_FORMAT_TEXT)
+    return _hb_buffer_serialize_glyphs_text (buffer, start, end,
+                                             buf, buf_size, consumed,
+                                             effective_font, flags);
+
+  if (format == HB_BUFFER_SERIALIZE_FORMAT_JSON)
+    return _hb_buffer_serialize_glyphs_json (buffer, start, end,
+                                             buf, buf_size, consumed,
+                                             effective_font, flags);
+
+  /* HB_BUFFER_SERIALIZE_FORMAT_INVALID and any other value. */
+  return 0;
+}
+
+/* Parses [pp, end) as a signed integer via hb_parse_int () and stores the
+ * result in *pv. Returns false, leaving *pv untouched, if parsing fails. */
+static bool
+parse_int (const char *pp, const char *end, int32_t *pv)
+{
+  const char *cursor = pp;
+  int parsed;
+  const bool ok = hb_parse_int (&cursor, end, &parsed, true/* whole buffer */);
+  if (unlikely (!ok))
+    return false;
+
+  *pv = parsed;
+  return true;
+}
+
+/* Parses [pp, end) as an unsigned integer via hb_parse_uint () and stores
+ * the result in *pv. Returns false, leaving *pv untouched, if parsing fails. */
+static bool
+parse_uint (const char *pp, const char *end, uint32_t *pv)
+{
+  const char *cursor = pp;
+  unsigned int parsed;
+  const bool ok = hb_parse_uint (&cursor, end, &parsed, true/* whole buffer */);
+  if (unlikely (!ok))
+    return false;
+
+  *pv = parsed;
+  return true;
+}
+
+#include "hb-buffer-deserialize-json.hh"
+#include "hb-buffer-deserialize-text.hh"
+
+/**
+ * hb_buffer_deserialize_glyphs:
+ * @buffer: an #hb_buffer_t buffer.
+ * @buf: (array length=buf_len): the serialized text to parse.
+ * @buf_len: length of @buf in bytes, or -1 if @buf is nul-terminated.
+ * @end_ptr: (out): may be %NULL. Otherwise it is set to @buf before parsing
+ *   starts and is then handed to the format-specific parser, which
+ *   presumably advances it past the consumed input.
+ * @font: may be %NULL, in which case the empty font is used.
+ * @format: the #hb_buffer_serialize_format_t that @buf is written in.
+ *
+ * Parses glyphs from @buf into @buffer. Unless @buf is empty, the content
+ * type of @buffer is set to #HB_BUFFER_CONTENT_TYPE_GLYPHS before the
+ * format-specific parser runs.
+ *
+ * Return value:
+ * %false if @buf is empty or @format is neither the text nor the JSON
+ * format; otherwise the result of the format-specific parser.
+ *
+ * Since: 0.9.7
+ **/
+hb_bool_t
+hb_buffer_deserialize_glyphs (hb_buffer_t *buffer,
+                              const char *buf,
+                              int buf_len, /* -1 means nul-terminated */
+                              const char **end_ptr, /* May be NULL */
+                              hb_font_t *font, /* May be NULL */
+                              hb_buffer_serialize_format_t format)
+{
+  /* Give callers that pass no @end_ptr a throwaway slot to write into. */
+  const char *ignored_end;
+  const char **stop = end_ptr ? end_ptr : &ignored_end;
+  *stop = buf;
+
+  assert ((buffer->content_type == HB_BUFFER_CONTENT_TYPE_GLYPHS) ||
+          (!buffer->len && (buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID)));
+
+  const bool nul_terminated = (buf_len == -1);
+  if (nul_terminated)
+    buf_len = strlen (buf);
+
+  /* Nothing to parse. */
+  if (buf_len == 0)
+  {
+    *stop = buf;
+    return false;
+  }
+
+  hb_buffer_set_content_type (buffer, HB_BUFFER_CONTENT_TYPE_GLYPHS);
+
+  hb_font_t *effective_font = font ? font : hb_font_get_empty ();
+
+  if (format == HB_BUFFER_SERIALIZE_FORMAT_TEXT)
+    return _hb_buffer_deserialize_glyphs_text (buffer,
+                                               buf, buf_len, stop,
+                                               effective_font);
+
+  if (format == HB_BUFFER_SERIALIZE_FORMAT_JSON)
+    return _hb_buffer_deserialize_glyphs_json (buffer,
+                                               buf, buf_len, stop,
+                                               effective_font);
+
+  /* HB_BUFFER_SERIALIZE_FORMAT_INVALID and any other value. */
+  return false;
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-buffer.cc b/thirdparty/harfbuzz/src/hb-buffer.cc
new file mode 100644
index 0000000000..4fadbb78d2
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-buffer.cc
@@ -0,0 +1,2004 @@
+/*
+ * Copyright © 1998-2004 David Turner and Werner Lemberg
+ * Copyright © 2004,2007,2009,2010 Red Hat, Inc.
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Owen Taylor, Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb-buffer.hh"
+#include "hb-utf.hh"
+
+
+/**
+ * SECTION: hb-buffer
+ * @title: hb-buffer
+ * @short_description: Input and output buffers
+ * @include: hb.h
+ *
+ * Buffers serve a dual role in HarfBuzz; they hold the input characters that are
+ * passed to hb_shape(), and after shaping they hold the output glyphs.
+ **/
+
+
+/**
+ * hb_segment_properties_equal:
+ * @a: first #hb_segment_properties_t to compare.
+ * @b: second #hb_segment_properties_t to compare.
+ *
+ * Checks the equality of two #hb_segment_properties_t's.
+ *
+ * Return value:
+ * %true if all properties of @a equal those of @b, %false otherwise.
+ *
+ * Since: 0.9.7
+ **/
+hb_bool_t
+hb_segment_properties_equal (const hb_segment_properties_t *a,
+                             const hb_segment_properties_t *b)
+{
+  /* Compare field by field, bailing out on the first mismatch. */
+  if (a->direction != b->direction) return false;
+  if (a->script != b->script) return false;
+  if (a->language != b->language) return false;
+  if (a->reserved1 != b->reserved1) return false;
+  return a->reserved2 == b->reserved2;
+}
+
+/**
+ * hb_segment_properties_hash:
+ * @p: #hb_segment_properties_t to hash.
+ *
+ * Creates a hash representing @p.
+ *
+ * Return value:
+ * A hash of @p.
+ *
+ * Since: 0.9.7
+ **/
+unsigned int
+hb_segment_properties_hash (const hb_segment_properties_t *p)
+{
+  /* XOR together the direction, the script and the language pointer value. */
+  const unsigned int direction_bits = (unsigned int) p->direction;
+  const unsigned int script_bits = (unsigned int) p->script;
+  return direction_bits ^ script_bits ^ (intptr_t) (p->language);
+}
+
+
+
+/* Here is how the buffer works internally:
+ *
+ * There are two info pointers: info and out_info. They always have
+ * the same allocated size, but different lengths.
+ *
+ * As an optimization, both info and out_info may point to the
+ * same piece of memory, which is owned by info. This remains the
+ * case as long as out_len doesn't exceed idx at any time.
+ * In that case, swap_buffers() is no-op and the glyph operations operate
+ * mostly in-place.
+ *
+ * As soon as out_info gets longer than info, out_info is moved over
+ * to an alternate buffer (which we reuse the pos buffer for!), and its
+ * current contents (out_len entries) are copied to the new place.
+ * This should all remain transparent to the user. swap_buffers() then
+ * switches info and out_info.
+ */
+
+
+
+/* Internal API */
+
+bool
+hb_buffer_t::enlarge (unsigned int size)
+{ /* Grows the info and pos arrays so that more than @size items fit.
+ * Returns false, and leaves or sets successful == false, when the buffer
+ * is already in a failed state, when @size exceeds max_len, when a byte
+ * size computation would overflow, or when either realloc () fails. */
+ if (unlikely (!successful))
+ return false;
+ if (unlikely (size > max_len))
+ {
+ successful = false;
+ return false;
+ }
+
+ unsigned int new_allocated = allocated;
+ hb_glyph_position_t *new_pos = nullptr;
+ hb_glyph_info_t *new_info = nullptr;
+ bool separate_out = out_info != info; /* remember whether out_info currently lives in separate storage */
+
+ if (unlikely (hb_unsigned_mul_overflows (size, sizeof (info[0]))))
+ goto done; /* new_pos/new_info are still null, so this is handled as a failure below */
+
+ while (size >= new_allocated)
+ new_allocated += (new_allocated >> 1) + 32; /* grow by half plus 32 until strictly larger than size */
+
+ static_assert ((sizeof (info[0]) == sizeof (pos[0])), ""); /* both arrays share one element size, so one overflow check covers both */
+ if (unlikely (hb_unsigned_mul_overflows (new_allocated, sizeof (info[0]))))
+ goto done;
+
+ new_pos = (hb_glyph_position_t *) realloc (pos, new_allocated * sizeof (pos[0]));
+ new_info = (hb_glyph_info_t *) realloc (info, new_allocated * sizeof (info[0]));
+
+done:
+ if (unlikely (!new_pos || !new_info))
+ successful = false;
+
+ if (likely (new_pos)) /* adopt whichever reallocation succeeded, even if the other one failed */
+ pos = new_pos;
+
+ if (likely (new_info))
+ info = new_info;
+
+ out_info = separate_out ? (hb_glyph_info_t *) pos : info; /* re-point out_info at the (possibly moved) storage */
+ if (likely (successful))
+ allocated = new_allocated; /* the recorded capacity only changes when both arrays grew */
+
+ return likely (successful);
+}
+
+/* Makes sure @num_out more items can be appended to the output while
+ * @num_in input items are being consumed. If the output still shares
+ * storage with the input and would overtake it, the output is moved to
+ * the pos array first. Returns false if the storage could not be grown. */
+bool
+hb_buffer_t::make_room_for (unsigned int num_in,
+                            unsigned int num_out)
+{
+  const bool grown = ensure (out_len + num_out);
+  if (unlikely (!grown))
+    return false;
+
+  const bool shares_storage = (out_info == info);
+  if (shares_storage && out_len + num_out > idx + num_in)
+  {
+    assert (have_output);
+
+    /* Switch the output over to the pos array and carry over what was
+     * already written. */
+    hb_glyph_info_t *separate = (hb_glyph_info_t *) pos;
+    out_info = separate;
+    memcpy (separate, info, out_len * sizeof (out_info[0]));
+  }
+
+  return true;
+}
+
+bool
+hb_buffer_t::shift_forward (unsigned int count)
+{ /* Opens a gap of @count items at idx: items [idx, len) move up by @count,
+ * then both len and idx advance by @count. Asserts that have_output is
+ * set. Returns false if the storage could not be grown. */
+ assert (have_output);
+ if (unlikely (!ensure (len + count))) return false;
+
+ memmove (info + idx + count, info + idx, (len - idx) * sizeof (info[0]));
+ if (idx + count > len) /* the gap reaches past the old end, leaving slots the memmove did not fill */
+ {
+ /* Under memory failure we might expose this area. At least
+ * clean it up. Oh well...
+ *
+ * Ideally, we should at least set Default_Ignorable bits on
+ * these, as well as consistent cluster values. But the former
+ * is layering violation... */
+ memset (info + len, 0, (idx + count - len) * sizeof (info[0]));
+ }
+ len += count;
+ idx += count;
+
+ return true;
+}
+
+/* Hands out the pos array as scratch space. Output and position state is
+ * dropped first. *size is set to the number of scratch_buffer_t elements
+ * that fit in the allocated pos storage. */
+hb_buffer_t::scratch_buffer_t *
+hb_buffer_t::get_scratch_buffer (unsigned int *size)
+{
+  /* Same state changes as remove_output (), minus the immutability check. */
+  out_info = info;
+  out_len = 0;
+  have_positions = false;
+  have_output = false;
+
+  /* The pos storage must be suitably aligned for scratch elements. */
+  assert ((uintptr_t) pos % sizeof (scratch_buffer_t) == 0);
+
+  *size = (allocated * sizeof (pos[0])) / sizeof (scratch_buffer_t);
+
+  void *raw = (void *) pos;
+  return (scratch_buffer_t *) raw;
+}
+
+
+
+/* HarfBuzz-Internal API */
+
+/* Restores the unicode funcs, flags, replacement codepoint and invisible
+ * glyph to their defaults, then clears the contents. No-op on immutable
+ * buffers. */
+void
+hb_buffer_t::reset ()
+{
+  if (unlikely (hb_object_is_immutable (this)))
+    return;
+
+  /* Swap the current unicode funcs for a reference to the default ones. */
+  hb_unicode_funcs_destroy (unicode);
+  hb_unicode_funcs_t *defaults = hb_unicode_funcs_get_default ();
+  unicode = hb_unicode_funcs_reference (defaults);
+
+  invisible = 0;
+  replacement = HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT;
+  flags = HB_BUFFER_FLAG_DEFAULT;
+
+  clear ();
+}
+
+/* Empties the buffer: segment properties, scratch flags, content type,
+ * status flags, cursors, serial and context all return to their initial
+ * values. No-op on immutable buffers. */
+void
+hb_buffer_t::clear ()
+{
+  if (unlikely (hb_object_is_immutable (this)))
+    return;
+
+  /* Segment properties and scratch flags. */
+  const hb_segment_properties_t initial_props = HB_SEGMENT_PROPERTIES_DEFAULT;
+  props = initial_props;
+  scratch_flags = HB_BUFFER_SCRATCH_FLAG_DEFAULT;
+
+  /* Status. */
+  content_type = HB_BUFFER_CONTENT_TYPE_INVALID;
+  successful = true;
+  have_output = have_positions = false;
+
+  /* Cursors and lengths; the output shares storage with the input again. */
+  idx = len = out_len = 0;
+  out_info = info;
+
+  serial = 0;
+
+  /* Forget the surrounding-text context. */
+  memset (context, 0, sizeof (context));
+  memset (context_len, 0, sizeof (context_len));
+
+  deallocate_var_all ();
+}
+
+/* Appends one zero-initialized item carrying @codepoint and @cluster.
+ * Silently does nothing if room for it could not be ensured. */
+void
+hb_buffer_t::add (hb_codepoint_t codepoint,
+                  unsigned int cluster)
+{
+  if (unlikely (!ensure (len + 1))) return;
+
+  hb_glyph_info_t &slot = info[len];
+  memset (&slot, 0, sizeof (slot));
+  slot.codepoint = codepoint;
+  slot.mask = 0;
+  slot.cluster = cluster;
+
+  len += 1;
+}
+
+/* Appends a copy of @glyph_info. Silently does nothing if room for it
+ * could not be ensured. */
+void
+hb_buffer_t::add_info (const hb_glyph_info_t &glyph_info)
+{
+  if (likely (ensure (len + 1)))
+  {
+    info[len] = glyph_info;
+    len += 1;
+  }
+}
+
+
+/* Drops the output side: both have_output and have_positions are cleared
+ * and the output shares storage with the input again. No-op on immutable
+ * buffers. */
+void
+hb_buffer_t::remove_output ()
+{
+  if (unlikely (hb_object_is_immutable (this)))
+    return;
+
+  out_info = info;
+  out_len = 0;
+
+  have_positions = false;
+  have_output = false;
+}
+
+/* Starts a fresh, empty output that shares storage with the input:
+ * have_output is set and have_positions is cleared. No-op on immutable
+ * buffers. */
+void
+hb_buffer_t::clear_output ()
+{
+  if (unlikely (hb_object_is_immutable (this)))
+    return;
+
+  out_info = info;
+  out_len = 0;
+
+  have_positions = false;
+  have_output = true;
+}
+
+/* Switches the buffer to "has positions, no output" and zeroes the first
+ * len entries of pos. No-op on immutable buffers. */
+void
+hb_buffer_t::clear_positions ()
+{
+  if (unlikely (hb_object_is_immutable (this)))
+    return;
+
+  out_info = info;
+  out_len = 0;
+
+  have_positions = true;
+  have_output = false;
+
+  hb_memset (pos, 0, sizeof (pos[0]) * len);
+}
+
+/* Makes the output the new input: info/out_info and len/out_len trade
+ * places, have_output is cleared and idx goes back to 0. Does nothing
+ * when the buffer is in a failed state. */
+void
+hb_buffer_t::swap_buffers ()
+{
+  if (unlikely (!successful)) return;
+
+  assert (have_output);
+  have_output = false;
+
+  if (out_info != info)
+  {
+    /* The output lives in separate storage: exchange the two arrays. The
+     * former input storage then also serves as the pos array. */
+    hb_glyph_info_t *previous_info = info;
+    info = out_info;
+    out_info = previous_info;
+    pos = (hb_glyph_position_t *) previous_info;
+  }
+
+  const unsigned int previous_len = len;
+  len = out_len;
+  out_len = previous_len;
+
+  idx = 0;
+}
+
+
+/* Replaces @num_in input items starting at idx with @num_out output items.
+ * Each output item is a copy of the item at idx with its codepoint taken
+ * from @glyph_data. Silently does nothing if room could not be made. */
+void
+hb_buffer_t::replace_glyphs (unsigned int num_in,
+                             unsigned int num_out,
+                             const uint32_t *glyph_data)
+{
+  if (unlikely (!make_room_for (num_in, num_out))) return;
+
+  assert (idx + num_in <= len);
+
+  merge_clusters (idx, idx + num_in);
+
+  /* Snapshot the source item before any output slot is written. */
+  const hb_glyph_info_t source_glyph = info[idx];
+  for (unsigned int n = 0; n < num_out; n++)
+  {
+    hb_glyph_info_t &target = out_info[out_len + n];
+    target = source_glyph;
+    target.codepoint = glyph_data[n];
+  }
+
+  out_len += num_out;
+  idx += num_in;
+}
+
+bool
+hb_buffer_t::move_to (unsigned int i)
+{ /* Positions the buffer at index @i. Without output this just sets idx.
+ * With output it makes out_len equal @i: moving forward copies items
+ * from the input to the output, moving backward returns items from the
+ * tail of the output to the input just before idx. Returns false if the
+ * buffer is in a failed state or room could not be made. */
+ if (!have_output)
+ {
+ assert (i <= len);
+ idx = i;
+ return true;
+ }
+ if (unlikely (!successful))
+ return false;
+
+ assert (i <= out_len + (len - idx)); /* cannot move past output plus remaining input */
+
+ if (out_len < i)
+ {
+ unsigned int count = i - out_len; /* items to carry over from input to output */
+ if (unlikely (!make_room_for (count, count))) return false;
+
+ memmove (out_info + out_len, info + idx, count * sizeof (out_info[0]));
+ idx += count;
+ out_len += count;
+ }
+ else if (out_len > i)
+ {
+ /* Tricky part: rewinding... */
+ unsigned int count = out_len - i; /* items to hand back from output to input */
+
+ /* This will blow in our face if memory allocation fails later
+ * in this same lookup...
+ *
+ * We used to shift with extra 32 items, instead of the 0 below.
+ * But that would leave empty slots in the buffer in case of allocation
+ * failures. Setting to zero for now to avoid other problems (see
+ * comments in shift_forward()). This can cause O(N^2) behavior more
+ * severely than adding 32 empty slots can... */
+ if (unlikely (idx < count && !shift_forward (count + 0))) return false; /* make sure there are count free slots before idx */
+
+ assert (idx >= count);
+
+ idx -= count;
+ out_len -= count;
+ memmove (info + idx, out_info + out_len, count * sizeof (out_info[0]));
+ }
+
+ return true;
+}
+
+
+/* For every item whose cluster lies in [cluster_start, cluster_end),
+ * replaces the bits selected by @mask with the matching bits of @value.
+ * An empty @mask changes nothing. */
+void
+hb_buffer_t::set_masks (hb_mask_t value,
+                        hb_mask_t mask,
+                        unsigned int cluster_start,
+                        unsigned int cluster_end)
+{
+  if (!mask)
+    return;
+
+  const hb_mask_t keep_bits = ~mask;
+  const hb_mask_t new_bits = value & mask;
+
+  const unsigned int total = len;
+  for (unsigned int n = 0; n < total; n++)
+  {
+    hb_glyph_info_t &glyph = info[n];
+    if (glyph.cluster < cluster_start || glyph.cluster >= cluster_end)
+      continue;
+    glyph.mask = (glyph.mask & keep_bits) | new_bits;
+  }
+}
+
+/* Reverses the items in [start, end) of info and, when positions are
+ * present, of pos as well. Ranges shorter than two items are left alone. */
+void
+hb_buffer_t::reverse_range (unsigned int start,
+                            unsigned int end)
+{
+  const unsigned int span = end - start;
+  if (span < 2)
+    return;
+
+  hb_array_t<hb_glyph_info_t> infos (info, len);
+  infos.reverse (start, end);
+
+  if (!have_positions)
+    return;
+
+  hb_array_t<hb_glyph_position_t> positions (pos, len);
+  positions.reverse (start, end);
+}
+
+/* Reverses the whole buffer; an empty buffer is left untouched. */
+void
+hb_buffer_t::reverse ()
+{
+  if (likely (len))
+    reverse_range (0, len);
+}
+
+/* Reverses the order of the clusters while keeping the item order inside
+ * each cluster: the whole buffer is reversed first, then every run of
+ * equal cluster values is reversed back. */
+void
+hb_buffer_t::reverse_clusters ()
+{
+  if (unlikely (!len))
+    return;
+
+  reverse ();
+
+  const unsigned int total = len;
+  unsigned int run_start = 0;
+  unsigned int run_cluster = info[0].cluster;
+
+  for (unsigned int n = 1; n < total; n++)
+  {
+    if (info[n].cluster == run_cluster)
+      continue;
+
+    /* Item n opens a new run; restore the order of the one just closed. */
+    reverse_range (run_start, n);
+    run_start = n;
+    run_cluster = info[n].cluster;
+  }
+
+  /* The final run ends at the end of the buffer. */
+  reverse_range (run_start, total);
+}
+
+void
+hb_buffer_t::merge_clusters_impl (unsigned int start,
+ unsigned int end)
+{ /* Gives every item in info[start, end) the smallest cluster value found
+ * in that range. The range is first widened to take in neighbouring
+ * items that share a cluster value with its edges, and may continue
+ * backwards into the output. */
+ if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS)
+ {
+ unsafe_to_break (start, end); /* at this level clusters are not merged; the range is only flagged */
+ return;
+ }
+
+ unsigned int cluster = info[start].cluster;
+
+ for (unsigned int i = start + 1; i < end; i++)
+ cluster = hb_min (cluster, info[i].cluster); /* smallest cluster value in the requested range */
+
+ /* Extend end */
+ while (end < len && info[end - 1].cluster == info[end].cluster)
+ end++;
+
+ /* Extend start */
+ while (idx < start && info[start - 1].cluster == info[start].cluster) /* never extends below idx */
+ start--;
+
+ /* If we hit the start of buffer, continue in out-buffer. */
+ if (idx == start)
+ for (unsigned int i = out_len; i && out_info[i - 1].cluster == info[start].cluster; i--)
+ set_cluster (out_info[i - 1], cluster);
+
+ for (unsigned int i = start; i < end; i++)
+ set_cluster (info[i], cluster);
+}
+void
+hb_buffer_t::merge_out_clusters (unsigned int start,
+ unsigned int end)
+{ /* Output-side counterpart of merge_clusters_impl (): gives every item in
+ * out_info[start, end) the smallest cluster value found in that range.
+ * The range is widened over neighbours sharing a cluster value and may
+ * continue forwards into the not yet consumed input. */
+ if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS)
+ return;
+
+ if (unlikely (end - start < 2)) /* nothing to merge in a range of fewer than two items */
+ return;
+
+ unsigned int cluster = out_info[start].cluster;
+
+ for (unsigned int i = start + 1; i < end; i++)
+ cluster = hb_min (cluster, out_info[i].cluster); /* smallest cluster value in the requested range */
+
+ /* Extend start */
+ while (start && out_info[start - 1].cluster == out_info[start].cluster)
+ start--;
+
+ /* Extend end */
+ while (end < out_len && out_info[end - 1].cluster == out_info[end].cluster)
+ end++;
+
+ /* If we hit the end of out-buffer, continue in buffer. */
+ if (end == out_len)
+ for (unsigned int i = idx; i < len && info[i].cluster == out_info[end - 1].cluster; i++)
+ set_cluster (info[i], cluster);
+
+ for (unsigned int i = start; i < end; i++)
+ set_cluster (out_info[i], cluster);
+}
+void
+hb_buffer_t::delete_glyph ()
+{ /* Removes the item at idx by calling skip_glyph () at the end. Before
+ * that, if the item is the last one carrying its cluster value, the
+ * cluster is merged into a neighbour: backwards into the output when
+ * there is any, otherwise forwards into the next input item. */
+ /* The logic here is duplicated in hb_ot_hide_default_ignorables(). */
+
+ unsigned int cluster = info[idx].cluster;
+ if (idx + 1 < len && cluster == info[idx + 1].cluster)
+ {
+ /* Cluster survives; do nothing. */
+ goto done;
+ }
+
+ if (out_len)
+ {
+ /* Merge cluster backward. */
+ if (cluster < out_info[out_len - 1].cluster) /* only when the deleted item's cluster is the smaller one */
+ {
+ unsigned int mask = info[idx].mask;
+ unsigned int old_cluster = out_info[out_len - 1].cluster;
+ for (unsigned i = out_len; i && out_info[i - 1].cluster == old_cluster; i--) /* walk back over the trailing run of old_cluster */
+ set_cluster (out_info[i - 1], cluster, mask);
+ }
+ goto done;
+ }
+
+ if (idx + 1 < len)
+ {
+ /* Merge cluster forward. */
+ merge_clusters (idx, idx + 2);
+ goto done;
+ }
+
+done:
+ skip_glyph ();
+}
+
+/* Finds the minimum cluster in info[start, end), starting from UINT_MAX,
+ * and hands it to _unsafe_to_break_set_mask () for the same range. */
+void
+hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end)
+{
+  const unsigned int min_cluster =
+    _unsafe_to_break_find_min_cluster (info, start, end, UINT_MAX);
+
+  _unsafe_to_break_set_mask (info, start, end, min_cluster);
+}
+/* Like unsafe_to_break_impl (), but for a range that starts in the output
+ * at @start and ends in the input at @end. Without output it falls back
+ * to unsafe_to_break_impl (). */
+void
+hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end)
+{
+  if (!have_output)
+  {
+    unsafe_to_break_impl (start, end);
+    return;
+  }
+
+  assert (start <= out_len);
+  assert (idx <= end);
+
+  /* One minimum across out_info[start, out_len) and info[idx, end). */
+  const unsigned int out_min =
+    _unsafe_to_break_find_min_cluster (out_info, start, out_len, UINT_MAX);
+  const unsigned int shared_min =
+    _unsafe_to_break_find_min_cluster (info, idx, end, out_min);
+
+  _unsafe_to_break_set_mask (out_info, start, out_len, shared_min);
+  _unsafe_to_break_set_mask (info, idx, end, shared_min);
+}
+
+/* Fills in whichever of script, direction and language is still unset.
+ * The buffer must hold Unicode content, or be empty with an invalid
+ * content type. */
+void
+hb_buffer_t::guess_segment_properties ()
+{
+  assert ((content_type == HB_BUFFER_CONTENT_TYPE_UNICODE) ||
+          (!len && (content_type == HB_BUFFER_CONTENT_TYPE_INVALID)));
+
+  /* Script: adopt the script of the first codepoint that is not common,
+   * inherited or unknown. */
+  if (props.script == HB_SCRIPT_INVALID)
+  {
+    unsigned int n = 0;
+    while (n < len)
+    {
+      const hb_script_t candidate = unicode->script (info[n++].codepoint);
+      const bool specific = candidate != HB_SCRIPT_COMMON &&
+                            candidate != HB_SCRIPT_INHERITED &&
+                            candidate != HB_SCRIPT_UNKNOWN;
+      if (likely (specific))
+      {
+        props.script = candidate;
+        break;
+      }
+    }
+  }
+
+  /* Direction: follow the script, falling back to left-to-right. */
+  if (props.direction == HB_DIRECTION_INVALID)
+  {
+    const hb_direction_t horizontal = hb_script_get_horizontal_direction (props.script);
+    props.direction = (horizontal == HB_DIRECTION_INVALID) ? HB_DIRECTION_LTR : horizontal;
+  }
+
+  /* Language: use the default language. */
+  if (props.language == HB_LANGUAGE_INVALID)
+  {
+    /* TODO get_default_for_script? using $LANGUAGE */
+    props.language = hb_language_get_default ();
+  }
+}
+
+
+/* Public API */
+
+DEFINE_NULL_INSTANCE (hb_buffer_t) = /* static initializer, presumably for the Null (hb_buffer_t) object that hb_buffer_get_empty () returns */
+{
+ HB_OBJECT_HEADER_STATIC,
+
+ const_cast<hb_unicode_funcs_t *> (&_hb_Null_hb_unicode_funcs_t),
+ HB_BUFFER_FLAG_DEFAULT,
+ HB_BUFFER_CLUSTER_LEVEL_DEFAULT,
+ HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT,
+ 0, /* invisible */
+ HB_BUFFER_SCRATCH_FLAG_DEFAULT,
+ HB_BUFFER_MAX_LEN_DEFAULT,
+ HB_BUFFER_MAX_OPS_DEFAULT,
+
+ HB_BUFFER_CONTENT_TYPE_INVALID,
+ HB_SEGMENT_PROPERTIES_DEFAULT,
+ false, /* successful */ /* so enlarge () and move_to () with output bail out early on this object */
+ true, /* have_output */
+ true /* have_positions */
+
+ /* Zero is good enough for everything else. */
+};
+
+
+/**
+ * hb_buffer_create: (Xconstructor)
+ *
+ * Creates a new #hb_buffer_t with all properties set to defaults.
+ *
+ * Return value: (transfer full):
+ * A newly allocated #hb_buffer_t with a reference count of 1. The initial
+ * reference count should be released with hb_buffer_destroy() when you are done
+ * using the #hb_buffer_t. This function never returns %NULL. If memory cannot
+ * be allocated, a special #hb_buffer_t object will be returned on which
+ * hb_buffer_allocation_successful() returns %false.
+ *
+ * Since: 0.9.2
+ **/
+hb_buffer_t *
+hb_buffer_create ()
+{
+  hb_buffer_t *fresh = hb_object_create<hb_buffer_t> ();
+
+  /* Allocation failed: hand out the shared empty buffer instead of NULL. */
+  if (!fresh)
+    return hb_buffer_get_empty ();
+
+  fresh->max_ops = HB_BUFFER_MAX_OPS_DEFAULT;
+  fresh->max_len = HB_BUFFER_MAX_LEN_DEFAULT;
+
+  /* reset () fills in every other default. */
+  fresh->reset ();
+
+  return fresh;
+}
+
+/**
+ * hb_buffer_get_empty:
+ *
+ * Fetches the shared null #hb_buffer_t instance. This is the same object
+ * that hb_buffer_create() returns when allocation fails.
+ *
+ * Return value: (transfer full):
+ * The null #hb_buffer_t instance, with its const qualifier cast away.
+ *
+ * Since: 0.9.2
+ **/
+hb_buffer_t *
+hb_buffer_get_empty ()
+{
+  const hb_buffer_t &null_buffer = Null (hb_buffer_t);
+  return const_cast<hb_buffer_t *> (&null_buffer);
+}
+
+/**
+ * hb_buffer_reference: (skip)
+ * @buffer: an #hb_buffer_t.
+ *
+ * Increases the reference count on @buffer by one. This prevents @buffer from
+ * being destroyed until a matching call to hb_buffer_destroy() is made.
+ *
+ * Return value: (transfer full):
+ * The referenced #hb_buffer_t.
+ *
+ * Since: 0.9.2
+ **/
+hb_buffer_t *
+hb_buffer_reference (hb_buffer_t *buffer)
+{
+  /* All reference counting is delegated to hb_object_reference (). */
+  hb_buffer_t *referenced = hb_object_reference (buffer);
+  return referenced;
+}
+
+/**
+ * hb_buffer_destroy: (skip)
+ * @buffer: an #hb_buffer_t.
+ *
+ * Deallocate the @buffer.
+ * Decreases the reference count on @buffer by one. If the result is zero, then
+ * @buffer and all associated resources are freed. See hb_buffer_reference().
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_buffer_destroy (hb_buffer_t *buffer)
+{
+  /* hb_object_destroy () decides whether the resources may be released. */
+  const bool release = hb_object_destroy (buffer);
+  if (!release)
+    return;
+
+  hb_unicode_funcs_destroy (buffer->unicode);
+
+  /* The two item arrays. */
+  free (buffer->info);
+  free (buffer->pos);
+
+#ifndef HB_NO_BUFFER_MESSAGE
+  /* Let the message callback owner clean up its user data. */
+  if (buffer->message_destroy)
+    buffer->message_destroy (buffer->message_data);
+#endif
+
+  free (buffer);
+}
+
+/**
+ * hb_buffer_set_user_data: (skip)
+ * @buffer: an #hb_buffer_t.
+ * @key: the #hb_user_data_key_t identifying the entry.
+ * @data: the pointer to store under @key.
+ * @destroy: callback forwarded together with @data; presumably invoked on
+ *   @data when the entry is released.
+ * @replace: forwarded as is; presumably controls whether an existing entry
+ *   for @key may be overwritten.
+ *
+ * Attaches user data to @buffer by forwarding every argument to
+ * hb_object_set_user_data().
+ *
+ * Return value:
+ * The result of hb_object_set_user_data().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_buffer_set_user_data (hb_buffer_t *buffer,
+                         hb_user_data_key_t *key,
+                         void * data,
+                         hb_destroy_func_t destroy,
+                         hb_bool_t replace)
+{
+  const hb_bool_t stored = hb_object_set_user_data (buffer, key, data, destroy, replace);
+  return stored;
+}
+
+/**
+ * hb_buffer_get_user_data: (skip)
+ * @buffer: an #hb_buffer_t.
+ * @key: the #hb_user_data_key_t identifying the entry.
+ *
+ * Looks up the user data attached to @buffer under @key by forwarding to
+ * hb_object_get_user_data().
+ *
+ * Return value:
+ * The result of hb_object_get_user_data().
+ *
+ * Since: 0.9.2
+ **/
+void *
+hb_buffer_get_user_data (hb_buffer_t *buffer,
+                         hb_user_data_key_t *key)
+{
+  void *found = hb_object_get_user_data (buffer, key);
+  return found;
+}
+
+
+/**
+ * hb_buffer_set_content_type:
+ * @buffer: an #hb_buffer_t.
+ * @content_type: the type of buffer contents to set
+ *
+ * Sets the type of @buffer contents, buffers are either empty, contain
+ * characters (before shaping) or glyphs (the result of shaping).
+ *
+ * Since: 0.9.5
+ **/
+void
+hb_buffer_set_content_type (hb_buffer_t *buffer,
+                            hb_buffer_content_type_t content_type)
+{
+  /* Stored unconditionally; there is no immutability check here. */
+  hb_buffer_t &target = *buffer;
+  target.content_type = content_type;
+}
+
+/**
+ * hb_buffer_get_content_type:
+ * @buffer: an #hb_buffer_t.
+ *
+ * see hb_buffer_set_content_type().
+ *
+ * Return value:
+ * The type of @buffer contents.
+ *
+ * Since: 0.9.5
+ **/
+hb_buffer_content_type_t
+hb_buffer_get_content_type (hb_buffer_t *buffer)
+{
+  const hb_buffer_content_type_t current = buffer->content_type;
+  return current;
+}
+
+
+/**
+ * hb_buffer_set_unicode_funcs:
+ * @buffer: an #hb_buffer_t.
+ * @unicode_funcs: the #hb_unicode_funcs_t to use, or %NULL to use the
+ *   default Unicode functions.
+ *
+ * Sets the Unicode functions of @buffer. A reference to @unicode_funcs is
+ * taken and the previously set functions are released. Immutable buffers
+ * are left unchanged.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_buffer_set_unicode_funcs (hb_buffer_t *buffer,
+                             hb_unicode_funcs_t *unicode_funcs)
+{
+  if (unlikely (hb_object_is_immutable (buffer)))
+    return;
+
+  hb_unicode_funcs_t *incoming =
+    unicode_funcs ? unicode_funcs : hb_unicode_funcs_get_default ();
+
+  /* Reference the new object before releasing the old one; presumably
+   * this keeps re-setting the same object safe. */
+  hb_unicode_funcs_reference (incoming);
+  hb_unicode_funcs_destroy (buffer->unicode);
+  buffer->unicode = incoming;
+}
+
+/**
+ * hb_buffer_get_unicode_funcs:
+ * @buffer: an #hb_buffer_t.
+ *
+ * See hb_buffer_set_unicode_funcs().
+ *
+ * Return value:
+ * The #hb_unicode_funcs_t currently stored in @buffer. No reference is
+ * added by this call.
+ *
+ * Since: 0.9.2
+ **/
+hb_unicode_funcs_t *
+hb_buffer_get_unicode_funcs (hb_buffer_t *buffer)
+{
+  hb_unicode_funcs_t *current = buffer->unicode;
+  return current;
+}
+
+/**
+ * hb_buffer_set_direction:
+ * @buffer: an #hb_buffer_t.
+ * @direction: the #hb_direction_t of the @buffer
+ *
+ * Set the text flow direction of the buffer. No shaping can happen without
+ * setting @buffer direction, and it controls the visual direction for the
+ * output glyphs; for RTL direction the glyphs will be reversed. Many layout
+ * features depend on the proper setting of the direction, for example,
+ * reversing RTL text before shaping, then shaping with LTR direction is not
+ * the same as keeping the text in logical order and shaping with RTL
+ * direction.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_buffer_set_direction (hb_buffer_t *buffer,
+                         hb_direction_t direction)
+{
+  /* Immutable buffers silently ignore the request. */
+  if (likely (!hb_object_is_immutable (buffer)))
+    buffer->props.direction = direction;
+}
+
+/**
+ * hb_buffer_get_direction:
+ * @buffer: an #hb_buffer_t.
+ *
+ * See hb_buffer_set_direction()
+ *
+ * Return value:
+ * The direction of the @buffer.
+ *
+ * Since: 0.9.2
+ **/
+hb_direction_t
+hb_buffer_get_direction (hb_buffer_t *buffer)
+{
+  const hb_segment_properties_t &segment = buffer->props;
+  return segment.direction;
+}
+
+/**
+ * hb_buffer_set_script:
+ * @buffer: an #hb_buffer_t.
+ * @script: an #hb_script_t to set.
+ *
+ * Sets the script of @buffer to @script.
+ *
+ * Script is crucial for choosing the proper shaping behaviour for scripts that
+ * require it (e.g. Arabic) and for selecting which OpenType features defined
+ * in the font are applied.
+ *
+ * You can pass one of the predefined #hb_script_t values, or use
+ * hb_script_from_string() or hb_script_from_iso15924_tag() to get the
+ * corresponding script from an ISO 15924 script tag.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_buffer_set_script (hb_buffer_t *buffer,
+                      hb_script_t script)
+{
+  /* Immutable buffers silently ignore the request. */
+  if (likely (!hb_object_is_immutable (buffer)))
+    buffer->props.script = script;
+}
+
+/**
+ * hb_buffer_get_script:
+ * @buffer: an #hb_buffer_t.
+ *
+ * See hb_buffer_set_script().
+ *
+ * Return value:
+ * The #hb_script_t of the @buffer.
+ *
+ * Since: 0.9.2
+ **/
+hb_script_t
+hb_buffer_get_script (hb_buffer_t *buffer)
+{
+  const hb_segment_properties_t &segment = buffer->props;
+  return segment.script;
+}
+
+/**
+ * hb_buffer_set_language:
+ * @buffer: an #hb_buffer_t.
+ * @language: an hb_language_t to set.
+ *
+ * Sets the language of @buffer to @language.
+ *
+ * Languages are crucial for selecting which OpenType feature to apply to the
+ * buffer which can result in applying language-specific behaviour. Languages
+ * are orthogonal to the scripts, and though they are related, they are
+ * different concepts and should not be confused with each other.
+ *
+ * Use hb_language_from_string() to convert from BCP 47 language tags to
+ * #hb_language_t.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_buffer_set_language (hb_buffer_t *buffer,
+                        hb_language_t language)
+{
+  /* Immutable buffers silently ignore the request. */
+  if (likely (!hb_object_is_immutable (buffer)))
+    buffer->props.language = language;
+}
+
+/**
+ * hb_buffer_get_language:
+ * @buffer: an #hb_buffer_t.
+ *
+ * See hb_buffer_set_language().
+ *
+ * Return value: (transfer none):
+ * The #hb_language_t of the buffer. Must not be freed by the caller.
+ *
+ * Since: 0.9.2
+ **/
+hb_language_t
+hb_buffer_get_language (hb_buffer_t *buffer)
+{
+  const hb_segment_properties_t &segment = buffer->props;
+  return segment.language;
+}
+
+/**
+ * hb_buffer_set_segment_properties:
+ * @buffer: an #hb_buffer_t.
+ * @props: an #hb_segment_properties_t to use.
+ *
+ * Sets the segment properties of the buffer, a shortcut for calling
+ * hb_buffer_set_direction(), hb_buffer_set_script() and
+ * hb_buffer_set_language() individually.
+ *
+ * Since: 0.9.7
+ **/
+void
+hb_buffer_set_segment_properties (hb_buffer_t *buffer,
+                                  const hb_segment_properties_t *props)
+{
+  /* Immutable buffers silently ignore the request; otherwise the whole
+   * structure is copied in one assignment. */
+  if (likely (!hb_object_is_immutable (buffer)))
+    buffer->props = *props;
+}
+
+/**
+ * hb_buffer_get_segment_properties:
+ * @buffer: an #hb_buffer_t.
+ * @props: (out): the output #hb_segment_properties_t.
+ *
+ * Sets @props to the #hb_segment_properties_t of @buffer.
+ *
+ * Since: 0.9.7
+ **/
+void
+hb_buffer_get_segment_properties (hb_buffer_t *buffer,
+                                  hb_segment_properties_t *props)
+{
+  /* Copy the whole structure out to the caller. */
+  hb_segment_properties_t &destination = *props;
+  destination = buffer->props;
+}
+
+
+/**
+ * hb_buffer_set_flags:
+ * @buffer: an #hb_buffer_t.
+ * @flags: the buffer flags to set.
+ *
+ * Sets @buffer flags to @flags. See #hb_buffer_flags_t.
+ *
+ * Since: 0.9.7
+ **/
+void
+hb_buffer_set_flags (hb_buffer_t *buffer,
+                     hb_buffer_flags_t flags)
+{
+  /* Immutable buffers silently ignore the request. */
+  if (likely (!hb_object_is_immutable (buffer)))
+    buffer->flags = flags;
+}
+
+/**
+ * hb_buffer_get_flags:
+ * @buffer: an #hb_buffer_t.
+ *
+ * See hb_buffer_set_flags().
+ *
+ * Return value:
+ * The @buffer flags.
+ *
+ * Since: 0.9.7
+ **/
+hb_buffer_flags_t
+hb_buffer_get_flags (hb_buffer_t *buffer)
+{
+  const hb_buffer_flags_t current = buffer->flags;
+  return current;
+}
+
+/**
+ * hb_buffer_set_cluster_level:
+ * @buffer: an #hb_buffer_t.
+ * @cluster_level: the #hb_buffer_cluster_level_t to store.
+ *
+ * Sets the cluster level of @buffer. Immutable buffers are left unchanged.
+ *
+ * Since: 0.9.42
+ **/
+void
+hb_buffer_set_cluster_level (hb_buffer_t *buffer,
+                             hb_buffer_cluster_level_t cluster_level)
+{
+  /* Immutable buffers silently ignore the request. */
+  if (likely (!hb_object_is_immutable (buffer)))
+    buffer->cluster_level = cluster_level;
+}
+
+/**
+ * hb_buffer_get_cluster_level:
+ * @buffer: an #hb_buffer_t.
+ *
+ * Fetches the cluster level currently stored on @buffer.
+ * See hb_buffer_set_cluster_level().
+ *
+ * Return value:
+ * The @buffer cluster level.
+ *
+ * Since: 0.9.42
+ **/
+hb_buffer_cluster_level_t
+hb_buffer_get_cluster_level (hb_buffer_t *buffer)
+{
+ return buffer->cluster_level;
+}
+
+
+/**
+ * hb_buffer_set_replacement_codepoint:
+ * @buffer: an #hb_buffer_t.
+ * @replacement: the replacement #hb_codepoint_t
+ *
+ * Chooses the #hb_codepoint_t that is substituted for entries that are
+ * invalid in the given encoding when text is added to @buffer.
+ * The call is ignored when @buffer is immutable.
+ *
+ * Default is %HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT.
+ *
+ * Since: 0.9.31
+ **/
+void
+hb_buffer_set_replacement_codepoint (hb_buffer_t *buffer,
+                                     hb_codepoint_t replacement)
+{
+  /* Immutable buffers keep their replacement code point untouched. */
+  const bool frozen = hb_object_is_immutable (buffer);
+  if (unlikely (frozen))
+    return;
+
+  buffer->replacement = replacement;
+}
+
+/**
+ * hb_buffer_get_replacement_codepoint:
+ * @buffer: an #hb_buffer_t.
+ *
+ * Fetches the replacement code point currently stored on @buffer.
+ * See hb_buffer_set_replacement_codepoint().
+ *
+ * Return value:
+ * The @buffer replacement #hb_codepoint_t.
+ *
+ * Since: 0.9.31
+ **/
+hb_codepoint_t
+hb_buffer_get_replacement_codepoint (hb_buffer_t *buffer)
+{
+ return buffer->replacement;
+}
+
+
+/**
+ * hb_buffer_set_invisible_glyph:
+ * @buffer: an #hb_buffer_t.
+ * @invisible: the invisible #hb_codepoint_t
+ *
+ * Chooses the #hb_codepoint_t that stands in for invisible characters
+ * in the shaping result. With zero (the default), the glyph for the
+ * U+0020 SPACE character is used; any other value is used verbatim.
+ * The call is ignored when @buffer is immutable.
+ *
+ * Since: 2.0.0
+ **/
+void
+hb_buffer_set_invisible_glyph (hb_buffer_t *buffer,
+                               hb_codepoint_t invisible)
+{
+  /* Immutable buffers keep their invisible glyph untouched. */
+  const bool frozen = hb_object_is_immutable (buffer);
+  if (unlikely (frozen))
+    return;
+
+  buffer->invisible = invisible;
+}
+
+/**
+ * hb_buffer_get_invisible_glyph:
+ * @buffer: an #hb_buffer_t.
+ *
+ * Fetches the invisible glyph currently stored on @buffer.
+ * See hb_buffer_set_invisible_glyph().
+ *
+ * Return value:
+ * The @buffer invisible #hb_codepoint_t.
+ *
+ * Since: 2.0.0
+ **/
+hb_codepoint_t
+hb_buffer_get_invisible_glyph (hb_buffer_t *buffer)
+{
+ return buffer->invisible;
+}
+
+
+/**
+ * hb_buffer_reset:
+ * @buffer: an #hb_buffer_t.
+ *
+ * Resets the buffer to its initial status, as if it were just newly created
+ * with hb_buffer_create(). See also hb_buffer_clear_contents().
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_buffer_reset (hb_buffer_t *buffer)
+{
+ buffer->reset ();
+}
+
+/**
+ * hb_buffer_clear_contents:
+ * @buffer: an #hb_buffer_t.
+ *
+ * Similar to hb_buffer_reset(), but does not clear the Unicode functions or
+ * the replacement code point.
+ *
+ * Since: 0.9.11
+ **/
+void
+hb_buffer_clear_contents (hb_buffer_t *buffer)
+{
+ buffer->clear ();
+}
+
+/**
+ * hb_buffer_pre_allocate:
+ * @buffer: an #hb_buffer_t.
+ * @size: number of items to pre-allocate.
+ *
+ * Pre-allocates memory for @buffer to fit at least @size number of items.
+ *
+ * Return value:
+ * %true if @buffer memory allocation succeeded, %false otherwise.
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_buffer_pre_allocate (hb_buffer_t *buffer, unsigned int size)
+{
+ return buffer->ensure (size);
+}
+
+/**
+ * hb_buffer_allocation_successful:
+ * @buffer: an #hb_buffer_t.
+ *
+ * Checks whether allocating memory for the buffer has succeeded so far,
+ * by reporting the success flag stored on @buffer.
+ *
+ * Return value:
+ * %true if @buffer memory allocation succeeded, %false otherwise.
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_buffer_allocation_successful (hb_buffer_t *buffer)
+{
+ return buffer->successful;
+}
+
+/**
+ * hb_buffer_add:
+ * @buffer: an #hb_buffer_t.
+ * @codepoint: a Unicode code point.
+ * @cluster: the cluster value of @codepoint.
+ *
+ * Appends a character with the Unicode value of @codepoint to @buffer, and
+ * gives it the initial cluster value of @cluster. Clusters can be anything
+ * the client wants; they are usually used to refer to the index of the
+ * character in the input text stream and are output in the
+ * #hb_glyph_info_t.cluster field.
+ *
+ * Any post-context previously recorded on @buffer is cleared by this call.
+ *
+ * This function does not check the validity of @codepoint; it is up to the
+ * caller to ensure it is a valid Unicode code point.
+ *
+ * Since: 0.9.7
+ **/
+void
+hb_buffer_add (hb_buffer_t *buffer,
+ hb_codepoint_t codepoint,
+ unsigned int cluster)
+{
+ buffer->add (codepoint, cluster);
+ buffer->clear_context (1);
+}
+
+/**
+ * hb_buffer_set_length:
+ * @buffer: an #hb_buffer_t.
+ * @length: the new length of @buffer.
+ *
+ * Similar to hb_buffer_pre_allocate(), but also makes @length the item
+ * count of @buffer and zero-fills any items gained at the end.
+ *
+ * Setting the length to zero additionally marks the content type as
+ * %HB_BUFFER_CONTENT_TYPE_INVALID and clears the pre-context; the
+ * post-context is cleared on every successful call.
+ *
+ * An immutable @buffer is never modified; for it the call reports
+ * success only when @length is zero.
+ *
+ * Return value:
+ * %true if @buffer memory allocation succeeded, %false otherwise.
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_buffer_set_length (hb_buffer_t *buffer,
+                      unsigned int length)
+{
+  if (unlikely (hb_object_is_immutable (buffer)))
+    return length == 0;
+
+  if (!buffer->ensure (length))
+    return false;
+
+  const unsigned int old_len = buffer->len;
+  if (old_len < length)
+  {
+    /* Zero-fill the items gained by growing. */
+    const unsigned int gained = length - old_len;
+    memset (buffer->info + old_len, 0, sizeof (buffer->info[0]) * gained);
+    if (buffer->have_positions)
+      memset (buffer->pos + old_len, 0, sizeof (buffer->pos[0]) * gained);
+  }
+
+  buffer->len = length;
+
+  if (length == 0)
+  {
+    /* An emptied buffer carries no content type and no pre-context. */
+    buffer->content_type = HB_BUFFER_CONTENT_TYPE_INVALID;
+    buffer->clear_context (0);
+  }
+  buffer->clear_context (1);
+
+  return true;
+}
+
+/**
+ * hb_buffer_get_length:
+ * @buffer: an #hb_buffer_t.
+ *
+ * Returns the number of items in the buffer.
+ *
+ * Return value:
+ * The @buffer length.
+ * The value is valid as long as the buffer has not been modified.
+ *
+ * Since: 0.9.2
+ **/
+unsigned int
+hb_buffer_get_length (hb_buffer_t *buffer)
+{
+ return buffer->len;
+}
+
+/**
+ * hb_buffer_get_glyph_infos:
+ * @buffer: an #hb_buffer_t.
+ * @length: (out): output array length; may be %NULL if the length is not
+ * needed.
+ *
+ * Returns @buffer glyph information array. Returned pointer
+ * is valid as long as @buffer contents are not modified.
+ *
+ * Return value: (transfer none) (array length=length):
+ * The @buffer glyph information array.
+ * The value is valid as long as the buffer has not been modified.
+ *
+ * Since: 0.9.2
+ **/
+hb_glyph_info_t *
+hb_buffer_get_glyph_infos (hb_buffer_t *buffer,
+ unsigned int *length)
+{
+ if (length)
+ *length = buffer->len;
+
+ return (hb_glyph_info_t *) buffer->info;
+}
+
+/**
+ * hb_buffer_get_glyph_positions:
+ * @buffer: an #hb_buffer_t.
+ * @length: (out): output length; may be %NULL if the length is not needed.
+ *
+ * Returns @buffer glyph position array. Returned pointer
+ * is valid as long as @buffer contents are not modified.
+ *
+ * If @buffer does not have positions yet, its positions are first
+ * cleared, so this call can modify @buffer.
+ *
+ * Return value: (transfer none) (array length=length):
+ * The @buffer glyph position array.
+ * The value is valid as long as the buffer has not been modified.
+ *
+ * Since: 0.9.2
+ **/
+hb_glyph_position_t *
+hb_buffer_get_glyph_positions (hb_buffer_t *buffer,
+ unsigned int *length)
+{
+ if (!buffer->have_positions)
+ buffer->clear_positions ();
+
+ if (length)
+ *length = buffer->len;
+
+ return (hb_glyph_position_t *) buffer->pos;
+}
+
+/**
+ * hb_glyph_info_get_glyph_flags:
+ * @info: a #hb_glyph_info_t.
+ *
+ * Returns glyph flags encoded within a #hb_glyph_info_t.
+ *
+ * This is the out-of-line function form of the like-named macro from
+ * hb-buffer.h. The function name is parenthesized in the definition so
+ * that the function-like macro is not expanded there, while the call in
+ * the body is the macro itself.
+ *
+ * Return value:
+ * The #hb_glyph_flags_t encoded within @info.
+ *
+ * Since: 1.5.0
+ **/
+hb_glyph_flags_t
+(hb_glyph_info_get_glyph_flags) (const hb_glyph_info_t *info)
+{
+ return hb_glyph_info_get_glyph_flags (info);
+}
+
+/**
+ * hb_buffer_reverse:
+ * @buffer: an #hb_buffer_t.
+ *
+ * Reverses the order of the buffer contents.
+ * See also hb_buffer_reverse_range() and hb_buffer_reverse_clusters().
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_buffer_reverse (hb_buffer_t *buffer)
+{
+ buffer->reverse ();
+}
+
+/**
+ * hb_buffer_reverse_range:
+ * @buffer: an #hb_buffer_t.
+ * @start: start index.
+ * @end: end index.
+ *
+ * Reverses the order of the buffer contents between @start and @end.
+ *
+ * Since: 0.9.41
+ **/
+void
+hb_buffer_reverse_range (hb_buffer_t *buffer,
+ unsigned int start, unsigned int end)
+{
+ buffer->reverse_range (start, end);
+}
+
+/**
+ * hb_buffer_reverse_clusters:
+ * @buffer: an #hb_buffer_t.
+ *
+ * Reverses buffer clusters. That is, the buffer contents are
+ * reversed, then each cluster (consecutive items having the
+ * same cluster number) is reversed again.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_buffer_reverse_clusters (hb_buffer_t *buffer)
+{
+ buffer->reverse_clusters ();
+}
+
+/**
+ * hb_buffer_guess_segment_properties:
+ * @buffer: an #hb_buffer_t.
+ *
+ * Sets unset buffer segment properties based on buffer Unicode
+ * contents. If buffer is not empty, it must have content type
+ * %HB_BUFFER_CONTENT_TYPE_UNICODE.
+ *
+ * If buffer script is not set (i.e. is %HB_SCRIPT_INVALID), it
+ * will be set to the Unicode script of the first character in
+ * the buffer that has a script other than %HB_SCRIPT_COMMON,
+ * %HB_SCRIPT_INHERITED, and %HB_SCRIPT_UNKNOWN.
+ *
+ * Next, if buffer direction is not set (i.e. is %HB_DIRECTION_INVALID),
+ * it will be set to the natural horizontal direction of the
+ * buffer script as returned by hb_script_get_horizontal_direction().
+ * If hb_script_get_horizontal_direction() returns %HB_DIRECTION_INVALID,
+ * then %HB_DIRECTION_LTR is used.
+ *
+ * Finally, if buffer language is not set (i.e. is %HB_LANGUAGE_INVALID),
+ * it will be set to the process's default language as returned by
+ * hb_language_get_default(). This may change in the future by
+ * taking buffer script into consideration when choosing a language.
+ * Note that hb_language_get_default() is NOT threadsafe the first time
+ * it is called. See documentation for that function for details.
+ *
+ * Since: 0.9.7
+ **/
+void
+hb_buffer_guess_segment_properties (hb_buffer_t *buffer)
+{
+ buffer->guess_segment_properties ();
+}
+
+/*
+ * hb_buffer_add_utf:
+ *
+ * Shared implementation of the public hb_buffer_add_*() entry points.
+ * utf_t supplies the element type (codepoint_t) and the strlen(), next()
+ * and prev() decoders for one text encoding.
+ *
+ * Decodes item_length elements of text starting at element item_offset
+ * and appends the resulting code points to buffer; each item's cluster is
+ * the element index in text at which its code point starts. Up to
+ * CONTEXT_LENGTH code points before and after the item are recorded as
+ * pre- and post-context. text_length and item_length may each be -1,
+ * meaning "up to the terminator" and "up to the end of text".
+ *
+ * Nothing is added when buffer is immutable, when the item length is
+ * negative, or when pre-allocating room for the item fails.
+ */
+template <typename utf_t>
+static inline void
+hb_buffer_add_utf (hb_buffer_t *buffer,
+                   const typename utf_t::codepoint_t *text,
+                   int text_length,
+                   unsigned int item_offset,
+                   int item_length)
+{
+  typedef typename utf_t::codepoint_t T;
+  const hb_codepoint_t replacement = buffer->replacement;
+
+  assert ((buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE) ||
+          (!buffer->len && (buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID)));
+
+  if (unlikely (hb_object_is_immutable (buffer)))
+    return;
+
+  if (text_length == -1)
+    text_length = utf_t::strlen (text);
+
+  if (item_length == -1)
+    item_length = text_length - item_offset;
+
+  /* A negative length (passed in, or computed above when item_offset lies
+   * past text_length) would turn into a huge unsigned size request below
+   * and flag the buffer as failed; reject it up front. Likewise stop when
+   * the pre-allocation itself fails. */
+  if (unlikely (item_length < 0 ||
+                !buffer->ensure (buffer->len + item_length * sizeof (T) / 4)))
+    return;
+
+  /* If buffer is empty and pre-context provided, install it.
+   * This check is written this way, to make sure people can
+   * provide pre-context in one add_utf() call, then provide
+   * text in a follow-up call. See:
+   *
+   * https://bugzilla.mozilla.org/show_bug.cgi?id=801410#c13
+   */
+  if (!buffer->len && item_offset > 0)
+  {
+    /* Add pre-context: walk backwards from the item start, so the code
+     * point nearest to the item is stored first. */
+    buffer->clear_context (0);
+    const T *prev = text + item_offset;
+    const T *start = text;
+    while (start < prev && buffer->context_len[0] < buffer->CONTEXT_LENGTH)
+    {
+      hb_codepoint_t u;
+      prev = utf_t::prev (prev, start, &u, replacement);
+      buffer->context[0][buffer->context_len[0]++] = u;
+    }
+  }
+
+  /* Add the item itself. */
+  const T *next = text + item_offset;
+  const T *end = next + item_length;
+  while (next < end)
+  {
+    hb_codepoint_t u;
+    const T *old_next = next;
+    next = utf_t::next (next, end, &u, replacement);
+    buffer->add (u, old_next - (const T *) text);
+  }
+
+  /* Add post-context: whatever follows the item, up to text_length. */
+  buffer->clear_context (1);
+  end = text + text_length;
+  while (next < end && buffer->context_len[1] < buffer->CONTEXT_LENGTH)
+  {
+    hb_codepoint_t u;
+    next = utf_t::next (next, end, &u, replacement);
+    buffer->context[1][buffer->context_len[1]++] = u;
+  }
+
+  buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE;
+}
+
+/**
+ * hb_buffer_add_utf8:
+ * @buffer: an #hb_buffer_t.
+ * @text: (array length=text_length) (element-type uint8_t): an array of UTF-8
+ * characters to append.
+ * @text_length: the length of the @text, or -1 if it is %NULL terminated.
+ * @item_offset: the offset, counted in elements (bytes) of @text, of the
+ * first character to add to the @buffer.
+ * @item_length: the number of elements (bytes) of @text to add to the
+ * @buffer, or -1 for the end of @text (assuming it is %NULL
+ * terminated).
+ *
+ * See hb_buffer_add_codepoints().
+ *
+ * Replaces invalid UTF-8 characters with the @buffer replacement code point,
+ * see hb_buffer_set_replacement_codepoint().
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_buffer_add_utf8 (hb_buffer_t *buffer,
+ const char *text,
+ int text_length,
+ unsigned int item_offset,
+ int item_length)
+{
+ hb_buffer_add_utf<hb_utf8_t> (buffer, (const uint8_t *) text, text_length, item_offset, item_length);
+}
+
+/**
+ * hb_buffer_add_utf16:
+ * @buffer: an #hb_buffer_t.
+ * @text: (array length=text_length): an array of UTF-16 characters to append.
+ * @text_length: the length of the @text, or -1 if it is %NULL terminated.
+ * @item_offset: the offset, counted in elements (16-bit units) of @text, of
+ * the first character to add to the @buffer.
+ * @item_length: the number of elements (16-bit units) of @text to add to the
+ * @buffer, or -1 for the end of @text (assuming it is %NULL
+ * terminated).
+ *
+ * See hb_buffer_add_codepoints().
+ *
+ * Replaces invalid UTF-16 characters with the @buffer replacement code point,
+ * see hb_buffer_set_replacement_codepoint().
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_buffer_add_utf16 (hb_buffer_t *buffer,
+ const uint16_t *text,
+ int text_length,
+ unsigned int item_offset,
+ int item_length)
+{
+ hb_buffer_add_utf<hb_utf16_t> (buffer, text, text_length, item_offset, item_length);
+}
+
+/**
+ * hb_buffer_add_utf32:
+ * @buffer: an #hb_buffer_t.
+ * @text: (array length=text_length): an array of UTF-32 characters to append.
+ * @text_length: the length of the @text, or -1 if it is %NULL terminated.
+ * @item_offset: the offset, counted in elements of @text, of the first
+ * character to add to the @buffer.
+ * @item_length: the number of elements of @text to add to the @buffer, or -1
+ * for the end of @text (assuming it is %NULL terminated).
+ *
+ * See hb_buffer_add_codepoints().
+ *
+ * Replaces invalid UTF-32 characters with the @buffer replacement code point,
+ * see hb_buffer_set_replacement_codepoint().
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_buffer_add_utf32 (hb_buffer_t *buffer,
+ const uint32_t *text,
+ int text_length,
+ unsigned int item_offset,
+ int item_length)
+{
+ hb_buffer_add_utf<hb_utf32_t> (buffer, text, text_length, item_offset, item_length);
+}
+
+/**
+ * hb_buffer_add_latin1:
+ * @buffer: an #hb_buffer_t.
+ * @text: (array length=text_length) (element-type uint8_t): an array of 8-bit
+ * characters to append.
+ * @text_length: the length of the @text, or -1 if it is %NULL terminated.
+ * @item_offset: the offset, counted in elements (bytes) of @text, of the
+ * first character to add to the @buffer.
+ * @item_length: the number of elements (bytes) of @text to add to the
+ * @buffer, or -1 for the end of @text (assuming it is %NULL
+ * terminated).
+ *
+ * Similar to hb_buffer_add_codepoints(), but allows access only to the first
+ * 256 Unicode code points, which can fit in 8-bit strings.
+ *
+ * <note>Has nothing to do with non-Unicode Latin-1 encoding.</note>
+ *
+ * Since: 0.9.39
+ **/
+void
+hb_buffer_add_latin1 (hb_buffer_t *buffer,
+ const uint8_t *text,
+ int text_length,
+ unsigned int item_offset,
+ int item_length)
+{
+ hb_buffer_add_utf<hb_latin1_t> (buffer, text, text_length, item_offset, item_length);
+}
+
+/**
+ * hb_buffer_add_codepoints:
+ * @buffer: a #hb_buffer_t to append characters to.
+ * @text: (array length=text_length): an array of Unicode code points to append.
+ * @text_length: the length of the @text, or -1 if it is %NULL terminated.
+ * @item_offset: the offset of the first code point to add to the @buffer.
+ * @item_length: the number of code points to add to the @buffer, or -1 for the
+ * end of @text (assuming it is %NULL terminated).
+ *
+ * Appends characters from @text array to @buffer. The @item_offset is the
+ * position of the first character from @text that will be appended, and
+ * @item_length is the number of characters. When shaping part of a larger text
+ * (e.g. a run of text from a paragraph), instead of passing just the substring
+ * corresponding to the run, it is preferable to pass the whole
+ * paragraph and specify the run start and length as @item_offset and
+ * @item_length, respectively, to give HarfBuzz the full context to be able,
+ * for example, to do cross-run Arabic shaping or properly handle combining
+ * marks at the start of a run.
+ *
+ * This function does not check the validity of @text; it is up to the caller
+ * to ensure it contains valid Unicode code points.
+ *
+ * Since: 0.9.31
+ **/
+void
+hb_buffer_add_codepoints (hb_buffer_t *buffer,
+ const hb_codepoint_t *text,
+ int text_length,
+ unsigned int item_offset,
+ int item_length)
+{
+ hb_buffer_add_utf<hb_utf32_novalidate_t> (buffer, text, text_length, item_offset, item_length);
+}
+
+
+/**
+ * hb_buffer_append:
+ * @buffer: an #hb_buffer_t.
+ * @source: source #hb_buffer_t.
+ * @start: start index into source buffer to copy. Use 0 to copy from start of buffer.
+ * @end: end index into source buffer to copy. Use @HB_FEATURE_GLOBAL_END to copy to end of buffer.
+ *
+ * Append (part of) contents of another buffer to this buffer.
+ *
+ * @end is clamped to the length of @source and @start is clamped to @end;
+ * an empty range appends nothing. If @buffer is empty it takes over the
+ * content type of @source. Neither buffer may have pending output, and
+ * non-empty buffers must agree on content type and on having positions
+ * (these preconditions are asserted).
+ *
+ * Nothing is done when @buffer is immutable. If the combined length would
+ * overflow, or growing @buffer fails, @buffer is marked as unsuccessful
+ * (see hb_buffer_allocation_successful()) and nothing is copied.
+ *
+ * Since: 1.5.0
+ **/
+HB_EXTERN void
+hb_buffer_append (hb_buffer_t *buffer,
+                  hb_buffer_t *source,
+                  unsigned int start,
+                  unsigned int end)
+{
+  assert (!buffer->have_output && !source->have_output);
+  assert (buffer->have_positions == source->have_positions ||
+          !buffer->len || !source->len);
+  assert (buffer->content_type == source->content_type ||
+          !buffer->len || !source->len);
+
+  /* Like the setters in this file, never touch an immutable buffer; without
+   * this guard the content type and positions below would be written into
+   * it before hb_buffer_set_length() gets a chance to refuse. */
+  if (unlikely (hb_object_is_immutable (buffer)))
+    return;
+
+  if (end > source->len)
+    end = source->len;
+  if (start > end)
+    start = end;
+  if (start == end)
+    return;
+
+  if (!buffer->len)
+    buffer->content_type = source->content_type;
+  if (!buffer->have_positions && source->have_positions)
+    buffer->clear_positions ();
+
+  if (buffer->len + (end - start) < buffer->len) /* Overflows. */
+  {
+    buffer->successful = false;
+    return;
+  }
+
+  /* Grow first (new items are zero-filled), then copy over them. */
+  unsigned int orig_len = buffer->len;
+  hb_buffer_set_length (buffer, buffer->len + (end - start));
+  if (unlikely (!buffer->successful))
+    return;
+
+  memcpy (buffer->info + orig_len, source->info + start, (end - start) * sizeof (buffer->info[0]));
+  if (buffer->have_positions)
+    memcpy (buffer->pos + orig_len, source->pos + start, (end - start) * sizeof (buffer->pos[0]));
+}
+
+
+/* Comparator ordering glyph infos by descending codepoint: the result is
+ * positive when pa's codepoint is smaller than pb's, negative when it is
+ * larger, and zero when they are equal.
+ *
+ * The values are compared directly instead of subtracting them as ints,
+ * because that subtraction can overflow for large codepoint values. */
+static int
+compare_info_codepoint (const hb_glyph_info_t *pa,
+                        const hb_glyph_info_t *pb)
+{
+  if (pa->codepoint < pb->codepoint)
+    return +1;
+  if (pa->codepoint > pb->codepoint)
+    return -1;
+  return 0;
+}
+
+/* Normalizes the positions and order of the glyphs of one cluster, the
+ * items [start, end) of buffer.
+ *
+ * Every glyph's advance is folded into the offsets of the glyphs after it
+ * and then zeroed; the total advance of the cluster is put on the last
+ * glyph (backward) or on the first glyph (otherwise, with the offsets of
+ * the remaining glyphs shifted back by the total). All glyphs except the
+ * one carrying the advance are then stable-sorted with
+ * compare_info_codepoint, keeping positions in step with infos. */
+static inline void
+normalize_glyphs_cluster (hb_buffer_t *buffer,
+                          unsigned int start,
+                          unsigned int end,
+                          bool backward)
+{
+  hb_glyph_position_t *pos = buffer->pos;
+
+  /* Fold advances into offsets; the running sums end up holding the
+   * total advance of the cluster. */
+  hb_position_t run_x = 0, run_y = 0;
+  for (unsigned int k = start; k < end; k++)
+  {
+    hb_glyph_position_t &p = pos[k];
+
+    p.x_offset += run_x;
+    p.y_offset += run_y;
+
+    run_x += p.x_advance;
+    run_y += p.y_advance;
+
+    p.x_advance = 0;
+    p.y_advance = 0;
+  }
+  const hb_position_t total_x_advance = run_x;
+  const hb_position_t total_y_advance = run_y;
+
+  /* Number of glyphs that take part in the sort. */
+  const unsigned int sortable = end - start - 1;
+
+  if (!backward)
+  {
+    /* Transfer all cluster advance to the first glyph. */
+    pos[start].x_advance += total_x_advance;
+    pos[start].y_advance += total_y_advance;
+    for (unsigned int k = start + 1; k < end; k++)
+    {
+      pos[k].x_offset -= total_x_advance;
+      pos[k].y_offset -= total_y_advance;
+    }
+    hb_stable_sort (buffer->info + start + 1, sortable, compare_info_codepoint, buffer->pos + start + 1);
+  }
+  else
+  {
+    /* Transfer all cluster advance to the last glyph. */
+    pos[end - 1].x_advance = total_x_advance;
+    pos[end - 1].y_advance = total_y_advance;
+
+    hb_stable_sort (buffer->info + start, sortable, compare_info_codepoint, buffer->pos + start);
+  }
+}
+
+/**
+ * hb_buffer_normalize_glyphs:
+ * @buffer: an #hb_buffer_t.
+ *
+ * Reorders a glyph buffer to have canonical in-cluster glyph order / position.
+ * The resulting clusters should behave identically to pre-reordering clusters.
+ *
+ * @buffer must have positions, and must either contain glyphs
+ * (%HB_BUFFER_CONTENT_TYPE_GLYPHS) or be empty with content type
+ * %HB_BUFFER_CONTENT_TYPE_INVALID; both preconditions are asserted.
+ *
+ * <note>This has nothing to do with Unicode normalization.</note>
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_buffer_normalize_glyphs (hb_buffer_t *buffer)
+{
+ assert (buffer->have_positions);
+ assert ((buffer->content_type == HB_BUFFER_CONTENT_TYPE_GLYPHS) ||
+ (!buffer->len && (buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID)));
+
+ bool backward = HB_DIRECTION_IS_BACKWARD (buffer->props.direction);
+
+ foreach_cluster (buffer, start, end)
+ normalize_glyphs_cluster (buffer, start, end, backward);
+}
+
+/* Insertion-sorts the items [start, end) of info with compar. Whenever an
+ * item has to move, the clusters of the span it moves across are merged
+ * first. Only valid while the buffer has no positions (asserted). */
+void
+hb_buffer_t::sort (unsigned int start, unsigned int end, int(*compar)(const hb_glyph_info_t *, const hb_glyph_info_t *))
+{
+  assert (!have_positions);
+  for (unsigned int cur = start + 1; cur < end; cur++)
+  {
+    /* Find the slot item cur belongs in among the already sorted items. */
+    unsigned int dest = cur;
+    while (dest > start && compar (&info[dest - 1], &info[cur]) > 0)
+      dest--;
+
+    if (dest != cur)
+    {
+      /* Move item cur into slot dest, shifting what is in between. */
+      merge_clusters (dest, cur + 1);
+      const hb_glyph_info_t moved = info[cur];
+      memmove (&info[dest + 1], &info[dest], (cur - dest) * sizeof (hb_glyph_info_t));
+      info[dest] = moved;
+    }
+  }
+}
+
+
+/*
+ * Comparing buffers.
+ */
+
+/**
+ * hb_buffer_diff:
+ * @buffer: a buffer.
+ * @reference: other buffer to compare to.
+ * @dottedcircle_glyph: glyph id of U+25CC DOTTED CIRCLE, or (hb_codepoint_t) -1.
+ * @position_fuzz: allowed absolute difference in position values.
+ *
+ * Compares @buffer against @reference and reports how they differ.
+ *
+ * If dottedcircle_glyph is (hb_codepoint_t) -1 then %HB_BUFFER_DIFF_FLAG_DOTTED_CIRCLE_PRESENT
+ * and %HB_BUFFER_DIFF_FLAG_NOTDEF_PRESENT are never returned. This should be used by most
+ * callers if just comparing two buffers is needed.
+ *
+ * Return value:
+ * %HB_BUFFER_DIFF_FLAG_EQUAL if no difference was found, otherwise the
+ * combination of #hb_buffer_diff_flags_t describing the differences.
+ *
+ * Since: 1.5.0
+ **/
+hb_buffer_diff_flags_t
+hb_buffer_diff (hb_buffer_t *buffer,
+                hb_buffer_t *reference,
+                hb_codepoint_t dottedcircle_glyph,
+                unsigned int position_fuzz)
+{
+  /* Content types only have to agree when both buffers hold something. */
+  const bool both_filled = buffer->len && reference->len;
+  if (both_filled && buffer->content_type != reference->content_type)
+    return HB_BUFFER_DIFF_FLAG_CONTENT_TYPE_MISMATCH;
+
+  hb_buffer_diff_flags_t result = HB_BUFFER_DIFF_FLAG_EQUAL;
+  const bool contains = dottedcircle_glyph != (hb_codepoint_t) -1;
+  const unsigned int count = reference->len;
+
+  if (buffer->len != count)
+  {
+    /*
+     * we can't compare glyph-by-glyph, but we do want to know if there
+     * are .notdef or dottedcircle glyphs present in the reference buffer
+     */
+    const hb_glyph_info_t *ref = reference->info;
+    for (unsigned int k = 0; k < count; k++)
+    {
+      if (!contains)
+        continue;
+      if (ref[k].codepoint == dottedcircle_glyph)
+        result |= HB_BUFFER_DIFF_FLAG_DOTTED_CIRCLE_PRESENT;
+      if (ref[k].codepoint == 0)
+        result |= HB_BUFFER_DIFF_FLAG_NOTDEF_PRESENT;
+    }
+    result |= HB_BUFFER_DIFF_FLAG_LENGTH_MISMATCH;
+    return hb_buffer_diff_flags_t (result);
+  }
+
+  if (count == 0)
+    return hb_buffer_diff_flags_t (result);
+
+  /* Same length: compare the glyph infos item by item. */
+  for (unsigned int k = 0; k < count; k++)
+  {
+    const hb_glyph_info_t &mine = buffer->info[k];
+    const hb_glyph_info_t &theirs = reference->info[k];
+
+    if (mine.codepoint != theirs.codepoint)
+      result |= HB_BUFFER_DIFF_FLAG_CODEPOINT_MISMATCH;
+    if (mine.cluster != theirs.cluster)
+      result |= HB_BUFFER_DIFF_FLAG_CLUSTER_MISMATCH;
+    /* Only glyph flags set in @buffer but missing from @reference count. */
+    if ((mine.mask & ~theirs.mask & HB_GLYPH_FLAG_DEFINED))
+      result |= HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH;
+    if (contains && theirs.codepoint == dottedcircle_glyph)
+      result |= HB_BUFFER_DIFF_FLAG_DOTTED_CIRCLE_PRESENT;
+    if (contains && theirs.codepoint == 0)
+      result |= HB_BUFFER_DIFF_FLAG_NOTDEF_PRESENT;
+  }
+
+  if (buffer->content_type == HB_BUFFER_CONTENT_TYPE_GLYPHS)
+  {
+    assert (buffer->have_positions);
+    /* Positions: the first item outside the allowed fuzz settles it. */
+    for (unsigned int k = 0; k < count; k++)
+    {
+      const hb_glyph_position_t &mine = buffer->pos[k];
+      const hb_glyph_position_t &theirs = reference->pos[k];
+
+      const bool off =
+        (unsigned int) abs (mine.x_advance - theirs.x_advance) > position_fuzz ||
+        (unsigned int) abs (mine.y_advance - theirs.y_advance) > position_fuzz ||
+        (unsigned int) abs (mine.x_offset - theirs.x_offset) > position_fuzz ||
+        (unsigned int) abs (mine.y_offset - theirs.y_offset) > position_fuzz;
+      if (off)
+      {
+        result |= HB_BUFFER_DIFF_FLAG_POSITION_MISMATCH;
+        break;
+      }
+    }
+  }
+
+  return result;
+}
+
+
+/*
+ * Debugging.
+ */
+
+#ifndef HB_NO_BUFFER_MESSAGE
+/**
+ * hb_buffer_set_message_func:
+ * @buffer: an #hb_buffer_t.
+ * @func: (closure user_data) (destroy destroy) (scope notified): the message
+ * callback to install on @buffer, or %NULL to remove the current one.
+ * @user_data: data handed to @func on every invocation.
+ * @destroy: function called on @user_data once the callback is replaced
+ * by a later call to this function.
+ *
+ * Installs @func as the message callback of @buffer. The destroy function
+ * of the previously installed callback, if any, is called on the previous
+ * user data first. When @func is %NULL the callback is removed, and
+ * @user_data and @destroy are not stored.
+ *
+ * Since: 1.1.3
+ **/
+void
+hb_buffer_set_message_func (hb_buffer_t *buffer,
+                            hb_buffer_message_func_t func,
+                            void *user_data, hb_destroy_func_t destroy)
+{
+  /* Release the data of the callback that is about to be replaced. */
+  if (buffer->message_destroy)
+    buffer->message_destroy (buffer->message_data);
+
+  if (!func)
+  {
+    buffer->message_func = nullptr;
+    buffer->message_data = nullptr;
+    buffer->message_destroy = nullptr;
+    return;
+  }
+
+  buffer->message_func = func;
+  buffer->message_data = user_data;
+  buffer->message_destroy = destroy;
+}
+/* Formats fmt/ap into a fixed-size scratch string and passes it, together
+ * with font and the stored user data, to the installed message callback.
+ * Returns the callback's result converted to bool. */
+bool
+hb_buffer_t::message_impl (hb_font_t *font, const char *fmt, va_list ap)
+{
+  /* vsnprintf() truncates messages that do not fit the scratch string. */
+  char text[100];
+  vsnprintf (text, sizeof (text), fmt, ap);
+
+  const bool result = (bool) this->message_func (this, font, text, this->message_data);
+  return result;
+}
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-buffer.h b/thirdparty/harfbuzz/src/hb-buffer.h
new file mode 100644
index 0000000000..2f581f3c73
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-buffer.h
@@ -0,0 +1,586 @@
+/*
+ * Copyright © 1998-2004 David Turner and Werner Lemberg
+ * Copyright © 2004,2007,2009 Red Hat, Inc.
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Owen Taylor, Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_H_IN
+#error "Include <hb.h> instead."
+#endif
+
+#ifndef HB_BUFFER_H
+#define HB_BUFFER_H
+
+#include "hb-common.h"
+#include "hb-unicode.h"
+#include "hb-font.h"
+
+HB_BEGIN_DECLS
+
+/**
+ * hb_glyph_info_t:
+ * @codepoint: either a Unicode code point (before shaping) or a glyph index
+ * (after shaping).
+ * @cluster: the index of the character in the original text that corresponds
+ * to this #hb_glyph_info_t, or whatever the client passes to
+ * hb_buffer_add(). More than one #hb_glyph_info_t can have the same
+ * @cluster value, if they resulted from the same character (e.g. one
+ * to many glyph substitution), and when more than one character gets
+ * merged in the same glyph (e.g. many to one glyph substitution) the
+ * #hb_glyph_info_t will have the smallest cluster value of them.
+ * By default some characters are merged into the same cluster
+ * (e.g. combining marks have the same cluster as their bases)
+ * even if they are separate glyphs; hb_buffer_set_cluster_level()
+ * allows selecting more fine-grained cluster handling.
+ *
+ * The #hb_glyph_info_t is the structure that holds information about the
+ * glyphs and their relation to input text.
+ */
+typedef struct hb_glyph_info_t
+{
+ hb_codepoint_t codepoint;
+ /*< private >*/
+ hb_mask_t mask;
+ /*< public >*/
+ uint32_t cluster;
+
+ /*< private >*/
+ hb_var_int_t var1;
+ hb_var_int_t var2;
+} hb_glyph_info_t;
+
+/**
+ * hb_glyph_flags_t:
+ * @HB_GLYPH_FLAG_UNSAFE_TO_BREAK: Indicates that if input text is broken at the
+ * beginning of the cluster this glyph is part of,
+ * then both sides need to be re-shaped, as the
+ * result might be different. On the flip side,
+ * it means that when this flag is not present,
+ * then it's safe to break the glyph-run at the
+ * beginning of this cluster, and the two sides
+ * represent the exact same result one would get
+ * if breaking input text at the beginning of
+ * this cluster and shaping the two sides
+ * separately. This can be used to optimize
+ * paragraph layout, by avoiding re-shaping
+ * of each line after line-breaking, or limiting
+ * the reshaping to a small piece around the
+ * breaking point only.
+ * @HB_GLYPH_FLAG_DEFINED: All the currently defined flags.
+ *
+ * Flags of a glyph, stored in the private mask of its #hb_glyph_info_t.
+ * Read them with hb_glyph_info_get_glyph_flags(), which masks out
+ * everything but %HB_GLYPH_FLAG_DEFINED.
+ *
+ * Since: 1.5.0
+ */
+typedef enum { /*< flags >*/
+ HB_GLYPH_FLAG_UNSAFE_TO_BREAK = 0x00000001,
+
+ HB_GLYPH_FLAG_DEFINED = 0x00000001 /* OR of all defined flags */
+} hb_glyph_flags_t;
+
+HB_EXTERN hb_glyph_flags_t
+hb_glyph_info_get_glyph_flags (const hb_glyph_info_t *info);
+
+#define hb_glyph_info_get_glyph_flags(info) \
+ ((hb_glyph_flags_t) ((unsigned int) (info)->mask & HB_GLYPH_FLAG_DEFINED))
+
+
+/**
+ * hb_glyph_position_t:
+ * @x_advance: how much the line advances after drawing this glyph when setting
+ * text in horizontal direction.
+ * @y_advance: how much the line advances after drawing this glyph when setting
+ * text in vertical direction.
+ * @x_offset: how much the glyph moves on the X-axis before drawing it; this
+ * should not affect how much the line advances.
+ * @y_offset: how much the glyph moves on the Y-axis before drawing it; this
+ * should not affect how much the line advances.
+ *
+ * The #hb_glyph_position_t is the structure that holds the positions of the
+ * glyph in both horizontal and vertical directions. All positions in
+ * #hb_glyph_position_t are relative to the current point.
+ */
+typedef struct hb_glyph_position_t {
+ hb_position_t x_advance;
+ hb_position_t y_advance;
+ hb_position_t x_offset;
+ hb_position_t y_offset;
+
+ /*< private >*/
+ hb_var_int_t var;
+} hb_glyph_position_t;
+
+/**
+ * hb_segment_properties_t:
+ * @direction: the #hb_direction_t of the buffer, see hb_buffer_set_direction().
+ * @script: the #hb_script_t of the buffer, see hb_buffer_set_script().
+ * @language: the #hb_language_t of the buffer, see hb_buffer_set_language().
+ *
+ * The structure that holds various text properties of an #hb_buffer_t. Can be
+ * set and retrieved using hb_buffer_set_segment_properties() and
+ * hb_buffer_get_segment_properties(), respectively.
+ *
+ * %HB_SEGMENT_PROPERTIES_DEFAULT is an initializer that sets direction,
+ * script and language to their invalid (unset) values and the reserved
+ * members to null pointers.
+ */
+typedef struct hb_segment_properties_t {
+ hb_direction_t direction;
+ hb_script_t script;
+ hb_language_t language;
+ /*< private >*/
+ void *reserved1;
+ void *reserved2;
+} hb_segment_properties_t;
+
+#define HB_SEGMENT_PROPERTIES_DEFAULT {HB_DIRECTION_INVALID, \
+ HB_SCRIPT_INVALID, \
+ HB_LANGUAGE_INVALID, \
+ (void *) 0, \
+ (void *) 0}
+
+HB_EXTERN hb_bool_t
+hb_segment_properties_equal (const hb_segment_properties_t *a,
+ const hb_segment_properties_t *b);
+
+HB_EXTERN unsigned int
+hb_segment_properties_hash (const hb_segment_properties_t *p);
+
+
+
+/**
+ * hb_buffer_t:
+ *
+ * The main structure holding the input text and its properties before shaping,
+ * and output glyphs and their information after shaping.
+ *
+ * Only the type name is declared here; buffers are handled through
+ * pointers with the hb_buffer_*() functions declared in this header.
+ */
+
+typedef struct hb_buffer_t hb_buffer_t;
+
+HB_EXTERN hb_buffer_t *
+hb_buffer_create (void);
+
+HB_EXTERN hb_buffer_t *
+hb_buffer_get_empty (void);
+
+HB_EXTERN hb_buffer_t *
+hb_buffer_reference (hb_buffer_t *buffer);
+
+HB_EXTERN void
+hb_buffer_destroy (hb_buffer_t *buffer);
+
+HB_EXTERN hb_bool_t
+hb_buffer_set_user_data (hb_buffer_t *buffer,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace);
+
+HB_EXTERN void *
+hb_buffer_get_user_data (hb_buffer_t *buffer,
+ hb_user_data_key_t *key);
+
+
+/**
+ * hb_buffer_content_type_t:
+ * @HB_BUFFER_CONTENT_TYPE_INVALID: Initial value for new buffer.
+ * @HB_BUFFER_CONTENT_TYPE_UNICODE: The buffer contains input characters (before shaping).
+ * @HB_BUFFER_CONTENT_TYPE_GLYPHS: The buffer contains output glyphs (after shaping).
+ *
+ * The type of content currently held by an #hb_buffer_t, see
+ * hb_buffer_set_content_type() and hb_buffer_get_content_type().
+ */
+typedef enum {
+ HB_BUFFER_CONTENT_TYPE_INVALID = 0,
+ HB_BUFFER_CONTENT_TYPE_UNICODE,
+ HB_BUFFER_CONTENT_TYPE_GLYPHS
+} hb_buffer_content_type_t;
+
+HB_EXTERN void
+hb_buffer_set_content_type (hb_buffer_t *buffer,
+ hb_buffer_content_type_t content_type);
+
+HB_EXTERN hb_buffer_content_type_t
+hb_buffer_get_content_type (hb_buffer_t *buffer);
+
+
+HB_EXTERN void
+hb_buffer_set_unicode_funcs (hb_buffer_t *buffer,
+ hb_unicode_funcs_t *unicode_funcs);
+
+HB_EXTERN hb_unicode_funcs_t *
+hb_buffer_get_unicode_funcs (hb_buffer_t *buffer);
+
+HB_EXTERN void
+hb_buffer_set_direction (hb_buffer_t *buffer,
+ hb_direction_t direction);
+
+HB_EXTERN hb_direction_t
+hb_buffer_get_direction (hb_buffer_t *buffer);
+
+HB_EXTERN void
+hb_buffer_set_script (hb_buffer_t *buffer,
+ hb_script_t script);
+
+HB_EXTERN hb_script_t
+hb_buffer_get_script (hb_buffer_t *buffer);
+
+HB_EXTERN void
+hb_buffer_set_language (hb_buffer_t *buffer,
+ hb_language_t language);
+
+
+HB_EXTERN hb_language_t
+hb_buffer_get_language (hb_buffer_t *buffer);
+
+HB_EXTERN void
+hb_buffer_set_segment_properties (hb_buffer_t *buffer,
+ const hb_segment_properties_t *props);
+
+HB_EXTERN void
+hb_buffer_get_segment_properties (hb_buffer_t *buffer,
+ hb_segment_properties_t *props);
+
+HB_EXTERN void
+hb_buffer_guess_segment_properties (hb_buffer_t *buffer);
+
+
+/**
+ * hb_buffer_flags_t:
+ * @HB_BUFFER_FLAG_DEFAULT: the default buffer flag.
+ * @HB_BUFFER_FLAG_BOT: flag indicating that special handling of the beginning
+ * of text paragraph can be applied to this buffer. Should usually
+ * be set, unless you are passing to the buffer only part
+ * of the text without the full context.
+ * @HB_BUFFER_FLAG_EOT: flag indicating that special handling of the end of text
+ * paragraph can be applied to this buffer, similar to
+ * @HB_BUFFER_FLAG_BOT.
+ * @HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES:
+ * flag indicating that characters with the Default_Ignorable
+ * Unicode property should use the corresponding glyph
+ * from the font, instead of hiding them (done by
+ * replacing them with the space glyph and zeroing the
+ * advance width.) This flag takes precedence over
+ * @HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES.
+ * @HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES:
+ * flag indicating that characters with the Default_Ignorable
+ * Unicode property should be removed from glyph string
+ * instead of hiding them (done by replacing them with the
+ * space glyph and zeroing the advance width.)
+ * @HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES takes
+ * precedence over this flag. Since: 1.8.0
+ * @HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE:
+ * flag indicating that a dotted circle should
+ * not be inserted in the rendering of incorrect
+ * character sequences (such as <0905 093E>). Since: 2.4
+ *
+ * Since: 0.9.20
+ */
+typedef enum { /*< flags >*/
+ HB_BUFFER_FLAG_DEFAULT = 0x00000000u,
+ HB_BUFFER_FLAG_BOT = 0x00000001u, /* Beginning-of-text */
+ HB_BUFFER_FLAG_EOT = 0x00000002u, /* End-of-text */
+ HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES = 0x00000004u,
+ HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES = 0x00000008u,
+ HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE = 0x00000010u
+} hb_buffer_flags_t;
+
+HB_EXTERN void
+hb_buffer_set_flags (hb_buffer_t *buffer,
+ hb_buffer_flags_t flags);
+
+HB_EXTERN hb_buffer_flags_t
+hb_buffer_get_flags (hb_buffer_t *buffer);
+
+/**
+ * hb_buffer_cluster_level_t:
+ * @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES: Return cluster values grouped by graphemes into
+ * monotone order.
+ * @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS: Return cluster values grouped into monotone order.
+ * @HB_BUFFER_CLUSTER_LEVEL_CHARACTERS: Don't group cluster values.
+ * @HB_BUFFER_CLUSTER_LEVEL_DEFAULT: Default cluster level,
+ * equal to @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES.
+ *
+ * Since: 0.9.42
+ */
+typedef enum {
+ HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES = 0,
+ HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS = 1,
+ HB_BUFFER_CLUSTER_LEVEL_CHARACTERS = 2,
+ HB_BUFFER_CLUSTER_LEVEL_DEFAULT = HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES
+} hb_buffer_cluster_level_t;
+
+HB_EXTERN void
+hb_buffer_set_cluster_level (hb_buffer_t *buffer,
+ hb_buffer_cluster_level_t cluster_level);
+
+HB_EXTERN hb_buffer_cluster_level_t
+hb_buffer_get_cluster_level (hb_buffer_t *buffer);
+
+/**
+ * HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT:
+ *
+ * The default code point for replacing invalid characters in a given encoding.
+ * Set to U+FFFD REPLACEMENT CHARACTER.
+ *
+ * Since: 0.9.31
+ */
+#define HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT 0xFFFDu
+
+HB_EXTERN void
+hb_buffer_set_replacement_codepoint (hb_buffer_t *buffer,
+ hb_codepoint_t replacement);
+
+HB_EXTERN hb_codepoint_t
+hb_buffer_get_replacement_codepoint (hb_buffer_t *buffer);
+
+HB_EXTERN void
+hb_buffer_set_invisible_glyph (hb_buffer_t *buffer,
+ hb_codepoint_t invisible);
+
+HB_EXTERN hb_codepoint_t
+hb_buffer_get_invisible_glyph (hb_buffer_t *buffer);
+
+
+HB_EXTERN void
+hb_buffer_reset (hb_buffer_t *buffer);
+
+HB_EXTERN void
+hb_buffer_clear_contents (hb_buffer_t *buffer);
+
+HB_EXTERN hb_bool_t
+hb_buffer_pre_allocate (hb_buffer_t *buffer,
+ unsigned int size);
+
+
+HB_EXTERN hb_bool_t
+hb_buffer_allocation_successful (hb_buffer_t *buffer);
+
+HB_EXTERN void
+hb_buffer_reverse (hb_buffer_t *buffer);
+
+HB_EXTERN void
+hb_buffer_reverse_range (hb_buffer_t *buffer,
+ unsigned int start, unsigned int end);
+
+HB_EXTERN void
+hb_buffer_reverse_clusters (hb_buffer_t *buffer);
+
+
+/* Filling the buffer in */
+
+HB_EXTERN void
+hb_buffer_add (hb_buffer_t *buffer,
+ hb_codepoint_t codepoint,
+ unsigned int cluster);
+
+HB_EXTERN void
+hb_buffer_add_utf8 (hb_buffer_t *buffer,
+ const char *text,
+ int text_length,
+ unsigned int item_offset,
+ int item_length);
+
+HB_EXTERN void
+hb_buffer_add_utf16 (hb_buffer_t *buffer,
+ const uint16_t *text,
+ int text_length,
+ unsigned int item_offset,
+ int item_length);
+
+HB_EXTERN void
+hb_buffer_add_utf32 (hb_buffer_t *buffer,
+ const uint32_t *text,
+ int text_length,
+ unsigned int item_offset,
+ int item_length);
+
+HB_EXTERN void
+hb_buffer_add_latin1 (hb_buffer_t *buffer,
+ const uint8_t *text,
+ int text_length,
+ unsigned int item_offset,
+ int item_length);
+
+HB_EXTERN void
+hb_buffer_add_codepoints (hb_buffer_t *buffer,
+ const hb_codepoint_t *text,
+ int text_length,
+ unsigned int item_offset,
+ int item_length);
+
+HB_EXTERN void
+hb_buffer_append (hb_buffer_t *buffer,
+ hb_buffer_t *source,
+ unsigned int start,
+ unsigned int end);
+
+HB_EXTERN hb_bool_t
+hb_buffer_set_length (hb_buffer_t *buffer,
+ unsigned int length);
+
+HB_EXTERN unsigned int
+hb_buffer_get_length (hb_buffer_t *buffer);
+
+/* Getting glyphs out of the buffer */
+
+HB_EXTERN hb_glyph_info_t *
+hb_buffer_get_glyph_infos (hb_buffer_t *buffer,
+ unsigned int *length);
+
+HB_EXTERN hb_glyph_position_t *
+hb_buffer_get_glyph_positions (hb_buffer_t *buffer,
+ unsigned int *length);
+
+
+HB_EXTERN void
+hb_buffer_normalize_glyphs (hb_buffer_t *buffer);
+
+
+/*
+ * Serialize
+ */
+
+/**
+ * hb_buffer_serialize_flags_t:
+ * @HB_BUFFER_SERIALIZE_FLAG_DEFAULT: serialize glyph names, clusters and positions.
+ * @HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS: do not serialize glyph cluster.
+ * @HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS: do not serialize glyph position information.
+ * @HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES: do not serialize glyph name.
+ * @HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS: serialize glyph extents.
+ * @HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS: serialize glyph flags. Since: 1.5.0
+ * @HB_BUFFER_SERIALIZE_FLAG_NO_ADVANCES: do not serialize glyph advances,
+ * glyph offsets will reflect absolute glyph positions. Since: 1.8.0
+ *
+ * Flags that control which glyph information is serialized in hb_buffer_serialize_glyphs().
+ *
+ * Since: 0.9.20
+ */
+typedef enum { /*< flags >*/
+ HB_BUFFER_SERIALIZE_FLAG_DEFAULT = 0x00000000u,
+ HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS = 0x00000001u,
+ HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS = 0x00000002u,
+ HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES = 0x00000004u,
+ HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS = 0x00000008u,
+ HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS = 0x00000010u,
+ HB_BUFFER_SERIALIZE_FLAG_NO_ADVANCES = 0x00000020u
+} hb_buffer_serialize_flags_t;
+
+/**
+ * hb_buffer_serialize_format_t:
+ * @HB_BUFFER_SERIALIZE_FORMAT_TEXT: a human-readable, plain text format.
+ * @HB_BUFFER_SERIALIZE_FORMAT_JSON: a machine-readable JSON format.
+ * @HB_BUFFER_SERIALIZE_FORMAT_INVALID: invalid format.
+ *
+ * The buffer serialization and de-serialization format used in
+ * hb_buffer_serialize_glyphs() and hb_buffer_deserialize_glyphs().
+ *
+ * Since: 0.9.2
+ */
+typedef enum {
+ HB_BUFFER_SERIALIZE_FORMAT_TEXT = HB_TAG('T','E','X','T'),
+ HB_BUFFER_SERIALIZE_FORMAT_JSON = HB_TAG('J','S','O','N'),
+ HB_BUFFER_SERIALIZE_FORMAT_INVALID = HB_TAG_NONE
+} hb_buffer_serialize_format_t;
+
+HB_EXTERN hb_buffer_serialize_format_t
+hb_buffer_serialize_format_from_string (const char *str, int len);
+
+HB_EXTERN const char *
+hb_buffer_serialize_format_to_string (hb_buffer_serialize_format_t format);
+
+HB_EXTERN const char **
+hb_buffer_serialize_list_formats (void);
+
+HB_EXTERN unsigned int
+hb_buffer_serialize_glyphs (hb_buffer_t *buffer,
+ unsigned int start,
+ unsigned int end,
+ char *buf,
+ unsigned int buf_size,
+ unsigned int *buf_consumed,
+ hb_font_t *font,
+ hb_buffer_serialize_format_t format,
+ hb_buffer_serialize_flags_t flags);
+
+HB_EXTERN hb_bool_t
+hb_buffer_deserialize_glyphs (hb_buffer_t *buffer,
+ const char *buf,
+ int buf_len,
+ const char **end_ptr,
+ hb_font_t *font,
+ hb_buffer_serialize_format_t format);
+
+
+/*
+ * Compare buffers
+ */
+
+typedef enum { /*< flags >*/
+ HB_BUFFER_DIFF_FLAG_EQUAL = 0x0000,
+
+ /* Buffers with different content_type cannot be meaningfully compared
+ * in any further detail. */
+ HB_BUFFER_DIFF_FLAG_CONTENT_TYPE_MISMATCH = 0x0001,
+
+ /* For buffers with differing length, the per-glyph comparison is not
+ * attempted, though we do still scan reference for dottedcircle / .notdef
+ * glyphs. */
+ HB_BUFFER_DIFF_FLAG_LENGTH_MISMATCH = 0x0002,
+
+ /* We want to know if dottedcircle / .notdef glyphs are present in the
+ * reference, as we may not care so much about other differences in this
+ * case. */
+ HB_BUFFER_DIFF_FLAG_NOTDEF_PRESENT = 0x0004,
+ HB_BUFFER_DIFF_FLAG_DOTTED_CIRCLE_PRESENT = 0x0008,
+
+ /* If the buffers have the same length, we compare them glyph-by-glyph
+ * and report which aspect(s) of the glyph info/position are different. */
+ HB_BUFFER_DIFF_FLAG_CODEPOINT_MISMATCH = 0x0010,
+ HB_BUFFER_DIFF_FLAG_CLUSTER_MISMATCH = 0x0020,
+ HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH = 0x0040,
+ HB_BUFFER_DIFF_FLAG_POSITION_MISMATCH = 0x0080
+
+} hb_buffer_diff_flags_t;
+
+/* Compare the contents of two buffers, report types of differences. */
+HB_EXTERN hb_buffer_diff_flags_t
+hb_buffer_diff (hb_buffer_t *buffer,
+ hb_buffer_t *reference,
+ hb_codepoint_t dottedcircle_glyph,
+ unsigned int position_fuzz);
+
+
+/*
+ * Debugging.
+ */
+
+typedef hb_bool_t (*hb_buffer_message_func_t) (hb_buffer_t *buffer,
+ hb_font_t *font,
+ const char *message,
+ void *user_data);
+
+HB_EXTERN void
+hb_buffer_set_message_func (hb_buffer_t *buffer,
+ hb_buffer_message_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
+
+HB_END_DECLS
+
+#endif /* HB_BUFFER_H */
diff --git a/thirdparty/harfbuzz/src/hb-buffer.hh b/thirdparty/harfbuzz/src/hb-buffer.hh
new file mode 100644
index 0000000000..3420ba434a
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-buffer.hh
@@ -0,0 +1,451 @@
+/*
+ * Copyright © 1998-2004 David Turner and Werner Lemberg
+ * Copyright © 2004,2007,2009,2010 Red Hat, Inc.
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Owen Taylor, Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_BUFFER_HH
+#define HB_BUFFER_HH
+
+#include "hb.hh"
+#include "hb-unicode.hh"
+
+
+#ifndef HB_BUFFER_MAX_LEN_FACTOR
+#define HB_BUFFER_MAX_LEN_FACTOR 32
+#endif
+#ifndef HB_BUFFER_MAX_LEN_MIN
+#define HB_BUFFER_MAX_LEN_MIN 8192
+#endif
+#ifndef HB_BUFFER_MAX_LEN_DEFAULT
+#define HB_BUFFER_MAX_LEN_DEFAULT 0x3FFFFFFF /* Shaping more than a billion chars? Let us know! */
+#endif
+
+#ifndef HB_BUFFER_MAX_OPS_FACTOR
+#define HB_BUFFER_MAX_OPS_FACTOR 64
+#endif
+#ifndef HB_BUFFER_MAX_OPS_MIN
+#define HB_BUFFER_MAX_OPS_MIN 1024
+#endif
+#ifndef HB_BUFFER_MAX_OPS_DEFAULT
+#define HB_BUFFER_MAX_OPS_DEFAULT 0x1FFFFFFF /* Shaping more than a billion operations? Let us know! */
+#endif
+
+static_assert ((sizeof (hb_glyph_info_t) == 20), "");
+static_assert ((sizeof (hb_glyph_info_t) == sizeof (hb_glyph_position_t)), "");
+
+HB_MARK_AS_FLAG_T (hb_buffer_flags_t);
+HB_MARK_AS_FLAG_T (hb_buffer_serialize_flags_t);
+HB_MARK_AS_FLAG_T (hb_buffer_diff_flags_t);
+
+enum hb_buffer_scratch_flags_t {
+ HB_BUFFER_SCRATCH_FLAG_DEFAULT = 0x00000000u,
+ HB_BUFFER_SCRATCH_FLAG_HAS_NON_ASCII = 0x00000001u,
+ HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES = 0x00000002u,
+ HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK = 0x00000004u,
+ HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT = 0x00000008u,
+ HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK = 0x00000010u,
+ HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000020u,
+
+ /* Reserved for complex shapers' internal use. */
+ HB_BUFFER_SCRATCH_FLAG_COMPLEX0 = 0x01000000u,
+ HB_BUFFER_SCRATCH_FLAG_COMPLEX1 = 0x02000000u,
+ HB_BUFFER_SCRATCH_FLAG_COMPLEX2 = 0x04000000u,
+ HB_BUFFER_SCRATCH_FLAG_COMPLEX3 = 0x08000000u,
+};
+HB_MARK_AS_FLAG_T (hb_buffer_scratch_flags_t);
+
+
+/*
+ * hb_buffer_t
+ */
+
+struct hb_buffer_t
+{
+ hb_object_header_t header;
+
+ /* Information about how the text in the buffer should be treated */
+ hb_unicode_funcs_t *unicode; /* Unicode functions */
+ hb_buffer_flags_t flags; /* BOT / EOT / etc. */
+ hb_buffer_cluster_level_t cluster_level;
+ hb_codepoint_t replacement; /* U+FFFD or something else. */
+ hb_codepoint_t invisible; /* 0 or something else. */
+ hb_buffer_scratch_flags_t scratch_flags; /* Have space-fallback, etc. */
+ unsigned int max_len; /* Maximum allowed len. */
+ int max_ops; /* Maximum allowed operations. */
+
+ /* Buffer contents */
+ hb_buffer_content_type_t content_type;
+ hb_segment_properties_t props; /* Script, language, direction */
+
+ bool successful; /* Allocations successful */
+ bool have_output; /* Whether we have an output buffer going on */
+ bool have_positions; /* Whether we have positions */
+
+ unsigned int idx; /* Cursor into ->info and ->pos arrays */
+ unsigned int len; /* Length of ->info and ->pos arrays */
+ unsigned int out_len; /* Length of ->out array if have_output */
+
+ unsigned int allocated; /* Length of allocated arrays */
+ hb_glyph_info_t *info;
+ hb_glyph_info_t *out_info;
+ hb_glyph_position_t *pos;
+
+ unsigned int serial;
+
+ /* Text before / after the main buffer contents.
+ * Always in Unicode, and ordered outward.
+ * Index 0 is for "pre-context", 1 for "post-context". */
+ static constexpr unsigned CONTEXT_LENGTH = 5u;
+ hb_codepoint_t context[2][CONTEXT_LENGTH];
+ unsigned int context_len[2];
+
+ /* Debugging API */
+#ifndef HB_NO_BUFFER_MESSAGE
+ hb_buffer_message_func_t message_func;
+ void *message_data;
+ hb_destroy_func_t message_destroy;
+#endif
+
+ /* Internal debugging. */
+ /* The bits here reflect current allocations of the bytes in glyph_info_t's var1 and var2. */
+#ifndef HB_NDEBUG
+ uint8_t allocated_var_bits;
+#endif
+
+
+ /* Methods */
+
+ bool in_error () const { return !successful; }
+
+ void allocate_var (unsigned int start, unsigned int count)
+ {
+#ifndef HB_NDEBUG
+ unsigned int end = start + count;
+ assert (end <= 8);
+ unsigned int bits = (1u<<end) - (1u<<start);
+ assert (0 == (allocated_var_bits & bits));
+ allocated_var_bits |= bits;
+#endif
+ }
+ void deallocate_var (unsigned int start, unsigned int count)
+ {
+#ifndef HB_NDEBUG
+ unsigned int end = start + count;
+ assert (end <= 8);
+ unsigned int bits = (1u<<end) - (1u<<start);
+ assert (bits == (allocated_var_bits & bits));
+ allocated_var_bits &= ~bits;
+#endif
+ }
+ void assert_var (unsigned int start, unsigned int count)
+ {
+#ifndef HB_NDEBUG
+ unsigned int end = start + count;
+ assert (end <= 8);
+ unsigned int bits = (1u<<end) - (1u<<start);
+ assert (bits == (allocated_var_bits & bits));
+#endif
+ }
+ void deallocate_var_all ()
+ {
+#ifndef HB_NDEBUG
+ allocated_var_bits = 0;
+#endif
+ }
+
+ hb_glyph_info_t &cur (unsigned int i = 0) { return info[idx + i]; }
+ hb_glyph_info_t cur (unsigned int i = 0) const { return info[idx + i]; }
+
+ hb_glyph_position_t &cur_pos (unsigned int i = 0) { return pos[idx + i]; }
+ hb_glyph_position_t cur_pos (unsigned int i = 0) const { return pos[idx + i]; }
+
+ hb_glyph_info_t &prev () { return out_info[out_len ? out_len - 1 : 0]; }
+ hb_glyph_info_t prev () const { return out_info[out_len ? out_len - 1 : 0]; }
+
+ bool has_separate_output () const { return info != out_info; }
+
+
+ HB_INTERNAL void reset ();
+ HB_INTERNAL void clear ();
+
+ unsigned int backtrack_len () const { return have_output? out_len : idx; }
+ unsigned int lookahead_len () const { return len - idx; }
+ unsigned int next_serial () { return serial++; }
+
+ HB_INTERNAL void add (hb_codepoint_t codepoint,
+ unsigned int cluster);
+ HB_INTERNAL void add_info (const hb_glyph_info_t &glyph_info);
+
+ HB_INTERNAL void reverse_range (unsigned int start, unsigned int end);
+ HB_INTERNAL void reverse ();
+ HB_INTERNAL void reverse_clusters ();
+ HB_INTERNAL void guess_segment_properties ();
+
+ HB_INTERNAL void swap_buffers ();
+ HB_INTERNAL void remove_output ();
+ HB_INTERNAL void clear_output ();
+ HB_INTERNAL void clear_positions ();
+
+ HB_INTERNAL void replace_glyphs (unsigned int num_in,
+ unsigned int num_out,
+ const hb_codepoint_t *glyph_data);
+
+ void replace_glyph (hb_codepoint_t glyph_index)
+ {
+ if (unlikely (out_info != info || out_len != idx)) {
+ if (unlikely (!make_room_for (1, 1))) return;
+ out_info[out_len] = info[idx];
+ }
+ out_info[out_len].codepoint = glyph_index;
+
+ idx++;
+ out_len++;
+ }
+ /* Copies the glyph at idx to output and sets the copy's codepoint to glyph_index. */
+ hb_glyph_info_t & output_glyph (hb_codepoint_t glyph_index)
+ {
+ if (unlikely (!make_room_for (0, 1))) return Crap (hb_glyph_info_t);
+
+ if (unlikely (idx == len && !out_len))
+ return Crap (hb_glyph_info_t);
+
+ out_info[out_len] = idx < len ? info[idx] : out_info[out_len - 1];
+ out_info[out_len].codepoint = glyph_index;
+
+ out_len++;
+
+ return out_info[out_len - 1];
+ }
+ void output_info (const hb_glyph_info_t &glyph_info)
+ {
+ if (unlikely (!make_room_for (0, 1))) return;
+
+ out_info[out_len] = glyph_info;
+
+ out_len++;
+ }
+ /* Copies glyph at idx to output but doesn't advance idx */
+ void copy_glyph ()
+ {
+ if (unlikely (!make_room_for (0, 1))) return;
+
+ out_info[out_len] = info[idx];
+
+ out_len++;
+ }
+ /* Copies glyph at idx to output and advances idx.
+ * If there's no output, just advance idx. */
+ void
+ next_glyph ()
+ {
+ if (have_output)
+ {
+ if (out_info != info || out_len != idx)
+ {
+ if (unlikely (!make_room_for (1, 1))) return;
+ out_info[out_len] = info[idx];
+ }
+ out_len++;
+ }
+
+ idx++;
+ }
+ /* Copies n glyphs at idx to output and advances idx.
+ * If there's no output, just advance idx. */
+ void
+ next_glyphs (unsigned int n)
+ {
+ if (have_output)
+ {
+ if (out_info != info || out_len != idx)
+ {
+ if (unlikely (!make_room_for (n, n))) return;
+ memmove (out_info + out_len, info + idx, n * sizeof (out_info[0]));
+ }
+ out_len += n;
+ }
+
+ idx += n;
+ }
+ /* Advance idx without copying to output. */
+ void skip_glyph () { idx++; }
+ void reset_masks (hb_mask_t mask)
+ {
+ for (unsigned int j = 0; j < len; j++)
+ info[j].mask = mask;
+ }
+ void add_masks (hb_mask_t mask)
+ {
+ for (unsigned int j = 0; j < len; j++)
+ info[j].mask |= mask;
+ }
+ HB_INTERNAL void set_masks (hb_mask_t value, hb_mask_t mask,
+ unsigned int cluster_start, unsigned int cluster_end);
+
+ void merge_clusters (unsigned int start, unsigned int end)
+ {
+ if (end - start < 2)
+ return;
+ merge_clusters_impl (start, end);
+ }
+ HB_INTERNAL void merge_clusters_impl (unsigned int start, unsigned int end);
+ HB_INTERNAL void merge_out_clusters (unsigned int start, unsigned int end);
+ /* Merge clusters for deleting current glyph, and skip it. */
+ HB_INTERNAL void delete_glyph ();
+
+ void unsafe_to_break (unsigned int start,
+ unsigned int end)
+ {
+ if (end - start < 2)
+ return;
+ unsafe_to_break_impl (start, end);
+ }
+ HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end);
+ HB_INTERNAL void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end);
+
+
+ /* Internal methods */
+ HB_INTERNAL bool move_to (unsigned int i); /* i is output-buffer index. */
+
+ HB_INTERNAL bool enlarge (unsigned int size);
+
+ bool ensure (unsigned int size)
+ { return likely (!size || size < allocated) ? true : enlarge (size); }
+
+ bool ensure_inplace (unsigned int size)
+ { return likely (!size || size < allocated); }
+
+ HB_INTERNAL bool make_room_for (unsigned int num_in, unsigned int num_out);
+ HB_INTERNAL bool shift_forward (unsigned int count);
+
+ typedef long scratch_buffer_t;
+ HB_INTERNAL scratch_buffer_t *get_scratch_buffer (unsigned int *size);
+
+ void clear_context (unsigned int side) { context_len[side] = 0; }
+
+ HB_INTERNAL void sort (unsigned int start, unsigned int end, int(*compar)(const hb_glyph_info_t *, const hb_glyph_info_t *));
+
+ bool messaging ()
+ {
+#ifdef HB_NO_BUFFER_MESSAGE
+ return false;
+#else
+ return unlikely (message_func);
+#endif
+ }
+ bool message (hb_font_t *font, const char *fmt, ...) HB_PRINTF_FUNC(3, 4)
+ {
+#ifdef HB_NO_BUFFER_MESSAGE
+ return true;
+#else
+ if (!messaging ())
+ return true;
+ va_list ap;
+ va_start (ap, fmt);
+ bool ret = message_impl (font, fmt, ap);
+ va_end (ap);
+ return ret;
+#endif
+ }
+ HB_INTERNAL bool message_impl (hb_font_t *font, const char *fmt, va_list ap) HB_PRINTF_FUNC(3, 0);
+
+ static void
+ set_cluster (hb_glyph_info_t &inf, unsigned int cluster, unsigned int mask = 0)
+ {
+ if (inf.cluster != cluster)
+ {
+ if (mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK)
+ inf.mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
+ else
+ inf.mask &= ~HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
+ }
+ inf.cluster = cluster;
+ }
+
+ unsigned int
+ _unsafe_to_break_find_min_cluster (const hb_glyph_info_t *infos,
+ unsigned int start, unsigned int end,
+ unsigned int cluster) const
+ {
+ for (unsigned int i = start; i < end; i++)
+ cluster = hb_min (cluster, infos[i].cluster);
+ return cluster;
+ }
+ void
+ _unsafe_to_break_set_mask (hb_glyph_info_t *infos,
+ unsigned int start, unsigned int end,
+ unsigned int cluster)
+ {
+ for (unsigned int i = start; i < end; i++)
+ if (cluster != infos[i].cluster)
+ {
+ scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK;
+ infos[i].mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
+ }
+ }
+
+ void unsafe_to_break_all () { unsafe_to_break_impl (0, len); }
+ void safe_to_break_all ()
+ {
+ for (unsigned int i = 0; i < len; i++)
+ info[i].mask &= ~HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
+ }
+};
+DECLARE_NULL_INSTANCE (hb_buffer_t);
+
+
+/* Loop over clusters. Duplicated in foreach_syllable(). */
+#define foreach_cluster(buffer, start, end) \
+ for (unsigned int \
+ _count = buffer->len, \
+ start = 0, end = _count ? _next_cluster (buffer, 0) : 0; \
+ start < _count; \
+ start = end, end = _next_cluster (buffer, start))
+
+static inline unsigned int
+_next_cluster (hb_buffer_t *buffer, unsigned int start)
+{
+ hb_glyph_info_t *info = buffer->info;
+ unsigned int count = buffer->len;
+
+ unsigned int cluster = info[start].cluster;
+ while (++start < count && cluster == info[start].cluster)
+ ;
+
+ return start;
+}
+
+
+#define HB_BUFFER_XALLOCATE_VAR(b, func, var) \
+ b->func (offsetof (hb_glyph_info_t, var) - offsetof(hb_glyph_info_t, var1), \
+ sizeof (b->info[0].var))
+#define HB_BUFFER_ALLOCATE_VAR(b, var) HB_BUFFER_XALLOCATE_VAR (b, allocate_var, var ())
+#define HB_BUFFER_DEALLOCATE_VAR(b, var) HB_BUFFER_XALLOCATE_VAR (b, deallocate_var, var ())
+#define HB_BUFFER_ASSERT_VAR(b, var) HB_BUFFER_XALLOCATE_VAR (b, assert_var, var ())
+
+
+#endif /* HB_BUFFER_HH */
diff --git a/thirdparty/harfbuzz/src/hb-cache.hh b/thirdparty/harfbuzz/src/hb-cache.hh
new file mode 100644
index 0000000000..bf26d96be4
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-cache.hh
@@ -0,0 +1,80 @@
+/*
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_CACHE_HH
+#define HB_CACHE_HH
+
+#include "hb.hh"
+
+
+/* Implements a lock-free cache for int->int functions. */
+
+template <unsigned int key_bits, unsigned int value_bits, unsigned int cache_bits>
+struct hb_cache_t
+{
+ static_assert ((key_bits >= cache_bits), "");
+ static_assert ((key_bits + value_bits - cache_bits <= 8 * sizeof (hb_atomic_int_t)), "");
+ static_assert (sizeof (hb_atomic_int_t) == sizeof (unsigned int), "");
+
+ void init () { clear (); }
+ void fini () {}
+
+ void clear ()
+ {
+ for (unsigned i = 0; i < ARRAY_LENGTH (values); i++)
+ values[i].set_relaxed (-1);
+ }
+
+ bool get (unsigned int key, unsigned int *value) const
+ {
+ unsigned int k = key & ((1u<<cache_bits)-1);
+ unsigned int v = values[k].get_relaxed ();
+ if ((key_bits + value_bits - cache_bits == 8 * sizeof (hb_atomic_int_t) && v == (unsigned int) -1) ||
+ (v >> value_bits) != (key >> cache_bits))
+ return false;
+ *value = v & ((1u<<value_bits)-1);
+ return true;
+ }
+
+ bool set (unsigned int key, unsigned int value)
+ {
+ if (unlikely ((key >> key_bits) || (value >> value_bits)))
+ return false; /* Overflows */
+ unsigned int k = key & ((1u<<cache_bits)-1);
+ unsigned int v = ((key>>cache_bits)<<value_bits) | value;
+ values[k].set_relaxed (v);
+ return true;
+ }
+
+ private:
+ hb_atomic_int_t values[1u<<cache_bits];
+};
+
+typedef hb_cache_t<21, 16, 8> hb_cmap_cache_t;
+typedef hb_cache_t<16, 24, 8> hb_advance_cache_t;
+
+
+#endif /* HB_CACHE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-cff-interp-common.hh b/thirdparty/harfbuzz/src/hb-cff-interp-common.hh
new file mode 100644
index 0000000000..91a9b7d0d1
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-cff-interp-common.hh
@@ -0,0 +1,688 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+#ifndef HB_CFF_INTERP_COMMON_HH
+#define HB_CFF_INTERP_COMMON_HH
+
+namespace CFF {
+
+using namespace OT;
+
+typedef unsigned int op_code_t;
+
+
+/* === Dict operators === */
+
+/* One byte operators (0-31) */
+#define OpCode_version 0 /* CFF Top */
+#define OpCode_Notice 1 /* CFF Top */
+#define OpCode_FullName 2 /* CFF Top */
+#define OpCode_FamilyName 3 /* CFF Top */
+#define OpCode_Weight 4 /* CFF Top */
+#define OpCode_FontBBox 5 /* CFF Top */
+#define OpCode_BlueValues 6 /* CFF Private, CFF2 Private */
+#define OpCode_OtherBlues 7 /* CFF Private, CFF2 Private */
+#define OpCode_FamilyBlues 8 /* CFF Private, CFF2 Private */
+#define OpCode_FamilyOtherBlues 9 /* CFF Private, CFF2 Private */
+#define OpCode_StdHW 10 /* CFF Private, CFF2 Private */
+#define OpCode_StdVW 11 /* CFF Private, CFF2 Private */
+#define OpCode_escape 12 /* All. Shared with CS */
+#define OpCode_UniqueID 13 /* CFF Top */
+#define OpCode_XUID 14 /* CFF Top */
+#define OpCode_charset 15 /* CFF Top (0) */
+#define OpCode_Encoding 16 /* CFF Top (0) */
+#define OpCode_CharStrings 17 /* CFF Top, CFF2 Top */
+#define OpCode_Private 18 /* CFF Top, CFF2 FD */
+#define OpCode_Subrs 19 /* CFF Private, CFF2 Private */
+#define OpCode_defaultWidthX 20 /* CFF Private (0) */
+#define OpCode_nominalWidthX 21 /* CFF Private (0) */
+#define OpCode_vsindexdict 22 /* CFF2 Private/CS */
+#define OpCode_blenddict 23 /* CFF2 Private/CS */
+#define OpCode_vstore 24 /* CFF2 Top */
+#define OpCode_reserved25 25
+#define OpCode_reserved26 26
+#define OpCode_reserved27 27
+
+/* Numbers */
+#define OpCode_shortint 28 /* 16-bit integer, All */
+#define OpCode_longintdict 29 /* 32-bit integer, All */
+#define OpCode_BCD 30 /* Real number, CFF2 Top/FD */
+#define OpCode_reserved31 31
+
+/* 1-byte integers */
+#define OpCode_OneByteIntFirst 32 /* All. beginning of the range of first byte ints */
+#define OpCode_OneByteIntLast 246 /* All. ending of the range of first byte int */
+
+/* 2-byte integers */
+#define OpCode_TwoBytePosInt0 247 /* All. first byte of two byte positive int (+108 to +1131) */
+#define OpCode_TwoBytePosInt1 248
+#define OpCode_TwoBytePosInt2 249
+#define OpCode_TwoBytePosInt3 250
+
+#define OpCode_TwoByteNegInt0 251 /* All. first byte of two byte negative int (-1131 to -108) */
+#define OpCode_TwoByteNegInt1 252
+#define OpCode_TwoByteNegInt2 253
+#define OpCode_TwoByteNegInt3 254
+
+/* Two byte escape operators 12, (0-41) */
+#define OpCode_ESC_Base 256
+#define Make_OpCode_ESC(byte2) ((op_code_t)(OpCode_ESC_Base + (byte2)))
+
+inline op_code_t Unmake_OpCode_ESC (op_code_t op) { return static_cast<op_code_t> (op - OpCode_ESC_Base); } /* inverse of Make_OpCode_ESC */
+inline bool Is_OpCode_ESC (op_code_t op) { return !(op < OpCode_ESC_Base); } /* true for opcodes built with Make_OpCode_ESC */
+inline unsigned int OpCode_Size (op_code_t op) { if (Is_OpCode_ESC (op)) return 2; return 1; } /* 2 for escaped opcodes, else 1 */
+
+#define OpCode_Copyright Make_OpCode_ESC(0) /* CFF Top */
+#define OpCode_isFixedPitch Make_OpCode_ESC(1) /* CFF Top (false) */
+#define OpCode_ItalicAngle Make_OpCode_ESC(2) /* CFF Top (0) */
+#define OpCode_UnderlinePosition Make_OpCode_ESC(3) /* CFF Top (-100) */
+#define OpCode_UnderlineThickness Make_OpCode_ESC(4) /* CFF Top (50) */
+#define OpCode_PaintType Make_OpCode_ESC(5) /* CFF Top (0) */
+#define OpCode_CharstringType Make_OpCode_ESC(6) /* CFF Top (2) */
+#define OpCode_FontMatrix Make_OpCode_ESC(7) /* CFF Top, CFF2 Top (.001 0 0 .001 0 0)*/
+#define OpCode_StrokeWidth Make_OpCode_ESC(8) /* CFF Top (0) */
+#define OpCode_BlueScale Make_OpCode_ESC(9) /* CFF Private, CFF2 Private (0.039625) */
+#define OpCode_BlueShift Make_OpCode_ESC(10) /* CFF Private, CFF2 Private (7) */
+#define OpCode_BlueFuzz Make_OpCode_ESC(11) /* CFF Private, CFF2 Private (1) */
+#define OpCode_StemSnapH Make_OpCode_ESC(12) /* CFF Private, CFF2 Private */
+#define OpCode_StemSnapV Make_OpCode_ESC(13) /* CFF Private, CFF2 Private */
+#define OpCode_ForceBold Make_OpCode_ESC(14) /* CFF Private (false) */
+#define OpCode_reservedESC15 Make_OpCode_ESC(15)
+#define OpCode_reservedESC16 Make_OpCode_ESC(16)
+#define OpCode_LanguageGroup Make_OpCode_ESC(17) /* CFF Private, CFF2 Private (0) */
+#define OpCode_ExpansionFactor Make_OpCode_ESC(18) /* CFF Private, CFF2 Private (0.06) */
+#define OpCode_initialRandomSeed Make_OpCode_ESC(19) /* CFF Private (0) */
+#define OpCode_SyntheticBase Make_OpCode_ESC(20) /* CFF Top */
+#define OpCode_PostScript Make_OpCode_ESC(21) /* CFF Top */
+#define OpCode_BaseFontName Make_OpCode_ESC(22) /* CFF Top */
+#define OpCode_BaseFontBlend Make_OpCode_ESC(23) /* CFF Top */
+#define OpCode_reservedESC24 Make_OpCode_ESC(24)
+#define OpCode_reservedESC25 Make_OpCode_ESC(25)
+#define OpCode_reservedESC26 Make_OpCode_ESC(26)
+#define OpCode_reservedESC27 Make_OpCode_ESC(27)
+#define OpCode_reservedESC28 Make_OpCode_ESC(28)
+#define OpCode_reservedESC29 Make_OpCode_ESC(29)
+#define OpCode_ROS Make_OpCode_ESC(30) /* CFF Top_CID */
+#define OpCode_CIDFontVersion Make_OpCode_ESC(31) /* CFF Top_CID (0) */
+#define OpCode_CIDFontRevision Make_OpCode_ESC(32) /* CFF Top_CID (0) */
+#define OpCode_CIDFontType Make_OpCode_ESC(33) /* CFF Top_CID (0) */
+#define OpCode_CIDCount Make_OpCode_ESC(34) /* CFF Top_CID (8720) */
+#define OpCode_UIDBase Make_OpCode_ESC(35) /* CFF Top_CID */
+#define OpCode_FDArray Make_OpCode_ESC(36) /* CFF Top_CID, CFF2 Top */
+#define OpCode_FDSelect Make_OpCode_ESC(37) /* CFF Top_CID, CFF2 Top */
+#define OpCode_FontName Make_OpCode_ESC(38) /* CFF Top_CID */
+
+
+/* === CharString operators === */
+
+#define OpCode_hstem 1 /* CFF, CFF2 */
+#define OpCode_Reserved2 2
+#define OpCode_vstem 3 /* CFF, CFF2 */
+#define OpCode_vmoveto 4 /* CFF, CFF2 */
+#define OpCode_rlineto 5 /* CFF, CFF2 */
+#define OpCode_hlineto 6 /* CFF, CFF2 */
+#define OpCode_vlineto 7 /* CFF, CFF2 */
+#define OpCode_rrcurveto 8 /* CFF, CFF2 */
+#define OpCode_Reserved9 9
+#define OpCode_callsubr 10 /* CFF, CFF2 */
+#define OpCode_return 11 /* CFF */
+//#define OpCode_escape 12 /* CFF, CFF2 */
+#define OpCode_Reserved13 13
+#define OpCode_endchar 14 /* CFF */
+#define OpCode_vsindexcs 15 /* CFF2 */
+#define OpCode_blendcs 16 /* CFF2 */
+#define OpCode_Reserved17 17
+#define OpCode_hstemhm 18 /* CFF, CFF2 */
+#define OpCode_hintmask 19 /* CFF, CFF2 */
+#define OpCode_cntrmask 20 /* CFF, CFF2 */
+#define OpCode_rmoveto 21 /* CFF, CFF2 */
+#define OpCode_hmoveto 22 /* CFF, CFF2 */
+#define OpCode_vstemhm 23 /* CFF, CFF2 */
+#define OpCode_rcurveline 24 /* CFF, CFF2 */
+#define OpCode_rlinecurve 25 /* CFF, CFF2 */
+#define OpCode_vvcurveto 26 /* CFF, CFF2 */
+#define OpCode_hhcurveto 27 /* CFF, CFF2 */
+//#define OpCode_shortint 28 /* CFF, CFF2 */
+#define OpCode_callgsubr 29 /* CFF, CFF2 */
+#define OpCode_vhcurveto 30 /* CFF, CFF2 */
+#define OpCode_hvcurveto 31 /* CFF, CFF2 */
+
+#define OpCode_fixedcs 255 /* 32-bit fixed */
+
+/* Two byte escape operators 12, (0-41) */
+#define OpCode_dotsection Make_OpCode_ESC(0) /* CFF (obsoleted) */
+#define OpCode_ReservedESC1 Make_OpCode_ESC(1)
+#define OpCode_ReservedESC2 Make_OpCode_ESC(2)
+#define OpCode_and Make_OpCode_ESC(3) /* CFF */
+#define OpCode_or Make_OpCode_ESC(4) /* CFF */
+#define OpCode_not Make_OpCode_ESC(5) /* CFF */
+#define OpCode_ReservedESC6 Make_OpCode_ESC(6)
+#define OpCode_ReservedESC7 Make_OpCode_ESC(7)
+#define OpCode_ReservedESC8 Make_OpCode_ESC(8)
+#define OpCode_abs Make_OpCode_ESC(9) /* CFF */
+#define OpCode_add Make_OpCode_ESC(10) /* CFF */
+#define OpCode_sub Make_OpCode_ESC(11) /* CFF */
+#define OpCode_div Make_OpCode_ESC(12) /* CFF */
+#define OpCode_ReservedESC13 Make_OpCode_ESC(13)
+#define OpCode_neg Make_OpCode_ESC(14) /* CFF */
+#define OpCode_eq Make_OpCode_ESC(15) /* CFF */
+#define OpCode_ReservedESC16 Make_OpCode_ESC(16)
+#define OpCode_ReservedESC17 Make_OpCode_ESC(17)
+#define OpCode_drop Make_OpCode_ESC(18) /* CFF */
+#define OpCode_ReservedESC19 Make_OpCode_ESC(19)
+#define OpCode_put Make_OpCode_ESC(20) /* CFF */
+#define OpCode_get Make_OpCode_ESC(21) /* CFF */
+#define OpCode_ifelse Make_OpCode_ESC(22) /* CFF */
+#define OpCode_random Make_OpCode_ESC(23) /* CFF */
+#define OpCode_mul Make_OpCode_ESC(24) /* CFF */
+//#define OpCode_reservedESC25 Make_OpCode_ESC(25)
+#define OpCode_sqrt Make_OpCode_ESC(26) /* CFF */
+#define OpCode_dup Make_OpCode_ESC(27) /* CFF */
+#define OpCode_exch Make_OpCode_ESC(28) /* CFF */
+#define OpCode_index Make_OpCode_ESC(29) /* CFF */
+#define OpCode_roll Make_OpCode_ESC(30) /* CFF */
+#define OpCode_reservedESC31 Make_OpCode_ESC(31)
+#define OpCode_reservedESC32 Make_OpCode_ESC(32)
+#define OpCode_reservedESC33 Make_OpCode_ESC(33)
+#define OpCode_hflex Make_OpCode_ESC(34) /* CFF, CFF2 */
+#define OpCode_flex Make_OpCode_ESC(35) /* CFF, CFF2 */
+#define OpCode_hflex1 Make_OpCode_ESC(36) /* CFF, CFF2 */
+#define OpCode_flex1 Make_OpCode_ESC(37) /* CFF, CFF2 */
+
+
+#define OpCode_Invalid 0xFFFFu
+
+
+struct number_t
+{
+  void init () { value = 0.0; }
+  void fini () {}
+  /* Integer view: stored as-is, read back through an implicit double->int conversion. */
+  void set_int (int v) { set_real (v); }
+  int to_int () const { return value; }
+  /* Fixed-point view: divided by 65536 on the way in, multiplied by 65536 on the way out. */
+  void set_fixed (int32_t v) { set_real (v / 65536.0); }
+  int32_t to_fixed () const { return to_real () * 65536.0; }
+
+  void set_real (double v) { value = v; }
+  double to_real () const { return value; }
+  /* True when the value equals its own truncation to int followed by a cast to int16_t. */
+  bool in_int_range () const
+  { return value == (double) (int16_t) to_int (); }
+
+  bool operator > (const number_t &n) const { return value > n.value; }
+  bool operator < (const number_t &n) const { return n.value > value; }
+  bool operator >= (const number_t &n) const { return !(n.value > value); }
+  bool operator <= (const number_t &n) const { return !(value > n.value); }
+
+  const number_t &operator += (const number_t &n)
+  {
+    /* plain double addition; the sum is kept as a real */
+    value += n.value;
+    return *this;
+  }
+
+  protected:
+  double value;
+};
+
+/* byte string */
+struct UnsizedByteStr : UnsizedArrayOf <HBUINT8>
+{
+ // encode 2-byte int (Dict/CharString) or 4-byte int (Dict)
+ template <typename T, typename V>
+ static bool serialize_int (hb_serialize_context_t *c, op_code_t intOp, V value)
+ {
+ TRACE_SERIALIZE (this);
+
+ HBUINT8 *p = c->allocate_size<HBUINT8> (1); /* one byte for the operator that introduces the integer */
+ if (unlikely (!p)) return_trace (false);
+ *p = intOp;
+
+ T *ip = c->allocate_size<T> (T::static_size); /* then T::static_size bytes for the operand itself */
+ if (unlikely (!ip)) return_trace (false);
+ return_trace (c->check_assign (*ip, value)); /* presumably fails when value does not fit in T -- confirm against hb_serialize_context_t */
+ }
+
+ template <typename V>
+ static bool serialize_int4 (hb_serialize_context_t *c, V value) /* OpCode_longintdict followed by an HBINT32 */
+ { return serialize_int<HBINT32> (c, OpCode_longintdict, value); }
+
+ template <typename V>
+ static bool serialize_int2 (hb_serialize_context_t *c, V value) /* OpCode_shortint followed by an HBINT16 */
+ { return serialize_int<HBINT16> (c, OpCode_shortint, value); }
+
+ /* Defining null_size allows a Null object to be created. Should be safe because:
+ * A descendent struct Dict uses a Null pointer to indicate a missing table,
+ * checked before access.
+ * byte_str_t, a wrapper struct pairing a byte pointer along with its length, always
+ * checks the length before access. A Null pointer is used as the initial pointer
+ * along with zero length by the default ctor.
+ */
+ DEFINE_SIZE_MIN(0);
+};
+
+/* Holder of a section of byte string within a CFFIndex entry */
+struct byte_str_t : hb_ubytes_t
+{
+  byte_str_t ()
+    : hb_ubytes_t () {}
+  byte_str_t (const UnsizedByteStr& s, unsigned int l)
+    : hb_ubytes_t ((const unsigned char*)&s, l) {}
+  byte_str_t (const unsigned char *s, unsigned int l)
+    : hb_ubytes_t (s, l) {}
+  byte_str_t (const hb_ubytes_t &ub) /* conversion from hb_ubytes_t */
+    : hb_ubytes_t (ub) {}
+
+  /* sub-string */
+  byte_str_t sub_str (unsigned int offset, unsigned int len_) const
+  { return byte_str_t (hb_ubytes_t::sub_array (offset, len_)); }
+  /* True if count bytes starting at offset fit in the string; no sum is formed, so it cannot wrap around. */
+  bool check_limit (unsigned int offset, unsigned int count) const
+  { return (offset <= length) && (count <= length - offset); }
+};
+
+/* A byte string associated with the current offset and an error condition */
+struct byte_str_ref_t
+{
+  byte_str_ref_t () { init (); }
+
+  void init ()
+  {
+    str = byte_str_t ();
+    offset = 0;
+    error = false;
+  }
+
+  void fini () {}
+
+  byte_str_ref_t (const byte_str_t &str_, unsigned int offset_ = 0)
+    : str (str_), offset (offset_), error (false) {}
+
+  void reset (const byte_str_t &str_, unsigned int offset_ = 0)
+  {
+    str = str_;
+    offset = offset_;
+    error = false;
+  }
+  /* Byte at offset+i; an out-of-range access sets the error flag and returns Null (unsigned char). */
+  const unsigned char& operator [] (int i) {
+    if (unlikely ((unsigned int) (offset + i) >= str.length))
+    {
+      set_error ();
+      return Null (unsigned char);
+    }
+    return str[offset + i];
+  }
+
+  /* Conversion to byte_str_t */
+  operator byte_str_t () const { return str.sub_str (offset, str.length - offset); }
+
+  byte_str_t sub_str (unsigned int offset_, unsigned int len_) const
+  { return str.sub_str (offset_, len_); }
+  /* True if no error is recorded and str.check_limit accepts count bytes at the current offset. */
+  bool avail (unsigned int count=1) const
+  { return (!in_error () && str.check_limit (offset, count)); }
+  void inc (unsigned int count=1) /* advances by count; on failure parks offset at the end and sets the error flag */
+  {
+    if (likely (!in_error () && (offset <= str.length) && (count <= str.length - offset))) /* subtraction form: offset + count could wrap */
+    {
+      offset += count;
+    }
+    else
+    {
+      offset = str.length;
+      set_error ();
+    }
+  }
+
+  void set_error () { error = true; }
+  bool in_error () const { return error; }
+
+  byte_str_t str;
+  unsigned int offset; /* beginning of the sub-string within str */
+
+  protected:
+  bool error;
+};
+
+using byte_str_array_t = hb_vector_t<byte_str_t>; /* growable list of byte strings */
+
+/* stack */
+template <typename ELEM, int LIMIT>
+struct cff_stack_t
+{
+  void init ()
+  {
+    error = false;
+    count = 0;
+    elements.init ();
+    elements.resize (kSizeLimit); /* storage is sized once, up front; push() never grows it */
+    for (unsigned int i = 0; i < elements.length; i++)
+      elements[i].init ();
+  }
+  void fini () { elements.fini_deep (); }
+  /* Element i; an index at or past the current depth also sets the error flag. */
+  ELEM& operator [] (unsigned int i)
+  {
+    if (unlikely (i >= count)) set_error ();
+    return elements[i];
+  }
+  /* Copies v onto the top; sets the error flag instead when the storage is full. */
+  void push (const ELEM &v)
+  {
+    if (likely (count < elements.length))
+      elements[count++] = v;
+    else
+      set_error ();
+  }
+  ELEM &push () /* claims the next slot and returns it for in-place filling */
+  {
+    if (likely (count < elements.length))
+      return elements[count++];
+    else
+    {
+      set_error ();
+      return Crap (ELEM);
+    }
+  }
+  /* Removes and returns the top element; on an empty stack sets the error flag and returns Crap (ELEM). */
+  ELEM& pop ()
+  {
+    if (likely (count > 0))
+      return elements[--count];
+    else
+    {
+      set_error ();
+      return Crap (ELEM);
+    }
+  }
+  void pop (unsigned int n) /* drops n elements, or sets the error flag if fewer are present */
+  {
+    if (likely (count >= n))
+      count -= n;
+    else
+      set_error ();
+  }
+  /* Top element without removing it; on an empty stack sets the error flag and returns Null (ELEM). */
+  const ELEM& peek ()
+  {
+    if (unlikely (count == 0)) /* count is unsigned: the former `count < 0` test could never fire */
+    {
+      set_error ();
+      return Null (ELEM);
+    }
+    return elements[count - 1];
+  }
+  /* Re-exposes the element just above the current top (undoes one pop) without rewriting it. */
+  void unpop ()
+  {
+    if (likely (count < elements.length))
+      count++;
+    else
+      set_error ();
+  }
+
+  void clear () { count = 0; }
+
+  bool in_error () const { return (error || elements.in_error ()); }
+  void set_error () { error = true; }
+
+  unsigned int get_count () const { return count; }
+  bool is_empty () const { return !count; }
+
+  static constexpr unsigned kSizeLimit = LIMIT;
+
+  protected:
+  bool error;
+  unsigned int count;
+  hb_vector_t<ELEM> elements;
+};
+
+/* argument stack */
+template <typename ARG=number_t>
+struct arg_stack_t : cff_stack_t<ARG, 513>
+{
+  void push_int (int v)
+  {
+    /* claim the next slot, then store the integer in it */
+    S::push ().set_int (v);
+  }
+
+  void push_fixed (int32_t v)
+  {
+    /* same, for a fixed-point operand */
+    S::push ().set_fixed (v);
+  }
+
+  void push_real (double v)
+  {
+    /* same, for a real operand */
+    S::push ().set_real (v);
+  }
+
+  ARG& pop_num () { return S::pop (); }
+
+  int pop_int () { return pop_num ().to_int (); }
+
+  unsigned int pop_uint ()
+  {
+    const int popped = pop_int ();
+    if (likely (popped >= 0))
+      return (unsigned) popped;
+    /* a negative operand is not a valid unsigned value:
+     * record the error on the stack and hand back 0 */
+    S::set_error ();
+    return 0;
+  }
+
+  void push_longint_from_substr (byte_str_ref_t& str_ref)
+  {
+    push_int ((str_ref[0] << 24) | (str_ref[1] << 16) | (str_ref[2] << 8) | (str_ref[3]));
+    str_ref.inc (4); /* step over the four bytes just combined */
+  }
+
+  bool push_fixed_from_substr (byte_str_ref_t& str_ref)
+  {
+    const bool have_4_bytes = str_ref.avail (4);
+    if (unlikely (!have_4_bytes)) return false;
+    push_fixed ((int32_t)*(const HBUINT32*)&str_ref[0]);
+    str_ref.inc (4);
+    return true;
+  }
+
+  hb_array_t<const ARG> get_subarray (unsigned int start) const
+  { return S::elements.sub_array (start); }
+
+  private:
+  using S = cff_stack_t<ARG, 513>;
+};
+
+/* an operator prefixed by its operands in a byte string */
+struct op_str_t
+{
+ void init () {} /* nothing to set up */
+ void fini () {} /* nothing to release */
+
+ op_code_t op; /* the operator */
+ byte_str_t str; /* byte-string slice recorded for this operator */
+};
+
+/* base of OP_SERIALIZER */
+struct op_serializer_t
+{
+  protected:
+  bool copy_opstr (hb_serialize_context_t *c, const op_str_t& opstr) const
+  {
+    TRACE_SERIALIZE (this);
+    const unsigned int n_bytes = opstr.str.length; /* size of the slice to copy verbatim */
+    HBUINT8 *dest = c->allocate_size<HBUINT8> (n_bytes);
+    if (unlikely (!dest)) return_trace (false);
+    memcpy (dest, &opstr.str[0], n_bytes);
+    return_trace (true);
+  }
+};
+
+template <typename VAL>
+struct parsed_values_t
+{
+  void init ()
+  {
+    values.init ();
+    opStart = 0;
+  }
+  void fini () { values.fini_deep (); }
+  /* Appends a fresh entry for op covering bytes [opStart, str_ref.offset), then moves opStart up. */
+  void add_op (op_code_t op, const byte_str_ref_t& str_ref = byte_str_ref_t ())
+  {
+    VAL *entry = values.push ();
+    entry->op = op;
+    entry->str = str_ref.sub_str (opStart, str_ref.offset - opStart);
+    opStart = str_ref.offset;
+  }
+  /* Same, but the new entry starts out as a copy of v before op and str are overwritten. */
+  void add_op (op_code_t op, const byte_str_ref_t& str_ref, const VAL &v)
+  {
+    VAL *entry = values.push (v);
+    entry->op = op;
+    entry->str = str_ref.str.sub_str (opStart, str_ref.offset - opStart);
+    opStart = str_ref.offset;
+  }
+
+  bool has_op (op_code_t op) const
+  {
+    const unsigned int n = values.length;
+    for (unsigned int idx = 0; idx != n; ++idx)
+      if (values[idx].op == op) return true;
+    return false;
+  }
+  unsigned get_count () const { return values.length; }
+  const VAL &get_value (unsigned int i) const { return values[i]; }
+  const VAL &operator [] (unsigned int i) const { return values[i]; }
+
+  unsigned int opStart;
+  hb_vector_t<VAL> values;
+};
+
+template <typename ARG=number_t>
+struct interp_env_t
+{
+  void init (const byte_str_t &str_)
+  {
+    error = false;
+    str_ref.reset (str_);
+    argStack.init ();
+  }
+  void fini () { argStack.fini (); }
+
+  bool in_error () const
+  { if (error || str_ref.in_error ()) return true; return argStack.in_error (); }
+
+  void set_error () { error = true; }
+
+  op_code_t fetch_op ()
+  {
+    /* nothing readable at the current position */
+    if (unlikely (!str_ref.avail ())) return OpCode_Invalid;
+    op_code_t op = (op_code_t)(unsigned char)str_ref[0];
+    if (op == OpCode_escape)
+    {
+      /* NOTE(review): only one byte is checked here although str_ref[1] is read next -- confirm intent */
+      if (unlikely (!str_ref.avail ())) return OpCode_Invalid;
+      op = Make_OpCode_ESC(str_ref[1]);
+      str_ref.inc (); /* step over the escape byte */
+    }
+    str_ref.inc ();
+    return op;
+  }
+
+  const ARG& eval_arg (unsigned int i) { return argStack[i]; }
+
+  ARG& pop_arg () { return argStack.pop (); }
+  void pop_n_args (unsigned int n) { argStack.pop (n); }
+
+  void clear_args () { argStack.pop (argStack.get_count ()); }
+
+  /* read cursor over the byte string being interpreted */
+  byte_str_ref_t str_ref;
+  /* operand stack */
+  arg_stack_t<ARG> argStack;
+  protected:
+  bool error;
+};
+
+using num_interp_env_t = interp_env_t<>; /* interp_env_t with the default ARG (number_t) */
+
+template <typename ARG=number_t>
+struct opset_t
+{
+ static void process_op (op_code_t op, interp_env_t<ARG>& env) /* handles the integer-operand opcodes; any other op is an error */
+ {
+ switch (op) {
+ case OpCode_shortint: /* str_ref[0] is the high byte, str_ref[1] the low byte of a signed 16-bit value */
+ env.argStack.push_int ((int16_t)((env.str_ref[0] << 8) | env.str_ref[1]));
+ env.str_ref.inc (2);
+ break;
+
+ case OpCode_TwoBytePosInt0: case OpCode_TwoBytePosInt1:
+ case OpCode_TwoBytePosInt2: case OpCode_TwoBytePosInt3: /* value = (op - OpCode_TwoBytePosInt0) * 256 + str_ref[0] + 108 */
+ env.argStack.push_int ((int16_t)((op - OpCode_TwoBytePosInt0) * 256 + env.str_ref[0] + 108));
+ env.str_ref.inc ();
+ break;
+
+ case OpCode_TwoByteNegInt0: case OpCode_TwoByteNegInt1:
+ case OpCode_TwoByteNegInt2: case OpCode_TwoByteNegInt3: /* value = -(op - OpCode_TwoByteNegInt0) * 256 - str_ref[0] - 108 */
+ env.argStack.push_int ((-(int16_t)(op - OpCode_TwoByteNegInt0) * 256 - env.str_ref[0] - 108));
+ env.str_ref.inc ();
+ break;
+
+ default:
+ /* 1-byte integer */
+ if (likely ((OpCode_OneByteIntFirst <= op) && (op <= OpCode_OneByteIntLast)))
+ {
+ env.argStack.push_int ((int)op - 139); /* the opcode itself carries the value, offset by 139 */
+ } else {
+ /* invalid unknown operator */
+ env.clear_args ();
+ env.set_error ();
+ }
+ break;
+ }
+ }
+};
+
+template <typename ENV>
+struct interpreter_t
+{
+ ~interpreter_t() { fini (); } /* destruction runs fini(), which finalises env */
+
+ void fini () { env.fini (); }
+
+ ENV env; /* interpreter state; its concrete type is the ENV template argument */
+};
+
+} /* namespace CFF */
+
+#endif /* HB_CFF_INTERP_COMMON_HH */
diff --git a/thirdparty/harfbuzz/src/hb-cff-interp-cs-common.hh b/thirdparty/harfbuzz/src/hb-cff-interp-cs-common.hh
new file mode 100644
index 0000000000..52d778ffe2
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-cff-interp-cs-common.hh
@@ -0,0 +1,911 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+#ifndef HB_CFF_INTERP_CS_COMMON_HH
+#define HB_CFF_INTERP_CS_COMMON_HH
+
+#include "hb.hh"
+#include "hb-cff-interp-common.hh"
+
+namespace CFF {
+
+using namespace OT;
+
+enum cs_type_t { /* kind of byte string a call_context_t refers to */
+ CSType_CharString, /* default kind; used for the string passed to cs_interp_env_t::init */
+ CSType_GlobalSubr, /* passed to call_subr for OpCode_callgsubr */
+ CSType_LocalSubr /* passed to call_subr for OpCode_callsubr */
+};
+
+struct call_context_t
+{
+  /* (Re)binds this frame to a byte-string position, its kind and its subroutine number. */
+  void init (const byte_str_ref_t substr_=byte_str_ref_t (), cs_type_t type_=CSType_CharString, unsigned int subr_num_=0)
+  {
+    subr_num = subr_num_;
+    type = type_;
+    str_ref = substr_;
+  }
+  void fini () {}
+
+  byte_str_ref_t str_ref;
+  cs_type_t type;
+  unsigned int subr_num;
+};
+
+/* call stack */
+const unsigned int kMaxCallLimit = 10; /* capacity of the call stack; call_subr refuses to nest once this depth is reached */
+struct call_stack_t : cff_stack_t<call_context_t, kMaxCallLimit> {}; /* stack of saved call_context_t frames */
+
+template <typename SUBRS>
+struct biased_subrs_t
+{
+  /* Attaches the subroutine index and picks the bias from its size:
+   * 107 below 1240 entries, 1131 below 33900, otherwise 32768. */
+  void init (const SUBRS *subrs_)
+  {
+    subrs = subrs_;
+    const unsigned int n = get_count ();
+    bias = (n < 1240)  ? 107
+         : (n < 33900) ? 1131
+         :               32768;
+  }
+
+  void fini () {}
+
+  unsigned int get_count () const { return subrs ? subrs->count : 0; }
+  unsigned int get_bias () const { return bias; }
+
+  /* Bounds-checked lookup of subroutine number index (already biased by the caller). */
+  byte_str_t operator [] (unsigned int index) const
+  {
+    const bool usable = subrs && index < subrs->count;
+    if (likely (usable)) return (*subrs)[index];
+    /* no index attached, or index out of range */
+    return Null (byte_str_t);
+  }
+
+  protected:
+  unsigned int bias;
+  const SUBRS *subrs;
+};
+
+struct point_t
+{
+  /* Resets both coordinates through number_t::init (). */
+  void init () { x.init (); y.init (); }
+
+  /* Sets both coordinates from integers. */
+  void set_int (int _x, int _y)
+  {
+    x.set_int (_x);
+    y.set_int (_y);
+  }
+
+  /* Relative moves: add a delta to one coordinate, or to x and then y. */
+  void move_x (const number_t &dx) { x += dx; }
+  void move_y (const number_t &dy) { y += dy; }
+  void move (const number_t &dx, const number_t &dy) { x += dx; y += dy; }
+  void move (const point_t &d) { move (d.x, d.y); }
+
+  /* coordinates */
+  number_t x;
+  number_t y;
+};
+
+template <typename ARG, typename SUBRS>
+struct cs_interp_env_t : interp_env_t<ARG>
+{
+  void init (const byte_str_t &str, const SUBRS *globalSubrs_, const SUBRS *localSubrs_)
+  {
+    interp_env_t<ARG>::init (str);
+    endchar_flag = false; /* previously indeterminate until the first set_endchar () call */
+    context.init (str, CSType_CharString);
+    seen_moveto = true;
+    seen_hintmask = false;
+    hstem_count = 0;
+    vstem_count = 0;
+    hintmask_size = 0;
+    pt.init ();
+    callStack.init ();
+    globalSubrs.init (globalSubrs_);
+    localSubrs.init (localSubrs_);
+  }
+  void fini ()
+  {
+    interp_env_t<ARG>::fini ();
+
+    callStack.fini ();
+    globalSubrs.fini ();
+    localSubrs.fini ();
+  }
+
+  bool in_error () const
+  {
+    return callStack.in_error () || SUPER::in_error ();
+  }
+  /* Pops the operand, adds the bias and range-checks it; subr_num is 0 and the result false on failure. */
+  bool pop_subr_num (const biased_subrs_t<SUBRS>& biasedSubrs, unsigned int &subr_num)
+  {
+    subr_num = 0;
+    int n = SUPER::argStack.pop_int ();
+    n += biasedSubrs.get_bias ();
+    if (unlikely ((n < 0) || ((unsigned int)n >= biasedSubrs.get_count ())))
+      return false;
+
+    subr_num = (unsigned int)n;
+    return true;
+  }
+  /* Enters the subroutine named by the top operand; a bad number or a full call stack sets the error flag. */
+  void call_subr (const biased_subrs_t<SUBRS>& biasedSubrs, cs_type_t type)
+  {
+    unsigned int subr_num = 0;
+
+    if (unlikely (!pop_subr_num (biasedSubrs, subr_num)
+                  || callStack.get_count () >= kMaxCallLimit))
+    {
+      SUPER::set_error ();
+      return;
+    }
+    context.str_ref = SUPER::str_ref; /* remember where the caller should resume */
+    callStack.push (context);
+    /* switch the current context and the read cursor over to the subroutine's bytes */
+    context.init (biasedSubrs[subr_num], type, subr_num);
+    SUPER::str_ref = context.str_ref;
+  }
+  /* Restores the caller's context; an error on the subroutine's cursor is carried over into the environment. */
+  void return_from_subr ()
+  {
+    if (unlikely (SUPER::str_ref.in_error ()))
+      SUPER::set_error ();
+    context = callStack.pop ();
+    SUPER::str_ref = context.str_ref;
+  }
+  /* Runs once: counts the pending operands as vstem pairs, then sizes the mask at one bit per stem, rounded up to bytes. */
+  void determine_hintmask_size ()
+  {
+    if (!seen_hintmask)
+    {
+      vstem_count += SUPER::argStack.get_count() / 2;
+      hintmask_size = (hstem_count + vstem_count + 7) >> 3;
+      seen_hintmask = true;
+    }
+  }
+
+  void set_endchar (bool endchar_flag_) { endchar_flag = endchar_flag_; }
+  bool is_endchar () const { return endchar_flag; }
+
+  const number_t &get_x () const { return pt.x; }
+  const number_t &get_y () const { return pt.y; }
+  const point_t &get_pt () const { return pt; }
+
+  void moveto (const point_t &pt_ ) { pt = pt_; }
+
+  public:
+  call_context_t context;
+  bool endchar_flag;
+  bool seen_moveto;
+  bool seen_hintmask;
+
+  unsigned int hstem_count;
+  unsigned int vstem_count;
+  unsigned int hintmask_size;
+  call_stack_t callStack;
+  biased_subrs_t<SUBRS> globalSubrs;
+  biased_subrs_t<SUBRS> localSubrs;
+
+  private:
+  point_t pt;
+
+  typedef interp_env_t<ARG> SUPER;
+};
+
+template <typename ENV, typename PARAM>
+struct path_procs_null_t /* default PATH argument of cs_opset_t: every path hook is an empty function */
+{
+ static void rmoveto (ENV &env, PARAM& param) {}
+ static void hmoveto (ENV &env, PARAM& param) {}
+ static void vmoveto (ENV &env, PARAM& param) {}
+ static void rlineto (ENV &env, PARAM& param) {}
+ static void hlineto (ENV &env, PARAM& param) {}
+ static void vlineto (ENV &env, PARAM& param) {}
+ static void rrcurveto (ENV &env, PARAM& param) {}
+ static void rcurveline (ENV &env, PARAM& param) {}
+ static void rlinecurve (ENV &env, PARAM& param) {}
+ static void vvcurveto (ENV &env, PARAM& param) {}
+ static void hhcurveto (ENV &env, PARAM& param) {}
+ static void vhcurveto (ENV &env, PARAM& param) {}
+ static void hvcurveto (ENV &env, PARAM& param) {}
+ static void moveto (ENV &env, PARAM& param, const point_t &pt) {}
+ static void line (ENV &env, PARAM& param, const point_t &pt1) {}
+ static void curve (ENV &env, PARAM& param, const point_t &pt1, const point_t &pt2, const point_t &pt3) {}
+ static void hflex (ENV &env, PARAM& param) {}
+ static void flex (ENV &env, PARAM& param) {}
+ static void hflex1 (ENV &env, PARAM& param) {}
+ static void flex1 (ENV &env, PARAM& param) {}
+};
+
+template <typename ARG, typename OPSET, typename ENV, typename PARAM, typename PATH=path_procs_null_t<ENV, PARAM>>
+struct cs_opset_t : opset_t<ARG>
+{
+ static void process_op (op_code_t op, ENV &env, PARAM& param)
+ {
+ switch (op) {
+
+ case OpCode_return:
+ env.return_from_subr ();
+ break;
+ case OpCode_endchar:
+ OPSET::check_width (op, env, param);
+ env.set_endchar (true);
+ OPSET::flush_args_and_op (op, env, param);
+ break;
+
+ case OpCode_fixedcs:
+ env.argStack.push_fixed_from_substr (env.str_ref);
+ break;
+
+ case OpCode_callsubr:
+ env.call_subr (env.localSubrs, CSType_LocalSubr);
+ break;
+
+ case OpCode_callgsubr:
+ env.call_subr (env.globalSubrs, CSType_GlobalSubr);
+ break;
+
+ case OpCode_hstem:
+ case OpCode_hstemhm:
+ OPSET::check_width (op, env, param);
+ OPSET::process_hstem (op, env, param);
+ break;
+ case OpCode_vstem:
+ case OpCode_vstemhm:
+ OPSET::check_width (op, env, param);
+ OPSET::process_vstem (op, env, param);
+ break;
+ case OpCode_hintmask:
+ case OpCode_cntrmask:
+ OPSET::check_width (op, env, param);
+ OPSET::process_hintmask (op, env, param);
+ break;
+ case OpCode_rmoveto:
+ OPSET::check_width (op, env, param);
+ PATH::rmoveto (env, param);
+ OPSET::process_post_move (op, env, param);
+ break;
+ case OpCode_hmoveto:
+ OPSET::check_width (op, env, param);
+ PATH::hmoveto (env, param);
+ OPSET::process_post_move (op, env, param);
+ break;
+ case OpCode_vmoveto:
+ OPSET::check_width (op, env, param);
+ PATH::vmoveto (env, param);
+ OPSET::process_post_move (op, env, param);
+ break;
+ case OpCode_rlineto:
+ PATH::rlineto (env, param);
+ process_post_path (op, env, param);
+ break;
+ case OpCode_hlineto:
+ PATH::hlineto (env, param);
+ process_post_path (op, env, param);
+ break;
+ case OpCode_vlineto:
+ PATH::vlineto (env, param);
+ process_post_path (op, env, param);
+ break;
+ case OpCode_rrcurveto:
+ PATH::rrcurveto (env, param);
+ process_post_path (op, env, param);
+ break;
+ case OpCode_rcurveline:
+ PATH::rcurveline (env, param);
+ process_post_path (op, env, param);
+ break;
+ case OpCode_rlinecurve:
+ PATH::rlinecurve (env, param);
+ process_post_path (op, env, param);
+ break;
+ case OpCode_vvcurveto:
+ PATH::vvcurveto (env, param);
+ process_post_path (op, env, param);
+ break;
+ case OpCode_hhcurveto:
+ PATH::hhcurveto (env, param);
+ process_post_path (op, env, param);
+ break;
+ case OpCode_vhcurveto:
+ PATH::vhcurveto (env, param);
+ process_post_path (op, env, param);
+ break;
+ case OpCode_hvcurveto:
+ PATH::hvcurveto (env, param);
+ process_post_path (op, env, param);
+ break;
+
+ case OpCode_hflex:
+ PATH::hflex (env, param);
+ OPSET::process_post_flex (op, env, param);
+ break;
+
+ case OpCode_flex:
+ PATH::flex (env, param);
+ OPSET::process_post_flex (op, env, param);
+ break;
+
+ case OpCode_hflex1:
+ PATH::hflex1 (env, param);
+ OPSET::process_post_flex (op, env, param);
+ break;
+
+ case OpCode_flex1:
+ PATH::flex1 (env, param);
+ OPSET::process_post_flex (op, env, param);
+ break;
+
+ default:
+ SUPER::process_op (op, env);
+ break;
+ }
+ }
+
+ static void process_hstem (op_code_t op, ENV &env, PARAM& param)
+ {
+ env.hstem_count += env.argStack.get_count () / 2;
+ OPSET::flush_args_and_op (op, env, param);
+ }
+
+ static void process_vstem (op_code_t op, ENV &env, PARAM& param)
+ {
+ env.vstem_count += env.argStack.get_count () / 2;
+ OPSET::flush_args_and_op (op, env, param);
+ }
+
+ static void process_hintmask (op_code_t op, ENV &env, PARAM& param)
+ {
+ env.determine_hintmask_size ();
+ if (likely (env.str_ref.avail (env.hintmask_size)))
+ {
+ OPSET::flush_hintmask (op, env, param);
+ env.str_ref.inc (env.hintmask_size);
+ }
+ }
+
+ static void process_post_flex (op_code_t op, ENV &env, PARAM& param)
+ {
+ OPSET::flush_args_and_op (op, env, param);
+ }
+
+ static void check_width (op_code_t op, ENV &env, PARAM& param)
+ {}
+
+ static void process_post_move (op_code_t op, ENV &env, PARAM& param)
+ {
+ if (!env.seen_moveto)
+ {
+ env.determine_hintmask_size ();
+ env.seen_moveto = true;
+ }
+ OPSET::flush_args_and_op (op, env, param);
+ }
+
+ static void process_post_path (op_code_t op, ENV &env, PARAM& param)
+ {
+ OPSET::flush_args_and_op (op, env, param);
+ }
+
+ static void flush_args_and_op (op_code_t op, ENV &env, PARAM& param)
+ {
+ OPSET::flush_args (env, param);
+ OPSET::flush_op (op, env, param);
+ }
+
+  /* Default operand flush: pops every operand currently on the stack. */
+  static void flush_args (ENV &env, PARAM& param)
+  {
+    const unsigned int pending = env.argStack.get_count ();
+    env.pop_n_args (pending);
+  }
+
+  /* Default operator flush: nothing to do. */
+  static void flush_op (op_code_t op, ENV &env, PARAM& param)
+  {}
+
+  /* Default hintmask flush: treated like any other operator, i.e. the
+   * operands and the op are flushed through OPSET. */
+  static void flush_hintmask (op_code_t op, ENV &env, PARAM& param)
+  { OPSET::flush_args_and_op (op, env, param); }
+
+  /* Tells whether `op` introduces a numeric operand rather than an
+   * operator: the 1-byte integer range, the 2-byte positive/negative
+   * integer prefixes, shortint, or the charstring fixed-point prefix. */
+  static bool is_number_op (op_code_t op)
+  {
+    /* 1-byte integer */
+    if ((OpCode_OneByteIntFirst <= op) && (op <= OpCode_OneByteIntLast))
+      return true;
+
+    switch (op)
+    {
+      case OpCode_shortint:
+      case OpCode_fixedcs:
+      case OpCode_TwoBytePosInt0:
+      case OpCode_TwoBytePosInt1:
+      case OpCode_TwoBytePosInt2:
+      case OpCode_TwoBytePosInt3:
+      case OpCode_TwoByteNegInt0:
+      case OpCode_TwoByteNegInt1:
+      case OpCode_TwoByteNegInt2:
+      case OpCode_TwoByteNegInt3:
+        return true;
+
+      default:
+        return false;
+    }
+  }
+
+ protected:
+ typedef opset_t<ARG> SUPER;
+};
+
+template <typename PATH, typename ENV, typename PARAM>
+struct path_procs_t
+{
+  /* Relative moveto: pops dy then dx and moves the current point by
+   * (dx, dy). */
+  static void rmoveto (ENV &env, PARAM& param)
+  {
+    point_t target = env.get_pt ();
+    /* operands come off the stack in reverse: dy is on top of dx */
+    const number_t &delta_y = env.pop_arg ();
+    const number_t &delta_x = env.pop_arg ();
+    target.move (delta_x, delta_y);
+    PATH::moveto (env, param, target);
+  }
+
+  /* Horizontal moveto: pops one operand and shifts the current point
+   * along x by it. */
+  static void hmoveto (ENV &env, PARAM& param)
+  {
+    point_t target = env.get_pt ();
+    target.move_x (env.pop_arg ());
+    PATH::moveto (env, param, target);
+  }
+
+  /* Vertical moveto: pops one operand and shifts the current point
+   * along y by it. */
+  static void vmoveto (ENV &env, PARAM& param)
+  {
+    point_t target = env.get_pt ();
+    target.move_y (env.pop_arg ());
+    PATH::moveto (env, param, target);
+  }
+
+  /* Relative lineto: consumes operands in (dx, dy) pairs, emitting one
+   * line per pair from the then-current point. A trailing unpaired
+   * operand is ignored. */
+  static void rlineto (ENV &env, PARAM& param)
+  {
+    unsigned int idx = 0;
+    while (idx + 2 <= env.argStack.get_count ())
+    {
+      point_t dest = env.get_pt ();
+      dest.move (env.eval_arg (idx), env.eval_arg (idx+1));
+      PATH::line (env, param, dest);
+      idx += 2;
+    }
+  }
+
+  /* Alternating horizontal/vertical lines, starting horizontal. Operands
+   * are taken two at a time (dx then dy, one line each); a leftover
+   * single operand produces one final horizontal line. */
+  static void hlineto (ENV &env, PARAM& param)
+  {
+    point_t dest;
+    unsigned int idx = 0;
+    while (idx + 2 <= env.argStack.get_count ())
+    {
+      dest = env.get_pt ();
+      dest.move_x (env.eval_arg (idx));
+      PATH::line (env, param, dest);
+      dest.move_y (env.eval_arg (idx+1));
+      PATH::line (env, param, dest);
+      idx += 2;
+    }
+
+    if (idx < env.argStack.get_count ())
+    {
+      dest = env.get_pt ();
+      dest.move_x (env.eval_arg (idx));
+      PATH::line (env, param, dest);
+    }
+  }
+
+  /* Alternating vertical/horizontal lines, starting vertical. Operands
+   * are taken two at a time (dy then dx, one line each); a leftover
+   * single operand produces one final vertical line. */
+  static void vlineto (ENV &env, PARAM& param)
+  {
+    point_t dest;
+    unsigned int idx = 0;
+    while (idx + 2 <= env.argStack.get_count ())
+    {
+      dest = env.get_pt ();
+      dest.move_y (env.eval_arg (idx));
+      PATH::line (env, param, dest);
+      dest.move_x (env.eval_arg (idx+1));
+      PATH::line (env, param, dest);
+      idx += 2;
+    }
+
+    if (idx < env.argStack.get_count ())
+    {
+      dest = env.get_pt ();
+      dest.move_y (env.eval_arg (idx));
+      PATH::line (env, param, dest);
+    }
+  }
+
+  /* Relative curves: every six operands give three successive relative
+   * offsets (two control points and the end point) for one curve.
+   * Leftover operands that do not fill a group of six are ignored. */
+  static void rrcurveto (ENV &env, PARAM& param)
+  {
+    unsigned int idx = 0;
+    while (idx + 6 <= env.argStack.get_count ())
+    {
+      point_t ctrl1 = env.get_pt ();
+      ctrl1.move (env.eval_arg (idx), env.eval_arg (idx+1));
+      point_t ctrl2 = ctrl1;
+      ctrl2.move (env.eval_arg (idx+2), env.eval_arg (idx+3));
+      point_t end_pt = ctrl2;
+      end_pt.move (env.eval_arg (idx+4), env.eval_arg (idx+5));
+      PATH::curve (env, param, ctrl1, ctrl2, end_pt);
+      idx += 6;
+    }
+  }
+
+  /* One or more relative curves followed by a single relative line.
+   * Needs at least 8 operands (one curve + one line); fewer is silently
+   * ignored. */
+  static void rcurveline (ENV &env, PARAM& param)
+  {
+    const unsigned int total = env.argStack.get_count ();
+    if (unlikely (total < 8))
+      return;
+
+    /* the last two operands are reserved for the closing line */
+    const unsigned int curve_args = total - 2;
+    unsigned int idx = 0;
+    while (idx + 6 <= curve_args)
+    {
+      point_t ctrl1 = env.get_pt ();
+      ctrl1.move (env.eval_arg (idx), env.eval_arg (idx+1));
+      point_t ctrl2 = ctrl1;
+      ctrl2.move (env.eval_arg (idx+2), env.eval_arg (idx+3));
+      point_t end_pt = ctrl2;
+      end_pt.move (env.eval_arg (idx+4), env.eval_arg (idx+5));
+      PATH::curve (env, param, ctrl1, ctrl2, end_pt);
+      idx += 6;
+    }
+
+    point_t line_end = env.get_pt ();
+    line_end.move (env.eval_arg (idx), env.eval_arg (idx+1));
+    PATH::line (env, param, line_end);
+  }
+
+  /* One or more relative lines followed by a single relative curve.
+   * Needs at least 8 operands (one line + one curve); fewer is silently
+   * ignored. */
+  static void rlinecurve (ENV &env, PARAM& param)
+  {
+    const unsigned int total = env.argStack.get_count ();
+    if (unlikely (total < 8))
+      return;
+
+    /* the last six operands are reserved for the closing curve */
+    const unsigned int line_args = total - 6;
+    unsigned int idx = 0;
+    while (idx + 2 <= line_args)
+    {
+      point_t dest = env.get_pt ();
+      dest.move (env.eval_arg (idx), env.eval_arg (idx+1));
+      PATH::line (env, param, dest);
+      idx += 2;
+    }
+
+    point_t ctrl1 = env.get_pt ();
+    ctrl1.move (env.eval_arg (idx), env.eval_arg (idx+1));
+    point_t ctrl2 = ctrl1;
+    ctrl2.move (env.eval_arg (idx+2), env.eval_arg (idx+3));
+    point_t end_pt = ctrl2;
+    end_pt.move (env.eval_arg (idx+4), env.eval_arg (idx+5));
+    PATH::curve (env, param, ctrl1, ctrl2, end_pt);
+  }
+
+  /* Curves whose first and last segments are vertical. With an odd operand
+   * count the leading operand is an x offset applied to the first control
+   * point; the remaining operands are consumed four per curve. */
+  static void vvcurveto (ENV &env, PARAM& param)
+  {
+    unsigned int idx = 0;
+    point_t ctrl1 = env.get_pt ();
+    if ((env.argStack.get_count () % 2) == 1)
+    {
+      ctrl1.move_x (env.eval_arg (idx));
+      idx++;
+    }
+    while (idx + 4 <= env.argStack.get_count ())
+    {
+      ctrl1.move_y (env.eval_arg (idx));
+      point_t ctrl2 = ctrl1;
+      ctrl2.move (env.eval_arg (idx+1), env.eval_arg (idx+2));
+      point_t end_pt = ctrl2;
+      end_pt.move_y (env.eval_arg (idx+3));
+      PATH::curve (env, param, ctrl1, ctrl2, end_pt);
+      /* next curve starts from wherever the path callback left the pen */
+      ctrl1 = env.get_pt ();
+      idx += 4;
+    }
+  }
+
+  /* Curves whose first and last segments are horizontal. With an odd
+   * operand count the leading operand is a y offset applied to the first
+   * control point; the remaining operands are consumed four per curve. */
+  static void hhcurveto (ENV &env, PARAM& param)
+  {
+    unsigned int idx = 0;
+    point_t ctrl1 = env.get_pt ();
+    if ((env.argStack.get_count () % 2) == 1)
+    {
+      ctrl1.move_y (env.eval_arg (idx));
+      idx++;
+    }
+    while (idx + 4 <= env.argStack.get_count ())
+    {
+      ctrl1.move_x (env.eval_arg (idx));
+      point_t ctrl2 = ctrl1;
+      ctrl2.move (env.eval_arg (idx+1), env.eval_arg (idx+2));
+      point_t end_pt = ctrl2;
+      end_pt.move_x (env.eval_arg (idx+3));
+      PATH::curve (env, param, ctrl1, ctrl2, end_pt);
+      /* next curve starts from wherever the path callback left the pen */
+      ctrl1 = env.get_pt ();
+      idx += 4;
+    }
+  }
+
+ /* Alternating curves, the first one starting vertical and ending
+ * horizontal. Two operand layouts are handled, selected by the operand
+ * count modulo 8:
+ * - remainder >= 4: one leading 4-operand curve, then groups of 8;
+ * - otherwise: groups of 8 only.
+ * In both layouts one extra trailing operand, if present, adjusts the end
+ * point of the very last curve. */
+ static void vhcurveto (ENV &env, PARAM& param)
+ {
+ point_t pt1, pt2, pt3;
+ unsigned int i = 0;
+ if ((env.argStack.get_count () % 8) >= 4)
+ {
+ /* NOTE: these three declarations shadow the function-scope pt1/pt2/pt3
+ * for the whole of this branch. */
+ /* Build the leading vertical-start curve but keep it pending: its end
+ * point may still receive the trailing extra operand. */
+ point_t pt1 = env.get_pt ();
+ pt1.move_y (env.eval_arg (i));
+ point_t pt2 = pt1;
+ pt2.move (env.eval_arg (i+1), env.eval_arg (i+2));
+ point_t pt3 = pt2;
+ pt3.move_x (env.eval_arg (i+3));
+ i += 4;
+
+ for (; i + 8 <= env.argStack.get_count (); i += 8)
+ {
+ /* emit the pending curve from the previous step */
+ PATH::curve (env, param, pt1, pt2, pt3);
+ /* horizontal-start curve, emitted immediately */
+ pt1 = env.get_pt ();
+ pt1.move_x (env.eval_arg (i));
+ pt2 = pt1;
+ pt2.move (env.eval_arg (i+1), env.eval_arg (i+2));
+ pt3 = pt2;
+ pt3.move_y (env.eval_arg (i+3));
+ PATH::curve (env, param, pt1, pt2, pt3);
+
+ /* vertical-start curve, chained from the previous end point and left
+ * pending for the next iteration / the epilogue */
+ pt1 = pt3;
+ pt1.move_y (env.eval_arg (i+4));
+ pt2 = pt1;
+ pt2.move (env.eval_arg (i+5), env.eval_arg (i+6));
+ pt3 = pt2;
+ pt3.move_x (env.eval_arg (i+7));
+ }
+ /* a leftover operand is applied as dy to the pending curve's end point */
+ if (i < env.argStack.get_count ())
+ pt3.move_y (env.eval_arg (i));
+ PATH::curve (env, param, pt1, pt2, pt3);
+ }
+ else
+ {
+ for (; i + 8 <= env.argStack.get_count (); i += 8)
+ {
+ /* vertical-start curve */
+ pt1 = env.get_pt ();
+ pt1.move_y (env.eval_arg (i));
+ pt2 = pt1;
+ pt2.move (env.eval_arg (i+1), env.eval_arg (i+2));
+ pt3 = pt2;
+ pt3.move_x (env.eval_arg (i+3));
+ PATH::curve (env, param, pt1, pt2, pt3);
+
+ /* horizontal-start curve chained from the previous end point */
+ pt1 = pt3;
+ pt1.move_x (env.eval_arg (i+4));
+ pt2 = pt1;
+ pt2.move (env.eval_arg (i+5), env.eval_arg (i+6));
+ pt3 = pt2;
+ pt3.move_y (env.eval_arg (i+7));
+ /* no further full group follows and the count is odd: the extra
+ * operand is a dx for the final end point */
+ if ((env.argStack.get_count () - i < 16) && ((env.argStack.get_count () & 1) != 0))
+ pt3.move_x (env.eval_arg (i+8));
+ PATH::curve (env, param, pt1, pt2, pt3);
+ }
+ }
+ }
+
+ /* Alternating curves, the first one starting horizontal and ending
+ * vertical. Two operand layouts are handled, selected by the operand
+ * count modulo 8:
+ * - remainder >= 4: one leading 4-operand curve, then groups of 8;
+ * - otherwise: groups of 8 only.
+ * In both layouts one extra trailing operand, if present, adjusts the end
+ * point of the very last curve. */
+ static void hvcurveto (ENV &env, PARAM& param)
+ {
+ point_t pt1, pt2, pt3;
+ unsigned int i = 0;
+ if ((env.argStack.get_count () % 8) >= 4)
+ {
+ /* NOTE: these three declarations shadow the function-scope pt1/pt2/pt3
+ * for the whole of this branch. */
+ /* Build the leading horizontal-start curve but keep it pending: its end
+ * point may still receive the trailing extra operand. */
+ point_t pt1 = env.get_pt ();
+ pt1.move_x (env.eval_arg (i));
+ point_t pt2 = pt1;
+ pt2.move (env.eval_arg (i+1), env.eval_arg (i+2));
+ point_t pt3 = pt2;
+ pt3.move_y (env.eval_arg (i+3));
+ i += 4;
+
+ for (; i + 8 <= env.argStack.get_count (); i += 8)
+ {
+ /* emit the pending curve from the previous step */
+ PATH::curve (env, param, pt1, pt2, pt3);
+ /* vertical-start curve, emitted immediately */
+ pt1 = env.get_pt ();
+ pt1.move_y (env.eval_arg (i));
+ pt2 = pt1;
+ pt2.move (env.eval_arg (i+1), env.eval_arg (i+2));
+ pt3 = pt2;
+ pt3.move_x (env.eval_arg (i+3));
+ PATH::curve (env, param, pt1, pt2, pt3);
+
+ /* horizontal-start curve, chained from the previous end point and left
+ * pending for the next iteration / the epilogue */
+ pt1 = pt3;
+ pt1.move_x (env.eval_arg (i+4));
+ pt2 = pt1;
+ pt2.move (env.eval_arg (i+5), env.eval_arg (i+6));
+ pt3 = pt2;
+ pt3.move_y (env.eval_arg (i+7));
+ }
+ /* a leftover operand is applied as dx to the pending curve's end point */
+ if (i < env.argStack.get_count ())
+ pt3.move_x (env.eval_arg (i));
+ PATH::curve (env, param, pt1, pt2, pt3);
+ }
+ else
+ {
+ for (; i + 8 <= env.argStack.get_count (); i += 8)
+ {
+ /* horizontal-start curve */
+ pt1 = env.get_pt ();
+ pt1.move_x (env.eval_arg (i));
+ pt2 = pt1;
+ pt2.move (env.eval_arg (i+1), env.eval_arg (i+2));
+ pt3 = pt2;
+ pt3.move_y (env.eval_arg (i+3));
+ PATH::curve (env, param, pt1, pt2, pt3);
+
+ /* vertical-start curve chained from the previous end point */
+ pt1 = pt3;
+ pt1.move_y (env.eval_arg (i+4));
+ pt2 = pt1;
+ pt2.move (env.eval_arg (i+5), env.eval_arg (i+6));
+ pt3 = pt2;
+ pt3.move_x (env.eval_arg (i+7));
+ /* no further full group follows and the count is odd: the extra
+ * operand is a dy for the final end point */
+ if ((env.argStack.get_count () - i < 16) && ((env.argStack.get_count () & 1) != 0))
+ pt3.move_y (env.eval_arg (i+8));
+ PATH::curve (env, param, pt1, pt2, pt3);
+ }
+ }
+ }
+
+ /* default actions to be overridden */
+  /* Default moveto action: just relocates the environment's current point. */
+  static void moveto (ENV &env, PARAM& param, const point_t &pt)
+  {
+    env.moveto (pt);
+  }
+
+  /* Default line action: no drawing, only advances the current point to
+   * the line's end via PATH::moveto. */
+  static void line (ENV &env, PARAM& param, const point_t &pt1)
+  {
+    PATH::moveto (env, param, pt1);
+  }
+
+  /* Default curve action: no drawing, only advances the current point to
+   * the curve's end point (pt3) via PATH::moveto; the two control points
+   * are ignored. */
+  static void curve (ENV &env, PARAM& param, const point_t &pt1, const point_t &pt2, const point_t &pt3)
+  {
+    PATH::moveto (env, param, pt3);
+  }
+
+  /* hflex: exactly 7 operands describing two joined curves. The fifth
+   * point is forced back to the y of the first control point. Any other
+   * operand count flags an error. */
+  static void hflex (ENV &env, PARAM& param)
+  {
+    if (unlikely (env.argStack.get_count () != 7))
+    {
+      env.set_error ();
+      return;
+    }
+
+    point_t p1 = env.get_pt ();
+    p1.move_x (env.eval_arg (0));
+    point_t p2 = p1;
+    p2.move (env.eval_arg (1), env.eval_arg (2));
+    point_t p3 = p2;
+    p3.move_x (env.eval_arg (3));
+    point_t p4 = p3;
+    p4.move_x (env.eval_arg (4));
+    point_t p5 = p4;
+    p5.move_x (env.eval_arg (5));
+    p5.y = p1.y;
+    point_t p6 = p5;
+    p6.move_x (env.eval_arg (6));
+
+    curve2 (env, param, p1, p2, p3, p4, p5, p6);
+  }
+
+  /* flex: exactly 13 operands; the first twelve are six relative (dx, dy)
+   * offsets for two joined curves, the thirteenth is not read here. Any
+   * other operand count flags an error. */
+  static void flex (ENV &env, PARAM& param)
+  {
+    if (unlikely (env.argStack.get_count () != 13))
+    {
+      env.set_error ();
+      return;
+    }
+
+    point_t p1 = env.get_pt ();
+    p1.move (env.eval_arg (0), env.eval_arg (1));
+    point_t p2 = p1;
+    p2.move (env.eval_arg (2), env.eval_arg (3));
+    point_t p3 = p2;
+    p3.move (env.eval_arg (4), env.eval_arg (5));
+    point_t p4 = p3;
+    p4.move (env.eval_arg (6), env.eval_arg (7));
+    point_t p5 = p4;
+    p5.move (env.eval_arg (8), env.eval_arg (9));
+    point_t p6 = p5;
+    p6.move (env.eval_arg (10), env.eval_arg (11));
+
+    curve2 (env, param, p1, p2, p3, p4, p5, p6);
+  }
+
+  /* hflex1: exactly 9 operands describing two joined curves whose final
+   * point returns to the starting y coordinate. Any other operand count
+   * flags an error. */
+  static void hflex1 (ENV &env, PARAM& param)
+  {
+    if (unlikely (env.argStack.get_count () != 9))
+    {
+      env.set_error ();
+      return;
+    }
+
+    point_t p1 = env.get_pt ();
+    p1.move (env.eval_arg (0), env.eval_arg (1));
+    point_t p2 = p1;
+    p2.move (env.eval_arg (2), env.eval_arg (3));
+    point_t p3 = p2;
+    p3.move_x (env.eval_arg (4));
+    point_t p4 = p3;
+    p4.move_x (env.eval_arg (5));
+    point_t p5 = p4;
+    p5.move (env.eval_arg (6), env.eval_arg (7));
+    point_t p6 = p5;
+    p6.move_x (env.eval_arg (8));
+    p6.y = env.get_pt ().y;
+
+    curve2 (env, param, p1, p2, p3, p4, p5, p6);
+  }
+
+  /* flex1: exactly 11 operands. Ten of them are five relative (dx, dy)
+   * offsets; the eleventh moves the final point along whichever axis has
+   * the larger summed displacement, and the other coordinate is reset to
+   * the starting point's. Any other operand count flags an error. */
+  static void flex1 (ENV &env, PARAM& param)
+  {
+    if (unlikely (env.argStack.get_count () != 11))
+    {
+      env.set_error ();
+      return;
+    }
+
+    /* accumulate the total displacement of the first five offsets */
+    point_t sum;
+    sum.init ();
+    for (unsigned int k = 0; k < 10; k += 2)
+      sum.move (env.eval_arg (k), env.eval_arg (k+1));
+
+    point_t p1 = env.get_pt ();
+    p1.move (env.eval_arg (0), env.eval_arg (1));
+    point_t p2 = p1;
+    p2.move (env.eval_arg (2), env.eval_arg (3));
+    point_t p3 = p2;
+    p3.move (env.eval_arg (4), env.eval_arg (5));
+    point_t p4 = p3;
+    p4.move (env.eval_arg (6), env.eval_arg (7));
+    point_t p5 = p4;
+    p5.move (env.eval_arg (8), env.eval_arg (9));
+    point_t p6 = p5;
+
+    const bool x_dominant = fabs (sum.x.to_real ()) > fabs (sum.y.to_real ());
+    if (x_dominant)
+    {
+      p6.move_x (env.eval_arg (10));
+      p6.y = env.get_pt ().y;
+    }
+    else
+    {
+      p6.x = env.get_pt ().x;
+      p6.move_y (env.eval_arg (10));
+    }
+
+    curve2 (env, param, p1, p2, p3, p4, p5, p6);
+  }
+
+ protected:
+  /* Emits the two curves of a flex: (pt1, pt2, pt3) first, then
+   * (pt4, pt5, pt6), both through PATH::curve. */
+  static void curve2 (ENV &env, PARAM& param,
+                      const point_t &pt1, const point_t &pt2, const point_t &pt3,
+                      const point_t &pt4, const point_t &pt5, const point_t &pt6)
+  {
+    /* first half */
+    PATH::curve (env, param, pt1, pt2, pt3);
+    /* second half */
+    PATH::curve (env, param, pt4, pt5, pt6);
+  }
+};
+
+/* Charstring interpreter driver: fetches operators from the environment
+ * and feeds them to OPSET until the environment reports endchar. */
+template <typename ENV, typename OPSET, typename PARAM>
+struct cs_interpreter_t : interpreter_t<ENV>
+{
+  /* Returns true once endchar has been reached, false as soon as the
+   * environment is in error after processing an operator. */
+  bool interpret (PARAM& param)
+  {
+    SUPER::env.set_endchar (false);
+
+    do
+    {
+      OPSET::process_op (SUPER::env.fetch_op (), SUPER::env, param);
+      if (unlikely (SUPER::env.in_error ()))
+        return false;
+    }
+    while (!SUPER::env.is_endchar ());
+
+    return true;
+  }
+
+  private:
+  typedef interpreter_t<ENV> SUPER;
+};
+
+} /* namespace CFF */
+
+#endif /* HB_CFF_INTERP_CS_COMMON_HH */
diff --git a/thirdparty/harfbuzz/src/hb-cff-interp-dict-common.hh b/thirdparty/harfbuzz/src/hb-cff-interp-dict-common.hh
new file mode 100644
index 0000000000..a520ca3bce
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-cff-interp-dict-common.hh
@@ -0,0 +1,201 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+#ifndef HB_CFF_INTERP_DICT_COMMON_HH
+#define HB_CFF_INTERP_DICT_COMMON_HH
+
+#include "hb-cff-interp-common.hh"
+
+namespace CFF {
+
+using namespace OT;
+
+/* an opstr and the parsed out dict value(s) */
+struct dict_val_t : op_str_t
+{
+  /* Resets the parsed value to integer zero. */
+  void init ()
+  {
+    single_val.set_int (0);
+  }
+
+  /* Nothing to release. */
+  void fini ()
+  {
+  }
+
+  /* the single numeric value parsed for this operator */
+  number_t single_val;
+};
+
+typedef dict_val_t num_dict_val_t;
+
+template <typename VAL> struct dict_values_t : parsed_values_t<VAL> {};
+
+/* Parsed Top DICT values plus the two offsets this layer extracts. */
+template <typename OPSTR=op_str_t>
+struct top_dict_values_t : dict_values_t<OPSTR>
+{
+  /* Initializes the base value list and zeroes both offsets. */
+  void init ()
+  {
+    dict_values_t<OPSTR>::init ();
+    charStringsOffset = FDArrayOffset = 0;
+  }
+
+  void fini ()
+  {
+    dict_values_t<OPSTR>::fini ();
+  }
+
+  unsigned int charStringsOffset;
+  unsigned int FDArrayOffset;
+};
+
+struct dict_opset_t : opset_t<number_t>
+{
+  /* Handles the two DICT-only number encodings (5-byte integer and BCD
+   * real) by pushing the decoded value; every other opcode is delegated
+   * to the generic opset. */
+  static void process_op (op_code_t op, interp_env_t<number_t>& env)
+  {
+    if (op == OpCode_longintdict)
+    {
+      /* 5-byte integer */
+      env.argStack.push_longint_from_substr (env.str_ref);
+    }
+    else if (op == OpCode_BCD)
+    {
+      /* real number */
+      env.argStack.push_real (parse_bcd (env.str_ref));
+    }
+    else
+    {
+      opset_t<number_t>::process_op (op, env);
+    }
+  }
+
+ /* Decodes a CFF DICT real number stored as packed BCD nibbles: the
+ * nibbles are translated into an ASCII string which is then parsed with
+ * hb_parse_double. Consumes bytes from str_ref as it goes.
+ * Returns the parsed value, or .0 with str_ref flagged in error when the
+ * input runs out, a reserved nibble is met, the text does not fit in the
+ * local buffer, or the text fails to parse. Returns .0 immediately if
+ * str_ref is already in error. */
+ static double parse_bcd (byte_str_ref_t& str_ref)
+ {
+ if (unlikely (str_ref.in_error ())) return .0;
+
+ /* nibble values 10..15; 0..9 are the decimal digits */
+ enum Nibble { DECIMAL=10, EXP_POS, EXP_NEG, RESERVED, NEG, END };
+
+ char buf[32];
+ unsigned char byte = 0;
+ /* i counts nibbles read, count counts characters written to buf; they
+ * diverge because EXP_NEG emits two characters */
+ for (unsigned i = 0, count = 0; count < ARRAY_LENGTH (buf); ++i, ++count)
+ {
+ unsigned nibble;
+ if (!(i & 1))
+ {
+ /* even nibble index: fetch the next byte and take its high nibble */
+ if (unlikely (!str_ref.avail ())) break;
+
+ byte = str_ref[0];
+ str_ref.inc ();
+ nibble = byte >> 4;
+ }
+ else
+ /* odd nibble index: low nibble of the byte fetched last time */
+ nibble = byte & 0x0F;
+
+ if (unlikely (nibble == RESERVED)) break;
+ else if (nibble == END)
+ {
+ /* terminator: parse the count characters collected so far */
+ const char *p = buf;
+ double pv;
+ if (unlikely (!hb_parse_double (&p, p + count, &pv, true/* whole buffer */)))
+ break;
+ return pv;
+ }
+ else
+ {
+ /* table indexed by nibble value; '?' marks entries that are never
+ * reached here (RESERVED and END are handled above) */
+ buf[count] = "0123456789.EE?-?"[nibble];
+ if (nibble == EXP_NEG)
+ {
+ /* negative exponent is written as "E-", which needs one more slot */
+ ++count;
+ if (unlikely (count == ARRAY_LENGTH (buf))) break;
+ buf[count] = '-';
+ }
+ }
+ }
+
+ /* every break above, and buffer exhaustion, lands here */
+ str_ref.set_error ();
+ return .0;
+ }
+
+  /* Tells whether `op` is one of the hinting-related DICT operators
+   * listed below. */
+  static bool is_hint_op (op_code_t op)
+  {
+    bool hint = false;
+    switch (op)
+    {
+      case OpCode_BlueValues:
+      case OpCode_OtherBlues:
+      case OpCode_FamilyBlues:
+      case OpCode_FamilyOtherBlues:
+      case OpCode_StemSnapH:
+      case OpCode_StemSnapV:
+      case OpCode_StdHW:
+      case OpCode_StdVW:
+      case OpCode_BlueScale:
+      case OpCode_BlueShift:
+      case OpCode_BlueFuzz:
+      case OpCode_ForceBold:
+      case OpCode_LanguageGroup:
+      case OpCode_ExpansionFactor:
+        hint = true;
+        break;
+      default:
+        break;
+    }
+    return hint;
+  }
+};
+
+/* Top DICT operator set: records the CharStrings and FDArray offsets,
+ * drops FontMatrix operands, and defers everything else to dict_opset_t. */
+template <typename VAL=op_str_t>
+struct top_dict_opset_t : dict_opset_t
+{
+  static void process_op (op_code_t op, interp_env_t<number_t>& env, top_dict_values_t<VAL> & dictval)
+  {
+    switch (op) {
+      case OpCode_CharStrings:
+        dictval.charStringsOffset = env.argStack.pop_uint ();
+        break;
+      case OpCode_FDArray:
+        dictval.FDArrayOffset = env.argStack.pop_uint ();
+        break;
+      case OpCode_FontMatrix:
+        break;
+      default:
+        /* not handled at this level; operands are left for the base opset */
+        dict_opset_t::process_op (op, env);
+        return;
+    }
+
+    /* all three operators handled above finish by clearing the operands */
+    env.clear_args ();
+  }
+};
+
+/* DICT interpreter driver: feeds operators to OPSET for as long as the
+ * input string has bytes available. */
+template <typename OPSET, typename PARAM, typename ENV=num_interp_env_t>
+struct dict_interpreter_t : interpreter_t<ENV>
+{
+  /* Initializes `param`, then interprets the whole DICT. Returns false as
+   * soon as the environment is in error after an operator, true once the
+   * input is exhausted. */
+  bool interpret (PARAM& param)
+  {
+    param.init ();
+    for (;;)
+    {
+      if (!SUPER::env.str_ref.avail ())
+        return true;
+
+      OPSET::process_op (SUPER::env.fetch_op (), SUPER::env, param);
+      if (unlikely (SUPER::env.in_error ()))
+        return false;
+    }
+  }
+
+  private:
+  typedef interpreter_t<ENV> SUPER;
+};
+
+} /* namespace CFF */
+
+#endif /* HB_CFF_INTERP_DICT_COMMON_HH */
diff --git a/thirdparty/harfbuzz/src/hb-cff1-interp-cs.hh b/thirdparty/harfbuzz/src/hb-cff1-interp-cs.hh
new file mode 100644
index 0000000000..1c8762c172
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-cff1-interp-cs.hh
@@ -0,0 +1,161 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+#ifndef HB_CFF1_INTERP_CS_HH
+#define HB_CFF1_INTERP_CS_HH
+
+#include "hb.hh"
+#include "hb-cff-interp-cs-common.hh"
+
+namespace CFF {
+
+using namespace OT;
+
+typedef biased_subrs_t<CFF1Subrs> cff1_biased_subrs_t;
+
+/* CFF1 charstring interpreter environment. On top of the common
+ * charstring state it tracks the optional leading width operand and
+ * whether interpretation is happening inside a seac. */
+struct cff1_cs_interp_env_t : cs_interp_env_t<number_t, CFF1Subrs>
+{
+  /* Sets up the base environment from the accelerator's global subrs and
+   * the local subrs of private dict `fd`, and resets the width/seac state. */
+  template <typename ACC>
+  void init (const byte_str_t &str, ACC &acc, unsigned int fd)
+  {
+    SUPER::init (str, acc.globalSubrs, acc.privateDicts[fd].localSubrs);
+    in_seac = false;
+    arg_start = 0;
+    has_width = false;
+    processed_width = false;
+  }
+
+  void fini ()
+  {
+    SUPER::fini ();
+  }
+
+  /* Records the width operand (bottom of the stack) if the caller says
+   * one is present. Only the first call with a non-empty stack can take
+   * effect; the width is marked as processed in every case. */
+  void set_width (bool has_width_)
+  {
+    const bool may_take_width = !processed_width && (SUPER::argStack.get_count () > 0);
+    if (likely (may_take_width) && has_width_)
+    {
+      width = SUPER::argStack[0];
+      has_width = true;
+      arg_start = 1;
+    }
+    processed_width = true;
+  }
+
+  /* Clears the operand stack and forgets the width slot offset. */
+  void clear_args ()
+  {
+    arg_start = 0;
+    SUPER::clear_args ();
+  }
+
+  void set_in_seac (bool _in_seac)
+  {
+    in_seac = _in_seac;
+  }
+
+  bool processed_width;
+  bool has_width;
+  unsigned int arg_start;
+  number_t width;
+  bool in_seac;
+
+  private:
+  typedef cs_interp_env_t<number_t, CFF1Subrs> SUPER;
+};
+
+template <typename OPSET, typename PARAM, typename PATH=path_procs_null_t<cff1_cs_interp_env_t, PARAM>>
+struct cff1_cs_opset_t : cs_opset_t<number_t, OPSET, cff1_cs_interp_env_t, PARAM, PATH>
+{
+ /* PostScript-originated legacy opcodes (OpCode_add etc) are unsupported */
+ /* Type 1-originated deprecated opcodes, seac behavior of endchar and dotsection are supported */
+
+  /* CFF1-specific dispatch: dotsection is flushed as-is, endchar checks
+   * the width, runs the seac hook when at least four operands remain,
+   * flushes, and marks the charstring finished. Everything else goes to
+   * the common charstring opset. */
+  static void process_op (op_code_t op, cff1_cs_interp_env_t &env, PARAM& param)
+  {
+    if (op == OpCode_dotsection)
+    {
+      SUPER::flush_args_and_op (op, env, param);
+      return;
+    }
+
+    if (op == OpCode_endchar)
+    {
+      OPSET::check_width (op, env, param);
+      const bool seac_like = env.argStack.get_count () >= 4;
+      if (seac_like)
+        OPSET::process_seac (env, param);
+      OPSET::flush_args_and_op (op, env, param);
+      env.set_endchar (true);
+      return;
+    }
+
+    SUPER::process_op (op, env, param);
+  }
+
+  /* Decides, from the operator and the operand count, whether the first
+   * operand is a glyph width, and reports that to the environment. Does
+   * nothing once the width has been processed, or for operators other
+   * than the ones listed. */
+  static void check_width (op_code_t op, cff1_cs_interp_env_t &env, PARAM& param)
+  {
+    if (env.processed_width)
+      return;
+
+    bool width_present = false;
+    switch (op)
+    {
+      /* these take an even number of operands: odd count => width */
+      case OpCode_endchar:
+      case OpCode_hstem:
+      case OpCode_hstemhm:
+      case OpCode_vstem:
+      case OpCode_vstemhm:
+      case OpCode_hintmask:
+      case OpCode_cntrmask:
+        width_present = ((env.argStack.get_count () % 2) == 1);
+        break;
+
+      /* one operand expected */
+      case OpCode_hmoveto:
+      case OpCode_vmoveto:
+        width_present = (env.argStack.get_count () > 1);
+        break;
+
+      /* two operands expected */
+      case OpCode_rmoveto:
+        width_present = (env.argStack.get_count () > 2);
+        break;
+
+      default:
+        return;
+    }
+    env.set_width (width_present);
+  }
+
+  /* Default seac handling: nothing to do. */
+  static void process_seac (cff1_cs_interp_env_t &env, PARAM& param)
+  {}
+
+  /* Flushes the operands through the common opset, then clears the
+   * environment's stack so that a leading width operand goes away too. */
+  static void flush_args (cff1_cs_interp_env_t &env, PARAM& param)
+  {
+    SUPER::flush_args (env, param);
+    /* pop off width */
+    env.clear_args ();
+  }
+
+ private:
+ typedef cs_opset_t<number_t, OPSET, cff1_cs_interp_env_t, PARAM, PATH> SUPER;
+};
+
+/* Charstring interpreter bound to the CFF1 environment; adds nothing of
+ * its own beyond fixing the ENV template argument. */
+template <typename OPSET, typename PARAM>
+struct cff1_cs_interpreter_t : cs_interpreter_t<cff1_cs_interp_env_t, OPSET, PARAM> {};
+
+} /* namespace CFF */
+
+#endif /* HB_CFF1_INTERP_CS_HH */
diff --git a/thirdparty/harfbuzz/src/hb-cff2-interp-cs.hh b/thirdparty/harfbuzz/src/hb-cff2-interp-cs.hh
new file mode 100644
index 0000000000..332ece31cd
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-cff2-interp-cs.hh
@@ -0,0 +1,272 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+#ifndef HB_CFF2_INTERP_CS_HH
+#define HB_CFF2_INTERP_CS_HH
+
+#include "hb.hh"
+#include "hb-cff-interp-cs-common.hh"
+
+namespace CFF {
+
+using namespace OT;
+
+/* A charstring operand that may carry blend deltas in addition to its
+ * default value. */
+struct blend_arg_t : number_t
+{
+  void init ()
+  {
+    number_t::init ();
+    deltas.init ();
+  }
+
+  void fini ()
+  {
+    number_t::fini ();
+    deltas.fini_deep ();
+  }
+
+  /* Assigning a plain value always drops any attached blend deltas first. */
+  void set_int (int v)
+  {
+    reset_blends ();
+    number_t::set_int (v);
+  }
+
+  void set_fixed (int32_t v)
+  {
+    reset_blends ();
+    number_t::set_fixed (v);
+  }
+
+  void set_real (double v)
+  {
+    reset_blends ();
+    number_t::set_real (v);
+  }
+
+  /* Attaches the first `numBlends` entries of `blends_` as this operand's
+   * deltas and records its position within the blend operation. */
+  void set_blends (unsigned int numValues_, unsigned int valueIndex_,
+                   unsigned int numBlends, hb_array_t<const blend_arg_t> blends_)
+  {
+    numValues = numValues_;
+    valueIndex = valueIndex_;
+    deltas.resize (numBlends);
+    for (unsigned int idx = 0; idx != numBlends; ++idx)
+      deltas[idx] = blends_[idx];
+  }
+
+  /* True while at least one delta is attached. */
+  bool blending () const
+  {
+    return deltas.length > 0;
+  }
+
+  void reset_blends ()
+  {
+    valueIndex = 0;
+    numValues = 0;
+    deltas.resize (0);
+  }
+
+  unsigned int numValues;
+  unsigned int valueIndex;
+  hb_vector_t<number_t> deltas;
+};
+
+typedef interp_env_t<blend_arg_t> BlendInterpEnv;
+typedef biased_subrs_t<CFF2Subrs> cff2_biased_subrs_t;
+
+struct cff2_cs_interp_env_t : cs_interp_env_t<blend_arg_t, CFF2Subrs>
+{
+  /* Sets up the base environment from the accelerator's global subrs and
+   * the local subrs of private dict `fd`, then the variation state.
+   * Blending is enabled only when coordinates were supplied and the
+   * variation store reports a non-zero size. */
+  template <typename ACC>
+  void init (const byte_str_t &str, ACC &acc, unsigned int fd,
+             const int *coords_=nullptr, unsigned int num_coords_=0)
+  {
+    SUPER::init (str, acc.globalSubrs, acc.privateDicts[fd].localSubrs);
+
+    seen_vsindex_ = false;
+    seen_blend = false;
+    scalars.init ();
+
+    coords = coords_;
+    num_coords = num_coords_;
+    varStore = acc.varStore;
+    do_blend = num_coords && coords && varStore->size;
+
+    /* start from the private dict's variation store index */
+    set_ivs (acc.privateDicts[fd].ivs);
+  }
+
+  /* Releases the scalar cache before tearing down the base environment. */
+  void fini ()
+  {
+    scalars.fini ();
+
+    SUPER::fini ();
+  }
+
+  /* Fetches the next operator. When the current string is exhausted an
+   * operator is synthesized instead: endchar at the top level, return
+   * while inside a subroutine call. */
+  op_code_t fetch_op ()
+  {
+    if (this->str_ref.avail ())
+      return SUPER::fetch_op ();
+
+    /* make up return or endchar op */
+    return this->callStack.is_empty () ? OpCode_endchar : OpCode_return;
+  }
+
+  /* Returns operand `i` after applying any pending blend to it in place. */
+  const blend_arg_t& eval_arg (unsigned int i)
+  {
+    blend_arg_t &slot = argStack[i];
+    blend_arg (slot);
+    return slot;
+  }
+
+  /* Pops the top operand and returns it after applying any pending blend
+   * to it in place. */
+  const blend_arg_t& pop_arg ()
+  {
+    blend_arg_t &top = argStack.pop ();
+    blend_arg (top);
+    return top;
+  }
+
+  /* One-time preparation for blending: looks up the region count for the
+   * current variation store index and, when blending is enabled, fills
+   * the per-region scalars. Flags an error if the scalar array cannot be
+   * resized. Later calls are no-ops. */
+  void process_blend ()
+  {
+    if (seen_blend)
+      return;
+
+    region_count = varStore->varStore.get_region_index_count (get_ivs ());
+    if (do_blend)
+    {
+      if (likely (scalars.resize (region_count)))
+        varStore->varStore.get_scalars (get_ivs (), coords, num_coords,
+                                        &scalars[0], region_count);
+      else
+        set_error ();
+    }
+    seen_blend = true;
+  }
+
+  /* Pops the new variation store index. It is only accepted if neither a
+   * vsindex nor a blend has been seen before; otherwise an error is
+   * flagged. Either way vsindex counts as seen afterwards. */
+  void process_vsindex ()
+  {
+    const unsigned int new_ivs = argStack.pop_uint ();
+    const bool too_late = seen_vsindex () || seen_blend;
+    if (likely (!too_late))
+      set_ivs (new_ivs);
+    else
+      set_error ();
+    seen_vsindex_ = true;
+  }
+
+ /* Plain accessors. region_count is assigned by process_blend () (or
+ * set_region_count); ivs is the variation store index set by init () and
+ * process_vsindex (). */
+ unsigned int get_region_count () const { return region_count; }
+ void set_region_count (unsigned int region_count_) { region_count = region_count_; }
+ unsigned int get_ivs () const { return ivs; }
+ void set_ivs (unsigned int ivs_) { ivs = ivs_; }
+ /* true once process_vsindex () has run, whether or not it succeeded */
+ bool seen_vsindex () const { return seen_vsindex_; }
+
+ protected:
+  /* Resolves a blended operand in place: adds scalar-weighted deltas to
+   * its default value and stores the result as a real. Left untouched
+   * when blending is disabled, the operand carries no deltas, or the
+   * delta count does not match the scalar count. */
+  void blend_arg (blend_arg_t &arg)
+  {
+    if (!(do_blend && arg.blending ()))
+      return;
+    if (unlikely (scalars.length != arg.deltas.length))
+      return;
+
+    double blended = arg.to_real ();
+    for (unsigned int r = 0; r < scalars.length; r++)
+      blended += (double)scalars[r] * arg.deltas[r].to_real ();
+
+    arg.set_real (blended);
+    arg.deltas.resize (0);
+  }
+
+ protected:
+ const int *coords;
+ unsigned int num_coords;
+ const CFF2VariationStore *varStore;
+ unsigned int region_count;
+ unsigned int ivs;
+ hb_vector_t<float> scalars;
+ bool do_blend;
+ bool seen_vsindex_;
+ bool seen_blend;
+
+ typedef cs_interp_env_t<blend_arg_t, CFF2Subrs> SUPER;
+};
+template <typename OPSET, typename PARAM, typename PATH=path_procs_null_t<cff2_cs_interp_env_t, PARAM>>
+struct cff2_cs_opset_t : cs_opset_t<blend_arg_t, OPSET, cff2_cs_interp_env_t, PARAM, PATH>
+{
+  /* CFF2-specific dispatch: handles blend and vsindex, and rejects a
+   * still-blended operand where a plain number is required. Everything
+   * else goes to the common charstring opset. */
+  static void process_op (op_code_t op, cff2_cs_interp_env_t &env, PARAM& param)
+  {
+    switch (op) {
+      case OpCode_callsubr:
+      case OpCode_callgsubr:
+        /* a subroutine number shouldn't be a blended value */
+        if (unlikely (env.argStack.peek ().blending ()))
+          env.set_error ();
+        else
+          SUPER::process_op (op, env, param);
+        break;
+
+      case OpCode_blendcs:
+        OPSET::process_blend (env, param);
+        break;
+
+      case OpCode_vsindexcs:
+        /* neither should a variation store index */
+        if (unlikely (env.argStack.peek ().blending ()))
+          env.set_error ();
+        else
+          OPSET::process_vsindex (env, param);
+        break;
+
+      default:
+        SUPER::process_op (op, env, param);
+        break;
+    }
+  }
+
+  /* Handles the blend operator. With k regions and n (popped from the
+   * stack) values to blend, the top (k+1)*n operands are n default values
+   * followed by n groups of k deltas. Each group is attached to its
+   * default value, then the k*n delta operands are popped, leaving the n
+   * defaults on the stack.
+   * Flags an error if the stack holds fewer than (k+1)*n operands. n == 0
+   * is accepted (the CFF2 spec doesn't forbid it) and changes nothing. */
+  static void process_blend (cff2_cs_interp_env_t &env, PARAM& param)
+  {
+    env.process_blend ();
+    const unsigned int k = env.get_region_count ();
+    const unsigned int n = env.argStack.pop_uint ();
+    const unsigned int count = env.argStack.get_count ();
+
+    /* Require (k+1)*n <= count. Both k and n come from font data, so the
+     * product is tested by division: a 32-bit multiply could wrap around
+     * and let an impossible operand count through. */
+    if (unlikely (n != 0 && k >= count / n))
+    {
+      env.set_error ();
+      return;
+    }
+
+    /* index of the first default value; cannot underflow after the check */
+    const unsigned int start = count - ((k+1) * n);
+
+    /* copy the blend values into blend array of the default values */
+    for (unsigned int i = 0; i < n; i++)
+    {
+      const hb_array_t<const blend_arg_t> blends = env.argStack.get_subarray (start + n + (i * k));
+      env.argStack[start + i].set_blends (n, i, k, blends);
+    }
+
+    /* pop off blend values leaving default values now adorned with blend values */
+    env.argStack.pop (k * n);
+  }
+
+  /* Handles the vsindex operator: lets the environment consume the index,
+   * then clears whatever operands remain. */
+  static void process_vsindex (cff2_cs_interp_env_t &env, PARAM& param)
+  {
+    env.process_vsindex ();
+
+    env.clear_args ();
+  }
+
+ private:
+ typedef cs_opset_t<blend_arg_t, OPSET, cff2_cs_interp_env_t, PARAM, PATH> SUPER;
+};
+
+/* Charstring interpreter bound to the CFF2 environment; adds nothing of
+ * its own beyond fixing the ENV template argument. */
+template <typename OPSET, typename PARAM>
+struct cff2_cs_interpreter_t : cs_interpreter_t<cff2_cs_interp_env_t, OPSET, PARAM> {};
+
+} /* namespace CFF */
+
+#endif /* HB_CFF2_INTERP_CS_HH */
diff --git a/thirdparty/harfbuzz/src/hb-common.cc b/thirdparty/harfbuzz/src/hb-common.cc
new file mode 100644
index 0000000000..5acfa78431
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-common.cc
@@ -0,0 +1,1098 @@
+/*
+ * Copyright © 2009,2010 Red Hat, Inc.
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+#include "hb-machinery.hh"
+
+#include <locale.h>
+
+#ifdef HB_NO_SETLOCALE
+#define setlocale(Category, Locale) "C"
+#endif
+
+/**
+ * SECTION:hb-common
+ * @title: hb-common
+ * @short_description: Common data types
+ * @include: hb.h
+ *
+ * Common data types used across HarfBuzz are defined here.
+ **/
+
+
+/* hb_options_t */
+
+hb_atomic_int_t _hb_options; /* receives hb_options_union_t::i from _hb_options_init () */
+
+/* Builds the option bits from the colon-separated HB_OPTIONS environment
+ * variable and stores them, with the initialized flag set, in _hb_options. */
+void
+_hb_options_init ()
+{
+  hb_options_union_t u;
+  u.i = 0;
+  u.opts.initialized = true;
+
+  const char *env = getenv ("HB_OPTIONS");
+  for (const char *tok = env; tok && *tok; )
+  {
+    /* Each token runs up to the next ':' or to the end of the string. */
+    const char *tok_end = strchr (tok, ':');
+    if (!tok_end)
+      tok_end = tok + strlen (tok);
+    const size_t tok_len = static_cast<size_t> (tok_end - tok);
+
+#define OPTION(name, symbol) \
+    if (strlen (name) == tok_len && 0 == strncmp (tok, name, tok_len)) do { u.opts.symbol = true; } while (0)
+
+    OPTION ("uniscribe-bug-compatible", uniscribe_bug_compatible);
+
+#undef OPTION
+
+    /* Step over the separator, or stay on the terminating NUL to stop. */
+    tok = *tok_end ? tok_end + 1 : tok_end;
+  }
+
+  /* This is idempotent and threadsafe. */
+  _hb_options.set_relaxed (u.i);
+}
+
+
+/* hb_tag_t */
+
+/**
+ * hb_tag_from_string:
+ * @str: (array length=len) (element-type uint8_t): the string to convert.
+ * @len: length of @str; a negative value or one above 4 is treated as 4.
+ *
+ * Builds an #hb_tag_t from up to four bytes of @str. Reading stops at the
+ * first NUL byte, and the remaining positions are padded with spaces.
+ *
+ * Return value: the resulting tag, or #HB_TAG_NONE if @str is %NULL or
+ * empty, or @len is zero.
+ *
+ * Since: 0.9.2
+ **/
+hb_tag_t
+hb_tag_from_string (const char *str, int len)
+{
+  if (!str || !len || !*str)
+    return HB_TAG_NONE;
+
+  /* Out-of-range lengths are clamped to the four bytes a tag can hold. */
+  const unsigned int limit = (len < 0 || len > 4) ? 4u : (unsigned int) len;
+
+  /* Start from an all-space tag and overwrite the leading bytes. */
+  char tag[4] = {' ', ' ', ' ', ' '};
+  for (unsigned int pos = 0; pos < limit && str[pos]; pos++)
+    tag[pos] = str[pos];
+
+  return HB_TAG (tag[0], tag[1], tag[2], tag[3]);
+}
+
+/**
+ * hb_tag_to_string:
+ * @tag: the #hb_tag_t to convert.
+ * @buf: (out caller-allocates) (array fixed-size=4) (element-type uint8_t):
+ * receives the four bytes of @tag.
+ *
+ * Writes the four bytes of @tag into @buf, most significant byte first.
+ * No terminating NUL is written.
+ *
+ * Since: 0.9.5
+ **/
+void
+hb_tag_to_string (hb_tag_t tag, char *buf)
+{
+  /* Output byte i is the tag shifted right by 24, 16, 8 and 0 bits in turn. */
+  for (unsigned int i = 0; i < 4; i++)
+    buf[i] = (char) (uint8_t) (tag >> (24 - 8 * i));
+}
+
+
+/* hb_direction_t */
+
+const char direction_strings[][4] = { /* indexed by (direction - HB_DIRECTION_LTR); only the first letter is used when parsing */
+ "ltr",
+ "rtl",
+ "ttb",
+ "btt"
+};
+
+/**
+ * hb_direction_from_string:
+ * @str: (array length=len) (element-type uint8_t): the string to convert.
+ * @len: length of @str; only tested for being non-zero.
+ *
+ * Converts a string to an #hb_direction_t. Matching is deliberately loose:
+ * only the first letter of @str is compared, case-insensitively, against
+ * the first letter of "ltr", "rtl", "ttb" and "btt".
+ *
+ * Return value: the matching direction, or #HB_DIRECTION_INVALID if @str
+ * is %NULL or empty, @len is zero, or no direction starts with that letter.
+ *
+ * Since: 0.9.2
+ **/
+hb_direction_t
+hb_direction_from_string (const char *str, int len)
+{
+  if (unlikely (!str || !len || !*str))
+    return HB_DIRECTION_INVALID;
+
+  /* Lets match loosely: just match the first letter, such that
+   * all of "ltr", "left-to-right", etc work!
+   */
+  const char initial = TOLOWER (str[0]);
+  unsigned int idx = 0;
+  while (idx < ARRAY_LENGTH (direction_strings) &&
+	 direction_strings[idx][0] != initial)
+    idx++;
+
+  if (idx == ARRAY_LENGTH (direction_strings))
+    return HB_DIRECTION_INVALID;
+
+  return (hb_direction_t) (HB_DIRECTION_LTR + idx);
+}
+
+/**
+ * hb_direction_to_string:
+ * @direction: the #hb_direction_t to convert.
+ *
+ * Converts an #hb_direction_t to its three-letter name.
+ *
+ * Return value: (transfer none): one of "ltr", "rtl", "ttb" or "btt", or
+ * "invalid" when @direction is outside that range.
+ *
+ * Since: 0.9.2
+ **/
+const char *
+hb_direction_to_string (hb_direction_t direction)
+{
+  /* The unsigned cast sends values below HB_DIRECTION_LTR out of range too. */
+  const unsigned int idx = (unsigned int) (direction - HB_DIRECTION_LTR);
+  if (unlikely (idx >= ARRAY_LENGTH (direction_strings)))
+    return "invalid";
+
+  return direction_strings[idx];
+}
+
+
+/* hb_language_t */
+
+struct hb_language_impl_t { /* what hb_language_to_string () reads through language->s; NOTE(review): presumably hb_language_t points at this type - confirm in hb-common.h */
+ const char s[1]; /* start of the language string; the full storage is malloc'ed in hb_language_item_t::operator= */
+};
+
+static const char canon_map[256] = { /* byte -> canonical language-tag byte, 0 = not allowed; entries 0x80-0xFF are implicitly 0 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', 0, 0, /* 0x20-0x2F: only '-' is kept */
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0, /* digits map to themselves */
+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', /* 'A'-'O' are lowercased */
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, '-', /* 'P'-'Z' are lowercased; '_' becomes '-' */
+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', /* lowercase letters map to themselves */
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0
+};
+
+/* Compares the already-canonical language string @v1 with the raw string
+ * @v2, canonicalizing each byte of @v2 through canon_map on the fly.
+ * A byte of @v2 that maps to 0 acts as its end. */
+static bool
+lang_equal (hb_language_t v1,
+	    const void *v2)
+{
+  const unsigned char *canonical = (const unsigned char *) v1;
+  const unsigned char *candidate = (const unsigned char *) v2;
+
+  for (;; canonical++, candidate++)
+  {
+    if (*canonical != canon_map[*candidate])
+      return false;
+    /* Both sides ended at the same position: the strings are equal. */
+    if (!*canonical)
+      return true;
+  }
+}
+
+#if 0 /* compiled out: hash over the canonicalized bytes of key, h = h * 31 + byte */
+static unsigned int
+lang_hash (const void *key)
+{
+ const unsigned char *p = key;
+ unsigned int h = 0;
+ while (canon_map[*p]) /* stops at the first byte that canon_map sends to 0 */
+ {
+ h = (h << 5) - h + canon_map[*p];
+ p++;
+ }
+
+ return h;
+}
+#endif
+
+
+/* One node of the language list: a link to the next node plus the
+ * heap-allocated, canonicalized language string it owns. */
+struct hb_language_item_t {
+
+  struct hb_language_item_t *next;
+  hb_language_t lang;
+
+  /* True if @s, once canonicalized, equals the stored language. */
+  bool operator == (const char *s) const
+  { return lang_equal (lang, s); }
+
+  /* Stores a canonicalized copy of @s in lang; lang is left null if the
+   * allocation fails. */
+  hb_language_item_t & operator = (const char *s) {
+    /* If a custom allocator is used calling strdup() pairs
+       badly with a call to the custom free() in fini() below.
+       Therefore don't call strdup(), implement its behavior.
+     */
+    const size_t size = strlen(s) + 1;
+    unsigned char *canon = (unsigned char *) malloc(size);
+    lang = (hb_language_t) canon;
+    if (likely (canon))
+    {
+      /* Copy and canonicalize in one pass; bytes that canon_map rejects
+       * become NUL and so end the stored string early. */
+      const unsigned char *src = (const unsigned char *) s;
+      for (size_t i = 0; i + 1 < size; i++)
+	canon[i] = canon_map[src[i]];
+      canon[size - 1] = '\0';
+    }
+
+    return *this;
+  }
+
+  /* Releases the string allocated by operator=. */
+  void fini () { free ((void *) lang); }
+};
+
+
+/* Thread-safe lock-free language list: head of a singly-linked list chained
+ * through hb_language_item_t::next; lang_find_or_insert () pushes new items
+ * at the head with cmpexch (). */
+
+static hb_atomic_ptr_t <hb_language_item_t> langs;
+
+#if HB_USE_ATEXIT
+/* atexit() handler: atomically detaches the whole language list, then
+ * releases every item together with the string it owns. */
+static void
+free_langs ()
+{
+  /* Swap the head for nullptr, retrying until the exchange succeeds. */
+  hb_language_item_t *head;
+  do
+    head = langs;
+  while (unlikely (!langs.cmpexch (head, nullptr)));
+
+  for (hb_language_item_t *item = head; item; )
+  {
+    hb_language_item_t *following = item->next;
+    item->fini ();
+    free (item);
+    item = following;
+  }
+}
+#endif
+
+/* Returns the list item whose language equals @key (after canonicalization),
+ * inserting a new item at the head of the list if none exists yet.
+ * Returns nullptr if an allocation fails. */
+static hb_language_item_t *
+lang_find_or_insert (const char *key)
+{
+  for (;;)
+  {
+    hb_language_item_t *head = langs;
+
+    for (hb_language_item_t *item = head; item; item = item->next)
+      if (*item == key)
+	return item;
+
+    /* Not found; allocate one. */
+    hb_language_item_t *fresh = (hb_language_item_t *) calloc (1, sizeof (hb_language_item_t));
+    if (unlikely (!fresh))
+      return nullptr;
+    fresh->next = head;
+    *fresh = key;
+    if (unlikely (!fresh->lang))
+    {
+      free (fresh);
+      return nullptr;
+    }
+
+    if (likely (langs.cmpexch (head, fresh)))
+    {
+#if HB_USE_ATEXIT
+      if (!head)
+	atexit (free_langs); /* First person registers atexit() callback. */
+#endif
+      return fresh;
+    }
+
+    /* The head changed under us: discard our item and start over. */
+    fresh->fini ();
+    free (fresh);
+  }
+}
+
+
+/**
+ * hb_language_from_string:
+ * @str: (array length=len) (element-type uint8_t): a string representing
+ * a BCP 47 language tag
+ * @len: length of the @str, or -1 if it is %NULL-terminated.
+ *
+ * Converts @str representing a BCP 47 language tag to the corresponding
+ * #hb_language_t. When @len is non-negative the string is first copied
+ * into a 64-byte buffer, so at most 63 bytes of it are used.
+ *
+ * Return value: (transfer none):
+ * The #hb_language_t corresponding to the BCP 47 language tag, or
+ * #HB_LANGUAGE_INVALID if @str is %NULL or empty, @len is zero, or the
+ * language could not be stored.
+ *
+ * Since: 0.9.2
+ **/
+hb_language_t
+hb_language_from_string (const char *str, int len)
+{
+  if (!str || !len || !*str)
+    return HB_LANGUAGE_INVALID;
+
+  char strbuf[64];
+  const char *key = str;
+  if (len >= 0)
+  {
+    /* NUL-terminate it. */
+    len = hb_min (len, (int) sizeof (strbuf) - 1);
+    memcpy (strbuf, str, len);
+    strbuf[len] = '\0';
+    key = strbuf;
+  }
+
+  hb_language_item_t *item = lang_find_or_insert (key);
+
+  return likely (item) ? item->lang : HB_LANGUAGE_INVALID;
+}
+
+/**
+ * hb_language_to_string:
+ * @language: an #hb_language_t to convert.
+ *
+ * The inverse of hb_language_from_string().
+ *
+ * Return value: (transfer none):
+ * A %NULL-terminated string representing the @language, or %NULL if
+ * @language is null. Must not be freed by the caller.
+ *
+ * Since: 0.9.2
+ **/
+const char *
+hb_language_to_string (hb_language_t language)
+{
+  return likely (language) ? language->s : nullptr;
+}
+
+/**
+ * hb_language_get_default:
+ *
+ * Get default language from current locale.
+ *
+ * Note that the first time this function is called, it calls
+ * "setlocale (LC_CTYPE, nullptr)" to fetch current locale. The underlying
+ * setlocale function is, in many implementations, NOT threadsafe. To avoid
+ * problems, call this function once before multiple threads can call it.
+ * This function is only used from hb_buffer_guess_segment_properties() by
+ * HarfBuzz itself.
+ *
+ * Return value: (transfer none): the language derived from the current
+ * locale; the value is cached after the first successful lookup.
+ *
+ * Since: 0.9.2
+ **/
+hb_language_t
+hb_language_get_default ()
+{
+  static hb_atomic_ptr_t <hb_language_t> default_language;
+
+  hb_language_t language = default_language;
+  if (likely (language != HB_LANGUAGE_INVALID))
+    return language;
+
+  /* Nothing cached yet: query the locale and try to publish the result. */
+  language = hb_language_from_string (setlocale (LC_CTYPE, nullptr), -1);
+  (void) default_language.cmpexch (HB_LANGUAGE_INVALID, language);
+
+  return language;
+}
+
+
+/* hb_script_t */
+
+/**
+ * hb_script_from_iso15924_tag:
+ * @tag: an #hb_tag_t representing an ISO 15924 tag.
+ *
+ * Converts an ISO 15924 script tag to a corresponding #hb_script_t.
+ * The case of @tag is normalized first, and a few aliased or variant
+ * tags are folded into their main script.
+ *
+ * Return value:
+ * An #hb_script_t corresponding to the ISO 15924 tag:
+ * #HB_SCRIPT_INVALID for #HB_TAG_NONE, and #HB_SCRIPT_UNKNOWN for a
+ * tag that is not made of four letters.
+ *
+ * Since: 0.9.2
+ **/
+hb_script_t
+hb_script_from_iso15924_tag (hb_tag_t tag)
+{
+  if (unlikely (tag == HB_TAG_NONE))
+    return HB_SCRIPT_INVALID;
+
+  /* Be lenient, adjust case (one capital letter followed by three small letters) */
+  const hb_tag_t normalized = (tag & 0xDFDFDFDFu) | 0x00202020u;
+
+  switch (normalized) {
+
+    /* These graduated from the 'Q' private-area codes, but
+     * the old code is still aliased by Unicode, and the Qaai
+     * one in use by ICU. */
+    case HB_TAG('Q','a','a','i'):
+      return HB_SCRIPT_INHERITED;
+    case HB_TAG('Q','a','a','c'):
+      return HB_SCRIPT_COPTIC;
+
+    /* Script variants from https://unicode.org/iso15924/ */
+    case HB_TAG('C','y','r','s'):
+      return HB_SCRIPT_CYRILLIC;
+    case HB_TAG('L','a','t','f'):
+    case HB_TAG('L','a','t','g'):
+      return HB_SCRIPT_LATIN;
+    case HB_TAG('S','y','r','e'):
+    case HB_TAG('S','y','r','j'):
+    case HB_TAG('S','y','r','n'):
+      return HB_SCRIPT_SYRIAC;
+  }
+
+  /* If it looks right, just use the tag as a script; otherwise, unknown */
+  const bool well_formed = ((uint32_t) normalized & 0xE0E0E0E0u) == 0x40606060u;
+  return well_formed ? (hb_script_t) normalized : HB_SCRIPT_UNKNOWN;
+}
+
+/**
+ * hb_script_from_string:
+ * @str: (array length=len) (element-type uint8_t): a string representing an
+ * ISO 15924 tag.
+ * @len: length of the @str, or -1 if it is %NULL-terminated.
+ *
+ * Converts a string @str representing an ISO 15924 script tag to a
+ * corresponding #hb_script_t. Shorthand for hb_tag_from_string() then
+ * hb_script_from_iso15924_tag().
+ *
+ * Return value:
+ * An #hb_script_t corresponding to the ISO 15924 tag.
+ *
+ * Since: 0.9.2
+ **/
+hb_script_t
+hb_script_from_string (const char *str, int len)
+{
+  const hb_tag_t tag = hb_tag_from_string (str, len);
+  return hb_script_from_iso15924_tag (tag);
+}
+
+/**
+ * hb_script_to_iso15924_tag:
+ * @script: an #hb_script_t to convert.
+ *
+ * See hb_script_from_iso15924_tag(). The conversion is a plain cast of
+ * @script to #hb_tag_t.
+ *
+ * Return value:
+ * An #hb_tag_t representing an ISO 15924 script tag.
+ *
+ * Since: 0.9.2
+ **/
+hb_tag_t
+hb_script_to_iso15924_tag (hb_script_t script)
+{
+ return (hb_tag_t) script;
+}
+
+/**
+ * hb_script_get_horizontal_direction:
+ * @script: the #hb_script_t to query.
+ *
+ * Looks up the horizontal direction associated with @script.
+ *
+ * Return value: #HB_DIRECTION_RTL for the scripts listed in the function
+ * body, #HB_DIRECTION_INVALID for Old Hungarian, Old Italic and Runic, and
+ * #HB_DIRECTION_LTR for every other script.
+ *
+ * Since: 0.9.2
+ **/
+hb_direction_t
+hb_script_get_horizontal_direction (hb_script_t script)
+{
+ /* https://docs.google.com/spreadsheets/d/1Y90M0Ie3MUJ6UVCRDOypOtijlMDLNNyyLk36T6iMu0o */
+ switch ((hb_tag_t) script)
+ {
+ /* Unicode-1.1 additions */
+ case HB_SCRIPT_ARABIC:
+ case HB_SCRIPT_HEBREW:
+
+ /* Unicode-3.0 additions */
+ case HB_SCRIPT_SYRIAC:
+ case HB_SCRIPT_THAANA:
+
+ /* Unicode-4.0 additions */
+ case HB_SCRIPT_CYPRIOT:
+
+ /* Unicode-4.1 additions */
+ case HB_SCRIPT_KHAROSHTHI:
+
+ /* Unicode-5.0 additions */
+ case HB_SCRIPT_PHOENICIAN:
+ case HB_SCRIPT_NKO:
+
+ /* Unicode-5.1 additions */
+ case HB_SCRIPT_LYDIAN:
+
+ /* Unicode-5.2 additions */
+ case HB_SCRIPT_AVESTAN:
+ case HB_SCRIPT_IMPERIAL_ARAMAIC:
+ case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
+ case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
+ case HB_SCRIPT_OLD_SOUTH_ARABIAN:
+ case HB_SCRIPT_OLD_TURKIC:
+ case HB_SCRIPT_SAMARITAN:
+
+ /* Unicode-6.0 additions */
+ case HB_SCRIPT_MANDAIC:
+
+ /* Unicode-6.1 additions */
+ case HB_SCRIPT_MEROITIC_CURSIVE:
+ case HB_SCRIPT_MEROITIC_HIEROGLYPHS:
+
+ /* Unicode-7.0 additions */
+ case HB_SCRIPT_MANICHAEAN:
+ case HB_SCRIPT_MENDE_KIKAKUI:
+ case HB_SCRIPT_NABATAEAN:
+ case HB_SCRIPT_OLD_NORTH_ARABIAN:
+ case HB_SCRIPT_PALMYRENE:
+ case HB_SCRIPT_PSALTER_PAHLAVI:
+
+ /* Unicode-8.0 additions */
+ case HB_SCRIPT_HATRAN:
+
+ /* Unicode-9.0 additions */
+ case HB_SCRIPT_ADLAM:
+
+ /* Unicode-11.0 additions */
+ case HB_SCRIPT_HANIFI_ROHINGYA:
+ case HB_SCRIPT_OLD_SOGDIAN:
+ case HB_SCRIPT_SOGDIAN:
+
+ /* Unicode-12.0 additions */
+ case HB_SCRIPT_ELYMAIC:
+
+ /* Unicode-13.0 additions */
+ case HB_SCRIPT_CHORASMIAN:
+ case HB_SCRIPT_YEZIDI:
+
+ return HB_DIRECTION_RTL;
+
+
+ /* https://github.com/harfbuzz/harfbuzz/issues/1000 */
+ case HB_SCRIPT_OLD_HUNGARIAN:
+ case HB_SCRIPT_OLD_ITALIC:
+ case HB_SCRIPT_RUNIC:
+
+ return HB_DIRECTION_INVALID;
+ }
+
+ return HB_DIRECTION_LTR;
+}
+
+
+/* hb_version */
+
+
+/**
+ * SECTION:hb-version
+ * @title: hb-version
+ * @short_description: Information about the version of HarfBuzz in use
+ * @include: hb.h
+ *
+ * These functions and macros allow accessing version of the HarfBuzz
+ * library used at compile- as well as run-time, and to direct code
+ * conditionally based on those versions, again, at compile- or run-time.
+ **/
+
+
+/**
+ * hb_version:
+ * @major: (out): Library major version component.
+ * @minor: (out): Library minor version component.
+ * @micro: (out): Library micro version component.
+ *
+ * Returns library version as three integer components. All three
+ * pointers are written unconditionally, so none of them may be %NULL.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_version (unsigned int *major,
+ unsigned int *minor,
+ unsigned int *micro)
+{
+ *major = HB_VERSION_MAJOR;
+ *minor = HB_VERSION_MINOR;
+ *micro = HB_VERSION_MICRO;
+}
+
+/**
+ * hb_version_string:
+ *
+ * Returns library version as a string with three components.
+ *
+ * Return value: library version string, as given by the compile-time
+ * HB_VERSION_STRING macro.
+ *
+ * Since: 0.9.2
+ **/
+const char *
+hb_version_string ()
+{
+ return HB_VERSION_STRING;
+}
+
+/**
+ * hb_version_atleast:
+ * @major: Library major version component to test against.
+ * @minor: Library minor version component to test against.
+ * @micro: Library micro version component to test against.
+ *
+ * Tests the library version against a minimum value, as three integer
+ * components.
+ *
+ * Return value: the result of HB_VERSION_ATLEAST() for the given components.
+ *
+ * Since: 0.9.30
+ **/
+hb_bool_t
+hb_version_atleast (unsigned int major,
+ unsigned int minor,
+ unsigned int micro)
+{
+ return HB_VERSION_ATLEAST (major, minor, micro);
+}
+
+
+
+/* hb_feature_t and hb_variation_t */
+
+/* Advances *pp past any whitespace, stopping at @end. Always returns true
+ * so that it can be chained with && in the parsers below. */
+static bool
+parse_space (const char **pp, const char *end)
+{
+  const char *cursor = *pp;
+  while (cursor < end && ISSPACE (*cursor))
+    cursor++;
+  *pp = cursor;
+  return true;
+}
+
+/* Skips whitespace, then consumes the character @c if it comes next.
+ * Returns whether @c was consumed; skipped whitespace stays consumed
+ * either way. */
+static bool
+parse_char (const char **pp, const char *end, char c)
+{
+  parse_space (pp, end);
+
+  const bool matched = *pp != end && **pp == c;
+  if (matched)
+    (*pp)++;
+  return matched;
+}
+
+/* Parses an integer into *pv; *pv is left untouched on failure. */
+static bool
+parse_uint (const char **pp, const char *end, unsigned int *pv)
+{
+  /* Intentionally use hb_parse_int inside instead of hb_parse_uint,
+   * such that -1 turns into "big number"... */
+  int parsed;
+  const bool ok = hb_parse_int (pp, end, &parsed);
+  if (likely (ok))
+    *pv = parsed;
+  return ok;
+}
+
+/* Same as parse_uint (), but stores into a uint32_t. */
+static bool
+parse_uint32 (const char **pp, const char *end, uint32_t *pv)
+{
+  /* Intentionally use hb_parse_int inside instead of hb_parse_uint,
+   * such that -1 turns into "big number"... */
+  int parsed;
+  const bool ok = hb_parse_int (pp, end, &parsed);
+  if (likely (ok))
+    *pv = parsed;
+  return ok;
+}
+
+/* Parses the case-insensitive words "on" (stored as 1) or "off" (stored
+ * as 0). The run of letters is consumed even when it is neither word. */
+static bool
+parse_bool (const char **pp, const char *end, uint32_t *pv)
+{
+  parse_space (pp, end);
+
+  const char *word = *pp;
+  while (*pp < end && ISALPHA(**pp))
+    (*pp)++;
+  const auto word_len = *pp - word;
+
+  /* CSS allows on/off as aliases 1/0. */
+  if (word_len == 2
+      && TOLOWER (word[0]) == 'o'
+      && TOLOWER (word[1]) == 'n')
+  {
+    *pv = 1;
+    return true;
+  }
+
+  if (word_len == 3
+      && TOLOWER (word[0]) == 'o'
+      && TOLOWER (word[1]) == 'f'
+      && TOLOWER (word[2]) == 'f')
+  {
+    *pv = 0;
+    return true;
+  }
+
+  return false;
+}
+
+/* hb_feature_t */
+
+/* Parses the optional '-' or '+' in front of a feature tag: '-' presets the
+ * value to 0, anything else (including no sign) presets it to 1. */
+static bool
+parse_feature_value_prefix (const char **pp, const char *end, hb_feature_t *feature)
+{
+  const bool disabled = parse_char (pp, end, '-');
+  if (!disabled)
+    parse_char (pp, end, '+'); /* Optional. */
+
+  feature->value = disabled ? 0 : 1;
+  return true;
+}
+
+/* Parses a tag of one to four alphanumeric or underscore characters,
+ * optionally wrapped in matching single or double quotes; a quoted tag must
+ * be exactly four characters long. */
+static bool
+parse_tag (const char **pp, const char *end, hb_tag_t *tag)
+{
+  parse_space (pp, end);
+
+  /* Remember an opening quote, if any, so the closing one can be matched. */
+  char quote = 0;
+  if (*pp < end && (**pp == '\'' || **pp == '"'))
+    quote = *(*pp)++;
+
+  const char *start = *pp;
+  while (*pp < end && (ISALNUM(**pp) || **pp == '_'))
+    (*pp)++;
+
+  const auto length = *pp - start;
+  if (length == 0 || length > 4)
+    return false;
+
+  *tag = hb_tag_from_string (start, length);
+
+  if (!quote)
+    return true;
+
+  /* CSS expects exactly four bytes. And we only allow quotations for
+   * CSS compatibility. So, enforce the length. */
+  if (length != 4 || *pp == end || **pp != quote)
+    return false;
+
+  (*pp)++;
+  return true;
+}
+
+/* Parses the optional "[start:end]" range after a feature tag. Without
+ * brackets the range is global; "[N]" selects N..N+1; ';' is accepted as an
+ * alternative to ':'. Fails only if an opened bracket is not closed. */
+static bool
+parse_feature_indices (const char **pp, const char *end, hb_feature_t *feature)
+{
+  parse_space (pp, end);
+
+  feature->start = HB_FEATURE_GLOBAL_START;
+  feature->end = HB_FEATURE_GLOBAL_END;
+
+  if (!parse_char (pp, end, '['))
+    return true;
+
+  const bool has_start = parse_uint (pp, end, &feature->start);
+  const bool has_separator = parse_char (pp, end, ':') || parse_char (pp, end, ';');
+
+  if (has_separator)
+    parse_uint (pp, end, &feature->end); /* Optional; stays global if absent. */
+  else if (has_start)
+    feature->end = feature->start + 1;
+
+  return parse_char (pp, end, ']');
+}
+
+/* Parses the optional "=value" / " value" part of a feature, where value is
+ * an integer or on/off. */
+static bool
+parse_feature_value_postfix (const char **pp, const char *end, hb_feature_t *feature)
+{
+  const bool had_equal = parse_char (pp, end, '=');
+
+  bool had_value = parse_uint32 (pp, end, &feature->value);
+  if (!had_value)
+    had_value = parse_bool (pp, end, &feature->value);
+
+  /* CSS doesn't use equal-sign between tag and value.
+   * If there was an equal-sign, then there *must* be a value.
+   * A value without an equal-sign is ok, but not required. */
+  return had_value || !had_equal;
+}
+
+/* Parses one complete feature string: sign, tag, range, value, then
+ * optional trailing whitespace. Succeeds only if all input is consumed. */
+static bool
+parse_one_feature (const char **pp, const char *end, hb_feature_t *feature)
+{
+  if (!parse_feature_value_prefix (pp, end, feature))
+    return false;
+  if (!parse_tag (pp, end, &feature->tag))
+    return false;
+  if (!parse_feature_indices (pp, end, feature))
+    return false;
+  if (!parse_feature_value_postfix (pp, end, feature))
+    return false;
+  if (!parse_space (pp, end))
+    return false;
+
+  return *pp == end;
+}
+
+/**
+ * hb_feature_from_string:
+ * @str: (array length=len) (element-type uint8_t): a string to parse
+ * @len: length of @str, or -1 if string is %NULL terminated
+ * @feature: (out): the #hb_feature_t to initialize with the parsed values
+ *
+ * Parses a string into a #hb_feature_t.
+ *
+ * The format for specifying feature strings follows. All valid CSS
+ * font-feature-settings values other than 'normal' and the global values are
+ * also accepted, though not documented below. CSS string escapes are not
+ * supported.
+ *
+ * The range indices refer to the positions between Unicode characters. The
+ * position before the first character is always 0.
+ *
+ * The format is Python-esque. Here is how it all works:
+ *
+ * <informaltable pgwide='1' align='left' frame='none'>
+ * <tgroup cols='5'>
+ * <thead>
+ * <row><entry>Syntax</entry> <entry>Value</entry> <entry>Start</entry> <entry>End</entry></row>
+ * </thead>
+ * <tbody>
+ * <row><entry>Setting value:</entry></row>
+ * <row><entry>kern</entry> <entry>1</entry> <entry>0</entry> <entry>∞</entry> <entry>Turn feature on</entry></row>
+ * <row><entry>+kern</entry> <entry>1</entry> <entry>0</entry> <entry>∞</entry> <entry>Turn feature on</entry></row>
+ * <row><entry>-kern</entry> <entry>0</entry> <entry>0</entry> <entry>∞</entry> <entry>Turn feature off</entry></row>
+ * <row><entry>kern=0</entry> <entry>0</entry> <entry>0</entry> <entry>∞</entry> <entry>Turn feature off</entry></row>
+ * <row><entry>kern=1</entry> <entry>1</entry> <entry>0</entry> <entry>∞</entry> <entry>Turn feature on</entry></row>
+ * <row><entry>aalt=2</entry> <entry>2</entry> <entry>0</entry> <entry>∞</entry> <entry>Choose 2nd alternate</entry></row>
+ * <row><entry>Setting index:</entry></row>
+ * <row><entry>kern[]</entry> <entry>1</entry> <entry>0</entry> <entry>∞</entry> <entry>Turn feature on</entry></row>
+ * <row><entry>kern[:]</entry> <entry>1</entry> <entry>0</entry> <entry>∞</entry> <entry>Turn feature on</entry></row>
+ * <row><entry>kern[5:]</entry> <entry>1</entry> <entry>5</entry> <entry>∞</entry> <entry>Turn feature on, partial</entry></row>
+ * <row><entry>kern[:5]</entry> <entry>1</entry> <entry>0</entry> <entry>5</entry> <entry>Turn feature on, partial</entry></row>
+ * <row><entry>kern[3:5]</entry> <entry>1</entry> <entry>3</entry> <entry>5</entry> <entry>Turn feature on, range</entry></row>
+ * <row><entry>kern[3]</entry> <entry>1</entry> <entry>3</entry> <entry>3+1</entry> <entry>Turn feature on, single char</entry></row>
+ * <row><entry>Mixing it all:</entry></row>
+ * <row><entry>aalt[3:5]=2</entry> <entry>2</entry> <entry>3</entry> <entry>5</entry> <entry>Turn 2nd alternate on for range</entry></row>
+ * </tbody>
+ * </tgroup>
+ * </informaltable>
+ *
+ * Return value:
+ * %true if @str is successfully parsed, %false otherwise.
+ *
+ * Since: 0.9.5
+ **/
+hb_bool_t
+hb_feature_from_string (const char *str, int len,
+			hb_feature_t *feature)
+{
+  hb_feature_t feat;
+
+  /* A NULL @str cannot be parsed. Guard it here, like the other
+   * *_from_string() functions do, before strlen () dereferences it. */
+  if (likely (str))
+  {
+    if (len < 0)
+      len = strlen (str);
+
+    /* Parse into a local first so @feature never holds partial results. */
+    if (likely (parse_one_feature (&str, str + len, &feat)))
+    {
+      if (feature)
+	*feature = feat;
+      return true;
+    }
+  }
+
+  /* On failure hand back a zeroed feature. */
+  if (feature)
+    memset (feature, 0, sizeof (*feature));
+  return false;
+}
+
+/**
+ * hb_feature_to_string:
+ * @feature: an #hb_feature_t to convert
+ * @buf: (array length=size) (out): output string
+ * @size: the allocated size of @buf
+ *
+ * Converts a #hb_feature_t into a %NULL-terminated string in the format
+ * understood by hb_feature_from_string(). The client is responsible for
+ * allocating big enough size for @buf, 128 bytes is more than enough.
+ * The output is truncated to fit in @size bytes, terminator included;
+ * nothing is written when @size is zero.
+ *
+ * Since: 0.9.5
+ **/
+void
+hb_feature_to_string (hb_feature_t *feature,
+		      char *buf, unsigned int size)
+{
+  if (unlikely (!size)) return;
+
+  char s[128];
+  unsigned int len = 0;
+  if (feature->value == 0)
+    s[len++] = '-';
+  hb_tag_to_string (feature->tag, s + len);
+  len += 4;
+  /* Tags shorter than four characters are space-padded; drop the padding. */
+  while (len && s[len - 1] == ' ')
+    len--;
+  if (feature->start != HB_FEATURE_GLOBAL_START || feature->end != HB_FEATURE_GLOBAL_END)
+  {
+    /* "[N]" is shorthand for the single-character range N..N+1. */
+    const bool single = feature->end == feature->start + 1;
+
+    s[len++] = '[';
+    /* A zero start is normally omitted, but not in the shorthand form:
+     * "[]" would be parsed back as the global range instead of 0..1. */
+    if (feature->start || single)
+      len += hb_max (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->start));
+    if (!single) {
+      s[len++] = ':';
+      if (feature->end != HB_FEATURE_GLOBAL_END)
+	len += hb_max (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->end));
+    }
+    s[len++] = ']';
+  }
+  /* Values 0 and 1 are already expressed by the presence or absence of '-'. */
+  if (feature->value > 1)
+  {
+    s[len++] = '=';
+    len += hb_max (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->value));
+  }
+  assert (len < ARRAY_LENGTH (s));
+  len = hb_min (len, size - 1);
+  memcpy (buf, s, len);
+  buf[len] = '\0';
+}
+
+/* hb_variation_t */
+
+/* Parses the value of a variation: an optional '=' followed by a number. */
+static bool
+parse_variation_value (const char **pp, const char *end, hb_variation_t *variation)
+{
+  parse_char (pp, end, '='); /* Optional. */
+
+  double parsed;
+  if (likely (hb_parse_double (pp, end, &parsed)))
+  {
+    variation->value = parsed;
+    return true;
+  }
+
+  return false;
+}
+
+/* Parses one complete variation string: tag, value, then optional trailing
+ * whitespace. Succeeds only if all input is consumed. */
+static bool
+parse_one_variation (const char **pp, const char *end, hb_variation_t *variation)
+{
+  if (!parse_tag (pp, end, &variation->tag))
+    return false;
+  if (!parse_variation_value (pp, end, variation))
+    return false;
+  if (!parse_space (pp, end))
+    return false;
+
+  return *pp == end;
+}
+
+/**
+ * hb_variation_from_string:
+ * @str: (array length=len) (element-type uint8_t): a string to parse
+ * @len: length of @str, or -1 if string is %NULL terminated
+ * @variation: (out): the #hb_variation_t to initialize with the parsed values
+ *
+ * Parses a string of the form "tag=value" into a #hb_variation_t; the
+ * equal sign is optional. On failure @variation is zeroed.
+ *
+ * Return value:
+ * %true if @str is successfully parsed, %false otherwise.
+ *
+ * Since: 1.4.2
+ */
+hb_bool_t
+hb_variation_from_string (const char *str, int len,
+			  hb_variation_t *variation)
+{
+  hb_variation_t var;
+
+  /* A NULL @str cannot be parsed. Guard it here, like the other
+   * *_from_string() functions do, before strlen () dereferences it. */
+  if (likely (str))
+  {
+    if (len < 0)
+      len = strlen (str);
+
+    /* Parse into a local first so @variation never holds partial results. */
+    if (likely (parse_one_variation (&str, str + len, &var)))
+    {
+      if (variation)
+	*variation = var;
+      return true;
+    }
+  }
+
+  if (variation)
+    memset (variation, 0, sizeof (*variation));
+  return false;
+}
+
+/**
+ * hb_variation_to_string:
+ * @variation: an #hb_variation_t to convert
+ * @buf: (array length=size) (out): output string
+ * @size: the allocated size of @buf
+ *
+ * Converts a #hb_variation_t into a %NULL-terminated string of the form
+ * "tag=value", the format understood by hb_variation_from_string().
+ * The output is truncated to fit in @size bytes, terminator included;
+ * nothing is written when @size is zero.
+ *
+ * Since: 1.4.2
+ */
+void
+hb_variation_to_string (hb_variation_t *variation,
+			char *buf, unsigned int size)
+{
+  if (unlikely (!size)) return;
+
+  char s[128];
+  hb_tag_to_string (variation->tag, s);
+
+  /* Tags shorter than four characters are space-padded; drop the padding. */
+  unsigned int len = 4;
+  while (len && s[len - 1] == ' ')
+    len--;
+
+  s[len++] = '=';
+  len += hb_max (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%g", (double) variation->value));
+
+  assert (len < ARRAY_LENGTH (s));
+  len = hb_min (len, size - 1);
+  memcpy (buf, s, len);
+  buf[len] = '\0';
+}
+
+/**
+ * hb_color_get_alpha:
+ * @color: a #hb_color_t we are interested in its channels.
+ *
+ * Return value: Alpha channel value of the given color
+ *
+ * Since: 2.1.0
+ */
+uint8_t
+(hb_color_get_alpha) (hb_color_t color) /* NOTE(review): the parenthesized name presumably keeps a same-named function-like macro from expanding here, so the body can use it - confirm in hb-common.h */
+{
+ return hb_color_get_alpha (color);
+}
+
+/**
+ * hb_color_get_red:
+ * @color: a #hb_color_t we are interested in its channels.
+ *
+ * Return value: Red channel value of the given color
+ *
+ * Since: 2.1.0
+ */
+uint8_t
+(hb_color_get_red) (hb_color_t color) /* NOTE(review): the parenthesized name presumably keeps a same-named function-like macro from expanding here, so the body can use it - confirm in hb-common.h */
+{
+ return hb_color_get_red (color);
+}
+
+/**
+ * hb_color_get_green:
+ * @color: a #hb_color_t we are interested in its channels.
+ *
+ * Return value: Green channel value of the given color
+ *
+ * Since: 2.1.0
+ */
+uint8_t
+(hb_color_get_green) (hb_color_t color) /* NOTE(review): the parenthesized name presumably keeps a same-named function-like macro from expanding here, so the body can use it - confirm in hb-common.h */
+{
+ return hb_color_get_green (color);
+}
+
+/**
+ * hb_color_get_blue:
+ * @color: a #hb_color_t we are interested in its channels.
+ *
+ * Return value: Blue channel value of the given color
+ *
+ * Since: 2.1.0
+ */
+uint8_t
+(hb_color_get_blue) (hb_color_t color) /* NOTE(review): the parenthesized name presumably keeps a same-named function-like macro from expanding here, so the body can use it - confirm in hb-common.h */
+{
+ return hb_color_get_blue (color);
+}
+
+
+/* If there is no visibility control, then hb-static.cc will NOT
+ * define anything. Instead, we get it to define one set in here
+ * only, so only libharfbuzz.so defines them, not other libs. */
+#ifdef HB_NO_VISIBILITY
+#undef HB_NO_VISIBILITY
+#include "hb-static.cc"
+#define HB_NO_VISIBILITY 1
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-common.h b/thirdparty/harfbuzz/src/hb-common.h
new file mode 100644
index 0000000000..a97a5f5a04
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-common.h
@@ -0,0 +1,513 @@
+/*
+ * Copyright © 2007,2008,2009 Red Hat, Inc.
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_H_IN
+#error "Include <hb.h> instead."
+#endif
+
+#ifndef HB_COMMON_H
+#define HB_COMMON_H
+
+/* HB_EXTERN prefixes every public declaration in this header. It defaults to
+ * plain `extern` unless it was already defined before this point. */
+#ifndef HB_EXTERN
+#define HB_EXTERN extern
+#endif
+
+/* HB_BEGIN_DECLS / HB_END_DECLS wrap declarations in extern "C" { ... } when
+ * compiled as C++ and expand to nothing in C. */
+#ifndef HB_BEGIN_DECLS
+# ifdef __cplusplus
+# define HB_BEGIN_DECLS extern "C" {
+# define HB_END_DECLS }
+# else /* !__cplusplus */
+# define HB_BEGIN_DECLS
+# define HB_END_DECLS
+# endif /* !__cplusplus */
+#endif
+
+/* Bring the fixed-width integer types (int8_t ... uint64_t) into scope, using
+ * whichever header the platform provides. MSVC older than VS 2010 gets
+ * hand-written typedefs; everything not special-cased uses <stdint.h>. */
+#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || \
+ defined (_sgi) || defined (__sun) || defined (sun) || \
+ defined (__digital__) || defined (__HP_cc)
+# include <inttypes.h>
+#elif defined (_AIX)
+# include <sys/inttypes.h>
+#elif defined (_MSC_VER) && _MSC_VER < 1600
+/* VS 2010 (_MSC_VER 1600) has stdint.h */
+typedef __int8 int8_t;
+typedef unsigned __int8 uint8_t;
+typedef __int16 int16_t;
+typedef unsigned __int16 uint16_t;
+typedef __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+#elif defined (__KERNEL__)
+# include <linux/types.h>
+#else
+# include <stdint.h>
+#endif
+
+/* HB_DEPRECATED: marks a declaration as deprecated on GCC >= 3.1 and on
+ * MSVC with _MSC_VER >= 1300; expands to nothing elsewhere. */
+#if defined(__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
+#define HB_DEPRECATED __attribute__((__deprecated__))
+#elif defined(_MSC_VER) && (_MSC_VER >= 1300)
+#define HB_DEPRECATED __declspec(deprecated)
+#else
+#define HB_DEPRECATED
+#endif
+
+/* HB_DEPRECATED_FOR(f): like HB_DEPRECATED, but the diagnostic also names the
+ * replacement `f`. Needs GCC >= 4.5 or a new enough MSVC (see the
+ * _MSC_FULL_VER test); otherwise it falls back to plain HB_DEPRECATED. */
+#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
+#define HB_DEPRECATED_FOR(f) __attribute__((__deprecated__("Use '" #f "' instead")))
+#elif defined(_MSC_FULL_VER) && (_MSC_FULL_VER > 140050320)
+#define HB_DEPRECATED_FOR(f) __declspec(deprecated("is deprecated. Use '" #f "' instead"))
+#else
+#define HB_DEPRECATED_FOR(f) HB_DEPRECATED
+#endif
+
+
+HB_BEGIN_DECLS
+
+
+/* Boolean type of the C API; a plain int. */
+typedef int hb_bool_t;
+
+/* Basic scalar types: code points and masks are unsigned 32-bit values,
+ * positions are signed 32-bit values. */
+typedef uint32_t hb_codepoint_t;
+typedef int32_t hb_position_t;
+typedef uint32_t hb_mask_t;
+
+/* Four bytes of storage that can be viewed as one 32-bit, two 16-bit or four
+ * 8-bit integers, each either signed or unsigned. */
+typedef union _hb_var_int_t {
+ uint32_t u32;
+ int32_t i32;
+ uint16_t u16[2];
+ int16_t i16[2];
+ uint8_t u8[4];
+ int8_t i8[4];
+} hb_var_int_t;
+
+
+/* hb_tag_t */
+
+/* A tag is four bytes packed into one unsigned 32-bit value. */
+typedef uint32_t hb_tag_t;
+
+/* HB_TAG packs four byte values into a tag, c1 in the most significant byte
+ * and c4 in the least significant one; each argument is masked to 8 bits.
+ * HB_UNTAG is the reverse: it expands to four comma-separated uint8_t
+ * expressions (most significant byte first), so it is only usable where a
+ * comma-separated list is valid, e.g. as function-call arguments. */
+#define HB_TAG(c1,c2,c3,c4) ((hb_tag_t)((((uint32_t)(c1)&0xFF)<<24)|(((uint32_t)(c2)&0xFF)<<16)|(((uint32_t)(c3)&0xFF)<<8)|((uint32_t)(c4)&0xFF)))
+#define HB_UNTAG(tag) (uint8_t)(((tag)>>24)&0xFF), (uint8_t)(((tag)>>16)&0xFF), (uint8_t)(((tag)>>8)&0xFF), (uint8_t)((tag)&0xFF)
+
+/* HB_TAG_NONE is the all-zero tag, HB_TAG_MAX is 0xFFFFFFFF and
+ * HB_TAG_MAX_SIGNED is 0x7FFFFFFF (the largest value with the top bit clear). */
+#define HB_TAG_NONE HB_TAG(0,0,0,0)
+#define HB_TAG_MAX HB_TAG(0xff,0xff,0xff,0xff)
+#define HB_TAG_MAX_SIGNED HB_TAG(0x7f,0xff,0xff,0xff)
+
+/* len=-1 means str is NUL-terminated. */
+HB_EXTERN hb_tag_t
+hb_tag_from_string (const char *str, int len);
+
+/* buf should have 4 bytes. */
+/* NOTE(review): callers in hb-common.cc append to the buffer right after the
+ * 4 bytes, so presumably no NUL terminator is written -- confirm. */
+HB_EXTERN void
+hb_tag_to_string (hb_tag_t tag, char *buf);
+
+
+/**
+ * hb_direction_t:
+ * @HB_DIRECTION_INVALID: Initial, unset direction.
+ * @HB_DIRECTION_LTR: Text is set horizontally from left to right.
+ * @HB_DIRECTION_RTL: Text is set horizontally from right to left.
+ * @HB_DIRECTION_TTB: Text is set vertically from top to bottom.
+ * @HB_DIRECTION_BTT: Text is set vertically from bottom to top.
+ *
+ * The numeric values matter: the four valid directions are 4 (LTR), 5 (RTL),
+ * 6 (TTB) and 7 (BTT), and the HB_DIRECTION_* macros below rely on that bit
+ * layout.
+ */
+typedef enum {
+ HB_DIRECTION_INVALID = 0,
+ HB_DIRECTION_LTR = 4,
+ HB_DIRECTION_RTL,
+ HB_DIRECTION_TTB,
+ HB_DIRECTION_BTT
+} hb_direction_t;
+
+/* len=-1 means str is NUL-terminated */
+HB_EXTERN hb_direction_t
+hb_direction_from_string (const char *str, int len);
+
+HB_EXTERN const char *
+hb_direction_to_string (hb_direction_t direction);
+
+/* True for the values 4..7, i.e. exactly the four real directions. */
+#define HB_DIRECTION_IS_VALID(dir) ((((unsigned int) (dir)) & ~3U) == 4)
+/* Direction must be valid for the following */
+/* Bit 0 distinguishes the two directions on one axis, bit 1 selects the axis:
+ * HORIZONTAL matches 4/5 (LTR, RTL), VERTICAL matches 6/7 (TTB, BTT),
+ * FORWARD matches 4/6 (LTR, TTB), BACKWARD matches 5/7 (RTL, BTT), and
+ * REVERSE flips bit 0 (LTR <-> RTL, TTB <-> BTT). */
+#define HB_DIRECTION_IS_HORIZONTAL(dir) ((((unsigned int) (dir)) & ~1U) == 4)
+#define HB_DIRECTION_IS_VERTICAL(dir) ((((unsigned int) (dir)) & ~1U) == 6)
+#define HB_DIRECTION_IS_FORWARD(dir) ((((unsigned int) (dir)) & ~2U) == 4)
+#define HB_DIRECTION_IS_BACKWARD(dir) ((((unsigned int) (dir)) & ~2U) == 5)
+#define HB_DIRECTION_REVERSE(dir) ((hb_direction_t) (((unsigned int) (dir)) ^ 1))
+
+
+/* hb_language_t */
+
+/* A language is a pointer to an opaque, const implementation struct that is
+ * only declared here, never defined. */
+typedef const struct hb_language_impl_t *hb_language_t;
+
+/* NOTE(review): presumably len=-1 means str is NUL-terminated, as for the
+ * other *_from_string functions in this header -- confirm. */
+HB_EXTERN hb_language_t
+hb_language_from_string (const char *str, int len);
+
+HB_EXTERN const char *
+hb_language_to_string (hb_language_t language);
+
+/* The null pointer stands for "no language". */
+#define HB_LANGUAGE_INVALID ((hb_language_t) 0)
+
+HB_EXTERN hb_language_t
+hb_language_get_default (void);
+
+
+/* hb_script_t */
+
+/* https://unicode.org/iso15924/ */
+/* https://docs.google.com/spreadsheets/d/1Y90M0Ie3MUJ6UVCRDOypOtijlMDLNNyyLk36T6iMu0o */
+/* Unicode Character Database property: Script (sc) */
+/* Every enumerator is a four-letter script code packed with HB_TAG(). The
+ * leading version comment on each line is presumably the Unicode version that
+ * introduced the script, and the "Since:" notes presumably give the HarfBuzz
+ * release that added the group below them. */
+typedef enum
+{
+ /*1.1*/ HB_SCRIPT_COMMON = HB_TAG ('Z','y','y','y'),
+ /*1.1*/ HB_SCRIPT_INHERITED = HB_TAG ('Z','i','n','h'),
+ /*5.0*/ HB_SCRIPT_UNKNOWN = HB_TAG ('Z','z','z','z'),
+
+ /*1.1*/ HB_SCRIPT_ARABIC = HB_TAG ('A','r','a','b'),
+ /*1.1*/ HB_SCRIPT_ARMENIAN = HB_TAG ('A','r','m','n'),
+ /*1.1*/ HB_SCRIPT_BENGALI = HB_TAG ('B','e','n','g'),
+ /*1.1*/ HB_SCRIPT_CYRILLIC = HB_TAG ('C','y','r','l'),
+ /*1.1*/ HB_SCRIPT_DEVANAGARI = HB_TAG ('D','e','v','a'),
+ /*1.1*/ HB_SCRIPT_GEORGIAN = HB_TAG ('G','e','o','r'),
+ /*1.1*/ HB_SCRIPT_GREEK = HB_TAG ('G','r','e','k'),
+ /*1.1*/ HB_SCRIPT_GUJARATI = HB_TAG ('G','u','j','r'),
+ /*1.1*/ HB_SCRIPT_GURMUKHI = HB_TAG ('G','u','r','u'),
+ /*1.1*/ HB_SCRIPT_HANGUL = HB_TAG ('H','a','n','g'),
+ /*1.1*/ HB_SCRIPT_HAN = HB_TAG ('H','a','n','i'),
+ /*1.1*/ HB_SCRIPT_HEBREW = HB_TAG ('H','e','b','r'),
+ /*1.1*/ HB_SCRIPT_HIRAGANA = HB_TAG ('H','i','r','a'),
+ /*1.1*/ HB_SCRIPT_KANNADA = HB_TAG ('K','n','d','a'),
+ /*1.1*/ HB_SCRIPT_KATAKANA = HB_TAG ('K','a','n','a'),
+ /*1.1*/ HB_SCRIPT_LAO = HB_TAG ('L','a','o','o'),
+ /*1.1*/ HB_SCRIPT_LATIN = HB_TAG ('L','a','t','n'),
+ /*1.1*/ HB_SCRIPT_MALAYALAM = HB_TAG ('M','l','y','m'),
+ /*1.1*/ HB_SCRIPT_ORIYA = HB_TAG ('O','r','y','a'),
+ /*1.1*/ HB_SCRIPT_TAMIL = HB_TAG ('T','a','m','l'),
+ /*1.1*/ HB_SCRIPT_TELUGU = HB_TAG ('T','e','l','u'),
+ /*1.1*/ HB_SCRIPT_THAI = HB_TAG ('T','h','a','i'),
+
+ /*2.0*/ HB_SCRIPT_TIBETAN = HB_TAG ('T','i','b','t'),
+
+ /*3.0*/ HB_SCRIPT_BOPOMOFO = HB_TAG ('B','o','p','o'),
+ /*3.0*/ HB_SCRIPT_BRAILLE = HB_TAG ('B','r','a','i'),
+ /*3.0*/ HB_SCRIPT_CANADIAN_SYLLABICS = HB_TAG ('C','a','n','s'),
+ /*3.0*/ HB_SCRIPT_CHEROKEE = HB_TAG ('C','h','e','r'),
+ /*3.0*/ HB_SCRIPT_ETHIOPIC = HB_TAG ('E','t','h','i'),
+ /*3.0*/ HB_SCRIPT_KHMER = HB_TAG ('K','h','m','r'),
+ /*3.0*/ HB_SCRIPT_MONGOLIAN = HB_TAG ('M','o','n','g'),
+ /*3.0*/ HB_SCRIPT_MYANMAR = HB_TAG ('M','y','m','r'),
+ /*3.0*/ HB_SCRIPT_OGHAM = HB_TAG ('O','g','a','m'),
+ /*3.0*/ HB_SCRIPT_RUNIC = HB_TAG ('R','u','n','r'),
+ /*3.0*/ HB_SCRIPT_SINHALA = HB_TAG ('S','i','n','h'),
+ /*3.0*/ HB_SCRIPT_SYRIAC = HB_TAG ('S','y','r','c'),
+ /*3.0*/ HB_SCRIPT_THAANA = HB_TAG ('T','h','a','a'),
+ /*3.0*/ HB_SCRIPT_YI = HB_TAG ('Y','i','i','i'),
+
+ /*3.1*/ HB_SCRIPT_DESERET = HB_TAG ('D','s','r','t'),
+ /*3.1*/ HB_SCRIPT_GOTHIC = HB_TAG ('G','o','t','h'),
+ /*3.1*/ HB_SCRIPT_OLD_ITALIC = HB_TAG ('I','t','a','l'),
+
+ /*3.2*/ HB_SCRIPT_BUHID = HB_TAG ('B','u','h','d'),
+ /*3.2*/ HB_SCRIPT_HANUNOO = HB_TAG ('H','a','n','o'),
+ /*3.2*/ HB_SCRIPT_TAGALOG = HB_TAG ('T','g','l','g'),
+ /*3.2*/ HB_SCRIPT_TAGBANWA = HB_TAG ('T','a','g','b'),
+
+ /*4.0*/ HB_SCRIPT_CYPRIOT = HB_TAG ('C','p','r','t'),
+ /*4.0*/ HB_SCRIPT_LIMBU = HB_TAG ('L','i','m','b'),
+ /*4.0*/ HB_SCRIPT_LINEAR_B = HB_TAG ('L','i','n','b'),
+ /*4.0*/ HB_SCRIPT_OSMANYA = HB_TAG ('O','s','m','a'),
+ /*4.0*/ HB_SCRIPT_SHAVIAN = HB_TAG ('S','h','a','w'),
+ /*4.0*/ HB_SCRIPT_TAI_LE = HB_TAG ('T','a','l','e'),
+ /*4.0*/ HB_SCRIPT_UGARITIC = HB_TAG ('U','g','a','r'),
+
+ /*4.1*/ HB_SCRIPT_BUGINESE = HB_TAG ('B','u','g','i'),
+ /*4.1*/ HB_SCRIPT_COPTIC = HB_TAG ('C','o','p','t'),
+ /*4.1*/ HB_SCRIPT_GLAGOLITIC = HB_TAG ('G','l','a','g'),
+ /*4.1*/ HB_SCRIPT_KHAROSHTHI = HB_TAG ('K','h','a','r'),
+ /*4.1*/ HB_SCRIPT_NEW_TAI_LUE = HB_TAG ('T','a','l','u'),
+ /*4.1*/ HB_SCRIPT_OLD_PERSIAN = HB_TAG ('X','p','e','o'),
+ /*4.1*/ HB_SCRIPT_SYLOTI_NAGRI = HB_TAG ('S','y','l','o'),
+ /*4.1*/ HB_SCRIPT_TIFINAGH = HB_TAG ('T','f','n','g'),
+
+ /*5.0*/ HB_SCRIPT_BALINESE = HB_TAG ('B','a','l','i'),
+ /*5.0*/ HB_SCRIPT_CUNEIFORM = HB_TAG ('X','s','u','x'),
+ /*5.0*/ HB_SCRIPT_NKO = HB_TAG ('N','k','o','o'),
+ /*5.0*/ HB_SCRIPT_PHAGS_PA = HB_TAG ('P','h','a','g'),
+ /*5.0*/ HB_SCRIPT_PHOENICIAN = HB_TAG ('P','h','n','x'),
+
+ /*5.1*/ HB_SCRIPT_CARIAN = HB_TAG ('C','a','r','i'),
+ /*5.1*/ HB_SCRIPT_CHAM = HB_TAG ('C','h','a','m'),
+ /*5.1*/ HB_SCRIPT_KAYAH_LI = HB_TAG ('K','a','l','i'),
+ /*5.1*/ HB_SCRIPT_LEPCHA = HB_TAG ('L','e','p','c'),
+ /*5.1*/ HB_SCRIPT_LYCIAN = HB_TAG ('L','y','c','i'),
+ /*5.1*/ HB_SCRIPT_LYDIAN = HB_TAG ('L','y','d','i'),
+ /*5.1*/ HB_SCRIPT_OL_CHIKI = HB_TAG ('O','l','c','k'),
+ /*5.1*/ HB_SCRIPT_REJANG = HB_TAG ('R','j','n','g'),
+ /*5.1*/ HB_SCRIPT_SAURASHTRA = HB_TAG ('S','a','u','r'),
+ /*5.1*/ HB_SCRIPT_SUNDANESE = HB_TAG ('S','u','n','d'),
+ /*5.1*/ HB_SCRIPT_VAI = HB_TAG ('V','a','i','i'),
+
+ /*5.2*/ HB_SCRIPT_AVESTAN = HB_TAG ('A','v','s','t'),
+ /*5.2*/ HB_SCRIPT_BAMUM = HB_TAG ('B','a','m','u'),
+ /*5.2*/ HB_SCRIPT_EGYPTIAN_HIEROGLYPHS = HB_TAG ('E','g','y','p'),
+ /*5.2*/ HB_SCRIPT_IMPERIAL_ARAMAIC = HB_TAG ('A','r','m','i'),
+ /*5.2*/ HB_SCRIPT_INSCRIPTIONAL_PAHLAVI = HB_TAG ('P','h','l','i'),
+ /*5.2*/ HB_SCRIPT_INSCRIPTIONAL_PARTHIAN = HB_TAG ('P','r','t','i'),
+ /*5.2*/ HB_SCRIPT_JAVANESE = HB_TAG ('J','a','v','a'),
+ /*5.2*/ HB_SCRIPT_KAITHI = HB_TAG ('K','t','h','i'),
+ /*5.2*/ HB_SCRIPT_LISU = HB_TAG ('L','i','s','u'),
+ /*5.2*/ HB_SCRIPT_MEETEI_MAYEK = HB_TAG ('M','t','e','i'),
+ /*5.2*/ HB_SCRIPT_OLD_SOUTH_ARABIAN = HB_TAG ('S','a','r','b'),
+ /*5.2*/ HB_SCRIPT_OLD_TURKIC = HB_TAG ('O','r','k','h'),
+ /*5.2*/ HB_SCRIPT_SAMARITAN = HB_TAG ('S','a','m','r'),
+ /*5.2*/ HB_SCRIPT_TAI_THAM = HB_TAG ('L','a','n','a'),
+ /*5.2*/ HB_SCRIPT_TAI_VIET = HB_TAG ('T','a','v','t'),
+
+ /*6.0*/ HB_SCRIPT_BATAK = HB_TAG ('B','a','t','k'),
+ /*6.0*/ HB_SCRIPT_BRAHMI = HB_TAG ('B','r','a','h'),
+ /*6.0*/ HB_SCRIPT_MANDAIC = HB_TAG ('M','a','n','d'),
+
+ /*6.1*/ HB_SCRIPT_CHAKMA = HB_TAG ('C','a','k','m'),
+ /*6.1*/ HB_SCRIPT_MEROITIC_CURSIVE = HB_TAG ('M','e','r','c'),
+ /*6.1*/ HB_SCRIPT_MEROITIC_HIEROGLYPHS = HB_TAG ('M','e','r','o'),
+ /*6.1*/ HB_SCRIPT_MIAO = HB_TAG ('P','l','r','d'),
+ /*6.1*/ HB_SCRIPT_SHARADA = HB_TAG ('S','h','r','d'),
+ /*6.1*/ HB_SCRIPT_SORA_SOMPENG = HB_TAG ('S','o','r','a'),
+ /*6.1*/ HB_SCRIPT_TAKRI = HB_TAG ('T','a','k','r'),
+
+ /*
+ * Since: 0.9.30
+ */
+ /*7.0*/ HB_SCRIPT_BASSA_VAH = HB_TAG ('B','a','s','s'),
+ /*7.0*/ HB_SCRIPT_CAUCASIAN_ALBANIAN = HB_TAG ('A','g','h','b'),
+ /*7.0*/ HB_SCRIPT_DUPLOYAN = HB_TAG ('D','u','p','l'),
+ /*7.0*/ HB_SCRIPT_ELBASAN = HB_TAG ('E','l','b','a'),
+ /*7.0*/ HB_SCRIPT_GRANTHA = HB_TAG ('G','r','a','n'),
+ /*7.0*/ HB_SCRIPT_KHOJKI = HB_TAG ('K','h','o','j'),
+ /*7.0*/ HB_SCRIPT_KHUDAWADI = HB_TAG ('S','i','n','d'),
+ /*7.0*/ HB_SCRIPT_LINEAR_A = HB_TAG ('L','i','n','a'),
+ /*7.0*/ HB_SCRIPT_MAHAJANI = HB_TAG ('M','a','h','j'),
+ /*7.0*/ HB_SCRIPT_MANICHAEAN = HB_TAG ('M','a','n','i'),
+ /*7.0*/ HB_SCRIPT_MENDE_KIKAKUI = HB_TAG ('M','e','n','d'),
+ /*7.0*/ HB_SCRIPT_MODI = HB_TAG ('M','o','d','i'),
+ /*7.0*/ HB_SCRIPT_MRO = HB_TAG ('M','r','o','o'),
+ /*7.0*/ HB_SCRIPT_NABATAEAN = HB_TAG ('N','b','a','t'),
+ /*7.0*/ HB_SCRIPT_OLD_NORTH_ARABIAN = HB_TAG ('N','a','r','b'),
+ /*7.0*/ HB_SCRIPT_OLD_PERMIC = HB_TAG ('P','e','r','m'),
+ /*7.0*/ HB_SCRIPT_PAHAWH_HMONG = HB_TAG ('H','m','n','g'),
+ /*7.0*/ HB_SCRIPT_PALMYRENE = HB_TAG ('P','a','l','m'),
+ /*7.0*/ HB_SCRIPT_PAU_CIN_HAU = HB_TAG ('P','a','u','c'),
+ /*7.0*/ HB_SCRIPT_PSALTER_PAHLAVI = HB_TAG ('P','h','l','p'),
+ /*7.0*/ HB_SCRIPT_SIDDHAM = HB_TAG ('S','i','d','d'),
+ /*7.0*/ HB_SCRIPT_TIRHUTA = HB_TAG ('T','i','r','h'),
+ /*7.0*/ HB_SCRIPT_WARANG_CITI = HB_TAG ('W','a','r','a'),
+
+ /*8.0*/ HB_SCRIPT_AHOM = HB_TAG ('A','h','o','m'),
+ /*8.0*/ HB_SCRIPT_ANATOLIAN_HIEROGLYPHS = HB_TAG ('H','l','u','w'),
+ /*8.0*/ HB_SCRIPT_HATRAN = HB_TAG ('H','a','t','r'),
+ /*8.0*/ HB_SCRIPT_MULTANI = HB_TAG ('M','u','l','t'),
+ /*8.0*/ HB_SCRIPT_OLD_HUNGARIAN = HB_TAG ('H','u','n','g'),
+ /*8.0*/ HB_SCRIPT_SIGNWRITING = HB_TAG ('S','g','n','w'),
+
+ /*
+ * Since: 1.3.0
+ */
+ /*9.0*/ HB_SCRIPT_ADLAM = HB_TAG ('A','d','l','m'),
+ /*9.0*/ HB_SCRIPT_BHAIKSUKI = HB_TAG ('B','h','k','s'),
+ /*9.0*/ HB_SCRIPT_MARCHEN = HB_TAG ('M','a','r','c'),
+ /*9.0*/ HB_SCRIPT_OSAGE = HB_TAG ('O','s','g','e'),
+ /*9.0*/ HB_SCRIPT_TANGUT = HB_TAG ('T','a','n','g'),
+ /*9.0*/ HB_SCRIPT_NEWA = HB_TAG ('N','e','w','a'),
+
+ /*
+ * Since: 1.6.0
+ */
+ /*10.0*/HB_SCRIPT_MASARAM_GONDI = HB_TAG ('G','o','n','m'),
+ /*10.0*/HB_SCRIPT_NUSHU = HB_TAG ('N','s','h','u'),
+ /*10.0*/HB_SCRIPT_SOYOMBO = HB_TAG ('S','o','y','o'),
+ /*10.0*/HB_SCRIPT_ZANABAZAR_SQUARE = HB_TAG ('Z','a','n','b'),
+
+ /*
+ * Since: 1.8.0
+ */
+ /*11.0*/HB_SCRIPT_DOGRA = HB_TAG ('D','o','g','r'),
+ /*11.0*/HB_SCRIPT_GUNJALA_GONDI = HB_TAG ('G','o','n','g'),
+ /*11.0*/HB_SCRIPT_HANIFI_ROHINGYA = HB_TAG ('R','o','h','g'),
+ /*11.0*/HB_SCRIPT_MAKASAR = HB_TAG ('M','a','k','a'),
+ /*11.0*/HB_SCRIPT_MEDEFAIDRIN = HB_TAG ('M','e','d','f'),
+ /*11.0*/HB_SCRIPT_OLD_SOGDIAN = HB_TAG ('S','o','g','o'),
+ /*11.0*/HB_SCRIPT_SOGDIAN = HB_TAG ('S','o','g','d'),
+
+ /*
+ * Since: 2.4.0
+ */
+ /*12.0*/HB_SCRIPT_ELYMAIC = HB_TAG ('E','l','y','m'),
+ /*12.0*/HB_SCRIPT_NANDINAGARI = HB_TAG ('N','a','n','d'),
+ /*12.0*/HB_SCRIPT_NYIAKENG_PUACHUE_HMONG = HB_TAG ('H','m','n','p'),
+ /*12.0*/HB_SCRIPT_WANCHO = HB_TAG ('W','c','h','o'),
+
+ /*
+ * Since: 2.6.7
+ */
+ /*13.0*/HB_SCRIPT_CHORASMIAN = HB_TAG ('C','h','r','s'),
+ /*13.0*/HB_SCRIPT_DIVES_AKURU = HB_TAG ('D','i','a','k'),
+ /*13.0*/HB_SCRIPT_KHITAN_SMALL_SCRIPT = HB_TAG ('K','i','t','s'),
+ /*13.0*/HB_SCRIPT_YEZIDI = HB_TAG ('Y','e','z','i'),
+
+ /* No script set. */
+ HB_SCRIPT_INVALID = HB_TAG_NONE,
+
+ /* Dummy values to ensure any hb_tag_t value can be passed/stored as hb_script_t
+ * without risking undefined behavior. We have two, for historical reasons:
+ * _HB_SCRIPT_MAX_VALUE apparently used to be HB_TAG_MAX, but an unsigned
+ * value that large is not a valid enumerator in ANSI C, so it was changed
+ * to be equal to HB_TAG_MAX_SIGNED as well.
+ *
+ * See this thread for technicalities:
+ *
+ * https://lists.freedesktop.org/archives/harfbuzz/2014-March/004150.html
+ */
+ _HB_SCRIPT_MAX_VALUE = HB_TAG_MAX_SIGNED, /*< skip >*/
+ _HB_SCRIPT_MAX_VALUE_SIGNED = HB_TAG_MAX_SIGNED /*< skip >*/
+
+} hb_script_t;
+
+
+/* Script functions */
+
+/* Conversions between hb_script_t and its ISO 15924 tag or string form. */
+HB_EXTERN hb_script_t
+hb_script_from_iso15924_tag (hb_tag_t tag);
+
+/* NOTE(review): presumably len=-1 means str is NUL-terminated, as for the
+ * other *_from_string functions in this header -- confirm. */
+HB_EXTERN hb_script_t
+hb_script_from_string (const char *str, int len);
+
+HB_EXTERN hb_tag_t
+hb_script_to_iso15924_tag (hb_script_t script);
+
+/* Returns an hb_direction_t for the given script. */
+HB_EXTERN hb_direction_t
+hb_script_get_horizontal_direction (hb_script_t script);
+
+
+/* User data */
+
+/* Key type for user data. Its single member is marked private and named
+ * `unused`; presumably only the address of a key object is significant --
+ * confirm against the user-data setters. */
+typedef struct hb_user_data_key_t {
+ /*< private >*/
+ char unused;
+} hb_user_data_key_t;
+
+/* Callback type that receives a user_data pointer to clean up. */
+typedef void (*hb_destroy_func_t) (void *user_data);
+
+
+/* Font features and variations. */
+
+/**
+ * HB_FEATURE_GLOBAL_START:
+ *
+ * Value for #hb_feature_t.start meaning "from the beginning of the buffer";
+ * it is 0.
+ *
+ * Since: 2.0.0
+ */
+#define HB_FEATURE_GLOBAL_START 0
+/**
+ * HB_FEATURE_GLOBAL_END:
+ *
+ * Value for #hb_feature_t.end meaning "to the end of the buffer"; it is the
+ * largest unsigned int.
+ *
+ * Since: 2.0.0
+ */
+#define HB_FEATURE_GLOBAL_END ((unsigned int) -1)
+
+/**
+ * hb_feature_t:
+ * @tag: a feature tag
+ * @value: 0 disables the feature, non-zero (usually 1) enables the feature.
+ * For features implemented as lookup type 3 (like 'salt') the @value is a one
+ * based index into the alternates.
+ * @start: the cluster to start applying this feature setting (inclusive).
+ * @end: the cluster to end applying this feature setting (exclusive).
+ *
+ * The #hb_feature_t is the structure that holds information about requested
+ * feature application. The feature will be applied with the given value to all
+ * glyphs which are in clusters between @start (inclusive) and @end (exclusive).
+ * Setting start to @HB_FEATURE_GLOBAL_START and end to @HB_FEATURE_GLOBAL_END
+ * specifies that the feature always applies to the entire buffer.
+ */
+typedef struct hb_feature_t {
+ hb_tag_t tag;
+ uint32_t value;
+ unsigned int start;
+ unsigned int end;
+} hb_feature_t;
+
+/* Parses @str into @feature and reports success as an hb_bool_t.
+ * NOTE(review): presumably len=-1 means str is NUL-terminated -- confirm. */
+HB_EXTERN hb_bool_t
+hb_feature_from_string (const char *str, int len,
+ hb_feature_t *feature);
+
+/* Writes a textual form of @feature into @buf, which holds @size bytes. */
+HB_EXTERN void
+hb_feature_to_string (hb_feature_t *feature,
+ char *buf, unsigned int size);
+
+/**
+ * hb_variation_t:
+ * @tag: the tag this setting applies to
+ * @value: the value to set, stored as a float
+ *
+ * A tag/value pair describing one variation setting.
+ *
+ * Since: 1.4.2
+ */
+typedef struct hb_variation_t {
+ hb_tag_t tag;
+ float value;
+} hb_variation_t;
+
+/* Parses @str into @variation and reports success as an hb_bool_t.
+ * NOTE(review): presumably len=-1 means str is NUL-terminated -- confirm. */
+HB_EXTERN hb_bool_t
+hb_variation_from_string (const char *str, int len,
+ hb_variation_t *variation);
+
+/* Formats @variation as "<tag>=<value>" into @buf: trailing spaces of the tag
+ * are dropped and the value is printed with "%g". The result is truncated to
+ * @size - 1 characters and NUL-terminated; nothing is written when @size is 0. */
+HB_EXTERN void
+hb_variation_to_string (hb_variation_t *variation,
+ char *buf, unsigned int size);
+
+/**
+ * hb_color_t:
+ *
+ * Data type for holding color values.
+ *
+ * Four 8-bit channels packed into 32 bits: blue in the most significant
+ * byte, then green, then red, with alpha in the least significant byte.
+ *
+ * Since: 2.1.0
+ */
+typedef uint32_t hb_color_t;
+
+/* Builds an hb_color_t from its channels; note the argument order
+ * (blue, green, red, alpha). */
+#define HB_COLOR(b,g,r,a) ((hb_color_t) HB_TAG ((b),(g),(r),(a)))
+
+/* Each channel getter is declared as a function and then shadowed by a
+ * function-like macro of the same name, so ordinary calls compile to a shift
+ * and mask. The macros yield the masked value without casting it to uint8_t. */
+HB_EXTERN uint8_t
+hb_color_get_alpha (hb_color_t color);
+#define hb_color_get_alpha(color) ((color) & 0xFF)
+
+HB_EXTERN uint8_t
+hb_color_get_red (hb_color_t color);
+#define hb_color_get_red(color) (((color) >> 8) & 0xFF)
+
+HB_EXTERN uint8_t
+hb_color_get_green (hb_color_t color);
+#define hb_color_get_green(color) (((color) >> 16) & 0xFF)
+
+HB_EXTERN uint8_t
+hb_color_get_blue (hb_color_t color);
+#define hb_color_get_blue(color) (((color) >> 24) & 0xFF)
+
+HB_END_DECLS
+
+#endif /* HB_COMMON_H */
diff --git a/thirdparty/harfbuzz/src/hb-config.hh b/thirdparty/harfbuzz/src/hb-config.hh
new file mode 100644
index 0000000000..fc8d424bfb
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-config.hh
@@ -0,0 +1,163 @@
+/*
+ * Copyright © 2019 Facebook, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Facebook Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_CONFIG_HH
+#define HB_CONFIG_HH
+
+#if 0 /* Make test happy. */
+#include "hb.hh"
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+/* Build profiles. Each profile is a single macro that switches on a whole
+ * group of finer-grained options. */
+
+/* HB_TINY: implies both HB_LEAN and HB_MINI, defines HB_NO_MT and
+ * HB_NO_UCD_UNASSIGNED, and defines NDEBUG and __OPTIMIZE_SIZE__ unless they
+ * are already defined. */
+#ifdef HB_TINY
+#define HB_LEAN
+#define HB_MINI
+#define HB_NO_MT
+#define HB_NO_UCD_UNASSIGNED
+#ifndef NDEBUG
+#define NDEBUG
+#endif
+#ifndef __OPTIMIZE_SIZE__
+#define __OPTIMIZE_SIZE__
+#endif
+#endif
+
+/* HB_LEAN: defines HB_DISABLE_DEPRECATED, HB_NDEBUG and the HB_NO_* switches
+ * listed below. */
+#ifdef HB_LEAN
+#define HB_DISABLE_DEPRECATED
+#define HB_NDEBUG
+#define HB_NO_ATEXIT
+#define HB_NO_BUFFER_MESSAGE
+#define HB_NO_BUFFER_SERIALIZE
+#define HB_NO_BITMAP
+#define HB_NO_CFF
+#define HB_NO_COLOR
+#define HB_NO_DRAW
+#define HB_NO_ERRNO
+#define HB_NO_FACE_COLLECT_UNICODES
+#define HB_NO_GETENV
+#define HB_NO_HINTING
+#define HB_NO_LANGUAGE_PRIVATE_SUBTAG
+#define HB_NO_LAYOUT_FEATURE_PARAMS
+#define HB_NO_LAYOUT_COLLECT_GLYPHS
+#define HB_NO_LAYOUT_UNUSED
+#define HB_NO_MATH
+#define HB_NO_META
+#define HB_NO_METRICS
+#define HB_NO_MMAP
+#define HB_NO_NAME
+#define HB_NO_OPEN
+#define HB_NO_SETLOCALE
+#define HB_NO_OT_FONT_GLYPH_NAMES
+#define HB_NO_OT_SHAPE_FRACTIONS
+#define HB_NO_STYLE
+#define HB_NO_SUBSET_LAYOUT
+#define HB_NO_VAR
+#endif
+
+/* HB_MINI: defines HB_NO_AAT and HB_NO_LEGACY. */
+#ifdef HB_MINI
+#define HB_NO_AAT
+#define HB_NO_LEGACY
+#endif
+
+
+/* Closure of options. */
+/* Each coarse option below switches on the finer-grained options it implies,
+ * so the rest of the code only has to test the fine-grained macros. The
+ * ordering matters where one closure feeds another: e.g. HB_NO_OT defines
+ * HB_NO_OT_SHAPE, which in turn defines HB_NO_AAT_SHAPE further down. */
+
+/* HB_IF_NOT_DEPRECATED(x) expands to x, or to nothing when deprecated API is
+ * disabled. */
+#ifdef HB_DISABLE_DEPRECATED
+#define HB_IF_NOT_DEPRECATED(x)
+#else
+#define HB_IF_NOT_DEPRECATED(x) x
+#endif
+
+#ifdef HB_NO_AAT
+#define HB_NO_OT_NAME_LANGUAGE_AAT
+#define HB_NO_AAT_SHAPE
+#endif
+
+#ifdef HB_NO_BITMAP
+#define HB_NO_OT_FONT_BITMAP
+#endif
+
+#ifdef HB_NO_CFF
+#define HB_NO_OT_FONT_CFF
+#define HB_NO_SUBSET_CFF
+#endif
+
+#ifdef HB_NO_GETENV
+#define HB_NO_UNISCRIBE_BUG_COMPATIBLE
+#endif
+
+#ifdef HB_NO_LEGACY
+#define HB_NO_CMAP_LEGACY_SUBTABLES
+#define HB_NO_FALLBACK_SHAPE
+#define HB_NO_OT_KERN
+#define HB_NO_OT_LAYOUT_BLACKLIST
+#define HB_NO_OT_SHAPE_FALLBACK
+#endif
+
+#ifdef HB_NO_NAME
+#define HB_NO_OT_NAME_LANGUAGE
+#endif
+
+#ifdef HB_NO_OT
+#define HB_NO_OT_FONT
+#define HB_NO_OT_LAYOUT
+#define HB_NO_OT_TAG
+#define HB_NO_OT_SHAPE
+#endif
+
+#ifdef HB_NO_OT_SHAPE
+#define HB_NO_AAT_SHAPE
+#endif
+
+#ifdef HB_NO_OT_SHAPE_FALLBACK
+#define HB_NO_OT_SHAPE_COMPLEX_ARABIC_FALLBACK
+#define HB_NO_OT_SHAPE_COMPLEX_HEBREW_FALLBACK
+#define HB_NO_OT_SHAPE_COMPLEX_THAI_FALLBACK
+#define HB_NO_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS
+#endif
+
+/* The standard NDEBUG and the compiler's __OPTIMIZE_SIZE__ are mirrored into
+ * HB_-prefixed macros. */
+#ifdef NDEBUG
+#ifndef HB_NDEBUG
+#define HB_NDEBUG
+#endif
+#endif
+
+#ifdef __OPTIMIZE_SIZE__
+#ifndef HB_OPTIMIZE_SIZE
+#define HB_OPTIMIZE_SIZE
+#endif
+#endif
+
+#ifdef HAVE_CONFIG_OVERRIDE_H
+#include "config-override.h"
+#endif
+
+
+#endif /* HB_CONFIG_HH */
diff --git a/thirdparty/harfbuzz/src/hb-coretext.cc b/thirdparty/harfbuzz/src/hb-coretext.cc
new file mode 100644
index 0000000000..7b6b2bd5ef
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-coretext.cc
@@ -0,0 +1,1194 @@
+/*
+ * Copyright © 2012,2013 Mozilla Foundation.
+ * Copyright © 2012,2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Mozilla Author(s): Jonathan Kew
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifdef HAVE_CORETEXT
+
+#include "hb-shaper-impl.hh"
+
+#include "hb-coretext.h"
+#include "hb-aat-layout.hh"
+#include <math.h>
+
+
+/**
+ * SECTION:hb-coretext
+ * @title: hb-coretext
+ * @short_description: CoreText integration
+ * @include: hb-coretext.h
+ *
+ * Functions for using HarfBuzz with the CoreText fonts.
+ **/
+
+/* https://developer.apple.com/documentation/coretext/1508745-ctfontcreatewithgraphicsfont */
+#define HB_CORETEXT_DEFAULT_FONT_SIZE 12.f
+
+/* Blob destroy callback used by _hb_cg_reference_table(): @user_data is the
+ * CFDataRef whose bytes back the blob, and it is released here. */
+static void
+release_table_data (void *user_data)
+{
+  CFRelease (reinterpret_cast<CFDataRef> (user_data));
+}
+
+/* Table-reference callback for faces wrapping a CGFont (passed in
+ * @user_data). Copies table @tag out of the font and wraps it in a read-only
+ * blob that owns the CFData copy. Returns nullptr when the font has no such
+ * table or the copy is empty. */
+static hb_blob_t *
+_hb_cg_reference_table (hb_face_t *face HB_UNUSED, hb_tag_t tag, void *user_data)
+{
+  CFDataRef table = CGFontCopyTableForTag (reinterpret_cast<CGFontRef> (user_data), tag);
+  if (unlikely (!table))
+    return nullptr;
+
+  const char *bytes = reinterpret_cast<const char*> (CFDataGetBytePtr (table));
+  const size_t byte_count = CFDataGetLength (table);
+  if (bytes && byte_count)
+    /* The blob takes over our reference; release_table_data() drops it. */
+    return hb_blob_create (bytes, byte_count, HB_MEMORY_MODE_READONLY,
+                           reinterpret_cast<void *> (const_cast<__CFData *> (table)),
+                           release_table_data);
+
+  /* Nothing usable in the copy: release it and report "no table". */
+  CFRelease (table);
+  return nullptr;
+}
+
+/* Face destroy callback: @data is the CGFontRef retained by
+ * hb_coretext_face_create(). */
+static void
+_hb_cg_font_release (void *data)
+{
+  CGFontRef cg_font = (CGFontRef) data;
+  CGFontRelease (cg_font);
+}
+
+
+/* Builds a font descriptor whose only attribute is a cascade list containing
+ * just the "LastResort" font. The caller owns the returned descriptor and is
+ * expected to release it (create_ct_font() does so with CFRelease). */
+static CTFontDescriptorRef
+get_last_resort_font_desc ()
+{
+ // TODO Handle allocation failures?
+ CTFontDescriptorRef last_resort = CTFontDescriptorCreateWithNameAndSize (CFSTR("LastResort"), 0);
+ /* One-element array holding the LastResort descriptor. */
+ CFArrayRef cascade_list = CFArrayCreate (kCFAllocatorDefault,
+ (const void **) &last_resort,
+ 1,
+ &kCFTypeArrayCallBacks);
+ /* Our reference is dropped as soon as the descriptor is in the array;
+ * presumably the array keeps it alive via kCFTypeArrayCallBacks. */
+ CFRelease (last_resort);
+ /* One-entry dictionary: kCTFontCascadeListAttribute -> cascade_list. */
+ CFDictionaryRef attributes = CFDictionaryCreate (kCFAllocatorDefault,
+ (const void **) &kCTFontCascadeListAttribute,
+ (const void **) &cascade_list,
+ 1,
+ &kCFTypeDictionaryKeyCallBacks,
+ &kCFTypeDictionaryValueCallBacks);
+ /* Same pattern: the dictionary now holds the array. */
+ CFRelease (cascade_list);
+
+ CTFontDescriptorRef font_desc = CTFontDescriptorCreateWithAttributes (attributes);
+ CFRelease (attributes);
+ return font_desc;
+}
+
+/* Release callback handed to CGDataProviderCreateWithData(): @info is the
+ * hb_blob_t that owns @data. Checks that the provider hands back exactly the
+ * blob's bytes, then drops the blob reference. */
+static void
+release_data (void *info, const void *data, size_t size)
+{
+  hb_blob_t *blob = (hb_blob_t *) info;
+
+  assert (hb_blob_get_length (blob) == size &&
+          hb_blob_get_data (blob, nullptr) == data);
+
+  hb_blob_destroy (blob);
+}
+
+/* Returns a CGFont for @face that the caller owns, or nullptr on failure.
+ * Faces whose destroy callback is _hb_cg_font_release (as set up by
+ * hb_coretext_face_create()) already wrap a CGFont, which is simply retained;
+ * any other face has its font blob fed to CoreGraphics through a data
+ * provider. */
+static CGFontRef
+create_cg_font (hb_face_t *face)
+{
+  if (face->destroy == _hb_cg_font_release)
+    return CGFontRetain ((CGFontRef) face->user_data);
+
+  hb_blob_t *face_blob = hb_face_reference_blob (face);
+  unsigned int length;
+  const char *bytes = hb_blob_get_data (face_blob, &length);
+  if (unlikely (!length))
+    DEBUG_MSG (CORETEXT, face, "Face has empty blob");
+
+  /* The provider takes the blob reference and gives it back through
+   * release_data().
+   * NOTE(review): if provider creation fails, nothing here destroys the
+   * blob -- confirm whether CoreGraphics invokes the callback in that case. */
+  CGDataProviderRef provider = CGDataProviderCreateWithData (face_blob, bytes, length, &release_data);
+  if (unlikely (!provider))
+    return nullptr;
+
+  CGFontRef cg_font = CGFontCreateWithDataProvider (provider);
+  if (unlikely (!cg_font))
+    DEBUG_MSG (CORETEXT, face, "Face CGFontCreateWithDataProvider() failed");
+  CGDataProviderRelease (provider);
+  return cg_font;
+}
+
+/* Creates a CTFont of size @font_size for @cg_font. The caller owns the
+ * result; nullptr is returned when CoreText cannot create the font.
+ *
+ * Steps:
+ *  1. For the system UI fonts (.SFNSText / .SFNSDisplay) try
+ *     CTFontCreateUIFontForLanguage(), keeping the result only if it has the
+ *     same PostScript name as @cg_font.
+ *  2. Otherwise (or if step 1 did not stick) use
+ *     CTFontCreateWithGraphicsFont().
+ *  3. Except on old CoreText versions (non-emoji fonts), replace the font by a
+ *     copy whose cascade list is just LastResort, provided the copy still
+ *     points at the same font file.
+ *
+ * Every string obtained from a Copy call is checked for null before it is
+ * passed to a CFString function or to CFRelease, as is the UI font returned
+ * in step 1. */
+static CTFontRef
+create_ct_font (CGFontRef cg_font, CGFloat font_size)
+{
+  CTFontRef ct_font = nullptr;
+
+  /* CoreText does not enable trak table usage / tracking when creating a CTFont
+   * using CTFontCreateWithGraphicsFont. The only way of enabling tracking seems
+   * to be through the CTFontCreateUIFontForLanguage call. */
+  CFStringRef cg_postscript_name = CGFontCopyPostScriptName (cg_font);
+  if (cg_postscript_name &&
+      (CFStringHasPrefix (cg_postscript_name, CFSTR (".SFNSText")) ||
+       CFStringHasPrefix (cg_postscript_name, CFSTR (".SFNSDisplay"))))
+  {
+#if !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) && MAC_OS_X_VERSION_MIN_REQUIRED < 1080
+# define kCTFontUIFontSystem kCTFontSystemFontType
+# define kCTFontUIFontEmphasizedSystem kCTFontEmphasizedSystemFontType
+#endif
+    CTFontUIFontType font_type = kCTFontUIFontSystem;
+    if (CFStringHasSuffix (cg_postscript_name, CFSTR ("-Bold")))
+      font_type = kCTFontUIFontEmphasizedSystem;
+
+    ct_font = CTFontCreateUIFontForLanguage (font_type, font_size, nullptr);
+    if (ct_font)
+    {
+      /* Only keep the UI font if it really is the font we were asked for. */
+      CFStringRef ct_result_name = CTFontCopyPostScriptName(ct_font);
+      if (!ct_result_name ||
+          CFStringCompare (ct_result_name, cg_postscript_name, 0) != kCFCompareEqualTo)
+      {
+        CFRelease(ct_font);
+        ct_font = nullptr;
+      }
+      if (ct_result_name)
+        CFRelease (ct_result_name);
+    }
+  }
+  if (cg_postscript_name)
+    CFRelease (cg_postscript_name);
+
+  if (!ct_font)
+    ct_font = CTFontCreateWithGraphicsFont (cg_font, font_size, nullptr, nullptr);
+
+  if (unlikely (!ct_font)) {
+    DEBUG_MSG (CORETEXT, cg_font, "Font CTFontCreateWithGraphicsFont() failed");
+    return nullptr;
+  }
+
+  /* crbug.com/576941 and crbug.com/625902 and the investigation in the latter
+   * bug indicate that the cascade list reconfiguration occasionally causes
+   * crashes in CoreText on OS X 10.9, thus let's skip this step on older
+   * operating system versions. Except for the emoji font, where _not_
+   * reconfiguring the cascade list causes CoreText crashes. For details, see
+   * crbug.com/549610 */
+  // 0x00070000 stands for "kCTVersionNumber10_10", see CoreText.h
+  if (&CTGetCoreTextVersion != nullptr && CTGetCoreTextVersion() < 0x00070000) {
+    CFStringRef fontName = CTFontCopyPostScriptName (ct_font);
+    bool isEmojiFont = fontName &&
+                       CFStringCompare (fontName, CFSTR("AppleColorEmoji"), 0) == kCFCompareEqualTo;
+    if (fontName)
+      CFRelease (fontName);
+    if (!isEmojiFont)
+      return ct_font;
+  }
+
+  CFURLRef original_url = nullptr;
+#if !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) && MAC_OS_X_VERSION_MIN_REQUIRED < 1060
+  ATSFontRef atsFont;
+  FSRef fsref;
+  OSStatus status;
+  atsFont = CTFontGetPlatformFont (ct_font, NULL);
+  status = ATSFontGetFileReference (atsFont, &fsref);
+  if (status == noErr)
+    original_url = CFURLCreateFromFSRef (NULL, &fsref);
+#else
+  original_url = (CFURLRef) CTFontCopyAttribute (ct_font, kCTFontURLAttribute);
+#endif
+
+  /* Create font copy with cascade list that has LastResort first; this speeds up CoreText
+   * font fallback which we don't need anyway. */
+  {
+    CTFontDescriptorRef last_resort_font_desc = get_last_resort_font_desc ();
+    CTFontRef new_ct_font = CTFontCreateCopyWithAttributes (ct_font, 0.0, nullptr, last_resort_font_desc);
+    CFRelease (last_resort_font_desc);
+    if (new_ct_font)
+    {
+      /* The CTFontCreateCopyWithAttributes call fails to stay on the same font
+       * when reconfiguring the cascade list and may switch to a different font
+       * when there are fonts that go by the same name, since the descriptor is
+       * just name and size.
+       *
+       * Avoid reconfiguring the cascade lists if the new font is outside the
+       * system locations that we cannot access from the sandboxed renderer
+       * process in Blink. This can be detected by the new file URL location
+       * that the newly found font points to. */
+      CFURLRef new_url = nullptr;
+#if !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) && MAC_OS_X_VERSION_MIN_REQUIRED < 1060
+      atsFont = CTFontGetPlatformFont (new_ct_font, NULL);
+      status = ATSFontGetFileReference (atsFont, &fsref);
+      if (status == noErr)
+        new_url = CFURLCreateFromFSRef (NULL, &fsref);
+#else
+      new_url = (CFURLRef) CTFontCopyAttribute (new_ct_font, kCTFontURLAttribute);
+#endif
+      // Keep reconfigured font if URL cannot be retrieved (seems to be the case
+      // on Mac OS 10.12 Sierra), speculative fix for crbug.com/625606
+      if (!original_url || !new_url || CFEqual (original_url, new_url)) {
+        CFRelease (ct_font);
+        ct_font = new_ct_font;
+      } else {
+        CFRelease (new_ct_font);
+        DEBUG_MSG (CORETEXT, ct_font, "Discarding reconfigured CTFont, location changed.");
+      }
+      if (new_url)
+        CFRelease (new_url);
+    }
+    else
+      DEBUG_MSG (CORETEXT, ct_font, "Font copy with empty cascade list failed");
+  }
+
+  if (original_url)
+    CFRelease (original_url);
+  return ct_font;
+}
+
+/* Shaper hook: the CoreText face data is the CGFont itself, returned under an
+ * opaque pointer type. Returns nullptr when no CGFont can be made for @face. */
+hb_coretext_face_data_t *
+_hb_coretext_shaper_face_data_create (hb_face_t *face)
+{
+  CGFontRef cg_font = create_cg_font (face);
+  if (likely (cg_font))
+    return (hb_coretext_face_data_t *) cg_font;
+
+  DEBUG_MSG (CORETEXT, face, "CGFont creation failed..");
+  return nullptr;
+}
+
+/* Shaper hook: releases the CGFont stored as the face data. */
+void
+_hb_coretext_shaper_face_data_destroy (hb_coretext_face_data_t *data)
+{
+  CGFontRef cg_font = (CGFontRef) data;
+  CFRelease (cg_font);
+}
+
+/**
+ * hb_coretext_face_create:
+ * @cg_font: The CGFontRef to build the face from
+ *
+ * Creates an #hb_face_t face object whose tables are read from
+ * the specified CGFontRef.
+ *
+ * Return value: the new #hb_face_t face object
+ *
+ * Since: 0.9.10
+ */
+hb_face_t *
+hb_coretext_face_create (CGFontRef cg_font)
+{
+ /* Take an extra reference on the CGFont; it is handed to the face as the
+ * table callback's user data, with _hb_cg_font_release as its destroy hook. */
+ CGFontRef retained_font = CGFontRetain (cg_font);
+ return hb_face_create_for_tables (_hb_cg_reference_table, retained_font, _hb_cg_font_release);
+}
+
+/**
+ * hb_coretext_face_get_cg_font:
+ * @face: The #hb_face_t to query
+ *
+ * Fetches the CGFontRef that backs the CoreText shaper data
+ * of an #hb_face_t face object.
+ *
+ * Return value: the CGFontRef found
+ *
+ * Since: 0.9.10
+ */
+CGFontRef
+hb_coretext_face_get_cg_font (hb_face_t *face)
+{
+ /* The CoreText face data pointer is the CGFontRef itself. */
+ const hb_coretext_face_data_t *face_data = face->data.coretext;
+ return (CGFontRef) (const void *) face_data;
+}
+
+
+/* Shaper hook: builds the CoreText per-font data for @font.
+ *
+ * The data is a CTFontRef created from the face's CGFont at the font's point
+ * size (font->ptem), or at HB_CORETEXT_DEFAULT_FONT_SIZE when ptem is not
+ * positive. Returns nullptr if the face has no CoreText data or if the CTFont
+ * could not be created. */
+hb_coretext_font_data_t *
+_hb_coretext_shaper_font_data_create (hb_font_t *font)
+{
+ hb_face_t *face = font->face;
+ const hb_coretext_face_data_t *face_data = face->data.coretext;
+ if (unlikely (!face_data)) return nullptr;
+ CGFontRef cg_font = (CGFontRef) (const void *) face->data.coretext;
+
+ CGFloat font_size = (CGFloat) (font->ptem <= 0.f ? HB_CORETEXT_DEFAULT_FONT_SIZE : font->ptem);
+ CTFontRef ct_font = create_ct_font (cg_font, font_size);
+
+ if (unlikely (!ct_font))
+ {
+ /* It is the CTFont (not the CGFont) that failed to be created here. */
+ DEBUG_MSG (CORETEXT, font, "CTFont creation failed..");
+ return nullptr;
+ }
+
+ return (hb_coretext_font_data_t *) ct_font;
+}
+
+/* Shaper hook: drops the reference held on the CTFontRef that is stored as
+ * the per-font data. */
+void
+_hb_coretext_shaper_font_data_destroy (hb_coretext_font_data_t *data)
+{
+ CTFontRef text_font = (CTFontRef) data;
+ CFRelease (text_font);
+}
+
+/* Returns the CoreText font data of @font, first discarding it if its CTFont
+ * size no longer matches the size the font would be created with now.
+ *
+ * The expected size is computed exactly as in
+ * _hb_coretext_shaper_font_data_create(): font->ptem, or
+ * HB_CORETEXT_DEFAULT_FONT_SIZE when ptem is not positive. Comparing against
+ * the raw ptem instead would make a font without ptem look stale on every
+ * call and needlessly destroy and recreate its CTFont each time.
+ *
+ * Returns nullptr if the font has no CoreText data. */
+static const hb_coretext_font_data_t *
+hb_coretext_font_data_sync (hb_font_t *font)
+{
+retry:
+ const hb_coretext_font_data_t *data = font->data.coretext;
+ if (unlikely (!data)) return nullptr;
+
+ CGFloat font_size = (CGFloat) (font->ptem <= 0.f ? HB_CORETEXT_DEFAULT_FONT_SIZE : font->ptem);
+ if (fabs (CTFontGetSize ((CTFontRef) data) - font_size) > .5)
+ {
+ /* XXX-MT-bug
+ * Note that evaluating condition above can be dangerous if another thread
+ * got here first and destructed data. That's, as always, bad use pattern.
+ * If you modify the font (change font size), other threads must not be
+ * using it at the same time. However, since this check is delayed to
+ * when one actually tries to shape something, this is a XXX race condition
+ * (and the only one we have that I know of) right now. Ie. you modify the
+ * font size in one thread, then (supposedly safely) try to use it from two
+ * or more threads and BOOM! I'm not sure how to fix this. We want RCU.
+ */
+
+ /* Drop and recreate. */
+ /* If someone dropped it in the mean time, throw it away and don't touch it.
+ * Otherwise, destruct it. */
+ if (likely (font->data.coretext.cmpexch (const_cast<hb_coretext_font_data_t *> (data), nullptr)))
+ _hb_coretext_shaper_font_data_destroy (const_cast<hb_coretext_font_data_t *> (data));
+ else
+ goto retry;
+ }
+ /* Reading the member again hands back the current data (a fresh one if the
+ * stale one was dropped above). */
+ return font->data.coretext;
+}
+
+/**
+ * hb_coretext_font_create:
+ * @ct_font: The CTFontRef to build the font from
+ *
+ * Creates an #hb_font_t font object from the specified
+ * CTFontRef; the font's point size is taken from @ct_font.
+ *
+ * Return value: the new #hb_font_t font object
+ *
+ * Since: 1.7.2
+ **/
+hb_font_t *
+hb_coretext_font_create (CTFontRef ct_font)
+{
+ /* Build a face from the CTFont's CGFont; the local references on the CGFont
+ * and on the face are dropped once the next object in the chain exists. */
+ CGFontRef graphics_font = CTFontCopyGraphicsFont (ct_font, nullptr);
+ hb_face_t *new_face = hb_coretext_face_create (graphics_font);
+ CFRelease (graphics_font);
+ hb_font_t *new_font = hb_font_create (new_face);
+ hb_face_destroy (new_face);
+
+ /* An immutable result is returned untouched (presumably the shared empty
+ * font handed out when creation failed -- confirm against hb_font_create). */
+ if (likely (!hb_object_is_immutable (new_font)))
+ {
+ hb_font_set_ptem (new_font, CTFontGetSize (ct_font));
+
+ /* Let there be dragons here: install the caller's CTFont, with an extra
+ * retain, as the font's CoreText data if none has been set yet. */
+ new_font->data.coretext.cmpexch (nullptr, (hb_coretext_font_data_t *) CFRetain (ct_font));
+ }
+
+ return new_font;
+}
+
+/**
+ * hb_coretext_font_get_ct_font:
+ * @font: #hb_font_t to work upon
+ *
+ * Fetches the CTFontRef associated with the specified
+ * #hb_font_t font object.
+ *
+ * Return value: the CTFontRef found, or %NULL if the font has
+ * no CoreText data
+ *
+ * Since: 0.9.10
+ */
+CTFontRef
+hb_coretext_font_get_ct_font (hb_font_t *font)
+{
+ /* Goes through hb_coretext_font_data_sync() so the size check there is
+ * applied before the CTFont is handed out. */
+ const hb_coretext_font_data_t *data = hb_coretext_font_data_sync (font);
+ return data ? (CTFontRef) data : nullptr;
+}
+
+
+/*
+ * shaper
+ */
+
+/* One font feature and the value requested for it. As filled in by
+ * _hb_coretext_shape(): when building for Mac OS X older than 10.10 these are
+ * an AAT feature type and selector, otherwise an OpenType feature tag and its
+ * value. */
+struct feature_record_t {
+ unsigned int feature;
+ unsigned int setting;
+};
+
+/* A feature that is in effect at some point of the text, together with its
+ * position (order) in the caller's feature list. */
+struct active_feature_t {
+ feature_record_t rec;
+ unsigned int order;
+
+ /* Three-way comparison: by feature, then by order, then by setting. */
+ HB_INTERNAL static int cmp (const void *pa, const void *pb) {
+ const active_feature_t &lhs = *(const active_feature_t *) pa;
+ const active_feature_t &rhs = *(const active_feature_t *) pb;
+ if (lhs.rec.feature != rhs.rec.feature)
+ return lhs.rec.feature < rhs.rec.feature ? -1 : 1;
+ if (lhs.order != rhs.order)
+ return lhs.order < rhs.order ? -1 : 1;
+ if (lhs.rec.setting != rhs.rec.setting)
+ return lhs.rec.setting < rhs.rec.setting ? -1 : 1;
+ return 0;
+ }
+ /* Equality against a pointer, defined in terms of cmp(). */
+ bool operator== (const active_feature_t *f) {
+ return !cmp (this, f);
+ }
+};
+
+/* The point in the text (index) at which a feature starts or stops applying. */
+struct feature_event_t {
+ unsigned int index;
+ bool start;
+ active_feature_t feature;
+
+ /* Three-way comparison: by index, then end events (start == false) before
+ * start events, then by the feature itself. */
+ HB_INTERNAL static int cmp (const void *pa, const void *pb) {
+ const feature_event_t &lhs = *(const feature_event_t *) pa;
+ const feature_event_t &rhs = *(const feature_event_t *) pb;
+ if (lhs.index != rhs.index)
+ return lhs.index < rhs.index ? -1 : 1;
+ if (lhs.start != rhs.start)
+ return lhs.start < rhs.start ? -1 : 1;
+ return active_feature_t::cmp (&lhs.feature, &rhs.feature);
+ }
+};
+
+/* A run of cluster indices that shares one set of active features. `font` is
+ * a copy of the main CTFont carrying those features, or nullptr when no
+ * feature is active in the range; _hb_coretext_shape() releases it on exit. */
+struct range_record_t {
+ CTFontRef font;
+ unsigned int index_first; /* == start */
+ unsigned int index_last; /* == end - 1 */
+};
+
+
+/* Shapes @buffer with CoreText, using @font and the requested @features.
+ *
+ * Outline, as implemented below:
+ * - for the monotone-graphemes cluster level, merges marks into the cluster
+ * of the preceding character;
+ * - turns the feature list into ranges, each with its own CTFont copy that
+ * carries the features active in that range;
+ * - converts the buffer's codepoints to UTF-16 in the buffer's scratch area;
+ * - builds an attributed string and a CTLine from it;
+ * - copies glyphs, clusters and positions of every run back into @buffer,
+ * emitting glyph 0 (.notdef) for runs CoreText shaped with a fallback font;
+ * - makes the cluster values monotonic when CoreText reports reordering.
+ *
+ * Returns true on success, false if a CoreText / CoreFoundation object could
+ * not be created or the buffer could not be grown. @shape_plan is not used. */
+hb_bool_t
+_hb_coretext_shape (hb_shape_plan_t *shape_plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features)
+{
+ hb_face_t *face = font->face;
+ CGFontRef cg_font = (CGFontRef) (const void *) face->data.coretext;
+ CTFontRef ct_font = (CTFontRef) hb_coretext_font_data_sync (font);
+
+ /* Factors that convert CoreText coordinates (at the CTFont's point size)
+ * into the font's x_scale / y_scale units. */
+ CGFloat ct_font_size = CTFontGetSize (ct_font);
+ CGFloat x_mult = (CGFloat) font->x_scale / ct_font_size;
+ CGFloat y_mult = (CGFloat) font->y_scale / ct_font_size;
+
+ /* Attach marks to their bases, to match the 'ot' shaper.
+ * Adapted from a very old version of hb-ot-shape:hb_form_clusters().
+ * Note that this only makes us be closer to the 'ot' shaper,
+ * but by no means the same. For example, if there's
+ * B1 M1 B2 M2, and B1-B2 form a ligature, M2's cluster will
+ * continue pointing to B2 even though B2 was merged into B1's
+ * cluster... */
+ if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
+ {
+ hb_unicode_funcs_t *unicode = buffer->unicode;
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ for (unsigned int i = 1; i < count; i++)
+ if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (unicode->general_category (info[i].codepoint)))
+ buffer->merge_clusters (i - 1, i + 1);
+ }
+
+ hb_vector_t<feature_record_t> feature_records;
+ hb_vector_t<range_record_t> range_records;
+
+ /*
+ * Set up features.
+ * (copied + modified from code from hb-uniscribe.cc)
+ */
+ if (num_features)
+ {
+ /* Sort features by start/end events. */
+ hb_vector_t<feature_event_t> feature_events;
+ for (unsigned int i = 0; i < num_features; i++)
+ {
+ active_feature_t feature;
+
+#if MAC_OS_X_VERSION_MIN_REQUIRED < 101000
+ const hb_aat_feature_mapping_t * mapping = hb_aat_layout_find_feature_mapping (features[i].tag);
+ if (!mapping)
+ continue;
+
+ feature.rec.feature = mapping->aatFeatureType;
+ feature.rec.setting = features[i].value ? mapping->selectorToEnable : mapping->selectorToDisable;
+#else
+ feature.rec.feature = features[i].tag;
+ feature.rec.setting = features[i].value;
+#endif
+ feature.order = i;
+
+ feature_event_t *event;
+
+ event = feature_events.push ();
+ event->index = features[i].start;
+ event->start = true;
+ event->feature = feature;
+
+ event = feature_events.push ();
+ event->index = features[i].end;
+ event->start = false;
+ event->feature = feature;
+ }
+ feature_events.qsort ();
+ /* Add a strategic final event. */
+ {
+ active_feature_t feature;
+ feature.rec.feature = HB_TAG_NONE;
+ feature.rec.setting = 0;
+ feature.order = num_features + 1;
+
+ feature_event_t *event = feature_events.push ();
+ event->index = 0; /* This value does magic. */
+ event->start = false;
+ event->feature = feature;
+ }
+
+ /* Scan events and save features for each range. */
+ hb_vector_t<active_feature_t> active_features;
+ unsigned int last_index = 0;
+ for (unsigned int i = 0; i < feature_events.length; i++)
+ {
+ feature_event_t *event = &feature_events[i];
+
+ if (event->index != last_index)
+ {
+ /* Save a snapshot of active features and the range. */
+ range_record_t *range = range_records.push ();
+
+ if (active_features.length)
+ {
+ CFMutableArrayRef features_array = CFArrayCreateMutable(kCFAllocatorDefault, 0, &kCFTypeArrayCallBacks);
+
+ /* TODO sort and resolve conflicting features? */
+ /* active_features.qsort (); */
+ for (unsigned int j = 0; j < active_features.length; j++)
+ {
+#if MAC_OS_X_VERSION_MIN_REQUIRED < 101000
+ CFStringRef keys[] = {
+ kCTFontFeatureTypeIdentifierKey,
+ kCTFontFeatureSelectorIdentifierKey
+ };
+ CFNumberRef values[] = {
+ CFNumberCreate (kCFAllocatorDefault, kCFNumberIntType, &active_features[j].rec.feature),
+ CFNumberCreate (kCFAllocatorDefault, kCFNumberIntType, &active_features[j].rec.setting)
+ };
+#else
+ char tag[5] = {HB_UNTAG (active_features[j].rec.feature)};
+ CFTypeRef keys[] = {
+ kCTFontOpenTypeFeatureTag,
+ kCTFontOpenTypeFeatureValue
+ };
+ CFTypeRef values[] = {
+ CFStringCreateWithCString (kCFAllocatorDefault, tag, kCFStringEncodingASCII),
+ CFNumberCreate (kCFAllocatorDefault, kCFNumberIntType, &active_features[j].rec.setting)
+ };
+#endif
+ static_assert ((ARRAY_LENGTH_CONST (keys) == ARRAY_LENGTH_CONST (values)), "");
+ CFDictionaryRef dict = CFDictionaryCreate (kCFAllocatorDefault,
+ (const void **) keys,
+ (const void **) values,
+ ARRAY_LENGTH (keys),
+ &kCFTypeDictionaryKeyCallBacks,
+ &kCFTypeDictionaryValueCallBacks);
+ /* The dictionary was created with the CFType value callbacks, so the
+ * local references on the values can be dropped now. */
+ for (unsigned int i = 0; i < ARRAY_LENGTH (values); i++)
+ CFRelease (values[i]);
+
+ CFArrayAppendValue (features_array, dict);
+ CFRelease (dict);
+
+ }
+
+ CFDictionaryRef attributes = CFDictionaryCreate (kCFAllocatorDefault,
+ (const void **) &kCTFontFeatureSettingsAttribute,
+ (const void **) &features_array,
+ 1,
+ &kCFTypeDictionaryKeyCallBacks,
+ &kCFTypeDictionaryValueCallBacks);
+ CFRelease (features_array);
+
+ CTFontDescriptorRef font_desc = CTFontDescriptorCreateWithAttributes (attributes);
+ CFRelease (attributes);
+
+ /* This font copy is owned by the range record and released at `fail:`. */
+ range->font = CTFontCreateCopyWithAttributes (ct_font, 0.0, nullptr, font_desc);
+ CFRelease (font_desc);
+ }
+ else
+ {
+ range->font = nullptr;
+ }
+
+ range->index_first = last_index;
+ range->index_last = event->index - 1;
+
+ last_index = event->index;
+ }
+
+ if (event->start)
+ {
+ active_features.push (event->feature);
+ } else {
+ active_feature_t *feature = active_features.find (&event->feature);
+ if (feature)
+ active_features.remove (feature - active_features.arrayZ);
+ }
+ }
+ }
+
+ unsigned int scratch_size;
+ hb_buffer_t::scratch_buffer_t *scratch = buffer->get_scratch_buffer (&scratch_size);
+
+ /* ALLOCATE_ARRAY carves `len` elements of `Type` out of the scratch area,
+ * declaring `name` to point at them; `on_no_room` runs if they do not fit. */
+#define ALLOCATE_ARRAY(Type, name, len, on_no_room) \
+ Type *name = (Type *) scratch; \
+ do { \
+ unsigned int _consumed = DIV_CEIL ((len) * sizeof (Type), sizeof (*scratch)); \
+ if (unlikely (_consumed > scratch_size)) \
+ { \
+ on_no_room; \
+ assert (0); \
+ } \
+ scratch += _consumed; \
+ scratch_size -= _consumed; \
+ } while (0)
+
+ /* UTF-16 copy of the text: at most two code units per codepoint. */
+ ALLOCATE_ARRAY (UniChar, pchars, buffer->len * 2, ((void)nullptr) /*nothing*/);
+ unsigned int chars_len = 0;
+ for (unsigned int i = 0; i < buffer->len; i++) {
+ hb_codepoint_t c = buffer->info[i].codepoint;
+ if (likely (c <= 0xFFFFu))
+ pchars[chars_len++] = c;
+ else if (unlikely (c > 0x10FFFFu))
+ pchars[chars_len++] = 0xFFFDu;
+ else {
+ pchars[chars_len++] = 0xD800u + ((c - 0x10000u) >> 10);
+ pchars[chars_len++] = 0xDC00u + ((c - 0x10000u) & ((1u << 10) - 1));
+ }
+ }
+
+ /* log_clusters maps every UTF-16 code unit back to its buffer cluster. */
+ ALLOCATE_ARRAY (unsigned int, log_clusters, chars_len, ((void)nullptr) /*nothing*/);
+ chars_len = 0;
+ for (unsigned int i = 0; i < buffer->len; i++)
+ {
+ hb_codepoint_t c = buffer->info[i].codepoint;
+ unsigned int cluster = buffer->info[i].cluster;
+ log_clusters[chars_len++] = cluster;
+ if (hb_in_range (c, 0x10000u, 0x10FFFFu))
+ log_clusters[chars_len++] = cluster; /* Surrogates. */
+ }
+
+#define FAIL(...) \
+ HB_STMT_START { \
+ DEBUG_MSG (CORETEXT, nullptr, __VA_ARGS__); \
+ ret = false; \
+ goto fail; \
+ } HB_STMT_END
+
+ bool ret = true;
+ CFStringRef string_ref = nullptr;
+ CTLineRef line = nullptr;
+
+ /* This block is never entered from above; it is only reached through
+ * `goto resize_and_retry` and then falls through to rebuild the line. */
+ if (false)
+ {
+resize_and_retry:
+ DEBUG_MSG (CORETEXT, buffer, "Buffer resize");
+ /* string_ref uses the scratch-buffer for backing store, and line references
+ * string_ref (via attr_string). We must release those before resizing buffer. */
+ assert (string_ref);
+ assert (line);
+ CFRelease (string_ref);
+ CFRelease (line);
+ string_ref = nullptr;
+ line = nullptr;
+
+ /* Get previous start-of-scratch-area, that we use later for readjusting
+ * our existing scratch arrays. */
+ unsigned int old_scratch_used;
+ hb_buffer_t::scratch_buffer_t *old_scratch;
+ old_scratch = buffer->get_scratch_buffer (&old_scratch_used);
+ old_scratch_used = scratch - old_scratch;
+
+ if (unlikely (!buffer->ensure (buffer->allocated * 2)))
+ FAIL ("Buffer resize failed");
+
+ /* Adjust scratch, pchars, and log_cluster arrays. This is ugly, but really the
+ * cleanest way to do without completely restructuring the rest of this shaper. */
+ scratch = buffer->get_scratch_buffer (&scratch_size);
+ pchars = reinterpret_cast<UniChar *> (((char *) scratch + ((char *) pchars - (char *) old_scratch)));
+ log_clusters = reinterpret_cast<unsigned int *> (((char *) scratch + ((char *) log_clusters - (char *) old_scratch)));
+ scratch += old_scratch_used;
+ scratch_size -= old_scratch_used;
+ }
+ {
+ /* kCFAllocatorNull as the deallocator: the string borrows pchars and does
+ * not free it. */
+ string_ref = CFStringCreateWithCharactersNoCopy (nullptr,
+ pchars, chars_len,
+ kCFAllocatorNull);
+ if (unlikely (!string_ref))
+ FAIL ("CFStringCreateWithCharactersNoCopy failed");
+
+ /* Create an attributed string, populate it, and create a line from it, then release attributed string. */
+ {
+ CFMutableAttributedStringRef attr_string = CFAttributedStringCreateMutable (kCFAllocatorDefault,
+ chars_len);
+ if (unlikely (!attr_string))
+ FAIL ("CFAttributedStringCreateMutable failed");
+ CFAttributedStringReplaceString (attr_string, CFRangeMake (0, 0), string_ref);
+ if (HB_DIRECTION_IS_VERTICAL (buffer->props.direction))
+ {
+ CFAttributedStringSetAttribute (attr_string, CFRangeMake (0, chars_len),
+ kCTVerticalFormsAttributeName, kCFBooleanTrue);
+ }
+
+ if (buffer->props.language)
+ {
+/* What's the iOS equivalent of this check?
+ * The symbol was introduced in iOS 7.0.
+ * At any rate, our fallback is safe and works fine. */
+#if !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) && MAC_OS_X_VERSION_MIN_REQUIRED < 1090
+# define kCTLanguageAttributeName CFSTR ("NSLanguage")
+#endif
+ CFStringRef lang = CFStringCreateWithCStringNoCopy (kCFAllocatorDefault,
+ hb_language_to_string (buffer->props.language),
+ kCFStringEncodingUTF8,
+ kCFAllocatorNull);
+ if (unlikely (!lang))
+ {
+ CFRelease (attr_string);
+ FAIL ("CFStringCreateWithCStringNoCopy failed");
+ }
+ CFAttributedStringSetAttribute (attr_string, CFRangeMake (0, chars_len),
+ kCTLanguageAttributeName, lang);
+ CFRelease (lang);
+ }
+ /* Main font for the whole string; feature ranges override it below. */
+ CFAttributedStringSetAttribute (attr_string, CFRangeMake (0, chars_len),
+ kCTFontAttributeName, ct_font);
+
+ if (num_features && range_records.length)
+ {
+ unsigned int start = 0;
+ range_record_t *last_range = &range_records[0];
+ for (unsigned int k = 0; k < chars_len; k++)
+ {
+ range_record_t *range = last_range;
+ while (log_clusters[k] < range->index_first)
+ range--;
+ while (log_clusters[k] > range->index_last)
+ range++;
+ if (range != last_range)
+ {
+ if (last_range->font)
+ CFAttributedStringSetAttribute (attr_string, CFRangeMake (start, k - start),
+ kCTFontAttributeName, last_range->font);
+
+ start = k;
+ }
+
+ last_range = range;
+ }
+ if (start != chars_len && last_range->font)
+ CFAttributedStringSetAttribute (attr_string, CFRangeMake (start, chars_len - start),
+ kCTFontAttributeName, last_range->font);
+ }
+ /* Enable/disable kern if requested.
+ *
+ * Note: once kern is disabled, reenabling it doesn't currently seem to work in CoreText.
+ */
+ if (num_features)
+ {
+ unsigned int zeroint = 0;
+ CFNumberRef zero = CFNumberCreate (kCFAllocatorDefault, kCFNumberIntType, &zeroint);
+ for (unsigned int i = 0; i < num_features; i++)
+ {
+ const hb_feature_t &feature = features[i];
+ if (feature.tag == HB_TAG('k','e','r','n') &&
+ feature.start < chars_len && feature.start < feature.end)
+ {
+ CFRange feature_range = CFRangeMake (feature.start,
+ hb_min (feature.end, chars_len) - feature.start);
+ if (feature.value)
+ CFAttributedStringRemoveAttribute (attr_string, feature_range, kCTKernAttributeName);
+ else
+ CFAttributedStringSetAttribute (attr_string, feature_range, kCTKernAttributeName, zero);
+ }
+ }
+ CFRelease (zero);
+ }
+
+ /* Force the embedding level from the buffer direction: 0 for forward,
+ * 1 for backward. */
+ int level = HB_DIRECTION_IS_FORWARD (buffer->props.direction) ? 0 : 1;
+ CFNumberRef level_number = CFNumberCreate (kCFAllocatorDefault, kCFNumberIntType, &level);
+#if !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) && MAC_OS_X_VERSION_MIN_REQUIRED < 1060
+ extern const CFStringRef kCTTypesetterOptionForcedEmbeddingLevel;
+#endif
+ CFDictionaryRef options = CFDictionaryCreate (kCFAllocatorDefault,
+ (const void **) &kCTTypesetterOptionForcedEmbeddingLevel,
+ (const void **) &level_number,
+ 1,
+ &kCFTypeDictionaryKeyCallBacks,
+ &kCFTypeDictionaryValueCallBacks);
+ CFRelease (level_number);
+ if (unlikely (!options))
+ {
+ CFRelease (attr_string);
+ FAIL ("CFDictionaryCreate failed");
+ }
+
+ CTTypesetterRef typesetter = CTTypesetterCreateWithAttributedStringAndOptions (attr_string, options);
+ CFRelease (options);
+ CFRelease (attr_string);
+ if (unlikely (!typesetter))
+ FAIL ("CTTypesetterCreateWithAttributedStringAndOptions failed");
+
+ line = CTTypesetterCreateLine (typesetter, CFRangeMake(0, 0));
+ CFRelease (typesetter);
+ if (unlikely (!line))
+ FAIL ("CTTypesetterCreateLine failed");
+ }
+
+ CFArrayRef glyph_runs = CTLineGetGlyphRuns (line);
+ unsigned int num_runs = CFArrayGetCount (glyph_runs);
+ DEBUG_MSG (CORETEXT, nullptr, "Num runs: %d", num_runs);
+
+ /* From here on the buffer is refilled with the output glyphs. */
+ buffer->len = 0;
+ uint32_t status_and = ~0, status_or = 0;
+ double advances_so_far = 0;
+ /* For right-to-left runs, CoreText returns the glyphs positioned such that
+ * any trailing whitespace is to the left of (0,0). Adjust coordinate system
+ * to fix for that. Test with any RTL string with trailing spaces.
+ * https://crbug.com/469028
+ */
+ if (HB_DIRECTION_IS_BACKWARD (buffer->props.direction))
+ {
+ advances_so_far -= CTLineGetTrailingWhitespaceWidth (line);
+ if (HB_DIRECTION_IS_VERTICAL (buffer->props.direction))
+ advances_so_far = -advances_so_far;
+ }
+
+ const CFRange range_all = CFRangeMake (0, 0);
+
+ for (unsigned int i = 0; i < num_runs; i++)
+ {
+ CTRunRef run = static_cast<CTRunRef>(CFArrayGetValueAtIndex (glyph_runs, i));
+ CTRunStatus run_status = CTRunGetStatus (run);
+ status_or |= run_status;
+ status_and &= run_status;
+ DEBUG_MSG (CORETEXT, run, "CTRunStatus: %x", run_status);
+ double run_advance = CTRunGetTypographicBounds (run, range_all, nullptr, nullptr, nullptr);
+ if (HB_DIRECTION_IS_VERTICAL (buffer->props.direction))
+ run_advance = -run_advance;
+ DEBUG_MSG (CORETEXT, run, "Run advance: %g", run_advance);
+
+ /* CoreText does automatic font fallback (AKA "cascading") for characters
+ * not supported by the requested font, and provides no way to turn it off,
+ * so we must detect if the returned run uses a font other than the requested
+ * one and fill in the buffer with .notdef glyphs instead of random glyph
+ * indices from a different font.
+ */
+ CFDictionaryRef attributes = CTRunGetAttributes (run);
+ CTFontRef run_ct_font = static_cast<CTFontRef>(CFDictionaryGetValue (attributes, kCTFontAttributeName));
+ if (!CFEqual (run_ct_font, ct_font))
+ {
+ /* The run doesn't use our main font instance. We have to figure out
+ * whether font fallback happened, or this is just CoreText giving us
+ * another CTFont using the same underlying CGFont. CoreText seems
+ * to do that in a variety of situations, one of which being vertical
+ * text, but also perhaps for caching reasons.
+ *
+ * First, see if it uses any of our subfonts created to set font features...
+ *
+ * Next, compare the CGFont to the one we used to create our fonts.
+ * Even this doesn't work all the time.
+ *
+ * Finally, we compare PS names, which I don't think are unique...
+ *
+ * Looks like if we really want to be sure here we have to modify the
+ * font to change the name table, similar to what we do in the uniscribe
+ * backend.
+ *
+ * However, even that wouldn't work if we were passed in the CGFont to
+ * construct a hb_face to begin with.
+ *
+ * See: https://github.com/harfbuzz/harfbuzz/pull/36
+ *
+ * Also see: https://bugs.chromium.org/p/chromium/issues/detail?id=597098
+ */
+ bool matched = false;
+ for (unsigned int i = 0; i < range_records.length; i++)
+ if (range_records[i].font && CFEqual (run_ct_font, range_records[i].font))
+ {
+ matched = true;
+ break;
+ }
+ if (!matched)
+ {
+ CGFontRef run_cg_font = CTFontCopyGraphicsFont (run_ct_font, nullptr);
+ if (run_cg_font)
+ {
+ matched = CFEqual (run_cg_font, cg_font);
+ CFRelease (run_cg_font);
+ }
+ }
+ if (!matched)
+ {
+ CFStringRef font_ps_name = CTFontCopyName (ct_font, kCTFontPostScriptNameKey);
+ CFStringRef run_ps_name = CTFontCopyName (run_ct_font, kCTFontPostScriptNameKey);
+ CFComparisonResult result = CFStringCompare (run_ps_name, font_ps_name, 0);
+ CFRelease (run_ps_name);
+ CFRelease (font_ps_name);
+ if (result == kCFCompareEqualTo)
+ matched = true;
+ }
+ if (!matched)
+ {
+ /* Fallback font detected: emit one .notdef per character of the run. */
+ CFRange range = CTRunGetStringRange (run);
+ DEBUG_MSG (CORETEXT, run, "Run used fallback font: %ld..%ld",
+ range.location, range.location + range.length);
+ if (!buffer->ensure_inplace (buffer->len + range.length))
+ goto resize_and_retry;
+ hb_glyph_info_t *info = buffer->info + buffer->len;
+
+ hb_codepoint_t notdef = 0;
+ hb_direction_t dir = buffer->props.direction;
+ hb_position_t x_advance, y_advance, x_offset, y_offset;
+ hb_font_get_glyph_advance_for_direction (font, notdef, dir, &x_advance, &y_advance);
+ hb_font_get_glyph_origin_for_direction (font, notdef, dir, &x_offset, &y_offset);
+ hb_position_t advance = x_advance + y_advance;
+ x_offset = -x_offset;
+ y_offset = -y_offset;
+
+ unsigned int old_len = buffer->len;
+ for (CFIndex j = range.location; j < range.location + range.length; j++)
+ {
+ UniChar ch = CFStringGetCharacterAtIndex (string_ref, j);
+ if (hb_in_range<UniChar> (ch, 0xDC00u, 0xDFFFu) && range.location < j)
+ {
+ ch = CFStringGetCharacterAtIndex (string_ref, j - 1);
+ if (hb_in_range<UniChar> (ch, 0xD800u, 0xDBFFu))
+ /* This is the second of a surrogate pair. Don't need .notdef
+ * for this one. */
+ continue;
+ }
+ if (buffer->unicode->is_default_ignorable (ch))
+ continue;
+
+ info->codepoint = notdef;
+ info->cluster = log_clusters[j];
+
+ /* mask / var1 / var2 temporarily carry the advance and the offsets;
+ * they are moved into buffer->pos once all runs are processed. */
+ info->mask = advance;
+ info->var1.i32 = x_offset;
+ info->var2.i32 = y_offset;
+
+ info++;
+ buffer->len++;
+ }
+ if (HB_DIRECTION_IS_BACKWARD (buffer->props.direction))
+ buffer->reverse_range (old_len, buffer->len);
+ advances_so_far += run_advance;
+ continue;
+ }
+ }
+
+ unsigned int num_glyphs = CTRunGetGlyphCount (run);
+ if (num_glyphs == 0)
+ continue;
+
+ if (!buffer->ensure_inplace (buffer->len + num_glyphs))
+ goto resize_and_retry;
+
+ hb_glyph_info_t *run_info = buffer->info + buffer->len;
+
+ /* Testing used to indicate that CTRunGetGlyphsPtr, etc (almost?) always
+ * succeed, and so copying data to our own buffer will be rare. Reports
+ * have it that this changed in OS X 10.10 Yosemite, and nullptr is returned
+ * frequently. At any rate, we can test that codepath by setting USE_PTR
+ * to false. */
+
+#define USE_PTR true
+
+#define SCRATCH_SAVE() \
+ unsigned int scratch_size_saved = scratch_size; \
+ hb_buffer_t::scratch_buffer_t *scratch_saved = scratch
+
+#define SCRATCH_RESTORE() \
+ scratch_size = scratch_size_saved; \
+ scratch = scratch_saved
+
+ { /* Setup glyphs */
+ SCRATCH_SAVE();
+ const CGGlyph* glyphs = USE_PTR ? CTRunGetGlyphsPtr (run) : nullptr;
+ if (!glyphs) {
+ ALLOCATE_ARRAY (CGGlyph, glyph_buf, num_glyphs, goto resize_and_retry);
+ CTRunGetGlyphs (run, range_all, glyph_buf);
+ glyphs = glyph_buf;
+ }
+ const CFIndex* string_indices = USE_PTR ? CTRunGetStringIndicesPtr (run) : nullptr;
+ if (!string_indices) {
+ ALLOCATE_ARRAY (CFIndex, index_buf, num_glyphs, goto resize_and_retry);
+ CTRunGetStringIndices (run, range_all, index_buf);
+ string_indices = index_buf;
+ }
+ hb_glyph_info_t *info = run_info;
+ for (unsigned int j = 0; j < num_glyphs; j++)
+ {
+ info->codepoint = glyphs[j];
+ info->cluster = log_clusters[string_indices[j]];
+ info++;
+ }
+ SCRATCH_RESTORE();
+ }
+ {
+ /* Setup positions.
+ * Note that CoreText does not return advances for glyphs. As such,
+ * for all but last glyph, we use the delta position to next glyph as
+ * advance (in the advance direction only), and for last glyph we set
+ * whatever is needed to make the whole run's advance add up. */
+ SCRATCH_SAVE();
+ const CGPoint* positions = USE_PTR ? CTRunGetPositionsPtr (run) : nullptr;
+ if (!positions) {
+ ALLOCATE_ARRAY (CGPoint, position_buf, num_glyphs, goto resize_and_retry);
+ CTRunGetPositions (run, range_all, position_buf);
+ positions = position_buf;
+ }
+ hb_glyph_info_t *info = run_info;
+ if (HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction))
+ {
+ hb_position_t x_offset = (positions[0].x - advances_so_far) * x_mult;
+ for (unsigned int j = 0; j < num_glyphs; j++)
+ {
+ double advance;
+ if (likely (j + 1 < num_glyphs))
+ advance = positions[j + 1].x - positions[j].x;
+ else /* last glyph */
+ advance = run_advance - (positions[j].x - positions[0].x);
+ info->mask = advance * x_mult;
+ info->var1.i32 = x_offset;
+ info->var2.i32 = positions[j].y * y_mult;
+ info++;
+ }
+ }
+ else
+ {
+ hb_position_t y_offset = (positions[0].y - advances_so_far) * y_mult;
+ for (unsigned int j = 0; j < num_glyphs; j++)
+ {
+ double advance;
+ if (likely (j + 1 < num_glyphs))
+ advance = positions[j + 1].y - positions[j].y;
+ else /* last glyph */
+ advance = run_advance - (positions[j].y - positions[0].y);
+ info->mask = advance * y_mult;
+ info->var1.i32 = positions[j].x * x_mult;
+ info->var2.i32 = y_offset;
+ info++;
+ }
+ }
+ SCRATCH_RESTORE();
+ advances_so_far += run_advance;
+ }
+#undef SCRATCH_RESTORE
+#undef SCRATCH_SAVE
+#undef USE_PTR
+#undef ALLOCATE_ARRAY
+
+ buffer->len += num_glyphs;
+ }
+
+ /* Mac OS 10.6 doesn't have kCTTypesetterOptionForcedEmbeddingLevel,
+ * or if it does, it doesn't respect it. So we get runs with wrong
+ * directions. As such, disable the assert... It wouldn't crash, but
+ * cursoring will be off...
+ *
+ * https://crbug.com/419769
+ */
+ if (false)
+ {
+ /* Make sure all runs had the expected direction. */
+ HB_UNUSED bool backward = HB_DIRECTION_IS_BACKWARD (buffer->props.direction);
+ assert (bool (status_and & kCTRunStatusRightToLeft) == backward);
+ assert (bool (status_or & kCTRunStatusRightToLeft) == backward);
+ }
+
+ buffer->clear_positions ();
+
+ /* Move the advances / offsets parked in mask, var1 and var2 into the
+ * position array. */
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ hb_glyph_position_t *pos = buffer->pos;
+ if (HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction))
+ for (unsigned int i = 0; i < count; i++)
+ {
+ pos->x_advance = info->mask;
+ pos->x_offset = info->var1.i32;
+ pos->y_offset = info->var2.i32;
+
+ info++, pos++;
+ }
+ else
+ for (unsigned int i = 0; i < count; i++)
+ {
+ pos->y_advance = info->mask;
+ pos->x_offset = info->var1.i32;
+ pos->y_offset = info->var2.i32;
+
+ info++, pos++;
+ }
+
+ /* Fix up clusters so that we never return out-of-order indices;
+ * if core text has reordered glyphs, we'll merge them to the
+ * beginning of the reordered cluster. CoreText is nice enough
+ * to tell us whenever it has produced nonmonotonic results...
+ * Note that we assume the input clusters were monotonic to
+ * begin with.
+ *
+ * This does *not* mean we'll form the same clusters as Uniscribe
+ * or the native OT backend, only that the cluster indices will be
+ * monotonic in the output buffer. */
+ if (count > 1 && (status_or & kCTRunStatusNonMonotonic))
+ {
+ hb_glyph_info_t *info = buffer->info;
+ if (HB_DIRECTION_IS_FORWARD (buffer->props.direction))
+ {
+ unsigned int cluster = info[count - 1].cluster;
+ for (unsigned int i = count - 1; i > 0; i--)
+ {
+ cluster = hb_min (cluster, info[i - 1].cluster);
+ info[i - 1].cluster = cluster;
+ }
+ }
+ else
+ {
+ unsigned int cluster = info[0].cluster;
+ for (unsigned int i = 1; i < count; i++)
+ {
+ cluster = hb_min (cluster, info[i].cluster);
+ info[i].cluster = cluster;
+ }
+ }
+ }
+ }
+
+ buffer->unsafe_to_break_all ();
+
+#undef FAIL
+
+ /* Common exit for success and failure: release whatever is still held. */
+fail:
+ if (string_ref)
+ CFRelease (string_ref);
+ if (line)
+ CFRelease (line);
+
+ for (unsigned int i = 0; i < range_records.length; i++)
+ if (range_records[i].font)
+ CFRelease (range_records[i].font);
+
+ return ret;
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-coretext.h b/thirdparty/harfbuzz/src/hb-coretext.h
new file mode 100644
index 0000000000..e53dbaf2c7
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-coretext.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright © 2012 Mozilla Foundation.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Mozilla Author(s): Jonathan Kew
+ */
+
+#ifndef HB_CORETEXT_H
+#define HB_CORETEXT_H
+
+#include "hb.h"
+
+#include <TargetConditionals.h>
+#if TARGET_OS_IPHONE
+# include <CoreText/CoreText.h>
+# include <CoreGraphics/CoreGraphics.h>
+#else
+# include <ApplicationServices/ApplicationServices.h>
+#endif
+
+HB_BEGIN_DECLS
+
+
+/**
+ * HB_CORETEXT_TAG_MORT:
+ *
+ * The #hb_tag_t tag for the `mort` (glyph metamorphosis) table,
+ * which holds AAT features.
+ *
+ * For more information, see
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6mort.html
+ *
+ **/
+#define HB_CORETEXT_TAG_MORT HB_TAG('m','o','r','t')
+
+/**
+ * HB_CORETEXT_TAG_MORX:
+ *
+ * The #hb_tag_t tag for the `morx` (extended glyph metamorphosis)
+ * table, which holds AAT features.
+ *
+ * For more information, see
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6morx.html
+ *
+ **/
+#define HB_CORETEXT_TAG_MORX HB_TAG('m','o','r','x')
+
+/**
+ * HB_CORETEXT_TAG_KERX:
+ *
+ * The #hb_tag_t tag for the `kerx` (extended kerning) table, which
+ * holds AAT kerning information.
+ *
+ * For more information, see
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6kerx.html
+ *
+ **/
+#define HB_CORETEXT_TAG_KERX HB_TAG('k','e','r','x')
+
+
+HB_EXTERN hb_face_t *
+hb_coretext_face_create (CGFontRef cg_font);
+
+HB_EXTERN hb_font_t *
+hb_coretext_font_create (CTFontRef ct_font);
+
+
+HB_EXTERN CGFontRef
+hb_coretext_face_get_cg_font (hb_face_t *face);
+
+HB_EXTERN CTFontRef
+hb_coretext_font_get_ct_font (hb_font_t *font);
+
+
+HB_END_DECLS
+
+#endif /* HB_CORETEXT_H */
diff --git a/thirdparty/harfbuzz/src/hb-debug.hh b/thirdparty/harfbuzz/src/hb-debug.hh
new file mode 100644
index 0000000000..ec3a1ff211
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-debug.hh
@@ -0,0 +1,459 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_DEBUG_HH
+#define HB_DEBUG_HH
+
+#include "hb.hh"
+#include "hb-atomic.hh"
+#include "hb-algs.hh"
+
+
+#ifndef HB_DEBUG
+#define HB_DEBUG 0
+#endif
+
+
+/*
+ * Global runtime options.
+ */
+
+/* Runtime option flags, each stored as a single-bit field. An instance is
+ * overlaid with an int (see hb_options_union_t) and read via hb_options(). */
+struct hb_options_t
+{
+ bool unused : 1; /* In-case sign bit is here. */
+ bool initialized : 1; /* NOTE(review): presumably set by _hb_options_init(); hb_options() re-initializes while the whole word is zero -- confirm. */
+ bool uniscribe_bug_compatible : 1;
+};
+
+/* Overlays the option bitfields with a plain int, so hb_options() can copy
+ * the integer read from _hb_options into `i` and hand back `opts`. */
+union hb_options_union_t {
+ int i;
+ hb_options_t opts;
+};
+/* The union must not be larger than the atomic integer type used for _hb_options. */
+static_assert ((sizeof (hb_atomic_int_t) >= sizeof (hb_options_union_t)), "");
+
+HB_INTERNAL void
+_hb_options_init ();
+
+extern HB_INTERNAL hb_atomic_int_t _hb_options;
+
+/* Returns a by-value snapshot of the global runtime options.
+ *
+ * When HB_NO_GETENV is defined, a value-initialized hb_options_t is returned
+ * right away. Otherwise the stored word is read; if it is still zero,
+ * _hb_options_init() is run and the word is read a second time. */
+static inline hb_options_t
+hb_options ()
+{
+#ifdef HB_NO_GETENV
+  return hb_options_t ();
+#endif
+  /* Work on a private copy of the word, so the bitfields are never read
+   * straight out of the shared atomic. */
+  int raw = _hb_options.get_relaxed ();
+  if (unlikely (!raw))
+  {
+    _hb_options_init ();
+    raw = _hb_options.get_relaxed ();
+  }
+
+  hb_options_union_t snapshot;
+  snapshot.i = raw;
+  return snapshot.opts;
+}
+
+
+/*
+ * Debug output (needs enabling at compile time.)
+ */
+
+/* Tells whether a message at LEVEL is to be printed: true only while LEVEL
+ * is strictly below MAX_LEVEL, so a MAX_LEVEL of 0 disables all output. */
+static inline bool
+_hb_debug (unsigned int level,
+           unsigned int max_level)
+{
+  return max_level > level;
+}
+
+#define DEBUG_LEVEL_ENABLED(WHAT, LEVEL) (_hb_debug ((LEVEL), HB_DEBUG_##WHAT))
+#define DEBUG_ENABLED(WHAT) (DEBUG_LEVEL_ENABLED (WHAT, 0))
+
+/* Writes a shortened form of a function signature string to stderr.
+ *
+ * A leading "static " and a leading "typename " are dropped, then everything
+ * up to and including the first space (the return type), and finally the
+ * text from the first '(' onwards. Nothing is printed when FUNC is null. */
+static inline void
+_hb_print_func (const char *func)
+{
+  if (!func)
+    return;
+
+  /* Upper bound for the printed length; narrowed below when a '(' is found. */
+  unsigned int print_len = strlen (func);
+
+  /* Skip "static" */
+  if (!strncmp (func, "static ", 7))
+    func += 7;
+  /* Skip "typename" */
+  if (!strncmp (func, "typename ", 9))
+    func += 9;
+
+  /* Skip return type */
+  const char *after_type = strchr (func, ' ');
+  if (after_type)
+    func = after_type + 1;
+
+  /* Skip parameter list */
+  const char *params = strchr (func, '(');
+  if (params)
+    print_len = params - func;
+
+  fprintf (stderr, "%.*s", print_len, func);
+}
+
+/* Formats one debug line to stderr, taking the message arguments as a va_list.
+ *
+ * what:      category label, printed left-aligned in a 10-character column.
+ * obj:       object pointer printed in parentheses; blank padding when null.
+ * func:      function signature, shortened by _hb_print_func(); may be null.
+ * indented:  when true, prints LEVEL followed by a column of vertical bars.
+ * level:     nesting depth; also compared against max_level by _hb_debug().
+ * level_dir: >0 prints the "down" arc, <0 the "up" arc, 0 a plain left bar.
+ * message:   printf-style format, may be null; AP holds its arguments.
+ *
+ * The first declaration exists only to attach HB_PRINTF_FUNC to the template. */
+template <int max_level> static inline void
+_hb_debug_msg_va (const char *what,
+ const void *obj,
+ const char *func,
+ bool indented,
+ unsigned int level,
+ int level_dir,
+ const char *message,
+ va_list ap) HB_PRINTF_FUNC(7, 0);
+template <int max_level> static inline void
+_hb_debug_msg_va (const char *what,
+ const void *obj,
+ const char *func,
+ bool indented,
+ unsigned int level,
+ int level_dir,
+ const char *message,
+ va_list ap)
+{
+ /* Nothing is printed unless LEVEL is below the compile-time max_level. */
+ if (!_hb_debug (level, max_level))
+ return;
+
+ fprintf (stderr, "%-10s", what ? what : "");
+
+ /* The pointer column is padded to two characters per byte of a pointer. */
+ if (obj)
+ fprintf (stderr, "(%*p) ", (unsigned int) (2 * sizeof (void *)), obj);
+ else
+ fprintf (stderr, " %*s ", (unsigned int) (2 * sizeof (void *)), "");
+
+ if (indented) {
+#define VBAR "\342\224\202" /* U+2502 BOX DRAWINGS LIGHT VERTICAL */
+#define VRBAR "\342\224\234" /* U+251C BOX DRAWINGS LIGHT VERTICAL AND RIGHT */
+#define DLBAR "\342\225\256" /* U+256E BOX DRAWINGS LIGHT ARC DOWN AND LEFT */
+#define ULBAR "\342\225\257" /* U+256F BOX DRAWINGS LIGHT ARC UP AND LEFT */
+#define LBAR "\342\225\264" /* U+2574 BOX DRAWINGS LIGHT LEFT */
+ /* Fifty vertical bars; a suffix of this string is printed below. */
+ static const char bars[] =
+ VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR
+ VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR
+ VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR
+ VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR
+ VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR;
+ /* Start far enough from the end of `bars` to emit LEVEL bars (each bar is
+  * sizeof (VBAR) - 1 bytes), clamped to the length of the whole table. */
+ fprintf (stderr, "%2u %s" VRBAR "%s",
+ level,
+ bars + sizeof (bars) - 1 - hb_min ((unsigned int) sizeof (bars) - 1, (unsigned int) (sizeof (VBAR) - 1) * level),
+ level_dir ? (level_dir > 0 ? DLBAR : ULBAR) : LBAR);
+ } else
+ fprintf (stderr, " " VRBAR LBAR);
+
+ _hb_print_func (func);
+
+ if (message)
+ {
+ fprintf (stderr, ": ");
+ vfprintf (stderr, message, ap);
+ }
+
+ fprintf (stderr, "\n");
+}
+/* Specialization for max_level == 0: the body is empty, so nothing is printed. */
+template <> inline void HB_PRINTF_FUNC(7, 0)
+_hb_debug_msg_va<0> (const char *what HB_UNUSED,
+ const void *obj HB_UNUSED,
+ const char *func HB_UNUSED,
+ bool indented HB_UNUSED,
+ unsigned int level HB_UNUSED,
+ int level_dir HB_UNUSED,
+ const char *message HB_UNUSED,
+ va_list ap HB_UNUSED) {}
+
+/* Variadic front end for _hb_debug_msg_va(): collects the arguments that
+ * follow MESSAGE into a va_list and forwards every parameter unchanged.
+ * The leading declaration carries the HB_PRINTF_FUNC attribute. */
+template <int max_level> static inline void
+_hb_debug_msg (const char *what,
+ const void *obj,
+ const char *func,
+ bool indented,
+ unsigned int level,
+ int level_dir,
+ const char *message,
+ ...) HB_PRINTF_FUNC(7, 8);
+template <int max_level> static inline void HB_PRINTF_FUNC(7, 8)
+_hb_debug_msg (const char *what,
+ const void *obj,
+ const char *func,
+ bool indented,
+ unsigned int level,
+ int level_dir,
+ const char *message,
+ ...)
+{
+ va_list ap;
+ va_start (ap, message);
+ _hb_debug_msg_va<max_level> (what, obj, func, indented, level, level_dir, message, ap);
+ va_end (ap);
+}
+/* Specialization for max_level == 0: declared, then defined with an empty
+ * body, so a call with a zero level prints nothing. */
+template <> inline void
+_hb_debug_msg<0> (const char *what HB_UNUSED,
+ const void *obj HB_UNUSED,
+ const char *func HB_UNUSED,
+ bool indented HB_UNUSED,
+ unsigned int level HB_UNUSED,
+ int level_dir HB_UNUSED,
+ const char *message HB_UNUSED,
+ ...) HB_PRINTF_FUNC(7, 8);
+template <> inline void HB_PRINTF_FUNC(7, 8)
+_hb_debug_msg<0> (const char *what HB_UNUSED,
+ const void *obj HB_UNUSED,
+ const char *func HB_UNUSED,
+ bool indented HB_UNUSED,
+ unsigned int level HB_UNUSED,
+ int level_dir HB_UNUSED,
+ const char *message HB_UNUSED,
+ ...) {}
+
+#define DEBUG_MSG_LEVEL(WHAT, OBJ, LEVEL, LEVEL_DIR, ...) _hb_debug_msg<HB_DEBUG_##WHAT> (#WHAT, (OBJ), nullptr, true, (LEVEL), (LEVEL_DIR), __VA_ARGS__)
+#define DEBUG_MSG(WHAT, OBJ, ...) _hb_debug_msg<HB_DEBUG_##WHAT> (#WHAT, (OBJ), nullptr, false, 0, 0, __VA_ARGS__)
+#define DEBUG_MSG_FUNC(WHAT, OBJ, ...) _hb_debug_msg<HB_DEBUG_##WHAT> (#WHAT, (OBJ), HB_FUNC, false, 0, 0, __VA_ARGS__)
+
+
+/*
+ * Printer
+ */
+
+/* Turns a value into a short C string for trace output. The generic version
+ * has no knowledge of T and always yields the placeholder "something". */
+template <typename T>
+struct hb_printer_t
+{
+  const char *print (const T&) { return "something"; }
+};
+
+/* Booleans print as the words "true" / "false". */
+template <>
+struct hb_printer_t<bool>
+{
+  const char *print (bool v)
+  {
+    if (v)
+      return "true";
+    return "false";
+  }
+};
+
+/* hb_empty_t prints as the empty string. */
+template <>
+struct hb_printer_t<hb_empty_t>
+{
+  const char *print (hb_empty_t) { return ""; }
+};
+
+
+/*
+ * Trace
+ */
+
+/* Prints a diagnostic to stderr when a traced scope ended without going
+ * through return_trace(), i.e. when RETURNED is false. */
+template <typename T>
+static inline void _hb_warn_no_return (bool returned)
+{
+  const bool missing_return = !returned;
+  if (unlikely (missing_return))
+    fprintf (stderr, "OUCH, returned with no call to return_trace(). This is a bug, please report.\n");
+}
+/* For hb_empty_t the check is a no-op. */
+template <>
+/*static*/ inline void _hb_warn_no_return<hb_empty_t> (bool returned HB_UNUSED)
+{
+}
+
+/* Scope guard that logs entry to and exit from a traced function.
+ *
+ * The constructor bumps the shared depth counter (when one is supplied) and
+ * prints an "entering" line; ret() prints the returned value and undoes the
+ * bump; the destructor reports scopes that were left without calling ret(). */
+template <int max_level, typename ret_t>
+struct hb_auto_trace_t
+{
+ /* plevel_ may be null, in which case no depth counter is maintained and
+  * the entry line is printed with level 0. */
+ explicit inline hb_auto_trace_t (unsigned int *plevel_,
+ const char *what_,
+ const void *obj_,
+ const char *func,
+ const char *message,
+ ...) HB_PRINTF_FUNC(6, 7)
+ : plevel (plevel_), what (what_), obj (obj_), returned (false)
+ {
+ if (plevel) ++*plevel;
+
+ va_list ap;
+ va_start (ap, message);
+ _hb_debug_msg_va<max_level> (what, obj, func, true, plevel ? *plevel : 0, +1, message, ap);
+ va_end (ap);
+ }
+ ~hb_auto_trace_t ()
+ {
+ /* Warn (for ret_t other than hb_empty_t) if ret() was never called, and
+  * in that case still print the closing line for this scope. */
+ _hb_warn_no_return<ret_t> (returned);
+ if (!returned) {
+ _hb_debug_msg<max_level> (what, obj, nullptr, true, plevel ? *plevel : 1, -1, " ");
+ }
+ /* plevel is null here once ret() has run, so the counter is decremented
+  * exactly once per scope. */
+ if (plevel) --*plevel;
+ }
+
+ /* Logs "return <value> (line N)" and forwards V to the caller. A second
+  * call on the same object only prints a diagnostic and forwards V. */
+ template <typename T>
+ T ret (T&& v,
+ const char *func = "",
+ unsigned int line = 0)
+ {
+ if (unlikely (returned)) {
+ fprintf (stderr, "OUCH, double calls to return_trace(). This is a bug, please report.\n");
+ return hb_forward<T> (v);
+ }
+
+ _hb_debug_msg<max_level> (what, obj, func, true, plevel ? *plevel : 1, -1,
+ "return %s (line %d)",
+ hb_printer_t<decltype (v)>().print (v), line);
+ /* Undo the constructor's increment now and clear plevel so that the
+  * destructor does not decrement again. */
+ if (plevel) --*plevel;
+ plevel = nullptr;
+ returned = true;
+ return hb_forward<T> (v);
+ }
+
+ private:
+ unsigned int *plevel; /* depth counter supplied by the caller, or null */
+ const char *what; /* category label passed to the debug printer */
+ const void *obj; /* object pointer passed to the debug printer */
+ bool returned; /* set once ret() has been called */
+};
+/* Partial specialization for max_level == 0: the constructor has an empty
+ * body and ret() only forwards its argument, so no output is produced and
+ * no state is kept. */
+template <typename ret_t> /* Make sure we don't use hb_auto_trace_t when not tracing. */
+struct hb_auto_trace_t<0, ret_t>
+{
+ explicit inline hb_auto_trace_t (unsigned int *plevel_,
+ const char *what_,
+ const void *obj_,
+ const char *func,
+ const char *message,
+ ...) HB_PRINTF_FUNC(6, 7) {}
+
+ /* Returns V unchanged; FUNC and LINE are accepted only for signature
+  * compatibility with the tracing variant. */
+ template <typename T>
+ T ret (T&& v,
+ const char *func HB_UNUSED = nullptr,
+ unsigned int line HB_UNUSED = 0) { return hb_forward<T> (v); }
+};
+
+/* For disabled tracing; optimize out everything.
+ * https://github.com/harfbuzz/harfbuzz/pull/605
+ *
+ * This type has no data members and no constructor arguments; the TRACE_*
+ * macros declare it as `trace` when their debug category is off, so that
+ * return_trace() still compiles. ret() simply forwards its argument. */
+template <typename ret_t>
+struct hb_no_trace_t {
+ template <typename T>
+ T ret (T&& v,
+ const char *func HB_UNUSED = nullptr,
+ unsigned int line HB_UNUSED = 0) { return hb_forward<T> (v); }
+};
+
+#define return_trace(RET) return trace.ret (RET, HB_FUNC, __LINE__)
+
+
+/*
+ * Instances.
+ */
+
+#ifndef HB_DEBUG_ARABIC
+#define HB_DEBUG_ARABIC (HB_DEBUG+0)
+#endif
+
+#ifndef HB_DEBUG_BLOB
+#define HB_DEBUG_BLOB (HB_DEBUG+0)
+#endif
+
+#ifndef HB_DEBUG_CORETEXT
+#define HB_DEBUG_CORETEXT (HB_DEBUG+0)
+#endif
+
+#ifndef HB_DEBUG_DIRECTWRITE
+#define HB_DEBUG_DIRECTWRITE (HB_DEBUG+0)
+#endif
+
+#ifndef HB_DEBUG_FT
+#define HB_DEBUG_FT (HB_DEBUG+0)
+#endif
+
+#ifndef HB_DEBUG_OBJECT
+#define HB_DEBUG_OBJECT (HB_DEBUG+0)
+#endif
+
+#ifndef HB_DEBUG_SHAPE_PLAN
+#define HB_DEBUG_SHAPE_PLAN (HB_DEBUG+0)
+#endif
+
+#ifndef HB_DEBUG_UNISCRIBE
+#define HB_DEBUG_UNISCRIBE (HB_DEBUG+0)
+#endif
+
+/*
+ * With tracing.
+ */
+
+#ifndef HB_DEBUG_APPLY
+#define HB_DEBUG_APPLY (HB_DEBUG+0)
+#endif
+#if HB_DEBUG_APPLY
+#define TRACE_APPLY(this) \
+ hb_auto_trace_t<HB_DEBUG_APPLY, bool> trace \
+ (&c->debug_depth, c->get_name (), this, HB_FUNC, \
+ "idx %d gid %u lookup %d", \
+ c->buffer->idx, c->buffer->cur().codepoint, (int) c->lookup_index)
+#else
+#define TRACE_APPLY(this) hb_no_trace_t<bool> trace
+#endif
+
+#ifndef HB_DEBUG_SANITIZE
+#define HB_DEBUG_SANITIZE (HB_DEBUG+0)
+#endif
+#if HB_DEBUG_SANITIZE
+#define TRACE_SANITIZE(this) \
+ hb_auto_trace_t<HB_DEBUG_SANITIZE, bool> trace \
+ (&c->debug_depth, c->get_name (), this, HB_FUNC, \
+ " ")
+#else
+#define TRACE_SANITIZE(this) hb_no_trace_t<bool> trace
+#endif
+
+#ifndef HB_DEBUG_SERIALIZE
+#define HB_DEBUG_SERIALIZE (HB_DEBUG+0)
+#endif
+#if HB_DEBUG_SERIALIZE
+#define TRACE_SERIALIZE(this) \
+ hb_auto_trace_t<HB_DEBUG_SERIALIZE, bool> trace \
+ (&c->debug_depth, "SERIALIZE", c, HB_FUNC, \
+ " ")
+#else
+#define TRACE_SERIALIZE(this) hb_no_trace_t<bool> trace
+#endif
+
+#ifndef HB_DEBUG_SUBSET
+#define HB_DEBUG_SUBSET (HB_DEBUG+0)
+#endif
+#if HB_DEBUG_SUBSET
+#define TRACE_SUBSET(this) \
+ hb_auto_trace_t<HB_DEBUG_SUBSET, bool> trace \
+ (&c->debug_depth, c->get_name (), this, HB_FUNC, \
+ " ")
+#else
+#define TRACE_SUBSET(this) hb_no_trace_t<bool> trace
+#endif
+
+#ifndef HB_DEBUG_DISPATCH
+#define HB_DEBUG_DISPATCH ( \
+ HB_DEBUG_APPLY + \
+ HB_DEBUG_SANITIZE + \
+ HB_DEBUG_SERIALIZE + \
+ HB_DEBUG_SUBSET + \
+ 0)
+#endif
+#if HB_DEBUG_DISPATCH
+#define TRACE_DISPATCH(this, format) \
+ hb_auto_trace_t<context_t::max_debug_depth, typename context_t::return_t> trace \
+ (&c->debug_depth, c->get_name (), this, HB_FUNC, \
+ "format %d", (int) format)
+#else
+#define TRACE_DISPATCH(this, format) hb_no_trace_t<typename context_t::return_t> trace
+#endif
+
+
+#endif /* HB_DEBUG_HH */
diff --git a/thirdparty/harfbuzz/src/hb-deprecated.h b/thirdparty/harfbuzz/src/hb-deprecated.h
new file mode 100644
index 0000000000..43f89a4c4e
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-deprecated.h
@@ -0,0 +1,195 @@
+/*
+ * Copyright © 2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_H_IN
+#error "Include <hb.h> instead."
+#endif
+
+#ifndef HB_DEPRECATED_H
+#define HB_DEPRECATED_H
+
+#include "hb-common.h"
+#include "hb-unicode.h"
+#include "hb-font.h"
+#include "hb-set.h"
+
+
+/**
+ * SECTION:hb-deprecated
+ * @title: hb-deprecated
+ * @short_description: Deprecated API
+ * @include: hb.h
+ *
+ * These API have been deprecated in favor of newer API, or because they
+ * were deemed unnecessary.
+ **/
+
+
+HB_BEGIN_DECLS
+
+#ifndef HB_DISABLE_DEPRECATED
+
+
+#define HB_SCRIPT_CANADIAN_ABORIGINAL HB_SCRIPT_CANADIAN_SYLLABICS
+
+#define HB_BUFFER_FLAGS_DEFAULT HB_BUFFER_FLAG_DEFAULT
+#define HB_BUFFER_SERIALIZE_FLAGS_DEFAULT HB_BUFFER_SERIALIZE_FLAG_DEFAULT
+
+typedef hb_bool_t (*hb_font_get_glyph_func_t) (hb_font_t *font, void *font_data,
+ hb_codepoint_t unicode, hb_codepoint_t variation_selector,
+ hb_codepoint_t *glyph,
+ void *user_data);
+
+HB_EXTERN HB_DEPRECATED_FOR(hb_font_funcs_set_nominal_glyph_func and hb_font_funcs_set_variation_glyph_func) void
+hb_font_funcs_set_glyph_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_glyph_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
+HB_EXTERN HB_DEPRECATED void
+hb_set_invert (hb_set_t *set);
+
+/**
+ * hb_unicode_eastasian_width_func_t:
+ *
+ * Deprecated: 2.0.0
+ */
+typedef unsigned int (*hb_unicode_eastasian_width_func_t) (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t unicode,
+ void *user_data);
+
+/**
+ * hb_unicode_funcs_set_eastasian_width_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified):
+ * @user_data:
+ * @destroy:
+ *
+ *
+ *
+ * Since: 0.9.2
+ * Deprecated: 2.0.0
+ **/
+HB_EXTERN HB_DEPRECATED void
+hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs,
+ hb_unicode_eastasian_width_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
+/**
+ * hb_unicode_eastasian_width:
+ *
+ * Since: 0.9.2
+ * Deprecated: 2.0.0
+ **/
+HB_EXTERN HB_DEPRECATED unsigned int
+hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t unicode);
+
+
+/**
+ * hb_unicode_decompose_compatibility_func_t:
+ * @ufuncs: a Unicode function structure
+ * @u: codepoint to decompose
+ * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into
+ * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func()
+ *
+ * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed.
+ * The complete length of the decomposition will be returned.
+ *
+ * If @u has no compatibility decomposition, zero should be returned.
+ *
+ * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any
+ * compatibility decomposition plus a terminating value of 0. Consequently, @decomposed must be allocated by the caller to be at least this length. Implementations
+ * of this function type must ensure that they do not write past the provided array.
+ *
+ * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available.
+ *
+ * Deprecated: 2.0.0
+ */
+typedef unsigned int (*hb_unicode_decompose_compatibility_func_t) (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t u,
+ hb_codepoint_t *decomposed,
+ void *user_data);
+
+/**
+ * HB_UNICODE_MAX_DECOMPOSITION_LEN:
+ *
+ * See Unicode 6.1 for details on the maximum decomposition length.
+ *
+ * Deprecated: 2.0.0
+ */
+#define HB_UNICODE_MAX_DECOMPOSITION_LEN (18+1) /* codepoints */
+
+/**
+ * hb_unicode_funcs_set_decompose_compatibility_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified):
+ * @user_data:
+ * @destroy:
+ *
+ *
+ *
+ * Since: 0.9.2
+ * Deprecated: 2.0.0
+ **/
+HB_EXTERN HB_DEPRECATED void
+hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs,
+ hb_unicode_decompose_compatibility_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
+HB_EXTERN HB_DEPRECATED unsigned int
+hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t u,
+ hb_codepoint_t *decomposed);
+
+
+typedef hb_font_get_glyph_kerning_func_t hb_font_get_glyph_v_kerning_func_t;
+
+/**
+ * hb_font_funcs_set_glyph_v_kerning_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified):
+ * @user_data:
+ * @destroy:
+ *
+ *
+ *
+ * Since: 0.9.2
+ * Deprecated: 2.0.0
+ **/
+HB_EXTERN void
+hb_font_funcs_set_glyph_v_kerning_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_glyph_v_kerning_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
+HB_EXTERN hb_position_t
+hb_font_get_glyph_v_kerning (hb_font_t *font,
+ hb_codepoint_t top_glyph, hb_codepoint_t bottom_glyph);
+
+#endif
+
+HB_END_DECLS
+
+#endif /* HB_DEPRECATED_H */
diff --git a/thirdparty/harfbuzz/src/hb-directwrite.cc b/thirdparty/harfbuzz/src/hb-directwrite.cc
new file mode 100644
index 0000000000..f2fce073e0
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-directwrite.cc
@@ -0,0 +1,979 @@
+/*
+ * Copyright © 2015-2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "hb.hh"
+
+#ifdef HAVE_DIRECTWRITE
+
+#include "hb-shaper-impl.hh"
+
+#include <dwrite_1.h>
+
+#include "hb-directwrite.h"
+
+
+/* Declare object creator for dynamic support of DWRITE */
+typedef HRESULT (* WINAPI t_DWriteCreateFactory)(
+ DWRITE_FACTORY_TYPE factoryType,
+ REFIID iid,
+ IUnknown **factory
+);
+
+/*
+ * hb-directwrite uses new/delete syntactically, but since we let users
+ * override malloc/free, we redefine new/delete here so that users
+ * do not need to do that on their own.
+ */
+/* Each operator forwards straight to malloc()/free(). The pointer from
+ * malloc() is returned as is, without any failure handling here. */
+void* operator new (size_t size) { return malloc (size); }
+void* operator new [] (size_t size) { return malloc (size); }
+void operator delete (void* pointer) { free (pointer); }
+void operator delete [] (void* pointer) { free (pointer); }
+
+
+/*
+ * DirectWrite font stream helpers
+ */
+
+// This is a font loader which provides only one font (unlike its original design).
+// For a better implementation, which was also the source of this class
+// and of DWriteFontFileStream, have a look at NativeFontResourceDWrite.cpp in Mozilla.
+class DWriteFontFileLoader : public IDWriteFontFileLoader
+{
+private:
+  // The single stream handed out for every key; not owned by the loader.
+  IDWriteFontFileStream *mFontFileStream;
+
+public:
+  DWriteFontFileLoader (IDWriteFontFileStream *fontFileStream)
+    : mFontFileStream (fontFileStream)
+  {}
+
+  // IUnknown interface
+  // No reference counting is performed: QueryInterface reports success
+  // without touching ppObject, and AddRef/Release always report 1.
+  IFACEMETHOD (QueryInterface) (IID const& iid, OUT void** ppObject)
+  {
+    return S_OK;
+  }
+  IFACEMETHOD_ (ULONG, AddRef) ()
+  {
+    return 1;
+  }
+  IFACEMETHOD_ (ULONG, Release) ()
+  {
+    return 1;
+  }
+
+  // IDWriteFontFileLoader methods
+  // The reference key is ignored; the stream given at construction is
+  // returned regardless of which key is requested.
+  virtual HRESULT STDMETHODCALLTYPE
+  CreateStreamFromKey (void const* fontFileReferenceKey,
+                       uint32_t fontFileReferenceKeySize,
+                       OUT IDWriteFontFileStream** fontFileStream)
+  {
+    *fontFileStream = mFontFileStream;
+    return S_OK;
+  }
+
+  virtual ~DWriteFontFileLoader() {}
+};
+
+// In-memory font file stream: exposes a caller-owned byte buffer to
+// DirectWrite. The buffer is neither copied nor freed by this class.
+class DWriteFontFileStream : public IDWriteFontFileStream
+{
+private:
+  uint8_t *mData;  // start of the font data (not owned)
+  uint32_t mSize;  // number of bytes available at mData
+public:
+  DWriteFontFileStream (uint8_t *aData, uint32_t aSize)
+  {
+    mData = aData;
+    mSize = aSize;
+  }
+
+  // IUnknown interface
+  // No reference counting is performed: QueryInterface reports success
+  // without touching ppObject, and AddRef/Release always report 1.
+  IFACEMETHOD (QueryInterface) (IID const& iid, OUT void** ppObject)
+  { return S_OK; }
+  IFACEMETHOD_ (ULONG, AddRef) () { return 1; }
+  IFACEMETHOD_ (ULONG, Release) () { return 1; }
+
+  // IDWriteFontFileStream methods
+  // Returns a pointer into the buffer for [fileOffset, fileOffset + fragmentSize),
+  // or E_FAIL when that range does not lie entirely inside the buffer.
+  virtual HRESULT STDMETHODCALLTYPE
+  ReadFileFragment (void const** fragmentStart,
+                    UINT64 fileOffset,
+                    UINT64 fragmentSize,
+                    OUT void** fragmentContext)
+  {
+    // We are required to do bounds checking. The check is written against
+    // the remaining size rather than as `fileOffset + fragmentSize`, because
+    // that 64-bit sum can wrap around and let an out-of-range request through.
+    if (fileOffset > mSize || fragmentSize > mSize - fileOffset)
+      return E_FAIL;
+
+    // truncate the 64 bit fileOffset to size_t sized index into mData
+    // (safe: fileOffset <= mSize, which is a 32-bit quantity)
+    size_t index = static_cast<size_t> (fileOffset);
+
+    // We should be alive for the duration of this.
+    *fragmentStart = &mData[index];
+    *fragmentContext = nullptr;
+    return S_OK;
+  }
+
+  // Fragments point straight into mData, so there is nothing to release.
+  virtual void STDMETHODCALLTYPE
+  ReleaseFileFragment (void* fragmentContext) {}
+
+  virtual HRESULT STDMETHODCALLTYPE
+  GetFileSize (OUT UINT64* fileSize)
+  {
+    *fileSize = mSize;
+    return S_OK;
+  }
+
+  virtual HRESULT STDMETHODCALLTYPE
+  GetLastWriteTime (OUT UINT64* lastWriteTime) { return E_NOTIMPL; }
+
+  virtual ~DWriteFontFileStream() {}
+};
+
+
+/*
+* shaper face data
+*/
+
+/* Per-face state of the DirectWrite shaper. Filled in by
+ * _hb_directwrite_shaper_face_data_create() and torn down by
+ * _hb_directwrite_shaper_face_data_destroy(). */
+struct hb_directwrite_face_data_t
+{
+ HMODULE dwrite_dll; /* module handle from LoadLibrary ("DWRITE") */
+ IDWriteFactory *dwriteFactory; /* factory obtained through DWriteCreateFactory */
+ IDWriteFontFile *fontFile; /* custom font file reference created via fontFileLoader */
+ DWriteFontFileStream *fontFileStream; /* stream over the bytes of faceBlob */
+ DWriteFontFileLoader *fontFileLoader; /* loader registered with dwriteFactory */
+ IDWriteFontFace *fontFace; /* font face created from fontFile */
+ hb_blob_t *faceBlob; /* blob reference taken with hb_face_reference_blob() */
+};
+
+/* Declared ahead of its definition further down in this file, because the
+ * failure paths below hand the partially built state to it for cleanup. */
+void
+_hb_directwrite_shaper_face_data_destroy (hb_directwrite_face_data_t *data);
+
+/* Builds the DirectWrite objects needed to shape with FACE.
+ *
+ * Loads DWRITE, creates a shared IDWriteFactory, wraps the face's blob in an
+ * in-memory font file stream/loader, and creates an IDWriteFontFace from it.
+ *
+ * Returns the populated data on success, or nullptr on any failure. On
+ * failure everything acquired up to that point is released again. */
+hb_directwrite_face_data_t *
+_hb_directwrite_shaper_face_data_create (hb_face_t *face)
+{
+  hb_directwrite_face_data_t *data = new hb_directwrite_face_data_t;
+  if (unlikely (!data))
+    return nullptr;
+  /* Start with every handle null, so that the destroy routine can be run on
+   * a partially constructed object. */
+  *data = hb_directwrite_face_data_t ();
+
+#define FAIL(...) \
+  HB_STMT_START { \
+    DEBUG_MSG (DIRECTWRITE, nullptr, __VA_ARGS__); \
+    _hb_directwrite_shaper_face_data_destroy (data); \
+    return nullptr; \
+  } HB_STMT_END
+
+  data->dwrite_dll = LoadLibrary (TEXT ("DWRITE"));
+  if (unlikely (!data->dwrite_dll))
+    FAIL ("Cannot find DWrite.DLL");
+
+  t_DWriteCreateFactory p_DWriteCreateFactory;
+
+#if defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-function-type"
+#endif
+
+  p_DWriteCreateFactory = (t_DWriteCreateFactory)
+                          GetProcAddress (data->dwrite_dll, "DWriteCreateFactory");
+
+#if defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
+
+  if (unlikely (!p_DWriteCreateFactory))
+    FAIL ("Cannot find DWriteCreateFactory().");
+
+  HRESULT hr;
+
+  // TODO: factory and fontFileLoader should be cached separately
+  IDWriteFactory* dwriteFactory = nullptr;
+  hr = p_DWriteCreateFactory (DWRITE_FACTORY_TYPE_SHARED, __uuidof (IDWriteFactory),
+                              (IUnknown**) &dwriteFactory);
+
+  if (unlikely (hr != S_OK))
+    FAIL ("Failed to run DWriteCreateFactory().");
+  data->dwriteFactory = dwriteFactory;
+
+  /* Each object is stored in `data` as soon as it exists, so that a later
+   * FAIL releases it. */
+  hb_blob_t *blob = hb_face_reference_blob (face);
+  data->faceBlob = blob;
+
+  DWriteFontFileStream *fontFileStream;
+  fontFileStream = new DWriteFontFileStream ((uint8_t *) hb_blob_get_data (blob, nullptr),
+                                             hb_blob_get_length (blob));
+  data->fontFileStream = fontFileStream;
+
+  DWriteFontFileLoader *fontFileLoader = new DWriteFontFileLoader (fontFileStream);
+  dwriteFactory->RegisterFontFileLoader (fontFileLoader);
+  data->fontFileLoader = fontFileLoader;
+
+  IDWriteFontFile *fontFile = nullptr;
+  uint64_t fontFileKey = 0;
+  hr = dwriteFactory->CreateCustomFontFileReference (&fontFileKey, sizeof (fontFileKey),
+                                                     fontFileLoader, &fontFile);
+
+  if (FAILED (hr))
+    FAIL ("Failed to load font file from data!");
+  data->fontFile = fontFile;
+
+  BOOL isSupported;
+  DWRITE_FONT_FILE_TYPE fileType;
+  DWRITE_FONT_FACE_TYPE faceType;
+  uint32_t numberOfFaces;
+  hr = fontFile->Analyze (&isSupported, &fileType, &faceType, &numberOfFaces);
+  if (FAILED (hr) || !isSupported)
+    FAIL ("Font file is not supported.");
+
+  /* Do not hand out data whose fontFace was never created. */
+  IDWriteFontFace *fontFace = nullptr;
+  hr = dwriteFactory->CreateFontFace (faceType, 1, &fontFile, 0,
+                                      DWRITE_FONT_SIMULATIONS_NONE, &fontFace);
+  if (FAILED (hr) || !fontFace)
+    FAIL ("Failed to create font face.");
+
+#undef FAIL
+
+  data->fontFace = fontFace;
+
+  return data;
+}
+
+/* Releases everything held by DATA and then DATA itself.
+ *
+ * Null members are skipped, and a null DATA is accepted and ignored.
+ * Teardown order: font face, font file, loader unregistration and factory,
+ * then the loader and stream objects, the blob reference, and finally the
+ * DWRITE module handle. */
+void
+_hb_directwrite_shaper_face_data_destroy (hb_directwrite_face_data_t *data)
+{
+  /* Check before the first dereference; the former check sat at the very
+   * end, after DATA had already been used. */
+  if (unlikely (!data))
+    return;
+
+  if (data->fontFace)
+    data->fontFace->Release ();
+  if (data->fontFile)
+    data->fontFile->Release ();
+  if (data->dwriteFactory)
+  {
+    /* The loader has to be unregistered while the factory is still held. */
+    if (data->fontFileLoader)
+      data->dwriteFactory->UnregisterFontFileLoader (data->fontFileLoader);
+    data->dwriteFactory->Release ();
+  }
+  if (data->fontFileLoader)
+    delete data->fontFileLoader;
+  if (data->fontFileStream)
+    delete data->fontFileStream;
+  if (data->faceBlob)
+    hb_blob_destroy (data->faceBlob);
+  if (data->dwrite_dll)
+    FreeLibrary (data->dwrite_dll);
+  delete data;
+}
+
+
+/*
+ * shaper font data
+ */
+
+/* Empty struct: it has no members, so the create/destroy pair below only allocates and frees it. */
+struct hb_directwrite_font_data_t {};
+
+/* Allocates the (empty) per-font data object. FONT is not inspected.
+ * Returns nullptr when the allocation yields a null pointer. */
+hb_directwrite_font_data_t *
+_hb_directwrite_shaper_font_data_create (hb_font_t *font)
+{
+  hb_directwrite_font_data_t *font_data = new hb_directwrite_font_data_t;
+  return unlikely (!font_data) ? nullptr : font_data;
+}
+
+/* Frees a font data object allocated by
+ * _hb_directwrite_shaper_font_data_create(). */
+void
+_hb_directwrite_shaper_font_data_destroy (hb_directwrite_font_data_t *data)
+{
+ delete data;
+}
+
+
+// Most of TextAnalysis is originally written by Bas Schouten for Mozilla project
+// but now is relicensed to MIT for HarfBuzz use
+class TextAnalysis : public IDWriteTextAnalysisSource, public IDWriteTextAnalysisSink
+{
+public:
+
+ IFACEMETHOD (QueryInterface) (IID const& iid, OUT void** ppObject)
+ { return S_OK; }
+ IFACEMETHOD_ (ULONG, AddRef) () { return 1; }
+ IFACEMETHOD_ (ULONG, Release) () { return 1; }
+
+ // A single contiguous run of characters containing the same analysis
+ // results.
+ struct Run
+ {
+ uint32_t mTextStart; // starting text position of this run
+ uint32_t mTextLength; // number of contiguous code units covered
+ uint32_t mGlyphStart; // starting glyph in the glyphs array
+ uint32_t mGlyphCount; // number of glyphs associated with this run
+ // text
+ DWRITE_SCRIPT_ANALYSIS mScript;
+ uint8_t mBidiLevel;
+ bool mIsSideways;
+
+ bool ContainsTextPosition (uint32_t aTextPosition) const
+ {
+ return aTextPosition >= mTextStart &&
+ aTextPosition < mTextStart + mTextLength;
+ }
+
+ Run *nextRun;
+ };
+
+public:
+ TextAnalysis (const wchar_t* text, uint32_t textLength,
+ const wchar_t* localeName, DWRITE_READING_DIRECTION readingDirection)
+ : mTextLength (textLength), mText (text), mLocaleName (localeName),
+ mReadingDirection (readingDirection), mCurrentRun (nullptr) {}
+ ~TextAnalysis ()
+ {
+ // delete runs, except mRunHead which is part of the TextAnalysis object
+ for (Run *run = mRunHead.nextRun; run;)
+ {
+ Run *origRun = run;
+ run = run->nextRun;
+ delete origRun;
+ }
+ }
+
+ STDMETHODIMP
+ GenerateResults (IDWriteTextAnalyzer* textAnalyzer, Run **runHead)
+ {
+ // Analyzes the text using the script analyzer and returns
+ // the result as a series of runs.
+
+ HRESULT hr = S_OK;
+
+ // Initially start out with one result that covers the entire range.
+ // This result will be subdivided by the analysis processes.
+ mRunHead.mTextStart = 0;
+ mRunHead.mTextLength = mTextLength;
+ mRunHead.mBidiLevel =
+ (mReadingDirection == DWRITE_READING_DIRECTION_RIGHT_TO_LEFT);
+ mRunHead.nextRun = nullptr;
+ mCurrentRun = &mRunHead;
+
+ // Call each of the analyzers in sequence, recording their results.
+ if (SUCCEEDED (hr = textAnalyzer->AnalyzeScript (this, 0, mTextLength, this)))
+ *runHead = &mRunHead;
+
+ return hr;
+ }
+
+ // IDWriteTextAnalysisSource implementation
+
+ IFACEMETHODIMP
+ GetTextAtPosition (uint32_t textPosition,
+ OUT wchar_t const** textString,
+ OUT uint32_t* textLength)
+ {
+ if (textPosition >= mTextLength)
+ {
+ // No text at this position, valid query though.
+ *textString = nullptr;
+ *textLength = 0;
+ }
+ else
+ {
+ *textString = mText + textPosition;
+ *textLength = mTextLength - textPosition;
+ }
+ return S_OK;
+ }
+
+ IFACEMETHODIMP
+ GetTextBeforePosition (uint32_t textPosition,
+ OUT wchar_t const** textString,
+ OUT uint32_t* textLength)
+ {
+ if (textPosition == 0 || textPosition > mTextLength)
+ {
+ // Either there is no text before here (== 0), or this
+ // is an invalid position. The query is considered valid though.
+ *textString = nullptr;
+ *textLength = 0;
+ }
+ else
+ {
+ *textString = mText;
+ *textLength = textPosition;
+ }
+ return S_OK;
+ }
+
+ IFACEMETHODIMP_ (DWRITE_READING_DIRECTION)
+ GetParagraphReadingDirection () { return mReadingDirection; }
+
+ IFACEMETHODIMP GetLocaleName (uint32_t textPosition, uint32_t* textLength,
+ wchar_t const** localeName)
+ { return S_OK; }
+
+ IFACEMETHODIMP
+ GetNumberSubstitution (uint32_t textPosition,
+ OUT uint32_t* textLength,
+ OUT IDWriteNumberSubstitution** numberSubstitution)
+ {
+ // We do not support number substitution.
+ *numberSubstitution = nullptr;
+ *textLength = mTextLength - textPosition;
+
+ return S_OK;
+ }
+
+ // IDWriteTextAnalysisSink implementation
+
+ IFACEMETHODIMP
+ SetScriptAnalysis (uint32_t textPosition, uint32_t textLength,
+ DWRITE_SCRIPT_ANALYSIS const* scriptAnalysis)
+ {
+ SetCurrentRun (textPosition);
+ SplitCurrentRun (textPosition);
+ while (textLength > 0)
+ {
+ Run *run = FetchNextRun (&textLength);
+ run->mScript = *scriptAnalysis;
+ }
+
+ return S_OK;
+ }
+
+ IFACEMETHODIMP
+ SetLineBreakpoints (uint32_t textPosition,
+ uint32_t textLength,
+ const DWRITE_LINE_BREAKPOINT* lineBreakpoints)
+ { return S_OK; }
+
+ IFACEMETHODIMP SetBidiLevel (uint32_t textPosition, uint32_t textLength,
+ uint8_t explicitLevel, uint8_t resolvedLevel)
+ { return S_OK; }
+
+ IFACEMETHODIMP
+ SetNumberSubstitution (uint32_t textPosition, uint32_t textLength,
+ IDWriteNumberSubstitution* numberSubstitution)
+ { return S_OK; }
+
+protected:
+ Run *FetchNextRun (IN OUT uint32_t* textLength)
+ {
+ // Used by the sink setters, this returns a reference to the next run.
+ // Position and length are adjusted to now point after the current run
+ // being returned.
+
+ Run *origRun = mCurrentRun;
+ // Split the tail if needed (the length remaining is less than the
+ // current run's size).
+ if (*textLength < mCurrentRun->mTextLength)
+ SplitCurrentRun (mCurrentRun->mTextStart + *textLength);
+ else
+ // Just advance the current run.
+ mCurrentRun = mCurrentRun->nextRun;
+ *textLength -= origRun->mTextLength;
+
+ // Return a reference to the run that was just current.
+ return origRun;
+ }
+
+ void SetCurrentRun (uint32_t textPosition)
+ {
+ // Point mCurrentRun at the run that contains textPosition.
+ // Since the analyzers generally return results in a forward manner,
+ // the current run usually already contains it and we return early.
+ // Otherwise scan the list, starting at mRunHead, for the matching run.
+
+ if (mCurrentRun && mCurrentRun->ContainsTextPosition (textPosition))
+ return;
+
+ for (Run *run = &mRunHead; run; run = run->nextRun)
+ if (run->ContainsTextPosition (textPosition))
+ {
+ mCurrentRun = run;
+ return;
+ }
+ assert (0); // We should always be able to find the text position in one of our runs; mCurrentRun is left unchanged if we get here
+ }
+
+ void SplitCurrentRun (uint32_t splitPosition)
+ {
+ if (!mCurrentRun)
+ {
+ assert (0); // SplitCurrentRun called without current run
+ // Callers must establish a current run first; bail out if asserts are compiled out.
+ return;
+ }
+ // Split the current run in two at splitPosition.
+ if (splitPosition <= mCurrentRun->mTextStart)
+ {
+ // Nothing to split: splitPosition is at (or before) the start of
+ // the current run. Usually the first.
+ return;
+ }
+ Run *newRun = new Run; // NOTE(review): nothing in this method frees the node; presumably the list owner deletes it — confirm
+
+ *newRun = *mCurrentRun; // start from a copy so the tail inherits every field of the run being split
+
+ // Link the new run into the list directly after the current run.
+ newRun->nextRun = mCurrentRun->nextRun;
+ mCurrentRun->nextRun = newRun;
+
+ // The current run keeps the first splitPoint characters; the new run gets the rest.
+ uint32_t splitPoint = splitPosition - mCurrentRun->mTextStart;
+ newRun->mTextStart += splitPoint;
+ newRun->mTextLength -= splitPoint;
+ mCurrentRun->mTextLength = splitPoint;
+ mCurrentRun = newRun; // the tail becomes the current run
+ }
+
+protected:
+ // Input
+ // (weak references are fine here, since this class is a transient
+ // stack-based helper that doesn't need to copy data)
+ uint32_t mTextLength;
+ const wchar_t* mText;
+ const wchar_t* mLocaleName;
+ DWRITE_READING_DIRECTION mReadingDirection;
+
+ // Current processing state.
+ Run *mCurrentRun;
+
+ // Output is a list of runs starting here
+ Run mRunHead;
+};
+
+/*
+ * shaper: shapes @buffer through DirectWrite; a non-zero lineWidth additionally requests justification.
+ */
+
+static hb_bool_t
+_hb_directwrite_shape_full (hb_shape_plan_t *shape_plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features,
+ float lineWidth)
+{
+ hb_face_t *face = font->face;
+ const hb_directwrite_face_data_t *face_data = face->data.directwrite;
+ IDWriteFactory *dwriteFactory = face_data->dwriteFactory;
+ IDWriteFontFace *fontFace = face_data->fontFace;
+
+ IDWriteTextAnalyzer* analyzer; // NOTE(review): stays uninitialized if CreateTextAnalyzer fails without writing it
+ dwriteFactory->CreateTextAnalyzer (&analyzer); // NOTE(review): the HRESULT is ignored and no Release () of analyzer appears in this function
+
+ unsigned int scratch_size;
+ hb_buffer_t::scratch_buffer_t *scratch = buffer->get_scratch_buffer (&scratch_size);
+#define ALLOCATE_ARRAY(Type, name, len) \
+ Type *name = (Type *) scratch; \
+ do { \
+ unsigned int _consumed = DIV_CEIL ((len) * sizeof (Type), sizeof (*scratch)); \
+ assert (_consumed <= scratch_size); \
+ scratch += _consumed; \
+ scratch_size -= _consumed; \
+ } while (0)
+
+#define utf16_index() var1.u32
+
+ ALLOCATE_ARRAY (wchar_t, textString, buffer->len * 2); // sized for the worst case of two UTF-16 units per codepoint
+
+ unsigned int chars_len = 0; // number of UTF-16 units written to textString
+ for (unsigned int i = 0; i < buffer->len; i++)
+ {
+ hb_codepoint_t c = buffer->info[i].codepoint;
+ buffer->info[i].utf16_index () = chars_len; // remember where this codepoint starts in textString
+ if (likely (c <= 0xFFFFu))
+ textString[chars_len++] = c; // fits in a single UTF-16 unit
+ else if (unlikely (c > 0x10FFFFu))
+ textString[chars_len++] = 0xFFFDu; // out of Unicode range: substitute U+FFFD
+ else
+ {
+ textString[chars_len++] = 0xD800u + ((c - 0x10000u) >> 10); // high surrogate
+ textString[chars_len++] = 0xDC00u + ((c - 0x10000u) & ((1u << 10) - 1)); // low surrogate
+ }
+ }
+
+ ALLOCATE_ARRAY (WORD, log_clusters, chars_len);
+ /* One entry per UTF-16 unit holding its item's cluster value (original note: needed to assign features). */
+ chars_len = 0;
+ for (unsigned int i = 0; i < buffer->len; i++)
+ {
+ hb_codepoint_t c = buffer->info[i].codepoint;
+ unsigned int cluster = buffer->info[i].cluster;
+ log_clusters[chars_len++] = cluster; // NOTE(review): WORD is 16-bit (see the sizeof remark further down), so larger cluster values are truncated
+ if (hb_in_range (c, 0x10000u, 0x10FFFFu))
+ log_clusters[chars_len++] = cluster; /* Surrogates. */
+ }
+
+ // TODO: Handle TEST_DISABLE_OPTIONAL_LIGATURES
+
+ DWRITE_READING_DIRECTION readingDirection;
+ readingDirection = buffer->props.direction ? // any non-zero direction value selects RTL here, whereas isRightToLeft below uses HB_DIRECTION_IS_BACKWARD — NOTE(review): confirm the two agree
+ DWRITE_READING_DIRECTION_RIGHT_TO_LEFT :
+ DWRITE_READING_DIRECTION_LEFT_TO_RIGHT;
+
+ /*
+ * There's an internal 16-bit limit on some things inside the analyzer,
+ * but we never attempt to shape a word longer than 64K characters
+ * in a single gfxShapedWord, so we cannot exceed that limit. (NOTE(review): nothing in this function enforces that bound — confirm callers do.)
+ */
+ uint32_t textLength = buffer->len; // NOTE(review): textString holds chars_len UTF-16 units, which exceeds buffer->len whenever a surrogate pair was emitted above — confirm buffer->len is intended
+
+ TextAnalysis analysis (textString, textLength, nullptr, readingDirection);
+ TextAnalysis::Run *runHead;
+ HRESULT hr;
+ hr = analysis.GenerateResults (analyzer, &runHead);
+
+#define FAIL(...) \
+ HB_STMT_START { \
+ DEBUG_MSG (DIRECTWRITE, nullptr, __VA_ARGS__); \
+ return false; \
+ } HB_STMT_END
+
+ if (FAILED (hr))
+ FAIL ("Analyzer failed to generate results.");
+
+ uint32_t maxGlyphCount = 3 * textLength / 2 + 16; // initial capacity guess; doubled below whenever GetGlyphs reports an insufficient buffer
+ uint32_t glyphCount;
+ bool isRightToLeft = HB_DIRECTION_IS_BACKWARD (buffer->props.direction);
+
+ const wchar_t localeName[20] = {0}; // NOTE(review): declared const yet written through a cast just below
+ if (buffer->props.language)
+ mbstowcs ((wchar_t*) localeName,
+ hb_language_to_string (buffer->props.language), 20); // NOTE(review): mbstowcs leaves the result unterminated when it fills all 20 elements
+
+ // TODO: features are applied to the whole text; only tag and value of each hb_feature_t are read, so ranges are ignored
+ DWRITE_TYPOGRAPHIC_FEATURES typographic_features;
+ typographic_features.featureCount = num_features;
+ if (num_features) // NOTE(review): typographic_features.features stays uninitialized when num_features == 0
+ {
+ typographic_features.features = new DWRITE_FONT_FEATURE[num_features];
+ for (unsigned int i = 0; i < num_features; ++i)
+ {
+ typographic_features.features[i].nameTag = (DWRITE_FONT_FEATURE_TAG)
+ hb_uint32_swap (features[i].tag); // the tag's bytes are swapped before being handed to DirectWrite
+ typographic_features.features[i].parameter = features[i].value;
+ }
+ }
+ const DWRITE_TYPOGRAPHIC_FEATURES* dwFeatures;
+ dwFeatures = (const DWRITE_TYPOGRAPHIC_FEATURES*) &typographic_features;
+ const uint32_t featureRangeLengths[] = { textLength }; // a single feature range spanning the whole text
+ //
+
+ uint16_t* clusterMap;
+ clusterMap = new uint16_t[textLength];
+ DWRITE_SHAPING_TEXT_PROPERTIES* textProperties;
+ textProperties = new DWRITE_SHAPING_TEXT_PROPERTIES[textLength];
+retry_getglyphs:
+ uint16_t* glyphIndices = new uint16_t[maxGlyphCount]; // glyph output buffers are (re)allocated on every attempt
+ DWRITE_SHAPING_GLYPH_PROPERTIES* glyphProperties;
+ glyphProperties = new DWRITE_SHAPING_GLYPH_PROPERTIES[maxGlyphCount];
+
+ hr = analyzer->GetGlyphs (textString, textLength, fontFace, false,
+ isRightToLeft, &runHead->mScript, localeName,
+ nullptr, &dwFeatures, featureRangeLengths, 1,
+ maxGlyphCount, clusterMap, textProperties,
+ glyphIndices, glyphProperties, &glyphCount); // only the script of the first run (runHead) is used for the whole text
+
+ if (unlikely (hr == HRESULT_FROM_WIN32 (ERROR_INSUFFICIENT_BUFFER)))
+ {
+ delete [] glyphIndices;
+ delete [] glyphProperties;
+
+ maxGlyphCount *= 2; // grow geometrically and try again
+
+ goto retry_getglyphs;
+ }
+ if (FAILED (hr))
+ FAIL ("Analyzer failed to get glyphs."); // NOTE(review): FAIL returns without delete[]-ing the arrays allocated above; the same holds for every later FAIL
+
+ float* glyphAdvances = new float[maxGlyphCount];
+ DWRITE_GLYPH_OFFSET* glyphOffsets = new DWRITE_GLYPH_OFFSET[maxGlyphCount];
+
+ /* The -2 in the following is to compensate for possible
+ * alignment needed after the WORD array. sizeof (WORD) == 2. */
+ unsigned int glyphs_size = (scratch_size * sizeof (int) - 2)
+ / (sizeof (WORD) +
+ sizeof (DWRITE_SHAPING_GLYPH_PROPERTIES) +
+ sizeof (int) +
+ sizeof (DWRITE_GLYPH_OFFSET) +
+ sizeof (uint32_t));
+ ALLOCATE_ARRAY (uint32_t, vis_clusters, glyphs_size); // NOTE(review): glyphs_size comes from the remaining scratch space; no visible check that glyphCount <= glyphs_size
+
+#undef ALLOCATE_ARRAY
+
+ int fontEmSize = font->face->get_upem ();
+ if (fontEmSize < 0) fontEmSize = -fontEmSize;
+
+ if (fontEmSize < 0) fontEmSize = -fontEmSize; // NOTE(review): exact duplicate of the statement two lines up; it has no effect
+ double x_mult = (double) font->x_scale / fontEmSize; // factors applied to the DirectWrite advances/offsets when filling buffer->pos below
+ double y_mult = (double) font->y_scale / fontEmSize;
+
+ hr = analyzer->GetGlyphPlacements (textString, clusterMap, textProperties,
+ textLength, glyphIndices, glyphProperties,
+ glyphCount, fontFace, fontEmSize,
+ false, isRightToLeft, &runHead->mScript, localeName,
+ &dwFeatures, featureRangeLengths, 1,
+ glyphAdvances, glyphOffsets);
+
+ if (FAILED (hr))
+ FAIL ("Analyzer failed to get glyph placements.");
+
+ IDWriteTextAnalyzer1* analyzer1;
+ analyzer->QueryInterface (&analyzer1); // NOTE(review): the HRESULT is ignored and no Release () of analyzer1 appears in this function
+
+ if (analyzer1 && lineWidth) // justification is attempted only when lineWidth is non-zero
+ {
+ DWRITE_JUSTIFICATION_OPPORTUNITY* justificationOpportunities =
+ new DWRITE_JUSTIFICATION_OPPORTUNITY[maxGlyphCount];
+ hr = analyzer1->GetJustificationOpportunities (fontFace, fontEmSize, runHead->mScript,
+ textLength, glyphCount, textString,
+ clusterMap, glyphProperties,
+ justificationOpportunities);
+
+ if (FAILED (hr))
+ FAIL ("Analyzer failed to get justification opportunities.");
+
+ float* justifiedGlyphAdvances = new float[maxGlyphCount];
+ DWRITE_GLYPH_OFFSET* justifiedGlyphOffsets = new DWRITE_GLYPH_OFFSET[glyphCount];
+ hr = analyzer1->JustifyGlyphAdvances (lineWidth, glyphCount, justificationOpportunities,
+ glyphAdvances, glyphOffsets, justifiedGlyphAdvances,
+ justifiedGlyphOffsets);
+
+ if (FAILED (hr)) FAIL ("Analyzer failed to get justify glyph advances.");
+
+ DWRITE_SCRIPT_PROPERTIES scriptProperties;
+ hr = analyzer1->GetScriptProperties (runHead->mScript, &scriptProperties);
+ if (FAILED (hr)) FAIL ("Analyzer failed to get script properties.");
+ uint32_t justificationCharacter = scriptProperties.justificationCharacter;
+
+ // Only when the script's justificationCharacter is not a space (32) is GetJustifiedGlyphs used to produce a modified glyph run
+ if (justificationCharacter != 32)
+ {
+ uint16_t* modifiedClusterMap = new uint16_t[textLength];
+ retry_getjustifiedglyphs:
+ uint16_t* modifiedGlyphIndices = new uint16_t[maxGlyphCount];
+ float* modifiedGlyphAdvances = new float[maxGlyphCount];
+ DWRITE_GLYPH_OFFSET* modifiedGlyphOffsets = new DWRITE_GLYPH_OFFSET[maxGlyphCount];
+ uint32_t actualGlyphsCount;
+ hr = analyzer1->GetJustifiedGlyphs (fontFace, fontEmSize, runHead->mScript,
+ textLength, glyphCount, maxGlyphCount,
+ clusterMap, glyphIndices, glyphAdvances,
+ justifiedGlyphAdvances, justifiedGlyphOffsets,
+ glyphProperties, &actualGlyphsCount,
+ modifiedClusterMap, modifiedGlyphIndices,
+ modifiedGlyphAdvances, modifiedGlyphOffsets);
+
+ if (hr == HRESULT_FROM_WIN32 (ERROR_INSUFFICIENT_BUFFER))
+ {
+ maxGlyphCount = actualGlyphsCount; // retry with the size the analyzer reported
+ delete [] modifiedGlyphIndices;
+ delete [] modifiedGlyphAdvances;
+ delete [] modifiedGlyphOffsets;
+
+ maxGlyphCount = actualGlyphsCount; // NOTE(review): repeats the assignment at the top of this block; it has no effect
+
+ goto retry_getjustifiedglyphs;
+ }
+ if (FAILED (hr))
+ FAIL ("Analyzer failed to get justified glyphs.");
+
+ delete [] clusterMap; // the unjustified arrays are replaced by the modified ones below
+ delete [] glyphIndices;
+ delete [] glyphAdvances;
+ delete [] glyphOffsets;
+
+ glyphCount = actualGlyphsCount;
+ clusterMap = modifiedClusterMap;
+ glyphIndices = modifiedGlyphIndices;
+ glyphAdvances = modifiedGlyphAdvances;
+ glyphOffsets = modifiedGlyphOffsets;
+
+ delete [] justifiedGlyphAdvances;
+ delete [] justifiedGlyphOffsets;
+ }
+ else
+ {
+ delete [] glyphAdvances; // keep the glyphs, adopt the justified advances and offsets
+ delete [] glyphOffsets;
+
+ glyphAdvances = justifiedGlyphAdvances;
+ glyphOffsets = justifiedGlyphOffsets;
+ }
+
+ delete [] justificationOpportunities;
+ }
+
+ /* Ok, we've got everything we need, now compose output buffer,
+ * very, *very*, carefully! */
+
+ /* Calculate visual-clusters. That's what we ship. */
+ for (unsigned int i = 0; i < glyphCount; i++)
+ vis_clusters[i] = (uint32_t) -1; // sentinel: no cluster assigned yet
+ for (unsigned int i = 0; i < buffer->len; i++)
+ {
+ uint32_t *p =
+ &vis_clusters[log_clusters[buffer->info[i].utf16_index ()]]; // NOTE(review): the index is a cluster value from log_clusters, not a glyph index from clusterMap — confirm this is intended and in bounds
+ *p = hb_min (*p, buffer->info[i].cluster);
+ }
+ for (unsigned int i = 1; i < glyphCount; i++)
+ if (vis_clusters[i] == (uint32_t) -1)
+ vis_clusters[i] = vis_clusters[i - 1]; // unassigned glyphs inherit the previous glyph's cluster
+
+#undef utf16_index
+
+ if (unlikely (!buffer->ensure (glyphCount)))
+ FAIL ("Buffer in error");
+
+#undef FAIL
+
+ /* Set glyph infos */
+ buffer->len = 0;
+ for (unsigned int i = 0; i < glyphCount; i++)
+ {
+ hb_glyph_info_t *info = &buffer->info[buffer->len++];
+
+ info->codepoint = glyphIndices[i];
+ info->cluster = vis_clusters[i];
+
+ /* The remaining info fields are used as temporary storage for position data; it is read back in the next loop. */
+ info->mask = glyphAdvances[i]; // float advance converted to the integer mask field
+ info->var1.i32 = glyphOffsets[i].advanceOffset;
+ info->var2.i32 = glyphOffsets[i].ascenderOffset;
+ }
+
+ /* Set glyph positions */
+ buffer->clear_positions ();
+ for (unsigned int i = 0; i < glyphCount; i++)
+ {
+ hb_glyph_info_t *info = &buffer->info[i];
+ hb_glyph_position_t *pos = &buffer->pos[i];
+
+ /* TODO vertical */
+ pos->x_advance = x_mult * (int32_t) info->mask;
+ pos->x_offset = x_mult * (isRightToLeft ? -info->var1.i32 : info->var1.i32); // the advance offset is negated for right-to-left text
+ pos->y_offset = y_mult * info->var2.i32;
+ }
+
+ if (isRightToLeft) hb_buffer_reverse (buffer);
+
+ delete [] clusterMap; // textString, log_clusters and vis_clusters were carved from the scratch buffer and are not freed here
+ delete [] glyphIndices;
+ delete [] textProperties;
+ delete [] glyphProperties;
+ delete [] glyphAdvances;
+ delete [] glyphOffsets;
+
+ if (num_features)
+ delete [] typographic_features.features;
+
+ /* Wow, done! */
+ return true;
+}
+
+hb_bool_t
+_hb_directwrite_shape (hb_shape_plan_t *shape_plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features)
+{
+ return _hb_directwrite_shape_full (shape_plan, font, buffer,
+ features, num_features, 0); // lineWidth 0: the justification branch of _hb_directwrite_shape_full is skipped
+}
+
+HB_UNUSED static bool
+_hb_directwrite_shape_experimental_width (hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features,
+ float width)
+{ // Shapes @buffer with the "directwrite" shaper, passing @width as the line width for justification.
+ static const char *shapers = "directwrite";
+ hb_shape_plan_t *shape_plan;
+ shape_plan = hb_shape_plan_create_cached (font->face, &buffer->props,
+ features, num_features, &shapers); // NOTE(review): the returned plan is not destroyed in this function — confirm ownership
+ hb_bool_t res = _hb_directwrite_shape_full (shape_plan, font, buffer,
+ features, num_features, width);
+
+ buffer->unsafe_to_break_all (); // flags the whole buffer, regardless of whether shaping succeeded
+
+ return res;
+}
+
+struct _hb_directwrite_font_table_context { // blob user data: what _hb_directwrite_table_data_release needs to release a table
+ IDWriteFontFace *face; // face the table came from; ReleaseFontTable is called on it
+ void *table_context; // context value produced by TryGetFontTable and handed back to ReleaseFontTable
+};
+
+static void
+_hb_directwrite_table_data_release (void *data)
+{ // Blob destroy callback: @data is the context allocated in _hb_directwrite_reference_table.
+ _hb_directwrite_font_table_context *context = (_hb_directwrite_font_table_context *) data;
+ context->face->ReleaseFontTable (context->table_context); // hand the table back to DirectWrite
+ delete context; // matches the `new` in _hb_directwrite_reference_table
+}
+
+static hb_blob_t *
+_hb_directwrite_reference_table (hb_face_t *face HB_UNUSED, hb_tag_t tag, void *user_data)
+{ // Table callback for hb_face_create_for_tables: @user_data is the IDWriteFontFace; returns a blob for @tag or nullptr.
+ IDWriteFontFace *dw_face = ((IDWriteFontFace *) user_data);
+ const void *data;
+ uint32_t length;
+ void *table_context;
+ BOOL exists;
+ if (!dw_face || FAILED (dw_face->TryGetFontTable (hb_uint32_swap (tag), &data,
+ &length, &table_context, &exists))) // the tag's bytes are swapped before the lookup
+ return nullptr;
+
+ if (!data || !exists || !length)
+ { // the call succeeded but produced no usable table: release its context and report "no table"
+ dw_face->ReleaseFontTable (table_context);
+ return nullptr;
+ }
+
+ _hb_directwrite_font_table_context *context = new _hb_directwrite_font_table_context; // freed by _hb_directwrite_table_data_release
+ context->face = dw_face; // NOTE(review): stored without AddRef — presumably the face outlives every table blob; confirm
+ context->table_context = table_context;
+
+ return hb_blob_create ((const char *) data, length, HB_MEMORY_MODE_READONLY,
+ context, _hb_directwrite_table_data_release);
+}
+
+static void
+_hb_directwrite_font_release (void *data)
+{ // Destroy callback registered by hb_directwrite_face_create; @data is the IDWriteFontFace, or null.
+ if (data)
+ ((IDWriteFontFace *) data)->Release (); // balances the AddRef taken in hb_directwrite_face_create
+}
+
+/**
+ * hb_directwrite_face_create:
+ * @font_face: a DirectWrite IDWriteFontFace object; may be null.
+ *
+ * Return value: #hb_face_t object whose tables are read from @font_face
+ *
+ * Since: 2.4.0
+ **/
+hb_face_t *
+hb_directwrite_face_create (IDWriteFontFace *font_face)
+{
+ if (font_face)
+ font_face->AddRef (); // the new face keeps its own reference; _hb_directwrite_font_release drops it
+ return hb_face_create_for_tables (_hb_directwrite_reference_table, font_face,
+ _hb_directwrite_font_release);
+}
+
+/**
+* hb_directwrite_face_get_font_face:
+* @face: a #hb_face_t object
+*
+* Return value: the DirectWrite IDWriteFontFace object stored in the directwrite data of @face
+*
+* Since: 2.5.0
+**/
+IDWriteFontFace *
+hb_directwrite_face_get_font_face (hb_face_t *face)
+{
+ return face->data.directwrite->fontFace; // NOTE(review): no null check on the directwrite face data here; no AddRef on the returned pointer
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-directwrite.h b/thirdparty/harfbuzz/src/hb-directwrite.h
new file mode 100644
index 0000000000..f837627a28
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-directwrite.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2015-2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_DIRECTWRITE_H
+#define HB_DIRECTWRITE_H
+
+#include "hb.h"
+
+HB_BEGIN_DECLS
+
+HB_EXTERN hb_face_t *
+hb_directwrite_face_create (IDWriteFontFace *font_face);
+
+HB_EXTERN IDWriteFontFace *
+hb_directwrite_face_get_font_face (hb_face_t *face);
+
+HB_END_DECLS
+
+#endif /* HB_DIRECTWRITE_H */
diff --git a/thirdparty/harfbuzz/src/hb-dispatch.hh b/thirdparty/harfbuzz/src/hb-dispatch.hh
new file mode 100644
index 0000000000..7eace86e54
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-dispatch.hh
@@ -0,0 +1,61 @@
+/*
+ * Copyright © 2007,2008,2009,2010 Red Hat, Inc.
+ * Copyright © 2012,2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_DISPATCH_HH
+#define HB_DISPATCH_HH
+
+#include "hb.hh"
+
+/*
+ * Dispatch
+ */
+
+template <typename Context, typename Return=hb_empty_t, unsigned int MaxDebugDepth=0>
+struct hb_dispatch_context_t
+{
+ hb_dispatch_context_t () : debug_depth (0) {}
+ private:
+ /* Downcast to the derived Context; see https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern */
+ const Context* thiz () const { return static_cast<const Context *> (this); }
+ Context* thiz () { return static_cast< Context *> (this); }
+ public:
+ const char *get_name () { return "UNKNOWN"; } // default name; not virtual, so a derived Context shadows it rather than overrides it
+ static constexpr unsigned max_debug_depth = MaxDebugDepth;
+ typedef Return return_t;
+ template <typename T, typename F>
+ bool may_dispatch (const T *obj HB_UNUSED, const F *format HB_UNUSED) { return true; } // default: always allow dispatch
+ template <typename T, typename ...Ts>
+ return_t dispatch (const T &obj, Ts&&... ds)
+ { return obj.dispatch (thiz (), hb_forward<Ts> (ds)...); } // forwards to obj.dispatch with the derived context and the extra arguments
+ static return_t no_dispatch_return_value () { return Context::default_return_value (); } // requires Context to provide default_return_value ()
+ static bool stop_sublookup_iteration (const return_t r HB_UNUSED) { return false; } // default: never stop early
+ unsigned debug_depth; // starts at 0 (see constructor)
+};
+
+
+#endif /* HB_DISPATCH_HH */
diff --git a/thirdparty/harfbuzz/src/hb-draw.cc b/thirdparty/harfbuzz/src/hb-draw.cc
new file mode 100644
index 0000000000..1a5f9c8c6b
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-draw.cc
@@ -0,0 +1,261 @@
+/*
+ * Copyright © 2019-2020 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_DRAW
+#ifdef HB_EXPERIMENTAL_API
+
+#include "hb-draw.hh"
+#include "hb-ot.h"
+#include "hb-ot-glyf-table.hh"
+#include "hb-ot-cff1-table.hh"
+#include "hb-ot-cff2-table.hh"
+
+/**
+ * hb_draw_funcs_set_move_to_func:
+ * @funcs: draw functions object
+ * @move_to: move-to callback
+ *
+ * Sets the move-to callback of the draw functions object; does nothing if @funcs is immutable.
+ *
+ * Since: EXPERIMENTAL
+ **/
+void
+hb_draw_funcs_set_move_to_func (hb_draw_funcs_t *funcs,
+ hb_draw_move_to_func_t move_to)
+{
+ if (unlikely (hb_object_is_immutable (funcs))) return;
+ funcs->move_to = move_to;
+}
+
+/**
+ * hb_draw_funcs_set_line_to_func:
+ * @funcs: draw functions object
+ * @line_to: line-to callback
+ *
+ * Sets the line-to callback of the draw functions object; does nothing if @funcs is immutable.
+ *
+ * Since: EXPERIMENTAL
+ **/
+void
+hb_draw_funcs_set_line_to_func (hb_draw_funcs_t *funcs,
+ hb_draw_line_to_func_t line_to)
+{
+ if (unlikely (hb_object_is_immutable (funcs))) return;
+ funcs->line_to = line_to;
+}
+
+/**
+ * hb_draw_funcs_set_quadratic_to_func:
+ * @funcs: draw functions object
+ * @quadratic_to: quadratic-to callback
+ *
+ * Sets the quadratic-to callback of the draw functions object; does nothing if @funcs is immutable.
+ *
+ * Since: EXPERIMENTAL
+ **/
+void
+hb_draw_funcs_set_quadratic_to_func (hb_draw_funcs_t *funcs,
+ hb_draw_quadratic_to_func_t quadratic_to)
+{
+ if (unlikely (hb_object_is_immutable (funcs))) return;
+ funcs->quadratic_to = quadratic_to;
+ funcs->is_quadratic_to_set = true; // recorded even when quadratic_to is a null pointer
+}
+
+/**
+ * hb_draw_funcs_set_cubic_to_func:
+ * @funcs: draw functions object
+ * @cubic_to: cubic-to callback
+ *
+ * Sets the cubic-to callback of the draw functions object; does nothing if @funcs is immutable.
+ *
+ * Since: EXPERIMENTAL
+ **/
+void
+hb_draw_funcs_set_cubic_to_func (hb_draw_funcs_t *funcs,
+ hb_draw_cubic_to_func_t cubic_to)
+{
+ if (unlikely (hb_object_is_immutable (funcs))) return;
+ funcs->cubic_to = cubic_to;
+}
+
+/**
+ * hb_draw_funcs_set_close_path_func:
+ * @funcs: draw functions object
+ * @close_path: close-path callback
+ *
+ * Sets the close-path callback of the draw functions object; does nothing if @funcs is immutable.
+ *
+ * Since: EXPERIMENTAL
+ **/
+void
+hb_draw_funcs_set_close_path_func (hb_draw_funcs_t *funcs,
+ hb_draw_close_path_func_t close_path)
+{
+ if (unlikely (hb_object_is_immutable (funcs))) return;
+ funcs->close_path = close_path;
+}
+
+static void
+_move_to_nil (hb_position_t to_x HB_UNUSED, hb_position_t to_y HB_UNUSED, void *user_data HB_UNUSED) {} // do-nothing default installed by hb_draw_funcs_create
+
+static void
+_line_to_nil (hb_position_t to_x HB_UNUSED, hb_position_t to_y HB_UNUSED, void *user_data HB_UNUSED) {} // do-nothing default installed by hb_draw_funcs_create
+
+static void
+_quadratic_to_nil (hb_position_t control_x HB_UNUSED, hb_position_t control_y HB_UNUSED,
+ hb_position_t to_x HB_UNUSED, hb_position_t to_y HB_UNUSED,
+ void *user_data HB_UNUSED) {} // do-nothing default installed by hb_draw_funcs_create
+
+static void
+_cubic_to_nil (hb_position_t control1_x HB_UNUSED, hb_position_t control1_y HB_UNUSED,
+ hb_position_t control2_x HB_UNUSED, hb_position_t control2_y HB_UNUSED,
+ hb_position_t to_x HB_UNUSED, hb_position_t to_y HB_UNUSED,
+ void *user_data HB_UNUSED) {} // do-nothing default installed by hb_draw_funcs_create
+
+static void
+_close_path_nil (void *user_data HB_UNUSED) {} // do-nothing default installed by hb_draw_funcs_create
+
+/**
+ * hb_draw_funcs_create:
+ *
+ * Creates a new draw callbacks object with every callback set to a do-nothing default; returns the Null object if allocation fails.
+ *
+ * Since: EXPERIMENTAL
+ **/
+hb_draw_funcs_t *
+hb_draw_funcs_create ()
+{
+ hb_draw_funcs_t *funcs;
+ if (unlikely (!(funcs = hb_object_create<hb_draw_funcs_t> ())))
+ return const_cast<hb_draw_funcs_t *> (&Null (hb_draw_funcs_t)); // allocation failed: hand back the shared Null object
+
+ funcs->move_to = (hb_draw_move_to_func_t) _move_to_nil;
+ funcs->line_to = (hb_draw_line_to_func_t) _line_to_nil;
+ funcs->quadratic_to = (hb_draw_quadratic_to_func_t) _quadratic_to_nil;
+ funcs->is_quadratic_to_set = false; // until a quadratic callback is set, draw_helper_t emits quadratics through cubic_to
+ funcs->cubic_to = (hb_draw_cubic_to_func_t) _cubic_to_nil;
+ funcs->close_path = (hb_draw_close_path_func_t) _close_path_nil;
+ return funcs;
+}
+
+/**
+ * hb_draw_funcs_reference:
+ * @funcs: draw functions
+ *
+ * Increases the reference count of the callbacks object.
+ *
+ * Returns: The same object.
+ * Since: EXPERIMENTAL
+ **/
+hb_draw_funcs_t *
+hb_draw_funcs_reference (hb_draw_funcs_t *funcs)
+{
+ return hb_object_reference (funcs);
+}
+
+/**
+ * hb_draw_funcs_destroy:
+ * @funcs: draw functions
+ *
+ * Decreases the reference count of the callbacks object and frees the object
+ * when the count reaches zero.
+ *
+ * Since: EXPERIMENTAL
+ **/
+void
+hb_draw_funcs_destroy (hb_draw_funcs_t *funcs)
+{
+ if (!hb_object_destroy (funcs)) return; // hb_object_destroy returned false: the object must not be freed
+
+ free (funcs);
+}
+
+/**
+ * hb_draw_funcs_make_immutable:
+ * @funcs: draw functions
+ *
+ * Makes the funcs object immutable; the setter functions then leave it unchanged.
+ *
+ * Since: EXPERIMENTAL
+ **/
+void
+hb_draw_funcs_make_immutable (hb_draw_funcs_t *funcs)
+{
+ if (hb_object_is_immutable (funcs))
+ return; // already immutable, nothing to do
+
+ hb_object_make_immutable (funcs);
+}
+
+/**
+ * hb_draw_funcs_is_immutable:
+ * @funcs: draw functions
+ *
+ * Checks whether the funcs object is immutable.
+ *
+ * Returns: Whether @funcs is immutable.
+ * Since: EXPERIMENTAL
+ **/
+hb_bool_t
+hb_draw_funcs_is_immutable (hb_draw_funcs_t *funcs)
+{
+ return hb_object_is_immutable (funcs);
+}
+
+/**
+ * hb_font_draw_glyph:
+ * @font: a font object
+ * @glyph: a glyph id
+ * @funcs: draw callbacks object
+ * @user_data: opaque pointer passed through to the callbacks whenever they are called
+ *
+ * Draws the outline of a glyph by invoking the callbacks in @funcs.
+ *
+ * Returns: Whether the font had the glyph and the operation completed successfully.
+ * Since: EXPERIMENTAL
+ **/
+hb_bool_t
+hb_font_draw_glyph (hb_font_t *font, hb_codepoint_t glyph,
+ const hb_draw_funcs_t *funcs,
+ void *user_data)
+{
+ if (unlikely (funcs == &Null (hb_draw_funcs_t) ||
+ glyph >= font->face->get_num_glyphs ())) // reject the Null funcs object and out-of-range glyph ids
+ return false;
+
+ draw_helper_t draw_helper (funcs, user_data);
+ if (font->face->table.glyf->get_path (font, glyph, draw_helper)) return true; // glyf is tried first
+#ifndef HB_NO_CFF
+ if (font->face->table.cff1->get_path (font, glyph, draw_helper)) return true; // then CFF1, then CFF2
+ if (font->face->table.cff2->get_path (font, glyph, draw_helper)) return true;
+#endif
+
+ return false; // no table produced a path for this glyph
+}
+
+#endif
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-draw.h b/thirdparty/harfbuzz/src/hb-draw.h
new file mode 100644
index 0000000000..98eccf4c0c
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-draw.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright © 2019-2020 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_H_IN
+#error "Include <hb.h> instead."
+#endif
+
+#ifndef HB_DRAW_H
+#define HB_DRAW_H
+
+#include "hb.h"
+
+HB_BEGIN_DECLS
+
+#ifdef HB_EXPERIMENTAL_API
+typedef void (*hb_draw_move_to_func_t) (hb_position_t to_x, hb_position_t to_y, void *user_data);
+typedef void (*hb_draw_line_to_func_t) (hb_position_t to_x, hb_position_t to_y, void *user_data);
+typedef void (*hb_draw_quadratic_to_func_t) (hb_position_t control_x, hb_position_t control_y,
+ hb_position_t to_x, hb_position_t to_y,
+ void *user_data);
+typedef void (*hb_draw_cubic_to_func_t) (hb_position_t control1_x, hb_position_t control1_y,
+ hb_position_t control2_x, hb_position_t control2_y,
+ hb_position_t to_x, hb_position_t to_y,
+ void *user_data);
+typedef void (*hb_draw_close_path_func_t) (void *user_data);
+
+/**
+ * hb_draw_funcs_t:
+ *
+ * Glyph draw callbacks.
+ *
+ * The _move_to, _line_to and _cubic_to callbacks need to be defined, whereas
+ * _quadratic_to calls are translated to _cubic_to if that callback isn't defined.
+ *
+ * Since: EXPERIMENTAL
+ **/
+typedef struct hb_draw_funcs_t hb_draw_funcs_t;
+
+HB_EXTERN void
+hb_draw_funcs_set_move_to_func (hb_draw_funcs_t *funcs,
+ hb_draw_move_to_func_t move_to);
+
+HB_EXTERN void
+hb_draw_funcs_set_line_to_func (hb_draw_funcs_t *funcs,
+ hb_draw_line_to_func_t line_to);
+
+HB_EXTERN void
+hb_draw_funcs_set_quadratic_to_func (hb_draw_funcs_t *funcs,
+ hb_draw_quadratic_to_func_t quadratic_to);
+
+HB_EXTERN void
+hb_draw_funcs_set_cubic_to_func (hb_draw_funcs_t *funcs,
+ hb_draw_cubic_to_func_t cubic_to);
+
+HB_EXTERN void
+hb_draw_funcs_set_close_path_func (hb_draw_funcs_t *funcs,
+ hb_draw_close_path_func_t close_path);
+
+HB_EXTERN hb_draw_funcs_t *
+hb_draw_funcs_create (void);
+
+HB_EXTERN hb_draw_funcs_t *
+hb_draw_funcs_reference (hb_draw_funcs_t *funcs);
+
+HB_EXTERN void
+hb_draw_funcs_destroy (hb_draw_funcs_t *funcs);
+
+HB_EXTERN void
+hb_draw_funcs_make_immutable (hb_draw_funcs_t *funcs);
+
+HB_EXTERN hb_bool_t
+hb_draw_funcs_is_immutable (hb_draw_funcs_t *funcs);
+#endif
+
+HB_END_DECLS
+
+#endif /* HB_DRAW_H */
diff --git a/thirdparty/harfbuzz/src/hb-draw.hh b/thirdparty/harfbuzz/src/hb-draw.hh
new file mode 100644
index 0000000000..2aa0a5b4db
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-draw.hh
@@ -0,0 +1,139 @@
+/*
+ * Copyright © 2020 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_DRAW_HH
+#define HB_DRAW_HH
+
+#include "hb.hh"
+
+#ifdef HB_EXPERIMENTAL_API
+struct hb_draw_funcs_t
+{
+ hb_object_header_t header;
+
+ hb_draw_move_to_func_t move_to; // invoked by draw_helper_t::start_path when a path is opened
+ hb_draw_line_to_func_t line_to;
+ hb_draw_quadratic_to_func_t quadratic_to; // only invoked when is_quadratic_to_set is true
+ bool is_quadratic_to_set; // when false, draw_helper_t::quadratic_to emits an equivalent cubic_to call instead
+ hb_draw_cubic_to_func_t cubic_to;
+ hb_draw_close_path_func_t close_path; // invoked by draw_helper_t::end_path for an open path
+};
+
+struct draw_helper_t
+{
+ draw_helper_t (const hb_draw_funcs_t *funcs_, void *user_data_)
+ { // Wraps the user's callbacks and tracks path state; funcs_ and user_data_ are stored, not copied.
+ funcs = funcs_;
+ user_data = user_data_;
+ path_open = false;
+ path_start_x = current_x = path_start_y = current_y = 0;
+ }
+ ~draw_helper_t () { end_path (); } // a path still open at destruction is closed
+
+ void move_to (hb_position_t x, hb_position_t y)
+ { // Only records the new start point; the move_to callback fires later, from start_path, on the first drawing call.
+ if (path_open) end_path ();
+ current_x = path_start_x = x;
+ current_y = path_start_y = y;
+ }
+
+ void line_to (hb_position_t x, hb_position_t y)
+ {
+ if (equal_to_current (x, y)) return; // zero-length segment: emit nothing
+ if (!path_open) start_path ();
+ funcs->line_to (x, y, user_data);
+ current_x = x;
+ current_y = y;
+ }
+
+ void
+ quadratic_to (hb_position_t control_x, hb_position_t control_y,
+ hb_position_t to_x, hb_position_t to_y)
+ {
+ if (equal_to_current (control_x, control_y) && equal_to_current (to_x, to_y))
+ return; // degenerate curve (all points equal the current point): emit nothing
+ if (!path_open) start_path ();
+ if (funcs->is_quadratic_to_set)
+ funcs->quadratic_to (control_x, control_y, to_x, to_y, user_data);
+ else // no quadratic callback: emit a cubic with controls at (start + 2*control)/3 and (end + 2*control)/3, rounded
+ funcs->cubic_to (roundf ((current_x + 2.f * control_x) / 3.f),
+ roundf ((current_y + 2.f * control_y) / 3.f),
+ roundf ((to_x + 2.f * control_x) / 3.f),
+ roundf ((to_y + 2.f * control_y) / 3.f),
+ to_x, to_y, user_data);
+ current_x = to_x;
+ current_y = to_y;
+ }
+
+ void
+ cubic_to (hb_position_t control1_x, hb_position_t control1_y,
+ hb_position_t control2_x, hb_position_t control2_y,
+ hb_position_t to_x, hb_position_t to_y)
+ {
+ if (equal_to_current (control1_x, control1_y) &&
+ equal_to_current (control2_x, control2_y) &&
+ equal_to_current (to_x, to_y))
+ return; // degenerate curve (all points equal the current point): emit nothing
+ if (!path_open) start_path ();
+ funcs->cubic_to (control1_x, control1_y, control2_x, control2_y, to_x, to_y, user_data);
+ current_x = to_x;
+ current_y = to_y;
+ }
+
+ void end_path ()
+ {
+ if (path_open)
+ {
+ if ((path_start_x != current_x) || (path_start_y != current_y))
+ funcs->line_to (path_start_x, path_start_y, user_data); // draw back to the start point before closing
+ funcs->close_path (user_data);
+ }
+ path_open = false;
+ path_start_x = current_x = path_start_y = current_y = 0; // state is reset whether or not a path was open
+ }
+
+ protected:
+ bool equal_to_current (hb_position_t x, hb_position_t y)
+ { return current_x == x && current_y == y; }
+
+ void start_path ()
+ { // Opens a path and reports its start point through the move_to callback.
+ if (path_open) end_path ();
+ path_open = true;
+ funcs->move_to (path_start_x, path_start_y, user_data);
+ }
+
+ hb_position_t path_start_x; // start point of the path; set by move_to, reset to 0 by end_path
+ hb_position_t path_start_y;
+
+ hb_position_t current_x; // end point of the most recent drawing call
+ hb_position_t current_y;
+
+ bool path_open; // true between start_path and end_path
+ const hb_draw_funcs_t *funcs;
+ void *user_data; // passed as the last argument of every callback
+};
+#endif
+
+#endif /* HB_DRAW_HH */
diff --git a/thirdparty/harfbuzz/src/hb-face.cc b/thirdparty/harfbuzz/src/hb-face.cc
new file mode 100644
index 0000000000..7bde50df5b
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-face.cc
@@ -0,0 +1,733 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#include "hb-face.hh"
+#include "hb-blob.hh"
+#include "hb-open-file.hh"
+#include "hb-ot-face.hh"
+#include "hb-ot-cmap-table.hh"
+
+
+/**
+ * SECTION:hb-face
+ * @title: hb-face
+ * @short_description: Font face objects
+ * @include: hb.h
+ *
+ * Font face objects represent a single face in a font family.
+ * More exactly, a font face represents a single face in a binary font file.
+ * Font faces are typically built from a binary blob and a face index.
+ * Font faces are used to create fonts.
+ **/
+
+
+/**
+ * hb_face_count:
+ * @blob: a blob.
+ *
+ * Reports how many faces @blob contains.  A reference to the blob is
+ * sanitized as an OpenType font file and the face count of that file is
+ * returned.  A null @blob yields 0.
+ *
+ * Return value: Number of faces in @blob
+ *
+ * Since: 1.7.7
+ **/
+unsigned int
+hb_face_count (hb_blob_t *blob)
+{
+  if (unlikely (!blob))
+    return 0;
+
+  /* TODO We shouldn't be sanitizing blob.  Port to run sanitizer and return if not sane. */
+  /* Make API signature const after. */
+  hb_blob_t *sanitized = hb_sanitize_context_t ().sanitize_blob<OT::OpenTypeFontFile> (hb_blob_reference (blob));
+  const unsigned int face_count = sanitized->as<OT::OpenTypeFontFile> ()->get_face_count ();
+
+  /* Drop the sanitized blob obtained above. */
+  hb_blob_destroy (sanitized);
+  return face_count;
+}
+
+/*
+ * hb_face_t
+ */
+
+/* Static initializer for the null hb_face_t instance -- presumably the
+ * object Null (hb_face_t) resolves to, whose address hb_face_get_empty()
+ * returns.  Initializers are positional and follow the member order of
+ * hb_face_t. */
+DEFINE_NULL_INSTANCE (hb_face_t) =
+{
+ HB_OBJECT_HEADER_STATIC,
+
+ nullptr, /* reference_table_func */
+ nullptr, /* user_data */
+ nullptr, /* destroy */
+
+ 0, /* index */
+ HB_ATOMIC_INT_INIT (1000), /* upem */
+ HB_ATOMIC_INT_INIT (0), /* num_glyphs */
+
+ /* Zero for the rest is fine. */
+};
+
+
+/**
+ * hb_face_create_for_tables:
+ * @reference_table_func: (closure user_data) (destroy destroy) (scope notified): callback the face uses to fetch a table blob for a tag.
+ * @user_data: pointer passed to @reference_table_func.
+ * @destroy: callback invoked on @user_data when it is no longer needed; may be null.
+ *
+ * Creates a face whose tables come from @reference_table_func.  If the
+ * callback is null or the face object cannot be created, @destroy (when
+ * set) is called on @user_data right away and the empty face is returned.
+ *
+ * Return value: (transfer full)
+ *
+ * Since: 0.9.2
+ **/
+hb_face_t *
+hb_face_create_for_tables (hb_reference_table_func_t  reference_table_func,
+                           void                      *user_data,
+                           hb_destroy_func_t          destroy)
+{
+  /* Only attempt the allocation when there is a callback to store. */
+  hb_face_t *face = reference_table_func ? hb_object_create<hb_face_t> () : nullptr;
+  if (!face)
+  {
+    if (destroy)
+      destroy (user_data);
+    return hb_face_get_empty ();
+  }
+
+  face->reference_table_func = reference_table_func;
+  face->user_data = user_data;
+  face->destroy = destroy;
+
+  /* -1 is the "not loaded yet" marker tested by hb_face_t::get_num_glyphs(). */
+  face->num_glyphs.set_relaxed (-1);
+
+  face->data.init0 (face);
+  face->table.init0 (face);
+
+  return face;
+}
+
+
+/* User data attached to faces made by hb_face_create(): the blob the
+ * tables are read from and the face index to use within it. */
+struct hb_face_for_data_closure_t
+{
+  hb_blob_t *blob;
+  unsigned int index;
+};
+
+/* Allocates a closure holding @blob and @index.  Returns nullptr when the
+ * allocation fails.  The blob pointer is stored as-is; no extra reference
+ * is taken here. */
+static hb_face_for_data_closure_t *
+_hb_face_for_data_closure_create (hb_blob_t *blob, unsigned int index)
+{
+  void *storage = calloc (1, sizeof (hb_face_for_data_closure_t));
+  if (unlikely (!storage))
+    return nullptr;
+
+  hb_face_for_data_closure_t *closure = (hb_face_for_data_closure_t *) storage;
+  closure->index = index;
+  closure->blob = blob;
+  return closure;
+}
+
+/* Destroy callback for closures made by _hb_face_for_data_closure_create():
+ * destroys the held blob, then frees the closure itself. */
+static void
+_hb_face_for_data_closure_destroy (void *data)
+{
+  hb_face_for_data_closure_t *self = (hb_face_for_data_closure_t *) data;
+  hb_blob_t *held_blob = self->blob;
+
+  hb_blob_destroy (held_blob);
+  free (self);
+}
+
+/* Table callback for blob-backed faces.  HB_TAG_NONE returns a new
+ * reference to the whole blob; any other tag returns a sub-blob covering
+ * that table inside the face selected by the closure's index. */
+static hb_blob_t *
+_hb_face_for_data_reference_table (hb_face_t *face HB_UNUSED, hb_tag_t tag, void *user_data)
+{
+  hb_face_for_data_closure_t *closure = (hb_face_for_data_closure_t *) user_data;
+
+  if (tag == HB_TAG_NONE)
+    return hb_blob_reference (closure->blob);
+
+  unsigned int face_offset;
+  const OT::OpenTypeFontFile &font_file = *closure->blob->as<OT::OpenTypeFontFile> ();
+  const OT::OpenTypeFontFace &font_face = font_file.get_face (closure->index, &face_offset);
+  const OT::OpenTypeTable &record = font_face.get_table_by_tag (tag);
+
+  /* The table offset is added to the base offset reported by get_face(). */
+  return hb_blob_create_sub_blob (closure->blob, face_offset + record.offset, record.length);
+}
+
+/**
+ * hb_face_create: (Xconstructor)
+ * @blob: blob holding the font data; a null pointer is treated as the empty blob.
+ * @index: index of the face to use within @blob.
+ *
+ * Creates a face backed by a sanitized reference to @blob.  Tables are
+ * looked up in face number @index of that blob.  If the closure or the
+ * face object cannot be allocated, the empty face is returned.
+ *
+ * Return value: (transfer full):
+ *
+ * Since: 0.9.2
+ **/
+hb_face_t *
+hb_face_create (hb_blob_t    *blob,
+                unsigned int  index)
+{
+  hb_face_t *face;
+
+  if (unlikely (!blob))
+    blob = hb_blob_get_empty ();
+
+  blob = hb_sanitize_context_t ().sanitize_blob<OT::OpenTypeFontFile> (hb_blob_reference (blob));
+
+  hb_face_for_data_closure_t *closure = _hb_face_for_data_closure_create (blob, index);
+
+  if (unlikely (!closure))
+  {
+    /* Nothing took over the sanitized blob, so release it here. */
+    hb_blob_destroy (blob);
+    return hb_face_get_empty ();
+  }
+
+  face = hb_face_create_for_tables (_hb_face_for_data_reference_table,
+                                    closure,
+                                    _hb_face_for_data_closure_destroy);
+
+  /* hb_face_create_for_tables() returns the shared empty face when it
+   * cannot create an object (it has already destroyed the closure by
+   * then).  That singleton must not be written to. */
+  if (unlikely (face == hb_face_get_empty ()))
+    return face;
+
+  face->index = index;
+
+  return face;
+}
+
+/**
+ * hb_face_get_empty:
+ *
+ * Returns the null face instance.  Other functions in this file return
+ * it when a face cannot be created.
+ *
+ * Return value: (transfer full)
+ *
+ * Since: 0.9.2
+ **/
+hb_face_t *
+hb_face_get_empty ()
+{
+  const hb_face_t &null_face = Null (hb_face_t);
+  return const_cast<hb_face_t *> (&null_face);
+}
+
+
+/**
+ * hb_face_reference: (skip)
+ * @face: a face.
+ *
+ * Forwards to hb_object_reference() on @face.
+ *
+ * Return value: whatever hb_object_reference() returns for @face.
+ *
+ * Since: 0.9.2
+ **/
+hb_face_t *
+hb_face_reference (hb_face_t *face)
+{
+  hb_face_t *referenced = hb_object_reference (face);
+  return referenced;
+}
+
+/**
+ * hb_face_destroy: (skip)
+ * @face: a face.
+ *
+ * Calls hb_object_destroy() on @face and returns if it reports false.
+ * Otherwise tears the face down: destroys and frees every cached shape
+ * plan node, finalizes the shaper data and tables, invokes the destroy
+ * callback on the user data if one is set, and frees the face.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_face_destroy (hb_face_t *face)
+{
+  if (!hb_object_destroy (face)) return;
+
+  /* Walk the plan list, reading each link before its node is freed. */
+  hb_face_t::plan_node_t *cursor = face->shape_plans;
+  while (cursor)
+  {
+    hb_face_t::plan_node_t *following = cursor->next;
+    hb_shape_plan_destroy (cursor->shape_plan);
+    free (cursor);
+    cursor = following;
+  }
+
+  face->data.fini ();
+  face->table.fini ();
+
+  if (face->destroy)
+    face->destroy (face->user_data);
+
+  free (face);
+}
+
+/**
+ * hb_face_set_user_data: (skip)
+ * @face: a face.
+ * @key: key to store the data under.
+ * @data: pointer to attach.
+ * @destroy: callback associated with @data.
+ * @replace: forwarded unchanged to hb_object_set_user_data().
+ *
+ * Forwards all arguments to hb_object_set_user_data().
+ *
+ * Return value: result of hb_object_set_user_data().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_face_set_user_data (hb_face_t          *face,
+                       hb_user_data_key_t *key,
+                       void *              data,
+                       hb_destroy_func_t   destroy,
+                       hb_bool_t           replace)
+{
+  const hb_bool_t stored = hb_object_set_user_data (face, key, data, destroy, replace);
+  return stored;
+}
+
+/**
+ * hb_face_get_user_data: (skip)
+ * @face: a face.
+ * @key: key to look up.
+ *
+ * Forwards to hb_object_get_user_data() for @key on @face.
+ *
+ * Return value: (transfer none): result of hb_object_get_user_data().
+ *
+ * Since: 0.9.2
+ **/
+void *
+hb_face_get_user_data (const hb_face_t    *face,
+                       hb_user_data_key_t *key)
+{
+  void *attached = hb_object_get_user_data (face, key);
+  return attached;
+}
+
+/**
+ * hb_face_make_immutable:
+ * @face: a face.
+ *
+ * Makes @face immutable unless it already is.  The hb_face_set_*()
+ * setters in this file do nothing on an immutable face.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_face_make_immutable (hb_face_t *face)
+{
+  if (!hb_object_is_immutable (face))
+    hb_object_make_immutable (face);
+}
+
+/**
+ * hb_face_is_immutable:
+ * @face: a face.
+ *
+ * Forwards to hb_object_is_immutable() on @face.
+ *
+ * Return value: result of hb_object_is_immutable().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_face_is_immutable (const hb_face_t *face)
+{
+  const hb_bool_t frozen = hb_object_is_immutable (face);
+  return frozen;
+}
+
+
+/**
+ * hb_face_reference_table:
+ * @face: a face.
+ * @tag: tag of the table to fetch.
+ *
+ * Fetches the blob for table @tag through the face's reference_table()
+ * method.  HB_TAG_NONE yields the empty blob without consulting the face.
+ *
+ * Return value: (transfer full):
+ *
+ * Since: 0.9.2
+ **/
+hb_blob_t *
+hb_face_reference_table (const hb_face_t *face,
+                         hb_tag_t         tag)
+{
+  const bool no_tag = (tag == HB_TAG_NONE);
+  if (unlikely (no_tag))
+    return hb_blob_get_empty ();
+
+  hb_blob_t *table_blob = face->reference_table (tag);
+  return table_blob;
+}
+
+/**
+ * hb_face_reference_blob:
+ * @face: a face.
+ *
+ * Asks the face's reference_table() method for HB_TAG_NONE, which the
+ * table callbacks in this file answer with the blob for the whole face.
+ *
+ * Return value: (transfer full):
+ *
+ * Since: 0.9.2
+ **/
+hb_blob_t *
+hb_face_reference_blob (hb_face_t *face)
+{
+  hb_blob_t *whole_face = face->reference_table (HB_TAG_NONE);
+  return whole_face;
+}
+
+/**
+ * hb_face_set_index:
+ * @face: a face.
+ * @index: face index to store.
+ *
+ * Stores @index in @face.  Does nothing if @face is immutable.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_face_set_index (hb_face_t    *face,
+                   unsigned int  index)
+{
+  if (!hb_object_is_immutable (face))
+    face->index = index;
+}
+
+/**
+ * hb_face_get_index:
+ * @face: a face.
+ *
+ * Reads the index stored in @face.
+ *
+ * Return value: the stored face index.
+ *
+ * Since: 0.9.2
+ **/
+unsigned int
+hb_face_get_index (const hb_face_t *face)
+{
+  const unsigned int stored_index = face->index;
+  return stored_index;
+}
+
+/**
+ * hb_face_set_upem:
+ * @face: a face.
+ * @upem: units-per-em value to store.
+ *
+ * Stores @upem in @face.  Does nothing if @face is immutable.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_face_set_upem (hb_face_t    *face,
+                  unsigned int  upem)
+{
+  if (!hb_object_is_immutable (face))
+    face->upem.set_relaxed (upem);
+}
+
+/**
+ * hb_face_get_upem:
+ * @face: a face.
+ *
+ * Returns the units-per-em of @face as reported by its get_upem() method.
+ *
+ * Return value: the units-per-em value.
+ *
+ * Since: 0.9.2
+ **/
+unsigned int
+hb_face_get_upem (const hb_face_t *face)
+{
+  const unsigned int units_per_em = face->get_upem ();
+  return units_per_em;
+}
+
+/**
+ * hb_face_set_glyph_count:
+ * @face: a face.
+ * @glyph_count: number of glyphs to store.
+ *
+ * Stores @glyph_count in @face.  Does nothing if @face is immutable.
+ *
+ * Since: 0.9.7
+ **/
+void
+hb_face_set_glyph_count (hb_face_t    *face,
+                         unsigned int  glyph_count)
+{
+  if (!hb_object_is_immutable (face))
+    face->num_glyphs.set_relaxed (glyph_count);
+}
+
+/**
+ * hb_face_get_glyph_count:
+ * @face: a face.
+ *
+ * Returns the glyph count of @face as reported by its get_num_glyphs()
+ * method.
+ *
+ * Return value: the number of glyphs.
+ *
+ * Since: 0.9.7
+ **/
+unsigned int
+hb_face_get_glyph_count (const hb_face_t *face)
+{
+  const unsigned int glyph_total = face->get_num_glyphs ();
+  return glyph_total;
+}
+
+/**
+ * hb_face_get_table_tags:
+ * @face: a face.
+ * @start_offset: index of first tag to return.
+ * @table_count: input length of @table_tags array, output number of items written.
+ * @table_tags: array to write tags into.
+ *
+ * Retrieves table tags for a face, if possible.  Listing only works for
+ * faces whose destroy callback is the blob-closure one installed by
+ * hb_face_create(); for any other face @table_count (when non-null) is
+ * set to 0.
+ *
+ * Return value: total number of tables, or 0 if not possible to list.
+ *
+ * Since: 1.6.0
+ **/
+unsigned int
+hb_face_get_table_tags (const hb_face_t *face,
+                        unsigned int     start_offset,
+                        unsigned int    *table_count, /* IN/OUT */
+                        hb_tag_t        *table_tags /* OUT */)
+{
+  const bool blob_backed = face->destroy == (hb_destroy_func_t) _hb_face_for_data_closure_destroy;
+  if (!blob_backed)
+  {
+    if (table_count)
+      *table_count = 0;
+    return 0;
+  }
+
+  /* user_data is known to be a blob closure once the check above passes. */
+  hb_face_for_data_closure_t *closure = (hb_face_for_data_closure_t *) face->user_data;
+
+  const OT::OpenTypeFontFile &font_file = *closure->blob->as<OT::OpenTypeFontFile> ();
+  const OT::OpenTypeFontFace &font_face = font_file.get_face (closure->index);
+
+  return font_face.get_table_tags (start_offset, table_count, table_tags);
+}
+
+
+/*
+ * Character set.
+ */
+
+
+#ifndef HB_NO_FACE_COLLECT_UNICODES
+/**
+ * hb_face_collect_unicodes:
+ * @face: font face.
+ * @out: set to add Unicode characters covered by @face to.
+ *
+ * Asks the face's cmap table to collect its Unicode characters into
+ * @out, passing along the face's glyph count.
+ *
+ * Since: 1.9.0
+ */
+void
+hb_face_collect_unicodes (hb_face_t *face,
+                          hb_set_t  *out)
+{
+  const unsigned int glyph_total = face->get_num_glyphs ();
+  face->table.cmap->collect_unicodes (out, glyph_total);
+}
+/**
+ * hb_face_collect_variation_selectors:
+ * @face: font face.
+ * @out: set to add Variation Selector characters covered by @face to.
+ *
+ * Asks the face's cmap table to collect its variation selectors into
+ * @out.
+ *
+ * Since: 1.9.0
+ */
+void
+hb_face_collect_variation_selectors (hb_face_t *face,
+                                     hb_set_t  *out)
+{
+  hb_set_t *selectors = out;
+  face->table.cmap->collect_variation_selectors (selectors);
+}
+/**
+ * hb_face_collect_variation_unicodes:
+ * @face: font face.
+ * @variation_selector: variation selector to collect characters for.
+ * @out: set to add Unicode characters for @variation_selector covered by @face to.
+ *
+ * Asks the face's cmap table to collect the Unicode characters for
+ * @variation_selector into @out.
+ *
+ * Since: 1.9.0
+ */
+void
+hb_face_collect_variation_unicodes (hb_face_t      *face,
+                                    hb_codepoint_t  variation_selector,
+                                    hb_set_t       *out)
+{
+  hb_set_t *covered = out;
+  face->table.cmap->collect_variation_unicodes (variation_selector, covered);
+}
+#endif
+
+
+/*
+ * face-builder: A face that has add_table().
+ */
+
+/* Backing data of a builder face: the tables added to it so far. */
+struct hb_face_builder_data_t
+{
+  /* One added table: its tag and the blob holding its contents. */
+  struct table_entry_t
+  {
+    /* Three-way comparison of search key @t against this entry's tag:
+     * negative when @t is smaller, positive when @t is larger, zero when
+     * they are equal. */
+    int cmp (hb_tag_t t) const
+    {
+      if (t < tag) return -1;
+      if (t > tag) return +1;
+      return 0;
+    }
+
+    hb_tag_t tag;
+    hb_blob_t *blob;
+  };
+
+  hb_vector_t<table_entry_t> tables;
+};
+
+/* Allocates zeroed builder data and initializes its table vector.
+ * Returns nullptr when the allocation fails. */
+static hb_face_builder_data_t *
+_hb_face_builder_data_create ()
+{
+  void *storage = calloc (1, sizeof (hb_face_builder_data_t));
+  if (unlikely (!storage))
+    return nullptr;
+
+  hb_face_builder_data_t *builder = (hb_face_builder_data_t *) storage;
+  builder->tables.init ();
+  return builder;
+}
+
+/* Destroy callback for builder data: destroys the blob of every table
+ * entry, finalizes the vector and frees the data. */
+static void
+_hb_face_builder_data_destroy (void *user_data)
+{
+  hb_face_builder_data_t *builder = (hb_face_builder_data_t *) user_data;
+
+  unsigned int slot = 0;
+  while (slot < builder->tables.length)
+  {
+    hb_blob_destroy (builder->tables[slot].blob);
+    slot++;
+  }
+
+  builder->tables.fini ();
+
+  free (builder);
+}
+
+/* Serializes all tables held by @data into one newly allocated font file
+ * and returns it wrapped in a blob that frees the buffer when destroyed.
+ * Returns nullptr if the buffer cannot be allocated or serialization
+ * fails. */
+static hb_blob_t *
+_hb_face_builder_data_reference_blob (hb_face_builder_data_t *data)
+{
+
+ unsigned int table_count = data->tables.length;
+ /* Fixed overhead: 12 bytes plus 16 bytes per table -- presumably the
+  * font header and one directory record per table; confirm against
+  * OT::OpenTypeFontFile. */
+ unsigned int face_length = table_count * 16 + 12;
+
+ /* Each table's length is rounded up to a multiple of 4. */
+ for (unsigned int i = 0; i < table_count; i++)
+ face_length += hb_ceil_to_4 (hb_blob_get_length (data->tables[i].blob));
+
+ char *buf = (char *) malloc (face_length);
+ if (unlikely (!buf))
+ return nullptr;
+
+ hb_serialize_context_t c (buf, face_length);
+ c.propagate_error (data->tables);
+ OT::OpenTypeFontFile *f = c.start_serialize<OT::OpenTypeFontFile> ();
+
+ /* The CFF tag is used when a 'CFF ' or 'CFF2' table is present,
+  * otherwise the TrueType tag. */
+ bool is_cff = data->tables.lsearch (HB_TAG ('C','F','F',' ')) || data->tables.lsearch (HB_TAG ('C','F','F','2'));
+ hb_tag_t sfnt_tag = is_cff ? OT::OpenTypeFontFile::CFFTag : OT::OpenTypeFontFile::TrueTypeTag;
+
+ bool ret = f->serialize_single (&c, sfnt_tag, data->tables.as_array ());
+
+ c.end_serialize ();
+
+ if (unlikely (!ret))
+ {
+ free (buf);
+ return nullptr;
+ }
+
+ /* buf is passed as the blob's user data with free as its destroy
+  * callback, so the blob owns the buffer from here on. */
+ return hb_blob_create (buf, face_length, HB_MEMORY_MODE_WRITABLE, buf, free);
+}
+
+/* Table callback for builder faces.  A zero tag returns the whole face
+ * serialized into a blob; otherwise it returns a new reference to the
+ * blob added for @tag, or nullptr if no such table was added. */
+static hb_blob_t *
+_hb_face_builder_reference_table (hb_face_t *face HB_UNUSED, hb_tag_t tag, void *user_data)
+{
+  hb_face_builder_data_t *builder = (hb_face_builder_data_t *) user_data;
+
+  if (!tag)
+    return _hb_face_builder_data_reference_blob (builder);
+
+  hb_face_builder_data_t::table_entry_t *match = builder->tables.lsearch (tag);
+  if (!match)
+    return nullptr;
+
+  return hb_blob_reference (match->blob);
+}
+
+
+/**
+ * hb_face_builder_create:
+ *
+ * Creates a #hb_face_t that can be used with hb_face_builder_add_table().
+ * After tables are added to the face, it can be compiled to a binary
+ * font file by calling hb_face_reference_blob().  The empty face is
+ * returned if the builder data cannot be allocated.
+ *
+ * Return value: (transfer full): New face.
+ *
+ * Since: 1.9.0
+ **/
+hb_face_t *
+hb_face_builder_create ()
+{
+  hb_face_builder_data_t *builder = _hb_face_builder_data_create ();
+  if (unlikely (!builder))
+    return hb_face_get_empty ();
+
+  hb_face_t *built_face = hb_face_create_for_tables (_hb_face_builder_reference_table,
+                                                     builder,
+                                                     _hb_face_builder_data_destroy);
+  return built_face;
+}
+
+/**
+ * hb_face_builder_add_table:
+ * @face: a face created by hb_face_builder_create().
+ * @tag: tag of the table to add.
+ * @blob: blob holding the table data; the face keeps its own reference.
+ *
+ * Add table for @tag with data provided by @blob to the face.  @face must
+ * be created using hb_face_builder_create().
+ *
+ * Return value: true on success; false if @face is not a builder face or
+ * the table list could not grow.
+ *
+ * Since: 1.9.0
+ **/
+hb_bool_t
+hb_face_builder_add_table (hb_face_t *face, hb_tag_t tag, hb_blob_t *blob)
+{
+  /* Builder faces are recognised by their destroy callback. */
+  const bool is_builder_face = face->destroy == (hb_destroy_func_t) _hb_face_builder_data_destroy;
+  if (unlikely (!is_builder_face))
+    return false;
+
+  hb_face_builder_data_t *builder = (hb_face_builder_data_t *) face->user_data;
+
+  hb_face_builder_data_t::table_entry_t *slot = builder->tables.push ();
+  if (builder->tables.in_error())
+    return false;
+
+  slot->tag = tag;
+  slot->blob = hb_blob_reference (blob);
+
+  return true;
+}
diff --git a/thirdparty/harfbuzz/src/hb-face.h b/thirdparty/harfbuzz/src/hb-face.h
new file mode 100644
index 0000000000..e8ff090d55
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-face.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_H_IN
+#error "Include <hb.h> instead."
+#endif
+
+#ifndef HB_FACE_H
+#define HB_FACE_H
+
+#include "hb-common.h"
+#include "hb-blob.h"
+#include "hb-set.h"
+
+HB_BEGIN_DECLS
+
+
+HB_EXTERN unsigned int
+hb_face_count (hb_blob_t *blob);
+
+
+/*
+ * hb_face_t
+ */
+
+typedef struct hb_face_t hb_face_t;
+
+HB_EXTERN hb_face_t *
+hb_face_create (hb_blob_t *blob,
+ unsigned int index);
+
+typedef hb_blob_t * (*hb_reference_table_func_t) (hb_face_t *face, hb_tag_t tag, void *user_data);
+
+/* calls destroy() when not needing user_data anymore */
+HB_EXTERN hb_face_t *
+hb_face_create_for_tables (hb_reference_table_func_t reference_table_func,
+ void *user_data,
+ hb_destroy_func_t destroy);
+
+HB_EXTERN hb_face_t *
+hb_face_get_empty (void);
+
+HB_EXTERN hb_face_t *
+hb_face_reference (hb_face_t *face);
+
+HB_EXTERN void
+hb_face_destroy (hb_face_t *face);
+
+HB_EXTERN hb_bool_t
+hb_face_set_user_data (hb_face_t *face,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace);
+
+HB_EXTERN void *
+hb_face_get_user_data (const hb_face_t *face,
+ hb_user_data_key_t *key);
+
+HB_EXTERN void
+hb_face_make_immutable (hb_face_t *face);
+
+HB_EXTERN hb_bool_t
+hb_face_is_immutable (const hb_face_t *face);
+
+
+HB_EXTERN hb_blob_t *
+hb_face_reference_table (const hb_face_t *face,
+ hb_tag_t tag);
+
+HB_EXTERN hb_blob_t *
+hb_face_reference_blob (hb_face_t *face);
+
+HB_EXTERN void
+hb_face_set_index (hb_face_t *face,
+ unsigned int index);
+
+HB_EXTERN unsigned int
+hb_face_get_index (const hb_face_t *face);
+
+HB_EXTERN void
+hb_face_set_upem (hb_face_t *face,
+ unsigned int upem);
+
+HB_EXTERN unsigned int
+hb_face_get_upem (const hb_face_t *face);
+
+HB_EXTERN void
+hb_face_set_glyph_count (hb_face_t *face,
+ unsigned int glyph_count);
+
+HB_EXTERN unsigned int
+hb_face_get_glyph_count (const hb_face_t *face);
+
+HB_EXTERN unsigned int
+hb_face_get_table_tags (const hb_face_t *face,
+ unsigned int start_offset,
+ unsigned int *table_count, /* IN/OUT */
+ hb_tag_t *table_tags /* OUT */);
+
+
+/*
+ * Character set.
+ */
+
+HB_EXTERN void
+hb_face_collect_unicodes (hb_face_t *face,
+ hb_set_t *out);
+
+HB_EXTERN void
+hb_face_collect_variation_selectors (hb_face_t *face,
+ hb_set_t *out);
+
+HB_EXTERN void
+hb_face_collect_variation_unicodes (hb_face_t *face,
+ hb_codepoint_t variation_selector,
+ hb_set_t *out);
+
+
+/*
+ * Builder face.
+ */
+
+HB_EXTERN hb_face_t *
+hb_face_builder_create (void);
+
+HB_EXTERN hb_bool_t
+hb_face_builder_add_table (hb_face_t *face,
+ hb_tag_t tag,
+ hb_blob_t *blob);
+
+
+HB_END_DECLS
+
+#endif /* HB_FACE_H */
diff --git a/thirdparty/harfbuzz/src/hb-face.hh b/thirdparty/harfbuzz/src/hb-face.hh
new file mode 100644
index 0000000000..f1b472ccf3
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-face.hh
@@ -0,0 +1,109 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2011 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_FACE_HH
+#define HB_FACE_HH
+
+#include "hb.hh"
+
+#include "hb-shaper.hh"
+#include "hb-shape-plan.hh"
+#include "hb-ot-face.hh"
+
+
+/*
+ * hb_face_t
+ */
+
+#define HB_SHAPER_IMPLEMENT(shaper) HB_SHAPER_DATA_INSTANTIATE_SHAPERS(shaper, face);
+#include "hb-shaper-list.hh"
+#undef HB_SHAPER_IMPLEMENT
+
+/* A font face: a table-lookup callback with its user data, plus cached
+ * per-face values.  Member order is relied on by the positional
+ * initializer of the null instance. */
+struct hb_face_t
+{
+  hb_object_header_t header;
+
+  hb_reference_table_func_t  reference_table_func;
+  void                      *user_data;
+  hb_destroy_func_t          destroy;
+
+  unsigned int index;                 /* Face index in a collection, zero-based. */
+  mutable hb_atomic_int_t upem;       /* Units-per-EM. */
+  mutable hb_atomic_int_t num_glyphs; /* Number of glyphs. */
+
+  hb_shaper_object_dataset_t<hb_face_t> data; /* Various shaper data. */
+  hb_ot_face_t table;                         /* All the face's tables. */
+
+  /* Cache: singly linked list of shape plans. */
+  struct plan_node_t
+  {
+    hb_shape_plan_t *shape_plan;
+    plan_node_t *next;
+  };
+  hb_atomic_ptr_t<plan_node_t> shape_plans;
+
+  /* Fetches the blob for @tag through the table callback.  Never returns
+   * null: the empty blob stands in when there is no callback or the
+   * callback returns null. */
+  hb_blob_t *reference_table (hb_tag_t tag) const
+  {
+    if (unlikely (!reference_table_func))
+      return hb_blob_get_empty ();
+
+    hb_blob_t *table_blob = reference_table_func (/*XXX*/const_cast<hb_face_t *> (this), tag, user_data);
+    if (unlikely (!table_blob))
+      table_blob = hb_blob_get_empty ();
+
+    return table_blob;
+  }
+
+  /* Returns the cached units-per-em; a cached value of zero triggers
+   * load_upem(). */
+  HB_PURE_FUNC unsigned int get_upem () const
+  {
+    const unsigned int cached = upem.get_relaxed ();
+    return unlikely (!cached) ? load_upem () : cached;
+  }
+
+  /* Returns the cached glyph count; UINT_MAX means "not loaded yet" and
+   * triggers load_num_glyphs(). */
+  unsigned int get_num_glyphs () const
+  {
+    const unsigned int cached = num_glyphs.get_relaxed ();
+    return unlikely (cached == UINT_MAX) ? load_num_glyphs () : cached;
+  }
+
+  private:
+  HB_INTERNAL unsigned int load_upem () const;
+  HB_INTERNAL unsigned int load_num_glyphs () const;
+};
+DECLARE_NULL_INSTANCE (hb_face_t);
+
+
+#endif /* HB_FACE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-fallback-shape.cc b/thirdparty/harfbuzz/src/hb-fallback-shape.cc
new file mode 100644
index 0000000000..c5b7c2c230
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-fallback-shape.cc
@@ -0,0 +1,125 @@
+/*
+ * Copyright © 2011 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb-shaper-impl.hh"
+
+#ifndef HB_NO_FALLBACK_SHAPE
+
+/*
+ * shaper face data
+ */
+
+/* Per-face data type of the fallback shaper; it has no members. */
+struct hb_fallback_face_data_t {};
+
+/* The fallback shaper keeps no per-face state, so nothing is allocated:
+ * the HB_SHAPER_DATA_SUCCEEDED marker is returned in place of an object. */
+hb_fallback_face_data_t *
+_hb_fallback_shaper_face_data_create (hb_face_t *face HB_UNUSED)
+{
+  hb_fallback_face_data_t *marker = (hb_fallback_face_data_t *) HB_SHAPER_DATA_SUCCEEDED;
+  return marker;
+}
+
+/* Nothing to release: the matching create function allocates nothing. */
+void
+_hb_fallback_shaper_face_data_destroy (hb_fallback_face_data_t *data HB_UNUSED)
+{
+}
+
+
+/*
+ * shaper font data
+ */
+
+/* Per-font data type of the fallback shaper; it has no members. */
+struct hb_fallback_font_data_t {};
+
+/* The fallback shaper keeps no per-font state, so nothing is allocated:
+ * the HB_SHAPER_DATA_SUCCEEDED marker is returned in place of an object. */
+hb_fallback_font_data_t *
+_hb_fallback_shaper_font_data_create (hb_font_t *font HB_UNUSED)
+{
+  hb_fallback_font_data_t *marker = (hb_fallback_font_data_t *) HB_SHAPER_DATA_SUCCEEDED;
+  return marker;
+}
+
+/* Nothing to release: the matching create function allocates nothing. */
+void
+_hb_fallback_shaper_font_data_destroy (hb_fallback_font_data_t *data HB_UNUSED)
+{
+}
+
+
+/*
+ * shaper
+ */
+
+/* Minimal shaper: maps every character to its nominal glyph and fills in
+ * advances and origin-adjusted offsets for the buffer direction.  When
+ * the font has a space glyph, default-ignorable characters become that
+ * glyph with zero advance.  Buffers with a backward direction are
+ * reversed afterwards.  Always returns true. */
+hb_bool_t
+_hb_fallback_shape (hb_shape_plan_t    *shape_plan HB_UNUSED,
+                    hb_font_t          *font,
+                    hb_buffer_t        *buffer,
+                    const hb_feature_t *features HB_UNUSED,
+                    unsigned int        num_features HB_UNUSED)
+{
+  /* TODO
+   *
+   * - Apply fallback kern.
+   * - Handle Variation Selectors?
+   * - Apply normalization?
+   *
+   * This will make the fallback shaper into a dumb "TrueType"
+   * shaper which many people unfortunately still request.
+   */
+
+  hb_codepoint_t space;
+  bool has_space = (bool) font->get_nominal_glyph (' ', &space);
+
+  buffer->clear_positions ();
+
+  hb_direction_t direction = buffer->props.direction;
+  hb_unicode_funcs_t *unicode = buffer->unicode;
+  unsigned int count = buffer->len;
+  hb_glyph_info_t *info = buffer->info;
+  hb_glyph_position_t *pos = buffer->pos;
+
+  for (unsigned int i = 0; i < count; i++)
+  {
+    hb_glyph_info_t &glyph = info[i];
+    hb_glyph_position_t &placement = pos[i];
+
+    if (has_space && unicode->is_default_ignorable (glyph.codepoint))
+    {
+      /* Shown as a space glyph that takes up no room. */
+      glyph.codepoint = space;
+      placement.x_advance = 0;
+      placement.y_advance = 0;
+    }
+    else
+    {
+      /* The lookup result is deliberately ignored. */
+      (void) font->get_nominal_glyph (glyph.codepoint, &glyph.codepoint);
+      font->get_glyph_advance_for_direction (glyph.codepoint,
+                                             direction,
+                                             &placement.x_advance,
+                                             &placement.y_advance);
+      font->subtract_glyph_origin_for_direction (glyph.codepoint,
+                                                 direction,
+                                                 &placement.x_offset,
+                                                 &placement.y_offset);
+    }
+  }
+
+  if (HB_DIRECTION_IS_BACKWARD (direction))
+    hb_buffer_reverse (buffer);
+
+  buffer->safe_to_break_all ();
+
+  return true;
+}
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-font.cc b/thirdparty/harfbuzz/src/hb-font.cc
new file mode 100644
index 0000000000..27959487dc
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-font.cc
@@ -0,0 +1,2186 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#include "hb-font.hh"
+#include "hb-machinery.hh"
+
+#include "hb-ot.h"
+
+#include "hb-ot-var-avar-table.hh"
+#include "hb-ot-var-fvar-table.hh"
+
+
+/**
+ * SECTION:hb-font
+ * @title: hb-font
+ * @short_description: Font objects
+ * @include: hb.h
+ *
+ * Font objects represent a font face at a certain size and other
+ * parameters (pixels per EM, points per EM, variation settings.)
+ * Fonts are created from font faces, and are used as input to
+ * hb_shape() among other things.
+ **/
+
+
+/*
+ * hb_font_funcs_t
+ */
+
+/* Nil callback for horizontal font extents: zeroes @extents and reports
+ * failure. */
+static hb_bool_t
+hb_font_get_font_h_extents_nil (hb_font_t         *font HB_UNUSED,
+                                void              *font_data HB_UNUSED,
+                                hb_font_extents_t *extents,
+                                void              *user_data HB_UNUSED)
+{
+  const hb_bool_t found = false;
+  memset (extents, 0, sizeof (*extents));
+  return found;
+}
+/* Default callback for horizontal font extents: asks the parent font and,
+ * on success, rescales ascender, descender and line gap with
+ * parent_scale_y_distance(). */
+static hb_bool_t
+hb_font_get_font_h_extents_default (hb_font_t         *font,
+                                    void              *font_data HB_UNUSED,
+                                    hb_font_extents_t *extents,
+                                    void              *user_data HB_UNUSED)
+{
+  const hb_bool_t found = font->parent->get_font_h_extents (extents);
+  if (!found)
+    return found;
+
+  extents->ascender = font->parent_scale_y_distance (extents->ascender);
+  extents->descender = font->parent_scale_y_distance (extents->descender);
+  extents->line_gap = font->parent_scale_y_distance (extents->line_gap);
+  return found;
+}
+
+/* Nil callback for vertical font extents: zeroes @extents and reports
+ * failure. */
+static hb_bool_t
+hb_font_get_font_v_extents_nil (hb_font_t         *font HB_UNUSED,
+                                void              *font_data HB_UNUSED,
+                                hb_font_extents_t *extents,
+                                void              *user_data HB_UNUSED)
+{
+  const hb_bool_t found = false;
+  memset (extents, 0, sizeof (*extents));
+  return found;
+}
+/* Default callback for vertical font extents: asks the parent font and,
+ * on success, rescales ascender, descender and line gap with
+ * parent_scale_x_distance(). */
+static hb_bool_t
+hb_font_get_font_v_extents_default (hb_font_t         *font,
+                                    void              *font_data HB_UNUSED,
+                                    hb_font_extents_t *extents,
+                                    void              *user_data HB_UNUSED)
+{
+  const hb_bool_t found = font->parent->get_font_v_extents (extents);
+  if (!found)
+    return found;
+
+  extents->ascender = font->parent_scale_x_distance (extents->ascender);
+  extents->descender = font->parent_scale_x_distance (extents->descender);
+  extents->line_gap = font->parent_scale_x_distance (extents->line_gap);
+  return found;
+}
+
+/* Nil callback for nominal glyph lookup: stores glyph 0 in @glyph and
+ * reports failure. */
+static hb_bool_t
+hb_font_get_nominal_glyph_nil (hb_font_t      *font HB_UNUSED,
+                               void           *font_data HB_UNUSED,
+                               hb_codepoint_t  unicode HB_UNUSED,
+                               hb_codepoint_t *glyph,
+                               void           *user_data HB_UNUSED)
+{
+  const hb_bool_t found = false;
+  *glyph = 0;
+  return found;
+}
+/* Default callback for nominal glyph lookup.  If the font has a batch
+ * nominal-glyphs function set, it is called for a single item; otherwise
+ * the lookup goes to the parent font. */
+static hb_bool_t
+hb_font_get_nominal_glyph_default (hb_font_t      *font,
+                                   void           *font_data HB_UNUSED,
+                                   hb_codepoint_t  unicode,
+                                   hb_codepoint_t *glyph,
+                                   void           *user_data HB_UNUSED)
+{
+  if (!font->has_nominal_glyphs_func_set ())
+    return font->parent->get_nominal_glyph (unicode, glyph);
+
+  /* One item, so both strides are passed as zero. */
+  return font->get_nominal_glyphs (1, &unicode, 0, glyph, 0);
+}
+
+#define hb_font_get_nominal_glyphs_nil hb_font_get_nominal_glyphs_default
+/* Default callback for batch nominal glyph lookup.  If the font has a
+ * single-glyph function set, it is called once per item, stopping at the
+ * first failure; otherwise the whole batch goes to the parent font.
+ * Returns the number of items handled before the first failure (@count
+ * when every lookup succeeds) or the parent's result. */
+static unsigned int
+hb_font_get_nominal_glyphs_default (hb_font_t            *font,
+                                    void                 *font_data HB_UNUSED,
+                                    unsigned int          count,
+                                    const hb_codepoint_t *first_unicode,
+                                    unsigned int          unicode_stride,
+                                    hb_codepoint_t       *first_glyph,
+                                    unsigned int          glyph_stride,
+                                    void                 *user_data HB_UNUSED)
+{
+  if (!font->has_nominal_glyph_func_set ())
+    return font->parent->get_nominal_glyphs (count,
+                                             first_unicode, unicode_stride,
+                                             first_glyph, glyph_stride);
+
+  unsigned int mapped = 0;
+  while (mapped < count)
+  {
+    if (!font->get_nominal_glyph (*first_unicode, first_glyph))
+      break;
+
+    /* Step both cursors forward by their strides. */
+    first_unicode = &StructAtOffsetUnaligned<hb_codepoint_t> (first_unicode, unicode_stride);
+    first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride);
+    mapped++;
+  }
+  return mapped;
+}
+
+/* Nil callback for variation glyph lookup: stores glyph 0 in @glyph and
+ * reports failure. */
+static hb_bool_t
+hb_font_get_variation_glyph_nil (hb_font_t      *font HB_UNUSED,
+                                 void           *font_data HB_UNUSED,
+                                 hb_codepoint_t  unicode HB_UNUSED,
+                                 hb_codepoint_t  variation_selector HB_UNUSED,
+                                 hb_codepoint_t *glyph,
+                                 void           *user_data HB_UNUSED)
+{
+  const hb_bool_t found = false;
+  *glyph = 0;
+  return found;
+}
+/* Default callback for variation glyph lookup: forwards the request to
+ * the parent font. */
+static hb_bool_t
+hb_font_get_variation_glyph_default (hb_font_t      *font,
+                                     void           *font_data HB_UNUSED,
+                                     hb_codepoint_t  unicode,
+                                     hb_codepoint_t  variation_selector,
+                                     hb_codepoint_t *glyph,
+                                     void           *user_data HB_UNUSED)
+{
+  hb_font_t *parent_font = font->parent;
+  return parent_font->get_variation_glyph (unicode, variation_selector, glyph);
+}
+
+
+/* Nil callback for horizontal advance: every glyph gets the font's
+ * x_scale. */
+static hb_position_t
+hb_font_get_glyph_h_advance_nil (hb_font_t      *font,
+                                 void           *font_data HB_UNUSED,
+                                 hb_codepoint_t  glyph HB_UNUSED,
+                                 void           *user_data HB_UNUSED)
+{
+  const hb_position_t fallback_advance = font->x_scale;
+  return fallback_advance;
+}
+/* Default callback for horizontal advance.  If the font has a batch
+ * h-advances function set, it is called for a single glyph; otherwise the
+ * parent font's advance is used, rescaled with
+ * parent_scale_x_distance(). */
+static hb_position_t
+hb_font_get_glyph_h_advance_default (hb_font_t      *font,
+                                     void           *font_data HB_UNUSED,
+                                     hb_codepoint_t  glyph,
+                                     void           *user_data HB_UNUSED)
+{
+  if (!font->has_glyph_h_advances_func_set ())
+    return font->parent_scale_x_distance (font->parent->get_glyph_h_advance (glyph));
+
+  /* One glyph, so both strides are passed as zero. */
+  hb_position_t advance;
+  font->get_glyph_h_advances (1, &glyph, 0, &advance, 0);
+  return advance;
+}
+
+/* Nil callback for vertical advance: every glyph gets the font's
+ * y_scale. */
+static hb_position_t
+hb_font_get_glyph_v_advance_nil (hb_font_t      *font,
+                                 void           *font_data HB_UNUSED,
+                                 hb_codepoint_t  glyph HB_UNUSED,
+                                 void           *user_data HB_UNUSED)
+{
+  /* TODO use font_extents.ascender+descender */
+  const hb_position_t fallback_advance = font->y_scale;
+  return fallback_advance;
+}
+/* Default single-glyph vertical-advance callback.
+ * If this font has a batch v_advances callback set, it is called for one
+ * glyph; otherwise the parent's advance is fetched and rescaled with
+ * parent_scale_y_distance(). */
+static hb_position_t
+hb_font_get_glyph_v_advance_default (hb_font_t *font,
+    void *font_data HB_UNUSED,
+    hb_codepoint_t glyph,
+    void *user_data HB_UNUSED)
+{
+  if (!font->has_glyph_v_advances_func_set ())
+    return font->parent_scale_y_distance (font->parent->get_glyph_v_advance (glyph));
+
+  /* Strides are irrelevant for a batch of one, so 0 is passed. */
+  hb_position_t advance;
+  font->get_glyph_v_advances (1, &glyph, 0, &advance, 0);
+  return advance;
+}
+
+#define hb_font_get_glyph_h_advances_nil hb_font_get_glyph_h_advances_default
+/* Batch horizontal-advance callback; the #define above makes it serve as
+ * the nil callback as well.
+ *
+ * Processes `count` entries.  Glyph IDs are read starting at first_glyph
+ * and results are written starting at first_advance; consecutive entries
+ * are located by passing the respective stride to
+ * StructAtOffsetUnaligned().
+ *
+ * If this font has a single-glyph h_advance callback set, it is invoked
+ * once per entry.  Otherwise the parent font fills the whole batch and
+ * every result is then rescaled with parent_scale_x_distance(). */
+static void
+hb_font_get_glyph_h_advances_default (hb_font_t* font,
+    void* font_data HB_UNUSED,
+    unsigned int count,
+    const hb_codepoint_t *first_glyph,
+    unsigned int glyph_stride,
+    hb_position_t *first_advance,
+    unsigned int advance_stride,
+    void *user_data HB_UNUSED)
+{
+  if (!font->has_glyph_h_advance_func_set ())
+  {
+    font->parent->get_glyph_h_advances (count,
+					first_glyph, glyph_stride,
+					first_advance, advance_stride);
+    hb_position_t *advance = first_advance;
+    for (unsigned int left = count; left; left--)
+    {
+      *advance = font->parent_scale_x_distance (*advance);
+      advance = &StructAtOffsetUnaligned<hb_position_t> (advance, advance_stride);
+    }
+    return;
+  }
+
+  const hb_codepoint_t *glyph = first_glyph;
+  hb_position_t *advance = first_advance;
+  for (unsigned int left = count; left; left--)
+  {
+    *advance = font->get_glyph_h_advance (*glyph);
+    glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (glyph, glyph_stride);
+    advance = &StructAtOffsetUnaligned<hb_position_t> (advance, advance_stride);
+  }
+}
+
+#define hb_font_get_glyph_v_advances_nil hb_font_get_glyph_v_advances_default
+/* Batch vertical-advance callback; the #define above makes it serve as
+ * the nil callback as well.
+ *
+ * Processes `count` entries.  Glyph IDs are read starting at first_glyph
+ * and results are written starting at first_advance; consecutive entries
+ * are located by passing the respective stride to
+ * StructAtOffsetUnaligned().
+ *
+ * If this font has a single-glyph v_advance callback set, it is invoked
+ * once per entry.  Otherwise the parent font fills the whole batch and
+ * every result is then rescaled with parent_scale_y_distance(). */
+static void
+hb_font_get_glyph_v_advances_default (hb_font_t* font,
+    void* font_data HB_UNUSED,
+    unsigned int count,
+    const hb_codepoint_t *first_glyph,
+    unsigned int glyph_stride,
+    hb_position_t *first_advance,
+    unsigned int advance_stride,
+    void *user_data HB_UNUSED)
+{
+  if (!font->has_glyph_v_advance_func_set ())
+  {
+    font->parent->get_glyph_v_advances (count,
+					first_glyph, glyph_stride,
+					first_advance, advance_stride);
+    hb_position_t *advance = first_advance;
+    for (unsigned int left = count; left; left--)
+    {
+      *advance = font->parent_scale_y_distance (*advance);
+      advance = &StructAtOffsetUnaligned<hb_position_t> (advance, advance_stride);
+    }
+    return;
+  }
+
+  const hb_codepoint_t *glyph = first_glyph;
+  hb_position_t *advance = first_advance;
+  for (unsigned int left = count; left; left--)
+  {
+    *advance = font->get_glyph_v_advance (*glyph);
+    glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (glyph, glyph_stride);
+    advance = &StructAtOffsetUnaligned<hb_position_t> (advance, advance_stride);
+  }
+}
+
+/* Nil horizontal-origin callback: reports (0, 0) and succeeds. */
+static hb_bool_t
+hb_font_get_glyph_h_origin_nil (hb_font_t *font HB_UNUSED,
+    void *font_data HB_UNUSED,
+    hb_codepoint_t glyph HB_UNUSED,
+    hb_position_t *x,
+    hb_position_t *y,
+    void *user_data HB_UNUSED)
+{
+  *y = 0;
+  *x = 0;
+  return true;
+}
+/* Default horizontal-origin callback: asks the parent font and, when the
+ * parent succeeds, converts the position with parent_scale_position().
+ * The parent's return value is passed through unchanged. */
+static hb_bool_t
+hb_font_get_glyph_h_origin_default (hb_font_t *font,
+    void *font_data HB_UNUSED,
+    hb_codepoint_t glyph,
+    hb_position_t *x,
+    hb_position_t *y,
+    void *user_data HB_UNUSED)
+{
+  const hb_bool_t found = font->parent->get_glyph_h_origin (glyph, x, y);
+  if (!found)
+    return found;
+
+  font->parent_scale_position (x, y);
+  return found;
+}
+
+/* Nil vertical-origin callback: zeroes the outputs and reports failure. */
+static hb_bool_t
+hb_font_get_glyph_v_origin_nil (hb_font_t *font HB_UNUSED,
+    void *font_data HB_UNUSED,
+    hb_codepoint_t glyph HB_UNUSED,
+    hb_position_t *x,
+    hb_position_t *y,
+    void *user_data HB_UNUSED)
+{
+  *y = 0;
+  *x = 0;
+  return false;
+}
+/* Default vertical-origin callback: asks the parent font and, when the
+ * parent succeeds, converts the position with parent_scale_position().
+ * The parent's return value is passed through unchanged. */
+static hb_bool_t
+hb_font_get_glyph_v_origin_default (hb_font_t *font,
+    void *font_data HB_UNUSED,
+    hb_codepoint_t glyph,
+    hb_position_t *x,
+    hb_position_t *y,
+    void *user_data HB_UNUSED)
+{
+  const hb_bool_t found = font->parent->get_glyph_v_origin (glyph, x, y);
+  if (!found)
+    return found;
+
+  font->parent_scale_position (x, y);
+  return found;
+}
+
+/* Nil horizontal-kerning callback: always reports a kerning of 0. */
+static hb_position_t
+hb_font_get_glyph_h_kerning_nil (hb_font_t *font HB_UNUSED,
+    void *font_data HB_UNUSED,
+    hb_codepoint_t left_glyph HB_UNUSED,
+    hb_codepoint_t right_glyph HB_UNUSED,
+    void *user_data HB_UNUSED)
+{
+  const hb_position_t no_kerning = 0;
+  return no_kerning;
+}
+/* Default horizontal-kerning callback: takes the parent's kerning for the
+ * glyph pair and rescales it with parent_scale_x_distance(). */
+static hb_position_t
+hb_font_get_glyph_h_kerning_default (hb_font_t *font,
+    void *font_data HB_UNUSED,
+    hb_codepoint_t left_glyph,
+    hb_codepoint_t right_glyph,
+    void *user_data HB_UNUSED)
+{
+  const hb_position_t parent_kerning =
+    font->parent->get_glyph_h_kerning (left_glyph, right_glyph);
+  return font->parent_scale_x_distance (parent_kerning);
+}
+
+#ifndef HB_DISABLE_DEPRECATED
+/* Nil vertical-kerning callback: always reports a kerning of 0. */
+static hb_position_t
+hb_font_get_glyph_v_kerning_nil (hb_font_t *font HB_UNUSED,
+    void *font_data HB_UNUSED,
+    hb_codepoint_t top_glyph HB_UNUSED,
+    hb_codepoint_t bottom_glyph HB_UNUSED,
+    void *user_data HB_UNUSED)
+{
+  const hb_position_t no_kerning = 0;
+  return no_kerning;
+}
+/* Default vertical-kerning callback: takes the parent's kerning for the
+ * glyph pair and rescales it with parent_scale_y_distance(). */
+static hb_position_t
+hb_font_get_glyph_v_kerning_default (hb_font_t *font,
+    void *font_data HB_UNUSED,
+    hb_codepoint_t top_glyph,
+    hb_codepoint_t bottom_glyph,
+    void *user_data HB_UNUSED)
+{
+  const hb_position_t parent_kerning =
+    font->parent->get_glyph_v_kerning (top_glyph, bottom_glyph);
+  return font->parent_scale_y_distance (parent_kerning);
+}
+#endif
+
+/* Nil glyph-extents callback: zero-fills *extents and reports failure. */
+static hb_bool_t
+hb_font_get_glyph_extents_nil (hb_font_t *font HB_UNUSED,
+    void *font_data HB_UNUSED,
+    hb_codepoint_t glyph HB_UNUSED,
+    hb_glyph_extents_t *extents,
+    void *user_data HB_UNUSED)
+{
+  memset (extents, 0, sizeof (hb_glyph_extents_t));
+  return false;
+}
+/* Default glyph-extents callback: asks the parent font and, when the
+ * parent succeeds, rescales the bearings as a position and the
+ * width/height as a distance.  The parent's return value is passed
+ * through unchanged. */
+static hb_bool_t
+hb_font_get_glyph_extents_default (hb_font_t *font,
+    void *font_data HB_UNUSED,
+    hb_codepoint_t glyph,
+    hb_glyph_extents_t *extents,
+    void *user_data HB_UNUSED)
+{
+  const hb_bool_t found = font->parent->get_glyph_extents (glyph, extents);
+  if (!found)
+    return found;
+
+  font->parent_scale_position (&extents->x_bearing, &extents->y_bearing);
+  font->parent_scale_distance (&extents->width, &extents->height);
+  return found;
+}
+
+/* Nil contour-point callback: zeroes the outputs and reports failure. */
+static hb_bool_t
+hb_font_get_glyph_contour_point_nil (hb_font_t *font HB_UNUSED,
+    void *font_data HB_UNUSED,
+    hb_codepoint_t glyph HB_UNUSED,
+    unsigned int point_index HB_UNUSED,
+    hb_position_t *x,
+    hb_position_t *y,
+    void *user_data HB_UNUSED)
+{
+  *y = 0;
+  *x = 0;
+  return false;
+}
+/* Default contour-point callback: asks the parent font and, when the
+ * parent succeeds, converts the point with parent_scale_position().
+ * The parent's return value is passed through unchanged. */
+static hb_bool_t
+hb_font_get_glyph_contour_point_default (hb_font_t *font,
+    void *font_data HB_UNUSED,
+    hb_codepoint_t glyph,
+    unsigned int point_index,
+    hb_position_t *x,
+    hb_position_t *y,
+    void *user_data HB_UNUSED)
+{
+  const hb_bool_t found =
+    font->parent->get_glyph_contour_point (glyph, point_index, x, y);
+  if (!found)
+    return found;
+
+  font->parent_scale_position (x, y);
+  return found;
+}
+
+/* Nil glyph-name callback: writes an empty string when the buffer has
+ * room for at least one byte, and reports failure. */
+static hb_bool_t
+hb_font_get_glyph_name_nil (hb_font_t *font HB_UNUSED,
+    void *font_data HB_UNUSED,
+    hb_codepoint_t glyph HB_UNUSED,
+    char *name, unsigned int size,
+    void *user_data HB_UNUSED)
+{
+  if (size != 0)
+    name[0] = '\0';
+  return false;
+}
+/* Default glyph-name callback: forwards the query to the parent font. */
+static hb_bool_t
+hb_font_get_glyph_name_default (hb_font_t *font,
+    void *font_data HB_UNUSED,
+    hb_codepoint_t glyph,
+    char *name, unsigned int size,
+    void *user_data HB_UNUSED)
+{
+  hb_font_t *parent = font->parent;
+  return parent->get_glyph_name (glyph, name, size);
+}
+
+/* Nil glyph-from-name callback: performs no lookup, stores glyph 0 in
+ * *glyph and reports failure. */
+static hb_bool_t
+hb_font_get_glyph_from_name_nil (hb_font_t *font HB_UNUSED,
+    void *font_data HB_UNUSED,
+    const char *name HB_UNUSED,
+    int len HB_UNUSED, /* -1 means nul-terminated */
+    hb_codepoint_t *glyph,
+    void *user_data HB_UNUSED)
+{
+  const hb_codepoint_t no_glyph = 0;
+  *glyph = no_glyph;
+  return false;
+}
+/* Default glyph-from-name callback: forwards the lookup to the parent
+ * font. */
+static hb_bool_t
+hb_font_get_glyph_from_name_default (hb_font_t *font,
+    void *font_data HB_UNUSED,
+    const char *name, int len, /* -1 means nul-terminated */
+    hb_codepoint_t *glyph,
+    void *user_data HB_UNUSED)
+{
+  hb_font_t *parent = font->parent;
+  return parent->get_glyph_from_name (name, len, glyph);
+}
+
+DEFINE_NULL_INSTANCE (hb_font_funcs_t) = /* Null font-functions object: every
+   * callback slot is filled with the matching hb_font_get_<name>_nil function. */
+{
+ HB_OBJECT_HEADER_STATIC,
+
+ { /* first per-callback side table: one null entry per callback
+    * (user_data or destroy; the member order is not visible here) */
+#define HB_FONT_FUNC_IMPLEMENT(name) nullptr,
+ HB_FONT_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_FONT_FUNC_IMPLEMENT
+ },
+ { /* second per-callback side table: one null entry per callback */
+#define HB_FONT_FUNC_IMPLEMENT(name) nullptr,
+ HB_FONT_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_FONT_FUNC_IMPLEMENT
+ },
+ { /* callback table; the extra brace level initializes its first member */
+ {
+#define HB_FONT_FUNC_IMPLEMENT(name) hb_font_get_##name##_nil,
+ HB_FONT_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_FONT_FUNC_IMPLEMENT
+ }
+ }
+};
+
+static const hb_font_funcs_t _hb_font_funcs_default = { /* Default font-functions
+   * object: every callback slot is filled with the matching
+   * hb_font_get_<name>_default function.  It is the object returned by
+   * hb_font_funcs_get_empty() and the reference table for
+   * hb_font_t::has_func_set(). */
+ HB_OBJECT_HEADER_STATIC,
+
+ { /* first per-callback side table: one null entry per callback
+    * (user_data or destroy; the member order is not visible here) */
+#define HB_FONT_FUNC_IMPLEMENT(name) nullptr,
+ HB_FONT_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_FONT_FUNC_IMPLEMENT
+ },
+ { /* second per-callback side table: one null entry per callback */
+#define HB_FONT_FUNC_IMPLEMENT(name) nullptr,
+ HB_FONT_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_FONT_FUNC_IMPLEMENT
+ },
+ { /* callback table; the extra brace level initializes its first member */
+ {
+#define HB_FONT_FUNC_IMPLEMENT(name) hb_font_get_##name##_default,
+ HB_FONT_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_FONT_FUNC_IMPLEMENT
+ }
+ }
+};
+
+
+/**
+ * hb_font_funcs_create: (Xconstructor)
+ *
+ * Creates a new font-functions object and initializes its callback
+ * table from the default callbacks (_hb_font_funcs_default.get).
+ *
+ * Return value: (transfer full): the new object, or the result of
+ * hb_font_funcs_get_empty() if hb_object_create() returns null.
+ *
+ * Since: 0.9.2
+ **/
+hb_font_funcs_t *
+hb_font_funcs_create ()
+{
+ hb_font_funcs_t *ffuncs;
+
+ if (!(ffuncs = hb_object_create<hb_font_funcs_t> ()))
+ return hb_font_funcs_get_empty ();
+
+ ffuncs->get = _hb_font_funcs_default.get;
+
+ return ffuncs;
+}
+
+/**
+ * hb_font_funcs_get_empty:
+ *
+ * Returns the statically allocated default font-functions object
+ * (_hb_font_funcs_default), with its const qualifier cast away.
+ *
+ * Return value: (transfer full): pointer to the static default object.
+ *
+ * Since: 0.9.2
+ **/
+hb_font_funcs_t *
+hb_font_funcs_get_empty ()
+{
+ return const_cast<hb_font_funcs_t *> (&_hb_font_funcs_default);
+}
+
+/**
+ * hb_font_funcs_reference: (skip)
+ * @ffuncs: font functions.
+ *
+ * Passes @ffuncs to hb_object_reference() and returns the result.
+ * NOTE(review): presumably this takes an additional reference on
+ * @ffuncs; confirm against hb_object_reference().
+ *
+ * Return value: the value returned by hb_object_reference().
+ *
+ * Since: 0.9.2
+ **/
+hb_font_funcs_t *
+hb_font_funcs_reference (hb_font_funcs_t *ffuncs)
+{
+ return hb_object_reference (ffuncs);
+}
+
+/**
+ * hb_font_funcs_destroy: (skip)
+ * @ffuncs: font functions.
+ *
+ * Calls hb_object_destroy() on @ffuncs.  If that returns false nothing
+ * else happens.  Otherwise every non-null per-callback destroy function
+ * is invoked on its matching user data, and @ffuncs is freed.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_funcs_destroy (hb_font_funcs_t *ffuncs)
+{
+ if (!hb_object_destroy (ffuncs)) return;
+
+#define HB_FONT_FUNC_IMPLEMENT(name) if (ffuncs->destroy.name) \
+ ffuncs->destroy.name (ffuncs->user_data.name);
+ HB_FONT_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_FONT_FUNC_IMPLEMENT
+
+ free (ffuncs);
+}
+
+/**
+ * hb_font_funcs_set_user_data: (skip)
+ * @ffuncs: font functions.
+ * @key: the user-data key.
+ * @data: the data pointer to associate with @key.
+ * @destroy: callback passed along with @data (may be used to release it).
+ * @replace: flag forwarded unchanged to hb_object_set_user_data().
+ *
+ * Forwards all arguments to hb_object_set_user_data().
+ *
+ * Return value: the value returned by hb_object_set_user_data().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_font_funcs_set_user_data (hb_font_funcs_t *ffuncs,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace)
+{
+ return hb_object_set_user_data (ffuncs, key, data, destroy, replace);
+}
+
+/**
+ * hb_font_funcs_get_user_data: (skip)
+ * @ffuncs: font functions.
+ * @key: the user-data key to look up.
+ *
+ * Forwards to hb_object_get_user_data() for @ffuncs and @key.
+ *
+ * Return value: (transfer none): the value returned by
+ * hb_object_get_user_data().
+ *
+ * Since: 0.9.2
+ **/
+void *
+hb_font_funcs_get_user_data (hb_font_funcs_t *ffuncs,
+ hb_user_data_key_t *key)
+{
+ return hb_object_get_user_data (ffuncs, key);
+}
+
+
+/**
+ * hb_font_funcs_make_immutable:
+ * @ffuncs: font functions.
+ *
+ * Marks @ffuncs immutable via hb_object_make_immutable(), unless it is
+ * already immutable.  The generated hb_font_funcs_set_*_func() setters
+ * refuse to modify an immutable object.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_funcs_make_immutable (hb_font_funcs_t *ffuncs)
+{
+ if (hb_object_is_immutable (ffuncs))
+ return;
+
+ hb_object_make_immutable (ffuncs);
+}
+
+/**
+ * hb_font_funcs_is_immutable:
+ * @ffuncs: font functions.
+ *
+ * Reports whether @ffuncs is immutable, as determined by
+ * hb_object_is_immutable().
+ *
+ * Return value: the value returned by hb_object_is_immutable().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_font_funcs_is_immutable (hb_font_funcs_t *ffuncs)
+{
+ return hb_object_is_immutable (ffuncs);
+}
+
+/*
+ * Generates one public setter, hb_font_funcs_set_<name>_func(), for each
+ * callback listed in HB_FONT_FUNCS_IMPLEMENT_CALLBACKS.  Each setter:
+ *  - on an immutable @ffuncs, only runs @destroy (if non-null) on
+ *    @user_data and returns without changing anything;
+ *  - otherwise first runs the previously registered destroy callback
+ *    for this slot (if any) on the previously registered user data;
+ *  - then installs @func together with @user_data and @destroy, or,
+ *    when @func is null, restores the _default callback with null
+ *    user data and null destroy (the passed @destroy is not invoked
+ *    in that case).
+ */
+#define HB_FONT_FUNC_IMPLEMENT(name) \
+ \
+void \
+hb_font_funcs_set_##name##_func (hb_font_funcs_t *ffuncs, \
+ hb_font_get_##name##_func_t func, \
+ void *user_data, \
+ hb_destroy_func_t destroy) \
+{ \
+ if (hb_object_is_immutable (ffuncs)) \
+ { \
+ if (destroy) \
+ destroy (user_data); \
+ return; \
+ } \
+ \
+ if (ffuncs->destroy.name) \
+ ffuncs->destroy.name (ffuncs->user_data.name); \
+ \
+ if (func) { \
+ ffuncs->get.f.name = func; \
+ ffuncs->user_data.name = user_data; \
+ ffuncs->destroy.name = destroy; \
+ } else { \
+ ffuncs->get.f.name = hb_font_get_##name##_default; \
+ ffuncs->user_data.name = nullptr; \
+ ffuncs->destroy.name = nullptr; \
+ } \
+}
+
+HB_FONT_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_FONT_FUNC_IMPLEMENT
+
+/* Reports whether callback slot `i` of this font's function table differs
+ * from the corresponding default callback, i.e. whether a callback has
+ * been installed for that slot. */
+bool
+hb_font_t::has_func_set (unsigned int i)
+{
+  const bool overridden =
+    klass->get.array[i] != _hb_font_funcs_default.get.array[i];
+  return overridden;
+}
+
+/* Reports whether callback slot `i` is installed on this font or on any
+ * font up its parent chain.  The walk stops at a null parent or at the
+ * Null font object. */
+bool
+hb_font_t::has_func (unsigned int i)
+{
+  if (has_func_set (i))
+    return true;
+
+  if (!parent || parent == &_hb_Null_hb_font_t)
+    return false;
+
+  return parent->has_func (i);
+}
+
+/* Public getters */
+
+/**
+ * hb_font_get_h_extents:
+ * @font: a font.
+ * @extents: (out): receives the font extents.
+ *
+ * Forwards to @font's get_font_h_extents() method.
+ *
+ * Return value: the value returned by get_font_h_extents().
+ *
+ * Since: 1.1.3
+ **/
+hb_bool_t
+hb_font_get_h_extents (hb_font_t *font,
+ hb_font_extents_t *extents)
+{
+ return font->get_font_h_extents (extents);
+}
+
+/**
+ * hb_font_get_v_extents:
+ * @font: a font.
+ * @extents: (out): receives the font extents.
+ *
+ * Forwards to @font's get_font_v_extents() method.
+ *
+ * Return value: the value returned by get_font_v_extents().
+ *
+ * Since: 1.1.3
+ **/
+hb_bool_t
+hb_font_get_v_extents (hb_font_t *font,
+ hb_font_extents_t *extents)
+{
+ return font->get_font_v_extents (extents);
+}
+
+/**
+ * hb_font_get_glyph:
+ * @font: a font.
+ * @unicode: the code point to look up.
+ * @variation_selector: a variation selector, or 0 for none.
+ * @glyph: (out): receives the glyph.
+ *
+ * Looks up the glyph for @unicode.  When @variation_selector is
+ * non-zero the lookup goes through @font's get_variation_glyph();
+ * otherwise through get_nominal_glyph().
+ *
+ * Return value: the value returned by the lookup that was used.
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_font_get_glyph (hb_font_t *font,
+ hb_codepoint_t unicode, hb_codepoint_t variation_selector,
+ hb_codepoint_t *glyph)
+{
+ if (unlikely (variation_selector))
+ return font->get_variation_glyph (unicode, variation_selector, glyph);
+ return font->get_nominal_glyph (unicode, glyph);
+}
+
+/**
+ * hb_font_get_nominal_glyph:
+ * @font: a font.
+ * @unicode: the code point to look up.
+ * @glyph: (out): receives the glyph.
+ *
+ * Forwards to @font's get_nominal_glyph() method.
+ *
+ * Return value: the value returned by get_nominal_glyph().
+ *
+ * Since: 1.2.3
+ **/
+hb_bool_t
+hb_font_get_nominal_glyph (hb_font_t *font,
+ hb_codepoint_t unicode,
+ hb_codepoint_t *glyph)
+{
+ return font->get_nominal_glyph (unicode, glyph);
+}
+
+/**
+ * hb_font_get_nominal_glyphs:
+ * @font: a font.
+ * @count: number of entries to process.
+ * @first_unicode: pointer to the first input code point.
+ * @unicode_stride: stride between consecutive input code points.
+ * @first_glyph: (out): pointer to the first output glyph.
+ * @glyph_stride: stride between consecutive output glyphs.
+ *
+ * Batch variant of hb_font_get_nominal_glyph(); forwards all arguments
+ * to @font's get_nominal_glyphs() method.
+ *
+ * Return value: the value returned by get_nominal_glyphs().
+ *
+ * Since: 2.6.3
+ **/
+unsigned int
+hb_font_get_nominal_glyphs (hb_font_t *font,
+ unsigned int count,
+ const hb_codepoint_t *first_unicode,
+ unsigned int unicode_stride,
+ hb_codepoint_t *first_glyph,
+ unsigned int glyph_stride)
+{
+ return font->get_nominal_glyphs (count,
+ first_unicode, unicode_stride,
+ first_glyph, glyph_stride);
+}
+
+/**
+ * hb_font_get_variation_glyph:
+ * @font: a font.
+ * @unicode: the code point to look up.
+ * @variation_selector: the variation selector.
+ * @glyph: (out): receives the glyph.
+ *
+ * Forwards to @font's get_variation_glyph() method.
+ *
+ * Return value: the value returned by get_variation_glyph().
+ *
+ * Since: 1.2.3
+ **/
+hb_bool_t
+hb_font_get_variation_glyph (hb_font_t *font,
+ hb_codepoint_t unicode, hb_codepoint_t variation_selector,
+ hb_codepoint_t *glyph)
+{
+ return font->get_variation_glyph (unicode, variation_selector, glyph);
+}
+
+/**
+ * hb_font_get_glyph_h_advance:
+ * @font: a font.
+ * @glyph: the glyph to query.
+ *
+ * Forwards to @font's get_glyph_h_advance() method.
+ *
+ * Return value: the advance returned by get_glyph_h_advance().
+ *
+ * Since: 0.9.2
+ **/
+hb_position_t
+hb_font_get_glyph_h_advance (hb_font_t *font,
+ hb_codepoint_t glyph)
+{
+ return font->get_glyph_h_advance (glyph);
+}
+
+/**
+ * hb_font_get_glyph_v_advance:
+ * @font: a font.
+ * @glyph: the glyph to query.
+ *
+ * Forwards to @font's get_glyph_v_advance() method.
+ *
+ * Return value: the advance returned by get_glyph_v_advance().
+ *
+ * Since: 0.9.2
+ **/
+hb_position_t
+hb_font_get_glyph_v_advance (hb_font_t *font,
+ hb_codepoint_t glyph)
+{
+ return font->get_glyph_v_advance (glyph);
+}
+
+/**
+ * hb_font_get_glyph_h_advances:
+ * @font: a font.
+ * @count: number of entries to process.
+ * @first_glyph: pointer to the first input glyph.
+ * @glyph_stride: stride between consecutive input glyphs.
+ * @first_advance: (out): pointer to the first output advance.
+ * @advance_stride: stride between consecutive output advances.
+ *
+ * Batch variant of hb_font_get_glyph_h_advance(); forwards all
+ * arguments to @font's get_glyph_h_advances() method.
+ *
+ * Since: 1.8.6
+ **/
+void
+hb_font_get_glyph_h_advances (hb_font_t* font,
+ unsigned int count,
+ const hb_codepoint_t *first_glyph,
+ unsigned glyph_stride,
+ hb_position_t *first_advance,
+ unsigned advance_stride)
+{
+ font->get_glyph_h_advances (count, first_glyph, glyph_stride, first_advance, advance_stride);
+}
+/**
+ * hb_font_get_glyph_v_advances:
+ * @font: a font.
+ * @count: number of entries to process.
+ * @first_glyph: pointer to the first input glyph.
+ * @glyph_stride: stride between consecutive input glyphs.
+ * @first_advance: (out): pointer to the first output advance.
+ * @advance_stride: stride between consecutive output advances.
+ *
+ * Batch variant of hb_font_get_glyph_v_advance(); forwards all
+ * arguments to @font's get_glyph_v_advances() method.
+ *
+ * Since: 1.8.6
+ **/
+void
+hb_font_get_glyph_v_advances (hb_font_t* font,
+ unsigned int count,
+ const hb_codepoint_t *first_glyph,
+ unsigned glyph_stride,
+ hb_position_t *first_advance,
+ unsigned advance_stride)
+{
+ font->get_glyph_v_advances (count, first_glyph, glyph_stride, first_advance, advance_stride);
+}
+
+/**
+ * hb_font_get_glyph_h_origin:
+ * @font: a font.
+ * @glyph: the glyph to query.
+ * @x: (out): receives the X coordinate of the origin.
+ * @y: (out): receives the Y coordinate of the origin.
+ *
+ * Forwards to @font's get_glyph_h_origin() method.
+ *
+ * Return value: the value returned by get_glyph_h_origin().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_font_get_glyph_h_origin (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_position_t *x, hb_position_t *y)
+{
+ return font->get_glyph_h_origin (glyph, x, y);
+}
+
+/**
+ * hb_font_get_glyph_v_origin:
+ * @font: a font.
+ * @glyph: the glyph to query.
+ * @x: (out): receives the X coordinate of the origin.
+ * @y: (out): receives the Y coordinate of the origin.
+ *
+ * Forwards to @font's get_glyph_v_origin() method.
+ *
+ * Return value: the value returned by get_glyph_v_origin().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_font_get_glyph_v_origin (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_position_t *x, hb_position_t *y)
+{
+ return font->get_glyph_v_origin (glyph, x, y);
+}
+
+/**
+ * hb_font_get_glyph_h_kerning:
+ * @font: a font.
+ * @left_glyph: the first glyph of the pair.
+ * @right_glyph: the second glyph of the pair.
+ *
+ * Forwards to @font's get_glyph_h_kerning() method.
+ *
+ * Return value: the kerning returned by get_glyph_h_kerning().
+ *
+ * Since: 0.9.2
+ **/
+hb_position_t
+hb_font_get_glyph_h_kerning (hb_font_t *font,
+ hb_codepoint_t left_glyph, hb_codepoint_t right_glyph)
+{
+ return font->get_glyph_h_kerning (left_glyph, right_glyph);
+}
+
+#ifndef HB_DISABLE_DEPRECATED
+/**
+ * hb_font_get_glyph_v_kerning:
+ * @font: a font.
+ * @top_glyph: the first glyph of the pair.
+ * @bottom_glyph: the second glyph of the pair.
+ *
+ * Forwards to @font's get_glyph_v_kerning() method.  Only compiled
+ * when HB_DISABLE_DEPRECATED is not defined.
+ *
+ * Return value: the kerning returned by get_glyph_v_kerning().
+ *
+ * Since: 0.9.2
+ * Deprecated: 2.0.0
+ **/
+hb_position_t
+hb_font_get_glyph_v_kerning (hb_font_t *font,
+ hb_codepoint_t top_glyph, hb_codepoint_t bottom_glyph)
+{
+ return font->get_glyph_v_kerning (top_glyph, bottom_glyph);
+}
+#endif
+
+/**
+ * hb_font_get_glyph_extents:
+ * @font: a font.
+ * @glyph: the glyph to query.
+ * @extents: (out): receives the glyph extents.
+ *
+ * Forwards to @font's get_glyph_extents() method.
+ *
+ * Return value: the value returned by get_glyph_extents().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_font_get_glyph_extents (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_glyph_extents_t *extents)
+{
+ return font->get_glyph_extents (glyph, extents);
+}
+
+/**
+ * hb_font_get_glyph_contour_point:
+ * @font: a font.
+ * @glyph: the glyph to query.
+ * @point_index: index of the contour point.
+ * @x: (out): receives the X coordinate of the point.
+ * @y: (out): receives the Y coordinate of the point.
+ *
+ * Forwards to @font's get_glyph_contour_point() method.
+ *
+ * Return value: the value returned by get_glyph_contour_point().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_font_get_glyph_contour_point (hb_font_t *font,
+ hb_codepoint_t glyph, unsigned int point_index,
+ hb_position_t *x, hb_position_t *y)
+{
+ return font->get_glyph_contour_point (glyph, point_index, x, y);
+}
+
+/**
+ * hb_font_get_glyph_name:
+ * @font: a font.
+ * @glyph: the glyph to query.
+ * @name: (array length=size): buffer that receives the name.
+ * @size: size of the @name buffer.
+ *
+ * Forwards to @font's get_glyph_name() method.
+ *
+ * Return value: the value returned by get_glyph_name().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_font_get_glyph_name (hb_font_t *font,
+ hb_codepoint_t glyph,
+ char *name, unsigned int size)
+{
+ return font->get_glyph_name (glyph, name, size);
+}
+
+/**
+ * hb_font_get_glyph_from_name:
+ * @font: a font.
+ * @name: (array length=len): the glyph name to look up.
+ * @len: length of @name, or -1 if it is nul-terminated.
+ * @glyph: (out): receives the glyph.
+ *
+ * Forwards to @font's get_glyph_from_name() method.
+ *
+ * Return value: the value returned by get_glyph_from_name().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_font_get_glyph_from_name (hb_font_t *font,
+ const char *name, int len, /* -1 means nul-terminated */
+ hb_codepoint_t *glyph)
+{
+ return font->get_glyph_from_name (name, len, glyph);
+}
+
+
+/* A bit higher-level, and with fallback */
+
+/**
+ * hb_font_get_extents_for_direction:
+ * @font: a font.
+ * @direction: the text direction.
+ * @extents: (out): receives the font extents.
+ *
+ * Forwards to @font's get_extents_for_direction() method.
+ *
+ * Since: 1.1.3
+ **/
+void
+hb_font_get_extents_for_direction (hb_font_t *font,
+ hb_direction_t direction,
+ hb_font_extents_t *extents)
+{
+ return font->get_extents_for_direction (direction, extents);
+}
+/**
+ * hb_font_get_glyph_advance_for_direction:
+ * @font: a font.
+ * @glyph: the glyph to query.
+ * @direction: the text direction.
+ * @x: (out): receives the X component of the advance.
+ * @y: (out): receives the Y component of the advance.
+ *
+ * Forwards to @font's get_glyph_advance_for_direction() method.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_get_glyph_advance_for_direction (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_direction_t direction,
+ hb_position_t *x, hb_position_t *y)
+{
+ return font->get_glyph_advance_for_direction (glyph, direction, x, y);
+}
+/**
+ * hb_font_get_glyph_advances_for_direction:
+ * @font: a font.
+ * @direction: the text direction.
+ * @count: number of entries to process.
+ * @first_glyph: pointer to the first input glyph.
+ * @glyph_stride: stride between consecutive input glyphs.
+ * @first_advance: (out): pointer to the first output advance.
+ * @advance_stride: stride between consecutive output advances.
+ *
+ * Batch variant of hb_font_get_glyph_advance_for_direction(); forwards
+ * all arguments to @font's get_glyph_advances_for_direction() method.
+ *
+ * Since: 1.8.6
+ **/
+HB_EXTERN void
+hb_font_get_glyph_advances_for_direction (hb_font_t* font,
+ hb_direction_t direction,
+ unsigned int count,
+ const hb_codepoint_t *first_glyph,
+ unsigned glyph_stride,
+ hb_position_t *first_advance,
+ unsigned advance_stride)
+{
+ font->get_glyph_advances_for_direction (direction, count, first_glyph, glyph_stride, first_advance, advance_stride);
+}
+
+/**
+ * hb_font_get_glyph_origin_for_direction:
+ * @font: a font.
+ * @glyph: the glyph to query.
+ * @direction: the text direction.
+ * @x: (out): receives the X coordinate of the origin.
+ * @y: (out): receives the Y coordinate of the origin.
+ *
+ * Forwards to @font's get_glyph_origin_for_direction() method.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_get_glyph_origin_for_direction (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_direction_t direction,
+ hb_position_t *x, hb_position_t *y)
+{
+ return font->get_glyph_origin_for_direction (glyph, direction, x, y);
+}
+
+/**
+ * hb_font_add_glyph_origin_for_direction:
+ * @font: a font.
+ * @glyph: the glyph to query.
+ * @direction: the text direction.
+ * @x: (out): X coordinate passed to the font method.
+ * @y: (out): Y coordinate passed to the font method.
+ *
+ * Forwards to @font's add_glyph_origin_for_direction() method.
+ * NOTE(review): presumably the glyph origin is added to the values
+ * already stored in @x and @y, which would make them in-out
+ * parameters; confirm against the hb_font_t method.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_add_glyph_origin_for_direction (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_direction_t direction,
+ hb_position_t *x, hb_position_t *y)
+{
+ return font->add_glyph_origin_for_direction (glyph, direction, x, y);
+}
+
+/**
+ * hb_font_subtract_glyph_origin_for_direction:
+ * @font: a font.
+ * @glyph: the glyph to query.
+ * @direction: the text direction.
+ * @x: (out): X coordinate passed to the font method.
+ * @y: (out): Y coordinate passed to the font method.
+ *
+ * Forwards to @font's subtract_glyph_origin_for_direction() method.
+ * NOTE(review): presumably the glyph origin is subtracted from the
+ * values already stored in @x and @y, which would make them in-out
+ * parameters; confirm against the hb_font_t method.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_subtract_glyph_origin_for_direction (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_direction_t direction,
+ hb_position_t *x, hb_position_t *y)
+{
+ return font->subtract_glyph_origin_for_direction (glyph, direction, x, y);
+}
+
+/**
+ * hb_font_get_glyph_kerning_for_direction:
+ * @font: a font.
+ * @first_glyph: the first glyph of the pair.
+ * @second_glyph: the second glyph of the pair.
+ * @direction: the text direction.
+ * @x: (out): receives the X component of the kerning.
+ * @y: (out): receives the Y component of the kerning.
+ *
+ * Forwards to @font's get_glyph_kerning_for_direction() method.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_get_glyph_kerning_for_direction (hb_font_t *font,
+ hb_codepoint_t first_glyph, hb_codepoint_t second_glyph,
+ hb_direction_t direction,
+ hb_position_t *x, hb_position_t *y)
+{
+ return font->get_glyph_kerning_for_direction (first_glyph, second_glyph, direction, x, y);
+}
+
+/**
+ * hb_font_get_glyph_extents_for_origin:
+ * @font: a font.
+ * @glyph: the glyph to query.
+ * @direction: the text direction.
+ * @extents: (out): receives the glyph extents.
+ *
+ * Forwards to @font's get_glyph_extents_for_origin() method.
+ *
+ * Return value: the value returned by get_glyph_extents_for_origin().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_font_get_glyph_extents_for_origin (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_direction_t direction,
+ hb_glyph_extents_t *extents)
+{
+ return font->get_glyph_extents_for_origin (glyph, direction, extents);
+}
+
+/**
+ * hb_font_get_glyph_contour_point_for_origin:
+ * @font: a font.
+ * @glyph: the glyph to query.
+ * @point_index: index of the contour point.
+ * @direction: the text direction.
+ * @x: (out): receives the X coordinate of the point.
+ * @y: (out): receives the Y coordinate of the point.
+ *
+ * Forwards to @font's get_glyph_contour_point_for_origin() method.
+ *
+ * Return value: the value returned by
+ * get_glyph_contour_point_for_origin().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_font_get_glyph_contour_point_for_origin (hb_font_t *font,
+ hb_codepoint_t glyph, unsigned int point_index,
+ hb_direction_t direction,
+ hb_position_t *x, hb_position_t *y)
+{
+ return font->get_glyph_contour_point_for_origin (glyph, point_index, direction, x, y);
+}
+
+/* Generates gidDDD if glyph has no name. */
+/**
+ * hb_font_glyph_to_string:
+ * @font: a font.
+ * @glyph: the glyph to convert.
+ * @s: (array length=size): buffer that receives the string.
+ * @size: size of the @s buffer.
+ *
+ * Forwards to @font's glyph_to_string() method.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_glyph_to_string (hb_font_t *font,
+ hb_codepoint_t glyph,
+ char *s, unsigned int size)
+{
+ font->glyph_to_string (glyph, s, size);
+}
+
+/* Parses gidDDD and uniUUUU strings automatically. */
+/**
+ * hb_font_glyph_from_string:
+ * @font: a font.
+ * @s: (array length=len) (element-type uint8_t): the string to parse.
+ * @len: length of @s, or -1 if it is nul-terminated.
+ * @glyph: (out): receives the glyph.
+ *
+ * Forwards to @font's glyph_from_string() method.
+ *
+ * Return value: the value returned by glyph_from_string().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_font_glyph_from_string (hb_font_t *font,
+ const char *s, int len, /* -1 means nul-terminated */
+ hb_codepoint_t *glyph)
+{
+ return font->glyph_from_string (s, len, glyph);
+}
+
+
+/*
+ * hb_font_t
+ */
+
+DEFINE_NULL_INSTANCE (hb_font_t) = /* The Null font object returned by
+   * hb_font_get_empty(): no parent, the Null face, the Null font-functions
+   * object, and no variation coordinates. */
+{
+ HB_OBJECT_HEADER_STATIC,
+
+ nullptr, /* parent */
+ const_cast<hb_face_t *> (&_hb_Null_hb_face_t), /* presumably face */
+
+ 1000, /* x_scale */
+ 1000, /* y_scale */
+ 1<<16, /* x_mult */
+ 1<<16, /* y_mult */
+
+ 0, /* x_ppem */
+ 0, /* y_ppem */
+ 0, /* ptem */
+
+ 0, /* num_coords */
+ nullptr, /* coords */
+ nullptr, /* design_coords */
+
+ const_cast<hb_font_funcs_t *> (&_hb_Null_hb_font_funcs_t), /* presumably klass */
+
+ /* Zero for the rest is fine. */
+};
+
+
+/* Allocates a font for `face` (the empty face when `face` is null).
+ * The face is made immutable and referenced; the new font starts with the
+ * empty font as parent, the empty font-functions object as klass, both
+ * scales set to the face's upem and both multipliers set to 1<<16.
+ * Returns the empty font if hb_object_create() returns null. */
+static hb_font_t *
+_hb_font_create (hb_face_t *face)
+{
+  if (unlikely (!face))
+    face = hb_face_get_empty ();
+
+  hb_font_t *font = hb_object_create<hb_font_t> ();
+  if (!font)
+    return hb_font_get_empty ();
+
+  hb_face_make_immutable (face);
+  font->parent = hb_font_get_empty ();
+  font->face = hb_face_reference (face);
+  font->klass = hb_font_funcs_get_empty ();
+  font->data.init0 (font);
+
+  font->y_scale = hb_face_get_upem (face);
+  font->x_scale = font->y_scale;
+  font->y_mult = 1 << 16;
+  font->x_mult = font->y_mult;
+
+  return font;
+}
+
+/**
+ * hb_font_create: (Xconstructor)
+ * @face: a face.
+ *
+ * Creates a font for @face via _hb_font_create() and, unless
+ * HB_NO_OT_FONT is defined, installs the built-in font functions on it
+ * with hb_ot_font_set_funcs().
+ *
+ * Return value: (transfer full): the font returned by _hb_font_create().
+ *
+ * Since: 0.9.2
+ **/
+hb_font_t *
+hb_font_create (hb_face_t *face)
+{
+ hb_font_t *font = _hb_font_create (face);
+
+#ifndef HB_NO_OT_FONT
+ /* Install our in-house, very lightweight, funcs. */
+ hb_ot_font_set_funcs (font);
+#endif
+
+ return font;
+}
+
+/* Replaces the font's variation-coordinate arrays.  The arrays currently
+ * held by the font are freed and the font takes over the passed
+ * `coords` and `design_coords` pointers (they are stored, not copied)
+ * together with their common length. */
+static void
+_hb_font_adopt_var_coords (hb_font_t *font,
+    int *coords, /* 2.14 normalized */
+    float *design_coords,
+    unsigned int coords_length)
+{
+  free (font->coords);
+  font->coords = coords;
+
+  free (font->design_coords);
+  font->design_coords = design_coords;
+
+  font->num_coords = coords_length;
+}
+
+/**
+ * hb_font_create_sub_font:
+ * @parent: parent font.
+ *
+ * Creates a font on @parent's face with @parent (the empty font when
+ * @parent is null) as its parent.  Scale, ppem and ptem are copied from
+ * @parent, and its variation coordinates are duplicated.  If the
+ * coordinate arrays cannot be allocated the new font is returned
+ * without variation coordinates.
+ *
+ * Return value: (transfer full): the new font; if the created font is
+ * immutable it is returned as-is, without any of the above applied.
+ *
+ * Since: 0.9.2
+ **/
+hb_font_t *
+hb_font_create_sub_font (hb_font_t *parent)
+{
+  if (unlikely (!parent))
+    parent = hb_font_get_empty ();
+
+  hb_font_t *font = _hb_font_create (parent->face);
+  if (unlikely (hb_object_is_immutable (font)))
+    return font;
+
+  font->parent = hb_font_reference (parent);
+
+  font->x_scale = parent->x_scale;
+  font->y_scale = parent->y_scale;
+  font->mults_changed ();
+  font->x_ppem = parent->x_ppem;
+  font->y_ppem = parent->y_ppem;
+  font->ptem = parent->ptem;
+
+  const unsigned int num_coords = parent->num_coords;
+  if (!num_coords)
+    return font;
+
+  /* Duplicate both coordinate arrays; adopt them only if both exist. */
+  int *coords = (int *) calloc (num_coords, sizeof (parent->coords[0]));
+  float *design_coords = (float *) calloc (num_coords, sizeof (parent->design_coords[0]));
+  if (unlikely (!coords || !design_coords))
+  {
+    free (coords);
+    free (design_coords);
+    return font;
+  }
+
+  memcpy (coords, parent->coords, num_coords * sizeof (parent->coords[0]));
+  memcpy (design_coords, parent->design_coords, num_coords * sizeof (parent->design_coords[0]));
+  _hb_font_adopt_var_coords (font, coords, design_coords, num_coords);
+
+  return font;
+}
+
+/**
+ * hb_font_get_empty:
+ *
+ * Returns the Null font object (Null (hb_font_t)), with its const
+ * qualifier cast away.
+ *
+ * Return value: (transfer full): pointer to the Null font object.
+ *
+ * Since: 0.9.2
+ **/
+hb_font_t *
+hb_font_get_empty ()
+{
+ return const_cast<hb_font_t *> (&Null (hb_font_t));
+}
+
+/**
+ * hb_font_reference: (skip)
+ * @font: a font.
+ *
+ * Passes @font to hb_object_reference() and returns the result.
+ * NOTE(review): presumably this takes an additional reference on
+ * @font; confirm against hb_object_reference().
+ *
+ * Return value: (transfer full): the value returned by
+ * hb_object_reference().
+ *
+ * Since: 0.9.2
+ **/
+hb_font_t *
+hb_font_reference (hb_font_t *font)
+{
+ return hb_object_reference (font);
+}
+
+/**
+ * hb_font_destroy: (skip)
+ * @font: a font.
+ *
+ * Calls hb_object_destroy() on @font.  If that returns false nothing
+ * else happens.  Otherwise the font is torn down: font->data is
+ * finalized, the font's destroy callback (if set) is run on its user
+ * data, the parent font, face and font-functions objects are
+ * destroyed, the variation-coordinate arrays are freed, and finally
+ * @font itself is freed.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_destroy (hb_font_t *font)
+{
+ if (!hb_object_destroy (font)) return;
+
+ font->data.fini ();
+
+ if (font->destroy)
+ font->destroy (font->user_data);
+
+ hb_font_destroy (font->parent);
+ hb_face_destroy (font->face);
+ hb_font_funcs_destroy (font->klass);
+
+ free (font->coords);
+ free (font->design_coords);
+
+ free (font);
+}
+
+/**
+ * hb_font_set_user_data: (skip)
+ * @font: a font.
+ * @key: the user-data key.
+ * @data: the data pointer to associate with @key.
+ * @destroy: callback passed along with @data (may be used to release it).
+ * @replace: flag forwarded unchanged to hb_object_set_user_data().
+ *
+ * Forwards all arguments to hb_object_set_user_data().
+ *
+ * Return value: the value returned by hb_object_set_user_data().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_font_set_user_data (hb_font_t *font,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace)
+{
+ return hb_object_set_user_data (font, key, data, destroy, replace);
+}
+
+/**
+ * hb_font_get_user_data: (skip)
+ * @font: a font.
+ * @key: the user-data key to look up.
+ *
+ * Forwards to hb_object_get_user_data() for @font and @key.
+ *
+ * Return value: (transfer none): the value returned by
+ * hb_object_get_user_data().
+ *
+ * Since: 0.9.2
+ **/
+void *
+hb_font_get_user_data (hb_font_t *font,
+ hb_user_data_key_t *key)
+{
+ return hb_object_get_user_data (font, key);
+}
+
+/**
+ * hb_font_make_immutable:
+ * @font: a font.
+ *
+ * Makes @font immutable.  Does nothing if it already is; otherwise the
+ * parent font (if any) is made immutable first, then @font itself.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_make_immutable (hb_font_t *font)
+{
+  if (!hb_object_is_immutable (font))
+  {
+    hb_font_t *parent = font->parent;
+    if (parent)
+      hb_font_make_immutable (parent);
+
+    hb_object_make_immutable (font);
+  }
+}
+
+/**
+ * hb_font_is_immutable:
+ * @font: a font.
+ *
+ * Reports whether @font is immutable, as determined by
+ * hb_object_is_immutable().
+ *
+ * Return value: the value returned by hb_object_is_immutable().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_font_is_immutable (hb_font_t *font)
+{
+ return hb_object_is_immutable (font);
+}
+
+/**
+ * hb_font_set_parent:
+ * @font: a font.
+ * @parent: new parent.
+ *
+ * Sets parent font of @font.  A null @parent selects the empty font.
+ * Does nothing if @font is immutable.
+ *
+ * Since: 1.0.5
+ **/
+void
+hb_font_set_parent (hb_font_t *font,
+		    hb_font_t *parent)
+{
+  if (hb_object_is_immutable (font))
+    return;
+
+  if (!parent)
+    parent = hb_font_get_empty ();
+
+  /* Reference the new parent before releasing the previous one. */
+  hb_font_t *previous = font->parent;
+  font->parent = hb_font_reference (parent);
+  hb_font_destroy (previous);
+}
+
+/**
+ * hb_font_get_parent:
+ * @font: a font.
+ *
+ * Returns the parent pointer stored in @font.
+ *
+ * Return value: (transfer none): the parent font.
+ *
+ * Since: 0.9.2
+ **/
+hb_font_t *
+hb_font_get_parent (hb_font_t *font)
+{
+ return font->parent;
+}
+
+/**
+ * hb_font_set_face:
+ * @font: a font.
+ * @face: new face.
+ *
+ * Sets font-face of @font.  A null @face selects the empty face.  The
+ * new face is made immutable and the font's multipliers are
+ * recomputed.  Does nothing if @font is immutable.
+ *
+ * Since: 1.4.3
+ **/
+void
+hb_font_set_face (hb_font_t *font,
+		  hb_face_t *face)
+{
+  if (hb_object_is_immutable (font))
+    return;
+
+  if (unlikely (!face))
+    face = hb_face_get_empty ();
+
+  /* Reference the new face before releasing the previous one. */
+  hb_face_t *previous = font->face;
+
+  hb_face_make_immutable (face);
+  font->face = hb_face_reference (face);
+  font->mults_changed ();
+
+  hb_face_destroy (previous);
+}
+
+/**
+ * hb_font_get_face:
+ * @font: a font.
+ *
+ * Returns the face pointer stored in @font.
+ *
+ * Return value: (transfer none): the face.
+ *
+ * Since: 0.9.2
+ **/
+hb_face_t *
+hb_font_get_face (hb_font_t *font)
+{
+ return font->face;
+}
+
+
+/**
+ * hb_font_set_funcs:
+ * @font: a font.
+ * @klass: (closure font_data) (destroy destroy) (scope notified): the
+ *         font-functions object to install; null selects the object
+ *         returned by hb_font_funcs_get_empty().
+ * @font_data: data pointer stored on the font as its user data.
+ * @destroy: callback later invoked on @font_data when it is replaced or
+ *           the font is destroyed.
+ *
+ * Replaces the font-functions object and the associated data of @font.
+ * If @font is immutable nothing is changed and @destroy (if non-null)
+ * is invoked on @font_data immediately.  Otherwise the previously set
+ * destroy callback (if any) is run on the previous user data, @klass
+ * is referenced and the previous font-functions object is destroyed.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_set_funcs (hb_font_t *font,
+ hb_font_funcs_t *klass,
+ void *font_data,
+ hb_destroy_func_t destroy)
+{
+ if (hb_object_is_immutable (font))
+ {
+ if (destroy)
+ destroy (font_data);
+ return;
+ }
+
+ if (font->destroy)
+ font->destroy (font->user_data);
+
+ if (!klass)
+ klass = hb_font_funcs_get_empty ();
+
+ hb_font_funcs_reference (klass);
+ hb_font_funcs_destroy (font->klass);
+ font->klass = klass;
+ font->user_data = font_data;
+ font->destroy = destroy;
+}
+
+/**
+ * hb_font_set_funcs_data:
+ * @font: a font.
+ * @font_data: (destroy destroy) (scope notified): data pointer stored on
+ *             the font as its user data.
+ * @destroy: callback later invoked on @font_data when it is replaced or
+ *           the font is destroyed.
+ *
+ * Replaces the user data and destroy callback of @font while keeping
+ * its font-functions object.  If @font is immutable nothing is changed
+ * and @destroy (if non-null) is invoked on @font_data immediately.
+ * Otherwise the previously set destroy callback (if any) is run on the
+ * previous user data first.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_set_funcs_data (hb_font_t *font,
+ void *font_data,
+ hb_destroy_func_t destroy)
+{
+ /* Destroy user_data? */
+ if (hb_object_is_immutable (font))
+ {
+ if (destroy)
+ destroy (font_data);
+ return;
+ }
+
+ if (font->destroy)
+ font->destroy (font->user_data);
+
+ font->user_data = font_data;
+ font->destroy = destroy;
+}
+
+
+/**
+ * hb_font_set_scale:
+ * @font: a font.
+ * @x_scale: new horizontal scale.
+ * @y_scale: new vertical scale.
+ *
+ * Stores the scale of @font and calls mults_changed() so the derived
+ * multipliers are updated.  Does nothing if @font is immutable.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_set_scale (hb_font_t *font,
+ int x_scale,
+ int y_scale)
+{
+ if (hb_object_is_immutable (font))
+ return;
+
+ font->x_scale = x_scale;
+ font->y_scale = y_scale;
+ font->mults_changed ();
+}
+
+/**
+ * hb_font_get_scale:
+ * @font: a font.
+ * @x_scale: (out): receives the horizontal scale; may be null.
+ * @y_scale: (out): receives the vertical scale; may be null.
+ *
+ * Reads the scale stored in @font.  Each output is written only if its
+ * pointer is non-null.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_get_scale (hb_font_t *font,
+ int *x_scale,
+ int *y_scale)
+{
+ if (x_scale) *x_scale = font->x_scale;
+ if (y_scale) *y_scale = font->y_scale;
+}
+
+/**
+ * hb_font_set_ppem:
+ * @font: a font.
+ * @x_ppem: new horizontal ppem value.
+ * @y_ppem: new vertical ppem value.
+ *
+ * Stores the ppem values of @font.  Does nothing if @font is
+ * immutable.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_set_ppem (hb_font_t *font,
+ unsigned int x_ppem,
+ unsigned int y_ppem)
+{
+ if (hb_object_is_immutable (font))
+ return;
+
+ font->x_ppem = x_ppem;
+ font->y_ppem = y_ppem;
+}
+
+/**
+ * hb_font_get_ppem:
+ * @font: a font.
+ * @x_ppem: (out): receives the horizontal ppem value; may be null.
+ * @y_ppem: (out): receives the vertical ppem value; may be null.
+ *
+ * Reads the ppem values stored in @font.  Each output is written only
+ * if its pointer is non-null.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_font_get_ppem (hb_font_t *font,
+ unsigned int *x_ppem,
+ unsigned int *y_ppem)
+{
+ if (x_ppem) *x_ppem = font->x_ppem;
+ if (y_ppem) *y_ppem = font->y_ppem;
+}
+
+/**
+ * hb_font_set_ptem:
+ * @font: a font.
+ * @ptem: font size in points.
+ *
+ * Sets "point size" of the font. Set to 0 to unset.
+ * Does nothing if @font is immutable.
+ *
+ * There are 72 points in an inch.
+ *
+ * Since: 1.6.0
+ **/
+void
+hb_font_set_ptem (hb_font_t *font, float ptem)
+{
+ if (hb_object_is_immutable (font))
+ return;
+
+ font->ptem = ptem;
+}
+
+/**
+ * hb_font_get_ptem:
+ * @font: a font.
+ *
+ * Gets the "point size" of the font. A value of 0 means unset.
+ *
+ * Return value: Point size.
+ *
+ * Since: 1.6.0
+ **/
+float
+hb_font_get_ptem (hb_font_t *font)
+{
+ return font->ptem;
+}
+
+#ifndef HB_NO_VAR
+/*
+ * Variations
+ */
+
+/**
+ * hb_font_set_variations:
+ * @font: a font.
+ * @variations: array of variation settings (tag/value pairs).
+ * @variations_length: number of entries in @variations.
+ *
+ * Applies a list of variation settings to @font.  Does nothing if
+ * @font is immutable.  With zero @variations_length the coordinates
+ * are reset through hb_font_set_var_coords_normalized().  Otherwise
+ * one zero-initialized design and one normalized coordinate are
+ * allocated per face axis; each entry whose tag resolves to an axis
+ * overwrites that axis' values, and entries that do not resolve are
+ * ignored.  The normalized coordinates are then passed through the
+ * face's avar table and both arrays are adopted by the font.  On
+ * allocation failure the font is left unchanged.
+ *
+ * Since: 1.4.2
+ */
+void
+hb_font_set_variations (hb_font_t *font,
+ const hb_variation_t *variations,
+ unsigned int variations_length)
+{
+ if (hb_object_is_immutable (font))
+ return;
+
+ if (!variations_length)
+ {
+ hb_font_set_var_coords_normalized (font, nullptr, 0);
+ return;
+ }
+
+ unsigned int coords_length = hb_ot_var_get_axis_count (font->face);
+
+ int *normalized = coords_length ? (int *) calloc (coords_length, sizeof (int)) : nullptr;
+ float *design_coords = coords_length ? (float *) calloc (coords_length, sizeof (float)) : nullptr;
+
+ if (unlikely (coords_length && !(normalized && design_coords)))
+ { /* at least one allocation failed: release whichever succeeded */
+ free (normalized);
+ free (design_coords);
+ return;
+ }
+
+ const OT::fvar &fvar = *font->face->table.fvar;
+ for (unsigned int i = 0; i < variations_length; i++)
+ {
+ hb_ot_var_axis_info_t info;
+ if (hb_ot_var_find_axis_info (font->face, variations[i].tag, &info) &&
+ info.axis_index < coords_length) /* guards the array writes below */
+ {
+ float v = variations[i].value;
+ design_coords[info.axis_index] = v;
+ normalized[info.axis_index] = fvar.normalize_axis_value (info.axis_index, v);
+ }
+ }
+ font->face->table.avar->map_coords (normalized, coords_length);
+
+ _hb_font_adopt_var_coords (font, normalized, design_coords, coords_length);
+}
+
+/**
+ * hb_font_set_var_coords_design:
+ * @font: a font.
+ * @coords: array of design-space coordinates, read for @coords_length entries.
+ * @coords_length: number of entries in @coords.
+ *
+ * Sets the variation coordinates of @font from design-space values. An
+ * immutable font is left untouched. The values are copied into a fresh
+ * design-coordinate array, a parallel normalized array is filled in by
+ * hb_ot_var_normalize_coords(), and both arrays are given to
+ * _hb_font_adopt_var_coords(). If an allocation fails the font is left
+ * unchanged.
+ *
+ * Since: 1.4.2
+ */
+void
+hb_font_set_var_coords_design (hb_font_t *font,
+ const float *coords,
+ unsigned int coords_length)
+{
+ if (hb_object_is_immutable (font))
+ return;
+
+ int *normalized = coords_length ? (int *) calloc (coords_length, sizeof (int)) : nullptr;
+ float *design_coords = coords_length ? (float *) calloc (coords_length, sizeof (float)) : nullptr;
+
+ if (unlikely (coords_length && !(normalized && design_coords)))
+ {
+ free (normalized); /* free (nullptr) is a no-op, so whichever array was obtained is released */
+ free (design_coords);
+ return;
+ }
+
+ if (coords_length) /* skip memcpy entirely when both pointers may be null */
+ memcpy (design_coords, coords, coords_length * sizeof (font->design_coords[0]));
+
+ hb_ot_var_normalize_coords (font->face, coords_length, coords, normalized);
+ _hb_font_adopt_var_coords (font, normalized, design_coords, coords_length); /* NOTE(review): arrays are not freed here, so the callee presumably takes ownership - confirm */
+}
+
+/**
+ * hb_font_set_var_named_instance:
+ * @font: a font.
+ * @instance_index: named instance index.
+ *
+ * Sets design coords of a font from a named instance index. An immutable
+ * font is left untouched, and nothing is changed if the temporary
+ * coordinate buffer cannot be allocated.
+ *
+ * Since: 2.6.0
+ */
+void
+hb_font_set_var_named_instance (hb_font_t *font,
+                                unsigned instance_index)
+{
+  if (hb_object_is_immutable (font))
+    return;
+
+  /* With null output arguments the call's return value is used as the count. */
+  unsigned int count = hb_ot_var_named_instance_get_design_coords (font->face, instance_index, nullptr, nullptr);
+
+  float *buffer = nullptr;
+  if (count)
+  {
+    buffer = (float *) calloc (count, sizeof (float));
+    if (unlikely (!buffer))
+      return;
+  }
+
+  /* Fetch the coordinates, apply them, then drop the scratch buffer. */
+  hb_ot_var_named_instance_get_design_coords (font->face, instance_index, &count, buffer);
+  hb_font_set_var_coords_design (font, buffer, count);
+  free (buffer);
+}
+
+/**
+ * hb_font_set_var_coords_normalized:
+ * @font: a font.
+ * @coords: array of normalized coordinates (2.14 format), read for
+ *          @coords_length entries.
+ * @coords_length: number of entries in @coords.
+ *
+ * Sets the variation coordinates of @font from normalized values. An
+ * immutable font is left untouched. The input is copied, and a matching
+ * design-coordinate array is approximated by running a second copy
+ * through the avar table's unmap_coords() and then the fvar table's
+ * unnormalize_axis_value() for each axis. The copy and the approximated
+ * design coordinates are given to _hb_font_adopt_var_coords(). If an
+ * allocation fails the font is left unchanged.
+ *
+ * Since: 1.4.2
+ */
+void
+hb_font_set_var_coords_normalized (hb_font_t *font,
+ const int *coords, /* 2.14 normalized */
+ unsigned int coords_length)
+{
+ if (hb_object_is_immutable (font))
+ return;
+
+ int *copy = coords_length ? (int *) calloc (coords_length, sizeof (coords[0])) : nullptr;
+ int *unmapped = coords_length ? (int *) calloc (coords_length, sizeof (coords[0])) : nullptr;
+ float *design_coords = coords_length ? (float *) calloc (coords_length, sizeof (design_coords[0])) : nullptr;
+
+ if (unlikely (coords_length && !(copy && unmapped && design_coords)))
+ {
+ free (copy); /* free (nullptr) is a no-op, so whichever arrays were obtained are released */
+ free (unmapped);
+ free (design_coords);
+ return;
+ }
+
+ if (coords_length) /* skip memcpy entirely when the pointers may be null */
+ {
+ memcpy (copy, coords, coords_length * sizeof (coords[0]));
+ memcpy (unmapped, coords, coords_length * sizeof (coords[0]));
+ }
+
+ /* Best effort design coords simulation */
+ font->face->table.avar->unmap_coords (unmapped, coords_length);
+ for (unsigned int i = 0; i < coords_length; ++i)
+ design_coords[i] = font->face->table.fvar->unnormalize_axis_value (i, unmapped[i]);
+ free (unmapped); /* scratch array only; not handed to the font */
+
+ _hb_font_adopt_var_coords (font, copy, design_coords, coords_length); /* NOTE(review): arrays are not freed here, so the callee presumably takes ownership - confirm */
+}
+
+/**
+ * hb_font_get_var_coords_normalized:
+ * @font: a font.
+ * @length: (out): receives the number of coordinates; may be null.
+ *
+ * Return value is valid as long as variation coordinates of the font
+ * are not modified.
+ *
+ * Return value: the font's stored normalized coordinate array.
+ *
+ * Since: 1.4.2
+ */
+const int *
+hb_font_get_var_coords_normalized (hb_font_t *font,
+                                   unsigned int *length)
+{
+  /* The count is reported only when the caller asked for it. */
+  if (length != nullptr)
+    *length = font->num_coords;
+
+  const int *stored = font->coords;
+  return stored;
+}
+
+#ifdef HB_EXPERIMENTAL_API
+/**
+ * hb_font_get_var_coords_design:
+ * @font: a font.
+ * @length: (out): receives the number of coordinates; may be null.
+ *
+ * Return value is valid as long as variation coordinates of the font
+ * are not modified.
+ *
+ * Return value: the font's stored design coordinate array.
+ *
+ * Since: EXPERIMENTAL
+ */
+const float *
+hb_font_get_var_coords_design (hb_font_t *font,
+                               unsigned int *length)
+{
+  /* The count is reported only when the caller asked for it. */
+  if (length != nullptr)
+    *length = font->num_coords;
+
+  const float *stored = font->design_coords;
+  return stored;
+}
+#endif
+#endif
+
+#ifndef HB_DISABLE_DEPRECATED
+/*
+ * Deprecated get_glyph_func():
+ */
+
+struct hb_trampoline_closure_t
+{
+ void *user_data; /* caller's data, forwarded to the wrapped callback */
+ hb_destroy_func_t destroy; /* optional; called on user_data once ref_count reaches zero */
+ unsigned int ref_count; /* starts at 1 in trampoline_create(); one count per registration sharing the closure */
+};
+
+template <typename FuncType>
+struct hb_trampoline_t
+{
+ hb_trampoline_closure_t closure; /* Must be first: trampoline_destroy() casts the trampoline pointer it is given to hb_trampoline_closure_t *. */
+ FuncType func; /* the wrapped callback that the *_trampoline adapters invoke */
+};
+
+/* Allocates a zeroed trampoline wrapping func, with the caller's user_data
+ * and destroy notifier stored in its closure and a reference count of one.
+ * Returns nullptr if the allocation fails. */
+template <typename FuncType>
+static hb_trampoline_t<FuncType> *
+trampoline_create (FuncType func,
+                   void *user_data,
+                   hb_destroy_func_t destroy)
+{
+  typedef hb_trampoline_t<FuncType> trampoline_t;
+
+  trampoline_t *result = (trampoline_t *) calloc (1, sizeof (trampoline_t));
+  if (unlikely (!result))
+    return nullptr;
+
+  result->func = func;
+  result->closure.ref_count = 1;
+  result->closure.destroy = destroy;
+  result->closure.user_data = user_data;
+
+  return result;
+}
+
+/* Adds one reference to the closure. */
+static void
+trampoline_reference (hb_trampoline_closure_t *closure)
+{
+  closure->ref_count += 1;
+}
+
+/* Destroy notifier registered for a trampoline: drops one reference and,
+ * when none remain, runs the wrapped destroy notifier (if any) on the
+ * caller's data and frees the trampoline. */
+static void
+trampoline_destroy (void *user_data)
+{
+  hb_trampoline_closure_t *closure = static_cast<hb_trampoline_closure_t *> (user_data);
+
+  closure->ref_count -= 1;
+  if (closure->ref_count == 0)
+  {
+    if (closure->destroy != nullptr)
+      closure->destroy (closure->user_data);
+    free (closure);
+  }
+}
+
+typedef hb_trampoline_t<hb_font_get_glyph_func_t> hb_font_get_glyph_trampoline_t;
+
+/* Nominal-glyph adapter: calls the wrapped legacy callback with a
+ * variation selector of 0 and the caller's original user data. */
+static hb_bool_t
+hb_font_get_nominal_glyph_trampoline (hb_font_t *font,
+                                      void *font_data,
+                                      hb_codepoint_t unicode,
+                                      hb_codepoint_t *glyph,
+                                      void *user_data)
+{
+  hb_font_get_glyph_trampoline_t *wrapped = static_cast<hb_font_get_glyph_trampoline_t *> (user_data);
+  void *inner_data = wrapped->closure.user_data;
+
+  return wrapped->func (font, font_data, unicode, 0, glyph, inner_data);
+}
+
+/* Variation-glyph adapter: calls the wrapped legacy callback with the
+ * given variation selector and the caller's original user data. */
+static hb_bool_t
+hb_font_get_variation_glyph_trampoline (hb_font_t *font,
+                                        void *font_data,
+                                        hb_codepoint_t unicode,
+                                        hb_codepoint_t variation_selector,
+                                        hb_codepoint_t *glyph,
+                                        void *user_data)
+{
+  hb_font_get_glyph_trampoline_t *wrapped = static_cast<hb_font_get_glyph_trampoline_t *> (user_data);
+  void *inner_data = wrapped->closure.user_data;
+
+  return wrapped->func (font, font_data, unicode, variation_selector, glyph, inner_data);
+}
+
+/**
+ * hb_font_funcs_set_glyph_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Deprecated. Use hb_font_funcs_set_nominal_glyph_func() and
+ * hb_font_funcs_set_variation_glyph_func() instead.
+ *
+ * Wraps @func in a reference-counted trampoline and registers it as both
+ * the nominal-glyph and the variation-glyph callback of @ffuncs. If
+ * @ffuncs is immutable or the trampoline cannot be allocated, @destroy
+ * (when given) is called on @user_data right away and nothing is
+ * registered.
+ *
+ * Since: 0.9.2
+ * Deprecated: 1.2.3
+ **/
+void
+hb_font_funcs_set_glyph_func (hb_font_funcs_t *ffuncs,
+                              hb_font_get_glyph_func_t func,
+                              void *user_data, hb_destroy_func_t destroy)
+{
+  if (hb_object_is_immutable (ffuncs))
+  {
+    if (destroy)
+      destroy (user_data);
+    return;
+  }
+
+  hb_font_get_glyph_trampoline_t *trampoline;
+
+  trampoline = trampoline_create (func, user_data, destroy);
+  if (unlikely (!trampoline))
+  {
+    if (destroy)
+      destroy (user_data);
+    return;
+  }
+
+  /* The trampoline is passed to two setters, each with trampoline_destroy
+   * as its destroy notifier. Take the second reference before handing it
+   * to either of them: once the first setter owns a reference it may run
+   * the notifier at any time, and the trampoline must not be touched
+   * after its last reference could have been dropped. */
+  trampoline_reference (&trampoline->closure);
+
+  hb_font_funcs_set_nominal_glyph_func (ffuncs,
+                                        hb_font_get_nominal_glyph_trampoline,
+                                        trampoline,
+                                        trampoline_destroy);
+
+  hb_font_funcs_set_variation_glyph_func (ffuncs,
+                                          hb_font_get_variation_glyph_trampoline,
+                                          trampoline,
+                                          trampoline_destroy);
+}
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-font.h b/thirdparty/harfbuzz/src/hb-font.h
new file mode 100644
index 0000000000..e1a5719f1d
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-font.h
@@ -0,0 +1,735 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_H_IN
+#error "Include <hb.h> instead."
+#endif
+
+#ifndef HB_FONT_H
+#define HB_FONT_H
+
+#include "hb-common.h"
+#include "hb-face.h"
+#include "hb-draw.h"
+
+HB_BEGIN_DECLS
+
+
+typedef struct hb_font_t hb_font_t;
+
+
+/*
+ * hb_font_funcs_t
+ */
+
+typedef struct hb_font_funcs_t hb_font_funcs_t;
+
+HB_EXTERN hb_font_funcs_t *
+hb_font_funcs_create (void);
+
+HB_EXTERN hb_font_funcs_t *
+hb_font_funcs_get_empty (void);
+
+HB_EXTERN hb_font_funcs_t *
+hb_font_funcs_reference (hb_font_funcs_t *ffuncs);
+
+HB_EXTERN void
+hb_font_funcs_destroy (hb_font_funcs_t *ffuncs);
+
+HB_EXTERN hb_bool_t
+hb_font_funcs_set_user_data (hb_font_funcs_t *ffuncs,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace);
+
+
+HB_EXTERN void *
+hb_font_funcs_get_user_data (hb_font_funcs_t *ffuncs,
+ hb_user_data_key_t *key);
+
+
+HB_EXTERN void
+hb_font_funcs_make_immutable (hb_font_funcs_t *ffuncs);
+
+HB_EXTERN hb_bool_t
+hb_font_funcs_is_immutable (hb_font_funcs_t *ffuncs);
+
+
+/* font and glyph extents */
+
+/* Font-wide extents, filled in through the hb_font_extents_t * argument of the font-extents callbacks.
+ * Note that typically ascender is positive and descender negative in coordinate systems that grow up. */
+typedef struct hb_font_extents_t
+{
+ hb_position_t ascender; /* typographic ascender. */
+ hb_position_t descender; /* typographic descender. */
+ hb_position_t line_gap; /* suggested line spacing gap. */
+ /*< private >*/
+ hb_position_t reserved9; /* reserved9 .. reserved1: marked private; presumably spare room for future fields - confirm before relying on them */
+ hb_position_t reserved8;
+ hb_position_t reserved7;
+ hb_position_t reserved6;
+ hb_position_t reserved5;
+ hb_position_t reserved4;
+ hb_position_t reserved3;
+ hb_position_t reserved2;
+ hb_position_t reserved1;
+} hb_font_extents_t;
+
+/* Extents of a single glyph, filled in through the hb_glyph_extents_t * argument of the glyph-extents callback.
+ * Note that height is negative in coordinate systems that grow up. */
+typedef struct hb_glyph_extents_t
+{
+ hb_position_t x_bearing; /* left side of glyph from origin. */
+ hb_position_t y_bearing; /* top side of glyph from origin. */
+ hb_position_t width; /* distance from left to right side. */
+ hb_position_t height; /* distance from top to bottom side. */
+} hb_glyph_extents_t;
+
+/* func types */
+
+typedef hb_bool_t (*hb_font_get_font_extents_func_t) (hb_font_t *font, void *font_data,
+ hb_font_extents_t *extents,
+ void *user_data);
+typedef hb_font_get_font_extents_func_t hb_font_get_font_h_extents_func_t;
+typedef hb_font_get_font_extents_func_t hb_font_get_font_v_extents_func_t;
+
+
+typedef hb_bool_t (*hb_font_get_nominal_glyph_func_t) (hb_font_t *font, void *font_data,
+ hb_codepoint_t unicode,
+ hb_codepoint_t *glyph,
+ void *user_data);
+typedef hb_bool_t (*hb_font_get_variation_glyph_func_t) (hb_font_t *font, void *font_data,
+ hb_codepoint_t unicode, hb_codepoint_t variation_selector,
+ hb_codepoint_t *glyph,
+ void *user_data);
+
+typedef unsigned int (*hb_font_get_nominal_glyphs_func_t) (hb_font_t *font, void *font_data,
+ unsigned int count,
+ const hb_codepoint_t *first_unicode,
+ unsigned int unicode_stride,
+ hb_codepoint_t *first_glyph,
+ unsigned int glyph_stride,
+ void *user_data);
+
+
+typedef hb_position_t (*hb_font_get_glyph_advance_func_t) (hb_font_t *font, void *font_data,
+ hb_codepoint_t glyph,
+ void *user_data);
+typedef hb_font_get_glyph_advance_func_t hb_font_get_glyph_h_advance_func_t;
+typedef hb_font_get_glyph_advance_func_t hb_font_get_glyph_v_advance_func_t;
+
+typedef void (*hb_font_get_glyph_advances_func_t) (hb_font_t* font, void* font_data,
+ unsigned int count,
+ const hb_codepoint_t *first_glyph,
+ unsigned glyph_stride,
+ hb_position_t *first_advance,
+ unsigned advance_stride,
+ void *user_data);
+typedef hb_font_get_glyph_advances_func_t hb_font_get_glyph_h_advances_func_t;
+typedef hb_font_get_glyph_advances_func_t hb_font_get_glyph_v_advances_func_t;
+
+typedef hb_bool_t (*hb_font_get_glyph_origin_func_t) (hb_font_t *font, void *font_data,
+ hb_codepoint_t glyph,
+ hb_position_t *x, hb_position_t *y,
+ void *user_data);
+typedef hb_font_get_glyph_origin_func_t hb_font_get_glyph_h_origin_func_t;
+typedef hb_font_get_glyph_origin_func_t hb_font_get_glyph_v_origin_func_t;
+
+typedef hb_position_t (*hb_font_get_glyph_kerning_func_t) (hb_font_t *font, void *font_data,
+ hb_codepoint_t first_glyph, hb_codepoint_t second_glyph,
+ void *user_data);
+typedef hb_font_get_glyph_kerning_func_t hb_font_get_glyph_h_kerning_func_t;
+
+
+typedef hb_bool_t (*hb_font_get_glyph_extents_func_t) (hb_font_t *font, void *font_data,
+ hb_codepoint_t glyph,
+ hb_glyph_extents_t *extents,
+ void *user_data);
+typedef hb_bool_t (*hb_font_get_glyph_contour_point_func_t) (hb_font_t *font, void *font_data,
+ hb_codepoint_t glyph, unsigned int point_index,
+ hb_position_t *x, hb_position_t *y,
+ void *user_data);
+
+
+typedef hb_bool_t (*hb_font_get_glyph_name_func_t) (hb_font_t *font, void *font_data,
+ hb_codepoint_t glyph,
+ char *name, unsigned int size,
+ void *user_data);
+typedef hb_bool_t (*hb_font_get_glyph_from_name_func_t) (hb_font_t *font, void *font_data,
+ const char *name, int len, /* -1 means nul-terminated */
+ hb_codepoint_t *glyph,
+ void *user_data);
+
+
+/* func setters */
+
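+/**
+ * hb_font_funcs_set_font_h_extents_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_font_h_extents_func_t callback of @ffuncs.
+ *
+ * Since: 1.1.2
+ **/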
+HB_EXTERN void
+hb_font_funcs_set_font_h_extents_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_font_h_extents_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
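+/**
+ * hb_font_funcs_set_font_v_extents_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_font_v_extents_func_t callback of @ffuncs.
+ *
+ * Since: 1.1.2
+ **/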
+HB_EXTERN void
+hb_font_funcs_set_font_v_extents_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_font_v_extents_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
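+/**
+ * hb_font_funcs_set_nominal_glyph_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_nominal_glyph_func_t callback of @ffuncs.
+ *
+ * Since: 1.2.3
+ **/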
+HB_EXTERN void
+hb_font_funcs_set_nominal_glyph_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_nominal_glyph_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
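+/**
+ * hb_font_funcs_set_nominal_glyphs_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_nominal_glyphs_func_t callback of @ffuncs.
+ *
+ * Since: 2.0.0
+ **/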
+HB_EXTERN void
+hb_font_funcs_set_nominal_glyphs_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_nominal_glyphs_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
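+/**
+ * hb_font_funcs_set_variation_glyph_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_variation_glyph_func_t callback of @ffuncs.
+ *
+ * Since: 1.2.3
+ **/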
+HB_EXTERN void
+hb_font_funcs_set_variation_glyph_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_variation_glyph_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
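+/**
+ * hb_font_funcs_set_glyph_h_advance_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_glyph_h_advance_func_t callback of @ffuncs.
+ *
+ * Since: 0.9.2
+ **/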
+HB_EXTERN void
+hb_font_funcs_set_glyph_h_advance_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_glyph_h_advance_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
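+/**
+ * hb_font_funcs_set_glyph_v_advance_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_glyph_v_advance_func_t callback of @ffuncs.
+ *
+ * Since: 0.9.2
+ **/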
+HB_EXTERN void
+hb_font_funcs_set_glyph_v_advance_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_glyph_v_advance_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
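+/**
+ * hb_font_funcs_set_glyph_h_advances_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_glyph_h_advances_func_t callback of @ffuncs.
+ *
+ * Since: 1.8.6
+ **/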
+HB_EXTERN void
+hb_font_funcs_set_glyph_h_advances_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_glyph_h_advances_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
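+/**
+ * hb_font_funcs_set_glyph_v_advances_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_glyph_v_advances_func_t callback of @ffuncs.
+ *
+ * Since: 1.8.6
+ **/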
+HB_EXTERN void
+hb_font_funcs_set_glyph_v_advances_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_glyph_v_advances_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
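+/**
+ * hb_font_funcs_set_glyph_h_origin_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_glyph_h_origin_func_t callback of @ffuncs.
+ *
+ * Since: 0.9.2
+ **/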
+HB_EXTERN void
+hb_font_funcs_set_glyph_h_origin_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_glyph_h_origin_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
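+/**
+ * hb_font_funcs_set_glyph_v_origin_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_glyph_v_origin_func_t callback of @ffuncs.
+ *
+ * Since: 0.9.2
+ **/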
+HB_EXTERN void
+hb_font_funcs_set_glyph_v_origin_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_glyph_v_origin_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
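+/**
+ * hb_font_funcs_set_glyph_h_kerning_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_glyph_h_kerning_func_t callback of @ffuncs.
+ *
+ * Since: 0.9.2
+ **/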
+HB_EXTERN void
+hb_font_funcs_set_glyph_h_kerning_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_glyph_h_kerning_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
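+/**
+ * hb_font_funcs_set_glyph_extents_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_glyph_extents_func_t callback of @ffuncs.
+ *
+ * Since: 0.9.2
+ **/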
+HB_EXTERN void
+hb_font_funcs_set_glyph_extents_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_glyph_extents_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
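+/**
+ * hb_font_funcs_set_glyph_contour_point_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_glyph_contour_point_func_t callback of @ffuncs.
+ *
+ * Since: 0.9.2
+ **/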
+HB_EXTERN void
+hb_font_funcs_set_glyph_contour_point_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_glyph_contour_point_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
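+/**
+ * hb_font_funcs_set_glyph_name_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_glyph_name_func_t callback of @ffuncs.
+ *
+ * Since: 0.9.2
+ **/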
+HB_EXTERN void
+hb_font_funcs_set_glyph_name_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_glyph_name_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
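+/**
+ * hb_font_funcs_set_glyph_from_name_func:
+ * @ffuncs: font functions.
+ * @func: (closure user_data) (destroy destroy) (scope notified): callback function.
+ * @user_data: data to pass to @func.
+ * @destroy: function to call when @user_data is not needed anymore.
+ *
+ * Sets the #hb_font_get_glyph_from_name_func_t callback of @ffuncs.
+ *
+ * Since: 0.9.2
+ **/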
+HB_EXTERN void
+hb_font_funcs_set_glyph_from_name_func (hb_font_funcs_t *ffuncs,
+ hb_font_get_glyph_from_name_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
+/* func dispatch */
+
+HB_EXTERN hb_bool_t
+hb_font_get_h_extents (hb_font_t *font,
+ hb_font_extents_t *extents);
+HB_EXTERN hb_bool_t
+hb_font_get_v_extents (hb_font_t *font,
+ hb_font_extents_t *extents);
+
+HB_EXTERN hb_bool_t
+hb_font_get_nominal_glyph (hb_font_t *font,
+ hb_codepoint_t unicode,
+ hb_codepoint_t *glyph);
+HB_EXTERN hb_bool_t
+hb_font_get_variation_glyph (hb_font_t *font,
+ hb_codepoint_t unicode, hb_codepoint_t variation_selector,
+ hb_codepoint_t *glyph);
+
+HB_EXTERN unsigned int
+hb_font_get_nominal_glyphs (hb_font_t *font,
+ unsigned int count,
+ const hb_codepoint_t *first_unicode,
+ unsigned int unicode_stride,
+ hb_codepoint_t *first_glyph,
+ unsigned int glyph_stride);
+
+HB_EXTERN hb_position_t
+hb_font_get_glyph_h_advance (hb_font_t *font,
+ hb_codepoint_t glyph);
+HB_EXTERN hb_position_t
+hb_font_get_glyph_v_advance (hb_font_t *font,
+ hb_codepoint_t glyph);
+
+HB_EXTERN void
+hb_font_get_glyph_h_advances (hb_font_t* font,
+ unsigned int count,
+ const hb_codepoint_t *first_glyph,
+ unsigned glyph_stride,
+ hb_position_t *first_advance,
+ unsigned advance_stride);
+HB_EXTERN void
+hb_font_get_glyph_v_advances (hb_font_t* font,
+ unsigned int count,
+ const hb_codepoint_t *first_glyph,
+ unsigned glyph_stride,
+ hb_position_t *first_advance,
+ unsigned advance_stride);
+
+HB_EXTERN hb_bool_t
+hb_font_get_glyph_h_origin (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_position_t *x, hb_position_t *y);
+HB_EXTERN hb_bool_t
+hb_font_get_glyph_v_origin (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_position_t *x, hb_position_t *y);
+
+HB_EXTERN hb_position_t
+hb_font_get_glyph_h_kerning (hb_font_t *font,
+ hb_codepoint_t left_glyph, hb_codepoint_t right_glyph);
+
+HB_EXTERN hb_bool_t
+hb_font_get_glyph_extents (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_glyph_extents_t *extents);
+
+HB_EXTERN hb_bool_t
+hb_font_get_glyph_contour_point (hb_font_t *font,
+ hb_codepoint_t glyph, unsigned int point_index,
+ hb_position_t *x, hb_position_t *y);
+
+HB_EXTERN hb_bool_t
+hb_font_get_glyph_name (hb_font_t *font,
+ hb_codepoint_t glyph,
+ char *name, unsigned int size);
+HB_EXTERN hb_bool_t
+hb_font_get_glyph_from_name (hb_font_t *font,
+ const char *name, int len, /* -1 means nul-terminated */
+ hb_codepoint_t *glyph);
+
+
+/* high-level funcs, with fallback */
+
+/* Calls either hb_font_get_nominal_glyph() if variation_selector is 0,
+ * otherwise calls hb_font_get_variation_glyph(). */
+HB_EXTERN hb_bool_t
+hb_font_get_glyph (hb_font_t *font,
+ hb_codepoint_t unicode, hb_codepoint_t variation_selector,
+ hb_codepoint_t *glyph);
+
+HB_EXTERN void
+hb_font_get_extents_for_direction (hb_font_t *font,
+ hb_direction_t direction,
+ hb_font_extents_t *extents);
+HB_EXTERN void
+hb_font_get_glyph_advance_for_direction (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_direction_t direction,
+ hb_position_t *x, hb_position_t *y);
+HB_EXTERN void
+hb_font_get_glyph_advances_for_direction (hb_font_t* font,
+ hb_direction_t direction,
+ unsigned int count,
+ const hb_codepoint_t *first_glyph,
+ unsigned glyph_stride,
+ hb_position_t *first_advance,
+ unsigned advance_stride);
+HB_EXTERN void
+hb_font_get_glyph_origin_for_direction (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_direction_t direction,
+ hb_position_t *x, hb_position_t *y);
+HB_EXTERN void
+hb_font_add_glyph_origin_for_direction (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_direction_t direction,
+ hb_position_t *x, hb_position_t *y);
+HB_EXTERN void
+hb_font_subtract_glyph_origin_for_direction (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_direction_t direction,
+ hb_position_t *x, hb_position_t *y);
+
+HB_EXTERN void
+hb_font_get_glyph_kerning_for_direction (hb_font_t *font,
+ hb_codepoint_t first_glyph, hb_codepoint_t second_glyph,
+ hb_direction_t direction,
+ hb_position_t *x, hb_position_t *y);
+
+HB_EXTERN hb_bool_t
+hb_font_get_glyph_extents_for_origin (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_direction_t direction,
+ hb_glyph_extents_t *extents);
+
+HB_EXTERN hb_bool_t
+hb_font_get_glyph_contour_point_for_origin (hb_font_t *font,
+ hb_codepoint_t glyph, unsigned int point_index,
+ hb_direction_t direction,
+ hb_position_t *x, hb_position_t *y);
+
+/* Generates gidDDD if glyph has no name. */
+HB_EXTERN void
+hb_font_glyph_to_string (hb_font_t *font,
+ hb_codepoint_t glyph,
+ char *s, unsigned int size);
+/* Parses gidDDD and uniUUUU strings automatically. */
+HB_EXTERN hb_bool_t
+hb_font_glyph_from_string (hb_font_t *font,
+ const char *s, int len, /* -1 means nul-terminated */
+ hb_codepoint_t *glyph);
+
+
+/*
+ * hb_font_t
+ */
+
+/* Fonts are very light-weight objects */
+
+HB_EXTERN hb_font_t *
+hb_font_create (hb_face_t *face);
+
+HB_EXTERN hb_font_t *
+hb_font_create_sub_font (hb_font_t *parent);
+
+HB_EXTERN hb_font_t *
+hb_font_get_empty (void);
+
+HB_EXTERN hb_font_t *
+hb_font_reference (hb_font_t *font);
+
+HB_EXTERN void
+hb_font_destroy (hb_font_t *font);
+
+HB_EXTERN hb_bool_t
+hb_font_set_user_data (hb_font_t *font,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace);
+
+
+HB_EXTERN void *
+hb_font_get_user_data (hb_font_t *font,
+ hb_user_data_key_t *key);
+
+HB_EXTERN void
+hb_font_make_immutable (hb_font_t *font);
+
+HB_EXTERN hb_bool_t
+hb_font_is_immutable (hb_font_t *font);
+
+HB_EXTERN void
+hb_font_set_parent (hb_font_t *font,
+ hb_font_t *parent);
+
+HB_EXTERN hb_font_t *
+hb_font_get_parent (hb_font_t *font);
+
+HB_EXTERN void
+hb_font_set_face (hb_font_t *font,
+ hb_face_t *face);
+
+HB_EXTERN hb_face_t *
+hb_font_get_face (hb_font_t *font);
+
+
+HB_EXTERN void
+hb_font_set_funcs (hb_font_t *font,
+ hb_font_funcs_t *klass,
+ void *font_data,
+ hb_destroy_func_t destroy);
+
+/* Be *very* careful with this function! */
+HB_EXTERN void
+hb_font_set_funcs_data (hb_font_t *font,
+ void *font_data,
+ hb_destroy_func_t destroy);
+
+
+HB_EXTERN void
+hb_font_set_scale (hb_font_t *font,
+ int x_scale,
+ int y_scale);
+
+HB_EXTERN void
+hb_font_get_scale (hb_font_t *font,
+ int *x_scale,
+ int *y_scale);
+
+/*
+ * A zero value means "no hinting in that direction"
+ */
+HB_EXTERN void
+hb_font_set_ppem (hb_font_t *font,
+ unsigned int x_ppem,
+ unsigned int y_ppem);
+
+HB_EXTERN void
+hb_font_get_ppem (hb_font_t *font,
+ unsigned int *x_ppem,
+ unsigned int *y_ppem);
+
+/*
+ * Point size per EM. Used for optical-sizing in CoreText.
+ * A value of zero means "not set".
+ */
+HB_EXTERN void
+hb_font_set_ptem (hb_font_t *font, float ptem);
+
+HB_EXTERN float
+hb_font_get_ptem (hb_font_t *font);
+
+HB_EXTERN void
+hb_font_set_variations (hb_font_t *font,
+ const hb_variation_t *variations,
+ unsigned int variations_length);
+
+HB_EXTERN void
+hb_font_set_var_coords_design (hb_font_t *font,
+ const float *coords,
+ unsigned int coords_length);
+
+#ifdef HB_EXPERIMENTAL_API
+HB_EXTERN const float *
+hb_font_get_var_coords_design (hb_font_t *font,
+ unsigned int *length);
+#endif
+
+HB_EXTERN void
+hb_font_set_var_coords_normalized (hb_font_t *font,
+ const int *coords, /* 2.14 normalized */
+ unsigned int coords_length);
+
+HB_EXTERN const int *
+hb_font_get_var_coords_normalized (hb_font_t *font,
+ unsigned int *length);
+
+HB_EXTERN void
+hb_font_set_var_named_instance (hb_font_t *font,
+ unsigned instance_index);
+
+#ifdef HB_EXPERIMENTAL_API
+HB_EXTERN hb_bool_t
+hb_font_draw_glyph (hb_font_t *font, hb_codepoint_t glyph,
+ const hb_draw_funcs_t *funcs, void *user_data);
+#endif
+
+HB_END_DECLS
+
+#endif /* HB_FONT_H */
diff --git a/thirdparty/harfbuzz/src/hb-font.hh b/thirdparty/harfbuzz/src/hb-font.hh
new file mode 100644
index 0000000000..8fc7f44d44
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-font.hh
@@ -0,0 +1,632 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2011 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_FONT_HH
+#define HB_FONT_HH
+
+#include "hb.hh"
+
+#include "hb-face.hh"
+#include "hb-shaper.hh"
+
+
+/*
+ * hb_font_funcs_t
+ */
+
+/* Master list of font callbacks.  Each use site defines
+ * HB_FONT_FUNC_IMPLEMENT(name) to whatever it wants generated per callback,
+ * expands this list, then undefines the helper again. */
+#define HB_FONT_FUNCS_IMPLEMENT_CALLBACKS \
+ HB_FONT_FUNC_IMPLEMENT (font_h_extents) \
+ HB_FONT_FUNC_IMPLEMENT (font_v_extents) \
+ HB_FONT_FUNC_IMPLEMENT (nominal_glyph) \
+ HB_FONT_FUNC_IMPLEMENT (nominal_glyphs) \
+ HB_FONT_FUNC_IMPLEMENT (variation_glyph) \
+ HB_FONT_FUNC_IMPLEMENT (glyph_h_advance) \
+ HB_FONT_FUNC_IMPLEMENT (glyph_v_advance) \
+ HB_FONT_FUNC_IMPLEMENT (glyph_h_advances) \
+ HB_FONT_FUNC_IMPLEMENT (glyph_v_advances) \
+ HB_FONT_FUNC_IMPLEMENT (glyph_h_origin) \
+ HB_FONT_FUNC_IMPLEMENT (glyph_v_origin) \
+ HB_FONT_FUNC_IMPLEMENT (glyph_h_kerning) \
+ HB_IF_NOT_DEPRECATED (HB_FONT_FUNC_IMPLEMENT (glyph_v_kerning)) \
+ HB_FONT_FUNC_IMPLEMENT (glyph_extents) \
+ HB_FONT_FUNC_IMPLEMENT (glyph_contour_point) \
+ HB_FONT_FUNC_IMPLEMENT (glyph_name) \
+ HB_FONT_FUNC_IMPLEMENT (glyph_from_name) \
+ /* ^--- Add new callbacks here */
+
+/* Callback table used by hb_font_t: for every entry of the list above it
+ * holds a user-data pointer, a destroy notifier and the function pointer. */
+struct hb_font_funcs_t
+{
+ hb_object_header_t header;
+
+ /* One opaque pointer per callback; hb_font_t passes it as the last
+  * argument when it invokes that callback. */
+ struct {
+#define HB_FONT_FUNC_IMPLEMENT(name) void *name;
+ HB_FONT_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_FONT_FUNC_IMPLEMENT
+ } user_data;
+
+ /* One destroy notifier per callback.
+  * NOTE(review): presumably called to release the matching user_data
+  * entry -- confirm in hb-font.cc. */
+ struct {
+#define HB_FONT_FUNC_IMPLEMENT(name) hb_destroy_func_t name;
+ HB_FONT_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_FONT_FUNC_IMPLEMENT
+ } destroy;
+
+ /* Don't access these directly. Call font->get_*() instead. */
+ /* `f` gives named access to the function pointers; `array` overlays the
+  * same storage for index-based access.  Its length is computed by adding
+  * one per callback in the list. */
+ union get_t {
+ struct get_funcs_t {
+#define HB_FONT_FUNC_IMPLEMENT(name) hb_font_get_##name##_func_t name;
+ HB_FONT_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_FONT_FUNC_IMPLEMENT
+ } f;
+ void (*array[0
+#define HB_FONT_FUNC_IMPLEMENT(name) +1
+ HB_FONT_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_FONT_FUNC_IMPLEMENT
+ ]) ();
+ } get;
+};
+DECLARE_NULL_INSTANCE (hb_font_funcs_t);
+
+
+/*
+ * hb_font_t
+ */
+
+#define HB_SHAPER_IMPLEMENT(shaper) HB_SHAPER_DATA_INSTANTIATE_SHAPERS(shaper, font);
+#include "hb-shaper-list.hh"
+#undef HB_SHAPER_IMPLEMENT
+
+struct hb_font_t
+{
+ hb_object_header_t header;
+
+ hb_font_t *parent;
+ hb_face_t *face;
+
+ int32_t x_scale;
+ int32_t y_scale;
+ int64_t x_mult;
+ int64_t y_mult;
+
+ unsigned int x_ppem;
+ unsigned int y_ppem;
+
+ float ptem;
+
+ /* Font variation coordinates. */
+ unsigned int num_coords;
+ int *coords;
+ float *design_coords;
+
+ hb_font_funcs_t *klass;
+ void *user_data;
+ hb_destroy_func_t destroy;
+
+ hb_shaper_object_dataset_t<hb_font_t> data; /* Various shaper data. */
+
+
+ /* Convert from font-space to user-space */
+ /* x_mult / y_mult are (scale << 16) / upem as maintained by
+  * mults_changed (), i.e. multipliers with 16 fractional bits.  The
+  * em_scale_* helpers go through em_mult (); the em_scalef_* / em_fscale_*
+  * helpers use the plain scale and divide by upem in floating point. */
+ /* Multiplier for the axis that matches `direction`. */
+ int64_t dir_mult (hb_direction_t direction)
+ { return HB_DIRECTION_IS_VERTICAL(direction) ? y_mult : x_mult; }
+ /* Integer font units -> hb_position_t. */
+ hb_position_t em_scale_x (int16_t v) { return em_mult (v, x_mult); }
+ hb_position_t em_scale_y (int16_t v) { return em_mult (v, y_mult); }
+ /* Float font units -> rounded hb_position_t. */
+ hb_position_t em_scalef_x (float v) { return em_scalef (v, x_scale); }
+ hb_position_t em_scalef_y (float v) { return em_scalef (v, y_scale); }
+ /* Integer font units -> unrounded float. */
+ float em_fscale_x (int16_t v) { return em_fscale (v, x_scale); }
+ float em_fscale_y (int16_t v) { return em_fscale (v, y_scale); }
+ hb_position_t em_scale_dir (int16_t v, hb_direction_t direction)
+ { return em_mult (v, dir_mult (direction)); }
+
+  /* Map values expressed in the parent font's user-space into ours. */
+  hb_position_t parent_scale_x_distance (hb_position_t v)
+  {
+    /* No parent, or identical horizontal scale: value is already ours. */
+    if (likely (!parent || parent->x_scale == x_scale))
+      return v;
+    /* Widen to 64 bits before multiplying so the product cannot wrap. */
+    int64_t rescaled = v * (int64_t) x_scale / parent->x_scale;
+    return (hb_position_t) rescaled;
+  }
+  hb_position_t parent_scale_y_distance (hb_position_t v)
+  {
+    /* No parent, or identical vertical scale: value is already ours. */
+    if (likely (!parent || parent->y_scale == y_scale))
+      return v;
+    int64_t rescaled = v * (int64_t) y_scale / parent->y_scale;
+    return (hb_position_t) rescaled;
+  }
+  /* Positions are rescaled exactly like distances. */
+  hb_position_t parent_scale_x_position (hb_position_t v)
+  {
+    return parent_scale_x_distance (v);
+  }
+  hb_position_t parent_scale_y_position (hb_position_t v)
+  {
+    return parent_scale_y_distance (v);
+  }
+
+  /* In-place two-axis variants of the helpers above. */
+  void parent_scale_distance (hb_position_t *x, hb_position_t *y)
+  {
+    hb_position_t dx = parent_scale_x_distance (*x);
+    *x = dx;
+    hb_position_t dy = parent_scale_y_distance (*y);
+    *y = dy;
+  }
+  void parent_scale_position (hb_position_t *x, hb_position_t *y)
+  {
+    hb_position_t px = parent_scale_x_position (*x);
+    *x = px;
+    hb_position_t py = parent_scale_y_position (*y);
+    *y = py;
+  }
+
+
+ /* Public getters */
+
+ HB_INTERNAL bool has_func (unsigned int i);
+ HB_INTERNAL bool has_func_set (unsigned int i);
+
+ /* has_* ... */
+ /* Generates has_<name>_func () and has_<name>_func_set () for every entry
+  * of HB_FONT_FUNCS_IMPLEMENT_CALLBACKS.  Both compute the callback's slot
+  * index as the byte offset of its member in get_funcs_t divided by the
+  * size of one function-pointer slot, then forward to has_func () or
+  * has_func_set ().  `funcs` is only used inside sizeof. */
+#define HB_FONT_FUNC_IMPLEMENT(name) \
+ bool \
+ has_##name##_func () \
+ { \
+ hb_font_funcs_t *funcs = this->klass; \
+ unsigned int i = offsetof (hb_font_funcs_t::get_t::get_funcs_t, name) / sizeof (funcs->get.array[0]); \
+ return has_func (i); \
+ } \
+ bool \
+ has_##name##_func_set () \
+ { \
+ hb_font_funcs_t *funcs = this->klass; \
+ unsigned int i = offsetof (hb_font_funcs_t::get_t::get_funcs_t, name) / sizeof (funcs->get.array[0]); \
+ return has_func_set (i); \
+ }
+ HB_FONT_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_FONT_FUNC_IMPLEMENT
+
+  /* Font-wide extents for horizontal layout.  The output is zeroed before
+   * the callback runs, so a failing callback leaves all-zero extents. */
+  hb_bool_t get_font_h_extents (hb_font_extents_t *extents)
+  {
+    memset (extents, 0, sizeof (hb_font_extents_t));
+    void *cb_data = klass->user_data.font_h_extents;
+    return klass->get.f.font_h_extents (this, this->user_data, extents, cb_data);
+  }
+  /* Font-wide extents for vertical layout; same zero-first contract. */
+  hb_bool_t get_font_v_extents (hb_font_extents_t *extents)
+  {
+    memset (extents, 0, sizeof (hb_font_extents_t));
+    void *cb_data = klass->user_data.font_v_extents;
+    return klass->get.f.font_v_extents (this, this->user_data, extents, cb_data);
+  }
+
+  /* True when the nominal-glyph callback maps `unicode` to a glyph. */
+  bool has_glyph (hb_codepoint_t unicode)
+  {
+    hb_codepoint_t ignored;
+    return get_nominal_glyph (unicode, &ignored);
+  }
+
+  /* Single-codepoint lookup; *glyph is reset to 0 before the callback. */
+  hb_bool_t get_nominal_glyph (hb_codepoint_t unicode,
+                               hb_codepoint_t *glyph)
+  {
+    *glyph = 0;
+    void *cb_data = klass->user_data.nominal_glyph;
+    return klass->get.f.nominal_glyph (this, this->user_data,
+                                       unicode, glyph,
+                                       cb_data);
+  }
+  /* Batched lookup; forwards everything to the callback and returns its
+   * result unchanged. */
+  unsigned int get_nominal_glyphs (unsigned int count,
+                                   const hb_codepoint_t *first_unicode,
+                                   unsigned int unicode_stride,
+                                   hb_codepoint_t *first_glyph,
+                                   unsigned int glyph_stride)
+  {
+    void *cb_data = klass->user_data.nominal_glyphs;
+    unsigned int done = klass->get.f.nominal_glyphs (this, this->user_data,
+                                                     count,
+                                                     first_unicode, unicode_stride,
+                                                     first_glyph, glyph_stride,
+                                                     cb_data);
+    return done;
+  }
+
+  /* Lookup for a base codepoint plus variation selector; *glyph is reset
+   * to 0 before the callback. */
+  hb_bool_t get_variation_glyph (hb_codepoint_t unicode, hb_codepoint_t variation_selector,
+                                 hb_codepoint_t *glyph)
+  {
+    *glyph = 0;
+    void *cb_data = klass->user_data.variation_glyph;
+    return klass->get.f.variation_glyph (this, this->user_data,
+                                         unicode, variation_selector, glyph,
+                                         cb_data);
+  }
+
+  /* Horizontal advance of one glyph, straight from the callback. */
+  hb_position_t get_glyph_h_advance (hb_codepoint_t glyph)
+  {
+    void *cb_data = klass->user_data.glyph_h_advance;
+    return klass->get.f.glyph_h_advance (this, this->user_data, glyph, cb_data);
+  }
+
+  /* Vertical advance of one glyph, straight from the callback. */
+  hb_position_t get_glyph_v_advance (hb_codepoint_t glyph)
+  {
+    void *cb_data = klass->user_data.glyph_v_advance;
+    return klass->get.f.glyph_v_advance (this, this->user_data, glyph, cb_data);
+  }
+
+  /* Batched horizontal advances; all arguments are forwarded untouched. */
+  void get_glyph_h_advances (unsigned int count,
+                             const hb_codepoint_t *first_glyph,
+                             unsigned int glyph_stride,
+                             hb_position_t *first_advance,
+                             unsigned int advance_stride)
+  {
+    void *cb_data = klass->user_data.glyph_h_advances;
+    klass->get.f.glyph_h_advances (this, this->user_data,
+                                   count,
+                                   first_glyph, glyph_stride,
+                                   first_advance, advance_stride,
+                                   cb_data);
+  }
+
+  /* Batched vertical advances; all arguments are forwarded untouched. */
+  void get_glyph_v_advances (unsigned int count,
+                             const hb_codepoint_t *first_glyph,
+                             unsigned int glyph_stride,
+                             hb_position_t *first_advance,
+                             unsigned int advance_stride)
+  {
+    void *cb_data = klass->user_data.glyph_v_advances;
+    klass->get.f.glyph_v_advances (this, this->user_data,
+                                   count,
+                                   first_glyph, glyph_stride,
+                                   first_advance, advance_stride,
+                                   cb_data);
+  }
+
+  /* Origin of `glyph` for horizontal layout.  Both outputs are zeroed
+   * before the callback runs. */
+  hb_bool_t get_glyph_h_origin (hb_codepoint_t glyph,
+                                hb_position_t *x, hb_position_t *y)
+  {
+    *y = 0;
+    *x = 0;
+    void *cb_data = klass->user_data.glyph_h_origin;
+    return klass->get.f.glyph_h_origin (this, this->user_data,
+                                        glyph, x, y,
+                                        cb_data);
+  }
+
+  /* Origin of `glyph` for vertical layout.  Both outputs are zeroed
+   * before the callback runs. */
+  hb_bool_t get_glyph_v_origin (hb_codepoint_t glyph,
+                                hb_position_t *x, hb_position_t *y)
+  {
+    *y = 0;
+    *x = 0;
+    void *cb_data = klass->user_data.glyph_v_origin;
+    return klass->get.f.glyph_v_origin (this, this->user_data,
+                                        glyph, x, y,
+                                        cb_data);
+  }
+
+ /* Horizontal kerning between two glyphs via the glyph_h_kerning callback.
+  * Returns 0 unconditionally when HB_DISABLE_DEPRECATED is defined.
+  * NOTE(review): glyph_h_kerning is not wrapped in HB_IF_NOT_DEPRECATED in
+  * the callback list (only glyph_v_kerning is), yet it is disabled here as
+  * well -- confirm this is intentional. */
+ hb_position_t get_glyph_h_kerning (hb_codepoint_t left_glyph,
+ hb_codepoint_t right_glyph)
+ {
+#ifdef HB_DISABLE_DEPRECATED
+ return 0;
+#else
+ return klass->get.f.glyph_h_kerning (this, user_data,
+ left_glyph, right_glyph,
+ klass->user_data.glyph_h_kerning);
+#endif
+ }
+
+ /* Vertical kerning between two glyphs via the glyph_v_kerning callback.
+  * The callback slot is listed under HB_IF_NOT_DEPRECATED, so this returns
+  * 0 when HB_DISABLE_DEPRECATED is defined. */
+ hb_position_t get_glyph_v_kerning (hb_codepoint_t top_glyph,
+ hb_codepoint_t bottom_glyph)
+ {
+#ifdef HB_DISABLE_DEPRECATED
+ return 0;
+#else
+ return klass->get.f.glyph_v_kerning (this, user_data,
+ top_glyph, bottom_glyph,
+ klass->user_data.glyph_v_kerning);
+#endif
+ }
+
+  /* Ink extents of `glyph`; *extents is zeroed before the callback runs. */
+  hb_bool_t get_glyph_extents (hb_codepoint_t glyph,
+                               hb_glyph_extents_t *extents)
+  {
+    memset (extents, 0, sizeof (hb_glyph_extents_t));
+    void *cb_data = klass->user_data.glyph_extents;
+    return klass->get.f.glyph_extents (this, this->user_data,
+                                       glyph, extents,
+                                       cb_data);
+  }
+
+  /* Coordinates of one outline point; outputs are zeroed first. */
+  hb_bool_t get_glyph_contour_point (hb_codepoint_t glyph, unsigned int point_index,
+                                     hb_position_t *x, hb_position_t *y)
+  {
+    *y = 0;
+    *x = 0;
+    void *cb_data = klass->user_data.glyph_contour_point;
+    return klass->get.f.glyph_contour_point (this, this->user_data,
+                                             glyph, point_index,
+                                             x, y,
+                                             cb_data);
+  }
+
+  /* Name of `glyph` written into name[0..size).  A non-empty buffer is
+   * pre-terminated so a failing callback leaves an empty string. */
+  hb_bool_t get_glyph_name (hb_codepoint_t glyph,
+                            char *name, unsigned int size)
+  {
+    if (size != 0)
+      name[0] = '\0';
+    void *cb_data = klass->user_data.glyph_name;
+    return klass->get.f.glyph_name (this, this->user_data,
+                                    glyph,
+                                    name, size,
+                                    cb_data);
+  }
+
+  /* Glyph for a name; a length of -1 is replaced by strlen (name) before
+   * the callback is invoked. */
+  hb_bool_t get_glyph_from_name (const char *name, int len, /* -1 means nul-terminated */
+                                 hb_codepoint_t *glyph)
+  {
+    *glyph = 0;
+    if (len == -1)
+      len = strlen (name);
+    void *cb_data = klass->user_data.glyph_from_name;
+    return klass->get.f.glyph_from_name (this, this->user_data,
+                                         name, len,
+                                         glyph,
+                                         cb_data);
+  }
+
+
+ /* A bit higher-level, and with fallback */
+
+ /* Horizontal font extents.  If the callback reports failure, synthesizes
+  * them from y_scale: ascender is 0.8 * y_scale, descender is chosen so
+  * that ascender - descender equals y_scale, and there is no line gap. */
+ void get_h_extents_with_fallback (hb_font_extents_t *extents)
+ {
+ if (!get_font_h_extents (extents))
+ {
+ extents->ascender = y_scale * .8;
+ extents->descender = extents->ascender - y_scale;
+ extents->line_gap = 0;
+ }
+ }
+ /* Vertical font extents.  If the callback reports failure, synthesizes
+  * them from x_scale: ascender is x_scale / 2 (integer division), descender
+  * is chosen so that ascender - descender equals x_scale, no line gap. */
+ void get_v_extents_with_fallback (hb_font_extents_t *extents)
+ {
+ if (!get_font_v_extents (extents))
+ {
+ extents->ascender = x_scale / 2;
+ extents->descender = extents->ascender - x_scale;
+ extents->line_gap = 0;
+ }
+ }
+
+ /* Picks the horizontal or vertical variant above based on `direction`. */
+ void get_extents_for_direction (hb_direction_t direction,
+ hb_font_extents_t *extents)
+ {
+ if (likely (HB_DIRECTION_IS_HORIZONTAL (direction)))
+ get_h_extents_with_fallback (extents);
+ else
+ get_v_extents_with_fallback (extents);
+ }
+
+  /* Advance of `glyph` as a vector: only the component along `direction`
+   * is filled in, the other one stays 0. */
+  void get_glyph_advance_for_direction (hb_codepoint_t glyph,
+                                        hb_direction_t direction,
+                                        hb_position_t *x, hb_position_t *y)
+  {
+    *y = 0;
+    *x = 0;
+    if (unlikely (!HB_DIRECTION_IS_HORIZONTAL (direction)))
+    {
+      *y = get_glyph_v_advance (glyph);
+      return;
+    }
+    *x = get_glyph_h_advance (glyph);
+  }
+  /* Batched advances along `direction`; dispatches to the horizontal or
+   * vertical batched getter with the arguments unchanged. */
+  void get_glyph_advances_for_direction (hb_direction_t direction,
+                                         unsigned int count,
+                                         const hb_codepoint_t *first_glyph,
+                                         unsigned glyph_stride,
+                                         hb_position_t *first_advance,
+                                         unsigned advance_stride)
+  {
+    if (unlikely (!HB_DIRECTION_IS_HORIZONTAL (direction)))
+    {
+      get_glyph_v_advances (count, first_glyph, glyph_stride, first_advance, advance_stride);
+      return;
+    }
+    get_glyph_h_advances (count, first_glyph, glyph_stride, first_advance, advance_stride);
+  }
+
+ /* Estimates the offset between a glyph's vertical and horizontal origin:
+  * x is half the horizontal advance, y is the horizontal ascender (real or
+  * synthesized by get_h_extents_with_fallback). */
+ void guess_v_origin_minus_h_origin (hb_codepoint_t glyph,
+ hb_position_t *x, hb_position_t *y)
+ {
+ *x = get_glyph_h_advance (glyph) / 2;
+
+ /* TODO cache this somehow?! */
+ hb_font_extents_t extents;
+ get_h_extents_with_fallback (&extents);
+ *y = extents.ascender;
+ }
+
+ /* Horizontal origin.  If the h-origin callback fails but the v-origin
+  * callback succeeds, derives the result by subtracting the guessed offset
+  * from the vertical origin.  If both fail, x and y keep the zeros written
+  * by the getters. */
+ void get_glyph_h_origin_with_fallback (hb_codepoint_t glyph,
+ hb_position_t *x, hb_position_t *y)
+ {
+ if (!get_glyph_h_origin (glyph, x, y) &&
+ get_glyph_v_origin (glyph, x, y))
+ {
+ hb_position_t dx, dy;
+ guess_v_origin_minus_h_origin (glyph, &dx, &dy);
+ *x -= dx; *y -= dy;
+ }
+ }
+ /* Vertical origin.  Mirror image of the function above: falls back to the
+  * horizontal origin plus the guessed offset. */
+ void get_glyph_v_origin_with_fallback (hb_codepoint_t glyph,
+ hb_position_t *x, hb_position_t *y)
+ {
+ if (!get_glyph_v_origin (glyph, x, y) &&
+ get_glyph_h_origin (glyph, x, y))
+ {
+ hb_position_t dx, dy;
+ guess_v_origin_minus_h_origin (glyph, &dx, &dy);
+ *x += dx; *y += dy;
+ }
+ }
+
+ /* Picks the horizontal or vertical variant above based on `direction`. */
+ void get_glyph_origin_for_direction (hb_codepoint_t glyph,
+ hb_direction_t direction,
+ hb_position_t *x, hb_position_t *y)
+ {
+ if (likely (HB_DIRECTION_IS_HORIZONTAL (direction)))
+ get_glyph_h_origin_with_fallback (glyph, x, y);
+ else
+ get_glyph_v_origin_with_fallback (glyph, x, y);
+ }
+
+  /* Translate (*x, *y) by the glyph's horizontal origin. */
+  void add_glyph_h_origin (hb_codepoint_t glyph,
+                           hb_position_t *x, hb_position_t *y)
+  {
+    hb_position_t ox, oy;
+    get_glyph_h_origin_with_fallback (glyph, &ox, &oy);
+    *x = *x + ox;
+    *y = *y + oy;
+  }
+  /* Translate (*x, *y) by the glyph's vertical origin. */
+  void add_glyph_v_origin (hb_codepoint_t glyph,
+                           hb_position_t *x, hb_position_t *y)
+  {
+    hb_position_t ox, oy;
+    get_glyph_v_origin_with_fallback (glyph, &ox, &oy);
+    *x = *x + ox;
+    *y = *y + oy;
+  }
+  /* Translate (*x, *y) by the glyph's origin for `direction`. */
+  void add_glyph_origin_for_direction (hb_codepoint_t glyph,
+                                       hb_direction_t direction,
+                                       hb_position_t *x, hb_position_t *y)
+  {
+    hb_position_t ox, oy;
+    get_glyph_origin_for_direction (glyph, direction, &ox, &oy);
+    *x = *x + ox;
+    *y = *y + oy;
+  }
+
+  /* Translate (*x, *y) by minus the glyph's horizontal origin. */
+  void subtract_glyph_h_origin (hb_codepoint_t glyph,
+                                hb_position_t *x, hb_position_t *y)
+  {
+    hb_position_t ox, oy;
+    get_glyph_h_origin_with_fallback (glyph, &ox, &oy);
+    *x = *x - ox;
+    *y = *y - oy;
+  }
+  /* Translate (*x, *y) by minus the glyph's vertical origin. */
+  void subtract_glyph_v_origin (hb_codepoint_t glyph,
+                                hb_position_t *x, hb_position_t *y)
+  {
+    hb_position_t ox, oy;
+    get_glyph_v_origin_with_fallback (glyph, &ox, &oy);
+    *x = *x - ox;
+    *y = *y - oy;
+  }
+  /* Translate (*x, *y) by minus the glyph's origin for `direction`. */
+  void subtract_glyph_origin_for_direction (hb_codepoint_t glyph,
+                                            hb_direction_t direction,
+                                            hb_position_t *x, hb_position_t *y)
+  {
+    hb_position_t ox, oy;
+    get_glyph_origin_for_direction (glyph, direction, &ox, &oy);
+    *x = *x - ox;
+    *y = *y - oy;
+  }
+
+  /* Kerning between two glyphs as a vector: the component along
+   * `direction` comes from the kerning getter, the other one is 0. */
+  void get_glyph_kerning_for_direction (hb_codepoint_t first_glyph, hb_codepoint_t second_glyph,
+                                        hb_direction_t direction,
+                                        hb_position_t *x, hb_position_t *y)
+  {
+    if (unlikely (!HB_DIRECTION_IS_HORIZONTAL (direction)))
+    {
+      *x = 0;
+      *y = get_glyph_v_kerning (first_glyph, second_glyph);
+      return;
+    }
+    *y = 0;
+    *x = get_glyph_h_kerning (first_glyph, second_glyph);
+  }
+
+  /* Glyph extents with the bearings made relative to the glyph origin for
+   * `direction`.  The bearings are only adjusted when the lookup succeeds. */
+  hb_bool_t get_glyph_extents_for_origin (hb_codepoint_t glyph,
+                                          hb_direction_t direction,
+                                          hb_glyph_extents_t *extents)
+  {
+    hb_bool_t found = get_glyph_extents (glyph, extents);
+    if (!found)
+      return found;
+
+    subtract_glyph_origin_for_direction (glyph, direction, &extents->x_bearing, &extents->y_bearing);
+    return found;
+  }
+
+  /* Contour point made relative to the glyph origin for `direction`.  The
+   * point is only adjusted when the lookup succeeds. */
+  hb_bool_t get_glyph_contour_point_for_origin (hb_codepoint_t glyph, unsigned int point_index,
+                                                hb_direction_t direction,
+                                                hb_position_t *x, hb_position_t *y)
+  {
+    hb_bool_t found = get_glyph_contour_point (glyph, point_index, x, y);
+    if (!found)
+      return found;
+
+    subtract_glyph_origin_for_direction (glyph, direction, x, y);
+    return found;
+  }
+
+  /* Writes the glyph's name into s[0..size); when the font has no name for
+   * it, writes "gid" followed by the decimal glyph index instead. */
+  void
+  glyph_to_string (hb_codepoint_t glyph,
+                   char *s, unsigned int size)
+  {
+    if (get_glyph_name (glyph, s, size))
+      return;
+
+    /* Nothing can be written into an empty buffer. */
+    if (!size)
+      return;
+
+    /* Leave an empty string behind if formatting itself fails. */
+    if (snprintf (s, size, "gid%u", glyph) < 0)
+      *s = '\0';
+  }
+
+ /* Parses gidDDD and uniUUUU strings automatically. */
+ /* Resolves a textual glyph reference.  Attempts, in order: the font's
+  * glyph-name callback, a plain decimal glyph index, "gid" + decimal index,
+  * and "uni" + hexadecimal codepoint mapped through get_nominal_glyph ().
+  * Returns true on the first attempt that succeeds, false if none does. */
+ hb_bool_t
+ glyph_from_string (const char *s, int len, /* -1 means nul-terminated */
+ hb_codepoint_t *glyph)
+ {
+ if (get_glyph_from_name (s, len, glyph)) return true;
+
+ if (len == -1) len = strlen (s);
+
+ /* Straight glyph index. */
+ if (hb_codepoint_parse (s, len, 10, glyph))
+ return true;
+
+ /* Both prefixed forms need at least one character after the prefix. */
+ if (len > 3)
+ {
+ /* gidDDD syntax for glyph indices. */
+ if (0 == strncmp (s, "gid", 3) &&
+ hb_codepoint_parse (s + 3, len - 3, 10, glyph))
+ return true;
+
+ /* uniUUUU and other Unicode character indices. */
+ hb_codepoint_t unichar;
+ if (0 == strncmp (s, "uni", 3) &&
+ hb_codepoint_parse (s + 3, len - 3, 16, &unichar) &&
+ get_nominal_glyph (unichar, glyph))
+ return true;
+ }
+
+ return false;
+ }
+
+ /* Recomputes x_mult / y_mult as (scale << 16) / upem, i.e. the per-unit
+  * scale factor with 16 fractional bits.
+  * NOTE(review): presumably has to be called whenever x_scale, y_scale or
+  * the face changes -- confirm against the callers. */
+ void mults_changed ()
+ {
+ signed upem = face->get_upem ();
+ x_mult = ((int64_t) x_scale << 16) / upem;
+ y_mult = ((int64_t) y_scale << 16) / upem;
+ }
+
+ /* Applies a multiplier produced by mults_changed (): multiplies in 64 bits
+  * and shifts the 16 fractional bits back out. */
+ hb_position_t em_mult (int16_t v, int64_t mult)
+ {
+ return (hb_position_t) ((v * mult) >> 16);
+ }
+ /* v * scale / upem in floating point, rounded to the nearest position. */
+ hb_position_t em_scalef (float v, int scale)
+ { return (hb_position_t) roundf (v * scale / face->get_upem ()); }
+ /* v * scale / upem in floating point, returned unrounded. */
+ float em_fscale (int16_t v, int scale)
+ { return (float) v * scale / face->get_upem (); }
+};
+DECLARE_NULL_INSTANCE (hb_font_t);
+
+
+#endif /* HB_FONT_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ft.cc b/thirdparty/harfbuzz/src/hb-ft.cc
new file mode 100644
index 0000000000..2680873c27
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ft.cc
@@ -0,0 +1,1042 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2009 Keith Stribley
+ * Copyright © 2015 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifdef HAVE_FREETYPE
+
+#include "hb-ft.h"
+
+#include "hb-font.hh"
+#include "hb-machinery.hh"
+#include "hb-cache.hh"
+
+#include FT_ADVANCES_H
+#include FT_MULTIPLE_MASTERS_H
+#include FT_TRUETYPE_TABLES_H
+
+
+/**
+ * SECTION:hb-ft
+ * @title: hb-ft
+ * @short_description: FreeType integration
+ * @include: hb-ft.h
+ *
+ * Functions for using HarfBuzz with the FreeType library.
+ *
+ * HarfBuzz supports using FreeType to provide face and
+ * font data.
+ *
+ * <note>Note that FreeType is not thread-safe, therefore these
+ * functions are not thread-safe either.</note>
+ **/
+
+
+/* TODO:
+ *
+ * In general, this file does a fine job of what it's supposed to do.
+ * There are, however, things that need more work:
+ *
+ * - FreeType works in 26.6 mode. Clients can decide to use that mode, and everything
+ * would work fine. However, we also abuse this API for performing in font-space,
+ * but don't pass the correct flags to FreeType. We just abuse the no-hinting mode
+ * for that, such that no rounding etc happens. As such, we don't set ppem, and
+ * pass NO_HINTING as load_flags. Would be much better to use NO_SCALE, and scale
+ * ourselves.
+ *
+ * - We don't handle / allow for emboldening / obliquing.
+ *
+ * - In the future, we should add constructors to create fonts in font space?
+ */
+
+
+/* Per-font state stored as the hb_font_t's font_data by this file. */
+struct hb_ft_font_t
+{
+ /* Taken by the callbacks in this file around their FreeType calls, and by
+  * hb_ft_font_lock_face () / hb_ft_font_unlock_face (). */
+ mutable hb_mutex_t lock;
+ FT_Face ft_face;
+ /* Flags passed to FT_Load_Glyph () / FT_Get_Advance (). */
+ int load_flags;
+ bool symbol; /* Whether selected cmap is symbol cmap. */
+ bool unref; /* Whether to destroy ft_face when done. */
+
+ /* x_scale the advance cache was filled for; the cache is cleared when the
+  * font's x_scale no longer matches. */
+ mutable hb_atomic_int_t cached_x_scale;
+ /* Horizontal advances from FT_Get_Advance (), keyed by glyph. */
+ mutable hb_advance_cache_t advance_cache;
+};
+
+/* Allocates and initializes the per-font FreeType state.
+ * Returns nullptr if the allocation fails.  Load flags start out as
+ * FT_LOAD_DEFAULT | FT_LOAD_NO_HINTING and the advance cache starts empty
+ * with a cached x_scale of 0. */
+static hb_ft_font_t *
+_hb_ft_font_create (FT_Face ft_face, bool symbol, bool unref)
+{
+  hb_ft_font_t *state = (hb_ft_font_t *) calloc (1, sizeof (*state));
+  if (unlikely (state == nullptr))
+    return nullptr;
+
+  state->lock.init ();
+
+  state->ft_face = ft_face;
+  state->load_flags = FT_LOAD_DEFAULT | FT_LOAD_NO_HINTING;
+  state->symbol = symbol;
+  state->unref = unref;
+
+  state->cached_x_scale.set_relaxed (0);
+  state->advance_cache.init ();
+
+  return state;
+}
+
+/* Destroy callback: `data` is an FT_Face, released with FT_Done_Face (). */
+static void
+_hb_ft_face_destroy (void *data)
+{
+  FT_Face ft_face = (FT_Face) data;
+  FT_Done_Face (ft_face);
+}
+
+/* Destroy callback for the state made by _hb_ft_font_create ().  Tears
+ * down the advance cache, releases the FT_Face when `unref` is set,
+ * finalizes the lock and frees the allocation. */
+static void
+_hb_ft_font_destroy (void *data)
+{
+  hb_ft_font_t *state = (hb_ft_font_t *) data;
+
+  state->advance_cache.fini ();
+
+  const bool owns_face = state->unref;
+  if (owns_face)
+    _hb_ft_face_destroy (state->ft_face);
+
+  state->lock.fini ();
+
+  free (state);
+}
+
+/**
+ * hb_ft_font_set_load_flags:
+ * @font: #hb_font_t to work upon
+ * @load_flags: The FreeType load flags to set
+ *
+ * Stores the FT_Load_Glyph load flags to be used for the given #hb_font_t.
+ * The call is ignored if @font is immutable or was not set up by hb-ft.
+ *
+ * For more information, see
+ * https://www.freetype.org/freetype2/docs/reference/ft2-base_interface.html#ft_load_xxx
+ *
+ * Since: 1.0.5
+ **/
+void
+hb_ft_font_set_load_flags (hb_font_t *font, int load_flags)
+{
+  /* Only mutable fonts whose data belongs to this file are touched. */
+  if (hb_object_is_immutable (font) ||
+      unlikely (font->destroy != (hb_destroy_func_t) _hb_ft_font_destroy))
+    return;
+
+  ((hb_ft_font_t *) font->user_data)->load_flags = load_flags;
+}
+
+/**
+ * hb_ft_font_get_load_flags:
+ * @font: #hb_font_t to work upon
+ *
+ * Reads back the FT_Load_Glyph load flags stored for the given #hb_font_t.
+ *
+ * For more information, see
+ * https://www.freetype.org/freetype2/docs/reference/ft2-base_interface.html#ft_load_xxx
+ *
+ * Return value: FT_Load_Glyph flags found, or 0 if @font was not set up
+ * by hb-ft
+ *
+ * Since: 1.0.5
+ **/
+int
+hb_ft_font_get_load_flags (hb_font_t *font)
+{
+  const bool is_ft_font = font->destroy == (hb_destroy_func_t) _hb_ft_font_destroy;
+  if (unlikely (!is_ft_font))
+    return 0;
+
+  return ((const hb_ft_font_t *) font->user_data)->load_flags;
+}
+
+/**
+ * hb_ft_font_get_face:
+ * @font: #hb_font_t to work upon
+ *
+ * Fetches the FT_Face associated with the specified #hb_font_t
+ * font object.
+ *
+ * Return value: the FT_Face found, or NULL if @font's destroy callback
+ * is not the one installed by hb-ft
+ *
+ * Since: 0.9.2
+ **/
+FT_Face
+hb_ft_font_get_face (hb_font_t *font)
+{
+ /* The destroy callback doubles as the "is this an hb-ft font" tag. */
+ if (unlikely (font->destroy != (hb_destroy_func_t) _hb_ft_font_destroy))
+ return nullptr;
+
+ const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font->user_data;
+
+ return ft_font->ft_face;
+}
+
+/**
+ * hb_ft_font_lock_face:
+ * @font: #hb_font_t to work upon
+ *
+ * Acquires the lock that hb-ft's own font callbacks hold while they use
+ * the FT_Face, then returns that FT_Face. Release the lock again with
+ * hb_ft_font_unlock_face().
+ *
+ * Return value: the FT_Face found, or NULL (with no lock taken) if @font's
+ * destroy callback is not the one installed by hb-ft
+ * Since: 2.6.5
+ **/
+FT_Face
+hb_ft_font_lock_face (hb_font_t *font)
+{
+ if (unlikely (font->destroy != (hb_destroy_func_t) _hb_ft_font_destroy))
+ return nullptr;
+
+ const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font->user_data;
+
+ /* `lock` is declared mutable, so it can be taken through a const pointer. */
+ ft_font->lock.lock ();
+
+ return ft_font->ft_face;
+}
+
+/**
+ * hb_ft_font_unlock_face:
+ * @font: #hb_font_t to work upon
+ *
+ * Releases the lock taken by hb_ft_font_lock_face(). Does nothing if
+ * @font's destroy callback is not the one installed by hb-ft.
+ *
+ * Since: 2.6.5
+ **/
+void
+hb_ft_font_unlock_face (hb_font_t *font)
+{
+ if (unlikely (font->destroy != (hb_destroy_func_t) _hb_ft_font_destroy))
+ return;
+
+ const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font->user_data;
+
+ ft_font->lock.unlock ();
+}
+
+
+/* nominal_glyph callback: maps `unicode` through FT_Get_Char_Index ().
+ * Returns false and leaves *glyph untouched when no glyph is found. */
+static hb_bool_t
+hb_ft_get_nominal_glyph (hb_font_t *font HB_UNUSED,
+                         void *font_data,
+                         hb_codepoint_t unicode,
+                         hb_codepoint_t *glyph,
+                         void *user_data HB_UNUSED)
+{
+  const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data;
+  hb_lock_t lock (ft_font->lock);
+  unsigned int index = FT_Get_Char_Index (ft_font->ft_face, unicode);
+
+  /* For symbol-encoded OpenType fonts, we duplicate the
+   * U+F000..F0FF range at U+0000..U+00FF. That's what
+   * Windows seems to do, and that's hinted about at:
+   * https://docs.microsoft.com/en-us/typography/opentype/spec/recom
+   * under "Non-Standard (Symbol) Fonts". */
+  if (unlikely (!index) && unlikely (ft_font->symbol) && unicode <= 0x00FFu)
+    index = FT_Get_Char_Index (ft_font->ft_face, 0xF000u + unicode);
+
+  if (!index)
+    return false;
+
+  *glyph = index;
+  return true;
+}
+
+/* nominal_glyphs callback: maps a run of codepoints with
+ * FT_Get_Char_Index (), stopping at the first one that has no glyph.
+ * Returns how many leading codepoints were mapped.  The glyph slot of the
+ * codepoint that stopped the loop has already been written with 0.
+ * NOTE(review): the strides are handed to StructAtOffsetUnaligned, so they
+ * are presumably byte offsets between consecutive elements -- confirm. */
+static unsigned int
+hb_ft_get_nominal_glyphs (hb_font_t *font HB_UNUSED,
+ void *font_data,
+ unsigned int count,
+ const hb_codepoint_t *first_unicode,
+ unsigned int unicode_stride,
+ hb_codepoint_t *first_glyph,
+ unsigned int glyph_stride,
+ void *user_data HB_UNUSED)
+{
+ const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data;
+ hb_lock_t lock (ft_font->lock);
+ unsigned int done;
+ /* The loop condition both stores the lookup result and tests it. */
+ for (done = 0;
+ done < count && (*first_glyph = FT_Get_Char_Index (ft_font->ft_face, *first_unicode));
+ done++)
+ {
+ first_unicode = &StructAtOffsetUnaligned<hb_codepoint_t> (first_unicode, unicode_stride);
+ first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride);
+ }
+ /* We don't need to do ft_font->symbol dance here, since HB calls the singular
+ * nominal_glyph() for what we don't handle here. */
+ return done;
+}
+
+
+/* variation_glyph callback: looks up `unicode` + `variation_selector` with
+ * FT_Face_GetCharVariantIndex ().  Returns false and leaves *glyph
+ * untouched when FreeType reports index 0. */
+static hb_bool_t
+hb_ft_get_variation_glyph (hb_font_t *font HB_UNUSED,
+                           void *font_data,
+                           hb_codepoint_t unicode,
+                           hb_codepoint_t variation_selector,
+                           hb_codepoint_t *glyph,
+                           void *user_data HB_UNUSED)
+{
+  const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data;
+  hb_lock_t lock (ft_font->lock);
+  unsigned int index = FT_Face_GetCharVariantIndex (ft_font->ft_face, unicode, variation_selector);
+
+  if (likely (index))
+  {
+    *glyph = index;
+    return true;
+  }
+  return false;
+}
+
+/* glyph_h_advances callback: fills `count` horizontal advances, using a
+ * per-font cache in front of FT_Get_Advance ().  Holds the font lock for
+ * the whole batch. */
+static void
+hb_ft_get_glyph_h_advances (hb_font_t* font, void* font_data,
+ unsigned count,
+ const hb_codepoint_t *first_glyph,
+ unsigned glyph_stride,
+ hb_position_t *first_advance,
+ unsigned advance_stride,
+ void *user_data HB_UNUSED)
+{
+ const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data;
+ hb_lock_t lock (ft_font->lock);
+ FT_Face ft_face = ft_font->ft_face;
+ int load_flags = ft_font->load_flags;
+ /* Sign of the horizontal scale; applied to every advance below. */
+ int mult = font->x_scale < 0 ? -1 : +1;
+
+ /* Cached values belong to one x_scale; start over when it changed. */
+ if (font->x_scale != ft_font->cached_x_scale.get ())
+ {
+ ft_font->advance_cache.clear ();
+ ft_font->cached_x_scale.set (font->x_scale);
+ }
+
+ for (unsigned int i = 0; i < count; i++)
+ {
+ FT_Fixed v = 0;
+ hb_codepoint_t glyph = *first_glyph;
+
+ unsigned int cv;
+ if (ft_font->advance_cache.get (glyph, &cv))
+ v = cv;
+ else
+ {
+ /* The return code is not checked: on failure v stays 0 and that 0 is
+  * what gets cached. */
+ FT_Get_Advance (ft_face, glyph, load_flags, &v);
+ ft_font->advance_cache.set (glyph, v);
+ }
+
+ /* Adds half of 1<<10 and shifts right by 10: a rounding division by 1024
+  * (presumably FreeType's 16.16 advance down to 26.6 -- confirm). */
+ *first_advance = (v * mult + (1<<9)) >> 10;
+ first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride);
+ first_advance = &StructAtOffsetUnaligned<hb_position_t> (first_advance, advance_stride);
+ }
+}
+
+/* glyph_v_advance callback: asks FT_Get_Advance () for the advance in
+ * vertical layout.  Returns 0 if FreeType reports an error. */
+static hb_position_t
+hb_ft_get_glyph_v_advance (hb_font_t *font,
+                           void *font_data,
+                           hb_codepoint_t glyph,
+                           void *user_data HB_UNUSED)
+{
+  const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data;
+  hb_lock_t lock (ft_font->lock);
+  FT_Fixed advance;
+
+  FT_Error err = FT_Get_Advance (ft_font->ft_face, glyph,
+                                 ft_font->load_flags | FT_LOAD_VERTICAL_LAYOUT,
+                                 &advance);
+  if (unlikely (err))
+    return 0;
+
+  /* A negative vertical scale mirrors the advance. */
+  if (font->y_scale < 0)
+    advance = -advance;
+
+  /* Note: FreeType's vertical metrics grows downward while other FreeType coordinates
+   * have a Y growing upward. Hence the extra negation. */
+  return (-advance + (1<<9)) >> 10;
+}
+
+/* glyph_v_origin callback: derives the vertical origin from the loaded
+ * glyph's metrics as the difference between its horizontal and vertical
+ * bearings.  Returns false if FT_Load_Glyph () fails, in which case *x and
+ * *y are not written. */
+static hb_bool_t
+hb_ft_get_glyph_v_origin (hb_font_t *font,
+ void *font_data,
+ hb_codepoint_t glyph,
+ hb_position_t *x,
+ hb_position_t *y,
+ void *user_data HB_UNUSED)
+{
+ const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data;
+ hb_lock_t lock (ft_font->lock);
+ FT_Face ft_face = ft_font->ft_face;
+
+ if (unlikely (FT_Load_Glyph (ft_face, glyph, ft_font->load_flags)))
+ return false;
+
+ /* Note: FreeType's vertical metrics grows downward while other FreeType coordinates
+ * have a Y growing upward. Hence the extra negation. */
+ *x = ft_face->glyph->metrics.horiBearingX - ft_face->glyph->metrics.vertBearingX;
+ *y = ft_face->glyph->metrics.horiBearingY - (-ft_face->glyph->metrics.vertBearingY);
+
+ /* Negative scales mirror the corresponding axis. */
+ if (font->x_scale < 0)
+ *x = -*x;
+ if (font->y_scale < 0)
+ *y = -*y;
+
+ return true;
+}
+
+#ifndef HB_NO_OT_SHAPE_FALLBACK
+/* glyph_h_kerning callback: horizontal kerning between two glyphs from
+ * FT_Get_Kerning ().  Uses FT_KERNING_DEFAULT when the font has a non-zero
+ * x_ppem and FT_KERNING_UNFITTED otherwise.  Returns 0 if FreeType reports
+ * an error. */
+static hb_position_t
+hb_ft_get_glyph_h_kerning (hb_font_t *font,
+                           void *font_data,
+                           hb_codepoint_t left_glyph,
+                           hb_codepoint_t right_glyph,
+                           void *user_data HB_UNUSED)
+{
+  const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data;
+  /* Every other callback in this file takes the font lock around its
+   * FT_Face access; do the same here so kerning lookups cannot run
+   * concurrently with them on the same face. */
+  hb_lock_t lock (ft_font->lock);
+  FT_Vector kerningv;
+
+  FT_Kerning_Mode mode = font->x_ppem ? FT_KERNING_DEFAULT : FT_KERNING_UNFITTED;
+  if (FT_Get_Kerning (ft_font->ft_face, left_glyph, right_glyph, mode, &kerningv))
+    return 0;
+
+  return kerningv.x;
+}
+#endif
+
+/* glyph_extents callback: copies the loaded glyph's horizontal bearings
+ * and size into *extents.  Height is stored negated; negative font scales
+ * additionally mirror the matching axis.  Returns false if FT_Load_Glyph ()
+ * fails, leaving *extents untouched. */
+static hb_bool_t
+hb_ft_get_glyph_extents (hb_font_t *font,
+                         void *font_data,
+                         hb_codepoint_t glyph,
+                         hb_glyph_extents_t *extents,
+                         void *user_data HB_UNUSED)
+{
+  const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data;
+  hb_lock_t lock (ft_font->lock);
+  FT_Face ft_face = ft_font->ft_face;
+
+  if (unlikely (FT_Load_Glyph (ft_face, glyph, ft_font->load_flags)))
+    return false;
+
+  const FT_Glyph_Metrics &m = ft_face->glyph->metrics;
+
+  extents->x_bearing = m.horiBearingX;
+  extents->y_bearing = m.horiBearingY;
+  extents->width = m.width;
+  extents->height = -m.height;
+
+  const bool flip_x = font->x_scale < 0;
+  const bool flip_y = font->y_scale < 0;
+  if (flip_x)
+  {
+    extents->x_bearing = -extents->x_bearing;
+    extents->width = -extents->width;
+  }
+  if (flip_y)
+  {
+    extents->y_bearing = -extents->y_bearing;
+    extents->height = -extents->height;
+  }
+  return true;
+}
+
+/* glyph_contour_point callback: returns outline point `point_index` of
+ * the loaded glyph.  Fails (leaving *x and *y untouched) if the glyph
+ * cannot be loaded, is not an outline, or has too few points. */
+static hb_bool_t
+hb_ft_get_glyph_contour_point (hb_font_t *font HB_UNUSED,
+                               void *font_data,
+                               hb_codepoint_t glyph,
+                               unsigned int point_index,
+                               hb_position_t *x,
+                               hb_position_t *y,
+                               void *user_data HB_UNUSED)
+{
+  const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data;
+  hb_lock_t lock (ft_font->lock);
+  FT_Face ft_face = ft_font->ft_face;
+
+  if (unlikely (FT_Load_Glyph (ft_face, glyph, ft_font->load_flags)))
+    return false;
+
+  FT_GlyphSlot slot = ft_face->glyph;
+
+  if (unlikely (slot->format != FT_GLYPH_FORMAT_OUTLINE) ||
+      unlikely (point_index >= (unsigned int) slot->outline.n_points))
+    return false;
+
+  const FT_Vector &pt = slot->outline.points[point_index];
+  *x = pt.x;
+  *y = pt.y;
+
+  return true;
+}
+
+/* glyph_name callback: fetches the glyph's name with FT_Get_Glyph_Name ().
+ * A FreeType error counts as failure, and so does an empty name written
+ * into a non-empty buffer. */
+static hb_bool_t
+hb_ft_get_glyph_name (hb_font_t *font HB_UNUSED,
+                      void *font_data,
+                      hb_codepoint_t glyph,
+                      char *name, unsigned int size,
+                      void *user_data HB_UNUSED)
+{
+  const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data;
+  hb_lock_t lock (ft_font->lock);
+  FT_Face ft_face = ft_font->ft_face;
+
+  if (FT_Get_Glyph_Name (ft_face, glyph, name, size))
+    return false;
+
+  /* FreeType succeeded but produced no characters. */
+  if (size && !*name)
+    return false;
+
+  return true;
+}
+
+/* glyph_from_name callback: resolves a glyph name with
+ * FT_Get_Name_Index ().
+ *
+ * `len` < 0 means `name` is nul-terminated; otherwise the first `len`
+ * bytes (capped at 127) are copied into a terminated scratch buffer first.
+ * FT_Get_Name_Index () returns 0 both for "not found" and for glyph 0, so
+ * a 0 result is double-checked against the actual name of glyph 0.
+ * Returns true if a glyph was found; *glyph holds the index (0 on
+ * failure). */
+static hb_bool_t
+hb_ft_get_glyph_from_name (hb_font_t *font HB_UNUSED,
+                           void *font_data,
+                           const char *name, int len, /* -1 means nul-terminated */
+                           hb_codepoint_t *glyph,
+                           void *user_data HB_UNUSED)
+{
+  const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data;
+  hb_lock_t lock (ft_font->lock);
+  FT_Face ft_face = ft_font->ft_face;
+
+  if (len < 0)
+    *glyph = FT_Get_Name_Index (ft_face, (FT_String *) name);
+  else {
+    /* Make a nul-terminated version. */
+    char buf[128];
+    len = hb_min (len, (int) sizeof (buf) - 1);
+    strncpy (buf, name, len);
+    buf[len] = '\0';
+    *glyph = FT_Get_Name_Index (ft_face, buf);
+  }
+
+  if (*glyph == 0)
+  {
+    /* Check whether the given name was actually the name of glyph 0. */
+    char buf[128];
+    /* The parentheses around the conditional are required: without them
+     * `a && b ? c : d` groups as `(a && b) ? c : d`, so a failed
+     * FT_Get_Glyph_Name () would still compare the uninitialized buffer. */
+    if (!FT_Get_Glyph_Name(ft_face, 0, buf, sizeof (buf)) &&
+        (len < 0 ? !strcmp (buf, name) : !strncmp (buf, name, len)))
+      return true;
+  }
+
+  return *glyph != 0;
+}
+
+/* font_h_extents callback: scales the face's ascender, descender and
+ * height with the current size's y_scale via FT_MulFix ().  line_gap is
+ * whatever the scaled height leaves beyond ascender - descender.  All
+ * three values are negated when the font's y_scale is negative.  Always
+ * returns true.
+ *
+ * `font` is read below, so it is not tagged HB_UNUSED. */
+static hb_bool_t
+hb_ft_get_font_h_extents (hb_font_t *font,
+                          void *font_data,
+                          hb_font_extents_t *metrics,
+                          void *user_data HB_UNUSED)
+{
+  const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data;
+  hb_lock_t lock (ft_font->lock);
+  FT_Face ft_face = ft_font->ft_face;
+  metrics->ascender = FT_MulFix(ft_face->ascender, ft_face->size->metrics.y_scale);
+  metrics->descender = FT_MulFix(ft_face->descender, ft_face->size->metrics.y_scale);
+  metrics->line_gap = FT_MulFix( ft_face->height, ft_face->size->metrics.y_scale ) - (metrics->ascender - metrics->descender);
+  if (font->y_scale < 0)
+  {
+    metrics->ascender = -metrics->ascender;
+    metrics->descender = -metrics->descender;
+    metrics->line_gap = -metrics->line_gap;
+  }
+  return true;
+}
+
+/* Forward declaration so that create() below can hand this function to atexit(). */
+#if HB_USE_ATEXIT
+static void free_static_ft_funcs ();
+#endif
+
+/* Loader object (static_ft_funcs) for the hb_font_funcs_t table whose callbacks
+ * are the hb_ft_get_* functions.  When and how often create() is invoked is
+ * decided by the hb_font_funcs_lazy_loader_t base, which is not shown here. */
+static struct hb_ft_font_funcs_lazy_loader_t : hb_font_funcs_lazy_loader_t<hb_ft_font_funcs_lazy_loader_t>
+{
+ /* Builds the table: creates an empty hb_font_funcs_t, installs the callbacks,
+ * makes the table immutable and, if HB_USE_ATEXIT is set, registers
+ * free_static_ft_funcs() with atexit().  The setters that are commented out
+ * are simply not installed. */
+ static hb_font_funcs_t *create ()
+ {
+ hb_font_funcs_t *funcs = hb_font_funcs_create ();
+
+ hb_font_funcs_set_font_h_extents_func (funcs, hb_ft_get_font_h_extents, nullptr, nullptr);
+ //hb_font_funcs_set_font_v_extents_func (funcs, hb_ft_get_font_v_extents, nullptr, nullptr);
+ hb_font_funcs_set_nominal_glyph_func (funcs, hb_ft_get_nominal_glyph, nullptr, nullptr);
+ hb_font_funcs_set_nominal_glyphs_func (funcs, hb_ft_get_nominal_glyphs, nullptr, nullptr);
+ hb_font_funcs_set_variation_glyph_func (funcs, hb_ft_get_variation_glyph, nullptr, nullptr);
+ hb_font_funcs_set_glyph_h_advances_func (funcs, hb_ft_get_glyph_h_advances, nullptr, nullptr);
+ hb_font_funcs_set_glyph_v_advance_func (funcs, hb_ft_get_glyph_v_advance, nullptr, nullptr);
+ //hb_font_funcs_set_glyph_h_origin_func (funcs, hb_ft_get_glyph_h_origin, nullptr, nullptr);
+ hb_font_funcs_set_glyph_v_origin_func (funcs, hb_ft_get_glyph_v_origin, nullptr, nullptr);
+ /* The horizontal kerning callback is left out when HB_NO_OT_SHAPE_FALLBACK is defined. */
+#ifndef HB_NO_OT_SHAPE_FALLBACK
+ hb_font_funcs_set_glyph_h_kerning_func (funcs, hb_ft_get_glyph_h_kerning, nullptr, nullptr);
+#endif
+ //hb_font_funcs_set_glyph_v_kerning_func (funcs, hb_ft_get_glyph_v_kerning, nullptr, nullptr);
+ hb_font_funcs_set_glyph_extents_func (funcs, hb_ft_get_glyph_extents, nullptr, nullptr);
+ hb_font_funcs_set_glyph_contour_point_func (funcs, hb_ft_get_glyph_contour_point, nullptr, nullptr);
+ hb_font_funcs_set_glyph_name_func (funcs, hb_ft_get_glyph_name, nullptr, nullptr);
+ hb_font_funcs_set_glyph_from_name_func (funcs, hb_ft_get_glyph_from_name, nullptr, nullptr);
+
+ hb_font_funcs_make_immutable (funcs);
+
+#if HB_USE_ATEXIT
+ atexit (free_static_ft_funcs);
+#endif
+
+ return funcs;
+ }
+} static_ft_funcs;
+
+#if HB_USE_ATEXIT
+/* atexit() hook registered by create(): releases the instance held by static_ft_funcs. */
+static
+void free_static_ft_funcs ()
+{
+ static_ft_funcs.free_instance ();
+}
+#endif
+
+/* Returns the font-funcs table held by static_ft_funcs. */
+static hb_font_funcs_t *
+_hb_ft_get_font_funcs ()
+{
+  hb_font_funcs_t *funcs = static_ft_funcs.get_unconst ();
+  return funcs;
+}
+
+/* Installs the FreeType-backed font funcs on @font.  An hb_ft_font_t created
+ * from @ft_face becomes the font's user data, with _hb_ft_font_destroy as its
+ * destroy callback; if that object cannot be created, @font is left as is. */
+static void
+_hb_ft_font_set_funcs (hb_font_t *font, FT_Face ft_face, bool unref)
+{
+  /* The face is flagged as "symbol" when its active charmap is MS Symbol. */
+  bool symbol = false;
+  if (ft_face->charmap)
+    symbol = ft_face->charmap->encoding == FT_ENCODING_MS_SYMBOL;
+
+  hb_ft_font_t *ft_font = _hb_ft_font_create (ft_face, symbol, unref);
+  if (unlikely (!ft_font))
+    return;
+
+  hb_font_set_funcs (font, _hb_ft_get_font_funcs (), ft_font, _hb_ft_font_destroy);
+}
+
+
+/* Table-loading callback used with hb_face_create_for_tables().
+ * @user_data is the FT_Face.  The table identified by @tag is copied into a
+ * malloc()ed buffer that the returned blob owns and releases with free().
+ * Returns nullptr when FreeType reports an error or the allocation fails. */
+static hb_blob_t *
+_hb_ft_reference_table (hb_face_t *face HB_UNUSED, hb_tag_t tag, void *user_data)
+{
+  FT_Face ft_face = (FT_Face) user_data;
+  FT_ULong table_size = 0;
+
+  /* Note: FreeType like HarfBuzz uses the NONE tag for fetching the entire blob */
+
+  /* First pass: no buffer, only ask for the table length. */
+  if (FT_Load_Sfnt_Table (ft_face, tag, 0, nullptr, &table_size))
+    return nullptr;
+
+  FT_Byte *data = (FT_Byte *) malloc (table_size);
+  if (!data)
+    return nullptr;
+
+  /* Second pass: copy the table into the buffer. */
+  if (FT_Load_Sfnt_Table (ft_face, tag, 0, data, &table_size))
+  {
+    free (data);
+    return nullptr;
+  }
+
+  return hb_blob_create ((const char *) data, table_size,
+                         HB_MEMORY_MODE_WRITABLE,
+                         data, free);
+}
+
+/**
+ * hb_ft_face_create:
+ * @ft_face: (destroy destroy) (scope notified): FT_Face to work upon
+ * @destroy: A callback to call when the face object is not needed anymore
+ *
+ * Creates an #hb_face_t face object from the specified FT_Face.
+ *
+ * This variant of the function does not provide any life-cycle management.
+ *
+ * Most client programs should use hb_ft_face_create_referenced()
+ * (or, perhaps, hb_ft_face_create_cached()) instead.
+ *
+ * If you know you have valid reasons not to use hb_ft_face_create_referenced(),
+ * then it is the client program's responsibility to destroy @ft_face
+ * after the #hb_face_t face object has been destroyed.
+ *
+ * Return value: (transfer full): the new #hb_face_t face object
+ *
+ * Since: 0.9.2
+ **/
+hb_face_t *
+hb_ft_face_create (FT_Face ft_face,
+                   hb_destroy_func_t destroy)
+{
+  hb_face_t *face = nullptr;
+
+  if (ft_face->stream->read)
+  {
+    /* The stream has a read callback: let HarfBuzz fetch tables one by one
+     * through _hb_ft_reference_table(). */
+    face = hb_face_create_for_tables (_hb_ft_reference_table, ft_face, destroy);
+  }
+  else
+  {
+    /* No read callback: wrap stream->base / stream->size in a read-only blob
+     * and build the face from it.  The face holds its own blob reference, so
+     * the local one is dropped right away. */
+    hb_blob_t *blob = hb_blob_create ((const char *) ft_face->stream->base,
+                                      (unsigned int) ft_face->stream->size,
+                                      HB_MEMORY_MODE_READONLY,
+                                      ft_face, destroy);
+    face = hb_face_create (blob, ft_face->face_index);
+    hb_blob_destroy (blob);
+  }
+
+  hb_face_set_index (face, ft_face->face_index);
+  hb_face_set_upem (face, ft_face->units_per_EM);
+
+  return face;
+}
+
+/**
+ * hb_ft_face_create_referenced:
+ * @ft_face: FT_Face to work upon
+ *
+ * Creates an #hb_face_t face object from the specified FT_Face.
+ *
+ * This is the preferred variant of the hb_ft_face_create*
+ * function family, because it calls FT_Reference_Face() on @ft_face,
+ * ensuring that @ft_face remains alive as long as the resulting
+ * #hb_face_t face object remains alive. Also calls FT_Done_Face()
+ * when the #hb_face_t face object is destroyed.
+ *
+ * Use this version unless you know you have good reasons not to.
+ *
+ * Return value: (transfer full): the new #hb_face_t face object
+ *
+ * Since: 0.9.38
+ **/
+hb_face_t *
+hb_ft_face_create_referenced (FT_Face ft_face)
+{
+  /* Take a FreeType reference first; _hb_ft_face_destroy is passed along as
+   * the destroy callback of the new face. */
+  FT_Reference_Face (ft_face);
+  hb_face_t *face = hb_ft_face_create (ft_face, _hb_ft_face_destroy);
+  return face;
+}
+
+/* Generic-slot finalizer installed by hb_ft_face_create_cached(): destroys the
+ * hb_face_t stored in @ft_face's generic data pointer. */
+static void
+hb_ft_face_finalize (FT_Face ft_face)
+{
+  hb_face_t *cached_face = (hb_face_t *) ft_face->generic.data;
+  hb_face_destroy (cached_face);
+}
+
+/**
+ * hb_ft_face_create_cached:
+ * @ft_face: FT_Face to work upon
+ *
+ * Creates an #hb_face_t face object from the specified FT_Face.
+ *
+ * This variant of the function caches the newly created #hb_face_t
+ * face object, using the @generic pointer of @ft_face. Subsequent function
+ * calls that are passed the same @ft_face parameter will have the same
+ * #hb_face_t returned to them, and that #hb_face_t will be correctly
+ * reference counted.
+ *
+ * However, client programs are still responsible for destroying
+ * @ft_face after the last #hb_face_t face object has been destroyed.
+ *
+ * Return value: (transfer full): the new #hb_face_t face object
+ *
+ * Since: 0.9.2
+ **/
+hb_face_t *
+hb_ft_face_create_cached (FT_Face ft_face)
+{
+  const FT_Generic_Finalizer our_finalizer = (FT_Generic_Finalizer) hb_ft_face_finalize;
+  /* The generic slot counts as "ours" only if it holds data and its finalizer
+   * is hb_ft_face_finalize. */
+  const bool cached = ft_face->generic.data && ft_face->generic.finalizer == our_finalizer;
+
+  if (unlikely (!cached))
+  {
+    /* Run whatever finalizer currently occupies the slot before taking it over. */
+    if (ft_face->generic.finalizer)
+      ft_face->generic.finalizer (ft_face);
+
+    ft_face->generic.data = hb_ft_face_create (ft_face, nullptr);
+    ft_face->generic.finalizer = our_finalizer;
+  }
+
+  return hb_face_reference ((hb_face_t *) ft_face->generic.data);
+}
+
+/**
+ * hb_ft_font_create:
+ * @ft_face: (destroy destroy) (scope notified): FT_Face to work upon
+ * @destroy: (optional): A callback to call when the font object is not needed anymore
+ *
+ * Creates an #hb_font_t font object from the specified FT_Face.
+ *
+ * <note>Note: You must set the face size on @ft_face before calling
+ * hb_ft_font_create() on it. Otherwise, HarfBuzz will not pick up
+ * the face size.</note>
+ *
+ * This variant of the function does not provide any life-cycle management.
+ *
+ * Most client programs should use hb_ft_font_create_referenced()
+ * instead.
+ *
+ * If you know you have valid reasons not to use hb_ft_font_create_referenced(),
+ * then it is the client program's responsibility to destroy @ft_face
+ * after the #hb_font_t font object has been destroyed.
+ *
+ * HarfBuzz will use the @destroy callback on the #hb_font_t font object
+ * if it is supplied when you use this function. However, even if @destroy
+ * is provided, it is the client program's responsibility to destroy @ft_face,
+ * and it is the client program's responsibility to ensure that @ft_face is
+ * destroyed only after the #hb_font_t font object has been destroyed.
+ *
+ * Return value: (transfer full): the new #hb_font_t font object
+ *
+ * Since: 0.9.2
+ **/
+hb_font_t *
+hb_ft_font_create (FT_Face ft_face,
+                   hb_destroy_func_t destroy)
+{
+  /* Build a face for the FT_Face, derive the font from it and drop the local
+   * face reference afterwards. */
+  hb_face_t *face = hb_ft_face_create (ft_face, destroy);
+  hb_font_t *font = hb_font_create (face);
+  hb_face_destroy (face);
+
+  /* Hook up the FreeType callbacks (unref = false), then refresh the font
+   * state from the FT_Face via hb_ft_font_changed(). */
+  _hb_ft_font_set_funcs (font, ft_face, false);
+  hb_ft_font_changed (font);
+
+  return font;
+}
+
+/**
+ * hb_ft_font_changed:
+ * @font: #hb_font_t to work upon
+ *
+ * Refreshes the state of @font when the underlying FT_Face has changed.
+ * This function should be called after changing the size or
+ * variation-axis settings on the FT_Face.
+ *
+ * Since: 1.0.5
+ **/
+void
+hb_ft_font_changed (hb_font_t *font)
+{
+ /* Only act on fonts whose destroy callback is _hb_ft_font_destroy; for those,
+ * user_data is read below as an hb_ft_font_t. */
+ if (font->destroy != (hb_destroy_func_t) _hb_ft_font_destroy)
+ return;
+
+ hb_ft_font_t *ft_font = (hb_ft_font_t *) font->user_data;
+
+ FT_Face ft_face = ft_font->ft_face;
+
+ /* New scale = size-metrics scale * units_per_EM, computed in 64 bits and
+ * rounded by adding 1<<15 before the 16-bit right shift. */
+ hb_font_set_scale (font,
+ (int) (((uint64_t) ft_face->size->metrics.x_scale * (uint64_t) ft_face->units_per_EM + (1u<<15)) >> 16),
+ (int) (((uint64_t) ft_face->size->metrics.y_scale * (uint64_t) ft_face->units_per_EM + (1u<<15)) >> 16));
+#if 0 /* hb-ft works in no-hinting model */
+ hb_font_set_ppem (font,
+ ft_face->size->metrics.x_ppem,
+ ft_face->size->metrics.y_ppem);
+#endif
+
+#if defined(HAVE_FT_GET_VAR_BLEND_COORDINATES) && !defined(HB_NO_VAR)
+ /* Copy the face's current variation blend coordinates onto the font.
+ * Skipped entirely when FT_Get_MM_Var() reports an error. */
+ FT_MM_Var *mm_var = nullptr;
+ if (!FT_Get_MM_Var (ft_face, &mm_var))
+ {
+ FT_Fixed *ft_coords = (FT_Fixed *) calloc (mm_var->num_axis, sizeof (FT_Fixed));
+ int *coords = (int *) calloc (mm_var->num_axis, sizeof (int));
+ if (coords && ft_coords)
+ {
+ if (!FT_Get_Var_Blend_Coordinates (ft_face, mm_var->num_axis, ft_coords))
+ {
+ bool nonzero = false;
+
+ for (unsigned int i = 0; i < mm_var->num_axis; ++i)
+ {
+ /* Shifts the FreeType value right by 2 in place and stores the result;
+ * presumably a 16.16 -> 2.14 conversion -- TODO confirm. */
+ coords[i] = ft_coords[i] >>= 2;
+ nonzero = nonzero || coords[i];
+ }
+
+ /* All-zero coordinates are passed on as "no coordinates at all". */
+ if (nonzero)
+ hb_font_set_var_coords_normalized (font, coords, mm_var->num_axis);
+ else
+ hb_font_set_var_coords_normalized (font, nullptr, 0);
+ }
+ }
+ /* Reached whether or not both allocations succeeded; free() accepts null. */
+ free (coords);
+ free (ft_coords);
+#ifdef HAVE_FT_DONE_MM_VAR
+ FT_Done_MM_Var (ft_face->glyph->library, mm_var);
+#else
+ free (mm_var);
+#endif
+ }
+#endif
+}
+
+/**
+ * hb_ft_font_create_referenced:
+ * @ft_face: FT_Face to work upon
+ *
+ * Creates an #hb_font_t font object from the specified FT_Face.
+ *
+ * <note>Note: You must set the face size on @ft_face before calling
+ * hb_ft_font_create_referenced() on it. Otherwise, HarfBuzz will not pick up
+ * the face size.</note>
+ *
+ * This is the preferred variant of the hb_ft_font_create*
+ * function family, because it calls FT_Reference_Face() on @ft_face,
+ * ensuring that @ft_face remains alive as long as the resulting
+ * #hb_font_t font object remains alive.
+ *
+ * Use this version unless you know you have good reasons not to.
+ *
+ * Return value: (transfer full): the new #hb_font_t font object
+ *
+ * Since: 0.9.38
+ **/
+hb_font_t *
+hb_ft_font_create_referenced (FT_Face ft_face)
+{
+  /* Take a FreeType reference first; _hb_ft_face_destroy is passed along as
+   * the destroy callback. */
+  FT_Reference_Face (ft_face);
+  hb_font_t *font = hb_ft_font_create (ft_face, _hb_ft_face_destroy);
+  return font;
+}
+
+/* Forward declaration so that create() below can hand this function to atexit(). */
+#if HB_USE_ATEXIT
+static void free_static_ft_library ();
+#endif
+
+/* Loader object (static_ft_library) for the FT_Library used by
+ * hb_ft_font_set_funcs().  When create()/destroy()/get_null() are invoked is
+ * decided by the hb_lazy_loader_t base, which is not shown here. */
+static struct hb_ft_library_lazy_loader_t : hb_lazy_loader_t<hb_remove_pointer<FT_Library>,
+ hb_ft_library_lazy_loader_t>
+{
+ /* Initializes a FreeType library; returns nullptr if FT_Init_FreeType() fails.
+ * On success, registers free_static_ft_library() with atexit() when
+ * HB_USE_ATEXIT is set. */
+ static FT_Library create ()
+ {
+ FT_Library l;
+ if (FT_Init_FreeType (&l))
+ return nullptr;
+
+#if HB_USE_ATEXIT
+ atexit (free_static_ft_library);
+#endif
+
+ return l;
+ }
+ /* Shuts the given library down with FT_Done_FreeType(). */
+ static void destroy (FT_Library l)
+ {
+ FT_Done_FreeType (l);
+ }
+ /* The "null" library value is a plain null pointer. */
+ static FT_Library get_null ()
+ {
+ return nullptr;
+ }
+} static_ft_library;
+
+#if HB_USE_ATEXIT
+/* atexit() hook registered by create(): releases the instance held by static_ft_library. */
+static
+void free_static_ft_library ()
+{
+ static_ft_library.free_instance ();
+}
+#endif
+
+/* Returns the FT_Library held by static_ft_library. */
+static FT_Library
+get_ft_library ()
+{
+  FT_Library library = static_ft_library.get_unconst ();
+  return library;
+}
+
+/* Generic-slot finalizer installed by hb_ft_font_set_funcs(): destroys the
+ * hb_blob_t stored in @ft_face's generic data pointer. */
+static void
+_release_blob (FT_Face ft_face)
+{
+  hb_blob_t *blob = (hb_blob_t *) ft_face->generic.data;
+  hb_blob_destroy (blob);
+}
+
+/**
+ * hb_ft_font_set_funcs:
+ * @font: #hb_font_t to work upon
+ *
+ * Configures the font-functions structure of the specified
+ * #hb_font_t font object to use FreeType font functions.
+ *
+ * In particular, you can use this function to configure an
+ * existing #hb_face_t face object for use with FreeType font
+ * functions even if that #hb_face_t face object was initially
+ * created with hb_face_create(), and therefore was not
+ * initially configured to use FreeType font functions.
+ *
+ * An #hb_face_t face object created with hb_ft_face_create()
+ * is preconfigured for FreeType font functions and does not
+ * require this function to be used.
+ *
+ * <note>Note: Internally, this function creates an FT_Face.
+ * </note>
+ *
+ * Since: 1.0.5
+ **/
+void
+hb_ft_font_set_funcs (hb_font_t *font)
+{
+ /* Take a reference on the face's blob; it is either destroyed on the error
+ * path below or stored in the new FT_Face's generic slot at the end. */
+ hb_blob_t *blob = hb_face_reference_blob (font->face);
+ unsigned int blob_length;
+ const char *blob_data = hb_blob_get_data (blob, &blob_length);
+ /* An empty blob is only logged; FT_New_Memory_Face() is still attempted. */
+ if (unlikely (!blob_length))
+ DEBUG_MSG (FT, font, "Font face has empty blob");
+
+ FT_Face ft_face = nullptr;
+ FT_Error err = FT_New_Memory_Face (get_ft_library (),
+ (const FT_Byte *) blob_data,
+ blob_length,
+ hb_face_get_index (font->face),
+ &ft_face);
+
+ if (unlikely (err)) {
+ hb_blob_destroy (blob);
+ DEBUG_MSG (FT, font, "Font face FT_New_Memory_Face() failed");
+ return;
+ }
+
+ /* Try the MS Symbol charmap first; fall back to Unicode if selecting it fails. */
+ if (FT_Select_Charmap (ft_face, FT_ENCODING_MS_SYMBOL))
+ FT_Select_Charmap (ft_face, FT_ENCODING_UNICODE);
+
+ /* Character size is the absolute value of the font scale; the resolution
+ * arguments are passed as 0. */
+ FT_Set_Char_Size (ft_face,
+ abs (font->x_scale), abs (font->y_scale),
+ 0, 0);
+ /* Disabled leftover: ppem-derived arguments that are not compiled. */
+#if 0
+ font->x_ppem * 72 * 64 / font->x_scale,
+ font->y_ppem * 72 * 64 / font->y_scale);
+#endif
+ /* A negative scale on either axis is expressed through a face transform. */
+ if (font->x_scale < 0 || font->y_scale < 0)
+ {
+ /* NOTE(review): FreeType documents FT_Matrix members as 16.16 fixed-point;
+ * the -1 / +1 literals here look like unscaled integers -- confirm intent. */
+ FT_Matrix matrix = { font->x_scale < 0 ? -1 : +1, 0,
+ 0, font->y_scale < 0 ? -1 : +1};
+ FT_Set_Transform (ft_face, &matrix, nullptr);
+ }
+
+#if defined(HAVE_FT_GET_VAR_BLEND_COORDINATES) && !defined(HB_NO_VAR)
+ /* Forward the font's normalized variation coordinates to the FT_Face,
+ * multiplied by 4 (the inverse of the >> 2 in hb_ft_font_changed()).
+ * Silently skipped if the temporary array cannot be allocated. */
+ unsigned int num_coords;
+ const int *coords = hb_font_get_var_coords_normalized (font, &num_coords);
+ if (num_coords)
+ {
+ FT_Fixed *ft_coords = (FT_Fixed *) calloc (num_coords, sizeof (FT_Fixed));
+ if (ft_coords)
+ {
+ for (unsigned int i = 0; i < num_coords; i++)
+ ft_coords[i] = coords[i] * 4;
+ FT_Set_Var_Blend_Coordinates (ft_face, num_coords, ft_coords);
+ free (ft_coords);
+ }
+ }
+#endif
+
+ /* Park the blob reference in the face's generic slot; _release_blob()
+ * destroys it when that finalizer is run. */
+ ft_face->generic.data = blob;
+ ft_face->generic.finalizer = (FT_Generic_Finalizer) _release_blob;
+
+ /* unref = true: presumably hands ownership of ft_face to the hb_ft_font_t --
+ * see _hb_ft_font_create() to confirm. */
+ _hb_ft_font_set_funcs (font, ft_face, true);
+ hb_ft_font_set_load_flags (font, FT_LOAD_DEFAULT | FT_LOAD_NO_HINTING);
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ft.h b/thirdparty/harfbuzz/src/hb-ft.h
new file mode 100644
index 0000000000..bf07115ab9
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ft.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2015 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_FT_H
+#define HB_FT_H
+
+#include "hb.h"
+
+#include <ft2build.h>
+#include FT_FREETYPE_H
+
+HB_BEGIN_DECLS
+
+/*
+ * Note: FreeType is not thread-safe.
+ * Hence, these functions are not either.
+ */
+
+/*
+ * hb-face from ft-face.
+ */
+
+/* This one creates a new hb-face for given ft-face.
+ * When the returned hb-face is destroyed, the destroy
+ * callback is called (if not NULL), with the ft-face passed
+ * to it.
+ *
+ * The client is responsible to make sure that ft-face is
+ * destroyed after hb-face is destroyed.
+ *
+ * Most often you don't want this function. You should use either
+ * hb_ft_face_create_cached(), or hb_ft_face_create_referenced().
+ * In particular, if you are going to pass NULL as destroy, you
+ * probably should use (the more recent) hb_ft_face_create_referenced()
+ * instead.
+ */
+HB_EXTERN hb_face_t *
+hb_ft_face_create (FT_Face ft_face,
+ hb_destroy_func_t destroy);
+
+/* This version is like hb_ft_face_create(), except that it caches
+ * the hb-face using the generic pointer of the ft-face. This means
+ * that subsequent calls to this function with the same ft-face will
+ * return the same hb-face (correctly referenced).
+ *
+ * Client is still responsible for making sure that ft-face is destroyed
+ * after hb-face is.
+ */
+HB_EXTERN hb_face_t *
+hb_ft_face_create_cached (FT_Face ft_face);
+
+/* This version is like hb_ft_face_create(), except that it calls
+ * FT_Reference_Face() on ft-face, as such keeping ft-face alive
+ * as long as the hb-face is.
+ *
+ * This is the most convenient version to use. Use it unless you have
+ * very good reasons not to.
+ */
+HB_EXTERN hb_face_t *
+hb_ft_face_create_referenced (FT_Face ft_face);
+
+
+/*
+ * hb-font from ft-face.
+ */
+
+/*
+ * Note:
+ *
+ * Set face size on ft-face before creating hb-font from it.
+ * Otherwise hb-ft would NOT pick up the font size correctly.
+ */
+
+/* See notes on hb_ft_face_create(). Same issues re lifecycle-management
+ * apply here. Use hb_ft_font_create_referenced() if you can. */
+HB_EXTERN hb_font_t *
+hb_ft_font_create (FT_Face ft_face,
+ hb_destroy_func_t destroy);
+
+/* See notes on hb_ft_face_create_referenced() re lifecycle-management
+ * issues. */
+HB_EXTERN hb_font_t *
+hb_ft_font_create_referenced (FT_Face ft_face);
+
+HB_EXTERN FT_Face
+hb_ft_font_get_face (hb_font_t *font);
+
+HB_EXTERN FT_Face
+hb_ft_font_lock_face (hb_font_t *font);
+
+HB_EXTERN void
+hb_ft_font_unlock_face (hb_font_t *font);
+
+HB_EXTERN void
+hb_ft_font_set_load_flags (hb_font_t *font, int load_flags);
+
+HB_EXTERN int
+hb_ft_font_get_load_flags (hb_font_t *font);
+
+/* Call when size or variations settings on underlying FT_Face change. */
+HB_EXTERN void
+hb_ft_font_changed (hb_font_t *font);
+
+/* Makes an hb_font_t use FreeType internally to implement font functions.
+ * Note: this internally creates an FT_Face. Use it when you create your
+ * hb_face_t using hb_face_create(). */
+HB_EXTERN void
+hb_ft_font_set_funcs (hb_font_t *font);
+
+
+HB_END_DECLS
+
+#endif /* HB_FT_H */
diff --git a/thirdparty/harfbuzz/src/hb-gdi.cc b/thirdparty/harfbuzz/src/hb-gdi.cc
new file mode 100644
index 0000000000..f6306ef89f
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-gdi.cc
@@ -0,0 +1,73 @@
+/*
+ * Copyright © 2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "hb.hh"
+
+#ifdef HAVE_GDI
+
+#include "hb-gdi.h"
+
+/* Table-loading callback used with hb_face_create_for_tables().
+ *
+ * @user_data is the HFONT passed to hb_gdi_face_create().  The font is
+ * selected into the DC obtained from GetDC (nullptr) and the table
+ * identified by @tag is copied with GetFontData() into a malloc()ed buffer
+ * that the returned blob owns and releases with free().
+ *
+ * Returns the empty blob on any failure.  The DC is released on every path
+ * on which it was successfully acquired. */
+static hb_blob_t *
+_hb_gdi_reference_table (hb_face_t *face HB_UNUSED, hb_tag_t tag, void *user_data)
+{
+  char *buffer = nullptr;
+  DWORD length = 0;
+
+  HDC hdc = GetDC (nullptr);
+  if (unlikely (!hdc)) goto fail;
+  /* The DC is live from here on, so a SelectObject() failure must go through
+   * ReleaseDC() as well instead of leaking the DC. */
+  if (unlikely (!SelectObject (hdc, (HFONT) user_data))) goto fail_with_releasedc;
+
+  /* First call: null buffer and zero length, only the table size is wanted. */
+  length = GetFontData (hdc, hb_uint32_swap (tag), 0, buffer, length);
+  if (unlikely (length == GDI_ERROR)) goto fail_with_releasedc;
+
+  buffer = (char *) malloc (length);
+  if (unlikely (!buffer)) goto fail_with_releasedc;
+  /* Second call: copy the table data into the buffer. */
+  length = GetFontData (hdc, hb_uint32_swap (tag), 0, buffer, length);
+  if (unlikely (length == GDI_ERROR)) goto fail_with_releasedc_and_free;
+  ReleaseDC (nullptr, hdc);
+
+  return hb_blob_create ((const char *) buffer, length, HB_MEMORY_MODE_WRITABLE, buffer, free);
+
+fail_with_releasedc_and_free:
+  free (buffer);
+fail_with_releasedc:
+  ReleaseDC (nullptr, hdc);
+fail:
+  return hb_blob_get_empty ();
+}
+
+/**
+ * hb_gdi_face_create:
+ * @hfont: a HFONT object.
+ *
+ * Return value: #hb_face_t object corresponding to the given input
+ *
+ * Since: 2.6.0
+ **/
+hb_face_t *
+hb_gdi_face_create (HFONT hfont)
+{
+  /* The HFONT travels as user data to _hb_gdi_reference_table(); no destroy
+   * callback is registered for it. */
+  hb_face_t *face = hb_face_create_for_tables (_hb_gdi_reference_table, (void *) hfont, nullptr);
+  return face;
+}
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-gdi.h b/thirdparty/harfbuzz/src/hb-gdi.h
new file mode 100644
index 0000000000..68cc43917e
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-gdi.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_GDI_H
+#define HB_GDI_H
+
+#include "hb.h"
+
+#include <windows.h>
+
+HB_BEGIN_DECLS
+
+HB_EXTERN hb_face_t *
+hb_gdi_face_create (HFONT hfont);
+
+HB_END_DECLS
+
+#endif /* HB_GDI_H */
diff --git a/thirdparty/harfbuzz/src/hb-glib.cc b/thirdparty/harfbuzz/src/hb-glib.cc
new file mode 100644
index 0000000000..f93bb8853c
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-glib.cc
@@ -0,0 +1,307 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2011 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifdef HAVE_GLIB
+
+#include "hb-glib.h"
+
+#include "hb-machinery.hh"
+
+
+/**
+ * SECTION:hb-glib
+ * @title: hb-glib
+ * @short_description: GLib integration
+ * @include: hb-glib.h
+ *
+ * Functions for using HarfBuzz with the GLib library.
+ *
+ * HarfBuzz supports using GLib to provide Unicode data, by attaching
+ * GLib functions to the virtual methods in a #hb_unicode_funcs_t function
+ * structure.
+ **/
+
+
+/**
+ * hb_glib_script_to_script:
+ * @script: The GUnicodeScript identifier to query
+ *
+ * Fetches the #hb_script_t script that corresponds to the
+ * specified GUnicodeScript identifier.
+ *
+ * Return value: the #hb_script_t script found
+ *
+ * Since: 0.9.38
+ **/
+hb_script_t
+hb_glib_script_to_script (GUnicodeScript script)
+{
+  /* GLib hands back the ISO 15924 code, which is cast to hb_script_t as is. */
+  const auto iso15924 = g_unicode_script_to_iso15924 (script);
+  return (hb_script_t) iso15924;
+}
+
+/**
+ * hb_glib_script_from_script:
+ * @script: The #hb_script_t to query
+ *
+ * Fetches the GUnicodeScript identifier that corresponds to the
+ * specified #hb_script_t script.
+ *
+ * Return value: the GUnicodeScript identifier found
+ *
+ * Since: 0.9.38
+ **/
+GUnicodeScript
+hb_glib_script_from_script (hb_script_t script)
+{
+  /* The hb_script_t value is passed to GLib as the ISO 15924 code. */
+  GUnicodeScript glib_script = g_unicode_script_from_iso15924 (script);
+  return glib_script;
+}
+
+
+/* Unicode-funcs callback: combining class of @unicode as reported by
+ * g_unichar_combining_class(), cast to the HarfBuzz enum. */
+static hb_unicode_combining_class_t
+hb_glib_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+                                 hb_codepoint_t unicode,
+                                 void *user_data HB_UNUSED)
+{
+  const auto glib_class = g_unichar_combining_class (unicode);
+  return (hb_unicode_combining_class_t) glib_class;
+}
+
+/* Unicode-funcs callback: general category of @unicode as reported by
+ * g_unichar_type(), cast to the HarfBuzz enum. */
+static hb_unicode_general_category_t
+hb_glib_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+                                  hb_codepoint_t unicode,
+                                  void *user_data HB_UNUSED)
+{
+  /* hb_unicode_general_category_t and GUnicodeType are identical */
+  const auto glib_type = g_unichar_type (unicode);
+  return (hb_unicode_general_category_t) glib_type;
+}
+
+/* Unicode-funcs callback: asks g_unichar_get_mirror_char() for the mirrored
+ * form of @unicode and returns whatever ends up in the output slot, which
+ * starts out as @unicode itself. */
+static hb_codepoint_t
+hb_glib_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+                           hb_codepoint_t unicode,
+                           void *user_data HB_UNUSED)
+{
+  hb_codepoint_t mirrored = unicode;
+  g_unichar_get_mirror_char (unicode, &mirrored);
+  return mirrored;
+}
+
+/* Unicode-funcs callback: script of @unicode as reported by
+ * g_unichar_get_script(), converted with hb_glib_script_to_script(). */
+static hb_script_t
+hb_glib_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+                        hb_codepoint_t unicode,
+                        void *user_data HB_UNUSED)
+{
+  const auto glib_script = g_unichar_get_script (unicode);
+  return hb_glib_script_to_script (glib_script);
+}
+
+/* Unicode-funcs callback: composes @a and @b into one code point.
+ *
+ * With GLib >= 2.29.12 this is a direct call to g_unichar_compose().  The
+ * code after the #endif is the fallback for older GLib: it NFC-normalizes
+ * the UTF-8 encoding of the pair and succeeds only if the result is a single
+ * character, which is then stored in *@ab.
+ *
+ * Returns true on success, false otherwise. */
+static hb_bool_t
+hb_glib_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+                         hb_codepoint_t a,
+                         hb_codepoint_t b,
+                         hb_codepoint_t *ab,
+                         void *user_data HB_UNUSED)
+{
+#if GLIB_CHECK_VERSION(2,29,12)
+  return g_unichar_compose (a, b, ab);
+#endif
+
+  /* We don't ifdef-out the fallback code such that compiler always
+   * sees it and makes sure it's compilable. */
+
+  gchar utf8[12];
+  gchar *normalized;
+  int len;
+  hb_bool_t ret;
+
+  len = g_unichar_to_utf8 (a, utf8);
+  len += g_unichar_to_utf8 (b, utf8 + len);
+  normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFC);
+  len = g_utf8_strlen (normalized, -1);
+  if (unlikely (!len))
+  {
+    /* Release the normalized string on this early exit too; g_free()
+     * accepts NULL, so a failed normalization is covered as well. */
+    g_free (normalized);
+    return false;
+  }
+
+  if (len == 1) {
+    *ab = g_utf8_get_char (normalized);
+    ret = true;
+  } else {
+    ret = false;
+  }
+
+  g_free (normalized);
+  return ret;
+}
+
+/* Unicode-funcs callback: decomposes @ab into *@a and *@b.
+ *
+ * With GLib >= 2.29.12 this is a direct call to g_unichar_decompose().  The
+ * code after the #endif is the fallback for older GLib, built on NFD/NFC
+ * normalization of the UTF-8 encoding of @ab:
+ *  - one resulting character: *@a is that character, *@b is 0, and the
+ *    return value tells whether it differs from @ab;
+ *  - two characters: they become *@a and *@b, unless recomposing yields a
+ *    character that is neither @ab nor *@a, in which case that character
+ *    alone is returned in *@a;
+ *  - more than two: *@b is the last character and *@a the recomposition of
+ *    everything before it.
+ *
+ * Returns false when the normalized string is empty. */
+static hb_bool_t
+hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+                           hb_codepoint_t ab,
+                           hb_codepoint_t *a,
+                           hb_codepoint_t *b,
+                           void *user_data HB_UNUSED)
+{
+#if GLIB_CHECK_VERSION(2,29,12)
+  return g_unichar_decompose (ab, a, b);
+#endif
+
+  /* We don't ifdef-out the fallback code such that compiler always
+   * sees it and makes sure it's compilable. */
+
+  gchar utf8[6];
+  gchar *normalized;
+  int len;
+  hb_bool_t ret;
+
+  len = g_unichar_to_utf8 (ab, utf8);
+  normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFD);
+  len = g_utf8_strlen (normalized, -1);
+  if (unlikely (!len))
+  {
+    /* Release the normalized string on this early exit too; g_free()
+     * accepts NULL, so a failed normalization is covered as well. */
+    g_free (normalized);
+    return false;
+  }
+
+  if (len == 1) {
+    *a = g_utf8_get_char (normalized);
+    *b = 0;
+    ret = *a != ab;
+  } else if (len == 2) {
+    *a = g_utf8_get_char (normalized);
+    *b = g_utf8_get_char (g_utf8_next_char (normalized));
+    /* Here's the ugly part: if ab decomposes to a single character and
+     * that character decomposes again, we have to detect that and undo
+     * the second part :-(. */
+    gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC);
+    hb_codepoint_t c = g_utf8_get_char (recomposed);
+    if (c != ab && c != *a) {
+      *a = c;
+      *b = 0;
+    }
+    g_free (recomposed);
+    ret = true;
+  } else {
+    /* If decomposed to more than two characters, take the last one,
+     * and recompose the rest to get the first component. */
+    gchar *end = g_utf8_offset_to_pointer (normalized, len - 1);
+    gchar *recomposed;
+    *b = g_utf8_get_char (end);
+    recomposed = g_utf8_normalize (normalized, end - normalized, G_NORMALIZE_NFC);
+    /* We expect that recomposed has exactly one character now. */
+    *a = g_utf8_get_char (recomposed);
+    g_free (recomposed);
+    ret = true;
+  }
+
+  g_free (normalized);
+  return ret;
+}
+
+
+/* Forward declaration so that create() below can hand this function to atexit(). */
+#if HB_USE_ATEXIT
+static void free_static_glib_funcs ();
+#endif
+
+/* Loader object (static_glib_funcs) for the hb_unicode_funcs_t table whose
+ * callbacks are the hb_glib_unicode_* functions above.  When and how often
+ * create() is invoked is decided by the hb_unicode_funcs_lazy_loader_t base,
+ * which is not shown here. */
+static struct hb_glib_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_glib_unicode_funcs_lazy_loader_t>
+{
+ /* Builds the table: creates an hb_unicode_funcs_t with a null parent, installs
+ * the six GLib-backed callbacks, makes the table immutable and, if
+ * HB_USE_ATEXIT is set, registers free_static_glib_funcs() with atexit(). */
+ static hb_unicode_funcs_t *create ()
+ {
+ hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
+
+ hb_unicode_funcs_set_combining_class_func (funcs, hb_glib_unicode_combining_class, nullptr, nullptr);
+ hb_unicode_funcs_set_general_category_func (funcs, hb_glib_unicode_general_category, nullptr, nullptr);
+ hb_unicode_funcs_set_mirroring_func (funcs, hb_glib_unicode_mirroring, nullptr, nullptr);
+ hb_unicode_funcs_set_script_func (funcs, hb_glib_unicode_script, nullptr, nullptr);
+ hb_unicode_funcs_set_compose_func (funcs, hb_glib_unicode_compose, nullptr, nullptr);
+ hb_unicode_funcs_set_decompose_func (funcs, hb_glib_unicode_decompose, nullptr, nullptr);
+
+ hb_unicode_funcs_make_immutable (funcs);
+
+#if HB_USE_ATEXIT
+ atexit (free_static_glib_funcs);
+#endif
+
+ return funcs;
+ }
+} static_glib_funcs;
+
+#if HB_USE_ATEXIT
+/* atexit() hook registered by create(): releases the instance held by static_glib_funcs. */
+static
+void free_static_glib_funcs ()
+{
+ static_glib_funcs.free_instance ();
+}
+#endif
+
+/**
+ * hb_glib_get_unicode_funcs:
+ *
+ * Fetches a Unicode-functions structure that is populated
+ * with the appropriate GLib function for each method.
+ *
+ * Return value: (transfer none): a pointer to the #hb_unicode_funcs_t Unicode-functions structure
+ *
+ * Since: 0.9.38
+ **/
+hb_unicode_funcs_t *
+hb_glib_get_unicode_funcs ()
+{
+  /* Hand out the table held by static_glib_funcs. */
+  hb_unicode_funcs_t *funcs = static_glib_funcs.get_unconst ();
+  return funcs;
+}
+
+
+
+#if GLIB_CHECK_VERSION(2,31,10)
+
+/* Destroy callback for blobs made by hb_glib_blob_create(): drops the GBytes
+ * reference that was passed as the blob's user data. */
+static void
+_hb_g_bytes_unref (void *data)
+{
+  GBytes *gbytes = (GBytes *) data;
+  g_bytes_unref (gbytes);
+}
+
+/**
+ * hb_glib_blob_create:
+ * @gbytes: the GBytes structure to work upon
+ *
+ * Creates an #hb_blob_t blob from the specified
+ * GBytes data structure.
+ *
+ * Return value: (transfer full): the new #hb_blob_t blob object
+ *
+ * Since: 0.9.38
+ **/
+hb_blob_t *
+hb_glib_blob_create (GBytes *gbytes)
+{
+  gsize length = 0;
+  const char *bytes = (const char *) g_bytes_get_data (gbytes, &length);
+
+  /* The blob gets its own GBytes reference as user data, which
+   * _hb_g_bytes_unref() gives back as the blob's destroy callback. */
+  GBytes *owner = g_bytes_ref (gbytes);
+
+  return hb_blob_create (bytes,
+                         length,
+                         HB_MEMORY_MODE_READONLY,
+                         owner,
+                         _hb_g_bytes_unref);
+}
+#endif
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-glib.h b/thirdparty/harfbuzz/src/hb-glib.h
new file mode 100644
index 0000000000..5f04183ba1
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-glib.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2011 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_GLIB_H
+#define HB_GLIB_H
+
+#include "hb.h"
+
+#include <glib.h>
+
+HB_BEGIN_DECLS
+
+
+HB_EXTERN hb_script_t
+hb_glib_script_to_script (GUnicodeScript script);
+
+HB_EXTERN GUnicodeScript
+hb_glib_script_from_script (hb_script_t script);
+
+
+HB_EXTERN hb_unicode_funcs_t *
+hb_glib_get_unicode_funcs (void);
+
+#if GLIB_CHECK_VERSION(2,31,10)
+HB_EXTERN hb_blob_t *
+hb_glib_blob_create (GBytes *gbytes);
+#endif
+
+HB_END_DECLS
+
+#endif /* HB_GLIB_H */
diff --git a/thirdparty/harfbuzz/src/hb-gobject-structs.cc b/thirdparty/harfbuzz/src/hb-gobject-structs.cc
new file mode 100644
index 0000000000..7c46e26400
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-gobject-structs.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright © 2011 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifdef HAVE_GOBJECT
+
+
+/**
+ * SECTION:hb-gobject
+ * @title: hb-gobject
+ * @short_description: GObject integration support
+ * @include: hb-gobject.h
+ *
+ * Support for using HarfBuzz with the GObject library to provide
+ * type data.
+ *
+ * The types and functions listed here are solely a linkage between
+ * HarfBuzz's public data types and the GTypes used by the GObject framework.
+ * HarfBuzz uses GObject introspection to generate its Python bindings
+ * (and potentially other language bindings); client programs should never need
+ * to access the GObject-integration mechanics.
+ *
+ * For client programs using the GNOME and GTK software stack, please see the
+ * GLib and FreeType integration pages.
+ **/
+
+
+/* g++ didn't like older gtype.h gcc-only code path. */
+#include <glib.h>
+#if !GLIB_CHECK_VERSION(2,29,16)
+#undef __GNUC__
+#undef __GNUC_MINOR__
+#define __GNUC__ 2
+#define __GNUC_MINOR__ 6
+#endif
+
+#include "hb-gobject.h"
+
+/* Defines hb_gobject_<name>_get_type (). On the first call that enters the
+ * g_once_init_enter/g_once_init_leave guard it registers a boxed GType named
+ * "hb_<name>_t" with the given copy and free callbacks; every call returns
+ * the stored type id. */
+#define HB_DEFINE_BOXED_TYPE(name,copy_func,free_func) \
+GType \
+hb_gobject_##name##_get_type () \
+{ \
+ static gsize type_id = 0; \
+ if (g_once_init_enter (&type_id)) { \
+ GType id = g_boxed_type_register_static (g_intern_static_string ("hb_" #name "_t"), \
+ (GBoxedCopyFunc) copy_func, \
+ (GBoxedFreeFunc) free_func); \
+ g_once_init_leave (&type_id, id); \
+ } \
+ return type_id; \
+}
+
+/* Boxed type for a HarfBuzz object type: the boxed copy function is
+ * hb_<name>_reference and the boxed free function is hb_<name>_destroy. */
+#define HB_DEFINE_OBJECT_TYPE(name) \
+ HB_DEFINE_BOXED_TYPE (name, hb_##name##_reference, hb_##name##_destroy)
+
+/* Boxed type for a plain value struct. The generated copy helper callocs a
+ * new hb_<name>_t and assigns *l into it (returning nullptr if allocation
+ * fails); the generated free helper calls free (). Both helpers are static
+ * and are passed to HB_DEFINE_BOXED_TYPE. */
+#define HB_DEFINE_VALUE_TYPE(name) \
+ static hb_##name##_t *_hb_##name##_reference (const hb_##name##_t *l) \
+ { \
+ hb_##name##_t *c = (hb_##name##_t *) calloc (1, sizeof (hb_##name##_t)); \
+ if (unlikely (!c)) return nullptr; \
+ *c = *l; \
+ return c; \
+ } \
+ static void _hb_##name##_destroy (hb_##name##_t *l) { free (l); } \
+ HB_DEFINE_BOXED_TYPE (name, _hb_##name##_reference, _hb_##name##_destroy)
+
+HB_DEFINE_OBJECT_TYPE (buffer)
+HB_DEFINE_OBJECT_TYPE (blob)
+HB_DEFINE_OBJECT_TYPE (face)
+HB_DEFINE_OBJECT_TYPE (font)
+HB_DEFINE_OBJECT_TYPE (font_funcs)
+HB_DEFINE_OBJECT_TYPE (set)
+HB_DEFINE_OBJECT_TYPE (map)
+HB_DEFINE_OBJECT_TYPE (shape_plan)
+HB_DEFINE_OBJECT_TYPE (unicode_funcs)
+HB_DEFINE_VALUE_TYPE (feature)
+HB_DEFINE_VALUE_TYPE (glyph_info)
+HB_DEFINE_VALUE_TYPE (glyph_position)
+HB_DEFINE_VALUE_TYPE (segment_properties)
+HB_DEFINE_VALUE_TYPE (user_data_key)
+
+HB_DEFINE_VALUE_TYPE (ot_math_glyph_variant)
+HB_DEFINE_VALUE_TYPE (ot_math_glyph_part)
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-gobject-structs.h b/thirdparty/harfbuzz/src/hb-gobject-structs.h
new file mode 100644
index 0000000000..6fad8d7019
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-gobject-structs.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_GOBJECT_H_IN
+#error "Include <hb-gobject.h> instead."
+#endif
+
+#ifndef HB_GOBJECT_STRUCTS_H
+#define HB_GOBJECT_STRUCTS_H
+
+#include "hb.h"
+
+#include <glib-object.h>
+
+HB_BEGIN_DECLS
+
+
+/* Object types */
+
+/**
+ * hb_gobject_blob_get_type:
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN GType
+hb_gobject_blob_get_type (void);
+#define HB_GOBJECT_TYPE_BLOB (hb_gobject_blob_get_type ())
+
+/**
+ * hb_gobject_buffer_get_type:
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN GType
+hb_gobject_buffer_get_type (void);
+#define HB_GOBJECT_TYPE_BUFFER (hb_gobject_buffer_get_type ())
+
+/**
+ * hb_gobject_face_get_type:
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN GType
+hb_gobject_face_get_type (void);
+#define HB_GOBJECT_TYPE_FACE (hb_gobject_face_get_type ())
+
+/**
+ * hb_gobject_font_get_type:
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN GType
+hb_gobject_font_get_type (void);
+#define HB_GOBJECT_TYPE_FONT (hb_gobject_font_get_type ())
+
+/**
+ * hb_gobject_font_funcs_get_type:
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN GType
+hb_gobject_font_funcs_get_type (void);
+#define HB_GOBJECT_TYPE_FONT_FUNCS (hb_gobject_font_funcs_get_type ())
+
+HB_EXTERN GType
+hb_gobject_set_get_type (void);
+#define HB_GOBJECT_TYPE_SET (hb_gobject_set_get_type ())
+
+HB_EXTERN GType
+hb_gobject_map_get_type (void);
+#define HB_GOBJECT_TYPE_MAP (hb_gobject_map_get_type ())
+
+HB_EXTERN GType
+hb_gobject_shape_plan_get_type (void);
+#define HB_GOBJECT_TYPE_SHAPE_PLAN (hb_gobject_shape_plan_get_type ())
+
+/**
+ * hb_gobject_unicode_funcs_get_type:
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN GType
+hb_gobject_unicode_funcs_get_type (void);
+#define HB_GOBJECT_TYPE_UNICODE_FUNCS (hb_gobject_unicode_funcs_get_type ())
+
+/* Value types */
+
+HB_EXTERN GType
+hb_gobject_feature_get_type (void);
+#define HB_GOBJECT_TYPE_FEATURE (hb_gobject_feature_get_type ())
+
+HB_EXTERN GType
+hb_gobject_glyph_info_get_type (void);
+#define HB_GOBJECT_TYPE_GLYPH_INFO (hb_gobject_glyph_info_get_type ())
+
+HB_EXTERN GType
+hb_gobject_glyph_position_get_type (void);
+#define HB_GOBJECT_TYPE_GLYPH_POSITION (hb_gobject_glyph_position_get_type ())
+
+HB_EXTERN GType
+hb_gobject_segment_properties_get_type (void);
+#define HB_GOBJECT_TYPE_SEGMENT_PROPERTIES (hb_gobject_segment_properties_get_type ())
+
+HB_EXTERN GType
+hb_gobject_user_data_key_get_type (void);
+#define HB_GOBJECT_TYPE_USER_DATA_KEY (hb_gobject_user_data_key_get_type ())
+
+HB_EXTERN GType
+hb_gobject_ot_math_glyph_variant_get_type (void);
+#define HB_GOBJECT_TYPE_OT_MATH_GLYPH_VARIANT (hb_gobject_ot_math_glyph_variant_get_type ())
+
+HB_EXTERN GType
+hb_gobject_ot_math_glyph_part_get_type (void);
+#define HB_GOBJECT_TYPE_OT_MATH_GLYPH_PART (hb_gobject_ot_math_glyph_part_get_type ())
+
+
+HB_END_DECLS
+
+#endif /* HB_GOBJECT_STRUCTS_H */
diff --git a/thirdparty/harfbuzz/src/hb-gobject.h b/thirdparty/harfbuzz/src/hb-gobject.h
new file mode 100644
index 0000000000..8891aa0ee7
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-gobject.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_GOBJECT_H
+#define HB_GOBJECT_H
+#define HB_GOBJECT_H_IN
+
+#include "hb.h"
+
+#include "hb-gobject-enums.h"
+#include "hb-gobject-structs.h"
+
+HB_BEGIN_DECLS
+HB_END_DECLS
+
+#undef HB_GOBJECT_H_IN
+#endif /* HB_GOBJECT_H */
diff --git a/thirdparty/harfbuzz/src/hb-graphite2.cc b/thirdparty/harfbuzz/src/hb-graphite2.cc
new file mode 100644
index 0000000000..d8a72dc2f1
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-graphite2.cc
@@ -0,0 +1,442 @@
+/*
+ * Copyright © 2011 Martin Hosken
+ * Copyright © 2011 SIL International
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifdef HAVE_GRAPHITE2
+
+#include "hb-shaper-impl.hh"
+
+#include "hb-graphite2.h"
+
+#include <graphite2/Segment.h>
+
+#include "hb-ot-layout.h"
+
+
+/**
+ * SECTION:hb-graphite2
+ * @title: hb-graphite2
+ * @short_description: Graphite2 integration
+ * @include: hb-graphite2.h
+ *
+ * Functions for using HarfBuzz with fonts that include Graphite features.
+ *
+ * For Graphite features to work, you must be sure that HarfBuzz was compiled
+ * with the `graphite2` shaping engine enabled. Currently, the default is to
+ * not enable `graphite2` shaping.
+ **/
+
+
+/*
+ * shaper face data
+ */
+
+/* Singly-linked list node that caches one font table blob, keyed by its tag.
+ * Nodes are created in hb_graphite2_get_table () and freed in
+ * _hb_graphite2_shaper_face_data_destroy (). */
+typedef struct hb_graphite2_tablelist_t
+{
+ struct hb_graphite2_tablelist_t *next; /* following node in the list */
+ hb_blob_t *blob; /* table blob obtained from reference_table () */
+ unsigned int tag; /* table tag this blob was requested for */
+} hb_graphite2_tablelist_t;
+
+/* Per-face state of the Graphite2 shaper. */
+struct hb_graphite2_face_data_t
+{
+ hb_face_t *face; /* face whose tables hb_graphite2_get_table () serves */
+ gr_face *grface; /* Graphite face returned by gr_make_face () */
+ hb_atomic_ptr_t<hb_graphite2_tablelist_t> tlist; /* head of the table-blob list, updated with cmpexch () */
+};
+
+/* Table-access callback passed to gr_make_face (). `data` is the
+ * hb_graphite2_face_data_t; on return *len holds the table length and the
+ * result points at the table bytes. Blobs are kept in the face data's list so
+ * the returned pointer stays backed by a referenced blob. Returns nullptr if
+ * a list node cannot be allocated. */
+static const void *hb_graphite2_get_table (const void *data, unsigned int tag, size_t *len)
+{
+  hb_graphite2_face_data_t *face_data = (hb_graphite2_face_data_t *) data;
+
+  /* Search the cached list for this tag. */
+  hb_blob_t *blob = nullptr;
+  hb_graphite2_tablelist_t *node = face_data->tlist;
+  while (node && node->tag != tag)
+    node = node->next;
+  if (node)
+    blob = node->blob;
+
+  if (unlikely (!blob))
+  {
+    /* Not cached yet: reference the table and push a new node. */
+    blob = face_data->face->reference_table (tag);
+
+    hb_graphite2_tablelist_t *entry =
+      (hb_graphite2_tablelist_t *) calloc (1, sizeof (hb_graphite2_tablelist_t));
+    if (unlikely (!entry))
+    {
+      hb_blob_destroy (blob);
+      return nullptr;
+    }
+    entry->blob = blob;
+    entry->tag = tag;
+
+    /* Link in front of the current head; repeat until the compare-exchange
+     * on the head pointer succeeds. */
+    hb_graphite2_tablelist_t *head;
+    do
+    {
+      head = face_data->tlist;
+      entry->next = head;
+    }
+    while (unlikely (!face_data->tlist.cmpexch (head, entry)));
+  }
+
+  unsigned int table_length;
+  const char *table_data = hb_blob_get_data (blob, &table_length);
+  *len = table_length;
+  return table_data;
+}
+
+/* Creates the Graphite2 shaper data for `face`. Returns nullptr when the face
+ * has an empty Silf table, when allocation fails, or when gr_make_face ()
+ * fails. */
+hb_graphite2_face_data_t *
+_hb_graphite2_shaper_face_data_create (hb_face_t *face)
+{
+  /* The Silf table is referenced only to find out whether it has any
+   * content; the blob is released again right away. */
+  hb_blob_t *silf_blob = face->reference_table (HB_GRAPHITE2_TAG_SILF);
+  const bool has_silf = hb_blob_get_length (silf_blob) != 0;
+  hb_blob_destroy (silf_blob);
+  if (!has_silf)
+    return nullptr;
+
+  hb_graphite2_face_data_t *face_data =
+    (hb_graphite2_face_data_t *) calloc (1, sizeof (hb_graphite2_face_data_t));
+  if (unlikely (!face_data))
+    return nullptr;
+
+  face_data->face = face;
+  face_data->grface = gr_make_face (face_data, &hb_graphite2_get_table, gr_face_preloadAll);
+
+  if (unlikely (!face_data->grface))
+  {
+    free (face_data);
+    return nullptr;
+  }
+
+  return face_data;
+}
+
+/* Releases everything owned by the face data: each cached table blob and its
+ * list node, then the Graphite face, then the structure itself. */
+void
+_hb_graphite2_shaper_face_data_destroy (hb_graphite2_face_data_t *data)
+{
+  for (hb_graphite2_tablelist_t *node = data->tlist; node;)
+  {
+    hb_graphite2_tablelist_t *following = node->next;
+    hb_blob_destroy (node->blob);
+    free (node);
+    node = following;
+  }
+
+  gr_face_destroy (data->grface);
+
+  free (data);
+}
+
+/**
+ * hb_graphite2_face_get_gr_face:
+ * @face: #hb_face_t to query
+ *
+ * Fetches the Graphite2 gr_face corresponding to the specified
+ * #hb_face_t face object.
+ *
+ * Return value: the gr_face found
+ *
+ * Since: 0.9.10
+ */
+gr_face *
+hb_graphite2_face_get_gr_face (hb_face_t *face)
+{
+  const hb_graphite2_face_data_t *data = face->data.graphite2;
+  /* No Graphite shaper data for this face means there is no gr_face. */
+  if (!data)
+    return nullptr;
+  return data->grface;
+}
+
+
+/*
+ * shaper font data
+ */
+
+/* Empty per-font shaper data type: the create function below returns a
+ * sentinel value and the destroy function does nothing. */
+struct hb_graphite2_font_data_t {};
+
+hb_graphite2_font_data_t *
+_hb_graphite2_shaper_font_data_create (hb_font_t *font HB_UNUSED)
+{
+  /* Nothing is allocated per font; HB_SHAPER_DATA_SUCCEEDED is returned in
+   * place of a real object. */
+  hb_graphite2_font_data_t *sentinel = (hb_graphite2_font_data_t *) HB_SHAPER_DATA_SUCCEEDED;
+  return sentinel;
+}
+
+void
+_hb_graphite2_shaper_font_data_destroy (hb_graphite2_font_data_t *data HB_UNUSED)
+{
+ /* Intentionally empty: the create function allocates nothing. */
+}
+
+#ifndef HB_DISABLE_DEPRECATED
+/**
+ * hb_graphite2_font_get_gr_font:
+ *
+ * Since: 0.9.10
+ * Deprecated: 1.4.2
+ */
+gr_font *
+hb_graphite2_font_get_gr_font (hb_font_t *font HB_UNUSED)
+{
+ /* Deprecated entry point kept as a stub: it ignores `font` and always
+ * returns nullptr. */
+ return nullptr;
+}
+#endif
+
+
+/*
+ * shaper
+ */
+
+/* Bookkeeping record for one output cluster, filled in by
+ * _hb_graphite2_shape () while walking the Graphite segment's slots. */
+struct hb_graphite2_cluster_t {
+ unsigned int base_char; /* index of the cluster's first input character */
+ unsigned int num_chars; /* number of input characters covered */
+ unsigned int base_glyph; /* index of the cluster's first output glyph */
+ unsigned int num_glyphs; /* number of output glyphs in the cluster */
+ unsigned int cluster; /* cluster value copied from buffer->info[base_char] */
+ unsigned int advance; /* x advance accumulated for the cluster (xscale applied) */
+};
+
+/* Shapes `buffer` with Graphite2.
+ *
+ * Builds a Graphite segment from the buffer's code points, groups the
+ * segment's slots into clusters, then writes glyph ids, cluster values and
+ * positions back into the buffer.
+ *
+ * Returns false if the segment cannot be created or the buffer cannot be
+ * grown to provide enough scratch space; returns true otherwise, including
+ * the case where the segment has no slots (buffer->len is then set to 0). */
+hb_bool_t
+_hb_graphite2_shape (hb_shape_plan_t *shape_plan HB_UNUSED,
+ hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features)
+{
+ hb_face_t *face = font->face;
+ gr_face *grface = face->data.graphite2->grface;
+
+ /* Only the part of the language string before the first '-' is turned
+ * into the tag; a length of -1 is passed when there is no '-'. */
+ const char *lang = hb_language_to_string (hb_buffer_get_language (buffer));
+ const char *lang_end = lang ? strchr (lang, '-') : nullptr;
+ int lang_len = lang_end ? lang_end - lang : -1;
+ gr_feature_val *feats = gr_face_featureval_for_lang (grface, lang ? hb_tag_from_string (lang, lang_len) : 0);
+
+ /* Apply each requested feature the face has a reference for; tags with
+ * no feature reference are skipped. */
+ for (unsigned int i = 0; i < num_features; i++)
+ {
+ const gr_feature_ref *fref = gr_face_find_fref (grface, features[i].tag);
+ if (fref)
+ gr_fref_set_feature_value (fref, features[i].value, feats);
+ }
+
+ gr_segment *seg = nullptr;
+ const gr_slot *is;
+ unsigned int ci = 0, ic = 0;
+ unsigned int curradvx = 0, curradvy = 0;
+
+ unsigned int scratch_size;
+ hb_buffer_t::scratch_buffer_t *scratch = buffer->get_scratch_buffer (&scratch_size);
+
+ /* The input code points are copied into the scratch space and handed to
+ * Graphite as UTF-32. */
+ uint32_t *chars = (uint32_t *) scratch;
+
+ for (unsigned int i = 0; i < buffer->len; ++i)
+ chars[i] = buffer->info[i].codepoint;
+
+ /* TODO ensure_native_direction. */
+
+ /* Only the script is mapped to tags here (the language argument is
+ * HB_LANGUAGE_INVALID); the last script tag returned is the one used. */
+ hb_tag_t script_tag[HB_OT_MAX_TAGS_PER_SCRIPT];
+ unsigned int count = HB_OT_MAX_TAGS_PER_SCRIPT;
+ hb_ot_tags_from_script_and_language (hb_buffer_get_script (buffer),
+ HB_LANGUAGE_INVALID,
+ &count,
+ script_tag,
+ nullptr, nullptr);
+
+ /* NOTE(review): the literal flags 2 and 1 presumably correspond to
+ * graphite2's gr_nobidi and gr_rtl - confirm against graphite2/Segment.h. */
+ seg = gr_make_seg (nullptr, grface,
+ count ? script_tag[count - 1] : HB_OT_TAG_DEFAULT_SCRIPT,
+ feats,
+ gr_utf32, chars, buffer->len,
+ 2 | (hb_buffer_get_direction (buffer) == HB_DIRECTION_RTL ? 1 : 0));
+
+ if (unlikely (!seg)) {
+ if (feats) gr_featureval_destroy (feats);
+ return false;
+ }
+
+ unsigned int glyph_count = gr_seg_n_slots (seg);
+ if (unlikely (!glyph_count)) {
+ if (feats) gr_featureval_destroy (feats);
+ gr_seg_destroy (seg);
+ buffer->len = 0;
+ return true;
+ }
+
+ /* The result of this ensure () call is not checked; the loop below keeps
+ * doubling the allocation until the scratch space can hold both the
+ * cluster array and the glyph id array, and bails out if growing fails. */
+ buffer->ensure (glyph_count);
+ scratch = buffer->get_scratch_buffer (&scratch_size);
+ while ((DIV_CEIL (sizeof (hb_graphite2_cluster_t) * buffer->len, sizeof (*scratch)) +
+ DIV_CEIL (sizeof (hb_codepoint_t) * glyph_count, sizeof (*scratch))) > scratch_size)
+ {
+ if (unlikely (!buffer->ensure (buffer->allocated * 2)))
+ {
+ if (feats) gr_featureval_destroy (feats);
+ gr_seg_destroy (seg);
+ return false;
+ }
+ scratch = buffer->get_scratch_buffer (&scratch_size);
+ }
+
+/* Declares `name` as an array of `len` elements of Type located at the
+ * current front of the scratch space, then advances `scratch` and reduces
+ * `scratch_size` by the number of scratch units consumed. */
+#define ALLOCATE_ARRAY(Type, name, len) \
+ Type *name = (Type *) scratch; \
+ do { \
+ unsigned int _consumed = DIV_CEIL ((len) * sizeof (Type), sizeof (*scratch)); \
+ assert (_consumed <= scratch_size); \
+ scratch += _consumed; \
+ scratch_size -= _consumed; \
+ } while (0)
+
+ ALLOCATE_ARRAY (hb_graphite2_cluster_t, clusters, buffer->len);
+ ALLOCATE_ARRAY (hb_codepoint_t, gids, glyph_count);
+
+#undef ALLOCATE_ARRAY
+
+ memset (clusters, 0, sizeof (clusters[0]) * buffer->len);
+
+ hb_codepoint_t *pg = gids;
+ clusters[0].cluster = buffer->info[0].cluster;
+ unsigned int upem = hb_face_get_upem (face);
+ float xscale = (float) font->x_scale / upem;
+ float yscale = (float) font->y_scale / upem;
+ /* NOTE(review): yscale is additionally multiplied by yscale / xscale; the
+ * reason is not evident from this function - confirm the intent. */
+ yscale *= yscale / xscale;
+ unsigned int curradv = 0;
+ if (HB_DIRECTION_IS_BACKWARD(buffer->props.direction))
+ {
+ curradv = gr_slot_origin_X(gr_seg_first_slot(seg)) * xscale;
+ clusters[0].advance = gr_seg_advance_X(seg) * xscale - curradv;
+ }
+ else
+ clusters[0].advance = 0;
+ /* Walk the slots in segment order: record each glyph id and assign the
+ * slot to a cluster. `before` and `after` are treated as indices into the
+ * input characters; `ci` is the current cluster, `ic` the slot index. */
+ for (is = gr_seg_first_slot (seg), ic = 0; is; is = gr_slot_next_in_segment (is), ic++)
+ {
+ unsigned int before = gr_slot_before (is);
+ unsigned int after = gr_slot_after (is);
+ *pg = gr_slot_gid (is);
+ pg++;
+ /* The slot starts before the current cluster's first character: fold
+ * clusters into their predecessor until that is no longer the case. */
+ while (clusters[ci].base_char > before && ci)
+ {
+ clusters[ci-1].num_chars += clusters[ci].num_chars;
+ clusters[ci-1].num_glyphs += clusters[ci].num_glyphs;
+ clusters[ci-1].advance += clusters[ci].advance;
+ ci--;
+ }
+
+ /* Open a new cluster when insertion before this slot is allowed, the
+ * current cluster already covers characters, and the slot begins at or
+ * after the end of that coverage. */
+ if (gr_slot_can_insert_before (is) && clusters[ci].num_chars && before >= clusters[ci].base_char + clusters[ci].num_chars)
+ {
+ hb_graphite2_cluster_t *c = clusters + ci + 1;
+ c->base_char = clusters[ci].base_char + clusters[ci].num_chars;
+ c->cluster = buffer->info[c->base_char].cluster;
+ c->num_chars = before - c->base_char;
+ c->base_glyph = ic;
+ c->num_glyphs = 0;
+ if (HB_DIRECTION_IS_BACKWARD(buffer->props.direction))
+ {
+ c->advance = curradv - gr_slot_origin_X(is) * xscale;
+ curradv -= c->advance;
+ }
+ else
+ {
+ c->advance = 0;
+ clusters[ci].advance += gr_slot_origin_X(is) * xscale - curradv;
+ curradv += clusters[ci].advance;
+ }
+ ci++;
+ }
+ clusters[ci].num_glyphs++;
+
+ /* Extend the cluster's character coverage up to and including `after`. */
+ if (clusters[ci].base_char + clusters[ci].num_chars < after + 1)
+ clusters[ci].num_chars = after + 1 - clusters[ci].base_char;
+ }
+
+ /* Close the last cluster's advance; from here on `ci` is the number of
+ * clusters. */
+ if (HB_DIRECTION_IS_BACKWARD(buffer->props.direction))
+ clusters[ci].advance += curradv;
+ else
+ clusters[ci].advance += gr_seg_advance_X(seg) * xscale - curradv;
+ ci++;
+
+ /* Write glyph ids and cluster values into the buffer. info->var1 carries
+ * the cluster advance over to the positioning pass below. */
+ for (unsigned int i = 0; i < ci; ++i)
+ {
+ for (unsigned int j = 0; j < clusters[i].num_glyphs; ++j)
+ {
+ hb_glyph_info_t *info = &buffer->info[clusters[i].base_glyph + j];
+ info->codepoint = gids[clusters[i].base_glyph + j];
+ info->cluster = clusters[i].cluster;
+ info->var1.i32 = clusters[i].advance; // all glyphs in the cluster get the same advance
+ }
+ }
+ buffer->len = glyph_count;
+
+ /* Positioning. */
+ unsigned int currclus = UINT_MAX;
+ const hb_glyph_info_t *info = buffer->info;
+ hb_glyph_position_t *pPos = hb_buffer_get_glyph_positions (buffer, nullptr);
+ if (!HB_DIRECTION_IS_BACKWARD(buffer->props.direction))
+ {
+ /* Forward direction: offsets are the slot origin minus the advance
+ * accumulated so far. Only the first glyph seen for a cluster value gets
+ * the cluster's x advance; the others get 0. */
+ curradvx = 0;
+ for (is = gr_seg_first_slot (seg); is; pPos++, ++info, is = gr_slot_next_in_segment (is))
+ {
+ pPos->x_offset = gr_slot_origin_X (is) * xscale - curradvx;
+ pPos->y_offset = gr_slot_origin_Y (is) * yscale - curradvy;
+ if (info->cluster != currclus) {
+ pPos->x_advance = info->var1.i32;
+ curradvx += pPos->x_advance;
+ currclus = info->cluster;
+ } else
+ pPos->x_advance = 0.;
+
+ pPos->y_advance = gr_slot_advance_Y (is, grface, nullptr) * yscale;
+ curradvy += pPos->y_advance;
+ }
+ }
+ else
+ {
+ /* Backward direction: start from the segment's total advance and count
+ * down, then reverse the buffer's clusters at the end. */
+ curradvx = gr_seg_advance_X(seg) * xscale;
+ for (is = gr_seg_first_slot (seg); is; pPos++, info++, is = gr_slot_next_in_segment (is))
+ {
+ if (info->cluster != currclus)
+ {
+ pPos->x_advance = info->var1.i32;
+ curradvx -= pPos->x_advance;
+ currclus = info->cluster;
+ } else
+ pPos->x_advance = 0.;
+
+ pPos->y_advance = gr_slot_advance_Y (is, grface, nullptr) * yscale;
+ curradvy -= pPos->y_advance;
+ pPos->x_offset = gr_slot_origin_X (is) * xscale - info->var1.i32 - curradvx + pPos->x_advance;
+ pPos->y_offset = gr_slot_origin_Y (is) * yscale - curradvy;
+ }
+ hb_buffer_reverse_clusters (buffer);
+ }
+
+ if (feats) gr_featureval_destroy (feats);
+ gr_seg_destroy (seg);
+
+ buffer->unsafe_to_break_all ();
+
+ return true;
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-graphite2.h b/thirdparty/harfbuzz/src/hb-graphite2.h
new file mode 100644
index 0000000000..f299da9f71
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-graphite2.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2011 Martin Hosken
+ * Copyright © 2011 SIL International
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_GRAPHITE2_H
+#define HB_GRAPHITE2_H
+
+#include "hb.h"
+
+#include <graphite2/Font.h>
+
+HB_BEGIN_DECLS
+
+/**
+ * HB_GRAPHITE2_TAG_SILF:
+ *
+ * The #hb_tag_t tag for the `Silf` table, which holds Graphite
+ * features.
+ *
+ * For more information, see http://graphite.sil.org/
+ *
+ **/
+#define HB_GRAPHITE2_TAG_SILF HB_TAG('S','i','l','f')
+
+
+HB_EXTERN gr_face *
+hb_graphite2_face_get_gr_face (hb_face_t *face);
+
+#ifndef HB_DISABLE_DEPRECATED
+
+HB_EXTERN HB_DEPRECATED_FOR (hb_graphite2_face_get_gr_face) gr_font *
+hb_graphite2_font_get_gr_font (hb_font_t *font);
+
+#endif
+
+
+HB_END_DECLS
+
+#endif /* HB_GRAPHITE2_H */
diff --git a/thirdparty/harfbuzz/src/hb-icu.cc b/thirdparty/harfbuzz/src/hb-icu.cc
new file mode 100644
index 0000000000..008a39e414
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-icu.cc
@@ -0,0 +1,296 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2009 Keith Stribley
+ * Copyright © 2011 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifdef HAVE_ICU
+
+#include "hb-icu.h"
+
+#include "hb-machinery.hh"
+
+#include <unicode/uchar.h>
+#include <unicode/unorm2.h>
+#include <unicode/ustring.h>
+#include <unicode/utf16.h>
+#include <unicode/uversion.h>
+
+/* ICU extra semicolon, fixed since 65, https://github.com/unicode-org/icu/commit/480bec3 */
+#if U_ICU_VERSION_MAJOR_NUM < 65 && (defined(__GNUC__) || defined(__clang__))
+#define HB_ICU_EXTRA_SEMI_IGNORED
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wextra-semi-stmt"
+#endif
+
+/**
+ * SECTION:hb-icu
+ * @title: hb-icu
+ * @short_description: ICU integration
+ * @include: hb-icu.h
+ *
+ * Functions for using HarfBuzz with the International Components for Unicode
+ * (ICU) library. HarfBuzz supports using ICU to provide Unicode data, by attaching
+ * ICU functions to the virtual methods in a #hb_unicode_funcs_t function
+ * structure.
+ **/
+
+/**
+ * hb_icu_script_to_script:
+ * @script: The UScriptCode identifier to query
+ *
+ * Fetches the #hb_script_t script that corresponds to the
+ * specified UScriptCode identifier.
+ *
+ * Return value: the #hb_script_t script found
+ *
+ **/
+
+hb_script_t
+hb_icu_script_to_script (UScriptCode script)
+{
+  const bool is_invalid = (script == USCRIPT_INVALID_CODE);
+  if (unlikely (is_invalid))
+    return HB_SCRIPT_INVALID;
+
+  /* Convert through ICU's short name for the script. */
+  const char *short_name = uscript_getShortName (script);
+  return hb_script_from_string (short_name, -1);
+}
+
+/**
+ * hb_icu_script_from_script:
+ * @script: The #hb_script_t script to query
+ *
+ * Fetches the UScriptCode identifier that corresponds to the
+ * specified #hb_script_t script.
+ *
+ * Return value: the UScriptCode identifier found
+ *
+ **/
+UScriptCode
+hb_icu_script_from_script (hb_script_t script)
+{
+  if (unlikely (script == HB_SCRIPT_INVALID))
+    return USCRIPT_INVALID_CODE;
+
+  /* Reverse lookup by linear scan: try every ICU script code up to the
+   * maximum UCHAR_SCRIPT value and return the first one whose forward
+   * conversion equals `script`. */
+  const unsigned int script_count = 1 + u_getIntPropertyMaxValue (UCHAR_SCRIPT);
+  unsigned int code = 0;
+  while (code < script_count)
+  {
+    const UScriptCode candidate = (UScriptCode) code;
+    if (unlikely (hb_icu_script_to_script (candidate) == script))
+      return candidate;
+    code++;
+  }
+
+  return USCRIPT_UNKNOWN;
+}
+
+
+/* Combining-class callback: returns ICU's combining class for `unicode`,
+ * cast to the HarfBuzz enum type. */
+static hb_unicode_combining_class_t
+hb_icu_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+				hb_codepoint_t      unicode,
+				void               *user_data HB_UNUSED)
+{
+  const auto icu_class = u_getCombiningClass (unicode);
+  return (hb_unicode_combining_class_t) icu_class;
+}
+
+/* General-category callback: translates ICU's UCHAR_GENERAL_CATEGORY value
+ * for `unicode` into the matching HarfBuzz enum value. Any value not listed
+ * in the switch yields HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED. */
+static hb_unicode_general_category_t
+hb_icu_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+ hb_codepoint_t unicode,
+ void *user_data HB_UNUSED)
+{
+ switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY))
+ {
+ case U_UNASSIGNED: return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
+
+ /* Letters */
+ case U_UPPERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER;
+ case U_LOWERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER;
+ case U_TITLECASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER;
+ case U_MODIFIER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER;
+ case U_OTHER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
+
+ /* Marks */
+ case U_NON_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK;
+ case U_ENCLOSING_MARK: return HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK;
+ case U_COMBINING_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK;
+
+ /* Numbers */
+ case U_DECIMAL_DIGIT_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER;
+ case U_LETTER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER;
+ case U_OTHER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER;
+
+ /* Separators */
+ case U_SPACE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR;
+ case U_LINE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR;
+ case U_PARAGRAPH_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR;
+
+ /* Control, format, private use, surrogate */
+ case U_CONTROL_CHAR: return HB_UNICODE_GENERAL_CATEGORY_CONTROL;
+ case U_FORMAT_CHAR: return HB_UNICODE_GENERAL_CATEGORY_FORMAT;
+ case U_PRIVATE_USE_CHAR: return HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE;
+ case U_SURROGATE: return HB_UNICODE_GENERAL_CATEGORY_SURROGATE;
+
+
+ /* Punctuation */
+ case U_DASH_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION;
+ case U_START_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION;
+ case U_END_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION;
+ case U_CONNECTOR_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION;
+ case U_OTHER_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION;
+
+ /* Symbols */
+ case U_MATH_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL;
+ case U_CURRENCY_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL;
+ case U_MODIFIER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL;
+ case U_OTHER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL;
+
+ /* Quotation-style punctuation */
+ case U_INITIAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION;
+ case U_FINAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION;
+ }
+
+ return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
+}
+
+/* Mirroring callback: returns whatever u_charMirror () reports for
+ * `unicode`. */
+static hb_codepoint_t
+hb_icu_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+			  hb_codepoint_t      unicode,
+			  void               *user_data HB_UNUSED)
+{
+  hb_codepoint_t mirrored = u_charMirror (unicode);
+  return mirrored;
+}
+
+/* Script callback: asks ICU for the script of `unicode` and converts it to
+ * an hb_script_t; an ICU error yields HB_SCRIPT_UNKNOWN. */
+static hb_script_t
+hb_icu_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+		       hb_codepoint_t      unicode,
+		       void               *user_data HB_UNUSED)
+{
+  UErrorCode status = U_ZERO_ERROR;
+  const UScriptCode icu_script = uscript_getScript (unicode, &status);
+
+  const bool lookup_failed = U_FAILURE (status);
+  if (unlikely (lookup_failed))
+    return HB_SCRIPT_UNKNOWN;
+
+  return hb_icu_script_to_script (icu_script);
+}
+
+/* Compose callback: `user_data` is the UNormalizer2 registered with the
+ * callback. Stores the composition of `a` and `b` in *ab and returns true,
+ * or returns false without writing *ab when unorm2_composePair () gives a
+ * negative result. */
+static hb_bool_t
+hb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+			hb_codepoint_t      a,
+			hb_codepoint_t      b,
+			hb_codepoint_t     *ab,
+			void               *user_data)
+{
+  const UNormalizer2 *normalizer = (const UNormalizer2 *) user_data;
+  const UChar32 composed = unorm2_composePair (normalizer, a, b);
+  if (composed >= 0)
+  {
+    *ab = composed;
+    return true;
+  }
+  return false;
+}
+
+/* Decompose callback: `user_data` is the UNormalizer2 registered with the
+ * callback. Requests the raw decomposition of `ab` and writes the resulting
+ * code point(s) to *a and *b. Returns false on an ICU error or a negative
+ * length. */
+static hb_bool_t
+hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+ hb_codepoint_t ab,
+ hb_codepoint_t *a,
+ hb_codepoint_t *b,
+ void *user_data)
+{
+ const UNormalizer2 *normalizer = (const UNormalizer2 *) user_data;
+ UChar decomposed[4];
+ int len;
+ UErrorCode icu_err = U_ZERO_ERROR;
+ len = unorm2_getRawDecomposition (normalizer, ab, decomposed,
+ ARRAY_LENGTH (decomposed), &icu_err);
+ if (U_FAILURE (icu_err) || len < 0) return false;
+
+ /* From here on `len` counts code points rather than UTF-16 units. */
+ len = u_countChar32 (decomposed, len);
+ if (len == 1)
+ {
+ /* Single code point: succeed only if it differs from the input. */
+ U16_GET_UNSAFE (decomposed, 0, *a);
+ *b = 0;
+ return *a != ab;
+ }
+ else if (len == 2)
+ {
+ /* `len` is reused as the UTF-16 read offset that U16_NEXT_UNSAFE
+ * advances. */
+ len = 0;
+ U16_NEXT_UNSAFE (decomposed, len, *a);
+ U16_NEXT_UNSAFE (decomposed, len, *b);
+ }
+ /* NOTE(review): for any other code point count this returns true without
+ * writing *a or *b in this function - confirm that callers tolerate it. */
+ return true;
+}
+
+
+#if HB_USE_ATEXIT
+static void free_static_icu_funcs ();
+#endif
+
+/* Lazy loader for the ICU-backed unicode funcs instance. */
+static struct hb_icu_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_icu_unicode_funcs_lazy_loader_t>
+{
+  /* Builds the funcs object: every callback is an hb_icu_unicode_* function
+   * from this file, and the NFC normalizer is attached as user data to the
+   * compose and decompose callbacks. The object is made immutable before
+   * being returned. */
+  static hb_unicode_funcs_t *create ()
+  {
+    UErrorCode icu_err = U_ZERO_ERROR;
+    void *user_data = (void *) unorm2_getNFCInstance (&icu_err);
+    assert (user_data);
+
+    hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
+
+    /* Callbacks that need no user data. */
+    hb_unicode_funcs_set_combining_class_func (funcs, hb_icu_unicode_combining_class, nullptr, nullptr);
+    hb_unicode_funcs_set_general_category_func (funcs, hb_icu_unicode_general_category, nullptr, nullptr);
+    hb_unicode_funcs_set_mirroring_func (funcs, hb_icu_unicode_mirroring, nullptr, nullptr);
+    hb_unicode_funcs_set_script_func (funcs, hb_icu_unicode_script, nullptr, nullptr);
+
+    /* Callbacks that receive the normalizer. */
+    hb_unicode_funcs_set_compose_func (funcs, hb_icu_unicode_compose, user_data, nullptr);
+    hb_unicode_funcs_set_decompose_func (funcs, hb_icu_unicode_decompose, user_data, nullptr);
+
+    hb_unicode_funcs_make_immutable (funcs);
+
+#if HB_USE_ATEXIT
+    atexit (free_static_icu_funcs);
+#endif
+
+    return funcs;
+  }
+} static_icu_funcs;
+
+#if HB_USE_ATEXIT
+/* Exit hook registered with atexit() by the loader's create(): releases the
+ * instance held by static_icu_funcs. */
+static void
+free_static_icu_funcs ()
+{
+  static_icu_funcs.free_instance ();
+}
+#endif
+
+/**
+ * hb_icu_get_unicode_funcs:
+ *
+ * Fetches a Unicode-functions structure that is populated
+ * with the appropriate ICU function for each method.
+ *
+ * Return value: (transfer none): a pointer to the #hb_unicode_funcs_t Unicode-functions structure
+ *
+ * Since: 0.9.38
+ **/
+hb_unicode_funcs_t *
+hb_icu_get_unicode_funcs ()
+{
+  /* Documented as (transfer none): the caller does not own the result. */
+  hb_unicode_funcs_t *icu_funcs = static_icu_funcs.get_unconst ();
+  return icu_funcs;
+}
+
+#ifdef HB_ICU_EXTRA_SEMI_IGNORED
+#pragma GCC diagnostic pop
+#endif
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-icu.h b/thirdparty/harfbuzz/src/hb-icu.h
new file mode 100644
index 0000000000..2db6a7b679
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-icu.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2011 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_ICU_H
+#define HB_ICU_H
+
+#include "hb.h"
+
+#include <unicode/uscript.h>
+
+HB_BEGIN_DECLS
+
+
+HB_EXTERN hb_script_t
+hb_icu_script_to_script (UScriptCode script); /* Takes an ICU UScriptCode, returns an hb_script_t. */
+
+HB_EXTERN UScriptCode
+hb_icu_script_from_script (hb_script_t script); /* Takes an hb_script_t, returns an ICU UScriptCode. */
+
+
+HB_EXTERN hb_unicode_funcs_t *
+hb_icu_get_unicode_funcs (void); /* Returns the ICU-backed unicode funcs (transfer none, per the definition's doc comment). */
+
+
+HB_END_DECLS
+
+#endif /* HB_ICU_H */
diff --git a/thirdparty/harfbuzz/src/hb-iter.hh b/thirdparty/harfbuzz/src/hb-iter.hh
new file mode 100644
index 0000000000..981c5c218c
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-iter.hh
@@ -0,0 +1,939 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ * Copyright © 2019 Facebook, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ * Facebook Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_ITER_HH
+#define HB_ITER_HH
+
+#include "hb.hh"
+#include "hb-algs.hh"
+#include "hb-meta.hh"
+
+
+/* Unified iterator object.
+ *
+ * The goal of this template is to make the same iterator interface
+ * available to all types, and make it very easy and compact to use.
+ * Iterator (hb_iter_t) objects are small, light-weight objects that can be
+ * copied by value. If the collection / object being iterated on
+ * is writable, then the iterator returns lvalues, otherwise it
+ * returns rvalues.
+ *
+ * TODO Document more.
+ *
+ * If iterator implementation implements operator!=, then can be
+ * used in range-based for loop. That comes free if the iterator
+ * is random-access. Otherwise, the range-based for loop incurs
+ * one traversal to find end(), which can be avoided if written
+ * as a while-style for loop, or if iterator implements a faster
+ * __end__() method.
+ * TODO When opting in for C++17, address this by changing return
+ * type of .end()?
+ */
+
+/*
+ * Base classes for iterators.
+ */
+
+/* Base class for all iterators (CRTP): each public operator forwards to a __method__ () supplied by the derived iter_t. */
+template <typename iter_t, typename Item = typename iter_t::__item_t__>
+struct hb_iter_t
+{
+ typedef Item item_t;
+ constexpr unsigned get_item_size () const { return hb_static_size (Item); }
+ static constexpr bool is_iterator = true;
+ static constexpr bool is_random_access_iterator = false; /* Default; derived iterators may shadow this. */
+ static constexpr bool is_sorted_iterator = false; /* Default; derived iterators may shadow this. */
+
+ private:
+ /* https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern */
+ const iter_t* thiz () const { return static_cast<const iter_t *> (this); }
+ iter_t* thiz () { return static_cast< iter_t *> (this); }
+ public:
+
+ /* TODO:
+ * Port operators below to use hb_enable_if to sniff which method implements
+ * an operator and use it, and remove hb_iter_fallback_mixin_t completely. */
+
+ /* Operators.  Each forwards to the matching __method__ () on the derived iterator via thiz (). */
+ iter_t iter () const { return *thiz(); }
+ iter_t operator + () const { return *thiz(); } /* Unary plus: returns a copy, same as iter (). */
+ iter_t begin () const { return *thiz(); }
+ iter_t end () const { return thiz()->__end__ (); }
+ explicit operator bool () const { return thiz()->__more__ (); }
+ unsigned len () const { return thiz()->__len__ (); }
+ /* The following can only be enabled if item_t is reference type. Otherwise
+ * it will be returning pointer to temporary rvalue.
+ * TODO Use a wrapper return type to fix for non-reference type. */
+ template <typename T = item_t,
+ hb_enable_if (hb_is_reference (T))>
+ hb_remove_reference<item_t>* operator -> () const { return hb_addressof (**thiz()); }
+ item_t operator * () const { return thiz()->__item__ (); }
+ item_t operator * () { return thiz()->__item__ (); }
+ item_t operator [] (unsigned i) const { return thiz()->__item_at__ (i); }
+ item_t operator [] (unsigned i) { return thiz()->__item_at__ (i); }
+ iter_t& operator += (unsigned count) & { thiz()->__forward__ (count); return *thiz(); }
+ iter_t operator += (unsigned count) && { thiz()->__forward__ (count); return *thiz(); } /* Rvalue overloads return a copy, not a reference. */
+ iter_t& operator ++ () & { thiz()->__next__ (); return *thiz(); }
+ iter_t operator ++ () && { thiz()->__next__ (); return *thiz(); }
+ iter_t& operator -= (unsigned count) & { thiz()->__rewind__ (count); return *thiz(); }
+ iter_t operator -= (unsigned count) && { thiz()->__rewind__ (count); return *thiz(); }
+ iter_t& operator -- () & { thiz()->__prev__ (); return *thiz(); }
+ iter_t operator -- () && { thiz()->__prev__ (); return *thiz(); }
+ iter_t operator + (unsigned count) const { auto c = thiz()->iter (); c += count; return c; }
+ friend iter_t operator + (unsigned count, const iter_t &it) { return it + count; }
+ iter_t operator ++ (int) { iter_t c (*thiz()); ++*thiz(); return c; }
+ iter_t operator - (unsigned count) const { auto c = thiz()->iter (); c -= count; return c; }
+ iter_t operator -- (int) { iter_t c (*thiz()); --*thiz(); return c; }
+ template <typename T>
+ iter_t& operator >> (T &v) & { v = **thiz(); ++*thiz(); return *thiz(); } /* Read the current item into v, then advance. */
+ template <typename T>
+ iter_t operator >> (T &v) && { v = **thiz(); ++*thiz(); return *thiz(); }
+ template <typename T>
+ iter_t& operator << (const T v) & { **thiz() = v; ++*thiz(); return *thiz(); } /* Assign v to the current item, then advance. */
+ template <typename T>
+ iter_t operator << (const T v) && { **thiz() = v; ++*thiz(); return *thiz(); }
+
+ protected: /* Construction and copying are reserved for derived iterators. */
+ hb_iter_t () = default;
+ hb_iter_t (const hb_iter_t &o HB_UNUSED) = default;
+ hb_iter_t (hb_iter_t &&o HB_UNUSED) = default;
+ hb_iter_t& operator = (const hb_iter_t &o HB_UNUSED) = default;
+ hb_iter_t& operator = (hb_iter_t &&o HB_UNUSED) = default;
+};
+
+#define HB_ITER_USING(Name) /* Re-exports the iterator interface of base class Name into the current class scope. */ \
+ using item_t = typename Name::item_t; \
+ using Name::begin; \
+ using Name::end; \
+ using Name::get_item_size; \
+ using Name::is_iterator; \
+ using Name::iter; \
+ using Name::operator bool; \
+ using Name::len; \
+ using Name::operator ->; \
+ using Name::operator *; \
+ using Name::operator []; \
+ using Name::operator +=; \
+ using Name::operator ++; \
+ using Name::operator -=; \
+ using Name::operator --; \
+ using Name::operator +; \
+ using Name::operator -; \
+ using Name::operator >>; \
+ using Name::operator <<; \
+ static_assert (true, "") /* No trailing ';' here: presumably so the macro use site supplies it. */
+
+/* hb_iter_type<X>: type of X's .iter (); hb_item_type<X>: type of dereferencing that iterator.  X is passed through hb_deref first. */
+template <typename Iterable>
+using hb_iter_type = decltype (hb_deref (hb_declval (Iterable)).iter ());
+template <typename Iterable>
+using hb_item_type = decltype (*hb_deref (hb_declval (Iterable)).iter ());
+
+
+template <typename> struct hb_array_t;
+template <typename> struct hb_sorted_array_t;
+
+struct /* hb_iter: returns c.iter () for an iterable, or wraps a C array in an hb_array_t. */
+{
+ template <typename T> hb_iter_type<T>
+ operator () (T&& c) const
+ { return hb_deref (hb_forward<T> (c)).iter (); }
+
+ /* Specialization for C arrays. */
+
+ template <typename Type> inline hb_array_t<Type>
+ operator () (Type *array, unsigned int length) const /* Pointer plus explicit length. */
+ { return hb_array_t<Type> (array, length); }
+
+ template <typename Type, unsigned int length> hb_array_t<Type>
+ operator () (Type (&array)[length]) const /* Array reference: length is deduced from the type. */
+ { return hb_array_t<Type> (array, length); }
+
+}
+HB_FUNCOBJ (hb_iter);
+struct /* hb_len: function-object form of c.len (). */
+{
+ template <typename T> unsigned
+ operator () (T&& c) const
+ { return c.len (); } /* Requires a len () member on c. */
+
+}
+HB_FUNCOBJ (hb_len);
+
+/* Mixin to fill in what the subclass doesn't provide: each __method__ () here is written in terms of the others, so iter_t only implements one per group. */
+template <typename iter_t, typename item_t = typename iter_t::__item_t__>
+struct hb_iter_fallback_mixin_t
+{
+ private:
+ /* https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern */
+ const iter_t* thiz () const { return static_cast<const iter_t *> (this); }
+ iter_t* thiz () { return static_cast< iter_t *> (this); }
+ public:
+
+ /* Access: Implement __item__(), or __item_at__() if random-access. */
+ item_t __item__ () const { return (*thiz())[0]; }
+ item_t __item_at__ (unsigned i) const { return *(*thiz() + i); }
+
+ /* Termination: Implement __more__(), or __len__() if random-access. */
+ bool __more__ () const { return bool (thiz()->len ()); }
+ unsigned __len__ () const /* Counts by walking a copy; pre-increment avoids an extra iterator copy per step. */
+ { iter_t c (*thiz()); unsigned l = 0; while (c) { ++c; ++l; } return l; }
+
+ /* Advancing: Implement __next__(), or __forward__() if random-access. */
+ void __next__ () { *thiz() += 1; }
+ void __forward__ (unsigned n) { while (*thiz() && n--) ++*thiz(); }
+
+ /* Rewinding: Implement __prev__() or __rewind__() if bidirectional. */
+ void __prev__ () { *thiz() -= 1; }
+ void __rewind__ (unsigned n) { while (*thiz() && n--) --*thiz(); }
+
+ /* Range-based for: Implement __end__() if can be done faster,
+ * and operator!=. */
+ iter_t __end__ () const
+ {
+ if (thiz()->is_random_access_iterator)
+ return *thiz() + thiz()->len ();
+ /* Above expression loops twice. Following loops once. */
+ auto it = *thiz();
+ while (it) ++it;
+ return it;
+ }
+
+ protected: /* Construction and copying are reserved for derived iterators. */
+ hb_iter_fallback_mixin_t () = default;
+ hb_iter_fallback_mixin_t (const hb_iter_fallback_mixin_t &o HB_UNUSED) = default;
+ hb_iter_fallback_mixin_t (hb_iter_fallback_mixin_t &&o HB_UNUSED) = default;
+ hb_iter_fallback_mixin_t& operator = (const hb_iter_fallback_mixin_t &o HB_UNUSED) = default;
+ hb_iter_fallback_mixin_t& operator = (hb_iter_fallback_mixin_t &&o HB_UNUSED) = default;
+};
+
+template <typename iter_t, typename item_t = typename iter_t::__item_t__>
+struct hb_iter_with_fallback_t : /* Convenience base: hb_iter_t operators plus the fallback __method__ () implementations. */
+ hb_iter_t<iter_t, item_t>,
+ hb_iter_fallback_mixin_t<iter_t, item_t>
+{
+ protected: /* Construction and copying are reserved for derived iterators. */
+ hb_iter_with_fallback_t () = default;
+ hb_iter_with_fallback_t (const hb_iter_with_fallback_t &o HB_UNUSED) = default;
+ hb_iter_with_fallback_t (hb_iter_with_fallback_t &&o HB_UNUSED) = default;
+ hb_iter_with_fallback_t& operator = (const hb_iter_with_fallback_t &o HB_UNUSED) = default;
+ hb_iter_with_fallback_t& operator = (hb_iter_with_fallback_t &&o HB_UNUSED) = default;
+};
+
+/*
+ * Meta-programming predicates.
+ */
+
+/* hb_is_iterator() / hb_is_iterator_of(): true when an Iter* is accepted where an hb_iter_t<Iter, Item>* is expected. */
+
+template<typename Iter, typename Item>
+struct hb_is_iterator_of
+{
+ template <typename Item2 = Item>
+ static hb_true_type impl (hb_priority<2>, hb_iter_t<Iter, hb_type_identity<Item2>> *); /* Tried first (higher priority). */
+ static hb_false_type impl (hb_priority<0>, const void *); /* Fallback: any pointer matches. */
+
+ public:
+ static constexpr bool value = decltype (impl (hb_prioritize, hb_declval (Iter*)))::value;
+};
+#define hb_is_iterator_of(Iter, Item) hb_is_iterator_of<Iter, Item>::value
+#define hb_is_iterator(Iter) hb_is_iterator_of (Iter, typename Iter::item_t)
+
+/* hb_is_iterable(): true when the expression T.iter () is well-formed. */
+
+template <typename T>
+struct hb_is_iterable
+{
+ private:
+
+ template <typename U>
+ static auto impl (hb_priority<1>) -> decltype (hb_declval (U).iter (), hb_true_type ()); /* Viable only if U has .iter (). */
+
+ template <typename>
+ static hb_false_type impl (hb_priority<0>); /* Fallback. */
+
+ public:
+ static constexpr bool value = decltype (impl<T> (hb_prioritize))::value;
+};
+#define hb_is_iterable(Iterable) hb_is_iterable<Iterable>::value
+
+/* hb_is_source_of() / hb_is_sink_of(): can items of type Item be read from / written to Iter? */
+
+template<typename Iter, typename Item>
+struct hb_is_source_of
+{
+ private:
+ template <typename Iter2 = Iter,
+ hb_enable_if (hb_is_convertible (typename Iter2::item_t, hb_add_lvalue_reference<hb_add_const<Item>>))>
+ static hb_true_type impl (hb_priority<2>); /* Case 1: item_t converts to const Item&. */
+ template <typename Iter2 = Iter>
+ static auto impl (hb_priority<1>) -> decltype (hb_declval (Iter2) >> hb_declval (Item &), hb_true_type ()); /* Case 2: iter >> item compiles. */
+ static hb_false_type impl (hb_priority<0>); /* Otherwise: not a source. */
+
+ public:
+ static constexpr bool value = decltype (impl (hb_prioritize))::value;
+};
+#define hb_is_source_of(Iter, Item) hb_is_source_of<Iter, Item>::value
+
+template<typename Iter, typename Item>
+struct hb_is_sink_of /* True when Item values can be written through Iter; see the two cases below. */
+{
+ private:
+ template <typename Iter2 = Iter,
+ hb_enable_if (hb_is_convertible (typename Iter2::item_t, hb_add_lvalue_reference<Item>))>
+ static hb_true_type impl (hb_priority<2>); /* Case 1: item_t converts to Item&. */
+ template <typename Iter2 = Iter>
+ static auto impl (hb_priority<1>) -> decltype (hb_declval (Iter2) << hb_declval (Item), hb_true_type ()); /* Case 2: iter << item compiles. */
+ static hb_false_type impl (hb_priority<0>); /* Otherwise: not a sink. */
+
+ public:
+ static constexpr bool value = decltype (impl (hb_prioritize))::value;
+};
+#define hb_is_sink_of(Iter, Item) hb_is_sink_of<Iter, Item>::value
+
+/* Commonly used shorthand: Iter is a source of Item AND declares itself sorted (Iter::is_sorted_iterator). */
+#define hb_is_sorted_source_of(Iter, Item) \
+ (hb_is_source_of(Iter, Item) && Iter::is_sorted_iterator)
+
+
+/* Range-based 'for' for iterables: free begin () / end () that forward to hb_iter (iterable).begin () / .end (). */
+
+template <typename Iterable,
+ hb_requires (hb_is_iterable (Iterable))>
+static inline auto begin (Iterable&& iterable) HB_AUTO_RETURN (hb_iter (iterable).begin ())
+
+template <typename Iterable,
+ hb_requires (hb_is_iterable (Iterable))>
+static inline auto end (Iterable&& iterable) HB_AUTO_RETURN (hb_iter (iterable).end ())
+
+/* Range-based for finds free begin()/end() through ADL only, so each namespace must declare them.
+ * Do it for namespace OT. */
+namespace OT {
+
+template <typename Iterable,
+ hb_requires (hb_is_iterable (Iterable))>
+static inline auto begin (Iterable&& iterable) HB_AUTO_RETURN (hb_iter (iterable).begin ())
+
+template <typename Iterable,
+ hb_requires (hb_is_iterable (Iterable))>
+static inline auto end (Iterable&& iterable) HB_AUTO_RETURN (hb_iter (iterable).end ())
+
+}
+
+
+/*
+ * Adaptors, combiners, etc.
+ */
+
+template <typename Lhs, typename Rhs,
+ hb_requires (hb_is_iterator (Lhs))>
+static inline auto /* Pipe syntax: it | f  is  f (it); only enabled when the left side is an iterator. */
+operator | (Lhs&& lhs, Rhs&& rhs) HB_AUTO_RETURN (hb_forward<Rhs> (rhs) (hb_forward<Lhs> (lhs)))
+
+/* hb_map(), hb_filter(), hb_reduce() */
+
+enum class hb_function_sortedness_t { /* How a mapping affects hb_map_iter_t::is_sorted_iterator. */
+ NOT_SORTED, /* Result is_sorted_iterator = false. */
+ RETAINS_SORTING, /* Result inherits Iter::is_sorted_iterator. */
+ SORTED, /* Result is_sorted_iterator = true. */
+};
+
+template <typename Iter, typename Proj, hb_function_sortedness_t Sorted,
+ hb_requires (hb_is_iterator (Iter))>
+struct hb_map_iter_t :
+ hb_iter_t<hb_map_iter_t<Iter, Proj, Sorted>,
+ decltype (hb_get (hb_declval (Proj), *hb_declval (Iter)))>
+{ /* Iterator adaptor: yields hb_get (f, item) for each item of the wrapped iterator. */
+ hb_map_iter_t (const Iter& it, Proj f_) : it (it), f (f_) {}
+
+ typedef decltype (hb_get (hb_declval (Proj), *hb_declval (Iter))) __item_t__;
+ static constexpr bool is_random_access_iterator = Iter::is_random_access_iterator;
+ static constexpr bool is_sorted_iterator = /* Decided by the Sorted template argument. */
+ Sorted == hb_function_sortedness_t::SORTED ? true :
+ Sorted == hb_function_sortedness_t::RETAINS_SORTING ? Iter::is_sorted_iterator :
+ false;
+ __item_t__ __item__ () const { return hb_get (f.get (), *it); }
+ __item_t__ __item_at__ (unsigned i) const { return hb_get (f.get (), it[i]); }
+ bool __more__ () const { return bool (it); } /* Everything below delegates to the wrapped iterator. */
+ unsigned __len__ () const { return it.len (); }
+ void __next__ () { ++it; }
+ void __forward__ (unsigned n) { it += n; }
+ void __prev__ () { --it; }
+ void __rewind__ (unsigned n) { it -= n; }
+ hb_map_iter_t __end__ () const { return hb_map_iter_t (it.end (), f); }
+ bool operator != (const hb_map_iter_t& o) const /* Compares positions only; the projection is not compared. */
+ { return it != o.it; }
+
+ private:
+ Iter it;
+ hb_reference_wrapper<Proj> f;
+};
+
+template <typename Proj, hb_function_sortedness_t Sorted>
+struct hb_map_iter_factory_t /* Holds a projection; calling it with an iterator builds the hb_map_iter_t. */
+{
+ hb_map_iter_factory_t (Proj f) : f (f) {}
+
+ template <typename Iter,
+ hb_requires (hb_is_iterator (Iter))>
+ hb_map_iter_t<Iter, Proj, Sorted>
+ operator () (Iter it)
+ { return hb_map_iter_t<Iter, Proj, Sorted> (it, f); }
+
+ private:
+ Proj f;
+};
+struct /* hb_map (f): factory for a mapping whose result is marked NOT_SORTED. */
+{
+ template <typename Proj>
+ hb_map_iter_factory_t<Proj, hb_function_sortedness_t::NOT_SORTED>
+ operator () (Proj&& f) const
+ { return hb_map_iter_factory_t<Proj, hb_function_sortedness_t::NOT_SORTED> (f); }
+}
+HB_FUNCOBJ (hb_map);
+struct /* hb_map_retains_sorting (f): like hb_map, but the result inherits the input's sortedness flag. */
+{
+ template <typename Proj>
+ hb_map_iter_factory_t<Proj, hb_function_sortedness_t::RETAINS_SORTING>
+ operator () (Proj&& f) const
+ { return hb_map_iter_factory_t<Proj, hb_function_sortedness_t::RETAINS_SORTING> (f); }
+}
+HB_FUNCOBJ (hb_map_retains_sorting);
+struct /* hb_map_sorted (f): like hb_map, but the result is always marked sorted. */
+{
+ template <typename Proj>
+ hb_map_iter_factory_t<Proj, hb_function_sortedness_t::SORTED>
+ operator () (Proj&& f) const
+ { return hb_map_iter_factory_t<Proj, hb_function_sortedness_t::SORTED> (f); }
+}
+HB_FUNCOBJ (hb_map_sorted);
+
+template <typename Iter, typename Pred, typename Proj,
+ hb_requires (hb_is_iterator (Iter))>
+struct hb_filter_iter_t : /* Iterator adaptor: exposes only items for which hb_has (p, hb_get (f, item)) holds. */
+ hb_iter_with_fallback_t<hb_filter_iter_t<Iter, Pred, Proj>,
+ typename Iter::item_t>
+{
+ hb_filter_iter_t (const Iter& it_, Pred p_, Proj f_) : it (it_), p (p_), f (f_)
+ { while (it && !hb_has (p.get (), hb_get (f.get (), *it))) ++it; } /* Skip leading non-matching items. */
+
+ typedef typename Iter::item_t __item_t__;
+ static constexpr bool is_sorted_iterator = Iter::is_sorted_iterator;
+ __item_t__ __item__ () const { return *it; }
+ bool __more__ () const { return bool (it); }
+ void __next__ () { do ++it; while (it && !hb_has (p.get (), hb_get (f.get (), *it))); } /* Step at least once, then skip non-matches. */
+ void __prev__ () { do --it; while (it && !hb_has (p.get (), hb_get (f.get (), *it))); }
+ hb_filter_iter_t __end__ () const { return hb_filter_iter_t (it.end (), p, f); }
+ bool operator != (const hb_filter_iter_t& o) const /* Compares positions only. */
+ { return it != o.it; }
+
+ private:
+ Iter it;
+ hb_reference_wrapper<Pred> p;
+ hb_reference_wrapper<Proj> f;
+};
+template <typename Pred, typename Proj>
+struct hb_filter_iter_factory_t /* Holds predicate + projection; calling it with an iterator builds the hb_filter_iter_t. */
+{
+ hb_filter_iter_factory_t (Pred p, Proj f) : p (p), f (f) {}
+
+ template <typename Iter,
+ hb_requires (hb_is_iterator (Iter))>
+ hb_filter_iter_t<Iter, Pred, Proj>
+ operator () (Iter it)
+ { return hb_filter_iter_t<Iter, Pred, Proj> (it, p, f); }
+
+ private:
+ Pred p;
+ Proj f;
+};
+struct /* hb_filter (p, f): factory for a filter; both arguments default to hb_identity. */
+{
+ template <typename Pred = decltype ((hb_identity)),
+ typename Proj = decltype ((hb_identity))>
+ hb_filter_iter_factory_t<Pred, Proj>
+ operator () (Pred&& p = hb_identity, Proj&& f = hb_identity) const
+ { return hb_filter_iter_factory_t<Pred, Proj> (p, f); }
+}
+HB_FUNCOBJ (hb_filter);
+
+template <typename Redu, typename InitT>
+struct hb_reduce_t /* Left fold: value = r (value, item) over all items, starting from init_value. */
+{
+ hb_reduce_t (Redu r, InitT init_value) : r (r), init_value (init_value) {}
+
+ template <typename Iter,
+ hb_requires (hb_is_iterator (Iter)),
+ typename AccuT = hb_decay<decltype (hb_declval (Redu) (hb_declval (InitT), hb_declval (typename Iter::item_t)))>>
+ AccuT /* Accumulator type: decayed result type of r (InitT, item_t). */
+ operator () (Iter it)
+ {
+ AccuT value = init_value;
+ for (; it; ++it)
+ value = r (value, *it);
+ return value; /* Equals init_value (converted to AccuT) when the iterator is empty. */
+ }
+
+ private:
+ Redu r;
+ InitT init_value;
+};
+struct /* hb_reduce (r, init): builds an hb_reduce_t to be applied to an iterator. */
+{
+ template <typename Redu, typename InitT>
+ hb_reduce_t<Redu, InitT>
+ operator () (Redu&& r, InitT init_value) const
+ { return hb_reduce_t<Redu, InitT> (r, init_value); }
+}
+HB_FUNCOBJ (hb_reduce);
+
+
+/* hb_zip() */
+
+template <typename A, typename B>
+struct hb_zip_iter_t : /* Walks two iterators in lock-step, yielding hb_pair_t (item of a, item of b). */
+ hb_iter_t<hb_zip_iter_t<A, B>,
+ hb_pair_t<typename A::item_t, typename B::item_t>>
+{
+ hb_zip_iter_t () {}
+ hb_zip_iter_t (const A& a, const B& b) : a (a), b (b) {}
+
+ typedef hb_pair_t<typename A::item_t, typename B::item_t> __item_t__;
+ static constexpr bool is_random_access_iterator = /* Only if both sides are. */
+ A::is_random_access_iterator &&
+ B::is_random_access_iterator;
+ /* Note. The following categorization is only valid if A is strictly sorted,
+ * ie. does NOT have duplicates. Previously I tried to categorize sortedness
+ * more granularly, see commits:
+ *
+ * 513762849a683914fc266a17ddf38f133cccf072
+ * 4d3cf2adb669c345cc43832d11689271995e160a
+ *
+ * However, that was not enough, since hb_sorted_array_t, hb_sorted_vector_t,
+ * SortedArrayOf, etc all needed to be updated to add more variants. At that
+ * point I saw it not worth the effort, and instead we now deem all sorted
+ * collections as essentially strictly-sorted for the purposes of zip.
+ *
+ * The above assumption is not as bad as it sounds. Our "sorted" comes with
+ * no guarantees. It's just a contract, put in place to help you remember,
+ * and think about, whether an iterator you receive is expected to be
+ * sorted or not. As such, it's not perfect by definition, and should not
+ * be treated so. The inaccuracy here just errs in the direction of being
+ * more permissive, so your code compiles instead of erring on the side of
+ * marking your zipped iterator unsorted in which case your code won't
+ * compile.
+ *
+ * This semantical limitation does NOT affect logic in any other place I
+ * know of as of this writing.
+ */
+ static constexpr bool is_sorted_iterator = A::is_sorted_iterator;
+
+ __item_t__ __item__ () const { return __item_t__ (*a, *b); }
+ __item_t__ __item_at__ (unsigned i) const { return __item_t__ (a[i], b[i]); }
+ bool __more__ () const { return bool (a) && bool (b); } /* Ends as soon as either side ends. */
+ unsigned __len__ () const { return hb_min (a.len (), b.len ()); }
+ void __next__ () { ++a; ++b; }
+ void __forward__ (unsigned n) { a += n; b += n; }
+ void __prev__ () { --a; --b; }
+ void __rewind__ (unsigned n) { a -= n; b -= n; }
+ hb_zip_iter_t __end__ () const { return hb_zip_iter_t (a.end (), b.end ()); }
+ /* Note, we should stop if ANY of the iters reaches end. As such two compare
+ * unequal if both items are unequal, NOT if either is unequal. */
+ bool operator != (const hb_zip_iter_t& o) const
+ { return a != o.a && b != o.b; }
+
+ private:
+ A a;
+ B b;
+};
+struct /* hb_zip (a, b): zips the iterators obtained via hb_iter () from two iterables. */
+{ HB_PARTIALIZE(2);
+ template <typename A, typename B,
+ hb_requires (hb_is_iterable (A) && hb_is_iterable (B))>
+ hb_zip_iter_t<hb_iter_type<A>, hb_iter_type<B>>
+ operator () (A&& a, B&& b) const
+ { return hb_zip_iter_t<hb_iter_type<A>, hb_iter_type<B>> (hb_iter (a), hb_iter (b)); }
+}
+HB_FUNCOBJ (hb_zip);
+
+/* hb_apply() */
+
+template <typename Appl>
+struct hb_apply_t /* Consumes an iterator, calling hb_invoke (a, item) on every item and discarding the result. */
+{
+ hb_apply_t (Appl a) : a (a) {}
+
+ template <typename Iter,
+ hb_requires (hb_is_iterator (Iter))>
+ void operator () (Iter it)
+ {
+ for (; it; ++it)
+ (void) hb_invoke (a, *it);
+ }
+
+ private:
+ Appl a;
+};
+struct /* hb_apply (a): builds an hb_apply_t from a callable, or from a pointer to one. */
+{
+ template <typename Appl> hb_apply_t<Appl>
+ operator () (Appl&& a) const
+ { return hb_apply_t<Appl> (a); }
+
+ template <typename Appl> hb_apply_t<Appl&>
+ operator () (Appl *a) const /* Pointer form: the hb_apply_t holds a reference to *a instead of a copy. */
+ { return hb_apply_t<Appl&> (*a); }
+}
+HB_FUNCOBJ (hb_apply);
+
+/* hb_range()/hb_iota()/hb_repeat() */
+
+template <typename T, typename S>
+struct hb_range_iter_t : /* Yields start, start+step, ... and stops when the value equals the (adjusted) end. */
+ hb_iter_t<hb_range_iter_t<T, S>, T>
+{
+ hb_range_iter_t (T start, T end_, S step) : v (start), end_ (end_for (start, end_, step)), step (step) {}
+
+ typedef T __item_t__;
+ static constexpr bool is_random_access_iterator = true;
+ static constexpr bool is_sorted_iterator = true;
+ __item_t__ __item__ () const { return hb_ridentity (v); }
+ __item_t__ __item_at__ (unsigned j) const { return v + j * step; }
+ bool __more__ () const { return v != end_; } /* Inequality test; relies on end_for () having aligned end_ to the step. */
+ unsigned __len__ () const { return !step ? UINT_MAX : (end_ - v) / step; } /* Zero step: reported as UINT_MAX. */
+ void __next__ () { v += step; }
+ void __forward__ (unsigned n) { v += n * step; }
+ void __prev__ () { v -= step; }
+ void __rewind__ (unsigned n) { v -= n * step; }
+ hb_range_iter_t __end__ () const { return hb_range_iter_t (end_, end_, step); }
+ bool operator != (const hb_range_iter_t& o) const
+ { return v != o.v; }
+
+ private:
+ static inline T end_for (T start, T end_, S step) /* Moves end_ so that (end_ - start) is a whole multiple of step. */
+ {
+ if (!step)
+ return end_;
+ auto res = (end_ - start) % step;
+ if (!res)
+ return end_;
+ end_ += step - res;
+ return end_;
+ }
+
+ private:
+ T v;
+ T end_;
+ S step;
+};
+struct /* hb_range (end) counts from 0 in steps of 1; hb_range (start, end, step) is the general form. */
+{
+ template <typename T = unsigned> hb_range_iter_t<T, unsigned>
+ operator () (T end = (unsigned) -1) const /* Default end: (unsigned) -1. */
+ { return hb_range_iter_t<T, unsigned> (0, end, 1u); }
+
+ template <typename T, typename S = unsigned> hb_range_iter_t<T, S>
+ operator () (T start, T end, S step = 1u) const
+ { return hb_range_iter_t<T, S> (start, end, step); }
+}
+HB_FUNCOBJ (hb_range);
+
+template <typename T, typename S>
+struct hb_iota_iter_t : /* Endless sequence starting at `start`; each step either adds `step` or applies it as a callable. */
+ hb_iter_with_fallback_t<hb_iota_iter_t<T, S>, T>
+{
+ hb_iota_iter_t (T start, S step) : v (start), step (step) {}
+
+ private:
+
+ template <typename S2 = S>
+ auto
+ inc (hb_type_identity<S2> s, hb_priority<1>) /* Preferred overload: viable only if hb_invoke (s, v) compiles. */
+ -> hb_void_t<decltype (hb_invoke (hb_forward<S2> (s), hb_declval<T&> ()))>
+ { v = hb_invoke (hb_forward<S2> (s), v); }
+
+ void
+ inc (S s, hb_priority<0>) /* Fallback overload: arithmetic step. */
+ { v += s; }
+
+ public:
+
+ typedef T __item_t__;
+ static constexpr bool is_random_access_iterator = true;
+ static constexpr bool is_sorted_iterator = true;
+ __item_t__ __item__ () const { return hb_ridentity (v); }
+ bool __more__ () const { return true; } /* Never runs out. */
+ unsigned __len__ () const { return UINT_MAX; }
+ void __next__ () { inc (step, hb_prioritize); }
+ void __prev__ () { v -= step; }
+ hb_iota_iter_t __end__ () const { return *this; }
+ bool operator != (const hb_iota_iter_t&) const { return true; } /* Always unequal; parameter unnamed because it is unused. */
+
+ private:
+ T v;
+ S step;
+};
+struct /* hb_iota (start, step): builds an hb_iota_iter_t; defaults are start = 0u, step = 1u. */
+{
+ template <typename T = unsigned, typename S = unsigned> hb_iota_iter_t<T, S>
+ operator () (T start = 0u, S step = 1u) const
+ { return hb_iota_iter_t<T, S> (start, step); }
+}
+HB_FUNCOBJ (hb_iota);
+
+template <typename T>
+struct hb_repeat_iter_t : /* Endless iterator that yields the same value at every position. */
+ hb_iter_t<hb_repeat_iter_t<T>, T>
+{
+ hb_repeat_iter_t (T value) : v (value) {}
+
+ typedef T __item_t__;
+ static constexpr bool is_random_access_iterator = true;
+ static constexpr bool is_sorted_iterator = true;
+ __item_t__ __item__ () const { return v; }
+ __item_t__ __item_at__ (unsigned) const { return v; } /* Index is irrelevant; parameter unnamed because it is unused. */
+ bool __more__ () const { return true; } /* Never runs out. */
+ unsigned __len__ () const { return UINT_MAX; }
+ void __next__ () {} /* Moving is a no-op: there is no position state. */
+ void __forward__ (unsigned) {}
+ void __prev__ () {}
+ void __rewind__ (unsigned) {}
+ hb_repeat_iter_t __end__ () const { return *this; }
+ bool operator != (const hb_repeat_iter_t&) const { return true; } /* Always unequal; parameter unnamed because it is unused. */
+
+ private:
+ T v;
+};
+struct /* hb_repeat (value): builds an hb_repeat_iter_t holding a copy of value. */
+{
+ template <typename T> hb_repeat_iter_t<T>
+ operator () (T value) const
+ { return hb_repeat_iter_t<T> (value); }
+}
+HB_FUNCOBJ (hb_repeat);
+
+/* hb_enumerate()/hb_take() */
+
+struct /* hb_enumerate (it, start): zips hb_iota (start) with it, i.e. pairs (counter, item). */
+{
+ template <typename Iterable,
+ typename Index = unsigned,
+ hb_requires (hb_is_iterable (Iterable))>
+ auto operator () (Iterable&& it, Index start = 0u) const HB_AUTO_RETURN
+ ( hb_zip (hb_iota (start), it) )
+}
+HB_FUNCOBJ (hb_enumerate);
+
+struct /* hb_take (it, count): limits an iterable to at most count items. */
+{ HB_PARTIALIZE(2);
+ template <typename Iterable,
+ hb_requires (hb_is_iterable (Iterable))>
+ auto operator () (Iterable&& it, unsigned count) const HB_AUTO_RETURN
+ ( hb_zip (hb_range (count), it) | hb_map (hb_second) ) /* Zip with a count-long range, then map each pair through hb_second. */
+
+ /* Specializations for arrays: take a sub-array directly instead of zipping. */
+
+ template <typename Type> inline hb_array_t<Type>
+ operator () (hb_array_t<Type> array, unsigned count) const
+ { return array.sub_array (0, count); }
+
+ template <typename Type> inline hb_sorted_array_t<Type>
+ operator () (hb_sorted_array_t<Type> array, unsigned count) const
+ { return array.sub_array (0, count); }
+}
+HB_FUNCOBJ (hb_take);
+
+struct /* hb_chop (it, count): yields successive chunks of it, each limited to count items via hb_take. */
+{ HB_PARTIALIZE(2);
+ template <typename Iter,
+ hb_requires (hb_is_iterator (Iter))>
+ auto operator () (Iter it, unsigned count) const HB_AUTO_RETURN
+ (
+ + hb_iota (it, hb_add (count)) /* Sequence of iterators; each step applies hb_add (count) to the previous one. */
+ | hb_map (hb_take (count))
+ | hb_take ((hb_len (it) + count - 1) / count) /* ceil (len / count) chunks. NOTE(review): divides by count; count == 0 is not guarded. */
+ )
+}
+HB_FUNCOBJ (hb_chop);
+
+/* hb_sink() */
+
+template <typename Sink>
+struct hb_sink_t /* Consumes an iterator, writing every item into the sink with s << item. */
+{
+ hb_sink_t (Sink s) : s (s) {}
+
+ template <typename Iter,
+ hb_requires (hb_is_iterator (Iter))>
+ void operator () (Iter it)
+ {
+ for (; it; ++it)
+ s << *it;
+ }
+
+ private:
+ Sink s;
+};
+struct /* hb_sink (s): builds an hb_sink_t from a sink, or from a pointer to one. */
+{
+ template <typename Sink> hb_sink_t<Sink>
+ operator () (Sink&& s) const
+ { return hb_sink_t<Sink> (s); }
+
+ template <typename Sink> hb_sink_t<Sink&>
+ operator () (Sink *s) const /* Pointer form: the hb_sink_t holds a reference to *s instead of a copy. */
+ { return hb_sink_t<Sink&> (*s); }
+}
+HB_FUNCOBJ (hb_sink);
+
+/* hb_drain: hb_sink to void / blackhole / /dev/null.  Evaluates every item and discards it. */
+
+struct
+{
+ template <typename Iter,
+ hb_requires (hb_is_iterator (Iter))>
+ void operator () (Iter it) const
+ {
+ for (; it; ++it)
+ (void) *it; /* Dereference only for whatever side effects producing the item has. */
+ }
+}
+HB_FUNCOBJ (hb_drain);
+
+/* hb_unzip(): unzip and sink to two sinks: each item's .first goes to s1, its .second to s2. */
+
+template <typename Sink1, typename Sink2>
+struct hb_unzip_t
+{
+ hb_unzip_t (Sink1 s1, Sink2 s2) : s1 (s1), s2 (s2) {}
+
+ template <typename Iter,
+ hb_requires (hb_is_iterator (Iter))>
+ void operator () (Iter it)
+ {
+ for (; it; ++it)
+ {
+ const auto &v = *it; /* Dereference once per step; both sinks read from the same item. */
+ s1 << v.first;
+ s2 << v.second;
+ }
+ }
+
+ private:
+ Sink1 s1;
+ Sink2 s2;
+};
+struct /* hb_unzip (s1, s2): builds an hb_unzip_t from two sinks, or from pointers to them. */
+{
+ template <typename Sink1, typename Sink2> hb_unzip_t<Sink1, Sink2>
+ operator () (Sink1&& s1, Sink2&& s2) const
+ { return hb_unzip_t<Sink1, Sink2> (s1, s2); }
+
+ template <typename Sink1, typename Sink2> hb_unzip_t<Sink1&, Sink2&>
+ operator () (Sink1 *s1, Sink2 *s2) const /* Pointer form: the hb_unzip_t holds references instead of copies. */
+ { return hb_unzip_t<Sink1&, Sink2&> (*s1, *s2); }
+}
+HB_FUNCOBJ (hb_unzip);
+
+
+/* hb-all, hb-any, hb-none. */
+
+struct /* hb_all (c, p, f): true iff hb_match (p, hb_get (f, item)) holds for every item. */
+{
+ template <typename Iterable,
+ typename Pred = decltype ((hb_identity)),
+ typename Proj = decltype ((hb_identity)),
+ hb_requires (hb_is_iterable (Iterable))>
+ bool operator () (Iterable&& c,
+ Pred&& p = hb_identity,
+ Proj&& f = hb_identity) const
+ {
+ for (auto it = hb_iter (c); it; ++it)
+ if (!hb_match (hb_forward<Pred> (p), hb_get (hb_forward<Proj> (f), *it)))
+ return false; /* Stop at the first non-matching item. */
+ return true; /* Also the result for an empty iterable. */
+ }
+}
+HB_FUNCOBJ (hb_all);
+struct /* hb_any (c, p, f): true iff hb_match (p, hb_get (f, item)) holds for at least one item. */
+{
+ template <typename Iterable,
+ typename Pred = decltype ((hb_identity)),
+ typename Proj = decltype ((hb_identity)),
+ hb_requires (hb_is_iterable (Iterable))>
+ bool operator () (Iterable&& c,
+ Pred&& p = hb_identity,
+ Proj&& f = hb_identity) const
+ {
+ for (auto it = hb_iter (c); it; ++it)
+ if (hb_match (hb_forward<Pred> (p), hb_get (hb_forward<Proj> (f), *it)))
+ return true; /* Stop at the first matching item. */
+ return false; /* Also the result for an empty iterable. */
+ }
+}
+HB_FUNCOBJ (hb_any);
+struct /* hb_none (c, p, f): true iff hb_match (p, hb_get (f, item)) holds for no item. */
+{
+ template <typename Iterable,
+ typename Pred = decltype ((hb_identity)),
+ typename Proj = decltype ((hb_identity)),
+ hb_requires (hb_is_iterable (Iterable))>
+ bool operator () (Iterable&& c,
+ Pred&& p = hb_identity,
+ Proj&& f = hb_identity) const
+ {
+ for (auto it = hb_iter (c); it; ++it)
+ if (hb_match (hb_forward<Pred> (p), hb_get (hb_forward<Proj> (f), *it)))
+ return false; /* Stop at the first matching item. */
+ return true; /* Also the result for an empty iterable. */
+ }
+}
+HB_FUNCOBJ (hb_none);
+
+/*
+ * Algorithms operating on iterators.
+ */
+
+template <typename C, typename V,
+ hb_requires (hb_is_iterable (C))>
+inline void
+hb_fill (C& c, const V &v)
+{ /* Assigns v to every item of c; requires that dereferencing c's iterator yields something assignable. */
+ for (auto i = hb_iter (c); i; ++i) /* Pre-increment: avoids the temporary iterator copy that i++ creates. */
+ *i = v;
+}
+
+template <typename S, typename D>
+inline void
+hb_copy (S&& is, D&& id)
+{
+ hb_iter (is) | hb_sink (id); /* Streams every item of `is` into `id` with id << item (see hb_sink_t). */
+}
+
+
+#endif /* HB_ITER_HH */
diff --git a/thirdparty/harfbuzz/src/hb-kern.hh b/thirdparty/harfbuzz/src/hb-kern.hh
new file mode 100644
index 0000000000..3f952fe7fc
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-kern.hh
@@ -0,0 +1,138 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_KERN_HH
+#define HB_KERN_HH
+
+#include "hb-open-type.hh"
+#include "hb-aat-layout-common.hh"
+#include "hb-ot-layout-gpos-table.hh"
+
+
+namespace OT {
+
+
+template <typename Driver>
+struct hb_kern_machine_t /* Applies pair kerning obtained from Driver::get_kerning () to a buffer. */
+{
+ hb_kern_machine_t (const Driver &driver_,
+ bool crossStream_ = false) :
+ driver (driver_),
+ crossStream (crossStream_) {}
+
+ HB_NO_SANITIZE_SIGNED_INTEGER_OVERFLOW
+ void kern (hb_font_t *font,
+ hb_buffer_t *buffer,
+ hb_mask_t kern_mask,
+ bool scale = true) const
+ { /* Walks `buffer`; for every glyph whose mask intersects `kern_mask`, pairs
+    * it with the next glyph found by the skipping iterator (lookup props set
+    * to IgnoreMarks), asks the driver for the pair's kerning and, if it is
+    * non-zero, applies it to the glyph positions.  When `scale` is true the
+    * value is first scaled with font->em_scale_x / em_scale_y. */
+ OT::hb_ot_apply_context_t c (1, font, buffer);
+ c.set_lookup_mask (kern_mask);
+ c.set_lookup_props (OT::LookupFlag::IgnoreMarks);
+ auto &skippy_iter = c.iter_input;
+
+ bool horizontal = HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction);
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ hb_glyph_position_t *pos = buffer->pos;
+ for (unsigned int idx = 0; idx < count;) /* idx is advanced inside the body. */
+ {
+ if (!(info[idx].mask & kern_mask))
+ {
+ idx++;
+ continue;
+ }
+
+ skippy_iter.reset (idx, 1);
+ if (!skippy_iter.next ()) /* No following glyph to pair with. */
+ {
+ idx++;
+ continue;
+ }
+
+ unsigned int i = idx; /* First glyph of the pair. */
+ unsigned int j = skippy_iter.idx; /* Second glyph of the pair. */
+
+ hb_position_t kern = driver.get_kerning (info[i].codepoint,
+ info[j].codepoint);
+
+
+ if (likely (!kern))
+ goto skip;
+
+ if (horizontal)
+ {
+ if (scale)
+ kern = font->em_scale_x (kern);
+ if (crossStream)
+ {
+ pos[j].y_offset = kern; /* Cross-stream: shift the second glyph perpendicular to the line. */
+ buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT;
+ }
+ else
+ {
+ /* Split the adjustment in two parts: kern1 goes to the first glyph's
+  * advance, kern2 to the second glyph's advance and offset. */
+ hb_position_t kern1 = kern >> 1;
+ hb_position_t kern2 = kern - kern1;
+ pos[i].x_advance += kern1;
+ pos[j].x_advance += kern2;
+ pos[j].x_offset += kern2;
+ }
+ }
+ else
+ {
+ if (scale)
+ kern = font->em_scale_y (kern);
+ if (crossStream)
+ {
+ pos[j].x_offset = kern; /* Cross-stream in vertical text moves along x. */
+ buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT;
+ }
+ else
+ {
+ /* Same split as the horizontal case, applied to the y axis. */
+ hb_position_t kern1 = kern >> 1;
+ hb_position_t kern2 = kern - kern1;
+ pos[i].y_advance += kern1;
+ pos[j].y_advance += kern2;
+ pos[j].y_offset += kern2;
+ }
+ }
+
+ buffer->unsafe_to_break (i, j + 1);
+
+ skip:
+ idx = skippy_iter.idx; /* Continue from the second glyph of this pair. */
+ }
+ }
+
+ const Driver &driver; /* Held by reference: the driver must outlive this object. */
+ bool crossStream;
+};
+
+
+} /* namespace OT */
+
+
+#endif /* HB_KERN_HH */
diff --git a/thirdparty/harfbuzz/src/hb-machinery.hh b/thirdparty/harfbuzz/src/hb-machinery.hh
new file mode 100644
index 0000000000..54bc60d4c8
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-machinery.hh
@@ -0,0 +1,307 @@
+/*
+ * Copyright © 2007,2008,2009,2010 Red Hat, Inc.
+ * Copyright © 2012,2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_MACHINERY_HH
+#define HB_MACHINERY_HH
+
+#include "hb.hh"
+#include "hb-blob.hh"
+
+#include "hb-dispatch.hh"
+#include "hb-sanitize.hh"
+#include "hb-serialize.hh"
+
+
+/*
+ * Casts
+ */
+
+/* StructAtOffset<T>(P,Ofs) returns the struct T& that is placed at memory
+ * location pointed to by P plus Ofs bytes.  No bounds or alignment check
+ * is made here: the address is simply reinterpreted as a T.  The
+ * StructAtOffsetUnaligned variants compute the same reference but wrap the
+ * cast in pragmas that silence GCC's -Wcast-align diagnostic. */
+template<typename Type>
+static inline const Type& StructAtOffset(const void *P, unsigned int offset)
+{ return * reinterpret_cast<const Type*> ((const char *) P + offset); }
+template<typename Type>
+static inline Type& StructAtOffset(void *P, unsigned int offset)
+{ return * reinterpret_cast<Type*> ((char *) P + offset); }
+template<typename Type>
+static inline const Type& StructAtOffsetUnaligned(const void *P, unsigned int offset)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-align"
+ return * reinterpret_cast<const Type*> ((const char *) P + offset);
+#pragma GCC diagnostic pop
+}
+template<typename Type>
+static inline Type& StructAtOffsetUnaligned(void *P, unsigned int offset)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-align"
+ return * reinterpret_cast<Type*> ((char *) P + offset);
+#pragma GCC diagnostic pop
+}
+
+/* StructAfter<T>(X) returns the struct T& that is placed after X.
+ * Works with X of variable size also. X must implement get_size(),
+ * whose result is used as the byte offset from the start of X. */
+template<typename Type, typename TObject>
+static inline const Type& StructAfter(const TObject &X)
+{ return StructAtOffset<Type>(&X, X.get_size()); }
+template<typename Type, typename TObject>
+static inline Type& StructAfter(TObject &X)
+{ return StructAtOffset<Type>(&X, X.get_size()); }
+
+
+/*
+ * Size checking
+ */
+
+/* Check _assertion in a method environment.  Expands to a const member
+ * function, named after the current source line, whose body is a
+ * static_assert of _assertion.  The 0/1 helper levels let __LINE__ be
+ * expanded before it is pasted into the function name. */
+#define _DEFINE_INSTANCE_ASSERTION1(_line, _assertion) \
+ void _instance_assertion_on_line_##_line () const \
+ { static_assert ((_assertion), ""); }
+# define _DEFINE_INSTANCE_ASSERTION0(_line, _assertion) _DEFINE_INSTANCE_ASSERTION1 (_line, _assertion)
+# define DEFINE_INSTANCE_ASSERTION(_assertion) _DEFINE_INSTANCE_ASSERTION0 (__LINE__, _assertion)
+
+/* Check that _code compiles in a method environment.  Expands to a const
+ * member function, named after the current source line, whose body is
+ * just _code. */
+#define _DEFINE_COMPILES_ASSERTION1(_line, _code) \
+ void _compiles_assertion_on_line_##_line () const \
+ { _code; }
+# define _DEFINE_COMPILES_ASSERTION0(_line, _code) _DEFINE_COMPILES_ASSERTION1 (_line, _code)
+# define DEFINE_COMPILES_ASSERTION(_code) _DEFINE_COMPILES_ASSERTION0 (__LINE__, _code)
+
+/* Size-declaration helpers, meant to be used inside a struct body (they
+ * reference `this`).  None of them ends in a semicolon, so the user
+ * supplies it.  What each one asserts and defines:
+ *   DEFINE_SIZE_STATIC      sizeof (*this) == size; defines get_size (),
+ *                           null_size, min_size and static_size.
+ *   DEFINE_SIZE_UNION       u._member has a static_size and
+ *                           sizeof (u._member) == size; defines null_size
+ *                           and min_size.
+ *   DEFINE_SIZE_MIN         sizeof (*this) >= size; defines null_size and
+ *                           min_size.
+ *   DEFINE_SIZE_UNBOUNDED   sizeof (*this) >= size; defines min_size only.
+ *   DEFINE_SIZE_ARRAY       array[0] has a static_size and sizeof (*this)
+ *                           == size + (HB_VAR_ARRAY+0) * sizeof (array[0]);
+ *                           defines null_size and min_size.
+ *   DEFINE_SIZE_ARRAY_SIZED DEFINE_SIZE_ARRAY plus a get_size () computed
+ *                           as size - array.min_size + array.get_size (). */
+#define DEFINE_SIZE_STATIC(size) \
+ DEFINE_INSTANCE_ASSERTION (sizeof (*this) == (size)) \
+ unsigned int get_size () const { return (size); } \
+ static constexpr unsigned null_size = (size); \
+ static constexpr unsigned min_size = (size); \
+ static constexpr unsigned static_size = (size)
+
+#define DEFINE_SIZE_UNION(size, _member) \
+ DEFINE_COMPILES_ASSERTION ((void) this->u._member.static_size) \
+ DEFINE_INSTANCE_ASSERTION (sizeof(this->u._member) == (size)) \
+ static constexpr unsigned null_size = (size); \
+ static constexpr unsigned min_size = (size)
+
+#define DEFINE_SIZE_MIN(size) \
+ DEFINE_INSTANCE_ASSERTION (sizeof (*this) >= (size)) \
+ static constexpr unsigned null_size = (size); \
+ static constexpr unsigned min_size = (size)
+
+#define DEFINE_SIZE_UNBOUNDED(size) \
+ DEFINE_INSTANCE_ASSERTION (sizeof (*this) >= (size)) \
+ static constexpr unsigned min_size = (size)
+
+#define DEFINE_SIZE_ARRAY(size, array) \
+ DEFINE_COMPILES_ASSERTION ((void) (array)[0].static_size) \
+ DEFINE_INSTANCE_ASSERTION (sizeof (*this) == (size) + (HB_VAR_ARRAY+0) * sizeof ((array)[0])) \
+ static constexpr unsigned null_size = (size); \
+ static constexpr unsigned min_size = (size)
+
+#define DEFINE_SIZE_ARRAY_SIZED(size, array) \
+ unsigned int get_size () const { return (size - (array).min_size + (array).get_size ()); } \
+ DEFINE_SIZE_ARRAY(size, array)
+
+
+
+/*
+ * Lazy loaders.
+ */
+
+template <typename Data, unsigned int WheresData>
+struct hb_data_wrapper_t /* Base of hb_lazy_loader_t that knows how to find the loader's Data*. */
+{
+ static_assert (WheresData > 0, "");
+
+ /* Reads the Data* stored WheresData pointer-sized slots before `this`,
+  * treating the memory around this object as an array of Data*. */
+ Data * get_data () const
+ { return *(((Data **) (void *) this) - WheresData); }
+
+ bool is_inert () const { return !get_data (); } /* Inert when that Data* is null. */
+
+ template <typename Stored, typename Subclass>
+ Stored * call_create () const { return Subclass::create (get_data ()); }
+};
+template <>
+struct hb_data_wrapper_t<void, 0> /* No-data variant: never inert, create () takes no argument. */
+{
+ bool is_inert () const { return false; }
+
+ template <typename Stored, typename Funcs>
+ Stored * call_create () const { return Funcs::create (); }
+};
+
+template <typename T1, typename T2> struct hb_non_void_t { typedef T1 value; }; /* `value` is T1 ... */
+template <typename T2> struct hb_non_void_t<void, T2> { typedef T2 value; }; /* ... unless T1 is void, then T2. */
+
+template <typename Returned,
+ typename Subclass = void,
+ typename Data = void,
+ unsigned int WheresData = 0,
+ typename Stored = Returned>
+struct hb_lazy_loader_t : hb_data_wrapper_t<Data, WheresData>
+{ /* Holds one atomic pointer to a Stored object that is created on first
+   * access.  The create / destroy / get_null / convert hooks are looked up
+   * on Funcs, i.e. on Subclass when one is given, else on this class. */
+ typedef typename hb_non_void_t<Subclass,
+ hb_lazy_loader_t<Returned,Subclass,Data,WheresData,Stored>
+ >::value Funcs;
+
+ void init0 () {} /* Init, when memory is already set to 0. No-op for us. */
+ void init () { instance.set_relaxed (nullptr); }
+ void fini () { do_destroy (instance.get ()); }
+
+ void free_instance ()
+ { /* Swaps the stored pointer for nullptr, retrying until the exchange succeeds, then destroys the old object. */
+ retry:
+ Stored *p = instance.get ();
+ if (unlikely (p && !cmpexch (p, nullptr)))
+ goto retry;
+ do_destroy (p);
+ }
+
+ static void do_destroy (Stored *p)
+ { /* Neither a null pointer nor the Funcs::get_null () object is ever destroyed. */
+ if (p && p != const_cast<Stored *> (Funcs::get_null ()))
+ Funcs::destroy (p);
+ }
+
+ const Returned * operator -> () const { return get (); }
+ const Returned & operator * () const { return *get (); }
+ explicit operator bool () const /* True when the stored object is not the null object. */
+ { return get_stored () != Funcs::get_null (); }
+ template <typename C> operator const C * () const { return get (); }
+
+ Stored * get_stored () const
+ { /* Returns the stored object, creating and publishing it on first use. */
+ retry:
+ Stored *p = this->instance.get ();
+ if (unlikely (!p))
+ {
+ if (unlikely (this->is_inert ()))
+ return const_cast<Stored *> (Funcs::get_null ());
+
+ p = this->template call_create<Stored, Funcs> ();
+ if (unlikely (!p)) /* Creation failed: fall back to the null object. */
+ p = const_cast<Stored *> (Funcs::get_null ());
+
+ if (unlikely (!cmpexch (nullptr, p))) /* Slot was filled in the meantime: drop ours and re-read. */
+ {
+ do_destroy (p);
+ goto retry;
+ }
+ }
+ return p;
+ }
+ Stored * get_stored_relaxed () const
+ { /* Plain relaxed read of the pointer; never creates the object. */
+ return this->instance.get_relaxed ();
+ }
+
+ bool cmpexch (Stored *current, Stored *value) const
+ {
+ /* This *must* be called when there are no other threads accessing. */
+ return this->instance.cmpexch (current, value);
+ }
+
+ const Returned * get () const { return Funcs::convert (get_stored ()); }
+ const Returned * get_relaxed () const { return Funcs::convert (get_stored_relaxed ()); }
+ Returned * get_unconst () const { return const_cast<Returned *> (Funcs::convert (get_stored ())); }
+
+ /* To be possibly overloaded by subclasses. */
+ static Returned* convert (Stored *p) { return p; }
+
+ /* By default null/init/fini the object. */
+ static const Stored* get_null () { return &Null (Stored); }
+ static Stored *create (Data *data)
+ { /* Zero-allocates a Stored and calls its init (data); returns nullptr if calloc fails. */
+ Stored *p = (Stored *) calloc (1, sizeof (Stored));
+ if (likely (p))
+ p->init (data);
+ return p;
+ }
+ static Stored *create ()
+ { /* Same as above for loaders without Data: calls init () with no argument. */
+ Stored *p = (Stored *) calloc (1, sizeof (Stored));
+ if (likely (p))
+ p->init ();
+ return p;
+ }
+ static void destroy (Stored *p)
+ {
+ p->fini ();
+ free (p);
+ }
+
+// private:
+ /* Must only have one pointer. */
+ hb_atomic_ptr_t<Stored *> instance;
+};
+
+/* Specializations. */
+
+template <typename T, unsigned int WheresFace>
+struct hb_face_lazy_loader_t : hb_lazy_loader_t<T, /* Lazy loader of a T whose Data is the hb_face_t found WheresFace slots back. */
+ hb_face_lazy_loader_t<T, WheresFace>,
+ hb_face_t, WheresFace> {};
+
+template <typename T, unsigned int WheresFace>
+struct hb_table_lazy_loader_t : hb_lazy_loader_t<T,
+ hb_table_lazy_loader_t<T, WheresFace>,
+ hb_face_t, WheresFace,
+ hb_blob_t>
+{ /* Lazy loader that stores an hb_blob_t and hands it out as a const T*. */
+ static hb_blob_t *create (hb_face_t *face) /* Blob comes from hb_sanitize_context_t::reference_table<T> (). */
+ { return hb_sanitize_context_t ().reference_table<T> (face); }
+ static void destroy (hb_blob_t *p) { hb_blob_destroy (p); }
+
+ static const hb_blob_t *get_null () /* The empty blob stands in for "no table". */
+ { return hb_blob_get_empty (); }
+
+ static const T* convert (const hb_blob_t *blob)
+ { return blob->as<T> (); }
+
+ hb_blob_t* get_blob () const { return this->get_stored (); } /* Access to the underlying blob itself. */
+};
+
+template <typename Subclass>
+struct hb_font_funcs_lazy_loader_t : hb_lazy_loader_t<hb_font_funcs_t, Subclass>
+{ /* Lazy loader for hb_font_funcs_t: overrides only destroy () and get_null (); no create () is defined here. */
+ static void destroy (hb_font_funcs_t *p)
+ { hb_font_funcs_destroy (p); }
+ static const hb_font_funcs_t *get_null ()
+ { return hb_font_funcs_get_empty (); }
+};
+template <typename Subclass>
+struct hb_unicode_funcs_lazy_loader_t : hb_lazy_loader_t<hb_unicode_funcs_t, Subclass>
+{ /* Lazy loader for hb_unicode_funcs_t: overrides only destroy () and get_null (); no create () is defined here. */
+ static void destroy (hb_unicode_funcs_t *p)
+ { hb_unicode_funcs_destroy (p); }
+ static const hb_unicode_funcs_t *get_null ()
+ { return hb_unicode_funcs_get_empty (); }
+};
+
+
+#endif /* HB_MACHINERY_HH */
diff --git a/thirdparty/harfbuzz/src/hb-map.cc b/thirdparty/harfbuzz/src/hb-map.cc
new file mode 100644
index 0000000000..191be14372
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-map.cc
@@ -0,0 +1,268 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb-map.hh"
+
+
+/**
+ * SECTION:hb-map
+ * @title: hb-map
+ * @short_description: Object representing integer to integer mapping
+ * @include: hb.h
+ *
+ * Map objects are integer-to-integer hash-maps. Currently they are
+ * not used in the HarfBuzz public API, but are provided for client's
+ * use if desired.
+ **/
+
+
+/**
+ * hb_map_create: (Xconstructor)
+ *
+ * Creates a new map object with no entries and no item storage allocated.
+ *
+ * Return value: (transfer full): the new map, or the object returned by
+ * hb_map_get_empty() if the map object could not be created.
+ *
+ * Since: 1.7.7
+ **/
+hb_map_t *
+hb_map_create ()
+{
+ hb_map_t *map;
+
+ if (!(map = hb_object_create<hb_map_t> ()))
+ return hb_map_get_empty ();
+
+ map->init_shallow ();
+
+ return map;
+}
+
+/**
+ * hb_map_get_empty:
+ *
+ * Returns the Null hb_map_t object with its constness cast away.  This is
+ * also what hb_map_create() returns when it cannot create a map.
+ *
+ * Return value: (transfer full): pointer to the Null map object.
+ *
+ * Since: 1.7.7
+ **/
+hb_map_t *
+hb_map_get_empty ()
+{
+ return const_cast<hb_map_t *> (&Null (hb_map_t));
+}
+
+/**
+ * hb_map_reference: (skip)
+ * @map: a map.
+ *
+ * Takes a reference on @map by forwarding to hb_object_reference().
+ *
+ * Return value: (transfer full): the result of hb_object_reference()
+ * (presumably @map itself -- confirm against hb-object.hh).
+ *
+ * Since: 1.7.7
+ **/
+hb_map_t *
+hb_map_reference (hb_map_t *map)
+{
+ return hb_object_reference (map);
+}
+
+/**
+ * hb_map_destroy: (skip)
+ * @map: a map.
+ *
+ * Calls hb_object_destroy() on @map.  Only when that call returns true is
+ * the map's item storage released and the map object itself freed.
+ *
+ * Since: 1.7.7
+ **/
+void
+hb_map_destroy (hb_map_t *map)
+{
+ if (!hb_object_destroy (map)) return;
+
+ map->fini_shallow ();
+
+ free (map);
+}
+
+/**
+ * hb_map_set_user_data: (skip)
+ * @map: a map.
+ * @key: the user-data key to store @data under.
+ * @data: the user-data pointer to attach.
+ * @destroy: callback passed through with @data (presumably invoked when
+ *           the data is released -- see hb_object_set_user_data()).
+ * @replace: passed through to hb_object_set_user_data() (presumably
+ *           whether an existing entry for @key may be replaced).
+ *
+ * Thin wrapper that forwards all arguments to hb_object_set_user_data().
+ *
+ * Return value: the result of hb_object_set_user_data().
+ *
+ * Since: 1.7.7
+ **/
+hb_bool_t
+hb_map_set_user_data (hb_map_t *map,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace)
+{
+ return hb_object_set_user_data (map, key, data, destroy, replace);
+}
+
+/**
+ * hb_map_get_user_data: (skip)
+ * @map: a map.
+ * @key: the user-data key to look up.
+ *
+ * Thin wrapper that forwards to hb_object_get_user_data().
+ *
+ * Return value: (transfer none): the result of hb_object_get_user_data()
+ * for @key.
+ *
+ * Since: 1.7.7
+ **/
+void *
+hb_map_get_user_data (hb_map_t *map,
+ hb_user_data_key_t *key)
+{
+ return hb_object_get_user_data (map, key);
+}
+
+
+/**
+ * hb_map_allocation_successful:
+ * @map: a map.
+ *
+ * Reports the map's `successful` flag, which is cleared when allocating
+ * the item array fails during a resize.
+ *
+ * Return value: false if an allocation for @map has failed before,
+ * true otherwise.
+ *
+ * Since: 1.7.7
+ **/
+hb_bool_t
+hb_map_allocation_successful (const hb_map_t *map)
+{
+ return map->successful;
+}
+
+
+/**
+ * hb_map_set:
+ * @map: a map.
+ * @key: the key to store; HB_MAP_VALUE_INVALID is ignored.
+ * @value: the value to associate with @key; passing HB_MAP_VALUE_INVALID
+ *         deletes @key instead.
+ *
+ * Stores @value under @key, replacing any previous value.  Does nothing
+ * if the map is in the allocation-failed state.
+ *
+ * Since: 1.7.7
+ **/
+void
+hb_map_set (hb_map_t *map,
+ hb_codepoint_t key,
+ hb_codepoint_t value)
+{
+ map->set (key, value);
+}
+
+/**
+ * hb_map_get:
+ * @map: a map.
+ * @key: the key to look up.
+ *
+ * Looks up the value stored under @key.
+ *
+ * Return value: the stored value, or HB_MAP_VALUE_INVALID if @key is not
+ * in @map.
+ *
+ * Since: 1.7.7
+ **/
+hb_codepoint_t
+hb_map_get (const hb_map_t *map,
+ hb_codepoint_t key)
+{
+ return map->get (key);
+}
+
+/**
+ * hb_map_del:
+ * @map: a map.
+ * @key: the key to remove.
+ *
+ * Removes @key from @map (implemented as setting its value to
+ * HB_MAP_VALUE_INVALID).  Does nothing if @key is not present.
+ *
+ * Since: 1.7.7
+ **/
+void
+hb_map_del (hb_map_t *map,
+ hb_codepoint_t key)
+{
+ map->del (key);
+}
+
+/**
+ * hb_map_has:
+ * @map: a map.
+ * @key: the key to test.
+ *
+ * Tests whether @key has a value in @map.
+ *
+ * Return value: true if the value found for @key differs from
+ * HB_MAP_VALUE_INVALID, false otherwise.
+ *
+ * Since: 1.7.7
+ **/
+hb_bool_t
+hb_map_has (const hb_map_t *map,
+ hb_codepoint_t key)
+{
+ return map->has (key);
+}
+
+
+/**
+ * hb_map_clear:
+ * @map: a map.
+ *
+ * Removes every entry from @map.  The item array is kept and its slots
+ * are reset.  Does nothing if @map is immutable.
+ *
+ * Since: 1.7.7
+ **/
+void
+hb_map_clear (hb_map_t *map)
+{
+ map->clear ();
+}
+
+/**
+ * hb_map_is_empty:
+ * @map: a map.
+ *
+ * Tests whether @map holds no entries.
+ *
+ * Return value: true if the map's population count is zero.
+ *
+ * Since: 1.7.7
+ **/
+hb_bool_t
+hb_map_is_empty (const hb_map_t *map)
+{
+ return map->is_empty ();
+}
+
+/**
+ * hb_map_get_population:
+ * @map: a map.
+ *
+ * Returns the map's population count, i.e. its number of entries
+ * (deleted slots are not meant to be included).
+ *
+ * Return value: the population count of @map.
+ *
+ * Since: 1.7.7
+ **/
+unsigned int
+hb_map_get_population (const hb_map_t *map)
+{
+ return map->get_population ();
+}
diff --git a/thirdparty/harfbuzz/src/hb-map.h b/thirdparty/harfbuzz/src/hb-map.h
new file mode 100644
index 0000000000..b77843c2ba
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-map.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_H_IN
+#error "Include <hb.h> instead."
+#endif
+
+#ifndef HB_MAP_H
+#define HB_MAP_H
+
+#include "hb-common.h"
+
+HB_BEGIN_DECLS
+
+
+/*
+ * HB_MAP_VALUE_INVALID: the all-ones hb_codepoint_t.  hb_map_t uses it as
+ * both its invalid key and its invalid ("no entry") value.
+ *
+ * Since: 1.7.7
+ */
+#define HB_MAP_VALUE_INVALID ((hb_codepoint_t) -1)
+
+typedef struct hb_map_t hb_map_t;
+
+
+HB_EXTERN hb_map_t *
+hb_map_create (void);
+
+HB_EXTERN hb_map_t *
+hb_map_get_empty (void);
+
+HB_EXTERN hb_map_t *
+hb_map_reference (hb_map_t *map);
+
+HB_EXTERN void
+hb_map_destroy (hb_map_t *map);
+
+HB_EXTERN hb_bool_t
+hb_map_set_user_data (hb_map_t *map,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace);
+
+HB_EXTERN void *
+hb_map_get_user_data (hb_map_t *map,
+ hb_user_data_key_t *key);
+
+
+/* Returns false if allocation has failed before */
+HB_EXTERN hb_bool_t
+hb_map_allocation_successful (const hb_map_t *map);
+
+HB_EXTERN void
+hb_map_clear (hb_map_t *map);
+
+HB_EXTERN hb_bool_t
+hb_map_is_empty (const hb_map_t *map);
+
+HB_EXTERN unsigned int
+hb_map_get_population (const hb_map_t *map);
+
+HB_EXTERN void
+hb_map_set (hb_map_t *map,
+ hb_codepoint_t key,
+ hb_codepoint_t value);
+
+HB_EXTERN hb_codepoint_t
+hb_map_get (const hb_map_t *map,
+ hb_codepoint_t key);
+
+HB_EXTERN void
+hb_map_del (hb_map_t *map,
+ hb_codepoint_t key);
+
+HB_EXTERN hb_bool_t
+hb_map_has (const hb_map_t *map,
+ hb_codepoint_t key);
+
+
+HB_END_DECLS
+
+#endif /* HB_MAP_H */
diff --git a/thirdparty/harfbuzz/src/hb-map.hh b/thirdparty/harfbuzz/src/hb-map.hh
new file mode 100644
index 0000000000..92c1bd67e5
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-map.hh
@@ -0,0 +1,326 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_MAP_HH
+#define HB_MAP_HH
+
+#include "hb.hh"
+
+
+/*
+ * hb_hashmap_t
+ */
+
+template <typename K, typename V,
+ K kINVALID = hb_is_pointer (K) ? 0 : hb_is_signed (K) ? hb_int_min (K) : (K) -1,
+ V vINVALID = hb_is_pointer (V) ? 0 : hb_is_signed (V) ? hb_int_min (V) : (V) -1>
+struct hb_hashmap_t
+{
+ HB_DELETE_COPY_ASSIGN (hb_hashmap_t);
+ hb_hashmap_t () { init (); }
+ ~hb_hashmap_t () { fini (); }
+
+ static_assert (hb_is_integral (K) || hb_is_pointer (K), "");
+ static_assert (hb_is_integral (V) || hb_is_pointer (V), "");
+
+ struct item_t /* One slot of the items array: key, value and the key's cached hash. */
+ {
+ K key;
+ V value;
+ uint32_t hash;
+
+ void clear () { key = kINVALID; value = vINVALID; hash = 0; }
+
+ bool operator == (const K &o) { return hb_deref (key) == hb_deref (o); } /* Compares keys through hb_deref. */
+ bool operator == (const item_t &o) { return *this == o.key; }
+ bool is_unused () const { return key == kINVALID; } /* Slot holds no key. */
+ bool is_tombstone () const { return key != kINVALID && value == vINVALID; } /* Key present but deleted. */
+ bool is_real () const { return key != kINVALID && value != vINVALID; } /* Live key/value pair. */
+ hb_pair_t<K, V> get_pair() const { return hb_pair_t<K, V> (key, value); }
+ };
+
+ hb_object_header_t header;
+ bool successful; /* Allocations successful */
+ unsigned int population; /* Not including tombstones. */
+ unsigned int occupancy; /* Including tombstones. */
+ unsigned int mask;
+ unsigned int prime;
+ item_t *items;
+
+ void init_shallow () /* Resets all map fields to the empty, unallocated state; leaves the object header alone. */
+ {
+ successful = true;
+ population = occupancy = 0;
+ mask = 0;
+ prime = 0;
+ items = nullptr;
+ }
+ void init () /* Initializes the object header, then the map fields. */
+ {
+ hb_object_init (this);
+ init_shallow ();
+ }
+ void fini_shallow () /* Frees the item array and zeroes the counters; mask and prime are left as they were. */
+ {
+ free (items);
+ items = nullptr;
+ population = occupancy = 0;
+ }
+ void fini () /* Finalizes the object header, then releases the item array. */
+ {
+ hb_object_fini (this);
+ fini_shallow ();
+ }
+
+ void reset () /* Clears the error state and all entries; does nothing on an immutable object. */
+ {
+ if (unlikely (hb_object_is_immutable (this)))
+ return;
+ successful = true;
+ clear ();
+ }
+
+ bool in_error () const { return !successful; } /* True once an allocation has failed (until reset ()). */
+
+ bool resize () /* Allocates a new power-of-two item array sized from `population` and re-inserts the live items. */
+ {
+ if (unlikely (!successful)) return false;
+
+ unsigned int power = hb_bit_storage (population * 2 + 8);
+ unsigned int new_size = 1u << power;
+ item_t *new_items = (item_t *) malloc ((size_t) new_size * sizeof (item_t));
+ if (unlikely (!new_items))
+ {
+ successful = false; /* Sticky error flag: later set () calls become no-ops. */
+ return false;
+ }
+ for (auto &_ : hb_iter (new_items, new_size))
+ _.clear ();
+
+ unsigned int old_size = mask + 1;
+ item_t *old_items = items;
+
+ /* Switch to new, empty, array. */
+ population = occupancy = 0;
+ mask = new_size - 1;
+ prime = prime_for (power);
+ items = new_items;
+
+ /* Insert back old items. */
+ if (old_items)
+ for (unsigned int i = 0; i < old_size; i++)
+ if (old_items[i].is_real ()) /* Tombstones and unused slots are dropped here. */
+ set_with_hash (old_items[i].key,
+ old_items[i].hash,
+ old_items[i].value);
+
+ free (old_items);
+
+ return true;
+ }
+
+ void set (K key, V value) /* Hashes `key` with hb_hash and stores `value`; a vINVALID value deletes the key. */
+ {
+ set_with_hash (key, hb_hash (key), value);
+ }
+
+ V get (K key) const /* Returns the value stored under `key`, or vINVALID if there is none. */
+ {
+ if (unlikely (!items)) return vINVALID; /* Nothing allocated yet, so nothing stored. */
+ unsigned int i = bucket_for (key);
+ return items[i].is_real () && items[i] == key ? items[i].value : vINVALID;
+ }
+
+ void del (K key) { set (key, vINVALID); } /* Deletion is a set () with the invalid value. */
+
+ /* Has interface.  has () reports whether `k` maps to something other than
+  * SENTINEL and, if `vp` is given, also stores the looked-up value there
+  * (which is SENTINEL when the key is absent). */
+ static constexpr V SENTINEL = vINVALID;
+ typedef V value_t;
+ value_t operator [] (K k) const { return get (k); }
+ bool has (K k, V *vp = nullptr) const
+ {
+ V v = (*this)[k];
+ if (vp) *vp = v;
+ return v != SENTINEL;
+ }
+ /* Projection.  Calling the map like a function is the same as get (). */
+ V operator () (K k) const { return get (k); }
+
+ void clear () /* Resets every slot and zeroes the counters; keeps the item array. No-op on an immutable object. */
+ {
+ if (unlikely (hb_object_is_immutable (this)))
+ return;
+ if (items)
+ for (auto &_ : hb_iter (items, mask + 1))
+ _.clear ();
+
+ population = occupancy = 0;
+ }
+
+ bool is_empty () const { return population == 0; }
+
+ unsigned int get_population () const { return population; } /* The `population` counter (tombstones excluded). */
+
+ /*
+ * Iterator
+ *
+ * All three walk the item array and keep only is_real () slots: iter ()
+ * yields key/value pairs, keys () the keys and values () the values.  The
+ * array length is taken as 0 while mask is 0 (its value before any
+ * allocation).
+ */
+ auto iter () const HB_AUTO_RETURN
+ (
+ + hb_array (items, mask ? mask + 1 : 0)
+ | hb_filter (&item_t::is_real)
+ | hb_map (&item_t::get_pair)
+ )
+ auto keys () const HB_AUTO_RETURN
+ (
+ + hb_array (items, mask ? mask + 1 : 0)
+ | hb_filter (&item_t::is_real)
+ | hb_map (&item_t::key)
+ | hb_map (hb_ridentity)
+ )
+ auto values () const HB_AUTO_RETURN
+ (
+ + hb_array (items, mask ? mask + 1 : 0)
+ | hb_filter (&item_t::is_real)
+ | hb_map (&item_t::value)
+ | hb_map (hb_ridentity)
+ )
+
+ /* Sink interface.  Streaming a pair into the map is set (first, second). */
+ hb_hashmap_t& operator << (const hb_pair_t<K, V>& v)
+ { set (v.first, v.second); return *this; }
+
+ protected:
+
+ /* Stores `value` under `key` (whose hash is `hash`), or deletes `key` when
+  * `value` is vINVALID.  Does nothing if the map is in the error state, if
+  * `key` is kINVALID, if a required resize fails, or when asked to delete a
+  * key that is not present.
+  *
+  * Bookkeeping: `occupancy` counts every used slot (tombstones included),
+  * `population` counts live items only. */
+ void set_with_hash (K key, uint32_t hash, V value)
+ {
+ if (unlikely (!successful)) return;
+ if (unlikely (key == kINVALID)) return;
+ /* Grow first when the table is getting full; this also performs the very
+  * first allocation, since mask is 0 initially. */
+ if ((occupancy + occupancy / 2) >= mask && !resize ()) return;
+ unsigned int i = bucket_for_hash (key, hash);
+
+ if (value == vINVALID && items[i].key != key)
+ return; /* Trying to delete non-existent key. */
+
+ if (!items[i].is_unused ())
+ {
+ /* The slot is being overwritten: retract what it contributed before the
+  * new contents are accounted for below.  Only a live item was counted in
+  * population; a tombstone was counted in occupancy alone. */
+ occupancy--;
+ if (!items[i].is_tombstone ())
+ population--;
+ }
+
+ items[i].key = key;
+ items[i].value = value;
+ items[i].hash = hash;
+
+ occupancy++;
+ if (!items[i].is_tombstone ())
+ population++;
+ }
+
+ unsigned int bucket_for (K key) const /* bucket_for_hash () with the hash computed by hb_hash. */
+ {
+ return bucket_for_hash (key, hb_hash (key));
+ }
+
+ unsigned int bucket_for_hash (K key, uint32_t hash) const
+ { /* Returns the slot index for `key`: the slot already holding it if there
+    * is one, else the first tombstone met while probing, else the unused
+    * slot that ended the probe.  Uses `prime` and `items`, so the table must
+    * be allocated: get () checks `items`, set_with_hash () resizes first. */
+ unsigned int i = hash % prime; /* Initial bucket. */
+ unsigned int step = 0;
+ unsigned int tombstone = (unsigned) -1; /* (unsigned) -1 means "none seen yet". */
+ while (!items[i].is_unused ())
+ {
+ if (items[i].hash == hash && items[i] == key)
+ return i;
+ if (tombstone == (unsigned) -1 && items[i].is_tombstone ())
+ tombstone = i;
+ i = (i + ++step) & mask; /* Step grows by one each probe; wraps with the table mask. */
+ }
+ return tombstone == (unsigned) -1 ? i : tombstone;
+ }
+
+ static unsigned int prime_for (unsigned int shift) /* Prime modulus for a table of size 1 << shift. */
+ {
+ /* Following comment and table copied from glib. */
+ /* Each table size has an associated prime modulo (the first prime
+ * lower than the table size) used to find the initial bucket. Probing
+ * then works modulo 2^n. The prime modulo is necessary to get a
+ * good distribution with poor hash functions.
+ */
+ /* Not declaring static to make all kinds of compilers happy... */
+ /*static*/ const unsigned int prime_mod [32] =
+ {
+ 1, /* For 1 << 0 */
+ 2,
+ 3,
+ 7,
+ 13,
+ 31,
+ 61,
+ 127,
+ 251,
+ 509,
+ 1021,
+ 2039,
+ 4093,
+ 8191,
+ 16381,
+ 32749,
+ 65521, /* For 1 << 16 */
+ 131071,
+ 262139,
+ 524287,
+ 1048573,
+ 2097143,
+ 4194301,
+ 8388593,
+ 16777213,
+ 33554393,
+ 67108859,
+ 134217689,
+ 268435399,
+ 536870909,
+ 1073741789,
+ 2147483647 /* For 1 << 31 */
+ };
+
+ if (unlikely (shift >= ARRAY_LENGTH (prime_mod))) /* Out-of-range shift: clamp to the last entry. */
+ return prime_mod[ARRAY_LENGTH (prime_mod) - 1];
+
+ return prime_mod[shift];
+ }
+};
+
+/*
+ * hb_map_t
+ *
+ * An hb_hashmap_t from hb_codepoint_t to hb_codepoint_t that uses
+ * HB_MAP_VALUE_INVALID as both the invalid key and the invalid value.
+ */
+
+struct hb_map_t : hb_hashmap_t<hb_codepoint_t,
+ hb_codepoint_t,
+ HB_MAP_VALUE_INVALID,
+ HB_MAP_VALUE_INVALID> {};
+
+
+#endif /* HB_MAP_HH */
diff --git a/thirdparty/harfbuzz/src/hb-meta.hh b/thirdparty/harfbuzz/src/hb-meta.hh
new file mode 100644
index 0000000000..4c0898b1b7
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-meta.hh
@@ -0,0 +1,410 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_META_HH
+#define HB_META_HH
+
+#include "hb.hh"
+
+
+/*
+ * C++ template meta-programming & fundamentals used with them.
+ */
+
+/* Void! For when we need an expression-type of void. */
+struct hb_empty_t {};
+
+/* https://en.cppreference.com/w/cpp/types/void_t */
+template<typename... Ts> struct _hb_void_t { typedef void type; };
+template<typename... Ts> using hb_void_t = typename _hb_void_t<Ts...>::type;
+
+template<typename Head, typename... Ts> struct _hb_head_t { typedef Head type; };
+template<typename... Ts> using hb_head_t = typename _hb_head_t<Ts...>::type;
+
+template <typename T, T v> struct hb_integral_constant { static constexpr T value = v; };
+template <bool b> using hb_bool_constant = hb_integral_constant<bool, b>;
+using hb_true_type = hb_bool_constant<true>;
+using hb_false_type = hb_bool_constant<false>;
+
+
+/* Basic type SFINAE. */
+
+template <bool B, typename T = void> struct hb_enable_if {};
+template <typename T> struct hb_enable_if<true, T> { typedef T type; };
+#define hb_enable_if(Cond) typename hb_enable_if<(Cond)>::type* = nullptr
+/* Concepts/Requires alias: */
+#define hb_requires(Cond) hb_enable_if((Cond))
+
+template <typename T, typename T2> struct hb_is_same : hb_false_type {};
+template <typename T> struct hb_is_same<T, T> : hb_true_type {};
+#define hb_is_same(T, T2) hb_is_same<T, T2>::value
+
+/* Function overloading SFINAE and priority. */
+
+#define HB_RETURN(Ret, E) -> hb_head_t<Ret, decltype ((E))> { return (E); }
+#define HB_AUTO_RETURN(E) -> decltype ((E)) { return (E); }
+#define HB_VOID_RETURN(E) -> hb_void_t<decltype ((E))> { (E); }
+
+template <unsigned Pri> struct hb_priority : hb_priority<Pri - 1> {};
+template <> struct hb_priority<0> {};
+#define hb_prioritize hb_priority<16> ()
+
+#define HB_FUNCOBJ(x) static_const x HB_UNUSED
+
+
+template <typename T> struct hb_type_identity_t { typedef T type; };
+template <typename T> using hb_type_identity = typename hb_type_identity_t<T>::type;
+
+struct
+{
+ template <typename T> constexpr T*
+ operator () (T& arg) const
+ {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-align"
+ /* https://en.cppreference.com/w/cpp/memory/addressof */
+ return reinterpret_cast<T*> (
+ &const_cast<char&> (
+ reinterpret_cast<const volatile char&> (arg)));
+#pragma GCC diagnostic pop
+ }
+}
+HB_FUNCOBJ (hb_addressof);
+
+template <typename T> static inline T hb_declval ();
+#define hb_declval(T) (hb_declval<T> ())
+
+template <typename T> struct hb_match_const : hb_type_identity_t<T>, hb_bool_constant<false>{};
+template <typename T> struct hb_match_const<const T> : hb_type_identity_t<T>, hb_bool_constant<true> {};
+template <typename T> using hb_remove_const = typename hb_match_const<T>::type;
+template <typename T> using hb_add_const = const T;
+#define hb_is_const(T) hb_match_const<T>::value
+template <typename T> struct hb_match_reference : hb_type_identity_t<T>, hb_bool_constant<false>{};
+template <typename T> struct hb_match_reference<T &> : hb_type_identity_t<T>, hb_bool_constant<true> {};
+template <typename T> struct hb_match_reference<T &&> : hb_type_identity_t<T>, hb_bool_constant<true> {};
+template <typename T> using hb_remove_reference = typename hb_match_reference<T>::type;
+template <typename T> auto _hb_try_add_lvalue_reference (hb_priority<1>) -> hb_type_identity<T&>;
+template <typename T> auto _hb_try_add_lvalue_reference (hb_priority<0>) -> hb_type_identity<T>;
+template <typename T> using hb_add_lvalue_reference = decltype (_hb_try_add_lvalue_reference<T> (hb_prioritize));
+template <typename T> auto _hb_try_add_rvalue_reference (hb_priority<1>) -> hb_type_identity<T&&>;
+template <typename T> auto _hb_try_add_rvalue_reference (hb_priority<0>) -> hb_type_identity<T>;
+template <typename T> using hb_add_rvalue_reference = decltype (_hb_try_add_rvalue_reference<T> (hb_prioritize));
+#define hb_is_reference(T) hb_match_reference<T>::value
+template <typename T> struct hb_match_pointer : hb_type_identity_t<T>, hb_bool_constant<false>{};
+template <typename T> struct hb_match_pointer<T *> : hb_type_identity_t<T>, hb_bool_constant<true> {};
+template <typename T> using hb_remove_pointer = typename hb_match_pointer<T>::type;
+template <typename T> auto _hb_try_add_pointer (hb_priority<1>) -> hb_type_identity<hb_remove_reference<T>*>; /* Preferred: strip any reference from T, then add '*'. */
+template <typename T> auto _hb_try_add_pointer (hb_priority<0>) -> hb_type_identity<T>; /* Fallback (lower priority, so the call is never ambiguous): used when the pointer type cannot be formed. */
+template <typename T> using hb_add_pointer = decltype (_hb_try_add_pointer<T> (hb_prioritize)); /* Resolves to the highest-priority viable overload above. */
+#define hb_is_pointer(T) hb_match_pointer<T>::value
+
+
+/* TODO Add feature-parity to std::decay. */
+template <typename T> using hb_decay = hb_remove_const<hb_remove_reference<T>>;
+
+
+template<bool B, class T, class F>
+struct _hb_conditional { typedef T type; };
+template<class T, class F>
+struct _hb_conditional<false, T, F> { typedef F type; };
+template<bool B, class T, class F>
+using hb_conditional = typename _hb_conditional<B, T, F>::type;
+
+
+template <typename From, typename To>
+struct hb_is_convertible
+{
+ private:
+ static constexpr bool from_void = hb_is_same (void, hb_decay<From>);
+ static constexpr bool to_void = hb_is_same (void, hb_decay<To> );
+ static constexpr bool either_void = from_void || to_void;
+ static constexpr bool both_void = from_void && to_void;
+
+ static hb_true_type impl2 (hb_conditional<to_void, int, To>);
+
+ template <typename T>
+ static auto impl (hb_priority<1>) -> decltype (impl2 (hb_declval (T)));
+ template <typename T>
+ static hb_false_type impl (hb_priority<0>);
+ public:
+ static constexpr bool value = both_void ||
+ (!either_void &&
+ decltype (impl<hb_conditional<from_void, int, From>> (hb_prioritize))::value);
+};
+#define hb_is_convertible(From,To) hb_is_convertible<From, To>::value
+
+template <typename Base, typename Derived>
+using hb_is_base_of = hb_is_convertible<hb_decay<Derived> *, hb_decay<Base> *>;
+#define hb_is_base_of(Base,Derived) hb_is_base_of<Base, Derived>::value
+
+template <typename From, typename To>
+using hb_is_cr_convertible = hb_bool_constant<
+ hb_is_same (hb_decay<From>, hb_decay<To>) && /* Same type once reference and const are stripped. */
+ (!hb_is_const (From) || hb_is_const (To)) && /* A const From requires a const To. */
+ (!hb_is_reference (To) || hb_is_const (To) || hb_is_reference (To)) /* NOTE(review): always true as written (!A || B || A); the last term was presumably meant to be hb_is_reference (From) -- confirm. */
+>;
+#define hb_is_cr_convertible(From,To) hb_is_cr_convertible<From, To>::value
+
+/* std::move and std::forward */
+
+template <typename T>
+static constexpr hb_remove_reference<T>&& hb_move (T&& t) { return (hb_remove_reference<T>&&) (t); }
+
+template <typename T>
+static constexpr T&& hb_forward (hb_remove_reference<T>& t) { return (T&&) t; }
+template <typename T>
+static constexpr T&& hb_forward (hb_remove_reference<T>&& t) { return (T&&) t; }
+
+struct
+{
+ template <typename T> constexpr auto
+ operator () (T&& v) const HB_AUTO_RETURN (hb_forward<T> (v))
+
+ template <typename T> constexpr auto
+ operator () (T *v) const HB_AUTO_RETURN (*v)
+}
+HB_FUNCOBJ (hb_deref);
+
+struct
+{
+ template <typename T> constexpr auto
+ operator () (T&& v) const HB_AUTO_RETURN (hb_forward<T> (v))
+
+ template <typename T> constexpr auto
+ operator () (T& v) const HB_AUTO_RETURN (hb_addressof (v))
+}
+HB_FUNCOBJ (hb_ref);
+
+template <typename T>
+struct hb_reference_wrapper
+{
+ hb_reference_wrapper (T v) : v (v) {}
+ bool operator == (const hb_reference_wrapper& o) const { return v == o.v; }
+ bool operator != (const hb_reference_wrapper& o) const { return v != o.v; }
+ operator T () const { return v; }
+ T get () const { return v; }
+ T v;
+};
+template <typename T>
+struct hb_reference_wrapper<T&>
+{
+ hb_reference_wrapper (T& v) : v (hb_addressof (v)) {}
+ bool operator == (const hb_reference_wrapper& o) const { return v == o.v; }
+ bool operator != (const hb_reference_wrapper& o) const { return v != o.v; }
+ operator T& () const { return *v; }
+ T& get () const { return *v; }
+ T* v;
+};
+
+
+template <typename T>
+using hb_is_integral = hb_bool_constant<
+ hb_is_same (hb_decay<T>, char) ||
+ hb_is_same (hb_decay<T>, signed char) ||
+ hb_is_same (hb_decay<T>, unsigned char) ||
+ hb_is_same (hb_decay<T>, signed int) ||
+ hb_is_same (hb_decay<T>, unsigned int) ||
+ hb_is_same (hb_decay<T>, signed short) ||
+ hb_is_same (hb_decay<T>, unsigned short) ||
+ hb_is_same (hb_decay<T>, signed long) ||
+ hb_is_same (hb_decay<T>, unsigned long) ||
+ hb_is_same (hb_decay<T>, signed long long) ||
+ hb_is_same (hb_decay<T>, unsigned long long) ||
+ false
+>;
+#define hb_is_integral(T) hb_is_integral<T>::value
+template <typename T>
+using hb_is_floating_point = hb_bool_constant<
+ hb_is_same (hb_decay<T>, float) ||
+ hb_is_same (hb_decay<T>, double) ||
+ hb_is_same (hb_decay<T>, long double) ||
+ false
+>;
+#define hb_is_floating_point(T) hb_is_floating_point<T>::value
+template <typename T>
+using hb_is_arithmetic = hb_bool_constant<
+ hb_is_integral (T) ||
+ hb_is_floating_point (T) ||
+ false
+>;
+#define hb_is_arithmetic(T) hb_is_arithmetic<T>::value
+
+
+template <typename T>
+using hb_is_signed = hb_conditional<hb_is_arithmetic (T),
+ hb_bool_constant<(T) -1 < (T) 0>,
+ hb_false_type>;
+#define hb_is_signed(T) hb_is_signed<T>::value
+template <typename T>
+using hb_is_unsigned = hb_conditional<hb_is_arithmetic (T),
+ hb_bool_constant<(T) 0 < (T) -1>,
+ hb_false_type>;
+#define hb_is_unsigned(T) hb_is_unsigned<T>::value
+
+template <typename T> struct hb_int_min;
+template <> struct hb_int_min<char> : hb_integral_constant<char, CHAR_MIN> {};
+template <> struct hb_int_min<signed char> : hb_integral_constant<signed char, SCHAR_MIN> {};
+template <> struct hb_int_min<unsigned char> : hb_integral_constant<unsigned char, 0> {};
+template <> struct hb_int_min<signed short> : hb_integral_constant<signed short, SHRT_MIN> {};
+template <> struct hb_int_min<unsigned short> : hb_integral_constant<unsigned short, 0> {};
+template <> struct hb_int_min<signed int> : hb_integral_constant<signed int, INT_MIN> {};
+template <> struct hb_int_min<unsigned int> : hb_integral_constant<unsigned int, 0> {};
+template <> struct hb_int_min<signed long> : hb_integral_constant<signed long, LONG_MIN> {};
+template <> struct hb_int_min<unsigned long> : hb_integral_constant<unsigned long, 0> {};
+template <> struct hb_int_min<signed long long> : hb_integral_constant<signed long long, LLONG_MIN> {};
+template <> struct hb_int_min<unsigned long long> : hb_integral_constant<unsigned long long, 0> {};
+#define hb_int_min(T) hb_int_min<T>::value
+template <typename T> struct hb_int_max;
+template <> struct hb_int_max<char> : hb_integral_constant<char, CHAR_MAX> {};
+template <> struct hb_int_max<signed char> : hb_integral_constant<signed char, SCHAR_MAX> {};
+template <> struct hb_int_max<unsigned char> : hb_integral_constant<unsigned char, UCHAR_MAX> {};
+template <> struct hb_int_max<signed short> : hb_integral_constant<signed short, SHRT_MAX> {};
+template <> struct hb_int_max<unsigned short> : hb_integral_constant<unsigned short, USHRT_MAX> {};
+template <> struct hb_int_max<signed int> : hb_integral_constant<signed int, INT_MAX> {};
+template <> struct hb_int_max<unsigned int> : hb_integral_constant<unsigned int, UINT_MAX> {};
+template <> struct hb_int_max<signed long> : hb_integral_constant<signed long, LONG_MAX> {};
+template <> struct hb_int_max<unsigned long> : hb_integral_constant<unsigned long, ULONG_MAX> {};
+template <> struct hb_int_max<signed long long> : hb_integral_constant<signed long long, LLONG_MAX> {};
+template <> struct hb_int_max<unsigned long long> : hb_integral_constant<unsigned long long, ULLONG_MAX> {};
+#define hb_int_max(T) hb_int_max<T>::value
+
+
+
+template <typename T, typename>
+struct _hb_is_destructible : hb_false_type {};
+template <typename T>
+struct _hb_is_destructible<T, hb_void_t<decltype (hb_declval (T).~T ())>> : hb_true_type {};
+template <typename T>
+using hb_is_destructible = _hb_is_destructible<T, void>;
+#define hb_is_destructible(T) hb_is_destructible<T>::value
+
+template <typename T, typename, typename ...Ts>
+struct _hb_is_constructible : hb_false_type {};
+template <typename T, typename ...Ts>
+struct _hb_is_constructible<T, hb_void_t<decltype (T (hb_declval (Ts)...))>, Ts...> : hb_true_type {};
+template <typename T, typename ...Ts>
+using hb_is_constructible = _hb_is_constructible<T, void, Ts...>;
+#define hb_is_constructible(...) hb_is_constructible<__VA_ARGS__>::value
+
+template <typename T>
+using hb_is_default_constructible = hb_is_constructible<T>;
+#define hb_is_default_constructible(T) hb_is_default_constructible<T>::value
+
+template <typename T>
+using hb_is_copy_constructible = hb_is_constructible<T, hb_add_lvalue_reference<hb_add_const<T>>>;
+#define hb_is_copy_constructible(T) hb_is_copy_constructible<T>::value
+
+template <typename T>
+using hb_is_move_constructible = hb_is_constructible<T, hb_add_rvalue_reference<hb_add_const<T>>>; /* NOTE(review): this tests construction from const T&&, which a T(T&&) constructor cannot bind; std::is_move_constructible tests T&& -- confirm the hb_add_const is intended. */
+#define hb_is_move_constructible(T) hb_is_move_constructible<T>::value
+
+template <typename T, typename U, typename>
+struct _hb_is_assignable : hb_false_type {};
+template <typename T, typename U>
+struct _hb_is_assignable<T, U, hb_void_t<decltype (hb_declval (T) = hb_declval (U))>> : hb_true_type {};
+template <typename T, typename U>
+using hb_is_assignable = _hb_is_assignable<T, U, void>;
+#define hb_is_assignable(T,U) hb_is_assignable<T, U>::value
+
+template <typename T>
+using hb_is_copy_assignable = hb_is_assignable<hb_add_lvalue_reference<T>,
+ hb_add_lvalue_reference<hb_add_const<T>>>;
+#define hb_is_copy_assignable(T) hb_is_copy_assignable<T>::value
+
+template <typename T>
+using hb_is_move_assignable = hb_is_assignable<hb_add_lvalue_reference<T>,
+ hb_add_rvalue_reference<T>>;
+#define hb_is_move_assignable(T) hb_is_move_assignable<T>::value
+
+/* Trivial versions. */
+
+template <typename T> union hb_trivial { T value; };
+
+template <typename T>
+using hb_is_trivially_destructible= hb_is_destructible<hb_trivial<T>>;
+#define hb_is_trivially_destructible(T) hb_is_trivially_destructible<T>::value
+
+/* Don't know how to do the following. */
+//template <typename T, typename ...Ts>
+//using hb_is_trivially_constructible= hb_is_constructible<hb_trivial<T>, hb_trivial<Ts>...>;
+//#define hb_is_trivially_constructible(...) hb_is_trivially_constructible<__VA_ARGS__>::value
+
+template <typename T>
+using hb_is_trivially_default_constructible= hb_is_default_constructible<hb_trivial<T>>;
+#define hb_is_trivially_default_constructible(T) hb_is_trivially_default_constructible<T>::value
+
+template <typename T>
+using hb_is_trivially_copy_constructible= hb_is_copy_constructible<hb_trivial<T>>;
+#define hb_is_trivially_copy_constructible(T) hb_is_trivially_copy_constructible<T>::value
+
+template <typename T>
+using hb_is_trivially_move_constructible= hb_is_move_constructible<hb_trivial<T>>;
+#define hb_is_trivially_move_constructible(T) hb_is_trivially_move_constructible<T>::value
+
+/* Don't know how to do the following. */
+//template <typename T, typename U>
+//using hb_is_trivially_assignable= hb_is_assignable<hb_trivial<T>, hb_trivial<U>>;
+//#define hb_is_trivially_assignable(T,U) hb_is_trivially_assignable<T, U>::value
+
+template <typename T>
+using hb_is_trivially_copy_assignable= hb_is_copy_assignable<hb_trivial<T>>;
+#define hb_is_trivially_copy_assignable(T) hb_is_trivially_copy_assignable<T>::value
+
+template <typename T>
+using hb_is_trivially_move_assignable= hb_is_move_assignable<hb_trivial<T>>;
+#define hb_is_trivially_move_assignable(T) hb_is_trivially_move_assignable<T>::value
+
+template <typename T>
+using hb_is_trivially_copyable= hb_bool_constant<
+ hb_is_trivially_destructible (T) &&
+ (!hb_is_move_assignable (T) || hb_is_trivially_move_assignable (T)) &&
+ (!hb_is_move_constructible (T) || hb_is_trivially_move_constructible (T)) &&
+ (!hb_is_copy_assignable (T) || hb_is_trivially_copy_assignable (T)) &&
+ (!hb_is_copy_constructible (T) || hb_is_trivially_copy_constructible (T)) &&
+ true
+>;
+#define hb_is_trivially_copyable(T) hb_is_trivially_copyable<T>::value
+
+template <typename T>
+using hb_is_trivial= hb_bool_constant<
+ hb_is_trivially_copyable (T) &&
+ hb_is_trivially_default_constructible (T)
+>;
+#define hb_is_trivial(T) hb_is_trivial<T>::value
+
+/* hb_unwrap_type (T)
+ * If T has no T::type, returns T. Otherwise calls itself on T::type recursively.
+ */
+
+template <typename T, typename>
+struct _hb_unwrap_type : hb_type_identity_t<T> {};
+template <typename T>
+struct _hb_unwrap_type<T, hb_void_t<typename T::type>> : _hb_unwrap_type<typename T::type, void> {};
+template <typename T>
+using hb_unwrap_type = _hb_unwrap_type<T, void>;
+#define hb_unwrap_type(T) typename hb_unwrap_type<T>::type
+
+#endif /* HB_META_HH */
diff --git a/thirdparty/harfbuzz/src/hb-mutex.hh b/thirdparty/harfbuzz/src/hb-mutex.hh
new file mode 100644
index 0000000000..56392d049b
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-mutex.hh
@@ -0,0 +1,133 @@
+/*
+ * Copyright © 2007 Chris Wilson
+ * Copyright © 2009,2010 Red Hat, Inc.
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Contributor(s):
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_MUTEX_HH
+#define HB_MUTEX_HH
+
+#include "hb.hh"
+
+
+/* mutex */
+
+/* We need external help for these */
+
+#if defined(HB_MUTEX_IMPL_INIT) \
+ && defined(hb_mutex_impl_init) \
+ && defined(hb_mutex_impl_lock) \
+ && defined(hb_mutex_impl_unlock) \
+ && defined(hb_mutex_impl_finish)
+
+/* Defined externally, i.e. in config.h; must have typedef'ed hb_mutex_impl_t as well. */
+
+
+#elif !defined(HB_NO_MT) && (defined(HAVE_PTHREAD) || defined(__APPLE__))
+
+#include <pthread.h>
+typedef pthread_mutex_t hb_mutex_impl_t;
+#define HB_MUTEX_IMPL_INIT PTHREAD_MUTEX_INITIALIZER
+#define hb_mutex_impl_init(M) pthread_mutex_init (M, nullptr)
+#define hb_mutex_impl_lock(M) pthread_mutex_lock (M)
+#define hb_mutex_impl_unlock(M) pthread_mutex_unlock (M)
+#define hb_mutex_impl_finish(M) pthread_mutex_destroy (M)
+
+
+#elif !defined(HB_NO_MT) && defined(_WIN32)
+
+typedef CRITICAL_SECTION hb_mutex_impl_t;
+#define HB_MUTEX_IMPL_INIT {0}
+#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+#define hb_mutex_impl_init(M) InitializeCriticalSectionEx (M, 0, 0)
+#else
+#define hb_mutex_impl_init(M) InitializeCriticalSection (M)
+#endif
+#define hb_mutex_impl_lock(M) EnterCriticalSection (M)
+#define hb_mutex_impl_unlock(M) LeaveCriticalSection (M)
+#define hb_mutex_impl_finish(M) DeleteCriticalSection (M)
+
+
+#elif !defined(HB_NO_MT) && defined(HAVE_INTEL_ATOMIC_PRIMITIVES)
+
+#if defined(HAVE_SCHED_H) && defined(HAVE_SCHED_YIELD)
+# include <sched.h>
+# define HB_SCHED_YIELD() sched_yield ()
+#else
+# define HB_SCHED_YIELD() HB_STMT_START {} HB_STMT_END
+#endif
+
+/* This actually is not a totally awful implementation. */
+typedef volatile int hb_mutex_impl_t;
+#define HB_MUTEX_IMPL_INIT 0
+#define hb_mutex_impl_init(M) *(M) = 0
+#define hb_mutex_impl_lock(M) HB_STMT_START { while (__sync_lock_test_and_set((M), 1)) HB_SCHED_YIELD (); } HB_STMT_END
+#define hb_mutex_impl_unlock(M) __sync_lock_release (M)
+#define hb_mutex_impl_finish(M) HB_STMT_START {} HB_STMT_END
+
+
+#elif defined(HB_NO_MT)
+
+typedef int hb_mutex_impl_t;
+#define HB_MUTEX_IMPL_INIT 0
+#define hb_mutex_impl_init(M) HB_STMT_START {} HB_STMT_END
+#define hb_mutex_impl_lock(M) HB_STMT_START {} HB_STMT_END
+#define hb_mutex_impl_unlock(M) HB_STMT_START {} HB_STMT_END
+#define hb_mutex_impl_finish(M) HB_STMT_START {} HB_STMT_END
+
+
+#else
+
+#error "Could not find any system to define mutex macros."
+#error "Check hb-mutex.hh for possible resolutions."
+
+#endif
+
+
+#define HB_MUTEX_INIT {HB_MUTEX_IMPL_INIT}
+
+struct hb_mutex_t
+{
+ hb_mutex_impl_t m;
+
+ void init () { hb_mutex_impl_init (&m); }
+ void lock () { hb_mutex_impl_lock (&m); }
+ void unlock () { hb_mutex_impl_unlock (&m); }
+ void fini () { hb_mutex_impl_finish (&m); }
+};
+
+struct hb_lock_t /* Scoped guard around an hb_mutex_t. */
+{
+ hb_lock_t (hb_mutex_t &mutex_) : mutex (mutex_) { mutex.lock (); } /* Locks the mutex on construction. */
+ ~hb_lock_t () { mutex.unlock (); } /* Unlocks the same mutex on destruction. */
+ private:
+ hb_mutex_t &mutex; /* Held by reference; the guard does not own the mutex. */
+};
+
+
+#endif /* HB_MUTEX_HH */
diff --git a/thirdparty/harfbuzz/src/hb-null.hh b/thirdparty/harfbuzz/src/hb-null.hh
new file mode 100644
index 0000000000..9853939b07
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-null.hh
@@ -0,0 +1,184 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_NULL_HH
+#define HB_NULL_HH
+
+#include "hb.hh"
+#include "hb-meta.hh"
+
+
+/*
+ * Static pools
+ */
+
+/* Global nul-content Null pool. Enlarge as necessary. */
+
+#define HB_NULL_POOL_SIZE 384
+
+/* Use SFINAE to sniff whether T has min_size; in which case return T::null_size,
+ * otherwise return sizeof(T). */
+
+/* The hard way...
+ * https://stackoverflow.com/questions/7776448/sfinae-tried-with-bool-gives-compiler-error-template-argument-tvalue-invol
+ */
+
+template <typename T, typename>
+struct _hb_null_size : hb_integral_constant<unsigned, sizeof (T)> {};
+template <typename T>
+struct _hb_null_size<T, hb_void_t<decltype (T::min_size)>> : hb_integral_constant<unsigned, T::null_size> {};
+
+template <typename T>
+using hb_null_size = _hb_null_size<T, void>;
+#define hb_null_size(T) hb_null_size<T>::value
+
+/* This doesn't belong here, but since it is copy/pasted from above, put it here. */
+
+/* hb_static_size (T)
+ * Returns T::static_size if T::min_size is defined, or sizeof (T) otherwise. */
+
+template <typename T, typename>
+struct _hb_static_size : hb_integral_constant<unsigned, sizeof (T)> {};
+template <typename T>
+struct _hb_static_size<T, hb_void_t<decltype (T::min_size)>> : hb_integral_constant<unsigned, T::static_size> {};
+template <typename T>
+using hb_static_size = _hb_static_size<T, void>;
+#define hb_static_size(T) hb_static_size<T>::value
+
+
+/*
+ * Null()
+ */
+
+extern HB_INTERNAL
+uint64_t const _hb_NullPool[(HB_NULL_POOL_SIZE + sizeof (uint64_t) - 1) / sizeof (uint64_t)];
+
+/* Generic nul-content Null objects. */
+template <typename Type>
+struct Null {
+ static Type const & get_null ()
+ {
+ static_assert (hb_null_size (Type) <= HB_NULL_POOL_SIZE, "Increase HB_NULL_POOL_SIZE.");
+ return *reinterpret_cast<Type const *> (_hb_NullPool);
+ }
+};
+template <typename QType>
+struct NullHelper
+{
+ typedef hb_remove_const<hb_remove_reference<QType>> Type;
+ static const Type & get_null () { return Null<Type>::get_null (); }
+};
+#define Null(Type) NullHelper<Type>::get_null ()
+
+/* Specializations for arbitrary-content Null objects expressed in bytes. */
+#define DECLARE_NULL_NAMESPACE_BYTES(Namespace, Type) \
+ } /* Close namespace. */ \
+ extern HB_INTERNAL const unsigned char _hb_Null_##Namespace##_##Type[Namespace::Type::null_size]; \
+ template <> \
+ struct Null<Namespace::Type> { \
+ static Namespace::Type const & get_null () { \
+ return *reinterpret_cast<const Namespace::Type *> (_hb_Null_##Namespace##_##Type); \
+ } \
+ }; \
+ namespace Namespace { \
+ static_assert (true, "") /* Require semicolon after. */
+#define DEFINE_NULL_NAMESPACE_BYTES(Namespace, Type) \
+ const unsigned char _hb_Null_##Namespace##_##Type[Namespace::Type::null_size]
+
+/* Specializations for arbitrary-content Null objects expressed as struct initializer. */
+#define DECLARE_NULL_INSTANCE(Type) \
+ extern HB_INTERNAL const Type _hb_Null_##Type; \
+ template <> \
+ struct Null<Type> { \
+ static Type const & get_null () { \
+ return _hb_Null_##Type; \
+ } \
+ }; \
+ static_assert (true, "") /* Require semicolon after. */
+#define DEFINE_NULL_INSTANCE(Type) \
+ const Type _hb_Null_##Type
+
+/* Global writable pool. Enlarge as necessary. */
+
+/* To be fully correct, CrapPool must be thread_local. However, we do not rely on CrapPool
+ * for correct operation. It only exists to catch and divert program logic bugs instead of
+ * causing bad memory access. So, races there are not actually introducing incorrectness
+ * in the code. Has ~12kb binary size overhead to have it, also clang build fails with it. */
+extern HB_INTERNAL
+/*thread_local*/ uint64_t _hb_CrapPool[(HB_NULL_POOL_SIZE + sizeof (uint64_t) - 1) / sizeof (uint64_t)];
+
+/* CRAP pool: Common Region for Access Protection. */
+template <typename Type>
+static inline Type& Crap () { /* Returns a writable scratch object of Type backed by _hb_CrapPool. */
+ static_assert (hb_null_size (Type) <= HB_NULL_POOL_SIZE, "Increase HB_NULL_POOL_SIZE."); /* NOTE(review): this bounds hb_null_size (Type), while the memcpy below copies sizeof (Type) bytes -- confirm the two always agree or that sizeof also fits. */
+ Type *obj = reinterpret_cast<Type *> (_hb_CrapPool); /* Every Type aliases the start of the same pool. */
+ memcpy (obj, &Null (Type), sizeof (*obj)); /* Reset the scratch object to Null (Type) content on every call. */
+ return *obj;
+}
+template <typename QType>
+struct CrapHelper
+{
+ typedef hb_remove_const<hb_remove_reference<QType>> Type;
+ static Type & get_crap () { return Crap<Type> (); }
+};
+#define Crap(Type) CrapHelper<Type>::get_crap ()
+
+template <typename Type>
+struct CrapOrNullHelper {
+ static Type & get () { return Crap (Type); }
+};
+template <typename Type>
+struct CrapOrNullHelper<const Type> {
+ static const Type & get () { return Null (Type); }
+};
+#define CrapOrNull(Type) CrapOrNullHelper<Type>::get ()
+
+
+/*
+ * hb_nonnull_ptr_t
+ */
+
+template <typename P>
+struct hb_nonnull_ptr_t /* Pointer wrapper whose dereference falls back to Null (T) when the stored pointer is null. */
+{
+ typedef hb_remove_pointer<P> T; /* Pointee type: P with one level of pointer removed. */
+
+ hb_nonnull_ptr_t (T *v_ = nullptr) : v (v_) {}
+ T * operator = (T *v_) { return v = v_; }
+ T * operator -> () const { return get (); }
+ T & operator * () const { return *get (); }
+ T ** operator & () { return &v; } /* Non-const: in a const member &v is 'T * const *', which cannot convert to 'T **'. */
+ /* Only auto-cast to const types. */
+ template <typename C> operator const C * () const { return get (); }
+ operator const char * () const { return (const char *) get (); }
+ T * get () const { return v ? v : const_cast<T *> (&Null (T)); } /* Never returns nullptr; substitutes the shared Null object. */
+ T * get_raw () const { return v; } /* Stored pointer as-is; may be nullptr. */
+
+ T *v;
+};
+
+
+#endif /* HB_NULL_HH */
diff --git a/thirdparty/harfbuzz/src/hb-number-parser.hh b/thirdparty/harfbuzz/src/hb-number-parser.hh
new file mode 100644
index 0000000000..1a9dbba6dd
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-number-parser.hh
@@ -0,0 +1,237 @@
+
+#line 1 "hb-number-parser.rl"
+/*
+ * Copyright © 2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ */
+
+#ifndef HB_NUMBER_PARSER_HH
+#define HB_NUMBER_PARSER_HH
+
+#include "hb.hh"
+
+
+#line 35 "hb-number-parser.hh"
+/* Transition tables for the number-parser state machine.  Per the #line
+ * directives this file is generated from hb-number-parser.rl, so edits here
+ * are presumably overwritten on regeneration -- confirm before hand-editing.
+ *
+ * Two bytes per state: the lowest and highest input character that has an
+ * explicit transition (indexed as cs<<1 by the driver loop). */
+static const unsigned char _double_parser_trans_keys[] = {
+ 0u, 0u, 43u, 57u, 46u, 57u, 48u, 57u, 43u, 57u, 48u, 57u, 48u, 101u, 48u, 57u,
+ 46u, 101u, 0
+};
+
+/* Per state: number of characters in the key range above; the driver uses
+ * this value as the index of the state's default transition. */
+static const char _double_parser_key_spans[] = {
+ 0, 15, 12, 10, 15, 10, 54, 10,
+ 56
+};
+
+/* Per state: offset of that state's first entry in _double_parser_indicies. */
+static const unsigned char _double_parser_index_offsets[] = {
+ 0, 0, 16, 29, 40, 56, 67, 122,
+ 133
+};
+
+/* Maps (state offset + character - low key), or the default slot, to a
+ * transition number used to index the targs/actions tables below. */
+static const char _double_parser_indicies[] = {
+ 0, 1, 2, 3, 1, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 1, 3, 1, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 1, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 1, 6, 1, 7, 1, 1, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 1, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 1, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 9, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 9, 1, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 1, 3, 1,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 9, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 9, 1, 0
+};
+
+/* Per transition: the state to enter next (0 is the error state). */
+static const char _double_parser_trans_targs[] = {
+ 2, 0, 2, 3, 8, 6, 5, 5,
+ 7, 4
+};
+
+/* Per transition: id of the action to run (0 means no action); the ids are
+ * the case labels of the switch in strtod_rl. */
+static const char _double_parser_trans_actions[] = {
+ 0, 0, 1, 0, 2, 3, 0, 4,
+ 5, 0
+};
+
+/* Named states of the machine. */
+static const int double_parser_start = 1;
+static const int double_parser_first_final = 6;
+static const int double_parser_error = 0;
+
+static const int double_parser_en_main = 1;
+
+
+#line 68 "hb-number-parser.rl"
+
+
+/* Computes 10^exponent as the product of the table entries selected by the
+ * set bits of exponent.  Only the low nine bits are consulted, so the result
+ * is meaningful only for exponent < 512. */
+static inline double
+_pow10 (unsigned exponent)
+{
+  /* 10^(2^k) for k = 8 down to 0, largest first. */
+  static const double _powers_of_10[] =
+  {
+    1.0e+256, 1.0e+128, 1.0e+64, 1.0e+32, 1.0e+16, 1.0e+8, 10000., 100., 10.
+  };
+  const unsigned count = ARRAY_LENGTH (_powers_of_10);
+  double product = 1;
+  /* Entry i pairs with bit (count - 1 - i); visit highest bit first. */
+  for (unsigned i = 0; i < count; i++)
+  {
+    const unsigned bit = 1u << (count - 1 - i);
+    if (exponent & bit)
+      product *= _powers_of_10[i];
+  }
+  return product;
+}
+
+/* a variant of strtod that also gets end of buffer in its second argument */
+/* Parses a decimal floating-point number from [p, *end_ptr).
+ *
+ * p        start of the text; leading characters for which ISSPACE is true
+ *          are skipped first.
+ * end_ptr  IN: one past the last readable character.
+ *          OUT: position at which the state machine stopped.
+ *
+ * Returns the parsed value.  The actions below accumulate an optional sign,
+ * integer digits, fraction digits and an optionally signed exponent.  When
+ * the exponent is too large to scale by, the result is clamped: 0 stays 0, a
+ * negative exponent gives +/-DBL_MIN and a positive one gives +/-DBL_MAX.
+ *
+ * NOTE(review): per the #line directives this file is generated from
+ * hb-number-parser.rl; the MAX_EXP change below presumably needs to be
+ * mirrored there -- confirm. */
+static inline double
+strtod_rl (const char *p, const char **end_ptr /* IN/OUT */)
+{
+ double value = 0;
+ double frac = 0;
+ double frac_count = 0;
+ unsigned exp = 0;
+ bool neg = false, exp_neg = false, exp_overflow = false;
+ const unsigned long long MAX_FRACT = 0xFFFFFFFFFFFFFull; /* 2^52-1 */
+ /* _pow10 only consults the low nine bits of its argument, so any exponent
+  * above 511 must take the exp_overflow clamp instead of being scaled by a
+  * silently truncated power of ten. */
+ const unsigned MAX_EXP = 0x1FFu; /* 2^9-1 */
+
+ const char *pe = *end_ptr;
+ while (p < pe && ISSPACE (*p))
+ p++;
+
+ int cs;
+
+#line 139 "hb-number-parser.hh"
+ {
+ cs = double_parser_start;
+ }
+
+#line 144 "hb-number-parser.hh"
+ {
+ int _slen;
+ int _trans;
+ const unsigned char *_keys;
+ const char *_inds;
+ if ( p == pe )
+ goto _test_eof;
+ if ( cs == 0 )
+ goto _out;
+_resume:
+ /* Look up the transition for the current state and input character; a
+  * character outside the state's key range selects the default slot. */
+ _keys = _double_parser_trans_keys + (cs<<1);
+ _inds = _double_parser_indicies + _double_parser_index_offsets[cs];
+
+ _slen = _double_parser_key_spans[cs];
+ _trans = _inds[ _slen > 0 && _keys[0] <=(*p) &&
+ (*p) <= _keys[1] ?
+ (*p) - _keys[0] : _slen ];
+
+ cs = _double_parser_trans_targs[_trans];
+
+ if ( _double_parser_trans_actions[_trans] == 0 )
+ goto _again;
+
+ switch ( _double_parser_trans_actions[_trans] ) {
+ case 1:
+#line 37 "hb-number-parser.rl"
+ { neg = true; }
+ break;
+ case 4:
+#line 38 "hb-number-parser.rl"
+ { exp_neg = true; }
+ break;
+ case 2:
+#line 40 "hb-number-parser.rl"
+ {
+ value = value * 10. + ((*p) - '0');
+}
+ break;
+ case 3:
+#line 43 "hb-number-parser.rl"
+ {
+ /* Fraction digits beyond what MAX_FRACT can hold are dropped. */
+ if (likely (frac <= MAX_FRACT / 10))
+ {
+ frac = frac * 10. + ((*p) - '0');
+ ++frac_count;
+ }
+}
+ break;
+ case 5:
+#line 50 "hb-number-parser.rl"
+ {
+ if (likely (exp * 10 + ((*p) - '0') <= MAX_EXP))
+ exp = exp * 10 + ((*p) - '0');
+ else
+ exp_overflow = true;
+}
+ break;
+#line 202 "hb-number-parser.hh"
+ }
+
+_again:
+ if ( cs == 0 )
+ goto _out;
+ if ( ++p != pe )
+ goto _resume;
+ _test_eof: {}
+ _out: {}
+ }
+
+#line 113 "hb-number-parser.rl"
+
+
+ *end_ptr = p;
+
+ if (frac_count) value += frac / _pow10 (frac_count);
+ if (neg) value *= -1.;
+
+ if (unlikely (exp_overflow))
+ {
+ if (value == 0) return value;
+ if (exp_neg) return neg ? -DBL_MIN : DBL_MIN;
+ else return neg ? -DBL_MAX : DBL_MAX;
+ }
+
+ if (exp)
+ {
+ if (exp_neg) value /= _pow10 (exp);
+ else value *= _pow10 (exp);
+ }
+
+ return value;
+}
+
+#endif /* HB_NUMBER_PARSER_HH */
diff --git a/thirdparty/harfbuzz/src/hb-number.cc b/thirdparty/harfbuzz/src/hb-number.cc
new file mode 100644
index 0000000000..6e4f3f7ebd
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-number.cc
@@ -0,0 +1,80 @@
+/*
+ * Copyright © 2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ */
+
+#include "hb.hh"
+#include "hb-machinery.hh"
+#include "hb-number.hh"
+#include "hb-number-parser.hh"
+
+/* Shared driver for the strtol/strtoul based parsers.
+ *
+ * Copies at most 31 characters of [*pp, end) into a NUL-terminated scratch
+ * buffer, runs f (text, &text_end) on it and stores the result in *pv (which
+ * is written even when the call then fails).  Returns false if errno was
+ * set, if nothing was consumed, or -- when whole_buffer is set -- if the
+ * consumed length differs from the input length.  On success *pp is advanced
+ * past the consumed characters. */
+template<typename T, typename Func>
+static bool
+_parse_number (const char **pp, const char *end, T *pv,
+               bool whole_buffer, Func f)
+{
+  char scratch[32];
+  unsigned copied = hb_min (ARRAY_LENGTH (scratch) - 1, (unsigned) (end - *pp));
+  strncpy (scratch, *pp, copied);
+  scratch[copied] = '\0';
+
+  char *start = scratch;
+  char *stop = start;
+
+  errno = 0;
+  *pv = f (start, &stop);
+
+  if (unlikely (errno))
+    return false;
+  if (unlikely (start == stop))
+    return false;
+  /* Reject trailing input when the caller asked for a full match. */
+  if (unlikely (whole_buffer && stop - start != end - *pp))
+    return false;
+
+  *pp += stop - start;
+  return true;
+}
+
+/* Parses a base-10 integer from [*pp, end) via strtol; see _parse_number for
+ * the success/failure rules and how *pp and *pv are updated. */
+bool
+hb_parse_int (const char **pp, const char *end, int *pv, bool whole_buffer)
+{
+  auto parse_base10 = [] (const char *text, char **text_end)
+                      { return strtol (text, text_end, 10); };
+  return _parse_number<int> (pp, end, pv, whole_buffer, parse_base10);
+}
+
+/* Parses an unsigned integer in the given base from [*pp, end) via strtoul;
+ * see _parse_number for the success/failure rules. */
+bool
+hb_parse_uint (const char **pp, const char *end, unsigned *pv,
+               bool whole_buffer, int base)
+{
+  auto parse_in_base = [base] (const char *text, char **text_end)
+                       { return strtoul (text, text_end, base); };
+  return _parse_number<unsigned> (pp, end, pv, whole_buffer, parse_in_base);
+}
+
+/* Parses a floating-point number from [*pp, end) with strtod_rl and stores
+ * it in *pv.  Returns false when nothing was consumed (*pp is then left
+ * untouched).  Otherwise *pp is advanced to where parsing stopped, and the
+ * result is true unless whole_buffer is set and input remains. */
+bool
+hb_parse_double (const char **pp, const char *end, double *pv, bool whole_buffer)
+{
+  const char *stop = end; /* IN: buffer end, OUT: where parsing stopped. */
+  *pv = strtod_rl (*pp, &stop);
+
+  if (unlikely (*pp == stop))
+    return false;
+
+  *pp = stop;
+  if (whole_buffer && stop != end)
+    return false;
+  return true;
+}
diff --git a/thirdparty/harfbuzz/src/hb-number.hh b/thirdparty/harfbuzz/src/hb-number.hh
new file mode 100644
index 0000000000..14d1260aa3
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-number.hh
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ */
+
+#ifndef HB_NUMBER_HH
+#define HB_NUMBER_HH
+
+/* Number parsers over the character range [*pp, end).  Each stores the
+ * parsed value in *pv and, on success, advances *pp past the consumed text.
+ * With whole_buffer set, a parse that leaves input unconsumed returns false. */
+
+/* Base-10 signed integer. */
+HB_INTERNAL bool
+hb_parse_int (const char **pp, const char *end, int *pv,
+ bool whole_buffer = false);
+
+/* Unsigned integer in the given base (default 10). */
+HB_INTERNAL bool
+hb_parse_uint (const char **pp, const char *end, unsigned int *pv,
+ bool whole_buffer = false, int base = 10);
+
+/* Floating-point number. */
+HB_INTERNAL bool
+hb_parse_double (const char **pp, const char *end, double *pv,
+ bool whole_buffer = false);
+
+#endif /* HB_NUMBER_HH */
diff --git a/thirdparty/harfbuzz/src/hb-object.hh b/thirdparty/harfbuzz/src/hb-object.hh
new file mode 100644
index 0000000000..39845a70e7
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-object.hh
@@ -0,0 +1,342 @@
+/*
+ * Copyright © 2007 Chris Wilson
+ * Copyright © 2009,2010 Red Hat, Inc.
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Contributor(s):
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OBJECT_HH
+#define HB_OBJECT_HH
+
+#include "hb.hh"
+#include "hb-atomic.hh"
+#include "hb-mutex.hh"
+#include "hb-vector.hh"
+
+
+/*
+ * Lockable set
+ */
+
+/* Vector-backed set of item_t whose operations take a caller-supplied lock.
+ * Every method locks l around its accesses to `items`, but releases it
+ * before calling item_t::fini () on an element that was removed or replaced. */
+template <typename item_t, typename lock_t>
+struct hb_lockable_set_t
+{
+ hb_vector_t<item_t> items;
+
+ void init () { items.init (); }
+
+ /* Looks v up with items.find ().  If a match exists it is overwritten with
+  * v when `replace` is true (the old value is fini'd after unlocking), and
+  * nullptr is returned when `replace` is false.  Without a match, v is
+  * pushed.  Returns the stored item, or whatever items.push () returned. */
+ template <typename T>
+ item_t *replace_or_insert (T v, lock_t &l, bool replace)
+ {
+ l.lock ();
+ item_t *item = items.find (v);
+ if (item) {
+ if (replace) {
+ item_t old = *item;
+ *item = v;
+ l.unlock ();
+ old.fini ();
+ }
+ else {
+ item = nullptr;
+ l.unlock ();
+ }
+ } else {
+ item = items.push (v);
+ l.unlock ();
+ }
+ return item;
+ }
+
+ /* Removes the item matching v, if any: the last element is copied into its
+  * slot, the vector is popped, and the removed value is fini'd once the lock
+  * has been released. */
+ template <typename T>
+ void remove (T v, lock_t &l)
+ {
+ l.lock ();
+ item_t *item = items.find (v);
+ if (item)
+ {
+ item_t old = *item;
+ *item = items[items.length - 1];
+ items.pop ();
+ l.unlock ();
+ old.fini ();
+ } else {
+ l.unlock ();
+ }
+ }
+
+ /* Copies the item matching v into *i while holding the lock; returns
+  * whether a match was found (*i is untouched otherwise). */
+ template <typename T>
+ bool find (T v, item_t *i, lock_t &l)
+ {
+ l.lock ();
+ item_t *item = items.find (v);
+ if (item)
+ *i = *item;
+ l.unlock ();
+ return !!item;
+ }
+
+ /* Returns the item matching v, pushing v first when there is none. */
+ template <typename T>
+ item_t *find_or_insert (T v, lock_t &l)
+ {
+ l.lock ();
+ item_t *item = items.find (v);
+ if (!item) {
+ item = items.push (v);
+ }
+ l.unlock ();
+ return item;
+ }
+
+ /* Destroys all items.  Elements are popped one at a time from the back, and
+  * the lock is dropped around each element's fini () call and re-taken
+  * before the next pop. */
+ void fini (lock_t &l)
+ {
+ if (!items.length)
+ {
+ /* No need to lock. */
+ items.fini ();
+ return;
+ }
+ l.lock ();
+ while (items.length)
+ {
+ item_t old = items[items.length - 1];
+ items.pop ();
+ l.unlock ();
+ old.fini ();
+ l.lock ();
+ }
+ items.fini ();
+ l.unlock ();
+ }
+
+};
+
+
+/*
+ * Reference-count.
+ */
+
+#define HB_REFERENCE_COUNT_INERT_VALUE 0
+#define HB_REFERENCE_COUNT_POISON_VALUE -0x0000DEAD
+#define HB_REFERENCE_COUNT_INIT {HB_ATOMIC_INT_INIT (HB_REFERENCE_COUNT_INERT_VALUE)}
+
+/* Atomic reference counter.  A count equal to the inert value marks an
+ * object that is not reference-counted; fini () stores the poison value, so
+ * a finalized counter is neither inert nor valid. */
+struct hb_reference_count_t
+{
+  mutable hb_atomic_int_t ref_count;
+
+  /* Lifecycle. */
+  void init (int v = 1)
+  { ref_count.set_relaxed (v); }
+  void fini ()
+  { ref_count.set_relaxed (HB_REFERENCE_COUNT_POISON_VALUE); }
+
+  /* Counting; inc () and dec () forward the atomic's own return value. */
+  int get_relaxed () const
+  { return ref_count.get_relaxed (); }
+  int inc () const
+  { return ref_count.inc (); }
+  int dec () const
+  { return ref_count.dec (); }
+
+  /* State queries. */
+  bool is_inert () const
+  { return HB_REFERENCE_COUNT_INERT_VALUE == get_relaxed (); }
+  bool is_valid () const
+  { return get_relaxed () > 0; }
+};
+
+
+/* user_data */
+
+/* Collection of user-data entries attached to an object, kept in a lockable
+ * set that is guarded by this array's own mutex. */
+struct hb_user_data_array_t
+{
+ /* One entry: the lookup key, the opaque data pointer and an optional
+  * destroy callback.  Entries compare equal when their keys are the same
+  * pointer. */
+ struct hb_user_data_item_t {
+ hb_user_data_key_t *key;
+ void *data;
+ hb_destroy_func_t destroy;
+
+ bool operator == (const hb_user_data_key_t *other_key) const { return key == other_key; }
+ bool operator == (const hb_user_data_item_t &other) const { return key == other.key; }
+
+ /* Invokes the destroy callback on data, when one was supplied. */
+ void fini () { if (destroy) destroy (data); }
+ };
+
+ hb_mutex_t lock;
+ hb_lockable_set_t<hb_user_data_item_t, hb_mutex_t> items;
+
+ void init () { lock.init (); items.init (); }
+
+ /* Defined outside this header. */
+ HB_INTERNAL bool set (hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace);
+
+ /* Defined outside this header. */
+ HB_INTERNAL void *get (hb_user_data_key_t *key);
+
+ /* Destroys all entries (under the lock) before tearing down the lock. */
+ void fini () { items.fini (lock); lock.fini (); }
+};
+
+
+/*
+ * Object header
+ */
+
+/* Header accessed as obj->header by the hb_object_*() helpers: the reference
+ * count, a writable flag (cleared by hb_object_make_immutable) and a pointer
+ * to the user-data array that hb_object_set_user_data allocates on demand. */
+struct hb_object_header_t
+{
+ hb_reference_count_t ref_count;
+ mutable hb_atomic_int_t writable;
+ hb_atomic_ptr_t<hb_user_data_array_t> user_data;
+};
+/* Static initializer: inert reference count, not writable, no user data. */
+#define HB_OBJECT_HEADER_STATIC \
+ { \
+ HB_REFERENCE_COUNT_INIT, \
+ HB_ATOMIC_INT_INIT (false), \
+ HB_ATOMIC_PTR_INIT (nullptr) \
+ }
+
+
+/*
+ * Object
+ */
+
+/* Emits an OBJECT debug message naming `function` and the object's current
+ * reference count (reported as 0 when obj is null). */
+template <typename Type>
+static inline void hb_object_trace (const Type *obj, const char *function)
+{
+ DEBUG_MSG (OBJECT, (void *) obj,
+ "%s refcount=%d",
+ function,
+ obj ? obj->header.ref_count.get_relaxed () : 0);
+}
+
+/* Allocates a zero-filled Type with calloc and initializes its object
+ * header.  Returns nullptr when the allocation fails. */
+template <typename Type>
+static inline Type *hb_object_create ()
+{
+  Type *fresh = (Type *) calloc (1, sizeof (Type));
+  if (unlikely (!fresh))
+    return nullptr;
+
+  hb_object_init (fresh);
+  hb_object_trace (fresh, HB_FUNC);
+  return fresh;
+}
+/* Puts the header into its freshly-created state: reference count
+ * initialized with init ()'s default of 1, writable flag set, and the
+ * user-data pointer initialized. */
+template <typename Type>
+static inline void hb_object_init (Type *obj)
+{
+ obj->header.ref_count.init ();
+ obj->header.writable.set_relaxed (true);
+ obj->header.user_data.init ();
+}
+/* True when the object's reference count holds the inert value. */
+template <typename Type>
+static inline bool hb_object_is_inert (const Type *obj)
+{
+ return unlikely (obj->header.ref_count.is_inert ());
+}
+/* True when the object's reference count is positive. */
+template <typename Type>
+static inline bool hb_object_is_valid (const Type *obj)
+{
+ return likely (obj->header.ref_count.is_valid ());
+}
+/* True once the header's writable flag has been cleared. */
+template <typename Type>
+static inline bool hb_object_is_immutable (const Type *obj)
+{
+ return !obj->header.writable.get_relaxed ();
+}
+/* Clears the writable flag; allowed on a const object because the flag is
+ * declared mutable. */
+template <typename Type>
+static inline void hb_object_make_immutable (const Type *obj)
+{
+ obj->header.writable.set_relaxed (false);
+}
+/* Takes a reference on obj and returns it.  Null and inert objects are
+ * returned unchanged without touching the count. */
+template <typename Type>
+static inline Type *hb_object_reference (Type *obj)
+{
+  hb_object_trace (obj, HB_FUNC);
+
+  const bool counted = obj && !hb_object_is_inert (obj);
+  if (unlikely (!counted))
+    return obj;
+
+  assert (hb_object_is_valid (obj));
+  obj->header.ref_count.inc ();
+  return obj;
+}
+/* Drops one reference.  Returns true only when dec () reported 1, in which
+ * case the header has been finalized via hb_object_fini.  Null and inert
+ * objects are left alone and yield false. */
+template <typename Type>
+static inline bool hb_object_destroy (Type *obj)
+{
+  hb_object_trace (obj, HB_FUNC);
+  if (unlikely (!obj || hb_object_is_inert (obj)))
+    return false;
+  assert (hb_object_is_valid (obj));
+
+  const bool last_reference = (obj->header.ref_count.dec () == 1);
+  if (last_reference)
+    hb_object_fini (obj);
+  return last_reference;
+}
+/* Finalizes the object header: poisons the reference count, then destroys
+ * and frees the user-data array if one was ever attached.  The header's
+ * user-data pointer is reset afterwards so that it no longer refers to the
+ * freed array. */
+template <typename Type>
+static inline void hb_object_fini (Type *obj)
+{
+  obj->header.ref_count.fini (); /* Do this before user_data */
+  hb_user_data_array_t *user_data = obj->header.user_data.get ();
+  if (user_data)
+  {
+    user_data->fini ();
+    free (user_data);
+    /* Clear the pointer stored in the header, not just the local copy;
+     * otherwise a second fini or a later lookup would use freed memory. */
+    obj->header.user_data.set_relaxed (nullptr);
+  }
+}
+/* Attaches (key, data, destroy) to obj, allocating the user-data array on
+ * first use.  Returns false for null or inert objects, when the array cannot
+ * be allocated, or when the array's set () call returns false. */
+template <typename Type>
+static inline bool hb_object_set_user_data (Type *obj,
+                                            hb_user_data_key_t *key,
+                                            void * data,
+                                            hb_destroy_func_t destroy,
+                                            hb_bool_t replace)
+{
+  if (unlikely (!obj || hb_object_is_inert (obj)))
+    return false;
+  assert (hb_object_is_valid (obj));
+
+  hb_user_data_array_t *user_data;
+  for (;;)
+  {
+    user_data = obj->header.user_data.get ();
+    if (user_data)
+      break;
+
+    user_data = (hb_user_data_array_t *) calloc (sizeof (hb_user_data_array_t), 1);
+    if (unlikely (!user_data))
+      return false;
+    user_data->init ();
+    if (obj->header.user_data.cmpexch (nullptr, user_data))
+      break;
+
+    /* The compare-exchange failed; discard our array and read again. */
+    user_data->fini ();
+    free (user_data);
+  }
+
+  return user_data->set (key, data, destroy, replace);
+}
+
+/* Looks up the data stored under key.  Returns nullptr for null or inert
+ * objects and for objects that never had a user-data array attached. */
+template <typename Type>
+static inline void *hb_object_get_user_data (Type *obj,
+                                             hb_user_data_key_t *key)
+{
+  if (unlikely (!obj || hb_object_is_inert (obj)))
+    return nullptr;
+  assert (hb_object_is_valid (obj));
+
+  hb_user_data_array_t *array = obj->header.user_data.get ();
+  return array ? array->get (key) : nullptr;
+}
+
+
+#endif /* HB_OBJECT_HH */
diff --git a/thirdparty/harfbuzz/src/hb-open-file.hh b/thirdparty/harfbuzz/src/hb-open-file.hh
new file mode 100644
index 0000000000..ac13dd23c3
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-open-file.hh
@@ -0,0 +1,521 @@
+/*
+ * Copyright © 2007,2008,2009 Red Hat, Inc.
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OPEN_FILE_HH
+#define HB_OPEN_FILE_HH
+
+#include "hb-open-type.hh"
+#include "hb-ot-head-table.hh"
+
+
+namespace OT {
+
+
+/*
+ *
+ * The OpenType Font File
+ *
+ */
+
+
+/*
+ * Organization of an OpenType Font
+ */
+
+struct OpenTypeFontFile;
+struct OffsetTable;
+struct TTCHeader;
+
+
+/* One 16-byte entry of the table directory: tag, checksum, offset and length
+ * of a single table.  Also exposed under the name OpenTypeTable. */
+typedef struct TableRecord
+{
+ /* Compares this record's tag against t (negated Tag::cmp result). */
+ int cmp (Tag t) const { return -t.cmp (tag); }
+
+ /* Comparator taking two untyped pointers to TableRecord; delegates to the
+  * member cmp above. */
+ HB_INTERNAL static int cmp (const void *pa, const void *pb)
+ {
+ const TableRecord *a = (const TableRecord *) pa;
+ const TableRecord *b = (const TableRecord *) pb;
+ return b->cmp (a->tag);
+ }
+
+ /* Only checks that the fixed-size record lies within the sanitized range. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ Tag tag; /* 4-byte identifier. */
+ CheckSum checkSum; /* CheckSum for this table. */
+ Offset32 offset; /* Offset from beginning of TrueType font
+ * file. */
+ HBUINT32 length; /* Length of this table. */
+ public:
+ DEFINE_SIZE_STATIC (16);
+} OpenTypeTable;
+
+/* The sfnt header plus table directory of a single font face: sfnt_version
+ * followed by a binary-searchable array of TableRecords.  Also exposed under
+ * the name OpenTypeFontFace. */
+typedef struct OffsetTable
+{
+ friend struct OpenTypeFontFile;
+
+ unsigned int get_table_count () const { return tables.len; }
+ const TableRecord& get_table (unsigned int i) const
+ { return tables[i]; }
+ /* Writes the tags of up to *table_count tables, starting at start_offset,
+  * into table_tags (skipped entirely when table_count is null).  Always
+  * returns the total number of tables. */
+ unsigned int get_table_tags (unsigned int start_offset,
+ unsigned int *table_count, /* IN/OUT */
+ hb_tag_t *table_tags /* OUT */) const
+ {
+ if (table_count)
+ {
+ + tables.sub_array (start_offset, table_count)
+ | hb_map (&TableRecord::tag)
+ | hb_sink (hb_array (table_tags, *table_count))
+ ;
+ }
+ return tables.len;
+ }
+ /* Binary-searches the directory for tag; when it is missing,
+  * Index::NOT_FOUND_INDEX is stored into *table_index. */
+ bool find_table_index (hb_tag_t tag, unsigned int *table_index) const
+ {
+ Tag t;
+ t = tag;
+ return tables.bfind (t, table_index, HB_BFIND_NOT_FOUND_STORE, Index::NOT_FOUND_INDEX);
+ }
+ /* Returns the record for tag; a missing tag is looked up at the not-found
+  * index that find_table_index stored. */
+ const TableRecord& get_table_by_tag (hb_tag_t tag) const
+ {
+ unsigned int table_index;
+ find_table_index (tag, &table_index);
+ return get_table (table_index);
+ }
+
+ public:
+
+ /* Serializes a complete face: header, table directory and one 4-byte
+  * aligned copy of each item's blob.  Each item supplies `.tag` and `.blob`.
+  * If a 'head' table of sufficient size is present, its checkSumAdjustment
+  * is zeroed while copying and filled in at the end from the checksum of the
+  * directory plus all per-table checksums.  Returns false when the
+  * serializer runs out of room. */
+ template <typename item_t>
+ bool serialize (hb_serialize_context_t *c,
+ hb_tag_t sfnt_tag,
+ hb_array_t<item_t> items)
+ {
+ TRACE_SERIALIZE (this);
+ /* Alloc 12 for the OTHeader. */
+ if (unlikely (!c->extend_min (*this))) return_trace (false);
+ /* Write sfntVersion (bytes 0..3). */
+ sfnt_version = sfnt_tag;
+ /* Take space for numTables, searchRange, entrySelector, RangeShift
+ * and the TableRecords themselves. */
+ if (unlikely (!tables.serialize (c, items.length))) return_trace (false);
+
+ const char *dir_end = (const char *) c->head;
+ HBUINT32 *checksum_adjustment = nullptr;
+
+ /* Write OffsetTables, alloc for and write actual table blobs. */
+ for (unsigned int i = 0; i < tables.len; i++)
+ {
+ TableRecord &rec = tables.arrayZ[i];
+ hb_blob_t *blob = items[i].blob;
+ rec.tag = items[i].tag;
+ rec.length = blob->length;
+ rec.offset.serialize (c, this);
+
+ /* Allocate room for the table and copy it. */
+ char *start = (char *) c->allocate_size<void> (rec.length);
+ /* Exit through return_trace like every other path in this function. */
+ if (unlikely (!start)) return_trace (false);
+
+ if (likely (rec.length))
+ memcpy (start, blob->data, rec.length);
+
+ /* 4-byte alignment. */
+ c->align (4);
+ const char *end = (const char *) c->head;
+
+ if (items[i].tag == HB_OT_TAG_head &&
+ (unsigned) (end - start) >= head::static_size)
+ {
+ head *h = (head *) start;
+ checksum_adjustment = &h->checkSumAdjustment;
+ *checksum_adjustment = 0;
+ }
+
+ rec.checkSum.set_for_data (start, end - start);
+ }
+
+ tables.qsort ();
+
+ if (checksum_adjustment)
+ {
+ CheckSum checksum;
+
+ /* The following line is a slower version of the following block. */
+ //checksum.set_for_data (this, (const char *) c->head - (const char *) this);
+ checksum.set_for_data (this, dir_end - (const char *) this);
+ for (unsigned int i = 0; i < items.length; i++)
+ {
+ TableRecord &rec = tables.arrayZ[i];
+ checksum = checksum + rec.checkSum;
+ }
+
+ *checksum_adjustment = 0xB1B0AFBAu - checksum;
+ }
+
+ return_trace (true);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) && tables.sanitize (c));
+ }
+
+ protected:
+ Tag sfnt_version; /* '\0\001\0\00' if TrueType / 'OTTO' if CFF */
+ BinSearchArrayOf<TableRecord>
+ tables;
+ public:
+ DEFINE_SIZE_ARRAY (12, tables);
+} OpenTypeFontFace;
+
+
+/*
+ * TrueType Collections
+ */
+
+/* Version 1 TrueType Collection header: the 'ttcf' tag, a version, and an
+ * array of 32-bit offsets to each face's OffsetTable. */
+struct TTCHeaderVersion1
+{
+ friend struct TTCHeader;
+
+ unsigned int get_face_count () const { return table.len; }
+ /* Resolves the i-th offset relative to the start of this header. */
+ const OpenTypeFontFace& get_face (unsigned int i) const { return this+table[i]; }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (table.sanitize (c, this));
+ }
+
+ protected:
+ Tag ttcTag; /* TrueType Collection ID string: 'ttcf' */
+ FixedVersion<>version; /* Version of the TTC Header (1.0),
+ * 0x00010000u */
+ LArrayOf<LOffsetTo<OffsetTable>>
+ table; /* Array of offsets to the OffsetTable for each font
+ * from the beginning of the file */
+ public:
+ DEFINE_SIZE_ARRAY (12, table);
+};
+
+/* Version-dispatching view of a TrueType Collection header.  Major versions
+ * 1 and 2 are both handled through the version 1 layout; any other major
+ * version reports zero faces, yields the Null face, and sanitizes as true. */
+struct TTCHeader
+{
+ friend struct OpenTypeFontFile;
+
+ private:
+
+ unsigned int get_face_count () const
+ {
+ switch (u.header.version.major) {
+ case 2: /* version 2 is compatible with version 1 */
+ case 1: return u.version1.get_face_count ();
+ default:return 0;
+ }
+ }
+ const OpenTypeFontFace& get_face (unsigned int i) const
+ {
+ switch (u.header.version.major) {
+ case 2: /* version 2 is compatible with version 1 */
+ case 1: return u.version1.get_face (i);
+ default:return Null (OpenTypeFontFace);
+ }
+ }
+
+ /* The version field is sanitized first because the switch reads it. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!u.header.version.sanitize (c))) return_trace (false);
+ switch (u.header.version.major) {
+ case 2: /* version 2 is compatible with version 1 */
+ case 1: return_trace (u.version1.sanitize (c));
+ default:return_trace (true);
+ }
+ }
+
+ protected:
+ union {
+ struct {
+ Tag ttcTag; /* TrueType Collection ID string: 'ttcf' */
+ FixedVersion<>version; /* Version of the TTC Header (1.0 or 2.0),
+ * 0x00010000u or 0x00020000u */
+ } header;
+ TTCHeaderVersion1 version1;
+ } u;
+};
+
+/*
+ * Mac Resource Fork
+ *
+ * http://mirror.informatimago.com/next/developer.apple.com/documentation/mac/MoreToolbox/MoreToolbox-99.html
+ */
+
+/* One 12-byte reference-list entry of a Mac resource fork.  `offset` is
+ * relative to the resource data block and points at a length-prefixed byte
+ * array whose contents are treated as an OpenTypeFontFace. */
+struct ResourceRecord
+{
+ /* Reinterprets the bytes of the referenced array as a font face. */
+ const OpenTypeFontFace & get_face (const void *data_base) const
+ { return * reinterpret_cast<const OpenTypeFontFace *> ((data_base+offset).arrayZ); }
+
+ /* Checks the record, then the referenced array, then the face inside it. */
+ bool sanitize (hb_sanitize_context_t *c,
+ const void *data_base) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ offset.sanitize (c, data_base) &&
+ get_face (data_base).sanitize (c));
+ }
+
+ protected:
+ HBUINT16 id; /* Resource ID. */
+ HBINT16 nameOffset; /* Offset from beginning of resource name list
+ * to resource name, -1 means there is none. */
+ HBUINT8 attrs; /* Resource attributes */
+ NNOffsetTo<LArrayOf<HBUINT8>, HBUINT24>
+ offset; /* Offset from beginning of data block to
+ * data for this resource */
+ HBUINT32 reserved; /* Reserved for handle to resource */
+ public:
+ DEFINE_SIZE_STATIC (12);
+};
+
+/* Resource type tag that marks a resource list holding sfnt fonts. */
+#define HB_TAG_sfnt HB_TAG ('s','f','n','t')
+
+/* One 8-byte entry of the resource type list: a type tag, the number of
+ * resources of that type (stored minus one) and the offset of their
+ * reference list.  Only 'sfnt' entries report a non-zero resource count. */
+struct ResourceTypeRecord
+{
+ unsigned int get_resource_count () const
+ { return tag == HB_TAG_sfnt ? resCountM1 + 1 : 0; }
+
+ bool is_sfnt () const { return tag == HB_TAG_sfnt; }
+
+ /* Returns the i-th record of the reference list located at
+  * type_base + resourcesZ, viewed as an array of get_resource_count ()
+  * entries. */
+ const ResourceRecord& get_resource_record (unsigned int i,
+ const void *type_base) const
+ { return (type_base+resourcesZ).as_array (get_resource_count ())[i]; }
+
+ bool sanitize (hb_sanitize_context_t *c,
+ const void *type_base,
+ const void *data_base) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ resourcesZ.sanitize (c, type_base,
+ get_resource_count (),
+ data_base));
+ }
+
+ protected:
+ Tag tag; /* Resource type. */
+ HBUINT16 resCountM1; /* Number of resources minus 1. */
+ NNOffsetTo<UnsizedArrayOf<ResourceRecord>>
+ resourcesZ; /* Offset from beginning of resource type list
+ * to reference item list for this type. */
+ public:
+ DEFINE_SIZE_STATIC (8);
+};
+
+/* Resource map of a Mac resource fork (28-byte fixed part).  Faces are found
+ * by scanning the type list for the first 'sfnt' entry. */
+struct ResourceMap
+{
+ /* Resource count of the first 'sfnt' type record, or 0 if there is none. */
+ unsigned int get_face_count () const
+ {
+ unsigned int count = get_type_count ();
+ for (unsigned int i = 0; i < count; i++)
+ {
+ const ResourceTypeRecord& type = get_type_record (i);
+ if (type.is_sfnt ())
+ return type.get_resource_count ();
+ }
+ return 0;
+ }
+
+ /* Returns face number idx from the first 'sfnt' type record that has more
+  * than idx resources, or the Null face when no record qualifies. */
+ const OpenTypeFontFace& get_face (unsigned int idx,
+ const void *data_base) const
+ {
+ unsigned int count = get_type_count ();
+ for (unsigned int i = 0; i < count; i++)
+ {
+ const ResourceTypeRecord& type = get_type_record (i);
+ /* The check for idx < count is here because ResourceRecord is NOT null-safe.
+ * Because an offset of 0 there does NOT mean null. */
+ if (type.is_sfnt () && idx < type.get_resource_count ())
+ return type.get_resource_record (idx, &(this+typeList)).get_face (data_base);
+ }
+ return Null (OpenTypeFontFace);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c, const void *data_base) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ typeList.sanitize (c, this,
+ &(this+typeList),
+ data_base));
+ }
+
+ private:
+ /* The type list stores its length minus one. */
+ unsigned int get_type_count () const { return (this+typeList).lenM1 + 1; }
+
+ const ResourceTypeRecord& get_type_record (unsigned int i) const
+ { return (this+typeList)[i]; }
+
+ protected:
+ HBUINT8 reserved0[16]; /* Reserved for copy of resource header */
+ HBUINT32 reserved1; /* Reserved for handle to next resource map */
+ HBUINT16 resreved2; /* Reserved for file reference number */
+ HBUINT16 attrs; /* Resource fork attribute */
+ NNOffsetTo<ArrayOfM1<ResourceTypeRecord>>
+ typeList; /* Offset from beginning of map to
+ * resource type list */
+ Offset16 nameList; /* Offset from beginning of map to
+ * resource name list */
+ public:
+ DEFINE_SIZE_STATIC (28);
+};
+
+/* 16-byte header of a Mac resource fork: offsets and lengths of the resource
+ * data block and of the resource map. */
+struct ResourceForkHeader
+{
+ unsigned int get_face_count () const
+ { return (this+map).get_face_count (); }
+
+ /* Returns face idx from the resource map.  When base_offset is non-null it
+  * receives the byte distance from this header to the returned face. */
+ const OpenTypeFontFace& get_face (unsigned int idx,
+ unsigned int *base_offset = nullptr) const
+ {
+ const OpenTypeFontFace &face = (this+map).get_face (idx, &(this+data));
+ if (base_offset)
+ *base_offset = (const char *) &face - (const char *) this;
+ return face;
+ }
+
+ /* Checks the header, then dataLen bytes of the data block, then the map. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ data.sanitize (c, this, dataLen) &&
+ map.sanitize (c, this, &(this+data)));
+ }
+
+ protected:
+ LNNOffsetTo<UnsizedArrayOf<HBUINT8>>
+ data; /* Offset from beginning of resource fork
+ * to resource data */
+ LNNOffsetTo<ResourceMap >
+ map; /* Offset from beginning of resource fork
+ * to resource map */
+ HBUINT32 dataLen; /* Length of resource data */
+ HBUINT32 mapLen; /* Length of resource map */
+ public:
+ DEFINE_SIZE_STATIC (16);
+};
+
+/*
+ * OpenType Font File
+ */
+
+/* Top-level view of a font file.  The leading 4-byte tag selects which union
+ * member applies: a single face, a TrueType Collection header or a Mac
+ * resource fork header.  Unrecognized tags report zero faces, yield the Null
+ * face, and sanitize as true. */
+struct OpenTypeFontFile
+{
+ enum {
+ CFFTag = HB_TAG ('O','T','T','O'), /* OpenType with Postscript outlines */
+ TrueTypeTag = HB_TAG ( 0 , 1 , 0 , 0 ), /* OpenType with TrueType outlines */
+ TTCTag = HB_TAG ('t','t','c','f'), /* TrueType Collection */
+ DFontTag = HB_TAG ( 0 , 0 , 1 , 0 ), /* DFont Mac Resource Fork */
+ TrueTag = HB_TAG ('t','r','u','e'), /* Obsolete Apple TrueType */
+ Typ1Tag = HB_TAG ('t','y','p','1') /* Obsolete Apple Type1 font in SFNT container */
+ };
+
+ hb_tag_t get_tag () const { return u.tag; }
+
+ unsigned int get_face_count () const
+ {
+ switch (u.tag) {
+ case CFFTag: /* All the non-collection tags */
+ case TrueTag:
+ case Typ1Tag:
+ case TrueTypeTag: return 1;
+ case TTCTag: return u.ttcHeader.get_face_count ();
+ case DFontTag: return u.rfHeader.get_face_count ();
+ default: return 0;
+ }
+ }
+ /* Returns face i.  *base_offset, when supplied, is set to 0 and is only
+  * given another value on the DFont path. */
+ const OpenTypeFontFace& get_face (unsigned int i, unsigned int *base_offset = nullptr) const
+ {
+ if (base_offset)
+ *base_offset = 0;
+ switch (u.tag) {
+ /* Note: for non-collection SFNT data we ignore index. This is because
+ * Apple dfont container is a container of SFNT's. So each SFNT is a
+ * non-TTC, but the index is more than zero. */
+ case CFFTag: /* All the non-collection tags */
+ case TrueTag:
+ case Typ1Tag:
+ case TrueTypeTag: return u.fontFace;
+ case TTCTag: return u.ttcHeader.get_face (i);
+ case DFontTag: return u.rfHeader.get_face (i, base_offset);
+ default: return Null (OpenTypeFontFace);
+ }
+ }
+
+ /* Serializes a single-face (non-collection) font file from items; sfnt_tag
+  * must not be the TTC tag. */
+ template <typename item_t>
+ bool serialize_single (hb_serialize_context_t *c,
+ hb_tag_t sfnt_tag,
+ hb_array_t<item_t> items)
+ {
+ TRACE_SERIALIZE (this);
+ assert (sfnt_tag != TTCTag);
+ if (unlikely (!c->extend_min (*this))) return_trace (false);
+ return_trace (u.fontFace.serialize (c, sfnt_tag, items));
+ }
+
+ /* The tag is sanitized first because the switch reads it. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!u.tag.sanitize (c))) return_trace (false);
+ switch (u.tag) {
+ case CFFTag: /* All the non-collection tags */
+ case TrueTag:
+ case Typ1Tag:
+ case TrueTypeTag: return_trace (u.fontFace.sanitize (c));
+ case TTCTag: return_trace (u.ttcHeader.sanitize (c));
+ case DFontTag: return_trace (u.rfHeader.sanitize (c));
+ default: return_trace (true);
+ }
+ }
+
+ protected:
+ union {
+ Tag tag; /* 4-byte identifier. */
+ OpenTypeFontFace fontFace;
+ TTCHeader ttcHeader;
+ ResourceForkHeader rfHeader;
+ } u;
+ public:
+ DEFINE_SIZE_UNION (4, tag);
+};
+
+
+} /* namespace OT */
+
+
+#endif /* HB_OPEN_FILE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-open-type.hh b/thirdparty/harfbuzz/src/hb-open-type.hh
new file mode 100644
index 0000000000..50558cf8d3
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-open-type.hh
@@ -0,0 +1,1078 @@
+/*
+ * Copyright © 2007,2008,2009,2010 Red Hat, Inc.
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OPEN_TYPE_HH
+#define HB_OPEN_TYPE_HH
+
+#include "hb.hh"
+#include "hb-blob.hh"
+#include "hb-face.hh"
+#include "hb-machinery.hh"
+#include "hb-subset.hh"
+
+
+namespace OT {
+
+
+/*
+ *
+ * The OpenType Font File: Data Types
+ */
+
+
+/* "The following data types are used in the OpenType font file.
+ * All OpenType fonts use Motorola-style byte ordering (Big Endian):" */
+
+/*
+ * Int types
+ */
+
+/* Integer types in big-endian order and no alignment requirement. The value is held in a BEInt<Type, Size> and converts to and from an int-width native value. */
+template <typename Type, unsigned int Size>
+struct IntType
+{
+ typedef Type type;
+ typedef hb_conditional<hb_is_signed (Type), signed, unsigned> wide_type; /* signed or unsigned int, following the signedness of Type. */
+
+ IntType& operator = (wide_type i) { v = i; return *this; }
+ operator wide_type () const { return v; }
+ bool operator == (const IntType &o) const { return (Type) v == (Type) o.v; }
+ bool operator != (const IntType &o) const { return !(*this == o); }
+
+ IntType& operator += (unsigned count) { *this = *this + count; return *this; } /* Arithmetic goes through the wide_type conversion and assignment above. */
+ IntType& operator -= (unsigned count) { *this = *this - count; return *this; }
+ IntType& operator ++ () { *this += 1; return *this; }
+ IntType& operator -- () { *this -= 1; return *this; }
+ IntType operator ++ (int) { IntType c (*this); ++*this; return c; } /* Post-increment: returns the value from before the change. */
+ IntType operator -- (int) { IntType c (*this); --*this; return c; }
+
+ HB_INTERNAL static int cmp (const IntType *a, const IntType *b)
+ { return b->cmp (*a); } /* Swapped on purpose: x.cmp (y) below orders y relative to x. */
+ HB_INTERNAL static int cmp (const void *a, const void *b) /* Same comparison through untyped pointers. */
+ {
+ IntType *pa = (IntType *) a;
+ IntType *pb = (IntType *) b;
+
+ return pb->cmp (*pa);
+ }
+ template <typename Type2>
+ int cmp (Type2 a) const /* Negative, zero or positive as a is less than, equal to or greater than the stored value. */
+ {
+ Type b = v;
+ if (sizeof (Type) < sizeof (int) && sizeof (Type2) < sizeof (int))
+ return (int) a - (int) b; /* Both operands are narrower than int, so the difference fits in an int. */
+ else
+ return a < b ? -1 : a == b ? 0 : +1;
+ }
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this)));
+ }
+ protected:
+ BEInt<Type, Size> v; /* The stored value. */
+ public:
+ DEFINE_SIZE_STATIC (Size);
+};
+
+typedef IntType<uint8_t, 1> HBUINT8; /* 8-bit unsigned integer. */
+typedef IntType<int8_t, 1> HBINT8; /* 8-bit signed integer. */
+typedef IntType<uint16_t, 2> HBUINT16; /* 16-bit unsigned integer. */
+typedef IntType<int16_t, 2> HBINT16; /* 16-bit signed integer. */
+typedef IntType<uint32_t, 4> HBUINT32; /* 32-bit unsigned integer. */
+typedef IntType<int32_t, 4> HBINT32; /* 32-bit signed integer. */
+/* Note: we cannot define a signed HBINT24 because there's no corresponding C type.
+ * Works for unsigned, but not signed, since we rely on compiler for sign-extension. */
+typedef IntType<uint32_t, 3> HBUINT24; /* 24-bit unsigned integer. */
+
+/* 16-bit signed integer (HBINT16) that describes a quantity in FUnits. */
+typedef HBINT16 FWORD;
+
+/* 32-bit signed integer (HBINT32) that describes a quantity in FUnits. */
+typedef HBINT32 FWORD32;
+
+/* 16-bit unsigned integer (HBUINT16) that describes a quantity in FUnits. */
+typedef HBUINT16 UFWORD;
+
+/* 16-bit signed fixed number with the low 14 bits of fraction (2.14). */
+struct F2DOT14 : HBINT16
+{
+ F2DOT14& operator = (uint16_t i ) { HBINT16::operator= (i); return *this; } /* Assigns the raw 16-bit representation, not a float. */
+ // 16384 means 1<<14
+ float to_float () const { return ((int32_t) v) / 16384.f; }
+ void set_float (float f) { v = roundf (f * 16384.f); } /* Scales by 1<<14 and rounds with roundf before storing. */
+ public:
+ DEFINE_SIZE_STATIC (2);
+};
+
+/* 32-bit signed fixed-point number (16.16). */
+struct HBFixed : HBINT32
+{
+ HBFixed& operator = (uint32_t i) { HBINT32::operator= (i); return *this; } /* Assigns the raw 32-bit representation, not a float. */
+ // 65536 means 1<<16
+ float to_float () const { return ((int32_t) v) / 65536.f; }
+ void set_float (float f) { v = roundf (f * 65536.f); } /* Scales by 1<<16 and rounds with roundf before storing. */
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+
+/* Date represented in number of seconds since 12:00 midnight, January 1,
+ * 1904. The value is represented as a signed 64-bit integer. */
+struct LONGDATETIME
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this)));
+ }
+ protected:
+ HBINT32 major; /* First four bytes; with big-endian storage this is the signed high half of the 64-bit value. */
+ HBUINT32 minor; /* Last four bytes; the unsigned low half. */
+ public:
+ DEFINE_SIZE_STATIC (8);
+};
+
+/* Array of four uint8s (length = 32 bits) used to identify a script, language
+ * system, feature, or baseline */
+struct Tag : HBUINT32
+{
+ Tag& operator = (hb_tag_t i) { HBUINT32::operator= (i); return *this; }
+ /* What the char* converters return is NOT nul-terminated. Print using "%.4s" */
+ operator const char* () const { return reinterpret_cast<const char *> (&this->v); } /* Points directly at the stored bytes; no copy is made. */
+ operator char* () { return reinterpret_cast<char *> (&this->v); }
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+
+/* Glyph index number, same as uint16 (length = 16 bits) */
+struct HBGlyphID : HBUINT16
+{
+ HBGlyphID& operator = (uint16_t i) { HBUINT16::operator= (i); return *this; } /* Forwards to the HBUINT16 assignment; returns the derived type for chaining. */
+};
+
+/* Script/language-system/feature index */
+struct Index : HBUINT16 {
+ static constexpr unsigned NOT_FOUND_INDEX = 0xFFFFu; /* Largest 16-bit value; going by its name, the "not found" sentinel. */
+ Index& operator = (uint16_t i) { HBUINT16::operator= (i); return *this; }
+};
+DECLARE_NULL_NAMESPACE_BYTES (OT, Index);
+
+typedef Index NameID; /* NameID shares Index's representation. */
+
+/* Offset, Null offset = 0 */
+template <typename Type, bool has_null=true>
+struct Offset : Type
+{
+ Offset& operator = (typename Type::type i) { Type::operator= (i); return *this; }
+
+ typedef Type type;
+
+ bool is_null () const { return has_null && 0 == *this; } /* Zero only counts as null when has_null is set. */
+
+ void *serialize (hb_serialize_context_t *c, const void *base) /* Points this offset at what c->start_embed<void> () returns and hands that pointer back. */
+ {
+ void *t = c->start_embed<void> ();
+ c->check_assign (*this, (unsigned) ((char *) t - (char *) base)); /* Stored value is the byte distance from base to t. */
+ return t;
+ }
+
+ public:
+ DEFINE_SIZE_STATIC (sizeof (Type));
+};
+
+typedef Offset<HBUINT16> Offset16; /* 16-bit offset, null allowed. */
+typedef Offset<HBUINT32> Offset32; /* 32-bit offset, null allowed. */
+
+
+/* CheckSum: a 32-bit value computed as the wrapping sum of a table's big-endian 32-bit words. */
+struct CheckSum : HBUINT32
+{
+ CheckSum& operator = (uint32_t i) { HBUINT32::operator= (i); return *this; }
+
+ /* This is reference implementation from the spec. */
+ static uint32_t CalcTableChecksum (const HBUINT32 *Table, uint32_t Length) /* Length is in bytes. */
+ {
+ uint32_t Sum = 0L;
+ assert (0 == (Length & 3)); /* Length must be a multiple of four bytes. */
+ const HBUINT32 *EndPtr = Table + Length / HBUINT32::static_size;
+
+ while (Table < EndPtr)
+ Sum += *Table++; /* uint32_t addition wraps modulo 2^32. */
+ return Sum;
+ }
+
+ /* Note: data should be 4byte aligned and have 4byte padding at the end. */
+ void set_for_data (const void *data, unsigned int length)
+ { *this = CalcTableChecksum ((const HBUINT32 *) data, length); }
+
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+
+
+/*
+ * Version Numbers
+ */
+
+template <typename FixedType=HBUINT16>
+struct FixedVersion
+{ /* A major/minor version pair made of two FixedType fields. */
+ uint32_t to_int () const { return (major << (sizeof (FixedType) * 8)) + minor; } /* major shifted up by the bit width of FixedType, plus minor. */
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ FixedType major;
+ FixedType minor;
+ public:
+ DEFINE_SIZE_STATIC (2 * sizeof (FixedType));
+};
+
+
+/*
+ * Template subclasses of Offset that do the dereferencing.
+ * Use: (base+offset)
+ */
+
+template <typename Type, bool has_null>
+struct _hb_has_null
+{ /* Primary template; applies when has_null is false because the true case is specialized below. */
+ static const Type *get_null () { return nullptr; }
+ static Type *get_crap () { return nullptr; }
+};
+template <typename Type>
+struct _hb_has_null<Type, true>
+{ /* has_null == true: hand out the addresses of Null (Type) and Crap (Type). */
+ static const Type *get_null () { return &Null (Type); }
+ static Type *get_crap () { return &Crap (Type); }
+};
+
+template <typename Type, typename OffsetType=HBUINT16, bool has_null=true>
+struct OffsetTo : Offset<OffsetType, has_null>
+{ /* An offset that knows the Type it points to; dereference it with a base pointer via operator () or base+offset. */
+ HB_DELETE_COPY_ASSIGN (OffsetTo);
+ OffsetTo () = default;
+
+ OffsetTo& operator = (typename OffsetType::type i) { OffsetType::operator= (i); return *this; }
+
+ const Type& operator () (const void *base) const /* Read-only dereference relative to base. */
+ {
+ if (unlikely (this->is_null ())) return *_hb_has_null<Type, has_null>::get_null (); /* is_null () can only be true when has_null is set, so get_null () is non-null here. */
+ return StructAtOffset<const Type> (base, *this);
+ }
+ Type& operator () (void *base) const /* Writable dereference relative to base. */
+ {
+ if (unlikely (this->is_null ())) return *_hb_has_null<Type, has_null>::get_crap ();
+ return StructAtOffset<Type> (base, *this);
+ }
+
+ template <typename Base,
+ hb_enable_if (hb_is_convertible (const Base, const void *))>
+ friend const Type& operator + (const Base &base, const OffsetTo &offset) { return offset ((const void *) base); } /* base+offset form. */
+ template <typename Base,
+ hb_enable_if (hb_is_convertible (const Base, const void *))>
+ friend const Type& operator + (const OffsetTo &offset, const Base &base) { return offset ((const void *) base); } /* offset+base form. */
+ template <typename Base,
+ hb_enable_if (hb_is_convertible (Base, void *))>
+ friend Type& operator + (Base &&base, OffsetTo &offset) { return offset ((void *) base); } /* Writable base+offset form. */
+ template <typename Base,
+ hb_enable_if (hb_is_convertible (Base, void *))>
+ friend Type& operator + (OffsetTo &offset, Base &&base) { return offset ((void *) base); } /* Writable offset+base form. */
+
+ Type& serialize (hb_serialize_context_t *c, const void *base)
+ {
+ return * (Type *) Offset<OffsetType>::serialize (c, base); /* NOTE(review): names Offset<OffsetType> with the default has_null, which is a base class only when has_null is true; confirm. */
+ }
+
+ template <typename ...Ts>
+ bool serialize_subset (hb_subset_context_t *c, const OffsetTo& src,
+ const void *src_base, Ts&&... ds)
+ { /* Subsets the object src points to (relative to src_base) and links this offset to the result. */
+ *this = 0; /* Start out as zero; a link is registered below when appropriate. */
+ if (src.is_null ())
+ return false;
+
+ auto *s = c->serializer;
+
+ s->push ();
+
+ bool ret = c->dispatch (src_base+src, hb_forward<Ts> (ds)...);
+
+ if (ret || !has_null) /* Without a null value the offset is linked even when the dispatch failed. */
+ s->add_link (*this, s->pop_pack ());
+ else
+ s->pop_discard ();
+
+ return ret;
+ }
+
+ /* TODO: Somehow merge this with previous function into a serialize_dispatch(). */
+ /* Workaround clang bug: https://bugs.llvm.org/show_bug.cgi?id=23029
+ * Can't compile: whence = hb_serialize_context_t::Head followed by Ts&&...
+ */
+ template <typename ...Ts>
+ bool serialize_copy (hb_serialize_context_t *c, const OffsetTo& src,
+ const void *src_base, unsigned dst_bias,
+ hb_serialize_context_t::whence_t whence,
+ Ts&&... ds)
+ { /* Copies the object src points to and links this offset to the copy. */
+ *this = 0;
+ if (src.is_null ())
+ return false;
+
+ c->push ();
+
+ bool ret = c->copy (src_base+src, hb_forward<Ts> (ds)...);
+
+ c->add_link (*this, c->pop_pack (), whence, dst_bias); /* Unlike serialize_subset, the link is added whatever ret is. */
+
+ return ret;
+ }
+
+ bool serialize_copy (hb_serialize_context_t *c, const OffsetTo& src,
+ const void *src_base, unsigned dst_bias = 0)
+ { return serialize_copy (c, src, src_base, dst_bias, hb_serialize_context_t::Head); } /* Convenience overload: whence fixed to Head. */
+
+ bool sanitize_shallow (hb_sanitize_context_t *c, const void *base) const /* Checks the offset field and that base+offset is in range; the target is not inspected. */
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!c->check_struct (this))) return_trace (false);
+ if (unlikely (this->is_null ())) return_trace (true);
+ if (unlikely (!c->check_range (base, *this))) return_trace (false);
+ return_trace (true);
+ }
+
+ template <typename ...Ts>
+ bool sanitize (hb_sanitize_context_t *c, const void *base, Ts&&... ds) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (sanitize_shallow (c, base) &&
+ (this->is_null () ||
+ c->dispatch (StructAtOffset<Type> (base, *this), hb_forward<Ts> (ds)...) ||
+ neuter (c))); /* Last resort when the target fails: try to zero the offset instead of failing. */
+ }
+
+ /* Set the offset to Null */
+ bool neuter (hb_sanitize_context_t *c) const
+ {
+ if (!has_null) return false; /* Offsets without a null value cannot be neutered. */
+ return c->try_set (this, 0);
+ }
+ DEFINE_SIZE_STATIC (sizeof (OffsetType));
+};
+/* Alias templates for common OffsetTo configurations (L = 32-bit offset, NN = no null value). */
+template <typename Type, bool has_null=true>
+using LOffsetTo = OffsetTo<Type, HBUINT32, has_null>;
+template <typename Type, typename OffsetType=HBUINT16>
+using NNOffsetTo = OffsetTo<Type, OffsetType, false>;
+template <typename Type>
+using LNNOffsetTo = LOffsetTo<Type, false>;
+
+
+/*
+ * Array Types
+ */
+
+template <typename Type>
+struct UnsizedArrayOf
+{ /* Array that does not store its own length; callers pass the element count to every operation that needs it. */
+ typedef Type item_t;
+ static constexpr unsigned item_size = hb_static_size (Type);
+
+ HB_DELETE_CREATE_COPY_ASSIGN (UnsizedArrayOf);
+
+ const Type& operator [] (int i_) const /* No upper bound is known here; only pointer wrap-around is detected. */
+ {
+ unsigned int i = (unsigned int) i_;
+ const Type *p = &arrayZ[i];
+ if (unlikely (p < arrayZ)) return Null (Type); /* Overflowed. */
+ return *p;
+ }
+ Type& operator [] (int i_)
+ {
+ unsigned int i = (unsigned int) i_;
+ Type *p = &arrayZ[i];
+ if (unlikely (p < arrayZ)) return Crap (Type); /* Overflowed. */
+ return *p;
+ }
+
+ unsigned int get_size (unsigned int len) const
+ { return len * Type::static_size; } /* Byte size of len items. */
+
+ template <typename T> operator T * () { return arrayZ; }
+ template <typename T> operator const T * () const { return arrayZ; }
+ hb_array_t<Type> as_array (unsigned int len)
+ { return hb_array (arrayZ, len); }
+ hb_array_t<const Type> as_array (unsigned int len) const
+ { return hb_array (arrayZ, len); }
+ operator hb_array_t< Type> () { return as_array (); } /* NOTE(review): as_array above requires a length; confirm these conversions are ever instantiated. */
+ operator hb_array_t<const Type> () const { return as_array (); }
+
+ template <typename T>
+ Type &lsearch (unsigned int len, const T &x, Type &not_found = Crap (Type))
+ { return *as_array (len).lsearch (x, &not_found); } /* Forwards to hb_array_t::lsearch over the first len items. */
+ template <typename T>
+ const Type &lsearch (unsigned int len, const T &x, const Type &not_found = Null (Type)) const
+ { return *as_array (len).lsearch (x, &not_found); }
+ template <typename T>
+ bool lfind (unsigned int len, const T &x, unsigned *pos = nullptr) const
+ { return as_array (len).lfind (x, pos); }
+
+ void qsort (unsigned int len, unsigned int start = 0, unsigned int end = (unsigned int) -1)
+ { as_array (len).qsort (start, end); }
+
+ bool serialize (hb_serialize_context_t *c, unsigned int items_len) /* Extends the serializer for items_len items; contents are not written here. */
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!c->extend (*this, items_len))) return_trace (false);
+ return_trace (true);
+ }
+ template <typename Iterator,
+ hb_requires (hb_is_source_of (Iterator, Type))>
+ bool serialize (hb_serialize_context_t *c, Iterator items) /* Extends for items.len () items, then copies them in from the iterator. */
+ {
+ TRACE_SERIALIZE (this);
+ unsigned count = items.len ();
+ if (unlikely (!serialize (c, count))) return_trace (false);
+ /* TODO Umm. Just exhaust the iterator instead? Being extra
+ * cautious right now.. */
+ for (unsigned i = 0; i < count; i++, ++items)
+ arrayZ[i] = *items;
+ return_trace (true);
+ }
+
+ UnsizedArrayOf* copy (hb_serialize_context_t *c, unsigned count) const /* Copies count items into the serializer; nullptr on failure. */
+ {
+ TRACE_SERIALIZE (this);
+ auto *out = c->start_embed (this);
+ if (unlikely (!as_array (count).copy (c))) return_trace (nullptr);
+ return_trace (out);
+ }
+
+ template <typename ...Ts>
+ bool sanitize (hb_sanitize_context_t *c, unsigned int count, Ts&&... ds) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!sanitize_shallow (c, count))) return_trace (false);
+ if (!sizeof... (Ts) && hb_is_trivially_copyable (Type)) return_trace (true); /* No extra arguments and trivially copyable items: the range check above is all that is done. */
+ for (unsigned int i = 0; i < count; i++)
+ if (unlikely (!c->dispatch (arrayZ[i], hb_forward<Ts> (ds)...)))
+ return_trace (false);
+ return_trace (true);
+ }
+
+ bool sanitize_shallow (hb_sanitize_context_t *c, unsigned int count) const /* Range check only; items are not inspected. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_array (arrayZ, count));
+ }
+
+ public:
+ Type arrayZ[HB_VAR_ARRAY];
+ public:
+ DEFINE_SIZE_UNBOUNDED (0);
+};
+
+/* Unsized array of offsets */
+template <typename Type, typename OffsetType, bool has_null=true>
+using UnsizedOffsetArrayOf = UnsizedArrayOf<OffsetTo<Type, OffsetType, has_null>>;
+
+/* Unsized array of offsets relative to the beginning of the array itself. */
+template <typename Type, typename OffsetType, bool has_null=true>
+struct UnsizedOffsetListOf : UnsizedOffsetArrayOf<Type, OffsetType, has_null>
+{
+ const Type& operator [] (int i_) const /* Yields the object that entry i points to, not the offset itself. */
+ {
+ unsigned int i = (unsigned int) i_;
+ const OffsetTo<Type, OffsetType, has_null> *p = &this->arrayZ[i];
+ if (unlikely (p < this->arrayZ)) return Null (Type); /* Overflowed. */
+ return this+*p; /* Offsets are resolved against the array's own address. */
+ }
+ Type& operator [] (int i_)
+ {
+ unsigned int i = (unsigned int) i_;
+ const OffsetTo<Type, OffsetType, has_null> *p = &this->arrayZ[i];
+ if (unlikely (p < this->arrayZ)) return Crap (Type); /* Overflowed. */
+ return this+*p;
+ }
+
+ template <typename ...Ts>
+ bool sanitize (hb_sanitize_context_t *c, unsigned int count, Ts&&... ds) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace ((UnsizedOffsetArrayOf<Type, OffsetType, has_null>
+ ::sanitize (c, count, this, hb_forward<Ts> (ds)...))); /* Passes this as the base for every offset. */
+ }
+};
+
+/* An array with sorted elements. Supports binary searching. */
+template <typename Type>
+struct SortedUnsizedArrayOf : UnsizedArrayOf<Type>
+{
+ hb_sorted_array_t<Type> as_array (unsigned int len) /* Same view as the base class, typed as sorted. */
+ { return hb_sorted_array (this->arrayZ, len); }
+ hb_sorted_array_t<const Type> as_array (unsigned int len) const
+ { return hb_sorted_array (this->arrayZ, len); }
+ operator hb_sorted_array_t<Type> () { return as_array (); } /* NOTE(review): as_array above requires a length; confirm these conversions are ever instantiated. */
+ operator hb_sorted_array_t<const Type> () const { return as_array (); }
+
+ template <typename T>
+ Type &bsearch (unsigned int len, const T &x, Type &not_found = Crap (Type))
+ { return *as_array (len).bsearch (x, &not_found); } /* Forwards to hb_sorted_array_t::bsearch over the first len items. */
+ template <typename T>
+ const Type &bsearch (unsigned int len, const T &x, const Type &not_found = Null (Type)) const
+ { return *as_array (len).bsearch (x, &not_found); }
+ template <typename T>
+ bool bfind (unsigned int len, const T &x, unsigned int *i = nullptr,
+ hb_bfind_not_found_t not_found = HB_BFIND_NOT_FOUND_DONT_STORE,
+ unsigned int to_store = (unsigned int) -1) const
+ { return as_array (len).bfind (x, i, not_found, to_store); }
+};
+
+
+/* An array with a number of elements: a LenType count (len) followed directly by len items. */
+template <typename Type, typename LenType=HBUINT16>
+struct ArrayOf
+{
+ typedef Type item_t;
+ static constexpr unsigned item_size = hb_static_size (Type);
+
+ HB_DELETE_CREATE_COPY_ASSIGN (ArrayOf);
+
+ const Type& operator [] (int i_) const /* Bounds-checked: out-of-range indices yield Null (Type). */
+ {
+ unsigned int i = (unsigned int) i_;
+ if (unlikely (i >= len)) return Null (Type);
+ return arrayZ[i];
+ }
+ Type& operator [] (int i_) /* Bounds-checked: out-of-range indices yield Crap (Type). */
+ {
+ unsigned int i = (unsigned int) i_;
+ if (unlikely (i >= len)) return Crap (Type);
+ return arrayZ[i];
+ }
+
+ unsigned int get_size () const
+ { return len.static_size + len * Type::static_size; } /* Length field plus len items, in bytes. */
+
+ explicit operator bool () const { return len; } /* True when the array is non-empty. */
+
+ void pop () { len--; } /* Note: no check for len == 0. */
+
+ hb_array_t< Type> as_array () { return hb_array (arrayZ, len); }
+ hb_array_t<const Type> as_array () const { return hb_array (arrayZ, len); }
+
+ /* Iterator. */
+ typedef hb_array_t<const Type> iter_t;
+ typedef hb_array_t< Type> writer_t;
+ iter_t iter () const { return as_array (); }
+ writer_t writer () { return as_array (); }
+ operator iter_t () const { return iter (); }
+ operator writer_t () { return writer (); }
+
+ hb_array_t<const Type> sub_array (unsigned int start_offset, unsigned int count) const
+ { return as_array ().sub_array (start_offset, count); } /* All sub_array overloads forward to hb_array_t::sub_array. */
+ hb_array_t<const Type> sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */) const
+ { return as_array ().sub_array (start_offset, count); }
+ hb_array_t<Type> sub_array (unsigned int start_offset, unsigned int count)
+ { return as_array ().sub_array (start_offset, count); }
+ hb_array_t<Type> sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */)
+ { return as_array ().sub_array (start_offset, count); }
+
+ hb_success_t serialize (hb_serialize_context_t *c, unsigned items_len) /* Writes the length and extends for items_len items; contents are not written here. */
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!c->extend_min (*this))) return_trace (false);
+ c->check_assign (len, items_len);
+ if (unlikely (!c->extend (*this))) return_trace (false);
+ return_trace (true);
+ }
+ template <typename Iterator,
+ hb_requires (hb_is_source_of (Iterator, Type))>
+ hb_success_t serialize (hb_serialize_context_t *c, Iterator items) /* Extends for items.len () items, then copies them in from the iterator. */
+ {
+ TRACE_SERIALIZE (this);
+ unsigned count = items.len ();
+ if (unlikely (!serialize (c, count))) return_trace (false);
+ /* TODO Umm. Just exhaust the iterator instead? Being extra
+ * cautious right now.. */
+ for (unsigned i = 0; i < count; i++, ++items)
+ arrayZ[i] = *items;
+ return_trace (true);
+ }
+
+ Type* serialize_append (hb_serialize_context_t *c) /* Grows the array by one item; returns the new last item or nullptr. */
+ {
+ TRACE_SERIALIZE (this);
+ len++;
+ if (unlikely (!len || !c->extend (*this))) /* !len: the counter wrapped around to zero. */
+ {
+ len--; /* Undo the increment on failure. */
+ return_trace (nullptr);
+ }
+ return_trace (&arrayZ[len - 1]);
+ }
+
+ ArrayOf* copy (hb_serialize_context_t *c) const /* Copies the length and all items into the serializer; nullptr on failure. */
+ {
+ TRACE_SERIALIZE (this);
+ auto *out = c->start_embed (this);
+ if (unlikely (!c->extend_min (out))) return_trace (nullptr);
+ c->check_assign (out->len, len);
+ if (unlikely (!as_array ().copy (c))) return_trace (nullptr);
+ return_trace (out);
+ }
+
+ template <typename ...Ts>
+ bool sanitize (hb_sanitize_context_t *c, Ts&&... ds) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!sanitize_shallow (c))) return_trace (false);
+ if (!sizeof... (Ts) && hb_is_trivially_copyable (Type)) return_trace (true); /* No extra arguments and trivially copyable items: the shallow check is all that is done. */
+ unsigned int count = len;
+ for (unsigned int i = 0; i < count; i++)
+ if (unlikely (!c->dispatch (arrayZ[i], hb_forward<Ts> (ds)...)))
+ return_trace (false);
+ return_trace (true);
+ }
+
+ template <typename T>
+ Type &lsearch (const T &x, Type &not_found = Crap (Type))
+ { return *as_array ().lsearch (x, &not_found); } /* Forwards to hb_array_t::lsearch. */
+ template <typename T>
+ const Type &lsearch (const T &x, const Type &not_found = Null (Type)) const
+ { return *as_array ().lsearch (x, &not_found); }
+ template <typename T>
+ bool lfind (const T &x, unsigned *pos = nullptr) const
+ { return as_array ().lfind (x, pos); }
+
+ void qsort (unsigned int start = 0, unsigned int end = (unsigned int) -1)
+ { as_array ().qsort (start, end); }
+
+ bool sanitize_shallow (hb_sanitize_context_t *c) const /* Checks the length field, then the range covered by len items. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (len.sanitize (c) && c->check_array (arrayZ, len));
+ }
+
+ public:
+ LenType len;
+ Type arrayZ[HB_VAR_ARRAY];
+ public:
+ DEFINE_SIZE_ARRAY (sizeof (LenType), arrayZ);
+};
+template <typename Type>
+using LArrayOf = ArrayOf<Type, HBUINT32>; /* ArrayOf with a 32-bit length. */
+using PString = ArrayOf<HBUINT8, HBUINT8>; /* Bytes preceded by an 8-bit length. */
+
+/* Array of offsets */
+template <typename Type>
+using OffsetArrayOf = ArrayOf<OffsetTo<Type, HBUINT16>>;
+template <typename Type>
+using LOffsetArrayOf = ArrayOf<OffsetTo<Type, HBUINT32>>;
+template <typename Type>
+using LOffsetLArrayOf = ArrayOf<OffsetTo<Type, HBUINT32>, HBUINT32>;
+
+/* Array of offsets relative to the beginning of the array itself. */
+template <typename Type>
+struct OffsetListOf : OffsetArrayOf<Type>
+{
+ const Type& operator [] (int i_) const /* Yields the object that entry i points to, not the offset itself. */
+ {
+ unsigned int i = (unsigned int) i_;
+ if (unlikely (i >= this->len)) return Null (Type);
+ return this+this->arrayZ[i]; /* Offsets are resolved against the array's own address. */
+ }
+ const Type& operator [] (int i_) /* NOTE(review): the non-const overload still returns const Type&; confirm that is intended. */
+ {
+ unsigned int i = (unsigned int) i_;
+ if (unlikely (i >= this->len)) return Crap (Type);
+ return this+this->arrayZ[i];
+ }
+
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ struct OffsetListOf<Type> *out = c->serializer->embed (*this);
+ if (unlikely (!out)) return_trace (false);
+ unsigned int count = this->len;
+ for (unsigned int i = 0; i < count; i++)
+ out->arrayZ[i].serialize_subset (c, this->arrayZ[i], this, out); /* Per-item results are ignored; see the unconditional true below. */
+ return_trace (true);
+ }
+
+ template <typename ...Ts>
+ bool sanitize (hb_sanitize_context_t *c, Ts&&... ds) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (OffsetArrayOf<Type>::sanitize (c, this, hb_forward<Ts> (ds)...)); /* Passes this as the base for every offset. */
+ }
+};
+
+/* An array starting at second element: the stored count (lenP1) is the item count plus one, and index i maps to arrayZ[i-1]. */
+template <typename Type, typename LenType=HBUINT16>
+struct HeadlessArrayOf
+{
+ static constexpr unsigned item_size = Type::static_size;
+
+ HB_DELETE_CREATE_COPY_ASSIGN (HeadlessArrayOf);
+
+ const Type& operator [] (int i_) const
+ {
+ unsigned int i = (unsigned int) i_;
+ if (unlikely (i >= lenP1 || !i)) return Null (Type); /* Index 0 is the element that is not stored. */
+ return arrayZ[i-1];
+ }
+ Type& operator [] (int i_)
+ {
+ unsigned int i = (unsigned int) i_;
+ if (unlikely (i >= lenP1 || !i)) return Crap (Type); /* Index 0 is the element that is not stored. */
+ return arrayZ[i-1];
+ }
+ unsigned int get_size () const
+ { return lenP1.static_size + get_length () * Type::static_size; }
+
+ unsigned get_length () const { return lenP1 ? lenP1 - 1 : 0; } /* Number of stored items; a lenP1 of 0 is treated as empty. */
+
+ hb_array_t< Type> as_array () { return hb_array (arrayZ, get_length ()); }
+ hb_array_t<const Type> as_array () const { return hb_array (arrayZ, get_length ()); }
+
+ /* Iterator. */
+ typedef hb_array_t<const Type> iter_t;
+ typedef hb_array_t< Type> writer_t;
+ iter_t iter () const { return as_array (); }
+ writer_t writer () { return as_array (); }
+ operator iter_t () const { return iter (); }
+ operator writer_t () { return writer (); }
+
+ bool serialize (hb_serialize_context_t *c, unsigned int items_len) /* Writes lenP1 and extends for items_len items; contents are not written here. */
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!c->extend_min (*this))) return_trace (false);
+ c->check_assign (lenP1, items_len + 1); /* The stored count is one more than the number of items. */
+ if (unlikely (!c->extend (*this))) return_trace (false);
+ return_trace (true);
+ }
+ template <typename Iterator,
+ hb_requires (hb_is_source_of (Iterator, Type))>
+ bool serialize (hb_serialize_context_t *c, Iterator items) /* Extends for items.len () items, then copies them in from the iterator. */
+ {
+ TRACE_SERIALIZE (this);
+ unsigned count = items.len ();
+ if (unlikely (!serialize (c, count))) return_trace (false);
+ /* TODO Umm. Just exhaust the iterator instead? Being extra
+ * cautious right now.. */
+ for (unsigned i = 0; i < count; i++, ++items)
+ arrayZ[i] = *items;
+ return_trace (true);
+ }
+
+ template <typename ...Ts>
+ bool sanitize (hb_sanitize_context_t *c, Ts&&... ds) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!sanitize_shallow (c))) return_trace (false);
+ if (!sizeof... (Ts) && hb_is_trivially_copyable (Type)) return_trace (true); /* No extra arguments and trivially copyable items: the shallow check is all that is done. */
+ unsigned int count = get_length ();
+ for (unsigned int i = 0; i < count; i++)
+ if (unlikely (!c->dispatch (arrayZ[i], hb_forward<Ts> (ds)...)))
+ return_trace (false);
+ return_trace (true);
+ }
+
+ private:
+ bool sanitize_shallow (hb_sanitize_context_t *c) const /* Checks the count field, then the range covered by lenP1 - 1 items. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (lenP1.sanitize (c) &&
+ (!lenP1 || c->check_array (arrayZ, lenP1 - 1)));
+ }
+
+ public:
+ LenType lenP1;
+ Type arrayZ[HB_VAR_ARRAY];
+ public:
+ DEFINE_SIZE_ARRAY (sizeof (LenType), arrayZ);
+};
+
+/* An array storing length-1: the stored count (lenM1) is the item count minus one, so there is always at least one item. */
+template <typename Type, typename LenType=HBUINT16>
+struct ArrayOfM1
+{
+ HB_DELETE_CREATE_COPY_ASSIGN (ArrayOfM1);
+
+ const Type& operator [] (int i_) const
+ {
+ unsigned int i = (unsigned int) i_;
+ if (unlikely (i > lenM1)) return Null (Type); /* lenM1 itself is the last valid index. */
+ return arrayZ[i];
+ }
+ Type& operator [] (int i_)
+ {
+ unsigned int i = (unsigned int) i_;
+ if (unlikely (i > lenM1)) return Crap (Type); /* lenM1 itself is the last valid index. */
+ return arrayZ[i];
+ }
+ unsigned int get_size () const
+ { return lenM1.static_size + (lenM1 + 1) * Type::static_size; } /* Count field plus lenM1 + 1 items, in bytes. */
+
+ template <typename ...Ts>
+ bool sanitize (hb_sanitize_context_t *c, Ts&&... ds) const /* Unlike the sibling array types, items are always dispatched; there is no trivially-copyable shortcut. */
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!sanitize_shallow (c))) return_trace (false);
+ unsigned int count = lenM1 + 1;
+ for (unsigned int i = 0; i < count; i++)
+ if (unlikely (!c->dispatch (arrayZ[i], hb_forward<Ts> (ds)...)))
+ return_trace (false);
+ return_trace (true);
+ }
+
+ private:
+ bool sanitize_shallow (hb_sanitize_context_t *c) const /* Checks the count field, then the range covered by lenM1 + 1 items. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (lenM1.sanitize (c) &&
+ (c->check_array (arrayZ, lenM1 + 1)));
+ }
+
+ public:
+ LenType lenM1;
+ Type arrayZ[HB_VAR_ARRAY];
+ public:
+ DEFINE_SIZE_ARRAY (sizeof (LenType), arrayZ);
+};
+
+/* An array with sorted elements. Supports binary searching. */
+template <typename Type, typename LenType=HBUINT16>
+struct SortedArrayOf : ArrayOf<Type, LenType>
+{
+ hb_sorted_array_t< Type> as_array () { return hb_sorted_array (this->arrayZ, this->len); } /* Same view as the base class, typed as sorted. */
+ hb_sorted_array_t<const Type> as_array () const { return hb_sorted_array (this->arrayZ, this->len); }
+
+ /* Iterator. */
+ typedef hb_sorted_array_t<const Type> iter_t;
+ typedef hb_sorted_array_t< Type> writer_t;
+ iter_t iter () const { return as_array (); }
+ writer_t writer () { return as_array (); }
+ operator iter_t () const { return iter (); }
+ operator writer_t () { return writer (); }
+
+ hb_sorted_array_t<const Type> sub_array (unsigned int start_offset, unsigned int count) const
+ { return as_array ().sub_array (start_offset, count); } /* All sub_array overloads forward to hb_sorted_array_t::sub_array. */
+ hb_sorted_array_t<const Type> sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */) const
+ { return as_array ().sub_array (start_offset, count); }
+ hb_sorted_array_t<Type> sub_array (unsigned int start_offset, unsigned int count)
+ { return as_array ().sub_array (start_offset, count); }
+ hb_sorted_array_t<Type> sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */)
+ { return as_array ().sub_array (start_offset, count); }
+
+ bool serialize (hb_serialize_context_t *c, unsigned int items_len) /* Delegates to ArrayOf::serialize. */
+ {
+ TRACE_SERIALIZE (this);
+ bool ret = ArrayOf<Type, LenType>::serialize (c, items_len);
+ return_trace (ret);
+ }
+ template <typename Iterator,
+ hb_requires (hb_is_sorted_source_of (Iterator, Type))>
+ bool serialize (hb_serialize_context_t *c, Iterator items) /* Delegates to ArrayOf::serialize; the constraint above requires a sorted source. */
+ {
+ TRACE_SERIALIZE (this);
+ bool ret = ArrayOf<Type, LenType>::serialize (c, items);
+ return_trace (ret);
+ }
+
+ template <typename T>
+ Type &bsearch (const T &x, Type &not_found = Crap (Type))
+ { return *as_array ().bsearch (x, &not_found); } /* Forwards to hb_sorted_array_t::bsearch. */
+ template <typename T>
+ const Type &bsearch (const T &x, const Type &not_found = Null (Type)) const
+ { return *as_array ().bsearch (x, &not_found); }
+ template <typename T>
+ bool bfind (const T &x, unsigned int *i = nullptr,
+ hb_bfind_not_found_t not_found = HB_BFIND_NOT_FOUND_DONT_STORE,
+ unsigned int to_store = (unsigned int) -1) const
+ { return as_array ().bfind (x, i, not_found, to_store); }
+};
+
+/*
+ * Binary-search arrays
+ */
+
+template <typename LenType=HBUINT16>
+struct BinSearchHeader
+{ /* Four-field header (len, searchRange, entrySelector, rangeShift) that converts to its len, so it can stand in as a length type. */
+ operator uint32_t () const { return len; }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ BinSearchHeader& operator = (unsigned int v) /* Sets len to v and derives the other three fields from it. */
+ {
+ len = v;
+ assert (len == v); /* v must be representable in LenType. */
+ entrySelector = hb_max (1u, hb_bit_storage (v)) - 1; /* Assuming hb_bit_storage yields the bit length of v, this is floor (log2 (v)), or 0 for v == 0; TODO confirm. */
+ searchRange = 16 * (1u << entrySelector);
+ rangeShift = v * 16 > searchRange
+ ? 16 * v - searchRange
+ : 0; /* Clamped at zero rather than going negative. */
+ return *this;
+ }
+
+ protected:
+ LenType len;
+ LenType searchRange;
+ LenType entrySelector;
+ LenType rangeShift;
+
+ public:
+ DEFINE_SIZE_STATIC (8);
+};
+
+template <typename Type, typename LenType=HBUINT16>
+using BinSearchArrayOf = SortedArrayOf<Type, BinSearchHeader<LenType>>; /* Sorted array whose length field is a BinSearchHeader. */
+
+
+struct VarSizedBinSearchHeader
+{ /* Header for a binary-search table whose unit size is stored in the data (unitSize) rather than fixed by the type. */
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ HBUINT16 unitSize; /* Size of a lookup unit for this search in bytes. */
+ HBUINT16 nUnits; /* Number of units of the preceding size to be searched. */
+ HBUINT16 searchRange; /* The value of unitSize times the largest power of 2
+ * that is less than or equal to the value of nUnits. */
+ HBUINT16 entrySelector; /* The log base 2 of the largest power of 2 less than
+ * or equal to the value of nUnits. */
+ HBUINT16 rangeShift; /* The value of unitSize times the difference of the
+ * value of nUnits minus the largest power of 2 less
+ * than or equal to the value of nUnits. */
+ public:
+ DEFINE_SIZE_STATIC (10);
+};
+
+template <typename Type>
+struct VarSizedBinSearchArrayOf
+{ /* Sorted table of header.nUnits units, each header.unitSize bytes, possibly ending in a 0xFFFF terminator unit. */
+ static constexpr unsigned item_size = Type::static_size;
+
+ HB_DELETE_CREATE_COPY_ASSIGN (VarSizedBinSearchArrayOf);
+
+ bool last_is_terminator () const /* True when the first Type::TerminationWordCount 16-bit words of the last unit are all 0xFFFF. */
+ {
+ if (unlikely (!header.nUnits)) return false;
+
+ /* Gah.
+ *
+ * "The number of termination values that need to be included is table-specific.
+ * The value that indicates binary search termination is 0xFFFF." */
+ const HBUINT16 *words = &StructAtOffset<HBUINT16> (&bytesZ, (header.nUnits - 1) * header.unitSize);
+ unsigned int count = Type::TerminationWordCount;
+ for (unsigned int i = 0; i < count; i++)
+ if (words[i] != 0xFFFFu)
+ return false;
+ return true;
+ }
+
+ const Type& operator [] (int i_) const /* Bounds-checked against get_length (), so a terminator unit is not reachable. */
+ {
+ unsigned int i = (unsigned int) i_;
+ if (unlikely (i >= get_length ())) return Null (Type);
+ return StructAtOffset<Type> (&bytesZ, i * header.unitSize);
+ }
+ Type& operator [] (int i_)
+ {
+ unsigned int i = (unsigned int) i_;
+ if (unlikely (i >= get_length ())) return Crap (Type);
+ return StructAtOffset<Type> (&bytesZ, i * header.unitSize);
+ }
+ unsigned int get_length () const
+ { return header.nUnits - last_is_terminator (); } /* One fewer than nUnits when the last unit is a terminator. */
+ unsigned int get_size () const
+ { return header.static_size + header.nUnits * header.unitSize; } /* Counts every unit, terminator included. */
+
+ template <typename ...Ts>
+ bool sanitize (hb_sanitize_context_t *c, Ts&&... ds) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!sanitize_shallow (c))) return_trace (false);
+ if (!sizeof... (Ts) && hb_is_trivially_copyable (Type)) return_trace (true); /* No extra arguments and trivially copyable items: the shallow check is all that is done. */
+ unsigned int count = get_length ();
+ for (unsigned int i = 0; i < count; i++)
+ if (unlikely (!(*this)[i].sanitize (c, hb_forward<Ts> (ds)...)))
+ return_trace (false);
+ return_trace (true);
+ }
+
+ template <typename T>
+ const Type *bsearch (const T &key) const /* Pointer to the unit matching key, or nullptr when hb_bsearch_impl reports no match. */
+ {
+ unsigned pos;
+ return hb_bsearch_impl (&pos,
+ key,
+ (const void *) bytesZ,
+ get_length (),
+ header.unitSize,
+ _hb_cmp_method<T, Type>)
+ ? (const Type *) (((const char *) &bytesZ) + (pos * header.unitSize))
+ : nullptr;
+ }
+
+ private:
+ bool sanitize_shallow (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (header.sanitize (c) &&
+ Type::static_size <= header.unitSize && /* Each unit must be large enough to hold a Type. */
+ c->check_range (bytesZ.arrayZ,
+ header.nUnits,
+ header.unitSize));
+ }
+
+ protected:
+ VarSizedBinSearchHeader header;
+ UnsizedArrayOf<HBUINT8> bytesZ;
+ public:
+ DEFINE_SIZE_ARRAY (10, bytesZ);
+};
+
+
+} /* namespace OT */
+
+
+#endif /* HB_OPEN_TYPE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-cff-common.hh b/thirdparty/harfbuzz/src/hb-ot-cff-common.hh
new file mode 100644
index 0000000000..e5286cd792
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-cff-common.hh
@@ -0,0 +1,622 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+#ifndef HB_OT_CFF_COMMON_HH
+#define HB_OT_CFF_COMMON_HH
+
+#include "hb-open-type.hh"
+#include "hb-bimap.hh"
+#include "hb-ot-layout-common.hh"
+#include "hb-cff-interp-dict-common.hh"
+#include "hb-subset-plan.hh"
+
+namespace CFF {
+
+using namespace OT;
+
+#define CFF_UNDEF_CODE 0xFFFFFFFF
+
+using objidx_t = hb_serialize_context_t::objidx_t;
+using whence_t = hb_serialize_context_t::whence_t;
+
+/* Utility template: resolve a struct at a byte offset from P, treating offset 0 as "absent". */
+template<typename Type>
+static inline const Type& StructAtOffsetOrNull (const void *P, unsigned int offset)
+{
+  if (!offset)
+    return Null (Type);
+  return StructAtOffset<Type> (P, offset);
+}
+
+/* Number of bytes needed to store the largest offset (dataSize + 1, since
+ * offsets are 1-based) of an INDEX holding dataSize bytes of object data. */
+inline unsigned int calcOffSize (unsigned int dataSize)
+{
+  unsigned int nbytes = 1;
+  /* Each pass strips one low byte; count how many bytes the value occupies. */
+  for (unsigned int maxOffset = dataSize + 1; maxOffset > 0xFF; maxOffset >>= 8)
+    nbytes++;
+  /* format does not support size > 4; caller should handle it as an error */
+  return nbytes;
+}
+
+struct code_pair_t /* Plain pair of two code point values; no invariants are enforced here. */
+{
+ hb_codepoint_t code; /* presumably an encoding code or SID -- confirm against the users of this struct */
+ hb_codepoint_t glyph; /* presumably the glyph id paired with `code` -- confirm against the users */
+};
+
+typedef hb_vector_t<unsigned char> str_buff_t;
+struct str_buff_vec_t : hb_vector_t<str_buff_t>
+{
+  /* Tear down the vector through the base class's fini_deep (). */
+  void fini () { SUPER::fini_deep (); }
+
+  /* Sum of the lengths of all contained buffers. */
+  unsigned int total_size () const
+  {
+    unsigned int sum = 0;
+    unsigned int idx = 0;
+    while (idx < length)
+    {
+      sum += (*this)[idx].length;
+      idx++;
+    }
+    return sum;
+  }
+
+  private:
+  typedef hb_vector_t<str_buff_t> SUPER;
+};
+
+/* CFF INDEX */
+template <typename COUNT>
+struct CFFIndex
+{
+ static unsigned int calculate_offset_array_size (unsigned int offSize, unsigned int count)
+ {
+   /* There are (count + 1) offsets, each offSize bytes wide. */
+   const unsigned int num_offsets = count + 1;
+   return offSize * num_offsets;
+ }
+
+ unsigned int offset_array_size () const
+ {
+   /* Byte size of this INDEX's own offset array, from its offSize and count fields. */
+   return CFFIndex::calculate_offset_array_size (this->offSize, this->count);
+ }
+
+ CFFIndex *copy (hb_serialize_context_t *c) const
+ {
+   TRACE_SERIALIZE (this);
+   /* Allocate get_size () bytes in the serializer and copy this INDEX into them verbatim. */
+   const unsigned int nbytes = get_size ();
+   CFFIndex *dup = c->allocate_size<CFFIndex> (nbytes);
+   /* On allocation failure the null pointer is returned unchanged. */
+   if (unlikely (!dup)) return_trace (dup);
+   memcpy (dup, this, nbytes);
+   return_trace (dup);
+ }
+
+ bool serialize (hb_serialize_context_t *c, const CFFIndex &src)
+ {
+   TRACE_SERIALIZE (this);
+   /* Byte-for-byte copy of src into freshly allocated serializer space. */
+   const unsigned int nbytes = src.get_size ();
+   CFFIndex *out = c->allocate_size<CFFIndex> (nbytes);
+   if (likely (out))
+   {
+     memcpy (out, &src, nbytes);
+     return_trace (true);
+   }
+   return_trace (false);
+ }
+
+ bool serialize (hb_serialize_context_t *c,
+                 unsigned int offSize_,
+                 const byte_str_array_t &byteArray)
+ {
+   TRACE_SERIALIZE (this);
+   if (byteArray.length == 0)
+   {
+     /* An empty INDEX is written as a zero count and nothing else. */
+     COUNT *cnt = c->allocate_min<COUNT> ();
+     if (unlikely (!cnt)) return_trace (false);
+     *cnt = 0;
+     return_trace (true);
+   }
+
+   /* serialize CFFIndex header */
+   if (unlikely (!c->extend_min (*this))) return_trace (false);
+   this->count = byteArray.length;
+   this->offSize = offSize_;
+   if (unlikely (!c->allocate_size<HBUINT8> (offSize_ * (byteArray.length + 1))))
+     return_trace (false);
+
+   /* serialize indices: 1-based running offsets, plus a final end marker */
+   unsigned int next = 1;
+   unsigned int slot = 0;
+   while (slot < byteArray.length)
+   {
+     set_offset_at (slot, next);
+     next += byteArray[slot].get_size ();
+     slot++;
+   }
+   set_offset_at (slot, next);
+
+   /* serialize data: each byte string copied back to back */
+   for (unsigned int idx = 0; idx < byteArray.length; idx++)
+   {
+     const byte_str_t &src = byteArray[idx];
+     unsigned char *out = c->allocate_size<unsigned char> (src.length);
+     if (unlikely (!out)) return_trace (false);
+     memcpy (out, &src[0], src.length);
+   }
+   return_trace (true);
+ }
+
+ bool serialize (hb_serialize_context_t *c,
+                 unsigned int offSize_,
+                 const str_buff_vec_t &buffArray)
+ {
+   /* Wrap every buffer in a byte_str_t, then reuse the byte_str_array_t overload. */
+   byte_str_array_t views;
+   views.init ();
+   views.resize (buffArray.length);
+   for (unsigned int idx = 0; idx < views.length; idx++)
+   {
+     const str_buff_t &buf = buffArray[idx];
+     views[idx] = byte_str_t (buf.arrayZ, buf.length);
+   }
+   const bool ok = this->serialize (c, offSize_, views);
+   /* The temporary array is released whether or not serialization succeeded. */
+   views.fini ();
+   return ok;
+ }
+
+ /* Serialize an INDEX from an iterator of byte_str_t items.
+  * An empty iterator produces an INDEX holding only a zero count; otherwise the
+  * header (count, offSize, offsets) is written first and each byte string is
+  * copied after it.  Returns false if the count or the header cannot be written. */
+ template <typename Iterator,
+           hb_requires (hb_is_iterator (Iterator))>
+ bool serialize (hb_serialize_context_t *c,
+                 Iterator it)
+ {
+   TRACE_SERIALIZE (this);
+   if (it.len () == 0)
+   {
+     COUNT *dest = c->allocate_min<COUNT> ();
+     if (unlikely (!dest)) return_trace (false);
+     *dest = 0;
+   }
+   else
+   {
+     /* Propagate a header failure instead of silently reporting success. */
+     if (unlikely (!serialize_header (c, + it | hb_map ([] (const byte_str_t &_) { return _.length; }))))
+       return_trace (false);
+     for (const byte_str_t &_ : +it)
+       _.copy (c);
+   }
+   return_trace (true);
+ }
+
+ bool serialize (hb_serialize_context_t *c,
+                 const byte_str_array_t &byteArray)
+ {
+   /* Forward to the iterator overload, which derives offSize itself. */
+   return serialize (c, + hb_iter (byteArray));
+ }
+
+ bool serialize (hb_serialize_context_t *c,
+                 const str_buff_vec_t &buffArray)
+ {
+   /* Present each buffer as a byte_str_t and defer to the iterator overload. */
+   return serialize (c,
+                     + hb_iter (buffArray)
+                     | hb_map ([] (const str_buff_t &_) { return byte_str_t (_.arrayZ, _.length); }));
+ }
+
+ /* Write count, offSize and the offset array for objects whose byte sizes are
+  * produced by `it`.  The object data itself is not written here. */
+ template <typename Iterator,
+           hb_requires (hb_is_iterator (Iterator))>
+ bool serialize_header (hb_serialize_context_t *c,
+                        Iterator it)
+ {
+   TRACE_SERIALIZE (this);
+
+   /* The offset width follows from the summed object sizes. */
+   unsigned data_bytes = + it | hb_reduce (hb_add, 0);
+   unsigned width = calcOffSize (data_bytes);
+
+   /* serialize CFFIndex header */
+   if (unlikely (!c->extend_min (*this))) return_trace (false);
+   this->count = it.len ();
+   this->offSize = width;
+   if (unlikely (!c->allocate_size<HBUINT8> (width * (it.len () + 1))))
+     return_trace (false);
+
+   /* serialize indices: 1-based running offsets, plus a final end marker */
+   unsigned int next = 1;
+   unsigned int slot = 0;
+   for (unsigned len : +it)
+   {
+     CFFIndex<COUNT>::set_offset_at (slot, next);
+     next += len;
+     slot++;
+   }
+   CFFIndex<COUNT>::set_offset_at (slot, next);
+
+   return_trace (true);
+ }
+
+ void set_offset_at (unsigned int index, unsigned int offset)
+ {
+   /* Store offset big-endian in the offSize-byte slot number `index`,
+    * filling the slot from its last byte back to its first. */
+   HBUINT8 *slot = offsets + offSize * index;
+   for (unsigned int remaining = offSize; remaining > 0; remaining--)
+   {
+     slot[remaining - 1] = offset & 0xFF;
+     offset >>= 8;
+   }
+ }
+
+ unsigned int offset_at (unsigned int index) const
+ {
+   /* Valid slots are 0..count inclusive (there are count + 1 offsets). */
+   assert (index <= count);
+   const HBUINT8 *slot = offsets + offSize * index;
+   const unsigned int width = offSize;
+   /* Assemble the big-endian value one byte at a time. */
+   unsigned int value = 0;
+   for (unsigned int b = 0; b < width; b++)
+     value = (value << 8) + slot[b];
+   return value;
+ }
+
+ unsigned int length_at (unsigned int index) const
+ {
+   /* Length of object `index` is the gap between its offset and the next one. */
+   const unsigned int end = offset_at (index + 1);
+   const unsigned int begin = offset_at (index);
+   /* Offsets that run backwards or past the final offset yield a zero length. */
+   if (unlikely (end < begin || end > offset_at (count)))
+     return 0;
+   return end - begin;
+ }
+
+ const unsigned char *data_base () const
+ {
+   /* Object data starts right after the fixed header and the offset array. */
+   const unsigned char *self = (const unsigned char *) this;
+   return self + min_size + offset_array_size ();
+ }
+
+ unsigned int data_size () const
+ {
+   /* Constant: the static size of HBINT8, independent of the INDEX contents. */
+   return HBINT8::static_size;
+ }
+
+ byte_str_t operator [] (unsigned int index) const
+ {
+   /* Offsets are 1-based relative to data_base (), hence the "- 1". */
+   if (likely (index < count))
+     return byte_str_t (data_base () + offset_at (index) - 1, length_at (index));
+   /* Out of range: return Null (byte_str_t). */
+   return Null (byte_str_t);
+ }
+
+ unsigned int get_size () const
+ {
+   /* The Null object reports a size of zero. */
+   if (this == &Null (CFFIndex)) return 0;
+   /* empty CFFIndex contains count only */
+   if (!(count > 0)) return count.static_size;
+   /* Header + offset array + object data (the last offset is 1-based). */
+   const unsigned int data_bytes = offset_at (count) - 1;
+   return min_size + offset_array_size () + data_bytes;
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+   TRACE_SANITIZE (this);
+   /* empty INDEX */
+   if (c->check_struct (this) && count == 0)
+     return_trace (true);
+   /* Non-empty INDEX: offSize must be 1..4, and both the offset array and the
+    * object data up to the largest offset must be in range. */
+   return_trace (likely (c->check_struct (this) && offSize >= 1 && offSize <= 4 &&
+                         c->check_array (offsets, offSize, count + 1) &&
+                         c->check_array ((const HBUINT8*) data_base (), 1, max_offset () - 1)));
+ }
+
+ protected:
+ unsigned int max_offset () const
+ {
+   /* Largest value among all (count + 1) stored offsets; 0 if there are none. */
+   unsigned int largest = 0;
+   const unsigned int num_offsets = count + 1u;
+   for (unsigned int slot = 0; slot < num_offsets; slot++)
+   {
+     const unsigned int value = offset_at (slot);
+     largest = (value > largest) ? value : largest;
+   }
+   return largest;
+ }
+
+ public:
+ COUNT count; /* Number of object data. Note there are (count+1) offsets */
+ HBUINT8 offSize; /* The byte size of each offset in the offsets array. */
+ HBUINT8 offsets[HB_VAR_ARRAY];
+ /* The array of (count + 1) offsets into objects array (1-base). */
+ /* HBUINT8 data[HB_VAR_ARRAY]; Object data */
+ public:
+ DEFINE_SIZE_ARRAY (COUNT::static_size + HBUINT8::static_size, offsets);
+};
+
+/* CFFIndex whose objects are serialized as TYPE. */
+template <typename COUNT, typename TYPE>
+struct CFFIndexOf : CFFIndex<COUNT>
+{
+  /* Bytes of object `index`, or Null (byte_str_t) when index is out of range. */
+  const byte_str_t operator [] (unsigned int index) const
+  {
+    if (unlikely (index >= CFFIndex<COUNT>::count))
+      return Null (byte_str_t);
+    /* Offsets are 1-based relative to data_base (), hence the "- 1". */
+    return byte_str_t (CFFIndex<COUNT>::data_base () + CFFIndex<COUNT>::offset_at (index) - 1,
+                       CFFIndex<COUNT>::length_at (index));
+  }
+
+  /* Write the INDEX header from the precomputed sizes in dataSizeArray, then
+   * serialize each dataArray element as a TYPE, passing param1 and param2 along.
+   * Returns false on the first allocation or element failure. */
+  template <typename DATA, typename PARAM1, typename PARAM2>
+  bool serialize (hb_serialize_context_t *c,
+                  unsigned int offSize_,
+                  const DATA *dataArray,
+                  unsigned int dataArrayLen,
+                  const hb_vector_t<unsigned int> &dataSizeArray,
+                  const PARAM1 &param1,
+                  const PARAM2 &param2)
+  {
+    TRACE_SERIALIZE (this);
+    /* serialize CFFIndex header */
+    if (unlikely (!c->extend_min (*this))) return_trace (false);
+    this->count = dataArrayLen;
+    this->offSize = offSize_;
+    if (unlikely (!c->allocate_size<HBUINT8> (offSize_ * (dataArrayLen + 1))))
+      return_trace (false);
+
+    /* serialize indices: 1-based running offsets, plus a final end marker */
+    unsigned int next = 1;
+    unsigned int slot = 0;
+    while (slot < dataArrayLen)
+    {
+      CFFIndex<COUNT>::set_offset_at (slot, next);
+      next += dataSizeArray[slot];
+      slot++;
+    }
+    CFFIndex<COUNT>::set_offset_at (slot, next);
+
+    /* serialize data */
+    for (unsigned int idx = 0; idx < dataArrayLen; idx++)
+    {
+      TYPE *out = c->start_embed<TYPE> ();
+      if (unlikely (!out)) return_trace (false);
+      if (unlikely (!out->serialize (c, dataArray[idx], param1, param2)))
+        return_trace (false);
+    }
+    return_trace (true);
+  }
+};
+
+/* Top Dict, Font Dict, Private Dict */
+struct Dict : UnsizedByteStr
+{
+  /* Serialize every entry of dictval through opszr, forwarding the extra
+   * arguments; stops and returns false at the first entry that fails. */
+  template <typename DICTVAL, typename OP_SERIALIZER, typename ...Ts>
+  bool serialize (hb_serialize_context_t *c,
+                  const DICTVAL &dictval,
+                  OP_SERIALIZER& opszr,
+                  Ts&&... ds)
+  {
+    TRACE_SERIALIZE (this);
+    unsigned int idx = 0;
+    while (idx < dictval.get_count ())
+    {
+      if (unlikely (!opszr.serialize (c, dictval[idx], hb_forward<Ts> (ds)...)))
+        return_trace (false);
+      idx++;
+    }
+
+    return_trace (true);
+  }
+
+  /* Serialize `value` as an integer operand introduced by intOp, followed by
+   * the operator `op` (written as an escape byte plus the unescaped opcode
+   * when op is an escaped opcode). */
+  template <typename T, typename V>
+  static bool serialize_int_op (hb_serialize_context_t *c, op_code_t op, V value, op_code_t intOp)
+  {
+    // XXX: 'unlikely' is left off on purpose: the macro invocation fails to compile here
+    // (presumably the comma inside <T, V> splits the macro argument -- confirm).
+    if (/*unlikely*/ (!serialize_int<T, V> (c, intOp, value)))
+      return false;
+
+    TRACE_SERIALIZE (this);
+    /* serialize the opcode */
+    HBUINT8 *dst = c->allocate_size<HBUINT8> (OpCode_Size (op));
+    if (unlikely (!dst)) return_trace (false);
+    if (Is_OpCode_ESC (op))
+    {
+      *dst = OpCode_escape;
+      dst++;
+      op = Unmake_OpCode_ESC (op);
+    }
+    *dst = op;
+    return_trace (true);
+  }
+
+  /* Operand written as an HBINT32 introduced by OpCode_longintdict. */
+  template <typename V>
+  static bool serialize_int4_op (hb_serialize_context_t *c, op_code_t op, V value)
+  {
+    return serialize_int_op<HBINT32> (c, op, value, OpCode_longintdict);
+  }
+
+  /* Operand written as an HBINT16 introduced by OpCode_shortint. */
+  template <typename V>
+  static bool serialize_int2_op (hb_serialize_context_t *c, op_code_t op, V value)
+  {
+    return serialize_int_op<HBINT16> (c, op, value, OpCode_shortint);
+  }
+
+  /* Serialize a zero placeholder operand plus `op`, then register the operand's
+   * location with the serializer as a link to object `link`. */
+  template <typename T, int int_op>
+  static bool serialize_link_op (hb_serialize_context_t *c, op_code_t op, objidx_t link, whence_t whence)
+  {
+    /* The operand will land just past the int_op opcode at the current head;
+     * take its address before anything is written. */
+    T *slot = (T *) (c->head + OpCode_Size (int_op));
+    if (unlikely (!serialize_int_op<T> (c, op, 0, int_op))) return false;
+    c->add_link (*slot, link, whence);
+    return true;
+  }
+
+  /* Link operand stored as an HBINT32. */
+  static bool serialize_link4_op (hb_serialize_context_t *c, op_code_t op, objidx_t link, whence_t whence = whence_t::Head)
+  {
+    return serialize_link_op<HBINT32, OpCode_longintdict> (c, op, link, whence);
+  }
+
+  /* Link operand stored as an HBINT16. */
+  static bool serialize_link2_op (hb_serialize_context_t *c, op_code_t op, objidx_t link, whence_t whence = whence_t::Head)
+  {
+    return serialize_link_op<HBINT16, OpCode_shortint> (c, op, link, whence);
+  }
+};
+
+struct TopDict : Dict {}; /* Top Dict: adds no members to Dict, only a distinct type name. */
+struct FontDict : Dict {}; /* Font Dict: adds no members to Dict, only a distinct type name. */
+struct PrivateDict : Dict {}; /* Private Dict: adds no members to Dict, only a distinct type name. */
+
+/* Offset, size and serializer object index recorded for one table. */
+struct table_info_t
+{
+  /* Reset every field to zero. */
+  void init ()
+  {
+    size = 0;
+    offset = 0;
+    link = 0;
+  }
+
+  unsigned int offset;
+  unsigned int size;
+  objidx_t link;
+};
+
+template <typename COUNT>
+struct FDArray : CFFIndexOf<COUNT, FontDict> /* INDEX of FontDict objects */
+{
+ template <typename DICTVAL, typename INFO, typename Iterator, typename OP_SERIALIZER>
+ bool serialize (hb_serialize_context_t *c, /* serializes each (DICTVAL, INFO) pair from `it` as a FontDict, then the INDEX header */
+ Iterator it,
+ OP_SERIALIZER& opszr)
+ {
+ TRACE_SERIALIZE (this);
+
+ /* serialize INDEX data */
+ hb_vector_t<unsigned> sizes; /* byte size of each serialized FontDict, in iteration order */
+ c->push (); /* the dict data is written between this push () and the pop_pack () below */
+ + it
+ | hb_map ([&] (const hb_pair_t<const DICTVAL&, const INFO&> &_)
+ {
+ FontDict *dict = c->start_embed<FontDict> ();
+ dict->serialize (c, _.first, opszr, _.second); /* NOTE(review): the bool result is not checked here */
+ return c->head - (const char*)dict; /* bytes written for this dict */
+ })
+ | hb_sink (sizes)
+ ;
+ c->pop_pack (false);
+
+ /* serialize INDEX header */
+ return_trace (CFFIndex<COUNT>::serialize_header (c, hb_iter (sizes))); /* header is emitted after the data, from the collected sizes */
+ }
+};
+
+/* FDSelect */
+/* Format 0: one FD index byte per glyph, indexed directly by glyph id. */
+struct FDSelect0 {
+  /* Verify that the per-glyph byte array lies entirely inside the sanitized
+   * range.  fdcount is accepted for interface parity with the other formats
+   * and is not consulted here. */
+  bool sanitize (hb_sanitize_context_t *c, unsigned int fdcount) const
+  {
+    TRACE_SANITIZE (this);
+    if (unlikely (!(c->check_struct (this))))
+      return_trace (false);
+    /* HBUINT8 entries carry nothing to validate beyond being in range, so one
+     * range check over all glyphs replaces a per-byte sanitize loop. */
+    if (unlikely (!c->check_array (fds, c->get_num_glyphs ())))
+      return_trace (false);
+
+    return_trace (true);
+  }
+
+  /* FD index for glyph; the caller must pass an in-range glyph id (no bounds check here). */
+  hb_codepoint_t get_fd (hb_codepoint_t glyph) const
+  { return (hb_codepoint_t) fds[glyph]; }
+
+  /* Byte size of the fds array for num_glyphs glyphs. */
+  unsigned int get_size (unsigned int num_glyphs) const
+  { return HBUINT8::static_size * num_glyphs; }
+
+  HBUINT8 fds[HB_VAR_ARRAY];
+
+  DEFINE_SIZE_MIN (0);
+};
+
+/* One range record: the first glyph id of the range and the FD index it maps to. */
+template <typename GID_TYPE, typename FD_TYPE>
+struct FDSelect3_4_Range
+{
+  /* Valid when `first` is below the glyph count and `fd` is below fdcount. */
+  bool sanitize (hb_sanitize_context_t *c, const void * /*nullptr*/, unsigned int fdcount) const
+  {
+    TRACE_SANITIZE (this);
+    if (!(first < c->get_num_glyphs ()))
+      return_trace (false);
+    return_trace (fd < fdcount);
+  }
+
+  GID_TYPE first;
+  FD_TYPE fd;
+  public:
+  DEFINE_SIZE_STATIC (GID_TYPE::static_size + FD_TYPE::static_size);
+};
+
+/* Range-based FDSelect: an array of range records followed by a sentinel glyph id. */
+template <typename GID_TYPE, typename FD_TYPE>
+struct FDSelect3_4
+{
+  unsigned int get_size () const
+  {
+    const unsigned int fixed = GID_TYPE::static_size * 2;
+    return fixed + ranges.get_size ();
+  }
+
+  /* Requires at least one range, the first range starting at glyph 0, strictly
+   * increasing range starts, and a sentinel equal to the glyph count. */
+  bool sanitize (hb_sanitize_context_t *c, unsigned int fdcount) const
+  {
+    TRACE_SANITIZE (this);
+    if (unlikely (!c->check_struct (this)))
+      return_trace (false);
+    if (unlikely (!ranges.sanitize (c, nullptr, fdcount)))
+      return_trace (false);
+    if (unlikely ((nRanges () == 0) || ranges[0].first != 0))
+      return_trace (false);
+
+    /* Range starts must be strictly ascending. */
+    for (unsigned int next = 1; next < nRanges (); next++)
+    {
+      if (unlikely (ranges[next - 1].first >= ranges[next].first))
+        return_trace (false);
+    }
+
+    if (unlikely (!sentinel().sanitize (c) || (sentinel() != c->get_num_glyphs ())))
+      return_trace (false);
+
+    return_trace (true);
+  }
+
+  /* FD index of the last range whose start is not greater than glyph (linear scan). */
+  hb_codepoint_t get_fd (hb_codepoint_t glyph) const
+  {
+    unsigned int next = 1;
+    while (next < nRanges () && !(glyph < ranges[next].first))
+      next++;
+
+    return (hb_codepoint_t) ranges[next - 1].fd;
+  }
+
+  GID_TYPE &nRanges () { return ranges.len; }
+  GID_TYPE nRanges () const { return ranges.len; }
+  /* The sentinel is stored directly after the last range record. */
+  GID_TYPE &sentinel () { return StructAfter<GID_TYPE> (ranges[nRanges () - 1]); }
+  const GID_TYPE &sentinel () const { return StructAfter<GID_TYPE> (ranges[nRanges () - 1]); }
+
+  ArrayOf<FDSelect3_4_Range<GID_TYPE, FD_TYPE>, GID_TYPE> ranges;
+  /* GID_TYPE sentinel */
+
+  DEFINE_SIZE_ARRAY (GID_TYPE::static_size, ranges);
+};
+
+typedef FDSelect3_4<HBUINT16, HBUINT8> FDSelect3;
+typedef FDSelect3_4_Range<HBUINT16, HBUINT8> FDSelect3_Range;
+
+/* FDSelect table: a format byte followed by the format 0 or format 3 payload. */
+struct FDSelect
+{
+  /* Copy src verbatim; its size is computed by get_size () from num_glyphs. */
+  bool serialize (hb_serialize_context_t *c, const FDSelect &src, unsigned int num_glyphs)
+  {
+    TRACE_SERIALIZE (this);
+    const unsigned int nbytes = src.get_size (num_glyphs);
+    FDSelect *out = c->allocate_size<FDSelect> (nbytes);
+    if (likely (out))
+    {
+      memcpy (out, &src, nbytes);
+      return_trace (true);
+    }
+    return_trace (false);
+  }
+
+  /* Total byte size including the format byte; 0 for an unknown format. */
+  unsigned int get_size (unsigned int num_glyphs) const
+  {
+    const unsigned int fmt = format;
+    if (fmt == 0) return format.static_size + u.format0.get_size (num_glyphs);
+    if (fmt == 3) return format.static_size + u.format3.get_size ();
+    return 0;
+  }
+
+  /* FD index for glyph; 0 for the Null object or an unknown format. */
+  hb_codepoint_t get_fd (hb_codepoint_t glyph) const
+  {
+    if (this == &Null (FDSelect)) return 0;
+
+    const unsigned int fmt = format;
+    if (fmt == 0) return u.format0.get_fd (glyph);
+    if (fmt == 3) return u.format3.get_fd (glyph);
+    return 0;
+  }
+
+  /* Dispatch to the payload's sanitize; unknown formats are rejected. */
+  bool sanitize (hb_sanitize_context_t *c, unsigned int fdcount) const
+  {
+    TRACE_SANITIZE (this);
+    if (unlikely (!c->check_struct (this)))
+      return_trace (false);
+
+    const unsigned int fmt = format;
+    if (fmt == 0) return_trace (u.format0.sanitize (c, fdcount));
+    if (fmt == 3) return_trace (u.format3.sanitize (c, fdcount));
+    return_trace (false);
+  }
+
+  HBUINT8 format;
+  union {
+    FDSelect0 format0;
+    FDSelect3 format3;
+  } u;
+  public:
+  DEFINE_SIZE_MIN (1);
+};
+
+template <typename COUNT>
+struct Subrs : CFFIndex<COUNT> /* CFFIndex that only adds the two type aliases below */
+{
+ typedef COUNT count_type; /* the COUNT template argument, exposed under a fixed name */
+ typedef CFFIndex<COUNT> SUPER; /* the base class type */
+};
+
+} /* namespace CFF */
+
+#endif /* HB_OT_CFF_COMMON_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-cff1-std-str.hh b/thirdparty/harfbuzz/src/hb-ot-cff1-std-str.hh
new file mode 100644
index 0000000000..65d56ae18b
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-cff1-std-str.hh
@@ -0,0 +1,425 @@
+/*
+ * Copyright © 2019 Adobe, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+
+#ifndef HB_OT_CFF1_STD_STR_HH
+#if 0 /* Make checks happy. */
+#define HB_OT_CFF1_STD_STR_HH
+#include "hb.hh"
+#endif
+
+_S(".notdef")
+_S("space")
+_S("exclam")
+_S("quotedbl")
+_S("numbersign")
+_S("dollar")
+_S("percent")
+_S("ampersand")
+_S("quoteright")
+_S("parenleft")
+_S("parenright")
+_S("asterisk")
+_S("plus")
+_S("comma")
+_S("hyphen")
+_S("period")
+_S("slash")
+_S("zero")
+_S("one")
+_S("two")
+_S("three")
+_S("four")
+_S("five")
+_S("six")
+_S("seven")
+_S("eight")
+_S("nine")
+_S("colon")
+_S("semicolon")
+_S("less")
+_S("equal")
+_S("greater")
+_S("question")
+_S("at")
+_S("A")
+_S("B")
+_S("C")
+_S("D")
+_S("E")
+_S("F")
+_S("G")
+_S("H")
+_S("I")
+_S("J")
+_S("K")
+_S("L")
+_S("M")
+_S("N")
+_S("O")
+_S("P")
+_S("Q")
+_S("R")
+_S("S")
+_S("T")
+_S("U")
+_S("V")
+_S("W")
+_S("X")
+_S("Y")
+_S("Z")
+_S("bracketleft")
+_S("backslash")
+_S("bracketright")
+_S("asciicircum")
+_S("underscore")
+_S("quoteleft")
+_S("a")
+_S("b")
+_S("c")
+_S("d")
+_S("e")
+_S("f")
+_S("g")
+_S("h")
+_S("i")
+_S("j")
+_S("k")
+_S("l")
+_S("m")
+_S("n")
+_S("o")
+_S("p")
+_S("q")
+_S("r")
+_S("s")
+_S("t")
+_S("u")
+_S("v")
+_S("w")
+_S("x")
+_S("y")
+_S("z")
+_S("braceleft")
+_S("bar")
+_S("braceright")
+_S("asciitilde")
+_S("exclamdown")
+_S("cent")
+_S("sterling")
+_S("fraction")
+_S("yen")
+_S("florin")
+_S("section")
+_S("currency")
+_S("quotesingle")
+_S("quotedblleft")
+_S("guillemotleft")
+_S("guilsinglleft")
+_S("guilsinglright")
+_S("fi")
+_S("fl")
+_S("endash")
+_S("dagger")
+_S("daggerdbl")
+_S("periodcentered")
+_S("paragraph")
+_S("bullet")
+_S("quotesinglbase")
+_S("quotedblbase")
+_S("quotedblright")
+_S("guillemotright")
+_S("ellipsis")
+_S("perthousand")
+_S("questiondown")
+_S("grave")
+_S("acute")
+_S("circumflex")
+_S("tilde")
+_S("macron")
+_S("breve")
+_S("dotaccent")
+_S("dieresis")
+_S("ring")
+_S("cedilla")
+_S("hungarumlaut")
+_S("ogonek")
+_S("caron")
+_S("emdash")
+_S("AE")
+_S("ordfeminine")
+_S("Lslash")
+_S("Oslash")
+_S("OE")
+_S("ordmasculine")
+_S("ae")
+_S("dotlessi")
+_S("lslash")
+_S("oslash")
+_S("oe")
+_S("germandbls")
+_S("onesuperior")
+_S("logicalnot")
+_S("mu")
+_S("trademark")
+_S("Eth")
+_S("onehalf")
+_S("plusminus")
+_S("Thorn")
+_S("onequarter")
+_S("divide")
+_S("brokenbar")
+_S("degree")
+_S("thorn")
+_S("threequarters")
+_S("twosuperior")
+_S("registered")
+_S("minus")
+_S("eth")
+_S("multiply")
+_S("threesuperior")
+_S("copyright")
+_S("Aacute")
+_S("Acircumflex")
+_S("Adieresis")
+_S("Agrave")
+_S("Aring")
+_S("Atilde")
+_S("Ccedilla")
+_S("Eacute")
+_S("Ecircumflex")
+_S("Edieresis")
+_S("Egrave")
+_S("Iacute")
+_S("Icircumflex")
+_S("Idieresis")
+_S("Igrave")
+_S("Ntilde")
+_S("Oacute")
+_S("Ocircumflex")
+_S("Odieresis")
+_S("Ograve")
+_S("Otilde")
+_S("Scaron")
+_S("Uacute")
+_S("Ucircumflex")
+_S("Udieresis")
+_S("Ugrave")
+_S("Yacute")
+_S("Ydieresis")
+_S("Zcaron")
+_S("aacute")
+_S("acircumflex")
+_S("adieresis")
+_S("agrave")
+_S("aring")
+_S("atilde")
+_S("ccedilla")
+_S("eacute")
+_S("ecircumflex")
+_S("edieresis")
+_S("egrave")
+_S("iacute")
+_S("icircumflex")
+_S("idieresis")
+_S("igrave")
+_S("ntilde")
+_S("oacute")
+_S("ocircumflex")
+_S("odieresis")
+_S("ograve")
+_S("otilde")
+_S("scaron")
+_S("uacute")
+_S("ucircumflex")
+_S("udieresis")
+_S("ugrave")
+_S("yacute")
+_S("ydieresis")
+_S("zcaron")
+_S("exclamsmall")
+_S("Hungarumlautsmall")
+_S("dollaroldstyle")
+_S("dollarsuperior")
+_S("ampersandsmall")
+_S("Acutesmall")
+_S("parenleftsuperior")
+_S("parenrightsuperior")
+_S("twodotenleader")
+_S("onedotenleader")
+_S("zerooldstyle")
+_S("oneoldstyle")
+_S("twooldstyle")
+_S("threeoldstyle")
+_S("fouroldstyle")
+_S("fiveoldstyle")
+_S("sixoldstyle")
+_S("sevenoldstyle")
+_S("eightoldstyle")
+_S("nineoldstyle")
+_S("commasuperior")
+_S("threequartersemdash")
+_S("periodsuperior")
+_S("questionsmall")
+_S("asuperior")
+_S("bsuperior")
+_S("centsuperior")
+_S("dsuperior")
+_S("esuperior")
+_S("isuperior")
+_S("lsuperior")
+_S("msuperior")
+_S("nsuperior")
+_S("osuperior")
+_S("rsuperior")
+_S("ssuperior")
+_S("tsuperior")
+_S("ff")
+_S("ffi")
+_S("ffl")
+_S("parenleftinferior")
+_S("parenrightinferior")
+_S("Circumflexsmall")
+_S("hyphensuperior")
+_S("Gravesmall")
+_S("Asmall")
+_S("Bsmall")
+_S("Csmall")
+_S("Dsmall")
+_S("Esmall")
+_S("Fsmall")
+_S("Gsmall")
+_S("Hsmall")
+_S("Ismall")
+_S("Jsmall")
+_S("Ksmall")
+_S("Lsmall")
+_S("Msmall")
+_S("Nsmall")
+_S("Osmall")
+_S("Psmall")
+_S("Qsmall")
+_S("Rsmall")
+_S("Ssmall")
+_S("Tsmall")
+_S("Usmall")
+_S("Vsmall")
+_S("Wsmall")
+_S("Xsmall")
+_S("Ysmall")
+_S("Zsmall")
+_S("colonmonetary")
+_S("onefitted")
+_S("rupiah")
+_S("Tildesmall")
+_S("exclamdownsmall")
+_S("centoldstyle")
+_S("Lslashsmall")
+_S("Scaronsmall")
+_S("Zcaronsmall")
+_S("Dieresissmall")
+_S("Brevesmall")
+_S("Caronsmall")
+_S("Dotaccentsmall")
+_S("Macronsmall")
+_S("figuredash")
+_S("hypheninferior")
+_S("Ogoneksmall")
+_S("Ringsmall")
+_S("Cedillasmall")
+_S("questiondownsmall")
+_S("oneeighth")
+_S("threeeighths")
+_S("fiveeighths")
+_S("seveneighths")
+_S("onethird")
+_S("twothirds")
+_S("zerosuperior")
+_S("foursuperior")
+_S("fivesuperior")
+_S("sixsuperior")
+_S("sevensuperior")
+_S("eightsuperior")
+_S("ninesuperior")
+_S("zeroinferior")
+_S("oneinferior")
+_S("twoinferior")
+_S("threeinferior")
+_S("fourinferior")
+_S("fiveinferior")
+_S("sixinferior")
+_S("seveninferior")
+_S("eightinferior")
+_S("nineinferior")
+_S("centinferior")
+_S("dollarinferior")
+_S("periodinferior")
+_S("commainferior")
+_S("Agravesmall")
+_S("Aacutesmall")
+_S("Acircumflexsmall")
+_S("Atildesmall")
+_S("Adieresissmall")
+_S("Aringsmall")
+_S("AEsmall")
+_S("Ccedillasmall")
+_S("Egravesmall")
+_S("Eacutesmall")
+_S("Ecircumflexsmall")
+_S("Edieresissmall")
+_S("Igravesmall")
+_S("Iacutesmall")
+_S("Icircumflexsmall")
+_S("Idieresissmall")
+_S("Ethsmall")
+_S("Ntildesmall")
+_S("Ogravesmall")
+_S("Oacutesmall")
+_S("Ocircumflexsmall")
+_S("Otildesmall")
+_S("Odieresissmall")
+_S("OEsmall")
+_S("Oslashsmall")
+_S("Ugravesmall")
+_S("Uacutesmall")
+_S("Ucircumflexsmall")
+_S("Udieresissmall")
+_S("Yacutesmall")
+_S("Thornsmall")
+_S("Ydieresissmall")
+_S("001.000")
+_S("001.001")
+_S("001.002")
+_S("001.003")
+_S("Black")
+_S("Bold")
+_S("Book")
+_S("Light")
+_S("Medium")
+_S("Regular")
+_S("Roman")
+_S("Semibold")
+
+#endif /* HB_OT_CFF1_STD_STR_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-cff1-table.cc b/thirdparty/harfbuzz/src/hb-ot-cff1-table.cc
new file mode 100644
index 0000000000..66b9c8c907
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-cff1-table.cc
@@ -0,0 +1,620 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_CFF
+
+#include "hb-draw.hh"
+#include "hb-algs.hh"
+#include "hb-ot-cff1-table.hh"
+#include "hb-cff1-interp-cs.hh"
+
+using namespace CFF;
+
+struct sid_to_gid_t
+{
+  uint16_t sid; /* key: CFF string ID (SID) */
+  uint8_t gid;  /* value: glyph ID paired with that SID */
+
+  /* Orders probe SID `a` against this entry: 0 if equal, -1 if smaller, 1 if larger. */
+  int cmp (uint16_t a) const
+  {
+    return (a < sid) ? -1 : ((a > sid) ? 1 : 0);
+  }
+};
+
+/* SID to code: StandardEncoding character code of each SID, indexed by SID */
+static const uint8_t standard_encoding_to_code [] =
+{
+ 0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
+ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+ 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78,
+ 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94,
+ 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
+ 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
+ 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 177,
+ 178, 179, 180, 182, 183, 184, 185, 186, 187, 188, 189, 191, 193, 194, 195, 196,
+ 197, 198, 199, 200, 202, 203, 205, 206, 207, 208, 225, 227, 232, 233, 234, 235,
+ 241, 245, 248, 249, 250, 251
+};
+
+/* SID to code: ExpertEncoding character code of each SID, indexed by SID */
+static const uint8_t expert_encoding_to_code [] =
+{
+ 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 45, 46,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 59, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 87, 88, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 201, 0, 0, 0, 0, 189, 0, 0, 188, 0,
+ 0, 0, 0, 190, 202, 0, 0, 0, 0, 203, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 63, 65, 66, 67,
+ 68, 69, 73, 76, 77, 78, 79, 82, 83, 84, 86, 89, 90, 91, 93, 94,
+ 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
+ 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
+ 161, 162, 163, 166, 167, 168, 169, 170, 172, 175, 178, 179, 182, 183, 184, 191,
+ 192, 193, 194, 195, 196, 197, 200, 204, 205, 206, 207, 208, 209, 210, 211, 212,
+ 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228,
+ 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244,
+ 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
+};
+
+/* glyph ID to SID: SIDs of the predefined Expert charset, indexed by glyph ID */
+static const uint16_t expert_charset_to_sid [] =
+{
+ 0, 1, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 13, 14, 15, 99,
+ 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 27, 28, 249, 250, 251, 252,
+ 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 109, 110,
+ 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282,
+ 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298,
+ 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
+ 315, 316, 317, 318, 158, 155, 163, 319, 320, 321, 322, 323, 324, 325, 326, 150,
+ 164, 169, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340,
+ 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356,
+ 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372,
+ 373, 374, 375, 376, 377, 378
+};
+
+/* glyph ID to SID: SIDs of the predefined ExpertSubset charset, indexed by glyph ID */
+static const uint16_t expert_subset_charset_to_sid [] =
+{
+ 0, 1, 231, 232, 235, 236, 237, 238, 13, 14, 15, 99, 239, 240, 241, 242,
+ 243, 244, 245, 246, 247, 248, 27, 28, 249, 250, 251, 253, 254, 255, 256, 257,
+ 258, 259, 260, 261, 262, 263, 264, 265, 266, 109, 110, 267, 268, 269, 270, 272,
+ 300, 301, 302, 305, 314, 315, 158, 155, 163, 320, 321, 322, 323, 324, 325, 326,
+ 150, 164, 169, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339,
+ 340, 341, 342, 343, 344, 345, 346
+};
+
+/* SID to glyph ID: inverse of expert_charset_to_sid (glyph 0 omitted), sorted by SID for binary search */
+static const sid_to_gid_t expert_charset_sid_to_gid [] =
+{
+ { 1, 1 }, { 13, 12 }, { 14, 13 }, { 15, 14 },
+ { 27, 26 }, { 28, 27 }, { 99, 15 }, { 109, 46 },
+ { 110, 47 }, { 150, 111 }, { 155, 101 }, { 158, 100 },
+ { 163, 102 }, { 164, 112 }, { 169, 113 }, { 229, 2 },
+ { 230, 3 }, { 231, 4 }, { 232, 5 }, { 233, 6 },
+ { 234, 7 }, { 235, 8 }, { 236, 9 }, { 237, 10 },
+ { 238, 11 }, { 239, 16 }, { 240, 17 }, { 241, 18 },
+ { 242, 19 }, { 243, 20 }, { 244, 21 }, { 245, 22 },
+ { 246, 23 }, { 247, 24 }, { 248, 25 }, { 249, 28 },
+ { 250, 29 }, { 251, 30 }, { 252, 31 }, { 253, 32 },
+ { 254, 33 }, { 255, 34 }, { 256, 35 }, { 257, 36 },
+ { 258, 37 }, { 259, 38 }, { 260, 39 }, { 261, 40 },
+ { 262, 41 }, { 263, 42 }, { 264, 43 }, { 265, 44 },
+ { 266, 45 }, { 267, 48 }, { 268, 49 }, { 269, 50 },
+ { 270, 51 }, { 271, 52 }, { 272, 53 }, { 273, 54 },
+ { 274, 55 }, { 275, 56 }, { 276, 57 }, { 277, 58 },
+ { 278, 59 }, { 279, 60 }, { 280, 61 }, { 281, 62 },
+ { 282, 63 }, { 283, 64 }, { 284, 65 }, { 285, 66 },
+ { 286, 67 }, { 287, 68 }, { 288, 69 }, { 289, 70 },
+ { 290, 71 }, { 291, 72 }, { 292, 73 }, { 293, 74 },
+ { 294, 75 }, { 295, 76 }, { 296, 77 }, { 297, 78 },
+ { 298, 79 }, { 299, 80 }, { 300, 81 }, { 301, 82 },
+ { 302, 83 }, { 303, 84 }, { 304, 85 }, { 305, 86 },
+ { 306, 87 }, { 307, 88 }, { 308, 89 }, { 309, 90 },
+ { 310, 91 }, { 311, 92 }, { 312, 93 }, { 313, 94 },
+ { 314, 95 }, { 315, 96 }, { 316, 97 }, { 317, 98 },
+ { 318, 99 }, { 319, 103 }, { 320, 104 }, { 321, 105 },
+ { 322, 106 }, { 323, 107 }, { 324, 108 }, { 325, 109 },
+ { 326, 110 }, { 327, 114 }, { 328, 115 }, { 329, 116 },
+ { 330, 117 }, { 331, 118 }, { 332, 119 }, { 333, 120 },
+ { 334, 121 }, { 335, 122 }, { 336, 123 }, { 337, 124 },
+ { 338, 125 }, { 339, 126 }, { 340, 127 }, { 341, 128 },
+ { 342, 129 }, { 343, 130 }, { 344, 131 }, { 345, 132 },
+ { 346, 133 }, { 347, 134 }, { 348, 135 }, { 349, 136 },
+ { 350, 137 }, { 351, 138 }, { 352, 139 }, { 353, 140 },
+ { 354, 141 }, { 355, 142 }, { 356, 143 }, { 357, 144 },
+ { 358, 145 }, { 359, 146 }, { 360, 147 }, { 361, 148 },
+ { 362, 149 }, { 363, 150 }, { 364, 151 }, { 365, 152 },
+ { 366, 153 }, { 367, 154 }, { 368, 155 }, { 369, 156 },
+ { 370, 157 }, { 371, 158 }, { 372, 159 }, { 373, 160 },
+ { 374, 161 }, { 375, 162 }, { 376, 163 }, { 377, 164 },
+ { 378, 165 }
+};
+
+/* SID to glyph ID: inverse of expert_subset_charset_to_sid (glyph 0 omitted), sorted by SID for binary search */
+static const sid_to_gid_t expert_subset_charset_sid_to_gid [] =
+{
+ { 1, 1 }, { 13, 8 }, { 14, 9 }, { 15, 10 },
+ { 27, 22 }, { 28, 23 }, { 99, 11 }, { 109, 41 },
+ { 110, 42 }, { 150, 64 }, { 155, 55 }, { 158, 54 },
+ { 163, 56 }, { 164, 65 }, { 169, 66 }, { 231, 2 },
+ { 232, 3 }, { 235, 4 }, { 236, 5 }, { 237, 6 },
+ { 238, 7 }, { 239, 12 }, { 240, 13 }, { 241, 14 },
+ { 242, 15 }, { 243, 16 }, { 244, 17 }, { 245, 18 },
+ { 246, 19 }, { 247, 20 }, { 248, 21 }, { 249, 24 },
+ { 250, 25 }, { 251, 26 }, { 253, 27 }, { 254, 28 },
+ { 255, 29 }, { 256, 30 }, { 257, 31 }, { 258, 32 },
+ { 259, 33 }, { 260, 34 }, { 261, 35 }, { 262, 36 },
+ { 263, 37 }, { 264, 38 }, { 265, 39 }, { 266, 40 },
+ { 267, 43 }, { 268, 44 }, { 269, 45 }, { 270, 46 },
+ { 272, 47 }, { 300, 48 }, { 301, 49 }, { 302, 50 },
+ { 305, 51 }, { 314, 52 }, { 315, 53 }, { 320, 57 },
+ { 321, 58 }, { 322, 59 }, { 323, 60 }, { 324, 61 },
+ { 325, 62 }, { 326, 63 }, { 327, 67 }, { 328, 68 },
+ { 329, 69 }, { 330, 70 }, { 331, 71 }, { 332, 72 },
+ { 333, 73 }, { 334, 74 }, { 335, 75 }, { 336, 76 },
+ { 337, 77 }, { 338, 78 }, { 339, 79 }, { 340, 80 },
+ { 341, 81 }, { 342, 82 }, { 343, 83 }, { 344, 84 },
+ { 345, 85 }, { 346, 86 }
+};
+
+/* code to SID: SID of each StandardEncoding character code, indexed by code (256 entries) */
+static const uint8_t standard_encoding_to_sid [] =
+{
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
+ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
+ 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
+ 0, 111, 112, 113, 114, 0, 115, 116, 117, 118, 119, 120, 121, 122, 0, 123,
+ 0, 124, 125, 126, 127, 128, 129, 130, 131, 0, 132, 133, 0, 134, 135, 136,
+ 137, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 138, 0, 139, 0, 0, 0, 0, 140, 141, 142, 143, 0, 0, 0, 0,
+ 0, 144, 0, 0, 0, 145, 0, 0, 146, 147, 148, 149, 0, 0, 0, 0
+};
+
+/* StandardEncoding code of `sid`; 0 when `sid` lies past the end of the table. */
+hb_codepoint_t OT::cff1::lookup_standard_encoding_for_code (hb_codepoint_t sid)
+{
+  if (sid >= ARRAY_LENGTH (standard_encoding_to_code))
+    return 0;
+  return (hb_codepoint_t) standard_encoding_to_code[sid];
+}
+
+/* ExpertEncoding code of `sid`; 0 when `sid` lies past the end of the table. */
+hb_codepoint_t OT::cff1::lookup_expert_encoding_for_code (hb_codepoint_t sid)
+{
+  if (sid >= ARRAY_LENGTH (expert_encoding_to_code))
+    return 0;
+  return (hb_codepoint_t) expert_encoding_to_code[sid];
+}
+
+/* SID of `glyph` in the predefined Expert charset; 0 when `glyph` lies past the end of the table. */
+hb_codepoint_t OT::cff1::lookup_expert_charset_for_sid (hb_codepoint_t glyph)
+{
+  if (glyph >= ARRAY_LENGTH (expert_charset_to_sid))
+    return 0;
+  return (hb_codepoint_t) expert_charset_to_sid[glyph];
+}
+
+/* SID of `glyph` in the predefined ExpertSubset charset; 0 when `glyph` lies past the end of the table. */
+hb_codepoint_t OT::cff1::lookup_expert_subset_charset_for_sid (hb_codepoint_t glyph)
+{
+  if (glyph >= ARRAY_LENGTH (expert_subset_charset_to_sid))
+    return 0;
+  return (hb_codepoint_t) expert_subset_charset_to_sid[glyph];
+}
+
+hb_codepoint_t OT::cff1::lookup_expert_charset_for_glyph (hb_codepoint_t sid)
+{
+  const auto *match = hb_sorted_array (expert_charset_sid_to_gid).bsearch (sid);
+  return !match ? 0 : match->gid; /* a SID absent from the sorted table maps to glyph 0 */
+}
+
+hb_codepoint_t OT::cff1::lookup_expert_subset_charset_for_glyph (hb_codepoint_t sid)
+{
+  const auto *match = hb_sorted_array (expert_subset_charset_sid_to_gid).bsearch (sid);
+  return !match ? 0 : match->gid; /* a SID absent from the sorted table maps to glyph 0 */
+}
+
+/* SID of StandardEncoding character `code`; CFF_UNDEF_SID when `code` lies past the end of the table. */
+hb_codepoint_t OT::cff1::lookup_standard_encoding_for_sid (hb_codepoint_t code)
+{
+  if (code >= ARRAY_LENGTH (standard_encoding_to_sid))
+    return CFF_UNDEF_SID;
+  return (hb_codepoint_t) standard_encoding_to_sid[code];
+}
+
+struct bounds_t
+{
+  /* Start from an inverted box (min at INT_MAX, max at INT_MIN), which empty () reports as empty. */
+  void init ()
+  {
+    max.set_int (INT_MIN, INT_MIN);
+    min.set_int (INT_MAX, INT_MAX);
+  }
+
+  /* Grow the box, per axis, so that it contains `pt`. */
+  void update (const point_t &pt)
+  {
+    if (pt.x < min.x) min.x = pt.x;
+    if (pt.y < min.y) min.y = pt.y;
+    if (pt.x > max.x) max.x = pt.x;
+    if (pt.y > max.y) max.y = pt.y;
+  }
+
+  /* Union with `b`: an empty box is overwritten by `b`; an empty `b` changes nothing. */
+  void merge (const bounds_t &b)
+  {
+    if (empty ()) { *this = b; return; }
+    if (b.empty ()) return;
+    if (b.min.x < min.x) min.x = b.min.x;
+    if (b.min.y < min.y) min.y = b.min.y;
+    if (b.max.x > max.x) max.x = b.max.x;
+    if (b.max.y > max.y) max.y = b.max.y;
+  }
+
+  /* Translate a non-empty box by `delta`; an empty box is left as is. */
+  void offset (const point_t &delta)
+  {
+    if (empty ()) return;
+    min.move (delta);
+    max.move (delta);
+  }
+
+  /* Empty means min >= max on at least one axis. */
+  bool empty () const { return (min.x >= max.x) || (min.y >= max.y); }
+
+  point_t min; /* smallest x and y seen */
+  point_t max; /* largest x and y seen */
+};
+
+struct cff1_extents_param_t
+{
+  /* Bind to the font accelerator; start with no open path and freshly initialised bounds. */
+  void init (const OT::cff1::accelerator_t *_cff)
+  {
+    cff = _cff;
+    bounds.init ();
+    path_open = false;
+  }
+
+  void start_path ()         { path_open = true; }
+  void end_path ()           { path_open = false; }
+  bool is_path_open () const { return path_open; }
+
+  bool path_open;  /* toggled by start_path () / end_path () */
+  bounds_t bounds; /* bounding box accumulated so far */
+  const OT::cff1::accelerator_t *cff; /* font accelerator given to init () */
+};
+
+struct cff1_path_procs_extents_t : path_procs_t<cff1_path_procs_extents_t, cff1_cs_interp_env_t, cff1_extents_param_t>
+{
+  static void moveto (cff1_cs_interp_env_t &env, cff1_extents_param_t& param, const point_t &pt)
+  {
+    param.end_path ();
+    env.moveto (pt);
+  }
+
+  static void line (cff1_cs_interp_env_t &env, cff1_extents_param_t& param, const point_t &pt1)
+  {
+    begin_path_if_closed (env, param);
+    env.moveto (pt1);
+    param.bounds.update (env.get_pt ());
+  }
+
+  static void curve (cff1_cs_interp_env_t &env, cff1_extents_param_t& param, const point_t &pt1, const point_t &pt2, const point_t &pt3)
+  {
+    begin_path_if_closed (env, param);
+    /* control points are included too, so the box may be looser than the curve itself */
+    param.bounds.update (pt1);
+    param.bounds.update (pt2);
+    env.moveto (pt3);
+    param.bounds.update (env.get_pt ());
+  }
+
+  /* First drawing operator since the last moveto: mark the path open and count its start point. */
+  static void begin_path_if_closed (cff1_cs_interp_env_t &env, cff1_extents_param_t &param)
+  {
+    if (param.is_path_open ()) return;
+    param.start_path ();
+    param.bounds.update (env.get_pt ());
+  }
+};
+
+static bool _get_bounds (const OT::cff1::accelerator_t *cff, hb_codepoint_t glyph, bounds_t &bounds, bool in_seac=false);
+
+struct cff1_cs_opset_extents_t : cff1_cs_opset_t<cff1_cs_opset_extents_t, cff1_extents_param_t, cff1_path_procs_extents_t>
+{
+  /* seac: merge the bounds of the base glyph with those of the accent glyph shifted by the operand offset. */
+  static void process_seac (cff1_cs_interp_env_t &env, cff1_extents_param_t& param)
+  {
+    unsigned int count = env.argStack.get_count ();
+    point_t shift;
+    shift.x = env.argStack[count-4];
+    shift.y = env.argStack[count-3];
+    hb_codepoint_t base = param.cff->std_code_to_glyph (env.argStack[count-2].to_int ());
+    hb_codepoint_t accent = param.cff->std_code_to_glyph (env.argStack[count-1].to_int ());
+
+    /* A nested seac, a component mapped to glyph 0, or a component whose bounds fail is an error. */
+    bounds_t base_box, accent_box;
+    if (unlikely (env.in_seac || !base || !accent
+                  || !_get_bounds (param.cff, base, base_box, true)
+                  || !_get_bounds (param.cff, accent, accent_box, true)))
+    { env.set_error (); return; }
+
+    param.bounds.merge (base_box);
+    accent_box.offset (shift);
+    param.bounds.merge (accent_box);
+  }
+};
+
+/* Interprets the charstring of `glyph` and stores its box in `bounds` (which is reset first, even on failure). */
+bool _get_bounds (const OT::cff1::accelerator_t *cff, hb_codepoint_t glyph, bounds_t &bounds, bool in_seac)
+{
+  bounds.init ();
+  if (unlikely (!cff->is_valid () || (glyph >= cff->num_glyphs))) return false;
+  cff1_extents_param_t extents_param;
+  extents_param.init (cff);
+  const byte_str_t charstring = (*cff->charStrings)[glyph];
+  unsigned int fd = cff->fdSelect->get_fd (glyph);
+  cff1_cs_interpreter_t<cff1_cs_opset_extents_t, cff1_extents_param_t> interp;
+  interp.env.init (charstring, *cff, fd);
+  interp.env.set_in_seac (in_seac);
+  if (unlikely (!interp.interpret (extents_param))) return false;
+  bounds = extents_param.bounds;
+  return true;
+}
+
+/* Fills `extents` from the bounding box of the glyph's charstring, scaled through `font`.
+ * Returns false when the glyph's bounds cannot be computed. */
+bool OT::cff1::accelerator_t::get_extents (hb_font_t *font, hb_codepoint_t glyph, hb_glyph_extents_t *extents) const
+{
+#ifdef HB_NO_OT_FONT_CFF
+  /* XXX Remove check when this code moves to .hh file. */
+  return true;
+#endif
+
+  bounds_t box;
+  if (!_get_bounds (this, glyph, box))
+    return false;
+
+  /* Horizontal: bearing comes from min.x, width from max.x - min.x; a flat axis reports zeros. */
+  const bool flat_x = box.min.x >= box.max.x;
+  if (!flat_x)
+  {
+    extents->x_bearing = font->em_scalef_x (box.min.x.to_real ());
+    extents->width = font->em_scalef_x (box.max.x.to_real () - box.min.x.to_real ());
+  }
+  else
+    extents->x_bearing = extents->width = 0;
+
+  /* Vertical: bearing comes from max.y and height is min.y - max.y, hence negative before scaling. */
+  const bool flat_y = box.min.y >= box.max.y;
+  if (!flat_y)
+  {
+    extents->y_bearing = font->em_scalef_y (box.max.y.to_real ());
+    extents->height = font->em_scalef_y (box.min.y.to_real () - box.max.y.to_real ());
+  }
+  else
+    extents->y_bearing = extents->height = 0;
+
+  return true;
+}
+
+#ifdef HB_EXPERIMENTAL_API
+struct cff1_path_param_t
+{
+  /* Stores the given pointers as-is; `delta_` may be null, in which case no offset is applied. */
+  cff1_path_param_t (const OT::cff1::accelerator_t *cff_, hb_font_t *font_,
+                     draw_helper_t &draw_helper_, point_t *delta_)
+    : font (font_), draw_helper (&draw_helper_), delta (delta_), cff (cff_) {}
+
+  /* Copy of `p`, moved by *delta when a delta was supplied. */
+  point_t translated (const point_t &p) const
+  {
+    point_t q = p;
+    if (delta) q.move (*delta);
+    return q;
+  }
+
+  /* Forward a move to `p` (translated, then scaled through the font) to the draw helper. */
+  void move_to (const point_t &p)
+  {
+    point_t q = translated (p);
+    draw_helper->move_to (font->em_scalef_x (q.x.to_real ()), font->em_scalef_y (q.y.to_real ()));
+  }
+
+  /* Forward a straight segment ending at `p`. */
+  void line_to (const point_t &p)
+  {
+    point_t q = translated (p);
+    draw_helper->line_to (font->em_scalef_x (q.x.to_real ()), font->em_scalef_y (q.y.to_real ()));
+  }
+
+  /* Forward a cubic segment with control points `p1`, `p2` and end point `p3`. */
+  void cubic_to (const point_t &p1, const point_t &p2, const point_t &p3)
+  {
+    point_t q1 = translated (p1), q2 = translated (p2), q3 = translated (p3);
+    draw_helper->cubic_to (font->em_scalef_x (q1.x.to_real ()), font->em_scalef_y (q1.y.to_real ()),
+                           font->em_scalef_x (q2.x.to_real ()), font->em_scalef_y (q2.y.to_real ()),
+                           font->em_scalef_x (q3.x.to_real ()), font->em_scalef_y (q3.y.to_real ()));
+  }
+
+  /* Tell the draw helper that the current path is finished. */
+  void end_path () { draw_helper->end_path (); }
+
+  hb_font_t *font;
+  draw_helper_t *draw_helper;
+  point_t *delta; /* optional offset added to every point; may be null */
+
+  const OT::cff1::accelerator_t *cff;
+};
+
+struct cff1_path_procs_path_t : path_procs_t<cff1_path_procs_path_t, cff1_cs_interp_env_t, cff1_path_param_t>
+{ /* Path callbacks that emit each segment through `param`, then advance the interpreter's current point. */
+ static void moveto (cff1_cs_interp_env_t &env, cff1_path_param_t& param, const point_t &pt)
+ {
+ param.move_to (pt);
+ env.moveto (pt);
+ }
+ /* line: emit a straight segment to pt1, which then becomes the current point. */
+ static void line (cff1_cs_interp_env_t &env, cff1_path_param_t &param, const point_t &pt1)
+ {
+ param.line_to (pt1);
+ env.moveto (pt1);
+ }
+ /* curve: emit a cubic with control points pt1, pt2 ending at pt3, which then becomes the current point. */
+ static void curve (cff1_cs_interp_env_t &env, cff1_path_param_t &param, const point_t &pt1, const point_t &pt2, const point_t &pt3)
+ {
+ param.cubic_to (pt1, pt2, pt3);
+ env.moveto (pt3);
+ }
+};
+
+static bool _get_path (const OT::cff1::accelerator_t *cff, hb_font_t *font, hb_codepoint_t glyph,
+ draw_helper_t &draw_helper, bool in_seac = false, point_t *delta = nullptr);
+
+struct cff1_cs_opset_path_t : cff1_cs_opset_t<cff1_cs_opset_path_t, cff1_path_param_t, cff1_path_procs_path_t>
+{
+  /* seac: draw the base glyph, then the accent glyph shifted by the operand offset; any failure flags an error. */
+  static void process_seac (cff1_cs_interp_env_t &env, cff1_path_param_t& param)
+  {
+    /* End previous path */
+    param.end_path ();
+    unsigned int count = env.argStack.get_count ();
+    point_t shift;
+    shift.x = env.argStack[count-4];
+    shift.y = env.argStack[count-3];
+    hb_codepoint_t base = param.cff->std_code_to_glyph (env.argStack[count-2].to_int ());
+    hb_codepoint_t accent = param.cff->std_code_to_glyph (env.argStack[count-1].to_int ());
+
+    if (unlikely (env.in_seac || !base || !accent
+                  || !_get_path (param.cff, param.font, base, *param.draw_helper, true)
+                  || !_get_path (param.cff, param.font, accent, *param.draw_helper, true, &shift)))
+      env.set_error ();
+  }
+};
+
+/* Interprets the charstring of `glyph`, sending its outline to `draw_helper`; `delta`, if given, offsets every point. */
+bool _get_path (const OT::cff1::accelerator_t *cff, hb_font_t *font, hb_codepoint_t glyph,
+                draw_helper_t &draw_helper, bool in_seac, point_t *delta)
+{
+  if (unlikely (!cff->is_valid () || (glyph >= cff->num_glyphs))) return false;
+
+  cff1_path_param_t path_param (cff, font, draw_helper, delta);
+  const byte_str_t charstring = (*cff->charStrings)[glyph];
+  unsigned int fd = cff->fdSelect->get_fd (glyph);
+  cff1_cs_interpreter_t<cff1_cs_opset_path_t, cff1_path_param_t> interp;
+  interp.env.init (charstring, *cff, fd);
+  interp.env.set_in_seac (in_seac);
+  if (unlikely (!interp.interpret (path_param))) return false;
+
+  /* Let's end the path specially since it is called inside seac also */
+  path_param.end_path ();
+  return true;
+}
+
+bool OT::cff1::accelerator_t::get_path (hb_font_t *font, hb_codepoint_t glyph, draw_helper_t &draw_helper) const
+{ /* Public entry point: draws `glyph` into `draw_helper`, scaled through `font`. */
+#ifdef HB_NO_OT_FONT_CFF
+ /* XXX Remove check when this code moves to .hh file. */
+ return true;
+#endif
+ /* Top-level call: relies on the defaults declared for _get_path (in_seac = false, delta = nullptr). */
+ return _get_path (this, font, glyph, draw_helper);
+}
+#endif
+
+struct get_seac_param_t
+{
+  void init (const OT::cff1::accelerator_t *_cff)
+  {
+    cff = _cff;
+    base = accent = 0;
+  }
+
+  /* True only when both component glyph IDs are non-zero. */
+  bool has_seac () const { return base != 0 && accent != 0; }
+
+  const OT::cff1::accelerator_t *cff;
+  hb_codepoint_t base;   /* glyph ID of the base component; 0 until set */
+  hb_codepoint_t accent; /* glyph ID of the accent component; 0 until set */
+};
+
+struct cff1_cs_opset_seac_t : cff1_cs_opset_t<cff1_cs_opset_seac_t, get_seac_param_t>
+{
+  /* seac: only records the glyph IDs of the two components; the offset operands are not read. */
+  static void process_seac (cff1_cs_interp_env_t &env, get_seac_param_t& param)
+  {
+    unsigned int count = env.argStack.get_count ();
+    hb_codepoint_t base_code = (hb_codepoint_t) env.argStack[count-2].to_int ();
+    hb_codepoint_t accent_code = (hb_codepoint_t) env.argStack[count-1].to_int ();
+    param.base = param.cff->std_code_to_glyph (base_code);
+    param.accent = param.cff->std_code_to_glyph (accent_code);
+  }
+};
+
+/* Reports the base and accent glyphs of `glyph` when its charstring uses seac.
+ * `*base` and `*accent` are written only when true is returned. */
+bool OT::cff1::accelerator_t::get_seac_components (hb_codepoint_t glyph, hb_codepoint_t *base, hb_codepoint_t *accent) const
+{
+  if (unlikely (!is_valid () || (glyph >= num_glyphs))) return false;
+
+  get_seac_param_t seac;
+  seac.init (this);
+  const byte_str_t charstring = (*charStrings)[glyph];
+  unsigned int fd = fdSelect->get_fd (glyph);
+  cff1_cs_interpreter_t<cff1_cs_opset_seac_t, get_seac_param_t> interp;
+  interp.env.init (charstring, *this, fd);
+  if (unlikely (!interp.interpret (seac))) return false;
+
+  /* No seac seen, or a component mapped to glyph 0: nothing to report. */
+  if (!seac.has_seac ()) return false;
+  *base = seac.base;
+  *accent = seac.accent;
+  return true;
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-cff1-table.hh b/thirdparty/harfbuzz/src/hb-ot-cff1-table.hh
new file mode 100644
index 0000000000..7228f77727
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-cff1-table.hh
@@ -0,0 +1,1403 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+
+#ifndef HB_OT_CFF1_TABLE_HH
+#define HB_OT_CFF1_TABLE_HH
+
+#include "hb-ot-cff-common.hh"
+#include "hb-subset-cff1.hh"
+#include "hb-draw.hh"
+
+#define HB_STRING_ARRAY_NAME cff1_std_strings
+#define HB_STRING_ARRAY_LIST "hb-ot-cff1-std-str.hh"
+#include "hb-string-array.hh"
+#undef HB_STRING_ARRAY_LIST
+#undef HB_STRING_ARRAY_NAME
+
+namespace CFF {
+
+/*
+ * CFF -- Compact Font Format (CFF)
+ * https://www.adobe.com/content/dam/acom/en/devnet/font/pdfs/5176.CFF.pdf
+ */
+#define HB_OT_TAG_cff1 HB_TAG('C','F','F',' ')
+
+#define CFF_UNDEF_SID CFF_UNDEF_CODE
+
+enum EncodingID { StandardEncoding = 0, ExpertEncoding = 1 }; /* IDs of the predefined encodings */
+enum CharsetID { ISOAdobeCharset = 0, ExpertCharset = 1, ExpertSubsetCharset = 2 }; /* IDs of the predefined charsets */
+
+typedef CFFIndex<HBUINT16> CFF1Index;
+template <typename Type> struct CFF1IndexOf : CFFIndexOf<HBUINT16, Type> {};
+
+/* CharStrings is a plain CFF1Index; CFF1Index itself is declared once, above. */
+typedef CFF1Index CFF1CharStrings;
+typedef Subrs<HBUINT16> CFF1Subrs;
+
+struct CFF1FDSelect : FDSelect {};
+
+/* Encoding */
+struct Encoding0 {
+  /* Validation is delegated to the code array. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (codes.sanitize (c));
+  }
+
+  /* Code of `glyph` (must be > 0; glyph 1 is entry 0), or CFF_UNDEF_CODE past the last stored code. */
+  hb_codepoint_t get_code (hb_codepoint_t glyph) const
+  {
+    assert (glyph > 0);
+    const hb_codepoint_t index = glyph - 1;
+    if (index >= nCodes ())
+      return CFF_UNDEF_CODE;
+    return (hb_codepoint_t) codes[index];
+  }
+
+  /* Number of stored codes, i.e. the array's 8-bit length field. */
+  HBUINT8 &nCodes () { return codes.len; }
+  HBUINT8 nCodes () const { return codes.len; }
+
+  ArrayOf<HBUINT8, HBUINT8> codes; /* codes[i] is the code of glyph i + 1 */
+
+  DEFINE_SIZE_ARRAY_SIZED (1, codes);
+};
+
+struct Encoding1_Range { /* one run of consecutive codes in a format 1 encoding */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ HBUINT8 first; /* code of the first glyph covered by the range */
+ HBUINT8 nLeft; /* number of further glyphs in the range, each taking the next code */
+
+ DEFINE_SIZE_STATIC (2);
+};
+
+struct Encoding1 {
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (ranges.sanitize (c));
+  }
+
+  /* Code of `glyph` (must be > 0); CFF_UNDEF_CODE if no range covers it or the code would exceed 0xFF. */
+  hb_codepoint_t get_code (hb_codepoint_t glyph) const
+  {
+    assert (glyph > 0);
+    hb_codepoint_t remaining = glyph - 1;
+    for (unsigned int i = 0; i < nRanges (); i++)
+    {
+      const Encoding1_Range &range = ranges[i];
+      /* Not in this range: skip the nLeft + 1 glyphs it covers. */
+      if (remaining > range.nLeft) { remaining -= (range.nLeft + 1); continue; }
+      hb_codepoint_t code = (hb_codepoint_t) range.first + remaining;
+      return (likely (code < 0x100) ? code: CFF_UNDEF_CODE);
+    }
+    return CFF_UNDEF_CODE;
+  }
+
+  HBUINT8 &nRanges () { return ranges.len; }
+  HBUINT8 nRanges () const { return ranges.len; }
+
+  ArrayOf<Encoding1_Range, HBUINT8> ranges;
+
+  DEFINE_SIZE_ARRAY_SIZED (1, ranges);
+};
+
+struct SuppEncoding { /* one supplemental (code, SID) pair */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ HBUINT8 code; /* character code */
+ HBUINT16 glyph; /* despite the name, compared against a SID by CFF1SuppEncData::get_codes */
+
+ DEFINE_SIZE_STATIC (3);
+};
+
+struct CFF1SuppEncData {
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (supps.sanitize (c));
+  }
+  /* Appends to `codes` the code of every supplement entry that names `sid`. */
+  void get_codes (hb_codepoint_t sid, hb_vector_t<hb_codepoint_t> &codes) const
+  {
+    for (unsigned int i = 0; i < nSups (); i++)
+    {
+      if (sid != supps[i].glyph) continue;
+      codes.push (supps[i].code);
+    }
+  }
+
+  HBUINT8 &nSups () { return supps.len; }
+  HBUINT8 nSups () const { return supps.len; }
+  ArrayOf<SuppEncoding, HBUINT8> supps;
+  DEFINE_SIZE_ARRAY_SIZED (1, supps);
+};
+
+struct Encoding
+{
+ /* serialize a fullset Encoding: copies src.get_size () bytes of src unchanged */
+ bool serialize (hb_serialize_context_t *c, const Encoding &src)
+ {
+ TRACE_SERIALIZE (this);
+ unsigned int size = src.get_size ();
+ Encoding *dest = c->allocate_size<Encoding> (size);
+ if (unlikely (!dest)) return_trace (false);
+ memcpy (dest, &src, size);
+ return_trace (true);
+ }
+
+ /* serialize a subset Encoding: code_ranges entries are read as { first code, nLeft }, supp_codes entries as { code, SID } */
+ bool serialize (hb_serialize_context_t *c,
+ uint8_t format,
+ unsigned int enc_count,
+ const hb_vector_t<code_pair_t>& code_ranges,
+ const hb_vector_t<code_pair_t>& supp_codes)
+ {
+ TRACE_SERIALIZE (this);
+ Encoding *dest = c->extend_min (*this);
+ if (unlikely (!dest)) return_trace (false);
+ dest->format = format | ((supp_codes.length > 0) ? 0x80 : 0); /* bit 7 flags that supplement data follows */
+ switch (format) {
+ case 0:
+ {
+ Encoding0 *fmt0 = c->allocate_size<Encoding0> (Encoding0::min_size + HBUINT8::static_size * enc_count);
+ if (unlikely (!fmt0)) return_trace (false);
+ fmt0->nCodes () = enc_count;
+ unsigned int glyph = 0;
+ for (unsigned int i = 0; i < code_ranges.length; i++)
+ {
+ hb_codepoint_t code = code_ranges[i].code;
+ for (int left = (int)code_ranges[i].glyph; left >= 0; left--) /* .glyph holds nLeft: emits nLeft + 1 consecutive codes */
+ fmt0->codes[glyph++] = code++;
+ if (unlikely (!((glyph <= 0x100) && (code <= 0x100)))) /* NOTE(review): checked only after the inner loop has written its codes */
+ return_trace (false);
+ }
+ }
+ break;
+
+ case 1:
+ {
+ Encoding1 *fmt1 = c->allocate_size<Encoding1> (Encoding1::min_size + Encoding1_Range::static_size * code_ranges.length);
+ if (unlikely (!fmt1)) return_trace (false);
+ fmt1->nRanges () = code_ranges.length;
+ for (unsigned int i = 0; i < code_ranges.length; i++)
+ {
+ if (unlikely (!((code_ranges[i].code <= 0xFF) && (code_ranges[i].glyph <= 0xFF)))) /* both fields are single bytes */
+ return_trace (false);
+ fmt1->ranges[i].first = code_ranges[i].code;
+ fmt1->ranges[i].nLeft = code_ranges[i].glyph;
+ }
+ }
+ break;
+
+ } /* any other format value writes no table body */
+
+ if (supp_codes.length)
+ {
+ CFF1SuppEncData *suppData = c->allocate_size<CFF1SuppEncData> (CFF1SuppEncData::min_size + SuppEncoding::static_size * supp_codes.length);
+ if (unlikely (!suppData)) return_trace (false);
+ suppData->nSups () = supp_codes.length;
+ for (unsigned int i = 0; i < supp_codes.length; i++)
+ {
+ suppData->supps[i].code = supp_codes[i].code;
+ suppData->supps[i].glyph = supp_codes[i].glyph; /* actually SID */
+ }
+ }
+
+ return_trace (true);
+ }
+ /* Total byte size: min_size plus the active format's table, plus the supplement data when flagged. */
+ unsigned int get_size () const
+ {
+ unsigned int size = min_size;
+ switch (table_format ())
+ {
+ case 0: size += u.format0.get_size (); break;
+ case 1: size += u.format1.get_size (); break;
+ }
+ if (has_supplement ())
+ size += suppEncData ().get_size ();
+ return size;
+ }
+ /* Code of `glyph` through the active format; 0 for a format other than 0 or 1. */
+ hb_codepoint_t get_code (hb_codepoint_t glyph) const
+ {
+ switch (table_format ())
+ {
+ case 0: return u.format0.get_code (glyph);
+ case 1: return u.format1.get_code (glyph);
+ default:return 0;
+ }
+ }
+
+ uint8_t table_format () const { return format & 0x7F; } /* low 7 bits: table format */
+ bool has_supplement () const { return format & 0x80; } /* bit 7: supplement data present */
+ /* Replaces the contents of `codes` with the supplemental codes of `sid`; left empty without a supplement. */
+ void get_supplement_codes (hb_codepoint_t sid, hb_vector_t<hb_codepoint_t> &codes) const
+ {
+ codes.resize (0);
+ if (has_supplement ())
+ suppEncData().get_codes (sid, codes);
+ }
+ /* Validates the format byte, the table of the active format (only 0 and 1 are accepted), then the supplement if flagged. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!c->check_struct (this)))
+ return_trace (false);
+
+ switch (table_format ())
+ {
+ case 0: if (unlikely (!u.format0.sanitize (c))) { return_trace (false); } break;
+ case 1: if (unlikely (!u.format1.sanitize (c))) { return_trace (false); } break;
+ default:return_trace (false);
+ }
+ return_trace (likely (!has_supplement () || suppEncData ().sanitize (c)));
+ }
+
+ protected:
+ const CFF1SuppEncData &suppEncData () const /* supplement data located right after the last code / range */
+ {
+ switch (table_format ())
+ {
+ case 0: return StructAfter<CFF1SuppEncData> (u.format0.codes[u.format0.nCodes ()-1]); /* NOTE(review): index is -1 when nCodes () is 0 — confirm the array access tolerates it */
+ case 1: return StructAfter<CFF1SuppEncData> (u.format1.ranges[u.format1.nRanges ()-1]); /* NOTE(review): same concern when nRanges () is 0 */
+ default:return Null (CFF1SuppEncData);
+ }
+ }
+
+ public:
+ HBUINT8 format; /* table format in the low 7 bits, supplement flag in bit 7 */
+ union {
+ Encoding0 format0;
+ Encoding1 format1;
+ } u;
+ /* CFF1SuppEncData suppEncData; */
+
+ DEFINE_SIZE_MIN (1);
+};
+
+/* Charset */
+struct Charset0 {
+  /* Validates the SID array, which holds num_glyphs - 1 entries (glyph 0 has none). */
+  bool sanitize (hb_sanitize_context_t *c, unsigned int num_glyphs) const
+  {
+    TRACE_SANITIZE (this);
+    if (unlikely (!num_glyphs || !c->check_struct (this))) return_trace (false);
+    /* The last entry is sids[num_glyphs - 2]; a single-glyph font stores no entry at all. */
+    return_trace (num_glyphs == 1 || sids[num_glyphs - 2].sanitize (c));
+  }
+
+  /* SID of `glyph`; glyph 0 is always SID 0. No bounds check: `glyph` is assumed valid — TODO confirm at call sites. */
+  hb_codepoint_t get_sid (hb_codepoint_t glyph) const
+  {
+    if (glyph == 0) return 0;
+    return sids[glyph - 1];
+  }
+
+  /* Glyph whose SID equals `sid` (linear search over glyphs 1 .. num_glyphs - 1), or 0 when there is none. */
+  hb_codepoint_t get_glyph (hb_codepoint_t sid, unsigned int num_glyphs) const
+  {
+    if (sid == 0) return 0;
+
+    for (unsigned int glyph = 1; glyph < num_glyphs; glyph++)
+      if (sids[glyph-1] == sid) return glyph;
+    return 0;
+  }
+
+  /* Byte size of the SID array for `num_glyphs` glyphs. */
+  unsigned int get_size (unsigned int num_glyphs) const
+  {
+    assert (num_glyphs > 0);
+    return HBUINT16::static_size * (num_glyphs - 1);
+  }
+
+  HBUINT16 sids[HB_VAR_ARRAY];
+
+  DEFINE_SIZE_ARRAY(0, sids);
+};
+
+template <typename TYPE>
+struct Charset_Range { /* one run of consecutive SIDs; TYPE is the integer type of nLeft */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ HBUINT16 first; /* SID of the first glyph covered by the range */
+ TYPE nLeft; /* number of further glyphs in the range, each taking the next SID */
+
+ DEFINE_SIZE_STATIC (HBUINT16::static_size + TYPE::static_size);
+};
+
+template <typename TYPE>
+struct Charset1_2 { /* charset stored as consecutive SID ranges; TYPE is the integer type of each range's nLeft */
+ bool sanitize (hb_sanitize_context_t *c, unsigned int num_glyphs) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!c->check_struct (this)))
+ return_trace (false);
+ num_glyphs--; /* glyph 0 has no entry in the ranges */
+ for (unsigned int i = 0; num_glyphs > 0; i++)
+ {
+ if (unlikely (!ranges[i].sanitize (c) || (num_glyphs < ranges[i].nLeft + 1))) /* a range may not cover more glyphs than remain */
+ return_trace (false);
+ num_glyphs -= (ranges[i].nLeft + 1);
+ }
+ return_trace (true);
+ }
+ /* SID of `glyph`: walks the ranges, subtracting each range's glyph count until `glyph` falls inside one. */
+ hb_codepoint_t get_sid (hb_codepoint_t glyph) const
+ {
+ if (glyph == 0) return 0;
+ glyph--;
+ for (unsigned int i = 0;; i++) /* NOTE(review): no upper bound on i — relies on `glyph` being covered by the ranges */
+ {
+ if (glyph <= ranges[i].nLeft)
+ return (hb_codepoint_t)ranges[i].first + glyph;
+ glyph -= (ranges[i].nLeft + 1);
+ }
+ /* not reached: the loop above has no exit other than its return */
+ return 0;
+ }
+ /* Glyph whose SID equals `sid`, or 0 once the walk passes num_glyphs without a match. */
+ hb_codepoint_t get_glyph (hb_codepoint_t sid, unsigned int num_glyphs) const
+ {
+ if (sid == 0) return 0;
+ hb_codepoint_t glyph = 1;
+ for (unsigned int i = 0;; i++)
+ {
+ if (glyph >= num_glyphs)
+ return 0;
+ if ((ranges[i].first <= sid) && (sid <= ranges[i].first + ranges[i].nLeft))
+ return glyph + (sid - ranges[i].first);
+ glyph += (ranges[i].nLeft + 1);
+ }
+ /* not reached: the loop above only leaves through one of its returns */
+ return 0;
+ }
+ /* Byte size of the ranges needed to cover num_glyphs - 1 glyphs, plus HBUINT8::static_size. */
+ unsigned int get_size (unsigned int num_glyphs) const
+ {
+ unsigned int size = HBUINT8::static_size; /* presumably the charset's format byte — TODO confirm against the caller */
+ int glyph = (int)num_glyphs;
+
+ assert (glyph > 0);
+ glyph--;
+ for (unsigned int i = 0; glyph > 0; i++)
+ {
+ glyph -= (ranges[i].nLeft + 1);
+ size += Charset_Range<TYPE>::static_size;
+ }
+
+ return size;
+ }
+
+ Charset_Range<TYPE> ranges[HB_VAR_ARRAY];
+
+ DEFINE_SIZE_ARRAY (0, ranges);
+};
+
+typedef Charset1_2<HBUINT8> Charset1;
+typedef Charset1_2<HBUINT16> Charset2;
+typedef Charset_Range<HBUINT8> Charset1_Range;
+typedef Charset_Range<HBUINT16> Charset2_Range;
+
+struct Charset
+{
+ /* Serialize a fullset Charset: a byte-for-byte copy of `src`, whose length
+  * is src.get_size (num_glyphs). */
+ bool serialize (hb_serialize_context_t *c, const Charset &src, unsigned int num_glyphs)
+ {
+   TRACE_SERIALIZE (this);
+   const unsigned int num_bytes = src.get_size (num_glyphs);
+   Charset *copy = c->allocate_size<Charset> (num_bytes);
+   if (likely (copy))
+   {
+     memcpy (copy, &src, num_bytes);
+     return_trace (true);
+   }
+   return_trace (false);
+ }
+
+ /* Serialize a subset Charset in the requested `format`.  Each sid_ranges
+  * entry holds a first SID in `code` and an nLeft count in `glyph`.  Returns
+  * false on allocation failure or when an nLeft value does not fit the
+  * format's field width.  Any format other than 0/1/2 writes just the
+  * format byte and returns true. */
+ bool serialize (hb_serialize_context_t *c,
+ 		 uint8_t format,
+ 		 unsigned int num_glyphs,
+ 		 const hb_vector_t<code_pair_t>& sid_ranges)
+ {
+   TRACE_SERIALIZE (this);
+   Charset *dest = c->extend_min (*this);
+   if (unlikely (!dest)) return_trace (false);
+   dest->format = format;
+
+   const unsigned int num_ranges = sid_ranges.length;
+
+   if (format == 0)
+   {
+     /* Format 0 stores one SID per glyph, so every range is expanded. */
+     const unsigned int body_size = Charset0::min_size + HBUINT16::static_size * (num_glyphs - 1);
+     Charset0 *fmt0 = c->allocate_size<Charset0> (body_size);
+     if (unlikely (!fmt0)) return_trace (false);
+     unsigned int next = 0;
+     for (unsigned int i = 0; i < num_ranges; i++)
+     {
+       const code_pair_t &pair = sid_ranges[i];
+       hb_codepoint_t sid = pair.code;
+       /* nLeft + 1 SIDs per range. */
+       for (int left = (int) pair.glyph; left >= 0; left--)
+ 	fmt0->sids[next++] = sid++;
+     }
+   }
+   else if (format == 1)
+   {
+     const unsigned int body_size = Charset1::min_size + Charset1_Range::static_size * num_ranges;
+     Charset1 *fmt1 = c->allocate_size<Charset1> (body_size);
+     if (unlikely (!fmt1)) return_trace (false);
+     for (unsigned int i = 0; i < num_ranges; i++)
+     {
+       const code_pair_t &pair = sid_ranges[i];
+       /* nLeft is a single byte in format 1. */
+       if (unlikely (pair.glyph > 0xFF))
+ 	return_trace (false);
+       fmt1->ranges[i].first = pair.code;
+       fmt1->ranges[i].nLeft = pair.glyph;
+     }
+   }
+   else if (format == 2)
+   {
+     const unsigned int body_size = Charset2::min_size + Charset2_Range::static_size * num_ranges;
+     Charset2 *fmt2 = c->allocate_size<Charset2> (body_size);
+     if (unlikely (!fmt2)) return_trace (false);
+     for (unsigned int i = 0; i < num_ranges; i++)
+     {
+       const code_pair_t &pair = sid_ranges[i];
+       /* nLeft is two bytes in format 2. */
+       if (unlikely (pair.glyph > 0xFFFF))
+ 	return_trace (false);
+       fmt2->ranges[i].first = pair.code;
+       fmt2->ranges[i].nLeft = pair.glyph;
+     }
+   }
+
+   return_trace (true);
+ }
+
+ /* Total size: min_size plus the size reported by the active format's body.
+  * Returns 0 for an unknown format. */
+ unsigned int get_size (unsigned int num_glyphs) const
+ {
+   unsigned int body_size;
+   switch (format)
+   {
+   case 0: body_size = u.format0.get_size (num_glyphs); break;
+   case 1: body_size = u.format1.get_size (num_glyphs); break;
+   case 2: body_size = u.format2.get_size (num_glyphs); break;
+   default:return 0;
+   }
+   return min_size + body_size;
+ }
+
+ /* Return the SID of `glyph`, or 0 when the glyph is out of range or the
+  * format is unknown. */
+ hb_codepoint_t get_sid (hb_codepoint_t glyph, unsigned int num_glyphs) const
+ {
+   /* The per-format lookups do no bounds checking of their own. */
+   if (unlikely (glyph >= num_glyphs)) return 0;
+
+   const unsigned int fmt = format;
+   if (fmt == 0) return u.format0.get_sid (glyph);
+   if (fmt == 1) return u.format1.get_sid (glyph);
+   if (fmt == 2) return u.format2.get_sid (glyph);
+   return 0;
+ }
+
+ /* Return the glyph mapped to `sid` by the active format, or 0 when the
+  * format is unknown. */
+ hb_codepoint_t get_glyph (hb_codepoint_t sid, unsigned int num_glyphs) const
+ {
+   const unsigned int fmt = format;
+   if (fmt == 0) return u.format0.get_glyph (sid, num_glyphs);
+   if (fmt == 1) return u.format1.get_glyph (sid, num_glyphs);
+   if (fmt == 2) return u.format2.get_glyph (sid, num_glyphs);
+   return 0;
+ }
+
+ /* Bounds-check the format byte, then hand over to the body selected by it;
+  * an unknown format fails sanitization. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+   TRACE_SANITIZE (this);
+   if (unlikely (!c->check_struct (this)))
+     return_trace (false);
+
+   const unsigned int fmt = format;
+   if (fmt == 0)
+     return_trace (u.format0.sanitize (c, c->get_num_glyphs ()));
+   else if (fmt == 1)
+     return_trace (u.format1.sanitize (c, c->get_num_glyphs ()));
+   else if (fmt == 2)
+     return_trace (u.format2.sanitize (c, c->get_num_glyphs ()));
+   else
+     return_trace (false);
+ }
+
+ HBUINT8 format;
+ union {
+ Charset0 format0;
+ Charset1 format1;
+ Charset2 format2;
+ } u;
+
+ DEFINE_SIZE_MIN (1);
+};
+
+/* String INDEX with a serializer that keeps only the strings selected by a
+ * remapping table. */
+struct CFF1StringIndex : CFF1Index
+{
+  /* Write the entries of `strings` that `sidmap` maps to a valid value, each
+   * placed at its mapped position.  An empty source or an empty map yields
+   * an INDEX whose count is 0. */
+  bool serialize (hb_serialize_context_t *c, const CFF1StringIndex &strings,
+		  const hb_inc_bimap_t &sidmap)
+  {
+    TRACE_SERIALIZE (this);
+    const bool nothing_to_write = (strings.count == 0) || (sidmap.get_population () == 0);
+    if (unlikely (nothing_to_write))
+    {
+      if (unlikely (!c->extend_min (this->count)))
+	return_trace (false);
+      count = 0;
+      return_trace (true);
+    }
+
+    byte_str_array_t remapped;
+    remapped.init ();
+    if (!remapped.resize (sidmap.get_population ()))
+      return_trace (false);
+
+    for (unsigned int src_idx = 0; src_idx < strings.count; src_idx++)
+    {
+      const hb_codepoint_t dst_idx = sidmap[src_idx];
+      if (dst_idx == HB_MAP_VALUE_INVALID)
+	continue; /* this string is not retained */
+      remapped[dst_idx] = strings[src_idx];
+    }
+
+    const bool ok = CFF1Index::serialize (c, remapped);
+    remapped.fini ();
+    return_trace (ok);
+  }
+};
+
+/* Interpreter environment for the CFF1 Top DICT; adds two byte offsets that
+ * start out at 0. */
+struct cff1_top_dict_interp_env_t : num_interp_env_t
+{
+  cff1_top_dict_interp_env_t ()
+    : num_interp_env_t ()
+  {
+    prev_offset = 0;
+    last_offset = 0;
+  }
+
+  unsigned int prev_offset;
+  unsigned int last_offset;
+};
+
+/* Fixed-size table of the SID-valued Top DICT name entries, indexed by
+ * name_dict_val_index_t. */
+struct name_dict_values_t
+{
+  enum name_dict_val_index_t
+  {
+    version,
+    notice,
+    copyright,
+    fullName,
+    familyName,
+    weight,
+    postscript,
+    fontName,
+    baseFontName,
+    registry,
+    ordering,
+
+    ValCount
+  };
+
+  /* Mark every slot as undefined. */
+  void init ()
+  {
+    for (unsigned int &slot : values)
+      slot = CFF_UNDEF_SID;
+  }
+
+  unsigned int& operator[] (unsigned int i)
+  {
+    assert (i < ValCount);
+    return values[i];
+  }
+
+  unsigned int operator[] (unsigned int i) const
+  {
+    assert (i < ValCount);
+    return values[i];
+  }
+
+  /* Translate a name operator into its slot index.  Operators not listed
+   * here resolve to `version`. */
+  static enum name_dict_val_index_t name_op_to_index (op_code_t op)
+  {
+    switch (op) {
+      case OpCode_Notice:       return notice;
+      case OpCode_Copyright:    return copyright;
+      case OpCode_FullName:     return fullName;
+      case OpCode_FamilyName:   return familyName;
+      case OpCode_Weight:       return weight;
+      case OpCode_PostScript:   return postscript;
+      case OpCode_FontName:     return fontName;
+      case OpCode_BaseFontName: return baseFontName;
+      case OpCode_version:
+      default: // can't happen - just make some compiler happy
+	return version;
+    }
+  }
+
+  unsigned int values[ValCount];
+};
+
+struct cff1_top_dict_val_t : op_str_t /* one recorded Top DICT operator, extended with the position of its last operand */
+{
+ unsigned int last_arg_offset; /* set by cff1_top_dict_opset_t::process_op as (env.last_offset-1) - dictval.opStart */
+};
+
+/* Values collected while interpreting a CFF1 Top DICT: the name SIDs, the
+ * ROS/CID data and the offsets of the tables the Top DICT points at. */
+struct cff1_top_dict_values_t : top_dict_values_t<cff1_top_dict_val_t>
+{
+  /* Reset every field to its default before a Top DICT is interpreted. */
+  void init ()
+  {
+    top_dict_values_t<cff1_top_dict_val_t>::init ();
+
+    nameSIDs.init ();
+    /* Nothing in this header assigns ros_supplement_offset, so give it a
+     * defined value here instead of leaving it indeterminate. */
+    ros_supplement_offset = 0;
+    ros_supplement = 0;
+    cidCount = 8720; /* default CIDCount per the CFF specification */
+    EncodingOffset = 0;
+    CharsetOffset = 0;
+    FDSelectOffset = 0;
+    privateDictInfo.init ();
+  }
+  void fini () { top_dict_values_t<cff1_top_dict_val_t>::fini (); }
+
+  /* A font is treated as CID-keyed once a ROS operator has set the
+   * `registry` SID. */
+  bool is_CID () const
+  { return nameSIDs[name_dict_values_t::registry] != CFF_UNDEF_SID; }
+
+  name_dict_values_t nameSIDs;
+  unsigned int ros_supplement_offset;
+  unsigned int ros_supplement;
+  unsigned int cidCount;
+
+  unsigned int EncodingOffset;
+  unsigned int CharsetOffset;
+  unsigned int FDSelectOffset;
+  table_info_t privateDictInfo;
+};
+
+struct cff1_top_dict_opset_t : top_dict_opset_t<cff1_top_dict_val_t> /* operator handler for the CFF1 Top DICT */
+{
+ static void process_op (op_code_t op, cff1_top_dict_interp_env_t& env, cff1_top_dict_values_t& dictval) /* stores the operands this file needs in dictval, then records the operator with dictval.add_op */
+ {
+ cff1_top_dict_val_t val;
+ val.last_arg_offset = (env.last_offset-1) - dictval.opStart; /* offset to the last argument */
+
+ switch (op) {
+ case OpCode_version:
+ case OpCode_Notice:
+ case OpCode_Copyright:
+ case OpCode_FullName:
+ case OpCode_FamilyName:
+ case OpCode_Weight:
+ case OpCode_PostScript:
+ case OpCode_BaseFontName:
+ dictval.nameSIDs[name_dict_values_t::name_op_to_index (op)] = env.argStack.pop_uint (); /* keep the operand in the name slot that matches the operator */
+ env.clear_args ();
+ break;
+ case OpCode_isFixedPitch:
+ case OpCode_ItalicAngle:
+ case OpCode_UnderlinePosition:
+ case OpCode_UnderlineThickness:
+ case OpCode_PaintType:
+ case OpCode_CharstringType:
+ case OpCode_UniqueID:
+ case OpCode_StrokeWidth:
+ case OpCode_SyntheticBase:
+ case OpCode_CIDFontVersion:
+ case OpCode_CIDFontRevision:
+ case OpCode_CIDFontType:
+ case OpCode_UIDBase:
+ case OpCode_FontBBox:
+ case OpCode_XUID:
+ case OpCode_BaseFontBlend:
+ env.clear_args (); /* operand values are not retained for these operators */
+ break;
+
+ case OpCode_CIDCount:
+ dictval.cidCount = env.argStack.pop_uint ();
+ env.clear_args ();
+ break;
+
+ case OpCode_ROS:
+ dictval.ros_supplement = env.argStack.pop_uint (); /* operands come off the stack top-first: supplement, ordering, registry */
+ dictval.nameSIDs[name_dict_values_t::ordering] = env.argStack.pop_uint ();
+ dictval.nameSIDs[name_dict_values_t::registry] = env.argStack.pop_uint ();
+ env.clear_args ();
+ break;
+
+ case OpCode_Encoding:
+ dictval.EncodingOffset = env.argStack.pop_uint ();
+ env.clear_args ();
+ if (unlikely (dictval.EncodingOffset == 0)) return; /* offset 0: leave without recording the operator */
+ break;
+
+ case OpCode_charset:
+ dictval.CharsetOffset = env.argStack.pop_uint ();
+ env.clear_args ();
+ if (unlikely (dictval.CharsetOffset == 0)) return; /* offset 0: leave without recording the operator */
+ break;
+
+ case OpCode_FDSelect:
+ dictval.FDSelectOffset = env.argStack.pop_uint ();
+ env.clear_args ();
+ break;
+
+ case OpCode_Private:
+ dictval.privateDictInfo.offset = env.argStack.pop_uint (); /* operands come off the stack top-first: offset, then size */
+ dictval.privateDictInfo.size = env.argStack.pop_uint ();
+ env.clear_args ();
+ break;
+
+ default:
+ env.last_offset = env.str_ref.offset;
+ top_dict_opset_t<cff1_top_dict_val_t>::process_op (op, env, dictval); /* everything else is handled by the generic Top DICT opset */
+ /* Record this operand below if stack is empty, otherwise done */
+ if (!env.argStack.is_empty ()) return;
+ break;
+ }
+
+ if (unlikely (env.in_error ())) return; /* nothing is recorded once the environment is in error */
+
+ dictval.add_op (op, env.str_ref, val);
+ }
+};
+
+/* Values collected from one CFF1 Font DICT: its recorded operators, the
+ * location of its Private DICT and its FontName SID. */
+struct cff1_font_dict_values_t : dict_values_t<op_str_t>
+{
+  /* Reset the base, then mark the font name as undefined and clear the
+   * Private DICT info. */
+  void init ()
+  {
+    dict_values_t<op_str_t>::init ();
+    fontName = CFF_UNDEF_SID;
+    privateDictInfo.init ();
+  }
+
+  void fini ()
+  {
+    dict_values_t<op_str_t>::fini ();
+  }
+
+  table_info_t privateDictInfo;
+  unsigned int fontName;
+};
+
+struct cff1_font_dict_opset_t : dict_opset_t /* operator handler for a CFF1 Font DICT */
+{
+ static void process_op (op_code_t op, num_interp_env_t& env, cff1_font_dict_values_t& dictval) /* stores FontName and the Private DICT location, then records the operator with dictval.add_op */
+ {
+ switch (op) {
+ case OpCode_FontName:
+ dictval.fontName = env.argStack.pop_uint ();
+ env.clear_args ();
+ break;
+ case OpCode_FontMatrix:
+ case OpCode_PaintType:
+ env.clear_args (); /* operand values are not retained for these operators */
+ break;
+ case OpCode_Private:
+ dictval.privateDictInfo.offset = env.argStack.pop_uint (); /* operands come off the stack top-first: offset, then size */
+ dictval.privateDictInfo.size = env.argStack.pop_uint ();
+ env.clear_args ();
+ break;
+
+ default:
+ dict_opset_t::process_op (op, env); /* everything else is handled by the generic DICT opset */
+ if (!env.argStack.is_empty ()) return; /* stack not empty: return without recording anything */
+ break;
+ }
+
+ if (unlikely (env.in_error ())) return; /* nothing is recorded once the environment is in error */
+
+ dictval.add_op (op, env.str_ref);
+ }
+};
+
+/* Values collected from a CFF1 Private DICT: the recorded operators (of type
+ * VAL), the Subrs offset and a pointer to the local subroutine INDEX. */
+template <typename VAL>
+struct cff1_private_dict_values_base_t : dict_values_t<VAL>
+{
+  /* Start with no local subroutines: offset 0 and the Null INDEX. */
+  void init ()
+  {
+    dict_values_t<VAL>::init ();
+    localSubrs = &Null (CFF1Subrs);
+    subrsOffset = 0;
+  }
+
+  void fini ()
+  {
+    dict_values_t<VAL>::fini ();
+  }
+
+  unsigned int subrsOffset;
+  const CFF1Subrs *localSubrs;
+};
+
+typedef cff1_private_dict_values_base_t<op_str_t> cff1_private_dict_values_subset_t;
+typedef cff1_private_dict_values_base_t<num_dict_val_t> cff1_private_dict_values_t;
+
+struct cff1_private_dict_opset_t : dict_opset_t /* Private DICT operator handler that keeps single numeric operands */
+{
+ static void process_op (op_code_t op, num_interp_env_t& env, cff1_private_dict_values_t& dictval) /* records each operator together with `val`; only the single-operand operators fill val.single_val */
+ {
+ num_dict_val_t val;
+ val.init ();
+
+ switch (op) {
+ case OpCode_BlueValues:
+ case OpCode_OtherBlues:
+ case OpCode_FamilyBlues:
+ case OpCode_FamilyOtherBlues:
+ case OpCode_StemSnapH:
+ case OpCode_StemSnapV:
+ env.clear_args (); /* operand values are not retained for these operators */
+ break;
+ case OpCode_StdHW:
+ case OpCode_StdVW:
+ case OpCode_BlueScale:
+ case OpCode_BlueShift:
+ case OpCode_BlueFuzz:
+ case OpCode_ForceBold:
+ case OpCode_LanguageGroup:
+ case OpCode_ExpansionFactor:
+ case OpCode_initialRandomSeed:
+ case OpCode_defaultWidthX:
+ case OpCode_nominalWidthX:
+ val.single_val = env.argStack.pop_num (); /* keep the operand popped from the top of the stack */
+ env.clear_args ();
+ break;
+ case OpCode_Subrs:
+ dictval.subrsOffset = env.argStack.pop_uint ();
+ env.clear_args ();
+ break;
+
+ default:
+ dict_opset_t::process_op (op, env); /* everything else is handled by the generic DICT opset */
+ if (!env.argStack.is_empty ()) return; /* stack not empty: return without recording anything */
+ break;
+ }
+
+ if (unlikely (env.in_error ())) return; /* nothing is recorded once the environment is in error */
+
+ dictval.add_op (op, env.str_ref, val);
+ }
+};
+
+struct cff1_private_dict_opset_subset : dict_opset_t /* Private DICT operator handler used with cff1_private_dict_values_subset_t */
+{
+ static void process_op (op_code_t op, num_interp_env_t& env, cff1_private_dict_values_subset_t& dictval) /* keeps only the Subrs offset and records every operator with dictval.add_op */
+ {
+ switch (op) {
+ case OpCode_BlueValues:
+ case OpCode_OtherBlues:
+ case OpCode_FamilyBlues:
+ case OpCode_FamilyOtherBlues:
+ case OpCode_StemSnapH:
+ case OpCode_StemSnapV:
+ case OpCode_StdHW:
+ case OpCode_StdVW:
+ case OpCode_BlueScale:
+ case OpCode_BlueShift:
+ case OpCode_BlueFuzz:
+ case OpCode_ForceBold:
+ case OpCode_LanguageGroup:
+ case OpCode_ExpansionFactor:
+ case OpCode_initialRandomSeed:
+ case OpCode_defaultWidthX:
+ case OpCode_nominalWidthX:
+ env.clear_args (); /* operand values are not retained for these operators */
+ break;
+
+ case OpCode_Subrs:
+ dictval.subrsOffset = env.argStack.pop_uint ();
+ env.clear_args ();
+ break;
+
+ default:
+ dict_opset_t::process_op (op, env); /* everything else is handled by the generic DICT opset */
+ if (!env.argStack.is_empty ()) return; /* stack not empty: return without recording anything */
+ break;
+ }
+
+ if (unlikely (env.in_error ())) return; /* nothing is recorded once the environment is in error */
+
+ dictval.add_op (op, env.str_ref);
+ }
+};
+
+typedef dict_interpreter_t<cff1_top_dict_opset_t, cff1_top_dict_values_t, cff1_top_dict_interp_env_t> cff1_top_dict_interpreter_t;
+typedef dict_interpreter_t<cff1_font_dict_opset_t, cff1_font_dict_values_t> cff1_font_dict_interpreter_t;
+
+typedef CFF1Index CFF1NameIndex;
+typedef CFF1IndexOf<TopDict> CFF1TopDictIndex;
+
+/* View over an existing cff1_font_dict_values_t that carries its own
+ * FontName SID and Private DICT info. */
+struct cff1_font_dict_values_mod_t
+{
+  cff1_font_dict_values_mod_t()
+  {
+    init ();
+  }
+
+  /* Default state: the Null font dict and an undefined font name. */
+  void init ()
+  {
+    init (&Null (cff1_font_dict_values_t), CFF_UNDEF_SID);
+  }
+
+  /* Attach to `base_` with font name `fontName_`; the Private DICT info is
+   * reset. */
+  void init (const cff1_font_dict_values_t *base_,
+	     unsigned int fontName_)
+  {
+    privateDictInfo.init ();
+    fontName = fontName_;
+    base = base_;
+  }
+
+  /* Number of operators recorded in the underlying font dict. */
+  unsigned get_count () const
+  {
+    return base->get_count ();
+  }
+
+  /* The i-th operator of the underlying font dict. */
+  const op_str_t &operator [] (unsigned int i) const
+  {
+    const cff1_font_dict_values_t &dict = *base;
+    return dict[i];
+  }
+
+  const cff1_font_dict_values_t *base;
+  table_info_t privateDictInfo;
+  unsigned int fontName;
+};
+
+struct CFF1FDArray : FDArray<HBUINT16> /* CFF1 Font DICT array */
+{
+ /* FDArray::serialize() requires this partial specialization to compile */
+ template <typename ITER, typename OP_SERIALIZER>
+ bool serialize (hb_serialize_context_t *c, ITER it, OP_SERIALIZER& opszr) /* forwards to the base serializer with both template arguments fixed to cff1_font_dict_values_mod_t */
+ { return FDArray<HBUINT16>::serialize<cff1_font_dict_values_mod_t, cff1_font_dict_values_mod_t> (c, it, opszr); }
+};
+
+} /* namespace CFF */
+
+namespace OT {
+
+using namespace CFF;
+
+struct cff1
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_cff1;
+
+ /* The header is valid when it lies within the blob and its major version
+  * is 1. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+   TRACE_SANITIZE (this);
+   if (!c->check_struct (this))
+     return_trace (false);
+   const bool major_ok = likely (version.major == 1);
+   return_trace (major_ok);
+ }
+
+ template <typename PRIVOPSET, typename PRIVDICTVAL>
+ struct accelerator_templ_t
+ {
+ void init (hb_face_t *face) /* load and sanitize the CFF table of `face`; any failure calls fini (), which drops the blob so is_valid () becomes false */
+ {
+ topDict.init ();
+ fontDicts.init ();
+ privateDicts.init ();
+
+ this->blob = sc.reference_table<cff1> (face);
+
+ /* setup for run-time sanitization */
+ sc.init (this->blob);
+ sc.start_processing ();
+
+ const OT::cff1 *cff = this->blob->template as<OT::cff1> ();
+
+ if (cff == &Null (OT::cff1))
+ { fini (); return; }
+
+ nameIndex = &cff->nameIndex (cff);
+ if ((nameIndex == &Null (CFF1NameIndex)) || !nameIndex->sanitize (&sc))
+ { fini (); return; }
+
+ topDictIndex = &StructAtOffset<CFF1TopDictIndex> (nameIndex, nameIndex->get_size ()); /* the Top DICT INDEX starts right after the Name INDEX */
+ if ((topDictIndex == &Null (CFF1TopDictIndex)) || !topDictIndex->sanitize (&sc) || (topDictIndex->count == 0))
+ { fini (); return; }
+
+ { /* parse top dict */
+ const byte_str_t topDictStr = (*topDictIndex)[0]; /* only the first Top DICT is used */
+ if (unlikely (!topDictStr.sanitize (&sc))) { fini (); return; }
+ cff1_top_dict_interpreter_t top_interp;
+ top_interp.env.init (topDictStr);
+ topDict.init ();
+ if (unlikely (!top_interp.interpret (topDict))) { fini (); return; }
+ }
+
+ if (is_predef_charset ())
+ charset = &Null (Charset); /* predefined charsets are resolved through lookup tables, not a table in the font */
+ else
+ {
+ charset = &StructAtOffsetOrNull<Charset> (cff, topDict.CharsetOffset);
+ if (unlikely ((charset == &Null (Charset)) || !charset->sanitize (&sc))) { fini (); return; }
+ }
+
+ fdCount = 1; /* stays 1 unless the font is CID-keyed */
+ if (is_CID ())
+ {
+ fdArray = &StructAtOffsetOrNull<CFF1FDArray> (cff, topDict.FDArrayOffset);
+ fdSelect = &StructAtOffsetOrNull<CFF1FDSelect> (cff, topDict.FDSelectOffset);
+ if (unlikely ((fdArray == &Null (CFF1FDArray)) || !fdArray->sanitize (&sc) ||
+ (fdSelect == &Null (CFF1FDSelect)) || !fdSelect->sanitize (&sc, fdArray->count)))
+ { fini (); return; }
+
+ fdCount = fdArray->count;
+ }
+ else
+ {
+ fdArray = &Null (CFF1FDArray);
+ fdSelect = &Null (CFF1FDSelect);
+ }
+
+ encoding = &Null (Encoding);
+ if (is_CID ())
+ {
+ if (unlikely (charset == &Null (Charset))) { fini (); return; } /* a CID font with a predefined charset is rejected */
+ }
+ else
+ {
+ if (!is_predef_encoding ())
+ {
+ encoding = &StructAtOffsetOrNull<Encoding> (cff, topDict.EncodingOffset);
+ if (unlikely ((encoding == &Null (Encoding)) || !encoding->sanitize (&sc))) { fini (); return; }
+ }
+ }
+
+ stringIndex = &StructAtOffset<CFF1StringIndex> (topDictIndex, topDictIndex->get_size ()); /* the String INDEX starts right after the Top DICT INDEX */
+ if ((stringIndex == &Null (CFF1StringIndex)) || !stringIndex->sanitize (&sc))
+ { fini (); return; }
+
+ globalSubrs = &StructAtOffset<CFF1Subrs> (stringIndex, stringIndex->get_size ()); /* the Global Subr INDEX starts right after the String INDEX */
+ if ((globalSubrs != &Null (CFF1Subrs)) && !globalSubrs->sanitize (&sc))
+ { fini (); return; }
+
+ charStrings = &StructAtOffsetOrNull<CFF1CharStrings> (cff, topDict.charStringsOffset);
+
+ if ((charStrings == &Null (CFF1CharStrings)) || unlikely (!charStrings->sanitize (&sc)))
+ { fini (); return; }
+
+ num_glyphs = charStrings->count;
+ if (num_glyphs != sc.get_num_glyphs ()) /* the CharStrings count must match the sanitizer's glyph count */
+ { fini (); return; }
+
+ if (unlikely (!privateDicts.resize (fdCount))) /* one Private DICT slot per font dict */
+ { fini (); return; }
+ for (unsigned int i = 0; i < fdCount; i++)
+ privateDicts[i].init ();
+
+ // parse CID font dicts and gather private dicts
+ if (is_CID ())
+ {
+ for (unsigned int i = 0; i < fdCount; i++)
+ {
+ byte_str_t fontDictStr = (*fdArray)[i];
+ if (unlikely (!fontDictStr.sanitize (&sc))) { fini (); return; }
+ cff1_font_dict_values_t *font;
+ cff1_font_dict_interpreter_t font_interp;
+ font_interp.env.init (fontDictStr);
+ font = fontDicts.push ();
+ if (unlikely (font == &Crap (cff1_font_dict_values_t))) { fini (); return; }
+ font->init ();
+ if (unlikely (!font_interp.interpret (*font))) { fini (); return; }
+ PRIVDICTVAL *priv = &privateDicts[i];
+ const byte_str_t privDictStr (StructAtOffset<UnsizedByteStr> (cff, font->privateDictInfo.offset), font->privateDictInfo.size); /* Private DICT bytes: offset is relative to the table start, size comes from the font dict */
+ if (unlikely (!privDictStr.sanitize (&sc))) { fini (); return; }
+ dict_interpreter_t<PRIVOPSET, PRIVDICTVAL> priv_interp;
+ priv_interp.env.init (privDictStr);
+ priv->init ();
+ if (unlikely (!priv_interp.interpret (*priv))) { fini (); return; }
+
+ priv->localSubrs = &StructAtOffsetOrNull<CFF1Subrs> (&privDictStr, priv->subrsOffset);
+ if (priv->localSubrs != &Null (CFF1Subrs) &&
+ unlikely (!priv->localSubrs->sanitize (&sc)))
+ { fini (); return; }
+ }
+ }
+ else /* non-CID */
+ {
+ cff1_top_dict_values_t *font = &topDict; /* the Top DICT itself supplies the Private DICT location */
+ PRIVDICTVAL *priv = &privateDicts[0];
+
+ const byte_str_t privDictStr (StructAtOffset<UnsizedByteStr> (cff, font->privateDictInfo.offset), font->privateDictInfo.size);
+ if (unlikely (!privDictStr.sanitize (&sc))) { fini (); return; }
+ dict_interpreter_t<PRIVOPSET, PRIVDICTVAL> priv_interp;
+ priv_interp.env.init (privDictStr);
+ priv->init ();
+ if (unlikely (!priv_interp.interpret (*priv))) { fini (); return; }
+
+ priv->localSubrs = &StructAtOffsetOrNull<CFF1Subrs> (&privDictStr, priv->subrsOffset);
+ if (priv->localSubrs != &Null (CFF1Subrs) &&
+ unlikely (!priv->localSubrs->sanitize (&sc)))
+ { fini (); return; }
+ }
+ }
+
+ void fini () /* release everything init () acquired; afterwards is_valid () is false */
+ {
+ sc.end_processing ();
+ topDict.fini ();
+ fontDicts.fini_deep ();
+ privateDicts.fini_deep ();
+ hb_blob_destroy (blob);
+ blob = nullptr; /* is_valid () tests this pointer */
+ }
+
+ bool is_valid () const { return blob; } /* true while a blob is held; fini () resets it to nullptr */
+ bool is_CID () const { return topDict.is_CID (); }
+
+ bool is_predef_charset () const { return topDict.CharsetOffset <= ExpertSubsetCharset; } /* offsets up to ExpertSubsetCharset select a predefined charset rather than a table */
+
+ /* Map a standard-encoding `code` to a glyph ID, or 0 when the code has no
+  * SID or the SID cannot be resolved to a glyph. */
+ unsigned int std_code_to_glyph (hb_codepoint_t code) const
+ {
+   const hb_codepoint_t sid = lookup_standard_encoding_for_sid (code);
+   if (unlikely (sid == CFF_UNDEF_SID))
+     return 0;
+
+   if (charset == &Null (Charset))
+   {
+     /* Without a charset table only the ISOAdobe charset is handled, and
+      * there the SID is returned as the glyph ID. */
+     const bool sid_is_glyph = (topDict.CharsetOffset == ISOAdobeCharset)
+ 			      && (code <= 228 /*zcaron*/);
+     return sid_is_glyph ? sid : 0;
+   }
+
+   return charset->get_glyph (sid, num_glyphs);
+ }
+
+ bool is_predef_encoding () const { return topDict.EncodingOffset <= ExpertEncoding; } /* offsets up to ExpertEncoding select a predefined encoding rather than a table */
+
+ /* Return the character code of `glyph`: from the font's Encoding table when
+  * there is one, otherwise through the glyph's SID and the predefined
+  * encoding chosen by the Top DICT.  Returns 0 when no code is found. */
+ hb_codepoint_t glyph_to_code (hb_codepoint_t glyph) const
+ {
+   if (encoding != &Null (Encoding))
+     return encoding->get_code (glyph);
+
+   const hb_codepoint_t sid = glyph_to_sid (glyph);
+   if (sid == 0) return 0;
+
+   switch (topDict.EncodingOffset)
+   {
+   case StandardEncoding:
+     return lookup_standard_encoding_for_code (sid);
+   case ExpertEncoding:
+     return lookup_expert_encoding_for_code (sid);
+   default:
+     return 0;
+   }
+ }
+
+ /* Return the SID of `glyph`: from the font's Charset table when there is
+  * one, otherwise from the predefined charset chosen by the Top DICT.
+  * Returns 0 when no SID is found. */
+ hb_codepoint_t glyph_to_sid (hb_codepoint_t glyph) const
+ {
+   if (charset != &Null (Charset))
+     return charset->get_sid (glyph, num_glyphs);
+
+   switch (topDict.CharsetOffset)
+   {
+   case ISOAdobeCharset:
+     /* Glyph ID and SID are the same number in this charset. */
+     return (glyph <= 228 /*zcaron*/) ? glyph : 0;
+   case ExpertCharset:
+     return lookup_expert_charset_for_sid (glyph);
+   case ExpertSubsetCharset:
+     return lookup_expert_subset_charset_for_sid (glyph);
+   default:
+     return 0;
+   }
+ }
+
+ /* Return the glyph carrying `sid`: from the font's Charset table when there
+  * is one, otherwise from the predefined charset chosen by the Top DICT.
+  * Returns 0 when no glyph is found. */
+ hb_codepoint_t sid_to_glyph (hb_codepoint_t sid) const
+ {
+   if (charset != &Null (Charset))
+     return charset->get_glyph (sid, num_glyphs);
+
+   switch (topDict.CharsetOffset)
+   {
+   case ISOAdobeCharset:
+     /* Glyph ID and SID are the same number in this charset. */
+     return (sid <= 228 /*zcaron*/) ? sid : 0;
+   case ExpertCharset:
+     return lookup_expert_charset_for_glyph (sid);
+   case ExpertSubsetCharset:
+     return lookup_expert_subset_charset_for_glyph (sid);
+   default:
+     return 0;
+   }
+ }
+
+ protected:
+ hb_blob_t *blob;
+ hb_sanitize_context_t sc;
+
+ public:
+ const Encoding *encoding;
+ const Charset *charset;
+ const CFF1NameIndex *nameIndex;
+ const CFF1TopDictIndex *topDictIndex;
+ const CFF1StringIndex *stringIndex;
+ const CFF1Subrs *globalSubrs;
+ const CFF1CharStrings *charStrings;
+ const CFF1FDArray *fdArray;
+ const CFF1FDSelect *fdSelect;
+ unsigned int fdCount;
+
+ cff1_top_dict_values_t topDict;
+ hb_vector_t<cff1_font_dict_values_t>
+ fontDicts;
+ hb_vector_t<PRIVDICTVAL> privateDicts;
+
+ unsigned int num_glyphs;
+ };
+
+ struct accelerator_t : accelerator_templ_t<cff1_private_dict_opset_t, cff1_private_dict_values_t>
+ {
+ /* Initialise the base accelerator and, for non-CID fonts, build the sorted
+  * table of glyph names used by get_glyph_from_name ().  A glyph whose name
+  * has no data makes the whole accelerator invalid. */
+ void init (hb_face_t *face)
+ {
+   SUPER::init (face);
+
+   if (!is_valid () || is_CID ())
+     return;
+
+   /* fill glyph_names */
+   for (hb_codepoint_t gid = 0; gid < num_glyphs; gid++)
+   {
+     const hb_codepoint_t sid = glyph_to_sid (gid);
+     gname_t entry;
+     entry.sid = sid;
+     if (sid >= cff1_std_strings_length)
+     {
+       /* SIDs past the standard strings index into the font's String INDEX. */
+       byte_str_t custom = (*stringIndex)[sid - cff1_std_strings_length];
+       entry.name = hb_bytes_t ((const char*)custom.arrayZ, custom.length);
+     }
+     else
+       entry.name = cff1_std_strings (sid);
+
+     if (unlikely (!entry.name.arrayZ))
+     {
+       fini ();
+       return;
+     }
+     glyph_names.push (entry);
+   }
+   glyph_names.qsort ();
+ }
+
+ void fini () /* drop the glyph-name table, then release the base accelerator */
+ {
+ glyph_names.fini ();
+
+ SUPER::fini ();
+ }
+
+ /* Copy the name of `glyph` into `buf` as a NUL-terminated string, truncated
+  * to at most buf_len - 1 characters.
+  *
+  * Returns true when `buf` is null or `buf_len` is 0 (nothing is written),
+  * and true after a name has been copied.  Returns false when the
+  * accelerator is invalid, the font is CID-keyed, or the name is empty. */
+ bool get_glyph_name (hb_codepoint_t glyph,
+ 		      char *buf, unsigned int buf_len) const
+ {
+   if (!buf) return true;
+   if (unlikely (!is_valid ())) return false;
+   if (is_CID()) return false;
+   /* A zero-sized buffer cannot even hold the terminator.  Without this guard
+    * `buf_len - 1` below wraps around to UINT_MAX, so the copy and the
+    * terminator would be written past the end of `buf`. */
+   if (unlikely (!buf_len)) return true;
+
+   hb_codepoint_t sid = glyph_to_sid (glyph);
+   const char *str;
+   size_t str_len;
+   if (sid < cff1_std_strings_length)
+   {
+     hb_bytes_t byte_str = cff1_std_strings (sid);
+     str = byte_str.arrayZ;
+     str_len = byte_str.length;
+   }
+   else
+   {
+     /* SIDs past the standard strings index into the font's String INDEX. */
+     byte_str_t ubyte_str = (*stringIndex)[sid - cff1_std_strings_length];
+     str = (const char *)ubyte_str.arrayZ;
+     str_len = ubyte_str.length;
+   }
+   if (!str_len) return false;
+
+   /* Leave room for the terminating NUL. */
+   unsigned int len = hb_min (buf_len - 1, str_len);
+   strncpy (buf, (const char*)str, len);
+   buf[len] = '\0';
+   return true;
+ }
+
+ /* Look up a glyph by name.  A negative `len` means `name` is NUL-terminated.
+  * On success stores the glyph ID in `*glyph` and returns true.  Returns
+  * false for an empty name, an unknown name, or a non-zero SID that maps to
+  * no glyph. */
+ bool get_glyph_from_name (const char *name, int len,
+ 			    hb_codepoint_t *glyph) const
+ {
+   if (len < 0)
+     len = strlen (name);
+   if (unlikely (!len))
+     return false;
+
+   gname_t key = { hb_bytes_t (name, len), 0 };
+   const gname_t *match = glyph_names.bsearch (key);
+   if (!match)
+     return false;
+
+   const hb_codepoint_t gid = sid_to_glyph (match->sid);
+   /* Glyph 0 is only a valid answer for SID 0. */
+   if (gid == 0 && match->sid != 0)
+     return false;
+
+   *glyph = gid;
+   return true;
+ }
+
+ HB_INTERNAL bool get_extents (hb_font_t *font, hb_codepoint_t glyph, hb_glyph_extents_t *extents) const;
+ HB_INTERNAL bool get_seac_components (hb_codepoint_t glyph, hb_codepoint_t *base, hb_codepoint_t *accent) const;
+#ifdef HB_EXPERIMENTAL_API
+ HB_INTERNAL bool get_path (hb_font_t *font, hb_codepoint_t glyph, draw_helper_t &draw_helper) const;
+#endif
+
+ private:
+ /* A glyph name and its SID; ordered by name so the table can be sorted and
+  * binary-searched. */
+ struct gname_t
+ {
+   hb_bytes_t name;
+   uint16_t sid;
+
+   /* Compare the common prefix of the two names first; when it matches,
+    * the length difference decides. */
+   static int cmp (const void *a_, const void *b_)
+   {
+     const gname_t &lhs = *(const gname_t *)a_;
+     const gname_t &rhs = *(const gname_t *)b_;
+     int minlen = hb_min (lhs.name.length, rhs.name.length);
+     const int order = strncmp (lhs.name.arrayZ, rhs.name.arrayZ, minlen);
+     if (order != 0)
+       return order;
+     return lhs.name.length - rhs.name.length;
+   }
+
+   /* Member form: the argument `a` is the left operand, `this` the right. */
+   int cmp (const gname_t &a) const
+   {
+     return cmp (&a, this);
+   }
+ };
+
+ hb_sorted_vector_t<gname_t> glyph_names;
+
+ typedef accelerator_templ_t<cff1_private_dict_opset_t, cff1_private_dict_values_t> SUPER;
+ };
+
+ struct accelerator_subset_t : accelerator_templ_t<cff1_private_dict_opset_subset, cff1_private_dict_values_subset_t> {};
+
+ bool subset (hb_subset_context_t *c) const { return hb_subset_cff1 (c); }
+
+ protected:
+ HB_INTERNAL static hb_codepoint_t lookup_standard_encoding_for_code (hb_codepoint_t sid);
+ HB_INTERNAL static hb_codepoint_t lookup_expert_encoding_for_code (hb_codepoint_t sid);
+ HB_INTERNAL static hb_codepoint_t lookup_expert_charset_for_sid (hb_codepoint_t glyph);
+ HB_INTERNAL static hb_codepoint_t lookup_expert_subset_charset_for_sid (hb_codepoint_t glyph);
+ HB_INTERNAL static hb_codepoint_t lookup_expert_charset_for_glyph (hb_codepoint_t sid);
+ HB_INTERNAL static hb_codepoint_t lookup_expert_subset_charset_for_glyph (hb_codepoint_t sid);
+ HB_INTERNAL static hb_codepoint_t lookup_standard_encoding_for_sid (hb_codepoint_t code);
+
+ public:
+ FixedVersion<HBUINT8> version; /* Version of CFF table. set to 0x0100u */
+ OffsetTo<CFF1NameIndex, HBUINT8> nameIndex; /* headerSize = Offset to Name INDEX. */
+ HBUINT8 offSize; /* offset size (unused?) */
+
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+
+struct cff1_accelerator_t : cff1::accelerator_t {};
+} /* namespace OT */
+
+#endif /* HB_OT_CFF1_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-cff2-table.cc b/thirdparty/harfbuzz/src/hb-ot-cff2-table.cc
new file mode 100644
index 0000000000..ac0feeee21
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-cff2-table.cc
@@ -0,0 +1,215 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_FONT_CFF
+
+#include "hb-ot-cff2-table.hh"
+#include "hb-cff2-interp-cs.hh"
+#include "hb-draw.hh"
+
+using namespace CFF;
+
+/* Running bounding box plus a flag telling whether a path is currently open;
+ * filled in while a charstring is interpreted for its extents. */
+struct cff2_extents_param_t
+{
+  /* Start with no open path and an inverted box (min above max), so the
+   * first point always updates every bound. */
+  void init ()
+  {
+    path_open = false;
+    min_x.set_int (INT_MAX);
+    max_x.set_int (INT_MIN);
+    min_y.set_int (INT_MAX);
+    max_y.set_int (INT_MIN);
+  }
+
+  void start_path () { path_open = true; }
+  void end_path () { path_open = false; }
+  bool is_path_open () const { return path_open; }
+
+  /* Grow the box so that it contains `pt`. */
+  void update_bounds (const point_t &pt)
+  {
+    if (pt.x < min_x) min_x = pt.x;
+    if (pt.y < min_y) min_y = pt.y;
+    if (pt.x > max_x) max_x = pt.x;
+    if (pt.y > max_y) max_y = pt.y;
+  }
+
+  bool path_open;
+  number_t min_x;
+  number_t min_y;
+  number_t max_x;
+  number_t max_y;
+};
+
+/* Path callbacks that only track the bounding box of what is drawn. */
+struct cff2_path_procs_extents_t : path_procs_t<cff2_path_procs_extents_t, cff2_cs_interp_env_t, cff2_extents_param_t>
+{
+  /* A move closes the current path; its target point is not added to the
+   * bounds. */
+  static void moveto (cff2_cs_interp_env_t &env, cff2_extents_param_t& param, const point_t &pt)
+  {
+    param.end_path ();
+    env.moveto (pt);
+  }
+
+  static void line (cff2_cs_interp_env_t &env, cff2_extents_param_t& param, const point_t &pt1)
+  {
+    open_path_if_closed (env, param);
+    env.moveto (pt1);
+    param.update_bounds (env.get_pt ());
+  }
+
+  static void curve (cff2_cs_interp_env_t &env, cff2_extents_param_t& param, const point_t &pt1, const point_t &pt2, const point_t &pt3)
+  {
+    open_path_if_closed (env, param);
+    /* include control points */
+    param.update_bounds (pt1);
+    param.update_bounds (pt2);
+    env.moveto (pt3);
+    param.update_bounds (env.get_pt ());
+  }
+
+  private:
+  /* The first segment after a move opens the path and adds the current
+   * point, where the segment starts, to the bounds. */
+  static void open_path_if_closed (cff2_cs_interp_env_t &env, cff2_extents_param_t& param)
+  {
+    if (param.is_path_open ())
+      return;
+    param.start_path ();
+    param.update_bounds (env.get_pt ());
+  }
+};
+
+struct cff2_cs_opset_extents_t : cff2_cs_opset_t<cff2_cs_opset_extents_t, cff2_extents_param_t, cff2_path_procs_extents_t> {};
+
+/* Interpret the charstring of `glyph` and store its scaled bounding box in
+ * `extents`.  Returns false when the accelerator is invalid, the glyph is
+ * out of range, or interpretation fails.  An axis on which nothing was drawn
+ * gets a zero bearing and a zero size. */
+bool OT::cff2::accelerator_t::get_extents (hb_font_t *font,
+					    hb_codepoint_t glyph,
+					    hb_glyph_extents_t *extents) const
+{
+#ifdef HB_NO_OT_FONT_CFF
+  /* XXX Remove check when this code moves to .hh file. */
+  return true;
+#endif
+
+  if (unlikely (!is_valid () || (glyph >= num_glyphs))) return false;
+
+  unsigned int fd = fdSelect->get_fd (glyph);
+  cff2_cs_interpreter_t<cff2_cs_opset_extents_t, cff2_extents_param_t> interp;
+  const byte_str_t str = (*charStrings)[glyph];
+  interp.env.init (str, *this, fd, font->coords, font->num_coords);
+  cff2_extents_param_t bounds;
+  bounds.init ();
+  if (unlikely (!interp.interpret (bounds))) return false;
+
+  /* Horizontal extent: bearing at the left edge, width to the right. */
+  const bool no_width = bounds.min_x >= bounds.max_x;
+  if (no_width)
+  {
+    extents->width = 0;
+    extents->x_bearing = 0;
+  }
+  else
+  {
+    extents->x_bearing = font->em_scalef_x (bounds.min_x.to_real ());
+    extents->width = font->em_scalef_x (bounds.max_x.to_real () - bounds.min_x.to_real ());
+  }
+
+  /* Vertical extent: bearing at the top edge, height is min - max. */
+  const bool no_height = bounds.min_y >= bounds.max_y;
+  if (no_height)
+  {
+    extents->height = 0;
+    extents->y_bearing = 0;
+  }
+  else
+  {
+    extents->y_bearing = font->em_scalef_y (bounds.max_y.to_real ());
+    extents->height = font->em_scalef_y (bounds.min_y.to_real () - bounds.max_y.to_real ());
+  }
+
+  return true;
+}
+
+#ifdef HB_EXPERIMENTAL_API
+/* Forwards path segments to a draw_helper_t after scaling each coordinate
+ * with the font's em_scalef_x / em_scalef_y. */
+struct cff2_path_param_t
+{
+  cff2_path_param_t (hb_font_t *font_, draw_helper_t &draw_helper_)
+    : draw_helper (&draw_helper_), font (font_)
+  {}
+
+  void move_to (const point_t &p)
+  {
+    const auto x = font->em_scalef_x (p.x.to_real ());
+    const auto y = font->em_scalef_y (p.y.to_real ());
+    draw_helper->move_to (x, y);
+  }
+
+  void line_to (const point_t &p)
+  {
+    const auto x = font->em_scalef_x (p.x.to_real ());
+    const auto y = font->em_scalef_y (p.y.to_real ());
+    draw_helper->line_to (x, y);
+  }
+
+  /* p1 and p2 are the control points, p3 is the end point. */
+  void cubic_to (const point_t &p1, const point_t &p2, const point_t &p3)
+  {
+    const auto x1 = font->em_scalef_x (p1.x.to_real ());
+    const auto y1 = font->em_scalef_y (p1.y.to_real ());
+    const auto x2 = font->em_scalef_x (p2.x.to_real ());
+    const auto y2 = font->em_scalef_y (p2.y.to_real ());
+    const auto x3 = font->em_scalef_x (p3.x.to_real ());
+    const auto y3 = font->em_scalef_y (p3.y.to_real ());
+    draw_helper->cubic_to (x1, y1, x2, y2, x3, y3);
+  }
+
+  protected:
+  draw_helper_t *draw_helper;
+  hb_font_t *font;
+};
+
+struct cff2_path_procs_path_t : path_procs_t<cff2_path_procs_path_t, cff2_cs_interp_env_t, cff2_path_param_t> /* path callbacks that emit every segment through cff2_path_param_t */
+{
+ static void moveto (cff2_cs_interp_env_t &env, cff2_path_param_t& param, const point_t &pt)
+ {
+ param.move_to (pt); /* emit the segment first ... */
+ env.moveto (pt); /* ... then advance the interpreter's current point */
+ }
+
+ static void line (cff2_cs_interp_env_t &env, cff2_path_param_t& param, const point_t &pt1)
+ {
+ param.line_to (pt1);
+ env.moveto (pt1); /* the line's end becomes the current point */
+ }
+
+ static void curve (cff2_cs_interp_env_t &env, cff2_path_param_t& param, const point_t &pt1, const point_t &pt2, const point_t &pt3)
+ {
+ param.cubic_to (pt1, pt2, pt3);
+ env.moveto (pt3); /* the curve's end point becomes the current point */
+ }
+};
+
+struct cff2_cs_opset_path_t : cff2_cs_opset_t<cff2_cs_opset_path_t, cff2_path_param_t, cff2_path_procs_path_t> {};
+
+/* Interpret the charstring of `glyph` and emit its outline through
+ * `draw_helper`.  Returns false when the accelerator is invalid, the glyph
+ * is out of range, or interpretation fails. */
+bool OT::cff2::accelerator_t::get_path (hb_font_t *font, hb_codepoint_t glyph, draw_helper_t &draw_helper) const
+{
+#ifdef HB_NO_OT_FONT_CFF
+  /* XXX Remove check when this code moves to .hh file. */
+  return true;
+#endif
+
+  if (unlikely (!is_valid () || (glyph >= num_glyphs))) return false;
+
+  unsigned int fd = fdSelect->get_fd (glyph);
+  cff2_cs_interpreter_t<cff2_cs_opset_path_t, cff2_path_param_t> interp;
+  const byte_str_t str = (*charStrings)[glyph];
+  interp.env.init (str, *this, fd, font->coords, font->num_coords);
+  cff2_path_param_t sink (font, draw_helper);
+  const bool drawn = interp.interpret (sink);
+  if (unlikely (!drawn))
+    return false;
+  return true;
+}
+#endif
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-cff2-table.hh b/thirdparty/harfbuzz/src/hb-ot-cff2-table.hh
new file mode 100644
index 0000000000..829217feaa
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-cff2-table.hh
@@ -0,0 +1,531 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+
+#ifndef HB_OT_CFF2_TABLE_HH
+#define HB_OT_CFF2_TABLE_HH
+
+#include "hb-ot-cff-common.hh"
+#include "hb-subset-cff2.hh"
+#include "hb-draw.hh"
+
+namespace CFF {
+
+/*
+ * CFF2 -- Compact Font Format (CFF) Version 2
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/cff2
+ */
+#define HB_OT_TAG_cff2 HB_TAG('C','F','F','2')
+
+typedef CFFIndex<HBUINT32> CFF2Index;
+template <typename Type> struct CFF2IndexOf : CFFIndexOf<HBUINT32, Type> {};
+
+typedef CFF2Index CFF2CharStrings;
+typedef Subrs<HBUINT32> CFF2Subrs;
+
+typedef FDSelect3_4<HBUINT32, HBUINT16> FDSelect4;
+typedef FDSelect3_4_Range<HBUINT32, HBUINT16> FDSelect4_Range;
+
+struct CFF2FDSelect /* FDSelect table as used by CFF2: a one-byte format tag followed by a format 0, 3 or 4 payload (union u). */
+{
+ bool serialize (hb_serialize_context_t *c, const CFF2FDSelect &src, unsigned int num_glyphs) /* Byte-copies src (src.get_size (num_glyphs) bytes) into space allocated from c; false if the allocation fails. */
+ {
+ TRACE_SERIALIZE (this);
+ unsigned int size = src.get_size (num_glyphs);
+ CFF2FDSelect *dest = c->allocate_size<CFF2FDSelect> (size);
+ if (unlikely (!dest)) return_trace (false);
+ memcpy (dest, &src, size);
+ return_trace (true);
+ }
+ /* Total byte size: the format byte plus the selected payload's size; 0 for an unrecognized format. */
+ unsigned int get_size (unsigned int num_glyphs) const
+ {
+ switch (format)
+ {
+ case 0: return format.static_size + u.format0.get_size (num_glyphs);
+ case 3: return format.static_size + u.format3.get_size ();
+ case 4: return format.static_size + u.format4.get_size ();
+ default:return 0;
+ }
+ }
+ /* Returns the payload's get_fd (glyph); the Null object and unrecognized formats yield 0. */
+ hb_codepoint_t get_fd (hb_codepoint_t glyph) const
+ {
+ if (this == &Null (CFF2FDSelect))
+ return 0;
+
+ switch (format)
+ {
+ case 0: return u.format0.get_fd (glyph);
+ case 3: return u.format3.get_fd (glyph);
+ case 4: return u.format4.get_fd (glyph);
+ default:return 0;
+ }
+ }
+ /* Checks the fixed-size part, then delegates to the payload's sanitize (c, fdcount); unrecognized formats are rejected. */
+ bool sanitize (hb_sanitize_context_t *c, unsigned int fdcount) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!c->check_struct (this)))
+ return_trace (false);
+
+ switch (format)
+ {
+ case 0: return_trace (u.format0.sanitize (c, fdcount));
+ case 3: return_trace (u.format3.sanitize (c, fdcount));
+ case 4: return_trace (u.format4.sanitize (c, fdcount));
+ default:return_trace (false);
+ }
+ }
+
+ HBUINT8 format; /* Selects which member of u is active: 0, 3 or 4. */
+ union {
+ FDSelect0 format0;
+ FDSelect3 format3;
+ FDSelect4 format4;
+ } u;
+ public:
+ DEFINE_SIZE_MIN (2);
+};
+
+struct CFF2VariationStore /* A 16-bit size field followed by a VariationStore, as referenced by the Top DICT's vstore offset. */
+{
+ bool sanitize (hb_sanitize_context_t *c) const /* Passes when the fixed part, the `size` bytes starting at varStore, and varStore itself all check out. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this)) && c->check_range (&varStore, size) && varStore.sanitize (c));
+ }
+ /* Byte-copies varStore->get_size () bytes of *varStore into space allocated from c; false if the allocation fails. */
+ bool serialize (hb_serialize_context_t *c, const CFF2VariationStore *varStore)
+ {
+ TRACE_SERIALIZE (this);
+ unsigned int size_ = varStore->get_size ();
+ CFF2VariationStore *dest = c->allocate_size<CFF2VariationStore> (size_);
+ if (unlikely (!dest)) return_trace (false);
+ memcpy (dest, varStore, size_);
+ return_trace (true);
+ }
+ /* Whole-object size: the bytes of the size field itself plus the value it holds. */
+ unsigned int get_size () const { return HBUINT16::static_size + size; }
+
+ HBUINT16 size;
+ VariationStore varStore;
+
+ DEFINE_SIZE_MIN (2 + VariationStore::min_size);
+};
+
+struct cff2_top_dict_values_t : top_dict_values_t<> /* Top DICT values extended with the two offsets that cff2_top_dict_opset_t captures. */
+{
+ void init () /* Base init, then both offsets start at 0. */
+ {
+ top_dict_values_t<>::init ();
+ vstoreOffset = 0;
+ FDSelectOffset = 0;
+ }
+ void fini () { top_dict_values_t<>::fini (); }
+
+ unsigned int vstoreOffset; /* Operand of OpCode_vstore. */
+ unsigned int FDSelectOffset; /* Operand of OpCode_FDSelect. */
+};
+
+struct cff2_top_dict_opset_t : top_dict_opset_t<> /* CFF2 Top DICT operator handler: captures the vstore / FDSelect offsets, defers other ops to the base opset, and records completed ops in dictval. */
+{
+  static void process_op (op_code_t op, num_interp_env_t& env, cff2_top_dict_values_t& dictval)
+  {
+    switch (op) {
+      case OpCode_FontMatrix:
+        {
+          /* The matrix operands are not decoded here; only the raw op bytes
+           * are recorded, after which the operands are discarded. */
+          dictval.add_op (op, env.str_ref);
+          env.clear_args ();
+        }
+        break;
+      /* NOTE(review): after the break above the common add_op () at the end of this function runs as well, so FontMatrix is recorded twice -- confirm this is intended. */
+      case OpCode_vstore:
+        dictval.vstoreOffset = env.argStack.pop_uint ();
+        env.clear_args ();
+        break;
+      case OpCode_FDSelect:
+        dictval.FDSelectOffset = env.argStack.pop_uint ();
+        env.clear_args ();
+        break;
+
+      default:
+        SUPER::process_op (op, env, dictval);
+        /* Record this operand below if stack is empty, otherwise done */
+        if (!env.argStack.is_empty ()) return;
+    }
+    /* Nothing is recorded once the environment is in error. */
+    if (unlikely (env.in_error ())) return;
+
+    dictval.add_op (op, env.str_ref);
+  }
+
+  typedef top_dict_opset_t<> SUPER;
+};
+
+struct cff2_font_dict_values_t : dict_values_t<op_str_t> /* Recorded Font DICT ops plus the location of the Private DICT that the Font DICT points at. */
+{
+ void init () /* Initializes the base container and privateDictInfo. */
+ {
+ dict_values_t<op_str_t>::init ();
+ privateDictInfo.init ();
+ }
+ void fini () { dict_values_t<op_str_t>::fini (); }
+
+ table_info_t privateDictInfo; /* offset and size are filled in by cff2_font_dict_opset_t on OpCode_Private. */
+};
+
+struct cff2_font_dict_opset_t : dict_opset_t /* Font DICT operator handler: captures the Private DICT offset/size and records each completed op in dictval. */
+{
+ static void process_op (op_code_t op, num_interp_env_t& env, cff2_font_dict_values_t& dictval)
+ {
+ switch (op) {
+ case OpCode_Private:
+ dictval.privateDictInfo.offset = env.argStack.pop_uint (); /* popped first, i.e. the topmost operand */
+ dictval.privateDictInfo.size = env.argStack.pop_uint ();
+ env.clear_args ();
+ break;
+
+ default:
+ SUPER::process_op (op, env);
+ if (!env.argStack.is_empty ()) /* stack not empty after the base handler: do not record yet */
+ return;
+ }
+ /* Nothing is recorded once the environment is in error. */
+ if (unlikely (env.in_error ())) return;
+
+ dictval.add_op (op, env.str_ref);
+ }
+
+ private:
+ typedef dict_opset_t SUPER;
+};
+
+template <typename VAL>
+struct cff2_private_dict_values_base_t : dict_values_t<VAL> /* Private DICT values; VAL is the per-op record type (op_str_t and num_dict_val_t in the typedefs that follow). */
+{
+ void init () /* Base init; offsets and ivs start at 0 and localSubrs at the Null object. */
+ {
+ dict_values_t<VAL>::init ();
+ subrsOffset = 0;
+ localSubrs = &Null (CFF2Subrs);
+ ivs = 0;
+ }
+ void fini () { dict_values_t<VAL>::fini (); }
+
+ unsigned int subrsOffset; /* Operand of OpCode_Subrs. */
+ const CFF2Subrs *localSubrs; /* Resolved from subrsOffset by the accelerator's init (). */
+ unsigned int ivs; /* Value taken from the vsindex operator. */
+};
+
+typedef cff2_private_dict_values_base_t<op_str_t> cff2_private_dict_values_subset_t;
+typedef cff2_private_dict_values_base_t<num_dict_val_t> cff2_private_dict_values_t;
+
+struct cff2_priv_dict_interp_env_t : num_interp_env_t /* Numeric interpreter environment that additionally tracks the Private DICT's vsindex value (ivs). */
+{
+ void init (const byte_str_t &str)
+ {
+ num_interp_env_t::init (str);
+ ivs = 0;
+ seen_vsindex = false;
+ }
+ /* Takes ivs from the top of the arg stack on the first call only; later calls neither pop nor change ivs. */
+ void process_vsindex ()
+ {
+ if (likely (!seen_vsindex))
+ {
+ set_ivs (argStack.pop_uint ());
+ }
+ seen_vsindex = true;
+ }
+
+ unsigned int get_ivs () const { return ivs; }
+ void set_ivs (unsigned int ivs_) { ivs = ivs_; }
+
+ protected:
+ unsigned int ivs;
+ bool seen_vsindex; /* Set by the first process_vsindex () call. */
+};
+
+struct cff2_private_dict_opset_t : dict_opset_t /* Private DICT handler storing num_dict_val_t records: keeps single numeric operands, the Subrs offset and the vsindex. */
+{
+ static void process_op (op_code_t op, cff2_priv_dict_interp_env_t& env, cff2_private_dict_values_t& dictval)
+ {
+ num_dict_val_t val;
+ val.init ();
+
+ switch (op) {
+ case OpCode_StdHW:
+ case OpCode_StdVW:
+ case OpCode_BlueScale:
+ case OpCode_BlueShift:
+ case OpCode_BlueFuzz:
+ case OpCode_ExpansionFactor:
+ case OpCode_LanguageGroup:
+ val.single_val = env.argStack.pop_num (); /* these ops keep their one numeric operand in the record */
+ env.clear_args ();
+ break;
+ case OpCode_BlueValues:
+ case OpCode_OtherBlues:
+ case OpCode_FamilyBlues:
+ case OpCode_FamilyOtherBlues:
+ case OpCode_StemSnapH:
+ case OpCode_StemSnapV:
+ env.clear_args (); /* operands dropped; the op is still recorded below with the freshly init ()'d val */
+ break;
+ case OpCode_Subrs:
+ dictval.subrsOffset = env.argStack.pop_uint ();
+ env.clear_args ();
+ break;
+ case OpCode_vsindexdict:
+ env.process_vsindex ();
+ dictval.ivs = env.get_ivs ();
+ env.clear_args ();
+ break;
+ case OpCode_blenddict:
+ break; /* arg stack left untouched; the op is recorded below */
+
+ default:
+ dict_opset_t::process_op (op, env);
+ if (!env.argStack.is_empty ()) return;
+ break;
+ }
+ /* Nothing is recorded once the environment is in error. */
+ if (unlikely (env.in_error ())) return;
+
+ dictval.add_op (op, env.str_ref, val);
+ }
+};
+
+struct cff2_private_dict_opset_subset_t : dict_opset_t /* Private DICT handler storing op_str_t records: operand values are not kept, only the Subrs offset and the recorded ops. */
+{
+ static void process_op (op_code_t op, cff2_priv_dict_interp_env_t& env, cff2_private_dict_values_subset_t& dictval)
+ {
+ switch (op) {
+ case OpCode_BlueValues:
+ case OpCode_OtherBlues:
+ case OpCode_FamilyBlues:
+ case OpCode_FamilyOtherBlues:
+ case OpCode_StdHW:
+ case OpCode_StdVW:
+ case OpCode_BlueScale:
+ case OpCode_BlueShift:
+ case OpCode_BlueFuzz:
+ case OpCode_StemSnapH:
+ case OpCode_StemSnapV:
+ case OpCode_LanguageGroup:
+ case OpCode_ExpansionFactor:
+ env.clear_args (); /* operands discarded; the op itself is recorded below */
+ break;
+
+ case OpCode_blenddict:
+ env.clear_args ();
+ return; /* unlike the other cases, blend returns before add_op (), so it is not recorded on its own */
+
+ case OpCode_Subrs:
+ dictval.subrsOffset = env.argStack.pop_uint ();
+ env.clear_args ();
+ break;
+
+ default:
+ SUPER::process_op (op, env);
+ if (!env.argStack.is_empty ()) return;
+ break;
+ }
+ /* Nothing is recorded once the environment is in error. */
+ if (unlikely (env.in_error ())) return;
+
+ dictval.add_op (op, env.str_ref);
+ }
+
+ private:
+ typedef dict_opset_t SUPER;
+};
+
+typedef dict_interpreter_t<cff2_top_dict_opset_t, cff2_top_dict_values_t> cff2_top_dict_interpreter_t;
+typedef dict_interpreter_t<cff2_font_dict_opset_t, cff2_font_dict_values_t> cff2_font_dict_interpreter_t;
+
+struct CFF2FDArray : FDArray<HBUINT32> /* FDArray with 32-bit counts; adds a serialize () that pins the base template's arguments. */
+{
+ /* FDArray::serialize does not compile without this partial specialization */
+ template <typename ITER, typename OP_SERIALIZER>
+ bool serialize (hb_serialize_context_t *c, ITER it, OP_SERIALIZER& opszr) /* Forwards to the base serialize with cff2_font_dict_values_t / table_info_t as explicit template arguments. */
+ { return FDArray<HBUINT32>::serialize<cff2_font_dict_values_t, table_info_t> (c, it, opszr); }
+};
+
+} /* namespace CFF */
+
+namespace OT {
+
+using namespace CFF;
+
+struct cff2 /* The CFF2 table header, together with the accelerator types that parse the table for a face. */
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_cff2;
+ /* Accepts the table when its fixed header is in range and the major version is 2. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ likely (version.major == 2));
+ }
+ /* Parsed view of a face's CFF2 table; PRIVOPSET / PRIVDICTVAL choose how Private DICTs are interpreted and stored. */
+ template <typename PRIVOPSET, typename PRIVDICTVAL>
+ struct accelerator_templ_t
+ {
+ void init (hb_face_t *face) /* Loads and validates the table; every failure path calls fini (), after which is_valid () is false. */
+ {
+ topDict.init ();
+ fontDicts.init ();
+ privateDicts.init ();
+
+ this->blob = sc.reference_table<cff2> (face);
+
+ /* setup for run-time sanitization */
+ sc.init (this->blob);
+ sc.start_processing ();
+
+ const OT::cff2 *cff2 = this->blob->template as<OT::cff2> ();
+
+ if (cff2 == &Null (OT::cff2))
+ { fini (); return; }
+
+ { /* parse top dict */
+ byte_str_t topDictStr (cff2 + cff2->topDict, cff2->topDictSize);
+ if (unlikely (!topDictStr.sanitize (&sc))) { fini (); return; }
+ cff2_top_dict_interpreter_t top_interp;
+ top_interp.env.init (topDictStr);
+ topDict.init (); /* NOTE(review): topDict was already init ()'d at the top of this function */
+ if (unlikely (!top_interp.interpret (topDict))) { fini (); return; }
+ }
+ /* globalSubrs sits at topDict + topDictSize; the remaining tables are located through offsets parsed from the Top DICT. */
+ globalSubrs = &StructAtOffset<CFF2Subrs> (cff2, cff2->topDict + cff2->topDictSize);
+ varStore = &StructAtOffsetOrNull<CFF2VariationStore> (cff2, topDict.vstoreOffset);
+ charStrings = &StructAtOffsetOrNull<CFF2CharStrings> (cff2, topDict.charStringsOffset);
+ fdArray = &StructAtOffsetOrNull<CFF2FDArray> (cff2, topDict.FDArrayOffset);
+ fdSelect = &StructAtOffsetOrNull<CFF2FDSelect> (cff2, topDict.FDSelectOffset);
+ /* varStore and fdSelect may be the Null object; charStrings, globalSubrs and fdArray must not be. Whatever is present has to sanitize. */
+ if (((varStore != &Null (CFF2VariationStore)) && unlikely (!varStore->sanitize (&sc))) ||
+ (charStrings == &Null (CFF2CharStrings)) || unlikely (!charStrings->sanitize (&sc)) ||
+ (globalSubrs == &Null (CFF2Subrs)) || unlikely (!globalSubrs->sanitize (&sc)) ||
+ (fdArray == &Null (CFF2FDArray)) || unlikely (!fdArray->sanitize (&sc)) ||
+ (((fdSelect != &Null (CFF2FDSelect)) && unlikely (!fdSelect->sanitize (&sc, fdArray->count)))))
+ { fini (); return; }
+ /* The charstring count must equal the glyph count reported by the sanitizer. */
+ num_glyphs = charStrings->count;
+ if (num_glyphs != sc.get_num_glyphs ())
+ { fini (); return; }
+
+ fdCount = fdArray->count;
+ if (!privateDicts.resize (fdCount))
+ { fini (); return; }
+
+ /* parse font dicts and gather private dicts */
+ for (unsigned int i = 0; i < fdCount; i++)
+ {
+ const byte_str_t fontDictStr = (*fdArray)[i];
+ if (unlikely (!fontDictStr.sanitize (&sc))) { fini (); return; }
+ cff2_font_dict_values_t *font;
+ cff2_font_dict_interpreter_t font_interp;
+ font_interp.env.init (fontDictStr);
+ font = fontDicts.push ();
+ if (unlikely (font == &Crap (cff2_font_dict_values_t))) { fini (); return; }
+ font->init ();
+ if (unlikely (!font_interp.interpret (*font))) { fini (); return; }
+ /* The Private DICT bytes are located through the offset/size captured while interpreting this Font DICT. */
+ const byte_str_t privDictStr (StructAtOffsetOrNull<UnsizedByteStr> (cff2, font->privateDictInfo.offset), font->privateDictInfo.size);
+ if (unlikely (!privDictStr.sanitize (&sc))) { fini (); return; }
+ dict_interpreter_t<PRIVOPSET, PRIVDICTVAL, cff2_priv_dict_interp_env_t> priv_interp;
+ priv_interp.env.init(privDictStr);
+ privateDicts[i].init ();
+ if (unlikely (!priv_interp.interpret (privateDicts[i]))) { fini (); return; }
+ /* subrsOffset is applied relative to the first byte of the Private DICT data. */
+ privateDicts[i].localSubrs = &StructAtOffsetOrNull<CFF2Subrs> (&privDictStr[0], privateDicts[i].subrsOffset);
+ if (privateDicts[i].localSubrs != &Null (CFF2Subrs) &&
+ unlikely (!privateDicts[i].localSubrs->sanitize (&sc)))
+ { fini (); return; }
+ }
+ }
+ /* Ends sanitizer processing, finalizes the dict containers and destroys the blob reference (blob becomes nullptr). */
+ void fini ()
+ {
+ sc.end_processing ();
+ topDict.fini ();
+ fontDicts.fini_deep ();
+ privateDicts.fini_deep ();
+ hb_blob_destroy (blob);
+ blob = nullptr;
+ }
+ /* True while a blob is held, i.e. fini () has not run since init () took the reference. */
+ bool is_valid () const { return blob; }
+
+ protected:
+ hb_blob_t *blob;
+ hb_sanitize_context_t sc;
+
+ public:
+ cff2_top_dict_values_t topDict;
+ const CFF2Subrs *globalSubrs;
+ const CFF2VariationStore *varStore;
+ const CFF2CharStrings *charStrings;
+ const CFF2FDArray *fdArray;
+ const CFF2FDSelect *fdSelect;
+ unsigned int fdCount;
+
+ hb_vector_t<cff2_font_dict_values_t> fontDicts;
+ hb_vector_t<PRIVDICTVAL> privateDicts; /* one entry per Font DICT, resized to fdCount in init () */
+
+ unsigned int num_glyphs;
+ };
+
+ struct accelerator_t : accelerator_templ_t<cff2_private_dict_opset_t, cff2_private_dict_values_t> /* Accelerator instantiated with the value-keeping Private DICT opset; declares the extents and (experimental) path queries. */
+ {
+ HB_INTERNAL bool get_extents (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_glyph_extents_t *extents) const;
+#ifdef HB_EXPERIMENTAL_API
+ HB_INTERNAL bool get_path (hb_font_t *font, hb_codepoint_t glyph, draw_helper_t &draw_helper) const;
+#endif
+ };
+
+ typedef accelerator_templ_t<cff2_private_dict_opset_subset_t, cff2_private_dict_values_subset_t> accelerator_subset_t;
+
+ bool subset (hb_subset_context_t *c) const { return hb_subset_cff2 (c); }
+
+ public:
+ FixedVersion<HBUINT8> version; /* Version of CFF2 table. set to 0x0200u */
+ NNOffsetTo<TopDict, HBUINT8> topDict; /* headerSize = Offset to Top DICT. */
+ HBUINT16 topDictSize; /* Top DICT size */
+
+ public:
+ DEFINE_SIZE_STATIC (5);
+};
+
+struct cff2_accelerator_t : cff2::accelerator_t {};
+} /* namespace OT */
+
+#endif /* HB_OT_CFF2_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh b/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh
new file mode 100644
index 0000000000..cc48379bb8
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh
@@ -0,0 +1,1711 @@
+/*
+ * Copyright © 2014 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_CMAP_TABLE_HH
+#define HB_OT_CMAP_TABLE_HH
+
+#include "hb-open-type.hh"
+#include "hb-set.hh"
+
+/*
+ * cmap -- Character to Glyph Index Mapping
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap
+ */
+#define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
+
+namespace OT {
+
+
+struct CmapSubtableFormat0
+{
+  bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
+  {
+    if (codepoint >= 256) return false; /* beyond the 256-entry table */
+    const hb_codepoint_t mapped = glyphIdArray[codepoint];
+    if (!mapped) return false; /* entry 0: no glyph for this code */
+    *glyph = mapped;
+    return true;
+  }
+  void collect_unicodes (hb_set_t *out) const
+  {
+    /* Report every code whose table entry is non-zero. */
+    for (unsigned int code = 0; code < 256; code++)
+      if (glyphIdArray[code]) out->add (code);
+  }
+  /* Same walk as collect_unicodes (), additionally storing code -> glyph in mapping. */
+  void collect_mapping (hb_set_t *unicodes, /* OUT */
+                        hb_map_t *mapping /* OUT */) const
+  {
+    for (unsigned code = 0; code < 256; code++)
+    {
+      const hb_codepoint_t mapped = glyphIdArray[code];
+      if (!mapped) continue;
+      unicodes->add (code);
+      mapping->set (code, mapped);
+    }
+  }
+  /* The subtable is fixed-size, so checking the struct bounds is sufficient. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (c->check_struct (this));
+  }
+
+  protected:
+  HBUINT16 format; /* Format number is set to 0. */
+  HBUINT16 length; /* Byte length of this subtable. */
+  HBUINT16 language; /* Ignore. */
+  HBUINT8 glyphIdArray[256];/* An array that maps character
+   * code to glyph index values. */
+  public:
+  DEFINE_SIZE_STATIC (6 + 256);
+};
+
+struct CmapSubtableFormat4 /* cmap subtable format 4: 16-bit code points grouped into segments, stored as parallel endCount / startCount / idDelta / idRangeOffset arrays inside `values`. */
+{
+ /* Writes the endCode array for the (code point, glyph) pairs of `it`; returns its start, or nullptr when one of the checked copies fails. */
+ template<typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ HBUINT16* serialize_endcode_array (hb_serialize_context_t *c,
+ Iterator it)
+ {
+ HBUINT16 *endCode = c->start_embed<HBUINT16> ();
+ hb_codepoint_t prev_endcp = 0xFFFF; /* 0xFFFF doubles as "no code point seen yet" */
+
+ for (const hb_item_type<Iterator> _ : +it)
+ {
+ if (prev_endcp != 0xFFFF && prev_endcp + 1u != _.first) /* gap in the run: the previous code point closes a segment */
+ {
+ HBUINT16 end_code;
+ end_code = prev_endcp;
+ c->copy<HBUINT16> (end_code);
+ }
+ prev_endcp = _.first;
+ }
+
+ {
+ // last endCode
+ HBUINT16 endcode;
+ endcode = prev_endcp;
+ if (unlikely (!c->copy<HBUINT16> (endcode))) return nullptr;
+ // There must be a final entry with end_code == 0xFFFF.
+ if (prev_endcp != 0xFFFF)
+ {
+ HBUINT16 finalcode;
+ finalcode = 0xFFFF;
+ if (unlikely (!c->copy<HBUINT16> (finalcode))) return nullptr;
+ }
+ }
+
+ return endCode;
+ }
+ /* Writes the startCode array: one entry for the first item and one after every gap, plus the 0xFFFF terminator; nullptr when the terminator cannot be copied. */
+ template<typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ HBUINT16* serialize_startcode_array (hb_serialize_context_t *c,
+ Iterator it)
+ {
+ HBUINT16 *startCode = c->start_embed<HBUINT16> ();
+ hb_codepoint_t prev_cp = 0xFFFF; /* 0xFFFF doubles as "no code point seen yet" */
+
+ for (const hb_item_type<Iterator> _ : +it)
+ {
+ if (prev_cp == 0xFFFF || prev_cp + 1u != _.first)
+ {
+ HBUINT16 start_code;
+ start_code = _.first;
+ c->copy<HBUINT16> (start_code);
+ }
+
+ prev_cp = _.first;
+ }
+
+ // The final 0xFFFF segment needs a start code as well.
+ if (it.len () == 0 || prev_cp != 0xFFFF)
+ {
+ HBUINT16 finalcode;
+ finalcode = 0xFFFF;
+ if (unlikely (!c->copy<HBUINT16> (finalcode))) return nullptr;
+ }
+
+ return startCode;
+ }
+ /* Writes one idDelta per segment: start glyph minus start code when the segment's glyph ids are consecutive, otherwise 0 (such segments get glyph ids via idRangeOffset). */
+ template<typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ HBINT16* serialize_idDelta_array (hb_serialize_context_t *c,
+ Iterator it,
+ HBUINT16 *endCode,
+ HBUINT16 *startCode,
+ unsigned segcount)
+ {
+ unsigned i = 0;
+ hb_codepoint_t last_gid = 0, start_gid = 0, last_cp = 0xFFFF;
+ bool use_delta = true;
+
+ HBINT16 *idDelta = c->start_embed<HBINT16> ();
+ if ((char *)idDelta - (char *)startCode != (int) segcount * (int) HBINT16::static_size) /* idDelta must begin exactly segcount entries after startCode */
+ return nullptr;
+
+ for (const hb_item_type<Iterator> _ : +it)
+ {
+ if (_.first == startCode[i])
+ {
+ use_delta = true;
+ start_gid = _.second;
+ }
+ else if (_.second != last_gid + 1) use_delta = false; /* glyph ids stop being consecutive inside this segment */
+
+ if (_.first == endCode[i])
+ {
+ HBINT16 delta;
+ if (use_delta) delta = (int)start_gid - (int)startCode[i];
+ else delta = 0;
+ c->copy<HBINT16> (delta);
+
+ i++;
+ }
+
+ last_gid = _.second;
+ last_cp = _.first;
+ }
+ /* Delta for the terminating 0xFFFF segment, written under the same condition as its start code. */
+ if (it.len () == 0 || last_cp != 0xFFFF)
+ {
+ HBINT16 delta;
+ delta = 1;
+ if (unlikely (!c->copy<HBINT16> (delta))) return nullptr;
+ }
+
+ return idDelta;
+ }
+ /* Allocates the idRangeOffset array and, for every segment whose idDelta is 0, appends that segment's glyph ids and points the segment's offset at them. */
+ template<typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ HBUINT16* serialize_rangeoffset_glyid (hb_serialize_context_t *c,
+ Iterator it,
+ HBUINT16 *endCode,
+ HBUINT16 *startCode,
+ HBINT16 *idDelta,
+ unsigned segcount)
+ {
+ HBUINT16 *idRangeOffset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segcount);
+ if (unlikely (!c->check_success (idRangeOffset))) return nullptr;
+ if (unlikely ((char *)idRangeOffset - (char *)idDelta != (int) segcount * (int) HBINT16::static_size)) return nullptr;
+
+ + hb_range (segcount)
+ | hb_filter ([&] (const unsigned _) { return idDelta[_] == 0; })
+ | hb_apply ([&] (const unsigned i)
+ {
+ idRangeOffset[i] = 2 * (c->start_embed<HBUINT16> () - idRangeOffset - i); /* byte distance from &idRangeOffset[i] to the current write position */
+
+ + it
+ | hb_filter ([&] (const hb_item_type<Iterator> _) { return _.first >= startCode[i] && _.first <= endCode[i]; })
+ | hb_apply ([&] (const hb_item_type<Iterator> _)
+ {
+ HBUINT16 glyID;
+ glyID = _.second;
+ c->copy<HBUINT16> (glyID);
+ })
+ ;
+
+
+ })
+ ;
+
+ return idRangeOffset;
+ }
+ /* Serializes a complete format 4 subtable from `it`, using only pairs whose code point is <= 0xFFFF; returns early, leaving the context as is, when there is nothing to write or a step fails. */
+ template<typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ void serialize (hb_serialize_context_t *c,
+ Iterator it)
+ {
+ auto format4_iter =
+ + it
+ | hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _)
+ { return _.first <= 0xFFFF; })
+ ;
+
+ if (format4_iter.len () == 0) return;
+ /* table_initpos lets the final `length` be measured from where this subtable began. */
+ unsigned table_initpos = c->length ();
+ if (unlikely (!c->extend_min (*this))) return;
+ this->format = 4;
+
+ //serialize endCode[]
+ HBUINT16 *endCode = serialize_endcode_array (c, format4_iter);
+ if (unlikely (!endCode)) return;
+
+ unsigned segcount = (c->length () - min_size) / HBUINT16::static_size; /* presumably the number of endCode entries just written; assumes c->length () was 0 when this subtable started -- TODO confirm */
+
+ // reservedPad word that sits between endCode[] and startCode[].
+ if (unlikely (!c->allocate_size<HBUINT16> (HBUINT16::static_size))) return; // 2 bytes of padding.
+
+ // serialize startCode[]
+ HBUINT16 *startCode = serialize_startcode_array (c, format4_iter);
+ if (unlikely (!startCode)) return;
+
+ //serialize idDelta[]
+ HBINT16 *idDelta = serialize_idDelta_array (c, format4_iter, endCode, startCode, segcount);
+ if (unlikely (!idDelta)) return;
+
+ HBUINT16 *idRangeOffset = serialize_rangeoffset_glyid (c, format4_iter, endCode, startCode, idDelta, segcount);
+ if (unlikely (!c->check_success (idRangeOffset))) return;
+ /* Header fields that depend on the final size and segment count are filled in last. */
+ if (unlikely (!c->check_assign(this->length, c->length () - table_initpos))) return;
+ this->segCountX2 = segcount * 2;
+ this->entrySelector = hb_max (1u, hb_bit_storage (segcount)) - 1;
+ this->searchRange = 2 * (1u << this->entrySelector);
+ this->rangeShift = segcount * 2 > this->searchRange
+ ? 2 * segcount - this->searchRange
+ : 0;
+ }
+ /* Lookup helper holding pointers to the individual arrays inside `values`, computed once from segCountX2. */
+ struct accelerator_t
+ {
+ accelerator_t () {}
+ accelerator_t (const CmapSubtableFormat4 *subtable) { init (subtable); }
+ ~accelerator_t () { fini (); }
+
+ void init (const CmapSubtableFormat4 *subtable)
+ {
+ segCount = subtable->segCountX2 / 2;
+ endCount = subtable->values.arrayZ;
+ startCount = endCount + segCount + 1; /* +1 skips the reservedPad word that follows endCount */
+ idDelta = startCount + segCount;
+ idRangeOffset = idDelta + segCount;
+ glyphIdArray = idRangeOffset + segCount;
+ glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2; /* what is left of `length` after the 14-byte header, the pad word and four segCount-sized arrays, in 16-bit units */
+ }
+ void fini () {}
+ /* Finds the segment containing codepoint and computes its glyph; false when no segment matches or the result is glyph 0. */
+ bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
+ {
+ struct CustomRange
+ {
+ int cmp (hb_codepoint_t k,
+ unsigned distance) const
+ {
+ if (k > last) return +1;
+ if (k < (&last)[distance]) return -1; /* `distance` entries past this endCount entry lies the same segment's startCount */
+ return 0;
+ }
+ HBUINT16 last;
+ };
+
+ const HBUINT16 *found = hb_bsearch (codepoint,
+ this->endCount,
+ this->segCount,
+ 2,
+ _hb_cmp_method<hb_codepoint_t, CustomRange, unsigned>,
+ this->segCount + 1);
+ if (!found)
+ return false;
+ unsigned int i = found - endCount;
+
+ hb_codepoint_t gid;
+ unsigned int rangeOffset = this->idRangeOffset[i];
+ if (rangeOffset == 0)
+ gid = codepoint + this->idDelta[i];
+ else
+ {
+ /* Somebody has been smoking... */
+ unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; /* rangeOffset counts from &idRangeOffset[i]; glyphIdArray starts segCount - i entries after that slot */
+ if (unlikely (index >= this->glyphIdArrayLength))
+ return false;
+ gid = this->glyphIdArray[index];
+ if (unlikely (!gid))
+ return false;
+ gid += this->idDelta[i];
+ }
+ gid &= 0xFFFFu; /* keep only the low 16 bits of the sum */
+ if (!gid)
+ return false;
+ *glyph = gid;
+ return true;
+ }
+ /* Callback-shaped wrapper: obj is cast to an accelerator_t and get_glyph () is called on it. */
+ HB_INTERNAL static bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph)
+ { return ((const accelerator_t *) obj)->get_glyph (codepoint, glyph); }
+ /* Adds every code point that maps to a non-zero glyph to out. */
+ void collect_unicodes (hb_set_t *out) const
+ {
+ unsigned int count = this->segCount;
+ if (count && this->startCount[count - 1] == 0xFFFFu)
+ count--; /* Skip sentinel segment. */
+ for (unsigned int i = 0; i < count; i++)
+ {
+ hb_codepoint_t start = this->startCount[i];
+ hb_codepoint_t end = this->endCount[i];
+ unsigned int rangeOffset = this->idRangeOffset[i];
+ if (rangeOffset == 0)
+ {
+ for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
+ {
+ hb_codepoint_t gid = (codepoint + this->idDelta[i]) & 0xFFFFu;
+ if (unlikely (!gid))
+ continue;
+ out->add (codepoint);
+ }
+ }
+ else
+ {
+ for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
+ {
+ unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
+ if (unlikely (index >= this->glyphIdArrayLength))
+ break; /* index grows with codepoint, so the rest of this segment is out of range too */
+ hb_codepoint_t gid = this->glyphIdArray[index];
+ if (unlikely (!gid))
+ continue;
+ out->add (codepoint);
+ }
+ }
+ }
+ }
+ /* Same walk as collect_unicodes (), additionally storing codepoint -> gid in mapping; in the rangeOffset branch the stored gid is the raw glyphIdArray entry (idDelta is not added here). */
+ void collect_mapping (hb_set_t *unicodes, /* OUT */
+ hb_map_t *mapping /* OUT */) const
+ {
+ unsigned count = this->segCount;
+ if (count && this->startCount[count - 1] == 0xFFFFu)
+ count--; /* Skip sentinel segment. */
+ for (unsigned i = 0; i < count; i++)
+ {
+ hb_codepoint_t start = this->startCount[i];
+ hb_codepoint_t end = this->endCount[i];
+ unsigned rangeOffset = this->idRangeOffset[i];
+ if (rangeOffset == 0)
+ {
+ for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
+ {
+ hb_codepoint_t gid = (codepoint + this->idDelta[i]) & 0xFFFFu;
+ if (unlikely (!gid))
+ continue;
+ unicodes->add (codepoint);
+ mapping->set (codepoint, gid);
+ }
+ }
+ else
+ {
+ for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
+ {
+ unsigned index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
+ if (unlikely (index >= this->glyphIdArrayLength))
+ break;
+ hb_codepoint_t gid = this->glyphIdArray[index];
+ if (unlikely (!gid))
+ continue;
+ unicodes->add (codepoint);
+ mapping->set (codepoint, gid);
+ }
+ }
+ }
+ }
+
+ const HBUINT16 *endCount;
+ const HBUINT16 *startCount;
+ const HBUINT16 *idDelta;
+ const HBUINT16 *idRangeOffset;
+ const HBUINT16 *glyphIdArray;
+ unsigned int segCount;
+ unsigned int glyphIdArrayLength;
+ };
+ /* The three wrappers below build a temporary accelerator_t over this subtable for each call. */
+ bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
+ {
+ accelerator_t accel (this);
+ return accel.get_glyph_func (&accel, codepoint, glyph);
+ }
+ void collect_unicodes (hb_set_t *out) const
+ {
+ accelerator_t accel (this);
+ accel.collect_unicodes (out);
+ }
+
+ void collect_mapping (hb_set_t *unicodes, /* OUT */
+ hb_map_t *mapping /* OUT */) const
+ {
+ accelerator_t accel (this);
+ accel.collect_mapping (unicodes, mapping);
+ }
+ /* Bounds-checks the subtable, shrinking an over-long `length` where the sanitizer allows edits, then requires room for the header, pad word and four segment arrays. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!c->check_struct (this)))
+ return_trace (false);
+
+ if (unlikely (!c->check_range (this, length)))
+ {
+ /* Some broken fonts have too long of a "length" value.
+ * If that is the case, just change the value to truncate
+ * the subtable at the end of the blob. */
+ uint16_t new_length = (uint16_t) hb_min ((uintptr_t) 65535,
+ (uintptr_t) (c->end -
+ (char *) this));
+ if (!c->try_set (&length, new_length))
+ return_trace (false);
+ }
+
+ return_trace (16 + 4 * (unsigned int) segCountX2 <= length); /* 4 * segCountX2 == 8 * segCount: the four per-segment arrays */
+ }
+
+
+
+ protected:
+ HBUINT16 format; /* Format number is set to 4. */
+ HBUINT16 length; /* This is the length in bytes of the
+ * subtable. */
+ HBUINT16 language; /* Ignore. */
+ HBUINT16 segCountX2; /* 2 x segCount. */
+ HBUINT16 searchRange; /* 2 * (2**floor(log2(segCount))) */
+ HBUINT16 entrySelector; /* log2(searchRange/2) */
+ HBUINT16 rangeShift; /* 2 x segCount - searchRange */
+
+ UnsizedArrayOf<HBUINT16>
+ values;
+#if 0
+ HBUINT16 endCount[segCount]; /* End characterCode for each segment,
+ * last=0xFFFFu. */
+ HBUINT16 reservedPad; /* Set to 0. */
+ HBUINT16 startCount[segCount]; /* Start character code for each segment. */
+ HBINT16 idDelta[segCount]; /* Delta for all character codes in segment. */
+ HBUINT16 idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
+ UnsizedArrayOf<HBUINT16>
+ glyphIdArray; /* Glyph index array (arbitrary length) */
+#endif
+
+ public:
+ DEFINE_SIZE_ARRAY (14, values);
+};
+
+struct CmapSubtableLongGroup /* One (startCharCode, endCharCode, glyphID) record of the long segmented cmap subtables; the fields are private and reached through the friends below. */
+{
+ friend struct CmapSubtableFormat12;
+ friend struct CmapSubtableFormat13;
+ template<typename U>
+ friend struct CmapSubtableLongSegmented;
+ friend struct cmap;
+ /* Three-way comparison of codepoint against this group's inclusive range: -1 below it, +1 above it, 0 inside. */
+ int cmp (hb_codepoint_t codepoint) const
+ {
+ if (codepoint < startCharCode) return -1;
+ if (codepoint > endCharCode) return +1;
+ return 0;
+ }
+ /* Fixed-size record: a bounds check of the struct is all that is done. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ private:
+ HBUINT32 startCharCode; /* First character code in this group. */
+ HBUINT32 endCharCode; /* Last character code in this group. */
+ HBUINT32 glyphID; /* Glyph index; interpretation depends on
+ * subtable format. */
+ public:
+ DEFINE_SIZE_STATIC (12);
+};
+DECLARE_NULL_NAMESPACE_BYTES (OT, CmapSubtableLongGroup);
+
+template <typename UINT>
+struct CmapSubtableTrimmed
+{
+  bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
+  {
+    /* No explicit range test: the array's implicit bound-checking is relied upon. */
+    const hb_codepoint_t offset = codepoint - startCharCode;
+    const hb_codepoint_t mapped = glyphIdArray[offset];
+    if (!mapped) return false;
+    *glyph = mapped;
+    return true;
+  }
+  void collect_unicodes (hb_set_t *out) const
+  {
+    /* Walk the array once, advancing the code point alongside the index. */
+    const unsigned int num_entries = glyphIdArray.len;
+    hb_codepoint_t cp = startCharCode;
+    for (unsigned int idx = 0; idx < num_entries; idx++, cp++)
+      if (glyphIdArray[idx]) out->add (cp);
+  }
+  /* Same walk as collect_unicodes (), additionally storing code point -> glyph in mapping. */
+  void collect_mapping (hb_set_t *unicodes, /* OUT */
+                        hb_map_t *mapping /* OUT */) const
+  {
+    const unsigned num_entries = glyphIdArray.len;
+    hb_codepoint_t cp = startCharCode;
+    for (unsigned idx = 0; idx < num_entries; idx++, cp++)
+    {
+      /* Entries holding glyph 0 are skipped. */
+      const hb_codepoint_t mapped = glyphIdArray[idx];
+      if (!mapped) continue;
+      unicodes->add (cp);
+      mapping->set (cp, mapped);
+    }
+  }
+  /* Checks the fixed part, then lets the glyph array validate itself. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
+  }
+
+  protected:
+  UINT formatReserved; /* Subtable format and (maybe) padding. */
+  UINT length; /* Byte length of this subtable. */
+  UINT language; /* Ignore. */
+  UINT startCharCode; /* First character code covered. */
+  ArrayOf<HBGlyphID, UINT>
+  glyphIdArray; /* Array of glyph index values for character
+   * codes in the range. */
+  public:
+  DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
+};
+
+struct CmapSubtableFormat6 : CmapSubtableTrimmed<HBUINT16> {}; /* Trimmed subtable whose header fields are 16-bit. */
+struct CmapSubtableFormat10 : CmapSubtableTrimmed<HBUINT32 > {}; /* Trimmed subtable whose header fields are 32-bit. */
+
+template <typename T>
+struct CmapSubtableLongSegmented
+{
+ friend struct cmap;
+
+ bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
+ {
+ hb_codepoint_t gid = T::group_get_glyph (groups.bsearch (codepoint), codepoint);
+ if (!gid)
+ return false;
+ *glyph = gid;
+ return true;
+ }
+
+ /* Adds to `out` the codepoint range of every group.  Groups whose first
+  * glyph id is not below num_glyphs are skipped, and ranges whose glyph ids
+  * would run past num_glyphs are shortened. */
+ void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const
+ {
+   for (unsigned int idx = 0; idx < this->groups.len; idx++)
+   {
+     const CmapSubtableLongGroup &group = this->groups[idx];
+     hb_codepoint_t first = group.startCharCode;
+     hb_codepoint_t last = hb_min ((hb_codepoint_t) group.endCharCode,
+                                   (hb_codepoint_t) HB_UNICODE_MAX);
+     hb_codepoint_t first_gid = group.glyphID;
+
+     if (first_gid == 0)
+     {
+       /* Intention is: if (hb_is_same (T, CmapSubtableFormat13)) continue; */
+       if (! T::group_get_glyph (group, last)) continue;
+       /* The group's first codepoint maps to glyph 0; begin with the next one. */
+       first++;
+       first_gid++;
+     }
+
+     if (unlikely ((unsigned int) first_gid >= num_glyphs)) continue;
+     if (unlikely ((unsigned int) (first_gid + last - first) >= num_glyphs))
+       last = first + (hb_codepoint_t) num_glyphs - first_gid;
+
+     out->add_range (first, last);
+   }
+ }
+
+ /* Walks every group and records, for each covered codepoint, the codepoint
+  * in `unicodes` and a codepoint -> glyph id entry in `mapping`.  Glyph ids
+  * are assigned sequentially from the group's first glyph id; groups and
+  * ranges are limited by num_glyphs as in collect_unicodes (). */
+ void collect_mapping (hb_set_t *unicodes, /* OUT */
+                       hb_map_t *mapping, /* OUT */
+                       unsigned num_glyphs) const
+ {
+   for (unsigned idx = 0; idx < this->groups.len; idx++)
+   {
+     const CmapSubtableLongGroup &group = this->groups[idx];
+     hb_codepoint_t first = group.startCharCode;
+     hb_codepoint_t last = hb_min ((hb_codepoint_t) group.endCharCode,
+                                   (hb_codepoint_t) HB_UNICODE_MAX);
+     hb_codepoint_t next_gid = group.glyphID;
+
+     if (next_gid == 0)
+     {
+       /* Intention is: if (hb_is_same (T, CmapSubtableFormat13)) continue; */
+       if (! T::group_get_glyph (group, last)) continue;
+       /* The group's first codepoint maps to glyph 0; begin with the next one. */
+       first++;
+       next_gid++;
+     }
+
+     if (unlikely ((unsigned int) next_gid >= num_glyphs)) continue;
+     if (unlikely ((unsigned int) (next_gid + last - first) >= num_glyphs))
+       last = first + (hb_codepoint_t) num_glyphs - next_gid;
+
+     for (unsigned cp = first; cp <= last; cp++, next_gid++)
+     {
+       unicodes->add (cp);
+       mapping->set (cp, next_gid);
+     }
+   }
+ }
+
+ /* Sanitizer hook: checks this struct via c->check_struct (), then the
+  * groups array. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+   TRACE_SANITIZE (this);
+   if (!c->check_struct (this)) return_trace (false);
+   return_trace (groups.sanitize (c));
+ }
+
+ protected:
+ HBUINT16 format; /* Subtable format; set to 12. */
+ HBUINT16 reserved; /* Reserved; set to 0. */
+ HBUINT32 length; /* Byte length of this subtable. */
+ HBUINT32 language; /* Ignore. */
+ SortedArrayOf<CmapSubtableLongGroup, HBUINT32>
+ groups; /* Groupings. */
+ public:
+ DEFINE_SIZE_ARRAY (16, groups);
+};
+
+struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
+{
+ /* Glyph id for `u` inside `group`: the group's first glyph id plus the
+  * distance of `u` from the group's start.  A group whose start lies after
+  * its end yields 0. */
+ static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
+                                        hb_codepoint_t u)
+ {
+   if (unlikely (!(group.startCharCode <= group.endCharCode)))
+     return 0;
+   return group.glyphID + (u - group.startCharCode);
+ }
+
+
+ /* Serializes a format 12 subtable from `it`, an iterator of
+  * (codepoint, glyph id) pairs.  Consecutive pairs whose codepoints and
+  * glyph ids both advance by one are merged into a single
+  * CmapSubtableLongGroup; every other pair starts a new group.
+  * Does nothing when `it` is empty or the header cannot be allocated. */
+ template<typename Iterator,
+          hb_requires (hb_is_iterator (Iterator))>
+ void serialize (hb_serialize_context_t *c,
+                 Iterator it)
+ {
+   if (it.len () == 0) return;
+   unsigned table_initpos = c->length ();
+   if (unlikely (!c->extend_min (*this))) return;
+
+   /* Marker for "no group started yet".  It must not be a value that can
+    * occur as a codepoint: the previous marker, 0xFFFF, is a legal
+    * codepoint, so a pending group starting at U+FFFF was overwritten by
+    * the next pair instead of being written out. */
+   const hb_codepoint_t no_group = (hb_codepoint_t) -1;
+   hb_codepoint_t startCharCode = no_group, endCharCode = no_group;
+   hb_codepoint_t glyphID = 0;
+
+   for (const hb_item_type<Iterator> _ : +it)
+   {
+     if (startCharCode == no_group)
+     {
+       /* First pair: open the first group. */
+       startCharCode = _.first;
+       endCharCode = _.first;
+       glyphID = _.second;
+     }
+     else if (!_is_gid_consecutive (endCharCode, startCharCode, glyphID, _.first, _.second))
+     {
+       /* The run is broken: flush the current group and open a new one. */
+       CmapSubtableLongGroup grouprecord;
+       grouprecord.startCharCode = startCharCode;
+       grouprecord.endCharCode = endCharCode;
+       grouprecord.glyphID = glyphID;
+       c->copy<CmapSubtableLongGroup> (grouprecord);
+
+       startCharCode = _.first;
+       endCharCode = _.first;
+       glyphID = _.second;
+     }
+     else
+       endCharCode = _.first;
+   }
+
+   /* Flush the group that is still open after the last pair. */
+   CmapSubtableLongGroup record;
+   record.startCharCode = startCharCode;
+   record.endCharCode = endCharCode;
+   record.glyphID = glyphID;
+   c->copy<CmapSubtableLongGroup> (record);
+
+   /* Fill in the header now that the total size is known. */
+   this->format = 12;
+   this->reserved = 0;
+   this->length = c->length () - table_initpos;
+   this->groups.len = (this->length - min_size)/CmapSubtableLongGroup::static_size;
+ }
+
+ /* Size in bytes of a subtable holding `groups_data`: 16 bytes plus
+  * 12 bytes per group. */
+ static size_t get_sub_table_size (const hb_sorted_vector_t<CmapSubtableLongGroup> &groups_data)
+ {
+   const unsigned header_bytes = 16;
+   const unsigned bytes_per_group = 12;
+   return header_bytes + bytes_per_group * groups_data.length;
+ }
+
+ private:
+ /* True when (cp, new_gid) extends the group described by startCharCode,
+  * endCharCode and glyphID: cp must directly follow endCharCode and new_gid
+  * must be the glyph id the group would assign to cp. */
+ static bool _is_gid_consecutive (hb_codepoint_t endCharCode,
+                                  hb_codepoint_t startCharCode,
+                                  hb_codepoint_t glyphID,
+                                  hb_codepoint_t cp,
+                                  hb_codepoint_t new_gid)
+ {
+   if (cp - 1 != endCharCode) return false;
+   return new_gid == glyphID + (cp - startCharCode);
+ }
+
+};
+
+/* Long-segmented subtable in which every codepoint of a group maps to the
+ * group's single glyph id. */
+struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
+{
+  /* The codepoint is irrelevant: the whole group shares one glyph id. */
+  static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
+                                         hb_codepoint_t u HB_UNUSED)
+  {
+    const hb_codepoint_t shared_gid = group.glyphID;
+    return shared_gid;
+  }
+};
+
+typedef enum /* Outcome of a variation-sequence lookup (see VariationSelectorRecord::get_glyph). */
+{
+ GLYPH_VARIANT_NOT_FOUND = 0, /* Neither UVS table has an entry for the codepoint. */
+ GLYPH_VARIANT_FOUND = 1, /* The non-default UVS table supplied a glyph. */
+ GLYPH_VARIANT_USE_DEFAULT = 2 /* The codepoint is listed in the default UVS table. */
+} glyph_variant_t;
+
+/* A run of codepoints: startUnicodeValue plus additionalCount following
+ * values. */
+struct UnicodeValueRange
+{
+  /* Three-way comparison for binary search: -1 when `codepoint` lies before
+   * the range, +1 when it lies after it, 0 when it is inside. */
+  int cmp (const hb_codepoint_t &codepoint) const
+  {
+    if (codepoint < startUnicodeValue) return -1;
+    return codepoint > startUnicodeValue + additionalCount ? +1 : 0;
+  }
+
+  /* Sanitizer hook: only the fixed-size struct itself needs checking. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool struct_ok = c->check_struct (this);
+    return_trace (struct_ok);
+  }
+
+  HBUINT24 startUnicodeValue; /* First value in this range. */
+  HBUINT8 additionalCount; /* Number of additional values in this
+                            * range. */
+  public:
+  DEFINE_SIZE_STATIC (4);
+};
+
+struct DefaultUVS : SortedArrayOf<UnicodeValueRange, HBUINT32>
+{
+ /* Adds every range of this table to `out`, with each range's end capped
+  * at HB_UNICODE_MAX. */
+ void collect_unicodes (hb_set_t *out) const
+ {
+   for (const UnicodeValueRange &range : as_array ())
+   {
+     const hb_codepoint_t range_start = range.startUnicodeValue;
+     const hb_codepoint_t range_end = hb_min ((hb_codepoint_t) (range_start + range.additionalCount),
+                                              (hb_codepoint_t) HB_UNICODE_MAX);
+     out->add_range (range_start, range_end);
+   }
+ }
+
+ /* Serializes a copy of this table restricted to the codepoints present in
+  * `unicodes`.  Retained codepoints that are consecutive are merged into
+  * one UnicodeValueRange, even across source ranges.
+  *
+  * Returns the embedded table, or nullptr when serialization fails or no
+  * codepoint is retained (in the latter case the serializer is reverted to
+  * its state on entry). */
+ DefaultUVS* copy (hb_serialize_context_t *c,
+                   const hb_set_t *unicodes) const
+ {
+   DefaultUVS *out = c->start_embed<DefaultUVS> ();
+   if (unlikely (!out)) return nullptr;
+   auto snap = c->snapshot ();
+
+   /* Placeholder length; the real value is assigned once all ranges are
+    * written. */
+   HBUINT32 len;
+   len = 0;
+   if (unlikely (!c->copy<HBUINT32> (len))) return nullptr;
+   unsigned init_len = c->length ();
+
+   /* lastCode is the first codepoint of the run being built; count is the
+    * index of the most recently retained codepoint within that run. */
+   hb_codepoint_t lastCode = HB_MAP_VALUE_INVALID;
+   int count = -1;
+
+   for (const UnicodeValueRange& _ : as_array ())
+   {
+     for (const unsigned addcnt : hb_range ((unsigned) _.additionalCount + 1))
+     {
+       unsigned curEntry = (unsigned) _.startUnicodeValue + addcnt;
+       if (!unicodes->has (curEntry)) continue;
+       count += 1;
+       if (lastCode == HB_MAP_VALUE_INVALID)
+         lastCode = curEntry;
+       /* Close the run when it is broken, or when it already holds 256
+        * codepoints: additionalCount is an 8-bit field, so a longer run
+        * (possible when adjacent source ranges are merged) would be stored
+        * truncated. */
+       else if (lastCode + count != curEntry || count > 0xFF)
+       {
+         UnicodeValueRange rec;
+         rec.startUnicodeValue = lastCode;
+         rec.additionalCount = count - 1;
+         c->copy<UnicodeValueRange> (rec);
+
+         lastCode = curEntry;
+         count = 0;
+       }
+     }
+   }
+
+   /* Flush the run that is still open. */
+   if (lastCode != HB_MAP_VALUE_INVALID)
+   {
+     UnicodeValueRange rec;
+     rec.startUnicodeValue = lastCode;
+     rec.additionalCount = count;
+     c->copy<UnicodeValueRange> (rec);
+   }
+
+   if (c->length () - init_len == 0)
+   {
+     /* Nothing retained: drop the placeholder length as well. */
+     c->revert (snap);
+     return nullptr;
+   }
+   else
+   {
+     if (unlikely (!c->check_assign (out->len, (c->length () - init_len) / UnicodeValueRange::static_size))) return nullptr;
+     return out;
+   }
+ }
+
+ public:
+ DEFINE_SIZE_ARRAY (4, *this);
+};
+
+/* One entry of a non-default UVS table: a base codepoint and the glyph id
+ * to use for it. */
+struct UVSMapping
+{
+  /* Three-way comparison against the base codepoint, for binary search. */
+  int cmp (const hb_codepoint_t &codepoint) const
+  {
+    return unicodeValue.cmp (codepoint);
+  }
+
+  /* Sanitizer hook: only the fixed-size struct itself needs checking. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool struct_ok = c->check_struct (this);
+    return_trace (struct_ok);
+  }
+
+  HBUINT24 unicodeValue; /* Base Unicode value of the UVS */
+  HBGlyphID glyphID; /* Glyph ID of the UVS */
+  public:
+  DEFINE_SIZE_STATIC (5);
+};
+
+struct NonDefaultUVS : SortedArrayOf<UVSMapping, HBUINT32>
+{
+ /* Adds the base codepoint of every mapping to `out`. */
+ void collect_unicodes (hb_set_t *out) const
+ {
+   for (const UVSMapping &entry : as_array ())
+     out->add (entry.unicodeValue);
+ }
+
+ /* For every mapping, adds its base codepoint to `unicodes` and records
+  * codepoint -> glyph id in `mapping`. */
+ void collect_mapping (hb_set_t *unicodes, /* OUT */
+                       hb_map_t *mapping /* OUT */) const
+ {
+   for (const UVSMapping &entry : as_array ())
+   {
+     const hb_codepoint_t cp = entry.unicodeValue;
+     const hb_codepoint_t gid = entry.glyphID;
+     unicodes->add (cp);
+     mapping->set (cp, gid);
+   }
+ }
+
+ /* Adds to `glyphset` the glyph id of every mapping whose base codepoint
+  * is in `unicodes`. */
+ void closure_glyphs (const hb_set_t *unicodes,
+                      hb_set_t *glyphset) const
+ {
+   for (const UVSMapping &entry : as_array ())
+   {
+     if (!unicodes->has (entry.unicodeValue)) continue;
+     glyphset->add (entry.glyphID);
+   }
+ }
+
+ NonDefaultUVS* copy (hb_serialize_context_t *c, /* Serializes the mappings retained by the subset; returns nullptr if none are retained or the header cannot be written. */
+ const hb_set_t *unicodes,
+ const hb_set_t *glyphs_requested,
+ const hb_map_t *glyph_map) const
+ {
+ NonDefaultUVS *out = c->start_embed<NonDefaultUVS> ();
+ if (unlikely (!out)) return nullptr;
+
+ auto it =
+ + as_array ()
+ | hb_filter ([&] (const UVSMapping& _) /* Keep a mapping if its codepoint is in `unicodes` or its glyph is in `glyphs_requested`. */
+ {
+ return unicodes->has (_.unicodeValue) || glyphs_requested->has (_.glyphID);
+ })
+ ;
+
+ if (!it) return nullptr; /* Nothing retained: emit no table at all. */
+
+ HBUINT32 len;
+ len = it.len ();
+ if (unlikely (!c->copy<HBUINT32> (len))) return nullptr; /* The array length is written first, then the entries. */
+
+ for (const UVSMapping& _ : it)
+ {
+ UVSMapping mapping;
+ mapping.unicodeValue = _.unicodeValue;
+ mapping.glyphID = glyph_map->get (_.glyphID); /* Glyph id is translated through glyph_map. */
+ c->copy<UVSMapping> (mapping);
+ }
+
+ return out;
+ }
+
+ public:
+ DEFINE_SIZE_ARRAY (4, *this);
+};
+
+struct VariationSelectorRecord
+{
+ /* Resolves `codepoint` for this variation selector.  `base` is the address
+  * the two UVS offsets are relative to.  A hit in the default UVS table
+  * wins; otherwise a non-zero glyph id from the non-default table is
+  * stored in *glyph. */
+ glyph_variant_t get_glyph (hb_codepoint_t codepoint,
+                            hb_codepoint_t *glyph,
+                            const void *base) const
+ {
+   if ((base+defaultUVS).bfind (codepoint))
+     return GLYPH_VARIANT_USE_DEFAULT;
+
+   const UVSMapping &mapping = (base+nonDefaultUVS).bsearch (codepoint);
+   if (!mapping.glyphID)
+     return GLYPH_VARIANT_NOT_FOUND;
+
+   *glyph = mapping.glyphID;
+   return GLYPH_VARIANT_FOUND;
+ }
+
+ VariationSelectorRecord(const VariationSelectorRecord& other) /* Copy constructor; delegates to operator= below. */
+ {
+ *this = other;
+ }
+
+ void operator= (const VariationSelectorRecord& other) /* Copies the selector and both offset values. */
+ {
+ varSelector = other.varSelector;
+ HBUINT32 offset = other.defaultUVS; /* Offsets are copied as plain integers through an HBUINT32 temporary. */
+ defaultUVS = offset;
+ offset = other.nonDefaultUVS;
+ nonDefaultUVS = offset;
+ }
+
+ /* Adds the codepoints of both UVS tables (resolved relative to `base`)
+  * to `out`. */
+ void collect_unicodes (hb_set_t *out, const void *base) const
+ {
+   const DefaultUVS &default_table = base+defaultUVS;
+   const NonDefaultUVS &non_default_table = base+nonDefaultUVS;
+   default_table.collect_unicodes (out);
+   non_default_table.collect_unicodes (out);
+ }
+
+ /* Adds the codepoints of both UVS tables to `unicodes`; only the
+  * non-default table contributes codepoint -> glyph entries to `mapping`. */
+ void collect_mapping (const void *base,
+                       hb_set_t *unicodes, /* OUT */
+                       hb_map_t *mapping /* OUT */) const
+ {
+   const DefaultUVS &default_table = base+defaultUVS;
+   const NonDefaultUVS &non_default_table = base+nonDefaultUVS;
+   default_table.collect_unicodes (unicodes);
+   non_default_table.collect_mapping (unicodes, mapping);
+ }
+
+ /* Three-way comparison on varSelector, for binary search. */
+ int cmp (const hb_codepoint_t &variation_selector) const
+ {
+   return varSelector.cmp (variation_selector);
+ }
+
+ /* Sanitizer hook: checks this struct, then each UVS offset relative to
+  * `base`, stopping at the first failure. */
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const
+ {
+   TRACE_SANITIZE (this);
+   if (!c->check_struct (this)) return_trace (false);
+   if (!defaultUVS.sanitize (c, base)) return_trace (false);
+   return_trace (nonDefaultUVS.sanitize (c, base));
+ }
+
+ hb_pair_t<unsigned, unsigned> /* Returns (default UVS object index, non-default UVS object index); 0 means that table was not emitted. */
+ copy (hb_serialize_context_t *c,
+ const hb_set_t *unicodes,
+ const hb_set_t *glyphs_requested,
+ const hb_map_t *glyph_map,
+ const void *base) const
+ {
+ auto snap = c->snapshot ();
+ auto *out = c->embed<VariationSelectorRecord> (*this);
+ if (unlikely (!out)) return hb_pair (0, 0);
+
+ out->defaultUVS = 0; /* Offsets are cleared here; the caller links them from the returned indices. */
+ out->nonDefaultUVS = 0;
+
+ unsigned non_default_uvs_objidx = 0;
+ if (nonDefaultUVS != 0)
+ {
+ c->push ();
+ if (c->copy (base+nonDefaultUVS, unicodes, glyphs_requested, glyph_map))
+ non_default_uvs_objidx = c->pop_pack ();
+ else c->pop_discard ();
+ }
+
+ unsigned default_uvs_objidx = 0;
+ if (defaultUVS != 0)
+ {
+ c->push ();
+ if (c->copy (base+defaultUVS, unicodes))
+ default_uvs_objidx = c->pop_pack ();
+ else c->pop_discard ();
+ }
+
+
+ if (!default_uvs_objidx && !non_default_uvs_objidx) /* Neither table was emitted: undo the embedded record too. */
+ c->revert (snap);
+
+ return hb_pair (default_uvs_objidx, non_default_uvs_objidx);
+ }
+
+ HBUINT24 varSelector; /* Variation selector. */
+ LOffsetTo<DefaultUVS>
+ defaultUVS; /* Offset to Default UVS Table. May be 0. */
+ LOffsetTo<NonDefaultUVS>
+ nonDefaultUVS; /* Offset to Non-Default UVS Table. May be 0. */
+ public:
+ DEFINE_SIZE_STATIC (11);
+};
+
+struct CmapSubtableFormat14
+{
+ /* Finds the record for `variation_selector` and asks it to resolve
+  * `codepoint`; offsets inside the record are relative to this subtable. */
+ glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
+                                    hb_codepoint_t variation_selector,
+                                    hb_codepoint_t *glyph) const
+ {
+   const VariationSelectorRecord &selector_record = record.bsearch (variation_selector);
+   return selector_record.get_glyph (codepoint, glyph, this);
+ }
+
+ /* Adds the variation selector of every record to `out`. */
+ void collect_variation_selectors (hb_set_t *out) const
+ {
+   const unsigned int record_count = record.len;
+   for (unsigned int idx = 0; idx != record_count; ++idx)
+   {
+     const VariationSelectorRecord &selector_record = record.arrayZ[idx];
+     out->add (selector_record.varSelector);
+   }
+ }
+ /* Adds to `out` the codepoints listed by the record for
+  * `variation_selector`. */
+ void collect_variation_unicodes (hb_codepoint_t variation_selector,
+                                  hb_set_t *out) const
+ {
+   const VariationSelectorRecord &selector_record = record.bsearch (variation_selector);
+   selector_record.collect_unicodes (out, this);
+ }
+
+ void serialize (hb_serialize_context_t *c, /* Serializes a subset of the format 14 subtable at `base` into this object. */
+ const hb_set_t *unicodes,
+ const hb_set_t *glyphs_requested,
+ const hb_map_t *glyph_map,
+ const void *base)
+ {
+ auto snap = c->snapshot ();
+ unsigned table_initpos = c->length ();
+ const char* init_tail = c->tail; /* Remembered so the bytes added at the tail can be measured below. */
+
+ if (unlikely (!c->extend_min (*this))) return;
+ this->format = 14;
+
+ auto src_tbl = reinterpret_cast<const CmapSubtableFormat14*> (base);
+
+ /*
+ * Some versions of OTS require that offsets are in order. Due to the use
+ * of push()/pop_pack() serializing the variation records in order results
+ * in the offsets being in reverse order (first record has the largest
+ * offset). While this is perfectly valid, it will cause some versions of
+ * OTS to consider this table bad.
+ *
+ * So to prevent this issue we serialize the variation records in reverse
+ * order, so that the offsets are ordered from small to large. Since
+ * variation records are supposed to be in increasing order of varSelector
+ * we then have to reverse the order of the written variation selector
+ * records after everything is finalized.
+ */
+ hb_vector_t<hb_pair_t<unsigned, unsigned>> obj_indices;
+ for (int i = src_tbl->record.len - 1; i >= 0; i--)
+ {
+ hb_pair_t<unsigned, unsigned> result = src_tbl->record[i].copy (c, unicodes, glyphs_requested, glyph_map, base);
+ if (result.first || result.second) /* Only records that emitted at least one UVS table are kept. */
+ obj_indices.push (result);
+ }
+
+ if (c->length () - table_initpos == CmapSubtableFormat14::min_size) /* No record survived: drop the whole subtable. */
+ {
+ c->revert (snap);
+ return;
+ }
+
+ if (unlikely (!c->check_success (!obj_indices.in_error ())))
+ return;
+
+ int tail_len = init_tail - c->tail;
+ c->check_assign (this->length, c->length () - table_initpos + tail_len);
+ c->check_assign (this->record.len,
+ (c->length () - table_initpos - CmapSubtableFormat14::min_size) /
+ VariationSelectorRecord::static_size);
+
+ /* Correct the incorrect write order by reversing the order of the variation
+ records array. */
+ _reverse_variation_records ();
+
+ /* Now that records are in the right order, we can set up the offsets. */
+ _add_links_to_variation_records (c, obj_indices);
+ }
+
+ void _reverse_variation_records () /* Reverses the record array in place; serialize () writes the records in reverse order. */
+ {
+ record.as_array ().reverse ();
+ }
+
+ void _add_links_to_variation_records (hb_serialize_context_t *c, /* Links each record's two UVS offsets to the serialized objects in obj_indices. */
+ const hb_vector_t<hb_pair_t<unsigned, unsigned>>& obj_indices)
+ {
+ for (unsigned i = 0; i < obj_indices.length; i++)
+ {
+ /*
+ * Since the record array has been reversed (see comments in serialize())
+ * but obj_indices has not been, the indices at obj_indices[i]
+ * are for the variation record at record[j].
+ */
+ int j = obj_indices.length - 1 - i;
+ c->add_link (record[j].defaultUVS, obj_indices[i].first);
+ c->add_link (record[j].nonDefaultUVS, obj_indices[i].second);
+ }
+ }
+
+ void closure_glyphs (const hb_set_t *unicodes, /* Runs NonDefaultUVS::closure_glyphs on every record that has a non-default UVS table. */
+ hb_set_t *glyphset) const
+ {
+ + hb_iter (record)
+ | hb_filter (hb_bool, &VariationSelectorRecord::nonDefaultUVS) /* Skip records whose nonDefaultUVS offset is 0. */
+ | hb_map (&VariationSelectorRecord::nonDefaultUVS)
+ | hb_map (hb_add (this)) /* Resolve the offset relative to this subtable. */
+ | hb_apply ([=] (const NonDefaultUVS& _) { _.closure_glyphs (unicodes, glyphset); })
+ ;
+ }
+
+ /* Adds the codepoints of every variation selector record to `out`. */
+ void collect_unicodes (hb_set_t *out) const
+ {
+   for (const VariationSelectorRecord &selector_record : record)
+   {
+     selector_record.collect_unicodes (out, this);
+   }
+ }
+
+ /* Lets every variation selector record add its codepoints and mappings
+  * to `unicodes` and `mapping`. */
+ void collect_mapping (hb_set_t *unicodes, /* OUT */
+                       hb_map_t *mapping /* OUT */) const
+ {
+   for (const VariationSelectorRecord &selector_record : record)
+   {
+     selector_record.collect_mapping (this, unicodes, mapping);
+   }
+ }
+
+ /* Sanitizer hook: checks this struct, then the record array with this
+  * subtable as the offset base. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+   TRACE_SANITIZE (this);
+   if (!c->check_struct (this)) return_trace (false);
+   return_trace (record.sanitize (c, this));
+ }
+
+ protected:
+ HBUINT16 format; /* Format number is set to 14. */
+ HBUINT32 length; /* Byte length of this subtable. */
+ SortedArrayOf<VariationSelectorRecord, HBUINT32>
+ record; /* Variation selector records; sorted
+ * in increasing order of `varSelector'. */
+ public:
+ DEFINE_SIZE_ARRAY (10, record);
+};
+
+struct CmapSubtable
+{
+ /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
+
+ /* Dispatches a nominal glyph lookup to the subtable selected by u.format.
+  * Format 14 and unknown formats never yield a glyph here. */
+ bool get_glyph (hb_codepoint_t codepoint,
+                 hb_codepoint_t *glyph) const
+ {
+   bool found = false;
+   switch (u.format) {
+   case  0: found = u.format0 .get_glyph (codepoint, glyph); break;
+   case  4: found = u.format4 .get_glyph (codepoint, glyph); break;
+   case  6: found = u.format6 .get_glyph (codepoint, glyph); break;
+   case 10: found = u.format10.get_glyph (codepoint, glyph); break;
+   case 12: found = u.format12.get_glyph (codepoint, glyph); break;
+   case 13: found = u.format13.get_glyph (codepoint, glyph); break;
+   case 14:
+   default: break;
+   }
+   return found;
+ }
+ /* Dispatches collect_unicodes () to the subtable selected by u.format.
+  * Only formats 12 and 13 take num_glyphs; format 14 and unknown formats
+  * contribute nothing. */
+ void collect_unicodes (hb_set_t *out, unsigned int num_glyphs = UINT_MAX) const
+ {
+   switch (u.format) {
+   case  0: u.format0 .collect_unicodes (out); break;
+   case  4: u.format4 .collect_unicodes (out); break;
+   case  6: u.format6 .collect_unicodes (out); break;
+   case 10: u.format10.collect_unicodes (out); break;
+   case 12: u.format12.collect_unicodes (out, num_glyphs); break;
+   case 13: u.format13.collect_unicodes (out, num_glyphs); break;
+   case 14:
+   default: break;
+   }
+ }
+
+ /* Dispatches collect_mapping () to the subtable selected by u.format.
+  * Only formats 12 and 13 take num_glyphs; format 14 and unknown formats
+  * contribute nothing. */
+ void collect_mapping (hb_set_t *unicodes, /* OUT */
+                       hb_map_t *mapping, /* OUT */
+                       unsigned num_glyphs = UINT_MAX) const
+ {
+   switch (u.format) {
+   case  0: u.format0 .collect_mapping (unicodes, mapping); break;
+   case  4: u.format4 .collect_mapping (unicodes, mapping); break;
+   case  6: u.format6 .collect_mapping (unicodes, mapping); break;
+   case 10: u.format10.collect_mapping (unicodes, mapping); break;
+   case 12: u.format12.collect_mapping (unicodes, mapping, num_glyphs); break;
+   case 13: u.format13.collect_mapping (unicodes, mapping, num_glyphs); break;
+   case 14:
+   default: break;
+   }
+ }
+
+ /* Serializes a subtable of the requested `format`.  Formats 4 and 12 are
+  * built from the (codepoint, glyph id) iterator `it`; format 14 is built
+  * from the plan and the source subtable at `base`.  Any other format is
+  * ignored. */
+ template<typename Iterator,
+          hb_requires (hb_is_iterator (Iterator))>
+ void serialize (hb_serialize_context_t *c,
+                 Iterator it,
+                 unsigned format,
+                 const hb_subset_plan_t *plan,
+                 const void *base)
+ {
+   switch (format) {
+   case  4: u.format4.serialize (c, it); break;
+   case 12: u.format12.serialize (c, it); break;
+   case 14: u.format14.serialize (c, plan->unicodes, plan->glyphs_requested, plan->glyph_map, base); break;
+   default: break;
+   }
+ }
+
+ /* Sanitizer hook: the format field is checked first, then the subtable it
+  * selects.  Unknown formats are accepted. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+   TRACE_SANITIZE (this);
+   if (!u.format.sanitize (c)) return_trace (false);
+
+   bool valid = true;
+   switch (u.format) {
+   case  0: valid = u.format0 .sanitize (c); break;
+   case  4: valid = u.format4 .sanitize (c); break;
+   case  6: valid = u.format6 .sanitize (c); break;
+   case 10: valid = u.format10.sanitize (c); break;
+   case 12: valid = u.format12.sanitize (c); break;
+   case 13: valid = u.format13.sanitize (c); break;
+   case 14: valid = u.format14.sanitize (c); break;
+   default: break;
+   }
+   return_trace (valid);
+ }
+
+ public:
+ union {
+ HBUINT16 format; /* Format identifier */
+ CmapSubtableFormat0 format0;
+ CmapSubtableFormat4 format4;
+ CmapSubtableFormat6 format6;
+ CmapSubtableFormat10 format10;
+ CmapSubtableFormat12 format12;
+ CmapSubtableFormat13 format13;
+ CmapSubtableFormat14 format14;
+ } u;
+ public:
+ DEFINE_SIZE_UNION (2, format);
+};
+
+
+struct EncodingRecord
+{
+ /* Orders encoding records by platformID, then by encodingID. */
+ int cmp (const EncodingRecord &other) const
+ {
+   const int by_platform = platformID.cmp (other.platformID);
+   if (by_platform) return by_platform;
+   return encodingID.cmp (other.encodingID);
+ }
+
+ /* Sanitizer hook: checks this struct, then the subtable offset relative
+  * to `base`. */
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const
+ {
+   TRACE_SANITIZE (this);
+   if (!c->check_struct (this)) return_trace (false);
+   return_trace (subtable.sanitize (c, base));
+ }
+
+ template<typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ EncodingRecord* copy (hb_serialize_context_t *c, /* Embeds a copy of this record and links it to a serialized subtable; nullptr if no subtable is available. */
+ Iterator it,
+ unsigned format,
+ const void *base,
+ const hb_subset_plan_t *plan,
+ /* INOUT */ unsigned *objidx) const
+ {
+ TRACE_SERIALIZE (this);
+ auto snap = c->snapshot ();
+ auto *out = c->embed (this);
+ if (unlikely (!out)) return_trace (nullptr);
+ out->subtable = 0;
+
+ if (*objidx == 0) /* No subtable of this format serialized yet: build one now; otherwise the existing object is reused. */
+ {
+ CmapSubtable *cmapsubtable = c->push<CmapSubtable> ();
+ unsigned origin_length = c->length ();
+ cmapsubtable->serialize (c, it, format, plan, &(base+subtable));
+ if (c->length () - origin_length > 0) *objidx = c->pop_pack ();
+ else c->pop_discard ();
+ }
+
+ if (*objidx == 0) /* The subtable came out empty: remove the embedded record again. */
+ {
+ c->revert (snap);
+ return_trace (nullptr);
+ }
+
+ c->add_link (out->subtable, *objidx);
+ return_trace (out);
+ }
+
+ HBUINT16 platformID; /* Platform ID. */
+ HBUINT16 encodingID; /* Platform-specific encoding ID. */
+ LOffsetTo<CmapSubtable>
+ subtable; /* Byte offset from beginning of table to the subtable for this encoding. */
+ public:
+ DEFINE_SIZE_STATIC (8);
+};
+
+struct cmap
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_cmap;
+
+ template<typename Iterator, typename EncodingRecIter,
+ hb_requires (hb_is_iterator (EncodingRecIter))>
+ void serialize (hb_serialize_context_t *c, /* Writes the cmap header and one encoding record per entry of encodingrec_iter whose subtable format is 4, 12 or 14. */
+ Iterator it,
+ EncodingRecIter encodingrec_iter,
+ const void *base,
+ const hb_subset_plan_t *plan)
+ {
+ if (unlikely (!c->extend_min ((*this)))) return;
+ this->version = 0;
+
+ unsigned format4objidx = 0, format12objidx = 0, format14objidx = 0; /* One shared object index per format: records of the same format link to the same serialized subtable. */
+
+ for (const EncodingRecord& _ : encodingrec_iter)
+ {
+ unsigned format = (base+_.subtable).u.format;
+ if (!plan->glyphs_requested->is_empty ())
+ {
+ hb_set_t unicodes_set;
+ hb_map_t cp_glyphid_map;
+ (base+_.subtable).collect_mapping (&unicodes_set, &cp_glyphid_map);
+
+ auto table_iter =
+ + hb_zip (unicodes_set.iter(), unicodes_set.iter() | hb_map(cp_glyphid_map)) /* (codepoint, old glyph id) pairs of this subtable. */
+ | hb_filter (plan->_glyphset, hb_second) /* Keep pairs whose glyph is in the plan's glyph set. */
+ | hb_filter ([plan] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t>& p)
+ {
+ return plan->unicodes->has (p.first) ||
+ plan->glyphs_requested->has (p.second);
+ })
+ | hb_map ([plan] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t>& p_org) /* Translate the glyph id through plan->glyph_map. */
+ {
+ return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (p_org.first, plan->glyph_map->get(p_org.second));
+ })
+ ;
+
+ if (format == 4) c->copy (_, table_iter, 4u, base, plan, &format4objidx);
+ else if (format == 12) c->copy (_, table_iter, 12u, base, plan, &format12objidx);
+ else if (format == 14) c->copy (_, table_iter, 14u, base, plan, &format14objidx);
+ }
+ /* when --gids option is not used, we iterate input unicodes instead of
+ * all codepoints in each subtable, which is more efficient */
+ else
+ {
+ hb_set_t unicodes_set;
+ (base+_.subtable).collect_unicodes (&unicodes_set);
+
+ if (format == 4) c->copy (_, + it | hb_filter (unicodes_set, hb_first), 4u, base, plan, &format4objidx);
+ else if (format == 12) c->copy (_, + it | hb_filter (unicodes_set, hb_first), 12u, base, plan, &format12objidx);
+ else if (format == 14) c->copy (_, it, 14u, base, plan, &format14objidx);
+ }
+ }
+
+ c->check_assign(this->encodingRecord.len, (c->length () - cmap::min_size)/EncodingRecord::static_size); /* Record count is derived from the bytes written after the header. */
+ }
+
+ void closure_glyphs (const hb_set_t *unicodes, /* Runs closure_glyphs on every format 14 subtable referenced by the encoding records. */
+ hb_set_t *glyphset) const
+ {
+ + hb_iter (encodingRecord)
+ | hb_map (&EncodingRecord::subtable)
+ | hb_map (hb_add (this)) /* Resolve each subtable offset relative to this table. */
+ | hb_filter ([&] (const CmapSubtable& _) { return _.u.format == 14; })
+ | hb_apply ([=] (const CmapSubtable& _) { _.u.format14.closure_glyphs (unicodes, glyphset); })
+ ;
+ }
+
+ bool subset (hb_subset_context_t *c) const /* Builds the subsetted cmap in c->serializer; returns false when no usable Unicode encoding record is present. */
+ {
+ TRACE_SUBSET (this);
+
+ cmap *cmap_prime = c->serializer->start_embed<cmap> ();
+ if (unlikely (!c->serializer->check_success (cmap_prime))) return_trace (false);
+
+ auto encodingrec_iter =
+ + hb_iter (encodingRecord)
+ | hb_filter ([&] (const EncodingRecord& _) /* Keep (platform, encoding) pairs (0,3), (0,4), (3,1), (3,10), plus any record pointing at a format 14 subtable. */
+ {
+ if ((_.platformID == 0 && _.encodingID == 3) ||
+ (_.platformID == 0 && _.encodingID == 4) ||
+ (_.platformID == 3 && _.encodingID == 1) ||
+ (_.platformID == 3 && _.encodingID == 10) ||
+ (this + _.subtable).u.format == 14)
+ return true;
+
+ return false;
+ })
+ ;
+
+ if (unlikely (!encodingrec_iter.len ())) return_trace (false);
+
+ const EncodingRecord *unicode_bmp= nullptr, *unicode_ucs4 = nullptr, *ms_bmp = nullptr, *ms_ucs4 = nullptr;
+ bool has_format12 = false;
+
+ for (const EncodingRecord& _ : encodingrec_iter)
+ {
+ unsigned format = (this + _.subtable).u.format;
+ if (format == 12) has_format12 = true;
+
+ const EncodingRecord *table = hb_addressof (_);
+ if (_.platformID == 0 && _.encodingID == 3) unicode_bmp = table;
+ else if (_.platformID == 0 && _.encodingID == 4) unicode_ucs4 = table;
+ else if (_.platformID == 3 && _.encodingID == 1) ms_bmp = table;
+ else if (_.platformID == 3 && _.encodingID == 10) ms_ucs4 = table;
+ }
+
+ if (unlikely (!has_format12 && !unicode_bmp && !ms_bmp)) return_trace (false); /* Without format 12, a BMP record is required. */
+ if (unlikely (has_format12 && (!unicode_ucs4 && !ms_ucs4))) return_trace (false); /* With format 12, a UCS-4 record is required. */
+
+ auto it =
+ + hb_iter (c->plan->unicodes)
+ | hb_map ([&] (hb_codepoint_t _) /* Pair every retained codepoint with its new glyph id. */
+ {
+ hb_codepoint_t new_gid = HB_MAP_VALUE_INVALID;
+ c->plan->new_gid_for_codepoint (_, &new_gid);
+ return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (_, new_gid);
+ })
+ | hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _) /* Drop codepoints for which no new glyph id was found. */
+ { return (_.second != HB_MAP_VALUE_INVALID); })
+ ;
+ cmap_prime->serialize (c->serializer, it, encodingrec_iter, this, c->plan);
+ return_trace (true);
+ }
+
+ /* Picks the subtable to use for nominal glyph lookups.  A symbol subtable
+  * (3, 0) wins if present and sets *symbol to true; otherwise the first
+  * match in a fixed (platform, encoding) preference order is returned.
+  * Falls back to the Null subtable when nothing matches. */
+ const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const
+ {
+   if (symbol) *symbol = false;
+
+   /* Symbol subtable.
+    * Prefer symbol if available.
+    * https://github.com/harfbuzz/harfbuzz/issues/1918 */
+   if (const CmapSubtable *symbol_subtable = this->find_subtable (3, 0))
+   {
+     if (symbol) *symbol = true;
+     return symbol_subtable;
+   }
+
+   const unsigned int preference[][2] =
+   {
+     /* 32-bit subtables. */
+     {3, 10}, {0, 6}, {0, 4},
+     /* 16-bit subtables. */
+     {3, 1}, {0, 3}, {0, 2}, {0, 1}, {0, 0},
+   };
+   for (const auto &entry : preference)
+     if (const CmapSubtable *candidate = this->find_subtable (entry[0], entry[1]))
+       return candidate;
+
+   /* Meh. */
+   return &Null (CmapSubtable);
+ }
+
+ struct accelerator_t
+ {
+ void init (hb_face_t *face) /* Loads the face's cmap table and selects the subtables and lookup function used by the getters. */
+ {
+ this->table = hb_sanitize_context_t ().reference_table<cmap> (face);
+ bool symbol;
+ this->subtable = table->find_best_subtable (&symbol);
+ this->subtable_uvs = &Null (CmapSubtableFormat14);
+ {
+ const CmapSubtable *st = table->find_subtable (0, 5); /* Variation sequences are looked up at (platform 0, encoding 5) and must be format 14. */
+ if (st && st->u.format == 14)
+ subtable_uvs = &st->u.format14;
+ }
+
+ this->get_glyph_data = subtable;
+ if (unlikely (symbol))
+ this->get_glyph_funcZ = get_glyph_from_symbol<CmapSubtable>;
+ else
+ {
+ switch (subtable->u.format) {
+ /* Accelerate format 4 and format 12. */
+ default:
+ this->get_glyph_funcZ = get_glyph_from<CmapSubtable>;
+ break;
+ case 12:
+ this->get_glyph_funcZ = get_glyph_from<CmapSubtableFormat12>;
+ break;
+ case 4:
+ {
+ this->format4_accel.init (&subtable->u.format4);
+ this->get_glyph_data = &this->format4_accel; /* Format 4 lookups go through the dedicated accelerator object. */
+ this->get_glyph_funcZ = this->format4_accel.get_glyph_func;
+ break;
+ }
+ }
+ }
+ }
+
+ /* Destroys the table blob held in `table`. */
+ void fini ()
+ {
+   this->table.destroy ();
+ }
+
+ /* Maps `unicode` to a glyph through the lookup function chosen in
+  * init ().  Returns false when no lookup function is set. */
+ bool get_nominal_glyph (hb_codepoint_t unicode,
+                         hb_codepoint_t *glyph) const
+ {
+   hb_cmap_get_glyph_func_t lookup = this->get_glyph_funcZ;
+   if (unlikely (!lookup)) return false;
+   return lookup (this->get_glyph_data, unicode, glyph);
+ }
+ unsigned int get_nominal_glyphs (unsigned int count, /* Maps up to `count` codepoints to glyphs; returns how many were mapped before the first failure. */
+ const hb_codepoint_t *first_unicode,
+ unsigned int unicode_stride,
+ hb_codepoint_t *first_glyph,
+ unsigned int glyph_stride) const
+ {
+ if (unlikely (!this->get_glyph_funcZ)) return 0;
+
+ hb_cmap_get_glyph_func_t get_glyph_funcZ = this->get_glyph_funcZ; /* Local copies of the function pointer and its data for the loop. */
+ const void *get_glyph_data = this->get_glyph_data;
+
+ unsigned int done;
+ for (done = 0;
+ done < count && get_glyph_funcZ (get_glyph_data, *first_unicode, first_glyph); /* Stops at the first codepoint the lookup rejects. */
+ done++)
+ {
+ first_unicode = &StructAtOffsetUnaligned<hb_codepoint_t> (first_unicode, unicode_stride); /* Strides are byte offsets between consecutive elements. */
+ first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride);
+ }
+ return done;
+ }
+
+ /* Resolves `unicode` followed by `variation_selector`.  When the UVS
+  * subtable says to use the default glyph, the nominal lookup is used. */
+ bool get_variation_glyph (hb_codepoint_t unicode,
+                           hb_codepoint_t variation_selector,
+                           hb_codepoint_t *glyph) const
+ {
+   const glyph_variant_t variant =
+     this->subtable_uvs->get_glyph_variant (unicode, variation_selector, glyph);
+
+   if (variant == GLYPH_VARIANT_NOT_FOUND) return false;
+   if (variant == GLYPH_VARIANT_FOUND) return true;
+
+   /* GLYPH_VARIANT_USE_DEFAULT */
+   return get_nominal_glyph (unicode, glyph);
+ }
+
+ /* Forwards to the selected nominal subtable. */
+ void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const
+ {
+   subtable->collect_unicodes (out, num_glyphs);
+ }
+ /* Forwards to the selected nominal subtable. */
+ void collect_mapping (hb_set_t *unicodes, hb_map_t *mapping,
+                       unsigned num_glyphs = UINT_MAX) const
+ {
+   subtable->collect_mapping (unicodes, mapping, num_glyphs);
+ }
+ /* Forwards to the variation-sequence (format 14) subtable. */
+ void collect_variation_selectors (hb_set_t *out) const
+ {
+   subtable_uvs->collect_variation_selectors (out);
+ }
+ /* Forwards to the variation-sequence (format 14) subtable. */
+ void collect_variation_unicodes (hb_codepoint_t variation_selector,
+                                  hb_set_t *out) const
+ {
+   subtable_uvs->collect_variation_unicodes (variation_selector, out);
+ }
+
+ protected:
+ typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj,
+ hb_codepoint_t codepoint,
+ hb_codepoint_t *glyph);
+
+ /* Adapter matching hb_cmap_get_glyph_func_t: treats `obj` as a `Type` and
+  * calls its get_glyph (). */
+ template <typename Type>
+ HB_INTERNAL static bool get_glyph_from (const void *obj,
+                                         hb_codepoint_t codepoint,
+                                         hb_codepoint_t *glyph)
+ {
+   return ((const Type *) obj)->get_glyph (codepoint, glyph);
+ }
+
+ /* Like get_glyph_from (), with a fallback for symbol fonts: codepoints up
+  * to U+00FF that have no direct mapping are retried at 0xF000 + codepoint. */
+ template <typename Type>
+ HB_INTERNAL static bool get_glyph_from_symbol (const void *obj,
+                                                hb_codepoint_t codepoint,
+                                                hb_codepoint_t *glyph)
+ {
+   const Type *subtable_obj = (const Type *) obj;
+   if (likely (subtable_obj->get_glyph (codepoint, glyph)))
+     return true;
+
+   if (codepoint > 0x00FFu)
+     return false;
+
+   /* For symbol-encoded OpenType fonts, we duplicate the
+    * U+F000..F0FF range at U+0000..U+00FF. That's what
+    * Windows seems to do, and that's hinted about at:
+    * https://docs.microsoft.com/en-us/typography/opentype/spec/recom
+    * under "Non-Standard (Symbol) Fonts". */
+   return subtable_obj->get_glyph (0xF000u + codepoint, glyph);
+ }
+
+ private:
+ hb_nonnull_ptr_t<const CmapSubtable> subtable;
+ hb_nonnull_ptr_t<const CmapSubtableFormat14> subtable_uvs;
+
+ hb_cmap_get_glyph_func_t get_glyph_funcZ;
+ const void *get_glyph_data;
+
+ CmapSubtableFormat4::accelerator_t format4_accel;
+
+ public:
+ hb_blob_ptr_t<cmap> table;
+ };
+
+ protected:
+
+ /* Binary-searches the encoding records for (platform_id, encoding_id).
+  * Returns the subtable the matching record points at, or nullptr when the
+  * search result has a zero subtable offset. */
+ const CmapSubtable *find_subtable (unsigned int platform_id,
+                                    unsigned int encoding_id) const
+ {
+   EncodingRecord probe;
+   probe.platformID = platform_id;
+   probe.encodingID = encoding_id;
+
+   const EncodingRecord &match = encodingRecord.bsearch (probe);
+   if (!match.subtable) return nullptr;
+   return &(this+match.subtable);
+ }
+
+ /* Binary-searches the encoding records for (platform_id, encoding_id) and
+  * returns what the array's bsearch () yields for that key. */
+ const EncodingRecord *find_encodingrec (unsigned int platform_id,
+                                         unsigned int encoding_id) const
+ {
+   EncodingRecord probe;
+   probe.platformID = platform_id;
+   probe.encodingID = encoding_id;
+
+   return encodingRecord.as_array ().bsearch (probe);
+ }
+
+ bool find_subtable (unsigned format) const /* True if at least one encoding record points at a subtable of the given format. */
+ {
+ auto it =
+ + hb_iter (encodingRecord)
+ | hb_map (&EncodingRecord::subtable)
+ | hb_map (hb_add (this)) /* Resolve each subtable offset relative to this table. */
+ | hb_filter ([&] (const CmapSubtable& _) { return _.u.format == format; })
+ ;
+
+ return it.len (); /* Number of matches, converted to bool. */
+ }
+
+ public:
+
+ /* Sanitizer hook: checks this struct, requires version 0, then sanitizes
+  * the encoding records with this table as the offset base. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+   TRACE_SANITIZE (this);
+   if (!c->check_struct (this)) return_trace (false);
+   if (unlikely (!(version == 0))) return_trace (false);
+   return_trace (encodingRecord.sanitize (c, this));
+ }
+
+ protected:
+ HBUINT16 version; /* Table version number (0). */
+ SortedArrayOf<EncodingRecord>
+ encodingRecord; /* Encoding tables. */
+ public:
+ DEFINE_SIZE_ARRAY (4, encodingRecord);
+};
+
+struct cmap_accelerator_t : cmap::accelerator_t {}; /* Namespace-level name for cmap::accelerator_t; adds no members. */
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_CMAP_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-color-cbdt-table.hh b/thirdparty/harfbuzz/src/hb-ot-color-cbdt-table.hh
new file mode 100644
index 0000000000..aaa1c37c64
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-color-cbdt-table.hh
@@ -0,0 +1,985 @@
+/*
+ * Copyright © 2016 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Seigo Nonaka, Calder Kitagawa
+ */
+
+#ifndef HB_OT_COLOR_CBDT_TABLE_HH
+#define HB_OT_COLOR_CBDT_TABLE_HH
+
+#include "hb-open-type.hh"
+
+/*
+ * CBLC -- Color Bitmap Location
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/cblc
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/eblc
+ * CBDT -- Color Bitmap Data
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/cbdt
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/ebdt
+ */
+#define HB_OT_TAG_CBLC HB_TAG('C','B','L','C')
+#define HB_OT_TAG_CBDT HB_TAG('C','B','D','T')
+
+
+namespace OT {
+
+struct cblc_bitmap_size_subset_context_t /* State handed down while subsetting the
+ * index subtables of one BitmapSizeTable: the source CBDT bytes, the CBDT
+ * buffer being built, and running totals that the callee writes back. */
+{
+ const char *cbdt; /* Source CBDT bytes that glyph image data is copied from. */
+ unsigned int cbdt_length; /* Byte length of cbdt; image ranges are bounds-checked against it. */
+ hb_vector_t<char> *cbdt_prime; /* CBDT output buffer that copied image data is appended to. */
+ unsigned int size; /* INOUT
+ * Input: old size of IndexSubtable
+ * Output: new size of IndexSubtable
+ */
+ unsigned int num_tables; /* INOUT
+ * Input: old number of subtables.
+ * Output: new number of subtables.
+ */
+ hb_codepoint_t start_glyph; /* OUT */
+ hb_codepoint_t end_glyph; /* OUT */
+};
+
+static inline bool
+_copy_data_to_cbdt (hb_vector_t<char> *cbdt_prime,
+                    const void *data,
+                    unsigned length)
+{
+  /* Append `length` bytes taken from `data` to the end of cbdt_prime.
+   * Returns false, without touching the stored length, when the vector
+   * reports that it cannot reserve the grown size. */
+  const unsigned int grown_len = cbdt_prime->length + length;
+
+  if (unlikely (!cbdt_prime->alloc (grown_len)))
+    return false;
+
+  auto *tail = cbdt_prime->arrayZ + cbdt_prime->length;
+  memcpy (tail, data, length);
+  cbdt_prime->length = grown_len;
+
+  return true;
+}
+
+struct SmallGlyphMetrics
+{
+  /* Bounds-check this fixed-size record. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool in_bounds = c->check_struct (this);
+    return_trace (in_bounds);
+  }
+
+  /* Fill `extents` from the stored metrics, passing every value through
+   * the font's em_scale_x / em_scale_y.  The height is negated before it
+   * is scaled. */
+  void get_extents (hb_font_t *font, hb_glyph_extents_t *extents) const
+  {
+    const int negated_height = -static_cast<int>(height);
+
+    extents->width = font->em_scale_x (width);
+    extents->height = font->em_scale_y (negated_height);
+    extents->x_bearing = font->em_scale_x (bearingX);
+    extents->y_bearing = font->em_scale_y (bearingY);
+  }
+
+  HBUINT8 height;
+  HBUINT8 width;
+  HBINT8 bearingX;
+  HBINT8 bearingY;
+  HBUINT8 advance;
+  public:
+  DEFINE_SIZE_STATIC (5);
+};
+
+struct BigGlyphMetrics : SmallGlyphMetrics /* SmallGlyphMetrics followed by three vertical-layout fields (8 bytes total). */
+{
+ HBINT8 vertBearingX;
+ HBINT8 vertBearingY;
+ HBUINT8 vertAdvance;
+ public:
+ DEFINE_SIZE_STATIC (8); /* 5 bytes inherited + 3 bytes declared here */
+};
+
+struct SBitLineMetrics
+{
+  /* Bounds-check this fixed-size, 12-byte record. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool in_bounds = c->check_struct (this);
+    return_trace (in_bounds);
+  }
+
+  HBINT8 ascender;
+  HBINT8 decender;
+  HBUINT8 widthMax;
+  HBINT8 caretSlopeNumerator;
+  HBINT8 caretSlopeDenominator;
+  HBINT8 caretOffset;
+  HBINT8 minOriginSB;
+  HBINT8 minAdvanceSB;
+  HBINT8 maxBeforeBL;
+  HBINT8 minAfterBL;
+  HBINT8 padding1;
+  HBINT8 padding2;
+  public:
+  DEFINE_SIZE_STATIC (12);
+};
+
+
+/*
+ * Index Subtables.
+ */
+
+struct IndexSubtableHeader
+{
+  /* Bounds-check the 8-byte header shared by the index subtable formats. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool in_bounds = c->check_struct (this);
+    return_trace (in_bounds);
+  }
+
+  HBUINT16 indexFormat;
+  HBUINT16 imageFormat;
+  HBUINT32 imageDataOffset;
+  public:
+  DEFINE_SIZE_STATIC (8);
+};
+
+template <typename OffsetType>
+struct IndexSubtableFormat1Or3
+{
+  /* Validate the header plus glyph_count + 1 offsets (one past the last
+   * glyph, so that every glyph has an end offset). */
+  bool sanitize (hb_sanitize_context_t *c, unsigned int glyph_count) const
+  {
+    TRACE_SANITIZE (this);
+    if (!c->check_struct (this)) return_trace (false);
+    return_trace (offsetArrayZ.sanitize (c, glyph_count + 1));
+  }
+
+  /* Report where the image for entry `idx` lives: *offset is the entry's
+   * start added to header.imageDataOffset, *length is the distance to the
+   * next entry.  Returns false, leaving the outputs untouched, when the
+   * next entry does not lie strictly after this one. */
+  bool get_image_data (unsigned int idx,
+                       unsigned int *offset,
+                       unsigned int *length) const
+  {
+    const unsigned int begin = offsetArrayZ[idx];
+    const unsigned int end = offsetArrayZ[idx + 1];
+
+    if (unlikely (end <= begin))
+      return false;
+
+    *offset = header.imageDataOffset + begin;
+    *length = end - begin;
+    return true;
+  }
+
+  /* Embed one more offset entry through the serializer and add its byte
+   * size to *size.  *size is bumped even when the embed fails; the return
+   * value tells whether the embed succeeded. */
+  bool add_offset (hb_serialize_context_t *c,
+                   unsigned int offset,
+                   unsigned int *size /* OUT (accumulated) */)
+  {
+    TRACE_SERIALIZE (this);
+    Offset<OffsetType> entry;
+    entry = offset;
+    *size += sizeof (OffsetType);
+    return_trace ((bool) c->embed (entry));
+  }
+
+  IndexSubtableHeader header;
+  UnsizedArrayOf<Offset<OffsetType>>
+  offsetArrayZ;
+  public:
+  DEFINE_SIZE_ARRAY (8, offsetArrayZ);
+};
+
+struct IndexSubtableFormat1 : IndexSubtableFormat1Or3<HBUINT32> {}; /* index format 1: 32-bit offset entries */
+struct IndexSubtableFormat3 : IndexSubtableFormat1Or3<HBUINT16> {}; /* index format 3: 16-bit offset entries */
+
+struct IndexSubtable /* Dispatches on u.header.indexFormat.  Only index formats
+ * 1 and 3 are implemented; every other format takes the default branch of
+ * the switches below. */
+{
+ bool sanitize (hb_sanitize_context_t *c, unsigned int glyph_count) const
+ {
+ TRACE_SANITIZE (this);
+ if (!u.header.sanitize (c)) return_trace (false); /* header first: indexFormat is read right below */
+ switch (u.header.indexFormat)
+ {
+ case 1: return_trace (u.format1.sanitize (c, glyph_count));
+ case 3: return_trace (u.format3.sanitize (c, glyph_count));
+ default:return_trace (true); /* other formats pass with only the header checked */
+ }
+ }
+
+ bool
+ finish_subtable (hb_serialize_context_t *c,
+ unsigned int cbdt_prime_len,
+ unsigned int num_glyphs,
+ unsigned int *size /* OUT (accumulated) */)
+ { /* Appends one more offset entry holding cbdt_prime_len relative to this
+ * subtable's imageDataOffset.  Returns false for formats other than 1 and 3. */
+ TRACE_SERIALIZE (this);
+
+ unsigned int local_offset = cbdt_prime_len - u.header.imageDataOffset;
+ switch (u.header.indexFormat)
+ {
+ case 1: return_trace (u.format1.add_offset (c, local_offset, size));
+ case 3: {
+ if (!u.format3.add_offset (c, local_offset, size))
+ return_trace (false);
+ if (!(num_glyphs & 0x01)) // Pad to 32-bit alignment if needed: an even glyph count gives an odd number of 16-bit entries.
+ return_trace (u.format3.add_offset (c, 0, size));
+ return_trace (true);
+ }
+ // TODO: implement 2, 4, 5.
+ case 2: case 4: // No-op.
+ case 5: // Pad to 32-bit aligned.
+ default: return_trace (false);
+ }
+ }
+
+ bool
+ fill_missing_glyphs (hb_serialize_context_t *c,
+ unsigned int cbdt_prime_len,
+ unsigned int num_missing,
+ unsigned int *size /* OUT (accumulated) */,
+ unsigned int *num_glyphs /* OUT (accumulated) */)
+ { /* Appends num_missing offset entries that all hold the same local offset,
+ * counting each in *num_glyphs.  Returns false on the first failed append
+ * or for formats other than 1 and 3. */
+ TRACE_SERIALIZE (this);
+
+ unsigned int local_offset = cbdt_prime_len - u.header.imageDataOffset;
+ switch (u.header.indexFormat)
+ {
+ case 1: {
+ for (unsigned int i = 0; i < num_missing; i++)
+ {
+ if (unlikely (!u.format1.add_offset (c, local_offset, size)))
+ return_trace (false);
+ *num_glyphs += 1;
+ }
+ return_trace (true);
+ }
+ case 3: {
+ for (unsigned int i = 0; i < num_missing; i++)
+ {
+ if (unlikely (!u.format3.add_offset (c, local_offset, size)))
+ return_trace (false);
+ *num_glyphs += 1;
+ }
+ return_trace (true);
+ }
+ // TODO: implement 2, 4, 5.
+ case 2: // Add empty space in cbdt_prime?.
+ case 4: case 5: // No-op as sparse is supported.
+ default: return_trace (false);
+ }
+ }
+
+ bool
+ copy_glyph_at_idx (hb_serialize_context_t *c, unsigned int idx,
+ const char *cbdt, unsigned int cbdt_length,
+ hb_vector_t<char> *cbdt_prime /* INOUT */,
+ IndexSubtable *subtable_prime /* INOUT */,
+ unsigned int *size /* OUT (accumulated) */) const
+ { /* Copies the image bytes of entry idx of this subtable from cbdt to the
+ * end of cbdt_prime and records their starting offset in subtable_prime. */
+ TRACE_SERIALIZE (this);
+
+ unsigned int offset, length, format;
+ if (unlikely (!get_image_data (idx, &offset, &length, &format))) return_trace (false);
+ if (unlikely (offset > cbdt_length || cbdt_length - offset < length)) return_trace (false); /* [offset, offset + length) must lie inside cbdt */
+
+ auto *header_prime = subtable_prime->get_header ();
+ unsigned int new_local_offset = cbdt_prime->length - (unsigned int) header_prime->imageDataOffset; /* taken before the append, so it marks where the copy starts */
+ if (unlikely (!_copy_data_to_cbdt (cbdt_prime, cbdt + offset, length))) return_trace (false);
+
+ return_trace (subtable_prime->add_offset (c, new_local_offset, size));
+ }
+
+ bool
+ add_offset (hb_serialize_context_t *c, unsigned int local_offset,
+ unsigned int *size /* OUT (accumulated) */)
+ { /* Forwards to the format-specific add_offset; false for other formats. */
+ TRACE_SERIALIZE (this);
+ switch (u.header.indexFormat)
+ {
+ case 1: return_trace (u.format1.add_offset (c, local_offset, size));
+ case 3: return_trace (u.format3.add_offset (c, local_offset, size));
+ // TODO: Implement tables 2, 4, 5
+ case 2: // Should be a no-op.
+ case 4: case 5: // Handle sparse cases.
+ default: return_trace (false);
+ }
+ }
+
+ bool get_extents (hb_glyph_extents_t *extents HB_UNUSED) const
+ { /* Currently returns false for every format; extents is never written. */
+ switch (u.header.indexFormat)
+ {
+ case 2: case 5: /* TODO */
+ case 1: case 3: case 4: /* Variable-metrics formats do not have metrics here. */
+ default:return (false);
+ }
+ }
+
+ bool
+ get_image_data (unsigned int idx, unsigned int *offset,
+ unsigned int *length, unsigned int *format) const
+ {
+ *format = u.header.imageFormat; /* written even when the lookup below fails */
+ switch (u.header.indexFormat)
+ {
+ case 1: return u.format1.get_image_data (idx, offset, length);
+ case 3: return u.format3.get_image_data (idx, offset, length);
+ default: return false;
+ }
+ }
+
+ const IndexSubtableHeader* get_header () const { return &u.header; }
+
+ void populate_header (unsigned index_format,
+ unsigned image_format,
+ unsigned int image_data_offset,
+ unsigned int *size)
+ { /* Writes the three header fields and, for formats 1 and 3, adds that
+ * format's min_size to *size.  Other formats leave *size unchanged. */
+ u.header.indexFormat = index_format;
+ u.header.imageFormat = image_format;
+ u.header.imageDataOffset = image_data_offset;
+ switch (u.header.indexFormat)
+ {
+ case 1: *size += IndexSubtableFormat1::min_size; break;
+ case 3: *size += IndexSubtableFormat3::min_size; break;
+ }
+ }
+
+ protected:
+ union {
+ IndexSubtableHeader header;
+ IndexSubtableFormat1 format1;
+ IndexSubtableFormat3 format3;
+ /* TODO: Format 2, 4, 5. */
+ } u;
+ public:
+ DEFINE_SIZE_UNION (8, header);
+};
+
+struct IndexSubtableRecord /* Maps the inclusive glyph range
+ * [firstGlyphIndex, lastGlyphIndex] to one IndexSubtable, and carries the
+ * subsetting helpers that rebuild such records. */
+{
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ firstGlyphIndex <= lastGlyphIndex && /* checked first so the count below cannot go negative */
+ offsetToSubtable.sanitize (c, base, lastGlyphIndex - firstGlyphIndex + 1)); /* last argument is the glyph count of the range */
+ }
+
+ const IndexSubtable* get_subtable (const void *base) const
+ {
+ return &(base+offsetToSubtable);
+ }
+
+ bool add_new_subtable (hb_subset_context_t* c,
+ cblc_bitmap_size_subset_context_t *bitmap_size_context,
+ IndexSubtableRecord *record,
+ const hb_vector_t<hb_pair_t<hb_codepoint_t, const IndexSubtableRecord*>> *lookup, /* IN */
+ const void *base,
+ unsigned int *start /* INOUT */) const
+ { /* Serializes one new IndexSubtable covering the run of lookup entries,
+ * starting at *start, that share this record's source subtable header.
+ * On return *start is the index of the first entry not consumed. */
+ TRACE_SERIALIZE (this);
+
+ auto *subtable = c->serializer->start_embed<IndexSubtable> ();
+ if (unlikely (!subtable)) return_trace (false);
+ if (unlikely (!c->serializer->extend_min (subtable))) return_trace (false);
+
+ auto *old_subtable = get_subtable (base);
+ auto *old_header = old_subtable->get_header ();
+
+ subtable->populate_header (old_header->indexFormat,
+ old_header->imageFormat,
+ bitmap_size_context->cbdt_prime->length, /* the new subtable's image data starts at the current end of cbdt_prime */
+ &bitmap_size_context->size);
+
+ unsigned int num_glyphs = 0;
+ bool early_exit = false;
+ for (unsigned int i = *start; i < lookup->length; i++)
+ {
+ hb_codepoint_t new_gid = (*lookup)[i].first;
+ const IndexSubtableRecord *next_record = (*lookup)[i].second;
+ const IndexSubtable *next_subtable = next_record->get_subtable (base);
+ auto *next_header = next_subtable->get_header ();
+ if (next_header != old_header) /* a different source subtable begins here: stop and let the caller start a new record */
+ {
+ *start = i;
+ early_exit = true;
+ break;
+ }
+ unsigned int num_missing = record->add_glyph_for_subset (new_gid); /* widens record's range and reports the gap before new_gid */
+ if (unlikely (!subtable->fill_missing_glyphs (c->serializer,
+ bitmap_size_context->cbdt_prime->length,
+ num_missing,
+ &bitmap_size_context->size,
+ &num_glyphs)))
+ return_trace (false);
+
+ hb_codepoint_t old_gid = 0;
+ c->plan->old_gid_for_new_gid (new_gid, &old_gid);
+ if (old_gid < next_record->firstGlyphIndex) /* guards the unsigned subtraction below */
+ return_trace (false);
+
+ unsigned int old_idx = (unsigned int) old_gid - next_record->firstGlyphIndex;
+ if (unlikely (!next_subtable->copy_glyph_at_idx (c->serializer,
+ old_idx,
+ bitmap_size_context->cbdt,
+ bitmap_size_context->cbdt_length,
+ bitmap_size_context->cbdt_prime,
+ subtable,
+ &bitmap_size_context->size)))
+ return_trace (false);
+ num_glyphs += 1;
+ }
+ if (!early_exit)
+ *start = lookup->length; /* the whole remainder of the lookup was consumed */
+ if (unlikely (!subtable->finish_subtable (c->serializer,
+ bitmap_size_context->cbdt_prime->length,
+ num_glyphs,
+ &bitmap_size_context->size)))
+ return_trace (false);
+ return_trace (true);
+ }
+
+ bool add_new_record (hb_subset_context_t *c,
+ cblc_bitmap_size_subset_context_t *bitmap_size_context,
+ const hb_vector_t<hb_pair_t<hb_codepoint_t, const IndexSubtableRecord*>> *lookup, /* IN */
+ const void *base,
+ unsigned int *start, /* INOUT */
+ hb_vector_t<IndexSubtableRecord>* records /* INOUT */) const
+ { /* Appends a fresh record to *records and serializes its subtable inside a
+ * serializer push.  On failure everything done here is undone.  On success
+ * the push is left open for the caller to close. */
+ TRACE_SERIALIZE (this);
+ auto snap = c->serializer->snapshot ();
+ unsigned int old_size = bitmap_size_context->size;
+ unsigned int old_cbdt_prime_length = bitmap_size_context->cbdt_prime->length;
+
+ // Set to invalid state to indicate filling glyphs is not yet started.
+ if (unlikely (!records->resize (records->length + 1)))
+ return_trace (c->serializer->check_success (false));
+
+ (*records)[records->length - 1].firstGlyphIndex = 1; /* first > last is the "empty" marker add_glyph_for_subset tests for */
+ (*records)[records->length - 1].lastGlyphIndex = 0;
+ bitmap_size_context->size += IndexSubtableRecord::min_size;
+
+ c->serializer->push ();
+
+ if (unlikely (!add_new_subtable (c, bitmap_size_context, &((*records)[records->length - 1]), lookup, base, start)))
+ {
+ c->serializer->pop_discard ();
+ c->serializer->revert (snap);
+ bitmap_size_context->cbdt_prime->shrink (old_cbdt_prime_length); /* drop image bytes appended by the failed attempt */
+ bitmap_size_context->size = old_size;
+ records->resize (records->length - 1);
+ return_trace (false);
+ }
+
+ bitmap_size_context->num_tables += 1;
+ return_trace (true);
+ }
+
+ unsigned int add_glyph_for_subset (hb_codepoint_t gid)
+ { /* Extends the range to include gid and returns how many glyph ids were
+ * skipped between the previous lastGlyphIndex and gid (0 for the first glyph). */
+ if (firstGlyphIndex > lastGlyphIndex) /* empty marker: this is the first glyph of the record */
+ {
+ firstGlyphIndex = gid;
+ lastGlyphIndex = gid;
+ return 0;
+ }
+ // TODO maybe assert? this shouldn't occur.
+ if (lastGlyphIndex > gid)
+ return 0;
+ unsigned int num_missing = (unsigned int) (gid - lastGlyphIndex - 1); /* NOTE(review): wraps if gid == lastGlyphIndex; callers appear to pass strictly increasing gids -- confirm */
+ lastGlyphIndex = gid;
+ return num_missing;
+ }
+
+ bool get_extents (hb_glyph_extents_t *extents, const void *base) const
+ { return (base+offsetToSubtable).get_extents (extents); }
+
+ bool get_image_data (unsigned int gid,
+ const void *base,
+ unsigned int *offset,
+ unsigned int *length,
+ unsigned int *format) const
+ {
+ if (gid < firstGlyphIndex || gid > lastGlyphIndex) return false; /* gid outside this record's range */
+ return (base+offsetToSubtable).get_image_data (gid - firstGlyphIndex, /* index relative to the first glyph of the range */
+ offset, length, format);
+ }
+
+ HBGlyphID firstGlyphIndex;
+ HBGlyphID lastGlyphIndex;
+ LOffsetTo<IndexSubtable> offsetToSubtable;
+ public:
+ DEFINE_SIZE_STATIC (8);
+};
+
+struct IndexSubtableArray /* Unsized array of IndexSubtableRecord; the element
+ * count is not stored here and is always supplied by the caller. */
+{
+ friend struct CBDT;
+
+ bool sanitize (hb_sanitize_context_t *c, unsigned int count) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (indexSubtablesZ.sanitize (c, count, this));
+ }
+
+ void
+ build_lookup (hb_subset_context_t *c, cblc_bitmap_size_subset_context_t *bitmap_size_context,
+ hb_vector_t<hb_pair_t<hb_codepoint_t,
+ const IndexSubtableRecord*>> *lookup /* OUT */) const
+ { /* Pushes a (new_gid, source record) pair for every output glyph that has
+ * image data in this array, in increasing new_gid order, and records the
+ * first and last such gid in the context. */
+ bool start_glyph_is_set = false;
+ for (hb_codepoint_t new_gid = 0; new_gid < c->plan->num_output_glyphs (); new_gid++)
+ {
+ hb_codepoint_t old_gid;
+ if (unlikely (!c->plan->old_gid_for_new_gid (new_gid, &old_gid))) continue;
+
+ const IndexSubtableRecord* record = find_table (old_gid, bitmap_size_context->num_tables); /* num_tables still holds the source subtable count here */
+ if (unlikely (!record)) continue;
+
+ // Don't add gaps to the lookup. The best way to determine if a glyph is a
+ // gap is that it has no image data.
+ unsigned int offset, length, format;
+ if (unlikely (!record->get_image_data (old_gid, this, &offset, &length, &format))) continue;
+
+ lookup->push (hb_pair_t<hb_codepoint_t, const IndexSubtableRecord*> (new_gid, record));
+
+ if (!start_glyph_is_set)
+ {
+ bitmap_size_context->start_glyph = new_gid;
+ start_glyph_is_set = true;
+ }
+
+ bitmap_size_context->end_glyph = new_gid;
+ }
+ }
+
+ bool
+ subset (hb_subset_context_t *c,
+ cblc_bitmap_size_subset_context_t *bitmap_size_context) const
+ { /* Rebuilds the record array and its subtables for the retained glyphs.
+ * size and num_tables in the context are reset and recomputed. */
+ TRACE_SUBSET (this);
+
+ auto *dst = c->serializer->start_embed<IndexSubtableArray> ();
+ if (unlikely (!dst)) return_trace (false);
+
+ hb_vector_t<hb_pair_t<hb_codepoint_t, const IndexSubtableRecord*>> lookup;
+ build_lookup (c, bitmap_size_context, &lookup);
+ if (unlikely (lookup.in_error ()))
+ return c->serializer->check_success (false);
+
+ bitmap_size_context->size = 0; /* from here on these two are outputs */
+ bitmap_size_context->num_tables = 0;
+ hb_vector_t<IndexSubtableRecord> records;
+ for (unsigned int start = 0; start < lookup.length;) /* add_new_record advances start */
+ {
+ if (unlikely (!lookup[start].second->add_new_record (c, bitmap_size_context, &lookup, this, &start, &records)))
+ {
+ // Discard any leftover pushes to the serializer from successful records.
+ for (unsigned int i = 0; i < records.length; i++)
+ c->serializer->pop_discard ();
+ return_trace (false);
+ }
+ }
+
+ /* Workaround to ensure offset ordering is from least to greatest when
+ * resolving links. */
+ hb_vector_t<hb_serialize_context_t::objidx_t> objidxs;
+ for (unsigned int i = 0; i < records.length; i++)
+ objidxs.push (c->serializer->pop_pack ()); /* pops come back last-pushed first */
+ for (unsigned int i = 0; i < records.length; i++)
+ {
+ IndexSubtableRecord* record = c->serializer->embed (records[i]);
+ if (unlikely (!record)) return_trace (false);
+ c->serializer->add_link (record->offsetToSubtable, objidxs[records.length - 1 - i]); /* reversed index pairs record i with its own subtable */
+ }
+ return_trace (true);
+ }
+
+ public:
+ const IndexSubtableRecord* find_table (hb_codepoint_t glyph, unsigned int numTables) const
+ { /* Linear scan; returns the first record whose inclusive range holds glyph, else nullptr. */
+ for (unsigned int i = 0; i < numTables; ++i)
+ {
+ unsigned int firstGlyphIndex = indexSubtablesZ[i].firstGlyphIndex;
+ unsigned int lastGlyphIndex = indexSubtablesZ[i].lastGlyphIndex;
+ if (firstGlyphIndex <= glyph && glyph <= lastGlyphIndex)
+ return &indexSubtablesZ[i];
+ }
+ return nullptr;
+ }
+
+ protected:
+ UnsizedArrayOf<IndexSubtableRecord> indexSubtablesZ;
+};
+
+struct BitmapSizeTable
+{
+  friend struct CBLC;
+  friend struct CBDT;
+
+  /* Validate the fixed part, then the index subtable array reached through
+   * its offset (sized by numberOfIndexSubtables), then both line-metrics
+   * records.  Stops at the first failing check. */
+  bool sanitize (hb_sanitize_context_t *c, const void *base) const
+  {
+    TRACE_SANITIZE (this);
+    if (!c->check_struct (this)) return_trace (false);
+    if (!indexSubtableArrayOffset.sanitize (c, base, numberOfIndexSubtables)) return_trace (false);
+    if (!horizontal.sanitize (c)) return_trace (false);
+    return_trace (vertical.sanitize (c));
+  }
+
+  /* Look up the index subtable record covering `glyph`.  *out_base is set
+   * to the address of the index subtable array regardless of the result. */
+  const IndexSubtableRecord *
+  find_table (hb_codepoint_t glyph, const void *base, const void **out_base) const
+  {
+    const IndexSubtableArray &subtables = base+indexSubtableArrayOffset;
+    *out_base = &subtables;
+    return subtables.find_table (glyph, numberOfIndexSubtables);
+  }
+
+  /* Embed a copy of this table, subset its index subtable array, and write
+   * the recomputed size / count / glyph range into the copy.  Returns false
+   * when serialization fails or when nothing was produced. */
+  bool
+  subset (hb_subset_context_t *c, const void *base,
+          const char *cbdt, unsigned int cbdt_length,
+          hb_vector_t<char> *cbdt_prime /* INOUT */) const
+  {
+    TRACE_SUBSET (this);
+    auto *out_table = c->serializer->embed (this);
+    if (unlikely (!out_table)) return_trace (false);
+
+    cblc_bitmap_size_subset_context_t ctx;
+    ctx.cbdt = cbdt;
+    ctx.cbdt_length = cbdt_length;
+    ctx.cbdt_prime = cbdt_prime;
+    ctx.size = indexTablesSize;
+    ctx.num_tables = numberOfIndexSubtables;
+    /* start > end marks "no glyph seen yet". */
+    ctx.start_glyph = 1;
+    ctx.end_glyph = 0;
+
+    if (!out_table->indexSubtableArrayOffset.serialize_subset (c,
+                                                               indexSubtableArrayOffset,
+                                                               base,
+                                                               &ctx))
+      return_trace (false);
+
+    const bool produced_nothing = !ctx.size ||
+                                  !ctx.num_tables ||
+                                  ctx.start_glyph > ctx.end_glyph;
+    if (produced_nothing)
+      return_trace (false);
+
+    out_table->indexTablesSize = ctx.size;
+    out_table->numberOfIndexSubtables = ctx.num_tables;
+    out_table->startGlyphIndex = ctx.start_glyph;
+    out_table->endGlyphIndex = ctx.end_glyph;
+    return_trace (true);
+  }
+
+  protected:
+  LNNOffsetTo<IndexSubtableArray>
+  indexSubtableArrayOffset;
+  HBUINT32 indexTablesSize;
+  HBUINT32 numberOfIndexSubtables;
+  HBUINT32 colorRef;
+  SBitLineMetrics horizontal;
+  SBitLineMetrics vertical;
+  HBGlyphID startGlyphIndex;
+  HBGlyphID endGlyphIndex;
+  HBUINT8 ppemX;
+  HBUINT8 ppemY;
+  HBUINT8 bitDepth;
+  HBINT8 flags;
+  public:
+  DEFINE_SIZE_STATIC (48);
+};
+
+
+/*
+ * Glyph Bitmap Data Formats.
+ */
+
+struct GlyphBitmapDataFormat17 /* Image format 17: small metrics, then a 32-bit length-prefixed byte array. */
+{
+ SmallGlyphMetrics glyphMetrics;
+ LArrayOf<HBUINT8> data;
+ public:
+ DEFINE_SIZE_ARRAY (9, data); /* 5 bytes of metrics + 4-byte array length */
+};
+
+struct GlyphBitmapDataFormat18 /* Image format 18: big metrics, then a 32-bit length-prefixed byte array. */
+{
+ BigGlyphMetrics glyphMetrics;
+ LArrayOf<HBUINT8> data;
+ public:
+ DEFINE_SIZE_ARRAY (12, data); /* 8 bytes of metrics + 4-byte array length */
+};
+
+struct GlyphBitmapDataFormat19 /* Image format 19: no metrics here, only a 32-bit length-prefixed byte array. */
+{
+ LArrayOf<HBUINT8> data;
+ public:
+ DEFINE_SIZE_ARRAY (4, data); /* just the 4-byte array length */
+};
+
+struct CBLC
+{
+  friend struct CBDT;
+
+  static constexpr hb_tag_t tableTag = HB_OT_TAG_CBLC;
+
+  /* Accept only major versions 2 and 3, then validate every size table
+   * relative to this table. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!c->check_struct (this)) return_trace (false);
+    if (!likely (version.major == 2 || version.major == 3)) return_trace (false);
+    return_trace (sizeTables.sanitize (c, this));
+  }
+
+  /* Hand the bytes accumulated in cbdt_prime to the subset plan as the
+   * CBDT table.  The buffer's storage is given to the blob (released with
+   * free), and the vector is re-initialised so it no longer refers to it. */
+  static bool
+  sink_cbdt (hb_subset_context_t *c, hb_vector_t<char>* cbdt_prime)
+  {
+    hb_blob_t *blob = hb_blob_create (cbdt_prime->arrayZ,
+                                      cbdt_prime->length,
+                                      HB_MEMORY_MODE_WRITABLE,
+                                      cbdt_prime->arrayZ,
+                                      free);
+    cbdt_prime->init (); // Leak arrayZ to the blob.
+    const bool added = c->plan->add_table (HB_OT_TAG_CBDT, blob);
+    hb_blob_destroy (blob);
+    return added;
+  }
+
+  /* Try to subset one size table into cblc_prime / cbdt_prime.  The size
+   * table count is bumped up front; if the subset fails, the count, the
+   * serializer and the CBDT buffer are all rolled back. */
+  bool
+  subset_size_table (hb_subset_context_t *c, const BitmapSizeTable& table,
+                     const char *cbdt /* IN */, unsigned int cbdt_length,
+                     CBLC *cblc_prime /* INOUT */, hb_vector_t<char> *cbdt_prime /* INOUT */) const
+  {
+    TRACE_SUBSET (this);
+    cblc_prime->sizeTables.len++;
+
+    auto serializer_mark = c->serializer->snapshot ();
+    auto cbdt_mark = cbdt_prime->length;
+
+    if (table.subset (c, this, cbdt, cbdt_length, cbdt_prime))
+      return_trace (true);
+
+    cblc_prime->sizeTables.len--;
+    c->serializer->revert (serializer_mark);
+    cbdt_prime->shrink (cbdt_mark);
+    return_trace (false);
+  }
+
+  // Implemented in cc file as it depends on definition of CBDT.
+  HB_INTERNAL bool subset (hb_subset_context_t *c) const;
+
+  protected:
+  /* Pick the strike to use for `font`: starting from strike 0, switch to a
+   * later strike when it is at least the requested ppem yet smaller than
+   * the current pick, or when the current pick is below the request and
+   * the candidate is larger.  A request of 0 selects the largest strike. */
+  const BitmapSizeTable &choose_strike (hb_font_t *font) const
+  {
+    unsigned n_strikes = sizeTables.len;
+    if (unlikely (!n_strikes))
+      return Null (BitmapSizeTable);
+
+    unsigned int target = hb_max (font->x_ppem, font->y_ppem);
+    if (!target)
+      target = 1<<30; /* Choose largest strike. */
+
+    unsigned int winner = 0;
+    unsigned int winner_ppem = hb_max (sizeTables[0].ppemX, sizeTables[0].ppemY);
+
+    for (unsigned int i = 1; i < n_strikes; i++)
+    {
+      unsigned int ppem = hb_max (sizeTables[i].ppemX, sizeTables[i].ppemY);
+
+      const bool tighter_fit = target <= ppem && ppem < winner_ppem;
+      const bool bigger_while_short = target > winner_ppem && ppem > winner_ppem;
+      if (!(tighter_fit || bigger_while_short))
+        continue;
+
+      winner = i;
+      winner_ppem = ppem;
+    }
+
+    return sizeTables[winner];
+  }
+
+  protected:
+  FixedVersion<> version;
+  LArrayOf<BitmapSizeTable> sizeTables;
+  public:
+  DEFINE_SIZE_ARRAY (8, sizeTables);
+};
+
+struct CBDT /* Bitmap data table plus an accelerator that pairs it with CBLC to
+ * answer extents and PNG-blob queries for a glyph. */
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_CBDT;
+
+ struct accelerator_t
+ {
+ void init (hb_face_t *face)
+ { /* Takes references to the face's sanitized CBLC and CBDT tables; fini () releases them. */
+ cblc = hb_sanitize_context_t ().reference_table<CBLC> (face);
+ cbdt = hb_sanitize_context_t ().reference_table<CBDT> (face);
+
+ upem = hb_face_get_upem (face);
+ }
+
+ void fini ()
+ {
+ this->cblc.destroy ();
+ this->cbdt.destroy ();
+ }
+
+ bool
+ get_extents (hb_font_t *font, hb_codepoint_t glyph, hb_glyph_extents_t *extents) const
+ { /* Fills extents from the glyph's bitmap metrics in the strike chosen for
+ * font.  Returns false when the glyph has no usable bitmap or its image
+ * format is not 17 or 18. */
+ const void *base;
+ const BitmapSizeTable &strike = this->cblc->choose_strike (font);
+ const IndexSubtableRecord *subtable_record = strike.find_table (glyph, cblc, &base);
+ if (!subtable_record || !strike.ppemX || !strike.ppemY) /* ppem values are divisors further down */
+ return false;
+
+ if (subtable_record->get_extents (extents, base))
+ return true;
+
+ unsigned int image_offset = 0, image_length = 0, image_format = 0;
+ if (!subtable_record->get_image_data (glyph, base, &image_offset, &image_length, &image_format))
+ return false;
+
+ unsigned int cbdt_len = cbdt.get_length ();
+ if (unlikely (image_offset > cbdt_len || cbdt_len - image_offset < image_length)) /* image range must lie inside the CBDT blob */
+ return false;
+
+ switch (image_format)
+ {
+ case 17: {
+ if (unlikely (image_length < GlyphBitmapDataFormat17::min_size))
+ return false;
+ auto &glyphFormat17 = StructAtOffset<GlyphBitmapDataFormat17> (this->cbdt, image_offset);
+ glyphFormat17.glyphMetrics.get_extents (font, extents);
+ break;
+ }
+ case 18: {
+ if (unlikely (image_length < GlyphBitmapDataFormat18::min_size))
+ return false;
+ auto &glyphFormat18 = StructAtOffset<GlyphBitmapDataFormat18> (this->cbdt, image_offset);
+ glyphFormat18.glyphMetrics.get_extents (font, extents);
+ break;
+ }
+ default: return false; /* TODO: Support other image formats. */
+ }
+
+ /* Convert to font units. */
+ float x_scale = upem / (float) strike.ppemX;
+ float y_scale = upem / (float) strike.ppemY;
+ extents->x_bearing = roundf (extents->x_bearing * x_scale);
+ extents->y_bearing = roundf (extents->y_bearing * y_scale);
+ extents->width = roundf (extents->width * x_scale);
+ extents->height = roundf (extents->height * y_scale);
+
+ return true;
+ }
+
+ hb_blob_t*
+ reference_png (hb_font_t *font, hb_codepoint_t glyph) const
+ { /* Returns a sub-blob of the CBDT blob covering the glyph's image payload
+ * (image formats 17, 18 and 19), or the empty blob when there is none. */
+ const void *base;
+ const BitmapSizeTable &strike = this->cblc->choose_strike (font);
+ const IndexSubtableRecord *subtable_record = strike.find_table (glyph, cblc, &base);
+ if (!subtable_record || !strike.ppemX || !strike.ppemY)
+ return hb_blob_get_empty ();
+
+ unsigned int image_offset = 0, image_length = 0, image_format = 0;
+ if (!subtable_record->get_image_data (glyph, base, &image_offset, &image_length, &image_format))
+ return hb_blob_get_empty ();
+
+ unsigned int cbdt_len = cbdt.get_length ();
+ if (unlikely (image_offset > cbdt_len || cbdt_len - image_offset < image_length)) /* image range must lie inside the CBDT blob */
+ return hb_blob_get_empty ();
+
+ switch (image_format)
+ {
+ case 17:
+ {
+ if (unlikely (image_length < GlyphBitmapDataFormat17::min_size))
+ return hb_blob_get_empty ();
+ auto &glyphFormat17 = StructAtOffset<GlyphBitmapDataFormat17> (this->cbdt, image_offset);
+ return hb_blob_create_sub_blob (cbdt.get_blob (),
+ image_offset + GlyphBitmapDataFormat17::min_size, /* skip metrics and the length field */
+ glyphFormat17.data.len); /* NOTE(review): len is not compared with image_length here -- confirm the sub-blob call clamps it */
+ }
+ case 18:
+ {
+ if (unlikely (image_length < GlyphBitmapDataFormat18::min_size))
+ return hb_blob_get_empty ();
+ auto &glyphFormat18 = StructAtOffset<GlyphBitmapDataFormat18> (this->cbdt, image_offset);
+ return hb_blob_create_sub_blob (cbdt.get_blob (),
+ image_offset + GlyphBitmapDataFormat18::min_size,
+ glyphFormat18.data.len);
+ }
+ case 19:
+ {
+ if (unlikely (image_length < GlyphBitmapDataFormat19::min_size))
+ return hb_blob_get_empty ();
+ auto &glyphFormat19 = StructAtOffset<GlyphBitmapDataFormat19> (this->cbdt, image_offset);
+ return hb_blob_create_sub_blob (cbdt.get_blob (),
+ image_offset + GlyphBitmapDataFormat19::min_size,
+ glyphFormat19.data.len);
+ }
+ default: return hb_blob_get_empty (); /* TODO: Support other image formats. */
+ }
+ }
+
+ bool has_data () const { return cbdt.get_length (); } /* true when the CBDT blob is non-empty */
+
+ private:
+ hb_blob_ptr_t<CBLC> cblc;
+ hb_blob_ptr_t<CBDT> cbdt;
+
+ unsigned int upem;
+ };
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ { /* Only the header is checked: in bounds and major version 2 or 3. */
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ likely (version.major == 2 || version.major == 3));
+ }
+
+ protected:
+ FixedVersion<> version;
+ UnsizedArrayOf<HBUINT8> dataZ;
+ public:
+ DEFINE_SIZE_ARRAY (4, dataZ);
+};
+
+inline bool
+CBLC::subset (hb_subset_context_t *c) const
+{
+  /* Subset CBLC and CBDT together: CBLC goes through the serializer while
+   * the new CBDT bytes are gathered in a side buffer, which is handed to
+   * the subset plan at the end.  Returns false if the CBLC header cannot be
+   * serialized, if the source CBDT is shorter than its header, or if the
+   * CBDT header cannot be copied into the side buffer; otherwise returns
+   * the result of adding the new CBDT table to the plan. */
+  TRACE_SUBSET (this);
+
+  auto *cblc_prime = c->serializer->start_embed<CBLC> ();
+
+  // Use a vector as a secondary buffer as the tables need to be built in parallel.
+  hb_vector_t<char> cbdt_prime;
+
+  if (unlikely (!cblc_prime)) return_trace (false);
+  if (unlikely (!c->serializer->extend_min (cblc_prime))) return_trace (false);
+  cblc_prime->version = version;
+
+  hb_blob_t* cbdt_blob = hb_sanitize_context_t ().reference_table<CBDT> (c->plan->source);
+  unsigned int cbdt_length;
+  CBDT* cbdt = (CBDT *) hb_blob_get_data (cbdt_blob, &cbdt_length);
+  if (unlikely (cbdt_length < CBDT::min_size))
+  {
+    hb_blob_destroy (cbdt_blob);
+    return_trace (false);
+  }
+
+  /* The new CBDT starts with a copy of the source header.  If that copy
+   * fails, stop here: carrying on would hand the plan a CBDT table that
+   * lacks its header. */
+  if (unlikely (!_copy_data_to_cbdt (&cbdt_prime, cbdt, CBDT::min_size)))
+  {
+    hb_blob_destroy (cbdt_blob);
+    return_trace (false);
+  }
+
+  /* A size table that fails to subset is rolled back by subset_size_table
+   * and simply left out, so its result is deliberately not checked. */
+  for (const BitmapSizeTable& table : + sizeTables.iter ())
+    subset_size_table (c, table, (const char *) cbdt, cbdt_length, cblc_prime, &cbdt_prime);
+
+  hb_blob_destroy (cbdt_blob);
+
+  return_trace (CBLC::sink_cbdt (c, &cbdt_prime));
+}
+
+struct CBDT_accelerator_t : CBDT::accelerator_t {}; /* Namespace-scope name for CBDT's nested accelerator_t; adds no members of its own. */
+
+} /* namespace OT */
+
+#endif /* HB_OT_COLOR_CBDT_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-color-colr-table.hh b/thirdparty/harfbuzz/src/hb-ot-color-colr-table.hh
new file mode 100644
index 0000000000..92a49bb4f4
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-color-colr-table.hh
@@ -0,0 +1,278 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ * Copyright © 2020 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Calder Kitagawa
+ */
+
+#ifndef HB_OT_COLOR_COLR_TABLE_HH
+#define HB_OT_COLOR_COLR_TABLE_HH
+
+#include "hb-open-type.hh"
+
+/*
+ * COLR -- Color
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/colr
+ */
+#define HB_OT_TAG_COLR HB_TAG('C','O','L','R')
+
+
+namespace OT {
+
+
+struct LayerRecord
+{
+  /* Converts to hb_ot_color_layer_t, initialized from {glyphId, colorIdx}. */
+  operator hb_ot_color_layer_t () const
+  {
+    hb_ot_color_layer_t layer = {glyphId, colorIdx};
+    return layer;
+  }
+
+  /* Passes when check_struct () accepts this record. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool accepted = c->check_struct (this);
+    return_trace (accepted);
+  }
+
+  public:
+  HBGlyphID glyphId;  /* Glyph ID of layer glyph */
+  Index colorIdx;     /* Index value to use with a selected color
+                       * palette. An index value of 0xFFFF is a
+                       * special case indicating that the text
+                       * foreground color (defined by a higher-level
+                       * client) should be used and shall not be
+                       * treated as actual index into CPAL
+                       * ColorRecord array. */
+  public:
+  DEFINE_SIZE_STATIC (4);
+};
+
+struct BaseGlyphRecord
+{
+  /* Three-way comparison of glyph `g` against this record's glyphId:
+   * -1 when g is smaller, 1 when g is larger, 0 when equal. */
+  int cmp (hb_codepoint_t g) const
+  {
+    if (g < glyphId) return -1;
+    if (g > glyphId) return 1;
+    return 0;
+  }
+
+  /* Passes when check_struct () accepts this record. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const auto accepted = likely (c->check_struct (this));
+    return_trace (accepted);
+  }
+
+  public:
+  HBGlyphID glyphId;       /* Glyph ID of reference glyph */
+  HBUINT16 firstLayerIdx;  /* Index (from beginning of the Layer Records)
+                            * to the layer record. There will be
+                            * numLayers consecutive entries for this
+                            * base glyph. */
+  HBUINT16 numLayers;      /* Number of color layers associated with
+                            * this glyph */
+  public:
+  DEFINE_SIZE_STATIC (6);
+};
+
+struct COLR /* 'COLR' table: base glyph records plus the layer records they index into. */
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_COLR;
+ /* True when the table declares at least one base glyph record. */
+ bool has_data () const { return numBaseGlyphs; }
+ /* Looks `glyph` up among the base glyph records and, when `count` is
+  * non-NULL, sinks the layers selected by `start_offset` / `*count` into
+  * `layers`. Always returns the length of the glyph's layer sub-array. */
+ unsigned int get_glyph_layers (hb_codepoint_t glyph,
+ unsigned int start_offset,
+ unsigned int *count, /* IN/OUT. May be NULL. */
+ hb_ot_color_layer_t *layers /* OUT. May be NULL. */) const
+ {
+ const BaseGlyphRecord &record = (this+baseGlyphsZ).bsearch (numBaseGlyphs, glyph);
+
+ hb_array_t<const LayerRecord> all_layers = (this+layersZ).as_array (numLayers);
+ hb_array_t<const LayerRecord> glyph_layers = all_layers.sub_array (record.firstLayerIdx,
+ record.numLayers);
+ if (count)
+ {
+ + glyph_layers.sub_array (start_offset, count)
+ | hb_sink (hb_array (layers, *count))
+ ;
+ }
+ return glyph_layers.length;
+ }
+ /* Holds the COLR table obtained through reference_table (): init ()
+  * acquires it; fini (), which the destructor also calls, destroys it. */
+ struct accelerator_t
+ {
+ accelerator_t () {}
+ ~accelerator_t () { fini (); }
+
+ void init (hb_face_t *face)
+ { colr = hb_sanitize_context_t ().reference_table<COLR> (face); }
+
+ void fini () { this->colr.destroy (); }
+
+ bool is_valid () { return colr.get_blob ()->length; } /* true for a non-zero blob length */
+
+ void closure_glyphs (hb_codepoint_t glyph,
+ hb_set_t *related_ids /* OUT */) const
+ { colr->closure_glyphs (glyph, related_ids); } /* forwards to COLR::closure_glyphs () */
+
+ private:
+ hb_blob_ptr_t<COLR> colr;
+ };
+ /* Adds the glyph ID of every layer of `glyph` to `related_ids`. Leaves the
+  * set untouched when no base glyph record matches or it has no layers. */
+ void closure_glyphs (hb_codepoint_t glyph,
+ hb_set_t *related_ids /* OUT */) const
+ {
+ const BaseGlyphRecord *record = get_base_glyph_record (glyph);
+ if (!record) return;
+
+ auto glyph_layers = (this+layersZ).as_array (numLayers).sub_array (record->firstLayerIdx,
+ record->numLayers);
+ if (!glyph_layers.length) return;
+ related_ids->add_array (&glyph_layers[0].glyphId, glyph_layers.length, LayerRecord::min_size); /* presumably the last argument is the byte stride between glyph IDs -- TODO confirm */
+ }
+ /* Checks the header, then the base glyph and layer arrays using the
+  * counts stored in the header. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ (this+baseGlyphsZ).sanitize (c, numBaseGlyphs) &&
+ (this+layersZ).sanitize (c, numLayers)));
+ }
+ /* Writes a COLR table through `c`: the header, one base glyph record per
+  * item of `base_it`, then the layer records of each item of `layer_it`.
+  * Returns false when the iterators differ in length or when
+  * extend_min () / embed () fail; the result of copying layers is not
+  * checked. */
+ template<typename BaseIterator, typename LayerIterator,
+ hb_requires (hb_is_iterator (BaseIterator)),
+ hb_requires (hb_is_iterator (LayerIterator))>
+ bool serialize (hb_serialize_context_t *c,
+ unsigned version,
+ BaseIterator base_it,
+ LayerIterator layer_it)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (base_it.len () != layer_it.len ()))
+ return_trace (false);
+
+ if (unlikely (!c->extend_min (this))) return_trace (false);
+ this->version = version;
+ numLayers = 0;
+ numBaseGlyphs = base_it.len ();
+ baseGlyphsZ = COLR::min_size; /* base records start right after the header */
+ layersZ = COLR::min_size + numBaseGlyphs * BaseGlyphRecord::min_size; /* layers start after all base records */
+
+ for (const hb_item_type<BaseIterator> _ : + base_it.iter ())
+ {
+ auto* record = c->embed (_);
+ if (unlikely (!record)) return_trace (false);
+ record->firstLayerIdx = numLayers; /* running total of layers of the records written so far */
+ numLayers += record->numLayers;
+ }
+
+ for (const hb_item_type<LayerIterator>& _ : + layer_it.iter ())
+ _.as_array ().copy (c);
+
+ return_trace (true);
+ }
+ /* Returns the record whose glyphId equals `gid`, or nullptr when `gid` is
+  * 0 or the search result carries a different glyph ID. */
+ const BaseGlyphRecord* get_base_glyph_record (hb_codepoint_t gid) const
+ {
+ if ((unsigned int) gid == 0) // Ignore notdef.
+ return nullptr;
+ const BaseGlyphRecord* record = &(this+baseGlyphsZ).bsearch (numBaseGlyphs, (unsigned int) gid);
+ if ((record && (hb_codepoint_t) record->glyphId != gid)) /* reject a search result for another glyph */
+ record = nullptr;
+ return record;
+ }
+ /* Builds the subset table: each output glyph that maps back to a base
+  * glyph record yields a record with the new glyph ID plus a copy of its
+  * layers with layer glyph IDs remapped through the plan. Returns false
+  * when either pipeline yields nothing, their lengths differ, or
+  * serialize () fails. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+
+ const hb_map_t &reverse_glyph_map = *c->plan->reverse_glyph_map;
+
+ auto base_it =
+ + hb_range (c->plan->num_output_glyphs ())
+ | hb_map_retains_sorting ([&](hb_codepoint_t new_gid)
+ {
+ hb_codepoint_t old_gid = reverse_glyph_map.get (new_gid);
+
+ const BaseGlyphRecord* old_record = get_base_glyph_record (old_gid);
+ if (unlikely (!old_record))
+ return hb_pair_t<bool, BaseGlyphRecord> (false, Null (BaseGlyphRecord)); /* removed by hb_filter (hb_first) below */
+
+ BaseGlyphRecord new_record;
+ new_record.glyphId = new_gid;
+ new_record.numLayers = old_record->numLayers; /* firstLayerIdx is assigned later, in serialize () */
+ return hb_pair_t<bool, BaseGlyphRecord> (true, new_record);
+ })
+ | hb_filter (hb_first)
+ | hb_map_retains_sorting (hb_second)
+ ;
+
+ auto layer_it =
+ + hb_range (c->plan->num_output_glyphs ())
+ | hb_map (reverse_glyph_map)
+ | hb_map_retains_sorting ([&](hb_codepoint_t old_gid)
+ {
+ const BaseGlyphRecord* old_record = get_base_glyph_record (old_gid);
+ hb_vector_t<LayerRecord> out_layers;
+
+ if (unlikely (!old_record ||
+ old_record->firstLayerIdx >= numLayers ||
+ old_record->firstLayerIdx + old_record->numLayers > numLayers))
+ return hb_pair_t<bool, hb_vector_t<LayerRecord>> (false, out_layers);
+
+ auto layers = (this+layersZ).as_array (numLayers).sub_array (old_record->firstLayerIdx,
+ old_record->numLayers);
+ out_layers.resize (layers.length);
+ for (unsigned int i = 0; i < layers.length; i++) {
+ out_layers[i] = layers[i];
+ hb_codepoint_t new_gid = 0;
+ if (unlikely (!c->plan->new_gid_for_old_gid (out_layers[i].glyphId, &new_gid))) /* one unmapped layer glyph rejects the whole entry */
+ return hb_pair_t<bool, hb_vector_t<LayerRecord>> (false, out_layers);
+ out_layers[i].glyphId = new_gid;
+ }
+
+ return hb_pair_t<bool, hb_vector_t<LayerRecord>> (true, out_layers);
+ })
+ | hb_filter (hb_first)
+ | hb_map_retains_sorting (hb_second)
+ ;
+
+ if (unlikely (!base_it || !layer_it || base_it.len () != layer_it.len ())) /* lengths differ when layer_it rejects an entry that base_it kept */
+ return_trace (false);
+
+ COLR *colr_prime = c->serializer->start_embed<COLR> ();
+ return_trace (colr_prime->serialize (c->serializer, version, base_it, layer_it));
+ }
+
+ protected:
+ HBUINT16 version; /* Table version number (starts at 0). */
+ HBUINT16 numBaseGlyphs; /* Number of Base Glyph Records. */
+ LNNOffsetTo<SortedUnsizedArrayOf<BaseGlyphRecord>>
+ baseGlyphsZ; /* Offset to Base Glyph records. */
+ LNNOffsetTo<UnsizedArrayOf<LayerRecord>>
+ layersZ; /* Offset to Layer Records. */
+ HBUINT16 numLayers; /* Number of Layer Records. */
+ public:
+ DEFINE_SIZE_STATIC (14);
+};
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_COLOR_COLR_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-color-cpal-table.hh b/thirdparty/harfbuzz/src/hb-ot-color-cpal-table.hh
new file mode 100644
index 0000000000..fa7d3207be
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-color-cpal-table.hh
@@ -0,0 +1,190 @@
+/*
+ * Copyright © 2016 Google, Inc.
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Sascha Brawer
+ */
+
+#ifndef HB_OT_COLOR_CPAL_TABLE_HH
+#define HB_OT_COLOR_CPAL_TABLE_HH
+
+#include "hb-open-type.hh"
+#include "hb-ot-color.h"
+#include "hb-ot-name.h"
+
+
+/*
+ * CPAL -- Color Palette
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/cpal
+ */
+#define HB_OT_TAG_CPAL HB_TAG('C','P','A','L')
+
+
+namespace OT {
+
+
+struct CPALV1Tail
+{
+  friend struct CPAL;
+
+  private:
+  /* Flags of palette `palette_index`, read from the array at paletteFlagsZ
+   * (relative to `base`); the default flag value when the offset is zero. */
+  hb_ot_color_palette_flags_t get_palette_flags (const void *base,
+                                                 unsigned int palette_index,
+                                                 unsigned int palette_count) const
+  {
+    if (!paletteFlagsZ)
+      return HB_OT_COLOR_PALETTE_FLAG_DEFAULT;
+    const uint32_t raw_flags = (uint32_t)
+      (base+paletteFlagsZ).as_array (palette_count)[palette_index];
+    return (hb_ot_color_palette_flags_t) raw_flags;
+  }
+
+  /* Name ID of palette `palette_index`, or HB_OT_NAME_ID_INVALID when the
+   * paletteLabelsZ offset is zero. */
+  hb_ot_name_id_t get_palette_name_id (const void *base,
+                                       unsigned int palette_index,
+                                       unsigned int palette_count) const
+  {
+    if (!paletteLabelsZ)
+      return HB_OT_NAME_ID_INVALID;
+    const auto palette_labels = (base+paletteLabelsZ).as_array (palette_count);
+    return palette_labels[palette_index];
+  }
+
+  /* Name ID of color entry `color_index`, or HB_OT_NAME_ID_INVALID when the
+   * colorLabelsZ offset is zero. */
+  hb_ot_name_id_t get_color_name_id (const void *base,
+                                     unsigned int color_index,
+                                     unsigned int color_count) const
+  {
+    if (!colorLabelsZ)
+      return HB_OT_NAME_ID_INVALID;
+    const auto color_labels = (base+colorLabelsZ).as_array (color_count);
+    return color_labels[color_index];
+  }
+
+  public:
+  /* Checks this struct, then each of the three optional arrays whose
+   * offset is non-zero, stopping at the first failure. */
+  bool sanitize (hb_sanitize_context_t *c,
+                 const void *base,
+                 unsigned int palette_count,
+                 unsigned int color_count) const
+  {
+    TRACE_SANITIZE (this);
+    if (!c->check_struct (this))
+      return_trace (false);
+
+    const bool flags_ok =
+      !paletteFlagsZ || (base+paletteFlagsZ).sanitize (c, palette_count);
+    if (!flags_ok)
+      return_trace (false);
+
+    const bool palette_labels_ok =
+      !paletteLabelsZ || (base+paletteLabelsZ).sanitize (c, palette_count);
+    if (!palette_labels_ok)
+      return_trace (false);
+
+    const bool color_labels_ok =
+      !colorLabelsZ || (base+colorLabelsZ).sanitize (c, color_count);
+    return_trace (color_labels_ok);
+  }
+
+  protected:
+  LNNOffsetTo<UnsizedArrayOf<HBUINT32>>
+    paletteFlagsZ;   /* Offset from the beginning of CPAL table to
+                      * the Palette Type Array. Set to 0 if no array
+                      * is provided. */
+  LNNOffsetTo<UnsizedArrayOf<NameID>>
+    paletteLabelsZ;  /* Offset from the beginning of CPAL table to
+                      * the palette labels array. Set to 0 if no
+                      * array is provided. */
+  LNNOffsetTo<UnsizedArrayOf<NameID>>
+    colorLabelsZ;    /* Offset from the beginning of CPAL table to
+                      * the color labels array. Set to 0
+                      * if no array is provided. */
+  public:
+  DEFINE_SIZE_STATIC (12);
+};
+
+typedef HBUINT32 BGRAColor;
+
+struct CPAL
+{
+  static constexpr hb_tag_t tableTag = HB_OT_TAG_CPAL;
+
+  /* True when the table declares at least one palette. */
+  bool has_data () const { return numPalettes; }
+
+  /* min_size plus one colorRecordIndicesZ element per palette. */
+  unsigned int get_size () const
+  {
+    const auto indices_bytes = numPalettes * sizeof (colorRecordIndicesZ[0]);
+    return min_size + indices_bytes;
+  }
+
+  unsigned int get_palette_count () const { return numPalettes; }
+  unsigned int get_color_count () const { return numColors; }
+
+  /* The three getters below forward to the version-1 tail (the Null tail
+   * for version 0 tables). */
+  hb_ot_color_palette_flags_t get_palette_flags (unsigned int palette_index) const
+  {
+    const CPALV1Tail &tail = v1 ();
+    return tail.get_palette_flags (this, palette_index, numPalettes);
+  }
+
+  hb_ot_name_id_t get_palette_name_id (unsigned int palette_index) const
+  {
+    const CPALV1Tail &tail = v1 ();
+    return tail.get_palette_name_id (this, palette_index, numPalettes);
+  }
+
+  hb_ot_name_id_t get_color_name_id (unsigned int color_index) const
+  {
+    const CPALV1Tail &tail = v1 ();
+    return tail.get_color_name_id (this, color_index, numColors);
+  }
+
+  /* Sinks the colors of palette `palette_index` selected by `start_offset`
+   * and `*color_count` into `colors` when `color_count` is non-NULL.
+   * Returns numColors, or 0 (zeroing `*color_count`) for an out-of-range
+   * palette index. */
+  unsigned int get_palette_colors (unsigned int palette_index,
+                                   unsigned int start_offset,
+                                   unsigned int *color_count, /* IN/OUT. May be NULL. */
+                                   hb_color_t *colors /* OUT. May be NULL. */) const
+  {
+    if (unlikely (palette_index >= numPalettes))
+    {
+      if (color_count) *color_count = 0;
+      return 0;
+    }
+
+    unsigned int first_record = colorRecordIndicesZ[palette_index];
+    hb_array_t<const BGRAColor> records ((this+colorRecordsZ).arrayZ, numColorRecords);
+    hb_array_t<const BGRAColor> palette = records.sub_array (first_record,
+                                                              numColors);
+    if (!color_count)
+      return numColors;
+
+    /* Kept as a single expression so operand evaluation stays as written. */
+    + palette.sub_array (start_offset, color_count)
+    | hb_sink (hb_array (colors, *color_count))
+    ;
+    return numColors;
+  }
+
+  private:
+  /* The tail that follows this struct for non-zero versions; the Null
+   * tail for version 0. */
+  const CPALV1Tail& v1 () const
+  {
+    if (version != 0)
+      return StructAfter<CPALV1Tail> (*this);
+    return Null (CPALV1Tail);
+  }
+
+  public:
+  /* Checks the header, the color records, the per-palette index array and,
+   * for non-zero versions, the version-1 tail; stops at the first failure. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!c->check_struct (this))
+      return_trace (false);
+    if (!(this+colorRecordsZ).sanitize (c, numColorRecords))
+      return_trace (false);
+    if (!colorRecordIndicesZ.sanitize (c, numPalettes))
+      return_trace (false);
+    if (version == 0)
+      return_trace (true);
+    return_trace (v1 ().sanitize (c, this, numPalettes, numColors));
+  }
+
+  protected:
+  HBUINT16 version;          /* Table version number */
+  /* Version 0 */
+  HBUINT16 numColors;        /* Number of colors in each palette. */
+  HBUINT16 numPalettes;      /* Number of palettes in the table. */
+  HBUINT16 numColorRecords;  /* Total number of color records, combined for
+                              * all palettes. */
+  LNNOffsetTo<UnsizedArrayOf<BGRAColor>>
+    colorRecordsZ;           /* Offset from the beginning of CPAL table to
+                              * the first ColorRecord. */
+  UnsizedArrayOf<HBUINT16>
+    colorRecordIndicesZ;     /* Index of each palette’s first color record in
+                              * the combined color record array. */
+/*CPALV1Tail v1;*/
+  public:
+  DEFINE_SIZE_ARRAY (12, colorRecordIndicesZ);
+};
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_COLOR_CPAL_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-color-sbix-table.hh b/thirdparty/harfbuzz/src/hb-ot-color-sbix-table.hh
new file mode 100644
index 0000000000..09da11597d
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-color-sbix-table.hh
@@ -0,0 +1,414 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ * Copyright © 2020 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Calder Kitagawa
+ */
+
+#ifndef HB_OT_COLOR_SBIX_TABLE_HH
+#define HB_OT_COLOR_SBIX_TABLE_HH
+
+#include "hb-open-type.hh"
+#include "hb-ot-layout-common.hh"
+
+/*
+ * sbix -- Standard Bitmap Graphics
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/sbix
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6sbix.html
+ */
+#define HB_OT_TAG_sbix HB_TAG('s','b','i','x')
+
+
+namespace OT {
+
+
+struct SBIXGlyph
+{
+  /* Embeds a copy of this glyph into `c`: the fixed header fields followed
+   * by `data_length` elements of `data`. Returns the new glyph, or nullptr
+   * when start_embed () or extend_min () fail. */
+  SBIXGlyph* copy (hb_serialize_context_t *c, unsigned int data_length) const
+  {
+    TRACE_SERIALIZE (this);
+    SBIXGlyph* out = c->start_embed<SBIXGlyph> ();
+    if (unlikely (!out))
+      return_trace (nullptr);
+    if (unlikely (!c->extend_min (out)))
+      return_trace (nullptr);
+
+    out->xOffset = xOffset;
+    out->yOffset = yOffset;
+    out->graphicType = graphicType;
+
+    /* The result of copying the payload is not checked. */
+    data.copy (c, data_length);
+
+    return_trace (out);
+  }
+
+  HBINT16 xOffset;   /* The horizontal (x-axis) offset from the left
+                      * edge of the graphic to the glyph’s origin.
+                      * That is, the x-coordinate of the point on the
+                      * baseline at the left edge of the glyph. */
+  HBINT16 yOffset;   /* The vertical (y-axis) offset from the bottom
+                      * edge of the graphic to the glyph’s origin.
+                      * That is, the y-coordinate of the point on the
+                      * baseline at the left edge of the glyph. */
+  Tag graphicType;   /* Indicates the format of the embedded graphic
+                      * data: one of 'jpg ', 'png ' or 'tiff', or the
+                      * special format 'dupe'. */
+  UnsizedArrayOf<HBUINT8>
+    data;            /* The actual embedded graphic data. The total
+                      * length is inferred from sequential entries in
+                      * the glyphDataOffsets array and the fixed size
+                      * (8 bytes) of the preceding fields. */
+  public:
+  DEFINE_SIZE_ARRAY (8, data);
+};
+
+struct SBIXStrike /* One sbix strike: ppem/resolution header followed by per-glyph image offsets. */
+{
+ static unsigned int get_size (unsigned num_glyphs)
+ { return min_size + num_glyphs * HBUINT32::static_size; } /* header plus one HBUINT32-sized entry per `num_glyphs` */
+ /* Passes when check_struct () accepts the header and sanitize_shallow ()
+  * accepts get_num_glyphs () + 1 offsets. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ imageOffsetsZ.sanitize_shallow (c, c->get_num_glyphs () + 1));
+ }
+ /* Returns a sub-blob of `sbix_blob` with the image bytes of `glyph_id`
+  * when the glyph's graphicType equals `file_type`; the empty blob
+  * otherwise. A 'dupe' glyph redirects to the glyph ID stored at the start
+  * of its data, followed a bounded number of times. On success the
+  * non-NULL `x_offset`, `y_offset` and `strike_ppem` are filled in. */
+ hb_blob_t *get_glyph_blob (unsigned int glyph_id,
+ hb_blob_t *sbix_blob,
+ hb_tag_t file_type,
+ int *x_offset,
+ int *y_offset,
+ unsigned int num_glyphs,
+ unsigned int *strike_ppem) const
+ {
+ if (unlikely (!ppem)) return hb_blob_get_empty (); /* To get Null() object out of the way. */
+
+ unsigned int retry_count = 8; /* budget for 'dupe' redirections */
+ unsigned int sbix_len = sbix_blob->length;
+ unsigned int strike_offset = (const char *) this - (const char *) sbix_blob->data; /* position of this strike inside the blob */
+ assert (strike_offset < sbix_len);
+
+ retry:
+ if (unlikely (glyph_id >= num_glyphs ||
+ imageOffsetsZ[glyph_id + 1] <= imageOffsetsZ[glyph_id] ||
+ imageOffsetsZ[glyph_id + 1] - imageOffsetsZ[glyph_id] <= SBIXGlyph::min_size ||
+ (unsigned int) imageOffsetsZ[glyph_id + 1] > sbix_len - strike_offset))
+ return hb_blob_get_empty ();
+
+ unsigned int glyph_offset = strike_offset + (unsigned int) imageOffsetsZ[glyph_id] + SBIXGlyph::min_size; /* skips the SBIXGlyph header */
+ unsigned int glyph_length = imageOffsetsZ[glyph_id + 1] - imageOffsetsZ[glyph_id] - SBIXGlyph::min_size; /* payload bytes only */
+
+ const SBIXGlyph *glyph = &(this+imageOffsetsZ[glyph_id]);
+
+ if (glyph->graphicType == HB_TAG ('d','u','p','e'))
+ {
+ if (glyph_length >= 2)
+ {
+ glyph_id = *((HBUINT16 *) &glyph->data); /* redirect target is read from the first two data bytes */
+ if (retry_count--)
+ goto retry;
+ }
+ return hb_blob_get_empty ();
+ }
+
+ if (unlikely (file_type != glyph->graphicType))
+ return hb_blob_get_empty ();
+
+ if (strike_ppem) *strike_ppem = ppem;
+ if (x_offset) *x_offset = glyph->xOffset;
+ if (y_offset) *y_offset = glyph->yOffset;
+ return hb_blob_create_sub_blob (sbix_blob, glyph_offset, glyph_length);
+ }
+ /* Serializes this strike for the subset plan. Output glyphs without a
+  * usable source image get the current `head` as offset and no data.
+  * Returns false when the serializer fails or when no glyph had image
+  * data; in the latter case the serializer is reverted to `snap`. */
+ bool subset (hb_subset_context_t *c, unsigned int available_len) const
+ {
+ TRACE_SUBSET (this);
+ unsigned int num_output_glyphs = c->plan->num_output_glyphs ();
+
+ auto* out = c->serializer->start_embed<SBIXStrike> ();
+ if (unlikely (!out)) return_trace (false);
+ auto snap = c->serializer->snapshot ();
+ if (unlikely (!c->serializer->extend (*out, num_output_glyphs + 1))) return_trace (false);
+ out->ppem = ppem;
+ out->resolution = resolution;
+ HBUINT32 head; /* offset assigned to the next glyph written */
+ head = get_size (num_output_glyphs + 1);
+
+ bool has_glyphs = false;
+ for (unsigned new_gid = 0; new_gid < num_output_glyphs; new_gid++)
+ {
+ hb_codepoint_t old_gid;
+ if (!c->plan->old_gid_for_new_gid (new_gid, &old_gid) ||
+ unlikely (imageOffsetsZ[old_gid].is_null () ||
+ imageOffsetsZ[old_gid + 1].is_null () ||
+ imageOffsetsZ[old_gid + 1] <= imageOffsetsZ[old_gid] ||
+ imageOffsetsZ[old_gid + 1] - imageOffsetsZ[old_gid] <= SBIXGlyph::min_size) ||
+ (unsigned int) imageOffsetsZ[old_gid + 1] > available_len)
+ {
+ out->imageOffsetsZ[new_gid] = head; /* head is not advanced: this glyph carries no data */
+ continue;
+ }
+ has_glyphs = true;
+ unsigned int delta = imageOffsetsZ[old_gid + 1] - imageOffsetsZ[old_gid];
+ unsigned int glyph_data_length = delta - SBIXGlyph::min_size;
+ if (!(this+imageOffsetsZ[old_gid]).copy (c->serializer, glyph_data_length))
+ return_trace (false);
+ out->imageOffsetsZ[new_gid] = head;
+ head += delta;
+ }
+ if (has_glyphs)
+ out->imageOffsetsZ[num_output_glyphs] = head; /* final entry marks the end of the last glyph */
+ else
+ c->serializer->revert (snap);
+ return_trace (has_glyphs);
+ }
+
+ public:
+ HBUINT16 ppem; /* The PPEM size for which this strike was designed. */
+ HBUINT16 resolution; /* The device pixel density (in PPI) for which this
+ * strike was designed. (E.g., 96 PPI, 192 PPI.) */
+ protected:
+ UnsizedArrayOf<LOffsetTo<SBIXGlyph>>
+ imageOffsetsZ; /* Offset from the beginning of the strike data header
+ * to bitmap data for an individual glyph ID. */
+ public:
+ DEFINE_SIZE_ARRAY (4, imageOffsetsZ);
+};
+
+struct sbix /* 'sbix' table: version/flags header followed by offsets to bitmap strikes. */
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_sbix;
+ /* True when the version field is non-zero. */
+ bool has_data () const { return version; }
+ /* Strike `i`, resolved from its offset relative to this table. */
+ const SBIXStrike &get_strike (unsigned int i) const { return this+strikes[i]; }
+ /* Holds the face's sbix table and glyph count; init () acquires the
+  * table and fini () destroys it. */
+ struct accelerator_t
+ {
+ void init (hb_face_t *face)
+ {
+ table = hb_sanitize_context_t ().reference_table<sbix> (face);
+ num_glyphs = face->get_num_glyphs ();
+ }
+ void fini () { table.destroy (); }
+
+ bool has_data () const { return table->has_data (); }
+
+ bool get_extents (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_glyph_extents_t *extents) const
+ {
+ /* We only support PNG right now, and following function checks type. */
+ return get_png_extents (font, glyph, extents);
+ }
+ /* Returns the 'png ' image blob of `glyph_id` from the strike selected
+  * for `font`; see SBIXStrike::get_glyph_blob () for the out-parameters. */
+ hb_blob_t *reference_png (hb_font_t *font,
+ hb_codepoint_t glyph_id,
+ int *x_offset,
+ int *y_offset,
+ unsigned int *available_ppem) const
+ {
+ return choose_strike (font).get_glyph_blob (glyph_id, table.get_blob (),
+ HB_TAG ('p','n','g',' '),
+ x_offset, y_offset,
+ num_glyphs, available_ppem);
+ }
+
+ private:
+ /* Picks a strike for `font`: among strikes whose ppem is at least the
+  * requested ppem the smallest wins; while the best so far is below the
+  * request any larger strike replaces it. The Null strike is returned
+  * when the table has no strikes. */
+ const SBIXStrike &choose_strike (hb_font_t *font) const
+ {
+ unsigned count = table->strikes.len;
+ if (unlikely (!count))
+ return Null (SBIXStrike);
+
+ unsigned int requested_ppem = hb_max (font->x_ppem, font->y_ppem);
+ if (!requested_ppem)
+ requested_ppem = 1<<30; /* Choose largest strike. */
+ /* TODO Add DPI sensitivity as well? */
+ unsigned int best_i = 0;
+ unsigned int best_ppem = table->get_strike (0).ppem;
+
+ for (unsigned int i = 1; i < count; i++)
+ {
+ unsigned int ppem = (table->get_strike (i)).ppem;
+ if ((requested_ppem <= ppem && ppem < best_ppem) ||
+ (requested_ppem > best_ppem && ppem > best_ppem))
+ {
+ best_i = i;
+ best_ppem = ppem;
+ }
+ }
+
+ return table->get_strike (best_i);
+ }
+ /* Layout of the leading bytes of a PNG image: the 8-byte signature and
+  * the IHDR chunk. Only width and height are read in this file. */
+ struct PNGHeader
+ {
+ HBUINT8 signature[8];
+ struct
+ {
+ struct
+ {
+ HBUINT32 length;
+ Tag type;
+ } header;
+ HBUINT32 width;
+ HBUINT32 height;
+ HBUINT8 bitDepth;
+ HBUINT8 colorType;
+ HBUINT8 compressionMethod;
+ HBUINT8 filterMethod;
+ HBUINT8 interlaceMethod;
+ } IHDR;
+
+ public:
+ DEFINE_SIZE_STATIC (29);
+ };
+ /* Fills `extents` from the PNG header of the glyph's image and the
+  * glyph offsets, scaled into font space. Returns true when a strike ppem
+  * was reported by reference_png (), false otherwise. */
+ bool get_png_extents (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_glyph_extents_t *extents) const
+ {
+ /* Following code is safe to call even without data.
+ * But faster to short-circuit. */
+ if (!has_data ())
+ return false;
+
+ int x_offset = 0, y_offset = 0;
+ unsigned int strike_ppem = 0;
+ hb_blob_t *blob = reference_png (font, glyph, &x_offset, &y_offset, &strike_ppem);
+
+ const PNGHeader &png = *blob->as<PNGHeader>(); /* presumably a Null header when the blob is too short -- TODO confirm */
+
+ extents->x_bearing = x_offset;
+ extents->y_bearing = png.IHDR.height + y_offset;
+ extents->width = png.IHDR.width;
+ extents->height = -1 * png.IHDR.height;
+
+ /* Convert to font units. */
+ if (strike_ppem)
+ {
+ float scale = font->face->get_upem () / (float) strike_ppem;
+ extents->x_bearing = font->em_scalef_x (extents->x_bearing * scale);
+ extents->y_bearing = font->em_scalef_y (extents->y_bearing * scale);
+ extents->width = font->em_scalef_x (extents->width * scale);
+ extents->height = font->em_scalef_y (extents->height * scale);
+ }
+ else
+ {
+ extents->x_bearing = font->em_scale_x (extents->x_bearing);
+ extents->y_bearing = font->em_scale_y (extents->y_bearing);
+ extents->width = font->em_scale_x (extents->width);
+ extents->height = font->em_scale_y (extents->height);
+ }
+
+ hb_blob_destroy (blob);
+
+ return strike_ppem;
+ }
+
+ private:
+ hb_blob_ptr_t<sbix> table;
+
+ unsigned int num_glyphs;
+ };
+ /* Passes when the header checks out, version is at least 1 and the
+  * strikes array sanitizes relative to this table. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ version >= 1 &&
+ strikes.sanitize (c, this)));
+ }
+ /* Subsets strike `i` into the serializer. Returns false when its offset
+  * is null or lies beyond the source blob, or when the strike's own
+  * subset () returns false. */
+ bool
+ add_strike (hb_subset_context_t *c, unsigned i) const
+ {
+ if (strikes[i].is_null () || c->source_blob->length < (unsigned) strikes[i])
+ return false;
+
+ return (this+strikes[i]).subset (c, c->source_blob->length - (unsigned) strikes[i]);
+ }
+ /* Writes the strike offset array and the strikes themselves. Strikes are
+  * visited from last to first, each inside its own pushed serializer
+  * object; a strike that fails is discarded together with its offset
+  * slot. Links between slots and packed objects are added at the end. */
+ bool serialize_strike_offsets (hb_subset_context_t *c) const
+ {
+ TRACE_SERIALIZE (this);
+
+ auto *out = c->serializer->start_embed<LOffsetLArrayOf<SBIXStrike>> ();
+ if (unlikely (!out)) return_trace (false);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+
+ hb_vector_t<LOffsetTo<SBIXStrike>*> new_strikes;
+ hb_vector_t<hb_serialize_context_t::objidx_t> objidxs;
+ for (int i = strikes.len - 1; i >= 0; --i)
+ {
+ auto* o = out->serialize_append (c->serializer);
+ if (unlikely (!o)) return_trace (false);
+ *o = 0;
+ auto snap = c->serializer->snapshot ();
+ c->serializer->push ();
+ bool ret = add_strike (c, i);
+ if (!ret)
+ {
+ c->serializer->pop_discard ();
+ out->pop ();
+ c->serializer->revert (snap);
+ }
+ else
+ {
+ objidxs.push (c->serializer->pop_pack ());
+ new_strikes.push (o);
+ }
+ }
+ for (unsigned int i = 0; i < new_strikes.length; ++i)
+ c->serializer->add_link (*new_strikes[i], objidxs[new_strikes.length - 1 - i]); /* slot i is paired with the object at the mirrored index */
+
+ return_trace (true);
+ }
+ /* Emits the subset table: version and flags are embedded unchanged,
+  * followed by the output of serialize_strike_offsets (). */
+ bool subset (hb_subset_context_t* c) const
+ {
+ TRACE_SUBSET (this);
+
+ sbix *sbix_prime = c->serializer->start_embed<sbix> ();
+ if (unlikely (!sbix_prime)) return_trace (false);
+ if (unlikely (!c->serializer->embed (this->version))) return_trace (false);
+ if (unlikely (!c->serializer->embed (this->flags))) return_trace (false);
+
+ return_trace (serialize_strike_offsets (c));
+ }
+
+ protected:
+ HBUINT16 version; /* Table version number — set to 1 */
+ HBUINT16 flags; /* Bit 0: Set to 1. Bit 1: Draw outlines.
+ * Bits 2 to 15: reserved (set to 0). */
+ LOffsetLArrayOf<SBIXStrike>
+ strikes; /* Offsets from the beginning of the 'sbix'
+ * table to data for each individual bitmap strike. */
+ public:
+ DEFINE_SIZE_ARRAY (8, strikes);
+};
+
+struct sbix_accelerator_t : sbix::accelerator_t {}; /* Adds no members; gives sbix::accelerator_t a namespace-scope name. */
+
+} /* namespace OT */
+
+#endif /* HB_OT_COLOR_SBIX_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-color-svg-table.hh b/thirdparty/harfbuzz/src/hb-ot-color-svg-table.hh
new file mode 100644
index 0000000000..1cc40ae53f
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-color-svg-table.hh
@@ -0,0 +1,124 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_OT_COLOR_SVG_TABLE_HH
+#define HB_OT_COLOR_SVG_TABLE_HH
+
+#include "hb-open-type.hh"
+
+/*
+ * SVG -- SVG (Scalable Vector Graphics)
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/svg
+ */
+
+#define HB_OT_TAG_SVG HB_TAG('S','V','G',' ')
+
+
+namespace OT {
+
+
+struct SVGDocumentIndexEntry
+{
+  /* Range comparison used for searching: -1 when `g` lies before
+   * startGlyphID, 1 when it lies after endGlyphID, 0 when inside. */
+  int cmp (hb_codepoint_t g) const
+  {
+    if (g < startGlyphID) return -1;
+    if (g > endGlyphID) return 1;
+    return 0;
+  }
+
+  /* Creates a sub-blob of `svg_blob` covering svgDocLength bytes starting
+   * at `index_offset` plus this entry's svgDoc offset. */
+  hb_blob_t *reference_blob (hb_blob_t *svg_blob, unsigned int index_offset) const
+  {
+    const unsigned int doc_start = index_offset + (unsigned int) svgDoc;
+    return hb_blob_create_sub_blob (svg_blob, doc_start, svgDocLength);
+  }
+
+  /* Passes when the entry itself and the document it points to
+   * (svgDocLength elements relative to `base`) both sanitize. */
+  bool sanitize (hb_sanitize_context_t *c, const void *base) const
+  {
+    TRACE_SANITIZE (this);
+    if (!c->check_struct (this))
+      return_trace (false);
+    return_trace (svgDoc.sanitize (c, base, svgDocLength));
+  }
+
+  protected:
+  HBUINT16 startGlyphID;  /* The first glyph ID in the range described by
+                           * this index entry. */
+  HBUINT16 endGlyphID;    /* The last glyph ID in the range described by
+                           * this index entry. Must be >= startGlyphID. */
+  LNNOffsetTo<UnsizedArrayOf<HBUINT8>>
+    svgDoc;               /* Offset from the beginning of the SVG Document Index
+                           * to an SVG document. Must be non-zero. */
+  HBUINT32 svgDocLength;  /* Length of the SVG document.
+                           * Must be non-zero. */
+  public:
+  DEFINE_SIZE_STATIC (12);
+};
+
+struct SVG
+{
+  static constexpr hb_tag_t tableTag = HB_OT_TAG_SVG;
+
+  /* True when the offset to the document index is non-zero. */
+  bool has_data () const { return svgDocEntries; }
+
+  /* Holds the face's SVG table: init () acquires it, fini () destroys it. */
+  struct accelerator_t
+  {
+    void init (hb_face_t *face)
+    {
+      table = hb_sanitize_context_t ().reference_table<SVG> (face);
+    }
+
+    void fini ()
+    {
+      table.destroy ();
+    }
+
+    /* Blob of the SVG document whose index entry is found for `glyph_id`. */
+    hb_blob_t *reference_blob_for_glyph (hb_codepoint_t glyph_id) const
+    {
+      const SVGDocumentIndexEntry &entry = table->get_glyph_entry (glyph_id);
+      return entry.reference_blob (table.get_blob (),
+                                   table->svgDocEntries);
+    }
+
+    bool has_data () const { return table->has_data (); }
+
+    private:
+    hb_blob_ptr_t<SVG> table;
+  };
+
+  /* Searches the document index for the entry matching `glyph_id`. */
+  const SVGDocumentIndexEntry &get_glyph_entry (hb_codepoint_t glyph_id) const
+  {
+    const auto &doc_index = this+svgDocEntries;
+    return doc_index.bsearch (glyph_id);
+  }
+
+  /* Passes when the header checks out and sanitize_shallow () accepts the
+   * document index. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const auto accepted = likely (c->check_struct (this) &&
+                                  (this+svgDocEntries).sanitize_shallow (c));
+    return_trace (accepted);
+  }
+
+  protected:
+  HBUINT16 version;    /* Table version (starting at 0). */
+  LOffsetTo<SortedArrayOf<SVGDocumentIndexEntry>>
+    svgDocEntries;     /* Offset (relative to the start of the SVG table) to the
+                        * SVG Documents Index. Must be non-zero. */
+                       /* Array of SVG Document Index Entries. */
+  HBUINT32 reserved;   /* Set to 0. */
+  public:
+  DEFINE_SIZE_STATIC (10);
+};
+
+struct SVG_accelerator_t : SVG::accelerator_t {}; /* Adds no members; gives SVG::accelerator_t a namespace-scope name. */
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_COLOR_SVG_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-color.cc b/thirdparty/harfbuzz/src/hb-ot-color.cc
new file mode 100644
index 0000000000..0e7203a88b
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-color.cc
@@ -0,0 +1,321 @@
+/*
+ * Copyright © 2016 Google, Inc.
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Sascha Brawer, Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_COLOR
+
+#include "hb-ot.h"
+
+#include "hb-ot-color-cbdt-table.hh"
+#include "hb-ot-color-colr-table.hh"
+#include "hb-ot-color-cpal-table.hh"
+#include "hb-ot-color-sbix-table.hh"
+#include "hb-ot-color-svg-table.hh"
+
+#include <stdlib.h>
+#include <string.h>
+
+
+/**
+ * SECTION:hb-ot-color
+ * @title: hb-ot-color
+ * @short_description: OpenType Color Fonts
+ * @include: hb-ot.h
+ *
+ * Functions for fetching color-font information from OpenType font faces.
+ *
+ * HarfBuzz supports `COLR`/`CPAL`, `sbix`, `CBDT`, and `SVG` color fonts.
+ **/
+
+
+/*
+ * CPAL
+ */
+
+
+/**
+ * hb_ot_color_has_palettes:
+ * @face: #hb_face_t to work upon
+ *
+ * Tests whether a face includes a `CPAL` color-palette table.
+ *
+ * Return value: %true if data found, %false otherwise
+ *
+ * Since: 2.1.0
+ */
+hb_bool_t
+hb_ot_color_has_palettes (hb_face_t *face)
+{
+ return face->table.CPAL->has_data ();
+}
+
+/**
+ * hb_ot_color_palette_get_count:
+ * @face: #hb_face_t to work upon
+ *
+ * Fetches the number of color palettes in a face's `CPAL` table.
+ *
+ * Return value: the number of palettes found
+ *
+ * Since: 2.1.0
+ */
+unsigned int
+hb_ot_color_palette_get_count (hb_face_t *face)
+{
+ return face->table.CPAL->get_palette_count ();
+}
+
+/**
+ * hb_ot_color_palette_get_name_id:
+ * @face: #hb_face_t to work upon
+ * @palette_index: The index of the color palette
+ *
+ * Fetches the `name` table Name ID that provides display names for
+ * a `CPAL` color palette.
+ *
+ * Palette display names can be generic (e.g., "Default") or provide
+ * specific, themed names (e.g., "Spring", "Summer", "Fall", and "Winter").
+ *
+ * Return value: the Name ID found for the palette.
+ * If the requested palette has no name the result is #HB_OT_NAME_ID_INVALID.
+ *
+ * Since: 2.1.0
+ */
+hb_ot_name_id_t
+hb_ot_color_palette_get_name_id (hb_face_t *face,
+ unsigned int palette_index)
+{
+ return face->table.CPAL->get_palette_name_id (palette_index);
+}
+
+/**
+ * hb_ot_color_palette_color_get_name_id:
+ * @face: #hb_face_t to work upon
+ * @color_index: The index of the color
+ *
+ * Fetches the `name` table Name ID that provides display names for
+ * the specified color in a face's `CPAL` color palette.
+ *
+ * Display names can be generic (e.g., "Background") or specific
+ * (e.g., "Eye color").
+ *
+ * Return value: the Name ID found for the color.
+ *
+ * Since: 2.1.0
+ */
+hb_ot_name_id_t
+hb_ot_color_palette_color_get_name_id (hb_face_t *face,
+ unsigned int color_index)
+{
+ return face->table.CPAL->get_color_name_id (color_index);
+}
+
+/**
+ * hb_ot_color_palette_get_flags:
+ * @face: #hb_face_t to work upon
+ * @palette_index: The index of the color palette
+ *
+ * Fetches the flags defined for a color palette in the face's `CPAL` table.
+ *
+ * Return value: the #hb_ot_color_palette_flags_t of the requested color palette
+ *
+ * Since: 2.1.0
+ */
+hb_ot_color_palette_flags_t
+hb_ot_color_palette_get_flags (hb_face_t *face,
+ unsigned int palette_index)
+{
+ return face->table.CPAL->get_palette_flags (palette_index);
+}
+
+/**
+ * hb_ot_color_palette_get_colors:
+ * @face: #hb_face_t to work upon
+ * @palette_index: the index of the color palette to query
+ * @start_offset: offset of the first color to retrieve
+ * @color_count: (inout) (optional): Input = the maximum number of colors to return;
+ * Output = the actual number of colors returned (may be zero)
+ * @colors: (out) (array length=color_count) (nullable): The array of #hb_color_t records found
+ *
+ * Fetches a list of the colors in a color palette.
+ *
+ * After calling this function, @colors will be filled with the palette
+ * colors. If @colors is NULL, the function will just return the total
+ * number of colors without storing any actual colors; this can be used
+ * for allocating a buffer of suitable size before calling
+ * hb_ot_color_palette_get_colors() a second time.
+ *
+ * Return value: the total number of colors in the palette
+ *
+ * Since: 2.1.0
+ */
+unsigned int
+hb_ot_color_palette_get_colors (hb_face_t *face,
+ unsigned int palette_index,
+ unsigned int start_offset,
+ unsigned int *color_count /* IN/OUT. May be NULL. */,
+ hb_color_t *colors /* OUT. May be NULL. */)
+{
+ return face->table.CPAL->get_palette_colors (palette_index, start_offset, color_count, colors);
+}
+
+
+/*
+ * COLR
+ */
+
+/**
+ * hb_ot_color_has_layers:
+ * @face: #hb_face_t to work upon
+ *
+ * Tests whether a face includes any `COLR` color layers.
+ *
+ * Return value: %true if data found, %false otherwise
+ *
+ * Since: 2.1.0
+ */
+hb_bool_t
+hb_ot_color_has_layers (hb_face_t *face)
+{
+ return face->table.COLR->has_data ();
+}
+
+/**
+ * hb_ot_color_glyph_get_layers:
+ * @face: #hb_face_t to work upon
+ * @glyph: The glyph index to query
+ * @start_offset: offset of the first layer to retrieve
+ * @layer_count: (inout) (optional): Input = the maximum number of layers to return;
+ * Output = the actual number of layers returned (may be zero)
+ * @layers: (out) (array length=layer_count) (nullable): The array of layers found
+ *
+ * Fetches a list of all `COLR` color layers for the specified glyph index in the
+ * specified face. The list returned will begin at the offset provided.
+ *
+ * Return value: Total number of layers available for the glyph index queried
+ *
+ * Since: 2.1.0
+ */
+unsigned int
+hb_ot_color_glyph_get_layers (hb_face_t *face,
+ hb_codepoint_t glyph,
+ unsigned int start_offset,
+ unsigned int *layer_count, /* IN/OUT. May be NULL. */
+ hb_ot_color_layer_t *layers /* OUT. May be NULL. */)
+{
+ return face->table.COLR->get_glyph_layers (glyph, start_offset, layer_count, layers);
+}
+
+
+/*
+ * SVG
+ */
+
+/**
+ * hb_ot_color_has_svg:
+ * @face: #hb_face_t to work upon.
+ *
+ * Tests whether a face includes any `SVG` glyph images.
+ *
+ * Return value: %true if data found, %false otherwise.
+ *
+ * Since: 2.1.0
+ */
+hb_bool_t
+hb_ot_color_has_svg (hb_face_t *face)
+{
+ return face->table.SVG->has_data ();
+}
+
+/**
+ * hb_ot_color_glyph_reference_svg:
+ * @face: #hb_face_t to work upon
+ * @glyph: an SVG glyph index
+ *
+ * Fetches the SVG document for a glyph. The blob may be either plain text or gzip-encoded.
+ *
+ * Return value: (transfer full): An #hb_blob_t containing the SVG document of the glyph, if available
+ *
+ * Since: 2.1.0
+ */
+hb_blob_t *
+hb_ot_color_glyph_reference_svg (hb_face_t *face, hb_codepoint_t glyph)
+{
+ return face->table.SVG->reference_blob_for_glyph (glyph);
+}
+
+
+/*
+ * PNG: CBDT or sbix
+ */
+
+/**
+ * hb_ot_color_has_png:
+ * @face: #hb_face_t to work upon
+ *
+ * Tests whether a face has PNG glyph images (either in `CBDT` or `sbix` tables).
+ *
+ * Return value: %true if data found, %false otherwise
+ *
+ * Since: 2.1.0
+ */
+hb_bool_t
+hb_ot_color_has_png (hb_face_t *face)
+{
+ return face->table.CBDT->has_data () || face->table.sbix->has_data ();
+}
+
+/**
+ * hb_ot_color_glyph_reference_png:
+ * @font: #hb_font_t to work upon
+ * @glyph: a glyph index
+ *
+ * Fetches the PNG image for a glyph. This function takes a font object, not a face object,
+ * as input. To get an optimally sized PNG blob, the UPEM value must be set on the @font
+ * object. If UPEM is unset, the blob returned will be the largest PNG available.
+ *
+ * Return value: (transfer full): An #hb_blob_t containing the PNG image for the glyph, if available
+ *
+ * Since: 2.1.0
+ */
+hb_blob_t *
+hb_ot_color_glyph_reference_png (hb_font_t *font, hb_codepoint_t glyph)
+{
+ const auto &tables = font->face->table;
+ /* sbix is consulted first; CBDT only when sbix yielded no image bytes. */
+ hb_blob_t *png = tables.sbix->has_data ()
+ ? tables.sbix->reference_png (font, glyph, nullptr, nullptr, nullptr)
+ : hb_blob_get_empty ();
+ if (png->length || !tables.CBDT->has_data ())
+ return png;
+
+ return tables.CBDT->reference_png (font, glyph);
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-color.h b/thirdparty/harfbuzz/src/hb-ot-color.h
new file mode 100644
index 0000000000..63ef20a1a0
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-color.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright © 2016 Google, Inc.
+ * Copyright © 2018 Khaled Hosny
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Sascha Brawer, Behdad Esfahbod
+ */
+
+#ifndef HB_OT_H_IN
+#error "Include <hb-ot.h> instead."
+#endif
+
+#ifndef HB_OT_COLOR_H
+#define HB_OT_COLOR_H
+
+#include "hb.h"
+#include "hb-ot-name.h"
+
+HB_BEGIN_DECLS
+
+
+/*
+ * Color palettes.
+ */
+
+HB_EXTERN hb_bool_t
+hb_ot_color_has_palettes (hb_face_t *face);
+
+HB_EXTERN unsigned int
+hb_ot_color_palette_get_count (hb_face_t *face);
+
+HB_EXTERN hb_ot_name_id_t
+hb_ot_color_palette_get_name_id (hb_face_t *face,
+ unsigned int palette_index);
+
+HB_EXTERN hb_ot_name_id_t
+hb_ot_color_palette_color_get_name_id (hb_face_t *face,
+ unsigned int color_index);
+
+/**
+ * hb_ot_color_palette_flags_t:
+ * @HB_OT_COLOR_PALETTE_FLAG_DEFAULT: Default indicating that there is nothing special
+ * to note about a color palette.
+ * @HB_OT_COLOR_PALETTE_FLAG_USABLE_WITH_LIGHT_BACKGROUND: Flag indicating that the color
+ * palette is appropriate to use when displaying the font on a light background such as white.
+ * @HB_OT_COLOR_PALETTE_FLAG_USABLE_WITH_DARK_BACKGROUND: Flag indicating that the color
+ * palette is appropriate to use when displaying the font on a dark background such as black.
+ *
+ * Since: 2.1.0
+ */
+typedef enum { /*< flags >*/
+ HB_OT_COLOR_PALETTE_FLAG_DEFAULT = 0x00000000u, /* no bits set */
+ HB_OT_COLOR_PALETTE_FLAG_USABLE_WITH_LIGHT_BACKGROUND = 0x00000001u, /* bit 0 */
+ HB_OT_COLOR_PALETTE_FLAG_USABLE_WITH_DARK_BACKGROUND = 0x00000002u /* bit 1 */
+} hb_ot_color_palette_flags_t;
+
+HB_EXTERN hb_ot_color_palette_flags_t
+hb_ot_color_palette_get_flags (hb_face_t *face,
+ unsigned int palette_index);
+
+HB_EXTERN unsigned int
+hb_ot_color_palette_get_colors (hb_face_t *face,
+ unsigned int palette_index,
+ unsigned int start_offset,
+ unsigned int *color_count, /* IN/OUT. May be NULL. */
+ hb_color_t *colors /* OUT. May be NULL. */);
+
+
+/*
+ * Color layers.
+ */
+
+HB_EXTERN hb_bool_t
+hb_ot_color_has_layers (hb_face_t *face);
+
+/**
+ * hb_ot_color_layer_t:
+ *
+ * A pair of glyph and color index, as filled in by hb_ot_color_glyph_get_layers().
+ *
+ * Since: 2.1.0
+ **/
+typedef struct hb_ot_color_layer_t
+{
+ hb_codepoint_t glyph; /* glyph of this layer */
+ unsigned int color_index; /* color index of this layer; presumably into a CPAL palette -- confirm */
+} hb_ot_color_layer_t;
+
+HB_EXTERN unsigned int
+hb_ot_color_glyph_get_layers (hb_face_t *face,
+ hb_codepoint_t glyph,
+ unsigned int start_offset,
+ unsigned int *layer_count, /* IN/OUT. May be NULL. */
+ hb_ot_color_layer_t *layers /* OUT. May be NULL. */);
+
+/*
+ * SVG
+ */
+
+HB_EXTERN hb_bool_t
+hb_ot_color_has_svg (hb_face_t *face);
+
+HB_EXTERN hb_blob_t *
+hb_ot_color_glyph_reference_svg (hb_face_t *face, hb_codepoint_t glyph);
+
+/*
+ * PNG: CBDT or sbix
+ */
+
+HB_EXTERN hb_bool_t
+hb_ot_color_has_png (hb_face_t *face);
+
+HB_EXTERN hb_blob_t *
+hb_ot_color_glyph_reference_png (hb_font_t *font, hb_codepoint_t glyph);
+
+
+HB_END_DECLS
+
+#endif /* HB_OT_COLOR_H */
diff --git a/thirdparty/harfbuzz/src/hb-ot-deprecated.h b/thirdparty/harfbuzz/src/hb-ot-deprecated.h
new file mode 100644
index 0000000000..bc72f8a701
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-deprecated.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_H_IN
+#error "Include <hb-ot.h> instead."
+#endif
+
+#ifndef HB_OT_DEPRECATED_H
+#define HB_OT_DEPRECATED_H
+
+#include "hb.h"
+#include "hb-ot-name.h"
+
+
+HB_BEGIN_DECLS
+
+#ifndef HB_DISABLE_DEPRECATED
+
+
+/* https://github.com/harfbuzz/harfbuzz/issues/1734 */
+#define HB_MATH_GLYPH_PART_FLAG_EXTENDER HB_OT_MATH_GLYPH_PART_FLAG_EXTENDER
+
+
+/* Like hb_ot_layout_table_find_script, but takes zero-terminated array of scripts to test */
+HB_EXTERN HB_DEPRECATED_FOR (hb_ot_layout_table_select_script) hb_bool_t
+hb_ot_layout_table_choose_script (hb_face_t *face,
+ hb_tag_t table_tag,
+ const hb_tag_t *script_tags,
+ unsigned int *script_index,
+ hb_tag_t *chosen_script);
+
+HB_EXTERN HB_DEPRECATED_FOR (hb_ot_layout_script_select_language) hb_bool_t
+hb_ot_layout_script_find_language (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int script_index,
+ hb_tag_t language_tag,
+ unsigned int *language_index);
+
+HB_EXTERN HB_DEPRECATED_FOR (hb_ot_tags_from_script_and_language) void
+hb_ot_tags_from_script (hb_script_t script,
+ hb_tag_t *script_tag_1,
+ hb_tag_t *script_tag_2);
+
+HB_EXTERN HB_DEPRECATED_FOR (hb_ot_tags_from_script_and_language) hb_tag_t
+hb_ot_tag_from_language (hb_language_t language);
+
+
+/**
+ * HB_OT_VAR_NO_AXIS_INDEX:
+ *
+ * Since: 1.4.2
+ * Deprecated: 2.2.0
+ */
+#define HB_OT_VAR_NO_AXIS_INDEX 0xFFFFFFFFu
+
+/**
+ * hb_ot_var_axis_t:
+ *
+ * Since: 1.4.2
+ * Deprecated: 2.2.0
+ */
+typedef struct hb_ot_var_axis_t
+{
+ hb_tag_t tag;
+ hb_ot_name_id_t name_id;
+ float min_value;
+ float default_value;
+ float max_value;
+} hb_ot_var_axis_t; /* Output record of the deprecated hb_ot_var_get_axes () and hb_ot_var_find_axis (). */
+
+HB_EXTERN HB_DEPRECATED_FOR (hb_ot_var_get_axis_infos) unsigned int
+hb_ot_var_get_axes (hb_face_t *face,
+ unsigned int start_offset,
+ unsigned int *axes_count /* IN/OUT */,
+ hb_ot_var_axis_t *axes_array /* OUT */);
+
+HB_EXTERN HB_DEPRECATED_FOR (hb_ot_var_find_axis_info) hb_bool_t
+hb_ot_var_find_axis (hb_face_t *face,
+ hb_tag_t axis_tag,
+ unsigned int *axis_index,
+ hb_ot_var_axis_t *axis_info);
+
+
+#endif
+
+HB_END_DECLS
+
+#endif /* HB_OT_DEPRECATED_H */
diff --git a/thirdparty/harfbuzz/src/hb-ot-face-table-list.hh b/thirdparty/harfbuzz/src/hb-ot-face-table-list.hh
new file mode 100644
index 0000000000..367e143fdf
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-face-table-list.hh
@@ -0,0 +1,138 @@
+/*
+ * Copyright © 2007,2008,2009 Red Hat, Inc.
+ * Copyright © 2012,2013 Google, Inc.
+ * Copyright © 2019, Facebook Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ * Facebook Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_FACE_TABLE_LIST_HH
+#define HB_OT_FACE_TABLE_LIST_HH
+#endif /* HB_OT_FACE_TABLE_LIST_HH */ /* Dummy header guards */
+
+#ifndef HB_OT_ACCELERATOR /* Includer supplied no accelerator expansion: fall back to the table one. */
+#define HB_OT_ACCELERATOR(Namespace, Type) HB_OT_TABLE (Namespace, Type)
+#define _HB_OT_ACCELERATOR_UNDEF /* Marks the fallback so the end of this file can remove it. */
+#endif
+
+
+/* This lists font tables that the hb_face_t will contain and lazily
+ * load. Don't add a table unless it's used though. This is not
+ * exactly free. */
+
+/* v--- Add new tables in the right place here. */
+
+
+/* OpenType fundamentals. */
+HB_OT_TABLE (OT, head)
+#if !defined(HB_NO_FACE_COLLECT_UNICODES) || !defined(HB_NO_OT_FONT)
+HB_OT_ACCELERATOR (OT, cmap)
+#endif
+HB_OT_TABLE (OT, hhea)
+HB_OT_ACCELERATOR (OT, hmtx)
+HB_OT_TABLE (OT, OS2)
+#if !defined(HB_NO_OT_FONT_GLYPH_NAMES) || !defined(HB_NO_METRICS) || !defined(HB_NO_STYLE)
+HB_OT_ACCELERATOR (OT, post)
+#endif
+#ifndef HB_NO_NAME
+HB_OT_ACCELERATOR (OT, name)
+#endif
+#ifndef HB_NO_STYLE
+HB_OT_TABLE (OT, STAT)
+#endif
+#ifndef HB_NO_META
+HB_OT_ACCELERATOR (OT, meta)
+#endif
+
+/* Vertical layout. */
+HB_OT_TABLE (OT, vhea)
+HB_OT_ACCELERATOR (OT, vmtx)
+
+/* TrueType outlines. */
+HB_OT_ACCELERATOR (OT, glyf)
+
+/* CFF outlines. */
+#ifndef HB_NO_CFF
+HB_OT_ACCELERATOR (OT, cff1)
+HB_OT_ACCELERATOR (OT, cff2)
+HB_OT_TABLE (OT, VORG)
+#endif
+
+/* OpenType variations. */
+#ifndef HB_NO_VAR
+HB_OT_TABLE (OT, fvar)
+HB_OT_TABLE (OT, avar)
+HB_OT_ACCELERATOR (OT, gvar)
+HB_OT_TABLE (OT, MVAR)
+#endif
+
+/* Legacy kern. */
+#ifndef HB_NO_OT_KERN
+HB_OT_TABLE (OT, kern)
+#endif
+
+/* OpenType shaping. */
+#ifndef HB_NO_OT_LAYOUT
+HB_OT_ACCELERATOR (OT, GDEF)
+HB_OT_ACCELERATOR (OT, GSUB)
+HB_OT_ACCELERATOR (OT, GPOS)
+//HB_OT_TABLE (OT, JSTF)
+#endif
+
+/* OpenType baseline. */
+#ifndef HB_NO_BASE
+HB_OT_TABLE (OT, BASE)
+#endif
+
+/* AAT shaping. */
+#ifndef HB_NO_AAT
+HB_OT_TABLE (AAT, morx)
+HB_OT_TABLE (AAT, mort)
+HB_OT_TABLE (AAT, kerx)
+HB_OT_TABLE (AAT, ankr)
+HB_OT_TABLE (AAT, trak)
+HB_OT_TABLE (AAT, ltag)
+HB_OT_TABLE (AAT, feat)
+// HB_OT_TABLE (AAT, opbd)
+#endif
+
+/* OpenType color fonts. */
+#ifndef HB_NO_COLOR
+HB_OT_TABLE (OT, COLR)
+HB_OT_TABLE (OT, CPAL)
+HB_OT_ACCELERATOR (OT, CBDT)
+HB_OT_ACCELERATOR (OT, sbix)
+HB_OT_ACCELERATOR (OT, SVG)
+#endif
+
+/* OpenType math. */
+#ifndef HB_NO_MATH
+HB_OT_TABLE (OT, MATH)
+#endif
+
+
+#ifdef _HB_OT_ACCELERATOR_UNDEF /* NOTE(review): this marker is never #undef'ed, so it stays set for later inclusions. */
+#undef HB_OT_ACCELERATOR /* Drop the fallback definition made at the top of this file. */
+#endif /* _HB_OT_ACCELERATOR_UNDEF */
diff --git a/thirdparty/harfbuzz/src/hb-ot-face.cc b/thirdparty/harfbuzz/src/hb-ot-face.cc
new file mode 100644
index 0000000000..5ef8df43ce
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-face.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb-ot-face.hh"
+
+#include "hb-ot-cmap-table.hh"
+#include "hb-ot-glyf-table.hh"
+#include "hb-ot-cff1-table.hh"
+#include "hb-ot-cff2-table.hh"
+#include "hb-ot-hmtx-table.hh"
+#include "hb-ot-kern-table.hh"
+#include "hb-ot-meta-table.hh"
+#include "hb-ot-name-table.hh"
+#include "hb-ot-post-table.hh"
+#include "hb-ot-color-cbdt-table.hh"
+#include "hb-ot-color-sbix-table.hh"
+#include "hb-ot-color-svg-table.hh"
+#include "hb-ot-layout-gdef-table.hh"
+#include "hb-ot-layout-gsub-table.hh"
+#include "hb-ot-layout-gpos-table.hh"
+
+
+void hb_ot_face_t::init0 (hb_face_t *face)
+{
+ this->face = face; /* Remember the face passed in before initialising the loaders. */
+#define HB_OT_TABLE(Namespace, Type) Type.init0 (); /* Accelerator entries reuse this via the list's fallback macro. */
+#include "hb-ot-face-table-list.hh" /* Expands to one init0 () call per listed member. */
+#undef HB_OT_TABLE
+}
+void hb_ot_face_t::fini ()
+{
+#define HB_OT_TABLE(Namespace, Type) Type.fini (); /* Accelerator entries reuse this via the list's fallback macro. */
+#include "hb-ot-face-table-list.hh" /* Expands to one fini () call per listed member. */
+#undef HB_OT_TABLE
+}
diff --git a/thirdparty/harfbuzz/src/hb-ot-face.hh b/thirdparty/harfbuzz/src/hb-ot-face.hh
new file mode 100644
index 0000000000..e24d380bca
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-face.hh
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2007,2008,2009 Red Hat, Inc.
+ * Copyright © 2012,2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_FACE_HH
+#define HB_OT_FACE_HH
+
+#include "hb.hh"
+
+#include "hb-machinery.hh"
+
+
+/*
+ * hb_ot_face_t
+ */
+
+/* Forward-declare each listed table struct; accelerator entries declare Type##_accelerator_t instead. */
+#define HB_OT_TABLE(Namespace, Type) namespace Namespace { struct Type; }
+#define HB_OT_ACCELERATOR(Namespace, Type) HB_OT_TABLE (Namespace, Type##_accelerator_t)
+#include "hb-ot-face-table-list.hh"
+#undef HB_OT_ACCELERATOR
+#undef HB_OT_TABLE
+
+struct hb_ot_face_t
+{
+ HB_INTERNAL void init0 (hb_face_t *face);
+ HB_INTERNAL void fini ();
+
+#define HB_OT_TABLE_ORDER(Namespace, Type) \
+ HB_PASTE (ORDER_, HB_PASTE (Namespace, HB_PASTE (_, Type))) /* e.g. ORDER_OT_head for (OT, head) */
+ enum order_t /* One enumerator per table-list entry, in list order, after ORDER_ZERO. */
+ {
+ ORDER_ZERO,
+#define HB_OT_TABLE(Namespace, Type) HB_OT_TABLE_ORDER (Namespace, Type),
+#include "hb-ot-face-table-list.hh"
+#undef HB_OT_TABLE
+ };
+
+ hb_face_t *face; /* MUST be JUST before the lazy loaders. */
+#define HB_OT_TABLE(Namespace, Type) \
+ hb_table_lazy_loader_t<Namespace::Type, HB_OT_TABLE_ORDER (Namespace, Type)> Type; /* plain table member */
+#define HB_OT_ACCELERATOR(Namespace, Type) \
+ hb_face_lazy_loader_t<Namespace::Type##_accelerator_t, HB_OT_TABLE_ORDER (Namespace, Type)> Type; /* accelerator member */
+#include "hb-ot-face-table-list.hh"
+#undef HB_OT_ACCELERATOR
+#undef HB_OT_TABLE
+};
+
+
+#endif /* HB_OT_FACE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-font.cc b/thirdparty/harfbuzz/src/hb-ot-font.cc
new file mode 100644
index 0000000000..a1dc88603a
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-font.cc
@@ -0,0 +1,336 @@
+/*
+ * Copyright © 2011,2014 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod, Roozbeh Pournader
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_FONT
+
+#include "hb-ot.h"
+
+#include "hb-font.hh"
+#include "hb-machinery.hh"
+#include "hb-ot-face.hh"
+
+#include "hb-ot-cmap-table.hh"
+#include "hb-ot-glyf-table.hh"
+#include "hb-ot-cff1-table.hh"
+#include "hb-ot-cff2-table.hh"
+#include "hb-ot-hmtx-table.hh"
+#include "hb-ot-os2-table.hh"
+#include "hb-ot-post-table.hh"
+#include "hb-ot-stat-table.hh" // Just so we compile it; unused otherwise.
+#include "hb-ot-vorg-table.hh"
+#include "hb-ot-color-cbdt-table.hh"
+#include "hb-ot-color-sbix-table.hh"
+
+
+/**
+ * SECTION:hb-ot-font
+ * @title: hb-ot-font
+ * @short_description: OpenType font implementation
+ * @include: hb-ot.h
+ *
+ * Functions for using OpenType fonts with hb_shape(). Note that fonts returned
+ * by hb_font_create() default to using these functions, so most clients would
+ * never need to call these functions directly.
+ **/
+
+
+static hb_bool_t
+hb_ot_get_nominal_glyph (hb_font_t *font HB_UNUSED,
+ void *font_data,
+ hb_codepoint_t unicode,
+ hb_codepoint_t *glyph,
+ void *user_data HB_UNUSED)
+{
+ /* font_data carries the hb_ot_face_t whose cmap accelerator answers the query. */
+ return static_cast<const hb_ot_face_t *> (font_data)->cmap->get_nominal_glyph (unicode, glyph);
+}
+
+static unsigned int
+hb_ot_get_nominal_glyphs (hb_font_t *font HB_UNUSED,
+ void *font_data,
+ unsigned int count,
+ const hb_codepoint_t *first_unicode,
+ unsigned int unicode_stride,
+ hb_codepoint_t *first_glyph,
+ unsigned int glyph_stride,
+ void *user_data HB_UNUSED)
+{
+ const auto &cmap = *static_cast<const hb_ot_face_t *> (font_data)->cmap; /* batch lookup is delegated to cmap */
+ return cmap.get_nominal_glyphs (count,
+ first_unicode, unicode_stride,
+ first_glyph, glyph_stride);
+}
+
+static hb_bool_t
+hb_ot_get_variation_glyph (hb_font_t *font HB_UNUSED,
+ void *font_data,
+ hb_codepoint_t unicode,
+ hb_codepoint_t variation_selector,
+ hb_codepoint_t *glyph,
+ void *user_data HB_UNUSED)
+{
+ const auto &cmap = *static_cast<const hb_ot_face_t *> (font_data)->cmap; /* variation lookup is delegated to cmap */
+ return cmap.get_variation_glyph (unicode, variation_selector, glyph);
+}
+
+static void
+hb_ot_get_glyph_h_advances (hb_font_t* font, void* font_data,
+ unsigned count,
+ const hb_codepoint_t *first_glyph,
+ unsigned glyph_stride,
+ hb_position_t *first_advance,
+ unsigned advance_stride,
+ void *user_data HB_UNUSED)
+{
+ const auto *ot_face = static_cast<const hb_ot_face_t *> (font_data);
+ const OT::hmtx_accelerator_t &hmtx = *ot_face->hmtx;
+ /* Step the glyph and advance arrays in lockstep, each by its own stride. */
+ for (unsigned remaining = count; remaining; remaining--)
+ {
+ *first_advance = font->em_scale_x (hmtx.get_advance (*first_glyph, font));
+ first_advance = &StructAtOffsetUnaligned<hb_position_t> (first_advance, advance_stride);
+ first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride);
+ }
+}
+
+static void
+hb_ot_get_glyph_v_advances (hb_font_t* font, void* font_data,
+ unsigned count,
+ const hb_codepoint_t *first_glyph,
+ unsigned glyph_stride,
+ hb_position_t *first_advance,
+ unsigned advance_stride,
+ void *user_data HB_UNUSED)
+{
+ const auto *ot_face = static_cast<const hb_ot_face_t *> (font_data);
+ const OT::vmtx_accelerator_t &vmtx = *ot_face->vmtx;
+ /* Same lockstep walk as the horizontal case; the vmtx advance is negated before scaling. */
+ for (unsigned remaining = count; remaining; remaining--)
+ {
+ *first_advance = font->em_scale_y (-(int) vmtx.get_advance (*first_glyph, font));
+ first_advance = &StructAtOffsetUnaligned<hb_position_t> (first_advance, advance_stride);
+ first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride);
+ }
+}
+
+static hb_bool_t
+hb_ot_get_glyph_v_origin (hb_font_t *font,
+ void *font_data,
+ hb_codepoint_t glyph,
+ hb_position_t *x,
+ hb_position_t *y,
+ void *user_data HB_UNUSED)
+{
+ const hb_ot_face_t *ot_face = (const hb_ot_face_t *) font_data;
+
+ *x = font->get_glyph_h_advance (glyph) / 2; /* x origin: half the horizontal advance */
+
+#ifndef HB_NO_OT_FONT_CFF
+ const OT::VORG &VORG = *ot_face->VORG;
+ if (VORG.has_data ()) /* 1st choice for y: the VORG table, when it has data */
+ {
+ *y = font->em_scale_y (VORG.get_y_origin (glyph));
+ return true;
+ }
+#endif
+
+ hb_glyph_extents_t extents = {0};
+ if (ot_face->glyf->get_extents (font, glyph, &extents)) /* 2nd choice: glyf extents plus the vmtx side bearing */
+ {
+ const OT::vmtx_accelerator_t &vmtx = *ot_face->vmtx;
+ hb_position_t tsb = vmtx.get_side_bearing (font, glyph);
+ *y = extents.y_bearing + font->em_scale_y (tsb);
+ return true;
+ }
+
+ hb_font_extents_t font_extents;
+ font->get_h_extents_with_fallback (&font_extents);
+ *y = font_extents.ascender; /* last resort: the horizontal ascender */
+
+ return true; /* every path above produces an origin, so this callback never fails */
+}
+
+static hb_bool_t
+hb_ot_get_glyph_extents (hb_font_t *font,
+ void *font_data,
+ hb_codepoint_t glyph,
+ hb_glyph_extents_t *extents,
+ void *user_data HB_UNUSED)
+{
+ const hb_ot_face_t *ot_face = (const hb_ot_face_t *) font_data;
+
+ /* Sources are tried in the order below; the first one that succeeds wins. */
+#if !defined(HB_NO_OT_FONT_BITMAP) && !defined(HB_NO_COLOR)
+ if (ot_face->sbix->get_extents (font, glyph, extents)) return true;
+#endif
+ if (ot_face->glyf->get_extents (font, glyph, extents)) return true;
+#ifndef HB_NO_OT_FONT_CFF
+ if (ot_face->cff1->get_extents (font, glyph, extents)) return true;
+ if (ot_face->cff2->get_extents (font, glyph, extents)) return true;
+#endif
+#if !defined(HB_NO_OT_FONT_BITMAP) && !defined(HB_NO_COLOR)
+ if (ot_face->CBDT->get_extents (font, glyph, extents)) return true; /* CBDT is tried last, after the outline tables */
+#endif
+
+ // TODO Hook up side-bearings variations.
+ return false;
+}
+
+#ifndef HB_NO_OT_FONT_GLYPH_NAMES
+static hb_bool_t
+hb_ot_get_glyph_name (hb_font_t *font HB_UNUSED,
+ void *font_data,
+ hb_codepoint_t glyph,
+ char *name, unsigned int size,
+ void *user_data HB_UNUSED)
+{
+ const hb_ot_face_t *ot_face = (const hb_ot_face_t *) font_data;
+ if (ot_face->post->get_glyph_name (glyph, name, size)) return true; /* post is asked first */
+#ifndef HB_NO_OT_FONT_CFF
+ if (ot_face->cff1->get_glyph_name (glyph, name, size)) return true; /* then cff1, unless compiled out */
+#endif
+ return false; /* neither source produced a name */
+}
+static hb_bool_t /* Name-to-glyph lookup: `post` is consulted first, cff1 (when compiled in) only if that fails. */
+hb_ot_get_glyph_from_name (hb_font_t *font HB_UNUSED,
+ void *font_data,
+ const char *name, int len,
+ hb_codepoint_t *glyph,
+ void *user_data HB_UNUSED)
+{
+ const hb_ot_face_t *tables = static_cast<const hb_ot_face_t *> (font_data);
+ bool found = tables->post->get_glyph_from_name (name, len, glyph);
+#ifndef HB_NO_OT_FONT_CFF
+ if (!found) found = tables->cff1->get_glyph_from_name (name, len, glyph);
+#endif
+ return found;
+}
+#endif
+
+static hb_bool_t /* Fills ascender, descender and line gap for horizontal layout; stops at the first lookup that fails. */
+hb_ot_get_font_h_extents (hb_font_t *font,
+ void *font_data HB_UNUSED,
+ hb_font_extents_t *metrics,
+ void *user_data HB_UNUSED)
+{
+ bool ok = _hb_ot_metrics_get_position_common (font, HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER, &metrics->ascender);
+ ok = ok && _hb_ot_metrics_get_position_common (font, HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER, &metrics->descender);
+ return ok && _hb_ot_metrics_get_position_common (font, HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP, &metrics->line_gap);
+}
+
+static hb_bool_t /* Fills ascender, descender and line gap for vertical layout; stops at the first lookup that fails. */
+hb_ot_get_font_v_extents (hb_font_t *font,
+ void *font_data HB_UNUSED,
+ hb_font_extents_t *metrics,
+ void *user_data HB_UNUSED)
+{
+ bool ok = _hb_ot_metrics_get_position_common (font, HB_OT_METRICS_TAG_VERTICAL_ASCENDER, &metrics->ascender);
+ ok = ok && _hb_ot_metrics_get_position_common (font, HB_OT_METRICS_TAG_VERTICAL_DESCENDER, &metrics->descender);
+ return ok && _hb_ot_metrics_get_position_common (font, HB_OT_METRICS_TAG_VERTICAL_LINE_GAP, &metrics->line_gap);
+}
+
+#if HB_USE_ATEXIT
+static void free_static_ot_funcs (); /* declared early so create() below can register it with atexit */
+#endif
+
+static struct hb_ot_font_funcs_lazy_loader_t : hb_font_funcs_lazy_loader_t<hb_ot_font_funcs_lazy_loader_t>
+{
+ static hb_font_funcs_t *create () /* Creates the hb_font_funcs_t wired to the hb_ot_* callbacks of this file. */
+ {
+ hb_font_funcs_t *funcs = hb_font_funcs_create ();
+
+ hb_font_funcs_set_font_h_extents_func (funcs, hb_ot_get_font_h_extents, nullptr, nullptr);
+ hb_font_funcs_set_font_v_extents_func (funcs, hb_ot_get_font_v_extents, nullptr, nullptr);
+ hb_font_funcs_set_nominal_glyph_func (funcs, hb_ot_get_nominal_glyph, nullptr, nullptr);
+ hb_font_funcs_set_nominal_glyphs_func (funcs, hb_ot_get_nominal_glyphs, nullptr, nullptr);
+ hb_font_funcs_set_variation_glyph_func (funcs, hb_ot_get_variation_glyph, nullptr, nullptr);
+ hb_font_funcs_set_glyph_h_advances_func (funcs, hb_ot_get_glyph_h_advances, nullptr, nullptr);
+ hb_font_funcs_set_glyph_v_advances_func (funcs, hb_ot_get_glyph_v_advances, nullptr, nullptr);
+ //hb_font_funcs_set_glyph_h_origin_func (funcs, hb_ot_get_glyph_h_origin, nullptr, nullptr);
+ hb_font_funcs_set_glyph_v_origin_func (funcs, hb_ot_get_glyph_v_origin, nullptr, nullptr);
+ hb_font_funcs_set_glyph_extents_func (funcs, hb_ot_get_glyph_extents, nullptr, nullptr);
+ //hb_font_funcs_set_glyph_contour_point_func (funcs, hb_ot_get_glyph_contour_point, nullptr, nullptr);
+#ifndef HB_NO_OT_FONT_GLYPH_NAMES
+ hb_font_funcs_set_glyph_name_func (funcs, hb_ot_get_glyph_name, nullptr, nullptr);
+ hb_font_funcs_set_glyph_from_name_func (funcs, hb_ot_get_glyph_from_name, nullptr, nullptr);
+#endif
+
+ hb_font_funcs_make_immutable (funcs); /* all callbacks are registered; freeze the table */
+
+#if HB_USE_ATEXIT
+ atexit (free_static_ot_funcs); /* have the static instance released at process exit */
+#endif
+
+ return funcs;
+ }
+} static_ot_funcs;
+
+#if HB_USE_ATEXIT
+static
+void free_static_ot_funcs () /* atexit handler registered by create(): frees the static_ot_funcs instance */
+{
+ static_ot_funcs.free_instance ();
+}
+#endif
+
+static hb_font_funcs_t * /* Accessor for the file-wide static_ot_funcs loader. */
+_hb_ot_get_font_funcs ()
+{
+ return static_ot_funcs.get_unconst (); /* NOTE(review): presumably runs create() on first use -- confirm in the lazy-loader base */
+}
+
+
+/**
+ * hb_ot_font_set_funcs:
+ * @font: font to switch over to the OpenType font funcs defined in this file
+ * Since: 0.9.28
+ **/
+void
+hb_ot_font_set_funcs (hb_font_t *font)
+{
+ hb_font_set_funcs (font,
+ _hb_ot_get_font_funcs (),
+ &font->face->table, /* passed as font_data; the hb_ot_* callbacks cast it to hb_ot_face_t */
+ nullptr); /* no destroy callback is supplied for font_data */
+}
+
+#ifndef HB_NO_VAR
+int /* Non-static forwarder to the face's glyf accelerator get_side_bearing_var(). */
+_glyf_get_side_bearing_var (hb_font_t *font, hb_codepoint_t glyph, bool is_vertical)
+{
+ return font->face->table.glyf->get_side_bearing_var (font, glyph, is_vertical);
+}
+
+unsigned /* Non-static forwarder to the face's glyf accelerator get_advance_var(). */
+_glyf_get_advance_var (hb_font_t *font, hb_codepoint_t glyph, bool is_vertical)
+{
+ return font->face->table.glyf->get_advance_var (font, glyph, is_vertical);
+}
+#endif
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-font.h b/thirdparty/harfbuzz/src/hb-ot-font.h
new file mode 100644
index 0000000000..80eaa54b1a
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-font.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2014 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod, Roozbeh Pournader
+ */
+
+#ifndef HB_OT_H_IN
+#error "Include <hb-ot.h> instead."
+#endif
+
+#ifndef HB_OT_FONT_H
+#define HB_OT_FONT_H
+
+#include "hb.h"
+
+HB_BEGIN_DECLS
+
+
+HB_EXTERN void /* Exported declaration; the only symbol this header declares. */
+hb_ot_font_set_funcs (hb_font_t *font);
+
+
+HB_END_DECLS
+
+#endif /* HB_OT_FONT_H */
diff --git a/thirdparty/harfbuzz/src/hb-ot-gasp-table.hh b/thirdparty/harfbuzz/src/hb-ot-gasp-table.hh
new file mode 100644
index 0000000000..4f291924af
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-gasp-table.hh
@@ -0,0 +1,84 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_OT_GASP_TABLE_HH
+#define HB_OT_GASP_TABLE_HH
+
+#include "hb-open-type.hh"
+#include "hb-ot-hhea-table.hh"
+#include "hb-ot-os2-table.hh"
+#include "hb-ot-var-hvar-table.hh"
+
+/*
+ * gasp -- Grid-fitting and Scan-conversion Procedure
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/gasp
+ */
+#define HB_OT_TAG_gasp HB_TAG('g','a','s','p')
+
+
+namespace OT {
+
+struct GaspRange /* One gasp record: a PPEM upper bound plus the behavior flags for that range. */
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this)); /* fixed-size record: only the struct bounds are checked */
+ }
+
+ public:
+ HBUINT16 rangeMaxPPEM; /* Upper limit of range, in PPEM */
+ HBUINT16 rangeGaspBehavior;
+ /* Flags describing desired rasterizer behavior. */
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+
+struct gasp /* The 'gasp' table: a version field followed by an array of GaspRange records. */
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_gasp;
+
+ const GaspRange &get_gasp_range (unsigned int i) const /* returns gaspRanges[i] */
+ { return gaspRanges[i]; }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) && /* table header first, then the range array */
+ gaspRanges.sanitize (c));
+ }
+
+ protected:
+ HBUINT16 version; /* Version number (set to 1) */
+ ArrayOf<GaspRange>
+ gaspRanges; /* Number of records to follow
+ * Sorted by ppem */
+ public:
+ DEFINE_SIZE_ARRAY (4, gaspRanges);
+};
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_GASP_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-glyf-table.hh b/thirdparty/harfbuzz/src/hb-ot-glyf-table.hh
new file mode 100644
index 0000000000..5470bd96da
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-glyf-table.hh
@@ -0,0 +1,1261 @@
+/*
+ * Copyright © 2015 Google, Inc.
+ * Copyright © 2019 Adobe Inc.
+ * Copyright © 2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod, Garret Rieger, Roderick Sheeter
+ * Adobe Author(s): Michiharu Ariza
+ */
+
+#ifndef HB_OT_GLYF_TABLE_HH
+#define HB_OT_GLYF_TABLE_HH
+
+#include "hb-open-type.hh"
+#include "hb-ot-head-table.hh"
+#include "hb-ot-hmtx-table.hh"
+#include "hb-ot-var-gvar-table.hh"
+#include "hb-draw.hh"
+
+namespace OT {
+
+
+/*
+ * loca -- Index to Location
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/loca
+ */
+#define HB_OT_TAG_loca HB_TAG('l','o','c','a')
+
+
+struct loca /* Raw 'loca' bytes; glyf is a friend and therefore may read dataZ. */
+{
+ friend struct glyf;
+
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_loca;
+
+ bool sanitize (hb_sanitize_context_t *c HB_UNUSED) const /* accepts any blob: no checks are performed here */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (true);
+ }
+
+ protected:
+ UnsizedArrayOf<HBUINT8>
+ dataZ; /* Location data. */
+ public:
+ DEFINE_SIZE_MIN (0); /* In reality, this is UNBOUNDED() type; but since we always
+ * check the size externally, allow Null() object of it by
+ * defining it _MIN instead. */
+};
+
+
+/*
+ * glyf -- TrueType Glyph Data
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/glyf
+ */
+#define HB_OT_TAG_glyf HB_TAG('g','l','y','f')
+
+
+struct glyf
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_glyf;
+
+ bool sanitize (hb_sanitize_context_t *c HB_UNUSED) const /* always succeeds; nothing is validated up front */
+ {
+ TRACE_SANITIZE (this);
+ /* Checks happen at runtime instead, as eagerly sanitizing each glyph is costly */
+ return_trace (true);
+ }
+
+ template<typename Iterator,
+ hb_requires (hb_is_source_of (Iterator, unsigned int))>
+ static bool /* Builds the subset 'loca' table from padded glyph sizes, adds it to the plan, then adds the matching 'head'. */
+ _add_loca_and_head (hb_subset_plan_t * plan, Iterator padded_offsets)
+ {
+ unsigned max_offset = /* sum of all padded sizes, i.e. the last offset that will be written */
+ + padded_offsets
+ | hb_reduce (hb_add, 0)
+ ;
+ unsigned num_offsets = padded_offsets.len () + 1; /* one entry per glyph plus the leading zero entry */
+ bool use_short_loca = max_offset < 0x1FFFF; /* short entries store offset >> 1 in 16 bits */
+ unsigned entry_size = use_short_loca ? 2 : 4;
+ char *loca_prime_data = (char *) calloc (entry_size, num_offsets);
+
+ if (unlikely (!loca_prime_data)) return false;
+
+ DEBUG_MSG (SUBSET, nullptr, "loca entry_size %d num_offsets %d "
+ "max_offset %d size %d",
+ entry_size, num_offsets, max_offset, entry_size * num_offsets);
+
+ if (use_short_loca)
+ _write_loca (padded_offsets, 1, hb_array ((HBUINT16 *) loca_prime_data, num_offsets));
+ else
+ _write_loca (padded_offsets, 0, hb_array ((HBUINT32 *) loca_prime_data, num_offsets));
+
+ hb_blob_t *loca_blob = hb_blob_create (loca_prime_data,
+ entry_size * num_offsets,
+ HB_MEMORY_MODE_WRITABLE,
+ loca_prime_data,
+ free); /* free() is the blob's destroy callback for the calloc'ed buffer */
+
+ bool result = plan->add_table (HB_OT_TAG_loca, loca_blob)
+ && _add_head_and_set_loca_version (plan, use_short_loca); /* head is only touched if loca was added */
+
+ hb_blob_destroy (loca_blob); /* drop our reference on both success and failure */
+ return result;
+ }
+
+ template<typename IteratorIn, typename IteratorOut,
+ hb_requires (hb_is_source_of (IteratorIn, unsigned int)),
+ hb_requires (hb_is_sink_of (IteratorOut, unsigned))>
+ static void /* Writes a leading 0 followed by the running sum of padded sizes, each shifted right by right_shift. */
+ _write_loca (IteratorIn it, unsigned right_shift, IteratorOut dest)
+ {
+ unsigned int offset = 0; /* running total; captured by reference in the lambda below */
+ dest << 0;
+ + it
+ | hb_map ([=, &offset] (unsigned int padded_size)
+ {
+ offset += padded_size;
+ DEBUG_MSG (SUBSET, nullptr, "loca entry offset %d", offset);
+ return offset >> right_shift;
+ })
+ | hb_sink (dest)
+ ;
+ }
+
+ /* Iterator is left unconstrained: requiring a source of SubsetGlyph makes the compiler complain the identifier isn't declared */
+ template <typename Iterator>
+ bool serialize (hb_serialize_context_t *c,
+ Iterator it,
+ const hb_subset_plan_t *plan)
+ {
+ TRACE_SERIALIZE (this);
+ unsigned init_len = c->length (); /* remembered to detect whether any glyph wrote bytes */
+ for (const auto &_ : it) _.serialize (c, plan);
+
+ /* As a special case when all glyphs in the font are empty, add a zero byte
+ * to the table, so that OTS doesn’t reject it, and to make the table work
+ * on Windows as well.
+ * See https://github.com/khaledhosny/ots/issues/52 */
+ if (init_len == c->length ())
+ {
+ HBUINT8 empty_byte;
+ empty_byte = 0;
+ c->copy (empty_byte);
+ }
+ return_trace (true); /* always reports success; callers check the serializer's error state */
+ }
+
+ /* Byte region(s) per glyph to output:
+ unpadded, hints removed if so requested.
+ If we fail to process a glyph we produce an empty (0-length) glyph. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+
+ glyf *glyf_prime = c->serializer->start_embed <glyf> ();
+ if (unlikely (!c->serializer->check_success (glyf_prime))) return_trace (false);
+
+ hb_vector_t<SubsetGlyph> glyphs;
+ _populate_subset_glyphs (c->plan, &glyphs);
+
+ glyf_prime->serialize (c->serializer, hb_iter (glyphs), c->plan); /* result ignored; in_error () is checked below */
+
+ auto padded_offsets = /* per-glyph padded sizes, consumed when building loca */
+ + hb_iter (glyphs)
+ | hb_map (&SubsetGlyph::padded_size)
+ ;
+
+ if (c->serializer->in_error ()) return_trace (false);
+ return_trace (c->serializer->check_success (_add_loca_and_head (c->plan,
+ padded_offsets)));
+ }
+
+ template <typename SubsetGlyph>
+ void /* Appends one SubsetGlyph per output glyph id of the plan to *glyphs. */
+ _populate_subset_glyphs (const hb_subset_plan_t *plan,
+ hb_vector_t<SubsetGlyph> *glyphs /* OUT */) const
+ {
+ OT::glyf::accelerator_t glyf; /* temporary accelerator over the source face; released by fini () below */
+ glyf.init (plan->source);
+
+ + hb_range (plan->num_output_glyphs ())
+ | hb_map ([&] (hb_codepoint_t new_gid)
+ {
+ SubsetGlyph subset_glyph = {0};
+ subset_glyph.new_gid = new_gid;
+
+ /* should never fail: all old gids should be mapped */
+ if (!plan->old_gid_for_new_gid (new_gid, &subset_glyph.old_gid))
+ return subset_glyph; /* unmapped gid: keep the zero-initialized entry */
+
+ subset_glyph.source_glyph = glyf.glyph_for_gid (subset_glyph.old_gid, true);
+ if (plan->drop_hints) subset_glyph.drop_hints_bytes ();
+ else subset_glyph.dest_start = subset_glyph.source_glyph.get_bytes (); /* hints kept: output the glyph bytes as-is */
+
+ return subset_glyph;
+ })
+ | hb_sink (glyphs)
+ ;
+
+ glyf.fini ();
+ }
+
+ static bool /* Adds a writable copy of the source 'head' with indexToLocFormat set to match the loca just written. */
+ _add_head_and_set_loca_version (hb_subset_plan_t *plan, bool use_short_loca)
+ {
+ hb_blob_t *head_blob = hb_sanitize_context_t ().reference_table<head> (plan->source);
+ hb_blob_t *head_prime_blob = hb_blob_copy_writable_or_fail (head_blob);
+ hb_blob_destroy (head_blob); /* the source blob is no longer needed once copied */
+
+ if (unlikely (!head_prime_blob))
+ return false;
+
+ head *head_prime = (head *) hb_blob_get_data_writable (head_prime_blob, nullptr);
+ head_prime->indexToLocFormat = use_short_loca ? 0 : 1; /* 0 for short entries, 1 for long entries */
+ bool success = plan->add_table (HB_OT_TAG_head, head_prime_blob);
+
+ hb_blob_destroy (head_prime_blob);
+ return success;
+ }
+
+ struct CompositeGlyphChain /* One component record of a composite glyph: flags, glyph index, then flag-dependent arguments. */
+ {
+ protected:
+ enum composite_glyph_flag_t
+ {
+ ARG_1_AND_2_ARE_WORDS = 0x0001,
+ ARGS_ARE_XY_VALUES = 0x0002,
+ ROUND_XY_TO_GRID = 0x0004,
+ WE_HAVE_A_SCALE = 0x0008,
+ MORE_COMPONENTS = 0x0020,
+ WE_HAVE_AN_X_AND_Y_SCALE = 0x0040,
+ WE_HAVE_A_TWO_BY_TWO = 0x0080,
+ WE_HAVE_INSTRUCTIONS = 0x0100,
+ USE_MY_METRICS = 0x0200,
+ OVERLAP_COMPOUND = 0x0400,
+ SCALED_COMPONENT_OFFSET = 0x0800,
+ UNSCALED_COMPONENT_OFFSET = 0x1000
+ };
+
+ public:
+ unsigned int get_size () const /* byte size of this record: fixed part plus args plus optional scale values */
+ {
+ unsigned int size = min_size;
+ /* arg1 and 2 are int16 */
+ if (flags & ARG_1_AND_2_ARE_WORDS) size += 4;
+ /* arg1 and 2 are int8 */
+ else size += 2;
+
+ /* One x 16 bit (scale) */
+ if (flags & WE_HAVE_A_SCALE) size += 2;
+ /* Two x 16 bit (xscale, yscale) */
+ else if (flags & WE_HAVE_AN_X_AND_Y_SCALE) size += 4;
+ /* Four x 16 bit (xscale, scale01, scale10, yscale) */
+ else if (flags & WE_HAVE_A_TWO_BY_TWO) size += 8;
+
+ return size;
+ }
+
+ void set_glyph_index (hb_codepoint_t new_gid) { glyphIndex = new_gid; }
+ hb_codepoint_t get_glyph_index () const { return glyphIndex; }
+
+ void drop_instructions_flag () { flags = (uint16_t) flags & ~WE_HAVE_INSTRUCTIONS; }
+ bool has_instructions () const { return flags & WE_HAVE_INSTRUCTIONS; }
+
+ bool has_more () const { return flags & MORE_COMPONENTS; }
+ bool is_use_my_metrics () const { return flags & USE_MY_METRICS; }
+ bool is_anchored () const { return !(flags & ARGS_ARE_XY_VALUES); } /* args are read as point numbers, not x/y offsets */
+ void get_anchor_points (unsigned int &point1, unsigned int &point2) const /* reads the two args as unsigned point numbers */
+ {
+ const HBUINT8 *p = &StructAfter<const HBUINT8> (glyphIndex);
+ if (flags & ARG_1_AND_2_ARE_WORDS)
+ {
+ point1 = ((const HBUINT16 *) p)[0];
+ point2 = ((const HBUINT16 *) p)[1];
+ }
+ else
+ {
+ point1 = p[0];
+ point2 = p[1];
+ }
+ }
+
+ void transform_points (contour_point_vector_t &points) const /* applies this component's matrix and offset to points */
+ {
+ float matrix[4];
+ contour_point_t trans;
+ if (get_transformation (matrix, trans))
+ {
+ if (scaled_offsets ()) /* translate first, so the offset goes through the matrix as well */
+ {
+ points.translate (trans);
+ points.transform (matrix);
+ }
+ else
+ {
+ points.transform (matrix);
+ points.translate (trans);
+ }
+ }
+ }
+
+ protected:
+ bool scaled_offsets () const /* true only when SCALED is set and UNSCALED is clear */
+ { return (flags & (SCALED_COMPONENT_OFFSET | UNSCALED_COMPONENT_OFFSET)) == SCALED_COMPONENT_OFFSET; }
+
+ bool get_transformation (float (&matrix)[4], contour_point_t &trans) const /* false means identity matrix and zero offset */
+ {
+ matrix[0] = matrix[3] = 1.f;
+ matrix[1] = matrix[2] = 0.f;
+
+ int tx, ty;
+ const HBINT8 *p = &StructAfter<const HBINT8> (glyphIndex);
+ if (flags & ARG_1_AND_2_ARE_WORDS)
+ {
+ tx = *(const HBINT16 *) p;
+ p += HBINT16::static_size;
+ ty = *(const HBINT16 *) p;
+ p += HBINT16::static_size;
+ }
+ else
+ {
+ tx = *p++;
+ ty = *p++;
+ }
+ if (is_anchored ()) tx = ty = 0; /* anchored components carry point numbers here, not offsets */
+
+ trans.init ((float) tx, (float) ty);
+
+ {
+ const F2DOT14 *points = (const F2DOT14 *) p; /* scale values, if any, follow the args */
+ if (flags & WE_HAVE_A_SCALE)
+ {
+ matrix[0] = matrix[3] = points[0].to_float ();
+ return true;
+ }
+ else if (flags & WE_HAVE_AN_X_AND_Y_SCALE)
+ {
+ matrix[0] = points[0].to_float ();
+ matrix[3] = points[1].to_float ();
+ return true;
+ }
+ else if (flags & WE_HAVE_A_TWO_BY_TWO)
+ {
+ matrix[0] = points[0].to_float ();
+ matrix[1] = points[1].to_float ();
+ matrix[2] = points[2].to_float ();
+ matrix[3] = points[3].to_float ();
+ return true;
+ }
+ }
+ return tx || ty; /* no scale flags: only a non-zero offset needs applying */
+ }
+
+ protected:
+ HBUINT16 flags;
+ HBGlyphID glyphIndex;
+ public:
+ DEFINE_SIZE_MIN (4);
+ };
+
+ struct composite_iter_t : hb_iter_with_fallback_t<composite_iter_t, const CompositeGlyphChain &>
+ {
+ typedef const CompositeGlyphChain *__item_t__;
+ composite_iter_t (hb_bytes_t glyph_, __item_t__ current_) :
+ glyph (glyph_), current (current_)
+ { if (!check_range (current)) current = nullptr; } /* a first record outside the glyph bytes yields an empty iterator */
+ composite_iter_t () : glyph (hb_bytes_t ()), current (nullptr) {}
+
+ const CompositeGlyphChain &__item__ () const { return *current; }
+ bool __more__ () const { return current; }
+ void __next__ () /* advances to the following record, or ends iteration */
+ {
+ if (!current->has_more ()) { current = nullptr; return; } /* MORE_COMPONENTS clear: this was the last record */
+
+ const CompositeGlyphChain *possible = &StructAfter<CompositeGlyphChain,
+ CompositeGlyphChain> (*current);
+ if (!check_range (possible)) { current = nullptr; return; } /* next record would leave the glyph bytes: stop */
+ current = possible;
+ }
+ bool operator != (const composite_iter_t& o) const
+ { return glyph != o.glyph || current != o.current; }
+
+ bool check_range (const CompositeGlyphChain *composite) const /* checks the fixed part first, then the flag-dependent full size */
+ {
+ return glyph.check_range (composite, CompositeGlyphChain::min_size)
+ && glyph.check_range (composite, composite->get_size ());
+ }
+
+ private:
+ hb_bytes_t glyph;
+ __item_t__ current;
+ };
+
+ enum phantom_point_index_t /* indices of the phantom points appended after a glyph's own points */
+ {
+ PHANTOM_LEFT = 0,
+ PHANTOM_RIGHT = 1,
+ PHANTOM_TOP = 2,
+ PHANTOM_BOTTOM = 3,
+ PHANTOM_COUNT = 4
+ };
+
+ struct accelerator_t;
+
+ struct Glyph
+ {
+ enum simple_glyph_flag_t /* per-point flag bits of a simple glyph */
+ {
+ FLAG_ON_CURVE = 0x01,
+ FLAG_X_SHORT = 0x02, /* x delta is a single byte */
+ FLAG_Y_SHORT = 0x04, /* y delta is a single byte */
+ FLAG_REPEAT = 0x08, /* the next byte is a repeat count for this flag */
+ FLAG_X_SAME = 0x10, /* with X_SHORT: delta is added (else subtracted); without: x is unchanged */
+ FLAG_Y_SAME = 0x20, /* with Y_SHORT: delta is added (else subtracted); without: y is unchanged */
+ FLAG_RESERVED1 = 0x40,
+ FLAG_RESERVED2 = 0x80
+ };
+
+ private:
+ struct GlyphHeader /* Fixed 10-byte header at the start of every glyph record. */
+ {
+ bool has_data () const { return numberOfContours; }
+
+ bool get_extents (hb_font_t *font, const accelerator_t &glyf_accelerator, /* fills *extents from the bounding box; always true */
+ hb_codepoint_t gid, hb_glyph_extents_t *extents) const
+ {
+ /* Undocumented rasterizer behavior: shift glyph to the left by (lsb - xMin), i.e., xMin = lsb */
+ /* extents->x_bearing = hb_min (glyph_header.xMin, glyph_header.xMax); */
+ extents->x_bearing = font->em_scale_x (glyf_accelerator.hmtx->get_side_bearing (gid));
+ extents->y_bearing = font->em_scale_y (hb_max (yMin, yMax));
+ extents->width = font->em_scale_x (hb_max (xMin, xMax) - hb_min (xMin, xMax));
+ extents->height = font->em_scale_y (hb_min (yMin, yMax) - hb_max (yMin, yMax)); /* min - max: zero or negative */
+
+ return true;
+ }
+
+ HBINT16 numberOfContours;
+ /* If the number of contours is
+ * greater than or equal to zero,
+ * this is a simple glyph; if negative,
+ * this is a composite glyph. */
+ FWORD xMin; /* Minimum x for coordinate data. */
+ FWORD yMin; /* Minimum y for coordinate data. */
+ FWORD xMax; /* Maximum x for coordinate data. */
+ FWORD yMax; /* Maximum y for coordinate data. */
+ public:
+ DEFINE_SIZE_STATIC (10);
+ };
+
+ struct SimpleGlyph
+ {
+ const GlyphHeader &header;
+ hb_bytes_t bytes;
+ SimpleGlyph (const GlyphHeader &header_, hb_bytes_t bytes_) :
+ header (header_), bytes (bytes_) {} /* stores a reference to the header and a copy of the byte view */
+
+ unsigned int instruction_len_offset () const /* offset of the instruction-length field: header plus 2 bytes per contour */
+ { return GlyphHeader::static_size + 2 * header.numberOfContours; }
+
+ unsigned int length (unsigned int instruction_len) const /* bytes up to the end of the instructions: offset + 2-byte length + instructions */
+ { return instruction_len_offset () + 2 + instruction_len; }
+
+ unsigned int instructions_length () const /* instruction byte count, or 0 when the field or the instructions fall outside bytes */
+ {
+ unsigned int instruction_length_offset = instruction_len_offset ();
+ if (unlikely (instruction_length_offset + 2 > bytes.length)) return 0; /* length field itself is out of bounds */
+
+ const HBUINT16 &instructionLength = StructAtOffset<HBUINT16> (&bytes, instruction_length_offset);
+ /* Out of bounds of the current glyph */
+ if (unlikely (length (instructionLength) > bytes.length)) return 0;
+ return instructionLength;
+ }
+
+ const Glyph trim_padding () const /* returns a Glyph over bytes cut after the coordinate data, or an empty Glyph on malformed data */
+ {
+ /* based on FontTools _g_l_y_f.py::trim */
+ const char *glyph = bytes.arrayZ;
+ const char *glyph_end = glyph + bytes.length;
+ /* simple glyph w/contours, possibly trimmable */
+ glyph += instruction_len_offset ();
+
+ if (unlikely (glyph + 2 >= glyph_end)) return Glyph ();
+ unsigned int num_coordinates = StructAtOffset<HBUINT16> (glyph - 2, 0) + 1; /* last end-point index + 1 */
+ unsigned int num_instructions = StructAtOffset<HBUINT16> (glyph, 0);
+
+ glyph += 2 + num_instructions; /* skip the length field and the instructions */
+
+ unsigned int coord_bytes = 0;
+ unsigned int coords_with_flags = 0;
+ while (glyph < glyph_end) /* walk the flag bytes, accumulating how many coordinate bytes they imply */
+ {
+ uint8_t flag = *glyph;
+ glyph++;
+
+ unsigned int repeat = 1;
+ if (flag & FLAG_REPEAT)
+ {
+ if (unlikely (glyph >= glyph_end)) return Glyph ();
+ repeat = *glyph + 1;
+ glyph++;
+ }
+
+ unsigned int xBytes, yBytes;
+ xBytes = yBytes = 0;
+ if (flag & FLAG_X_SHORT) xBytes = 1;
+ else if ((flag & FLAG_X_SAME) == 0) xBytes = 2;
+
+ if (flag & FLAG_Y_SHORT) yBytes = 1;
+ else if ((flag & FLAG_Y_SAME) == 0) yBytes = 2;
+
+ coord_bytes += (xBytes + yBytes) * repeat;
+ coords_with_flags += repeat;
+ if (coords_with_flags >= num_coordinates) break;
+ }
+
+ if (unlikely (coords_with_flags != num_coordinates)) return Glyph (); /* flags do not cover exactly the declared points */
+ return Glyph (bytes.sub_array (0, bytes.length + coord_bytes - (glyph_end - glyph))); /* keep up to the end of the coordinates */
+ }
+
+ /* Zero the instruction-length field in place */
+ void drop_hints ()
+ {
+ GlyphHeader &glyph_header = const_cast<GlyphHeader &> (header); /* header is held as const; cast away to write through it */
+ (HBUINT16 &) StructAtOffset<HBUINT16> (&glyph_header, instruction_len_offset ()) = 0;
+ }
+
+ void drop_hints_bytes (hb_bytes_t &dest_start, hb_bytes_t &dest_end) const /* splits bytes into the parts before and after the instructions */
+ {
+ unsigned int instructions_len = instructions_length ();
+ unsigned int glyph_length = length (instructions_len);
+ dest_start = bytes.sub_array (0, glyph_length - instructions_len); /* everything up to the first instruction byte */
+ dest_end = bytes.sub_array (glyph_length, bytes.length - glyph_length); /* everything after the instructions */
+ }
+
+ static bool read_points (const HBUINT8 *&p /* IN/OUT */, /* decodes one coordinate axis; false when a read would leave bytes */
+ contour_point_vector_t &points_ /* IN/OUT */,
+ const hb_bytes_t &bytes,
+ void (* setter) (contour_point_t &_, float v),
+ const simple_glyph_flag_t short_flag,
+ const simple_glyph_flag_t same_flag)
+ {
+ float v = 0; /* running coordinate; each point stores a delta from the previous one */
+ for (unsigned i = 0; i < points_.length; i++)
+ {
+ uint8_t flag = points_[i].flag;
+ if (flag & short_flag) /* one-byte magnitude; same_flag selects the sign */
+ {
+ if (unlikely (!bytes.check_range (p))) return false;
+ if (flag & same_flag)
+ v += *p++;
+ else
+ v -= *p++;
+ }
+ else
+ {
+ if (!(flag & same_flag)) /* signed 16-bit delta; when same_flag is set the value is unchanged */
+ {
+ if (unlikely (!bytes.check_range ((const HBUINT16 *) p))) return false;
+ v += *(const HBINT16 *) p;
+ p += HBINT16::static_size;
+ }
+ }
+ setter (points_[i], v);
+ }
+ return true;
+ }
+
+ bool get_contour_points (contour_point_vector_t &points_ /* OUT */,
+ bool phantom_only = false) const /* decodes flags and coordinates into points_; false on truncated data or failed allocation */
+ {
+ const HBUINT16 *endPtsOfContours = &StructAfter<HBUINT16> (header);
+ int num_contours = header.numberOfContours;
+ if (unlikely (!bytes.check_range (&endPtsOfContours[num_contours + 1]))) return false;
+ unsigned int num_points = endPtsOfContours[num_contours - 1] + 1; /* last end-point index + 1 */
+
+ if (unlikely (!points_.resize (num_points))) return false; /* report allocation failure, as get_points () does */
+ for (unsigned int i = 0; i < points_.length; i++) points_[i].init ();
+ if (phantom_only) return true; /* caller only needs the point count, not the coordinates */
+
+ for (int i = 0; i < num_contours; i++)
+ points_[endPtsOfContours[i]].is_end_point = true;
+
+ /* Skip instructions */
+ const HBUINT8 *p = &StructAtOffset<HBUINT8> (&endPtsOfContours[num_contours + 1],
+ endPtsOfContours[num_contours]);
+
+ /* Read flags */
+ for (unsigned int i = 0; i < num_points; i++)
+ {
+ if (unlikely (!bytes.check_range (p))) return false;
+ uint8_t flag = *p++;
+ points_[i].flag = flag;
+ if (flag & FLAG_REPEAT)
+ {
+ if (unlikely (!bytes.check_range (p))) return false;
+ unsigned int repeat_count = *p++;
+ while ((repeat_count-- > 0) && (++i < num_points)) /* copy the flag to the following points, bounded by num_points */
+ points_[i].flag = flag;
+ }
+ }
+
+ /* Read x & y coordinates */
+ return read_points (p, points_, bytes, [] (contour_point_t &p, float v) { p.x = v; },
+ FLAG_X_SHORT, FLAG_X_SAME)
+ && read_points (p, points_, bytes, [] (contour_point_t &p, float v) { p.y = v; },
+ FLAG_Y_SHORT, FLAG_Y_SAME);
+ }
+ };
+
+ struct CompositeGlyph /* View of a composite glyph: its header plus the bytes holding the component records. */
+ {
+ const GlyphHeader &header;
+ hb_bytes_t bytes;
+ CompositeGlyph (const GlyphHeader &header_, hb_bytes_t bytes_) :
+ header (header_), bytes (bytes_) {}
+
+ composite_iter_t get_iterator () const /* iterates component records, starting right after the header */
+ { return composite_iter_t (bytes, &StructAfter<CompositeGlyphChain, GlyphHeader> (header)); }
+
+ unsigned int instructions_length (hb_bytes_t bytes) const /* bytes from the end of the last component to the end of bytes */
+ {
+ unsigned int start = bytes.length;
+ unsigned int end = bytes.length;
+ const CompositeGlyphChain *last = nullptr;
+ for (auto &item : get_iterator ())
+ last = &item;
+ if (unlikely (!last)) return 0; /* no readable component */
+
+ if (last->has_instructions ())
+ start = (char *) last - &bytes + last->get_size (); /* offset just past the last component record */
+ if (unlikely (start > end)) return 0;
+ return end - start;
+ }
+
+ /* Trimming for composites not implemented.
+ * If removing hints it falls out of that. */
+ const Glyph trim_padding () const { return Glyph (bytes); }
+
+ void drop_hints () /* clears WE_HAVE_INSTRUCTIONS on every component, writing through a const_cast */
+ {
+ for (const auto &_ : get_iterator ())
+ const_cast<CompositeGlyphChain &> (_).drop_instructions_flag ();
+ }
+
+ /* Chop instructions off the end */
+ void drop_hints_bytes (hb_bytes_t &dest_start) const
+ { dest_start = bytes.sub_array (0, bytes.length - instructions_length (bytes)); }
+ };
+
+ enum glyph_type_t { EMPTY, SIMPLE, COMPOSITE }; /* set by the Glyph constructor from numberOfContours: zero, positive, negative */
+
+ public:
+ composite_iter_t get_composite_iterator () const /* component iterator for composite glyphs; an empty iterator otherwise */
+ {
+ if (type == COMPOSITE) return CompositeGlyph (*header, bytes).get_iterator ();
+ return composite_iter_t ();
+ }
+
+ const Glyph trim_padding () const /* delegates to the composite/simple view; other glyphs become a Glyph over the same bytes */
+ {
+ if (type == COMPOSITE)
+ return CompositeGlyph (*header, bytes).trim_padding ();
+ if (type == SIMPLE)
+ return SimpleGlyph (*header, bytes).trim_padding ();
+ return bytes;
+ }
+
+ void drop_hints () /* delegates to the composite/simple view of this glyph */
+ {
+ if (type == COMPOSITE)
+ CompositeGlyph (*header, bytes).drop_hints ();
+ else if (type == SIMPLE)
+ SimpleGlyph (*header, bytes).drop_hints ();
+ /* any other type: nothing to do */
+ }
+
+ void drop_hints_bytes (hb_bytes_t &dest_start, hb_bytes_t &dest_end) const
+ {
+ if (type == COMPOSITE)
+ CompositeGlyph (*header, bytes).drop_hints_bytes (dest_start);
+ else if (type == SIMPLE)
+ SimpleGlyph (*header, bytes).drop_hints_bytes (dest_start, dest_end);
+ /* any other type: both outputs are left untouched */
+ }
+
+ /* Note: Recursively calls itself for the components of a composite glyph.
+ * all_points includes the four phantom points, appended last.
+ */
+ bool get_points (hb_font_t *font, const accelerator_t &glyf_accelerator,
+ contour_point_vector_t &all_points /* OUT */,
+ bool phantom_only = false,
+ unsigned int depth = 0) const
+ {
+ if (unlikely (depth > HB_MAX_NESTING_LEVEL)) return false; /* bound the recursion through nested composites */
+ contour_point_vector_t points;
+
+ switch (type) {
+ case COMPOSITE:
+ {
+ /* pseudo component points for each component in composite glyph */
+ unsigned num_points = hb_len (CompositeGlyph (*header, bytes).get_iterator ());
+ if (unlikely (!points.resize (num_points))) return false;
+ for (unsigned i = 0; i < points.length; i++)
+ points[i].init ();
+ break;
+ }
+ case SIMPLE:
+ if (unlikely (!SimpleGlyph (*header, bytes).get_contour_points (points, phantom_only)))
+ return false;
+ break;
+ }
+
+ /* Init phantom points */
+ if (unlikely (!points.resize (points.length + PHANTOM_COUNT))) return false;
+ hb_array_t<contour_point_t> phantoms = points.sub_array (points.length - PHANTOM_COUNT, PHANTOM_COUNT);
+ {
+ for (unsigned i = 0; i < PHANTOM_COUNT; ++i) phantoms[i].init ();
+ int h_delta = (int) header->xMin - glyf_accelerator.hmtx->get_side_bearing (gid);
+ int v_orig = (int) header->yMax + glyf_accelerator.vmtx->get_side_bearing (gid);
+ unsigned h_adv = glyf_accelerator.hmtx->get_advance (gid);
+ unsigned v_adv = glyf_accelerator.vmtx->get_advance (gid);
+ phantoms[PHANTOM_LEFT].x = h_delta;
+ phantoms[PHANTOM_RIGHT].x = h_adv + h_delta;
+ phantoms[PHANTOM_TOP].y = v_orig;
+ phantoms[PHANTOM_BOTTOM].y = v_orig - (int) v_adv;
+ }
+
+#ifndef HB_NO_VAR
+ if (unlikely (!glyf_accelerator.gvar->apply_deltas_to_points (gid, font, points.as_array ())))
+ return false;
+#endif
+
+ switch (type) {
+ case SIMPLE:
+ all_points.extend (points.as_array ()); /* own points followed by the phantoms, already in order */
+ break;
+ case COMPOSITE:
+ {
+ unsigned int comp_index = 0; /* index of the pseudo point belonging to the current component */
+ for (auto &item : get_composite_iterator ())
+ {
+ contour_point_vector_t comp_points;
+ if (unlikely (!glyf_accelerator.glyph_for_gid (item.get_glyph_index ())
+ .get_points (font, glyf_accelerator, comp_points,
+ phantom_only, depth + 1)
+ || comp_points.length < PHANTOM_COUNT))
+ return false;
+
+ /* Copy phantom points from component if USE_MY_METRICS flag set */
+ if (item.is_use_my_metrics ())
+ for (unsigned int i = 0; i < PHANTOM_COUNT; i++)
+ phantoms[i] = comp_points[comp_points.length - PHANTOM_COUNT + i];
+
+ /* Apply component transformation & translation */
+ item.transform_points (comp_points);
+
+ /* Apply translation from gvar */
+ comp_points.translate (points[comp_index]);
+
+ if (item.is_anchored ()) /* align point p2 of the component with point p1 already collected */
+ {
+ unsigned int p1, p2;
+ item.get_anchor_points (p1, p2);
+ if (likely (p1 < all_points.length && p2 < comp_points.length))
+ {
+ contour_point_t delta;
+ delta.init (all_points[p1].x - comp_points[p2].x,
+ all_points[p1].y - comp_points[p2].y);
+
+ comp_points.translate (delta);
+ }
+ }
+
+ all_points.extend (comp_points.sub_array (0, comp_points.length - PHANTOM_COUNT)); /* the component's own phantoms are dropped */
+
+ comp_index++;
+ }
+
+ all_points.extend (phantoms);
+ } break;
+ default:
+ all_points.extend (phantoms); /* neither simple nor composite: only the phantom points are produced */
+ }
+
+ if (depth == 0) /* Apply at top level */
+ {
+ /* Undocumented rasterizer behavior:
+ * Shift points horizontally by the updated left side bearing
+ */
+ contour_point_t delta;
+ delta.init (-phantoms[PHANTOM_LEFT].x, 0.f);
+ if (delta.x) all_points.translate (delta);
+ }
+
+ return true;
+ }
+
+ /* Reports this glyph's extents through `extents`.
+ * An EMPTY glyph succeeds right away and leaves `extents` untouched;
+ * every other glyph type defers to the glyph header. */
+ bool get_extents (hb_font_t *font, const accelerator_t &glyf_accelerator,
+ hb_glyph_extents_t *extents) const
+ {
+ return type == EMPTY
+ || header->get_extents (font, glyf_accelerator, gid, extents);
+ }
+
+ /* Returns the stored `bytes` member, i.e. the raw glyph data this object wraps. */
+ hb_bytes_t get_bytes () const { return bytes; }
+
+ /* Wraps `bytes_` as glyph `gid_` and classifies it from the header's
+ * numberOfContours: positive => SIMPLE, negative => COMPOSITE, zero => EMPTY. */
+ Glyph (hb_bytes_t bytes_ = hb_bytes_t (),
+ hb_codepoint_t gid_ = (hb_codepoint_t) -1) : bytes (bytes_), gid (gid_),
+ header (bytes.as<GlyphHeader> ())
+ {
+ const int contour_count = header->numberOfContours;
+ if (contour_count > 0)
+ type = SIMPLE;
+ else if (contour_count < 0)
+ type = COMPOSITE;
+ else
+ type = EMPTY;
+ }
+
+ protected:
+ hb_bytes_t bytes;
+ hb_codepoint_t gid;
+ const GlyphHeader *header;
+ unsigned type;
+ };
+
+ struct accelerator_t
+ {
+ /* Prepares the accelerator for `face_`: resets every member, then (if the
+ * head table advertises a known format) references the loca and glyf blobs,
+ * picks up the metrics/variation accelerators and derives num_glyphs. */
+ void init (hb_face_t *face_)
+ {
+ short_offset = false;
+ num_glyphs = 0;
+ loca_table = nullptr;
+ glyf_table = nullptr;
+#ifndef HB_NO_VAR
+ gvar = nullptr;
+#endif
+ hmtx = nullptr;
+ vmtx = nullptr;
+ face = face_;
+ const OT::head &head = *face->table.head;
+ if (head.indexToLocFormat > 1 || head.glyphDataFormat > 0)
+ /* Unknown format. Leave num_glyphs=0, that takes care of disabling us. */
+ return;
+ /* indexToLocFormat 0 means loca stores 16-bit offsets. */
+ short_offset = 0 == head.indexToLocFormat;
+
+ loca_table = hb_sanitize_context_t ().reference_table<loca> (face);
+ glyf_table = hb_sanitize_context_t ().reference_table<glyf> (face);
+#ifndef HB_NO_VAR
+ gvar = face->table.gvar;
+#endif
+ hmtx = face->table.hmtx;
+ vmtx = face->table.vmtx;
+
+ /* loca has one more entry than there are glyphs (entry size is 2 or 4 bytes);
+ * hb_max (1u, ...) keeps the "- 1" from wrapping when loca is empty. */
+ num_glyphs = hb_max (1u, loca_table.get_length () / (short_offset ? 2 : 4)) - 1;
+ /* Never report more glyphs than the face itself does. */
+ num_glyphs = hb_min (num_glyphs, face->get_num_glyphs ());
+ }
+
+ /* Releases the loca and glyf blob references taken in init (). */
+ void fini ()
+ {
+ loca_table.destroy ();
+ glyf_table.destroy ();
+ }
+
+ protected:
+ /* Collects the points of glyph `gid` and feeds them to `consumer`.
+ *
+ * `consumer` must provide is_consuming_contour_points (), consume_point (),
+ * points_end () and get_phantoms_sink ().  Contour points are streamed only
+ * when the consumer asks for them; the trailing PHANTOM_COUNT phantom points
+ * are copied to the consumer's sink when that sink is non-null.
+ *
+ * Returns false if `gid` is out of range, if the glyph's points could not be
+ * computed, or if the result is too short to contain the phantom points. */
+ template<typename T>
+ bool get_points (hb_font_t *font, hb_codepoint_t gid, T consumer) const
+ {
+ if (gid >= num_glyphs) return false;
+
+ /* Making this alloc free is not that easy
+ https://github.com/harfbuzz/harfbuzz/issues/2095
+ mostly because of gvar handling in VF fonts,
+ perhaps a separate path for non-VF fonts can be considered */
+ contour_point_vector_t all_points;
+
+ bool phantom_only = !consumer.is_consuming_contour_points ();
+ if (unlikely (!glyph_for_gid (gid).get_points (font, *this, all_points, phantom_only)))
+ return false;
+
+ /* The phantom points sit at the very end of all_points.  Bail out if they
+ * are missing so the unsigned subtraction below cannot wrap around. */
+ if (unlikely (all_points.length < PHANTOM_COUNT)) return false;
+ const unsigned contour_point_count = all_points.length - PHANTOM_COUNT;
+
+ if (!phantom_only)
+ {
+ for (unsigned point_index = 0; point_index < contour_point_count; ++point_index)
+ consumer.consume_point (all_points[point_index]);
+ consumer.points_end ();
+ }
+
+ /* Where to write phantoms, nullptr if not requested */
+ contour_point_t *phantoms = consumer.get_phantoms_sink ();
+ if (phantoms)
+ for (unsigned i = 0; i < PHANTOM_COUNT; ++i)
+ phantoms[i] = all_points[contour_point_count + i];
+
+ return true;
+ }
+
+#ifndef HB_NO_VAR
+ /* get_points () consumer that accumulates a bounding box of the contour
+ * points (when `extents` is non-null) and exposes `phantoms` as the sink for
+ * the phantom points. */
+ struct points_aggregator_t
+ {
+ hb_font_t *font;
+ hb_glyph_extents_t *extents;
+ contour_point_t *phantoms;
+
+ /* Running min/max of the points added so far, in font units. */
+ struct contour_bounds_t
+ {
+ contour_bounds_t ()
+ {
+ /* Start inverted so that the first added point defines the box. */
+ min_x = FLT_MAX;
+ min_y = FLT_MAX;
+ max_x = -FLT_MAX;
+ max_y = -FLT_MAX;
+ }
+
+ void add (const contour_point_t &p)
+ {
+ min_x = hb_min (min_x, p.x);
+ max_x = hb_max (max_x, p.x);
+ min_y = hb_min (min_y, p.y);
+ max_y = hb_max (max_y, p.y);
+ }
+
+ /* A box with no width or no height counts as empty. */
+ bool empty () const { return (min_x >= max_x) || (min_y >= max_y); }
+
+ /* Writes the scaled box to `extents`; an empty box yields all zeros. */
+ void get_extents (hb_font_t *font, hb_glyph_extents_t *extents)
+ {
+ if (likely (!empty ()))
+ {
+ extents->x_bearing = font->em_scalef_x (min_x);
+ extents->width = font->em_scalef_x (max_x - min_x);
+ extents->y_bearing = font->em_scalef_y (max_y);
+ extents->height = font->em_scalef_y (min_y - max_y);
+ return;
+ }
+
+ extents->width = 0;
+ extents->x_bearing = 0;
+ extents->height = 0;
+ extents->y_bearing = 0;
+ }
+
+ protected:
+ float min_x, min_y, max_x, max_y;
+ } bounds;
+
+ points_aggregator_t (hb_font_t *font_, hb_glyph_extents_t *extents_, contour_point_t *phantoms_)
+ : font (font_), extents (extents_), phantoms (phantoms_)
+ {
+ if (extents) bounds = contour_bounds_t ();
+ }
+
+ void consume_point (const contour_point_t &point) { bounds.add (point); }
+ void points_end () { bounds.get_extents (font, extents); }
+
+ /* Contour points are only needed when extents were requested. */
+ bool is_consuming_contour_points () { return extents != nullptr; }
+ contour_point_t *get_phantoms_sink () { return phantoms; }
+ };
+
+ public:
+ /* Returns the advance of `gid` derived from its phantom points (vertical:
+ * top minus bottom, horizontal: right minus left), rounded and clamped to
+ * [0, UINT_MAX / 2].  Falls back to the vmtx/hmtx advance when the font's
+ * coordinate count does not match gvar's axis count or the points cannot be
+ * computed.  Out-of-range glyphs yield 0. */
+ unsigned
+ get_advance_var (hb_font_t *font, hb_codepoint_t gid, bool is_vertical) const
+ {
+ if (unlikely (gid >= num_glyphs)) return 0;
+
+ contour_point_t phantoms[PHANTOM_COUNT];
+ bool have_phantoms = false;
+ if (likely (font->num_coords == gvar->get_axis_count ()))
+ have_phantoms = get_points (font, gid, points_aggregator_t (font, nullptr, phantoms));
+
+ if (unlikely (!have_phantoms))
+ {
+ if (is_vertical) return vmtx->get_advance (gid);
+ return hmtx->get_advance (gid);
+ }
+
+ float span;
+ if (is_vertical)
+ span = phantoms[PHANTOM_TOP].y - phantoms[PHANTOM_BOTTOM].y;
+ else
+ span = phantoms[PHANTOM_RIGHT].x - phantoms[PHANTOM_LEFT].x;
+
+ return hb_clamp (roundf (span), 0.f, (float) UINT_MAX / 2);
+ }
+
+ /* Returns the side bearing of `gid` derived from its phantom points and
+ * computed extents: vertical is ceil (top phantom y) minus the y bearing,
+ * horizontal is floor (left phantom x).  Falls back to the vmtx/hmtx side
+ * bearing when the points cannot be computed.  Out-of-range glyphs yield 0. */
+ int get_side_bearing_var (hb_font_t *font, hb_codepoint_t gid, bool is_vertical) const
+ {
+ if (unlikely (gid >= num_glyphs)) return 0;
+
+ hb_glyph_extents_t extents;
+ contour_point_t phantoms[PHANTOM_COUNT];
+
+ const bool have_points = get_points (font, gid, points_aggregator_t (font, &extents, phantoms));
+ if (unlikely (!have_points))
+ {
+ if (is_vertical) return vmtx->get_side_bearing (gid);
+ return hmtx->get_side_bearing (gid);
+ }
+
+ if (is_vertical)
+ return ceilf (phantoms[PHANTOM_TOP].y) - extents.y_bearing;
+ return floorf (phantoms[PHANTOM_LEFT].x);
+ }
+#endif
+
+ public:
+ /* Computes the extents of glyph `gid` into `extents`.
+ * Returns false for out-of-range glyph ids. */
+ bool get_extents (hb_font_t *font, hb_codepoint_t gid, hb_glyph_extents_t *extents) const
+ {
+ if (unlikely (gid >= num_glyphs)) return false;
+
+#ifndef HB_NO_VAR
+ /* With variation coordinates set (and matching gvar's axis count) the
+ * extents are recomputed from the glyph's points. */
+ if (font->num_coords && font->num_coords == gvar->get_axis_count ())
+ return get_points (font, gid, points_aggregator_t (font, extents, nullptr));
+#endif
+ /* Otherwise defer to the glyph's own get_extents (). */
+ return glyph_for_gid (gid).get_extents (font, *this, extents);
+ }
+
+ /* Looks up the byte range of glyph `gid` through loca and returns a Glyph
+ * over that slice of the glyf table.  An empty Glyph is returned when `gid`
+ * is out of range or the loca offsets are inconsistent with the glyf table
+ * length.  When `needs_padding_removal` is set the result is passed through
+ * trim_padding (). */
+ const Glyph
+ glyph_for_gid (hb_codepoint_t gid, bool needs_padding_removal = false) const
+ {
+ if (unlikely (gid >= num_glyphs)) return Glyph ();
+
+ unsigned int glyph_begin, glyph_end;
+
+ if (!short_offset)
+ {
+ const HBUINT32 *offsets = (const HBUINT32 *) loca_table->dataZ.arrayZ;
+ glyph_begin = offsets[gid];
+ glyph_end = offsets[gid + 1];
+ }
+ else
+ {
+ /* Short loca entries store the offset divided by two. */
+ const HBUINT16 *offsets = (const HBUINT16 *) loca_table->dataZ.arrayZ;
+ glyph_begin = 2 * offsets[gid];
+ glyph_end = 2 * offsets[gid + 1];
+ }
+
+ const bool range_ok = glyph_begin <= glyph_end && glyph_end <= glyf_table.get_length ();
+ if (unlikely (!range_ok))
+ return Glyph ();
+
+ Glyph glyph (hb_bytes_t ((const char *) this->glyf_table + glyph_begin,
+ glyph_end - glyph_begin), gid);
+ if (needs_padding_removal)
+ return glyph.trim_padding ();
+ return glyph;
+ }
+
+ /* Adds `gid` to `gids_to_retain`, then recurses into every component glyph
+ * referenced by it.  Already-present glyphs are not revisited, and recursion
+ * stops once `depth` exceeds HB_MAX_NESTING_LEVEL. */
+ void
+ add_gid_and_children (hb_codepoint_t gid, hb_set_t *gids_to_retain,
+ unsigned int depth = 0) const
+ {
+ if (unlikely (depth > HB_MAX_NESTING_LEVEL)) return;
+
+ /* Already visited: nothing more to do. */
+ if (gids_to_retain->has (gid)) return;
+ gids_to_retain->add (gid);
+
+ const unsigned int child_depth = depth + 1;
+ for (auto &component : glyph_for_gid (gid).get_composite_iterator ())
+ add_gid_and_children (component.get_glyph_index (), gids_to_retain, child_depth);
+ }
+
+#ifdef HB_EXPERIMENTAL_API
+ struct path_builder_t
+ {
+ hb_font_t *font;
+ draw_helper_t *draw_helper;
+
+ /* A 2D point that may be absent; `has_data` tells whether x/y are meaningful. */
+ struct optional_point_t
+ {
+ /* Absent point.  x/y are zeroed so that copying an absent point never
+ * reads indeterminate floats. */
+ optional_point_t () { has_data = false; x = 0.f; y = 0.f; }
+ optional_point_t (float x_, float y_) { x = x_; y = y_; has_data = true; }
+
+ bool has_data;
+ float x;
+ float y;
+
+ /* Linear interpolation from this point towards `p`: t = 0 gives this
+ * point, t = 1 gives `p`.  The result always has data. */
+ optional_point_t lerp (optional_point_t p, float t) const
+ { return optional_point_t (x + t * (p.x - x), y + t * (p.y - y)); }
+ } first_oncurve, first_offcurve, last_offcurve;
+
+ /* Binds the builder to `font_` and `draw_helper_` and starts with no
+ * contour in progress. */
+ path_builder_t (hb_font_t *font_, draw_helper_t &draw_helper_)
+ : font (font_), draw_helper (&draw_helper_)
+ {
+ const optional_point_t none;
+ first_oncurve = none;
+ first_offcurve = none;
+ last_offcurve = none;
+ }
+
+ /* Feeds one contour point into the path builder, emitting move_to /
+ * line_to / quadratic_to calls on draw_helper and closing the contour when
+ * the point is flagged as an end point.
+ *
+ * State kept between calls:
+ *   first_oncurve  - start of the current contour (a real on-curve point or
+ *                    the midpoint of the first two off-curve points);
+ *   first_offcurve - off-curve point seen before any start point was known;
+ *   last_offcurve  - pending control point waiting for its end point.
+ *
+ * based on https://github.com/RazrFalcon/ttf-parser/blob/4f32821/src/glyf.rs#L287
+ See also:
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM01/Chap1.html
+ * https://stackoverflow.com/a/20772557 */
+ void consume_point (const contour_point_t &point)
+ {
+ /* Skip empty contours */
+ if (unlikely (point.is_end_point && !first_oncurve.has_data && !first_offcurve.has_data))
+ return;
+
+ bool is_on_curve = point.flag & Glyph::FLAG_ON_CURVE;
+ optional_point_t p (point.x, point.y);
+ if (!first_oncurve.has_data)
+ {
+ /* The contour has no start point yet. */
+ if (is_on_curve)
+ {
+ first_oncurve = p;
+ draw_helper->move_to (font->em_scalef_x (p.x), font->em_scalef_y (p.y));
+ }
+ else
+ {
+ if (first_offcurve.has_data)
+ {
+ /* Two leading off-curve points: start at their midpoint. */
+ optional_point_t mid = first_offcurve.lerp (p, .5f);
+ first_oncurve = mid;
+ last_offcurve = p;
+ draw_helper->move_to (font->em_scalef_x (mid.x), font->em_scalef_y (mid.y));
+ }
+ else
+ first_offcurve = p;
+ }
+ }
+ else
+ {
+ if (last_offcurve.has_data)
+ {
+ if (is_on_curve)
+ {
+ /* Control point followed by an on-curve point: one quadratic segment. */
+ draw_helper->quadratic_to (font->em_scalef_x (last_offcurve.x), font->em_scalef_y (last_offcurve.y),
+ font->em_scalef_x (p.x), font->em_scalef_y (p.y));
+ last_offcurve = optional_point_t ();
+ }
+ else
+ {
+ /* Two consecutive off-curve points: end the segment at their midpoint
+ * and keep the new point as the next control point. */
+ optional_point_t mid = last_offcurve.lerp (p, .5f);
+ draw_helper->quadratic_to (font->em_scalef_x (last_offcurve.x), font->em_scalef_y (last_offcurve.y),
+ font->em_scalef_x (mid.x), font->em_scalef_y (mid.y));
+ last_offcurve = p;
+ }
+ }
+ else
+ {
+ if (is_on_curve)
+ draw_helper->line_to (font->em_scalef_x (p.x), font->em_scalef_y (p.y));
+ else
+ last_offcurve = p;
+ }
+ }
+
+ if (point.is_end_point)
+ {
+ /* Close the contour back to its start. */
+ if (first_offcurve.has_data && last_offcurve.has_data)
+ {
+ optional_point_t mid = last_offcurve.lerp (first_offcurve, .5f);
+ draw_helper->quadratic_to (font->em_scalef_x (last_offcurve.x), font->em_scalef_y (last_offcurve.y),
+ font->em_scalef_x (mid.x), font->em_scalef_y (mid.y));
+ last_offcurve = optional_point_t ();
+ /* now check the rest */
+ }
+
+ if (first_offcurve.has_data && first_oncurve.has_data)
+ draw_helper->quadratic_to (font->em_scalef_x (first_offcurve.x), font->em_scalef_y (first_offcurve.y),
+ font->em_scalef_x (first_oncurve.x), font->em_scalef_y (first_oncurve.y));
+ else if (last_offcurve.has_data && first_oncurve.has_data)
+ draw_helper->quadratic_to (font->em_scalef_x (last_offcurve.x), font->em_scalef_y (last_offcurve.y),
+ font->em_scalef_x (first_oncurve.x), font->em_scalef_y (first_oncurve.y));
+ else if (first_oncurve.has_data)
+ draw_helper->line_to (font->em_scalef_x (first_oncurve.x), font->em_scalef_y (first_oncurve.y));
+
+ /* Getting ready for the next contour */
+ first_oncurve = first_offcurve = last_offcurve = optional_point_t ();
+ draw_helper->end_path ();
+ }
+ }
+ /* Nothing to finalize: each contour is closed in consume_point (). */
+ void points_end () {}
+
+ /* The path builder always wants the contour points ... */
+ bool is_consuming_contour_points () { return true; }
+ /* ... and never asks for the phantom points. */
+ contour_point_t *get_phantoms_sink () { return nullptr; }
+ };
+
+ /* Streams the outline of glyph `gid` into `draw_helper` by running
+ * get_points () with a path_builder_t consumer; returns get_points ()'s
+ * result. */
+ bool
+ get_path (hb_font_t *font, hb_codepoint_t gid, draw_helper_t &draw_helper) const
+ { return get_points (font, gid, path_builder_t (font, draw_helper)); }
+#endif
+
+#ifndef HB_NO_VAR
+ const gvar_accelerator_t *gvar;
+#endif
+ const hmtx_accelerator_t *hmtx;
+ const vmtx_accelerator_t *vmtx;
+
+ private:
+ bool short_offset;
+ unsigned int num_glyphs;
+ hb_blob_ptr_t<loca> loca_table;
+ hb_blob_ptr_t<glyf> glyf_table;
+ hb_face_t *face;
+ };
+
+ /* One glyph scheduled for output by the subsetter: its old and new glyph
+ * ids, the source glyph, and the two byte regions of it that get copied. */
+ struct SubsetGlyph
+ {
+ hb_codepoint_t new_gid;
+ hb_codepoint_t old_gid;
+ Glyph source_glyph;
+ hb_bytes_t dest_start; /* region of source_glyph to copy first */
+ hb_bytes_t dest_end; /* region of source_glyph to copy second */
+
+ /* Copies dest_start then dest_end into the serializer, appends zero padding
+ * bytes as computed by padding (), remaps component glyph ids through
+ * `plan`, and drops hints when the plan asks for it. */
+ bool serialize (hb_serialize_context_t *c,
+ const hb_subset_plan_t *plan) const
+ {
+ TRACE_SERIALIZE (this);
+
+ hb_bytes_t dest_glyph = dest_start.copy (c);
+ /* Widen the view to span both copies.
+ * NOTE(review): presumably the two copies land back to back in the
+ * serializer buffer -- confirm. */
+ dest_glyph = hb_bytes_t (&dest_glyph, dest_glyph.length + dest_end.copy (c).length);
+ unsigned int pad_length = padding ();
+ DEBUG_MSG (SUBSET, nullptr, "serialize %d byte glyph, width %d pad %d", dest_glyph.length, dest_glyph.length + pad_length, pad_length);
+
+ HBUINT8 pad;
+ pad = 0;
+ while (pad_length > 0)
+ {
+ c->embed (pad);
+ pad_length--;
+ }
+
+ /* Nothing was copied, so there are no components or hints to process. */
+ if (unlikely (!dest_glyph.length)) return_trace (true);
+
+ /* update components gids */
+ for (auto &_ : Glyph (dest_glyph).get_composite_iterator ())
+ {
+ hb_codepoint_t new_gid;
+ if (plan->new_gid_for_old_gid (_.get_glyph_index (), &new_gid))
+ const_cast<CompositeGlyphChain &> (_).set_glyph_index (new_gid);
+ }
+
+ if (plan->drop_hints) Glyph (dest_glyph).drop_hints ();
+
+ return_trace (true);
+ }
+
+ /* Lets source_glyph recompute dest_start/dest_end via drop_hints_bytes (). */
+ void drop_hints_bytes ()
+ { source_glyph.drop_hints_bytes (dest_start, dest_end); }
+
+ /* Number of glyph bytes that will be copied (both regions). */
+ unsigned int length () const { return dest_start.length + dest_end.length; }
+ /* pad to 2 to ensure 2-byte loca will be ok */
+ unsigned int padding () const { return length () % 2; }
+ /* length () plus padding (). */
+ unsigned int padded_size () const { return length () + padding (); }
+ };
+
+ protected:
+ UnsizedArrayOf<HBUINT8>
+ dataZ; /* Glyphs data. */
+ public:
+ DEFINE_SIZE_MIN (0); /* In reality, this is UNBOUNDED() type; but since we always
+ * check the size externally, allow Null() object of it by
+ * defining it _MIN instead. */
+};
+
+/* Namespace-scope name for glyf::accelerator_t; adds no members of its own. */
+struct glyf_accelerator_t : glyf::accelerator_t {};
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_GLYF_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-hdmx-table.hh b/thirdparty/harfbuzz/src/hb-ot-hdmx-table.hh
new file mode 100644
index 0000000000..c9c391bad5
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-hdmx-table.hh
@@ -0,0 +1,177 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#ifndef HB_OT_HDMX_TABLE_HH
+#define HB_OT_HDMX_TABLE_HH
+
+#include "hb-open-type.hh"
+
+/*
+ * hdmx -- Horizontal Device Metrics
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/hdmx
+ */
+#define HB_OT_TAG_hdmx HB_TAG('h','d','m','x')
+
+
+namespace OT {
+
+
+/* One hdmx device record: a pixel size, the maximum width, and one width
+ * byte per glyph. */
+struct DeviceRecord
+{
+ /* Size in bytes of a record holding `count` widths, rounded up to a
+ * multiple of 4. */
+ static unsigned int get_size (unsigned count)
+ { return hb_ceil_to_4 (min_size + count * HBUINT8::static_size); }
+
+ /* Serializes a record for `pixelSize` whose widths come from `it`;
+ * maxWidth is set to the largest width produced by the iterator.
+ * Returns false if the serializer cannot extend the record. */
+ template<typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ bool serialize (hb_serialize_context_t *c, unsigned pixelSize, Iterator it)
+ {
+ TRACE_SERIALIZE (this);
+
+ unsigned length = it.len ();
+
+ if (unlikely (!c->extend (*this, length))) return_trace (false);
+
+ this->pixelSize = pixelSize;
+ this->maxWidth =
+ + it
+ | hb_reduce (hb_max, 0u);
+
+ /* Write the widths into the freshly extended array. */
+ + it
+ | hb_sink (widthsZ.as_array (length));
+
+ return_trace (true);
+ }
+
+ /* Checks the fixed header and that `sizeDeviceRecord` bytes starting at
+ * this record are in range. */
+ bool sanitize (hb_sanitize_context_t *c, unsigned sizeDeviceRecord) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ c->check_range (this, sizeDeviceRecord)));
+ }
+
+ HBUINT8 pixelSize; /* Pixel size for following widths (as ppem). */
+ HBUINT8 maxWidth; /* Maximum width. */
+ UnsizedArrayOf<HBUINT8> widthsZ; /* Array of widths (numGlyphs is from the 'maxp' table). */
+ public:
+ DEFINE_SIZE_ARRAY (2, widthsZ);
+};
+
+
+/* The 'hdmx' table: a small header followed by numRecords device records of
+ * sizeDeviceRecord bytes each. */
+struct hdmx
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_hdmx;
+
+ /* Total table size in bytes: header plus all device records. */
+ unsigned int get_size () const
+ { return min_size + numRecords * sizeDeviceRecord; }
+
+ /* Returns the i-th device record, or the Null record when `i` is out of
+ * range. */
+ const DeviceRecord& operator [] (unsigned int i) const
+ {
+ /* XXX Null(DeviceRecord) is NOT safe as it's num-glyphs lengthed.
+ * https://github.com/harfbuzz/harfbuzz/issues/1300 */
+ if (unlikely (i >= numRecords)) return Null (DeviceRecord);
+ return StructAtOffset<DeviceRecord> (&this->firstDeviceRecord, i * sizeDeviceRecord);
+ }
+
+ /* Serializes the table header and then one DeviceRecord per item of `it`.
+ * Each item is a pair of (pixel size, iterator over widths); the record
+ * size is derived from the width count of the first item.
+ * Returns the serializer's success state. */
+ template<typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ bool serialize (hb_serialize_context_t *c, unsigned version, Iterator it)
+ {
+ TRACE_SERIALIZE (this);
+
+ if (unlikely (!c->extend_min ((*this)))) return_trace (false);
+
+ this->version = version;
+ this->numRecords = it.len ();
+ this->sizeDeviceRecord = DeviceRecord::get_size (it ? (*it).second.len () : 0);
+
+ for (const hb_item_type<Iterator>& _ : +it)
+ c->start_embed<DeviceRecord> ()->serialize (c, _.first, _.second);
+
+ return_trace (c->successful);
+ }
+
+
+ /* Builds the subset table: for every source record, emits the widths of
+ * the output glyphs in new-glyph-id order (mapped back through
+ * reverse_glyph_map); glyphs the plan reports as empty get a Null width. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+
+ hdmx *hdmx_prime = c->serializer->start_embed <hdmx> ();
+ if (unlikely (!hdmx_prime)) return_trace (false);
+
+ auto it =
+ + hb_range ((unsigned) numRecords)
+ | hb_map ([c, this] (unsigned _)
+ {
+ const DeviceRecord *device_record =
+ &StructAtOffset<DeviceRecord> (&firstDeviceRecord,
+ _ * sizeDeviceRecord);
+ auto row =
+ + hb_range (c->plan->num_output_glyphs ())
+ | hb_map (c->plan->reverse_glyph_map)
+ | hb_map ([this, c, device_record] (hb_codepoint_t _)
+ {
+ if (c->plan->is_empty_glyph (_))
+ return Null (HBUINT8);
+ return device_record->widthsZ.as_array (get_num_glyphs ()) [_];
+ })
+ ;
+ return hb_pair ((unsigned) device_record->pixelSize, +row);
+ })
+ ;
+
+ /* NOTE(review): the result of serialize () is not propagated here. */
+ hdmx_prime->serialize (c->serializer, version, it);
+ return_trace (true);
+ }
+
+ /* Number of width bytes per record: the record size minus the fixed
+ * DeviceRecord header (this count includes any alignment padding bytes). */
+ unsigned get_num_glyphs () const
+ {
+ return sizeDeviceRecord - DeviceRecord::min_size;
+ }
+
+ /* Validates the header, guards numRecords * sizeDeviceRecord against
+ * overflow, requires records to be at least header-sized, and checks that
+ * the whole table is in range. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ !hb_unsigned_mul_overflows (numRecords, sizeDeviceRecord) &&
+ sizeDeviceRecord >= DeviceRecord::min_size &&
+ c->check_range (this, get_size ()));
+ }
+
+ protected:
+ HBUINT16 version; /* Table version number (0) */
+ HBUINT16 numRecords; /* Number of device records. */
+ HBUINT32 sizeDeviceRecord;
+ /* Size of a device record, 32-bit aligned. */
+ DeviceRecord firstDeviceRecord;
+ /* Array of device records. */
+ public:
+ DEFINE_SIZE_MIN (8);
+};
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_HDMX_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-head-table.hh b/thirdparty/harfbuzz/src/hb-ot-head-table.hh
new file mode 100644
index 0000000000..5613a96dbf
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-head-table.hh
@@ -0,0 +1,179 @@
+/*
+ * Copyright © 2010 Red Hat, Inc.
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_HEAD_TABLE_HH
+#define HB_OT_HEAD_TABLE_HH
+
+#include "hb-open-type.hh"
+
+/*
+ * head -- Font Header
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/head
+ */
+#define HB_OT_TAG_head HB_TAG('h','e','a','d')
+
+
+namespace OT {
+
+
+/* The 'head' (font header) table.  The field order below is the on-disk
+ * layout and must not be changed. */
+struct head
+{
+ friend struct OffsetTable;
+
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_head;
+
+ /* Returns unitsPerEm when it lies in [16, 16384], otherwise 1000. */
+ unsigned int get_upem () const
+ {
+ unsigned int upem = unitsPerEm;
+ /* If no valid head table found, assume 1000, which matches typical Type1 usage. */
+ return 16 <= upem && upem <= 16384 ? upem : 1000;
+ }
+
+ /* Embeds a verbatim copy of this table into the serializer. */
+ bool serialize (hb_serialize_context_t *c) const
+ {
+ TRACE_SERIALIZE (this);
+ return_trace ((bool) c->embed (this));
+ }
+
+ /* Subsetting copies the table unchanged. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ return_trace (serialize (c->serializer));
+ }
+
+ /* Bit masks for the macStyle field. */
+ enum mac_style_flag_t {
+ BOLD = 1u<<0,
+ ITALIC = 1u<<1,
+ UNDERLINE = 1u<<2,
+ OUTLINE = 1u<<3,
+ SHADOW = 1u<<4,
+ CONDENSED = 1u<<5
+ };
+
+ bool is_bold () const { return macStyle & BOLD; }
+ bool is_italic () const { return macStyle & ITALIC; }
+ bool is_condensed () const { return macStyle & CONDENSED; }
+
+ /* Accepts the table only if it is fully in range, has major version 1 and
+ * carries the expected magic number. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ version.major == 1 &&
+ magicNumber == 0x5F0F3CF5u);
+ }
+
+ protected:
+ FixedVersion<>version; /* Version of the head table--currently
+ * 0x00010000u for version 1.0. */
+ FixedVersion<>fontRevision; /* Set by font manufacturer. */
+ HBUINT32 checkSumAdjustment; /* To compute: set it to 0, sum the
+ * entire font as HBUINT32, then store
+ * 0xB1B0AFBAu - sum. */
+ HBUINT32 magicNumber; /* Set to 0x5F0F3CF5u. */
+ HBUINT16 flags; /* Bit 0: Baseline for font at y=0;
+ * Bit 1: Left sidebearing point at x=0;
+ * Bit 2: Instructions may depend on point size;
+ * Bit 3: Force ppem to integer values for all
+ * internal scaler math; may use fractional
+ * ppem sizes if this bit is clear;
+ * Bit 4: Instructions may alter advance width
+ * (the advance widths might not scale linearly);
+ * Bits 5-10: These should be set according to
+ * Apple's specification. However, they are not
+ * implemented in OpenType.
+ * Bit 5: This bit should be set in fonts that are
+ * intended to be laid out vertically, and in
+ * which the glyphs have been drawn such that an
+ * x-coordinate of 0 corresponds to the desired
+ * vertical baseline.
+ * Bit 6: This bit must be set to zero.
+ * Bit 7: This bit should be set if the font
+ * requires layout for correct linguistic
+ * rendering (e.g. Arabic fonts).
+ * Bit 8: This bit should be set for a GX font
+ * which has one or more metamorphosis effects
+ * designated as happening by default.
+ * Bit 9: This bit should be set if the font
+ * contains any strong right-to-left glyphs.
+ * Bit 10: This bit should be set if the font
+ * contains Indic-style rearrangement effects.
+ * Bit 11: Font data is 'lossless,' as a result
+ * of having been compressed and decompressed
+ * with the Agfa MicroType Express engine.
+ * Bit 12: Font converted (produce compatible metrics)
+ * Bit 13: Font optimized for ClearType™.
+ * Note, fonts that rely on embedded bitmaps (EBDT)
+ * for rendering should not be considered optimized
+ * for ClearType, and therefore should keep this bit
+ * cleared.
+ * Bit 14: Last Resort font. If set, indicates that
+ * the glyphs encoded in the cmap subtables are simply
+ * generic symbolic representations of code point
+ * ranges and don’t truly represent support for those
+ * code points. If unset, indicates that the glyphs
+ * encoded in the cmap subtables represent proper
+ * support for those code points.
+ * Bit 15: Reserved, set to 0. */
+ HBUINT16 unitsPerEm; /* Valid range is from 16 to 16384. This value
+ * should be a power of 2 for fonts that have
+ * TrueType outlines. */
+ LONGDATETIME created; /* Number of seconds since 12:00 midnight,
+ January 1, 1904. 64-bit integer */
+ LONGDATETIME modified; /* Number of seconds since 12:00 midnight,
+ January 1, 1904. 64-bit integer */
+ HBINT16 xMin; /* For all glyph bounding boxes. */
+ HBINT16 yMin; /* For all glyph bounding boxes. */
+ HBINT16 xMax; /* For all glyph bounding boxes. */
+ HBINT16 yMax; /* For all glyph bounding boxes. */
+ HBUINT16 macStyle; /* Bit 0: Bold (if set to 1);
+ * Bit 1: Italic (if set to 1)
+ * Bit 2: Underline (if set to 1)
+ * Bit 3: Outline (if set to 1)
+ * Bit 4: Shadow (if set to 1)
+ * Bit 5: Condensed (if set to 1)
+ * Bit 6: Extended (if set to 1)
+ * Bits 7-15: Reserved (set to 0). */
+ HBUINT16 lowestRecPPEM; /* Smallest readable size in pixels. */
+ HBINT16 fontDirectionHint; /* Deprecated (Set to 2).
+ * 0: Fully mixed directional glyphs;
+ * 1: Only strongly left to right;
+ * 2: Like 1 but also contains neutrals;
+ * -1: Only strongly right to left;
+ * -2: Like -1 but also contains neutrals. */
+ public:
+ HBUINT16 indexToLocFormat; /* 0 for short offsets, 1 for long. */
+ HBUINT16 glyphDataFormat; /* 0 for current format. */
+
+ DEFINE_SIZE_STATIC (54);
+};
+
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_HEAD_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-hhea-table.hh b/thirdparty/harfbuzz/src/hb-ot-hhea-table.hh
new file mode 100644
index 0000000000..d9c9bd3537
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-hhea-table.hh
@@ -0,0 +1,104 @@
+/*
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_HHEA_TABLE_HH
+#define HB_OT_HHEA_TABLE_HH
+
+#include "hb-open-type.hh"
+
+/*
+ * hhea -- Horizontal Header
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/hhea
+ * vhea -- Vertical Header
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/vhea
+ */
+#define HB_OT_TAG_hhea HB_TAG('h','h','e','a')
+#define HB_OT_TAG_vhea HB_TAG('v','h','e','a')
+
+
+namespace OT {
+
+
+/* Shared layout of the 'hhea' and 'vhea' header tables; T is the concrete
+ * table type deriving from this template. */
+template <typename T>
+struct _hea
+{
+ /* True when the major version is non-zero. */
+ bool has_data () const { return version.major != 0; }
+
+ /* Accepts the table only if it is fully in range and has major version 1. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (!c->check_struct (this)) return_trace (false);
+ const bool is_version_1 = likely (version.major == 1);
+ return_trace (is_version_1);
+ }
+
+ public:
+ FixedVersion<>version; /* 0x00010000u for version 1.0. */
+ FWORD ascender; /* Typographic ascent. */
+ FWORD descender; /* Typographic descent. */
+ FWORD lineGap; /* Typographic line gap. */
+ UFWORD advanceMax; /* Maximum advance width/height value in
+ * metrics table. */
+ FWORD minLeadingBearing;
+ /* Minimum left/top sidebearing value in
+ * metrics table. */
+ FWORD minTrailingBearing;
+ /* Minimum right/bottom sidebearing value;
+ * calculated as Min(aw - lsb -
+ * (xMax - xMin)) for horizontal. */
+ FWORD maxExtent; /* horizontal: Max(lsb + (xMax - xMin)),
+ * vertical: minLeadingBearing+(yMax-yMin). */
+ HBINT16 caretSlopeRise; /* Used to calculate the slope of the
+ * cursor (rise/run); 1 for vertical caret,
+ * 0 for horizontal.*/
+ HBINT16 caretSlopeRun; /* 0 for vertical caret, 1 for horizontal. */
+ HBINT16 caretOffset; /* The amount by which a slanted
+ * highlight on a glyph needs
+ * to be shifted to produce the
+ * best appearance. Set to 0 for
+ * non-slanted fonts. */
+ HBINT16 reserved1; /* Set to 0. */
+ HBINT16 reserved2; /* Set to 0. */
+ HBINT16 reserved3; /* Set to 0. */
+ HBINT16 reserved4; /* Set to 0. */
+ HBINT16 metricDataFormat;/* 0 for current format. */
+ HBUINT16 numberOfLongMetrics;
+ /* Number of LongMetric entries in metric
+ * table. */
+ public:
+ DEFINE_SIZE_STATIC (36);
+};
+
+/* Horizontal header table: the shared _hea layout under the 'hhea' tag. */
+struct hhea : _hea<hhea> {
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_hhea;
+};
+/* Vertical header table: the shared _hea layout under the 'vhea' tag. */
+struct vhea : _hea<vhea> {
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_vhea;
+};
+
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_HHEA_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-hmtx-table.hh b/thirdparty/harfbuzz/src/hb-ot-hmtx-table.hh
new file mode 100644
index 0000000000..d06c0fa4a4
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-hmtx-table.hh
@@ -0,0 +1,340 @@
+/*
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod, Roderick Sheeter
+ */
+
+#ifndef HB_OT_HMTX_TABLE_HH
+#define HB_OT_HMTX_TABLE_HH
+
+#include "hb-open-type.hh"
+#include "hb-ot-hhea-table.hh"
+#include "hb-ot-var-hvar-table.hh"
+#include "hb-ot-metrics.hh"
+
+/*
+ * hmtx -- Horizontal Metrics
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/hmtx
+ * vmtx -- Vertical Metrics
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/vmtx
+ */
+#define HB_OT_TAG_hmtx HB_TAG('h','m','t','x')
+#define HB_OT_TAG_vmtx HB_TAG('v','m','t','x')
+
+
+HB_INTERNAL int
+_glyf_get_side_bearing_var (hb_font_t *font, hb_codepoint_t glyph, bool is_vertical);
+
+HB_INTERNAL unsigned
+_glyf_get_advance_var (hb_font_t *font, hb_codepoint_t glyph, bool is_vertical);
+
+
+namespace OT {
+
+
+struct LongMetric /* One full metrics record (4 bytes): an advance followed by a leading side bearing. */
+{
+ UFWORD advance; /* Advance width/height. */
+ FWORD sb; /* Leading (left/top) side bearing. */
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+
+
+template <typename T, typename H>
+struct hmtxvmtx
+{
+ bool sanitize (hb_sanitize_context_t *c HB_UNUSED) const /* Always reports success; the context is not consulted. */
+ {
+ TRACE_SANITIZE (this);
+ /* We don't check for anything specific here. The users of the
+ * struct do all the hard work... */
+ return_trace (true);
+ }
+
+
+ bool subset_update_header (hb_subset_plan_t *plan, /* Copies table H from the plan's source face, rewrites its long-metric count and adds the copy to the plan. */
+ unsigned int num_hmetrics) const
+ {
+ hb_blob_t *src_blob = hb_sanitize_context_t ().reference_table<H> (plan->source, H::tableTag);
+ hb_blob_t *dest_blob = hb_blob_copy_writable_or_fail (src_blob);
+ hb_blob_destroy (src_blob); /* Only the writable copy is used from here on. */
+
+ if (unlikely (!dest_blob)) {
+ return false;
+ }
+
+ unsigned int length; /* NOTE(review): fetched but never compared with H's size before the write below. */
+ H *table = (H *) hb_blob_get_data (dest_blob, &length);
+ table->numberOfLongMetrics = num_hmetrics;
+
+ bool result = plan->add_table (H::tableTag, dest_blob);
+ hb_blob_destroy (dest_blob); /* Drop this function's reference whether or not add_table succeeded. */
+
+ return result;
+ }
+
+ template<typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ void serialize (hb_serialize_context_t *c,
+ Iterator it,
+ unsigned num_advances)
+ {
+ /* The first num_advances (advance, bearing) pairs become full LongMetric
+ * records; later pairs contribute their bearing only. Stops on allocation failure. */
+ unsigned emitted = 0;
+ for (auto metric : it)
+ {
+ const bool wants_long_metric = emitted++ < num_advances;
+ if (!wants_long_metric)
+ {
+ FWORD *bearing = c->allocate_size<FWORD> (FWORD::static_size);
+ if (unlikely (!bearing)) return;
+ *bearing = metric.second;
+ continue;
+ }
+ LongMetric full;
+ full.advance = metric.first;
+ full.sb = metric.second;
+ if (unlikely (!c->embed<LongMetric> (&full))) return;
+ }
+ }
+
+ bool subset (hb_subset_context_t *c) const /* Serializes the metrics of the plan's output glyphs, then updates the header table's count. */
+ {
+ TRACE_SUBSET (this);
+
+ T *table_prime = c->serializer->start_embed <T> ();
+ if (unlikely (!table_prime)) return_trace (false);
+
+ accelerator_t _mtx;
+ _mtx.init (c->plan->source);
+ unsigned num_advances = _mtx.num_advances_for_subset (c->plan);
+
+ auto it = /* One (advance, side bearing) pair per output glyph id, in order. */
+ + hb_range (c->plan->num_output_glyphs ())
+ | hb_map ([c, &_mtx] (unsigned _)
+ {
+ hb_codepoint_t old_gid;
+ if (!c->plan->old_gid_for_new_gid (_, &old_gid))
+ return hb_pair (0u, 0); /* No old glyph for this new id: zero advance and bearing. */
+ return hb_pair (_mtx.get_advance (old_gid), _mtx.get_side_bearing (old_gid));
+ })
+ ;
+
+ table_prime->serialize (c->serializer, it, num_advances);
+
+ _mtx.fini (); /* The iterator captured _mtx by reference; it is not used after this point. */
+
+ if (unlikely (c->serializer->ran_out_of_room || c->serializer->in_error ()))
+ return_trace (false);
+
+ // Amend header num hmetrics
+ if (unlikely (!subset_update_header (c->plan, num_advances)))
+ return_trace (false);
+
+ return_trace (true);
+ }
+
+ struct accelerator_t
+ {
+ friend struct hmtxvmtx;
+
+ void init (hb_face_t *face, /* References the metrics and variations tables of face and derives the entry counts. */
+ unsigned int default_advance_ = 0)
+ {
+ default_advance = default_advance_ ? default_advance_ : hb_face_get_upem (face); /* 0 selects the face's units-per-em. */
+
+ num_advances = T::is_horizontal ? face->table.hhea->numberOfLongMetrics : face->table.vhea->numberOfLongMetrics;
+
+ table = hb_sanitize_context_t ().reference_table<hmtxvmtx> (face, T::tableTag);
+
+ /* Cap num_metrics() and num_advances() based on table length. */
+ unsigned int len = table.get_length ();
+ if (unlikely (num_advances * 4 > len))
+ num_advances = len / 4;
+ num_metrics = num_advances + (len - 4 * num_advances) / 2; /* 4 bytes per long metric; the remaining bytes are counted 2 per entry. */
+
+ /* We MUST set num_metrics to zero if num_advances is zero.
+ * Our get_advance() depends on that. */
+ if (unlikely (!num_advances))
+ {
+ num_metrics = num_advances = 0;
+ table.destroy ();
+ table = hb_blob_get_empty ();
+ }
+
+ var_table = hb_sanitize_context_t ().reference_table<HVARVVAR> (face, T::variationsTag);
+ }
+
+ void fini () /* Releases the two blobs referenced by init(). */
+ {
+ table.destroy ();
+ var_table.destroy ();
+ }
+
+ int get_side_bearing (hb_codepoint_t glyph) const
+ {
+ const bool has_long_metric = glyph < num_advances;
+ if (!has_long_metric && unlikely (glyph >= num_metrics))
+ return 0; /* Beyond both arrays: no bearing is stored. */
+
+ if (has_long_metric)
+ return table->longMetricZ[glyph].sb;
+ const FWORD *trailing = (const FWORD *) &table->longMetricZ[num_advances];
+ return trailing[glyph - num_advances]; /* Bearing-only entries start right after the long metrics. */
+ }
+
+ int get_side_bearing (hb_font_t *font, hb_codepoint_t glyph) const /* Side bearing of glyph, taking font's variation coordinates into account when it has any. */
+ {
+ int side_bearing = get_side_bearing (glyph);
+
+#ifndef HB_NO_VAR
+ if (unlikely (glyph >= num_metrics) || !font->num_coords)
+ return side_bearing; /* Out-of-range glyph or no coordinates set: static value only. */
+
+ if (var_table.get_length ())
+ return side_bearing + var_table->get_side_bearing_var (glyph, font->coords, font->num_coords); // TODO Optimize?!
+
+ return _glyf_get_side_bearing_var (font, glyph, T::tableTag == HB_OT_TAG_vmtx); /* Empty variations table: use the glyf-based helper's result instead. */
+#else
+ return side_bearing;
+#endif
+ }
+
+ unsigned int get_advance (hb_codepoint_t glyph) const
+ {
+ if (likely (glyph < num_metrics))
+ {
+ /* Glyphs past the long metrics reuse the advance of the last
+ * long metric, hence the clamp to num_advances - 1. */
+ return table->longMetricZ[hb_min (glyph, (uint32_t) num_advances - 1)].advance;
+ }
+
+ /* Out of range. A zero num_metrics means we don't have the metrics
+ * table for this direction, so the default advance is returned;
+ * otherwise the glyph index itself is out of bound and the
+ * advance is zero. */
+ return num_metrics ? 0 : default_advance;
+ }
+
+ unsigned int get_advance (hb_codepoint_t glyph, /* Advance of glyph, taking font's variation coordinates into account when it has any. */
+ hb_font_t *font) const
+ {
+ unsigned int advance = get_advance (glyph);
+
+#ifndef HB_NO_VAR
+ if (unlikely (glyph >= num_metrics) || !font->num_coords)
+ return advance; /* Out-of-range glyph or no coordinates set: static value only. */
+
+ if (var_table.get_length ())
+ return advance + roundf (var_table->get_advance_var (glyph, font)); // TODO Optimize?!
+
+ return _glyf_get_advance_var (font, glyph, T::tableTag == HB_OT_TAG_vmtx); /* Empty variations table: use the glyf-based helper's result instead. */
+#else
+ return advance;
+#endif
+ }
+
+ unsigned int num_advances_for_subset (const hb_subset_plan_t *plan) const
+ {
+ /* Trailing output glyphs that share the final glyph's advance need only
+ * a side bearing, so shrink the long-metric count to the start of that
+ * trailing run. The count never drops below one for a non-empty
+ * subset. */
+ unsigned int count = plan->num_output_glyphs ();
+ const unsigned int final_advance = _advance_for_new_gid (plan, count - 1);
+ for (; count > 1; count--)
+ if (final_advance != _advance_for_new_gid (plan, count - 2))
+ break;
+
+ return count;
+ }
+
+ private:
+ unsigned int _advance_for_new_gid (const hb_subset_plan_t *plan,
+ hb_codepoint_t new_gid) const
+ {
+ /* When the plan reports no old glyph for new_gid the advance is zero. */
+ hb_codepoint_t source_gid;
+ const bool has_source = plan->old_gid_for_new_gid (new_gid, &source_gid);
+
+ return has_source ? get_advance (source_gid) : 0;
+ }
+
+ protected:
+ unsigned int num_metrics;
+ unsigned int num_advances;
+ unsigned int default_advance;
+
+ private:
+ hb_blob_ptr_t<hmtxvmtx> table;
+ hb_blob_ptr_t<HVARVVAR> var_table;
+ };
+
+ protected:
+ UnsizedArrayOf<LongMetric>
+ longMetricZ; /* Paired advance width and leading
+ * bearing values for each glyph. The
+ * value numOfHMetrics comes from
+ * the 'hhea' table. If the font is
+ * monospaced, only one entry need
+ * be in the array, but that entry is
+ * required. The last entry applies to
+ * all subsequent glyphs. */
+/*UnsizedArrayOf<FWORD> leadingBearingX;*/
+ /* Here the advance is assumed
+ * to be the same as the advance
+ * for the last entry above. The
+ * number of entries in this array is
+ * derived from numGlyphs (from 'maxp'
+ * table) minus numberOfLongMetrics.
+ * This generally is used with a run
+ * of monospaced glyphs (e.g., Kanji
+ * fonts or Courier fonts). Only one
+ * run is allowed and it must be at
+ * the end. This allows a monospaced
+ * font to vary the side bearing
+ * values for each glyph. */
+ public:
+ DEFINE_SIZE_ARRAY (0, longMetricZ);
+};
+
+struct hmtx : hmtxvmtx<hmtx, hhea> { /* Horizontal instantiation: pairs with the hhea header and the HVAR variations tag. */
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_hmtx;
+ static constexpr hb_tag_t variationsTag = HB_OT_TAG_HVAR;
+ static constexpr bool is_horizontal = true;
+};
+struct vmtx : hmtxvmtx<vmtx, vhea> { /* Vertical instantiation: pairs with the vhea header and the VVAR variations tag. */
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_vmtx;
+ static constexpr hb_tag_t variationsTag = HB_OT_TAG_VVAR;
+ static constexpr bool is_horizontal = false;
+};
+
+struct hmtx_accelerator_t : hmtx::accelerator_t {}; /* Namespace-level name for the horizontal accelerator. */
+struct vmtx_accelerator_t : vmtx::accelerator_t {}; /* Namespace-level name for the vertical accelerator. */
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_HMTX_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-kern-table.hh b/thirdparty/harfbuzz/src/hb-ot-kern-table.hh
new file mode 100644
index 0000000000..3563cab8bd
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-kern-table.hh
@@ -0,0 +1,359 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_KERN_TABLE_HH
+#define HB_OT_KERN_TABLE_HH
+
+#include "hb-aat-layout-kerx-table.hh"
+
+
+/*
+ * kern -- Kerning
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/kern
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6kern.html
+ */
+#define HB_OT_TAG_kern HB_TAG('k','e','r','n')
+
+
+namespace OT {
+
+
+template <typename KernSubTableHeader>
+struct KernSubTableFormat3 /* Class-based kerning: per-glyph class bytes index a small table of kerning values. */
+{
+ int get_kerning (hb_codepoint_t left, hb_codepoint_t right) const
+ {
+ hb_array_t<const FWORD> kernValue = kernValueZ.as_array (kernValueCount); /* The four arrays are laid out back to back; each one starts where the previous ends. */
+ hb_array_t<const HBUINT8> leftClass = StructAfter<const UnsizedArrayOf<HBUINT8>> (kernValue).as_array (glyphCount);
+ hb_array_t<const HBUINT8> rightClass = StructAfter<const UnsizedArrayOf<HBUINT8>> (leftClass).as_array (glyphCount);
+ hb_array_t<const HBUINT8> kernIndex = StructAfter<const UnsizedArrayOf<HBUINT8>> (rightClass).as_array (leftClassCount * rightClassCount);
+
+ unsigned int leftC = leftClass[left];
+ unsigned int rightC = rightClass[right];
+ if (unlikely (leftC >= leftClassCount || rightC >= rightClassCount))
+ return 0; /* A class outside the declared counts yields no kerning. */
+ unsigned int i = leftC * rightClassCount + rightC; /* Row-major index into the leftClassCount x rightClassCount grid. */
+ return kernValue[kernIndex[i]];
+ }
+
+ bool apply (AAT::hb_aat_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+
+ if (!c->plan->requested_kerning)
+ return false;
+
+ if (header.coverage & header.Backwards)
+ return false;
+
+ hb_kern_machine_t<KernSubTableFormat3> machine (*this, header.coverage & header.CrossStream);
+ machine.kern (c->font, c->buffer, c->plan->kern_mask);
+
+ return_trace (true);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ c->check_range (kernValueZ, /* One range check spanning all four trailing arrays. */
+ kernValueCount * sizeof (FWORD) +
+ glyphCount * 2 +
+ leftClassCount * rightClassCount));
+ }
+
+ protected:
+ KernSubTableHeader
+ header;
+ HBUINT16 glyphCount; /* The number of glyphs in this font. */
+ HBUINT8 kernValueCount; /* The number of kerning values. */
+ HBUINT8 leftClassCount; /* The number of left-hand classes. */
+ HBUINT8 rightClassCount;/* The number of right-hand classes. */
+ HBUINT8 flags; /* Set to zero (reserved for future use). */
+ UnsizedArrayOf<FWORD>
+ kernValueZ; /* The kerning values.
+ * Length kernValueCount. */
+#if 0
+ UnsizedArrayOf<HBUINT8>
+ leftClass; /* The left-hand classes.
+ * Length glyphCount. */
+ UnsizedArrayOf<HBUINT8>
+ rightClass; /* The right-hand classes.
+ * Length glyphCount. */
+ UnsizedArrayOf<HBUINT8>kernIndex;
+ /* The indices into the kernValue array.
+ * Length leftClassCount * rightClassCount */
+#endif
+ public:
+ DEFINE_SIZE_ARRAY (KernSubTableHeader::static_size + 6, kernValueZ);
+};
+
+template <typename KernSubTableHeader>
+struct KernSubTable /* One kern subtable: a header followed by a format-specific body, selected by header.format. */
+{
+ unsigned int get_size () const { return u.header.length; }
+ unsigned int get_type () const { return u.header.format; }
+
+ int get_kerning (hb_codepoint_t left, hb_codepoint_t right) const
+ {
+ switch (get_type ()) {
+ /* This method hooks up to hb_font_t's get_h_kerning. Only support Format0. */
+ case 0: return u.format0.get_kerning (left, right);
+ default:return 0;
+ }
+ }
+
+ template <typename context_t, typename ...Ts>
+ typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+ {
+ unsigned int subtable_type = get_type ();
+ TRACE_DISPATCH (this, subtable_type);
+ switch (subtable_type) {
+ case 0: return_trace (c->dispatch (u.format0));
+#ifndef HB_NO_AAT_SHAPE
+ case 1: return_trace (u.header.apple ? c->dispatch (u.format1, hb_forward<Ts> (ds)...) : c->default_return_value ()); /* Dispatched only when the header type sets apple. */
+#endif
+ case 2: return_trace (c->dispatch (u.format2));
+#ifndef HB_NO_AAT_SHAPE
+ case 3: return_trace (u.header.apple ? c->dispatch (u.format3, hb_forward<Ts> (ds)...) : c->default_return_value ()); /* Dispatched only when the header type sets apple. */
+#endif
+ default: return_trace (c->default_return_value ());
+ }
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!u.header.sanitize (c) || /* Header first, then its declared length, then the format body. */
+ u.header.length < u.header.min_size ||
+ !c->check_range (this, u.header.length))) return_trace (false);
+
+ return_trace (dispatch (c));
+ }
+
+ public:
+ union {
+ KernSubTableHeader header;
+ AAT::KerxSubTableFormat0<KernSubTableHeader> format0;
+ AAT::KerxSubTableFormat1<KernSubTableHeader> format1;
+ AAT::KerxSubTableFormat2<KernSubTableHeader> format2;
+ KernSubTableFormat3<KernSubTableHeader> format3;
+ } u;
+ public:
+ DEFINE_SIZE_MIN (KernSubTableHeader::static_size);
+};
+
+
+struct KernOTSubTableHeader /* 6-byte subtable header used by KernOT. */
+{
+ static constexpr bool apple = false;
+ typedef AAT::ObsoleteTypes Types;
+
+ unsigned tuple_count () const { return 0; }
+ bool is_horizontal () const { return (coverage & Horizontal); }
+
+ enum Coverage
+ {
+ Horizontal = 0x01u,
+ Minimum = 0x02u,
+ CrossStream = 0x04u,
+ Override = 0x08u,
+
+ /* Not supported: */
+ Backwards = 0x00u, /* Zero mask: `coverage & Backwards` is always false. */
+ Variation = 0x00u,
+ };
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ public:
+ HBUINT16 versionZ; /* Unused. */
+ HBUINT16 length; /* Length of the subtable (including this header). */
+ HBUINT8 format; /* Subtable format. */
+ HBUINT8 coverage; /* Coverage bits. */
+ public:
+ DEFINE_SIZE_STATIC (6);
+};
+
+struct KernOT : AAT::KerxTable<KernOT> /* kern table with 16-bit version and count fields; behaviour comes from AAT::KerxTable. */
+{
+ friend struct AAT::KerxTable<KernOT>;
+
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_kern;
+ static constexpr unsigned minVersion = 0u;
+
+ typedef KernOTSubTableHeader SubTableHeader;
+ typedef SubTableHeader::Types Types;
+ typedef KernSubTable<SubTableHeader> SubTable;
+
+ protected:
+ HBUINT16 version; /* Version--0x0000u */
+ HBUINT16 tableCount; /* Number of subtables in the kerning table. */
+ SubTable firstSubTable; /* Subtables. */
+ public:
+ DEFINE_SIZE_MIN (4);
+};
+
+
+struct KernAATSubTableHeader /* 8-byte subtable header used by KernAAT. */
+{
+ static constexpr bool apple = true;
+ typedef AAT::ObsoleteTypes Types;
+
+ unsigned tuple_count () const { return 0; }
+ bool is_horizontal () const { return !(coverage & Vertical); }
+
+ enum Coverage
+ {
+ Vertical = 0x80u,
+ CrossStream = 0x40u,
+ Variation = 0x20u,
+
+ /* Not supported: */
+ Backwards = 0x00u, /* Zero mask: `coverage & Backwards` is always false. */
+ };
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ public:
+ HBUINT32 length; /* Length of the subtable (including this header). */
+ HBUINT8 coverage; /* Coverage bits. */
+ HBUINT8 format; /* Subtable format. */
+ HBUINT16 tupleIndex; /* The tuple index (used for variations fonts).
+ * This value specifies which tuple this subtable covers.
+ * Note: We don't implement. */
+ public:
+ DEFINE_SIZE_STATIC (8);
+};
+
+struct KernAAT : AAT::KerxTable<KernAAT> /* kern table with 32-bit version and count fields; behaviour comes from AAT::KerxTable. */
+{
+ friend struct AAT::KerxTable<KernAAT>;
+
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_kern;
+ static constexpr unsigned minVersion = 0x00010000u;
+
+ typedef KernAATSubTableHeader SubTableHeader;
+ typedef SubTableHeader::Types Types;
+ typedef KernSubTable<SubTableHeader> SubTable;
+
+ protected:
+ HBUINT32 version; /* Version--0x00010000u */
+ HBUINT32 tableCount; /* Number of subtables in the kerning table. */
+ SubTable firstSubTable; /* Subtables. */
+ public:
+ DEFINE_SIZE_MIN (8);
+};
+
+struct kern /* Top-level 'kern' table: forwards every query to the KernOT or KernAAT view chosen by the leading 16-bit word. */
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_kern;
+
+ bool has_data () const { return u.version32; }
+ unsigned get_type () const { return u.major; } /* 0 selects u.ot, 1 selects u.aat; any other value is unsupported. */
+
+ bool has_state_machine () const
+ {
+ switch (get_type ()) {
+ case 0: return u.ot.has_state_machine ();
+#ifndef HB_NO_AAT_SHAPE
+ case 1: return u.aat.has_state_machine ();
+#endif
+ default:return false;
+ }
+ }
+
+ bool has_cross_stream () const
+ {
+ switch (get_type ()) {
+ case 0: return u.ot.has_cross_stream ();
+#ifndef HB_NO_AAT_SHAPE
+ case 1: return u.aat.has_cross_stream ();
+#endif
+ default:return false;
+ }
+ }
+
+ int get_h_kerning (hb_codepoint_t left, hb_codepoint_t right) const
+ {
+ switch (get_type ()) {
+ case 0: return u.ot.get_h_kerning (left, right);
+#ifndef HB_NO_AAT_SHAPE
+ case 1: return u.aat.get_h_kerning (left, right);
+#endif
+ default:return 0;
+ }
+ }
+
+ bool apply (AAT::hb_aat_apply_context_t *c) const
+ { return dispatch (c); }
+
+ template <typename context_t, typename ...Ts>
+ typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+ {
+ unsigned int subtable_type = get_type ();
+ TRACE_DISPATCH (this, subtable_type);
+ switch (subtable_type) {
+ case 0: return_trace (c->dispatch (u.ot, hb_forward<Ts> (ds)...));
+#ifndef HB_NO_AAT_SHAPE
+ case 1: return_trace (c->dispatch (u.aat, hb_forward<Ts> (ds)...));
+#endif
+ default: return_trace (c->default_return_value ());
+ }
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (!u.version32.sanitize (c)) return_trace (false); /* The version word must be readable before get_type() is consulted. */
+ return_trace (dispatch (c));
+ }
+
+ protected:
+ union {
+ HBUINT32 version32;
+ HBUINT16 major;
+ KernOT ot;
+#ifndef HB_NO_AAT_SHAPE
+ KernAAT aat;
+#endif
+ } u;
+ public:
+ DEFINE_SIZE_UNION (4, version32);
+};
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_KERN_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-base-table.hh b/thirdparty/harfbuzz/src/hb-ot-layout-base-table.hh
new file mode 100644
index 0000000000..02fe14fa06
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-layout-base-table.hh
@@ -0,0 +1,509 @@
+/*
+ * Copyright © 2016 Elie Roux <elie.roux@telecom-bretagne.eu>
+ * Copyright © 2018 Google, Inc.
+ * Copyright © 2018-2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_LAYOUT_BASE_TABLE_HH
+#define HB_OT_LAYOUT_BASE_TABLE_HH
+
+#include "hb-open-type.hh"
+#include "hb-ot-layout-common.hh"
+
+namespace OT {
+
+/*
+ * BASE -- Baseline
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/base
+ */
+
+struct BaseCoordFormat1 /* Plain coordinate: the value is returned as stored. */
+{
+ hb_position_t get_coord () const { return coordinate; }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this)));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 1 */
+ FWORD coordinate; /* X or Y value, in design units */
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+
+struct BaseCoordFormat2 /* Coordinate plus a reference glyph and contour point; get_coord() does not consult those two yet. */
+{
+ hb_position_t get_coord () const
+ {
+ /* TODO */
+ return coordinate;
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 2 */
+ FWORD coordinate; /* X or Y value, in design units */
+ HBGlyphID referenceGlyph; /* Glyph ID of control glyph */
+ HBUINT16 coordPoint; /* Index of contour point on the
+ * reference glyph */
+ public:
+ DEFINE_SIZE_STATIC (8);
+};
+
+struct BaseCoordFormat3 /* Coordinate adjusted by a delta read from an attached Device table. */
+{
+ hb_position_t get_coord (hb_font_t *font,
+ const VariationStore &var_store,
+ hb_direction_t direction) const
+ {
+ const Device &device = this+deviceTable;
+ return coordinate + (HB_DIRECTION_IS_VERTICAL (direction) ? /* Vertical text takes the y delta, otherwise the x delta. */
+ device.get_y_delta (font, var_store) :
+ device.get_x_delta (font, var_store));
+ }
+
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ deviceTable.sanitize (c, this)));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 3 */
+ FWORD coordinate; /* X or Y value, in design units */
+ OffsetTo<Device>
+ deviceTable; /* Offset to Device table for X or
+ * Y value, from beginning of
+ * BaseCoord table (may be NULL). */
+ public:
+ DEFINE_SIZE_STATIC (6);
+};
+
+struct BaseCoord /* Format-tagged union over the three BaseCoord formats. */
+{
+ bool has_data () const { return u.format; } /* A zero format word means there is no coordinate. */
+
+ hb_position_t get_coord (hb_font_t *font,
+ const VariationStore &var_store,
+ hb_direction_t direction) const
+ {
+ switch (u.format) {
+ case 1: return u.format1.get_coord ();
+ case 2: return u.format2.get_coord ();
+ case 3: return u.format3.get_coord (font, var_store, direction);
+ default:return 0;
+ }
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!u.format.sanitize (c))) return_trace (false);
+ switch (u.format) {
+ case 1: return_trace (u.format1.sanitize (c));
+ case 2: return_trace (u.format2.sanitize (c));
+ case 3: return_trace (u.format3.sanitize (c));
+ default:return_trace (false); /* Unlike get_coord(), an unknown format is rejected here. */
+ }
+ }
+
+ protected:
+ union {
+ HBUINT16 format;
+ BaseCoordFormat1 format1;
+ BaseCoordFormat2 format2;
+ BaseCoordFormat3 format3;
+ } u;
+ public:
+ DEFINE_SIZE_UNION (2, format);
+};
+
+struct FeatMinMaxRecord /* Per-feature min/max extents; both offsets are measured from the enclosing MinMax table. */
+{
+ int cmp (hb_tag_t key) const { return tag.cmp (key); }
+
+ bool has_data () const { return tag; }
+
+ void get_min_max (const void *base, const BaseCoord **min, const BaseCoord **max) const
+ { /* base must be the enclosing MinMax table, i.e. the same base sanitize() validates the offsets against. */
+ if (likely (min)) *min = &(base+minCoord);
+ if (likely (max)) *max = &(base+maxCoord);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) && /* Offsets are relative to base (the MinMax table), not to this record. */
+ minCoord.sanitize (c, base) &&
+ maxCoord.sanitize (c, base)));
+ }
+
+ protected:
+ Tag tag; /* 4-byte feature identification tag--must
+ * match feature tag in FeatureList */
+ OffsetTo<BaseCoord>
+ minCoord; /* Offset to BaseCoord table that defines
+ * the minimum extent value, from beginning
+ * of MinMax table (may be NULL) */
+ OffsetTo<BaseCoord>
+ maxCoord; /* Offset to BaseCoord table that defines
+ * the maximum extent value, from beginning
+ * of MinMax table (may be NULL) */
+ public:
+ DEFINE_SIZE_STATIC (8);
+
+};
+
+struct MinMax /* Default min/max extents plus optional per-feature overrides. */
+{
+ void get_min_max (hb_tag_t feature_tag,
+ const BaseCoord **min,
+ const BaseCoord **max) const
+ {
+ const FeatMinMaxRecord &minMaxCoord = featMinMaxRecords.bsearch (feature_tag);
+ if (minMaxCoord.has_data ())
+ minMaxCoord.get_min_max (this, min, max); /* The record's offsets are relative to this table. */
+ else
+ {
+ if (likely (min)) *min = &(this+minCoord);
+ if (likely (max)) *max = &(this+maxCoord);
+ }
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ minCoord.sanitize (c, this) &&
+ maxCoord.sanitize (c, this) &&
+ featMinMaxRecords.sanitize (c, this))); /* Passes this table as the base for every record. */
+ }
+
+ protected:
+ OffsetTo<BaseCoord>
+ minCoord; /* Offset to BaseCoord table that defines
+ * minimum extent value, from the beginning
+ * of MinMax table (may be NULL) */
+ OffsetTo<BaseCoord>
+ maxCoord; /* Offset to BaseCoord table that defines
+ * maximum extent value, from the beginning
+ * of MinMax table (may be NULL) */
+ SortedArrayOf<FeatMinMaxRecord>
+ featMinMaxRecords;
+ /* Array of FeatMinMaxRecords, in alphabetical
+ * order by featureTableTag */
+ public:
+ DEFINE_SIZE_ARRAY (6, featMinMaxRecords);
+};
+
+struct BaseValues /* Baseline coordinates of one script, indexed by baseline tag position. */
+{
+ const BaseCoord &get_base_coord (int baseline_tag_index) const
+ {
+ if (baseline_tag_index == -1) baseline_tag_index = defaultIndex; /* -1 selects the script's default baseline. */
+ return this+baseCoords[baseline_tag_index];
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ baseCoords.sanitize (c, this)));
+ }
+
+ protected:
+ Index defaultIndex; /* Index number of default baseline for this
+ * script — equals index position of baseline tag
+ * in baselineTags array of the BaseTagList */
+ OffsetArrayOf<BaseCoord>
+ baseCoords; /* Number of BaseCoord tables defined — should equal
+ * baseTagCount in the BaseTagList
+ *
+ * Array of offsets to BaseCoord tables, from beginning of
+ * BaseValues table — order matches baselineTags array in
+ * the BaseTagList */
+ public:
+ DEFINE_SIZE_ARRAY (4, baseCoords);
+};
+
+struct BaseLangSysRecord /* Per-language MinMax override; the offset is measured from the enclosing BaseScript table. */
+{
+ int cmp (hb_tag_t key) const { return baseLangSysTag.cmp (key); }
+
+ bool has_data () const { return baseLangSysTag; }
+
+ const MinMax &get_min_max (const void *base) const { return base+minMax; } /* base must be the enclosing BaseScript table. */
+
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) && /* Offset is relative to base (the BaseScript table), not to this record. */
+ minMax.sanitize (c, base)));
+ }
+
+ protected:
+ Tag baseLangSysTag; /* 4-byte language system identification tag */
+ OffsetTo<MinMax>
+ minMax; /* Offset to MinMax table, from beginning
+ * of BaseScript table */
+ public:
+ DEFINE_SIZE_STATIC (6);
+};
+
+struct BaseScript /* Baseline values and min/max extents for one script. */
+{
+ const MinMax &get_min_max (hb_tag_t language_tag) const
+ {
+ const BaseLangSysRecord& record = baseLangSysRecords.bsearch (language_tag);
+ return record.has_data () ? record.get_min_max (this) : this+defaultMinMax; /* Unknown language: use the script default. */
+ }
+
+ const BaseCoord &get_base_coord (int baseline_tag_index) const
+ { return (this+baseValues).get_base_coord (baseline_tag_index); }
+
+ bool has_data () const { return baseValues; }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ baseValues.sanitize (c, this) &&
+ defaultMinMax.sanitize (c, this) &&
+ baseLangSysRecords.sanitize (c, this))); /* Passes this table as the base for every record. */
+ }
+
+ protected:
+ OffsetTo<BaseValues>
+ baseValues; /* Offset to BaseValues table, from beginning
+ * of BaseScript table (may be NULL) */
+ OffsetTo<MinMax>
+ defaultMinMax; /* Offset to MinMax table, from beginning of
+ * BaseScript table (may be NULL) */
+ SortedArrayOf<BaseLangSysRecord>
+ baseLangSysRecords;
+ /* Number of BaseLangSysRecords
+ * defined — may be zero (0) */
+
+ public:
+ DEFINE_SIZE_ARRAY (6, baseLangSysRecords);
+};
+
+struct BaseScriptList;
+struct BaseScriptRecord /* Script tag paired with an offset that is resolved against the enclosing BaseScriptList. */
+{
+ int cmp (hb_tag_t key) const { return baseScriptTag.cmp (key); }
+
+ bool has_data () const { return baseScriptTag; }
+
+ const BaseScript &get_base_script (const BaseScriptList *list) const
+ { return list+baseScript; }
+
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ baseScript.sanitize (c, base))); /* Validated against the caller-supplied base rather than this record. */
+ }
+
+ protected:
+ Tag baseScriptTag; /* 4-byte script identification tag */
+ OffsetTo<BaseScript>
+ baseScript; /* Offset to BaseScript table, from beginning
+ * of BaseScriptList */
+
+ public:
+ DEFINE_SIZE_STATIC (6);
+};
+
+struct BaseScriptList /* Sorted list of script records with a 'DFLT' fallback on lookup. */
+{
+ const BaseScript &get_base_script (hb_tag_t script) const
+ {
+ const BaseScriptRecord *record = &baseScriptRecords.bsearch (script);
+ if (!record->has_data ()) record = &baseScriptRecords.bsearch (HB_TAG ('D','F','L','T')); /* Retry with the default script tag. */
+ return record->has_data () ? record->get_base_script (this) : Null (BaseScript);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ baseScriptRecords.sanitize (c, this));
+ }
+
+ protected:
+ SortedArrayOf<BaseScriptRecord>
+ baseScriptRecords;
+
+ public:
+ DEFINE_SIZE_ARRAY (2, baseScriptRecords);
+};
+
+struct Axis /* One layout direction of BASE: a baseline tag list plus per-script records. */
+{
+ bool get_baseline (hb_tag_t baseline_tag, /* Returns false when the script has no data or, if coord is requested, when baseline_tag is not listed. */
+ hb_tag_t script_tag,
+ hb_tag_t language_tag,
+ const BaseCoord **coord) const
+ {
+ const BaseScript &base_script = (this+baseScriptList).get_base_script (script_tag);
+ if (!base_script.has_data ()) return false;
+
+ if (likely (coord))
+ {
+ unsigned int tag_index = 0;
+ if (!(this+baseTagList).bfind (baseline_tag, &tag_index)) { *coord = nullptr; return false; } /* Unknown tag: never fall back to index 0. */
+ *coord = &base_script.get_base_coord (tag_index);
+ }
+
+ return true;
+ }
+
+ bool get_min_max (hb_tag_t script_tag, /* Looks up the min/max coordinates for script, language and feature. */
+ hb_tag_t language_tag,
+ hb_tag_t feature_tag,
+ const BaseCoord **min_coord,
+ const BaseCoord **max_coord) const
+ {
+ const BaseScript &base_script = (this+baseScriptList).get_base_script (script_tag);
+ if (!base_script.has_data ()) return false;
+
+ base_script.get_min_max (language_tag).get_min_max (feature_tag, min_coord, max_coord);
+
+ return true;
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) && /* Sanitize through the offsets so a null (absent) list is accepted. */
+ baseTagList.sanitize (c, this) &&
+ baseScriptList.sanitize (c, this)));
+ }
+
+ protected:
+ OffsetTo<SortedArrayOf<Tag>>
+ baseTagList; /* Offset to BaseTagList table, from beginning
+ * of Axis table (may be NULL)
+ * Array of 4-byte baseline identification tags — must
+ * be in alphabetical order */
+ OffsetTo<BaseScriptList>
+ baseScriptList; /* Offset to BaseScriptList table, from beginning
+ * of Axis table
+ * Array of BaseScriptRecords, in alphabetical order
+ * by baseScriptTag */
+
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+
+struct BASE /* Top-level BASE table: horizontal and vertical axes plus an optional variation store. */
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_BASE;
+
+ const Axis &get_axis (hb_direction_t direction) const
+ { return HB_DIRECTION_IS_VERTICAL (direction) ? this+vAxis : this+hAxis; }
+
+ const VariationStore &get_var_store () const
+ { return version.to_int () < 0x00010001u ? Null (VariationStore) : this+varStore; } /* varStore only exists from version 0x00010001. */
+
+ bool get_baseline (hb_font_t *font, /* Writes the baseline coordinate to *base (if non-null); false when no such baseline exists. */
+ hb_tag_t baseline_tag,
+ hb_direction_t direction,
+ hb_tag_t script_tag,
+ hb_tag_t language_tag,
+ hb_position_t *base) const
+ {
+ const BaseCoord *base_coord = nullptr;
+ if (unlikely (!get_axis (direction).get_baseline (baseline_tag, script_tag, language_tag, &base_coord) ||
+ !base_coord || !base_coord->has_data ()))
+ return false;
+
+ if (likely (base))
+ *base = base_coord->get_coord (font, get_var_store (), direction);
+
+ return true;
+ }
+
+ /* TODO: Expose this separately sometime? */
+ bool get_min_max (hb_font_t *font, /* Writes min/max extents to *min / *max (each optional); false when the script has no data. */
+ hb_direction_t direction,
+ hb_tag_t script_tag,
+ hb_tag_t language_tag,
+ hb_tag_t feature_tag,
+ hb_position_t *min,
+ hb_position_t *max) const
+ {
+ const BaseCoord *min_coord = nullptr, *max_coord = nullptr; /* Null-initialized so the checks below never read an indeterminate pointer. */
+ if (!get_axis (direction).get_min_max (script_tag, language_tag, feature_tag,
+ &min_coord, &max_coord))
+ return false;
+
+ const VariationStore &var_store = get_var_store ();
+ if (likely (min && min_coord)) *min = min_coord->get_coord (font, var_store, direction);
+ if (likely (max && max_coord)) *max = max_coord->get_coord (font, var_store, direction);
+ return true;
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ likely (version.major == 1) &&
+ hAxis.sanitize (c, this) &&
+ vAxis.sanitize (c, this) &&
+ (version.to_int () < 0x00010001u || varStore.sanitize (c, this)))); /* varStore is checked only for versions that define it. */
+ }
+
+ protected:
+ FixedVersion<>version; /* Version of the BASE table */
+ OffsetTo<Axis>hAxis; /* Offset to horizontal Axis table, from beginning
+ * of BASE table (may be NULL) */
+ OffsetTo<Axis>vAxis; /* Offset to vertical Axis table, from beginning
+ * of BASE table (may be NULL) */
+ LOffsetTo<VariationStore>
+ varStore; /* Offset to the table of Item Variation
+ * Store--from beginning of BASE
+ * header (may be NULL). Introduced
+ * in version 0x00010001. */
+ public:
+ DEFINE_SIZE_MIN (8);
+};
+
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_LAYOUT_BASE_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-common.hh b/thirdparty/harfbuzz/src/hb-ot-layout-common.hh
new file mode 100644
index 0000000000..3140dd6328
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-layout-common.hh
@@ -0,0 +1,3178 @@
+/*
+ * Copyright © 2007,2008,2009 Red Hat, Inc.
+ * Copyright © 2010,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_LAYOUT_COMMON_HH
+#define HB_OT_LAYOUT_COMMON_HH
+
+#include "hb.hh"
+#include "hb-ot-layout.hh"
+#include "hb-open-type.hh"
+#include "hb-set.hh"
+#include "hb-bimap.hh"
+
+
+#ifndef HB_MAX_NESTING_LEVEL
+#define HB_MAX_NESTING_LEVEL 6
+#endif
+#ifndef HB_MAX_CONTEXT_LENGTH
+#define HB_MAX_CONTEXT_LENGTH 64
+#endif
+#ifndef HB_CLOSURE_MAX_STAGES
+/*
+ * The maximum number of times a lookup can be applied during shaping.
+ * Used to limit the number of iterations of the closure algorithm.
+ * This must be larger than the number of times add_pause() is
+ * called in a collect_features call of any shaper.
+ */
+#define HB_CLOSURE_MAX_STAGES 32
+#endif
+
+#ifndef HB_MAX_SCRIPTS
+#define HB_MAX_SCRIPTS 500
+#endif
+
+#ifndef HB_MAX_LANGSYS
+#define HB_MAX_LANGSYS 2000
+#endif
+
+#ifndef HB_MAX_FEATURES
+#define HB_MAX_FEATURES 750
+#endif
+
+#ifndef HB_MAX_FEATURE_INDICES
+#define HB_MAX_FEATURE_INDICES 1500
+#endif
+
+#ifndef HB_MAX_LOOKUP_INDICES
+#define HB_MAX_LOOKUP_INDICES 20000
+#endif
+
+
+namespace OT {
+
+
+#define NOT_COVERED ((unsigned int) -1)
+
+
+template<typename Iterator>
+static inline void Coverage_serialize (hb_serialize_context_t *c,
+ Iterator it);
+
+template<typename Iterator>
+static inline void ClassDef_serialize (hb_serialize_context_t *c,
+ Iterator it);
+
+static void ClassDef_remap_and_serialize (hb_serialize_context_t *c,
+ const hb_set_t &glyphset,
+ const hb_map_t &gid_klass_map,
+ hb_sorted_vector_t<HBGlyphID> &glyphs,
+ const hb_set_t &klasses,
+ hb_map_t *klass_map /*INOUT*/);
+
+/* Dispatch context used while subsetting layout tables.  It carries the
+ * lookup/feature index remapping tables and keeps running tallies of the
+ * scripts, langsys records, feature indices and lookup indices visited so
+ * far, so callers can stop once an HB_MAX_* budget has been used up. */
+struct hb_subset_layout_context_t :
+  hb_dispatch_context_t<hb_subset_layout_context_t, hb_empty_t, HB_DEBUG_SUBSET>
+{
+  const char *get_name () { return "SUBSET_LAYOUT"; }
+  static return_t default_return_value () { return hb_empty_t (); }
+
+  /* Registers one more Script.  True while the tally *before* this visit
+   * was still below HB_MAX_SCRIPTS. */
+  bool visitScript ()
+  {
+    const unsigned before = script_count;
+    script_count++;
+    return before < HB_MAX_SCRIPTS;
+  }
+
+  /* Registers one more LangSys.  True while the tally *before* this visit
+   * was still below HB_MAX_LANGSYS. */
+  bool visitLangSys ()
+  {
+    const unsigned before = langsys_count;
+    langsys_count++;
+    return before < HB_MAX_LANGSYS;
+  }
+
+  /* Adds `count` feature indices to the tally.  True while the updated
+   * tally is below HB_MAX_FEATURE_INDICES. */
+  bool visitFeatureIndex (int count)
+  {
+    return (feature_index_count += count) < HB_MAX_FEATURE_INDICES;
+  }
+
+  /* Registers one more lookup index.  True while the updated tally is
+   * below HB_MAX_LOOKUP_INDICES. */
+  bool visitLookupIndex()
+  {
+    return ++lookup_index_count < HB_MAX_LOOKUP_INDICES;
+  }
+
+  hb_subset_context_t *subset_context;
+  const hb_tag_t table_tag;
+  const hb_map_t *lookup_index_map;
+  const hb_map_t *feature_index_map;
+
+  /* Stores the given pointers and tag as-is and starts every tally at zero. */
+  hb_subset_layout_context_t (hb_subset_context_t *c_,
+                              hb_tag_t tag_,
+                              hb_map_t *lookup_map_,
+                              hb_map_t *feature_map_) :
+    subset_context (c_),
+    table_tag (tag_),
+    lookup_index_map (lookup_map_),
+    feature_index_map (feature_map_),
+    script_count (0),
+    langsys_count (0),
+    feature_index_count (0),
+    lookup_index_count (0)
+  {}
+
+  private:
+  unsigned script_count;
+  unsigned langsys_count;
+  unsigned feature_index_count;
+  unsigned lookup_index_count;
+};
+
+struct hb_collect_variation_indices_context_t : /* Dispatch context that hands each visited object to its collect_variation_indices () method. */
+ hb_dispatch_context_t<hb_collect_variation_indices_context_t>
+{
+ template <typename T>
+ return_t dispatch (const T &obj) { obj.collect_variation_indices (this); return hb_empty_t (); } /* The returned hb_empty_t carries no information; results are reported through the members below. */
+ static return_t default_return_value () { return hb_empty_t (); }
+
+ hb_set_t *layout_variation_indices; /* Mutable set handed to visitors; only the pointer is stored here. */
+ const hb_set_t *glyph_set; /* Read-only glyph set; presumably the glyphs being retained -- confirm with callers. */
+ const hb_map_t *gpos_lookups; /* Read-only map of GPOS lookups; key/value meaning is defined by the caller. */
+
+ hb_collect_variation_indices_context_t (hb_set_t *layout_variation_indices_,
+ const hb_set_t *glyph_set_,
+ const hb_map_t *gpos_lookups_) :
+ layout_variation_indices (layout_variation_indices_),
+ glyph_set (glyph_set_),
+ gpos_lookups (gpos_lookups_) {}
+};
+
+/* Functor that subsets one offset at a time into `out`.  A slot is appended
+ * to the output array first; when subsetting the offset's target fails, the
+ * slot is popped again and the serializer is rolled back to where it stood
+ * right after the append. */
+template<typename OutputArray>
+struct subset_offset_array_t
+{
+  subset_offset_array_t (hb_subset_context_t *subset_context_,
+                         OutputArray& out_,
+                         const void *base_) :
+    subset_context (subset_context_),
+    out (out_),
+    base (base_)
+  {}
+
+  /* Returns true when `offset` was subset and kept, false when the slot
+   * could not be allocated or the subset produced nothing. */
+  template <typename T>
+  bool operator () (T&& offset)
+  {
+    auto *slot = out.serialize_append (subset_context->serializer);
+    if (unlikely (!slot)) return false;
+
+    auto checkpoint = subset_context->serializer->snapshot ();
+    if (slot->serialize_subset (subset_context, offset, base))
+      return true;
+
+    /* Nothing survived: drop the slot and undo whatever was written. */
+    out.pop ();
+    subset_context->serializer->revert (checkpoint);
+    return false;
+  }
+
+  private:
+  hb_subset_context_t *subset_context;
+  OutputArray &out;
+  const void *base;
+};
+
+
+template<typename OutputArray, typename Arg>
+struct subset_offset_array_arg_t /* Same append / subset / roll-back sequence as subset_offset_array_t, with one extra argument passed on to serialize_subset (). */
+{
+ subset_offset_array_arg_t (hb_subset_context_t *subset_context_,
+ OutputArray& out_,
+ const void *base_,
+ Arg &&arg_) : subset_context (subset_context_), out (out_),
+ base (base_), arg (arg_) {} /* arg_ is bound by reference, not copied */
+
+ template <typename T>
+ bool operator () (T&& offset)
+ {
+ auto *o = out.serialize_append (subset_context->serializer); /* reserve the output slot first */
+ if (unlikely (!o)) return false;
+ auto snap = subset_context->serializer->snapshot (); /* taken after the append, so a revert does not by itself remove the slot */
+ bool ret = o->serialize_subset (subset_context, offset, base, arg);
+ if (!ret)
+ {
+ out.pop (); /* discard the slot reserved above */
+ subset_context->serializer->revert (snap);
+ }
+ return ret;
+ }
+
+ private:
+ hb_subset_context_t *subset_context;
+ OutputArray &out;
+ const void *base;
+ Arg &&arg; /* Held by reference: the referenced object must outlive this functor. NOTE(review): initialized from the named parameter arg_, so this appears to bind only when Arg is an lvalue-reference type -- confirm callers always pass lvalues. */
+};
+
+/*
+ * Helper to subset an array of offsets. Subsets the thing pointed to by each offset
+ * and discards the offset in the array if the subset operation results in an empty
+ * thing.
+ *
+ * This object is only a factory: calling it returns a functor
+ * (subset_offset_array_t or subset_offset_array_arg_t) that is then invoked
+ * once per offset. The returned functor keeps references to `out` (and to
+ * `arg` in the second overload), so those must stay alive while it is used.
+ */
+struct
+{
+ template<typename OutputArray>
+ subset_offset_array_t<OutputArray>
+ operator () (hb_subset_context_t *subset_context, OutputArray& out,
+ const void *base) const
+ { return subset_offset_array_t<OutputArray> (subset_context, out, base); }
+
+ /* Variant with one extra argument passed to serialize_subset */
+ template<typename OutputArray, typename Arg>
+ subset_offset_array_arg_t<OutputArray, Arg>
+ operator () (hb_subset_context_t *subset_context, OutputArray& out,
+ const void *base, Arg &&arg) const
+ { return subset_offset_array_arg_t<OutputArray, Arg> (subset_context, out, base, arg); }
+}
+HB_FUNCOBJ (subset_offset_array);
+
+/* Functor that subsets one record at a time.  A record that subsets
+ * successfully bumps the output array's length; one that fails has its
+ * partially written bytes rolled back and leaves the length unchanged. */
+template<typename OutputArray>
+struct subset_record_array_t
+{
+  subset_record_array_t (hb_subset_layout_context_t *c_, OutputArray* out_,
+                         const void *base_) :
+    subset_layout_context (c_),
+    out (out_),
+    base (base_)
+  {}
+
+  template <typename T>
+  void
+  operator () (T&& record)
+  {
+    auto checkpoint = subset_layout_context->subset_context->serializer->snapshot ();
+    if (record.subset (subset_layout_context, base))
+    {
+      /* Record was written in place; account for it. */
+      out->len++;
+      return;
+    }
+    /* Record rejected: undo anything it wrote. */
+    subset_layout_context->subset_context->serializer->revert (checkpoint);
+  }
+
+  private:
+  hb_subset_layout_context_t *subset_layout_context;
+  OutputArray *out;
+  const void *base;
+};
+
+/*
+ * Helper to subset a RecordList/record array. Subsets each Record in the array and
+ * discards the record if the subset operation returns false.
+ *
+ * This object is only a factory: calling it returns a subset_record_array_t
+ * functor, which is then invoked once per record and stores the `out`
+ * pointer it was given.
+ */
+struct
+{
+ template<typename OutputArray>
+ subset_record_array_t<OutputArray>
+ operator () (hb_subset_layout_context_t *c, OutputArray* out,
+ const void *base) const
+ { return subset_record_array_t<OutputArray> (c, out, base); }
+}
+HB_FUNCOBJ (subset_record_array);
+
+/*
+ *
+ * OpenType Layout Common Table Formats
+ *
+ */
+
+
+/*
+ * Script, ScriptList, LangSys, Feature, FeatureList, Lookup, LookupList
+ */
+
+struct Record_sanitize_closure_t { /* Extra data Record::sanitize () passes down to the sanitize () of the record's target. */
+ hb_tag_t tag; /* Tag of the record being sanitized. */
+ const void *list_base; /* Base pointer the record's offset is resolved against. */
+};
+
+template <typename Type>
+struct Record /* A (tag, offset) pair; the offset is resolved against a base pointer supplied by the caller. */
+{
+ int cmp (hb_tag_t a) const { return tag.cmp (a); } /* Compares this record's tag against `a`; used for sorted lookup. */
+
+ bool subset (hb_subset_layout_context_t *c, const void *base) const
+ {
+ TRACE_SUBSET (this);
+ auto *out = c->subset_context->serializer->embed (this); /* copy the record verbatim first */
+ if (unlikely (!out)) return_trace (false);
+ bool ret = out->offset.serialize_subset (c->subset_context, offset, base, c, &tag); /* then subset the target, passing the layout context and this record's tag along */
+ return_trace (ret);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const
+ {
+ TRACE_SANITIZE (this);
+ const Record_sanitize_closure_t closure = {tag, base}; /* lets the target's sanitize () see the tag and the list base */
+ return_trace (c->check_struct (this) && offset.sanitize (c, base, &closure));
+ }
+
+ Tag tag; /* 4-byte Tag identifier */
+ OffsetTo<Type>
+ offset; /* Offset from beginning of object holding
+ * the Record */
+ public:
+ DEFINE_SIZE_STATIC (6);
+};
+
+template <typename Type>
+struct RecordArrayOf : SortedArrayOf<Record<Type>> /* Sorted array of Record<Type> with accessors for tags and offsets. */
+{
+ const OffsetTo<Type>& get_offset (unsigned int i) const
+ { return (*this)[i].offset; } /* Offset field of record i (read-only). */
+ OffsetTo<Type>& get_offset (unsigned int i)
+ { return (*this)[i].offset; } /* Offset field of record i (mutable). */
+ const Tag& get_tag (unsigned int i) const
+ { return (*this)[i].tag; } /* Tag field of record i. */
+ unsigned int get_tags (unsigned int start_offset,
+ unsigned int *record_count /* IN/OUT */,
+ hb_tag_t *record_tags /* OUT */) const
+ {
+ if (record_count) /* a null count means the caller only wants the total */
+ {
+ + this->sub_array (start_offset, record_count)
+ | hb_map (&Record<Type>::tag)
+ | hb_sink (hb_array (record_tags, *record_count))
+ ;
+ }
+ return this->len; /* always the total number of records, not the number copied */
+ }
+ bool find_index (hb_tag_t tag, unsigned int *index) const
+ {
+ return this->bfind (tag, index, HB_BFIND_NOT_FOUND_STORE, Index::NOT_FOUND_INDEX); /* presumably stores NOT_FOUND_INDEX into *index on a miss -- confirm against bfind () */
+ }
+};
+
+template <typename Type>
+struct RecordListOf : RecordArrayOf<Type> /* A record array whose offsets are relative to the start of the list itself. */
+{
+ const Type& operator [] (unsigned int i) const
+ { return this+this->get_offset (i); } /* Resolves record i's offset against this list. */
+
+ bool subset (hb_subset_context_t *c,
+ hb_subset_layout_context_t *l) const
+ {
+ TRACE_SUBSET (this);
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+
+ + this->iter ()
+ | hb_apply (subset_record_array (l, out, this)) /* each record that subsets successfully increments out->len */
+ ;
+ return_trace (true); /* succeeds even if every record was dropped */
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (RecordArrayOf<Type>::sanitize (c, this)); /* the list itself is the base for its records' offsets */
+ }
+};
+
+struct Feature;
+
+struct RecordListOfFeature : RecordListOf<Feature> /* Feature list whose subset () keeps only records selected by l->feature_index_map. */
+{
+ bool subset (hb_subset_context_t *c,
+ hb_subset_layout_context_t *l) const
+ {
+ TRACE_SUBSET (this);
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false);
+
+ unsigned count = this->len;
+ + hb_zip (*this, hb_range (count)) /* pair every record with its position in the list */
+ | hb_filter (l->feature_index_map, hb_second) /* filter on the position, using feature_index_map as the predicate */
+ | hb_map (hb_first) /* back to the bare record */
+ | hb_apply (subset_record_array (l, out, this))
+ ;
+ return_trace (true); /* succeeds even if no record was kept */
+ }
+};
+
+/* One contiguous run of glyph IDs, first..last inclusive, carrying a
+ * 16-bit value. */
+struct RangeRecord
+{
+  /* Three-way comparison of glyph `g` against this range:
+   * -1 when g lies before it, 0 when inside it, +1 when after it. */
+  int cmp (hb_codepoint_t g) const
+  {
+    if (g < first) return -1;
+    if (g <= last) return 0;
+    return +1;
+  }
+
+  /* Only the fixed-size struct bounds are checked. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (c->check_struct (this));
+  }
+
+  /* Asks `glyphs` whether it intersects the range first..last. */
+  bool intersects (const hb_set_t *glyphs) const
+  {
+    return glyphs->intersects (first, last);
+  }
+
+  /* Adds the range first..last to `glyphs` and returns what add_range reports. */
+  template <typename set_t>
+  bool collect_coverage (set_t *glyphs) const
+  {
+    return glyphs->add_range (first, last);
+  }
+
+  HBGlyphID first; /* First GlyphID in the range */
+  HBGlyphID last; /* Last GlyphID in the range */
+  HBUINT16 value; /* Value */
+  public:
+  DEFINE_SIZE_STATIC (6);
+};
+DECLARE_NULL_NAMESPACE_BYTES (OT, RangeRecord);
+
+
+struct IndexArray : ArrayOf<Index> /* Array of Index values with helpers for subsetting and bulk export. */
+{
+ bool intersects (const hb_map_t *indexes) const
+ { return hb_any (*this, indexes); } /* presumably: true if any stored index is a key of `indexes` -- confirm how hb_any treats a map predicate */
+
+ template <typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ void serialize (hb_serialize_context_t *c,
+ hb_subset_layout_context_t *l,
+ Iterator it)
+ {
+ if (!it) return; /* nothing to write for an empty iterator */
+ if (unlikely (!c->extend_min ((*this)))) return;
+
+ for (const auto _ : it)
+ {
+ if (!l->visitLookupIndex()) break; /* stop once the layout context reports the lookup-index budget is used up */
+
+ Index i;
+ i = _;
+ c->copy (i); /* NOTE(review): the result is not checked before len is bumped */
+ this->len++;
+ }
+ }
+
+ unsigned int get_indexes (unsigned int start_offset,
+ unsigned int *_count /* IN/OUT */,
+ unsigned int *_indexes /* OUT */) const
+ {
+ if (_count) /* a null count means the caller only wants the total */
+ {
+ + this->sub_array (start_offset, _count)
+ | hb_sink (hb_array (_indexes, *_count))
+ ;
+ }
+ return this->len; /* always the total number of indices, not the number copied */
+ }
+
+ void add_indexes_to (hb_set_t* output /* OUT */) const
+ {
+ output->add_array (arrayZ, len); /* adds every stored index to the set */
+ }
+};
+
+
+struct LangSys /* Language-system table: an optional required feature plus a list of feature indices. */
+{
+ unsigned int get_feature_count () const
+ { return featureIndex.len; }
+ hb_tag_t get_feature_index (unsigned int i) const
+ { return featureIndex[i]; } /* NOTE: the value is a FeatureList index, despite the hb_tag_t return type. */
+ unsigned int get_feature_indexes (unsigned int start_offset,
+ unsigned int *feature_count /* IN/OUT */,
+ unsigned int *feature_indexes /* OUT */) const
+ { return featureIndex.get_indexes (start_offset, feature_count, feature_indexes); }
+ void add_feature_indexes_to (hb_set_t *feature_indexes) const
+ { featureIndex.add_indexes_to (feature_indexes); }
+
+ bool has_required_feature () const { return reqFeatureIndex != 0xFFFFu; } /* 0xFFFF is the "no required feature" sentinel */
+ unsigned int get_required_feature_index () const
+ {
+ if (reqFeatureIndex == 0xFFFFu)
+ return Index::NOT_FOUND_INDEX;
+ return reqFeatureIndex;
+ }
+
+ LangSys* copy (hb_serialize_context_t *c) const
+ {
+ TRACE_SERIALIZE (this);
+ return_trace (c->embed (*this)); /* verbatim copy into the serializer */
+ }
+
+ bool operator == (const LangSys& o) const /* Equal when the required feature and the feature index lists match element by element; lookupOrderZ is not compared. */
+ {
+ if (featureIndex.len != o.featureIndex.len ||
+ reqFeatureIndex != o.reqFeatureIndex)
+ return false;
+
+ for (const auto _ : + hb_zip (featureIndex, o.featureIndex))
+ if (_.first != _.second) return false;
+
+ return true;
+ }
+
+ bool subset (hb_subset_context_t *c,
+ hb_subset_layout_context_t *l,
+ const Tag *tag = nullptr) const
+ {
+ TRACE_SUBSET (this);
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false);
+
+ out->reqFeatureIndex = l->feature_index_map->has (reqFeatureIndex) ? l->feature_index_map->get (reqFeatureIndex) : 0xFFFFu; /* remap the required feature, or mark it absent if it is not in the map */
+
+ if (!l->visitFeatureIndex (featureIndex.len)) /* charge all feature indices against the budget up front */
+ return_trace (false);
+
+ auto it =
+ + hb_iter (featureIndex)
+ | hb_filter (l->feature_index_map) /* keep only indices accepted by the map */
+ | hb_map (l->feature_index_map) /* and translate them to their new values */
+ ;
+
+ bool ret = bool (it); /* result depends only on whether any feature index survived, not on the required feature */
+ out->featureIndex.serialize (c->serializer, l, it);
+ return_trace (ret);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c,
+ const Record_sanitize_closure_t * = nullptr) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) && featureIndex.sanitize (c));
+ }
+
+ Offset16 lookupOrderZ; /* = Null (reserved for an offset to a
+ * reordering table) */
+ HBUINT16 reqFeatureIndex;/* Index of a feature required for this
+ * language system--if no required features
+ * = 0xFFFFu */
+ IndexArray featureIndex; /* Array of indices into the FeatureList */
+ public:
+ DEFINE_SIZE_ARRAY_SIZED (6, featureIndex);
+};
+DECLARE_NULL_NAMESPACE_BYTES (OT, LangSys);
+
+struct Script /* Script table: an optional default LangSys plus a tag-sorted array of LangSys records. */
+{
+ unsigned int get_lang_sys_count () const
+ { return langSys.len; }
+ const Tag& get_lang_sys_tag (unsigned int i) const
+ { return langSys.get_tag (i); }
+ unsigned int get_lang_sys_tags (unsigned int start_offset,
+ unsigned int *lang_sys_count /* IN/OUT */,
+ hb_tag_t *lang_sys_tags /* OUT */) const
+ { return langSys.get_tags (start_offset, lang_sys_count, lang_sys_tags); }
+ const LangSys& get_lang_sys (unsigned int i) const
+ {
+ if (i == Index::NOT_FOUND_INDEX) return get_default_lang_sys (); /* the "not found" index selects the default LangSys */
+ return this+langSys[i].offset;
+ }
+ bool find_lang_sys_index (hb_tag_t tag, unsigned int *index) const
+ { return langSys.find_index (tag, index); }
+
+ bool has_default_lang_sys () const { return defaultLangSys != 0; } /* a zero offset means there is no default LangSys */
+ const LangSys& get_default_lang_sys () const { return this+defaultLangSys; }
+
+ bool subset (hb_subset_context_t *c,
+ hb_subset_layout_context_t *l,
+ const Tag *tag) const
+ {
+ TRACE_SUBSET (this);
+ if (!l->visitScript ()) return_trace (false); /* script budget used up */
+
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false);
+
+ bool defaultLang = false;
+ if (has_default_lang_sys ())
+ {
+ c->serializer->push (); /* the default LangSys is serialized as its own object */
+ const LangSys& ls = this+defaultLangSys;
+ bool ret = ls.subset (c, l);
+ if (!ret && tag && *tag != HB_TAG ('D', 'F', 'L', 'T')) /* a failed default LangSys is dropped only when the script tag is known and is not 'DFLT' */
+ {
+ c->serializer->pop_discard ();
+ out->defaultLangSys = 0;
+ }
+ else
+ {
+ c->serializer->add_link (out->defaultLangSys, c->serializer->pop_pack ()); /* otherwise it is kept, even if its subset reported failure */
+ defaultLang = true;
+ }
+ }
+
+ + langSys.iter ()
+ | hb_filter ([=] (const Record<LangSys>& record) {return l->visitLangSys (); }) /* charge each record against the langsys budget */
+ | hb_filter ([&] (const Record<LangSys>& record)
+ {
+ const LangSys& d = this+defaultLangSys;
+ const LangSys& l = this+record.offset; /* note: shadows the outer layout context `l` inside this lambda */
+ return !(l == d); /* skip records identical to the default LangSys */
+ })
+ | hb_apply (subset_record_array (l, &(out->langSys), this))
+ ;
+
+ return_trace (bool (out->langSys.len) || defaultLang || l->table_tag == HB_OT_TAG_GSUB); /* an empty script is reported as success only for GSUB */
+ }
+
+ bool sanitize (hb_sanitize_context_t *c,
+ const Record_sanitize_closure_t * = nullptr) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (defaultLangSys.sanitize (c, this) && langSys.sanitize (c, this)); /* both are resolved relative to this Script */
+ }
+
+ protected:
+ OffsetTo<LangSys>
+ defaultLangSys; /* Offset to DefaultLangSys table--from
+ * beginning of Script table--may be Null */
+ RecordArrayOf<LangSys>
+ langSys; /* Array of LangSysRecords--listed
+ * alphabetically by LangSysTag */
+ public:
+ DEFINE_SIZE_ARRAY_SIZED (4, langSys);
+};
+
+typedef RecordListOf<Script> ScriptList;
+
+
+/* https://docs.microsoft.com/en-us/typography/opentype/spec/features_pt#size */
+struct FeatureParamsSize
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!c->check_struct (this))) return_trace (false);
+
+ /* This subtable has some "history", if you will. Some earlier versions of
+ * Adobe tools calculated the offset of the FeatureParams subtable from the
+ * beginning of the FeatureList table! Now, that is dealt with in the
+ * Feature implementation. But we still need to be able to tell junk from
+ * real data. Note: We don't check that the nameID actually exists.
+ *
+ * Read Roberts wrote on 9/15/06 on opentype-list@indx.co.uk :
+ *
+ * Yes, it is correct that a new version of the AFDKO (version 2.0) will be
+ * coming out soon, and that the makeotf program will build a font with a
+ * 'size' feature that is correct by the specification.
+ *
+ * The specification for this feature tag is in the "OpenType Layout Tag
+ * Registry". You can see a copy of this at:
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/features_pt#tag-size
+ *
+ * Here is one set of rules to determine if the 'size' feature is built
+ * correctly, or as by the older versions of MakeOTF. You may be able to do
+ * better.
+ *
+ * Assume that the offset to the size feature is according to specification,
+ * and make the following value checks. If it fails, assume the size
+ * feature is calculated as versions of MakeOTF before the AFDKO 2.0 built it.
+ * If this fails, reject the 'size' feature. The older makeOTF's calculated the
+ * offset from the beginning of the FeatureList table, rather than from the
+ * beginning of the 'size' Feature table.
+ *
+ * If "design size" == 0:
+ * fails check
+ *
+ * Else if ("subfamily identifier" == 0 and
+ * "range start" == 0 and
+ * "range end" == 0 and
+ * "range start" == 0 and
+ * "menu name ID" == 0)
+ * passes check: this is the format used when there is a design size
+ * specified, but there is no recommended size range.
+ *
+ * Else if ("design size" < "range start" or
+ * "design size" > "range end" or
+ * "range end" <= "range start" or
+ * "menu name ID" < 256 or
+ * "menu name ID" > 32767 or
+ * menu name ID is not a name ID which is actually in the name table)
+ * fails test
+ * Else
+ * passes test.
+ *
+ * The code below implements these rules except for the
+ * "range end" <= "range start" test and the name-table lookup.
+ */
+
+ if (!designSize)
+ return_trace (false);
+ else if (subfamilyID == 0 &&
+ subfamilyNameID == 0 &&
+ rangeStart == 0 &&
+ rangeEnd == 0)
+ return_trace (true);
+ else if (designSize < rangeStart ||
+ designSize > rangeEnd ||
+ subfamilyNameID < 256 ||
+ subfamilyNameID > 32767)
+ return_trace (false);
+ else
+ return_trace (true);
+ }
+
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ return_trace ((bool) c->serializer->embed (*this)); /* copied verbatim; nothing here depends on the glyph set */
+ }
+
+ HBUINT16 designSize; /* Represents the design size in 720/inch
+ * units (decipoints). The design size entry
+ * must be non-zero. When there is a design
+ * size but no recommended size range, the
+ * rest of the array will consist of zeros. */
+ HBUINT16 subfamilyID; /* Has no independent meaning, but serves
+ * as an identifier that associates fonts
+ * in a subfamily. All fonts which share a
+ * Preferred or Font Family name and which
+ * differ only by size range shall have the
+ * same subfamily value, and no fonts which
+ * differ in weight or style shall have the
+ * same subfamily value. If this value is
+ * zero, the remaining fields in the array
+ * will be ignored. */
+ NameID subfamilyNameID;/* If the preceding value is non-zero, this
+ * value must be set in the range 256 - 32767
+ * (inclusive). It records the value of a
+ * field in the name table, which must
+ * contain English-language strings encoded
+ * in Windows Unicode and Macintosh Roman,
+ * and may contain additional strings
+ * localized to other scripts and languages.
+ * Each of these strings is the name an
+ * application should use, in combination
+ * with the family name, to represent the
+ * subfamily in a menu. Applications will
+ * choose the appropriate version based on
+ * their selection criteria. */
+ HBUINT16 rangeStart; /* Small end of the recommended usage range
+ * (exclusive), stored in 720/inch units
+ * (decipoints). */
+ HBUINT16 rangeEnd; /* Large end of the recommended usage range
+ * (inclusive), stored in 720/inch units
+ * (decipoints). */
+ public:
+ DEFINE_SIZE_STATIC (10);
+};
+
+/* https://docs.microsoft.com/en-us/typography/opentype/spec/features_pt#ssxx */
+struct FeatureParamsStylisticSet
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ /* Right now minorVersion is at zero. Which means, any table supports
+ * the uiNameID field. */
+ return_trace (c->check_struct (this));
+ }
+
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ return_trace ((bool) c->serializer->embed (*this)); /* copied verbatim */
+ }
+
+ HBUINT16 version; /* (set to 0): This corresponds to a "minor"
+ * version number. Additional data may be
+ * added to the end of this Feature Parameters
+ * table in the future. */
+
+ NameID uiNameID; /* The 'name' table name ID that specifies a
+ * string (or strings, for multiple languages)
+ * for a user-interface label for this
+ * feature. The values of uiLabelNameId and
+ * sampleTextNameId are expected to be in the
+ * font-specific name ID range (256-32767),
+ * though that is not a requirement in this
+ * Feature Parameters specification. The
+ * user-interface label for the feature can
+ * be provided in multiple languages. An
+ * English string should be included as a
+ * fallback. The string should be kept to a
+ * minimal length to fit comfortably with
+ * different application interfaces. */
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+
+/* https://docs.microsoft.com/en-us/typography/opentype/spec/features_ae#cv01-cv99 */
+struct FeatureParamsCharacterVariants
+{
+ unsigned
+ get_characters (unsigned start_offset, unsigned *char_count, hb_codepoint_t *chars) const
+ {
+ if (char_count) /* a null count means the caller only wants the total */
+ {
+ + characters.sub_array (start_offset, char_count)
+ | hb_sink (hb_array (chars, *char_count))
+ ;
+ }
+ return characters.len; /* always the total number of characters, not the number copied */
+ }
+
+ unsigned get_size () const
+ { return min_size + characters.len * HBUINT24::static_size; } /* fixed header plus one 24-bit entry per character */
+
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ return_trace ((bool) c->serializer->embed (*this)); /* copied verbatim */
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ characters.sanitize (c));
+ }
+
+ HBUINT16 format; /* Format number is set to 0. */
+ NameID featUILableNameID; /* The 'name' table name ID that
+ * specifies a string (or strings,
+ * for multiple languages) for a
+ * user-interface label for this
+ * feature. (May be NULL.) */
+ NameID featUITooltipTextNameID;/* The 'name' table name ID that
+ * specifies a string (or strings,
+ * for multiple languages) that an
+ * application can use for tooltip
+ * text for this feature. (May be
+ * NULL.) */
+ NameID sampleTextNameID; /* The 'name' table name ID that
+ * specifies sample text that
+ * illustrates the effect of this
+ * feature. (May be NULL.) */
+ HBUINT16 numNamedParameters; /* Number of named parameters. (May
+ * be zero.) */
+ NameID firstParamUILabelNameID;/* The first 'name' table name ID
+ * used to specify strings for
+ * user-interface labels for the
+ * feature parameters. (Must be zero
+ * if numNamedParameters is zero.) */
+ ArrayOf<HBUINT24>
+ characters; /* Array of the Unicode Scalar Value
+ * of the characters for which this
+ * feature provides glyph variants.
+ * (May be zero.) */
+ public:
+ DEFINE_SIZE_ARRAY (14, characters);
+};
+
+struct FeatureParams /* Union of the feature-parameter layouts; the feature tag selects which member is valid. */
+{
+ bool sanitize (hb_sanitize_context_t *c, hb_tag_t tag) const
+ {
+#ifdef HB_NO_LAYOUT_FEATURE_PARAMS
+ return true; /* feature params support compiled out: accept without looking; the code below becomes unreachable */
+#endif
+ TRACE_SANITIZE (this);
+ if (tag == HB_TAG ('s','i','z','e'))
+ return_trace (u.size.sanitize (c));
+ if ((tag & 0xFFFF0000u) == HB_TAG ('s','s','\0','\0')) /* ssXX */
+ return_trace (u.stylisticSet.sanitize (c));
+ if ((tag & 0xFFFF0000u) == HB_TAG ('c','v','\0','\0')) /* cvXX */
+ return_trace (u.characterVariants.sanitize (c));
+ return_trace (true); /* unknown tags have no layout to check here */
+ }
+
+ bool subset (hb_subset_context_t *c, const Tag* tag) const
+ {
+ TRACE_SUBSET (this);
+ if (!tag) return_trace (false); /* without a tag the union member cannot be chosen */
+ if (*tag == HB_TAG ('s','i','z','e'))
+ return_trace (u.size.subset (c));
+ if ((*tag & 0xFFFF0000u) == HB_TAG ('s','s','\0','\0')) /* ssXX */
+ return_trace (u.stylisticSet.subset (c));
+ if ((*tag & 0xFFFF0000u) == HB_TAG ('c','v','\0','\0')) /* cvXX */
+ return_trace (u.characterVariants.subset (c));
+ return_trace (false); /* unlike sanitize (), unknown tags are rejected: nothing is copied */
+ }
+
+#ifndef HB_NO_LAYOUT_FEATURE_PARAMS
+ const FeatureParamsSize& get_size_params (hb_tag_t tag) const
+ {
+ if (tag == HB_TAG ('s','i','z','e'))
+ return u.size;
+ return Null (FeatureParamsSize); /* wrong tag: hand back the shared Null object */
+ }
+ const FeatureParamsStylisticSet& get_stylistic_set_params (hb_tag_t tag) const
+ {
+ if ((tag & 0xFFFF0000u) == HB_TAG ('s','s','\0','\0')) /* ssXX */
+ return u.stylisticSet;
+ return Null (FeatureParamsStylisticSet);
+ }
+ const FeatureParamsCharacterVariants& get_character_variants_params (hb_tag_t tag) const
+ {
+ if ((tag & 0xFFFF0000u) == HB_TAG ('c','v','\0','\0')) /* cvXX */
+ return u.characterVariants;
+ return Null (FeatureParamsCharacterVariants);
+ }
+#endif
+
+ private:
+ union {
+ FeatureParamsSize size;
+ FeatureParamsStylisticSet stylisticSet;
+ FeatureParamsCharacterVariants characterVariants;
+ } u;
+ public:
+ DEFINE_SIZE_MIN (0);
+};
+
+struct Feature /* Feature table: an optional FeatureParams offset plus a list of LookupList indices. */
+{
+ unsigned int get_lookup_count () const
+ { return lookupIndex.len; }
+ hb_tag_t get_lookup_index (unsigned int i) const
+ { return lookupIndex[i]; } /* NOTE: the value is a LookupList index, despite the hb_tag_t return type. */
+ unsigned int get_lookup_indexes (unsigned int start_index,
+ unsigned int *lookup_count /* IN/OUT */,
+ unsigned int *lookup_tags /* OUT */) const
+ { return lookupIndex.get_indexes (start_index, lookup_count, lookup_tags); } /* despite its name, lookup_tags receives lookup indices */
+ void add_lookup_indexes_to (hb_set_t *lookup_indexes) const
+ { lookupIndex.add_indexes_to (lookup_indexes); }
+
+ const FeatureParams &get_feature_params () const
+ { return this+featureParams; }
+
+ bool intersects_lookup_indexes (const hb_map_t *lookup_indexes) const
+ { return lookupIndex.intersects (lookup_indexes); }
+
+ bool subset (hb_subset_context_t *c,
+ hb_subset_layout_context_t *l,
+ const Tag *tag = nullptr) const
+ {
+ TRACE_SUBSET (this);
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false);
+
+ bool subset_featureParams = out->featureParams.serialize_subset (c, featureParams, this, tag);
+
+ auto it =
+ + hb_iter (lookupIndex)
+ | hb_filter (l->lookup_index_map) /* keep only lookups accepted by the map */
+ | hb_map (l->lookup_index_map) /* and translate them to their new indices */
+ ;
+
+ out->lookupIndex.serialize (c->serializer, l, it);
+ return_trace (bool (it) || subset_featureParams
+ || (tag && *tag == HB_TAG ('p', 'r', 'e', 'f'))); /* kept if any lookup survived, if params were written, or unconditionally for 'pref' */
+ }
+
+ bool sanitize (hb_sanitize_context_t *c,
+ const Record_sanitize_closure_t *closure = nullptr) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!(c->check_struct (this) && lookupIndex.sanitize (c))))
+ return_trace (false);
+
+ /* Some earlier versions of Adobe tools calculated the offset of the
+ * FeatureParams subtable from the beginning of the FeatureList table!
+ *
+ * If sanitizing "failed" for the FeatureParams subtable, try it with the
+ * alternative location. We would know sanitize "failed" if old value
+ * of the offset was non-zero, but it's zeroed now.
+ *
+ * Only do this for the 'size' feature, since at the time of the faulty
+ * Adobe tools, only the 'size' feature had FeatureParams defined.
+ */
+
+ if (likely (featureParams.is_null ()))
+ return_trace (true);
+
+ unsigned int orig_offset = featureParams; /* remember the offset before sanitize () can zero it */
+ if (unlikely (!featureParams.sanitize (c, this, closure ? closure->tag : HB_TAG_NONE)))
+ return_trace (false);
+
+ if (featureParams == 0 && closure &&
+ closure->tag == HB_TAG ('s','i','z','e') &&
+ closure->list_base && closure->list_base < this)
+ {
+ unsigned int new_offset_int = orig_offset -
+ (((char *) this) - ((char *) closure->list_base)); /* re-base the offset from the list start to this Feature */
+
+ OffsetTo<FeatureParams> new_offset;
+ /* Check that it would not overflow. */
+ new_offset = new_offset_int;
+ if (new_offset == new_offset_int &&
+ c->try_set (&featureParams, new_offset_int) &&
+ !featureParams.sanitize (c, this, closure ? closure->tag : HB_TAG_NONE)) /* closure is known non-null in this branch, so the ternary always yields closure->tag */
+ return_trace (false);
+ }
+
+ return_trace (true);
+ }
+
+ OffsetTo<FeatureParams>
+ featureParams; /* Offset to Feature Parameters table (if one
+ * has been defined for the feature), relative
+ * to the beginning of the Feature Table; = Null
+ * if not required */
+ IndexArray lookupIndex; /* Array of LookupList indices */
+ public:
+ DEFINE_SIZE_ARRAY_SIZED (4, lookupIndex);
+};
+
+typedef RecordListOf<Feature> FeatureList;
+
+
+struct LookupFlag : HBUINT16 /* 16-bit lookup flag word; the enum below names its bits and masks. */
+{
+ enum Flags {
+ RightToLeft = 0x0001u, /* bit 0 */
+ IgnoreBaseGlyphs = 0x0002u, /* bit 1 */
+ IgnoreLigatures = 0x0004u, /* bit 2 */
+ IgnoreMarks = 0x0008u, /* bit 3 */
+ IgnoreFlags = 0x000Eu, /* mask: IgnoreBaseGlyphs | IgnoreLigatures | IgnoreMarks */
+ UseMarkFilteringSet = 0x0010u, /* bit 4 */
+ Reserved = 0x00E0u, /* mask: bits 5-7 */
+ MarkAttachmentType = 0xFF00u /* mask: high byte */
+ };
+ public:
+ DEFINE_SIZE_STATIC (2);
+};
+
+} /* namespace OT */
+/* This has to be outside the namespace. */
+HB_MARK_AS_FLAG_T (OT::LookupFlag::Flags);
+namespace OT {
+
+/* A Lookup: a type, a flag word, an array of 16-bit offsets to subtables and,
+ * only when the UseMarkFilteringSet flag bit is set, one trailing HBUINT16
+ * holding the mark-filtering-set index. */
+struct Lookup
+{
+  /* Number of entries in the subtable offset array. */
+  unsigned int get_subtable_count () const { return subTable.len; }
+
+  /* The offset array, reinterpreted as offsets to TSubTable. */
+  template <typename TSubTable>
+  const OffsetArrayOf<TSubTable>& get_subtables () const
+  { return reinterpret_cast<const OffsetArrayOf<TSubTable> &> (subTable); }
+  template <typename TSubTable>
+  OffsetArrayOf<TSubTable>& get_subtables ()
+  { return reinterpret_cast<OffsetArrayOf<TSubTable> &> (subTable); }
+
+  /* The i'th subtable; its offset is resolved relative to this Lookup. */
+  template <typename TSubTable>
+  const TSubTable& get_subtable (unsigned int i) const
+  { return this+get_subtables<TSubTable> ()[i]; }
+  template <typename TSubTable>
+  TSubTable& get_subtable (unsigned int i)
+  { return this+get_subtables<TSubTable> ()[i]; }
+
+  /* Byte size of this Lookup: up to the end of the offset array, plus the
+   * trailing mark-filtering-set field when the flag says it is present. */
+  unsigned int get_size () const
+  {
+    const char *base = (const char *) this;
+    const HBUINT16 &mark_set = StructAfter<const HBUINT16> (subTable);
+    const char *end = (lookupFlag & LookupFlag::UseMarkFilteringSet)
+                    ? (const char *) &StructAfter<const char> (mark_set)
+                    : (const char *) &mark_set;
+    return end - base;
+  }
+
+  unsigned int get_type () const { return lookupType; }
+
+  /* lookup_props packs two 16-bit values into 32 bits: the LookupFlag in the
+   * low half and, if the lookup uses one, the mark-filtering-set in the high
+   * half.  glyph_props looks very similar; do not mix the two up. */
+  uint32_t get_props () const
+  {
+    unsigned int props = lookupFlag;
+    if (likely (!(props & LookupFlag::UseMarkFilteringSet)))
+      return props;
+
+    const HBUINT16 &mark_set = StructAfter<HBUINT16> (subTable);
+    props += (mark_set << 16);
+    return props;
+  }
+
+  /* Dispatch the context to each subtable in order.  Iteration stops at the
+   * first result for which c->stop_sublookup_iteration() is true; if none
+   * stops it, c->default_return_value() is returned. */
+  template <typename TSubTable, typename context_t, typename ...Ts>
+  typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+  {
+    unsigned int lookup_type = get_type ();
+    TRACE_DISPATCH (this, lookup_type);
+    const unsigned int num_subtables = get_subtable_count ();
+    unsigned int i = 0;
+    while (i < num_subtables)
+    {
+      typename context_t::return_t result =
+        get_subtable<TSubTable> (i).dispatch (c, lookup_type, hb_forward<Ts> (ds)...);
+      if (c->stop_sublookup_iteration (result))
+        return_trace (result);
+      i++;
+    }
+    return_trace (c->default_return_value ());
+  }
+
+  /* Write the header and reserve num_subtables offsets.  The low 16 bits of
+   * lookup_props become the flag word; when that has UseMarkFilteringSet
+   * set, the high 16 bits are written as the trailing mark-filtering-set. */
+  bool serialize (hb_serialize_context_t *c,
+                  unsigned int lookup_type,
+                  uint32_t lookup_props,
+                  unsigned int num_subtables)
+  {
+    TRACE_SERIALIZE (this);
+    if (unlikely (!c->extend_min (*this))) return_trace (false);
+    lookupType = lookup_type;
+    lookupFlag = lookup_props & 0xFFFFu;
+    if (unlikely (!subTable.serialize (c, num_subtables))) return_trace (false);
+
+    if (!(lookupFlag & LookupFlag::UseMarkFilteringSet))
+      return_trace (true);
+
+    /* Grow the object so that the trailing field exists before writing it. */
+    if (unlikely (!c->extend (*this))) return_trace (false);
+    HBUINT16 &mark_set = StructAfter<HBUINT16> (subTable);
+    mark_set = lookup_props >> 16;
+    return_trace (true);
+  }
+
+  /* Copy type and flags to the output, then subset every subtable that
+   * intersects the plan's glyph set. */
+  template <typename TSubTable>
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->start_embed (*this);
+    if (unlikely (!out)) return_trace (false);
+    if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+    out->lookupType = lookupType;
+    out->lookupFlag = lookupFlag;
+
+    const hb_set_t *retained = c->plan->glyphset ();
+    unsigned int type = get_type ();
+    + hb_iter (get_subtables <TSubTable> ())
+    | hb_filter ([this, retained, type] (const OffsetTo<TSubTable> &offset)
+                 { return (this+offset).intersects (retained, type); })
+    | hb_apply (subset_offset_array (c, out->get_subtables<TSubTable> (), this, type))
+    ;
+
+    return_trace (true);
+  }
+
+  /* Validate the header, the offset array, the optional trailing field and
+   * every subtable; for Extension lookups additionally require that all
+   * subtables wrap the same type. */
+  template <typename TSubTable>
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!c->check_struct (this) || !subTable.sanitize (c)) return_trace (false);
+
+    unsigned num_subtables = get_subtable_count ();
+    if (unlikely (!c->visit_subtables (num_subtables))) return_trace (false);
+
+    if (lookupFlag & LookupFlag::UseMarkFilteringSet)
+    {
+      const HBUINT16 &mark_set = StructAfter<HBUINT16> (subTable);
+      if (!mark_set.sanitize (c)) return_trace (false);
+    }
+
+    if (unlikely (!get_subtables<TSubTable> ().sanitize (c, this, get_type ())))
+      return_trace (false);
+
+    if (unlikely (get_type () == TSubTable::Extension && !c->get_edit_count ()))
+    {
+      /* Per the spec, every subtable of an Extension lookup must have the
+       * same type, and that type must not be Extension itself (the latter
+       * has already been checked).  This matters especially when one of
+       * them is a reverse type.
+       *
+       * The check is only made while the sanitizer's edit_count is zero:
+       * after edits, subtables that were checked earlier may have been
+       * made insane by edits done for later subtables.
+       * https://bugs.chromium.org/p/chromium/issues/detail?id=960331
+       */
+      unsigned int first_type = get_subtable<TSubTable> (0).u.extension.get_type ();
+      unsigned int i = 1;
+      while (i < num_subtables)
+      {
+        if (get_subtable<TSubTable> (i).u.extension.get_type () != first_type)
+          return_trace (false);
+        i++;
+      }
+    }
+    return_trace (true);
+  }
+
+  private:
+  HBUINT16 lookupType; /* Different enumerations for GSUB and GPOS */
+  HBUINT16 lookupFlag; /* Lookup qualifiers */
+  ArrayOf<Offset16>
+           subTable; /* Array of SubTables */
+/*HBUINT16 markFilteringSetX[HB_VAR_ARRAY];*//* Index (base 0) into GDEF mark glyph sets
+ * structure. This field is only present if bit
+ * UseMarkFilteringSet of lookup flags is set. */
+  public:
+  DEFINE_SIZE_ARRAY (6, subTable);
+};
+
+/* LookupList is the OffsetListOf container instantiated for Lookup. */
+using LookupList = OffsetListOf<Lookup>;
+
+/* An OffsetListOf<TLookup> that adds subsetting driven by a lookup index map. */
+template <typename TLookup>
+struct LookupOffsetList : OffsetListOf<TLookup>
+{
+  /* Pair each lookup offset with its index, keep the pairs whose index passes
+   * the l->lookup_index_map filter, and subset the offsets that remain. */
+  bool subset (hb_subset_context_t        *c,
+               hb_subset_layout_context_t *l) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->start_embed (this);
+    if (unlikely (!out)) return_trace (false);
+    if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+
+    unsigned num_lookups = this->len;
+    + hb_zip (*this, hb_range (num_lookups))
+    | hb_filter (l->lookup_index_map, hb_second)
+    | hb_map (hb_first)
+    | hb_apply (subset_offset_array (c, *out, this))
+    ;
+    return_trace (true);
+  }
+
+  /* Sanitize through the base class, with this object as the offset base. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (OffsetListOf<TLookup>::sanitize (c, this));
+  }
+};
+
+
+/*
+ * Coverage Table
+ */
+
+/* Coverage format 1: a sorted array of glyph IDs.  The coverage index of a
+ * glyph is its position in that array. */
+struct CoverageFormat1
+{
+  friend struct Coverage;
+
+  private:
+  /* Look glyph_id up in glyphArray with bfind(); on a miss bfind() is asked
+   * to store NOT_COVERED into the result instead of a position. */
+  unsigned int get_coverage (hb_codepoint_t glyph_id) const
+  {
+    unsigned int pos;
+    glyphArray.bfind (glyph_id, &pos, HB_BFIND_NOT_FOUND_STORE, NOT_COVERED);
+    return pos;
+  }
+
+  /* Serialize the sorted glyph sequence straight into glyphArray. */
+  template <typename Iterator,
+            hb_requires (hb_is_sorted_source_of (Iterator, hb_codepoint_t))>
+  bool serialize (hb_serialize_context_t *c, Iterator glyphs)
+  {
+    TRACE_SERIALIZE (this);
+    return_trace (glyphArray.serialize (c, glyphs));
+  }
+
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (glyphArray.sanitize (c));
+  }
+
+  /* True if at least one listed glyph is a member of the set. */
+  bool intersects (const hb_set_t *glyphs) const
+  {
+    /* TODO Speed up, using hb_set_next() and bsearch()? */
+    const unsigned int num_glyphs = glyphArray.len;
+    unsigned int i = 0;
+    while (i < num_glyphs)
+    {
+      if (glyphs->has (glyphArray[i]))
+        return true;
+      i++;
+    }
+    return false;
+  }
+  /* True if the glyph stored at the given coverage index is in the set. */
+  bool intersects_coverage (const hb_set_t *glyphs, unsigned int index) const
+  { return glyphs->has (glyphArray[index]); }
+
+  /* Add the whole glyph array to the set, handing it over as sorted data. */
+  template <typename set_t>
+  bool collect_coverage (set_t *glyphs) const
+  { return glyphs->add_sorted_array (glyphArray.arrayZ, glyphArray.len); }
+
+  public:
+  /* Older compilers need this to be public. */
+  /* Cursor over glyphArray: 'i' is the current position. */
+  struct iter_t
+  {
+    void init (const struct CoverageFormat1 &c_) { c = &c_; i = 0; }
+    void fini () {}
+    bool more () const { return i < c->glyphArray.len; }
+    void next () { i++; }
+    hb_codepoint_t get_glyph () const { return c->glyphArray[i]; }
+    /* Cursors differ when either the position or the table differs. */
+    bool operator != (const iter_t& o) const
+    { return i != o.i || c != o.c; }
+
+    private:
+    const struct CoverageFormat1 *c;
+    unsigned int i;
+  };
+  private:
+
+  protected:
+  HBUINT16 coverageFormat; /* Format identifier--format = 1 */
+  SortedArrayOf<HBGlyphID>
+           glyphArray; /* Array of GlyphIDs--in numerical order */
+  public:
+  DEFINE_SIZE_ARRAY (4, glyphArray);
+};
+
+/* Coverage format 2: a sorted array of glyph ranges.  Each RangeRecord holds
+ * the first and last glyph of the range and, in 'value', the coverage index
+ * of its first glyph; the following glyphs get consecutive indices. */
+struct CoverageFormat2
+{
+  friend struct Coverage;
+
+  private:
+  /* Coverage index of glyph_id, or NOT_COVERED.  A record whose first glyph
+   * is greater than its last is treated as "not found". */
+  unsigned int get_coverage (hb_codepoint_t glyph_id) const
+  {
+    const RangeRecord &range = rangeRecord.bsearch (glyph_id);
+    return likely (range.first <= range.last)
+         ? (unsigned int) range.value + (glyph_id - range.first)
+         : NOT_COVERED;
+  }
+
+  /* Serialize a sorted glyph sequence as ranges.  The sequence is walked
+   * twice: once to count the runs of consecutive glyph IDs, and once to
+   * fill in the records. */
+  template <typename Iterator,
+            hb_requires (hb_is_sorted_source_of (Iterator, hb_codepoint_t))>
+  bool serialize (hb_serialize_context_t *c, Iterator glyphs)
+  {
+    TRACE_SERIALIZE (this);
+    if (unlikely (!c->extend_min (*this))) return_trace (false);
+
+    if (unlikely (!glyphs))
+    {
+      rangeRecord.len = 0;
+      return_trace (true);
+    }
+
+    /* TODO(iter) Write more efficiently? */
+
+    /* Pass 1: count ranges.  'last' starts at -2 so that last + 1 can
+     * never equal the first glyph and a range is always opened for it. */
+    unsigned num_ranges = 0;
+    hb_codepoint_t last = (hb_codepoint_t) -2;
+    for (auto g: glyphs)
+    {
+      if (last + 1 != g)
+        num_ranges++;
+      last = g;
+    }
+
+    if (unlikely (!rangeRecord.serialize (c, num_ranges))) return_trace (false);
+
+    /* Pass 2: fill records.  'range' starts at -1 so that the first
+     * increment lands on record 0; 'count' is the running coverage index. */
+    unsigned count = 0;
+    unsigned range = (unsigned) -1;
+    last = (hb_codepoint_t) -2;
+    for (auto g: glyphs)
+    {
+      if (last + 1 != g)
+      {
+        range++;
+        rangeRecord[range].first = g;
+        rangeRecord[range].value = count;
+      }
+      rangeRecord[range].last = g;
+      last = g;
+      count++;
+    }
+
+    return_trace (true);
+  }
+
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (rangeRecord.sanitize (c));
+  }
+
+  /* True if any range intersects the set. */
+  bool intersects (const hb_set_t *glyphs) const
+  {
+    /* TODO Speed up, using hb_set_next() and bsearch()? */
+    unsigned int count = rangeRecord.len;
+    for (unsigned int i = 0; i < count; i++)
+      if (rangeRecord[i].intersects (glyphs))
+        return true;
+    return false;
+  }
+  /* True if the range that contains coverage index 'index' intersects the
+   * set.  Note the test is made on the whole range, not on the single glyph
+   * that maps to 'index'. */
+  bool intersects_coverage (const hb_set_t *glyphs, unsigned int index) const
+  {
+    unsigned int i;
+    unsigned int count = rangeRecord.len;
+    for (i = 0; i < count; i++) {
+      const RangeRecord &range = rangeRecord[i];
+      /* A range covers the indices value .. value + (last - first),
+       * both ends included, so the upper bound must be tested with <=.
+       * A strict < would miss the last index of every range and would
+       * never match a range holding a single glyph. */
+      if (range.value <= index &&
+          index <= (unsigned int) range.value + (range.last - range.first) &&
+          range.intersects (glyphs))
+        return true;
+      else if (index < range.value)
+        return false; /* Records are scanned in order; stop once past 'index'. */
+    }
+    return false;
+  }
+
+  /* Add every range to the set; stops and returns false as soon as one
+   * range's collect_coverage() fails. */
+  template <typename set_t>
+  bool collect_coverage (set_t *glyphs) const
+  {
+    unsigned int count = rangeRecord.len;
+    for (unsigned int i = 0; i < count; i++)
+      if (unlikely (!rangeRecord[i].collect_coverage (glyphs)))
+        return false;
+    return true;
+  }
+
+  public:
+  /* Older compilers need this to be public. */
+  /* Cursor over all covered glyphs: 'i' is the current range, 'j' the
+   * current glyph and 'coverage' the current coverage index. */
+  struct iter_t
+  {
+    void init (const CoverageFormat2 &c_)
+    {
+      c = &c_;
+      coverage = 0;
+      i = 0;
+      j = c->rangeRecord.len ? c->rangeRecord[0].first : 0;
+      if (unlikely (c->rangeRecord[0].first > c->rangeRecord[0].last))
+      {
+        /* Broken table. Skip. */
+        i = c->rangeRecord.len;
+      }
+    }
+    void fini () {}
+    bool more () const { return i < c->rangeRecord.len; }
+    void next ()
+    {
+      if (j >= c->rangeRecord[i].last)
+      {
+        /* End of the current range: move on to the next one, if any. */
+        i++;
+        if (more ())
+        {
+          unsigned int old = coverage;
+          j = c->rangeRecord[i].first;
+          coverage = c->rangeRecord[i].value;
+          if (unlikely (coverage != old + 1))
+          {
+            /* Broken table. Skip. Important to avoid DoS.
+             * Also, our callers depend on coverage being
+             * consecutive and monotonically increasing,
+             * ie. iota(). */
+            i = c->rangeRecord.len;
+            return;
+          }
+        }
+        return;
+      }
+      coverage++;
+      j++;
+    }
+    hb_codepoint_t get_glyph () const { return j; }
+    bool operator != (const iter_t& o) const
+    { return i != o.i || j != o.j || c != o.c; }
+
+    private:
+    const struct CoverageFormat2 *c;
+    unsigned int i, coverage;
+    hb_codepoint_t j;
+  };
+  private:
+
+  protected:
+  HBUINT16 coverageFormat; /* Format identifier--format = 2 */
+  SortedArrayOf<RangeRecord>
+           rangeRecord; /* Array of glyph ranges--ordered by
+                         * Start GlyphID. rangeCount entries
+                         * long */
+  public:
+  DEFINE_SIZE_ARRAY (4, rangeRecord);
+};
+
+/* Coverage table: a union over the two coverage formats, selected by the
+ * leading format word.  Any other format value behaves as an empty table. */
+struct Coverage
+{
+  /* Has interface. */
+  static constexpr unsigned SENTINEL = NOT_COVERED;
+  typedef unsigned int value_t;
+  value_t operator [] (hb_codepoint_t k) const { return get (k); }
+  bool has (hb_codepoint_t k) const { return (*this)[k] != SENTINEL; }
+  /* Predicate. */
+  bool operator () (hb_codepoint_t k) const { return has (k); }
+
+  unsigned int get (hb_codepoint_t k) const { return get_coverage (k); }
+  /* Coverage index of glyph_id, or NOT_COVERED. */
+  unsigned int get_coverage (hb_codepoint_t glyph_id) const
+  {
+    switch (u.format)
+    {
+      case 1:  return u.format1.get_coverage (glyph_id);
+      case 2:  return u.format2.get_coverage (glyph_id);
+      default: return NOT_COVERED;
+    }
+  }
+
+  /* Choose a format for the sorted glyph sequence and serialize with it.
+   * Format 1 is chosen when the glyph count does not exceed three times
+   * the number of consecutive runs; otherwise format 2. */
+  template <typename Iterator,
+            hb_requires (hb_is_sorted_source_of (Iterator, hb_codepoint_t))>
+  bool serialize (hb_serialize_context_t *c, Iterator glyphs)
+  {
+    TRACE_SERIALIZE (this);
+    if (unlikely (!c->extend_min (*this))) return_trace (false);
+
+    unsigned num_glyphs = 0;
+    unsigned num_runs = 0;
+    /* -2, so that prev + 1 never matches the very first glyph. */
+    hb_codepoint_t prev = (hb_codepoint_t) -2;
+    for (auto g: glyphs)
+    {
+      if (prev + 1 != g)
+        num_runs++;
+      prev = g;
+      num_glyphs++;
+    }
+    u.format = num_glyphs <= num_runs * 3 ? 1 : 2;
+
+    switch (u.format)
+    {
+      case 1:  return_trace (u.format1.serialize (c, glyphs));
+      case 2:  return_trace (u.format2.serialize (c, glyphs));
+      default: return_trace (false);
+    }
+  }
+
+  /* Serialize the covered glyphs that are in the plan's glyph set, mapped
+   * through the plan's glyph map.  Returns whether any glyph remained. */
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    const hb_set_t &retained = *c->plan->glyphset ();
+    const hb_map_t &remap = *c->plan->glyph_map;
+
+    auto it =
+    + iter ()
+    | hb_filter (retained)
+    | hb_map_retains_sorting (remap)
+    ;
+
+    bool non_empty = bool (it);
+    Coverage_serialize (c->serializer, it);
+    return_trace (non_empty);
+  }
+
+  /* Check the format word, then the matching format; unknown formats pass. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!u.format.sanitize (c)) return_trace (false);
+    switch (u.format)
+    {
+      case 1:  return_trace (u.format1.sanitize (c));
+      case 2:  return_trace (u.format2.sanitize (c));
+      default: return_trace (true);
+    }
+  }
+
+  bool intersects (const hb_set_t *glyphs) const
+  {
+    switch (u.format)
+    {
+      case 1:  return u.format1.intersects (glyphs);
+      case 2:  return u.format2.intersects (glyphs);
+      default: return false;
+    }
+  }
+  bool intersects_coverage (const hb_set_t *glyphs, unsigned int index) const
+  {
+    switch (u.format)
+    {
+      case 1:  return u.format1.intersects_coverage (glyphs, index);
+      case 2:  return u.format2.intersects_coverage (glyphs, index);
+      default: return false;
+    }
+  }
+
+  /* Might return false if array looks unsorted.
+   * Used for faster rejection of corrupt data. */
+  template <typename set_t>
+  bool collect_coverage (set_t *glyphs) const
+  {
+    switch (u.format)
+    {
+      case 1:  return u.format1.collect_coverage (glyphs);
+      case 2:  return u.format2.collect_coverage (glyphs);
+      default: return false;
+    }
+  }
+
+  /* Sorted iterator over the covered glyphs.  It remembers the format and
+   * forwards every operation to the matching per-format cursor. */
+  struct iter_t : hb_iter_with_fallback_t<iter_t, hb_codepoint_t>
+  {
+    static constexpr bool is_sorted_iterator = true;
+    iter_t (const Coverage &c_ = Null (Coverage))
+    {
+      /* Zero everything first, then set up the cursor that is in use. */
+      memset (this, 0, sizeof (*this));
+      format = c_.u.format;
+      if (format == 1)
+        u.format1.init (c_.u.format1);
+      else if (format == 2)
+        u.format2.init (c_.u.format2);
+    }
+    bool __more__ () const
+    {
+      if (format == 1) return u.format1.more ();
+      if (format == 2) return u.format2.more ();
+      return false;
+    }
+    void __next__ ()
+    {
+      if (format == 1)
+        u.format1.next ();
+      else if (format == 2)
+        u.format2.next ();
+    }
+    typedef hb_codepoint_t __item_t__;
+    __item_t__ __item__ () const { return get_glyph (); }
+
+    hb_codepoint_t get_glyph () const
+    {
+      if (format == 1) return u.format1.get_glyph ();
+      if (format == 2) return u.format2.get_glyph ();
+      return 0;
+    }
+    /* Iterators of different formats always differ; iterators sharing an
+     * unknown format never do. */
+    bool operator != (const iter_t& o) const
+    {
+      if (format != o.format) return true;
+      if (format == 1) return u.format1 != o.u.format1;
+      if (format == 2) return u.format2 != o.u.format2;
+      return false;
+    }
+
+    private:
+    unsigned int format;
+    union {
+    CoverageFormat2::iter_t format2; /* Put this one first since it's larger; helps shut up compiler. */
+    CoverageFormat1::iter_t format1;
+    } u;
+  };
+  iter_t iter () const { return iter_t (*this); }
+
+  protected:
+  union {
+  HBUINT16        format;  /* Format identifier */
+  CoverageFormat1 format1;
+  CoverageFormat2 format2;
+  } u;
+  public:
+  DEFINE_SIZE_UNION (2, format);
+};
+
+/* Start embedding a Coverage at the serializer's current position and
+ * serialize the glyphs of 'it' into it.  The result is not reported here. */
+template<typename Iterator>
+static inline void
+Coverage_serialize (hb_serialize_context_t *c,
+                    Iterator it)
+{
+  Coverage *out = c->start_embed<Coverage> ();
+  out->serialize (c, it);
+}
+
+/* Serialize a ClassDef for 'glyphs'.
+ *
+ * With no klass_map, every glyph keeps the class gid_klass_map gives it.
+ * With a klass_map, the classes in 'klasses' that are not yet mapped are
+ * given consecutive new values, and the glyphs are serialized with the
+ * remapped classes.  klass_map is both read and extended. */
+static void ClassDef_remap_and_serialize (hb_serialize_context_t *c,
+                                          const hb_set_t &glyphset,
+                                          const hb_map_t &gid_klass_map,
+                                          hb_sorted_vector_t<HBGlyphID> &glyphs,
+                                          const hb_set_t &klasses,
+                                          hb_map_t *klass_map /*INOUT*/)
+{
+  if (!klass_map)
+  {
+    auto original_klasses = + glyphs.iter ()
+                            | hb_map (gid_klass_map);
+    ClassDef_serialize (c, hb_zip (glyphs.iter (), original_klasses));
+    return;
+  }
+
+  /* A glyph with no class assigned belongs to class zero (0).  If there is
+   * any such glyph, the remapping has to begin with 0->0. */
+  const bool has_unclassed_glyph =
+    glyphset.get_population () > gid_klass_map.get_population ();
+  if (has_unclassed_glyph)
+    klass_map->set (0, 0);
+
+  /* Next new class value to give out; 0 is skipped when already taken. */
+  unsigned next_klass = klass_map->has (0) ? 1 : 0;
+  for (const unsigned old_klass: klasses.iter ())
+  {
+    if (!klass_map->has (old_klass))
+    {
+      klass_map->set (old_klass, next_klass);
+      next_klass++;
+    }
+  }
+
+  auto remapped =
+  + glyphs.iter ()
+  | hb_map_retains_sorting ([&] (const HBGlyphID& gid) -> hb_pair_t<hb_codepoint_t, unsigned>
+                            {
+                              unsigned mapped = klass_map->get (gid_klass_map[gid]);
+                              return hb_pair ((hb_codepoint_t)gid, mapped);
+                            })
+  ;
+
+  c->propagate_error (glyphs, klasses);
+  ClassDef_serialize (c, remapped);
+}
+
+/*
+ * Class Definition Table
+ */
+
+/* Class definition format 1: a start glyph followed by an array holding one
+ * class value for each consecutive glyph ID from the start glyph on. */
+struct ClassDefFormat1
+{
+  friend struct ClassDef;
+
+  private:
+  /* Class of glyph_id, read from classValue at the offset from startGlyph. */
+  unsigned int get_class (hb_codepoint_t glyph_id) const
+  {
+    return classValue[(unsigned int) (glyph_id - startGlyph)];
+  }
+
+  /* Serialize (glyph, class) pairs.  The array spans from the first pair's
+   * glyph to the largest glyph seen; each pair then fills in its slot. */
+  template<typename Iterator,
+           hb_requires (hb_is_iterator (Iterator))>
+  bool serialize (hb_serialize_context_t *c,
+                  Iterator it)
+  {
+    TRACE_SERIALIZE (this);
+    if (unlikely (!c->extend_min (*this))) return_trace (false);
+
+    if (unlikely (!it))
+    {
+      startGlyph = 0;
+      classValue.len = 0;
+      return_trace (true);
+    }
+
+    hb_codepoint_t lowest = (*it).first;
+    hb_codepoint_t highest = + it
+                             | hb_map (hb_first)
+                             | hb_reduce (hb_max, 0u);
+    unsigned span = highest - lowest + 1;
+
+    startGlyph = lowest;
+    if (unlikely (!classValue.serialize (c, span))) return_trace (false);
+    for (const hb_pair_t<hb_codepoint_t, unsigned> entry : + it)
+    {
+      unsigned slot = entry.first - lowest;
+      classValue[slot] = entry.second;
+    }
+    return_trace (true);
+  }
+
+  /* Collect the retained glyphs that have a non-zero class, under their new
+   * glyph IDs, and hand them over for remapping and serialization.  Returns
+   * whether any glyph was collected. */
+  bool subset (hb_subset_context_t *c,
+               hb_map_t *klass_map = nullptr /*OUT*/) const
+  {
+    TRACE_SUBSET (this);
+    const hb_set_t &glyphset = *c->plan->_glyphset_gsub;
+    const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+    hb_sorted_vector_t<HBGlyphID> kept_glyphs;
+    hb_set_t used_klasses;
+    hb_map_t new_gid_to_klass;
+
+    hb_codepoint_t first = startGlyph;
+    hb_codepoint_t limit = first + classValue.len;
+    for (const hb_codepoint_t old_gid : + hb_range (first, limit)
+                                        | hb_filter (glyphset))
+    {
+      unsigned klass = classValue[old_gid - first];
+      if (klass)
+      {
+        kept_glyphs.push (glyph_map[old_gid]);
+        new_gid_to_klass.set (glyph_map[old_gid], klass);
+        used_klasses.add (klass);
+      }
+    }
+
+    ClassDef_remap_and_serialize (c->serializer, glyphset, new_gid_to_klass,
+                                  kept_glyphs, used_klasses, klass_map);
+    return_trace ((bool) kept_glyphs);
+  }
+
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (c->check_struct (this) && classValue.sanitize (c));
+  }
+
+  /* Add the runs of non-zero class entries to the set.  Each run is passed
+   * to add_range() as (glyph of the run's first entry, glyph of the entry
+   * index at which the run stopped). */
+  template <typename set_t>
+  bool collect_coverage (set_t *glyphs) const
+  {
+    unsigned int run_start = 0;
+    const unsigned int total = classValue.len;
+    for (unsigned int i = 0; i < total; i++)
+    {
+      if (classValue[i])
+        continue;
+
+      /* Entry i has class zero: it ends the run begun at run_start. */
+      if (run_start != i &&
+          unlikely (!glyphs->add_range (startGlyph + run_start, startGlyph + i)))
+        return false;
+
+      run_start = i + 1;
+    }
+    /* A run that reaches the end of the array is still open here. */
+    if (run_start != total &&
+        unlikely (!glyphs->add_range (startGlyph + run_start, startGlyph + total)))
+      return false;
+
+    return true;
+  }
+
+  /* Add every glyph whose class equals klass.  Always returns true. */
+  template <typename set_t>
+  bool collect_class (set_t *glyphs, unsigned int klass) const
+  {
+    const unsigned int total = classValue.len;
+    unsigned int i = 0;
+    while (i < total)
+    {
+      if (classValue[i] == klass) glyphs->add (startGlyph + i);
+      i++;
+    }
+    return true;
+  }
+
+  /* True if some glyph of the set lies inside the array and has a
+   * non-zero class. */
+  bool intersects (const hb_set_t *glyphs) const
+  {
+    /* TODO Speed up, using hb_set_next()? */
+    hb_codepoint_t first = startGlyph;
+    hb_codepoint_t limit = first + classValue.len;
+    /* hb_set_next() advances past its argument, so begin one below. */
+    hb_codepoint_t g = first - 1;
+    while (hb_set_next (glyphs, &g) && g < limit)
+      if (classValue[g - first]) return true;
+    return false;
+  }
+  /* True if some glyph of the set has the given class.  For class 0 that
+   * includes every glyph outside the range covered by the array. */
+  bool intersects_class (const hb_set_t *glyphs, unsigned int klass) const
+  {
+    const unsigned int total = classValue.len;
+    if (klass == 0)
+    {
+      /* Match if there's any glyph that is not listed! */
+      hb_codepoint_t g = HB_SET_VALUE_INVALID;
+      if (!hb_set_next (glyphs, &g)) return false; /* Empty set. */
+      if (g < startGlyph) return true;             /* A glyph below the array. */
+      g = startGlyph + total - 1;
+      if (hb_set_next (glyphs, &g)) return true;   /* A glyph above the array. */
+      /* Fall through. */
+    }
+    for (unsigned int i = 0; i < total; i++)
+    {
+      if (classValue[i] != klass) continue;
+      if (glyphs->has (startGlyph + i))
+        return true;
+    }
+    return false;
+  }
+
+  protected:
+  HBUINT16  classFormat; /* Format identifier--format = 1 */
+  HBGlyphID startGlyph;  /* First GlyphID of the classValueArray */
+  ArrayOf<HBUINT16>
+            classValue;  /* Array of Class Values--one per GlyphID */
+  public:
+  DEFINE_SIZE_ARRAY (6, classValue);
+};
+
+/* Class definition format 2: a sorted array of glyph ranges, where the
+ * 'value' of each RangeRecord is the class of all glyphs in that range. */
+struct ClassDefFormat2
+{
+  friend struct ClassDef;
+
+  private:
+  /* Class of glyph_id: the value of the record bsearch() returns for it. */
+  unsigned int get_class (hb_codepoint_t glyph_id) const
+  {
+    return rangeRecord.bsearch (glyph_id).value;
+  }
+
+  /* Serialize (glyph, class) pairs as ranges.  A new record is started each
+   * time the glyph ID is not the previous one plus one, or the class
+   * changes; records are appended with c->copy() as they are opened. */
+  template<typename Iterator,
+           hb_requires (hb_is_iterator (Iterator))>
+  bool serialize (hb_serialize_context_t *c,
+                  Iterator it)
+  {
+    TRACE_SERIALIZE (this);
+    if (unlikely (!c->extend_min (*this))) return_trace (false);
+
+    if (unlikely (!it))
+    {
+      rangeRecord.len = 0;
+      return_trace (true);
+    }
+
+    unsigned range_count = 1;
+    hb_codepoint_t last_gid = (*it).first;
+    unsigned last_klass = (*it).second;
+
+    /* Scratch record, filled in and copied out for every new range. */
+    RangeRecord scratch;
+    scratch.first = last_gid;
+    scratch.last = last_gid;
+    scratch.value = last_klass;
+
+    RangeRecord *open_range = c->copy (scratch);
+    if (unlikely (!open_range)) return_trace (false);
+
+    for (const auto entry : + (++it))
+    {
+      hb_codepoint_t gid = entry.first;
+      unsigned klass = entry.second;
+
+      const bool continues_range = (gid == last_gid + 1 && klass == last_klass);
+      if (!continues_range)
+      {
+        /* Stop if the previous copy failed. */
+        if (unlikely (!open_range)) break;
+        /* Close the range that was open and start a fresh one. */
+        open_range->last = last_gid;
+        range_count++;
+
+        scratch.first = gid;
+        scratch.last = gid;
+        scratch.value = klass;
+
+        open_range = c->copy (scratch);
+      }
+
+      last_klass = klass;
+      last_gid = gid;
+    }
+
+    if (likely (open_range)) open_range->last = last_gid;
+    rangeRecord.len = range_count;
+    return_trace (true);
+  }
+
+  /* Collect the retained glyphs of every range with a non-zero class, under
+   * their new glyph IDs, and hand them over for remapping and
+   * serialization.  Returns whether any glyph was collected. */
+  bool subset (hb_subset_context_t *c,
+               hb_map_t *klass_map = nullptr /*OUT*/) const
+  {
+    TRACE_SUBSET (this);
+    const hb_set_t &glyphset = *c->plan->_glyphset_gsub;
+    const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+    hb_sorted_vector_t<HBGlyphID> kept_glyphs;
+    hb_set_t used_klasses;
+    hb_map_t new_gid_to_klass;
+
+    const unsigned num_ranges = rangeRecord.len;
+    for (unsigned r = 0; r < num_ranges; r++)
+    {
+      unsigned klass = rangeRecord[r].value;
+      if (!klass) continue;
+      hb_codepoint_t first = rangeRecord[r].first;
+      hb_codepoint_t limit = rangeRecord[r].last + 1;
+      for (hb_codepoint_t old_gid = first; old_gid < limit; old_gid++)
+      {
+        if (glyphset.has (old_gid))
+        {
+          kept_glyphs.push (glyph_map[old_gid]);
+          new_gid_to_klass.set (glyph_map[old_gid], klass);
+          used_klasses.add (klass);
+        }
+      }
+    }
+
+    ClassDef_remap_and_serialize (c->serializer, glyphset, new_gid_to_klass,
+                                  kept_glyphs, used_klasses, klass_map);
+    return_trace ((bool) kept_glyphs);
+  }
+
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (rangeRecord.sanitize (c));
+  }
+
+  /* Add every range with a non-zero class to the set; returns false as soon
+   * as one range's collect_coverage() fails. */
+  template <typename set_t>
+  bool collect_coverage (set_t *glyphs) const
+  {
+    const unsigned int num_ranges = rangeRecord.len;
+    for (unsigned int r = 0; r < num_ranges; r++)
+    {
+      if (!rangeRecord[r].value) continue;
+      if (unlikely (!rangeRecord[r].collect_coverage (glyphs)))
+        return false;
+    }
+    return true;
+  }
+
+  /* Add every range whose class equals klass to the set; returns false as
+   * soon as one range's collect_coverage() fails. */
+  template <typename set_t>
+  bool collect_class (set_t *glyphs, unsigned int klass) const
+  {
+    const unsigned int num_ranges = rangeRecord.len;
+    for (unsigned int r = 0; r < num_ranges; r++)
+    {
+      if (rangeRecord[r].value != klass) continue;
+      if (unlikely (!rangeRecord[r].collect_coverage (glyphs)))
+        return false;
+    }
+    return true;
+  }
+
+  /* True if any range intersects the set. */
+  bool intersects (const hb_set_t *glyphs) const
+  {
+    /* TODO Speed up, using hb_set_next() and bsearch()? */
+    const unsigned int num_ranges = rangeRecord.len;
+    unsigned int r = 0;
+    while (r < num_ranges)
+    {
+      if (rangeRecord[r].intersects (glyphs))
+        return true;
+      r++;
+    }
+    return false;
+  }
+  /* True if some glyph of the set has the given class.  For class 0 that
+   * includes glyphs lying before, between or after the listed ranges. */
+  bool intersects_class (const hb_set_t *glyphs, unsigned int klass) const
+  {
+    const unsigned int num_ranges = rangeRecord.len;
+    if (klass == 0)
+    {
+      /* Match if there's any glyph that is not listed! */
+      hb_codepoint_t g = HB_SET_VALUE_INVALID;
+      for (unsigned int r = 0; r < num_ranges; r++)
+      {
+        if (!hb_set_next (glyphs, &g))
+          break;
+        /* The next set member comes before this range starts. */
+        if (g < rangeRecord[r].first)
+          return true;
+        /* Otherwise continue the search from the end of this range. */
+        g = rangeRecord[r].last;
+      }
+      if (g != HB_SET_VALUE_INVALID && hb_set_next (glyphs, &g))
+        return true;
+      /* Fall through. */
+    }
+    for (unsigned int r = 0; r < num_ranges; r++)
+    {
+      if (rangeRecord[r].value != klass) continue;
+      if (rangeRecord[r].intersects (glyphs))
+        return true;
+    }
+    return false;
+  }
+
+  protected:
+  HBUINT16 classFormat; /* Format identifier--format = 2 */
+  SortedArrayOf<RangeRecord>
+           rangeRecord; /* Array of glyph ranges--ordered by
+                         * Start GlyphID */
+  public:
+  DEFINE_SIZE_ARRAY (4, rangeRecord);
+};
+
+/* Class definition table: a union over the two class definition formats,
+ * selected by the leading format word.  With any other format value every
+ * glyph is reported as class 0. */
+struct ClassDef
+{
+  /* Has interface. */
+  static constexpr unsigned SENTINEL = 0;
+  typedef unsigned int value_t;
+  value_t operator [] (hb_codepoint_t k) const { return get (k); }
+  bool has (hb_codepoint_t k) const { return (*this)[k] != SENTINEL; }
+  /* Projection. */
+  hb_codepoint_t operator () (hb_codepoint_t k) const { return get (k); }
+
+  unsigned int get (hb_codepoint_t k) const { return get_class (k); }
+  /* Class of glyph_id, or 0. */
+  unsigned int get_class (hb_codepoint_t glyph_id) const
+  {
+    switch (u.format)
+    {
+      case 1:  return u.format1.get_class (glyph_id);
+      case 2:  return u.format2.get_class (glyph_id);
+      default: return 0;
+    }
+  }
+
+  /* Choose a format for the (glyph, class) pairs and serialize with it.
+   * Format 2 is the default; format 1 is chosen when one plus the width of
+   * the glyph span does not exceed three times the estimated range count. */
+  template<typename Iterator,
+           hb_requires (hb_is_iterator (Iterator))>
+  bool serialize (hb_serialize_context_t *c, Iterator it)
+  {
+    TRACE_SERIALIZE (this);
+    if (unlikely (!c->extend_min (*this))) return_trace (false);
+
+    unsigned chosen_format = 2;
+    if (likely (it))
+    {
+      hb_codepoint_t lowest = (*it).first;
+      hb_codepoint_t highest = + it
+                               | hb_map (hb_first)
+                               | hb_reduce (hb_max, 0u);
+
+      unsigned range_count = 1;
+      hb_codepoint_t last_gid = lowest;
+      unsigned last_klass = (*it).second;
+
+      for (const auto entry : it)
+      {
+        hb_codepoint_t gid = entry.first;
+        unsigned klass = entry.second;
+        /* The first glyph and class-zero glyphs are not counted. */
+        if (gid == lowest || !klass) continue;
+        const bool continues_range = (gid == last_gid + 1 && klass == last_klass);
+        if (!continues_range)
+          range_count++;
+
+        last_gid = gid;
+        last_klass = klass;
+      }
+
+      if (1 + (highest - lowest + 1) <= range_count * 3)
+        chosen_format = 1;
+    }
+    u.format = chosen_format;
+
+    switch (u.format)
+    {
+      case 1:  return_trace (u.format1.serialize (c, it));
+      case 2:  return_trace (u.format2.serialize (c, it));
+      default: return_trace (false);
+    }
+  }
+
+  /* Subset through the matching format; unknown formats fail. */
+  bool subset (hb_subset_context_t *c,
+               hb_map_t *klass_map = nullptr /*OUT*/) const
+  {
+    TRACE_SUBSET (this);
+    switch (u.format)
+    {
+      case 1:  return_trace (u.format1.subset (c, klass_map));
+      case 2:  return_trace (u.format2.subset (c, klass_map));
+      default: return_trace (false);
+    }
+  }
+
+  /* Check the format word, then the matching format; unknown formats pass. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!u.format.sanitize (c)) return_trace (false);
+    switch (u.format)
+    {
+      case 1:  return_trace (u.format1.sanitize (c));
+      case 2:  return_trace (u.format2.sanitize (c));
+      default: return_trace (true);
+    }
+  }
+
+  /* Might return false if array looks unsorted.
+   * Used for faster rejection of corrupt data. */
+  template <typename set_t>
+  bool collect_coverage (set_t *glyphs) const
+  {
+    switch (u.format)
+    {
+      case 1:  return u.format1.collect_coverage (glyphs);
+      case 2:  return u.format2.collect_coverage (glyphs);
+      default: return false;
+    }
+  }
+
+  /* Might return false if array looks unsorted.
+   * Used for faster rejection of corrupt data. */
+  template <typename set_t>
+  bool collect_class (set_t *glyphs, unsigned int klass) const
+  {
+    switch (u.format)
+    {
+      case 1:  return u.format1.collect_class (glyphs, klass);
+      case 2:  return u.format2.collect_class (glyphs, klass);
+      default: return false;
+    }
+  }
+
+  bool intersects (const hb_set_t *glyphs) const
+  {
+    switch (u.format)
+    {
+      case 1:  return u.format1.intersects (glyphs);
+      case 2:  return u.format2.intersects (glyphs);
+      default: return false;
+    }
+  }
+  bool intersects_class (const hb_set_t *glyphs, unsigned int klass) const
+  {
+    switch (u.format)
+    {
+      case 1:  return u.format1.intersects_class (glyphs, klass);
+      case 2:  return u.format2.intersects_class (glyphs, klass);
+      default: return false;
+    }
+  }
+
+  protected:
+  union {
+  HBUINT16        format;  /* Format identifier */
+  ClassDefFormat1 format1;
+  ClassDefFormat2 format2;
+  } u;
+  public:
+  DEFINE_SIZE_UNION (2, format);
+};
+
+/* Start embedding a ClassDef at the serializer's current position and
+ * serialize the pairs of 'it' into it.  The result is not reported here. */
+template<typename Iterator>
+static inline void ClassDef_serialize (hb_serialize_context_t *c,
+                                       Iterator it)
+{
+  ClassDef *out = c->start_embed<ClassDef> ();
+  out->serialize (c, it);
+}
+
+
+/*
+ * Item Variation Store
+ */
+
+/* One axis of a variation region, given by start, peak and end coordinates. */
+struct VarRegionAxis
+{
+  /* Scalar this axis contributes for the given coordinate:
+   *   1  if the start/peak/end configuration is invalid, the peak is zero,
+   *      or the coordinate sits exactly on the peak;
+   *   0  if the coordinate is at or outside start/end;
+   *   otherwise the linear interpolation between the nearer end and the peak. */
+  float evaluate (int coord) const
+  {
+    const int start = startCoord, peak = peakCoord, end = endCoord;
+
+    /* TODO Move these to sanitize(). */
+    const bool out_of_order = start > peak || peak > end;
+    if (unlikely (out_of_order))
+      return 1.;
+    const bool spans_zero_off_peak = start < 0 && end > 0 && peak != 0;
+    if (unlikely (spans_zero_off_peak))
+      return 1.;
+
+    if (peak == 0 || coord == peak)
+      return 1.;
+
+    if (coord <= start || end <= coord)
+      return 0.;
+
+    /* Interpolate */
+    return coord < peak
+         ? float (coord - start) / (peak - start)
+         : float (end - coord) / (end - peak);
+  }
+
+  /* Only the struct's extent is checked here.
+   * TODO Handle invalid start/peak/end configs, so we don't
+   * have to do that at runtime. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (c->check_struct (this));
+  }
+
+  public:
+  F2DOT14 startCoord;
+  F2DOT14 peakCoord;
+  F2DOT14 endCoord;
+  public:
+  DEFINE_SIZE_STATIC (6);
+};
+
+/* List of variation regions, stored as a flat array of
+ * regionCount * axisCount VarRegionAxis records (axisCount per region). */
+struct VarRegionList
+{
+  /* Scalar of one region: the product of its per-axis scalars.  Axes
+   * beyond coord_len are evaluated at coordinate 0.  An out-of-range
+   * region index, or any axis scalar of zero, yields 0. */
+  float evaluate (unsigned int region_index,
+                  const int *coords, unsigned int coord_len) const
+  {
+    if (unlikely (region_index >= regionCount))
+      return 0.;
+
+    const VarRegionAxis *region_axes = axesZ.arrayZ + (region_index * axisCount);
+
+    float scalar = 1.;
+    const unsigned int num_axes = axisCount;
+    unsigned int axis = 0;
+    while (axis < num_axes)
+    {
+      int coord = axis < coord_len ? coords[axis] : 0;
+      float factor = region_axes[axis].evaluate (coord);
+      if (factor == 0.f)
+        return 0.;
+      scalar *= factor;
+      axis++;
+    }
+    return scalar;
+  }
+
+  /* Check the header and all axisCount * regionCount axis records. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (c->check_struct (this) &&
+                  axesZ.sanitize (c, (unsigned int) axisCount * (unsigned int) regionCount));
+  }
+
+  /* Serialize the regions of 'src' selected by region_map.  New region r
+   * is a copy of the source region region_map.backward (r); the call fails
+   * if that index is outside the source list. */
+  bool serialize (hb_serialize_context_t *c, const VarRegionList *src, const hb_bimap_t &region_map)
+  {
+    TRACE_SERIALIZE (this);
+    if (unlikely (!c->allocate_min<VarRegionList> ())) return_trace (false);
+    axisCount = src->axisCount;
+    regionCount = region_map.get_population ();
+    /* With both counts set, reserve room for the axis records. */
+    if (unlikely (!c->allocate_size<VarRegionList> (get_size () - min_size))) return_trace (false);
+    const unsigned int src_region_count = src->get_region_count ();
+    for (unsigned int r = 0; r < regionCount; r++)
+    {
+      unsigned int src_region = region_map.backward (r);
+      if (src_region >= src_region_count) return_trace (false);
+      /* Copy all axis records of the region in one go. */
+      memcpy (&axesZ[axisCount * r],
+              &src->axesZ[axisCount * src_region],
+              VarRegionAxis::static_size * axisCount);
+    }
+
+    return_trace (true);
+  }
+
+  /* Header plus one VarRegionAxis per (axis, region) pair. */
+  unsigned int get_size () const { return min_size + VarRegionAxis::static_size * axisCount * regionCount; }
+  unsigned int get_region_count () const { return regionCount; }
+
+  protected:
+  HBUINT16 axisCount;
+  HBUINT16 regionCount;
+  UnsizedArrayOf<VarRegionAxis>
+           axesZ;
+  public:
+  DEFINE_SIZE_ARRAY (4, axesZ);
+};
+
+struct VarData
+{ /* A region index list followed by a matrix of per-item deltas (itemCount rows). */
+ unsigned int get_region_index_count () const
+ { return regionIndices.len; }
+ /* Row stride used by the delta accessors below: the first shortCount columns are
+  * read as HBINT16 and the remaining regionIndices.len - shortCount as HBINT8. */
+ unsigned int get_row_size () const
+ { return shortCount + regionIndices.len; }
+ /* NOTE(review): this counts only the delta rows (itemCount * row size); the fixed
+  * header and the regionIndices array are not included -- confirm callers expect that. */
+ unsigned int get_size () const
+ { return itemCount * get_row_size (); }
+ /* Accumulates scalar * delta over every region column of row `inner`.
+  * Returns 0 when `inner` is not below itemCount. The scalar for each column
+  * comes from regions.evaluate () on that column's region index. */
+ float get_delta (unsigned int inner,
+ const int *coords, unsigned int coord_count,
+ const VarRegionList &regions) const
+ {
+ if (unlikely (inner >= itemCount))
+ return 0.;
+
+ unsigned int count = regionIndices.len;
+ unsigned int scount = shortCount;
+
+ const HBUINT8 *bytes = get_delta_bytes ();
+ const HBUINT8 *row = bytes + inner * (scount + count); /* same stride as get_row_size () */
+
+ float delta = 0.;
+ unsigned int i = 0;
+ /* Columns [0, scount) are read through an HBINT16 cursor. */
+ const HBINT16 *scursor = reinterpret_cast<const HBINT16 *> (row);
+ for (; i < scount; i++)
+ {
+ float scalar = regions.evaluate (regionIndices.arrayZ[i], coords, coord_count);
+ delta += scalar * *scursor++;
+ }
+ const HBINT8 *bcursor = reinterpret_cast<const HBINT8 *> (scursor); /* columns [scount, count) are HBINT8 */
+ for (; i < count; i++)
+ {
+ float scalar = regions.evaluate (regionIndices.arrayZ[i], coords, coord_count);
+ delta += scalar * *bcursor++;
+ }
+
+ return delta;
+ }
+ /* Fills scalars[0, num_scalars): the first min (num_scalars, regionIndices.len)
+  * entries come from regions.evaluate (), any remaining entries are set to 0. */
+ void get_scalars (const int *coords, unsigned int coord_count,
+ const VarRegionList &regions,
+ float *scalars /*OUT */,
+ unsigned int num_scalars) const
+ {
+ unsigned count = hb_min (num_scalars, regionIndices.len);
+ for (unsigned int i = 0; i < count; i++)
+ scalars[i] = regions.evaluate (regionIndices.arrayZ[i], coords, coord_count);
+ for (unsigned int i = count; i < num_scalars; i++)
+ scalars[i] = 0.f;
+ }
+ /* Validates the fixed header, the region index array, that shortCount does not
+  * exceed regionIndices.len, and the delta bytes (itemCount and get_row_size ()
+  * are handed to check_range). */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ regionIndices.sanitize (c) &&
+ shortCount <= regionIndices.len &&
+ c->check_range (get_delta_bytes (),
+ itemCount,
+ get_row_size ()));
+ }
+ /* Writes a reduced copy of `src` into this object.
+  * Rows are the items listed in inner_map (new row i <- src row inner_map.backward (i)).
+  * Columns whose retained deltas are all zero are dropped; columns with any delta
+  * outside [-128, 127] become 16-bit columns and are placed first; region indices
+  * are translated through region_map. Returns false if the serializer cannot
+  * provide the required space. */
+ bool serialize (hb_serialize_context_t *c,
+ const VarData *src,
+ const hb_inc_bimap_t &inner_map,
+ const hb_bimap_t &region_map)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!c->extend_min (*this))) return_trace (false);
+ itemCount = inner_map.get_next_value ();
+
+ /* Optimize short count */
+ unsigned short ri_count = src->regionIndices.len;
+ enum delta_size_t { kZero=0, kByte, kShort };
+ hb_vector_t<delta_size_t> delta_sz;
+ hb_vector_t<unsigned int> ri_map; /* maps old index to new index */
+ delta_sz.resize (ri_count);
+ ri_map.resize (ri_count);
+ unsigned int new_short_count = 0;
+ unsigned int r;
+ for (r = 0; r < ri_count; r++) /* pass 1: classify each source column */
+ {
+ delta_sz[r] = kZero;
+ for (unsigned int i = 0; i < inner_map.get_next_value (); i++)
+ {
+ unsigned int old = inner_map.backward (i);
+ int16_t delta = src->get_item_delta (old, r);
+ if (delta < -128 || 127 < delta)
+ {
+ delta_sz[r] = kShort;
+ new_short_count++;
+ break; /* one wide delta is enough to make the column 16-bit */
+ }
+ else if (delta != 0)
+ delta_sz[r] = kByte;
+ }
+ }
+ unsigned int short_index = 0;
+ unsigned int byte_index = new_short_count; /* byte columns are numbered after all short columns */
+ unsigned int new_ri_count = 0;
+ for (r = 0; r < ri_count; r++) /* pass 2: assign new column indices to kept columns */
+ if (delta_sz[r])
+ {
+ ri_map[r] = (delta_sz[r] == kShort)? short_index++ : byte_index++;
+ new_ri_count++;
+ }
+
+ shortCount = new_short_count;
+ regionIndices.len = new_ri_count;
+ /* Space still needed: the region index entries (their length field is excluded
+  * per the inline note) plus the delta rows. */
+ unsigned int size = regionIndices.get_size () - HBUINT16::static_size/*regionIndices.len*/ + (get_row_size () * itemCount);
+ if (unlikely (!c->allocate_size<HBUINT8> (size)))
+ return_trace (false);
+
+ for (r = 0; r < ri_count; r++)
+ if (delta_sz[r]) regionIndices[ri_map[r]] = region_map[src->regionIndices[r]];
+ /* Copy the retained deltas into their new row/column positions. */
+ for (unsigned int i = 0; i < itemCount; i++)
+ {
+ unsigned int old = inner_map.backward (i);
+ for (unsigned int r = 0; r < ri_count; r++)
+ if (delta_sz[r]) set_item_delta (i, ri_map[r], src->get_item_delta (old, r));
+ }
+
+ return_trace (true);
+ }
+ /* Adds to region_map every region index that has at least one non-zero delta
+  * among the items listed in inner_map; regions already present are skipped. */
+ void collect_region_refs (hb_inc_bimap_t &region_map, const hb_inc_bimap_t &inner_map) const
+ {
+ for (unsigned int r = 0; r < regionIndices.len; r++)
+ {
+ unsigned int region = regionIndices[r];
+ if (region_map.has (region)) continue;
+ for (unsigned int i = 0; i < inner_map.get_next_value (); i++)
+ if (get_item_delta (inner_map.backward (i), r) != 0)
+ {
+ region_map.add (region);
+ break;
+ }
+ }
+ }
+
+ protected:
+ const HBUINT8 *get_delta_bytes () const /* delta data is located via StructAfter on regionIndices */
+ { return &StructAfter<HBUINT8> (regionIndices); }
+
+ HBUINT8 *get_delta_bytes ()
+ { return &StructAfter<HBUINT8> (regionIndices); }
+ /* Reads one delta; returns 0 when `item` or `region` is out of range. */
+ int16_t get_item_delta (unsigned int item, unsigned int region) const
+ {
+ if ( item >= itemCount || unlikely (region >= regionIndices.len)) return 0;
+ const HBINT8 *p = (const HBINT8 *)get_delta_bytes () + item * get_row_size ();
+ if (region < shortCount)
+ return ((const HBINT16 *)p)[region];
+ else
+ return (p + HBINT16::static_size * shortCount)[region - shortCount];
+ }
+ /* Writes one delta. Performs no range checks on `item` or `region`. */
+ void set_item_delta (unsigned int item, unsigned int region, int16_t delta)
+ {
+ HBINT8 *p = (HBINT8 *)get_delta_bytes () + item * get_row_size ();
+ if (region < shortCount)
+ ((HBINT16 *)p)[region] = delta;
+ else
+ (p + HBINT16::static_size * shortCount)[region - shortCount] = delta;
+ }
+
+ protected:
+ HBUINT16 itemCount;
+ HBUINT16 shortCount;
+ ArrayOf<HBUINT16> regionIndices;
+/*UnsizedArrayOf<HBUINT8>bytesX;*/
+ public:
+ DEFINE_SIZE_ARRAY (6, regionIndices);
+};
+
+struct VariationStore
+{ /* Holds a format field, an offset to a VarRegionList and an offset array of VarData. */
+ float get_delta (unsigned int outer, unsigned int inner,
+ const int *coords, unsigned int coord_count) const
+ {
+#ifdef HB_NO_VAR
+ return 0.f;
+#endif
+ /* An `outer` index past the last data set yields a zero delta. */
+ if (unlikely (outer >= dataSets.len))
+ return 0.f;
+
+ return (this+dataSets[outer]).get_delta (inner,
+ coords, coord_count,
+ this+regions);
+ }
+ /* Convenience overload: `index` packs outer in the high 16 bits and inner in the low 16 bits. */
+ float get_delta (unsigned int index,
+ const int *coords, unsigned int coord_count) const
+ {
+ unsigned int outer = index >> 16;
+ unsigned int inner = index & 0xFFFF;
+ return get_delta (outer, inner, coords, coord_count);
+ }
+ /* Accepts only format 1; validates the region list and every data set.
+  * When HB_NO_VAR is defined the table is accepted without inspection. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+#ifdef HB_NO_VAR
+ return true;
+#endif
+
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ format == 1 &&
+ regions.sanitize (c, this) &&
+ dataSets.sanitize (c, this));
+ }
+ /* Serializes a reduced copy of `src`. inner_maps[i] lists the items to keep
+  * from src data set i; data sets whose map is empty are not written.
+  * Returns false as soon as any allocation or nested serialize fails. */
+ bool serialize (hb_serialize_context_t *c,
+ const VariationStore *src,
+ const hb_array_t <hb_inc_bimap_t> &inner_maps)
+ {
+ TRACE_SERIALIZE (this);
+ unsigned int set_count = 0;
+ for (unsigned int i = 0; i < inner_maps.length; i++)
+ if (inner_maps[i].get_population () > 0) set_count++;
+ /* Header plus one HBUINT32-sized slot per retained data set. */
+ unsigned int size = min_size + HBUINT32::static_size * set_count;
+ if (unlikely (!c->allocate_size<HBUINT32> (size))) return_trace (false);
+ format = 1;
+ /* Gather the regions referenced by the retained items, then sort the map. */
+ hb_inc_bimap_t region_map;
+ for (unsigned int i = 0; i < inner_maps.length; i++)
+ (src+src->dataSets[i]).collect_region_refs (region_map, inner_maps[i]);
+ region_map.sort ();
+
+ if (unlikely (!regions.serialize (c, this)
+ .serialize (c, &(src+src->regions), region_map))) return_trace (false);
+
+ /* TODO: The following code could be simplified when
+ * OffsetListOf::subset () can take a custom param to be passed to VarData::serialize ()
+ */
+ dataSets.len = set_count;
+ unsigned int set_index = 0;
+ for (unsigned int i = 0; i < inner_maps.length; i++)
+ {
+ if (inner_maps[i].get_population () == 0) continue;
+ if (unlikely (!dataSets[set_index++].serialize (c, this)
+ .serialize (c, &(src+src->dataSets[i]), inner_maps[i], region_map)))
+ return_trace (false);
+ }
+
+ return_trace (true);
+ }
+ /* Builds one inner map per data set from c->plan->layout_variation_indices and
+  * serializes the reduced store. Returns false when the index set is empty, when
+  * an index names a data set that does not exist, or when the output ends up
+  * with no data sets. The return value of serialize () itself is not inspected. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+
+ VariationStore *varstore_prime = c->serializer->start_embed<VariationStore> ();
+ if (unlikely (!varstore_prime)) return_trace (false);
+
+ const hb_set_t *variation_indices = c->plan->layout_variation_indices;
+ if (variation_indices->is_empty ()) return_trace (false);
+
+ hb_vector_t<hb_inc_bimap_t> inner_maps;
+ inner_maps.resize ((unsigned) dataSets.len);
+ for (unsigned i = 0; i < inner_maps.length; i++)
+ inner_maps[i].init ();
+
+ for (unsigned idx : c->plan->layout_variation_indices->iter ())
+ {
+ uint16_t major = idx >> 16;
+ uint16_t minor = idx & 0xFFFF;
+
+ if (major >= inner_maps.length)
+ {
+ for (unsigned i = 0; i < inner_maps.length; i++)
+ inner_maps[i].fini (); /* release the maps before bailing out */
+ return_trace (false);
+ }
+ inner_maps[major].add (minor);
+ }
+ varstore_prime->serialize (c->serializer, this, inner_maps.as_array ());
+
+ for (unsigned i = 0; i < inner_maps.length; i++)
+ inner_maps[i].fini ();
+ return_trace (bool (varstore_prime->dataSets));
+ }
+ /* Number of region indices used by data set `ivs`. */
+ unsigned int get_region_index_count (unsigned int ivs) const
+ { return (this+dataSets[ivs]).get_region_index_count (); }
+ /* Fills scalars[0, num_scalars) for data set `ivs`; all zeros when HB_NO_VAR is defined. */
+ void get_scalars (unsigned int ivs,
+ const int *coords, unsigned int coord_count,
+ float *scalars /*OUT*/,
+ unsigned int num_scalars) const
+ {
+#ifdef HB_NO_VAR
+ for (unsigned i = 0; i < num_scalars; i++)
+ scalars[i] = 0.f;
+ return;
+#endif
+
+ (this+dataSets[ivs]).get_scalars (coords, coord_count, this+regions,
+ &scalars[0], num_scalars);
+ }
+ /* Number of data sets in the store. */
+ unsigned int get_sub_table_count () const { return dataSets.len; }
+
+ protected:
+ HBUINT16 format;
+ LOffsetTo<VarRegionList> regions;
+ LOffsetArrayOf<VarData> dataSets;
+ public:
+ DEFINE_SIZE_ARRAY (8, dataSets);
+};
+
+/*
+ * Feature Variations
+ */
+
+struct ConditionFormat1
+{
+  friend struct Condition;
+
+  /* Copies this record verbatim into the subset output; fails only when the
+   * serializer cannot embed it. */
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    const bool embedded = c->serializer->embed (this) != nullptr;
+    return_trace (embedded);
+  }
+
+  private:
+  /* True when the coordinate of axis `axisIndex` lies inside the closed range
+   * [filterRangeMinValue, filterRangeMaxValue]. An axis beyond `coord_len`
+   * is treated as coordinate 0. */
+  bool evaluate (const int *coords, unsigned int coord_len) const
+  {
+    int value = 0;
+    if (axisIndex < coord_len)
+      value = coords[axisIndex];
+
+    if (!(filterRangeMinValue <= value))
+      return false;
+    return value <= filterRangeMaxValue;
+  }
+
+  /* Only the fixed-size record needs to be in range. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool in_range = c->check_struct (this);
+    return_trace (in_range);
+  }
+
+  protected:
+  HBUINT16	format;		/* Format identifier--format = 1 */
+  HBUINT16	axisIndex;
+  F2DOT14	filterRangeMinValue;
+  F2DOT14	filterRangeMaxValue;
+  public:
+  DEFINE_SIZE_STATIC (8);
+};
+
+struct Condition
+{
+  /* Evaluates the condition for the given coordinates; any format other
+   * than 1 evaluates to false. */
+  bool evaluate (const int *coords, unsigned int coord_len) const
+  {
+    if (u.format == 1)
+      return u.format1.evaluate (coords, coord_len);
+    return false;
+  }
+
+  /* Forwards to the context's dispatch for the concrete format; formats
+   * other than 1 yield the context's default return value. */
+  template <typename context_t, typename ...Ts>
+  typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+  {
+    TRACE_DISPATCH (this, u.format);
+    if (unlikely (!c->may_dispatch (this, &u.format)))
+      return_trace (c->no_dispatch_return_value ());
+
+    if (u.format == 1)
+      return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+    return_trace (c->default_return_value ());
+  }
+
+  /* The format field must be readable; format 1 is then validated by its
+   * own sanitizer, every other format is accepted as-is. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!u.format.sanitize (c))
+      return_trace (false);
+
+    if (u.format == 1)
+      return_trace (u.format1.sanitize (c));
+    return_trace (true);
+  }
+
+  protected:
+  union {
+  HBUINT16		format;		/* Format identifier */
+  ConditionFormat1	format1;
+  } u;
+  public:
+  DEFINE_SIZE_UNION (2, format);
+};
+
+struct ConditionSet
+{
+  /* A set matches only if every one of its conditions matches; an empty
+   * set therefore matches. */
+  bool evaluate (const int *coords, unsigned int coord_len) const
+  {
+    for (unsigned int i = 0, n = conditions.len; i < n; i++)
+    {
+      const Condition &cond = this+conditions.arrayZ[i];
+      if (!cond.evaluate (coords, coord_len))
+	return false;
+    }
+    return true;
+  }
+
+  /* Starts a new ConditionSet in the output and subsets each referenced
+   * condition into it. Fails only if the header cannot be allocated. */
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->start_embed (this);
+    if (unlikely (!out)) return_trace (false);
+    if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+
+    + conditions.iter ()
+    | hb_apply (subset_offset_array (c, out->conditions, this))
+    ;
+
+    return_trace (true);
+  }
+
+  /* Validates the offset array together with every condition it points to. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool ok = conditions.sanitize (c, this);
+    return_trace (ok);
+  }
+
+  protected:
+  LOffsetArrayOf<Condition>	conditions;
+  public:
+  DEFINE_SIZE_ARRAY (2, conditions);
+};
+
+struct FeatureTableSubstitutionRecord
+{
+  friend struct FeatureTableSubstitution;
+
+  /* Adds the lookup indexes of the substitute feature to `lookup_indexes`. */
+  void collect_lookups (const void *base, hb_set_t *lookup_indexes /* OUT */) const
+  {
+    const Feature &substitute = base+feature;
+    substitute.add_lookup_indexes_to (lookup_indexes);
+  }
+
+  /* Records featureIndex when the substitute feature intersects `lookup_indexes`. */
+  void closure_features (const void *base,
+			 const hb_map_t *lookup_indexes,
+			 hb_set_t *feature_indexes /* OUT */) const
+  {
+    const Feature &substitute = base+feature;
+    if (!substitute.intersects_lookup_indexes (lookup_indexes))
+      return;
+    feature_indexes->add (featureIndex);
+  }
+
+  /* Embeds a copy of this record, remaps its feature index through
+   * c->feature_index_map and subsets the referenced feature table. */
+  bool subset (hb_subset_layout_context_t *c, const void *base) const
+  {
+    TRACE_SUBSET (this);
+    auto *copy = c->subset_context->serializer->embed (this);
+    if (unlikely (!copy)) return_trace (false);
+
+    copy->featureIndex = c->feature_index_map->get (featureIndex);
+    return_trace (copy->feature.serialize_subset (c->subset_context, feature, base, c));
+  }
+
+  /* Validates the fixed record and the feature table it points to. */
+  bool sanitize (hb_sanitize_context_t *c, const void *base) const
+  {
+    TRACE_SANITIZE (this);
+    if (!c->check_struct (this)) return_trace (false);
+    return_trace (feature.sanitize (c, base));
+  }
+
+  protected:
+  HBUINT16		featureIndex;
+  LOffsetTo<Feature>	feature;
+  public:
+  DEFINE_SIZE_STATIC (6);
+};
+
+struct FeatureTableSubstitution
+{
+  /* Returns the substitute for `feature_index` from the first matching
+   * record, or nullptr when no record names that feature. */
+  const Feature *find_substitute (unsigned int feature_index) const
+  {
+    const unsigned int n = substitutions.len;
+    unsigned int i = 0;
+    while (i < n)
+    {
+      const FeatureTableSubstitutionRecord &candidate = substitutions.arrayZ[i];
+      if (candidate.featureIndex == feature_index)
+	return &(this+candidate.feature);
+      i++;
+    }
+    return nullptr;
+  }
+
+  /* Collects lookups from every record whose featureIndex passes the
+   * `feature_indexes` filter. */
+  void collect_lookups (const hb_set_t *feature_indexes,
+			hb_set_t *lookup_indexes /* OUT */) const
+  {
+    + hb_iter (substitutions)
+    | hb_filter (feature_indexes, &FeatureTableSubstitutionRecord::featureIndex)
+    | hb_apply ([this, lookup_indexes] (const FeatureTableSubstitutionRecord& rec)
+		{ rec.collect_lookups (this, lookup_indexes); })
+    ;
+  }
+
+  /* Lets every record contribute its feature index to `feature_indexes`. */
+  void closure_features (const hb_map_t *lookup_indexes,
+			 hb_set_t *feature_indexes /* OUT */) const
+  {
+    for (const FeatureTableSubstitutionRecord& rec : substitutions)
+      rec.closure_features (this, lookup_indexes, feature_indexes);
+  }
+
+  /* Writes the header with the same version and subsets each record into
+   * the output array. Fails only if the header cannot be allocated. */
+  bool subset (hb_subset_context_t        *c,
+	       hb_subset_layout_context_t *l) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->start_embed (*this);
+    if (unlikely (!out)) return_trace (false);
+    if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+
+    out->version.major = version.major;
+    out->version.minor = version.minor;
+
+    + substitutions.iter ()
+    | hb_apply (subset_record_array (l, &(out->substitutions), this))
+    ;
+
+    return_trace (true);
+  }
+
+  /* Requires a readable version with major == 1 and valid records. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!version.sanitize (c)) return_trace (false);
+    if (!likely (version.major == 1)) return_trace (false);
+    return_trace (substitutions.sanitize (c, this));
+  }
+
+  protected:
+  FixedVersion<>	version;	/* Version--0x00010000u */
+  ArrayOf<FeatureTableSubstitutionRecord>
+			substitutions;
+  public:
+  DEFINE_SIZE_ARRAY (6, substitutions);
+};
+
+struct FeatureVariationRecord
+{
+  friend struct FeatureVariations;
+
+  /* Delegates lookup collection to the referenced substitution table. */
+  void collect_lookups (const void     *base,
+			const hb_set_t *feature_indexes,
+			hb_set_t       *lookup_indexes /* OUT */) const
+  {
+    const FeatureTableSubstitution &table = base+substitutions;
+    table.collect_lookups (feature_indexes, lookup_indexes);
+  }
+
+  /* Delegates feature closure to the referenced substitution table. */
+  void closure_features (const void     *base,
+			 const hb_map_t *lookup_indexes,
+			 hb_set_t       *feature_indexes /* OUT */) const
+  {
+    const FeatureTableSubstitution &table = base+substitutions;
+    table.closure_features (lookup_indexes, feature_indexes);
+  }
+
+  /* Embeds a copy of the record and subsets both referenced tables. The
+   * results of the two nested subsets are not inspected: once the record
+   * itself is embedded the call reports success. */
+  bool subset (hb_subset_layout_context_t *c, const void *base) const
+  {
+    TRACE_SUBSET (this);
+    auto *copy = c->subset_context->serializer->embed (this);
+    if (unlikely (!copy)) return_trace (false);
+
+    copy->conditions.serialize_subset (c->subset_context, conditions, base);
+    copy->substitutions.serialize_subset (c->subset_context, substitutions, base, c);
+
+    return_trace (true);
+  }
+
+  /* Both offsets must resolve to valid tables. */
+  bool sanitize (hb_sanitize_context_t *c, const void *base) const
+  {
+    TRACE_SANITIZE (this);
+    if (!conditions.sanitize (c, base)) return_trace (false);
+    return_trace (substitutions.sanitize (c, base));
+  }
+
+  protected:
+  LOffsetTo<ConditionSet>
+			conditions;
+  LOffsetTo<FeatureTableSubstitution>
+			substitutions;
+  public:
+  DEFINE_SIZE_STATIC (8);
+};
+
+struct FeatureVariations
+{
+  static constexpr unsigned NOT_FOUND_INDEX = 0xFFFFFFFFu;
+
+  /* Stores in *index the position of the first record whose condition set
+   * matches the coordinates and returns true; otherwise stores
+   * NOT_FOUND_INDEX and returns false. */
+  bool find_index (const int *coords, unsigned int coord_len,
+		   unsigned int *index) const
+  {
+    const unsigned int n = varRecords.len;
+    unsigned int i = 0;
+    while (i < n)
+    {
+      const ConditionSet &set = this+varRecords.arrayZ[i].conditions;
+      if (set.evaluate (coords, coord_len))
+      {
+	*index = i;
+	return true;
+      }
+      i++;
+    }
+    *index = NOT_FOUND_INDEX;
+    return false;
+  }
+
+  /* Looks up the substitute for `feature_index` in the substitution table
+   * of record `variations_index`. */
+  const Feature *find_substitute (unsigned int variations_index,
+				  unsigned int feature_index) const
+  {
+    const FeatureTableSubstitution &table = this+varRecords[variations_index].substitutions;
+    return table.find_substitute (feature_index);
+  }
+
+  /* Embeds a copy of this table through the serializer. */
+  FeatureVariations* copy (hb_serialize_context_t *c) const
+  {
+    TRACE_SERIALIZE (this);
+    FeatureVariations *dup = c->embed (*this);
+    return_trace (dup);
+  }
+
+  /* Collects lookups from every variation record. */
+  void collect_lookups (const hb_set_t *feature_indexes,
+			hb_set_t       *lookup_indexes /* OUT */) const
+  {
+    for (const FeatureVariationRecord& rec : varRecords)
+      rec.collect_lookups (this, feature_indexes, lookup_indexes);
+  }
+
+  /* Runs feature closure over every variation record. */
+  void closure_features (const hb_map_t *lookup_indexes,
+			 hb_set_t       *feature_indexes /* OUT */) const
+  {
+    for (const FeatureVariationRecord& rec : varRecords)
+      rec.closure_features (this, lookup_indexes, feature_indexes);
+  }
+
+  /* Writes the header with the same version and subsets each record;
+   * succeeds only when the output record array is non-empty. */
+  bool subset (hb_subset_context_t        *c,
+	       hb_subset_layout_context_t *l) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->start_embed (*this);
+    if (unlikely (!out)) return_trace (false);
+    if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+
+    out->version.major = version.major;
+    out->version.minor = version.minor;
+
+    + varRecords.iter ()
+    | hb_apply (subset_record_array (l, &(out->varRecords), this))
+    ;
+
+    return_trace (bool (out->varRecords));
+  }
+
+  /* Requires a readable version with major == 1 and valid records. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!version.sanitize (c)) return_trace (false);
+    if (!likely (version.major == 1)) return_trace (false);
+    return_trace (varRecords.sanitize (c, this));
+  }
+
+  protected:
+  FixedVersion<>	version;	/* Version--0x00010000u */
+  LArrayOf<FeatureVariationRecord>
+			varRecords;
+  public:
+  DEFINE_SIZE_ARRAY_SIZED (8, varRecords);
+};
+
+
+/*
+ * Device Tables
+ */
+
+struct HintingDevice
+{ /* Per-ppem pixel adjustments packed into 16-bit words (see the deltaFormat field). */
+ friend struct Device;
+
+ private:
+ /* Delta for the font's horizontal ppem, scaled by x_scale. */
+ hb_position_t get_x_delta (hb_font_t *font) const
+ { return get_delta (font->x_ppem, font->x_scale); }
+ /* Delta for the font's vertical ppem, scaled by y_scale. */
+ hb_position_t get_y_delta (hb_font_t *font) const
+ { return get_delta (font->y_ppem, font->y_scale); }
+
+ public:
+ /* Size of the table derived from its own header fields. An unknown format or
+  * startSize > endSize yields just the three header words. */
+ unsigned int get_size () const
+ {
+ unsigned int f = deltaFormat;
+ if (unlikely (f < 1 || f > 3 || startSize > endSize)) return 3 * HBUINT16::static_size;
+ return HBUINT16::static_size * (4 + ((endSize - startSize) >> (4 - f)));
+ }
+ /* Checks the fixed header first, then the full range reported by get_size (). */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) && c->check_range (this, this->get_size ()));
+ }
+ /* Embeds a copy of this table through the serializer. */
+ HintingDevice* copy (hb_serialize_context_t *c) const
+ {
+ TRACE_SERIALIZE (this);
+ return_trace (c->embed<HintingDevice> (this));
+ }
+
+ private:
+ /* Converts the pixel delta for `ppem` into pixels * scale / ppem.
+  * Returns 0 when ppem is 0 or when the pixel delta is 0. */
+ int get_delta (unsigned int ppem, int scale) const
+ {
+ if (!ppem) return 0;
+
+ int pixels = get_delta_pixels (ppem);
+
+ if (!pixels) return 0;
+
+ return (int) (pixels * (int64_t) scale / ppem); /* 64-bit intermediate product */
+ }
+ int get_delta_pixels (unsigned int ppem_size) const /* signed pixel delta for ppem_size; 0 if the format is unknown or the size is outside [startSize, endSize] */
+ {
+ unsigned int f = deltaFormat;
+ if (unlikely (f < 1 || f > 3))
+ return 0;
+
+ if (ppem_size < startSize || ppem_size > endSize)
+ return 0;
+
+ unsigned int s = ppem_size - startSize;
+ /* Each word holds 1 << (4 - f) values of 1 << f bits: s >> (4 - f) picks the
+  * word, the low bits of s pick the slot, counted from the high end of the word. */
+ unsigned int byte = deltaValueZ[s >> (4 - f)];
+ unsigned int bits = (byte >> (16 - (((s & ((1 << (4 - f)) - 1)) + 1) << f)));
+ unsigned int mask = (0xFFFFu >> (16 - (1 << f)));
+
+ int delta = bits & mask;
+ /* Values in the upper half of the field's range are negative: sign-extend. */
+ if ((unsigned int) delta >= ((mask + 1) >> 1))
+ delta -= mask + 1;
+
+ return delta;
+ }
+
+ protected:
+ HBUINT16 startSize; /* Smallest size to correct--in ppem */
+ HBUINT16 endSize; /* Largest size to correct--in ppem */
+ HBUINT16 deltaFormat; /* Format of DeltaValue array data: 1, 2, or 3
+ * 1 Signed 2-bit value, 8 values per uint16
+ * 2 Signed 4-bit value, 4 values per uint16
+ * 3 Signed 8-bit value, 2 values per uint16
+ */
+ UnsizedArrayOf<HBUINT16>
+ deltaValueZ; /* Array of compressed data */
+ public:
+ DEFINE_SIZE_ARRAY (6, deltaValueZ);
+};
+
+struct VariationDevice
+{
+  friend struct Device;
+
+  private:
+
+  /* Variation delta converted with the font's horizontal em scaling. */
+  hb_position_t get_x_delta (hb_font_t *font, const VariationStore &store) const
+  {
+    const float raw = get_delta (font, store);
+    return font->em_scalef_x (raw);
+  }
+
+  /* Variation delta converted with the font's vertical em scaling. */
+  hb_position_t get_y_delta (hb_font_t *font, const VariationStore &store) const
+  {
+    const float raw = get_delta (font, store);
+    return font->em_scalef_y (raw);
+  }
+
+  /* Embeds a copy of this table. With a null or empty map the copy is
+   * returned unchanged. Otherwise the packed (outer << 16) + inner index is
+   * looked up in the map: if absent, the serializer is reverted to its
+   * state before the embed and nullptr is returned; if present, the copy's
+   * indices are rewritten from the mapped value. */
+  VariationDevice* copy (hb_serialize_context_t *c, const hb_map_t *layout_variation_idx_map) const
+  {
+    TRACE_SERIALIZE (this);
+    auto checkpoint = c->snapshot ();
+    auto *dup = c->embed (this);
+    if (unlikely (!dup)) return_trace (nullptr);
+
+    if (!layout_variation_idx_map) return_trace (dup);
+    if (layout_variation_idx_map->is_empty ()) return_trace (dup);
+
+    unsigned old_idx = (outerIndex << 16) + innerIndex;
+    if (layout_variation_idx_map->has (old_idx))
+    {
+      unsigned mapped = layout_variation_idx_map->get (old_idx);
+      dup->outerIndex = mapped >> 16;
+      dup->innerIndex = mapped & 0xFFFF;
+      return_trace (dup);
+    }
+
+    c->revert (checkpoint);
+    return_trace (nullptr);
+  }
+
+  /* Adds this table's packed (outer << 16) + inner index to the set. */
+  void record_variation_index (hb_set_t *layout_variation_indices) const
+  {
+    unsigned packed = (outerIndex << 16) + innerIndex;
+    layout_variation_indices->add (packed);
+  }
+
+  /* Only the fixed-size record needs to be in range. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool in_range = c->check_struct (this);
+    return_trace (in_range);
+  }
+
+  private:
+
+  /* Unscaled delta from the variation store at the font's current coords. */
+  float get_delta (hb_font_t *font, const VariationStore &store) const
+  {
+    return store.get_delta (outerIndex, innerIndex, font->coords, font->num_coords);
+  }
+
+  protected:
+  HBUINT16	outerIndex;
+  HBUINT16	innerIndex;
+  HBUINT16	deltaFormat;	/* Format identifier for this table: 0x8000 */
+  public:
+  DEFINE_SIZE_STATIC (6);
+};
+
+struct DeviceHeader
+{ /* Common 6-byte prefix: two reserved words followed by the format word. */
+ protected:
+ HBUINT16 reserved1;
+ HBUINT16 reserved2;
+ public:
+ HBUINT16 format; /* Format identifier */
+ public:
+ DEFINE_SIZE_STATIC (6);
+};
+
+/* Union over the device-table flavours, selected by the format word shared
+ * through DeviceHeader: formats 1..3 are HintingDevice, 0x8000 is
+ * VariationDevice. Each flavour can be compiled out with HB_NO_HINTING /
+ * HB_NO_VAR, in which case tables of that flavour behave like an unknown
+ * format. */
+struct Device
+{
+  /* Horizontal adjustment for `font`. `store` is consulted only for
+   * variation devices. Unknown or compiled-out formats yield 0. */
+  hb_position_t get_x_delta (hb_font_t *font, const VariationStore &store=Null (VariationStore)) const
+  {
+    switch (u.b.format)
+    {
+#ifndef HB_NO_HINTING
+    case 1: case 2: case 3:
+      return u.hinting.get_x_delta (font);
+#endif
+#ifndef HB_NO_VAR
+    case 0x8000:
+      return u.variation.get_x_delta (font, store);
+#endif
+    default:
+      return 0;
+    }
+  }
+
+  /* Vertical adjustment for `font`; mirrors get_x_delta (). Unknown or
+   * compiled-out formats yield 0. */
+  hb_position_t get_y_delta (hb_font_t *font, const VariationStore &store=Null (VariationStore)) const
+  {
+    switch (u.b.format)
+    {
+    /* The case labels must live inside the guard: otherwise, with
+     * HB_NO_HINTING defined, formats 1..3 would fall through into the
+     * 0x8000 branch and a hinting table would be read as a variation
+     * table. */
+#ifndef HB_NO_HINTING
+    case 1: case 2: case 3:
+      return u.hinting.get_y_delta (font);
+#endif
+#ifndef HB_NO_VAR
+    case 0x8000:
+      return u.variation.get_y_delta (font, store);
+#endif
+    default:
+      return 0;
+    }
+  }
+
+  /* The format word must be readable; known formats are then validated by
+   * the matching flavour, anything else is accepted as-is. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!u.b.format.sanitize (c)) return_trace (false);
+    switch (u.b.format) {
+#ifndef HB_NO_HINTING
+    case 1: case 2: case 3:
+      return_trace (u.hinting.sanitize (c));
+#endif
+#ifndef HB_NO_VAR
+    case 0x8000:
+      return_trace (u.variation.sanitize (c));
+#endif
+    default:
+      return_trace (true);
+    }
+  }
+
+  /* Copies the table through the serializer using the flavour-specific
+   * copy (). `layout_variation_idx_map` is forwarded to variation devices
+   * only. Returns nullptr for unknown or compiled-out formats, or when the
+   * flavour's copy () returns nullptr. */
+  Device* copy (hb_serialize_context_t *c, const hb_map_t *layout_variation_idx_map=nullptr) const
+  {
+    TRACE_SERIALIZE (this);
+    switch (u.b.format) {
+#ifndef HB_NO_HINTING
+    case 1:
+    case 2:
+    case 3:
+      return_trace (reinterpret_cast<Device *> (u.hinting.copy (c)));
+#endif
+#ifndef HB_NO_VAR
+    case 0x8000:
+      return_trace (reinterpret_cast<Device *> (u.variation.copy (c, layout_variation_idx_map)));
+#endif
+    default:
+      return_trace (nullptr);
+    }
+  }
+
+  /* Records the variation index of a variation device into the set;
+   * every other format contributes nothing. */
+  void collect_variation_indices (hb_set_t *layout_variation_indices) const
+  {
+    switch (u.b.format) {
+#ifndef HB_NO_HINTING
+    case 1:
+    case 2:
+    case 3:
+      return;
+#endif
+#ifndef HB_NO_VAR
+    case 0x8000:
+      u.variation.record_variation_index (layout_variation_indices);
+      return;
+#endif
+    default:
+      return;
+    }
+  }
+
+  protected:
+  union {
+  DeviceHeader		b;
+  HintingDevice		hinting;
+#ifndef HB_NO_VAR
+  VariationDevice	variation;
+#endif
+  } u;
+  public:
+  DEFINE_SIZE_UNION (6, b);
+};
+
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_LAYOUT_COMMON_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-gdef-table.hh b/thirdparty/harfbuzz/src/hb-ot-layout-gdef-table.hh
new file mode 100644
index 0000000000..437e760f64
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-layout-gdef-table.hh
@@ -0,0 +1,725 @@
+/*
+ * Copyright © 2007,2008,2009 Red Hat, Inc.
+ * Copyright © 2010,2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_LAYOUT_GDEF_TABLE_HH
+#define HB_OT_LAYOUT_GDEF_TABLE_HH
+
+#include "hb-ot-layout-common.hh"
+
+#include "hb-font.hh"
+
+
+namespace OT {
+
+
+/*
+ * Attachment List Table
+ */
+
+/* Array of contour point indices--in increasing numerical order */
+struct AttachPoint : ArrayOf<HBUINT16>
+{
+  /* Re-serializes the whole point array into the subset output. */
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    auto *dest = c->serializer->start_embed (*this);
+    if (unlikely (!dest))
+      return_trace (false);
+
+    const bool written = dest->serialize (c->serializer, + iter ());
+    return_trace (written);
+  }
+};
+
+struct AttachList
+{ /* Maps covered glyphs to their AttachPoint arrays. */
+ unsigned int get_attach_points (hb_codepoint_t glyph_id,
+ unsigned int start_offset,
+ unsigned int *point_count /* IN/OUT */,
+ unsigned int *point_array /* OUT */) const
+ {
+ unsigned int index = (this+coverage).get_coverage (glyph_id);
+ if (index == NOT_COVERED)
+ {
+ if (point_count)
+ *point_count = 0;
+ return 0;
+ }
+
+ const AttachPoint &points = this+attachPoint[index];
+ /* The window is copied out only when the caller supplied a count. */
+ if (point_count)
+ {
+ + points.sub_array (start_offset, point_count)
+ | hb_sink (hb_array (point_array, *point_count))
+ ;
+ }
+
+ return points.len; /* total number of points, independent of the window requested */
+ }
+ /* Keeps the (glyph, AttachPoint) pairs whose glyph is in the plan's glyph set and
+  * whose AttachPoint passes the subset_offset_array filter, then writes a coverage
+  * table of the remapped glyph ids. Returns false when the header cannot be
+  * allocated or when no glyph is retained. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+
+ hb_sorted_vector_t<hb_codepoint_t> new_coverage;
+ + hb_zip (this+coverage, attachPoint)
+ | hb_filter (glyphset, hb_first)
+ | hb_filter (subset_offset_array (c, out->attachPoint, this), hb_second)
+ | hb_map (hb_first)
+ | hb_map (glyph_map)
+ | hb_sink (new_coverage)
+ ;
+ out->coverage.serialize (c->serializer, out)
+ .serialize (c->serializer, new_coverage.iter ());
+ return_trace (bool (new_coverage));
+ }
+ /* Validates the coverage table and every AttachPoint reachable from this list. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (coverage.sanitize (c, this) && attachPoint.sanitize (c, this));
+ }
+
+ protected:
+ OffsetTo<Coverage>
+ coverage; /* Offset to Coverage table -- from
+ * beginning of AttachList table */
+ OffsetArrayOf<AttachPoint>
+ attachPoint; /* Array of AttachPoint tables
+ * in Coverage Index order */
+ public:
+ DEFINE_SIZE_ARRAY (4, attachPoint);
+};
+
+/*
+ * Ligature Caret Table
+ */
+
+struct CaretValueFormat1
+{
+  friend struct CaretValue;
+
+  /* Copies this record verbatim into the subset output. */
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    const bool embedded = c->serializer->embed (this) != nullptr;
+    return_trace (embedded);
+  }
+
+  private:
+  /* Scales the design-unit coordinate along x for horizontal directions
+   * and along y otherwise. */
+  hb_position_t get_caret_value (hb_font_t *font, hb_direction_t direction) const
+  {
+    if (HB_DIRECTION_IS_HORIZONTAL (direction))
+      return font->em_scale_x (coordinate);
+    return font->em_scale_y (coordinate);
+  }
+
+  /* Only the fixed-size record needs to be in range. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool in_range = c->check_struct (this);
+    return_trace (in_range);
+  }
+
+  protected:
+  HBUINT16	caretValueFormat;	/* Format identifier--format = 1 */
+  FWORD		coordinate;		/* X or Y value, in design units */
+  public:
+  DEFINE_SIZE_STATIC (4);
+};
+
+struct CaretValueFormat2
+{
+  friend struct CaretValue;
+
+  /* Copies this record verbatim into the subset output. */
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    const bool embedded = c->serializer->embed (this) != nullptr;
+    return_trace (embedded);
+  }
+
+  private:
+  /* Asks the font for contour point `caretValuePoint` of the glyph and
+   * returns its x for horizontal directions, its y otherwise. The result
+   * of the font query itself is not inspected. */
+  hb_position_t get_caret_value (hb_font_t *font, hb_direction_t direction, hb_codepoint_t glyph_id) const
+  {
+    hb_position_t px, py;
+    font->get_glyph_contour_point_for_origin (glyph_id, caretValuePoint, direction, &px, &py);
+    if (HB_DIRECTION_IS_HORIZONTAL (direction))
+      return px;
+    return py;
+  }
+
+  /* Only the fixed-size record needs to be in range. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool in_range = c->check_struct (this);
+    return_trace (in_range);
+  }
+
+  protected:
+  HBUINT16	caretValueFormat;	/* Format identifier--format = 2 */
+  HBUINT16	caretValuePoint;	/* Contour point index on glyph */
+  public:
+  DEFINE_SIZE_STATIC (4);
+};
+
+struct CaretValueFormat3
+{
+  friend struct CaretValue;
+
+  /* Scaled coordinate plus the device-table delta, both taken along x for
+   * horizontal directions and along y otherwise. */
+  hb_position_t get_caret_value (hb_font_t *font, hb_direction_t direction,
+				 const VariationStore &var_store) const
+  {
+    if (HB_DIRECTION_IS_HORIZONTAL (direction))
+      return font->em_scale_x (coordinate) + (this+deviceTable).get_x_delta (font, var_store);
+    else
+      return font->em_scale_y (coordinate) + (this+deviceTable).get_y_delta (font, var_store);
+  }
+
+  /* Embeds a copy of the record, then copies the referenced device table,
+   * passing the plan's layout_variation_idx_map along. */
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    auto *copy = c->serializer->embed (this);
+    if (unlikely (!copy))
+      return_trace (false);
+
+    return_trace (copy->deviceTable.serialize_copy (c->serializer, deviceTable, this,
+						    c->serializer->to_bias (copy),
+						    hb_serialize_context_t::Head,
+						    c->plan->layout_variation_idx_map));
+  }
+
+  /* Forwards to the referenced device table. */
+  void collect_variation_indices (hb_set_t *layout_variation_indices) const
+  {
+    const Device &device = this+deviceTable;
+    device.collect_variation_indices (layout_variation_indices);
+  }
+
+  /* Validates the fixed record and the device table it points to. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!c->check_struct (this)) return_trace (false);
+    return_trace (deviceTable.sanitize (c, this));
+  }
+
+  protected:
+  HBUINT16	caretValueFormat;	/* Format identifier--format = 3 */
+  FWORD		coordinate;		/* X or Y value, in design units */
+  OffsetTo<Device>
+		deviceTable;		/* Offset to Device table for X or Y
+					 * value--from beginning of CaretValue
+					 * table */
+  public:
+  DEFINE_SIZE_STATIC (6);
+};
+
+struct CaretValue
+{
+  /* Caret position from whichever format this record uses; unknown
+   * formats yield 0. */
+  hb_position_t get_caret_value (hb_font_t *font,
+				 hb_direction_t direction,
+				 hb_codepoint_t glyph_id,
+				 const VariationStore &var_store) const
+  {
+    const unsigned int fmt = u.format;
+    if (fmt == 1) return u.format1.get_caret_value (font, direction);
+    if (fmt == 2) return u.format2.get_caret_value (font, direction, glyph_id);
+    if (fmt == 3) return u.format3.get_caret_value (font, direction, var_store);
+    return 0;
+  }
+
+  /* Forwards to the context's dispatch for the concrete format; unknown
+   * formats yield the context's default return value. */
+  template <typename context_t, typename ...Ts>
+  typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+  {
+    TRACE_DISPATCH (this, u.format);
+    if (unlikely (!c->may_dispatch (this, &u.format)))
+      return_trace (c->no_dispatch_return_value ());
+
+    const unsigned int fmt = u.format;
+    if (fmt == 1) return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+    if (fmt == 2) return_trace (c->dispatch (u.format2, hb_forward<Ts> (ds)...));
+    if (fmt == 3) return_trace (c->dispatch (u.format3, hb_forward<Ts> (ds)...));
+    return_trace (c->default_return_value ());
+  }
+
+  /* Only format 3 contributes variation indices. */
+  void collect_variation_indices (hb_set_t *layout_variation_indices) const
+  {
+    if (u.format == 3)
+      u.format3.collect_variation_indices (layout_variation_indices);
+  }
+
+  /* The format field must be readable; known formats are then validated
+   * by their own sanitizer, unknown formats are accepted as-is. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!u.format.sanitize (c))
+      return_trace (false);
+
+    const unsigned int fmt = u.format;
+    if (fmt == 1) return_trace (u.format1.sanitize (c));
+    if (fmt == 2) return_trace (u.format2.sanitize (c));
+    if (fmt == 3) return_trace (u.format3.sanitize (c));
+    return_trace (true);
+  }
+
+  protected:
+  union {
+  HBUINT16		format;		/* Format identifier */
+  CaretValueFormat1	format1;
+  CaretValueFormat2	format2;
+  CaretValueFormat3	format3;
+  } u;
+  public:
+  DEFINE_SIZE_UNION (2, format);
+};
+
+struct LigGlyph
+{ /* Caret positions for one ligature glyph, stored as offsets to CaretValue tables. */
+ unsigned get_lig_carets (hb_font_t *font,
+ hb_direction_t direction,
+ hb_codepoint_t glyph_id,
+ const VariationStore &var_store,
+ unsigned start_offset,
+ unsigned *caret_count /* IN/OUT */,
+ hb_position_t *caret_array /* OUT */) const
+ {
+ if (caret_count) /* caret values are written only when the caller supplied a count */
+ {
+ + carets.sub_array (start_offset, caret_count)
+ | hb_map (hb_add (this))
+ | hb_map ([&] (const CaretValue &value) { return value.get_caret_value (font, direction, glyph_id, var_store); })
+ | hb_sink (hb_array (caret_array, *caret_count))
+ ;
+ }
+
+ return carets.len; /* total number of carets, independent of the window requested */
+ }
+ /* Subsets every referenced CaretValue into a new LigGlyph; succeeds only when
+  * the output caret array is non-empty. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+
+ + hb_iter (carets)
+ | hb_apply (subset_offset_array (c, out->carets, this))
+ ;
+
+ return_trace (bool (out->carets));
+ }
+ /* Collects variation indices from every CaretValue into c->layout_variation_indices. */
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c) const
+ {
+ for (const OffsetTo<CaretValue>& offset : carets.iter ())
+ (this+offset).collect_variation_indices (c->layout_variation_indices);
+ }
+ /* Validates the offset array together with every CaretValue it points to. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (carets.sanitize (c, this));
+ }
+
+ protected:
+ OffsetArrayOf<CaretValue>
+ carets; /* Offset array of CaretValue tables
+ * --from beginning of LigGlyph table
+ * --in increasing coordinate order */
+ public:
+ DEFINE_SIZE_ARRAY (2, carets);
+};
+
+struct LigCaretList /* Ligature caret list: `coverage` maps a glyph to an index,
+ * and that index selects the glyph's LigGlyph in `ligGlyph`. */
+{
+ unsigned int get_lig_carets (hb_font_t *font,
+ hb_direction_t direction,
+ hb_codepoint_t glyph_id,
+ const VariationStore &var_store,
+ unsigned int start_offset,
+ unsigned int *caret_count /* IN/OUT */,
+ hb_position_t *caret_array /* OUT */) const
+ { /* For a covered glyph, returns whatever its LigGlyph returns.  For an
+ * uncovered glyph, returns 0 and zeroes *caret_count when it is non-null. */
+ unsigned int index = (this+coverage).get_coverage (glyph_id);
+ if (index == NOT_COVERED)
+ {
+ if (caret_count)
+ *caret_count = 0;
+ return 0;
+ }
+ const LigGlyph &lig_glyph = this+ligGlyph[index];
+ return lig_glyph.get_lig_carets (font, direction, glyph_id, var_store, start_offset, caret_count, caret_array);
+ }
+ /* Serializes a copy restricted to the subset plan.  Returns true only when
+ * the rebuilt coverage list is non-empty. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+
+ hb_sorted_vector_t<hb_codepoint_t> new_coverage;
+ + hb_zip (this+coverage, ligGlyph)
+ | hb_filter (glyphset, hb_first) /* keep pairs whose glyph is in the plan's glyph set */
+ | hb_filter (subset_offset_array (c, out->ligGlyph, this), hb_second) /* NOTE(review): this filter presumably also serializes the LigGlyph into out->ligGlyph and drops pairs that fail -- confirm; its position in the chain matters */
+ | hb_map (hb_first)
+ | hb_map (glyph_map) /* map the glyph id through the plan's glyph_map */
+ | hb_sink (new_coverage)
+ ;
+ out->coverage.serialize (c->serializer, out)
+ .serialize (c->serializer, new_coverage.iter ());
+ return_trace (bool (new_coverage));
+ }
+ /* Visits the LigGlyph of every covered glyph that is in c->glyph_set. */
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c) const
+ {
+ + hb_zip (this+coverage, ligGlyph)
+ | hb_filter (c->glyph_set, hb_first)
+ | hb_map (hb_second)
+ | hb_map (hb_add (this))
+ | hb_apply ([c] (const LigGlyph& _) { _.collect_variation_indices (c); })
+ ;
+ }
+ /* Both the coverage offset and the LigGlyph offset array must sanitize. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (coverage.sanitize (c, this) && ligGlyph.sanitize (c, this));
+ }
+
+ protected:
+ OffsetTo<Coverage>
+ coverage; /* Offset to Coverage table--from
+ * beginning of LigCaretList table */
+ OffsetArrayOf<LigGlyph>
+ ligGlyph; /* Array of LigGlyph tables
+ * in Coverage Index order */
+ public:
+ DEFINE_SIZE_ARRAY (4, ligGlyph);
+};
+
+
+struct MarkGlyphSetsFormat1 /* Format 1 mark glyph sets: an array of long offsets,
+ * one Coverage table per mark set. */
+{
+ bool covers (unsigned int set_index, hb_codepoint_t glyph_id) const /* true when the Coverage at set_index covers glyph_id */
+ { return (this+coverage[set_index]).get_coverage (glyph_id) != NOT_COVERED; }
+ /* Serializes a copy with one slot per original coverage offset.  Returns
+ * false if a slot cannot be appended or if the resulting array is empty. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+ out->format = format;
+
+ bool ret = true;
+ for (const LOffsetTo<Coverage>& offset : coverage.iter ())
+ {
+ auto *o = out->coverage.serialize_append (c->serializer);
+ if (unlikely (!o))
+ {
+ ret = false;
+ break;
+ }
+
+ //not using o->serialize_subset (c, offset, this, out) here because
+ //OTS doesn't allow null offset.
+ //See issue: https://github.com/khaledhosny/ots/issues/172
+ c->serializer->push (); /* the Coverage is written as its own object ... */
+ c->dispatch (this+offset); /* ... the result of the dispatch is not checked here ... */
+ c->serializer->add_link (*o, c->serializer->pop_pack ()); /* ... and is always linked from the new slot */
+ }
+
+ return_trace (ret && out->coverage.len);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (coverage.sanitize (c, this));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 1 */
+ ArrayOf<LOffsetTo<Coverage>>
+ coverage; /* Array of long offsets to mark set
+ * coverage tables */
+ public:
+ DEFINE_SIZE_ARRAY (4, coverage);
+};
+
+/* Format-dispatching wrapper for the mark glyph sets table.  Format 1 is
+ * the only format handled; anything else is treated as "no data". */
+struct MarkGlyphSets
+{
+  /* True when mark set `set_index` covers glyph_id.  Unknown formats
+   * cover nothing. */
+  bool covers (unsigned int set_index, hb_codepoint_t glyph_id) const
+  {
+    const unsigned int fmt = u.format;
+    if (fmt != 1)
+      return false;
+    return u.format1.covers (set_index, glyph_id);
+  }
+
+  /* Subsets the format 1 table; unknown formats cannot be subset. */
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    const unsigned int fmt = u.format;
+    if (fmt == 1)
+      return_trace (u.format1.subset (c));
+    return_trace (false);
+  }
+
+  /* Validates the format field, then the format 1 table if selected.
+   * Unknown formats pass. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!u.format.sanitize (c))
+      return_trace (false);
+
+    const unsigned int fmt = u.format;
+    if (fmt == 1)
+      return_trace (u.format1.sanitize (c));
+    return_trace (true);
+  }
+
+  protected:
+  union {
+  HBUINT16              format;         /* Format identifier */
+  MarkGlyphSetsFormat1  format1;
+  } u;
+  public:
+  DEFINE_SIZE_UNION (2, format);
+};
+
+
+/*
+ * GDEF -- Glyph Definition
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/gdef
+ */
+
+
+struct GDEF /* Accessors, sanitizer and subsetter for the GDEF table: a version
+ * followed by the offsets to optional sub-tables declared at the bottom of
+ * this struct. */
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_GDEF;
+
+ enum GlyphClasses {
+ UnclassifiedGlyph = 0,
+ BaseGlyph = 1,
+ LigatureGlyph = 2,
+ MarkGlyph = 3,
+ ComponentGlyph = 4
+ };
+ /* Presence checks: has_data () tests for a non-zero version; each has_* ()
+ * below tests whether the corresponding offset is non-zero. */
+ bool has_data () const { return version.to_int (); }
+ bool has_glyph_classes () const { return glyphClassDef != 0; }
+ unsigned int get_glyph_class (hb_codepoint_t glyph) const
+ { return (this+glyphClassDef).get_class (glyph); }
+ void get_glyphs_in_class (unsigned int klass, hb_set_t *glyphs) const
+ { (this+glyphClassDef).collect_class (glyphs, klass); }
+
+ bool has_mark_attachment_types () const { return markAttachClassDef != 0; }
+ unsigned int get_mark_attachment_type (hb_codepoint_t glyph) const
+ { return (this+markAttachClassDef).get_class (glyph); }
+
+ bool has_attach_points () const { return attachList != 0; }
+ unsigned int get_attach_points (hb_codepoint_t glyph_id,
+ unsigned int start_offset,
+ unsigned int *point_count /* IN/OUT */,
+ unsigned int *point_array /* OUT */) const
+ { return (this+attachList).get_attach_points (glyph_id, start_offset, point_count, point_array); }
+
+ bool has_lig_carets () const { return ligCaretList != 0; }
+ unsigned int get_lig_carets (hb_font_t *font,
+ hb_direction_t direction,
+ hb_codepoint_t glyph_id,
+ unsigned int start_offset,
+ unsigned int *caret_count /* IN/OUT */,
+ hb_position_t *caret_array /* OUT */) const
+ { return (this+ligCaretList).get_lig_carets (font,
+ direction, glyph_id, get_var_store(),
+ start_offset, caret_count, caret_array); }
+ /* Mark glyph sets are only consulted from version 0x00010002 on; older
+ * tables report none and cover nothing. */
+ bool has_mark_sets () const { return version.to_int () >= 0x00010002u && markGlyphSetsDef != 0; }
+ bool mark_set_covers (unsigned int set_index, hb_codepoint_t glyph_id) const
+ { return version.to_int () >= 0x00010002u && (this+markGlyphSetsDef).covers (set_index, glyph_id); }
+ /* The variation store is only consulted from version 0x00010003 on;
+ * get_var_store () returns Null (VariationStore) for older versions. */
+ bool has_var_store () const { return version.to_int () >= 0x00010003u && varStore != 0; }
+ const VariationStore &get_var_store () const
+ { return version.to_int () >= 0x00010003u ? this+varStore : Null (VariationStore); }
+
+ /* glyph_props is a 16-bit integer where the lower 8-bit have bits representing
+ * glyph class and other bits, and high 8-bit the mark attachment type (if any).
+ * Not to be confused with lookup_props which is very similar. */
+ unsigned int get_glyph_props (hb_codepoint_t glyph) const
+ {
+ unsigned int klass = get_glyph_class (glyph);
+
+ static_assert (((unsigned int) HB_OT_LAYOUT_GLYPH_PROPS_BASE_GLYPH == (unsigned int) LookupFlag::IgnoreBaseGlyphs), "");
+ static_assert (((unsigned int) HB_OT_LAYOUT_GLYPH_PROPS_LIGATURE == (unsigned int) LookupFlag::IgnoreLigatures), "");
+ static_assert (((unsigned int) HB_OT_LAYOUT_GLYPH_PROPS_MARK == (unsigned int) LookupFlag::IgnoreMarks), "");
+
+ switch (klass) {
+ default: return 0; /* also taken for UnclassifiedGlyph and ComponentGlyph */
+ case BaseGlyph: return HB_OT_LAYOUT_GLYPH_PROPS_BASE_GLYPH;
+ case LigatureGlyph: return HB_OT_LAYOUT_GLYPH_PROPS_LIGATURE;
+ case MarkGlyph:
+ klass = get_mark_attachment_type (glyph);
+ return HB_OT_LAYOUT_GLYPH_PROPS_MARK | (klass << 8); /* mark attachment type goes in the high byte */
+ }
+ }
+
+ HB_INTERNAL bool is_blocklisted (hb_blob_t *blob,
+ hb_face_t *face) const;
+ /* Holds the sanitized GDEF table of a face. */
+ struct accelerator_t
+ {
+ void init (hb_face_t *face)
+ {
+ this->table = hb_sanitize_context_t ().reference_table<GDEF> (face);
+ if (unlikely (this->table->is_blocklisted (this->table.get_blob (), face)))
+ { /* blocklisted table: release it and use the empty blob instead */
+ hb_blob_destroy (this->table.get_blob ());
+ this->table = hb_blob_get_empty ();
+ }
+ }
+
+ void fini () { this->table.destroy (); }
+
+ hb_blob_ptr_t<GDEF> table;
+ };
+ /* min_size plus each of the two newer offset fields, counted only when
+ * the version is high enough to include it. */
+ unsigned int get_size () const
+ {
+ return min_size +
+ (version.to_int () >= 0x00010002u ? markGlyphSetsDef.static_size : 0) +
+ (version.to_int () >= 0x00010003u ? varStore.static_size : 0);
+ }
+ /* Only the ligature caret list is consulted here. */
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c) const
+ { (this+ligCaretList).collect_variation_indices (c); }
+ /* Gives each index in layout_variation_indices a new value packed as
+ * (new_major << 16) + new_minor and records old -> new in the map.  Does
+ * nothing without a variation store or with an empty set.
+ * NOTE(review): the renumbering assumes the set iterates in increasing
+ * order, so that equal majors are adjacent -- confirm. */
+ void remap_layout_variation_indices (const hb_set_t *layout_variation_indices,
+ hb_map_t *layout_variation_idx_map /* OUT */) const
+ {
+ if (version.to_int () < 0x00010003u || !varStore) return;
+ if (layout_variation_indices->is_empty ()) return;
+
+ unsigned new_major = 0, new_minor = 0;
+ unsigned last_major = (layout_variation_indices->get_min ()) >> 16;
+ for (unsigned idx : layout_variation_indices->iter ())
+ {
+ uint16_t major = idx >> 16;
+ if (major >= (this+varStore).get_sub_table_count ()) break; /* stop at the first index whose major is not a sub-table of the store */
+ if (major != last_major)
+ { /* a new major group starts: restart the minor counter */
+ new_minor = 0;
+ ++new_major;
+ }
+
+ unsigned new_idx = (new_major << 16) + new_minor;
+ layout_variation_idx_map->set (idx, new_idx);
+ ++new_minor;
+ last_major = major;
+ }
+ }
+ /* Embeds a copy of the header and subsets each sub-table into it.  When
+ * the newest optional sub-table of the source version is dropped, the
+ * copy's minor version is lowered accordingly. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ auto *out = c->serializer->embed (*this);
+ if (unlikely (!out)) return_trace (false);
+
+ bool subset_glyphclassdef = out->glyphClassDef.serialize_subset (c, glyphClassDef, this);
+ bool subset_attachlist = out->attachList.serialize_subset (c, attachList, this);
+ bool subset_ligcaretlist = out->ligCaretList.serialize_subset (c, ligCaretList, this);
+ bool subset_markattachclassdef = out->markAttachClassDef.serialize_subset (c, markAttachClassDef, this);
+
+ bool subset_markglyphsetsdef = true;
+ if (version.to_int () >= 0x00010002u)
+ {
+ subset_markglyphsetsdef = out->markGlyphSetsDef.serialize_subset (c, markGlyphSetsDef, this);
+ if (!subset_markglyphsetsdef &&
+ version.to_int () == 0x00010002u)
+ out->version.minor = 0; /* no mark sets left in a 1.2 table: write it as 1.0 */
+ }
+
+ bool subset_varstore = true;
+ if (version.to_int () >= 0x00010003u)
+ {
+ subset_varstore = out->varStore.serialize_subset (c, varStore, this);
+ if (!subset_varstore && version.to_int () == 0x00010003u)
+ out->version.minor = 2; /* no variation store left in a 1.3 table: write it as 1.2 */
+ }
+ /* Succeeds when at least one sub-table was kept. */
+ return_trace (subset_glyphclassdef || subset_attachlist ||
+ subset_ligcaretlist || subset_markattachclassdef ||
+ (out->version.to_int () >= 0x00010002u && subset_markglyphsetsdef) ||
+ (out->version.to_int () >= 0x00010003u && subset_varstore));
+ }
+ /* Requires major version 1.  The two newer offsets are checked only when
+ * the version is high enough to contain them. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (version.sanitize (c) &&
+ likely (version.major == 1) &&
+ glyphClassDef.sanitize (c, this) &&
+ attachList.sanitize (c, this) &&
+ ligCaretList.sanitize (c, this) &&
+ markAttachClassDef.sanitize (c, this) &&
+ (version.to_int () < 0x00010002u || markGlyphSetsDef.sanitize (c, this)) &&
+ (version.to_int () < 0x00010003u || varStore.sanitize (c, this)));
+ }
+
+ protected:
+ FixedVersion<>version; /* Version of the GDEF table--currently
+ * 0x00010003u */
+ OffsetTo<ClassDef>
+ glyphClassDef; /* Offset to class definition table
+ * for glyph type--from beginning of
+ * GDEF header (may be Null) */
+ OffsetTo<AttachList>
+ attachList; /* Offset to list of glyphs with
+ * attachment points--from beginning
+ * of GDEF header (may be Null) */
+ OffsetTo<LigCaretList>
+ ligCaretList; /* Offset to list of positioning points
+ * for ligature carets--from beginning
+ * of GDEF header (may be Null) */
+ OffsetTo<ClassDef>
+ markAttachClassDef; /* Offset to class definition table for
+ * mark attachment type--from beginning
+ * of GDEF header (may be Null) */
+ OffsetTo<MarkGlyphSets>
+ markGlyphSetsDef; /* Offset to the table of mark set
+ * definitions--from beginning of GDEF
+ * header (may be NULL). Introduced
+ * in version 0x00010002. */
+ LOffsetTo<VariationStore>
+ varStore; /* Offset to the table of Item Variation
+ * Store--from beginning of GDEF
+ * header (may be NULL). Introduced
+ * in version 0x00010003. */
+ public:
+ DEFINE_SIZE_MIN (12);
+};
+
+struct GDEF_accelerator_t : GDEF::accelerator_t {}; /* Namespace-level name for GDEF::accelerator_t; adds no members of its own. */
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_LAYOUT_GDEF_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-gpos-table.hh b/thirdparty/harfbuzz/src/hb-ot-layout-gpos-table.hh
new file mode 100644
index 0000000000..2217d298fb
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-layout-gpos-table.hh
@@ -0,0 +1,2740 @@
+/*
+ * Copyright © 2007,2008,2009,2010 Red Hat, Inc.
+ * Copyright © 2010,2012,2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_LAYOUT_GPOS_TABLE_HH
+#define HB_OT_LAYOUT_GPOS_TABLE_HH
+
+#include "hb-ot-layout-gsubgpos.hh"
+
+
+namespace OT {
+
+struct MarkArray; /* Forward declaration, needed by the prototype that follows. */
+static void Markclass_closure_and_remap_indexes (const Coverage &mark_coverage,
+ const MarkArray &mark_array,
+ const hb_set_t &glyphset,
+ hb_map_t* klass_mapping /* INOUT */); /* Prototype only. NOTE(review): the definition is expected later in this header -- confirm. */
+
+/* buffer **position** var allocations */
+#define attach_chain() var.i16[0] /* glyph this one attaches to, as an offset relative to the current glyph; negative for going back, positive for forward. */
+#define attach_type() var.u8[2] /* attachment type (an attach_type_t value) */
+/* Note! if attach_chain() is zero, the value of attach_type() is irrelevant. */
+
+/* Values stored in attach_type (). */
+enum attach_type_t {
+  ATTACH_TYPE_NONE    = 0x00,
+
+  /* An attachment is either a mark or a cursive one, never both. */
+  ATTACH_TYPE_MARK    = 0x01,
+  ATTACH_TYPE_CURSIVE = 0x02,
+};
+
+
+/* Shared Tables: ValueRecord, Anchor Table, and MarkArray */
+
+/* One 16-bit slot of a value record. */
+using Value = HBUINT16;
+
+/* A value record is a run of Value slots; the number of slots is given
+ * by the accompanying ValueFormat (see ValueFormat::get_len ()). */
+using ValueRecord = UnsizedArrayOf<Value>;
+
+struct ValueFormat : HBUINT16 /* 16-bit mask telling which optional fields a value
+ * record contains.  The record's slots are consumed in the order the flags
+ * are declared below. */
+{
+ enum Flags {
+ xPlacement = 0x0001u, /* Includes horizontal adjustment for placement */
+ yPlacement = 0x0002u, /* Includes vertical adjustment for placement */
+ xAdvance = 0x0004u, /* Includes horizontal adjustment for advance */
+ yAdvance = 0x0008u, /* Includes vertical adjustment for advance */
+ xPlaDevice = 0x0010u, /* Includes horizontal Device table for placement */
+ yPlaDevice = 0x0020u, /* Includes vertical Device table for placement */
+ xAdvDevice = 0x0040u, /* Includes horizontal Device table for advance */
+ yAdvDevice = 0x0080u, /* Includes vertical Device table for advance */
+ ignored = 0x0F00u, /* Was used in TrueType Open for MM fonts */
+ reserved = 0xF000u, /* For future use */
+
+ devices = 0x00F0u /* Mask for having any Device table */
+ };
+
+/* All fields are optional. Only those available advance the value pointer. */
+#if 0
+ HBINT16 xPlacement; /* Horizontal adjustment for
+ * placement--in design units */
+ HBINT16 yPlacement; /* Vertical adjustment for
+ * placement--in design units */
+ HBINT16 xAdvance; /* Horizontal adjustment for
+ * advance--in design units (only used
+ * for horizontal writing) */
+ HBINT16 yAdvance; /* Vertical adjustment for advance--in
+ * design units (only used for vertical
+ * writing) */
+ OffsetTo<Device> xPlaDevice; /* Offset to Device table for
+ * horizontal placement--measured from
+ * beginning of PosTable (may be NULL) */
+ OffsetTo<Device> yPlaDevice; /* Offset to Device table for vertical
+ * placement--measured from beginning
+ * of PosTable (may be NULL) */
+ OffsetTo<Device> xAdvDevice; /* Offset to Device table for
+ * horizontal advance--measured from
+ * beginning of PosTable (may be NULL) */
+ OffsetTo<Device> yAdvDevice; /* Offset to Device table for vertical
+ * advance--measured from beginning of
+ * PosTable (may be NULL) */
+#endif
+ /* get_len (): number of Value slots in a record, i.e. the number of set
+ * bits in the format.  get_size (): the same in bytes. */
+ unsigned int get_len () const { return hb_popcount ((unsigned int) *this); }
+ unsigned int get_size () const { return get_len () * Value::static_size; }
+ /* Applies the record at `values` to glyph_pos.  Returns true when at least
+ * one slot that was actually read held a non-zero value (tracked through
+ * the `worked` argument of get_short () / get_device ()). */
+ bool apply_value (hb_ot_apply_context_t *c,
+ const void *base,
+ const Value *values,
+ hb_glyph_position_t &glyph_pos) const
+ {
+ bool ret = false;
+ unsigned int format = *this;
+ if (!format) return ret;
+
+ hb_font_t *font = c->font;
+ bool horizontal = HB_DIRECTION_IS_HORIZONTAL (c->direction);
+
+ if (format & xPlacement) glyph_pos.x_offset += font->em_scale_x (get_short (values++, &ret));
+ if (format & yPlacement) glyph_pos.y_offset += font->em_scale_y (get_short (values++, &ret));
+ if (format & xAdvance) {
+ if (likely (horizontal)) glyph_pos.x_advance += font->em_scale_x (get_short (values, &ret));
+ values++; /* the slot is stepped over even when it is not applied */
+ }
+ /* y_advance values grow downward but font-space grows upward, hence negation */
+ if (format & yAdvance) {
+ if (unlikely (!horizontal)) glyph_pos.y_advance -= font->em_scale_y (get_short (values, &ret));
+ values++;
+ }
+ /* Device deltas are looked up only when the font has a ppem or variation coordinates. */
+ if (!has_device ()) return ret;
+
+ bool use_x_device = font->x_ppem || font->num_coords;
+ bool use_y_device = font->y_ppem || font->num_coords;
+
+ if (!use_x_device && !use_y_device) return ret;
+
+ const VariationStore &store = c->var_store;
+
+ /* pixel -> fractional pixel */
+ if (format & xPlaDevice) {
+ if (use_x_device) glyph_pos.x_offset += (base + get_device (values, &ret)).get_x_delta (font, store);
+ values++;
+ }
+ if (format & yPlaDevice) {
+ if (use_y_device) glyph_pos.y_offset += (base + get_device (values, &ret)).get_y_delta (font, store);
+ values++;
+ }
+ if (format & xAdvDevice) {
+ if (horizontal && use_x_device) glyph_pos.x_advance += (base + get_device (values, &ret)).get_x_delta (font, store);
+ values++;
+ }
+ if (format & yAdvDevice) {
+ /* y_advance values grow downward but font-space grows upward, hence negation */
+ if (!horizontal && use_y_device) glyph_pos.y_advance -= (base + get_device (values, &ret)).get_y_delta (font, store);
+ values++;
+ }
+ return ret;
+ }
+ /* Copies one record into the serializer: plain values verbatim, device
+ * slots through copy_device ().  The results of copy_device () are not
+ * checked here. */
+ void serialize_copy (hb_serialize_context_t *c, const void *base,
+ const Value *values, const hb_map_t *layout_variation_idx_map) const
+ {
+ unsigned int format = *this;
+ if (!format) return;
+
+ if (format & xPlacement) c->copy (*values++);
+ if (format & yPlacement) c->copy (*values++);
+ if (format & xAdvance) c->copy (*values++);
+ if (format & yAdvance) c->copy (*values++);
+
+ if (format & xPlaDevice) copy_device (c, base, values++, layout_variation_idx_map);
+ if (format & yPlaDevice) copy_device (c, base, values++, layout_variation_idx_map);
+ if (format & xAdvDevice) copy_device (c, base, values++, layout_variation_idx_map);
+ if (format & yAdvDevice) copy_device (c, base, values++, layout_variation_idx_map);
+ }
+ /* Steps over the plain value slots, then asks the Device table behind each
+ * present device slot to add its variation indices. */
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c,
+ const void *base,
+ const hb_array_t<const Value>& values) const
+ {
+ unsigned format = *this;
+ unsigned i = 0;
+ if (format & xPlacement) i++;
+ if (format & yPlacement) i++;
+ if (format & xAdvance) i++;
+ if (format & yAdvance) i++;
+ if (format & xPlaDevice)
+ {
+ (base + get_device (&(values[i]))).collect_variation_indices (c->layout_variation_indices);
+ i++;
+ }
+
+ if (format & ValueFormat::yPlaDevice)
+ {
+ (base + get_device (&(values[i]))).collect_variation_indices (c->layout_variation_indices);
+ i++;
+ }
+
+ if (format & ValueFormat::xAdvDevice)
+ {
+
+ (base + get_device (&(values[i]))).collect_variation_indices (c->layout_variation_indices);
+ i++;
+ }
+
+ if (format & ValueFormat::yAdvDevice)
+ {
+
+ (base + get_device (&(values[i]))).collect_variation_indices (c->layout_variation_indices);
+ i++;
+ }
+ }
+ /* Sanitizes the Device offsets of one record; the plain value slots are
+ * only stepped over. */
+ private:
+ bool sanitize_value_devices (hb_sanitize_context_t *c, const void *base, const Value *values) const
+ {
+ unsigned int format = *this;
+
+ if (format & xPlacement) values++;
+ if (format & yPlacement) values++;
+ if (format & xAdvance) values++;
+ if (format & yAdvance) values++;
+
+ if ((format & xPlaDevice) && !get_device (values++).sanitize (c, base)) return false;
+ if ((format & yPlaDevice) && !get_device (values++).sanitize (c, base)) return false;
+ if ((format & xAdvDevice) && !get_device (values++).sanitize (c, base)) return false;
+ if ((format & yAdvDevice) && !get_device (values++).sanitize (c, base)) return false;
+
+ return true;
+ }
+ /* Reinterpret a Value slot as an offset to a Device table.  The const
+ * overload also ORs "the slot is non-zero" into *worked when given. */
+ static inline OffsetTo<Device>& get_device (Value* value)
+ {
+ return *static_cast<OffsetTo<Device> *> (value);
+ }
+ static inline const OffsetTo<Device>& get_device (const Value* value, bool *worked=nullptr)
+ {
+ if (worked) *worked |= bool (*value);
+ return *static_cast<const OffsetTo<Device> *> (value);
+ }
+ /* Copies one device slot.  A zero slot is copied as is.  Otherwise the
+ * Device is serialized as a separate object and linked from the new slot;
+ * if that copy fails the pushed object is discarded and false is returned. */
+ bool copy_device (hb_serialize_context_t *c, const void *base,
+ const Value *src_value, const hb_map_t *layout_variation_idx_map) const
+ {
+ Value *dst_value = c->copy (*src_value);
+
+ if (!dst_value) return false;
+ if (*dst_value == 0) return true;
+
+ *dst_value = 0;
+ c->push ();
+ if ((base + get_device (src_value)).copy (c, layout_variation_idx_map))
+ {
+ c->add_link (*dst_value, c->pop_pack ());
+ return true;
+ }
+ else
+ {
+ c->pop_discard ();
+ return false;
+ }
+ }
+ /* Reinterpret a Value slot as a signed 16-bit number; ORs "the slot is
+ * non-zero" into *worked when given. */
+ static inline const HBINT16& get_short (const Value* value, bool *worked=nullptr)
+ {
+ if (worked) *worked |= bool (*value);
+ return *reinterpret_cast<const HBINT16 *> (value);
+ }
+
+ public:
+ /* True when any of the four Device flags is set. */
+ bool has_device () const
+ {
+ unsigned int format = *this;
+ return (format & devices) != 0;
+ }
+ /* Range-checks one record and, if it has device slots, sanitizes them. */
+ bool sanitize_value (hb_sanitize_context_t *c, const void *base, const Value *values) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_range (values, get_size ()) && (!has_device () || sanitize_value_devices (c, base, values)));
+ }
+ /* Same for `count` records stored back to back (get_len () slots apart). */
+ bool sanitize_values (hb_sanitize_context_t *c, const void *base, const Value *values, unsigned int count) const
+ {
+ TRACE_SANITIZE (this);
+ unsigned int len = get_len ();
+
+ if (!c->check_range (values, count, get_size ())) return_trace (false);
+
+ if (!has_device ()) return_trace (true);
+
+ for (unsigned int i = 0; i < count; i++) {
+ if (!sanitize_value_devices (c, base, values))
+ return_trace (false);
+ values += len;
+ }
+
+ return_trace (true);
+ }
+
+ /* Just sanitize referenced Device tables. Doesn't check the values themselves. */
+ bool sanitize_values_stride_unsafe (hb_sanitize_context_t *c, const void *base, const Value *values, unsigned int count, unsigned int stride) const
+ {
+ TRACE_SANITIZE (this);
+
+ if (!has_device ()) return_trace (true);
+
+ for (unsigned int i = 0; i < count; i++) {
+ if (!sanitize_value_devices (c, base, values))
+ return_trace (false);
+ values += stride; /* stride is counted in Value slots, not bytes */
+ }
+
+ return_trace (true);
+ }
+};
+
+template<typename Iterator>
+static void SinglePos_serialize (hb_serialize_context_t *c,
+ const void *src,
+ Iterator it,
+ ValueFormat valFormat,
+ const hb_map_t *layout_variation_idx_map); /* Prototype only. NOTE(review): the definition is expected later in this header -- confirm. */
+
+
+/* Anchor given purely by design-unit coordinates. */
+struct AnchorFormat1
+{
+  /* Writes the anchor, scaled by the context's font, to *x / *y.
+   * The glyph id plays no part in this format. */
+  void get_anchor (hb_ot_apply_context_t *c, hb_codepoint_t glyph_id HB_UNUSED,
+                   float *x, float *y) const
+  {
+    *x = c->font->em_fscale_x (xCoordinate);
+    *y = c->font->em_fscale_y (yCoordinate);
+  }
+
+  /* The struct has a fixed size, so a bounds check is all that is needed. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool in_bounds = c->check_struct (this);
+    return_trace (in_bounds);
+  }
+
+  /* Embeds a copy of this anchor into the serializer. */
+  AnchorFormat1* copy (hb_serialize_context_t *c) const
+  {
+    TRACE_SERIALIZE (this);
+    return_trace (c->embed<AnchorFormat1> (this));
+  }
+
+  protected:
+  HBUINT16      format;                 /* Format identifier--format = 1 */
+  FWORD         xCoordinate;            /* Horizontal value--in design units */
+  FWORD         yCoordinate;            /* Vertical value--in design units */
+  public:
+  DEFINE_SIZE_STATIC (6);
+};
+
+/* Anchor given by design-unit coordinates plus a glyph contour point. */
+struct AnchorFormat2
+{
+  /* Writes the anchor to *x / *y.  When the font has a ppem and the contour
+   * point can be fetched, the contour point replaces the scaled coordinate
+   * on each axis that has a non-zero ppem; otherwise the scaled design
+   * coordinate is used.  With HB_NO_HINTING only the scaled coordinates
+   * are ever used. */
+  void get_anchor (hb_ot_apply_context_t *c, hb_codepoint_t glyph_id,
+                   float *x, float *y) const
+  {
+    hb_font_t *font = c->font;
+
+#ifdef HB_NO_HINTING
+    *x = font->em_fscale_x (xCoordinate);
+    *y = font->em_fscale_y (yCoordinate);
+    return;
+#endif
+
+    const unsigned int x_ppem = font->x_ppem;
+    const unsigned int y_ppem = font->y_ppem;
+    hb_position_t cx = 0, cy = 0;
+
+    /* The contour point is only requested when some ppem is set. */
+    const bool have_point =
+      (x_ppem || y_ppem) &&
+      font->get_glyph_contour_point_for_origin (glyph_id, anchorPoint, HB_DIRECTION_LTR, &cx, &cy);
+
+    if (have_point && x_ppem)
+      *x = cx;
+    else
+      *x = font->em_fscale_x (xCoordinate);
+
+    if (have_point && y_ppem)
+      *y = cy;
+    else
+      *y = font->em_fscale_y (yCoordinate);
+  }
+
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool in_bounds = c->check_struct (this);
+    return_trace (in_bounds);
+  }
+
+  /* Embeds a copy of this anchor into the serializer. */
+  AnchorFormat2* copy (hb_serialize_context_t *c) const
+  {
+    TRACE_SERIALIZE (this);
+    return_trace (c->embed<AnchorFormat2> (this));
+  }
+
+  protected:
+  HBUINT16      format;                 /* Format identifier--format = 2 */
+  FWORD         xCoordinate;            /* Horizontal value--in design units */
+  FWORD         yCoordinate;            /* Vertical value--in design units */
+  HBUINT16      anchorPoint;            /* Index to glyph contour point */
+  public:
+  DEFINE_SIZE_STATIC (8);
+};
+
+/* Anchor given by design-unit coordinates plus per-axis Device tables. */
+struct AnchorFormat3
+{
+  /* Writes the scaled coordinates to *x / *y, then adds the Device delta
+   * on each axis for which the font has a ppem or variation coordinates. */
+  void get_anchor (hb_ot_apply_context_t *c, hb_codepoint_t glyph_id HB_UNUSED,
+                   float *x, float *y) const
+  {
+    hb_font_t *font = c->font;
+
+    *x = font->em_fscale_x (xCoordinate);
+    *y = font->em_fscale_y (yCoordinate);
+
+    const bool adjust_x = font->x_ppem || font->num_coords;
+    if (adjust_x)
+      *x += (this+xDeviceTable).get_x_delta (font, c->var_store);
+
+    const bool adjust_y = font->y_ppem || font->num_coords;
+    if (adjust_y)
+      *y += (this+yDeviceTable).get_y_delta (font, c->var_store);
+  }
+
+  /* Bounds-checks the struct, then both Device offsets. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!c->check_struct (this)) return_trace (false);
+    if (!xDeviceTable.sanitize (c, this)) return_trace (false);
+    return_trace (yDeviceTable.sanitize (c, this));
+  }
+
+  /* Embeds a copy and re-serializes both Device tables behind it.
+   * Returns nullptr when no variation index map is supplied or when the
+   * embed fails; the results of the two device copies are not checked. */
+  AnchorFormat3* copy (hb_serialize_context_t *c,
+                       const hb_map_t *layout_variation_idx_map) const
+  {
+    TRACE_SERIALIZE (this);
+    if (!layout_variation_idx_map) return_trace (nullptr);
+
+    auto *anchor = c->embed<AnchorFormat3> (this);
+    if (unlikely (!anchor)) return_trace (nullptr);
+
+    anchor->xDeviceTable.serialize_copy (c, xDeviceTable, this, 0,
+                                         hb_serialize_context_t::Head,
+                                         layout_variation_idx_map);
+    anchor->yDeviceTable.serialize_copy (c, yDeviceTable, this, 0,
+                                         hb_serialize_context_t::Head,
+                                         layout_variation_idx_map);
+    return_trace (anchor);
+  }
+
+  /* Lets both Device tables add their variation indices. */
+  void collect_variation_indices (hb_collect_variation_indices_context_t *c) const
+  {
+    (this+xDeviceTable).collect_variation_indices (c->layout_variation_indices);
+    (this+yDeviceTable).collect_variation_indices (c->layout_variation_indices);
+  }
+
+  protected:
+  HBUINT16      format;                 /* Format identifier--format = 3 */
+  FWORD         xCoordinate;            /* Horizontal value--in design units */
+  FWORD         yCoordinate;            /* Vertical value--in design units */
+  OffsetTo<Device>
+                xDeviceTable;           /* Offset to Device table for X
+                                         * coordinate-- from beginning of
+                                         * Anchor table (may be NULL) */
+  OffsetTo<Device>
+                yDeviceTable;           /* Offset to Device table for Y
+                                         * coordinate-- from beginning of
+                                         * Anchor table (may be NULL) */
+  public:
+  DEFINE_SIZE_STATIC (10);
+};
+
+/* Format-dispatching wrapper over the three anchor formats. */
+struct Anchor
+{
+  /* Writes the anchor position to *x / *y.  Both are zeroed first, so an
+   * unknown format yields (0, 0). */
+  void get_anchor (hb_ot_apply_context_t *c, hb_codepoint_t glyph_id,
+                   float *x, float *y) const
+  {
+    *y = 0;
+    *x = 0;
+
+    const unsigned int fmt = u.format;
+    if (fmt == 1)
+      u.format1.get_anchor (c, glyph_id, x, y);
+    else if (fmt == 2)
+      u.format2.get_anchor (c, glyph_id, x, y);
+    else if (fmt == 3)
+      u.format3.get_anchor (c, glyph_id, x, y);
+  }
+
+  /* Validates the format field, then the selected format.  Unknown
+   * formats pass. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!u.format.sanitize (c))
+      return_trace (false);
+
+    const unsigned int fmt = u.format;
+    if (fmt == 1) return_trace (u.format1.sanitize (c));
+    if (fmt == 2) return_trace (u.format2.sanitize (c));
+    if (fmt == 3) return_trace (u.format3.sanitize (c));
+
+    return_trace (true);
+  }
+
+  /* Copies the selected format into the serializer.  Only format 3 uses
+   * the variation index map.  Unknown formats yield nullptr. */
+  Anchor* copy (hb_serialize_context_t *c, const hb_map_t *layout_variation_idx_map) const
+  {
+    TRACE_SERIALIZE (this);
+    const unsigned int fmt = u.format;
+    if (fmt == 1)
+      return_trace (reinterpret_cast<Anchor *> (u.format1.copy (c)));
+    if (fmt == 2)
+      return_trace (reinterpret_cast<Anchor *> (u.format2.copy (c)));
+    if (fmt == 3)
+      return_trace (reinterpret_cast<Anchor *> (u.format3.copy (c, layout_variation_idx_map)));
+
+    return_trace (nullptr);
+  }
+
+  /* Only format 3 is asked for variation indices. */
+  void collect_variation_indices (hb_collect_variation_indices_context_t *c) const
+  {
+    const unsigned int fmt = u.format;
+    if (fmt != 3)
+      return;
+
+    u.format3.collect_variation_indices (c);
+  }
+
+  protected:
+  union {
+  HBUINT16              format;         /* Format identifier */
+  AnchorFormat1         format1;
+  AnchorFormat2         format2;
+  AnchorFormat3         format3;
+  } u;
+  public:
+  DEFINE_SIZE_UNION (2, format);
+};
+
+
+struct AnchorMatrix /* Grid of offsets to Anchor tables stored row by row.  Only
+ * `rows` is stored in the table; the column count is supplied by the caller. */
+{
+ const Anchor& get_anchor (unsigned int row, unsigned int col,
+ unsigned int cols, bool *found) const
+ { /* An out-of-range (row, col) yields Null (Anchor) with *found = false;
+ * otherwise *found tells whether the stored offset is non-null. */
+ *found = false;
+ if (unlikely (row >= rows || col >= cols)) return Null (Anchor);
+ *found = !matrixZ[row * cols + col].is_null ();
+ return this+matrixZ[row * cols + col];
+ }
+ /* Visits the anchors at the flat matrix indices produced by index_iter. */
+ template <typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c,
+ Iterator index_iter) const
+ {
+ for (unsigned i : index_iter)
+ (this+matrixZ[i]).collect_variation_indices (c);
+ }
+ /* Writes num_rows, then one offset per index from index_iter, copying the
+ * referenced Anchor out of offset_matrix.  Fails on an empty iterator, or
+ * when the serializer cannot extend or embed. */
+ template <typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ bool serialize (hb_serialize_context_t *c,
+ unsigned num_rows,
+ AnchorMatrix const *offset_matrix,
+ const hb_map_t *layout_variation_idx_map,
+ Iterator index_iter)
+ {
+ TRACE_SERIALIZE (this);
+ if (!index_iter) return_trace (false);
+ if (unlikely (!c->extend_min ((*this)))) return_trace (false);
+
+ this->rows = num_rows;
+ for (const unsigned i : index_iter)
+ {
+ auto *offset = c->embed (offset_matrix->matrixZ[i]);
+ if (!offset) return_trace (false);
+ offset->serialize_copy (c, offset_matrix->matrixZ[i],
+ offset_matrix, c->to_bias (this),
+ hb_serialize_context_t::Head,
+ layout_variation_idx_map); /* the result of this copy is not checked */
+ }
+
+ return_trace (true);
+ }
+ /* Rejects a rows * cols product that overflows before checking the offset
+ * array and then each offset in it. */
+ bool sanitize (hb_sanitize_context_t *c, unsigned int cols) const
+ {
+ TRACE_SANITIZE (this);
+ if (!c->check_struct (this)) return_trace (false);
+ if (unlikely (hb_unsigned_mul_overflows (rows, cols))) return_trace (false);
+ unsigned int count = rows * cols;
+ if (!c->check_array (matrixZ.arrayZ, count)) return_trace (false);
+ for (unsigned int i = 0; i < count; i++)
+ if (!matrixZ[i].sanitize (c, this)) return_trace (false);
+ return_trace (true);
+ }
+
+ HBUINT16 rows; /* Number of rows */
+ UnsizedArrayOf<OffsetTo<Anchor>>
+ matrixZ; /* Matrix of offsets to Anchor tables--
+ * from beginning of AnchorMatrix table */
+ public:
+ DEFINE_SIZE_ARRAY (2, matrixZ);
+};
+
+
+/* One entry of a MarkArray: the mark's class plus the offset to its
+ * anchor. */
+struct MarkRecord
+{
+  friend struct MarkArray;
+
+  unsigned get_class () const { return static_cast<unsigned> (klass); }
+
+  /* Bounds-checks the record, then the anchor offset relative to `base`. */
+  bool sanitize (hb_sanitize_context_t *c, const void *base) const
+  {
+    TRACE_SANITIZE (this);
+    if (!c->check_struct (this))
+      return_trace (false);
+    return_trace (markAnchor.sanitize (c, base));
+  }
+
+  /* Embeds a copy of this record, rewrites its class through
+   * klass_mapping and re-serializes the anchor it points to.  Returns
+   * nullptr when the embed fails; the result of the anchor copy is not
+   * checked. */
+  MarkRecord *copy (hb_serialize_context_t *c,
+                    const void *src_base,
+                    unsigned dst_bias,
+                    const hb_map_t *klass_mapping,
+                    const hb_map_t *layout_variation_idx_map) const
+  {
+    TRACE_SERIALIZE (this);
+    auto *record = c->embed (this);
+    if (unlikely (!record))
+      return_trace (nullptr);
+
+    record->klass = klass_mapping->get (klass);
+    record->markAnchor.serialize_copy (c, markAnchor, src_base, dst_bias,
+                                       hb_serialize_context_t::Head,
+                                       layout_variation_idx_map);
+    return_trace (record);
+  }
+
+  /* Lets the anchor, resolved against src_base, add its variation
+   * indices. */
+  void collect_variation_indices (hb_collect_variation_indices_context_t *c,
+                                  const void *src_base) const
+  {
+    (src_base+markAnchor).collect_variation_indices (c);
+  }
+
+  protected:
+  HBUINT16      klass;                  /* Class defined for this mark */
+  OffsetTo<Anchor>
+                markAnchor;             /* Offset to Anchor table--from
+                                         * beginning of MarkArray table */
+  public:
+  DEFINE_SIZE_STATIC (4);
+};
+
+struct MarkArray : ArrayOf<MarkRecord> /* Array of MarkRecords--in Coverage order */
+{ /* Attaches the current glyph (a mark) to the glyph at glyph_pos.
+   *
+   * mark_index selects the MarkRecord in this array; the record's class is
+   * the column used to look up the other glyph's anchor in ANCHORS at row
+   * glyph_index (class_count is passed through to that lookup).
+   * Returns false, leaving positions untouched, when ANCHORS reports no
+   * anchor for that (glyph, class) pair; otherwise writes the offsets and
+   * attachment data, advances buffer->idx and returns true. */
+ bool apply (hb_ot_apply_context_t *c,
+ unsigned int mark_index, unsigned int glyph_index,
+ const AnchorMatrix &anchors, unsigned int class_count,
+ unsigned int glyph_pos) const
+ {
+ TRACE_APPLY (this);
+ hb_buffer_t *buffer = c->buffer;
+ const MarkRecord &record = ArrayOf<MarkRecord>::operator[](mark_index);
+ unsigned int mark_class = record.klass;
+
+ const Anchor& mark_anchor = this + record.markAnchor;
+ bool found;
+ const Anchor& glyph_anchor = anchors.get_anchor (glyph_index, mark_class, class_count, &found);
+ /* If this subtable doesn't have an anchor for this base and this class,
+ * return false such that the subsequent subtables have a chance at it. */
+ if (unlikely (!found)) return_trace (false);
+
+ float mark_x, mark_y, base_x, base_y;
+
+ buffer->unsafe_to_break (glyph_pos, buffer->idx);
+ mark_anchor.get_anchor (c, buffer->cur().codepoint, &mark_x, &mark_y);
+ glyph_anchor.get_anchor (c, buffer->info[glyph_pos].codepoint, &base_x, &base_y);
+
+ hb_glyph_position_t &o = buffer->cur_pos(); /* position of the mark being attached */
+ o.x_offset = roundf (base_x - mark_x);
+ o.y_offset = roundf (base_y - mark_y);
+ o.attach_type() = ATTACH_TYPE_MARK;
+ o.attach_chain() = (int) glyph_pos - (int) buffer->idx; /* relative index of the glyph attached to */
+ buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT;
+
+ buffer->idx++;
+ return_trace (true);
+ }
+ /* Serializes the records produced by IT into this array.
+  * Extends the array header, assigns len from it.len () (failing if the
+  * checked assignment fails), then hands every record to copy_all together
+  * with base, this array's bias, klass_mapping and
+  * layout_variation_idx_map.  The result of copy_all is not inspected. */
+ template<typename Iterator,
+ hb_requires (hb_is_source_of (Iterator, MarkRecord))>
+ bool serialize (hb_serialize_context_t *c,
+ const hb_map_t *klass_mapping,
+ const hb_map_t *layout_variation_idx_map,
+ const void *base,
+ Iterator it)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!c->extend_min (*this))) return_trace (false);
+ if (unlikely (!c->check_assign (len, it.len ()))) return_trace (false);
+ c->copy_all (it, base, c->to_bias (this), klass_mapping, layout_variation_idx_map);
+ return_trace (true);
+ }
+ /* Sanitizes the underlying array, passing this array as the base for the
+  * records' anchor offsets. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (ArrayOf<MarkRecord>::sanitize (c, this));
+ }
+};
+
+
+/* Lookups */
+
+struct SinglePosFormat1 /* Single adjustment, format 1: one ValueRecord applied to every covered glyph */
+{
+ bool intersects (const hb_set_t *glyphs) const
+ { return (this+coverage).intersects (glyphs); }
+ /* closure_lookups is a no-op for this subtable. */
+ void closure_lookups (hb_closure_lookups_context_t *c) const {}
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c) const
+ { /* Collects from the single shared record, but only when the value
+    * format has device entries and at least one covered glyph is in
+    * c->glyph_set. */
+ if (!valueFormat.has_device ()) return;
+
+ auto it =
+ + hb_iter (this+coverage)
+ | hb_filter (c->glyph_set)
+ ;
+
+ if (!it) return;
+ valueFormat.collect_variation_indices (c, this, values.as_array (valueFormat.get_len ()));
+ }
+ /* Adds the covered glyphs to c->input; the result of collect_coverage
+  * does not change what happens next. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ { if (unlikely (!(this+coverage).collect_coverage (c->input))) return; }
+
+ const Coverage &get_coverage () const { return this+coverage; }
+ /* Applies the shared value record to the current glyph if it is covered,
+  * then advances buffer->idx.  Returns false when the glyph is not covered. */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+ hb_buffer_t *buffer = c->buffer;
+ unsigned int index = (this+coverage).get_coverage (buffer->cur().codepoint);
+ if (likely (index == NOT_COVERED)) return_trace (false);
+
+ valueFormat.apply_value (c, this, values, buffer->cur_pos());
+
+ buffer->idx++;
+ return_trace (true);
+ }
+ /* Serializes a format-1 subtable from IT, whose items are
+  * (glyph, value array) pairs: hb_first feeds the coverage, hb_second the
+  * value copies.  src is passed to serialize_copy as the base of the source
+  * values.  Returns silently if extending or the checked assignment of the
+  * value format fails. */
+ template<typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ void serialize (hb_serialize_context_t *c,
+ const void *src,
+ Iterator it,
+ ValueFormat valFormat,
+ const hb_map_t *layout_variation_idx_map)
+ {
+ auto out = c->extend_min (*this);
+ if (unlikely (!out)) return;
+ if (unlikely (!c->check_assign (valueFormat, valFormat))) return;
+
+ + it
+ | hb_map (hb_second)
+ | hb_apply ([&] (hb_array_t<const Value> _)
+ { valFormat.serialize_copy (c, src, &_, layout_variation_idx_map); })
+ ;
+
+ auto glyphs =
+ + it
+ | hb_map_retains_sorting (hb_first)
+ ;
+
+ coverage.serialize (c, this).serialize (c, glyphs);
+ }
+ /* Subsets to the plan's glyph set: retained covered glyphs are remapped
+  * through glyph_map and each is paired with the same shared value array
+  * before being handed to SinglePos_serialize.  Returns whether any glyph
+  * was retained; serialization is attempted either way. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ auto it =
+ + hb_iter (this+coverage)
+ | hb_filter (glyphset)
+ | hb_map_retains_sorting (glyph_map)
+ | hb_zip (hb_repeat (values.as_array (valueFormat.get_len ())))
+ ;
+
+ bool ret = bool (it);
+ SinglePos_serialize (c->serializer, this, it, valueFormat, c->plan->layout_variation_idx_map);
+ return_trace (ret);
+ }
+ /* Validates the header, the coverage table and the single value record. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ coverage.sanitize (c, this) &&
+ valueFormat.sanitize_value (c, this, values));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 1 */
+ OffsetTo<Coverage>
+ coverage; /* Offset to Coverage table--from
+ * beginning of subtable */
+ ValueFormat valueFormat; /* Defines the types of data in the
+ * ValueRecord */
+ ValueRecord values; /* Defines positioning
+ * value(s)--applied to all glyphs in
+ * the Coverage table */
+ public:
+ DEFINE_SIZE_ARRAY (6, values);
+};
+
+/* Single adjustment, format 2: one ValueRecord per covered glyph, stored
+ * back to back in coverage-index order. */
+struct SinglePosFormat2
+{
+  bool intersects (const hb_set_t *glyphs) const
+  { return (this+coverage).intersects (glyphs); }
+
+  /* closure_lookups is a no-op for this subtable. */
+  void closure_lookups (hb_closure_lookups_context_t *c) const {}
+
+  /* Collects variation indices from the value records of those covered
+   * glyphs that are in c->glyph_set.  Does nothing when the value format
+   * has no device entries. */
+  void collect_variation_indices (hb_collect_variation_indices_context_t *c) const
+  {
+    if (!valueFormat.has_device ()) return;
+
+    auto it =
+    + hb_zip (this+coverage, hb_range ((unsigned) valueCount))
+    | hb_filter (c->glyph_set, hb_first)
+    ;
+
+    if (!it) return;
+
+    unsigned sub_length = valueFormat.get_len ();
+    const hb_array_t<const Value> values_array = values.as_array (valueCount * sub_length);
+
+    /* hb_second is the record index paired with each retained glyph. */
+    for (unsigned i : + it
+                      | hb_map (hb_second))
+      valueFormat.collect_variation_indices (c, this, values_array.sub_array (i * sub_length, sub_length));
+
+  }
+
+  /* Adds the covered glyphs to c->input. */
+  void collect_glyphs (hb_collect_glyphs_context_t *c) const
+  { if (unlikely (!(this+coverage).collect_coverage (c->input))) return; }
+
+  const Coverage &get_coverage () const { return this+coverage; }
+
+  /* Applies the value record belonging to the current glyph's coverage
+   * index and advances buffer->idx.  Returns false when the glyph is not
+   * covered or its coverage index has no record (index >= valueCount). */
+  bool apply (hb_ot_apply_context_t *c) const
+  {
+    TRACE_APPLY (this);
+    hb_buffer_t *buffer = c->buffer;
+    unsigned int index = (this+coverage).get_coverage (buffer->cur().codepoint);
+    if (likely (index == NOT_COVERED)) return_trace (false);
+
+    /* A covered glyph without a matching record is the exceptional case, so
+     * the branch is hinted as unlikely. */
+    if (unlikely (index >= valueCount)) return_trace (false);
+
+    valueFormat.apply_value (c, this,
+                             &values[index * valueFormat.get_len ()],
+                             buffer->cur_pos());
+
+    buffer->idx++;
+    return_trace (true);
+  }
+
+  /* Serializes a format-2 subtable from IT, whose items are
+   * (glyph, value array) pairs.  valueCount is taken from it.len (); each
+   * value array is copied with src as the source base, and the glyphs are
+   * serialized as the coverage table.  Returns silently if extending or a
+   * checked assignment fails. */
+  template<typename Iterator,
+           hb_requires (hb_is_iterator (Iterator))>
+  void serialize (hb_serialize_context_t *c,
+                  const void *src,
+                  Iterator it,
+                  ValueFormat valFormat,
+                  const hb_map_t *layout_variation_idx_map)
+  {
+    auto out = c->extend_min (*this);
+    if (unlikely (!out)) return;
+    if (unlikely (!c->check_assign (valueFormat, valFormat))) return;
+    if (unlikely (!c->check_assign (valueCount, it.len ()))) return;
+
+    + it
+    | hb_map (hb_second)
+    | hb_apply ([&] (hb_array_t<const Value> _)
+                { valFormat.serialize_copy (c, src, &_, layout_variation_idx_map); })
+    ;
+
+    auto glyphs =
+    + it
+    | hb_map_retains_sorting (hb_first)
+    ;
+
+    coverage.serialize (c, this).serialize (c, glyphs);
+  }
+
+  /* Subsets to the plan's glyph set: every retained covered glyph is
+   * remapped through glyph_map and paired with its own slice of the value
+   * array before being handed to SinglePos_serialize.  Returns whether any
+   * glyph was retained; serialization is attempted either way. */
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    const hb_set_t &glyphset = *c->plan->glyphset ();
+    const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+    unsigned sub_length = valueFormat.get_len ();
+    auto values_array = values.as_array (valueCount * sub_length);
+
+    auto it =
+    + hb_zip (this+coverage, hb_range ((unsigned) valueCount))
+    | hb_filter (glyphset, hb_first)
+    | hb_map_retains_sorting ([&] (const hb_pair_t<hb_codepoint_t, unsigned>& _)
+                              {
+                                return hb_pair (glyph_map[_.first],
+                                                values_array.sub_array (_.second * sub_length,
+                                                                        sub_length));
+                              })
+    ;
+
+    bool ret = bool (it);
+    SinglePos_serialize (c->serializer, this, it, valueFormat, c->plan->layout_variation_idx_map);
+    return_trace (ret);
+  }
+
+  /* Validates the header, the coverage table and all valueCount records. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (c->check_struct (this) &&
+                  coverage.sanitize (c, this) &&
+                  valueFormat.sanitize_values (c, this, values, valueCount));
+  }
+
+  protected:
+  HBUINT16 format; /* Format identifier--format = 2 */
+  OffsetTo<Coverage>
+  coverage; /* Offset to Coverage table--from
+             * beginning of subtable */
+  ValueFormat valueFormat; /* Defines the types of data in the
+                            * ValueRecord */
+  HBUINT16 valueCount; /* Number of ValueRecords */
+  ValueRecord values; /* Array of ValueRecords--positioning
+                       * values applied to glyphs */
+  public:
+  DEFINE_SIZE_ARRAY (8, values);
+};
+
+/* Single adjustment subtable: a union over the two formats, selected by
+ * the leading format field. */
+struct SinglePos
+{
+  /* Picks the format needed for the given (glyph, value array) pairs:
+   * 1 when every value array compares equal, element by element, to the
+   * first one, 2 as soon as any element differs.  The iterator is
+   * dereferenced up front, so callers pass a non-empty iterator. */
+  template<typename Iterator,
+           hb_requires (hb_is_iterator (Iterator))>
+  unsigned get_format (Iterator glyph_val_iter_pairs)
+  {
+    hb_array_t<const Value> reference_values = hb_second (*glyph_val_iter_pairs);
+
+    for (const auto glyph_and_values : glyph_val_iter_pairs)
+    {
+      for (const auto value_pair : hb_zip (glyph_and_values.second, reference_values))
+      {
+        if (value_pair.first != value_pair.second)
+          return 2;
+      }
+    }
+
+    return 1;
+  }
+
+
+  /* Writes the format field and delegates to the matching format's
+   * serialize.  An empty iterator yields format 2. */
+  template<typename Iterator,
+           hb_requires (hb_is_iterator (Iterator))>
+  void serialize (hb_serialize_context_t *c,
+                  const void *src,
+                  Iterator glyph_val_iter_pairs,
+                  ValueFormat valFormat,
+                  const hb_map_t *layout_variation_idx_map)
+  {
+    if (unlikely (!c->extend_min (u.format))) return;
+
+    const unsigned chosen_format =
+      glyph_val_iter_pairs ? get_format (glyph_val_iter_pairs) : 2u;
+    u.format = chosen_format;
+
+    if (chosen_format == 1)
+      u.format1.serialize (c, src, glyph_val_iter_pairs, valFormat, layout_variation_idx_map);
+    else if (chosen_format == 2)
+      u.format2.serialize (c, src, glyph_val_iter_pairs, valFormat, layout_variation_idx_map);
+  }
+
+  /* Routes the context to the subtable matching the format field; unknown
+   * formats produce the context's default return value. */
+  template <typename context_t, typename ...Ts>
+  typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+  {
+    TRACE_DISPATCH (this, u.format);
+    if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ());
+
+    if (u.format == 1)
+      return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+    if (u.format == 2)
+      return_trace (c->dispatch (u.format2, hb_forward<Ts> (ds)...));
+
+    return_trace (c->default_return_value ());
+  }
+
+  protected:
+  union {
+    HBUINT16 format; /* Format identifier */
+    SinglePosFormat1 format1;
+    SinglePosFormat2 format2;
+  } u;
+};
+
+/* Starts a SinglePos at the serializer's current position and serializes
+ * the given (glyph, value array) pairs into it. */
+template<typename Iterator>
+static void
+SinglePos_serialize (hb_serialize_context_t *c,
+                     const void *src,
+                     Iterator it,
+                     ValueFormat valFormat,
+                     const hb_map_t *layout_variation_idx_map)
+{
+  auto *single_pos = c->start_embed<SinglePos> ();
+  single_pos->serialize (c, src, it, valFormat, layout_variation_idx_map);
+}
+
+
+/* One record of a PairSet: the second glyph of the pair followed by the
+ * value data for the first and then the second glyph. */
+struct PairValueRecord
+{
+  friend struct PairSet;
+
+  /* Orders the record against glyph K by its secondGlyph. */
+  int cmp (hb_codepoint_t k) const
+  { return secondGlyph.cmp (k); }
+
+  /* State shared by all records of one PairSet while subsetting. */
+  struct serialize_closure_t
+  {
+    const void *base;
+    const ValueFormat *valueFormats;
+    unsigned len1; /* valueFormats[0].get_len() */
+    const hb_map_t *glyph_map;
+    const hb_map_t *layout_variation_idx_map;
+  };
+
+  /* Emits a copy of this record: secondGlyph is remapped through the
+   * closure's glyph_map and both value records are copied with the
+   * closure's base.  Returns false only when the serializer cannot be
+   * extended. */
+  bool serialize (hb_serialize_context_t *c,
+                  serialize_closure_t *closure) const
+  {
+    TRACE_SERIALIZE (this);
+    auto *copied = c->start_embed (*this);
+    if (unlikely (!c->extend_min (copied))) return_trace (false);
+
+    const hb_map_t &glyph_map = *closure->glyph_map;
+    copied->secondGlyph = glyph_map[secondGlyph];
+
+    const ValueFormat *formats = closure->valueFormats;
+    formats[0].serialize_copy (c, closure->base, &values[0],
+                               closure->layout_variation_idx_map);
+    formats[1].serialize_copy (c, closure->base, &values[closure->len1],
+                               closure->layout_variation_idx_map);
+
+    return_trace (true);
+  }
+
+  /* Collects variation indices from whichever of the two value records
+   * has device entries according to its format. */
+  void collect_variation_indices (hb_collect_variation_indices_context_t *c,
+                                  const ValueFormat *valueFormats,
+                                  const void *base) const
+  {
+    const unsigned first_len = valueFormats[0].get_len ();
+    const unsigned second_len = valueFormats[1].get_len ();
+    const hb_array_t<const Value> all_values = values.as_array (first_len + second_len);
+
+    if (valueFormats[0].has_device ())
+    {
+      valueFormats[0].collect_variation_indices (c, base,
+                                                 all_values.sub_array (0, first_len));
+    }
+
+    if (valueFormats[1].has_device ())
+    {
+      valueFormats[1].collect_variation_indices (c, base,
+                                                 all_values.sub_array (first_len, second_len));
+    }
+  }
+
+  protected:
+  HBGlyphID secondGlyph; /* GlyphID of second glyph in the
+                          * pair--first glyph is listed in the
+                          * Coverage table */
+  ValueRecord values; /* Positioning data for the first glyph
+                       * followed by for second glyph */
+  public:
+  DEFINE_SIZE_ARRAY (2, values);
+};
+
+struct PairSet /* Variable-size PairValueRecords for one first glyph; record size depends on the two value formats */
+{
+ friend struct PairPosFormat1;
+ /* True when any record's secondGlyph is in GLYPHS.  Records are walked
+  * by byte stride because their size is only known from valueFormats. */
+ bool intersects (const hb_set_t *glyphs,
+ const ValueFormat *valueFormats) const
+ {
+ unsigned int len1 = valueFormats[0].get_len ();
+ unsigned int len2 = valueFormats[1].get_len ();
+ unsigned int record_size = HBUINT16::static_size * (1 + len1 + len2); /* glyph id + both value records, in bytes */
+
+ const PairValueRecord *record = &firstPairValueRecord;
+ unsigned int count = len;
+ for (unsigned int i = 0; i < count; i++)
+ {
+ if (glyphs->has (record->secondGlyph))
+ return true;
+ record = &StructAtOffset<const PairValueRecord> (record, record_size);
+ }
+ return false;
+ }
+ /* Adds every record's secondGlyph to c->input, stepping through the
+  * records by record_size bytes. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c,
+ const ValueFormat *valueFormats) const
+ {
+ unsigned int len1 = valueFormats[0].get_len ();
+ unsigned int len2 = valueFormats[1].get_len ();
+ unsigned int record_size = HBUINT16::static_size * (1 + len1 + len2);
+
+ const PairValueRecord *record = &firstPairValueRecord;
+ c->input->add_array (&record->secondGlyph, len, record_size);
+ }
+ /* Collects variation indices from the records whose secondGlyph is in
+  * c->glyph_set; this PairSet is passed as the base for the records. */
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c,
+ const ValueFormat *valueFormats) const
+ {
+ unsigned len1 = valueFormats[0].get_len ();
+ unsigned len2 = valueFormats[1].get_len ();
+ unsigned record_size = HBUINT16::static_size * (1 + len1 + len2);
+
+ const PairValueRecord *record = &firstPairValueRecord;
+ unsigned count = len;
+ for (unsigned i = 0; i < count; i++)
+ {
+ if (c->glyph_set->has (record->secondGlyph))
+ { record->collect_variation_indices (c, valueFormats, this); }
+
+ record = &StructAtOffset<const PairValueRecord> (record, record_size);
+ }
+ }
+ /* Looks for a record for the glyph at POS (the second glyph of the
+  * pair) with hb_bsearch.  On a hit both value records are applied, the
+  * first to the current glyph and the second to the glyph at POS, and
+  * buffer->idx is moved to POS, or one past it when the second value
+  * record is non-empty.  Returns false when no record matches. */
+ bool apply (hb_ot_apply_context_t *c,
+ const ValueFormat *valueFormats,
+ unsigned int pos) const
+ {
+ TRACE_APPLY (this);
+ hb_buffer_t *buffer = c->buffer;
+ unsigned int len1 = valueFormats[0].get_len ();
+ unsigned int len2 = valueFormats[1].get_len ();
+ unsigned int record_size = HBUINT16::static_size * (1 + len1 + len2);
+
+ const PairValueRecord *record = hb_bsearch (buffer->info[pos].codepoint,
+ &firstPairValueRecord,
+ len,
+ record_size);
+ if (record)
+ {
+ /* Note the intentional use of "|" instead of short-circuit "||". */
+ if (valueFormats[0].apply_value (c, this, &record->values[0], buffer->cur_pos()) |
+ valueFormats[1].apply_value (c, this, &record->values[len1], buffer->pos[pos]))
+ buffer->unsafe_to_break (buffer->idx, pos + 1);
+ if (len2)
+ pos++;
+ buffer->idx = pos;
+ return_trace (true);
+ }
+ return_trace (false);
+ }
+ /* Writes a copy of this PairSet containing only the records whose
+  * secondGlyph is in the plan's glyph set and that serialize
+  * successfully.  If none is kept the serializer is reverted to the
+  * snapshot taken on entry.  Returns whether any record was kept. */
+ bool subset (hb_subset_context_t *c,
+ const ValueFormat valueFormats[2]) const
+ {
+ TRACE_SUBSET (this);
+ auto snap = c->serializer->snapshot ();
+
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+ out->len = 0;
+
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ unsigned len1 = valueFormats[0].get_len ();
+ unsigned len2 = valueFormats[1].get_len ();
+ unsigned record_size = HBUINT16::static_size + Value::static_size * (len1 + len2);
+
+ PairValueRecord::serialize_closure_t closure =
+ {
+ this,
+ valueFormats,
+ len1,
+ &glyph_map,
+ c->plan->layout_variation_idx_map
+ };
+
+ const PairValueRecord *record = &firstPairValueRecord;
+ unsigned count = len, num = 0;
+ for (unsigned i = 0; i < count; i++)
+ {
+ if (glyphset.has (record->secondGlyph)
+ && record->serialize (c->serializer, &closure)) num++;
+ record = &StructAtOffset<const PairValueRecord> (record, record_size);
+ }
+
+ out->len = num;
+ if (!num) c->serializer->revert (snap);
+ return_trace (num);
+ }
+ /* Parameters the owning subtable supplies to sanitize (), since the
+  * record layout is not stored in the PairSet itself. */
+ struct sanitize_closure_t
+ {
+ const ValueFormat *valueFormats;
+ unsigned int len1; /* valueFormats[0].get_len() */
+ unsigned int stride; /* 1 + len1 + len2 */
+ };
+ /* Validates the header and the byte range covered by len records of
+  * closure->stride 16-bit units each, then sanitizes the first and second
+  * value record of every entry using the same stride. */
+ bool sanitize (hb_sanitize_context_t *c, const sanitize_closure_t *closure) const
+ {
+ TRACE_SANITIZE (this);
+ if (!(c->check_struct (this)
+ && c->check_range (&firstPairValueRecord,
+ len,
+ HBUINT16::static_size,
+ closure->stride))) return_trace (false);
+
+ unsigned int count = len;
+ const PairValueRecord *record = &firstPairValueRecord;
+ return_trace (closure->valueFormats[0].sanitize_values_stride_unsafe (c, this, &record->values[0], count, closure->stride) &&
+ closure->valueFormats[1].sanitize_values_stride_unsafe (c, this, &record->values[closure->len1], count, closure->stride));
+ }
+
+ protected:
+ HBUINT16 len; /* Number of PairValueRecords */
+ PairValueRecord firstPairValueRecord;
+ /* Array of PairValueRecords--ordered
+ * by GlyphID of the second glyph */
+ public:
+ DEFINE_SIZE_MIN (2);
+};
+
+/* Pair adjustment, format 1: for each covered first glyph, a PairSet
+ * listing the second glyphs it pairs with and their value records. */
+struct PairPosFormat1
+{
+  /* True when some covered first glyph in GLYPHS owns a PairSet that
+   * itself intersects GLYPHS. */
+  bool intersects (const hb_set_t *glyphs) const
+  {
+    return
+    + hb_zip (this+coverage, pairSet)
+    | hb_filter (*glyphs, hb_first)
+    | hb_map (hb_second)
+    | hb_map ([glyphs, this] (const OffsetTo<PairSet> &_)
+              { return (this+_).intersects (glyphs, valueFormat); })
+    | hb_any
+    ;
+  }
+
+  /* closure_lookups is a no-op for this subtable. */
+  void closure_lookups (hb_closure_lookups_context_t *c) const {}
+
+  /* Collects variation indices from the PairSets of covered glyphs that
+   * are in c->glyph_set.  Does nothing when neither value format has
+   * device entries. */
+  void collect_variation_indices (hb_collect_variation_indices_context_t *c) const
+  {
+    if ((!valueFormat[0].has_device ()) && (!valueFormat[1].has_device ())) return;
+
+    auto it =
+    + hb_zip (this+coverage, pairSet)
+    | hb_filter (c->glyph_set, hb_first)
+    | hb_map (hb_second)
+    ;
+
+    if (!it) return;
+    + it
+    | hb_map (hb_add (this))
+    | hb_apply ([&] (const PairSet& _) { _.collect_variation_indices (c, valueFormat); })
+    ;
+  }
+
+  /* Adds the covered first glyphs and the second glyphs of every PairSet
+   * to c->input; stops early if collecting the coverage fails. */
+  void collect_glyphs (hb_collect_glyphs_context_t *c) const
+  {
+    if (unlikely (!(this+coverage).collect_coverage (c->input))) return;
+    unsigned int count = pairSet.len;
+    for (unsigned int i = 0; i < count; i++)
+      (this+pairSet[i]).collect_glyphs (c, valueFormat);
+  }
+
+  const Coverage &get_coverage () const { return this+coverage; }
+
+  /* If the current glyph is covered, finds the next glyph through the
+   * context's skipping iterator and lets the matching PairSet apply the
+   * pair.  Returns false when the glyph is not covered, when there is no
+   * next glyph, or when the PairSet has no record for it. */
+  bool apply (hb_ot_apply_context_t *c) const
+  {
+    TRACE_APPLY (this);
+    hb_buffer_t *buffer = c->buffer;
+    unsigned int index = (this+coverage).get_coverage (buffer->cur().codepoint);
+    if (likely (index == NOT_COVERED)) return_trace (false);
+
+    hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
+    skippy_iter.reset (buffer->idx, 1);
+    if (!skippy_iter.next ()) return_trace (false);
+
+    return_trace ((this+pairSet[index]).apply (c, valueFormat, skippy_iter.idx));
+  }
+
+  /* Subsets to the plan's glyph set.  For every retained covered glyph the
+   * PairSet is subset; glyphs whose PairSet comes out empty are dropped
+   * from the new coverage.  Returns whether any glyph remains. */
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+
+    const hb_set_t &glyphset = *c->plan->glyphset ();
+    const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+    auto *out = c->serializer->start_embed (*this);
+    if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+    out->format = format;
+    out->valueFormat[0] = valueFormat[0];
+    out->valueFormat[1] = valueFormat[1];
+
+    hb_sorted_vector_t<hb_codepoint_t> new_coverage;
+
+    + hb_zip (this+coverage, pairSet)
+    | hb_filter (glyphset, hb_first)
+    | hb_filter ([this, c, out] (const OffsetTo<PairSet>& _)
+                 {
+                   /* Snapshot before the offset slot is appended, so that
+                    * rejecting a PairSet also gives back the slot's bytes
+                    * instead of leaving them allocated after pop (). */
+                   auto snap = c->serializer->snapshot ();
+                   auto *o = out->pairSet.serialize_append (c->serializer);
+                   if (unlikely (!o)) return false;
+                   bool ret = o->serialize_subset (c, _, this, valueFormat);
+                   if (!ret)
+                   {
+                     out->pairSet.pop ();
+                     c->serializer->revert (snap);
+                   }
+                   return ret;
+                 },
+                 hb_second)
+    | hb_map (hb_first)
+    | hb_map (glyph_map)
+    | hb_sink (new_coverage)
+    ;
+
+    out->coverage.serialize (c->serializer, out)
+                 .serialize (c->serializer, new_coverage.iter ());
+
+    return_trace (bool (new_coverage));
+  }
+
+  /* Validates the header, the coverage table and every PairSet; the
+   * PairSets receive the value formats and record stride through a
+   * sanitize closure. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+
+    if (!c->check_struct (this)) return_trace (false);
+
+    unsigned int len1 = valueFormat[0].get_len ();
+    unsigned int len2 = valueFormat[1].get_len ();
+    PairSet::sanitize_closure_t closure =
+    {
+      valueFormat,
+      len1,
+      1 + len1 + len2
+    };
+
+    return_trace (coverage.sanitize (c, this) && pairSet.sanitize (c, this, &closure));
+  }
+
+  protected:
+  HBUINT16 format; /* Format identifier--format = 1 */
+  OffsetTo<Coverage>
+  coverage; /* Offset to Coverage table--from
+             * beginning of subtable */
+  ValueFormat valueFormat[2]; /* [0] Defines the types of data in
+                               * ValueRecord1--for the first glyph
+                               * in the pair--may be zero (0) */
+  /* [1] Defines the types of data in
+   * ValueRecord2--for the second glyph
+   * in the pair--may be zero (0) */
+  OffsetArrayOf<PairSet>
+  pairSet; /* Array of PairSet tables
+            * ordered by Coverage Index */
+  public:
+  DEFINE_SIZE_ARRAY (10, pairSet);
+};
+
+struct PairPosFormat2 /* Pair adjustment, format 2: value pairs looked up by the classes of the two glyphs */
+{
+ bool intersects (const hb_set_t *glyphs) const
+ {
+ return (this+coverage).intersects (glyphs) &&
+ (this+classDef2).intersects (glyphs);
+ }
+ /* closure_lookups is a no-op for this subtable. */
+ void closure_lookups (hb_closure_lookups_context_t *c) const {}
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c) const
+ { /* Gathers the class1 and class2 values of every glyph in c->glyph_set
+    * and collects variation indices from each (class1, class2) cell of the
+    * value matrix.  Does nothing when neither value format has device
+    * entries. */
+ if ((!valueFormat1.has_device ()) && (!valueFormat2.has_device ())) return;
+
+ hb_set_t class1_set, class2_set;
+ for (const unsigned cp : c->glyph_set->iter ())
+ {
+ unsigned klass1 = (this+classDef1).get (cp);
+ unsigned klass2 = (this+classDef2).get (cp);
+ class1_set.add (klass1);
+ class2_set.add (klass2);
+ }
+
+ if (class1_set.is_empty () || class2_set.is_empty ()) return;
+
+ unsigned len1 = valueFormat1.get_len ();
+ unsigned len2 = valueFormat2.get_len ();
+ const hb_array_t<const Value> values_array = values.as_array ((unsigned)class1Count * (unsigned) class2Count * (len1 + len2));
+ for (const unsigned class1_idx : class1_set.iter ())
+ {
+ for (const unsigned class2_idx : class2_set.iter ())
+ {
+ unsigned start_offset = (class1_idx * (unsigned) class2Count + class2_idx) * (len1 + len2); /* class1-major cell index, in Values */
+ if (valueFormat1.has_device ())
+ valueFormat1.collect_variation_indices (c, this, values_array.sub_array (start_offset, len1));
+
+ if (valueFormat2.has_device ())
+ valueFormat2.collect_variation_indices (c, this, values_array.sub_array (start_offset+len1, len2));
+ }
+ }
+ }
+ /* Adds the glyphs of the coverage table and of classDef2 to c->input;
+  * classDef2 is skipped if collecting the coverage fails. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ {
+ if (unlikely (!(this+coverage).collect_coverage (c->input))) return;
+ if (unlikely (!(this+classDef2).collect_coverage (c->input))) return;
+ }
+
+ const Coverage &get_coverage () const { return this+coverage; }
+ /* If the current glyph is covered, finds the next glyph through the
+  * context's skipping iterator, looks up the classes of both glyphs and
+  * applies the value pair stored for that (class1, class2) cell.
+  * Returns false when the glyph is not covered, there is no next glyph,
+  * or either class is outside the declared class counts. */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+ hb_buffer_t *buffer = c->buffer;
+ unsigned int index = (this+coverage).get_coverage (buffer->cur().codepoint);
+ if (likely (index == NOT_COVERED)) return_trace (false);
+
+ hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
+ skippy_iter.reset (buffer->idx, 1);
+ if (!skippy_iter.next ()) return_trace (false);
+
+ unsigned int len1 = valueFormat1.get_len ();
+ unsigned int len2 = valueFormat2.get_len ();
+ unsigned int record_len = len1 + len2;
+
+ unsigned int klass1 = (this+classDef1).get_class (buffer->cur().codepoint);
+ unsigned int klass2 = (this+classDef2).get_class (buffer->info[skippy_iter.idx].codepoint);
+ if (unlikely (klass1 >= class1Count || klass2 >= class2Count)) return_trace (false);
+
+ const Value *v = &values[record_len * (klass1 * class2Count + klass2)];
+ /* Note the intentional use of "|" instead of short-circuit "||". */
+ if (valueFormat1.apply_value (c, this, v, buffer->cur_pos()) |
+ valueFormat2.apply_value (c, this, v + len1, buffer->pos[skippy_iter.idx]))
+ buffer->unsafe_to_break (buffer->idx, skippy_iter.idx + 1);
+
+ buffer->idx = skippy_iter.idx;
+ if (len2) /* step past the second glyph only when it has a non-empty value record */
+ buffer->idx++;
+
+ return_trace (true);
+ }
+ /* Subsets the subtable: both class definitions are subset (filling
+  * klass1_map / klass2_map), the value pairs of the retained
+  * (class1, class2) cells are copied in class1-major order, and the
+  * coverage is rebuilt from the retained glyphs.  Returns true only when
+  * both output class counts are non-zero and some glyph is retained. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+ out->format = format;
+ out->valueFormat1 = valueFormat1;
+ out->valueFormat2 = valueFormat2;
+
+ hb_map_t klass1_map;
+ out->classDef1.serialize_subset (c, classDef1, this, &klass1_map);
+ out->class1Count = klass1_map.get_population ();
+
+ hb_map_t klass2_map;
+ out->classDef2.serialize_subset (c, classDef2, this, &klass2_map);
+ out->class2Count = klass2_map.get_population ();
+
+ unsigned len1 = valueFormat1.get_len ();
+ unsigned len2 = valueFormat2.get_len ();
+
+ + hb_range ((unsigned) class1Count)
+ | hb_filter (klass1_map)
+ | hb_apply ([&] (const unsigned class1_idx)
+ {
+ + hb_range ((unsigned) class2Count)
+ | hb_filter (klass2_map)
+ | hb_apply ([&] (const unsigned class2_idx)
+ {
+ unsigned idx = (class1_idx * (unsigned) class2Count + class2_idx) * (len1 + len2);
+ valueFormat1.serialize_copy (c->serializer, this, &values[idx], c->plan->layout_variation_idx_map);
+ valueFormat2.serialize_copy (c->serializer, this, &values[idx + len1], c->plan->layout_variation_idx_map);
+ })
+ ;
+ })
+ ;
+ /* NOTE(review): this reads plan->_glyphset_gsub directly, whereas the
+  * other subset () methods in this chunk use plan->glyphset () -- confirm
+  * the difference is intended. */
+ const hb_set_t &glyphset = *c->plan->_glyphset_gsub;
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ auto it =
+ + hb_iter (this+coverage)
+ | hb_filter (glyphset)
+ | hb_map_retains_sorting (glyph_map)
+ ;
+
+ out->coverage.serialize (c->serializer, out).serialize (c->serializer, it);
+ return_trace (out->class1Count && out->class2Count && bool (it));
+ }
+ /* Validates the header, the coverage and both class definitions, then
+  * the byte range of the class1Count * class2Count value matrix and the
+  * first and second value record of every cell. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (!(c->check_struct (this)
+ && coverage.sanitize (c, this)
+ && classDef1.sanitize (c, this)
+ && classDef2.sanitize (c, this))) return_trace (false);
+
+ unsigned int len1 = valueFormat1.get_len ();
+ unsigned int len2 = valueFormat2.get_len ();
+ unsigned int stride = len1 + len2;
+ unsigned int record_size = valueFormat1.get_size () + valueFormat2.get_size ();
+ unsigned int count = (unsigned int) class1Count * (unsigned int) class2Count;
+ return_trace (c->check_range ((const void *) values,
+ count,
+ record_size) &&
+ valueFormat1.sanitize_values_stride_unsafe (c, this, &values[0], count, stride) &&
+ valueFormat2.sanitize_values_stride_unsafe (c, this, &values[len1], count, stride));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 2 */
+ OffsetTo<Coverage>
+ coverage; /* Offset to Coverage table--from
+ * beginning of subtable */
+ ValueFormat valueFormat1; /* ValueRecord definition--for the
+ * first glyph of the pair--may be zero
+ * (0) */
+ ValueFormat valueFormat2; /* ValueRecord definition--for the
+ * second glyph of the pair--may be
+ * zero (0) */
+ OffsetTo<ClassDef>
+ classDef1; /* Offset to ClassDef table--from
+ * beginning of PairPos subtable--for
+ * the first glyph of the pair */
+ OffsetTo<ClassDef>
+ classDef2; /* Offset to ClassDef table--from
+ * beginning of PairPos subtable--for
+ * the second glyph of the pair */
+ HBUINT16 class1Count; /* Number of classes in ClassDef1
+ * table--includes Class0 */
+ HBUINT16 class2Count; /* Number of classes in ClassDef2
+ * table--includes Class0 */
+ ValueRecord values; /* Matrix of value pairs:
+ * class1-major, class2-minor,
+ * Each entry has value1 and value2 */
+ public:
+ DEFINE_SIZE_ARRAY (16, values);
+};
+
+/* Pair adjustment subtable: a union over the two formats, selected by the
+ * leading format field. */
+struct PairPos
+{
+  /* Routes the context to the subtable matching the format field; unknown
+   * formats produce the context's default return value. */
+  template <typename context_t, typename ...Ts>
+  typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+  {
+    TRACE_DISPATCH (this, u.format);
+    if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ());
+
+    if (u.format == 1)
+      return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+    if (u.format == 2)
+      return_trace (c->dispatch (u.format2, hb_forward<Ts> (ds)...));
+
+    return_trace (c->default_return_value ());
+  }
+
+  protected:
+  union {
+    HBUINT16 format; /* Format identifier */
+    PairPosFormat1 format1;
+    PairPosFormat2 format2;
+  } u;
+};
+
+
+/* Entry and exit anchor offsets of one glyph in a cursive attachment
+ * subtable; either offset may be null. */
+struct EntryExitRecord
+{
+  friend struct CursivePosFormat1;
+
+  /* Validates both anchor offsets, resolved against BASE.  The exit anchor
+   * is only examined when the entry anchor passes. */
+  bool sanitize (hb_sanitize_context_t *c, const void *base) const
+  {
+    TRACE_SANITIZE (this);
+    if (!entryAnchor.sanitize (c, base)) return_trace (false);
+    return_trace (exitAnchor.sanitize (c, base));
+  }
+
+  /* Forwards variation-index collection to the entry anchor and then the
+   * exit anchor, both resolved against src_base. */
+  void collect_variation_indices (hb_collect_variation_indices_context_t *c,
+                                  const void *src_base) const
+  {
+    const Anchor &entry = src_base+entryAnchor;
+    entry.collect_variation_indices (c);
+
+    const Anchor &exit = src_base+exitAnchor;
+    exit.collect_variation_indices (c);
+  }
+
+  /* Embeds a copy of this record in the serializer and re-serializes both
+   * anchor offsets from src_base towards the bias of dst_base.  Returns the
+   * new record, or nullptr when the embed fails. */
+  EntryExitRecord* copy (hb_serialize_context_t *c,
+                         const void *src_base,
+                         const void *dst_base,
+                         const hb_map_t *layout_variation_idx_map) const
+  {
+    TRACE_SERIALIZE (this);
+    EntryExitRecord *copied = c->embed (this);
+    if (unlikely (!copied)) return_trace (nullptr);
+
+    copied->entryAnchor.serialize_copy (c, entryAnchor,
+                                        src_base, c->to_bias (dst_base),
+                                        hb_serialize_context_t::Head,
+                                        layout_variation_idx_map);
+    copied->exitAnchor.serialize_copy (c, exitAnchor,
+                                       src_base, c->to_bias (dst_base),
+                                       hb_serialize_context_t::Head,
+                                       layout_variation_idx_map);
+    return_trace (copied);
+  }
+
+  protected:
+  OffsetTo<Anchor>
+  entryAnchor; /* Offset to EntryAnchor table--from
+                * beginning of CursivePos
+                * subtable--may be NULL */
+  OffsetTo<Anchor>
+  exitAnchor; /* Offset to ExitAnchor table--from
+               * beginning of CursivePos
+               * subtable--may be NULL */
+  public:
+  DEFINE_SIZE_STATIC (4);
+};
+
+static void
+reverse_cursive_minor_offset (hb_glyph_position_t *pos, unsigned int i, hb_direction_t direction, unsigned int new_parent); /* Forward declaration only: CursivePosFormat1::apply calls this
+ * before linking a child glyph to its new parent; the definition is not in
+ * this part of the file. */
+
+struct CursivePosFormat1
+{
+ /* True when the subtable's coverage shares a glyph with GLYPHS. */
+ bool intersects (const hb_set_t *glyphs) const
+ {
+   const Coverage &cov = this+coverage;
+   return cov.intersects (glyphs);
+ }
+
+ void closure_lookups (hb_closure_lookups_context_t *c) const {} /* no-op for this subtable */
+
+ /* Collects variation indices from the entry/exit records of those
+  * covered glyphs that are in c->glyph_set; this subtable is the base the
+  * records resolve their anchor offsets against. */
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c) const
+ {
+   auto retained_records =
+   + hb_zip (this+coverage, entryExitRecord)
+   | hb_filter (c->glyph_set, hb_first)
+   | hb_map (hb_second)
+   ;
+
+   for (const EntryExitRecord &record : retained_records)
+     record.collect_variation_indices (c, this);
+ }
+
+ /* Adds the covered glyphs to c->input; nothing further happens whether
+  * or not collecting succeeds. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ {
+   const Coverage &cov = this+coverage;
+   if (unlikely (!cov.collect_coverage (c->input))) return;
+ }
+
+ const Coverage &get_coverage () const { return this+coverage; } /* coverage table resolved relative to this subtable */
+
+ bool apply (hb_ot_apply_context_t *c) const
+ { /* Connects the current glyph (index j) to the previous glyph found by
+    * the skipping iterator (index i): the previous glyph's exit anchor is
+    * lined up with the current glyph's entry anchor.
+    * Returns false when the current glyph's record has no entry anchor,
+    * there is no previous glyph, or that glyph's record has no exit anchor.
+    * Otherwise adjusts advances/offsets in the main direction, records the
+    * cross-direction attachment, advances buffer->idx and returns true. */
+ TRACE_APPLY (this);
+ hb_buffer_t *buffer = c->buffer;
+
+ const EntryExitRecord &this_record = entryExitRecord[(this+coverage).get_coverage (buffer->cur().codepoint)];
+ if (!this_record.entryAnchor) return_trace (false);
+
+ hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
+ skippy_iter.reset (buffer->idx, 1);
+ if (!skippy_iter.prev ()) return_trace (false);
+
+ const EntryExitRecord &prev_record = entryExitRecord[(this+coverage).get_coverage (buffer->info[skippy_iter.idx].codepoint)];
+ if (!prev_record.exitAnchor) return_trace (false);
+
+ unsigned int i = skippy_iter.idx; /* previous glyph: supplies the exit anchor */
+ unsigned int j = buffer->idx; /* current glyph: supplies the entry anchor */
+
+ buffer->unsafe_to_break (i, j);
+ float entry_x, entry_y, exit_x, exit_y;
+ (this+prev_record.exitAnchor).get_anchor (c, buffer->info[i].codepoint, &exit_x, &exit_y);
+ (this+this_record.entryAnchor).get_anchor (c, buffer->info[j].codepoint, &entry_x, &entry_y);
+
+ hb_glyph_position_t *pos = buffer->pos;
+
+ hb_position_t d;
+ /* Main-direction adjustment */
+ switch (c->direction) {
+ case HB_DIRECTION_LTR:
+ pos[i].x_advance = roundf (exit_x) + pos[i].x_offset;
+
+ d = roundf (entry_x) + pos[j].x_offset;
+ pos[j].x_advance -= d;
+ pos[j].x_offset -= d;
+ break;
+ case HB_DIRECTION_RTL:
+ d = roundf (exit_x) + pos[i].x_offset;
+ pos[i].x_advance -= d;
+ pos[i].x_offset -= d;
+
+ pos[j].x_advance = roundf (entry_x) + pos[j].x_offset;
+ break;
+ case HB_DIRECTION_TTB:
+ pos[i].y_advance = roundf (exit_y) + pos[i].y_offset;
+
+ d = roundf (entry_y) + pos[j].y_offset;
+ pos[j].y_advance -= d;
+ pos[j].y_offset -= d;
+ break;
+ case HB_DIRECTION_BTT:
+ d = roundf (exit_y) + pos[i].y_offset;
+ pos[i].y_advance -= d;
+ pos[i].y_offset -= d;
+ /* NOTE(review): unlike the RTL case above, pos[j].y_offset is not added
+  * here -- confirm this asymmetry is intended. */
+ pos[j].y_advance = roundf (entry_y);
+ break;
+ case HB_DIRECTION_INVALID:
+ default:
+ break;
+ }
+
+ /* Cross-direction adjustment */
+
+ /* We attach child to parent (think graph theory and rooted trees whereas
+ * the root stays on baseline and each node aligns itself against its
+ * parent.
+ *
+ * Optimize things for the case of RightToLeft, as that's most common in
+ * Arabic. */
+ unsigned int child = i;
+ unsigned int parent = j;
+ hb_position_t x_offset = entry_x - exit_x; /* NOTE(review): float difference converted implicitly, without the roundf used for the advances above */
+ hb_position_t y_offset = entry_y - exit_y;
+ if (!(c->lookup_props & LookupFlag::RightToLeft))
+ { /* without the RightToLeft flag the roles swap: j becomes the child of i, so the offsets are negated */
+ unsigned int k = child;
+ child = parent;
+ parent = k;
+ x_offset = -x_offset;
+ y_offset = -y_offset;
+ }
+
+ /* If child was already connected to someone else, walk through its old
+ * chain and reverse the link direction, such that the whole tree of its
+ * previous connection now attaches to new parent. Watch out for case
+ * where new parent is on the path from old chain...
+ */
+ reverse_cursive_minor_offset (pos, child, c->direction, parent);
+
+ pos[child].attach_type() = ATTACH_TYPE_CURSIVE;
+ pos[child].attach_chain() = (int) parent - (int) child;
+ buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT;
+ if (likely (HB_DIRECTION_IS_HORIZONTAL (c->direction)))
+ pos[child].y_offset = y_offset;
+ else
+ pos[child].x_offset = x_offset;
+
+ /* If parent was attached to child, break them free.
+ * https://github.com/harfbuzz/harfbuzz/issues/2469
+ */
+ if (unlikely (pos[parent].attach_chain() == -pos[child].attach_chain()))
+ pos[parent].attach_chain() = 0;
+
+ buffer->idx++;
+ return_trace (true);
+ }
+
+ template <typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ void serialize (hb_serialize_context_t *c, /* Writes a format-1 subtable from `it`, whose items pair a glyph (first) with an EntryExitRecord (second). */
+ Iterator it,
+ const void *src_base, /* Base passed to c->copy() together with each source record. */
+ const hb_map_t *layout_variation_idx_map)
+ {
+ if (unlikely (!c->extend_min ((*this)))) return; /* Bail out silently if the fixed-size header cannot be allocated. */
+ this->format = 1;
+ this->entryExitRecord.len = it.len ();
+
+ for (const EntryExitRecord& entry_record : + it
+ | hb_map (hb_second))
+ c->copy (entry_record, src_base, this, layout_variation_idx_map); /* Records are copied before the Coverage table is emitted. */
+
+ auto glyphs =
+ + it
+ | hb_map_retains_sorting (hb_first)
+ ;
+
+ coverage.serialize (c, this).serialize (c, glyphs); /* Coverage is built from the glyph half of each pair. */
+ }
+
+ bool subset (hb_subset_context_t *c) const /* Serializes a copy of this subtable restricted to the plan's glyph set, with glyph ids remapped through the plan's glyph_map. */
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!out)) return_trace (false);
+
+ auto it =
+ + hb_zip (this+coverage, entryExitRecord)
+ | hb_filter (glyphset, hb_first)
+ | hb_map_retains_sorting ([&] (hb_pair_t<hb_codepoint_t, const EntryExitRecord&> p) -> hb_pair_t<hb_codepoint_t, const EntryExitRecord&>
+ { return hb_pair (glyph_map[p.first], p.second);})
+ ;
+
+ bool ret = bool (it); /* Truth value of the filtered iterator, taken before serializing (presumably: at least one record survived). */
+ out->serialize (c->serializer, it, this, c->plan->layout_variation_idx_map);
+ return_trace (ret);
+ }
+
+ /* Validates the subtable: first the Coverage offset, then the array of
+  * EntryExitRecords. Both checks receive this subtable as their base, and
+  * the second one is skipped when the first fails. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+   TRACE_SANITIZE (this);
+   if (!coverage.sanitize (c, this))
+     return_trace (false);
+   return_trace (entryExitRecord.sanitize (c, this));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 1 */
+ OffsetTo<Coverage>
+ coverage; /* Offset to Coverage table--from
+ * beginning of subtable */
+ ArrayOf<EntryExitRecord>
+ entryExitRecord; /* Array of EntryExit records--in
+ * Coverage Index order */
+ public:
+ DEFINE_SIZE_ARRAY (6, entryExitRecord);
+};
+
+/* Cursive positioning subtable: a format-tagged union that forwards every
+ * context operation to the implementation matching its format field. */
+struct CursivePos
+{
+  /* Passes the context `c` and any extra arguments on to the format-1
+   * implementation. Yields the context's no-dispatch value when the context
+   * declines to dispatch, and its default value for any other format. */
+  template <typename context_t, typename ...Ts>
+  typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+  {
+    TRACE_DISPATCH (this, u.format);
+    if (unlikely (!c->may_dispatch (this, &u.format)))
+      return_trace (c->no_dispatch_return_value ());
+
+    const unsigned int fmt = u.format;
+    if (fmt == 1)
+      return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+    return_trace (c->default_return_value ());
+  }
+
+  protected:
+  union {
+    HBUINT16 format; /* Format identifier */
+    CursivePosFormat1 format1;
+  } u;
+};
+
+
+typedef AnchorMatrix BaseArray; /* Anchor matrix, base-major: one row per
+ * base glyph, in BaseCoverage Index order;
+ * mark-minor: one column per mark class,
+ * ordered by class, zero-based. */
+
+/* Collects the mark classes used by those marks of `mark_coverage` that are
+ * in `glyphset`, then assigns each class not yet present in `klass_mapping`
+ * the next consecutive index, starting at 0, in the set's iteration order.
+ * Classes already present in `klass_mapping` keep their existing value. */
+static void Markclass_closure_and_remap_indexes (const Coverage &mark_coverage,
+						 const MarkArray &mark_array,
+						 const hb_set_t &glyphset,
+						 hb_map_t* klass_mapping /* INOUT */)
+{
+  hb_set_t retained_classes;
+
+  + hb_zip (mark_coverage, mark_array)
+  | hb_filter (glyphset, hb_first)
+  | hb_map (hb_second)
+  | hb_map (&MarkRecord::get_class)
+  | hb_sink (retained_classes)
+  ;
+
+  unsigned next_index = 0;
+  for (auto klass : retained_classes.iter ())
+    if (!klass_mapping->has (klass))
+      klass_mapping->set (klass, next_index++);
+}
+
+struct MarkBasePosFormat1 /* Mark-to-base positioning subtable, format 1: attaches a covered mark glyph to a preceding covered base glyph. */
+{
+ bool intersects (const hb_set_t *glyphs) const /* True only when both the mark Coverage and the base Coverage intersect `glyphs`. */
+ {
+ return (this+markCoverage).intersects (glyphs) &&
+ (this+baseCoverage).intersects (glyphs);
+ }
+
+ void closure_lookups (hb_closure_lookups_context_t *c) const {} /* No-op: the body is empty. */
+
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c) const /* Collects variation indices from the retained mark records and from the base anchors of retained bases and classes. */
+ {
+ + hb_zip (this+markCoverage, this+markArray)
+ | hb_filter (c->glyph_set, hb_first)
+ | hb_map (hb_second)
+ | hb_apply ([&] (const MarkRecord& record) { record.collect_variation_indices (c, &(this+markArray)); })
+ ;
+
+ hb_map_t klass_mapping; /* Filled below from the mark classes of the retained marks. */
+ Markclass_closure_and_remap_indexes (this+markCoverage, this+markArray, *c->glyph_set, &klass_mapping);
+
+ unsigned basecount = (this+baseArray).rows;
+ auto base_iter =
+ + hb_zip (this+baseCoverage, hb_range (basecount))
+ | hb_filter (c->glyph_set, hb_first)
+ | hb_map (hb_second)
+ ;
+
+ hb_sorted_vector_t<unsigned> base_indexes;
+ for (const unsigned row : base_iter)
+ {
+ + hb_range ((unsigned) classCount)
+ | hb_filter (klass_mapping)
+ | hb_map ([&] (const unsigned col) { return row * (unsigned) classCount + col; }) /* Flat matrix index of cell (row, col). */
+ | hb_sink (base_indexes)
+ ;
+ }
+ (this+baseArray).collect_variation_indices (c, base_indexes.iter ());
+ }
+
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const /* Adds mark and base coverage glyphs to c->input; stops at the first failing collection. */
+ {
+ if (unlikely (!(this+markCoverage).collect_coverage (c->input))) return;
+ if (unlikely (!(this+baseCoverage).collect_coverage (c->input))) return;
+ }
+
+ const Coverage &get_coverage () const { return this+markCoverage; } /* The mark Coverage is the subtable's primary coverage. */
+
+ bool apply (hb_ot_apply_context_t *c) const /* Attaches the current (mark) glyph to the nearest acceptable preceding glyph; returns false when either glyph is not covered or no candidate is found. */
+ {
+ TRACE_APPLY (this);
+ hb_buffer_t *buffer = c->buffer;
+ unsigned int mark_index = (this+markCoverage).get_coverage (buffer->cur().codepoint);
+ if (likely (mark_index == NOT_COVERED)) return_trace (false);
+
+ /* Now we search backwards for a non-mark glyph */
+ hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
+ skippy_iter.reset (buffer->idx, 1);
+ skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks);
+ do {
+ if (!skippy_iter.prev ()) return_trace (false);
+ /* We only want to attach to the first of a MultipleSubst sequence.
+ * https://github.com/harfbuzz/harfbuzz/issues/740
+ * Reject others...
+ * ...but stop if we find a mark in the MultipleSubst sequence:
+ * https://github.com/harfbuzz/harfbuzz/issues/1020 */
+ if (!_hb_glyph_info_multiplied (&buffer->info[skippy_iter.idx]) ||
+ 0 == _hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx]) ||
+ (skippy_iter.idx == 0 ||
+ _hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx - 1]) ||
+ _hb_glyph_info_get_lig_id (&buffer->info[skippy_iter.idx]) !=
+ _hb_glyph_info_get_lig_id (&buffer->info[skippy_iter.idx - 1]) ||
+ _hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx]) !=
+ _hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx - 1]) + 1
+ ))
+ break; /* Candidate accepted. */
+ skippy_iter.reject (); /* Candidate refused; keep searching backwards. */
+ } while (true);
+
+ /* Checking that matched glyph is actually a base glyph by GDEF is too strong; disabled */
+ //if (!_hb_glyph_info_is_base_glyph (&buffer->info[skippy_iter.idx])) { return_trace (false); }
+
+ unsigned int base_index = (this+baseCoverage).get_coverage (buffer->info[skippy_iter.idx].codepoint);
+ if (base_index == NOT_COVERED) return_trace (false);
+
+ return_trace ((this+markArray).apply (c, mark_index, base_index, this+baseArray, classCount, skippy_iter.idx));
+ }
+
+ bool subset (hb_subset_context_t *c) const /* Serializes a copy limited to the plan's glyph set, with mark classes renumbered through klass_mapping; returns false when no mark class is retained or a Coverage fails to serialize. */
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+ out->format = format;
+
+ hb_map_t klass_mapping;
+ Markclass_closure_and_remap_indexes (this+markCoverage, this+markArray, glyphset, &klass_mapping);
+
+ if (!klass_mapping.get_population ()) return_trace (false); /* No mark class retained. */
+ out->classCount = klass_mapping.get_population ();
+
+ auto mark_iter =
+ + hb_zip (this+markCoverage, this+markArray)
+ | hb_filter (glyphset, hb_first)
+ ;
+
+ hb_sorted_vector_t<hb_codepoint_t> new_coverage;
+ + mark_iter
+ | hb_map (hb_first)
+ | hb_map (glyph_map)
+ | hb_sink (new_coverage)
+ ;
+
+ if (!out->markCoverage.serialize (c->serializer, out)
+ .serialize (c->serializer, new_coverage.iter ()))
+ return_trace (false);
+
+ out->markArray.serialize (c->serializer, out) /* NOTE(review): the result of this serialize call is not checked. */
+ .serialize (c->serializer, &klass_mapping, c->plan->layout_variation_idx_map, &(this+markArray), + mark_iter
+ | hb_map (hb_second));
+
+ unsigned basecount = (this+baseArray).rows;
+ auto base_iter =
+ + hb_zip (this+baseCoverage, hb_range (basecount))
+ | hb_filter (glyphset, hb_first)
+ ;
+
+ new_coverage.reset (); /* Reuse the vector for the base coverage. */
+ + base_iter
+ | hb_map (hb_first)
+ | hb_map (glyph_map)
+ | hb_sink (new_coverage)
+ ;
+
+ if (!out->baseCoverage.serialize (c->serializer, out)
+ .serialize (c->serializer, new_coverage.iter ()))
+ return_trace (false);
+
+ hb_sorted_vector_t<unsigned> base_indexes;
+ for (const unsigned row : + base_iter
+ | hb_map (hb_second))
+ {
+ + hb_range ((unsigned) classCount)
+ | hb_filter (klass_mapping)
+ | hb_map ([&] (const unsigned col) { return row * (unsigned) classCount + col; }) /* Flat index into the original base matrix. */
+ | hb_sink (base_indexes)
+ ;
+ }
+ out->baseArray.serialize (c->serializer, out)
+ .serialize (c->serializer, base_iter.len (), &(this+baseArray), c->plan->layout_variation_idx_map, base_indexes.iter ());
+
+ return_trace (true);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const /* Checks the fixed-size struct, then each offset in turn; baseArray is validated with classCount as its column count. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ markCoverage.sanitize (c, this) &&
+ baseCoverage.sanitize (c, this) &&
+ markArray.sanitize (c, this) &&
+ baseArray.sanitize (c, this, (unsigned int) classCount));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 1 */
+ OffsetTo<Coverage>
+ markCoverage; /* Offset to MarkCoverage table--from
+ * beginning of MarkBasePos subtable */
+ OffsetTo<Coverage>
+ baseCoverage; /* Offset to BaseCoverage table--from
+ * beginning of MarkBasePos subtable */
+ HBUINT16 classCount; /* Number of classes defined for marks */
+ OffsetTo<MarkArray>
+ markArray; /* Offset to MarkArray table--from
+ * beginning of MarkBasePos subtable */
+ OffsetTo<BaseArray>
+ baseArray; /* Offset to BaseArray table--from
+ * beginning of MarkBasePos subtable */
+ public:
+ DEFINE_SIZE_STATIC (12);
+};
+
+/* Mark-to-base positioning subtable: a format-tagged union whose only job
+ * is to route a context to the implementation for its format. */
+struct MarkBasePos
+{
+  /* Forwards `c` and the extra arguments to the format-1 implementation.
+   * The context's no-dispatch value is returned when it refuses to
+   * dispatch; its default value is returned for any other format. */
+  template <typename context_t, typename ...Ts>
+  typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+  {
+    TRACE_DISPATCH (this, u.format);
+    if (unlikely (!c->may_dispatch (this, &u.format)))
+      return_trace (c->no_dispatch_return_value ());
+
+    const unsigned int fmt = u.format;
+    if (fmt == 1)
+      return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+    return_trace (c->default_return_value ());
+  }
+
+  protected:
+  union {
+    HBUINT16 format; /* Format identifier */
+    MarkBasePosFormat1 format1;
+  } u;
+};
+
+
+typedef AnchorMatrix LigatureAttach; /* Anchor matrix, component-major: one row
+ * per ligature component, in writing-direction
+ * order; mark-minor: one column per mark
+ * class, ordered by class, zero-based. */
+
+typedef OffsetListOf<LigatureAttach> LigatureArray;
+ /* List of offsets to LigatureAttach
+ * tables, ordered by
+ * LigatureCoverage Index */
+
+struct MarkLigPosFormat1 /* Mark-to-ligature positioning subtable, format 1: attaches a covered mark glyph to one component of a preceding covered ligature glyph. */
+{
+ bool intersects (const hb_set_t *glyphs) const /* True only when both the mark Coverage and the ligature Coverage intersect `glyphs`. */
+ {
+ return (this+markCoverage).intersects (glyphs) &&
+ (this+ligatureCoverage).intersects (glyphs);
+ }
+
+ void closure_lookups (hb_closure_lookups_context_t *c) const {} /* No-op: the body is empty. */
+
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c) const /* Collects variation indices from the retained mark records and from every component row of each retained ligature. */
+ {
+ + hb_zip (this+markCoverage, this+markArray)
+ | hb_filter (c->glyph_set, hb_first)
+ | hb_map (hb_second)
+ | hb_apply ([&] (const MarkRecord& record) { record.collect_variation_indices (c, &(this+markArray)); })
+ ;
+
+ hb_map_t klass_mapping;
+ Markclass_closure_and_remap_indexes (this+markCoverage, this+markArray, *c->glyph_set, &klass_mapping);
+
+ unsigned ligcount = (this+ligatureArray).len;
+ auto lig_iter =
+ + hb_zip (this+ligatureCoverage, hb_range (ligcount))
+ | hb_filter (c->glyph_set, hb_first)
+ | hb_map (hb_second)
+ ;
+
+ const LigatureArray& lig_array = this+ligatureArray;
+ for (const unsigned i : lig_iter)
+ {
+ hb_sorted_vector_t<unsigned> lig_indexes; /* Rebuilt for each ligature. */
+ unsigned row_count = lig_array[i].rows;
+ for (unsigned row : + hb_range (row_count))
+ {
+ + hb_range ((unsigned) classCount)
+ | hb_filter (klass_mapping)
+ | hb_map ([&] (const unsigned col) { return row * (unsigned) classCount + col; }) /* Flat matrix index of cell (row, col). */
+ | hb_sink (lig_indexes)
+ ;
+ }
+
+ lig_array[i].collect_variation_indices (c, lig_indexes.iter ());
+ }
+ }
+
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const /* Adds mark and ligature coverage glyphs to c->input; stops at the first failing collection. */
+ {
+ if (unlikely (!(this+markCoverage).collect_coverage (c->input))) return;
+ if (unlikely (!(this+ligatureCoverage).collect_coverage (c->input))) return;
+ }
+
+ const Coverage &get_coverage () const { return this+markCoverage; } /* The mark Coverage is the subtable's primary coverage. */
+
+ bool apply (hb_ot_apply_context_t *c) const /* Attaches the current (mark) glyph to a component of the preceding glyph found while ignoring marks; returns false when either glyph is not covered or the ligature has no components. */
+ {
+ TRACE_APPLY (this);
+ hb_buffer_t *buffer = c->buffer;
+ unsigned int mark_index = (this+markCoverage).get_coverage (buffer->cur().codepoint);
+ if (likely (mark_index == NOT_COVERED)) return_trace (false);
+
+ /* Now we search backwards for a non-mark glyph */
+ hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
+ skippy_iter.reset (buffer->idx, 1);
+ skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks);
+ if (!skippy_iter.prev ()) return_trace (false);
+
+ /* Checking that matched glyph is actually a ligature by GDEF is too strong; disabled */
+ //if (!_hb_glyph_info_is_ligature (&buffer->info[skippy_iter.idx])) { return_trace (false); }
+
+ unsigned int j = skippy_iter.idx;
+ unsigned int lig_index = (this+ligatureCoverage).get_coverage (buffer->info[j].codepoint);
+ if (lig_index == NOT_COVERED) return_trace (false);
+
+ const LigatureArray& lig_array = this+ligatureArray;
+ const LigatureAttach& lig_attach = lig_array[lig_index];
+
+ /* Find component to attach to */
+ unsigned int comp_count = lig_attach.rows;
+ if (unlikely (!comp_count)) return_trace (false);
+
+ /* We must now check whether the ligature ID of the current mark glyph
+ * is identical to the ligature ID of the found ligature. If yes, we
+ * can directly use the component index. If not, we attach the mark
+ * glyph to the last component of the ligature. */
+ unsigned int comp_index;
+ unsigned int lig_id = _hb_glyph_info_get_lig_id (&buffer->info[j]);
+ unsigned int mark_id = _hb_glyph_info_get_lig_id (&buffer->cur());
+ unsigned int mark_comp = _hb_glyph_info_get_lig_comp (&buffer->cur());
+ if (lig_id && lig_id == mark_id && mark_comp > 0)
+ comp_index = hb_min (comp_count, _hb_glyph_info_get_lig_comp (&buffer->cur())) - 1; /* Clamp to the last row; the -1 converts the component number to a row index. */
+ else
+ comp_index = comp_count - 1;
+
+ return_trace ((this+markArray).apply (c, mark_index, comp_index, lig_attach, classCount, j));
+ }
+
+ bool subset (hb_subset_context_t *c) const /* Not implemented: always returns false. */
+ {
+ TRACE_SUBSET (this);
+ // TODO(subset)
+ return_trace (false);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const /* Checks the fixed-size struct, then each offset in turn; ligatureArray is validated with classCount passed along. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ markCoverage.sanitize (c, this) &&
+ ligatureCoverage.sanitize (c, this) &&
+ markArray.sanitize (c, this) &&
+ ligatureArray.sanitize (c, this, (unsigned int) classCount));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 1 */
+ OffsetTo<Coverage>
+ markCoverage; /* Offset to Mark Coverage table--from
+ * beginning of MarkLigPos subtable */
+ OffsetTo<Coverage>
+ ligatureCoverage; /* Offset to Ligature Coverage
+ * table--from beginning of MarkLigPos
+ * subtable */
+ HBUINT16 classCount; /* Number of defined mark classes */
+ OffsetTo<MarkArray>
+ markArray; /* Offset to MarkArray table--from
+ * beginning of MarkLigPos subtable */
+ OffsetTo<LigatureArray>
+ ligatureArray; /* Offset to LigatureArray table--from
+ * beginning of MarkLigPos subtable */
+ public:
+ DEFINE_SIZE_STATIC (12);
+};
+
+/* Mark-to-ligature positioning subtable: a format-tagged union that routes
+ * each context to the implementation for its format. */
+struct MarkLigPos
+{
+  /* Forwards `c` and the extra arguments to the format-1 implementation.
+   * The context's no-dispatch value is returned when it refuses to
+   * dispatch; its default value is returned for any other format. */
+  template <typename context_t, typename ...Ts>
+  typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+  {
+    TRACE_DISPATCH (this, u.format);
+    if (unlikely (!c->may_dispatch (this, &u.format)))
+      return_trace (c->no_dispatch_return_value ());
+
+    const unsigned int fmt = u.format;
+    if (fmt == 1)
+      return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+    return_trace (c->default_return_value ());
+  }
+
+  protected:
+  union {
+    HBUINT16 format; /* Format identifier */
+    MarkLigPosFormat1 format1;
+  } u;
+};
+
+
+typedef AnchorMatrix Mark2Array; /* Anchor matrix, mark2-major: one row per
+ * mark2 glyph, in Mark2Coverage Index order;
+ * mark1-minor: one column per mark1 class,
+ * ordered by class, zero-based. */
+
+struct MarkMarkPosFormat1 /* Mark-to-mark positioning subtable, format 1: attaches a covered mark (mark1) to a preceding covered mark (mark2). */
+{
+ bool intersects (const hb_set_t *glyphs) const /* True only when both the mark1 Coverage and the mark2 Coverage intersect `glyphs`. */
+ {
+ return (this+mark1Coverage).intersects (glyphs) &&
+ (this+mark2Coverage).intersects (glyphs);
+ }
+
+ void closure_lookups (hb_closure_lookups_context_t *c) const {} /* No-op: the body is empty. */
+
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c) const /* Collects variation indices from the retained mark1 records and from the mark2 anchors of retained rows and classes. */
+ {
+ + hb_zip (this+mark1Coverage, this+mark1Array)
+ | hb_filter (c->glyph_set, hb_first)
+ | hb_map (hb_second)
+ | hb_apply ([&] (const MarkRecord& record) { record.collect_variation_indices (c, &(this+mark1Array)); })
+ ;
+
+ hb_map_t klass_mapping;
+ Markclass_closure_and_remap_indexes (this+mark1Coverage, this+mark1Array, *c->glyph_set, &klass_mapping);
+
+ unsigned mark2_count = (this+mark2Array).rows;
+ auto mark2_iter =
+ + hb_zip (this+mark2Coverage, hb_range (mark2_count))
+ | hb_filter (c->glyph_set, hb_first)
+ | hb_map (hb_second)
+ ;
+
+ hb_sorted_vector_t<unsigned> mark2_indexes;
+ for (const unsigned row : mark2_iter)
+ {
+ + hb_range ((unsigned) classCount)
+ | hb_filter (klass_mapping)
+ | hb_map ([&] (const unsigned col) { return row * (unsigned) classCount + col; }) /* Flat matrix index of cell (row, col). */
+ | hb_sink (mark2_indexes)
+ ;
+ }
+ (this+mark2Array).collect_variation_indices (c, mark2_indexes.iter ());
+ }
+
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const /* Adds mark1 and mark2 coverage glyphs to c->input; stops at the first failing collection. */
+ {
+ if (unlikely (!(this+mark1Coverage).collect_coverage (c->input))) return;
+ if (unlikely (!(this+mark2Coverage).collect_coverage (c->input))) return;
+ }
+
+ const Coverage &get_coverage () const { return this+mark1Coverage; } /* The mark1 Coverage is the subtable's primary coverage. */
+
+ bool apply (hb_ot_apply_context_t *c) const /* Attaches the current mark (mark1) to the preceding mark (mark2) when their ligature id/component data are compatible; returns false otherwise. */
+ {
+ TRACE_APPLY (this);
+ hb_buffer_t *buffer = c->buffer;
+ unsigned int mark1_index = (this+mark1Coverage).get_coverage (buffer->cur().codepoint);
+ if (likely (mark1_index == NOT_COVERED)) return_trace (false);
+
+ /* now we search backwards for a suitable mark glyph until a non-mark glyph */
+ hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
+ skippy_iter.reset (buffer->idx, 1);
+ skippy_iter.set_lookup_props (c->lookup_props & ~LookupFlag::IgnoreFlags); /* Same lookup props with the IgnoreFlags bits cleared. */
+ if (!skippy_iter.prev ()) return_trace (false);
+
+ if (!_hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx])) { return_trace (false); }
+
+ unsigned int j = skippy_iter.idx; /* j: index of the candidate mark2 glyph. */
+
+ unsigned int id1 = _hb_glyph_info_get_lig_id (&buffer->cur());
+ unsigned int id2 = _hb_glyph_info_get_lig_id (&buffer->info[j]);
+ unsigned int comp1 = _hb_glyph_info_get_lig_comp (&buffer->cur());
+ unsigned int comp2 = _hb_glyph_info_get_lig_comp (&buffer->info[j]);
+
+ if (likely (id1 == id2))
+ {
+ if (id1 == 0) /* Marks belonging to the same base. */
+ goto good;
+ else if (comp1 == comp2) /* Marks belonging to the same ligature component. */
+ goto good;
+ }
+ else
+ {
+ /* If ligature ids don't match, it may be the case that one of the marks
+ * itself is a ligature. In which case match. */
+ if ((id1 > 0 && !comp1) || (id2 > 0 && !comp2))
+ goto good;
+ }
+
+ /* Didn't match. */
+ return_trace (false);
+
+ good:
+ unsigned int mark2_index = (this+mark2Coverage).get_coverage (buffer->info[j].codepoint);
+ if (mark2_index == NOT_COVERED) return_trace (false);
+
+ return_trace ((this+mark1Array).apply (c, mark1_index, mark2_index, this+mark2Array, classCount, j));
+ }
+
+ bool subset (hb_subset_context_t *c) const /* Serializes a copy limited to the plan's glyph set, with mark1 classes renumbered through klass_mapping; returns false when no class is retained or a Coverage fails to serialize. */
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+ out->format = format;
+
+ hb_map_t klass_mapping;
+ Markclass_closure_and_remap_indexes (this+mark1Coverage, this+mark1Array, glyphset, &klass_mapping);
+
+ if (!klass_mapping.get_population ()) return_trace (false); /* No mark1 class retained. */
+ out->classCount = klass_mapping.get_population ();
+
+ auto mark1_iter =
+ + hb_zip (this+mark1Coverage, this+mark1Array)
+ | hb_filter (glyphset, hb_first)
+ ;
+
+ hb_sorted_vector_t<hb_codepoint_t> new_coverage;
+ + mark1_iter
+ | hb_map (hb_first)
+ | hb_map (glyph_map)
+ | hb_sink (new_coverage)
+ ;
+
+ if (!out->mark1Coverage.serialize (c->serializer, out)
+ .serialize (c->serializer, new_coverage.iter ()))
+ return_trace (false);
+
+ out->mark1Array.serialize (c->serializer, out) /* NOTE(review): the result of this serialize call is not checked. */
+ .serialize (c->serializer, &klass_mapping, c->plan->layout_variation_idx_map, &(this+mark1Array), + mark1_iter
+ | hb_map (hb_second));
+
+ unsigned mark2count = (this+mark2Array).rows;
+ auto mark2_iter =
+ + hb_zip (this+mark2Coverage, hb_range (mark2count))
+ | hb_filter (glyphset, hb_first)
+ ;
+
+ new_coverage.reset (); /* Reuse the vector for the mark2 coverage. */
+ + mark2_iter
+ | hb_map (hb_first)
+ | hb_map (glyph_map)
+ | hb_sink (new_coverage)
+ ;
+
+ if (!out->mark2Coverage.serialize (c->serializer, out)
+ .serialize (c->serializer, new_coverage.iter ()))
+ return_trace (false);
+
+ hb_sorted_vector_t<unsigned> mark2_indexes;
+ for (const unsigned row : + mark2_iter
+ | hb_map (hb_second))
+ {
+ + hb_range ((unsigned) classCount)
+ | hb_filter (klass_mapping)
+ | hb_map ([&] (const unsigned col) { return row * (unsigned) classCount + col; }) /* Flat index into the original mark2 matrix. */
+ | hb_sink (mark2_indexes)
+ ;
+ }
+ out->mark2Array.serialize (c->serializer, out)
+ .serialize (c->serializer, mark2_iter.len (), &(this+mark2Array), c->plan->layout_variation_idx_map, mark2_indexes.iter ());
+
+ return_trace (true);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const /* Checks the fixed-size struct, then each offset in turn; mark2Array is validated with classCount as its column count. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ mark1Coverage.sanitize (c, this) &&
+ mark2Coverage.sanitize (c, this) &&
+ mark1Array.sanitize (c, this) &&
+ mark2Array.sanitize (c, this, (unsigned int) classCount));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 1 */
+ OffsetTo<Coverage>
+ mark1Coverage; /* Offset to Combining Mark1 Coverage
+ * table--from beginning of MarkMarkPos
+ * subtable */
+ OffsetTo<Coverage>
+ mark2Coverage; /* Offset to Combining Mark2 Coverage
+ * table--from beginning of MarkMarkPos
+ * subtable */
+ HBUINT16 classCount; /* Number of defined mark classes */
+ OffsetTo<MarkArray>
+ mark1Array; /* Offset to Mark1Array table--from
+ * beginning of MarkMarkPos subtable */
+ OffsetTo<Mark2Array>
+ mark2Array; /* Offset to Mark2Array table--from
+ * beginning of MarkMarkPos subtable */
+ public:
+ DEFINE_SIZE_STATIC (12);
+};
+
+/* Mark-to-mark positioning subtable: a format-tagged union that routes each
+ * context to the implementation for its format. */
+struct MarkMarkPos
+{
+  /* Forwards `c` and the extra arguments to the format-1 implementation.
+   * The context's no-dispatch value is returned when it refuses to
+   * dispatch; its default value is returned for any other format. */
+  template <typename context_t, typename ...Ts>
+  typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+  {
+    TRACE_DISPATCH (this, u.format);
+    if (unlikely (!c->may_dispatch (this, &u.format)))
+      return_trace (c->no_dispatch_return_value ());
+
+    const unsigned int fmt = u.format;
+    if (fmt == 1)
+      return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+    return_trace (c->default_return_value ());
+  }
+
+  protected:
+  union {
+    HBUINT16 format; /* Format identifier */
+    MarkMarkPosFormat1 format1;
+  } u;
+};
+
+
+struct ContextPos : Context {}; /* Contextual positioning subtable; adds nothing to the inherited Context. */
+
+struct ChainContextPos : ChainContext {}; /* Chaining contextual positioning subtable; adds nothing to the inherited ChainContext. */
+
+struct ExtensionPos : Extension<ExtensionPos> /* Extension positioning subtable; only names the subtable type for the Extension base template. */
+{
+ typedef struct PosLookupSubTable SubTable; /* Also serves as a forward declaration of PosLookupSubTable. */
+};
+
+
+
+/*
+ * PosLookup
+ */
+
+
+struct PosLookupSubTable /* Union of every GPOS subtable kind; the lookup type supplied by the caller selects which member is used. */
+{
+ friend struct Lookup;
+ friend struct PosLookup;
+
+ enum Type { /* Lookup type numbers recognised by dispatch(). */
+ Single = 1,
+ Pair = 2,
+ Cursive = 3,
+ MarkBase = 4,
+ MarkLig = 5,
+ MarkMark = 6,
+ Context = 7,
+ ChainContext = 8,
+ Extension = 9
+ };
+
+ template <typename context_t, typename ...Ts>
+ typename context_t::return_t dispatch (context_t *c, unsigned int lookup_type, Ts&&... ds) const /* Forwards `c` and the extra arguments to the union member matching `lookup_type`; unknown types yield the context's default return value. */
+ {
+ TRACE_DISPATCH (this, lookup_type);
+ switch (lookup_type) {
+ case Single: return_trace (u.single.dispatch (c, hb_forward<Ts> (ds)...));
+ case Pair: return_trace (u.pair.dispatch (c, hb_forward<Ts> (ds)...));
+ case Cursive: return_trace (u.cursive.dispatch (c, hb_forward<Ts> (ds)...));
+ case MarkBase: return_trace (u.markBase.dispatch (c, hb_forward<Ts> (ds)...));
+ case MarkLig: return_trace (u.markLig.dispatch (c, hb_forward<Ts> (ds)...));
+ case MarkMark: return_trace (u.markMark.dispatch (c, hb_forward<Ts> (ds)...));
+ case Context: return_trace (u.context.dispatch (c, hb_forward<Ts> (ds)...));
+ case ChainContext: return_trace (u.chainContext.dispatch (c, hb_forward<Ts> (ds)...));
+ case Extension: return_trace (u.extension.dispatch (c, hb_forward<Ts> (ds)...));
+ default: return_trace (c->default_return_value ());
+ }
+ }
+
+ bool intersects (const hb_set_t *glyphs, unsigned int lookup_type) const /* Runs an intersects context over the subtable selected by `lookup_type`. */
+ {
+ hb_intersects_context_t c (glyphs);
+ return dispatch (&c, lookup_type);
+ }
+
+ protected:
+ union {
+ SinglePos single;
+ PairPos pair;
+ CursivePos cursive;
+ MarkBasePos markBase;
+ MarkLigPos markLig;
+ MarkMarkPos markMark;
+ ContextPos context;
+ ChainContextPos chainContext;
+ ExtensionPos extension;
+ } u;
+ public:
+ DEFINE_SIZE_MIN (0);
+};
+
+
+struct PosLookup : Lookup /* A GPOS lookup: the generic Lookup specialised to PosLookupSubTable subtables. */
+{
+ typedef struct PosLookupSubTable SubTable;
+
+ const SubTable& get_subtable (unsigned int i) const /* Returns subtable `i`, typed as a positioning subtable. */
+ { return Lookup::get_subtable<SubTable> (i); }
+
+ bool is_reverse () const /* Always false for positioning lookups. */
+ {
+ return false;
+ }
+
+ bool apply (hb_ot_apply_context_t *c) const /* Dispatches the apply context to this lookup's subtables and returns the dispatch result. */
+ {
+ TRACE_APPLY (this);
+ return_trace (dispatch (c));
+ }
+
+ bool intersects (const hb_set_t *glyphs) const /* Dispatches an intersects context built over `glyphs`. */
+ {
+ hb_intersects_context_t c (glyphs);
+ return dispatch (&c);
+ }
+
+ hb_collect_glyphs_context_t::return_t collect_glyphs (hb_collect_glyphs_context_t *c) const
+ { return dispatch (c); }
+
+ hb_closure_lookups_context_t::return_t closure_lookups (hb_closure_lookups_context_t *c, unsigned this_index) const /* Visits this lookup at most once; marks it inactive when it does not intersect c->glyphs, otherwise dispatches with recursion enabled. */
+ {
+ if (c->is_lookup_visited (this_index))
+ return hb_closure_lookups_context_t::default_return_value ();
+
+ c->set_lookup_visited (this_index); /* Marked visited before dispatching. */
+ if (!intersects (c->glyphs))
+ {
+ c->set_lookup_inactive (this_index);
+ return hb_closure_lookups_context_t::default_return_value ();
+ }
+ c->set_recurse_func (dispatch_closure_lookups_recurse_func);
+
+ hb_closure_lookups_context_t::return_t ret = dispatch (c);
+ return ret;
+ }
+
+ template <typename set_t>
+ void collect_coverage (set_t *glyphs) const /* Dispatches a collect-coverage context that writes into `glyphs`. */
+ {
+ hb_collect_coverage_context_t<set_t> c (glyphs);
+ dispatch (&c);
+ }
+
+ static inline bool apply_recurse_func (hb_ot_apply_context_t *c, unsigned int lookup_index);
+
+ template <typename context_t>
+ static typename context_t::return_t dispatch_recurse_func (context_t *c, unsigned int lookup_index);
+
+ HB_INTERNAL static hb_closure_lookups_context_t::return_t dispatch_closure_lookups_recurse_func (hb_closure_lookups_context_t *c, unsigned this_index);
+
+ template <typename context_t, typename ...Ts>
+ typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const /* Forwards to Lookup::dispatch with SubTable as the subtable type. */
+ { return Lookup::dispatch<SubTable> (c, hb_forward<Ts> (ds)...); }
+
+ bool subset (hb_subset_context_t *c) const
+ { return Lookup::subset<SubTable> (c); }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ { return Lookup::sanitize<SubTable> (c); }
+};
+
+/*
+ * GPOS -- Glyph Positioning
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/gpos
+ */
+
+struct GPOS : GSUBGPOS /* The GPOS table: GSUBGPOS specialised to positioning lookups. */
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_GPOS;
+
+ const PosLookup& get_lookup (unsigned int i) const /* Returns lookup `i` of the base table, cast to PosLookup. */
+ { return static_cast<const PosLookup &> (GSUBGPOS::get_lookup (i)); }
+
+ static inline void position_start (hb_font_t *font, hb_buffer_t *buffer);
+ static inline void position_finish_advances (hb_font_t *font, hb_buffer_t *buffer);
+ static inline void position_finish_offsets (hb_font_t *font, hb_buffer_t *buffer);
+
+ bool subset (hb_subset_context_t *c) const /* Subsets the table using the plan's GPOS lookup and feature selections. */
+ {
+ hb_subset_layout_context_t l (c, tableTag, c->plan->gpos_lookups, c->plan->gpos_features);
+ return GSUBGPOS::subset<PosLookup> (&l);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ { return GSUBGPOS::sanitize<PosLookup> (c); }
+
+ HB_INTERNAL bool is_blocklisted (hb_blob_t *blob,
+ hb_face_t *face) const;
+
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c) const /* Dispatches `c` to every lookup whose index is present in c->gpos_lookups. */
+ {
+ for (unsigned i = 0; i < GSUBGPOS::get_lookup_count (); i++)
+ {
+ if (!c->gpos_lookups->has (i)) continue; /* Skip lookups not selected by the context. */
+ const PosLookup &l = get_lookup (i);
+ l.dispatch (c);
+ }
+ }
+
+ void closure_lookups (hb_face_t *face,
+ const hb_set_t *glyphs,
+ hb_set_t *lookup_indexes /* IN/OUT */) const
+ { GSUBGPOS::closure_lookups<PosLookup> (face, glyphs, lookup_indexes); }
+
+ typedef GSUBGPOS::accelerator_t<GPOS> accelerator_t;
+};
+
+
+static void
+reverse_cursive_minor_offset (hb_glyph_position_t *pos, unsigned int i, hb_direction_t direction, unsigned int new_parent) /* Reverses the cursive attachment chain starting at glyph `i`, so each former parent becomes attached to its former child; stops when the chain reaches `new_parent`. */
+{
+ int chain = pos[i].attach_chain(), type = pos[i].attach_type();
+ if (likely (!chain || 0 == (type & ATTACH_TYPE_CURSIVE))) /* Nothing to reverse: glyph is unattached or not cursively attached. */
+ return;
+
+ pos[i].attach_chain() = 0; /* Detach `i` before following the chain. */
+
+ unsigned int j = (int) i + chain; /* j: the glyph `i` was attached to. */
+
+ /* Stop if we see new parent in the chain. */
+ if (j == new_parent)
+ return;
+
+ reverse_cursive_minor_offset (pos, j, direction, new_parent); /* Reverse the rest of the chain first; recursion depth follows the chain length. */
+
+ if (HB_DIRECTION_IS_HORIZONTAL (direction)) /* The cross-direction offset is negated when the link direction flips. */
+ pos[j].y_offset = -pos[i].y_offset;
+ else
+ pos[j].x_offset = -pos[i].x_offset;
+
+ pos[j].attach_chain() = -chain; /* j now points back at i. */
+ pos[j].attach_type() = type;
+}
+static void
+propagate_attachment_offsets (hb_glyph_position_t *pos, /* Array of glyph positions, modified in place. */
+ unsigned int len, /* Number of entries in `pos`. */
+ unsigned int i, /* Index of the glyph whose attachment is resolved. */
+ hb_direction_t direction)
+{
+ /* Adjusts offsets of attached glyphs (both cursive and mark) to accumulate
+ * offset of glyph they are attached to. */
+ int chain = pos[i].attach_chain(), type = pos[i].attach_type();
+ if (likely (!chain))
+ return;
+
+ pos[i].attach_chain() = 0; /* Cleared first, so a later call for this glyph returns immediately. */
+
+ unsigned int j = (int) i + chain; /* j: the glyph `i` is attached to. */
+
+ if (unlikely (j >= len)) /* Parent index outside the buffer: give up. */
+ return;
+
+ propagate_attachment_offsets (pos, len, j, direction); /* Resolve the parent first. NOTE(review): recursion depth follows the attachment chain and has no explicit limit here -- confirm that is acceptable for long chains. */
+
+ assert (!!(type & ATTACH_TYPE_MARK) ^ !!(type & ATTACH_TYPE_CURSIVE)); /* Exactly one of the two attachment kinds must be set. */
+
+ if (type & ATTACH_TYPE_CURSIVE)
+ {
+ if (HB_DIRECTION_IS_HORIZONTAL (direction)) /* Cursive: only the cross-direction offset accumulates. */
+ pos[i].y_offset += pos[j].y_offset;
+ else
+ pos[i].x_offset += pos[j].x_offset;
+ }
+ else /*if (type & ATTACH_TYPE_MARK)*/
+ {
+ pos[i].x_offset += pos[j].x_offset;
+ pos[i].y_offset += pos[j].y_offset;
+
+ assert (j < i); /* A mark's parent must come earlier in the buffer. */
+ if (HB_DIRECTION_IS_FORWARD (direction))
+ for (unsigned int k = j; k < i; k++) { /* Forward: subtract the advances of glyphs j .. i-1. */
+ pos[i].x_offset -= pos[k].x_advance;
+ pos[i].y_offset -= pos[k].y_advance;
+ }
+ else
+ for (unsigned int k = j + 1; k < i + 1; k++) { /* Backward: add the advances of glyphs j+1 .. i. */
+ pos[i].x_offset += pos[k].x_advance;
+ pos[i].y_offset += pos[k].y_advance;
+ }
+ }
+}
+
+/* Resets the attachment bookkeeping (type and chain) of every glyph position
+ * in `buffer` to zero. `font` is unused. */
+void
+GPOS::position_start (hb_font_t *font HB_UNUSED, hb_buffer_t *buffer)
+{
+  hb_glyph_position_t *positions = buffer->pos;
+  const unsigned int glyph_count = buffer->len;
+  for (unsigned int idx = 0; idx < glyph_count; idx++)
+  {
+    positions[idx].attach_type() = 0;
+    positions[idx].attach_chain() = 0;
+  }
+}
+
+void
+GPOS::position_finish_advances (hb_font_t *font HB_UNUSED, hb_buffer_t *buffer HB_UNUSED) /* Currently a no-op: the only statement in the body is commented out. */
+{
+ //_hb_buffer_assert_gsubgpos_vars (buffer);
+}
+
+/* Final positioning pass: when any lookup recorded an attachment on the
+ * buffer, folds the offsets of attachment parents into every attached
+ * glyph. `font` is unused. */
+void
+GPOS::position_finish_offsets (hb_font_t *font HB_UNUSED, hb_buffer_t *buffer)
+{
+  _hb_buffer_assert_gsubgpos_vars (buffer);
+
+  unsigned int glyph_count;
+  hb_glyph_position_t *positions = hb_buffer_get_glyph_positions (buffer, &glyph_count);
+  const hb_direction_t dir = buffer->props.direction;
+
+  /* Nothing to propagate unless an attachment was flagged. */
+  if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT))
+    return;
+
+  /* Handle attachments */
+  for (unsigned int idx = 0; idx < glyph_count; idx++)
+    propagate_attachment_offsets (positions, glyph_count, idx, dir);
+}
+
+
+struct GPOS_accelerator_t : GPOS::accelerator_t {}; /* Named subclass of GPOS::accelerator_t; adds no members. */
+
+
+/* Out-of-class implementation for methods recursing */
+
+#ifndef HB_NO_OT_LAYOUT
+/* Recursion hook: looks up GPOS lookup `lookup_index` on the context's face
+ * and dispatches the context `c` to it. */
+template <typename context_t>
+/*static*/ typename context_t::return_t PosLookup::dispatch_recurse_func (context_t *c, unsigned int lookup_index)
+{
+  const auto *accel = c->face->table.GPOS.get_relaxed ();
+  return accel->table->get_lookup (lookup_index).dispatch (c);
+}
+
+/* Recursion hook for lookup closure: fetches GPOS lookup `this_index` from
+ * the context's face and runs closure_lookups() on it. */
+/*static*/ inline hb_closure_lookups_context_t::return_t PosLookup::dispatch_closure_lookups_recurse_func (hb_closure_lookups_context_t *c, unsigned this_index)
+{
+  const auto *accel = c->face->table.GPOS.get_relaxed ();
+  return accel->table->get_lookup (this_index).closure_lookups (c, this_index);
+}
+
+/* Recursion hook for applying a nested lookup: switches the context to
+ * lookup `lookup_index` (index and props), applies it, then restores the
+ * caller's lookup index and props. Returns the nested dispatch result. */
+/*static*/ bool PosLookup::apply_recurse_func (hb_ot_apply_context_t *c, unsigned int lookup_index)
+{
+  const PosLookup &nested = c->face->table.GPOS.get_relaxed ()->table->get_lookup (lookup_index);
+
+  /* Remember the caller's state. */
+  const unsigned int outer_index = c->lookup_index;
+  const unsigned int outer_props = c->lookup_props;
+
+  c->set_lookup_index (lookup_index);
+  c->set_lookup_props (nested.get_props ());
+  const bool applied = nested.dispatch (c);
+
+  /* Put the caller's state back, in the same order as it was set. */
+  c->set_lookup_index (outer_index);
+  c->set_lookup_props (outer_props);
+  return applied;
+}
+#endif
+
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_LAYOUT_GPOS_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-gsub-table.hh b/thirdparty/harfbuzz/src/hb-ot-layout-gsub-table.hh
new file mode 100644
index 0000000000..2f41d67819
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-layout-gsub-table.hh
@@ -0,0 +1,1627 @@
+/*
+ * Copyright © 2007,2008,2009,2010 Red Hat, Inc.
+ * Copyright © 2010,2012,2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_LAYOUT_GSUB_TABLE_HH
+#define HB_OT_LAYOUT_GSUB_TABLE_HH
+
+#include "hb-ot-layout-gsubgpos.hh"
+
+
+namespace OT {
+
+typedef hb_pair_t<hb_codepoint_t, hb_codepoint_t> hb_codepoint_pair_t; /* Pair of glyph ids; the subset code below uses it as (glyph, substitute). */
+
+template<typename Iterator>
+static void SingleSubst_serialize (hb_serialize_context_t *c,
+ Iterator it); /* Forward declaration: defined after struct SingleSubst, called from the SingleSubstFormat1/2 subset() methods. */
+
+
+struct SingleSubstFormat1 /* GSUB single substitution, format 1: every covered
+ * glyph g is replaced by (g + deltaGlyphID) & 0xFFFF. */
+{
+ bool intersects (const hb_set_t *glyphs) const /* Delegates to the Coverage table's intersects (). */
+ { return (this+coverage).intersects (glyphs); }
+ /* Sinks into c->output the substitute of each covered glyph that passes
+ * the *c->glyphs filter. */
+ void closure (hb_closure_context_t *c) const
+ {
+ unsigned d = deltaGlyphID;
+ + hb_iter (this+coverage)
+ | hb_filter (*c->glyphs)
+ | hb_map ([d] (hb_codepoint_t g) { return (g + d) & 0xFFFFu; })
+ | hb_sink (c->output)
+ ;
+ }
+ /* No-op: the body is empty. */
+ void closure_lookups (hb_closure_lookups_context_t *c) const {}
+ /* Collects the coverage into c->input (returning early if that fails)
+ * and the substitute of every covered glyph into c->output. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ {
+ if (unlikely (!(this+coverage).collect_coverage (c->input))) return;
+ unsigned d = deltaGlyphID;
+ + hb_iter (this+coverage)
+ | hb_map ([d] (hb_codepoint_t g) { return (g + d) & 0xFFFFu; })
+ | hb_sink (c->output)
+ ;
+ }
+ /* Returns the Coverage table referenced by this subtable. */
+ const Coverage &get_coverage () const { return this+coverage; }
+ /* True only for a one-glyph input whose glyph is covered. */
+ bool would_apply (hb_would_apply_context_t *c) const
+ { return c->len == 1 && (this+coverage).get_coverage (c->glyphs[0]) != NOT_COVERED; }
+ /* Replaces the buffer's current glyph with its substitute; returns
+ * false when that glyph is not covered. */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+ hb_codepoint_t glyph_id = c->buffer->cur().codepoint;
+ unsigned int index = (this+coverage).get_coverage (glyph_id);
+ if (likely (index == NOT_COVERED)) return_trace (false);
+
+ /* According to the Adobe Annotated OpenType Suite, result is always
+ * limited to 16bit. */
+ glyph_id = (glyph_id + deltaGlyphID) & 0xFFFFu;
+ c->replace_glyph (glyph_id);
+
+ return_trace (true);
+ }
+ /* Writes this subtable: extends the serializer by the minimum struct
+ * size, serializes a Coverage built from `glyphs`, then stores `delta`
+ * through check_assign (whose result is not inspected here). */
+ template<typename Iterator,
+ hb_requires (hb_is_sorted_source_of (Iterator, hb_codepoint_t))>
+ bool serialize (hb_serialize_context_t *c,
+ Iterator glyphs,
+ unsigned delta)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!c->extend_min (*this))) return_trace (false);
+ if (unlikely (!coverage.serialize (c, this).serialize (c, glyphs))) return_trace (false);
+ c->check_assign (deltaGlyphID, delta);
+ return_trace (true);
+ }
+ /* Subsetting: pairs each covered glyph that is in the plan's GSUB glyph
+ * set with its substitute, drops pairs whose substitute is not in that
+ * set, remaps both ids through the plan's glyph_map and hands the
+ * result to SingleSubst_serialize. Returns whether any pair survived. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset_gsub ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ hb_codepoint_t delta = deltaGlyphID;
+
+ auto it =
+ + hb_iter (this+coverage)
+ | hb_filter (glyphset)
+ | hb_map_retains_sorting ([&] (hb_codepoint_t g) {
+ return hb_codepoint_pair_t (g,
+ (g + delta) & 0xFFFF); })
+ | hb_filter (glyphset, hb_second)
+ | hb_map_retains_sorting ([&] (hb_codepoint_pair_t p) -> hb_codepoint_pair_t
+ { return hb_pair (glyph_map[p.first], glyph_map[p.second]); })
+ ;
+
+ bool ret = bool (it);
+ SingleSubst_serialize (c->serializer, it);
+ return_trace (ret);
+ }
+ /* Sanitizes the coverage offset (relative to this subtable) and the
+ * deltaGlyphID field. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (coverage.sanitize (c, this) && deltaGlyphID.sanitize (c));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 1 */
+ OffsetTo<Coverage>
+ coverage; /* Offset to Coverage table--from
+ * beginning of Substitution table */
+ HBUINT16 deltaGlyphID; /* Add to original GlyphID to get
+ * substitute GlyphID, modulo 0x10000 */
+ public:
+ DEFINE_SIZE_STATIC (6);
+};
+
+struct SingleSubstFormat2 /* GSUB single substitution, format 2: the substitute
+ * of a covered glyph is substitute[coverage index]. */
+{
+ bool intersects (const hb_set_t *glyphs) const /* Delegates to the Coverage table's intersects (). */
+ { return (this+coverage).intersects (glyphs); }
+ /* Walks (covered glyph, substitute) pairs and sinks into c->output the
+ * substitute of each pair whose glyph passes the *c->glyphs filter. */
+ void closure (hb_closure_context_t *c) const
+ {
+ + hb_zip (this+coverage, substitute)
+ | hb_filter (*c->glyphs, hb_first)
+ | hb_map (hb_second)
+ | hb_sink (c->output)
+ ;
+ }
+ /* No-op: the body is empty. */
+ void closure_lookups (hb_closure_lookups_context_t *c) const {}
+ /* Collects the coverage into c->input (returning early if that fails)
+ * and every zipped substitute into c->output. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ {
+ if (unlikely (!(this+coverage).collect_coverage (c->input))) return;
+ + hb_zip (this+coverage, substitute)
+ | hb_map (hb_second)
+ | hb_sink (c->output)
+ ;
+ }
+ /* Returns the Coverage table referenced by this subtable. */
+ const Coverage &get_coverage () const { return this+coverage; }
+ /* True only for a one-glyph input whose glyph is covered. */
+ bool would_apply (hb_would_apply_context_t *c) const
+ { return c->len == 1 && (this+coverage).get_coverage (c->glyphs[0]) != NOT_COVERED; }
+ /* Replaces the buffer's current glyph with substitute[coverage index].
+ * Returns false when the glyph is not covered or when the coverage
+ * index is not smaller than substitute.len. */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+ unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint);
+ if (likely (index == NOT_COVERED)) return_trace (false);
+
+ if (unlikely (index >= substitute.len)) return_trace (false);
+
+ c->replace_glyph (substitute[index]);
+
+ return_trace (true);
+ }
+ /* Writes this subtable from (glyph, substitute) pairs: extends the
+ * serializer by the minimum struct size, serializes the `substitute`
+ * array from the pairs' second elements, then the Coverage from their
+ * first elements. */
+ template<typename Iterator,
+ hb_requires (hb_is_sorted_source_of (Iterator,
+ hb_codepoint_pair_t))>
+ bool serialize (hb_serialize_context_t *c,
+ Iterator it)
+ {
+ TRACE_SERIALIZE (this);
+ auto substitutes =
+ + it
+ | hb_map (hb_second)
+ ;
+ auto glyphs =
+ + it
+ | hb_map_retains_sorting (hb_first)
+ ;
+ if (unlikely (!c->extend_min (*this))) return_trace (false);
+ if (unlikely (!substitute.serialize (c, substitutes))) return_trace (false);
+ if (unlikely (!coverage.serialize (c, this).serialize (c, glyphs))) return_trace (false);
+ return_trace (true);
+ }
+ /* Subsetting: keeps the (covered glyph, substitute) pairs where both
+ * ids are in the plan's GSUB glyph set, remaps both through the plan's
+ * glyph_map and hands the result to SingleSubst_serialize. Returns
+ * whether any pair survived. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset_gsub ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ auto it =
+ + hb_zip (this+coverage, substitute)
+ | hb_filter (glyphset, hb_first)
+ | hb_filter (glyphset, hb_second)
+ | hb_map_retains_sorting ([&] (hb_pair_t<hb_codepoint_t, const HBGlyphID &> p) -> hb_codepoint_pair_t
+ { return hb_pair (glyph_map[p.first], glyph_map[p.second]); })
+ ;
+
+ bool ret = bool (it);
+ SingleSubst_serialize (c->serializer, it);
+ return_trace (ret);
+ }
+ /* Sanitizes the coverage offset (relative to this subtable) and the
+ * substitute array. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (coverage.sanitize (c, this) && substitute.sanitize (c));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 2 */
+ OffsetTo<Coverage>
+ coverage; /* Offset to Coverage table--from
+ * beginning of Substitution table */
+ ArrayOf<HBGlyphID>
+ substitute; /* Array of substitute
+ * GlyphIDs--ordered by Coverage Index */
+ public:
+ DEFINE_SIZE_ARRAY (6, substitute);
+};
+
+struct SingleSubst /* Format-tagged union over SingleSubstFormat1 and SingleSubstFormat2. */
+{
+ /* Serializes the (glyph, substitute) pairs in `glyphs`, choosing the
+ * format first: format 1 when the input is non-empty and every pair has
+ * the same (second - first) & 0xFFFF delta as the first pair, format 2
+ * otherwise (including for empty input). Format 1 receives only the
+ * pairs' first elements plus the common delta. */
+ template<typename Iterator,
+ hb_requires (hb_is_sorted_source_of (Iterator,
+ const hb_codepoint_pair_t))>
+ bool serialize (hb_serialize_context_t *c,
+ Iterator glyphs)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!c->extend_min (u.format))) return_trace (false);
+ unsigned format = 2;
+ unsigned delta = 0;
+ if (glyphs)
+ {
+ format = 1;
+ auto get_delta = [=] (hb_codepoint_pair_t _)
+ { return (unsigned) (_.second - _.first) & 0xFFFF; };
+ delta = get_delta (*glyphs);
+ if (!hb_all (++(+glyphs), delta, get_delta)) format = 2;
+ }
+ u.format = format;
+ switch (u.format) {
+ case 1: return_trace (u.format1.serialize (c,
+ + glyphs
+ | hb_map_retains_sorting (hb_first),
+ delta));
+ case 2: return_trace (u.format2.serialize (c, glyphs));
+ default:return_trace (false);
+ }
+ }
+ /* Forwards to c->dispatch () on the sub-format selected by u.format.
+ * Returns c->no_dispatch_return_value () when c->may_dispatch () refuses
+ * and c->default_return_value () for any format other than 1 or 2. */
+ template <typename context_t, typename ...Ts>
+ typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+ {
+ TRACE_DISPATCH (this, u.format);
+ if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ());
+ switch (u.format) {
+ case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+ case 2: return_trace (c->dispatch (u.format2, hb_forward<Ts> (ds)...));
+ default:return_trace (c->default_return_value ());
+ }
+ }
+
+ protected:
+ union {
+ HBUINT16 format; /* Format identifier */
+ SingleSubstFormat1 format1;
+ SingleSubstFormat2 format2;
+ } u;
+};
+
+/* Starts a SingleSubst object at the serializer's current position and
+ * serializes the pairs from `it` into it. The boolean result of
+ * SingleSubst::serialize is not propagated. */
+template<typename Iterator>
+static void
+SingleSubst_serialize (hb_serialize_context_t *c,
+ Iterator it)
+{
+ auto *out = c->start_embed<SingleSubst> ();
+ out->serialize (c, it);
+}
+
+struct Sequence /* Glyph string that replaces one input glyph; referenced from
+ * MultipleSubstFormat1 by coverage index. */
+{
+ bool intersects (const hb_set_t *glyphs) const /* hb_all: every glyph of `substitute` must be in `glyphs`. */
+ { return hb_all (substitute, glyphs); }
+ /* Adds the whole substitute array to c->output. */
+ void closure (hb_closure_context_t *c) const
+ { c->output->add_array (substitute.arrayZ, substitute.len); }
+ /* Adds the whole substitute array to c->output. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ { c->output->add_array (substitute.arrayZ, substitute.len); }
+ /* Replaces the buffer's current glyph by this sequence and always
+ * returns true. One-glyph and empty sequences are special-cased
+ * (in-place replace / delete_glyph); otherwise each substitute is
+ * output as component i and the original glyph is then skipped. */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+ unsigned int count = substitute.len;
+
+ /* Special-case to make it in-place and not consider this
+ * as a "multiplied" substitution. */
+ if (unlikely (count == 1))
+ {
+ c->replace_glyph (substitute.arrayZ[0]);
+ return_trace (true);
+ }
+ /* Spec disallows this, but Uniscribe allows it.
+ * https://github.com/harfbuzz/harfbuzz/issues/253 */
+ else if (unlikely (count == 0))
+ {
+ c->buffer->delete_glyph ();
+ return_trace (true);
+ }
+
+ unsigned int klass = _hb_glyph_info_is_ligature (&c->buffer->cur()) ?
+ HB_OT_LAYOUT_GLYPH_PROPS_BASE_GLYPH : 0;
+
+ for (unsigned int i = 0; i < count; i++) {
+ _hb_glyph_info_set_lig_props_for_component (&c->buffer->cur(), i);
+ c->output_glyph_for_component (substitute.arrayZ[i], klass);
+ }
+ c->buffer->skip_glyph ();
+
+ return_trace (true);
+ }
+ /* Serializes the glyph ids from `subst` into the `substitute` array. */
+ template <typename Iterator,
+ hb_requires (hb_is_source_of (Iterator, hb_codepoint_t))>
+ bool serialize (hb_serialize_context_t *c,
+ Iterator subst)
+ {
+ TRACE_SERIALIZE (this);
+ return_trace (substitute.serialize (c, subst));
+ }
+ /* Subsetting: returns false unless intersects () holds for the plan's
+ * glyph set; otherwise serializes the substitutes remapped through the
+ * plan's glyph_map into a freshly embedded Sequence. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ if (!intersects (&glyphset)) return_trace (false);
+
+ auto it =
+ + hb_iter (substitute)
+ | hb_map (glyph_map)
+ ;
+
+ auto *out = c->serializer->start_embed (*this);
+ return_trace (out->serialize (c->serializer, it));
+ }
+ /* Sanitizes the substitute array. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (substitute.sanitize (c));
+ }
+
+ protected:
+ ArrayOf<HBGlyphID>
+ substitute; /* String of GlyphIDs to substitute */
+ public:
+ DEFINE_SIZE_ARRAY (2, substitute);
+};
+
+struct MultipleSubstFormat1 /* GSUB multiple substitution, format 1: each
+ * covered glyph is replaced by the Sequence at its coverage index. */
+{
+ bool intersects (const hb_set_t *glyphs) const /* Delegates to the Coverage table's intersects (). */
+ { return (this+coverage).intersects (glyphs); }
+ /* Runs Sequence::closure on the sequence of each covered glyph that
+ * passes the *c->glyphs filter. */
+ void closure (hb_closure_context_t *c) const
+ {
+ + hb_zip (this+coverage, sequence)
+ | hb_filter (*c->glyphs, hb_first)
+ | hb_map (hb_second)
+ | hb_map (hb_add (this))
+ | hb_apply ([c] (const Sequence &_) { _.closure (c); })
+ ;
+ }
+ /* No-op: the body is empty. */
+ void closure_lookups (hb_closure_lookups_context_t *c) const {}
+ /* Collects the coverage into c->input (returning early if that fails)
+ * and runs Sequence::collect_glyphs on every zipped sequence. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ {
+ if (unlikely (!(this+coverage).collect_coverage (c->input))) return;
+ + hb_zip (this+coverage, sequence)
+ | hb_map (hb_second)
+ | hb_map (hb_add (this))
+ | hb_apply ([c] (const Sequence &_) { _.collect_glyphs (c); })
+ ;
+ }
+ /* Returns the Coverage table referenced by this subtable. */
+ const Coverage &get_coverage () const { return this+coverage; }
+ /* True only for a one-glyph input whose glyph is covered. */
+ bool would_apply (hb_would_apply_context_t *c) const
+ { return c->len == 1 && (this+coverage).get_coverage (c->glyphs[0]) != NOT_COVERED; }
+ /* Applies the Sequence found at the current glyph's coverage index;
+ * returns false when the glyph is not covered. */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+
+ unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint);
+ if (likely (index == NOT_COVERED)) return_trace (false);
+
+ return_trace ((this+sequence[index]).apply (c));
+ }
+ /* Writes this subtable from flattened lists: substitute_len_list[i] is
+ * the length of the sequence for glyphs[i], and substitute_glyphs_list
+ * holds the sequences back to back (it is advanced by that length after
+ * each one). The Coverage is serialized last. */
+ bool serialize (hb_serialize_context_t *c,
+ hb_sorted_array_t<const HBGlyphID> glyphs,
+ hb_array_t<const unsigned int> substitute_len_list,
+ hb_array_t<const HBGlyphID> substitute_glyphs_list)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!c->extend_min (*this))) return_trace (false);
+ if (unlikely (!sequence.serialize (c, glyphs.length))) return_trace (false);
+ for (unsigned int i = 0; i < glyphs.length; i++)
+ {
+ unsigned int substitute_len = substitute_len_list[i];
+ if (unlikely (!sequence[i].serialize (c, this)
+ .serialize (c, substitute_glyphs_list.sub_array (0, substitute_len))))
+ return_trace (false);
+ substitute_glyphs_list += substitute_len;
+ }
+ return_trace (coverage.serialize (c, this).serialize (c, glyphs));
+ }
+ /* Subsetting: copies the format, then walks (covered glyph, sequence
+ * offset) pairs, keeping glyphs in the plan's glyph set and passing each
+ * offset through subset_offset_array (defined outside this part of the
+ * file; presumably it keeps the entries whose Sequence subsets
+ * successfully -- confirm). The surviving glyphs, remapped through
+ * glyph_map, become the new coverage. Returns whether that coverage is
+ * non-empty; the result of serializing it is not checked. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+ out->format = format;
+
+ hb_sorted_vector_t<hb_codepoint_t> new_coverage;
+ + hb_zip (this+coverage, sequence)
+ | hb_filter (glyphset, hb_first)
+ | hb_filter (subset_offset_array (c, out->sequence, this), hb_second)
+ | hb_map (hb_first)
+ | hb_map (glyph_map)
+ | hb_sink (new_coverage)
+ ;
+ out->coverage.serialize (c->serializer, out)
+ .serialize (c->serializer, new_coverage.iter ());
+ return_trace (bool (new_coverage));
+ }
+ /* Sanitizes the coverage offset and the array of Sequence offsets, both
+ * relative to this subtable. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (coverage.sanitize (c, this) && sequence.sanitize (c, this));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 1 */
+ OffsetTo<Coverage>
+ coverage; /* Offset to Coverage table--from
+ * beginning of Substitution table */
+ OffsetArrayOf<Sequence>
+ sequence; /* Array of Sequence tables
+ * ordered by Coverage Index */
+ public:
+ DEFINE_SIZE_ARRAY (6, sequence);
+};
+
+/* Format-tagged wrapper for multiple-substitution subtables. The union
+ * holds only the format tag and a format-1 payload. */
+struct MultipleSubst
+{
+ /* Writes the format tag (always 1) and forwards all arguments unchanged
+ * to MultipleSubstFormat1::serialize. Returns false if the serializer
+ * cannot make room for the tag or if the format-1 serializer fails. */
+ bool serialize (hb_serialize_context_t *c,
+ hb_sorted_array_t<const HBGlyphID> glyphs,
+ hb_array_t<const unsigned int> substitute_len_list,
+ hb_array_t<const HBGlyphID> substitute_glyphs_list)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!c->extend_min (u.format)))
+ return_trace (false);
+
+ const unsigned int chosen_format = 1;
+ u.format = chosen_format;
+ /* Read the tag back, as the original switch did, before using the payload. */
+ if ((unsigned int) u.format != chosen_format)
+ return_trace (false);
+ return_trace (u.format1.serialize (c, glyphs, substitute_len_list, substitute_glyphs_list));
+ }
+
+ /* Forwards to c->dispatch () on the format-1 payload when the tag is 1.
+ * Returns c->no_dispatch_return_value () when c->may_dispatch () refuses
+ * and c->default_return_value () for any other tag. */
+ template <typename context_t, typename ...Ts>
+ typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+ {
+ TRACE_DISPATCH (this, u.format);
+ if (unlikely (!c->may_dispatch (this, &u.format)))
+ return_trace (c->no_dispatch_return_value ());
+
+ if ((unsigned int) u.format == 1)
+ return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+ return_trace (c->default_return_value ());
+ }
+
+ protected:
+ union {
+ HBUINT16 format; /* Format identifier */
+ MultipleSubstFormat1 format1;
+ } u;
+};
+
+struct AlternateSet /* List of alternate glyphs for one input glyph; referenced
+ * from AlternateSubstFormat1 by coverage index. */
+{
+ bool intersects (const hb_set_t *glyphs) const /* hb_any: at least one alternate must be in `glyphs`. */
+ { return hb_any (alternates, glyphs); }
+ /* Adds the whole alternates array to c->output. */
+ void closure (hb_closure_context_t *c) const
+ { c->output->add_array (alternates.arrayZ, alternates.len); }
+ /* Adds the whole alternates array to c->output. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ { c->output->add_array (alternates.arrayZ, alternates.len); }
+ /* Replaces the current glyph with alternate number alt_index (1-based).
+ * alt_index is the part of the glyph's mask selected by c->lookup_mask,
+ * shifted right by hb_ctz (lookup_mask). When it equals
+ * HB_OT_MAP_MAX_VALUE and c->random is set, a random alternate is chosen
+ * instead. Returns false when there are no alternates or alt_index is 0
+ * or larger than the number of alternates. */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+ unsigned int count = alternates.len;
+
+ if (unlikely (!count)) return_trace (false);
+
+ hb_mask_t glyph_mask = c->buffer->cur().mask;
+ hb_mask_t lookup_mask = c->lookup_mask;
+
+ /* Note: This breaks badly if two features enabled this lookup together. */
+ unsigned int shift = hb_ctz (lookup_mask);
+ unsigned int alt_index = ((lookup_mask & glyph_mask) >> shift);
+
+ /* If alt_index is MAX_VALUE, randomize feature if it is the rand feature. */
+ if (alt_index == HB_OT_MAP_MAX_VALUE && c->random)
+ alt_index = c->random_number () % count + 1;
+
+ if (unlikely (alt_index > count || alt_index == 0)) return_trace (false);
+
+ c->replace_glyph (alternates[alt_index - 1]);
+
+ return_trace (true);
+ }
+ /* Returns the total number of alternates in this set. When the set is
+ * non-empty and alternate_count is non-NULL, alternates starting at
+ * start_offset are sunk into alternate_glyphs; sub_array () is handed
+ * alternate_count itself, presumably so it can clamp and update the
+ * count -- confirm against its definition. */
+ unsigned
+ get_alternates (unsigned start_offset,
+ unsigned *alternate_count /* IN/OUT. May be NULL. */,
+ hb_codepoint_t *alternate_glyphs /* OUT. May be NULL. */) const
+ {
+ if (alternates.len && alternate_count)
+ {
+ + alternates.sub_array (start_offset, alternate_count)
+ | hb_sink (hb_array (alternate_glyphs, *alternate_count))
+ ;
+ }
+ return alternates.len;
+ }
+ /* Serializes the glyph ids from `alts` into the `alternates` array. */
+ template <typename Iterator,
+ hb_requires (hb_is_source_of (Iterator, hb_codepoint_t))>
+ bool serialize (hb_serialize_context_t *c,
+ Iterator alts)
+ {
+ TRACE_SERIALIZE (this);
+ return_trace (alternates.serialize (c, alts));
+ }
+ /* Subsetting: keeps only the alternates that are in the plan's glyph
+ * set, remapped through glyph_map. Returns false if serialization fails
+ * or the written alternates array tests false (no alternate kept). */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ auto it =
+ + hb_iter (alternates)
+ | hb_filter (glyphset)
+ | hb_map (glyph_map)
+ ;
+
+ auto *out = c->serializer->start_embed (*this);
+ return_trace (out->serialize (c->serializer, it) &&
+ out->alternates);
+ }
+ /* Sanitizes the alternates array. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (alternates.sanitize (c));
+ }
+
+ protected:
+ ArrayOf<HBGlyphID>
+ alternates; /* Array of alternate GlyphIDs--in
+ * arbitrary order */
+ public:
+ DEFINE_SIZE_ARRAY (2, alternates);
+};
+
+struct AlternateSubstFormat1 /* GSUB alternate substitution, format 1: each
+ * covered glyph has an AlternateSet at its coverage index. */
+{
+ bool intersects (const hb_set_t *glyphs) const /* Delegates to the Coverage table's intersects (). */
+ { return (this+coverage).intersects (glyphs); }
+ /* Runs AlternateSet::closure on the set of each covered glyph that
+ * passes the c->glyphs filter. NOTE(review): the filter is handed the
+ * set pointer here, whereas the other closure () methods in this file
+ * pass *c->glyphs -- presumably equivalent; confirm. */
+ void closure (hb_closure_context_t *c) const
+ {
+ + hb_zip (this+coverage, alternateSet)
+ | hb_filter (c->glyphs, hb_first)
+ | hb_map (hb_second)
+ | hb_map (hb_add (this))
+ | hb_apply ([c] (const AlternateSet &_) { _.closure (c); })
+ ;
+ }
+ /* No-op: the body is empty. */
+ void closure_lookups (hb_closure_lookups_context_t *c) const {}
+ /* Collects the coverage into c->input (returning early if that fails)
+ * and runs AlternateSet::collect_glyphs on every zipped set. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ {
+ if (unlikely (!(this+coverage).collect_coverage (c->input))) return;
+ + hb_zip (this+coverage, alternateSet)
+ | hb_map (hb_second)
+ | hb_map (hb_add (this))
+ | hb_apply ([c] (const AlternateSet &_) { _.collect_glyphs (c); })
+ ;
+ }
+ /* Returns the Coverage table referenced by this subtable. */
+ const Coverage &get_coverage () const { return this+coverage; }
+ /* True only for a one-glyph input whose glyph is covered. */
+ bool would_apply (hb_would_apply_context_t *c) const
+ { return c->len == 1 && (this+coverage).get_coverage (c->glyphs[0]) != NOT_COVERED; }
+ /* Forwards to AlternateSet::get_alternates on the set indexed by the
+ * coverage index of `gid`. No NOT_COVERED check is made in this method;
+ * NOTE(review): presumably the offset array's operator[] tolerates an
+ * out-of-range index -- confirm. */
+ unsigned
+ get_glyph_alternates (hb_codepoint_t gid,
+ unsigned start_offset,
+ unsigned *alternate_count /* IN/OUT. May be NULL. */,
+ hb_codepoint_t *alternate_glyphs /* OUT. May be NULL. */) const
+ { return (this+alternateSet[(this+coverage).get_coverage (gid)])
+ .get_alternates (start_offset, alternate_count, alternate_glyphs); }
+ /* Applies the AlternateSet found at the current glyph's coverage index;
+ * returns false when the glyph is not covered. */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+
+ unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint);
+ if (likely (index == NOT_COVERED)) return_trace (false);
+
+ return_trace ((this+alternateSet[index]).apply (c));
+ }
+ /* Writes this subtable from flattened lists: alternate_len_list[i] is
+ * the number of alternates for glyphs[i], and alternate_glyphs_list
+ * holds the sets back to back (it is advanced by that length after each
+ * one). The Coverage is serialized last. */
+ bool serialize (hb_serialize_context_t *c,
+ hb_sorted_array_t<const HBGlyphID> glyphs,
+ hb_array_t<const unsigned int> alternate_len_list,
+ hb_array_t<const HBGlyphID> alternate_glyphs_list)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!c->extend_min (*this))) return_trace (false);
+ if (unlikely (!alternateSet.serialize (c, glyphs.length))) return_trace (false);
+ for (unsigned int i = 0; i < glyphs.length; i++)
+ {
+ unsigned int alternate_len = alternate_len_list[i];
+ if (unlikely (!alternateSet[i].serialize (c, this)
+ .serialize (c, alternate_glyphs_list.sub_array (0, alternate_len))))
+ return_trace (false);
+ alternate_glyphs_list += alternate_len;
+ }
+ return_trace (coverage.serialize (c, this).serialize (c, glyphs));
+ }
+ /* Subsetting: copies the format, then walks (covered glyph, set offset)
+ * pairs, keeping glyphs in the plan's glyph set and passing each offset
+ * through subset_offset_array (defined outside this part of the file;
+ * presumably it keeps the entries whose AlternateSet subsets
+ * successfully -- confirm). The surviving glyphs, remapped through
+ * glyph_map, become the new coverage. Returns whether that coverage is
+ * non-empty; the result of serializing it is not checked. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+ out->format = format;
+
+ hb_sorted_vector_t<hb_codepoint_t> new_coverage;
+ + hb_zip (this+coverage, alternateSet)
+ | hb_filter (glyphset, hb_first)
+ | hb_filter (subset_offset_array (c, out->alternateSet, this), hb_second)
+ | hb_map (hb_first)
+ | hb_map (glyph_map)
+ | hb_sink (new_coverage)
+ ;
+ out->coverage.serialize (c->serializer, out)
+ .serialize (c->serializer, new_coverage.iter ());
+ return_trace (bool (new_coverage));
+ }
+ /* Sanitizes the coverage offset and the array of AlternateSet offsets,
+ * both relative to this subtable. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (coverage.sanitize (c, this) && alternateSet.sanitize (c, this));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 1 */
+ OffsetTo<Coverage>
+ coverage; /* Offset to Coverage table--from
+ * beginning of Substitution table */
+ OffsetArrayOf<AlternateSet>
+ alternateSet; /* Array of AlternateSet tables
+ * ordered by Coverage Index */
+ public:
+ DEFINE_SIZE_ARRAY (6, alternateSet);
+};
+
+/* Format-tagged wrapper for alternate-substitution subtables. The union
+ * holds only the format tag and a format-1 payload. */
+struct AlternateSubst
+{
+ /* Writes the format tag (always 1) and forwards all arguments unchanged
+ * to AlternateSubstFormat1::serialize. Returns false if the serializer
+ * cannot make room for the tag or if the format-1 serializer fails. */
+ bool serialize (hb_serialize_context_t *c,
+ hb_sorted_array_t<const HBGlyphID> glyphs,
+ hb_array_t<const unsigned int> alternate_len_list,
+ hb_array_t<const HBGlyphID> alternate_glyphs_list)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!c->extend_min (u.format)))
+ return_trace (false);
+
+ const unsigned int chosen_format = 1;
+ u.format = chosen_format;
+ /* Read the tag back, as the original switch did, before using the payload. */
+ if ((unsigned int) u.format != chosen_format)
+ return_trace (false);
+ return_trace (u.format1.serialize (c, glyphs, alternate_len_list, alternate_glyphs_list));
+ }
+
+ /* Forwards to c->dispatch () on the format-1 payload when the tag is 1.
+ * Returns c->no_dispatch_return_value () when c->may_dispatch () refuses
+ * and c->default_return_value () for any other tag. */
+ template <typename context_t, typename ...Ts>
+ typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+ {
+ TRACE_DISPATCH (this, u.format);
+ if (unlikely (!c->may_dispatch (this, &u.format)))
+ return_trace (c->no_dispatch_return_value ());
+
+ if ((unsigned int) u.format == 1)
+ return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+ return_trace (c->default_return_value ());
+ }
+
+ protected:
+ union {
+ HBUINT16 format; /* Format identifier */
+ AlternateSubstFormat1 format1;
+ } u;
+};
+
+
+struct Ligature /* One ligature rule: the ligature glyph plus its component
+ * glyphs from the second component onward. */
+{
+ bool intersects (const hb_set_t *glyphs) const /* hb_all: every stored component must be in `glyphs`. */
+ { return hb_all (component, glyphs); }
+ /* Adds ligGlyph to c->output, but only when intersects (c->glyphs)
+ * holds. */
+ void closure (hb_closure_context_t *c) const
+ {
+ if (!intersects (c->glyphs)) return;
+ c->output->add (ligGlyph);
+ }
+ /* Adds the stored components to c->input and ligGlyph to c->output. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ {
+ c->input->add_array (component.arrayZ, component.get_length ());
+ c->output->add (ligGlyph);
+ }
+ /* True when the input length equals component.lenP1 and input glyphs
+ * 1..len-1 equal component[1..len-1]. Input glyph 0 is not compared in
+ * this method. */
+ bool would_apply (hb_would_apply_context_t *c) const
+ {
+ if (c->len != component.lenP1)
+ return false;
+
+ for (unsigned int i = 1; i < c->len; i++)
+ if (likely (c->glyphs[i] != component[i]))
+ return false;
+
+ return true;
+ }
+ /* Tries to form this ligature at the current buffer position. A count
+ * (component.lenP1) of 0 fails; a count of 1 is an in-place replacement;
+ * otherwise match_input must match the components starting at
+ * &component[1], after which ligate_input performs the substitution.
+ * Returns whether the rule applied. */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+ unsigned int count = component.lenP1;
+
+ if (unlikely (!count)) return_trace (false);
+
+ /* Special-case to make it in-place and not consider this
+ * as a "ligated" substitution. */
+ if (unlikely (count == 1))
+ {
+ c->replace_glyph (ligGlyph);
+ return_trace (true);
+ }
+
+ unsigned int total_component_count = 0;
+
+ unsigned int match_length = 0;
+ unsigned int match_positions[HB_MAX_CONTEXT_LENGTH];
+
+ if (likely (!match_input (c, count,
+ &component[1],
+ match_glyph,
+ nullptr,
+ &match_length,
+ match_positions,
+ &total_component_count)))
+ return_trace (false);
+
+ ligate_input (c,
+ count,
+ match_positions,
+ match_length,
+ ligGlyph,
+ total_component_count);
+
+ return_trace (true);
+ }
+ /* Writes this rule: extends the serializer by the minimum struct size,
+ * stores `ligature` in ligGlyph and serializes `components` into the
+ * component array. */
+ template <typename Iterator,
+ hb_requires (hb_is_source_of (Iterator, hb_codepoint_t))>
+ bool serialize (hb_serialize_context_t *c,
+ hb_codepoint_t ligature,
+ Iterator components /* Starting from second */)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!c->extend_min (*this))) return_trace (false);
+ ligGlyph = ligature;
+ if (unlikely (!component.serialize (c, components))) return_trace (false);
+ return_trace (true);
+ }
+ /* Subsetting: returns false unless intersects () holds for the plan's
+ * glyph set and ligGlyph is in that set; otherwise serializes the rule
+ * with ligGlyph and the components remapped through glyph_map. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ if (!intersects (&glyphset) || !glyphset.has (ligGlyph)) return_trace (false);
+
+ auto it =
+ + hb_iter (component)
+ | hb_map (glyph_map)
+ ;
+
+ auto *out = c->serializer->start_embed (*this);
+ return_trace (out->serialize (c->serializer,
+ glyph_map[ligGlyph],
+ it));
+ }
+
+ public:
+ bool sanitize (hb_sanitize_context_t *c) const /* Sanitizes ligGlyph and the component array. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (ligGlyph.sanitize (c) && component.sanitize (c));
+ }
+
+ protected:
+ HBGlyphID ligGlyph; /* GlyphID of ligature to substitute */
+ HeadlessArrayOf<HBGlyphID>
+ component; /* Array of component GlyphIDs--start
+ * with the second component--ordered
+ * in writing direction */
+ public:
+ DEFINE_SIZE_ARRAY (4, component);
+};
+
+struct LigatureSet /* The Ligature rules that share one first glyph; referenced
+ * from LigatureSubstFormat1 by coverage index. */
+{
+ bool intersects (const hb_set_t *glyphs) const /* True if any Ligature in the set intersects `glyphs`. */
+ {
+ return
+ + hb_iter (ligature)
+ | hb_map (hb_add (this))
+ | hb_map ([glyphs] (const Ligature &_) { return _.intersects (glyphs); })
+ | hb_any
+ ;
+ }
+ /* Runs Ligature::closure on every rule in the set. */
+ void closure (hb_closure_context_t *c) const
+ {
+ + hb_iter (ligature)
+ | hb_map (hb_add (this))
+ | hb_apply ([c] (const Ligature &_) { _.closure (c); })
+ ;
+ }
+ /* Runs Ligature::collect_glyphs on every rule in the set. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ {
+ + hb_iter (ligature)
+ | hb_map (hb_add (this))
+ | hb_apply ([c] (const Ligature &_) { _.collect_glyphs (c); })
+ ;
+ }
+ /* True if any Ligature in the set would apply to the input in `c`. */
+ bool would_apply (hb_would_apply_context_t *c) const
+ {
+ return
+ + hb_iter (ligature)
+ | hb_map (hb_add (this))
+ | hb_map ([c] (const Ligature &_) { return _.would_apply (c); })
+ | hb_any
+ ;
+ }
+ /* Tries the rules in array order and stops at the first one that
+ * applies; returns false if none does. */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+ unsigned int num_ligs = ligature.len;
+ for (unsigned int i = 0; i < num_ligs; i++)
+ {
+ const Ligature &lig = this+ligature[i];
+ if (lig.apply (c)) return_trace (true);
+ }
+
+ return_trace (false);
+ }
+ /* Writes one Ligature per entry of `ligatures`. For ligature i,
+ * component_count_list[i] - 1 glyphs (clamped at zero) are taken from
+ * the front of component_list. component_list is a reference and is
+ * advanced past the consumed glyphs, so the caller's array moves too. */
+ bool serialize (hb_serialize_context_t *c,
+ hb_array_t<const HBGlyphID> ligatures,
+ hb_array_t<const unsigned int> component_count_list,
+ hb_array_t<const HBGlyphID> &component_list /* Starting from second for each ligature */)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!c->extend_min (*this))) return_trace (false);
+ if (unlikely (!ligature.serialize (c, ligatures.length))) return_trace (false);
+ for (unsigned int i = 0; i < ligatures.length; i++)
+ {
+ unsigned int component_count = (unsigned) hb_max ((int) component_count_list[i] - 1, 0);
+ if (unlikely (!ligature[i].serialize (c, this)
+ .serialize (c,
+ ligatures[i],
+ component_list.sub_array (0, component_count))))
+ return_trace (false);
+ component_list += component_count;
+ }
+ return_trace (true);
+ }
+ /* Subsetting: passes every Ligature offset through subset_offset_array
+ * (defined outside this part of the file), draining the pipeline for its
+ * side effects on out->ligature. Returns whether out->ligature is
+ * non-empty afterwards. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+
+ + hb_iter (ligature)
+ | hb_filter (subset_offset_array (c, out->ligature, this))
+ | hb_drain
+ ;
+ return_trace (bool (out->ligature));
+ }
+ /* Sanitizes the array of Ligature offsets, relative to this set. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (ligature.sanitize (c, this));
+ }
+
+ protected:
+ OffsetArrayOf<Ligature>
+ ligature; /* Array of Ligature tables
+ * ordered by preference */
+ public:
+ DEFINE_SIZE_ARRAY (2, ligature);
+};
+
+struct LigatureSubstFormat1 /* GSUB ligature substitution, format 1: each
+ * covered first glyph has a LigatureSet at its coverage index. */
+{
+ bool intersects (const hb_set_t *glyphs) const /* True if some covered glyph is in `glyphs` and its LigatureSet intersects `glyphs`. */
+ {
+ return
+ + hb_zip (this+coverage, ligatureSet)
+ | hb_filter (*glyphs, hb_first)
+ | hb_map (hb_second)
+ | hb_map ([this, glyphs] (const OffsetTo<LigatureSet> &_)
+ { return (this+_).intersects (glyphs); })
+ | hb_any
+ ;
+ }
+ /* Runs LigatureSet::closure on the set of each covered glyph that
+ * passes the *c->glyphs filter. */
+ void closure (hb_closure_context_t *c) const
+ {
+ + hb_zip (this+coverage, ligatureSet)
+ | hb_filter (*c->glyphs, hb_first)
+ | hb_map (hb_second)
+ | hb_map (hb_add (this))
+ | hb_apply ([c] (const LigatureSet &_) { _.closure (c); })
+ ;
+ }
+ /* No-op: the body is empty. */
+ void closure_lookups (hb_closure_lookups_context_t *c) const {}
+ /* Collects the coverage into c->input (returning early if that fails)
+ * and runs LigatureSet::collect_glyphs on every zipped set. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ {
+ if (unlikely (!(this+coverage).collect_coverage (c->input))) return;
+
+ + hb_zip (this+coverage, ligatureSet)
+ | hb_map (hb_second)
+ | hb_map (hb_add (this))
+ | hb_apply ([c] (const LigatureSet &_) { _.collect_glyphs (c); })
+ ;
+ }
+ /* Returns the Coverage table referenced by this subtable. */
+ const Coverage &get_coverage () const { return this+coverage; }
+ /* Asks the LigatureSet of the first input glyph whether it would apply;
+ * false when that glyph is not covered. c->glyphs[0] is read without
+ * checking c->len in this method. */
+ bool would_apply (hb_would_apply_context_t *c) const
+ {
+ unsigned int index = (this+coverage).get_coverage (c->glyphs[0]);
+ if (likely (index == NOT_COVERED)) return false;
+
+ const LigatureSet &lig_set = this+ligatureSet[index];
+ return lig_set.would_apply (c);
+ }
+ /* Applies the LigatureSet found at the current glyph's coverage index;
+ * returns false when the glyph is not covered. */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+
+ unsigned int index = (this+coverage).get_coverage (c->buffer->cur ().codepoint);
+ if (likely (index == NOT_COVERED)) return_trace (false);
+
+ const LigatureSet &lig_set = this+ligatureSet[index];
+ return_trace (lig_set.apply (c));
+ }
+ /* Writes this subtable from flattened lists. For first_glyphs[i],
+ * ligature_per_first_glyph_count_list[i] entries are taken from the
+ * front of ligatures_list and component_count_list, which are then
+ * advanced by that count. component_list is handed to
+ * LigatureSet::serialize, which takes it by reference and advances it,
+ * so each set continues where the previous one stopped. The Coverage is
+ * serialized last. */
+ bool serialize (hb_serialize_context_t *c,
+ hb_sorted_array_t<const HBGlyphID> first_glyphs,
+ hb_array_t<const unsigned int> ligature_per_first_glyph_count_list,
+ hb_array_t<const HBGlyphID> ligatures_list,
+ hb_array_t<const unsigned int> component_count_list,
+ hb_array_t<const HBGlyphID> component_list /* Starting from second for each ligature */)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!c->extend_min (*this))) return_trace (false);
+ if (unlikely (!ligatureSet.serialize (c, first_glyphs.length))) return_trace (false);
+ for (unsigned int i = 0; i < first_glyphs.length; i++)
+ {
+ unsigned int ligature_count = ligature_per_first_glyph_count_list[i];
+ if (unlikely (!ligatureSet[i].serialize (c, this)
+ .serialize (c,
+ ligatures_list.sub_array (0, ligature_count),
+ component_count_list.sub_array (0, ligature_count),
+ component_list))) return_trace (false);
+ ligatures_list += ligature_count;
+ component_count_list += ligature_count;
+ }
+ return_trace (coverage.serialize (c, this).serialize (c, first_glyphs));
+ }
+ /* Subsetting: copies the format, then walks (covered glyph, set offset)
+ * pairs, keeping glyphs in the plan's glyph set and passing each offset
+ * through subset_offset_array (defined outside this part of the file;
+ * presumably it keeps the entries whose LigatureSet subsets
+ * successfully -- confirm). The surviving glyphs, remapped through
+ * glyph_map, become the new coverage. Returns whether that coverage is
+ * non-empty; the result of serializing it is not checked. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+ out->format = format;
+
+ hb_sorted_vector_t<hb_codepoint_t> new_coverage;
+ + hb_zip (this+coverage, ligatureSet)
+ | hb_filter (glyphset, hb_first)
+ | hb_filter (subset_offset_array (c, out->ligatureSet, this), hb_second)
+ | hb_map (hb_first)
+ | hb_map (glyph_map)
+ | hb_sink (new_coverage)
+ ;
+ out->coverage.serialize (c->serializer, out)
+ .serialize (c->serializer, new_coverage.iter ());
+ return_trace (bool (new_coverage));
+ }
+ /* Sanitizes the coverage offset and the array of LigatureSet offsets,
+ * both relative to this subtable. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (coverage.sanitize (c, this) && ligatureSet.sanitize (c, this));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 1 */
+ OffsetTo<Coverage>
+ coverage; /* Offset to Coverage table--from
+ * beginning of Substitution table */
+ OffsetArrayOf<LigatureSet>
+ ligatureSet; /* Array of LigatureSet tables
+ * ordered by Coverage Index */
+ public:
+ DEFINE_SIZE_ARRAY (6, ligatureSet);
+};
+
+/* Format-tagged wrapper for ligature-substitution subtables. The union
+ * holds only the format tag and a format-1 payload. */
+struct LigatureSubst
+{
+ /* Writes the format tag (always 1) and forwards all arguments unchanged
+ * to LigatureSubstFormat1::serialize. Returns false if the serializer
+ * cannot make room for the tag or if the format-1 serializer fails. */
+ bool serialize (hb_serialize_context_t *c,
+ hb_sorted_array_t<const HBGlyphID> first_glyphs,
+ hb_array_t<const unsigned int> ligature_per_first_glyph_count_list,
+ hb_array_t<const HBGlyphID> ligatures_list,
+ hb_array_t<const unsigned int> component_count_list,
+ hb_array_t<const HBGlyphID> component_list /* Starting from second for each ligature */)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!c->extend_min (u.format)))
+ return_trace (false);
+
+ const unsigned int chosen_format = 1;
+ u.format = chosen_format;
+ /* Read the tag back, as the original switch did, before using the payload. */
+ if ((unsigned int) u.format != chosen_format)
+ return_trace (false);
+ return_trace (u.format1.serialize (c,
+ first_glyphs,
+ ligature_per_first_glyph_count_list,
+ ligatures_list,
+ component_count_list,
+ component_list));
+ }
+
+ /* Forwards to c->dispatch () on the format-1 payload when the tag is 1.
+ * Returns c->no_dispatch_return_value () when c->may_dispatch () refuses
+ * and c->default_return_value () for any other tag. */
+ template <typename context_t, typename ...Ts>
+ typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+ {
+ TRACE_DISPATCH (this, u.format);
+ if (unlikely (!c->may_dispatch (this, &u.format)))
+ return_trace (c->no_dispatch_return_value ());
+
+ if ((unsigned int) u.format == 1)
+ return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+ return_trace (c->default_return_value ());
+ }
+
+ protected:
+ union {
+ HBUINT16 format; /* Format identifier */
+ LigatureSubstFormat1 format1;
+ } u;
+};
+
+
+/* GSUB flavour of Context; adds nothing to the base type. */
+struct ContextSubst : Context {};
+
+/* GSUB flavour of ChainContext; adds nothing to the base type. */
+struct ChainContextSubst : ChainContext {};
+
+/* GSUB extension subtable. SubTable names the subtable union the
+ * extension refers to. */
+struct ExtensionSubst : Extension<ExtensionSubst>
+{
+ typedef struct SubstLookupSubTable SubTable;
+ /* Defined out of class, after SubstLookup is complete. */
+ bool is_reverse () const;
+};
+
+
+/* Reverse chaining contextual single substitution, format 1.
+ *
+ * Only `coverage' and `backtrack' are accessed as named members. The
+ * lookahead and substitute arrays follow the variable-length backtrack
+ * array, so every method locates them with StructAfter<> instead of using
+ * the placeholder members lookaheadX / substituteX. */
+struct ReverseChainSingleSubstFormat1
+{
+ /* True when the input coverage and every backtrack and lookahead
+ * coverage each intersect `glyphs'. */
+ bool intersects (const hb_set_t *glyphs) const
+ {
+ if (!(this+coverage).intersects (glyphs))
+ return false;
+
+ const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (backtrack);
+
+ unsigned int count;
+
+ count = backtrack.len;
+ for (unsigned int i = 0; i < count; i++)
+ if (!(this+backtrack[i]).intersects (glyphs))
+ return false;
+
+ count = lookahead.len;
+ for (unsigned int i = 0; i < count; i++)
+ if (!(this+lookahead[i]).intersects (glyphs))
+ return false;
+
+ return true;
+ }
+
+ /* Adds to c->output the substitute of every covered glyph that is in
+ * c->glyphs; does nothing when intersects () fails. */
+ void closure (hb_closure_context_t *c) const
+ {
+ if (!intersects (c->glyphs)) return;
+
+ const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (backtrack);
+ const ArrayOf<HBGlyphID> &substitute = StructAfter<ArrayOf<HBGlyphID>> (lookahead);
+
+ + hb_zip (this+coverage, substitute)
+ | hb_filter (*c->glyphs, hb_first)
+ | hb_map (hb_second)
+ | hb_sink (c->output)
+ ;
+ }
+
+ /* Intentionally empty. */
+ void closure_lookups (hb_closure_lookups_context_t *c) const {}
+
+ /* Collects the input coverage into c->input, backtrack coverages into
+ * c->before, lookahead coverages into c->after and all substitutes into
+ * c->output. Stops at the first collect_coverage () call that fails. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ {
+ if (unlikely (!(this+coverage).collect_coverage (c->input))) return;
+
+ unsigned int count;
+
+ count = backtrack.len;
+ for (unsigned int i = 0; i < count; i++)
+ if (unlikely (!(this+backtrack[i]).collect_coverage (c->before))) return;
+
+ const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (backtrack);
+ count = lookahead.len;
+ for (unsigned int i = 0; i < count; i++)
+ if (unlikely (!(this+lookahead[i]).collect_coverage (c->after))) return;
+
+ const ArrayOf<HBGlyphID> &substitute = StructAfter<ArrayOf<HBGlyphID>> (lookahead);
+ c->output->add_array (substitute.arrayZ, substitute.len);
+ }
+
+ const Coverage &get_coverage () const { return this+coverage; }
+
+ /* Would apply only to a single glyph that is covered. */
+ bool would_apply (hb_would_apply_context_t *c) const
+ { return c->len == 1 && (this+coverage).get_coverage (c->glyphs[0]) != NOT_COVERED; }
+
+ /* Replaces the current glyph in place when it is covered and both the
+ * backtrack and lookahead sequences match. Returns whether a
+ * substitution was made. */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+ if (unlikely (c->nesting_level_left != HB_MAX_NESTING_LEVEL))
+ return_trace (false); /* No chaining to this type */
+
+ unsigned int index = (this+coverage).get_coverage (c->buffer->cur ().codepoint);
+ if (likely (index == NOT_COVERED)) return_trace (false);
+
+ const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (backtrack);
+ const ArrayOf<HBGlyphID> &substitute = StructAfter<ArrayOf<HBGlyphID>> (lookahead);
+
+ /* A coverage index with no matching substitute entry cannot be applied. */
+ if (unlikely (index >= substitute.len)) return_trace (false);
+
+ unsigned int start_index = 0, end_index = 0;
+ if (match_backtrack (c,
+ backtrack.len, (HBUINT16 *) backtrack.arrayZ,
+ match_coverage, this,
+ &start_index) &&
+ match_lookahead (c,
+ lookahead.len, (HBUINT16 *) lookahead.arrayZ,
+ match_coverage, this,
+ 1, &end_index))
+ {
+ c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index);
+ c->replace_glyph_inplace (substitute[index]);
+ /* Note: We DON'T decrease buffer->idx. The main loop does it
+ * for us. This is useful for preventing surprises if someone
+ * calls us through a Context lookup. */
+ return_trace (true);
+ }
+
+ return_trace (false);
+ }
+
+ /* Subsetting is not implemented for this subtable; always fails. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ // TODO(subset)
+ return_trace (false);
+ }
+
+ /* Each trailing array is located only after the array before it has
+ * been sanitized. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (!(coverage.sanitize (c, this) && backtrack.sanitize (c, this)))
+ return_trace (false);
+ const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (backtrack);
+ if (!lookahead.sanitize (c, this))
+ return_trace (false);
+ const ArrayOf<HBGlyphID> &substitute = StructAfter<ArrayOf<HBGlyphID>> (lookahead);
+ return_trace (substitute.sanitize (c));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 1 */
+ OffsetTo<Coverage>
+ coverage; /* Offset to Coverage table--from
+ * beginning of table */
+ OffsetArrayOf<Coverage>
+ backtrack; /* Array of coverage tables
+ * in backtracking sequence, in glyph
+ * sequence order */
+ OffsetArrayOf<Coverage>
+ lookaheadX; /* Array of coverage tables
+ * in lookahead sequence, in glyph
+ * sequence order */
+ ArrayOf<HBGlyphID>
+ substituteX; /* Array of substitute
+ * GlyphIDs--ordered by Coverage Index */
+ public:
+ DEFINE_SIZE_MIN (10);
+};
+
+/* Format wrapper for reverse chaining single substitution; format 1 is
+ * the only format dispatched. */
+struct ReverseChainSingleSubst
+{
+ /* Forwards to the format-specific subtable. Contexts that refuse the
+ * dispatch get no_dispatch_return_value (); unknown formats get the
+ * context's default return value. */
+ template <typename context_t, typename ...Ts>
+ typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+ {
+ TRACE_DISPATCH (this, u.format);
+ if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ());
+ switch (u.format) {
+ case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+ default:return_trace (c->default_return_value ());
+ }
+ }
+
+ protected:
+ union {
+ HBUINT16 format; /* Format identifier */
+ ReverseChainSingleSubstFormat1 format1;
+ } u;
+};
+
+
+
+/*
+ * SubstLookup
+ */
+
+/* Union of all GSUB subtable kinds. The subtable does not store its own
+ * type; callers pass the lookup type to dispatch (). */
+struct SubstLookupSubTable
+{
+ friend struct Lookup;
+ friend struct SubstLookup;
+
+ /* GSUB lookup type numbers. */
+ enum Type {
+ Single = 1,
+ Multiple = 2,
+ Alternate = 3,
+ Ligature = 4,
+ Context = 5,
+ ChainContext = 6,
+ Extension = 7,
+ ReverseChainSingle = 8
+ };
+
+ /* Selects the union member matching lookup_type and forwards the call.
+ * Unknown types yield the context's default return value. */
+ template <typename context_t, typename ...Ts>
+ typename context_t::return_t dispatch (context_t *c, unsigned int lookup_type, Ts&&... ds) const
+ {
+ TRACE_DISPATCH (this, lookup_type);
+ switch (lookup_type) {
+ case Single: return_trace (u.single.dispatch (c, hb_forward<Ts> (ds)...));
+ case Multiple: return_trace (u.multiple.dispatch (c, hb_forward<Ts> (ds)...));
+ case Alternate: return_trace (u.alternate.dispatch (c, hb_forward<Ts> (ds)...));
+ case Ligature: return_trace (u.ligature.dispatch (c, hb_forward<Ts> (ds)...));
+ case Context: return_trace (u.context.dispatch (c, hb_forward<Ts> (ds)...));
+ case ChainContext: return_trace (u.chainContext.dispatch (c, hb_forward<Ts> (ds)...));
+ case Extension: return_trace (u.extension.dispatch (c, hb_forward<Ts> (ds)...));
+ case ReverseChainSingle: return_trace (u.reverseChainContextSingle.dispatch (c, hb_forward<Ts> (ds)...));
+ default: return_trace (c->default_return_value ());
+ }
+ }
+
+ /* Runs an intersects query over this subtable, interpreted as lookup_type. */
+ bool intersects (const hb_set_t *glyphs, unsigned int lookup_type) const
+ {
+ hb_intersects_context_t c (glyphs);
+ return dispatch (&c, lookup_type);
+ }
+
+ protected:
+ union {
+ SingleSubst single;
+ MultipleSubst multiple;
+ AlternateSubst alternate;
+ LigatureSubst ligature;
+ ContextSubst context;
+ ChainContextSubst chainContext;
+ ExtensionSubst extension;
+ ReverseChainSingleSubst reverseChainContextSingle;
+ } u;
+ public:
+ DEFINE_SIZE_MIN (0);
+};
+
+
+/* A GSUB lookup: Lookup specialised to SubstLookupSubTable, with entry
+ * points for applying, closure, glyph collection, serialization,
+ * subsetting and sanitizing. */
+struct SubstLookup : Lookup
+{
+ typedef SubstLookupSubTable SubTable;
+
+ const SubTable& get_subtable (unsigned int i) const
+ { return Lookup::get_subtable<SubTable> (i); }
+
+ static inline bool lookup_type_is_reverse (unsigned int lookup_type)
+ { return lookup_type == SubTable::ReverseChainSingle; }
+
+ /* For Extension lookups the answer is taken from the first subtable;
+ * otherwise from the lookup's own type. */
+ bool is_reverse () const
+ {
+ unsigned int type = get_type ();
+ if (unlikely (type == SubTable::Extension))
+ return reinterpret_cast<const ExtensionSubst &> (get_subtable (0)).is_reverse ();
+ return lookup_type_is_reverse (type);
+ }
+
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+ return_trace (dispatch (c));
+ }
+
+ bool intersects (const hb_set_t *glyphs) const
+ {
+ hb_intersects_context_t c (glyphs);
+ return dispatch (&c);
+ }
+
+ /* Runs glyph closure for this lookup unless the context declines to
+ * visit this_index, then flushes the context's pending output. */
+ hb_closure_context_t::return_t closure (hb_closure_context_t *c, unsigned int this_index) const
+ {
+ if (!c->should_visit_lookup (this_index))
+ return hb_closure_context_t::default_return_value ();
+
+ c->set_recurse_func (dispatch_closure_recurse_func);
+
+ hb_closure_context_t::return_t ret = dispatch (c);
+
+ c->flush ();
+
+ return ret;
+ }
+
+ /* Marks this lookup visited. If it does not intersect c->glyphs it is
+ * also marked inactive and its subtables are not dispatched to. */
+ hb_closure_lookups_context_t::return_t closure_lookups (hb_closure_lookups_context_t *c, unsigned this_index) const
+ {
+ if (c->is_lookup_visited (this_index))
+ return hb_closure_lookups_context_t::default_return_value ();
+
+ c->set_lookup_visited (this_index);
+ if (!intersects (c->glyphs))
+ {
+ c->set_lookup_inactive (this_index);
+ return hb_closure_lookups_context_t::default_return_value ();
+ }
+
+ c->set_recurse_func (dispatch_closure_lookups_recurse_func);
+
+ hb_closure_lookups_context_t::return_t ret = dispatch (c);
+ return ret;
+ }
+
+ hb_collect_glyphs_context_t::return_t collect_glyphs (hb_collect_glyphs_context_t *c) const
+ {
+ c->set_recurse_func (dispatch_recurse_func<hb_collect_glyphs_context_t>);
+ return dispatch (c);
+ }
+
+ template <typename set_t>
+ void collect_coverage (set_t *glyphs) const
+ {
+ hb_collect_coverage_context_t<set_t> c (glyphs);
+ dispatch (&c);
+ }
+
+ /* Rejects an empty glyph sequence and a first glyph the accelerator
+ * rules out before dispatching to the subtables. */
+ bool would_apply (hb_would_apply_context_t *c,
+ const hb_ot_layout_lookup_accelerator_t *accel) const
+ {
+ if (unlikely (!c->len)) return false;
+ if (!accel->may_have (c->glyphs[0])) return false;
+ return dispatch (c);
+ }
+
+ static inline bool apply_recurse_func (hb_ot_apply_context_t *c, unsigned int lookup_index);
+
+ SubTable& serialize_subtable (hb_serialize_context_t *c,
+ unsigned int i)
+ { return get_subtables<SubTable> ()[i].serialize (c, this); }
+
+ /* The serialize_* helpers below each serialize the Lookup header for one
+ * subtable of the given type, then serialize subtable 0 from the arrays. */
+ bool serialize_single (hb_serialize_context_t *c,
+ uint32_t lookup_props,
+ hb_sorted_array_t<const HBGlyphID> glyphs,
+ hb_array_t<const HBGlyphID> substitutes)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!Lookup::serialize (c, SubTable::Single, lookup_props, 1))) return_trace (false);
+ return_trace (serialize_subtable (c, 0).u.single.
+ serialize (c, hb_zip (glyphs, substitutes)));
+ }
+
+ bool serialize_multiple (hb_serialize_context_t *c,
+ uint32_t lookup_props,
+ hb_sorted_array_t<const HBGlyphID> glyphs,
+ hb_array_t<const unsigned int> substitute_len_list,
+ hb_array_t<const HBGlyphID> substitute_glyphs_list)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!Lookup::serialize (c, SubTable::Multiple, lookup_props, 1))) return_trace (false);
+ return_trace (serialize_subtable (c, 0).u.multiple.
+ serialize (c,
+ glyphs,
+ substitute_len_list,
+ substitute_glyphs_list));
+ }
+
+ bool serialize_alternate (hb_serialize_context_t *c,
+ uint32_t lookup_props,
+ hb_sorted_array_t<const HBGlyphID> glyphs,
+ hb_array_t<const unsigned int> alternate_len_list,
+ hb_array_t<const HBGlyphID> alternate_glyphs_list)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!Lookup::serialize (c, SubTable::Alternate, lookup_props, 1))) return_trace (false);
+ return_trace (serialize_subtable (c, 0).u.alternate.
+ serialize (c,
+ glyphs,
+ alternate_len_list,
+ alternate_glyphs_list));
+ }
+
+ bool serialize_ligature (hb_serialize_context_t *c,
+ uint32_t lookup_props,
+ hb_sorted_array_t<const HBGlyphID> first_glyphs,
+ hb_array_t<const unsigned int> ligature_per_first_glyph_count_list,
+ hb_array_t<const HBGlyphID> ligatures_list,
+ hb_array_t<const unsigned int> component_count_list,
+ hb_array_t<const HBGlyphID> component_list /* Starting from second for each ligature */)
+ {
+ TRACE_SERIALIZE (this);
+ if (unlikely (!Lookup::serialize (c, SubTable::Ligature, lookup_props, 1))) return_trace (false);
+ return_trace (serialize_subtable (c, 0).u.ligature.
+ serialize (c,
+ first_glyphs,
+ ligature_per_first_glyph_count_list,
+ ligatures_list,
+ component_count_list,
+ component_list));
+ }
+
+ template <typename context_t>
+ static inline typename context_t::return_t dispatch_recurse_func (context_t *c, unsigned int lookup_index);
+
+ /* Recursion hook for glyph closure: dispatches into the nested lookup
+ * unless the context declines to visit it. */
+ static inline hb_closure_context_t::return_t dispatch_closure_recurse_func (hb_closure_context_t *c, unsigned int lookup_index)
+ {
+ if (!c->should_visit_lookup (lookup_index))
+ return hb_empty_t ();
+
+ hb_closure_context_t::return_t ret = dispatch_recurse_func (c, lookup_index);
+
+ /* While in theory we should flush here, it will cause timeouts because a recursive
+ * lookup can keep growing the glyph set. Skip, and outer loop will retry up to
+ * HB_CLOSURE_MAX_STAGES time, which should be enough for every realistic font. */
+ //c->flush ();
+
+ return ret;
+ }
+
+ HB_INTERNAL static hb_closure_lookups_context_t::return_t dispatch_closure_lookups_recurse_func (hb_closure_lookups_context_t *c, unsigned lookup_index);
+
+ template <typename context_t, typename ...Ts>
+ typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+ { return Lookup::dispatch<SubTable> (c, hb_forward<Ts> (ds)...); }
+
+ bool subset (hb_subset_context_t *c) const
+ { return Lookup::subset<SubTable> (c); }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ { return Lookup::sanitize<SubTable> (c); }
+};
+
+/*
+ * GSUB -- Glyph Substitution
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/gsub
+ */
+
+/* The GSUB table: GSUBGPOS with its lookups typed as SubstLookup. */
+struct GSUB : GSUBGPOS
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_GSUB;
+
+ const SubstLookup& get_lookup (unsigned int i) const
+ { return static_cast<const SubstLookup &> (GSUBGPOS::get_lookup (i)); }
+
+ /* Subsets using the plan's GSUB lookup and feature selections. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ hb_subset_layout_context_t l (c, tableTag, c->plan->gsub_lookups, c->plan->gsub_features);
+ return GSUBGPOS::subset<SubstLookup> (&l);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ { return GSUBGPOS::sanitize<SubstLookup> (c); }
+
+ /* Declared only; the definition is not in this header. */
+ HB_INTERNAL bool is_blocklisted (hb_blob_t *blob,
+ hb_face_t *face) const;
+
+ void closure_lookups (hb_face_t *face,
+ const hb_set_t *glyphs,
+ hb_set_t *lookup_indexes /* IN/OUT */) const
+ { GSUBGPOS::closure_lookups<SubstLookup> (face, glyphs, lookup_indexes); }
+
+ typedef GSUBGPOS::accelerator_t<GSUB> accelerator_t;
+};
+
+
+/* Named type for GSUB's accelerator; adds nothing to the base. */
+struct GSUB_accelerator_t : GSUB::accelerator_t {};
+
+
+/* Out-of-class implementation for methods recursing */
+
+#ifndef HB_NO_OT_LAYOUT
+/* True when the extension's type is the reverse chaining lookup type. */
+/*static*/ inline bool ExtensionSubst::is_reverse () const
+{
+ return SubstLookup::lookup_type_is_reverse (get_type ());
+}
+/* Generic recursion hook: fetches GSUB lookup `lookup_index' from the
+ * context's face and dispatches the context into it. */
+template <typename context_t>
+/*static*/ typename context_t::return_t SubstLookup::dispatch_recurse_func (context_t *c, unsigned int lookup_index)
+{
+ const SubstLookup &l = c->face->table.GSUB.get_relaxed ()->table->get_lookup (lookup_index);
+ return l.dispatch (c);
+}
+
+/* Recursion hook for lookup closure: runs closure_lookups () on GSUB
+ * lookup `this_index', which does its own visited check and marking. */
+/*static*/ inline hb_closure_lookups_context_t::return_t SubstLookup::dispatch_closure_lookups_recurse_func (hb_closure_lookups_context_t *c, unsigned this_index)
+{
+ const SubstLookup &l = c->face->table.GSUB.get_relaxed ()->table->get_lookup (this_index);
+ return l.closure_lookups (c, this_index);
+}
+
+/* Applies GSUB lookup `lookup_index' from inside another lookup. The
+ * context's lookup index and props are switched to the nested lookup for
+ * the duration of the dispatch and put back afterwards. */
+/*static*/ bool SubstLookup::apply_recurse_func (hb_ot_apply_context_t *c, unsigned int lookup_index)
+{
+ /* Remember the enclosing lookup's state. */
+ const unsigned int outer_index = c->lookup_index;
+ const unsigned int outer_props = c->lookup_props;
+
+ const SubstLookup &nested = c->face->table.GSUB.get_relaxed ()->table->get_lookup (lookup_index);
+ c->set_lookup_index (lookup_index);
+ c->set_lookup_props (nested.get_props ());
+ const bool applied = nested.dispatch (c);
+
+ /* Restore the enclosing lookup's state. */
+ c->set_lookup_index (outer_index);
+ c->set_lookup_props (outer_props);
+ return applied;
+}
+#endif
+
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_LAYOUT_GSUB_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh b/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh
new file mode 100644
index 0000000000..03a578cec0
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh
@@ -0,0 +1,3422 @@
+/*
+ * Copyright © 2007,2008,2009,2010 Red Hat, Inc.
+ * Copyright © 2010,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_LAYOUT_GSUBGPOS_HH
+#define HB_OT_LAYOUT_GSUBGPOS_HH
+
+#include "hb.hh"
+#include "hb-buffer.hh"
+#include "hb-map.hh"
+#include "hb-set.hh"
+#include "hb-ot-map.hh"
+#include "hb-ot-layout-common.hh"
+#include "hb-ot-layout-gdef-table.hh"
+
+
+namespace OT {
+
+
+/* Dispatch context asking whether an object intersects a glyph set.
+ * Subtable iteration stops at the first object that reports true. */
+struct hb_intersects_context_t :
+ hb_dispatch_context_t<hb_intersects_context_t, bool>
+{
+ template <typename T>
+ return_t dispatch (const T &obj) { return obj.intersects (this->glyphs); }
+ static return_t default_return_value () { return false; }
+ bool stop_sublookup_iteration (return_t r) const { return r; }
+
+ const hb_set_t *glyphs;
+
+ hb_intersects_context_t (const hb_set_t *glyphs_) :
+ glyphs (glyphs_) {}
+};
+
+/* Dispatch context for glyph closure. Objects add glyphs to `output';
+ * flush () merges them into `glyphs'. */
+struct hb_closure_context_t :
+ hb_dispatch_context_t<hb_closure_context_t>
+{
+ typedef return_t (*recurse_func_t) (hb_closure_context_t *c, unsigned int lookup_index);
+ template <typename T>
+ return_t dispatch (const T &obj) { obj.closure (this); return hb_empty_t (); }
+ static return_t default_return_value () { return hb_empty_t (); }
+ /* Calls recurse_func for a nested lookup, one nesting level down; does
+ * nothing when no levels are left or no recurse_func is set. */
+ void recurse (unsigned int lookup_index)
+ {
+ if (unlikely (nesting_level_left == 0 || !recurse_func))
+ return;
+
+ nesting_level_left--;
+ recurse_func (this, lookup_index);
+ nesting_level_left++;
+ }
+
+ bool lookup_limit_exceeded ()
+ { return lookup_count > HB_MAX_LOOKUP_INDICES; }
+
+ /* Counts the visit (every call increments lookup_count), refuses once
+ * the limit is passed or the lookup is already done, and otherwise
+ * records the current glyph population against lookup_index. */
+ bool should_visit_lookup (unsigned int lookup_index)
+ {
+ if (lookup_count++ > HB_MAX_LOOKUP_INDICES)
+ return false;
+
+ if (is_lookup_done (lookup_index))
+ return false;
+
+ done_lookups->set (lookup_index, glyphs->get_population ());
+ return true;
+ }
+
+ /* A done_lookups map in error state makes every lookup count as done. */
+ bool is_lookup_done (unsigned int lookup_index)
+ {
+ if (done_lookups->in_error ())
+ return true;
+
+ /* Have we visited this lookup with the current set of glyphs? */
+ return done_lookups->get (lookup_index) == glyphs->get_population ();
+ }
+
+ hb_face_t *face;
+ hb_set_t *glyphs;
+ hb_set_t output[1];
+ recurse_func_t recurse_func;
+ unsigned int nesting_level_left;
+
+ hb_closure_context_t (hb_face_t *face_,
+ hb_set_t *glyphs_,
+ hb_map_t *done_lookups_,
+ unsigned int nesting_level_left_ = HB_MAX_NESTING_LEVEL) :
+ face (face_),
+ glyphs (glyphs_),
+ recurse_func (nullptr),
+ nesting_level_left (nesting_level_left_),
+ done_lookups (done_lookups_),
+ lookup_count (0)
+ {}
+
+ /* Pending output is merged into `glyphs' on destruction. */
+ ~hb_closure_context_t () { flush (); }
+
+ void set_recurse_func (recurse_func_t func) { recurse_func = func; }
+
+ /* Drops output glyph ids at or above the face's glyph count, unions the
+ * rest into `glyphs', then clears `output'. */
+ void flush ()
+ {
+ hb_set_del_range (output, face->get_num_glyphs (), hb_set_get_max (output)); /* Remove invalid glyphs. */
+ hb_set_union (glyphs, output);
+ hb_set_clear (output);
+ }
+
+ private:
+ hb_map_t *done_lookups;
+ unsigned int lookup_count;
+};
+
+/* Dispatch context for lookup closure: records which lookups have been
+ * visited and which of them are inactive, into two caller-provided sets. */
+struct hb_closure_lookups_context_t :
+ hb_dispatch_context_t<hb_closure_lookups_context_t>
+{
+ typedef return_t (*recurse_func_t) (hb_closure_lookups_context_t *c, unsigned lookup_index);
+ template <typename T>
+ return_t dispatch (const T &obj) { obj.closure_lookups (this); return hb_empty_t (); }
+ static return_t default_return_value () { return hb_empty_t (); }
+ /* Calls recurse_func for a nested lookup, one nesting level down.
+ *
+ * The lookup is deliberately NOT marked visited here. recurse_func is
+ * expected to end up in the lookup's own closure_lookups (), as
+ * SubstLookup's recurse function does, and that method begins with
+ * is_lookup_visited () before marking the lookup itself. Marking it
+ * here first would make that call return at once, so the nested lookup
+ * would never be examined or marked inactive. */
+ void recurse (unsigned lookup_index)
+ {
+ if (unlikely (nesting_level_left == 0 || !recurse_func))
+ return;
+
+ /* Return if new lookup was recursed to before. The set is checked
+ * directly so that lookup_count is not incremented twice for one visit. */
+ if (lookup_limit_exceeded ()
+ || visited_lookups->in_error ()
+ || visited_lookups->has (lookup_index))
+ return;
+
+ nesting_level_left--;
+ recurse_func (this, lookup_index);
+ nesting_level_left++;
+ }
+
+ void set_lookup_visited (unsigned lookup_index)
+ { visited_lookups->add (lookup_index); }
+
+ void set_lookup_inactive (unsigned lookup_index)
+ { inactive_lookups->add (lookup_index); }
+
+ bool lookup_limit_exceeded ()
+ { return lookup_count > HB_MAX_LOOKUP_INDICES; }
+
+ /* Note: every call increments lookup_count. Reports true once the limit
+ * is passed or when the visited set is in error, so callers stop. */
+ bool is_lookup_visited (unsigned lookup_index)
+ {
+ if (lookup_count++ > HB_MAX_LOOKUP_INDICES)
+ return true;
+
+ if (visited_lookups->in_error ())
+ return true;
+
+ return visited_lookups->has (lookup_index);
+ }
+
+ hb_face_t *face;
+ const hb_set_t *glyphs;
+ recurse_func_t recurse_func;
+ unsigned int nesting_level_left;
+
+ hb_closure_lookups_context_t (hb_face_t *face_,
+ const hb_set_t *glyphs_,
+ hb_set_t *visited_lookups_,
+ hb_set_t *inactive_lookups_,
+ unsigned nesting_level_left_ = HB_MAX_NESTING_LEVEL) :
+ face (face_),
+ glyphs (glyphs_),
+ recurse_func (nullptr),
+ nesting_level_left (nesting_level_left_),
+ visited_lookups (visited_lookups_),
+ inactive_lookups (inactive_lookups_),
+ lookup_count (0) {}
+
+ void set_recurse_func (recurse_func_t func) { recurse_func = func; }
+
+ private:
+ hb_set_t *visited_lookups;
+ hb_set_t *inactive_lookups;
+ unsigned int lookup_count;
+};
+
+/* Dispatch context asking whether a lookup would apply to a given glyph
+ * sequence. Subtable iteration stops at the first object reporting true. */
+struct hb_would_apply_context_t :
+ hb_dispatch_context_t<hb_would_apply_context_t, bool>
+{
+ template <typename T>
+ return_t dispatch (const T &obj) { return obj.would_apply (this); }
+ static return_t default_return_value () { return false; }
+ bool stop_sublookup_iteration (return_t r) const { return r; }
+
+ hb_face_t *face;
+ const hb_codepoint_t *glyphs; /* The glyph sequence being tested */
+ unsigned int len; /* Number of entries in glyphs */
+ bool zero_context;
+
+ hb_would_apply_context_t (hb_face_t *face_,
+ const hb_codepoint_t *glyphs_,
+ unsigned int len_,
+ bool zero_context_) :
+ face (face_),
+ glyphs (glyphs_),
+ len (len_),
+ zero_context (zero_context_) {}
+};
+
+/* Dispatch context collecting the glyphs a lookup refers to into four
+ * sets: before (backtrack), input, after (lookahead) and output. A set
+ * the caller did not ask for is replaced by hb_set_get_empty (). */
+struct hb_collect_glyphs_context_t :
+ hb_dispatch_context_t<hb_collect_glyphs_context_t>
+{
+ typedef return_t (*recurse_func_t) (hb_collect_glyphs_context_t *c, unsigned int lookup_index);
+ template <typename T>
+ return_t dispatch (const T &obj) { obj.collect_glyphs (this); return hb_empty_t (); }
+ static return_t default_return_value () { return hb_empty_t (); }
+ /* Recurses into a nested lookup, collecting only its output glyphs, and
+ * remembers the lookup so it is not recursed into again. */
+ void recurse (unsigned int lookup_index)
+ {
+ if (unlikely (nesting_level_left == 0 || !recurse_func))
+ return;
+
+ /* Note that GPOS sets recurse_func to nullptr already, so it doesn't get
+ * past the previous check. For GSUB, we only want to collect the output
+ * glyphs in the recursion. If output is not requested, we can go home now.
+ *
+ * Note further, that the above is not exactly correct. A recursed lookup
+ * is allowed to match input that is not matched in the context, but that's
+ * not how most fonts are built. It's possible to relax that and recurse
+ * with all sets here if it proves to be an issue.
+ */
+
+ if (output == hb_set_get_empty ())
+ return;
+
+ /* Return if new lookup was recursed to before. */
+ if (recursed_lookups->has (lookup_index))
+ return;
+
+ /* Point before/input/after at the empty set for the duration of the
+ * recursion, then restore them. */
+ hb_set_t *old_before = before;
+ hb_set_t *old_input = input;
+ hb_set_t *old_after = after;
+ before = input = after = hb_set_get_empty ();
+
+ nesting_level_left--;
+ recurse_func (this, lookup_index);
+ nesting_level_left++;
+
+ before = old_before;
+ input = old_input;
+ after = old_after;
+
+ recursed_lookups->add (lookup_index);
+ }
+
+ hb_face_t *face;
+ hb_set_t *before;
+ hb_set_t *input;
+ hb_set_t *after;
+ hb_set_t *output;
+ recurse_func_t recurse_func;
+ hb_set_t *recursed_lookups; /* Created in the constructor, destroyed in the destructor */
+ unsigned int nesting_level_left;
+
+ hb_collect_glyphs_context_t (hb_face_t *face_,
+ hb_set_t *glyphs_before, /* OUT. May be NULL */
+ hb_set_t *glyphs_input, /* OUT. May be NULL */
+ hb_set_t *glyphs_after, /* OUT. May be NULL */
+ hb_set_t *glyphs_output, /* OUT. May be NULL */
+ unsigned int nesting_level_left_ = HB_MAX_NESTING_LEVEL) :
+ face (face_),
+ before (glyphs_before ? glyphs_before : hb_set_get_empty ()),
+ input (glyphs_input ? glyphs_input : hb_set_get_empty ()),
+ after (glyphs_after ? glyphs_after : hb_set_get_empty ()),
+ output (glyphs_output ? glyphs_output : hb_set_get_empty ()),
+ recurse_func (nullptr),
+ recursed_lookups (hb_set_create ()),
+ nesting_level_left (nesting_level_left_) {}
+ /* NOTE(review): copy operations are not declared here; a copied context
+ * would hold the same recursed_lookups pointer and destroy it as well. */
+ ~hb_collect_glyphs_context_t () { hb_set_destroy (recursed_lookups); }
+
+ void set_recurse_func (recurse_func_t func) { recurse_func = func; }
+};
+
+
+
+/* Dispatch context that gathers the coverage of every subtable into `set'.
+ * dispatch () returns the object's Coverage; the collecting itself is done
+ * in stop_sublookup_iteration (), which always returns false so that
+ * iteration continues over all subtables. */
+template <typename set_t>
+struct hb_collect_coverage_context_t :
+ hb_dispatch_context_t<hb_collect_coverage_context_t<set_t>, const Coverage &>
+{
+ typedef const Coverage &return_t; // Stoopid that we have to dupe this here.
+ template <typename T>
+ return_t dispatch (const T &obj) { return obj.get_coverage (); }
+ static return_t default_return_value () { return Null (Coverage); }
+ bool stop_sublookup_iteration (return_t r) const
+ {
+ r.collect_coverage (set);
+ return false;
+ }
+
+ hb_collect_coverage_context_t (set_t *set_) :
+ set (set_) {}
+
+ set_t *set;
+};
+
+
+struct hb_ot_apply_context_t :
+ hb_dispatch_context_t<hb_ot_apply_context_t, bool, HB_DEBUG_APPLY>
+{
+ /* Decides, per glyph, whether it may be matched and whether it may be
+ * skipped, from the lookup props, mask, syllable, ZWJ/ZWNJ settings and
+ * an optional match callback. */
+ struct matcher_t
+ {
+ matcher_t () :
+ lookup_props (0),
+ ignore_zwnj (false),
+ ignore_zwj (false),
+ mask (-1),
+#define arg1(arg) (arg) /* Remove the macro to see why it's needed! */
+ syllable arg1(0),
+#undef arg1
+ match_func (nullptr),
+ match_data (nullptr) {}
+
+ typedef bool (*match_func_t) (hb_codepoint_t glyph_id, const HBUINT16 &value, const void *data);
+
+ void set_ignore_zwnj (bool ignore_zwnj_) { ignore_zwnj = ignore_zwnj_; }
+ void set_ignore_zwj (bool ignore_zwj_) { ignore_zwj = ignore_zwj_; }
+ void set_lookup_props (unsigned int lookup_props_) { lookup_props = lookup_props_; }
+ void set_mask (hb_mask_t mask_) { mask = mask_; }
+ void set_syllable (uint8_t syllable_) { syllable = syllable_; }
+ void set_match_func (match_func_t match_func_,
+ const void *match_data_)
+ { match_func = match_func_; match_data = match_data_; }
+
+ enum may_match_t {
+ MATCH_NO,
+ MATCH_YES,
+ MATCH_MAYBE
+ };
+
+ /* MATCH_NO when the glyph's mask does not overlap `mask' or a non-zero
+ * syllable differs from the glyph's. Otherwise the callback decides;
+ * with no callback set the answer is MATCH_MAYBE. */
+ may_match_t may_match (const hb_glyph_info_t &info,
+ const HBUINT16 *glyph_data) const
+ {
+ if (!(info.mask & mask) ||
+ (syllable && syllable != info.syllable ()))
+ return MATCH_NO;
+
+ if (match_func)
+ return match_func (info.codepoint, *glyph_data, match_data) ? MATCH_YES : MATCH_NO;
+
+ return MATCH_MAYBE;
+ }
+
+ enum may_skip_t {
+ SKIP_NO,
+ SKIP_YES,
+ SKIP_MAYBE
+ };
+
+ /* SKIP_YES when the glyph fails the lookup's glyph-property check.
+ * SKIP_MAYBE for a default-ignorable (not hidden) glyph, unless it is a
+ * ZWNJ or ZWJ that is not being ignored. SKIP_NO otherwise. */
+ may_skip_t may_skip (const hb_ot_apply_context_t *c,
+ const hb_glyph_info_t &info) const
+ {
+ if (!c->check_glyph_property (&info, lookup_props))
+ return SKIP_YES;
+
+ if (unlikely (_hb_glyph_info_is_default_ignorable_and_not_hidden (&info) &&
+ (ignore_zwnj || !_hb_glyph_info_is_zwnj (&info)) &&
+ (ignore_zwj || !_hb_glyph_info_is_zwj (&info))))
+ return SKIP_MAYBE;
+
+ return SKIP_NO;
+ }
+
+ protected:
+ unsigned int lookup_props;
+ bool ignore_zwnj;
+ bool ignore_zwj;
+ hb_mask_t mask;
+ uint8_t syllable;
+ match_func_t match_func;
+ const void *match_data;
+ };
+
+ /* Walks the buffer forwards (next, over buffer->info) or backwards
+ * (prev, over buffer->out_info), stepping over glyphs the matcher says
+ * to skip and stopping on each glyph that matches. */
+ struct skipping_iterator_t
+ {
+ /* Binds the iterator to a context and configures the matcher from it;
+ * clears any match function. */
+ void init (hb_ot_apply_context_t *c_, bool context_match = false)
+ {
+ c = c_;
+ match_glyph_data = nullptr;
+ matcher.set_match_func (nullptr, nullptr);
+ matcher.set_lookup_props (c->lookup_props);
+ /* Ignore ZWNJ if we are matching GPOS, or matching GSUB context and asked to. */
+ matcher.set_ignore_zwnj (c->table_index == 1 || (context_match && c->auto_zwnj));
+ /* Ignore ZWJ if we are matching context, or asked to. */
+ matcher.set_ignore_zwj (context_match || c->auto_zwj);
+ matcher.set_mask (context_match ? -1 : c->lookup_mask);
+ }
+ void set_lookup_props (unsigned int lookup_props)
+ {
+ matcher.set_lookup_props (lookup_props);
+ }
+ void set_match_func (matcher_t::match_func_t match_func_,
+ const void *match_data_,
+ const HBUINT16 glyph_data[])
+ {
+ matcher.set_match_func (match_func_, match_data_);
+ match_glyph_data = glyph_data;
+ }
+
+ /* Starts a walk at start_index_ expecting num_items_ matches. Syllable
+ * matching is enabled only when starting at the buffer's current index. */
+ void reset (unsigned int start_index_,
+ unsigned int num_items_)
+ {
+ idx = start_index_;
+ num_items = num_items_;
+ end = c->buffer->len;
+ matcher.set_syllable (start_index_ == c->buffer->idx ? c->buffer->cur().syllable () : 0);
+ }
+
+ /* Undoes the bookkeeping of the last successful next ()/prev (). */
+ void reject ()
+ {
+ num_items++;
+ if (match_glyph_data) match_glyph_data--;
+ }
+
+ matcher_t::may_skip_t
+ may_skip (const hb_glyph_info_t &info) const
+ { return matcher.may_skip (c, info); }
+
+ /* Advances to the next matching glyph. Returns false when a glyph that
+ * may not be skipped fails to match, or when too few glyphs remain for
+ * the items still expected. */
+ bool next ()
+ {
+ assert (num_items > 0);
+ while (idx + num_items < end)
+ {
+ idx++;
+ const hb_glyph_info_t &info = c->buffer->info[idx];
+
+ matcher_t::may_skip_t skip = matcher.may_skip (c, info);
+ if (unlikely (skip == matcher_t::SKIP_YES))
+ continue;
+
+ matcher_t::may_match_t match = matcher.may_match (info, match_glyph_data);
+ if (match == matcher_t::MATCH_YES ||
+ (match == matcher_t::MATCH_MAYBE &&
+ skip == matcher_t::SKIP_NO))
+ {
+ num_items--;
+ if (match_glyph_data) match_glyph_data++;
+ return true;
+ }
+
+ if (skip == matcher_t::SKIP_NO)
+ return false;
+ }
+ return false;
+ }
+ /* Same as next (), but walks backwards through the output buffer.
+ * match_glyph_data still advances forwards on each match. */
+ bool prev ()
+ {
+ assert (num_items > 0);
+ while (idx > num_items - 1)
+ {
+ idx--;
+ const hb_glyph_info_t &info = c->buffer->out_info[idx];
+
+ matcher_t::may_skip_t skip = matcher.may_skip (c, info);
+ if (unlikely (skip == matcher_t::SKIP_YES))
+ continue;
+
+ matcher_t::may_match_t match = matcher.may_match (info, match_glyph_data);
+ if (match == matcher_t::MATCH_YES ||
+ (match == matcher_t::MATCH_MAYBE &&
+ skip == matcher_t::SKIP_NO))
+ {
+ num_items--;
+ if (match_glyph_data) match_glyph_data++;
+ return true;
+ }
+
+ if (skip == matcher_t::SKIP_NO)
+ return false;
+ }
+ return false;
+ }
+
+ unsigned int idx;
+ protected:
+ hb_ot_apply_context_t *c;
+ matcher_t matcher;
+ const HBUINT16 *match_glyph_data;
+
+ unsigned int num_items;
+ unsigned int end;
+ };
+
+
+ /* Dispatch-context hooks: objects are applied through apply (), and
+ * subtable iteration stops at the first one that applies. */
+ const char *get_name () { return "APPLY"; }
+ typedef return_t (*recurse_func_t) (hb_ot_apply_context_t *c, unsigned int lookup_index);
+ template <typename T>
+ return_t dispatch (const T &obj) { return obj.apply (this); }
+ static return_t default_return_value () { return false; }
+ bool stop_sublookup_iteration (return_t r) const { return r; }
+ /* Applies a nested lookup one nesting level down. Gives up with the
+ * default return value when no levels are left, no recurse_func is set,
+ * or the buffer's operation budget (max_ops, decremented here) is used up. */
+ return_t recurse (unsigned int sub_lookup_index)
+ {
+ if (unlikely (nesting_level_left == 0 || !recurse_func || buffer->max_ops-- <= 0))
+ return default_return_value ();
+
+ nesting_level_left--;
+ bool ret = recurse_func (this, sub_lookup_index);
+ nesting_level_left++;
+ return ret;
+ }
+
+ skipping_iterator_t iter_input, iter_context;
+
+ hb_font_t *font;
+ hb_face_t *face;
+ hb_buffer_t *buffer;
+ recurse_func_t recurse_func;
+ const GDEF &gdef;
+ const VariationStore &var_store;
+
+ hb_direction_t direction;
+ hb_mask_t lookup_mask;
+ unsigned int table_index; /* GSUB/GPOS */
+ unsigned int lookup_index;
+ unsigned int lookup_props;
+ unsigned int nesting_level_left;
+
+ bool has_glyph_classes;
+ bool auto_zwnj;
+ bool auto_zwj;
+ bool random;
+
+ uint32_t random_state;
+
+
+ /* Builds an apply context for one table (GSUB/GPOS, per table_index_)
+ * over a font and buffer. GDEF comes from the face, or is the Null
+ * object when HB_NO_OT_LAYOUT is defined. Both iterators are initialised
+ * at the end. */
+ hb_ot_apply_context_t (unsigned int table_index_,
+ hb_font_t *font_,
+ hb_buffer_t *buffer_) :
+ iter_input (), iter_context (),
+ font (font_), face (font->face), buffer (buffer_),
+ recurse_func (nullptr),
+ gdef (
+#ifndef HB_NO_OT_LAYOUT
+ *face->table.GDEF->table
+#else
+ Null (GDEF)
+#endif
+ ),
+ var_store (gdef.get_var_store ()),
+ direction (buffer_->props.direction),
+ lookup_mask (1),
+ table_index (table_index_),
+ lookup_index ((unsigned int) -1),
+ lookup_props (0),
+ nesting_level_left (HB_MAX_NESTING_LEVEL),
+ has_glyph_classes (gdef.has_glyph_classes ()),
+ auto_zwnj (true),
+ auto_zwj (true),
+ random (false),
+ random_state (1) { init_iters (); }
+
+ void init_iters ()
+ { /* Re-initialise both skipping iterators against this context: iter_input with `false`, iter_context with `true`. */
+ iter_input.init (this, false);
+ iter_context.init (this, true);
+ }
+
+ void set_lookup_mask (hb_mask_t mask) { lookup_mask = mask; init_iters (); } /* Stores the mask and re-inits the iterators. */
+ void set_auto_zwj (bool auto_zwj_) { auto_zwj = auto_zwj_; init_iters (); } /* Stores the flag and re-inits the iterators. */
+ void set_auto_zwnj (bool auto_zwnj_) { auto_zwnj = auto_zwnj_; init_iters (); } /* Stores the flag and re-inits the iterators. */
+ void set_random (bool random_) { random = random_; } /* Plain store; iterators are left untouched. */
+ void set_recurse_func (recurse_func_t func) { recurse_func = func; } /* Installs the callback used by recurse (). */
+ void set_lookup_index (unsigned int lookup_index_) { lookup_index = lookup_index_; } /* Plain store; iterators are left untouched. */
+ void set_lookup_props (unsigned int lookup_props_) { lookup_props = lookup_props_; init_iters (); } /* Stores the props and re-inits the iterators. */
+
+ uint32_t random_number ()
+ {
+   /* Advance random_state with the minstd_rand multiplier and modulus and
+    * return the new state.
+    * http://www.cplusplus.com/reference/random/minstd_rand/
+    *
+    * NOTE(review): random_state is uint32_t, so the product appears to be
+    * evaluated in 32-bit unsigned arithmetic and can wrap before the
+    * modulo; the sequence is then not identical to std::minstd_rand.
+    * Confirm whether that is intended before changing it, since shaping
+    * output depends on the exact sequence. */
+   const uint32_t next_state = random_state * 48271 % 2147483647;
+   random_state = next_state;
+   return next_state;
+ }
+
+ bool match_properties_mark (hb_codepoint_t glyph,
+                             unsigned int glyph_props,
+                             unsigned int match_props) const
+ {
+   /* Mark filtering set: the high 16 bits of match_props carry the set
+    * index, and GDEF decides whether the glyph is covered by that set. */
+   if (match_props & LookupFlag::UseMarkFilteringSet)
+     return gdef.mark_set_covers (match_props >> 16, glyph);
+
+   /* Otherwise the MarkAttachmentType bits of match_props, when non-zero,
+    * mean "ignore marks whose attachment type differs from this one". */
+   const unsigned int wanted_type = match_props & LookupFlag::MarkAttachmentType;
+   if (!wanted_type)
+     return true;
+
+   return wanted_type == (glyph_props & LookupFlag::MarkAttachmentType);
+ }
+
+ bool check_glyph_property (const hb_glyph_info_t *info,
+                            unsigned int match_props) const
+ {
+   /* Decide whether the glyph in `info` may take part in a lookup whose
+    * flags are `match_props`. */
+   const unsigned int glyph_props = _hb_glyph_info_get_glyph_props (info);
+
+   /* Rejected if the glyph's class is one the lookup asks to ignore, e.g.
+    * the glyph is a ligature and match_props includes IgnoreLigatures. */
+   if (glyph_props & match_props & LookupFlag::IgnoreFlags)
+     return false;
+
+   /* Non-mark glyphs need no further checks. */
+   if (likely (!(glyph_props & HB_OT_LAYOUT_GLYPH_PROPS_MARK)))
+     return true;
+
+   return match_properties_mark (info->codepoint, glyph_props, match_props);
+ }
+
+ void _set_glyph_props (hb_codepoint_t glyph_index,
+                        unsigned int class_guess = 0,
+                        bool ligature = false,
+                        bool component = false) const
+ {
+   /* Recompute the glyph props of the current buffer glyph for a
+    * substitution that produces `glyph_index`.  Only the PRESERVE bits of
+    * the old props are kept; SUBSTITUTED is always added. */
+   unsigned int props = _hb_glyph_info_get_glyph_props (&buffer->cur()) &
+                        HB_OT_LAYOUT_GLYPH_PROPS_PRESERVE;
+   props |= HB_OT_LAYOUT_GLYPH_PROPS_SUBSTITUTED;
+
+   if (ligature)
+   {
+     /* In the only place that the MULTIPLIED bit is used, Uniscribe
+      * seems to only care about the "last" transformation between
+      * Ligature and Multiple substitutions.  Ie. if you ligate, expand,
+      * and ligate again, it forgives the multiplication and acts as
+      * if only ligation happened.  As such, clear MULTIPLIED bit. */
+     props |= HB_OT_LAYOUT_GLYPH_PROPS_LIGATED;
+     props &= ~HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED;
+   }
+
+   /* Applied after the ligature branch, so component wins if both are set. */
+   if (component)
+     props |= HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED;
+
+   if (likely (has_glyph_classes))
+   {
+     /* GDEF has glyph classes: take the class bits from it. */
+     _hb_glyph_info_set_glyph_props (&buffer->cur(), props | gdef.get_glyph_props (glyph_index));
+     return;
+   }
+
+   /* No GDEF classes: use the caller's guess if any, else leave props alone. */
+   if (class_guess)
+     _hb_glyph_info_set_glyph_props (&buffer->cur(), props | class_guess);
+ }
+
+ void replace_glyph (hb_codepoint_t glyph_index) const
+ { /* Update the current glyph's props, then hand the replacement to buffer->replace_glyph (). */
+ _set_glyph_props (glyph_index);
+ buffer->replace_glyph (glyph_index);
+ }
+ void replace_glyph_inplace (hb_codepoint_t glyph_index) const
+ { /* Same props update, but only overwrites cur ().codepoint instead of calling buffer->replace_glyph (). */
+ _set_glyph_props (glyph_index);
+ buffer->cur().codepoint = glyph_index;
+ }
+ void replace_glyph_with_ligature (hb_codepoint_t glyph_index,
+ unsigned int class_guess) const
+ { /* Props update with ligature=true (adds LIGATED, clears MULTIPLIED), then buffer->replace_glyph (). */
+ _set_glyph_props (glyph_index, class_guess, true);
+ buffer->replace_glyph (glyph_index);
+ }
+ void output_glyph_for_component (hb_codepoint_t glyph_index,
+ unsigned int class_guess) const
+ { /* Props update with component=true (adds MULTIPLIED), then buffer->output_glyph (). */
+ _set_glyph_props (glyph_index, class_guess, false, true);
+ buffer->output_glyph (glyph_index);
+ }
+};
+
+
+/* Dispatch context that, instead of applying subtables, records each one in
+ * `array` together with a type-erased apply function and a coverage digest. */
+struct hb_get_subtables_context_t :
+       hb_dispatch_context_t<hb_get_subtables_context_t>
+{
+  /* Trampoline: recover the concrete subtable type and call its apply (). */
+  template <typename Type>
+  static inline bool apply_to (const void *obj, OT::hb_ot_apply_context_t *c)
+  {
+    return static_cast<const Type *> (obj)->apply (c);
+  }
+
+  typedef bool (*hb_apply_func_t) (const void *obj, OT::hb_ot_apply_context_t *c);
+
+  /* One recorded subtable. */
+  struct hb_applicable_t
+  {
+    /* Remember the subtable and its trampoline, and fill the digest from
+     * the subtable's coverage. */
+    template <typename T>
+    void init (const T &obj_, hb_apply_func_t apply_func_)
+    {
+      obj = &obj_;
+      apply_func = apply_func_;
+      digest.init ();
+      obj_.get_coverage ().collect_coverage (&digest);
+    }
+
+    /* Apply the subtable at the current buffer glyph; the digest check
+     * runs first and skips the call when it rules the glyph out. */
+    bool apply (OT::hb_ot_apply_context_t *c) const
+    {
+      if (!digest.may_have (c->buffer->cur().codepoint))
+        return false;
+      return apply_func (obj, c);
+    }
+
+    private:
+    const void *obj;
+    hb_apply_func_t apply_func;
+    hb_set_digest_t digest;
+  };
+
+  typedef hb_vector_t<hb_applicable_t> array_t;
+
+  /* Dispatch interface. */
+  template <typename T>
+  return_t dispatch (const T &obj)
+  {
+    array.push ()->init (obj, apply_to<T>);
+    return hb_empty_t ();
+  }
+  static return_t default_return_value () { return hb_empty_t (); }
+
+  hb_get_subtables_context_t (array_t &array_) :
+                             array (array_) {}
+
+  array_t &array;
+};
+
+
+
+
+typedef bool (*intersects_func_t) (const hb_set_t *glyphs, const HBUINT16 &value, const void *data); /* Does `value`, interpreted with the help of `data`, intersect `glyphs`? */
+typedef void (*collect_glyphs_func_t) (hb_set_t *glyphs, const HBUINT16 &value, const void *data); /* Add the glyphs denoted by `value` to `glyphs`. */
+typedef bool (*match_func_t) (hb_codepoint_t glyph_id, const HBUINT16 &value, const void *data); /* Does `glyph_id` match `value`? */
+
+struct ContextClosureFuncs
+{
+ intersects_func_t intersects; /* e.g. intersects_glyph / intersects_class / intersects_coverage */
+};
+struct ContextCollectGlyphsFuncs
+{
+ collect_glyphs_func_t collect; /* e.g. collect_glyph / collect_class / collect_coverage */
+};
+struct ContextApplyFuncs
+{
+ match_func_t match; /* e.g. match_glyph / match_class / match_coverage */
+};
+
+
+static inline bool intersects_glyph (const hb_set_t *glyphs, const HBUINT16 &value, const void *data HB_UNUSED)
+{ /* `value` is a glyph id: true when that glyph is in `glyphs`. */
+ return glyphs->has (value);
+}
+static inline bool intersects_class (const hb_set_t *glyphs, const HBUINT16 &value, const void *data)
+{ /* `value` is a class number; `data` must point at the ClassDef that defines it. */
+ const ClassDef &class_def = *reinterpret_cast<const ClassDef *>(data);
+ return class_def.intersects_class (glyphs, value);
+}
+static inline bool intersects_coverage (const hb_set_t *glyphs, const HBUINT16 &value, const void *data)
+{ /* `value` is reinterpreted as an offset to a Coverage table, resolved relative to `data`. */
+ const OffsetTo<Coverage> &coverage = (const OffsetTo<Coverage>&)value;
+ return (data+coverage).intersects (glyphs);
+}
+
+/* True when every one of the `count` entries of `values` intersects `glyphs`
+ * according to `intersects_func`; stops at the first entry that does not.
+ * An empty array (count == 0) yields true. */
+static inline bool array_is_subset_of (const hb_set_t *glyphs,
+                                       unsigned int count,
+                                       const HBUINT16 values[],
+                                       intersects_func_t intersects_func,
+                                       const void *intersects_data)
+{
+  for (unsigned int i = 0; i < count; i++)
+  {
+    if (!intersects_func (glyphs, values[i], intersects_data))
+      return false;
+  }
+  return true;
+}
+
+
+static inline void collect_glyph (hb_set_t *glyphs, const HBUINT16 &value, const void *data HB_UNUSED)
+{ /* `value` is a glyph id: add it to `glyphs`. */
+ glyphs->add (value);
+}
+static inline void collect_class (hb_set_t *glyphs, const HBUINT16 &value, const void *data)
+{ /* `value` is a class number; `data` must point at the ClassDef, which adds that class's glyphs. */
+ const ClassDef &class_def = *reinterpret_cast<const ClassDef *>(data);
+ class_def.collect_class (glyphs, value);
+}
+static inline void collect_coverage (hb_set_t *glyphs, const HBUINT16 &value, const void *data)
+{ /* `value` is reinterpreted as an offset to a Coverage table, resolved relative to `data`. */
+ const OffsetTo<Coverage> &coverage = (const OffsetTo<Coverage>&)value;
+ (data+coverage).collect_coverage (glyphs);
+}
+/* Run `collect_func` over each of the `count` entries of `values`, in order,
+ * accumulating into `glyphs`.  The context argument is unused. */
+static inline void collect_array (hb_collect_glyphs_context_t *c HB_UNUSED,
+                                  hb_set_t *glyphs,
+                                  unsigned int count,
+                                  const HBUINT16 values[],
+                                  collect_glyphs_func_t collect_func,
+                                  const void *collect_data)
+{
+  for (unsigned int i = 0; i < count; i++)
+    collect_func (glyphs, values[i], collect_data);
+}
+
+
+static inline bool match_glyph (hb_codepoint_t glyph_id, const HBUINT16 &value, const void *data HB_UNUSED)
+{ /* `value` is a glyph id: exact comparison. */
+ return glyph_id == value;
+}
+static inline bool match_class (hb_codepoint_t glyph_id, const HBUINT16 &value, const void *data)
+{ /* `value` is a class number; `data` must point at the ClassDef used to classify glyph_id. */
+ const ClassDef &class_def = *reinterpret_cast<const ClassDef *>(data);
+ return class_def.get_class (glyph_id) == value;
+}
+static inline bool match_coverage (hb_codepoint_t glyph_id, const HBUINT16 &value, const void *data)
+{ /* `value` is reinterpreted as an offset to a Coverage table, resolved relative to `data`. */
+ const OffsetTo<Coverage> &coverage = (const OffsetTo<Coverage>&)value;
+ return (data+coverage).get_coverage (glyph_id) != NOT_COVERED;
+}
+
+/* Check a would-apply query against an input sequence: the query must have
+ * exactly `count` glyphs, and glyphs 1..count-1 must each match the
+ * corresponding `input` entry.  Glyph 0 is not tested here. */
+static inline bool would_match_input (hb_would_apply_context_t *c,
+                                      unsigned int count, /* Including the first glyph (not matched) */
+                                      const HBUINT16 input[], /* Array of input values--start with second glyph */
+                                      match_func_t match_func,
+                                      const void *match_data)
+{
+  if (c->len != count)
+    return false;
+
+  for (unsigned int i = 1; i < count; i++)
+  {
+    /* input[] starts at the second glyph, hence the i - 1. */
+    const bool matched = match_func (c->glyphs[i], input[i - 1], match_data);
+    if (likely (!matched))
+      return false;
+  }
+
+  return true;
+}
+static inline bool match_input (hb_ot_apply_context_t *c,
+ unsigned int count, /* Including the first glyph (not matched) */
+ const HBUINT16 input[], /* Array of input values--start with second glyph */
+ match_func_t match_func,
+ const void *match_data,
+ unsigned int *end_offset, /* Out: distance from buffer->idx to one past the last matched glyph */
+ unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], /* Out: buffer index of each matched glyph; [0] is buffer->idx */
+ unsigned int *p_total_component_count = nullptr) /* Optional out: sum of lig_num_comps over the matched glyphs */
+{ /* Match `count` input glyphs starting at buffer->idx using c->iter_input; outputs are written only on success. */
+ TRACE_APPLY (nullptr);
+
+ if (unlikely (count > HB_MAX_CONTEXT_LENGTH)) return_trace (false); /* match_positions[] could not hold them */
+
+ hb_buffer_t *buffer = c->buffer;
+
+ hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
+ skippy_iter.reset (buffer->idx, count - 1);
+ skippy_iter.set_match_func (match_func, match_data, input);
+
+ /*
+ * This is perhaps the trickiest part of OpenType... Remarks:
+ *
+ * - If all components of the ligature were marks, we call this a mark ligature.
+ *
+ * - If there is no GDEF, and the ligature is NOT a mark ligature, we categorize
+ * it as a ligature glyph.
+ *
+ * - Ligatures cannot be formed across glyphs attached to different components
+ * of previous ligatures. Eg. the sequence is LAM,SHADDA,LAM,FATHA,HEH, and
+ * LAM,LAM,HEH form a ligature, leaving SHADDA,FATHA next to each other.
+ * However, it would be wrong to ligate that SHADDA,FATHA sequence.
+ * There are a couple of exceptions to this:
+ *
+ * o If a ligature tries ligating with marks that belong to it itself, go ahead,
+ * assuming that the font designer knows what they are doing (otherwise it can
+ * break Indic stuff when a matra wants to ligate with a conjunct).
+ *
+ * o If two marks want to ligate and they belong to different components of the
+ * same ligature glyph, and said ligature glyph is to be ignored according to
+ * mark-filtering rules, then allow.
+ * https://github.com/harfbuzz/harfbuzz/issues/545
+ */
+
+ unsigned int total_component_count = 0;
+ total_component_count += _hb_glyph_info_get_lig_num_comps (&buffer->cur());
+
+ unsigned int first_lig_id = _hb_glyph_info_get_lig_id (&buffer->cur());
+ unsigned int first_lig_comp = _hb_glyph_info_get_lig_comp (&buffer->cur());
+
+ enum {
+ LIGBASE_NOT_CHECKED,
+ LIGBASE_MAY_NOT_SKIP,
+ LIGBASE_MAY_SKIP
+ } ligbase = LIGBASE_NOT_CHECKED; /* Result of the base-ligature check below, computed at most once per call */
+
+ match_positions[0] = buffer->idx;
+ for (unsigned int i = 1; i < count; i++)
+ {
+ if (!skippy_iter.next ()) return_trace (false);
+
+ match_positions[i] = skippy_iter.idx;
+
+ unsigned int this_lig_id = _hb_glyph_info_get_lig_id (&buffer->info[skippy_iter.idx]);
+ unsigned int this_lig_comp = _hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx]);
+
+ if (first_lig_id && first_lig_comp)
+ {
+ /* If first component was attached to a previous ligature component,
+ * all subsequent components should be attached to the same ligature
+ * component, otherwise we shouldn't ligate them... */
+ if (first_lig_id != this_lig_id || first_lig_comp != this_lig_comp)
+ {
+ /* ...unless, we are attached to a base ligature and that base
+ * ligature is ignorable. */
+ if (ligbase == LIGBASE_NOT_CHECKED)
+ {
+ bool found = false;
+ const auto *out = buffer->out_info;
+ unsigned int j = buffer->out_len;
+ while (j && _hb_glyph_info_get_lig_id (&out[j - 1]) == first_lig_id) /* walk back through output glyphs that share first_lig_id */
+ {
+ if (_hb_glyph_info_get_lig_comp (&out[j - 1]) == 0) /* lig_comp == 0: the base ligature glyph itself */
+ {
+ j--;
+ found = true;
+ break;
+ }
+ j--;
+ }
+
+ if (found && skippy_iter.may_skip (out[j]) == hb_ot_apply_context_t::matcher_t::SKIP_YES) /* out[j] is the base found above */
+ ligbase = LIGBASE_MAY_SKIP;
+ else
+ ligbase = LIGBASE_MAY_NOT_SKIP;
+ }
+
+ if (ligbase == LIGBASE_MAY_NOT_SKIP)
+ return_trace (false);
+ }
+ }
+ else
+ {
+ /* If first component was NOT attached to a previous ligature component,
+ * all subsequent components should also NOT be attached to any ligature
+ * component, unless they are attached to the first component itself! */
+ if (this_lig_id && this_lig_comp && (this_lig_id != first_lig_id))
+ return_trace (false);
+ }
+
+ total_component_count += _hb_glyph_info_get_lig_num_comps (&buffer->info[skippy_iter.idx]);
+ }
+
+ *end_offset = skippy_iter.idx - buffer->idx + 1;
+
+ if (p_total_component_count)
+ *p_total_component_count = total_component_count;
+
+ return_trace (true);
+}
+static inline bool ligate_input (hb_ot_apply_context_t *c,
+ unsigned int count, /* Including the first glyph */
+ const unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], /* Including the first glyph */
+ unsigned int match_length, /* Span merged into one cluster, counted from buffer->idx */
+ hb_codepoint_t lig_glyph, /* Glyph that replaces the first matched glyph */
+ unsigned int total_component_count) /* Stored as the component count of the new ligature */
+{ /* Replace the matched glyphs with lig_glyph and renumber the ligature ids/components of the marks in and after the match. Always returns true. */
+ TRACE_APPLY (nullptr);
+
+ hb_buffer_t *buffer = c->buffer;
+
+ buffer->merge_clusters (buffer->idx, buffer->idx + match_length);
+
+ /* - If a base and one or more marks ligate, consider that as a base, NOT
+ * ligature, such that all following marks can still attach to it.
+ * https://github.com/harfbuzz/harfbuzz/issues/1109
+ *
+ * - If all components of the ligature were marks, we call this a mark ligature.
+ * If it *is* a mark ligature, we don't allocate a new ligature id, and leave
+ * the ligature to keep its old ligature id. This will allow it to attach to
+ * a base ligature in GPOS. Eg. if the sequence is: LAM,LAM,SHADDA,FATHA,HEH,
+ * and LAM,LAM,HEH form a ligature, they will leave SHADDA and FATHA with a
+ * ligature id and component value of 2. Then if SHADDA,FATHA form a ligature
+ * later, we don't want them to lose their ligature id/component, otherwise
+ * GPOS will fail to correctly position the mark ligature on top of the
+ * LAM,LAM,HEH ligature. See:
+ * https://bugzilla.gnome.org/show_bug.cgi?id=676343
+ *
+ * - If a ligature is formed of components that some of which are also ligatures
+ * themselves, and those ligature components had marks attached to *their*
+ * components, we have to attach the marks to the new ligature component
+ * positions! Now *that*'s tricky! And these marks may be following the
+ * last component of the whole sequence, so we should loop forward looking
+ * for them and update them.
+ *
+ * Eg. the sequence is LAM,LAM,SHADDA,FATHA,HEH, and the font first forms a
+ * 'calt' ligature of LAM,HEH, leaving the SHADDA and FATHA with a ligature
+ * id and component == 1. Now, during 'liga', the LAM and the LAM-HEH ligature
+ * form a LAM-LAM-HEH ligature. We need to reassign the SHADDA and FATHA to
+ * the new ligature with a component value of 2.
+ *
+ * This in fact happened to a font... See:
+ * https://bugzilla.gnome.org/show_bug.cgi?id=437633
+ */
+
+ bool is_base_ligature = _hb_glyph_info_is_base_glyph (&buffer->info[match_positions[0]]);
+ bool is_mark_ligature = _hb_glyph_info_is_mark (&buffer->info[match_positions[0]]);
+ for (unsigned int i = 1; i < count; i++)
+ if (!_hb_glyph_info_is_mark (&buffer->info[match_positions[i]])) /* any non-mark after the first glyph rules out both special cases */
+ {
+ is_base_ligature = false;
+ is_mark_ligature = false;
+ break;
+ }
+ bool is_ligature = !is_base_ligature && !is_mark_ligature;
+
+ unsigned int klass = is_ligature ? HB_OT_LAYOUT_GLYPH_PROPS_LIGATURE : 0;
+ unsigned int lig_id = is_ligature ? _hb_allocate_lig_id (buffer) : 0; /* a new id is allocated only for true ligatures */
+ unsigned int last_lig_id = _hb_glyph_info_get_lig_id (&buffer->cur());
+ unsigned int last_num_components = _hb_glyph_info_get_lig_num_comps (&buffer->cur());
+ unsigned int components_so_far = last_num_components;
+
+ if (is_ligature)
+ {
+ _hb_glyph_info_set_lig_props_for_ligature (&buffer->cur(), lig_id, total_component_count);
+ if (_hb_glyph_info_get_general_category (&buffer->cur()) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
+ {
+ _hb_glyph_info_set_general_category (&buffer->cur(), HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER);
+ }
+ }
+ c->replace_glyph_with_ligature (lig_glyph, klass);
+
+ for (unsigned int i = 1; i < count; i++)
+ {
+ while (buffer->idx < match_positions[i] && buffer->successful) /* glyphs skipped between matched components are copied to the output */
+ {
+ if (is_ligature)
+ {
+ unsigned int this_comp = _hb_glyph_info_get_lig_comp (&buffer->cur());
+ if (this_comp == 0)
+ this_comp = last_num_components;
+ unsigned int new_lig_comp = components_so_far - last_num_components +
+ hb_min (this_comp, last_num_components);
+ _hb_glyph_info_set_lig_props_for_mark (&buffer->cur(), lig_id, new_lig_comp);
+ }
+ buffer->next_glyph ();
+ }
+
+ last_lig_id = _hb_glyph_info_get_lig_id (&buffer->cur());
+ last_num_components = _hb_glyph_info_get_lig_num_comps (&buffer->cur());
+ components_so_far += last_num_components;
+
+ /* Skip the base glyph */
+ buffer->idx++;
+ }
+
+ if (!is_mark_ligature && last_lig_id)
+ {
+ /* Re-adjust components for any marks following. */
+ for (unsigned i = buffer->idx; i < buffer->len; ++i)
+ {
+ if (last_lig_id != _hb_glyph_info_get_lig_id (&buffer->info[i])) break;
+
+ unsigned this_comp = _hb_glyph_info_get_lig_comp (&buffer->info[i]);
+ if (!this_comp) break;
+
+ unsigned new_lig_comp = components_so_far - last_num_components +
+ hb_min (this_comp, last_num_components);
+ _hb_glyph_info_set_lig_props_for_mark (&buffer->info[i], lig_id, new_lig_comp);
+ }
+ }
+ return_trace (true);
+}
+
+/* Match `count` backtrack values by stepping c->iter_context backwards from
+ * the buffer's backtrack length.  On success *match_start receives the
+ * iterator index of the earliest matched glyph; it is untouched on failure. */
+static inline bool match_backtrack (hb_ot_apply_context_t *c,
+                                    unsigned int count,
+                                    const HBUINT16 backtrack[],
+                                    match_func_t match_func,
+                                    const void *match_data,
+                                    unsigned int *match_start)
+{
+  TRACE_APPLY (nullptr);
+
+  hb_ot_apply_context_t::skipping_iterator_t &iter = c->iter_context;
+  iter.reset (c->buffer->backtrack_len (), count);
+  iter.set_match_func (match_func, match_data, backtrack);
+
+  unsigned int remaining = count;
+  while (remaining)
+  {
+    if (!iter.prev ())
+      return_trace (false);
+    remaining--;
+  }
+
+  *match_start = iter.idx;
+
+  return_trace (true);
+}
+
+/* Match `count` lookahead values by stepping c->iter_context forwards,
+ * starting just before buffer->idx + offset.  On success *end_index receives
+ * one past the last matched index; it is untouched on failure. */
+static inline bool match_lookahead (hb_ot_apply_context_t *c,
+                                    unsigned int count,
+                                    const HBUINT16 lookahead[],
+                                    match_func_t match_func,
+                                    const void *match_data,
+                                    unsigned int offset,
+                                    unsigned int *end_index)
+{
+  TRACE_APPLY (nullptr);
+
+  hb_ot_apply_context_t::skipping_iterator_t &iter = c->iter_context;
+  iter.reset (c->buffer->idx + offset - 1, count);
+  iter.set_match_func (match_func, match_data, lookahead);
+
+  unsigned int remaining = count;
+  while (remaining)
+  {
+    if (!iter.next ())
+      return_trace (false);
+    remaining--;
+  }
+
+  *end_index = iter.idx + 1;
+
+  return_trace (true);
+}
+
+
+
+/* Pairs a position in the matched glyph sequence with the lookup to apply
+ * at that position. */
+struct LookupRecord
+{
+  /* Embed a copy of this record into the serializer and rewrite its lookup
+   * index through `lookup_map`.  Returns nullptr if embedding fails. */
+  LookupRecord* copy (hb_serialize_context_t *c,
+                      const hb_map_t *lookup_map) const
+  {
+    TRACE_SERIALIZE (this);
+    auto *copied = c->embed (*this);
+    if (unlikely (!copied))
+      return_trace (nullptr);
+
+    copied->lookupListIndex = hb_map_get (lookup_map, lookupListIndex);
+    return_trace (copied);
+  }
+
+  /* The record is fixed-size, so a struct bounds check is all that is done. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (c->check_struct (this));
+  }
+
+  HBUINT16 sequenceIndex; /* Index into current glyph
+                           * sequence--first glyph = 0 */
+  HBUINT16 lookupListIndex; /* Lookup to apply to that
+                             * position--zero-based */
+  public:
+  DEFINE_SIZE_STATIC (4);
+};
+
+/* Call c->recurse () with the lookupListIndex of each of the `lookupCount`
+ * records, in array order.  The results of recurse () are ignored. */
+template <typename context_t>
+static inline void recurse_lookups (context_t *c,
+                                    unsigned int lookupCount,
+                                    const LookupRecord lookupRecord[] /* Array of LookupRecords--in design order */)
+{
+  unsigned int i = 0;
+  while (i < lookupCount)
+  {
+    c->recurse (lookupRecord[i].lookupListIndex);
+    i++;
+  }
+}
+
+static inline bool apply_lookup (hb_ot_apply_context_t *c,
+ unsigned int count, /* Including the first glyph */
+ unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], /* Including the first glyph */
+ unsigned int lookupCount,
+ const LookupRecord lookupRecord[], /* Array of LookupRecords--in design order */
+ unsigned int match_length)
+{ /* Apply each LookupRecord at its matched position, keeping match_positions[] and the end position in sync when a nested lookup changes the buffer length. Always returns true. */
+ TRACE_APPLY (nullptr);
+
+ hb_buffer_t *buffer = c->buffer;
+ int end; /* Where the buffer is moved to when done; signed because it is adjusted by signed deltas */
+
+ /* All positions are distance from beginning of *output* buffer.
+ * Adjust. */
+ {
+ unsigned int bl = buffer->backtrack_len ();
+ end = bl + match_length;
+
+ int delta = bl - buffer->idx;
+ /* Convert positions to new indexing. */
+ for (unsigned int j = 0; j < count; j++)
+ match_positions[j] += delta;
+ }
+
+ for (unsigned int i = 0; i < lookupCount && buffer->successful; i++)
+ {
+ unsigned int idx = lookupRecord[i].sequenceIndex;
+ if (idx >= count) /* record refers to a position outside the matched sequence */
+ continue;
+
+ /* Don't recurse to ourself at same position.
+ * Note that this test is too naive, it doesn't catch longer loops. */
+ if (idx == 0 && lookupRecord[i].lookupListIndex == c->lookup_index)
+ continue;
+
+ if (unlikely (!buffer->move_to (match_positions[idx])))
+ break;
+
+ if (unlikely (buffer->max_ops <= 0))
+ break;
+
+ unsigned int orig_len = buffer->backtrack_len () + buffer->lookahead_len ();
+ if (!c->recurse (lookupRecord[i].lookupListIndex))
+ continue;
+
+ unsigned int new_len = buffer->backtrack_len () + buffer->lookahead_len ();
+ int delta = new_len - orig_len;
+
+ if (!delta)
+ continue;
+
+ /* Recursed lookup changed buffer len. Adjust.
+ *
+ * TODO:
+ *
+ * Right now, if buffer length increased by n, we assume n new glyphs
+ * were added right after the current position, and if buffer length
+ * was decreased by n, we assume n match positions after the current
+ * one were removed. The former (buffer length increased) case is
+ * fine, but the decrease case can be improved in at least two ways,
+ * both of which are significant:
+ *
+ * - If recursed-to lookup is MultipleSubst and buffer length
+ * decreased, then it's current match position that was deleted,
+ * NOT the one after it.
+ *
+ * - If buffer length was decreased by n, it does not necessarily
+ * mean that n match positions were removed, as there might
+ * have been marks and default-ignorables in the sequence. We
+ * should instead drop match positions between current-position
+ * and current-position + n instead.
+ *
+ * It should be possible to construct tests for both of these cases.
+ */
+
+ end += delta;
+ if (end <= int (match_positions[idx]))
+ {
+ /* End might end up being smaller than match_positions[idx] if the recursed
+ * lookup ended up removing many items, more than we have had matched.
+ * Just never rewind end back and get out of here.
+ * https://bugs.chromium.org/p/chromium/issues/detail?id=659496 */
+ end = match_positions[idx];
+ /* There can't be any further changes. */
+ break;
+ }
+
+ unsigned int next = idx + 1; /* next now is the position after the recursed lookup. */
+
+ if (delta > 0)
+ {
+ if (unlikely (delta + count > HB_MAX_CONTEXT_LENGTH)) /* the grown sequence would not fit in match_positions[] */
+ break;
+ }
+ else
+ {
+ /* NOTE: delta is negative. */
+ delta = hb_max (delta, (int) next - (int) count); /* never drop more positions than remain after idx */
+ next -= delta;
+ }
+
+ /* Shift! */
+ memmove (match_positions + next + delta, match_positions + next,
+ (count - next) * sizeof (match_positions[0]));
+ next += delta;
+ count += delta;
+
+ /* Fill in new entries. */
+ for (unsigned int j = idx + 1; j < next; j++)
+ match_positions[j] = match_positions[j - 1] + 1;
+
+ /* And fixup the rest. */
+ for (; next < count; next++)
+ match_positions[next] += delta;
+ }
+
+ buffer->move_to (end);
+
+ return_trace (true);
+}
+
+
+
+/* Contextual lookups */
+
+struct ContextClosureLookupContext
+{
+ ContextClosureFuncs funcs;
+ const void *intersects_data; /* Passed as the `data` argument of funcs.intersects */
+};
+
+struct ContextCollectGlyphsLookupContext
+{
+ ContextCollectGlyphsFuncs funcs;
+ const void *collect_data; /* Passed as the `data` argument of funcs.collect */
+};
+
+struct ContextApplyLookupContext
+{
+ ContextApplyFuncs funcs;
+ const void *match_data; /* Passed as the `data` argument of funcs.match */
+};
+
+/* True when every input value after the first glyph intersects `glyphs`,
+ * using the intersect function and data from `lookup_context`. */
+static inline bool context_intersects (const hb_set_t *glyphs,
+                                       unsigned int inputCount, /* Including the first glyph (not matched) */
+                                       const HBUINT16 input[], /* Array of input values--start with second glyph */
+                                       ContextClosureLookupContext &lookup_context)
+{
+  /* input[] omits the first glyph, so it holds inputCount - 1 entries
+   * (none when inputCount is 0). */
+  const unsigned int input_len = inputCount ? inputCount - 1 : 0;
+  return array_is_subset_of (glyphs,
+                             input_len, input,
+                             lookup_context.funcs.intersects,
+                             lookup_context.intersects_data);
+}
+
+/* Closure step for one contextual rule: recurse into the rule's lookups only
+ * if its input sequence intersects the glyph set being closed over. */
+static inline void context_closure_lookup (hb_closure_context_t *c,
+                                           unsigned int inputCount, /* Including the first glyph (not matched) */
+                                           const HBUINT16 input[], /* Array of input values--start with second glyph */
+                                           unsigned int lookupCount,
+                                           const LookupRecord lookupRecord[],
+                                           ContextClosureLookupContext &lookup_context)
+{
+  if (!context_intersects (c->glyphs, inputCount, input, lookup_context))
+    return;
+
+  recurse_lookups (c, lookupCount, lookupRecord);
+}
+
+/* Collect-glyphs step for one contextual rule: add the rule's input glyphs
+ * to c->input, then recurse into its lookups unconditionally. */
+static inline void context_collect_glyphs_lookup (hb_collect_glyphs_context_t *c,
+                                                  unsigned int inputCount, /* Including the first glyph (not matched) */
+                                                  const HBUINT16 input[], /* Array of input values--start with second glyph */
+                                                  unsigned int lookupCount,
+                                                  const LookupRecord lookupRecord[],
+                                                  ContextCollectGlyphsLookupContext &lookup_context)
+{
+  /* input[] omits the first glyph. */
+  const unsigned int input_len = inputCount ? inputCount - 1 : 0;
+
+  collect_array (c, c->input,
+                 input_len, input,
+                 lookup_context.funcs.collect,
+                 lookup_context.collect_data);
+
+  recurse_lookups (c, lookupCount, lookupRecord);
+}
+
+/* Would-apply check for one contextual rule.  Only the input sequence is
+ * tested; the lookup records are accepted for signature symmetry and unused. */
+static inline bool context_would_apply_lookup (hb_would_apply_context_t *c,
+                                               unsigned int inputCount, /* Including the first glyph (not matched) */
+                                               const HBUINT16 input[], /* Array of input values--start with second glyph */
+                                               unsigned int lookupCount HB_UNUSED,
+                                               const LookupRecord lookupRecord[] HB_UNUSED,
+                                               ContextApplyLookupContext &lookup_context)
+{
+  const bool input_matches = would_match_input (c,
+                                                inputCount, input,
+                                                lookup_context.funcs.match,
+                                                lookup_context.match_data);
+  return input_matches;
+}
+/* Apply one contextual rule at the current buffer position: match the input
+ * sequence, flag the matched range unsafe-to-break, then run the rule's
+ * lookup records.  Returns false, leaving the buffer unflagged, if the
+ * input does not match. */
+static inline bool context_apply_lookup (hb_ot_apply_context_t *c,
+                                         unsigned int inputCount, /* Including the first glyph (not matched) */
+                                         const HBUINT16 input[], /* Array of input values--start with second glyph */
+                                         unsigned int lookupCount,
+                                         const LookupRecord lookupRecord[],
+                                         ContextApplyLookupContext &lookup_context)
+{
+  unsigned int match_length = 0;
+  unsigned int match_positions[HB_MAX_CONTEXT_LENGTH];
+
+  if (!match_input (c,
+                    inputCount, input,
+                    lookup_context.funcs.match, lookup_context.match_data,
+                    &match_length, match_positions))
+    return false;
+
+  c->buffer->unsafe_to_break (c->buffer->idx, c->buffer->idx + match_length);
+
+  return apply_lookup (c,
+                       inputCount, match_positions,
+                       lookupCount, lookupRecord,
+                       match_length);
+}
+
+/* One contextual rule: an input sequence (stored without its first glyph)
+ * followed in memory by an array of LookupRecords.  The LookupRecord array
+ * has no member of its own; every method locates it with StructAfter past
+ * the inputCount - 1 input values (0 values when inputCount is 0). */
+struct Rule
+{
+  /* True when every input value of this rule intersects `glyphs`. */
+  bool intersects (const hb_set_t *glyphs, ContextClosureLookupContext &lookup_context) const
+  {
+    return context_intersects (glyphs,
+                               inputCount, inputZ.arrayZ,
+                               lookup_context);
+  }
+
+  /* Glyph closure: recurse into the rule's lookups if its input intersects
+   * the closure glyph set.  No-op once the lookup limit is exceeded. */
+  void closure (hb_closure_context_t *c, ContextClosureLookupContext &lookup_context) const
+  {
+    if (unlikely (c->lookup_limit_exceeded ())) return;
+
+    const UnsizedArrayOf<LookupRecord> &lookupRecord = StructAfter<UnsizedArrayOf<LookupRecord>>
+                                                       (inputZ.as_array (inputCount ? inputCount - 1 : 0));
+    context_closure_lookup (c,
+                            inputCount, inputZ.arrayZ,
+                            lookupCount, lookupRecord.arrayZ,
+                            lookup_context);
+  }
+
+  /* Lookup closure: recurse into every referenced lookup, regardless of
+   * the input sequence.  No-op once the lookup limit is exceeded. */
+  void closure_lookups (hb_closure_lookups_context_t *c) const
+  {
+    if (unlikely (c->lookup_limit_exceeded ())) return;
+
+    const UnsizedArrayOf<LookupRecord> &lookupRecord = StructAfter<UnsizedArrayOf<LookupRecord>>
+                                                       (inputZ.as_array (inputCount ? inputCount - 1 : 0));
+    recurse_lookups (c, lookupCount, lookupRecord.arrayZ);
+  }
+
+  /* Collect the rule's input glyphs and recurse into its lookups. */
+  void collect_glyphs (hb_collect_glyphs_context_t *c,
+                       ContextCollectGlyphsLookupContext &lookup_context) const
+  {
+    const UnsizedArrayOf<LookupRecord> &lookupRecord = StructAfter<UnsizedArrayOf<LookupRecord>>
+                                                       (inputZ.as_array (inputCount ? inputCount - 1 : 0));
+    context_collect_glyphs_lookup (c,
+                                   inputCount, inputZ.arrayZ,
+                                   lookupCount, lookupRecord.arrayZ,
+                                   lookup_context);
+  }
+
+  /* Would-apply query: only the input sequence is checked. */
+  bool would_apply (hb_would_apply_context_t *c,
+                    ContextApplyLookupContext &lookup_context) const
+  {
+    const UnsizedArrayOf<LookupRecord> &lookupRecord = StructAfter<UnsizedArrayOf<LookupRecord>>
+                                                       (inputZ.as_array (inputCount ? inputCount - 1 : 0));
+    return context_would_apply_lookup (c,
+                                       inputCount, inputZ.arrayZ,
+                                       lookupCount, lookupRecord.arrayZ,
+                                       lookup_context);
+  }
+
+  /* Try to match and apply this rule at the current buffer position. */
+  bool apply (hb_ot_apply_context_t *c,
+              ContextApplyLookupContext &lookup_context) const
+  {
+    TRACE_APPLY (this);
+    const UnsizedArrayOf<LookupRecord> &lookupRecord = StructAfter<UnsizedArrayOf<LookupRecord>>
+                                                       (inputZ.as_array (inputCount ? inputCount - 1 : 0));
+    return_trace (context_apply_lookup (c, inputCount, inputZ.arrayZ, lookupCount, lookupRecord.arrayZ, lookup_context));
+  }
+
+  /* Write a copy of this rule into the serializer, remapping each input
+   * value through `input_mapping` and each lookup index through
+   * `lookup_map`.  Returns false if the fixed-size header cannot be
+   * allocated. */
+  bool serialize (hb_serialize_context_t *c,
+                  const hb_map_t *input_mapping, /* old->new glyphid or class mapping */
+                  const hb_map_t *lookup_map) const
+  {
+    TRACE_SERIALIZE (this);
+    auto *out = c->start_embed (this);
+    if (unlikely (!c->extend_min (out))) return_trace (false);
+
+    out->inputCount = inputCount;
+    out->lookupCount = lookupCount;
+
+    /* Guard inputCount == 0 the same way every other method here does: an
+     * unguarded `inputCount - 1` would be converted to a huge unsigned
+     * length and the loop below would read far past the table. */
+    const hb_array_t<const HBUINT16> input = inputZ.as_array (inputCount ? inputCount - 1 : 0);
+    for (const auto org : input)
+    {
+      HBUINT16 d;
+      d = input_mapping->get (org);
+      c->copy (d);
+    }
+
+    const UnsizedArrayOf<LookupRecord> &lookupRecord = StructAfter<UnsizedArrayOf<LookupRecord>>
+                                                       (inputZ.as_array (inputCount ? inputCount - 1 : 0));
+    for (unsigned i = 0; i < (unsigned) lookupCount; i++)
+      c->copy (lookupRecord[i], lookup_map);
+
+    return_trace (true);
+  }
+
+  /* Subset this rule.  Fails (returns false) when the rule has no input
+   * values beyond the first glyph, or when any input value is missing from
+   * the mapping; otherwise serializes the remapped rule.  The mapping is
+   * `klass_map` when given, else the plan's glyph map. */
+  bool subset (hb_subset_context_t *c,
+               const hb_map_t *lookup_map,
+               const hb_map_t *klass_map = nullptr) const
+  {
+    TRACE_SUBSET (this);
+
+    const hb_array_t<const HBUINT16> input = inputZ.as_array (inputCount ? inputCount - 1 : 0);
+    if (!input.length) return_trace (false);
+
+    const hb_map_t *mapping = klass_map == nullptr ? c->plan->glyph_map : klass_map;
+    if (!hb_all (input, mapping)) return_trace (false);
+    return_trace (serialize (c->serializer, mapping, lookup_map));
+  }
+
+  public:
+  /* Validate both counts, then the combined byte range of the input values
+   * and the LookupRecords that follow them. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (inputCount.sanitize (c) &&
+                  lookupCount.sanitize (c) &&
+                  c->check_range (inputZ.arrayZ,
+                                  inputZ.item_size * (inputCount ? inputCount - 1 : 0) +
+                                  LookupRecord::static_size * lookupCount));
+  }
+
+  protected:
+  HBUINT16 inputCount; /* Total number of glyphs in input
+                        * glyph sequence--includes the first
+                        * glyph */
+  HBUINT16 lookupCount; /* Number of LookupRecords */
+  UnsizedArrayOf<HBUINT16>
+           inputZ; /* Array of match inputs--start with
+                    * second glyph */
+/*UnsizedArrayOf<LookupRecord>
+           lookupRecordX;*/ /* Array of LookupRecords--in
+                             * design order */
+  public:
+  DEFINE_SIZE_ARRAY (4, inputZ);
+};
+
+struct RuleSet
+{
+ bool intersects (const hb_set_t *glyphs,
+ ContextClosureLookupContext &lookup_context) const
+ { /* True when any rule in the set intersects `glyphs`. */
+ return
+ + hb_iter (rule)
+ | hb_map (hb_add (this)) /* resolve each rule offset relative to this RuleSet */
+ | hb_map ([&] (const Rule &_) { return _.intersects (glyphs, lookup_context); })
+ | hb_any
+ ;
+ }
+
+ void closure (hb_closure_context_t *c,
+ ContextClosureLookupContext &lookup_context) const
+ { /* Runs Rule::closure on every rule; no-op once the lookup limit is exceeded. */
+ if (unlikely (c->lookup_limit_exceeded ())) return;
+
+ return
+ + hb_iter (rule)
+ | hb_map (hb_add (this))
+ | hb_apply ([&] (const Rule &_) { _.closure (c, lookup_context); })
+ ;
+ }
+
+ void closure_lookups (hb_closure_lookups_context_t *c) const
+ { /* Runs Rule::closure_lookups on every rule; no-op once the lookup limit is exceeded. */
+ if (unlikely (c->lookup_limit_exceeded ())) return;
+
+ return
+ + hb_iter (rule)
+ | hb_map (hb_add (this))
+ | hb_apply ([&] (const Rule &_) { _.closure_lookups (c); })
+ ;
+ }
+
+ void collect_glyphs (hb_collect_glyphs_context_t *c,
+ ContextCollectGlyphsLookupContext &lookup_context) const
+ { /* Runs Rule::collect_glyphs on every rule. */
+ return
+ + hb_iter (rule)
+ | hb_map (hb_add (this))
+ | hb_apply ([&] (const Rule &_) { _.collect_glyphs (c, lookup_context); })
+ ;
+ }
+
+ bool would_apply (hb_would_apply_context_t *c,
+ ContextApplyLookupContext &lookup_context) const
+ { /* True when any rule in the set would apply. */
+ return
+ + hb_iter (rule)
+ | hb_map (hb_add (this))
+ | hb_map ([&] (const Rule &_) { return _.would_apply (c, lookup_context); })
+ | hb_any
+ ;
+ }
+
+ bool apply (hb_ot_apply_context_t *c,
+ ContextApplyLookupContext &lookup_context) const
+ { /* Tries the rules in array order (the array is ordered by preference) and reports whether any applied. */
+ TRACE_APPLY (this);
+ return_trace (
+ + hb_iter (rule)
+ | hb_map (hb_add (this))
+ | hb_map ([&] (const Rule &_) { return _.apply (c, lookup_context); })
+ | hb_any
+ )
+ ;
+ }
+
+ bool subset (hb_subset_context_t *c,
+ const hb_map_t *lookup_map,
+ const hb_map_t *klass_map = nullptr) const
+ { /* Subsets each non-null rule into a new RuleSet; returns false, with the serializer reverted, if no rule survives. */
+ TRACE_SUBSET (this);
+
+ auto snap = c->serializer->snapshot (); /* state to restore if the whole set ends up empty */
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+
+ for (const OffsetTo<Rule>& _ : rule)
+ {
+ if (!_) continue; /* null offset: nothing to subset */
+ auto *o = out->rule.serialize_append (c->serializer);
+ if (unlikely (!o)) continue;
+
+ auto o_snap = c->serializer->snapshot ();
+ if (!o->serialize_subset (c, _, this, lookup_map, klass_map))
+ { /* rule was dropped: remove its offset slot and revert to o_snap */
+ out->rule.pop ();
+ c->serializer->revert (o_snap);
+ }
+ }
+
+ bool ret = bool (out->rule);
+ if (!ret) c->serializer->revert (snap);
+
+ return_trace (ret);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ { /* Sanitizes the offset array, with offsets resolved relative to this RuleSet. */
+ TRACE_SANITIZE (this);
+ return_trace (rule.sanitize (c, this));
+ }
+
+ protected:
+ OffsetArrayOf<Rule>
+ rule; /* Array of Rule tables
+ * ordered by preference */
+ public:
+ DEFINE_SIZE_ARRAY (2, rule);
+};
+
+
+struct ContextFormat1 /* Context subtable, format 1: a Coverage table plus one
+ * RuleSet offset per Coverage Index.  Rules are matched
+ * with the *_glyph callbacks and no extra data (nullptr). */
+{ /* intersects: true if any RuleSet paired (via hb_zip) with a covered
+ * glyph contained in `glyphs` reports an intersection. */
+ bool intersects (const hb_set_t *glyphs) const
+ {
+ struct ContextClosureLookupContext lookup_context = {
+ {intersects_glyph},
+ nullptr
+ };
+
+ return
+ + hb_zip (this+coverage, ruleSet)
+ | hb_filter (*glyphs, hb_first)
+ | hb_map (hb_second)
+ | hb_map (hb_add (this))
+ | hb_map ([&] (const RuleSet &_) { return _.intersects (glyphs, lookup_context); })
+ | hb_any
+ ;
+ }
+ /* Runs closure () on each RuleSet whose covered glyph is in c->glyphs. */
+ void closure (hb_closure_context_t *c) const
+ {
+ struct ContextClosureLookupContext lookup_context = {
+ {intersects_glyph},
+ nullptr
+ };
+
+ + hb_zip (this+coverage, ruleSet)
+ | hb_filter (*c->glyphs, hb_first)
+ | hb_map (hb_second)
+ | hb_map (hb_add (this))
+ | hb_apply ([&] (const RuleSet &_) { _.closure (c, lookup_context); })
+ ;
+ }
+ /* Runs closure_lookups () on every RuleSet (no coverage filtering). */
+ void closure_lookups (hb_closure_lookups_context_t *c) const
+ {
+ + hb_iter (ruleSet)
+ | hb_map (hb_add (this))
+ | hb_apply ([&] (const RuleSet &_) { _.closure_lookups (c); })
+ ;
+ }
+ /* Intentionally empty for this format. */
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c) const {}
+ /* Adds the coverage to c->input, then lets every RuleSet collect its
+ * glyphs using the collect_glyph callback. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ {
+ (this+coverage).collect_coverage (c->input);
+
+ struct ContextCollectGlyphsLookupContext lookup_context = {
+ {collect_glyph},
+ nullptr
+ };
+
+ + hb_iter (ruleSet)
+ | hb_map (hb_add (this))
+ | hb_apply ([&] (const RuleSet &_) { _.collect_glyphs (c, lookup_context); })
+ ;
+ }
+ /* Picks the RuleSet at the Coverage Index of c->glyphs[0] and asks it
+ * whether it would apply.  The index is used without a NOT_COVERED check;
+ * presumably an out-of-range index resolves to an empty RuleSet -- verify
+ * against the array/offset accessors. */
+ bool would_apply (hb_would_apply_context_t *c) const
+ {
+ const RuleSet &rule_set = this+ruleSet[(this+coverage).get_coverage (c->glyphs[0])];
+ struct ContextApplyLookupContext lookup_context = {
+ {match_glyph},
+ nullptr
+ };
+ return rule_set.would_apply (c, lookup_context);
+ }
+ /* Returns the Coverage table referenced by this subtable. */
+ const Coverage &get_coverage () const { return this+coverage; }
+ /* Applies the RuleSet selected by the Coverage Index of the current
+ * buffer glyph; returns false when that glyph is NOT_COVERED. */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+ unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint);
+ if (likely (index == NOT_COVERED))
+ return_trace (false);
+
+ const RuleSet &rule_set = this+ruleSet[index];
+ struct ContextApplyLookupContext lookup_context = {
+ {match_glyph},
+ nullptr
+ };
+ return_trace (rule_set.apply (c, lookup_context));
+ }
+ /* Serializes a subset copy.  A (glyph, RuleSet) pair is kept when the
+ * glyph is in the plan's glyphset and subset_offset_array () accepts the
+ * RuleSet (that filter also writes into out->ruleSet).  Kept glyphs are
+ * remapped through the plan's glyph_map and become the new Coverage.
+ * Returns whether the new coverage is non-empty.  The lookup map is the
+ * GSUB or GPOS one depending on c->table_tag. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+ out->format = format;
+
+ const hb_map_t *lookup_map = c->table_tag == HB_OT_TAG_GSUB ? c->plan->gsub_lookups : c->plan->gpos_lookups;
+ hb_sorted_vector_t<hb_codepoint_t> new_coverage;
+ + hb_zip (this+coverage, ruleSet)
+ | hb_filter (glyphset, hb_first)
+ | hb_filter (subset_offset_array (c, out->ruleSet, this, lookup_map), hb_second)
+ | hb_map (hb_first)
+ | hb_map (glyph_map)
+ | hb_sink (new_coverage)
+ ;
+
+ out->coverage.serialize (c->serializer, out)
+ .serialize (c->serializer, new_coverage.iter ());
+ return_trace (bool (new_coverage));
+ }
+ /* Sanitizes the coverage offset and the RuleSet offset array, both
+ * based at this table. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (coverage.sanitize (c, this) && ruleSet.sanitize (c, this));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 1 */
+ OffsetTo<Coverage>
+ coverage; /* Offset to Coverage table--from
+ * beginning of table */
+ OffsetArrayOf<RuleSet>
+ ruleSet; /* Array of RuleSet tables
+ * ordered by Coverage Index */
+ public:
+ DEFINE_SIZE_ARRAY (6, ruleSet);
+};
+
+
+struct ContextFormat2 /* Context subtable, format 2: a Coverage table, a
+ * ClassDef table and one RuleSet offset per class.
+ * Rules are matched with the *_class callbacks, with
+ * the ClassDef passed as callback data. */
+{ /* intersects: false unless the coverage intersects `glyphs`; then true
+ * if any RuleSet whose index (its class) intersects `glyphs` also
+ * reports an intersection itself. */
+ bool intersects (const hb_set_t *glyphs) const
+ {
+ if (!(this+coverage).intersects (glyphs))
+ return false;
+
+ const ClassDef &class_def = this+classDef;
+
+ struct ContextClosureLookupContext lookup_context = {
+ {intersects_class},
+ &class_def
+ };
+
+ return
+ + hb_iter (ruleSet)
+ | hb_map (hb_add (this))
+ | hb_enumerate
+ | hb_map ([&] (const hb_pair_t<unsigned, const RuleSet &> p)
+ { return class_def.intersects_class (glyphs, p.first) &&
+ p.second.intersects (glyphs, lookup_context); })
+ | hb_any
+ ;
+ }
+ /* Runs closure () on each RuleSet whose class intersects c->glyphs;
+ * does nothing when the coverage does not intersect c->glyphs. */
+ void closure (hb_closure_context_t *c) const
+ {
+ if (!(this+coverage).intersects (c->glyphs))
+ return;
+
+ const ClassDef &class_def = this+classDef;
+
+ struct ContextClosureLookupContext lookup_context = {
+ {intersects_class},
+ &class_def
+ };
+
+ return
+ + hb_enumerate (ruleSet)
+ | hb_filter ([&] (unsigned _)
+ { return class_def.intersects_class (c->glyphs, _); },
+ hb_first)
+ | hb_map (hb_second)
+ | hb_map (hb_add (this))
+ | hb_apply ([&] (const RuleSet &_) { _.closure (c, lookup_context); })
+ ;
+ }
+ /* Runs closure_lookups () on every RuleSet (no class filtering). */
+ void closure_lookups (hb_closure_lookups_context_t *c) const
+ {
+ + hb_iter (ruleSet)
+ | hb_map (hb_add (this))
+ | hb_apply ([&] (const RuleSet &_) { _.closure_lookups (c); })
+ ;
+ }
+ /* Intentionally empty for this format. */
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c) const {}
+ /* Adds the coverage to c->input, then lets every RuleSet collect its
+ * glyphs using the collect_class callback and the ClassDef. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ {
+ (this+coverage).collect_coverage (c->input);
+
+ const ClassDef &class_def = this+classDef;
+ struct ContextCollectGlyphsLookupContext lookup_context = {
+ {collect_class},
+ &class_def
+ };
+
+ + hb_iter (ruleSet)
+ | hb_map (hb_add (this))
+ | hb_apply ([&] (const RuleSet &_) { _.collect_glyphs (c, lookup_context); })
+ ;
+ }
+ /* Picks the RuleSet indexed by the class of c->glyphs[0] and asks it
+ * whether it would apply. */
+ bool would_apply (hb_would_apply_context_t *c) const
+ {
+ const ClassDef &class_def = this+classDef;
+ unsigned int index = class_def.get_class (c->glyphs[0]);
+ const RuleSet &rule_set = this+ruleSet[index];
+ struct ContextApplyLookupContext lookup_context = {
+ {match_class},
+ &class_def
+ };
+ return rule_set.would_apply (c, lookup_context);
+ }
+ /* Returns the Coverage table referenced by this subtable. */
+ const Coverage &get_coverage () const { return this+coverage; }
+ /* Returns false when the current buffer glyph is NOT_COVERED; otherwise
+ * applies the RuleSet indexed by that glyph's class. */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+ unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint);
+ if (likely (index == NOT_COVERED)) return_trace (false);
+
+ const ClassDef &class_def = this+classDef;
+ index = class_def.get_class (c->buffer->cur().codepoint);
+ const RuleSet &rule_set = this+ruleSet[index];
+ struct ContextApplyLookupContext lookup_context = {
+ {match_class},
+ &class_def
+ };
+ return_trace (rule_set.apply (c, lookup_context));
+ }
+ /* Serializes a subset copy.  The coverage must subset successfully; the
+ * ClassDef is subset next and fills klass_map.  One output slot is
+ * appended for every RuleSet whose class index is in klass_map, and
+ * non_zero_index remembers the last slot whose serialize_subset ()
+ * succeeded.  Slots after that one are pruned.  Returns false on a
+ * failed append or when the output RuleSet array ends up empty. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+ out->format = format;
+ if (unlikely (!out->coverage.serialize_subset (c, coverage, this)))
+ return_trace (false);
+
+ hb_map_t klass_map;
+ out->classDef.serialize_subset (c, classDef, this, &klass_map);
+
+ const hb_map_t *lookup_map = c->table_tag == HB_OT_TAG_GSUB ? c->plan->gsub_lookups : c->plan->gpos_lookups;
+ bool ret = true;
+ unsigned non_zero_index = 0, index = 0;
+ for (const hb_pair_t<unsigned, const OffsetTo<RuleSet>&> _ : + hb_enumerate (ruleSet)
+ | hb_filter (klass_map, hb_first))
+ {
+ auto *o = out->ruleSet.serialize_append (c->serializer);
+ if (unlikely (!o))
+ {
+ ret = false;
+ break;
+ }
+
+ if (o->serialize_subset (c, _.second, this, lookup_map, &klass_map))
+ non_zero_index = index;
+
+ index++;
+ }
+
+ if (!ret) return_trace (ret);
+
+ //prune empty trailing ruleSets
+ /* index is the number of appended slots.  Step back to the last slot
+ * only if there is one: with nothing appended, an unconditional
+ * decrement of the unsigned counter would wrap around and the loop
+ * below would pop from an empty array roughly 2^32 times. */
+ if (index) --index;
+ while (index > non_zero_index)
+ {
+ out->ruleSet.pop ();
+ index--;
+ }
+
+ return_trace (bool (out->ruleSet));
+ }
+ /* Sanitizes the coverage offset, the ClassDef offset and the RuleSet
+ * offset array, all based at this table. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (coverage.sanitize (c, this) && classDef.sanitize (c, this) && ruleSet.sanitize (c, this));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 2 */
+ OffsetTo<Coverage>
+ coverage; /* Offset to Coverage table--from
+ * beginning of table */
+ OffsetTo<ClassDef>
+ classDef; /* Offset to glyph ClassDef table--from
+ * beginning of table */
+ OffsetArrayOf<RuleSet>
+ ruleSet; /* Array of RuleSet tables
+ * ordered by class */
+ public:
+ DEFINE_SIZE_ARRAY (8, ruleSet);
+};
+
+
+struct ContextFormat3 /* Context subtable, format 3: glyphCount offsets to
+ * Coverage tables (one per input position) followed
+ * directly by lookupCount LookupRecords.  The coverage
+ * offsets after the first are passed to the shared
+ * context_* helpers as an HBUINT16 array, with `this`
+ * as the callback data. */
+{ /* intersects: false unless the first coverage intersects `glyphs`;
+ * otherwise defers to context_intersects () for the rest. */
+ bool intersects (const hb_set_t *glyphs) const
+ {
+ if (!(this+coverageZ[0]).intersects (glyphs))
+ return false;
+
+ struct ContextClosureLookupContext lookup_context = {
+ {intersects_coverage},
+ this
+ };
+ return context_intersects (glyphs,
+ glyphCount, (const HBUINT16 *) (coverageZ.arrayZ + 1),
+ lookup_context);
+ }
+ /* Runs context_closure_lookup () over the LookupRecords, provided the
+ * first coverage intersects c->glyphs. */
+ void closure (hb_closure_context_t *c) const
+ {
+ if (!(this+coverageZ[0]).intersects (c->glyphs))
+ return;
+
+ const LookupRecord *lookupRecord = &StructAfter<LookupRecord> (coverageZ.as_array (glyphCount));
+ struct ContextClosureLookupContext lookup_context = {
+ {intersects_coverage},
+ this
+ };
+ context_closure_lookup (c,
+ glyphCount, (const HBUINT16 *) (coverageZ.arrayZ + 1),
+ lookupCount, lookupRecord,
+ lookup_context);
+ }
+ /* Recurses into every LookupRecord stored after the coverage offsets. */
+ void closure_lookups (hb_closure_lookups_context_t *c) const
+ {
+ const LookupRecord *lookupRecord = &StructAfter<LookupRecord> (coverageZ.as_array (glyphCount));
+ recurse_lookups (c, lookupCount, lookupRecord);
+ }
+ /* Intentionally empty for this format. */
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c) const {}
+ /* Adds the first coverage to c->input, then hands the remaining
+ * coverages and the LookupRecords to context_collect_glyphs_lookup (). */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ {
+ (this+coverageZ[0]).collect_coverage (c->input);
+
+ const LookupRecord *lookupRecord = &StructAfter<LookupRecord> (coverageZ.as_array (glyphCount));
+ struct ContextCollectGlyphsLookupContext lookup_context = {
+ {collect_coverage},
+ this
+ };
+
+ context_collect_glyphs_lookup (c,
+ glyphCount, (const HBUINT16 *) (coverageZ.arrayZ + 1),
+ lookupCount, lookupRecord,
+ lookup_context);
+ }
+ /* Defers to context_would_apply_lookup () with the match_coverage
+ * callback. */
+ bool would_apply (hb_would_apply_context_t *c) const
+ {
+ const LookupRecord *lookupRecord = &StructAfter<LookupRecord> (coverageZ.as_array (glyphCount));
+ struct ContextApplyLookupContext lookup_context = {
+ {match_coverage},
+ this
+ };
+ return context_would_apply_lookup (c,
+ glyphCount, (const HBUINT16 *) (coverageZ.arrayZ + 1),
+ lookupCount, lookupRecord,
+ lookup_context);
+ }
+ /* Returns the Coverage table of the first input position. */
+ const Coverage &get_coverage () const { return this+coverageZ[0]; }
+ /* Returns false when the current buffer glyph is not in the first
+ * coverage; otherwise defers to context_apply_lookup (). */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+ unsigned int index = (this+coverageZ[0]).get_coverage (c->buffer->cur().codepoint);
+ if (likely (index == NOT_COVERED)) return_trace (false);
+
+ const LookupRecord *lookupRecord = &StructAfter<LookupRecord> (coverageZ.as_array (glyphCount));
+ struct ContextApplyLookupContext lookup_context = {
+ {match_coverage},
+ this
+ };
+ return_trace (context_apply_lookup (c, glyphCount, (const HBUINT16 *) (coverageZ.arrayZ + 1), lookupCount, lookupRecord, lookup_context));
+ }
+ /* Serializes a subset copy: the header fields are copied verbatim, each
+ * coverage offset gets a freshly allocated slot and must subset
+ * successfully (otherwise false is returned), and every LookupRecord is
+ * copied with the GSUB or GPOS lookup map chosen by c->table_tag.  The
+ * results of the LookupRecord copies are not checked here. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ auto *out = c->serializer->start_embed (this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+
+ out->format = format;
+ out->glyphCount = glyphCount;
+ out->lookupCount = lookupCount;
+
+ auto coverages = coverageZ.as_array (glyphCount);
+
+ for (const OffsetTo<Coverage>& offset : coverages)
+ {
+ auto *o = c->serializer->allocate_size<OffsetTo<Coverage>> (OffsetTo<Coverage>::static_size);
+ if (unlikely (!o)) return_trace (false);
+ if (!o->serialize_subset (c, offset, this)) return_trace (false);
+ }
+
+ const LookupRecord *lookupRecord = &StructAfter<LookupRecord> (coverageZ.as_array (glyphCount));
+ const hb_map_t *lookup_map = c->table_tag == HB_OT_TAG_GSUB ? c->plan->gsub_lookups : c->plan->gpos_lookups;
+ for (unsigned i = 0; i < (unsigned) lookupCount; i++)
+ c->serializer->copy (lookupRecord[i], lookup_map);
+
+ return_trace (true);
+ }
+ /* Checks the fixed header, rejects glyphCount == 0, then checks the
+ * coverage offset array, each coverage it points to, and finally the
+ * LookupRecord array that follows the offsets. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (!c->check_struct (this)) return_trace (false);
+ unsigned int count = glyphCount;
+ if (!count) return_trace (false); /* We want to access coverageZ[0] freely. */
+ if (!c->check_array (coverageZ.arrayZ, count)) return_trace (false);
+ for (unsigned int i = 0; i < count; i++)
+ if (!coverageZ[i].sanitize (c, this)) return_trace (false);
+ const LookupRecord *lookupRecord = &StructAfter<LookupRecord> (coverageZ.as_array (glyphCount));
+ return_trace (c->check_array (lookupRecord, lookupCount));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 3 */
+ HBUINT16 glyphCount; /* Number of glyphs in the input glyph
+ * sequence */
+ HBUINT16 lookupCount; /* Number of LookupRecords */
+ UnsizedArrayOf<OffsetTo<Coverage>>
+ coverageZ; /* Array of offsets to Coverage
+ * table in glyph sequence order */
+/*UnsizedArrayOf<LookupRecord>
+ lookupRecordX;*/ /* Array of LookupRecords--in
+ * design order */
+ public:
+ DEFINE_SIZE_ARRAY (6, coverageZ);
+};
+
+struct Context /* Format-tagged union over the three Context subtable formats. */
+{ /* dispatch: forwards `c` and any extra arguments to the union member
+ * selected by u.format.  A failed c->may_dispatch () check yields
+ * c->no_dispatch_return_value (); an unknown format yields
+ * c->default_return_value (). */
+ template <typename context_t, typename ...Ts>
+ typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+ {
+ TRACE_DISPATCH (this, u.format);
+ if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ());
+ switch (u.format) {
+ case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+ case 2: return_trace (c->dispatch (u.format2, hb_forward<Ts> (ds)...));
+ case 3: return_trace (c->dispatch (u.format3, hb_forward<Ts> (ds)...));
+ default:return_trace (c->default_return_value ());
+ }
+ }
+
+ protected:
+ union {
+ HBUINT16 format; /* Format identifier */
+ ContextFormat1 format1;
+ ContextFormat2 format2;
+ ContextFormat3 format3;
+ } u;
+};
+
+
+/* Chaining Contextual lookups */
+
+struct ChainContextClosureLookupContext /* Callback set plus callback data used by the chain-context closure/intersects helpers. */
+{
+ ContextClosureFuncs funcs;
+ const void *intersects_data[3]; /* [0] backtrack, [1] input, [2] lookahead */
+};
+
+struct ChainContextCollectGlyphsLookupContext /* Callback set plus callback data used by the chain-context collect-glyphs helper. */
+{
+ ContextCollectGlyphsFuncs funcs;
+ const void *collect_data[3]; /* [0] backtrack, [1] input, [2] lookahead */
+};
+
+struct ChainContextApplyLookupContext /* Callback set plus callback data used by the chain-context apply/would-apply helpers. */
+{
+ ContextApplyFuncs funcs;
+ const void *match_data[3]; /* [0] backtrack, [1] input, [2] lookahead */
+};
+
+/* Returns true only when the backtrack, input and lookahead sequences all
+ * pass array_is_subset_of () against `glyphs`.  Each sequence is checked
+ * with lookup_context.funcs.intersects and its own intersects_data slot
+ * (0 = backtrack, 1 = input, 2 = lookahead); checking stops at the first
+ * sequence that fails. */
+static inline bool chain_context_intersects (const hb_set_t *glyphs,
+					     unsigned int backtrackCount,
+					     const HBUINT16 backtrack[],
+					     unsigned int inputCount, /* Including the first glyph (not matched) */
+					     const HBUINT16 input[], /* Array of input values--start with second glyph */
+					     unsigned int lookaheadCount,
+					     const HBUINT16 lookahead[],
+					     ChainContextClosureLookupContext &lookup_context)
+{
+  /* input[] starts at the second glyph, so it holds one value fewer
+   * than inputCount (and none at all when inputCount is zero). */
+  const unsigned int inputLen = inputCount ? inputCount - 1 : 0;
+
+  if (!array_is_subset_of (glyphs,
+			   backtrackCount, backtrack,
+			   lookup_context.funcs.intersects, lookup_context.intersects_data[0]))
+    return false;
+
+  if (!array_is_subset_of (glyphs,
+			   inputLen, input,
+			   lookup_context.funcs.intersects, lookup_context.intersects_data[1]))
+    return false;
+
+  return array_is_subset_of (glyphs,
+			     lookaheadCount, lookahead,
+			     lookup_context.funcs.intersects, lookup_context.intersects_data[2]);
+}
+
+/* Recurses into the given LookupRecords, but only when the whole chain
+ * context (backtrack, input, lookahead) intersects c->glyphs. */
+static inline void chain_context_closure_lookup (hb_closure_context_t *c,
+						 unsigned int backtrackCount,
+						 const HBUINT16 backtrack[],
+						 unsigned int inputCount, /* Including the first glyph (not matched) */
+						 const HBUINT16 input[], /* Array of input values--start with second glyph */
+						 unsigned int lookaheadCount,
+						 const HBUINT16 lookahead[],
+						 unsigned int lookupCount,
+						 const LookupRecord lookupRecord[],
+						 ChainContextClosureLookupContext &lookup_context)
+{
+  const bool reachable = chain_context_intersects (c->glyphs,
+						   backtrackCount, backtrack,
+						   inputCount, input,
+						   lookaheadCount, lookahead,
+						   lookup_context);
+  if (!reachable)
+    return;
+
+  recurse_lookups (c, lookupCount, lookupRecord);
+}
+
+/* Collects the backtrack values into c->before, the input values into
+ * c->input and the lookahead values into c->after (each through
+ * lookup_context.funcs.collect with its own collect_data slot), then
+ * recurses into the LookupRecords. */
+static inline void chain_context_collect_glyphs_lookup (hb_collect_glyphs_context_t *c,
+							unsigned int backtrackCount,
+							const HBUINT16 backtrack[],
+							unsigned int inputCount, /* Including the first glyph (not matched) */
+							const HBUINT16 input[], /* Array of input values--start with second glyph */
+							unsigned int lookaheadCount,
+							const HBUINT16 lookahead[],
+							unsigned int lookupCount,
+							const LookupRecord lookupRecord[],
+							ChainContextCollectGlyphsLookupContext &lookup_context)
+{
+  /* input[] starts at the second glyph: one value fewer than inputCount. */
+  const unsigned int inputLen = inputCount ? inputCount - 1 : 0;
+
+  collect_array (c, c->before, backtrackCount, backtrack,
+		 lookup_context.funcs.collect, lookup_context.collect_data[0]);
+  collect_array (c, c->input, inputLen, input,
+		 lookup_context.funcs.collect, lookup_context.collect_data[1]);
+  collect_array (c, c->after, lookaheadCount, lookahead,
+		 lookup_context.funcs.collect, lookup_context.collect_data[2]);
+
+  recurse_lookups (c, lookupCount, lookupRecord);
+}
+
+/* Reports whether the input sequence would match.  When c->zero_context is
+ * set, any rule that has backtrack or lookahead values is rejected up
+ * front; the backtrack/lookahead contents and the lookup records are
+ * otherwise unused here. */
+static inline bool chain_context_would_apply_lookup (hb_would_apply_context_t *c,
+						     unsigned int backtrackCount,
+						     const HBUINT16 backtrack[] HB_UNUSED,
+						     unsigned int inputCount, /* Including the first glyph (not matched) */
+						     const HBUINT16 input[], /* Array of input values--start with second glyph */
+						     unsigned int lookaheadCount,
+						     const HBUINT16 lookahead[] HB_UNUSED,
+						     unsigned int lookupCount HB_UNUSED,
+						     const LookupRecord lookupRecord[] HB_UNUSED,
+						     ChainContextApplyLookupContext &lookup_context)
+{
+  if (c->zero_context && (backtrackCount || lookaheadCount))
+    return false;
+
+  return would_match_input (c,
+			    inputCount, input,
+			    lookup_context.funcs.match, lookup_context.match_data[1]);
+}
+
+/* Matches the input sequence, then the backtrack sequence, then the
+ * lookahead sequence (in that order, stopping at the first failure).  On a
+ * full match the range [start_index, end_index) reported by the matchers
+ * is passed to unsafe_to_break_from_outbuffer () and the nested lookups
+ * are applied; the result of apply_lookup () is returned. */
+static inline bool chain_context_apply_lookup (hb_ot_apply_context_t *c,
+					       unsigned int backtrackCount,
+					       const HBUINT16 backtrack[],
+					       unsigned int inputCount, /* Including the first glyph (not matched) */
+					       const HBUINT16 input[], /* Array of input values--start with second glyph */
+					       unsigned int lookaheadCount,
+					       const HBUINT16 lookahead[],
+					       unsigned int lookupCount,
+					       const LookupRecord lookupRecord[],
+					       ChainContextApplyLookupContext &lookup_context)
+{
+  unsigned int match_length = 0;
+  unsigned int match_positions[HB_MAX_CONTEXT_LENGTH];
+  if (!match_input (c,
+		    inputCount, input,
+		    lookup_context.funcs.match, lookup_context.match_data[1],
+		    &match_length, match_positions))
+    return false;
+
+  unsigned int start_index = 0;
+  if (!match_backtrack (c,
+			backtrackCount, backtrack,
+			lookup_context.funcs.match, lookup_context.match_data[0],
+			&start_index))
+    return false;
+
+  /* The lookahead starts after the matched input, hence match_length. */
+  unsigned int end_index = 0;
+  if (!match_lookahead (c,
+			lookaheadCount, lookahead,
+			lookup_context.funcs.match, lookup_context.match_data[2],
+			match_length, &end_index))
+    return false;
+
+  c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index);
+  return apply_lookup (c,
+		       inputCount, match_positions,
+		       lookupCount, lookupRecord,
+		       match_length);
+}
+
+struct ChainRule /* One chaining rule: four variable-length arrays stored
+ * back to back (backtrack, input, lookahead, lookup
+ * records).  Only `backtrack` is addressed directly; the
+ * others are located with StructAfter on their predecessor. */
+{ /* intersects: defers to chain_context_intersects () on the three
+ * value arrays. */
+ bool intersects (const hb_set_t *glyphs, ChainContextClosureLookupContext &lookup_context) const
+ {
+ const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack);
+ const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input);
+ return chain_context_intersects (glyphs,
+ backtrack.len, backtrack.arrayZ,
+ input.lenP1, input.arrayZ,
+ lookahead.len, lookahead.arrayZ,
+ lookup_context);
+ }
+ /* Defers to chain_context_closure_lookup (); returns immediately when
+ * the closure context's lookup limit has been exceeded. */
+ void closure (hb_closure_context_t *c,
+ ChainContextClosureLookupContext &lookup_context) const
+ {
+ if (unlikely (c->lookup_limit_exceeded ())) return;
+
+ const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack);
+ const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input);
+ const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead);
+ chain_context_closure_lookup (c,
+ backtrack.len, backtrack.arrayZ,
+ input.lenP1, input.arrayZ,
+ lookahead.len, lookahead.arrayZ,
+ lookup.len, lookup.arrayZ,
+ lookup_context);
+ }
+ /* Recurses into this rule's lookup records, with the same lookup-limit
+ * guard as closure (). */
+ void closure_lookups (hb_closure_lookups_context_t *c) const
+ {
+ if (unlikely (c->lookup_limit_exceeded ())) return;
+
+ const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack);
+ const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input);
+ const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead);
+ recurse_lookups (c, lookup.len, lookup.arrayZ);
+ }
+ /* Defers to chain_context_collect_glyphs_lookup (). */
+ void collect_glyphs (hb_collect_glyphs_context_t *c,
+ ChainContextCollectGlyphsLookupContext &lookup_context) const
+ {
+ const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack);
+ const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input);
+ const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead);
+ chain_context_collect_glyphs_lookup (c,
+ backtrack.len, backtrack.arrayZ,
+ input.lenP1, input.arrayZ,
+ lookahead.len, lookahead.arrayZ,
+ lookup.len, lookup.arrayZ,
+ lookup_context);
+ }
+ /* Defers to chain_context_would_apply_lookup (). */
+ bool would_apply (hb_would_apply_context_t *c,
+ ChainContextApplyLookupContext &lookup_context) const
+ {
+ const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack);
+ const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input);
+ const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead);
+ return chain_context_would_apply_lookup (c,
+ backtrack.len, backtrack.arrayZ,
+ input.lenP1, input.arrayZ,
+ lookahead.len, lookahead.arrayZ, lookup.len,
+ lookup.arrayZ, lookup_context);
+ }
+ /* Defers to chain_context_apply_lookup (). */
+ bool apply (hb_ot_apply_context_t *c, ChainContextApplyLookupContext &lookup_context) const
+ {
+ TRACE_APPLY (this);
+ const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack);
+ const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input);
+ const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead);
+ return_trace (chain_context_apply_lookup (c,
+ backtrack.len, backtrack.arrayZ,
+ input.lenP1, input.arrayZ,
+ lookahead.len, lookahead.arrayZ, lookup.len,
+ lookup.arrayZ, lookup_context));
+ }
+ /* Writes `len` to the serializer, followed by every item produced by
+ * `it` converted to HBUINT16.  Copy results are not checked here. */
+ template<typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ void serialize_array (hb_serialize_context_t *c,
+ HBUINT16 len,
+ Iterator it) const
+ {
+ c->copy (len);
+ for (const auto g : it)
+ {
+ HBUINT16 gid;
+ gid = g;
+ c->copy (gid);
+ }
+ }
+ /* Serializes a remapped copy of this rule and returns its start, or
+ * nullptr when start_embed () or the copy of the lookup count or of a
+ * lookup record fails.  The backtrack values go through backtrack_map;
+ * the input and lookahead values use input_map / lookahead_map when
+ * given and otherwise keep using the previously selected map.  Lookup
+ * records are copied with lookup_map. */
+ ChainRule* copy (hb_serialize_context_t *c,
+ const hb_map_t *lookup_map,
+ const hb_map_t *backtrack_map,
+ const hb_map_t *input_map = nullptr,
+ const hb_map_t *lookahead_map = nullptr) const
+ {
+ TRACE_SERIALIZE (this);
+ auto *out = c->start_embed (this);
+ if (unlikely (!out)) return_trace (nullptr);
+
+ const hb_map_t *mapping = backtrack_map;
+ serialize_array (c, backtrack.len, + backtrack.iter ()
+ | hb_map (mapping));
+
+ const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack);
+ if (input_map) mapping = input_map;
+ serialize_array (c, input.lenP1, + input.iter ()
+ | hb_map (mapping));
+
+ const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input);
+ if (lookahead_map) mapping = lookahead_map;
+ serialize_array (c, lookahead.len, + lookahead.iter ()
+ | hb_map (mapping));
+
+ const ArrayOf<LookupRecord> &lookupRecord = StructAfter<ArrayOf<LookupRecord>> (lookahead);
+ HBUINT16 lookupCount;
+ lookupCount = lookupRecord.len;
+ if (!c->copy (lookupCount)) return_trace (nullptr);
+
+ for (unsigned i = 0; i < (unsigned) lookupCount; i++)
+ if (!c->copy (lookupRecord[i], lookup_map)) return_trace (nullptr);
+
+ return_trace (out);
+ }
+ /* Subsets this rule.  Without a backtrack_map, every backtrack, input
+ * and lookahead value must be in the plan's glyphset and the rule is
+ * copied through the plan's glyph_map.  With a backtrack_map, each array
+ * is instead checked against (and copied through) its own map.  Returns
+ * false when a value is missing; the result of copy () itself is not
+ * checked, so true is returned once the checks pass. */
+ bool subset (hb_subset_context_t *c,
+ const hb_map_t *lookup_map,
+ const hb_map_t *backtrack_map = nullptr,
+ const hb_map_t *input_map = nullptr,
+ const hb_map_t *lookahead_map = nullptr) const
+ {
+ TRACE_SUBSET (this);
+
+ const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack);
+ const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input);
+
+ if (!backtrack_map)
+ {
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ if (!hb_all (backtrack, glyphset) ||
+ !hb_all (input, glyphset) ||
+ !hb_all (lookahead, glyphset))
+ return_trace (false);
+
+ copy (c->serializer, lookup_map, c->plan->glyph_map);
+ }
+ else
+ {
+ if (!hb_all (backtrack, backtrack_map) ||
+ !hb_all (input, input_map) ||
+ !hb_all (lookahead, lookahead_map))
+ return_trace (false);
+
+ copy (c->serializer, lookup_map, backtrack_map, input_map, lookahead_map);
+ }
+
+ return_trace (true);
+ }
+ /* Sanitizes the four arrays in storage order; each array must pass
+ * before the next one is located behind it with StructAfter. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (!backtrack.sanitize (c)) return_trace (false);
+ const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack);
+ if (!input.sanitize (c)) return_trace (false);
+ const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input);
+ if (!lookahead.sanitize (c)) return_trace (false);
+ const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead);
+ return_trace (lookup.sanitize (c));
+ }
+
+ protected:
+ ArrayOf<HBUINT16>
+ backtrack; /* Array of backtracking values
+ * (to be matched before the input
+ * sequence) */
+ HeadlessArrayOf<HBUINT16>
+ inputX; /* Array of input values (start with
+ * second glyph) */
+ ArrayOf<HBUINT16>
+ lookaheadX; /* Array of lookahead values (to be
+ * matched after the input sequence) */
+ ArrayOf<LookupRecord>
+ lookupX; /* Array of LookupRecords--in
+ * design order */
+ public:
+ DEFINE_SIZE_MIN (8);
+};
+
+struct ChainRuleSet /* Array of offsets to ChainRule tables, ordered by
+ * preference.  Every method below resolves the offsets
+ * relative to this table (hb_add (this)) and forwards to
+ * the individual rules. */
+{ /* intersects: true if any rule's intersects () returns true. */
+ bool intersects (const hb_set_t *glyphs, ChainContextClosureLookupContext &lookup_context) const
+ {
+ return
+ + hb_iter (rule)
+ | hb_map (hb_add (this))
+ | hb_map ([&] (const ChainRule &_) { return _.intersects (glyphs, lookup_context); })
+ | hb_any
+ ;
+ } /* closure (below): calls closure () on every rule unless the lookup limit is exceeded. */
+ void closure (hb_closure_context_t *c, ChainContextClosureLookupContext &lookup_context) const
+ {
+ if (unlikely (c->lookup_limit_exceeded ())) return;
+
+ return
+ + hb_iter (rule)
+ | hb_map (hb_add (this))
+ | hb_apply ([&] (const ChainRule &_) { _.closure (c, lookup_context); })
+ ;
+ }
+ /* Calls closure_lookups () on every rule, with the same lookup-limit
+ * guard as closure (). */
+ void closure_lookups (hb_closure_lookups_context_t *c) const
+ {
+ if (unlikely (c->lookup_limit_exceeded ())) return;
+
+ return
+ + hb_iter (rule)
+ | hb_map (hb_add (this))
+ | hb_apply ([&] (const ChainRule &_) { _.closure_lookups (c); })
+ ;
+ }
+ /* Calls collect_glyphs () on every rule. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c, ChainContextCollectGlyphsLookupContext &lookup_context) const
+ {
+ return
+ + hb_iter (rule)
+ | hb_map (hb_add (this))
+ | hb_apply ([&] (const ChainRule &_) { _.collect_glyphs (c, lookup_context); })
+ ;
+ }
+ /* True if any rule's would_apply () returns true. */
+ bool would_apply (hb_would_apply_context_t *c, ChainContextApplyLookupContext &lookup_context) const
+ {
+ return
+ + hb_iter (rule)
+ | hb_map (hb_add (this))
+ | hb_map ([&] (const ChainRule &_) { return _.would_apply (c, lookup_context); })
+ | hb_any
+ ;
+ }
+ /* Tries the rules in array order (the array is ordered by preference)
+ * and reports whether any rule's apply () returned true. */
+ bool apply (hb_ot_apply_context_t *c, ChainContextApplyLookupContext &lookup_context) const
+ {
+ TRACE_APPLY (this);
+ return_trace (
+ + hb_iter (rule)
+ | hb_map (hb_add (this))
+ | hb_map ([&] (const ChainRule &_) { return _.apply (c, lookup_context); })
+ | hb_any
+ )
+ ;
+ }
+ /* Serializes a subset copy of this rule set.  Offsets that test false
+ * are skipped, as are rules for which no output slot could be appended.
+ * When serialize_subset () fails for a rule, its slot is popped and the
+ * serializer is reverted to the snapshot taken right after the append.
+ * If no rule survives, everything written here is reverted and false
+ * is returned.  The three klass maps are forwarded unchanged. */
+ bool subset (hb_subset_context_t *c,
+ const hb_map_t *lookup_map,
+ const hb_map_t *backtrack_klass_map = nullptr,
+ const hb_map_t *input_klass_map = nullptr,
+ const hb_map_t *lookahead_klass_map = nullptr) const
+ {
+ TRACE_SUBSET (this);
+
+ auto snap = c->serializer->snapshot ();
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+
+ for (const OffsetTo<ChainRule>& _ : rule)
+ {
+ if (!_) continue;
+ auto *o = out->rule.serialize_append (c->serializer);
+ if (unlikely (!o)) continue;
+
+ auto o_snap = c->serializer->snapshot ();
+ if (!o->serialize_subset (c, _, this,
+ lookup_map,
+ backtrack_klass_map,
+ input_klass_map,
+ lookahead_klass_map))
+ {
+ out->rule.pop ();
+ c->serializer->revert (o_snap);
+ }
+ }
+
+ bool ret = bool (out->rule);
+ if (!ret) c->serializer->revert (snap);
+
+ return_trace (ret);
+ }
+ /* Sanitizes the offset array, with offsets based at this table. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (rule.sanitize (c, this));
+ }
+
+ protected:
+ OffsetArrayOf<ChainRule>
+ rule; /* Array of ChainRule tables
+ * ordered by preference */
+ public:
+ DEFINE_SIZE_ARRAY (2, rule);
+};
+
+struct ChainContextFormat1 /* Chaining context subtable, format 1: a Coverage
+ * table plus one ChainRuleSet offset per Coverage
+ * Index.  Rules are matched with the *_glyph
+ * callbacks and no extra data (three nullptrs). */
+{ /* intersects: true if any ChainRuleSet paired (via hb_zip) with a
+ * covered glyph contained in `glyphs` reports an intersection. */
+ bool intersects (const hb_set_t *glyphs) const
+ {
+ struct ChainContextClosureLookupContext lookup_context = {
+ {intersects_glyph},
+ {nullptr, nullptr, nullptr}
+ };
+
+ return
+ + hb_zip (this+coverage, ruleSet)
+ | hb_filter (*glyphs, hb_first)
+ | hb_map (hb_second)
+ | hb_map (hb_add (this))
+ | hb_map ([&] (const ChainRuleSet &_) { return _.intersects (glyphs, lookup_context); })
+ | hb_any
+ ;
+ }
+ /* Runs closure () on each ChainRuleSet whose covered glyph is in
+ * c->glyphs. */
+ void closure (hb_closure_context_t *c) const
+ {
+ struct ChainContextClosureLookupContext lookup_context = {
+ {intersects_glyph},
+ {nullptr, nullptr, nullptr}
+ };
+
+ + hb_zip (this+coverage, ruleSet)
+ | hb_filter (*c->glyphs, hb_first)
+ | hb_map (hb_second)
+ | hb_map (hb_add (this))
+ | hb_apply ([&] (const ChainRuleSet &_) { _.closure (c, lookup_context); })
+ ;
+ }
+ /* Runs closure_lookups () on every ChainRuleSet (no coverage filtering). */
+ void closure_lookups (hb_closure_lookups_context_t *c) const
+ {
+ + hb_iter (ruleSet)
+ | hb_map (hb_add (this))
+ | hb_apply ([&] (const ChainRuleSet &_) { _.closure_lookups (c); })
+ ;
+ }
+ /* Intentionally empty for this format. */
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c) const {}
+ /* Adds the coverage to c->input, then lets every ChainRuleSet collect
+ * its glyphs using the collect_glyph callback. */
+ void collect_glyphs (hb_collect_glyphs_context_t *c) const
+ {
+ (this+coverage).collect_coverage (c->input);
+
+ struct ChainContextCollectGlyphsLookupContext lookup_context = {
+ {collect_glyph},
+ {nullptr, nullptr, nullptr}
+ };
+
+ + hb_iter (ruleSet)
+ | hb_map (hb_add (this))
+ | hb_apply ([&] (const ChainRuleSet &_) { _.collect_glyphs (c, lookup_context); })
+ ;
+ }
+ /* Picks the ChainRuleSet at the Coverage Index of c->glyphs[0] and asks
+ * it whether it would apply.  The index is used without a NOT_COVERED
+ * check; presumably an out-of-range index resolves to an empty rule set
+ * -- verify against the array/offset accessors. */
+ bool would_apply (hb_would_apply_context_t *c) const
+ {
+ const ChainRuleSet &rule_set = this+ruleSet[(this+coverage).get_coverage (c->glyphs[0])];
+ struct ChainContextApplyLookupContext lookup_context = {
+ {match_glyph},
+ {nullptr, nullptr, nullptr}
+ };
+ return rule_set.would_apply (c, lookup_context);
+ }
+ /* Returns the Coverage table referenced by this subtable. */
+ const Coverage &get_coverage () const { return this+coverage; }
+ /* Applies the ChainRuleSet selected by the Coverage Index of the current
+ * buffer glyph; returns false when that glyph is NOT_COVERED. */
+ bool apply (hb_ot_apply_context_t *c) const
+ {
+ TRACE_APPLY (this);
+ unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint);
+ if (likely (index == NOT_COVERED)) return_trace (false);
+
+ const ChainRuleSet &rule_set = this+ruleSet[index];
+ struct ChainContextApplyLookupContext lookup_context = {
+ {match_glyph},
+ {nullptr, nullptr, nullptr}
+ };
+ return_trace (rule_set.apply (c, lookup_context));
+ }
+ /* Serializes a subset copy.  A (glyph, ChainRuleSet) pair is kept when
+ * the glyph is in the plan's glyphset and subset_offset_array () accepts
+ * the rule set (that filter also writes into out->ruleSet).  Kept glyphs
+ * are remapped through the plan's glyph_map and become the new Coverage.
+ * Returns whether the new coverage is non-empty.  The lookup map is the
+ * GSUB or GPOS one depending on c->table_tag. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ const hb_set_t &glyphset = *c->plan->glyphset ();
+ const hb_map_t &glyph_map = *c->plan->glyph_map;
+
+ auto *out = c->serializer->start_embed (*this);
+ if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+ out->format = format;
+
+ const hb_map_t *lookup_map = c->table_tag == HB_OT_TAG_GSUB ? c->plan->gsub_lookups : c->plan->gpos_lookups;
+ hb_sorted_vector_t<hb_codepoint_t> new_coverage;
+ + hb_zip (this+coverage, ruleSet)
+ | hb_filter (glyphset, hb_first)
+ | hb_filter (subset_offset_array (c, out->ruleSet, this, lookup_map), hb_second)
+ | hb_map (hb_first)
+ | hb_map (glyph_map)
+ | hb_sink (new_coverage)
+ ;
+
+ out->coverage.serialize (c->serializer, out)
+ .serialize (c->serializer, new_coverage.iter ());
+ return_trace (bool (new_coverage));
+ }
+ /* Sanitizes the coverage offset and the ChainRuleSet offset array, both
+ * based at this table. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (coverage.sanitize (c, this) && ruleSet.sanitize (c, this));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier--format = 1 */
+ OffsetTo<Coverage>
+ coverage; /* Offset to Coverage table--from
+ * beginning of table */
+ OffsetArrayOf<ChainRuleSet>
+ ruleSet; /* Array of ChainRuleSet tables
+ * ordered by Coverage Index */
+ public:
+ DEFINE_SIZE_ARRAY (6, ruleSet);
+};
+
+/* Chaining context lookup, format 2 (class based).  Sequences are expressed
+ * as class values looked up in three ClassDef tables -- one each for the
+ * backtrack, input and lookahead parts -- and ruleSet is indexed by the
+ * input class of the first glyph. */
+struct ChainContextFormat2
+{
+  /* Returns false right away when the coverage does not intersect `glyphs`.
+   * Otherwise returns true if some rule set, whose index is an input class
+   * that intersects `glyphs`, itself intersects `glyphs`. */
+  bool intersects (const hb_set_t *glyphs) const
+  {
+    if (!(this+coverage).intersects (glyphs))
+      return false;
+
+    const ClassDef &backtrack_class_def = this+backtrackClassDef;
+    const ClassDef &input_class_def = this+inputClassDef;
+    const ClassDef &lookahead_class_def = this+lookaheadClassDef;
+
+    struct ChainContextClosureLookupContext lookup_context = {
+      {intersects_class},
+      {&backtrack_class_def,
+       &input_class_def,
+       &lookahead_class_def}
+    };
+
+    return
+    + hb_iter (ruleSet)
+    | hb_map (hb_add (this))
+    | hb_enumerate
+    | hb_map ([&] (const hb_pair_t<unsigned, const ChainRuleSet &> p)
+	      { return input_class_def.intersects_class (glyphs, p.first) &&
+		       p.second.intersects (glyphs, lookup_context); })
+    | hb_any
+    ;
+  }
+
+  /* Runs closure() on every rule set whose index is an input class that
+   * intersects c->glyphs.  Does nothing when the coverage does not
+   * intersect c->glyphs. */
+  void closure (hb_closure_context_t *c) const
+  {
+    if (!(this+coverage).intersects (c->glyphs))
+      return;
+
+    const ClassDef &backtrack_class_def = this+backtrackClassDef;
+    const ClassDef &input_class_def = this+inputClassDef;
+    const ClassDef &lookahead_class_def = this+lookaheadClassDef;
+
+    struct ChainContextClosureLookupContext lookup_context = {
+      {intersects_class},
+      {&backtrack_class_def,
+       &input_class_def,
+       &lookahead_class_def}
+    };
+
+    + hb_enumerate (ruleSet)
+    | hb_filter ([&] (unsigned _)
+		 { return input_class_def.intersects_class (c->glyphs, _); },
+		 hb_first)
+    | hb_map (hb_second)
+    | hb_map (hb_add (this))
+    | hb_apply ([&] (const ChainRuleSet &_) { _.closure (c, lookup_context); })
+    ;
+  }
+
+  /* Forwards closure_lookups() to every rule set. */
+  void closure_lookups (hb_closure_lookups_context_t *c) const
+  {
+    + hb_iter (ruleSet)
+    | hb_map (hb_add (this))
+    | hb_apply ([&] (const ChainRuleSet &_) { _.closure_lookups (c); })
+    ;
+  }
+
+  /* Intentionally empty: this format contributes no variation indices. */
+  void collect_variation_indices (hb_collect_variation_indices_context_t *c) const {}
+
+  /* Adds the coverage to c->input, then lets every rule set collect its
+   * glyphs using the class-based collect callback. */
+  void collect_glyphs (hb_collect_glyphs_context_t *c) const
+  {
+    (this+coverage).collect_coverage (c->input);
+
+    const ClassDef &backtrack_class_def = this+backtrackClassDef;
+    const ClassDef &input_class_def = this+inputClassDef;
+    const ClassDef &lookahead_class_def = this+lookaheadClassDef;
+
+    struct ChainContextCollectGlyphsLookupContext lookup_context = {
+      {collect_class},
+      {&backtrack_class_def,
+       &input_class_def,
+       &lookahead_class_def}
+    };
+
+    + hb_iter (ruleSet)
+    | hb_map (hb_add (this))
+    | hb_apply ([&] (const ChainRuleSet &_) { _.collect_glyphs (c, lookup_context); })
+    ;
+  }
+
+  /* Asks the rule set selected by the input class of the first glyph in
+   * c->glyphs whether it would apply.  Coverage is not consulted here. */
+  bool would_apply (hb_would_apply_context_t *c) const
+  {
+    const ClassDef &backtrack_class_def = this+backtrackClassDef;
+    const ClassDef &input_class_def = this+inputClassDef;
+    const ClassDef &lookahead_class_def = this+lookaheadClassDef;
+
+    unsigned int index = input_class_def.get_class (c->glyphs[0]);
+    const ChainRuleSet &rule_set = this+ruleSet[index];
+    struct ChainContextApplyLookupContext lookup_context = {
+      {match_class},
+      {&backtrack_class_def,
+       &input_class_def,
+       &lookahead_class_def}
+    };
+    return rule_set.would_apply (c, lookup_context);
+  }
+
+  /* Returns the Coverage table that the coverage offset points at. */
+  const Coverage &get_coverage () const { return this+coverage; }
+
+  /* Applies the lookup at the buffer's current glyph.  The glyph must be
+   * covered; the rule set is then chosen by the glyph's input class. */
+  bool apply (hb_ot_apply_context_t *c) const
+  {
+    TRACE_APPLY (this);
+    unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint);
+    if (likely (index == NOT_COVERED)) return_trace (false);
+
+    const ClassDef &backtrack_class_def = this+backtrackClassDef;
+    const ClassDef &input_class_def = this+inputClassDef;
+    const ClassDef &lookahead_class_def = this+lookaheadClassDef;
+
+    /* From here on `index` is the input class, not the coverage index. */
+    index = input_class_def.get_class (c->buffer->cur().codepoint);
+    const ChainRuleSet &rule_set = this+ruleSet[index];
+    struct ChainContextApplyLookupContext lookup_context = {
+      {match_class},
+      {&backtrack_class_def,
+       &input_class_def,
+       &lookahead_class_def}
+    };
+    return_trace (rule_set.apply (c, lookup_context));
+  }
+
+  /* Serializes a subsetted copy of this table through c->serializer.
+   * Coverage and the three ClassDefs are subset first (each ClassDef fills
+   * a class remapping), then every rule set whose index is a key of the
+   * input class map is appended and subset.  Empty trailing rule sets are
+   * pruned.  Returns false on serializer failure, otherwise
+   * bool (out->ruleSet). */
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->start_embed (*this);
+    if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
+    out->format = format;
+    out->coverage.serialize_subset (c, coverage, this);
+
+    hb_map_t backtrack_klass_map;
+    out->backtrackClassDef.serialize_subset (c, backtrackClassDef, this, &backtrack_klass_map);
+    if (unlikely (!c->serializer->check_success (!backtrack_klass_map.in_error ())))
+      return_trace (false);
+
+    // subset inputClassDef based on glyphs survived in Coverage subsetting
+    hb_map_t input_klass_map;
+    out->inputClassDef.serialize_subset (c, inputClassDef, this, &input_klass_map);
+    if (unlikely (!c->serializer->check_success (!input_klass_map.in_error ())))
+      return_trace (false);
+
+    hb_map_t lookahead_klass_map;
+    out->lookaheadClassDef.serialize_subset (c, lookaheadClassDef, this, &lookahead_klass_map);
+    if (unlikely (!c->serializer->check_success (!lookahead_klass_map.in_error ())))
+      return_trace (false);
+
+    /* `index` counts appended rule sets; `non_zero_index` remembers the last
+     * one whose subsetting reported success. */
+    unsigned non_zero_index = 0, index = 0;
+    bool ret = true;
+    const hb_map_t *lookup_map = c->table_tag == HB_OT_TAG_GSUB ? c->plan->gsub_lookups : c->plan->gpos_lookups;
+    for (const OffsetTo<ChainRuleSet>& _ : + hb_enumerate (ruleSet)
+					   | hb_filter (input_klass_map, hb_first)
+					   | hb_map (hb_second))
+    {
+      auto *o = out->ruleSet.serialize_append (c->serializer);
+      if (unlikely (!o))
+      {
+	ret = false;
+	break;
+      }
+      if (o->serialize_subset (c, _, this,
+			       lookup_map,
+			       &backtrack_klass_map,
+			       &input_klass_map,
+			       &lookahead_klass_map))
+	non_zero_index = index;
+
+      index++;
+    }
+
+    if (!ret) return_trace (ret);
+
+    /* Prune empty trailing ruleSets.  When nothing was appended above there
+     * is nothing to pop, and decrementing the unsigned counter would wrap
+     * around and send the loop through billions of bogus pop() calls. */
+    if (index)
+    {
+      --index;
+      while (index > non_zero_index)
+      {
+	out->ruleSet.pop ();
+	index--;
+      }
+    }
+
+    return_trace (bool (out->ruleSet));
+  }
+
+  /* Validates the coverage, the three ClassDef offsets and the rule-set
+   * offset array, in that order, all relative to this table. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (coverage.sanitize (c, this) &&
+		  backtrackClassDef.sanitize (c, this) &&
+		  inputClassDef.sanitize (c, this) &&
+		  lookaheadClassDef.sanitize (c, this) &&
+		  ruleSet.sanitize (c, this));
+  }
+
+  protected:
+  HBUINT16	format;			/* Format identifier--format = 2 */
+  OffsetTo<Coverage>
+		coverage;		/* Offset to Coverage table--from
+					 * beginning of table */
+  OffsetTo<ClassDef>
+		backtrackClassDef;	/* Offset to glyph ClassDef table
+					 * containing backtrack sequence
+					 * data--from beginning of table */
+  OffsetTo<ClassDef>
+		inputClassDef;		/* Offset to glyph ClassDef
+					 * table containing input sequence
+					 * data--from beginning of table */
+  OffsetTo<ClassDef>
+		lookaheadClassDef;	/* Offset to glyph ClassDef table
+					 * containing lookahead sequence
+					 * data--from beginning of table */
+  OffsetArrayOf<ChainRuleSet>
+		ruleSet;		/* Array of ChainRuleSet tables
+					 * ordered by class */
+  public:
+  DEFINE_SIZE_ARRAY (12, ruleSet);
+};
+
+/* Chaining context lookup, format 3 (coverage based).  The table is a
+ * format field followed by three variable-length arrays of Coverage
+ * offsets (backtrack, input, lookahead) and an array of LookupRecords.
+ * Only `backtrack` sits at a fixed position; the arrays after it are
+ * located at run time with StructAfter<>. */
+struct ChainContextFormat3
+{
+  /* Returns false when the first input coverage does not intersect
+   * `glyphs`; otherwise defers to chain_context_intersects().  The first
+   * input coverage is checked here, so the input array is passed starting
+   * at its second element. */
+  bool intersects (const hb_set_t *glyphs) const
+  {
+    const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack);
+
+    if (!(this+input[0]).intersects (glyphs))
+      return false;
+
+    const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input);
+    struct ChainContextClosureLookupContext lookup_context = {
+      {intersects_coverage},
+      {this, this, this}
+    };
+    return chain_context_intersects (glyphs,
+				     backtrack.len, (const HBUINT16 *) backtrack.arrayZ,
+				     input.len, (const HBUINT16 *) input.arrayZ + 1,
+				     lookahead.len, (const HBUINT16 *) lookahead.arrayZ,
+				     lookup_context);
+  }
+
+  /* Runs the closure over this rule's lookups via
+   * chain_context_closure_lookup().  Does nothing when the first input
+   * coverage does not intersect c->glyphs. */
+  void closure (hb_closure_context_t *c) const
+  {
+    const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack);
+
+    if (!(this+input[0]).intersects (c->glyphs))
+      return;
+
+    const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input);
+    const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead);
+    struct ChainContextClosureLookupContext lookup_context = {
+      {intersects_coverage},
+      {this, this, this}
+    };
+    chain_context_closure_lookup (c,
+				  backtrack.len, (const HBUINT16 *) backtrack.arrayZ,
+				  input.len, (const HBUINT16 *) input.arrayZ + 1,
+				  lookahead.len, (const HBUINT16 *) lookahead.arrayZ,
+				  lookup.len, lookup.arrayZ,
+				  lookup_context);
+  }
+
+  /* Passes this rule's LookupRecords to recurse_lookups(). */
+  void closure_lookups (hb_closure_lookups_context_t *c) const
+  {
+    const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack);
+    const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input);
+    const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead);
+    recurse_lookups (c, lookup.len, lookup.arrayZ);
+  }
+
+  /* Intentionally empty: this format contributes no variation indices. */
+  void collect_variation_indices (hb_collect_variation_indices_context_t *c) const {}
+
+  /* Adds the first input coverage to c->input, then hands the remaining
+   * sequences and the lookups to chain_context_collect_glyphs_lookup(). */
+  void collect_glyphs (hb_collect_glyphs_context_t *c) const
+  {
+    const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack);
+
+    (this+input[0]).collect_coverage (c->input);
+
+    const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input);
+    const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead);
+    struct ChainContextCollectGlyphsLookupContext lookup_context = {
+      {collect_coverage},
+      {this, this, this}
+    };
+    chain_context_collect_glyphs_lookup (c,
+					 backtrack.len, (const HBUINT16 *) backtrack.arrayZ,
+					 input.len, (const HBUINT16 *) input.arrayZ + 1,
+					 lookahead.len, (const HBUINT16 *) lookahead.arrayZ,
+					 lookup.len, lookup.arrayZ,
+					 lookup_context);
+  }
+
+  /* Returns the result of chain_context_would_apply_lookup() for this
+   * rule, matching by coverage. */
+  bool would_apply (hb_would_apply_context_t *c) const
+  {
+    const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack);
+    const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input);
+    const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead);
+    struct ChainContextApplyLookupContext lookup_context = {
+      {match_coverage},
+      {this, this, this}
+    };
+    return chain_context_would_apply_lookup (c,
+					     backtrack.len, (const HBUINT16 *) backtrack.arrayZ,
+					     input.len, (const HBUINT16 *) input.arrayZ + 1,
+					     lookahead.len, (const HBUINT16 *) lookahead.arrayZ,
+					     lookup.len, lookup.arrayZ, lookup_context);
+  }
+
+  /* Returns the first input Coverage table. */
+  const Coverage &get_coverage () const
+  {
+    const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack);
+    return this+input[0];
+  }
+
+  /* Applies the lookup at the buffer's current glyph.  Returns false when
+   * the first input coverage does not cover it; otherwise returns the
+   * result of chain_context_apply_lookup(). */
+  bool apply (hb_ot_apply_context_t *c) const
+  {
+    TRACE_APPLY (this);
+    const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack);
+
+    unsigned int index = (this+input[0]).get_coverage (c->buffer->cur().codepoint);
+    if (likely (index == NOT_COVERED)) return_trace (false);
+
+    const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input);
+    const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead);
+    struct ChainContextApplyLookupContext lookup_context = {
+      {match_coverage},
+      {this, this, this}
+    };
+    return_trace (chain_context_apply_lookup (c,
+					      backtrack.len, (const HBUINT16 *) backtrack.arrayZ,
+					      input.len, (const HBUINT16 *) input.arrayZ + 1,
+					      lookahead.len, (const HBUINT16 *) lookahead.arrayZ,
+					      lookup.len, lookup.arrayZ, lookup_context));
+  }
+
+  /* Serializes one array of Coverage offsets: a 16-bit count followed by
+   * one subsetted Coverage per offset yielded by `it`, resolved against
+   * `base`.
+   *
+   * Returns true on success, including for an empty input array, because
+   * empty backtrack and lookahead sequences are legitimate.  Returns false
+   * as soon as an offset cannot be appended or its Coverage fails to
+   * subset: a sequence position left without a coverage could never match,
+   * and dropping it would silently shorten the matched sequence. */
+  template<typename Iterator,
+	   hb_requires (hb_is_iterator (Iterator))>
+  bool serialize_coverage_offsets (hb_subset_context_t *c, Iterator it, const void* base) const
+  {
+    TRACE_SERIALIZE (this);
+    auto *out = c->serializer->start_embed<OffsetArrayOf<Coverage>> ();
+
+    /* Room for the array's length field. */
+    if (unlikely (!c->serializer->allocate_size<HBUINT16> (HBUINT16::static_size))) return_trace (false);
+
+    for (auto &offset : it)
+    {
+      auto *o = out->serialize_append (c->serializer);
+      if (unlikely (!o) || !o->serialize_subset (c, offset, base))
+	return_trace (false);
+    }
+
+    return_trace (true);
+  }
+
+  /* Serializes a subsetted copy of this table: the format, the three
+   * coverage-offset arrays, then the lookup count and every LookupRecord
+   * copied with the lookup map of the table being subset.  Returns false
+   * on the first failure. */
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+
+    auto *out = c->serializer->start_embed (this);
+    if (unlikely (!out)) return_trace (false);
+    if (unlikely (!c->serializer->embed (this->format))) return_trace (false);
+
+    if (!serialize_coverage_offsets (c, backtrack.iter (), this))
+      return_trace (false);
+
+    const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack);
+    if (!serialize_coverage_offsets (c, input.iter (), this))
+      return_trace (false);
+
+    const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input);
+    if (!serialize_coverage_offsets (c, lookahead.iter (), this))
+      return_trace (false);
+
+    const ArrayOf<LookupRecord> &lookupRecord = StructAfter<ArrayOf<LookupRecord>> (lookahead);
+    HBUINT16 lookupCount;
+    lookupCount = lookupRecord.len;
+    if (!c->serializer->copy (lookupCount)) return_trace (false);
+
+    const hb_map_t *lookup_map = c->table_tag == HB_OT_TAG_GSUB ? c->plan->gsub_lookups : c->plan->gpos_lookups;
+    for (unsigned i = 0; i < (unsigned) lookupCount; i++)
+      if (!c->serializer->copy (lookupRecord[i], lookup_map)) return_trace (false);
+
+    return_trace (true);
+  }
+
+  /* Validates the arrays in storage order; each one must pass before the
+   * next can be located with StructAfter<>.  An empty input array is
+   * rejected. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!backtrack.sanitize (c, this)) return_trace (false);
+    const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack);
+    if (!input.sanitize (c, this)) return_trace (false);
+    if (!input.len) return_trace (false); /* To be consistent with Context. */
+    const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input);
+    if (!lookahead.sanitize (c, this)) return_trace (false);
+    const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead);
+    return_trace (lookup.sanitize (c));
+  }
+
+  protected:
+  HBUINT16	format;			/* Format identifier--format = 3 */
+  OffsetArrayOf<Coverage>
+		backtrack;		/* Array of coverage tables
+					 * in backtracking sequence, in glyph
+					 * sequence order */
+  OffsetArrayOf<Coverage>
+		inputX	;		/* Array of coverage
+					 * tables in input sequence, in glyph
+					 * sequence order */
+  OffsetArrayOf<Coverage>
+		lookaheadX;		/* Array of coverage tables
+					 * in lookahead sequence, in glyph
+					 * sequence order */
+  ArrayOf<LookupRecord>
+		lookupX;		/* Array of LookupRecords--in
+					 * design order */
+  public:
+  DEFINE_SIZE_MIN (10);
+};
+
+/* Format-tagged union over the three ChainContext subtable layouts. */
+struct ChainContext
+{
+  /* Hands the operation carried by `c` to the subtable selected by the
+   * format field.  Yields c->no_dispatch_return_value () when the context
+   * refuses to dispatch, and c->default_return_value () for any format
+   * other than 1, 2 or 3. */
+  template <typename context_t, typename ...Ts>
+  typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+  {
+    TRACE_DISPATCH (this, u.format);
+    if (unlikely (!c->may_dispatch (this, &u.format)))
+      return_trace (c->no_dispatch_return_value ());
+
+    const unsigned int subtable_format = u.format;
+    if (subtable_format == 1)
+      return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...));
+    if (subtable_format == 2)
+      return_trace (c->dispatch (u.format2, hb_forward<Ts> (ds)...));
+    if (subtable_format == 3)
+      return_trace (c->dispatch (u.format3, hb_forward<Ts> (ds)...));
+    return_trace (c->default_return_value ());
+  }
+
+  protected:
+  union {
+  HBUINT16		format;		/* Format identifier */
+  ChainContextFormat1	format1;
+  ChainContextFormat2	format2;
+  ChainContextFormat3	format3;
+  } u;
+};
+
+
+template <typename T>
+struct ExtensionFormat1 /* Format-1 extension subtable: records a lookup type and a 32-bit offset to the subtable of that type. */
+{
+ unsigned int get_type () const { return extensionLookupType; } /* Lookup type of the referenced subtable. */
+
+ template <typename X>
+ const X& get_subtable () const /* Resolves extensionOffset relative to this struct, viewing it as a 32-bit offset to T::SubTable; X only names the return type. */
+ { return this + reinterpret_cast<const LOffsetTo<typename T::SubTable> &> (extensionOffset); }
+
+ template <typename context_t, typename ...Ts>
+ typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const /* Forwards the operation to the referenced subtable, passing get_type () as its lookup type. */
+ {
+ TRACE_DISPATCH (this, format);
+ if (unlikely (!c->may_dispatch (this, this))) return_trace (c->no_dispatch_return_value ()); /* the context may refuse to dispatch into this struct */
+ return_trace (get_subtable<typename T::SubTable> ().dispatch (c, get_type (), hb_forward<Ts> (ds)...));
+ }
+
+ void collect_variation_indices (hb_collect_variation_indices_context_t *c) const /* Routes the collection through dispatch (). */
+ { dispatch (c); }
+
+ /* This is called from may_dispatch() above with hb_sanitize_context_t. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ extensionLookupType != T::SubTable::Extension); /* reject an extension whose referenced lookup type is itself Extension */
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier. Set to 1. */
+ HBUINT16 extensionLookupType; /* Lookup type of subtable referenced
+ * by ExtensionOffset (i.e. the
+ * extension subtable). */
+ Offset32 extensionOffset; /* Offset to the extension subtable,
+ * of lookup type subtable. */
+ public:
+ DEFINE_SIZE_STATIC (8);
+};
+
+/* Format-tagged wrapper around the extension subtable; format 1 is the
+ * only layout handled here. */
+template <typename T>
+struct Extension
+{
+  /* Lookup type recorded by the format-1 subtable, or 0 for any other
+   * format. */
+  unsigned int get_type () const
+  {
+    const unsigned int fmt = u.format;
+    if (fmt != 1)
+      return 0;
+    return u.format1.get_type ();
+  }
+
+  /* Subtable referenced by the format-1 layout, or the Null object of
+   * T::SubTable for any other format. */
+  template <typename X>
+  const X& get_subtable () const
+  {
+    const unsigned int fmt = u.format;
+    if (fmt != 1)
+      return Null (typename T::SubTable);
+    return u.format1.template get_subtable<typename T::SubTable> ();
+  }
+
+  /* Forwards the operation carried by `c` to the format-1 subtable.
+   * Yields c->no_dispatch_return_value () when the context refuses to
+   * dispatch, and c->default_return_value () for unknown formats. */
+  template <typename context_t, typename ...Ts>
+  typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+  {
+    TRACE_DISPATCH (this, u.format);
+    if (unlikely (!c->may_dispatch (this, &u.format)))
+      return_trace (c->no_dispatch_return_value ());
+
+    const unsigned int fmt = u.format;
+    if (fmt == 1)
+      return_trace (u.format1.dispatch (c, hb_forward<Ts> (ds)...));
+    return_trace (c->default_return_value ());
+  }
+
+  protected:
+  union {
+  HBUINT16		format;		/* Format identifier */
+  ExtensionFormat1<T>	format1;
+  } u;
+};
+
+
+/*
+ * GSUB/GPOS Common
+ */
+
+/* Per-lookup helper that holds a coverage digest and the list of subtables
+ * gathered from one lookup. */
+struct hb_ot_layout_lookup_accelerator_t
+{
+  /* Builds the digest from the lookup's coverage, then gathers the lookup's
+   * subtables by dispatching a hb_get_subtables_context_t over it. */
+  template <typename TLookup>
+  void init (const TLookup &lookup)
+  {
+    digest.init ();
+    lookup.collect_coverage (&digest);
+
+    subtables.init ();
+    OT::hb_get_subtables_context_t subtable_collector (subtables);
+    lookup.dispatch (&subtable_collector);
+  }
+
+  /* Releases the subtable list. */
+  void fini ()
+  {
+    subtables.fini ();
+  }
+
+  /* Forwards to the digest's may_have () test for glyph `g`. */
+  bool may_have (hb_codepoint_t g) const
+  {
+    return digest.may_have (g);
+  }
+
+  /* Tries the subtables in order and stops at the first one whose apply ()
+   * succeeds.  Returns whether any did. */
+  bool apply (hb_ot_apply_context_t *c) const
+  {
+    unsigned int idx = 0;
+    while (idx < subtables.length)
+    {
+      if (subtables[idx].apply (c))
+	return true;
+      idx++;
+    }
+    return false;
+  }
+
+  private:
+  hb_set_digest_t digest;
+  hb_get_subtables_context_t::array_t subtables;
+};
+
+/* Header layout shared by the GSUB and GPOS tables: a version followed by
+ * offsets to the script, feature and lookup lists and, from version
+ * 0x00010001 on, to the feature variations table. */
+struct GSUBGPOS
+{
+  /* True when the version field is non-zero. */
+  bool has_data () const
+  {
+    return version.to_int ();
+  }
+
+  /* --- Script list accessors: thin forwards to the ScriptList table. --- */
+
+  unsigned int get_script_count () const
+  {
+    return (this+scriptList).len;
+  }
+  const Tag& get_script_tag (unsigned int i) const
+  {
+    return (this+scriptList).get_tag (i);
+  }
+  unsigned int get_script_tags (unsigned int  start_offset,
+				unsigned int *script_count /* IN/OUT */,
+				hb_tag_t     *script_tags /* OUT */) const
+  {
+    return (this+scriptList).get_tags (start_offset, script_count, script_tags);
+  }
+  const Script& get_script (unsigned int i) const
+  {
+    return (this+scriptList)[i];
+  }
+  bool find_script_index (hb_tag_t tag, unsigned int *index) const
+  {
+    return (this+scriptList).find_index (tag, index);
+  }
+
+  /* --- Feature list accessors: thin forwards to the FeatureList table. --- */
+
+  unsigned int get_feature_count () const
+  {
+    return (this+featureList).len;
+  }
+  /* Yields HB_TAG_NONE for Index::NOT_FOUND_INDEX instead of consulting the
+   * feature list. */
+  hb_tag_t get_feature_tag (unsigned int i) const
+  {
+    return i == Index::NOT_FOUND_INDEX ? HB_TAG_NONE : (this+featureList).get_tag (i);
+  }
+  unsigned int get_feature_tags (unsigned int  start_offset,
+				 unsigned int *feature_count /* IN/OUT */,
+				 hb_tag_t     *feature_tags /* OUT */) const
+  {
+    return (this+featureList).get_tags (start_offset, feature_count, feature_tags);
+  }
+  const Feature& get_feature (unsigned int i) const
+  {
+    return (this+featureList)[i];
+  }
+  bool find_feature_index (hb_tag_t tag, unsigned int *index) const
+  {
+    return (this+featureList).find_index (tag, index);
+  }
+
+  /* --- Lookup list accessors. --- */
+
+  unsigned int get_lookup_count () const
+  {
+    return (this+lookupList).len;
+  }
+  const Lookup& get_lookup (unsigned int i) const
+  {
+    return (this+lookupList)[i];
+  }
+
+  /* Asks the feature variations table for the index matching `coords`.
+   * Tables older than version 0x00010001 are treated as having the Null
+   * FeatureVariations.  With HB_NO_VAR defined the answer is always "not
+   * found". */
+  bool find_variations_index (const int *coords, unsigned int num_coords,
+			      unsigned int *index) const
+  {
+#ifdef HB_NO_VAR
+    *index = FeatureVariations::NOT_FOUND_INDEX;
+    return false;
+#endif
+    const FeatureVariations &variations = version.to_int () >= 0x00010001u
+					? this+featureVars
+					: Null (FeatureVariations);
+    return variations.find_index (coords, num_coords, index);
+  }
+
+  /* Returns the substitute feature recorded for (variations_index,
+   * feature_index) when there is one, and the regular feature otherwise. */
+  const Feature& get_feature_variation (unsigned int feature_index,
+					unsigned int variations_index) const
+  {
+#ifndef HB_NO_VAR
+    const bool consult_variations = FeatureVariations::NOT_FOUND_INDEX != variations_index &&
+				    version.to_int () >= 0x00010001u;
+    if (consult_variations)
+    {
+      if (const Feature *substitute = (this+featureVars).find_substitute (variations_index,
+									  feature_index))
+	return *substitute;
+    }
+#endif
+    return get_feature (feature_index);
+  }
+
+  /* Lets the feature variations table (version 0x00010001 and later) add
+   * its lookups for `feature_indexes` to `lookup_indexes`. */
+  void feature_variation_collect_lookups (const hb_set_t *feature_indexes,
+					  hb_set_t       *lookup_indexes /* OUT */) const
+  {
+#ifndef HB_NO_VAR
+    if (version.to_int () < 0x00010001u)
+      return;
+    (this+featureVars).collect_lookups (feature_indexes, lookup_indexes);
+#endif
+  }
+
+  /* Runs closure_lookups () on every lookup listed in `lookup_indexes`,
+   * then merges in the visited lookups and removes the inactive ones. */
+  template <typename TLookup>
+  void closure_lookups (hb_face_t      *face,
+			const hb_set_t *glyphs,
+			hb_set_t       *lookup_indexes /* IN/OUT */) const
+  {
+    hb_set_t visited_lookups, inactive_lookups;
+    OT::hb_closure_lookups_context_t closure_ctx (face, glyphs, &visited_lookups, &inactive_lookups);
+
+    for (unsigned lookup_index : + hb_iter (lookup_indexes))
+    {
+      const TLookup &lookup = reinterpret_cast<const TLookup &> (get_lookup (lookup_index));
+      lookup.closure_lookups (&closure_ctx, lookup_index);
+    }
+
+    hb_set_union (lookup_indexes, &visited_lookups);
+    hb_set_subtract (lookup_indexes, &inactive_lookups);
+  }
+
+  /* Embeds a copy of the header and subsets the lookup, feature and script
+   * lists into it, in that order.  If the feature variations table fails to
+   * subset, the output version is rewritten to 1.0.  Returns false only
+   * when the header cannot be embedded. */
+  template <typename TLookup>
+  bool subset (hb_subset_layout_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->subset_context->serializer->embed (*this);
+    if (unlikely (!out)) return_trace (false);
+
+    using TLookupList = LookupOffsetList<TLookup>;
+    auto &out_lookups = reinterpret_cast<OffsetTo<TLookupList> &> (out->lookupList);
+    out_lookups.serialize_subset (c->subset_context,
+				  reinterpret_cast<const OffsetTo<TLookupList> &> (lookupList),
+				  this,
+				  c);
+
+    auto &out_features = reinterpret_cast<OffsetTo<RecordListOfFeature> &> (out->featureList);
+    out_features.serialize_subset (c->subset_context,
+				   reinterpret_cast<const OffsetTo<RecordListOfFeature> &> (featureList),
+				   this,
+				   c);
+
+    out->scriptList.serialize_subset (c->subset_context, scriptList, this, c);
+
+#ifndef HB_NO_VAR
+    if (version.to_int () >= 0x00010001u)
+    {
+      if (!out->featureVars.serialize_subset (c->subset_context, featureVars, this, c))
+      {
+	out->version.major = 1;
+	out->version.minor = 0;
+      }
+    }
+#endif
+
+    return_trace (true);
+  }
+
+  /* Adds to `feature_indexes` every feature (up to HB_MAX_FEATURES) that
+   * has feature parameters or intersects `lookup_indexes`, then lets the
+   * feature variations table add its own. */
+  void closure_features (const hb_map_t *lookup_indexes, /* IN */
+			 hb_set_t       *feature_indexes /* OUT */) const
+  {
+    const unsigned int limit = hb_min (get_feature_count (), (unsigned) HB_MAX_FEATURES);
+    for (unsigned idx = 0; idx < limit; idx++)
+    {
+      const Feature& feature = get_feature (idx);
+      if (feature.featureParams.is_null () && !feature.intersects_lookup_indexes (lookup_indexes))
+	continue;
+      feature_indexes->add (idx);
+    }
+#ifndef HB_NO_VAR
+    if (version.to_int () >= 0x00010001u)
+      (this+featureVars).closure_features (lookup_indexes, feature_indexes);
+#endif
+  }
+
+  /* Header size: min_size, plus the featureVars offset for version
+   * 0x00010001 and later. */
+  unsigned int get_size () const
+  {
+    unsigned int size = min_size;
+    if (version.to_int () >= 0x00010001u)
+      size += featureVars.static_size;
+    return size;
+  }
+
+  /* Validates the version (major must be 1), then the script, feature and
+   * lookup lists, and finally the feature variations offset for version
+   * 0x00010001 and later. */
+  template <typename TLookup>
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    using TLookupList = OffsetListOf<TLookup>;
+
+    if (unlikely (!version.sanitize (c))) return_trace (false);
+    if (unlikely (!(version.major == 1))) return_trace (false);
+    if (unlikely (!scriptList.sanitize (c, this))) return_trace (false);
+    if (unlikely (!featureList.sanitize (c, this))) return_trace (false);
+    if (unlikely (!reinterpret_cast<const OffsetTo<TLookupList> &> (lookupList).sanitize (c, this)))
+      return_trace (false);
+
+#ifndef HB_NO_VAR
+    if (unlikely (version.to_int () >= 0x00010001u && !featureVars.sanitize (c, this)))
+      return_trace (false);
+#endif
+
+    return_trace (true);
+  }
+
+  /* Holds the sanitized table blob together with one lookup accelerator per
+   * lookup. */
+  template <typename T>
+  struct accelerator_t
+  {
+    /* Loads and sanitizes the table from `face`, swaps in the empty blob if
+     * the table is blocklisted, and initializes one accelerator per lookup.
+     * If the accelerator array cannot be allocated the lookup count is set
+     * to zero. */
+    void init (hb_face_t *face)
+    {
+      this->table = hb_sanitize_context_t ().reference_table<T> (face);
+      if (unlikely (this->table->is_blocklisted (this->table.get_blob (), face)))
+      {
+	hb_blob_destroy (this->table.get_blob ());
+	this->table = hb_blob_get_empty ();
+      }
+
+      this->lookup_count = table->get_lookup_count ();
+
+      void *storage = calloc (this->lookup_count, sizeof (hb_ot_layout_lookup_accelerator_t));
+      this->accels = static_cast<hb_ot_layout_lookup_accelerator_t *> (storage);
+      if (unlikely (!this->accels))
+	this->lookup_count = 0;
+
+      unsigned int idx = 0;
+      while (idx < this->lookup_count)
+      {
+	this->accels[idx].init (table->get_lookup (idx));
+	idx++;
+      }
+    }
+
+    /* Finalizes every accelerator, frees the array and drops the table. */
+    void fini ()
+    {
+      unsigned int idx = 0;
+      while (idx < this->lookup_count)
+      {
+	this->accels[idx].fini ();
+	idx++;
+      }
+      free (this->accels);
+      this->table.destroy ();
+    }
+
+    hb_blob_ptr_t<T> table;
+    unsigned int lookup_count;
+    hb_ot_layout_lookup_accelerator_t *accels;
+  };
+
+  protected:
+  FixedVersion<>version;	/* Version of the GSUB/GPOS table--initially set
+				 * to 0x00010000u */
+  OffsetTo<ScriptList>
+		scriptList;	/* ScriptList table */
+  OffsetTo<FeatureList>
+		featureList;	/* FeatureList table */
+  OffsetTo<LookupList>
+		lookupList;	/* LookupList table */
+  LOffsetTo<FeatureVariations>
+		featureVars;	/* Offset to Feature Variations
+				 * table--from beginning of table
+				 * (may be NULL).  Introduced
+				 * in version 0x00010001. */
+  public:
+  DEFINE_SIZE_MIN (10);
+};
+
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_LAYOUT_GSUBGPOS_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-jstf-table.hh b/thirdparty/harfbuzz/src/hb-ot-layout-jstf-table.hh
new file mode 100644
index 0000000000..ffd2bf4574
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-layout-jstf-table.hh
@@ -0,0 +1,235 @@
+/*
+ * Copyright © 2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_LAYOUT_JSTF_TABLE_HH
+#define HB_OT_LAYOUT_JSTF_TABLE_HH
+
+#include "hb-open-type.hh"
+#include "hb-ot-layout-gpos-table.hh"
+
+
+namespace OT {
+
+
+/*
+ * JstfModList -- Justification Modification List Tables
+ */
+
+typedef IndexArray JstfModList;
+
+
+/*
+ * JstfMax -- Justification Maximum Table
+ */
+
+typedef OffsetListOf<PosLookup> JstfMax;
+
+
+/*
+ * JstfPriority -- Justification Priority Table
+ */
+
+/* One justification priority level: ten 16-bit offsets, five for shrinkage
+ * followed by five for extension, each relative to the start of this
+ * table. */
+struct JstfPriority
+{
+  /* Validates the struct bounds and then every offset in declaration
+   * order, stopping at the first failure. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+
+    const bool shrinkage_ok = c->check_struct (this) &&
+			      shrinkageEnableGSUB.sanitize (c, this) &&
+			      shrinkageDisableGSUB.sanitize (c, this) &&
+			      shrinkageEnableGPOS.sanitize (c, this) &&
+			      shrinkageDisableGPOS.sanitize (c, this) &&
+			      shrinkageJstfMax.sanitize (c, this);
+    if (!shrinkage_ok)
+      return_trace (false);
+
+    return_trace (extensionEnableGSUB.sanitize (c, this) &&
+		  extensionDisableGSUB.sanitize (c, this) &&
+		  extensionEnableGPOS.sanitize (c, this) &&
+		  extensionDisableGPOS.sanitize (c, this) &&
+		  extensionJstfMax.sanitize (c, this));
+  }
+
+  protected:
+  /* Every offset below is measured from the beginning of the JstfPriority
+   * table and may be NULL. */
+  OffsetTo<JstfModList>
+		shrinkageEnableGSUB;	/* Shrinkage Enable GSUB JstfModList */
+  OffsetTo<JstfModList>
+		shrinkageDisableGSUB;	/* Shrinkage Disable GSUB JstfModList */
+  OffsetTo<JstfModList>
+		shrinkageEnableGPOS;	/* Shrinkage Enable GPOS JstfModList */
+  OffsetTo<JstfModList>
+		shrinkageDisableGPOS;	/* Shrinkage Disable GPOS JstfModList */
+  OffsetTo<JstfMax>
+		shrinkageJstfMax;	/* Shrinkage JstfMax table */
+  OffsetTo<JstfModList>
+		extensionEnableGSUB;	/* Extension Enable GSUB JstfModList */
+  OffsetTo<JstfModList>
+		extensionDisableGSUB;	/* Extension Disable GSUB JstfModList */
+  OffsetTo<JstfModList>
+		extensionEnableGPOS;	/* Extension Enable GPOS JstfModList */
+  OffsetTo<JstfModList>
+		extensionDisableGPOS;	/* Extension Disable GPOS JstfModList */
+  OffsetTo<JstfMax>
+		extensionJstfMax;	/* Extension JstfMax table */
+
+  public:
+  DEFINE_SIZE_STATIC (20);
+};
+
+
+/*
+ * JstfLangSys -- Justification Language System Table
+ */
+
+/* A justification language system: a list of offsets to JstfPriority
+ * tables. */
+struct JstfLangSys : OffsetListOf<JstfPriority>
+{
+  /* Validates the underlying offset list.  The unnamed closure parameter is
+   * accepted and ignored. */
+  bool sanitize (hb_sanitize_context_t *c,
+		 const Record_sanitize_closure_t * = nullptr) const
+  {
+    TRACE_SANITIZE (this);
+    const bool list_ok = OffsetListOf<JstfPriority>::sanitize (c);
+    return_trace (list_ok);
+  }
+};
+
+
+/*
+ * ExtenderGlyphs -- Extender Glyph Table
+ */
+
+typedef SortedArrayOf<HBGlyphID> ExtenderGlyphs;
+
+
+/*
+ * JstfScript -- The Justification Table
+ */
+
+/* Justification data for one script: extender glyphs, a default language
+ * system and a tagged array of language systems. */
+struct JstfScript
+{
+  unsigned int get_lang_sys_count () const
+  {
+    return langSys.len;
+  }
+  const Tag& get_lang_sys_tag (unsigned int i) const
+  {
+    return langSys.get_tag (i);
+  }
+  unsigned int get_lang_sys_tags (unsigned int  start_offset,
+				  unsigned int *lang_sys_count /* IN/OUT */,
+				  hb_tag_t     *lang_sys_tags /* OUT */) const
+  {
+    return langSys.get_tags (start_offset, lang_sys_count, lang_sys_tags);
+  }
+  /* Index::NOT_FOUND_INDEX selects the default language system. */
+  const JstfLangSys& get_lang_sys (unsigned int i) const
+  {
+    return i == Index::NOT_FOUND_INDEX ? get_default_lang_sys ()
+				       : this+langSys[i].offset;
+  }
+  bool find_lang_sys_index (hb_tag_t tag, unsigned int *index) const
+  {
+    return langSys.find_index (tag, index);
+  }
+
+  /* True when the default language system offset is non-zero. */
+  bool has_default_lang_sys () const
+  {
+    return defaultLangSys != 0;
+  }
+  const JstfLangSys& get_default_lang_sys () const
+  {
+    return this+defaultLangSys;
+  }
+
+  /* Validates the extender glyphs, the default language system and the
+   * language system records, in that order.  The unnamed closure parameter
+   * is accepted and ignored. */
+  bool sanitize (hb_sanitize_context_t *c,
+		 const Record_sanitize_closure_t * = nullptr) const
+  {
+    TRACE_SANITIZE (this);
+    if (!extenderGlyphs.sanitize (c, this)) return_trace (false);
+    if (!defaultLangSys.sanitize (c, this)) return_trace (false);
+    return_trace (langSys.sanitize (c, this));
+  }
+
+  protected:
+  OffsetTo<ExtenderGlyphs>
+		extenderGlyphs;	/* Offset to ExtenderGlyph table--from beginning
+				 * of JstfScript table--may be NULL */
+  OffsetTo<JstfLangSys>
+		defaultLangSys;	/* Offset to DefaultJstfLangSys table--from
+				 * beginning of JstfScript table--may be NULL */
+  RecordArrayOf<JstfLangSys>
+		langSys;	/* Array of JstfLangSysRecords--listed
+				 * alphabetically by LangSysTag */
+  public:
+  DEFINE_SIZE_ARRAY (6, langSys);
+};
+
+
+/*
+ * JSTF -- Justification
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/jstf
+ */
+
+/* Top-level JSTF table: a version followed by a tagged array of
+ * JstfScript records. */
+struct JSTF
+{
+  static constexpr hb_tag_t tableTag = HB_OT_TAG_JSTF;
+
+  unsigned int get_script_count () const
+  {
+    return scriptList.len;
+  }
+  const Tag& get_script_tag (unsigned int i) const
+  {
+    return scriptList.get_tag (i);
+  }
+  unsigned int get_script_tags (unsigned int  start_offset,
+				unsigned int *script_count /* IN/OUT */,
+				hb_tag_t     *script_tags /* OUT */) const
+  {
+    return scriptList.get_tags (start_offset, script_count, script_tags);
+  }
+  /* Resolves the i-th record's offset relative to this table. */
+  const JstfScript& get_script (unsigned int i) const
+  {
+    return this+scriptList[i].offset;
+  }
+  bool find_script_index (hb_tag_t tag, unsigned int *index) const
+  {
+    return scriptList.find_index (tag, index);
+  }
+
+  /* Validates the version (major must be 1) and then the script records. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!version.sanitize (c)) return_trace (false);
+    if (!likely (version.major == 1)) return_trace (false);
+    return_trace (scriptList.sanitize (c, this));
+  }
+
+  protected:
+  FixedVersion<>version;	/* Version of the JSTF table--initially set
+				 * to 0x00010000u */
+  RecordArrayOf<JstfScript>
+		scriptList;	/* Array of JstfScripts--listed
+				 * alphabetically by ScriptTag */
+  public:
+  DEFINE_SIZE_ARRAY (6, scriptList);
+};
+
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_LAYOUT_JSTF_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-layout.cc b/thirdparty/harfbuzz/src/hb-ot-layout.cc
new file mode 100644
index 0000000000..46408bb9d3
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-layout.cc
@@ -0,0 +1,1993 @@
+/*
+ * Copyright © 1998-2004 David Turner and Werner Lemberg
+ * Copyright © 2006 Behdad Esfahbod
+ * Copyright © 2007,2008,2009 Red Hat, Inc.
+ * Copyright © 2012,2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_LAYOUT
+
+#ifdef HB_NO_OT_TAG
+#error "Cannot compile hb-ot-layout.cc with HB_NO_OT_TAG."
+#endif
+
+#include "hb-open-type.hh"
+#include "hb-ot-layout.hh"
+#include "hb-ot-face.hh"
+#include "hb-ot-map.hh"
+#include "hb-map.hh"
+
+#include "hb-ot-kern-table.hh"
+#include "hb-ot-layout-gdef-table.hh"
+#include "hb-ot-layout-gsub-table.hh"
+#include "hb-ot-layout-gpos-table.hh"
+#include "hb-ot-layout-base-table.hh" // Just so we compile it; unused otherwise.
+#include "hb-ot-layout-jstf-table.hh" // Just so we compile it; unused otherwise.
+#include "hb-ot-name-table.hh"
+#include "hb-ot-os2-table.hh"
+
+#include "hb-aat-layout-morx-table.hh"
+#include "hb-aat-layout-opbd-table.hh" // Just so we compile it; unused otherwise.
+
+/**
+ * SECTION:hb-ot-layout
+ * @title: hb-ot-layout
+ * @short_description: OpenType Layout
+ * @include: hb-ot.h
+ *
+ * Functions for querying OpenType Layout features in the font face.
+ **/
+
+
+/*
+ * kern
+ */
+
+#ifndef HB_NO_OT_KERN
+/**
+ * hb_ot_layout_has_kerning:
+ * @face: The #hb_face_t to work on
+ *
+ * Tests whether a face includes any kerning data in the 'kern' table.
+ * Does NOT test for kerning lookups in the GPOS table.
+ *
+ * Return value: true if data found, false otherwise
+ *
+ **/
+bool
+hb_ot_layout_has_kerning (hb_face_t *face)
+{
+  /* Ask the face's 'kern' table accessor whether it holds any data. */
+  const bool kern_present = face->table.kern->has_data ();
+  return kern_present;
+}
+
+/**
+ * hb_ot_layout_has_machine_kerning:
+ * @face: The #hb_face_t to work on
+ *
+ * Tests whether a face includes any state-machine kerning in the 'kern' table.
+ * Does NOT examine the GPOS table.
+ *
+ * Return value: true if data found, false otherwise
+ *
+ **/
+bool
+hb_ot_layout_has_machine_kerning (hb_face_t *face)
+{
+  /* Delegates to the 'kern' table's state-machine query. */
+  const bool machine_kern_present = face->table.kern->has_state_machine ();
+  return machine_kern_present;
+}
+
+/**
+ * hb_ot_layout_has_cross_kerning:
+ * @face: The #hb_face_t to work on
+ *
+ * Tests whether a face has any cross-stream kerning (i.e., kerns
+ * that make adjustments perpendicular to the direction of the text
+ * flow: Y adjustments in horizontal text or X adjustments in
+ * vertical text) in the 'kern' table.
+ *
+ * Does NOT examine the GPOS table.
+ *
+ * Return value: true if data found, false otherwise
+ *
+ **/
+bool
+hb_ot_layout_has_cross_kerning (hb_face_t *face)
+{
+  /* Delegates to the 'kern' table's cross-stream query. */
+  const bool cross_kern_present = face->table.kern->has_cross_stream ();
+  return cross_kern_present;
+}
+
+void
+hb_ot_layout_kern (const hb_ot_shape_plan_t *plan,
+                   hb_font_t *font,
+                   hb_buffer_t *buffer)
+{
+  /* Fetch the 'kern' blob from the face and view it as an AAT::kern table. */
+  hb_blob_t *kern_blob = font->face->table.kern.get_blob ();
+  const AAT::kern *kern_table = kern_blob->as<AAT::kern> ();
+
+  /* The apply context is built from the plan, font, buffer and the same blob. */
+  AAT::hb_aat_apply_context_t apply_ctx (plan, font, buffer, kern_blob);
+  kern_table->apply (&apply_ctx);
+}
+#endif
+
+
+/*
+ * GDEF
+ */
+
+bool
+OT::GDEF::is_blocklisted (hb_blob_t *blob,
+ hb_face_t *face) const
+{
+#ifdef HB_NO_OT_LAYOUT_BLACKLIST
+ return false;
+#endif
+ /* Returns true when the font matches one of the known-bad entries listed
+ * below, and false otherwise. A font is matched by the triple made of
+ * the length of `blob` and the lengths of the face's GSUB and GPOS
+ * tables, packed with HB_CODEPOINT_ENCODE3 and compared against each
+ * case label. When HB_NO_OT_LAYOUT_BLACKLIST is defined, the early
+ * return above makes this function always report false.
+ *
+ * The ugly business of blocklisting individual fonts' tables happens here!
+ * See this thread for why we finally had to bend in and do this:
+ * https://lists.freedesktop.org/archives/harfbuzz/2016-February/005489.html
+ *
+ * In certain versions of Times New Roman Italic and Bold Italic,
+ * ASCII double quotation mark U+0022 has wrong glyph class 3 (mark)
+ * in GDEF. Many versions of Tahoma have bad GDEF tables that
+ * incorrectly classify some spacing marks such as certain IPA
+ * symbols as glyph class 3. So do older versions of Microsoft
+ * Himalaya, and the version of Cantarell shipped by Ubuntu 16.04.
+ *
+ * Nuke the GDEF tables of these fonts to avoid unwanted width-zeroing.
+ *
+ * See https://bugzilla.mozilla.org/show_bug.cgi?id=1279925
+ * https://bugzilla.mozilla.org/show_bug.cgi?id=1279693
+ * https://bugzilla.mozilla.org/show_bug.cgi?id=1279875
+ */
+ switch HB_CODEPOINT_ENCODE3(blob->length,
+ face->table.GSUB->table.get_length (),
+ face->table.GPOS->table.get_length ())
+ {
+ /* sha1sum:c5ee92f0bca4bfb7d06c4d03e8cf9f9cf75d2e8a Windows 7? timesi.ttf */
+ case HB_CODEPOINT_ENCODE3 (442, 2874, 42038):
+ /* sha1sum:37fc8c16a0894ab7b749e35579856c73c840867b Windows 7? timesbi.ttf */
+ case HB_CODEPOINT_ENCODE3 (430, 2874, 40662):
+ /* sha1sum:19fc45110ea6cd3cdd0a5faca256a3797a069a80 Windows 7 timesi.ttf */
+ case HB_CODEPOINT_ENCODE3 (442, 2874, 39116):
+ /* sha1sum:6d2d3c9ed5b7de87bc84eae0df95ee5232ecde26 Windows 7 timesbi.ttf */
+ case HB_CODEPOINT_ENCODE3 (430, 2874, 39374):
+ /* sha1sum:8583225a8b49667c077b3525333f84af08c6bcd8 OS X 10.11.3 Times New Roman Italic.ttf */
+ case HB_CODEPOINT_ENCODE3 (490, 3046, 41638):
+ /* sha1sum:ec0f5a8751845355b7c3271d11f9918a966cb8c9 OS X 10.11.3 Times New Roman Bold Italic.ttf */
+ case HB_CODEPOINT_ENCODE3 (478, 3046, 41902):
+ /* sha1sum:96eda93f7d33e79962451c6c39a6b51ee893ce8c tahoma.ttf from Windows 8 */
+ case HB_CODEPOINT_ENCODE3 (898, 12554, 46470):
+ /* sha1sum:20928dc06014e0cd120b6fc942d0c3b1a46ac2bc tahomabd.ttf from Windows 8 */
+ case HB_CODEPOINT_ENCODE3 (910, 12566, 47732):
+ /* sha1sum:4f95b7e4878f60fa3a39ca269618dfde9721a79e tahoma.ttf from Windows 8.1 */
+ case HB_CODEPOINT_ENCODE3 (928, 23298, 59332):
+ /* sha1sum:6d400781948517c3c0441ba42acb309584b73033 tahomabd.ttf from Windows 8.1 */
+ case HB_CODEPOINT_ENCODE3 (940, 23310, 60732):
+ /* tahoma.ttf v6.04 from Windows 8.1 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */
+ case HB_CODEPOINT_ENCODE3 (964, 23836, 60072):
+ /* tahomabd.ttf v6.04 from Windows 8.1 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */
+ case HB_CODEPOINT_ENCODE3 (976, 23832, 61456):
+ /* sha1sum:e55fa2dfe957a9f7ec26be516a0e30b0c925f846 tahoma.ttf from Windows 10 */
+ case HB_CODEPOINT_ENCODE3 (994, 24474, 60336):
+ /* sha1sum:7199385abb4c2cc81c83a151a7599b6368e92343 tahomabd.ttf from Windows 10 */
+ case HB_CODEPOINT_ENCODE3 (1006, 24470, 61740):
+ /* tahoma.ttf v6.91 from Windows 10 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */
+ case HB_CODEPOINT_ENCODE3 (1006, 24576, 61346):
+ /* tahomabd.ttf v6.91 from Windows 10 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */
+ case HB_CODEPOINT_ENCODE3 (1018, 24572, 62828):
+ /* sha1sum:b9c84d820c49850d3d27ec498be93955b82772b5 tahoma.ttf from Windows 10 AU */
+ case HB_CODEPOINT_ENCODE3 (1006, 24576, 61352):
+ /* sha1sum:2bdfaab28174bdadd2f3d4200a30a7ae31db79d2 tahomabd.ttf from Windows 10 AU */
+ case HB_CODEPOINT_ENCODE3 (1018, 24572, 62834):
+ /* sha1sum:b0d36cf5a2fbe746a3dd277bffc6756a820807a7 Tahoma.ttf from Mac OS X 10.9 */
+ case HB_CODEPOINT_ENCODE3 (832, 7324, 47162):
+ /* sha1sum:12fc4538e84d461771b30c18b5eb6bd434e30fba Tahoma Bold.ttf from Mac OS X 10.9 */
+ case HB_CODEPOINT_ENCODE3 (844, 7302, 45474):
+ /* sha1sum:eb8afadd28e9cf963e886b23a30b44ab4fd83acc himalaya.ttf from Windows 7 */
+ case HB_CODEPOINT_ENCODE3 (180, 13054, 7254):
+ /* sha1sum:73da7f025b238a3f737aa1fde22577a6370f77b0 himalaya.ttf from Windows 8 */
+ case HB_CODEPOINT_ENCODE3 (192, 12638, 7254):
+ /* sha1sum:6e80fd1c0b059bbee49272401583160dc1e6a427 himalaya.ttf from Windows 8.1 */
+ case HB_CODEPOINT_ENCODE3 (192, 12690, 7254):
+ /* 8d9267aea9cd2c852ecfb9f12a6e834bfaeafe44 cantarell-fonts-0.0.21/otf/Cantarell-Regular.otf */
+ /* 983988ff7b47439ab79aeaf9a45bd4a2c5b9d371 cantarell-fonts-0.0.21/otf/Cantarell-Oblique.otf */
+ case HB_CODEPOINT_ENCODE3 (188, 248, 3852):
+ /* 2c0c90c6f6087ffbfea76589c93113a9cbb0e75f cantarell-fonts-0.0.21/otf/Cantarell-Bold.otf */
+ /* 55461f5b853c6da88069ffcdf7f4dd3f8d7e3e6b cantarell-fonts-0.0.21/otf/Cantarell-Bold-Oblique.otf */
+ case HB_CODEPOINT_ENCODE3 (188, 264, 3426):
+ /* d125afa82a77a6475ac0e74e7c207914af84b37a padauk-2.80/Padauk.ttf RHEL 7.2 */
+ case HB_CODEPOINT_ENCODE3 (1058, 47032, 11818):
+ /* 0f7b80437227b90a577cc078c0216160ae61b031 padauk-2.80/Padauk-Bold.ttf RHEL 7.2*/
+ case HB_CODEPOINT_ENCODE3 (1046, 47030, 12600):
+ /* d3dde9aa0a6b7f8f6a89ef1002e9aaa11b882290 padauk-2.80/Padauk.ttf Ubuntu 16.04 */
+ case HB_CODEPOINT_ENCODE3 (1058, 71796, 16770):
+ /* 5f3c98ccccae8a953be2d122c1b3a77fd805093f padauk-2.80/Padauk-Bold.ttf Ubuntu 16.04 */
+ case HB_CODEPOINT_ENCODE3 (1046, 71790, 17862):
+ /* 6c93b63b64e8b2c93f5e824e78caca555dc887c7 padauk-2.80/Padauk-book.ttf */
+ case HB_CODEPOINT_ENCODE3 (1046, 71788, 17112):
+ /* d89b1664058359b8ec82e35d3531931125991fb9 padauk-2.80/Padauk-bookbold.ttf */
+ case HB_CODEPOINT_ENCODE3 (1058, 71794, 17514):
+ /* 824cfd193aaf6234b2b4dc0cf3c6ef576c0d00ef padauk-3.0/Padauk-book.ttf */
+ case HB_CODEPOINT_ENCODE3 (1330, 109904, 57938):
+ /* 91fcc10cf15e012d27571e075b3b4dfe31754a8a padauk-3.0/Padauk-bookbold.ttf */
+ case HB_CODEPOINT_ENCODE3 (1330, 109904, 58972):
+ /* sha1sum: c26e41d567ed821bed997e937bc0c41435689e85 Padauk.ttf
+ * "Padauk Regular" "Version 2.5", see https://crbug.com/681813 */
+ case HB_CODEPOINT_ENCODE3 (1004, 59092, 14836):
+ return true;
+ }
+ return false;
+}
+
+static void
+_hb_ot_layout_set_glyph_props (hb_font_t *font,
+                               hb_buffer_t *buffer)
+{
+  _hb_buffer_assert_gsubgpos_vars (buffer);
+
+  const OT::GDEF &gdef_table = *font->face->table.GDEF->table;
+  const unsigned int glyph_count = buffer->len;
+
+  /* For every glyph: take its properties from GDEF, then reset the ligature
+   * properties and the syllable value. */
+  unsigned int idx = 0;
+  while (idx < glyph_count)
+  {
+    hb_glyph_info_t &glyph = buffer->info[idx];
+    _hb_glyph_info_set_glyph_props (&glyph, gdef_table.get_glyph_props (glyph.codepoint));
+    _hb_glyph_info_clear_lig_props (&glyph);
+    glyph.syllable() = 0;
+    idx++;
+  }
+}
+
+/* Public API */
+
+/**
+ * hb_ot_layout_has_glyph_classes:
+ * @face: #hb_face_t to work upon
+ *
+ * Tests whether a face has any glyph classes defined in its GDEF table.
+ *
+ * Return value: true if data found, false otherwise
+ *
+ **/
+hb_bool_t
+hb_ot_layout_has_glyph_classes (hb_face_t *face)
+{
+  /* Forward the query to the face's GDEF table. */
+  const hb_bool_t classes_present = face->table.GDEF->table->has_glyph_classes ();
+  return classes_present;
+}
+
+/**
+ * hb_ot_layout_get_glyph_class:
+ * @face: The #hb_face_t to work on
+ * @glyph: The #hb_codepoint_t code point to query
+ *
+ * Fetches the GDEF class of the requested glyph in the specified face.
+ *
+ * Return value: The #hb_ot_layout_glyph_class_t glyph class of the given code
+ * point in the GDEF table of the face.
+ *
+ * Since: 0.9.7
+ **/
+hb_ot_layout_glyph_class_t
+hb_ot_layout_get_glyph_class (hb_face_t *face,
+                              hb_codepoint_t glyph)
+{
+  /* The GDEF lookup result is converted to the public enum type. */
+  return static_cast<hb_ot_layout_glyph_class_t> (face->table.GDEF->table->get_glyph_class (glyph));
+}
+
+/**
+ * hb_ot_layout_get_glyphs_in_class:
+ * @face: The #hb_face_t to work on
+ * @klass: The #hb_ot_layout_glyph_class_t GDEF class to retrieve
+ * @glyphs: (out): The #hb_set_t set of all glyphs belonging to the requested
+ * class.
+ *
+ * Retrieves the set of all glyphs from the face that belong to the requested
+ * glyph class in the face's GDEF table.
+ *
+ * Since: 0.9.7
+ **/
+void
+hb_ot_layout_get_glyphs_in_class (hb_face_t *face,
+                                  hb_ot_layout_glyph_class_t klass,
+                                  hb_set_t *glyphs /* OUT */)
+{
+  /* The GDEF table fills `glyphs`; nothing is returned to the caller. */
+  face->table.GDEF->table->get_glyphs_in_class (klass, glyphs);
+}
+
+#ifndef HB_NO_LAYOUT_UNUSED
+/**
+ * hb_ot_layout_get_attach_points:
+ * @face: The #hb_face_t to work on
+ * @glyph: The #hb_codepoint_t code point to query
+ * @start_offset: offset of the first attachment point to retrieve
+ * @point_count: (inout) (allow-none): Input = the maximum number of attachment points to return;
+ * Output = the actual number of attachment points returned (may be zero)
+ * @point_array: (out) (array length=point_count): The array of attachment points found for the query
+ *
+ * Fetches a list of all attachment points for the specified glyph in the GDEF
+ * table of the face. The list returned will begin at the offset provided.
+ *
+ * Useful if the client program wishes to cache the list.
+ *
+ **/
+unsigned int
+hb_ot_layout_get_attach_points (hb_face_t *face,
+                                hb_codepoint_t glyph,
+                                unsigned int start_offset,
+                                unsigned int *point_count /* IN/OUT */,
+                                unsigned int *point_array /* OUT */)
+{
+  /* All arguments are passed straight through to the GDEF table. */
+  const unsigned int result =
+    face->table.GDEF->table->get_attach_points (glyph, start_offset, point_count, point_array);
+  return result;
+}
+/**
+ * hb_ot_layout_get_ligature_carets:
+ * @font: The #hb_font_t to work on
+ * @direction: The #hb_direction_t text direction to use
+ * @glyph: The #hb_codepoint_t code point to query
+ * @start_offset: offset of the first caret position to retrieve
+ * @caret_count: (inout) (allow-none): Input = the maximum number of caret positions to return;
+ * Output = the actual number of caret positions returned (may be zero)
+ * @caret_array: (out) (array length=caret_count): The array of caret positions found for the query
+ *
+ * Fetches a list of the caret positions defined for a ligature glyph in the GDEF
+ * table of the font. The list returned will begin at the offset provided.
+ *
+ **/
+unsigned int
+hb_ot_layout_get_ligature_carets (hb_font_t *font,
+                                  hb_direction_t direction,
+                                  hb_codepoint_t glyph,
+                                  unsigned int start_offset,
+                                  unsigned int *caret_count /* IN/OUT */,
+                                  hb_position_t *caret_array /* OUT */)
+{
+  /* The GDEF table of the font's face answers the query; the font itself is
+   * passed along as well. */
+  const unsigned int result =
+    font->face->table.GDEF->table->get_lig_carets (font,
+                                                   direction,
+                                                   glyph,
+                                                   start_offset,
+                                                   caret_count,
+                                                   caret_array);
+  return result;
+}
+#endif
+
+
+/*
+ * GSUB/GPOS
+ */
+
+/* Reports whether this GSUB table should be ignored for the given face.
+ * No GSUB tables are currently listed, so the answer is always false;
+ * neither parameter is consulted, hence both are marked HB_UNUSED. */
+bool
+OT::GSUB::is_blocklisted (hb_blob_t *blob HB_UNUSED,
+                          hb_face_t *face HB_UNUSED) const
+{
+#ifdef HB_NO_OT_LAYOUT_BLACKLIST
+  /* Blocklisting is compiled out. */
+  return false;
+#else
+  /* Placeholder: add per-font checks here if a GSUB table ever needs one. */
+  return false;
+#endif
+}
+
+/* Reports whether this GPOS table should be ignored for the given face.
+ * No GPOS tables are currently listed, so the answer is always false. */
+bool
+OT::GPOS::is_blocklisted (hb_blob_t *blob HB_UNUSED,
+                          hb_face_t *face HB_UNUSED) const
+{
+#ifdef HB_NO_OT_LAYOUT_BLACKLIST
+  /* Blocklisting is compiled out. */
+  return false;
+#else
+  /* No entries to check. */
+  return false;
+#endif
+}
+
+/* Maps a table tag to the face's GSUB or GPOS table; any other tag yields
+ * the Null GSUBGPOS object. */
+static const OT::GSUBGPOS&
+get_gsubgpos_table (hb_face_t *face,
+                    hb_tag_t table_tag)
+{
+  if (table_tag == HB_OT_TAG_GSUB)
+    return *face->table.GSUB->table;
+
+  if (table_tag == HB_OT_TAG_GPOS)
+    return *face->table.GPOS->table;
+
+  return Null (OT::GSUBGPOS);
+}
+
+
+/**
+ * hb_ot_layout_table_get_script_tags:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @start_offset: offset of the first script tag to retrieve
+ * @script_count: (inout) (allow-none): Input = the maximum number of script tags to return;
+ * Output = the actual number of script tags returned (may be zero)
+ * @script_tags: (out) (array length=script_count): The array of #hb_tag_t script tags found for the query
+ *
+ * Fetches a list of all scripts enumerated in the specified face's GSUB table
+ * or GPOS table. The list returned will begin at the offset provided.
+ *
+ **/
+unsigned int
+hb_ot_layout_table_get_script_tags (hb_face_t *face,
+                                    hb_tag_t table_tag,
+                                    unsigned int start_offset,
+                                    unsigned int *script_count /* IN/OUT */,
+                                    hb_tag_t *script_tags /* OUT */)
+{
+  /* Select the table for the tag and let it enumerate its script tags. */
+  return get_gsubgpos_table (face, table_tag).get_script_tags (start_offset,
+                                                               script_count,
+                                                               script_tags);
+}
+
+#define HB_OT_TAG_LATIN_SCRIPT HB_TAG ('l', 'a', 't', 'n')
+
+/**
+ * hb_ot_layout_table_find_script:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @script_tag: #hb_tag_t of the script tag requested
+ * @script_index: (out): The index of the requested script tag
+ *
+ * Fetches the index of a given script tag in the specified face's GSUB table
+ * or GPOS table.
+ *
+ * Return value: true if the script is found, false otherwise
+ *
+ **/
+hb_bool_t
+hb_ot_layout_table_find_script (hb_face_t *face,
+                                hb_tag_t table_tag,
+                                hb_tag_t script_tag,
+                                unsigned int *script_index /* OUT */)
+{
+  static_assert ((OT::Index::NOT_FOUND_INDEX == HB_OT_LAYOUT_NO_SCRIPT_INDEX), "");
+  const OT::GSUBGPOS &table = get_gsubgpos_table (face, table_tag);
+
+  /* Only a match on the requested tag is reported as success. */
+  if (table.find_script_index (script_tag, script_index))
+    return true;
+
+  /* Fallbacks, tried in order.  A hit leaves its index in script_index but
+   * the function still returns false. */
+  const hb_tag_t fallback_tags[] = {
+    /* 'DFLT' */
+    HB_OT_TAG_DEFAULT_SCRIPT,
+    /* 'dflt'; MS site has had typos and many fonts use it now :(.
+     * including many versions of DejaVu Sans Mono! */
+    HB_OT_TAG_DEFAULT_LANGUAGE,
+    /* 'latn'; some old fonts put their features there even though
+     * they're really trying to support Thai, for example :( */
+    HB_OT_TAG_LATIN_SCRIPT
+  };
+  for (hb_tag_t fallback : fallback_tags)
+  {
+    if (table.find_script_index (fallback, script_index))
+      return false;
+  }
+
+  /* Nothing matched at all. */
+  if (script_index)
+    *script_index = HB_OT_LAYOUT_NO_SCRIPT_INDEX;
+  return false;
+}
+
+#ifndef HB_DISABLE_DEPRECATED
+/**
+ * hb_ot_layout_table_choose_script:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @script_tags: Array of #hb_tag_t script tags
+ * @script_index: (out): The index of the requested script tag
+ * @chosen_script: (out): #hb_tag_t of the script tag requested
+ *
+ * Deprecated since 2.0.0
+ **/
+hb_bool_t
+hb_ot_layout_table_choose_script (hb_face_t *face,
+                                  hb_tag_t table_tag,
+                                  const hb_tag_t *script_tags,
+                                  unsigned int *script_index /* OUT */,
+                                  hb_tag_t *chosen_script /* OUT */)
+{
+  /* script_tags is terminated by a zero tag; count the entries before it. */
+  const hb_tag_t *end = script_tags;
+  while (*end)
+    end++;
+
+  return hb_ot_layout_table_select_script (face,
+                                           table_tag,
+                                           end - script_tags,
+                                           script_tags,
+                                           script_index,
+                                           chosen_script);
+}
+#endif
+
+/**
+ * hb_ot_layout_table_select_script:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @script_count: Number of script tags in the array
+ * @script_tags: Array of #hb_tag_t script tags
+ * @script_index: (out): The index of the requested script
+ * @chosen_script: (out): #hb_tag_t of the requested script
+ *
+ * Since: 2.0.0
+ **/
+hb_bool_t
+hb_ot_layout_table_select_script (hb_face_t *face,
+                                  hb_tag_t table_tag,
+                                  unsigned int script_count,
+                                  const hb_tag_t *script_tags,
+                                  unsigned int *script_index /* OUT */,
+                                  hb_tag_t *chosen_script /* OUT */)
+{
+  static_assert ((OT::Index::NOT_FOUND_INDEX == HB_OT_LAYOUT_NO_SCRIPT_INDEX), "");
+  const OT::GSUBGPOS &table = get_gsubgpos_table (face, table_tag);
+
+  /* The caller's tags are tried first, in the order given; only these count
+   * as success. */
+  for (unsigned int idx = 0; idx < script_count; idx++)
+  {
+    const hb_tag_t candidate = script_tags[idx];
+    if (!table.find_script_index (candidate, script_index))
+      continue;
+
+    if (chosen_script)
+      *chosen_script = candidate;
+    return true;
+  }
+
+  /* Fallbacks, tried in order.  A hit reports its index and tag through the
+   * out-parameters but the function still returns false. */
+  const hb_tag_t fallback_tags[] = {
+    /* 'DFLT' */
+    HB_OT_TAG_DEFAULT_SCRIPT,
+    /* 'dflt'; MS site has had typos and many fonts use it now :( */
+    HB_OT_TAG_DEFAULT_LANGUAGE,
+    /* 'latn'; some old fonts put their features there even though
+     * they're really trying to support Thai, for example :( */
+    HB_OT_TAG_LATIN_SCRIPT
+  };
+  for (hb_tag_t fallback : fallback_tags)
+  {
+    if (!table.find_script_index (fallback, script_index))
+      continue;
+
+    if (chosen_script)
+      *chosen_script = fallback;
+    return false;
+  }
+
+  /* Nothing matched.  Note that the tag output receives the "no script
+   * index" constant, not a tag value. */
+  if (script_index)
+    *script_index = HB_OT_LAYOUT_NO_SCRIPT_INDEX;
+  if (chosen_script)
+    *chosen_script = HB_OT_LAYOUT_NO_SCRIPT_INDEX;
+  return false;
+}
+
+
+/**
+ * hb_ot_layout_table_get_feature_tags:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @start_offset: offset of the first feature tag to retrieve
+ * @feature_count: (inout) (allow-none): Input = the maximum number of feature tags to return;
+ * Output = the actual number of feature tags returned (may be zero)
+ * @feature_tags: (out) (array length=feature_count): Array of feature tags found in the table
+ *
+ * Fetches a list of all feature tags in the given face's GSUB or GPOS table.
+ *
+ **/
+unsigned int
+hb_ot_layout_table_get_feature_tags (hb_face_t *face,
+                                     hb_tag_t table_tag,
+                                     unsigned int start_offset,
+                                     unsigned int *feature_count /* IN/OUT */,
+                                     hb_tag_t *feature_tags /* OUT */)
+{
+  /* Select the table for the tag and let it enumerate its feature tags. */
+  return get_gsubgpos_table (face, table_tag).get_feature_tags (start_offset,
+                                                                feature_count,
+                                                                feature_tags);
+}
+
+
+/**
+ * hb_ot_layout_table_find_feature:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @feature_tag: The #hb_tag_t of the requested feature tag
+ * @feature_index: (out): The index of the requested feature
+ *
+ * Fetches the index for a given feature tag in the specified face's GSUB table
+ * or GPOS table.
+ *
+ * Return value: true if the feature is found, false otherwise
+ **/
+bool
+hb_ot_layout_table_find_feature (hb_face_t *face,
+                                 hb_tag_t table_tag,
+                                 hb_tag_t feature_tag,
+                                 unsigned int *feature_index /* OUT */)
+{
+  static_assert ((OT::Index::NOT_FOUND_INDEX == HB_OT_LAYOUT_NO_FEATURE_INDEX), "");
+  const OT::GSUBGPOS &table = get_gsubgpos_table (face, table_tag);
+
+  /* Linear scan over the table's features; the first tag match wins. */
+  const unsigned int feature_count = table.get_feature_count ();
+  unsigned int idx = 0;
+  while (idx < feature_count)
+  {
+    if (feature_tag == table.get_feature_tag (idx))
+    {
+      if (feature_index)
+        *feature_index = idx;
+      return true;
+    }
+    idx++;
+  }
+
+  /* No feature carries the requested tag. */
+  if (feature_index)
+    *feature_index = HB_OT_LAYOUT_NO_FEATURE_INDEX;
+  return false;
+}
+
+
+/**
+ * hb_ot_layout_script_get_language_tags:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @script_index: The index of the requested script tag
+ * @start_offset: offset of the first language tag to retrieve
+ * @language_count: (inout) (allow-none): Input = the maximum number of language tags to return;
+ * Output = the actual number of language tags returned (may be zero)
+ * @language_tags: (out) (array length=language_count): Array of language tags found in the table
+ *
+ * Fetches a list of language tags in the given face's GSUB or GPOS table, underneath
+ * the specified script index. The list returned will begin at the offset provided.
+ *
+ **/
+unsigned int
+hb_ot_layout_script_get_language_tags (hb_face_t *face,
+                                       hb_tag_t table_tag,
+                                       unsigned int script_index,
+                                       unsigned int start_offset,
+                                       unsigned int *language_count /* IN/OUT */,
+                                       hb_tag_t *language_tags /* OUT */)
+{
+  /* Resolve table, then script; the script enumerates its LangSys tags. */
+  const OT::GSUBGPOS &table = get_gsubgpos_table (face, table_tag);
+  const OT::Script &script = table.get_script (script_index);
+
+  return script.get_lang_sys_tags (start_offset, language_count, language_tags);
+}
+
+
+#ifndef HB_DISABLE_DEPRECATED
+/**
+ * hb_ot_layout_script_find_language:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @script_index: The index of the requested script tag
+ * @language_tag: The #hb_tag_t of the requested language
+ * @language_index: The index of the requested language
+ *
+ * Fetches the index of a given language tag in the specified face's GSUB table
+ * or GPOS table, underneath the specified script tag.
+ *
+ * Return value: true if the language tag is found, false otherwise
+ *
+ * Since: ??
+ * Deprecated: ??
+ **/
+hb_bool_t
+hb_ot_layout_script_find_language (hb_face_t *face,
+                                   hb_tag_t table_tag,
+                                   unsigned int script_index,
+                                   hb_tag_t language_tag,
+                                   unsigned int *language_index)
+{
+  /* Same as the select variant with a one-element tag list. */
+  const unsigned int language_count = 1;
+  return hb_ot_layout_script_select_language (face, table_tag, script_index,
+                                              language_count, &language_tag,
+                                              language_index);
+}
+#endif
+
+
+/**
+ * hb_ot_layout_script_select_language:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @script_index: The index of the requested script tag
+ * @language_count: The number of language tags in the @language_tags array
+ * @language_tags: The array of language tags
+ * @language_index: (out): The index of the requested language
+ *
+ * Fetches the index of a given language tag in the specified face's GSUB table
+ * or GPOS table, underneath the specified script index.
+ *
+ * Return value: true if the language tag is found, false otherwise
+ *
+ * Since: 2.0.0
+ **/
+hb_bool_t
+hb_ot_layout_script_select_language (hb_face_t *face,
+                                     hb_tag_t table_tag,
+                                     unsigned int script_index,
+                                     unsigned int language_count,
+                                     const hb_tag_t *language_tags,
+                                     unsigned int *language_index /* OUT */)
+{
+  static_assert ((OT::Index::NOT_FOUND_INDEX == HB_OT_LAYOUT_DEFAULT_LANGUAGE_INDEX), "");
+  const OT::Script &script = get_gsubgpos_table (face, table_tag).get_script (script_index);
+
+  /* The caller's tags are tried in order; only these count as success. */
+  for (unsigned int idx = 0; idx != language_count; idx++)
+  {
+    if (script.find_lang_sys_index (language_tags[idx], language_index))
+      return true;
+  }
+
+  /* Fall back to 'dflt'.  A hit leaves its index in language_index; a miss
+   * stores the default-language index instead.  Either way the result is
+   * false. */
+  const bool found_dflt = script.find_lang_sys_index (HB_OT_TAG_DEFAULT_LANGUAGE, language_index);
+  if (!found_dflt && language_index)
+    *language_index = HB_OT_LAYOUT_DEFAULT_LANGUAGE_INDEX;
+  return false;
+}
+
+
+/**
+ * hb_ot_layout_language_get_required_feature_index:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @script_index: The index of the requested script tag
+ * @language_index: The index of the requested language tag
+ * @feature_index: (out): The index of the requested feature
+ *
+ * Fetches the index of a requested feature in the given face's GSUB or GPOS table,
+ * underneath the specified script and language.
+ *
+ * Return value: true if the feature is found, false otherwise
+ *
+ **/
+hb_bool_t
+hb_ot_layout_language_get_required_feature_index (hb_face_t *face,
+                                                  hb_tag_t table_tag,
+                                                  unsigned int script_index,
+                                                  unsigned int language_index,
+                                                  unsigned int *feature_index /* OUT */)
+{
+  /* Index-only variant: the tag output of the full query is not requested. */
+  hb_tag_t *no_feature_tag = nullptr;
+  return hb_ot_layout_language_get_required_feature (face, table_tag,
+                                                     script_index, language_index,
+                                                     feature_index, no_feature_tag);
+}
+
+
+/**
+ * hb_ot_layout_language_get_required_feature:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @script_index: The index of the requested script tag
+ * @language_index: The index of the requested language tag
+ * @feature_index: (out): The index of the requested feature
+ * @feature_tag: (out): The #hb_tag_t of the requested feature
+ *
+ * Fetches the tag of a requested feature index in the given face's GSUB or GPOS table,
+ * underneath the specified script and language.
+ *
+ * Return value: true if the feature is found, false otherwise
+ *
+ * Since: 0.9.30
+ **/
+hb_bool_t
+hb_ot_layout_language_get_required_feature (hb_face_t *face,
+                                            hb_tag_t table_tag,
+                                            unsigned int script_index,
+                                            unsigned int language_index,
+                                            unsigned int *feature_index /* OUT */,
+                                            hb_tag_t *feature_tag /* OUT */)
+{
+  const OT::GSUBGPOS &table = get_gsubgpos_table (face, table_tag);
+  const OT::LangSys &lang_sys = table.get_script (script_index).get_lang_sys (language_index);
+
+  /* Both outputs are optional and are written whether or not the LangSys
+   * actually has a required feature. */
+  const unsigned int required_index = lang_sys.get_required_feature_index ();
+  if (feature_index)
+    *feature_index = required_index;
+  if (feature_tag)
+    *feature_tag = table.get_feature_tag (required_index);
+
+  return lang_sys.has_required_feature ();
+}
+
+
+/**
+ * hb_ot_layout_language_get_feature_indexes:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @script_index: The index of the requested script tag
+ * @language_index: The index of the requested language tag
+ * @start_offset: offset of the first feature tag to retrieve
+ * @feature_count: (inout) (allow-none): Input = the maximum number of feature tags to return;
+ * Output: the actual number of feature tags returned (may be zero)
+ * @feature_indexes: (out) (array length=feature_count): The array of feature indexes found for the query
+ *
+ * Fetches a list of all features in the specified face's GSUB table
+ * or GPOS table, underneath the specified script and language. The list
+ * returned will begin at the offset provided.
+ **/
+unsigned int
+hb_ot_layout_language_get_feature_indexes (hb_face_t *face,
+                                           hb_tag_t table_tag,
+                                           unsigned int script_index,
+                                           unsigned int language_index,
+                                           unsigned int start_offset,
+                                           unsigned int *feature_count /* IN/OUT */,
+                                           unsigned int *feature_indexes /* OUT */)
+{
+  /* Resolve table -> script -> LangSys, which enumerates its feature indexes. */
+  const OT::GSUBGPOS &table = get_gsubgpos_table (face, table_tag);
+  const OT::Script &script = table.get_script (script_index);
+  const OT::LangSys &lang_sys = script.get_lang_sys (language_index);
+
+  return lang_sys.get_feature_indexes (start_offset, feature_count, feature_indexes);
+}
+
+
+/**
+ * hb_ot_layout_language_get_feature_tags:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @script_index: The index of the requested script tag
+ * @language_index: The index of the requested language tag
+ * @start_offset: offset of the first feature tag to retrieve
+ * @feature_count: (inout) (allow-none): Input = the maximum number of feature tags to return;
+ * Output = the actual number of feature tags returned (may be zero)
+ * @feature_tags: (out) (array length=feature_count): The array of #hb_tag_t feature tags found for the query
+ *
+ * Fetches a list of all features in the specified face's GSUB table
+ * or GPOS table, underneath the specified script and language. The list
+ * returned will begin at the offset provided.
+ *
+ **/
+unsigned int
+hb_ot_layout_language_get_feature_tags (hb_face_t *face,
+                                        hb_tag_t table_tag,
+                                        unsigned int script_index,
+                                        unsigned int language_index,
+                                        unsigned int start_offset,
+                                        unsigned int *feature_count /* IN/OUT */,
+                                        hb_tag_t *feature_tags /* OUT */)
+{
+  const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag);
+  const OT::LangSys &l = g.get_script (script_index).get_lang_sys (language_index);
+
+  /* The output array is first filled with feature indexes and then rewritten
+   * in place with the matching tags, so both element types must have the
+   * same size. */
+  static_assert ((sizeof (unsigned int) == sizeof (hb_tag_t)), "");
+  unsigned int ret = l.get_feature_indexes (start_offset, feature_count, (unsigned int *) feature_tags);
+
+  /* feature_count is documented as optional (allow-none).  Without it there
+   * is no element count to translate, so it must not be dereferenced. */
+  if (feature_tags && feature_count) {
+    unsigned int count = *feature_count;
+    for (unsigned int i = 0; i < count; i++)
+      feature_tags[i] = g.get_feature_tag ((unsigned int) feature_tags[i]);
+  }
+
+  return ret;
+}
+
+
+/**
+ * hb_ot_layout_language_find_feature:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @script_index: The index of the requested script tag
+ * @language_index: The index of the requested language tag
+ * @feature_tag: #hb_tag_t of the feature tag requested
+ * @feature_index: (out): The index of the requested feature
+ *
+ * Fetches the index of a given feature tag in the specified face's GSUB table
+ * or GPOS table, underneath the specified script and language.
+ *
+ * Return value: true if the feature is found, false otherwise
+ *
+ **/
+hb_bool_t
+hb_ot_layout_language_find_feature (hb_face_t *face,
+                                    hb_tag_t table_tag,
+                                    unsigned int script_index,
+                                    unsigned int language_index,
+                                    hb_tag_t feature_tag,
+                                    unsigned int *feature_index /* OUT */)
+{
+  static_assert ((OT::Index::NOT_FOUND_INDEX == HB_OT_LAYOUT_NO_FEATURE_INDEX), "");
+  const OT::GSUBGPOS &table = get_gsubgpos_table (face, table_tag);
+  const OT::LangSys &lang_sys = table.get_script (script_index).get_lang_sys (language_index);
+
+  /* Walk the LangSys's feature list; each entry is an index into the table's
+   * feature list, whose tag is compared with the requested one. */
+  const unsigned int feature_count = lang_sys.get_feature_count ();
+  for (unsigned int pos = 0; pos < feature_count; pos++)
+  {
+    const unsigned int candidate = lang_sys.get_feature_index (pos);
+    const bool matches = (feature_tag == table.get_feature_tag (candidate));
+    if (!matches)
+      continue;
+
+    if (feature_index)
+      *feature_index = candidate;
+    return true;
+  }
+
+  /* The LangSys references no feature with the requested tag. */
+  if (feature_index)
+    *feature_index = HB_OT_LAYOUT_NO_FEATURE_INDEX;
+  return false;
+}
+
+
+/**
+ * hb_ot_layout_feature_get_lookups:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @feature_index: The index of the requested feature
+ * @start_offset: offset of the first lookup to retrieve
+ * @lookup_count: (inout) (allow-none): Input = the maximum number of lookups to return;
+ * Output = the actual number of lookups returned (may be zero)
+ * @lookup_indexes: (out) (array length=lookup_count): The array of lookup indexes found for the query
+ *
+ * Fetches a list of all lookups enumerated for the specified feature, in
+ * the specified face's GSUB table or GPOS table. The list returned will
+ * begin at the offset provided.
+ *
+ * Since: 0.9.7
+ **/
+unsigned int
+hb_ot_layout_feature_get_lookups (hb_face_t    *face,
+                                  hb_tag_t      table_tag,
+                                  unsigned int  feature_index,
+                                  unsigned int  start_offset,
+                                  unsigned int *lookup_count /* IN/OUT */,
+                                  unsigned int *lookup_indexes /* OUT */)
+{
+  /* Same query as the variations-aware entry point, pinned to the
+   * "no variations" index. */
+  const unsigned int variations_index = HB_OT_LAYOUT_NO_VARIATIONS_INDEX;
+
+  return hb_ot_layout_feature_with_variations_get_lookups (face, table_tag,
+                                                           feature_index, variations_index,
+                                                           start_offset,
+                                                           lookup_count, lookup_indexes);
+}
+
+
+/**
+ * hb_ot_layout_table_get_lookup_count:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ *
+ * Fetches the total number of lookups enumerated in the specified
+ * face's GSUB table or GPOS table.
+ *
+ * Since: 0.9.22
+ **/
+unsigned int
+hb_ot_layout_table_get_lookup_count (hb_face_t *face,
+                                     hb_tag_t   table_tag)
+{
+  /* Resolve the requested layout table, then report its lookup count. */
+  const OT::GSUBGPOS &table = get_gsubgpos_table (face, table_tag);
+  return table.get_lookup_count ();
+}
+
+
+struct hb_collect_features_context_t
+{
+  hb_collect_features_context_t (hb_face_t *face,
+                                 hb_tag_t   table_tag,
+                                 hb_set_t  *feature_indexes_)
+    : g (get_gsubgpos_table (face, table_tag)),
+      feature_indexes (feature_indexes_),
+      script_count (0),
+      langsys_count (0),
+      feature_index_count (0)
+  {}
+
+  /* Returns true when script @s should be skipped: it is empty, the
+   * script budget is exhausted, or it has been seen before. */
+  bool visited (const OT::Script &s)
+  {
+    /* A Null() object may show up here; keep it out of the memoization
+     * by treating any empty script as already handled. */
+    const bool empty = !(s.has_default_lang_sys () || s.get_lang_sys_count ());
+    if (unlikely (empty))
+      return true;
+
+    const bool over_budget = script_count > HB_MAX_SCRIPTS;
+    script_count++;
+    if (over_budget)
+      return true;
+
+    return visited (s, visited_script);
+  }
+
+  /* Returns true when language system @l should be skipped: it is empty,
+   * the langsys budget is exhausted, or it has been seen before. */
+  bool visited (const OT::LangSys &l)
+  {
+    /* A Null() object may show up here; keep it out of the memoization
+     * by treating any empty language system as already handled. */
+    const bool empty = !(l.has_required_feature () || l.get_feature_count ());
+    if (unlikely (empty))
+      return true;
+
+    const bool over_budget = langsys_count > HB_MAX_LANGSYS;
+    langsys_count++;
+    if (over_budget)
+      return true;
+
+    return visited (l, visited_langsys);
+  }
+
+  /* Charges @count feature indices against the running total and
+   * returns true once the total exceeds HB_MAX_FEATURE_INDICES. */
+  bool visited_feature_indices (unsigned count)
+  {
+    feature_index_count += count;
+    const bool exceeded = feature_index_count > HB_MAX_FEATURE_INDICES;
+    return exceeded;
+  }
+
+  private:
+  /* Memoizes an object by its byte offset from the start of the table:
+   * returns true if that offset is already recorded in @visited_set,
+   * otherwise records it and returns false. */
+  template <typename T>
+  bool visited (const T &p, hb_set_t &visited_set)
+  {
+    const uintptr_t object_address = (uintptr_t) &p;
+    const uintptr_t table_address = (uintptr_t) &g;
+    hb_codepoint_t delta = (hb_codepoint_t) (object_address - table_address);
+
+    if (visited_set.has (delta))
+      return true;
+
+    visited_set.add (delta);
+    return false;
+  }
+
+  public:
+  const OT::GSUBGPOS &g;
+  hb_set_t *feature_indexes;
+
+  private:
+  hb_set_t visited_script;
+  hb_set_t visited_langsys;
+  unsigned int script_count;
+  unsigned int langsys_count;
+  unsigned int feature_index_count;
+};
+
+static void
+langsys_collect_features (hb_collect_features_context_t *c,
+                          const OT::LangSys             &l,
+                          const hb_tag_t                *features)
+{
+  if (c->visited (l)) return;
+
+  if (!features)
+  {
+    /* No filter given: take the required feature (if any) and then the
+     * whole feature-index list, each subject to the index budget. */
+    if (l.has_required_feature () && !c->visited_feature_indices (1))
+      c->feature_indexes->add (l.get_required_feature_index ());
+
+    if (!c->visited_feature_indices (l.featureIndex.len))
+      l.add_feature_indexes_to (c->feature_indexes);
+
+    return;
+  }
+
+  /* Filtered: for every requested tag (zero-terminated list) add the
+   * first feature index of this language system carrying that tag. */
+  for (const hb_tag_t *wanted = features; *wanted; wanted++)
+  {
+    const unsigned int total = l.get_feature_count ();
+    unsigned int pos = 0;
+    while (pos < total)
+    {
+      const unsigned int candidate = l.get_feature_index (pos++);
+      if (c->g.get_feature_tag (candidate) != *wanted)
+        continue;
+
+      c->feature_indexes->add (candidate);
+      break;
+    }
+  }
+}
+
+static void
+script_collect_features (hb_collect_features_context_t *c,
+                         const OT::Script              &s,
+                         const hb_tag_t                *languages,
+                         const hb_tag_t                *features)
+{
+  if (c->visited (s)) return;
+
+  if (languages)
+  {
+    /* Only the requested languages (zero-terminated list); tags the
+     * script does not define are skipped. */
+    for (const hb_tag_t *wanted = languages; *wanted; wanted++)
+    {
+      unsigned int found_index;
+      if (!s.find_lang_sys_index (*wanted, &found_index))
+        continue;
+
+      langsys_collect_features (c, s.get_lang_sys (found_index), features);
+    }
+    return;
+  }
+
+  /* No filter: the default language system first, then every listed one. */
+  if (s.has_default_lang_sys ())
+    langsys_collect_features (c, s.get_default_lang_sys (), features);
+
+  const unsigned int total = s.get_lang_sys_count ();
+  for (unsigned int idx = 0; idx < total; idx++)
+    langsys_collect_features (c, s.get_lang_sys (idx), features);
+}
+
+
+/**
+ * hb_ot_layout_collect_features:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @scripts: The array of scripts to collect features for
+ * @languages: The array of languages to collect features for
+ * @features: The array of features to collect
+ * @feature_indexes: (out): The array of feature indexes found for the query
+ *
+ * Fetches a list of all feature indexes in the specified face's GSUB table
+ * or GPOS table, underneath the specified scripts, languages, and features.
+ * If no list of scripts is provided, all scripts will be queried. If no list
+ * of languages is provided, all languages will be queried. If no list of
+ * features is provided, all features will be queried.
+ *
+ * Since: 1.8.5
+ **/
+void
+hb_ot_layout_collect_features (hb_face_t      *face,
+                               hb_tag_t        table_tag,
+                               const hb_tag_t *scripts,
+                               const hb_tag_t *languages,
+                               const hb_tag_t *features,
+                               hb_set_t       *feature_indexes /* OUT */)
+{
+  hb_collect_features_context_t ctx (face, table_tag, feature_indexes);
+
+  if (scripts)
+  {
+    /* Only the requested scripts (zero-terminated list); tags the table
+     * does not define are skipped. */
+    for (const hb_tag_t *wanted = scripts; *wanted; wanted++)
+    {
+      unsigned int found_index;
+      if (!ctx.g.find_script_index (*wanted, &found_index))
+        continue;
+
+      script_collect_features (&ctx, ctx.g.get_script (found_index), languages, features);
+    }
+    return;
+  }
+
+  /* No filter: walk every script in the table. */
+  const unsigned int total = ctx.g.get_script_count ();
+  for (unsigned int idx = 0; idx < total; idx++)
+    script_collect_features (&ctx, ctx.g.get_script (idx), languages, features);
+}
+
+
+/**
+ * hb_ot_layout_collect_lookups:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @scripts: The array of scripts to collect lookups for
+ * @languages: The array of languages to collect lookups for
+ * @features: The array of features to collect lookups for
+ * @lookup_indexes: (out): The array of lookup indexes found for the query
+ *
+ * Fetches a list of all feature-lookup indexes in the specified face's GSUB
+ * table or GPOS table, underneath the specified scripts, languages, and
+ * features. If no list of scripts is provided, all scripts will be queried.
+ * If no list of languages is provided, all languages will be queried. If no
+ * list of features is provided, all features will be queried.
+ *
+ * Since: 0.9.8
+ **/
+void
+hb_ot_layout_collect_lookups (hb_face_t      *face,
+                              hb_tag_t        table_tag,
+                              const hb_tag_t *scripts,
+                              const hb_tag_t *languages,
+                              const hb_tag_t *features,
+                              hb_set_t       *lookup_indexes /* OUT */)
+{
+  const OT::GSUBGPOS &table = get_gsubgpos_table (face, table_tag);
+
+  /* Step 1: gather the matching feature indexes. */
+  hb_set_t collected_features;
+  hb_ot_layout_collect_features (face, table_tag,
+                                 scripts, languages, features,
+                                 &collected_features);
+
+  /* Step 2: add the lookups referenced by each collected feature. */
+  hb_codepoint_t current = HB_SET_VALUE_INVALID;
+  while (hb_set_next (&collected_features, &current))
+  {
+    const OT::Feature &feature = table.get_feature (current);
+    feature.add_lookup_indexes_to (lookup_indexes);
+  }
+
+  /* Step 3: let the table add lookups coming from feature variations. */
+  table.feature_variation_collect_lookups (&collected_features, lookup_indexes);
+}
+
+
+#ifndef HB_NO_LAYOUT_COLLECT_GLYPHS
+/**
+ * hb_ot_layout_lookup_collect_glyphs:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @lookup_index: The index of the feature lookup to query
+ * @glyphs_before: (out): Array of glyphs preceding the substitution range
+ * @glyphs_input: (out): Array of input glyphs that would be substituted by the lookup
+ * @glyphs_after: (out): Array of glyphs following the substitution range
+ * @glyphs_output: (out): Array of glyphs that would be the substituted output of the lookup
+ *
+ * Fetches a list of all glyphs affected by the specified lookup in the
+ * specified face's GSUB table or GPOS table.
+ *
+ * Since: 0.9.7
+ **/
+void
+hb_ot_layout_lookup_collect_glyphs (hb_face_t    *face,
+                                    hb_tag_t      table_tag,
+                                    unsigned int  lookup_index,
+                                    hb_set_t     *glyphs_before, /* OUT. May be NULL */
+                                    hb_set_t     *glyphs_input,  /* OUT. May be NULL */
+                                    hb_set_t     *glyphs_after,  /* OUT. May be NULL */
+                                    hb_set_t     *glyphs_output  /* OUT. May be NULL */)
+{
+  OT::hb_collect_glyphs_context_t ctx (face,
+                                       glyphs_before, glyphs_input,
+                                       glyphs_after, glyphs_output);
+
+  /* Only GSUB and GPOS are handled; any other tag leaves the sets untouched. */
+  if (table_tag == HB_OT_TAG_GSUB)
+  {
+    const OT::SubstLookup &subst = face->table.GSUB->table->get_lookup (lookup_index);
+    subst.collect_glyphs (&ctx);
+  }
+  else if (table_tag == HB_OT_TAG_GPOS)
+  {
+    const OT::PosLookup &posit = face->table.GPOS->table->get_lookup (lookup_index);
+    posit.collect_glyphs (&ctx);
+  }
+}
+#endif
+
+
+/* Variations support */
+
+
+/**
+ * hb_ot_layout_table_find_feature_variations:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @coords: The variation coordinates to query
+ * @num_coords: The number of variation coordinates
+ * @variations_index: (out): The array of feature variations found for the query
+ *
+ * Fetches a list of feature variations in the specified face's GSUB table
+ * or GPOS table, at the specified variation coordinates.
+ *
+ **/
+hb_bool_t
+hb_ot_layout_table_find_feature_variations (hb_face_t    *face,
+                                            hb_tag_t      table_tag,
+                                            const int    *coords,
+                                            unsigned int  num_coords,
+                                            unsigned int *variations_index /* out */)
+{
+  /* Delegate the search to the selected layout table. */
+  return get_gsubgpos_table (face, table_tag)
+         .find_variations_index (coords, num_coords, variations_index);
+}
+
+
+/**
+ * hb_ot_layout_feature_with_variations_get_lookups:
+ * @face: #hb_face_t to work upon
+ * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS
+ * @feature_index: The index of the feature to query
+ * @variations_index: The index of the feature variation to query
+ * @start_offset: offset of the first lookup to retrieve
+ * @lookup_count: (inout) (allow-none): Input = the maximum number of lookups to return;
+ * Output = the actual number of lookups returned (may be zero)
+ * @lookup_indexes: (out) (array length=lookup_count): The array of lookups found for the query
+ *
+ * Fetches a list of all lookups enumerated for the specified feature, in
+ * the specified face's GSUB table or GPOS table, enabled at the specified
+ * variations index. The list returned will begin at the offset provided.
+ *
+ **/
+unsigned int
+hb_ot_layout_feature_with_variations_get_lookups (hb_face_t    *face,
+                                                  hb_tag_t      table_tag,
+                                                  unsigned int  feature_index,
+                                                  unsigned int  variations_index,
+                                                  unsigned int  start_offset,
+                                                  unsigned int *lookup_count /* IN/OUT */,
+                                                  unsigned int *lookup_indexes /* OUT */)
+{
+  static_assert ((OT::FeatureVariations::NOT_FOUND_INDEX == HB_OT_LAYOUT_NO_VARIATIONS_INDEX), "");
+
+  /* Table -> feature as seen under the given variations index -> its lookup list. */
+  return get_gsubgpos_table (face, table_tag)
+         .get_feature_variation (feature_index, variations_index)
+         .get_lookup_indexes (start_offset, lookup_count, lookup_indexes);
+}
+
+
+/*
+ * OT::GSUB
+ */
+
+
+/**
+ * hb_ot_layout_has_substitution:
+ * @face: #hb_face_t to work upon
+ *
+ * Tests whether the specified face includes any GSUB substitutions.
+ *
+ * Return value: true if data found, false otherwise
+ *
+ **/
+hb_bool_t
+hb_ot_layout_has_substitution (hb_face_t *face)
+{
+  /* Ask the face's GSUB table whether it carries any data. */
+  const OT::GSUB &gsub = *face->table.GSUB->table;
+  return gsub.has_data ();
+}
+
+
+/**
+ * hb_ot_layout_lookup_would_substitute:
+ * @face: #hb_face_t to work upon
+ * @lookup_index: The index of the lookup to query
+ * @glyphs: The sequence of glyphs to query for substitution
+ * @glyphs_length: The length of the glyph sequence
+ * @zero_context: #hb_bool_t indicating whether substitutions should be context-free
+ *
+ * Tests whether a specified lookup in the specified face would
+ * trigger a substitution on the given glyph sequence.
+ *
+ * Return value: true if a substitution would be triggered, false otherwise
+ *
+ * Since: 0.9.7
+ **/
+hb_bool_t
+hb_ot_layout_lookup_would_substitute (hb_face_t            *face,
+                                      unsigned int          lookup_index,
+                                      const hb_codepoint_t *glyphs,
+                                      unsigned int          glyphs_length,
+                                      hb_bool_t             zero_context)
+{
+  /* Out-of-range lookup indexes can never substitute anything. */
+  const bool in_range = lookup_index < face->table.GSUB->lookup_count;
+  if (unlikely (!in_range)) return false;
+
+  OT::hb_would_apply_context_t ctx (face, glyphs, glyphs_length, (bool) zero_context);
+
+  const OT::SubstLookup &lookup = face->table.GSUB->table->get_lookup (lookup_index);
+  const OT::hb_ot_layout_lookup_accelerator_t *accel = &face->table.GSUB->accels[lookup_index];
+  return lookup.would_apply (&ctx, accel);
+}
+
+
+/**
+ * hb_ot_layout_substitute_start:
+ * @font: #hb_font_t to use
+ * @buffer: #hb_buffer_t buffer to work upon
+ *
+ * Called before substitution lookups are performed, to ensure that glyph
+ * class and other properties are set on the glyphs in the buffer.
+ *
+ **/
+void
+hb_ot_layout_substitute_start (hb_font_t *font, hb_buffer_t *buffer)
+{
+  /* All of the preparation lives in the shared glyph-props helper. */
+  _hb_ot_layout_set_glyph_props (font, buffer);
+}
+
+void
+hb_ot_layout_delete_glyphs_inplace (hb_buffer_t *buffer,
+ bool (*filter) (const hb_glyph_info_t *info))
+{
+ /* Merge clusters and delete filtered glyphs.
+ * NOTE! We can't use out-buffer as we have positioning data.
+ *
+ * Glyphs for which filter() returns true are dropped; the remaining
+ * info/pos entries are compacted toward the front of the arrays and
+ * buffer->len is set to the number of glyphs kept. */
+ unsigned int j = 0; /* Write index: next slot for a kept glyph. */
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ hb_glyph_position_t *pos = buffer->pos;
+ for (unsigned int i = 0; i < count; i++) /* i is the read index. */
+ {
+ if (filter (&info[i])) /* True: glyph i is being removed. */
+ {
+ /* Merge clusters.
+ * Same logic as buffer->delete_glyph(), but for in-place removal. */
+
+ unsigned int cluster = info[i].cluster;
+ if (i + 1 < count && cluster == info[i + 1].cluster)
+ continue; /* Cluster survives; do nothing. */
+
+ if (j) /* At least one glyph has been kept before this one. */
+ {
+ /* Merge cluster backward. */
+ if (cluster < info[j - 1].cluster)
+ {
+ unsigned int mask = info[i].mask;
+ unsigned int old_cluster = info[j - 1].cluster;
+ for (unsigned k = j; k && info[k - 1].cluster == old_cluster; k--) /* Walk back over the kept run sharing old_cluster. */
+ buffer->set_cluster (info[k - 1], cluster, mask);
+ }
+ continue;
+ }
+
+ if (i + 1 < count) /* Nothing kept yet, but a following glyph exists. */
+ buffer->merge_clusters (i, i + 2); /* Merge cluster forward. */
+
+ continue;
+ }
+
+ if (j != i) /* Copy down only once a gap has opened up. */
+ {
+ info[j] = info[i];
+ pos[j] = pos[i];
+ }
+ j++;
+ }
+ buffer->len = j; /* Number of kept glyphs becomes the new length. */
+}
+
+/**
+ * hb_ot_layout_lookup_substitute_closure:
+ * @face: #hb_face_t to work upon
+ * @lookup_index: index of the feature lookup to query
+ * @glyphs: (out): Array of glyphs comprising the transitive closure of the lookup
+ *
+ * Compute the transitive closure of glyphs needed for a
+ * specified lookup.
+ *
+ * Since: 0.9.7
+ **/
+void
+hb_ot_layout_lookup_substitute_closure (hb_face_t    *face,
+                                        unsigned int  lookup_index,
+                                        hb_set_t     *glyphs /* OUT */)
+{
+  /* Scratch map handed to the closure context alongside the output set. */
+  hb_map_t visited_lookups;
+  OT::hb_closure_context_t ctx (face, glyphs, &visited_lookups);
+
+  const OT::GSUB &gsub = *face->table.GSUB->table;
+  gsub.get_lookup (lookup_index).closure (&ctx, lookup_index);
+}
+
+/**
+ * hb_ot_layout_lookups_substitute_closure:
+ * @face: #hb_face_t to work upon
+ * @lookups: The set of lookups to query
+ * @glyphs: (out): Array of glyphs comprising the transitive closure of the lookups
+ *
+ * Compute the transitive closure of glyphs needed for all of the
+ * provided lookups.
+ *
+ * Since: 1.8.1
+ **/
+void
+hb_ot_layout_lookups_substitute_closure (hb_face_t      *face,
+                                         const hb_set_t *lookups,
+                                         hb_set_t       *glyphs /* OUT */)
+{
+  hb_map_t visited_lookups;
+  OT::hb_closure_context_t ctx (face, glyphs, &visited_lookups);
+  const OT::GSUB &gsub = *face->table.GSUB->table;
+
+  /* Re-run the lookups until a pass adds no glyphs, or the stage
+   * budget (HB_CLOSURE_MAX_STAGES) runs out. */
+  unsigned int passes = 0;
+  for (;;)
+  {
+    const unsigned int population_before = glyphs->get_population ();
+
+    if (lookups)
+    {
+      /* Only the lookups named in the given set. */
+      hb_codepoint_t current = HB_SET_VALUE_INVALID;
+      while (hb_set_next (lookups, &current))
+        gsub.get_lookup (current).closure (&ctx, current);
+    }
+    else
+    {
+      /* No set given: every lookup in the table. */
+      for (unsigned int idx = 0; idx < gsub.get_lookup_count (); idx++)
+        gsub.get_lookup (idx).closure (&ctx, idx);
+    }
+
+    if (passes++ > HB_CLOSURE_MAX_STAGES)
+      break;
+    if (population_before == glyphs->get_population ())
+      break;
+  }
+}
+
+/*
+ * OT::GPOS
+ */
+
+
+/**
+ * hb_ot_layout_has_positioning:
+ * @face: #hb_face_t to work upon
+ *
+ * Return value: true if the face has GPOS data, false otherwise
+ *
+ **/
+hb_bool_t
+hb_ot_layout_has_positioning (hb_face_t *face)
+{
+  /* Ask the face's GPOS table whether it carries any data. */
+  const OT::GPOS &gpos = *face->table.GPOS->table;
+  return gpos.has_data ();
+}
+
+/**
+ * hb_ot_layout_position_start:
+ * @font: #hb_font_t to use
+ * @buffer: #hb_buffer_t buffer to work upon
+ *
+ * Called before positioning lookups are performed, to ensure that glyph
+ * attachment types and glyph-attachment chains are set for the glyphs in the buffer.
+ *
+ **/
+void
+hb_ot_layout_position_start (hb_font_t   *font,
+                             hb_buffer_t *buffer)
+{
+  /* Thin public wrapper around the GPOS implementation. */
+  OT::GPOS::position_start (font, buffer);
+}
+
+
+/**
+ * hb_ot_layout_position_finish_advances:
+ * @font: #hb_font_t to use
+ * @buffer: #hb_buffer_t buffer to work upon
+ *
+ * Called after positioning lookups are performed, to finish glyph advances.
+ *
+ **/
+void
+hb_ot_layout_position_finish_advances (hb_font_t   *font,
+                                       hb_buffer_t *buffer)
+{
+  /* Thin public wrapper around the GPOS implementation. */
+  OT::GPOS::position_finish_advances (font, buffer);
+}
+
+/**
+ * hb_ot_layout_position_finish_offsets:
+ * @font: #hb_font_t to use
+ * @buffer: #hb_buffer_t buffer to work upon
+ *
+ * Called after positioning lookups are performed, to finish glyph offsets.
+ *
+ **/
+void
+hb_ot_layout_position_finish_offsets (hb_font_t   *font,
+                                      hb_buffer_t *buffer)
+{
+  /* Thin public wrapper around the GPOS implementation. */
+  OT::GPOS::position_finish_offsets (font, buffer);
+}
+
+
+#ifndef HB_NO_LAYOUT_FEATURE_PARAMS
+/**
+ * hb_ot_layout_get_size_params:
+ * @face: #hb_face_t to work upon
+ * @design_size: (out): The design size of the face
+ * @subfamily_id: (out): The identifier of the face within the font subfamily
+ * @subfamily_name_id: (out): The ‘name’ table name ID of the face within the font subfamily
+ * @range_start: (out): The minimum size of the recommended size range for the face
+ * @range_end: (out): The maximum size of the recommended size range for the face
+ *
+ * Fetches optical-size feature data (i.e., the `size` feature from GPOS). Note that
+ * the subfamily_id and the subfamily name string (accessible via the subfamily_name_id)
+ * as used here are defined as pertaining only to fonts within a font family that differ
+ * specifically in their respective size ranges; other ways to differentiate fonts within
+ * a subfamily are not covered by the `size` feature.
+ *
+ * For more information on this distinction, see the [`size` feature documentation](
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/features_pt#tag-size).
+ *
+ * Return value: true if data found, false otherwise
+ *
+ * Since: 0.9.10
+ **/
+hb_bool_t
+hb_ot_layout_get_size_params (hb_face_t       *face,
+                              unsigned int    *design_size,       /* OUT. May be NULL */
+                              unsigned int    *subfamily_id,      /* OUT. May be NULL */
+                              hb_ot_name_id_t *subfamily_name_id, /* OUT. May be NULL */
+                              unsigned int    *range_start,       /* OUT. May be NULL */
+                              unsigned int    *range_end          /* OUT. May be NULL */)
+{
+  const OT::GPOS &gpos = *face->table.GPOS->table;
+  const hb_tag_t tag = HB_TAG ('s','i','z','e');
+
+  /* Find the first 'size' feature whose parameters have a non-zero
+   * design size; entries with a zero design size are passed over. */
+  const OT::FeatureParamsSize *hit = nullptr;
+  const unsigned int total = gpos.get_feature_count ();
+  for (unsigned int idx = 0; !hit && idx < total; idx++)
+  {
+    if (gpos.get_feature_tag (idx) != tag)
+      continue;
+
+    const OT::FeatureParamsSize &candidate =
+      gpos.get_feature (idx).get_feature_params ().get_size_params (tag);
+    if (candidate.designSize)
+      hit = &candidate;
+  }
+
+  if (!hit)
+  {
+    /* Nothing usable: reset every requested output. */
+    if (design_size) *design_size = 0;
+    if (subfamily_id) *subfamily_id = 0;
+    if (subfamily_name_id) *subfamily_name_id = HB_OT_NAME_ID_INVALID;
+    if (range_start) *range_start = 0;
+    if (range_end) *range_end = 0;
+
+    return false;
+  }
+
+  if (design_size) *design_size = hit->designSize;
+  if (subfamily_id) *subfamily_id = hit->subfamilyID;
+  if (subfamily_name_id) *subfamily_name_id = hit->subfamilyNameID;
+  if (range_start) *range_start = hit->rangeStart;
+  if (range_end) *range_end = hit->rangeEnd;
+
+  return true;
+}
+/**
+ * hb_ot_layout_feature_get_name_ids:
+ * @face: #hb_face_t to work upon
+ * @table_tag: table tag to query, "GSUB" or "GPOS".
+ * @feature_index: index of feature to query.
+ * @label_id: (out) (allow-none): The ‘name’ table name ID that specifies a string
+ * for a user-interface label for this feature. (May be NULL.)
+ * @tooltip_id: (out) (allow-none): The ‘name’ table name ID that specifies a string
+ * that an application can use for tooltip text for this
+ * feature. (May be NULL.)
+ * @sample_id: (out) (allow-none): The ‘name’ table name ID that specifies sample text
+ * that illustrates the effect of this feature. (May be NULL.)
+ * @num_named_parameters: (out) (allow-none): Number of named parameters. (May be zero.)
+ * @first_param_id: (out) (allow-none): The first ‘name’ table name ID used to specify
+ * strings for user-interface labels for the feature
+ * parameters. (Must be zero if numParameters is zero.)
+ *
+ * Fetches name indices from feature parameters for "Stylistic Set" ('ssXX') or
+ * "Character Variant" ('cvXX') features.
+ *
+ * Return value: true if data found, false otherwise
+ *
+ * Since: 2.0.0
+ **/
+hb_bool_t
+hb_ot_layout_feature_get_name_ids (hb_face_t       *face,
+                                   hb_tag_t         table_tag,
+                                   unsigned int     feature_index,
+                                   hb_ot_name_id_t *label_id,             /* OUT. May be NULL */
+                                   hb_ot_name_id_t *tooltip_id,           /* OUT. May be NULL */
+                                   hb_ot_name_id_t *sample_id,            /* OUT. May be NULL */
+                                   unsigned int    *num_named_parameters, /* OUT. May be NULL */
+                                   hb_ot_name_id_t *first_param_id        /* OUT. May be NULL */)
+{
+  const OT::GSUBGPOS &table = get_gsubgpos_table (face, table_tag);
+
+  hb_tag_t feature_tag = table.get_feature_tag (feature_index);
+  const OT::Feature &feature = table.get_feature (feature_index);
+
+  /* Start from the "nothing found" answer and overwrite on a hit. */
+  hb_ot_name_id_t label = HB_OT_NAME_ID_INVALID;
+  hb_ot_name_id_t tooltip = HB_OT_NAME_ID_INVALID;
+  hb_ot_name_id_t sample = HB_OT_NAME_ID_INVALID;
+  unsigned int named_parameters = 0;
+  hb_ot_name_id_t first_param = HB_OT_NAME_ID_INVALID;
+  bool found = false;
+
+  const OT::FeatureParams &feature_params = feature.get_feature_params ();
+  if (&feature_params != &Null (OT::FeatureParams))
+  {
+    const OT::FeatureParamsStylisticSet &ss_params =
+      feature_params.get_stylistic_set_params (feature_tag);
+    if (&ss_params != &Null (OT::FeatureParamsStylisticSet)) /* ssXX */
+    {
+      /* ssXX features only carry a label; the rest keeps its default. */
+      label = ss_params.uiNameID;
+      found = true;
+    }
+    else
+    {
+      const OT::FeatureParamsCharacterVariants &cv_params =
+        feature_params.get_character_variants_params (feature_tag);
+      if (&cv_params != &Null (OT::FeatureParamsCharacterVariants)) /* cvXX */
+      {
+        label = cv_params.featUILableNameID;
+        tooltip = cv_params.featUITooltipTextNameID;
+        sample = cv_params.sampleTextNameID;
+        named_parameters = cv_params.numNamedParameters;
+        first_param = cv_params.firstParamUILabelNameID;
+        found = true;
+      }
+    }
+  }
+
+  /* Publish through whichever out-parameters the caller supplied. */
+  if (label_id) *label_id = label;
+  if (tooltip_id) *tooltip_id = tooltip;
+  if (sample_id) *sample_id = sample;
+  if (num_named_parameters) *num_named_parameters = named_parameters;
+  if (first_param_id) *first_param_id = first_param;
+  return found;
+}
+/**
+ * hb_ot_layout_feature_get_characters:
+ * @face: #hb_face_t to work upon
+ * @table_tag: table tag to query, "GSUB" or "GPOS".
+ * @feature_index: index of feature to query.
+ * @start_offset: offset of the first character to retrieve
+ * @char_count: (inout) (allow-none): Input = the maximum number of characters to return;
+ * Output = the actual number of characters returned (may be zero)
+ * @characters: (out caller-allocates) (array length=char_count): A buffer pointer.
+ * The Unicode codepoints of the characters for which this feature provides
+ * glyph variants.
+ *
+ * Fetches a list of the characters defined as having a variant under the specified
+ * "Character Variant" ("cvXX") feature tag.
+ *
+ * Return value: Number of total sample characters in the cvXX feature.
+ *
+ * Since: 2.0.0
+ **/
+unsigned int
+hb_ot_layout_feature_get_characters (hb_face_t      *face,
+                                     hb_tag_t        table_tag,
+                                     unsigned int    feature_index,
+                                     unsigned int    start_offset,
+                                     unsigned int   *char_count, /* IN/OUT. May be NULL */
+                                     hb_codepoint_t *characters  /* OUT. May be NULL */)
+{
+  const OT::GSUBGPOS &table = get_gsubgpos_table (face, table_tag);
+
+  /* Feature -> its parameters -> the character-variants view of them. */
+  const hb_tag_t feature_tag = table.get_feature_tag (feature_index);
+  const OT::Feature &feature = table.get_feature (feature_index);
+  const OT::FeatureParams &feature_params = feature.get_feature_params ();
+  const OT::FeatureParamsCharacterVariants &cv_params =
+    feature_params.get_character_variants_params (feature_tag);
+
+  return cv_params.get_characters (start_offset, char_count, characters);
+}
+#endif
+
+
+/*
+ * Parts of different types are implemented here such that they have direct
+ * access to GSUB/GPOS lookups.
+ */
+
+
+/* Bundles the GSUB table of a face with its per-lookup accelerators,
+ * plus the compile-time traits the generic apply code reads. */
+struct GSUBProxy
+{
+  using Lookup = OT::SubstLookup;
+
+  static constexpr unsigned table_index = 0u;
+  static constexpr bool inplace = false;
+
+  GSUBProxy (hb_face_t *face)
+    : table (*face->table.GSUB->table),
+      accels (face->table.GSUB->accels)
+  {}
+
+  const OT::GSUB &table;
+  const OT::hb_ot_layout_lookup_accelerator_t *accels;
+};
+
+/* Bundles the GPOS table of a face with its per-lookup accelerators,
+ * plus the compile-time traits the generic apply code reads. */
+struct GPOSProxy
+{
+  using Lookup = OT::PosLookup;
+
+  static constexpr unsigned table_index = 1u;
+  static constexpr bool inplace = true;
+
+  GPOSProxy (hb_face_t *face)
+    : table (*face->table.GPOS->table),
+      accels (face->table.GPOS->accels)
+  {}
+
+  const OT::GPOS &table;
+  const OT::hb_ot_layout_lookup_accelerator_t *accels;
+};
+
+
+static inline bool
+apply_forward (OT::hb_ot_apply_context_t                   *c,
+               const OT::hb_ot_layout_lookup_accelerator_t &accel)
+{
+  bool any_applied = false;
+  hb_buffer_t *buf = c->buffer;
+
+  for (;;)
+  {
+    if (!(buf->idx < buf->len) || !buf->successful)
+      break;
+
+    /* Cheap rejections first: accelerator filter, lookup mask, glyph properties. */
+    const bool eligible = accel.may_have (buf->cur().codepoint) &&
+                          (buf->cur().mask & c->lookup_mask) &&
+                          c->check_glyph_property (&buf->cur(), c->lookup_props);
+
+    if (eligible && accel.apply (c))
+    {
+      any_applied = true;
+      continue;
+    }
+
+    /* Nothing was applied at this position: step over the glyph ourselves. */
+    buf->next_glyph ();
+  }
+
+  return any_applied;
+}
+
+static inline bool
+apply_backward (OT::hb_ot_apply_context_t                   *c,
+                const OT::hb_ot_layout_lookup_accelerator_t &accel)
+{
+  bool any_applied = false;
+  hb_buffer_t *buf = c->buffer;
+
+  do
+  {
+    const bool eligible = accel.may_have (buf->cur().codepoint) &&
+                          (buf->cur().mask & c->lookup_mask) &&
+                          c->check_glyph_property (&buf->cur(), c->lookup_props);
+    if (eligible && accel.apply (c))
+      any_applied = true;
+
+    /* The reverse lookup doesn't "advance" cursor (for good reason),
+     * so the step toward the front of the buffer happens here. */
+    buf->idx--;
+  }
+  while ((int) buf->idx >= 0);
+
+  return any_applied;
+}
+
+template <typename Proxy>
+static inline void
+apply_string (OT::hb_ot_apply_context_t                   *c,
+              const typename Proxy::Lookup                &lookup,
+              const OT::hb_ot_layout_lookup_accelerator_t &accel)
+{
+  hb_buffer_t *buf = c->buffer;
+
+  /* Nothing to do for an empty buffer or an empty lookup mask. */
+  if (unlikely (!buf->len || !c->lookup_mask))
+    return;
+
+  c->set_lookup_props (lookup.get_props ());
+
+  if (unlikely (lookup.is_reverse ()))
+  {
+    /* in-place backward substitution/positioning */
+    if (Proxy::table_index == 0u)
+      buf->remove_output ();
+    buf->idx = buf->len - 1;
+
+    apply_backward (c, accel);
+    return;
+  }
+
+  /* in/out forward substitution/positioning */
+  if (Proxy::table_index == 0u)
+    buf->clear_output ();
+  buf->idx = 0;
+
+  if (!apply_forward (c, accel))
+    return;
+
+  if (Proxy::inplace)
+    assert (!buf->has_separate_output ());
+  else
+    buf->swap_buffers ();
+}
+
+template <typename Proxy>
+inline void hb_ot_map_t::apply (const Proxy              &proxy,
+                                const hb_ot_shape_plan_t *plan,
+                                hb_font_t                *font,
+                                hb_buffer_t              *buffer) const
+{
+  const unsigned int table_index = proxy.table_index;
+
+  OT::hb_ot_apply_context_t ctx (table_index, font, buffer);
+  ctx.set_recurse_func (Proxy::Lookup::apply_recurse_func);
+
+  /* The lookup cursor runs across all stages: each stage consumes the
+   * entries up to its own last_lookup and the next stage resumes there. */
+  unsigned int cursor = 0;
+  for (unsigned int stage_index = 0; stage_index < stages[table_index].length; stage_index++)
+  {
+    const stage_map_t &stage = stages[table_index][stage_index];
+
+    for (; cursor < stage.last_lookup; cursor++)
+    {
+      const auto &entry = lookups[table_index][cursor];
+      unsigned int lookup_index = entry.index;
+
+      /* A false return from the message call skips this lookup. */
+      if (!buffer->message (font, "start lookup %d", lookup_index)) continue;
+
+      ctx.set_lookup_index (lookup_index);
+      ctx.set_lookup_mask (entry.mask);
+      ctx.set_auto_zwj (entry.auto_zwj);
+      ctx.set_auto_zwnj (entry.auto_zwnj);
+      if (entry.random)
+      {
+        ctx.set_random (true);
+        buffer->unsafe_to_break_all ();
+      }
+
+      apply_string<Proxy> (&ctx,
+                           proxy.table.get_lookup (lookup_index),
+                           proxy.accels[lookup_index]);
+      (void) buffer->message (font, "end lookup %d", lookup_index);
+    }
+
+    /* Optional per-stage callback, run after the stage's lookups. */
+    if (stage.pause_func)
+    {
+      buffer->clear_output ();
+      stage.pause_func (plan, font, buffer);
+    }
+  }
+}
+
+void hb_ot_map_t::substitute (const hb_ot_shape_plan_t *plan,
+                              hb_font_t                *font,
+                              hb_buffer_t              *buffer) const
+{
+  GSUBProxy gsub_proxy (font->face);
+
+  /* A false return from the start message skips the whole table. */
+  const bool proceed = buffer->message (font, "start table GSUB");
+  if (!proceed) return;
+
+  apply (gsub_proxy, plan, font, buffer);
+  (void) buffer->message (font, "end table GSUB");
+}
+
+void hb_ot_map_t::position (const hb_ot_shape_plan_t *plan,
+                            hb_font_t                *font,
+                            hb_buffer_t              *buffer) const
+{
+  GPOSProxy gpos_proxy (font->face);
+
+  /* A false return from the start message skips the whole table. */
+  const bool proceed = buffer->message (font, "start table GPOS");
+  if (!proceed) return;
+
+  apply (gpos_proxy, plan, font, buffer);
+  (void) buffer->message (font, "end table GPOS");
+}
+
+void
+hb_ot_layout_substitute_lookup (OT::hb_ot_apply_context_t                   *c,
+                                const OT::SubstLookup                       &lookup,
+                                const OT::hb_ot_layout_lookup_accelerator_t &accel)
+{
+  /* Run the generic string-application routine with GSUB traits. */
+  apply_string<GSUBProxy> (c, lookup, accel);
+}
+
+#ifndef HB_NO_BASE
+/**
+ * hb_ot_layout_get_baseline:
+ * @font: a font
+ * @baseline_tag: a baseline tag
+ * @direction: text direction.
+ * @script_tag: script tag.
+ * @language_tag: language tag.
+ * @coord: (out): baseline value if found.
+ *
+ * Fetches a baseline value from the face.
+ *
+ * Return value: true if a baseline value was found in the font, false otherwise.
+ *
+ * Since: 2.6.0
+ **/
+hb_bool_t
+hb_ot_layout_get_baseline (hb_font_t                   *font,
+                           hb_ot_layout_baseline_tag_t  baseline_tag,
+                           hb_direction_t               direction,
+                           hb_tag_t                     script_tag,
+                           hb_tag_t                     language_tag,
+                           hb_position_t               *coord /* OUT. May be NULL. */)
+{
+  const bool found = font->face->table.BASE->get_baseline (font, baseline_tag, direction,
+                                                           script_tag, language_tag, coord);
+
+  /* No value to post-process on a miss or when the caller passed no output slot. */
+  if (!found || !coord)
+    return found;
+
+  /* Scale the value: y scale for horizontal text, x scale otherwise. */
+  if (HB_DIRECTION_IS_HORIZONTAL (direction))
+    *coord = font->em_scale_y (*coord);
+  else
+    *coord = font->em_scale_x (*coord);
+
+  return found;
+}
+#endif
+
+
+struct hb_get_glyph_alternates_dispatch_t : /* Dispatch context passed to lookup.dispatch() by hb_ot_layout_lookup_get_glyph_alternates(). */
+ hb_dispatch_context_t<hb_get_glyph_alternates_dispatch_t, unsigned>
+{
+ static return_t default_return_value () { return 0; } /* Zero is the "nothing produced" result. */
+ bool stop_sublookup_iteration (return_t r) const { return r; } /* True for any non-zero r. */
+
+ hb_face_t *face; /* Stored by the constructor; not read inside this struct. */
+
+ hb_get_glyph_alternates_dispatch_t (hb_face_t *face) :
+ face (face) {}
+
+ private:
+ template <typename T, typename ...Ts> auto
+ _dispatch (const T &obj, hb_priority<1>, Ts&&... ds) HB_AUTO_RETURN
+ ( obj.get_glyph_alternates (hb_forward<Ts> (ds)...) ) /* Forwards the arguments to obj.get_glyph_alternates(); presumably chosen only when T provides that method -- confirm against HB_AUTO_RETURN / hb_priority. */
+ template <typename T, typename ...Ts> auto
+ _dispatch (const T &obj, hb_priority<0>, Ts&&... ds) HB_AUTO_RETURN
+ ( default_return_value () ) /* Fallback overload: ignores obj and the arguments. */
+ public:
+ template <typename T, typename ...Ts> auto
+ dispatch (const T &obj, Ts&&... ds) HB_AUTO_RETURN
+ ( _dispatch (obj, hb_prioritize, hb_forward<Ts> (ds)...) ) /* Public entry point: routes to one of the _dispatch() overloads via hb_prioritize. */
+};
+
+/**
+ * hb_ot_layout_lookup_get_glyph_alternates:
+ * @face: a face.
+ * @lookup_index: index of the feature lookup to query.
+ * @glyph: a glyph id.
+ * @start_offset: starting offset.
+ * @alternate_count: (inout) (allow-none): Input = the maximum number of alternate glyphs to return;
+ * Output = the actual number of alternate glyphs returned (may be zero).
+ * @alternate_glyphs: (out caller-allocates) (array length=alternate_count): A glyphs buffer.
+ * Alternate glyphs associated with the glyph id.
+ *
+ * Fetches alternates of a glyph from a given GSUB lookup index.
+ *
+ * Return value: total number of alternates found in the specific lookup index for the given glyph id.
+ *
+ * Since: 2.6.8
+ **/
+HB_EXTERN unsigned
+hb_ot_layout_lookup_get_glyph_alternates (hb_face_t      *face,
+                                          unsigned        lookup_index,
+                                          hb_codepoint_t  glyph,
+                                          unsigned        start_offset,
+                                          unsigned       *alternate_count  /* IN/OUT. May be NULL. */,
+                                          hb_codepoint_t *alternate_glyphs /* OUT. May be NULL. */)
+{
+  hb_get_glyph_alternates_dispatch_t dispatcher (face);
+  const OT::GSUB &gsub = *face->table.GSUB->table;
+  const OT::SubstLookup &lookup = gsub.get_lookup (lookup_index);
+
+  auto total = lookup.dispatch (&dispatcher, glyph, start_offset, alternate_count, alternate_glyphs);
+  if (total)
+    return total;
+
+  /* A zero result: report zero returned alternates through the count, if given. */
+  if (alternate_count) *alternate_count = 0;
+  return total;
+}
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-layout.h b/thirdparty/harfbuzz/src/hb-ot-layout.h
new file mode 100644
index 0000000000..545d5f7fc4
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-layout.h
@@ -0,0 +1,462 @@
+/*
+ * Copyright © 2007,2008,2009 Red Hat, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_H_IN
+#error "Include <hb-ot.h> instead."
+#endif
+
+#ifndef HB_OT_LAYOUT_H
+#define HB_OT_LAYOUT_H
+
+#include "hb.h"
+
+#include "hb-ot-name.h"
+
+HB_BEGIN_DECLS
+
+
+#define HB_OT_TAG_BASE HB_TAG('B','A','S','E')
+#define HB_OT_TAG_GDEF HB_TAG('G','D','E','F')
+#define HB_OT_TAG_GSUB HB_TAG('G','S','U','B')
+#define HB_OT_TAG_GPOS HB_TAG('G','P','O','S')
+#define HB_OT_TAG_JSTF HB_TAG('J','S','T','F')
+
+
+/*
+ * Script & Language tags.
+ */
+
+#define HB_OT_TAG_DEFAULT_SCRIPT HB_TAG ('D', 'F', 'L', 'T')
+#define HB_OT_TAG_DEFAULT_LANGUAGE HB_TAG ('d', 'f', 'l', 't')
+
+/**
+ * HB_OT_MAX_TAGS_PER_SCRIPT:
+ *
+ * Since: 2.0.0
+ **/
+#define HB_OT_MAX_TAGS_PER_SCRIPT 3u
+/**
+ * HB_OT_MAX_TAGS_PER_LANGUAGE:
+ *
+ * Since: 2.0.0
+ **/
+#define HB_OT_MAX_TAGS_PER_LANGUAGE 3u
+
+HB_EXTERN void
+hb_ot_tags_from_script_and_language (hb_script_t script,
+ hb_language_t language,
+ unsigned int *script_count /* IN/OUT */,
+ hb_tag_t *script_tags /* OUT */,
+ unsigned int *language_count /* IN/OUT */,
+ hb_tag_t *language_tags /* OUT */);
+
+HB_EXTERN hb_script_t
+hb_ot_tag_to_script (hb_tag_t tag);
+
+HB_EXTERN hb_language_t
+hb_ot_tag_to_language (hb_tag_t tag);
+
+HB_EXTERN void
+hb_ot_tags_to_script_and_language (hb_tag_t script_tag,
+ hb_tag_t language_tag,
+ hb_script_t *script /* OUT */,
+ hb_language_t *language /* OUT */);
+
+
+/*
+ * GDEF
+ */
+
+HB_EXTERN hb_bool_t
+hb_ot_layout_has_glyph_classes (hb_face_t *face);
+
+/**
+ * hb_ot_layout_glyph_class_t:
+ * @HB_OT_LAYOUT_GLYPH_CLASS_UNCLASSIFIED: Glyphs not matching the other classifications
+ * @HB_OT_LAYOUT_GLYPH_CLASS_BASE_GLYPH: Spacing, single characters, capable of accepting marks
+ * @HB_OT_LAYOUT_GLYPH_CLASS_LIGATURE: Glyphs that represent ligation of multiple characters
+ * @HB_OT_LAYOUT_GLYPH_CLASS_MARK: Non-spacing, combining glyphs that represent marks
+ * @HB_OT_LAYOUT_GLYPH_CLASS_COMPONENT: Spacing glyphs that represent part of a single character
+ *
+ * The GDEF classes defined for glyphs.
+ *
+ **/
+typedef enum {
+ HB_OT_LAYOUT_GLYPH_CLASS_UNCLASSIFIED = 0,
+ HB_OT_LAYOUT_GLYPH_CLASS_BASE_GLYPH = 1,
+ HB_OT_LAYOUT_GLYPH_CLASS_LIGATURE = 2,
+ HB_OT_LAYOUT_GLYPH_CLASS_MARK = 3,
+ HB_OT_LAYOUT_GLYPH_CLASS_COMPONENT = 4
+} hb_ot_layout_glyph_class_t;
+
+HB_EXTERN hb_ot_layout_glyph_class_t
+hb_ot_layout_get_glyph_class (hb_face_t *face,
+ hb_codepoint_t glyph);
+
+HB_EXTERN void
+hb_ot_layout_get_glyphs_in_class (hb_face_t *face,
+ hb_ot_layout_glyph_class_t klass,
+ hb_set_t *glyphs /* OUT */);
+
+/* Not that useful. Provides list of attach points for a glyph that a
+ * client may want to cache */
+HB_EXTERN unsigned int
+hb_ot_layout_get_attach_points (hb_face_t *face,
+ hb_codepoint_t glyph,
+ unsigned int start_offset,
+ unsigned int *point_count /* IN/OUT */,
+ unsigned int *point_array /* OUT */);
+
+/* Ligature caret positions */
+HB_EXTERN unsigned int
+hb_ot_layout_get_ligature_carets (hb_font_t *font,
+ hb_direction_t direction,
+ hb_codepoint_t glyph,
+ unsigned int start_offset,
+ unsigned int *caret_count /* IN/OUT */,
+ hb_position_t *caret_array /* OUT */);
+
+
+/*
+ * GSUB/GPOS feature query and enumeration interface
+ */
+
+#define HB_OT_LAYOUT_NO_SCRIPT_INDEX 0xFFFFu
+#define HB_OT_LAYOUT_NO_FEATURE_INDEX 0xFFFFu
+#define HB_OT_LAYOUT_DEFAULT_LANGUAGE_INDEX 0xFFFFu
+#define HB_OT_LAYOUT_NO_VARIATIONS_INDEX 0xFFFFFFFFu
+
+HB_EXTERN unsigned int
+hb_ot_layout_table_get_script_tags (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int start_offset,
+ unsigned int *script_count /* IN/OUT */,
+ hb_tag_t *script_tags /* OUT */);
+
+HB_EXTERN hb_bool_t
+hb_ot_layout_table_find_script (hb_face_t *face,
+ hb_tag_t table_tag,
+ hb_tag_t script_tag,
+ unsigned int *script_index /* OUT */);
+
+HB_EXTERN hb_bool_t
+hb_ot_layout_table_select_script (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int script_count,
+ const hb_tag_t *script_tags,
+ unsigned int *script_index /* OUT */,
+ hb_tag_t *chosen_script /* OUT */);
+
+HB_EXTERN unsigned int
+hb_ot_layout_table_get_feature_tags (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int start_offset,
+ unsigned int *feature_count /* IN/OUT */,
+ hb_tag_t *feature_tags /* OUT */);
+
+HB_EXTERN unsigned int
+hb_ot_layout_script_get_language_tags (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int script_index,
+ unsigned int start_offset,
+ unsigned int *language_count /* IN/OUT */,
+ hb_tag_t *language_tags /* OUT */);
+
+HB_EXTERN hb_bool_t
+hb_ot_layout_script_select_language (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int script_index,
+ unsigned int language_count,
+ const hb_tag_t *language_tags,
+ unsigned int *language_index /* OUT */);
+
+HB_EXTERN hb_bool_t
+hb_ot_layout_language_get_required_feature_index (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int script_index,
+ unsigned int language_index,
+ unsigned int *feature_index /* OUT */);
+
+HB_EXTERN hb_bool_t
+hb_ot_layout_language_get_required_feature (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int script_index,
+ unsigned int language_index,
+ unsigned int *feature_index /* OUT */,
+ hb_tag_t *feature_tag /* OUT */);
+
+HB_EXTERN unsigned int
+hb_ot_layout_language_get_feature_indexes (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int script_index,
+ unsigned int language_index,
+ unsigned int start_offset,
+ unsigned int *feature_count /* IN/OUT */,
+ unsigned int *feature_indexes /* OUT */);
+
+HB_EXTERN unsigned int
+hb_ot_layout_language_get_feature_tags (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int script_index,
+ unsigned int language_index,
+ unsigned int start_offset,
+ unsigned int *feature_count /* IN/OUT */,
+ hb_tag_t *feature_tags /* OUT */);
+
+HB_EXTERN hb_bool_t
+hb_ot_layout_language_find_feature (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int script_index,
+ unsigned int language_index,
+ hb_tag_t feature_tag,
+ unsigned int *feature_index /* OUT */);
+
+HB_EXTERN unsigned int
+hb_ot_layout_feature_get_lookups (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int feature_index,
+ unsigned int start_offset,
+ unsigned int *lookup_count /* IN/OUT */,
+ unsigned int *lookup_indexes /* OUT */);
+
+HB_EXTERN unsigned int
+hb_ot_layout_table_get_lookup_count (hb_face_t *face,
+ hb_tag_t table_tag);
+
+HB_EXTERN void
+hb_ot_layout_collect_features (hb_face_t *face,
+ hb_tag_t table_tag,
+ const hb_tag_t *scripts,
+ const hb_tag_t *languages,
+ const hb_tag_t *features,
+ hb_set_t *feature_indexes /* OUT */);
+
+HB_EXTERN void
+hb_ot_layout_collect_lookups (hb_face_t *face,
+ hb_tag_t table_tag,
+ const hb_tag_t *scripts,
+ const hb_tag_t *languages,
+ const hb_tag_t *features,
+ hb_set_t *lookup_indexes /* OUT */);
+
+HB_EXTERN void
+hb_ot_layout_lookup_collect_glyphs (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int lookup_index,
+ hb_set_t *glyphs_before, /* OUT. May be NULL */
+ hb_set_t *glyphs_input, /* OUT. May be NULL */
+ hb_set_t *glyphs_after, /* OUT. May be NULL */
+ hb_set_t *glyphs_output /* OUT. May be NULL */);
+
+#ifdef HB_NOT_IMPLEMENTED
+typedef struct /* NOTE(review): before/input/after look like three glyph arrays, each paired with its length - confirm */
+{
+  const hb_codepoint_t *before;
+  unsigned int before_length;
+  const hb_codepoint_t *input;
+  unsigned int input_length;
+  const hb_codepoint_t *after;
+  unsigned int after_length;
+} hb_ot_layout_glyph_sequence_t;
+
+typedef hb_bool_t
+(*hb_ot_layout_glyph_sequence_func_t) (hb_font_t *font,
+ hb_tag_t table_tag,
+ unsigned int lookup_index,
+ const hb_ot_layout_glyph_sequence_t *sequence,
+ void *user_data);
+
+HB_EXTERN void
+Xhb_ot_layout_lookup_enumerate_sequences (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int lookup_index,
+ hb_ot_layout_glyph_sequence_func_t callback,
+ void *user_data);
+#endif
+
+/* Variations support */
+
+HB_EXTERN hb_bool_t
+hb_ot_layout_table_find_feature_variations (hb_face_t *face,
+ hb_tag_t table_tag,
+ const int *coords,
+ unsigned int num_coords,
+ unsigned int *variations_index /* out */);
+
+HB_EXTERN unsigned int
+hb_ot_layout_feature_with_variations_get_lookups (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int feature_index,
+ unsigned int variations_index,
+ unsigned int start_offset,
+ unsigned int *lookup_count /* IN/OUT */,
+ unsigned int *lookup_indexes /* OUT */);
+
+
+/*
+ * GSUB
+ */
+
+HB_EXTERN hb_bool_t
+hb_ot_layout_has_substitution (hb_face_t *face);
+
+HB_EXTERN unsigned
+hb_ot_layout_lookup_get_glyph_alternates (hb_face_t *face,
+ unsigned lookup_index,
+ hb_codepoint_t glyph,
+ unsigned start_offset,
+ unsigned *alternate_count /* IN/OUT. May be NULL. */,
+ hb_codepoint_t *alternate_glyphs /* OUT. May be NULL. */);
+
+HB_EXTERN hb_bool_t
+hb_ot_layout_lookup_would_substitute (hb_face_t *face,
+ unsigned int lookup_index,
+ const hb_codepoint_t *glyphs,
+ unsigned int glyphs_length,
+ hb_bool_t zero_context);
+
+HB_EXTERN void
+hb_ot_layout_lookup_substitute_closure (hb_face_t *face,
+ unsigned int lookup_index,
+ hb_set_t *glyphs
+ /*TODO , hb_bool_t inclusive */);
+
+HB_EXTERN void
+hb_ot_layout_lookups_substitute_closure (hb_face_t *face,
+ const hb_set_t *lookups,
+ hb_set_t *glyphs);
+
+
+#ifdef HB_NOT_IMPLEMENTED
+/* Note: You better have GDEF when using this API, or marks won't do much. */
+HB_EXTERN hb_bool_t
+Xhb_ot_layout_lookup_substitute (hb_font_t *font,
+ unsigned int lookup_index,
+ const hb_ot_layout_glyph_sequence_t *sequence,
+ unsigned int out_size,
+ hb_codepoint_t *glyphs_out, /* OUT */
+ unsigned int *clusters_out, /* OUT */
+ unsigned int *out_length /* OUT */);
+#endif
+
+
+/*
+ * GPOS
+ */
+
+HB_EXTERN hb_bool_t
+hb_ot_layout_has_positioning (hb_face_t *face);
+
+#ifdef HB_NOT_IMPLEMENTED
+/* Note: You better have GDEF when using this API, or marks won't do much. */
+HB_EXTERN hb_bool_t
+Xhb_ot_layout_lookup_position (hb_font_t *font,
+ unsigned int lookup_index,
+ const hb_ot_layout_glyph_sequence_t *sequence,
+ hb_glyph_position_t *positions /* IN / OUT */);
+#endif
+
+/* Optical 'size' feature info. Returns true if found.
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/features_pt#size */
+HB_EXTERN hb_bool_t
+hb_ot_layout_get_size_params (hb_face_t *face,
+ unsigned int *design_size, /* OUT. May be NULL */
+ unsigned int *subfamily_id, /* OUT. May be NULL */
+ hb_ot_name_id_t *subfamily_name_id, /* OUT. May be NULL */
+ unsigned int *range_start, /* OUT. May be NULL */
+ unsigned int *range_end /* OUT. May be NULL */);
+
+
+HB_EXTERN hb_bool_t
+hb_ot_layout_feature_get_name_ids (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int feature_index,
+ hb_ot_name_id_t *label_id /* OUT. May be NULL */,
+ hb_ot_name_id_t *tooltip_id /* OUT. May be NULL */,
+ hb_ot_name_id_t *sample_id /* OUT. May be NULL */,
+ unsigned int *num_named_parameters /* OUT. May be NULL */,
+ hb_ot_name_id_t *first_param_id /* OUT. May be NULL */);
+
+
+HB_EXTERN unsigned int
+hb_ot_layout_feature_get_characters (hb_face_t *face,
+ hb_tag_t table_tag,
+ unsigned int feature_index,
+ unsigned int start_offset,
+ unsigned int *char_count /* IN/OUT. May be NULL */,
+ hb_codepoint_t *characters /* OUT. May be NULL */);
+
+/*
+ * BASE
+ */
+
+/**
+ * hb_ot_layout_baseline_tag_t:
+ * @HB_OT_LAYOUT_BASELINE_TAG_ROMAN: The baseline used by alphabetic scripts such as Latin, Cyrillic and Greek.
+ * In vertical writing mode, the alphabetic baseline for characters rotated 90 degrees clockwise.
+ * (This would not apply to alphabetic characters that remain upright in vertical writing mode, since these
+ * characters are not rotated.)
+ * @HB_OT_LAYOUT_BASELINE_TAG_HANGING: The hanging baseline. In horizontal direction, this is the horizontal
+ * line from which syllables seem to hang in Tibetan and other similar scripts. In vertical writing mode,
+ * the same baseline for Tibetan (or some other similar script) characters rotated 90 degrees clockwise.
+ * @HB_OT_LAYOUT_BASELINE_TAG_IDEO_FACE_BOTTOM_OR_LEFT: Ideographic character face bottom or left edge,
+ * if the direction is horizontal or vertical, respectively.
+ * @HB_OT_LAYOUT_BASELINE_TAG_IDEO_FACE_TOP_OR_RIGHT: Ideographic character face top or right edge,
+ * if the direction is horizontal or vertical, respectively.
+ * @HB_OT_LAYOUT_BASELINE_TAG_IDEO_EMBOX_BOTTOM_OR_LEFT: Ideographic em-box bottom or left edge,
+ * if the direction is horizontal or vertical, respectively.
+ * @HB_OT_LAYOUT_BASELINE_TAG_IDEO_EMBOX_TOP_OR_RIGHT: Ideographic em-box top or right edge baseline,
+ * if the direction is horizontal or vertical, respectively.
+ * @HB_OT_LAYOUT_BASELINE_TAG_MATH: The baseline about which mathematical characters are centered.
+ * In vertical writing mode, the baseline about which mathematical characters rotated 90 degrees clockwise are centered.
+ *
+ * Baseline tags from https://docs.microsoft.com/en-us/typography/opentype/spec/baselinetags
+ *
+ * Since: 2.6.0
+ */
+typedef enum {
+ HB_OT_LAYOUT_BASELINE_TAG_ROMAN = HB_TAG ('r','o','m','n'),
+ HB_OT_LAYOUT_BASELINE_TAG_HANGING = HB_TAG ('h','a','n','g'),
+ HB_OT_LAYOUT_BASELINE_TAG_IDEO_FACE_BOTTOM_OR_LEFT = HB_TAG ('i','c','f','b'),
+ HB_OT_LAYOUT_BASELINE_TAG_IDEO_FACE_TOP_OR_RIGHT = HB_TAG ('i','c','f','t'),
+ HB_OT_LAYOUT_BASELINE_TAG_IDEO_EMBOX_BOTTOM_OR_LEFT = HB_TAG ('i','d','e','o'),
+ HB_OT_LAYOUT_BASELINE_TAG_IDEO_EMBOX_TOP_OR_RIGHT = HB_TAG ('i','d','t','p'),
+ HB_OT_LAYOUT_BASELINE_TAG_MATH = HB_TAG ('m','a','t','h'),
+
+ _HB_OT_LAYOUT_BASELINE_TAG_MAX_VALUE = HB_TAG_MAX_SIGNED /*< skip >*/
+} hb_ot_layout_baseline_tag_t;
+
+HB_EXTERN hb_bool_t
+hb_ot_layout_get_baseline (hb_font_t *font,
+ hb_ot_layout_baseline_tag_t baseline_tag,
+ hb_direction_t direction,
+ hb_tag_t script_tag,
+ hb_tag_t language_tag,
+ hb_position_t *coord /* OUT. May be NULL. */);
+
+HB_END_DECLS
+
+#endif /* HB_OT_LAYOUT_H */
diff --git a/thirdparty/harfbuzz/src/hb-ot-layout.hh b/thirdparty/harfbuzz/src/hb-ot-layout.hh
new file mode 100644
index 0000000000..f3bb15581a
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-layout.hh
@@ -0,0 +1,627 @@
+/*
+ * Copyright © 2007,2008,2009 Red Hat, Inc.
+ * Copyright © 2012,2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_LAYOUT_HH
+#define HB_OT_LAYOUT_HH
+
+#include "hb.hh"
+
+#include "hb-font.hh"
+#include "hb-buffer.hh"
+#include "hb-open-type.hh"
+#include "hb-ot-shape.hh"
+#include "hb-set-digest.hh"
+
+
+struct hb_ot_shape_plan_t;
+
+
+/*
+ * kern
+ */
+
+HB_INTERNAL bool
+hb_ot_layout_has_kerning (hb_face_t *face);
+
+HB_INTERNAL bool
+hb_ot_layout_has_machine_kerning (hb_face_t *face);
+
+HB_INTERNAL bool
+hb_ot_layout_has_cross_kerning (hb_face_t *face);
+
+HB_INTERNAL void
+hb_ot_layout_kern (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+
+
+/* Private API corresponding to hb-ot-layout.h: */
+
+HB_INTERNAL bool
+hb_ot_layout_table_find_feature (hb_face_t *face,
+ hb_tag_t table_tag,
+ hb_tag_t feature_tag,
+ unsigned int *feature_index);
+
+
+/*
+ * GDEF
+ */
+
+enum hb_ot_layout_glyph_props_flags_t
+{
+ /* The following three match LookupFlags::Ignore* numbers. */
+ HB_OT_LAYOUT_GLYPH_PROPS_BASE_GLYPH = 0x02u,
+ HB_OT_LAYOUT_GLYPH_PROPS_LIGATURE = 0x04u,
+ HB_OT_LAYOUT_GLYPH_PROPS_MARK = 0x08u,
+
+ /* The following are used internally; not derived from GDEF. */
+ HB_OT_LAYOUT_GLYPH_PROPS_SUBSTITUTED = 0x10u,
+ HB_OT_LAYOUT_GLYPH_PROPS_LIGATED = 0x20u,
+ HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED = 0x40u,
+
+ HB_OT_LAYOUT_GLYPH_PROPS_PRESERVE = HB_OT_LAYOUT_GLYPH_PROPS_SUBSTITUTED |
+ HB_OT_LAYOUT_GLYPH_PROPS_LIGATED |
+ HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED
+};
+HB_MARK_AS_FLAG_T (hb_ot_layout_glyph_props_flags_t);
+
+
+/*
+ * GSUB/GPOS
+ */
+
+
+/* Should be called before all the substitute_lookup's are done. */
+HB_INTERNAL void
+hb_ot_layout_substitute_start (hb_font_t *font,
+ hb_buffer_t *buffer);
+
+HB_INTERNAL void
+hb_ot_layout_delete_glyphs_inplace (hb_buffer_t *buffer,
+ bool (*filter) (const hb_glyph_info_t *info));
+
+namespace OT {
+ struct hb_ot_apply_context_t;
+ struct SubstLookup;
+ struct hb_ot_layout_lookup_accelerator_t;
+}
+
+HB_INTERNAL void
+hb_ot_layout_substitute_lookup (OT::hb_ot_apply_context_t *c,
+ const OT::SubstLookup &lookup,
+ const OT::hb_ot_layout_lookup_accelerator_t &accel);
+
+
+/* Should be called before all the position_lookup's are done. */
+HB_INTERNAL void
+hb_ot_layout_position_start (hb_font_t *font,
+ hb_buffer_t *buffer);
+
+/* Should be called after all the position_lookup's are done, to fini advances. */
+HB_INTERNAL void
+hb_ot_layout_position_finish_advances (hb_font_t *font,
+ hb_buffer_t *buffer);
+
+/* Should be called after hb_ot_layout_position_finish_advances, to fini offsets. */
+HB_INTERNAL void
+hb_ot_layout_position_finish_offsets (hb_font_t *font,
+ hb_buffer_t *buffer);
+
+
+/*
+ * Buffer var routines.
+ */
+
+/* buffer var allocations, used during the entire shaping process */
+#define unicode_props() var2.u16[0]
+
+/* buffer var allocations, used during the GSUB/GPOS processing */
+#define glyph_props() var1.u16[0] /* GDEF glyph properties */
+#define lig_props() var1.u8[2] /* GSUB/GPOS ligature tracking */
+#define syllable() var1.u8[3] /* GSUB/GPOS shaping boundaries */
+
+
+/* Loop over syllables: declares start/end in the for-init and visits each [start, end) run reported by _hb_next_syllable(). Based on foreach_cluster(). NOTE(review): the step expression calls _hb_next_syllable() once with start == _count - confirm info[_count] is readable. */
+#define foreach_syllable(buffer, start, end) \
+ for (unsigned int \
+ _count = buffer->len, \
+ start = 0, end = _count ? _hb_next_syllable (buffer, 0) : 0; \
+ start < _count; \
+ start = end, end = _hb_next_syllable (buffer, start))
+
+static inline unsigned int
+_hb_next_syllable (hb_buffer_t *buffer, unsigned int start)
+{
+  /* Returns the index just past the run of glyphs sharing info[start]'s syllable() value. */
+  const hb_glyph_info_t *glyphs = buffer->info;
+  const unsigned int len = buffer->len;
+  const unsigned int wanted = glyphs[start].syllable();
+
+  for (start++; start < len; start++)
+    if (glyphs[start].syllable() != wanted) break;
+  return start;
+}
+
+static inline void
+_hb_clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                     hb_font_t *font HB_UNUSED,
+                     hb_buffer_t *buffer)
+{
+  /* Zero the syllable() byte of every glyph currently in the buffer. */
+  hb_glyph_info_t *glyphs = buffer->info;
+  for (unsigned int remaining = buffer->len; remaining; remaining--)
+    glyphs[remaining - 1].syllable() = 0;
+}
+
+
+/* unicode_props */
+
+/* Design:
+ * unicode_props() is a two-byte number. The low byte includes:
+ * - General_Category: 5 bits.
+ * - A bit each for:
+ * * Is it Default_Ignorable(); we have a modified Default_Ignorable().
+ * * Whether it's one of the three Mongolian Free Variation Selectors,
+ * CGJ, or other characters that are hidden but should not be ignored
+ * like most other Default_Ignorable()s do during matching.
+ * * Whether it's a grapheme continuation.
+ *
+ * The high-byte has different meanings, switched by the Gen-Cat:
+ * - For Mn,Mc,Me: the modified Combining_Class.
+ * - For Cf: whether it's ZWJ, ZWNJ, or something else.
+ * - For Ws: index of which space character this is, if space fallback
+ * is needed, ie. we don't set this by default, only if asked to.
+ */
+
+enum hb_unicode_props_flags_t {
+ UPROPS_MASK_GEN_CAT = 0x001Fu, /* low five bits: General_Category value */
+ UPROPS_MASK_IGNORABLE = 0x0020u, /* set when is_default_ignorable() reported true */
+ UPROPS_MASK_HIDDEN = 0x0040u, /* MONGOLIAN FREE VARIATION SELECTOR 1..3, TAG characters, or CGJ */
+ UPROPS_MASK_CONTINUATION=0x0080u, /* grapheme continuation */
+
+ /* If GEN_CAT=FORMAT, top byte masks: */
+ UPROPS_MASK_Cf_ZWJ = 0x0100u,
+ UPROPS_MASK_Cf_ZWNJ = 0x0200u
+};
+HB_MARK_AS_FLAG_T (hb_unicode_props_flags_t);
+
+static inline void
+_hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_buffer_t *buffer)
+{
+ hb_unicode_funcs_t *unicode = buffer->unicode;
+ unsigned int u = info->codepoint;
+ unsigned int gen_cat = (unsigned int) unicode->general_category (u);
+ unsigned int props = gen_cat; /* starts as the general category; flag bits are OR-ed in below */
+
+ if (u >= 0x80u) /* code points below 0x80 store the general category only */
+ {
+ buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_NON_ASCII;
+
+ if (unlikely (unicode->is_default_ignorable (u)))
+ {
+ buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES;
+ props |= UPROPS_MASK_IGNORABLE;
+ if (u == 0x200Cu) props |= UPROPS_MASK_Cf_ZWNJ;
+ else if (u == 0x200Du) props |= UPROPS_MASK_Cf_ZWJ;
+ /* Mongolian Free Variation Selectors need to be remembered
+ * because although we need to hide them like default-ignorables,
+ * they need to be non-ignorable during shaping. This is similar to
+ * what we do for joiners in Indic-like shapers, but since the
+ * FVSes are GC=Mn, we have to use a separate bit to remember them.
+ * Fixes:
+ * https://github.com/harfbuzz/harfbuzz/issues/234 */
+ else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x180Bu, 0x180Du))) props |= UPROPS_MASK_HIDDEN;
+ /* TAG characters need similar treatment. Fixes:
+ * https://github.com/harfbuzz/harfbuzz/issues/463 */
+ else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0xE0020u, 0xE007Fu))) props |= UPROPS_MASK_HIDDEN;
+ /* COMBINING GRAPHEME JOINER should not be skipped; at least sometimes.
+ * https://github.com/harfbuzz/harfbuzz/issues/554 */
+ else if (unlikely (u == 0x034Fu))
+ {
+ buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_CGJ;
+ props |= UPROPS_MASK_HIDDEN;
+ }
+ }
+
+ if (unlikely (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (gen_cat)))
+ {
+ props |= UPROPS_MASK_CONTINUATION;
+ props |= unicode->modified_combining_class (u)<<8; /* marks keep their modified combining class in the high byte */
+ }
+ }
+
+ info->unicode_props() = props;
+}
+
+static inline void
+_hb_glyph_info_set_general_category (hb_glyph_info_t *info,
+                                     hb_unicode_general_category_t gen_cat)
+{
+  const unsigned int kept_flags = info->unicode_props() & (0xFF & ~UPROPS_MASK_GEN_CAT); /* low-byte flags only; high byte dropped */
+  info->unicode_props() = (unsigned int) gen_cat | kept_flags;
+}
+
+static inline hb_unicode_general_category_t
+_hb_glyph_info_get_general_category (const hb_glyph_info_t *info)
+{
+ return (hb_unicode_general_category_t) (info->unicode_props() & UPROPS_MASK_GEN_CAT); /* low five bits of unicode_props() */
+}
+
+static inline bool
+_hb_glyph_info_is_unicode_mark (const hb_glyph_info_t *info)
+{
+ return HB_UNICODE_GENERAL_CATEGORY_IS_MARK (info->unicode_props() & UPROPS_MASK_GEN_CAT); /* tests the stored general category only */
+}
+static inline void
+_hb_glyph_info_set_modified_combining_class (hb_glyph_info_t *info,
+                                             unsigned int modified_class)
+{
+  if (unlikely (!_hb_glyph_info_is_unicode_mark (info))) return; /* high byte is a combining class only for marks */
+  const unsigned int low_byte = info->unicode_props() & 0xFF;
+  info->unicode_props() = low_byte | (modified_class<<8);
+}
+static inline unsigned int
+_hb_glyph_info_get_modified_combining_class (const hb_glyph_info_t *info)
+{
+ return _hb_glyph_info_is_unicode_mark (info) ? info->unicode_props()>>8 : 0; /* high byte for marks, 0 for everything else */
+}
+#define info_cc(info) (_hb_glyph_info_get_modified_combining_class (&(info)))
+
+static inline bool
+_hb_glyph_info_is_unicode_space (const hb_glyph_info_t *info)
+{
+  const hb_unicode_general_category_t gen_cat = _hb_glyph_info_get_general_category (info);
+  return HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR == gen_cat; /* compares the stored category, nothing else */
+}
+static inline void
+_hb_glyph_info_set_unicode_space_fallback_type (hb_glyph_info_t *info, hb_unicode_funcs_t::space_t s)
+{
+  if (unlikely (!_hb_glyph_info_is_unicode_space (info))) return; /* high byte is a space type only for space separators */
+  const unsigned int low_byte = info->unicode_props() & 0xFF;
+  info->unicode_props() = low_byte | (((unsigned int) s)<<8);
+}
+static inline hb_unicode_funcs_t::space_t
+_hb_glyph_info_get_unicode_space_fallback_type (const hb_glyph_info_t *info)
+{
+  if (!_hb_glyph_info_is_unicode_space (info)) return hb_unicode_funcs_t::NOT_SPACE; /* only space separators carry a space type */
+  const unsigned int high_byte = info->unicode_props()>>8;
+  return (hb_unicode_funcs_t::space_t) high_byte;
+}
+
+static inline bool _hb_glyph_info_ligated (const hb_glyph_info_t *info);
+
+static inline bool
+_hb_glyph_info_is_default_ignorable (const hb_glyph_info_t *info)
+{
+  if (_hb_glyph_info_ligated (info)) return false; /* glyphs flagged as ligated are never reported ignorable */
+  return (info->unicode_props() & UPROPS_MASK_IGNORABLE) != 0;
+}
+static inline bool
+_hb_glyph_info_is_default_ignorable_and_not_hidden (const hb_glyph_info_t *info)
+{
+  const unsigned int bits = info->unicode_props() & (UPROPS_MASK_IGNORABLE|UPROPS_MASK_HIDDEN);
+  /* True when IGNORABLE is set, HIDDEN is clear, and the glyph is not flagged as ligated. */
+  return bits == UPROPS_MASK_IGNORABLE && !_hb_glyph_info_ligated (info);
+}
+static inline void
+_hb_glyph_info_unhide (hb_glyph_info_t *info)
+{
+ info->unicode_props() &= ~ UPROPS_MASK_HIDDEN; /* clears the HIDDEN bit; all other bits are kept */
+}
+
+static inline void
+_hb_glyph_info_set_continuation (hb_glyph_info_t *info)
+{
+ info->unicode_props() |= UPROPS_MASK_CONTINUATION; /* sets the CONTINUATION bit; all other bits are kept */
+}
+static inline void
+_hb_glyph_info_reset_continuation (hb_glyph_info_t *info)
+{
+ info->unicode_props() &= ~ UPROPS_MASK_CONTINUATION; /* clears the CONTINUATION bit; all other bits are kept */
+}
+static inline bool
+_hb_glyph_info_is_continuation (const hb_glyph_info_t *info)
+{
+ return info->unicode_props() & UPROPS_MASK_CONTINUATION; /* masked value converts to bool: true when the bit is set */
+}
+/* Loop over graphemes: declares start/end in the for-init and visits each [start, end) run reported by _hb_next_grapheme(). Based on foreach_cluster(). NOTE(review): the step expression calls _hb_next_grapheme() once with start == _count - confirm that is intended. */
+#define foreach_grapheme(buffer, start, end) \
+ for (unsigned int \
+ _count = buffer->len, \
+ start = 0, end = _count ? _hb_next_grapheme (buffer, 0) : 0; \
+ start < _count; \
+ start = end, end = _hb_next_grapheme (buffer, start))
+
+static inline unsigned int
+_hb_next_grapheme (hb_buffer_t *buffer, unsigned int start)
+{
+  /* Step past `start`, then past every following glyph flagged as a continuation. */
+  const hb_glyph_info_t *glyphs = buffer->info;
+  const unsigned int len = buffer->len;
+
+  for (start++; start < len; start++)
+    if (!_hb_glyph_info_is_continuation (glyphs + start)) break;
+  return start;
+}
+
+static inline bool
+_hb_glyph_info_is_unicode_format (const hb_glyph_info_t *info)
+{
+  const hb_unicode_general_category_t gen_cat = _hb_glyph_info_get_general_category (info);
+  return HB_UNICODE_GENERAL_CATEGORY_FORMAT == gen_cat; /* compares the stored category, nothing else */
+}
+static inline bool
+_hb_glyph_info_is_zwnj (const hb_glyph_info_t *info)
+{
+ return _hb_glyph_info_is_unicode_format (info) && (info->unicode_props() & UPROPS_MASK_Cf_ZWNJ); /* the ZWNJ bit counts only when the category is FORMAT */
+}
+static inline bool
+_hb_glyph_info_is_zwj (const hb_glyph_info_t *info)
+{
+ return _hb_glyph_info_is_unicode_format (info) && (info->unicode_props() & UPROPS_MASK_Cf_ZWJ); /* the ZWJ bit counts only when the category is FORMAT */
+}
+static inline bool
+_hb_glyph_info_is_joiner (const hb_glyph_info_t *info)
+{
+ return _hb_glyph_info_is_unicode_format (info) && (info->unicode_props() & (UPROPS_MASK_Cf_ZWNJ|UPROPS_MASK_Cf_ZWJ)); /* either joiner bit, FORMAT category required */
+}
+static inline void
+_hb_glyph_info_flip_joiners (hb_glyph_info_t *info)
+{
+  /* Toggle both joiner bits at once; glyphs whose category is not FORMAT are left alone. */
+  if (_hb_glyph_info_is_unicode_format (info))
+    info->unicode_props() ^= UPROPS_MASK_Cf_ZWNJ | UPROPS_MASK_Cf_ZWJ;
+}
+
+/* lig_props: aka lig_id / lig_comp
+ *
+ * When a ligature is formed:
+ *
+ * - The ligature glyph and any marks in between all get the same newly allocated
+ * lig_id,
+ * - The ligature glyph will get lig_num_comps set to the number of components
+ * - The marks get lig_comp > 0, reflecting which component of the ligature
+ * they were applied to.
+ * - This is used in GPOS to attach marks to the right component of a ligature
+ * in MarkLigPos,
+ * - Note that when marks are ligated together, much of the above is skipped
+ * and the current lig_id reused.
+ *
+ * When a multiple-substitution is done:
+ *
+ * - All resulting glyphs will have lig_id = 0,
+ * - The resulting glyphs will have lig_comp = 0, 1, 2, ... respectively.
+ * - This is used in GPOS to attach marks to the first component of a
+ * multiple substitution in MarkBasePos.
+ *
+ * The numbers are also used in GPOS to do mark-to-mark positioning only
+ * to marks that belong to the same component of the same ligature.
+ */
+
+static inline void
+_hb_glyph_info_clear_lig_props (hb_glyph_info_t *info)
+{
+ info->lig_props() = 0; /* zeroes the whole lig_props byte: id, base flag and low nibble */
+}
+
+#define IS_LIG_BASE 0x10
+
+static inline void
+_hb_glyph_info_set_lig_props_for_ligature (hb_glyph_info_t *info,
+ unsigned int lig_id,
+ unsigned int lig_num_comps)
+{
+ info->lig_props() = (lig_id << 5) | IS_LIG_BASE | (lig_num_comps & 0x0F); /* bits 7..5: lig_id, bit 4: base flag, bits 3..0: component count */
+}
+
+static inline void
+_hb_glyph_info_set_lig_props_for_mark (hb_glyph_info_t *info,
+ unsigned int lig_id,
+ unsigned int lig_comp)
+{
+ info->lig_props() = (lig_id << 5) | (lig_comp & 0x0F); /* bits 7..5: lig_id, bit 4 (base flag) clear, bits 3..0: component index */
+}
+
+static inline void
+_hb_glyph_info_set_lig_props_for_component (hb_glyph_info_t *info, unsigned int comp)
+{
+ _hb_glyph_info_set_lig_props_for_mark (info, 0, comp); /* same encoding as a mark, with lig_id fixed at 0 */
+}
+
+static inline unsigned int
+_hb_glyph_info_get_lig_id (const hb_glyph_info_t *info)
+{
+ return info->lig_props() >> 5; /* top three bits of the lig_props byte */
+}
+
+static inline bool
+_hb_glyph_info_ligated_internal (const hb_glyph_info_t *info)
+{
+ return !!(info->lig_props() & IS_LIG_BASE); /* reads the base flag in lig_props(), not the LIGATED bit of glyph_props() */
+}
+
+static inline unsigned int
+_hb_glyph_info_get_lig_comp (const hb_glyph_info_t *info)
+{
+  /* On a ligature base the low nibble is a component count, not an index,
+   * so such glyphs report component 0. */
+  const bool is_lig_base = _hb_glyph_info_ligated_internal (info);
+  return is_lig_base ? 0 : info->lig_props() & 0x0F;
+}
+
+static inline unsigned int
+_hb_glyph_info_get_lig_num_comps (const hb_glyph_info_t *info)
+{
+  /* The low nibble is a count only when glyph_props() has LIGATURE set and lig_props() has the base flag. */
+  const bool has_ligature_prop = !!(info->glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_LIGATURE);
+  if (!has_ligature_prop || !_hb_glyph_info_ligated_internal (info))
+    return 1;
+  return info->lig_props() & 0x0F;
+}
+
+static inline uint8_t
+_hb_allocate_lig_id (hb_buffer_t *buffer) {
+  uint8_t fresh_id; /* three-bit id; zero is never handed out */
+  do fresh_id = buffer->next_serial () & 0x07;
+  while (unlikely (!fresh_id)); /* low three serial bits were zero: draw again */
+  return fresh_id;
+}
+
+/* glyph_props: */
+
+static inline void
+_hb_glyph_info_set_glyph_props (hb_glyph_info_t *info, unsigned int props)
+{
+ info->glyph_props() = props; /* overwrites the whole 16-bit field, internal flags included */
+}
+
+static inline unsigned int
+_hb_glyph_info_get_glyph_props (const hb_glyph_info_t *info)
+{
+ return info->glyph_props(); /* raw field, unmasked */
+}
+
+static inline bool
+_hb_glyph_info_is_base_glyph (const hb_glyph_info_t *info)
+{
+ return !!(info->glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_BASE_GLYPH); /* bit 0x02 of glyph_props() */
+}
+
+static inline bool
+_hb_glyph_info_is_ligature (const hb_glyph_info_t *info)
+{
+ return !!(info->glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_LIGATURE); /* bit 0x04 of glyph_props() */
+}
+
+static inline bool
+_hb_glyph_info_is_mark (const hb_glyph_info_t *info)
+{
+ return !!(info->glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_MARK); /* bit 0x08 of glyph_props(); independent of the Unicode category */
+}
+
+static inline bool
+_hb_glyph_info_substituted (const hb_glyph_info_t *info)
+{
+ return !!(info->glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_SUBSTITUTED); /* internal flag, bit 0x10 */
+}
+
+static inline bool
+_hb_glyph_info_ligated (const hb_glyph_info_t *info)
+{
+ return !!(info->glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_LIGATED); /* internal flag, bit 0x20 */
+}
+
+static inline bool
+_hb_glyph_info_multiplied (const hb_glyph_info_t *info)
+{
+ return !!(info->glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED); /* internal flag, bit 0x40 */
+}
+
+static inline bool
+_hb_glyph_info_ligated_and_didnt_multiply (const hb_glyph_info_t *info)
+{
+ return _hb_glyph_info_ligated (info) && !_hb_glyph_info_multiplied (info); /* LIGATED set and MULTIPLIED clear */
+}
+
+static inline void
+_hb_glyph_info_clear_ligated_and_multiplied (hb_glyph_info_t *info)
+{
+  const unsigned int cleared = HB_OT_LAYOUT_GLYPH_PROPS_LIGATED | HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED;
+  info->glyph_props() &= ~cleared; /* every other bit, SUBSTITUTED included, is kept */
+}
+
+static inline void
+_hb_glyph_info_clear_substituted (hb_glyph_info_t *info)
+{
+ info->glyph_props() &= ~(HB_OT_LAYOUT_GLYPH_PROPS_SUBSTITUTED); /* clears only the SUBSTITUTED flag */
+}
+
+static inline void
+_hb_clear_substitution_flags (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                              hb_font_t *font HB_UNUSED,
+                              hb_buffer_t *buffer)
+{
+  /* Drop the SUBSTITUTED flag from every glyph currently in the buffer. */
+  hb_glyph_info_t *glyphs = buffer->info;
+  for (unsigned int remaining = buffer->len; remaining; remaining--)
+    _hb_glyph_info_clear_substituted (glyphs + (remaining - 1));
+}
+
+
+/* Allocation / deallocation. */
+
+static inline void
+_hb_buffer_allocate_unicode_vars (hb_buffer_t *buffer)
+{
+ HB_BUFFER_ALLOCATE_VAR (buffer, unicode_props); /* unicode_props() maps to var2.u16[0]; presumably marks that slot as in use - confirm against the macro */
+}
+
+static inline void
+_hb_buffer_deallocate_unicode_vars (hb_buffer_t *buffer)
+{
+ HB_BUFFER_DEALLOCATE_VAR (buffer, unicode_props); /* counterpart of _hb_buffer_allocate_unicode_vars() */
+}
+
+static inline void
+_hb_buffer_assert_unicode_vars (hb_buffer_t *buffer)
+{
+ HB_BUFFER_ASSERT_VAR (buffer, unicode_props); /* presumably checks the slot is currently allocated - confirm against the macro */
+}
+
+static inline void
+_hb_buffer_allocate_gsubgpos_vars (hb_buffer_t *buffer)
+{
+ HB_BUFFER_ALLOCATE_VAR (buffer, glyph_props); /* var1.u16[0] */
+ HB_BUFFER_ALLOCATE_VAR (buffer, lig_props); /* var1.u8[2] */
+ HB_BUFFER_ALLOCATE_VAR (buffer, syllable); /* var1.u8[3] */
+}
+
+static inline void
+_hb_buffer_deallocate_gsubgpos_vars (hb_buffer_t *buffer)
+{
+ HB_BUFFER_DEALLOCATE_VAR (buffer, syllable); /* released in the reverse of the allocation order */
+ HB_BUFFER_DEALLOCATE_VAR (buffer, lig_props);
+ HB_BUFFER_DEALLOCATE_VAR (buffer, glyph_props);
+}
+
+static inline void
+_hb_buffer_assert_gsubgpos_vars (hb_buffer_t *buffer)
+{
+ HB_BUFFER_ASSERT_VAR (buffer, glyph_props); /* the same three vars that _hb_buffer_allocate_gsubgpos_vars() allocates */
+ HB_BUFFER_ASSERT_VAR (buffer, lig_props);
+ HB_BUFFER_ASSERT_VAR (buffer, syllable);
+}
+
+/* Make sure no one directly touches our props... */
+#undef unicode_props0 /* NOTE(review): this header defines unicode_props(), not unicode_props0 or unicode_props1, so these two lines look like no-ops - confirm */
+#undef unicode_props1
+#undef lig_props /* syllable() and unicode_props() stay defined after this point */
+#undef glyph_props
+
+#endif /* HB_OT_LAYOUT_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-map.cc b/thirdparty/harfbuzz/src/hb-ot-map.cc
new file mode 100644
index 0000000000..e4bb4b6366
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-map.cc
@@ -0,0 +1,342 @@
+/*
+ * Copyright © 2009,2010 Red Hat, Inc.
+ * Copyright © 2010,2011,2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#include "hb-ot-map.hh"
+#include "hb-ot-shape.hh"
+#include "hb-ot-layout.hh"
+
+
+/* Adds the index of every lookup recorded for the given table
+ * (0 = GSUB, 1 = GPOS) to lookups_out. */
+void hb_ot_map_t::collect_lookups (unsigned int table_index, hb_set_t *lookups_out) const
+{
+  const hb_vector_t<lookup_map_t> &table_lookups = lookups[table_index];
+  const unsigned int count = table_lookups.length;
+  for (unsigned int n = 0; n < count; n++)
+    lookups_out->add (table_lookups[n].index);
+}
+
+
+/* Constructs a builder for the given face and segment properties.
+ *
+ * The object is zero-filled, its vectors are initialized, and then the
+ * script and language indices are resolved for both layout tables
+ * (index 0 = GSUB, index 1 = GPOS, per table_tags[]). */
+hb_ot_map_builder_t::hb_ot_map_builder_t (hb_face_t *face_,
+ const hb_segment_properties_t *props_)
+{
+ /* Zero every member first; the vector members are init()'ed right after. */
+ memset (this, 0, sizeof (*this));
+
+ feature_infos.init ();
+ for (unsigned int table_index = 0; table_index < 2; table_index++)
+ stages[table_index].init ();
+
+ face = face_;
+ props = *props_;
+
+
+ /* Fetch script/language indices for GSUB/GPOS. We need these later to skip
+ * features not available in either table and not waste precious bits for them. */
+
+ /* In/out counts: start at the array capacities declared just below. */
+ unsigned int script_count = HB_OT_MAX_TAGS_PER_SCRIPT;
+ unsigned int language_count = HB_OT_MAX_TAGS_PER_LANGUAGE;
+ hb_tag_t script_tags[HB_OT_MAX_TAGS_PER_SCRIPT];
+ hb_tag_t language_tags[HB_OT_MAX_TAGS_PER_LANGUAGE];
+
+ hb_ot_tags_from_script_and_language (props.script, props.language, &script_count, script_tags, &language_count, language_tags);
+
+ for (unsigned int table_index = 0; table_index < 2; table_index++) {
+ hb_tag_t table_tag = table_tags[table_index];
+ /* Records whether a script was matched, plus its index and chosen tag. */
+ found_script[table_index] = (bool) hb_ot_layout_table_select_script (face, table_tag, script_count, script_tags, &script_index[table_index], &chosen_script[table_index]);
+ /* The return value is deliberately ignored; only language_index is kept. */
+ hb_ot_layout_script_select_language (face, table_tag, script_index[table_index], language_count, language_tags, &language_index[table_index]);
+ }
+}
+
+/* Releases the feature list and the per-table (GSUB, GPOS) stage lists. */
+hb_ot_map_builder_t::~hb_ot_map_builder_t ()
+{
+  feature_infos.fini ();
+  stages[0].fini ();
+  stages[1].fini ();
+}
+
+/* Queues a feature request.
+ *
+ * A zero tag is ignored.  Otherwise a new entry is appended that records the
+ * tag, flags, the requested value as max_value, and the current GSUB/GPOS
+ * stage numbers.  The default value is `value` for F_GLOBAL features and 0
+ * for all others.  seq is set from the list length after the push and is
+ * used only to keep sorting stable. */
+void hb_ot_map_builder_t::add_feature (hb_tag_t tag,
+                                       hb_ot_map_feature_flags_t flags,
+                                       unsigned int value)
+{
+  if (unlikely (!tag))
+    return;
+
+  feature_info_t *entry = feature_infos.push();
+  entry->tag = tag;
+  entry->seq = feature_infos.length;
+  entry->max_value = value;
+  entry->flags = flags;
+  if (flags & F_GLOBAL)
+    entry->default_value = value;
+  else
+    entry->default_value = 0;
+  entry->stage[0] = current_stage[0];
+  entry->stage[1] = current_stage[1];
+}
+
+/* Appends to m.lookups[table_index] one entry for each lookup of the given
+ * feature (with variations applied).
+ *
+ * Lookup indices are fetched in batches of 32; fetching stops after the first
+ * batch that comes back short.  Indices that are not below the table's lookup
+ * count are dropped.  Every entry gets the supplied mask and flags. */
+void
+hb_ot_map_builder_t::add_lookups (hb_ot_map_t &m,
+                                  unsigned int table_index,
+                                  unsigned int feature_index,
+                                  unsigned int variations_index,
+                                  hb_mask_t mask,
+                                  bool auto_zwnj,
+                                  bool auto_zwj,
+                                  bool random)
+{
+  unsigned int lookup_indices[32];
+  unsigned int table_lookup_count;
+
+  table_lookup_count = hb_ot_layout_table_get_lookup_count (face, table_tags[table_index]);
+
+  unsigned int offset = 0;
+  for (;;)
+  {
+    /* In: capacity of lookup_indices.  Out: number of indices written. */
+    unsigned int fetched = ARRAY_LENGTH (lookup_indices);
+    hb_ot_layout_feature_with_variations_get_lookups (face,
+                                                      table_tags[table_index],
+                                                      feature_index,
+                                                      variations_index,
+                                                      offset, &fetched,
+                                                      lookup_indices);
+
+    for (unsigned int n = 0; n < fetched; n++)
+    {
+      if (lookup_indices[n] < table_lookup_count)
+      {
+        hb_ot_map_t::lookup_map_t *entry = m.lookups[table_index].push ();
+        entry->mask = mask;
+        entry->index = lookup_indices[n];
+        entry->auto_zwnj = auto_zwnj;
+        entry->auto_zwj = auto_zwj;
+        entry->random = random;
+      }
+    }
+
+    /* A short batch means there is nothing left to fetch. */
+    if (fetched != ARRAY_LENGTH (lookup_indices))
+      break;
+    offset += fetched;
+  }
+}
+
+
+/* Records a pause for the given table at the current stage, then advances
+ * that table's stage counter.  pause_func is stored as given. */
+void hb_ot_map_builder_t::add_pause (unsigned int table_index, hb_ot_map_t::pause_func_t pause_func)
+{
+  stage_info_t *pause = stages[table_index].push ();
+  pause->pause_func = pause_func;
+  pause->index = current_stage[table_index]++;
+}
+
+/* Compiles the queued feature requests into the final map `m`.
+ *
+ * Steps, in order:
+ *  1. Copy the script selection results into `m` and look up the required
+ *     feature of each table (0 = GSUB, 1 = GPOS).
+ *  2. Sort the requested features by tag (then sequence number) and merge
+ *     entries that share a tag.
+ *  3. Allocate mask bits to every remaining feature and record it in
+ *     m.features; features that are disabled, do not fit in the mask, or are
+ *     not found (and have no fallback) are skipped.
+ *  4. For every stage of every table, gather the lookups of the features in
+ *     that stage, sort them, merge duplicates and record stage boundaries.
+ *
+ * `key` supplies the per-table variations index used when collecting lookups.
+ */
+void
+hb_ot_map_builder_t::compile (hb_ot_map_t &m,
+                              const hb_ot_shape_plan_key_t &key)
+{
+  /* HB_GLYPH_FLAG_DEFINED must be of the form 2^k - 1 so that the bit right
+   * above it can serve as the global bit. */
+  static_assert ((!(HB_GLYPH_FLAG_DEFINED & (HB_GLYPH_FLAG_DEFINED + 1))), "");
+  unsigned int global_bit_mask = HB_GLYPH_FLAG_DEFINED + 1;
+  unsigned int global_bit_shift = hb_popcount (HB_GLYPH_FLAG_DEFINED);
+
+  m.global_mask = global_bit_mask;
+
+  unsigned int required_feature_index[2];
+  hb_tag_t required_feature_tag[2];
+  /* We default to applying required feature in stage 0.  If the required
+   * feature has a tag that is known to the shaper, we apply required feature
+   * in the stage for that tag.
+   */
+  unsigned int required_feature_stage[2] = {0, 0};
+
+  for (unsigned int table_index = 0; table_index < 2; table_index++)
+  {
+    m.chosen_script[table_index] = chosen_script[table_index];
+    m.found_script[table_index] = found_script[table_index];
+
+    hb_ot_layout_language_get_required_feature (face,
+                                                table_tags[table_index],
+                                                script_index[table_index],
+                                                language_index[table_index],
+                                                &required_feature_index[table_index],
+                                                &required_feature_tag[table_index]);
+  }
+
+  /* Sort features and merge duplicates */
+  if (feature_infos.length)
+  {
+    feature_infos.qsort ();
+    unsigned int j = 0;
+    for (unsigned int i = 1; i < feature_infos.length; i++)
+      if (feature_infos[i].tag != feature_infos[j].tag)
+        feature_infos[++j] = feature_infos[i];
+      else {
+        if (feature_infos[i].flags & F_GLOBAL) {
+          /* A later global request overrides value and default. */
+          feature_infos[j].flags |= F_GLOBAL;
+          feature_infos[j].max_value = feature_infos[i].max_value;
+          feature_infos[j].default_value = feature_infos[i].default_value;
+        } else {
+          /* A later non-global request makes the merged entry non-global. */
+          if (feature_infos[j].flags & F_GLOBAL)
+            feature_infos[j].flags ^= F_GLOBAL;
+          feature_infos[j].max_value = hb_max (feature_infos[j].max_value, feature_infos[i].max_value);
+          /* Inherit default_value from j */
+        }
+        feature_infos[j].flags |= (feature_infos[i].flags & F_HAS_FALLBACK);
+        feature_infos[j].stage[0] = hb_min (feature_infos[j].stage[0], feature_infos[i].stage[0]);
+        feature_infos[j].stage[1] = hb_min (feature_infos[j].stage[1], feature_infos[i].stage[1]);
+      }
+    feature_infos.shrink (j + 1);
+  }
+
+
+  /* Allocate bits now */
+  unsigned int next_bit = global_bit_shift + 1;
+
+  for (unsigned int i = 0; i < feature_infos.length; i++)
+  {
+    const feature_info_t *info = &feature_infos[i];
+
+    unsigned int bits_needed;
+
+    if ((info->flags & F_GLOBAL) && info->max_value == 1)
+      /* Uses the global bit */
+      bits_needed = 0;
+    else
+      /* Limit bits per feature. */
+      bits_needed = hb_min (HB_OT_MAP_MAX_BITS, hb_bit_storage (info->max_value));
+
+    if (!info->max_value || next_bit + bits_needed > 8 * sizeof (hb_mask_t))
+      continue; /* Feature disabled, or not enough bits. */
+
+
+    bool found = false;
+    unsigned int feature_index[2];
+    for (unsigned int table_index = 0; table_index < 2; table_index++)
+    {
+      if (required_feature_tag[table_index] == info->tag)
+        required_feature_stage[table_index] = info->stage[table_index];
+
+      found |= (bool) hb_ot_layout_language_find_feature (face,
+                                                          table_tags[table_index],
+                                                          script_index[table_index],
+                                                          language_index[table_index],
+                                                          info->tag,
+                                                          &feature_index[table_index]);
+    }
+    if (!found && (info->flags & F_GLOBAL_SEARCH))
+    {
+      for (unsigned int table_index = 0; table_index < 2; table_index++)
+      {
+        found |= (bool) hb_ot_layout_table_find_feature (face,
+                                                         table_tags[table_index],
+                                                         info->tag,
+                                                         &feature_index[table_index]);
+      }
+    }
+    if (!found && !(info->flags & F_HAS_FALLBACK))
+      continue;
+
+
+    hb_ot_map_t::feature_map_t *map = m.features.push ();
+
+    map->tag = info->tag;
+    map->index[0] = feature_index[0];
+    map->index[1] = feature_index[1];
+    map->stage[0] = info->stage[0];
+    map->stage[1] = info->stage[1];
+    map->auto_zwnj = !(info->flags & F_MANUAL_ZWNJ);
+    map->auto_zwj = !(info->flags & F_MANUAL_ZWJ);
+    map->random = !!(info->flags & F_RANDOM);
+    if ((info->flags & F_GLOBAL) && info->max_value == 1) {
+      /* Uses the global bit */
+      map->shift = global_bit_shift;
+      map->mask = global_bit_mask;
+    } else {
+      map->shift = next_bit;
+      /* Build the field as a bits_needed-wide run of ones moved up to
+       * next_bit.  The guard above permits next_bit + bits_needed to equal
+       * the full width of hb_mask_t; writing this as
+       * (1u << (next_bit + bits_needed)) - (1u << next_bit) would then shift
+       * by the width of the type, which is undefined.  Here both shift
+       * counts stay below that width (bits_needed <= HB_OT_MAP_MAX_BITS and
+       * next_bit is below the mask width whenever bits_needed is non-zero). */
+      map->mask = ((1u << bits_needed) - 1u) << next_bit;
+      next_bit += bits_needed;
+      m.global_mask |= (info->default_value << map->shift) & map->mask;
+    }
+    map->_1_mask = (1u << map->shift) & map->mask;
+    map->needs_fallback = !found;
+
+  }
+  feature_infos.shrink (0); /* Done with these */
+
+
+  /* Close the last stage of each table with a pause that has no callback. */
+  add_gsub_pause (nullptr);
+  add_gpos_pause (nullptr);
+
+  for (unsigned int table_index = 0; table_index < 2; table_index++)
+  {
+    /* Collect lookup indices for features */
+
+    unsigned int stage_index = 0;
+    unsigned int last_num_lookups = 0;
+    for (unsigned stage = 0; stage < current_stage[table_index]; stage++)
+    {
+      if (required_feature_index[table_index] != HB_OT_LAYOUT_NO_FEATURE_INDEX &&
+          required_feature_stage[table_index] == stage)
+        add_lookups (m, table_index,
+                     required_feature_index[table_index],
+                     key.variations_index[table_index],
+                     global_bit_mask);
+
+      for (unsigned i = 0; i < m.features.length; i++)
+        if (m.features[i].stage[table_index] == stage)
+          add_lookups (m, table_index,
+                       m.features[i].index[table_index],
+                       key.variations_index[table_index],
+                       m.features[i].mask,
+                       m.features[i].auto_zwnj,
+                       m.features[i].auto_zwj,
+                       m.features[i].random);
+
+      /* Sort lookups and merge duplicates */
+      if (last_num_lookups < m.lookups[table_index].length)
+      {
+        /* Only the lookups added for this stage are sorted. */
+        m.lookups[table_index].qsort (last_num_lookups, m.lookups[table_index].length);
+
+        unsigned int j = last_num_lookups;
+        for (unsigned int i = j + 1; i < m.lookups[table_index].length; i++)
+          if (m.lookups[table_index][i].index != m.lookups[table_index][j].index)
+            m.lookups[table_index][++j] = m.lookups[table_index][i];
+          else
+          {
+            /* Same lookup requested twice: union the masks; the automatic
+             * joiner handling stays on only if every request wanted it. */
+            m.lookups[table_index][j].mask |= m.lookups[table_index][i].mask;
+            m.lookups[table_index][j].auto_zwnj &= m.lookups[table_index][i].auto_zwnj;
+            m.lookups[table_index][j].auto_zwj &= m.lookups[table_index][i].auto_zwj;
+          }
+        m.lookups[table_index].shrink (j + 1);
+      }
+
+      last_num_lookups = m.lookups[table_index].length;
+
+      if (stage_index < stages[table_index].length && stages[table_index][stage_index].index == stage) {
+        hb_ot_map_t::stage_map_t *stage_map = m.stages[table_index].push ();
+        stage_map->last_lookup = last_num_lookups;
+        stage_map->pause_func = stages[table_index][stage_index].pause_func;
+
+        stage_index++;
+      }
+    }
+  }
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-map.hh b/thirdparty/harfbuzz/src/hb-ot-map.hh
new file mode 100644
index 0000000000..7629a869bd
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-map.hh
@@ -0,0 +1,284 @@
+/*
+ * Copyright © 2009,2010 Red Hat, Inc.
+ * Copyright © 2010,2011,2012,2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_MAP_HH
+#define HB_OT_MAP_HH
+
+#include "hb-buffer.hh"
+
+
+#define HB_OT_MAP_MAX_BITS 8u
+#define HB_OT_MAP_MAX_VALUE ((1u << HB_OT_MAP_MAX_BITS) - 1u)
+
+struct hb_ot_shape_plan_t;
+
+static const hb_tag_t table_tags[2] = {HB_OT_TAG_GSUB, HB_OT_TAG_GPOS};
+
+/* Compiled feature map: for each layout table (index 0 = GSUB, 1 = GPOS)
+ * it holds the list of lookups to apply and the stage boundaries within that
+ * list, plus a sorted list of features with their mask allocation.
+ * hb_ot_map_builder_t is a friend and fills in the private members. */
+struct hb_ot_map_t
+{
+ friend struct hb_ot_map_builder_t;
+
+ public:
+
+ /* One feature recorded in the map, with its mask allocation. */
+ struct feature_map_t {
+ hb_tag_t tag; /* should be first for our bsearch to work */
+ unsigned int index[2]; /* GSUB/GPOS */
+ unsigned int stage[2]; /* GSUB/GPOS */
+ unsigned int shift;
+ hb_mask_t mask;
+ hb_mask_t _1_mask; /* mask for value=1, for quick access */
+ unsigned int needs_fallback : 1;
+ unsigned int auto_zwnj : 1;
+ unsigned int auto_zwj : 1;
+ unsigned int random : 1;
+
+ /* Three-way comparison of a search key tag_ against this entry's tag. */
+ int cmp (const hb_tag_t tag_) const
+ { return tag_ < tag ? -1 : tag_ > tag ? 1 : 0; }
+ };
+
+ /* One lookup to apply, with the mask and flags it is applied under. */
+ struct lookup_map_t {
+ unsigned short index;
+ unsigned short auto_zwnj : 1;
+ unsigned short auto_zwj : 1;
+ unsigned short random : 1;
+ hb_mask_t mask;
+
+ /* qsort-style comparator ordering entries by lookup index. */
+ HB_INTERNAL static int cmp (const void *pa, const void *pb)
+ {
+ const lookup_map_t *a = (const lookup_map_t *) pa;
+ const lookup_map_t *b = (const lookup_map_t *) pb;
+ return a->index < b->index ? -1 : a->index > b->index ? 1 : 0;
+ }
+ };
+
+ typedef void (*pause_func_t) (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer);
+
+ /* End of one stage within the lookup list, plus its pause callback. */
+ struct stage_map_t {
+ unsigned int last_lookup; /* Cumulative */
+ pause_func_t pause_func;
+ };
+
+ /* Zero-fills the whole object, then init()s each vector member. */
+ void init ()
+ {
+ memset (this, 0, sizeof (*this));
+
+ features.init ();
+ for (unsigned int table_index = 0; table_index < 2; table_index++)
+ {
+ lookups[table_index].init ();
+ stages[table_index].init ();
+ }
+ }
+ /* fini()s each vector member. */
+ void fini ()
+ {
+ features.fini ();
+ for (unsigned int table_index = 0; table_index < 2; table_index++)
+ {
+ lookups[table_index].fini ();
+ stages[table_index].fini ();
+ }
+ }
+
+ hb_mask_t get_global_mask () const { return global_mask; }
+
+ /* Returns the mask allocated to feature_tag, or 0 if the feature is not in
+ * the map.  If shift is non-null it receives the feature's shift (0 when
+ * the feature is not in the map). */
+ hb_mask_t get_mask (hb_tag_t feature_tag, unsigned int *shift = nullptr) const
+ {
+ const feature_map_t *map = features.bsearch (feature_tag);
+ if (shift) *shift = map ? map->shift : 0;
+ return map ? map->mask : 0;
+ }
+
+ /* Returns the feature's needs_fallback flag; false if not in the map. */
+ bool needs_fallback (hb_tag_t feature_tag) const
+ {
+ const feature_map_t *map = features.bsearch (feature_tag);
+ return map ? map->needs_fallback : false;
+ }
+
+ /* Returns the feature's mask for value 1; 0 if not in the map. */
+ hb_mask_t get_1_mask (hb_tag_t feature_tag) const
+ {
+ const feature_map_t *map = features.bsearch (feature_tag);
+ return map ? map->_1_mask : 0;
+ }
+
+ /* Returns the feature's index in the given table, or
+ * HB_OT_LAYOUT_NO_FEATURE_INDEX if the feature is not in the map. */
+ unsigned int get_feature_index (unsigned int table_index, hb_tag_t feature_tag) const
+ {
+ const feature_map_t *map = features.bsearch (feature_tag);
+ return map ? map->index[table_index] : HB_OT_LAYOUT_NO_FEATURE_INDEX;
+ }
+
+ /* Returns the feature's stage in the given table, or UINT_MAX if the
+ * feature is not in the map. */
+ unsigned int get_feature_stage (unsigned int table_index, hb_tag_t feature_tag) const
+ {
+ const feature_map_t *map = features.bsearch (feature_tag);
+ return map ? map->stage[table_index] : UINT_MAX;
+ }
+
+ /* Outputs the lookups that belong to the given stage of the given table.
+ * A stage of UINT_MAX (what get_feature_stage() returns for an unknown
+ * feature) yields a null pointer and a count of 0.  An empty stage also
+ * yields a null pointer. */
+ void get_stage_lookups (unsigned int table_index, unsigned int stage,
+ const struct lookup_map_t **plookups, unsigned int *lookup_count) const
+ {
+ if (unlikely (stage == UINT_MAX)) {
+ *plookups = nullptr;
+ *lookup_count = 0;
+ return;
+ }
+ assert (stage <= stages[table_index].length);
+ /* A stage starts where the previous one ended (last_lookup is cumulative);
+ * a stage past the last recorded one runs to the end of the lookup list. */
+ unsigned int start = stage ? stages[table_index][stage - 1].last_lookup : 0;
+ unsigned int end = stage < stages[table_index].length ? stages[table_index][stage].last_lookup : lookups[table_index].length;
+ *plookups = end == start ? nullptr : &lookups[table_index][start];
+ *lookup_count = end - start;
+ }
+
+ HB_INTERNAL void collect_lookups (unsigned int table_index, hb_set_t *lookups) const;
+ template <typename Proxy>
+ HB_INTERNAL void apply (const Proxy &proxy,
+ const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) const;
+ HB_INTERNAL void substitute (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) const;
+ HB_INTERNAL void position (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) const;
+
+ public:
+ hb_tag_t chosen_script[2];
+ bool found_script[2];
+
+ private:
+
+ hb_mask_t global_mask;
+
+ hb_sorted_vector_t<feature_map_t> features;
+ hb_vector_t<lookup_map_t> lookups[2]; /* GSUB/GPOS */
+ hb_vector_t<stage_map_t> stages[2]; /* GSUB/GPOS */
+};
+
+/* Bit flags describing how a feature is to be added to the map.
+ * Values may be OR'ed together; the F_GLOBAL_* and F_MANUAL_JOINERS
+ * enumerators are such combinations of the single-bit flags. */
+enum hb_ot_map_feature_flags_t
+{
+ F_NONE = 0x0000u,
+ F_GLOBAL = 0x0001u, /* Feature applies to all characters; results in no mask allocated for it. */
+ F_HAS_FALLBACK = 0x0002u, /* Has fallback implementation, so include mask bit even if feature not found. */
+ F_MANUAL_ZWNJ = 0x0004u, /* Don't skip over ZWNJ when matching **context**. */
+ F_MANUAL_ZWJ = 0x0008u, /* Don't skip over ZWJ when matching **input**. */
+ F_MANUAL_JOINERS = F_MANUAL_ZWNJ | F_MANUAL_ZWJ,
+ F_GLOBAL_MANUAL_JOINERS= F_GLOBAL | F_MANUAL_JOINERS,
+ F_GLOBAL_HAS_FALLBACK = F_GLOBAL | F_HAS_FALLBACK,
+ F_GLOBAL_SEARCH = 0x0010u, /* If feature not found in LangSys, look for it in global feature list and pick one. */
+ F_RANDOM = 0x0020u /* Randomly select a glyph from an AlternateSubstFormat1 subtable. */
+};
+HB_MARK_AS_FLAG_T (hb_ot_map_feature_flags_t);
+
+
+/* A feature tag paired with the flags to add it with; accepted by
+ * hb_ot_map_builder_t::add_feature (const hb_ot_map_feature_t &). */
+struct hb_ot_map_feature_t
+{
+ hb_tag_t tag;
+ hb_ot_map_feature_flags_t flags;
+};
+
+struct hb_ot_shape_plan_key_t;
+
+/* Accumulates feature requests and pauses, then compiles them into an
+ * hb_ot_map_t via compile().  Array members of size 2 are indexed by
+ * table: 0 = GSUB, 1 = GPOS. */
+struct hb_ot_map_builder_t
+{
+ public:
+
+ HB_INTERNAL hb_ot_map_builder_t (hb_face_t *face_,
+ const hb_segment_properties_t *props_);
+
+ HB_INTERNAL ~hb_ot_map_builder_t ();
+
+ HB_INTERNAL void add_feature (hb_tag_t tag,
+ hb_ot_map_feature_flags_t flags=F_NONE,
+ unsigned int value=1);
+
+ /* Adds feat.tag with feat.flags and the default value of 1. */
+ void add_feature (const hb_ot_map_feature_t &feat)
+ { add_feature (feat.tag, feat.flags); }
+
+ /* Adds the feature with F_GLOBAL OR'ed into the given flags. */
+ void enable_feature (hb_tag_t tag,
+ hb_ot_map_feature_flags_t flags=F_NONE,
+ unsigned int value=1)
+ { add_feature (tag, F_GLOBAL | flags, value); }
+
+ /* Adds the feature as F_GLOBAL with value 0. */
+ void disable_feature (hb_tag_t tag)
+ { add_feature (tag, F_GLOBAL, 0); }
+
+ /* Pause helpers: table index 0 is GSUB, 1 is GPOS. */
+ void add_gsub_pause (hb_ot_map_t::pause_func_t pause_func)
+ { add_pause (0, pause_func); }
+ void add_gpos_pause (hb_ot_map_t::pause_func_t pause_func)
+ { add_pause (1, pause_func); }
+
+ HB_INTERNAL void compile (hb_ot_map_t &m,
+ const hb_ot_shape_plan_key_t &key);
+
+ private:
+
+ HB_INTERNAL void add_lookups (hb_ot_map_t &m,
+ unsigned int table_index,
+ unsigned int feature_index,
+ unsigned int variations_index,
+ hb_mask_t mask,
+ bool auto_zwnj = true,
+ bool auto_zwj = true,
+ bool random = false);
+
+ /* One queued feature request. */
+ struct feature_info_t {
+ hb_tag_t tag;
+ unsigned int seq; /* sequence#, used for stable sorting only */
+ unsigned int max_value;
+ hb_ot_map_feature_flags_t flags;
+ unsigned int default_value; /* for non-global features, what should the unset glyphs take */
+ unsigned int stage[2]; /* GSUB/GPOS */
+
+ /* qsort-style comparator: orders by tag, then by seq for equal tags. */
+ HB_INTERNAL static int cmp (const void *pa, const void *pb)
+ {
+ const feature_info_t *a = (const feature_info_t *) pa;
+ const feature_info_t *b = (const feature_info_t *) pb;
+ return (a->tag != b->tag) ? (a->tag < b->tag ? -1 : 1) :
+ (a->seq < b->seq ? -1 : a->seq > b->seq ? 1 : 0);
+ }
+ };
+
+ /* One queued pause: the stage number it closes and its callback. */
+ struct stage_info_t {
+ unsigned int index;
+ hb_ot_map_t::pause_func_t pause_func;
+ };
+
+ HB_INTERNAL void add_pause (unsigned int table_index, hb_ot_map_t::pause_func_t pause_func);
+
+ public:
+
+ hb_face_t *face;
+ hb_segment_properties_t props;
+
+ hb_tag_t chosen_script[2];
+ bool found_script[2];
+ unsigned int script_index[2], language_index[2];
+
+ private:
+
+ unsigned int current_stage[2]; /* GSUB/GPOS */
+ hb_vector_t<feature_info_t> feature_infos;
+ hb_vector_t<stage_info_t> stages[2]; /* GSUB/GPOS */
+};
+
+
+
+#endif /* HB_OT_MAP_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-math-table.hh b/thirdparty/harfbuzz/src/hb-ot-math-table.hh
new file mode 100644
index 0000000000..26aa080603
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-math-table.hh
@@ -0,0 +1,728 @@
+/*
+ * Copyright © 2016 Igalia S.L.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Igalia Author(s): Frédéric Wang
+ */
+
+#ifndef HB_OT_MATH_TABLE_HH
+#define HB_OT_MATH_TABLE_HH
+
+#include "hb-open-type.hh"
+#include "hb-ot-layout-common.hh"
+#include "hb-ot-math.h"
+
+namespace OT {
+
+
+/* A design-unit value plus an offset to a device table that contributes an
+ * additional delta.  The offset is resolved against a caller-supplied base. */
+struct MathValueRecord
+{
+ /* Value scaled horizontally for the font, plus the device table's x delta. */
+ hb_position_t get_x_value (hb_font_t *font, const void *base) const
+ { return font->em_scale_x (value) + (base+deviceTable).get_x_delta (font); }
+ /* Value scaled vertically for the font, plus the device table's y delta. */
+ hb_position_t get_y_value (hb_font_t *font, const void *base) const
+ { return font->em_scale_y (value) + (base+deviceTable).get_y_delta (font); }
+
+ /* Checks this record and the device table offset (relative to base). */
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) && deviceTable.sanitize (c, base));
+ }
+
+ protected:
+ HBINT16 value; /* The X or Y value in design units */
+ OffsetTo<Device> deviceTable; /* Offset to the device table - from the
+ * beginning of parent table. May be NULL.
+ * Suggested format for device table is 1. */
+
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+
+/* Table of math layout constants: two percentages, two minimum heights,
+ * 51 MathValueRecords and one final percentage, looked up by
+ * hb_ot_math_constant_t. */
+struct MathConstants
+{
+ /* Sanitizes every entry of mathValueRecords, with this table as base. */
+ bool sanitize_math_value_records (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+
+ unsigned int count = ARRAY_LENGTH (mathValueRecords);
+ for (unsigned int i = 0; i < count; i++)
+ if (!mathValueRecords[i].sanitize (c, this))
+ return_trace (false);
+
+ return_trace (true);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) && sanitize_math_value_records (c));
+ }
+
+ /* Returns the requested constant.  Percentages are returned unscaled,
+ * minimum heights are scaled vertically, and MathValueRecord constants are
+ * evaluated horizontally or vertically depending on the constant.
+ * Unrecognized constants yield 0. */
+ hb_position_t get_value (hb_ot_math_constant_t constant,
+ hb_font_t *font) const
+ {
+ switch (constant) {
+
+ /* Percentages: returned as stored. */
+ case HB_OT_MATH_CONSTANT_SCRIPT_PERCENT_SCALE_DOWN:
+ case HB_OT_MATH_CONSTANT_SCRIPT_SCRIPT_PERCENT_SCALE_DOWN:
+ return percentScaleDown[constant - HB_OT_MATH_CONSTANT_SCRIPT_PERCENT_SCALE_DOWN];
+
+ /* Minimum heights: scaled vertically. */
+ case HB_OT_MATH_CONSTANT_DELIMITED_SUB_FORMULA_MIN_HEIGHT:
+ case HB_OT_MATH_CONSTANT_DISPLAY_OPERATOR_MIN_HEIGHT:
+ return font->em_scale_y (minHeight[constant - HB_OT_MATH_CONSTANT_DELIMITED_SUB_FORMULA_MIN_HEIGHT]);
+
+ /* MathValueRecords evaluated horizontally. */
+ case HB_OT_MATH_CONSTANT_RADICAL_KERN_AFTER_DEGREE:
+ case HB_OT_MATH_CONSTANT_RADICAL_KERN_BEFORE_DEGREE:
+ case HB_OT_MATH_CONSTANT_SKEWED_FRACTION_HORIZONTAL_GAP:
+ case HB_OT_MATH_CONSTANT_SPACE_AFTER_SCRIPT:
+ return mathValueRecords[constant - HB_OT_MATH_CONSTANT_MATH_LEADING].get_x_value (font, this);
+
+ /* MathValueRecords evaluated vertically. */
+ case HB_OT_MATH_CONSTANT_ACCENT_BASE_HEIGHT:
+ case HB_OT_MATH_CONSTANT_AXIS_HEIGHT:
+ case HB_OT_MATH_CONSTANT_FLATTENED_ACCENT_BASE_HEIGHT:
+ case HB_OT_MATH_CONSTANT_FRACTION_DENOMINATOR_DISPLAY_STYLE_SHIFT_DOWN:
+ case HB_OT_MATH_CONSTANT_FRACTION_DENOMINATOR_GAP_MIN:
+ case HB_OT_MATH_CONSTANT_FRACTION_DENOMINATOR_SHIFT_DOWN:
+ case HB_OT_MATH_CONSTANT_FRACTION_DENOM_DISPLAY_STYLE_GAP_MIN:
+ case HB_OT_MATH_CONSTANT_FRACTION_NUMERATOR_DISPLAY_STYLE_SHIFT_UP:
+ case HB_OT_MATH_CONSTANT_FRACTION_NUMERATOR_GAP_MIN:
+ case HB_OT_MATH_CONSTANT_FRACTION_NUMERATOR_SHIFT_UP:
+ case HB_OT_MATH_CONSTANT_FRACTION_NUM_DISPLAY_STYLE_GAP_MIN:
+ case HB_OT_MATH_CONSTANT_FRACTION_RULE_THICKNESS:
+ case HB_OT_MATH_CONSTANT_LOWER_LIMIT_BASELINE_DROP_MIN:
+ case HB_OT_MATH_CONSTANT_LOWER_LIMIT_GAP_MIN:
+ case HB_OT_MATH_CONSTANT_MATH_LEADING:
+ case HB_OT_MATH_CONSTANT_OVERBAR_EXTRA_ASCENDER:
+ case HB_OT_MATH_CONSTANT_OVERBAR_RULE_THICKNESS:
+ case HB_OT_MATH_CONSTANT_OVERBAR_VERTICAL_GAP:
+ case HB_OT_MATH_CONSTANT_RADICAL_DISPLAY_STYLE_VERTICAL_GAP:
+ case HB_OT_MATH_CONSTANT_RADICAL_EXTRA_ASCENDER:
+ case HB_OT_MATH_CONSTANT_RADICAL_RULE_THICKNESS:
+ case HB_OT_MATH_CONSTANT_RADICAL_VERTICAL_GAP:
+ case HB_OT_MATH_CONSTANT_SKEWED_FRACTION_VERTICAL_GAP:
+ case HB_OT_MATH_CONSTANT_STACK_BOTTOM_DISPLAY_STYLE_SHIFT_DOWN:
+ case HB_OT_MATH_CONSTANT_STACK_BOTTOM_SHIFT_DOWN:
+ case HB_OT_MATH_CONSTANT_STACK_DISPLAY_STYLE_GAP_MIN:
+ case HB_OT_MATH_CONSTANT_STACK_GAP_MIN:
+ case HB_OT_MATH_CONSTANT_STACK_TOP_DISPLAY_STYLE_SHIFT_UP:
+ case HB_OT_MATH_CONSTANT_STACK_TOP_SHIFT_UP:
+ case HB_OT_MATH_CONSTANT_STRETCH_STACK_BOTTOM_SHIFT_DOWN:
+ case HB_OT_MATH_CONSTANT_STRETCH_STACK_GAP_ABOVE_MIN:
+ case HB_OT_MATH_CONSTANT_STRETCH_STACK_GAP_BELOW_MIN:
+ case HB_OT_MATH_CONSTANT_STRETCH_STACK_TOP_SHIFT_UP:
+ case HB_OT_MATH_CONSTANT_SUBSCRIPT_BASELINE_DROP_MIN:
+ case HB_OT_MATH_CONSTANT_SUBSCRIPT_SHIFT_DOWN:
+ case HB_OT_MATH_CONSTANT_SUBSCRIPT_TOP_MAX:
+ case HB_OT_MATH_CONSTANT_SUB_SUPERSCRIPT_GAP_MIN:
+ case HB_OT_MATH_CONSTANT_SUPERSCRIPT_BASELINE_DROP_MAX:
+ case HB_OT_MATH_CONSTANT_SUPERSCRIPT_BOTTOM_MAX_WITH_SUBSCRIPT:
+ case HB_OT_MATH_CONSTANT_SUPERSCRIPT_BOTTOM_MIN:
+ case HB_OT_MATH_CONSTANT_SUPERSCRIPT_SHIFT_UP:
+ case HB_OT_MATH_CONSTANT_SUPERSCRIPT_SHIFT_UP_CRAMPED:
+ case HB_OT_MATH_CONSTANT_UNDERBAR_EXTRA_DESCENDER:
+ case HB_OT_MATH_CONSTANT_UNDERBAR_RULE_THICKNESS:
+ case HB_OT_MATH_CONSTANT_UNDERBAR_VERTICAL_GAP:
+ case HB_OT_MATH_CONSTANT_UPPER_LIMIT_BASELINE_RISE_MIN:
+ case HB_OT_MATH_CONSTANT_UPPER_LIMIT_GAP_MIN:
+ return mathValueRecords[constant - HB_OT_MATH_CONSTANT_MATH_LEADING].get_y_value (font, this);
+
+ /* Final percentage: returned as stored. */
+ case HB_OT_MATH_CONSTANT_RADICAL_DEGREE_BOTTOM_RAISE_PERCENT:
+ return radicalDegreeBottomRaisePercent;
+
+ default:
+ return 0;
+ }
+ }
+
+ protected:
+ HBINT16 percentScaleDown[2];
+ HBUINT16 minHeight[2];
+ MathValueRecord mathValueRecords[51];
+ HBINT16 radicalDegreeBottomRaisePercent;
+
+ public:
+ DEFINE_SIZE_STATIC (214);
+};
+
+/* Per-glyph italics correction values, indexed through a Coverage table. */
+struct MathItalicsCorrectionInfo
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ coverage.sanitize (c, this) &&
+ italicsCorrection.sanitize (c, this));
+ }
+
+ /* Returns the x value of the record at the glyph's coverage index.
+ * NOTE(review): there is no explicit NOT_COVERED check here; presumably
+ * ArrayOf::operator[] copes with an out-of-range index - confirm. */
+ hb_position_t get_value (hb_codepoint_t glyph,
+ hb_font_t *font) const
+ {
+ unsigned int index = (this+coverage).get_coverage (glyph);
+ return italicsCorrection[index].get_x_value (font, this);
+ }
+
+ protected:
+ OffsetTo<Coverage> coverage; /* Offset to Coverage table -
+ * from the beginning of
+ * MathItalicsCorrectionInfo
+ * table. */
+ ArrayOf<MathValueRecord> italicsCorrection; /* Array of MathValueRecords
+ * defining italics correction
+ * values for each
+ * covered glyph. */
+
+ public:
+ DEFINE_SIZE_ARRAY (4, italicsCorrection);
+};
+
+/* Per-glyph top accent attachment positions, indexed through a Coverage
+ * table. */
+struct MathTopAccentAttachment
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ topAccentCoverage.sanitize (c, this) &&
+ topAccentAttachment.sanitize (c, this));
+ }
+
+ /* Returns the x value of the glyph's attachment record.  For a glyph that
+ * is not covered, falls back to half of the glyph's horizontal advance. */
+ hb_position_t get_value (hb_codepoint_t glyph,
+ hb_font_t *font) const
+ {
+ unsigned int index = (this+topAccentCoverage).get_coverage (glyph);
+ if (index == NOT_COVERED)
+ return font->get_glyph_h_advance (glyph) / 2;
+ return topAccentAttachment[index].get_x_value (font, this);
+ }
+
+ protected:
+ OffsetTo<Coverage> topAccentCoverage; /* Offset to Coverage table -
+ * from the beginning of
+ * MathTopAccentAttachment
+ * table. */
+ ArrayOf<MathValueRecord> topAccentAttachment; /* Array of MathValueRecords
+ * defining top accent
+ * attachment points for each
+ * covered glyph. */
+
+ public:
+ DEFINE_SIZE_ARRAY (2 + 2, topAccentAttachment);
+};
+
+/* Height-dependent kerning for one corner of a glyph: heightCount
+ * correction heights followed by heightCount + 1 kern values, all stored
+ * in a single unsized array of MathValueRecords. */
+struct MathKern
+{
+ /* Sanitizes all 2 * heightCount + 1 records, with this table as base. */
+ bool sanitize_math_value_records (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ unsigned int count = 2 * heightCount + 1;
+ for (unsigned int i = 0; i < count; i++)
+ if (!mathValueRecordsZ.arrayZ[i].sanitize (c, this)) return_trace (false);
+ return_trace (true);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ c->check_array (mathValueRecordsZ.arrayZ, 2 * heightCount + 1) &&
+ sanitize_math_value_records (c));
+ }
+
+ /* Returns the kern value that applies at correction_height. */
+ hb_position_t get_value (hb_position_t correction_height, hb_font_t *font) const
+ {
+ /* The first heightCount records are heights; the kern values follow. */
+ const MathValueRecord* correctionHeight = mathValueRecordsZ.arrayZ;
+ const MathValueRecord* kernValue = mathValueRecordsZ.arrayZ + heightCount;
+ /* Multiplying both sides by sign flips the comparison below when the
+ * font's y scale is negative. */
+ int sign = font->y_scale < 0 ? -1 : +1;
+
+ /* The description of the MathKern table is a bit ambiguous, but interpreting
+ * "between the two heights found at those indexes" for 0 < i < len as
+ *
+ * correctionHeight[i-1] < correction_height <= correctionHeight[i]
+ *
+ * makes the result consistent with the limit cases and we can just use the
+ * binary search algorithm of std::upper_bound:
+ */
+ unsigned int i = 0;
+ unsigned int count = heightCount;
+ while (count > 0)
+ {
+ unsigned int half = count / 2;
+ hb_position_t height = correctionHeight[i + half].get_y_value (font, this);
+ if (sign * height < sign * correction_height)
+ {
+ i += half + 1;
+ count -= half + 1;
+ } else
+ count = half;
+ }
+ /* i is in [0, heightCount]; there are heightCount + 1 kern values. */
+ return kernValue[i].get_x_value (font, this);
+ }
+
+ protected:
+ HBUINT16 heightCount;
+ UnsizedArrayOf<MathValueRecord>
+ mathValueRecordsZ;
+ /* Array of correction heights at
+ * which the kern value changes.
+ * Sorted by the height value in
+ * design units (heightCount entries),
+ * Followed by:
+ * Array of kern values corresponding
+ * to heights. (heightCount+1 entries).
+ */
+
+ public:
+ DEFINE_SIZE_ARRAY (2, mathValueRecordsZ);
+};
+
+/* Four offsets to MathKern tables, one per value of hb_ot_math_kern_t
+ * accepted by get_kerning().  Offsets are relative to a caller-supplied
+ * base. */
+struct MathKernInfoRecord
+{
+ /* Sanitizes each of the four MathKern offsets against base. */
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const
+ {
+ TRACE_SANITIZE (this);
+
+ unsigned int count = ARRAY_LENGTH (mathKern);
+ for (unsigned int i = 0; i < count; i++)
+ if (unlikely (!mathKern[i].sanitize (c, base)))
+ return_trace (false);
+
+ return_trace (true);
+ }
+
+ /* Returns the kern value for the given corner at correction_height, or 0
+ * when kern is outside the range of the mathKern array. */
+ hb_position_t get_kerning (hb_ot_math_kern_t kern,
+ hb_position_t correction_height,
+ hb_font_t *font,
+ const void *base) const
+ {
+ unsigned int idx = kern;
+ if (unlikely (idx >= ARRAY_LENGTH (mathKern))) return 0;
+ return (base+mathKern[idx]).get_value (correction_height, font);
+ }
+
+ protected:
+ /* Offset to MathKern table for each corner -
+ * from the beginning of MathKernInfo table. May be NULL. */
+ OffsetTo<MathKern> mathKern[4];
+
+ public:
+ DEFINE_SIZE_STATIC (8);
+};
+
+/* Per-glyph MathKernInfoRecords, indexed through a Coverage table. */
+struct MathKernInfo
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ mathKernCoverage.sanitize (c, this) &&
+ mathKernInfoRecords.sanitize (c, this));
+ }
+
+ /* Looks up the glyph's record by coverage index and returns its kerning
+ * for the given corner and correction height.
+ * NOTE(review): there is no explicit NOT_COVERED check here; presumably
+ * ArrayOf::operator[] copes with an out-of-range index - confirm. */
+ hb_position_t get_kerning (hb_codepoint_t glyph,
+ hb_ot_math_kern_t kern,
+ hb_position_t correction_height,
+ hb_font_t *font) const
+ {
+ unsigned int index = (this+mathKernCoverage).get_coverage (glyph);
+ return mathKernInfoRecords[index].get_kerning (kern, correction_height, font, this);
+ }
+
+ protected:
+ OffsetTo<Coverage>
+ mathKernCoverage;
+ /* Offset to Coverage table -
+ * from the beginning of the
+ * MathKernInfo table. */
+ ArrayOf<MathKernInfoRecord>
+ mathKernInfoRecords;
+ /* Array of MathKernInfoRecords,
+ * per-glyph information for
+ * mathematical positioning
+ * of subscripts and
+ * superscripts. */
+
+ public:
+ DEFINE_SIZE_ARRAY (4, mathKernInfoRecords);
+};
+
+struct MathGlyphInfo
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ mathItalicsCorrectionInfo.sanitize (c, this) &&
+ mathTopAccentAttachment.sanitize (c, this) &&
+ extendedShapeCoverage.sanitize (c, this) &&
+ mathKernInfo.sanitize (c, this));
+ }
+
+ hb_position_t
+ get_italics_correction (hb_codepoint_t glyph, hb_font_t *font) const
+ { return (this+mathItalicsCorrectionInfo).get_value (glyph, font); }
+
+ hb_position_t
+ get_top_accent_attachment (hb_codepoint_t glyph, hb_font_t *font) const
+ { return (this+mathTopAccentAttachment).get_value (glyph, font); }
+
+ bool is_extended_shape (hb_codepoint_t glyph) const
+ { return (this+extendedShapeCoverage).get_coverage (glyph) != NOT_COVERED; }
+
+ hb_position_t get_kerning (hb_codepoint_t glyph,
+ hb_ot_math_kern_t kern,
+ hb_position_t correction_height,
+ hb_font_t *font) const
+ { return (this+mathKernInfo).get_kerning (glyph, kern, correction_height, font); }
+
+ protected:
+ /* Offset to MathItalicsCorrectionInfo table -
+ * from the beginning of MathGlyphInfo table. */
+ OffsetTo<MathItalicsCorrectionInfo> mathItalicsCorrectionInfo;
+
+ /* Offset to MathTopAccentAttachment table -
+ * from the beginning of MathGlyphInfo table. */
+ OffsetTo<MathTopAccentAttachment> mathTopAccentAttachment;
+
+ /* Offset to coverage table for Extended Shape glyphs -
+ * from the beginning of MathGlyphInfo table. When the left or right glyph of
+ * a box is an extended shape variant, the (ink) box (and not the default
+ * position defined by values in MathConstants table) should be used for
+ * vertical positioning purposes. May be NULL. */
+ OffsetTo<Coverage> extendedShapeCoverage;
+
+ /* Offset to MathKernInfo table -
+ * from the beginning of MathGlyphInfo table. */
+ OffsetTo<MathKernInfo> mathKernInfo;
+
+ public:
+ DEFINE_SIZE_STATIC (8);
+};
+
+struct MathGlyphVariantRecord
+{
+ friend struct MathGlyphConstruction;
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ protected:
+ HBGlyphID variantGlyph; /* Glyph ID for the variant. */
+ HBUINT16 advanceMeasurement; /* Advance width/height, in design units, of the
+ * variant, in the direction of requested
+ * glyph extension. */
+
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+
+struct PartFlags : HBUINT16
+{
+ enum Flags {
+ Extender = 0x0001u, /* If set, the part can be skipped or repeated. */
+
+ Defined = 0x0001u, /* All defined flags. */
+ };
+
+ public:
+ DEFINE_SIZE_STATIC (2);
+};
+
+struct MathGlyphPartRecord
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ void extract (hb_ot_math_glyph_part_t &out,
+ int64_t mult, /* multiplier forwarded to em_mult; callers in this file pass font->dir_mult (direction) */
+ hb_font_t *font) const
+ {
+ out.glyph = glyph; /* glyph ID is copied as-is */
+
+ out.start_connector_length = font->em_mult (startConnectorLength, mult); /* the three lengths are stored in design units and converted via em_mult */
+ out.end_connector_length = font->em_mult (endConnectorLength, mult);
+ out.full_advance = font->em_mult (fullAdvance, mult);
+
+ static_assert ((unsigned int) HB_OT_MATH_GLYPH_PART_FLAG_EXTENDER ==
+ (unsigned int) PartFlags::Extender, ""); /* public flag value must equal the table bit so the masked value can be cast directly below */
+
+ out.flags = (hb_ot_math_glyph_part_flags_t)
+ (unsigned int)
+ (partFlags & PartFlags::Defined); /* keep only the flag bits PartFlags defines */
+ }
+
+ protected:
+ HBGlyphID glyph; /* Glyph ID for the part. */
+ HBUINT16 startConnectorLength;
+ /* Advance width/height, in design units, of
+ * the straight bar connector material at
+ * the beginning of the glyph, in the
+ * direction of the extension. */
+ HBUINT16 endConnectorLength;
+ /* Advance width/height, in design units, of
+ * the straight bar connector material at
+ * the end of the glyph, in the direction of
+ * the extension. */
+ HBUINT16 fullAdvance; /* Full advance width/height for this part,
+ * in the direction of the extension.
+ * In design units. */
+ PartFlags partFlags; /* Part qualifiers. */
+
+ public:
+ DEFINE_SIZE_STATIC (10);
+};
+
+struct MathGlyphAssembly
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ italicsCorrection.sanitize (c, this) &&
+ partRecords.sanitize (c));
+ }
+
+ unsigned int get_parts (hb_direction_t direction,
+ hb_font_t *font,
+ unsigned int start_offset,
+ unsigned int *parts_count, /* IN/OUT */
+ hb_ot_math_glyph_part_t *parts /* OUT */,
+ hb_position_t *italics_correction /* OUT */) const
+ {
+ if (parts_count) /* a null parts_count skips copying; the total is still returned */
+ {
+ int64_t mult = font->dir_mult (direction); /* direction-dependent multiplier handed to extract () */
+ for (auto _ : hb_zip (partRecords.sub_array (start_offset, parts_count), /* NOTE(review): sub_array presumably clamps *parts_count to the records available from start_offset -- confirm */
+ hb_array (parts, *parts_count)))
+ _.first.extract (_.second, mult, font); /* pair each selected record with its output slot */
+ }
+
+ if (italics_correction) /* optional output */
+ *italics_correction = italicsCorrection.get_x_value (font, this);
+
+ return partRecords.len; /* total number of part records, not the number copied */
+ }
+
+ protected:
+ MathValueRecord
+ italicsCorrection;
+ /* Italics correction of this
+ * MathGlyphAssembly. Should not
+ * depend on the assembly size. */
+ ArrayOf<MathGlyphPartRecord>
+ partRecords; /* Array of part records, from
+ * left to right and bottom to
+ * top. */
+
+ public:
+ DEFINE_SIZE_ARRAY (6, partRecords);
+};
+
+struct MathGlyphConstruction
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ glyphAssembly.sanitize (c, this) &&
+ mathGlyphVariantRecord.sanitize (c));
+ }
+
+ const MathGlyphAssembly &get_assembly () const { return this+glyphAssembly; }
+
+ unsigned int get_variants (hb_direction_t direction,
+ hb_font_t *font,
+ unsigned int start_offset,
+ unsigned int *variants_count, /* IN/OUT */
+ hb_ot_math_glyph_variant_t *variants /* OUT */) const
+ {
+ if (variants_count) /* a null variants_count skips copying; the total is still returned */
+ {
+ int64_t mult = font->dir_mult (direction); /* direction-dependent multiplier for the advance conversion */
+ for (auto _ : hb_zip (mathGlyphVariantRecord.sub_array (start_offset, variants_count), /* NOTE(review): sub_array presumably clamps *variants_count to the records available from start_offset -- confirm */
+ hb_array (variants, *variants_count)))
+ _.second = {_.first.variantGlyph, font->em_mult (_.first.advanceMeasurement, mult)}; /* glyph ID as-is, advance converted via em_mult */
+ }
+ return mathGlyphVariantRecord.len; /* total number of variant records, not the number copied */
+ }
+
+ protected:
+ /* Offset to MathGlyphAssembly table for this shape - from the beginning of
+ MathGlyphConstruction table. May be NULL. */
+ OffsetTo<MathGlyphAssembly> glyphAssembly;
+
+ /* MathGlyphVariantRecords for alternative variants of the glyphs. */
+ ArrayOf<MathGlyphVariantRecord> mathGlyphVariantRecord;
+
+ public:
+ DEFINE_SIZE_ARRAY (4, mathGlyphVariantRecord);
+};
+
+struct MathVariants
+{
+ bool sanitize_offsets (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ unsigned int count = vertGlyphCount + horizGlyphCount; /* glyphConstruction is unsized: its length is the sum of both counts */
+ for (unsigned int i = 0; i < count; i++)
+ if (!glyphConstruction.arrayZ[i].sanitize (c, this)) return_trace (false); /* each offset is resolved relative to this MathVariants table */
+ return_trace (true);
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ vertGlyphCoverage.sanitize (c, this) &&
+ horizGlyphCoverage.sanitize (c, this) &&
+ c->check_array (glyphConstruction.arrayZ, vertGlyphCount + horizGlyphCount) &&
+ sanitize_offsets (c));
+ }
+
+ hb_position_t get_min_connector_overlap (hb_direction_t direction,
+ hb_font_t *font) const
+ { return font->em_scale_dir (minConnectorOverlap, direction); }
+
+ unsigned int get_glyph_variants (hb_codepoint_t glyph,
+ hb_direction_t direction,
+ hb_font_t *font,
+ unsigned int start_offset,
+ unsigned int *variants_count, /* IN/OUT */
+ hb_ot_math_glyph_variant_t *variants /* OUT */) const
+ { return get_glyph_construction (glyph, direction, font)
+ .get_variants (direction, font, start_offset, variants_count, variants); }
+
+ unsigned int get_glyph_parts (hb_codepoint_t glyph,
+ hb_direction_t direction,
+ hb_font_t *font,
+ unsigned int start_offset,
+ unsigned int *parts_count, /* IN/OUT */
+ hb_ot_math_glyph_part_t *parts /* OUT */,
+ hb_position_t *italics_correction /* OUT */) const
+ { return get_glyph_construction (glyph, direction, font)
+ .get_assembly ()
+ .get_parts (direction, font,
+ start_offset, parts_count, parts,
+ italics_correction); }
+
+ private:
+ const MathGlyphConstruction &
+ get_glyph_construction (hb_codepoint_t glyph,
+ hb_direction_t direction,
+ hb_font_t *font HB_UNUSED) const
+ {
+ bool vertical = HB_DIRECTION_IS_VERTICAL (direction); /* every direction that is not vertical takes the horizontal path */
+ unsigned int count = vertical ? vertGlyphCount : horizGlyphCount;
+ const OffsetTo<Coverage> &coverage = vertical ? vertGlyphCoverage
+ : horizGlyphCoverage;
+
+ unsigned int index = (this+coverage).get_coverage (glyph);
+ if (unlikely (index >= count)) return Null (MathGlyphConstruction); /* coverage index outside the declared count (presumably including NOT_COVERED -- confirm): Null construction */
+
+ if (!vertical)
+ index += vertGlyphCount; /* horizontal offsets follow the vertGlyphCount vertical ones in glyphConstruction */
+
+ return this+glyphConstruction[index];
+ }
+
+ protected:
+ HBUINT16 minConnectorOverlap;
+ /* Minimum overlap of connecting
+ * glyphs during glyph construction,
+ * in design units. */
+ OffsetTo<Coverage> vertGlyphCoverage;
+ /* Offset to Coverage table -
+ * from the beginning of MathVariants
+ * table. */
+ OffsetTo<Coverage> horizGlyphCoverage;
+ /* Offset to Coverage table -
+ * from the beginning of MathVariants
+ * table. */
+ HBUINT16 vertGlyphCount; /* Number of glyphs for which
+ * information is provided for
+ * vertically growing variants. */
+ HBUINT16 horizGlyphCount;/* Number of glyphs for which
+ * information is provided for
+ * horizontally growing variants. */
+
+ /* Array of offsets to MathGlyphConstruction tables - from the beginning of
+ the MathVariants table, for shapes growing in vertical/horizontal
+ direction. */
+ UnsizedArrayOf<OffsetTo<MathGlyphConstruction>>
+ glyphConstruction;
+
+ public:
+ DEFINE_SIZE_ARRAY (10, glyphConstruction);
+};
+
+
+/*
+ * MATH -- Mathematical typesetting
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/math
+ */
+
+struct MATH
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_MATH;
+
+ bool has_data () const { return version.to_int (); }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (version.sanitize (c) &&
+ likely (version.major == 1) && /* any other major version fails sanitization */
+ mathConstants.sanitize (c, this) && /* the three sub-table offsets are resolved relative to the MATH table */
+ mathGlyphInfo.sanitize (c, this) &&
+ mathVariants.sanitize (c, this));
+ }
+
+ hb_position_t get_constant (hb_ot_math_constant_t constant,
+ hb_font_t *font) const
+ { return (this+mathConstants).get_value (constant, font); }
+
+ const MathGlyphInfo &get_glyph_info () const { return this+mathGlyphInfo; }
+
+ const MathVariants &get_variants () const { return this+mathVariants; }
+
+ protected:
+ FixedVersion<>version; /* Version of the MATH table
+ * initially set to 0x00010000u */
+ OffsetTo<MathConstants>
+ mathConstants; /* MathConstants table */
+ OffsetTo<MathGlyphInfo>
+ mathGlyphInfo; /* MathGlyphInfo table */
+ OffsetTo<MathVariants>
+ mathVariants; /* MathVariants table */
+
+ public:
+ DEFINE_SIZE_STATIC (10);
+};
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_MATH_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-math.cc b/thirdparty/harfbuzz/src/hb-ot-math.cc
new file mode 100644
index 0000000000..9d8c6e735a
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-math.cc
@@ -0,0 +1,293 @@
+/*
+ * Copyright © 2016 Igalia S.L.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Igalia Author(s): Frédéric Wang
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_MATH
+
+#include "hb-ot-math-table.hh"
+
+
+/**
+ * SECTION:hb-ot-math
+ * @title: hb-ot-math
+ * @short_description: OpenType Math information
+ * @include: hb-ot.h
+ *
+ * Functions for fetching mathematics layout data from OpenType fonts.
+ *
+ * HarfBuzz itself does not implement a math layout solution. The
+ * functions and types provided can be used by client programs to access
+ * the font data necessary for typesetting OpenType Math layout.
+ *
+ **/
+
+
+/*
+ * OT::MATH
+ */
+
+/**
+ * hb_ot_math_has_data:
+ * @face: #hb_face_t to test
+ *
+ * Tests whether a face has a `MATH` table.
+ *
+ * Return value: true if the table is found, false otherwise
+ *
+ * Since: 1.3.3
+ **/
+hb_bool_t
+hb_ot_math_has_data (hb_face_t *face)
+{
+ return face->table.MATH->has_data ();
+}
+
+/**
+ * hb_ot_math_get_constant:
+ * @font: #hb_font_t to work upon
+ * @constant: #hb_ot_math_constant_t the constant to retrieve
+ *
+ * Fetches the specified math constant. For most constants, the value returned
+ * is an #hb_position_t.
+ *
+ * However, if the requested constant is #HB_OT_MATH_CONSTANT_SCRIPT_PERCENT_SCALE_DOWN,
+ * #HB_OT_MATH_CONSTANT_SCRIPT_SCRIPT_PERCENT_SCALE_DOWN or
+ * #HB_OT_MATH_CONSTANT_RADICAL_DEGREE_BOTTOM_RAISE_PERCENT, then the return value is
+ * an integer between 0 and 100 representing that percentage.
+ *
+ * Return value: the requested constant or zero
+ *
+ * Since: 1.3.3
+ **/
+hb_position_t
+hb_ot_math_get_constant (hb_font_t *font,
+ hb_ot_math_constant_t constant)
+{
+ return font->face->table.MATH->get_constant(constant, font);
+}
+
+/**
+ * hb_ot_math_get_glyph_italics_correction:
+ * @font: #hb_font_t to work upon
+ * @glyph: The glyph index from which to retrieve the value
+ *
+ * Fetches an italics-correction value (if one exists) for the specified
+ * glyph index.
+ *
+ * Return value: the italics correction of the glyph or zero
+ *
+ * Since: 1.3.3
+ **/
+hb_position_t
+hb_ot_math_get_glyph_italics_correction (hb_font_t *font,
+ hb_codepoint_t glyph)
+{
+ return font->face->table.MATH->get_glyph_info().get_italics_correction (glyph, font);
+}
+
+/**
+ * hb_ot_math_get_glyph_top_accent_attachment:
+ * @font: #hb_font_t to work upon
+ * @glyph: The glyph index from which to retrieve the value
+ *
+ * Fetches a top-accent-attachment value (if one exists) for the specified
+ * glyph index.
+ *
+ * For any glyph that does not have a top-accent-attachment value - that is,
+ * a glyph not covered by the `MathTopAccentAttachment` table (or, when
+ * @font has no `MathTopAccentAttachment` table or no `MATH` table, any
+ * glyph) - the function synthesizes a value, returning the position at
+ * one-half the glyph's advance width.
+ *
+ * Return value: the top accent attachment of the glyph or 0.5 * the advance
+ * width of @glyph
+ *
+ * Since: 1.3.3
+ **/
+hb_position_t
+hb_ot_math_get_glyph_top_accent_attachment (hb_font_t *font,
+ hb_codepoint_t glyph)
+{
+ return font->face->table.MATH->get_glyph_info().get_top_accent_attachment (glyph, font);
+}
+
+/**
+ * hb_ot_math_is_glyph_extended_shape:
+ * @face: #hb_face_t to work upon
+ * @glyph: The glyph index to test
+ *
+ * Tests whether the given glyph index is an extended shape in the face.
+ *
+ * Return value: true if the glyph is an extended shape, false otherwise
+ *
+ * Since: 1.3.3
+ **/
+hb_bool_t
+hb_ot_math_is_glyph_extended_shape (hb_face_t *face,
+ hb_codepoint_t glyph)
+{
+ return face->table.MATH->get_glyph_info().is_extended_shape (glyph);
+}
+
+/**
+ * hb_ot_math_get_glyph_kerning:
+ * @font: #hb_font_t to work upon
+ * @glyph: The glyph index from which to retrieve the value
+ * @kern: The #hb_ot_math_kern_t from which to retrieve the value
+ * @correction_height: the correction height to use to determine the kerning.
+ *
+ * Fetches the math kerning (cut-ins) value for the specified font, glyph index, and
+ * @kern.
+ *
+ * If the MathKern table is found, the function examines it to find a height
+ * value that is greater or equal to @correction_height. If such a height
+ * value is found, corresponding kerning value from the table is returned. If
+ * no such height value is found, the last kerning value is returned.
+ *
+ * Return value: requested kerning value or zero
+ *
+ * Since: 1.3.3
+ **/
+hb_position_t
+hb_ot_math_get_glyph_kerning (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_ot_math_kern_t kern,
+ hb_position_t correction_height)
+{
+ return font->face->table.MATH->get_glyph_info().get_kerning (glyph,
+ kern,
+ correction_height,
+ font);
+}
+
+/**
+ * hb_ot_math_get_glyph_variants:
+ * @font: #hb_font_t to work upon
+ * @glyph: The index of the glyph to stretch
+ * @direction: The direction of the stretching (horizontal or vertical)
+ * @start_offset: offset of the first variant to retrieve
+ * @variants_count: (inout): Input = the maximum number of variants to return;
+ * Output = the actual number of variants returned
+ * @variants: (out) (array length=variants_count): array of variants returned
+ *
+ * Fetches the MathGlyphConstruction for the specified font, glyph index, and
+ * direction. The corresponding list of size variants is returned as a list of
+ * #hb_ot_math_glyph_variant_t structs.
+ *
+ * <note>The @direction parameter is only used to select between horizontal
+ * or vertical directions for the construction. Even though all #hb_direction_t
+ * values are accepted, only the result of #HB_DIRECTION_IS_HORIZONTAL is
+ * considered.</note>
+ *
+ * Return value: the total number of size variants available or zero
+ *
+ * Since: 1.3.3
+ **/
+unsigned int
+hb_ot_math_get_glyph_variants (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_direction_t direction,
+ unsigned int start_offset,
+ unsigned int *variants_count, /* IN/OUT */
+ hb_ot_math_glyph_variant_t *variants /* OUT */)
+{
+ return font->face->table.MATH->get_variants().get_glyph_variants (glyph, direction, font,
+ start_offset,
+ variants_count,
+ variants);
+}
+
+/**
+ * hb_ot_math_get_min_connector_overlap:
+ * @font: #hb_font_t to work upon
+ * @direction: direction of the stretching (horizontal or vertical)
+ *
+ * Fetches the MathVariants table for the specified font and returns the
+ * minimum overlap of connecting glyphs that are required to draw a glyph
+ * assembly in the specified direction.
+ *
+ * <note>The @direction parameter is only used to select between horizontal
+ * or vertical directions for the construction. Even though all #hb_direction_t
+ * values are accepted, only the result of #HB_DIRECTION_IS_HORIZONTAL is
+ * considered.</note>
+ *
+ * Return value: requested minimum connector overlap or zero
+ *
+ * Since: 1.3.3
+ **/
+hb_position_t
+hb_ot_math_get_min_connector_overlap (hb_font_t *font,
+ hb_direction_t direction)
+{
+ return font->face->table.MATH->get_variants().get_min_connector_overlap (direction, font);
+}
+
+/**
+ * hb_ot_math_get_glyph_assembly:
+ * @font: #hb_font_t to work upon
+ * @glyph: The index of the glyph to stretch
+ * @direction: direction of the stretching (horizontal or vertical)
+ * @start_offset: offset of the first glyph part to retrieve
+ * @parts_count: (inout): Input = maximum number of glyph parts to return;
+ * Output = actual number of parts returned
+ * @parts: (out) (array length=parts_count): the glyph parts returned
+ * @italics_correction: (out): italics correction of the glyph assembly
+ *
+ * Fetches the GlyphAssembly for the specified font, glyph index, and direction.
+ * Returned are a list of #hb_ot_math_glyph_part_t glyph parts that can be
+ * used to draw the glyph and an italics-correction value (if one is defined
+ * in the font).
+ *
+ * <note>The @direction parameter is only used to select between horizontal
+ * or vertical directions for the construction. Even though all #hb_direction_t
+ * values are accepted, only the result of #HB_DIRECTION_IS_HORIZONTAL is
+ * considered.</note>
+ *
+ * Return value: the total number of parts in the glyph assembly
+ *
+ * Since: 1.3.3
+ **/
+unsigned int
+hb_ot_math_get_glyph_assembly (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_direction_t direction,
+ unsigned int start_offset,
+ unsigned int *parts_count, /* IN/OUT */
+ hb_ot_math_glyph_part_t *parts, /* OUT */
+ hb_position_t *italics_correction /* OUT */)
+{
+ return font->face->table.MATH->get_variants().get_glyph_parts (glyph,
+ direction,
+ font,
+ start_offset,
+ parts_count,
+ parts,
+ italics_correction);
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-math.h b/thirdparty/harfbuzz/src/hb-ot-math.h
new file mode 100644
index 0000000000..ad864a762d
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-math.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright © 2016 Igalia S.L.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Igalia Author(s): Frédéric Wang
+ */
+
+#ifndef HB_OT_H_IN
+#error "Include <hb-ot.h> instead."
+#endif
+
+#ifndef HB_OT_MATH_H
+#define HB_OT_MATH_H
+
+#include "hb.h"
+
+HB_BEGIN_DECLS
+
+
+/*
+ * MATH
+ */
+
+#define HB_OT_TAG_MATH HB_TAG('M','A','T','H')
+
+/* Use with hb_buffer_set_script() for math shaping. */
+#define HB_OT_MATH_SCRIPT HB_TAG('m','a','t','h')
+
+/* Types */
+
+/**
+ * hb_ot_math_constant_t:
+ *
+ * The 'MATH' table constants specified at
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/math
+ *
+ * Since: 1.3.3
+ */
+typedef enum {
+ HB_OT_MATH_CONSTANT_SCRIPT_PERCENT_SCALE_DOWN = 0,
+ HB_OT_MATH_CONSTANT_SCRIPT_SCRIPT_PERCENT_SCALE_DOWN = 1,
+ HB_OT_MATH_CONSTANT_DELIMITED_SUB_FORMULA_MIN_HEIGHT = 2,
+ HB_OT_MATH_CONSTANT_DISPLAY_OPERATOR_MIN_HEIGHT = 3,
+ HB_OT_MATH_CONSTANT_MATH_LEADING = 4,
+ HB_OT_MATH_CONSTANT_AXIS_HEIGHT = 5,
+ HB_OT_MATH_CONSTANT_ACCENT_BASE_HEIGHT = 6,
+ HB_OT_MATH_CONSTANT_FLATTENED_ACCENT_BASE_HEIGHT = 7,
+ HB_OT_MATH_CONSTANT_SUBSCRIPT_SHIFT_DOWN = 8,
+ HB_OT_MATH_CONSTANT_SUBSCRIPT_TOP_MAX = 9,
+ HB_OT_MATH_CONSTANT_SUBSCRIPT_BASELINE_DROP_MIN = 10,
+ HB_OT_MATH_CONSTANT_SUPERSCRIPT_SHIFT_UP = 11,
+ HB_OT_MATH_CONSTANT_SUPERSCRIPT_SHIFT_UP_CRAMPED = 12,
+ HB_OT_MATH_CONSTANT_SUPERSCRIPT_BOTTOM_MIN = 13,
+ HB_OT_MATH_CONSTANT_SUPERSCRIPT_BASELINE_DROP_MAX = 14,
+ HB_OT_MATH_CONSTANT_SUB_SUPERSCRIPT_GAP_MIN = 15,
+ HB_OT_MATH_CONSTANT_SUPERSCRIPT_BOTTOM_MAX_WITH_SUBSCRIPT = 16,
+ HB_OT_MATH_CONSTANT_SPACE_AFTER_SCRIPT = 17,
+ HB_OT_MATH_CONSTANT_UPPER_LIMIT_GAP_MIN = 18,
+ HB_OT_MATH_CONSTANT_UPPER_LIMIT_BASELINE_RISE_MIN = 19,
+ HB_OT_MATH_CONSTANT_LOWER_LIMIT_GAP_MIN = 20,
+ HB_OT_MATH_CONSTANT_LOWER_LIMIT_BASELINE_DROP_MIN = 21,
+ HB_OT_MATH_CONSTANT_STACK_TOP_SHIFT_UP = 22,
+ HB_OT_MATH_CONSTANT_STACK_TOP_DISPLAY_STYLE_SHIFT_UP = 23,
+ HB_OT_MATH_CONSTANT_STACK_BOTTOM_SHIFT_DOWN = 24,
+ HB_OT_MATH_CONSTANT_STACK_BOTTOM_DISPLAY_STYLE_SHIFT_DOWN = 25,
+ HB_OT_MATH_CONSTANT_STACK_GAP_MIN = 26,
+ HB_OT_MATH_CONSTANT_STACK_DISPLAY_STYLE_GAP_MIN = 27,
+ HB_OT_MATH_CONSTANT_STRETCH_STACK_TOP_SHIFT_UP = 28,
+ HB_OT_MATH_CONSTANT_STRETCH_STACK_BOTTOM_SHIFT_DOWN = 29,
+ HB_OT_MATH_CONSTANT_STRETCH_STACK_GAP_ABOVE_MIN = 30,
+ HB_OT_MATH_CONSTANT_STRETCH_STACK_GAP_BELOW_MIN = 31,
+ HB_OT_MATH_CONSTANT_FRACTION_NUMERATOR_SHIFT_UP = 32,
+ HB_OT_MATH_CONSTANT_FRACTION_NUMERATOR_DISPLAY_STYLE_SHIFT_UP = 33,
+ HB_OT_MATH_CONSTANT_FRACTION_DENOMINATOR_SHIFT_DOWN = 34,
+ HB_OT_MATH_CONSTANT_FRACTION_DENOMINATOR_DISPLAY_STYLE_SHIFT_DOWN = 35,
+ HB_OT_MATH_CONSTANT_FRACTION_NUMERATOR_GAP_MIN = 36,
+ HB_OT_MATH_CONSTANT_FRACTION_NUM_DISPLAY_STYLE_GAP_MIN = 37,
+ HB_OT_MATH_CONSTANT_FRACTION_RULE_THICKNESS = 38,
+ HB_OT_MATH_CONSTANT_FRACTION_DENOMINATOR_GAP_MIN = 39,
+ HB_OT_MATH_CONSTANT_FRACTION_DENOM_DISPLAY_STYLE_GAP_MIN = 40,
+ HB_OT_MATH_CONSTANT_SKEWED_FRACTION_HORIZONTAL_GAP = 41,
+ HB_OT_MATH_CONSTANT_SKEWED_FRACTION_VERTICAL_GAP = 42,
+ HB_OT_MATH_CONSTANT_OVERBAR_VERTICAL_GAP = 43,
+ HB_OT_MATH_CONSTANT_OVERBAR_RULE_THICKNESS = 44,
+ HB_OT_MATH_CONSTANT_OVERBAR_EXTRA_ASCENDER = 45,
+ HB_OT_MATH_CONSTANT_UNDERBAR_VERTICAL_GAP = 46,
+ HB_OT_MATH_CONSTANT_UNDERBAR_RULE_THICKNESS = 47,
+ HB_OT_MATH_CONSTANT_UNDERBAR_EXTRA_DESCENDER = 48,
+ HB_OT_MATH_CONSTANT_RADICAL_VERTICAL_GAP = 49,
+ HB_OT_MATH_CONSTANT_RADICAL_DISPLAY_STYLE_VERTICAL_GAP = 50,
+ HB_OT_MATH_CONSTANT_RADICAL_RULE_THICKNESS = 51,
+ HB_OT_MATH_CONSTANT_RADICAL_EXTRA_ASCENDER = 52,
+ HB_OT_MATH_CONSTANT_RADICAL_KERN_BEFORE_DEGREE = 53,
+ HB_OT_MATH_CONSTANT_RADICAL_KERN_AFTER_DEGREE = 54,
+ HB_OT_MATH_CONSTANT_RADICAL_DEGREE_BOTTOM_RAISE_PERCENT = 55
+} hb_ot_math_constant_t;
+
+/**
+ * hb_ot_math_kern_t:
+ *
+ * The math kerning-table types defined for the four corners
+ * of a glyph.
+ *
+ * Since: 1.3.3
+ */
+typedef enum {
+ HB_OT_MATH_KERN_TOP_RIGHT = 0,
+ HB_OT_MATH_KERN_TOP_LEFT = 1,
+ HB_OT_MATH_KERN_BOTTOM_RIGHT = 2,
+ HB_OT_MATH_KERN_BOTTOM_LEFT = 3
+} hb_ot_math_kern_t;
+
+/**
+ * hb_ot_math_glyph_variant_t:
+ * @glyph: The glyph index of the variant
+ * @advance: The advance (width or height) of the variant, in the direction of the requested extension
+ *
+ * Data type to hold math-variant information for a glyph.
+ *
+ * Since: 1.3.3
+ */
+typedef struct hb_ot_math_glyph_variant_t {
+ hb_codepoint_t glyph;
+ hb_position_t advance;
+} hb_ot_math_glyph_variant_t;
+
+/**
+ * hb_ot_math_glyph_part_flags_t:
+ *
+ * Flags for math glyph parts.
+ *
+ * Since: 1.3.3
+ */
+typedef enum { /*< flags >*/
+ HB_OT_MATH_GLYPH_PART_FLAG_EXTENDER = 0x00000001u /* Extender glyph */
+} hb_ot_math_glyph_part_flags_t;
+
+/**
+ * hb_ot_math_glyph_part_t:
+ * @glyph: The glyph index of the variant part
+ * @start_connector_length: The length of the connector on the starting side of the variant part
+ * @end_connector_length: The length of the connector on the ending side of the variant part
+ * @full_advance: The total advance of the part
+ * @flags: #hb_ot_math_glyph_part_flags_t flags for the part
+ *
+ * Data type to hold information for a "part" component of a math-variant glyph.
+ * Large variants for stretchable math glyphs (such as parentheses) can be constructed
+ * on the fly from parts.
+ *
+ * Since: 1.3.3
+ */
+typedef struct hb_ot_math_glyph_part_t {
+ hb_codepoint_t glyph;
+ hb_position_t start_connector_length;
+ hb_position_t end_connector_length;
+ hb_position_t full_advance;
+ hb_ot_math_glyph_part_flags_t flags;
+} hb_ot_math_glyph_part_t;
+
+/* Methods */
+
+HB_EXTERN hb_bool_t
+hb_ot_math_has_data (hb_face_t *face);
+
+HB_EXTERN hb_position_t
+hb_ot_math_get_constant (hb_font_t *font,
+ hb_ot_math_constant_t constant);
+
+HB_EXTERN hb_position_t
+hb_ot_math_get_glyph_italics_correction (hb_font_t *font,
+ hb_codepoint_t glyph);
+
+HB_EXTERN hb_position_t
+hb_ot_math_get_glyph_top_accent_attachment (hb_font_t *font,
+ hb_codepoint_t glyph);
+
+HB_EXTERN hb_bool_t
+hb_ot_math_is_glyph_extended_shape (hb_face_t *face,
+ hb_codepoint_t glyph);
+
+HB_EXTERN hb_position_t
+hb_ot_math_get_glyph_kerning (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_ot_math_kern_t kern,
+ hb_position_t correction_height);
+
+HB_EXTERN unsigned int
+hb_ot_math_get_glyph_variants (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_direction_t direction,
+ unsigned int start_offset,
+ unsigned int *variants_count, /* IN/OUT */
+ hb_ot_math_glyph_variant_t *variants /* OUT */);
+
+HB_EXTERN hb_position_t
+hb_ot_math_get_min_connector_overlap (hb_font_t *font,
+ hb_direction_t direction);
+
+HB_EXTERN unsigned int
+hb_ot_math_get_glyph_assembly (hb_font_t *font,
+ hb_codepoint_t glyph,
+ hb_direction_t direction,
+ unsigned int start_offset,
+ unsigned int *parts_count, /* IN/OUT */
+ hb_ot_math_glyph_part_t *parts, /* OUT */
+ hb_position_t *italics_correction /* OUT */);
+
+
+HB_END_DECLS
+
+#endif /* HB_OT_MATH_H */
diff --git a/thirdparty/harfbuzz/src/hb-ot-maxp-table.hh b/thirdparty/harfbuzz/src/hb-ot-maxp-table.hh
new file mode 100644
index 0000000000..929956d12b
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-maxp-table.hh
@@ -0,0 +1,142 @@
+/*
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_MAXP_TABLE_HH
+#define HB_OT_MAXP_TABLE_HH
+
+#include "hb-open-type.hh"
+
+namespace OT {
+
+
+/*
+ * maxp -- Maximum Profile
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/maxp
+ */
+
+#define HB_OT_TAG_maxp HB_TAG('m','a','x','p')
+
+struct maxpV1Tail
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ HBUINT16 maxPoints; /* Maximum points in a non-composite glyph. */
+ HBUINT16 maxContours; /* Maximum contours in a non-composite glyph. */
+ HBUINT16 maxCompositePoints; /* Maximum points in a composite glyph. */
+ HBUINT16 maxCompositeContours; /* Maximum contours in a composite glyph. */
+ HBUINT16 maxZones; /* 1 if instructions do not use the twilight zone (Z0),
+ * or 2 if instructions do use Z0; should be set to 2 in
+ * most cases. */
+ HBUINT16 maxTwilightPoints; /* Maximum points used in Z0. */
+ HBUINT16 maxStorage; /* Number of Storage Area locations. */
+ HBUINT16 maxFunctionDefs; /* Number of FDEFs, equal to the highest function number + 1. */
+ HBUINT16 maxInstructionDefs; /* Number of IDEFs. */
+ HBUINT16 maxStackElements; /* Maximum stack depth. (This includes Font and CVT
+ * Programs, as well as the instructions for each glyph.) */
+ HBUINT16 maxSizeOfInstructions; /* Maximum byte count for glyph instructions. */
+ HBUINT16 maxComponentElements; /* Maximum number of components referenced at
+ * "top level" for any composite glyph. */
+ HBUINT16 maxComponentDepth; /* Maximum levels of recursion; 1 for simple components. */
+ public:
+ DEFINE_SIZE_STATIC (26);
+};
+
+
+struct maxp
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_maxp;
+
+ unsigned int get_num_glyphs () const { return numGlyphs; }
+
+ void set_num_glyphs (unsigned int count)
+ {
+ numGlyphs = count;
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!c->check_struct (this))) /* the fixed header (version + numGlyphs) must be in bounds first */
+ return_trace (false);
+
+ if (version.major == 1) /* major version 1 carries a maxpV1Tail directly after the header */
+ {
+ const maxpV1Tail &v1 = StructAfter<maxpV1Tail> (*this);
+ return_trace (v1.sanitize (c));
+ }
+ return_trace (likely (version.major == 0 && version.minor == 0x5000u)); /* otherwise only version 0.5 (0x00005000) is accepted */
+ }
+
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ maxp *maxp_prime = c->serializer->embed (this); /* header is embedded into the serializer output */
+ if (unlikely (!maxp_prime)) return_trace (false); /* embedding failed */
+
+ maxp_prime->numGlyphs = c->plan->num_output_glyphs (); /* glyph count is taken from the subset plan */
+ if (maxp_prime->version.major == 1) /* version 1 tables also carry a maxpV1Tail after the header */
+ {
+ const maxpV1Tail *src_v1 = &StructAfter<maxpV1Tail> (*this);
+ maxpV1Tail *dest_v1 = c->serializer->embed<maxpV1Tail> (src_v1);
+ if (unlikely (!dest_v1)) return_trace (false);
+
+ if (c->plan->drop_hints) /* hint-related limits are reset only when the plan drops hints */
+ drop_hint_fields (dest_v1);
+ }
+
+ return_trace (true);
+ }
+
+ static void drop_hint_fields (maxpV1Tail* dest_v1)
+ {
+ dest_v1->maxZones = 1; /* 1 = instructions do not use the twilight zone (Z0) */
+ dest_v1->maxTwilightPoints = 0; /* the remaining instruction-related limits are zeroed */
+ dest_v1->maxStorage = 0;
+ dest_v1->maxFunctionDefs = 0;
+ dest_v1->maxInstructionDefs = 0;
+ dest_v1->maxStackElements = 0;
+ dest_v1->maxSizeOfInstructions = 0;
+ }
+
+ protected:
+ FixedVersion<>version;/* Version of the maxp table (0.5 or 1.0),
+ * 0x00005000u or 0x00010000u. */
+ HBUINT16 numGlyphs;
+ /* The number of glyphs in the font. */
+/*maxpV1Tail v1Tail[HB_VAR_ARRAY]; */
+ public:
+ DEFINE_SIZE_STATIC (6);
+};
+
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_MAXP_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-meta-table.hh b/thirdparty/harfbuzz/src/hb-ot-meta-table.hh
new file mode 100644
index 0000000000..1225e26ce1
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-meta-table.hh
@@ -0,0 +1,127 @@
+/*
+ * Copyright © 2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_OT_META_TABLE_HH
+#define HB_OT_META_TABLE_HH
+
+#include "hb-open-type.hh"
+
+/*
+ * meta -- Metadata Table
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/meta
+ * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6meta.html
+ */
+#define HB_OT_TAG_meta HB_TAG ('m','e','t','a')
+
+
+namespace OT {
+
+
+struct DataMap /* One 'meta' record: a tag plus the offset and length of its data. */
+{
+ int cmp (hb_tag_t a) const { return tag.cmp (a); }
+
+ hb_tag_t get_tag () const { return tag; }
+
+ hb_blob_t *reference_entry (hb_blob_t *meta_blob) const /* Sub-blob of meta_blob at offset dataZ, dataLength bytes long. */
+ { return hb_blob_create_sub_blob (meta_blob, dataZ, dataLength); }
+
+ bool sanitize (hb_sanitize_context_t *c, const void *base) const /* base: address handed to dataZ.sanitize () for resolving the offset. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ dataZ.sanitize (c, base, dataLength)));
+ }
+
+ protected:
+ Tag tag; /* A tag indicating the type of metadata. */
+ LNNOffsetTo<UnsizedArrayOf<HBUINT8>>
+ dataZ; /* Offset in bytes from the beginning of the
+ * metadata table to the data for this tag. */
+ HBUINT32 dataLength; /* Length of the data. The data is not required to
+ * be padded to any byte boundary. */
+ public:
+ DEFINE_SIZE_STATIC (12);
+};
+
+struct meta /* 'meta' table: a fixed header followed by an array of DataMap records. */
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_meta;
+
+ struct accelerator_t /* Holds a sanitized reference to a face's 'meta' table. */
+ {
+ void init (hb_face_t *face)
+ { table = hb_sanitize_context_t ().reference_table<meta> (face); }
+ void fini () { table.destroy (); }
+
+ hb_blob_t *reference_entry (hb_tag_t tag) const /* Searches dataMaps for tag and returns that record's data as a blob. */
+ { return table->dataMaps.lsearch (tag).reference_entry (table.get_blob ()); }
+
+ unsigned int get_entries (unsigned int start_offset, /* Returns the total number of records, whatever was copied. */
+ unsigned int *count,
+ hb_ot_meta_tag_t *entries) const
+ {
+ if (count) /* With a NULL count nothing is copied into entries. */
+ {
+ + table->dataMaps.sub_array (start_offset, count)
+ | hb_map (&DataMap::get_tag)
+ | hb_map ([](hb_tag_t tag) { return (hb_ot_meta_tag_t) tag; })
+ | hb_sink (hb_array (entries, *count))
+ ;
+ }
+ return table->dataMaps.len;
+ }
+
+ private:
+ hb_blob_ptr_t<meta> table;
+ };
+
+ bool sanitize (hb_sanitize_context_t *c) const /* Accepts only version 1; records are sanitized with this table as base. */
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ version == 1 &&
+ dataMaps.sanitize (c, this)));
+ }
+
+ protected:
+ HBUINT32 version; /* Version number of the metadata table — set to 1. */
+ HBUINT32 flags; /* Flags — currently unused; set to 0. */
+ HBUINT32 dataOffset;
+ /* Per Apple specification:
+ * Offset from the beginning of the table to the data.
+ * Per OT specification:
+ * Reserved. Not used; should be set to 0. */
+ LArrayOf<DataMap>
+ dataMaps;/* Array of data map records. */
+ public:
+ DEFINE_SIZE_ARRAY (16, dataMaps);
+};
+
+struct meta_accelerator_t : meta::accelerator_t {}; /* Namespace-scope name for meta::accelerator_t. */
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_META_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-meta.cc b/thirdparty/harfbuzz/src/hb-ot-meta.cc
new file mode 100644
index 0000000000..54a0e10f9b
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-meta.cc
@@ -0,0 +1,77 @@
+/*
+ * Copyright © 2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_META
+
+#include "hb-ot-meta-table.hh"
+
+/**
+ * SECTION:hb-ot-meta
+ * @title: hb-ot-meta
+ * @short_description: OpenType Metadata
+ * @include: hb-ot.h
+ *
+ * Functions for fetching metadata from fonts.
+ **/
+
+/**
+ * hb_ot_meta_get_entry_tags:
+ * @face: a #hb_face_t object.
+ * @start_offset: iteration's start offset
+ * @entries_count: (inout) (allow-none): buffer size as input, filled size as output
+ * @entries: (out caller-allocates) (array length=entries_count): entries tags buffer
+ *
+ * Return value: Number of all metadata entries in the face's meta table.
+ *
+ * Since: 2.6.0
+ **/
+unsigned int
+hb_ot_meta_get_entry_tags (hb_face_t *face,
+ unsigned int start_offset,
+ unsigned int *entries_count, /* IN/OUT. May be NULL. */
+ hb_ot_meta_tag_t *entries /* OUT. May be NULL. */)
+{
+ return face->table.meta->get_entries (start_offset, entries_count, entries);
+}
+
+/**
+ * hb_ot_meta_reference_entry:
+ * @face: a #hb_face_t object.
+ * @meta_tag: tag of the metadata entry to fetch.
+ *
+ * Fetches the metadata entry with the given tag from a face.
+ *
+ * Returns: (transfer full): A blob containing the entry's data.
+ *
+ * Since: 2.6.0
+ **/
+hb_blob_t *
+hb_ot_meta_reference_entry (hb_face_t *face, hb_ot_meta_tag_t meta_tag)
+{
+ return face->table.meta->reference_entry (meta_tag);
+}
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-meta.h b/thirdparty/harfbuzz/src/hb-ot-meta.h
new file mode 100644
index 0000000000..0278d84148
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-meta.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright © 2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_OT_H_IN
+#error "Include <hb-ot.h> instead."
+#endif
+
+#ifndef HB_OT_META_H
+#define HB_OT_META_H
+
+#include "hb.h"
+
+HB_BEGIN_DECLS
+
+/**
+ * hb_ot_meta_tag_t:
+ * @HB_OT_META_TAG_DESIGN_LANGUAGES: Design languages. Text, using only
+ * Basic Latin (ASCII) characters. Indicates languages and/or scripts
+ * for the user audiences that the font was primarily designed for.
+ * @HB_OT_META_TAG_SUPPORTED_LANGUAGES: Supported languages. Text, using
+ * only Basic Latin (ASCII) characters. Indicates languages and/or scripts
+ * that the font is declared to be capable of supporting.
+ *
+ * Known metadata tags from https://docs.microsoft.com/en-us/typography/opentype/spec/meta
+ *
+ * Since: 2.6.0
+ **/
+typedef enum {
+/* Tags kept commented out, i.e. not part of this enum:
+ HB_OT_META_TAG_APPL = HB_TAG ('a','p','p','l'),
+ HB_OT_META_TAG_BILD = HB_TAG ('b','i','l','d'),
+*/
+ HB_OT_META_TAG_DESIGN_LANGUAGES = HB_TAG ('d','l','n','g'),
+ HB_OT_META_TAG_SUPPORTED_LANGUAGES = HB_TAG ('s','l','n','g'),
+
+ _HB_OT_META_TAG_MAX_VALUE = HB_TAG_MAX_SIGNED /*< skip >*/
+} hb_ot_meta_tag_t;
+
+HB_EXTERN unsigned int
+hb_ot_meta_get_entry_tags (hb_face_t *face,
+ unsigned int start_offset,
+ unsigned int *entries_count, /* IN/OUT. May be NULL. */
+ hb_ot_meta_tag_t *entries /* OUT. May be NULL. */);
+
+HB_EXTERN hb_blob_t *
+hb_ot_meta_reference_entry (hb_face_t *face, hb_ot_meta_tag_t meta_tag);
+
+HB_END_DECLS
+
+#endif /* HB_OT_META_H */
diff --git a/thirdparty/harfbuzz/src/hb-ot-metrics.cc b/thirdparty/harfbuzz/src/hb-ot-metrics.cc
new file mode 100644
index 0000000000..181ac4d57e
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-metrics.cc
@@ -0,0 +1,231 @@
+/*
+ * Copyright © 2018-2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "hb.hh"
+
+#include "hb-ot-var-mvar-table.hh"
+#include "hb-ot-gasp-table.hh" // Just so we compile it; unused otherwise.
+#include "hb-ot-os2-table.hh"
+#include "hb-ot-post-table.hh"
+#include "hb-ot-hhea-table.hh"
+#include "hb-ot-metrics.hh"
+#include "hb-ot-face.hh"
+
+
+static float
+_fix_ascender_descender (float value, hb_ot_metrics_tag_t metrics_tag)
+{ /* Ascender tags yield |value|, descender tags yield -|value|; any other tag passes through. */
+  const bool is_ascender = metrics_tag == HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER ||
+                           metrics_tag == HB_OT_METRICS_TAG_VERTICAL_ASCENDER;
+  const bool is_descender = metrics_tag == HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER ||
+                            metrics_tag == HB_OT_METRICS_TAG_VERTICAL_DESCENDER;
+  if (!is_ascender && !is_descender) return value;
+  const double magnitude = fabs ((double) value);
+  return is_ascender ? magnitude : -magnitude;
+}
+
+/* The part of the _get_position logic shared by hb-ot-font and this file; it sits
+ outside the HB_NO_METRICS guard so slim builds can leave out the rarely needed rest. */
+bool
+_hb_ot_metrics_get_position_common (hb_font_t *font,
+ hb_ot_metrics_tag_t metrics_tag,
+ hb_position_t *position /* OUT. May be NULL. */)
+{ /* Writes the scaled value to *position (when non-NULL) and returns whether the source table has data. */
+ hb_face_t *face = font->face;
+ switch ((unsigned) metrics_tag)
+ {
+#ifndef HB_NO_VAR
+#define GET_VAR face->table.MVAR->get_var (metrics_tag, font->coords, font->num_coords)
+#else
+#define GET_VAR .0f
+#endif
+#define GET_METRIC_X(TABLE, ATTR) \
+ (face->table.TABLE->has_data () && \
+ (position && (*position = font->em_scalef_x (_fix_ascender_descender ( \
+ face->table.TABLE->ATTR + GET_VAR, metrics_tag))), true))
+#define GET_METRIC_Y(TABLE, ATTR) \
+ (face->table.TABLE->has_data () && \
+ (position && (*position = font->em_scalef_y (_fix_ascender_descender ( \
+ face->table.TABLE->ATTR + GET_VAR, metrics_tag))), true))
+ case HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER: /* hhea is the fallback when OS/2 typo metrics are not used or OS/2 has no data. */
+ return (face->table.OS2->use_typo_metrics () && GET_METRIC_Y (OS2, sTypoAscender)) ||
+ GET_METRIC_Y (hhea, ascender);
+ case HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER:
+ return (face->table.OS2->use_typo_metrics () && GET_METRIC_Y (OS2, sTypoDescender)) ||
+ GET_METRIC_Y (hhea, descender);
+ case HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP:
+ return (face->table.OS2->use_typo_metrics () && GET_METRIC_Y (OS2, sTypoLineGap)) ||
+ GET_METRIC_Y (hhea, lineGap);
+ case HB_OT_METRICS_TAG_VERTICAL_ASCENDER: return GET_METRIC_X (vhea, ascender);
+ case HB_OT_METRICS_TAG_VERTICAL_DESCENDER: return GET_METRIC_X (vhea, descender);
+ case HB_OT_METRICS_TAG_VERTICAL_LINE_GAP: return GET_METRIC_X (vhea, lineGap);
+#undef GET_METRIC_Y
+#undef GET_METRIC_X
+#undef GET_VAR
+ default: assert (0); return false; /* Only the six tags above are handled here. */
+ }
+}
+
+#ifndef HB_NO_METRICS
+
+#if 0
+static bool
+_get_gasp (hb_face_t *face, float *result, hb_ot_metrics_tag_t metrics_tag)
+{ /* NOTE(review): disabled by the surrounding #if 0; 'font' below is not declared in this function, so it would not compile as written. */
+ const OT::GaspRange& range = face->table.gasp->get_gasp_range (metrics_tag - HB_TAG ('g','s','p','0'));
+ if (&range == &Null (OT::GaspRange)) return false;
+ if (result) *result = range.rangeMaxPPEM + font->face->table.MVAR->get_var (metrics_tag, font->coords, font->num_coords);
+ return true;
+}
+#endif
+
+/* Private tags for https://github.com/harfbuzz/harfbuzz/issues/1866; each one reads a single table (OS/2 or hhea) directly. */
+#define _HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER_OS2 HB_TAG ('O','a','s','c')
+#define _HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER_HHEA HB_TAG ('H','a','s','c')
+#define _HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER_OS2 HB_TAG ('O','d','s','c')
+#define _HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER_HHEA HB_TAG ('H','d','s','c')
+#define _HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP_OS2 HB_TAG ('O','l','g','p')
+#define _HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP_HHEA HB_TAG ('H','l','g','p')
+
+/**
+ * hb_ot_metrics_get_position:
+ * @font: a #hb_font_t object.
+ * @metrics_tag: tag of the metrics value to fetch.
+ * @position: (out) (optional): result of metrics value from the font.
+ *
+ * Fetches the metrics value corresponding to a given tag from a font.
+ *
+ * Returns: Whether the requested metrics value was found in the font.
+ * Since: 2.6.0
+ **/
+hb_bool_t
+hb_ot_metrics_get_position (hb_font_t *font,
+ hb_ot_metrics_tag_t metrics_tag,
+ hb_position_t *position /* OUT. May be NULL. */)
+{ /* The first six tags are delegated to the common helper; the rest are read from OS/2, hhea, vhea or post here. */
+ hb_face_t *face = font->face;
+ switch ((unsigned) metrics_tag)
+ {
+ case HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER:
+ case HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER:
+ case HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP:
+ case HB_OT_METRICS_TAG_VERTICAL_ASCENDER:
+ case HB_OT_METRICS_TAG_VERTICAL_DESCENDER:
+ case HB_OT_METRICS_TAG_VERTICAL_LINE_GAP: return _hb_ot_metrics_get_position_common (font, metrics_tag, position);
+#ifndef HB_NO_VAR
+#define GET_VAR hb_ot_metrics_get_variation (font, metrics_tag)
+#else
+#define GET_VAR 0
+#endif
+#define GET_METRIC_X(TABLE, ATTR) \
+ (face->table.TABLE->has_data () && \
+ (position && (*position = font->em_scalef_x (face->table.TABLE->ATTR + GET_VAR)), true))
+#define GET_METRIC_Y(TABLE, ATTR) \
+ (face->table.TABLE->has_data () && \
+ (position && (*position = font->em_scalef_y (face->table.TABLE->ATTR + GET_VAR)), true))
+ case HB_OT_METRICS_TAG_HORIZONTAL_CLIPPING_ASCENT: return GET_METRIC_Y (OS2, usWinAscent);
+ case HB_OT_METRICS_TAG_HORIZONTAL_CLIPPING_DESCENT: return GET_METRIC_Y (OS2, usWinDescent);
+ case HB_OT_METRICS_TAG_HORIZONTAL_CARET_RISE: return GET_METRIC_Y (hhea, caretSlopeRise);
+ case HB_OT_METRICS_TAG_HORIZONTAL_CARET_RUN: return GET_METRIC_X (hhea, caretSlopeRun);
+ case HB_OT_METRICS_TAG_HORIZONTAL_CARET_OFFSET: return GET_METRIC_X (hhea, caretOffset);
+ case HB_OT_METRICS_TAG_VERTICAL_CARET_RISE: return GET_METRIC_X (vhea, caretSlopeRise);
+ case HB_OT_METRICS_TAG_VERTICAL_CARET_RUN: return GET_METRIC_Y (vhea, caretSlopeRun);
+ case HB_OT_METRICS_TAG_VERTICAL_CARET_OFFSET: return GET_METRIC_Y (vhea, caretOffset);
+ case HB_OT_METRICS_TAG_X_HEIGHT: return GET_METRIC_Y (OS2->v2 (), sxHeight);
+ case HB_OT_METRICS_TAG_CAP_HEIGHT: return GET_METRIC_Y (OS2->v2 (), sCapHeight);
+ case HB_OT_METRICS_TAG_SUBSCRIPT_EM_X_SIZE: return GET_METRIC_X (OS2, ySubscriptXSize);
+ case HB_OT_METRICS_TAG_SUBSCRIPT_EM_Y_SIZE: return GET_METRIC_Y (OS2, ySubscriptYSize);
+ case HB_OT_METRICS_TAG_SUBSCRIPT_EM_X_OFFSET: return GET_METRIC_X (OS2, ySubscriptXOffset);
+ case HB_OT_METRICS_TAG_SUBSCRIPT_EM_Y_OFFSET: return GET_METRIC_Y (OS2, ySubscriptYOffset);
+ case HB_OT_METRICS_TAG_SUPERSCRIPT_EM_X_SIZE: return GET_METRIC_X (OS2, ySuperscriptXSize);
+ case HB_OT_METRICS_TAG_SUPERSCRIPT_EM_Y_SIZE: return GET_METRIC_Y (OS2, ySuperscriptYSize);
+ case HB_OT_METRICS_TAG_SUPERSCRIPT_EM_X_OFFSET: return GET_METRIC_X (OS2, ySuperscriptXOffset);
+ case HB_OT_METRICS_TAG_SUPERSCRIPT_EM_Y_OFFSET: return GET_METRIC_Y (OS2, ySuperscriptYOffset);
+ case HB_OT_METRICS_TAG_STRIKEOUT_SIZE: return GET_METRIC_Y (OS2, yStrikeoutSize);
+ case HB_OT_METRICS_TAG_STRIKEOUT_OFFSET: return GET_METRIC_Y (OS2, yStrikeoutPosition);
+ case HB_OT_METRICS_TAG_UNDERLINE_SIZE: return GET_METRIC_Y (post->table, underlineThickness);
+ case HB_OT_METRICS_TAG_UNDERLINE_OFFSET: return GET_METRIC_Y (post->table, underlinePosition);
+
+ /* Private tags: read one specific table, with no OS/2-versus-hhea selection. */
+ case _HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER_OS2: return GET_METRIC_Y (OS2, sTypoAscender);
+ case _HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER_HHEA: return GET_METRIC_Y (hhea, ascender);
+ case _HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER_OS2: return GET_METRIC_Y (OS2, sTypoDescender);
+ case _HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER_HHEA: return GET_METRIC_Y (hhea, descender);
+ case _HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP_OS2: return GET_METRIC_Y (OS2, sTypoLineGap);
+ case _HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP_HHEA: return GET_METRIC_Y (hhea, lineGap);
+#undef GET_METRIC_Y
+#undef GET_METRIC_X
+#undef GET_VAR
+ default: return false; /* Tag not handled by this function. */
+ }
+}
+
+#ifndef HB_NO_VAR
+/**
+ * hb_ot_metrics_get_variation:
+ * @font: a #hb_font_t object.
+ * @metrics_tag: tag of the metrics value whose variation is requested.
+ *
+ * Returns: the value the face's MVAR table reports for @metrics_tag at the font's coords.
+ * Since: 2.6.0
+ **/
+float
+hb_ot_metrics_get_variation (hb_font_t *font, hb_ot_metrics_tag_t metrics_tag)
+{
+  hb_face_t *face = font->face;
+  return face->table.MVAR->get_var (metrics_tag, font->coords, font->num_coords);
+}
+
+/**
+ * hb_ot_metrics_get_x_variation:
+ * @font: a #hb_font_t object.
+ * @metrics_tag: tag of the metrics value whose variation is requested.
+ *
+ * Returns: the result of hb_ot_metrics_get_variation() passed through the font's em_scalef_x().
+ * Since: 2.6.0
+ **/
+hb_position_t
+hb_ot_metrics_get_x_variation (hb_font_t *font, hb_ot_metrics_tag_t metrics_tag)
+{
+  const float delta = hb_ot_metrics_get_variation (font, metrics_tag);
+  return font->em_scalef_x (delta);
+}
+
+/**
+ * hb_ot_metrics_get_y_variation:
+ * @font: a #hb_font_t object.
+ * @metrics_tag: tag of the metrics value whose variation is requested.
+ *
+ * Returns: the result of hb_ot_metrics_get_variation() passed through the font's em_scalef_y().
+ * Since: 2.6.0
+ **/
+hb_position_t
+hb_ot_metrics_get_y_variation (hb_font_t *font, hb_ot_metrics_tag_t metrics_tag)
+{
+  const float delta = hb_ot_metrics_get_variation (font, metrics_tag);
+  return font->em_scalef_y (delta);
+}
+#endif
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-metrics.h b/thirdparty/harfbuzz/src/hb-ot-metrics.h
new file mode 100644
index 0000000000..42c7363c03
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-metrics.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_OT_H_IN
+#error "Include <hb-ot.h> instead."
+#endif
+
+#ifndef HB_OT_METRICS_H
+#define HB_OT_METRICS_H
+
+#include "hb.h"
+#include "hb-ot-name.h"
+
+HB_BEGIN_DECLS
+
+
+/**
+ * hb_ot_metrics_tag_t:
+ * @HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER: horizontal ascender.
+ * @HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER: horizontal descender.
+ * @HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP: horizontal line gap.
+ * @HB_OT_METRICS_TAG_HORIZONTAL_CLIPPING_ASCENT: horizontal clipping ascent.
+ * @HB_OT_METRICS_TAG_HORIZONTAL_CLIPPING_DESCENT: horizontal clipping descent.
+ * @HB_OT_METRICS_TAG_VERTICAL_ASCENDER: vertical ascender.
+ * @HB_OT_METRICS_TAG_VERTICAL_DESCENDER: vertical descender.
+ * @HB_OT_METRICS_TAG_VERTICAL_LINE_GAP: vertical line gap.
+ * @HB_OT_METRICS_TAG_HORIZONTAL_CARET_RISE: horizontal caret rise.
+ * @HB_OT_METRICS_TAG_HORIZONTAL_CARET_RUN: horizontal caret run.
+ * @HB_OT_METRICS_TAG_HORIZONTAL_CARET_OFFSET: horizontal caret offset.
+ * @HB_OT_METRICS_TAG_VERTICAL_CARET_RISE: vertical caret rise.
+ * @HB_OT_METRICS_TAG_VERTICAL_CARET_RUN: vertical caret run.
+ * @HB_OT_METRICS_TAG_VERTICAL_CARET_OFFSET: vertical caret offset.
+ * @HB_OT_METRICS_TAG_X_HEIGHT: x height.
+ * @HB_OT_METRICS_TAG_CAP_HEIGHT: cap height.
+ * @HB_OT_METRICS_TAG_SUBSCRIPT_EM_X_SIZE: subscript em x size.
+ * @HB_OT_METRICS_TAG_SUBSCRIPT_EM_Y_SIZE: subscript em y size.
+ * @HB_OT_METRICS_TAG_SUBSCRIPT_EM_X_OFFSET: subscript em x offset.
+ * @HB_OT_METRICS_TAG_SUBSCRIPT_EM_Y_OFFSET: subscript em y offset.
+ * @HB_OT_METRICS_TAG_SUPERSCRIPT_EM_X_SIZE: superscript em x size.
+ * @HB_OT_METRICS_TAG_SUPERSCRIPT_EM_Y_SIZE: superscript em y size.
+ * @HB_OT_METRICS_TAG_SUPERSCRIPT_EM_X_OFFSET: superscript em x offset.
+ * @HB_OT_METRICS_TAG_SUPERSCRIPT_EM_Y_OFFSET: superscript em y offset.
+ * @HB_OT_METRICS_TAG_STRIKEOUT_SIZE: strikeout size.
+ * @HB_OT_METRICS_TAG_STRIKEOUT_OFFSET: strikeout offset.
+ * @HB_OT_METRICS_TAG_UNDERLINE_SIZE: underline size.
+ * @HB_OT_METRICS_TAG_UNDERLINE_OFFSET: underline offset.
+ *
+ * Metric tags, taken from https://docs.microsoft.com/en-us/typography/opentype/spec/mvar#value-tags
+ *
+ * Since: 2.6.0
+ **/
+typedef enum {
+ HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER = HB_TAG ('h','a','s','c'),
+ HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER = HB_TAG ('h','d','s','c'),
+ HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP = HB_TAG ('h','l','g','p'),
+ HB_OT_METRICS_TAG_HORIZONTAL_CLIPPING_ASCENT = HB_TAG ('h','c','l','a'),
+ HB_OT_METRICS_TAG_HORIZONTAL_CLIPPING_DESCENT = HB_TAG ('h','c','l','d'),
+ HB_OT_METRICS_TAG_VERTICAL_ASCENDER = HB_TAG ('v','a','s','c'),
+ HB_OT_METRICS_TAG_VERTICAL_DESCENDER = HB_TAG ('v','d','s','c'),
+ HB_OT_METRICS_TAG_VERTICAL_LINE_GAP = HB_TAG ('v','l','g','p'),
+ HB_OT_METRICS_TAG_HORIZONTAL_CARET_RISE = HB_TAG ('h','c','r','s'),
+ HB_OT_METRICS_TAG_HORIZONTAL_CARET_RUN = HB_TAG ('h','c','r','n'),
+ HB_OT_METRICS_TAG_HORIZONTAL_CARET_OFFSET = HB_TAG ('h','c','o','f'),
+ HB_OT_METRICS_TAG_VERTICAL_CARET_RISE = HB_TAG ('v','c','r','s'),
+ HB_OT_METRICS_TAG_VERTICAL_CARET_RUN = HB_TAG ('v','c','r','n'),
+ HB_OT_METRICS_TAG_VERTICAL_CARET_OFFSET = HB_TAG ('v','c','o','f'),
+ HB_OT_METRICS_TAG_X_HEIGHT = HB_TAG ('x','h','g','t'),
+ HB_OT_METRICS_TAG_CAP_HEIGHT = HB_TAG ('c','p','h','t'),
+ HB_OT_METRICS_TAG_SUBSCRIPT_EM_X_SIZE = HB_TAG ('s','b','x','s'),
+ HB_OT_METRICS_TAG_SUBSCRIPT_EM_Y_SIZE = HB_TAG ('s','b','y','s'),
+ HB_OT_METRICS_TAG_SUBSCRIPT_EM_X_OFFSET = HB_TAG ('s','b','x','o'),
+ HB_OT_METRICS_TAG_SUBSCRIPT_EM_Y_OFFSET = HB_TAG ('s','b','y','o'),
+ HB_OT_METRICS_TAG_SUPERSCRIPT_EM_X_SIZE = HB_TAG ('s','p','x','s'),
+ HB_OT_METRICS_TAG_SUPERSCRIPT_EM_Y_SIZE = HB_TAG ('s','p','y','s'),
+ HB_OT_METRICS_TAG_SUPERSCRIPT_EM_X_OFFSET = HB_TAG ('s','p','x','o'),
+ HB_OT_METRICS_TAG_SUPERSCRIPT_EM_Y_OFFSET = HB_TAG ('s','p','y','o'),
+ HB_OT_METRICS_TAG_STRIKEOUT_SIZE = HB_TAG ('s','t','r','s'),
+ HB_OT_METRICS_TAG_STRIKEOUT_OFFSET = HB_TAG ('s','t','r','o'),
+ HB_OT_METRICS_TAG_UNDERLINE_SIZE = HB_TAG ('u','n','d','s'),
+ HB_OT_METRICS_TAG_UNDERLINE_OFFSET = HB_TAG ('u','n','d','o'),
+
+ _HB_OT_METRICS_TAG_MAX_VALUE = HB_TAG_MAX_SIGNED /*< skip >*/
+} hb_ot_metrics_tag_t;
+
+HB_EXTERN hb_bool_t
+hb_ot_metrics_get_position (hb_font_t *font,
+ hb_ot_metrics_tag_t metrics_tag,
+ hb_position_t *position /* OUT. May be NULL. */);
+
+HB_EXTERN float
+hb_ot_metrics_get_variation (hb_font_t *font, hb_ot_metrics_tag_t metrics_tag);
+
+HB_EXTERN hb_position_t
+hb_ot_metrics_get_x_variation (hb_font_t *font, hb_ot_metrics_tag_t metrics_tag);
+
+HB_EXTERN hb_position_t
+hb_ot_metrics_get_y_variation (hb_font_t *font, hb_ot_metrics_tag_t metrics_tag);
+
+HB_END_DECLS
+
+#endif /* HB_OT_METRICS_H */
diff --git a/thirdparty/harfbuzz/src/hb-ot-metrics.hh b/thirdparty/harfbuzz/src/hb-ot-metrics.hh
new file mode 100644
index 0000000000..19a5e9ed41
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-metrics.hh
@@ -0,0 +1,35 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_OT_METRICS_HH
+#define HB_OT_METRICS_HH
+
+#include "hb.hh"
+
+HB_INTERNAL bool
+_hb_ot_metrics_get_position_common (hb_font_t *font,
+ hb_ot_metrics_tag_t metrics_tag,
+ hb_position_t *position /* OUT. May be NULL. */);
+
+#endif /* HB_OT_METRICS_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-name-language-static.hh b/thirdparty/harfbuzz/src/hb-ot-name-language-static.hh
new file mode 100644
index 0000000000..c496dc2981
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-name-language-static.hh
@@ -0,0 +1,456 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_NAME_LANGUAGE_STATIC_HH
+#define HB_OT_NAME_LANGUAGE_STATIC_HH
+
+#include "hb-ot-name-language.hh"
+
+/* Following two tables were generated by joining FreeType, FontConfig,
+ * and OpenType specification language lists, then filled in missing
+ * entries using:
+ * https://docs.microsoft.com/en-us/windows/desktop/intl/language-identifier-constants-and-strings
+ */
+
+/* One row of a language lookup table: a numeric platform language code
+ * paired with the language tag string it maps to. */
+struct hb_ot_language_map_t
+{
+  /* Three-way comparison of a search key against this row's code:
+   * -1 when the key is smaller, +1 when it is larger, 0 on a match. */
+  int cmp (unsigned int key) const
+  {
+    if (key < code) return -1;
+    if (key > code) return +1;
+    return 0;
+  }
+
+  uint16_t code; /* Numeric language identifier. */
+  char lang[6];  /* Tag string; holds up to 5 characters plus the NUL. */
+};
+
+/* Microsoft-platform language IDs mapped to language tags.
+ *
+ * Rows must stay sorted by ascending code, because the table is searched
+ * with hb_bsearch().  Rows that are commented out are known IDs for which
+ * no tag has been assigned.  Rows marked "???" carry no description in the
+ * source lists the table was generated from. */
+static const hb_ot_language_map_t
+hb_ms_language_map[] =
+{
+  {0x0001, "ar"}, /* ??? */
+  {0x0004, "zh"}, /* ??? */
+  {0x0009, "en"}, /* ??? */
+  {0x0401, "ar"}, /* Arabic (Saudi Arabia) */
+  {0x0402, "bg"}, /* Bulgarian (Bulgaria) */
+  {0x0403, "ca"}, /* Catalan (Catalan) */
+  {0x0404, "zh-tw"}, /* Chinese (Taiwan) */
+  {0x0405, "cs"}, /* Czech (Czech Republic) */
+  {0x0406, "da"}, /* Danish (Denmark) */
+  {0x0407, "de"}, /* German (Germany) */
+  {0x0408, "el"}, /* Greek (Greece) */
+  {0x0409, "en"}, /* English (United States) */
+  {0x040A, "es"}, /* Spanish (Traditional Sort) (Spain) */
+  {0x040B, "fi"}, /* Finnish (Finland) */
+  {0x040C, "fr"}, /* French (France) */
+  {0x040D, "he"}, /* Hebrew (Israel) */
+  {0x040E, "hu"}, /* Hungarian (Hungary) */
+  {0x040F, "is"}, /* Icelandic (Iceland) */
+  {0x0410, "it"}, /* Italian (Italy) */
+  {0x0411, "ja"}, /* Japanese (Japan) */
+  {0x0412, "ko"}, /* Korean (Korea) */
+  {0x0413, "nl"}, /* Dutch (Netherlands) */
+  {0x0414, "no"}, /* Norwegian (Bokmal) (Norway) */
+  {0x0415, "pl"}, /* Polish (Poland) */
+  {0x0416, "pt"}, /* Portuguese (Brazil) */
+  {0x0417, "rm"}, /* Romansh (Switzerland) */
+  {0x0418, "ro"}, /* Romanian (Romania) */
+  {0x0419, "ru"}, /* Russian (Russia) */
+  {0x041A, "hr"}, /* Croatian (Croatia) */
+  {0x041B, "sk"}, /* Slovak (Slovakia) */
+  {0x041C, "sq"}, /* Albanian (Albania) */
+  {0x041D, "sv"}, /* Swedish (Sweden) */
+  {0x041E, "th"}, /* Thai (Thailand) */
+  {0x041F, "tr"}, /* Turkish (Turkey) */
+  {0x0420, "ur"}, /* Urdu (Islamic Republic of Pakistan) */
+  {0x0421, "id"}, /* Indonesian (Indonesia) */
+  {0x0422, "uk"}, /* Ukrainian (Ukraine) */
+  {0x0423, "be"}, /* Belarusian (Belarus) */
+  {0x0424, "sl"}, /* Slovenian (Slovenia) */
+  {0x0425, "et"}, /* Estonian (Estonia) */
+  {0x0426, "lv"}, /* Latvian (Latvia) */
+  {0x0427, "lt"}, /* Lithuanian (Lithuania) */
+  {0x0428, "tg"}, /* Tajik (Cyrillic) (Tajikistan) */
+  {0x0429, "fa"}, /* Persian (Iran) */
+  {0x042A, "vi"}, /* Vietnamese (Vietnam) */
+  {0x042B, "hy"}, /* Armenian (Armenia) */
+  {0x042C, "az"}, /* Azeri (Latin) (Azerbaijan) */
+  {0x042D, "eu"}, /* Basque (Basque) */
+  {0x042E, "hsb"}, /* Upper Sorbian (Germany) */
+  {0x042F, "mk"}, /* Macedonian (FYROM) (Former Yugoslav Republic of Macedonia) */
+  {0x0430, "st"}, /* ??? */
+  {0x0431, "ts"}, /* ??? */
+  {0x0432, "tn"}, /* Setswana (South Africa) */
+  {0x0433, "ven"}, /* ??? */
+  {0x0434, "xh"}, /* isiXhosa (South Africa) */
+  {0x0435, "zu"}, /* isiZulu (South Africa) */
+  {0x0436, "af"}, /* Afrikaans (South Africa) */
+  {0x0437, "ka"}, /* Georgian (Georgia) */
+  {0x0438, "fo"}, /* Faroese (Faroe Islands) */
+  {0x0439, "hi"}, /* Hindi (India) */
+  {0x043A, "mt"}, /* Maltese (Malta) */
+  {0x043B, "se"}, /* Sami (Northern) (Norway) */
+  {0x043C, "ga"}, /* ??? */
+  {0x043D, "yi"}, /* ??? */
+  {0x043E, "ms"}, /* Malay (Malaysia) */
+  {0x043F, "kk"}, /* Kazakh (Kazakhstan) */
+  {0x0440, "ky"}, /* Kyrgyz (Kyrgyzstan) */
+  {0x0441, "sw"}, /* Kiswahili (Kenya) */
+  {0x0442, "tk"}, /* Turkmen (Turkmenistan) */
+  {0x0443, "uz"}, /* Uzbek (Latin) (Uzbekistan) */
+  {0x0444, "tt"}, /* Tatar (Russia) */
+  {0x0445, "bn"}, /* Bengali (India) */
+  {0x0446, "pa"}, /* Punjabi (India) */
+  {0x0447, "gu"}, /* Gujarati (India) */
+  {0x0448, "or"}, /* Odia (formerly Oriya) (India) */
+  {0x0449, "ta"}, /* Tamil (India) */
+  {0x044A, "te"}, /* Telugu (India) */
+  {0x044B, "kn"}, /* Kannada (India) */
+  {0x044C, "ml"}, /* Malayalam (India) */
+  {0x044D, "as"}, /* Assamese (India) */
+  {0x044E, "mr"}, /* Marathi (India) */
+  {0x044F, "sa"}, /* Sanskrit (India) */
+  {0x0450, "mn"}, /* Mongolian (Cyrillic) (Mongolia) */
+  {0x0451, "bo"}, /* Tibetan (PRC) */
+  {0x0452, "cy"}, /* Welsh (United Kingdom) */
+  {0x0453, "km"}, /* Khmer (Cambodia) */
+  {0x0454, "lo"}, /* Lao (Lao P.D.R.) */
+  {0x0455, "my"}, /* ??? */
+  {0x0456, "gl"}, /* Galician (Galician) */
+  {0x0457, "kok"}, /* Konkani (India) */
+  {0x0458, "mni"}, /* ??? */
+  {0x0459, "sd"}, /* ??? */
+  {0x045A, "syr"}, /* Syriac (Syria) */
+  {0x045B, "si"}, /* Sinhala (Sri Lanka) */
+  {0x045C, "chr"}, /* ??? */
+  {0x045D, "iu"}, /* Inuktitut (Canada) */
+  {0x045E, "am"}, /* Amharic (Ethiopia) */
+  {0x0460, "ks"}, /* ??? */
+  {0x0461, "ne"}, /* Nepali (Nepal) */
+  {0x0462, "fy"}, /* Frisian (Netherlands) */
+  {0x0463, "ps"}, /* Pashto (Afghanistan) */
+  {0x0464, "phi"}, /* Filipino (Philippines) */
+  {0x0465, "div"}, /* Divehi (Maldives) */
+  {0x0468, "ha"}, /* Hausa (Latin) (Nigeria) */
+  {0x046A, "yo"}, /* Yoruba (Nigeria) */
+  {0x046B, "quz"}, /* Quechua (Bolivia) */
+  {0x046C, "nso"}, /* Sesotho sa Leboa (South Africa) */
+  {0x046D, "ba"}, /* Bashkir (Russia) */
+  {0x046E, "lb"}, /* Luxembourgish (Luxembourg) */
+  {0x046F, "kl"}, /* Greenlandic (Greenland) */
+  {0x0470, "ibo"}, /* Igbo (Nigeria) */
+  {0x0471, "kau"}, /* ??? */
+  {0x0472, "om"}, /* ??? */
+  {0x0473, "ti"}, /* ??? */
+  {0x0474, "gn"}, /* ??? */
+  {0x0475, "haw"}, /* ??? */
+  {0x0476, "la"}, /* ??? */
+  {0x0477, "so"}, /* ??? */
+  {0x0478, "ii"}, /* Yi (PRC) */
+  {0x0479, "pap"}, /* ??? */
+  {0x047A, "arn"}, /* Mapudungun (Chile) */
+  {0x047C, "moh"}, /* Mohawk (Mohawk) */
+  {0x047E, "br"}, /* Breton (France) */
+  {0x0480, "ug"}, /* Uighur (PRC) */
+  {0x0481, "mi"}, /* Maori (New Zealand) */
+  {0x0482, "oc"}, /* Occitan (France) */
+  {0x0483, "co"}, /* Corsican (France) */
+  {0x0484, "gsw"}, /* Alsatian (France) */
+  {0x0485, "sah"}, /* Yakut (Russia) */
+  {0x0486, "qut"}, /* K'iche (Guatemala) */
+  {0x0487, "rw"}, /* Kinyarwanda (Rwanda) */
+  {0x0488, "wo"}, /* Wolof (Senegal) */
+  {0x048C, "fa"}, /* Dari (Afghanistan) */
+  {0x0801, "ar"}, /* Arabic (Iraq) */
+  {0x0804, "zh-cn"}, /* Chinese (People’s Republic of China) */
+  {0x0807, "de"}, /* German (Switzerland) */
+  {0x0809, "en"}, /* English (United Kingdom) */
+  {0x080A, "es"}, /* Spanish (Mexico) */
+  {0x080C, "fr"}, /* French (Belgium) */
+  {0x0810, "it"}, /* Italian (Switzerland) */
+  {0x0812, "ko"}, /* ??? */
+  {0x0813, "nl"}, /* Dutch (Belgium) */
+  {0x0814, "nn"}, /* Norwegian (Nynorsk) (Norway) */
+  {0x0816, "pt"}, /* Portuguese (Portugal) */
+  {0x0818, "mo"}, /* ??? */
+  {0x0819, "ru"}, /* ??? */
+  {0x081A, "sr"}, /* Serbian (Latin) (Serbia) */
+  {0x081D, "sv"}, /* Swedish (Finland) */
+  {0x0820, "ur"}, /* ??? */
+  {0x0827, "lt"}, /* ??? */
+  {0x082C, "az"}, /* Azeri (Cyrillic) (Azerbaijan) */
+  {0x082E, "dsb"}, /* Lower Sorbian (Germany) */
+//{0x083B, ""}, /* Sami (Northern) (Sweden) */
+  {0x083C, "ga"}, /* Irish (Ireland) -- "ga" is Irish; "gd" would be Scottish Gaelic */
+  {0x083E, "ms"}, /* Malay (Brunei Darussalam) */
+  {0x0843, "uz"}, /* Uzbek (Cyrillic) (Uzbekistan) */
+  {0x0845, "bn"}, /* Bengali (Bangladesh) */
+  {0x0846, "ar"}, /* ??? */
+  {0x0850, "mn"}, /* Mongolian (Traditional) (People’s Republic of China) */
+  {0x0851, "dz"}, /* ??? */
+  {0x085D, "iu"}, /* Inuktitut (Latin) (Canada) */
+  {0x085F, "tzm"}, /* Tamazight (Latin) (Algeria) */
+  {0x0861, "ne"}, /* ??? */
+//{0x086B, ""}, /* Quechua (Ecuador) */
+  {0x0873, "ti"}, /* ??? */
+  {0x0C01, "ar"}, /* Arabic (Egypt) */
+  {0x0C04, "zh-hk"}, /* Chinese (Hong Kong S.A.R.) */
+  {0x0C07, "de"}, /* German (Austria) */
+  {0x0C09, "en"}, /* English (Australia) */
+  {0x0C0A, "es"}, /* Spanish (Modern Sort) (Spain) */
+  {0x0C0C, "fr"}, /* French (Canada) */
+  {0x0C1A, "sr"}, /* Serbian (Cyrillic) (Serbia) */
+  {0x0C3B, "se"}, /* Sami (Northern) (Finland) */
+//{0x0C6B, ""}, /* Quechua (Peru) */
+  {0x1001, "ar"}, /* Arabic (Libya) */
+  {0x1004, "zh-sg"}, /* Chinese (Singapore) */
+  {0x1007, "de"}, /* German (Luxembourg) */
+  {0x1009, "en"}, /* English (Canada) */
+  {0x100A, "es"}, /* Spanish (Guatemala) */
+  {0x100C, "fr"}, /* French (Switzerland) */
+  {0x101A, "hr"}, /* Croatian (Latin) (Bosnia and Herzegovina) */
+  {0x103B, "smj"}, /* Sami (Lule) (Norway) */
+  {0x1401, "ar"}, /* Arabic (Algeria) */
+//{0x1404, ""}, /* Chinese (Macao S.A.R.) */
+  {0x1407, "de"}, /* German (Liechtenstein) */
+  {0x1409, "en"}, /* English (New Zealand) */
+  {0x140A, "es"}, /* Spanish (Costa Rica) */
+  {0x140C, "fr"}, /* French (Luxembourg) */
+  {0x141A, "bs"}, /* Bosnian (Latin) (Bosnia and Herzegovina) */
+//{0x143B, ""}, /* Sami (Lule) (Sweden) */
+  {0x1801, "ar"}, /* Arabic (Morocco) */
+  {0x1809, "en"}, /* English (Ireland) */
+  {0x180A, "es"}, /* Spanish (Panama) */
+  {0x180C, "fr"}, /* French (Principality of Monaco) */
+//{0x181A, ""}, /* Serbian (Latin) (Bosnia and Herzegovina) */
+  {0x183B, "sma"}, /* Sami (Southern) (Norway) */
+  {0x1C01, "ar"}, /* Arabic (Tunisia) */
+  {0x1C09, "en"}, /* English (South Africa) */
+  {0x1C0A, "es"}, /* Spanish (Dominican Republic) */
+  {0x1C0C, "fr"}, /* ??? */
+//{0x1C1A, ""}, /* Serbian (Cyrillic) (Bosnia and Herzegovina) */
+//{0x1C3B, ""}, /* Sami (Southern) (Sweden) */
+  {0x2001, "ar"}, /* Arabic (Oman) */
+  {0x2009, "en"}, /* English (Jamaica) */
+  {0x200A, "es"}, /* Spanish (Venezuela) */
+  {0x200C, "fr"}, /* ??? */
+  {0x201A, "bs"}, /* Bosnian (Cyrillic) (Bosnia and Herzegovina) */
+  {0x203B, "sms"}, /* Sami (Skolt) (Finland) */
+  {0x2401, "ar"}, /* Arabic (Yemen) */
+  {0x2409, "en"}, /* English (Caribbean) */
+  {0x240A, "es"}, /* Spanish (Colombia) */
+  {0x240C, "fr"}, /* ??? */
+  {0x243B, "smn"}, /* Sami (Inari) (Finland) */
+  {0x2801, "ar"}, /* Arabic (Syria) */
+  {0x2809, "en"}, /* English (Belize) */
+  {0x280A, "es"}, /* Spanish (Peru) */
+  {0x280C, "fr"}, /* ??? */
+  {0x2C01, "ar"}, /* Arabic (Jordan) */
+  {0x2C09, "en"}, /* English (Trinidad and Tobago) */
+  {0x2C0A, "es"}, /* Spanish (Argentina) */
+  {0x2C0C, "fr"}, /* ??? */
+  {0x3001, "ar"}, /* Arabic (Lebanon) */
+  {0x3009, "en"}, /* English (Zimbabwe) */
+  {0x300A, "es"}, /* Spanish (Ecuador) */
+  {0x300C, "fr"}, /* ??? */
+  {0x3401, "ar"}, /* Arabic (Kuwait) */
+  {0x3409, "en"}, /* English (Republic of the Philippines) */
+  {0x340A, "es"}, /* Spanish (Chile) */
+  {0x340C, "fr"}, /* ??? */
+  {0x3801, "ar"}, /* Arabic (U.A.E.) */
+  {0x380A, "es"}, /* Spanish (Uruguay) */
+  {0x380C, "fr"}, /* ??? */
+  {0x3C01, "ar"}, /* Arabic (Bahrain) */
+  {0x3C09, "en"}, /* ??? */
+  {0x3C0A, "es"}, /* Spanish (Paraguay) */
+  {0x3C0C, "fr"}, /* ??? */
+  {0x4001, "ar"}, /* Arabic (Qatar) */
+  {0x4009, "en"}, /* English (India) */
+  {0x400A, "es"}, /* Spanish (Bolivia) */
+  {0x4409, "en"}, /* English (Malaysia) */
+  {0x440A, "es"}, /* Spanish (El Salvador) */
+  {0x4809, "en"}, /* English (Singapore) */
+  {0x480A, "es"}, /* Spanish (Honduras) */
+  {0x4C0A, "es"}, /* Spanish (Nicaragua) */
+  {0x500A, "es"}, /* Spanish (Puerto Rico) */
+  {0x540A, "es"}, /* Spanish (United States) */
+  {0xE40A, "es"}, /* ??? */
+  {0xE40C, "fr"}, /* ??? */
+};
+
+/* Macintosh-platform language codes mapped to language tags.
+ *
+ * Rows must stay sorted by ascending code, because the table is searched
+ * with hb_bsearch().  Rows that are commented out are known codes for which
+ * no tag has been assigned. */
+static const hb_ot_language_map_t
+hb_mac_language_map[] =
+{
+  {  0, "en"}, /* English */
+  {  1, "fr"}, /* French */
+  {  2, "de"}, /* German */
+  {  3, "it"}, /* Italian */
+  {  4, "nl"}, /* Dutch */
+  {  5, "sv"}, /* Swedish */
+  {  6, "es"}, /* Spanish */
+  {  7, "da"}, /* Danish */
+  {  8, "pt"}, /* Portuguese */
+  {  9, "no"}, /* Norwegian */
+  { 10, "he"}, /* Hebrew */
+  { 11, "ja"}, /* Japanese */
+  { 12, "ar"}, /* Arabic */
+  { 13, "fi"}, /* Finnish */
+  { 14, "el"}, /* Greek */
+  { 15, "is"}, /* Icelandic */
+  { 16, "mt"}, /* Maltese */
+  { 17, "tr"}, /* Turkish */
+  { 18, "hr"}, /* Croatian */
+  { 19, "zh-tw"}, /* Chinese (Traditional) */
+  { 20, "ur"}, /* Urdu */
+  { 21, "hi"}, /* Hindi */
+  { 22, "th"}, /* Thai */
+  { 23, "ko"}, /* Korean */
+  { 24, "lt"}, /* Lithuanian */
+  { 25, "pl"}, /* Polish */
+  { 26, "hu"}, /* Hungarian */
+  { 27, "et"}, /* Estonian */
+  { 28, "lv"}, /* Latvian */
+//{ 29, ""}, /* Sami */
+  { 30, "fo"}, /* Faroese */
+  { 31, "fa"}, /* Farsi/Persian */
+  { 32, "ru"}, /* Russian */
+  { 33, "zh-cn"}, /* Chinese (Simplified) */
+  { 34, "nl"}, /* Flemish */
+  { 35, "ga"}, /* Irish Gaelic */
+  { 36, "sq"}, /* Albanian */
+  { 37, "ro"}, /* Romanian */
+  { 38, "cs"}, /* Czech */
+  { 39, "sk"}, /* Slovak */
+  { 40, "sl"}, /* Slovenian */
+  { 41, "yi"}, /* Yiddish */
+  { 42, "sr"}, /* Serbian */
+  { 43, "mk"}, /* Macedonian */
+  { 44, "bg"}, /* Bulgarian */
+  { 45, "uk"}, /* Ukrainian */
+  { 46, "be"}, /* Byelorussian */
+  { 47, "uz"}, /* Uzbek */
+  { 48, "kk"}, /* Kazakh */
+  { 49, "az"}, /* Azerbaijani (Cyrillic script) */
+  { 50, "az"}, /* Azerbaijani (Arabic script) */
+  { 51, "hy"}, /* Armenian */
+  { 52, "ka"}, /* Georgian */
+  { 53, "mo"}, /* Moldavian */
+  { 54, "ky"}, /* Kirghiz */
+  { 55, "tg"}, /* Tajiki */
+  { 56, "tk"}, /* Turkmen */
+  { 57, "mn"}, /* Mongolian (Mongolian script) */
+  { 58, "mn"}, /* Mongolian (Cyrillic script) */
+  { 59, "ps"}, /* Pashto */
+  { 60, "ku"}, /* Kurdish */
+  { 61, "ks"}, /* Kashmiri */
+  { 62, "sd"}, /* Sindhi */
+  { 63, "bo"}, /* Tibetan */
+  { 64, "ne"}, /* Nepali */
+  { 65, "sa"}, /* Sanskrit */
+  { 66, "mr"}, /* Marathi */
+  { 67, "bn"}, /* Bengali */
+  { 68, "as"}, /* Assamese */
+  { 69, "gu"}, /* Gujarati */
+  { 70, "pa"}, /* Punjabi */
+  { 71, "or"}, /* Oriya */
+  { 72, "ml"}, /* Malayalam */
+  { 73, "kn"}, /* Kannada */
+  { 74, "ta"}, /* Tamil */
+  { 75, "te"}, /* Telugu */
+  { 76, "si"}, /* Sinhalese */
+  { 77, "my"}, /* Burmese */
+  { 78, "km"}, /* Khmer */
+  { 79, "lo"}, /* Lao */
+  { 80, "vi"}, /* Vietnamese */
+  { 81, "id"}, /* Indonesian */
+  { 82, "tl"}, /* Tagalog */
+  { 83, "ms"}, /* Malay (Roman script) */
+  { 84, "ms"}, /* Malay (Arabic script) */
+  { 85, "am"}, /* Amharic */
+  { 86, "ti"}, /* Tigrinya */
+  { 87, "om"}, /* Galla */
+  { 88, "so"}, /* Somali */
+  { 89, "sw"}, /* Swahili */
+  { 90, "rw"}, /* Kinyarwanda/Ruanda */
+  { 91, "rn"}, /* Rundi */
+  { 92, "ny"}, /* Nyanja/Chewa */
+  { 93, "mg"}, /* Malagasy */
+  { 94, "eo"}, /* Esperanto */
+  {128, "cy"}, /* Welsh */
+  {129, "eu"}, /* Basque */
+  {130, "ca"}, /* Catalan */
+  {131, "la"}, /* Latin */
+  {132, "qu"}, /* Quechua */
+  {133, "gn"}, /* Guarani */
+  {134, "ay"}, /* Aymara */
+  {135, "tt"}, /* Tatar */
+  {136, "ug"}, /* Uighur */
+  {137, "dz"}, /* Dzongkha */
+  {138, "jw"}, /* Javanese (Roman script) */
+  {139, "su"}, /* Sundanese (Roman script) */
+  {140, "gl"}, /* Galician */
+  {141, "af"}, /* Afrikaans */
+  {142, "br"}, /* Breton */
+  {143, "iu"}, /* Inuktitut */
+  {144, "gd"}, /* Scottish Gaelic */
+  {145, "gv"}, /* Manx Gaelic */
+  {146, "ga"}, /* Irish Gaelic (with dot above) */
+  {147, "to"}, /* Tongan */
+  {148, "el"}, /* Greek (polytonic) */
+  {149, "kl"}, /* Greenlandic -- "kl", as in the Microsoft table; "ik" would be Inupiaq */
+  {150, "az"}, /* Azerbaijani (Roman script) */
+};
+
+
+/* Searches @array (holding @len rows) for @code with hb_bsearch() and
+ * returns the language built from the matching row's tag, or
+ * HB_LANGUAGE_INVALID when no row matches.  Builds that define
+ * HB_NO_OT_NAME_LANGUAGE always get HB_LANGUAGE_INVALID. */
+static hb_language_t
+_hb_ot_name_language_for (unsigned int code,
+                          const hb_ot_language_map_t *array,
+                          unsigned int len)
+{
+#ifdef HB_NO_OT_NAME_LANGUAGE
+  return HB_LANGUAGE_INVALID;
+#endif
+  auto *match = hb_bsearch (code, array, len);
+
+  return match ? hb_language_from_string (match->lang, -1)
+               : HB_LANGUAGE_INVALID;
+}
+
+/* Translates a Microsoft-platform language ID into an hb_language_t by
+ * looking it up in hb_ms_language_map. */
+hb_language_t
+_hb_ot_name_language_for_ms_code (unsigned int code)
+{
+  const unsigned int num_rows = ARRAY_LENGTH (hb_ms_language_map);
+  return _hb_ot_name_language_for (code, hb_ms_language_map, num_rows);
+}
+
+/* Translates a Macintosh-platform language code into an hb_language_t by
+ * looking it up in hb_mac_language_map. */
+hb_language_t
+_hb_ot_name_language_for_mac_code (unsigned int code)
+{
+  const unsigned int num_rows = ARRAY_LENGTH (hb_mac_language_map);
+  return _hb_ot_name_language_for (code, hb_mac_language_map, num_rows);
+}
+
+#endif /* HB_OT_NAME_LANGUAGE_STATIC_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-name-language.hh b/thirdparty/harfbuzz/src/hb-ot-name-language.hh
new file mode 100644
index 0000000000..903076c0d5
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-name-language.hh
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_NAME_LANGUAGE_HH
+#define HB_OT_NAME_LANGUAGE_HH
+
+#include "hb.hh"
+
+
+/* Library-internal: maps a Microsoft-platform language ID to a language.
+ * NOTE(review): presumably yields HB_LANGUAGE_INVALID for unknown IDs --
+ * confirm against the definition, which is not in this header. */
+HB_INTERNAL hb_language_t
+_hb_ot_name_language_for_ms_code (unsigned int code);
+
+/* Library-internal: maps a Macintosh-platform language code to a language.
+ * NOTE(review): presumably yields HB_LANGUAGE_INVALID for unknown codes --
+ * confirm against the definition, which is not in this header. */
+HB_INTERNAL hb_language_t
+_hb_ot_name_language_for_mac_code (unsigned int code);
+
+
+#endif /* HB_OT_NAME_LANGUAGE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-name-table.hh b/thirdparty/harfbuzz/src/hb-ot-name-table.hh
new file mode 100644
index 0000000000..ece3c28466
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-name-table.hh
@@ -0,0 +1,376 @@
+/*
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_NAME_TABLE_HH
+#define HB_OT_NAME_TABLE_HH
+
+#include "hb-open-type.hh"
+#include "hb-ot-name-language.hh"
+#include "hb-aat-layout.hh"
+
+
+namespace OT {
+
+
+/* Shorthand for the two 16-bit slots of hb_ot_name_entry_t's `var` member:
+ * slot 0 stores an entry's encoding score, slot 1 the index of the name
+ * record it came from.  Both macros are #undef'd again further down in
+ * this header. */
+#define entry_score var.u16[0]
+#define entry_index var.u16[1]
+
+
+/*
+ * name -- Naming
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/name
+ */
+#define HB_OT_TAG_name HB_TAG('n','a','m','e')
+
+/* Score that NameRecord::score() returns for platform/encoding pairs it does
+ * not recognise; entries carrying it are dropped when the name accelerator
+ * builds its list. */
+#define UNSUPPORTED 42
+
+/* A single record of the 'name' table: identifies a string by platform,
+ * encoding, language and name ID, and locates its bytes inside the table's
+ * string storage area. */
+struct NameRecord
+{
+  /* Resolves this record's language.  Platform 3 and platform 1 language
+   * IDs go through the Microsoft and Mac code lookups respectively;
+   * platform 0 asks the face's ltag table unless
+   * HB_NO_OT_NAME_LANGUAGE_AAT is defined.  Anything else -- and every
+   * record when HB_NO_OT_NAME_LANGUAGE is defined -- yields
+   * HB_LANGUAGE_INVALID. */
+  hb_language_t language (hb_face_t *face) const
+  {
+#ifndef HB_NO_OT_NAME_LANGUAGE
+    unsigned int platform = platformID;
+    unsigned int lang_id = languageID;
+
+    switch (platform)
+    {
+      case 3: return _hb_ot_name_language_for_ms_code (lang_id);
+      case 1: return _hb_ot_name_language_for_mac_code (lang_id);
+#ifndef HB_NO_OT_NAME_LANGUAGE_AAT
+      case 0: return face->table.ltag->get_language (lang_id);
+#endif
+      default: break;
+    }
+
+#endif
+    return HB_LANGUAGE_INVALID;
+  }
+
+  /* Ranks this record's platform/encoding pair; lower is better.
+   * Same order as in cmap::find_best_subtable().  Pairs that are not
+   * listed get UNSUPPORTED. */
+  uint16_t score () const
+  {
+    unsigned int platform = platformID;
+    unsigned int encoding = encodingID;
+
+    switch (platform)
+    {
+      case 3:
+        switch (encoding)
+        {
+          case 10: return 0; /* 32-bit. */
+          case  1: return 3; /* 16-bit. */
+          case  0: return 8; /* Symbol. */
+          default: break;
+        }
+        break;
+
+      case 0:
+        switch (encoding)
+        {
+          case 6: return 1; /* 32-bit. */
+          case 4: return 2; /* 32-bit. */
+          case 3: return 4; /* 16-bit. */
+          case 2: return 5; /* 16-bit. */
+          case 1: return 6; /* 16-bit. */
+          case 0: return 7; /* 16-bit. */
+          default: break;
+        }
+        break;
+
+      case 1:
+        /* We treat all Mac Latin names as ASCII only. */
+        if (encoding == 0) return 10; /* 10 is magic number :| */
+        break;
+
+      default:
+        break;
+    }
+
+    return UNSUPPORTED;
+  }
+
+  /* Embeds a copy of this record into the serializer and then serializes
+   * the string it points to (resolved against @base) through the copy's
+   * offset.  Returns the embedded copy, or nullptr if embedding failed. */
+  NameRecord* copy (hb_serialize_context_t *c, const void *base) const
+  {
+    TRACE_SERIALIZE (this);
+    auto *dup = c->embed (this);
+    if (unlikely (!dup)) return_trace (nullptr);
+    dup->offset.serialize_copy (c, offset, base, 0, hb_serialize_context_t::Tail, length);
+    return_trace (dup);
+  }
+
+  /* True for every platform-0 record, and for platform-3 records whose
+   * encoding is 0, 1 or 10. */
+  bool isUnicode () const
+  {
+    unsigned int platform = platformID;
+    unsigned int encoding = encodingID;
+
+    if (platform == 0) return true;
+    if (platform != 3) return false;
+    return encoding == 0 || encoding == 1 || encoding == 10;
+  }
+
+  /* qsort-style comparator ordering records by platform, encoding,
+   * language, name ID and finally string length. */
+  static int cmp (const void *pa, const void *pb)
+  {
+    const NameRecord &lhs = *(const NameRecord *) pa;
+    const NameRecord &rhs = *(const NameRecord *) pb;
+
+    if (lhs.platformID != rhs.platformID)
+      return lhs.platformID - rhs.platformID;
+    if (lhs.encodingID != rhs.encodingID)
+      return lhs.encodingID - rhs.encodingID;
+    if (lhs.languageID != rhs.languageID)
+      return lhs.languageID - rhs.languageID;
+    if (lhs.nameID != rhs.nameID)
+      return lhs.nameID - rhs.nameID;
+    if (lhs.length != rhs.length)
+      return lhs.length - rhs.length;
+
+    return 0;
+  }
+
+  /* Checks the record itself and then the `length` string bytes that
+   * `offset` designates relative to @base. */
+  bool sanitize (hb_sanitize_context_t *c, const void *base) const
+  {
+    TRACE_SANITIZE (this);
+    if (!c->check_struct (this)) return_trace (false);
+    return_trace (offset.sanitize (c, base, length));
+  }
+
+  HBUINT16 platformID; /* Platform ID. */
+  HBUINT16 encodingID; /* Platform-specific encoding ID. */
+  HBUINT16 languageID; /* Language ID. */
+  HBUINT16 nameID; /* Name ID. */
+  HBUINT16 length; /* String length (in bytes). */
+  NNOffsetTo<UnsizedArrayOf<HBUINT8>>
+           offset; /* String offset from start of storage area (in bytes). */
+  public:
+  DEFINE_SIZE_STATIC (12);
+};
+
+/* Comparator over hb_ot_name_entry_t keyed on (name_id, language).
+ * Entries with differing name IDs order by the ID difference.  For equal
+ * IDs, identical languages compare equal, a null language sorts before any
+ * other, and the rest order by strcmp() of their language strings. */
+static int
+_hb_ot_name_entry_cmp_key (const void *pa, const void *pb)
+{
+  const hb_ot_name_entry_t &lhs = *(const hb_ot_name_entry_t *) pa;
+  const hb_ot_name_entry_t &rhs = *(const hb_ot_name_entry_t *) pb;
+
+  /* Primary key: name_id. */
+  if (lhs.name_id != rhs.name_id)
+    return lhs.name_id - rhs.name_id;
+
+  /* Secondary key: language. */
+  if (lhs.language == rhs.language)
+    return 0;
+  if (!lhs.language)
+    return -1;
+  if (!rhs.language)
+    return +1;
+
+  const char *lhs_tag = hb_language_to_string (lhs.language);
+  const char *rhs_tag = hb_language_to_string (rhs.language);
+  return strcmp (lhs_tag, rhs_tag);
+}
+
+/* Full ordering for hb_ot_name_entry_t: by name_id and language first
+ * (via _hb_ot_name_entry_cmp_key), then by score, then by record index. */
+static int
+_hb_ot_name_entry_cmp (const void *pa, const void *pb)
+{
+  if (int key_order = _hb_ot_name_entry_cmp_key (pa, pb))
+    return key_order;
+
+  const hb_ot_name_entry_t &lhs = *(const hb_ot_name_entry_t *) pa;
+  const hb_ot_name_entry_t &rhs = *(const hb_ot_name_entry_t *) pb;
+
+  /* Same key: the lower score comes first. */
+  if (lhs.entry_score != rhs.entry_score)
+    return lhs.entry_score - rhs.entry_score;
+
+  /* Same score as well: fall back to the record index (0 when equal). */
+  return lhs.entry_index - rhs.entry_index;
+}
+
+/* The 'name' table: a small header followed by `count` NameRecords, with
+ * the string bytes kept in a storage area located through stringOffset. */
+struct name
+{
+  static constexpr hb_tag_t tableTag = HB_OT_TAG_name;
+
+  /* Size in bytes of the header plus `count` name records. */
+  unsigned int get_size () const
+  { return min_size + count * nameRecordZ.item_size; }
+
+  /* Writes a format-0 table containing the records produced by @it.
+   * The records are copied into a scratch array, sorted with
+   * records.qsort (), and emitted with c->copy_all (), which receives
+   * @src_string_pool as the base for the record strings.
+   * Returns false if the header cannot be allocated, if the scratch array
+   * cannot be allocated, or if the serializer ran out of room. */
+  template <typename Iterator,
+            hb_requires (hb_is_source_of (Iterator, const NameRecord &))>
+  bool serialize (hb_serialize_context_t *c,
+                  Iterator it,
+                  const void *src_string_pool)
+  {
+    TRACE_SERIALIZE (this);
+
+    if (unlikely (!c->extend_min ((*this)))) return_trace (false);
+
+    this->format = 0;
+    this->count = it.len ();
+
+    NameRecord *name_records = (NameRecord *) calloc (it.len (), NameRecord::static_size);
+    if (unlikely (!name_records)) return_trace (false);
+
+    /* `records` keeps the start of the scratch array; `name_records` is
+     * advanced while filling it. */
+    hb_array_t<NameRecord> records (name_records, it.len ());
+
+    for (const NameRecord& record : it)
+    {
+      memcpy (name_records, &record, NameRecord::static_size);
+      name_records++;
+    }
+
+    records.qsort ();
+
+    c->copy_all (records, src_string_pool);
+    free (records.arrayZ);
+
+    if (unlikely (c->ran_out_of_room)) return_trace (false);
+
+    this->stringOffset = c->length ();
+
+    return_trace (true);
+  }
+
+  /* Produces the subset 'name' table: keeps the records whose name ID and
+   * language ID are in the plan's sets and that are Unicode-encoded (or
+   * any encoding when the plan's name_legacy flag is set).
+   * Returns true only if serialization succeeded and at least one record
+   * was kept. */
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+
+    name *name_prime = c->serializer->start_embed<name> ();
+    if (unlikely (!name_prime)) return_trace (false);
+
+    auto it =
+    + nameRecordZ.as_array (count)
+    | hb_filter (c->plan->name_ids, &NameRecord::nameID)
+    | hb_filter (c->plan->name_languages, &NameRecord::languageID)
+    | hb_filter ([&] (const NameRecord& namerecord) { return c->plan->name_legacy || namerecord.isUnicode (); })
+    ;
+
+    /* Only look at name_prime->count once serialize () has succeeded: when
+     * it fails early, the header may never have been allocated or written,
+     * so reading it would inspect memory that is not ours. */
+    if (unlikely (!name_prime->serialize (c->serializer, it, hb_addressof (this + stringOffset))))
+      return_trace (false);
+
+    return_trace (name_prime->count);
+  }
+
+  /* Sanitizes all `count` records against the string storage area. */
+  bool sanitize_records (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const void *string_pool = (this+stringOffset).arrayZ;
+    return_trace (nameRecordZ.sanitize (c, count, string_pool));
+  }
+
+  /* Validates the header, accepts only format 0 or 1, and checks the record
+   * array, the range up to stringOffset and finally each record. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (c->check_struct (this) &&
+                  likely (format == 0 || format == 1) &&
+                  c->check_array (nameRecordZ.arrayZ, count) &&
+                  c->check_range (this, stringOffset) &&
+                  sanitize_records (c));
+  }
+
+  /* Per-face lookup helper: holds the sanitized table plus a sorted,
+   * de-duplicated list of (name_id, language) entries. */
+  struct accelerator_t
+  {
+    /* Loads and sanitizes the table for @face, then builds `names`: one
+     * entry per record, sorted, keeping only the first entry of each
+     * (name_id, language) pair and dropping unsupported encodings and
+     * invalid languages. */
+    void init (hb_face_t *face)
+    {
+      this->table = hb_sanitize_context_t ().reference_table<name> (face);
+      assert (this->table.get_length () >= this->table->stringOffset);
+      this->pool = (const char *) (const void *) (this->table+this->table->stringOffset);
+      this->pool_len = this->table.get_length () - this->table->stringOffset;
+      const hb_array_t<const NameRecord> all_names (this->table->nameRecordZ.arrayZ,
+                                                    this->table->count);
+
+      this->names.init ();
+      this->names.alloc (all_names.length);
+
+      for (unsigned int i = 0; i < all_names.length; i++)
+      {
+        hb_ot_name_entry_t *entry = this->names.push ();
+
+        entry->name_id = all_names[i].nameID;
+        entry->language = all_names[i].language (face);
+        entry->entry_score = all_names[i].score ();
+        entry->entry_index = i;
+      }
+
+      this->names.qsort (_hb_ot_name_entry_cmp);
+      /* Walk and pick best only for each name_id,language pair,
+       * while dropping unsupported encodings. */
+      unsigned int j = 0;
+      for (unsigned int i = 0; i < this->names.length; i++)
+      {
+        if (this->names[i].entry_score == UNSUPPORTED ||
+            this->names[i].language == HB_LANGUAGE_INVALID)
+          continue;
+        /* Same key as the previous sorted entry: that one scored at least
+         * as well, so this one is redundant. */
+        if (i &&
+            this->names[i - 1].name_id == this->names[i].name_id &&
+            this->names[i - 1].language == this->names[i].language)
+          continue;
+        this->names[j++] = this->names[i];
+      }
+      this->names.resize (j);
+    }
+
+    /* Releases the entry list and the table reference taken by init (). */
+    void fini ()
+    {
+      this->names.fini ();
+      this->table.destroy ();
+    }
+
+    /* Looks up the entry for (@name_id, @language).  Returns the index of
+     * its name record, or -1 when there is no such entry.  When @width is
+     * non-null it receives 2 for entries scoring below 10 and 1 otherwise.
+     * NOTE(review): the width is presumably bytes per code unit for the
+     * caller's decoder -- confirm at the call site. */
+    int get_index (hb_ot_name_id_t name_id,
+                   hb_language_t language,
+                   unsigned int *width=nullptr) const
+    {
+      const hb_ot_name_entry_t key = {name_id, {0}, language};
+      const hb_ot_name_entry_t *entry = hb_bsearch (key, (const hb_ot_name_entry_t *) this->names,
+                                                    this->names.length,
+                                                    sizeof (hb_ot_name_entry_t),
+                                                    _hb_ot_name_entry_cmp_key);
+      if (!entry)
+        return -1;
+
+      if (width)
+        *width = entry->entry_score < 10 ? 2 : 1;
+
+      return entry->entry_index;
+    }
+
+    /* Returns the bytes of the string belonging to name record @idx, taken
+     * as a sub-array of the string pool. */
+    hb_bytes_t get_name (unsigned int idx) const
+    {
+      const hb_array_t<const NameRecord> all_names (table->nameRecordZ.arrayZ, table->count);
+      const NameRecord &record = all_names[idx];
+      const hb_bytes_t string_pool (pool, pool_len);
+      return string_pool.sub_array (record.offset, record.length);
+    }
+
+    private:
+    const char *pool;       /* Start of the string storage area. */
+    unsigned int pool_len;  /* Table length minus stringOffset. */
+    public:
+    hb_blob_ptr_t<name> table;
+    hb_vector_t<hb_ot_name_entry_t> names;
+  };
+
+  /* We only implement format 0 for now. */
+  HBUINT16 format; /* Format selector (=0/1). */
+  HBUINT16 count; /* Number of name records. */
+  NNOffsetTo<UnsizedArrayOf<HBUINT8>>
+           stringOffset; /* Offset to start of string storage (from start of table). */
+  UnsizedArrayOf<NameRecord>
+           nameRecordZ; /* The name records where count is the number of records. */
+  public:
+  DEFINE_SIZE_ARRAY (6, nameRecordZ);
+};
+
+#undef entry_index
+#undef entry_score
+
+/* Named accelerator type for the 'name' table; it adds no members of its
+ * own to name::accelerator_t. */
+struct name_accelerator_t : name::accelerator_t {};
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_NAME_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-name.cc b/thirdparty/harfbuzz/src/hb-ot-name.cc
new file mode 100644
index 0000000000..10122b8c2e
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-name.cc
@@ -0,0 +1,228 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_NAME
+
+#include "hb-ot-name-table.hh"
+
+#include "hb-utf.hh"
+
+
+/**
+ * SECTION:hb-ot-name
+ * @title: hb-ot-name
+ * @short_description: OpenType font name information
+ * @include: hb-ot.h
+ *
+ * Functions for fetching name strings from OpenType fonts.
+ **/
+
+
+/**
+ * hb_ot_name_list_names:
+ * @face: font face.
+ * @num_entries: (out) (allow-none): number of returned entries.
+ *
+ * Enumerates all available name IDs and language combinations. Returned
+ * array is owned by the @face and should not be modified. It can be
+ * used as long as @face is alive.
+ *
+ * Returns: (out) (transfer none) (array length=num_entries): Array of available name entries.
+ * Since: 2.1.0
+ **/
+const hb_ot_name_entry_t *
+hb_ot_name_list_names (hb_face_t *face,
+ unsigned int *num_entries /* OUT */)
+{
+ const OT::name_accelerator_t &name = *face->table.name;
+ if (num_entries) *num_entries = name.names.length;
+ return (const hb_ot_name_entry_t *) name.names;
+}
+
+
+/* Converts @bytes from the in_utf_t encoding to the out_utf_t encoding.
+ *
+ * If @text_size is non-null and *text_size is non-zero, codepoints are
+ * encoded into @text until the input is exhausted or the next codepoint no
+ * longer fits, always keeping one unit free for the terminating NUL. The
+ * output is then NUL-terminated and *text_size is set to the number of
+ * units written, not counting the NUL.
+ *
+ * Returns the length, in out_utf_t units, that the complete conversion
+ * needs: the units written plus the encoded length of any remaining input. */
+template <typename in_utf_t, typename out_utf_t>
+static inline unsigned int
+hb_ot_name_convert_utf (hb_bytes_t bytes,
+ unsigned int *text_size /* IN/OUT */,
+ typename out_utf_t::codepoint_t *text /* OUT */)
+{
+ unsigned int src_len = bytes.length / sizeof (typename in_utf_t::codepoint_t);
+ const typename in_utf_t::codepoint_t *src = (const typename in_utf_t::codepoint_t *) bytes.arrayZ;
+ const typename in_utf_t::codepoint_t *src_end = src + src_len;
+
+ typename out_utf_t::codepoint_t *dst = text;
+
+ hb_codepoint_t unicode;
+ const hb_codepoint_t replacement = HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT;
+
+ if (text_size && *text_size)
+ {
+ (*text_size)--; /* Save room for NUL-termination. */
+ const typename out_utf_t::codepoint_t *dst_end = text + *text_size;
+
+ while (src < src_end && dst < dst_end)
+ {
+ const typename in_utf_t::codepoint_t *src_next = in_utf_t::next (src, src_end, &unicode, replacement);
+ typename out_utf_t::codepoint_t *dst_next = out_utf_t::encode (dst, dst_end, unicode);
+ if (dst_next == dst)
+ break; /* Out-of-room. */
+
+ dst = dst_next;
+ src = src_next;
+ }
+
+ *text_size = dst - text;
+ *dst = 0; /* NUL-terminate. */
+ }
+
+ /* Accumulate length of rest. */
+ unsigned int dst_len = dst - text;
+ while (src < src_end)
+ {
+ src = in_utf_t::next (src, src_end, &unicode, replacement);
+ dst_len += out_utf_t::encode_len (unicode);
+ }
+ return dst_len;
+}
+
+/* Shared implementation behind hb_ot_name_get_utf8/16/32.
+ *
+ * Looks up @name_id for @language (a null language means "en") in the
+ * face's name accelerator and converts the stored string to utf_t.
+ * Returns the full converted length. When no entry is found, or the
+ * reported width is neither 1 nor 2, returns 0 and, if @text_size is
+ * non-null, writes an empty NUL-terminated string (when there is room)
+ * and sets *text_size to 0. */
+template <typename utf_t>
+static inline unsigned int
+hb_ot_name_get_utf (hb_face_t       *face,
+		    hb_ot_name_id_t  name_id,
+		    hb_language_t    language,
+		    unsigned int    *text_size /* IN/OUT */,
+		    typename utf_t::codepoint_t *text /* OUT */)
+{
+  const OT::name_accelerator_t &accel = *face->table.name;
+
+  const hb_language_t lookup_language =
+    language ? language : hb_language_from_string ("en", 2);
+
+  unsigned int width;
+  const int record_idx = accel.get_index (name_id, lookup_language, &width);
+  if (record_idx != -1)
+  {
+    const hb_bytes_t raw = accel.get_name (record_idx);
+
+    switch (width)
+    {
+      case 2: /* UTF16-BE */
+	return hb_ot_name_convert_utf<hb_utf16_be_t, utf_t> (raw, text_size, text);
+      case 1: /* ASCII */
+	return hb_ot_name_convert_utf<hb_ascii_t, utf_t> (raw, text_size, text);
+      default:
+	break;
+    }
+  }
+
+  /* Nothing usable: hand back an empty string. */
+  if (!text_size)
+    return 0;
+
+  if (*text_size)
+    *text = 0;
+  *text_size = 0;
+  return 0;
+}
+
+/**
+ * hb_ot_name_get_utf8:
+ * @face: font face.
+ * @name_id: OpenType name identifier to fetch.
+ * @language: language to fetch the name for.
+ * @text_size: (inout) (allow-none): input size of @text buffer, and output size of
+ * text written to buffer.
+ * @text: (out caller-allocates) (array length=text_size): buffer to write fetched name into.
+ *
+ * Fetches a font name from the OpenType 'name' table.
+ * If @language is #HB_LANGUAGE_INVALID, English ("en") is assumed.
+ * Returns string in UTF-8 encoding.
+ *
+ * Returns: full length of the requested string, or 0 if not found.
+ * Since: 2.1.0
+ **/
+unsigned int
+hb_ot_name_get_utf8 (hb_face_t *face,
+ hb_ot_name_id_t name_id,
+ hb_language_t language,
+ unsigned int *text_size /* IN/OUT */,
+ char *text /* OUT */)
+{
+ return hb_ot_name_get_utf<hb_utf8_t> (face, name_id, language, text_size,
+ (hb_utf8_t::codepoint_t *) text);
+}
+
+/**
+ * hb_ot_name_get_utf16:
+ * @face: font face.
+ * @name_id: OpenType name identifier to fetch.
+ * @language: language to fetch the name for.
+ * @text_size: (inout) (allow-none): input size of @text buffer, and output size of
+ * text written to buffer.
+ * @text: (out caller-allocates) (array length=text_size): buffer to write fetched name into.
+ *
+ * Fetches a font name from the OpenType 'name' table.
+ * If @language is #HB_LANGUAGE_INVALID, English ("en") is assumed.
+ * Returns string in UTF-16 encoding.
+ *
+ * Returns: full length of the requested string, or 0 if not found.
+ * Since: 2.1.0
+ **/
+unsigned int
+hb_ot_name_get_utf16 (hb_face_t *face,
+ hb_ot_name_id_t name_id,
+ hb_language_t language,
+ unsigned int *text_size /* IN/OUT */,
+ uint16_t *text /* OUT */)
+{
+ return hb_ot_name_get_utf<hb_utf16_t> (face, name_id, language, text_size, text);
+}
+
+/**
+ * hb_ot_name_get_utf32:
+ * @face: font face.
+ * @name_id: OpenType name identifier to fetch.
+ * @language: language to fetch the name for.
+ * @text_size: (inout) (allow-none): input size of @text buffer, and output size of
+ * text written to buffer.
+ * @text: (out caller-allocates) (array length=text_size): buffer to write fetched name into.
+ *
+ * Fetches a font name from the OpenType 'name' table.
+ * If @language is #HB_LANGUAGE_INVALID, English ("en") is assumed.
+ * Returns string in UTF-32 encoding.
+ *
+ * Returns: full length of the requested string, or 0 if not found.
+ * Since: 2.1.0
+ **/
+unsigned int
+hb_ot_name_get_utf32 (hb_face_t *face,
+ hb_ot_name_id_t name_id,
+ hb_language_t language,
+ unsigned int *text_size /* IN/OUT */,
+ uint32_t *text /* OUT */)
+{
+ return hb_ot_name_get_utf<hb_utf32_t> (face, name_id, language, text_size, text);
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-name.h b/thirdparty/harfbuzz/src/hb-ot-name.h
new file mode 100644
index 0000000000..3b4ad581c7
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-name.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_OT_H_IN
+#error "Include <hb-ot.h> instead."
+#endif
+
+#ifndef HB_OT_NAME_H
+#define HB_OT_NAME_H
+
+#include "hb.h"
+
+HB_BEGIN_DECLS
+
+
+/**
+ * hb_ot_name_id_t:
+ * @HB_OT_NAME_ID_INVALID: Value to represent a nonexistent name ID.
+ *
+ * An integral type representing an OpenType 'name' table name identifier.
+ * There are predefined name IDs, as well as name IDs returned from other
+ * APIs. These can be used to fetch name strings from a font face.
+ *
+ * Since: 2.0.0
+ **/
+enum
+{
+ HB_OT_NAME_ID_COPYRIGHT = 0,
+ HB_OT_NAME_ID_FONT_FAMILY = 1,
+ HB_OT_NAME_ID_FONT_SUBFAMILY = 2,
+ HB_OT_NAME_ID_UNIQUE_ID = 3,
+ HB_OT_NAME_ID_FULL_NAME = 4,
+ HB_OT_NAME_ID_VERSION_STRING = 5,
+ HB_OT_NAME_ID_POSTSCRIPT_NAME = 6,
+ HB_OT_NAME_ID_TRADEMARK = 7,
+ HB_OT_NAME_ID_MANUFACTURER = 8,
+ HB_OT_NAME_ID_DESIGNER = 9,
+ HB_OT_NAME_ID_DESCRIPTION = 10,
+ HB_OT_NAME_ID_VENDOR_URL = 11,
+ HB_OT_NAME_ID_DESIGNER_URL = 12,
+ HB_OT_NAME_ID_LICENSE = 13,
+ HB_OT_NAME_ID_LICENSE_URL = 14,
+/*HB_OT_NAME_ID_RESERVED = 15,*/
+ HB_OT_NAME_ID_TYPOGRAPHIC_FAMILY = 16,
+ HB_OT_NAME_ID_TYPOGRAPHIC_SUBFAMILY = 17,
+ HB_OT_NAME_ID_MAC_FULL_NAME = 18,
+ HB_OT_NAME_ID_SAMPLE_TEXT = 19,
+ HB_OT_NAME_ID_CID_FINDFONT_NAME = 20,
+ HB_OT_NAME_ID_WWS_FAMILY = 21,
+ HB_OT_NAME_ID_WWS_SUBFAMILY = 22,
+ HB_OT_NAME_ID_LIGHT_BACKGROUND = 23,
+ HB_OT_NAME_ID_DARK_BACKGROUND = 24,
+ HB_OT_NAME_ID_VARIATIONS_PS_PREFIX = 25,
+
+ HB_OT_NAME_ID_INVALID = 0xFFFF
+};
+
+typedef unsigned int hb_ot_name_id_t;
+
+
+/**
+ * hb_ot_name_entry_t:
+ * @name_id: name ID
+ * @language: language
+ *
+ * Structure representing a name ID in a particular language.
+ *
+ * Since: 2.1.0
+ **/
+typedef struct hb_ot_name_entry_t
+{
+ hb_ot_name_id_t name_id;
+ /*< private >*/
+ hb_var_int_t var;
+ /*< public >*/
+ hb_language_t language;
+} hb_ot_name_entry_t;
+
+HB_EXTERN const hb_ot_name_entry_t *
+hb_ot_name_list_names (hb_face_t *face,
+ unsigned int *num_entries /* OUT */);
+
+
+HB_EXTERN unsigned int
+hb_ot_name_get_utf8 (hb_face_t *face,
+ hb_ot_name_id_t name_id,
+ hb_language_t language,
+ unsigned int *text_size /* IN/OUT */,
+ char *text /* OUT */);
+
+HB_EXTERN unsigned int
+hb_ot_name_get_utf16 (hb_face_t *face,
+ hb_ot_name_id_t name_id,
+ hb_language_t language,
+ unsigned int *text_size /* IN/OUT */,
+ uint16_t *text /* OUT */);
+
+HB_EXTERN unsigned int
+hb_ot_name_get_utf32 (hb_face_t *face,
+ hb_ot_name_id_t name_id,
+ hb_language_t language,
+ unsigned int *text_size /* IN/OUT */,
+ uint32_t *text /* OUT */);
+
+
+HB_END_DECLS
+
+#endif /* HB_OT_NAME_H */
diff --git a/thirdparty/harfbuzz/src/hb-ot-os2-table.hh b/thirdparty/harfbuzz/src/hb-ot-os2-table.hh
new file mode 100644
index 0000000000..7d31b712c4
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-os2-table.hh
@@ -0,0 +1,316 @@
+/*
+ * Copyright © 2011,2012 Google, Inc.
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_OS2_TABLE_HH
+#define HB_OT_OS2_TABLE_HH
+
+#include "hb-open-type.hh"
+#include "hb-ot-os2-unicode-ranges.hh"
+#include "hb-ot-cmap-table.hh"
+
+#include "hb-set.hh"
+
+/*
+ * OS/2 and Windows Metrics
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/os2
+ */
+#define HB_OT_TAG_OS2 HB_TAG('O','S','/','2')
+
+
+namespace OT {
+
+struct OS2V1Tail
+{
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ public:
+ HBUINT32 ulCodePageRange1;
+ HBUINT32 ulCodePageRange2;
+ public:
+ DEFINE_SIZE_STATIC (8);
+};
+
+struct OS2V2Tail
+{
+ bool has_data () const { return sxHeight || sCapHeight; }
+
+ const OS2V2Tail * operator -> () const { return this; }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ public:
+ HBINT16 sxHeight;
+ HBINT16 sCapHeight;
+ HBUINT16 usDefaultChar;
+ HBUINT16 usBreakChar;
+ HBUINT16 usMaxContext;
+ public:
+ DEFINE_SIZE_STATIC (10);
+};
+
+/* Fields appended to the OS/2 table from version 5 on. */
+struct OS2V5Tail
+{
+  /* Reads the lower/upper optical point-size pair.
+   * Returns true and fills *lower / *upper when the stored values pass the
+   * validity checks from
+   * https://docs.microsoft.com/en-us/typography/opentype/spec/os2#lps ;
+   * otherwise returns false and leaves both outputs untouched. */
+  inline bool get_optical_size (unsigned int *lower, unsigned int *upper) const
+  {
+    const unsigned int lo = usLowerOpticalPointSize;
+    const unsigned int hi = usUpperOpticalPointSize;
+
+    const bool lo_in_range = lo >= 1 && lo <= 0xFFFE;
+    const bool hi_in_range = hi >= 2 && hi <= 0xFFFF;
+    if (!(lo < hi && lo_in_range && hi_in_range))
+      return false;
+
+    *lower = lo;
+    *upper = hi;
+    return true;
+  }
+
+  /* Bounds-checks this fixed-size struct against the sanitizer context. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    return_trace (c->check_struct (this));
+  }
+
+  public:
+  HBUINT16	usLowerOpticalPointSize;
+  HBUINT16	usUpperOpticalPointSize;
+  public:
+  DEFINE_SIZE_STATIC (4);
+};
+
+struct OS2
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_OS2;
+
+ bool has_data () const { return usWeightClass || usWidthClass || usFirstCharIndex || usLastCharIndex; }
+
+ const OS2V1Tail &v1 () const { return version >= 1 ? v1X : Null (OS2V1Tail); }
+ const OS2V2Tail &v2 () const { return version >= 2 ? v2X : Null (OS2V2Tail); }
+ const OS2V5Tail &v5 () const { return version >= 5 ? v5X : Null (OS2V5Tail); }
+
+ enum selection_flag_t {
+ ITALIC = 1u<<0,
+ UNDERSCORE = 1u<<1,
+ NEGATIVE = 1u<<2,
+ OUTLINED = 1u<<3,
+ STRIKEOUT = 1u<<4,
+ BOLD = 1u<<5,
+ REGULAR = 1u<<6,
+ USE_TYPO_METRICS = 1u<<7,
+ WWS = 1u<<8,
+ OBLIQUE = 1u<<9
+ };
+
+ bool is_italic () const { return fsSelection & ITALIC; }
+ bool is_oblique () const { return fsSelection & OBLIQUE; }
+ bool use_typo_metrics () const { return fsSelection & USE_TYPO_METRICS; }
+
+ enum width_class_t {
+ FWIDTH_ULTRA_CONDENSED = 1, /* 50% */
+ FWIDTH_EXTRA_CONDENSED = 2, /* 62.5% */
+ FWIDTH_CONDENSED = 3, /* 75% */
+ FWIDTH_SEMI_CONDENSED = 4, /* 87.5% */
+ FWIDTH_NORMAL = 5, /* 100% */
+ FWIDTH_SEMI_EXPANDED = 6, /* 112.5% */
+ FWIDTH_EXPANDED = 7, /* 125% */
+ FWIDTH_EXTRA_EXPANDED = 8, /* 150% */
+ FWIDTH_ULTRA_EXPANDED = 9 /* 200% */
+ };
+
+ float get_width () const
+ {
+ switch (usWidthClass) {
+ case FWIDTH_ULTRA_CONDENSED:return 50.f;
+ case FWIDTH_EXTRA_CONDENSED:return 62.5f;
+ case FWIDTH_CONDENSED: return 75.f;
+ case FWIDTH_SEMI_CONDENSED: return 87.5f;
+ default:
+ case FWIDTH_NORMAL: return 100.f;
+ case FWIDTH_SEMI_EXPANDED: return 112.5f;
+ case FWIDTH_EXPANDED: return 125.f;
+ case FWIDTH_EXTRA_EXPANDED: return 150.f;
+ case FWIDTH_ULTRA_EXPANDED: return 200.f;
+ }
+ }
+
+ /* Subsets the OS/2 table: embeds this table through the serializer, then
+  * rewrites usFirstCharIndex, usLastCharIndex and ulUnicodeRange on the
+  * embedded copy from the codepoints kept by the subset plan.
+  * Returns false if embedding fails, true otherwise. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ OS2 *os2_prime = c->serializer->embed (this);
+ if (unlikely (!os2_prime)) return_trace (false);
+
+ hb_set_t unicodes;
+ if (!c->plan->glyphs_requested->is_empty ())
+ {
+ hb_map_t unicode_glyphid_map;
+
+ OT::cmap::accelerator_t cmap;
+ cmap.init (c->plan->source);
+ cmap.collect_mapping (&unicodes, &unicode_glyphid_map);
+ cmap.fini ();
+
+ if (c->plan->unicodes->is_empty ()) unicodes.clear ();
+ else hb_set_set (&unicodes, c->plan->unicodes);
+
+ + unicode_glyphid_map.iter ()
+ | hb_filter (c->plan->glyphs_requested, hb_second)
+ | hb_map (hb_first)
+ | hb_sink (unicodes)
+ ;
+ }
+ /* When the --gids option is not used, `unicodes` stays empty and the
+  * plan's own unicode set is used below. That avoids collect_mapping,
+  * which iterates all codepoints in each subtable and is not efficient. */
+ uint16_t min_cp, max_cp;
+ find_min_and_max_codepoint (unicodes.is_empty () ? c->plan->unicodes : &unicodes, &min_cp, &max_cp);
+ os2_prime->usFirstCharIndex = min_cp;
+ os2_prime->usLastCharIndex = max_cp;
+
+ _update_unicode_ranges (unicodes.is_empty () ? c->plan->unicodes : &unicodes, os2_prime->ulUnicodeRange);
+
+ return_trace (true);
+ }
+
+  /* Narrows the OS/2 Unicode-range bits to those still needed by a subset.
+   *
+   * For each codepoint in @codepoints the matching range bit (0..127) is
+   * collected into a 4x32-bit mask; codepoints beyond the BMP additionally
+   * set bit 57 ("Non Plane 0", i.e. bit 25 of the second word). The mask is
+   * then ANDed into @ulUnicodeRange, so a bit stays set only if it was set
+   * in the original table and is still covered by the retained codepoints. */
+  void _update_unicode_ranges (const hb_set_t *codepoints,
+			       HBUINT32 ulUnicodeRange[4]) const
+  {
+    HBUINT32 newBits[4];
+    for (unsigned int i = 0; i < 4; i++)
+      newBits[i] = 0;
+
+    hb_codepoint_t cp = HB_SET_VALUE_INVALID;
+    while (codepoints->next (&cp)) {
+      unsigned int bit = _hb_ot_os2_get_unicode_range_bit (cp);
+      if (bit < 128)
+      {
+	unsigned int block = bit / 32;
+	unsigned int bit_in_block = bit % 32;
+	/* Shift an unsigned 1: bit_in_block can be 31 (e.g. range bits 31
+	 * and 63), and `1 << 31` overflows a signed int. */
+	unsigned int mask = 1u << bit_in_block;
+	newBits[block] = newBits[block] | mask;
+      }
+      /* 0x10FFFF is the last Unicode codepoint. */
+      if (cp >= 0x10000 && cp <= 0x10FFFF)
+      {
+	/* the spec says that bit 57 ("Non Plane 0") implies that there's
+	   at least one codepoint beyond the BMP; so I also include all
+	   the non-BMP codepoints here */
+	newBits[1] = newBits[1] | (1u << 25);
+      }
+    }
+
+    for (unsigned int i = 0; i < 4; i++)
+      ulUnicodeRange[i] = ulUnicodeRange[i] & newBits[i]; // set bits only if set in the original
+  }
+
+ /* Stores the smallest and largest members of @codepoints in *min_cp and
+  * *max_cp, each clamped to 0xFFFF so that it fits the 16-bit outputs. */
+ static void find_min_and_max_codepoint (const hb_set_t *codepoints,
+ uint16_t *min_cp, /* OUT */
+ uint16_t *max_cp /* OUT */)
+ {
+ *min_cp = hb_min (0xFFFFu, codepoints->get_min ());
+ *max_cp = hb_min (0xFFFFu, codepoints->get_max ());
+ }
+
+ /* https://github.com/Microsoft/Font-Validator/blob/520aaae/OTFontFileVal/val_OS2.cs#L644-L681 */
+ enum font_page_t
+ {
+ FONT_PAGE_HEBREW = 0xB100, /* Hebrew Windows 3.1 font page */
+ FONT_PAGE_SIMP_ARABIC = 0xB200, /* Simplified Arabic Windows 3.1 font page */
+ FONT_PAGE_TRAD_ARABIC = 0xB300, /* Traditional Arabic Windows 3.1 font page */
+ FONT_PAGE_OEM_ARABIC = 0xB400, /* OEM Arabic Windows 3.1 font page */
+ FONT_PAGE_SIMP_FARSI = 0xBA00, /* Simplified Farsi Windows 3.1 font page */
+ FONT_PAGE_TRAD_FARSI = 0xBB00, /* Traditional Farsi Windows 3.1 font page */
+ FONT_PAGE_THAI = 0xDE00 /* Thai Windows 3.1 font page */
+ };
+ font_page_t get_font_page () const
+ { return (font_page_t) (version == 0 ? fsSelection & 0xFF00 : 0); }
+
+ unsigned get_size () const
+ {
+ unsigned result = min_size;
+ if (version >= 1) result += v1X.get_size ();
+ if (version >= 2) result += v2X.get_size ();
+ if (version >= 5) result += v5X.get_size ();
+ return result;
+ }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!c->check_struct (this))) return_trace (false);
+ if (unlikely (version >= 1 && !v1X.sanitize (c))) return_trace (false);
+ if (unlikely (version >= 2 && !v2X.sanitize (c))) return_trace (false);
+ if (unlikely (version >= 5 && !v5X.sanitize (c))) return_trace (false);
+ return_trace (true);
+ }
+
+ public:
+ HBUINT16 version;
+ HBINT16 xAvgCharWidth;
+ HBUINT16 usWeightClass;
+ HBUINT16 usWidthClass;
+ HBUINT16 fsType;
+ HBINT16 ySubscriptXSize;
+ HBINT16 ySubscriptYSize;
+ HBINT16 ySubscriptXOffset;
+ HBINT16 ySubscriptYOffset;
+ HBINT16 ySuperscriptXSize;
+ HBINT16 ySuperscriptYSize;
+ HBINT16 ySuperscriptXOffset;
+ HBINT16 ySuperscriptYOffset;
+ HBINT16 yStrikeoutSize;
+ HBINT16 yStrikeoutPosition;
+ HBINT16 sFamilyClass;
+ HBUINT8 panose[10];
+ HBUINT32 ulUnicodeRange[4];
+ Tag achVendID;
+ HBUINT16 fsSelection;
+ HBUINT16 usFirstCharIndex;
+ HBUINT16 usLastCharIndex;
+ HBINT16 sTypoAscender;
+ HBINT16 sTypoDescender;
+ HBINT16 sTypoLineGap;
+ HBUINT16 usWinAscent;
+ HBUINT16 usWinDescent;
+ OS2V1Tail v1X;
+ OS2V2Tail v2X;
+ OS2V5Tail v5X;
+ public:
+ DEFINE_SIZE_MIN (78);
+};
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_OS2_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-os2-unicode-ranges.hh b/thirdparty/harfbuzz/src/hb-ot-os2-unicode-ranges.hh
new file mode 100644
index 0000000000..9613d2d186
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-os2-unicode-ranges.hh
@@ -0,0 +1,231 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#ifndef HB_OT_OS2_UNICODE_RANGES_HH
+#define HB_OT_OS2_UNICODE_RANGES_HH
+
+#include "hb.hh"
+
+namespace OT {
+
+struct OS2Range
+{
+ int cmp (hb_codepoint_t key) const
+ { return (key < start) ? -1 : key <= end ? 0 : +1; }
+
+ hb_codepoint_t start;
+ hb_codepoint_t end;
+ unsigned int bit;
+};
+
+/* Note: The contents of this array was generated using gen-os2-unicode-ranges.py. */
+static const OS2Range _hb_os2_unicode_ranges[] =
+{
+ { 0x0, 0x7F, 0}, // Basic Latin
+ { 0x80, 0xFF, 1}, // Latin-1 Supplement
+ { 0x100, 0x17F, 2}, // Latin Extended-A
+ { 0x180, 0x24F, 3}, // Latin Extended-B
+ { 0x250, 0x2AF, 4}, // IPA Extensions
+ { 0x2B0, 0x2FF, 5}, // Spacing Modifier Letters
+ { 0x300, 0x36F, 6}, // Combining Diacritical Marks
+ { 0x370, 0x3FF, 7}, // Greek and Coptic
+ { 0x400, 0x4FF, 9}, // Cyrillic
+ { 0x500, 0x52F, 9}, // Cyrillic Supplement
+ { 0x530, 0x58F, 10}, // Armenian
+ { 0x590, 0x5FF, 11}, // Hebrew
+ { 0x600, 0x6FF, 13}, // Arabic
+ { 0x700, 0x74F, 71}, // Syriac
+ { 0x750, 0x77F, 13}, // Arabic Supplement
+ { 0x780, 0x7BF, 72}, // Thaana
+ { 0x7C0, 0x7FF, 14}, // NKo
+ { 0x900, 0x97F, 15}, // Devanagari
+ { 0x980, 0x9FF, 16}, // Bengali
+ { 0xA00, 0xA7F, 17}, // Gurmukhi
+ { 0xA80, 0xAFF, 18}, // Gujarati
+ { 0xB00, 0xB7F, 19}, // Oriya
+ { 0xB80, 0xBFF, 20}, // Tamil
+ { 0xC00, 0xC7F, 21}, // Telugu
+ { 0xC80, 0xCFF, 22}, // Kannada
+ { 0xD00, 0xD7F, 23}, // Malayalam
+ { 0xD80, 0xDFF, 73}, // Sinhala
+ { 0xE00, 0xE7F, 24}, // Thai
+ { 0xE80, 0xEFF, 25}, // Lao
+ { 0xF00, 0xFFF, 70}, // Tibetan
+ { 0x1000, 0x109F, 74}, // Myanmar
+ { 0x10A0, 0x10FF, 26}, // Georgian
+ { 0x1100, 0x11FF, 28}, // Hangul Jamo
+ { 0x1200, 0x137F, 75}, // Ethiopic
+ { 0x1380, 0x139F, 75}, // Ethiopic Supplement
+ { 0x13A0, 0x13FF, 76}, // Cherokee
+ { 0x1400, 0x167F, 77}, // Unified Canadian Aboriginal Syllabics
+ { 0x1680, 0x169F, 78}, // Ogham
+ { 0x16A0, 0x16FF, 79}, // Runic
+ { 0x1700, 0x171F, 84}, // Tagalog
+ { 0x1720, 0x173F, 84}, // Hanunoo
+ { 0x1740, 0x175F, 84}, // Buhid
+ { 0x1760, 0x177F, 84}, // Tagbanwa
+ { 0x1780, 0x17FF, 80}, // Khmer
+ { 0x1800, 0x18AF, 81}, // Mongolian
+ { 0x1900, 0x194F, 93}, // Limbu
+ { 0x1950, 0x197F, 94}, // Tai Le
+ { 0x1980, 0x19DF, 95}, // New Tai Lue
+ { 0x19E0, 0x19FF, 80}, // Khmer Symbols
+ { 0x1A00, 0x1A1F, 96}, // Buginese
+ { 0x1B00, 0x1B7F, 27}, // Balinese
+ { 0x1B80, 0x1BBF, 112}, // Sundanese
+ { 0x1C00, 0x1C4F, 113}, // Lepcha
+ { 0x1C50, 0x1C7F, 114}, // Ol Chiki
+ { 0x1D00, 0x1D7F, 4}, // Phonetic Extensions
+ { 0x1D80, 0x1DBF, 4}, // Phonetic Extensions Supplement
+ { 0x1DC0, 0x1DFF, 6}, // Combining Diacritical Marks Supplement
+ { 0x1E00, 0x1EFF, 29}, // Latin Extended Additional
+ { 0x1F00, 0x1FFF, 30}, // Greek Extended
+ { 0x2000, 0x206F, 31}, // General Punctuation
+ { 0x2070, 0x209F, 32}, // Superscripts And Subscripts
+ { 0x20A0, 0x20CF, 33}, // Currency Symbols
+ { 0x20D0, 0x20FF, 34}, // Combining Diacritical Marks For Symbols
+ { 0x2100, 0x214F, 35}, // Letterlike Symbols
+ { 0x2150, 0x218F, 36}, // Number Forms
+ { 0x2190, 0x21FF, 37}, // Arrows
+ { 0x2200, 0x22FF, 38}, // Mathematical Operators
+ { 0x2300, 0x23FF, 39}, // Miscellaneous Technical
+ { 0x2400, 0x243F, 40}, // Control Pictures
+ { 0x2440, 0x245F, 41}, // Optical Character Recognition
+ { 0x2460, 0x24FF, 42}, // Enclosed Alphanumerics
+ { 0x2500, 0x257F, 43}, // Box Drawing
+ { 0x2580, 0x259F, 44}, // Block Elements
+ { 0x25A0, 0x25FF, 45}, // Geometric Shapes
+ { 0x2600, 0x26FF, 46}, // Miscellaneous Symbols
+ { 0x2700, 0x27BF, 47}, // Dingbats
+ { 0x27C0, 0x27EF, 38}, // Miscellaneous Mathematical Symbols-A
+ { 0x27F0, 0x27FF, 37}, // Supplemental Arrows-A
+ { 0x2800, 0x28FF, 82}, // Braille Patterns
+ { 0x2900, 0x297F, 37}, // Supplemental Arrows-B
+ { 0x2980, 0x29FF, 38}, // Miscellaneous Mathematical Symbols-B
+ { 0x2A00, 0x2AFF, 38}, // Supplemental Mathematical Operators
+ { 0x2B00, 0x2BFF, 37}, // Miscellaneous Symbols and Arrows
+ { 0x2C00, 0x2C5F, 97}, // Glagolitic
+ { 0x2C60, 0x2C7F, 29}, // Latin Extended-C
+ { 0x2C80, 0x2CFF, 8}, // Coptic
+ { 0x2D00, 0x2D2F, 26}, // Georgian Supplement
+ { 0x2D30, 0x2D7F, 98}, // Tifinagh
+ { 0x2D80, 0x2DDF, 75}, // Ethiopic Extended
+ { 0x2DE0, 0x2DFF, 9}, // Cyrillic Extended-A
+ { 0x2E00, 0x2E7F, 31}, // Supplemental Punctuation
+ { 0x2E80, 0x2EFF, 59}, // CJK Radicals Supplement
+ { 0x2F00, 0x2FDF, 59}, // Kangxi Radicals
+ { 0x2FF0, 0x2FFF, 59}, // Ideographic Description Characters
+ { 0x3000, 0x303F, 48}, // CJK Symbols And Punctuation
+ { 0x3040, 0x309F, 49}, // Hiragana
+ { 0x30A0, 0x30FF, 50}, // Katakana
+ { 0x3100, 0x312F, 51}, // Bopomofo
+ { 0x3130, 0x318F, 52}, // Hangul Compatibility Jamo
+ { 0x3190, 0x319F, 59}, // Kanbun
+ { 0x31A0, 0x31BF, 51}, // Bopomofo Extended
+ { 0x31C0, 0x31EF, 61}, // CJK Strokes
+ { 0x31F0, 0x31FF, 50}, // Katakana Phonetic Extensions
+ { 0x3200, 0x32FF, 54}, // Enclosed CJK Letters And Months
+ { 0x3300, 0x33FF, 55}, // CJK Compatibility
+ { 0x3400, 0x4DBF, 59}, // CJK Unified Ideographs Extension A
+ { 0x4DC0, 0x4DFF, 99}, // Yijing Hexagram Symbols
+ { 0x4E00, 0x9FFF, 59}, // CJK Unified Ideographs
+ { 0xA000, 0xA48F, 83}, // Yi Syllables
+ { 0xA490, 0xA4CF, 83}, // Yi Radicals
+ { 0xA500, 0xA63F, 12}, // Vai
+ { 0xA640, 0xA69F, 9}, // Cyrillic Extended-B
+ { 0xA700, 0xA71F, 5}, // Modifier Tone Letters
+ { 0xA720, 0xA7FF, 29}, // Latin Extended-D
+ { 0xA800, 0xA82F, 100}, // Syloti Nagri
+ { 0xA840, 0xA87F, 53}, // Phags-pa
+ { 0xA880, 0xA8DF, 115}, // Saurashtra
+ { 0xA900, 0xA92F, 116}, // Kayah Li
+ { 0xA930, 0xA95F, 117}, // Rejang
+ { 0xAA00, 0xAA5F, 118}, // Cham
+ { 0xAC00, 0xD7AF, 56}, // Hangul Syllables
+ { 0xD800, 0xDFFF, 57}, // Non-Plane 0 *
+ { 0xE000, 0xF8FF, 60}, // Private Use Area (plane 0)
+ { 0xF900, 0xFAFF, 61}, // CJK Compatibility Ideographs
+ { 0xFB00, 0xFB4F, 62}, // Alphabetic Presentation Forms
+ { 0xFB50, 0xFDFF, 63}, // Arabic Presentation Forms-A
+ { 0xFE00, 0xFE0F, 91}, // Variation Selectors
+ { 0xFE10, 0xFE1F, 65}, // Vertical Forms
+ { 0xFE20, 0xFE2F, 64}, // Combining Half Marks
+ { 0xFE30, 0xFE4F, 65}, // CJK Compatibility Forms
+ { 0xFE50, 0xFE6F, 66}, // Small Form Variants
+ { 0xFE70, 0xFEFF, 67}, // Arabic Presentation Forms-B
+ { 0xFF00, 0xFFEF, 68}, // Halfwidth And Fullwidth Forms
+ { 0xFFF0, 0xFFFF, 69}, // Specials
+ { 0x10000, 0x1007F, 101}, // Linear B Syllabary
+ { 0x10080, 0x100FF, 101}, // Linear B Ideograms
+ { 0x10100, 0x1013F, 101}, // Aegean Numbers
+ { 0x10140, 0x1018F, 102}, // Ancient Greek Numbers
+ { 0x10190, 0x101CF, 119}, // Ancient Symbols
+ { 0x101D0, 0x101FF, 120}, // Phaistos Disc
+ { 0x10280, 0x1029F, 121}, // Lycian
+ { 0x102A0, 0x102DF, 121}, // Carian
+ { 0x10300, 0x1032F, 85}, // Old Italic
+ { 0x10330, 0x1034F, 86}, // Gothic
+ { 0x10380, 0x1039F, 103}, // Ugaritic
+ { 0x103A0, 0x103DF, 104}, // Old Persian
+ { 0x10400, 0x1044F, 87}, // Deseret
+ { 0x10450, 0x1047F, 105}, // Shavian
+ { 0x10480, 0x104AF, 106}, // Osmanya
+ { 0x10800, 0x1083F, 107}, // Cypriot Syllabary
+ { 0x10900, 0x1091F, 58}, // Phoenician
+ { 0x10920, 0x1093F, 121}, // Lydian
+ { 0x10A00, 0x10A5F, 108}, // Kharoshthi
+ { 0x12000, 0x123FF, 110}, // Cuneiform
+ { 0x12400, 0x1247F, 110}, // Cuneiform Numbers and Punctuation
+ { 0x1D000, 0x1D0FF, 88}, // Byzantine Musical Symbols
+ { 0x1D100, 0x1D1FF, 88}, // Musical Symbols
+ { 0x1D200, 0x1D24F, 88}, // Ancient Greek Musical Notation
+ { 0x1D300, 0x1D35F, 109}, // Tai Xuan Jing Symbols
+ { 0x1D360, 0x1D37F, 111}, // Counting Rod Numerals
+ { 0x1D400, 0x1D7FF, 89}, // Mathematical Alphanumeric Symbols
+ { 0x1F000, 0x1F02F, 122}, // Mahjong Tiles
+ { 0x1F030, 0x1F09F, 122}, // Domino Tiles
+ { 0x20000, 0x2A6DF, 59}, // CJK Unified Ideographs Extension B
+ { 0x2F800, 0x2FA1F, 61}, // CJK Compatibility Ideographs Supplement
+ { 0xE0000, 0xE007F, 92}, // Tags
+ { 0xE0100, 0xE01EF, 91}, // Variation Selectors Supplement
+ { 0xF0000, 0xFFFFD, 90}, // Private Use (plane 15)
+ {0x100000, 0x10FFFD, 90}, // Private Use (plane 16)
+};
+
+/**
+ * _hb_ot_os2_get_unicode_range_bit:
+ * Returns the bit to be set in the OS/2 ulUnicodeRange fields for a given
+ * codepoint, found by a bsearch over _hb_os2_unicode_ranges.
+ * If the codepoint lies in none of the listed ranges, returns -1 converted
+ * to unsigned int; callers can reject that with a `bit < 128` check.
+ **/
+static unsigned int
+_hb_ot_os2_get_unicode_range_bit (hb_codepoint_t cp)
+{
+ auto *range = hb_sorted_array (_hb_os2_unicode_ranges).bsearch (cp);
+ return range ? range->bit : -1;
+}
+
+} /* namespace OT */
+
+#endif /* HB_OT_OS2_UNICODE_RANGES_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-post-macroman.hh b/thirdparty/harfbuzz/src/hb-ot-post-macroman.hh
new file mode 100644
index 0000000000..b4df8aaeea
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-post-macroman.hh
@@ -0,0 +1,294 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+/* Note: the #define of the guard macro below sits inside "#if 0", so it is
+ * compiled out and this guard never actually blocks a second inclusion.
+ * The _S() entries that follow are not defined in this file; the macro has
+ * to be supplied by the includer (hb-ot-post-table.hh names this file in
+ * HB_STRING_ARRAY_LIST before including hb-string-array.hh). */
+#ifndef HB_OT_POST_MACROMAN_HH
+#if 0 /* Make checks happy. */
+#define HB_OT_POST_MACROMAN_HH
+#include "hb.hh"
+#endif
+
+
+_S(".notdef")
+_S(".null")
+_S("nonmarkingreturn")
+_S("space")
+_S("exclam")
+_S("quotedbl")
+_S("numbersign")
+_S("dollar")
+_S("percent")
+_S("ampersand")
+_S("quotesingle")
+_S("parenleft")
+_S("parenright")
+_S("asterisk")
+_S("plus")
+_S("comma")
+_S("hyphen")
+_S("period")
+_S("slash")
+_S("zero")
+_S("one")
+_S("two")
+_S("three")
+_S("four")
+_S("five")
+_S("six")
+_S("seven")
+_S("eight")
+_S("nine")
+_S("colon")
+_S("semicolon")
+_S("less")
+_S("equal")
+_S("greater")
+_S("question")
+_S("at")
+_S("A")
+_S("B")
+_S("C")
+_S("D")
+_S("E")
+_S("F")
+_S("G")
+_S("H")
+_S("I")
+_S("J")
+_S("K")
+_S("L")
+_S("M")
+_S("N")
+_S("O")
+_S("P")
+_S("Q")
+_S("R")
+_S("S")
+_S("T")
+_S("U")
+_S("V")
+_S("W")
+_S("X")
+_S("Y")
+_S("Z")
+_S("bracketleft")
+_S("backslash")
+_S("bracketright")
+_S("asciicircum")
+_S("underscore")
+_S("grave")
+_S("a")
+_S("b")
+_S("c")
+_S("d")
+_S("e")
+_S("f")
+_S("g")
+_S("h")
+_S("i")
+_S("j")
+_S("k")
+_S("l")
+_S("m")
+_S("n")
+_S("o")
+_S("p")
+_S("q")
+_S("r")
+_S("s")
+_S("t")
+_S("u")
+_S("v")
+_S("w")
+_S("x")
+_S("y")
+_S("z")
+_S("braceleft")
+_S("bar")
+_S("braceright")
+_S("asciitilde")
+_S("Adieresis")
+_S("Aring")
+_S("Ccedilla")
+_S("Eacute")
+_S("Ntilde")
+_S("Odieresis")
+_S("Udieresis")
+_S("aacute")
+_S("agrave")
+_S("acircumflex")
+_S("adieresis")
+_S("atilde")
+_S("aring")
+_S("ccedilla")
+_S("eacute")
+_S("egrave")
+_S("ecircumflex")
+_S("edieresis")
+_S("iacute")
+_S("igrave")
+_S("icircumflex")
+_S("idieresis")
+_S("ntilde")
+_S("oacute")
+_S("ograve")
+_S("ocircumflex")
+_S("odieresis")
+_S("otilde")
+_S("uacute")
+_S("ugrave")
+_S("ucircumflex")
+_S("udieresis")
+_S("dagger")
+_S("degree")
+_S("cent")
+_S("sterling")
+_S("section")
+_S("bullet")
+_S("paragraph")
+_S("germandbls")
+_S("registered")
+_S("copyright")
+_S("trademark")
+_S("acute")
+_S("dieresis")
+_S("notequal")
+_S("AE")
+_S("Oslash")
+_S("infinity")
+_S("plusminus")
+_S("lessequal")
+_S("greaterequal")
+_S("yen")
+_S("mu")
+_S("partialdiff")
+_S("summation")
+_S("product")
+_S("pi")
+_S("integral")
+_S("ordfeminine")
+_S("ordmasculine")
+_S("Omega")
+_S("ae")
+_S("oslash")
+_S("questiondown")
+_S("exclamdown")
+_S("logicalnot")
+_S("radical")
+_S("florin")
+_S("approxequal")
+_S("Delta")
+_S("guillemotleft")
+_S("guillemotright")
+_S("ellipsis")
+_S("nonbreakingspace")
+_S("Agrave")
+_S("Atilde")
+_S("Otilde")
+_S("OE")
+_S("oe")
+_S("endash")
+_S("emdash")
+_S("quotedblleft")
+_S("quotedblright")
+_S("quoteleft")
+_S("quoteright")
+_S("divide")
+_S("lozenge")
+_S("ydieresis")
+_S("Ydieresis")
+_S("fraction")
+_S("currency")
+_S("guilsinglleft")
+_S("guilsinglright")
+_S("fi")
+_S("fl")
+_S("daggerdbl")
+_S("periodcentered")
+_S("quotesinglbase")
+_S("quotedblbase")
+_S("perthousand")
+_S("Acircumflex")
+_S("Ecircumflex")
+_S("Aacute")
+_S("Edieresis")
+_S("Egrave")
+_S("Iacute")
+_S("Icircumflex")
+_S("Idieresis")
+_S("Igrave")
+_S("Oacute")
+_S("Ocircumflex")
+_S("apple")
+_S("Ograve")
+_S("Uacute")
+_S("Ucircumflex")
+_S("Ugrave")
+_S("dotlessi")
+_S("circumflex")
+_S("tilde")
+_S("macron")
+_S("breve")
+_S("dotaccent")
+_S("ring")
+_S("cedilla")
+_S("hungarumlaut")
+_S("ogonek")
+_S("caron")
+_S("Lslash")
+_S("lslash")
+_S("Scaron")
+_S("scaron")
+_S("Zcaron")
+_S("zcaron")
+_S("brokenbar")
+_S("Eth")
+_S("eth")
+_S("Yacute")
+_S("yacute")
+_S("Thorn")
+_S("thorn")
+_S("minus")
+_S("multiply")
+_S("onesuperior")
+_S("twosuperior")
+_S("threesuperior")
+_S("onehalf")
+_S("onequarter")
+_S("threequarters")
+_S("franc")
+_S("Gbreve")
+_S("gbreve")
+_S("Idotaccent")
+_S("Scedilla")
+_S("scedilla")
+_S("Cacute")
+_S("cacute")
+_S("Ccaron")
+_S("ccaron")
+_S("dcroat")
+
+
+#endif /* HB_OT_POST_MACROMAN_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-post-table.hh b/thirdparty/harfbuzz/src/hb-ot-post-table.hh
new file mode 100644
index 0000000000..8586331cd4
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-post-table.hh
@@ -0,0 +1,298 @@
+/*
+ * Copyright © 2016 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_POST_TABLE_HH
+#define HB_OT_POST_TABLE_HH
+
+#include "hb-open-type.hh"
+
+#define HB_STRING_ARRAY_NAME format1_names
+#define HB_STRING_ARRAY_LIST "hb-ot-post-macroman.hh"
+#include "hb-string-array.hh"
+#undef HB_STRING_ARRAY_LIST
+#undef HB_STRING_ARRAY_NAME
+
+/*
+ * post -- PostScript
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/post
+ */
+#define HB_OT_TAG_post HB_TAG('p','o','s','t')
+
+
+namespace OT {
+
+
+/* Variable-length tail of a 'post' table: the glyphNameIndex array.
+ * post::accelerator_t::init() treats the bytes that follow the array as the
+ * pool of length-prefixed glyph names. */
+struct postV2Tail
+{
+ friend struct post;
+
+ /* Validates the glyphNameIndex array only; the name bytes after it are
+ * not examined here. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (glyphNameIndex.sanitize (c));
+ }
+
+ protected:
+ ArrayOf<HBUINT16> glyphNameIndex; /* This is not an offset, but is the
+ * ordinal number of the glyph in 'post'
+ * string tables. */
+/*UnsizedArrayOf<HBUINT8>
+ namesX;*/ /* Glyph names with length bytes [variable]
+ * (a Pascal string). */
+
+ public:
+ DEFINE_SIZE_ARRAY (2, glyphNameIndex);
+};
+
+struct post
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_post;
+
+  /* Emits a glyph-name-less copy of this table: the fixed-size header
+   * (post::min_size bytes) is duplicated into freshly allocated serializer
+   * space and its major version is overwritten with 3.  Does nothing when
+   * the allocation fails. */
+  void serialize (hb_serialize_context_t *c) const
+  {
+    post *out = c->allocate_min<post> ();
+    if (likely (out))
+    {
+      memcpy (out, this, post::min_size);
+      out->version.major = 3; // Version 3 does not have any glyph names.
+    }
+  }
+
+  /* Subsetting entry point.  Re-serializes the table through serialize()
+   * and reports failure when embedding could not be started or when the
+   * serializer ended up in error / out of room. */
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    auto *s = c->serializer;
+
+    if (unlikely (!s->start_embed<post> ())) return_trace (false);
+
+    serialize (s);
+
+    bool failed = s->in_error () || s->ran_out_of_room;
+    return_trace (!failed);
+  }
+
+ struct accelerator_t
+ {
+    /* Loads the face's 'post' table through the sanitizer and caches its
+     * version.  For version 2.0 only, also remembers the glyphNameIndex
+     * array and the name pool behind it, and records the start offset of
+     * every length-prefixed name that lies fully inside the table (capped
+     * at 65535 entries). */
+    void init (hb_face_t *face)
+    {
+      index_to_offset.init ();
+
+      table = hb_sanitize_context_t ().reference_table<post> (face);
+
+      version = table->version.to_int ();
+      if (version != 0x00020000) return;
+
+      const postV2Tail &tail = table->v2X;
+      glyphNameIndex = &tail.glyphNameIndex;
+      pool = &StructAfter<uint8_t> (tail.glyphNameIndex);
+
+      const uint8_t *end = (const uint8_t *) (const void *) table + table.get_length ();
+      const uint8_t *cursor = pool;
+      /* *cursor is the length byte; the name must end before `end`. */
+      while (index_to_offset.length < 65535 && cursor < end && cursor + *cursor < end)
+      {
+        index_to_offset.push (cursor - pool);
+        cursor += 1 + *cursor;
+      }
+    }
+    /* Releases the offset vector, the cached sorted-gid array (if one was
+     * ever built) and the reference to the table blob. */
+    void fini ()
+    {
+      uint16_t *sorted = gids_sorted_by_name.get ();
+
+      index_to_offset.fini ();
+      free (sorted);
+      table.destroy ();
+    }
+
+    /* Copies the name of @glyph into @buf, truncated to fit and always
+     * NUL-terminated.  Returns false when the glyph has no (non-empty)
+     * name; returns true without touching @buf when @buf_len is zero. */
+    bool get_glyph_name (hb_codepoint_t glyph,
+                         char *buf, unsigned int buf_len) const
+    {
+      hb_bytes_t name = find_glyph_name (glyph);
+      if (!name.length) return false;
+
+      if (buf_len)
+      {
+        /* Leave room for the terminator. */
+        unsigned int n = hb_min (buf_len - 1, name.length);
+        strncpy (buf, name.arrayZ, n);
+        buf[n] = '\0';
+      }
+      return true;
+    }
+
+    /* Looks up the glyph whose name equals @name (@len bytes, or the whole
+     * C string when @len is negative) and stores it in *@glyph.
+     *
+     * The first successful call builds an array of glyph ids sorted with
+     * cmp_gids() and publishes it through gids_sorted_by_name; if another
+     * array was published in the meantime, the fresh one is discarded and
+     * the published one is used instead.
+     *
+     * Returns false for an empty table, an empty name, allocation failure,
+     * or when no glyph carries that name. */
+    bool get_glyph_from_name (const char *name, int len,
+                              hb_codepoint_t *glyph) const
+    {
+      unsigned int count = get_glyph_count ();
+      if (unlikely (!count)) return false;
+
+      if (len < 0) len = strlen (name);
+
+      if (unlikely (!len)) return false;
+
+      uint16_t *gids = gids_sorted_by_name.get ();
+      while (unlikely (!gids))
+      {
+        uint16_t *fresh = (uint16_t *) malloc (count * sizeof (fresh[0]));
+        if (unlikely (!fresh))
+          return false; /* Anything better?! */
+
+        for (unsigned int gid = 0; gid < count; gid++)
+          fresh[gid] = gid;
+        hb_qsort (fresh, count, sizeof (fresh[0]), cmp_gids, (void *) this);
+
+        if (likely (gids_sorted_by_name.cmpexch (nullptr, fresh)))
+          gids = fresh;
+        else
+        {
+          /* Lost the race: drop ours and pick up the published array. */
+          free (fresh);
+          gids = gids_sorted_by_name.get ();
+        }
+      }
+
+      hb_bytes_t key (name, len);
+      auto *hit = hb_bsearch (key, gids, count, sizeof (gids[0]), cmp_key, (void *) this);
+      if (!hit)
+        return false;
+
+      *glyph = *hit;
+      return true;
+    }
+
+ hb_blob_ptr_t<post> table;
+
+ protected:
+
+    /* Number of glyphs that can carry a name: the size of the built-in
+     * format1 list for version 1.0, the glyphNameIndex length for version
+     * 2.0, and zero for anything else. */
+    unsigned int get_glyph_count () const
+    {
+      switch (version)
+      {
+        case 0x00010000: return format1_names_length;
+        case 0x00020000: return glyphNameIndex->len;
+        default:         return 0;
+      }
+    }
+
+    /* hb_qsort() comparator over uint16_t glyph ids; @arg is the
+     * accelerator.  Compares the name of *@pb against the name of *@pa
+     * (note the operand order). */
+    static int cmp_gids (const void *pa, const void *pb, void *arg)
+    {
+      const accelerator_t *self = (const accelerator_t *) arg;
+      hb_bytes_t name_a = self->find_glyph_name (* (const uint16_t *) pa);
+      hb_bytes_t name_b = self->find_glyph_name (* (const uint16_t *) pb);
+      return name_b.cmp (name_a);
+    }
+
+    /* hb_bsearch() comparator: @pk points at the hb_bytes_t being searched
+     * for, @po at a uint16_t glyph id from the sorted array, @arg is the
+     * accelerator.  Compares the glyph's name against the key. */
+    static int cmp_key (const void *pk, const void *po, void *arg)
+    {
+      const accelerator_t *self = (const accelerator_t *) arg;
+      const hb_bytes_t &wanted = * (const hb_bytes_t *) pk;
+      hb_bytes_t candidate = self->find_glyph_name (* (const uint16_t *) po);
+      return candidate.cmp (wanted);
+    }
+
+    /* Returns the name bytes of @glyph, or an empty hb_bytes_t when there
+     * is none.
+     *
+     * Version 1.0: @glyph indexes the built-in format1 list directly.
+     * Version 2.0: glyphNameIndex maps @glyph to an index; indices below
+     * format1_names_length refer to the built-in list, the rest (after
+     * subtracting that length) select an entry of the table's own
+     * length-prefixed name pool.
+     * Any other version has no names. */
+    hb_bytes_t find_glyph_name (hb_codepoint_t glyph) const
+    {
+      if (version == 0x00010000)
+      {
+        if (glyph < format1_names_length)
+          return format1_names (glyph);
+        return hb_bytes_t ();
+      }
+
+      if (version != 0x00020000) return hb_bytes_t ();
+      if (glyph >= glyphNameIndex->len) return hb_bytes_t ();
+
+      unsigned int index = glyphNameIndex->arrayZ[glyph];
+      if (index < format1_names_length)
+        return format1_names (index);
+
+      unsigned int custom = index - format1_names_length;
+      if (custom >= index_to_offset.length)
+        return hb_bytes_t ();
+
+      /* The entry starts with its length byte, followed by the characters. */
+      const uint8_t *entry = pool + index_to_offset[custom];
+      unsigned int name_length = *entry;
+
+      return hb_bytes_t ((const char *) (entry + 1), name_length);
+    }
+
+ private:
+ uint32_t version;
+ const ArrayOf<HBUINT16> *glyphNameIndex;
+ hb_vector_t<uint32_t> index_to_offset;
+ const uint8_t *pool;
+ hb_atomic_ptr_t<uint16_t *> gids_sorted_by_name;
+ };
+
+  /* True when the version field is non-zero. */
+  bool has_data () const { return version.to_int () != 0; }
+
+  /* Accepts the table when the fixed header is in bounds and the version
+   * is 1.0, 3.0, or 2.0 with a valid v2 tail.  Every other version is
+   * rejected. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!c->check_struct (this)) return_trace (likely (false));
+
+    uint32_t v = version.to_int ();
+    bool ok = v == 0x00010000 ||
+              (v == 0x00020000 && v2X.sanitize (c)) ||
+              v == 0x00030000;
+    return_trace (likely (ok));
+  }
+
+ public:
+ FixedVersion<>version; /* 0x00010000 for version 1.0
+ * 0x00020000 for version 2.0
+ * 0x00025000 for version 2.5 (deprecated)
+ * 0x00030000 for version 3.0 */
+ HBFixed italicAngle; /* Italic angle in counter-clockwise degrees
+ * from the vertical. Zero for upright text,
+ * negative for text that leans to the right
+ * (forward). */
+ FWORD underlinePosition; /* This is the suggested distance of the top
+ * of the underline from the baseline
+ * (negative values indicate below baseline).
+ * The PostScript definition of this FontInfo
+ * dictionary key (the y coordinate of the
+ * center of the stroke) is not used for
+ * historical reasons. The value of the
+ * PostScript key may be calculated by
+ * subtracting half the underlineThickness
+ * from the value of this field. */
+ FWORD underlineThickness; /* Suggested values for the underline
+ thickness. */
+ HBUINT32 isFixedPitch; /* Set to 0 if the font is proportionally
+ * spaced, non-zero if the font is not
+ * proportionally spaced (i.e. monospaced). */
+ HBUINT32 minMemType42; /* Minimum memory usage when an OpenType font
+ * is downloaded. */
+ HBUINT32 maxMemType42; /* Maximum memory usage when an OpenType font
+ * is downloaded. */
+ HBUINT32 minMemType1; /* Minimum memory usage when an OpenType font
+ * is downloaded as a Type 1 font. */
+ HBUINT32 maxMemType1; /* Maximum memory usage when an OpenType font
+ * is downloaded as a Type 1 font. */
+ postV2Tail v2X;
+ DEFINE_SIZE_MIN (32);
+};
+
+/* Namespace-level name for post::accelerator_t; adds no members of its own. */
+struct post_accelerator_t : post::accelerator_t {};
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_POST_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-fallback.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-fallback.hh
new file mode 100644
index 0000000000..244e967b12
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-fallback.hh
@@ -0,0 +1,348 @@
+/*
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_FALLBACK_HH
+#define HB_OT_SHAPE_COMPLEX_ARABIC_FALLBACK_HH
+
+#include "hb.hh"
+
+#include "hb-ot-shape.hh"
+#include "hb-ot-layout-gsub-table.hh"
+
+
+/* Features ordered the same as the entries in shaping_table rows,
+ * followed by rlig. Don't change. */
+static const hb_tag_t arabic_fallback_features[] =
+{
+ HB_TAG('i','n','i','t'),
+ HB_TAG('m','e','d','i'),
+ HB_TAG('f','i','n','a'),
+ HB_TAG('i','s','o','l'),
+ HB_TAG('r','l','i','g'),
+};
+
+/* Builds a single-substitution lookup for column @feature_index of
+ * shaping_table.  A pair (base glyph -> form glyph) is emitted for each
+ * table row whose entry is non-zero, where the font maps both codepoints,
+ * the two glyphs differ, and both glyph ids fit in 16 bits.
+ *
+ * Returns a copy of the serialized lookup made by the serializer, or
+ * nullptr when no pair qualifies or serialization fails. */
+static OT::SubstLookup *
+arabic_fallback_synthesize_lookup_single (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                                          hb_font_t *font,
+                                          unsigned int feature_index)
+{
+  OT::HBGlyphID glyphs[SHAPING_TABLE_LAST - SHAPING_TABLE_FIRST + 1];
+  OT::HBGlyphID substitutes[SHAPING_TABLE_LAST - SHAPING_TABLE_FIRST + 1];
+  unsigned int num_glyphs = 0;
+
+  /* Collect the usable (glyph, substitute) pairs. */
+  for (hb_codepoint_t u = SHAPING_TABLE_FIRST; u <= SHAPING_TABLE_LAST; u++)
+  {
+    hb_codepoint_t s = shaping_table[u - SHAPING_TABLE_FIRST][feature_index];
+    if (!s) continue;
+
+    hb_codepoint_t u_glyph, s_glyph;
+    if (!hb_font_get_glyph (font, u, 0, &u_glyph)) continue;
+    if (!hb_font_get_glyph (font, s, 0, &s_glyph)) continue;
+
+    if (u_glyph == s_glyph) continue;
+    if (u_glyph > 0xFFFFu || s_glyph > 0xFFFFu) continue;
+
+    glyphs[num_glyphs] = u_glyph;
+    substitutes[num_glyphs] = s_glyph;
+    num_glyphs++;
+  }
+
+  if (!num_glyphs)
+    return nullptr;
+
+  /* Order by input glyph id, carrying the substitutes along. */
+  hb_stable_sort (&glyphs[0], num_glyphs,
+                  (int(*)(const OT::HBUINT16*, const OT::HBUINT16 *)) OT::HBGlyphID::cmp,
+                  &substitutes[0]);
+
+  /* Scratch buffer: at most four bytes per glyph, plus some overhead. */
+  char buf[(SHAPING_TABLE_LAST - SHAPING_TABLE_FIRST + 1) * 4 + 128];
+  hb_serialize_context_t c (buf, sizeof (buf));
+  OT::SubstLookup *lookup = c.start_serialize<OT::SubstLookup> ();
+  bool ret = lookup->serialize_single (&c,
+                                       OT::LookupFlag::IgnoreMarks,
+                                       hb_sorted_array (glyphs, num_glyphs),
+                                       hb_array (substitutes, num_glyphs));
+  c.end_serialize ();
+
+  if (!ret || c.in_error ())
+    return nullptr;
+  return c.copy<OT::SubstLookup> ();
+}
+
+/* Builds a ligature-substitution lookup from ligature_table.
+ *
+ * First pass: every table row whose first codepoint the font maps becomes
+ * a "first glyph"; these are sorted by glyph id while remembering the row
+ * each one came from.  Second pass: for each first glyph, in sorted order,
+ * every (second, ligature) pair of its row that the font maps is appended
+ * as a two-component ligature.
+ *
+ * Returns a copy of the serialized lookup made by the serializer, or
+ * nullptr when no ligature qualifies or serialization fails. */
+static OT::SubstLookup *
+arabic_fallback_synthesize_lookup_ligature (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                                            hb_font_t *font)
+{
+  OT::HBGlyphID first_glyphs[ARRAY_LENGTH_CONST (ligature_table)];
+  unsigned int first_glyphs_indirection[ARRAY_LENGTH_CONST (ligature_table)];
+  unsigned int ligature_per_first_glyph_count_list[ARRAY_LENGTH_CONST (first_glyphs)];
+  unsigned int num_first_glyphs = 0;
+
+  /* We know that all our ligatures are 2-component */
+  OT::HBGlyphID ligature_list[ARRAY_LENGTH_CONST (first_glyphs) * ARRAY_LENGTH_CONST(ligature_table[0].ligatures)];
+  unsigned int component_count_list[ARRAY_LENGTH_CONST (ligature_list)];
+  OT::HBGlyphID component_list[ARRAY_LENGTH_CONST (ligature_list) * 1/* One extra component per ligature */];
+  unsigned int num_ligatures = 0;
+
+  /* Pass 1: gather the first glyphs the font actually has. */
+  for (unsigned int row = 0; row < ARRAY_LENGTH (first_glyphs); row++)
+  {
+    hb_codepoint_t first_u = ligature_table[row].first;
+    hb_codepoint_t first_glyph;
+    if (hb_font_get_glyph (font, first_u, 0, &first_glyph))
+    {
+      first_glyphs[num_first_glyphs] = first_glyph;
+      ligature_per_first_glyph_count_list[num_first_glyphs] = 0;
+      first_glyphs_indirection[num_first_glyphs] = row;
+      num_first_glyphs++;
+    }
+  }
+  hb_stable_sort (&first_glyphs[0], num_first_glyphs,
+                  (int(*)(const OT::HBUINT16*, const OT::HBUINT16 *)) OT::HBGlyphID::cmp,
+                  &first_glyphs_indirection[0]);
+
+  /* Pass 2: in sorted first-glyph order, emit each row's ligatures. */
+  for (unsigned int i = 0; i < num_first_glyphs; i++)
+  {
+    unsigned int row = first_glyphs_indirection[i];
+
+    for (unsigned int k = 0; k < ARRAY_LENGTH (ligature_table[0].ligatures); k++)
+    {
+      hb_codepoint_t second_u = ligature_table[row].ligatures[k].second;
+      hb_codepoint_t ligature_u = ligature_table[row].ligatures[k].ligature;
+      if (!second_u) continue;
+
+      hb_codepoint_t second_glyph, ligature_glyph;
+      if (!hb_font_get_glyph (font, second_u, 0, &second_glyph)) continue;
+      if (!hb_font_get_glyph (font, ligature_u, 0, &ligature_glyph)) continue;
+
+      ligature_per_first_glyph_count_list[i]++;
+
+      ligature_list[num_ligatures] = ligature_glyph;
+      component_count_list[num_ligatures] = 2;
+      component_list[num_ligatures] = second_glyph;
+      num_ligatures++;
+    }
+  }
+
+  if (!num_ligatures)
+    return nullptr;
+
+  /* Scratch buffer: 16 bytes per ligature ought to be enough... */
+  char buf[ARRAY_LENGTH_CONST (ligature_list) * 16 + 128];
+  hb_serialize_context_t c (buf, sizeof (buf));
+  OT::SubstLookup *lookup = c.start_serialize<OT::SubstLookup> ();
+  bool ret = lookup->serialize_ligature (&c,
+                                         OT::LookupFlag::IgnoreMarks,
+                                         hb_sorted_array (first_glyphs, num_first_glyphs),
+                                         hb_array (ligature_per_first_glyph_count_list, num_first_glyphs),
+                                         hb_array (ligature_list, num_ligatures),
+                                         hb_array (component_count_list, num_ligatures),
+                                         hb_array (component_list, num_ligatures));
+  c.end_serialize ();
+  /* TODO sanitize the results? */
+
+  if (!ret || c.in_error ())
+    return nullptr;
+  return c.copy<OT::SubstLookup> ();
+}
+
+/* Dispatches on @feature_index: indices 0..3 get a single-substitution
+ * lookup for that shaping_table column; anything else gets the ligature
+ * lookup. */
+static OT::SubstLookup *
+arabic_fallback_synthesize_lookup (const hb_ot_shape_plan_t *plan,
+                                   hb_font_t *font,
+                                   unsigned int feature_index)
+{
+  if (feature_index >= 4)
+    return arabic_fallback_synthesize_lookup_ligature (plan, font);
+
+  return arabic_fallback_synthesize_lookup_single (plan, font, feature_index);
+}
+
+#define ARABIC_FALLBACK_MAX_LOOKUPS 5
+
+/* Holds up to ARABIC_FALLBACK_MAX_LOOKUPS fallback lookups.  The three
+ * arrays are parallel: slot i pairs a feature mask with a lookup and its
+ * accelerator, for i in 0..num_lookups-1. */
+struct arabic_fallback_plan_t
+{
+ unsigned int num_lookups; /* Number of populated slots in the arrays below. */
+ bool free_lookups; /* If set, arabic_fallback_plan_destroy() free()s each lookup. */
+
+ hb_mask_t mask_array[ARABIC_FALLBACK_MAX_LOOKUPS];
+ OT::SubstLookup *lookup_array[ARABIC_FALLBACK_MAX_LOOKUPS];
+ OT::hb_ot_layout_lookup_accelerator_t accel_array[ARABIC_FALLBACK_MAX_LOOKUPS];
+};
+
+#if defined(_WIN32) && !defined(HB_NO_WIN1256)
+#define HB_WITH_WIN1256
+#endif
+
+#ifdef HB_WITH_WIN1256
+#include "hb-ot-shape-complex-arabic-win1256.hh"
+#endif
+
+/* One manifest entry: a feature tag paired with an offset to a SubstLookup.
+ * arabic_fallback_plan_init_win1256() resolves the offset relative to the
+ * start of the enclosing Manifest array. */
+struct ManifestLookup
+{
+ public:
+ OT::Tag tag;
+ OT::OffsetTo<OT::SubstLookup> lookupOffset;
+ public:
+ DEFINE_SIZE_STATIC (6);
+};
+/* A length-prefixed array of ManifestLookup entries. */
+typedef OT::ArrayOf<ManifestLookup> Manifest;
+
+/* Fills @fallback_plan from the hand-coded Windows-1256 GSUB data, provided
+ * HB_WITH_WIN1256 is defined and the font maps five probe codepoints to
+ * the specific glyph ids checked below.  For every manifest entry whose
+ * feature has a non-zero mask in @plan, the referenced lookup and an
+ * accelerator for it are stored.  The lookups are not marked for freeing.
+ *
+ * Returns true when at least one lookup was installed; always false when
+ * HB_WITH_WIN1256 is not defined. */
+static bool
+arabic_fallback_plan_init_win1256 (arabic_fallback_plan_t *fallback_plan HB_UNUSED,
+                                   const hb_ot_shape_plan_t *plan HB_UNUSED,
+                                   hb_font_t *font HB_UNUSED)
+{
+#ifndef HB_WITH_WIN1256
+  return false;
+#else
+  /* Does this font look like it's Windows-1256-encoded? */
+  const struct { hb_codepoint_t unicode; hb_codepoint_t glyph; } probes[] =
+  {
+    {0x0627u, 199}, /* ALEF */
+    {0x0644u, 225}, /* LAM */
+    {0x0649u, 236}, /* ALEF MAKSURA */
+    {0x064Au, 237}, /* YEH */
+    {0x0652u, 250}, /* SUKUN */
+  };
+  for (const auto &probe : probes)
+  {
+    hb_codepoint_t g;
+    if (!hb_font_get_glyph (font, probe.unicode, 0, &g) || g != probe.glyph)
+      return false;
+  }
+
+  const Manifest &manifest = reinterpret_cast<const Manifest&> (arabic_win1256_gsub_lookups.manifest);
+  static_assert (sizeof (arabic_win1256_gsub_lookups.manifestData) ==
+                 ARABIC_FALLBACK_MAX_LOOKUPS * sizeof (ManifestLookup), "");
+  /* TODO sanitize the table? */
+
+  unsigned filled = 0;
+  unsigned int count = manifest.len;
+  for (unsigned int i = 0; i < count; i++)
+  {
+    fallback_plan->mask_array[filled] = plan->map.get_1_mask (manifest[i].tag);
+    if (!fallback_plan->mask_array[filled])
+      continue;
+
+    fallback_plan->lookup_array[filled] = const_cast<OT::SubstLookup*> (&(&manifest+manifest[i].lookupOffset));
+    if (!fallback_plan->lookup_array[filled])
+      continue;
+
+    fallback_plan->accel_array[filled].init (*fallback_plan->lookup_array[filled]);
+    filled++;
+  }
+
+  fallback_plan->num_lookups = filled;
+  fallback_plan->free_lookups = false;
+
+  return filled > 0;
+#endif
+}
+
+/* Fills @fallback_plan with lookups synthesized from the font's cmap.
+ * For each entry of arabic_fallback_features that has a non-zero mask in
+ * @plan, a lookup is synthesized; when that succeeds, the lookup and an
+ * accelerator for it take the next free slot.  The lookups are marked for
+ * freeing by arabic_fallback_plan_destroy().
+ *
+ * Returns true when at least one lookup was installed. */
+static bool
+arabic_fallback_plan_init_unicode (arabic_fallback_plan_t *fallback_plan,
+                                   const hb_ot_shape_plan_t *plan,
+                                   hb_font_t *font)
+{
+  static_assert ((ARRAY_LENGTH_CONST(arabic_fallback_features) <= ARABIC_FALLBACK_MAX_LOOKUPS), "");
+
+  unsigned int filled = 0;
+  for (unsigned int feature = 0; feature < ARRAY_LENGTH(arabic_fallback_features); feature++)
+  {
+    fallback_plan->mask_array[filled] = plan->map.get_1_mask (arabic_fallback_features[feature]);
+    if (!fallback_plan->mask_array[filled])
+      continue;
+
+    fallback_plan->lookup_array[filled] = arabic_fallback_synthesize_lookup (plan, font, feature);
+    if (!fallback_plan->lookup_array[filled])
+      continue;
+
+    fallback_plan->accel_array[filled].init (*fallback_plan->lookup_array[filled]);
+    filled++;
+  }
+
+  fallback_plan->num_lookups = filled;
+  fallback_plan->free_lookups = true;
+
+  return filled > 0;
+}
+
+/* Allocates a fallback plan and populates it, preferring lookups
+ * synthesized from Unicode Arabic Presentation Forms cmap entries and
+ * falling back to the hand-coded Windows-1256 GSUB data.
+ *
+ * Returns the shared Null plan object when allocation fails or neither
+ * source yields a lookup; in the latter case the allocation is freed
+ * before returning. */
+static arabic_fallback_plan_t *
+arabic_fallback_plan_create (const hb_ot_shape_plan_t *plan,
+                             hb_font_t *font)
+{
+  arabic_fallback_plan_t *fallback_plan = (arabic_fallback_plan_t *) calloc (1, sizeof (arabic_fallback_plan_t));
+  if (unlikely (!fallback_plan))
+    return const_cast<arabic_fallback_plan_t *> (&Null (arabic_fallback_plan_t));
+
+  fallback_plan->num_lookups = 0;
+  fallback_plan->free_lookups = false;
+
+  /* Unicode presentation forms first; Windows-1256 only if that found
+   * nothing (|| short-circuits, so the order of attempts is fixed). */
+  if (arabic_fallback_plan_init_unicode (fallback_plan, plan, font) ||
+      arabic_fallback_plan_init_win1256 (fallback_plan, plan, font))
+    return fallback_plan;
+
+  assert (fallback_plan->num_lookups == 0);
+  free (fallback_plan);
+  return const_cast<arabic_fallback_plan_t *> (&Null (arabic_fallback_plan_t));
+}
+
+/* Destroys a plan obtained from arabic_fallback_plan_create().  A null
+ * pointer or a plan with zero lookups is left untouched.  Otherwise every
+ * installed accelerator is finalized, each lookup is freed when the plan
+ * has free_lookups set, and finally the plan itself is freed. */
+static void
+arabic_fallback_plan_destroy (arabic_fallback_plan_t *fallback_plan)
+{
+  if (!fallback_plan) return;
+  if (fallback_plan->num_lookups == 0) return;
+
+  for (unsigned int slot = 0; slot < fallback_plan->num_lookups; slot++)
+  {
+    if (!fallback_plan->lookup_array[slot])
+      continue;
+
+    fallback_plan->accel_array[slot].fini ();
+    if (fallback_plan->free_lookups)
+      free (fallback_plan->lookup_array[slot]);
+  }
+
+  free (fallback_plan);
+}
+
+/* Applies every lookup of @fallback_plan to @buffer, in slot order, each
+ * under its own feature mask. */
+static void
+arabic_fallback_plan_shape (arabic_fallback_plan_t *fallback_plan,
+                            hb_font_t *font,
+                            hb_buffer_t *buffer)
+{
+  OT::hb_ot_apply_context_t c (0, font, buffer);
+
+  for (unsigned int slot = 0; slot < fallback_plan->num_lookups; slot++)
+  {
+    OT::SubstLookup *lookup = fallback_plan->lookup_array[slot];
+    if (!lookup)
+      continue;
+
+    c.set_lookup_mask (fallback_plan->mask_array[slot]);
+    hb_ot_layout_substitute_lookup (&c,
+                                    *lookup,
+                                    fallback_plan->accel_array[slot]);
+  }
+}
+
+
+#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_FALLBACK_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-joining-list.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-joining-list.hh
new file mode 100644
index 0000000000..c022d4bb06
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-joining-list.hh
@@ -0,0 +1,46 @@
+/* == Start of generated function == */
+/*
+ * The following function is generated by running:
+ *
+ * ./gen-arabic-joining-list.py ArabicShaping.txt Scripts.txt
+ *
+ * on files with these headers:
+ *
+ * # ArabicShaping-13.0.0.txt
+ * # Date: 2020-01-31, 23:55:00 GMT [KW, RP]
+ * # Scripts-13.0.0.txt
+ * # Date: 2020-01-22, 00:07:43 GMT
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH
+#define HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH
+
+/* Tells whether @script is one of the scripts listed below as having data
+ * in arabic-table. */
+static bool
+has_arabic_joining (hb_script_t script)
+{
+  switch (static_cast<int> (script))
+  {
+    default:
+      return false;
+
+    /* List of scripts that have data in arabic-table. */
+    case HB_SCRIPT_ADLAM:
+    case HB_SCRIPT_ARABIC:
+    case HB_SCRIPT_CHORASMIAN:
+    case HB_SCRIPT_HANIFI_ROHINGYA:
+    case HB_SCRIPT_MANDAIC:
+    case HB_SCRIPT_MANICHAEAN:
+    case HB_SCRIPT_MONGOLIAN:
+    case HB_SCRIPT_NKO:
+    case HB_SCRIPT_PHAGS_PA:
+    case HB_SCRIPT_PSALTER_PAHLAVI:
+    case HB_SCRIPT_SOGDIAN:
+    case HB_SCRIPT_SYRIAC:
+      return true;
+  }
+}
+
+
+#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH */
+
+/* == End of generated function == */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-table.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-table.hh
new file mode 100644
index 0000000000..70ffe623c0
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-table.hh
@@ -0,0 +1,433 @@
+/* == Start of generated table == */
+/*
+ * The following table is generated by running:
+ *
+ * ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt
+ *
+ * on files with these headers:
+ *
+ * # ArabicShaping-13.0.0.txt
+ * # Date: 2020-01-31, 23:55:00 GMT [KW, RP]
+ * # Blocks-13.0.0.txt
+ * # Date: 2019-07-10, 19:06:00 GMT [KW]
+ * UnicodeData.txt does not have a header.
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH
+#define HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH
+
+
+#define A JOINING_GROUP_ALAPH /* One- and two-letter aliases keep the joining_table rows compact; all nine are #undef'd right after joining_type(). */
+#define DR JOINING_GROUP_DALATH_RISH
+#define C JOINING_TYPE_C
+#define D JOINING_TYPE_D
+#define L JOINING_TYPE_L
+#define R JOINING_TYPE_R
+#define T JOINING_TYPE_T
+#define U JOINING_TYPE_U
+#define X JOINING_TYPE_X /* Also the value joining_type() returns for code points outside every table range. */
+
+static const uint8_t joining_table[] =
+{
+
+#define joining_offset_0x0600u 0
+
+ /* Arabic */
+
+ /* 0600 */ U,U,U,U,U,U,X,X,U,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+ /* 0620 */ D,U,R,R,R,R,D,R,D,R,D,D,D,D,D,R,R,R,R,D,D,D,D,D,D,D,D,D,D,D,D,D,
+ /* 0640 */ C,D,D,D,D,D,D,D,R,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+ /* 0660 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,D,D,X,R,R,R,U,R,R,R,D,D,D,D,D,D,D,D,
+ /* 0680 */ D,D,D,D,D,D,D,D,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,D,D,D,D,D,D,
+ /* 06A0 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
+ /* 06C0 */ R,D,D,R,R,R,R,R,R,R,R,R,D,R,D,R,D,D,R,R,X,R,X,X,X,X,X,X,X,U,X,X,
+ /* 06E0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,R,R,X,X,X,X,X,X,X,X,X,X,D,D,D,X,X,D,
+
+ /* Syriac */
+
+ /* 0700 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,T,A,X,D,D,D,DR,DR,R,R,R,D,D,D,D,R,D,
+ /* 0720 */ D,D,D,D,D,D,D,D,R,D,DR,D,R,D,D,DR,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+ /* 0740 */ X,X,X,X,X,X,X,X,X,X,X,X,X,R,D,D,
+
+ /* Arabic Supplement */
+
+ /* 0740 */ D,D,D,D,D,D,D,D,D,R,R,R,D,D,D,D,
+ /* 0760 */ D,D,D,D,D,D,D,D,D,D,D,R,R,D,D,D,D,R,D,R,R,D,D,D,R,R,D,D,D,D,D,D,
+
+ /* FILLER */
+
+ /* 0780 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+ /* 07A0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+
+ /* NKo */
+
+ /* 07C0 */ X,X,X,X,X,X,X,X,X,X,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
+ /* 07E0 */ D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,C,X,X,X,X,X,
+
+ /* FILLER */
+
+ /* 0800 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+ /* 0820 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+
+ /* Mandaic */
+
+ /* 0840 */ R,D,D,D,D,D,R,R,D,R,D,D,D,D,D,D,D,D,D,D,R,D,R,R,R,X,X,X,X,X,X,X,
+
+ /* Syriac Supplement */
+
+ /* 0860 */ D,U,D,D,D,D,U,R,D,R,R,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+ /* 0880 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+
+ /* Arabic Extended-A */
+
+ /* 08A0 */ D,D,D,D,D,D,D,D,D,D,R,R,R,U,R,D,D,R,R,D,D,X,D,D,D,R,D,D,D,D,D,D,
+ /* 08C0 */ D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+ /* 08E0 */ X,X,U,
+
+#define joining_offset_0x1806u 739
+
+ /* Mongolian */
+
+ /* 1800 */ U,D,X,X,C,X,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+ /* 1820 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
+ /* 1840 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
+ /* 1860 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,
+ /* 1880 */ U,U,U,U,U,T,T,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
+ /* 18A0 */ D,D,D,D,D,D,D,D,D,X,D,
+
+#define joining_offset_0x200cu 904
+
+ /* General Punctuation */
+
+ /* 2000 */ U,C,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+ /* 2020 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+ /* 2040 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+ /* 2060 */ X,X,X,X,X,X,U,U,U,U,
+
+#define joining_offset_0xa840u 998
+
+ /* Phags-pa */
+
+ /* A840 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
+ /* A860 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,L,U,
+
+#define joining_offset_0x10ac0u 1050
+
+ /* Manichaean */
+
+ /* 10AC0 */ D,D,D,D,D,R,U,R,U,R,R,U,U,L,R,R,R,R,R,D,D,D,D,L,D,D,D,D,D,R,D,D,
+ /* 10AE0 */ D,R,U,U,R,X,X,X,X,X,X,D,D,D,D,R,
+
+#define joining_offset_0x10b80u 1098
+
+ /* Psalter Pahlavi */
+
+ /* 10B80 */ D,R,D,R,R,R,D,D,D,R,D,D,R,D,R,R,D,R,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+ /* 10BA0 */ X,X,X,X,X,X,X,X,X,R,R,R,R,D,D,U,
+
+#define joining_offset_0x10d00u 1146
+
+ /* Hanifi Rohingya */
+
+ /* 10D00 */ L,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
+ /* 10D20 */ D,D,R,D,
+
+#define joining_offset_0x10f30u 1182
+
+ /* Sogdian */
+
+ /* 10F20 */ D,D,D,R,D,D,D,D,D,D,D,D,D,D,D,D,
+ /* 10F40 */ D,D,D,D,D,U,X,X,X,X,X,X,X,X,X,X,X,D,D,D,R,
+
+#define joining_offset_0x10fb0u 1219
+
+ /* Chorasmian */
+
+ /* 10FA0 */ D,U,D,D,R,R,R,U,D,R,R,D,D,R,D,D,
+ /* 10FC0 */ U,D,R,R,D,U,U,U,U,R,D,L,
+
+#define joining_offset_0x110bdu 1247
+
+ /* Kaithi */
+
+ /* 110A0 */ U,X,X,
+ /* 110C0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,U,
+
+#define joining_offset_0x1e900u 1264
+
+ /* Adlam */
+
+ /* 1E900 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
+ /* 1E920 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
+ /* 1E940 */ D,D,D,D,X,X,X,X,X,X,X,T,
+
+}; /* Table items: 1340; occupancy: 57% */
+
+
+static unsigned int
+joining_type (hb_codepoint_t u)
+{ /* Returns the joining_table entry for code point u, or X when u lies in none of the ranges tested below. */
+ switch (u >> 12) /* Dispatch on the bits above the low 12, so each case only tests ranges inside its own 0x1000-wide block. */
+ {
+ case 0x0u:
+ if (hb_in_range<hb_codepoint_t> (u, 0x0600u, 0x08E2u)) return joining_table[u - 0x0600u + joining_offset_0x0600u]; /* Index = position within the range + the range's start offset in joining_table; same pattern in every case. */
+ break;
+
+ case 0x1u:
+ if (hb_in_range<hb_codepoint_t> (u, 0x1806u, 0x18AAu)) return joining_table[u - 0x1806u + joining_offset_0x1806u];
+ break;
+
+ case 0x2u:
+ if (hb_in_range<hb_codepoint_t> (u, 0x200Cu, 0x2069u)) return joining_table[u - 0x200Cu + joining_offset_0x200cu];
+ break;
+
+ case 0xAu:
+ if (hb_in_range<hb_codepoint_t> (u, 0xA840u, 0xA873u)) return joining_table[u - 0xA840u + joining_offset_0xa840u];
+ break;
+
+ case 0x10u: /* Five separate table ranges share this 0x1000-wide block. */
+ if (hb_in_range<hb_codepoint_t> (u, 0x10AC0u, 0x10AEFu)) return joining_table[u - 0x10AC0u + joining_offset_0x10ac0u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x10B80u, 0x10BAFu)) return joining_table[u - 0x10B80u + joining_offset_0x10b80u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x10D00u, 0x10D23u)) return joining_table[u - 0x10D00u + joining_offset_0x10d00u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x10F30u, 0x10F54u)) return joining_table[u - 0x10F30u + joining_offset_0x10f30u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x10FB0u, 0x10FCBu)) return joining_table[u - 0x10FB0u + joining_offset_0x10fb0u];
+ break;
+
+ case 0x11u:
+ if (hb_in_range<hb_codepoint_t> (u, 0x110BDu, 0x110CDu)) return joining_table[u - 0x110BDu + joining_offset_0x110bdu];
+ break;
+
+ case 0x1Eu:
+ if (hb_in_range<hb_codepoint_t> (u, 0x1E900u, 0x1E94Bu)) return joining_table[u - 0x1E900u + joining_offset_0x1e900u];
+ break;
+
+ default:
+ break;
+ }
+ return X; /* No range matched. X is still the JOINING_TYPE_X alias here; it is #undef'd only after this function. */
+}
+
+#undef A
+#undef DR
+#undef C
+#undef D
+#undef L
+#undef R
+#undef T
+#undef U
+#undef X
+
+
+static const uint16_t shaping_table[][4] =
+{
+ {0x0000u, 0x0000u, 0x0000u, 0xFE80u}, /* U+0621 ARABIC LETTER HAMZA ISOLATED FORM */
+ {0x0000u, 0x0000u, 0xFE82u, 0xFE81u}, /* U+0622 ARABIC LETTER ALEF WITH MADDA ABOVE */
+ {0x0000u, 0x0000u, 0xFE84u, 0xFE83u}, /* U+0623 ARABIC LETTER ALEF WITH HAMZA ABOVE */
+ {0x0000u, 0x0000u, 0xFE86u, 0xFE85u}, /* U+0624 ARABIC LETTER WAW WITH HAMZA ABOVE */
+ {0x0000u, 0x0000u, 0xFE88u, 0xFE87u}, /* U+0625 ARABIC LETTER ALEF WITH HAMZA BELOW */
+ {0xFE8Bu, 0xFE8Cu, 0xFE8Au, 0xFE89u}, /* U+0626 ARABIC LETTER YEH WITH HAMZA ABOVE */
+ {0x0000u, 0x0000u, 0xFE8Eu, 0xFE8Du}, /* U+0627 ARABIC LETTER ALEF */
+ {0xFE91u, 0xFE92u, 0xFE90u, 0xFE8Fu}, /* U+0628 ARABIC LETTER BEH */
+ {0x0000u, 0x0000u, 0xFE94u, 0xFE93u}, /* U+0629 ARABIC LETTER TEH MARBUTA */
+ {0xFE97u, 0xFE98u, 0xFE96u, 0xFE95u}, /* U+062A ARABIC LETTER TEH */
+ {0xFE9Bu, 0xFE9Cu, 0xFE9Au, 0xFE99u}, /* U+062B ARABIC LETTER THEH */
+ {0xFE9Fu, 0xFEA0u, 0xFE9Eu, 0xFE9Du}, /* U+062C ARABIC LETTER JEEM */
+ {0xFEA3u, 0xFEA4u, 0xFEA2u, 0xFEA1u}, /* U+062D ARABIC LETTER HAH */
+ {0xFEA7u, 0xFEA8u, 0xFEA6u, 0xFEA5u}, /* U+062E ARABIC LETTER KHAH */
+ {0x0000u, 0x0000u, 0xFEAAu, 0xFEA9u}, /* U+062F ARABIC LETTER DAL */
+ {0x0000u, 0x0000u, 0xFEACu, 0xFEABu}, /* U+0630 ARABIC LETTER THAL */
+ {0x0000u, 0x0000u, 0xFEAEu, 0xFEADu}, /* U+0631 ARABIC LETTER REH */
+ {0x0000u, 0x0000u, 0xFEB0u, 0xFEAFu}, /* U+0632 ARABIC LETTER ZAIN */
+ {0xFEB3u, 0xFEB4u, 0xFEB2u, 0xFEB1u}, /* U+0633 ARABIC LETTER SEEN */
+ {0xFEB7u, 0xFEB8u, 0xFEB6u, 0xFEB5u}, /* U+0634 ARABIC LETTER SHEEN */
+ {0xFEBBu, 0xFEBCu, 0xFEBAu, 0xFEB9u}, /* U+0635 ARABIC LETTER SAD */
+ {0xFEBFu, 0xFEC0u, 0xFEBEu, 0xFEBDu}, /* U+0636 ARABIC LETTER DAD */
+ {0xFEC3u, 0xFEC4u, 0xFEC2u, 0xFEC1u}, /* U+0637 ARABIC LETTER TAH */
+ {0xFEC7u, 0xFEC8u, 0xFEC6u, 0xFEC5u}, /* U+0638 ARABIC LETTER ZAH */
+ {0xFECBu, 0xFECCu, 0xFECAu, 0xFEC9u}, /* U+0639 ARABIC LETTER AIN */
+ {0xFECFu, 0xFED0u, 0xFECEu, 0xFECDu}, /* U+063A ARABIC LETTER GHAIN */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063B */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063C */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063D */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063E */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063F */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0640 */
+ {0xFED3u, 0xFED4u, 0xFED2u, 0xFED1u}, /* U+0641 ARABIC LETTER FEH */
+ {0xFED7u, 0xFED8u, 0xFED6u, 0xFED5u}, /* U+0642 ARABIC LETTER QAF */
+ {0xFEDBu, 0xFEDCu, 0xFEDAu, 0xFED9u}, /* U+0643 ARABIC LETTER KAF */
+ {0xFEDFu, 0xFEE0u, 0xFEDEu, 0xFEDDu}, /* U+0644 ARABIC LETTER LAM */
+ {0xFEE3u, 0xFEE4u, 0xFEE2u, 0xFEE1u}, /* U+0645 ARABIC LETTER MEEM */
+ {0xFEE7u, 0xFEE8u, 0xFEE6u, 0xFEE5u}, /* U+0646 ARABIC LETTER NOON */
+ {0xFEEBu, 0xFEECu, 0xFEEAu, 0xFEE9u}, /* U+0647 ARABIC LETTER HEH */
+ {0x0000u, 0x0000u, 0xFEEEu, 0xFEEDu}, /* U+0648 ARABIC LETTER WAW */
+ {0xFBE8u, 0xFBE9u, 0xFEF0u, 0xFEEFu}, /* U+0649 ARABIC LETTER ALEF MAKSURA */
+ {0xFEF3u, 0xFEF4u, 0xFEF2u, 0xFEF1u}, /* U+064A ARABIC LETTER YEH */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064B */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064C */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064D */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064E */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064F */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0650 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0651 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0652 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0653 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0654 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0655 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0656 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0657 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0658 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0659 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065A */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065B */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065C */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065D */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065E */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065F */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0660 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0661 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0662 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0663 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0664 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0665 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0666 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0667 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0668 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0669 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066A */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066B */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066C */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066D */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066E */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066F */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0670 */
+ {0x0000u, 0x0000u, 0xFB51u, 0xFB50u}, /* U+0671 ARABIC LETTER ALEF WASLA */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0672 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0673 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0674 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0675 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0676 */
+ {0x0000u, 0x0000u, 0x0000u, 0xFBDDu}, /* U+0677 ARABIC LETTER U WITH HAMZA ABOVE ISOLATED FORM */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0678 */
+ {0xFB68u, 0xFB69u, 0xFB67u, 0xFB66u}, /* U+0679 ARABIC LETTER TTEH */
+ {0xFB60u, 0xFB61u, 0xFB5Fu, 0xFB5Eu}, /* U+067A ARABIC LETTER TTEHEH */
+ {0xFB54u, 0xFB55u, 0xFB53u, 0xFB52u}, /* U+067B ARABIC LETTER BEEH */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+067C */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+067D */
+ {0xFB58u, 0xFB59u, 0xFB57u, 0xFB56u}, /* U+067E ARABIC LETTER PEH */
+ {0xFB64u, 0xFB65u, 0xFB63u, 0xFB62u}, /* U+067F ARABIC LETTER TEHEH */
+ {0xFB5Cu, 0xFB5Du, 0xFB5Bu, 0xFB5Au}, /* U+0680 ARABIC LETTER BEHEH */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0681 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0682 */
+ {0xFB78u, 0xFB79u, 0xFB77u, 0xFB76u}, /* U+0683 ARABIC LETTER NYEH */
+ {0xFB74u, 0xFB75u, 0xFB73u, 0xFB72u}, /* U+0684 ARABIC LETTER DYEH */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0685 */
+ {0xFB7Cu, 0xFB7Du, 0xFB7Bu, 0xFB7Au}, /* U+0686 ARABIC LETTER TCHEH */
+ {0xFB80u, 0xFB81u, 0xFB7Fu, 0xFB7Eu}, /* U+0687 ARABIC LETTER TCHEHEH */
+ {0x0000u, 0x0000u, 0xFB89u, 0xFB88u}, /* U+0688 ARABIC LETTER DDAL */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0689 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+068A */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+068B */
+ {0x0000u, 0x0000u, 0xFB85u, 0xFB84u}, /* U+068C ARABIC LETTER DAHAL */
+ {0x0000u, 0x0000u, 0xFB83u, 0xFB82u}, /* U+068D ARABIC LETTER DDAHAL */
+ {0x0000u, 0x0000u, 0xFB87u, 0xFB86u}, /* U+068E ARABIC LETTER DUL */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+068F */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0690 */
+ {0x0000u, 0x0000u, 0xFB8Du, 0xFB8Cu}, /* U+0691 ARABIC LETTER RREH */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0692 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0693 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0694 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0695 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0696 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0697 */
+ {0x0000u, 0x0000u, 0xFB8Bu, 0xFB8Au}, /* U+0698 ARABIC LETTER JEH */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0699 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069A */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069B */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069C */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069D */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069E */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069F */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A0 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A1 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A2 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A3 */
+ {0xFB6Cu, 0xFB6Du, 0xFB6Bu, 0xFB6Au}, /* U+06A4 ARABIC LETTER VEH */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A5 */
+ {0xFB70u, 0xFB71u, 0xFB6Fu, 0xFB6Eu}, /* U+06A6 ARABIC LETTER PEHEH */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A7 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A8 */
+ {0xFB90u, 0xFB91u, 0xFB8Fu, 0xFB8Eu}, /* U+06A9 ARABIC LETTER KEHEH */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AA */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AB */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AC */
+ {0xFBD5u, 0xFBD6u, 0xFBD4u, 0xFBD3u}, /* U+06AD ARABIC LETTER NG */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AE */
+ {0xFB94u, 0xFB95u, 0xFB93u, 0xFB92u}, /* U+06AF ARABIC LETTER GAF */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B0 */
+ {0xFB9Cu, 0xFB9Du, 0xFB9Bu, 0xFB9Au}, /* U+06B1 ARABIC LETTER NGOEH */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B2 */
+ {0xFB98u, 0xFB99u, 0xFB97u, 0xFB96u}, /* U+06B3 ARABIC LETTER GUEH */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B4 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B5 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B6 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B7 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B8 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B9 */
+ {0x0000u, 0x0000u, 0xFB9Fu, 0xFB9Eu}, /* U+06BA ARABIC LETTER NOON GHUNNA */
+ {0xFBA2u, 0xFBA3u, 0xFBA1u, 0xFBA0u}, /* U+06BB ARABIC LETTER RNOON */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06BC */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06BD */
+ {0xFBACu, 0xFBADu, 0xFBABu, 0xFBAAu}, /* U+06BE ARABIC LETTER HEH DOACHASHMEE */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06BF */
+ {0x0000u, 0x0000u, 0xFBA5u, 0xFBA4u}, /* U+06C0 ARABIC LETTER HEH WITH YEH ABOVE */
+ {0xFBA8u, 0xFBA9u, 0xFBA7u, 0xFBA6u}, /* U+06C1 ARABIC LETTER HEH GOAL */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06C2 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06C3 */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06C4 */
+ {0x0000u, 0x0000u, 0xFBE1u, 0xFBE0u}, /* U+06C5 ARABIC LETTER KIRGHIZ OE */
+ {0x0000u, 0x0000u, 0xFBDAu, 0xFBD9u}, /* U+06C6 ARABIC LETTER OE */
+ {0x0000u, 0x0000u, 0xFBD8u, 0xFBD7u}, /* U+06C7 ARABIC LETTER U */
+ {0x0000u, 0x0000u, 0xFBDCu, 0xFBDBu}, /* U+06C8 ARABIC LETTER YU */
+ {0x0000u, 0x0000u, 0xFBE3u, 0xFBE2u}, /* U+06C9 ARABIC LETTER KIRGHIZ YU */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CA */
+ {0x0000u, 0x0000u, 0xFBDFu, 0xFBDEu}, /* U+06CB ARABIC LETTER VE */
+ {0xFBFEu, 0xFBFFu, 0xFBFDu, 0xFBFCu}, /* U+06CC ARABIC LETTER FARSI YEH */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CD */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CE */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CF */
+ {0xFBE6u, 0xFBE7u, 0xFBE5u, 0xFBE4u}, /* U+06D0 ARABIC LETTER E */
+ {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06D1 */
+ {0x0000u, 0x0000u, 0xFBAFu, 0xFBAEu}, /* U+06D2 ARABIC LETTER YEH BARREE */
+ {0x0000u, 0x0000u, 0xFBB1u, 0xFBB0u}, /* U+06D3 ARABIC LETTER YEH BARREE WITH HAMZA ABOVE */
+};
+
+#define SHAPING_TABLE_FIRST 0x0621u /* Code point of the first shaping_table row. */
+#define SHAPING_TABLE_LAST 0x06D3u /* Code point of the last shaping_table row; rows are consecutive code points, so LAST - FIRST + 1 is the row count. */
+
+
+static const struct ligature_set_t {
+ uint16_t first; /* Value shared by every pair in this set; both sets below use entries from the U+0644 (LAM) row of shaping_table. */
+ struct ligature_pairs_t {
+ uint16_t second; /* NOTE(review): presumably the component that follows `first` -- confirm against the code that consumes this table. */
+ uint16_t ligature; /* Code point of the ligature named in the per-entry comment. */
+ } ligatures[4]; /* Fixed capacity of four pairs; both sets below fill all four. */
+} ligature_table[] =
+{
+ { 0xFEDFu, { /* 0xFEDF is the first column of the LAM row; these pairs map to the ISOLATED FORM ligatures. */
+ { 0xFE82u, 0xFEF5u }, /* ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM */
+ { 0xFE84u, 0xFEF7u }, /* ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM */
+ { 0xFE88u, 0xFEF9u }, /* ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW ISOLATED FORM */
+ { 0xFE8Eu, 0xFEFBu }, /* ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM */
+ }},
+ { 0xFEE0u, { /* 0xFEE0 is the second column of the LAM row; these pairs map to the FINAL FORM ligatures. */
+ { 0xFE82u, 0xFEF6u }, /* ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM */
+ { 0xFE84u, 0xFEF8u }, /* ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM */
+ { 0xFE88u, 0xFEFAu }, /* ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW FINAL FORM */
+ { 0xFE8Eu, 0xFEFCu }, /* ARABIC LIGATURE LAM WITH ALEF FINAL FORM */
+ }},
+};
+
+
+#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH */
+
+/* == End of generated table == */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-win1256.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-win1256.hh
new file mode 100644
index 0000000000..b15e145f2f
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-win1256.hh
@@ -0,0 +1,323 @@
+/*
+ * Copyright © 2014 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_WIN1256_HH
+
+
+/*
+ * The macros in the first part of this file are generic macros that can
+ * be used to define the bytes for OpenType table data in code in a
+ * readable manner. We can move the macros to reside with their respective
+ * struct types, but since we only use these to define one data table, the
+ * Windows-1256 Arabic shaping table in this file, we keep them here.
+ */
+
+
+/* First we measure, then we cut: this header is processed twice (it re-includes itself near its end), with different macro bodies per pass. */
+#ifndef OT_MEASURE /* Pass 1, "measure": emit only the struct layout. */
+#define OT_MEASURE /* Records that pass 1 ran, so the next pass takes the #else branch. */
+#define OT_TABLE_START static const struct TABLE_NAME {
+#define OT_TABLE_END } /* No ';' on purpose: pass 2's OT_TABLE_START continues this same declaration with the variable name and initializer. */
+#define OT_LABEL_START(Name) unsigned char Name[ /* Each label becomes a byte-array member ... */
+#define OT_LABEL_END ]; /* ... whose length is the sum of the +1 / +2 terms emitted between start and end. */
+#define OT_UINT8(u8) +1/*byte*/
+#define OT_UINT16(u16) +2/*bytes*/
+#else /* Pass 2, "cut": emit the initializer bytes. OT_COUNT and OT_DISTANCE exist only here and read sizes/offsets from the struct laid out in pass 1. */
+#undef OT_MEASURE
+#define OT_TABLE_START TABLE_NAME = {
+#define OT_TABLE_END };
+#define OT_LABEL_START(Name) {
+#define OT_LABEL_END },
+#define OT_UINT8(u8) (u8),
+#define OT_UINT16(u16) (unsigned char)((u16)>>8), (unsigned char)((u16)&0xFFu), /* Two bytes, high byte first. */
+#define OT_COUNT(Name, ItemSize) ((unsigned int) sizeof(((struct TABLE_NAME*)0)->Name) \
+ / (unsigned int)(ItemSize) \
+ /* OT_ASSERT it's divisible (and positive). */)
+#define OT_DISTANCE(From,To) ((unsigned int) \
+ ((char*)(&((struct TABLE_NAME*)0)->To) - \
+ (char*)(&((struct TABLE_NAME*)0)->From)) \
+ /* OT_ASSERT it's positive. */)
+#endif
+
+
+#define OT_LABEL(Name) /* Closes the label currently open and immediately opens a new one called Name. */ \
+ OT_LABEL_END \
+ OT_LABEL_START(Name)
+
+/* Whenever we receive an argument that is a list, it will expand to
+ * contain commas. That cannot be passed to another macro because the
+ * commas will throw off the preprocessor. The solution is to wrap
+ * the passed-in argument in OT_LIST() before passing to the next macro.
+ * Unfortunately this trick requires vararg macros. */
+#define OT_LIST(...) __VA_ARGS__
+
+
+/*
+ * Basic Types
+ */
+
+#define OT_TAG(a,b,c,d) /* Four single bytes, emitted in argument order. */ \
+ OT_UINT8(a) OT_UINT8(b) OT_UINT8(c) OT_UINT8(d)
+
+#define OT_OFFSET(From, To) /* Offset from From to To in bytes */ \
+ OT_UINT16(OT_DISTANCE(From, To))
+
+#define OT_GLYPHID /* GlyphID */ \
+ OT_UINT16
+
+#define OT_UARRAY(Name, Items) /* 16-bit count followed by Items; the count is sizeof(Name##Data) / 2, i.e. Items are taken to be 2 bytes each. */ \
+ OT_LABEL_START(Name) \
+ OT_UINT16(OT_COUNT(Name##Data, 2)) \
+ OT_LABEL(Name##Data) \
+ Items \
+ OT_LABEL_END
+
+#define OT_UHEADLESSARRAY(Name, Items) /* Same layout as OT_UARRAY, except the stored count is one more than the number of Items. */ \
+ OT_LABEL_START(Name) \
+ OT_UINT16(OT_COUNT(Name##Data, 2) + 1) \
+ OT_LABEL(Name##Data) \
+ Items \
+ OT_LABEL_END
+
+
+/*
+ * Common Types
+ */
+
+#define OT_LOOKUP_FLAG_IGNORE_MARKS 0x08u /* Passed as the LookupFlag argument of OT_LOOKUP. */
+
+#define OT_LOOKUP(Name, LookupType, LookupFlag, SubLookupOffsets) /* Two 16-bit header fields, then a counted array of sub-lookup offsets. */ \
+ OT_LABEL_START(Name) \
+ OT_UINT16(LookupType) \
+ OT_UINT16(LookupFlag) \
+ OT_LABEL_END \
+ OT_UARRAY(Name##SubLookupOffsetsArray, OT_LIST(SubLookupOffsets))
+
+#define OT_SUBLOOKUP(Name, SubFormat, Items) /* Opens label Name and emits SubFormat; the label is left open, so Items must supply the closing OT_LABEL_END. */ \
+ OT_LABEL_START(Name) \
+ OT_UINT16(SubFormat) \
+ Items
+
+#define OT_COVERAGE1(Name, Items) /* A 16-bit value 1 followed by a counted array of Items. */ \
+ OT_LABEL_START(Name) \
+ OT_UINT16(1) \
+ OT_LABEL_END \
+ OT_UARRAY(Name##Glyphs, OT_LIST(Items))
+
+
+/*
+ * GSUB
+ */
+
+#define OT_LOOKUP_TYPE_SUBST_SINGLE 1u /* LookupType values passed to OT_LOOKUP. */
+#define OT_LOOKUP_TYPE_SUBST_LIGATURE 4u
+
+#define OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(Name, FromGlyphs, ToGlyphs) /* Format 2: offset to Name##Coverage, counted ToGlyphs array, then the coverage holding FromGlyphs. */ \
+ OT_SUBLOOKUP(Name, 2, \
+ OT_OFFSET(Name, Name##Coverage) \
+ OT_LABEL_END /* Closes the label that OT_SUBLOOKUP left open. */ \
+ OT_UARRAY(Name##Substitute, OT_LIST(ToGlyphs)) \
+ ) \
+ OT_COVERAGE1(Name##Coverage, OT_LIST(FromGlyphs)) \
+ /* ASSERT_STATIC_EXPR_ZERO (len(FromGlyphs) == len(ToGlyphs)) */
+
+#define OT_SUBLOOKUP_LIGATURE_SUBST_FORMAT1(Name, FirstGlyphs, LigatureSetOffsets) /* Format 1: offset to Name##Coverage, counted ligature-set offsets, then the coverage holding FirstGlyphs. */ \
+ OT_SUBLOOKUP(Name, 1, \
+ OT_OFFSET(Name, Name##Coverage) \
+ OT_LABEL_END /* Closes the label that OT_SUBLOOKUP left open. */ \
+ OT_UARRAY(Name##LigatureSetOffsetsArray, OT_LIST(LigatureSetOffsets)) \
+ ) \
+ OT_COVERAGE1(Name##Coverage, OT_LIST(FirstGlyphs)) \
+ /* ASSERT_STATIC_EXPR_ZERO (len(FirstGlyphs) == len(LigatureSetOffsets)) */
+
+#define OT_LIGATURE_SET(Name, LigatureSetOffsets) /* A plain counted array of offsets to OT_LIGATURE entries. */ \
+ OT_UARRAY(Name, OT_LIST(LigatureSetOffsets))
+
+#define OT_LIGATURE(Name, Components, LigGlyph) /* Emits LigGlyph first, then Components with a stored count of items + 1. NOTE(review): the +1 presumably counts the first glyph, which lives in the coverage -- confirm. */ \
+ OT_LABEL_START(Name) \
+ LigGlyph \
+ OT_LABEL_END \
+ OT_UHEADLESSARRAY(Name##ComponentsArray, OT_LIST(Components))
+
+/*
+ *
+ * Start of Windows-1256 shaping table.
+ *
+ */
+
+/* Table name: OT_TABLE_START uses it both as the struct tag (pass 1) and as the variable name (pass 2). */
+#define TABLE_NAME arabic_win1256_gsub_lookups
+
+/* Table manifest: a 16-bit record count followed by 6-byte records (hence the ItemSize of 6 below). */
+#define MANIFEST(Items) \
+ OT_LABEL_START(manifest) \
+ OT_UINT16(OT_COUNT(manifestData, 6)) \
+ OT_LABEL(manifestData) \
+ Items \
+ OT_LABEL_END
+
+#define MANIFEST_LOOKUP(Tag, Name) /* One record: Tag (callers pass a 4-byte OT_TAG) + a 16-bit offset from `manifest` to lookup Name. */ \
+ Tag \
+ OT_OFFSET(manifest, Name)
+
+/* Shorthand: G(n) emits glyph id n as a 16-bit value. */
+#define G OT_GLYPHID
+
+/*
+ * Table Start
+ */
+OT_TABLE_START
+
+
+/*
+ * Manifest
+ */
+MANIFEST(
+ MANIFEST_LOOKUP(OT_TAG('r','l','i','g'), rligLookup)
+ MANIFEST_LOOKUP(OT_TAG('i','n','i','t'), initLookup)
+ MANIFEST_LOOKUP(OT_TAG('m','e','d','i'), mediLookup)
+ MANIFEST_LOOKUP(OT_TAG('f','i','n','a'), finaLookup)
+ MANIFEST_LOOKUP(OT_TAG('r','l','i','g'), rligMarksLookup)
+)
+
+/*
+ * Lookups
+ */
+OT_LOOKUP(initLookup, OT_LOOKUP_TYPE_SUBST_SINGLE, OT_LOOKUP_FLAG_IGNORE_MARKS,
+ OT_OFFSET(initLookup, initmediSubLookup)
+ OT_OFFSET(initLookup, initSubLookup)
+)
+OT_LOOKUP(mediLookup, OT_LOOKUP_TYPE_SUBST_SINGLE, OT_LOOKUP_FLAG_IGNORE_MARKS,
+ OT_OFFSET(mediLookup, initmediSubLookup)
+ OT_OFFSET(mediLookup, mediSubLookup)
+ OT_OFFSET(mediLookup, medifinaLamAlefSubLookup)
+)
+OT_LOOKUP(finaLookup, OT_LOOKUP_TYPE_SUBST_SINGLE, OT_LOOKUP_FLAG_IGNORE_MARKS,
+ OT_OFFSET(finaLookup, finaSubLookup)
+ /* We don't need this one currently as the sequence inherits masks
+ * from the first item. Just in case we change that in the future
+ * to be smart about Arabic masks when ligating... */
+ OT_OFFSET(finaLookup, medifinaLamAlefSubLookup)
+)
+OT_LOOKUP(rligLookup, OT_LOOKUP_TYPE_SUBST_LIGATURE, OT_LOOKUP_FLAG_IGNORE_MARKS,
+ OT_OFFSET(rligLookup, lamAlefLigaturesSubLookup)
+)
+OT_LOOKUP(rligMarksLookup, OT_LOOKUP_TYPE_SUBST_LIGATURE, 0,
+ OT_OFFSET(rligMarksLookup, shaddaLigaturesSubLookup)
+)
+
+/*
+ * init/medi/fina forms
+ */
+OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(initmediSubLookup,
+ G(198) G(200) G(201) G(202) G(203) G(204) G(205) G(206) G(211)
+ G(212) G(213) G(214) G(223) G(225) G(227) G(228) G(236) G(237),
+ G(162) G(4) G(5) G(5) G(6) G(7) G(9) G(11) G(13)
+ G(14) G(15) G(26) G(140) G(141) G(142) G(143) G(154) G(154)
+)
+OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(initSubLookup,
+ G(218) G(219) G(221) G(222) G(229),
+ G(27) G(30) G(128) G(131) G(144)
+)
+OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(mediSubLookup,
+ G(218) G(219) G(221) G(222) G(229),
+ G(28) G(31) G(129) G(138) G(149)
+)
+OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(finaSubLookup,
+ G(194) G(195) G(197) G(198) G(199) G(201) G(204) G(205) G(206)
+ G(218) G(219) G(229) G(236) G(237),
+ G(2) G(1) G(3) G(181) G(0) G(159) G(8) G(10) G(12)
+ G(29) G(127) G(152) G(160) G(156)
+)
+OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(medifinaLamAlefSubLookup,
+ G(165) G(178) G(180) G(252),
+ G(170) G(179) G(185) G(255)
+)
+
+/*
+ * Lam+Alef ligatures
+ */
+OT_SUBLOOKUP_LIGATURE_SUBST_FORMAT1(lamAlefLigaturesSubLookup,
+ G(225),
+ OT_OFFSET(lamAlefLigaturesSubLookup, lamLigatureSet)
+)
+OT_LIGATURE_SET(lamLigatureSet,
+ OT_OFFSET(lamLigatureSet, lamInitLigature1)
+ OT_OFFSET(lamLigatureSet, lamInitLigature2)
+ OT_OFFSET(lamLigatureSet, lamInitLigature3)
+ OT_OFFSET(lamLigatureSet, lamInitLigature4)
+)
+OT_LIGATURE(lamInitLigature1, G(199), G(165))
+OT_LIGATURE(lamInitLigature2, G(195), G(178))
+OT_LIGATURE(lamInitLigature3, G(194), G(180))
+OT_LIGATURE(lamInitLigature4, G(197), G(252))
+
+/*
+ * Shadda ligatures
+ */
+OT_SUBLOOKUP_LIGATURE_SUBST_FORMAT1(shaddaLigaturesSubLookup,
+ G(248),
+ OT_OFFSET(shaddaLigaturesSubLookup, shaddaLigatureSet)
+)
+OT_LIGATURE_SET(shaddaLigatureSet,
+ OT_OFFSET(shaddaLigatureSet, shaddaLigature1)
+ OT_OFFSET(shaddaLigatureSet, shaddaLigature2)
+ OT_OFFSET(shaddaLigatureSet, shaddaLigature3)
+)
+OT_LIGATURE(shaddaLigature1, G(243), G(172))
+OT_LIGATURE(shaddaLigature2, G(245), G(173))
+OT_LIGATURE(shaddaLigature3, G(246), G(175))
+
+/*
+ * Table end
+ */
+OT_TABLE_END
+
+
+/*
+ * Clean up
+ */
+#undef OT_TABLE_START /* These eight are the pass-dependent macros; dropping them lets the other pass define its own versions. */
+#undef OT_TABLE_END
+#undef OT_LABEL_START
+#undef OT_LABEL_END
+#undef OT_UINT8
+#undef OT_UINT16
+#undef OT_DISTANCE
+#undef OT_COUNT
+
+/*
+ * Include a second time to get the table data...
+ */
+#if 0
+#include "hb.hh" /* Make check-includes.sh happy. */
+#endif
+#ifdef OT_MEASURE /* Still defined only after pass 1 (pass 2 #undefs it), so the self-include happens exactly once. */
+#include "hb-ot-shape-complex-arabic-win1256.hh"
+#endif
+
+#define HB_OT_SHAPE_COMPLEX_ARABIC_WIN1256_HH
+#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_WIN1256_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.cc
new file mode 100644
index 0000000000..1e93f0efd5
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.cc
@@ -0,0 +1,716 @@
+/*
+ * Copyright © 2010,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#include "hb-ot-shape-complex-arabic.hh"
+#include "hb-ot-shape.hh"
+
+
+/* buffer var allocations */
+#define arabic_shaping_action() complex_var_u8_0() /* arabic shaping action */
+
+#define HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH HB_BUFFER_SCRATCH_FLAG_COMPLEX0
+
+/* See:
+ * https://github.com/harfbuzz/harfbuzz/commit/6e6f82b6f3dde0fc6c3c7d991d9ec6cfff57823d#commitcomment-14248516 */
+#define HB_ARABIC_GENERAL_CATEGORY_IS_WORD(gen_cat) \
+ (FLAG_UNSAFE (gen_cat) & \
+ (FLAG (HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED) | \
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE) | \
+ /*FLAG (HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER) |*/ \
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) | \
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) | \
+ /*FLAG (HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER) |*/ \
+ /*FLAG (HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER) |*/ \
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | \
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) | \
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) | \
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER) | \
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER) | \
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL) | \
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL) | \
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL) | \
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL)))
+
+
+/*
+ * Joining types:
+ */
+
+/*
+ * Bits used in the joining tables
+ */
+enum hb_arabic_joining_type_t { /* values 0..5 double as column indices into arabic_state_table */
+ JOINING_TYPE_U = 0, /* column jt_U */
+ JOINING_TYPE_L = 1, /* column jt_L */
+ JOINING_TYPE_R = 2, /* column jt_R */
+ JOINING_TYPE_D = 3, /* column jt_D */
+ JOINING_TYPE_C = JOINING_TYPE_D, /* alias: shares the jt_D column */
+ JOINING_GROUP_ALAPH = 4, /* column jg_ALAPH */
+ JOINING_GROUP_DALATH_RISH = 5, /* column jg_DALATH_RISH */
+ NUM_STATE_MACHINE_COLS = 6, /* number of columns in each arabic_state_table row */
+
+ JOINING_TYPE_T = 7, /* has no table column; arabic_joining() skips such glyphs */
+ JOINING_TYPE_X = 8 /* means: use general-category to choose between U or T. */
+};
+
+#include "hb-ot-shape-complex-arabic-table.hh"
+
+static unsigned int get_joining_type (hb_codepoint_t u, hb_unicode_general_category_t gen_cat) /* Joining type of u; gen_cat is only consulted when the lookup yields JOINING_TYPE_X. */
+{
+ unsigned int j_type = joining_type(u); /* table lookup; presumably defined in hb-ot-shape-complex-arabic-table.hh -- confirm */
+ if (likely (j_type != JOINING_TYPE_X))
+ return j_type;
+
+ return (FLAG_UNSAFE(gen_cat) & /* X: non-spacing marks, enclosing marks and format characters become T, all else U */
+ (FLAG(HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) |
+ FLAG(HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
+ FLAG(HB_UNICODE_GENERAL_CATEGORY_FORMAT))
+ ) ? JOINING_TYPE_T : JOINING_TYPE_U;
+}
+
+#define FEATURE_IS_SYRIAC(tag) hb_in_range<unsigned char> ((unsigned char) (tag), '2', '3') /* tests the low byte of tag against '2'..'3'; meant to match fin2, fin3, med2 (assumes HB_TAG keeps the last character in the low byte -- confirm) */
+
+static const hb_tag_t arabic_features[] = /* indexed by arabic_action_t */
+{
+ HB_TAG('i','s','o','l'), /* ISOL */
+ HB_TAG('f','i','n','a'), /* FINA */
+ HB_TAG('f','i','n','2'), /* FIN2 */
+ HB_TAG('f','i','n','3'), /* FIN3 */
+ HB_TAG('m','e','d','i'), /* MEDI */
+ HB_TAG('m','e','d','2'), /* MED2 */
+ HB_TAG('i','n','i','t'), /* INIT */
+ HB_TAG_NONE /* NONE: extra trailing entry, not counted in ARABIC_NUM_FEATURES */
+};
+
+
+/* Same order as the arabic_features[] array; one value is stored per glyph in arabic_shaping_action(). */
+enum arabic_action_t {
+ ISOL,
+ FINA,
+ FIN2,
+ FIN3,
+ MEDI,
+ MED2,
+ INIT,
+
+ NONE, /* no joining feature; indexes the extra mask_array slot that stays 0 */
+
+ ARABIC_NUM_FEATURES = NONE, /* count of real features, ISOL..INIT */
+
+ /* We abuse the same byte for other things... (values written by record_stch(), read by apply_stch()) */
+ STCH_FIXED, /* stretch tile emitted once */
+ STCH_REPEATING, /* stretch tile that apply_stch() may duplicate */
+};
+
+static const struct arabic_state_table_entry {
+ uint8_t prev_action; /* action written back onto the previous non-T glyph; NONE leaves it unchanged */
+ uint8_t curr_action; /* action assigned to the current glyph */
+ uint16_t next_state; /* table row to use for the next glyph */
+} arabic_state_table[][NUM_STATE_MACHINE_COLS] = /* indexed [state][joining type of the current glyph] */
+{
+ /* jt_U, jt_L, jt_R, jt_D, jg_ALAPH, jg_DALATH_RISH */
+
+ /* State 0: prev was U, not willing to join. */
+ { {NONE,NONE,0}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,6}, },
+
+ /* State 1: prev was R or ISOL/ALAPH, not willing to join. */
+ { {NONE,NONE,0}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,2}, {NONE,FIN2,5}, {NONE,ISOL,6}, },
+
+ /* State 2: prev was D/L in ISOL form, willing to join. */
+ { {NONE,NONE,0}, {NONE,ISOL,2}, {INIT,FINA,1}, {INIT,FINA,3}, {INIT,FINA,4}, {INIT,FINA,6}, },
+
+ /* State 3: prev was D in FINA form, willing to join. */
+ { {NONE,NONE,0}, {NONE,ISOL,2}, {MEDI,FINA,1}, {MEDI,FINA,3}, {MEDI,FINA,4}, {MEDI,FINA,6}, },
+
+ /* State 4: prev was FINA ALAPH, not willing to join. */
+ { {NONE,NONE,0}, {NONE,ISOL,2}, {MED2,ISOL,1}, {MED2,ISOL,2}, {MED2,FIN2,5}, {MED2,ISOL,6}, },
+
+ /* State 5: prev was FIN2/FIN3 ALAPH, not willing to join. */
+ { {NONE,NONE,0}, {NONE,ISOL,2}, {ISOL,ISOL,1}, {ISOL,ISOL,2}, {ISOL,FIN2,5}, {ISOL,ISOL,6}, },
+
+ /* State 6: prev was DALATH/RISH, not willing to join. */
+ { {NONE,NONE,0}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,2}, {NONE,FIN3,5}, {NONE,ISOL,6}, }
+};
+
+
+static void
+arabic_fallback_shape (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+
+static void
+record_stch (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+
+static void
+collect_features_arabic (hb_ot_shape_planner_t *plan) /* Registers this shaper's GSUB features and pauses on plan->map. */
+{
+ hb_ot_map_builder_t *map = &plan->map;
+
+ /* We apply features according to the Arabic spec, with pauses
+ * in between most.
+ *
+ * The pause between init/medi/... and rlig is required. See eg:
+ * https://bugzilla.mozilla.org/show_bug.cgi?id=644184
+ *
+ * The pauses between init/medi/... themselves are not necessarily
+ * needed as only one of those features is applied to any character.
+ * The only difference it makes is when fonts have contextual
+ * substitutions. We now follow the order of the spec, which makes
+ * for better experience if that's what Uniscribe is doing.
+ *
+ * At least for Arabic, looks like Uniscribe has a pause between
+ * rlig and calt. Otherwise the IranNastaliq's ALLAH ligature won't
+ * work. However, testing shows that rlig and calt are applied
+ * together for Mongolian in Uniscribe. As such, we only add a
+ * pause for Arabic, not other scripts.
+ *
+ * A pause after calt is required to make KFGQPC Uthmanic Script HAFS
+ * work correctly. See https://github.com/harfbuzz/harfbuzz/issues/505
+ */
+
+
+ map->enable_feature (HB_TAG('s','t','c','h'));
+ map->add_gsub_pause (record_stch); /* record_stch() is the callback for the pause that follows 'stch' */
+
+ map->enable_feature (HB_TAG('c','c','m','p'));
+ map->enable_feature (HB_TAG('l','o','c','l'));
+
+ map->add_gsub_pause (nullptr);
+
+ for (unsigned int i = 0; i < ARABIC_NUM_FEATURES; i++) /* one feature plus one pause per entry, in arabic_features[] order */
+ {
+ bool has_fallback = plan->props.script == HB_SCRIPT_ARABIC && !FEATURE_IS_SYRIAC (arabic_features[i]);
+ map->add_feature (arabic_features[i], has_fallback ? F_HAS_FALLBACK : F_NONE);
+ map->add_gsub_pause (nullptr);
+ }
+
+ /* Normally, Unicode says a ZWNJ means "don't ligate". In Arabic script
+ * however, it says a ZWJ should also mean "don't ligate". So we run
+ * the main ligating features as MANUAL_ZWJ. */
+
+ map->enable_feature (HB_TAG('r','l','i','g'), F_MANUAL_ZWJ | F_HAS_FALLBACK);
+
+ if (plan->props.script == HB_SCRIPT_ARABIC)
+ map->add_gsub_pause (arabic_fallback_shape); /* pause after rlig, Arabic script only (see the comment at the top) */
+
+ /* No pause after rclt. See 98460779bae19e4d64d29461ff154b3527bf8420. */
+ map->enable_feature (HB_TAG('r','c','l','t'), F_MANUAL_ZWJ);
+ map->enable_feature (HB_TAG('c','a','l','t'), F_MANUAL_ZWJ);
+ map->add_gsub_pause (nullptr);
+
+ /* The spec includes 'cswh'. Earlier versions of Windows
+ * used to enable this by default, but testing suggests
+ * that Windows 8 and later do not enable it by default,
+ * and spec now says 'Off by default'.
+ * We disabled this in ae23c24c32.
+ * Note that IranNastaliq uses this feature extensively
+ * to fixup broken glyph sequences. Oh well...
+ * Test case: U+0643,U+0640,U+0631. */
+ //map->enable_feature (HB_TAG('c','s','w','h'));
+ map->enable_feature (HB_TAG('m','s','e','t'));
+}
+
+#include "hb-ot-shape-complex-arabic-fallback.hh"
+
+struct arabic_shape_plan_t /* per-plan data; allocated by data_create_arabic(), freed by data_destroy_arabic() */
+{
+ /* The "+ 1" in the next array is to accommodate the "NONE" command,
+ * which is not an OpenType feature, but this simplifies the code by not
+ * having to do a "if (... < NONE) ..." and just rely on the fact that
+ * mask_array[NONE] == 0. */
+ hb_mask_t mask_array[ARABIC_NUM_FEATURES + 1];
+
+ hb_atomic_ptr_t<arabic_fallback_plan_t> fallback_plan; /* starts null; filled in on first use by arabic_fallback_shape() */
+
+ unsigned int do_fallback : 1; /* computed in data_create_arabic(); gates arabic_fallback_shape() */
+ unsigned int has_stch : 1; /* set when the plan's map has a non-zero 'stch' mask; gates record_stch() */
+};
+
+void * /* Returns a newly allocated arabic_shape_plan_t for plan, or nullptr if allocation fails. */
+data_create_arabic (const hb_ot_shape_plan_t *plan)
+{
+ arabic_shape_plan_t *arabic_plan = (arabic_shape_plan_t *) calloc (1, sizeof (arabic_shape_plan_t)); /* zero-filled, so mask_array[NONE] and fallback_plan start at 0 */
+ if (unlikely (!arabic_plan))
+ return nullptr;
+
+ arabic_plan->do_fallback = plan->props.script == HB_SCRIPT_ARABIC;
+ arabic_plan->has_stch = !!plan->map.get_1_mask (HB_TAG ('s','t','c','h'));
+ for (unsigned int i = 0; i < ARABIC_NUM_FEATURES; i++) { /* stops before NONE, leaving that slot zero */
+ arabic_plan->mask_array[i] = plan->map.get_1_mask (arabic_features[i]);
+ arabic_plan->do_fallback = arabic_plan->do_fallback && /* stays set only if every non-Syriac feature needs fallback */
+ (FEATURE_IS_SYRIAC (arabic_features[i]) ||
+ plan->map.needs_fallback (arabic_features[i]));
+ }
+
+ return arabic_plan;
+}
+
+void /* Releases a plan created by data_create_arabic(), including its fallback plan. */
+data_destroy_arabic (void *data)
+{
+ arabic_shape_plan_t *arabic_plan = (arabic_shape_plan_t *) data; /* dereferenced below without a null check */
+
+ arabic_fallback_plan_destroy (arabic_plan->fallback_plan); /* may still be null if fallback never ran; presumably tolerated by the callee -- confirm */
+
+ free (data);
+}
+
+static void
+arabic_joining (hb_buffer_t *buffer) /* Runs the joining state machine and stores an arabic_action_t in every glyph's arabic_shaping_action(). */
+{
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ unsigned int prev = UINT_MAX, state = 0; /* prev: index of the last non-T glyph processed, UINT_MAX while there is none */
+
+ /* Check pre-context: the first non-T context codepoint picks the starting state. */
+ for (unsigned int i = 0; i < buffer->context_len[0]; i++)
+ {
+ unsigned int this_type = get_joining_type (buffer->context[0][i], buffer->unicode->general_category (buffer->context[0][i]));
+
+ if (unlikely (this_type == JOINING_TYPE_T))
+ continue;
+
+ const arabic_state_table_entry *entry = &arabic_state_table[state][this_type];
+ state = entry->next_state;
+ break; /* only that single codepoint is taken into account */
+ }
+
+ for (unsigned int i = 0; i < count; i++)
+ {
+ unsigned int this_type = get_joining_type (info[i].codepoint, _hb_glyph_info_get_general_category (&info[i]));
+
+ if (unlikely (this_type == JOINING_TYPE_T)) {
+ info[i].arabic_shaping_action() = NONE; /* T glyphs get NONE and leave prev and state untouched */
+ continue;
+ }
+
+ const arabic_state_table_entry *entry = &arabic_state_table[state][this_type];
+
+ if (entry->prev_action != NONE && prev != UINT_MAX)
+ {
+ info[prev].arabic_shaping_action() = entry->prev_action; /* revise the previous glyph now that its successor is known */
+ buffer->unsafe_to_break (prev, i + 1); /* flag the range [prev, i + 1) */
+ }
+
+ info[i].arabic_shaping_action() = entry->curr_action;
+
+ prev = i;
+ state = entry->next_state;
+ }
+
+ for (unsigned int i = 0; i < buffer->context_len[1]; i++) /* post-context: first non-T codepoint may still revise prev's action */
+ {
+ unsigned int this_type = get_joining_type (buffer->context[1][i], buffer->unicode->general_category (buffer->context[1][i]));
+
+ if (unlikely (this_type == JOINING_TYPE_T))
+ continue;
+
+ const arabic_state_table_entry *entry = &arabic_state_table[state][this_type];
+ if (entry->prev_action != NONE && prev != UINT_MAX)
+ info[prev].arabic_shaping_action() = entry->prev_action;
+ break;
+ }
+}
+
+static void
+mongolian_variation_selectors (hb_buffer_t *buffer)
+{
+ /* Copy arabic_shaping_action() from base to Mongolian variation selectors (U+180B..U+180D). */
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ for (unsigned int i = 1; i < count; i++) /* starts at 1 so that info[i - 1] always exists */
+ if (unlikely (hb_in_range<hb_codepoint_t> (info[i].codepoint, 0x180Bu, 0x180Du)))
+ info[i].arabic_shaping_action() = info[i - 1].arabic_shaping_action(); /* "base" here is simply the preceding glyph */
+}
+
+void /* Computes joining actions for the buffer and ORs the matching feature mask into each glyph. */
+setup_masks_arabic_plan (const arabic_shape_plan_t *arabic_plan,
+ hb_buffer_t *buffer,
+ hb_script_t script)
+{
+ HB_BUFFER_ALLOCATE_VAR (buffer, arabic_shaping_action); /* released in postprocess_glyphs_arabic() */
+
+ arabic_joining (buffer);
+ if (script == HB_SCRIPT_MONGOLIAN)
+ mongolian_variation_selectors (buffer);
+
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ for (unsigned int i = 0; i < count; i++)
+ info[i].mask |= arabic_plan->mask_array[info[i].arabic_shaping_action()]; /* action NONE maps to a zero mask, so it adds nothing */
+}
+
+static void /* Shaper callback: forwards to setup_masks_arabic_plan() with this plan's data and script. */
+setup_masks_arabic (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ hb_font_t *font HB_UNUSED)
+{
+ const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data; /* the object returned by data_create_arabic() */
+ setup_masks_arabic_plan (arabic_plan, buffer, plan->props.script);
+}
+
+static void /* GSUB pause callback: applies the fallback plan, creating it on first use. */
+arabic_fallback_shape (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer)
+{
+#ifdef HB_NO_OT_SHAPE_COMPLEX_ARABIC_FALLBACK
+ return; /* fallback shaping compiled out */
+#endif
+
+ const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data;
+
+ if (!arabic_plan->do_fallback)
+ return;
+
+retry:
+ arabic_fallback_plan_t *fallback_plan = arabic_plan->fallback_plan;
+ if (unlikely (!fallback_plan))
+ {
+ /* This sucks. We need a font to build the fallback plan... */
+ fallback_plan = arabic_fallback_plan_create (plan, font);
+ if (unlikely (!arabic_plan->fallback_plan.cmpexch (nullptr, fallback_plan))) /* pointer was no longer null (presumably another thread installed a plan first) */
+ {
+ arabic_fallback_plan_destroy (fallback_plan); /* drop ours and reload the stored pointer */
+ goto retry;
+ }
+ }
+
+ arabic_fallback_plan_shape (fallback_plan, font, buffer);
+}
+
+/*
+ * Stretch feature: "stch".
+ * See example here:
+ * https://docs.microsoft.com/en-us/typography/script-development/syriac
+ * We implement this in a generic way, such that the Arabic subtending
+ * marks can use it as well.
+ */
+
+static void /* GSUB pause callback registered right after 'stch': tags multiplied glyphs for apply_stch(). */
+record_stch (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font HB_UNUSED,
+ hb_buffer_t *buffer)
+{
+ const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data;
+ if (!arabic_plan->has_stch)
+ return;
+
+ /* 'stch' feature was just applied. Look for anything that multiplied,
+ * and record it for stch treatment later. Note that rtlm, frac, etc
+ * are applied before stch, but we assume that they didn't result in
+ * anything multiplying into 5 pieces, so it's safe-ish... */
+
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ for (unsigned int i = 0; i < count; i++)
+ if (unlikely (_hb_glyph_info_multiplied (&info[i])))
+ {
+ unsigned int comp = _hb_glyph_info_get_lig_comp (&info[i]);
+ info[i].arabic_shaping_action() = comp % 2 ? STCH_REPEATING : STCH_FIXED; /* odd component -> repeating tile, even -> fixed tile; overwrites the joining action */
+ buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH; /* lets apply_stch() return early when nothing was tagged */
+ }
+}
+
+static void /* Expands the stretch tiles tagged by record_stch() so that they span the width of the preceding word. */
+apply_stch (const hb_ot_shape_plan_t *plan HB_UNUSED,
+ hb_buffer_t *buffer,
+ hb_font_t *font)
+{
+ if (likely (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH)))
+ return;
+
+ /* The Arabic shaper currently always processes in RTL mode, so we should
+ * stretch / position the stretched pieces to the left / preceding glyphs. */
+
+ /* We do a two-pass implementation:
+ * First pass calculates the exact number of extra glyphs we need.
+ * We then enlarge the buffer to have that much room.
+ * Second pass applies the stretch, copying things to the end of buffer.
+ */
+
+ int sign = font->x_scale < 0 ? -1 : +1;
+ unsigned int extra_glyphs_needed = 0; // Set during MEASURE, used during CUT
+ enum { MEASURE, CUT } /* step_t */;
+
+ for (unsigned int step = MEASURE; step <= CUT; step = step + 1)
+ {
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info; // re-read on each pass (presumably because ensure() below may move the arrays)
+ hb_glyph_position_t *pos = buffer->pos;
+ unsigned int new_len = count + extra_glyphs_needed; // write head during CUT
+ unsigned int j = new_len;
+ for (unsigned int i = count; i; i--) // walk backwards; glyph under inspection is info[i - 1]
+ {
+ if (!hb_in_range<uint8_t> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING))
+ {
+ if (step == CUT)
+ {
+ --j; // ordinary glyph: copy it to its final slot unchanged
+ info[j] = info[i - 1];
+ pos[j] = pos[i - 1];
+ }
+ continue;
+ }
+
+ /* Yay, justification! */
+
+ hb_position_t w_total = 0; // Total to be filled
+ hb_position_t w_fixed = 0; // Sum of fixed tiles
+ hb_position_t w_repeating = 0; // Sum of repeating tiles
+ int n_fixed = 0;
+ int n_repeating = 0;
+
+ unsigned int end = i; // tiles occupy [start, end)
+ while (i &&
+ hb_in_range<uint8_t> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING))
+ {
+ i--;
+ hb_position_t width = font->get_glyph_h_advance (info[i].codepoint);
+ if (info[i].arabic_shaping_action() == STCH_FIXED)
+ {
+ w_fixed += width;
+ n_fixed++;
+ }
+ else
+ {
+ w_repeating += width;
+ n_repeating++;
+ }
+ }
+ unsigned int start = i;
+ unsigned int context = i; // glyphs in [context, start) are the word to be spanned
+ while (context &&
+ !hb_in_range<uint8_t> (info[context - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING) &&
+ (_hb_glyph_info_is_default_ignorable (&info[context - 1]) ||
+ HB_ARABIC_GENERAL_CATEGORY_IS_WORD (_hb_glyph_info_get_general_category (&info[context - 1]))))
+ {
+ context--;
+ w_total += pos[context].x_advance;
+ }
+ i++; // Don't touch i again. (Cancels the i-- of the enclosing for loop.)
+
+ DEBUG_MSG (ARABIC, nullptr, "%s stretch at (%d,%d,%d)",
+ step == MEASURE ? "measuring" : "cutting", context, start, end);
+ DEBUG_MSG (ARABIC, nullptr, "rest of word: count=%d width %d", start - context, w_total);
+ DEBUG_MSG (ARABIC, nullptr, "fixed tiles: count=%d width=%d", n_fixed, w_fixed);
+ DEBUG_MSG (ARABIC, nullptr, "repeating tiles: count=%d width=%d", n_repeating, w_repeating);
+
+ /* Number of additional times to repeat each repeating tile. */
+ int n_copies = 0;
+
+ hb_position_t w_remaining = w_total - w_fixed; // width left for the repeating tiles
+ if (sign * w_remaining > sign * w_repeating && sign * w_repeating > 0) // sign makes the comparison work for a negative x_scale
+ n_copies = (sign * w_remaining) / (sign * w_repeating) - 1;
+
+ /* See if we can improve the fit by adding an extra repeat and squeezing them together a bit. */
+ hb_position_t extra_repeat_overlap = 0;
+ hb_position_t shortfall = sign * w_remaining - sign * w_repeating * (n_copies + 1);
+ if (shortfall > 0 && n_repeating > 0)
+ {
+ ++n_copies;
+ hb_position_t excess = (n_copies + 1) * sign * w_repeating - sign * w_remaining;
+ if (excess > 0)
+ extra_repeat_overlap = excess / (n_copies * n_repeating); // spread the overshoot over the added copies
+ }
+
+ if (step == MEASURE)
+ {
+ extra_glyphs_needed += n_copies * n_repeating;
+ DEBUG_MSG (ARABIC, nullptr, "will add extra %d copies of repeating tiles", n_copies);
+ }
+ else
+ {
+ buffer->unsafe_to_break (context, end);
+ hb_position_t x_offset = 0;
+ for (unsigned int k = end; k > start; k--) // emit the tiles, last one first
+ {
+ hb_position_t width = font->get_glyph_h_advance (info[k - 1].codepoint);
+
+ unsigned int repeat = 1;
+ if (info[k - 1].arabic_shaping_action() == STCH_REPEATING)
+ repeat += n_copies;
+
+ DEBUG_MSG (ARABIC, nullptr, "appending %d copies of glyph %d; j=%d",
+ repeat, info[k - 1].codepoint, j);
+ for (unsigned int n = 0; n < repeat; n++)
+ {
+ x_offset -= width;
+ if (n > 0)
+ x_offset += extra_repeat_overlap;
+ pos[k - 1].x_offset = x_offset; // updated in place first, then copied out below
+ /* Append copy. */
+ --j;
+ info[j] = info[k - 1];
+ pos[j] = pos[k - 1];
+ }
+ }
+ }
+ }
+
+ if (step == MEASURE)
+ {
+ if (unlikely (!buffer->ensure (count + extra_glyphs_needed)))
+ break; // could not grow the buffer: skip the CUT pass entirely
+ }
+ else
+ {
+ assert (j == 0); // the write head must land exactly on the start of the buffer
+ buffer->len = new_len;
+ }
+ }
+}
+
+
+static void /* Shaper callback: applies stretching, then releases the per-glyph action variable. */
+postprocess_glyphs_arabic (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ hb_font_t *font)
+{
+ apply_stch (plan, buffer, font); /* still reads arabic_shaping_action(), so it must run before the release below */
+
+ HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action); /* pairs with HB_BUFFER_ALLOCATE_VAR in setup_masks_arabic_plan() */
+}
+
+/* https://www.unicode.org/reports/tr53/ */
+
+static hb_codepoint_t /* NOTE(review): only ever read (by info_is_mcm()) in this file; could be declared const */
+modifier_combining_marks[] =
+{
+ 0x0654u, /* ARABIC HAMZA ABOVE */
+ 0x0655u, /* ARABIC HAMZA BELOW */
+ 0x0658u, /* ARABIC MARK NOON GHUNNA */
+ 0x06DCu, /* ARABIC SMALL HIGH SEEN */
+ 0x06E3u, /* ARABIC SMALL LOW SEEN */
+ 0x06E7u, /* ARABIC SMALL HIGH YEH */
+ 0x06E8u, /* ARABIC SMALL HIGH NOON */
+ 0x08D3u, /* ARABIC SMALL LOW WAW */
+ 0x08F3u, /* ARABIC SMALL HIGH WAW */
+};
+
+static inline bool /* True when the glyph's codepoint is listed in modifier_combining_marks[]. */
+info_is_mcm (const hb_glyph_info_t &info)
+{
+ hb_codepoint_t u = info.codepoint;
+ for (unsigned int i = 0; i < ARRAY_LENGTH (modifier_combining_marks); i++) /* linear scan of the list */
+ if (u == modifier_combining_marks[i])
+ return true;
+ return false;
+}
+
+static void /* Shaper callback: within [start, end), moves leading runs of modifier combining marks of class 220, then 230, to the front. */
+reorder_marks_arabic (const hb_ot_shape_plan_t *plan HB_UNUSED,
+ hb_buffer_t *buffer,
+ unsigned int start,
+ unsigned int end)
+{
+ hb_glyph_info_t *info = buffer->info;
+
+ DEBUG_MSG (ARABIC, buffer, "Reordering marks from %d to %d", start, end);
+
+ unsigned int i = start;
+ for (unsigned int cc = 220; cc <= 230; cc += 10) /* exactly two iterations: cc == 220, then cc == 230 */
+ {
+ DEBUG_MSG (ARABIC, buffer, "Looking for %d's starting at %d", cc, i);
+ while (i < end && info_cc(info[i]) < cc)
+ i++;
+ DEBUG_MSG (ARABIC, buffer, "Looking for %d's stopped at %d", cc, i);
+
+ if (i == end)
+ break;
+
+ if (info_cc(info[i]) > cc)
+ continue;
+
+ unsigned int j = i; /* [i, j) becomes the run of class-cc marks that are also MCMs */
+ while (j < end && info_cc(info[j]) == cc && info_is_mcm (info[j]))
+ j++;
+
+ if (i == j)
+ continue;
+
+ DEBUG_MSG (ARABIC, buffer, "Found %d's from %d to %d", cc, i, j);
+
+ /* Shift it! Move the run [i, j) to position start and slide [start, i) right after it. */
+ DEBUG_MSG (ARABIC, buffer, "Shifting %d's: %d %d", cc, i, j);
+ hb_glyph_info_t temp[HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS];
+ assert (j - i <= ARRAY_LENGTH (temp)); /* the run must fit into the stack buffer */
+ buffer->merge_clusters (start, j);
+ memmove (temp, &info[i], (j - i) * sizeof (hb_glyph_info_t));
+ memmove (&info[start + j - i], &info[start], (i - start) * sizeof (hb_glyph_info_t));
+ memmove (&info[start], temp, (j - i) * sizeof (hb_glyph_info_t));
+
+ /* Renumber CC such that the reordered sequence is still sorted.
+ * 22 and 26 are chosen because they are smaller than all Arabic categories,
+ * and are folded back to 220/230 respectively during fallback mark positioning.
+ *
+ * We do this because the CGJ-handling logic in the normalizer relies on
+ * mark sequences having an increasing order even after this reordering.
+ * https://github.com/harfbuzz/harfbuzz/issues/554
+ * This, however, does break some obscure sequences, where the normalizer
+ * might compose a sequence that it should not. For example, in the sequence
+ * ALEF, HAMZAH, MADDAH, we should NOT try to compose ALEF+MADDAH, but with this
+ * renumbering, we will.
+ */
+ unsigned int new_start = start + j - i;
+ unsigned int new_cc = cc == 220 ? HB_MODIFIED_COMBINING_CLASS_CCC22 : HB_MODIFIED_COMBINING_CLASS_CCC26;
+ while (start < new_start) /* also advances start past the moved marks for the next iteration */
+ {
+ _hb_glyph_info_set_modified_combining_class (&info[start], new_cc);
+ start++;
+ }
+
+ i = j;
+ }
+}
+
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic = /* slot names below follow the labels used in _hb_ot_complex_shaper_default */
+{
+ collect_features_arabic, /* collect_features */
+ nullptr, /* override_features */
+ data_create_arabic, /* data_create */
+ data_destroy_arabic, /* data_destroy */
+ nullptr, /* preprocess_text */
+ postprocess_glyphs_arabic, /* postprocess_glyphs */
+ HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
+ nullptr, /* decompose */
+ nullptr, /* compose */
+ setup_masks_arabic, /* setup_masks */
+ HB_TAG_NONE, /* gpos_tag */
+ reorder_marks_arabic, /* reorder_marks */
+ HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
+ true, /* fallback_position */
+};
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.hh
new file mode 100644
index 0000000000..5bf6ff6338
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.hh
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2015 Mozilla Foundation.
+ * Copyright © 2015 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Mozilla Author(s): Jonathan Kew
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_HH
+#define HB_OT_SHAPE_COMPLEX_ARABIC_HH
+
+#include "hb.hh"
+
+#include "hb-ot-shape-complex.hh"
+
+
+struct arabic_shape_plan_t;
+
+HB_INTERNAL void *
+data_create_arabic (const hb_ot_shape_plan_t *plan);
+
+HB_INTERNAL void
+data_destroy_arabic (void *data);
+
+HB_INTERNAL void
+setup_masks_arabic_plan (const arabic_shape_plan_t *arabic_plan,
+ hb_buffer_t *buffer,
+ hb_script_t script);
+
+#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-default.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-default.cc
new file mode 100644
index 0000000000..a755aea098
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-default.cc
@@ -0,0 +1,73 @@
+/*
+ * Copyright © 2010,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#include "hb-ot-shape-complex.hh"
+
+
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_default = /* no callbacks at all; only the three non-pointer settings below are meaningful */
+{
+ nullptr, /* collect_features */
+ nullptr, /* override_features */
+ nullptr, /* data_create */
+ nullptr, /* data_destroy */
+ nullptr, /* preprocess_text */
+ nullptr, /* postprocess_glyphs */
+ HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, /* normalization mode */
+ nullptr, /* decompose */
+ nullptr, /* compose */
+ nullptr, /* setup_masks */
+ HB_TAG_NONE, /* gpos_tag */
+ nullptr, /* reorder_marks */
+ HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, /* mark advance zeroing mode */
+ true, /* fallback_position */
+};
+
+/* Same as default but no mark advance zeroing / fallback positioning.
+ * Dumbest shaper ever, basically. */
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_dumber = /* differs from _hb_ot_complex_shaper_default only in the last two entries */
+{
+ nullptr, /* collect_features */
+ nullptr, /* override_features */
+ nullptr, /* data_create */
+ nullptr, /* data_destroy */
+ nullptr, /* preprocess_text */
+ nullptr, /* postprocess_glyphs */
+ HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, /* normalization mode */
+ nullptr, /* decompose */
+ nullptr, /* compose */
+ nullptr, /* setup_masks */
+ HB_TAG_NONE, /* gpos_tag */
+ nullptr, /* reorder_marks */
+ HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, /* no mark advance zeroing */
+ false, /* fallback_position */
+};
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-hangul.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-hangul.cc
new file mode 100644
index 0000000000..f5915f43ae
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-hangul.cc
@@ -0,0 +1,439 @@
+/*
+ * Copyright © 2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#include "hb-ot-shape-complex.hh"
+
+
+/* Hangul shaper */
+
+
+/* Per-glyph jamo category, kept in hangul_shaping_feature().
+ * Same order as the feature array below: each value is an index into
+ * hangul_features[] and hangul_shape_plan_t::mask_array[]. */
+enum {
+ _JMO, /* no jamo feature; its slot in hangul_features[] is HB_TAG_NONE */
+
+ LJMO, /* 'ljmo' */
+ VJMO, /* 'vjmo' */
+ TJMO, /* 'tjmo' */
+
+ FIRST_HANGUL_FEATURE = LJMO, /* first index that names a real feature */
+ HANGUL_FEATURE_COUNT = TJMO + 1 /* array size, _JMO slot included */
+};
+
+static const hb_tag_t hangul_features[HANGUL_FEATURE_COUNT] =
+{ /* Indexed by the _JMO/LJMO/VJMO/TJMO enum above; keep both in the same order. */
+ HB_TAG_NONE, /* _JMO */
+ HB_TAG('l','j','m','o'), /* LJMO */
+ HB_TAG('v','j','m','o'), /* VJMO */
+ HB_TAG('t','j','m','o') /* TJMO */
+};
+
+static void
+collect_features_hangul (hb_ot_shape_planner_t *plan)
+{
+ hb_ot_map_builder_t *map = &plan->map;
+ /* Add ljmo, vjmo and tjmo to the plan's map; index 0 (_JMO, HB_TAG_NONE) is skipped. */
+ for (unsigned int i = FIRST_HANGUL_FEATURE; i < HANGUL_FEATURE_COUNT; i++)
+ map->add_feature (hangul_features[i]);
+}
+
+static void
+override_features_hangul (hb_ot_shape_planner_t *plan)
+{
+ /* Uniscribe does not apply 'calt' for Hangul, and certain fonts
+ * (Noto Sans CJK, Source Han Sans, etc) apply all of jamo lookups
+ * in calt, which is not desirable.  So 'calt' is disabled for this shaper. */
+ plan->map.disable_feature (HB_TAG('c','a','l','t'));
+}
+
+struct hangul_shape_plan_t
+{ /* Shaper data built by data_create_hangul and read by setup_masks_hangul. */
+ hb_mask_t mask_array[HANGUL_FEATURE_COUNT]; /* one mask per hangul_features[] entry, same indices */
+};
+
+static void *
+data_create_hangul (const hb_ot_shape_plan_t *plan)
+{
+ hangul_shape_plan_t *hangul_plan = (hangul_shape_plan_t *) calloc (1, sizeof (hangul_shape_plan_t));
+ if (unlikely (!hangul_plan))
+ return nullptr; /* allocation failed: no shaper data */
+ /* Cache the get_1_mask() result for every hangul_features[] entry, index 0 (HB_TAG_NONE) included. */
+ for (unsigned int i = 0; i < HANGUL_FEATURE_COUNT; i++)
+ hangul_plan->mask_array[i] = plan->map.get_1_mask (hangul_features[i]);
+
+ return hangul_plan; /* released with free() in data_destroy_hangul */
+}
+
+static void
+data_destroy_hangul (void *data)
+{
+ free (data); /* matches the calloc() in data_create_hangul */
+}
+
+/* Constants for algorithmic hangul syllable [de]composition. */
+#define LBase 0x1100u
+#define VBase 0x1161u
+#define TBase 0x11A7u /* one below the first combining T, so a T index of 0 means "no T" */
+#define LCount 19u
+#define VCount 21u
+#define TCount 28u /* the 27 combining Ts plus the "no T" slot */
+#define SBase 0xAC00u /* first precomposed syllable */
+#define NCount (VCount * TCount) /* precomposed syllables per L */
+#define SCount (LCount * NCount) /* precomposed syllables in total */
+/* isCombiningX / isCombinedS: code points covered by the arithmetic [de]composition above. */
+#define isCombiningL(u) (hb_in_range<hb_codepoint_t> ((u), LBase, LBase+LCount-1))
+#define isCombiningV(u) (hb_in_range<hb_codepoint_t> ((u), VBase, VBase+VCount-1))
+#define isCombiningT(u) (hb_in_range<hb_codepoint_t> ((u), TBase+1, TBase+TCount-1))
+#define isCombinedS(u) (hb_in_range<hb_codepoint_t> ((u), SBase, SBase+SCount-1))
+/* isL / isV / isT: any jamo of that class, including those outside the combining ranges. */
+#define isL(u) (hb_in_ranges<hb_codepoint_t> ((u), 0x1100u, 0x115Fu, 0xA960u, 0xA97Cu))
+#define isV(u) (hb_in_ranges<hb_codepoint_t> ((u), 0x1160u, 0x11A7u, 0xD7B0u, 0xD7C6u))
+#define isT(u) (hb_in_ranges<hb_codepoint_t> ((u), 0x11A8u, 0x11FFu, 0xD7CBu, 0xD7FBu))
+
+#define isHangulTone(u) (hb_in_range<hb_codepoint_t> ((u), 0x302Eu, 0x302Fu))
+
+/* buffer var allocations */
+#define hangul_shaping_feature() complex_var_u8_0() /* hangul jamo shaping feature */
+
+static bool
+is_zero_width_char (hb_font_t *font,
+ hb_codepoint_t unicode)
+{ /* True only if the font has a glyph for unicode and that glyph's horizontal advance is 0. */
+ hb_codepoint_t glyph; /* set by hb_font_get_glyph; only read when that call succeeds */
+ return hb_font_get_glyph (font, unicode, 0, &glyph) && hb_font_get_glyph_h_advance (font, glyph) == 0;
+}
+
+static void
+preprocess_text_hangul (const hb_ot_shape_plan_t *plan HB_UNUSED,
+ hb_buffer_t *buffer,
+ hb_font_t *font)
+{
+ HB_BUFFER_ALLOCATE_VAR (buffer, hangul_shaping_feature);
+
+ /* Hangul syllables come in two shapes: LV, and LVT. Of those:
+ *
+ * - LV can be precomposed, or decomposed. Let's call those
+ * <LV> and <L,V>,
+ * - LVT can be fully precomposed, partially precomposed, or
+ * fully decomposed. I.e. <LVT>, <LV,T>, or <L,V,T>.
+ *
+ * The composition / decomposition is mechanical. However, not
+ * all <L,V> sequences compose, and not all <LV,T> sequences
+ * compose.
+ *
+ * Here are the specifics:
+ *
+ * - <L>: U+1100..115F, U+A960..A97C
+ * - <V>: U+1160..11A7, U+D7B0..D7C6
+ * - <T>: U+11A8..11FF, U+D7CB..D7FB
+ *
+ * - Only the <L,V> sequences for some of the U+11xx ranges combine.
+ * - Only <LV,T> sequences for some of the Ts in U+11xx range combine.
+ *
+ * Here is what we want to accomplish in this shaper:
+ *
+ * - If the whole syllable can be precomposed, do that,
+ * - Otherwise, fully decompose and apply ljmo/vjmo/tjmo features.
+ * - If a valid syllable is followed by a Hangul tone mark, reorder the tone
+ * mark to precede the whole syllable - unless it is a zero-width glyph, in
+ * which case we leave it untouched, assuming it's designed to overstrike.
+ *
+ * That is, of the different possible syllables:
+ *
+ * <L>
+ * <L,V>
+ * <L,V,T>
+ * <LV>
+ * <LVT>
+ * <LV, T>
+ *
+ * - <L> needs no work.
+ *
+ * - <LV> and <LVT> can stay the way they are if the font supports them, otherwise we
+ * should fully decompose them if font supports.
+ *
+ * - <L,V> and <L,V,T> we should compose if the whole thing can be composed.
+ *
+ * - <LV,T> we should compose if the whole thing can be composed, otherwise we should
+ * decompose.
+ */
+
+ buffer->clear_output ();
+ unsigned int start = 0, end = 0; /* Extent of most recently seen syllable;
+ * valid only if start < end
+ */
+ unsigned int count = buffer->len;
+
+ for (buffer->idx = 0; buffer->idx < count && buffer->successful;)
+ {
+ hb_codepoint_t u = buffer->cur().codepoint;
+
+ if (isHangulTone (u))
+ {
+ /*
+ * We could cache the width of the tone marks and the existence of dotted-circle,
+ * but the use of the Hangul tone mark characters seems to be rare enough that
+ * I didn't bother for now.
+ */
+ if (start < end && end == buffer->out_len)
+ {
+ /* Tone mark follows a valid syllable; move it in front, unless it's zero width. */
+ buffer->unsafe_to_break_from_outbuffer (start, buffer->idx);
+ buffer->next_glyph ();
+ if (!is_zero_width_char (font, u))
+ {
+ buffer->merge_out_clusters (start, end + 1);
+ hb_glyph_info_t *info = buffer->out_info;
+ hb_glyph_info_t tone = info[end];
+ memmove (&info[start + 1], &info[start], (end - start) * sizeof (hb_glyph_info_t));
+ info[start] = tone;
+ }
+ }
+ else
+ {
+ /* No valid syllable as base for tone mark; try to insert dotted circle. */
+ if (!(buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE) &&
+ font->has_glyph (0x25CCu))
+ {
+ hb_codepoint_t chars[2];
+ if (!is_zero_width_char (font, u)) {
+ chars[0] = u;
+ chars[1] = 0x25CCu;
+ } else {
+ chars[0] = 0x25CCu;
+ chars[1] = u;
+ }
+ buffer->replace_glyphs (1, 2, chars);
+ }
+ else
+ {
+ /* No dotted circle available in the font; just leave tone mark untouched. */
+ buffer->next_glyph ();
+ }
+ }
+ start = end = buffer->out_len;
+ continue;
+ }
+
+ start = buffer->out_len; /* Remember current position as a potential syllable start;
+ * will only be used if we set end to a later position.
+ */
+
+ if (isL (u) && buffer->idx + 1 < count)
+ {
+ hb_codepoint_t l = u;
+ hb_codepoint_t v = buffer->cur(+1).codepoint;
+ if (isV (v))
+ {
+ /* Have <L,V> or <L,V,T>. */
+ hb_codepoint_t t = 0;
+ unsigned int tindex = 0;
+ if (buffer->idx + 2 < count)
+ {
+ t = buffer->cur(+2).codepoint;
+ if (isT (t))
+ tindex = t - TBase; /* Only used if isCombiningT (t); otherwise invalid. */
+ else
+ t = 0; /* The next character was not a trailing jamo. */
+ }
+ buffer->unsafe_to_break (buffer->idx, buffer->idx + (t ? 3 : 2));
+
+ /* We've got a syllable <L,V,T?>; see if it can potentially be composed. */
+ if (isCombiningL (l) && isCombiningV (v) && (t == 0 || isCombiningT (t)))
+ {
+ /* Try to compose; if this succeeds, end is set to start+1. */
+ hb_codepoint_t s = SBase + (l - LBase) * NCount + (v - VBase) * TCount + tindex;
+ if (font->has_glyph (s))
+ {
+ buffer->replace_glyphs (t ? 3 : 2, 1, &s);
+ if (unlikely (!buffer->successful))
+ return;
+ end = start + 1;
+ continue;
+ }
+ }
+
+ /* We didn't compose, either because it's an Old Hangul syllable without a
+ * precomposed character in Unicode, or because the font didn't support the
+ * necessary precomposed glyph.
+ * Set jamo features on the individual glyphs, and advance past them.
+ */
+ buffer->cur().hangul_shaping_feature() = LJMO;
+ buffer->next_glyph ();
+ buffer->cur().hangul_shaping_feature() = VJMO;
+ buffer->next_glyph ();
+ if (t)
+ {
+ buffer->cur().hangul_shaping_feature() = TJMO;
+ buffer->next_glyph ();
+ end = start + 3;
+ }
+ else
+ end = start + 2;
+ if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
+ buffer->merge_out_clusters (start, end);
+ continue;
+ }
+ }
+
+ else if (isCombinedS (u))
+ {
+ /* Have <LV>, <LVT>, or <LV,T> */
+ hb_codepoint_t s = u;
+ bool has_glyph = font->has_glyph (s);
+ unsigned int lindex = (s - SBase) / NCount;
+ unsigned int nindex = (s - SBase) % NCount;
+ unsigned int vindex = nindex / TCount;
+ unsigned int tindex = nindex % TCount;
+
+ if (!tindex &&
+ buffer->idx + 1 < count &&
+ isCombiningT (buffer->cur(+1).codepoint))
+ {
+ /* <LV,T>, try to combine. */
+ unsigned int new_tindex = buffer->cur(+1).codepoint - TBase;
+ hb_codepoint_t new_s = s + new_tindex;
+ if (font->has_glyph (new_s))
+ {
+ buffer->replace_glyphs (2, 1, &new_s);
+ if (unlikely (!buffer->successful))
+ return;
+ end = start + 1;
+ continue;
+ }
+ else
+ buffer->unsafe_to_break (buffer->idx, buffer->idx + 2); /* Mark unsafe between LV and T. */
+ }
+
+ /* Otherwise, decompose if font doesn't support <LV> or <LVT>,
+ * or if having non-combining <LV,T>. Note that we already handled
+ * combining <LV,T> above. */
+ if (!has_glyph ||
+ (!tindex &&
+ buffer->idx + 1 < count &&
+ isT (buffer->cur(+1).codepoint)))
+ {
+ hb_codepoint_t decomposed[3] = {LBase + lindex,
+ VBase + vindex,
+ TBase + tindex};
+ if (font->has_glyph (decomposed[0]) &&
+ font->has_glyph (decomposed[1]) &&
+ (!tindex || font->has_glyph (decomposed[2])))
+ {
+ unsigned int s_len = tindex ? 3 : 2;
+ buffer->replace_glyphs (1, s_len, decomposed);
+
+ /* If we decomposed an LV because of a non-combining T following,
+ * we want to include this T in the syllable.
+ */
+ if (has_glyph && !tindex)
+ {
+ buffer->next_glyph ();
+ s_len++;
+ }
+
+ if (unlikely (!buffer->successful))
+ return;
+
+ /* We decomposed S: apply jamo features to the individual glyphs
+ * that are now in buffer->out_info.
+ */
+ hb_glyph_info_t *info = buffer->out_info;
+ end = start + s_len;
+
+ unsigned int i = start;
+ info[i++].hangul_shaping_feature() = LJMO;
+ info[i++].hangul_shaping_feature() = VJMO;
+ if (i < end)
+ info[i++].hangul_shaping_feature() = TJMO;
+
+ if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
+ buffer->merge_out_clusters (start, end);
+ continue;
+ }
+ else if ((!tindex && buffer->idx + 1 < count && isT (buffer->cur(+1).codepoint)))
+ buffer->unsafe_to_break (buffer->idx, buffer->idx + 2); /* Mark unsafe between LV and T. */
+ }
+
+ if (has_glyph)
+ {
+ /* We didn't decompose the S, so just advance past it. */
+ end = start + 1;
+ buffer->next_glyph ();
+ continue;
+ }
+ }
+
+ /* Didn't find a recognizable syllable, so we leave end <= start;
+ * this will prevent tone-mark reordering happening.
+ */
+ buffer->next_glyph ();
+ }
+ buffer->swap_buffers ();
+}
+
+static void
+setup_masks_hangul (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ hb_font_t *font HB_UNUSED)
+{
+ const hangul_shape_plan_t *hangul_plan = (const hangul_shape_plan_t *) plan->data;
+ /* OR into each glyph the mask cached for the jamo feature that preprocess_text_hangul recorded on it. */
+ if (likely (hangul_plan))
+ {
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ for (unsigned int i = 0; i < count; i++, info++)
+ info->mask |= hangul_plan->mask_array[info->hangul_shaping_feature()];
+ }
+ /* Release the per-glyph var allocated in preprocess_text_hangul; done even when hangul_plan is null. */
+ HB_BUFFER_DEALLOCATE_VAR (buffer, hangul_shaping_feature);
+}
+
+
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul =
+{
+ collect_features_hangul, /* collect_features */
+ override_features_hangul, /* override_features */
+ data_create_hangul, /* data_create */
+ data_destroy_hangul, /* data_destroy */
+ preprocess_text_hangul, /* preprocess_text */
+ nullptr, /* postprocess_glyphs */
+ HB_OT_SHAPE_NORMALIZATION_MODE_NONE, /* normalization mode; preprocess_text_hangul does the [de]composition itself */
+ nullptr, /* decompose */
+ nullptr, /* compose */
+ setup_masks_hangul, /* setup_masks */
+ HB_TAG_NONE, /* gpos_tag */
+ nullptr, /* reorder_marks */
+ HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, /* zero-width marks mode */
+ false, /* fallback_position */
+};
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-hebrew.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-hebrew.cc
new file mode 100644
index 0000000000..334d3ded82
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-hebrew.cc
@@ -0,0 +1,185 @@
+/*
+ * Copyright © 2010,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#include "hb-ot-shape-complex.hh"
+
+
+static bool
+compose_hebrew (const hb_ot_shape_normalize_context_t *c,
+ hb_codepoint_t a,
+ hb_codepoint_t b,
+ hb_codepoint_t *ab)
+{
+ /* Hebrew presentation-form shaping.
+ * https://bugzilla.mozilla.org/show_bug.cgi?id=728866
+ * Hebrew presentation forms with dagesh, for characters U+05D0..05EA;
+ * Note that some letters do not have a dagesh presForm encoded;
+ * those entries are 0.
+ */
+ static const hb_codepoint_t sDageshForms[0x05EAu - 0x05D0u + 1] = {
+ 0xFB30u, /* ALEF */
+ 0xFB31u, /* BET */
+ 0xFB32u, /* GIMEL */
+ 0xFB33u, /* DALET */
+ 0xFB34u, /* HE */
+ 0xFB35u, /* VAV */
+ 0xFB36u, /* ZAYIN */
+ 0x0000u, /* HET (no dagesh form) */
+ 0xFB38u, /* TET */
+ 0xFB39u, /* YOD */
+ 0xFB3Au, /* FINAL KAF */
+ 0xFB3Bu, /* KAF */
+ 0xFB3Cu, /* LAMED */
+ 0x0000u, /* FINAL MEM (no dagesh form) */
+ 0xFB3Eu, /* MEM */
+ 0x0000u, /* FINAL NUN (no dagesh form) */
+ 0xFB40u, /* NUN */
+ 0xFB41u, /* SAMEKH */
+ 0x0000u, /* AYIN (no dagesh form) */
+ 0xFB43u, /* FINAL PE */
+ 0xFB44u, /* PE */
+ 0x0000u, /* FINAL TSADI (no dagesh form) */
+ 0xFB46u, /* TSADI */
+ 0xFB47u, /* QOF */
+ 0xFB48u, /* RESH */
+ 0xFB49u, /* SHIN */
+ 0xFB4Au /* TAV */
+ };
+
+ bool found = (bool) c->unicode->compose (a, b, ab);
+
+#ifdef HB_NO_OT_SHAPE_COMPLEX_HEBREW_FALLBACK
+ return found; /* fallback compiled out: only the composition above is used */
+#endif
+
+ if (!found && !c->plan->has_gpos_mark)
+ {
+ /* Special-case Hebrew presentation forms that are excluded from
+ * standard normalization, but wanted for old fonts.  Keyed on the
+ * second character b, then on the first character a. */
+ switch (b) {
+ case 0x05B4u: /* HIRIQ */
+ if (a == 0x05D9u) { /* YOD */
+ *ab = 0xFB1Du;
+ found = true;
+ }
+ break;
+ case 0x05B7u: /* PATAH */
+ if (a == 0x05F2u) { /* YIDDISH YOD YOD */
+ *ab = 0xFB1Fu;
+ found = true;
+ } else if (a == 0x05D0u) { /* ALEF */
+ *ab = 0xFB2Eu;
+ found = true;
+ }
+ break;
+ case 0x05B8u: /* QAMATS */
+ if (a == 0x05D0u) { /* ALEF */
+ *ab = 0xFB2Fu;
+ found = true;
+ }
+ break;
+ case 0x05B9u: /* HOLAM */
+ if (a == 0x05D5u) { /* VAV */
+ *ab = 0xFB4Bu;
+ found = true;
+ }
+ break;
+ case 0x05BCu: /* DAGESH */
+ if (a >= 0x05D0u && a <= 0x05EAu) {
+ *ab = sDageshForms[a - 0x05D0u];
+ found = (*ab != 0); /* a 0 table entry means there is no form to compose to */
+ } else if (a == 0xFB2Au) { /* SHIN WITH SHIN DOT */
+ *ab = 0xFB2Cu;
+ found = true;
+ } else if (a == 0xFB2Bu) { /* SHIN WITH SIN DOT */
+ *ab = 0xFB2Du;
+ found = true;
+ }
+ break;
+ case 0x05BFu: /* RAFE */
+ switch (a) {
+ case 0x05D1u: /* BET */
+ *ab = 0xFB4Cu;
+ found = true;
+ break;
+ case 0x05DBu: /* KAF */
+ *ab = 0xFB4Du;
+ found = true;
+ break;
+ case 0x05E4u: /* PE */
+ *ab = 0xFB4Eu;
+ found = true;
+ break;
+ }
+ break;
+ case 0x05C1u: /* SHIN DOT */
+ if (a == 0x05E9u) { /* SHIN */
+ *ab = 0xFB2Au;
+ found = true;
+ } else if (a == 0xFB49u) { /* SHIN WITH DAGESH */
+ *ab = 0xFB2Cu;
+ found = true;
+ }
+ break;
+ case 0x05C2u: /* SIN DOT */
+ if (a == 0x05E9u) { /* SHIN */
+ *ab = 0xFB2Bu;
+ found = true;
+ } else if (a == 0xFB49u) { /* SHIN WITH DAGESH */
+ *ab = 0xFB2Du;
+ found = true;
+ }
+ break;
+ }
+ }
+
+ return found;
+}
+
+
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hebrew =
+{ /* Only the compose hook and the gpos_tag are Hebrew-specific. */
+ nullptr, /* collect_features */
+ nullptr, /* override_features */
+ nullptr, /* data_create */
+ nullptr, /* data_destroy */
+ nullptr, /* preprocess_text */
+ nullptr, /* postprocess_glyphs */
+ HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, /* normalization mode */
+ nullptr, /* decompose */
+ compose_hebrew, /* compose */
+ nullptr, /* setup_masks */
+ HB_TAG ('h','e','b','r'), /* gpos_tag. https://github.com/harfbuzz/harfbuzz/issues/347#issuecomment-267838368 */
+ nullptr, /* reorder_marks */
+ HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, /* zero-width marks mode */
+ true, /* fallback_position */
+};
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic-machine.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic-machine.hh
new file mode 100644
index 0000000000..670b6bf486
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic-machine.hh
@@ -0,0 +1,574 @@
+
+#line 1 "hb-ot-shape-complex-indic-machine.rl"
+/*
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH
+#define HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH
+
+#include "hb.hh"
+
+
+#line 36 "hb-ot-shape-complex-indic-machine.hh"
+static const unsigned char _indic_syllable_machine_trans_keys[] = {
+ 8u, 8u, 4u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, 6u, 6u, 16u, 16u, 4u, 8u,
+ 4u, 13u, 4u, 8u, 8u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, 6u, 6u, 16u, 16u,
+ 4u, 8u, 4u, 13u, 4u, 13u, 4u, 13u, 8u, 8u, 5u, 7u, 5u, 8u, 4u, 8u,
+ 6u, 6u, 16u, 16u, 4u, 8u, 4u, 8u, 4u, 13u, 8u, 8u, 5u, 7u, 5u, 8u,
+ 4u, 8u, 6u, 6u, 16u, 16u, 4u, 8u, 4u, 8u, 5u, 8u, 8u, 8u, 1u, 19u,
+ 3u, 17u, 3u, 17u, 4u, 17u, 1u, 16u, 5u, 10u, 5u, 10u, 10u, 10u, 5u, 10u,
+ 1u, 16u, 1u, 16u, 1u, 16u, 3u, 10u, 4u, 10u, 5u, 10u, 4u, 10u, 5u, 10u,
+ 3u, 10u, 5u, 10u, 3u, 17u, 3u, 17u, 3u, 17u, 3u, 17u, 4u, 17u, 1u, 16u,
+ 3u, 17u, 3u, 17u, 4u, 17u, 1u, 16u, 5u, 10u, 10u, 10u, 5u, 10u, 1u, 16u,
+ 1u, 16u, 3u, 10u, 4u, 10u, 5u, 10u, 4u, 10u, 5u, 10u, 5u, 10u, 3u, 10u,
+ 5u, 10u, 3u, 17u, 3u, 17u, 4u, 8u, 3u, 17u, 3u, 17u, 4u, 17u, 1u, 16u,
+ 3u, 17u, 1u, 16u, 5u, 10u, 10u, 10u, 5u, 10u, 1u, 16u, 1u, 16u, 3u, 10u,
+ 4u, 10u, 5u, 10u, 3u, 17u, 4u, 10u, 5u, 10u, 5u, 10u, 3u, 10u, 5u, 10u,
+ 3u, 17u, 4u, 13u, 4u, 8u, 3u, 17u, 3u, 17u, 4u, 17u, 1u, 16u, 3u, 17u,
+ 1u, 16u, 5u, 10u, 10u, 10u, 5u, 10u, 1u, 16u, 1u, 16u, 3u, 10u, 4u, 10u,
+ 5u, 10u, 3u, 17u, 4u, 10u, 5u, 10u, 5u, 10u, 3u, 10u, 5u, 10u, 1u, 17u,
+ 3u, 17u, 1u, 17u, 4u, 13u, 5u, 10u, 10u, 10u, 5u, 10u, 1u, 16u, 3u, 10u,
+ 5u, 10u, 5u, 10u, 10u, 10u, 5u, 10u, 1u, 16u, 0
+};
+
+static const char _indic_syllable_machine_key_spans[] = {
+ 1, 5, 3, 4, 5, 1, 1, 5,
+ 10, 5, 1, 3, 4, 5, 1, 1,
+ 5, 10, 10, 10, 1, 3, 4, 5,
+ 1, 1, 5, 5, 10, 1, 3, 4,
+ 5, 1, 1, 5, 5, 4, 1, 19,
+ 15, 15, 14, 16, 6, 6, 1, 6,
+ 16, 16, 16, 8, 7, 6, 7, 6,
+ 8, 6, 15, 15, 15, 15, 14, 16,
+ 15, 15, 14, 16, 6, 1, 6, 16,
+ 16, 8, 7, 6, 7, 6, 6, 8,
+ 6, 15, 15, 5, 15, 15, 14, 16,
+ 15, 16, 6, 1, 6, 16, 16, 8,
+ 7, 6, 15, 7, 6, 6, 8, 6,
+ 15, 10, 5, 15, 15, 14, 16, 15,
+ 16, 6, 1, 6, 16, 16, 8, 7,
+ 6, 15, 7, 6, 6, 8, 6, 17,
+ 15, 17, 10, 6, 1, 6, 16, 8,
+ 6, 6, 1, 6, 16
+};
+
+static const short _indic_syllable_machine_index_offsets[] = {
+ 0, 2, 8, 12, 17, 23, 25, 27,
+ 33, 44, 50, 52, 56, 61, 67, 69,
+ 71, 77, 88, 99, 110, 112, 116, 121,
+ 127, 129, 131, 137, 143, 154, 156, 160,
+ 165, 171, 173, 175, 181, 187, 192, 194,
+ 214, 230, 246, 261, 278, 285, 292, 294,
+ 301, 318, 335, 352, 361, 369, 376, 384,
+ 391, 400, 407, 423, 439, 455, 471, 486,
+ 503, 519, 535, 550, 567, 574, 576, 583,
+ 600, 617, 626, 634, 641, 649, 656, 663,
+ 672, 679, 695, 711, 717, 733, 749, 764,
+ 781, 797, 814, 821, 823, 830, 847, 864,
+ 873, 881, 888, 904, 912, 919, 926, 935,
+ 942, 958, 969, 975, 991, 1007, 1022, 1039,
+ 1055, 1072, 1079, 1081, 1088, 1105, 1122, 1131,
+ 1139, 1146, 1162, 1170, 1177, 1184, 1193, 1200,
+ 1218, 1234, 1252, 1263, 1270, 1272, 1279, 1296,
+ 1305, 1312, 1319, 1321, 1328
+};
+
+static const unsigned char _indic_syllable_machine_indicies[] = {
+ 1, 0, 2, 3, 3, 4, 1, 0,
+ 3, 3, 4, 0, 3, 3, 4, 1,
+ 0, 5, 3, 3, 4, 1, 0, 6,
+ 0, 7, 0, 8, 3, 3, 4, 1,
+ 0, 2, 3, 3, 4, 1, 0, 0,
+ 0, 0, 9, 0, 11, 12, 12, 13,
+ 14, 10, 14, 10, 12, 12, 13, 10,
+ 12, 12, 13, 14, 10, 15, 12, 12,
+ 13, 14, 10, 16, 10, 17, 10, 18,
+ 12, 12, 13, 14, 10, 11, 12, 12,
+ 13, 14, 10, 10, 10, 10, 19, 10,
+ 11, 12, 12, 13, 14, 10, 10, 10,
+ 10, 20, 10, 22, 23, 23, 24, 25,
+ 21, 21, 21, 21, 26, 21, 25, 21,
+ 23, 23, 24, 27, 23, 23, 24, 25,
+ 21, 28, 23, 23, 24, 25, 21, 29,
+ 21, 30, 21, 22, 23, 23, 24, 25,
+ 21, 31, 23, 23, 24, 25, 21, 33,
+ 34, 34, 35, 36, 32, 32, 32, 32,
+ 37, 32, 36, 32, 34, 34, 35, 32,
+ 34, 34, 35, 36, 32, 38, 34, 34,
+ 35, 36, 32, 39, 32, 40, 32, 33,
+ 34, 34, 35, 36, 32, 41, 34, 34,
+ 35, 36, 32, 23, 23, 24, 1, 0,
+ 43, 42, 45, 46, 47, 48, 49, 50,
+ 24, 25, 44, 51, 52, 52, 26, 44,
+ 53, 54, 55, 56, 57, 44, 59, 60,
+ 61, 62, 4, 1, 58, 63, 58, 58,
+ 9, 58, 58, 58, 64, 58, 65, 60,
+ 66, 66, 4, 1, 58, 63, 58, 58,
+ 58, 58, 58, 58, 64, 58, 60, 66,
+ 66, 4, 1, 58, 63, 58, 58, 58,
+ 58, 58, 58, 64, 58, 45, 58, 58,
+ 58, 67, 68, 58, 1, 58, 63, 58,
+ 58, 58, 58, 58, 45, 58, 69, 69,
+ 58, 1, 58, 63, 58, 63, 58, 58,
+ 70, 58, 63, 58, 63, 58, 63, 58,
+ 58, 58, 58, 63, 58, 45, 58, 71,
+ 58, 69, 69, 58, 1, 58, 63, 58,
+ 58, 58, 58, 58, 45, 58, 45, 58,
+ 58, 58, 69, 69, 58, 1, 58, 63,
+ 58, 58, 58, 58, 58, 45, 58, 45,
+ 58, 58, 58, 69, 68, 58, 1, 58,
+ 63, 58, 58, 58, 58, 58, 45, 58,
+ 72, 7, 73, 74, 4, 1, 58, 63,
+ 58, 7, 73, 74, 4, 1, 58, 63,
+ 58, 73, 73, 4, 1, 58, 63, 58,
+ 75, 76, 76, 4, 1, 58, 63, 58,
+ 67, 77, 58, 1, 58, 63, 58, 67,
+ 58, 69, 69, 58, 1, 58, 63, 58,
+ 69, 77, 58, 1, 58, 63, 58, 59,
+ 60, 66, 66, 4, 1, 58, 63, 58,
+ 58, 58, 58, 58, 58, 64, 58, 59,
+ 60, 61, 66, 4, 1, 58, 63, 58,
+ 58, 9, 58, 58, 58, 64, 58, 79,
+ 80, 81, 82, 13, 14, 78, 83, 78,
+ 78, 20, 78, 78, 78, 84, 78, 85,
+ 80, 86, 82, 13, 14, 78, 83, 78,
+ 78, 78, 78, 78, 78, 84, 78, 80,
+ 86, 82, 13, 14, 78, 83, 78, 78,
+ 78, 78, 78, 78, 84, 78, 87, 78,
+ 78, 78, 88, 89, 78, 14, 78, 83,
+ 78, 78, 78, 78, 78, 87, 78, 90,
+ 80, 91, 92, 13, 14, 78, 83, 78,
+ 78, 19, 78, 78, 78, 84, 78, 93,
+ 80, 86, 86, 13, 14, 78, 83, 78,
+ 78, 78, 78, 78, 78, 84, 78, 80,
+ 86, 86, 13, 14, 78, 83, 78, 78,
+ 78, 78, 78, 78, 84, 78, 87, 78,
+ 78, 78, 94, 89, 78, 14, 78, 83,
+ 78, 78, 78, 78, 78, 87, 78, 83,
+ 78, 78, 95, 78, 83, 78, 83, 78,
+ 83, 78, 78, 78, 78, 83, 78, 87,
+ 78, 96, 78, 94, 94, 78, 14, 78,
+ 83, 78, 78, 78, 78, 78, 87, 78,
+ 87, 78, 78, 78, 94, 94, 78, 14,
+ 78, 83, 78, 78, 78, 78, 78, 87,
+ 78, 97, 17, 98, 99, 13, 14, 78,
+ 83, 78, 17, 98, 99, 13, 14, 78,
+ 83, 78, 98, 98, 13, 14, 78, 83,
+ 78, 100, 101, 101, 13, 14, 78, 83,
+ 78, 88, 102, 78, 14, 78, 83, 78,
+ 94, 94, 78, 14, 78, 83, 78, 88,
+ 78, 94, 94, 78, 14, 78, 83, 78,
+ 94, 102, 78, 14, 78, 83, 78, 90,
+ 80, 86, 86, 13, 14, 78, 83, 78,
+ 78, 78, 78, 78, 78, 84, 78, 90,
+ 80, 91, 86, 13, 14, 78, 83, 78,
+ 78, 19, 78, 78, 78, 84, 78, 11,
+ 12, 12, 13, 14, 78, 79, 80, 86,
+ 82, 13, 14, 78, 83, 78, 78, 78,
+ 78, 78, 78, 84, 78, 104, 48, 105,
+ 105, 24, 25, 103, 51, 103, 103, 103,
+ 103, 103, 103, 55, 103, 48, 105, 105,
+ 24, 25, 103, 51, 103, 103, 103, 103,
+ 103, 103, 55, 103, 106, 103, 103, 103,
+ 107, 108, 103, 25, 103, 51, 103, 103,
+ 103, 103, 103, 106, 103, 47, 48, 109,
+ 110, 24, 25, 103, 51, 103, 103, 26,
+ 103, 103, 103, 55, 103, 106, 103, 103,
+ 103, 111, 108, 103, 25, 103, 51, 103,
+ 103, 103, 103, 103, 106, 103, 51, 103,
+ 103, 112, 103, 51, 103, 51, 103, 51,
+ 103, 103, 103, 103, 51, 103, 106, 103,
+ 113, 103, 111, 111, 103, 25, 103, 51,
+ 103, 103, 103, 103, 103, 106, 103, 106,
+ 103, 103, 103, 111, 111, 103, 25, 103,
+ 51, 103, 103, 103, 103, 103, 106, 103,
+ 114, 30, 115, 116, 24, 25, 103, 51,
+ 103, 30, 115, 116, 24, 25, 103, 51,
+ 103, 115, 115, 24, 25, 103, 51, 103,
+ 47, 48, 105, 105, 24, 25, 103, 51,
+ 103, 103, 103, 103, 103, 103, 55, 103,
+ 117, 118, 118, 24, 25, 103, 51, 103,
+ 107, 119, 103, 25, 103, 51, 103, 111,
+ 111, 103, 25, 103, 51, 103, 107, 103,
+ 111, 111, 103, 25, 103, 51, 103, 111,
+ 119, 103, 25, 103, 51, 103, 47, 48,
+ 109, 105, 24, 25, 103, 51, 103, 103,
+ 26, 103, 103, 103, 55, 103, 22, 23,
+ 23, 24, 25, 120, 120, 120, 120, 26,
+ 120, 22, 23, 23, 24, 25, 120, 122,
+ 123, 124, 125, 35, 36, 121, 126, 121,
+ 121, 37, 121, 121, 121, 127, 121, 128,
+ 123, 125, 125, 35, 36, 121, 126, 121,
+ 121, 121, 121, 121, 121, 127, 121, 123,
+ 125, 125, 35, 36, 121, 126, 121, 121,
+ 121, 121, 121, 121, 127, 121, 129, 121,
+ 121, 121, 130, 131, 121, 36, 121, 126,
+ 121, 121, 121, 121, 121, 129, 121, 122,
+ 123, 124, 52, 35, 36, 121, 126, 121,
+ 121, 37, 121, 121, 121, 127, 121, 129,
+ 121, 121, 121, 132, 131, 121, 36, 121,
+ 126, 121, 121, 121, 121, 121, 129, 121,
+ 126, 121, 121, 133, 121, 126, 121, 126,
+ 121, 126, 121, 121, 121, 121, 126, 121,
+ 129, 121, 134, 121, 132, 132, 121, 36,
+ 121, 126, 121, 121, 121, 121, 121, 129,
+ 121, 129, 121, 121, 121, 132, 132, 121,
+ 36, 121, 126, 121, 121, 121, 121, 121,
+ 129, 121, 135, 40, 136, 137, 35, 36,
+ 121, 126, 121, 40, 136, 137, 35, 36,
+ 121, 126, 121, 136, 136, 35, 36, 121,
+ 126, 121, 122, 123, 125, 125, 35, 36,
+ 121, 126, 121, 121, 121, 121, 121, 121,
+ 127, 121, 138, 139, 139, 35, 36, 121,
+ 126, 121, 130, 140, 121, 36, 121, 126,
+ 121, 132, 132, 121, 36, 121, 126, 121,
+ 130, 121, 132, 132, 121, 36, 121, 126,
+ 121, 132, 140, 121, 36, 121, 126, 121,
+ 45, 46, 47, 48, 109, 105, 24, 25,
+ 103, 51, 52, 52, 26, 103, 103, 45,
+ 55, 103, 59, 141, 61, 62, 4, 1,
+ 58, 63, 58, 58, 9, 58, 58, 58,
+ 64, 58, 45, 46, 47, 48, 142, 143,
+ 24, 144, 58, 145, 58, 52, 26, 58,
+ 58, 45, 55, 58, 22, 146, 146, 24,
+ 144, 58, 63, 58, 58, 26, 58, 145,
+ 58, 58, 147, 58, 145, 58, 145, 58,
+ 145, 58, 58, 58, 58, 145, 58, 45,
+ 58, 71, 22, 146, 146, 24, 144, 58,
+ 63, 58, 58, 58, 58, 58, 45, 58,
+ 149, 148, 150, 150, 148, 43, 148, 151,
+ 148, 150, 150, 148, 43, 148, 151, 148,
+ 151, 148, 148, 152, 148, 151, 148, 151,
+ 148, 151, 148, 148, 148, 148, 151, 148,
+ 45, 120, 120, 120, 120, 120, 120, 120,
+ 120, 120, 52, 120, 120, 120, 120, 45,
+ 120, 0
+};
+
+static const unsigned char _indic_syllable_machine_trans_targs[] = {
+ 39, 45, 50, 2, 51, 5, 6, 53,
+ 57, 58, 39, 67, 11, 73, 68, 14,
+ 15, 75, 80, 81, 84, 39, 89, 21,
+ 95, 90, 98, 39, 24, 25, 97, 103,
+ 39, 112, 30, 118, 113, 121, 33, 34,
+ 120, 126, 39, 137, 39, 40, 60, 85,
+ 87, 105, 106, 91, 107, 127, 128, 99,
+ 135, 140, 39, 41, 43, 8, 59, 46,
+ 54, 42, 1, 44, 48, 0, 47, 49,
+ 52, 3, 4, 55, 7, 56, 39, 61,
+ 63, 18, 83, 69, 76, 62, 9, 64,
+ 78, 71, 65, 17, 82, 66, 10, 70,
+ 72, 74, 12, 13, 77, 16, 79, 39,
+ 86, 26, 88, 101, 93, 19, 104, 20,
+ 92, 94, 96, 22, 23, 100, 27, 102,
+ 39, 39, 108, 110, 28, 35, 114, 122,
+ 109, 111, 124, 116, 29, 115, 117, 119,
+ 31, 32, 123, 36, 125, 129, 130, 134,
+ 131, 132, 37, 133, 39, 136, 38, 138,
+ 139
+};
+
+static const char _indic_syllable_machine_trans_actions[] = {
+ 1, 0, 2, 0, 2, 0, 0, 2,
+ 2, 2, 3, 2, 0, 2, 0, 0,
+ 0, 2, 2, 2, 2, 4, 2, 0,
+ 5, 0, 5, 6, 0, 0, 5, 2,
+ 7, 2, 0, 2, 0, 2, 0, 0,
+ 2, 2, 8, 0, 11, 2, 2, 5,
+ 0, 12, 12, 0, 2, 5, 2, 5,
+ 2, 0, 13, 2, 0, 0, 2, 0,
+ 2, 2, 0, 2, 2, 0, 0, 2,
+ 2, 0, 0, 0, 0, 2, 14, 2,
+ 0, 0, 2, 0, 2, 2, 0, 2,
+ 2, 2, 2, 0, 2, 2, 0, 0,
+ 2, 2, 0, 0, 0, 0, 2, 15,
+ 5, 0, 5, 2, 2, 0, 5, 0,
+ 0, 2, 5, 0, 0, 0, 0, 2,
+ 16, 17, 2, 0, 0, 0, 0, 2,
+ 2, 2, 2, 2, 0, 0, 2, 2,
+ 0, 0, 0, 0, 2, 0, 18, 18,
+ 0, 0, 0, 0, 19, 2, 0, 0,
+ 0
+};
+
+static const char _indic_syllable_machine_to_state_actions[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 9,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0
+};
+
+static const char _indic_syllable_machine_from_state_actions[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0
+};
+
+static const short _indic_syllable_machine_eof_trans[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 22, 22, 28, 22, 22,
+ 22, 22, 22, 22, 33, 33, 33, 33,
+ 33, 33, 33, 33, 33, 1, 43, 0,
+ 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 79, 79, 79, 79,
+ 79, 79, 79, 79, 79, 79, 79, 79,
+ 79, 79, 79, 79, 79, 79, 79, 79,
+ 79, 79, 79, 79, 79, 104, 104, 104,
+ 104, 104, 104, 104, 104, 104, 104, 104,
+ 104, 104, 104, 104, 104, 104, 104, 104,
+ 104, 121, 121, 122, 122, 122, 122, 122,
+ 122, 122, 122, 122, 122, 122, 122, 122,
+ 122, 122, 122, 122, 122, 122, 122, 104,
+ 59, 59, 59, 59, 59, 59, 59, 149,
+ 149, 149, 149, 149, 121
+};
+
+static const int indic_syllable_machine_start = 39;
+static const int indic_syllable_machine_first_final = 39;
+static const int indic_syllable_machine_error = -1;
+
+static const int indic_syllable_machine_en_main = 39;
+
+
+#line 36 "hb-ot-shape-complex-indic-machine.rl"
+
+
+
+#line 93 "hb-ot-shape-complex-indic-machine.rl"
+/* found_syllable(type): tags every glyph in the half-open range [ts, te) with (syllable_serial << 4) | indic_<type>, then advances syllable_serial, wrapping from 15 back to 1. */
+/* Expects ts, te, info and syllable_serial to be in scope where the macro is expanded; the "if (0) fprintf" line is a compiled-out debug trace. */
+#define found_syllable(syllable_type) \
+ HB_STMT_START { \
+ if (0) fprintf (stderr, "syllable %d..%d %s\n", ts, te, #syllable_type); \
+ for (unsigned int i = ts; i < te; i++) \
+ info[i].syllable() = (syllable_serial << 4) | indic_##syllable_type; \
+ syllable_serial++; \
+ if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
+ } HB_STMT_END
+/* Splits buffer->info[0 .. buffer->len) into syllables: runs the table-driven state machine over each glyph's indic_category() and tags every recognised span through found_syllable(). Generated scanner code; do not reorder statements. */
+static void
+find_syllables_indic (hb_buffer_t *buffer)
+{
+ unsigned int p, pe, eof, ts, te, act; /* p: cursor; pe/eof: end of input; ts/te: token start/end; act: rule id remembered by actions 18/5/12 and consumed by action 6 */
+ int cs; /* current state */
+ hb_glyph_info_t *info = buffer->info;
+
+#line 411 "hb-ot-shape-complex-indic-machine.hh"
+ {
+ cs = indic_syllable_machine_start;
+ ts = 0;
+ te = 0;
+ act = 0;
+ }
+
+#line 113 "hb-ot-shape-complex-indic-machine.rl"
+
+
+ p = 0;
+ pe = eof = buffer->len;
+
+ unsigned int syllable_serial = 1; /* read and advanced by found_syllable() */
+
+#line 427 "hb-ot-shape-complex-indic-machine.hh"
+ {
+ int _slen;
+ int _trans;
+ const unsigned char *_keys;
+ const unsigned char *_inds;
+ if ( p == pe ) /* empty buffer: skip straight to the end-of-input check */
+ goto _test_eof;
+_resume: /* entered once per glyph */
+ switch ( _indic_syllable_machine_from_state_actions[cs] ) {
+ case 10:
+#line 1 "NONE"
+ {ts = p;}
+ break;
+#line 441 "hb-ot-shape-complex-indic-machine.hh"
+ }
+/* Pick the transition for info[p]: a category inside [_keys[0], _keys[1]] indexes this state's row directly; anything else (or an empty span) uses slot _slen of the row as the default. */
+ _keys = _indic_syllable_machine_trans_keys + (cs<<1);
+ _inds = _indic_syllable_machine_indicies + _indic_syllable_machine_index_offsets[cs];
+
+ _slen = _indic_syllable_machine_key_spans[cs];
+ _trans = _inds[ _slen > 0 && _keys[0] <=( info[p].indic_category()) &&
+ ( info[p].indic_category()) <= _keys[1] ?
+ ( info[p].indic_category()) - _keys[0] : _slen ];
+
+_eof_trans: /* also reached from the end-of-input handling with _trans already set */
+ cs = _indic_syllable_machine_trans_targs[_trans];
+
+ if ( _indic_syllable_machine_trans_actions[_trans] == 0 )
+ goto _again;
+/* Transition actions: either record/extend the token end (te, act) or emit a syllable. "p--" and "p = te - 1" back the cursor up so that the ++p after _again resumes at te. */
+ switch ( _indic_syllable_machine_trans_actions[_trans] ) {
+ case 2:
+#line 1 "NONE"
+ {te = p+1;}
+ break;
+ case 11:
+#line 89 "hb-ot-shape-complex-indic-machine.rl"
+ {te = p+1;{ found_syllable (non_indic_cluster); }}
+ break;
+ case 13:
+#line 84 "hb-ot-shape-complex-indic-machine.rl"
+ {te = p;p--;{ found_syllable (consonant_syllable); }}
+ break;
+ case 14:
+#line 85 "hb-ot-shape-complex-indic-machine.rl"
+ {te = p;p--;{ found_syllable (vowel_syllable); }}
+ break;
+ case 17:
+#line 86 "hb-ot-shape-complex-indic-machine.rl"
+ {te = p;p--;{ found_syllable (standalone_cluster); }}
+ break;
+ case 19:
+#line 87 "hb-ot-shape-complex-indic-machine.rl"
+ {te = p;p--;{ found_syllable (symbol_cluster); }}
+ break;
+ case 15:
+#line 88 "hb-ot-shape-complex-indic-machine.rl"
+ {te = p;p--;{ found_syllable (broken_cluster); }}
+ break;
+ case 16:
+#line 89 "hb-ot-shape-complex-indic-machine.rl"
+ {te = p;p--;{ found_syllable (non_indic_cluster); }}
+ break;
+ case 1:
+#line 84 "hb-ot-shape-complex-indic-machine.rl"
+ {{p = ((te))-1;}{ found_syllable (consonant_syllable); }}
+ break;
+ case 3:
+#line 85 "hb-ot-shape-complex-indic-machine.rl"
+ {{p = ((te))-1;}{ found_syllable (vowel_syllable); }}
+ break;
+ case 7:
+#line 86 "hb-ot-shape-complex-indic-machine.rl"
+ {{p = ((te))-1;}{ found_syllable (standalone_cluster); }}
+ break;
+ case 8:
+#line 87 "hb-ot-shape-complex-indic-machine.rl"
+ {{p = ((te))-1;}{ found_syllable (symbol_cluster); }}
+ break;
+ case 4:
+#line 88 "hb-ot-shape-complex-indic-machine.rl"
+ {{p = ((te))-1;}{ found_syllable (broken_cluster); }}
+ break;
+ case 6:
+#line 1 "NONE"
+ { switch( act ) {
+ case 1:
+ {{p = ((te))-1;} found_syllable (consonant_syllable); }
+ break;
+ case 5:
+ {{p = ((te))-1;} found_syllable (broken_cluster); }
+ break;
+ case 6:
+ {{p = ((te))-1;} found_syllable (non_indic_cluster); }
+ break;
+ }
+ }
+ break;
+ case 18:
+#line 1 "NONE"
+ {te = p+1;}
+#line 84 "hb-ot-shape-complex-indic-machine.rl"
+ {act = 1;}
+ break;
+ case 5:
+#line 1 "NONE"
+ {te = p+1;}
+#line 88 "hb-ot-shape-complex-indic-machine.rl"
+ {act = 5;}
+ break;
+ case 12:
+#line 1 "NONE"
+ {te = p+1;}
+#line 89 "hb-ot-shape-complex-indic-machine.rl"
+ {act = 6;}
+ break;
+#line 544 "hb-ot-shape-complex-indic-machine.hh"
+ }
+
+_again: /* to-state actions for the state just entered */
+ switch ( _indic_syllable_machine_to_state_actions[cs] ) {
+ case 9:
+#line 1 "NONE"
+ {ts = 0;}
+ break;
+#line 553 "hb-ot-shape-complex-indic-machine.hh"
+ }
+/* Advance to the next glyph, or fall through to the end-of-input check. */
+ if ( ++p != pe )
+ goto _resume;
+ _test_eof: {}
+ if ( p == eof )
+ {
+ if ( _indic_syllable_machine_eof_trans[cs] > 0 ) { /* this state has an end-of-input transition: take it (entries are 1-based) */
+ _trans = _indic_syllable_machine_eof_trans[cs] - 1;
+ goto _eof_trans;
+ }
+ }
+
+ }
+
+#line 121 "hb-ot-shape-complex-indic-machine.rl"
+
+}
+
+#undef found_syllable
+
+#endif /* HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic-table.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic-table.cc
new file mode 100644
index 0000000000..a150fd2486
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic-table.cc
@@ -0,0 +1,501 @@
+/* == Start of generated table == */
+/*
+ * The following table is generated by running:
+ *
+ * ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt
+ *
+ * on files with these headers:
+ *
+ * # IndicSyllabicCategory-13.0.0.txt
+ * # Date: 2019-07-22, 19:55:00 GMT [KW, RP]
+ * # IndicPositionalCategory-13.0.0.txt
+ * # Date: 2019-07-23, 00:01:00 GMT [KW, RP]
+ * # Blocks-13.0.0.txt
+ * # Date: 2019-07-10, 19:06:00 GMT [KW]
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#include "hb-ot-shape-complex-indic.hh"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-macros"
+
+#define ISC_A INDIC_SYLLABIC_CATEGORY_AVAGRAHA /* 17 chars; Avagraha */
+#define ISC_Bi INDIC_SYLLABIC_CATEGORY_BINDU /* 91 chars; Bindu */
+#define ISC_BJN INDIC_SYLLABIC_CATEGORY_BRAHMI_JOINING_NUMBER /* 20 chars; Brahmi_Joining_Number */
+#define ISC_Ca INDIC_SYLLABIC_CATEGORY_CANTILLATION_MARK /* 59 chars; Cantillation_Mark */
+#define ISC_C INDIC_SYLLABIC_CATEGORY_CONSONANT /* 2195 chars; Consonant */
+#define ISC_CD INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD /* 12 chars; Consonant_Dead */
+#define ISC_CF INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL /* 67 chars; Consonant_Final */
+#define ISC_CHL INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER /* 5 chars; Consonant_Head_Letter */
+#define ISC_CIP INDIC_SYLLABIC_CATEGORY_CONSONANT_INITIAL_POSTFIXED /* 1 chars; Consonant_Initial_Postfixed */
+#define ISC_CK INDIC_SYLLABIC_CATEGORY_CONSONANT_KILLER /* 2 chars; Consonant_Killer */
+#define ISC_CM INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL /* 31 chars; Consonant_Medial */
+#define ISC_CP INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER /* 22 chars; Consonant_Placeholder */
+#define ISC_CPR INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA /* 3 chars; Consonant_Preceding_Repha */
+#define ISC_CPrf INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED /* 10 chars; Consonant_Prefixed */
+#define ISC_CS INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED /* 94 chars; Consonant_Subjoined */
+#define ISC_CSR INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA /* 4 chars; Consonant_Succeeding_Repha */
+#define ISC_CWS INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER /* 8 chars; Consonant_With_Stacker */
+#define ISC_GM INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK /* 3 chars; Gemination_Mark */
+#define ISC_IS INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER /* 12 chars; Invisible_Stacker */
+#define ISC_ZWJ INDIC_SYLLABIC_CATEGORY_JOINER /* 1 chars; Joiner */
+#define ISC_ML INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER /* 1 chars; Modifying_Letter */
+#define ISC_ZWNJ INDIC_SYLLABIC_CATEGORY_NON_JOINER /* 1 chars; Non_Joiner */
+#define ISC_N INDIC_SYLLABIC_CATEGORY_NUKTA /* 31 chars; Nukta */
+#define ISC_Nd INDIC_SYLLABIC_CATEGORY_NUMBER /* 491 chars; Number */
+#define ISC_NJ INDIC_SYLLABIC_CATEGORY_NUMBER_JOINER /* 1 chars; Number_Joiner */
+#define ISC_x INDIC_SYLLABIC_CATEGORY_OTHER /* 1 chars; Other */
+#define ISC_PK INDIC_SYLLABIC_CATEGORY_PURE_KILLER /* 23 chars; Pure_Killer */
+#define ISC_RS INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER /* 2 chars; Register_Shifter */
+#define ISC_SM INDIC_SYLLABIC_CATEGORY_SYLLABLE_MODIFIER /* 25 chars; Syllable_Modifier */
+#define ISC_TL INDIC_SYLLABIC_CATEGORY_TONE_LETTER /* 7 chars; Tone_Letter */
+#define ISC_TM INDIC_SYLLABIC_CATEGORY_TONE_MARK /* 42 chars; Tone_Mark */
+#define ISC_V INDIC_SYLLABIC_CATEGORY_VIRAMA /* 27 chars; Virama */
+#define ISC_Vs INDIC_SYLLABIC_CATEGORY_VISARGA /* 35 chars; Visarga */
+#define ISC_Vo INDIC_SYLLABIC_CATEGORY_VOWEL /* 30 chars; Vowel */
+#define ISC_M INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT /* 683 chars; Vowel_Dependent */
+#define ISC_VI INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT /* 484 chars; Vowel_Independent */
+
+#define IMC_B INDIC_MATRA_CATEGORY_BOTTOM /* 351 chars; Bottom */
+#define IMC_BL INDIC_MATRA_CATEGORY_BOTTOM_AND_LEFT /* 1 chars; Bottom_And_Left */
+#define IMC_BR INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT /* 4 chars; Bottom_And_Right */
+#define IMC_L INDIC_MATRA_CATEGORY_LEFT /* 64 chars; Left */
+#define IMC_LR INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT /* 22 chars; Left_And_Right */
+#define IMC_x INDIC_MATRA_CATEGORY_NOT_APPLICABLE /* 1 chars; Not_Applicable */
+#define IMC_O INDIC_MATRA_CATEGORY_OVERSTRUCK /* 10 chars; Overstruck */
+#define IMC_R INDIC_MATRA_CATEGORY_RIGHT /* 288 chars; Right */
+#define IMC_T INDIC_MATRA_CATEGORY_TOP /* 415 chars; Top */
+#define IMC_TB INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM /* 10 chars; Top_And_Bottom */
+#define IMC_TBL INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_LEFT /* 2 chars; Top_And_Bottom_And_Left */
+#define IMC_TBR INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT /* 1 chars; Top_And_Bottom_And_Right */
+#define IMC_TL INDIC_MATRA_CATEGORY_TOP_AND_LEFT /* 6 chars; Top_And_Left */
+#define IMC_TLR INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT /* 4 chars; Top_And_Left_And_Right */
+#define IMC_TR INDIC_MATRA_CATEGORY_TOP_AND_RIGHT /* 13 chars; Top_And_Right */
+#define IMC_VOL INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT /* 19 chars; Visual_Order_Left */
+
+#pragma GCC diagnostic pop
+
+#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)
+/* Packed category table: each entry is INDIC_COMBINE_CATEGORIES (syllabic category, matra category) for one code point, written with the _(S,M) shorthand. */
+/* Only the code-point ranges listed below are stored, back to back; each indic_offset_0xNNNNu macro is the table index at which the range starting at U+NNNN begins. */
+static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {
+
+
+#define indic_offset_0x0028u 0
+
+
+ /* Basic Latin */
+
+ /* 0028 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(CP,x), _(x,x), _(x,x),
+ /* 0030 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
+ /* 0038 */ _(Nd,x), _(Nd,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+
+#define indic_offset_0x00b0u 24
+
+
+ /* Latin-1 Supplement */
+
+ /* 00B0 */ _(x,x), _(x,x), _(SM,x), _(SM,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 00B8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 00C0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 00C8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 00D0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(CP,x),
+
+#define indic_offset_0x0900u 64
+
+
+ /* Devanagari */
+
+ /* 0900 */ _(Bi,T), _(Bi,T), _(Bi,T), _(Vs,R), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
+ /* 0908 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
+ /* 0910 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
+ /* 0918 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0920 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0928 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0930 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0938 */ _(C,x), _(C,x), _(M,T), _(M,R), _(N,B), _(A,x), _(M,R), _(M,L),
+ /* 0940 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(M,T), _(M,T),
+ /* 0948 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(V,B), _(M,L), _(M,R),
+ /* 0950 */ _(x,x), _(Ca,T), _(Ca,B), _(x,T), _(x,T), _(M,T), _(M,B), _(M,B),
+ /* 0958 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0960 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
+ /* 0968 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
+ /* 0970 */ _(x,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
+ /* 0978 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+
+ /* Bengali */
+
+ /* 0980 */ _(CP,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
+ /* 0988 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(VI,x),
+ /* 0990 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
+ /* 0998 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 09A0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 09A8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 09B0 */ _(C,x), _(x,x), _(C,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x),
+ /* 09B8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,L),
+ /* 09C0 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(x,x), _(x,x), _(M,L),
+ /* 09C8 */ _(M,L), _(x,x), _(x,x), _(M,LR), _(M,LR), _(V,B), _(CD,x), _(x,x),
+ /* 09D0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R),
+ /* 09D8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x),
+ /* 09E0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
+ /* 09E8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
+ /* 09F0 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 09F8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(Bi,x), _(x,x), _(SM,T), _(x,x),
+
+ /* Gurmukhi */
+
+ /* 0A00 */ _(x,x), _(Bi,T), _(Bi,T), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
+ /* 0A08 */ _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x),
+ /* 0A10 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
+ /* 0A18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0A20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0A28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0A30 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(x,x),
+ /* 0A38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(x,x), _(M,R), _(M,L),
+ /* 0A40 */ _(M,R), _(M,B), _(M,B), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T),
+ /* 0A48 */ _(M,T), _(x,x), _(x,x), _(M,T), _(M,T), _(V,B), _(x,x), _(x,x),
+ /* 0A50 */ _(x,x), _(Ca,B), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 0A58 */ _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x),
+ /* 0A60 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
+ /* 0A68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
+ /* 0A70 */ _(Bi,T), _(GM,T), _(CP,x), _(CP,x), _(x,x), _(CM,B), _(x,x), _(x,x),
+ /* 0A78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+
+ /* Gujarati */
+
+ /* 0A80 */ _(x,x), _(Bi,T), _(Bi,T), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
+ /* 0A88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x),
+ /* 0A90 */ _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
+ /* 0A98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0AA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0AA8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0AB0 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x),
+ /* 0AB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,L),
+ /* 0AC0 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(x,x), _(M,T),
+ /* 0AC8 */ _(M,T), _(M,TR), _(x,x), _(M,R), _(M,R), _(V,B), _(x,x), _(x,x),
+ /* 0AD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 0AD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 0AE0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
+ /* 0AE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
+ /* 0AF0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 0AF8 */ _(x,x), _(C,x), _(Ca,T), _(Ca,T), _(Ca,T), _(N,T), _(N,T), _(N,T),
+
+ /* Oriya */
+
+ /* 0B00 */ _(x,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
+ /* 0B08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(VI,x),
+ /* 0B10 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
+ /* 0B18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0B20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0B28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0B30 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x),
+ /* 0B38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,T),
+ /* 0B40 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(x,x), _(x,x), _(M,L),
+ /* 0B48 */ _(M,TL), _(x,x), _(x,x), _(M,LR),_(M,TLR), _(V,B), _(x,x), _(x,x),
+ /* 0B50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), _(M,T), _(M,TR),
+ /* 0B58 */ _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x),
+ /* 0B60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
+ /* 0B68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
+ /* 0B70 */ _(x,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 0B78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+
+ /* Tamil */
+
+ /* 0B80 */ _(x,x), _(x,x), _(Bi,T), _(ML,x), _(x,x), _(VI,x), _(VI,x), _(VI,x),
+ /* 0B88 */ _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(x,x), _(VI,x), _(VI,x),
+ /* 0B90 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(x,x), _(x,x),
+ /* 0B98 */ _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x), _(C,x), _(C,x),
+ /* 0BA0 */ _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x),
+ /* 0BA8 */ _(C,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x),
+ /* 0BB0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0BB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), _(M,R),
+ /* 0BC0 */ _(M,T), _(M,R), _(M,R), _(x,x), _(x,x), _(x,x), _(M,L), _(M,L),
+ /* 0BC8 */ _(M,L), _(x,x), _(M,LR), _(M,LR), _(M,LR), _(V,T), _(x,x), _(x,x),
+ /* 0BD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R),
+ /* 0BD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 0BE0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
+ /* 0BE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
+ /* 0BF0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 0BF8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+
+ /* Telugu */
+
+ /* 0C00 */ _(Bi,T), _(Bi,R), _(Bi,R), _(Vs,R), _(Bi,T), _(VI,x), _(VI,x), _(VI,x),
+ /* 0C08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x),
+ /* 0C10 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
+ /* 0C18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0C20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0C28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0C30 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0C38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(A,x), _(M,T), _(M,T),
+ /* 0C40 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(x,x), _(M,T), _(M,T),
+ /* 0C48 */ _(M,TB), _(x,x), _(M,T), _(M,T), _(M,T), _(V,T), _(x,x), _(x,x),
+ /* 0C50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), _(M,B), _(x,x),
+ /* 0C58 */ _(C,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 0C60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
+ /* 0C68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
+ /* 0C70 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 0C78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+
+ /* Kannada */
+
+ /* 0C80 */ _(Bi,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
+ /* 0C88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x),
+ /* 0C90 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
+ /* 0C98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0CA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0CA8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0CB0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x),
+ /* 0CB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,T),
+ /* 0CC0 */ _(M,TR), _(M,R), _(M,R), _(M,R), _(M,R), _(x,x), _(M,T), _(M,TR),
+ /* 0CC8 */ _(M,TR), _(x,x), _(M,TR), _(M,TR), _(M,T), _(V,T), _(x,x), _(x,x),
+ /* 0CD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), _(M,R), _(x,x),
+ /* 0CD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(x,x),
+ /* 0CE0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
+ /* 0CE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
+ /* 0CF0 */ _(x,x),_(CWS,x),_(CWS,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 0CF8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+
+ /* Malayalam */
+
+ /* 0D00 */ _(Bi,T), _(Bi,T), _(Bi,R), _(Vs,R), _(Bi,x), _(VI,x), _(VI,x), _(VI,x),
+ /* 0D08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x),
+ /* 0D10 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
+ /* 0D18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0D20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0D28 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0D30 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0D38 */ _(C,x), _(C,x), _(C,x), _(PK,T), _(PK,T), _(A,x), _(M,R), _(M,R),
+ /* 0D40 */ _(M,R), _(M,R), _(M,R), _(M,B), _(M,B), _(x,x), _(M,L), _(M,L),
+ /* 0D48 */ _(M,L), _(x,x), _(M,LR), _(M,LR), _(M,LR), _(V,T),_(CPR,T), _(x,x),
+ /* 0D50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(CD,x), _(CD,x), _(CD,x), _(M,R),
+ /* 0D58 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x),
+ /* 0D60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
+ /* 0D68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
+ /* 0D70 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 0D78 */ _(x,x), _(x,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x),
+
+ /* Sinhala */
+
+ /* 0D80 */ _(x,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
+ /* 0D88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
+ /* 0D90 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x),
+ /* 0D98 */ _(x,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0DA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0DA8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0DB0 */ _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 0DB8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x), _(x,x),
+ /* 0DC0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x),
+ /* 0DC8 */ _(x,x), _(x,x), _(V,T), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R),
+ /* 0DD0 */ _(M,R), _(M,R), _(M,T), _(M,T), _(M,B), _(x,x), _(M,B), _(x,x),
+ /* 0DD8 */ _(M,R), _(M,L), _(M,TL), _(M,L), _(M,LR),_(M,TLR), _(M,LR), _(M,R),
+ /* 0DE0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
+ /* 0DE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
+ /* 0DF0 */ _(x,x), _(x,x), _(M,R), _(M,R), _(x,x), _(x,x), _(x,x), _(x,x),
+
+#define indic_offset_0x1000u 1336
+
+
+ /* Myanmar */
+
+ /* 1000 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 1008 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 1010 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 1018 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 1020 */ _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
+ /* 1028 */ _(VI,x), _(VI,x), _(VI,x), _(M,R), _(M,R), _(M,T), _(M,T), _(M,B),
+ /* 1030 */ _(M,B), _(M,L), _(M,T), _(M,T), _(M,T), _(M,T), _(Bi,T), _(TM,B),
+ /* 1038 */ _(Vs,R), _(IS,x), _(PK,T), _(CM,R),_(CM,TBL), _(CM,B), _(CM,B), _(C,x),
+ /* 1040 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
+ /* 1048 */ _(Nd,x), _(Nd,x), _(x,x), _(CP,x), _(x,x), _(x,x), _(CP,x), _(x,x),
+ /* 1050 */ _(C,x), _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(M,R), _(M,R),
+ /* 1058 */ _(M,B), _(M,B), _(C,x), _(C,x), _(C,x), _(C,x), _(CM,B), _(CM,B),
+ /* 1060 */ _(CM,B), _(C,x), _(M,R), _(TM,R), _(TM,R), _(C,x), _(C,x), _(M,R),
+ /* 1068 */ _(M,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(C,x), _(C,x),
+ /* 1070 */ _(C,x), _(M,T), _(M,T), _(M,T), _(M,T), _(C,x), _(C,x), _(C,x),
+ /* 1078 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 1080 */ _(C,x), _(C,x), _(CM,B), _(M,R), _(M,L), _(M,T), _(M,T), _(TM,R),
+ /* 1088 */ _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,B), _(C,x), _(TM,R),
+ /* 1090 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
+ /* 1098 */ _(Nd,x), _(Nd,x), _(TM,R), _(TM,R), _(M,R), _(M,T), _(x,x), _(x,x),
+
+#define indic_offset_0x1780u 1496
+
+
+ /* Khmer */
+
+ /* 1780 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 1788 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 1790 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 1798 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* 17A0 */ _(C,x), _(C,x), _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
+ /* 17A8 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
+ /* 17B0 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(M,R), _(M,T),
+ /* 17B8 */ _(M,T), _(M,T), _(M,T), _(M,B), _(M,B), _(M,B), _(M,TL),_(M,TLR),
+ /* 17C0 */ _(M,LR), _(M,L), _(M,L), _(M,L), _(M,LR), _(M,LR), _(Bi,T), _(Vs,R),
+ /* 17C8 */ _(M,R), _(RS,T), _(RS,T), _(SM,T),_(CSR,T), _(CK,T), _(SM,T), _(SM,T),
+ /* 17D0 */ _(SM,T), _(PK,T), _(IS,x), _(SM,T), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 17D8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(A,x), _(SM,T), _(x,x), _(x,x),
+ /* 17E0 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
+ /* 17E8 */ _(Nd,x), _(Nd,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+
+#define indic_offset_0x1cd0u 1608
+
+
+ /* Vedic Extensions */
+
+ /* 1CD0 */ _(Ca,T), _(Ca,T), _(Ca,T), _(x,x), _(Ca,O), _(Ca,B), _(Ca,B), _(Ca,B),
+ /* 1CD8 */ _(Ca,B), _(Ca,B), _(Ca,T), _(Ca,T), _(Ca,B), _(Ca,B), _(Ca,B), _(Ca,B),
+ /* 1CE0 */ _(Ca,T), _(Ca,R), _(x,O), _(x,O), _(x,O), _(x,O), _(x,O), _(x,O),
+ /* 1CE8 */ _(x,O), _(x,x), _(x,x), _(x,x), _(x,x), _(x,B), _(x,x), _(x,x),
+ /* 1CF0 */ _(x,x), _(x,x), _(CD,x), _(CD,x), _(Ca,T),_(CWS,x),_(CWS,x), _(Ca,R),
+ /* 1CF8 */ _(Ca,x), _(Ca,x), _(CP,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+
+#define indic_offset_0x2008u 1656
+
+
+ /* General Punctuation */
+
+ /* 2008 */ _(x,x), _(x,x), _(x,x), _(x,x),_(ZWNJ,x),_(ZWJ,x), _(x,x), _(x,x),
+ /* 2010 */ _(CP,x), _(CP,x), _(CP,x), _(CP,x), _(CP,x), _(x,x), _(x,x), _(x,x),
+
+#define indic_offset_0x2070u 1672
+
+
+ /* Superscripts and Subscripts */
+
+ /* 2070 */ _(x,x), _(x,x), _(x,x), _(x,x), _(SM,x), _(x,x), _(x,x), _(x,x),
+ /* 2078 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* 2080 */ _(x,x), _(x,x), _(SM,x), _(SM,x), _(SM,x), _(x,x), _(x,x), _(x,x),
+
+#define indic_offset_0xa8e0u 1696
+
+
+ /* Devanagari Extended */
+
+ /* A8E0 */ _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T),
+ /* A8E8 */ _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T),
+ /* A8F0 */ _(Ca,T), _(Ca,T), _(Bi,x), _(Bi,x), _(x,x), _(x,x), _(x,x), _(x,x),
+ /* A8F8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x), _(M,T),
+
+#define indic_offset_0xa9e0u 1728
+
+
+ /* Myanmar Extended-B */
+
+ /* A9E0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(M,T), _(x,x), _(C,x),
+ /* A9E8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* A9F0 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
+ /* A9F8 */ _(Nd,x), _(Nd,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x),
+
+#define indic_offset_0xaa60u 1760
+
+
+ /* Myanmar Extended-A */
+
+ /* AA60 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* AA68 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
+ /* AA70 */ _(x,x), _(C,x), _(C,x), _(C,x), _(CP,x), _(CP,x), _(CP,x), _(x,x),
+ /* AA78 */ _(x,x), _(x,x), _(C,x), _(TM,R), _(TM,T), _(TM,R), _(C,x), _(C,x),
+
+}; /* Table items: 1792; occupancy: 70% */
+/* Returns the combined category entry for code point u: dispatches on u >> 12, looks u up in indic_table when it falls in one of the stored ranges, special-cases U+00A0 and U+25CC as (CP,x), and returns (x,x) for everything else. */
+INDIC_TABLE_ELEMENT_TYPE
+hb_indic_get_categories (hb_codepoint_t u)
+{
+ switch (u >> 12) /* select by 0x1000-sized page so only that page's ranges are tested */
+ {
+ case 0x0u: /* U+0000..U+0FFF */
+ if (unlikely (u == 0x00A0u)) return _(CP,x); /* single code point outside the stored ranges */
+ if (hb_in_range<hb_codepoint_t> (u, 0x0028u, 0x003Fu)) return indic_table[u - 0x0028u + indic_offset_0x0028u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x00B0u, 0x00D7u)) return indic_table[u - 0x00B0u + indic_offset_0x00b0u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x0900u, 0x0DF7u)) return indic_table[u - 0x0900u + indic_offset_0x0900u];
+ break;
+
+ case 0x1u: /* U+1000..U+1FFF */
+ if (hb_in_range<hb_codepoint_t> (u, 0x1000u, 0x109Fu)) return indic_table[u - 0x1000u + indic_offset_0x1000u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x1780u, 0x17EFu)) return indic_table[u - 0x1780u + indic_offset_0x1780u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x1CD0u, 0x1CFFu)) return indic_table[u - 0x1CD0u + indic_offset_0x1cd0u];
+ break;
+
+ case 0x2u: /* U+2000..U+2FFF */
+ if (unlikely (u == 0x25CCu)) return _(CP,x); /* single code point outside the stored ranges */
+ if (hb_in_range<hb_codepoint_t> (u, 0x2008u, 0x2017u)) return indic_table[u - 0x2008u + indic_offset_0x2008u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x2070u, 0x2087u)) return indic_table[u - 0x2070u + indic_offset_0x2070u];
+ break;
+
+ case 0xAu: /* U+A000..U+AFFF */
+ if (hb_in_range<hb_codepoint_t> (u, 0xA8E0u, 0xA8FFu)) return indic_table[u - 0xA8E0u + indic_offset_0xa8e0u];
+ if (hb_in_range<hb_codepoint_t> (u, 0xA9E0u, 0xA9FFu)) return indic_table[u - 0xA9E0u + indic_offset_0xa9e0u];
+ if (hb_in_range<hb_codepoint_t> (u, 0xAA60u, 0xAA7Fu)) return indic_table[u - 0xAA60u + indic_offset_0xaa60u];
+ break;
+
+ default:
+ break;
+ }
+ return _(x,x); /* not covered by any range above */
+}
+
+#undef _
+
+#undef ISC_A
+#undef ISC_Bi
+#undef ISC_BJN
+#undef ISC_Ca
+#undef ISC_C
+#undef ISC_CD
+#undef ISC_CF
+#undef ISC_CHL
+#undef ISC_CIP
+#undef ISC_CK
+#undef ISC_CM
+#undef ISC_CP
+#undef ISC_CPR
+#undef ISC_CPrf
+#undef ISC_CS
+#undef ISC_CSR
+#undef ISC_CWS
+#undef ISC_GM
+#undef ISC_IS
+#undef ISC_ZWJ
+#undef ISC_ML
+#undef ISC_ZWNJ
+#undef ISC_N
+#undef ISC_Nd
+#undef ISC_NJ
+#undef ISC_x
+#undef ISC_PK
+#undef ISC_RS
+#undef ISC_SM
+#undef ISC_TL
+#undef ISC_TM
+#undef ISC_V
+#undef ISC_Vs
+#undef ISC_Vo
+#undef ISC_M
+#undef ISC_VI
+
+#undef IMC_B
+#undef IMC_BL
+#undef IMC_BR
+#undef IMC_L
+#undef IMC_LR
+#undef IMC_x
+#undef IMC_O
+#undef IMC_R
+#undef IMC_T
+#undef IMC_TB
+#undef IMC_TBL
+#undef IMC_TBR
+#undef IMC_TL
+#undef IMC_TLR
+#undef IMC_TR
+#undef IMC_VOL
+
+#endif
+
+/* == End of generated table == */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.cc
new file mode 100644
index 0000000000..34972f81e2
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.cc
@@ -0,0 +1,1615 @@
+/*
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#include "hb-ot-shape-complex-indic.hh"
+#include "hb-ot-shape-complex-vowel-constraints.hh"
+#include "hb-ot-layout.hh"
+
+
+/*
+ * Indic shaper.
+ */
+
+
+/*
+ * Indic configurations. Note that we do not want to keep every single script-specific
+ * behavior in these tables necessarily. This should mainly be used for per-script
+ * properties that are cheaper keeping here, than in the code. Ie. if, say, one and
+ * only one script has an exception, that one script can be if'ed directly in the code,
+ * instead of adding a new flag in these structs.
+ */
+
+enum base_position_t {
+ BASE_POS_LAST_SINHALA,
+ BASE_POS_LAST
+};
+enum reph_position_t {
+ REPH_POS_AFTER_MAIN = POS_AFTER_MAIN,
+ REPH_POS_BEFORE_SUB = POS_BEFORE_SUB,
+ REPH_POS_AFTER_SUB = POS_AFTER_SUB,
+ REPH_POS_BEFORE_POST = POS_BEFORE_POST,
+ REPH_POS_AFTER_POST = POS_AFTER_POST
+};
+enum reph_mode_t {
+ REPH_MODE_IMPLICIT, /* Reph formed out of initial Ra,H sequence. */
+ REPH_MODE_EXPLICIT, /* Reph formed out of initial Ra,H,ZWJ sequence. */
+ REPH_MODE_LOG_REPHA /* Encoded Repha character, needs reordering. */
+};
+enum blwf_mode_t {
+ BLWF_MODE_PRE_AND_POST, /* Below-forms feature applied to pre-base and post-base. */
+ BLWF_MODE_POST_ONLY /* Below-forms feature applied to post-base only. */
+};
+struct indic_config_t
+{
+ hb_script_t script;
+ bool has_old_spec;
+ hb_codepoint_t virama;
+ base_position_t base_pos;
+ reph_position_t reph_pos;
+ reph_mode_t reph_mode;
+ blwf_mode_t blwf_mode;
+};
+
+static const indic_config_t indic_configs[] =
+{
+ /* Default. Should be first. */
+ {HB_SCRIPT_INVALID, false, 0,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_DEVANAGARI,true, 0x094Du,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_BENGALI, true, 0x09CDu,BASE_POS_LAST, REPH_POS_AFTER_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_GURMUKHI, true, 0x0A4Du,BASE_POS_LAST, REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_GUJARATI, true, 0x0ACDu,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_ORIYA, true, 0x0B4Du,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_TAMIL, true, 0x0BCDu,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_TELUGU, true, 0x0C4Du,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY},
+ {HB_SCRIPT_KANNADA, true, 0x0CCDu,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY},
+ {HB_SCRIPT_MALAYALAM, true, 0x0D4Du,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST},
+ {HB_SCRIPT_SINHALA, false,0x0DCAu,BASE_POS_LAST_SINHALA,
+ REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_PRE_AND_POST},
+};
+
+
+
+/*
+ * Indic shaper.
+ */
+
+static const hb_ot_map_feature_t
+indic_features[] =
+{
+ /*
+ * Basic features.
+ * These features are applied in order, one at a time, after initial_reordering.
+ */
+ {HB_TAG('n','u','k','t'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('a','k','h','n'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('r','p','h','f'), F_MANUAL_JOINERS},
+ {HB_TAG('r','k','r','f'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS},
+ {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS},
+ {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS},
+ {HB_TAG('h','a','l','f'), F_MANUAL_JOINERS},
+ {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS},
+ {HB_TAG('v','a','t','u'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('c','j','c','t'), F_GLOBAL_MANUAL_JOINERS},
+ /*
+ * Other features.
+ * These features are applied all at once, after final_reordering
+ * but before clearing syllables.
+ * Default Bengali font in Windows for example has intermixed
+ * lookups for init,pres,abvs,blws features.
+ */
+ {HB_TAG('i','n','i','t'), F_MANUAL_JOINERS},
+ {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('h','a','l','n'), F_GLOBAL_MANUAL_JOINERS},
+};
+
+/*
+ * Must be in the same order as the indic_features array.
+ */
+enum {
+ _INDIC_NUKT,
+ _INDIC_AKHN,
+ INDIC_RPHF,
+ _INDIC_RKRF,
+ INDIC_PREF,
+ INDIC_BLWF,
+ INDIC_ABVF,
+ INDIC_HALF,
+ INDIC_PSTF,
+ _INDIC_VATU,
+ _INDIC_CJCT,
+
+ INDIC_INIT,
+ _INDIC_PRES,
+ _INDIC_ABVS,
+ _INDIC_BLWS,
+ _INDIC_PSTS,
+ _INDIC_HALN,
+
+ INDIC_NUM_FEATURES,
+ INDIC_BASIC_FEATURES = INDIC_INIT, /* Don't forget to update this! */
+};
+
+static void
+setup_syllables_indic (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+static void
+initial_reordering_indic (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+static void
+final_reordering_indic (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+
+static void
+collect_features_indic (hb_ot_shape_planner_t *plan)
+{
+ hb_ot_map_builder_t *map = &plan->map;
+
+ /* Do this before any lookups have been applied. */
+ map->add_gsub_pause (setup_syllables_indic);
+
+ map->enable_feature (HB_TAG('l','o','c','l'));
+ /* The Indic specs do not require ccmp, but we apply it here since if
+ * there is a use of it, it's typically at the beginning. */
+ map->enable_feature (HB_TAG('c','c','m','p'));
+
+
+ unsigned int i = 0;
+ map->add_gsub_pause (initial_reordering_indic);
+
+ for (; i < INDIC_BASIC_FEATURES; i++) {
+ map->add_feature (indic_features[i]);
+ map->add_gsub_pause (nullptr);
+ }
+
+ map->add_gsub_pause (final_reordering_indic);
+
+ for (; i < INDIC_NUM_FEATURES; i++)
+ map->add_feature (indic_features[i]);
+
+ map->enable_feature (HB_TAG('c','a','l','t'));
+ map->enable_feature (HB_TAG('c','l','i','g'));
+
+ map->add_gsub_pause (_hb_clear_syllables);
+}
+
+static void
+override_features_indic (hb_ot_shape_planner_t *plan)
+{
+ plan->map.disable_feature (HB_TAG('l','i','g','a'));
+}
+
+
+struct indic_shape_plan_t
+{
+ bool load_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
+ {
+ hb_codepoint_t glyph = virama_glyph.get_relaxed ();
+ if (unlikely (glyph == (hb_codepoint_t) -1))
+ {
+ if (!config->virama || !font->get_nominal_glyph (config->virama, &glyph))
+ glyph = 0;
+ /* Technically speaking, the spec says we should apply 'locl' to virama too.
+ * Maybe one day... */
+
+ /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph
+ * during shape planning... Instead, overwrite it here. */
+ virama_glyph.set_relaxed ((int) glyph);
+ }
+
+ *pglyph = glyph;
+ return glyph != 0;
+ }
+
+ const indic_config_t *config;
+
+ bool is_old_spec;
+#ifndef HB_NO_UNISCRIBE_BUG_COMPATIBLE
+ bool uniscribe_bug_compatible;
+#else
+ static constexpr bool uniscribe_bug_compatible = false;
+#endif
+ mutable hb_atomic_int_t virama_glyph;
+
+ hb_indic_would_substitute_feature_t rphf;
+ hb_indic_would_substitute_feature_t pref;
+ hb_indic_would_substitute_feature_t blwf;
+ hb_indic_would_substitute_feature_t pstf;
+ hb_indic_would_substitute_feature_t vatu;
+
+ hb_mask_t mask_array[INDIC_NUM_FEATURES];
+};
+
+static void *
+data_create_indic (const hb_ot_shape_plan_t *plan)
+{
+ indic_shape_plan_t *indic_plan = (indic_shape_plan_t *) calloc (1, sizeof (indic_shape_plan_t));
+ if (unlikely (!indic_plan))
+ return nullptr;
+
+ indic_plan->config = &indic_configs[0];
+ for (unsigned int i = 1; i < ARRAY_LENGTH (indic_configs); i++)
+ if (plan->props.script == indic_configs[i].script) {
+ indic_plan->config = &indic_configs[i];
+ break;
+ }
+
+ indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2');
+#ifndef HB_NO_UNISCRIBE_BUG_COMPATIBLE
+ indic_plan->uniscribe_bug_compatible = hb_options ().uniscribe_bug_compatible;
+#endif
+ indic_plan->virama_glyph.set_relaxed (-1);
+
+ /* Use zero-context would_substitute() matching for new-spec of the main
+ * Indic scripts, and scripts with one spec only, but not for old-specs.
+ * The new-spec for all dual-spec scripts says zero-context matching happens.
+ *
+ * However, testing with Malayalam shows that old and new spec both allow
+ * context. Testing with Bengali new-spec however shows that it doesn't.
+ * So, the heuristic here is the way it is. It should *only* be changed,
+ * as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE.
+ */
+ bool zero_context = !indic_plan->is_old_spec && plan->props.script != HB_SCRIPT_MALAYALAM;
+ indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'), zero_context);
+ indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), zero_context);
+ indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f'), zero_context);
+ indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f'), zero_context);
+ indic_plan->vatu.init (&plan->map, HB_TAG('v','a','t','u'), zero_context);
+
+ for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++)
+ indic_plan->mask_array[i] = (indic_features[i].flags & F_GLOBAL) ?
+ 0 : plan->map.get_1_mask (indic_features[i].tag);
+
+ return indic_plan;
+}
+
+static void
+data_destroy_indic (void *data)
+{
+ free (data);
+}
+
+static indic_position_t
+consonant_position_from_face (const indic_shape_plan_t *indic_plan,
+ const hb_codepoint_t consonant,
+ const hb_codepoint_t virama,
+ hb_face_t *face)
+{
+ /* For old-spec, the order of glyphs is Consonant,Virama,
+ * whereas for new-spec, it's Virama,Consonant. However,
+ * some broken fonts (like Free Sans) simply copied lookups
+ * from old-spec to new-spec without modification.
+ * And oddly enough, Uniscribe seems to respect those lookups.
+ * Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds
+ * base at 0. The font however, only has lookups matching
+ * 930,94D in 'blwf', not the expected 94D,930 (with new-spec
+ * table). As such, we simply match both sequences. Seems
+ * to work.
+ *
+ * Vatu is done as well, for:
+ * https://github.com/harfbuzz/harfbuzz/issues/1587
+ */
+ hb_codepoint_t glyphs[3] = {virama, consonant, virama};
+ if (indic_plan->blwf.would_substitute (glyphs , 2, face) ||
+ indic_plan->blwf.would_substitute (glyphs+1, 2, face) ||
+ indic_plan->vatu.would_substitute (glyphs , 2, face) ||
+ indic_plan->vatu.would_substitute (glyphs+1, 2, face))
+ return POS_BELOW_C;
+ if (indic_plan->pstf.would_substitute (glyphs , 2, face) ||
+ indic_plan->pstf.would_substitute (glyphs+1, 2, face))
+ return POS_POST_C;
+ if (indic_plan->pref.would_substitute (glyphs , 2, face) ||
+ indic_plan->pref.would_substitute (glyphs+1, 2, face))
+ return POS_POST_C;
+ return POS_BASE_C;
+}
+
+
+enum indic_syllable_type_t {
+ indic_consonant_syllable,
+ indic_vowel_syllable,
+ indic_standalone_cluster,
+ indic_symbol_cluster,
+ indic_broken_cluster,
+ indic_non_indic_cluster,
+};
+
+#include "hb-ot-shape-complex-indic-machine.hh"
+
+
+static void
+setup_masks_indic (const hb_ot_shape_plan_t *plan HB_UNUSED,
+ hb_buffer_t *buffer,
+ hb_font_t *font HB_UNUSED)
+{
+ HB_BUFFER_ALLOCATE_VAR (buffer, indic_category);
+ HB_BUFFER_ALLOCATE_VAR (buffer, indic_position);
+
+ /* We cannot setup masks here. We save information about characters
+ * and setup masks later on in a pause-callback. */
+
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ for (unsigned int i = 0; i < count; i++)
+ set_indic_properties (info[i]);
+}
+
+static void
+setup_syllables_indic (const hb_ot_shape_plan_t *plan HB_UNUSED,
+ hb_font_t *font HB_UNUSED,
+ hb_buffer_t *buffer)
+{
+ find_syllables_indic (buffer);
+ foreach_syllable (buffer, start, end)
+ buffer->unsafe_to_break (start, end);
+}
+
+static int
+compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
+{
+ int a = pa->indic_position();
+ int b = pb->indic_position();
+
+ return a < b ? -1 : a == b ? 0 : +1;
+}
+
+
+
+static void
+update_consonant_positions_indic (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer)
+{
+ const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
+
+ if (indic_plan->config->base_pos != BASE_POS_LAST)
+ return;
+
+ hb_codepoint_t virama;
+ if (indic_plan->load_virama_glyph (font, &virama))
+ {
+ hb_face_t *face = font->face;
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ for (unsigned int i = 0; i < count; i++)
+ if (info[i].indic_position() == POS_BASE_C)
+ {
+ hb_codepoint_t consonant = info[i].codepoint;
+ info[i].indic_position() = consonant_position_from_face (indic_plan, consonant, virama, face);
+ }
+ }
+}
+
+
+/* Rules from:
+ * https://docs.microsoft.com/en-us/typography/script-development/devanagari */
+
+static void
+initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
+ hb_face_t *face,
+ hb_buffer_t *buffer,
+ unsigned int start, unsigned int end)
+{
+ const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
+ hb_glyph_info_t *info = buffer->info;
+
+ /* https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167
+ * // For compatibility with legacy usage in Kannada,
+ * // Ra+h+ZWJ must behave like Ra+ZWJ+h...
+ */
+ if (buffer->props.script == HB_SCRIPT_KANNADA &&
+ start + 3 <= end &&
+ is_one_of (info[start ], FLAG (OT_Ra)) &&
+ is_one_of (info[start+1], FLAG (OT_H)) &&
+ is_one_of (info[start+2], FLAG (OT_ZWJ)))
+ {
+ buffer->merge_clusters (start+1, start+3);
+ hb_glyph_info_t tmp = info[start+1];
+ info[start+1] = info[start+2];
+ info[start+2] = tmp;
+ }
+
+ /* 1. Find base consonant:
+ *
+ * The shaping engine finds the base consonant of the syllable, using the
+ * following algorithm: starting from the end of the syllable, move backwards
+ * until a consonant is found that does not have a below-base or post-base
+ * form (post-base forms have to follow below-base forms), or that is not a
+ * pre-base-reordering Ra, or arrive at the first consonant. The consonant
+ * stopped at will be the base.
+ *
+ * o If the syllable starts with Ra + Halant (in a script that has Reph)
+ * and has more than one consonant, Ra is excluded from candidates for
+ * base consonants.
+ */
+
+ unsigned int base = end;
+ bool has_reph = false;
+
+ {
+ /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
+ * and has more than one consonant, Ra is excluded from candidates for
+ * base consonants. */
+ unsigned int limit = start;
+ if (indic_plan->mask_array[INDIC_RPHF] &&
+ start + 3 <= end &&
+ (
+ (indic_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (info[start + 2])) ||
+ (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].indic_category() == OT_ZWJ)
+ ))
+ {
+ /* See if it matches the 'rphf' feature. */
+ hb_codepoint_t glyphs[3] = {info[start].codepoint,
+ info[start + 1].codepoint,
+ indic_plan->config->reph_mode == REPH_MODE_EXPLICIT ?
+ info[start + 2].codepoint : 0};
+ if (indic_plan->rphf.would_substitute (glyphs, 2, face) ||
+ (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT &&
+ indic_plan->rphf.would_substitute (glyphs, 3, face)))
+ {
+ limit += 2;
+ while (limit < end && is_joiner (info[limit]))
+ limit++;
+ base = start;
+ has_reph = true;
+ }
+ } else if (indic_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].indic_category() == OT_Repha)
+ {
+ limit += 1;
+ while (limit < end && is_joiner (info[limit]))
+ limit++;
+ base = start;
+ has_reph = true;
+ }
+
+ switch (indic_plan->config->base_pos)
+ {
+ case BASE_POS_LAST:
+ {
+ /* -> starting from the end of the syllable, move backwards */
+ unsigned int i = end;
+ bool seen_below = false;
+ do {
+ i--;
+ /* -> until a consonant is found */
+ if (is_consonant (info[i]))
+ {
+ /* -> that does not have a below-base or post-base form
+ * (post-base forms have to follow below-base forms), */
+ if (info[i].indic_position() != POS_BELOW_C &&
+ (info[i].indic_position() != POS_POST_C || seen_below))
+ {
+ base = i;
+ break;
+ }
+ if (info[i].indic_position() == POS_BELOW_C)
+ seen_below = true;
+
+ /* -> or that is not a pre-base-reordering Ra,
+ *
+ * IMPLEMENTATION NOTES:
+ *
+ * Our pre-base-reordering Ra's are marked POS_POST_C, so will be skipped
+ * by the logic above already.
+ */
+
+ /* -> or arrive at the first consonant. The consonant stopped at will
+ * be the base. */
+ base = i;
+ }
+ else
+ {
+ /* A ZWJ after a Halant stops the base search, and requests an explicit
+ * half form.
+ * A ZWJ before a Halant, requests a subjoined form instead, and hence
+ * search continues. This is particularly important for Bengali
+ * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */
+ if (start < i &&
+ info[i].indic_category() == OT_ZWJ &&
+ info[i - 1].indic_category() == OT_H)
+ break;
+ }
+ } while (i > limit);
+ }
+ break;
+
+ case BASE_POS_LAST_SINHALA:
+ {
+ /* Sinhala base positioning is slightly different from main Indic, in that:
+ * 1. Its ZWJ behavior is different,
+ * 2. We don't need to look into the font for consonant positions.
+ */
+
+ if (!has_reph)
+ base = limit;
+
+ /* Find the last base consonant that is not blocked by ZWJ. If there is
+ * a ZWJ right before a base consonant, that would request a subjoined form. */
+ for (unsigned int i = limit; i < end; i++)
+ if (is_consonant (info[i]))
+ {
+ if (limit < i && info[i - 1].indic_category() == OT_ZWJ)
+ break;
+ else
+ base = i;
+ }
+
+ /* Mark all subsequent consonants as below. */
+ for (unsigned int i = base + 1; i < end; i++)
+ if (is_consonant (info[i]))
+ info[i].indic_position() = POS_BELOW_C;
+ }
+ break;
+ }
+
+ /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
+ * and has more than one consonant, Ra is excluded from candidates for
+ * base consonants.
+ *
+ * Only do this for unforced Reph (i.e. not for Ra,H,ZWJ). */
+ if (has_reph && base == start && limit - base <= 2) {
+ /* Have no other consonant, so Reph is not formed and Ra becomes base. */
+ has_reph = false;
+ }
+ }
+
+
+ /* 2. Decompose and reorder Matras:
+ *
+ * Each matra and any syllable modifier sign in the syllable are moved to the
+ * appropriate position relative to the consonant(s) in the syllable. The
+ * shaping engine decomposes two- or three-part matras into their constituent
+ * parts before any repositioning. Matra characters are classified by which
+ * consonant in a conjunct they have affinity for and are reordered to the
+ * following positions:
+ *
+ * o Before first half form in the syllable
+ * o After subjoined consonants
+ * o After post-form consonant
+ * o After main consonant (for above marks)
+ *
+ * IMPLEMENTATION NOTES:
+ *
+ * The normalize() routine has already decomposed matras for us, so we don't
+ * need to worry about that.
+ */
+
+
+ /* 3. Reorder marks to canonical order:
+ *
+ * Adjacent nukta and halant or nukta and vedic sign are always repositioned
+ * if necessary, so that the nukta is first.
+ *
+ * IMPLEMENTATION NOTES:
+ *
+ * We don't need to do this: the normalize() routine already did this for us.
+ */
+
+
+ /* Reorder characters */
+
+ for (unsigned int i = start; i < base; i++)
+ info[i].indic_position() = hb_min (POS_PRE_C, (indic_position_t) info[i].indic_position());
+
+ if (base < end)
+ info[base].indic_position() = POS_BASE_C;
+
+ /* Mark final consonants. A final consonant is one appearing after a matra.
+ * Happens in Sinhala. */
+ for (unsigned int i = base + 1; i < end; i++)
+ if (info[i].indic_category() == OT_M) {
+ for (unsigned int j = i + 1; j < end; j++)
+ if (is_consonant (info[j])) {
+ info[j].indic_position() = POS_FINAL_C;
+ break;
+ }
+ break;
+ }
+
+ /* Handle beginning Ra */
+ if (has_reph)
+ info[start].indic_position() = POS_RA_TO_BECOME_REPH;
+
+ /* For old-style Indic script tags, move the first post-base Halant after
+ * last consonant.
+ *
+ * Reports suggest that in some scripts Uniscribe does this only if there
+ * is *not* a Halant after last consonant already. We know that is the
+ * case for Kannada, while it reorders unconditionally in other scripts,
+ * eg. Malayalam, Bengali, and Devanagari. We don't currently know about
+ * other scripts, so we block Kannada.
+ *
+ * Kannada test case:
+ * U+0C9A,U+0CCD,U+0C9A,U+0CCD
+ * With some versions of Lohit Kannada.
+ * https://bugs.freedesktop.org/show_bug.cgi?id=59118
+ *
+ * Malayalam test case:
+ * U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D
+ * With lohit-ttf-20121122/Lohit-Malayalam.ttf
+ *
+ * Bengali test case:
+ * U+0998,U+09CD,U+09AF,U+09CD
+ * With Windows XP vrinda.ttf
+ * https://github.com/harfbuzz/harfbuzz/issues/1073
+ *
+ * Devanagari test case:
+ * U+091F,U+094D,U+0930,U+094D
+ * With chandas.ttf
+ * https://github.com/harfbuzz/harfbuzz/issues/1071
+ */
+ if (indic_plan->is_old_spec)
+ {
+ bool disallow_double_halants = buffer->props.script == HB_SCRIPT_KANNADA;
+ for (unsigned int i = base + 1; i < end; i++)
+ if (info[i].indic_category() == OT_H)
+ {
+ unsigned int j;
+ for (j = end - 1; j > i; j--)
+ if (is_consonant (info[j]) ||
+ (disallow_double_halants && info[j].indic_category() == OT_H))
+ break;
+ if (info[j].indic_category() != OT_H && j > i) {
+ /* Move Halant to after last consonant. */
+ hb_glyph_info_t t = info[i];
+ memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0]));
+ info[j] = t;
+ }
+ break;
+ }
+ }
+
+ /* Attach misc marks to previous char to move with them. */
+ {
+ indic_position_t last_pos = POS_START;
+ for (unsigned int i = start; i < end; i++)
+ {
+ if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | FLAG (OT_H))))
+ {
+ info[i].indic_position() = last_pos;
+ if (unlikely (info[i].indic_category() == OT_H &&
+ info[i].indic_position() == POS_PRE_M))
+ {
+ /*
+ * Uniscribe doesn't move the Halant with Left Matra.
+ * TEST: U+092B,U+093F,U+094D
+ * We follow. This is important for the Sinhala
+ * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
+ * where U+0DD9 is a left matra and U+0DCA is the virama.
+ * We don't want to move the virama with the left matra.
+ * TEST: U+0D9A,U+0DDA
+ */
+ for (unsigned int j = i; j > start; j--)
+ if (info[j - 1].indic_position() != POS_PRE_M) {
+ info[i].indic_position() = info[j - 1].indic_position();
+ break;
+ }
+ }
+ } else if (info[i].indic_position() != POS_SMVD) {
+ last_pos = (indic_position_t) info[i].indic_position();
+ }
+ }
+ }
+ /* For post-base consonants let them own anything before them
+ * since the last consonant or matra. */
+ {
+ unsigned int last = base;
+ for (unsigned int i = base + 1; i < end; i++)
+ if (is_consonant (info[i]))
+ {
+ for (unsigned int j = last + 1; j < i; j++)
+ if (info[j].indic_position() < POS_SMVD)
+ info[j].indic_position() = info[i].indic_position();
+ last = i;
+ } else if (info[i].indic_category() == OT_M)
+ last = i;
+ }
+
+
+ {
+ /* Use syllable() for sort accounting temporarily. */
+ unsigned int syllable = info[start].syllable();
+ for (unsigned int i = start; i < end; i++)
+ info[i].syllable() = i - start;
+
+ /* Sit tight, rock 'n roll! */
+ hb_stable_sort (info + start, end - start, compare_indic_order);
+ /* Find base again */
+ base = end;
+ for (unsigned int i = start; i < end; i++)
+ if (info[i].indic_position() == POS_BASE_C)
+ {
+ base = i;
+ break;
+ }
+ /* Things are out-of-control for post base positions, they may shuffle
+ * around like crazy. In old-spec mode, we move halants around, so in
+ * that case merge all clusters after base. Otherwise, check the sort
+ * order and merge as needed.
+ * For pre-base stuff, we handle cluster issues in final reordering.
+ *
+ * We could use buffer->sort() for this, if there was no special
+ * reordering of pre-base stuff happening later...
+ * We don't want to merge_clusters all of that, which buffer->sort()
+ * would.
+ */
+ if (indic_plan->is_old_spec || end - start > 127)
+ buffer->merge_clusters (base, end);
+ else
+ {
+ /* Note! syllable() is a one-byte field. */
+ for (unsigned int i = base; i < end; i++)
+ if (info[i].syllable() != 255)
+ {
+ unsigned int max = i;
+ unsigned int j = start + info[i].syllable();
+ while (j != i)
+ {
+ max = hb_max (max, j);
+ unsigned int next = start + info[j].syllable();
+ info[j].syllable() = 255; /* So we don't process j later again. */
+ j = next;
+ }
+ if (i != max)
+ buffer->merge_clusters (i, max + 1);
+ }
+ }
+
+ /* Put syllable back in. */
+ for (unsigned int i = start; i < end; i++)
+ info[i].syllable() = syllable;
+ }
+
+ /* Setup masks now */
+
+ {
+ hb_mask_t mask;
+
+ /* Reph */
+ for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_TO_BECOME_REPH; i++)
+ info[i].mask |= indic_plan->mask_array[INDIC_RPHF];
+
+ /* Pre-base */
+ mask = indic_plan->mask_array[INDIC_HALF];
+ if (!indic_plan->is_old_spec &&
+ indic_plan->config->blwf_mode == BLWF_MODE_PRE_AND_POST)
+ mask |= indic_plan->mask_array[INDIC_BLWF];
+ for (unsigned int i = start; i < base; i++)
+ info[i].mask |= mask;
+ /* Base */
+ mask = 0;
+ if (base < end)
+ info[base].mask |= mask;
+ /* Post-base */
+ mask = indic_plan->mask_array[INDIC_BLWF] |
+ indic_plan->mask_array[INDIC_ABVF] |
+ indic_plan->mask_array[INDIC_PSTF];
+ for (unsigned int i = base + 1; i < end; i++)
+ info[i].mask |= mask;
+ }
+
+ if (indic_plan->is_old_spec &&
+ buffer->props.script == HB_SCRIPT_DEVANAGARI)
+ {
+ /* Old-spec eye-lash Ra needs special handling. From the
+ * spec:
+ *
+ * "The feature 'below-base form' is applied to consonants
+ * having below-base forms and following the base consonant.
+ * The exception is vattu, which may appear below half forms
+ * as well as below the base glyph. The feature 'below-base
+ * form' will be applied to all such occurrences of Ra as well."
+ *
+ * Test case: U+0924,U+094D,U+0930,U+094d,U+0915
+ * with Sanskrit 2003 font.
+ *
+ * However, note that Ra,Halant,ZWJ is the correct way to
+ * request eyelash form of Ra, so we wouldn't inhibit it
+ * in that sequence.
+ *
+ * Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
+ */
+ for (unsigned int i = start; i + 1 < base; i++)
+ if (info[i ].indic_category() == OT_Ra &&
+ info[i+1].indic_category() == OT_H &&
+ (i + 2 == base ||
+ info[i+2].indic_category() != OT_ZWJ))
+ {
+ info[i ].mask |= indic_plan->mask_array[INDIC_BLWF];
+ info[i+1].mask |= indic_plan->mask_array[INDIC_BLWF];
+ }
+ }
+
+ unsigned int pref_len = 2;
+ if (indic_plan->mask_array[INDIC_PREF] && base + pref_len < end)
+ {
+ /* Find a Halant,Ra sequence and mark it for pre-base-reordering processing. */
+ for (unsigned int i = base + 1; i + pref_len - 1 < end; i++) {
+ hb_codepoint_t glyphs[2];
+ for (unsigned int j = 0; j < pref_len; j++)
+ glyphs[j] = info[i + j].codepoint;
+ if (indic_plan->pref.would_substitute (glyphs, pref_len, face))
+ {
+ for (unsigned int j = 0; j < pref_len; j++)
+ info[i++].mask |= indic_plan->mask_array[INDIC_PREF];
+ break;
+ }
+ }
+ }
+
+ /* Apply ZWJ/ZWNJ effects */
+ for (unsigned int i = start + 1; i < end; i++)
+ if (is_joiner (info[i])) {
+ bool non_joiner = info[i].indic_category() == OT_ZWNJ;
+ unsigned int j = i;
+
+ do {
+ j--;
+
+ /* ZWJ/ZWNJ should disable CJCT. They do that by simply
+ * being there, since we don't skip them for the CJCT
+ * feature (ie. F_MANUAL_ZWJ) */
+
+ /* A ZWNJ disables HALF. */
+ if (non_joiner)
+ info[j].mask &= ~indic_plan->mask_array[INDIC_HALF];
+
+ } while (j > start && !is_consonant (info[j]));
+ }
+}
+
+static void
+initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan,
+ hb_face_t *face,
+ hb_buffer_t *buffer,
+ unsigned int start, unsigned int end)
+{
+ /* We treat placeholder/dotted-circle as if they are consonants, so we
+ * should just chain. Only if not in compatibility mode that is... */
+
+ const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
+ if (indic_plan->uniscribe_bug_compatible)
+ {
+ /* For dotted-circle, this is what Uniscribe does:
+ * If dotted-circle is the last glyph, it just does nothing.
+ * Ie. It doesn't form Reph. */
+ if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE)
+ return;
+ }
+
+ initial_reordering_consonant_syllable (plan, face, buffer, start, end);
+}
+
+static void
+initial_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
+ hb_face_t *face,
+ hb_buffer_t *buffer,
+ unsigned int start, unsigned int end)
+{
+ indic_syllable_type_t syllable_type = (indic_syllable_type_t) (buffer->info[start].syllable() & 0x0F);
+ switch (syllable_type)
+ {
+ case indic_vowel_syllable: /* We made the vowels look like consonants. So let's call the consonant logic! */
+ case indic_consonant_syllable:
+ initial_reordering_consonant_syllable (plan, face, buffer, start, end);
+ break;
+
+ case indic_broken_cluster: /* We already inserted dotted-circles, so just call the standalone_cluster. */
+ case indic_standalone_cluster:
+ initial_reordering_standalone_cluster (plan, face, buffer, start, end);
+ break;
+
+ case indic_symbol_cluster:
+ case indic_non_indic_cluster:
+ break;
+ }
+}
+
+static inline void
+insert_dotted_circles_indic (const hb_ot_shape_plan_t *plan HB_UNUSED,
+ hb_font_t *font,
+ hb_buffer_t *buffer)
+{
+ if (unlikely (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE))
+ return;
+
+ /* Note: This loop is extra overhead, but should not be measurable.
+ * TODO Use a buffer scratch flag to remove the loop. */
+ bool has_broken_syllables = false;
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ for (unsigned int i = 0; i < count; i++)
+ if ((info[i].syllable() & 0x0F) == indic_broken_cluster)
+ {
+ has_broken_syllables = true;
+ break;
+ }
+ if (likely (!has_broken_syllables))
+ return;
+
+
+ hb_codepoint_t dottedcircle_glyph;
+ if (!font->get_nominal_glyph (0x25CCu, &dottedcircle_glyph))
+ return;
+
+ hb_glyph_info_t dottedcircle = {0};
+ dottedcircle.codepoint = 0x25CCu;
+ set_indic_properties (dottedcircle);
+ dottedcircle.codepoint = dottedcircle_glyph;
+
+ buffer->clear_output ();
+
+ buffer->idx = 0;
+ unsigned int last_syllable = 0;
+ while (buffer->idx < buffer->len && buffer->successful)
+ {
+ unsigned int syllable = buffer->cur().syllable();
+ indic_syllable_type_t syllable_type = (indic_syllable_type_t) (syllable & 0x0F);
+ if (unlikely (last_syllable != syllable && syllable_type == indic_broken_cluster))
+ {
+ last_syllable = syllable;
+
+ hb_glyph_info_t ginfo = dottedcircle;
+ ginfo.cluster = buffer->cur().cluster;
+ ginfo.mask = buffer->cur().mask;
+ ginfo.syllable() = buffer->cur().syllable();
+
+ /* Insert dottedcircle after possible Repha. */
+ while (buffer->idx < buffer->len && buffer->successful &&
+ last_syllable == buffer->cur().syllable() &&
+ buffer->cur().indic_category() == OT_Repha)
+ buffer->next_glyph ();
+
+ buffer->output_info (ginfo);
+ }
+ else
+ buffer->next_glyph ();
+ }
+ buffer->swap_buffers ();
+}
+/* Initial reordering: updates consonant positions, inserts dotted circles, then runs the per-syllable initial reordering. */
+static void
+initial_reordering_indic (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer)
+{
+ update_consonant_positions_indic (plan, font, buffer);
+ insert_dotted_circles_indic (plan, font, buffer);
+
+ foreach_syllable (buffer, start, end)
+ initial_reordering_syllable_indic (plan, font->face, buffer, start, end);
+}
+/* Final reordering of the syllable [start, end): re-finds the base, then reorders pre-base matras, reph and pre-base-reordering consonants, and finishes clusters. */
+static void
+final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ unsigned int start, unsigned int end)
+{
+ const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
+ hb_glyph_info_t *info = buffer->info;
+
+
+ /* This function relies heavily on halant glyphs. Lots of ligation
+ * and possibly multiple substitutions happened prior to this
+ * phase, and that might have messed up our properties. Recover
+ * from a particular case of that where we're fairly sure that a
+ * class of OT_H is desired but has been lost. */
+ /* We don't call load_virama_glyph(), since we know it's already
+ * loaded. */
+ hb_codepoint_t virama_glyph = indic_plan->virama_glyph.get_relaxed ();
+ if (virama_glyph)
+ {
+ for (unsigned int i = start; i < end; i++)
+ if (info[i].codepoint == virama_glyph &&
+ _hb_glyph_info_ligated (&info[i]) &&
+ _hb_glyph_info_multiplied (&info[i]))
+ {
+ /* This will make sure that this glyph passes is_halant() test. */
+ info[i].indic_category() = OT_H;
+ _hb_glyph_info_clear_ligated_and_multiplied (&info[i]);
+ }
+ }
+
+
+ /* 4. Final reordering:
+ *
+ * After the localized forms and basic shaping forms GSUB features have been
+ * applied (see below), the shaping engine performs some final glyph
+ * reordering before applying all the remaining font features to the entire
+ * syllable.
+ */
+
+ bool try_pref = !!indic_plan->mask_array[INDIC_PREF];
+
+ /* Find base again */
+ unsigned int base;
+ for (base = start; base < end; base++)
+ if (info[base].indic_position() >= POS_BASE_C)
+ {
+ if (try_pref && base + 1 < end)
+ {
+ for (unsigned int i = base + 1; i < end; i++)
+ if ((info[i].mask & indic_plan->mask_array[INDIC_PREF]) != 0)
+ {
+ if (!(_hb_glyph_info_substituted (&info[i]) &&
+ _hb_glyph_info_ligated_and_didnt_multiply (&info[i])))
+ {
+ /* Ok, this was a 'pref' candidate but didn't form any.
+ * Base is around here... */
+ base = i;
+ while (base < end && is_halant (info[base]))
+ base++;
+ if (base < end) /* The halant skip can run off the syllable; never write info[end]. */
+ info[base].indic_position() = POS_BASE_C;
+ try_pref = false;
+ }
+ break;
+ }
+ }
+ /* For Malayalam, skip over unformed below- (but NOT post-) forms. */
+ if (buffer->props.script == HB_SCRIPT_MALAYALAM)
+ {
+ for (unsigned int i = base + 1; i < end; i++)
+ {
+ while (i < end && is_joiner (info[i]))
+ i++;
+ if (i == end || !is_halant (info[i]))
+ break;
+ i++; /* Skip halant. */
+ while (i < end && is_joiner (info[i]))
+ i++;
+ if (i < end && is_consonant (info[i]) && info[i].indic_position() == POS_BELOW_C)
+ {
+ base = i;
+ info[base].indic_position() = POS_BASE_C;
+ }
+ }
+ }
+ /* base can equal end after the 'pref' fallback above; don't read info[end]. */
+ if (start < base && base < end && info[base].indic_position() > POS_BASE_C)
+ base--;
+ break;
+ }
+ if (base == end && start < base &&
+ is_one_of (info[base - 1], FLAG (OT_ZWJ)))
+ base--;
+ if (base < end)
+ while (start < base &&
+ is_one_of (info[base], (FLAG (OT_N) | FLAG (OT_H))))
+ base--;
+
+
+ /* o Reorder matras:
+ *
+ * If a pre-base matra character had been reordered before applying basic
+ * features, the glyph can be moved closer to the main consonant based on
+ * whether half-forms had been formed. Actual position for the matra is
+ * defined as “after last standalone halant glyph, after initial matra
+ * position and before the main consonant”. If ZWJ or ZWNJ follow this
+ * halant, position is moved after it.
+ *
+ * IMPLEMENTATION NOTES:
+ *
+ * It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe
+ * and Devanagari shows that the behavior is best described as:
+ *
+ * "If ZWJ follows this halant, matra is NOT repositioned after this halant.
+ * If ZWNJ follows this halant, position is moved after it."
+ *
+ * Test case, with Adobe Devanagari or Nirmala UI:
+ *
+ * U+091F,U+094D,U+200C,U+092F,U+093F
+ * (Matra moves to the middle, after ZWNJ.)
+ *
+ * U+091F,U+094D,U+200D,U+092F,U+093F
+ * (Matra does NOT move, stays to the left.)
+ *
+ * https://github.com/harfbuzz/harfbuzz/issues/1070
+ */
+
+ if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */
+ {
+ /* If we lost track of base, alas, position before last thingy. */
+ unsigned int new_pos = base == end ? base - 2 : base - 1;
+
+ /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
+ * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
+ * We want to position matra after them.
+ */
+ if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL)
+ {
+ search:
+ while (new_pos > start &&
+ !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_H)))))
+ new_pos--;
+
+ /* If we found no Halant we are done.
+ * Otherwise only proceed if the Halant does
+ * not belong to the Matra itself! */
+ if (is_halant (info[new_pos]) &&
+ info[new_pos].indic_position() != POS_PRE_M)
+ {
+#if 0 // See comment above
+ /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
+ if (new_pos + 1 < end && is_joiner (info[new_pos + 1]))
+ new_pos++;
+#endif
+ if (new_pos + 1 < end)
+ {
+ /* -> If ZWJ follows this halant, matra is NOT repositioned after this halant. */
+ if (info[new_pos + 1].indic_category() == OT_ZWJ)
+ {
+ /* Keep searching. */
+ if (new_pos > start)
+ {
+ new_pos--;
+ goto search;
+ }
+ }
+ /* -> If ZWNJ follows this halant, position is moved after it.
+ *
+ * IMPLEMENTATION NOTES:
+ *
+ * This is taken care of by the state-machine. A Halant,ZWNJ is a terminating
+ * sequence for a consonant syllable; any pre-base matras occurring after it
+ * will belong to the subsequent syllable.
+ */
+ }
+ }
+ else
+ new_pos = start; /* No move. */
+ }
+
+ if (start < new_pos && info[new_pos].indic_position () != POS_PRE_M)
+ {
+ /* Now go see if there's actually any matras... */
+ for (unsigned int i = new_pos; i > start; i--)
+ if (info[i - 1].indic_position () == POS_PRE_M)
+ {
+ unsigned int old_pos = i - 1;
+ if (old_pos < base && base <= new_pos) /* Shouldn't actually happen. */
+ base--;
+
+ hb_glyph_info_t tmp = info[old_pos];
+ memmove (&info[old_pos], &info[old_pos + 1], (new_pos - old_pos) * sizeof (info[0]));
+ info[new_pos] = tmp;
+
+ /* Note: this merge_clusters() is intentionally *after* the reordering.
+ * Indic matra reordering is special and tricky... */
+ buffer->merge_clusters (new_pos, hb_min (end, base + 1));
+
+ new_pos--;
+ }
+ } else {
+ for (unsigned int i = start; i < base; i++)
+ if (info[i].indic_position () == POS_PRE_M) {
+ buffer->merge_clusters (i, hb_min (end, base + 1));
+ break;
+ }
+ }
+ }
+
+
+ /* o Reorder reph:
+ *
+ * Reph’s original position is always at the beginning of the syllable,
+ * (i.e. it is not reordered at the character reordering stage). However,
+ * it will be reordered according to the basic-forms shaping results.
+ * Possible positions for reph, depending on the script, are; after main,
+ * before post-base consonant forms, and after post-base consonant forms.
+ */
+
+ /* Two cases:
+ *
+ * - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then
+ * we should only move it if the sequence ligated to the repha form.
+ *
+ * - If repha is encoded separately and in the logical position, we should only
+ * move it if it did NOT ligate. If it ligated, it's probably the font trying
+ * to make it work without the reordering.
+ */
+ if (start + 1 < end &&
+ info[start].indic_position() == POS_RA_TO_BECOME_REPH &&
+ ((info[start].indic_category() == OT_Repha) ^
+ _hb_glyph_info_ligated_and_didnt_multiply (&info[start])))
+ {
+ unsigned int new_reph_pos;
+ reph_position_t reph_pos = indic_plan->config->reph_pos;
+
+ /* 1. If reph should be positioned after post-base consonant forms,
+ * proceed to step 5.
+ */
+ if (reph_pos == REPH_POS_AFTER_POST)
+ {
+ goto reph_step_5;
+ }
+
+ /* 2. If the reph repositioning class is not after post-base: target
+ * position is after the first explicit halant glyph between the
+ * first post-reph consonant and last main consonant. If ZWJ or ZWNJ
+ * are following this halant, position is moved after it. If such
+ * position is found, this is the target position. Otherwise,
+ * proceed to the next step.
+ *
+ * Note: in old-implementation fonts, where classifications were
+ * fixed in shaping engine, there was no case where reph position
+ * will be found on this step.
+ */
+ {
+ new_reph_pos = start + 1;
+ while (new_reph_pos < base && !is_halant (info[new_reph_pos]))
+ new_reph_pos++;
+
+ if (new_reph_pos < base && is_halant (info[new_reph_pos]))
+ {
+ /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
+ if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))
+ new_reph_pos++;
+ goto reph_move;
+ }
+ }
+
+ /* 3. If reph should be repositioned after the main consonant: find the
+ * first consonant not ligated with main, or find the first
+ * consonant that is not a potential pre-base-reordering Ra.
+ */
+ if (reph_pos == REPH_POS_AFTER_MAIN)
+ {
+ new_reph_pos = base;
+ while (new_reph_pos + 1 < end && info[new_reph_pos + 1].indic_position() <= POS_AFTER_MAIN)
+ new_reph_pos++;
+ if (new_reph_pos < end)
+ goto reph_move;
+ }
+
+ /* 4. If reph should be positioned before post-base consonant, find
+ * first post-base classified consonant not ligated with main. If no
+ * consonant is found, the target position should be before the
+ * first matra, syllable modifier sign or vedic sign.
+ */
+ /* This is our take on what step 4 is trying to say (and failing, BADLY). */
+ if (reph_pos == REPH_POS_AFTER_SUB)
+ {
+ new_reph_pos = base;
+ while (new_reph_pos + 1 < end &&
+ !( FLAG_UNSAFE (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD))))
+ new_reph_pos++;
+ if (new_reph_pos < end)
+ goto reph_move;
+ }
+
+ /* 5. If no consonant is found in steps 3 or 4, move reph to a position
+ * immediately before the first post-base matra, syllable modifier
+ * sign or vedic sign that has a reordering class after the intended
+ * reph position. For example, if the reordering position for reph
+ * is post-main, it will skip above-base matras that also have a
+ * post-main position.
+ */
+ reph_step_5:
+ {
+ /* Copied from step 2. */
+ new_reph_pos = start + 1;
+ while (new_reph_pos < base && !is_halant (info[new_reph_pos]))
+ new_reph_pos++;
+
+ if (new_reph_pos < base && is_halant (info[new_reph_pos]))
+ {
+ /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
+ if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))
+ new_reph_pos++;
+ goto reph_move;
+ }
+ }
+ /* See https://github.com/harfbuzz/harfbuzz/issues/2298#issuecomment-615318654 */
+
+ /* 6. Otherwise, reorder reph to the end of the syllable.
+ */
+ {
+ new_reph_pos = end - 1;
+ while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_SMVD)
+ new_reph_pos--;
+
+ /*
+ * If the Reph is to be ending up after a Matra,Halant sequence,
+ * position it before that Halant so it can interact with the Matra.
+ * However, if it's a plain Consonant,Halant we shouldn't do that.
+ * Uniscribe doesn't do this.
+ * TEST: U+0930,U+094D,U+0915,U+094B,U+094D
+ */
+ if (!indic_plan->uniscribe_bug_compatible &&
+ unlikely (is_halant (info[new_reph_pos])))
+ {
+ for (unsigned int i = base + 1; i < new_reph_pos; i++)
+ if (info[i].indic_category() == OT_M) {
+ /* Ok, got it. */
+ new_reph_pos--;
+ }
+ }
+
+ goto reph_move;
+ }
+
+ reph_move:
+ {
+ /* Move */
+ buffer->merge_clusters (start, new_reph_pos + 1);
+ hb_glyph_info_t reph = info[start];
+ memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (info[0]));
+ info[new_reph_pos] = reph;
+
+ if (start < base && base <= new_reph_pos)
+ base--;
+ }
+ }
+
+
+ /* o Reorder pre-base-reordering consonants:
+ *
+ * If a pre-base-reordering consonant is found, reorder it according to
+ * the following rules:
+ */
+
+ if (try_pref && base + 1 < end) /* Otherwise there can't be any pre-base-reordering Ra. */
+ {
+ for (unsigned int i = base + 1; i < end; i++)
+ if ((info[i].mask & indic_plan->mask_array[INDIC_PREF]) != 0)
+ {
+ /* 1. Only reorder a glyph produced by substitution during application
+ * of the <pref> feature. (Note that a font may shape a Ra consonant with
+ * the feature generally but block it in certain contexts.)
+ */
+ /* Note: We just check that something got substituted. We don't check that
+ * the <pref> feature actually did it...
+ *
+ * Reorder pref only if it ligated. */
+ if (_hb_glyph_info_ligated_and_didnt_multiply (&info[i]))
+ {
+ /*
+ * 2. Try to find a target position the same way as for pre-base matra.
+ * If it is found, reorder pre-base consonant glyph.
+ *
+ * 3. If position is not found, reorder immediately before main
+ * consonant.
+ */
+
+ unsigned int new_pos = base;
+ /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
+ * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
+ * We want to position matra after them.
+ */
+ if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL)
+ {
+ while (new_pos > start &&
+ !(is_one_of (info[new_pos - 1], FLAG(OT_M) | FLAG (OT_H))))
+ new_pos--;
+ }
+
+ if (new_pos > start && is_halant (info[new_pos - 1]))
+ {
+ /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
+ if (new_pos < end && is_joiner (info[new_pos]))
+ new_pos++;
+ }
+
+ {
+ unsigned int old_pos = i;
+
+ buffer->merge_clusters (new_pos, old_pos + 1);
+ hb_glyph_info_t tmp = info[old_pos];
+ memmove (&info[new_pos + 1], &info[new_pos], (old_pos - new_pos) * sizeof (info[0]));
+ info[new_pos] = tmp;
+
+ if (new_pos <= base && base < old_pos)
+ base++;
+ }
+ }
+
+ break;
+ }
+ }
+
+
+ /* Apply 'init' to the Left Matra if it's a word start. */
+ if (info[start].indic_position () == POS_PRE_M)
+ {
+ if (!start ||
+ !(FLAG_UNSAFE (_hb_glyph_info_get_general_category (&info[start - 1])) &
+ FLAG_RANGE (HB_UNICODE_GENERAL_CATEGORY_FORMAT, HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
+ info[start].mask |= indic_plan->mask_array[INDIC_INIT];
+ else
+ buffer->unsafe_to_break (start - 1, start + 1);
+ }
+
+
+ /*
+ * Finish off the clusters and go home!
+ */
+ if (indic_plan->uniscribe_bug_compatible)
+ {
+ switch ((hb_tag_t) plan->props.script)
+ {
+ case HB_SCRIPT_TAMIL:
+ case HB_SCRIPT_SINHALA:
+ break;
+
+ default:
+ /* Uniscribe merges the entire syllable into a single cluster... Except for Tamil & Sinhala.
+ * This means, half forms are submerged into the main consonant's cluster.
+ * This is unnecessary, and makes cursor positioning harder, but that's what
+ * Uniscribe does. */
+ buffer->merge_clusters (start, end);
+ break;
+ }
+ }
+}
+
+/* Runs final reordering on every syllable of a non-empty buffer, then deallocates the indic_category / indic_position buffer vars. */
+static void
+final_reordering_indic (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font HB_UNUSED,
+ hb_buffer_t *buffer)
+{
+ unsigned int count = buffer->len;
+ if (unlikely (!count)) return;
+
+ foreach_syllable (buffer, start, end)
+ final_reordering_syllable_indic (plan, buffer, start, end);
+
+ HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category);
+ HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position);
+}
+
+/* Text-preprocessing callback of the Indic shaper; it only forwards to _hb_preprocess_text_vowel_constraints(). */
+static void
+preprocess_text_indic (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ hb_font_t *font)
+{
+ _hb_preprocess_text_vowel_constraints (plan, buffer, font);
+}
+/* Decompose callback: refuses a few letters, splits Sinhala split matras Uniscribe-style when safe, else defers to c->unicode->decompose(). */
+static bool
+decompose_indic (const hb_ot_shape_normalize_context_t *c,
+ hb_codepoint_t ab,
+ hb_codepoint_t *a,
+ hb_codepoint_t *b)
+{
+ switch (ab)
+ {
+ /* Don't decompose these. */
+ case 0x0931u : return false; /* DEVANAGARI LETTER RRA */
+ // https://github.com/harfbuzz/harfbuzz/issues/779
+ case 0x09DCu : return false; /* BENGALI LETTER RRA */
+ case 0x09DDu : return false; /* BENGALI LETTER RHA */
+ case 0x0B94u : return false; /* TAMIL LETTER AU */
+
+
+ /*
+ * Decompose split matras that don't have Unicode decompositions.
+ */
+
+#if 0
+ /* Gujarati */
+ /* This one has no decomposition in Unicode, but needs no decomposition either. */
+ /* case 0x0AC9u : return false; */
+
+ /* Oriya */
+ case 0x0B57u : *a = no decomp, -> RIGHT; return true;
+#endif
+ }
+
+ if ((ab == 0x0DDAu || hb_in_range<hb_codepoint_t> (ab, 0x0DDCu, 0x0DDEu)))
+ {
+ /*
+ * Sinhala split matras... Let the fun begin.
+ *
+ * These four characters have Unicode decompositions. However, Uniscribe
+ * decomposes them "Khmer-style", that is, it uses the character itself to
+ * get the second half. The first half of all four decompositions is always
+ * U+0DD9.
+ *
+ * Now, there are buggy fonts, namely, the widely used lklug.ttf, that are
+ * broken with Uniscribe. But we need to support them. As such, we only
+ * do the Uniscribe-style decomposition if the character is transformed into
+ * its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to
+ * Unicode decomposition.
+ *
+ * Note that we can't unconditionally use Unicode decomposition. That would
+ * break some other fonts, that are designed to work with Uniscribe, and
+ * don't have positioning features for the Unicode-style decomposition.
+ *
+ * Argh...
+ *
+ * The Uniscribe behavior is now documented in the newly published Sinhala
+ * spec in 2012:
+ *
+ * https://docs.microsoft.com/en-us/typography/script-development/sinhala#shaping
+ */
+
+
+ const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) c->plan->data;
+ hb_codepoint_t glyph;
+ if (indic_plan->uniscribe_bug_compatible ||
+ (c->font->get_nominal_glyph (ab, &glyph) &&
+ indic_plan->pstf.would_substitute (&glyph, 1, c->font->face)))
+ {
+ /* Ok, safe to use Uniscribe-style decomposition. */
+ *a = 0x0DD9u;
+ *b = ab;
+ return true;
+ }
+ }
+
+ return (bool) c->unicode->decompose (ab, a, b);
+}
+/* Compose callback: refuses when `a` is a mark, recomposes U+09AF,U+09BC to U+09DF, else defers to c->unicode->compose(). */
+static bool
+compose_indic (const hb_ot_shape_normalize_context_t *c,
+ hb_codepoint_t a,
+ hb_codepoint_t b,
+ hb_codepoint_t *ab)
+{
+ /* Avoid recomposing split matras. */
+ if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
+ return false;
+
+ /* Composition-exclusion exceptions that we want to recompose. */
+ if (a == 0x09AFu && b == 0x09BCu) { *ab = 0x09DFu; return true; }
+
+ return (bool) c->unicode->compose (a, b, ab);
+}
+
+/* Indic shaper descriptor; the initializer is positional, so entries must stay in hb_ot_complex_shaper_t field order. */
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
+{
+ collect_features_indic,
+ override_features_indic,
+ data_create_indic,
+ data_destroy_indic,
+ preprocess_text_indic,
+ nullptr, /* postprocess_glyphs */
+ HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
+ decompose_indic,
+ compose_indic,
+ setup_masks_indic,
+ HB_TAG_NONE, /* gpos_tag */
+ nullptr, /* reorder_marks */
+ HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
+ false, /* fallback_position */
+};
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.hh
new file mode 100644
index 0000000000..41bd8bd6cc
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.hh
@@ -0,0 +1,436 @@
+/*
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_INDIC_HH
+#define HB_OT_SHAPE_COMPLEX_INDIC_HH
+
+#include "hb.hh"
+
+#include "hb-ot-shape-complex.hh"
+
+
+/* buffer var allocations */
+#define indic_category() complex_var_u8_0() /* indic_category_t */
+#define indic_position() complex_var_u8_1() /* indic_position_t */
+
+
+#define INDIC_TABLE_ELEMENT_TYPE uint16_t
+
+/* Categories used in the OpenType spec:
+ * https://docs.microsoft.com/en-us/typography/script-development/devanagari
+ */
+/* Note: This enum is duplicated in the -machine.rl source file.
+ * Not sure how to avoid duplication. */
+enum indic_category_t {
+ OT_X = 0, /* Other. */
+ OT_C = 1, /* Consonant. */
+ OT_V = 2, /* Independent vowel. */
+ OT_N = 3, /* Nukta. */
+ OT_H = 4, /* Virama (halant). */
+ OT_ZWNJ = 5, /* Non-joiner. */
+ OT_ZWJ = 6, /* Joiner. */
+ OT_M = 7, /* Dependent vowel (matra). */
+ OT_SM = 8, /* Syllable modifier; also used for bindu and visarga. */
+ /* OT_VD = 9, UNUSED; we use OT_A instead. */
+ OT_A = 10, /* Cantillation mark. */
+ OT_PLACEHOLDER = 11,
+ OT_DOTTEDCIRCLE = 12,
+ OT_RS = 13, /* Register Shifter, used in Khmer OT spec. */
+ OT_Coeng = 14, /* Khmer-style Virama. */
+ OT_Repha = 15, /* Atomically-encoded logical or visual repha. */
+ OT_Ra = 16, /* Ra; set_indic_properties() assigns it to consonants for which is_ra() is true. */
+ OT_CM = 17, /* Consonant-Medial. */
+ OT_Symbol = 18, /* Avagraha, etc that take marks (SM,A,VD). */
+ OT_CS = 19, /* Consonant with stacker. */
+
+ /* The following are used by Khmer & Myanmar shapers. Defined
+ * here for them to share. */
+ OT_VAbv = 26,
+ OT_VBlw = 27,
+ OT_VPre = 28,
+ OT_VPst = 29,
+};
+
+#define MEDIAL_FLAGS (FLAG (OT_CM))
+
+/* Note:
+ *
+ * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels
+ * cannot happen in a consonant syllable. The plus side however is, we can call the
+ * consonant syllable logic from the vowel syllable function and get it all right! */
+#define CONSONANT_FLAGS (FLAG (OT_C) | FLAG (OT_CS) | FLAG (OT_Ra) | MEDIAL_FLAGS | FLAG (OT_V) | FLAG (OT_PLACEHOLDER) | FLAG (OT_DOTTEDCIRCLE))
+#define JOINER_FLAGS (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ))
+
+
+/* Visual positions in a syllable from left to right. */
+enum indic_position_t {
+ POS_START = 0,
+
+ POS_RA_TO_BECOME_REPH = 1,
+ POS_PRE_M = 2,
+ POS_PRE_C = 3,
+
+ POS_BASE_C = 4,
+ POS_AFTER_MAIN = 5,
+
+ POS_ABOVE_C = 6,
+
+ POS_BEFORE_SUB = 7,
+ POS_BELOW_C = 8,
+ POS_AFTER_SUB = 9,
+
+ POS_BEFORE_POST = 10,
+ POS_POST_C = 11,
+ POS_AFTER_POST = 12,
+
+ POS_FINAL_C = 13,
+ POS_SMVD = 14,
+
+ POS_END = 15
+};
+
+/* Categories used in IndicSyllabicCategory.txt from UCD. */
+enum indic_syllabic_category_t {
+ INDIC_SYLLABIC_CATEGORY_OTHER = OT_X,
+
+ INDIC_SYLLABIC_CATEGORY_AVAGRAHA = OT_Symbol,
+ INDIC_SYLLABIC_CATEGORY_BINDU = OT_SM,
+ INDIC_SYLLABIC_CATEGORY_BRAHMI_JOINING_NUMBER = OT_PLACEHOLDER, /* Don't care. */
+ INDIC_SYLLABIC_CATEGORY_CANTILLATION_MARK = OT_A,
+ INDIC_SYLLABIC_CATEGORY_CONSONANT = OT_C,
+ INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD = OT_C,
+ INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL = OT_CM,
+ INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER = OT_C,
+ INDIC_SYLLABIC_CATEGORY_CONSONANT_KILLER = OT_M, /* U+17CD only. */
+ INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_CM,
+ INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_PLACEHOLDER,
+ INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA = OT_Repha,
+ INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED = OT_X, /* Don't care. */
+ INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_CM,
+ INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA = OT_CM,
+ INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER = OT_CS,
+ INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK = OT_SM, /* https://github.com/harfbuzz/harfbuzz/issues/552 */
+ INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER = OT_Coeng,
+ INDIC_SYLLABIC_CATEGORY_JOINER = OT_ZWJ,
+ INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X,
+ INDIC_SYLLABIC_CATEGORY_NON_JOINER = OT_ZWNJ,
+ INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N,
+ INDIC_SYLLABIC_CATEGORY_NUMBER = OT_PLACEHOLDER,
+ INDIC_SYLLABIC_CATEGORY_NUMBER_JOINER = OT_PLACEHOLDER, /* Don't care. */
+ INDIC_SYLLABIC_CATEGORY_PURE_KILLER = OT_M, /* Is like a vowel matra. */
+ INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS,
+ INDIC_SYLLABIC_CATEGORY_SYLLABLE_MODIFIER = OT_SM,
+ INDIC_SYLLABIC_CATEGORY_TONE_LETTER = OT_X,
+ INDIC_SYLLABIC_CATEGORY_TONE_MARK = OT_N,
+ INDIC_SYLLABIC_CATEGORY_VIRAMA = OT_H,
+ INDIC_SYLLABIC_CATEGORY_VISARGA = OT_SM,
+ INDIC_SYLLABIC_CATEGORY_VOWEL = OT_V,
+ INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT = OT_M,
+ INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT = OT_V
+};
+
+/* Categories used in IndicMatraCategory.txt (now IndicPositionalCategory.txt) from UCD */
+enum indic_matra_category_t {
+ INDIC_MATRA_CATEGORY_NOT_APPLICABLE = POS_END,
+
+ INDIC_MATRA_CATEGORY_LEFT = POS_PRE_C,
+ INDIC_MATRA_CATEGORY_TOP = POS_ABOVE_C,
+ INDIC_MATRA_CATEGORY_BOTTOM = POS_BELOW_C,
+ INDIC_MATRA_CATEGORY_RIGHT = POS_POST_C,
+
+ /* These should resolve to the position of the last part of the split sequence. */
+ INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT,
+ INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT,
+ INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM = INDIC_MATRA_CATEGORY_BOTTOM,
+ INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_LEFT = INDIC_MATRA_CATEGORY_BOTTOM,
+ INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT,
+ INDIC_MATRA_CATEGORY_TOP_AND_LEFT = INDIC_MATRA_CATEGORY_TOP,
+ INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT,
+ INDIC_MATRA_CATEGORY_TOP_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT,
+
+ INDIC_MATRA_CATEGORY_OVERSTRUCK = POS_AFTER_MAIN,
+ INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT = POS_PRE_M
+};
+
+#define INDIC_COMBINE_CATEGORIES(S,M) \
+ ( \
+ ASSERT_STATIC_EXPR_ZERO (S < 255 && M < 255) + \
+ ( S | \
+ ( \
+ ( \
+ S == INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL || \
+ S == INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK || \
+ S == INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER || \
+ S == INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA || \
+ S == INDIC_SYLLABIC_CATEGORY_VIRAMA || \
+ S == INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT || \
+ false \
+ ? M : INDIC_MATRA_CATEGORY_NOT_APPLICABLE \
+ ) << 8 \
+ ) \
+ ) \
+ )
+
+HB_INTERNAL INDIC_TABLE_ELEMENT_TYPE
+hb_indic_get_categories (hb_codepoint_t u);
+
+/* True if info's indic category is one of the bits set in `flags`; always false for a ligated glyph. */
+static inline bool
+is_one_of (const hb_glyph_info_t &info, unsigned int flags)
+{
+ /* If it ligated, all bets are off. */
+ if (_hb_glyph_info_ligated (&info)) return false;
+ return !!(FLAG_UNSAFE (info.indic_category()) & flags);
+}
+/* True for an unligated ZWJ or ZWNJ (JOINER_FLAGS). */
+static inline bool
+is_joiner (const hb_glyph_info_t &info)
+{
+ return is_one_of (info, JOINER_FLAGS);
+}
+/* True for an unligated glyph in CONSONANT_FLAGS, which also includes vowels, placeholders and the dotted circle. */
+static inline bool
+is_consonant (const hb_glyph_info_t &info)
+{
+ return is_one_of (info, CONSONANT_FLAGS);
+}
+/* True for an unligated glyph whose category is OT_H. */
+static inline bool
+is_halant (const hb_glyph_info_t &info)
+{
+ return is_one_of (info, FLAG (OT_H));
+}
+
+#define IN_HALF_BLOCK(u, Base) (((u) & ~0x7Fu) == (Base))
+
+#define IS_DEVA(u) (IN_HALF_BLOCK (u, 0x0900u))
+#define IS_BENG(u) (IN_HALF_BLOCK (u, 0x0980u))
+#define IS_GURU(u) (IN_HALF_BLOCK (u, 0x0A00u))
+#define IS_GUJR(u) (IN_HALF_BLOCK (u, 0x0A80u))
+#define IS_ORYA(u) (IN_HALF_BLOCK (u, 0x0B00u))
+#define IS_TAML(u) (IN_HALF_BLOCK (u, 0x0B80u))
+#define IS_TELU(u) (IN_HALF_BLOCK (u, 0x0C00u))
+#define IS_KNDA(u) (IN_HALF_BLOCK (u, 0x0C80u))
+#define IS_MLYM(u) (IN_HALF_BLOCK (u, 0x0D00u))
+#define IS_SINH(u) (IN_HALF_BLOCK (u, 0x0D80u))
+
+
+#define MATRA_POS_LEFT(u) POS_PRE_M
+#define MATRA_POS_RIGHT(u) ( \
+ IS_DEVA(u) ? POS_AFTER_SUB : \
+ IS_BENG(u) ? POS_AFTER_POST : \
+ IS_GURU(u) ? POS_AFTER_POST : \
+ IS_GUJR(u) ? POS_AFTER_POST : \
+ IS_ORYA(u) ? POS_AFTER_POST : \
+ IS_TAML(u) ? POS_AFTER_POST : \
+ IS_TELU(u) ? (u <= 0x0C42u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \
+ IS_KNDA(u) ? (u < 0x0CC3u || u > 0xCD6u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \
+ IS_MLYM(u) ? POS_AFTER_POST : \
+ IS_SINH(u) ? POS_AFTER_SUB : \
+ /*default*/ POS_AFTER_SUB \
+ )
+#define MATRA_POS_TOP(u) ( /* BENG and MLYM don't have top matras. */ \
+ IS_DEVA(u) ? POS_AFTER_SUB : \
+ IS_GURU(u) ? POS_AFTER_POST : /* Deviate from spec */ \
+ IS_GUJR(u) ? POS_AFTER_SUB : \
+ IS_ORYA(u) ? POS_AFTER_MAIN : \
+ IS_TAML(u) ? POS_AFTER_SUB : \
+ IS_TELU(u) ? POS_BEFORE_SUB : \
+ IS_KNDA(u) ? POS_BEFORE_SUB : \
+ IS_SINH(u) ? POS_AFTER_SUB : \
+ /*default*/ POS_AFTER_SUB \
+ )
+#define MATRA_POS_BOTTOM(u) ( \
+ IS_DEVA(u) ? POS_AFTER_SUB : \
+ IS_BENG(u) ? POS_AFTER_SUB : \
+ IS_GURU(u) ? POS_AFTER_POST : \
+ IS_GUJR(u) ? POS_AFTER_POST : \
+ IS_ORYA(u) ? POS_AFTER_SUB : \
+ IS_TAML(u) ? POS_AFTER_POST : \
+ IS_TELU(u) ? POS_BEFORE_SUB : \
+ IS_KNDA(u) ? POS_BEFORE_SUB : \
+ IS_MLYM(u) ? POS_AFTER_POST : \
+ IS_SINH(u) ? POS_AFTER_SUB : \
+ /*default*/ POS_AFTER_SUB \
+ )
+/* Maps a matra side (pre / post / above / below) to the per-script position for codepoint u; any other `side` is returned unchanged. */
+static inline indic_position_t
+matra_position_indic (hb_codepoint_t u, indic_position_t side)
+{
+ switch ((int) side)
+ {
+ case POS_PRE_C: return MATRA_POS_LEFT (u);
+ case POS_POST_C: return MATRA_POS_RIGHT (u);
+ case POS_ABOVE_C: return MATRA_POS_TOP (u);
+ case POS_BELOW_C: return MATRA_POS_BOTTOM (u);
+ }
+ return side;
+}
+
+/* XXX
+ * This is a hack for now. We should move this data into the main Indic table.
+ * Or completely remove it and just check in the tables.
+ */
+static const hb_codepoint_t ra_chars[] = {
+ 0x0930u, /* Devanagari */
+ 0x09B0u, /* Bengali */
+ 0x09F0u, /* Bengali */
+ 0x0A30u, /* Gurmukhi */ /* No Reph */
+ 0x0AB0u, /* Gujarati */
+ 0x0B30u, /* Oriya */
+ 0x0BB0u, /* Tamil */ /* No Reph */
+ 0x0C30u, /* Telugu */ /* Reph formed only with ZWJ */
+ 0x0CB0u, /* Kannada */
+ 0x0D30u, /* Malayalam */ /* No Reph, Logical Repha */
+
+ 0x0DBBu, /* Sinhala */ /* Reph formed only with ZWJ */
+
+ 0x179Au, /* Khmer */
+};
+/* True if u is one of the Ra codepoints listed in ra_chars (linear scan of the table). */
+static inline bool
+is_ra (hb_codepoint_t u)
+{
+ for (unsigned int i = 0; i < ARRAY_LENGTH (ra_chars); i++)
+ if (u == ra_chars[i])
+ return true;
+ return false;
+}
+/* Looks up category and position for info.codepoint, applies the per-codepoint overrides below, and stores both in info's indic vars. */
+static inline void
+set_indic_properties (hb_glyph_info_t &info)
+{
+ hb_codepoint_t u = info.codepoint;
+ unsigned int type = hb_indic_get_categories (u);
+ indic_category_t cat = (indic_category_t) (type & 0x7Fu);
+ indic_position_t pos = (indic_position_t) (type >> 8);
+ /* The low 7 bits of the table value are the category; the bits from 8 up are the position. */
+
+ /*
+ * Re-assign category
+ */
+
+ /* The following act more like the Bindus. */
+ if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0953u, 0x0954u)))
+ cat = OT_SM;
+ /* The following act like consonants. */
+ else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0x0A72u, 0x0A73u,
+ 0x1CF5u, 0x1CF6u)))
+ cat = OT_C;
+ /* TODO: The following should only be allowed after a Visarga.
+ * For now, just treat them like regular tone marks. */
+ else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1CE2u, 0x1CE8u)))
+ cat = OT_A;
+ /* TODO: The following should only be allowed after some of
+ * the nasalization marks, maybe only for U+1CE9..U+1CF1.
+ * For now, just treat them like tone marks. */
+ else if (unlikely (u == 0x1CEDu))
+ cat = OT_A;
+ /* The following take marks in standalone clusters, similar to Avagraha. */
+ else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0xA8F2u, 0xA8F7u,
+ 0x1CE9u, 0x1CECu,
+ 0x1CEEu, 0x1CF1u)))
+ {
+ cat = OT_Symbol;
+ static_assert (((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol), "");
+ }
+ else if (unlikely (u == 0x0A51u))
+ {
+ /* https://github.com/harfbuzz/harfbuzz/issues/524 */
+ cat = OT_M;
+ pos = POS_BELOW_C;
+ }
+
+ /* According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil,
+ * so the Indic shaper needs to know their categories. */
+ else if (unlikely (u == 0x11301u || u == 0x11303u)) cat = OT_SM;
+ else if (unlikely (u == 0x1133Bu || u == 0x1133Cu)) cat = OT_N;
+
+ else if (unlikely (u == 0x0AFBu)) cat = OT_N; /* https://github.com/harfbuzz/harfbuzz/issues/552 */
+
+ else if (unlikely (u == 0x0980u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/issues/538 */
+ else if (unlikely (u == 0x09FCu)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/1613 */
+ else if (unlikely (u == 0x0C80u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/623 */
+ else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u)))
+ cat = OT_PLACEHOLDER;
+ else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE;
+
+
+ /*
+ * Re-assign position.
+ */
+
+ if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS))
+ {
+ pos = POS_BASE_C;
+ if (is_ra (u))
+ cat = OT_Ra;
+ }
+ else if (cat == OT_M)
+ {
+ pos = matra_position_indic (u, pos);
+ }
+ else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) /* | FLAG (OT_VD) */ | FLAG (OT_A) | FLAG (OT_Symbol))))
+ {
+ pos = POS_SMVD;
+ }
+
+ if (unlikely (u == 0x0B01u)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */
+
+
+
+ info.indic_category() = cat;
+ info.indic_position() = pos;
+}
+/* Holds the GSUB lookups of one feature's stage so callers can ask whether a glyph sequence would be substituted. */
+struct hb_indic_would_substitute_feature_t
+{
+ void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_)
+ {
+ zero_context = zero_context_;
+ map->get_stage_lookups (0/*GSUB*/,
+ map->get_feature_stage (0/*GSUB*/, feature_tag),
+ &lookups, &count);
+ }
+ /* Returns true as soon as any stored lookup reports it would substitute `glyphs`. */
+ bool would_substitute (const hb_codepoint_t *glyphs,
+ unsigned int glyphs_count,
+ hb_face_t *face) const
+ {
+ for (unsigned int i = 0; i < count; i++)
+ if (hb_ot_layout_lookup_would_substitute (face, lookups[i].index, glyphs, glyphs_count, zero_context))
+ return true;
+ return false;
+ }
+
+ private:
+ const hb_ot_map_t::lookup_map_t *lookups;
+ unsigned int count;
+ bool zero_context;
+};
+
+
+#endif /* HB_OT_SHAPE_COMPLEX_INDIC_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer-machine.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer-machine.hh
new file mode 100644
index 0000000000..a040318d34
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer-machine.hh
@@ -0,0 +1,372 @@
+
+#line 1 "hb-ot-shape-complex-khmer-machine.rl"
+/*
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_KHMER_MACHINE_HH
+#define HB_OT_SHAPE_COMPLEX_KHMER_MACHINE_HH
+
+#include "hb.hh"
+
+
+#line 36 "hb-ot-shape-complex-khmer-machine.hh"
+/* Per-state key range, two entries per state: find_syllables_khmer() reads
+ * entries (cs<<1) and (cs<<1)+1 as the lowest and highest category value
+ * that has its own column in state cs's row of the index table. */
+static const unsigned char _khmer_syllable_machine_trans_keys[] = {
+ 5u, 26u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 16u, 5u, 21u, 5u, 26u, 5u, 21u,
+ 5u, 26u, 5u, 21u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 16u, 5u, 21u, 5u, 26u,
+ 5u, 21u, 5u, 26u, 5u, 21u, 5u, 26u, 1u, 29u, 5u, 29u, 5u, 29u, 5u, 29u,
+ 22u, 22u, 5u, 22u, 5u, 29u, 5u, 29u, 5u, 29u, 1u, 16u, 5u, 26u, 5u, 29u,
+ 5u, 29u, 22u, 22u, 5u, 22u, 5u, 29u, 5u, 29u, 1u, 16u, 5u, 29u, 5u, 29u,
+ 0
+};
+
+/* Per-state span value.  find_syllables_khmer() uses it as the column index
+ * for any category outside the state's key range, i.e. the row's default
+ * transition. */
+static const char _khmer_syllable_machine_key_spans[] = {
+ 22, 17, 22, 17, 16, 17, 22, 17,
+ 22, 17, 17, 22, 17, 16, 17, 22,
+ 17, 22, 17, 22, 29, 25, 25, 25,
+ 1, 18, 25, 25, 25, 16, 22, 25,
+ 25, 1, 18, 25, 25, 16, 25, 25
+};
+
+/* Per-state offset of the state's row inside
+ * _khmer_syllable_machine_indicies. */
+static const short _khmer_syllable_machine_index_offsets[] = {
+ 0, 23, 41, 64, 82, 99, 117, 140,
+ 158, 181, 199, 217, 240, 258, 275, 293,
+ 316, 334, 357, 375, 398, 428, 454, 480,
+ 506, 508, 527, 553, 579, 605, 622, 645,
+ 671, 697, 699, 718, 744, 770, 787, 813
+};
+
+/* Transition indices, stored as one row per state (row start given by
+ * _khmer_syllable_machine_index_offsets).  Within a row, column
+ * (category - low key) is used for in-range categories and the column at
+ * the state's key span for everything else.  Each value indexes the
+ * trans_targs / trans_actions tables. */
+static const char _khmer_syllable_machine_indicies[] = {
+ 1, 1, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 2,
+ 3, 0, 0, 0, 0, 4, 0, 1,
+ 1, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 3,
+ 0, 1, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 3, 0, 0, 0, 0, 4, 0,
+ 5, 5, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 4, 0, 6, 6, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 6, 0, 7, 7, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 8, 0, 9, 9, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 10, 0, 0,
+ 0, 0, 4, 0, 9, 9, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 10, 0, 11, 11,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 12, 0,
+ 0, 0, 0, 4, 0, 11, 11, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 12, 0, 14,
+ 14, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 15,
+ 13, 14, 14, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 15, 16, 16, 16, 16, 17, 16,
+ 18, 18, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 17, 16, 19, 19, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 19, 16, 20, 20, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 21, 16, 22, 22, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 23, 16, 16,
+ 16, 16, 17, 16, 22, 22, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 23, 16, 24, 24,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 25, 16,
+ 16, 16, 16, 17, 16, 24, 24, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 25, 16, 14,
+ 14, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 26, 15,
+ 16, 16, 16, 16, 17, 16, 28, 28,
+ 27, 27, 29, 29, 27, 27, 27, 27,
+ 2, 2, 27, 30, 27, 28, 27, 27,
+ 27, 27, 15, 19, 27, 27, 27, 17,
+ 23, 25, 21, 27, 32, 32, 31, 31,
+ 31, 31, 31, 31, 31, 33, 31, 31,
+ 31, 31, 31, 2, 3, 6, 31, 31,
+ 31, 4, 10, 12, 8, 31, 34, 34,
+ 31, 31, 31, 31, 31, 31, 31, 35,
+ 31, 31, 31, 31, 31, 31, 3, 6,
+ 31, 31, 31, 4, 10, 12, 8, 31,
+ 5, 5, 31, 31, 31, 31, 31, 31,
+ 31, 35, 31, 31, 31, 31, 31, 31,
+ 4, 6, 31, 31, 31, 31, 31, 31,
+ 8, 31, 6, 31, 7, 7, 31, 31,
+ 31, 31, 31, 31, 31, 35, 31, 31,
+ 31, 31, 31, 31, 8, 6, 31, 36,
+ 36, 31, 31, 31, 31, 31, 31, 31,
+ 35, 31, 31, 31, 31, 31, 31, 10,
+ 6, 31, 31, 31, 4, 31, 31, 8,
+ 31, 37, 37, 31, 31, 31, 31, 31,
+ 31, 31, 35, 31, 31, 31, 31, 31,
+ 31, 12, 6, 31, 31, 31, 4, 10,
+ 31, 8, 31, 34, 34, 31, 31, 31,
+ 31, 31, 31, 31, 33, 31, 31, 31,
+ 31, 31, 31, 3, 6, 31, 31, 31,
+ 4, 10, 12, 8, 31, 28, 28, 31,
+ 31, 31, 31, 31, 31, 31, 31, 31,
+ 31, 31, 31, 31, 28, 31, 14, 14,
+ 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 15, 38,
+ 38, 38, 38, 17, 38, 40, 40, 39,
+ 39, 39, 39, 39, 39, 39, 41, 39,
+ 39, 39, 39, 39, 39, 15, 19, 39,
+ 39, 39, 17, 23, 25, 21, 39, 18,
+ 18, 39, 39, 39, 39, 39, 39, 39,
+ 41, 39, 39, 39, 39, 39, 39, 17,
+ 19, 39, 39, 39, 39, 39, 39, 21,
+ 39, 19, 39, 20, 20, 39, 39, 39,
+ 39, 39, 39, 39, 41, 39, 39, 39,
+ 39, 39, 39, 21, 19, 39, 42, 42,
+ 39, 39, 39, 39, 39, 39, 39, 41,
+ 39, 39, 39, 39, 39, 39, 23, 19,
+ 39, 39, 39, 17, 39, 39, 21, 39,
+ 43, 43, 39, 39, 39, 39, 39, 39,
+ 39, 41, 39, 39, 39, 39, 39, 39,
+ 25, 19, 39, 39, 39, 17, 23, 39,
+ 21, 39, 44, 44, 39, 39, 39, 39,
+ 39, 39, 39, 39, 39, 39, 39, 39,
+ 39, 44, 39, 45, 45, 39, 39, 39,
+ 39, 39, 39, 39, 30, 39, 39, 39,
+ 39, 39, 26, 15, 19, 39, 39, 39,
+ 17, 23, 25, 21, 39, 40, 40, 39,
+ 39, 39, 39, 39, 39, 39, 30, 39,
+ 39, 39, 39, 39, 39, 15, 19, 39,
+ 39, 39, 17, 23, 25, 21, 39, 0
+};
+
+/* Target state for each transition index (the scanner assigns
+ * cs = _khmer_syllable_machine_trans_targs[_trans]). */
+static const char _khmer_syllable_machine_trans_targs[] = {
+ 20, 1, 28, 22, 23, 3, 24, 5,
+ 25, 7, 26, 9, 27, 20, 10, 31,
+ 20, 32, 12, 33, 14, 34, 16, 35,
+ 18, 36, 39, 20, 21, 30, 37, 20,
+ 0, 29, 2, 4, 6, 8, 20, 20,
+ 11, 13, 15, 17, 38, 19
+};
+
+/* Action id for each transition index; 0 means no action, any other value
+ * selects a case of the action switch in find_syllables_khmer(). */
+static const char _khmer_syllable_machine_trans_actions[] = {
+ 1, 0, 2, 2, 2, 0, 0, 0,
+ 2, 0, 2, 0, 2, 3, 0, 4,
+ 5, 2, 0, 0, 0, 2, 0, 2,
+ 0, 2, 4, 8, 2, 9, 0, 10,
+ 0, 0, 0, 0, 0, 0, 11, 12,
+ 0, 0, 0, 0, 4, 0
+};
+
+/* Per-state action checked after each transition.  Only state 20 has one
+ * (id 6, which resets ts to 0 in find_syllables_khmer()). */
+static const char _khmer_syllable_machine_to_state_actions[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 6, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Per-state action checked before each transition lookup.  Only state 20
+ * has one (id 7, which records ts = p in find_syllables_khmer()). */
+static const char _khmer_syllable_machine_from_state_actions[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 7, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Per-state transition to take when the input ends in that state, stored
+ * as (transition index + 1); 0 means there is none. */
+static const unsigned char _khmer_syllable_machine_eof_trans[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 14, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 0, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 39, 40,
+ 40, 40, 40, 40, 40, 40, 40, 40
+};
+
+/* State numbers of the generated machine.  Only
+ * khmer_syllable_machine_start is referenced by the visible scanner code
+ * (as the initial value of cs). */
+static const int khmer_syllable_machine_start = 20;
+static const int khmer_syllable_machine_first_final = 20;
+static const int khmer_syllable_machine_error = -1;
+
+static const int khmer_syllable_machine_en_main = 20;
+
+
+#line 36 "hb-ot-shape-complex-khmer-machine.rl"
+
+
+
+#line 80 "hb-ot-shape-complex-khmer-machine.rl"
+
+
+/* Stamp glyphs [ts, te) with (syllable_serial << 4) | khmer_<syllable_type>
+ * and advance the serial.  The serial is kept below 16: after 15 it restarts
+ * at 1, so 0 is never assigned.  Expects `info`, `ts`, `te` and
+ * `syllable_serial` to be in scope at the expansion site.
+ * The disabled debug print uses %u because ts and te are unsigned int. */
+#define found_syllable(syllable_type) \
+ HB_STMT_START { \
+ if (0) fprintf (stderr, "syllable %u..%u %s\n", ts, te, #syllable_type); \
+ for (unsigned int i = ts; i < te; i++) \
+ info[i].syllable() = (syllable_serial << 4) | khmer_##syllable_type; \
+ syllable_serial++; \
+ if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
+ } HB_STMT_END
+
+/* Split buffer->info into syllables.  Runs the table-driven scanner over
+ * each glyph's khmer_category() and, through found_syllable(), writes a
+ * syllable serial and type into every glyph's syllable() field.
+ * This is generated code; the #line directives point back at the .rl
+ * grammar it was produced from. */
+static void
+find_syllables_khmer (hb_buffer_t *buffer)
+{
+ unsigned int p, pe, eof, ts, te, act HB_UNUSED;
+ int cs;
+ hb_glyph_info_t *info = buffer->info;
+
+#line 242 "hb-ot-shape-complex-khmer-machine.hh"
+ {
+ cs = khmer_syllable_machine_start;
+ ts = 0;
+ te = 0;
+ act = 0;
+ }
+
+#line 100 "hb-ot-shape-complex-khmer-machine.rl"
+
+
+ p = 0;
+ pe = eof = buffer->len;
+
+ unsigned int syllable_serial = 1;
+
+#line 258 "hb-ot-shape-complex-khmer-machine.hh"
+ {
+ int _slen;
+ int _trans;
+ const unsigned char *_keys;
+ const char *_inds;
+ if ( p == pe )
+ goto _test_eof;
+_resume:
+ /* From-state action: id 7 records the current position as token start. */
+ switch ( _khmer_syllable_machine_from_state_actions[cs] ) {
+ case 7:
+#line 1 "NONE"
+ {ts = p;}
+ break;
+#line 272 "hb-ot-shape-complex-khmer-machine.hh"
+ }
+
+ /* Pick the transition for the current glyph: a category inside the state's
+ * [_keys[0], _keys[1]] range uses its own column, anything else uses the
+ * column at index _slen. */
+ _keys = _khmer_syllable_machine_trans_keys + (cs<<1);
+ _inds = _khmer_syllable_machine_indicies + _khmer_syllable_machine_index_offsets[cs];
+
+ _slen = _khmer_syllable_machine_key_spans[cs];
+ _trans = _inds[ _slen > 0 && _keys[0] <=( info[p].khmer_category()) &&
+ ( info[p].khmer_category()) <= _keys[1] ?
+ ( info[p].khmer_category()) - _keys[0] : _slen ];
+
+_eof_trans:
+ cs = _khmer_syllable_machine_trans_targs[_trans];
+
+ if ( _khmer_syllable_machine_trans_actions[_trans] == 0 )
+ goto _again;
+
+ /* Transition actions.  "te = p+1" extends the current token through this
+ * glyph; "te = p; p--" ends it before this glyph and re-reads the glyph;
+ * "p = te-1" rewinds to the last recorded token end. */
+ switch ( _khmer_syllable_machine_trans_actions[_trans] ) {
+ case 2:
+#line 1 "NONE"
+ {te = p+1;}
+ break;
+ case 8:
+#line 76 "hb-ot-shape-complex-khmer-machine.rl"
+ {te = p+1;{ found_syllable (non_khmer_cluster); }}
+ break;
+ case 10:
+#line 74 "hb-ot-shape-complex-khmer-machine.rl"
+ {te = p;p--;{ found_syllable (consonant_syllable); }}
+ break;
+ case 12:
+#line 75 "hb-ot-shape-complex-khmer-machine.rl"
+ {te = p;p--;{ found_syllable (broken_cluster); }}
+ break;
+ case 11:
+#line 76 "hb-ot-shape-complex-khmer-machine.rl"
+ {te = p;p--;{ found_syllable (non_khmer_cluster); }}
+ break;
+ case 1:
+#line 74 "hb-ot-shape-complex-khmer-machine.rl"
+ {{p = ((te))-1;}{ found_syllable (consonant_syllable); }}
+ break;
+ case 5:
+#line 75 "hb-ot-shape-complex-khmer-machine.rl"
+ {{p = ((te))-1;}{ found_syllable (broken_cluster); }}
+ break;
+ /* Emit whichever pattern was remembered in `act` by cases 4 and 9. */
+ case 3:
+#line 1 "NONE"
+ { switch( act ) {
+ case 2:
+ {{p = ((te))-1;} found_syllable (broken_cluster); }
+ break;
+ case 3:
+ {{p = ((te))-1;} found_syllable (non_khmer_cluster); }
+ break;
+ }
+ }
+ break;
+ case 4:
+#line 1 "NONE"
+ {te = p+1;}
+#line 75 "hb-ot-shape-complex-khmer-machine.rl"
+ {act = 2;}
+ break;
+ case 9:
+#line 1 "NONE"
+ {te = p+1;}
+#line 76 "hb-ot-shape-complex-khmer-machine.rl"
+ {act = 3;}
+ break;
+#line 342 "hb-ot-shape-complex-khmer-machine.hh"
+ }
+
+_again:
+ /* To-state action: id 6 resets the token start. */
+ switch ( _khmer_syllable_machine_to_state_actions[cs] ) {
+ case 6:
+#line 1 "NONE"
+ {ts = 0;}
+ break;
+#line 351 "hb-ot-shape-complex-khmer-machine.hh"
+ }
+
+ if ( ++p != pe )
+ goto _resume;
+ _test_eof: {}
+ /* At end of input, take the state's EOF transition if it has one (the
+ * table stores index + 1, with 0 meaning none). */
+ if ( p == eof )
+ {
+ if ( _khmer_syllable_machine_eof_trans[cs] > 0 ) {
+ _trans = _khmer_syllable_machine_eof_trans[cs] - 1;
+ goto _eof_trans;
+ }
+ }
+
+ }
+
+#line 108 "hb-ot-shape-complex-khmer-machine.rl"
+
+}
+
+#undef found_syllable
+
+#endif /* HB_OT_SHAPE_COMPLEX_KHMER_MACHINE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer-machine.rl b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer-machine.rl
new file mode 100644
index 0000000000..e7f14533dd
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer-machine.rl
@@ -0,0 +1,113 @@
+/*
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_KHMER_MACHINE_HH
+#define HB_OT_SHAPE_COMPLEX_KHMER_MACHINE_HH
+
+#include "hb.hh"
+
+%%{
+ machine khmer_syllable_machine;
+ alphtype unsigned char;
+ # Emit the machine's static data tables at this point of the output.
+ write data;
+}%%
+
+%%{
+
+# Same order as enum khmer_category_t. Not sure how to avoid duplication.
+# The numbers are the category values the scanner reads through
+# info[p].khmer_category().
+C = 1;
+V = 2;
+ZWNJ = 5;
+ZWJ = 6;
+PLACEHOLDER = 11;
+DOTTEDCIRCLE = 12;
+Coeng= 14;
+Ra = 16;
+Robatic = 20;
+Xgroup = 21;
+Ygroup = 22;
+VAbv = 26;
+VBlw = 27;
+VPre = 28;
+VPst = 29;
+
+# c: one base letter (C, Ra or V).
+# cn: a base letter optionally followed by Robatic, with at most one joiner
+# in between.
+c = (C | Ra | V);
+cn = c.((ZWJ|ZWNJ)?.Robatic)?;
+joiner = (ZWJ | ZWNJ);
+xgroup = (joiner*.Xgroup)*;
+ygroup = Ygroup*;
+
+# This grammar was experimentally extracted from what Uniscribe allows.
+
+matra_group = VPre? xgroup VBlw? xgroup (joiner?.VAbv)? xgroup VPst?;
+syllable_tail = xgroup matra_group xgroup (Coeng.c)? ygroup;
+
+
+# broken_cluster is everything a syllable may contain after its base;
+# consonant_syllable is the same thing preceded by a base (cn, PLACEHOLDER
+# or DOTTEDCIRCLE).  `other` matches any single input value.
+broken_cluster = (Coeng.cn)* (Coeng | syllable_tail);
+consonant_syllable = (cn|PLACEHOLDER|DOTTEDCIRCLE) broken_cluster;
+other = any;
+
+# Ragel scanner: the longest match is taken; among equally long matches the
+# pattern listed first wins.
+main := |*
+ consonant_syllable => { found_syllable (consonant_syllable); };
+ broken_cluster => { found_syllable (broken_cluster); };
+ other => { found_syllable (non_khmer_cluster); };
+*|;
+
+
+}%%
+
+/* Stamp glyphs [ts, te) with (syllable_serial << 4) | khmer_<syllable_type>
+ * and advance the serial.  The serial is kept below 16: after 15 it restarts
+ * at 1, so 0 is never assigned.  Expects `info`, `ts`, `te` and
+ * `syllable_serial` to be in scope at the expansion site.
+ * The disabled debug print uses %u because ts and te are unsigned int. */
+#define found_syllable(syllable_type) \
+ HB_STMT_START { \
+ if (0) fprintf (stderr, "syllable %u..%u %s\n", ts, te, #syllable_type); \
+ for (unsigned int i = ts; i < te; i++) \
+ info[i].syllable() = (syllable_serial << 4) | khmer_##syllable_type; \
+ syllable_serial++; \
+ if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
+ } HB_STMT_END
+
+/* Template of the syllable finder: Ragel replaces the %%{ ... }%% sections
+ * below with the scanner's initialisation and execution code.  The scanner
+ * walks buffer->info from index 0 to buffer->len, reading each glyph's
+ * khmer_category(), and reports syllables through found_syllable(). */
+static void
+find_syllables_khmer (hb_buffer_t *buffer)
+{
+ unsigned int p, pe, eof, ts, te, act HB_UNUSED;
+ int cs;
+ hb_glyph_info_t *info = buffer->info;
+ %%{
+ write init;
+ getkey info[p].khmer_category();
+ }%%
+
+ /* p is the read position; pe and eof both mark the end of the buffer. */
+ p = 0;
+ pe = eof = buffer->len;
+
+ /* First serial handed out by found_syllable(). */
+ unsigned int syllable_serial = 1;
+ %%{
+ write exec;
+ }%%
+}
+
+#undef found_syllable
+
+#endif /* HB_OT_SHAPE_COMPLEX_KHMER_MACHINE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.cc
new file mode 100644
index 0000000000..3da8374899
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.cc
@@ -0,0 +1,457 @@
+/*
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#include "hb-ot-shape-complex-khmer.hh"
+#include "hb-ot-layout.hh"
+
+
+/*
+ * Khmer shaper.
+ */
+
+/* Feature table of the Khmer shaper.  collect_features_khmer() adds the
+ * entries to the map in this order, and data_create_khmer() computes one
+ * mask per entry (0 for entries whose flags include F_GLOBAL). */
+static const hb_ot_map_feature_t
+khmer_features[] =
+{
+ /*
+ * Basic features.
+ * These features are applied in order, one at a time, after reordering.
+ */
+ {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS},
+ {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS},
+ {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS},
+ {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS},
+ {HB_TAG('c','f','a','r'), F_MANUAL_JOINERS},
+ /*
+ * Other features.
+ * These features are applied all at once after clearing syllables.
+ */
+ {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS},
+};
+
+/*
+ * Must be in the same order as the khmer_features array.
+ *
+ * Each enumerator is the index of the matching khmer_features entry and of
+ * its mask in khmer_shape_plan_t::mask_array.  KHMER_BASIC_FEATURES is the
+ * number of leading "basic" entries, i.e. the index of the first "other"
+ * feature; collect_features_khmer() uses it to split the table in two.
+ */
+enum {
+ KHMER_PREF,
+ KHMER_BLWF,
+ KHMER_ABVF,
+ KHMER_PSTF,
+ KHMER_CFAR,
+
+ _KHMER_PRES,
+ _KHMER_ABVS,
+ _KHMER_BLWS,
+ _KHMER_PSTS,
+
+ KHMER_NUM_FEATURES,
+ KHMER_BASIC_FEATURES = _KHMER_PRES, /* Don't forget to update this! */
+};
+
+static void
+setup_syllables_khmer (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+static void
+reorder_khmer (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+
+/* Register the Khmer pipeline on the planner's map, in order: the syllable
+ * setup and reordering pauses, 'locl' and 'ccmp', the basic features, a
+ * pause that clears syllables, and finally the remaining features. */
+static void
+collect_features_khmer (hb_ot_shape_planner_t *plan)
+{
+  hb_ot_map_builder_t &builder = plan->map;
+
+  /* Both pauses are registered before any feature, so they run before any
+   * lookup has been applied. */
+  builder.add_gsub_pause (setup_syllables_khmer);
+  builder.add_gsub_pause (reorder_khmer);
+
+  /* Testing suggests that Uniscribe does NOT pause between basic
+   * features.  Test with KhmerUI.ttf and the following three
+   * sequences:
+   *
+   *   U+1789,U+17BC
+   *   U+1789,U+17D2,U+1789
+   *   U+1789,U+17D2,U+1789,U+17BC
+   *
+   * https://github.com/harfbuzz/harfbuzz/issues/974
+   */
+  builder.enable_feature (HB_TAG('l','o','c','l'));
+  builder.enable_feature (HB_TAG('c','c','m','p'));
+
+  unsigned int idx;
+
+  /* Basic features: table entries [0, KHMER_BASIC_FEATURES). */
+  for (idx = 0; idx < KHMER_BASIC_FEATURES; idx++)
+    builder.add_feature (khmer_features[idx]);
+
+  builder.add_gsub_pause (_hb_clear_syllables);
+
+  /* Other features: table entries [KHMER_BASIC_FEATURES, KHMER_NUM_FEATURES). */
+  for (idx = KHMER_BASIC_FEATURES; idx < KHMER_NUM_FEATURES; idx++)
+    builder.add_feature (khmer_features[idx]);
+}
+
+/* Adjust default features for Khmer: force 'clig' on, turn 'liga' off, and
+ * turn 'kern' off when Uniscribe bug compatibility is requested. */
+static void
+override_features_khmer (hb_ot_shape_planner_t *plan)
+{
+  hb_ot_map_builder_t &builder = plan->map;
+
+  /* Khmer spec has 'clig' as part of required shaping features:
+   * "Apply feature 'clig' to form ligatures that are desired for
+   * typographical correctness.", hence in overrides... */
+  builder.enable_feature (HB_TAG('c','l','i','g'));
+
+  /* Uniscribe does not apply 'kern' in Khmer. */
+  if (hb_options ().uniscribe_bug_compatible)
+    builder.disable_feature (HB_TAG('k','e','r','n'));
+
+  builder.disable_feature (HB_TAG('l','i','g','a'));
+}
+
+
+/* Per-plan data of the Khmer shaper: one mask per khmer_features entry and
+ * a lazily resolved glyph id for U+17D2. */
+struct khmer_shape_plan_t
+{
+  /* Store the font's nominal glyph for U+17D2 in *pglyph (0 when the font
+   * has none) and return whether it is non-zero.  The lookup happens only
+   * while the cached value is still the (hb_codepoint_t) -1 sentinel. */
+  bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
+  {
+    hb_codepoint_t resolved = virama_glyph;
+    if (unlikely (resolved == (hb_codepoint_t) -1))
+    {
+      /* Technically speaking, the spec says we should apply 'locl' to virama too.
+       * Maybe one day... */
+      if (!font->get_nominal_glyph (0x17D2u, &resolved))
+        resolved = 0;
+
+      /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph
+       * during shape planning...  Instead, overwrite it here.  It's safe.  Don't worry! */
+      virama_glyph = resolved;
+    }
+
+    *pglyph = resolved;
+    return resolved != 0;
+  }
+
+  /* Cached glyph id; written from the const getter above, hence mutable. */
+  mutable hb_codepoint_t virama_glyph;
+
+  /* Indexed by the KHMER_* feature enumerators. */
+  hb_mask_t mask_array[KHMER_NUM_FEATURES];
+};
+
+/* Allocate and fill the Khmer plan data.  Returns nullptr when the
+ * allocation fails; otherwise a zero-initialised khmer_shape_plan_t with the
+ * virama cache set to its "unresolved" sentinel and one mask per feature
+ * (left at 0 for features flagged F_GLOBAL). */
+static void *
+data_create_khmer (const hb_ot_shape_plan_t *plan)
+{
+  khmer_shape_plan_t *data = (khmer_shape_plan_t *) calloc (1, sizeof (khmer_shape_plan_t));
+  if (unlikely (!data))
+    return nullptr;
+
+  data->virama_glyph = (hb_codepoint_t) -1;
+
+  for (unsigned int f = 0; f < ARRAY_LENGTH (data->mask_array); f++)
+  {
+    if (khmer_features[f].flags & F_GLOBAL)
+      data->mask_array[f] = 0;
+    else
+      data->mask_array[f] = plan->map.get_1_mask (khmer_features[f].tag);
+  }
+
+  return data;
+}
+
+/* Release plan data with free(); it pairs with the calloc() in
+ * data_create_khmer(). */
+static void
+data_destroy_khmer (void *data)
+{
+ free (data);
+}
+
+
+/* Syllable kinds reported by the syllable machine.  found_syllable() ORs
+ * the value into the low bits of each glyph's syllable() field, and the
+ * shaper reads it back with "& 0x0F". */
+enum khmer_syllable_type_t {
+ khmer_consonant_syllable,
+ khmer_broken_cluster,
+ khmer_non_khmer_cluster,
+};
+
+#include "hb-ot-shape-complex-khmer-machine.hh"
+
+/* Allocate the khmer_category buffer variable and classify every glyph in
+ * the buffer with set_khmer_properties().  `plan` and `font` are unused. */
+static void
+setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                   hb_buffer_t              *buffer,
+                   hb_font_t                *font HB_UNUSED)
+{
+  HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category);
+
+  /* We cannot setup masks here.  We save information about characters
+   * and setup masks later on in a pause-callback. */
+
+  hb_glyph_info_t *glyphs = buffer->info;
+  const unsigned int total = buffer->len;
+  unsigned int i = 0;
+  while (i < total)
+  {
+    set_khmer_properties (glyphs[i]);
+    i++;
+  }
+}
+
+/* Pause callback: segment the buffer into syllables, then call
+ * unsafe_to_break() on each syllable's [start, end) range.  `plan` and
+ * `font` are unused. */
+static void
+setup_syllables_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                       hb_font_t                *font HB_UNUSED,
+                       hb_buffer_t              *buffer)
+{
+  find_syllables_khmer (buffer);
+
+  foreach_syllable (buffer, start, end)
+  {
+    buffer->unsafe_to_break (start, end);
+  }
+}
+
+
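+/* Rules from:
+ * https://docs.microsoft.com/en-us/typography/script-development/khmer
+ * (this comment previously pointed at the Devanagari page; the text quoted
+ * in the function below is Khmer-specific). */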
+
+/* Set feature masks on, and reorder, the syllable occupying
+ * info[start..end):
+ *  - every glyph after the first gets the blwf | abvf | pstf masks;
+ *  - a Coeng immediately followed by Ra is moved to the front of the
+ *    syllable and both glyphs get the 'pref' mask; the glyphs after that
+ *    pair get the 'cfar' mask when that mask is non-zero;
+ *  - a VPre glyph is moved to the front of the syllable.
+ * buffer->merge_clusters() is called over the affected range before each
+ * move.  `face` is unused. */
+static void
+reorder_consonant_syllable (const hb_ot_shape_plan_t *plan,
+ hb_face_t *face HB_UNUSED,
+ hb_buffer_t *buffer,
+ unsigned int start, unsigned int end)
+{
+ const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data;
+ hb_glyph_info_t *info = buffer->info;
+
+ /* Setup masks. */
+ {
+ /* Post-base */
+ hb_mask_t mask = khmer_plan->mask_array[KHMER_BLWF] |
+ khmer_plan->mask_array[KHMER_ABVF] |
+ khmer_plan->mask_array[KHMER_PSTF];
+ for (unsigned int i = start + 1; i < end; i++)
+ info[i].mask |= mask;
+ }
+
+ unsigned int num_coengs = 0;
+ for (unsigned int i = start + 1; i < end; i++)
+ {
+ /* """
+ * When a COENG + (Cons | IndV) combination are found (and subscript count
+ * is less than two) the character combination is handled according to the
+ * subscript type of the character following the COENG.
+ *
+ * ...
+ *
+ * Subscript Type 2 - The COENG + RO characters are reordered to immediately
+ * before the base glyph. Then the COENG + RO characters are assigned to have
+ * the 'pref' OpenType feature applied to them.
+ * """
+ */
+ /* NOTE(review): the quoted text says "less than two" while the test below
+ * is `<= 2`, so the `num_coengs = 2` assignment further down does not by
+ * itself stop a later Coeng from being examined -- confirm this is
+ * intended. */
+ if (info[i].khmer_category() == OT_Coeng && num_coengs <= 2 && i + 1 < end)
+ {
+ num_coengs++;
+
+ if (info[i + 1].khmer_category() == OT_Ra)
+ {
+ for (unsigned int j = 0; j < 2; j++)
+ info[i + j].mask |= khmer_plan->mask_array[KHMER_PREF];
+
+ /* Move the Coeng,Ro sequence to the start. */
+ buffer->merge_clusters (start, i + 2);
+ hb_glyph_info_t t0 = info[i];
+ hb_glyph_info_t t1 = info[i + 1];
+ /* Shift info[start..i) right by two slots, then drop the saved pair
+ * into the two slots freed at the front. */
+ memmove (&info[start + 2], &info[start], (i - start) * sizeof (info[0]));
+ info[start] = t0;
+ info[start + 1] = t1;
+
+ /* Mark the subsequent stuff with 'cfar'. Used in Khmer.
+ * Read the feature spec.
+ * This allows distinguishing the following cases with MS Khmer fonts:
+ * U+1784,U+17D2,U+179A,U+17D2,U+1782
+ * U+1784,U+17D2,U+1782,U+17D2,U+179A
+ */
+ if (khmer_plan->mask_array[KHMER_CFAR])
+ for (unsigned int j = i + 2; j < end; j++)
+ info[j].mask |= khmer_plan->mask_array[KHMER_CFAR];
+
+ num_coengs = 2; /* Done. */
+ }
+ }
+
+ /* Reorder left matra piece. */
+ else if (info[i].khmer_category() == OT_VPre)
+ {
+ /* Move to the start. */
+ buffer->merge_clusters (start, i + 1);
+ hb_glyph_info_t t = info[i];
+ /* Shift info[start..i) right by one slot to free the front slot. */
+ memmove (&info[start + 1], &info[start], (i - start) * sizeof (info[0]));
+ info[start] = t;
+ }
+ }
+}
+
+/* Reorder the syllable at info[start..end) according to the type stored in
+ * the low four bits of its first glyph's syllable() value.  Consonant
+ * syllables and broken clusters are reordered; anything else is left
+ * untouched. */
+static void
+reorder_syllable_khmer (const hb_ot_shape_plan_t *plan,
+                        hb_face_t                *face,
+                        hb_buffer_t              *buffer,
+                        unsigned int start, unsigned int end)
+{
+  const unsigned int kind = buffer->info[start].syllable() & 0x0F;
+
+  /* Broken clusters already had their dotted circle inserted, so they go
+   * through the same path as consonant syllables. */
+  if (kind == khmer_broken_cluster || kind == khmer_consonant_syllable)
+    reorder_consonant_syllable (plan, face, buffer, start, end);
+}
+
+/* Insert a dotted-circle glyph (U+25CC) into every broken cluster.  The
+ * glyph is placed at the start of the cluster, after any leading glyphs
+ * whose category is OT_Repha.  Returns without changing the buffer when
+ * HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE is set, when no glyph belongs
+ * to a broken cluster, or when the font has no nominal glyph for U+25CC.
+ * `plan` is unused. */
+static inline void
+insert_dotted_circles_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
+ hb_font_t *font,
+ hb_buffer_t *buffer)
+{
+ if (unlikely (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE))
+ return;
+
+ /* Note: This loop is extra overhead, but should not be measurable.
+ * TODO Use a buffer scratch flag to remove the loop. */
+ bool has_broken_syllables = false;
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ for (unsigned int i = 0; i < count; i++)
+ if ((info[i].syllable() & 0x0F) == khmer_broken_cluster)
+ {
+ has_broken_syllables = true;
+ break;
+ }
+ if (likely (!has_broken_syllables))
+ return;
+
+
+ hb_codepoint_t dottedcircle_glyph;
+ if (!font->get_nominal_glyph (0x25CCu, &dottedcircle_glyph))
+ return;
+
+ /* Template glyph: classify it while it still holds the character U+25CC,
+ * then replace the codepoint with the font's glyph id. */
+ hb_glyph_info_t dottedcircle = {0};
+ dottedcircle.codepoint = 0x25CCu;
+ set_khmer_properties (dottedcircle);
+ dottedcircle.codepoint = dottedcircle_glyph;
+
+ buffer->clear_output ();
+
+ buffer->idx = 0;
+ unsigned int last_syllable = 0;
+ while (buffer->idx < buffer->len && buffer->successful)
+ {
+ unsigned int syllable = buffer->cur().syllable();
+ khmer_syllable_type_t syllable_type = (khmer_syllable_type_t) (syllable & 0x0F);
+ /* Act once per broken cluster: only when the syllable value differs from
+ * the last one handled. */
+ if (unlikely (last_syllable != syllable && syllable_type == khmer_broken_cluster))
+ {
+ last_syllable = syllable;
+
+ /* The inserted glyph takes cluster, mask and syllable from the glyph
+ * currently at the read position. */
+ hb_glyph_info_t ginfo = dottedcircle;
+ ginfo.cluster = buffer->cur().cluster;
+ ginfo.mask = buffer->cur().mask;
+ ginfo.syllable() = buffer->cur().syllable();
+
+ /* Insert dottedcircle after possible Repha. */
+ while (buffer->idx < buffer->len && buffer->successful &&
+ last_syllable == buffer->cur().syllable() &&
+ buffer->cur().khmer_category() == OT_Repha)
+ buffer->next_glyph ();
+
+ buffer->output_info (ginfo);
+ }
+ else
+ buffer->next_glyph ();
+ }
+ buffer->swap_buffers ();
+}
+
+/* Pause callback: insert dotted circles into broken clusters, reorder every
+ * syllable, then release the khmer_category buffer variable. */
+static void
+reorder_khmer (const hb_ot_shape_plan_t *plan,
+               hb_font_t                *font,
+               hb_buffer_t              *buffer)
+{
+  insert_dotted_circles_khmer (plan, font, buffer);
+
+  foreach_syllable (buffer, start, end)
+  {
+    reorder_syllable_khmer (plan, font->face, buffer, start, end);
+  }
+
+  HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category);
+}
+
+
+/* Decompose `ab` into *a and *b.  The five Khmer split matras listed below
+ * decompose into U+17C1 followed by the original character; every other
+ * code point is passed to the Unicode functions' decompose(). */
+static bool
+decompose_khmer (const hb_ot_shape_normalize_context_t *c,
+                 hb_codepoint_t  ab,
+                 hb_codepoint_t *a,
+                 hb_codepoint_t *b)
+{
+  /*
+   * Decompose split matras that don't have Unicode decompositions.
+   */
+  switch (ab)
+  {
+    /* Khmer */
+    case 0x17BEu:
+    case 0x17BFu:
+    case 0x17C0u:
+    case 0x17C4u:
+    case 0x17C5u:
+      *a = 0x17C1u;
+      *b = ab;
+      return true;
+
+    default:
+      break;
+  }
+
+  return (bool) c->unicode->decompose (ab, a, b);
+}
+
+/* Compose `a` and `b` into *ab through the Unicode functions, except when
+ * `a` has a mark general category: then nothing is composed and false is
+ * returned, which avoids recomposing split matras. */
+static bool
+compose_khmer (const hb_ot_shape_normalize_context_t *c,
+               hb_codepoint_t  a,
+               hb_codepoint_t  b,
+               hb_codepoint_t *ab)
+{
+  if (!HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
+    return (bool) c->unicode->compose (a, b, ab);
+
+  /* Avoid recomposing split matras. */
+  return false;
+}
+
+
+/* Khmer shaper description: plugs the callbacks defined in this file into
+ * an hb_ot_complex_shaper_t.  The initialiser is positional, so the entries
+ * must stay in the member order of that struct. */
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_khmer =
+{
+ collect_features_khmer,
+ override_features_khmer,
+ data_create_khmer,
+ data_destroy_khmer,
+ nullptr, /* preprocess_text */
+ nullptr, /* postprocess_glyphs */
+ HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
+ decompose_khmer,
+ compose_khmer,
+ setup_masks_khmer,
+ HB_TAG_NONE, /* gpos_tag */
+ nullptr, /* reorder_marks */
+ HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
+ false, /* fallback_position */
+};
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.hh
new file mode 100644
index 0000000000..11a77bfd4b
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.hh
@@ -0,0 +1,113 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_KHMER_HH
+#define HB_OT_SHAPE_COMPLEX_KHMER_HH
+
+#include "hb.hh"
+
+#include "hb-ot-shape-complex-indic.hh"
+
+
+/* buffer var allocations */
+#define khmer_category() indic_category() /* khmer_category_t */
+
+
+/* Note: This enum is duplicated in the -machine.rl source file.
+ * Not sure how to avoid duplication.
+ *
+ * Only the Khmer-specific categories are declared here.  The commented-out
+ * entries, and the other OT_* names that set_khmer_properties() casts to
+ * khmer_category_t, are declared elsewhere -- presumably in
+ * hb-ot-shape-complex-indic.hh, which this header includes (TODO confirm). */
+enum khmer_category_t
+{
+ OT_Robatic = 20,
+ OT_Xgroup = 21,
+ OT_Ygroup = 22,
+ //OT_VAbv = 26,
+ //OT_VBlw = 27,
+ //OT_VPre = 28,
+ //OT_VPst = 29,
+};
+
+/* Compute the Khmer category of `info` from its codepoint and store it in
+ * info.khmer_category().  The starting point is the low seven bits of
+ * hb_indic_get_categories(); a few code points are then overridden, and a
+ * generic matra (OT_M) is replaced by the VPre/VBlw/VAbv/VPst category that
+ * matches its position. */
+static inline void
+set_khmer_properties (hb_glyph_info_t &info)
+{
+  const hb_codepoint_t u = info.codepoint;
+  const unsigned int packed = hb_indic_get_categories (u);
+  khmer_category_t cat = (khmer_category_t) (packed & 0x7Fu);
+  const indic_position_t pos = (indic_position_t) (packed >> 8);
+
+  /*
+   * Re-assign category
+   *
+   * These categories are experimentally extracted from what Uniscribe allows.
+   */
+  if (u == 0x179Au)
+    cat = (khmer_category_t) OT_Ra;
+  else if (u == 0x17C9u || u == 0x17CAu || u == 0x17CCu)
+    cat = OT_Robatic;
+  else if (u == 0x17C6u || u == 0x17CBu || (u >= 0x17CDu && u <= 0x17D1u))
+    cat = OT_Xgroup;
+  else if (u == 0x17C7u || u == 0x17C8u || u == 0x17DDu ||
+           u == 0x17D3u /* Just guessing.  Uniscribe doesn't categorize it. */)
+    cat = OT_Ygroup;
+
+  /*
+   * Re-assign position.
+   */
+  if (cat == (khmer_category_t) OT_M)
+  {
+    const int where = (int) pos;
+    if (where == POS_PRE_C)
+      cat = (khmer_category_t) OT_VPre;
+    else if (where == POS_BELOW_C)
+      cat = (khmer_category_t) OT_VBlw;
+    else if (where == POS_ABOVE_C)
+      cat = (khmer_category_t) OT_VAbv;
+    else if (where == POS_POST_C)
+      cat = (khmer_category_t) OT_VPst;
+    else
+      assert (0);
+  }
+
+  info.khmer_category() = cat;
+}
+
+
+#endif /* HB_OT_SHAPE_COMPLEX_KHMER_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar-machine.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar-machine.hh
new file mode 100644
index 0000000000..c2f4c0045c
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar-machine.hh
@@ -0,0 +1,430 @@
+
+#line 1 "hb-ot-shape-complex-myanmar-machine.rl"
+/*
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_MYANMAR_MACHINE_HH
+#define HB_OT_SHAPE_COMPLEX_MYANMAR_MACHINE_HH
+
+#include "hb.hh"
+
+/* Per-state key range: entries [2*cs] and [2*cs + 1] are the lowest and the
+ * highest category value for which state cs has an explicit transition. */
+#line 36 "hb-ot-shape-complex-myanmar-machine.hh"
+static const unsigned char _myanmar_syllable_machine_trans_keys[] = {
+ 1u, 32u, 3u, 30u, 5u, 29u, 5u, 8u, 5u, 29u, 3u, 25u, 5u, 25u, 5u, 25u,
+ 3u, 29u, 3u, 29u, 3u, 29u, 3u, 29u, 1u, 16u, 3u, 29u, 3u, 29u, 3u, 29u,
+ 3u, 29u, 3u, 29u, 3u, 30u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 29u,
+ 5u, 29u, 5u, 8u, 5u, 29u, 3u, 25u, 5u, 25u, 5u, 25u, 3u, 29u, 3u, 29u,
+ 3u, 29u, 3u, 29u, 1u, 16u, 3u, 30u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 29u,
+ 3u, 29u, 3u, 30u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 30u,
+ 3u, 29u, 1u, 32u, 1u, 32u, 8u, 8u, 0
+};
+/* Per-state number of keys in the state's key range.  The same value is the
+ * index, within the state's slice of the indicies table, of the fallback
+ * transition used for categories outside that range. */
+static const char _myanmar_syllable_machine_key_spans[] = {
+ 32, 28, 25, 4, 25, 23, 21, 21,
+ 27, 27, 27, 27, 16, 27, 27, 27,
+ 27, 27, 28, 27, 27, 27, 27, 27,
+ 25, 4, 25, 23, 21, 21, 27, 27,
+ 27, 27, 16, 28, 27, 27, 27, 27,
+ 27, 28, 27, 27, 27, 27, 27, 28,
+ 27, 32, 32, 1
+};
+/* Per-state offset into the indicies table at which the state's slice of
+ * transition ids begins. */
+static const short _myanmar_syllable_machine_index_offsets[] = {
+ 0, 33, 62, 88, 93, 119, 143, 165,
+ 187, 215, 243, 271, 299, 316, 344, 372,
+ 400, 428, 456, 485, 513, 541, 569, 597,
+ 625, 651, 656, 682, 706, 728, 750, 778,
+ 806, 834, 862, 879, 908, 936, 964, 992,
+ 1020, 1048, 1077, 1105, 1133, 1161, 1189, 1217,
+ 1246, 1274, 1307, 1340
+};
+/* Flattened transition table.  Each state's slice holds one transition id
+ * per key of its range followed by the fallback transition id; the ids index
+ * the trans_targs and trans_actions tables. */
+static const char _myanmar_syllable_machine_indicies[] = {
+ 1, 1, 2, 3, 4, 4, 0, 5,
+ 0, 6, 1, 0, 0, 0, 0, 7,
+ 0, 8, 9, 0, 10, 11, 12, 13,
+ 14, 15, 16, 17, 18, 19, 20, 1,
+ 0, 22, 23, 24, 24, 21, 25, 21,
+ 26, 21, 21, 21, 21, 21, 21, 21,
+ 27, 21, 21, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 21, 24, 24,
+ 21, 25, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 38, 21, 21, 21, 21,
+ 21, 21, 32, 21, 21, 21, 36, 21,
+ 24, 24, 21, 25, 21, 24, 24, 21,
+ 25, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 32, 21, 21, 21, 36, 21, 39,
+ 21, 24, 24, 21, 25, 21, 32, 21,
+ 21, 21, 21, 21, 21, 21, 40, 21,
+ 21, 21, 21, 21, 21, 32, 21, 24,
+ 24, 21, 25, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 40, 21, 21, 21,
+ 21, 21, 21, 32, 21, 24, 24, 21,
+ 25, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 32, 21, 22, 21, 24, 24, 21,
+ 25, 21, 26, 21, 21, 21, 21, 21,
+ 21, 21, 41, 21, 21, 41, 21, 21,
+ 21, 32, 42, 21, 21, 36, 21, 22,
+ 21, 24, 24, 21, 25, 21, 26, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 32, 21, 21,
+ 21, 36, 21, 22, 21, 24, 24, 21,
+ 25, 21, 26, 21, 21, 21, 21, 21,
+ 21, 21, 41, 21, 21, 21, 21, 21,
+ 21, 32, 42, 21, 21, 36, 21, 22,
+ 21, 24, 24, 21, 25, 21, 26, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 32, 42, 21,
+ 21, 36, 21, 1, 1, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 1, 21, 22, 21, 24, 24,
+ 21, 25, 21, 26, 21, 21, 21, 21,
+ 21, 21, 21, 27, 21, 21, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 21,
+ 22, 21, 24, 24, 21, 25, 21, 26,
+ 21, 21, 21, 21, 21, 21, 21, 43,
+ 21, 21, 21, 21, 21, 21, 32, 33,
+ 34, 35, 36, 21, 22, 21, 24, 24,
+ 21, 25, 21, 26, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 32, 33, 34, 35, 36, 21,
+ 22, 21, 24, 24, 21, 25, 21, 26,
+ 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 32, 33,
+ 34, 21, 36, 21, 22, 21, 24, 24,
+ 21, 25, 21, 26, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 32, 21, 34, 21, 36, 21,
+ 22, 21, 24, 24, 21, 25, 21, 26,
+ 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 32, 33,
+ 34, 35, 36, 43, 21, 22, 21, 24,
+ 24, 21, 25, 21, 26, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 28,
+ 21, 30, 21, 32, 33, 34, 35, 36,
+ 21, 22, 21, 24, 24, 21, 25, 21,
+ 26, 21, 21, 21, 21, 21, 21, 21,
+ 43, 21, 21, 28, 21, 21, 21, 32,
+ 33, 34, 35, 36, 21, 22, 21, 24,
+ 24, 21, 25, 21, 26, 21, 21, 21,
+ 21, 21, 21, 21, 44, 21, 21, 28,
+ 29, 30, 21, 32, 33, 34, 35, 36,
+ 21, 22, 21, 24, 24, 21, 25, 21,
+ 26, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 28, 29, 30, 21, 32,
+ 33, 34, 35, 36, 21, 22, 23, 24,
+ 24, 21, 25, 21, 26, 21, 21, 21,
+ 21, 21, 21, 21, 27, 21, 21, 28,
+ 29, 30, 31, 32, 33, 34, 35, 36,
+ 21, 46, 46, 45, 5, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 47, 45,
+ 45, 45, 45, 45, 45, 14, 45, 45,
+ 45, 18, 45, 46, 46, 45, 5, 45,
+ 46, 46, 45, 5, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 14, 45, 45, 45,
+ 18, 45, 48, 45, 46, 46, 45, 5,
+ 45, 14, 45, 45, 45, 45, 45, 45,
+ 45, 49, 45, 45, 45, 45, 45, 45,
+ 14, 45, 46, 46, 45, 5, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 49,
+ 45, 45, 45, 45, 45, 45, 14, 45,
+ 46, 46, 45, 5, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 14, 45, 2, 45,
+ 46, 46, 45, 5, 45, 6, 45, 45,
+ 45, 45, 45, 45, 45, 50, 45, 45,
+ 50, 45, 45, 45, 14, 51, 45, 45,
+ 18, 45, 2, 45, 46, 46, 45, 5,
+ 45, 6, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45,
+ 14, 45, 45, 45, 18, 45, 2, 45,
+ 46, 46, 45, 5, 45, 6, 45, 45,
+ 45, 45, 45, 45, 45, 50, 45, 45,
+ 45, 45, 45, 45, 14, 51, 45, 45,
+ 18, 45, 2, 45, 46, 46, 45, 5,
+ 45, 6, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45,
+ 14, 51, 45, 45, 18, 45, 52, 52,
+ 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 52, 45, 2,
+ 3, 46, 46, 45, 5, 45, 6, 45,
+ 45, 45, 45, 45, 45, 45, 8, 45,
+ 45, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 45, 2, 45, 46, 46,
+ 45, 5, 45, 6, 45, 45, 45, 45,
+ 45, 45, 45, 8, 45, 45, 10, 11,
+ 12, 13, 14, 15, 16, 17, 18, 45,
+ 2, 45, 46, 46, 45, 5, 45, 6,
+ 45, 45, 45, 45, 45, 45, 45, 53,
+ 45, 45, 45, 45, 45, 45, 14, 15,
+ 16, 17, 18, 45, 2, 45, 46, 46,
+ 45, 5, 45, 6, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 14, 15, 16, 17, 18, 45,
+ 2, 45, 46, 46, 45, 5, 45, 6,
+ 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 14, 15,
+ 16, 45, 18, 45, 2, 45, 46, 46,
+ 45, 5, 45, 6, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 14, 45, 16, 45, 18, 45,
+ 2, 45, 46, 46, 45, 5, 45, 6,
+ 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 14, 15,
+ 16, 17, 18, 53, 45, 2, 45, 46,
+ 46, 45, 5, 45, 6, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 10,
+ 45, 12, 45, 14, 15, 16, 17, 18,
+ 45, 2, 45, 46, 46, 45, 5, 45,
+ 6, 45, 45, 45, 45, 45, 45, 45,
+ 53, 45, 45, 10, 45, 45, 45, 14,
+ 15, 16, 17, 18, 45, 2, 45, 46,
+ 46, 45, 5, 45, 6, 45, 45, 45,
+ 45, 45, 45, 45, 54, 45, 45, 10,
+ 11, 12, 45, 14, 15, 16, 17, 18,
+ 45, 2, 45, 46, 46, 45, 5, 45,
+ 6, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 10, 11, 12, 45, 14,
+ 15, 16, 17, 18, 45, 2, 3, 46,
+ 46, 45, 5, 45, 6, 45, 45, 45,
+ 45, 45, 45, 45, 8, 45, 45, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18,
+ 45, 22, 23, 24, 24, 21, 25, 21,
+ 26, 21, 21, 21, 21, 21, 21, 21,
+ 55, 21, 21, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 21, 22, 56,
+ 24, 24, 21, 25, 21, 26, 21, 21,
+ 21, 21, 21, 21, 21, 27, 21, 21,
+ 28, 29, 30, 31, 32, 33, 34, 35,
+ 36, 21, 1, 1, 2, 3, 46, 46,
+ 45, 5, 45, 6, 1, 45, 45, 45,
+ 45, 1, 45, 8, 45, 45, 10, 11,
+ 12, 13, 14, 15, 16, 17, 18, 19,
+ 45, 1, 45, 1, 1, 57, 57, 57,
+ 57, 57, 57, 57, 57, 1, 57, 57,
+ 57, 57, 1, 57, 57, 57, 57, 57,
+ 57, 57, 57, 57, 57, 57, 57, 57,
+ 57, 57, 1, 57, 58, 57, 0
+};
+/* Target state of each transition, indexed by transition id. */
+static const char _myanmar_syllable_machine_trans_targs[] = {
+ 0, 1, 24, 34, 0, 25, 31, 47,
+ 36, 50, 37, 42, 43, 44, 27, 39,
+ 40, 41, 30, 46, 51, 0, 2, 12,
+ 0, 3, 9, 13, 14, 19, 20, 21,
+ 5, 16, 17, 18, 8, 23, 4, 6,
+ 7, 10, 11, 15, 22, 0, 0, 26,
+ 28, 29, 32, 33, 35, 38, 45, 48,
+ 49, 0, 0
+};
+/* Action id executed when a transition is taken, indexed by transition id.
+ * 0 means no action; ids 3..10 are dispatched by the scanner's switch. */
+static const char _myanmar_syllable_machine_trans_actions[] = {
+ 3, 0, 0, 0, 4, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 5, 0, 0,
+ 6, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 7, 8, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 9, 10
+};
+/* Per-state action executed after a state has been entered.  Id 1 resets the
+ * token start (ts) to 0; 0 means no action. */
+static const char _myanmar_syllable_machine_to_state_actions[] = {
+ 1, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+/* Per-state action executed before a transition out of the state is looked
+ * up.  Id 2 records the token start (ts = p); 0 means no action. */
+static const char _myanmar_syllable_machine_from_state_actions[] = {
+ 2, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+/* Per-state transition to take when the input ends in that state, stored as
+ * transition id + 1 so that 0 can mean "no end-of-input transition". */
+static const short _myanmar_syllable_machine_eof_trans[] = {
+ 0, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22,
+ 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 22,
+ 22, 46, 58, 58
+};
+/* Scalar parameters of the state machine.  find_syllables_myanmar()
+ * initializes its current state from myanmar_syllable_machine_start. */
+static const int myanmar_syllable_machine_start = 0;
+static const int myanmar_syllable_machine_first_final = 0;
+static const int myanmar_syllable_machine_error = -1;
+
+static const int myanmar_syllable_machine_en_main = 0;
+
+
+#line 36 "hb-ot-shape-complex-myanmar-machine.rl"
+
+
+
+#line 94 "hb-ot-shape-complex-myanmar-machine.rl"
+
+/* Tags every glyph in [ts, te) with (syllable_serial << 4) OR'ed with the
+ * myanmar_<syllable_type> enumerator, then advances the serial, wrapping from
+ * 16 back to 1.  Expects ts, te, info and syllable_serial to be in scope at
+ * the expansion site; the fprintf is compiled but never executed. */
+#define found_syllable(syllable_type) \
+ HB_STMT_START { \
+ if (0) fprintf (stderr, "syllable %d..%d %s\n", ts, te, #syllable_type); \
+ for (unsigned int i = ts; i < te; i++) \
+ info[i].syllable() = (syllable_serial << 4) | myanmar_##syllable_type; \
+ syllable_serial++; \
+ if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
+ } HB_STMT_END
+/* Splits BUFFER's glyphs into syllables by running the table-driven state
+ * machine over each glyph's myanmar_category(), tagging the glyphs of every
+ * recognized syllable through found_syllable().
+ *
+ * p is the index of the glyph being examined, cs the current state, and
+ * ts / te the start and exclusive end of the syllable being matched.
+ *
+ * NOTE(review): the #line markers indicate this is generated output of
+ * hb-ot-shape-complex-myanmar-machine.rl (presumably by Ragel); changes
+ * most likely belong in that source rather than here. */
+static void
+find_syllables_myanmar (hb_buffer_t *buffer)
+{
+ unsigned int p, pe, eof, ts, te, act HB_UNUSED;
+ int cs; /* current state */
+ hb_glyph_info_t *info = buffer->info;
+
+#line 320 "hb-ot-shape-complex-myanmar-machine.hh"
+ {
+ cs = myanmar_syllable_machine_start;
+ ts = 0;
+ te = 0;
+ act = 0;
+ }
+
+#line 114 "hb-ot-shape-complex-myanmar-machine.rl"
+
+
+ p = 0;
+ pe = eof = buffer->len; /* the whole buffer is scanned; its end is also end of input */
+
+ unsigned int syllable_serial = 1;
+
+#line 336 "hb-ot-shape-complex-myanmar-machine.hh"
+ {
+ int _slen;
+ int _trans;
+ const unsigned char *_keys;
+ const char *_inds;
+ if ( p == pe )
+ goto _test_eof;
+_resume:
+ switch ( _myanmar_syllable_machine_from_state_actions[cs] ) {
+ case 2:
+#line 1 "NONE"
+ {ts = p;}
+ break;
+#line 350 "hb-ot-shape-complex-myanmar-machine.hh"
+ }
+
+ _keys = _myanmar_syllable_machine_trans_keys + (cs<<1);
+ _inds = _myanmar_syllable_machine_indicies + _myanmar_syllable_machine_index_offsets[cs];
+
+ _slen = _myanmar_syllable_machine_key_spans[cs];
+ _trans = _inds[ _slen > 0 && _keys[0] <=( info[p].myanmar_category()) &&
+ ( info[p].myanmar_category()) <= _keys[1] ?
+ ( info[p].myanmar_category()) - _keys[0] : _slen ]; /* categories outside [_keys[0], _keys[1]] use the fallback slot at index _slen */
+
+_eof_trans:
+ cs = _myanmar_syllable_machine_trans_targs[_trans]; /* move to the transition's target state */
+
+ if ( _myanmar_syllable_machine_trans_actions[_trans] == 0 )
+ goto _again;
+
+ switch ( _myanmar_syllable_machine_trans_actions[_trans] ) {
+ case 6:
+#line 86 "hb-ot-shape-complex-myanmar-machine.rl"
+ {te = p+1;{ found_syllable (consonant_syllable); }}
+ break;
+ case 4:
+#line 87 "hb-ot-shape-complex-myanmar-machine.rl"
+ {te = p+1;{ found_syllable (non_myanmar_cluster); }}
+ break;
+ case 10:
+#line 88 "hb-ot-shape-complex-myanmar-machine.rl"
+ {te = p+1;{ found_syllable (punctuation_cluster); }}
+ break;
+ case 8:
+#line 89 "hb-ot-shape-complex-myanmar-machine.rl"
+ {te = p+1;{ found_syllable (broken_cluster); }}
+ break;
+ case 3:
+#line 90 "hb-ot-shape-complex-myanmar-machine.rl"
+ {te = p+1;{ found_syllable (non_myanmar_cluster); }}
+ break;
+ case 5:
+#line 86 "hb-ot-shape-complex-myanmar-machine.rl"
+ {te = p;p--;{ found_syllable (consonant_syllable); }}
+ break;
+ case 7:
+#line 89 "hb-ot-shape-complex-myanmar-machine.rl"
+ {te = p;p--;{ found_syllable (broken_cluster); }}
+ break;
+ case 9:
+#line 90 "hb-ot-shape-complex-myanmar-machine.rl"
+ {te = p;p--;{ found_syllable (non_myanmar_cluster); }}
+ break;
+#line 400 "hb-ot-shape-complex-myanmar-machine.hh"
+ }
+
+_again:
+ switch ( _myanmar_syllable_machine_to_state_actions[cs] ) {
+ case 1:
+#line 1 "NONE"
+ {ts = 0;}
+ break;
+#line 409 "hb-ot-shape-complex-myanmar-machine.hh"
+ }
+
+ if ( ++p != pe )
+ goto _resume;
+ _test_eof: {}
+ if ( p == eof )
+ {
+ if ( _myanmar_syllable_machine_eof_trans[cs] > 0 ) {
+ _trans = _myanmar_syllable_machine_eof_trans[cs] - 1; /* the table stores transition id + 1 */
+ goto _eof_trans;
+ }
+ }
+
+ }
+
+#line 122 "hb-ot-shape-complex-myanmar-machine.rl"
+
+}
+
+#undef found_syllable
+
+#endif /* HB_OT_SHAPE_COMPLEX_MYANMAR_MACHINE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar.cc
new file mode 100644
index 0000000000..fc3490d716
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar.cc
@@ -0,0 +1,387 @@
+/*
+ * Copyright © 2011,2012,2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#include "hb-ot-shape-complex-myanmar.hh"
+
+
+/*
+ * Myanmar shaper.
+ */
+
+/*
+ * Basic features.
+ * These features are applied in order, one at a time, after reordering.
+ */
+static const hb_tag_t myanmar_basic_features[] =
+{
+  HB_TAG('r','p','h','f'),
+  HB_TAG('p','r','e','f'),
+  HB_TAG('b','l','w','f'),
+  HB_TAG('p','s','t','f'),
+};
+/*
+ * Other features.
+ * These features are applied all at once, after clearing syllables.
+ */
+static const hb_tag_t myanmar_other_features[] =
+{
+  HB_TAG('p','r','e','s'),
+  HB_TAG('a','b','v','s'),
+  HB_TAG('b','l','w','s'),
+  HB_TAG('p','s','t','s'),
+};
+/* Forward declarations of the two GSUB pause callbacks that
+ * collect_features_myanmar() registers; both are defined further down. */
+static void
+setup_syllables_myanmar (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+static void
+reorder_myanmar (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+
+/* Registers, in application order, the features and GSUB pauses used by the
+ * Myanmar shaper on PLAN's map builder. */
+static void
+collect_features_myanmar (hb_ot_shape_planner_t *plan)
+{
+  hb_ot_map_builder_t &builder = plan->map;
+
+  /* Do this before any lookups have been applied. */
+  builder.add_gsub_pause (setup_syllables_myanmar);
+
+  builder.enable_feature (HB_TAG('l','o','c','l'));
+  /* The Indic specs do not require ccmp, but we apply it here since if
+   * there is a use of it, it's typically at the beginning. */
+  builder.enable_feature (HB_TAG('c','c','m','p'));
+
+  builder.add_gsub_pause (reorder_myanmar);
+
+  /* Each basic feature gets its own stage, closed by a callback-less pause. */
+  for (const hb_tag_t tag : myanmar_basic_features)
+  {
+    builder.enable_feature (tag, F_MANUAL_ZWJ);
+    builder.add_gsub_pause (nullptr);
+  }
+
+  builder.add_gsub_pause (_hb_clear_syllables);
+
+  /* The remaining features all share the final stage. */
+  for (const hb_tag_t tag : myanmar_other_features)
+    builder.enable_feature (tag, F_MANUAL_ZWJ);
+}
+
+/* Turns the 'liga' feature off for the Myanmar shaper. */
+static void
+override_features_myanmar (hb_ot_shape_planner_t *plan)
+{
+  hb_ot_map_builder_t &builder = plan->map;
+  builder.disable_feature (HB_TAG('l','i','g','a'));
+}
+
+
+/* Syllable kinds produced by the syllable machine.  The value is OR'ed into
+ * the low bits of each glyph's syllable() and read back with "& 0x0F". */
+enum myanmar_syllable_type_t
+{
+  myanmar_consonant_syllable  = 0,
+  myanmar_punctuation_cluster = 1,
+  myanmar_broken_cluster      = 2,
+  myanmar_non_myanmar_cluster = 3,
+};
+
+#include "hb-ot-shape-complex-myanmar-machine.hh"
+
+
+/* Allocates the Myanmar per-glyph buffer variables and fills them in for
+ * every glyph currently in BUFFER. */
+static void
+setup_masks_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                     hb_buffer_t              *buffer,
+                     hb_font_t                *font HB_UNUSED)
+{
+  HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_category);
+  HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_position);
+
+  /* We cannot setup masks here.  We save information about characters
+   * and setup masks later on in a pause-callback. */
+
+  hb_glyph_info_t *glyphs = buffer->info;
+  const unsigned int glyph_count = buffer->len;
+  for (unsigned int idx = 0; idx != glyph_count; ++idx)
+  {
+    set_myanmar_properties (glyphs[idx]);
+  }
+}
+
+/* Pause callback: segments BUFFER into syllables and marks each syllable's
+ * glyph range as unsafe to break. */
+static void
+setup_syllables_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                         hb_font_t *font HB_UNUSED,
+                         hb_buffer_t *buffer)
+{
+  find_syllables_myanmar (buffer);
+
+  foreach_syllable (buffer, syl_start, syl_end)
+  {
+    buffer->unsafe_to_break (syl_start, syl_end);
+  }
+}
+
+/* Three-way comparison of two glyphs by myanmar_position(): returns -1, 0 or
+ * +1 when PA sorts before, equal to, or after PB. */
+static int
+compare_myanmar_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
+{
+  const int lhs = pa->myanmar_position();
+  const int rhs = pb->myanmar_position();
+
+  if (lhs < rhs)
+    return -1;
+  if (lhs > rhs)
+    return +1;
+  return 0;
+}
+
+
+/* Rules from:
+ * https://docs.microsoft.com/en-us/typography/script-development/myanmar */
+
+/* Assigns a myanmar_position() to every glyph of the syllable [start, end)
+ * and then sorts the syllable's glyphs by that position. */
+static void
+initial_reordering_consonant_syllable (hb_buffer_t *buffer,
+                                       unsigned int start, unsigned int end)
+{
+  hb_glyph_info_t *info = buffer->info;
+
+  /* A syllable opening with the category triple OT_Ra, OT_As, OT_H has a
+   * "reph"; the base search then starts after those three glyphs. */
+  const bool has_reph = start + 3 <= end &&
+                        info[start    ].myanmar_category() == OT_Ra &&
+                        info[start + 1].myanmar_category() == OT_As &&
+                        info[start + 2].myanmar_category() == OT_H;
+  const unsigned int reph_end = has_reph ? start + 3 : start;
+
+  /* The base is the first consonant at or after reph_end; when there is
+   * none it stays at the start of the syllable. */
+  unsigned int base = start;
+  for (unsigned int k = reph_end; k < end; k++)
+  {
+    if (!is_consonant (info[k]))
+      continue;
+    base = k;
+    break;
+  }
+
+  /* Reorder! */
+  unsigned int i = start;
+  while (i < reph_end)
+  {
+    info[i].myanmar_position() = POS_AFTER_MAIN;
+    i++;
+  }
+  while (i < base)
+  {
+    info[i].myanmar_position() = POS_PRE_C;
+    i++;
+  }
+  if (i < end)
+  {
+    info[i].myanmar_position() = POS_BASE_C;
+    i++;
+  }
+
+  /* The following loop may be ugly, but it implements all of
+   * Myanmar reordering!  `pos` is the position handed to ordinary glyphs;
+   * it advances AFTER_MAIN -> BELOW_C -> AFTER_SUB as glyphs are seen. */
+  indic_position_t pos = POS_AFTER_MAIN;
+  for (; i < end; i++)
+  {
+    hb_glyph_info_t &glyph = info[i];
+    const int cat = glyph.myanmar_category();
+
+    if (cat == OT_MR)
+    {
+      /* Pre-base reordering */
+      glyph.myanmar_position() = POS_PRE_C;
+    }
+    else if (glyph.myanmar_position() < POS_BASE_C)
+    {
+      /* Left matra: keeps the position it already has. */
+    }
+    else if (cat == OT_VS)
+    {
+      /* Takes the position of the glyph before it. */
+      glyph.myanmar_position() = info[i - 1].myanmar_position();
+    }
+    else if (pos == POS_AFTER_MAIN && cat == OT_VBlw)
+    {
+      pos = POS_BELOW_C;
+      glyph.myanmar_position() = pos;
+    }
+    else if (pos == POS_BELOW_C && cat == OT_A)
+    {
+      glyph.myanmar_position() = POS_BEFORE_SUB;
+    }
+    else
+    {
+      /* While in the below-base run, anything that is not another OT_VBlw
+       * ends the run. */
+      if (pos == POS_BELOW_C && cat != OT_VBlw)
+        pos = POS_AFTER_SUB;
+      glyph.myanmar_position() = pos;
+    }
+  }
+
+  /* Sit tight, rock 'n roll! */
+  buffer->sort (start, end, compare_myanmar_order);
+}
+
+/* Reorders one syllable [start, end) according to the syllable type stored
+ * in the low four bits of its first glyph's syllable(). */
+static void
+reorder_syllable_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                          hb_face_t *face HB_UNUSED,
+                          hb_buffer_t *buffer,
+                          unsigned int start, unsigned int end)
+{
+  const unsigned int type = buffer->info[start].syllable() & 0x0F;
+
+  /* We already inserted dotted-circles, so broken clusters just go through
+   * the consonant_syllable path. */
+  const bool needs_reordering = type == myanmar_broken_cluster ||
+                                type == myanmar_consonant_syllable;
+
+  /* Punctuation and non-Myanmar clusters are left as they are. */
+  if (needs_reordering)
+    initial_reordering_consonant_syllable (buffer, start, end);
+}
+
+/* Inserts a dotted-circle glyph (U+25CC) in front of every broken cluster in
+ * BUFFER.  Does nothing when the buffer forbids it, when there is no broken
+ * cluster, or when FONT has no nominal glyph for U+25CC. */
+static inline void
+insert_dotted_circles_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                               hb_font_t *font,
+                               hb_buffer_t *buffer)
+{
+  if (unlikely (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE))
+    return;
+
+  /* Note: This loop is extra overhead, but should not be measurable.
+   * TODO Use a buffer scratch flag to remove the loop. */
+  {
+    hb_glyph_info_t *glyphs = buffer->info;
+    const unsigned int glyph_count = buffer->len;
+    unsigned int k = 0;
+    while (k < glyph_count &&
+           (glyphs[k].syllable() & 0x0F) != myanmar_broken_cluster)
+      k++;
+    if (likely (k == glyph_count))
+      return; /* No broken syllables. */
+  }
+
+  hb_codepoint_t circle_glyph;
+  if (!font->get_nominal_glyph (0x25CCu, &circle_glyph))
+    return;
+
+  /* Properties are computed from the character, then the codepoint field is
+   * overwritten with the font's glyph for it. */
+  hb_glyph_info_t circle_template = {0};
+  circle_template.codepoint = 0x25CCu;
+  set_myanmar_properties (circle_template);
+  circle_template.codepoint = circle_glyph;
+
+  buffer->clear_output ();
+  buffer->idx = 0;
+
+  unsigned int previous_syllable = 0;
+  while (buffer->idx < buffer->len && buffer->successful)
+  {
+    const unsigned int current_syllable = buffer->cur().syllable();
+    const bool starts_broken_cluster =
+      previous_syllable != current_syllable &&
+      (myanmar_syllable_type_t) (current_syllable & 0x0F) == myanmar_broken_cluster;
+
+    if (likely (!starts_broken_cluster))
+    {
+      buffer->next_glyph ();
+      continue;
+    }
+
+    /* Emit the circle only; the current glyph is copied on the next pass,
+     * once previous_syllable matches. */
+    previous_syllable = current_syllable;
+
+    hb_glyph_info_t circle = circle_template;
+    circle.cluster = buffer->cur().cluster;
+    circle.mask = buffer->cur().mask;
+    circle.syllable() = buffer->cur().syllable();
+
+    buffer->output_info (circle);
+  }
+  buffer->swap_buffers ();
+}
+
+/* Pause callback: inserts dotted circles, reorders every syllable, and then
+ * releases the Myanmar per-glyph buffer variables. */
+static void
+reorder_myanmar (const hb_ot_shape_plan_t *plan,
+                 hb_font_t *font,
+                 hb_buffer_t *buffer)
+{
+  insert_dotted_circles_myanmar (plan, font, buffer);
+
+  foreach_syllable (buffer, syl_start, syl_end)
+  {
+    reorder_syllable_myanmar (plan, font->face, buffer, syl_start, syl_end);
+  }
+
+  HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_category);
+  HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_position);
+}
+
+/* Shaper descriptor for Myanmar.  The initializer is positional, so the
+ * order of the entries must match hb_ot_complex_shaper_t's members. */
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar =
+{
+ collect_features_myanmar, /* collect_features */
+ override_features_myanmar, /* override_features */
+ nullptr, /* data_create */
+ nullptr, /* data_destroy */
+ nullptr, /* preprocess_text */
+ nullptr, /* postprocess_glyphs */
+ HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
+ nullptr, /* decompose */
+ nullptr, /* compose */
+ setup_masks_myanmar, /* setup_masks */
+ HB_TAG_NONE, /* gpos_tag */
+ nullptr, /* reorder_marks */
+ HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
+ false, /* fallback_position */
+};
+
+
+/* Ugly Zawgyi encoding.
+ * Disable all auto processing.
+ * https://github.com/harfbuzz/harfbuzz/issues/1162
+ *
+ * Every callback slot is nullptr, and both the normalization mode and the
+ * zero-width-marks mode are set to their NONE values. */
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar_zawgyi =
+{
+ nullptr, /* collect_features */
+ nullptr, /* override_features */
+ nullptr, /* data_create */
+ nullptr, /* data_destroy */
+ nullptr, /* preprocess_text */
+ nullptr, /* postprocess_glyphs */
+ HB_OT_SHAPE_NORMALIZATION_MODE_NONE,
+ nullptr, /* decompose */
+ nullptr, /* compose */
+ nullptr, /* setup_masks */
+ HB_TAG_NONE, /* gpos_tag */
+ nullptr, /* reorder_marks */
+ HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
+ false, /* fallback_position */
+};
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar.hh
new file mode 100644
index 0000000000..7b9821e6ba
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar.hh
@@ -0,0 +1,171 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_MYANMAR_HH
+#define HB_OT_SHAPE_COMPLEX_MYANMAR_HH
+
+#include "hb.hh"
+
+#include "hb-ot-shape-complex-indic.hh"
+
+
+/* buffer var allocations: the Myanmar shaper keeps its per-glyph category
+ * and position in the Indic shaper's variables, accessed under its own
+ * names. */
+#define myanmar_category() indic_category() /* myanmar_category_t */
+#define myanmar_position() indic_position() /* myanmar_position_t */
+
+
+/* Note: This enum is duplicated in the -machine.rl source file.
+ * Not sure how to avoid duplication.
+ *
+ * Categories specific to the Myanmar shaper.  The values are what the
+ * syllable machine matches against, so a change here has to be mirrored in
+ * the .rl source. */
+enum myanmar_category_t {
+ OT_As = 18, /* Asat */
+ OT_D0 = 20, /* Digit zero */
+ OT_DB = OT_N, /* Dot below */
+ OT_GB = OT_PLACEHOLDER, /* Alias of the placeholder category; given to dashes, NBSP, the dotted circle, etc. by set_myanmar_properties() */
+ OT_MH = 21, /* Various consonant medial types */
+ OT_MR = 22, /* Various consonant medial types */
+ OT_MW = 23, /* Various consonant medial types */
+ OT_MY = 24, /* Various consonant medial types */
+ OT_PT = 25, /* Pwo and other tones */
+ //OT_VAbv = 26,
+ //OT_VBlw = 27,
+ //OT_VPre = 28,
+ //OT_VPst = 29,
+ OT_VS = 30, /* Variation selectors */
+ OT_P = 31, /* Punctuation */
+ OT_D = 32, /* Digits except zero */
+};
+
+
+/* Derives the Myanmar shaping category and position for INFO's codepoint and
+ * stores them in the glyph's myanmar_category() / myanmar_position() buffer
+ * variables. */
+static inline void
+set_myanmar_properties (hb_glyph_info_t &info)
+{
+  const hb_codepoint_t codepoint = info.codepoint;
+  const unsigned int packed = hb_indic_get_categories (codepoint);
+  /* The low seven bits carry the category, bits 8 and up the position. */
+  unsigned int category = packed & 0x7Fu;
+  indic_position_t position = (indic_position_t) (packed >> 8);
+
+  /* Myanmar
+   * https://docs.microsoft.com/en-us/typography/script-development/myanmar#analyze
+   */
+  if (unlikely (hb_in_range<hb_codepoint_t> (codepoint, 0xFE00u, 0xFE0Fu)))
+    category = OT_VS;
+
+  /* Per-codepoint overrides of the table-provided category. */
+  switch (codepoint)
+  {
+    case 0x002Du: case 0x00A0u: case 0x00D7u:
+    case 0x2012u: case 0x2013u: case 0x2014u: case 0x2015u:
+    case 0x2022u:
+    case 0x25CCu:
+    case 0x25FBu: case 0x25FCu: case 0x25FDu: case 0x25FEu:
+      category = OT_GB;
+      break;
+
+    case 0x1004u: case 0x101Bu: case 0x105Au:
+      category = OT_Ra;
+      break;
+
+    case 0x1032u: case 0x1036u:
+      category = OT_A;
+      break;
+
+    case 0x1038u:
+    case 0x1087u: case 0x1088u: case 0x1089u: case 0x108Au:
+    case 0x108Bu: case 0x108Cu: case 0x108Du: case 0x108Fu:
+    case 0x109Au: case 0x109Bu: case 0x109Cu:
+      category = OT_SM;
+      break;
+
+    case 0x1039u:
+      category = OT_H;
+      break;
+
+    case 0x103Au:
+      category = OT_As;
+      break;
+
+    case 0x103Bu: case 0x105Eu: case 0x105Fu:
+      category = OT_MY;
+      break;
+
+    case 0x103Cu:
+      category = OT_MR;
+      break;
+
+    case 0x103Du: case 0x1082u:
+      category = OT_MW;
+      break;
+
+    case 0x103Eu: case 0x1060u:
+      category = OT_MH;
+      break;
+
+    case 0x1040u:
+      category = OT_D; /* XXX The spec says D0, but Uniscribe doesn't seem to do. */
+      break;
+
+    case 0x1041u: case 0x1042u: case 0x1043u: case 0x1044u: case 0x1045u:
+    case 0x1046u: case 0x1047u: case 0x1048u: case 0x1049u:
+    case 0x1090u: case 0x1091u: case 0x1092u: case 0x1093u: case 0x1094u:
+    case 0x1095u: case 0x1096u: case 0x1097u: case 0x1098u: case 0x1099u:
+      category = OT_D;
+      break;
+
+    case 0x104Au: case 0x104Bu:
+      category = OT_P;
+      break;
+
+    case 0x104Eu:
+      category = OT_C; /* The spec says C, IndicSyllableCategory doesn't have. */
+      break;
+
+    case 0x1063u: case 0x1064u:
+    case 0x1069u: case 0x106Au: case 0x106Bu: case 0x106Cu: case 0x106Du:
+    case 0xAA7Bu:
+      category = OT_PT;
+      break;
+
+    case 0xAA74u: case 0xAA75u: case 0xAA76u:
+      /* https://github.com/harfbuzz/harfbuzz/issues/218 */
+      category = OT_C;
+      break;
+  }
+
+  /* Glyphs still categorized as OT_M are split into one vowel category per
+   * position; pre-base ones additionally move to POS_PRE_M.  Other
+   * positions leave the glyph unchanged. */
+  if (category == OT_M)
+  {
+    const int where = (int) position;
+    if (where == POS_PRE_C)
+    {
+      category = OT_VPre;
+      position = POS_PRE_M;
+    }
+    else if (where == POS_ABOVE_C)
+      category = OT_VAbv;
+    else if (where == POS_BELOW_C)
+      category = OT_VBlw;
+    else if (where == POS_POST_C)
+      category = OT_VPst;
+  }
+
+  info.myanmar_category() = category;
+  info.myanmar_position() = position;
+}
+
+
+#endif /* HB_OT_SHAPE_COMPLEX_MYANMAR_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-thai.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-thai.cc
new file mode 100644
index 0000000000..347ea2e7ac
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-thai.cc
@@ -0,0 +1,394 @@
+/*
+ * Copyright © 2010,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#include "hb-ot-shape-complex.hh"
+
+
+/* Thai / Lao shaper */
+
+
+/* PUA shaping */
+
+
+/* Classes of base letters.  The class selects the start state of the
+ * above/below mark state machines further down in this file. */
+enum thai_consonant_type_t
+{
+  NC,            /* Any other codepoint in U+0E01..U+0E2E */
+  AC,            /* U+0E1B, U+0E1D, U+0E1F */
+  RC,            /* U+0E0D, U+0E10 */
+  DC,            /* U+0E0E, U+0E0F */
+  NOT_CONSONANT, /* Everything outside U+0E01..U+0E2E */
+  NUM_CONSONANT_TYPES = NOT_CONSONANT
+};
+
+/* Map codepoint U to its thai_consonant_type_t class.  The specific
+ * letters are matched first; whatever else lies in U+0E01..U+0E2E is NC. */
+static thai_consonant_type_t
+get_consonant_type (hb_codepoint_t u)
+{
+  switch (u)
+  {
+    case 0x0E1Bu: case 0x0E1Du: case 0x0E1Fu: /* case 0x0E2Cu: */
+      return AC;
+
+    case 0x0E0Du: case 0x0E10u:
+      return RC;
+
+    case 0x0E0Eu: case 0x0E0Fu:
+      return DC;
+
+    default:
+      break;
+  }
+
+  return hb_in_range<hb_codepoint_t> (u, 0x0E01u, 0x0E2Eu) ? NC : NOT_CONSONANT;
+}
+
+
+/* Classes of combining marks; used as the column index of the
+ * above/below state machine tables. */
+enum thai_mark_type_t
+{
+  AV,       /* U+0E31, U+0E34..U+0E37, U+0E47, U+0E4D..U+0E4E */
+  BV,       /* U+0E38..U+0E3A */
+  T,        /* U+0E48..U+0E4C */
+  NOT_MARK, /* Anything else */
+  NUM_MARK_TYPES = NOT_MARK
+};
+
+/* Map codepoint U to its thai_mark_type_t class, or NOT_MARK. */
+static thai_mark_type_t
+get_mark_type (hb_codepoint_t u)
+{
+  const bool is_av = u == 0x0E31u ||
+                     u == 0x0E47u ||
+                     hb_in_range<hb_codepoint_t> (u, 0x0E34u, 0x0E37u) ||
+                     hb_in_range<hb_codepoint_t> (u, 0x0E4Du, 0x0E4Eu);
+  if (is_av)
+    return AV;
+
+  if (hb_in_range<hb_codepoint_t> (u, 0x0E38u, 0x0E3Au))
+    return BV;
+
+  return hb_in_range<hb_codepoint_t> (u, 0x0E48u, 0x0E4Cu) ? T : NOT_MARK;
+}
+
+
+/* Adjustments the state machines can request for a glyph. */
+enum thai_action_t
+{
+  NOP, /* Leave the codepoint alone */
+  SD,  /* Shift combining-mark down */
+  SL,  /* Shift combining-mark left */
+  SDL, /* Shift combining-mark down-left */
+  RD   /* Remove descender from base */
+};
+
+/* Return the PUA replacement of U for ACTION, if FONT has a glyph for it.
+ *
+ * Each action has a zero-terminated table of (codepoint, Windows PUA,
+ * Mac PUA) triples.  The Windows codepoint is preferred; the Mac one is
+ * tried next.  U itself is returned when ACTION is NOP, when U is not
+ * listed for ACTION, or when the font has neither PUA glyph. */
+static hb_codepoint_t
+thai_pua_shape (hb_codepoint_t u, thai_action_t action, hb_font_t *font)
+{
+  struct thai_pua_mapping_t {
+    hb_codepoint_t u;
+    hb_codepoint_t win_pua;
+    hb_codepoint_t mac_pua;
+  };
+
+  static const thai_pua_mapping_t SD_mappings[] = {
+    {0x0E48u, 0xF70Au, 0xF88Bu}, /* MAI EK */
+    {0x0E49u, 0xF70Bu, 0xF88Eu}, /* MAI THO */
+    {0x0E4Au, 0xF70Cu, 0xF891u}, /* MAI TRI */
+    {0x0E4Bu, 0xF70Du, 0xF894u}, /* MAI CHATTAWA */
+    {0x0E4Cu, 0xF70Eu, 0xF897u}, /* THANTHAKHAT */
+    {0x0E38u, 0xF718u, 0xF89Bu}, /* SARA U */
+    {0x0E39u, 0xF719u, 0xF89Cu}, /* SARA UU */
+    {0x0E3Au, 0xF71Au, 0xF89Du}, /* PHINTHU */
+    {0x0000u, 0x0000u, 0x0000u}
+  };
+  static const thai_pua_mapping_t SDL_mappings[] = {
+    {0x0E48u, 0xF705u, 0xF88Cu}, /* MAI EK */
+    {0x0E49u, 0xF706u, 0xF88Fu}, /* MAI THO */
+    {0x0E4Au, 0xF707u, 0xF892u}, /* MAI TRI */
+    {0x0E4Bu, 0xF708u, 0xF895u}, /* MAI CHATTAWA */
+    {0x0E4Cu, 0xF709u, 0xF898u}, /* THANTHAKHAT */
+    {0x0000u, 0x0000u, 0x0000u}
+  };
+  static const thai_pua_mapping_t SL_mappings[] = {
+    {0x0E48u, 0xF713u, 0xF88Au}, /* MAI EK */
+    {0x0E49u, 0xF714u, 0xF88Du}, /* MAI THO */
+    {0x0E4Au, 0xF715u, 0xF890u}, /* MAI TRI */
+    {0x0E4Bu, 0xF716u, 0xF893u}, /* MAI CHATTAWA */
+    {0x0E4Cu, 0xF717u, 0xF896u}, /* THANTHAKHAT */
+    {0x0E31u, 0xF710u, 0xF884u}, /* MAI HAN-AKAT */
+    {0x0E34u, 0xF701u, 0xF885u}, /* SARA I */
+    {0x0E35u, 0xF702u, 0xF886u}, /* SARA II */
+    {0x0E36u, 0xF703u, 0xF887u}, /* SARA UE */
+    {0x0E37u, 0xF704u, 0xF888u}, /* SARA UEE */
+    {0x0E47u, 0xF712u, 0xF889u}, /* MAITAIKHU */
+    {0x0E4Du, 0xF711u, 0xF899u}, /* NIKHAHIT */
+    {0x0000u, 0x0000u, 0x0000u}
+  };
+  static const thai_pua_mapping_t RD_mappings[] = {
+    {0x0E0Du, 0xF70Fu, 0xF89Au}, /* YO YING */
+    {0x0E10u, 0xF700u, 0xF89Eu}, /* THO THAN */
+    {0x0000u, 0x0000u, 0x0000u}
+  };
+
+  /* Pick the table for this action; NOP never remaps. */
+  const thai_pua_mapping_t *entry = nullptr;
+  switch (action) {
+    case NOP: return u;
+    case SD:  entry = SD_mappings;  break;
+    case SDL: entry = SDL_mappings; break;
+    case SL:  entry = SL_mappings;  break;
+    case RD:  entry = RD_mappings;  break;
+  }
+
+  /* Walk to the row for U, or to the all-zero terminator. */
+  while (entry->u && entry->u != u)
+    entry++;
+  if (!entry->u)
+    return u;
+
+  /* Only substitute a PUA codepoint the font can actually render. */
+  hb_codepoint_t glyph;
+  if (hb_font_get_glyph (font, entry->win_pua, 0, &glyph))
+    return entry->win_pua;
+  if (hb_font_get_glyph (font, entry->mac_pua, 0, &glyph))
+    return entry->mac_pua;
+  return u;
+}
+
+
+/* States of the "above" machine, plus the start state to use for each
+ * thai_consonant_type_t (indexed by that enum, with one extra slot for
+ * NOT_CONSONANT).  The table is only ever read, so it is const like the
+ * transition table below. */
+static const enum thai_above_state_t
+{     /* Cluster above looks like: */
+  T0, /* ⣤ */
+  T1, /* ⣼ */
+  T2, /* ⣾ */
+  T3, /* ⣿ */
+  NUM_ABOVE_STATES
+} thai_above_start_state[NUM_CONSONANT_TYPES + 1/* For NOT_CONSONANT */] =
+{
+  T0, /* NC */
+  T1, /* AC */
+  T0, /* RC */
+  T0, /* DC */
+  T3, /* NOT_CONSONANT */
+};
+
+/* Transition table of the "above" machine: row = current state,
+ * column = thai_mark_type_t of the incoming mark.  Each edge names the
+ * action to apply and the state to move to. */
+static const struct thai_above_state_machine_edge_t {
+  thai_action_t action;
+  thai_above_state_t next_state;
+} thai_above_state_machine[NUM_ABOVE_STATES][NUM_MARK_TYPES] =
+{        /*AV*/     /*BV*/     /*T*/
+/*T0*/ {{NOP,T3}, {NOP,T0}, {SD, T3}},
+/*T1*/ {{SL, T2}, {NOP,T1}, {SDL,T2}},
+/*T2*/ {{NOP,T3}, {NOP,T2}, {SL, T3}},
+/*T3*/ {{NOP,T3}, {NOP,T3}, {NOP,T3}},
+};
+
+
+/* States of the "below" machine, plus the start state to use for each
+ * thai_consonant_type_t (indexed by that enum, with one extra slot for
+ * NOT_CONSONANT).  The table is only ever read, so it is const like the
+ * transition table below. */
+static const enum thai_below_state_t
+{
+  B0, /* No descender */
+  B1, /* Removable descender */
+  B2, /* Strict descender */
+  NUM_BELOW_STATES
+} thai_below_start_state[NUM_CONSONANT_TYPES + 1/* For NOT_CONSONANT */] =
+{
+  B0, /* NC */
+  B0, /* AC */
+  B1, /* RC */
+  B2, /* DC */
+  B2, /* NOT_CONSONANT */
+};
+
+/* Transition table of the "below" machine: row = current state,
+ * column = thai_mark_type_t of the incoming mark.  Each edge names the
+ * action to apply and the state to move to. */
+static const struct thai_below_state_machine_edge_t {
+  thai_action_t action;
+  thai_below_state_t next_state;
+} thai_below_state_machine[NUM_BELOW_STATES][NUM_MARK_TYPES] =
+{        /*AV*/     /*BV*/     /*T*/
+/*B0*/ {{NOP,B0}, {NOP,B2}, {NOP, B0}},
+/*B1*/ {{NOP,B1}, {RD, B2}, {NOP, B1}},
+/*B2*/ {{NOP,B2}, {SD, B2}, {NOP, B2}},
+};
+
+
+/* PUA fallback shaping: run every glyph of BUFFER through the above/below
+ * state machines and replace codepoints with their PUA variants as the
+ * machines request.  Compiled to an immediate return when
+ * HB_NO_OT_SHAPE_COMPLEX_THAI_FALLBACK is defined. */
+static void
+do_thai_pua_shaping (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                     hb_buffer_t              *buffer,
+                     hb_font_t                *font)
+{
+#ifdef HB_NO_OT_SHAPE_COMPLEX_THAI_FALLBACK
+  return;
+#endif
+
+  hb_glyph_info_t *glyphs = buffer->info;
+  const unsigned int n_glyphs = buffer->len;
+
+  /* Until a base is seen, behave as if the base were not a consonant. */
+  thai_above_state_t above = thai_above_start_state[NOT_CONSONANT];
+  thai_below_state_t below = thai_below_start_state[NOT_CONSONANT];
+  unsigned int base_idx = 0;
+
+  for (unsigned int idx = 0; idx < n_glyphs; idx++)
+  {
+    const hb_codepoint_t cp = glyphs[idx].codepoint;
+    const thai_mark_type_t mark = get_mark_type (cp);
+
+    if (mark == NOT_MARK)
+    {
+      /* A non-mark becomes the new base and restarts both machines. */
+      const thai_consonant_type_t kind = get_consonant_type (cp);
+      above = thai_above_start_state[kind];
+      below = thai_below_start_state[kind];
+      base_idx = idx;
+      continue;
+    }
+
+    const thai_above_state_machine_edge_t &up = thai_above_state_machine[above][mark];
+    const thai_below_state_machine_edge_t &down = thai_below_state_machine[below][mark];
+    above = up.next_state;
+    below = down.next_state;
+
+    /* At least one of the above/below actions is NOP. */
+    thai_action_t action = up.action;
+    if (action == NOP)
+      action = down.action;
+
+    buffer->unsafe_to_break (base_idx, idx);
+
+    /* RD rewrites the base; every other action rewrites the mark itself. */
+    const unsigned int target = (action == RD) ? base_idx : idx;
+    glyphs[target].codepoint = thai_pua_shape (glyphs[target].codepoint, action, font);
+  }
+}
+
+
+/* Text preprocessing hook of the Thai/Lao shaper.
+ *
+ * Pass 1 rewrites the buffer: every SARA AM is decomposed into
+ * NIKHAHIT + SARA AA and the NIKHAHIT is moved in front of any run of
+ * marks matched by IS_TONE_MARK that immediately precedes it.
+ * Pass 2 (Thai script only, and only when plan->map.found_script[0] is
+ * false) applies the PUA fallback shaping of do_thai_pua_shaping (). */
+static void
+preprocess_text_thai (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ hb_font_t *font)
+{
+ /* This function implements the shaping logic documented here:
+ *
+ * https://linux.thai.net/~thep/th-otf/shaping.html
+ *
+ * The first shaping rule listed there is needed even if the font has Thai
+ * OpenType tables. The rest do fallback positioning based on PUA codepoints.
+ * We implement that only if there exist no Thai GSUB in the font.
+ */
+
+ /* The following is NOT specified in the MS OT Thai spec, however, it seems
+ * to be what Uniscribe and other engines implement. According to Eric Muller:
+ *
+ * When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, *and* move the
+ * NIKHAHIT backwards over any tone mark (0E48-0E4B).
+ *
+ * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32>
+ *
+ * This reordering is legit only when the NIKHAHIT comes from a SARA AM, not
+ * when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably
+ * not what a user wanted, but the rendering is nevertheless nikhahit above
+ * chattawa.
+ *
+ * Same for Lao.
+ *
+ * Note:
+ *
+ * Uniscribe also does some below-marks reordering. Namely, it positions U+0E3A
+ * after U+0E38 and U+0E39. We do that by modifying the ccc for U+0E3A.
+ * See unicode->modified_combining_class (). Lao does NOT have a U+0E3A
+ * equivalent.
+ */
+
+
+ /*
+ * Here are the characters of significance:
+ *
+ * Thai Lao
+ * SARA AM: U+0E33 U+0EB3
+ * SARA AA: U+0E32 U+0EB2
+ * Nikhahit: U+0E4D U+0ECD
+ *
+ * Testing shows that Uniscribe reorders the following marks:
+ * Thai: <0E31,0E34..0E37,0E47..0E4E>
+ * Lao: <0EB1,0EB4..0EB7,0EC7..0ECE>
+ *
+ * Note how the Lao versions are the same as Thai + 0x80.
+ */
+
+ /* We only get one script at a time, so a script-agnostic implementation
+ * is adequate here. */
+ /* Masking off bit 0x0080 folds a Lao codepoint onto its Thai counterpart,
+ * so each test below covers both scripts. The two *_FROM_SARA_AM macros
+ * work on the unmasked value and therefore stay within the input's block. */
+#define IS_SARA_AM(x) (((x) & ~0x0080u) == 0x0E33u)
+#define NIKHAHIT_FROM_SARA_AM(x) ((x) - 0x0E33u + 0x0E4Du)
+#define SARA_AA_FROM_SARA_AM(x) ((x) - 1)
+#define IS_TONE_MARK(x) (hb_in_ranges<hb_codepoint_t> ((x) & ~0x0080u, 0x0E34u, 0x0E37u, 0x0E47u, 0x0E4Eu, 0x0E31u, 0x0E31u))
+
+ buffer->clear_output ();
+ unsigned int count = buffer->len;
+ /* No increment clause: idx is not advanced here, so the buffer calls in
+ * the body are what move the cursor. The loop also stops as soon as
+ * buffer->successful turns false. */
+ for (buffer->idx = 0; buffer->idx < count && buffer->successful;)
+ {
+ hb_codepoint_t u = buffer->cur().codepoint;
+ if (likely (!IS_SARA_AM (u))) {
+ buffer->next_glyph ();
+ continue;
+ }
+
+ /* Is SARA AM. Decompose and reorder. */
+ hb_glyph_info_t &nikhahit = buffer->output_glyph (NIKHAHIT_FROM_SARA_AM (u));
+ _hb_glyph_info_set_continuation (&nikhahit);
+ buffer->replace_glyph (SARA_AA_FROM_SARA_AM (u));
+ if (unlikely (!buffer->successful))
+ return;
+
+ /* Make Nikhahit be recognized as a ccc=0 mark when zeroing widths. */
+ unsigned int end = buffer->out_len;
+ _hb_glyph_info_set_general_category (&buffer->out_info[end - 2], HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK);
+
+ /* Ok, let's see... */
+ /* Walk start back from the NIKHAHIT (at end - 2) over every output glyph
+ * that IS_TONE_MARK accepts. */
+ unsigned int start = end - 2;
+ while (start > 0 && IS_TONE_MARK (buffer->out_info[start - 1].codepoint))
+ start--;
+
+ if (start + 2 < end)
+ {
+ /* Move Nikhahit (end-2) to the beginning */
+ buffer->merge_out_clusters (start, end);
+ hb_glyph_info_t t = buffer->out_info[end - 2];
+ /* Shift the (end - start - 2) marks in [start, end - 2) one slot to the
+ * right, then drop the saved NIKHAHIT into the freed slot at start. */
+ memmove (buffer->out_info + start + 1,
+ buffer->out_info + start,
+ sizeof (buffer->out_info[0]) * (end - start - 2));
+ buffer->out_info[start] = t;
+ }
+ else
+ {
+ /* Since we decomposed, and NIKHAHIT is combining, merge clusters with the
+ * previous cluster. */
+ if (start && buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
+ buffer->merge_out_clusters (start - 1, end);
+ }
+ }
+ buffer->swap_buffers ();
+
+ /* PUA fallback runs only for Thai script and only when
+ * plan->map.found_script[0] is false (presumably "no Thai GSUB script
+ * found in the font", per the comment at the top -- TODO confirm index 0
+ * is GSUB). Otherwise we are done. */
+ if (plan->props.script == HB_SCRIPT_THAI && !plan->map.found_script[0])
+ do_thai_pua_shaping (plan, buffer, font);
+}
+
+/* Shaper descriptor for Thai / Lao. The only callback it installs is
+ * preprocess_text_thai; every other hook is left null. Slot names in the
+ * comments follow the existing per-line annotations; the struct layout
+ * itself is declared elsewhere. */
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai =
+{
+ nullptr, /* collect_features */
+ nullptr, /* override_features */
+ nullptr, /* data_create */
+ nullptr, /* data_destroy */
+ preprocess_text_thai, /* presumably the preprocess_text slot -- confirm against the struct */
+ nullptr, /* postprocess_glyphs */
+ HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
+ nullptr, /* decompose */
+ nullptr, /* compose */
+ nullptr, /* setup_masks */
+ HB_TAG_NONE, /* gpos_tag */
+ nullptr, /* reorder_marks */
+ HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
+ false,/* fallback_position */
+};
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-use-machine.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use-machine.hh
new file mode 100644
index 0000000000..462342c618
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use-machine.hh
@@ -0,0 +1,562 @@
+
+#line 1 "hb-ot-shape-complex-use-machine.rl"
+/*
+ * Copyright © 2015 Mozilla Foundation.
+ * Copyright © 2015 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Mozilla Author(s): Jonathan Kew
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
+#define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
+
+#include "hb.hh"
+
+
+#line 38 "hb-ot-shape-complex-use-machine.hh"
+static const unsigned char _use_syllable_machine_trans_keys[] = {
+ 12u, 48u, 1u, 15u, 1u, 1u, 12u, 48u, 1u, 1u, 0u, 48u, 21u, 21u, 11u, 48u,
+ 11u, 48u, 1u, 15u, 1u, 1u, 11u, 48u, 22u, 48u, 23u, 48u, 24u, 47u, 25u, 47u,
+ 26u, 47u, 45u, 46u, 46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, 1u, 1u, 24u, 48u,
+ 23u, 48u, 23u, 48u, 23u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 11u, 48u,
+ 1u, 48u, 11u, 48u, 13u, 21u, 4u, 4u, 13u, 13u, 11u, 48u, 11u, 48u, 41u, 42u,
+ 42u, 42u, 11u, 48u, 11u, 48u, 22u, 48u, 23u, 48u, 24u, 47u, 25u, 47u, 26u, 47u,
+ 45u, 46u, 46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, 24u, 48u, 23u, 48u, 23u, 48u,
+ 23u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 11u, 48u, 1u, 48u, 1u, 15u,
+ 4u, 4u, 13u, 21u, 13u, 13u, 12u, 48u, 1u, 48u, 11u, 48u, 41u, 42u, 42u, 42u,
+ 21u, 42u, 1u, 5u, 0
+};
+
+static const char _use_syllable_machine_key_spans[] = {
+ 37, 15, 1, 37, 1, 49, 1, 38,
+ 38, 15, 1, 38, 27, 26, 24, 23,
+ 22, 2, 1, 25, 25, 25, 1, 25,
+ 26, 26, 26, 27, 27, 27, 27, 38,
+ 48, 38, 9, 1, 1, 38, 38, 2,
+ 1, 38, 38, 27, 26, 24, 23, 22,
+ 2, 1, 25, 25, 25, 25, 26, 26,
+ 26, 27, 27, 27, 27, 38, 48, 15,
+ 1, 9, 1, 37, 48, 38, 2, 1,
+ 22, 5
+};
+
+static const short _use_syllable_machine_index_offsets[] = {
+ 0, 38, 54, 56, 94, 96, 146, 148,
+ 187, 226, 242, 244, 283, 311, 338, 363,
+ 387, 410, 413, 415, 441, 467, 493, 495,
+ 521, 548, 575, 602, 630, 658, 686, 714,
+ 753, 802, 841, 851, 853, 855, 894, 933,
+ 936, 938, 977, 1016, 1044, 1071, 1096, 1120,
+ 1143, 1146, 1148, 1174, 1200, 1226, 1252, 1279,
+ 1306, 1333, 1361, 1389, 1417, 1445, 1484, 1533,
+ 1549, 1551, 1561, 1563, 1601, 1650, 1689, 1692,
+ 1694, 1717
+};
+
+static const char _use_syllable_machine_indicies[] = {
+ 1, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 0, 0, 0, 1, 0, 3, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 4, 2, 3, 2,
+ 6, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 5, 5, 5, 6, 5, 7, 5,
+ 8, 9, 10, 8, 11, 12, 10, 10,
+ 10, 10, 10, 3, 13, 14, 10, 15,
+ 8, 8, 16, 17, 10, 10, 18, 19,
+ 20, 21, 22, 23, 24, 18, 25, 26,
+ 27, 28, 29, 30, 10, 31, 32, 33,
+ 10, 34, 35, 36, 37, 38, 39, 40,
+ 13, 10, 42, 41, 44, 1, 43, 43,
+ 45, 43, 43, 43, 43, 43, 46, 47,
+ 48, 49, 50, 51, 52, 53, 47, 54,
+ 46, 55, 56, 57, 58, 43, 59, 60,
+ 61, 43, 43, 43, 43, 62, 63, 64,
+ 65, 1, 43, 44, 1, 43, 43, 45,
+ 43, 43, 43, 43, 43, 66, 47, 48,
+ 49, 50, 51, 52, 53, 47, 54, 55,
+ 55, 56, 57, 58, 43, 59, 60, 61,
+ 43, 43, 43, 43, 62, 63, 64, 65,
+ 1, 43, 44, 67, 67, 67, 67, 67,
+ 67, 67, 67, 67, 67, 67, 67, 67,
+ 68, 67, 44, 67, 44, 1, 43, 43,
+ 45, 43, 43, 43, 43, 43, 43, 47,
+ 48, 49, 50, 51, 52, 53, 47, 54,
+ 55, 55, 56, 57, 58, 43, 59, 60,
+ 61, 43, 43, 43, 43, 62, 63, 64,
+ 65, 1, 43, 47, 48, 49, 50, 51,
+ 43, 43, 43, 43, 43, 43, 56, 57,
+ 58, 43, 59, 60, 61, 43, 43, 43,
+ 43, 48, 63, 64, 65, 69, 43, 48,
+ 49, 50, 51, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 59, 60, 61,
+ 43, 43, 43, 43, 43, 63, 64, 65,
+ 69, 43, 49, 50, 51, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 63,
+ 64, 65, 43, 50, 51, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 63,
+ 64, 65, 43, 51, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 63, 64,
+ 65, 43, 63, 64, 43, 64, 43, 49,
+ 50, 51, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 59, 60, 61, 43,
+ 43, 43, 43, 43, 63, 64, 65, 69,
+ 43, 49, 50, 51, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 60,
+ 61, 43, 43, 43, 43, 43, 63, 64,
+ 65, 69, 43, 49, 50, 51, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 61, 43, 43, 43, 43, 43,
+ 63, 64, 65, 69, 43, 71, 70, 49,
+ 50, 51, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 63, 64, 65, 69,
+ 43, 48, 49, 50, 51, 43, 43, 43,
+ 43, 43, 43, 56, 57, 58, 43, 59,
+ 60, 61, 43, 43, 43, 43, 48, 63,
+ 64, 65, 69, 43, 48, 49, 50, 51,
+ 43, 43, 43, 43, 43, 43, 43, 57,
+ 58, 43, 59, 60, 61, 43, 43, 43,
+ 43, 48, 63, 64, 65, 69, 43, 48,
+ 49, 50, 51, 43, 43, 43, 43, 43,
+ 43, 43, 43, 58, 43, 59, 60, 61,
+ 43, 43, 43, 43, 48, 63, 64, 65,
+ 69, 43, 47, 48, 49, 50, 51, 43,
+ 53, 47, 43, 43, 43, 56, 57, 58,
+ 43, 59, 60, 61, 43, 43, 43, 43,
+ 48, 63, 64, 65, 69, 43, 47, 48,
+ 49, 50, 51, 43, 72, 47, 43, 43,
+ 43, 56, 57, 58, 43, 59, 60, 61,
+ 43, 43, 43, 43, 48, 63, 64, 65,
+ 69, 43, 47, 48, 49, 50, 51, 43,
+ 43, 47, 43, 43, 43, 56, 57, 58,
+ 43, 59, 60, 61, 43, 43, 43, 43,
+ 48, 63, 64, 65, 69, 43, 47, 48,
+ 49, 50, 51, 52, 53, 47, 43, 43,
+ 43, 56, 57, 58, 43, 59, 60, 61,
+ 43, 43, 43, 43, 48, 63, 64, 65,
+ 69, 43, 44, 1, 43, 43, 45, 43,
+ 43, 43, 43, 43, 43, 47, 48, 49,
+ 50, 51, 52, 53, 47, 54, 43, 55,
+ 56, 57, 58, 43, 59, 60, 61, 43,
+ 43, 43, 43, 62, 63, 64, 65, 1,
+ 43, 44, 67, 67, 67, 67, 67, 67,
+ 67, 67, 67, 67, 67, 67, 67, 68,
+ 67, 67, 67, 67, 67, 67, 67, 48,
+ 49, 50, 51, 67, 67, 67, 67, 67,
+ 67, 67, 67, 67, 67, 59, 60, 61,
+ 67, 67, 67, 67, 67, 63, 64, 65,
+ 69, 67, 44, 1, 43, 43, 45, 43,
+ 43, 43, 43, 43, 43, 47, 48, 49,
+ 50, 51, 52, 53, 47, 54, 46, 55,
+ 56, 57, 58, 43, 59, 60, 61, 43,
+ 43, 43, 43, 62, 63, 64, 65, 1,
+ 43, 74, 73, 73, 73, 73, 73, 73,
+ 73, 75, 73, 11, 76, 74, 73, 44,
+ 1, 43, 43, 45, 43, 43, 43, 43,
+ 43, 77, 47, 48, 49, 50, 51, 52,
+ 53, 47, 54, 46, 55, 56, 57, 58,
+ 43, 59, 60, 61, 43, 78, 79, 43,
+ 62, 63, 64, 65, 1, 43, 44, 1,
+ 43, 43, 45, 43, 43, 43, 43, 43,
+ 43, 47, 48, 49, 50, 51, 52, 53,
+ 47, 54, 46, 55, 56, 57, 58, 43,
+ 59, 60, 61, 43, 78, 79, 43, 62,
+ 63, 64, 65, 1, 43, 78, 79, 80,
+ 79, 80, 3, 6, 81, 81, 82, 81,
+ 81, 81, 81, 81, 83, 18, 19, 20,
+ 21, 22, 23, 24, 18, 25, 27, 27,
+ 28, 29, 30, 81, 31, 32, 33, 81,
+ 81, 81, 81, 37, 38, 39, 40, 6,
+ 81, 3, 6, 81, 81, 82, 81, 81,
+ 81, 81, 81, 81, 18, 19, 20, 21,
+ 22, 23, 24, 18, 25, 27, 27, 28,
+ 29, 30, 81, 31, 32, 33, 81, 81,
+ 81, 81, 37, 38, 39, 40, 6, 81,
+ 18, 19, 20, 21, 22, 81, 81, 81,
+ 81, 81, 81, 28, 29, 30, 81, 31,
+ 32, 33, 81, 81, 81, 81, 19, 38,
+ 39, 40, 84, 81, 19, 20, 21, 22,
+ 81, 81, 81, 81, 81, 81, 81, 81,
+ 81, 81, 31, 32, 33, 81, 81, 81,
+ 81, 81, 38, 39, 40, 84, 81, 20,
+ 21, 22, 81, 81, 81, 81, 81, 81,
+ 81, 81, 81, 81, 81, 81, 81, 81,
+ 81, 81, 81, 81, 38, 39, 40, 81,
+ 21, 22, 81, 81, 81, 81, 81, 81,
+ 81, 81, 81, 81, 81, 81, 81, 81,
+ 81, 81, 81, 81, 38, 39, 40, 81,
+ 22, 81, 81, 81, 81, 81, 81, 81,
+ 81, 81, 81, 81, 81, 81, 81, 81,
+ 81, 81, 81, 38, 39, 40, 81, 38,
+ 39, 81, 39, 81, 20, 21, 22, 81,
+ 81, 81, 81, 81, 81, 81, 81, 81,
+ 81, 31, 32, 33, 81, 81, 81, 81,
+ 81, 38, 39, 40, 84, 81, 20, 21,
+ 22, 81, 81, 81, 81, 81, 81, 81,
+ 81, 81, 81, 81, 32, 33, 81, 81,
+ 81, 81, 81, 38, 39, 40, 84, 81,
+ 20, 21, 22, 81, 81, 81, 81, 81,
+ 81, 81, 81, 81, 81, 81, 81, 33,
+ 81, 81, 81, 81, 81, 38, 39, 40,
+ 84, 81, 20, 21, 22, 81, 81, 81,
+ 81, 81, 81, 81, 81, 81, 81, 81,
+ 81, 81, 81, 81, 81, 81, 81, 38,
+ 39, 40, 84, 81, 19, 20, 21, 22,
+ 81, 81, 81, 81, 81, 81, 28, 29,
+ 30, 81, 31, 32, 33, 81, 81, 81,
+ 81, 19, 38, 39, 40, 84, 81, 19,
+ 20, 21, 22, 81, 81, 81, 81, 81,
+ 81, 81, 29, 30, 81, 31, 32, 33,
+ 81, 81, 81, 81, 19, 38, 39, 40,
+ 84, 81, 19, 20, 21, 22, 81, 81,
+ 81, 81, 81, 81, 81, 81, 30, 81,
+ 31, 32, 33, 81, 81, 81, 81, 19,
+ 38, 39, 40, 84, 81, 18, 19, 20,
+ 21, 22, 81, 24, 18, 81, 81, 81,
+ 28, 29, 30, 81, 31, 32, 33, 81,
+ 81, 81, 81, 19, 38, 39, 40, 84,
+ 81, 18, 19, 20, 21, 22, 81, 85,
+ 18, 81, 81, 81, 28, 29, 30, 81,
+ 31, 32, 33, 81, 81, 81, 81, 19,
+ 38, 39, 40, 84, 81, 18, 19, 20,
+ 21, 22, 81, 81, 18, 81, 81, 81,
+ 28, 29, 30, 81, 31, 32, 33, 81,
+ 81, 81, 81, 19, 38, 39, 40, 84,
+ 81, 18, 19, 20, 21, 22, 23, 24,
+ 18, 81, 81, 81, 28, 29, 30, 81,
+ 31, 32, 33, 81, 81, 81, 81, 19,
+ 38, 39, 40, 84, 81, 3, 6, 81,
+ 81, 82, 81, 81, 81, 81, 81, 81,
+ 18, 19, 20, 21, 22, 23, 24, 18,
+ 25, 81, 27, 28, 29, 30, 81, 31,
+ 32, 33, 81, 81, 81, 81, 37, 38,
+ 39, 40, 6, 81, 3, 81, 81, 81,
+ 81, 81, 81, 81, 81, 81, 81, 81,
+ 81, 81, 4, 81, 81, 81, 81, 81,
+ 81, 81, 19, 20, 21, 22, 81, 81,
+ 81, 81, 81, 81, 81, 81, 81, 81,
+ 31, 32, 33, 81, 81, 81, 81, 81,
+ 38, 39, 40, 84, 81, 3, 86, 86,
+ 86, 86, 86, 86, 86, 86, 86, 86,
+ 86, 86, 86, 4, 86, 87, 81, 14,
+ 81, 81, 81, 81, 81, 81, 81, 88,
+ 81, 14, 81, 6, 86, 86, 86, 86,
+ 86, 86, 86, 86, 86, 86, 86, 86,
+ 86, 86, 86, 86, 86, 86, 86, 86,
+ 86, 86, 86, 86, 86, 86, 86, 86,
+ 86, 86, 86, 6, 86, 86, 86, 6,
+ 86, 9, 81, 81, 81, 9, 81, 81,
+ 81, 81, 81, 3, 6, 14, 81, 82,
+ 81, 81, 81, 81, 81, 81, 18, 19,
+ 20, 21, 22, 23, 24, 18, 25, 26,
+ 27, 28, 29, 30, 81, 31, 32, 33,
+ 81, 34, 35, 81, 37, 38, 39, 40,
+ 6, 81, 3, 6, 81, 81, 82, 81,
+ 81, 81, 81, 81, 81, 18, 19, 20,
+ 21, 22, 23, 24, 18, 25, 26, 27,
+ 28, 29, 30, 81, 31, 32, 33, 81,
+ 81, 81, 81, 37, 38, 39, 40, 6,
+ 81, 34, 35, 81, 35, 81, 78, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 78, 79, 80, 9, 86, 86,
+ 86, 9, 86, 0
+};
+
+static const char _use_syllable_machine_trans_targs[] = {
+ 5, 9, 5, 41, 2, 5, 1, 53,
+ 6, 7, 5, 34, 37, 63, 64, 67,
+ 68, 72, 43, 44, 45, 46, 47, 57,
+ 58, 60, 69, 61, 54, 55, 56, 50,
+ 51, 52, 70, 71, 73, 62, 48, 49,
+ 5, 5, 5, 5, 8, 0, 33, 12,
+ 13, 14, 15, 16, 27, 28, 30, 31,
+ 24, 25, 26, 19, 20, 21, 32, 17,
+ 18, 5, 11, 5, 10, 22, 5, 23,
+ 29, 5, 35, 36, 5, 38, 39, 40,
+ 5, 5, 3, 42, 4, 59, 5, 65,
+ 66
+};
+
+static const char _use_syllable_machine_trans_actions[] = {
+ 1, 0, 2, 3, 0, 4, 0, 5,
+ 0, 5, 8, 0, 5, 9, 0, 9,
+ 3, 0, 5, 5, 0, 0, 0, 5,
+ 5, 5, 3, 3, 5, 5, 5, 5,
+ 5, 5, 0, 0, 0, 3, 0, 0,
+ 10, 11, 12, 13, 5, 0, 5, 0,
+ 0, 0, 0, 0, 0, 0, 0, 5,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 14, 5, 15, 0, 0, 16, 0,
+ 0, 17, 0, 0, 18, 5, 0, 0,
+ 19, 20, 0, 3, 0, 5, 21, 0,
+ 0
+};
+
+static const char _use_syllable_machine_to_state_actions[] = {
+ 0, 0, 0, 0, 0, 6, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0
+};
+
+static const char _use_syllable_machine_from_state_actions[] = {
+ 0, 0, 0, 0, 0, 7, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0
+};
+
+static const short _use_syllable_machine_eof_trans[] = {
+ 1, 3, 3, 6, 6, 0, 42, 44,
+ 44, 68, 68, 44, 44, 44, 44, 44,
+ 44, 44, 44, 44, 44, 44, 71, 44,
+ 44, 44, 44, 44, 44, 44, 44, 44,
+ 68, 44, 74, 77, 74, 44, 44, 81,
+ 81, 82, 82, 82, 82, 82, 82, 82,
+ 82, 82, 82, 82, 82, 82, 82, 82,
+ 82, 82, 82, 82, 82, 82, 82, 87,
+ 82, 82, 82, 87, 82, 82, 82, 82,
+ 81, 87
+};
+
+static const int use_syllable_machine_start = 5;
+static const int use_syllable_machine_first_final = 5;
+static const int use_syllable_machine_error = -1;
+
+static const int use_syllable_machine_en_main = 5;
+
+
+#line 38 "hb-ot-shape-complex-use-machine.rl"
+
+
+
+#line 162 "hb-ot-shape-complex-use-machine.rl"
+
+
+/* Tag glyphs [ts, te) with the current syllable: the serial number goes
+ * into the bits above bit 3 and use_<syllable_type> is OR-ed into the low
+ * bits.  The serial is then advanced and wraps from 16 back to 1.
+ * Expects `info`, `ts`, `te` and `syllable_serial` to be in scope at the
+ * point of expansion. */
+#define found_syllable(syllable_type) \
+  HB_STMT_START { \
+    /* Disabled debug trace; ts/te are unsigned int at the expansion site, hence %u. */ \
+    if (0) fprintf (stderr, "syllable %u..%u %s\n", ts, te, #syllable_type); \
+    for (unsigned int i = ts; i < te; i++) \
+      info[i].syllable() = (syllable_serial << 4) | use_##syllable_type; \
+    syllable_serial++; \
+    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
+  } HB_STMT_END
+
+/* Split BUFFER into syllables with the generated table-driven state machine.
+ *
+ * The machine is fed info[p].use_category () for each glyph and, whenever
+ * a transition action fires, calls found_syllable () to stamp the glyphs
+ * in [ts, te) with a syllable serial and type.
+ *
+ * Locals: p is the current glyph index; pe and eof are both buffer->len;
+ * ts/te delimit the token being matched; act remembers which alternative
+ * (8 = broken_cluster, 9 = non_cluster) matched last so that action 2 can
+ * fall back to it; cs is the current state.
+ *
+ * The body between the #line markers is generated output; edit the .rl
+ * source named in those markers rather than this code. */
+static void
+find_syllables_use (hb_buffer_t *buffer)
+{
+ unsigned int p, pe, eof, ts, te, act;
+ int cs;
+ hb_glyph_info_t *info = buffer->info;
+
+#line 396 "hb-ot-shape-complex-use-machine.hh"
+ {
+ cs = use_syllable_machine_start;
+ ts = 0;
+ te = 0;
+ act = 0;
+ }
+
+#line 182 "hb-ot-shape-complex-use-machine.rl"
+
+
+ p = 0;
+ pe = eof = buffer->len;
+
+ unsigned int syllable_serial = 1;
+
+#line 412 "hb-ot-shape-complex-use-machine.hh"
+ {
+ int _slen;
+ int _trans;
+ const unsigned char *_keys;
+ const char *_inds;
+ if ( p == pe )
+ goto _test_eof;
+_resume:
+ switch ( _use_syllable_machine_from_state_actions[cs] ) {
+ case 7:
+#line 1 "NONE"
+ {ts = p;}
+ break;
+#line 426 "hb-ot-shape-complex-use-machine.hh"
+ }
+
+ /* Transition lookup: each state owns one [low, high] key pair (hence
+ * cs<<1) and a run of indices. An in-range category selects slot
+ * (category - low); anything else takes the extra slot at _slen. */
+ _keys = _use_syllable_machine_trans_keys + (cs<<1);
+ _inds = _use_syllable_machine_indicies + _use_syllable_machine_index_offsets[cs];
+
+ _slen = _use_syllable_machine_key_spans[cs];
+ _trans = _inds[ _slen > 0 && _keys[0] <=( info[p].use_category()) &&
+ ( info[p].use_category()) <= _keys[1] ?
+ ( info[p].use_category()) - _keys[0] : _slen ];
+
+_eof_trans:
+ cs = _use_syllable_machine_trans_targs[_trans];
+
+ if ( _use_syllable_machine_trans_actions[_trans] == 0 )
+ goto _again;
+
+ switch ( _use_syllable_machine_trans_actions[_trans] ) {
+ case 5:
+#line 1 "NONE"
+ {te = p+1;}
+ break;
+ case 12:
+#line 150 "hb-ot-shape-complex-use-machine.rl"
+ {te = p+1;{ found_syllable (independent_cluster); }}
+ break;
+ case 14:
+#line 153 "hb-ot-shape-complex-use-machine.rl"
+ {te = p+1;{ found_syllable (standard_cluster); }}
+ break;
+ case 10:
+#line 157 "hb-ot-shape-complex-use-machine.rl"
+ {te = p+1;{ found_syllable (broken_cluster); }}
+ break;
+ case 8:
+#line 158 "hb-ot-shape-complex-use-machine.rl"
+ {te = p+1;{ found_syllable (non_cluster); }}
+ break;
+ case 11:
+#line 150 "hb-ot-shape-complex-use-machine.rl"
+ {te = p;p--;{ found_syllable (independent_cluster); }}
+ break;
+ case 15:
+#line 151 "hb-ot-shape-complex-use-machine.rl"
+ {te = p;p--;{ found_syllable (virama_terminated_cluster); }}
+ break;
+ case 16:
+#line 152 "hb-ot-shape-complex-use-machine.rl"
+ {te = p;p--;{ found_syllable (sakot_terminated_cluster); }}
+ break;
+ case 13:
+#line 153 "hb-ot-shape-complex-use-machine.rl"
+ {te = p;p--;{ found_syllable (standard_cluster); }}
+ break;
+ case 18:
+#line 154 "hb-ot-shape-complex-use-machine.rl"
+ {te = p;p--;{ found_syllable (number_joiner_terminated_cluster); }}
+ break;
+ case 17:
+#line 155 "hb-ot-shape-complex-use-machine.rl"
+ {te = p;p--;{ found_syllable (numeral_cluster); }}
+ break;
+ case 19:
+#line 156 "hb-ot-shape-complex-use-machine.rl"
+ {te = p;p--;{ found_syllable (symbol_cluster); }}
+ break;
+ case 20:
+#line 157 "hb-ot-shape-complex-use-machine.rl"
+ {te = p;p--;{ found_syllable (broken_cluster); }}
+ break;
+ case 21:
+#line 158 "hb-ot-shape-complex-use-machine.rl"
+ {te = p;p--;{ found_syllable (non_cluster); }}
+ break;
+ case 1:
+#line 153 "hb-ot-shape-complex-use-machine.rl"
+ {{p = ((te))-1;}{ found_syllable (standard_cluster); }}
+ break;
+ case 4:
+#line 157 "hb-ot-shape-complex-use-machine.rl"
+ {{p = ((te))-1;}{ found_syllable (broken_cluster); }}
+ break;
+ case 2:
+#line 1 "NONE"
+ { switch( act ) {
+ case 8:
+ {{p = ((te))-1;} found_syllable (broken_cluster); }
+ break;
+ case 9:
+ {{p = ((te))-1;} found_syllable (non_cluster); }
+ break;
+ }
+ }
+ break;
+ case 3:
+#line 1 "NONE"
+ {te = p+1;}
+#line 157 "hb-ot-shape-complex-use-machine.rl"
+ {act = 8;}
+ break;
+ case 9:
+#line 1 "NONE"
+ {te = p+1;}
+#line 158 "hb-ot-shape-complex-use-machine.rl"
+ {act = 9;}
+ break;
+#line 532 "hb-ot-shape-complex-use-machine.hh"
+ }
+
+_again:
+ switch ( _use_syllable_machine_to_state_actions[cs] ) {
+ case 6:
+#line 1 "NONE"
+ {ts = 0;}
+ break;
+#line 541 "hb-ot-shape-complex-use-machine.hh"
+ }
+
+ if ( ++p != pe )
+ goto _resume;
+ _test_eof: {}
+ /* At end of input, take the state's EOF transition (stored 1-based;
+ * 0 means none) so that a pending token is still flushed. */
+ if ( p == eof )
+ {
+ if ( _use_syllable_machine_eof_trans[cs] > 0 ) {
+ _trans = _use_syllable_machine_eof_trans[cs] - 1;
+ goto _eof_trans;
+ }
+ }
+
+ }
+
+#line 190 "hb-ot-shape-complex-use-machine.rl"
+
+}
+
+#undef found_syllable
+
+#endif /* HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-use-table.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use-table.cc
new file mode 100644
index 0000000000..aa9c350862
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use-table.cc
@@ -0,0 +1,873 @@
+/* == Start of generated table == */
+/*
+ * The following table is generated by running:
+ *
+ * ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt
+ *
+ * on files with these headers:
+ *
+ * # IndicSyllabicCategory-13.0.0.txt
+ * # Date: 2019-07-22, 19:55:00 GMT [KW, RP]
+ * # IndicPositionalCategory-13.0.0.txt
+ * # Date: 2019-07-23, 00:01:00 GMT [KW, RP]
+ * # Blocks-13.0.0.txt
+ * # Date: 2019-07-10, 19:06:00 GMT [KW]
+ * UnicodeData.txt does not have a header.
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#include "hb-ot-shape-complex-use.hh"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-macros"
+#define B USE_B /* BASE */
+#define CGJ USE_CGJ /* CGJ */
+#define CS USE_CS /* CONS_WITH_STACKER */
+#define GB USE_GB /* BASE_OTHER */
+#define H USE_H /* HALANT */
+#define HN USE_HN /* HALANT_NUM */
+#define HVM USE_HVM /* HALANT_OR_VOWEL_MODIFIER */
+#define IND USE_IND /* BASE_IND */
+#define N USE_N /* BASE_NUM */
+#define O USE_O /* OTHER */
+#define R USE_R /* REPHA */
+#define Rsv USE_Rsv /* Reserved */
+#define S USE_S /* SYM */
+#define SUB USE_SUB /* CONS_SUB */
+#define Sk USE_Sk /* SAKOT */
+#define VS USE_VS /* VARIATION_SELECTOR */
+#define WJ USE_WJ /* Word_Joiner */
+#define ZWJ USE_ZWJ /* ZWJ */
+#define ZWNJ USE_ZWNJ /* ZWNJ */
+#define CMAbv USE_CMAbv
+#define CMBlw USE_CMBlw
+#define FAbv USE_FAbv
+#define FBlw USE_FBlw
+#define FPst USE_FPst
+#define FMAbv USE_FMAbv
+#define FMBlw USE_FMBlw
+#define FMPst USE_FMPst
+#define MAbv USE_MAbv
+#define MBlw USE_MBlw
+#define MPst USE_MPst
+#define MPre USE_MPre
+#define SMAbv USE_SMAbv
+#define SMBlw USE_SMBlw
+#define VAbv USE_VAbv
+#define VBlw USE_VBlw
+#define VPst USE_VPst
+#define VPre USE_VPre
+#define VMAbv USE_VMAbv
+#define VMBlw USE_VMBlw
+#define VMPst USE_VMPst
+#define VMPre USE_VMPre
+#pragma GCC diagnostic pop
+
+static const USE_TABLE_ELEMENT_TYPE use_table[] = {
+
+
+#define use_offset_0x0028u 0
+
+
+ /* Basic Latin */
+ O, O, O, O, O, GB, O, O,
+ /* 0030 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+
+#define use_offset_0x00a0u 24
+
+
+ /* Latin-1 Supplement */
+
+ /* 00A0 */ GB, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* 00B0 */ O, O, FMPst, FMPst, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* 00C0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* 00D0 */ O, O, O, O, O, O, O, GB,
+
+#define use_offset_0x0348u 80
+
+
+ /* Combining Diacritical Marks */
+ O, O, O, O, O, O, O, CGJ,
+
+#define use_offset_0x0900u 88
+
+
+ /* Devanagari */
+
+ /* 0900 */ VMAbv, VMAbv, VMAbv, VMPst, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 0910 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 0920 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 0930 */ B, B, B, B, B, B, B, B, B, B, VAbv, VPst, CMBlw, B, VPst, VPre,
+ /* 0940 */ VPst, VBlw, VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VPst, VPst, VPst, VPst, H, VPre, VPst,
+ /* 0950 */ O, VMAbv, VMBlw, O, O, VAbv, VBlw, VBlw, B, B, B, B, B, B, B, B,
+ /* 0960 */ B, B, VBlw, VBlw, O, O, B, B, B, B, B, B, B, B, B, B,
+ /* 0970 */ O, O, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+
+ /* Bengali */
+
+ /* 0980 */ GB, VMAbv, VMPst, VMPst, O, B, B, B, B, B, B, B, B, O, O, B,
+ /* 0990 */ B, O, O, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 09A0 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B,
+ /* 09B0 */ B, O, B, O, O, O, B, B, B, B, O, O, CMBlw, B, VPst, VPre,
+ /* 09C0 */ VPst, VBlw, VBlw, VBlw, VBlw, O, O, VPre, VPre, O, O, VPst, VPst, H, IND, O,
+ /* 09D0 */ O, O, O, O, O, O, O, VPst, O, O, O, O, B, B, O, B,
+ /* 09E0 */ B, B, VBlw, VBlw, O, O, B, B, B, B, B, B, B, B, B, B,
+ /* 09F0 */ B, B, O, O, O, O, O, O, O, O, O, O, B, O, FMAbv, O,
+
+ /* Gurmukhi */
+
+ /* 0A00 */ O, VMAbv, VMAbv, VMPst, O, B, B, B, B, B, B, O, O, O, O, B,
+ /* 0A10 */ B, O, O, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 0A20 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B,
+ /* 0A30 */ B, O, B, B, O, B, B, O, B, B, O, O, CMBlw, O, VPst, VPre,
+ /* 0A40 */ VPst, VBlw, VBlw, O, O, O, O, VAbv, VAbv, O, O, VAbv, VAbv, H, O, O,
+ /* 0A50 */ O, VMBlw, O, O, O, O, O, O, O, B, B, B, B, O, B, O,
+ /* 0A60 */ O, O, O, O, O, O, B, B, B, B, B, B, B, B, B, B,
+ /* 0A70 */ VMAbv, CMAbv, GB, GB, O, MBlw, O, O, O, O, O, O, O, O, O, O,
+
+ /* Gujarati */
+
+ /* 0A80 */ O, VMAbv, VMAbv, VMPst, O, B, B, B, B, B, B, B, B, B, O, B,
+ /* 0A90 */ B, B, O, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 0AA0 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B,
+ /* 0AB0 */ B, O, B, B, O, B, B, B, B, B, O, O, CMBlw, B, VPst, VPre,
+ /* 0AC0 */ VPst, VBlw, VBlw, VBlw, VBlw, VAbv, O, VAbv, VAbv, VAbv, O, VPst, VPst, H, O, O,
+ /* 0AD0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* 0AE0 */ B, B, VBlw, VBlw, O, O, B, B, B, B, B, B, B, B, B, B,
+ /* 0AF0 */ O, O, O, O, O, O, O, O, O, B, VMAbv, VMAbv, VMAbv, CMAbv, CMAbv, CMAbv,
+
+ /* Oriya */
+
+ /* 0B00 */ O, VMAbv, VMPst, VMPst, O, B, B, B, B, B, B, B, B, O, O, B,
+ /* 0B10 */ B, O, O, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 0B20 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B,
+ /* 0B30 */ B, O, B, B, O, B, B, B, B, B, O, O, CMBlw, B, VPst, VAbv,
+ /* 0B40 */ VPst, VBlw, VBlw, VBlw, VBlw, O, O, VPre, VPst, O, O, VPst, VPst, H, O, O,
+ /* 0B50 */ O, O, O, O, O, VAbv, VAbv, VAbv, O, O, O, O, B, B, O, B,
+ /* 0B60 */ B, B, VBlw, VBlw, O, O, B, B, B, B, B, B, B, B, B, B,
+ /* 0B70 */ O, B, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Tamil */
+
+ /* 0B80 */ O, O, VMAbv, IND, O, B, B, B, B, B, B, O, O, O, B, B,
+ /* 0B90 */ B, O, B, B, B, B, O, O, O, B, B, O, B, O, B, B,
+ /* 0BA0 */ O, O, O, B, B, O, O, O, B, B, B, O, O, O, B, B,
+ /* 0BB0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, VPst, VPst,
+ /* 0BC0 */ VAbv, VPst, VPst, O, O, O, VPre, VPre, VPre, O, VPst, VPst, VPst, H, O, O,
+ /* 0BD0 */ O, O, O, O, O, O, O, VPst, O, O, O, O, O, O, O, O,
+ /* 0BE0 */ O, O, O, O, O, O, B, B, B, B, B, B, B, B, B, B,
+ /* 0BF0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Telugu */
+
+ /* 0C00 */ VMAbv, VMPst, VMPst, VMPst, VMAbv, B, B, B, B, B, B, B, B, O, B, B,
+ /* 0C10 */ B, O, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 0C20 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B,
+ /* 0C30 */ B, B, B, B, B, B, B, B, B, B, O, O, O, B, VAbv, VAbv,
+ /* 0C40 */ VAbv, VPst, VPst, VPst, VPst, O, VAbv, VAbv, VAbv, O, VAbv, VAbv, VAbv, H, O, O,
+ /* 0C50 */ O, O, O, O, O, VAbv, VBlw, O, B, B, B, O, O, O, O, O,
+ /* 0C60 */ B, B, VBlw, VBlw, O, O, B, B, B, B, B, B, B, B, B, B,
+ /* 0C70 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Kannada */
+
+ /* 0C80 */ B, VMAbv, VMPst, VMPst, O, B, B, B, B, B, B, B, B, O, B, B,
+ /* 0C90 */ B, O, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 0CA0 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B,
+ /* 0CB0 */ B, B, B, B, O, B, B, B, B, B, O, O, CMBlw, B, VPst, VAbv,
+ /* 0CC0 */ VAbv, VPst, VPst, VPst, VPst, O, VAbv, VAbv, VAbv, O, VAbv, VAbv, VAbv, H, O, O,
+ /* 0CD0 */ O, O, O, O, O, VPst, VPst, O, O, O, O, O, O, O, B, O,
+ /* 0CE0 */ B, B, VBlw, VBlw, O, O, B, B, B, B, B, B, B, B, B, B,
+ /* 0CF0 */ O, CS, CS, O, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Malayalam */
+
+ /* 0D00 */ VMAbv, VMAbv, VMPst, VMPst, B, B, B, B, B, B, B, B, B, O, B, B,
+ /* 0D10 */ B, O, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 0D20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 0D30 */ B, B, B, B, B, B, B, B, B, B, B, VAbv, VAbv, B, VPst, VPst,
+ /* 0D40 */ VPst, VPst, VPst, VBlw, VBlw, O, VPre, VPre, VPre, O, VPst, VPst, VPst, H, R, O,
+ /* 0D50 */ O, O, O, O, IND, IND, IND, VPst, O, O, O, O, O, O, O, B,
+ /* 0D60 */ B, B, VBlw, VBlw, O, O, B, B, B, B, B, B, B, B, B, B,
+ /* 0D70 */ O, O, O, O, O, O, O, O, O, O, IND, IND, IND, IND, IND, IND,
+
+ /* Sinhala */
+
+ /* 0D80 */ O, VMAbv, VMPst, VMPst, O, B, B, B, B, B, B, B, B, B, B, B,
+ /* 0D90 */ B, B, B, B, B, B, B, O, O, O, B, B, B, B, B, B,
+ /* 0DA0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 0DB0 */ B, B, O, B, B, B, B, B, B, B, B, B, O, B, O, O,
+ /* 0DC0 */ B, B, B, B, B, B, B, O, O, O, H, O, O, O, O, VPst,
+ /* 0DD0 */ VPst, VPst, VAbv, VAbv, VBlw, O, VBlw, O, VPst, VPre, VPst, VPre, VPst, VPst, VPst, VPst,
+ /* 0DE0 */ O, O, O, O, O, O, B, B, B, B, B, B, B, B, B, B,
+ /* 0DF0 */ O, O, VPst, VPst, O, O, O, O,
+
+#define use_offset_0x0f18u 1360
+
+
+ /* Tibetan */
+ VBlw, VBlw, O, O, O, O, O, O,
+ /* 0F20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 0F30 */ B, B, B, B, O, FMBlw, O, FMBlw, O, CMAbv, O, O, O, O, VPst, VPre,
+ /* 0F40 */ B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, B,
+ /* 0F50 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 0F60 */ B, B, B, B, B, B, B, B, B, B, B, B, B, O, O, O,
+ /* 0F70 */ O, VBlw, VBlw, VAbv, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VBlw, VBlw, VBlw, VBlw, VMAbv, VMPst,
+ /* 0F80 */ VBlw, VAbv, VMAbv, VMAbv, VBlw, IND, VMAbv, VMAbv, B, B, B, B, B, SUB, SUB, SUB,
+ /* 0F90 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB,
+ /* 0FA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB,
+ /* 0FB0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, O, O,
+ /* 0FC0 */ O, O, O, O, O, O, FMBlw, O,
+
+#define use_offset_0x1000u 1536
+
+
+ /* Myanmar */
+
+ /* 1000 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1010 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1020 */ B, B, B, B, B, B, B, B, B, B, B, VPst, VPst, VAbv, VAbv, VBlw,
+ /* 1030 */ VBlw, VPre, VAbv, VAbv, VAbv, VAbv, VMAbv, VMBlw, VMPst, H, VAbv, MPst, MPre, MBlw, MBlw, B,
+ /* 1040 */ B, B, B, B, B, B, B, B, B, B, O, GB, O, O, GB, O,
+ /* 1050 */ B, B, B, B, B, B, VPst, VPst, VBlw, VBlw, B, B, B, B, MBlw, MBlw,
+ /* 1060 */ MBlw, B, VPst, VMPst, VMPst, B, B, VPst, VPst, VMPst, VMPst, VMPst, VMPst, VMPst, B, B,
+ /* 1070 */ B, VAbv, VAbv, VAbv, VAbv, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1080 */ B, B, MBlw, VPst, VPre, VAbv, VAbv, VMPst, VMPst, VMPst, VMPst, VMPst, VMPst, VMBlw, B, VMPst,
+ /* 1090 */ B, B, B, B, B, B, B, B, B, B, VMPst, VMPst, VPst, VAbv, O, O,
+
+#define use_offset_0x1700u 1696
+
+
+ /* Tagalog */
+
+ /* 1700 */ B, B, B, B, B, B, B, B, B, B, B, B, B, O, B, B,
+ /* 1710 */ B, B, VAbv, VBlw, VBlw, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Hanunoo */
+
+ /* 1720 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1730 */ B, B, VAbv, VBlw, VBlw, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Buhid */
+
+ /* 1740 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1750 */ B, B, VAbv, VBlw, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Tagbanwa */
+
+ /* 1760 */ B, B, B, B, B, B, B, B, B, B, B, B, B, O, B, B,
+ /* 1770 */ B, O, VAbv, VBlw, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Khmer */
+
+ /* 1780 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1790 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 17A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 17B0 */ B, B, B, B, O, O, VPst, VAbv, VAbv, VAbv, VAbv, VBlw, VBlw, VBlw, VPst, VPst,
+ /* 17C0 */ VPst, VPre, VPre, VPre, VPst, VPst, VMAbv, VMPst, VPst, VMAbv, VMAbv, FMAbv, FAbv, CMAbv, FMAbv, FMAbv,
+ /* 17D0 */ FMAbv, VAbv, H, FMAbv, O, O, O, O, O, O, O, O, B, FMAbv, O, O,
+ /* 17E0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+
+#define use_offset_0x1900u 1936
+
+
+ /* Limbu */
+
+ /* 1900 */ GB, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1910 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, O,
+ /* 1920 */ VAbv, VAbv, VBlw, VPst, VPst, VAbv, VAbv, VAbv, VAbv, SUB, SUB, SUB, O, O, O, O,
+ /* 1930 */ FPst, FPst, VMBlw, FPst, FPst, FPst, FPst, FPst, FPst, FBlw, VAbv, FMBlw, O, O, O, O,
+ /* 1940 */ O, O, O, O, O, O, B, B, B, B, B, B, B, B, B, B,
+
+ /* Tai Le */
+
+ /* 1950 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1960 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, O, O,
+ /* 1970 */ B, B, B, B, B, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* New Tai Lue */
+
+ /* 1980 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1990 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 19A0 */ B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, O,
+ /* 19B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 19C0 */ B, B, B, B, B, B, B, B, VMPst, VMPst, O, O, O, O, O, O,
+ /* 19D0 */ B, B, B, B, B, B, B, B, B, B, B, O, O, O, O, O,
+ /* 19E0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* 19F0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Buginese */
+
+ /* 1A00 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1A10 */ B, B, B, B, B, B, B, VAbv, VBlw, VPre, VPst, VAbv, O, O, O, O,
+
+ /* Tai Tham */
+
+ /* 1A20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1A30 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1A40 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1A50 */ B, B, B, B, B, MPre, MBlw, SUB, FAbv, FAbv, MAbv, SUB, SUB, SUB, SUB, O,
+ /* 1A60 */ Sk, VPst, VAbv, VPst, VPst, VAbv, VAbv, VAbv, VAbv, VBlw, VBlw, VAbv, VBlw, VPst, VPre, VPre,
+ /* 1A70 */ VPre, VPre, VPre, VAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VAbv, FMAbv, FMAbv, O, O, FMBlw,
+ /* 1A80 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+ /* 1A90 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+
+#define use_offset_0x1b00u 2352
+
+
+ /* Balinese */
+
+ /* 1B00 */ VMAbv, VMAbv, VMAbv, FAbv, VMPst, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1B10 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1B20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1B30 */ B, B, B, B, CMAbv, VPst, VAbv, VAbv, VBlw, VBlw, VBlw, VBlw, VAbv, VAbv, VPre, VPre,
+ /* 1B40 */ VPst, VPst, VAbv, VAbv, H, B, B, B, B, B, B, B, O, O, O, O,
+ /* 1B50 */ B, B, B, B, B, B, B, B, B, B, O, GB, GB, O, O, GB,
+ /* 1B60 */ O, S, GB, S, S, S, S, S, GB, S, S, SMAbv, SMBlw, SMAbv, SMAbv, SMAbv,
+ /* 1B70 */ SMAbv, SMAbv, SMAbv, SMAbv, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Sundanese */
+
+ /* 1B80 */ VMAbv, FAbv, VMPst, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1B90 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1BA0 */ B, SUB, SUB, SUB, VAbv, VBlw, VPre, VPst, VAbv, VAbv, VPst, H, SUB, SUB, B, B,
+ /* 1BB0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+
+ /* Batak */
+
+ /* 1BC0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1BD0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1BE0 */ B, B, B, B, B, B, CMAbv, VPst, VAbv, VAbv, VPst, VPst, VPst, VAbv, VPst, VAbv,
+ /* 1BF0 */ FAbv, FAbv, CMBlw, CMBlw, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Lepcha */
+
+ /* 1C00 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1C10 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 1C20 */ B, B, B, B, SUB, SUB, VPst, VPre, VPre, VPre, VPst, VPst, VBlw, FAbv, FAbv, FAbv,
+ /* 1C30 */ FAbv, FAbv, FAbv, FAbv, VMPre, VMPre, FMAbv, CMBlw, O, O, O, O, O, O, O, O,
+ /* 1C40 */ B, B, B, B, B, B, B, B, B, B, O, O, O, B, B, B,
+
+#define use_offset_0x1cd0u 2688
+
+
+ /* Vedic Extensions */
+
+ /* 1CD0 */ VMAbv, VMAbv, VMAbv, O, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMAbv, VMAbv, VMBlw, VMBlw, VMBlw, VMBlw,
+ /* 1CE0 */ VMAbv, VMPst, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, O, O, O, O, VMBlw, O, O,
+ /* 1CF0 */ O, O, IND, IND, VMAbv, CS, CS, VMPst, VMAbv, VMAbv, GB, O, O, O, O, O,
+
+#define use_offset_0x1df8u 2736
+
+
+ /* Combining Diacritical Marks Supplement */
+ O, O, O, FMAbv, O, O, O, O,
+
+#define use_offset_0x2008u 2744
+
+
+ /* General Punctuation */
+ O, O, O, O, ZWNJ, ZWJ, O, O,
+ /* 2010 */ GB, GB, GB, GB, GB, O, O, O,
+
+#define use_offset_0x2060u 2760
+
+ /* 2060 */ WJ, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Superscripts and Subscripts */
+
+ /* 2070 */ O, O, O, O, FMPst, O, O, O, O, O, O, O, O, O, O, O,
+ /* 2080 */ O, O, FMPst, FMPst, FMPst, O, O, O,
+
+#define use_offset_0x20f0u 2800
+
+
+ /* Combining Diacritical Marks for Symbols */
+
+ /* 20F0 */ VMAbv, O, O, O, O, O, O, O,
+
+#define use_offset_0x25c8u 2808
+
+
+ /* Geometric Shapes */
+ O, O, O, O, GB, O, O, O,
+
+#define use_offset_0xa800u 2816
+
+
+ /* Syloti Nagri */
+
+ /* A800 */ B, B, VAbv, B, B, B, H, B, B, B, B, VMAbv, B, B, B, B,
+ /* A810 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* A820 */ B, B, B, VPst, VPst, VBlw, VAbv, VPst, O, O, O, O, VBlw, O, O, O,
+ /* A830 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Phags-pa */
+
+ /* A840 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* A850 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* A860 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* A870 */ B, B, B, B, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Saurashtra */
+
+ /* A880 */ VMPst, VMPst, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* A890 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* A8A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* A8B0 */ B, B, B, B, MPst, VPst, VPst, VPst, VPst, VPst, VPst, VPst, VPst, VPst, VPst, VPst,
+ /* A8C0 */ VPst, VPst, VPst, VPst, H, VMAbv, O, O, O, O, O, O, O, O, O, O,
+ /* A8D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+
+ /* Devanagari Extended */
+
+ /* A8E0 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv,
+ /* A8F0 */ VMAbv, VMAbv, B, B, O, O, O, O, O, O, O, O, O, O, B, VAbv,
+
+ /* Kayah Li */
+
+ /* A900 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* A910 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* A920 */ B, B, B, B, B, B, VAbv, VAbv, VAbv, VAbv, VAbv, VMBlw, VMBlw, VMBlw, O, O,
+
+ /* Rejang */
+
+ /* A930 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* A940 */ B, B, B, B, B, B, B, VBlw, VBlw, VBlw, VAbv, VBlw, VBlw, VBlw, VBlw, FAbv,
+ /* A950 */ FAbv, FAbv, FPst, VPst, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* A960 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* A970 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Javanese */
+
+ /* A980 */ VMAbv, VMAbv, FAbv, VMPst, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* A990 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* A9A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* A9B0 */ B, B, B, CMAbv, VPst, VPst, VAbv, VAbv, VBlw, VBlw, VPre, VPre, VAbv, MBlw, MBlw, MBlw,
+ /* A9C0 */ H, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* A9D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+
+ /* Myanmar Extended-B */
+
+ /* A9E0 */ B, B, B, B, B, VAbv, O, B, B, B, B, B, B, B, B, B,
+ /* A9F0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, O,
+
+ /* Cham */
+
+ /* AA00 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* AA10 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* AA20 */ B, B, B, B, B, B, B, B, B, VMAbv, VAbv, VAbv, VAbv, VBlw, VAbv, VPre,
+ /* AA30 */ VPre, VAbv, VBlw, MPst, MPre, MBlw, MBlw, O, O, O, O, O, O, O, O, O,
+ /* AA40 */ B, B, B, FAbv, B, B, B, B, B, B, B, B, FAbv, FPst, O, O,
+ /* AA50 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+
+ /* Myanmar Extended-A */
+
+ /* AA60 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* AA70 */ O, B, B, B, GB, GB, GB, O, O, O, B, VMPst, VMAbv, VMPst, B, B,
+
+ /* Tai Viet */
+
+ /* AA80 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* AA90 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* AAA0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* AAB0 */ VAbv, B, VAbv, VAbv, VBlw, B, B, VAbv, VAbv, B, B, B, B, B, VAbv, VMAbv,
+ /* AAC0 */ B, VMAbv, B, O, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* AAD0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Meetei Mayek Extensions */
+
+ /* AAE0 */ B, B, B, B, B, B, B, B, B, B, B, VPre, VBlw, VAbv, VPre, VPst,
+ /* AAF0 */ O, O, O, O, O, VMPst, H, O,
+
+#define use_offset_0xabc0u 3576
+
+
+ /* Meetei Mayek */
+
+ /* ABC0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* ABD0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* ABE0 */ B, B, B, VPst, VPst, VAbv, VPst, VPst, VBlw, VPst, VPst, O, VMPst, VBlw, O, O,
+ /* ABF0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+
+#define use_offset_0xfe00u 3640
+
+
+ /* Variation Selectors */
+
+ /* FE00 */ VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS,
+
+#define use_offset_0x10a00u 3656
+
+
+ /* Kharoshthi */
+
+ /* 10A00 */ B, VBlw, VBlw, VBlw, O, VAbv, VBlw, O, O, O, O, O, VBlw, VBlw, VMBlw, VMAbv,
+ /* 10A10 */ B, B, B, B, O, B, B, B, O, B, B, B, B, B, B, B,
+ /* 10A20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 10A30 */ B, B, B, B, B, B, O, O, CMAbv, CMBlw, CMBlw, O, O, O, O, H,
+ /* 10A40 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O,
+
+#define use_offset_0x11000u 3736
+
+
+ /* Brahmi */
+
+ /* 11000 */ VMPst, VMAbv, VMPst, CS, CS, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11010 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11020 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11030 */ B, B, B, B, B, B, B, B, VAbv, VAbv, VAbv, VAbv, VBlw, VBlw, VBlw, VBlw,
+ /* 11040 */ VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, HVM, O, O, O, O, O, O, O, O, O,
+ /* 11050 */ O, O, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+ /* 11060 */ N, N, N, N, N, N, B, B, B, B, B, B, B, B, B, B,
+ /* 11070 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, HN,
+
+ /* Kaithi */
+
+ /* 11080 */ VMAbv, VMAbv, VMPst, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11090 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 110A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 110B0 */ VPst, VPre, VPst, VBlw, VBlw, VAbv, VAbv, VPst, VPst, H, CMBlw, O, O, O, O, O,
+
+#define use_offset_0x11100u 3928
+
+
+ /* Chakma */
+
+ /* 11100 */ VMAbv, VMAbv, VMAbv, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11110 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11120 */ B, B, B, B, B, B, B, VBlw, VBlw, VBlw, VAbv, VAbv, VPre, VBlw, VAbv, VAbv,
+ /* 11130 */ VBlw, VAbv, VAbv, H, CMBlw, O, B, B, B, B, B, B, B, B, B, B,
+ /* 11140 */ O, O, O, O, B, VPst, VPst, B, O, O, O, O, O, O, O, O,
+
+ /* Mahajani */
+
+ /* 11150 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11160 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11170 */ B, B, B, CMBlw, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Sharada */
+
+ /* 11180 */ VMAbv, VMAbv, VMPst, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11190 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 111A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 111B0 */ B, B, B, VPst, VPre, VPst, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv,
+ /* 111C0 */ H, B, R, R, O, O, O, O, GB, FMBlw, CMBlw, VAbv, VBlw, O, VPre, VMAbv,
+ /* 111D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+
+ /* Sinhala Archaic Numbers */
+
+ /* 111E0 */ O, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 111F0 */ B, B, B, B, B, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Khojki */
+
+ /* 11200 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11210 */ B, B, O, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11220 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPst, VPst, VBlw,
+ /* 11230 */ VAbv, VAbv, VAbv, VAbv, VMAbv, H, CMAbv, CMAbv, O, O, O, O, O, O, VMAbv, O,
+
+#define use_offset_0x11280u 4248
+
+
+ /* Multani */
+
+ /* 11280 */ B, B, B, B, B, B, B, O, B, O, B, B, B, B, O, B,
+ /* 11290 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, O, B,
+ /* 112A0 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O,
+
+ /* Khudawadi */
+
+ /* 112B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 112C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 112D0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, VMAbv,
+ /* 112E0 */ VPst, VPre, VPst, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, CMBlw, VBlw, O, O, O, O, O,
+ /* 112F0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+
+ /* Grantha */
+
+ /* 11300 */ VMAbv, VMAbv, VMAbv, VMAbv, O, B, B, B, B, B, B, B, B, O, O, B,
+ /* 11310 */ B, O, O, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11320 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B,
+ /* 11330 */ B, O, B, B, O, B, B, B, B, B, O, CMBlw, CMBlw, B, VPst, VPst,
+ /* 11340 */ VAbv, VPst, VPst, VPst, VPst, O, O, VPre, VPre, O, O, VPst, VPst, HVM, O, O,
+ /* 11350 */ O, O, O, O, O, O, O, VPst, O, O, O, O, O, O, B, B,
+ /* 11360 */ B, B, VPst, VPst, O, O, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O,
+ /* 11370 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O,
+
+#define use_offset_0x11400u 4496
+
+
+ /* Newa */
+
+ /* 11400 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11410 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11420 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11430 */ B, B, B, B, B, VPst, VPre, VPst, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VAbv, VAbv,
+ /* 11440 */ VPst, VPst, H, VMAbv, VMAbv, VMPst, CMBlw, B, O, O, O, O, O, O, O, O,
+ /* 11450 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, FMAbv, B,
+ /* 11460 */ CS, CS, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* 11470 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Tirhuta */
+
+ /* 11480 */ O, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11490 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 114A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 114B0 */ VPst, VPre, VPst, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VPre, VAbv, VPst, VPst, VPst, VPst, VMAbv,
+ /* 114C0 */ VMAbv, VMAbv, H, CMBlw, B, O, O, O, O, O, O, O, O, O, O, O,
+ /* 114D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+
+#define use_offset_0x11580u 4720
+
+
+ /* Siddham */
+
+ /* 11580 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11590 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 115A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, VPst,
+ /* 115B0 */ VPre, VPst, VBlw, VBlw, VBlw, VBlw, O, O, VPre, VPst, VPst, VPst, VMAbv, VMAbv, VMPst, H,
+ /* 115C0 */ CMBlw, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* 115D0 */ O, O, O, O, O, O, O, O, B, B, B, B, VBlw, VBlw, O, O,
+ /* 115E0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* 115F0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Modi */
+
+ /* 11600 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11610 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11620 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11630 */ VPst, VPst, VPst, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VAbv, VAbv, VPst, VPst, VMAbv, VMPst, H,
+ /* 11640 */ VAbv, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* 11650 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+ /* 11660 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* 11670 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Takri */
+
+ /* 11680 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11690 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 116A0 */ B, B, B, B, B, B, B, B, B, B, B, VMAbv, VMPst, VAbv, VPre, VPst,
+ /* 116B0 */ VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, H, CMBlw, B, O, O, O, O, O, O, O,
+ /* 116C0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+ /* 116D0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* 116E0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* 116F0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Ahom */
+
+ /* 11700 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11710 */ B, B, B, B, B, B, B, B, B, B, B, O, O, MBlw, MPre, MAbv,
+ /* 11720 */ VPst, VPst, VAbv, VAbv, VBlw, VBlw, VPre, VAbv, VBlw, VAbv, VAbv, VAbv, O, O, O, O,
+ /* 11730 */ B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, O,
+
+#define use_offset_0x11800u 5168
+
+
+ /* Dogra */
+
+ /* 11800 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11810 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11820 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPre, VPst, VBlw,
+ /* 11830 */ VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VMAbv, VMPst, H, CMBlw, O, O, O, O, O,
+
+#define use_offset_0x11900u 5232
+
+
+ /* Dives Akuru */
+
+ /* 11900 */ B, B, B, B, B, B, B, O, O, B, O, O, B, B, B, B,
+ /* 11910 */ B, B, B, B, O, B, B, O, B, B, B, B, B, B, B, B,
+ /* 11920 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11930 */ VPst, VPst, VPst, VPst, VPst, VPre, O, VPre, VPst, O, O, VMAbv, VMAbv, VPst, H, R,
+ /* 11940 */ MPst, R, MBlw, CMBlw, O, O, O, O, O, O, O, O, O, O, O, O,
+ /* 11950 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+
+#define use_offset_0x119a0u 5328
+
+
+ /* Nandinagari */
+
+ /* 119A0 */ B, B, B, B, B, B, B, B, O, O, B, B, B, B, B, B,
+ /* 119B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 119C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 119D0 */ B, VPst, VPre, VPst, VBlw, VBlw, VBlw, VBlw, O, O, VAbv, VAbv, VPst, VPst, VMPst, VMPst,
+ /* 119E0 */ H, B, O, O, VPre, O, O, O, O, O, O, O, O, O, O, O,
+ /* 119F0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
+
+ /* Zanabazar Square */
+
+ /* 11A00 */ B, VAbv, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VAbv, VAbv, VBlw, B, B, B, B, B,
+ /* 11A10 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11A20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11A30 */ B, B, B, FMBlw, VBlw, VMAbv, VMAbv, VMAbv, VMAbv, VMPst, R, MBlw, MBlw, MBlw, MBlw, GB,
+ /* 11A40 */ O, O, O, O, O, GB, O, H, O, O, O, O, O, O, O, O,
+
+ /* Soyombo */
+
+ /* 11A50 */ B, VAbv, VBlw, VBlw, VAbv, VAbv, VAbv, VPst, VPst, VBlw, VBlw, VBlw, B, B, B, B,
+ /* 11A60 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11A70 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11A80 */ B, B, B, B, R, R, R, R, R, R, FBlw, FBlw, FBlw, FBlw, FBlw, FBlw,
+ /* 11A90 */ FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, VMAbv, VMPst, CMAbv, H, O, O, O, B, O, O,
+
+#define use_offset_0x11c00u 5584
+
+
+ /* Bhaiksuki */
+
+ /* 11C00 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B,
+ /* 11C10 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11C20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, VPst,
+ /* 11C30 */ VAbv, VAbv, VBlw, VBlw, VBlw, VBlw, VBlw, O, VAbv, VAbv, VAbv, VAbv, VMAbv, VMAbv, VMPst, H,
+ /* 11C40 */ B, O, O, O, GB, GB, O, O, O, O, O, O, O, O, O, O,
+ /* 11C50 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11C60 */ B, B, B, B, B, B, B, B, B, B, B, B, B, O, O, O,
+
+ /* Marchen */
+
+ /* 11C70 */ O, O, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11C80 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11C90 */ O, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB,
+ /* 11CA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB,
+ /* 11CB0 */ VBlw, VPre, VBlw, VAbv, VPst, VMAbv, VMAbv, O,
+
+#define use_offset_0x11d00u 5768
+
+
+ /* Masaram Gondi */
+
+ /* 11D00 */ B, B, B, B, B, B, B, O, B, B, O, B, B, B, B, B,
+ /* 11D10 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11D20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11D30 */ B, VAbv, VAbv, VAbv, VAbv, VAbv, VBlw, O, O, O, VAbv, O, VAbv, VAbv, O, VAbv,
+ /* 11D40 */ VMAbv, VMAbv, CMBlw, VAbv, VBlw, H, R, MBlw, O, O, O, O, O, O, O, O,
+ /* 11D50 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+
+ /* Gunjala Gondi */
+
+ /* 11D60 */ B, B, B, B, B, B, O, B, B, O, B, B, B, B, B, B,
+ /* 11D70 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11D80 */ B, B, B, B, B, B, B, B, B, B, VPst, VPst, VPst, VPst, VPst, O,
+ /* 11D90 */ VAbv, VAbv, O, VPst, VPst, VMAbv, VMPst, H, O, O, O, O, O, O, O, O,
+ /* 11DA0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
+
+#define use_offset_0x11ee0u 5944
+
+
+ /* Makasar */
+
+ /* 11EE0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
+ /* 11EF0 */ B, B, GB, VAbv, VBlw, VPre, VPst, O,
+
+}; /* Table items: 5968; occupancy: 74% */
+
+USE_TABLE_ELEMENT_TYPE /* Returns the use_table entry for code point u, or USE_O when u falls outside every tabulated range. */
+hb_use_get_category (hb_codepoint_t u)
+{
+ switch (u >> 12) /* Dispatch on the 0x1000-wide page of u so only that page's ranges are tested. */
+ {
+ case 0x0u: /* U+0000..U+0FFF. Index = (u - range start) + use_offset_* of that range's slice in use_table. */
+ if (hb_in_range<hb_codepoint_t> (u, 0x0028u, 0x003Fu)) return use_table[u - 0x0028u + use_offset_0x0028u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x00A0u, 0x00D7u)) return use_table[u - 0x00A0u + use_offset_0x00a0u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x0348u, 0x034Fu)) return use_table[u - 0x0348u + use_offset_0x0348u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x0900u, 0x0DF7u)) return use_table[u - 0x0900u + use_offset_0x0900u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x0F18u, 0x0FC7u)) return use_table[u - 0x0F18u + use_offset_0x0f18u];
+ break; /* No range on this page matched: leave the switch and return USE_O. */
+
+ case 0x1u: /* U+1000..U+1FFF */
+ if (hb_in_range<hb_codepoint_t> (u, 0x1000u, 0x109Fu)) return use_table[u - 0x1000u + use_offset_0x1000u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x1700u, 0x17EFu)) return use_table[u - 0x1700u + use_offset_0x1700u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x1900u, 0x1A9Fu)) return use_table[u - 0x1900u + use_offset_0x1900u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x1B00u, 0x1C4Fu)) return use_table[u - 0x1B00u + use_offset_0x1b00u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x1CD0u, 0x1CFFu)) return use_table[u - 0x1CD0u + use_offset_0x1cd0u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x1DF8u, 0x1DFFu)) return use_table[u - 0x1DF8u + use_offset_0x1df8u];
+ break;
+
+ case 0x2u: /* U+2000..U+2FFF */
+ if (hb_in_range<hb_codepoint_t> (u, 0x2008u, 0x2017u)) return use_table[u - 0x2008u + use_offset_0x2008u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x2060u, 0x2087u)) return use_table[u - 0x2060u + use_offset_0x2060u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x20F0u, 0x20F7u)) return use_table[u - 0x20F0u + use_offset_0x20f0u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x25C8u, 0x25CFu)) return use_table[u - 0x25C8u + use_offset_0x25c8u];
+ break;
+
+ case 0xAu: /* U+A000..U+AFFF */
+ if (hb_in_range<hb_codepoint_t> (u, 0xA800u, 0xAAF7u)) return use_table[u - 0xA800u + use_offset_0xa800u];
+ if (hb_in_range<hb_codepoint_t> (u, 0xABC0u, 0xABFFu)) return use_table[u - 0xABC0u + use_offset_0xabc0u];
+ break;
+
+ case 0xFu: /* U+F000..U+FFFF */
+ if (hb_in_range<hb_codepoint_t> (u, 0xFE00u, 0xFE0Fu)) return use_table[u - 0xFE00u + use_offset_0xfe00u];
+ break;
+
+ case 0x10u: /* U+10000..U+10FFF */
+ if (hb_in_range<hb_codepoint_t> (u, 0x10A00u, 0x10A4Fu)) return use_table[u - 0x10A00u + use_offset_0x10a00u];
+ break;
+
+ case 0x11u: /* U+11000..U+11FFF */
+ if (hb_in_range<hb_codepoint_t> (u, 0x11000u, 0x110BFu)) return use_table[u - 0x11000u + use_offset_0x11000u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x11100u, 0x1123Fu)) return use_table[u - 0x11100u + use_offset_0x11100u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x11280u, 0x11377u)) return use_table[u - 0x11280u + use_offset_0x11280u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x11400u, 0x114DFu)) return use_table[u - 0x11400u + use_offset_0x11400u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x11580u, 0x1173Fu)) return use_table[u - 0x11580u + use_offset_0x11580u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x11800u, 0x1183Fu)) return use_table[u - 0x11800u + use_offset_0x11800u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x11900u, 0x1195Fu)) return use_table[u - 0x11900u + use_offset_0x11900u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x119A0u, 0x11A9Fu)) return use_table[u - 0x119A0u + use_offset_0x119a0u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x11C00u, 0x11CB7u)) return use_table[u - 0x11C00u + use_offset_0x11c00u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x11D00u, 0x11DAFu)) return use_table[u - 0x11D00u + use_offset_0x11d00u];
+ if (hb_in_range<hb_codepoint_t> (u, 0x11EE0u, 0x11EF7u)) return use_table[u - 0x11EE0u + use_offset_0x11ee0u];
+ break;
+
+ default: /* Pages for which the table holds no range at all. */
+ break;
+ }
+ return USE_O; /* Fallback for every code point not covered by a range above. */
+}
+
+#undef B /* Remove the short category aliases defined ahead of use_table; each #undef in this run pairs with one of those #defines, in the same order. */
+#undef CGJ
+#undef CS
+#undef GB
+#undef H
+#undef HN
+#undef HVM
+#undef IND
+#undef N
+#undef O
+#undef R
+#undef Rsv
+#undef S
+#undef SUB
+#undef Sk
+#undef VS
+#undef WJ
+#undef ZWJ
+#undef ZWNJ
+#undef CMAbv
+#undef CMBlw
+#undef FAbv
+#undef FBlw
+#undef FPst
+#undef FMAbv
+#undef FMBlw
+#undef FMPst
+#undef MAbv
+#undef MBlw
+#undef MPst
+#undef MPre
+#undef SMAbv
+#undef SMBlw
+#undef VAbv
+#undef VBlw
+#undef VPst
+#undef VPre
+#undef VMAbv
+#undef VMBlw
+#undef VMPst
+#undef VMPre
+
+
+#endif
+/* == End of generated table == */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-use.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use.cc
new file mode 100644
index 0000000000..a1e25bdd80
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use.cc
@@ -0,0 +1,569 @@
+/*
+ * Copyright © 2015 Mozilla Foundation.
+ * Copyright © 2015 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Mozilla Author(s): Jonathan Kew
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#include "hb-ot-shape-complex-use.hh"
+#include "hb-ot-shape-complex-arabic.hh"
+#include "hb-ot-shape-complex-arabic-joining-list.hh"
+#include "hb-ot-shape-complex-vowel-constraints.hh"
+
+/* buffer var allocations */
+#define use_category() complex_var_u8_1()
+
+
+/*
+ * Universal Shaping Engine.
+ * https://docs.microsoft.com/en-us/typography/script-development/use
+ */
+
+/* Orthographic-unit ("basic") features.  collect_features_use () enables
+ * every entry with F_MANUAL_ZWJ, ahead of the reorder_use pause, so they
+ * are all applied together before reordering. */
+static const hb_tag_t use_basic_features[] =
+{
+  HB_TAG('r','k','r','f'),
+  HB_TAG('a','b','v','f'),
+  HB_TAG('b','l','w','f'),
+  HB_TAG('h','a','l','f'),
+  HB_TAG('p','s','t','f'),
+  HB_TAG('v','a','t','u'),
+  HB_TAG('c','j','c','t'),
+};
+/* Positional (topographical) forms.  The index of each tag in this array
+ * is used as a joining_form_t value by setup_topographical_masks (). */
+static const hb_tag_t use_topographical_features[] =
+{
+  HB_TAG('i','s','o','l'),
+  HB_TAG('i','n','i','t'),
+  HB_TAG('m','e','d','i'),
+  HB_TAG('f','i','n','a'),
+};
+/* Same order as use_topographical_features. */
+enum joining_form_t {
+  USE_ISOL  = 0, /* 'isol' */
+  USE_INIT  = 1, /* 'init' */
+  USE_MEDI  = 2, /* 'medi' */
+  USE_FINA  = 3, /* 'fina' */
+  _USE_NONE = 4  /* no form; not a valid index into the feature array */
+};
+/* Presentation ("other") features.  collect_features_use () enables every
+ * entry with F_MANUAL_ZWJ as its last step, i.e. after the reorder_use
+ * and _hb_clear_syllables pauses. */
+static const hb_tag_t use_other_features[] =
+{
+  HB_TAG('a','b','v','s'),
+  HB_TAG('b','l','w','s'),
+  HB_TAG('h','a','l','n'),
+  HB_TAG('p','r','e','s'),
+  HB_TAG('p','s','t','s'),
+};
+
+/* Pause callbacks that collect_features_use () registers before their
+ * definitions appear further down in this file. */
+static void setup_syllables_use (const hb_ot_shape_plan_t *plan,
+                                 hb_font_t *font,
+                                 hb_buffer_t *buffer);
+static void record_rphf_use (const hb_ot_shape_plan_t *plan,
+                             hb_font_t *font,
+                             hb_buffer_t *buffer);
+static void record_pref_use (const hb_ot_shape_plan_t *plan,
+                             hb_font_t *font,
+                             hb_buffer_t *buffer);
+static void reorder_use (const hb_ot_shape_plan_t *plan,
+                         hb_font_t *font,
+                         hb_buffer_t *buffer);
+
+static void
+collect_features_use (hb_ot_shape_planner_t *plan)
+{
+  /* Registers the USE features and pause callbacks on the planner's map
+   * builder.  The calls are kept in their original sequence; do not
+   * shuffle them. */
+  hb_ot_map_builder_t &builder = plan->map;
+
+  /* Syllable setup has to happen before any lookups have been applied. */
+  builder.add_gsub_pause (setup_syllables_use);
+
+  /* "Default glyph pre-processing group" */
+  builder.enable_feature (HB_TAG('l','o','c','l'));
+  builder.enable_feature (HB_TAG('c','c','m','p'));
+  builder.enable_feature (HB_TAG('n','u','k','t'));
+  builder.enable_feature (HB_TAG('a','k','h','n'), F_MANUAL_ZWJ);
+
+  /* "Reordering group" */
+  builder.add_gsub_pause (_hb_clear_substitution_flags);
+  builder.add_feature (HB_TAG('r','p','h','f'), F_MANUAL_ZWJ);
+  builder.add_gsub_pause (record_rphf_use);
+  builder.add_gsub_pause (_hb_clear_substitution_flags);
+  builder.enable_feature (HB_TAG('p','r','e','f'), F_MANUAL_ZWJ);
+  builder.add_gsub_pause (record_pref_use);
+
+  /* "Orthographic unit shaping group" */
+  for (const hb_tag_t tag : use_basic_features)
+    builder.enable_feature (tag, F_MANUAL_ZWJ);
+
+  builder.add_gsub_pause (reorder_use);
+  builder.add_gsub_pause (_hb_clear_syllables);
+
+  /* "Topographical features" */
+  for (const hb_tag_t tag : use_topographical_features)
+    builder.add_feature (tag);
+  builder.add_gsub_pause (nullptr);
+
+  /* "Standard typographic presentation" */
+  for (const hb_tag_t tag : use_other_features)
+    builder.enable_feature (tag, F_MANUAL_ZWJ);
+}
+
+struct use_shape_plan_t
+{
+ hb_mask_t rphf_mask; /* Mask of the 'rphf' feature, fetched with get_1_mask () in data_create_use (). */
+
+ arabic_shape_plan_t *arabic_plan; /* Created only when has_arabic_joining () holds for the script; otherwise stays null from calloc. */
+};
+
+static void *
+data_create_use (const hb_ot_shape_plan_t *plan)
+{
+  /* Allocates the zero-initialised per-plan USE data.  Returns nullptr if
+   * either this allocation or the nested Arabic plan creation fails; in
+   * the latter case the partially built plan is freed first. */
+  use_shape_plan_t *use_plan = (use_shape_plan_t *) calloc (1, sizeof (use_shape_plan_t));
+  if (unlikely (!use_plan))
+    return nullptr;
+
+  use_plan->rphf_mask = plan->map.get_1_mask (HB_TAG('r','p','h','f'));
+
+  /* Scripts without Arabic joining are done here; arabic_plan stays null. */
+  if (!has_arabic_joining (plan->props.script))
+    return use_plan;
+
+  void *arabic_data = data_create_arabic (plan);
+  if (unlikely (!arabic_data))
+  {
+    free (use_plan);
+    return nullptr;
+  }
+  use_plan->arabic_plan = (arabic_shape_plan_t *) arabic_data;
+
+  return use_plan;
+}
+
+static void
+data_destroy_use (void *data)
+{
+  /* Releases a plan built by data_create_use (): the nested Arabic plan
+   * first, when there is one, then the plan storage itself. */
+  use_shape_plan_t *use_plan = (use_shape_plan_t *) data;
+  arabic_shape_plan_t *nested = use_plan->arabic_plan;
+
+  if (nested)
+    data_destroy_arabic (nested);
+
+  free (use_plan);
+}
+
+/* Syllable kinds.  Code in this file reads the kind back from the low
+ * four bits of a glyph's syllable () value. */
+enum use_syllable_type_t {
+  use_independent_cluster              = 0,
+  use_virama_terminated_cluster        = 1,
+  use_sakot_terminated_cluster         = 2,
+  use_standard_cluster                 = 3,
+  use_number_joiner_terminated_cluster = 4,
+  use_numeral_cluster                  = 5,
+  use_symbol_cluster                   = 6,
+  use_broken_cluster                   = 7,
+  use_non_cluster                      = 8,
+};
+
+#include "hb-ot-shape-complex-use-machine.hh"
+
+
+static void
+setup_masks_use (const hb_ot_shape_plan_t *plan,
+                 hb_buffer_t *buffer,
+                 hb_font_t *font HB_UNUSED)
+{
+  /* Tags every glyph with its USE category.  The feature masks are not
+   * set here; that happens later, in a pause callback. */
+  const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
+  const arabic_shape_plan_t *arabic = use_plan->arabic_plan;
+
+  /* The Arabic mask setup must run before use_category() is allocated. */
+  if (arabic)
+    setup_masks_arabic_plan (arabic, buffer, plan->props.script);
+
+  HB_BUFFER_ALLOCATE_VAR (buffer, use_category);
+
+  hb_glyph_info_t *const glyphs_end = buffer->info + buffer->len;
+  for (hb_glyph_info_t *glyph = buffer->info; glyph != glyphs_end; glyph++)
+    glyph->use_category() = hb_use_get_category (glyph->codepoint);
+}
+
+static void
+setup_rphf_mask (const hb_ot_shape_plan_t *plan,
+                 hb_buffer_t *buffer)
+{
+  /* ORs the 'rphf' mask onto the leading glyphs of every syllable: only
+   * the first glyph when it is already USE_R, otherwise up to the first
+   * three.  No-op when the plan has no 'rphf' mask. */
+  const hb_mask_t rphf_mask = ((const use_shape_plan_t *) plan->data)->rphf_mask;
+  if (!rphf_mask)
+    return;
+
+  hb_glyph_info_t *info = buffer->info;
+
+  foreach_syllable (buffer, start, end)
+  {
+    unsigned int limit;
+    if (info[start].use_category() == USE_R)
+      limit = 1;
+    else
+      limit = hb_min (3u, end - start);
+
+    for (unsigned int offset = 0; offset < limit; offset++)
+      info[start + offset].mask |= rphf_mask;
+  }
+}
+
+static void
+setup_topographical_masks (const hb_ot_shape_plan_t *plan,
+                           hb_buffer_t *buffer)
+{
+  /* Assigns the isol/init/medi/fina mask to each joining syllable based
+   * on whether the syllable before it also joins.  Skipped entirely when
+   * the plan carries an Arabic plan, or when none of the four features
+   * has a mask of its own. */
+  if (((const use_shape_plan_t *) plan->data)->arabic_plan)
+    return;
+
+  static_assert ((USE_INIT < 4 && USE_ISOL < 4 && USE_MEDI < 4 && USE_FINA < 4), "");
+  hb_mask_t form_masks[4];
+  hb_mask_t combined = 0;
+  for (unsigned int form = 0; form < 4; form++)
+  {
+    hb_mask_t m = plan->map.get_1_mask (use_topographical_features[form]);
+    /* A feature that only has the global mask counts as having none. */
+    if (m == plan->map.get_global_mask ())
+      m = 0;
+    form_masks[form] = m;
+    combined |= m;
+  }
+  if (!combined)
+    return;
+  const hb_mask_t keep = ~combined;
+
+  unsigned int prev_start = 0;
+  joining_form_t prev_form = _USE_NONE;
+  hb_glyph_info_t *info = buffer->info;
+  foreach_syllable (buffer, start, end)
+  {
+    switch ((use_syllable_type_t) (info[start].syllable() & 0x0F))
+    {
+      case use_independent_cluster:
+      case use_symbol_cluster:
+      case use_non_cluster:
+        /* Non-joining syllable: it breaks the chain. */
+        prev_form = _USE_NONE;
+        break;
+
+      case use_virama_terminated_cluster:
+      case use_sakot_terminated_cluster:
+      case use_standard_cluster:
+      case use_number_joiner_terminated_cluster:
+      case use_numeral_cluster:
+      case use_broken_cluster:
+      {
+        const bool joins_previous = prev_form == USE_FINA || prev_form == USE_ISOL;
+
+        if (joins_previous)
+        {
+          /* The previous syllable is no longer last: fina -> medi, isol -> init. */
+          prev_form = (prev_form == USE_FINA) ? USE_MEDI : USE_INIT;
+          for (unsigned int i = prev_start; i < start; i++)
+            info[i].mask = (info[i].mask & keep) | form_masks[prev_form];
+        }
+
+        /* This syllable is the last one seen so far. */
+        prev_form = joins_previous ? USE_FINA : USE_ISOL;
+        for (unsigned int i = start; i < end; i++)
+          info[i].mask = (info[i].mask & keep) | form_masks[prev_form];
+
+        break;
+      }
+    }
+
+    prev_start = start;
+  }
+}
+
+static void
+setup_syllables_use (const hb_ot_shape_plan_t *plan,
+                     hb_font_t *font HB_UNUSED,
+                     hb_buffer_t *buffer)
+{
+  /* Pause callback: find the syllables, flag each one as unsafe to
+   * break, then set the 'rphf' and topographical masks. */
+  find_syllables_use (buffer);
+
+  foreach_syllable (buffer, start, end)
+  {
+    buffer->unsafe_to_break (start, end);
+  }
+
+  setup_rphf_mask (plan, buffer);
+  setup_topographical_masks (plan, buffer);
+}
+
+static void
+record_rphf_use (const hb_ot_shape_plan_t *plan,
+                 hb_font_t *font HB_UNUSED,
+                 hb_buffer_t *buffer)
+{
+  /* Pause callback: in each syllable, re-categorise the first substituted
+   * glyph of the leading 'rphf'-masked run as USE_R (a substituted
+   * repha).  No-op when the plan has no 'rphf' mask. */
+  const hb_mask_t rphf_mask = ((const use_shape_plan_t *) plan->data)->rphf_mask;
+  if (!rphf_mask)
+    return;
+
+  hb_glyph_info_t *info = buffer->info;
+
+  foreach_syllable (buffer, start, end)
+  {
+    for (unsigned int i = start; i < end; i++)
+    {
+      /* The run of masked glyphs ends at the first unmasked one. */
+      if (!(info[i].mask & rphf_mask))
+        break;
+      if (!_hb_glyph_info_substituted (&info[i]))
+        continue;
+
+      info[i].use_category() = USE_R;
+      break;
+    }
+  }
+}
+
+static void
+record_pref_use (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                 hb_font_t *font HB_UNUSED,
+                 hb_buffer_t *buffer)
+{
+  /* Pause callback: in each syllable, re-categorise the first substituted
+   * glyph as USE_VPre, since a substituted pref behaves the same way. */
+  hb_glyph_info_t *info = buffer->info;
+
+  foreach_syllable (buffer, start, end)
+  {
+    for (unsigned int i = start; i < end; i++)
+    {
+      if (!_hb_glyph_info_substituted (&info[i]))
+        continue;
+
+      info[i].use_category() = USE_VPre;
+      break;
+    }
+  }
+}
+
+static inline bool
+is_halant_use (const hb_glyph_info_t &info)
+{
+  /* True for a glyph categorised USE_H or USE_HVM that has not been
+   * ligated. */
+  const unsigned int category = info.use_category();
+  if (category != USE_H && category != USE_HVM)
+    return false;
+
+  return !_hb_glyph_info_ligated (&info);
+}
+
+static void
+reorder_syllable_use (hb_buffer_t *buffer, unsigned int start, unsigned int end)
+{
+ use_syllable_type_t syllable_type = (use_syllable_type_t) (buffer->info[start].syllable() & 0x0F);
+ /* Reorders the glyphs of one syllable, [start, end), in place.
+ * Only a few syllable types need reordering: the type is the low nibble
+ * of the first glyph's syllable() value, and anything other than the
+ * four types tested below returns untouched. */
+ if (unlikely (!(FLAG_UNSAFE (syllable_type) &
+ (FLAG (use_virama_terminated_cluster) |
+ FLAG (use_sakot_terminated_cluster) |
+ FLAG (use_standard_cluster) |
+ FLAG (use_broken_cluster) |
+ 0))))
+ return;
+
+ hb_glyph_info_t *info = buffer->info;
+
+#define POST_BASE_FLAGS64 (FLAG64 (USE_FM) | \
+ FLAG64 (USE_FAbv) | \
+ FLAG64 (USE_FBlw) | \
+ FLAG64 (USE_FPst) | \
+ FLAG64 (USE_MAbv) | \
+ FLAG64 (USE_MBlw) | \
+ FLAG64 (USE_MPst) | \
+ FLAG64 (USE_MPre) | \
+ FLAG64 (USE_VAbv) | \
+ FLAG64 (USE_VBlw) | \
+ FLAG64 (USE_VPst) | \
+ FLAG64 (USE_VPre) | \
+ FLAG64 (USE_VMAbv) | \
+ FLAG64 (USE_VMBlw) | \
+ FLAG64 (USE_VMPst) | \
+ FLAG64 (USE_VMPre))
+
+ /* Move things forward.
+ * Applies only when the syllable starts with a USE_R glyph and holds at
+ * least two glyphs. */
+ if (info[start].use_category() == USE_R && end - start > 1)
+ {
+ /* Got a repha. Reorder it towards the end, but before the first post-base
+ * glyph. A glyph counts as post-base here when its category is in
+ * POST_BASE_FLAGS64 or when is_halant_use () accepts it. */
+ for (unsigned int i = start + 1; i < end; i++)
+ {
+ bool is_post_base_glyph = (FLAG64_UNSAFE (info[i].use_category()) & POST_BASE_FLAGS64) ||
+ is_halant_use (info[i]);
+ if (is_post_base_glyph || i == end - 1)
+ {
+ /* If we hit a post-base glyph, move before it; otherwise move to the
+ * end. Shift things in between backward. */
+
+ if (is_post_base_glyph)
+ i--;
+
+ buffer->merge_clusters (start, i + 1);
+ hb_glyph_info_t t = info[start];
+ memmove (&info[start], &info[start + 1], (i - start) * sizeof (info[0])); /* close the gap: slide info[start+1 .. i] down by one */
+ info[i] = t;
+
+ break;
+ }
+ }
+ }
+
+ /* Move things back.
+ * j is the insertion point for a pre-base glyph: the syllable start, or
+ * the slot just after the most recent halant seen so far. */
+ unsigned int j = start;
+ for (unsigned int i = start; i < end; i++)
+ {
+ uint32_t flag = FLAG_UNSAFE (info[i].use_category());
+ if (is_halant_use (info[i]))
+ {
+ /* If we hit a halant, move after it; otherwise move to the beginning, and
+ * shift things in between forward. */
+ j = i + 1;
+ }
+ else if (((flag) & (FLAG (USE_VPre) | FLAG (USE_VMPre))) &&
+ /* Only move the first component of a MultipleSubst. */
+ 0 == _hb_glyph_info_get_lig_comp (&info[i]) &&
+ j < i)
+ {
+ buffer->merge_clusters (j, i + 1);
+ hb_glyph_info_t t = info[i];
+ memmove (&info[j + 1], &info[j], (i - j) * sizeof (info[0])); /* open a slot at j: slide info[j .. i-1] up by one */
+ info[j] = t;
+ }
+ }
+}
+
+static inline void
+insert_dotted_circles_use (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                           hb_font_t *font,
+                           hb_buffer_t *buffer)
+{
+  /* Outputs a dotted circle (U+25CC) at the start of every broken
+   * cluster, after any leading glyphs categorised as USE_R.  Does
+   * nothing when the buffer opts out, holds no broken cluster, or the
+   * font has no nominal glyph for U+25CC. */
+  if (unlikely (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE))
+    return;
+
+  /* Note: This scan is extra overhead, but should not be measurable.
+   * TODO Use a buffer scratch flag to remove the scan. */
+  hb_glyph_info_t *info = buffer->info;
+  const unsigned int count = buffer->len;
+  unsigned int broken_at = 0;
+  while (broken_at < count &&
+         (info[broken_at].syllable() & 0x0F) != use_broken_cluster)
+    broken_at++;
+  if (likely (broken_at == count))
+    return;
+
+  hb_glyph_info_t circle = {0};
+  if (!font->get_nominal_glyph (0x25CCu, &circle.codepoint))
+    return;
+  circle.use_category() = hb_use_get_category (0x25CC);
+
+  buffer->clear_output ();
+  buffer->idx = 0;
+
+  unsigned int handled_syllable = 0;
+  while (buffer->idx < buffer->len && buffer->successful)
+  {
+    const unsigned int syllable = buffer->cur().syllable();
+    const bool starts_broken_cluster =
+      handled_syllable != syllable &&
+      (use_syllable_type_t) (syllable & 0x0F) == use_broken_cluster;
+
+    if (likely (!starts_broken_cluster))
+    {
+      buffer->next_glyph ();
+      continue;
+    }
+
+    handled_syllable = syllable;
+
+    /* The circle takes cluster, mask and syllable from the glyph it is
+     * placed in front of. */
+    hb_glyph_info_t ginfo = circle;
+    ginfo.cluster = buffer->cur().cluster;
+    ginfo.mask = buffer->cur().mask;
+    ginfo.syllable() = buffer->cur().syllable();
+
+    /* Insert dottedcircle after possible Repha. */
+    while (buffer->idx < buffer->len && buffer->successful &&
+           handled_syllable == buffer->cur().syllable() &&
+           buffer->cur().use_category() == USE_R)
+      buffer->next_glyph ();
+
+    buffer->output_info (ginfo);
+  }
+  buffer->swap_buffers ();
+}
+
+static void
+reorder_use (const hb_ot_shape_plan_t *plan,
+             hb_font_t *font,
+             hb_buffer_t *buffer)
+{
+  /* Pause callback: insert dotted circles for broken clusters, reorder
+   * every syllable, then release the use_category() buffer variable. */
+  insert_dotted_circles_use (plan, font, buffer);
+
+  foreach_syllable (buffer, start, end)
+  {
+    reorder_syllable_use (buffer, start, end);
+  }
+
+  HB_BUFFER_DEALLOCATE_VAR (buffer, use_category);
+}
+
+
+static void
+preprocess_text_use (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ hb_font_t *font)
+{
+ _hb_preprocess_text_vowel_constraints (plan, buffer, font); /* The only text pre-processing USE does: forward to the shared vowel-constraints pass. */
+}
+
+static bool
+compose_use (const hb_ot_shape_normalize_context_t *c,
+             hb_codepoint_t a,
+             hb_codepoint_t b,
+             hb_codepoint_t *ab)
+{
+  /* Composes a and b into *ab through the context's Unicode functions,
+   * except when a is a mark: that avoids recomposing split matras. */
+  const bool first_is_mark =
+    HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a));
+  if (first_is_mark)
+    return false;
+
+  return (bool) c->unicode->compose (a, b, ab);
+}
+
+
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use =
+{
+ collect_features_use, /* positional initializers: the order must match the member order of hb_ot_complex_shaper_t */
+ nullptr, /* override_features */
+ data_create_use,
+ data_destroy_use,
+ preprocess_text_use,
+ nullptr, /* postprocess_glyphs */
+ HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
+ nullptr, /* decompose */
+ compose_use, /* refuses to compose when the first code point is a mark */
+ setup_masks_use, /* fills use_category() for every glyph */
+ HB_TAG_NONE, /* gpos_tag */
+ nullptr, /* reorder_marks */
+ HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
+ false, /* fallback_position */
+};
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-use.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use.hh
new file mode 100644
index 0000000000..ce6645ecd3
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use.hh
@@ -0,0 +1,105 @@
+/*
+ * Copyright © 2015 Mozilla Foundation.
+ * Copyright © 2015 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Mozilla Author(s): Jonathan Kew
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_USE_HH
+#define HB_OT_SHAPE_COMPLEX_USE_HH
+
+#include "hb.hh"
+
+
+#include "hb-ot-shape-complex.hh"
+
+
+#define USE_TABLE_ELEMENT_TYPE uint8_t
+
+/* Categories used in the Universal Shaping Engine spec:
+ * https://docs.microsoft.com/en-us/typography/script-development/use
+ */
+/* Note: This enum is duplicated in the -machine.rl source file.
+ * Not sure how to avoid duplication. */
+enum use_category_t {
+  /* Listed in numeric order so that gaps and duplicates are easy to
+   * spot.  Values that are commented out are not defined here. */
+  USE_O		= 0,	/* OTHER */
+  USE_B		= 1,	/* BASE */
+  USE_IND	= 3,	/* BASE_IND */
+  USE_N		= 4,	/* BASE_NUM */
+  USE_GB	= 5,	/* BASE_OTHER */
+  USE_CGJ	= 6,	/* CGJ */
+//  USE_F	= 7,	/* CONS_FINAL */
+  USE_FM	= 8,	/* CONS_FINAL_MOD */
+//  USE_M	= 9,	/* CONS_MED */
+//  USE_CM	= 10,	/* CONS_MOD */
+  USE_SUB	= 11,	/* CONS_SUB */
+  USE_H		= 12,	/* HALANT */
+  USE_HN	= 13,	/* HALANT_NUM */
+  USE_ZWNJ	= 14,	/* Zero width non-joiner */
+  USE_ZWJ	= 15,	/* Zero width joiner */
+  USE_WJ	= 16,	/* Word joiner */
+  USE_Rsv	= 17,	/* Reserved characters */
+  USE_R		= 18,	/* REPHA */
+  USE_S		= 19,	/* SYM */
+//  USE_SM	= 20,	/* SYM_MOD */
+  USE_VS	= 21,	/* VARIATION_SELECTOR */
+  USE_VPre	= 22,	/* VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST */
+  USE_VMPre	= 23,	/* VOWEL_MOD_PRE */
+  USE_FAbv	= 24,	/* CONS_FINAL_ABOVE */
+  USE_FBlw	= 25,	/* CONS_FINAL_BELOW */
+  USE_FPst	= 26,	/* CONS_FINAL_POST */
+  USE_MAbv	= 27,	/* CONS_MED_ABOVE */
+  USE_MBlw	= 28,	/* CONS_MED_BELOW */
+  USE_MPst	= 29,	/* CONS_MED_POST */
+  USE_MPre	= 30,	/* CONS_MED_PRE */
+  USE_CMAbv	= 31,	/* CONS_MOD_ABOVE */
+  USE_CMBlw	= 32,	/* CONS_MOD_BELOW */
+  USE_VAbv	= 33,	/* VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST */
+  USE_VBlw	= 34,	/* VOWEL_BELOW / VOWEL_BELOW_POST */
+  USE_VPst	= 35,	/* VOWEL_POST UIPC = Right */
+//  USE_V	= 36,	/* VOWEL */
+  USE_VMAbv	= 37,	/* VOWEL_MOD_ABOVE */
+  USE_VMBlw	= 38,	/* VOWEL_MOD_BELOW */
+  USE_VMPst	= 39,	/* VOWEL_MOD_POST */
+//  USE_VM	= 40,	/* VOWEL_MOD */
+  USE_SMAbv	= 41,	/* SYM_MOD_ABOVE */
+  USE_SMBlw	= 42,	/* SYM_MOD_BELOW */
+  USE_CS	= 43,	/* CONS_WITH_STACKER */
+
+  /* https://github.com/harfbuzz/harfbuzz/issues/1102 */
+  USE_HVM	= 44,	/* HALANT_OR_VOWEL_MODIFIER */
+
+  USE_FMAbv	= 45,	/* CONS_FINAL_MOD UIPC = Top */
+  USE_FMBlw	= 46,	/* CONS_FINAL_MOD UIPC = Bottom */
+  USE_FMPst	= 47,	/* CONS_FINAL_MOD UIPC = Not_Applicable */
+  USE_Sk	= 48,	/* SAKOT */
+};
+
+HB_INTERNAL USE_TABLE_ELEMENT_TYPE
+hb_use_get_category (hb_codepoint_t u); /* Category for code point u, as a USE_TABLE_ELEMENT_TYPE (uint8_t); presumably a use_category_t value -- callers compare the result against USE_* constants. */
+
+#endif /* HB_OT_SHAPE_COMPLEX_USE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.cc
new file mode 100644
index 0000000000..c3368c6ec2
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.cc
@@ -0,0 +1,464 @@
+/* == Start of generated functions == */
+/*
+ * The following functions are generated by running:
+ *
+ * ./gen-vowel-constraints.py ms-use/IndicShapingInvalidCluster.txt Scripts.txt
+ *
+ * on files with these headers:
+ *
+ * # IndicShapingInvalidCluster.txt
+ * # Date: 2015-03-12, 21:17:00 GMT [AG]
+ * # Date: 2019-11-08, 23:22:00 GMT [AG]
+ *
+ * # Scripts-13.0.0.txt
+ * # Date: 2020-01-22, 00:07:43 GMT
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#include "hb-ot-shape-complex-vowel-constraints.hh"
+
+static void
+_output_dotted_circle (hb_buffer_t *buffer)
+{
+  /* Output U+25CC and reset the continuation state of the glyph info
+   * that output_glyph () hands back for it. */
+  _hb_glyph_info_reset_continuation (&buffer->output_glyph (0x25CCu));
+}
+
+static void
+_output_with_dotted_circle (hb_buffer_t *buffer)
+{
+ _output_dotted_circle (buffer); /* U+25CC goes out first ... */
+ buffer->next_glyph (); /* ... then step past the current glyph (next_glyph () is expected to pass it through to the output -- confirm in hb-buffer.hh). */
+}
+
+void
+_hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan HB_UNUSED,
+ hb_buffer_t *buffer,
+ hb_font_t *font HB_UNUSED)
+{
+#ifdef HB_NO_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS
+ return;
+#endif
+ if (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE)
+ return;
+
+ /* UGLY UGLY UGLY business of adding dotted-circle in the middle of
+ * vowel-sequences that look like another vowel. Data for each script
+ * collected from the USE script development spec.
+ *
+ * https://github.com/harfbuzz/harfbuzz/issues/1019
+ *
+ * Every script case below has the same shape: walk the buffer while at
+ * least one more glyph follows the current one, test the current code
+ * point and the one after it against that script's pairs, advance with
+ * next_glyph (), and on a match call _output_with_dotted_circle () so
+ * that U+25CC is output ahead of the second code point of the pair.
+ * A script without a case hits `default`; `processed` then stays false
+ * and swap_buffers () is not called.
+ */
+ bool processed = false;
+ buffer->clear_output ();
+ unsigned int count = buffer->len;
+ switch ((unsigned) buffer->props.script)
+ {
+ case HB_SCRIPT_DEVANAGARI:
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0905u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x093Au: case 0x093Bu: case 0x093Eu: case 0x0945u:
+ case 0x0946u: case 0x0949u: case 0x094Au: case 0x094Bu:
+ case 0x094Cu: case 0x094Fu: case 0x0956u: case 0x0957u:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0906u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x093Au: case 0x0945u: case 0x0946u: case 0x0947u:
+ case 0x0948u:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0909u:
+ matched = 0x0941u == buffer->cur (1).codepoint;
+ break;
+ case 0x090Fu:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0945u: case 0x0946u: case 0x0947u:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0930u:
+ if (0x094Du == buffer->cur (1).codepoint &&
+ buffer->idx + 2 < count &&
+ 0x0907u == buffer->cur (2).codepoint)
+ {
+ buffer->next_glyph (); /* three-code-point sequence: step over U+0930 here so the circle lands between U+094D and U+0907 */
+ matched = true;
+ }
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+
+ case HB_SCRIPT_BENGALI:
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0985u:
+ matched = 0x09BEu == buffer->cur (1).codepoint;
+ break;
+ case 0x098Bu:
+ matched = 0x09C3u == buffer->cur (1).codepoint;
+ break;
+ case 0x098Cu:
+ matched = 0x09E2u == buffer->cur (1).codepoint;
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+
+ case HB_SCRIPT_GURMUKHI:
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0A05u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0A3Eu: case 0x0A48u: case 0x0A4Cu:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0A72u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0A3Fu: case 0x0A40u: case 0x0A47u:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0A73u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0A41u: case 0x0A42u: case 0x0A4Bu:
+ matched = true;
+ break;
+ }
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+
+ case HB_SCRIPT_GUJARATI:
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0A85u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0ABEu: case 0x0AC5u: case 0x0AC7u: case 0x0AC8u:
+ case 0x0AC9u: case 0x0ACBu: case 0x0ACCu:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0AC5u:
+ matched = 0x0ABEu == buffer->cur (1).codepoint;
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+
+ case HB_SCRIPT_ORIYA:
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0B05u:
+ matched = 0x0B3Eu == buffer->cur (1).codepoint;
+ break;
+ case 0x0B0Fu: case 0x0B13u:
+ matched = 0x0B57u == buffer->cur (1).codepoint;
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+
+ case HB_SCRIPT_TAMIL:
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ if (0x0B85u == buffer->cur ().codepoint &&
+ 0x0BC2u == buffer->cur (1).codepoint)
+ {
+ matched = true;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+
+ case HB_SCRIPT_TELUGU:
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0C12u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0C4Cu: case 0x0C55u:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0C3Fu: case 0x0C46u: case 0x0C4Au:
+ matched = 0x0C55u == buffer->cur (1).codepoint;
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+
+ case HB_SCRIPT_KANNADA:
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0C89u: case 0x0C8Bu:
+ matched = 0x0CBEu == buffer->cur (1).codepoint;
+ break;
+ case 0x0C92u:
+ matched = 0x0CCCu == buffer->cur (1).codepoint;
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+
+ case HB_SCRIPT_MALAYALAM:
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0D07u: case 0x0D09u:
+ matched = 0x0D57u == buffer->cur (1).codepoint;
+ break;
+ case 0x0D0Eu:
+ matched = 0x0D46u == buffer->cur (1).codepoint;
+ break;
+ case 0x0D12u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0D3Eu: case 0x0D57u:
+ matched = true;
+ break;
+ }
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+
+ case HB_SCRIPT_SINHALA:
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0D85u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0DCFu: case 0x0DD0u: case 0x0DD1u:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0D8Bu: case 0x0D8Fu: case 0x0D94u:
+ matched = 0x0DDFu == buffer->cur (1).codepoint;
+ break;
+ case 0x0D8Du:
+ matched = 0x0DD8u == buffer->cur (1).codepoint;
+ break;
+ case 0x0D91u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0DCAu: case 0x0DD9u: case 0x0DDAu: case 0x0DDCu:
+ case 0x0DDDu:
+ matched = true;
+ break;
+ }
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+
+ case HB_SCRIPT_BRAHMI:
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x11005u:
+ matched = 0x11038u == buffer->cur (1).codepoint;
+ break;
+ case 0x1100Bu:
+ matched = 0x1103Eu == buffer->cur (1).codepoint;
+ break;
+ case 0x1100Fu:
+ matched = 0x11042u == buffer->cur (1).codepoint;
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+
+ case HB_SCRIPT_KHUDAWADI:
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x112B0u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x112E0u: case 0x112E5u: case 0x112E6u: case 0x112E7u:
+ case 0x112E8u:
+ matched = true;
+ break;
+ }
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+
+ case HB_SCRIPT_TIRHUTA:
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x11481u:
+ matched = 0x114B0u == buffer->cur (1).codepoint;
+ break;
+ case 0x1148Bu: case 0x1148Du:
+ matched = 0x114BAu == buffer->cur (1).codepoint;
+ break;
+ case 0x114AAu:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x114B5u: case 0x114B6u:
+ matched = true;
+ break;
+ }
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+
+ case HB_SCRIPT_MODI:
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x11600u: case 0x11601u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x11639u: case 0x1163Au:
+ matched = true;
+ break;
+ }
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+
+ case HB_SCRIPT_TAKRI:
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x11680u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x116ADu: case 0x116B4u: case 0x116B5u:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x11686u:
+ matched = 0x116B2u == buffer->cur (1).codepoint;
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+
+ default:
+ break;
+ }
+ if (processed)
+ {
+ if (buffer->idx < count) /* the loops above stop while a glyph may still be pending; step past it too */
+ buffer->next_glyph ();
+ buffer->swap_buffers ();
+ }
+}
+
+
+#endif
+/* == End of generated functions == */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.hh
new file mode 100644
index 0000000000..d9082d4ead
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.hh
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS_HH
+#define HB_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS_HH
+
+#include "hb.hh"
+
+#include "hb-ot-shape-complex.hh"
+
+HB_INTERNAL void
+_hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ hb_font_t *font); /* Outputs U+25CC between certain code point pairs, chosen by buffer->props.script; the definition marks plan and font HB_UNUSED. */
+
+#endif /* HB_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex.hh
new file mode 100644
index 0000000000..61f4c0e158
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex.hh
@@ -0,0 +1,402 @@
+/*
+ * Copyright © 2010,2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_HH
+#define HB_OT_SHAPE_COMPLEX_HH
+
+#include "hb.hh"
+
+#include "hb-ot-layout.hh"
+#include "hb-ot-shape.hh"
+#include "hb-ot-shape-normalize.hh"
+
+
+/* buffer var allocations, used by complex shapers */
+#define complex_var_u8_0() var2.u8[2]
+#define complex_var_u8_1() var2.u8[3]
+
+
+#define HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS 32
+
+/* Per-shaper setting for zero-width mark handling; stored in
+ * hb_ot_complex_shaper_t::zero_width_marks.
+ * NOTE(review): the _EARLY / _LATE variants presumably select at which point
+ * of shaping GDEF-classified marks get their advances zeroed -- confirm in
+ * the code that consumes this value. */
+enum hb_ot_shape_zero_width_marks_type_t {
+ HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
+ HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
+ HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE
+};
+
+
+/* Master OT shaper list.
+ * This is an X-macro: every entry expands to HB_COMPLEX_SHAPER_IMPLEMENT (name).
+ * Code that uses the list defines HB_COMPLEX_SHAPER_IMPLEMENT first, expands
+ * the list, and then undefines it again. */
+#define HB_COMPLEX_SHAPERS_IMPLEMENT_SHAPERS \
+ HB_COMPLEX_SHAPER_IMPLEMENT (arabic) \
+ HB_COMPLEX_SHAPER_IMPLEMENT (default) \
+ HB_COMPLEX_SHAPER_IMPLEMENT (dumber) \
+ HB_COMPLEX_SHAPER_IMPLEMENT (hangul) \
+ HB_COMPLEX_SHAPER_IMPLEMENT (hebrew) \
+ HB_COMPLEX_SHAPER_IMPLEMENT (indic) \
+ HB_COMPLEX_SHAPER_IMPLEMENT (khmer) \
+ HB_COMPLEX_SHAPER_IMPLEMENT (myanmar) \
+ HB_COMPLEX_SHAPER_IMPLEMENT (myanmar_zawgyi) \
+ HB_COMPLEX_SHAPER_IMPLEMENT (thai) \
+ HB_COMPLEX_SHAPER_IMPLEMENT (use) \
+ /* ^--- Add new shapers here; keep sorted. */
+
+
+/* Descriptor of one complex shaper: a table of optional hooks plus a few
+ * per-shaper settings.  One constant instance exists per shaper name
+ * (_hb_ot_complex_shaper_<name>).  Hooks documented as "May be NULL" can be
+ * left unset. */
+struct hb_ot_complex_shaper_t
+{
+ /* collect_features()
+ * Called during shape_plan().
+ * Shapers should use plan->map to add their features and callbacks.
+ * May be NULL.
+ */
+ void (*collect_features) (hb_ot_shape_planner_t *plan);
+
+ /* override_features()
+ * Called during shape_plan().
+ * Shapers should use plan->map to override features and add callbacks after
+ * common features are added.
+ * May be NULL.
+ */
+ void (*override_features) (hb_ot_shape_planner_t *plan);
+
+
+ /* data_create()
+ * Called at the end of shape_plan().
+ * Whatever shapers return will be accessible through plan->data later.
+ * If nullptr is returned, means a plan failure.
+ */
+ void *(*data_create) (const hb_ot_shape_plan_t *plan);
+
+ /* data_destroy()
+ * Called when the shape_plan is being destroyed.
+ * plan->data is passed here for destruction.
+ * Returns nothing (the hook is void).
+ * May be NULL.
+ */
+ void (*data_destroy) (void *data);
+
+
+ /* preprocess_text()
+ * Called during shape().
+ * Shapers can use to modify text before shaping starts.
+ * May be NULL.
+ */
+ void (*preprocess_text) (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ hb_font_t *font);
+
+ /* postprocess_glyphs()
+ * Called during shape().
+ * Shapers can use to modify glyphs after shaping ends.
+ * May be NULL.
+ */
+ void (*postprocess_glyphs) (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ hb_font_t *font);
+
+
+ /* Normalization mode this shaper asks for.
+ * NOTE(review): the mode values are declared elsewhere
+ * (hb-ot-shape-normalize.hh is included above) -- confirm their meaning there. */
+ hb_ot_shape_normalization_mode_t normalization_preference;
+
+ /* decompose()
+ * Called during shape()'s normalization.
+ * May be NULL.
+ */
+ bool (*decompose) (const hb_ot_shape_normalize_context_t *c,
+ hb_codepoint_t ab,
+ hb_codepoint_t *a,
+ hb_codepoint_t *b);
+
+ /* compose()
+ * Called during shape()'s normalization.
+ * May be NULL.
+ */
+ bool (*compose) (const hb_ot_shape_normalize_context_t *c,
+ hb_codepoint_t a,
+ hb_codepoint_t b,
+ hb_codepoint_t *ab);
+
+ /* setup_masks()
+ * Called during shape().
+ * Shapers should use map to get feature masks and set on buffer.
+ * Shapers may NOT modify characters.
+ * May be NULL.
+ */
+ void (*setup_masks) (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ hb_font_t *font);
+
+ /* gpos_tag
+ * If not HB_TAG_NONE, then must match found GPOS script tag for
+ * GPOS to be applied. Otherwise, fallback positioning will be used.
+ */
+ hb_tag_t gpos_tag;
+
+ /* reorder_marks()
+ * Called during shape().
+ * Shapers can use to modify ordering of combining marks.
+ * May be NULL.
+ */
+ void (*reorder_marks) (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ unsigned int start,
+ unsigned int end);
+
+ /* Zero-width mark handling for this shaper; see
+ * hb_ot_shape_zero_width_marks_type_t. */
+ hb_ot_shape_zero_width_marks_type_t zero_width_marks;
+
+ /* NOTE(review): presumably tells whether fallback mark positioning may be
+ * used for this shaper -- confirm against the code that reads it. */
+ bool fallback_position;
+};
+
+/* Expand the master shaper list into one extern, library-internal declaration
+ * (_hb_ot_complex_shaper_<name>) per shaper, then drop the helper macro. */
+#define HB_COMPLEX_SHAPER_IMPLEMENT(name) extern HB_INTERNAL const hb_ot_complex_shaper_t _hb_ot_complex_shaper_##name;
+HB_COMPLEX_SHAPERS_IMPLEMENT_SHAPERS
+#undef HB_COMPLEX_SHAPER_IMPLEMENT
+
+
+/* Picks the complex shaper to use for a planner.
+ *
+ * Dispatches on planner->props.script.  For several script groups the choice
+ * additionally depends on planner->map.chosen_script[0] and, for the Arabic
+ * group, on whether planner->props.direction is horizontal.  Scripts that are
+ * not listed get the default shaper.
+ *
+ * Returns a pointer to one of the constant _hb_ot_complex_shaper_* objects;
+ * every path returns a non-null pointer. */
+static inline const hb_ot_complex_shaper_t *
+hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
+{
+ switch ((hb_tag_t) planner->props.script)
+ {
+ default:
+ return &_hb_ot_complex_shaper_default;
+
+
+ /* Unicode-1.1 additions */
+ case HB_SCRIPT_ARABIC:
+
+ /* Unicode-3.0 additions */
+ case HB_SCRIPT_MONGOLIAN:
+ case HB_SCRIPT_SYRIAC:
+
+ /* Unicode-5.0 additions */
+ case HB_SCRIPT_NKO:
+ case HB_SCRIPT_PHAGS_PA:
+
+ /* Unicode-6.0 additions */
+ case HB_SCRIPT_MANDAIC:
+
+ /* Unicode-7.0 additions */
+ case HB_SCRIPT_MANICHAEAN:
+ case HB_SCRIPT_PSALTER_PAHLAVI:
+
+ /* Unicode-9.0 additions */
+ case HB_SCRIPT_ADLAM:
+
+ /* Unicode-11.0 additions */
+ case HB_SCRIPT_HANIFI_ROHINGYA:
+ case HB_SCRIPT_SOGDIAN:
+
+ /* For Arabic script, use the Arabic shaper even if no OT script tag was found.
+ * This is because we do fallback shaping for Arabic script (and not others).
+ * But note that Arabic shaping is applicable only to horizontal layout; for
+ * vertical text, just use the generic shaper instead. */
+ if ((planner->map.chosen_script[0] != HB_OT_TAG_DEFAULT_SCRIPT ||
+ planner->props.script == HB_SCRIPT_ARABIC) &&
+ HB_DIRECTION_IS_HORIZONTAL(planner->props.direction))
+ return &_hb_ot_complex_shaper_arabic;
+ else
+ return &_hb_ot_complex_shaper_default;
+
+
+ /* Unicode-1.1 additions */
+ case HB_SCRIPT_THAI:
+ case HB_SCRIPT_LAO:
+
+ return &_hb_ot_complex_shaper_thai;
+
+
+ /* Unicode-1.1 additions */
+ case HB_SCRIPT_HANGUL:
+
+ return &_hb_ot_complex_shaper_hangul;
+
+
+ /* Unicode-1.1 additions */
+ case HB_SCRIPT_HEBREW:
+
+ return &_hb_ot_complex_shaper_hebrew;
+
+
+ /* Unicode-1.1 additions */
+ case HB_SCRIPT_BENGALI:
+ case HB_SCRIPT_DEVANAGARI:
+ case HB_SCRIPT_GUJARATI:
+ case HB_SCRIPT_GURMUKHI:
+ case HB_SCRIPT_KANNADA:
+ case HB_SCRIPT_MALAYALAM:
+ case HB_SCRIPT_ORIYA:
+ case HB_SCRIPT_TAMIL:
+ case HB_SCRIPT_TELUGU:
+
+ /* Unicode-3.0 additions */
+ case HB_SCRIPT_SINHALA:
+
+ /* If the designer designed the font for the 'DFLT' script,
+ * (or we ended up arbitrarily picking 'latn'), use the default shaper.
+ * Otherwise, use the specific shaper.
+ *
+ * If the low byte of the chosen tag is '3' (an "indy3"-style tag),
+ * send to USE. */
+ if (planner->map.chosen_script[0] == HB_TAG ('D','F','L','T') ||
+ planner->map.chosen_script[0] == HB_TAG ('l','a','t','n'))
+ return &_hb_ot_complex_shaper_default;
+ else if ((planner->map.chosen_script[0] & 0x000000FF) == '3')
+ return &_hb_ot_complex_shaper_use;
+ else
+ return &_hb_ot_complex_shaper_indic;
+
+ case HB_SCRIPT_KHMER:
+ return &_hb_ot_complex_shaper_khmer;
+
+ case HB_SCRIPT_MYANMAR:
+ /* If the designer designed the font for the 'DFLT' script,
+ * (or we ended up arbitrarily picking 'latn'), use the default shaper.
+ * Otherwise, use the specific shaper.
+ *
+ * If designer designed for 'mymr' tag, also send to default
+ * shaper. That's tag used from before Myanmar shaping spec
+ * was developed. The shaping spec uses 'mym2' tag. */
+ if (planner->map.chosen_script[0] == HB_TAG ('D','F','L','T') ||
+ planner->map.chosen_script[0] == HB_TAG ('l','a','t','n') ||
+ planner->map.chosen_script[0] == HB_TAG ('m','y','m','r'))
+ return &_hb_ot_complex_shaper_default;
+ else
+ return &_hb_ot_complex_shaper_myanmar;
+
+
+ /* https://github.com/harfbuzz/harfbuzz/issues/1162 */
+ case HB_SCRIPT_MYANMAR_ZAWGYI:
+
+ return &_hb_ot_complex_shaper_myanmar_zawgyi;
+
+
+ /* Scripts below go to USE (unless the font only has 'DFLT'/'latn').
+ * The commented-out case labels in this group are scripts that are already
+ * handled by an earlier group of this switch. */
+
+ /* Unicode-2.0 additions */
+ case HB_SCRIPT_TIBETAN:
+
+ /* Unicode-3.0 additions */
+ //case HB_SCRIPT_MONGOLIAN:
+ //case HB_SCRIPT_SINHALA:
+
+ /* Unicode-3.2 additions */
+ case HB_SCRIPT_BUHID:
+ case HB_SCRIPT_HANUNOO:
+ case HB_SCRIPT_TAGALOG:
+ case HB_SCRIPT_TAGBANWA:
+
+ /* Unicode-4.0 additions */
+ case HB_SCRIPT_LIMBU:
+ case HB_SCRIPT_TAI_LE:
+
+ /* Unicode-4.1 additions */
+ case HB_SCRIPT_BUGINESE:
+ case HB_SCRIPT_KHAROSHTHI:
+ case HB_SCRIPT_SYLOTI_NAGRI:
+ case HB_SCRIPT_TIFINAGH:
+
+ /* Unicode-5.0 additions */
+ case HB_SCRIPT_BALINESE:
+ //case HB_SCRIPT_NKO:
+ //case HB_SCRIPT_PHAGS_PA:
+
+ /* Unicode-5.1 additions */
+ case HB_SCRIPT_CHAM:
+ case HB_SCRIPT_KAYAH_LI:
+ case HB_SCRIPT_LEPCHA:
+ case HB_SCRIPT_REJANG:
+ case HB_SCRIPT_SAURASHTRA:
+ case HB_SCRIPT_SUNDANESE:
+
+ /* Unicode-5.2 additions */
+ case HB_SCRIPT_EGYPTIAN_HIEROGLYPHS:
+ case HB_SCRIPT_JAVANESE:
+ case HB_SCRIPT_KAITHI:
+ case HB_SCRIPT_MEETEI_MAYEK:
+ case HB_SCRIPT_TAI_THAM:
+ case HB_SCRIPT_TAI_VIET:
+
+ /* Unicode-6.0 additions */
+ case HB_SCRIPT_BATAK:
+ case HB_SCRIPT_BRAHMI:
+ //case HB_SCRIPT_MANDAIC:
+
+ /* Unicode-6.1 additions */
+ case HB_SCRIPT_CHAKMA:
+ case HB_SCRIPT_SHARADA:
+ case HB_SCRIPT_TAKRI:
+
+ /* Unicode-7.0 additions */
+ case HB_SCRIPT_DUPLOYAN:
+ case HB_SCRIPT_GRANTHA:
+ case HB_SCRIPT_KHOJKI:
+ case HB_SCRIPT_KHUDAWADI:
+ case HB_SCRIPT_MAHAJANI:
+ //case HB_SCRIPT_MANICHAEAN:
+ case HB_SCRIPT_MODI:
+ case HB_SCRIPT_PAHAWH_HMONG:
+ //case HB_SCRIPT_PSALTER_PAHLAVI:
+ case HB_SCRIPT_SIDDHAM:
+ case HB_SCRIPT_TIRHUTA:
+
+ /* Unicode-8.0 additions */
+ case HB_SCRIPT_AHOM:
+
+ /* Unicode-9.0 additions */
+ //case HB_SCRIPT_ADLAM:
+ case HB_SCRIPT_BHAIKSUKI:
+ case HB_SCRIPT_MARCHEN:
+ case HB_SCRIPT_NEWA:
+
+ /* Unicode-10.0 additions */
+ case HB_SCRIPT_MASARAM_GONDI:
+ case HB_SCRIPT_SOYOMBO:
+ case HB_SCRIPT_ZANABAZAR_SQUARE:
+
+ /* Unicode-11.0 additions */
+ case HB_SCRIPT_DOGRA:
+ case HB_SCRIPT_GUNJALA_GONDI:
+ //case HB_SCRIPT_HANIFI_ROHINGYA:
+ case HB_SCRIPT_MAKASAR:
+ //case HB_SCRIPT_SOGDIAN:
+
+ /* Unicode-12.0 additions */
+ case HB_SCRIPT_NANDINAGARI:
+
+ /* Unicode-13.0 additions */
+ case HB_SCRIPT_CHORASMIAN:
+ case HB_SCRIPT_DIVES_AKURU:
+
+ /* If the designer designed the font for the 'DFLT' script,
+ * (or we ended up arbitrarily picking 'latn'), use the default shaper.
+ * Otherwise, use the specific shaper.
+ * Note that for some simple scripts, there may not be *any*
+ * GSUB/GPOS needed, so there may be no scripts found! */
+ if (planner->map.chosen_script[0] == HB_TAG ('D','F','L','T') ||
+ planner->map.chosen_script[0] == HB_TAG ('l','a','t','n'))
+ return &_hb_ot_complex_shaper_default;
+ else
+ return &_hb_ot_complex_shaper_use;
+ }
+}
+
+
+#endif /* HB_OT_SHAPE_COMPLEX_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-fallback.cc b/thirdparty/harfbuzz/src/hb-ot-shape-fallback.cc
new file mode 100644
index 0000000000..42bf524d16
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-fallback.cc
@@ -0,0 +1,596 @@
+/*
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#include "hb-ot-shape-fallback.hh"
+#include "hb-kern.hh"
+
+/* Maps a (modified) combining class to the positional class used for
+ * fallback mark placement.
+ *
+ * u     - codepoint of the mark; only consulted for the U+0E00..U+0EFF
+ *         (Thai / Lao) block.
+ * klass - current combining class of the mark.
+ *
+ * Classes >= 200 are returned untouched.  Otherwise the Thai / Lao
+ * per-character overrides are applied first, and the result is then looked
+ * up in the table below.  Classes without an entry are returned as they are. */
+static unsigned int
+recategorize_combining_class (hb_codepoint_t u,
+                              unsigned int klass)
+{
+  if (klass >= 200)
+    return klass;
+
+  /* Thai / Lao need some per-character work. */
+  const bool in_thai_lao_block = (u & ~0xFF) == 0x0E00u;
+  if (in_thai_lao_block)
+  {
+    if (unlikely (klass == 0))
+    {
+      switch (u)
+      {
+        /* Thai */
+        case 0x0E31u: case 0x0E34u: case 0x0E35u:
+        case 0x0E36u: case 0x0E37u: case 0x0E47u:
+        case 0x0E4Cu: case 0x0E4Du: case 0x0E4Eu:
+          klass = HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT;
+          break;
+
+        /* Lao, above the base */
+        case 0x0EB1u: case 0x0EB4u: case 0x0EB5u: case 0x0EB6u:
+        case 0x0EB7u: case 0x0EBBu: case 0x0ECCu: case 0x0ECDu:
+          klass = HB_UNICODE_COMBINING_CLASS_ABOVE;
+          break;
+
+        /* Lao, below the base */
+        case 0x0EBCu:
+          klass = HB_UNICODE_COMBINING_CLASS_BELOW;
+          break;
+      }
+    }
+    else if (u == 0x0E3Au)
+    {
+      /* Thai virama is below-right */
+      klass = HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT;
+    }
+  }
+
+  /* Table lookup, grouped by the resulting placement. */
+  switch (klass)
+  {
+    /* ---- placed below ---- */
+    case HB_MODIFIED_COMBINING_CLASS_CCC10:  /* Hebrew: sheva */
+    case HB_MODIFIED_COMBINING_CLASS_CCC11:  /* Hebrew: hataf segol */
+    case HB_MODIFIED_COMBINING_CLASS_CCC12:  /* Hebrew: hataf patah */
+    case HB_MODIFIED_COMBINING_CLASS_CCC13:  /* Hebrew: hataf qamats */
+    case HB_MODIFIED_COMBINING_CLASS_CCC14:  /* Hebrew: hiriq */
+    case HB_MODIFIED_COMBINING_CLASS_CCC15:  /* Hebrew: tsere */
+    case HB_MODIFIED_COMBINING_CLASS_CCC16:  /* Hebrew: segol */
+    case HB_MODIFIED_COMBINING_CLASS_CCC17:  /* Hebrew: patah */
+    case HB_MODIFIED_COMBINING_CLASS_CCC18:  /* Hebrew: qamats */
+    case HB_MODIFIED_COMBINING_CLASS_CCC20:  /* Hebrew: qubuts */
+    case HB_MODIFIED_COMBINING_CLASS_CCC22:  /* Hebrew: meteg */
+    case HB_MODIFIED_COMBINING_CLASS_CCC29:  /* Arabic and Syriac: kasratan */
+    case HB_MODIFIED_COMBINING_CLASS_CCC32:  /* Arabic and Syriac: kasra */
+    case HB_MODIFIED_COMBINING_CLASS_CCC118: /* Lao: sign u / sign uu */
+    case HB_MODIFIED_COMBINING_CLASS_CCC129: /* Tibetan: sign aa */
+    case HB_MODIFIED_COMBINING_CLASS_CCC132: /* Tibetan: sign u */
+      return HB_UNICODE_COMBINING_CLASS_BELOW;
+
+    /* ---- placed below-right ---- */
+    case HB_MODIFIED_COMBINING_CLASS_CCC103: /* Thai: sara u / sara uu */
+      return HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT;
+
+    /* ---- placed above ---- */
+    case HB_MODIFIED_COMBINING_CLASS_CCC26:  /* Hebrew: point varika */
+    case HB_MODIFIED_COMBINING_CLASS_CCC27:  /* Arabic and Syriac: fathatan */
+    case HB_MODIFIED_COMBINING_CLASS_CCC28:  /* Arabic and Syriac: dammatan */
+    case HB_MODIFIED_COMBINING_CLASS_CCC30:  /* Arabic and Syriac: fatha */
+    case HB_MODIFIED_COMBINING_CLASS_CCC31:  /* Arabic and Syriac: damma */
+    case HB_MODIFIED_COMBINING_CLASS_CCC33:  /* Arabic and Syriac: shadda */
+    case HB_MODIFIED_COMBINING_CLASS_CCC34:  /* Arabic and Syriac: sukun */
+    case HB_MODIFIED_COMBINING_CLASS_CCC35:  /* Arabic and Syriac: superscript alef */
+    case HB_MODIFIED_COMBINING_CLASS_CCC36:  /* Arabic and Syriac: superscript alaph */
+    case HB_MODIFIED_COMBINING_CLASS_CCC122: /* Lao: mai */
+    case HB_MODIFIED_COMBINING_CLASS_CCC130: /* Tibetan: sign i */
+      return HB_UNICODE_COMBINING_CLASS_ABOVE;
+
+    /* ---- attached above ---- */
+    case HB_MODIFIED_COMBINING_CLASS_CCC23:  /* Hebrew: rafe */
+      return HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE;
+
+    /* ---- placed above-right ---- */
+    case HB_MODIFIED_COMBINING_CLASS_CCC24:  /* Hebrew: shin dot */
+    case HB_MODIFIED_COMBINING_CLASS_CCC107: /* Thai: mai */
+      return HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT;
+
+    /* ---- placed above-left ---- */
+    case HB_MODIFIED_COMBINING_CLASS_CCC25:  /* Hebrew: sin dot */
+    case HB_MODIFIED_COMBINING_CLASS_CCC19:  /* Hebrew: holam */
+      return HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT;
+
+    /* ---- deliberately left unchanged ---- */
+    case HB_MODIFIED_COMBINING_CLASS_CCC21:  /* Hebrew: dagesh */
+      break;
+  }
+
+  return klass;
+}
+
+/* Rewrites the modified combining class of every non-spacing mark in the
+ * buffer with the value returned by recategorize_combining_class().
+ * plan and font are unused.  Compiles to an immediate return when
+ * HB_NO_OT_SHAPE_FALLBACK is defined. */
+void
+_hb_ot_shape_fallback_mark_position_recategorize_marks (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                                                        hb_font_t *font HB_UNUSED,
+                                                        hb_buffer_t *buffer)
+{
+#ifdef HB_NO_OT_SHAPE_FALLBACK
+  return;
+#endif
+
+  hb_glyph_info_t *glyphs = buffer->info;
+  const unsigned int n = buffer->len;
+  for (unsigned int idx = 0; idx < n; idx++)
+  {
+    hb_glyph_info_t *glyph = &glyphs[idx];
+
+    /* Only non-spacing marks are recategorized. */
+    if (_hb_glyph_info_get_general_category (glyph) != HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
+      continue;
+
+    const unsigned int old_class = _hb_glyph_info_get_modified_combining_class (glyph);
+    const unsigned int new_class = recategorize_combining_class (glyph->codepoint, old_class);
+    _hb_glyph_info_set_modified_combining_class (glyph, new_class);
+  }
+}
+
+
+/* Zeroes the advances of all non-spacing marks in [start, end).
+ * When adjust_offsets_when_zeroing is set, each advance is first subtracted
+ * from the matching offset before being cleared. */
+static void
+zero_mark_advances (hb_buffer_t *buffer,
+                    unsigned int start,
+                    unsigned int end,
+                    bool adjust_offsets_when_zeroing)
+{
+  for (unsigned int idx = start; idx < end; idx++)
+  {
+    if (_hb_glyph_info_get_general_category (&buffer->info[idx]) != HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
+      continue;
+
+    hb_glyph_position_t &mark_pos = buffer->pos[idx];
+    if (adjust_offsets_when_zeroing)
+    {
+      mark_pos.x_offset -= mark_pos.x_advance;
+      mark_pos.y_offset -= mark_pos.y_advance;
+    }
+    mark_pos.x_advance = 0;
+    mark_pos.y_advance = 0;
+  }
+}
+
+/* Computes fallback x/y offsets for the mark glyph at index i, placing it
+ * relative to base_extents according to combining_class.
+ *
+ * plan            - unused.
+ * font            - supplies the mark's glyph extents and y_scale (the gap
+ *                   between base and mark is y_scale / 16).
+ * buffer          - buffer whose pos[i] offsets are overwritten.
+ * base_extents    - in/out: the box the mark is attached to.  It is updated
+ *                   to account for the mark just placed, so that a following
+ *                   mark positioned against the same box stacks beyond it.
+ * i               - index of the mark in the buffer.
+ * combining_class - modified combining class selecting the placement.
+ *
+ * Returns without touching anything if the font has no extents for the mark. */
+static inline void
+position_mark (const hb_ot_shape_plan_t *plan HB_UNUSED,
+               hb_font_t *font,
+               hb_buffer_t *buffer,
+               hb_glyph_extents_t &base_extents,
+               unsigned int i,
+               unsigned int combining_class)
+{
+  hb_glyph_extents_t mark_extents;
+  if (!font->get_glyph_extents (buffer->info[i].codepoint, &mark_extents))
+    return;
+
+  hb_position_t y_gap = font->y_scale / 16;
+
+  hb_glyph_position_t &pos = buffer->pos[i];
+  pos.x_offset = pos.y_offset = 0;
+
+
+  /* We don't position LEFT and RIGHT marks. */
+
+  /* X positioning */
+  switch (combining_class)
+  {
+    case HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW:
+    case HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE:
+      /* Double marks are centered on the trailing edge of the base in the
+       * text direction; for other directions fall through to centering. */
+      if (buffer->props.direction == HB_DIRECTION_LTR) {
+        pos.x_offset += base_extents.x_bearing + base_extents.width - mark_extents.width / 2 - mark_extents.x_bearing;
+        break;
+      } else if (buffer->props.direction == HB_DIRECTION_RTL) {
+        pos.x_offset += base_extents.x_bearing - mark_extents.width / 2 - mark_extents.x_bearing;
+        break;
+      }
+      HB_FALLTHROUGH;
+
+    default:
+    case HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW:
+    case HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE:
+    case HB_UNICODE_COMBINING_CLASS_BELOW:
+    case HB_UNICODE_COMBINING_CLASS_ABOVE:
+      /* Center align. */
+      pos.x_offset += base_extents.x_bearing + (base_extents.width - mark_extents.width) / 2 - mark_extents.x_bearing;
+      break;
+
+    case HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT:
+    case HB_UNICODE_COMBINING_CLASS_BELOW_LEFT:
+    case HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT:
+      /* Left align. */
+      pos.x_offset += base_extents.x_bearing - mark_extents.x_bearing;
+      break;
+
+    case HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT:
+    case HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT:
+    case HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT:
+      /* Right align. */
+      pos.x_offset += base_extents.x_bearing + base_extents.width - mark_extents.width - mark_extents.x_bearing;
+      break;
+  }
+
+  /* Y positioning.  Classes not listed here keep y_offset == 0. */
+  switch (combining_class)
+  {
+    case HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW:
+    case HB_UNICODE_COMBINING_CLASS_BELOW_LEFT:
+    case HB_UNICODE_COMBINING_CLASS_BELOW:
+    case HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT:
+      /* Add gap, fall-through. */
+      base_extents.height -= y_gap;
+      HB_FALLTHROUGH;
+
+    case HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT:
+    case HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW:
+      pos.y_offset = base_extents.y_bearing + base_extents.height - mark_extents.y_bearing;
+      /* Never shift up "below" marks. */
+      if ((y_gap > 0) == (pos.y_offset > 0))
+      {
+        base_extents.height -= pos.y_offset;
+        pos.y_offset = 0;
+      }
+      base_extents.height += mark_extents.height;
+      break;
+
+    case HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE:
+    case HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT:
+    case HB_UNICODE_COMBINING_CLASS_ABOVE:
+    case HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT:
+      /* Add gap, fall-through. */
+      base_extents.y_bearing += y_gap;
+      base_extents.height -= y_gap;
+      HB_FALLTHROUGH;
+
+    case HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE:
+    case HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT:
+      pos.y_offset = base_extents.y_bearing - (mark_extents.y_bearing + mark_extents.height);
+      /* Don't shift down "above" marks too much. */
+      if ((y_gap > 0) != (pos.y_offset > 0))
+      {
+        /* The correction is a signed quantity: it is negative whenever
+         * pos.y_offset is positive.  Keep it in the signed position type
+         * rather than relying on unsigned wrap-around when mixing it into
+         * the signed extents and offsets below. */
+        hb_position_t correction = -pos.y_offset / 2;
+        base_extents.y_bearing += correction;
+        base_extents.height -= correction;
+        pos.y_offset += correction;
+      }
+      base_extents.y_bearing -= mark_extents.height;
+      base_extents.height += mark_extents.height;
+      break;
+  }
+}
+
+/* Positions the glyphs in (base, end) around the glyph at index `base`.
+ *
+ * The whole range [base, end) is flagged unsafe-to-break.  If the font has no
+ * extents for the base glyph, the marks only get their advances zeroed
+ * (zero_mark_advances) and nothing is positioned.
+ *
+ * Glyphs with a non-zero modified combining class are placed with
+ * position_mark() and get zero advance; glyphs with class zero are skipped
+ * and their advance is accumulated into the running x/y offset that is added
+ * to every mark positioned after them.
+ *
+ * adjust_offsets_when_zeroing is only forwarded to zero_mark_advances(). */
+static inline void
+position_around_base (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer,
+ unsigned int base,
+ unsigned int end,
+ bool adjust_offsets_when_zeroing)
+{
+ /* Resolved lazily, the first time a ligature component has to be located. */
+ hb_direction_t horiz_dir = HB_DIRECTION_INVALID;
+
+ buffer->unsafe_to_break (base, end);
+
+ hb_glyph_extents_t base_extents;
+ if (!font->get_glyph_extents (buffer->info[base].codepoint,
+ &base_extents))
+ {
+ /* If extents don't work, zero marks and go home. */
+ zero_mark_advances (buffer, base + 1, end, adjust_offsets_when_zeroing);
+ return;
+ }
+ base_extents.y_bearing += buffer->pos[base].y_offset;
+ /* Use horizontal advance for horizontal positioning.
+ * Generally a better idea. Also works for zero-ink glyphs. See:
+ * https://github.com/harfbuzz/harfbuzz/issues/1532 */
+ base_extents.x_bearing = 0;
+ base_extents.width = font->get_glyph_h_advance (buffer->info[base].codepoint);
+
+ unsigned int lig_id = _hb_glyph_info_get_lig_id (&buffer->info[base]);
+ /* Use integer for num_lig_components such that it doesn't convert to unsigned
+ * when we divide or multiply by it. */
+ int num_lig_components = _hb_glyph_info_get_lig_num_comps (&buffer->info[base]);
+
+ /* Running offset added to every positioned mark.  In forward direction it
+ * starts at minus the base's advance. */
+ hb_position_t x_offset = 0, y_offset = 0;
+ if (HB_DIRECTION_IS_FORWARD (buffer->props.direction)) {
+ x_offset -= buffer->pos[base].x_advance;
+ y_offset -= buffer->pos[base].y_advance;
+ }
+
+ hb_glyph_extents_t component_extents = base_extents;
+ int last_lig_component = -1;
+ /* 255 is used as the "no class seen yet" marker, so the first mark always
+ * resets cluster_extents. */
+ unsigned int last_combining_class = 255;
+ hb_glyph_extents_t cluster_extents = base_extents; /* Initialization is just to shut gcc up. */
+ hb_glyph_info_t *info = buffer->info;
+ for (unsigned int i = base + 1; i < end; i++)
+ if (_hb_glyph_info_get_modified_combining_class (&info[i]))
+ {
+ if (num_lig_components > 1) {
+ unsigned int this_lig_id = _hb_glyph_info_get_lig_id (&info[i]);
+ int this_lig_component = _hb_glyph_info_get_lig_comp (&info[i]) - 1;
+ /* Conditions for attaching to the last component. */
+ if (!lig_id || lig_id != this_lig_id || this_lig_component >= num_lig_components)
+ this_lig_component = num_lig_components - 1;
+ if (last_lig_component != this_lig_component)
+ {
+ /* New component: restart from the base box and carve out an equal
+ * 1/num_lig_components slice of its width for this component. */
+ last_lig_component = this_lig_component;
+ last_combining_class = 255;
+ component_extents = base_extents;
+ if (unlikely (horiz_dir == HB_DIRECTION_INVALID)) {
+ if (HB_DIRECTION_IS_HORIZONTAL (plan->props.direction))
+ horiz_dir = plan->props.direction;
+ else
+ horiz_dir = hb_script_get_horizontal_direction (plan->props.script);
+ }
+ if (horiz_dir == HB_DIRECTION_LTR)
+ component_extents.x_bearing += (this_lig_component * component_extents.width) / num_lig_components;
+ else
+ component_extents.x_bearing += ((num_lig_components - 1 - this_lig_component) * component_extents.width) / num_lig_components;
+ component_extents.width /= num_lig_components;
+ }
+ }
+
+ /* A change of combining class restarts stacking from the component box;
+ * marks of the same class keep stacking on the box updated by
+ * position_mark(). */
+ unsigned int this_combining_class = _hb_glyph_info_get_modified_combining_class (&info[i]);
+ if (last_combining_class != this_combining_class)
+ {
+ last_combining_class = this_combining_class;
+ cluster_extents = component_extents;
+ }
+
+ position_mark (plan, font, buffer, cluster_extents, i, this_combining_class);
+
+ buffer->pos[i].x_advance = 0;
+ buffer->pos[i].y_advance = 0;
+ buffer->pos[i].x_offset += x_offset;
+ buffer->pos[i].y_offset += y_offset;
+
+ } else {
+ /* Class-zero glyph: not positioned, but its advance shifts the marks
+ * that follow it. */
+ if (HB_DIRECTION_IS_FORWARD (buffer->props.direction)) {
+ x_offset -= buffer->pos[i].x_advance;
+ y_offset -= buffer->pos[i].y_advance;
+ } else {
+ x_offset += buffer->pos[i].x_advance;
+ y_offset += buffer->pos[i].y_advance;
+ }
+ }
+}
+
+/* Splits [start, end) into runs consisting of one non-mark glyph followed by
+ * the Unicode marks after it, and hands each run to position_around_base().
+ * Marks at the start of the range that have no preceding non-mark glyph are
+ * skipped.  Ranges shorter than two glyphs are ignored. */
+static inline void
+position_cluster (const hb_ot_shape_plan_t *plan,
+                  hb_font_t *font,
+                  hb_buffer_t *buffer,
+                  unsigned int start,
+                  unsigned int end,
+                  bool adjust_offsets_when_zeroing)
+{
+  if (end - start < 2)
+    return;
+
+  hb_glyph_info_t *glyphs = buffer->info;
+  unsigned int base = start;
+  while (base < end)
+  {
+    /* Look for the next base (non-mark) glyph. */
+    if (_hb_glyph_info_is_unicode_mark (&glyphs[base]))
+    {
+      base++;
+      continue;
+    }
+
+    /* Extend the run over all marks that follow it. */
+    unsigned int run_end = base + 1;
+    while (run_end < end && _hb_glyph_info_is_unicode_mark (&glyphs[run_end]))
+      run_end++;
+
+    position_around_base (plan, font, buffer, base, run_end, adjust_offsets_when_zeroing);
+
+    /* Continue with whatever stopped the run. */
+    base = run_end;
+  }
+}
+
+/* Fallback mark positioning over the whole buffer.
+ * The buffer is cut at every glyph (from index 1 on) that is not a Unicode
+ * mark, and each resulting piece is passed to position_cluster().
+ * Compiles to an immediate return when HB_NO_OT_SHAPE_FALLBACK is defined. */
+void
+_hb_ot_shape_fallback_mark_position (const hb_ot_shape_plan_t *plan,
+                                     hb_font_t *font,
+                                     hb_buffer_t *buffer,
+                                     bool adjust_offsets_when_zeroing)
+{
+#ifdef HB_NO_OT_SHAPE_FALLBACK
+  return;
+#endif
+
+  _hb_buffer_assert_gsubgpos_vars (buffer);
+
+  hb_glyph_info_t *glyphs = buffer->info;
+  const unsigned int n = buffer->len;
+  unsigned int cluster_start = 0;
+  for (unsigned int idx = 1; idx < n; idx++)
+  {
+    const bool starts_cluster = likely (!_hb_glyph_info_is_unicode_mark (&glyphs[idx]));
+    if (!starts_cluster)
+      continue;
+
+    /* A non-mark closes the cluster collected so far and opens a new one. */
+    position_cluster (plan, font, buffer, cluster_start, idx, adjust_offsets_when_zeroing);
+    cluster_start = idx;
+  }
+
+  /* Trailing cluster. */
+  position_cluster (plan, font, buffer, cluster_start, n, adjust_offsets_when_zeroing);
+}
+
+
+#ifndef HB_DISABLE_DEPRECATED
+/* Kerning driver backed by the font's glyph-kerning callbacks.
+ * Remembers the font and the buffer's direction at construction time. */
+struct hb_ot_shape_fallback_kern_driver_t
+{
+  hb_ot_shape_fallback_kern_driver_t (hb_font_t *font_, hb_buffer_t *buffer)
+    : font (font_),
+      direction (buffer->props.direction)
+  {}
+
+  /* Returns the kerning the font reports for the pair (first, second) in the
+   * stored direction.  Both output pointers target the same variable. */
+  hb_position_t get_kerning (hb_codepoint_t first, hb_codepoint_t second) const
+  {
+    hb_position_t amount = 0;
+    font->get_glyph_kerning_for_direction (first, second, direction, &amount, &amount);
+    return amount;
+  }
+
+  hb_font_t *font;
+  hb_direction_t direction;
+};
+#endif
+
+/* Performs font-assisted kerning.
+ * Does nothing when the font has no kerning callback for the buffer's
+ * orientation, when HB_NO_OT_SHAPE_FALLBACK is defined, or when
+ * HB_DISABLE_DEPRECATED is defined.  Backward buffers are reversed around
+ * the kerning pass and reversed back afterwards. */
+void
+_hb_ot_shape_fallback_kern (const hb_ot_shape_plan_t *plan,
+                            hb_font_t *font,
+                            hb_buffer_t *buffer)
+{
+#ifdef HB_NO_OT_SHAPE_FALLBACK
+  return;
+#endif
+
+#ifndef HB_DISABLE_DEPRECATED
+  const bool font_can_kern = HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction)
+                             ? font->has_glyph_h_kerning_func ()
+                             : font->has_glyph_v_kerning_func ();
+  if (!font_can_kern)
+    return;
+
+  const bool backward = HB_DIRECTION_IS_BACKWARD (buffer->props.direction);
+  if (backward)
+    buffer->reverse ();
+
+  hb_ot_shape_fallback_kern_driver_t driver (font, buffer);
+  OT::hb_kern_machine_t<hb_ot_shape_fallback_kern_driver_t> machine (driver);
+  machine.kern (font, buffer, plan->kern_mask, false);
+
+  /* Restore the original glyph order. */
+  if (backward)
+    buffer->reverse ();
+#endif
+}
+
+
+/* Adjusts width of various spaces.
+ * For every glyph that is a Unicode space and has not been ligated, the
+ * advance along the buffer's orientation (x for horizontal, y otherwise) is
+ * overridden according to the glyph's space fallback type.  plan is unused. */
+void
+_hb_ot_shape_fallback_spaces (const hb_ot_shape_plan_t *plan HB_UNUSED,
+                              hb_font_t *font,
+                              hb_buffer_t *buffer)
+{
+  typedef hb_unicode_funcs_t t;
+
+  const bool is_horizontal = HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction);
+  hb_glyph_info_t *glyphs = buffer->info;
+  hb_glyph_position_t *positions = buffer->pos;
+  const unsigned int n = buffer->len;
+
+  for (unsigned int idx = 0; idx < n; idx++)
+  {
+    if (!_hb_glyph_info_is_unicode_space (&glyphs[idx]) || _hb_glyph_info_ligated (&glyphs[idx]))
+      continue;
+
+    hb_glyph_position_t &space_pos = positions[idx];
+    hb_codepoint_t glyph;
+    const t::space_t space_type = _hb_glyph_info_get_unicode_space_fallback_type (&glyphs[idx]);
+    switch (space_type)
+    {
+      case t::NOT_SPACE: /* Shouldn't happen. */
+      case t::SPACE:
+        break;
+
+      /* EM / N spaces: the enum value itself is the divisor; rounded. */
+      case t::SPACE_EM:
+      case t::SPACE_EM_2:
+      case t::SPACE_EM_3:
+      case t::SPACE_EM_4:
+      case t::SPACE_EM_5:
+      case t::SPACE_EM_6:
+      case t::SPACE_EM_16:
+        if (is_horizontal)
+          space_pos.x_advance = +(font->x_scale + ((int) space_type)/2) / (int) space_type;
+        else
+          space_pos.y_advance = -(font->y_scale + ((int) space_type)/2) / (int) space_type;
+        break;
+
+      /* 4/18 of an EM. */
+      case t::SPACE_4_EM_18:
+        if (is_horizontal)
+          space_pos.x_advance = (int64_t) +font->x_scale * 4 / 18;
+        else
+          space_pos.y_advance = (int64_t) -font->y_scale * 4 / 18;
+        break;
+
+      /* Advance of the first digit '0'..'9' the font has a glyph for. */
+      case t::SPACE_FIGURE:
+        for (char digit = '0'; digit <= '9'; digit++)
+        {
+          if (!font->get_nominal_glyph (digit, &glyph))
+            continue;
+          if (is_horizontal)
+            space_pos.x_advance = font->get_glyph_h_advance (glyph);
+          else
+            space_pos.y_advance = font->get_glyph_v_advance (glyph);
+          break;
+        }
+        break;
+
+      /* Advance of '.', or of ',' if the font has no '.'. */
+      case t::SPACE_PUNCTUATION:
+        if (font->get_nominal_glyph ('.', &glyph) ||
+            font->get_nominal_glyph (',', &glyph))
+        {
+          if (is_horizontal)
+            space_pos.x_advance = font->get_glyph_h_advance (glyph);
+          else
+            space_pos.y_advance = font->get_glyph_v_advance (glyph);
+        }
+        break;
+
+      case t::SPACE_NARROW:
+        /* Half-space?
+         * Unicode doc https://unicode.org/charts/PDF/U2000.pdf says ~1/4 or 1/5 of EM.
+         * However, in my testing, many fonts have their regular space being about that
+         * size.  To me, a percentage of the space width makes more sense.  Half is as
+         * good as any. */
+        if (is_horizontal)
+          space_pos.x_advance /= 2;
+        else
+          space_pos.y_advance /= 2;
+        break;
+    }
+  }
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-fallback.hh b/thirdparty/harfbuzz/src/hb-ot-shape-fallback.hh
new file mode 100644
index 0000000000..5faf5f2dfb
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-fallback.hh
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_FALLBACK_HH
+#define HB_OT_SHAPE_FALLBACK_HH
+
+#include "hb.hh"
+
+#include "hb-ot-shape.hh"
+
+
+/* Fallback mark positioning: places the marks of each cluster in the buffer
+ * around their base glyph.  adjust_offsets_when_zeroing is used when mark
+ * advances have to be zeroed because the base glyph has no extents. */
+HB_INTERNAL void _hb_ot_shape_fallback_mark_position (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer,
+ bool adjust_offsets_when_zeroing);
+
+/* Rewrites the modified combining class of every non-spacing mark in the
+ * buffer to the class used by fallback mark positioning. */
+HB_INTERNAL void _hb_ot_shape_fallback_mark_position_recategorize_marks (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+
+
+/* Performs font-assisted kerning. */
+HB_INTERNAL void _hb_ot_shape_fallback_kern (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+
+/* Adjusts width of various spaces. */
+HB_INTERNAL void _hb_ot_shape_fallback_spaces (const hb_ot_shape_plan_t *plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer);
+
+
+#endif /* HB_OT_SHAPE_FALLBACK_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-normalize.cc b/thirdparty/harfbuzz/src/hb-ot-shape-normalize.cc
new file mode 100644
index 0000000000..50b5829c4a
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-normalize.cc
@@ -0,0 +1,478 @@
+/*
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#include "hb-ot-shape-normalize.hh"
+#include "hb-ot-shape-complex.hh"
+#include "hb-ot-shape.hh"
+
+
+/*
+ * HIGHLEVEL DESIGN:
+ *
+ * This file exports one main function: _hb_ot_shape_normalize().
+ *
+ * This function closely reflects the Unicode Normalization Algorithm,
+ * yet it's different.
+ *
+ * Each shaper specifies whether it prefers decomposed (NFD) or composed (NFC).
+ * The logic however tries to use whatever the font can support.
+ *
+ * In general what happens is this: each grapheme is decomposed in a chain
+ * of 1:2 decompositions, marks are reordered, and then recomposed if desired.
+ * So far it's like Unicode Normalization. However, the decomposition and
+ * recomposition only happen if the font supports the resulting characters.
+ *
+ * The goals are:
+ *
+ * - Try to render all canonically equivalent strings similarly. To really
+ * achieve this we have to always do the full decomposition and then
+ * selectively recompose from there. It's kinda too expensive though, so
+ * we skip some cases. For example, if composed is desired, we simply
+ * don't touch 1-character clusters that are supported by the font, even
+ * though their NFC may be different.
+ *
+ * - When a font has a precomposed character for a sequence but the 'ccmp'
+ * feature in the font is not adequate, use the precomposed character
+ * which typically has better mark positioning.
+ *
+ * - When a font does not support a combining mark, but supports it precomposed
+ * with previous base, use that. This needs the itemizer to have this
+ * knowledge too. We need to provide assistance to the itemizer.
+ *
+ * - When a font does not support a character but supports its canonical
+ * decomposition, well, use the decomposition.
+ *
+ * - The complex shapers can customize the compose and decompose functions to
+ * offload some of their requirements to the normalizer. For example, the
+ * Indic shaper may want to disallow recomposing of two matras.
+ */
+
+static bool /* Default decompose hook, used by _hb_ot_shape_normalize() when the shaper supplies none. */
+decompose_unicode (const hb_ot_shape_normalize_context_t *c,
+ hb_codepoint_t ab,
+ hb_codepoint_t *a,
+ hb_codepoint_t *b)
+{
+ return (bool) c->unicode->decompose (ab, a, b); /* Forward to the context's Unicode funcs; result coerced to bool. */
+}
+
+static bool /* Default compose hook, used by _hb_ot_shape_normalize() when the shaper supplies none. */
+compose_unicode (const hb_ot_shape_normalize_context_t *c,
+ hb_codepoint_t a,
+ hb_codepoint_t b,
+ hb_codepoint_t *ab)
+{
+ return (bool) c->unicode->compose (a, b, ab); /* Forward to the context's Unicode funcs; result coerced to bool. */
+}
+
+static inline void /* Ask the font for the nominal glyph of info.codepoint and store it in info.glyph_index(). */
+set_glyph (hb_glyph_info_t &info, hb_font_t *font)
+{
+ (void) font->get_nominal_glyph (info.codepoint, &info.glyph_index()); /* Lookup result is deliberately discarded. */
+}
+
+static inline void /* Output character `unichar` together with its already-resolved `glyph`. */
+output_char (hb_buffer_t *buffer, hb_codepoint_t unichar, hb_codepoint_t glyph)
+{
+ buffer->cur().glyph_index() = glyph; /* Stash the glyph on the current item before output_glyph() runs. */
+ buffer->output_glyph (unichar); /* This is very confusing indeed: the value passed here is a Unicode character, not a glyph id. */
+ _hb_glyph_info_set_unicode_props (&buffer->prev(), buffer); /* Refresh Unicode props on prev() (presumably the item just output -- confirm in hb-buffer.hh). */
+}
+
+static inline void /* Record `glyph` on the current item, then call next_glyph() on the buffer. */
+next_char (hb_buffer_t *buffer, hb_codepoint_t glyph)
+{
+ buffer->cur().glyph_index() = glyph;
+ buffer->next_glyph ();
+}
+
+static inline void /* Thin wrapper over buffer->skip_glyph(). */
+skip_char (hb_buffer_t *buffer)
+{
+ buffer->skip_glyph ();
+}
+
+/* Recursively decomposes `ab`, emitting the pieces with output_char(). Returns 0 if didn't decompose, number of resulting characters otherwise. */
+static inline unsigned int
+decompose (const hb_ot_shape_normalize_context_t *c, bool shortest, hb_codepoint_t ab)
+{
+ hb_codepoint_t a = 0, b = 0, a_glyph = 0, b_glyph = 0;
+ hb_buffer_t * const buffer = c->buffer;
+ hb_font_t * const font = c->font;
+
+ if (!c->decompose (c, ab, &a, &b) ||
+ (b && !font->get_nominal_glyph (b, &b_glyph)))
+ return 0; /* The hook found no decomposition, or the font has no glyph for a non-zero b. */
+
+ bool has_a = (bool) font->get_nominal_glyph (a, &a_glyph);
+ if (shortest && has_a) { /* Shallowest decomposition requested and the font has a: stop at this level. */
+ /* Output a and b */
+ output_char (buffer, a, a_glyph);
+ if (likely (b)) {
+ output_char (buffer, b, b_glyph);
+ return 2;
+ }
+ return 1;
+ }
+
+ unsigned int ret;
+ if ((ret = decompose (c, shortest, a))) { /* Try to break a down further; its pieces are output before b. */
+ if (b) {
+ output_char (buffer, b, b_glyph);
+ return ret + 1;
+ }
+ return ret;
+ }
+
+ if (has_a) { /* Recursing on a produced nothing, but the font has a itself. */
+ output_char (buffer, a, a_glyph);
+ if (likely (b)) {
+ output_char (buffer, b, b_glyph);
+ return 2;
+ }
+ return 1;
+ }
+
+ return 0; /* Font has no glyph for a, and recursing on a produced nothing. */
+}
+
+static inline void /* Handle buffer->cur(): pass it through, decompose it, or substitute a fallback glyph. */
+decompose_current_character (const hb_ot_shape_normalize_context_t *c, bool shortest)
+{
+ hb_buffer_t * const buffer = c->buffer;
+ hb_codepoint_t u = buffer->cur().codepoint;
+ hb_codepoint_t glyph = 0;
+
+ if (shortest && c->font->get_nominal_glyph (u, &glyph)) /* shortest: keep the character as-is when the font has it. */
+ {
+ next_char (buffer, glyph);
+ return;
+ }
+
+ if (decompose (c, shortest, u)) /* decompose() already output the pieces; skip_char() is then applied to the original. */
+ {
+ skip_char (buffer);
+ return;
+ }
+
+ if (!shortest && c->font->get_nominal_glyph (u, &glyph)) /* Not shortest: the character itself is only tried after decomposition failed. */
+ {
+ next_char (buffer, glyph);
+ return;
+ }
+
+ if (_hb_glyph_info_is_unicode_space (&buffer->cur())) /* Space the font lacks: use the glyph of U+0020 and record the fallback type. */
+ {
+ hb_codepoint_t space_glyph;
+ hb_unicode_funcs_t::space_t space_type = buffer->unicode->space_fallback_type (u);
+ if (space_type != hb_unicode_funcs_t::NOT_SPACE && c->font->get_nominal_glyph (0x0020u, &space_glyph))
+ {
+ _hb_glyph_info_set_unicode_space_fallback_type (&buffer->cur(), space_type);
+ next_char (buffer, space_glyph);
+ buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK;
+ return;
+ }
+ }
+
+ if (u == 0x2011u)
+ {
+ /* U+2011 is the only sensible character that is a no-break version of another character
+ * and not a space. The space ones are handled already. Handle this lone one. */
+ hb_codepoint_t other_glyph;
+ if (c->font->get_nominal_glyph (0x2010u, &other_glyph))
+ {
+ next_char (buffer, other_glyph);
+ return;
+ }
+ }
+
+ next_char (buffer, glyph); /* glyph is 0-initialized at its declaration; the failed get_nominal_glyph() calls above were also handed its address. */
+}
+
+static inline void /* Cluster containing a variation selector: resolve base+selector pairs; no (de)composition is attempted. */
+handle_variation_selector_cluster (const hb_ot_shape_normalize_context_t *c,
+ unsigned int end,
+ bool short_circuit HB_UNUSED)
+{
+ /* TODO Currently if there's a variation-selector we give up, it's just too hard. */
+ hb_buffer_t * const buffer = c->buffer;
+ hb_font_t * const font = c->font;
+ for (; buffer->idx < end - 1 && buffer->successful;) { /* Stops one item short of end; the body peeks at cur(+1). */
+ if (unlikely (buffer->unicode->is_variation_selector (buffer->cur(+1).codepoint))) {
+ if (font->get_variation_glyph (buffer->cur().codepoint, buffer->cur(+1).codepoint, &buffer->cur().glyph_index()))
+ {
+ hb_codepoint_t unicode = buffer->cur().codepoint;
+ buffer->replace_glyphs (2, 1, &unicode); /* Font has a variation glyph: replace the two items by one carrying the base codepoint. */
+ }
+ else
+ {
+ /* Just pass on the two characters separately, let GSUB do its magic. */
+ set_glyph (buffer->cur(), font);
+ buffer->next_glyph ();
+ set_glyph (buffer->cur(), font);
+ buffer->next_glyph ();
+ }
+ /* Skip any further variation selectors. */
+ while (buffer->idx < end && unlikely (buffer->unicode->is_variation_selector (buffer->cur().codepoint)))
+ {
+ set_glyph (buffer->cur(), font);
+ buffer->next_glyph ();
+ }
+ } else {
+ set_glyph (buffer->cur(), font); /* Next item is not a selector: pass this one through with its nominal glyph. */
+ buffer->next_glyph ();
+ }
+ }
+ if (likely (buffer->idx < end)) { /* One item may remain because the loop stops at end - 1. */
+ set_glyph (buffer->cur(), font);
+ buffer->next_glyph ();
+ }
+}
+
+static inline void /* Decompose every item in [buffer->idx, end), unless the cluster contains a variation selector. */
+decompose_multi_char_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end, bool short_circuit)
+{
+ hb_buffer_t * const buffer = c->buffer;
+ for (unsigned int i = buffer->idx; i < end && buffer->successful; i++) /* Pre-scan for a variation selector. */
+ if (unlikely (buffer->unicode->is_variation_selector (buffer->info[i].codepoint))) {
+ handle_variation_selector_cluster (c, end, short_circuit); /* The whole cluster is handed to the selector path. */
+ return;
+ }
+
+ while (buffer->idx < end && buffer->successful)
+ decompose_current_character (c, short_circuit);
+}
+
+
+static int /* Three-way comparator on modified combining class; passed to buffer->sort() in the reorder round. */
+compare_combining_class (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
+{
+ unsigned int a = _hb_glyph_info_get_modified_combining_class (pa);
+ unsigned int b = _hb_glyph_info_get_modified_combining_class (pb);
+
+ return a < b ? -1 : a == b ? 0 : +1; /* -1 / 0 / +1 for less / equal / greater. */
+}
+
+
+void /* Normalize `buffer` for `font` in up to three rounds: decompose, reorder marks, recompose (mode-dependent). */
+_hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ hb_font_t *font)
+{
+ if (unlikely (!buffer->len)) return; /* Nothing to do for an empty buffer. */
+
+ _hb_buffer_assert_unicode_vars (buffer);
+
+ hb_ot_shape_normalization_mode_t mode = plan->shaper->normalization_preference;
+ if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_AUTO) /* AUTO currently resolves to COMPOSED_DIACRITICS on both branches below. */
+ {
+ if (plan->has_gpos_mark)
+ // https://github.com/harfbuzz/harfbuzz/issues/653#issuecomment-423905920
+ //mode = HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED;
+ mode = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
+ else
+ mode = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
+ }
+
+ const hb_ot_shape_normalize_context_t c = {
+ plan,
+ buffer,
+ font,
+ buffer->unicode,
+ plan->shaper->decompose ? plan->shaper->decompose : decompose_unicode, /* Shaper hook if present, else the Unicode default. */
+ plan->shaper->compose ? plan->shaper->compose : compose_unicode /* Shaper hook if present, else the Unicode default. */
+ };
+
+ bool always_short_circuit = mode == HB_OT_SHAPE_NORMALIZATION_MODE_NONE;
+ bool might_short_circuit = always_short_circuit ||
+ (mode != HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED &&
+ mode != HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT);
+ unsigned int count;
+
+ /* We do a fairly straightforward yet custom normalization process in three
+ * separate rounds: decompose, reorder, recompose (if desired). Currently
+ * this makes two buffer swaps. We can make it faster by moving the last
+ * two rounds into the inner loop for the first round, but it's more readable
+ * this way. */
+
+
+ /* First round, decompose */
+
+ bool all_simple = true; /* Cleared as soon as a cluster with marks is encountered. */
+ {
+ buffer->clear_output ();
+ count = buffer->len;
+ buffer->idx = 0;
+ do
+ {
+ unsigned int end;
+ for (end = buffer->idx + 1; end < count; end++) /* Advance end to the next mark, or to count. */
+ if (unlikely (_hb_glyph_info_is_unicode_mark (&buffer->info[end])))
+ break;
+
+ if (end < count)
+ end--; /* Leave one base for the marks to cluster with. */
+
+ /* From idx to end are simple clusters. */
+ if (might_short_circuit)
+ {
+ unsigned int done = font->get_nominal_glyphs (end - buffer->idx,
+ &buffer->cur().codepoint,
+ sizeof (buffer->info[0]),
+ &buffer->cur().glyph_index(),
+ sizeof (buffer->info[0]));
+ buffer->next_glyphs (done); /* Batch path: advance over the `done` items the font reported. */
+ }
+ while (buffer->idx < end && buffer->successful) /* Whatever the batch path did not cover goes one at a time. */
+ decompose_current_character (&c, might_short_circuit);
+
+ if (buffer->idx == count || !buffer->successful)
+ break;
+
+ all_simple = false;
+
+ /* Find all the marks now. */
+ for (end = buffer->idx + 1; end < count; end++)
+ if (!_hb_glyph_info_is_unicode_mark(&buffer->info[end]))
+ break;
+
+ /* idx to end is one non-simple cluster. */
+ decompose_multi_char_cluster (&c, end, always_short_circuit);
+ }
+ while (buffer->idx < count && buffer->successful);
+ buffer->swap_buffers ();
+ }
+
+
+ /* Second round, reorder (inplace) */
+
+ if (!all_simple)
+ {
+ count = buffer->len;
+ for (unsigned int i = 0; i < count; i++)
+ {
+ if (_hb_glyph_info_get_modified_combining_class (&buffer->info[i]) == 0)
+ continue;
+
+ unsigned int end;
+ for (end = i + 1; end < count; end++) /* [i, end) is a run of items with non-zero modified combining class. */
+ if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0)
+ break;
+
+ /* We are going to do a O(n^2). Only do this if the sequence is short. */
+ if (end - i > HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS) {
+ i = end;
+ continue;
+ }
+
+ buffer->sort (i, end, compare_combining_class);
+
+ if (plan->shaper->reorder_marks)
+ plan->shaper->reorder_marks (plan, buffer, i, end);
+
+ i = end; /* The loop's i++ then steps past info[end], whose class is 0 (or end == count). */
+ }
+ }
+ if (buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_CGJ)
+ {
+ /* For all CGJ, check if it prevented any reordering at all.
+ * If it did NOT, then make it skippable.
+ * https://github.com/harfbuzz/harfbuzz/issues/554
+ */
+ for (unsigned int i = 1; i + 1 < buffer->len; i++)
+ if (buffer->info[i].codepoint == 0x034Fu/*CGJ*/ &&
+ info_cc(buffer->info[i-1]) <= info_cc(buffer->info[i+1]))
+ {
+ _hb_glyph_info_unhide (&buffer->info[i]);
+ }
+ }
+
+
+ /* Third round, recompose */
+
+ if (!all_simple &&
+ (mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS ||
+ mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT))
+ {
+ /* As noted in the comment earlier, we don't try to combine
+ * ccc=0 chars with their previous Starter. */
+
+ buffer->clear_output ();
+ count = buffer->len;
+ unsigned int starter = 0; /* Index into out_info of the item that marks are composed onto. */
+ buffer->next_glyph ();
+ while (buffer->idx < count && buffer->successful)
+ {
+ hb_codepoint_t composed, glyph;
+ if (/* We don't try to compose a non-mark character with its preceding starter.
+ * This is both an optimization to avoid trying to compose every two neighboring
+ * glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul
+ * fonts are not designed to mix-and-match pre-composed syllables and Jamo. */
+ _hb_glyph_info_is_unicode_mark(&buffer->cur()))
+ {
+ if (/* If there's anything between the starter and this char, they should have CCC
+ * smaller than this character's. */
+ (starter == buffer->out_len - 1 ||
+ info_cc (buffer->prev()) < info_cc (buffer->cur())) &&
+ /* And compose. */
+ c.compose (&c,
+ buffer->out_info[starter].codepoint,
+ buffer->cur().codepoint,
+ &composed) &&
+ /* And the font has glyph for the composite. */
+ font->get_nominal_glyph (composed, &glyph))
+ {
+ /* Composes. */
+ buffer->next_glyph (); /* Copy to out-buffer. */
+ if (unlikely (!buffer->successful))
+ return; /* Bail out on buffer failure; the swap_buffers() below is not reached. */
+ buffer->merge_out_clusters (starter, buffer->out_len);
+ buffer->out_len--; /* Remove the second composable. */
+ /* Modify starter and carry on. */
+ buffer->out_info[starter].codepoint = composed;
+ buffer->out_info[starter].glyph_index() = glyph;
+ _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer);
+
+ continue;
+ }
+ }
+
+ /* Blocked, or doesn't compose. */
+ buffer->next_glyph ();
+
+ if (info_cc (buffer->prev()) == 0)
+ starter = buffer->out_len - 1; /* A ccc=0 item becomes the new starter. */
+ }
+ buffer->swap_buffers ();
+ }
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-normalize.hh b/thirdparty/harfbuzz/src/hb-ot-shape-normalize.hh
new file mode 100644
index 0000000000..04f1a80091
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape-normalize.hh
@@ -0,0 +1,70 @@
+/*
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_NORMALIZE_HH
+#define HB_OT_SHAPE_NORMALIZE_HH
+
+#include "hb.hh"
+
+
+/* buffer var allocations, used during the normalization process */
+#define glyph_index() var1.u32
+
+struct hb_ot_shape_plan_t;
+
+enum hb_ot_shape_normalization_mode_t { /* Normalization preference a shaper exposes via normalization_preference. */
+ HB_OT_SHAPE_NORMALIZATION_MODE_NONE, /* Normalizer always short-circuits; no recompose round. */
+ HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED, /* No short-circuiting and no recompose round. */
+ HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS, /* Never composes base-to-base */
+ HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, /* Always fully decomposes and then recomposes back */
+
+ HB_OT_SHAPE_NORMALIZATION_MODE_AUTO, /* See hb-ot-shape-normalize.cc for logic. */
+ HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_AUTO
+};
+
+HB_INTERNAL void _hb_ot_shape_normalize (const hb_ot_shape_plan_t *shaper,
+ hb_buffer_t *buffer,
+ hb_font_t *font);
+
+
+struct hb_ot_shape_normalize_context_t /* State bundle handed to the normalizer helpers and to the compose/decompose hooks. */
+{
+ const hb_ot_shape_plan_t *plan;
+ hb_buffer_t *buffer;
+ hb_font_t *font;
+ hb_unicode_funcs_t *unicode; /* The normalizer fills this from buffer->unicode. */
+ bool (*decompose) (const hb_ot_shape_normalize_context_t *c, /* Splits ab into a and b; the shaper's hook, or a Unicode default. */
+ hb_codepoint_t ab,
+ hb_codepoint_t *a,
+ hb_codepoint_t *b);
+ bool (*compose) (const hb_ot_shape_normalize_context_t *c, /* Combines a and b into ab; the shaper's hook, or a Unicode default. */
+ hb_codepoint_t a,
+ hb_codepoint_t b,
+ hb_codepoint_t *ab);
+};
+
+
+#endif /* HB_OT_SHAPE_NORMALIZE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape.cc b/thirdparty/harfbuzz/src/hb-ot-shape.cc
new file mode 100644
index 0000000000..fe0444987a
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape.cc
@@ -0,0 +1,1223 @@
+/*
+ * Copyright © 2009,2010 Red Hat, Inc.
+ * Copyright © 2010,2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_SHAPE
+
+#ifdef HB_NO_OT_LAYOUT
+#error "Cannot compile 'ot' shaper with HB_NO_OT_LAYOUT."
+#endif
+
+#include "hb-shaper-impl.hh"
+
+#include "hb-ot-shape.hh"
+#include "hb-ot-shape-complex.hh"
+#include "hb-ot-shape-fallback.hh"
+#include "hb-ot-shape-normalize.hh"
+
+#include "hb-ot-face.hh"
+
+#include "hb-set.hh"
+
+#include "hb-aat-layout.hh"
+
+
+#ifndef HB_NO_AAT_SHAPE
+static inline bool /* True when AAT substitution should be used for this face and direction. */
+_hb_apply_morx (hb_face_t *face, const hb_segment_properties_t *props)
+{
+ /* https://github.com/harfbuzz/harfbuzz/issues/2124 */
+ return hb_aat_layout_has_substitution (face) && /* Needs AAT substitution data, and either horizontal text or no OT substitution. */
+ (HB_DIRECTION_IS_HORIZONTAL (props->direction) || !hb_ot_layout_has_substitution (face));
+}
+#endif
+
+/**
+ * SECTION:hb-ot-shape
+ * @title: hb-ot-shape
+ * @short_description: OpenType shaping support
+ * @include: hb-ot.h
+ *
+ * Support functions for OpenType shaping related queries.
+ **/
+
+
+static void
+hb_ot_shape_collect_features (hb_ot_shape_planner_t *planner,
+ const hb_feature_t *user_features,
+ unsigned int num_user_features);
+
+hb_ot_shape_planner_t::hb_ot_shape_planner_t (hb_face_t *face,
+ const hb_segment_properties_t *props) :
+ face (face),
+ props (*props),
+ map (face, props),
+ aat_map (face, props)
+#ifndef HB_NO_AAT_SHAPE
+ , apply_morx (_hb_apply_morx (face, props))
+#endif
+{ /* Choose the complex shaper and cache the script-level mark flags it declares. */
+ shaper = hb_ot_shape_complex_categorize (this);
+
+ script_zero_marks = shaper->zero_width_marks != HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE;
+ script_fallback_mark_positioning = shaper->fallback_position;
+
+ /* https://github.com/harfbuzz/harfbuzz/issues/1528 */
+ if (apply_morx && shaper != &_hb_ot_complex_shaper_default) /* With morx, any non-default shaper is replaced by the "dumber" one. */
+ shaper = &_hb_ot_complex_shaper_dumber;
+}
+
+void /* Fill `plan` from this planner: compiled maps, feature masks, and which engine substitutes/positions/kerns. */
+hb_ot_shape_planner_t::compile (hb_ot_shape_plan_t &plan,
+ const hb_ot_shape_plan_key_t &key)
+{
+ plan.props = props;
+ plan.shaper = shaper;
+ map.compile (plan.map, key);
+#ifndef HB_NO_AAT_SHAPE
+ if (apply_morx)
+ aat_map.compile (plan.aat_map);
+#endif
+
+#ifndef HB_NO_OT_SHAPE_FRACTIONS
+ plan.frac_mask = plan.map.get_1_mask (HB_TAG ('f','r','a','c'));
+ plan.numr_mask = plan.map.get_1_mask (HB_TAG ('n','u','m','r'));
+ plan.dnom_mask = plan.map.get_1_mask (HB_TAG ('d','n','o','m'));
+ plan.has_frac = plan.frac_mask || (plan.numr_mask && plan.dnom_mask); /* 'frac' alone, or both 'numr' and 'dnom'. */
+#endif
+
+ plan.rtlm_mask = plan.map.get_1_mask (HB_TAG ('r','t','l','m'));
+ plan.has_vert = !!plan.map.get_1_mask (HB_TAG ('v','e','r','t'));
+
+ hb_tag_t kern_tag = HB_DIRECTION_IS_HORIZONTAL (props.direction) ? /* 'kern' for horizontal text, 'vkrn' otherwise. */
+ HB_TAG ('k','e','r','n') : HB_TAG ('v','k','r','n');
+#ifndef HB_NO_OT_KERN
+ plan.kern_mask = plan.map.get_mask (kern_tag);
+ plan.requested_kerning = !!plan.kern_mask;
+#endif
+#ifndef HB_NO_AAT_SHAPE
+ plan.trak_mask = plan.map.get_mask (HB_TAG ('t','r','a','k'));
+ plan.requested_tracking = !!plan.trak_mask;
+#endif
+
+ bool has_gpos_kern = plan.map.get_feature_index (1, kern_tag) != HB_OT_LAYOUT_NO_FEATURE_INDEX;
+ bool disable_gpos = plan.shaper->gpos_tag && /* Shaper names a GPOS script tag and the chosen script differs from it. */
+ plan.shaper->gpos_tag != plan.map.chosen_script[1];
+
+ /*
+ * Decide who provides glyph classes. GDEF or Unicode.
+ */
+
+ if (!hb_ot_layout_has_glyph_classes (face))
+ plan.fallback_glyph_classes = true;
+
+ /*
+ * Decide who does substitutions. GSUB, morx, or fallback.
+ */
+
+#ifndef HB_NO_AAT_SHAPE
+ plan.apply_morx = apply_morx;
+#endif
+
+ /*
+ * Decide who does positioning. GPOS, kerx, kern, or fallback.
+ */
+
+ if (0) /* Dummy head so each conditionally-compiled branch below can start with `else if`. */
+ ;
+#ifndef HB_NO_AAT_SHAPE
+ else if (hb_aat_layout_has_positioning (face))
+ plan.apply_kerx = true;
+#endif
+ else if (!apply_morx && !disable_gpos && hb_ot_layout_has_positioning (face))
+ plan.apply_gpos = true;
+
+ if (!plan.apply_kerx && (!has_gpos_kern || !plan.apply_gpos))
+ {
+ /* Apparently Apple applies kerx if GPOS kern was not applied. */
+#ifndef HB_NO_AAT_SHAPE
+ if (hb_aat_layout_has_positioning (face))
+ plan.apply_kerx = true;
+ else
+#endif
+#ifndef HB_NO_OT_KERN
+ if (hb_ot_layout_has_kerning (face))
+ plan.apply_kern = true;
+#endif
+ }
+
+ plan.zero_marks = script_zero_marks &&
+ !plan.apply_kerx &&
+ (!plan.apply_kern
+#ifndef HB_NO_OT_KERN
+ || !hb_ot_layout_has_machine_kerning (face)
+#endif
+ );
+ plan.has_gpos_mark = !!plan.map.get_1_mask (HB_TAG ('m','a','r','k'));
+
+ plan.adjust_mark_positioning_when_zeroing = !plan.apply_gpos &&
+ !plan.apply_kerx &&
+ (!plan.apply_kern
+#ifndef HB_NO_OT_KERN
+ || !hb_ot_layout_has_cross_kerning (face)
+#endif
+ );
+
+ plan.fallback_mark_positioning = plan.adjust_mark_positioning_when_zeroing &&
+ script_fallback_mark_positioning;
+
+#ifndef HB_NO_AAT_SHAPE
+ /* Currently we always apply trak. */
+ plan.apply_trak = plan.requested_tracking && hb_aat_layout_has_tracking (face);
+#endif
+}
+
+bool /* Build this plan for `face` from `key`; returns false only when the shaper's data_create() yields null. */
+hb_ot_shape_plan_t::init0 (hb_face_t *face,
+ const hb_shape_plan_key_t *key)
+{
+ map.init ();
+#ifndef HB_NO_AAT_SHAPE
+ aat_map.init ();
+#endif
+
+ hb_ot_shape_planner_t planner (face,
+ &key->props);
+
+ hb_ot_shape_collect_features (&planner,
+ key->user_features,
+ key->num_user_features);
+
+ planner.compile (*this, key->ot);
+
+ if (shaper->data_create) /* Shaper-specific plan data is optional. */
+ {
+ data = shaper->data_create (this);
+ if (unlikely (!data))
+ { /* Undo the map init() calls above before reporting failure. */
+ map.fini ();
+#ifndef HB_NO_AAT_SHAPE
+ aat_map.fini ();
+#endif
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void /* Tear down the plan: shaper data first (if the shaper has a destroy hook), then the maps. */
+hb_ot_shape_plan_t::fini ()
+{
+ if (shaper->data_destroy)
+ shaper->data_destroy (const_cast<void *> (data)); /* The destroy hook is given a non-const pointer, hence the const_cast. */
+
+ map.fini ();
+#ifndef HB_NO_AAT_SHAPE
+ aat_map.fini ();
+#endif
+}
+
+void /* Run substitution: AAT when apply_morx is set (and AAT shaping is compiled in), otherwise the OT map. */
+hb_ot_shape_plan_t::substitute (hb_font_t *font,
+ hb_buffer_t *buffer) const
+{
+#ifndef HB_NO_AAT_SHAPE
+ if (unlikely (apply_morx))
+ hb_aat_layout_substitute (this, font, buffer);
+ else
+#endif
+ map.substitute (this, font, buffer);
+}
+
+void /* Run exactly one positioning path (GPOS, else kerx, else kern, else fallback kerning), then tracking if enabled. */
+hb_ot_shape_plan_t::position (hb_font_t *font,
+ hb_buffer_t *buffer) const
+{
+ if (this->apply_gpos)
+ map.position (this, font, buffer);
+#ifndef HB_NO_AAT_SHAPE
+ else if (this->apply_kerx)
+ hb_aat_layout_position (this, font, buffer);
+#endif
+#ifndef HB_NO_OT_KERN
+ else if (this->apply_kern)
+ hb_ot_layout_kern (this, font, buffer);
+#endif
+ else
+ _hb_ot_shape_fallback_kern (this, font, buffer);
+
+#ifndef HB_NO_AAT_SHAPE
+ if (this->apply_trak) /* Independent of which branch ran above. */
+ hb_aat_layout_track (this, font, buffer);
+#endif
+}
+
+
+static const hb_ot_map_feature_t /* Features hb_ot_shape_collect_features() adds regardless of direction. */
+common_features[] =
+{
+ {HB_TAG('a','b','v','m'), F_GLOBAL},
+ {HB_TAG('b','l','w','m'), F_GLOBAL},
+ {HB_TAG('c','c','m','p'), F_GLOBAL},
+ {HB_TAG('l','o','c','l'), F_GLOBAL},
+ {HB_TAG('m','a','r','k'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('m','k','m','k'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('r','l','i','g'), F_GLOBAL},
+};
+
+
+static const hb_ot_map_feature_t /* Features hb_ot_shape_collect_features() adds only for horizontal directions. */
+horizontal_features[] =
+{
+ {HB_TAG('c','a','l','t'), F_GLOBAL},
+ {HB_TAG('c','l','i','g'), F_GLOBAL},
+ {HB_TAG('c','u','r','s'), F_GLOBAL},
+ {HB_TAG('d','i','s','t'), F_GLOBAL},
+ {HB_TAG('k','e','r','n'), F_GLOBAL_HAS_FALLBACK},
+ {HB_TAG('l','i','g','a'), F_GLOBAL},
+ {HB_TAG('r','c','l','t'), F_GLOBAL},
+};
+
+static void /* Register built-in, shaper-provided and user features on the planner's map(s), in a fixed order. */
+hb_ot_shape_collect_features (hb_ot_shape_planner_t *planner,
+ const hb_feature_t *user_features,
+ unsigned int num_user_features)
+{
+ hb_ot_map_builder_t *map = &planner->map;
+
+ map->enable_feature (HB_TAG('r','v','r','n'));
+ map->add_gsub_pause (nullptr);
+
+ switch (planner->props.direction) { /* Direction-specific features; nothing is added for vertical or invalid directions here. */
+ case HB_DIRECTION_LTR:
+ map->enable_feature (HB_TAG ('l','t','r','a'));
+ map->enable_feature (HB_TAG ('l','t','r','m'));
+ break;
+ case HB_DIRECTION_RTL:
+ map->enable_feature (HB_TAG ('r','t','l','a'));
+ map->add_feature (HB_TAG ('r','t','l','m')); /* Added rather than enabled, unlike 'ltrm' above. */
+ break;
+ case HB_DIRECTION_TTB:
+ case HB_DIRECTION_BTT:
+ case HB_DIRECTION_INVALID:
+ default:
+ break;
+ }
+
+#ifndef HB_NO_OT_SHAPE_FRACTIONS
+ /* Automatic fractions. */
+ map->add_feature (HB_TAG ('f','r','a','c'));
+ map->add_feature (HB_TAG ('n','u','m','r'));
+ map->add_feature (HB_TAG ('d','n','o','m'));
+#endif
+
+ /* Random! */
+ map->enable_feature (HB_TAG ('r','a','n','d'), F_RANDOM, HB_OT_MAP_MAX_VALUE);
+
+#ifndef HB_NO_AAT_SHAPE
+ /* Tracking. We enable dummy feature here just to allow disabling
+ * AAT 'trak' table using features.
+ * https://github.com/harfbuzz/harfbuzz/issues/1303 */
+ map->enable_feature (HB_TAG ('t','r','a','k'), F_HAS_FALLBACK);
+#endif
+
+ map->enable_feature (HB_TAG ('H','A','R','F')); /* Enabled immediately before the shaper's own features. */
+
+ if (planner->shaper->collect_features)
+ planner->shaper->collect_features (planner);
+
+ map->enable_feature (HB_TAG ('B','U','Z','Z')); /* Enabled immediately after the shaper's own features. */
+
+ for (unsigned int i = 0; i < ARRAY_LENGTH (common_features); i++)
+ map->add_feature (common_features[i]);
+
+ if (HB_DIRECTION_IS_HORIZONTAL (planner->props.direction))
+ for (unsigned int i = 0; i < ARRAY_LENGTH (horizontal_features); i++)
+ map->add_feature (horizontal_features[i]);
+ else
+ {
+ /* We really want to find a 'vert' feature if there's any in the font, no
+ * matter which script/langsys it is listed (or not) under.
+ * See various bugs referenced from:
+ * https://github.com/harfbuzz/harfbuzz/issues/63 */
+ map->enable_feature (HB_TAG ('v','e','r','t'), F_GLOBAL_SEARCH);
+ }
+
+ for (unsigned int i = 0; i < num_user_features; i++)
+ {
+ const hb_feature_t *feature = &user_features[i];
+ map->add_feature (feature->tag,
+ (feature->start == HB_FEATURE_GLOBAL_START && /* F_GLOBAL only when the feature spans the full global range. */
+ feature->end == HB_FEATURE_GLOBAL_END) ? F_GLOBAL : F_NONE,
+ feature->value);
+ }
+
+#ifndef HB_NO_AAT_SHAPE
+ if (planner->apply_morx) /* Mirror the user features into the AAT map; start/end are not passed along. */
+ {
+ hb_aat_map_builder_t *aat_map = &planner->aat_map;
+ for (unsigned int i = 0; i < num_user_features; i++)
+ {
+ const hb_feature_t *feature = &user_features[i];
+ aat_map->add_feature (feature->tag, feature->value);
+ }
+ }
+#endif
+
+ if (planner->shaper->override_features) /* The shaper's override hook runs last. */
+ planner->shaper->override_features (planner);
+}
+
+
+/*
+ * shaper face data
+ */
+
+struct hb_ot_face_data_t {}; /* Empty type: no per-face data is kept for this shaper. */
+
+hb_ot_face_data_t * /* Allocates nothing; returns the HB_SHAPER_DATA_SUCCEEDED constant cast to the data pointer type. */
+_hb_ot_shaper_face_data_create (hb_face_t *face)
+{
+ return (hb_ot_face_data_t *) HB_SHAPER_DATA_SUCCEEDED;
+}
+
+void /* Intentionally empty: the create function above allocates nothing. */
+_hb_ot_shaper_face_data_destroy (hb_ot_face_data_t *data)
+{
+}
+
+
+/*
+ * shaper font data
+ */
+
+struct hb_ot_font_data_t {}; /* Empty type: no per-font data is kept for this shaper. */
+
+hb_ot_font_data_t * /* Allocates nothing; returns the HB_SHAPER_DATA_SUCCEEDED constant cast to the data pointer type. */
+_hb_ot_shaper_font_data_create (hb_font_t *font HB_UNUSED)
+{
+ return (hb_ot_font_data_t *) HB_SHAPER_DATA_SUCCEEDED;
+}
+
+void /* Intentionally empty: the create function above allocates nothing. */
+_hb_ot_shaper_font_data_destroy (hb_ot_font_data_t *data HB_UNUSED)
+{
+}
+
+
+/*
+ * shaper
+ */
+
+struct hb_ot_shape_context_t /* Bundle of the plan, font, face, buffer and user features (users are outside this chunk -- TODO confirm usage). */
+{
+ hb_ot_shape_plan_t *plan;
+ hb_font_t *font;
+ hb_face_t *face;
+ hb_buffer_t *buffer;
+ const hb_feature_t *user_features;
+ unsigned int num_user_features; /* Presumably the element count of user_features -- confirm against callers. */
+
+ /* Transient stuff */
+ hb_direction_t target_direction;
+};
+
+
+
+/* Main shaper */
+
+
+/* Prepare */
+
+static void /* Set Unicode props on every buffer item and flag grapheme continuations. */
+hb_set_unicode_props (hb_buffer_t *buffer)
+{
+ /* Implement enough of Unicode Graphemes here that shaping
+ * in reverse-direction wouldn't break graphemes. Namely,
+ * we mark all marks and ZWJ and ZWJ,Extended_Pictographic
+ * sequences as continuations. The foreach_grapheme()
+ * macro uses this bit.
+ *
+ * https://www.unicode.org/reports/tr29/#Regex_Definitions
+ */
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ for (unsigned int i = 0; i < count; i++)
+ {
+ _hb_glyph_info_set_unicode_props (&info[i], buffer);
+
+ /* Marks are already set as continuation by the above line.
+ * Handle Emoji_Modifier and ZWJ-continuation. */
+ if (unlikely (_hb_glyph_info_get_general_category (&info[i]) == HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL &&
+ hb_in_range<hb_codepoint_t> (info[i].codepoint, 0x1F3FBu, 0x1F3FFu)))
+ {
+ _hb_glyph_info_set_continuation (&info[i]);
+ }
+#ifndef HB_NO_EMOJI_SEQUENCES
+ else if (unlikely (_hb_glyph_info_is_zwj (&info[i])))
+ {
+ _hb_glyph_info_set_continuation (&info[i]);
+ if (i + 1 < count &&
+ _hb_unicode_is_emoji_Extended_Pictographic (info[i + 1].codepoint))
+ {
+ i++; /* Consume the pictographic that follows the ZWJ; it is handled here instead of by the next iteration. */
+ _hb_glyph_info_set_unicode_props (&info[i], buffer);
+ _hb_glyph_info_set_continuation (&info[i]);
+ }
+ }
+#endif
+ /* Or part of the Other_Grapheme_Extend that is not marks.
+ * As of Unicode 11 that is just:
+ *
+ * 200C ; Other_Grapheme_Extend # Cf ZERO WIDTH NON-JOINER
+ * FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
+ * E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG
+ *
+ * ZWNJ is special, we don't want to merge it as there's no need, and keeping
+ * it separate results in more granular clusters. Ignore Katakana for now.
+ * Tags are used for Emoji sub-region flag sequences:
+ * https://github.com/harfbuzz/harfbuzz/issues/1556
+ */
+ else if (unlikely (hb_in_range<hb_codepoint_t> (info[i].codepoint, 0xE0020u, 0xE007Fu)))
+ _hb_glyph_info_set_continuation (&info[i]);
+ }
+}
+
+static void /* Prepend U+25CC DOTTED CIRCLE when the buffer opens with a mark, the font has the glyph, and insertion is not disabled. */
+hb_insert_dotted_circle (hb_buffer_t *buffer, hb_font_t *font)
+{
+ if (unlikely (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE))
+ return;
+
+ if (!(buffer->flags & HB_BUFFER_FLAG_BOT) || /* Requires the BOT flag, no context_len[0], and a mark as first item. */
+ buffer->context_len[0] ||
+ !_hb_glyph_info_is_unicode_mark (&buffer->info[0]))
+ return;
+
+ if (!font->has_glyph (0x25CCu))
+ return;
+
+ hb_glyph_info_t dottedcircle = {0};
+ dottedcircle.codepoint = 0x25CCu;
+ _hb_glyph_info_set_unicode_props (&dottedcircle, buffer);
+
+ buffer->clear_output ();
+
+ buffer->idx = 0;
+ hb_glyph_info_t info = dottedcircle;
+ info.cluster = buffer->cur().cluster; /* The circle takes the cluster and mask of the first item. */
+ info.mask = buffer->cur().mask;
+ buffer->output_info (info);
+ while (buffer->idx < buffer->len && buffer->successful) /* Then next_glyph() is called for every original item. */
+ buffer->next_glyph ();
+ buffer->swap_buffers ();
+}
+/* Applies the buffer's cluster level to each grapheme.
+ *
+ * Does nothing unless the buffer is flagged as containing non-ASCII text.
+ * Otherwise iterates with foreach_grapheme(): at cluster level
+ * HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES each grapheme's glyphs are
+ * merged into one cluster; at any other level the grapheme range is only
+ * marked unsafe-to-break.
+ */
+static void
+hb_form_clusters (hb_buffer_t *buffer)
+{
+ if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_NON_ASCII))
+ return;
+
+ if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
+ foreach_grapheme (buffer, start, end)
+ buffer->merge_clusters (start, end);
+ else
+ foreach_grapheme (buffer, start, end)
+ buffer->unsafe_to_break (start, end);
+}
+/* Reverses the buffer when its direction is not the "native" one.
+ *
+ * The buffer is reversed when either
+ *  - its direction is horizontal, the script has a valid horizontal
+ *    direction, and the two differ; or
+ *  - its direction is vertical and not HB_DIRECTION_TTB.
+ *
+ * Each grapheme range is reversed first and then the whole buffer is
+ * reversed, so graphemes end up in reverse order while the glyphs inside
+ * each grapheme keep their original order.  props.direction is flipped to
+ * match.  At cluster level MONOTONE_CHARACTERS each grapheme's clusters
+ * are additionally merged before reversing.
+ */
+static void
+hb_ensure_native_direction (hb_buffer_t *buffer)
+{
+ hb_direction_t direction = buffer->props.direction;
+ hb_direction_t horiz_dir = hb_script_get_horizontal_direction (buffer->props.script);
+
+ /* TODO vertical:
+ * The only BTT vertical script is Ogham, but it's not clear to me whether OpenType
+ * Ogham fonts are supposed to be implemented BTT or not. Need to research that
+ * first. */
+ if ((HB_DIRECTION_IS_HORIZONTAL (direction) &&
+ direction != horiz_dir && horiz_dir != HB_DIRECTION_INVALID) ||
+ (HB_DIRECTION_IS_VERTICAL (direction) &&
+ direction != HB_DIRECTION_TTB))
+ {
+
+ if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS)
+ foreach_grapheme (buffer, start, end)
+ {
+ buffer->merge_clusters (start, end);
+ buffer->reverse_range (start, end);
+ }
+ else
+ foreach_grapheme (buffer, start, end)
+ /* form_clusters() merged clusters already, we don't merge. */
+ buffer->reverse_range (start, end);
+
+ buffer->reverse ();
+
+ buffer->props.direction = HB_DIRECTION_REVERSE (buffer->props.direction);
+ }
+}
+
+
+/*
+ * Substitute
+ */
+
+/* Maps a codepoint to its vertical presentation form.
+ *
+ * Every mapped result lies in U+FE10..U+FE48.  A codepoint without an
+ * entry in the table below is returned unchanged. */
+static hb_codepoint_t
+hb_vert_char_for (hb_codepoint_t u)
+{
+  switch (u)
+  {
+    /* U+20xx */
+    case 0x2013u: return 0xfe32u; // EN DASH
+    case 0x2014u: return 0xfe31u; // EM DASH
+    case 0x2025u: return 0xfe30u; // TWO DOT LEADER
+    case 0x2026u: return 0xfe19u; // HORIZONTAL ELLIPSIS
+
+    /* U+30xx */
+    case 0x3001u: return 0xfe11u; // IDEOGRAPHIC COMMA
+    case 0x3002u: return 0xfe12u; // IDEOGRAPHIC FULL STOP
+    case 0x3008u: return 0xfe3fu; // LEFT ANGLE BRACKET
+    case 0x3009u: return 0xfe40u; // RIGHT ANGLE BRACKET
+    case 0x300au: return 0xfe3du; // LEFT DOUBLE ANGLE BRACKET
+    case 0x300bu: return 0xfe3eu; // RIGHT DOUBLE ANGLE BRACKET
+    case 0x300cu: return 0xfe41u; // LEFT CORNER BRACKET
+    case 0x300du: return 0xfe42u; // RIGHT CORNER BRACKET
+    case 0x300eu: return 0xfe43u; // LEFT WHITE CORNER BRACKET
+    case 0x300fu: return 0xfe44u; // RIGHT WHITE CORNER BRACKET
+    case 0x3010u: return 0xfe3bu; // LEFT BLACK LENTICULAR BRACKET
+    case 0x3011u: return 0xfe3cu; // RIGHT BLACK LENTICULAR BRACKET
+    case 0x3014u: return 0xfe39u; // LEFT TORTOISE SHELL BRACKET
+    case 0x3015u: return 0xfe3au; // RIGHT TORTOISE SHELL BRACKET
+    case 0x3016u: return 0xfe17u; // LEFT WHITE LENTICULAR BRACKET
+    case 0x3017u: return 0xfe18u; // RIGHT WHITE LENTICULAR BRACKET
+
+    /* U+FExx */
+    case 0xfe4fu: return 0xfe34u; // WAVY LOW LINE
+
+    /* U+FFxx */
+    case 0xff01u: return 0xfe15u; // FULLWIDTH EXCLAMATION MARK
+    case 0xff08u: return 0xfe35u; // FULLWIDTH LEFT PARENTHESIS
+    case 0xff09u: return 0xfe36u; // FULLWIDTH RIGHT PARENTHESIS
+    case 0xff0cu: return 0xfe10u; // FULLWIDTH COMMA
+    case 0xff1au: return 0xfe13u; // FULLWIDTH COLON
+    case 0xff1bu: return 0xfe14u; // FULLWIDTH SEMICOLON
+    case 0xff1fu: return 0xfe16u; // FULLWIDTH QUESTION MARK
+    case 0xff3bu: return 0xfe47u; // FULLWIDTH LEFT SQUARE BRACKET
+    case 0xff3du: return 0xfe48u; // FULLWIDTH RIGHT SQUARE BRACKET
+    case 0xff3fu: return 0xfe33u; // FULLWIDTH LOW LINE
+    case 0xff5bu: return 0xfe37u; // FULLWIDTH LEFT CURLY BRACKET
+    case 0xff5du: return 0xfe38u; // FULLWIDTH RIGHT CURLY BRACKET
+
+    default: return u;
+  }
+}
+
+/* Swaps characters for direction-specific counterparts.
+ *
+ * Backward target direction: each character is replaced by its mirrored
+ * codepoint when that differs and the font has a glyph for it; otherwise
+ * the plan's rtlm_mask is OR-ed into the glyph's mask.
+ *
+ * Vertical target direction with c->plan->has_vert unset: each character
+ * is replaced by the result of hb_vert_char_for() when that differs and
+ * the font has a glyph for it. */
+static inline void
+hb_ot_rotate_chars (const hb_ot_shape_context_t *c)
+{
+  hb_buffer_t *buffer = c->buffer;
+  hb_glyph_info_t *const first = buffer->info;
+  hb_glyph_info_t *const last = first + buffer->len;
+
+  if (HB_DIRECTION_IS_BACKWARD (c->target_direction))
+  {
+    hb_unicode_funcs_t *ufuncs = buffer->unicode;
+    hb_mask_t rtlm = c->plan->rtlm_mask;
+
+    for (hb_glyph_info_t *g = first; g != last; g++)
+    {
+      hb_codepoint_t mirrored = ufuncs->mirroring (g->codepoint);
+      if (unlikely (mirrored != g->codepoint && c->font->has_glyph (mirrored)))
+        g->codepoint = mirrored;
+      else
+        g->mask |= rtlm;
+    }
+  }
+
+  if (!HB_DIRECTION_IS_VERTICAL (c->target_direction) || c->plan->has_vert)
+    return;
+
+  for (hb_glyph_info_t *g = first; g != last; g++)
+  {
+    hb_codepoint_t vertical = hb_vert_char_for (g->codepoint);
+    if (unlikely (vertical != g->codepoint && c->font->has_glyph (vertical)))
+      g->codepoint = vertical;
+  }
+}
+/* Sets feature masks around every FRACTION SLASH (U+2044).
+ *
+ * Compiled to an immediate return when HB_NO_OT_SHAPE_FRACTIONS is defined.
+ * Also returns early when the buffer has no non-ASCII text or the plan's
+ * has_frac is unset.
+ *
+ * For each U+2044 the run of decimal-number glyphs directly before it gets
+ * pre_mask, the slash itself gets frac_mask, and the run of decimal-number
+ * glyphs directly after it gets post_mask.  In a forward buffer pre_mask is
+ * numr|frac and post_mask is frac|dnom; in a backward buffer the two are
+ * swapped.  The whole digits/slash/digits range is marked unsafe-to-break.
+ */
+static inline void
+hb_ot_shape_setup_masks_fraction (const hb_ot_shape_context_t *c)
+{
+#ifdef HB_NO_OT_SHAPE_FRACTIONS
+ return;
+#endif
+
+ if (!(c->buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_NON_ASCII) ||
+ !c->plan->has_frac)
+ return;
+
+ hb_buffer_t *buffer = c->buffer;
+
+ hb_mask_t pre_mask, post_mask;
+ if (HB_DIRECTION_IS_FORWARD (buffer->props.direction))
+ {
+ pre_mask = c->plan->numr_mask | c->plan->frac_mask;
+ post_mask = c->plan->frac_mask | c->plan->dnom_mask;
+ }
+ else
+ {
+ pre_mask = c->plan->frac_mask | c->plan->dnom_mask;
+ post_mask = c->plan->numr_mask | c->plan->frac_mask;
+ }
+
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ for (unsigned int i = 0; i < count; i++)
+ {
+ if (info[i].codepoint == 0x2044u) /* FRACTION SLASH */
+ {
+ unsigned int start = i, end = i + 1; /* [start, end) grows to cover the digits on both sides */
+ while (start &&
+ _hb_glyph_info_get_general_category (&info[start - 1]) ==
+ HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER)
+ start--;
+ while (end < count &&
+ _hb_glyph_info_get_general_category (&info[end]) ==
+ HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER)
+ end++;
+
+ buffer->unsafe_to_break (start, end);
+
+ for (unsigned int j = start; j < i; j++)
+ info[j].mask |= pre_mask;
+ info[i].mask |= c->plan->frac_mask;
+ for (unsigned int j = i + 1; j < end; j++)
+ info[j].mask |= post_mask;
+
+ i = end - 1; /* the loop's i++ then resumes at end, skipping the digits just masked */
+ }
+ }
+}
+
+/* Resets the masks of the buffer to the global mask of the plan's map. */
+static inline void
+hb_ot_shape_initialize_masks (const hb_ot_shape_context_t *c)
+{
+  c->buffer->reset_masks (c->plan->map.get_global_mask ());
+}
+
+/* Sets up per-glyph feature masks.
+ *
+ * Runs, in order: the fraction masks, the shaper's optional setup_masks
+ * hook, and finally every user feature that does not span the global
+ * range (HB_FEATURE_GLOBAL_START..HB_FEATURE_GLOBAL_END), whose shifted
+ * value is applied over [feature->start, feature->end). */
+static inline void
+hb_ot_shape_setup_masks (const hb_ot_shape_context_t *c)
+{
+  hb_buffer_t *buffer = c->buffer;
+  hb_ot_map_t *map = &c->plan->map;
+
+  hb_ot_shape_setup_masks_fraction (c);
+
+  if (c->plan->shaper->setup_masks)
+    c->plan->shaper->setup_masks (c->plan, buffer, c->font);
+
+  for (unsigned int k = 0; k < c->num_user_features; k++)
+  {
+    const hb_feature_t *feature = &c->user_features[k];
+
+    /* Features covering the global range are skipped here. */
+    if (feature->start == HB_FEATURE_GLOBAL_START && feature->end == HB_FEATURE_GLOBAL_END)
+      continue;
+
+    unsigned int shift;
+    hb_mask_t mask = map->get_mask (feature->tag, &shift);
+    buffer->set_masks (feature->value << shift, mask, feature->start, feature->end);
+  }
+}
+
+/* Zeroes both advances and both offsets of every default-ignorable glyph.
+ *
+ * Skipped when the buffer has no default ignorables, or when either the
+ * PRESERVE or the REMOVE default-ignorables buffer flag is set. */
+static void
+hb_ot_zero_width_default_ignorables (const hb_buffer_t *buffer)
+{
+  if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES))
+    return;
+  if (buffer->flags & HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES)
+    return;
+  if (buffer->flags & HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES)
+    return;
+
+  hb_glyph_info_t *info = buffer->info;
+  hb_glyph_position_t *pos = buffer->pos;
+  const unsigned int count = buffer->len;
+
+  for (unsigned int k = 0; k < count; k++)
+  {
+    if (likely (!_hb_glyph_info_is_default_ignorable (&info[k])))
+      continue;
+
+    pos[k].x_advance = 0;
+    pos[k].y_advance = 0;
+    pos[k].x_offset = 0;
+    pos[k].y_offset = 0;
+  }
+}
+
+/* Hides default-ignorable glyphs.
+ *
+ * Does nothing when the buffer has no default ignorables or the PRESERVE
+ * flag is set.  Unless the REMOVE flag is set, default ignorables are
+ * replaced by buffer->invisible, or by the font's nominal glyph for
+ * U+0020 when buffer->invisible is zero.  When the REMOVE flag is set, or
+ * no replacement glyph can be found, they are deleted from the buffer. */
+static void
+hb_ot_hide_default_ignorables (hb_buffer_t *buffer,
+                               hb_font_t *font)
+{
+  if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES))
+    return;
+  if (buffer->flags & HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES)
+    return;
+
+  hb_codepoint_t replacement = buffer->invisible;
+  bool replace = false;
+  if (!(buffer->flags & HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES))
+    replace = replacement || font->get_nominal_glyph (' ', &replacement);
+
+  if (!replace)
+  {
+    hb_ot_layout_delete_glyphs_inplace (buffer, _hb_glyph_info_is_default_ignorable);
+    return;
+  }
+
+  /* Replace default-ignorables with a zero-advance invisible glyph. */
+  hb_glyph_info_t *info = buffer->info;
+  const unsigned int count = buffer->len;
+  for (unsigned int k = 0; k < count; k++)
+    if (_hb_glyph_info_is_default_ignorable (&info[k]))
+      info[k].codepoint = replacement;
+}
+
+
+/* Copies each glyph's glyph_index() into its codepoint field and marks
+ * the buffer content as glyphs.  The glyph_index() values are expected to
+ * have been filled in by the normalization step. */
+static inline void
+hb_ot_map_glyphs_fast (hb_buffer_t *buffer)
+{
+  hb_glyph_info_t *info = buffer->info;
+  const unsigned int count = buffer->len;
+
+  unsigned int k = 0;
+  while (k < count)
+  {
+    info[k].codepoint = info[k].glyph_index();
+    k++;
+  }
+
+  buffer->content_type = HB_BUFFER_CONTENT_TYPE_GLYPHS;
+}
+
+/* Assigns every glyph a synthesized class: MARK for non-spacing marks that
+ * are not default-ignorable, BASE_GLYPH for everything else. */
+static inline void
+hb_synthesize_glyph_classes (hb_buffer_t *buffer)
+{
+  hb_glyph_info_t *info = buffer->info;
+  const unsigned int count = buffer->len;
+
+  for (unsigned int k = 0; k < count; k++)
+  {
+    /* Default-ignorables are never classified as marks.  They would not
+     * get in the way of lookups anyway, but as marks they would be
+     * skipped whenever the lookup-flag says so.  At least for the
+     * Mongolian variation selectors, Uniscribe appears to treat them as
+     * non-marks, and some Mongolian fonts without GDEF rely on that.
+     * COMBINING GRAPHEME JOINER is another notable character this
+     * applies to. */
+    const bool is_mark =
+      _hb_glyph_info_get_general_category (&info[k]) ==
+        HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK &&
+      !_hb_glyph_info_is_default_ignorable (&info[k]);
+
+    hb_ot_layout_glyph_props_flags_t klass = is_mark ?
+                                             HB_OT_LAYOUT_GLYPH_PROPS_MARK :
+                                             HB_OT_LAYOUT_GLYPH_PROPS_BASE_GLYPH;
+    _hb_glyph_info_set_glyph_props (&info[k], klass);
+  }
+}
+/* Default substitution stage, turning characters into nominal glyphs.
+ *
+ * Runs, in order: character rotation (mirroring / vertical forms),
+ * normalization, mask setup, optional mark recategorization for fallback
+ * mark positioning, and the copy of glyph_index() into codepoint.  The
+ * glyph_index buffer variable is allocated only for the duration of this
+ * function.
+ */
+static inline void
+hb_ot_substitute_default (const hb_ot_shape_context_t *c)
+{
+ hb_buffer_t *buffer = c->buffer;
+
+ hb_ot_rotate_chars (c);
+
+ HB_BUFFER_ALLOCATE_VAR (buffer, glyph_index);
+
+ _hb_ot_shape_normalize (c->plan, buffer, c->font);
+
+ hb_ot_shape_setup_masks (c);
+
+ /* This is unfortunate to go here, but necessary... */
+ if (c->plan->fallback_mark_positioning)
+ _hb_ot_shape_fallback_mark_position_recategorize_marks (c->plan, c->font, buffer);
+
+ hb_ot_map_glyphs_fast (buffer);
+
+ HB_BUFFER_DEALLOCATE_VAR (buffer, glyph_index);
+}
+/* Table-driven substitution stage.
+ *
+ * Starts layout substitution on the buffer, synthesizes glyph classes when
+ * the plan asks for fallback glyph classes, then runs the plan's
+ * substitute() step.
+ */
+static inline void
+hb_ot_substitute_complex (const hb_ot_shape_context_t *c)
+{
+ hb_buffer_t *buffer = c->buffer;
+
+ hb_ot_layout_substitute_start (c->font, buffer);
+
+ if (c->plan->fallback_glyph_classes)
+ hb_synthesize_glyph_classes (c->buffer);
+
+ c->plan->substitute (c->font, buffer);
+}
+/* Substitution work done before positioning: the default stage, then
+ * allocation of the GSUB/GPOS buffer variables, then the table-driven
+ * stage.  The variables allocated here are released at the end of
+ * hb_ot_position().
+ */
+static inline void
+hb_ot_substitute_pre (const hb_ot_shape_context_t *c)
+{
+ hb_ot_substitute_default (c);
+
+ _hb_buffer_allocate_gsubgpos_vars (c->buffer);
+
+ hb_ot_substitute_complex (c);
+}
+/* Substitution work done after positioning: hides default ignorables,
+ * removes AAT-deleted glyphs when the plan applies morx (unless built with
+ * HB_NO_AAT_SHAPE), and finally calls the shaper's optional
+ * postprocess_glyphs hook.
+ */
+static inline void
+hb_ot_substitute_post (const hb_ot_shape_context_t *c)
+{
+ hb_ot_hide_default_ignorables (c->buffer, c->font);
+#ifndef HB_NO_AAT_SHAPE
+ if (c->plan->apply_morx)
+ hb_aat_layout_remove_deleted_glyphs (c->buffer);
+#endif
+
+ if (c->plan->shaper->postprocess_glyphs)
+ c->plan->shaper->postprocess_glyphs (c->plan, c->buffer, c->font);
+}
+
+
+/*
+ * Position
+ */
+
+/* Shifts a glyph's offsets back by its own advance, on both axes. */
+static inline void
+adjust_mark_offsets (hb_glyph_position_t *pos)
+{
+  hb_glyph_position_t &p = *pos;
+
+  p.x_offset -= p.x_advance;
+  p.y_offset -= p.y_advance;
+}
+
+/* Clears both advances of a glyph position; offsets are left untouched. */
+static inline void
+zero_mark_width (hb_glyph_position_t *pos)
+{
+  pos->x_advance = pos->y_advance = 0;
+}
+
+/* Zeroes the advances of every glyph that _hb_glyph_info_is_mark() reports
+ * as a mark.  When adjust_offsets is true, the glyph's offsets are first
+ * shifted back by its advance. */
+static inline void
+zero_mark_widths_by_gdef (hb_buffer_t *buffer, bool adjust_offsets)
+{
+  hb_glyph_info_t *info = buffer->info;
+  const unsigned int count = buffer->len;
+
+  for (unsigned int k = 0; k < count; k++)
+  {
+    if (!_hb_glyph_info_is_mark (&info[k]))
+      continue;
+
+    hb_glyph_position_t *mark_pos = &buffer->pos[k];
+    if (adjust_offsets)
+      adjust_mark_offsets (mark_pos);
+    zero_mark_width (mark_pos);
+  }
+}
+/* Default positioning stage, using the font's advances and origins.
+ *
+ * Horizontal buffers get their x advances from the font, and the
+ * horizontal origin is subtracted from the offsets only when the font has
+ * a glyph_h_origin function.  Any other direction gets y advances from the
+ * font and always has the vertical origin subtracted.  Space fallback is
+ * applied afterwards when the buffer is flagged as needing it.
+ */
+static inline void
+hb_ot_position_default (const hb_ot_shape_context_t *c)
+{
+ hb_direction_t direction = c->buffer->props.direction;
+ unsigned int count = c->buffer->len;
+ hb_glyph_info_t *info = c->buffer->info;
+ hb_glyph_position_t *pos = c->buffer->pos;
+
+ if (HB_DIRECTION_IS_HORIZONTAL (direction))
+ {
+ c->font->get_glyph_h_advances (count, &info[0].codepoint, sizeof(info[0]), /* sizeof() values are passed as element strides */
+ &pos[0].x_advance, sizeof(pos[0]));
+ /* The nil glyph_h_origin() func returns 0, so no need to apply it. */
+ if (c->font->has_glyph_h_origin_func ())
+ for (unsigned int i = 0; i < count; i++)
+ c->font->subtract_glyph_h_origin (info[i].codepoint,
+ &pos[i].x_offset,
+ &pos[i].y_offset);
+ }
+ else
+ {
+ c->font->get_glyph_v_advances (count, &info[0].codepoint, sizeof(info[0]),
+ &pos[0].y_advance, sizeof(pos[0]));
+ for (unsigned int i = 0; i < count; i++)
+ {
+ c->font->subtract_glyph_v_origin (info[i].codepoint,
+ &pos[i].x_offset,
+ &pos[i].y_offset);
+ }
+ }
+ if (c->buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK)
+ _hb_ot_shape_fallback_spaces (c->plan, c->font, c->buffer);
+}
+/* Table-driven positioning stage.
+ *
+ * Order of operations:
+ *  1. add the horizontal glyph origin to the offsets (only when the font
+ *     has a glyph_h_origin function);
+ *  2. start layout positioning;
+ *  3. zero mark widths now if the shaper asks for BY_GDEF_EARLY;
+ *  4. run the plan's position() step;
+ *  5. zero mark widths now if the shaper asks for BY_GDEF_LATE;
+ *  6. finish advances, zero default ignorables, zero AAT-deleted glyphs
+ *     (when morx applies), finish offsets;
+ *  7. subtract the horizontal glyph origin again;
+ *  8. apply fallback mark positioning when the plan asks for it.
+ *
+ * Steps 3 and 5 only run when the plan's zero_marks is set.
+ */
+static inline void
+hb_ot_position_complex (const hb_ot_shape_context_t *c)
+{
+ unsigned int count = c->buffer->len;
+ hb_glyph_info_t *info = c->buffer->info;
+ hb_glyph_position_t *pos = c->buffer->pos;
+
+ /* If the font has no GPOS and direction is forward, then when
+ * zeroing mark widths, we shift the mark with it, such that the
+ * mark is positioned hanging over the previous glyph. When
+ * direction is backward we don't shift and it will end up
+ * hanging over the next glyph after the final reordering.
+ *
+ * Note: If fallback positioning happens, we don't care about
+ * this as it will be overridden.
+ */
+ bool adjust_offsets_when_zeroing = c->plan->adjust_mark_positioning_when_zeroing &&
+ HB_DIRECTION_IS_FORWARD (c->buffer->props.direction);
+
+ /* We change glyph origin to what GPOS expects (horizontal), apply GPOS, change it back. */
+
+ /* The nil glyph_h_origin() func returns 0, so no need to apply it. */
+ if (c->font->has_glyph_h_origin_func ())
+ for (unsigned int i = 0; i < count; i++)
+ c->font->add_glyph_h_origin (info[i].codepoint,
+ &pos[i].x_offset,
+ &pos[i].y_offset);
+
+ hb_ot_layout_position_start (c->font, c->buffer);
+
+ if (c->plan->zero_marks)
+ switch (c->plan->shaper->zero_width_marks)
+ {
+ case HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY:
+ zero_mark_widths_by_gdef (c->buffer, adjust_offsets_when_zeroing);
+ break;
+
+ default:
+ case HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE:
+ case HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE:
+ break;
+ }
+
+ c->plan->position (c->font, c->buffer);
+
+ if (c->plan->zero_marks)
+ switch (c->plan->shaper->zero_width_marks)
+ {
+ case HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE:
+ zero_mark_widths_by_gdef (c->buffer, adjust_offsets_when_zeroing);
+ break;
+
+ default:
+ case HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE:
+ case HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY:
+ break;
+ }
+
+ /* Finish off. Has to follow a certain order. */
+ hb_ot_layout_position_finish_advances (c->font, c->buffer);
+ hb_ot_zero_width_default_ignorables (c->buffer);
+#ifndef HB_NO_AAT_SHAPE
+ if (c->plan->apply_morx)
+ hb_aat_layout_zero_width_deleted_glyphs (c->buffer);
+#endif
+ hb_ot_layout_position_finish_offsets (c->font, c->buffer);
+
+ /* The nil glyph_h_origin() func returns 0, so no need to apply it. */
+ if (c->font->has_glyph_h_origin_func ())
+ for (unsigned int i = 0; i < count; i++)
+ c->font->subtract_glyph_h_origin (info[i].codepoint,
+ &pos[i].x_offset,
+ &pos[i].y_offset);
+
+ if (c->plan->fallback_mark_positioning)
+ _hb_ot_shape_fallback_mark_position (c->plan, c->font, c->buffer,
+ adjust_offsets_when_zeroing);
+}
+/* Positioning entry point: clears positions, runs the default and then
+ * the table-driven positioning stage, reverses the buffer when its
+ * direction is backward, and releases the GSUB/GPOS buffer variables.
+ */
+static inline void
+hb_ot_position (const hb_ot_shape_context_t *c)
+{
+ c->buffer->clear_positions ();
+
+ hb_ot_position_default (c);
+
+ hb_ot_position_complex (c);
+
+ if (HB_DIRECTION_IS_BACKWARD (c->buffer->props.direction))
+ hb_buffer_reverse (c->buffer);
+
+ _hb_buffer_deallocate_gsubgpos_vars (c->buffer); /* releases what hb_ot_substitute_pre() allocated */
+}
+
+/* Makes the UNSAFE_TO_BREAK glyph flag uniform within each cluster: if any
+ * glyph of a cluster carries it, every glyph of that cluster gets it.
+ * Skipped entirely when the buffer never recorded an unsafe-to-break. */
+static inline void
+hb_propagate_flags (hb_buffer_t *buffer)
+{
+  if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK))
+    return;
+
+  hb_glyph_info_t *info = buffer->info;
+
+  foreach_cluster (buffer, start, end)
+  {
+    /* First pass: does any glyph of this cluster carry the flag? */
+    bool flagged = false;
+    for (unsigned int k = start; k < end && !flagged; k++)
+      flagged = (info[k].mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) != 0;
+
+    /* Second pass: spread it over the whole cluster. */
+    if (flagged)
+      for (unsigned int k = start; k < end; k++)
+        info[k].mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
+  }
+}
+
+/* Pull it all together!
+ *
+ * Runs the whole shaping pipeline on c->buffer: per-run limits and scratch
+ * state are set up, Unicode properties and clusters are prepared, the
+ * buffer is put in native direction, then substitution, positioning and
+ * post-substitution run, and finally the caller's direction and the
+ * default limits are restored.
+ *
+ * max_len / max_ops are only recomputed when the multiplication by the
+ * respective factor does not overflow; otherwise they are left as they
+ * were on entry.
+ */
+
+static void
+hb_ot_shape_internal (hb_ot_shape_context_t *c)
+{
+ c->buffer->deallocate_var_all ();
+ c->buffer->scratch_flags = HB_BUFFER_SCRATCH_FLAG_DEFAULT;
+ if (likely (!hb_unsigned_mul_overflows (c->buffer->len, HB_BUFFER_MAX_LEN_FACTOR)))
+ {
+ c->buffer->max_len = hb_max (c->buffer->len * HB_BUFFER_MAX_LEN_FACTOR,
+ (unsigned) HB_BUFFER_MAX_LEN_MIN);
+ }
+ if (likely (!hb_unsigned_mul_overflows (c->buffer->len, HB_BUFFER_MAX_OPS_FACTOR)))
+ {
+ c->buffer->max_ops = hb_max (c->buffer->len * HB_BUFFER_MAX_OPS_FACTOR,
+ (unsigned) HB_BUFFER_MAX_OPS_MIN);
+ }
+
+ /* Save the original direction, we use it later. */
+ c->target_direction = c->buffer->props.direction;
+
+ _hb_buffer_allocate_unicode_vars (c->buffer);
+
+ c->buffer->clear_output ();
+
+ hb_ot_shape_initialize_masks (c);
+ hb_set_unicode_props (c->buffer);
+ hb_insert_dotted_circle (c->buffer, c->font);
+
+ hb_form_clusters (c->buffer);
+
+ hb_ensure_native_direction (c->buffer);
+
+ if (c->plan->shaper->preprocess_text)
+ c->plan->shaper->preprocess_text (c->plan, c->buffer, c->font);
+
+ hb_ot_substitute_pre (c);
+ hb_ot_position (c);
+ hb_ot_substitute_post (c);
+
+ hb_propagate_flags (c->buffer);
+
+ _hb_buffer_deallocate_unicode_vars (c->buffer);
+
+ c->buffer->props.direction = c->target_direction; /* undo any flip done by hb_ensure_native_direction() */
+
+ c->buffer->max_len = HB_BUFFER_MAX_LEN_DEFAULT;
+ c->buffer->max_ops = HB_BUFFER_MAX_OPS_DEFAULT;
+ c->buffer->deallocate_var_all ();
+}
+
+/* Entry point of the "ot" shaper: builds a shaping context from the plan's
+ * OpenType data, the font (and its face), the buffer and the user features,
+ * then runs hb_ot_shape_internal() on it.  Always returns true.
+ */
+hb_bool_t
+_hb_ot_shape (hb_shape_plan_t *shape_plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features)
+{
+ hb_ot_shape_context_t c = {&shape_plan->ot, font, font->face, buffer, features, num_features};
+ hb_ot_shape_internal (&c);
+
+ return true;
+}
+
+
+/**
+ * hb_ot_shape_plan_collect_lookups:
+ * @shape_plan: shape plan whose OpenType data is queried
+ * @table_tag: table to collect lookups for
+ * @lookup_indexes: (out): set handed to the plan to receive the lookups
+ *
+ * Forwards to the collect_lookups() method of @shape_plan's OpenType plan
+ * with @table_tag and @lookup_indexes.
+ *
+ * Since: 0.9.7
+ **/
+void
+hb_ot_shape_plan_collect_lookups (hb_shape_plan_t *shape_plan,
+ hb_tag_t table_tag,
+ hb_set_t *lookup_indexes /* OUT */)
+{
+ shape_plan->ot.collect_lookups (table_tag, lookup_indexes);
+}
+
+
+/* TODO Move this to hb-ot-shape-normalize, make it do decompose, and make it public. */
+/* Adds the font's nominal glyph for u to glyphs, if there is one.  When
+ * mirror is set and u has a distinct mirrored codepoint, the nominal glyph
+ * for that codepoint is added as well. */
+static void
+add_char (hb_font_t *font,
+          hb_unicode_funcs_t *unicode,
+          hb_bool_t mirror,
+          hb_codepoint_t u,
+          hb_set_t *glyphs)
+{
+  hb_codepoint_t gid;
+
+  if (font->get_nominal_glyph (u, &gid))
+    glyphs->add (gid);
+
+  if (!mirror)
+    return;
+
+  hb_codepoint_t mirrored = unicode->mirroring (u);
+  if (mirrored == u)
+    return;
+
+  if (font->get_nominal_glyph (mirrored, &gid))
+    glyphs->add (gid);
+}
+
+
+/**
+ * hb_ot_shape_glyphs_closure:
+ * @font: font used to map characters to nominal glyphs
+ * @buffer: buffer whose codepoints and segment properties are read
+ * @features: (array length=num_features): features used for the shape plan
+ * @num_features: number of entries in @features
+ * @glyphs: (out): set the resulting glyphs are added to
+ *
+ * Adds to @glyphs the nominal glyph of every character in @buffer, plus
+ * the nominal glyph of its mirrored counterpart when the horizontal
+ * direction of @buffer's script is right-to-left.  It then runs the GSUB
+ * substitute closure over @glyphs, using the GSUB lookups of an "ot" shape
+ * plan created for @buffer's segment properties and @features.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_ot_shape_glyphs_closure (hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features,
+ hb_set_t *glyphs)
+{
+ const char *shapers[] = {"ot", nullptr};
+ hb_shape_plan_t *shape_plan = hb_shape_plan_create_cached (font->face, &buffer->props,
+ features, num_features, shapers);
+
+ bool mirror = hb_script_get_horizontal_direction (buffer->props.script) == HB_DIRECTION_RTL;
+
+ unsigned int count = buffer->len;
+ hb_glyph_info_t *info = buffer->info;
+ for (unsigned int i = 0; i < count; i++)
+ add_char (font, buffer->unicode, mirror, info[i].codepoint, glyphs);
+
+ hb_set_t *lookups = hb_set_create ();
+ hb_ot_shape_plan_collect_lookups (shape_plan, HB_OT_TAG_GSUB, lookups);
+ hb_ot_layout_lookups_substitute_closure (font->face, lookups, glyphs);
+
+ hb_set_destroy (lookups);
+
+ hb_shape_plan_destroy (shape_plan);
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape.h b/thirdparty/harfbuzz/src/hb-ot-shape.h
new file mode 100644
index 0000000000..7b1bcc0637
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright © 2013 Red Hat, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_H_IN
+#error "Include <hb-ot.h> instead."
+#endif
+
+#ifndef HB_OT_SHAPE_H
+#define HB_OT_SHAPE_H
+
+#include "hb.h"
+
+HB_BEGIN_DECLS
+
+/* TODO port to shape-plan / set.
+ *
+ * Adds to @glyphs the glyphs reachable from the characters in @buffer,
+ * given @font and the @num_features entries of @features. */
+HB_EXTERN void
+hb_ot_shape_glyphs_closure (hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features,
+ hb_set_t *glyphs);
+/* Collects the lookups that @shape_plan uses for the table @table_tag
+ * into @lookup_indexes. */
+HB_EXTERN void
+hb_ot_shape_plan_collect_lookups (hb_shape_plan_t *shape_plan,
+ hb_tag_t table_tag,
+ hb_set_t *lookup_indexes /* OUT */);
+
+HB_END_DECLS
+
+#endif /* HB_OT_SHAPE_H */
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape.hh b/thirdparty/harfbuzz/src/hb-ot-shape.hh
new file mode 100644
index 0000000000..acc98772a9
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-shape.hh
@@ -0,0 +1,170 @@
+/*
+ * Copyright © 2010 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_HH
+#define HB_OT_SHAPE_HH
+
+#include "hb.hh"
+
+#include "hb-ot-map.hh"
+#include "hb-aat-map.hh"
+
+
+/* OpenType part of a shape-plan key: one feature-variations index per
+ * layout table listed in table_tags[] (two entries). */
+struct hb_ot_shape_plan_key_t
+{
+  /* Filled in by init(); indexed in the same order as table_tags[]. */
+  unsigned int variations_index[2];
+
+  /* Looks up, for each of the two tables, the feature-variations record
+   * matching the given variation coordinates and stores its index. */
+  void init (hb_face_t *face,
+             const int *coords,
+             unsigned num_coords)
+  {
+    for (unsigned int table_index = 0; table_index < 2; table_index++)
+      hb_ot_layout_table_find_feature_variations (face,
+                                                  table_tags[table_index],
+                                                  coords,
+                                                  num_coords,
+                                                  &variations_index[table_index]);
+  }
+
+  /* Byte-wise comparison of the two keys.  Marked const because it only
+   * reads this object, so it can also be called on a const key. */
+  bool equal (const hb_ot_shape_plan_key_t *other) const
+  {
+    return 0 == memcmp (this, other, sizeof (*this));
+  }
+};
+
+
+struct hb_shape_plan_key_t;
+/* Compiled OpenType shaping plan.
+ *
+ * Holds the segment properties, the selected complex shaper, the compiled
+ * OT and AAT maps, the feature masks and the boolean decisions consulted
+ * while shaping.  Members guarded by an HB_NO_* macro turn into
+ * static constexpr constants (0 / false) when that macro is defined, so
+ * code that reads them still compiles in reduced builds.
+ */
+struct hb_ot_shape_plan_t
+{
+ hb_segment_properties_t props;
+ const struct hb_ot_complex_shaper_t *shaper;
+ hb_ot_map_t map;
+ hb_aat_map_t aat_map;
+ const void *data;
+#ifndef HB_NO_OT_SHAPE_FRACTIONS
+ hb_mask_t frac_mask, numr_mask, dnom_mask;
+#else
+ static constexpr hb_mask_t frac_mask = 0;
+ static constexpr hb_mask_t numr_mask = 0;
+ static constexpr hb_mask_t dnom_mask = 0;
+#endif
+ hb_mask_t rtlm_mask;
+#ifndef HB_NO_OT_KERN
+ hb_mask_t kern_mask;
+#else
+ static constexpr hb_mask_t kern_mask = 0;
+#endif
+#ifndef HB_NO_AAT_SHAPE
+ hb_mask_t trak_mask;
+#else
+ static constexpr hb_mask_t trak_mask = 0;
+#endif
+
+#ifndef HB_NO_OT_KERN
+ bool requested_kerning : 1;
+#else
+ static constexpr bool requested_kerning = false;
+#endif
+#ifndef HB_NO_AAT_SHAPE
+ bool requested_tracking : 1;
+#else
+ static constexpr bool requested_tracking = false;
+#endif
+#ifndef HB_NO_OT_SHAPE_FRACTIONS
+ bool has_frac : 1;
+#else
+ static constexpr bool has_frac = false;
+#endif
+ bool has_vert : 1;
+ bool has_gpos_mark : 1;
+ bool zero_marks : 1;
+ bool fallback_glyph_classes : 1;
+ bool fallback_mark_positioning : 1;
+ bool adjust_mark_positioning_when_zeroing : 1;
+
+ bool apply_gpos : 1;
+#ifndef HB_NO_OT_KERN
+ bool apply_kern : 1;
+#else
+ static constexpr bool apply_kern = false;
+#endif
+#ifndef HB_NO_AAT_SHAPE
+ bool apply_kerx : 1;
+ bool apply_morx : 1;
+ bool apply_trak : 1;
+#else
+ static constexpr bool apply_kerx = false;
+ static constexpr bool apply_morx = false;
+ static constexpr bool apply_trak = false;
+#endif
+
+ void collect_lookups (hb_tag_t table_tag, hb_set_t *lookups) const /* GSUB -> table 0, GPOS -> table 1 */
+ {
+ unsigned int table_index;
+ switch (table_tag) {
+ case HB_OT_TAG_GSUB: table_index = 0; break;
+ case HB_OT_TAG_GPOS: table_index = 1; break;
+ default: return; /* any other tag: lookups is left untouched */
+ }
+ map.collect_lookups (table_index, lookups);
+ }
+
+ HB_INTERNAL bool init0 (hb_face_t *face,
+ const hb_shape_plan_key_t *key);
+ HB_INTERNAL void fini ();
+
+ HB_INTERNAL void substitute (hb_font_t *font, hb_buffer_t *buffer) const;
+ HB_INTERNAL void position (hb_font_t *font, hb_buffer_t *buffer) const;
+};
+
+struct hb_shape_plan_t;
+/* Builder for an hb_ot_shape_plan_t.
+ *
+ * Holds the face, the segment properties, the OT and AAT map builders and
+ * the script-level decisions; compile() writes the result into a plan.
+ * apply_morx becomes a constant false when HB_NO_AAT_SHAPE is defined.
+ */
+struct hb_ot_shape_planner_t
+{
+ /* In the order that they are filled in. */
+ hb_face_t *face;
+ hb_segment_properties_t props;
+ hb_ot_map_builder_t map;
+ hb_aat_map_builder_t aat_map;
+#ifndef HB_NO_AAT_SHAPE
+ bool apply_morx : 1;
+#else
+ static constexpr bool apply_morx = false;
+#endif
+ bool script_zero_marks : 1;
+ bool script_fallback_mark_positioning : 1;
+ const struct hb_ot_complex_shaper_t *shaper;
+
+ HB_INTERNAL hb_ot_shape_planner_t (hb_face_t *face,
+ const hb_segment_properties_t *props);
+
+ HB_INTERNAL void compile (hb_ot_shape_plan_t &plan,
+ const hb_ot_shape_plan_key_t &key);
+};
+
+
+#endif /* HB_OT_SHAPE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-stat-table.hh b/thirdparty/harfbuzz/src/hb-ot-stat-table.hh
new file mode 100644
index 0000000000..6aa4fa4492
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-stat-table.hh
@@ -0,0 +1,404 @@
+/*
+ * Copyright © 2018 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_OT_STAT_TABLE_HH
+#define HB_OT_STAT_TABLE_HH
+
+#include "hb-open-type.hh"
+#include "hb-ot-layout-common.hh"
+
+/*
+ * STAT -- Style Attributes
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/stat
+ */
+#define HB_OT_TAG_STAT HB_TAG('S','T','A','T')
+
+
+namespace OT {
+
+/* Bit values describing an axis value table (presumably tested against the
+ * `flags` field of the AxisValueFormat* structs -- confirm against callers). */
+enum
+{
+ OLDER_SIBLING_FONT_ATTRIBUTE = 0x0001, /* If set, this axis value table
+ * provides axis value information
+ * that is applicable to other fonts
+ * within the same font family. This
+ * is used if the other fonts were
+ * released earlier and did not include
+ * information about values for some axis.
+ * If newer versions of the other
+ * fonts include the information
+ * themselves and are present,
+ * then this record is ignored. */
+ ELIDABLE_AXIS_VALUE_NAME = 0x0002 /* If set, it indicates that the axis
+ * value represents the “normal” value
+ * for the axis and may be omitted when
+ * composing name strings. */
+ // Reserved = 0xFFFC /* Reserved for future use — set to zero. */
+};
+
+/* Axis value table, format 1: one axis index, one value and the name ID
+ * of its display string. Fixed size of 12 bytes. */
+struct AxisValueFormat1
+{
+ unsigned int get_axis_index () const { return axisIndex; }
+ float get_value () const { return value.to_float (); }
+
+ hb_ot_name_id_t get_value_name_id () const { return valueNameID; }
+
+ /* Only the fixed-size struct needs bounds-checking. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this)));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier — set to 1. */
+ HBUINT16 axisIndex; /* Zero-base index into the axis record array
+ * identifying the axis of design variation
+ * to which the axis value record applies.
+ * Must be less than designAxisCount. */
+ HBUINT16 flags; /* Flags — see below for details. */
+ NameID valueNameID; /* The name ID for entries in the 'name' table
+ * that provide a display string for this
+ * attribute value. */
+ HBFixed value; /* A numeric value for this attribute value. */
+ public:
+ DEFINE_SIZE_STATIC (12);
+};
+
+/* Axis value table, format 2: one axis index with a nominal value plus a
+ * min/max range, and the name ID of its display string. get_value ()
+ * reports the nominal value. Fixed size of 20 bytes. */
+struct AxisValueFormat2
+{
+ unsigned int get_axis_index () const { return axisIndex; }
+ float get_value () const { return nominalValue.to_float (); }
+
+ hb_ot_name_id_t get_value_name_id () const { return valueNameID; }
+
+ /* Only the fixed-size struct needs bounds-checking. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this)));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier — set to 2. */
+ HBUINT16 axisIndex; /* Zero-base index into the axis record array
+ * identifying the axis of design variation
+ * to which the axis value record applies.
+ * Must be less than designAxisCount. */
+ HBUINT16 flags; /* Flags — see below for details. */
+ NameID valueNameID; /* The name ID for entries in the 'name' table
+ * that provide a display string for this
+ * attribute value. */
+ HBFixed nominalValue; /* A numeric value for this attribute value. */
+ HBFixed rangeMinValue; /* The minimum value for a range associated
+ * with the specified name ID. */
+ HBFixed rangeMaxValue; /* The maximum value for a range associated
+ * with the specified name ID. */
+ public:
+ DEFINE_SIZE_STATIC (20);
+};
+
+/* Axis value table, format 3: one axis index with a value and a
+ * style-linked value, and the name ID of its display string.
+ * get_value () reports `value`, not `linkedValue`. Fixed size of 16 bytes. */
+struct AxisValueFormat3
+{
+ unsigned int get_axis_index () const { return axisIndex; }
+ float get_value () const { return value.to_float (); }
+
+ hb_ot_name_id_t get_value_name_id () const { return valueNameID; }
+
+ /* Only the fixed-size struct needs bounds-checking. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this)));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier — set to 3. */
+ HBUINT16 axisIndex; /* Zero-base index into the axis record array
+ * identifying the axis of design variation
+ * to which the axis value record applies.
+ * Must be less than designAxisCount. */
+ HBUINT16 flags; /* Flags — see below for details. */
+ NameID valueNameID; /* The name ID for entries in the 'name' table
+ * that provide a display string for this
+ * attribute value. */
+ HBFixed value; /* A numeric value for this attribute value. */
+ HBFixed linkedValue; /* The numeric value for a style-linked mapping
+ * from this value. */
+ public:
+ DEFINE_SIZE_STATIC (16);
+};
+
+/* One (axis index, value) pair; the element type of the trailing array in
+ * AxisValueFormat4. Fixed size of 6 bytes. */
+struct AxisValueRecord
+{
+ unsigned int get_axis_index () const { return axisIndex; }
+ float get_value () const { return value.to_float (); }
+
+ /* Only the fixed-size struct needs bounds-checking. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this)));
+ }
+
+ protected:
+ HBUINT16 axisIndex; /* Zero-base index into the axis record array
+ * identifying the axis to which this value
+ * applies. Must be less than designAxisCount. */
+ HBFixed value; /* A numeric value for this attribute value. */
+ public:
+ DEFINE_SIZE_STATIC (6);
+};
+
+/* Axis value table, format 4: an 8-byte header followed by `axisCount`
+ * AxisValueRecords, naming a combination of values across several axes. */
+struct AxisValueFormat4
+{
+ /* Returns the axis_index-th record of the trailing array (the index is
+ * a position in this table's own array, bounded by axisCount). */
+ const AxisValueRecord &get_axis_record (unsigned int axis_index) const
+ { return axisValues.as_array (axisCount)[axis_index]; }
+
+ hb_ot_name_id_t get_value_name_id () const { return valueNameID; }
+
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ /* Checking the fixed header is not enough: get_axis_record () reads
+ * from the variable-length tail, so the axisCount trailing records
+ * must be bounds-checked as well. */
+ return_trace (likely (c->check_struct (this) &&
+ axisValues.sanitize (c, axisCount)));
+ }
+
+ protected:
+ HBUINT16 format; /* Format identifier — set to 4. */
+ HBUINT16 axisCount; /* The total number of axes contributing to
+ * this axis-values combination. */
+ HBUINT16 flags; /* Flags — see below for details. */
+ NameID valueNameID; /* The name ID for entries in the 'name' table
+ * that provide a display string for this
+ * attribute value. */
+ UnsizedArrayOf<AxisValueRecord>
+ axisValues; /* Array of AxisValue records that provide the
+ * combination of axis values, one for each
+ * contributing axis. */
+ public:
+ DEFINE_SIZE_ARRAY (8, axisValues);
+};
+
+/* Format-dispatching wrapper over the four axis value table formats.
+ * The leading 16-bit `format` field selects the active union member. */
+struct AxisValue
+{
+ /* Returns the numeric axis value as a float. `axis_index` is only used
+ * by format 4, where it selects a record in that table's array.
+ * Unknown formats yield 0. (The return type must be float: a bool
+ * return would collapse every value to 0 or 1.) */
+ float get_value (unsigned int axis_index) const
+ {
+ switch (u.format)
+ {
+ case 1: return u.format1.get_value ();
+ case 2: return u.format2.get_value ();
+ case 3: return u.format3.get_value ();
+ case 4: return u.format4.get_axis_record (axis_index).get_value ();
+ default:return 0.f;
+ }
+ }
+
+ /* Returns the axis index for formats 1-3. Format 4 and unknown formats
+ * return (unsigned) -1. */
+ unsigned int get_axis_index () const
+ {
+ switch (u.format)
+ {
+ case 1: return u.format1.get_axis_index ();
+ case 2: return u.format2.get_axis_index ();
+ case 3: return u.format3.get_axis_index ();
+ /* case 4: Makes more sense for variable fonts which are handled by fvar in hb-style */
+ default:return -1;
+ }
+ }
+
+ /* Returns the value's name ID, or HB_OT_NAME_ID_INVALID for an
+ * unknown format. */
+ hb_ot_name_id_t get_value_name_id () const
+ {
+ switch (u.format)
+ {
+ case 1: return u.format1.get_value_name_id ();
+ case 2: return u.format2.get_value_name_id ();
+ case 3: return u.format3.get_value_name_id ();
+ case 4: return u.format4.get_value_name_id ();
+ default:return HB_OT_NAME_ID_INVALID;
+ }
+ }
+
+ /* Checks the format field first, then delegates to the matching format.
+ * Unknown formats are accepted (the accessors above fall back to
+ * defaults for them). */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ if (unlikely (!c->check_struct (this)))
+ return_trace (false);
+
+ switch (u.format)
+ {
+ case 1: return_trace (u.format1.sanitize (c));
+ case 2: return_trace (u.format2.sanitize (c));
+ case 3: return_trace (u.format3.sanitize (c));
+ case 4: return_trace (u.format4.sanitize (c));
+ default:return_trace (true);
+ }
+ }
+
+ protected:
+ union
+ {
+ HBUINT16 format;
+ AxisValueFormat1 format1;
+ AxisValueFormat2 format2;
+ AxisValueFormat3 format3;
+ AxisValueFormat4 format4;
+ } u;
+ public:
+ DEFINE_SIZE_UNION (2, format);
+};
+
+/* One design-axis record: the axis tag, the name ID of its display string
+ * and an ordering value. Fixed size of 8 bytes. */
+struct StatAxisRecord
+{
+ /* Compares this record's tag against `key`. */
+ int cmp (hb_tag_t key) const { return tag.cmp (key); }
+
+ hb_ot_name_id_t get_name_id () const { return nameID; }
+
+ /* Only the fixed-size struct needs bounds-checking. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this)));
+ }
+
+ protected:
+ Tag tag; /* A tag identifying the axis of design variation. */
+ NameID nameID; /* The name ID for entries in the 'name' table that
+ * provide a display string for this axis. */
+ HBUINT16 ordering; /* A value that applications can use to determine
+ * primary sorting of face names, or for ordering
+ * of descriptors when composing family or face names. */
+ public:
+ DEFINE_SIZE_STATIC (8);
+};
+
+/* The 'STAT' (Style Attributes) table: a list of design axis records and
+ * a list of offsets to axis value tables.
+ *
+ * Axis value offsets are relative to the start of the axis value offsets
+ * array, not to the start of the table; every accessor below resolves
+ * them against that base, matching what sanitize () validates. */
+struct STAT
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_STAT;
+
+ /* True when the version field is non-zero. */
+ bool has_data () const { return version.to_int (); }
+
+ /* Looks up the design axis with the given tag and, if some axis value
+ * table refers to it, stores that table's value in *value (when value
+ * is non-null) and returns true. Returns false when the tag is unknown
+ * or no axis value table matches. */
+ bool get_value (hb_tag_t tag, float *value) const
+ {
+ unsigned int axis_index;
+ if (!get_design_axes ().lfind (tag, &axis_index)) return false;
+
+ hb_array_t<const OffsetTo<AxisValue>> axis_values = get_axis_value_offsets ();
+ for (unsigned int i = 0; i < axis_values.length; i++)
+ {
+ /* Resolve against the offsets array, the same base sanitize () uses. */
+ const AxisValue& axis_value = (&(this+offsetToAxisValueOffsets))+axis_values[i];
+ if (axis_value.get_axis_index () == axis_index)
+ {
+ if (value)
+ *value = axis_value.get_value (axis_index);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ unsigned get_design_axis_count () const { return designAxisCount; }
+
+ /* Name ID of the axis record at the given index, or
+ * HB_OT_NAME_ID_INVALID when the index is out of range. */
+ hb_ot_name_id_t get_axis_record_name_id (unsigned axis_record_index) const
+ {
+ if (unlikely (axis_record_index >= designAxisCount)) return HB_OT_NAME_ID_INVALID;
+ const StatAxisRecord &axis_record = get_design_axes ()[axis_record_index];
+ return axis_record.get_name_id ();
+ }
+
+ unsigned get_axis_value_count () const { return axisValueCount; }
+
+ /* Name ID of the axis value table at the given index, or
+ * HB_OT_NAME_ID_INVALID when the index is out of range. */
+ hb_ot_name_id_t get_axis_value_name_id (unsigned axis_value_index) const
+ {
+ if (unlikely (axis_value_index >= axisValueCount)) return HB_OT_NAME_ID_INVALID;
+ /* Resolve against the offsets array, the same base sanitize () uses. */
+ const AxisValue &axis_value = (&(this+offsetToAxisValueOffsets)) + get_axis_value_offsets ()[axis_value_index];
+ return axis_value.get_value_name_id ();
+ }
+
+ /* Adds the name IDs of all design axes and all axis value tables to
+ * nameids_to_retain. Does nothing when has_data () is false. */
+ void collect_name_ids (hb_set_t *nameids_to_retain) const
+ {
+ if (!has_data ()) return;
+
+ + get_design_axes ()
+ | hb_map (&StatAxisRecord::get_name_id)
+ | hb_sink (nameids_to_retain)
+ ;
+
+ + get_axis_value_offsets ()
+ | hb_map (hb_add (&(this + offsetToAxisValueOffsets)))
+ | hb_map (&AxisValue::get_value_name_id)
+ | hb_sink (nameids_to_retain)
+ ;
+ }
+
+ /* Accepts only version 1.x with x > 0. Validates the design axes array
+ * and the axis value offsets array, the latter with the array itself
+ * as the base for the offsets it contains. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (likely (c->check_struct (this) &&
+ version.major == 1 &&
+ version.minor > 0 &&
+ designAxesOffset.sanitize (c, this, designAxisCount) &&
+ offsetToAxisValueOffsets.sanitize (c, this, axisValueCount, &(this+offsetToAxisValueOffsets))));
+ }
+
+ protected:
+ hb_array_t<const StatAxisRecord> const get_design_axes () const
+ { return (this+designAxesOffset).as_array (designAxisCount); }
+
+ hb_array_t<const OffsetTo<AxisValue>> const get_axis_value_offsets () const
+ { return (this+offsetToAxisValueOffsets).as_array (axisValueCount); }
+
+
+ protected:
+ FixedVersion<>version; /* Version of the stat table
+ * initially set to 0x00010002u */
+ HBUINT16 designAxisSize; /* The size in bytes of each axis record. */
+ HBUINT16 designAxisCount;/* The number of design axis records. In a
+ * font with an 'fvar' table, this value must be
+ * greater than or equal to the axisCount value
+ * in the 'fvar' table. In all fonts, must
+ * be greater than zero if axisValueCount
+ * is greater than zero. */
+ LNNOffsetTo<UnsizedArrayOf<StatAxisRecord>>
+ designAxesOffset;
+ /* Offset in bytes from the beginning of
+ * the STAT table to the start of the design
+ * axes array. If designAxisCount is zero,
+ * set to zero; if designAxisCount is greater
+ * than zero, must be greater than zero. */
+ HBUINT16 axisValueCount; /* The number of axis value tables. */
+ LNNOffsetTo<UnsizedArrayOf<OffsetTo<AxisValue>>>
+ offsetToAxisValueOffsets;
+ /* Offset in bytes from the beginning of
+ * the STAT table to the start of the design
+ * axes value offsets array. If axisValueCount
+ * is zero, set to zero; if axisValueCount is
+ * greater than zero, must be greater than zero. */
+ NameID elidedFallbackNameID;
+ /* Name ID used as fallback when projection of
+ * names into a particular font model produces
+ * a subfamily name containing only elidable
+ * elements. */
+ public:
+ DEFINE_SIZE_STATIC (20);
+};
+
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_STAT_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-tag-table.hh b/thirdparty/harfbuzz/src/hb-ot-tag-table.hh
new file mode 100644
index 0000000000..99937d9f69
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-tag-table.hh
@@ -0,0 +1,2176 @@
+/* == Start of generated table == */
+/*
+ * The following table is generated by running:
+ *
+ * ./gen-tag-table.py languagetags language-subtag-registry
+ *
+ * on files with these headers:
+ *
+ * <meta name="updated_at" content="2019-05-22 06:05 PM" />
+ * File-Date: 2020-05-12
+ */
+
+#ifndef HB_OT_TAG_TABLE_HH
+#define HB_OT_TAG_TABLE_HH
+
+static const LangTag ot_languages[] = {
+ {"aa", HB_TAG('A','F','R',' ')}, /* Afar */
+ {"aae", HB_TAG('S','Q','I',' ')}, /* Arbëreshë Albanian -> Albanian */
+ {"aao", HB_TAG('A','R','A',' ')}, /* Algerian Saharan Arabic -> Arabic */
+ {"aat", HB_TAG('S','Q','I',' ')}, /* Arvanitika Albanian -> Albanian */
+ {"ab", HB_TAG('A','B','K',' ')}, /* Abkhazian */
+ {"abh", HB_TAG('A','R','A',' ')}, /* Tajiki Arabic -> Arabic */
+ {"abq", HB_TAG('A','B','A',' ')}, /* Abaza */
+ {"abv", HB_TAG('A','R','A',' ')}, /* Baharna Arabic -> Arabic */
+ {"acf", HB_TAG('F','A','N',' ')}, /* Saint Lucian Creole French -> French Antillean */
+/*{"ach", HB_TAG('A','C','H',' ')},*/ /* Acoli -> Acholi */
+ {"acm", HB_TAG('A','R','A',' ')}, /* Mesopotamian Arabic -> Arabic */
+ {"acq", HB_TAG('A','R','A',' ')}, /* Ta'izzi-Adeni Arabic -> Arabic */
+/*{"acr", HB_TAG('A','C','R',' ')},*/ /* Achi */
+ {"acw", HB_TAG('A','R','A',' ')}, /* Hijazi Arabic -> Arabic */
+ {"acx", HB_TAG('A','R','A',' ')}, /* Omani Arabic -> Arabic */
+ {"acy", HB_TAG('A','R','A',' ')}, /* Cypriot Arabic -> Arabic */
+ {"ada", HB_TAG('D','N','G',' ')}, /* Adangme -> Dangme */
+ {"adf", HB_TAG('A','R','A',' ')}, /* Dhofari Arabic -> Arabic */
+ {"adp", HB_TAG('D','Z','N',' ')}, /* Adap (retired code) -> Dzongkha */
+/*{"ady", HB_TAG('A','D','Y',' ')},*/ /* Adyghe */
+ {"aeb", HB_TAG('A','R','A',' ')}, /* Tunisian Arabic -> Arabic */
+ {"aec", HB_TAG('A','R','A',' ')}, /* Saidi Arabic -> Arabic */
+ {"af", HB_TAG('A','F','K',' ')}, /* Afrikaans */
+ {"afb", HB_TAG('A','R','A',' ')}, /* Gulf Arabic -> Arabic */
+ {"ahg", HB_TAG('A','G','W',' ')}, /* Qimant -> Agaw */
+ {"aht", HB_TAG('A','T','H',' ')}, /* Ahtena -> Athapaskan */
+ {"aii", HB_TAG('S','W','A',' ')}, /* Assyrian Neo-Aramaic -> Swadaya Aramaic */
+ {"aii", HB_TAG('S','Y','R',' ')}, /* Assyrian Neo-Aramaic -> Syriac */
+/*{"aio", HB_TAG('A','I','O',' ')},*/ /* Aiton */
+ {"aiw", HB_TAG('A','R','I',' ')}, /* Aari */
+ {"ajp", HB_TAG('A','R','A',' ')}, /* South Levantine Arabic -> Arabic */
+ {"ak", HB_TAG('A','K','A',' ')}, /* Akan [macrolanguage] */
+ {"ak", HB_TAG('T','W','I',' ')}, /* Akan [macrolanguage] -> Twi */
+ {"aln", HB_TAG('S','Q','I',' ')}, /* Gheg Albanian -> Albanian */
+ {"als", HB_TAG('S','Q','I',' ')}, /* Tosk Albanian -> Albanian */
+/*{"alt", HB_TAG('A','L','T',' ')},*/ /* Southern Altai -> Altai */
+ {"am", HB_TAG('A','M','H',' ')}, /* Amharic */
+ {"amf", HB_TAG('H','B','N',' ')}, /* Hamer-Banna -> Hammer-Banna */
+ {"amw", HB_TAG('S','Y','R',' ')}, /* Western Neo-Aramaic -> Syriac */
+ {"an", HB_TAG('A','R','G',' ')}, /* Aragonese */
+/*{"ang", HB_TAG('A','N','G',' ')},*/ /* Old English (ca. 450-1100) -> Anglo-Saxon */
+ {"apc", HB_TAG('A','R','A',' ')}, /* North Levantine Arabic -> Arabic */
+ {"apd", HB_TAG('A','R','A',' ')}, /* Sudanese Arabic -> Arabic */
+ {"apj", HB_TAG('A','T','H',' ')}, /* Jicarilla Apache -> Athapaskan */
+ {"apk", HB_TAG('A','T','H',' ')}, /* Kiowa Apache -> Athapaskan */
+ {"apl", HB_TAG('A','T','H',' ')}, /* Lipan Apache -> Athapaskan */
+ {"apm", HB_TAG('A','T','H',' ')}, /* Mescalero-Chiricahua Apache -> Athapaskan */
+ {"apw", HB_TAG('A','T','H',' ')}, /* Western Apache -> Athapaskan */
+ {"ar", HB_TAG('A','R','A',' ')}, /* Arabic [macrolanguage] */
+ {"arb", HB_TAG('A','R','A',' ')}, /* Standard Arabic -> Arabic */
+ {"arn", HB_TAG('M','A','P',' ')}, /* Mapudungun */
+ {"arq", HB_TAG('A','R','A',' ')}, /* Algerian Arabic -> Arabic */
+ {"ars", HB_TAG('A','R','A',' ')}, /* Najdi Arabic -> Arabic */
+ {"ary", HB_TAG('M','O','R',' ')}, /* Moroccan Arabic -> Moroccan */
+ {"arz", HB_TAG('A','R','A',' ')}, /* Egyptian Arabic -> Arabic */
+ {"as", HB_TAG('A','S','M',' ')}, /* Assamese */
+/*{"ast", HB_TAG('A','S','T',' ')},*/ /* Asturian */
+/*{"ath", HB_TAG('A','T','H',' ')},*/ /* Athapascan [family] -> Athapaskan */
+ {"atj", HB_TAG('R','C','R',' ')}, /* Atikamekw -> R-Cree */
+ {"atv", HB_TAG('A','L','T',' ')}, /* Northern Altai -> Altai */
+ {"auz", HB_TAG('A','R','A',' ')}, /* Uzbeki Arabic -> Arabic */
+ {"av", HB_TAG('A','V','R',' ')}, /* Avaric -> Avar */
+ {"avl", HB_TAG('A','R','A',' ')}, /* Eastern Egyptian Bedawi Arabic -> Arabic */
+/*{"awa", HB_TAG('A','W','A',' ')},*/ /* Awadhi */
+ {"ay", HB_TAG('A','Y','M',' ')}, /* Aymara [macrolanguage] */
+ {"ayc", HB_TAG('A','Y','M',' ')}, /* Southern Aymara -> Aymara */
+ {"ayh", HB_TAG('A','R','A',' ')}, /* Hadrami Arabic -> Arabic */
+ {"ayl", HB_TAG('A','R','A',' ')}, /* Libyan Arabic -> Arabic */
+ {"ayn", HB_TAG('A','R','A',' ')}, /* Sanaani Arabic -> Arabic */
+ {"ayp", HB_TAG('A','R','A',' ')}, /* North Mesopotamian Arabic -> Arabic */
+ {"ayr", HB_TAG('A','Y','M',' ')}, /* Central Aymara -> Aymara */
+ {"az", HB_TAG('A','Z','E',' ')}, /* Azerbaijani [macrolanguage] */
+/*{"azb", HB_TAG('A','Z','B',' ')},*/ /* South Azerbaijani -> Torki */
+ {"azj", HB_TAG('A','Z','E',' ')}, /* North Azerbaijani -> Azerbaijani */
+ {"ba", HB_TAG('B','S','H',' ')}, /* Bashkir */
+ {"bad", HB_TAG('B','A','D','0')}, /* Banda [family] */
+ {"bai", HB_TAG('B','M','L',' ')}, /* Bamileke [family] */
+ {"bal", HB_TAG('B','L','I',' ')}, /* Baluchi [macrolanguage] */
+/*{"ban", HB_TAG('B','A','N',' ')},*/ /* Balinese */
+/*{"bar", HB_TAG('B','A','R',' ')},*/ /* Bavarian */
+/*{"bbc", HB_TAG('B','B','C',' ')},*/ /* Batak Toba */
+ {"bbz", HB_TAG('A','R','A',' ')}, /* Babalia Creole Arabic (retired code) -> Arabic */
+ {"bcc", HB_TAG('B','L','I',' ')}, /* Southern Balochi -> Baluchi */
+ {"bci", HB_TAG('B','A','U',' ')}, /* Baoulé -> Baulé */
+ {"bcl", HB_TAG('B','I','K',' ')}, /* Central Bikol -> Bikol */
+ {"bcq", HB_TAG('B','C','H',' ')}, /* Bench */
+ {"bcr", HB_TAG('A','T','H',' ')}, /* Babine -> Athapaskan */
+/*{"bdy", HB_TAG('B','D','Y',' ')},*/ /* Bandjalang */
+ {"be", HB_TAG('B','E','L',' ')}, /* Belarusian -> Belarussian */
+ {"bea", HB_TAG('A','T','H',' ')}, /* Beaver -> Athapaskan */
+ {"beb", HB_TAG('B','T','I',' ')}, /* Bebele -> Beti */
+/*{"bem", HB_TAG('B','E','M',' ')},*/ /* Bemba (Zambia) */
+ {"ber", HB_TAG('B','B','R',' ')}, /* Berber [family] */
+ {"bfq", HB_TAG('B','A','D',' ')}, /* Badaga */
+ {"bft", HB_TAG('B','L','T',' ')}, /* Balti */
+ {"bfu", HB_TAG('L','A','H',' ')}, /* Gahri -> Lahuli */
+ {"bfy", HB_TAG('B','A','G',' ')}, /* Bagheli -> Baghelkhandi */
+ {"bg", HB_TAG('B','G','R',' ')}, /* Bulgarian */
+/*{"bgc", HB_TAG('B','G','C',' ')},*/ /* Haryanvi */
+ {"bgn", HB_TAG('B','L','I',' ')}, /* Western Balochi -> Baluchi */
+ {"bgp", HB_TAG('B','L','I',' ')}, /* Eastern Balochi -> Baluchi */
+/*{"bgq", HB_TAG('B','G','Q',' ')},*/ /* Bagri */
+ {"bgr", HB_TAG('Q','I','N',' ')}, /* Bawm Chin -> Chin */
+ {"bhb", HB_TAG('B','H','I',' ')}, /* Bhili */
+/*{"bhi", HB_TAG('B','H','I',' ')},*/ /* Bhilali -> Bhili */
+ {"bhk", HB_TAG('B','I','K',' ')}, /* Albay Bicolano (retired code) -> Bikol */
+/*{"bho", HB_TAG('B','H','O',' ')},*/ /* Bhojpuri */
+ {"bhr", HB_TAG('M','L','G',' ')}, /* Bara Malagasy -> Malagasy */
+ {"bi", HB_TAG('B','I','S',' ')}, /* Bislama */
+/*{"bik", HB_TAG('B','I','K',' ')},*/ /* Bikol [macrolanguage] */
+ {"bin", HB_TAG('E','D','O',' ')}, /* Edo */
+/*{"bjj", HB_TAG('B','J','J',' ')},*/ /* Kanauji */
+ {"bjn", HB_TAG('M','L','Y',' ')}, /* Banjar -> Malay */
+ {"bjq", HB_TAG('M','L','G',' ')}, /* Southern Betsimisaraka Malagasy (retired code) -> Malagasy */
+ {"bjt", HB_TAG('B','L','N',' ')}, /* Balanta-Ganja -> Balante */
+ {"bla", HB_TAG('B','K','F',' ')}, /* Siksika -> Blackfoot */
+ {"ble", HB_TAG('B','L','N',' ')}, /* Balanta-Kentohe -> Balante */
+/*{"blk", HB_TAG('B','L','K',' ')},*/ /* Pa’o Karen */
+ {"bln", HB_TAG('B','I','K',' ')}, /* Southern Catanduanes Bikol -> Bikol */
+ {"bm", HB_TAG('B','M','B',' ')}, /* Bambara (Bamanankan) */
+ {"bmm", HB_TAG('M','L','G',' ')}, /* Northern Betsimisaraka Malagasy -> Malagasy */
+ {"bn", HB_TAG('B','E','N',' ')}, /* Bengali */
+ {"bo", HB_TAG('T','I','B',' ')}, /* Tibetan */
+/*{"bpy", HB_TAG('B','P','Y',' ')},*/ /* Bishnupriya -> Bishnupriya Manipuri */
+ {"bqi", HB_TAG('L','R','C',' ')}, /* Bakhtiari -> Luri */
+ {"br", HB_TAG('B','R','E',' ')}, /* Breton */
+ {"bra", HB_TAG('B','R','I',' ')}, /* Braj -> Braj Bhasha */
+/*{"brh", HB_TAG('B','R','H',' ')},*/ /* Brahui */
+/*{"brx", HB_TAG('B','R','X',' ')},*/ /* Bodo (India) */
+ {"bs", HB_TAG('B','O','S',' ')}, /* Bosnian */
+/*{"bsk", HB_TAG('B','S','K',' ')},*/ /* Burushaski */
+ {"btb", HB_TAG('B','T','I',' ')}, /* Beti (Cameroon) (retired code) */
+ {"btj", HB_TAG('M','L','Y',' ')}, /* Bacanese Malay -> Malay */
+ {"bto", HB_TAG('B','I','K',' ')}, /* Rinconada Bikol -> Bikol */
+/*{"bts", HB_TAG('B','T','S',' ')},*/ /* Batak Simalungun */
+/*{"bug", HB_TAG('B','U','G',' ')},*/ /* Buginese -> Bugis */
+ {"bum", HB_TAG('B','T','I',' ')}, /* Bulu (Cameroon) -> Beti */
+ {"bve", HB_TAG('M','L','Y',' ')}, /* Berau Malay -> Malay */
+ {"bvu", HB_TAG('M','L','Y',' ')}, /* Bukit Malay -> Malay */
+ {"bxk", HB_TAG('L','U','H',' ')}, /* Bukusu -> Luyia */
+ {"bxp", HB_TAG('B','T','I',' ')}, /* Bebil -> Beti */
+ {"bxr", HB_TAG('R','B','U',' ')}, /* Russia Buriat -> Russian Buriat */
+ {"byn", HB_TAG('B','I','L',' ')}, /* Bilin -> Bilen */
+/*{"byv", HB_TAG('B','Y','V',' ')},*/ /* Medumba */
+ {"bzc", HB_TAG('M','L','G',' ')}, /* Southern Betsimisaraka Malagasy -> Malagasy */
+ {"ca", HB_TAG('C','A','T',' ')}, /* Catalan */
+ {"caf", HB_TAG('C','R','R',' ')}, /* Southern Carrier -> Carrier */
+ {"caf", HB_TAG('A','T','H',' ')}, /* Southern Carrier -> Athapaskan */
+/*{"cak", HB_TAG('C','A','K',' ')},*/ /* Kaqchikel */
+/*{"cbk", HB_TAG('C','B','K',' ')},*/ /* Chavacano -> Zamboanga Chavacano */
+ {"cbl", HB_TAG('Q','I','N',' ')}, /* Bualkhaw Chin -> Chin */
+ {"cco", HB_TAG('C','C','H','N')}, /* Comaltepec Chinantec -> Chinantec */
+ {"ccq", HB_TAG('A','R','K',' ')}, /* Chaungtha (retired code) -> Rakhine */
+ {"cdo", HB_TAG('Z','H','S',' ')}, /* Min Dong Chinese -> Chinese Simplified */
+ {"ce", HB_TAG('C','H','E',' ')}, /* Chechen */
+/*{"ceb", HB_TAG('C','E','B',' ')},*/ /* Cebuano */
+ {"cfm", HB_TAG('H','A','L',' ')}, /* Halam (Falam Chin) */
+/*{"cgg", HB_TAG('C','G','G',' ')},*/ /* Chiga */
+ {"ch", HB_TAG('C','H','A',' ')}, /* Chamorro */
+ {"chj", HB_TAG('C','C','H','N')}, /* Ojitlán Chinantec -> Chinantec */
+ {"chk", HB_TAG('C','H','K','0')}, /* Chuukese */
+/*{"cho", HB_TAG('C','H','O',' ')},*/ /* Choctaw */
+ {"chp", HB_TAG('C','H','P',' ')}, /* Chipewyan */
+ {"chp", HB_TAG('S','A','Y',' ')}, /* Chipewyan -> Sayisi */
+ {"chp", HB_TAG('A','T','H',' ')}, /* Chipewyan -> Athapaskan */
+ {"chq", HB_TAG('C','C','H','N')}, /* Quiotepec Chinantec -> Chinantec */
+/*{"chr", HB_TAG('C','H','R',' ')},*/ /* Cherokee */
+/*{"chy", HB_TAG('C','H','Y',' ')},*/ /* Cheyenne */
+ {"chz", HB_TAG('C','C','H','N')}, /* Ozumacín Chinantec -> Chinantec */
+ {"ciw", HB_TAG('O','J','B',' ')}, /* Chippewa -> Ojibway */
+/*{"cja", HB_TAG('C','J','A',' ')},*/ /* Western Cham */
+/*{"cjm", HB_TAG('C','J','M',' ')},*/ /* Eastern Cham */
+ {"cjy", HB_TAG('Z','H','S',' ')}, /* Jinyu Chinese -> Chinese Simplified */
+ {"cka", HB_TAG('Q','I','N',' ')}, /* Khumi Awa Chin (retired code) -> Chin */
+ {"ckb", HB_TAG('K','U','R',' ')}, /* Central Kurdish -> Kurdish */
+ {"ckt", HB_TAG('C','H','K',' ')}, /* Chukot -> Chukchi */
+ {"clc", HB_TAG('A','T','H',' ')}, /* Chilcotin -> Athapaskan */
+ {"cld", HB_TAG('S','Y','R',' ')}, /* Chaldean Neo-Aramaic -> Syriac */
+ {"cle", HB_TAG('C','C','H','N')}, /* Lealao Chinantec -> Chinantec */
+ {"cmn", HB_TAG('Z','H','S',' ')}, /* Mandarin Chinese -> Chinese Simplified */
+ {"cmr", HB_TAG('Q','I','N',' ')}, /* Mro-Khimi Chin -> Chin */
+ {"cnb", HB_TAG('Q','I','N',' ')}, /* Chinbon Chin -> Chin */
+ {"cnh", HB_TAG('Q','I','N',' ')}, /* Hakha Chin -> Chin */
+ {"cnk", HB_TAG('Q','I','N',' ')}, /* Khumi Chin -> Chin */
+ {"cnl", HB_TAG('C','C','H','N')}, /* Lalana Chinantec -> Chinantec */
+ {"cnp", HB_TAG('Z','H','S',' ')}, /* Northern Ping Chinese -> Chinese Simplified */
+ {"cnt", HB_TAG('C','C','H','N')}, /* Tepetotutla Chinantec -> Chinantec */
+ {"cnw", HB_TAG('Q','I','N',' ')}, /* Ngawn Chin -> Chin */
+ {"co", HB_TAG('C','O','S',' ')}, /* Corsican */
+ {"coa", HB_TAG('M','L','Y',' ')}, /* Cocos Islands Malay -> Malay */
+/*{"cop", HB_TAG('C','O','P',' ')},*/ /* Coptic */
+ {"coq", HB_TAG('A','T','H',' ')}, /* Coquille -> Athapaskan */
+ {"cpa", HB_TAG('C','C','H','N')}, /* Palantla Chinantec -> Chinantec */
+ {"cpe", HB_TAG('C','P','P',' ')}, /* English-based creoles and pidgins [family] -> Creoles */
+ {"cpf", HB_TAG('C','P','P',' ')}, /* French-based creoles and pidgins [family] -> Creoles */
+/*{"cpp", HB_TAG('C','P','P',' ')},*/ /* Portuguese-based creoles and pidgins [family] -> Creoles */
+ {"cpx", HB_TAG('Z','H','S',' ')}, /* Pu-Xian Chinese -> Chinese Simplified */
+ {"cqd", HB_TAG('H','M','N',' ')}, /* Chuanqiandian Cluster Miao -> Hmong */
+ {"cqu", HB_TAG('Q','U','H',' ')}, /* Chilean Quechua (retired code) -> Quechua (Bolivia) */
+ {"cr", HB_TAG('C','R','E',' ')}, /* Cree [macrolanguage] */
+ {"cr", HB_TAG('Y','C','R',' ')}, /* Cree [macrolanguage] -> Y-Cree */
+ {"crh", HB_TAG('C','R','T',' ')}, /* Crimean Tatar */
+ {"crj", HB_TAG('E','C','R',' ')}, /* Southern East Cree -> Eastern Cree */
+ {"crk", HB_TAG('W','C','R',' ')}, /* Plains Cree -> West-Cree */
+ {"crl", HB_TAG('E','C','R',' ')}, /* Northern East Cree -> Eastern Cree */
+ {"crm", HB_TAG('M','C','R',' ')}, /* Moose Cree */
+ {"crm", HB_TAG('L','C','R',' ')}, /* Moose Cree -> L-Cree */
+ {"crp", HB_TAG('C','P','P',' ')}, /* Creoles and pidgins [family] -> Creoles */
+ {"crx", HB_TAG('C','R','R',' ')}, /* Carrier */
+ {"crx", HB_TAG('A','T','H',' ')}, /* Carrier -> Athapaskan */
+ {"cs", HB_TAG('C','S','Y',' ')}, /* Czech */
+ {"csa", HB_TAG('C','C','H','N')}, /* Chiltepec Chinantec -> Chinantec */
+/*{"csb", HB_TAG('C','S','B',' ')},*/ /* Kashubian */
+ {"csh", HB_TAG('Q','I','N',' ')}, /* Asho Chin -> Chin */
+ {"cso", HB_TAG('C','C','H','N')}, /* Sochiapam Chinantec -> Chinantec */
+ {"csp", HB_TAG('Z','H','S',' ')}, /* Southern Ping Chinese -> Chinese Simplified */
+ {"csw", HB_TAG('N','C','R',' ')}, /* Swampy Cree -> N-Cree */
+ {"csw", HB_TAG('N','H','C',' ')}, /* Swampy Cree -> Norway House Cree */
+ {"csy", HB_TAG('Q','I','N',' ')}, /* Siyin Chin -> Chin */
+ {"ctc", HB_TAG('A','T','H',' ')}, /* Chetco -> Athapaskan */
+ {"ctd", HB_TAG('Q','I','N',' ')}, /* Tedim Chin -> Chin */
+ {"cte", HB_TAG('C','C','H','N')}, /* Tepinapa Chinantec -> Chinantec */
+/*{"ctg", HB_TAG('C','T','G',' ')},*/ /* Chittagonian */
+ {"ctl", HB_TAG('C','C','H','N')}, /* Tlacoatzintepec Chinantec -> Chinantec */
+ {"cts", HB_TAG('B','I','K',' ')}, /* Northern Catanduanes Bikol -> Bikol */
+ {"cu", HB_TAG('C','S','L',' ')}, /* Church Slavonic */
+ {"cuc", HB_TAG('C','C','H','N')}, /* Usila Chinantec -> Chinantec */
+/*{"cuk", HB_TAG('C','U','K',' ')},*/ /* San Blas Kuna */
+ {"cv", HB_TAG('C','H','U',' ')}, /* Chuvash */
+ {"cvn", HB_TAG('C','C','H','N')}, /* Valle Nacional Chinantec -> Chinantec */
+ {"cwd", HB_TAG('D','C','R',' ')}, /* Woods Cree */
+ {"cwd", HB_TAG('T','C','R',' ')}, /* Woods Cree -> TH-Cree */
+ {"cy", HB_TAG('W','E','L',' ')}, /* Welsh */
+ {"czh", HB_TAG('Z','H','S',' ')}, /* Huizhou Chinese -> Chinese Simplified */
+ {"czo", HB_TAG('Z','H','S',' ')}, /* Min Zhong Chinese -> Chinese Simplified */
+ {"czt", HB_TAG('Q','I','N',' ')}, /* Zotung Chin -> Chin */
+ {"da", HB_TAG('D','A','N',' ')}, /* Danish */
+ {"dao", HB_TAG('Q','I','N',' ')}, /* Daai Chin -> Chin */
+ {"dap", HB_TAG('N','I','S',' ')}, /* Nisi (India) (retired code) */
+/*{"dar", HB_TAG('D','A','R',' ')},*/ /* Dargwa */
+/*{"dax", HB_TAG('D','A','X',' ')},*/ /* Dayi */
+ {"de", HB_TAG('D','E','U',' ')}, /* German */
+ {"den", HB_TAG('S','L','A',' ')}, /* Slave (Athapascan) [macrolanguage] -> Slavey */
+ {"den", HB_TAG('A','T','H',' ')}, /* Slave (Athapascan) [macrolanguage] -> Athapaskan */
+/*{"dgo", HB_TAG('D','G','O',' ')},*/ /* Dogri */
+ {"dgr", HB_TAG('A','T','H',' ')}, /* Dogrib -> Athapaskan */
+ {"dhd", HB_TAG('M','A','W',' ')}, /* Dhundari -> Marwari */
+/*{"dhg", HB_TAG('D','H','G',' ')},*/ /* Dhangu */
+ {"dib", HB_TAG('D','N','K',' ')}, /* South Central Dinka -> Dinka */
+ {"dik", HB_TAG('D','N','K',' ')}, /* Southwestern Dinka -> Dinka */
+ {"din", HB_TAG('D','N','K',' ')}, /* Dinka [macrolanguage] */
+ {"dip", HB_TAG('D','N','K',' ')}, /* Northeastern Dinka -> Dinka */
+/*{"diq", HB_TAG('D','I','Q',' ')},*/ /* Dimli */
+ {"diw", HB_TAG('D','N','K',' ')}, /* Northwestern Dinka -> Dinka */
+ {"dje", HB_TAG('D','J','R',' ')}, /* Zarma */
+ {"djr", HB_TAG('D','J','R','0')}, /* Djambarrpuyngu */
+ {"dks", HB_TAG('D','N','K',' ')}, /* Southeastern Dinka -> Dinka */
+ {"dng", HB_TAG('D','U','N',' ')}, /* Dungan */
+/*{"dnj", HB_TAG('D','N','J',' ')},*/ /* Dan */
+ {"doi", HB_TAG('D','G','R',' ')}, /* Dogri [macrolanguage] */
+ {"drh", HB_TAG('M','N','G',' ')}, /* Darkhat (retired code) -> Mongolian */
+ {"drw", HB_TAG('D','R','I',' ')}, /* Darwazi (retired code) -> Dari */
+ {"dsb", HB_TAG('L','S','B',' ')}, /* Lower Sorbian */
+ {"dty", HB_TAG('N','E','P',' ')}, /* Dotyali -> Nepali */
+/*{"duj", HB_TAG('D','U','J',' ')},*/ /* Dhuwal (retired code) */
+ {"dup", HB_TAG('M','L','Y',' ')}, /* Duano -> Malay */
+ {"dv", HB_TAG('D','I','V',' ')}, /* Divehi (Dhivehi, Maldivian) */
+ {"dv", HB_TAG('D','H','V',' ')}, /* Divehi (Dhivehi, Maldivian) (deprecated) */
+ {"dwk", HB_TAG('K','U','I',' ')}, /* Dawik Kui -> Kui */
+ {"dwu", HB_TAG('D','U','J',' ')}, /* Dhuwal */
+ {"dwy", HB_TAG('D','U','J',' ')}, /* Dhuwaya -> Dhuwal */
+ {"dyu", HB_TAG('J','U','L',' ')}, /* Dyula -> Jula */
+ {"dz", HB_TAG('D','Z','N',' ')}, /* Dzongkha */
+ {"ee", HB_TAG('E','W','E',' ')}, /* Ewe */
+/*{"efi", HB_TAG('E','F','I',' ')},*/ /* Efik */
+ {"ekk", HB_TAG('E','T','I',' ')}, /* Standard Estonian -> Estonian */
+ {"el", HB_TAG('E','L','L',' ')}, /* Modern Greek (1453-) -> Greek */
+ {"emk", HB_TAG('E','M','K',' ')}, /* Eastern Maninkakan */
+ {"emk", HB_TAG('M','N','K',' ')}, /* Eastern Maninkakan -> Maninka */
+ {"en", HB_TAG('E','N','G',' ')}, /* English */
+ {"enb", HB_TAG('K','A','L',' ')}, /* Markweeta -> Kalenjin */
+ {"enf", HB_TAG('F','N','E',' ')}, /* Forest Enets -> Forest Nenets */
+ {"enh", HB_TAG('T','N','E',' ')}, /* Tundra Enets -> Tundra Nenets */
+ {"eo", HB_TAG('N','T','O',' ')}, /* Esperanto */
+ {"es", HB_TAG('E','S','P',' ')}, /* Spanish */
+ {"esg", HB_TAG('G','O','N',' ')}, /* Aheri Gondi -> Gondi */
+ {"esi", HB_TAG('I','P','K',' ')}, /* North Alaskan Inupiatun -> Inupiat */
+ {"esk", HB_TAG('I','P','K',' ')}, /* Northwest Alaska Inupiatun -> Inupiat */
+/*{"esu", HB_TAG('E','S','U',' ')},*/ /* Central Yupik */
+ {"et", HB_TAG('E','T','I',' ')}, /* Estonian [macrolanguage] */
+ {"eto", HB_TAG('B','T','I',' ')}, /* Eton (Cameroon) -> Beti */
+ {"eu", HB_TAG('E','U','Q',' ')}, /* Basque */
+ {"eve", HB_TAG('E','V','N',' ')}, /* Even */
+ {"evn", HB_TAG('E','V','K',' ')}, /* Evenki */
+ {"ewo", HB_TAG('B','T','I',' ')}, /* Ewondo -> Beti */
+ {"eyo", HB_TAG('K','A','L',' ')}, /* Keiyo -> Kalenjin */
+ {"fa", HB_TAG('F','A','R',' ')}, /* Persian [macrolanguage] */
+ {"fan", HB_TAG('F','A','N','0')}, /* Fang (Equatorial Guinea) */
+/*{"fat", HB_TAG('F','A','T',' ')},*/ /* Fanti */
+ {"fbl", HB_TAG('B','I','K',' ')}, /* West Albay Bikol -> Bikol */
+ {"ff", HB_TAG('F','U','L',' ')}, /* Fulah [macrolanguage] */
+ {"ffm", HB_TAG('F','U','L',' ')}, /* Maasina Fulfulde -> Fulah */
+ {"fi", HB_TAG('F','I','N',' ')}, /* Finnish */
+ {"fil", HB_TAG('P','I','L',' ')}, /* Filipino */
+ {"fj", HB_TAG('F','J','I',' ')}, /* Fijian */
+ {"flm", HB_TAG('H','A','L',' ')}, /* Halam (Falam Chin) (retired code) */
+ {"flm", HB_TAG('Q','I','N',' ')}, /* Falam Chin (retired code) -> Chin */
+/*{"fmp", HB_TAG('F','M','P',' ')},*/ /* Fe’fe’ */
+ {"fo", HB_TAG('F','O','S',' ')}, /* Faroese */
+/*{"fon", HB_TAG('F','O','N',' ')},*/ /* Fon */
+ {"fr", HB_TAG('F','R','A',' ')}, /* French */
+/*{"frc", HB_TAG('F','R','C',' ')},*/ /* Cajun French */
+/*{"frp", HB_TAG('F','R','P',' ')},*/ /* Arpitan */
+ {"fub", HB_TAG('F','U','L',' ')}, /* Adamawa Fulfulde -> Fulah */
+ {"fuc", HB_TAG('F','U','L',' ')}, /* Pulaar -> Fulah */
+ {"fue", HB_TAG('F','U','L',' ')}, /* Borgu Fulfulde -> Fulah */
+ {"fuf", HB_TAG('F','T','A',' ')}, /* Pular -> Futa */
+ {"fuh", HB_TAG('F','U','L',' ')}, /* Western Niger Fulfulde -> Fulah */
+ {"fui", HB_TAG('F','U','L',' ')}, /* Bagirmi Fulfulde -> Fulah */
+ {"fuq", HB_TAG('F','U','L',' ')}, /* Central-Eastern Niger Fulfulde -> Fulah */
+ {"fur", HB_TAG('F','R','L',' ')}, /* Friulian */
+/*{"fuv", HB_TAG('F','U','V',' ')},*/ /* Nigerian Fulfulde */
+ {"fy", HB_TAG('F','R','I',' ')}, /* Western Frisian -> Frisian */
+ {"ga", HB_TAG('I','R','I',' ')}, /* Irish */
+ {"gaa", HB_TAG('G','A','D',' ')}, /* Ga */
+/*{"gag", HB_TAG('G','A','G',' ')},*/ /* Gagauz */
+ {"gan", HB_TAG('Z','H','S',' ')}, /* Gan Chinese -> Chinese Simplified */
+ {"gax", HB_TAG('O','R','O',' ')}, /* Borana-Arsi-Guji Oromo -> Oromo */
+ {"gaz", HB_TAG('O','R','O',' ')}, /* West Central Oromo -> Oromo */
+ {"gbm", HB_TAG('G','A','W',' ')}, /* Garhwali */
+ {"gce", HB_TAG('A','T','H',' ')}, /* Galice -> Athapaskan */
+ {"gd", HB_TAG('G','A','E',' ')}, /* Scottish Gaelic (Gaelic) */
+ {"gda", HB_TAG('R','A','J',' ')}, /* Gade Lohar -> Rajasthani */
+/*{"gez", HB_TAG('G','E','Z',' ')},*/ /* Geez */
+ {"ggo", HB_TAG('G','O','N',' ')}, /* Southern Gondi (retired code) -> Gondi */
+/*{"gih", HB_TAG('G','I','H',' ')},*/ /* Githabul */
+ {"gil", HB_TAG('G','I','L','0')}, /* Kiribati (Gilbertese) */
+ {"gju", HB_TAG('R','A','J',' ')}, /* Gujari -> Rajasthani */
+/*{"gkp", HB_TAG('G','K','P',' ')},*/ /* Guinea Kpelle -> Kpelle (Guinea) */
+ {"gl", HB_TAG('G','A','L',' ')}, /* Galician */
+ {"gld", HB_TAG('N','A','N',' ')}, /* Nanai */
+/*{"glk", HB_TAG('G','L','K',' ')},*/ /* Gilaki */
+ {"gn", HB_TAG('G','U','A',' ')}, /* Guarani [macrolanguage] */
+/*{"gnn", HB_TAG('G','N','N',' ')},*/ /* Gumatj */
+ {"gno", HB_TAG('G','O','N',' ')}, /* Northern Gondi -> Gondi */
+ {"gnw", HB_TAG('G','U','A',' ')}, /* Western Bolivian Guaraní -> Guarani */
+/*{"gog", HB_TAG('G','O','G',' ')},*/ /* Gogo */
+ {"gom", HB_TAG('K','O','K',' ')}, /* Goan Konkani -> Konkani */
+/*{"gon", HB_TAG('G','O','N',' ')},*/ /* Gondi [macrolanguage] */
+ {"grt", HB_TAG('G','R','O',' ')}, /* Garo */
+ {"gru", HB_TAG('S','O','G',' ')}, /* Kistane -> Sodo Gurage */
+ {"gsw", HB_TAG('A','L','S',' ')}, /* Alsatian */
+ {"gu", HB_TAG('G','U','J',' ')}, /* Gujarati */
+/*{"guc", HB_TAG('G','U','C',' ')},*/ /* Wayuu */
+/*{"guf", HB_TAG('G','U','F',' ')},*/ /* Gupapuyngu */
+ {"gug", HB_TAG('G','U','A',' ')}, /* Paraguayan Guaraní -> Guarani */
+ {"gui", HB_TAG('G','U','A',' ')}, /* Eastern Bolivian Guaraní -> Guarani */
+ {"guk", HB_TAG('G','M','Z',' ')}, /* Gumuz */
+ {"guk", HB_TAG('G','U','K',' ')}, /* Gumuz (SIL fonts) */
+ {"gun", HB_TAG('G','U','A',' ')}, /* Mbyá Guaraní -> Guarani */
+/*{"guz", HB_TAG('G','U','Z',' ')},*/ /* Gusii */
+ {"gv", HB_TAG('M','N','X',' ')}, /* Manx */
+ {"gwi", HB_TAG('A','T','H',' ')}, /* Gwichʼin -> Athapaskan */
+ {"ha", HB_TAG('H','A','U',' ')}, /* Hausa */
+ {"haa", HB_TAG('A','T','H',' ')}, /* Han -> Athapaskan */
+ {"hae", HB_TAG('O','R','O',' ')}, /* Eastern Oromo -> Oromo */
+ {"hak", HB_TAG('Z','H','S',' ')}, /* Hakka Chinese -> Chinese Simplified */
+ {"har", HB_TAG('H','R','I',' ')}, /* Harari */
+/*{"haw", HB_TAG('H','A','W',' ')},*/ /* Hawaiian */
+/*{"hay", HB_TAG('H','A','Y',' ')},*/ /* Haya */
+/*{"haz", HB_TAG('H','A','Z',' ')},*/ /* Hazaragi */
+ {"he", HB_TAG('I','W','R',' ')}, /* Hebrew */
+ {"hea", HB_TAG('H','M','N',' ')}, /* Northern Qiandong Miao -> Hmong */
+ {"hi", HB_TAG('H','I','N',' ')}, /* Hindi */
+/*{"hil", HB_TAG('H','I','L',' ')},*/ /* Hiligaynon */
+ {"hji", HB_TAG('M','L','Y',' ')}, /* Haji -> Malay */
+ {"hlt", HB_TAG('Q','I','N',' ')}, /* Matu Chin -> Chin */
+ {"hma", HB_TAG('H','M','N',' ')}, /* Southern Mashan Hmong -> Hmong */
+ {"hmc", HB_TAG('H','M','N',' ')}, /* Central Huishui Hmong -> Hmong */
+ {"hmd", HB_TAG('H','M','N',' ')}, /* Large Flowery Miao -> Hmong */
+ {"hme", HB_TAG('H','M','N',' ')}, /* Eastern Huishui Hmong -> Hmong */
+ {"hmg", HB_TAG('H','M','N',' ')}, /* Southwestern Guiyang Hmong -> Hmong */
+ {"hmh", HB_TAG('H','M','N',' ')}, /* Southwestern Huishui Hmong -> Hmong */
+ {"hmi", HB_TAG('H','M','N',' ')}, /* Northern Huishui Hmong -> Hmong */
+ {"hmj", HB_TAG('H','M','N',' ')}, /* Ge -> Hmong */
+ {"hml", HB_TAG('H','M','N',' ')}, /* Luopohe Hmong -> Hmong */
+ {"hmm", HB_TAG('H','M','N',' ')}, /* Central Mashan Hmong -> Hmong */
+/*{"hmn", HB_TAG('H','M','N',' ')},*/ /* Hmong [macrolanguage] */
+ {"hmp", HB_TAG('H','M','N',' ')}, /* Northern Mashan Hmong -> Hmong */
+ {"hmq", HB_TAG('H','M','N',' ')}, /* Eastern Qiandong Miao -> Hmong */
+ {"hms", HB_TAG('H','M','N',' ')}, /* Southern Qiandong Miao -> Hmong */
+ {"hmw", HB_TAG('H','M','N',' ')}, /* Western Mashan Hmong -> Hmong */
+ {"hmy", HB_TAG('H','M','N',' ')}, /* Southern Guiyang Hmong -> Hmong */
+ {"hmz", HB_TAG('H','M','N',' ')}, /* Hmong Shua -> Hmong */
+/*{"hnd", HB_TAG('H','N','D',' ')},*/ /* Southern Hindko -> Hindko */
+ {"hne", HB_TAG('C','H','H',' ')}, /* Chhattisgarhi -> Chattisgarhi */
+ {"hnj", HB_TAG('H','M','N',' ')}, /* Hmong Njua -> Hmong */
+ {"hno", HB_TAG('H','N','D',' ')}, /* Northern Hindko -> Hindko */
+ {"ho", HB_TAG('H','M','O',' ')}, /* Hiri Motu */
+ {"hoc", HB_TAG('H','O',' ',' ')}, /* Ho */
+ {"hoi", HB_TAG('A','T','H',' ')}, /* Holikachuk -> Athapaskan */
+ {"hoj", HB_TAG('H','A','R',' ')}, /* Hadothi -> Harauti */
+ {"hr", HB_TAG('H','R','V',' ')}, /* Croatian */
+ {"hrm", HB_TAG('H','M','N',' ')}, /* Horned Miao -> Hmong */
+ {"hsb", HB_TAG('U','S','B',' ')}, /* Upper Sorbian */
+ {"hsn", HB_TAG('Z','H','S',' ')}, /* Xiang Chinese -> Chinese Simplified */
+ {"ht", HB_TAG('H','A','I',' ')}, /* Haitian (Haitian Creole) */
+ {"hu", HB_TAG('H','U','N',' ')}, /* Hungarian */
+ {"huj", HB_TAG('H','M','N',' ')}, /* Northern Guiyang Hmong -> Hmong */
+ {"hup", HB_TAG('A','T','H',' ')}, /* Hupa -> Athapaskan */
+ {"hy", HB_TAG('H','Y','E','0')}, /* Armenian -> Armenian East */
+ {"hy", HB_TAG('H','Y','E',' ')}, /* Armenian */
+ {"hyw", HB_TAG('H','Y','E',' ')}, /* Western Armenian -> Armenian */
+ {"hz", HB_TAG('H','E','R',' ')}, /* Herero */
+ {"ia", HB_TAG('I','N','A',' ')}, /* Interlingua (International Auxiliary Language Association) */
+/*{"iba", HB_TAG('I','B','A',' ')},*/ /* Iban */
+/*{"ibb", HB_TAG('I','B','B',' ')},*/ /* Ibibio */
+ {"id", HB_TAG('I','N','D',' ')}, /* Indonesian */
+ {"ida", HB_TAG('L','U','H',' ')}, /* Idakho-Isukha-Tiriki -> Luyia */
+ {"ie", HB_TAG('I','L','E',' ')}, /* Interlingue */
+ {"ig", HB_TAG('I','B','O',' ')}, /* Igbo */
+ {"igb", HB_TAG('E','B','I',' ')}, /* Ebira */
+ {"ii", HB_TAG('Y','I','M',' ')}, /* Sichuan Yi -> Yi Modern */
+ {"ijc", HB_TAG('I','J','O',' ')}, /* Izon -> Ijo */
+/*{"ijo", HB_TAG('I','J','O',' ')},*/ /* Ijo [family] */
+ {"ik", HB_TAG('I','P','K',' ')}, /* Inupiaq [macrolanguage] -> Inupiat */
+ {"ike", HB_TAG('I','N','U',' ')}, /* Eastern Canadian Inuktitut -> Inuktitut */
+ {"ikt", HB_TAG('I','N','U',' ')}, /* Inuinnaqtun -> Inuktitut */
+/*{"ilo", HB_TAG('I','L','O',' ')},*/ /* Iloko -> Ilokano */
+ {"in", HB_TAG('I','N','D',' ')}, /* Indonesian (retired code) */
+ {"ing", HB_TAG('A','T','H',' ')}, /* Degexit'an -> Athapaskan */
+ {"inh", HB_TAG('I','N','G',' ')}, /* Ingush */
+ {"io", HB_TAG('I','D','O',' ')}, /* Ido */
+ {"is", HB_TAG('I','S','L',' ')}, /* Icelandic */
+ {"it", HB_TAG('I','T','A',' ')}, /* Italian */
+ {"iu", HB_TAG('I','N','U',' ')}, /* Inuktitut [macrolanguage] */
+ {"iw", HB_TAG('I','W','R',' ')}, /* Hebrew (retired code) */
+ {"ja", HB_TAG('J','A','N',' ')}, /* Japanese */
+ {"jak", HB_TAG('M','L','Y',' ')}, /* Jakun -> Malay */
+/*{"jam", HB_TAG('J','A','M',' ')},*/ /* Jamaican Creole English -> Jamaican Creole */
+ {"jax", HB_TAG('M','L','Y',' ')}, /* Jambi Malay -> Malay */
+/*{"jbo", HB_TAG('J','B','O',' ')},*/ /* Lojban */
+/*{"jct", HB_TAG('J','C','T',' ')},*/ /* Krymchak */
+ {"ji", HB_TAG('J','I','I',' ')}, /* Yiddish (retired code) */
+ {"jv", HB_TAG('J','A','V',' ')}, /* Javanese */
+ {"jw", HB_TAG('J','A','V',' ')}, /* Javanese (retired code) */
+ {"ka", HB_TAG('K','A','T',' ')}, /* Georgian */
+ {"kaa", HB_TAG('K','R','K',' ')}, /* Karakalpak */
+ {"kab", HB_TAG('K','A','B','0')}, /* Kabyle */
+ {"kam", HB_TAG('K','M','B',' ')}, /* Kamba (Kenya) */
+ {"kar", HB_TAG('K','R','N',' ')}, /* Karen [family] */
+ {"kbd", HB_TAG('K','A','B',' ')}, /* Kabardian */
+ {"kby", HB_TAG('K','N','R',' ')}, /* Manga Kanuri -> Kanuri */
+ {"kca", HB_TAG('K','H','K',' ')}, /* Khanty -> Khanty-Kazim */
+ {"kca", HB_TAG('K','H','S',' ')}, /* Khanty -> Khanty-Shurishkar */
+ {"kca", HB_TAG('K','H','V',' ')}, /* Khanty -> Khanty-Vakhi */
+/*{"kde", HB_TAG('K','D','E',' ')},*/ /* Makonde */
+ {"kdr", HB_TAG('K','R','M',' ')}, /* Karaim */
+ {"kdt", HB_TAG('K','U','Y',' ')}, /* Kuy */
+/*{"kea", HB_TAG('K','E','A',' ')},*/ /* Kabuverdianu (Crioulo) */
+/*{"kek", HB_TAG('K','E','K',' ')},*/ /* Kekchi */
+ {"kex", HB_TAG('K','K','N',' ')}, /* Kukna -> Kokni */
+ {"kfa", HB_TAG('K','O','D',' ')}, /* Kodava -> Kodagu */
+ {"kfr", HB_TAG('K','A','C',' ')}, /* Kachhi -> Kachchi */
+ {"kfx", HB_TAG('K','U','L',' ')}, /* Kullu Pahari -> Kulvi */
+ {"kfy", HB_TAG('K','M','N',' ')}, /* Kumaoni */
+ {"kg", HB_TAG('K','O','N','0')}, /* Kongo [macrolanguage] */
+ {"kha", HB_TAG('K','S','I',' ')}, /* Khasi */
+ {"khb", HB_TAG('X','B','D',' ')}, /* Lü */
+ {"khk", HB_TAG('M','N','G',' ')}, /* Halh Mongolian -> Mongolian */
+ {"kht", HB_TAG('K','H','N',' ')}, /* Khamti -> Khamti Shan (Microsoft fonts) */
+ {"kht", HB_TAG('K','H','T',' ')}, /* Khamti -> Khamti Shan (OpenType spec and SIL fonts) */
+/*{"khw", HB_TAG('K','H','W',' ')},*/ /* Khowar */
+ {"ki", HB_TAG('K','I','K',' ')}, /* Kikuyu (Gikuyu) */
+/*{"kiu", HB_TAG('K','I','U',' ')},*/ /* Kirmanjki */
+ {"kj", HB_TAG('K','U','A',' ')}, /* Kuanyama */
+/*{"kjd", HB_TAG('K','J','D',' ')},*/ /* Southern Kiwai */
+ {"kjh", HB_TAG('K','H','A',' ')}, /* Khakas -> Khakass */
+/*{"kjp", HB_TAG('K','J','P',' ')},*/ /* Pwo Eastern Karen -> Eastern Pwo Karen */
+/*{"kjz", HB_TAG('K','J','Z',' ')},*/ /* Bumthangkha */
+ {"kk", HB_TAG('K','A','Z',' ')}, /* Kazakh */
+ {"kkz", HB_TAG('A','T','H',' ')}, /* Kaska -> Athapaskan */
+ {"kl", HB_TAG('G','R','N',' ')}, /* Greenlandic */
+ {"kln", HB_TAG('K','A','L',' ')}, /* Kalenjin [macrolanguage] */
+ {"km", HB_TAG('K','H','M',' ')}, /* Khmer */
+ {"kmb", HB_TAG('M','B','N',' ')}, /* Kimbundu -> Mbundu */
+ {"kmr", HB_TAG('K','U','R',' ')}, /* Northern Kurdish -> Kurdish */
+ {"kmw", HB_TAG('K','M','O',' ')}, /* Komo (Democratic Republic of Congo) */
+/*{"kmz", HB_TAG('K','M','Z',' ')},*/ /* Khorasani Turkish -> Khorasani Turkic */
+ {"kn", HB_TAG('K','A','N',' ')}, /* Kannada */
+ {"knc", HB_TAG('K','N','R',' ')}, /* Central Kanuri -> Kanuri */
+ {"kng", HB_TAG('K','O','N','0')}, /* Koongo -> Kongo */
+ {"knn", HB_TAG('K','O','K',' ')}, /* Konkani */
+ {"ko", HB_TAG('K','O','R',' ')}, /* Korean */
+ {"koi", HB_TAG('K','O','P',' ')}, /* Komi-Permyak */
+/*{"kok", HB_TAG('K','O','K',' ')},*/ /* Konkani [macrolanguage] */
+/*{"kos", HB_TAG('K','O','S',' ')},*/ /* Kosraean */
+ {"koy", HB_TAG('A','T','H',' ')}, /* Koyukon -> Athapaskan */
+ {"kpe", HB_TAG('K','P','L',' ')}, /* Kpelle [macrolanguage] */
+ {"kpv", HB_TAG('K','O','Z',' ')}, /* Komi-Zyrian */
+ {"kpy", HB_TAG('K','Y','K',' ')}, /* Koryak */
+ {"kqs", HB_TAG('K','I','S',' ')}, /* Northern Kissi -> Kisii */
+ {"kqy", HB_TAG('K','R','T',' ')}, /* Koorete */
+ {"kr", HB_TAG('K','N','R',' ')}, /* Kanuri [macrolanguage] */
+ {"krc", HB_TAG('K','A','R',' ')}, /* Karachay-Balkar -> Karachay */
+ {"krc", HB_TAG('B','A','L',' ')}, /* Karachay-Balkar -> Balkar */
+/*{"kri", HB_TAG('K','R','I',' ')},*/ /* Krio */
+/*{"krl", HB_TAG('K','R','L',' ')},*/ /* Karelian */
+ {"krt", HB_TAG('K','N','R',' ')}, /* Tumari Kanuri -> Kanuri */
+ {"kru", HB_TAG('K','U','U',' ')}, /* Kurukh */
+ {"ks", HB_TAG('K','S','H',' ')}, /* Kashmiri */
+ {"ksh", HB_TAG('K','S','H','0')}, /* Kölsch -> Ripuarian */
+ {"kss", HB_TAG('K','I','S',' ')}, /* Southern Kisi -> Kisii */
+/*{"ksw", HB_TAG('K','S','W',' ')},*/ /* S’gaw Karen */
+ {"ktb", HB_TAG('K','E','B',' ')}, /* Kambaata -> Kebena */
+ {"ktu", HB_TAG('K','O','N',' ')}, /* Kituba (Democratic Republic of Congo) -> Kikongo */
+ {"ktw", HB_TAG('A','T','H',' ')}, /* Kato -> Athapaskan */
+ {"ku", HB_TAG('K','U','R',' ')}, /* Kurdish [macrolanguage] */
+/*{"kum", HB_TAG('K','U','M',' ')},*/ /* Kumyk */
+ {"kuu", HB_TAG('A','T','H',' ')}, /* Upper Kuskokwim -> Athapaskan */
+ {"kv", HB_TAG('K','O','M',' ')}, /* Komi [macrolanguage] */
+ {"kvb", HB_TAG('M','L','Y',' ')}, /* Kubu -> Malay */
+ {"kvr", HB_TAG('M','L','Y',' ')}, /* Kerinci -> Malay */
+ {"kw", HB_TAG('C','O','R',' ')}, /* Cornish */
+ {"kwy", HB_TAG('K','O','N','0')}, /* San Salvador Kongo -> Kongo */
+ {"kxc", HB_TAG('K','M','S',' ')}, /* Konso -> Komso */
+ {"kxd", HB_TAG('M','L','Y',' ')}, /* Brunei -> Malay */
+ {"kxl", HB_TAG('K','U','U',' ')}, /* Nepali Kurux (retired code) -> Kurukh */
+ {"kxu", HB_TAG('K','U','I',' ')}, /* Kui (India) (retired code) */
+ {"ky", HB_TAG('K','I','R',' ')}, /* Kirghiz (Kyrgyz) */
+/*{"kyu", HB_TAG('K','Y','U',' ')},*/ /* Western Kayah */
+ {"la", HB_TAG('L','A','T',' ')}, /* Latin */
+ {"lad", HB_TAG('J','U','D',' ')}, /* Ladino */
+ {"lb", HB_TAG('L','T','Z',' ')}, /* Luxembourgish */
+ {"lbe", HB_TAG('L','A','K',' ')}, /* Lak */
+ {"lbj", HB_TAG('L','D','K',' ')}, /* Ladakhi */
+ {"lbl", HB_TAG('B','I','K',' ')}, /* Libon Bikol -> Bikol */
+ {"lce", HB_TAG('M','L','Y',' ')}, /* Loncong -> Malay */
+ {"lcf", HB_TAG('M','L','Y',' ')}, /* Lubu -> Malay */
+ {"ldi", HB_TAG('K','O','N','0')}, /* Laari -> Kongo */
+/*{"lez", HB_TAG('L','E','Z',' ')},*/ /* Lezghian -> Lezgi */
+ {"lg", HB_TAG('L','U','G',' ')}, /* Ganda */
+ {"li", HB_TAG('L','I','M',' ')}, /* Limburgish */
+ {"lif", HB_TAG('L','M','B',' ')}, /* Limbu */
+/*{"lij", HB_TAG('L','I','J',' ')},*/ /* Ligurian */
+/*{"lis", HB_TAG('L','I','S',' ')},*/ /* Lisu */
+ {"liw", HB_TAG('M','L','Y',' ')}, /* Col -> Malay */
+/*{"ljp", HB_TAG('L','J','P',' ')},*/ /* Lampung Api -> Lampung */
+ {"lkb", HB_TAG('L','U','H',' ')}, /* Kabras -> Luyia */
+/*{"lki", HB_TAG('L','K','I',' ')},*/ /* Laki */
+ {"lko", HB_TAG('L','U','H',' ')}, /* Khayo -> Luyia */
+ {"lks", HB_TAG('L','U','H',' ')}, /* Kisa -> Luyia */
+ {"lld", HB_TAG('L','A','D',' ')}, /* Ladin */
+ {"lmn", HB_TAG('L','A','M',' ')}, /* Lambadi -> Lambani */
+/*{"lmo", HB_TAG('L','M','O',' ')},*/ /* Lombard */
+ {"ln", HB_TAG('L','I','N',' ')}, /* Lingala */
+ {"lo", HB_TAG('L','A','O',' ')}, /* Lao */
+/*{"lom", HB_TAG('L','O','M',' ')},*/ /* Loma (Liberia) */
+/*{"lrc", HB_TAG('L','R','C',' ')},*/ /* Northern Luri -> Luri */
+ {"lri", HB_TAG('L','U','H',' ')}, /* Marachi -> Luyia */
+ {"lrm", HB_TAG('L','U','H',' ')}, /* Marama -> Luyia */
+ {"lsm", HB_TAG('L','U','H',' ')}, /* Saamia -> Luyia */
+ {"lt", HB_TAG('L','T','H',' ')}, /* Lithuanian */
+ {"ltg", HB_TAG('L','V','I',' ')}, /* Latgalian -> Latvian */
+ {"lto", HB_TAG('L','U','H',' ')}, /* Tsotso -> Luyia */
+ {"lts", HB_TAG('L','U','H',' ')}, /* Tachoni -> Luyia */
+ {"lu", HB_TAG('L','U','B',' ')}, /* Luba-Katanga */
+/*{"lua", HB_TAG('L','U','A',' ')},*/ /* Luba-Lulua */
+/*{"luo", HB_TAG('L','U','O',' ')},*/ /* Luo (Kenya and Tanzania) */
+ {"lus", HB_TAG('M','I','Z',' ')}, /* Lushai -> Mizo */
+ {"luy", HB_TAG('L','U','H',' ')}, /* Luyia [macrolanguage] */
+ {"luz", HB_TAG('L','R','C',' ')}, /* Southern Luri -> Luri */
+ {"lv", HB_TAG('L','V','I',' ')}, /* Latvian [macrolanguage] */
+ {"lvs", HB_TAG('L','V','I',' ')}, /* Standard Latvian -> Latvian */
+ {"lwg", HB_TAG('L','U','H',' ')}, /* Wanga -> Luyia */
+ {"lzh", HB_TAG('Z','H','T',' ')}, /* Literary Chinese -> Chinese Traditional */
+ {"lzz", HB_TAG('L','A','Z',' ')}, /* Laz */
+/*{"mad", HB_TAG('M','A','D',' ')},*/ /* Madurese -> Madura */
+/*{"mag", HB_TAG('M','A','G',' ')},*/ /* Magahi */
+ {"mai", HB_TAG('M','T','H',' ')}, /* Maithili */
+ {"mak", HB_TAG('M','K','R',' ')}, /* Makasar */
+/*{"mam", HB_TAG('M','A','M',' ')},*/ /* Mam */
+ {"man", HB_TAG('M','N','K',' ')}, /* Mandingo [macrolanguage] -> Maninka */
+ {"max", HB_TAG('M','L','Y',' ')}, /* North Moluccan Malay -> Malay */
+/*{"mbo", HB_TAG('M','B','O',' ')},*/ /* Mbo (Cameroon) */
+ {"mct", HB_TAG('B','T','I',' ')}, /* Mengisa -> Beti */
+ {"mdf", HB_TAG('M','O','K',' ')}, /* Moksha */
+/*{"mdr", HB_TAG('M','D','R',' ')},*/ /* Mandar */
+ {"mdy", HB_TAG('M','L','E',' ')}, /* Male (Ethiopia) */
+ {"men", HB_TAG('M','D','E',' ')}, /* Mende (Sierra Leone) */
+ {"meo", HB_TAG('M','L','Y',' ')}, /* Kedah Malay -> Malay */
+/*{"mer", HB_TAG('M','E','R',' ')},*/ /* Meru */
+/*{"mfa", HB_TAG('M','F','A',' ')},*/ /* Pattani Malay */
+ {"mfb", HB_TAG('M','L','Y',' ')}, /* Bangka -> Malay */
+/*{"mfe", HB_TAG('M','F','E',' ')},*/ /* Morisyen */
+ {"mg", HB_TAG('M','L','G',' ')}, /* Malagasy [macrolanguage] */
+ {"mh", HB_TAG('M','A','H',' ')}, /* Marshallese */
+ {"mhr", HB_TAG('L','M','A',' ')}, /* Eastern Mari -> Low Mari */
+ {"mhv", HB_TAG('A','R','K',' ')}, /* Arakanese (retired code) -> Rakhine */
+ {"mi", HB_TAG('M','R','I',' ')}, /* Maori */
+/*{"min", HB_TAG('M','I','N',' ')},*/ /* Minangkabau */
+ {"mk", HB_TAG('M','K','D',' ')}, /* Macedonian */
+ {"mku", HB_TAG('M','N','K',' ')}, /* Konyanka Maninka -> Maninka */
+/*{"mkw", HB_TAG('M','K','W',' ')},*/ /* Kituba (Congo) */
+ {"ml", HB_TAG('M','A','L',' ')}, /* Malayalam -> Malayalam Traditional */
+ {"ml", HB_TAG('M','L','R',' ')}, /* Malayalam -> Malayalam Reformed */
+ {"mlq", HB_TAG('M','L','N',' ')}, /* Western Maninkakan -> Malinke */
+ {"mlq", HB_TAG('M','N','K',' ')}, /* Western Maninkakan -> Maninka */
+ {"mmr", HB_TAG('H','M','N',' ')}, /* Western Xiangxi Miao -> Hmong */
+ {"mn", HB_TAG('M','N','G',' ')}, /* Mongolian [macrolanguage] */
+ {"mnc", HB_TAG('M','C','H',' ')}, /* Manchu */
+/*{"mni", HB_TAG('M','N','I',' ')},*/ /* Manipuri */
+ {"mnk", HB_TAG('M','N','D',' ')}, /* Mandinka */
+ {"mnk", HB_TAG('M','N','K',' ')}, /* Mandinka -> Maninka */
+ {"mnp", HB_TAG('Z','H','S',' ')}, /* Min Bei Chinese -> Chinese Simplified */
+ {"mns", HB_TAG('M','A','N',' ')}, /* Mansi */
+ {"mnw", HB_TAG('M','O','N',' ')}, /* Mon */
+ {"mo", HB_TAG('M','O','L',' ')}, /* Moldavian (retired code) */
+/*{"moh", HB_TAG('M','O','H',' ')},*/ /* Mohawk */
+/*{"mos", HB_TAG('M','O','S',' ')},*/ /* Mossi */
+ {"mpe", HB_TAG('M','A','J',' ')}, /* Majang */
+ {"mqg", HB_TAG('M','L','Y',' ')}, /* Kota Bangun Kutai Malay -> Malay */
+ {"mr", HB_TAG('M','A','R',' ')}, /* Marathi */
+ {"mrh", HB_TAG('Q','I','N',' ')}, /* Mara Chin -> Chin */
+ {"mrj", HB_TAG('H','M','A',' ')}, /* Western Mari -> High Mari */
+ {"ms", HB_TAG('M','L','Y',' ')}, /* Malay [macrolanguage] */
+ {"msc", HB_TAG('M','N','K',' ')}, /* Sankaran Maninka -> Maninka */
+ {"msh", HB_TAG('M','L','G',' ')}, /* Masikoro Malagasy -> Malagasy */
+ {"msi", HB_TAG('M','L','Y',' ')}, /* Sabah Malay -> Malay */
+ {"mt", HB_TAG('M','T','S',' ')}, /* Maltese */
+ {"mtr", HB_TAG('M','A','W',' ')}, /* Mewari -> Marwari */
+ {"mui", HB_TAG('M','L','Y',' ')}, /* Musi -> Malay */
+ {"mup", HB_TAG('R','A','J',' ')}, /* Malvi -> Rajasthani */
+ {"muq", HB_TAG('H','M','N',' ')}, /* Eastern Xiangxi Miao -> Hmong */
+/*{"mus", HB_TAG('M','U','S',' ')},*/ /* Creek -> Muscogee */
+ {"mvb", HB_TAG('A','T','H',' ')}, /* Mattole -> Athapaskan */
+ {"mve", HB_TAG('M','A','W',' ')}, /* Marwari (Pakistan) */
+ {"mvf", HB_TAG('M','N','G',' ')}, /* Peripheral Mongolian -> Mongolian */
+ {"mwk", HB_TAG('M','N','K',' ')}, /* Kita Maninkakan -> Maninka */
+/*{"mwl", HB_TAG('M','W','L',' ')},*/ /* Mirandese */
+ {"mwr", HB_TAG('M','A','W',' ')}, /* Marwari [macrolanguage] */
+/*{"mww", HB_TAG('M','W','W',' ')},*/ /* Hmong Daw */
+ {"my", HB_TAG('B','R','M',' ')}, /* Burmese */
+ {"mym", HB_TAG('M','E','N',' ')}, /* Me’en */
+/*{"myn", HB_TAG('M','Y','N',' ')},*/ /* Mayan [family] */
+ {"myq", HB_TAG('M','N','K',' ')}, /* Forest Maninka (retired code) -> Maninka */
+ {"myv", HB_TAG('E','R','Z',' ')}, /* Erzya */
+/*{"mzn", HB_TAG('M','Z','N',' ')},*/ /* Mazanderani */
+ {"na", HB_TAG('N','A','U',' ')}, /* Nauru -> Nauruan */
+/*{"nag", HB_TAG('N','A','G',' ')},*/ /* Naga Pidgin -> Naga-Assamese */
+/*{"nah", HB_TAG('N','A','H',' ')},*/ /* Nahuatl [family] */
+ {"nan", HB_TAG('Z','H','S',' ')}, /* Min Nan Chinese -> Chinese Simplified */
+/*{"nap", HB_TAG('N','A','P',' ')},*/ /* Neapolitan */
+ {"nb", HB_TAG('N','O','R',' ')}, /* Norwegian Bokmål -> Norwegian */
+ {"nd", HB_TAG('N','D','B',' ')}, /* North Ndebele -> Ndebele */
+/*{"ndc", HB_TAG('N','D','C',' ')},*/ /* Ndau */
+/*{"nds", HB_TAG('N','D','S',' ')},*/ /* Low Saxon */
+ {"ne", HB_TAG('N','E','P',' ')}, /* Nepali [macrolanguage] */
+/*{"new", HB_TAG('N','E','W',' ')},*/ /* Newari */
+ {"ng", HB_TAG('N','D','G',' ')}, /* Ndonga */
+/*{"nga", HB_TAG('N','G','A',' ')},*/ /* Ngbaka */
+ {"ngl", HB_TAG('L','M','W',' ')}, /* Lomwe */
+ {"ngo", HB_TAG('S','X','T',' ')}, /* Ngoni -> Sutu */
+ {"nhd", HB_TAG('G','U','A',' ')}, /* Chiripá -> Guarani */
+ {"niq", HB_TAG('K','A','L',' ')}, /* Nandi -> Kalenjin */
+/*{"niu", HB_TAG('N','I','U',' ')},*/ /* Niuean */
+ {"niv", HB_TAG('G','I','L',' ')}, /* Gilyak */
+ {"njz", HB_TAG('N','I','S',' ')}, /* Nyishi -> Nisi */
+ {"nl", HB_TAG('N','L','D',' ')}, /* Dutch */
+ {"nle", HB_TAG('L','U','H',' ')}, /* East Nyala -> Luyia */
+ {"nn", HB_TAG('N','Y','N',' ')}, /* Norwegian Nynorsk (Nynorsk, Norwegian) */
+ {"no", HB_TAG('N','O','R',' ')}, /* Norwegian [macrolanguage] */
+ {"nod", HB_TAG('N','T','A',' ')}, /* Northern Thai -> Northern Tai */
+/*{"noe", HB_TAG('N','O','E',' ')},*/ /* Nimadi */
+/*{"nog", HB_TAG('N','O','G',' ')},*/ /* Nogai */
+/*{"nov", HB_TAG('N','O','V',' ')},*/ /* Novial */
+ {"npi", HB_TAG('N','E','P',' ')}, /* Nepali */
+ {"nqo", HB_TAG('N','K','O',' ')}, /* N’Ko */
+ {"nr", HB_TAG('N','D','B',' ')}, /* South Ndebele -> Ndebele */
+ {"nsk", HB_TAG('N','A','S',' ')}, /* Naskapi */
+/*{"nso", HB_TAG('N','S','O',' ')},*/ /* Pedi -> Sotho, Northern */
+ {"nv", HB_TAG('N','A','V',' ')}, /* Navajo */
+ {"nv", HB_TAG('A','T','H',' ')}, /* Navajo -> Athapaskan */
+ {"ny", HB_TAG('C','H','I',' ')}, /* Chichewa (Chewa, Nyanja) */
+ {"nyd", HB_TAG('L','U','H',' ')}, /* Nyore -> Luyia */
+/*{"nym", HB_TAG('N','Y','M',' ')},*/ /* Nyamwezi */
+ {"nyn", HB_TAG('N','K','L',' ')}, /* Nyankole */
+/*{"nza", HB_TAG('N','Z','A',' ')},*/ /* Tigon Mbembe -> Mbembe Tigon */
+ {"oc", HB_TAG('O','C','I',' ')}, /* Occitan (post 1500) */
+ {"oj", HB_TAG('O','J','B',' ')}, /* Ojibwa [macrolanguage] -> Ojibway */
+/*{"ojb", HB_TAG('O','J','B',' ')},*/ /* Northwestern Ojibwa -> Ojibway */
+ {"ojc", HB_TAG('O','J','B',' ')}, /* Central Ojibwa -> Ojibway */
+ {"ojg", HB_TAG('O','J','B',' ')}, /* Eastern Ojibwa -> Ojibway */
+ {"ojs", HB_TAG('O','C','R',' ')}, /* Severn Ojibwa -> Oji-Cree */
+ {"ojw", HB_TAG('O','J','B',' ')}, /* Western Ojibwa -> Ojibway */
+ {"oki", HB_TAG('K','A','L',' ')}, /* Okiek -> Kalenjin */
+ {"okm", HB_TAG('K','O','H',' ')}, /* Middle Korean (10th-16th cent.) -> Korean Old Hangul */
+ {"om", HB_TAG('O','R','O',' ')}, /* Oromo [macrolanguage] */
+ {"or", HB_TAG('O','R','I',' ')}, /* Odia (formerly Oriya) [macrolanguage] */
+ {"orc", HB_TAG('O','R','O',' ')}, /* Orma -> Oromo */
+ {"orn", HB_TAG('M','L','Y',' ')}, /* Orang Kanaq -> Malay */
+ {"ors", HB_TAG('M','L','Y',' ')}, /* Orang Seletar -> Malay */
+ {"ory", HB_TAG('O','R','I',' ')}, /* Odia (formerly Oriya) */
+ {"os", HB_TAG('O','S','S',' ')}, /* Ossetian */
+ {"otw", HB_TAG('O','J','B',' ')}, /* Ottawa -> Ojibway */
+ {"pa", HB_TAG('P','A','N',' ')}, /* Punjabi */
+/*{"pag", HB_TAG('P','A','G',' ')},*/ /* Pangasinan */
+/*{"pam", HB_TAG('P','A','M',' ')},*/ /* Pampanga -> Pampangan */
+ {"pap", HB_TAG('P','A','P','0')}, /* Papiamento -> Papiamentu */
+/*{"pau", HB_TAG('P','A','U',' ')},*/ /* Palauan */
+ {"pbt", HB_TAG('P','A','S',' ')}, /* Southern Pashto -> Pashto */
+ {"pbu", HB_TAG('P','A','S',' ')}, /* Northern Pashto -> Pashto */
+/*{"pcc", HB_TAG('P','C','C',' ')},*/ /* Bouyei */
+/*{"pcd", HB_TAG('P','C','D',' ')},*/ /* Picard */
+ {"pce", HB_TAG('P','L','G',' ')}, /* Ruching Palaung -> Palaung */
+ {"pck", HB_TAG('Q','I','N',' ')}, /* Paite Chin -> Chin */
+/*{"pdc", HB_TAG('P','D','C',' ')},*/ /* Pennsylvania German */
+ {"pel", HB_TAG('M','L','Y',' ')}, /* Pekal -> Malay */
+ {"pes", HB_TAG('F','A','R',' ')}, /* Iranian Persian -> Persian */
+ {"pga", HB_TAG('A','R','A',' ')}, /* Sudanese Creole Arabic -> Arabic */
+/*{"phk", HB_TAG('P','H','K',' ')},*/ /* Phake */
+ {"pi", HB_TAG('P','A','L',' ')}, /* Pali */
+/*{"pih", HB_TAG('P','I','H',' ')},*/ /* Pitcairn-Norfolk -> Norfolk */
+ {"pko", HB_TAG('K','A','L',' ')}, /* Pökoot -> Kalenjin */
+ {"pl", HB_TAG('P','L','K',' ')}, /* Polish */
+ {"pll", HB_TAG('P','L','G',' ')}, /* Shwe Palaung -> Palaung */
+ {"plp", HB_TAG('P','A','P',' ')}, /* Palpa (retired code) */
+ {"plt", HB_TAG('M','L','G',' ')}, /* Plateau Malagasy -> Malagasy */
+/*{"pms", HB_TAG('P','M','S',' ')},*/ /* Piemontese */
+/*{"pnb", HB_TAG('P','N','B',' ')},*/ /* Western Panjabi */
+/*{"poh", HB_TAG('P','O','H',' ')},*/ /* Poqomchi' -> Pocomchi */
+/*{"pon", HB_TAG('P','O','N',' ')},*/ /* Pohnpeian */
+ {"ppa", HB_TAG('B','A','G',' ')}, /* Pao (retired code) -> Baghelkhandi */
+/*{"pro", HB_TAG('P','R','O',' ')},*/ /* Old Provençal (to 1500) -> Provençal / Old Provençal */
+ {"prs", HB_TAG('D','R','I',' ')}, /* Dari */
+ {"ps", HB_TAG('P','A','S',' ')}, /* Pashto [macrolanguage] */
+ {"pse", HB_TAG('M','L','Y',' ')}, /* Central Malay -> Malay */
+ {"pst", HB_TAG('P','A','S',' ')}, /* Central Pashto -> Pashto */
+ {"pt", HB_TAG('P','T','G',' ')}, /* Portuguese */
+/*{"pwo", HB_TAG('P','W','O',' ')},*/ /* Pwo Western Karen -> Western Pwo Karen */
+ {"qu", HB_TAG('Q','U','Z',' ')}, /* Quechua [macrolanguage] */
+ {"qub", HB_TAG('Q','W','H',' ')}, /* Huallaga Huánuco Quechua -> Quechua (Peru) */
+/*{"quc", HB_TAG('Q','U','C',' ')},*/ /* K’iche’ */
+ {"qud", HB_TAG('Q','V','I',' ')}, /* Calderón Highland Quichua -> Quechua (Ecuador) */
+ {"quf", HB_TAG('Q','U','Z',' ')}, /* Lambayeque Quechua -> Quechua */
+ {"qug", HB_TAG('Q','V','I',' ')}, /* Chimborazo Highland Quichua -> Quechua (Ecuador) */
+/*{"quh", HB_TAG('Q','U','H',' ')},*/ /* South Bolivian Quechua -> Quechua (Bolivia) */
+ {"quk", HB_TAG('Q','U','Z',' ')}, /* Chachapoyas Quechua -> Quechua */
+ {"qul", HB_TAG('Q','U','Z',' ')}, /* North Bolivian Quechua -> Quechua */
+ {"qup", HB_TAG('Q','V','I',' ')}, /* Southern Pastaza Quechua -> Quechua (Ecuador) */
+ {"qur", HB_TAG('Q','W','H',' ')}, /* Yanahuanca Pasco Quechua -> Quechua (Peru) */
+ {"qus", HB_TAG('Q','U','H',' ')}, /* Santiago del Estero Quichua -> Quechua (Bolivia) */
+ {"quw", HB_TAG('Q','V','I',' ')}, /* Tena Lowland Quichua -> Quechua (Ecuador) */
+ {"qux", HB_TAG('Q','W','H',' ')}, /* Yauyos Quechua -> Quechua (Peru) */
+ {"quy", HB_TAG('Q','U','Z',' ')}, /* Ayacucho Quechua -> Quechua */
+/*{"quz", HB_TAG('Q','U','Z',' ')},*/ /* Cusco Quechua -> Quechua */
+ {"qva", HB_TAG('Q','W','H',' ')}, /* Ambo-Pasco Quechua -> Quechua (Peru) */
+ {"qvc", HB_TAG('Q','U','Z',' ')}, /* Cajamarca Quechua -> Quechua */
+ {"qve", HB_TAG('Q','U','Z',' ')}, /* Eastern Apurímac Quechua -> Quechua */
+ {"qvh", HB_TAG('Q','W','H',' ')}, /* Huamalíes-Dos de Mayo Huánuco Quechua -> Quechua (Peru) */
+/*{"qvi", HB_TAG('Q','V','I',' ')},*/ /* Imbabura Highland Quichua -> Quechua (Ecuador) */
+ {"qvj", HB_TAG('Q','V','I',' ')}, /* Loja Highland Quichua -> Quechua (Ecuador) */
+ {"qvl", HB_TAG('Q','W','H',' ')}, /* Cajatambo North Lima Quechua -> Quechua (Peru) */
+ {"qvm", HB_TAG('Q','W','H',' ')}, /* Margos-Yarowilca-Lauricocha Quechua -> Quechua (Peru) */
+ {"qvn", HB_TAG('Q','W','H',' ')}, /* North Junín Quechua -> Quechua (Peru) */
+ {"qvo", HB_TAG('Q','V','I',' ')}, /* Napo Lowland Quechua -> Quechua (Ecuador) */
+ {"qvp", HB_TAG('Q','W','H',' ')}, /* Pacaraos Quechua -> Quechua (Peru) */
+ {"qvs", HB_TAG('Q','U','Z',' ')}, /* San Martín Quechua -> Quechua */
+ {"qvw", HB_TAG('Q','W','H',' ')}, /* Huaylla Wanca Quechua -> Quechua (Peru) */
+ {"qvz", HB_TAG('Q','V','I',' ')}, /* Northern Pastaza Quichua -> Quechua (Ecuador) */
+ {"qwa", HB_TAG('Q','W','H',' ')}, /* Corongo Ancash Quechua -> Quechua (Peru) */
+ {"qwc", HB_TAG('Q','U','Z',' ')}, /* Classical Quechua -> Quechua */
+/*{"qwh", HB_TAG('Q','W','H',' ')},*/ /* Huaylas Ancash Quechua -> Quechua (Peru) */
+ {"qws", HB_TAG('Q','W','H',' ')}, /* Sihuas Ancash Quechua -> Quechua (Peru) */
+ {"qxa", HB_TAG('Q','W','H',' ')}, /* Chiquián Ancash Quechua -> Quechua (Peru) */
+ {"qxc", HB_TAG('Q','W','H',' ')}, /* Chincha Quechua -> Quechua (Peru) */
+ {"qxh", HB_TAG('Q','W','H',' ')}, /* Panao Huánuco Quechua -> Quechua (Peru) */
+ {"qxl", HB_TAG('Q','V','I',' ')}, /* Salasaca Highland Quichua -> Quechua (Ecuador) */
+ {"qxn", HB_TAG('Q','W','H',' ')}, /* Northern Conchucos Ancash Quechua -> Quechua (Peru) */
+ {"qxo", HB_TAG('Q','W','H',' ')}, /* Southern Conchucos Ancash Quechua -> Quechua (Peru) */
+ {"qxp", HB_TAG('Q','U','Z',' ')}, /* Puno Quechua -> Quechua */
+ {"qxr", HB_TAG('Q','V','I',' ')}, /* Cañar Highland Quichua -> Quechua (Ecuador) */
+ {"qxt", HB_TAG('Q','W','H',' ')}, /* Santa Ana de Tusi Pasco Quechua -> Quechua (Peru) */
+ {"qxu", HB_TAG('Q','U','Z',' ')}, /* Arequipa-La Unión Quechua -> Quechua */
+ {"qxw", HB_TAG('Q','W','H',' ')}, /* Jauja Wanca Quechua -> Quechua (Peru) */
+ {"rag", HB_TAG('L','U','H',' ')}, /* Logooli -> Luyia */
+/*{"raj", HB_TAG('R','A','J',' ')},*/ /* Rajasthani [macrolanguage] */
+/*{"rar", HB_TAG('R','A','R',' ')},*/ /* Rarotongan */
+ {"rbb", HB_TAG('P','L','G',' ')}, /* Rumai Palaung -> Palaung */
+ {"rbl", HB_TAG('B','I','K',' ')}, /* Miraya Bikol -> Bikol */
+/*{"rej", HB_TAG('R','E','J',' ')},*/ /* Rejang */
+/*{"ria", HB_TAG('R','I','A',' ')},*/ /* Riang (India) */
+/*{"rif", HB_TAG('R','I','F',' ')},*/ /* Tarifit */
+/*{"rit", HB_TAG('R','I','T',' ')},*/ /* Ritharrngu -> Ritarungo */
+ {"rki", HB_TAG('A','R','K',' ')}, /* Rakhine */
+/*{"rkw", HB_TAG('R','K','W',' ')},*/ /* Arakwal */
+ {"rm", HB_TAG('R','M','S',' ')}, /* Romansh */
+ {"rmc", HB_TAG('R','O','Y',' ')}, /* Carpathian Romani -> Romany */
+ {"rmf", HB_TAG('R','O','Y',' ')}, /* Kalo Finnish Romani -> Romany */
+ {"rml", HB_TAG('R','O','Y',' ')}, /* Baltic Romani -> Romany */
+ {"rmn", HB_TAG('R','O','Y',' ')}, /* Balkan Romani -> Romany */
+ {"rmo", HB_TAG('R','O','Y',' ')}, /* Sinte Romani -> Romany */
+ {"rmw", HB_TAG('R','O','Y',' ')}, /* Welsh Romani -> Romany */
+/*{"rmy", HB_TAG('R','M','Y',' ')},*/ /* Vlax Romani */
+ {"rmz", HB_TAG('A','R','K',' ')}, /* Marma -> Rakhine */
+ {"rn", HB_TAG('R','U','N',' ')}, /* Rundi */
+ {"rnl", HB_TAG('H','A','L',' ')}, /* Ranglong -> Halam (Falam Chin) */
+ {"ro", HB_TAG('R','O','M',' ')}, /* Romanian */
+ {"rom", HB_TAG('R','O','Y',' ')}, /* Romany [macrolanguage] */
+/*{"rtm", HB_TAG('R','T','M',' ')},*/ /* Rotuman */
+ {"ru", HB_TAG('R','U','S',' ')}, /* Russian */
+ {"rue", HB_TAG('R','S','Y',' ')}, /* Rusyn */
+/*{"rup", HB_TAG('R','U','P',' ')},*/ /* Aromanian */
+ {"rw", HB_TAG('R','U','A',' ')}, /* Kinyarwanda */
+ {"rwr", HB_TAG('M','A','W',' ')}, /* Marwari (India) */
+ {"sa", HB_TAG('S','A','N',' ')}, /* Sanskrit */
+ {"sah", HB_TAG('Y','A','K',' ')}, /* Yakut -> Sakha */
+ {"sam", HB_TAG('P','A','A',' ')}, /* Samaritan Aramaic -> Palestinian Aramaic */
+/*{"sas", HB_TAG('S','A','S',' ')},*/ /* Sasak */
+/*{"sat", HB_TAG('S','A','T',' ')},*/ /* Santali */
+ {"sc", HB_TAG('S','R','D',' ')}, /* Sardinian [macrolanguage] */
+ {"sck", HB_TAG('S','A','D',' ')}, /* Sadri */
+/*{"scn", HB_TAG('S','C','N',' ')},*/ /* Sicilian */
+/*{"sco", HB_TAG('S','C','O',' ')},*/ /* Scots */
+ {"scs", HB_TAG('S','C','S',' ')}, /* North Slavey */
+ {"scs", HB_TAG('S','L','A',' ')}, /* North Slavey -> Slavey */
+ {"scs", HB_TAG('A','T','H',' ')}, /* North Slavey -> Athapaskan */
+ {"sd", HB_TAG('S','N','D',' ')}, /* Sindhi */
+ {"sdc", HB_TAG('S','R','D',' ')}, /* Sassarese Sardinian -> Sardinian */
+ {"sdh", HB_TAG('K','U','R',' ')}, /* Southern Kurdish -> Kurdish */
+ {"sdn", HB_TAG('S','R','D',' ')}, /* Gallurese Sardinian -> Sardinian */
+ {"se", HB_TAG('N','S','M',' ')}, /* Northern Sami */
+ {"seh", HB_TAG('S','N','A',' ')}, /* Sena */
+ {"sek", HB_TAG('A','T','H',' ')}, /* Sekani -> Athapaskan */
+/*{"sel", HB_TAG('S','E','L',' ')},*/ /* Selkup */
+ {"sez", HB_TAG('Q','I','N',' ')}, /* Senthang Chin -> Chin */
+ {"sfm", HB_TAG('H','M','N',' ')}, /* Small Flowery Miao -> Hmong */
+ {"sg", HB_TAG('S','G','O',' ')}, /* Sango */
+/*{"sga", HB_TAG('S','G','A',' ')},*/ /* Old Irish (to 900) */
+ {"sgc", HB_TAG('K','A','L',' ')}, /* Kipsigis -> Kalenjin */
+/*{"sgs", HB_TAG('S','G','S',' ')},*/ /* Samogitian */
+ {"sgw", HB_TAG('C','H','G',' ')}, /* Sebat Bet Gurage -> Chaha Gurage */
+ {"sgw", HB_TAG('S','G','W',' ')}, /* Sebat Bet Gurage -> Chaha Gurage (SIL fonts) */
+/*{"shi", HB_TAG('S','H','I',' ')},*/ /* Tachelhit */
+/*{"shn", HB_TAG('S','H','N',' ')},*/ /* Shan */
+ {"shu", HB_TAG('A','R','A',' ')}, /* Chadian Arabic -> Arabic */
+ {"si", HB_TAG('S','N','H',' ')}, /* Sinhala (Sinhalese) */
+/*{"sid", HB_TAG('S','I','D',' ')},*/ /* Sidamo */
+ {"sjd", HB_TAG('K','S','M',' ')}, /* Kildin Sami */
+ {"sjo", HB_TAG('S','I','B',' ')}, /* Xibe -> Sibe */
+ {"sk", HB_TAG('S','K','Y',' ')}, /* Slovak */
+ {"skg", HB_TAG('M','L','G',' ')}, /* Sakalava Malagasy -> Malagasy */
+ {"skr", HB_TAG('S','R','K',' ')}, /* Saraiki */
+ {"sl", HB_TAG('S','L','V',' ')}, /* Slovenian */
+ {"sm", HB_TAG('S','M','O',' ')}, /* Samoan */
+ {"sma", HB_TAG('S','S','M',' ')}, /* Southern Sami */
+ {"smj", HB_TAG('L','S','M',' ')}, /* Lule Sami */
+ {"smn", HB_TAG('I','S','M',' ')}, /* Inari Sami */
+ {"sms", HB_TAG('S','K','S',' ')}, /* Skolt Sami */
+ {"sn", HB_TAG('S','N','A','0')}, /* Shona */
+/*{"snk", HB_TAG('S','N','K',' ')},*/ /* Soninke */
+ {"so", HB_TAG('S','M','L',' ')}, /* Somali */
+/*{"sop", HB_TAG('S','O','P',' ')},*/ /* Songe */
+ {"spv", HB_TAG('O','R','I',' ')}, /* Sambalpuri -> Odia (formerly Oriya) */
+ {"spy", HB_TAG('K','A','L',' ')}, /* Sabaot -> Kalenjin */
+ {"sq", HB_TAG('S','Q','I',' ')}, /* Albanian [macrolanguage] */
+ {"sr", HB_TAG('S','R','B',' ')}, /* Serbian */
+ {"src", HB_TAG('S','R','D',' ')}, /* Logudorese Sardinian -> Sardinian */
+ {"sro", HB_TAG('S','R','D',' ')}, /* Campidanese Sardinian -> Sardinian */
+/*{"srr", HB_TAG('S','R','R',' ')},*/ /* Serer */
+ {"srs", HB_TAG('A','T','H',' ')}, /* Sarsi -> Athapaskan */
+ {"ss", HB_TAG('S','W','Z',' ')}, /* Swati */
+ {"ssh", HB_TAG('A','R','A',' ')}, /* Shihhi Arabic -> Arabic */
+ {"st", HB_TAG('S','O','T',' ')}, /* Southern Sotho -> Sotho, Southern */
+/*{"stq", HB_TAG('S','T','Q',' ')},*/ /* Saterfriesisch -> Saterland Frisian */
+ {"stv", HB_TAG('S','I','G',' ')}, /* Silt'e -> Silte Gurage */
+ {"su", HB_TAG('S','U','N',' ')}, /* Sundanese */
+/*{"suk", HB_TAG('S','U','K',' ')},*/ /* Sukuma */
+ {"suq", HB_TAG('S','U','R',' ')}, /* Suri */
+ {"sv", HB_TAG('S','V','E',' ')}, /* Swedish */
+/*{"sva", HB_TAG('S','V','A',' ')},*/ /* Svan */
+ {"sw", HB_TAG('S','W','K',' ')}, /* Swahili [macrolanguage] */
+ {"swb", HB_TAG('C','M','R',' ')}, /* Maore Comorian -> Comorian */
+ {"swc", HB_TAG('S','W','K',' ')}, /* Congo Swahili -> Swahili */
+ {"swh", HB_TAG('S','W','K',' ')}, /* Swahili */
+ {"swv", HB_TAG('M','A','W',' ')}, /* Shekhawati -> Marwari */
+/*{"sxu", HB_TAG('S','X','U',' ')},*/ /* Upper Saxon */
+ {"syc", HB_TAG('S','Y','R',' ')}, /* Classical Syriac -> Syriac */
+/*{"syl", HB_TAG('S','Y','L',' ')},*/ /* Sylheti */
+/*{"syr", HB_TAG('S','Y','R',' ')},*/ /* Syriac [macrolanguage] */
+/*{"szl", HB_TAG('S','Z','L',' ')},*/ /* Silesian */
+ {"ta", HB_TAG('T','A','M',' ')}, /* Tamil */
+ {"taa", HB_TAG('A','T','H',' ')}, /* Lower Tanana -> Athapaskan */
+/*{"tab", HB_TAG('T','A','B',' ')},*/ /* Tabassaran -> Tabasaran */
+ {"taq", HB_TAG('T','M','H',' ')}, /* Tamasheq -> Tamashek */
+ {"tau", HB_TAG('A','T','H',' ')}, /* Upper Tanana -> Athapaskan */
+ {"tcb", HB_TAG('A','T','H',' ')}, /* Tanacross -> Athapaskan */
+ {"tce", HB_TAG('A','T','H',' ')}, /* Southern Tutchone -> Athapaskan */
+ {"tcp", HB_TAG('Q','I','N',' ')}, /* Tawr Chin -> Chin */
+ {"tcy", HB_TAG('T','U','L',' ')}, /* Tulu -> Tumbuka */
+ {"tcz", HB_TAG('Q','I','N',' ')}, /* Thado Chin -> Chin */
+/*{"tdd", HB_TAG('T','D','D',' ')},*/ /* Tai Nüa -> Dehong Dai */
+ {"tdx", HB_TAG('M','L','G',' ')}, /* Tandroy-Mahafaly Malagasy -> Malagasy */
+ {"te", HB_TAG('T','E','L',' ')}, /* Telugu */
+ {"tec", HB_TAG('K','A','L',' ')}, /* Terik -> Kalenjin */
+ {"tem", HB_TAG('T','M','N',' ')}, /* Timne -> Temne */
+/*{"tet", HB_TAG('T','E','T',' ')},*/ /* Tetum */
+ {"tfn", HB_TAG('A','T','H',' ')}, /* Tanaina -> Athapaskan */
+ {"tg", HB_TAG('T','A','J',' ')}, /* Tajik -> Tajiki */
+ {"tgj", HB_TAG('N','I','S',' ')}, /* Tagin -> Nisi */
+ {"tgx", HB_TAG('A','T','H',' ')}, /* Tagish -> Athapaskan */
+ {"th", HB_TAG('T','H','A',' ')}, /* Thai */
+ {"tht", HB_TAG('A','T','H',' ')}, /* Tahltan -> Athapaskan */
+ {"thv", HB_TAG('T','M','H',' ')}, /* Tahaggart Tamahaq -> Tamashek */
+ {"thz", HB_TAG('T','M','H',' ')}, /* Tayart Tamajeq -> Tamashek */
+ {"ti", HB_TAG('T','G','Y',' ')}, /* Tigrinya */
+ {"tig", HB_TAG('T','G','R',' ')}, /* Tigre */
+/*{"tiv", HB_TAG('T','I','V',' ')},*/ /* Tiv */
+ {"tk", HB_TAG('T','K','M',' ')}, /* Turkmen */
+ {"tkg", HB_TAG('M','L','G',' ')}, /* Tesaka Malagasy -> Malagasy */
+ {"tl", HB_TAG('T','G','L',' ')}, /* Tagalog */
+/*{"tmh", HB_TAG('T','M','H',' ')},*/ /* Tamashek [macrolanguage] */
+ {"tmw", HB_TAG('M','L','Y',' ')}, /* Temuan -> Malay */
+ {"tn", HB_TAG('T','N','A',' ')}, /* Tswana */
+ {"tnf", HB_TAG('D','R','I',' ')}, /* Tangshewi (retired code) -> Dari */
+ {"to", HB_TAG('T','G','N',' ')}, /* Tonga (Tonga Islands) -> Tongan */
+ {"tod", HB_TAG('T','O','D','0')}, /* Toma */
+ {"toi", HB_TAG('T','N','G',' ')}, /* Tonga (Zambia) */
+ {"tol", HB_TAG('A','T','H',' ')}, /* Tolowa -> Athapaskan */
+/*{"tpi", HB_TAG('T','P','I',' ')},*/ /* Tok Pisin */
+ {"tr", HB_TAG('T','R','K',' ')}, /* Turkish */
+ {"tru", HB_TAG('T','U','A',' ')}, /* Turoyo -> Turoyo Aramaic */
+ {"tru", HB_TAG('S','Y','R',' ')}, /* Turoyo -> Syriac */
+ {"ts", HB_TAG('T','S','G',' ')}, /* Tsonga */
+/*{"tsj", HB_TAG('T','S','J',' ')},*/ /* Tshangla */
+ {"tt", HB_TAG('T','A','T',' ')}, /* Tatar */
+ {"ttm", HB_TAG('A','T','H',' ')}, /* Northern Tutchone -> Athapaskan */
+ {"ttq", HB_TAG('T','M','H',' ')}, /* Tawallammat Tamajaq -> Tamashek */
+/*{"tum", HB_TAG('T','U','M',' ')},*/ /* Tumbuka -> Tulu */
+ {"tuu", HB_TAG('A','T','H',' ')}, /* Tututni -> Athapaskan */
+ {"tuy", HB_TAG('K','A','L',' ')}, /* Tugen -> Kalenjin */
+/*{"tvl", HB_TAG('T','V','L',' ')},*/ /* Tuvalu */
+ {"tw", HB_TAG('T','W','I',' ')}, /* Twi */
+ {"tw", HB_TAG('A','K','A',' ')}, /* Twi -> Akan */
+ {"txc", HB_TAG('A','T','H',' ')}, /* Tsetsaut -> Athapaskan */
+ {"txy", HB_TAG('M','L','G',' ')}, /* Tanosy Malagasy -> Malagasy */
+ {"ty", HB_TAG('T','H','T',' ')}, /* Tahitian */
+ {"tyv", HB_TAG('T','U','V',' ')}, /* Tuvinian -> Tuvin */
+/*{"tyz", HB_TAG('T','Y','Z',' ')},*/ /* Tày */
+/*{"tzm", HB_TAG('T','Z','M',' ')},*/ /* Central Atlas Tamazight -> Tamazight */
+/*{"tzo", HB_TAG('T','Z','O',' ')},*/ /* Tzotzil */
+ {"ubl", HB_TAG('B','I','K',' ')}, /* Buhi'non Bikol -> Bikol */
+/*{"udm", HB_TAG('U','D','M',' ')},*/ /* Udmurt */
+ {"ug", HB_TAG('U','Y','G',' ')}, /* Uyghur */
+ {"uk", HB_TAG('U','K','R',' ')}, /* Ukrainian */
+ {"uki", HB_TAG('K','U','I',' ')}, /* Kui (India) */
+/*{"umb", HB_TAG('U','M','B',' ')},*/ /* Umbundu */
+ {"unr", HB_TAG('M','U','N',' ')}, /* Mundari */
+ {"ur", HB_TAG('U','R','D',' ')}, /* Urdu */
+ {"urk", HB_TAG('M','L','Y',' ')}, /* Urak Lawoi' -> Malay */
+ {"uz", HB_TAG('U','Z','B',' ')}, /* Uzbek [macrolanguage] */
+ {"uzn", HB_TAG('U','Z','B',' ')}, /* Northern Uzbek -> Uzbek */
+ {"uzs", HB_TAG('U','Z','B',' ')}, /* Southern Uzbek -> Uzbek */
+ {"ve", HB_TAG('V','E','N',' ')}, /* Venda */
+/*{"vec", HB_TAG('V','E','C',' ')},*/ /* Venetian */
+ {"vi", HB_TAG('V','I','T',' ')}, /* Vietnamese */
+ {"vkk", HB_TAG('M','L','Y',' ')}, /* Kaur -> Malay */
+ {"vkt", HB_TAG('M','L','Y',' ')}, /* Tenggarong Kutai Malay -> Malay */
+ {"vls", HB_TAG('F','L','E',' ')}, /* Vlaams -> Dutch (Flemish) */
+ {"vmw", HB_TAG('M','A','K',' ')}, /* Makhuwa */
+ {"vo", HB_TAG('V','O','L',' ')}, /* Volapük */
+/*{"vro", HB_TAG('V','R','O',' ')},*/ /* Võro */
+ {"wa", HB_TAG('W','L','N',' ')}, /* Walloon */
+/*{"war", HB_TAG('W','A','R',' ')},*/ /* Waray (Philippines) -> Waray-Waray */
+ {"wbm", HB_TAG('W','A',' ',' ')}, /* Wa */
+ {"wbr", HB_TAG('W','A','G',' ')}, /* Wagdi */
+ {"wlc", HB_TAG('C','M','R',' ')}, /* Mwali Comorian -> Comorian */
+ {"wle", HB_TAG('S','I','G',' ')}, /* Wolane -> Silte Gurage */
+ {"wlk", HB_TAG('A','T','H',' ')}, /* Wailaki -> Athapaskan */
+ {"wni", HB_TAG('C','M','R',' ')}, /* Ndzwani Comorian -> Comorian */
+ {"wo", HB_TAG('W','L','F',' ')}, /* Wolof */
+ {"wry", HB_TAG('M','A','W',' ')}, /* Merwari -> Marwari */
+ {"wsg", HB_TAG('G','O','N',' ')}, /* Adilabad Gondi -> Gondi */
+/*{"wtm", HB_TAG('W','T','M',' ')},*/ /* Mewati */
+ {"wuu", HB_TAG('Z','H','S',' ')}, /* Wu Chinese -> Chinese Simplified */
+ {"xal", HB_TAG('K','L','M',' ')}, /* Kalmyk */
+ {"xal", HB_TAG('T','O','D',' ')}, /* Kalmyk -> Todo */
+ {"xan", HB_TAG('S','E','K',' ')}, /* Xamtanga -> Sekota */
+ {"xh", HB_TAG('X','H','S',' ')}, /* Xhosa */
+/*{"xjb", HB_TAG('X','J','B',' ')},*/ /* Minjungbal -> Minjangbal */
+/*{"xkf", HB_TAG('X','K','F',' ')},*/ /* Khengkha */
+ {"xmm", HB_TAG('M','L','Y',' ')}, /* Manado Malay -> Malay */
+ {"xmv", HB_TAG('M','L','G',' ')}, /* Antankarana Malagasy -> Malagasy */
+ {"xmw", HB_TAG('M','L','G',' ')}, /* Tsimihety Malagasy -> Malagasy */
+ {"xnr", HB_TAG('D','G','R',' ')}, /* Kangri -> Dogri */
+/*{"xog", HB_TAG('X','O','G',' ')},*/ /* Soga */
+/*{"xpe", HB_TAG('X','P','E',' ')},*/ /* Liberia Kpelle -> Kpelle (Liberia) */
+ {"xsl", HB_TAG('S','S','L',' ')}, /* South Slavey */
+ {"xsl", HB_TAG('S','L','A',' ')}, /* South Slavey -> Slavey */
+ {"xsl", HB_TAG('A','T','H',' ')}, /* South Slavey -> Athapaskan */
+ {"xst", HB_TAG('S','I','G',' ')}, /* Silt'e (retired code) -> Silte Gurage */
+ {"xwo", HB_TAG('T','O','D',' ')}, /* Written Oirat -> Todo */
+/*{"yao", HB_TAG('Y','A','O',' ')},*/ /* Yao */
+/*{"yap", HB_TAG('Y','A','P',' ')},*/ /* Yapese */
+ {"ybd", HB_TAG('A','R','K',' ')}, /* Yangbye (retired code) -> Rakhine */
+ {"ydd", HB_TAG('J','I','I',' ')}, /* Eastern Yiddish -> Yiddish */
+ {"yi", HB_TAG('J','I','I',' ')}, /* Yiddish [macrolanguage] */
+ {"yih", HB_TAG('J','I','I',' ')}, /* Western Yiddish -> Yiddish */
+ {"yo", HB_TAG('Y','B','A',' ')}, /* Yoruba */
+ {"yos", HB_TAG('Q','I','N',' ')}, /* Yos (retired code) -> Chin */
+ {"yrk", HB_TAG('T','N','E',' ')}, /* Nenets -> Tundra Nenets */
+ {"yrk", HB_TAG('F','N','E',' ')}, /* Nenets -> Forest Nenets */
+ {"yue", HB_TAG('Z','H','H',' ')}, /* Yue Chinese -> Chinese, Hong Kong SAR */
+ {"za", HB_TAG('Z','H','A',' ')}, /* Zhuang [macrolanguage] */
+ {"zch", HB_TAG('Z','H','A',' ')}, /* Central Hongshuihe Zhuang -> Zhuang */
+ {"zdj", HB_TAG('C','M','R',' ')}, /* Ngazidja Comorian -> Comorian */
+/*{"zea", HB_TAG('Z','E','A',' ')},*/ /* Zeeuws -> Zealandic */
+ {"zeh", HB_TAG('Z','H','A',' ')}, /* Eastern Hongshuihe Zhuang -> Zhuang */
+ {"zgb", HB_TAG('Z','H','A',' ')}, /* Guibei Zhuang -> Zhuang */
+/*{"zgh", HB_TAG('Z','G','H',' ')},*/ /* Standard Moroccan Tamazight */
+ {"zgm", HB_TAG('Z','H','A',' ')}, /* Minz Zhuang -> Zhuang */
+ {"zgn", HB_TAG('Z','H','A',' ')}, /* Guibian Zhuang -> Zhuang */
+ {"zh", HB_TAG('Z','H','S',' ')}, /* Chinese [macrolanguage] -> Chinese Simplified */
+ {"zhd", HB_TAG('Z','H','A',' ')}, /* Dai Zhuang -> Zhuang */
+ {"zhn", HB_TAG('Z','H','A',' ')}, /* Nong Zhuang -> Zhuang */
+ {"zlj", HB_TAG('Z','H','A',' ')}, /* Liujiang Zhuang -> Zhuang */
+ {"zlm", HB_TAG('M','L','Y',' ')}, /* Malay */
+ {"zln", HB_TAG('Z','H','A',' ')}, /* Lianshan Zhuang -> Zhuang */
+ {"zlq", HB_TAG('Z','H','A',' ')}, /* Liuqian Zhuang -> Zhuang */
+ {"zmi", HB_TAG('M','L','Y',' ')}, /* Negeri Sembilan Malay -> Malay */
+ {"zne", HB_TAG('Z','N','D',' ')}, /* Zande */
+ {"zom", HB_TAG('Q','I','N',' ')}, /* Zou -> Chin */
+ {"zqe", HB_TAG('Z','H','A',' ')}, /* Qiubei Zhuang -> Zhuang */
+ {"zsm", HB_TAG('M','L','Y',' ')}, /* Standard Malay -> Malay */
+ {"zu", HB_TAG('Z','U','L',' ')}, /* Zulu */
+ {"zum", HB_TAG('L','R','C',' ')}, /* Kumzari -> Luri */
+ {"zyb", HB_TAG('Z','H','A',' ')}, /* Yongbei Zhuang -> Zhuang */
+ {"zyg", HB_TAG('Z','H','A',' ')}, /* Yang Zhuang -> Zhuang */
+ {"zyj", HB_TAG('Z','H','A',' ')}, /* Youjiang Zhuang -> Zhuang */
+ {"zyn", HB_TAG('Z','H','A',' ')}, /* Yongnan Zhuang -> Zhuang */
+/*{"zza", HB_TAG('Z','Z','A',' ')},*/ /* Zazaki [macrolanguage] */
+ {"zzj", HB_TAG('Z','H','A',' ')}, /* Zuojiang Zhuang -> Zhuang */
+};
+
+/**
+ * hb_ot_tags_from_complex_language:
+ * @lang_str: a BCP 47 language tag to convert.
+ * @limit: a pointer to the end of the substring of @lang_str to consider for
+ * conversion.
+ * @count: maximum number of language tags to retrieve (IN) and actual number of
+ * language tags retrieved (OUT). If no tags are retrieved, it is not modified.
+ * @tags: array of size at least @count (its input value) to store the
+ * language tag results
+ *
+ * Converts a multi-subtag BCP 47 language tag to language tags.
+ *
+ * Return value: Whether any language systems were retrieved.
+ **/
+static bool
+hb_ot_tags_from_complex_language (const char *lang_str,
+ const char *limit,
+ unsigned int *count /* IN/OUT */,
+ hb_tag_t *tags /* OUT */)
+{
+ if (subtag_matches (lang_str, limit, "-fonnapa"))
+ {
+ /* Undetermined; North American Phonetic Alphabet */
+ tags[0] = HB_TAG('A','P','P','H'); /* Phonetic transcription—Americanist conventions */
+ *count = 1;
+ return true;
+ }
+ if (subtag_matches (lang_str, limit, "-polyton"))
+ {
+ /* Modern Greek (1453-); Polytonic Greek */
+ tags[0] = HB_TAG('P','G','R',' '); /* Polytonic Greek */
+ *count = 1;
+ return true;
+ }
+ if (subtag_matches (lang_str, limit, "-provenc"))
+ {
+ /* Occitan (post 1500); Provençal */
+ tags[0] = HB_TAG('P','R','O',' '); /* Provençal / Old Provençal */
+ *count = 1;
+ return true;
+ }
+ if (subtag_matches (lang_str, limit, "-fonipa"))
+ {
+ /* Undetermined; International Phonetic Alphabet */
+ tags[0] = HB_TAG('I','P','P','H'); /* Phonetic transcription—IPA conventions */
+ *count = 1;
+ return true;
+ }
+ if (subtag_matches (lang_str, limit, "-geok"))
+ {
+ /* Undetermined; Khutsuri (Asomtavruli and Nuskhuri) */
+ tags[0] = HB_TAG('K','G','E',' '); /* Khutsuri Georgian */
+ *count = 1;
+ return true;
+ }
+ if (subtag_matches (lang_str, limit, "-syre"))
+ {
+ /* Undetermined; Syriac (Estrangelo variant) */
+ tags[0] = HB_TAG('S','Y','R','E'); /* Syriac, Estrangela script-variant (equivalent to ISO 15924 'Syre') */
+ *count = 1;
+ return true;
+ }
+ if (subtag_matches (lang_str, limit, "-syrj"))
+ {
+ /* Undetermined; Syriac (Western variant) */
+ tags[0] = HB_TAG('S','Y','R','J'); /* Syriac, Western script-variant (equivalent to ISO 15924 'Syrj') */
+ *count = 1;
+ return true;
+ }
+ if (subtag_matches (lang_str, limit, "-syrn"))
+ {
+ /* Undetermined; Syriac (Eastern variant) */
+ tags[0] = HB_TAG('S','Y','R','N'); /* Syriac, Eastern script-variant (equivalent to ISO 15924 'Syrn') */
+ *count = 1;
+ return true;
+ }
+ switch (lang_str[0])
+ {
+ case 'a':
+ if (0 == strcmp (&lang_str[1], "rt-lojban"))
+ {
+ /* Lojban */
+ tags[0] = HB_TAG('J','B','O',' '); /* Lojban */
+ *count = 1;
+ return true;
+ }
+ break;
+ case 'c':
+ if (lang_matches (&lang_str[1], "do-hant-hk"))
+ {
+ /* Min Dong Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "do-hant-mo"))
+ {
+ /* Min Dong Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "jy-hant-hk"))
+ {
+ /* Jinyu Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "jy-hant-mo"))
+ {
+ /* Jinyu Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "mn-hant-hk"))
+ {
+ /* Mandarin Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "mn-hant-mo"))
+ {
+ /* Mandarin Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "np-hant-hk"))
+ {
+ /* Northern Ping Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "np-hant-mo"))
+ {
+ /* Northern Ping Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "px-hant-hk"))
+ {
+ /* Pu-Xian Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "px-hant-mo"))
+ {
+ /* Pu-Xian Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "sp-hant-hk"))
+ {
+ /* Southern Ping Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "sp-hant-mo"))
+ {
+ /* Southern Ping Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "zh-hant-hk"))
+ {
+ /* Huizhou Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "zh-hant-mo"))
+ {
+ /* Huizhou Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "zo-hant-hk"))
+ {
+ /* Min Zhong Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "zo-hant-mo"))
+ {
+ /* Min Zhong Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "do-hans"))
+ {
+ /* Min Dong Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "do-hant"))
+ {
+ /* Min Dong Chinese */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "jy-hans"))
+ {
+ /* Jinyu Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "jy-hant"))
+ {
+ /* Jinyu Chinese */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "mn-hans"))
+ {
+ /* Mandarin Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "mn-hant"))
+ {
+ /* Mandarin Chinese */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "np-hans"))
+ {
+ /* Northern Ping Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "np-hant"))
+ {
+ /* Northern Ping Chinese */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "px-hans"))
+ {
+ /* Pu-Xian Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "px-hant"))
+ {
+ /* Pu-Xian Chinese */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "sp-hans"))
+ {
+ /* Southern Ping Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "sp-hant"))
+ {
+ /* Southern Ping Chinese */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "zh-hans"))
+ {
+ /* Huizhou Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "zh-hant"))
+ {
+ /* Huizhou Chinese */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "zo-hans"))
+ {
+ /* Min Zhong Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "zo-hant"))
+ {
+ /* Min Zhong Chinese */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "do-", 3)
+ && subtag_matches (lang_str, limit, "-hk"))
+ {
+ /* Min Dong Chinese; Hong Kong */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "do-", 3)
+ && subtag_matches (lang_str, limit, "-mo"))
+ {
+ /* Min Dong Chinese; Macao */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "do-", 3)
+ && subtag_matches (lang_str, limit, "-tw"))
+ {
+ /* Min Dong Chinese; Taiwan, Province of China */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "jy-", 3)
+ && subtag_matches (lang_str, limit, "-hk"))
+ {
+ /* Jinyu Chinese; Hong Kong */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "jy-", 3)
+ && subtag_matches (lang_str, limit, "-mo"))
+ {
+ /* Jinyu Chinese; Macao */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "jy-", 3)
+ && subtag_matches (lang_str, limit, "-tw"))
+ {
+ /* Jinyu Chinese; Taiwan, Province of China */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "mn-", 3)
+ && subtag_matches (lang_str, limit, "-hk"))
+ {
+ /* Mandarin Chinese; Hong Kong */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "mn-", 3)
+ && subtag_matches (lang_str, limit, "-mo"))
+ {
+ /* Mandarin Chinese; Macao */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "mn-", 3)
+ && subtag_matches (lang_str, limit, "-tw"))
+ {
+ /* Mandarin Chinese; Taiwan, Province of China */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "np-", 3)
+ && subtag_matches (lang_str, limit, "-hk"))
+ {
+ /* Northern Ping Chinese; Hong Kong */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "np-", 3)
+ && subtag_matches (lang_str, limit, "-mo"))
+ {
+ /* Northern Ping Chinese; Macao */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "np-", 3)
+ && subtag_matches (lang_str, limit, "-tw"))
+ {
+ /* Northern Ping Chinese; Taiwan, Province of China */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "px-", 3)
+ && subtag_matches (lang_str, limit, "-hk"))
+ {
+ /* Pu-Xian Chinese; Hong Kong */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "px-", 3)
+ && subtag_matches (lang_str, limit, "-mo"))
+ {
+ /* Pu-Xian Chinese; Macao */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "px-", 3)
+ && subtag_matches (lang_str, limit, "-tw"))
+ {
+ /* Pu-Xian Chinese; Taiwan, Province of China */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "sp-", 3)
+ && subtag_matches (lang_str, limit, "-hk"))
+ {
+ /* Southern Ping Chinese; Hong Kong */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "sp-", 3)
+ && subtag_matches (lang_str, limit, "-mo"))
+ {
+ /* Southern Ping Chinese; Macao */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "sp-", 3)
+ && subtag_matches (lang_str, limit, "-tw"))
+ {
+ /* Southern Ping Chinese; Taiwan, Province of China */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "zh-", 3)
+ && subtag_matches (lang_str, limit, "-hk"))
+ {
+ /* Huizhou Chinese; Hong Kong */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "zh-", 3)
+ && subtag_matches (lang_str, limit, "-mo"))
+ {
+ /* Huizhou Chinese; Macao */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "zh-", 3)
+ && subtag_matches (lang_str, limit, "-tw"))
+ {
+ /* Huizhou Chinese; Taiwan, Province of China */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "zo-", 3)
+ && subtag_matches (lang_str, limit, "-hk"))
+ {
+ /* Min Zhong Chinese; Hong Kong */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "zo-", 3)
+ && subtag_matches (lang_str, limit, "-mo"))
+ {
+ /* Min Zhong Chinese; Macao */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "zo-", 3)
+ && subtag_matches (lang_str, limit, "-tw"))
+ {
+ /* Min Zhong Chinese; Taiwan, Province of China */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ break;
+ case 'g':
+ if (lang_matches (&lang_str[1], "an-hant-hk"))
+ {
+ /* Gan Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "an-hant-mo"))
+ {
+ /* Gan Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "an-hans"))
+ {
+ /* Gan Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "an-hant"))
+ {
+ /* Gan Chinese */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "a-latg"))
+ {
+ /* Irish */
+ tags[0] = HB_TAG('I','R','T',' '); /* Irish Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "an-", 3)
+ && subtag_matches (lang_str, limit, "-hk"))
+ {
+ /* Gan Chinese; Hong Kong */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "an-", 3)
+ && subtag_matches (lang_str, limit, "-mo"))
+ {
+ /* Gan Chinese; Macao */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "an-", 3)
+ && subtag_matches (lang_str, limit, "-tw"))
+ {
+ /* Gan Chinese; Taiwan, Province of China */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ break;
+ case 'h':
+ if (lang_matches (&lang_str[1], "ak-hant-hk"))
+ {
+ /* Hakka Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "ak-hant-mo"))
+ {
+ /* Hakka Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "sn-hant-hk"))
+ {
+ /* Xiang Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "sn-hant-mo"))
+ {
+ /* Xiang Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "ak-hans"))
+ {
+ /* Hakka Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "ak-hant"))
+ {
+ /* Hakka Chinese */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "sn-hans"))
+ {
+ /* Xiang Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "sn-hant"))
+ {
+ /* Xiang Chinese */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "ak-", 3)
+ && subtag_matches (lang_str, limit, "-hk"))
+ {
+ /* Hakka Chinese; Hong Kong */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "ak-", 3)
+ && subtag_matches (lang_str, limit, "-mo"))
+ {
+ /* Hakka Chinese; Macao */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "ak-", 3)
+ && subtag_matches (lang_str, limit, "-tw"))
+ {
+ /* Hakka Chinese; Taiwan, Province of China */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "sn-", 3)
+ && subtag_matches (lang_str, limit, "-hk"))
+ {
+ /* Xiang Chinese; Hong Kong */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "sn-", 3)
+ && subtag_matches (lang_str, limit, "-mo"))
+ {
+ /* Xiang Chinese; Macao */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "sn-", 3)
+ && subtag_matches (lang_str, limit, "-tw"))
+ {
+ /* Xiang Chinese; Taiwan, Province of China */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ break;
+ case 'i':
+ if (0 == strcmp (&lang_str[1], "-navajo"))
+ {
+ /* Navajo */
+ unsigned int i;
+ hb_tag_t possible_tags[] = {
+ HB_TAG('N','A','V',' '), /* Navajo */
+ HB_TAG('A','T','H',' '), /* Athapaskan */
+ };
+ for (i = 0; i < 2 && i < *count; i++)
+ tags[i] = possible_tags[i];
+ *count = i;
+ return true;
+ }
+ if (0 == strcmp (&lang_str[1], "-hak"))
+ {
+ /* Hakka */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (0 == strcmp (&lang_str[1], "-lux"))
+ {
+ /* Luxembourgish */
+ tags[0] = HB_TAG('L','T','Z',' '); /* Luxembourgish */
+ *count = 1;
+ return true;
+ }
+ break;
+ case 'l':
+ if (lang_matches (&lang_str[1], "zh-hans"))
+ {
+ /* Literary Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ break;
+ case 'm':
+ if (lang_matches (&lang_str[1], "np-hant-hk"))
+ {
+ /* Min Bei Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "np-hant-mo"))
+ {
+ /* Min Bei Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "np-hans"))
+ {
+ /* Min Bei Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "np-hant"))
+ {
+ /* Min Bei Chinese */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "np-", 3)
+ && subtag_matches (lang_str, limit, "-hk"))
+ {
+ /* Min Bei Chinese; Hong Kong */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "np-", 3)
+ && subtag_matches (lang_str, limit, "-mo"))
+ {
+ /* Min Bei Chinese; Macao */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "np-", 3)
+ && subtag_matches (lang_str, limit, "-tw"))
+ {
+ /* Min Bei Chinese; Taiwan, Province of China */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ break;
+ case 'n':
+ if (lang_matches (&lang_str[1], "an-hant-hk"))
+ {
+ /* Min Nan Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "an-hant-mo"))
+ {
+ /* Min Nan Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "an-hans"))
+ {
+ /* Min Nan Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "an-hant"))
+ {
+ /* Min Nan Chinese */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "an-", 3)
+ && subtag_matches (lang_str, limit, "-hk"))
+ {
+ /* Min Nan Chinese; Hong Kong */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "an-", 3)
+ && subtag_matches (lang_str, limit, "-mo"))
+ {
+ /* Min Nan Chinese; Macao */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "an-", 3)
+ && subtag_matches (lang_str, limit, "-tw"))
+ {
+ /* Min Nan Chinese; Taiwan, Province of China */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strcmp (&lang_str[1], "o-bok"))
+ {
+ /* Norwegian Bokmal */
+ tags[0] = HB_TAG('N','O','R',' '); /* Norwegian */
+ *count = 1;
+ return true;
+ }
+ if (0 == strcmp (&lang_str[1], "o-nyn"))
+ {
+ /* Norwegian Nynorsk */
+ tags[0] = HB_TAG('N','Y','N',' '); /* Norwegian Nynorsk (Nynorsk, Norwegian) */
+ *count = 1;
+ return true;
+ }
+ break;
+ case 'r':
+ if (0 == strncmp (&lang_str[1], "o-", 2)
+ && subtag_matches (lang_str, limit, "-md"))
+ {
+ /* Romanian; Moldova */
+ tags[0] = HB_TAG('M','O','L',' '); /* Moldavian */
+ *count = 1;
+ return true;
+ }
+ break;
+ case 'w':
+ if (lang_matches (&lang_str[1], "uu-hant-hk"))
+ {
+ /* Wu Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "uu-hant-mo"))
+ {
+ /* Wu Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "uu-hans"))
+ {
+ /* Wu Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "uu-hant"))
+ {
+ /* Wu Chinese */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "uu-", 3)
+ && subtag_matches (lang_str, limit, "-hk"))
+ {
+ /* Wu Chinese; Hong Kong */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "uu-", 3)
+ && subtag_matches (lang_str, limit, "-mo"))
+ {
+ /* Wu Chinese; Macao */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "uu-", 3)
+ && subtag_matches (lang_str, limit, "-tw"))
+ {
+ /* Wu Chinese; Taiwan, Province of China */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ break;
+ case 'y':
+ if (lang_matches (&lang_str[1], "ue-hans"))
+ {
+ /* Yue Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ break;
+ case 'z':
+ if (lang_matches (&lang_str[1], "h-hant-hk"))
+ {
+ /* Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "h-hant-mo"))
+ {
+ /* Chinese */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strcmp (&lang_str[1], "h-min-nan"))
+ {
+ /* Minnan, Hokkien, Amoy, Taiwanese, Southern Min, Southern Fujian, Hoklo, Southern Fukien, Ho-lo */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "h-hans"))
+ {
+ /* Chinese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (lang_matches (&lang_str[1], "h-hant"))
+ {
+ /* Chinese */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ if (0 == strcmp (&lang_str[1], "h-min"))
+ {
+ /* Min, Fuzhou, Hokkien, Amoy, or Taiwanese */
+ tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "h-", 2)
+ && subtag_matches (lang_str, limit, "-hk"))
+ {
+ /* Chinese; Hong Kong */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "h-", 2)
+ && subtag_matches (lang_str, limit, "-mo"))
+ {
+ /* Chinese; Macao */
+ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */
+ *count = 1;
+ return true;
+ }
+ if (0 == strncmp (&lang_str[1], "h-", 2)
+ && subtag_matches (lang_str, limit, "-tw"))
+ {
+ /* Chinese; Taiwan, Province of China */
+ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */
+ *count = 1;
+ return true;
+ }
+ break;
+ }
+ return false;
+}
+
+/**
+ * hb_ot_ambiguous_tag_to_language:
+ * @tag: A language tag.
+ *
+ * Converts @tag to a BCP 47 language tag if it is ambiguous (it corresponds to
+ * many language tags) and the best tag is not the alphabetically first, or if
+ * the best tag consists of multiple subtags, or if the best tag does not appear
+ * in #ot_languages.
+ *
+ * Implemented as a fixed switch over the known ambiguous tags: each matching
+ * case converts its BCP 47 string with hb_language_from_string(), and any tag
+ * without a case reaches the default branch.
+ *
+ * Return value: The #hb_language_t corresponding to the BCP 47 language tag,
+ * or #HB_LANGUAGE_INVALID if @tag is not ambiguous.
+ **/
+static hb_language_t
+hb_ot_ambiguous_tag_to_language (hb_tag_t tag)
+{
+ switch (tag)
+ {
+ case HB_TAG('A','L','T',' '): /* Altai */
+ return hb_language_from_string ("alt", -1); /* Southern Altai */
+ case HB_TAG('A','P','P','H'): /* Phonetic transcription—Americanist conventions */
+ return hb_language_from_string ("und-fonnapa", -1); /* Undetermined; North American Phonetic Alphabet */
+ case HB_TAG('A','R','A',' '): /* Arabic */
+ return hb_language_from_string ("ar", -1); /* Arabic */
+ case HB_TAG('A','R','K',' '): /* Rakhine */
+ return hb_language_from_string ("rki", -1); /* Rakhine */
+ case HB_TAG('A','T','H',' '): /* Athapaskan */
+ return hb_language_from_string ("ath", -1); /* Athapascan */
+ case HB_TAG('B','I','K',' '): /* Bikol */
+ return hb_language_from_string ("bik", -1); /* Bikol */
+ case HB_TAG('C','P','P',' '): /* Creoles */
+ return hb_language_from_string ("crp", -1); /* Creoles and pidgins */
+ case HB_TAG('C','R','R',' '): /* Carrier */
+ return hb_language_from_string ("crx", -1); /* Carrier */
+ case HB_TAG('D','N','K',' '): /* Dinka */
+ return hb_language_from_string ("din", -1); /* Dinka */
+ case HB_TAG('D','R','I',' '): /* Dari */
+ return hb_language_from_string ("prs", -1); /* Dari */
+ case HB_TAG('D','Z','N',' '): /* Dzongkha */
+ return hb_language_from_string ("dz", -1); /* Dzongkha */
+ case HB_TAG('E','T','I',' '): /* Estonian */
+ return hb_language_from_string ("et", -1); /* Estonian */
+ case HB_TAG('G','O','N',' '): /* Gondi */
+ return hb_language_from_string ("gon", -1); /* Gondi */
+ case HB_TAG('H','M','N',' '): /* Hmong */
+ return hb_language_from_string ("hmn", -1); /* Hmong */
+ case HB_TAG('H','N','D',' '): /* Hindko */
+ return hb_language_from_string ("hnd", -1); /* Southern Hindko */
+ case HB_TAG('I','J','O',' '): /* Ijo */
+ return hb_language_from_string ("ijo", -1); /* Ijo */
+ case HB_TAG('I','N','U',' '): /* Inuktitut */
+ return hb_language_from_string ("iu", -1); /* Inuktitut */
+ case HB_TAG('I','P','K',' '): /* Inupiat */
+ return hb_language_from_string ("ik", -1); /* Inupiaq */
+ case HB_TAG('I','P','P','H'): /* Phonetic transcription—IPA conventions */
+ return hb_language_from_string ("und-fonipa", -1); /* Undetermined; International Phonetic Alphabet */
+ case HB_TAG('I','R','T',' '): /* Irish Traditional */
+ return hb_language_from_string ("ga-Latg", -1); /* Irish; Latin (Gaelic variant) */
+ case HB_TAG('J','I','I',' '): /* Yiddish */
+ return hb_language_from_string ("yi", -1); /* Yiddish */
+ case HB_TAG('K','A','L',' '): /* Kalenjin */
+ return hb_language_from_string ("kln", -1); /* Kalenjin */
+ case HB_TAG('K','G','E',' '): /* Khutsuri Georgian */
+ return hb_language_from_string ("und-Geok", -1); /* Undetermined; Khutsuri (Asomtavruli and Nuskhuri) */
+ case HB_TAG('K','N','R',' '): /* Kanuri */
+ return hb_language_from_string ("kr", -1); /* Kanuri */
+ case HB_TAG('K','O','K',' '): /* Konkani */
+ return hb_language_from_string ("kok", -1); /* Konkani */
+ case HB_TAG('K','U','I',' '): /* Kui */
+ return hb_language_from_string ("uki", -1); /* Kui (India) */
+ case HB_TAG('K','U','R',' '): /* Kurdish */
+ return hb_language_from_string ("ku", -1); /* Kurdish */
+ case HB_TAG('L','U','H',' '): /* Luyia */
+ return hb_language_from_string ("luy", -1); /* Luyia */
+ case HB_TAG('L','V','I',' '): /* Latvian */
+ return hb_language_from_string ("lv", -1); /* Latvian */
+ case HB_TAG('M','A','W',' '): /* Marwari */
+ return hb_language_from_string ("mwr", -1); /* Marwari */
+ case HB_TAG('M','L','G',' '): /* Malagasy */
+ return hb_language_from_string ("mg", -1); /* Malagasy */
+ case HB_TAG('M','L','Y',' '): /* Malay */
+ return hb_language_from_string ("ms", -1); /* Malay */
+ case HB_TAG('M','N','G',' '): /* Mongolian */
+ return hb_language_from_string ("mn", -1); /* Mongolian */
+ case HB_TAG('M','O','L',' '): /* Moldavian */
+ return hb_language_from_string ("ro-MD", -1); /* Romanian; Moldova */
+ case HB_TAG('N','E','P',' '): /* Nepali */
+ return hb_language_from_string ("ne", -1); /* Nepali */
+ case HB_TAG('N','I','S',' '): /* Nisi */
+ return hb_language_from_string ("njz", -1); /* Nyishi */
+ case HB_TAG('N','O','R',' '): /* Norwegian */
+ return hb_language_from_string ("no", -1); /* Norwegian */
+ case HB_TAG('O','J','B',' '): /* Ojibway */
+ return hb_language_from_string ("oj", -1); /* Ojibwa */
+ case HB_TAG('O','R','O',' '): /* Oromo */
+ return hb_language_from_string ("om", -1); /* Oromo */
+ case HB_TAG('P','A','S',' '): /* Pashto */
+ return hb_language_from_string ("ps", -1); /* Pashto */
+ case HB_TAG('P','G','R',' '): /* Polytonic Greek */
+ return hb_language_from_string ("el-polyton", -1); /* Modern Greek (1453-); Polytonic Greek */
+ case HB_TAG('P','R','O',' '): /* Provençal / Old Provençal */
+ return hb_language_from_string ("pro", -1); /* Old Provençal (to 1500) */
+ case HB_TAG('Q','U','H',' '): /* Quechua (Bolivia) */
+ return hb_language_from_string ("quh", -1); /* South Bolivian Quechua */
+ case HB_TAG('Q','V','I',' '): /* Quechua (Ecuador) */
+ return hb_language_from_string ("qvi", -1); /* Imbabura Highland Quichua */
+ case HB_TAG('Q','W','H',' '): /* Quechua (Peru) */
+ return hb_language_from_string ("qwh", -1); /* Huaylas Ancash Quechua */
+ case HB_TAG('R','A','J',' '): /* Rajasthani */
+ return hb_language_from_string ("raj", -1); /* Rajasthani */
+ case HB_TAG('R','O','Y',' '): /* Romany */
+ return hb_language_from_string ("rom", -1); /* Romany */
+ case HB_TAG('S','Q','I',' '): /* Albanian */
+ return hb_language_from_string ("sq", -1); /* Albanian */
+ case HB_TAG('S','Y','R',' '): /* Syriac */
+ return hb_language_from_string ("syr", -1); /* Syriac */
+ case HB_TAG('S','Y','R','E'): /* Syriac, Estrangela script-variant (equivalent to ISO 15924 'Syre') */
+ return hb_language_from_string ("und-Syre", -1); /* Undetermined; Syriac (Estrangelo variant) */
+ case HB_TAG('S','Y','R','J'): /* Syriac, Western script-variant (equivalent to ISO 15924 'Syrj') */
+ return hb_language_from_string ("und-Syrj", -1); /* Undetermined; Syriac (Western variant) */
+ case HB_TAG('S','Y','R','N'): /* Syriac, Eastern script-variant (equivalent to ISO 15924 'Syrn') */
+ return hb_language_from_string ("und-Syrn", -1); /* Undetermined; Syriac (Eastern variant) */
+ case HB_TAG('T','M','H',' '): /* Tamashek */
+ return hb_language_from_string ("tmh", -1); /* Tamashek */
+ case HB_TAG('T','N','E',' '): /* Tundra Nenets */
+ return hb_language_from_string ("yrk", -1); /* Nenets */
+ case HB_TAG('Z','H','H',' '): /* Chinese, Hong Kong SAR */
+ return hb_language_from_string ("zh-HK", -1); /* Chinese; Hong Kong */
+ case HB_TAG('Z','H','S',' '): /* Chinese Simplified */
+ return hb_language_from_string ("zh-Hans", -1); /* Chinese; Han (Simplified variant) */
+ case HB_TAG('Z','H','T',' '): /* Chinese Traditional */
+ return hb_language_from_string ("zh-Hant", -1); /* Chinese; Han (Traditional variant) */
+ default:
+ return HB_LANGUAGE_INVALID;
+ }
+}
+
+#endif /* HB_OT_TAG_TABLE_HH */
+
+/* == End of generated table == */
diff --git a/thirdparty/harfbuzz/src/hb-ot-tag.cc b/thirdparty/harfbuzz/src/hb-ot-tag.cc
new file mode 100644
index 0000000000..7ec91c5815
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-tag.cc
@@ -0,0 +1,567 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2011 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod, Roozbeh Pournader
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_OT_TAG
+
+
+/* hb_script_t */
+
+static hb_tag_t
+hb_ot_old_tag_from_script (hb_script_t script)
+{
+  /* Maps a script to its old-style OpenType script tag.  The explicit
+   * exceptions below seem to be accurate as of end of 2012. */
+
+  if (script == HB_SCRIPT_INVALID)
+    return HB_OT_TAG_DEFAULT_SCRIPT;
+
+  /* KATAKANA and HIRAGANA both map to 'kana' */
+  if (script == HB_SCRIPT_HIRAGANA)
+    return HB_TAG('k','a','n','a');
+
+  /* Spaces at the end are preserved, unlike ISO 15924 */
+  if (script == HB_SCRIPT_LAO)
+    return HB_TAG('l','a','o',' ');
+  if (script == HB_SCRIPT_YI)
+    return HB_TAG('y','i',' ',' ');
+
+  /* Unicode-5.0 additions */
+  if (script == HB_SCRIPT_NKO)
+    return HB_TAG('n','k','o',' ');
+
+  /* Unicode-5.1 additions */
+  if (script == HB_SCRIPT_VAI)
+    return HB_TAG('v','a','i',' ');
+
+  /* Every other script: set bit 0x20 in the first byte of the script value,
+   * i.e. lowercase its first character, and return that. */
+  const hb_tag_t iso_tag = (hb_tag_t) script;
+  return iso_tag | 0x20000000u;
+}
+
+static hb_script_t
+hb_ot_old_tag_to_script (hb_tag_t tag)
+{
+  /* Inverse of the old-style script tag mapping; apart from the default
+   * script tag this direction is purely algorithmic. */
+  if (unlikely (tag == HB_OT_TAG_DEFAULT_SCRIPT))
+    return HB_SCRIPT_INVALID;
+
+  const hb_tag_t third_byte  = 0x0000FF00u;
+  const hb_tag_t fourth_byte = 0x000000FFu;
+
+  /* Trailing spaces are replaced by repeating the preceding letter,
+   * e.g. 'nko ' -> 'Nkoo'.  The copy is done with OR, which yields the
+   * copied byte as long as that byte already has bit 0x20 set (true for
+   * lowercase ASCII letters). */
+  const bool third_is_space = (tag & third_byte) == 0x00002000u;
+  if (unlikely (third_is_space))
+    tag |= (tag >> 8) & third_byte;   /* second letter -> third slot */
+
+  const bool fourth_is_space = (tag & fourth_byte) == 0x00000020u;
+  if (unlikely (fourth_is_space))
+    tag |= (tag >> 8) & fourth_byte;  /* third letter -> fourth slot */
+
+  /* Clear bit 0x20 of the first byte (uppercase the first letter). */
+  const hb_tag_t iso_tag = tag & ~0x20000000u;
+  return (hb_script_t) iso_tag;
+}
+
+static hb_tag_t
+hb_ot_new_tag_from_script (hb_script_t script)
+{
+  /* Scripts that have a new-style script tag ending in '2'.  Any script
+   * not listed here yields HB_OT_TAG_DEFAULT_SCRIPT. */
+  static const struct
+  {
+    hb_script_t script;
+    hb_tag_t    tag;
+  } new_tags[] = {
+    {HB_SCRIPT_BENGALI,    HB_TAG('b','n','g','2')},
+    {HB_SCRIPT_DEVANAGARI, HB_TAG('d','e','v','2')},
+    {HB_SCRIPT_GUJARATI,   HB_TAG('g','j','r','2')},
+    {HB_SCRIPT_GURMUKHI,   HB_TAG('g','u','r','2')},
+    {HB_SCRIPT_KANNADA,    HB_TAG('k','n','d','2')},
+    {HB_SCRIPT_MALAYALAM,  HB_TAG('m','l','m','2')},
+    {HB_SCRIPT_ORIYA,      HB_TAG('o','r','y','2')},
+    {HB_SCRIPT_TAMIL,      HB_TAG('t','m','l','2')},
+    {HB_SCRIPT_TELUGU,     HB_TAG('t','e','l','2')},
+    {HB_SCRIPT_MYANMAR,    HB_TAG('m','y','m','2')},
+  };
+
+  for (unsigned int idx = 0; idx < sizeof (new_tags) / sizeof (new_tags[0]); idx++)
+    if (new_tags[idx].script == script)
+      return new_tags[idx].tag;
+
+  return HB_OT_TAG_DEFAULT_SCRIPT;
+}
+
+static hb_script_t
+hb_ot_new_tag_to_script (hb_tag_t tag)
+{
+  /* New-style ('...2') script tags and the scripts they stand for.  A tag
+   * that is not listed maps to HB_SCRIPT_UNKNOWN. */
+  static const struct
+  {
+    hb_tag_t    tag;
+    hb_script_t script;
+  } known_tags[] = {
+    {HB_TAG('b','n','g','2'), HB_SCRIPT_BENGALI},
+    {HB_TAG('d','e','v','2'), HB_SCRIPT_DEVANAGARI},
+    {HB_TAG('g','j','r','2'), HB_SCRIPT_GUJARATI},
+    {HB_TAG('g','u','r','2'), HB_SCRIPT_GURMUKHI},
+    {HB_TAG('k','n','d','2'), HB_SCRIPT_KANNADA},
+    {HB_TAG('m','l','m','2'), HB_SCRIPT_MALAYALAM},
+    {HB_TAG('o','r','y','2'), HB_SCRIPT_ORIYA},
+    {HB_TAG('t','m','l','2'), HB_SCRIPT_TAMIL},
+    {HB_TAG('t','e','l','2'), HB_SCRIPT_TELUGU},
+    {HB_TAG('m','y','m','2'), HB_SCRIPT_MYANMAR},
+  };
+
+  for (unsigned int idx = 0; idx < sizeof (known_tags) / sizeof (known_tags[0]); idx++)
+    if (known_tags[idx].tag == tag)
+      return known_tags[idx].script;
+
+  return HB_SCRIPT_UNKNOWN;
+}
+
+#ifndef HB_DISABLE_DEPRECATED
+void
+hb_ot_tags_from_script (hb_script_t  script,
+                        hb_tag_t    *script_tag_1,
+                        hb_tag_t    *script_tag_2)
+{
+  /* Deprecated two-tag wrapper: asks the general converter for at most two
+   * script tags and pads the missing ones with the default script tag. */
+  hb_tag_t found[2];
+  unsigned int n_found = 2;
+
+  hb_ot_tags_from_script_and_language (script, HB_LANGUAGE_INVALID,
+                                       &n_found, found,
+                                       nullptr, nullptr);
+
+  *script_tag_1 = n_found >= 1 ? found[0] : HB_OT_TAG_DEFAULT_SCRIPT;
+  *script_tag_2 = n_found >= 2 ? found[1] : HB_OT_TAG_DEFAULT_SCRIPT;
+}
+#endif
+
+/*
+ * Complete list at:
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags
+ *
+ * Most of the script tags are the same as the ISO 15924 tag but lowercased.
+ * So we just do that, and handle the exceptional cases in a switch.
+ */
+
+static void
+hb_ot_all_tags_from_script (hb_script_t   script,
+                            unsigned int *count /* IN/OUT */,
+                            hb_tag_t     *tags /* OUT */)
+{
+  /* Writes the script tags for @script into @tags, most specific first:
+   * the '...3' variant of the new-style tag, the new-style '...2' tag, then
+   * the old-style tag.  On return *count holds the number written.
+   *
+   * NOTE: the first store is not checked against the capacity; the caller
+   * in this file only calls with a non-zero *count. */
+  const unsigned int capacity = *count;
+  unsigned int written = 0;
+
+  const hb_tag_t v2_tag = hb_ot_new_tag_from_script (script);
+  if (unlikely (v2_tag != HB_OT_TAG_DEFAULT_SCRIPT))
+  {
+    /* HB_SCRIPT_MYANMAR maps to 'mym2', but there is no 'mym3'. */
+    const bool has_v3 = v2_tag != HB_TAG('m','y','m','2');
+    if (has_v3)
+      tags[written++] = v2_tag | '3';
+    if (written < capacity)
+      tags[written++] = v2_tag;
+  }
+
+  if (written < capacity)
+  {
+    const hb_tag_t v1_tag = hb_ot_old_tag_from_script (script);
+    if (v1_tag != HB_OT_TAG_DEFAULT_SCRIPT)
+      tags[written++] = v1_tag;
+  }
+
+  *count = written;
+}
+
+hb_script_t
+hb_ot_tag_to_script (hb_tag_t tag)
+{
+  /* Tags whose last byte is '2' or '3' go through the new-style table; the
+   * mask 0xFFFFFF32 turns a trailing '3' (0x33) into '2' (0x32) so both
+   * look up the same entry.  Everything else is an old-style tag. */
+  switch (tag & 0x000000FFu)
+  {
+    case '2':
+    case '3':
+      return hb_ot_new_tag_to_script (tag & 0xFFFFFF32);
+
+    default:
+      return hb_ot_old_tag_to_script (tag);
+  }
+}
+
+
+/* hb_language_t */
+
+static bool
+subtag_matches (const char *lang_str,
+                const char *limit,
+                const char *subtag)
+{
+  /* Returns true if @subtag occurs in @lang_str starting before @limit and
+   * is not immediately followed by an alphanumeric character (i.e. it is
+   * not merely the prefix of a longer subtag). */
+  const size_t subtag_len = strlen (subtag);
+
+  for (const char *hit = strstr (lang_str, subtag);
+       hit && hit < limit;
+       hit = strstr (hit + subtag_len, subtag))
+  {
+    if (!ISALNUM (hit[subtag_len]))
+      return true;
+  }
+
+  return false;
+}
+
+static hb_bool_t
+lang_matches (const char *lang_str, const char *spec)
+{
+  /* True when @lang_str begins with @spec and the match ends on a subtag
+   * boundary: either the end of the string or a '-'. */
+  const size_t spec_len = strlen (spec);
+
+  if (strncmp (lang_str, spec, spec_len) != 0)
+    return false;
+
+  const char next = lang_str[spec_len];
+  return next == '\0' || next == '-';
+}
+
+/* One entry of the language table: a language string paired with the
+ * OpenType language system tag it maps to. */
+struct LangTag
+{
+  char language[4];
+  hb_tag_t tag;
+
+  /* Compares the string @a against this entry's language (in that order),
+   * looking only at the part of each string before its first '-'.  The
+   * result has strncmp() semantics. */
+  int cmp (const char *a) const
+  {
+    const char *b = this->language;
+
+    const char *dash_a = strchr (a, '-');
+    const unsigned int da = dash_a ? (unsigned int) (dash_a - a) : strlen (a);
+
+    const char *dash_b = strchr (b, '-');
+    const unsigned int db = dash_b ? (unsigned int) (dash_b - b) : strlen (b);
+
+    /* Use the longer of the two leading components so that a string which
+     * is a strict prefix of the other does not compare equal. */
+    const unsigned int span = hb_max (da, db);
+    return strncmp (a, b, span);
+  }
+
+  /* Same comparison, taking the other entry's language as the left side. */
+  int cmp (const LangTag *that) const
+  {
+    return cmp (that->language);
+  }
+};
+
+#include "hb-ot-tag-table.hh"
+
+/* The language IDs corresponding to the following OpenType tags are unclear,
+ * overlap, or are architecturally weird. Needs more research. */
+
+/*{"??", {HB_TAG('B','C','R',' ')}},*/ /* Bible Cree */
+/*{"zh?", {HB_TAG('C','H','N',' ')}},*/ /* Chinese (seen in Microsoft fonts) */
+/*{"ar-Syrc?", {HB_TAG('G','A','R',' ')}},*/ /* Garshuni */
+/*{"??", {HB_TAG('N','G','R',' ')}},*/ /* Nagari */
+/*{"??", {HB_TAG('Y','I','C',' ')}},*/ /* Yi Classic */
+/*{"zh?", {HB_TAG('Z','H','P',' ')}},*/ /* Chinese Phonetic */
+
+#ifndef HB_DISABLE_DEPRECATED
+hb_tag_t
+hb_ot_tag_from_language (hb_language_t language)
+{
+  /* Deprecated single-tag wrapper: requests at most one language tag and
+   * falls back to the default language tag when none is produced. */
+  hb_tag_t first_tag[1];
+  unsigned int n_tags = 1;
+
+  hb_ot_tags_from_script_and_language (HB_SCRIPT_UNKNOWN, language,
+                                       nullptr, nullptr,
+                                       &n_tags, first_tag);
+
+  if (n_tags == 0)
+    return HB_OT_TAG_DEFAULT_LANGUAGE;
+  return first_tag[0];
+}
+#endif
+
+static void
+hb_ot_tags_from_language (const char   *lang_str,
+                          const char   *limit,
+                          unsigned int *count,
+                          hb_tag_t     *tags)
+{
+  /* Converts a language string into up to *count language tags, trying in
+   * order: the multi-subtag special cases, the ot_languages table, and
+   * finally an upper-cased three-letter code.  *count receives the number
+   * of tags written (0 if nothing matched). */
+
+  /* Check for matches of multiple subtags. */
+  if (hb_ot_tags_from_complex_language (lang_str, limit, count, tags))
+    return;
+
+  /* Find a language matching in the first component. */
+  const char *first_dash = strchr (lang_str, '-');
+
+  if (first_dash && limit - lang_str >= 6)
+  {
+    const char *extlang_end = strchr (first_dash + 1, '-');
+    /* If there is an extended language tag (exactly three characters,
+     * starting with a letter), use it instead of the primary subtag. */
+    if (3 == (extlang_end ? extlang_end - first_dash - 1 : strlen (first_dash + 1)) &&
+        ISALPHA (first_dash[1]))
+      lang_str = first_dash + 1;
+  }
+
+  unsigned int first_idx;
+  if (hb_sorted_array (ot_languages).bfind (lang_str, &first_idx))
+  {
+    /* Rewind to the first of any run of entries with the same language. */
+    while (first_idx != 0 &&
+           0 == strcmp (ot_languages[first_idx].language,
+                        ot_languages[first_idx - 1].language))
+      first_idx--;
+
+    /* Copy out every tag of that run, up to the caller's capacity. */
+    unsigned int n = 0;
+    while (n < *count &&
+           first_idx + n < ARRAY_LENGTH (ot_languages) &&
+           0 == strcmp (ot_languages[first_idx + n].language,
+                        ot_languages[first_idx].language))
+    {
+      tags[n] = ot_languages[first_idx + n].tag;
+      n++;
+    }
+    *count = n;
+    return;
+  }
+
+  /* NOTE: first_dash still refers to the first '-' of the original string.
+   * If lang_str was advanced to an extended language subtag above, the
+   * difference below is negative and this fallback is skipped. */
+  const char *component_end = first_dash ? first_dash : lang_str + strlen (lang_str);
+  if (component_end - lang_str == 3)
+  {
+    /* Assume it's ISO-639-3 and upper-case and use it. */
+    tags[0] = hb_tag_from_string (lang_str, component_end - lang_str) & ~0x20202000u;
+    *count = 1;
+    return;
+  }
+
+  *count = 0;
+}
+
+static bool
+parse_private_use_subtag (const char     *private_use_subtag,
+                          unsigned int   *count,
+                          hb_tag_t       *tags,
+                          const char     *prefix,
+                          unsigned char (*normalize) (unsigned char))
+{
+  /* Looks for @prefix inside a private-use subtag and decodes the tag that
+   * follows it into tags[0].  Two encodings are accepted:
+   *   <prefix>-XXXXXXXX  exactly eight hex digits giving the raw tag bytes;
+   *   <prefix>abcd       one to four alphanumerics, passed through
+   *                      @normalize and padded with spaces.
+   * Returns true (and sets *count to 1) only when a tag was decoded. */
+#ifdef HB_NO_LANGUAGE_PRIVATE_SUBTAG
+  return false;
+#endif
+
+  if (!private_use_subtag || !count || !tags || !*count)
+    return false;
+
+  const char *s = strstr (private_use_subtag, prefix);
+  if (!s)
+    return false;
+  s += strlen (prefix);
+
+  char tag[4];
+  if (s[0] == '-')
+  {
+    /* Hex form: two digits per tag byte, high nibble first. */
+    s++;
+    int i = 0;
+    for (; i < 8 && ISHEX (s[i]); i++)
+    {
+      char nibble = FROMHEX (s[i]);
+      if (i & 1)
+        tag[i / 2] += nibble;
+      else
+        tag[i / 2] = nibble << 4;
+    }
+    if (i != 8)
+      return false;
+  }
+  else
+  {
+    /* Literal form. */
+    int i = 0;
+    while (i < 4 && ISALNUM (s[i]))
+    {
+      tag[i] = normalize (s[i]);
+      i++;
+    }
+    if (i == 0)
+      return false;
+
+    while (i < 4)
+      tag[i++] = ' ';
+  }
+
+  tags[0] = HB_TAG (tag[0], tag[1], tag[2], tag[3]);
+  /* If the result equals the default script tag once bit 0x20 of every byte
+   * is ignored, flip that bit in every byte. */
+  if ((tags[0] & 0xDFDFDFDF) == HB_OT_TAG_DEFAULT_SCRIPT)
+    tags[0] ^= ~0xDFDFDFDF;
+  *count = 1;
+  return true;
+}
+
+/**
+ * hb_ot_tags_from_script_and_language:
+ * @script: an #hb_script_t to convert.
+ * @language: an #hb_language_t to convert.
+ * @script_count: (allow-none): maximum number of script tags to retrieve (IN)
+ * and actual number of script tags retrieved (OUT)
+ * @script_tags: (out) (allow-none): array of size at least @script_count to store the
+ * script tag results
+ * @language_count: (allow-none): maximum number of language tags to retrieve
+ * (IN) and actual number of language tags retrieved (OUT)
+ * @language_tags: (out) (allow-none): array of size at least @language_count to store
+ * the language tag results
+ *
+ * Converts an #hb_script_t and an #hb_language_t to script and language tags.
+ *
+ * A "-hbsc" or "-hbot" entry in the private-use part of @language, when
+ * present and well-formed, supplies the script or language tag directly and
+ * takes precedence over the regular conversion.
+ *
+ * Since: 2.0.0
+ **/
+void
+hb_ot_tags_from_script_and_language (hb_script_t    script,
+                                     hb_language_t  language,
+                                     unsigned int  *script_count /* IN/OUT */,
+                                     hb_tag_t      *script_tags /* OUT */,
+                                     unsigned int  *language_count /* IN/OUT */,
+                                     hb_tag_t      *language_tags /* OUT */)
+{
+  bool script_from_tables = true;
+
+  if (language != HB_LANGUAGE_INVALID)
+  {
+    const char *lang_str = hb_language_to_string (language);
+    const char *limit = nullptr;        /* end of the part before the first singleton */
+    const char *private_use = nullptr;  /* start of the "x-..." part, if any */
+
+    if (lang_str[0] == 'x' && lang_str[1] == '-')
+    {
+      /* The whole string is private use; limit stays null in this case. */
+      private_use = lang_str;
+    }
+    else
+    {
+      /* Scan for singleton subtags, i.e. a single character with '-' on
+       * both sides.  The first one ends the regular language part; an 'x'
+       * singleton starts the private-use part and ends the scan. */
+      const char *p = lang_str + 1;
+      while (*p)
+      {
+        if (p[-1] == '-' && p[1] == '-')
+        {
+          if (!limit)
+            limit = p - 1;
+          if (p[0] == 'x')
+          {
+            private_use = p;
+            break;
+          }
+        }
+        p++;
+      }
+      if (!limit)
+        limit = p;
+    }
+
+    script_from_tables = !parse_private_use_subtag (private_use, script_count, script_tags, "-hbsc", TOLOWER);
+    const bool language_from_tables = !parse_private_use_subtag (private_use, language_count, language_tags, "-hbot", TOUPPER);
+
+    if (language_from_tables && language_count && language_tags && *language_count)
+      hb_ot_tags_from_language (lang_str, limit, language_count, language_tags);
+  }
+  else if (language_count && language_tags && *language_count)
+  {
+    /* No language given: report zero language tags. */
+    *language_count = 0;
+  }
+
+  if (script_from_tables && script_count && script_tags && *script_count)
+    hb_ot_all_tags_from_script (script, script_count, script_tags);
+}
+
+/**
+ * hb_ot_tag_to_language:
+ * @tag: a language tag
+ *
+ * Converts a language tag to an #hb_language_t.
+ *
+ * Return value: (transfer none): the #hb_language_t corresponding to @tag,
+ * or a null language if @tag is the default language tag.
+ *
+ * Since: 0.9.2
+ **/
+hb_language_t
+hb_ot_tag_to_language (hb_tag_t tag)
+{
+  if (tag == HB_OT_TAG_DEFAULT_LANGUAGE)
+    return nullptr;
+
+  /* Tags with a hand-picked best language win over the table scan. */
+  hb_language_t disambiguated = hb_ot_ambiguous_tag_to_language (tag);
+  if (disambiguated != HB_LANGUAGE_INVALID)
+    return disambiguated;
+
+  /* Otherwise use the first table entry that carries this tag. */
+  for (unsigned int idx = 0; idx < ARRAY_LENGTH (ot_languages); idx++)
+  {
+    if (ot_languages[idx].tag != tag)
+      continue;
+    return hb_language_from_string (ot_languages[idx].language, -1);
+  }
+
+  /* Unknown tag: return a custom language in the form "x-hbot-AABBCCDD"
+   * (the tag in hex).  If the tag is three letters followed by a space,
+   * also guess that it is ISO 639-3, lower-case it and prepend it; if that
+   * guess is not a registered tag, the private-use part still ensures that
+   * converting the result back yields the original tag. */
+  const unsigned char c1 = (unsigned char) (tag >> 24);
+  const unsigned char c2 = (unsigned char) ((tag >> 16) & 0xFF);
+  const unsigned char c3 = (unsigned char) ((tag >> 8) & 0xFF);
+  const unsigned char c4 = (unsigned char) (tag & 0xFF);
+
+  /* 4 bytes of optional "abc-" prefix + 15 characters + NUL. */
+  char buf[20];
+  char *private_part = buf;
+  if (ISALPHA (c1) && ISALPHA (c2) && ISALPHA (c3) && c4 == ' ')
+  {
+    buf[0] = TOLOWER (c1);
+    buf[1] = TOLOWER (c2);
+    buf[2] = TOLOWER (c3);
+    buf[3] = '-';
+    private_part = buf + 4;
+  }
+  snprintf (private_part, 16, "x-hbot-%08x", tag);
+  return hb_language_from_string (buf, -1);
+}
+
+/**
+ * hb_ot_tags_to_script_and_language:
+ * @script_tag: a script tag
+ * @language_tag: a language tag
+ * @script: (allow-none): the #hb_script_t corresponding to @script_tag (OUT).
+ * @language: (allow-none): the #hb_language_t corresponding to @script_tag and
+ * @language_tag (OUT).
+ *
+ * Converts a script tag and a language tag to an #hb_script_t and an
+ * #hb_language_t.
+ *
+ * When @script_tag is not the first tag that the resulting script converts
+ * back to, the exact tag is recorded in the returned language as a
+ * private-use "-hbsc-XXXXXXXX" entry (the tag in hex).  If @language_tag
+ * yields no language at all, the result then consists of that private-use
+ * part alone ("x-hbsc-XXXXXXXX").
+ *
+ * Since: 2.0.0
+ **/
+void
+hb_ot_tags_to_script_and_language (hb_tag_t       script_tag,
+                                   hb_tag_t       language_tag,
+                                   hb_script_t   *script /* OUT */,
+                                   hb_language_t *language /* OUT */)
+{
+  hb_script_t script_out = hb_ot_tag_to_script (script_tag);
+  if (script)
+    *script = script_out;
+  if (language)
+  {
+    /* Find the primary (first) tag for the script, to see whether
+     * script_tag round-trips without extra help. */
+    unsigned int script_count = 1;
+    hb_tag_t primary_script_tag[1];
+    hb_ot_tags_from_script_and_language (script_out,
+                                         HB_LANGUAGE_INVALID,
+                                         &script_count,
+                                         primary_script_tag,
+                                         nullptr, nullptr);
+    *language = hb_ot_tag_to_language (language_tag);
+    if (script_count == 0 || primary_script_tag[0] != script_tag)
+    {
+      unsigned char *buf;
+      /* hb_ot_tag_to_language() returns a null language for the default
+       * language tag, so the string may be missing; treat that as empty
+       * instead of handing a null pointer to strlen()/memcpy(). */
+      const char *lang_str = hb_language_to_string (*language);
+      size_t len = lang_str ? strlen (lang_str) : 0;
+      /* Worst case appended: "-x" (2) + "-hbsc-" (6) + 8 hex digits. */
+      buf = (unsigned char *) malloc (len + 16);
+      if (unlikely (!buf))
+      {
+        *language = nullptr;
+      }
+      else
+      {
+        int shift;
+        if (len)
+          memcpy (buf, lang_str, len);
+        if (!len)
+        {
+          /* No language part: the tag is purely private use. */
+          buf[len++] = 'x';
+        }
+        else if (lang_str[0] != 'x' || lang_str[1] != '-')
+        {
+          /* Open a private-use section unless the language already is one. */
+          buf[len++] = '-';
+          buf[len++] = 'x';
+        }
+        buf[len++] = '-';
+        buf[len++] = 'h';
+        buf[len++] = 'b';
+        buf[len++] = 's';
+        buf[len++] = 'c';
+        buf[len++] = '-';
+        /* Emit the script tag as eight hex digits, most significant first. */
+        for (shift = 28; shift >= 0; shift -= 4)
+          buf[len++] = TOHEX (script_tag >> shift);
+        *language = hb_language_from_string ((char *) buf, len);
+        free (buf);
+      }
+    }
+  }
+}
+
+#ifdef MAIN
+/* Self-test: aborts if ot_languages is not sorted according to
+ * LangTag::cmp(). */
+static inline void
+test_langs_sorted ()
+{
+  for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages); i++)
+  {
+    /* cmp (that) compares that->language against this entry's language, so
+     * a positive result means the previous entry sorts after this one. */
+    int c = ot_languages[i].cmp (&ot_languages[i - 1]);
+    if (c > 0)
+    {
+      /* The index is unsigned, so print it with %u. */
+      fprintf (stderr, "ot_languages not sorted at index %u: %s %d %s\n",
+               i, ot_languages[i-1].language, c, ot_languages[i].language);
+      abort();
+    }
+  }
+}
+
+/* Entry point of the MAIN self-test build. */
+int
+main (void)
+{
+  /* Aborts on the first out-of-order pair in the language table. */
+  test_langs_sorted ();
+
+  return 0;
+}
+
+#endif
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-var-avar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-avar-table.hh
new file mode 100644
index 0000000000..29219adb0a
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-var-avar-table.hh
@@ -0,0 +1,169 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_VAR_AVAR_TABLE_HH
+#define HB_OT_VAR_AVAR_TABLE_HH
+
+#include "hb-open-type.hh"
+
+/*
+ * avar -- Axis Variations
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/avar
+ */
+
+#define HB_OT_TAG_avar HB_TAG('a','v','a','r')
+
+
+namespace OT {
+
+
+/* One entry of an axis segment map: a pair of F2DOT14 coordinates.  Per the
+ * commented-out field names below, coords[0] is the "from" coordinate and
+ * coords[1] is the "to" coordinate. */
+struct AxisValueMap
+{
+ /* Validates this fixed-size record through the sanitizer's check_struct(). */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ public:
+ /* Stored as an array so callers can select direction by index. */
+ F2DOT14 coords[2];
+// F2DOT14 fromCoord; /* A normalized coordinate value obtained using
+// * default normalization. */
+// F2DOT14 toCoord; /* The modified, normalized coordinate value. */
+
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+
+/* The segment map of one axis: an array of AxisValueMap entries used as the
+ * control points of a piecewise-linear mapping. */
+struct SegmentMaps : ArrayOf<AxisValueMap>
+{
+ /* Maps `value` through the segment map.
+ *
+ * from_offset / to_offset select which element of each entry's coords[]
+ * acts as the input and which as the output; the defaults (0, 1) map
+ * "from" to "to", and unmap() swaps them.
+ *
+ * Values at or outside the first / last examined entry are shifted by that
+ * entry's (to - from) difference; values in between are linearly
+ * interpolated and rounded with roundf(). */
+ int map (int value, unsigned int from_offset = 0, unsigned int to_offset = 1) const
+ {
+ /* Local shorthands so the code below reads in terms of from/to. */
+#define fromCoord coords[from_offset]
+#define toCoord coords[to_offset]
+ /* The following special-cases are not part of OpenType, which requires
+ * that at least -1, 0, and +1 must be mapped. But we include these as
+ * part of a better error recovery scheme. */
+ if (len < 2)
+ {
+ if (!len)
+ return value;
+ else /* len == 1*/
+ return value - arrayZ[0].fromCoord + arrayZ[0].toCoord;
+ }
+
+ /* At or below the first entry: apply the first entry's shift. */
+ if (value <= arrayZ[0].fromCoord)
+ return value - arrayZ[0].fromCoord + arrayZ[0].toCoord;
+
+ /* Find the first entry whose from-coordinate is not below value, stopping
+ * at the last entry (index len - 1) at the latest. */
+ unsigned int i;
+ unsigned int count = len - 1;
+ for (i = 1; i < count && value > arrayZ[i].fromCoord; i++)
+ ;
+
+ /* Exact hit, or value beyond the last entry: apply that entry's shift. */
+ if (value >= arrayZ[i].fromCoord)
+ return value - arrayZ[i].fromCoord + arrayZ[i].toCoord;
+
+ /* Degenerate segment: avoid dividing by a zero denominator below. */
+ if (unlikely (arrayZ[i-1].fromCoord == arrayZ[i].fromCoord))
+ return arrayZ[i-1].toCoord;
+
+ /* Linear interpolation between entries i-1 and i. */
+ int denom = arrayZ[i].fromCoord - arrayZ[i-1].fromCoord;
+ return roundf (arrayZ[i-1].toCoord + ((float) (arrayZ[i].toCoord - arrayZ[i-1].toCoord) *
+ (value - arrayZ[i-1].fromCoord)) / denom);
+#undef toCoord
+#undef fromCoord
+ }
+
+ /* Inverse direction of map(): treats coords[1] as input and coords[0] as output. */
+ int unmap (int value) const { return map (value, 1, 0); }
+
+ public:
+ DEFINE_SIZE_ARRAY (2, *this);
+};
+
+/* The 'avar' table: a header followed by one variable-length SegmentMaps
+ * record per axis, stored back to back. */
+struct avar
+{
+  static constexpr hb_tag_t tableTag = HB_OT_TAG_avar;
+
+  /* Validates the header (major version must be 1) and then every one of the
+   * axisCount SegmentMaps records, stepping from each record to the one that
+   * follows it. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (unlikely (!version.sanitize (c) ||
+                  !(version.major == 1) ||
+                  !c->check_struct (this)))
+      return_trace (false);
+
+    const SegmentMaps *segment_maps = &firstAxisSegmentMaps;
+    for (unsigned int remaining = axisCount; remaining; remaining--)
+    {
+      if (unlikely (!segment_maps->sanitize (c)))
+        return_trace (false);
+      segment_maps = &StructAfter<SegmentMaps> (*segment_maps);
+    }
+
+    return_trace (true);
+  }
+
+  /* Runs each of the first min (coords_length, axisCount) entries of coords
+   * through the forward mapping of its axis, in place. */
+  void map_coords (int *coords, unsigned int coords_length) const
+  {
+    const unsigned int limit = hb_min (coords_length, axisCount);
+
+    const SegmentMaps *segment_maps = &firstAxisSegmentMaps;
+    unsigned int axis = 0;
+    while (axis < limit)
+    {
+      coords[axis] = segment_maps->map (coords[axis]);
+      segment_maps = &StructAfter<SegmentMaps> (*segment_maps);
+      axis++;
+    }
+  }
+
+  /* Same walk as map_coords (), but applies the reverse mapping of each
+   * axis. */
+  void unmap_coords (int *coords, unsigned int coords_length) const
+  {
+    const unsigned int limit = hb_min (coords_length, axisCount);
+
+    const SegmentMaps *segment_maps = &firstAxisSegmentMaps;
+    unsigned int axis = 0;
+    while (axis < limit)
+    {
+      coords[axis] = segment_maps->unmap (coords[axis]);
+      segment_maps = &StructAfter<SegmentMaps> (*segment_maps);
+      axis++;
+    }
+  }
+
+  protected:
+  FixedVersion<>version;   /* Table version; initially 0x00010000u. */
+  HBUINT16 reserved;       /* Permanently reserved; set to 0. */
+  HBUINT16 axisCount;      /* Number of variation axes in the font; must
+                            * equal axisCount in the 'fvar' table. */
+  SegmentMaps firstAxisSegmentMaps; /* First of the per-axis records. */
+
+  public:
+  DEFINE_SIZE_MIN (8);
+};
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_VAR_AVAR_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-var-fvar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-fvar-table.hh
new file mode 100644
index 0000000000..f9e933fb2b
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-var-fvar-table.hh
@@ -0,0 +1,327 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_VAR_FVAR_TABLE_HH
+#define HB_OT_VAR_FVAR_TABLE_HH
+
+#include "hb-open-type.hh"
+
+/*
+ * fvar -- Font Variations
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/fvar
+ */
+
+#define HB_OT_TAG_fvar HB_TAG('f','v','a','r')
+
+
+namespace OT {
+
+
+/* One named instance of the 'fvar' table: a 4-byte header followed by one
+ * HBFixed coordinate per axis.  The axis count is not stored in the record,
+ * so callers must pass it in. */
+struct InstanceRecord
+{
+ friend struct fvar;
+
+ /* Returns a view of the first axis_count coordinates of this instance. */
+ hb_array_t<const HBFixed> get_coordinates (unsigned int axis_count) const
+ { return coordinatesZ.as_array (axis_count); }
+
+ /* Validates the fixed header and an axis_count-long coordinate array. */
+ bool sanitize (hb_sanitize_context_t *c, unsigned int axis_count) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ c->check_array (coordinatesZ.arrayZ, axis_count));
+ }
+
+ protected:
+ NameID subfamilyNameID;/* The name ID for entries in the 'name' table
+ * that provide subfamily names for this instance. */
+ HBUINT16 flags; /* Reserved for future use — set to 0. */
+ UnsizedArrayOf<HBFixed>
+ coordinatesZ; /* The coordinates array for this instance. */
+ //NameID postScriptNameIDX;/*Optional. The name ID for entries in the 'name'
+ // * table that provide PostScript names for this
+ // * instance. */
+
+ public:
+ DEFINE_SIZE_UNBOUNDED (4);
+};
+
+/* Description of one variation axis in the 'fvar' table: its tag, its
+ * min / default / max values in 16.16 fixed point, flags and display name. */
+struct AxisRecord
+{
+ /* Comparison against a tag, delegated to axisTag.cmp(). */
+ int cmp (hb_tag_t key) const { return axisTag.cmp (key); }
+
+ enum
+ {
+ AXIS_FLAG_HIDDEN = 0x0001,
+ };
+
+#ifndef HB_DISABLE_DEPRECATED
+ /* Fills the deprecated hb_ot_var_axis_t with tag, name id and value range. */
+ void get_axis_deprecated (hb_ot_var_axis_t *info) const
+ {
+ info->tag = axisTag;
+ info->name_id = axisNameID;
+ get_coordinates (info->min_value, info->default_value, info->max_value);
+ }
+#endif
+
+ /* Fills *info for this axis; axis_index is supplied by the caller because
+ * the record does not know its own position. */
+ void get_axis_info (unsigned axis_index, hb_ot_var_axis_info_t *info) const
+ {
+ info->axis_index = axis_index;
+ info->tag = axisTag;
+ info->name_id = axisNameID;
+ info->flags = (hb_ot_var_axis_flags_t) (unsigned int) flags;
+ get_coordinates (info->min_value, info->default_value, info->max_value);
+ info->reserved = 0;
+ }
+
+ /* Converts a design-space value to a normalized coordinate scaled by
+ * 16384: v is clamped to [min, max], the default maps to 0, and each side
+ * of the default is scaled by its own distance to min or max. */
+ int normalize_axis_value (float v) const
+ {
+ float min_value, default_value, max_value;
+ get_coordinates (min_value, default_value, max_value);
+
+ v = hb_clamp (v, min_value, max_value);
+
+ if (v == default_value)
+ return 0;
+ else if (v < default_value)
+ v = (v - default_value) / (default_value - min_value);
+ else
+ v = (v - default_value) / (max_value - default_value);
+ return roundf (v * 16384.f);
+ }
+
+ /* Inverse of normalize_axis_value(): turns a normalized coordinate (scaled
+ * by 16384) back into a design-space value.  No clamping is applied. */
+ float unnormalize_axis_value (int v) const
+ {
+ float min_value, default_value, max_value;
+ get_coordinates (min_value, default_value, max_value);
+
+ if (v == 0)
+ return default_value;
+ else if (v < 0)
+ return v * (default_value - min_value) / 16384.f + default_value;
+ else
+ return v * (max_value - default_value) / 16384.f + default_value;
+ }
+
+ hb_ot_name_id_t get_name_id () const { return axisNameID; }
+
+ /* Validates this fixed-size record through the sanitizer's check_struct(). */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ protected:
+ /* Reads the three axis values as floats (raw value / 65536), forcing
+ * min <= default <= max even if the stored values are out of order. */
+ void get_coordinates (float &min, float &default_, float &max) const
+ {
+ default_ = defaultValue / 65536.f;
+ /* Ensure order, to simplify client math. */
+ min = hb_min (default_, minValue / 65536.f);
+ max = hb_max (default_, maxValue / 65536.f);
+ }
+
+ protected:
+ Tag axisTag; /* Tag identifying the design variation for the axis. */
+ HBFixed minValue; /* The minimum coordinate value for the axis. */
+ HBFixed defaultValue; /* The default coordinate value for the axis. */
+ HBFixed maxValue; /* The maximum coordinate value for the axis. */
+ HBUINT16 flags; /* Axis flags. */
+ NameID axisNameID; /* The name ID for entries in the 'name' table that
+ * provide a display name for this axis. */
+
+ public:
+ DEFINE_SIZE_STATIC (20);
+};
+
+/* The 'fvar' table: a header, an array of AxisRecord located through the
+ * firstAxis offset, and, directly after that array, instanceCount instance
+ * records of instanceSize bytes each. */
+struct fvar
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_fvar;
+
+ /* True when the version field is non-zero. */
+ bool has_data () const { return version.to_int (); }
+
+ /* Validates the header, the axis array and the byte range covered by all
+ * instance records.  instanceSize must leave room for the 4-byte instance
+ * header plus 4 bytes per axis. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (version.sanitize (c) &&
+ likely (version.major == 1) &&
+ c->check_struct (this) &&
+ axisSize == 20 && /* Assumed in our code. */
+ instanceSize >= axisCount * 4 + 4 &&
+ get_axes ().sanitize (c) &&
+ c->check_range (get_instance (0), instanceCount, instanceSize));
+ }
+
+ unsigned int get_axis_count () const { return axisCount; }
+
+#ifndef HB_DISABLE_DEPRECATED
+ /* Deprecated variant of get_axis_infos(): fills axes_array with the axes
+ * selected by start_offset / *axes_count and returns the total axis count.
+ * Nothing is written when axes_count is NULL. */
+ unsigned int get_axes_deprecated (unsigned int start_offset,
+ unsigned int *axes_count /* IN/OUT */,
+ hb_ot_var_axis_t *axes_array /* OUT */) const
+ {
+ if (axes_count)
+ {
+ hb_array_t<const AxisRecord> arr = get_axes ().sub_array (start_offset, axes_count);
+ for (unsigned i = 0; i < arr.length; ++i)
+ arr[i].get_axis_deprecated (&axes_array[i]);
+ }
+ return axisCount;
+ }
+#endif
+
+ /* Fills axes_array with info for the axes selected by start_offset /
+ * *axes_count; each entry's axis_index is its absolute index in the table.
+ * Returns the total axis count.  Nothing is written when axes_count is NULL. */
+ unsigned int get_axis_infos (unsigned int start_offset,
+ unsigned int *axes_count /* IN/OUT */,
+ hb_ot_var_axis_info_t *axes_array /* OUT */) const
+ {
+ if (axes_count)
+ {
+ hb_array_t<const AxisRecord> arr = get_axes ().sub_array (start_offset, axes_count);
+ for (unsigned i = 0; i < arr.length; ++i)
+ arr[i].get_axis_info (start_offset + i, &axes_array[i]);
+ }
+ return axisCount;
+ }
+
+#ifndef HB_DISABLE_DEPRECATED
+ /* Deprecated lookup by tag.  *axis_index (optional) is first set to
+ * HB_OT_VAR_NO_AXIS_INDEX; on a match *info is filled and true is returned. */
+ bool
+ find_axis_deprecated (hb_tag_t tag, unsigned *axis_index, hb_ot_var_axis_t *info) const
+ {
+ unsigned i;
+ if (!axis_index) axis_index = &i;
+ *axis_index = HB_OT_VAR_NO_AXIS_INDEX;
+ auto axes = get_axes ();
+ return axes.lfind (tag, axis_index) && (axes[*axis_index].get_axis_deprecated (info), true);
+ }
+#endif
+
+ /* Looks an axis up by tag; on a match fills *info and returns true. */
+ bool
+ find_axis_info (hb_tag_t tag, hb_ot_var_axis_info_t *info) const
+ {
+ unsigned i;
+ auto axes = get_axes ();
+ return axes.lfind (tag, &i) && (axes[i].get_axis_info (i, info), true);
+ }
+
+ /* Per-axis forwarding to AxisRecord::normalize_axis_value(). */
+ int normalize_axis_value (unsigned int axis_index, float v) const
+ { return get_axes ()[axis_index].normalize_axis_value (v); }
+
+ /* Per-axis forwarding to AxisRecord::unnormalize_axis_value(). */
+ float unnormalize_axis_value (unsigned int axis_index, int v) const
+ { return get_axes ()[axis_index].unnormalize_axis_value (v); }
+
+ unsigned int get_instance_count () const { return instanceCount; }
+
+ /* Subfamily name id of the given instance, or HB_OT_NAME_ID_INVALID when
+ * instance_index is out of range. */
+ hb_ot_name_id_t get_instance_subfamily_name_id (unsigned int instance_index) const
+ {
+ const InstanceRecord *instance = get_instance (instance_index);
+ if (unlikely (!instance)) return HB_OT_NAME_ID_INVALID;
+ return instance->subfamilyNameID;
+ }
+
+ /* PostScript name id of the given instance.  The field is optional: it is
+ * only read when instanceSize leaves 2 extra bytes after the coordinates;
+ * otherwise HB_OT_NAME_ID_INVALID is returned. */
+ hb_ot_name_id_t get_instance_postscript_name_id (unsigned int instance_index) const
+ {
+ const InstanceRecord *instance = get_instance (instance_index);
+ if (unlikely (!instance)) return HB_OT_NAME_ID_INVALID;
+ if (instanceSize >= axisCount * 4 + 6)
+ return StructAfter<NameID> (instance->get_coordinates (axisCount));
+ return HB_OT_NAME_ID_INVALID;
+ }
+
+ /* Copies the design coordinates of an instance into coords as floats,
+ * limited by *coords_length, and returns axisCount.  For an unknown
+ * instance, *coords_length (if given) is set to 0 and 0 is returned. */
+ unsigned int get_instance_coords (unsigned int instance_index,
+ unsigned int *coords_length, /* IN/OUT */
+ float *coords /* OUT */) const
+ {
+ const InstanceRecord *instance = get_instance (instance_index);
+ if (unlikely (!instance))
+ {
+ if (coords_length)
+ *coords_length = 0;
+ return 0;
+ }
+
+ if (coords_length && *coords_length)
+ {
+ hb_array_t<const HBFixed> instanceCoords = instance->get_coordinates (axisCount)
+ .sub_array (0, *coords_length);
+ for (unsigned int i = 0; i < instanceCoords.length; i++)
+ coords[i] = instanceCoords.arrayZ[i].to_float ();
+ }
+ return axisCount;
+ }
+
+ /* Adds to nameids every name id referenced by this table: axis names, then
+ * instance subfamily names, then instance PostScript names.  Does nothing
+ * when has_data() is false. */
+ void collect_name_ids (hb_set_t *nameids) const
+ {
+ if (!has_data ()) return;
+
+ + get_axes ()
+ | hb_map (&AxisRecord::get_name_id)
+ | hb_sink (nameids)
+ ;
+
+ + hb_range ((unsigned) instanceCount)
+ | hb_map ([this] (const unsigned _) { return get_instance_subfamily_name_id (_); })
+ | hb_sink (nameids)
+ ;
+
+ + hb_range ((unsigned) instanceCount)
+ | hb_map ([this] (const unsigned _) { return get_instance_postscript_name_id (_); })
+ | hb_sink (nameids)
+ ;
+ }
+
+ protected:
+ /* View of the axisCount axis records located through the firstAxis offset. */
+ hb_array_t<const AxisRecord> get_axes () const
+ { return hb_array (&(this+firstAxis), axisCount); }
+
+ /* Pointer to instance record i, found i * instanceSize bytes past the end
+ * of the axis array; nullptr when i is out of range. */
+ const InstanceRecord *get_instance (unsigned int i) const
+ {
+ if (unlikely (i >= instanceCount)) return nullptr;
+ return &StructAtOffset<InstanceRecord> (&StructAfter<InstanceRecord> (get_axes ()),
+ i * instanceSize);
+ }
+
+ protected:
+ FixedVersion<>version; /* Version of the fvar table
+ * initially set to 0x00010000u */
+ OffsetTo<AxisRecord>
+ firstAxis; /* Offset in bytes from the beginning of the table
+ * to the start of the AxisRecord array. */
+ HBUINT16 reserved; /* This field is permanently reserved. Set to 2. */
+ HBUINT16 axisCount; /* The number of variation axes in the font (the
+ * number of records in the axes array). */
+ HBUINT16 axisSize; /* The size in bytes of each VariationAxisRecord —
+ * set to 20 (0x0014) for this version. */
+ HBUINT16 instanceCount; /* The number of named instances defined in the font
+ * (the number of records in the instances array). */
+ HBUINT16 instanceSize; /* The size in bytes of each InstanceRecord — set
+ * to either axisCount * sizeof(HBFixed) + 4, or to
+ * axisCount * sizeof(HBFixed) + 6. */
+
+ public:
+ DEFINE_SIZE_STATIC (16);
+};
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_VAR_FVAR_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-var-gvar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-gvar-table.hh
new file mode 100644
index 0000000000..4d4e6dcae4
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-var-gvar-table.hh
@@ -0,0 +1,701 @@
+/*
+ * Copyright © 2019 Adobe Inc.
+ * Copyright © 2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+
+#ifndef HB_OT_VAR_GVAR_TABLE_HH
+#define HB_OT_VAR_GVAR_TABLE_HH
+
+#include "hb-open-type.hh"
+
+/*
+ * gvar -- Glyph Variation Table
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/gvar
+ */
+#define HB_OT_TAG_gvar HB_TAG('g','v','a','r')
+
+namespace OT {
+
+/* A 2D point with a flag byte and an "end point" marker. */
+struct contour_point_t
+{
+ /* Resets the point: flag is cleared, the other fields take the arguments. */
+ void init (float x_ = 0.f, float y_ = 0.f, bool is_end_point_ = false)
+ { flag = 0; x = x_; y = y_; is_end_point = is_end_point_; }
+
+ /* Adds p's coordinates to this point; flag and is_end_point are untouched. */
+ void translate (const contour_point_t &p) { x += p.x; y += p.y; }
+
+ uint8_t flag;
+ float x, y;
+ bool is_end_point;
+};
+
+/* A growable array of contour points with a few bulk geometric helpers. */
+struct contour_point_vector_t : hb_vector_t<contour_point_t>
+{
+  /* Appends a copy of every point of `a` to the end of this vector. */
+  void extend (const hb_array_t<contour_point_t> &a)
+  {
+    const unsigned int base = length;
+    resize (base + a.length);
+    unsigned int k = 0;
+    while (k < a.length)
+    {
+      (*this)[base + k] = a[k];
+      k++;
+    }
+  }
+
+  /* Applies a 2x2 matrix to every point:
+   *   x' = x * matrix[0] + y * matrix[2]
+   *   y' = x * matrix[1] + y * matrix[3] */
+  void transform (const float (&matrix)[4])
+  {
+    for (unsigned int k = 0; k < length; k++)
+    {
+      contour_point_t &pt = (*this)[k];
+      /* Both results are computed from the untransformed x and y. */
+      const float old_x = pt.x;
+      const float old_y = pt.y;
+      pt.x = old_x * matrix[0] + old_y * matrix[2];
+      pt.y = old_x * matrix[1] + old_y * matrix[3];
+    }
+  }
+
+  /* Shifts every point by delta's x and y. */
+  void translate (const contour_point_t& delta)
+  {
+    for (unsigned int k = 0; k < length; k++)
+    {
+      contour_point_t &pt = (*this)[k];
+      pt.x += delta.x;
+      pt.y += delta.y;
+    }
+  }
+};
+
+/* https://docs.microsoft.com/en-us/typography/opentype/spec/otvarcommonformats#tuplevariationheader */
+/* Variable-size header of one tuple variation: a 4-byte fixed part followed
+ * by up to three optional F2DOT14 tuples (peak, intermediate start,
+ * intermediate end), each axis_count entries long.  The axis count is not
+ * stored here, so it is passed into every accessor that needs it. */
+struct TupleVariationHeader
+{
+ /* Total size in bytes of this header including its optional tuples. */
+ unsigned get_size (unsigned axis_count) const
+ { return min_size + get_all_tuples (axis_count).get_size (); }
+
+ /* Value of the varDataSize field. */
+ unsigned get_data_size () const { return varDataSize; }
+
+ /* The header that immediately follows this one in memory. */
+ const TupleVariationHeader &get_next (unsigned axis_count) const
+ { return StructAtOffset<TupleVariationHeader> (this, get_size (axis_count)); }
+
+ /* Computes the scalar by which this tuple's deltas are to be weighted at
+ * the given normalized coordinates.
+ *
+ * coords / coord_count: the coordinates, one per axis.
+ * shared_tuples: flat array of shared peak tuples, used when this header
+ * carries no embedded peak tuple.
+ *
+ * Returns 0.f when the tuple does not apply at these coordinates or when
+ * the shared-tuple index is out of range; otherwise the product of the
+ * per-axis factors. */
+ float calculate_scalar (const int *coords, unsigned int coord_count,
+ const hb_array_t<const F2DOT14> shared_tuples) const
+ {
+ hb_array_t<const F2DOT14> peak_tuple;
+
+ if (has_peak ())
+ peak_tuple = get_peak_tuple (coord_count);
+ else
+ {
+ unsigned int index = get_index ();
+ if (unlikely (index * coord_count >= shared_tuples.length))
+ return 0.f;
+ peak_tuple = shared_tuples.sub_array (coord_count * index, coord_count);
+ }
+
+ hb_array_t<const F2DOT14> start_tuple;
+ hb_array_t<const F2DOT14> end_tuple;
+ if (has_intermediate ())
+ {
+ start_tuple = get_start_tuple (coord_count);
+ end_tuple = get_end_tuple (coord_count);
+ }
+
+ float scalar = 1.f;
+ for (unsigned int i = 0; i < coord_count; i++)
+ {
+ int v = coords[i];
+ int peak = peak_tuple[i];
+ /* An axis with a zero peak, or sitting exactly on the peak, contributes a factor of 1. */
+ if (!peak || v == peak) continue;
+
+ if (has_intermediate ())
+ {
+ int start = start_tuple[i];
+ int end = end_tuple[i];
+ /* Inconsistent region for this axis: skip the axis (factor 1). */
+ if (unlikely (start > peak || peak > end ||
+ (start < 0 && end > 0 && peak))) continue;
+ if (v < start || v > end) return 0.f;
+ /* Ramp linearly from start up to peak, or from peak down to end. */
+ if (v < peak)
+ { if (peak != start) scalar *= (float) (v - start) / (peak - start); }
+ else
+ { if (peak != end) scalar *= (float) (end - v) / (end - peak); }
+ }
+ /* No intermediate region: v must lie strictly between 0 and peak. */
+ else if (!v || v < hb_min (0, peak) || v > hb_max (0, peak)) return 0.f;
+ else
+ scalar *= (float) v / peak;
+ }
+ return scalar;
+ }
+
+ /* Accessors for the flag bits and the index packed into tupleIndex. */
+ bool has_peak () const { return tupleIndex & TuppleIndex::EmbeddedPeakTuple; }
+ bool has_intermediate () const { return tupleIndex & TuppleIndex::IntermediateRegion; }
+ bool has_private_points () const { return tupleIndex & TuppleIndex::PrivatePointNumbers; }
+ unsigned get_index () const { return tupleIndex & TuppleIndex::TupleIndexMask; }
+
+ protected:
+ struct TuppleIndex : HBUINT16
+ {
+ enum Flags {
+ EmbeddedPeakTuple = 0x8000u,
+ IntermediateRegion = 0x4000u,
+ PrivatePointNumbers = 0x2000u,
+ TupleIndexMask = 0x0FFFu
+ };
+
+ DEFINE_SIZE_STATIC (2);
+ };
+
+ /* All optional tuples as one flat array: one tuple if a peak is embedded,
+ * plus two more if an intermediate region is present. */
+ hb_array_t<const F2DOT14> get_all_tuples (unsigned axis_count) const
+ { return StructAfter<UnsizedArrayOf<F2DOT14>> (tupleIndex).as_array ((has_peak () + has_intermediate () * 2) * axis_count); }
+ /* The peak tuple is the first tuple of the flat array. */
+ hb_array_t<const F2DOT14> get_peak_tuple (unsigned axis_count) const
+ { return get_all_tuples (axis_count).sub_array (0, axis_count); }
+ /* The start tuple follows the peak tuple, when there is one. */
+ hb_array_t<const F2DOT14> get_start_tuple (unsigned axis_count) const
+ { return get_all_tuples (axis_count).sub_array (has_peak () * axis_count, axis_count); }
+ /* The end tuple follows the start tuple. */
+ hb_array_t<const F2DOT14> get_end_tuple (unsigned axis_count) const
+ { return get_all_tuples (axis_count).sub_array (has_peak () * axis_count + axis_count, axis_count); }
+
+ HBUINT16 varDataSize; /* The size in bytes of the serialized
+ * data for this tuple variation table. */
+ TuppleIndex tupleIndex; /* A packed field. The high 4 bits are flags (see below).
+ The low 12 bits are an index into a shared tuple
+ records array. */
+ /* UnsizedArrayOf<F2DOT14> peakTuple - optional */
+ /* Peak tuple record for this tuple variation table — optional,
+ * determined by flags in the tupleIndex value.
+ *
+ * Note that this must always be included in the 'cvar' table. */
+ /* UnsizedArrayOf<F2DOT14> intermediateStartTuple - optional */
+ /* Intermediate start tuple record for this tuple variation table — optional,
+ determined by flags in the tupleIndex value. */
+ /* UnsizedArrayOf<F2DOT14> intermediateEndTuple - optional */
+ /* Intermediate end tuple record for this tuple variation table — optional,
+ * determined by flags in the tupleIndex value. */
+ public:
+ DEFINE_SIZE_MIN (4);
+};
+
+/* Variation data of one glyph: a 4-byte header (tuple count with flags, and
+ * an offset to the serialized data), followed by the tuple variation headers.
+ * Also hosts the decoders for the packed point-number and delta streams. */
+struct GlyphVariationData
+{
+ /* First tuple variation header, located right after the `data` field. */
+ const TupleVariationHeader &get_tuple_var_header (void) const
+ { return StructAfter<TupleVariationHeader> (data); }
+
+ /* Cursor over the tuple variation headers of one glyph.  It tracks the
+ * current header together with the offset of that header's serialized data. */
+ struct tuple_iterator_t
+ {
+ /* Positions the iterator on the first header of var_data_bytes_. */
+ void init (hb_bytes_t var_data_bytes_, unsigned int axis_count_)
+ {
+ var_data_bytes = var_data_bytes_;
+ var_data = var_data_bytes_.as<GlyphVariationData> ();
+ index = 0;
+ axis_count = axis_count_;
+ current_tuple = &var_data->get_tuple_var_header ();
+ data_offset = 0;
+ }
+
+ /* If the glyph has shared point numbers, decodes them from the start of
+ * the serialized data and advances data_offset past them.  Returns false
+ * only when decoding fails. */
+ bool get_shared_indices (hb_vector_t<unsigned int> &shared_indices /* OUT */)
+ {
+ if (var_data->has_shared_point_numbers ())
+ {
+ const HBUINT8 *base = &(var_data+var_data->data);
+ const HBUINT8 *p = base;
+ if (!unpack_points (p, shared_indices, var_data_bytes)) return false;
+ data_offset = p - base;
+ }
+ return true;
+ }
+
+ /* True while the iterator is on a header that exists (index below the
+ * tuple count) and whose bytes pass the range checks against
+ * var_data_bytes. */
+ bool is_valid () const
+ {
+ return (index < var_data->tupleVarCount.get_count ()) &&
+ var_data_bytes.check_range (current_tuple, TupleVariationHeader::min_size) &&
+ var_data_bytes.check_range (current_tuple, hb_max (current_tuple->get_data_size (), current_tuple->get_size (axis_count))) &&
+ current_tuple->get_size (axis_count);
+ }
+
+ /* Steps to the next header, moving data_offset past the current tuple's
+ * serialized data.  Returns is_valid() for the new position. */
+ bool move_to_next ()
+ {
+ data_offset += current_tuple->get_data_size ();
+ current_tuple = &current_tuple->get_next (axis_count);
+ index++;
+ return is_valid ();
+ }
+
+ /* Start of the current tuple's serialized data. */
+ const HBUINT8 *get_serialized_data () const
+ { return &(var_data+var_data->data) + data_offset; }
+
+ private:
+ const GlyphVariationData *var_data;
+ unsigned int index;
+ unsigned int axis_count;
+ unsigned int data_offset;
+
+ public:
+ hb_bytes_t var_data_bytes;
+ const TupleVariationHeader *current_tuple;
+ };
+
+ /* Initializes *iterator over var_data_bytes and decodes the shared point
+ * numbers, if any, into shared_indices.  Returns false when decoding fails
+ * or when the first header is not valid. */
+ static bool get_tuple_iterator (hb_bytes_t var_data_bytes, unsigned axis_count,
+ hb_vector_t<unsigned int> &shared_indices /* OUT */,
+ tuple_iterator_t *iterator /* OUT */)
+ {
+ iterator->init (var_data_bytes, axis_count);
+ if (!iterator->get_shared_indices (shared_indices))
+ return false;
+ return iterator->is_valid ();
+ }
+
+ bool has_shared_point_numbers () const { return tupleVarCount.has_shared_point_numbers (); }
+
+ /* Decodes a packed point-number list starting at p.
+ *
+ * p: read cursor; advanced past the consumed bytes.
+ * points: resized to the decoded count and filled with point numbers.
+ * bytes: the range every read is checked against.
+ *
+ * Returns false if the stream runs out of range, or if a run announces
+ * more entries than remain in the list. */
+ static bool unpack_points (const HBUINT8 *&p /* IN/OUT */,
+ hb_vector_t<unsigned int> &points /* OUT */,
+ const hb_bytes_t &bytes)
+ {
+ enum packed_point_flag_t
+ {
+ POINTS_ARE_WORDS = 0x80,
+ POINT_RUN_COUNT_MASK = 0x7F
+ };
+
+ if (unlikely (!bytes.check_range (p))) return false;
+
+ /* The count takes one byte, or two when the high bit of the first is set
+ * (the low 7 bits then form the high part of the count). */
+ uint16_t count = *p++;
+ if (count & POINTS_ARE_WORDS)
+ {
+ if (unlikely (!bytes.check_range (p))) return false;
+ count = ((count & POINT_RUN_COUNT_MASK) << 8) | *p++;
+ }
+ points.resize (count);
+
+ /* n accumulates the stored values: each one is a difference from the
+ * previous point number. */
+ unsigned int n = 0;
+ uint16_t i = 0;
+ while (i < count)
+ {
+ if (unlikely (!bytes.check_range (p))) return false;
+ uint16_t j;
+ /* Control byte: low 7 bits hold the run length minus one, the high bit
+ * selects 16-bit instead of 8-bit values. */
+ uint8_t control = *p++;
+ uint16_t run_count = (control & POINT_RUN_COUNT_MASK) + 1;
+ if (control & POINTS_ARE_WORDS)
+ {
+ for (j = 0; j < run_count && i < count; j++, i++)
+ {
+ if (unlikely (!bytes.check_range ((const HBUINT16 *) p)))
+ return false;
+ n += *(const HBUINT16 *)p;
+ points[i] = n;
+ p += HBUINT16::static_size;
+ }
+ }
+ else
+ {
+ for (j = 0; j < run_count && i < count; j++, i++)
+ {
+ if (unlikely (!bytes.check_range (p))) return false;
+ n += *p++;
+ points[i] = n;
+ }
+ }
+ /* The run was cut short by the list ending: malformed data. */
+ if (j < run_count) return false;
+ }
+ return true;
+ }
+
+ /* Decodes packed deltas starting at p.
+ *
+ * p: read cursor; advanced past the consumed bytes.
+ * deltas: must already have the wanted length on entry; exactly that many
+ * values are decoded into it.
+ * bytes: the range every read is checked against.
+ *
+ * Returns false if the stream runs out of range, or if a run announces
+ * more entries than remain to be filled. */
+ static bool unpack_deltas (const HBUINT8 *&p /* IN/OUT */,
+ hb_vector_t<int> &deltas /* IN/OUT */,
+ const hb_bytes_t &bytes)
+ {
+ enum packed_delta_flag_t
+ {
+ DELTAS_ARE_ZERO = 0x80,
+ DELTAS_ARE_WORDS = 0x40,
+ DELTA_RUN_COUNT_MASK = 0x3F
+ };
+
+ unsigned int i = 0;
+ unsigned int count = deltas.length;
+ while (i < count)
+ {
+ if (unlikely (!bytes.check_range (p))) return false;
+ /* Control byte: low 6 bits hold the run length minus one; the two high
+ * bits select an all-zero run (no payload), 16-bit or 8-bit signed values. */
+ uint8_t control = *p++;
+ unsigned int run_count = (control & DELTA_RUN_COUNT_MASK) + 1;
+ unsigned int j;
+ if (control & DELTAS_ARE_ZERO)
+ for (j = 0; j < run_count && i < count; j++, i++)
+ deltas[i] = 0;
+ else if (control & DELTAS_ARE_WORDS)
+ for (j = 0; j < run_count && i < count; j++, i++)
+ {
+ if (unlikely (!bytes.check_range ((const HBUINT16 *) p)))
+ return false;
+ deltas[i] = *(const HBINT16 *) p;
+ p += HBUINT16::static_size;
+ }
+ else
+ for (j = 0; j < run_count && i < count; j++, i++)
+ {
+ if (unlikely (!bytes.check_range (p)))
+ return false;
+ deltas[i] = *(const HBINT8 *) p++;
+ }
+ /* The run was cut short by the output filling up: malformed data. */
+ if (j < run_count)
+ return false;
+ }
+ return true;
+ }
+
+ /* True when the raw tupleVarCount field (flags included) is non-zero. */
+ bool has_data () const { return tupleVarCount; }
+
+ protected:
+ struct TupleVarCount : HBUINT16
+ {
+ bool has_shared_point_numbers () const { return ((*this) & SharedPointNumbers); }
+ unsigned int get_count () const { return (*this) & CountMask; }
+
+ protected:
+ enum Flags
+ {
+ SharedPointNumbers= 0x8000u,
+ CountMask = 0x0FFFu
+ };
+ public:
+ DEFINE_SIZE_STATIC (2);
+ };
+
+ TupleVarCount tupleVarCount; /* A packed field. The high 4 bits are flags, and the
+ * low 12 bits are the number of tuple variation tables
+ * for this glyph. The number of tuple variation tables
+ * can be any number between 1 and 4095. */
+ OffsetTo<HBUINT8>
+ data; /* Offset from the start of the GlyphVariationData table
+ * to the serialized data. */
+ /* TupleVariationHeader tupleVariationHeaders[] *//* Array of tuple variation headers. */
+ public:
+ DEFINE_SIZE_MIN (4);
+};
+
+struct gvar
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_gvar;
+
+ /* Validates everything except the contents of the per-glyph data: the
+ * header, major version 1, a glyph count equal to the sanitizer's glyph
+ * count, the shared tuples, the offset array (glyphCount + 1 entries in the
+ * long or short format) and the overall byte range spanned by the offsets. */
+ bool sanitize_shallow (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) && (version.major == 1) &&
+ (glyphCount == c->get_num_glyphs ()) &&
+ sharedTuples.sanitize (c, this, axisCount * sharedTupleCount) &&
+ (is_long_offset () ?
+ c->check_array (get_long_offset_array (), glyphCount+1) :
+ c->check_array (get_short_offset_array (), glyphCount+1)) &&
+ c->check_array (((const HBUINT8*)&(this+dataZ)) + get_offset (0),
+ get_offset (glyphCount) - get_offset (0)));
+ }
+
+ /* GlyphVariationData is not sanitized here; it must be checked while accessing each glyph's variation data. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ { return sanitize_shallow (c); }
+
+ /* Serializes a subset copy of this table containing the variation data of
+ * the glyphs retained by the subset plan.
+ *
+ * Output order: header, offset array, shared tuples (copied verbatim when
+ * present), then the per-glyph data.  Output glyphs with no source glyph
+ * get an empty data range.  Returns false if any allocation from the
+ * serializer fails. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+
+ gvar *out = c->serializer->allocate_min<gvar> ();
+ if (unlikely (!out)) return_trace (false);
+
+ out->version.major = 1;
+ out->version.minor = 0;
+ out->axisCount = axisCount;
+ out->sharedTupleCount = sharedTupleCount;
+
+ unsigned int num_glyphs = c->plan->num_output_glyphs ();
+ out->glyphCount = num_glyphs;
+
+ /* First pass: total size of the data that will be copied. */
+ unsigned int subset_data_size = 0;
+ for (hb_codepoint_t gid = 0; gid < num_glyphs; gid++)
+ {
+ hb_codepoint_t old_gid;
+ if (!c->plan->old_gid_for_new_gid (gid, &old_gid)) continue;
+ subset_data_size += get_glyph_var_data_bytes (c->source_blob, old_gid).length;
+ }
+
+ /* Use 32-bit offsets as soon as the total size needs more than 16 bits. */
+ bool long_offset = subset_data_size & ~0xFFFFu;
+ out->flags = long_offset ? 1 : 0;
+
+ HBUINT8 *subset_offsets = c->serializer->allocate_size<HBUINT8> ((long_offset ? 4 : 2) * (num_glyphs + 1));
+ if (!subset_offsets) return_trace (false);
+
+ /* shared tuples */
+ if (!sharedTupleCount || !sharedTuples)
+ out->sharedTuples = 0;
+ else
+ {
+ unsigned int shared_tuple_size = F2DOT14::static_size * axisCount * sharedTupleCount;
+ F2DOT14 *tuples = c->serializer->allocate_size<F2DOT14> (shared_tuple_size);
+ if (!tuples) return_trace (false);
+ out->sharedTuples = (char *) tuples - (char *) out;
+ memcpy (tuples, this+sharedTuples, shared_tuple_size);
+ }
+
+ char *subset_data = c->serializer->allocate_size<char> (subset_data_size);
+ if (!subset_data) return_trace (false);
+ out->dataZ = subset_data - (char *) out;
+
+ /* Second pass: write each glyph's start offset, then copy its data. */
+ /* NOTE(review): short offsets are stored divided by 2, so an odd
+ * var_data_bytes.length would not round-trip through get_offset();
+ * presumably source glyph data is always even-sized -- confirm. */
+ unsigned int glyph_offset = 0;
+ for (hb_codepoint_t gid = 0; gid < num_glyphs; gid++)
+ {
+ hb_codepoint_t old_gid;
+ hb_bytes_t var_data_bytes = c->plan->old_gid_for_new_gid (gid, &old_gid)
+ ? get_glyph_var_data_bytes (c->source_blob, old_gid)
+ : hb_bytes_t ();
+
+ if (long_offset)
+ ((HBUINT32 *) subset_offsets)[gid] = glyph_offset;
+ else
+ ((HBUINT16 *) subset_offsets)[gid] = glyph_offset / 2;
+
+ if (var_data_bytes.length > 0)
+ memcpy (subset_data, var_data_bytes.arrayZ, var_data_bytes.length);
+ subset_data += var_data_bytes.length;
+ glyph_offset += var_data_bytes.length;
+ }
+ /* Final entry marks the end of the last glyph's data. */
+ if (long_offset)
+ ((HBUINT32 *) subset_offsets)[num_glyphs] = glyph_offset;
+ else
+ ((HBUINT16 *) subset_offsets)[num_glyphs] = glyph_offset / 2;
+
+ return_trace (true);
+ }
+
+ protected:
+ /* Returns the bytes of one glyph's variation data inside blob, delimited by
+  * the glyph's entry in the offset array and the following entry.  A range
+  * shorter than a GlyphVariationData header yields an empty hb_bytes_t. */
+ const hb_bytes_t get_glyph_var_data_bytes (hb_blob_t *blob, hb_codepoint_t glyph) const
+ {
+   const unsigned begin = get_offset (glyph);
+   const unsigned size = get_offset (glyph+1) - begin;
+   hb_bytes_t slice = blob->as_bytes ().sub_array (((unsigned) dataZ) + begin, size);
+   if (unlikely (slice.length < GlyphVariationData::min_size))
+     return hb_bytes_t ();
+   return slice;
+ }
+
+ /* Bit 0 of flags selects 32-bit offsets; otherwise offsets are 16-bit. */
+ bool is_long_offset () const { return flags & 1; }
+
+ /* Byte offset of entry i of the offset array; short-format entries are
+ * stored halved and are doubled here. */
+ unsigned get_offset (unsigned i) const
+ { return is_long_offset () ? get_long_offset_array ()[i] : get_short_offset_array ()[i] * 2; }
+
+ /* The same offsetZ storage viewed in the long or the short entry format. */
+ const HBUINT32 * get_long_offset_array () const { return (const HBUINT32 *) &offsetZ; }
+ const HBUINT16 *get_short_offset_array () const { return (const HBUINT16 *) &offsetZ; }
+
+ public:
+ struct accelerator_t
+ {
+ /* Obtains the face's gvar table through a sanitize context and keeps it in `table`. */
+ void init (hb_face_t *face)
+ { table = hb_sanitize_context_t ().reference_table<gvar> (face); }
+ /* Releases what init() stored in `table`. */
+ void fini () { table.destroy (); }
+
+ private:
+ /* Coordinate selectors used as the template argument of infer_delta(). */
+ struct x_getter { static float get (const contour_point_t &p) { return p.x; } };
+ struct y_getter { static float get (const contour_point_t &p) { return p.y; } };
+
+ /* Infers, for one coordinate (chosen by T), the delta of points[target]
+ * from the deltas of the points at indices prev and next:
+ * - prev and next share the same coordinate: their common delta if the
+ * two deltas agree, otherwise 0;
+ * - target lies at or beyond the smaller / larger of the two coordinates:
+ * the delta of the neighbour holding that smaller / larger coordinate;
+ * - otherwise: linear interpolation between the two deltas. */
+ template <typename T>
+ static float infer_delta (const hb_array_t<contour_point_t> points,
+ const hb_array_t<contour_point_t> deltas,
+ unsigned int target, unsigned int prev, unsigned int next)
+ {
+ float target_val = T::get (points[target]);
+ float prev_val = T::get (points[prev]);
+ float next_val = T::get (points[next]);
+ float prev_delta = T::get (deltas[prev]);
+ float next_delta = T::get (deltas[next]);
+
+ if (prev_val == next_val)
+ return (prev_delta == next_delta) ? prev_delta : 0.f;
+ else if (target_val <= hb_min (prev_val, next_val))
+ return (prev_val < next_val) ? prev_delta : next_delta;
+ else if (target_val >= hb_max (prev_val, next_val))
+ return (prev_val > next_val) ? prev_delta : next_delta;
+
+ /* linear interpolation */
+ float r = (target_val - prev_val) / (next_val - prev_val);
+ return (1.f - r) * prev_delta + r * next_delta;
+ }
+
+ /* Successor of i within [start, end], wrapping to start once i reaches end. */
+ static unsigned int next_index (unsigned int i, unsigned int start, unsigned int end)
+ { return (i >= end) ? start : (i + 1); }
+
+ public:
+ bool apply_deltas_to_points (hb_codepoint_t glyph, hb_font_t *font,
+ const hb_array_t<contour_point_t> points) const
+ {
+ /* num_coords should exactly match gvar's axisCount due to how GlyphVariationData tuples are aligned */
+ if (!font->num_coords || font->num_coords != table->axisCount) return true;
+
+ if (unlikely (glyph >= table->glyphCount)) return true;
+
+ hb_bytes_t var_data_bytes = table->get_glyph_var_data_bytes (table.get_blob (), glyph);
+ if (!var_data_bytes.as<GlyphVariationData> ()->has_data ()) return true;
+ hb_vector_t<unsigned int> shared_indices;
+ GlyphVariationData::tuple_iterator_t iterator;
+ if (!GlyphVariationData::get_tuple_iterator (var_data_bytes, table->axisCount,
+ shared_indices, &iterator))
+ return true; /* so isn't applied at all */
+
+ /* Save original points for inferred delta calculation */
+ contour_point_vector_t orig_points;
+ orig_points.resize (points.length);
+ for (unsigned int i = 0; i < orig_points.length; i++)
+ orig_points[i] = points[i];
+
+ contour_point_vector_t deltas; /* flag is used to indicate referenced point */
+ deltas.resize (points.length);
+
+ hb_vector_t<unsigned> end_points;
+ for (unsigned i = 0; i < points.length; ++i)
+ if (points[i].is_end_point)
+ end_points.push (i);
+
+ int *coords = font->coords;
+ unsigned num_coords = font->num_coords;
+ hb_array_t<const F2DOT14> shared_tuples = (table+table->sharedTuples).as_array (table->sharedTupleCount * table->axisCount);
+ do
+ {
+ float scalar = iterator.current_tuple->calculate_scalar (coords, num_coords, shared_tuples);
+ if (scalar == 0.f) continue;
+ const HBUINT8 *p = iterator.get_serialized_data ();
+ unsigned int length = iterator.current_tuple->get_data_size ();
+ if (unlikely (!iterator.var_data_bytes.check_range (p, length)))
+ return false;
+
+ hb_bytes_t bytes ((const char *) p, length);
+ hb_vector_t<unsigned int> private_indices;
+ if (iterator.current_tuple->has_private_points () &&
+ !GlyphVariationData::unpack_points (p, private_indices, bytes))
+ return false;
+ const hb_array_t<unsigned int> &indices = private_indices.length ? private_indices : shared_indices;
+
+ bool apply_to_all = (indices.length == 0);
+ unsigned int num_deltas = apply_to_all ? points.length : indices.length;
+ hb_vector_t<int> x_deltas;
+ x_deltas.resize (num_deltas);
+ if (!GlyphVariationData::unpack_deltas (p, x_deltas, bytes))
+ return false;
+ hb_vector_t<int> y_deltas;
+ y_deltas.resize (num_deltas);
+ if (!GlyphVariationData::unpack_deltas (p, y_deltas, bytes))
+ return false;
+
+ for (unsigned int i = 0; i < deltas.length; i++)
+ deltas[i].init ();
+ for (unsigned int i = 0; i < num_deltas; i++)
+ {
+ unsigned int pt_index = apply_to_all ? i : indices[i];
+ deltas[pt_index].flag = 1; /* this point is referenced, i.e., explicit deltas specified */
+ deltas[pt_index].x += x_deltas[i] * scalar;
+ deltas[pt_index].y += y_deltas[i] * scalar;
+ }
+
+ /* infer deltas for unreferenced points */
+ unsigned start_point = 0;
+ for (unsigned c = 0; c < end_points.length; c++)
+ {
+ unsigned end_point = end_points[c];
+
+ /* Check the number of unreferenced points in a contour. If no unref points or no ref points, nothing to do. */
+ unsigned unref_count = 0;
+ for (unsigned i = start_point; i <= end_point; i++)
+ if (!deltas[i].flag) unref_count++;
+
+ unsigned j = start_point;
+ if (unref_count == 0 || unref_count > end_point - start_point)
+ goto no_more_gaps;
+
+ for (;;)
+ {
+ /* Locate the next gap of unreferenced points between two referenced points prev and next.
+ * Note that a gap may wrap around at left (start_point) and/or at right (end_point).
+ */
+ unsigned int prev, next, i;
+ for (;;)
+ {
+ i = j;
+ j = next_index (i, start_point, end_point);
+ if (deltas[i].flag && !deltas[j].flag) break;
+ }
+ prev = j = i;
+ for (;;)
+ {
+ i = j;
+ j = next_index (i, start_point, end_point);
+ if (!deltas[i].flag && deltas[j].flag) break;
+ }
+ next = j;
+ /* Infer deltas for all unref points in the gap between prev and next */
+ i = prev;
+ for (;;)
+ {
+ i = next_index (i, start_point, end_point);
+ if (i == next) break;
+ deltas[i].x = infer_delta<x_getter> (orig_points.as_array (), deltas.as_array (), i, prev, next);
+ deltas[i].y = infer_delta<y_getter> (orig_points.as_array (), deltas.as_array (), i, prev, next);
+ if (--unref_count == 0) goto no_more_gaps;
+ }
+ }
+no_more_gaps:
+ start_point = end_point + 1;
+ }
+
+ /* apply specified / inferred deltas to points */
+ for (unsigned int i = 0; i < points.length; i++)
+ {
+ points[i].x += roundf (deltas[i].x);
+ points[i].y += roundf (deltas[i].y);
+ }
+ } while (iterator.move_to_next ());
+
+ return true;
+ }
+
+ unsigned int get_axis_count () const { return table->axisCount; }
+
+ private:
+ hb_blob_ptr_t<gvar> table;
+ };
+
+ protected:
+ FixedVersion<>version; /* Version number of the glyph variations table
+ * Set to 0x00010000u. */
+ HBUINT16 axisCount; /* The number of variation axes for this font. This must be
+ * the same number as axisCount in the 'fvar' table. */
+ HBUINT16 sharedTupleCount;
+ /* The number of shared tuple records. Shared tuple records
+ * can be referenced within glyph variation data tables for
+ * multiple glyphs, as opposed to other tuple records stored
+ * directly within a glyph variation data table. */
+ LNNOffsetTo<UnsizedArrayOf<F2DOT14>>
+ sharedTuples; /* Offset from the start of this table to the shared tuple records.
+ * Array of tuple records shared across all glyph variation data tables. */
+ HBUINT16 glyphCount; /* The number of glyphs in this font. This must match the number of
+ * glyphs stored elsewhere in the font. */
+ HBUINT16 flags; /* Bit-field that gives the format of the offset array that follows.
+ * If bit 0 is clear, the offsets are uint16; if bit 0 is set, the
+ * offsets are uint32. */
+ LOffsetTo<GlyphVariationData>
+ dataZ; /* Offset from the start of this table to the array of
+ * GlyphVariationData tables. */
+ UnsizedArrayOf<HBUINT8>
+ offsetZ; /* Offsets from the start of the GlyphVariationData array
+ * to each GlyphVariationData table. */
+ public:
+ DEFINE_SIZE_MIN (20);
+};
+
+struct gvar_accelerator_t : gvar::accelerator_t {}; /* Namespace-scope name for gvar::accelerator_t; adds no members. */
+
+} /* namespace OT */
+
+#endif /* HB_OT_VAR_GVAR_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-var-hvar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-hvar-table.hh
new file mode 100644
index 0000000000..fdcc88d674
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-var-hvar-table.hh
@@ -0,0 +1,488 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_VAR_HVAR_TABLE_HH
+#define HB_OT_VAR_HVAR_TABLE_HH
+
+#include "hb-ot-layout-common.hh"
+
+
+namespace OT {
+
+
+struct DeltaSetIndexMap
+{
+  /* Verifies that the fixed header and the mapCount entries of get_width ()
+   * bytes each lie inside the data being sanitized. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!c->check_struct (this)) return_trace (false);
+    return_trace (c->check_range (mapDataZ.arrayZ, mapCount, get_width ()));
+  }
+
+  /* Writes this map from a plan object that provides get_width (),
+   * get_inner_bit_count () and get_output_map ().  Every output entry is a
+   * 16.16 outer.inner value; it is repacked to (outer << inner bits) | inner
+   * and stored most-significant byte first in get_width () bytes.
+   * Returns false when a non-empty map has a width outside 1..4 or an inner
+   * bit count outside 1..16, or when the serializer runs out of room. */
+  template <typename T>
+  bool serialize (hb_serialize_context_t *c, const T &plan)
+  {
+    TRACE_SERIALIZE (this);
+    const unsigned int entry_size = plan.get_width ();
+    const unsigned int inner_bits = plan.get_inner_bit_count ();
+    const hb_array_t<const unsigned int> entries = plan.get_output_map ();
+
+    /* Both fields are stored minus one: 4 bits for the inner bit count,
+     * 2 bits for the entry size. */
+    if (unlikely (entries.length && (inner_bits - 1 > 0xFu || entry_size - 1 > 0x3u)))
+      return_trace (false);
+    if (unlikely (!c->extend_min (*this))) return_trace (false);
+
+    format = ((entry_size - 1) << 4) | (inner_bits - 1);
+    mapCount = entries.length;
+    HBUINT8 *out = c->allocate_size<HBUINT8> (entry_size * entries.length);
+    if (unlikely (!out)) return_trace (false);
+    for (unsigned int i = 0; i < entries.length; i++, out += entry_size)
+    {
+      const unsigned int v = entries[i];
+      unsigned int packed = ((v >> 16) << inner_bits) | (v & 0xFFFF);
+      /* Fill the entry from its last byte backwards. */
+      for (unsigned int k = entry_size; k--;)
+      {
+	out[k] = packed;
+	packed >>= 8;
+      }
+    }
+    return_trace (true);
+  }
+
+  unsigned int map (unsigned int v) const /* Returns 16.16 outer.inner. */
+  {
+    /* An empty map passes the value through unchanged, which gives the
+     * direct mapping used for the advance map. */
+    if (!mapCount) return v;
+
+    /* Indices past the end reuse the last entry. */
+    if (v >= mapCount) v = mapCount - 1;
+
+    /* Read the entry, most-significant byte first. */
+    const unsigned int entry_size = get_width ();
+    const HBUINT8 *entry = mapDataZ.arrayZ + entry_size * v;
+    unsigned int packed = 0;
+    for (unsigned int k = 0; k < entry_size; k++)
+      packed = (packed << 8) + entry[k];
+
+    /* Split into outer / inner and repack as 16.16. */
+    const unsigned int inner_bits = get_inner_bit_count ();
+    const unsigned int outer = packed >> inner_bits;
+    const unsigned int inner = packed & ((1 << inner_bits) - 1);
+    return (outer << 16) | inner;
+  }
+
+  /* Number of entries stored in the map. */
+  unsigned int get_map_count () const
+  { return mapCount; }
+
+  /* Bytes per entry: bits 4-5 of format, plus one. */
+  unsigned int get_width () const
+  { return ((format >> 4) & 3) + 1; }
+
+  /* Bits used for the inner index: low 4 bits of format, plus one. */
+  unsigned int get_inner_bit_count () const
+  { return (format & 0xF) + 1; }
+
+  protected:
+  HBUINT16	format;		/* A packed field that describes the compressed
+				 * representation of delta-set indices. */
+  HBUINT16	mapCount;	/* The number of mapping entries. */
+  UnsizedArrayOf<HBUINT8>
+		mapDataZ;	/* The delta-set index mapping data. */
+
+  public:
+  DEFINE_SIZE_ARRAY (4, mapDataZ);
+};
+
+/* Per-map state used while subsetting one DeltaSetIndexMap: how many entries
+ * the output map needs, the bit widths of its packed entries, and the
+ * remapped 16.16 outer.inner values themselves. */
+struct index_map_subset_plan_t
+{
+  /* Positions of the individual plans in the array passed to
+   * serialize_index_maps (). */
+  enum index_map_index_t {
+    ADV_INDEX,
+    LSB_INDEX,	/* dual as TSB */
+    RSB_INDEX,	/* dual as BSB */
+    VORG_INDEX
+  };
+
+  /* Resets the plan, then scans index_map for the retained glyphs:
+   *  - map_count becomes the shortest prefix length that still reproduces the
+   *    map (trailing entries equal to the last value are dropped);
+   *  - every outer index seen is added to outer_map;
+   *  - every inner index seen is added to inner_sets[outer] and tracked in
+   *    max_inners[outer].
+   * A Null index_map leaves the plan empty. */
+  void init (const DeltaSetIndexMap &index_map,
+	     hb_inc_bimap_t &outer_map,
+	     hb_vector_t<hb_set_t *> &inner_sets,
+	     const hb_subset_plan_t *plan)
+  {
+    map_count = 0;
+    outer_bit_count = 0;
+    inner_bit_count = 1;
+    max_inners.init ();
+    output_map.init ();
+
+    if (&index_map == &Null (DeltaSetIndexMap)) return;
+
+    unsigned int last_val = (unsigned int)-1;
+    hb_codepoint_t last_gid = (hb_codepoint_t)-1;
+    hb_codepoint_t gid = (hb_codepoint_t) hb_min (index_map.get_map_count (), plan->num_output_glyphs ());
+
+    outer_bit_count = (index_map.get_width () * 8) - index_map.get_inner_bit_count ();
+    max_inners.resize (inner_sets.length);
+    for (unsigned i = 0; i < inner_sets.length; i++) max_inners[i] = 0;
+
+    /* Search backwards for a map value different from the last map value */
+    for (; gid > 0; gid--)
+    {
+      hb_codepoint_t old_gid;
+      if (!plan->old_gid_for_new_gid (gid - 1, &old_gid))
+      {
+	/* Glyphs that are not retained are skipped until the first retained
+	 * one has been seen; after that they end the search. */
+	if (last_gid == (hb_codepoint_t) -1)
+	  continue;
+	else
+	  break;
+      }
+
+      unsigned int v = index_map.map (old_gid);
+      if (last_gid == (hb_codepoint_t) -1)
+      {
+	last_val = v;
+	last_gid = gid;
+	continue;
+      }
+      if (v != last_val) break;
+
+      last_gid = gid;
+    }
+
+    if (unlikely (last_gid == (hb_codepoint_t)-1)) return;
+    map_count = last_gid;
+    for (gid = 0; gid < map_count; gid++)
+    {
+      hb_codepoint_t old_gid;
+      if (plan->old_gid_for_new_gid (gid, &old_gid))
+      {
+	unsigned int v = index_map.map (old_gid);
+	unsigned int outer = v >> 16;
+	unsigned int inner = v & 0xFFFF;
+	outer_map.add (outer);
+	/* Validate the outer index BEFORE it is used to index max_inners and
+	 * inner_sets (both sized inner_sets.length).  The check used to come
+	 * after max_inners[outer] had already been read and written. */
+	if (outer >= inner_sets.length) return;
+	if (inner > max_inners[outer]) max_inners[outer] = inner;
+	inner_sets[outer]->add (inner);
+      }
+    }
+  }
+
+  /* Releases the vectors owned by the plan. */
+  void fini ()
+  {
+    max_inners.fini ();
+    output_map.fini ();
+  }
+
+  /* Builds output_map once the outer and inner remappings are final: widens
+   * inner_bit_count to fit the largest remapped inner index, then translates
+   * the entry of every retained glyph.  Does nothing for a Null input map. */
+  void remap (const DeltaSetIndexMap *input_map,
+	      const hb_inc_bimap_t &outer_map,
+	      const hb_vector_t<hb_inc_bimap_t> &inner_maps,
+	      const hb_subset_plan_t *plan)
+  {
+    if (input_map == &Null (DeltaSetIndexMap)) return;
+
+    for (unsigned int i = 0; i < max_inners.length; i++)
+    {
+      if (inner_maps[i].get_population () == 0) continue;
+      unsigned int bit_count = (max_inners[i]==0)? 1: hb_bit_storage (inner_maps[i][max_inners[i]]);
+      if (bit_count > inner_bit_count) inner_bit_count = bit_count;
+    }
+
+    output_map.resize (map_count);
+    for (hb_codepoint_t gid = 0; gid < output_map.length; gid++)
+    {
+      hb_codepoint_t old_gid;
+      if (plan->old_gid_for_new_gid (gid, &old_gid))
+      {
+	unsigned int v = input_map->map (old_gid);
+	unsigned int outer = v >> 16;
+	output_map[gid] = (outer_map[outer] << 16) | (inner_maps[outer][v & 0xFFFF]);
+      }
+      else
+	output_map[gid] = 0;	/* Map unused glyph to outer/inner=0/0 */
+    }
+  }
+
+  unsigned int get_inner_bit_count () const { return inner_bit_count; }
+  /* Bytes per entry: outer plus inner bits, rounded up to whole bytes. */
+  unsigned int get_width () const { return ((outer_bit_count + inner_bit_count + 7) / 8); }
+  unsigned int get_map_count () const { return map_count; }
+
+  /* Serialized size of the map; zero when the map has no entries. */
+  unsigned int get_size () const
+  { return (map_count? (DeltaSetIndexMap::min_size + get_width () * map_count): 0); }
+
+  /* A plan without output entries stands for the identity mapping. */
+  bool is_identity () const { return get_output_map ().length == 0; }
+  hb_array_t<const unsigned int> get_output_map () const { return output_map.as_array (); }
+
+  protected:
+  unsigned int map_count;		/* number of entries in the output map */
+  hb_vector_t<unsigned int> max_inners;	/* largest inner index seen, per outer index */
+  unsigned int outer_bit_count;
+  unsigned int inner_bit_count;
+  hb_vector_t<unsigned int> output_map;	/* remapped 16.16 outer.inner entries */
+};
+
+/* Subsetting plan shared by HVAR and VVAR: collects the outer/inner
+ * variation indices referenced by the retained glyphs and builds the
+ * remapped index maps. */
+struct hvarvvar_subset_plan_t
+{
+  /* The raw pointers start out null so that the destructor is safe even when
+   * init () was never called; hb_set_destroy () accepts a null set. */
+  hvarvvar_subset_plan_t() : inner_maps (), index_map_plans (), var_store (nullptr), adv_set (nullptr) {}
+  ~hvarvvar_subset_plan_t() { fini (); }
+
+  /* Builds one index_map_subset_plan_t per entry of index_maps against
+   * _var_store.  index_maps[0] is treated as the advance map: when it is the
+   * Null map, glyph ids themselves serve as inner indices of outer index 0. */
+  void init (const hb_array_t<const DeltaSetIndexMap *> &index_maps,
+	     const VariationStore &_var_store,
+	     const hb_subset_plan_t *plan)
+  {
+    index_map_plans.resize (index_maps.length);
+
+    var_store = &_var_store;
+    inner_sets.resize (var_store->get_sub_table_count ());
+    for (unsigned int i = 0; i < inner_sets.length; i++)
+      inner_sets[i] = hb_set_create ();
+    adv_set = hb_set_create ();
+
+    inner_maps.resize (var_store->get_sub_table_count ());
+
+    for (unsigned int i = 0; i < inner_maps.length; i++)
+      inner_maps[i].init ();
+
+    if (unlikely (!index_map_plans.length || !inner_sets.length || !inner_maps.length)) return;
+
+    bool retain_adv_map = false;
+    index_map_plans[0].init (*index_maps[0], outer_map, inner_sets, plan);
+    if (index_maps[0] == &Null (DeltaSetIndexMap))
+    {
+      /* No advance map: every retained glyph's old id is an inner index of
+       * outer index 0. */
+      retain_adv_map = plan->retain_gids;
+      outer_map.add (0);
+      for (hb_codepoint_t gid = 0; gid < plan->num_output_glyphs (); gid++)
+      {
+	hb_codepoint_t old_gid;
+	if (plan->old_gid_for_new_gid (gid, &old_gid))
+	  inner_sets[0]->add (old_gid);
+      }
+      hb_set_union (adv_set, inner_sets[0]);
+    }
+
+    for (unsigned int i = 1; i < index_maps.length; i++)
+      index_map_plans[i].init (*index_maps[i], outer_map, inner_sets, plan);
+
+    outer_map.sort ();
+
+    if (retain_adv_map)
+    {
+      /* Keep inner indices aligned with glyph ids, skipping the unused ones. */
+      for (hb_codepoint_t gid = 0; gid < plan->num_output_glyphs (); gid++)
+	if (inner_sets[0]->has (gid))
+	  inner_maps[0].add (gid);
+	else
+	  inner_maps[0].skip ();
+    }
+    else
+    {
+      /* Advance indices come first, then the remaining ones of outer 0. */
+      inner_maps[0].add_set (adv_set);
+      hb_set_subtract (inner_sets[0], adv_set);
+      inner_maps[0].add_set (inner_sets[0]);
+    }
+
+    for (unsigned int i = 1; i < inner_maps.length; i++)
+      inner_maps[i].add_set (inner_sets[i]);
+
+    for (unsigned int i = 0; i < index_maps.length; i++)
+      index_map_plans[i].remap (index_maps[i], outer_map, inner_maps, plan);
+  }
+
+  /* Releases everything created by init ().  The set pointers are dropped
+   * after being destroyed, so calling fini () again (the destructor does) is
+   * harmless. */
+  void fini ()
+  {
+    for (unsigned int i = 0; i < inner_sets.length; i++)
+      hb_set_destroy (inner_sets[i]);
+    inner_sets.fini ();
+    hb_set_destroy (adv_set);
+    adv_set = nullptr;
+    inner_maps.fini_deep ();
+    index_map_plans.fini_deep ();
+  }
+
+  hb_inc_bimap_t outer_map;
+  hb_vector_t<hb_inc_bimap_t> inner_maps;
+  hb_vector_t<index_map_subset_plan_t> index_map_plans;
+  const VariationStore *var_store;	/* not owned; set by init () */
+
+  protected:
+  hb_vector_t<hb_set_t *> inner_sets;	/* one set of inner indices per var store sub-table */
+  hb_set_t *adv_set;			/* inner indices taken from glyph ids when there is no advance map */
+};
+
+/*
+ * HVAR -- Horizontal Metrics Variations
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/hvar
+ * VVAR -- Vertical Metrics Variations
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/vvar
+ */
+#define HB_OT_TAG_HVAR HB_TAG('H','V','A','R')
+#define HB_OT_TAG_VVAR HB_TAG('V','V','A','R')
+
+/* Layout and logic common to the HVAR and VVAR tables. */
+struct HVARVVAR
+{
+  static constexpr hb_tag_t HVARTag = HB_OT_TAG_HVAR;
+  static constexpr hb_tag_t VVARTag = HB_OT_TAG_VVAR;
+
+  /* Accepts major version 1 only, then sanitizes the variation store and the
+   * three index maps relative to this table. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!version.sanitize (c) || unlikely (version.major != 1))
+      return_trace (false);
+    return_trace (varStore.sanitize (c, this) &&
+		  advMap.sanitize (c, this) &&
+		  lsbMap.sanitize (c, this) &&
+		  rsbMap.sanitize (c, this));
+  }
+
+  /* Appends the advance, lsb and rsb maps, in index_map_index_t order. */
+  void listup_index_maps (hb_vector_t<const DeltaSetIndexMap *> &index_maps) const
+  {
+    const DeltaSetIndexMap *maps[] = { &(this+advMap), &(this+lsbMap), &(this+rsbMap) };
+    for (unsigned int i = 0; i < sizeof (maps) / sizeof (maps[0]); i++)
+      index_maps.push (maps[i]);
+  }
+
+  /* Serializes the three index maps from their plans.  A map whose plan is
+   * the identity is not written; its offset is set to 0 instead. */
+  bool serialize_index_maps (hb_serialize_context_t *c,
+			     const hb_array_t<index_map_subset_plan_t> &im_plans)
+  {
+    TRACE_SERIALIZE (this);
+
+    const index_map_subset_plan_t &adv_plan = im_plans[index_map_subset_plan_t::ADV_INDEX];
+    if (adv_plan.is_identity ())
+      advMap = 0;
+    else if (unlikely (!advMap.serialize (c, this).serialize (c, adv_plan)))
+      return_trace (false);
+
+    const index_map_subset_plan_t &lsb_plan = im_plans[index_map_subset_plan_t::LSB_INDEX];
+    if (lsb_plan.is_identity ())
+      lsbMap = 0;
+    else if (unlikely (!lsbMap.serialize (c, this).serialize (c, lsb_plan)))
+      return_trace (false);
+
+    const index_map_subset_plan_t &rsb_plan = im_plans[index_map_subset_plan_t::RSB_INDEX];
+    if (rsb_plan.is_identity ())
+      rsbMap = 0;
+    else if (unlikely (!rsbMap.serialize (c, this).serialize (c, rsb_plan)))
+      return_trace (false);
+
+    return_trace (true);
+  }
+
+  /* Shared subset implementation; T is the concrete table type (HVAR or
+   * VVAR), which supplies listup_index_maps () and serialize_index_maps (). */
+  template <typename T>
+  bool _subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+
+    hb_vector_t<const DeltaSetIndexMap *> index_maps;
+    static_cast<const T *> (this)->listup_index_maps (index_maps);
+
+    hvarvvar_subset_plan_t hvar_plan;
+    hvar_plan.init (index_maps.as_array (), this+varStore, c->plan);
+
+    T *out = c->serializer->allocate_min<T> ();
+    if (unlikely (!out)) return_trace (false);
+
+    out->version.major = 1;
+    out->version.minor = 0;
+
+    bool store_ok = out->varStore.serialize (c->serializer, out)
+			.serialize (c->serializer, hvar_plan.var_store, hvar_plan.inner_maps.as_array ());
+    if (unlikely (!store_ok))
+      return_trace (false);
+
+    return_trace (out->T::serialize_index_maps (c->serializer,
+						hvar_plan.index_map_plans.as_array ()));
+  }
+
+  /* Advance delta of glyph at the font's current variation coordinates. */
+  float get_advance_var (hb_codepoint_t glyph, hb_font_t *font) const
+  {
+    const unsigned int var_index = (this+advMap).map (glyph);
+    const VariationStore &store = this+varStore;
+    return store.get_delta (var_index, font->coords, font->num_coords);
+  }
+
+  /* Side-bearing delta of glyph, looked up through lsbMap; 0 when the table
+   * has no side-bearing deltas. */
+  float get_side_bearing_var (hb_codepoint_t glyph,
+			      const int *coords, unsigned int coord_count) const
+  {
+    if (has_side_bearing_deltas ())
+    {
+      const unsigned int var_index = (this+lsbMap).map (glyph);
+      return (this+varStore).get_delta (var_index, coords, coord_count);
+    }
+    return 0.f;
+  }
+
+  /* True only when both the lsb and the rsb map offsets are non-zero. */
+  bool has_side_bearing_deltas () const
+  { return lsbMap && rsbMap; }
+
+  protected:
+  FixedVersion<>version;	/* Version of the metrics variation table
+				 * initially set to 0x00010000u */
+  LOffsetTo<VariationStore>
+		varStore;	/* Offset to item variation store table. */
+  LOffsetTo<DeltaSetIndexMap>
+		advMap;		/* Offset to advance var-idx mapping. */
+  LOffsetTo<DeltaSetIndexMap>
+		lsbMap;		/* Offset to lsb/tsb var-idx mapping. */
+  LOffsetTo<DeltaSetIndexMap>
+		rsbMap;		/* Offset to rsb/bsb var-idx mapping. */
+
+  public:
+  DEFINE_SIZE_STATIC (20);
+};
+
+/* HVAR table: the common HVARVVAR layout under the 'HVAR' tag. */
+struct HVAR : HVARVVAR
+{
+  static constexpr hb_tag_t tableTag = HB_OT_TAG_HVAR;
+
+  /* Subsets through the shared implementation, instantiated for HVAR. */
+  bool subset (hb_subset_context_t *c) const
+  { return HVARVVAR::_subset<HVAR> (c); }
+};
+/* VVAR table: the common HVARVVAR layout plus a vertical-origin map. */
+struct VVAR : HVARVVAR
+{
+  static constexpr hb_tag_t tableTag = HB_OT_TAG_VVAR;
+
+  /* Sanitizes the common part first, then the vertical-origin map. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!HVARVVAR::sanitize (c)) return_trace (false);
+    return_trace (vorgMap.sanitize (c, this));
+  }
+
+  /* Appends the three common maps followed by the vertical-origin map. */
+  void listup_index_maps (hb_vector_t<const DeltaSetIndexMap *> &index_maps) const
+  {
+    HVARVVAR::listup_index_maps (index_maps);
+    const DeltaSetIndexMap &vorg = this+vorgMap;
+    index_maps.push (&vorg);
+  }
+
+  /* Serializes the common maps, then the vertical-origin map.  The latter is
+   * omitted (offset 0) when its plan has no entries. */
+  bool serialize_index_maps (hb_serialize_context_t *c,
+			     const hb_array_t<index_map_subset_plan_t> &im_plans)
+  {
+    TRACE_SERIALIZE (this);
+    if (unlikely (!HVARVVAR::serialize_index_maps (c, im_plans)))
+      return_trace (false);
+
+    const index_map_subset_plan_t &vorg_plan = im_plans[index_map_subset_plan_t::VORG_INDEX];
+    if (vorg_plan.get_map_count () == 0)
+      vorgMap = 0;
+    else if (unlikely (!vorgMap.serialize (c, this).serialize (c, vorg_plan)))
+      return_trace (false);
+
+    return_trace (true);
+  }
+
+  /* Subsets through the shared implementation, instantiated for VVAR. */
+  bool subset (hb_subset_context_t *c) const
+  { return HVARVVAR::_subset<VVAR> (c); }
+
+  protected:
+  LOffsetTo<DeltaSetIndexMap>
+		vorgMap;	/* Offset to vertical-origin var-idx mapping. */
+
+  public:
+  DEFINE_SIZE_STATIC (24);
+};
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_VAR_HVAR_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-var-mvar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-mvar-table.hh
new file mode 100644
index 0000000000..1b7fad9cec
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-var-mvar-table.hh
@@ -0,0 +1,119 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_VAR_MVAR_TABLE_HH
+#define HB_OT_VAR_MVAR_TABLE_HH
+
+#include "hb-ot-layout-common.hh"
+
+
+namespace OT {
+
+
+/* One MVAR value record: a tag paired with a variation-store index. */
+struct VariationValueRecord
+{
+  /* The record has a fixed size, so a struct bounds check suffices. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    const bool in_bounds = c->check_struct (this);
+    return_trace (in_bounds);
+  }
+
+  public:
+  Tag		valueTag;	/* Four-byte tag identifying a font-wide measure. */
+  HBUINT32	varIdx;		/* Outer/inner index into VariationStore item. */
+
+  public:
+  DEFINE_SIZE_STATIC (8);
+};
+
+
+/*
+ * MVAR -- Metrics Variations
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/mvar
+ */
+#define HB_OT_TAG_MVAR HB_TAG('M','V','A','R')
+
+struct MVAR
+{
+  static constexpr hb_tag_t tableTag = HB_OT_TAG_MVAR;
+
+  /* Accepts major version 1 only.  Records may be larger than
+   * VariationValueRecord but never smaller; the whole record array is
+   * range-checked using the declared record size. */
+  bool sanitize (hb_sanitize_context_t *c) const
+  {
+    TRACE_SANITIZE (this);
+    if (!version.sanitize (c) || unlikely (version.major != 1))
+      return_trace (false);
+    if (!c->check_struct (this))
+      return_trace (false);
+    if (!(valueRecordSize >= VariationValueRecord::static_size))
+      return_trace (false);
+    return_trace (varStore.sanitize (c, this) &&
+		  c->check_range (valuesZ.arrayZ,
+				  valueRecordCount,
+				  valueRecordSize));
+  }
+
+  /* Returns the delta for the measure identified by tag at the given
+   * coordinates, or 0 when no record carries that tag.  Records are searched
+   * by binary search with a stride of valueRecordSize bytes. */
+  float get_var (hb_tag_t tag,
+		 const int *coords, unsigned int coord_count) const
+  {
+    const VariationValueRecord *found =
+      static_cast<const VariationValueRecord *> (hb_bsearch (tag,
+							     (const VariationValueRecord *)
+							     (const HBUINT8 *) valuesZ,
+							     valueRecordCount, valueRecordSize,
+							     tag_compare));
+    if (found)
+      return (this+varStore).get_delta (found->varIdx, coords, coord_count);
+
+    return 0.f;
+  }
+
+  protected:
+  /* hb_bsearch comparator: pa points to the searched hb_tag_t, pb to the
+   * record being probed, whose first field is its Tag. */
+  static int tag_compare (const void *pa, const void *pb)
+  {
+    const hb_tag_t &key = *(const hb_tag_t *) pa;
+    const Tag &candidate = *(const Tag *) pb;
+    return candidate.cmp (key);
+  }
+
+  protected:
+  FixedVersion<>version;	/* Version of the metrics variation table
+				 * initially set to 0x00010000u */
+  HBUINT16	reserved;	/* Not used; set to 0. */
+  HBUINT16	valueRecordSize;/* The size in bytes of each value record —
+				 * must be greater than zero. */
+  HBUINT16	valueRecordCount;/* The number of value records — may be zero. */
+  OffsetTo<VariationStore>
+		varStore;	/* Offset to item variation store table. */
+  UnsizedArrayOf<HBUINT8>
+		valuesZ;	/* Array of value records. The records must be
+				 * in binary order of their valueTag field. */
+
+  public:
+  DEFINE_SIZE_ARRAY (12, valuesZ);
+};
+
+} /* namespace OT */
+
+
+#endif /* HB_OT_VAR_MVAR_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot-var.cc b/thirdparty/harfbuzz/src/hb-ot-var.cc
new file mode 100644
index 0000000000..6b8b09b6ba
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-var.cc
@@ -0,0 +1,220 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_VAR
+
+#include "hb-ot-var.h"
+
+#include "hb-ot-var-avar-table.hh"
+#include "hb-ot-var-fvar-table.hh"
+#include "hb-ot-var-mvar-table.hh"
+
+
+/**
+ * SECTION:hb-ot-var
+ * @title: hb-ot-var
+ * @short_description: OpenType Font Variations
+ * @include: hb-ot.h
+ *
+ * Functions for fetching information about OpenType Variable Fonts.
+ **/
+
+
+/*
+ * fvar/avar
+ */
+
+
+/**
+ * hb_ot_var_has_data:
+ * @face: #hb_face_t to test
+ *
+ * Tests whether OpenType variation data is present on @face, as reported by
+ * has_data () on the face's `fvar` table.
+ *
+ * Return value: true if @face has an `fvar` table and false otherwise
+ *
+ * Since: 1.4.2
+ **/
+hb_bool_t
+hb_ot_var_has_data (hb_face_t *face)
+{
+ return face->table.fvar->has_data ();
+}
+
+/**
+ * hb_ot_var_get_axis_count:
+ * @face: #hb_face_t whose `fvar` table is queried
+ *
+ * Return value: the axis count reported by get_axis_count () on the face's
+ * `fvar` table
+ *
+ * Since: 1.4.2
+ **/
+unsigned int
+hb_ot_var_get_axis_count (hb_face_t *face)
+{
+ return face->table.fvar->get_axis_count ();
+}
+
+#ifndef HB_DISABLE_DEPRECATED
+/**
+ * hb_ot_var_get_axes:
+ * @face: #hb_face_t whose `fvar` table is queried
+ * @start_offset: forwarded unchanged to the `fvar` table
+ * @axes_count: (IN/OUT) forwarded unchanged to the `fvar` table
+ * @axes_array: (OUT) forwarded unchanged to the `fvar` table
+ *
+ * Thin wrapper around get_axes_deprecated () of the face's `fvar` table.
+ * Only compiled when HB_DISABLE_DEPRECATED is not defined.
+ *
+ * Return value: whatever get_axes_deprecated () returns (presumably the
+ * number of axes in the face; confirm against the `fvar` implementation)
+ *
+ * Since: 1.4.2
+ * Deprecated: 2.2.0
+ **/
+unsigned int
+hb_ot_var_get_axes (hb_face_t *face,
+ unsigned int start_offset,
+ unsigned int *axes_count /* IN/OUT */,
+ hb_ot_var_axis_t *axes_array /* OUT */)
+{
+ return face->table.fvar->get_axes_deprecated (start_offset, axes_count, axes_array);
+}
+
+/**
+ * hb_ot_var_find_axis:
+ * @face: #hb_face_t whose `fvar` table is queried
+ * @axis_tag: tag of the axis to look for
+ * @axis_index: forwarded unchanged to the `fvar` table (presumably receives
+ *              the index of the axis; confirm against the `fvar` implementation)
+ * @axis_info: forwarded unchanged to the `fvar` table (presumably receives
+ *             the axis description)
+ *
+ * Thin wrapper around find_axis_deprecated () of the face's `fvar` table.
+ * Only compiled when HB_DISABLE_DEPRECATED is not defined.
+ *
+ * Return value: whatever find_axis_deprecated () returns
+ *
+ * Since: 1.4.2
+ * Deprecated: 2.2.0
+ **/
+hb_bool_t
+hb_ot_var_find_axis (hb_face_t *face,
+ hb_tag_t axis_tag,
+ unsigned int *axis_index,
+ hb_ot_var_axis_info_t *axis_info)
+{
+ return face->table.fvar->find_axis_deprecated (axis_tag, axis_index, axis_info);
+}
+#endif
+
+/**
+ * hb_ot_var_get_axis_infos:
+ * @face: #hb_face_t whose `fvar` table is queried
+ * @start_offset: forwarded unchanged to the `fvar` table
+ * @axes_count: (IN/OUT) forwarded unchanged to the `fvar` table
+ * @axes_array: (OUT) forwarded unchanged to the `fvar` table
+ *
+ * Thin wrapper around get_axis_infos () of the face's `fvar` table.
+ *
+ * Return value: whatever get_axis_infos () returns (presumably the number of
+ * axes in the face; confirm against the `fvar` implementation)
+ *
+ * Since: 2.2.0
+ **/
+HB_EXTERN unsigned int
+hb_ot_var_get_axis_infos (hb_face_t *face,
+ unsigned int start_offset,
+ unsigned int *axes_count /* IN/OUT */,
+ hb_ot_var_axis_info_t *axes_array /* OUT */)
+{
+ return face->table.fvar->get_axis_infos (start_offset, axes_count, axes_array);
+}
+
+/**
+ * hb_ot_var_find_axis_info:
+ * @face: #hb_face_t whose `fvar` table is queried
+ * @axis_tag: tag of the axis to look for
+ * @axis_info: (OUT) forwarded unchanged to the `fvar` table; callers in this
+ *             file read its axis_index field after a successful lookup
+ *
+ * Thin wrapper around find_axis_info () of the face's `fvar` table.
+ *
+ * Return value: whatever find_axis_info () returns; callers in this file
+ * treat a non-zero result as "axis found"
+ *
+ * Since: 2.2.0
+ **/
+HB_EXTERN hb_bool_t
+hb_ot_var_find_axis_info (hb_face_t *face,
+ hb_tag_t axis_tag,
+ hb_ot_var_axis_info_t *axis_info)
+{
+ return face->table.fvar->find_axis_info (axis_tag, axis_info);
+}
+
+
+/*
+ * Named instances.
+ */
+
+unsigned int
+hb_ot_var_get_named_instance_count (hb_face_t *face) /* Returns get_instance_count () of face's fvar table. */
+{
+ return face->table.fvar->get_instance_count ();
+}
+
+hb_ot_name_id_t
+hb_ot_var_named_instance_get_subfamily_name_id (hb_face_t *face,
+ unsigned int instance_index) /* Returns get_instance_subfamily_name_id (instance_index) of face's fvar table. */
+{
+ return face->table.fvar->get_instance_subfamily_name_id (instance_index);
+}
+
+hb_ot_name_id_t
+hb_ot_var_named_instance_get_postscript_name_id (hb_face_t *face,
+ unsigned int instance_index) /* Returns get_instance_postscript_name_id (instance_index) of face's fvar table. */
+{
+ return face->table.fvar->get_instance_postscript_name_id (instance_index);
+}
+
+unsigned int
+hb_ot_var_named_instance_get_design_coords (hb_face_t *face,
+ unsigned int instance_index,
+ unsigned int *coords_length, /* IN/OUT */
+ float *coords /* OUT */) /* Forwards all arguments to get_instance_coords () of face's fvar table and returns its result. */
+{
+ return face->table.fvar->get_instance_coords (instance_index, coords_length, coords);
+}
+
+
+/**
+ * hb_ot_var_normalize_variations:
+ * @face: #hb_face_t whose `fvar` and `avar` tables are consulted
+ * @variations: (IN) tag/value pairs to normalize
+ * @variations_length: number of entries in @variations
+ * @coords: (OUT) array receiving one normalized value per axis index
+ * @coords_length: number of entries in @coords
+ *
+ * Zeroes @coords, then, for every variation whose tag names an axis with an
+ * index below @coords_length, stores the `fvar`-normalized value at that
+ * index.  The array is finally passed through map_coords () of the `avar`
+ * table.  Variations naming unknown or out-of-range axes are ignored.
+ *
+ * Since: 1.4.2
+ **/
+void
+hb_ot_var_normalize_variations (hb_face_t *face,
+				const hb_variation_t *variations, /* IN */
+				unsigned int variations_length,
+				int *coords, /* OUT */
+				unsigned int coords_length)
+{
+  for (unsigned int axis = 0; axis < coords_length; axis++)
+    coords[axis] = 0;
+
+  const OT::fvar &fvar = *face->table.fvar;
+  for (unsigned int v = 0; v < variations_length; v++)
+  {
+    const hb_variation_t &variation = variations[v];
+    hb_ot_var_axis_info_t info;
+    if (!hb_ot_var_find_axis_info (face, variation.tag, &info))
+      continue;
+    if (info.axis_index >= coords_length)
+      continue;
+    coords[info.axis_index] = fvar.normalize_axis_value (info.axis_index, variation.value);
+  }
+
+  face->table.avar->map_coords (coords, coords_length);
+}
+
+/**
+ * hb_ot_var_normalize_coords:
+ * @face: #hb_face_t whose `fvar` and `avar` tables are consulted
+ * @coords_length: number of entries in both arrays
+ * @design_coords: (IN) one design-space value per axis index
+ * @normalized_coords: (OUT) receives one normalized value per axis index
+ *
+ * Normalizes entry i of @design_coords as the value of axis i through the
+ * `fvar` table, then passes the result through map_coords () of the `avar`
+ * table.
+ *
+ * Since: 1.4.2
+ **/
+void
+hb_ot_var_normalize_coords (hb_face_t *face,
+			    unsigned int coords_length,
+			    const float *design_coords, /* IN */
+			    int *normalized_coords /* OUT */)
+{
+  const OT::fvar &fvar = *face->table.fvar;
+  unsigned int axis = 0;
+  while (axis < coords_length)
+  {
+    normalized_coords[axis] = fvar.normalize_axis_value (axis, design_coords[axis]);
+    axis++;
+  }
+
+  face->table.avar->map_coords (normalized_coords, coords_length);
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-ot-var.h b/thirdparty/harfbuzz/src/hb-ot-var.h
new file mode 100644
index 0000000000..df89bc5a23
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-var.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_H_IN
+#error "Include <hb-ot.h> instead."
+#endif
+
+#ifndef HB_OT_VAR_H
+#define HB_OT_VAR_H
+
+#include "hb.h"
+
+HB_BEGIN_DECLS
+
+
+#define HB_OT_TAG_VAR_AXIS_ITALIC HB_TAG('i','t','a','l')
+#define HB_OT_TAG_VAR_AXIS_OPTICAL_SIZE HB_TAG('o','p','s','z')
+#define HB_OT_TAG_VAR_AXIS_SLANT HB_TAG('s','l','n','t')
+#define HB_OT_TAG_VAR_AXIS_WIDTH HB_TAG('w','d','t','h')
+#define HB_OT_TAG_VAR_AXIS_WEIGHT HB_TAG('w','g','h','t')
+
+
+/*
+ * fvar / avar
+ */
+
+HB_EXTERN hb_bool_t
+hb_ot_var_has_data (hb_face_t *face);
+
+
+/*
+ * Variation axes.
+ */
+
+
+HB_EXTERN unsigned int
+hb_ot_var_get_axis_count (hb_face_t *face);
+
+/**
+ * hb_ot_var_axis_flags_t:
+ * @HB_OT_VAR_AXIS_FLAG_HIDDEN: The axis should not be exposed directly in user interfaces.
+ *
+ * Since: 2.2.0
+ */
+typedef enum { /*< flags >*/
+ HB_OT_VAR_AXIS_FLAG_HIDDEN = 0x00000001u,
+
+ _HB_OT_VAR_AXIS_FLAG_MAX_VALUE= HB_TAG_MAX_SIGNED /*< skip >*/
+} hb_ot_var_axis_flags_t;
+
+/**
+ * hb_ot_var_axis_info_t:
+ *
+ * Per-axis record written by hb_ot_var_get_axis_infos() and
+ * hb_ot_var_find_axis_info().
+ *
+ * Since: 2.2.0
+ */
+typedef struct hb_ot_var_axis_info_t
+{
+ unsigned int axis_index; /* index of the axis; usable as an index into a coords array */
+ hb_tag_t tag; /* axis tag; the key hb_ot_var_find_axis_info() is queried with */
+ hb_ot_name_id_t name_id;
+ hb_ot_var_axis_flags_t flags; /* see hb_ot_var_axis_flags_t */
+ float min_value; /* NOTE(review): presumably the axis range in design units -- confirm */
+ float default_value;
+ float max_value;
+ /*< private >*/
+ unsigned int reserved;
+} hb_ot_var_axis_info_t;
+
+HB_EXTERN unsigned int
+hb_ot_var_get_axis_infos (hb_face_t *face,
+ unsigned int start_offset,
+ unsigned int *axes_count /* IN/OUT */,
+ hb_ot_var_axis_info_t *axes_array /* OUT */);
+
+HB_EXTERN hb_bool_t
+hb_ot_var_find_axis_info (hb_face_t *face,
+ hb_tag_t axis_tag,
+ hb_ot_var_axis_info_t *axis_info);
+
+
+/*
+ * Named instances.
+ */
+
+HB_EXTERN unsigned int
+hb_ot_var_get_named_instance_count (hb_face_t *face);
+
+HB_EXTERN hb_ot_name_id_t
+hb_ot_var_named_instance_get_subfamily_name_id (hb_face_t *face,
+ unsigned int instance_index);
+
+HB_EXTERN hb_ot_name_id_t
+hb_ot_var_named_instance_get_postscript_name_id (hb_face_t *face,
+ unsigned int instance_index);
+
+HB_EXTERN unsigned int
+hb_ot_var_named_instance_get_design_coords (hb_face_t *face,
+ unsigned int instance_index,
+ unsigned int *coords_length, /* IN/OUT */
+ float *coords /* OUT */);
+
+
+/*
+ * Conversions.
+ */
+
+HB_EXTERN void
+hb_ot_var_normalize_variations (hb_face_t *face,
+ const hb_variation_t *variations, /* IN */
+ unsigned int variations_length,
+ int *coords, /* OUT */
+ unsigned int coords_length);
+
+HB_EXTERN void
+hb_ot_var_normalize_coords (hb_face_t *face,
+ unsigned int coords_length,
+ const float *design_coords, /* IN */
+ int *normalized_coords /* OUT */);
+
+
+HB_END_DECLS
+
+#endif /* HB_OT_VAR_H */
diff --git a/thirdparty/harfbuzz/src/hb-ot-vorg-table.hh b/thirdparty/harfbuzz/src/hb-ot-vorg-table.hh
new file mode 100644
index 0000000000..c6803200f9
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot-vorg-table.hh
@@ -0,0 +1,136 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+
+#ifndef HB_OT_VORG_TABLE_HH
+#define HB_OT_VORG_TABLE_HH
+
+#include "hb-open-type.hh"
+
+/*
+ * VORG -- Vertical Origin Table
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/vorg
+ */
+#define HB_OT_TAG_VORG HB_TAG('V','O','R','G')
+
+namespace OT {
+
+/* One VORG array entry: a glyph ID paired with its vertOriginY value. */
+struct VertOriginMetric
+{
+ /* Compares this entry against glyph ID g by delegating to the glyph
+ * field's own cmp(). */
+ int cmp (hb_codepoint_t g) const { return glyph.cmp (g); }
+
+ /* Only checks that the fixed-size struct passes the context's
+ * check_struct() range test. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this));
+ }
+
+ public:
+ HBGlyphID glyph; /* Glyph this entry applies to. */
+ FWORD vertOriginY; /* Value returned by VORG::get_y_origin() for that glyph. */
+
+ public:
+ DEFINE_SIZE_STATIC (4);
+};
+
+/* In-memory view of the VORG table: a version, a default vertOriginY and a
+ * sorted array of per-glyph overrides. */
+struct VORG
+{
+ static constexpr hb_tag_t tableTag = HB_OT_TAG_VORG;
+
+ /* True when the version field converts to a non-zero integer. */
+ bool has_data () const { return version.to_int (); }
+
+ /* Returns the vertOriginY recorded for glyph, or defaultVertOriginY when
+ * bfind() reports that the glyph has no entry in vertYOrigins. */
+ int get_y_origin (hb_codepoint_t glyph) const
+ {
+ unsigned int i;
+ if (!vertYOrigins.bfind (glyph, &i))
+ return defaultVertOriginY;
+ return vertYOrigins[i].vertOriginY;
+ }
+
+ /* Writes a version 1.0 table into the serializer: header fields first,
+ * then every item yielded by `it` via copy_all(). Returns silently if the
+ * serializer cannot extend to the minimum size. */
+ template <typename Iterator,
+ hb_requires (hb_is_iterator (Iterator))>
+ void serialize (hb_serialize_context_t *c,
+ Iterator it,
+ FWORD defaultVertOriginY)
+ {
+
+ if (unlikely (!c->extend_min ((*this)))) return;
+
+ this->version.major = 1;
+ this->version.minor = 0;
+
+ this->defaultVertOriginY = defaultVertOriginY;
+ /* The array length is taken from the iterator before the items are copied. */
+ this->vertYOrigins.len = it.len ();
+
+ c->copy_all (it);
+ }
+
+ /* Builds the subset table: keeps only entries whose glyph is in the plan's
+ * glyph set, rewrites each glyph ID through new_gid_for_old_gid(), and
+ * serializes the result with the same default origin. */
+ bool subset (hb_subset_context_t *c) const
+ {
+ TRACE_SUBSET (this);
+ VORG *vorg_prime = c->serializer->start_embed<VORG> ();
+ if (unlikely (!c->serializer->check_success (vorg_prime))) return_trace (false);
+
+ auto it =
+ + vertYOrigins.as_array ()
+ | hb_filter (c->plan->glyphset (), &VertOriginMetric::glyph)
+ | hb_map ([&] (const VertOriginMetric& _)
+ {
+ /* The return value of new_gid_for_old_gid() is not checked here;
+ * new_glyph keeps HB_SET_VALUE_INVALID if the call does not set it. */
+ hb_codepoint_t new_glyph = HB_SET_VALUE_INVALID;
+ c->plan->new_gid_for_old_gid (_.glyph, &new_glyph);
+
+ VertOriginMetric metric;
+ metric.glyph = new_glyph;
+ metric.vertOriginY = _.vertOriginY;
+ return metric;
+ })
+ ;
+
+ /* serialize the new table */
+ vorg_prime->serialize (c->serializer, it, defaultVertOriginY);
+ return_trace (true);
+ }
+
+ /* Accepts the table when the header is in range, the major version is 1
+ * and the origins array itself sanitizes. */
+ bool sanitize (hb_sanitize_context_t *c) const
+ {
+ TRACE_SANITIZE (this);
+ return_trace (c->check_struct (this) &&
+ version.major == 1 &&
+ vertYOrigins.sanitize (c));
+ }
+
+ protected:
+ FixedVersion<>version; /* Version of VORG table. Set to 0x00010000u. */
+ FWORD defaultVertOriginY;
+ /* The default vertical origin. */
+ SortedArrayOf<VertOriginMetric>
+ vertYOrigins; /* The array of vertical origins. */
+
+ public:
+ DEFINE_SIZE_ARRAY(8, vertYOrigins);
+};
+} /* namespace OT */
+
+#endif /* HB_OT_VORG_TABLE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ot.h b/thirdparty/harfbuzz/src/hb-ot.h
new file mode 100644
index 0000000000..f2dbaa1b31
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ot.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_H
+#define HB_OT_H
+#define HB_OT_H_IN
+
+#include "hb.h"
+
+#include "hb-ot-color.h"
+#include "hb-ot-deprecated.h"
+#include "hb-ot-font.h"
+#include "hb-ot-layout.h"
+#include "hb-ot-math.h"
+#include "hb-ot-meta.h"
+#include "hb-ot-metrics.h"
+#include "hb-ot-name.h"
+#include "hb-ot-shape.h"
+#include "hb-ot-var.h"
+
+HB_BEGIN_DECLS
+
+HB_END_DECLS
+
+#undef HB_OT_H_IN
+#endif /* HB_OT_H */
diff --git a/thirdparty/harfbuzz/src/hb-pool.hh b/thirdparty/harfbuzz/src/hb-pool.hh
new file mode 100644
index 0000000000..dcf0faf2a9
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-pool.hh
@@ -0,0 +1,100 @@
+/*
+ * Copyright © 2019 Facebook, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Facebook Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_POOL_HH
+#define HB_POOL_HH
+
+#include "hb.hh"
+
+/* Memory pool for persistent allocation of small objects. */
+
+/* Allocator that hands out T-sized slots carved from calloc'ed chunks of
+ * ChunkLen slots each. Unused slots form a singly linked list: the first
+ * bytes of every unused slot hold a pointer to the following unused slot,
+ * and `next` points at the first one. */
+template <typename T, unsigned ChunkLen = 16>
+struct hb_pool_t
+{
+ hb_pool_t () : next (nullptr) {}
+ ~hb_pool_t () { fini (); }
+
+ /* Releases every chunk with ::free and empties the chunk list. No
+ * per-object cleanup is performed here. */
+ void fini ()
+ {
+ next = nullptr;
+
+ for (chunk_t *_ : chunks) ::free (_);
+
+ chunks.fini ();
+ }
+
+ /* Returns a zero-filled slot, or nullptr when either the chunk list or a
+ * new chunk cannot be allocated. */
+ T* alloc ()
+ {
+ if (unlikely (!next))
+ {
+ /* Make room in the chunk list before allocating the chunk itself. */
+ if (unlikely (!chunks.alloc (chunks.length + 1))) return nullptr;
+ chunk_t *chunk = (chunk_t *) calloc (1, sizeof (chunk_t));
+ if (unlikely (!chunk)) return nullptr;
+ chunks.push (chunk);
+ next = chunk->thread ();
+ }
+
+ /* Pop the head of the unused-slot list; the link lives inside the slot. */
+ T* obj = next;
+ next = * ((T**) next);
+
+ /* Wipe the slot, which also erases the link pointer stored in it. */
+ memset (obj, 0, sizeof (T));
+
+ return obj;
+ }
+
+ /* Pushes obj back onto the unused-slot list by overwriting its first bytes
+ * with the link pointer. No destructor or fini() is invoked on obj. */
+ void free (T* obj)
+ {
+ * (T**) obj = next;
+ next = obj;
+ }
+
+ private:
+
+ /* A slot must be able to hold, and be suitably aligned for, a T* link. */
+ static_assert (ChunkLen > 1, "");
+ static_assert (sizeof (T) >= sizeof (void *), "");
+ static_assert (alignof (T) % alignof (void *) == 0, "");
+
+ struct chunk_t
+ {
+ /* Links every slot of this chunk to the following one, terminates the
+ * last slot with nullptr, and returns the first slot. */
+ T* thread ()
+ {
+ for (unsigned i = 0; i < ARRAY_LENGTH (arrayZ) - 1; i++)
+ * (T**) &arrayZ[i] = &arrayZ[i + 1];
+
+ * (T**) &arrayZ[ARRAY_LENGTH (arrayZ) - 1] = nullptr;
+
+ return arrayZ;
+ }
+
+ T arrayZ[ChunkLen];
+ };
+
+ T* next; /* First unused slot, or nullptr when none is available. */
+ hb_vector_t<chunk_t *> chunks; /* Every chunk allocated so far. */
+};
+
+
+#endif /* HB_POOL_HH */
diff --git a/thirdparty/harfbuzz/src/hb-sanitize.hh b/thirdparty/harfbuzz/src/hb-sanitize.hh
new file mode 100644
index 0000000000..024b4d1c99
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-sanitize.hh
@@ -0,0 +1,412 @@
+/*
+ * Copyright © 2007,2008,2009,2010 Red Hat, Inc.
+ * Copyright © 2012,2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_SANITIZE_HH
+#define HB_SANITIZE_HH
+
+#include "hb.hh"
+#include "hb-blob.hh"
+#include "hb-dispatch.hh"
+
+
+/*
+ * Sanitize
+ *
+ *
+ * === Introduction ===
+ *
+ * The sanitize machinery is at the core of our zero-cost font loading. We
+ * mmap() font file into memory and create a blob out of it. Font subtables
+ * are returned as a readonly sub-blob of the main font blob. These table
+ * blobs are then sanitized before use, to ensure invalid memory access does
+ * not happen. The toplevel sanitize API use is like, eg. to load the 'head'
+ * table:
+ *
+ * hb_blob_t *head_blob = hb_sanitize_context_t ().reference_table<OT::head> (face);
+ *
+ * The blob then can be converted to a head table struct with:
+ *
+ * const head *head_table = head_blob->as<head> ();
+ *
+ * What the reference_table does is, to call hb_face_reference_table() to load
+ * the table blob, sanitize it and return either the sanitized blob, or empty
+ * blob if sanitization failed. The blob->as() function returns the null
+ * object of its template type argument if the blob is empty. Otherwise, it
+ * just casts the blob contents to the desired type.
+ *
+ * Sanitizing a blob of data with a type T works as follows (with minor
+ * simplification):
+ *
+ * - Cast blob content to T*, call sanitize() method of it,
+ * - If sanitize succeeded, return blob.
+ * - Otherwise, if blob is not writable, try making it writable,
+ * or copy if cannot be made writable in-place,
+ * - Call sanitize() again. Return blob if sanitize succeeded.
+ * - Return empty blob otherwise.
+ *
+ *
+ * === The sanitize() contract ===
+ *
+ * The sanitize() method of each object type shall return true if it's safe to
+ * call other methods of the object, and false otherwise.
+ *
+ * Note that what sanitize() checks for might align with what the specification
+ * describes as valid table data, but does not have to be. In particular, we
+ * do NOT want to be pedantic and concern ourselves with validity checks that
+ * are irrelevant to our use of the table. On the contrary, we want to be
+ * lenient with error handling and accept invalid data to the extent that it
+ * does not impose extra burden on us.
+ *
+ * Based on the sanitize contract, one can see that what we check for depends
+ * on how we use the data in other table methods. Ie. if other table methods
+ * assume that offsets do NOT point out of the table data block, then that's
+ * something sanitize() must check for (GSUB/GPOS/GDEF/etc work this way). On
+ * the other hand, if other methods do such checks themselves, then sanitize()
+ * does not have to bother with them (glyf/loca work this way). The choice
+ * depends on the table structure and sanitize() performance. For example, to
+ * check glyf/loca offsets in sanitize() would cost O(num-glyphs). We try hard
+ * to avoid such costs during font loading. By postponing such checks to the
+ * actual glyph loading, we reduce the sanitize cost to O(1) and total runtime
+ * cost to O(used-glyphs). As such, this is preferred.
+ *
+ * The same argument can be made re GSUB/GPOS/GDEF, but there, the table
+ * structure is so complicated that by checking all offsets at sanitize() time,
+ * we make the code much simpler in other methods, as offsets and referenced
+ * objects do not need to be validated at each use site.
+ */
+
+/* This limits sanitizing time on really broken fonts. */
+#ifndef HB_SANITIZE_MAX_EDITS
+#define HB_SANITIZE_MAX_EDITS 32
+#endif
+#ifndef HB_SANITIZE_MAX_OPS_FACTOR
+#define HB_SANITIZE_MAX_OPS_FACTOR 8
+#endif
+#ifndef HB_SANITIZE_MAX_OPS_MIN
+#define HB_SANITIZE_MAX_OPS_MIN 16384
+#endif
+#ifndef HB_SANITIZE_MAX_OPS_MAX
+#define HB_SANITIZE_MAX_OPS_MAX 0x3FFFFFFF
+#endif
+#ifndef HB_SANITIZE_MAX_SUTABLES
+#define HB_SANITIZE_MAX_SUTABLES 0x4000
+#endif
+
+struct hb_sanitize_context_t :
+ hb_dispatch_context_t<hb_sanitize_context_t, bool, HB_DEBUG_SANITIZE>
+{
+ hb_sanitize_context_t () :
+ start (nullptr), end (nullptr),
+ max_ops (0), max_subtables (0),
+ writable (false), edit_count (0),
+ blob (nullptr),
+ num_glyphs (65536),
+ num_glyphs_set (false) {}
+
+ const char *get_name () { return "SANITIZE"; }
+ template <typename T, typename F>
+ bool may_dispatch (const T *obj HB_UNUSED, const F *format)
+ { return format->sanitize (this); }
+ static return_t default_return_value () { return true; }
+ static return_t no_dispatch_return_value () { return false; }
+ bool stop_sublookup_iteration (const return_t r) const { return !r; }
+
+ /* Adds count to the running subtable total and reports whether that total
+ * is still below HB_SANITIZE_MAX_SUTABLES. */
+ bool visit_subtables (unsigned count)
+ {
+ max_subtables += count;
+ return max_subtables < HB_SANITIZE_MAX_SUTABLES;
+ }
+
+ private:
+ template <typename T, typename ...Ts> auto
+ _dispatch (const T &obj, hb_priority<1>, Ts&&... ds) HB_AUTO_RETURN
+ ( obj.sanitize (this, hb_forward<Ts> (ds)...) )
+ template <typename T, typename ...Ts> auto
+ _dispatch (const T &obj, hb_priority<0>, Ts&&... ds) HB_AUTO_RETURN
+ ( obj.dispatch (this, hb_forward<Ts> (ds)...) )
+ public:
+ template <typename T, typename ...Ts> auto
+ dispatch (const T &obj, Ts&&... ds) HB_AUTO_RETURN
+ ( _dispatch (obj, hb_prioritize, hb_forward<Ts> (ds)...) )
+
+
+ void init (hb_blob_t *b)
+ {
+ this->blob = hb_blob_reference (b);
+ this->writable = false;
+ }
+
+ void set_num_glyphs (unsigned int num_glyphs_)
+ {
+ num_glyphs = num_glyphs_;
+ num_glyphs_set = true;
+ }
+ unsigned int get_num_glyphs () { return num_glyphs; }
+
+ void set_max_ops (int max_ops_) { max_ops = max_ops_; }
+
+ /* Narrows the checked range to the bytes of obj. The range is first reset
+ * to the whole blob; a null obj leaves it that way. An obj that starts
+ * outside the blob range collapses the range to null so that later range
+ * checks on non-empty spans fail. */
+ template <typename T>
+ void set_object (const T *obj)
+ {
+ reset_object ();
+
+ if (!obj) return;
+
+ const char *obj_start = (const char *) obj;
+ const bool starts_inside = this->start <= obj_start && obj_start < this->end;
+ if (unlikely (!starts_inside))
+ {
+ this->start = this->end = nullptr;
+ return;
+ }
+
+ /* Clamp the object's reported size to what is left of the blob. */
+ this->start = obj_start;
+ this->end = obj_start + hb_min (size_t (this->end - obj_start), obj->get_size ());
+ }
+
+ /* Restores the checked range to the full extent of the blob's data. */
+ void reset_object ()
+ {
+ this->start = this->blob->data;
+ this->end = this->start + this->blob->length;
+ assert (this->start <= this->end); /* Must not overflow. */
+ }
+
+ /* Prepares a sanitize pass: resets the range to the whole blob, derives
+ * the operation budget from the blob length, and clears the edit counter
+ * and debug depth. */
+ void start_processing ()
+ {
+ reset_object ();
+ /* Budget = length * HB_SANITIZE_MAX_OPS_FACTOR, clamped to
+ * [HB_SANITIZE_MAX_OPS_MIN, HB_SANITIZE_MAX_OPS_MAX]; an overflowing
+ * product goes straight to the maximum. */
+ if (unlikely (hb_unsigned_mul_overflows (this->end - this->start, HB_SANITIZE_MAX_OPS_FACTOR)))
+ this->max_ops = HB_SANITIZE_MAX_OPS_MAX;
+ else
+ this->max_ops = hb_clamp ((unsigned) (this->end - this->start) * HB_SANITIZE_MAX_OPS_FACTOR,
+ (unsigned) HB_SANITIZE_MAX_OPS_MIN,
+ (unsigned) HB_SANITIZE_MAX_OPS_MAX);
+ this->edit_count = 0;
+ this->debug_depth = 0;
+
+ DEBUG_MSG_LEVEL (SANITIZE, start, 0, +1,
+ "start [%p..%p] (%lu bytes)",
+ this->start, this->end,
+ (unsigned long) (this->end - this->start));
+ }
+
+ /* Ends a sanitize pass: drops the blob reference taken in init() and
+ * clears the blob pointer and the checked range. */
+ void end_processing ()
+ {
+ DEBUG_MSG_LEVEL (SANITIZE, this->start, 0, -1,
+ "end [%p..%p] %u edit requests",
+ this->start, this->end, this->edit_count);
+
+ hb_blob_destroy (this->blob);
+ this->blob = nullptr;
+ this->start = this->end = nullptr;
+ }
+
+ unsigned get_edit_count () { return edit_count; }
+
+ /* Returns true when the len bytes starting at base lie inside the current
+ * [start, end] range and the operation budget is not exhausted.
+ * A zero len always succeeds and does not touch the budget. Because of
+ * short-circuit evaluation, max_ops is decremented only when the bounds
+ * tests have already passed. */
+ bool check_range (const void *base,
+ unsigned int len) const
+ {
+ const char *p = (const char *) base;
+ bool ok = !len ||
+ (this->start <= p &&
+ p <= this->end &&
+ (unsigned int) (this->end - p) >= len &&
+ this->max_ops-- > 0);
+
+ DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0,
+ "check_range [%p..%p]"
+ " (%d bytes) in [%p..%p] -> %s",
+ p, p + len, len,
+ this->start, this->end,
+ ok ? "OK" : "OUT-OF-RANGE");
+
+ return likely (ok);
+ }
+
+ template <typename T>
+ bool check_range (const T *base,
+ unsigned int a,
+ unsigned int b) const
+ {
+ return !hb_unsigned_mul_overflows (a, b) &&
+ this->check_range (base, a * b);
+ }
+
+ template <typename T>
+ bool check_range (const T *base,
+ unsigned int a,
+ unsigned int b,
+ unsigned int c) const
+ {
+ return !hb_unsigned_mul_overflows (a, b) &&
+ this->check_range (base, a * b, c);
+ }
+
+ template <typename T>
+ bool check_array (const T *base, unsigned int len) const
+ {
+ return this->check_range (base, len, hb_static_size (T));
+ }
+
+ template <typename T>
+ bool check_array (const T *base,
+ unsigned int a,
+ unsigned int b) const
+ {
+ return this->check_range (base, a, b, hb_static_size (T));
+ }
+
+ template <typename Type>
+ bool check_struct (const Type *obj) const
+ { return likely (this->check_range (obj, obj->min_size)); }
+
+ /* Records an edit request and reports whether edits are allowed, i.e.
+ * whether the context is currently writable. Once edit_count has reached
+ * HB_SANITIZE_MAX_EDITS the request is refused without being counted.
+ * base and len are used only for the debug message. */
+ bool may_edit (const void *base, unsigned int len)
+ {
+ if (this->edit_count >= HB_SANITIZE_MAX_EDITS)
+ return false;
+
+ const char *p = (const char *) base;
+ this->edit_count++;
+
+ DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0,
+ "may_edit(%u) [%p..%p] (%d bytes) in [%p..%p] -> %s",
+ this->edit_count,
+ p, p + len, len,
+ this->start, this->end,
+ this->writable ? "GRANTED" : "DENIED");
+
+ return this->writable;
+ }
+
+ /* Overwrites *obj with v if may_edit() grants the edit; returns whether
+ * the write happened. The request is counted by may_edit() either way. */
+ template <typename Type, typename ValueType>
+ bool try_set (const Type *obj, const ValueType &v)
+ {
+ if (!this->may_edit (obj, hb_static_size (Type)))
+ return false;
+
+ * const_cast<Type *> (obj) = v;
+ return true;
+ }
+
+ /* Sanitizes blob as an object of type Type.
+ *
+ * Returns blob itself (made immutable) when sanitization passes, or the
+ * empty blob (after destroying blob) when it fails. A blob with no data
+ * is returned unchanged.
+ *
+ * If the first pass fails after requesting edits while the data is
+ * read-only, the blob data is made writable and the pass is retried.
+ * A pass that succeeded with edits is run a second time and must then
+ * succeed without requesting any further edit. */
+ template <typename Type>
+ hb_blob_t *sanitize_blob (hb_blob_t *blob)
+ {
+ bool sane;
+
+ init (blob);
+
+ retry:
+ DEBUG_MSG_FUNC (SANITIZE, start, "start");
+
+ start_processing ();
+
+ if (unlikely (!start))
+ {
+ end_processing ();
+ return blob;
+ }
+
+ Type *t = reinterpret_cast<Type *> (const_cast<char *> (start));
+
+ sane = t->sanitize (this);
+ if (sane)
+ {
+ if (edit_count)
+ {
+ /* edit_count is unsigned, hence %u. */
+ DEBUG_MSG_FUNC (SANITIZE, start, "passed first round with %u edits; going for second round", edit_count);
+
+ /* sanitize again to ensure no toe-stepping */
+ edit_count = 0;
+ sane = t->sanitize (this);
+ if (edit_count) {
+ DEBUG_MSG_FUNC (SANITIZE, start, "requested %u edits in second round; FAILING", edit_count);
+ sane = false;
+ }
+ }
+ }
+ else
+ {
+ if (edit_count && !writable) {
+ start = hb_blob_get_data_writable (blob, nullptr);
+
+ if (start)
+ {
+ /* Only derive end once start is known to be non-null, so no
+ * arithmetic is done on a null pointer. */
+ end = start + blob->length;
+ writable = true;
+ /* ok, we made it writable by relocating. try again */
+ DEBUG_MSG_FUNC (SANITIZE, start, "retry");
+ goto retry;
+ }
+ }
+ }
+
+ end_processing ();
+
+ DEBUG_MSG_FUNC (SANITIZE, start, sane ? "PASSED" : "FAILED");
+ if (sane)
+ {
+ hb_blob_make_immutable (blob);
+ return blob;
+ }
+ else
+ {
+ hb_blob_destroy (blob);
+ return hb_blob_get_empty ();
+ }
+ }
+
+ /* Fetches the table tableTag from face and returns it run through
+ * sanitize_blob<Type>(). Unless set_num_glyphs() was called earlier, the
+ * glyph count is first taken from the face. */
+ template <typename Type>
+ hb_blob_t *reference_table (const hb_face_t *face, hb_tag_t tableTag = Type::tableTag)
+ {
+ if (!num_glyphs_set)
+ set_num_glyphs (hb_face_get_glyph_count (face));
+ return sanitize_blob<Type> (hb_face_reference_table (face, tableTag));
+ }
+
+ const char *start, *end;
+ mutable int max_ops, max_subtables;
+ private:
+ bool writable;
+ unsigned int edit_count;
+ hb_blob_t *blob;
+ unsigned int num_glyphs;
+ bool num_glyphs_set;
+};
+
+/* Scope guard: calls set_object() on the context with obj at construction
+ * and reset_object() on it at destruction. */
+struct hb_sanitize_with_object_t
+{
+ template <typename T>
+ hb_sanitize_with_object_t (hb_sanitize_context_t *c, const T& obj) : c (c)
+ { c->set_object (obj); }
+ ~hb_sanitize_with_object_t ()
+ { c->reset_object (); }
+
+ private:
+ hb_sanitize_context_t *c; /* Context whose range is reset on destruction. */
+};
+
+
+#endif /* HB_SANITIZE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-serialize.hh b/thirdparty/harfbuzz/src/hb-serialize.hh
new file mode 100644
index 0000000000..4566153a59
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-serialize.hh
@@ -0,0 +1,553 @@
+/*
+ * Copyright © 2007,2008,2009,2010 Red Hat, Inc.
+ * Copyright © 2012,2018 Google, Inc.
+ * Copyright © 2019 Facebook, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ * Facebook Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_SERIALIZE_HH
+#define HB_SERIALIZE_HH
+
+#include "hb.hh"
+#include "hb-blob.hh"
+#include "hb-map.hh"
+#include "hb-pool.hh"
+
+
+/*
+ * Serialize
+ */
+
+struct hb_serialize_context_t
+{
+ typedef unsigned objidx_t;
+
+ enum whence_t {
+ Head, /* Relative to the current object head (default). */
+ Tail, /* Relative to the current object tail after packed. */
+ Absolute /* Absolute: from the start of the serialize buffer. */
+ };
+
+ /* Bookkeeping for one serialized object: its byte range in the buffer,
+ * the offsets inside it that still need resolving, and a link to the
+ * enclosing object while it is on the `current` stack. */
+ struct object_t
+ {
+ void fini () { links.fini (); }
+
+ /* Two objects are equal when both their bytes and their link records
+ * compare equal byte-for-byte. */
+ bool operator == (const object_t &o) const
+ {
+ return (tail - head == o.tail - o.head)
+ && (links.length == o.links.length)
+ && 0 == hb_memcmp (head, o.head, tail - head)
+ && links.as_bytes () == o.links.as_bytes ();
+ }
+ /* Hash of the object bytes XORed with the hash of the link records. */
+ uint32_t hash () const
+ {
+ return hb_bytes_t (head, tail - head).hash () ^
+ links.as_bytes ().hash ();
+ }
+
+ /* One pending offset field, recorded by add_link() and written by
+ * resolve_links(). */
+ struct link_t
+ {
+ bool is_wide: 1; /* Offset field is 4 bytes wide (otherwise 2). */
+ bool is_signed: 1; /* Offset field has a signed type. */
+ unsigned whence: 2; /* A whence_t value. */
+ unsigned position: 28; /* Byte position of the field from the object's head. */
+ unsigned bias; /* Subtracted from the computed offset. */
+ objidx_t objidx; /* Index into `packed` of the object pointed to. */
+ };
+
+ char *head; /* First byte of the object. */
+ char *tail; /* One past the last byte of the object. */
+ hb_vector_t<link_t> links;
+ object_t *next; /* Previous `current` object; nullptr once packed. */
+ };
+
+ struct snapshot_t
+ {
+ char *head;
+ char *tail;
+ object_t *current; // Just for sanity check
+ unsigned num_links;
+ };
+
+ snapshot_t snapshot ()
+ { return snapshot_t { head, tail, current, current->links.length }; }
+
+ hb_serialize_context_t (void *start_, unsigned int size) :
+ start ((char *) start_),
+ end (start + size),
+ current (nullptr)
+ { reset (); }
+ ~hb_serialize_context_t () { fini (); }
+
+ /* Releases all bookkeeping: the link vectors of packed objects, the packed
+ * list and map, every object still on the `current` stack, and finally
+ * the object pool itself. */
+ void fini ()
+ {
+ /* NOTE(review): ++hb_iter presumably skips slot 0, the nullptr
+ * placeholder that reset() pushes -- confirm. */
+ for (object_t *_ : ++hb_iter (packed)) _->fini ();
+ packed.fini ();
+ this->packed_map.fini ();
+
+ while (current)
+ {
+ auto *_ = current;
+ current = current->next;
+ _->fini ();
+ }
+ object_pool.fini ();
+ }
+
+ bool in_error () const { return !this->successful; }
+
+ /* Returns the context to its initial state: flags cleared, head at the
+ * start of the buffer, tail at its end, all object bookkeeping released. */
+ void reset ()
+ {
+ this->successful = true;
+ this->ran_out_of_room = false;
+ this->head = this->start;
+ this->tail = this->end;
+ this->debug_depth = 0;
+
+ fini ();
+ /* Slot 0 of `packed` is a nullptr placeholder, so object index 0 never
+ * refers to a real object. */
+ this->packed.push (nullptr);
+ }
+
+ bool check_success (bool success)
+ { return this->successful && (success || (err_other_error (), false)); }
+
+ template <typename T1, typename T2>
+ bool check_equal (T1 &&v1, T2 &&v2)
+ { return check_success ((long long) v1 == (long long) v2); }
+
+ template <typename T1, typename T2>
+ bool check_assign (T1 &v1, T2 &&v2)
+ { return check_equal (v1 = v2, v2); }
+
+ template <typename T> bool propagate_error (T &&obj)
+ { return check_success (!hb_deref (obj).in_error ()); }
+
+ template <typename T1, typename... Ts> bool propagate_error (T1 &&o1, Ts&&... os)
+ { return propagate_error (hb_forward<T1> (o1)) &&
+ propagate_error (hb_forward<Ts> (os)...); }
+
+ /* To be called around main operation. */
+ template <typename Type>
+ Type *start_serialize ()
+ {
+ DEBUG_MSG_LEVEL (SERIALIZE, this->start, 0, +1,
+ "start [%p..%p] (%lu bytes)",
+ this->start, this->end,
+ (unsigned long) (this->end - this->start));
+
+ assert (!current);
+ return push<Type> ();
+ }
+ /* Finishes the main operation: folds container errors into the context's
+ * state, then packs the top-level object and resolves all links. Nothing
+ * is packed when no object is open, when an error occurred, or when no
+ * other object has been packed. */
+ void end_serialize ()
+ {
+ DEBUG_MSG_LEVEL (SERIALIZE, this->start, 0, -1,
+ "end [%p..%p] serialized %u bytes; %s",
+ this->start, this->end,
+ (unsigned) (this->head - this->start),
+ this->successful ? "successful" : "UNSUCCESSFUL");
+
+ propagate_error (packed, packed_map);
+
+ if (unlikely (!current)) return;
+ if (unlikely (in_error())) return;
+
+ /* Only the outermost object may remain open at this point. */
+ assert (!current->next);
+
+ /* Only "pack" if there exist other objects... Otherwise, don't bother.
+ * Saves a move. */
+ if (packed.length <= 1)
+ return;
+
+ pop_pack (false);
+
+ resolve_links ();
+ }
+
+ /* Opens a new object at the current head and makes it `current`, with the
+ * previously open object as its `next`. If the context is already in
+ * error nothing is opened; if the pool cannot supply an object the
+ * context is flagged as failed. The result of start_embed<Type>() is
+ * returned in every case. */
+ template <typename Type = void>
+ Type *push ()
+ {
+ if (likely (!in_error ()))
+ {
+ object_t *fresh = object_pool.alloc ();
+ if (likely (fresh))
+ {
+ fresh->head = head;
+ fresh->tail = tail;
+ fresh->next = current;
+ current = fresh;
+ }
+ else
+ check_success (false);
+ }
+ return start_embed<Type> ();
+ }
+ /* Abandons the currently open object: restores head/tail to the values
+ * saved when it was pushed and hands the object back to the pool.
+ * Does nothing when no object is open or the context is in error. */
+ void pop_discard ()
+ {
+ object_t *obj = current;
+ if (unlikely (!obj)) return;
+ if (unlikely (in_error())) return;
+
+ current = current->next;
+ revert (obj->head, obj->tail);
+ obj->fini ();
+ object_pool.free (obj);
+ }
+
+ /* Closes the currently open object, moves its bytes to the tail end of
+ * the buffer and records it in `packed`. Returns the object's index, or
+ * 0 when nothing was packed (no open object, error, empty object, or
+ * failure to grow `packed`).
+ *
+ * Set share to false when an object is unlikely sharable with others
+ * so not worth an attempt, or a contiguous table is serialized as
+ * multiple consecutive objects in the reverse order so can't be shared.
+ */
+ objidx_t pop_pack (bool share=true)
+ {
+ object_t *obj = current;
+ if (unlikely (!obj)) return 0;
+ if (unlikely (in_error())) return 0;
+
+ current = current->next;
+ obj->tail = head;
+ obj->next = nullptr;
+ unsigned len = obj->tail - obj->head;
+ head = obj->head; /* Rewind head. */
+
+ if (!len)
+ {
+ assert (!obj->links.length);
+ return 0;
+ }
+
+ objidx_t objidx;
+ if (share)
+ {
+ /* An identical object was packed before: reuse its index. */
+ objidx = packed_map.get (obj);
+ if (objidx)
+ {
+ obj->fini ();
+ return objidx;
+ }
+ }
+
+ /* Relocate the object's bytes so they end at the current tail. */
+ tail -= len;
+ memmove (tail, obj->head, len);
+
+ obj->head = tail;
+ obj->tail = tail + len;
+
+ packed.push (obj);
+
+ if (unlikely (packed.in_error ())) {
+ // obj wasn't successfully added to packed, so clean it up otherwise its
+ // links will be leaked.
+ propagate_error (packed);
+ obj->fini ();
+ return 0;
+ }
+
+ objidx = packed.length - 1;
+
+ if (share) packed_map.set (obj, objidx);
+ propagate_error (packed_map);
+
+ return objidx;
+ }
+
+ /* Rolls back to a snapshot taken while the same object was current:
+ * drops links added since then and restores head/tail. No-op when the
+ * context is in error. */
+ void revert (snapshot_t snap)
+ {
+ if (unlikely (in_error ())) return;
+ assert (snap.current == current);
+ current->links.shrink (snap.num_links);
+ revert (snap.head, snap.tail);
+ }
+
+ /* Restores head and tail to earlier positions and drops packed objects
+ * that fall outside the restored tail. head may only move backwards and
+ * tail only forwards. No-op when the context is in error. */
+ void revert (char *snap_head,
+ char *snap_tail)
+ {
+ if (unlikely (in_error ())) return;
+ assert (snap_head <= head);
+ assert (tail <= snap_tail);
+ head = snap_head;
+ tail = snap_tail;
+ discard_stale_objects ();
+ }
+
+ /* Removes, newest first, every packed object that starts before the
+ * current tail, leaving the slot-0 placeholder in place. Afterwards the
+ * newest remaining object, if any, must start exactly at tail. */
+ void discard_stale_objects ()
+ {
+ if (unlikely (in_error ())) return;
+ while (packed.length > 1 &&
+ packed.tail ()->head < tail)
+ {
+ packed_map.del (packed.tail ());
+ assert (!packed.tail ()->next);
+ packed.tail ()->fini ();
+ packed.pop ();
+ }
+ if (packed.length > 1)
+ assert (packed.tail ()->head == tail);
+ }
+
+ template <typename T>
+ void add_link (T &ofs, objidx_t objidx, /* ofs: offset field inside the current object; objidx: object it should point to. */
+ whence_t whence = Head,
+ unsigned bias = 0)
+ {
+ static_assert (sizeof (T) == 2 || sizeof (T) == 4, ""); /* Only 2- and 4-byte offset fields are accepted. */
+ if (unlikely (in_error ())) return;
+
+ if (!objidx) /* Index 0 is skipped: no link is recorded for it. */
+ return;
+
+ assert (current);
+ assert (current->head <= (const char *) &ofs);
+
+ auto& link = *current->links.push ();
+
+ link.is_wide = sizeof (T) == 4;
+ link.is_signed = hb_is_signed (hb_unwrap_type (T));
+ link.whence = (unsigned) whence;
+ link.position = (const char *) &ofs - current->head; /* Byte position of the field within the current object. */
+ link.bias = bias;
+ link.objidx = objidx;
+ }
+
+ /* Byte distance from the start of the object under construction to
+ * `base`. Yields 0 when the context is in error or `base` is null. */
+ unsigned to_bias (const void *base) const
+ {
+ if (unlikely (in_error ()) || !base) return 0;
+ assert (current);
+ const char *target = (const char *) base;
+ assert (current->head <= target);
+ return target - current->head;
+ }
+
+ void resolve_links () /* Writes the final offset value into every recorded link field. */
+ {
+ if (unlikely (in_error ())) return;
+
+ assert (!current); /* No object may still be under construction. */
+ assert (packed.length > 1);
+
+ for (const object_t* parent : ++hb_iter (packed)) /* The iterator is advanced once, so entry 0 is skipped. */
+ for (const object_t::link_t &link : parent->links)
+ {
+ const object_t* child = packed[link.objidx];
+ if (unlikely (!child)) { err_other_error(); return; }
+ unsigned offset = 0;
+ switch ((whence_t) link.whence) {
+ case Head: offset = child->head - parent->head; break; /* Measured from the start of the parent. */
+ case Tail: offset = child->head - parent->tail; break; /* Measured from the end of the parent. */
+ case Absolute: offset = (head - start) + (child->head - tail); break; /* Head-side length plus the child's position within the tail side. */
+ }
+
+ assert (offset >= link.bias);
+ offset -= link.bias;
+ if (link.is_signed) /* Choose the field type from the link's signedness and width. */
+ {
+ if (link.is_wide)
+ assign_offset<int32_t> (parent, link, offset);
+ else
+ assign_offset<int16_t> (parent, link, offset);
+ }
+ else
+ {
+ if (link.is_wide)
+ assign_offset<uint32_t> (parent, link, offset);
+ else
+ assign_offset<uint16_t> (parent, link, offset);
+ }
+ }
+ }
+
+ /* Bytes written so far into the object under construction; 0 if there is none. */
+ unsigned int length () const
+ { return unlikely (!current) ? 0 : this->head - current->head; }
+
+ /* Extends the object under construction until its length is a multiple
+ * of `alignment`, using allocate_size () for the padding bytes. */
+ void align (unsigned int alignment)
+ {
+ unsigned int excess = length () % alignment;
+ if (!excess) return;
+ allocate_size<void> (alignment - excess);
+ }
+
+ template <typename Type = void>
+ Type *start_embed (const Type *obj HB_UNUSED = nullptr) const
+ { return reinterpret_cast<Type *> (this->head); }
+ template <typename Type>
+ Type *start_embed (const Type &obj) const
+ { return start_embed (hb_addressof (obj)); }
+
+ /* Following two functions exist to allow setting breakpoint on. */
+ void err_ran_out_of_room () { this->ran_out_of_room = true; }
+ void err_other_error () { this->successful = false; }
+
+ /* Reserves `size` zeroed bytes at the head cursor, advances the cursor and
+ * returns a pointer to the reserved bytes. Returns nullptr if the context
+ * has already failed, or if fewer than `size` bytes remain between head and
+ * tail; in the latter case the out-of-room and failure flags are set. */
+ template <typename Type>
+ Type *allocate_size (unsigned int size)
+ {
+ if (unlikely (!this->successful)) return nullptr;
+
+ /* Compare as unsigned: where ptrdiff_t is 32 bits wide, converting a size
+ * above INT_MAX to ptrdiff_t yields a negative value, which would let an
+ * oversized request pass the room check. */
+ const ptrdiff_t room = this->tail - this->head;
+ if (unlikely (room < 0 || (size_t) room < size))
+ {
+ err_ran_out_of_room ();
+ this->successful = false;
+ return nullptr;
+ }
+ memset (this->head, 0, size);
+ char *ret = this->head;
+ this->head += size;
+ return reinterpret_cast<Type *> (ret);
+ }
+
+ template <typename Type>
+ Type *allocate_min ()
+ { return this->allocate_size<Type> (Type::min_size); }
+
+ /* Appends a byte-for-byte copy of *obj (obj->get_size () bytes) at the head
+ * cursor. Returns the copy, or nullptr if the space could not be allocated. */
+ template <typename Type>
+ Type *embed (const Type *obj)
+ {
+ const unsigned int nbytes = obj->get_size ();
+ Type *dest = this->allocate_size<Type> (nbytes);
+ if (dest)
+ memcpy (dest, obj, nbytes);
+ return dest;
+ }
+ template <typename Type>
+ Type *embed (const Type &obj)
+ { return embed (hb_addressof (obj)); }
+
+ template <typename Type, typename ...Ts> auto
+ _copy (const Type &src, hb_priority<1>, Ts&&... ds) HB_RETURN
+ (Type *, src.copy (this, hb_forward<Ts> (ds)...))
+
+ template <typename Type> auto
+ _copy (const Type &src, hb_priority<0>) -> decltype (&(hb_declval<Type> () = src))
+ {
+ Type *ret = this->allocate_size<Type> (sizeof (Type));
+ if (unlikely (!ret)) return nullptr;
+ *ret = src;
+ return ret;
+ }
+
+ /* Like embed, but active: calls obj.operator=() or obj.copy() to transfer data
+ * instead of memcpy(). */
+ template <typename Type, typename ...Ts>
+ Type *copy (const Type &src, Ts&&... ds)
+ { return _copy (src, hb_prioritize, hb_forward<Ts> (ds)...); }
+ template <typename Type, typename ...Ts>
+ Type *copy (const Type *src, Ts&&... ds)
+ { return copy (*src, hb_forward<Ts> (ds)...); }
+
+ template<typename Iterator,
+ hb_requires (hb_is_iterator (Iterator)),
+ typename ...Ts>
+ void copy_all (Iterator it, Ts&&... ds)
+ { for (decltype (*it) _ : it) copy (_, hb_forward<Ts> (ds)...); }
+
+ template <typename Type>
+ hb_serialize_context_t& operator << (const Type &obj) & { embed (obj); return *this; }
+
+ template <typename Type>
+ Type *extend_size (Type *obj, unsigned int size) /* Grows the head side so that obj spans `size` bytes; nullptr on failure. */
+ {
+ if (unlikely (in_error ())) return nullptr;
+
+ assert (this->start <= (char *) obj); /* obj must lie inside the head side of the buffer... */
+ assert ((char *) obj <= this->head);
+ assert ((char *) obj + size >= this->head); /* ...and must end at or beyond the current head. */
+ if (unlikely (!this->allocate_size<Type> (((char *) obj) + size - this->head))) return nullptr; /* Allocate only the bytes not yet reserved past head. */
+ return reinterpret_cast<Type *> (obj);
+ }
+ template <typename Type>
+ Type *extend_size (Type &obj, unsigned int size)
+ { return extend_size (hb_addressof (obj), size); }
+
+ template <typename Type>
+ Type *extend_min (Type *obj) { return extend_size (obj, obj->min_size); }
+ template <typename Type>
+ Type *extend_min (Type &obj) { return extend_min (hb_addressof (obj)); }
+
+ template <typename Type, typename ...Ts>
+ Type *extend (Type *obj, Ts&&... ds)
+ { return extend_size (obj, obj->get_size (hb_forward<Ts> (ds)...)); }
+ template <typename Type, typename ...Ts>
+ Type *extend (Type &obj, Ts&&... ds)
+ { return extend (hb_addressof (obj), hb_forward<Ts> (ds)...); }
+
+ /* Output routines. */
+ /* Returns a freshly malloc()ed copy of everything serialized so far: the
+ * head-side bytes followed by the tail-side bytes. Returns an empty
+ * hb_bytes_t when nothing has been written or the allocation fails. */
+ hb_bytes_t copy_bytes () const
+ {
+ assert (this->successful);
+ /* Copy both items from head side and tail side... */
+ unsigned int head_len = this->head - this->start;
+ unsigned int tail_len = this->end - this->tail;
+ unsigned int len = head_len + tail_len;
+
+ /* malloc (0) may return a non-null pointer; handing that out together
+ * with a zero length makes it easy for the caller never to free it. */
+ if (!len) return hb_bytes_t ();
+
+ char *p = (char *) malloc (len);
+ if (unlikely (!p)) return hb_bytes_t ();
+
+ memcpy (p, this->start, head_len);
+ memcpy (p + head_len, this->tail, tail_len);
+ return hb_bytes_t (p, len);
+ }
+ template <typename Type>
+ Type *copy () const
+ { return reinterpret_cast<Type *> ((char *) copy_bytes ().arrayZ); }
+ hb_blob_t *copy_blob () const /* Wraps the result of copy_bytes () in a blob. */
+ {
+ hb_bytes_t b = copy_bytes ();
+ return hb_blob_create (b.arrayZ, b.length,
+ HB_MEMORY_MODE_WRITABLE,
+ (char *) b.arrayZ, free); /* free () is passed as the destroy callback for the copied buffer. */
+ }
+
+ private:
+ template <typename T>
+ void assign_offset (const object_t* parent, const object_t::link_t &link, unsigned offset)
+ {
+ auto &off = * ((BEInt<T, sizeof (T)> *) (parent->head + link.position)); /* The offset field sits link.position bytes into the parent. */
+ assert (0 == off); /* The field is expected to still hold zero. */
+ check_assign (off, offset);
+ }
+
+ public: /* TODO Make private. */
+ char *start, *head, *tail, *end;
+ unsigned int debug_depth;
+ bool successful;
+ bool ran_out_of_room;
+
+ private:
+
+ /* Object memory pool. */
+ hb_pool_t<object_t> object_pool;
+
+ /* Stack of currently under construction objects. */
+ object_t *current;
+
+ /* Stack of packed objects. Object 0 is always nil object. */
+ hb_vector_t<object_t *> packed;
+
+ /* Map view of packed objects. */
+ hb_hashmap_t<const object_t *, objidx_t, nullptr, 0> packed_map;
+};
+
+
+#endif /* HB_SERIALIZE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-set-digest.hh b/thirdparty/harfbuzz/src/hb-set-digest.hh
new file mode 100644
index 0000000000..b97526f775
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-set-digest.hh
@@ -0,0 +1,174 @@
+/*
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_SET_DIGEST_HH
+#define HB_SET_DIGEST_HH
+
+#include "hb.hh"
+
+/*
+ * The set digests here implement various "filters" that support
+ * "approximate member query". Conceptually these are like Bloom
+ * Filter and Quotient Filter, however, much smaller, faster, and
+ * designed to fit the requirements of our uses for glyph coverage
+ * queries.
+ *
+ * Our filters are highly accurate if the lookup covers fairly local
+ * set of glyphs, but fully flooded and ineffective if coverage is
+ * all over the place.
+ *
+ * The frozen-set can be used instead of a digest, to trade more
+ * memory for 100% accuracy, but in practice, that doesn't look like
+ * an attractive trade-off.
+ */
+
+/* Digest that keeps one bit per value of ((g >> shift) modulo mask_bits).
+ * may_have () can report false positives but never false negatives for
+ * values that were added. */
+template <typename mask_t, unsigned int shift>
+struct hb_set_digest_lowest_bits_t
+{
+ static constexpr unsigned mask_bytes = sizeof (mask_t);
+ static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
+ /* Number of bits of (g >> shift) that select a bit in the mask. */
+ static constexpr unsigned num_bits = 0
+ + (mask_bytes >= 1 ? 3 : 0)
+ + (mask_bytes >= 2 ? 1 : 0)
+ + (mask_bytes >= 4 ? 1 : 0)
+ + (mask_bytes >= 8 ? 1 : 0)
+ + (mask_bytes >= 16? 1 : 0)
+ + 0;
+
+ static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
+ static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");
+
+ /* Empties the digest. */
+ void init () { mask = 0; }
+
+ /* Records a single value. */
+ void add (hb_codepoint_t g) { mask |= mask_for (g); }
+
+ /* Records every value from a to b. Always returns true. */
+ bool add_range (hb_codepoint_t a, hb_codepoint_t b)
+ {
+ if ((b >> shift) - (a >> shift) >= mask_bits - 1)
+ {
+ /* The range touches every bucket: saturate the mask. */
+ mask = (mask_t) -1;
+ return true;
+ }
+ mask_t lo = mask_for (a);
+ mask_t hi = mask_for (b);
+ /* Sets the bits from lo up to hi, wrapping around when hi < lo. */
+ mask |= hi + (hi - lo) - (hi < lo);
+ return true;
+ }
+
+ /* Records `count` values read from `array`, stepping `stride` bytes
+ * between consecutive elements. */
+ template <typename T>
+ void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
+ {
+ for (; count; count--)
+ {
+ add (*array);
+ array = (const T *) (stride + (const char *) array);
+ }
+ }
+ /* Same as add_array (); sortedness is not exploited. Always returns true. */
+ template <typename T>
+ bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
+ {
+ add_array (array, count, stride);
+ return true;
+ }
+
+ /* False means g was definitely not added; true means it may have been. */
+ bool may_have (hb_codepoint_t g) const
+ { return (mask & mask_for (g)) != 0; }
+
+ private:
+
+ /* Single-bit mask for the bucket that g falls into. */
+ static mask_t mask_for (hb_codepoint_t g)
+ { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); }
+ mask_t mask;
+};
+
+template <typename head_t, typename tail_t>
+struct hb_set_digest_combiner_t /* Pairs two digests; every update goes to both of them. */
+{
+ void init ()
+ {
+ head.init ();
+ tail.init ();
+ }
+
+ void add (hb_codepoint_t g)
+ {
+ head.add (g);
+ tail.add (g);
+ }
+
+ bool add_range (hb_codepoint_t a, hb_codepoint_t b)
+ {
+ head.add_range (a, b);
+ tail.add_range (a, b);
+ return true; /* The sub-digests' return values are ignored. */
+ }
+ template <typename T>
+ void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
+ {
+ head.add_array (array, count, stride);
+ tail.add_array (array, count, stride);
+ }
+ template <typename T>
+ bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
+ {
+ head.add_sorted_array (array, count, stride);
+ tail.add_sorted_array (array, count, stride);
+ return true; /* The sub-digests' return values are ignored. */
+ }
+
+ bool may_have (hb_codepoint_t g) const
+ {
+ return head.may_have (g) && tail.may_have (g); /* Both digests must agree that g may be present. */
+ }
+
+ private:
+ head_t head;
+ tail_t tail;
+};
+
+
+/*
+ * hb_set_digest_t
+ *
+ * This is a combination of digests that performs "best".
+ * There is not much science to this: it's a result of intuition
+ * and testing.
+ */
+typedef hb_set_digest_combiner_t
+<
+ hb_set_digest_lowest_bits_t<unsigned long, 4>,
+ hb_set_digest_combiner_t
+ <
+ hb_set_digest_lowest_bits_t<unsigned long, 0>,
+ hb_set_digest_lowest_bits_t<unsigned long, 9>
+ >
+> hb_set_digest_t;
+
+
+#endif /* HB_SET_DIGEST_HH */
diff --git a/thirdparty/harfbuzz/src/hb-set.cc b/thirdparty/harfbuzz/src/hb-set.cc
new file mode 100644
index 0000000000..0551ed80f2
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-set.cc
@@ -0,0 +1,541 @@
+/*
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb-set.hh"
+
+
+/**
+ * SECTION:hb-set
+ * @title: hb-set
+ * @short_description: Object representing a set of integers
+ * @include: hb.h
+ *
+ * Set objects represent a mathematical set of integer values. They are
+ * used in non-shaping API to query certain set of characters or glyphs,
+ * or other integer values.
+ **/
+
+
+/**
+ * hb_set_create: (Xconstructor)
+ *
+ * Allocates a new set object and initializes it. If the allocation
+ * fails, the object returned by hb_set_get_empty() is handed back instead.
+ *
+ * Return value: (transfer full):
+ *
+ * Since: 0.9.2
+ **/
+hb_set_t *
+hb_set_create ()
+{
+ hb_set_t *set = hb_object_create<hb_set_t> ();
+ if (!set)
+ return hb_set_get_empty ();
+
+ set->init_shallow ();
+ return set;
+}
+
+/**
+ * hb_set_get_empty:
+ *
+ * Returns a pointer to the Null hb_set_t object. The same object is
+ * returned on every call.
+ *
+ * Return value: (transfer full):
+ *
+ * Since: 0.9.2
+ **/
+hb_set_t *
+hb_set_get_empty ()
+{
+ return const_cast<hb_set_t *> (&Null (hb_set_t));
+}
+
+/**
+ * hb_set_reference: (skip)
+ * @set: a set.
+ *
+ * Forwards to hb_object_reference() on @set and returns its result.
+ *
+ * Return value: (transfer full):
+ *
+ * Since: 0.9.2
+ **/
+hb_set_t *
+hb_set_reference (hb_set_t *set)
+{
+ return hb_object_reference (set);
+}
+
+/**
+ * hb_set_destroy: (skip)
+ * @set: a set.
+ *
+ * Calls hb_object_destroy() on @set; when that reports that the object
+ * is to be torn down, the set's storage is finalized and freed.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_set_destroy (hb_set_t *set)
+{
+ if (hb_object_destroy (set))
+ {
+ set->fini_shallow ();
+ free (set);
+ }
+}
+
+/**
+ * hb_set_set_user_data: (skip)
+ * @set: a set.
+ * @key:
+ * @data:
+ * @destroy:
+ * @replace:
+ *
+ * Return value:
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_set_set_user_data (hb_set_t *set,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace)
+{
+ return hb_object_set_user_data (set, key, data, destroy, replace);
+}
+
+/**
+ * hb_set_get_user_data: (skip)
+ * @set: a set.
+ * @key:
+ *
+ * Return value: (transfer none):
+ *
+ * Since: 0.9.2
+ **/
+void *
+hb_set_get_user_data (hb_set_t *set,
+ hb_user_data_key_t *key)
+{
+ return hb_object_get_user_data (set, key);
+}
+
+
+/**
+ * hb_set_allocation_successful:
+ * @set: a set.
+ *
+ * Reports the set's `successful` flag, which is cleared when an
+ * allocation made on behalf of the set fails.
+ *
+ * Return value: the value of the set's `successful` flag.
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_set_allocation_successful (const hb_set_t *set)
+{
+ return set->successful;
+}
+
+/**
+ * hb_set_clear:
+ * @set: a set.
+ *
+ * Removes all contents of @set by calling hb_set_t::clear().
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_set_clear (hb_set_t *set)
+{
+ set->clear ();
+}
+
+/**
+ * hb_set_is_empty:
+ * @set: a set.
+ *
+ * Tests whether @set contains no values.
+ *
+ * Return value: the result of hb_set_t::is_empty().
+ *
+ * Since: 0.9.7
+ **/
+hb_bool_t
+hb_set_is_empty (const hb_set_t *set)
+{
+ return set->is_empty ();
+}
+
+/**
+ * hb_set_has:
+ * @set: a set.
+ * @codepoint: the value to look up.
+ *
+ * Queries @set for @codepoint by forwarding to hb_set_t::has().
+ *
+ * Return value: the result of hb_set_t::has().
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_set_has (const hb_set_t *set,
+ hb_codepoint_t codepoint)
+{
+ return set->has (codepoint);
+}
+
+/**
+ * hb_set_add:
+ * @set: a set.
+ * @codepoint: the value to add.
+ *
+ * Adds @codepoint to @set by forwarding to hb_set_t::add().
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_set_add (hb_set_t *set,
+ hb_codepoint_t codepoint)
+{
+ set->add (codepoint);
+}
+
+/**
+ * hb_set_add_range:
+ * @set: a set.
+ * @first: first value of the range.
+ * @last: last value of the range.
+ *
+ * Forwards the range to hb_set_t::add_range(); its result is discarded.
+ *
+ * Since: 0.9.7
+ **/
+void
+hb_set_add_range (hb_set_t *set,
+ hb_codepoint_t first,
+ hb_codepoint_t last)
+{
+ set->add_range (first, last);
+}
+
+/**
+ * hb_set_del:
+ * @set: a set.
+ * @codepoint: the value to remove.
+ *
+ * Forwards @codepoint to hb_set_t::del().
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_set_del (hb_set_t *set,
+ hb_codepoint_t codepoint)
+{
+ set->del (codepoint);
+}
+
+/**
+ * hb_set_del_range:
+ * @set: a set.
+ * @first: first value of the range.
+ * @last: last value of the range.
+ *
+ * Forwards the range to hb_set_t::del_range().
+ *
+ * Since: 0.9.7
+ **/
+void
+hb_set_del_range (hb_set_t *set,
+ hb_codepoint_t first,
+ hb_codepoint_t last)
+{
+ set->del_range (first, last);
+}
+
+/**
+ * hb_set_is_equal:
+ * @set: a set.
+ * @other: other set.
+ *
+ * Compares @set with @other by forwarding to hb_set_t::is_equal().
+ *
+ * Return value: %TRUE if the two sets are equal, %FALSE otherwise.
+ *
+ * Since: 0.9.7
+ **/
+hb_bool_t
+hb_set_is_equal (const hb_set_t *set,
+ const hb_set_t *other)
+{
+ return set->is_equal (other);
+}
+
+/**
+ * hb_set_is_subset:
+ * @set: a set.
+ * @larger_set: other set.
+ *
+ * Checks @set against @larger_set by forwarding to hb_set_t::is_subset().
+ *
+ * Return value: %TRUE if the @set is a subset of (or equal to) @larger_set, %FALSE otherwise.
+ *
+ * Since: 1.8.1
+ **/
+hb_bool_t
+hb_set_is_subset (const hb_set_t *set,
+ const hb_set_t *larger_set)
+{
+ return set->is_subset (larger_set);
+}
+
+/**
+ * hb_set_set:
+ * @set: a set.
+ * @other: the set passed to hb_set_t::set().
+ *
+ * Forwards to hb_set_t::set() on @set with @other as the argument.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_set_set (hb_set_t *set,
+ const hb_set_t *other)
+{
+ set->set (other);
+}
+
+/**
+ * hb_set_union:
+ * @set: a set; modified in place.
+ * @other: the other operand.
+ *
+ * Forwards to hb_set_t::union_() on @set with @other as the argument.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_set_union (hb_set_t *set,
+ const hb_set_t *other)
+{
+ set->union_ (other);
+}
+
+/**
+ * hb_set_intersect:
+ * @set: a set; modified in place.
+ * @other: the other operand.
+ *
+ * Forwards to hb_set_t::intersect() on @set with @other as the argument.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_set_intersect (hb_set_t *set,
+ const hb_set_t *other)
+{
+ set->intersect (other);
+}
+
+/**
+ * hb_set_subtract:
+ * @set: a set; modified in place.
+ * @other: the other operand.
+ *
+ * Forwards to hb_set_t::subtract() on @set with @other as the argument.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_set_subtract (hb_set_t *set,
+ const hb_set_t *other)
+{
+ set->subtract (other);
+}
+
+/**
+ * hb_set_symmetric_difference:
+ * @set: a set; modified in place.
+ * @other: the other operand.
+ *
+ * Forwards to hb_set_t::symmetric_difference() on @set with @other.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_set_symmetric_difference (hb_set_t *set,
+ const hb_set_t *other)
+{
+ set->symmetric_difference (other);
+}
+
+#ifndef HB_DISABLE_DEPRECATED
+/**
+ * hb_set_invert:
+ * @set: a set.
+ *
+ * Does nothing: the function body is empty and @set is left untouched.
+ *
+ * Since: 0.9.10
+ *
+ * Deprecated: 1.6.1
+ **/
+void
+hb_set_invert (hb_set_t *set HB_UNUSED)
+{
+}
+#endif
+
+/**
+ * hb_set_get_population:
+ * @set: a set.
+ *
+ * Returns the number of numbers in the set.
+ *
+ * Return value: set population.
+ *
+ * Since: 0.9.7
+ **/
+unsigned int
+hb_set_get_population (const hb_set_t *set)
+{
+ return set->get_population ();
+}
+
+/**
+ * hb_set_get_min:
+ * @set: a set.
+ *
+ * Finds the minimum number in the set.
+ *
+ * Return value: minimum of the set, or %HB_SET_VALUE_INVALID if set is empty.
+ *
+ * Since: 0.9.7
+ **/
+hb_codepoint_t
+hb_set_get_min (const hb_set_t *set)
+{
+ return set->get_min ();
+}
+
+/**
+ * hb_set_get_max:
+ * @set: a set.
+ *
+ * Finds the maximum number in the set.
+ *
+ * Return value: maximum of the set, or %HB_SET_VALUE_INVALID if set is empty.
+ *
+ * Since: 0.9.7
+ **/
+hb_codepoint_t
+hb_set_get_max (const hb_set_t *set)
+{
+ return set->get_max ();
+}
+
+/**
+ * hb_set_next:
+ * @set: a set.
+ * @codepoint: (inout):
+ *
+ * Gets the next number in @set that is greater than current value of @codepoint.
+ *
+ * Set @codepoint to %HB_SET_VALUE_INVALID to get started.
+ *
+ * Return value: whether there was a next value.
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_set_next (const hb_set_t *set,
+ hb_codepoint_t *codepoint)
+{
+ return set->next (codepoint);
+}
+
+/**
+ * hb_set_previous:
+ * @set: a set.
+ * @codepoint: (inout):
+ *
+ * Gets the previous number in @set that is lower than current value of @codepoint.
+ *
+ * Set @codepoint to %HB_SET_VALUE_INVALID to get started.
+ *
+ * Return value: whether there was a previous value.
+ *
+ * Since: 1.8.0
+ **/
+hb_bool_t
+hb_set_previous (const hb_set_t *set,
+ hb_codepoint_t *codepoint)
+{
+ return set->previous (codepoint);
+}
+
+/**
+ * hb_set_next_range:
+ * @set: a set.
+ * @first: (out): output first codepoint in the range.
+ * @last: (inout): input current last and output last codepoint in the range.
+ *
+ * Gets the next consecutive range of numbers in @set that
+ * are greater than current value of @last.
+ *
+ * Set @last to %HB_SET_VALUE_INVALID to get started.
+ *
+ * Return value: whether there was a next range.
+ *
+ * Since: 0.9.7
+ **/
+hb_bool_t
+hb_set_next_range (const hb_set_t *set,
+ hb_codepoint_t *first,
+ hb_codepoint_t *last)
+{
+ return set->next_range (first, last);
+}
+
+/**
+ * hb_set_previous_range:
+ * @set: a set.
+ * @first: (inout): input current first and output first codepoint in the range.
+ * @last: (out): output last codepoint in the range.
+ *
+ * Gets the previous consecutive range of numbers in @set that
+ * are less than current value of @first.
+ *
+ * Set @first to %HB_SET_VALUE_INVALID to get started.
+ *
+ * Return value: whether there was a previous range.
+ *
+ * Since: 1.8.0
+ **/
+hb_bool_t
+hb_set_previous_range (const hb_set_t *set,
+ hb_codepoint_t *first,
+ hb_codepoint_t *last)
+{
+ return set->previous_range (first, last);
+}
diff --git a/thirdparty/harfbuzz/src/hb-set.h b/thirdparty/harfbuzz/src/hb-set.h
new file mode 100644
index 0000000000..ed0e05db2e
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-set.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_H_IN
+#error "Include <hb.h> instead."
+#endif
+
+#ifndef HB_SET_H
+#define HB_SET_H
+
+#include "hb-common.h"
+
+HB_BEGIN_DECLS
+
+
+/*
+ * Since: 0.9.21
+ */
+#define HB_SET_VALUE_INVALID ((hb_codepoint_t) -1)
+
+typedef struct hb_set_t hb_set_t;
+
+
+HB_EXTERN hb_set_t *
+hb_set_create (void);
+
+HB_EXTERN hb_set_t *
+hb_set_get_empty (void);
+
+HB_EXTERN hb_set_t *
+hb_set_reference (hb_set_t *set);
+
+HB_EXTERN void
+hb_set_destroy (hb_set_t *set);
+
+HB_EXTERN hb_bool_t
+hb_set_set_user_data (hb_set_t *set,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace);
+
+HB_EXTERN void *
+hb_set_get_user_data (hb_set_t *set,
+ hb_user_data_key_t *key);
+
+
+/* Returns false if allocation has failed before */
+HB_EXTERN hb_bool_t
+hb_set_allocation_successful (const hb_set_t *set);
+
+HB_EXTERN void
+hb_set_clear (hb_set_t *set);
+
+HB_EXTERN hb_bool_t
+hb_set_is_empty (const hb_set_t *set);
+
+HB_EXTERN hb_bool_t
+hb_set_has (const hb_set_t *set,
+ hb_codepoint_t codepoint);
+
+HB_EXTERN void
+hb_set_add (hb_set_t *set,
+ hb_codepoint_t codepoint);
+
+HB_EXTERN void
+hb_set_add_range (hb_set_t *set,
+ hb_codepoint_t first,
+ hb_codepoint_t last);
+
+HB_EXTERN void
+hb_set_del (hb_set_t *set,
+ hb_codepoint_t codepoint);
+
+HB_EXTERN void
+hb_set_del_range (hb_set_t *set,
+ hb_codepoint_t first,
+ hb_codepoint_t last);
+
+HB_EXTERN hb_bool_t
+hb_set_is_equal (const hb_set_t *set,
+ const hb_set_t *other);
+
+HB_EXTERN hb_bool_t
+hb_set_is_subset (const hb_set_t *set,
+ const hb_set_t *larger_set);
+
+HB_EXTERN void
+hb_set_set (hb_set_t *set,
+ const hb_set_t *other);
+
+HB_EXTERN void
+hb_set_union (hb_set_t *set,
+ const hb_set_t *other);
+
+HB_EXTERN void
+hb_set_intersect (hb_set_t *set,
+ const hb_set_t *other);
+
+HB_EXTERN void
+hb_set_subtract (hb_set_t *set,
+ const hb_set_t *other);
+
+HB_EXTERN void
+hb_set_symmetric_difference (hb_set_t *set,
+ const hb_set_t *other);
+
+HB_EXTERN unsigned int
+hb_set_get_population (const hb_set_t *set);
+
+/* Returns HB_SET_VALUE_INVALID if set empty. */
+HB_EXTERN hb_codepoint_t
+hb_set_get_min (const hb_set_t *set);
+
+/* Returns HB_SET_VALUE_INVALID if set empty. */
+HB_EXTERN hb_codepoint_t
+hb_set_get_max (const hb_set_t *set);
+
+/* Pass HB_SET_VALUE_INVALID in to get started. */
+HB_EXTERN hb_bool_t
+hb_set_next (const hb_set_t *set,
+ hb_codepoint_t *codepoint);
+
+/* Pass HB_SET_VALUE_INVALID in to get started. */
+HB_EXTERN hb_bool_t
+hb_set_previous (const hb_set_t *set,
+ hb_codepoint_t *codepoint);
+
+/* Pass HB_SET_VALUE_INVALID for first and last to get started. */
+HB_EXTERN hb_bool_t
+hb_set_next_range (const hb_set_t *set,
+ hb_codepoint_t *first,
+ hb_codepoint_t *last);
+
+/* Pass HB_SET_VALUE_INVALID for first and last to get started. */
+HB_EXTERN hb_bool_t
+hb_set_previous_range (const hb_set_t *set,
+ hb_codepoint_t *first,
+ hb_codepoint_t *last);
+
+
+HB_END_DECLS
+
+#endif /* HB_SET_H */
diff --git a/thirdparty/harfbuzz/src/hb-set.hh b/thirdparty/harfbuzz/src/hb-set.hh
new file mode 100644
index 0000000000..b6e2086a2e
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-set.hh
@@ -0,0 +1,884 @@
+/*
+ * Copyright © 2012,2017 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_SET_HH
+#define HB_SET_HH
+
+#include "hb.hh"
+#include "hb-machinery.hh"
+
+
+/*
+ * hb_set_t
+ */
+
+/* TODO Keep a free-list so we can free pages that are completely zeroed. At that
+ * point maybe also use a sentinel value for "all-1" pages? */
+
+struct hb_set_t
+{
+ HB_DELETE_COPY_ASSIGN (hb_set_t);
+ hb_set_t () { init (); }
+ ~hb_set_t () { fini (); }
+
+ struct page_map_t /* One entry of hb_set_t::page_map. */
+ {
+ int cmp (const page_map_t &o) const { return (int) o.major - (int) major; } /* Compares by major only: o.major minus this major. */
+
+ uint32_t major; /* Page number; presumably as computed by get_major () - confirm. */
+ uint32_t index; /* Presumably the position of the page in hb_set_t::pages - confirm. */
+ };
+
+ struct page_t /* Fixed-size bit page of PAGE_BITS bits, stored as elt_t words in `v`. */
+ {
+ void init0 () { v.clear (); } /* Presumably zeroes the page - confirm against hb_vector_size_t::clear (). */
+ void init1 () { v.clear (0xFF); } /* Presumably fills the page with 0xFF bytes - confirm. */
+
+ unsigned int len () const /* Number of elt_t words in the page. */
+ { return ARRAY_LENGTH_CONST (v); }
+
+ bool is_empty () const /* True when every word of the page is zero. */
+ {
+ for (unsigned int i = 0; i < len (); i++)
+ if (v[i])
+ return false;
+ return true;
+ }
+
+ void add (hb_codepoint_t g) { elt (g) |= mask (g); } /* Sets the bit for g. */
+ void del (hb_codepoint_t g) { elt (g) &= ~mask (g); } /* Clears the bit for g. */
+ bool get (hb_codepoint_t g) const { return elt (g) & mask (g); } /* Tests the bit for g. */
+
+ void add_range (hb_codepoint_t a, hb_codepoint_t b) /* Sets the bits for a through b inclusive. */
+ {
+ elt_t *la = &elt (a);
+ elt_t *lb = &elt (b);
+ if (la == lb)
+ *la |= (mask (b) << 1) - mask(a); /* Same word: bits from a's bit up to and including b's bit. */
+ else
+ {
+ *la |= ~(mask (a) - 1); /* First word: a's bit and everything above it. */
+ la++;
+
+ memset (la, 0xff, (char *) lb - (char *) la); /* Whole words strictly between the first and last word. */
+
+ *lb |= ((mask (b) << 1) - 1); /* Last word: b's bit and everything below it. */
+ }
+ }
+
+ void del_range (hb_codepoint_t a, hb_codepoint_t b) /* Clears the bits for a through b inclusive. */
+ {
+ elt_t *la = &elt (a);
+ elt_t *lb = &elt (b);
+ if (la == lb)
+ *la &= ~((mask (b) << 1) - mask(a)); /* Same word: clear from a's bit up to and including b's bit. */
+ else
+ {
+ *la &= mask (a) - 1; /* First word: keep only the bits below a's bit. */
+ la++;
+
+ memset (la, 0, (char *) lb - (char *) la); /* Whole words strictly between the first and last word. */
+
+ *lb &= ~((mask (b) << 1) - 1); /* Last word: keep only the bits above b's bit. */
+ }
+ }
+
+ bool is_equal (const page_t *other) const /* Bytewise comparison of the two pages' storage. */
+ {
+ return 0 == hb_memcmp (&v, &other->v, sizeof (v));
+ }
+
+ unsigned int get_population () const /* Sum of hb_popcount () over all words. */
+ {
+ unsigned int pop = 0;
+ for (unsigned int i = 0; i < len (); i++)
+ pop += hb_popcount (v[i]);
+ return pop;
+ }
+
+ bool next (hb_codepoint_t *codepoint) const /* Finds the first set bit after *codepoint within the page. */
+ {
+ unsigned int m = (*codepoint + 1) & MASK;
+ if (!m) /* *codepoint + 1 wrapped to bit 0 of a page: nothing further in this one. */
+ {
+ *codepoint = INVALID;
+ return false;
+ }
+ unsigned int i = m / ELT_BITS;
+ unsigned int j = m & ELT_MASK;
+
+ const elt_t vv = v[i] & ~((elt_t (1) << j) - 1); /* First word with the bits below position j masked off. */
+ for (const elt_t *p = &vv; i < len (); p = &v[++i])
+ if (*p)
+ {
+ *codepoint = i * ELT_BITS + elt_get_min (*p); /* Result is a position within the page. */
+ return true;
+ }
+
+ *codepoint = INVALID;
+ return false;
+ }
+ bool previous (hb_codepoint_t *codepoint) const /* Finds the last set bit before *codepoint within the page. */
+ {
+ unsigned int m = (*codepoint - 1) & MASK;
+ if (m == MASK) /* *codepoint - 1 wrapped to the last bit of a page: nothing earlier in this one. */
+ {
+ *codepoint = INVALID;
+ return false;
+ }
+ unsigned int i = m / ELT_BITS;
+ unsigned int j = m & ELT_MASK;
+
+ /* Fancy mask to avoid shifting by elt_t bitsize, which is undefined. */
+ const elt_t mask = j < 8 * sizeof (elt_t) - 1 ?
+ ((elt_t (1) << (j + 1)) - 1) :
+ (elt_t) -1;
+ const elt_t vv = v[i] & mask; /* First word with the bits above position j masked off. */
+ const elt_t *p = &vv;
+ while (true)
+ {
+ if (*p)
+ {
+ *codepoint = i * ELT_BITS + elt_get_max (*p); /* Result is a position within the page. */
+ return true;
+ }
+ if ((int) i <= 0) break;
+ p = &v[--i];
+ }
+
+ *codepoint = INVALID;
+ return false;
+ }
+ hb_codepoint_t get_min () const /* Position of the lowest set bit, or INVALID if the page is empty. */
+ {
+ for (unsigned int i = 0; i < len (); i++)
+ if (v[i])
+ return i * ELT_BITS + elt_get_min (v[i]);
+ return INVALID;
+ }
+ hb_codepoint_t get_max () const /* Position of the highest set bit; 0 if the page is empty. */
+ {
+ for (int i = len () - 1; i >= 0; i--)
+ if (v[i])
+ return i * ELT_BITS + elt_get_max (v[i]);
+ return 0; /* NOTE(review): get_min () returns INVALID for an empty page but this returns 0 - confirm callers skip empty pages. */
+ }
+
+ typedef unsigned long long elt_t;
+ static constexpr unsigned PAGE_BITS = 512;
+ static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, ""); /* PAGE_BITS must be a power of two. */
+
+ static unsigned int elt_get_min (const elt_t &elt) { return hb_ctz (elt); }
+ static unsigned int elt_get_max (const elt_t &elt) { return hb_bit_storage (elt) - 1; }
+
+ typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t;
+
+ static constexpr unsigned ELT_BITS = sizeof (elt_t) * 8; /* Bits per word. */
+ static constexpr unsigned ELT_MASK = ELT_BITS - 1; /* Mask selecting a bit position within a word. */
+ static constexpr unsigned BITS = sizeof (vector_t) * 8; /* Bits per page, derived from the storage type. */
+ static constexpr unsigned MASK = BITS - 1; /* Mask selecting a bit position within a page. */
+ static_assert ((unsigned) PAGE_BITS == (unsigned) BITS, "");
+
+ elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; } /* Word that holds g's bit. */
+ elt_t const &elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; }
+ elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & ELT_MASK); } /* Single-bit mask for g within its word. */
+
+ vector_t v;
+ };
+ static_assert (page_t::PAGE_BITS == sizeof (page_t) * 8, "");
+
+ hb_object_header_t header;
+ bool successful; /* Allocations successful */
+ mutable unsigned int population;
+ hb_sorted_vector_t<page_map_t> page_map;
+ hb_vector_t<page_t> pages;
+
+ void init_shallow ()
+ {
+ successful = true;
+ population = 0;
+ page_map.init ();
+ pages.init ();
+ }
+ void init ()
+ {
+ hb_object_init (this);
+ init_shallow ();
+ }
+ void fini_shallow ()
+ {
+ population = 0;
+ page_map.fini ();
+ pages.fini ();
+ }
+ void fini ()
+ {
+ hb_object_fini (this);
+ fini_shallow ();
+ }
+
+ bool in_error () const { return !successful; }
+
+ bool resize (unsigned int count) /* Resizes pages and page_map to `count` entries; false on failure. */
+ {
+ if (unlikely (!successful)) return false;
+ if (!pages.resize (count) || !page_map.resize (count))
+ {
+ pages.resize (page_map.length); /* Bring pages back to the same length as page_map. */
+ successful = false; /* Record the failure on the set. */
+ return false;
+ }
+ return true;
+ }
+
+ void reset ()
+ {
+ if (unlikely (hb_object_is_immutable (this)))
+ return;
+ clear ();
+ successful = true;
+ }
+
+ void clear ()
+ {
+ if (unlikely (hb_object_is_immutable (this)))
+ return;
+ population = 0;
+ page_map.resize (0);
+ pages.resize (0);
+ }
+ /* True when no page holds any bit.  Pages may exist and still be empty, so
+  * every page is inspected. */
+ bool is_empty () const
+ {
+   for (unsigned int idx = 0, n = pages.length; idx < n; idx++)
+   {
+     if (!pages[idx].is_empty ())
+       return false;
+   }
+   return true;
+ }
+
+ /* Marks the cached population as stale; get_population () recomputes it. */
+ void dirty () { population = UINT_MAX; }
+
+ void add (hb_codepoint_t g)
+ {
+ if (unlikely (!successful)) return;
+ if (unlikely (g == INVALID)) return;
+ dirty ();
+ page_t *page = page_for_insert (g); if (unlikely (!page)) return;
+ page->add (g);
+ }
+ /* Inserts every value in [a, b] (both inclusive).
+  * Returns false for an invalid range (a > b, or either end INVALID) or when
+  * a page cannot be allocated.  Returns true without doing anything when the
+  * set is already in error. */
+ bool add_range (hb_codepoint_t a, hb_codepoint_t b)
+ {
+   if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */
+   if (unlikely (a > b || a == INVALID || b == INVALID)) return false;
+   dirty ();
+
+   const unsigned int ma = get_major (a);
+   const unsigned int mb = get_major (b);
+
+   /* The page containing `a` is needed in either case. */
+   page_t *page = page_for_insert (a);
+   if (unlikely (!page)) return false;
+
+   if (ma == mb)
+   {
+     /* Whole range lives in one page. */
+     page->add_range (a, b);
+     return true;
+   }
+
+   /* Partial first page: from `a` up to the page's last value. */
+   page->add_range (a, major_start (ma + 1) - 1);
+
+   /* Pages strictly between the first and the last are filled completely. */
+   unsigned int m = ma + 1;
+   while (m < mb)
+   {
+     page = page_for_insert (major_start (m));
+     if (unlikely (!page)) return false;
+     page->init1 ();
+     m++;
+   }
+
+   /* Partial last page: from the page's first value up to `b`. */
+   page = page_for_insert (b);
+   if (unlikely (!page)) return false;
+   page->add_range (major_start (mb), b);
+
+   return true;
+ }
+
+ /* Inserts `count` values read from `array`, advancing `stride` bytes
+  * between consecutive values.  Consecutive values that fall in the same
+  * page reuse a single page lookup.  Stops silently if the set is in error
+  * or a page cannot be allocated. */
+ template <typename T>
+ void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
+ {
+   if (unlikely (!successful)) return;
+   if (!count) return;
+   dirty ();
+   hb_codepoint_t g = *array;
+   while (count)
+   {
+     page_t *page = page_for_insert (g);
+     if (unlikely (!page)) return;
+
+     /* Half-open value range [start, end) covered by this page. */
+     const unsigned int m = get_major (g);
+     const unsigned int start = major_start (m);
+     const unsigned int end = major_start (m + 1);
+
+     for (;;)
+     {
+       page->add (g);
+
+       array = &StructAtOffsetUnaligned<T> (array, stride);
+       if (!--count) break;
+
+       g = *array;
+       /* Next value belongs to a different page: look that page up. */
+       if (g < start || g >= end) break;
+     }
+   }
+ }
+
+ /* Like add_array (), for input expected to be in non-decreasing order.
+  * Might return false if array looks unsorted (values already consumed stay
+  * in the set).  Used for faster rejection of corrupt data.
+  * Also returns false when a page cannot be allocated, and true without
+  * doing anything when the set is already in error. */
+ template <typename T>
+ bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
+ {
+   if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */
+   if (!count) return true;
+   dirty ();
+   hb_codepoint_t g = *array;
+   hb_codepoint_t prev = g;
+   while (count)
+   {
+     page_t *page = page_for_insert (g);
+     if (unlikely (!page)) return false;
+
+     /* First value past the current page. */
+     const unsigned int end = major_start (get_major (g) + 1);
+
+     for (;;)
+     {
+       /* If we try harder we can change the following comparison to <=;
+        * Not sure if it's worth it. */
+       if (g < prev) return false;
+       prev = g;
+       page->add (g);
+
+       array = (const T *) ((const char *) array + stride);
+       if (!--count) break;
+
+       g = *array;
+       if (g >= end) break;
+     }
+   }
+   return true;
+ }
+
+ /* Removes `g` if its page exists.  The page itself is kept even if it
+  * becomes empty.  No-op when the set is in error. */
+ void del (hb_codepoint_t g)
+ {
+   /* TODO perform op even if !successful. */
+   if (unlikely (!successful)) return;
+
+   page_t *target = page_for (g);
+   if (target)
+   {
+     dirty ();
+     target->del (g);
+   }
+ }
+
+ private:
+ /* Drops every page whose major lies in [ds, de] (inclusive).  Nothing
+  * happens when ds > de. */
+ void del_pages (int ds, int de)
+ {
+   if (ds > de)
+     return;
+
+   /* Slide the surviving page_map entries to the front ... */
+   unsigned int kept = 0;
+   for (unsigned int idx = 0; idx < page_map.length; idx++)
+   {
+     const int major = (int) page_map[idx].major;
+     const bool doomed = ds <= major && major <= de;
+     if (!doomed)
+       page_map[kept++] = page_map[idx];
+   }
+
+   /* ... then pack the pages they refer to and trim both vectors. */
+   compact (kept);
+   resize (kept);
+ }
+
+ public:
+ /* Removes every value in [a, b] (both inclusive).  Pages wholly covered by
+  * the range are dropped; pages only partly covered have the affected bits
+  * cleared.  No-op when the set is in error or the range is invalid
+  * (a > b, or either end INVALID). */
+ void del_range (hb_codepoint_t a, hb_codepoint_t b)
+ {
+ /* TODO perform op even if !successful. */
+ if (unlikely (!successful)) return;
+ if (unlikely (a > b || a == INVALID || b == INVALID)) return;
+ dirty ();
+ unsigned int ma = get_major (a);
+ unsigned int mb = get_major (b);
+ /* Delete pages from ds through de if ds <= de. */
+ /* ds: first fully covered page -- ma itself only if `a` is that page's first value. */
+ int ds = (a == major_start (ma))? (int) ma: (int) (ma + 1);
+ /* de: last fully covered page -- mb itself only if `b` is that page's last value. */
+ int de = (b + 1 == major_start (mb + 1))? (int) mb: ((int) mb - 1);
+ /* The first page is only partly covered (or no page is fully covered):
+  * clear bits inside it instead of dropping it. */
+ if (ds > de || (int) ma < ds)
+ {
+ page_t *page = page_for (a);
+ if (page)
+ {
+ if (ma == mb)
+ page->del_range (a, b);
+ else
+ page->del_range (a, major_start (ma + 1) - 1);
+ }
+ }
+ /* The last page is a different page and only partly covered. */
+ if (de < (int) mb && ma != mb)
+ {
+ page_t *page = page_for (b);
+ if (page)
+ page->del_range (major_start (mb), b);
+ }
+ del_pages (ds, de);
+ }
+
+ /* Membership test: false when `g` has no page or its bit is clear. */
+ bool get (hb_codepoint_t g) const
+ {
+   const page_t *holder = page_for (g);
+   return holder && holder->get (g);
+ }
+
+ /* Has interface.  operator[] and has () are both membership tests built on
+  * get (); SENTINEL is the value reported for absent keys. */
+ static constexpr bool SENTINEL = false;
+ typedef bool value_t;
+ value_t operator [] (hb_codepoint_t k) const { return get (k); }
+ bool has (hb_codepoint_t k) const { return (*this)[k] != SENTINEL; }
+ /* Predicate.  Lets the set be called like a function returning has (k). */
+ bool operator () (hb_codepoint_t k) const { return has (k); }
+
+ /* Sink interface.  Streaming a codepoint adds it; streaming a pair adds the
+  * inclusive range [first, second].  The result of add_range () is ignored. */
+ hb_set_t& operator << (hb_codepoint_t v)
+ { add (v); return *this; }
+ hb_set_t& operator << (const hb_pair_t<hb_codepoint_t, hb_codepoint_t>& range)
+ { add_range (range.first, range.second); return *this; }
+
+ /* True if the set holds at least one value in [first, last].
+  * Starts the search just below `first`; for first == 0 the subtraction
+  * wraps around and next () then starts from the smallest element. */
+ bool intersects (hb_codepoint_t first, hb_codepoint_t last) const
+ {
+   hb_codepoint_t cursor = first - 1;
+   if (!next (&cursor))
+     return false;
+   return cursor <= last;
+ }
+ /* Makes this set a copy of `other` by copying its pages, page map and
+  * cached population.  No-op when this set is in error or cannot be resized. */
+ void set (const hb_set_t *other)
+ {
+   if (unlikely (!successful))
+     return;
+
+   const unsigned int n = other->pages.length;
+   if (!resize (n))
+     return;
+
+   population = other->population;
+   memcpy ((void *) pages, (const void *) other->pages, n * pages.item_size);
+   memcpy ((void *) page_map, (const void *) other->page_map, n * page_map.item_size);
+ }
+
+ /* True when both sets hold exactly the same values.  Empty pages on either
+  * side are skipped, so sets that differ only in leftover empty pages still
+  * compare equal. */
+ bool is_equal (const hb_set_t *other) const
+ {
+   if (get_population () != other->get_population ())
+     return false;
+
+   const unsigned int na = pages.length;
+   const unsigned int nb = other->pages.length;
+
+   unsigned int a = 0;
+   unsigned int b = 0;
+   while (a < na && b < nb)
+   {
+     if (page_at (a).is_empty ()) { a++; continue; }
+     if (other->page_at (b).is_empty ()) { b++; continue; }
+
+     if (page_map[a].major != other->page_map[b].major)
+       return false;
+     if (!page_at (a).is_equal (&other->page_at (b)))
+       return false;
+
+     a++;
+     b++;
+   }
+
+   /* Whatever remains on either side must be empty. */
+   while (a < na)
+   {
+     if (!page_at (a).is_empty ())
+       return false;
+     a++;
+   }
+   while (b < nb)
+   {
+     if (!other->page_at (b).is_empty ())
+       return false;
+     b++;
+   }
+
+   return true;
+ }
+
+ /* True when every element of this set is also in `larger_set`.  A quick
+  * population comparison rejects the obvious cases before the element-wise
+  * walk. */
+ bool is_subset (const hb_set_t *larger_set) const
+ {
+   if (get_population () > larger_set->get_population ())
+     return false;
+
+   /* TODO Optimize to use pages. */
+   for (hb_codepoint_t c = INVALID; next (&c); )
+   {
+     if (!larger_set->has (c))
+       return false;
+   }
+
+   return true;
+ }
+
+ /* Packs the pages referenced by the first `length` page_map entries to the
+  * front of `pages` and rewrites those entries' indices accordingly.  Pages
+  * referenced by no surviving entry are overwritten. */
+ void compact (unsigned int length)
+ {
+   /* For each old page index: the page_map slot that refers to it, or
+    * 0xFFFFFFFF if the page is no longer referenced. */
+   hb_vector_t<uint32_t> slot_of_page;
+   slot_of_page.resize(pages.length);
+
+   uint32_t k = 0;
+   while (k < slot_of_page.length)
+     slot_of_page[k++] = 0xFFFFFFFF;
+
+   for (k = 0; k < length; k++)
+     slot_of_page[page_map[k].index] = k;
+
+   compact_pages (slot_of_page);
+ }
+
+ /* Moves every still-referenced page towards the front of `pages`,
+  * preserving relative order, and updates the owning page_map entry.
+  * old_index_to_page_map_index[i] is the page_map slot that refers to page i,
+  * or 0xFFFFFFFF if page i is to be discarded. */
+ void compact_pages (const hb_vector_t<uint32_t>& old_index_to_page_map_index)
+ {
+   unsigned int dst = 0;
+   for (unsigned int src = 0; src < pages.length; src++)
+   {
+     const uint32_t slot = old_index_to_page_map_index[src];
+     if (slot != 0xFFFFFFFF)
+     {
+       if (dst < src)
+         pages[dst] = pages[src];
+
+       page_map[slot].index = dst;
+       dst++;
+     }
+   }
+ }
+
+ /* Combines this set with `other` page by page and stores the result in
+  * this set.  For pages present in both sets, `op` is applied to the two bit
+  * vectors.  Op::passthru_left / Op::passthru_right tell whether pages that
+  * exist only in this set / only in `other` are carried into the result.
+  * Does nothing when this set is in error; gives up if resizing fails. */
+ template <typename Op>
+ void process (const Op& op, const hb_set_t *other)
+ {
+ if (unlikely (!successful)) return;
+
+ dirty ();
+
+ unsigned int na = pages.length;
+ unsigned int nb = other->pages.length;
+ unsigned int next_page = na;
+
+ unsigned int count = 0, newCount = 0;
+ unsigned int a = 0, b = 0;
+ unsigned int write_index = 0;
+ /* Pass 1: walk both page maps in major order to count the result pages. */
+ for (; a < na && b < nb; )
+ {
+ if (page_map[a].major == other->page_map[b].major)
+ {
+ if (!Op::passthru_left)
+ {
+ // Move page_map entries that we're keeping from the left side set
+ // to the front of the page_map vector. This isn't necessary if
+ // passthru_left is set since no left side pages will be removed
+ // in that case.
+ if (write_index < a)
+ page_map[write_index] = page_map[a];
+ write_index++;
+ }
+
+ count++;
+ a++;
+ b++;
+ }
+ else if (page_map[a].major < other->page_map[b].major)
+ {
+ if (Op::passthru_left)
+ count++;
+ a++;
+ }
+ else
+ {
+ if (Op::passthru_right)
+ count++;
+ b++;
+ }
+ }
+ if (Op::passthru_left)
+ count += na - a;
+ if (Op::passthru_right)
+ count += nb - b;
+
+ /* Left-only pages are being dropped: keep just the entries gathered at the
+  * front and pack their pages, so new pages can be appended after them. */
+ if (!Op::passthru_left)
+ {
+ na = write_index;
+ next_page = write_index;
+ compact (write_index);
+ }
+
+ if (!resize (count))
+ return;
+
+ newCount = count;
+
+ /* Process in-place backward. */
+ /* Pass 2: fill page_map from the last slot towards the first, so entries
+  * that still have to be read are not overwritten before they are used. */
+ a = na;
+ b = nb;
+ for (; a && b; )
+ {
+ if (page_map[a - 1].major == other->page_map[b - 1].major)
+ {
+ a--;
+ b--;
+ count--;
+ page_map[count] = page_map[a];
+ page_at (count).v = op (page_at (a).v, other->page_at (b).v);
+ }
+ else if (page_map[a - 1].major > other->page_map[b - 1].major)
+ {
+ a--;
+ if (Op::passthru_left)
+ {
+ count--;
+ page_map[count] = page_map[a];
+ }
+ }
+ else
+ {
+ b--;
+ if (Op::passthru_right)
+ {
+ /* Right-only page: copy it into a fresh page slot. */
+ count--;
+ page_map[count].major = other->page_map[b].major;
+ page_map[count].index = next_page++;
+ page_at (count).v = other->page_at (b).v;
+ }
+ }
+ }
+ /* Drain whichever side still has pages left. */
+ if (Op::passthru_left)
+ while (a)
+ {
+ a--;
+ count--;
+ page_map[count] = page_map [a];
+ }
+ if (Op::passthru_right)
+ while (b)
+ {
+ b--;
+ count--;
+ page_map[count].major = other->page_map[b].major;
+ page_map[count].index = next_page++;
+ page_at (count).v = other->page_at (b).v;
+ }
+ /* Every slot counted in pass 1 must have been filled. */
+ assert (!count);
+ if (pages.length > newCount)
+ resize (newCount);
+ }
+
+ /* In-place set algebra; each forwards to process () with the matching
+  * bitwise operator object.  The result replaces the contents of this set. */
+ void union_ (const hb_set_t *other)
+ {
+ process (hb_bitwise_or, other);
+ }
+ void intersect (const hb_set_t *other)
+ {
+ process (hb_bitwise_and, other);
+ }
+ void subtract (const hb_set_t *other)
+ {
+ process (hb_bitwise_sub, other);
+ }
+ void symmetric_difference (const hb_set_t *other)
+ {
+ process (hb_bitwise_xor, other);
+ }
+ /* Advances *codepoint to the smallest element greater than its current
+  * value; INVALID on input means "start from the smallest element".
+  * Returns false and stores INVALID when there is no such element. */
+ bool next (hb_codepoint_t *codepoint) const
+ {
+   if (unlikely (*codepoint == INVALID))
+   {
+     *codepoint = get_min ();
+     return *codepoint != INVALID;
+   }
+
+   page_map_t key = {get_major (*codepoint), 0};
+   unsigned int pos;
+   page_map.bfind (key, &pos, HB_BFIND_NOT_FOUND_STORE_CLOSEST);
+
+   /* First look for a successor inside the page that holds *codepoint. */
+   if (pos < page_map.length && page_map[pos].major == key.major)
+   {
+     if (pages[page_map[pos].index].next (codepoint))
+     {
+       *codepoint += page_map[pos].major * page_t::PAGE_BITS;
+       return true;
+     }
+     pos++;
+   }
+
+   /* Otherwise take the minimum of the first non-empty page after it. */
+   while (pos < page_map.length)
+   {
+     hb_codepoint_t low = pages[page_map[pos].index].get_min ();
+     if (low != INVALID)
+     {
+       *codepoint = page_map[pos].major * page_t::PAGE_BITS + low;
+       return true;
+     }
+     pos++;
+   }
+
+   *codepoint = INVALID;
+   return false;
+ }
+ /* Moves *codepoint to the largest element smaller than its current value;
+  * INVALID on input means "start from the largest element".
+  * Returns false and stores INVALID when there is no such element. */
+ bool previous (hb_codepoint_t *codepoint) const
+ {
+   if (unlikely (*codepoint == INVALID)) {
+     *codepoint = get_max ();
+     return *codepoint != INVALID;
+   }
+
+   page_map_t map = {get_major (*codepoint), 0};
+   unsigned int i;
+   page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST);
+
+   /* First look for a predecessor inside the page that holds *codepoint. */
+   if (i < page_map.length && page_map[i].major == map.major)
+   {
+     if (pages[page_map[i].index].previous (codepoint))
+     {
+       *codepoint += page_map[i].major * page_t::PAGE_BITS;
+       return true;
+     }
+   }
+
+   /* Otherwise take the maximum of the closest non-empty page before it.
+    * `i` is unsigned; stepping below zero wraps and the (int) cast ends the loop. */
+   i--;
+   for (; (int) i >= 0; i--)
+   {
+     const page_t &page = pages[page_map[i].index];
+     /* page_t::get_max () reports 0, not INVALID, for a page with no bits
+      * set, so emptiness has to be tested explicitly; otherwise an empty
+      * page would be reported as containing its first value. */
+     if (page.is_empty ())
+       continue;
+
+     *codepoint = page_map[i].major * page_t::PAGE_BITS + page.get_max ();
+     return true;
+   }
+
+   *codepoint = INVALID;
+   return false;
+ }
+ /* Finds the next run of consecutive elements located after *last and
+  * stores its inclusive bounds in *first / *last.  Returns false and sets
+  * both to INVALID when no element follows *last. */
+ bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
+ {
+   hb_codepoint_t cur = *last;
+   if (!next (&cur))
+   {
+     *first = INVALID;
+     *last = INVALID;
+     return false;
+   }
+
+   /* TODO Speed up. */
+   *first = cur;
+   *last = cur;
+   /* Extend the run while each following element is exactly one higher. */
+   for (;;)
+   {
+     if (!next (&cur)) break;
+     if (cur != *last + 1) break;
+     (*last)++;
+   }
+
+   return true;
+ }
+ /* Finds the run of consecutive elements located before *first and stores
+  * its inclusive bounds in *first / *last.  Returns false and sets both to
+  * INVALID when no element precedes *first. */
+ bool previous_range (hb_codepoint_t *first, hb_codepoint_t *last) const
+ {
+   hb_codepoint_t cur = *first;
+   if (!previous (&cur))
+   {
+     *first = INVALID;
+     *last = INVALID;
+     return false;
+   }
+
+   /* TODO Speed up. */
+   *first = cur;
+   *last = cur;
+   /* Extend the run while each preceding element is exactly one lower. */
+   for (;;)
+   {
+     if (!previous (&cur)) break;
+     if (cur != *first - 1) break;
+     (*first)--;
+   }
+
+   return true;
+ }
+
+ /* Number of elements in the set.  The value is cached; when the cache is
+  * stale (UINT_MAX) it is recomputed by summing the per-page populations. */
+ unsigned int get_population () const
+ {
+   if (population == UINT_MAX)
+   {
+     unsigned int total = 0;
+     for (unsigned int idx = 0, n = pages.length; idx < n; idx++)
+       total += pages[idx].get_population ();
+     population = total;
+   }
+   return population;
+ }
+ /* Smallest element of the set, or INVALID if the set is empty.  Pages are
+  * visited in page_map order and empty ones are skipped. */
+ hb_codepoint_t get_min () const
+ {
+   const unsigned int n = pages.length;
+   unsigned int idx = 0;
+   while (idx < n)
+   {
+     if (!page_at (idx).is_empty ())
+       return page_map[idx].major * page_t::PAGE_BITS + page_at (idx).get_min ();
+     idx++;
+   }
+   return INVALID;
+ }
+ /* Largest element of the set, or INVALID if the set is empty.  Pages are
+  * visited from the last page_map entry down to the first and empty ones are
+  * skipped. */
+ hb_codepoint_t get_max () const
+ {
+   unsigned int count = pages.length;
+   /* Walk backward: the index must decrease, otherwise a trailing empty page
+    * would push it past the end of the page map. */
+   for (int i = count - 1; i >= 0; i--)
+     if (!page_at (i).is_empty ())
+       return page_map[(unsigned) i].major * page_t::PAGE_BITS + page_at (i).get_max ();
+   return INVALID;
+ }
+
+ /* Sentinel meaning "no value"; it can never be stored in the set (add ()
+  * and add_range () reject it). */
+ static constexpr hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
+
+ /*
+ * Iterator implementation.
+ *
+ * Walks the set in increasing order using next () / previous ().  `v` is the
+ * current element (INVALID once exhausted) and `l` the number of elements
+ * still to be visited, including the current one.
+ */
+ struct iter_t : hb_iter_with_fallback_t<iter_t, hb_codepoint_t>
+ {
+ static constexpr bool is_sorted_iterator = true;
+ /* With init == false the iterator is created already exhausted (v is
+  * INVALID, l is 0); end () uses this. */
+ iter_t (const hb_set_t &s_ = Null (hb_set_t),
+ bool init = true) : s (&s_), v (INVALID), l(0)
+ {
+ if (init)
+ {
+ /* One more than the population, because the __next__ () call below
+  * that positions the iterator on the first element decrements l once. */
+ l = s->get_population () + 1;
+ __next__ ();
+ }
+ }
+
+ typedef hb_codepoint_t __item_t__;
+ hb_codepoint_t __item__ () const { return v; }
+ bool __more__ () const { return v != INVALID; }
+ void __next__ () { s->next (&v); if (l) l--; }
+ /* NOTE(review): stepping backward does not adjust l -- confirm that
+  * __len__ () is not relied upon after __prev__ (). */
+ void __prev__ () { s->previous (&v); }
+ unsigned __len__ () const { return l; }
+ iter_t end () const { return iter_t (*s, false); }
+ /* Iterators differ if they belong to different sets or sit on different elements. */
+ bool operator != (const iter_t& o) const
+ { return s != o.s || v != o.v; }
+
+ protected:
+ const hb_set_t *s;
+ hb_codepoint_t v;
+ unsigned l;
+ };
+ /* Iterator positioned on the smallest element. */
+ iter_t iter () const { return iter_t (*this); }
+ operator iter_t () const { return iter (); }
+
+ protected:
+
+ /* Returns the page that holds (or will hold) `g`, creating an empty page
+  * and its page_map entry if none exists yet.  Returns nullptr if the
+  * vectors cannot be grown. */
+ page_t *page_for_insert (hb_codepoint_t g)
+ {
+ /* map.index is the slot a new page would occupy: the current end of `pages`.
+  * It must be captured before resize () changes pages.length. */
+ page_map_t map = {get_major (g), pages.length};
+ unsigned int i;
+ if (!page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST))
+ {
+ if (!resize (pages.length + 1))
+ return nullptr;
+
+ pages[map.index].init0 ();
+ /* Shift entries [i, old length) up by one to open slot i for the new entry. */
+ memmove (page_map + i + 1,
+ page_map + i,
+ (page_map.length - 1 - i) * page_map.item_size);
+ page_map[i] = map;
+ }
+ return &pages[page_map[i].index];
+ }
+ /* Returns the existing page that covers `g`, or nullptr if there is none.
+  * Never creates a page. */
+ page_t *page_for (hb_codepoint_t g)
+ {
+   page_map_t key = {get_major (g)};
+   const page_map_t *entry = page_map.bsearch (key);
+   return entry ? &pages[entry->index] : nullptr;
+ }
+ /* Const overload: returns the existing page that covers `g`, or nullptr if
+  * there is none. */
+ const page_t *page_for (hb_codepoint_t g) const
+ {
+   page_map_t key = {get_major (g)};
+   const page_map_t *entry = page_map.bsearch (key);
+   return entry ? &pages[entry->index] : nullptr;
+ }
+ /* page_at (i): the page referred to by the i-th page_map entry (i indexes
+  * page_map, not `pages`). */
+ page_t &page_at (unsigned int i) { return pages[page_map[i].index]; }
+ const page_t &page_at (unsigned int i) const { return pages[page_map[i].index]; }
+ /* get_major (g): number of the page that covers g.
+  * major_start (m): first value covered by page m. */
+ unsigned int get_major (hb_codepoint_t g) const { return g / page_t::PAGE_BITS; }
+ hb_codepoint_t major_start (unsigned int major) const { return major * page_t::PAGE_BITS; }
+};
+
+
+#endif /* HB_SET_HH */
diff --git a/thirdparty/harfbuzz/src/hb-shape-plan.cc b/thirdparty/harfbuzz/src/hb-shape-plan.cc
new file mode 100644
index 0000000000..666470b4f1
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-shape-plan.cc
@@ -0,0 +1,513 @@
+/*
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+#include "hb-shape-plan.hh"
+#include "hb-shaper.hh"
+#include "hb-font.hh"
+#include "hb-buffer.hh"
+
+
+/**
+ * SECTION:hb-shape-plan
+ * @title: hb-shape-plan
+ * @short_description: Object representing a shaping plan
+ * @include: hb.h
+ *
+ * Shape plans are not used for shaping directly, but can be accessed to query
+ * certain information about how shaping will perform given a set of input
+ * parameters (script, language, direction, features, etc.).
+ * Most clients would not need to deal with shape plans directly.
+ **/
+
+
+/*
+ * hb_shape_plan_key_t
+ */
+
+/*
+ * Fills in this key from the given shaping parameters and selects a shaper.
+ *
+ * When `copy` is true the user features are duplicated into a calloc'ed
+ * array owned by the key (released by free ()); otherwise the key borrows
+ * the caller's `user_features` pointer.
+ *
+ * If `shaper_list` is non-null, its entries are tried in order; otherwise
+ * the shapers returned by _hb_shapers_get () are tried.  The first shaper
+ * for which face->data.<shaper> evaluates true is chosen.
+ *
+ * Returns true once a shaper has been chosen.  Returns false if the feature
+ * copy cannot be allocated or no candidate shaper is usable; in the latter
+ * case the copied feature array has already been freed, so
+ * this->user_features must not be used afterwards.
+ */
+bool
+hb_shape_plan_key_t::init (bool copy,
+                           hb_face_t *face,
+                           const hb_segment_properties_t *props,
+                           const hb_feature_t *user_features,
+                           unsigned int num_user_features,
+                           const int *coords,
+                           unsigned int num_coords,
+                           const char * const *shaper_list)
+{
+  hb_feature_t *features = nullptr;
+  if (copy && num_user_features && !(features = (hb_feature_t *) calloc (num_user_features, sizeof (hb_feature_t))))
+    goto bail;
+
+  this->props = *props;
+  this->num_user_features = num_user_features;
+  this->user_features = copy ? features : user_features;
+  if (copy && num_user_features)
+  {
+    memcpy (features, user_features, num_user_features * sizeof (hb_feature_t));
+    /* Make start/end uniform to easier catch bugs.
+     * Only whether a feature is global matters when keys are compared (see
+     * user_features_match ()), so every non-global bound is replaced by a
+     * fixed placeholder.  This must be done for each feature, not just the
+     * first one. */
+    for (unsigned int i = 0; i < num_user_features; i++)
+    {
+      if (features[i].start != HB_FEATURE_GLOBAL_START)
+        features[i].start = 1;
+      if (features[i].end != HB_FEATURE_GLOBAL_END)
+        features[i].end = 2;
+    }
+  }
+  this->shaper_func = nullptr;
+  this->shaper_name = nullptr;
+#ifndef HB_NO_OT_SHAPE
+  this->ot.init (face, coords, num_coords);
+#endif
+
+  /*
+   * Choose shaper.
+   */
+
+  /* HB_SHAPER_PLAN returns true from this function as soon as the face has
+   * data for the given shaper. */
+#define HB_SHAPER_PLAN(shaper) \
+  HB_STMT_START { \
+    if (face->data.shaper) \
+    { \
+      this->shaper_func = _hb_##shaper##_shape; \
+      this->shaper_name = #shaper; \
+      return true; \
+    } \
+  } HB_STMT_END
+
+  if (unlikely (shaper_list))
+  {
+    /* Caller-supplied preference list: match each name against the shapers
+     * listed in hb-shaper-list.hh. */
+    for (; *shaper_list; shaper_list++)
+      if (false)
+        ;
+#define HB_SHAPER_IMPLEMENT(shaper) \
+      else if (0 == strcmp (*shaper_list, #shaper)) \
+        HB_SHAPER_PLAN (shaper);
+#include "hb-shaper-list.hh"
+#undef HB_SHAPER_IMPLEMENT
+  }
+  else
+  {
+    /* No preference given: try the shapers from _hb_shapers_get () in order. */
+    const hb_shaper_entry_t *shapers = _hb_shapers_get ();
+    for (unsigned int i = 0; i < HB_SHAPERS_COUNT; i++)
+      if (false)
+        ;
+#define HB_SHAPER_IMPLEMENT(shaper) \
+      else if (shapers[i].func == _hb_##shaper##_shape) \
+        HB_SHAPER_PLAN (shaper);
+#include "hb-shaper-list.hh"
+#undef HB_SHAPER_IMPLEMENT
+  }
+#undef HB_SHAPER_PLAN
+
+bail:
+  /* Reached on allocation failure or when no shaper matched. */
+  ::free (features);
+  return false;
+}
+
+/* True when both keys carry the same number of user features and, position
+ * by position, the features agree on tag, value, and on whether they are
+ * global (start and end both at the global sentinels).  The exact start/end
+ * values of non-global features are not compared. */
+bool
+hb_shape_plan_key_t::user_features_match (const hb_shape_plan_key_t *other)
+{
+  if (this->num_user_features != other->num_user_features)
+    return false;
+
+  for (unsigned int i = 0; i < num_user_features; i++)
+  {
+    const hb_feature_t &mine = this->user_features[i];
+    const hb_feature_t &theirs = other->user_features[i];
+
+    if (mine.tag != theirs.tag)
+      return false;
+    if (mine.value != theirs.value)
+      return false;
+
+    const bool mine_global = mine.start == HB_FEATURE_GLOBAL_START &&
+                             mine.end == HB_FEATURE_GLOBAL_END;
+    const bool theirs_global = theirs.start == HB_FEATURE_GLOBAL_START &&
+                               theirs.end == HB_FEATURE_GLOBAL_END;
+    if (mine_global != theirs_global)
+      return false;
+  }
+  return true;
+}
+
+/* Two keys are equal when their segment properties, user features (as
+ * judged by user_features_match ()), OpenType sub-key (unless built with
+ * HB_NO_OT_SHAPE) and chosen shaper function all agree. */
+bool
+hb_shape_plan_key_t::equal (const hb_shape_plan_key_t *other)
+{
+ return hb_segment_properties_equal (&this->props, &other->props) &&
+ this->user_features_match (other) &&
+#ifndef HB_NO_OT_SHAPE
+ this->ot.equal (&other->ot) &&
+#endif
+ this->shaper_func == other->shaper_func;
+}
+
+
+/*
+ * hb_shape_plan_t
+ */
+
+
+/**
+ * hb_shape_plan_create: (Xconstructor)
+ * @face: the face the plan is created for.
+ * @props: the segment properties of the text to be shaped.
+ * @user_features: (array length=num_user_features): the user features.
+ * @num_user_features: number of entries in @user_features.
+ * @shaper_list: (array zero-terminated=1): shaper names to try in order, or
+ * %NULL to use the default order.
+ *
+ * Creates a shape plan without variation coordinates; equivalent to calling
+ * hb_shape_plan_create2() with no coordinates.
+ *
+ * Return value: (transfer full): the new shape plan, or the empty shape plan
+ * if creation failed.
+ *
+ * Since: 0.9.7
+ **/
+hb_shape_plan_t *
+hb_shape_plan_create (hb_face_t *face,
+ const hb_segment_properties_t *props,
+ const hb_feature_t *user_features,
+ unsigned int num_user_features,
+ const char * const *shaper_list)
+{
+ return hb_shape_plan_create2 (face, props,
+ user_features, num_user_features,
+ nullptr, 0,
+ shaper_list);
+}
+
+/*
+ * Creates a shape plan for `face` with the given segment properties, user
+ * features, variation coordinates and optional shaper preference list.
+ *
+ * A null `face` is replaced by the empty face.  The face is made immutable.
+ * The plan keeps its own copy of the user features.
+ *
+ * Returns the new plan, or the empty shape plan if `props` is null, an
+ * allocation fails, or the key / OpenType plan cannot be initialised.
+ */
+hb_shape_plan_t *
+hb_shape_plan_create2 (hb_face_t *face,
+                       const hb_segment_properties_t *props,
+                       const hb_feature_t *user_features,
+                       unsigned int num_user_features,
+                       const int *coords,
+                       unsigned int num_coords,
+                       const char * const *shaper_list)
+{
+  DEBUG_MSG_FUNC (SHAPE_PLAN, nullptr,
+                  "face=%p num_features=%d num_coords=%d shaper_list=%p",
+                  face,
+                  num_user_features,
+                  num_coords,
+                  shaper_list);
+
+  hb_shape_plan_t *shape_plan;
+
+  /* Reject a null `props` before anything dereferences it. */
+  if (unlikely (!props))
+    goto bail;
+
+  assert (props->direction != HB_DIRECTION_INVALID);
+
+  if (!(shape_plan = hb_object_create<hb_shape_plan_t> ()))
+    goto bail;
+
+  if (unlikely (!face))
+    face = hb_face_get_empty ();
+  hb_face_make_immutable (face);
+  shape_plan->face_unsafe = face;
+
+  /* copy == true: the plan owns its own copy of the user features. */
+  if (unlikely (!shape_plan->key.init (true,
+                                       face,
+                                       props,
+                                       user_features,
+                                       num_user_features,
+                                       coords,
+                                       num_coords,
+                                       shaper_list)))
+    goto bail2;
+#ifndef HB_NO_OT_SHAPE
+  if (unlikely (!shape_plan->ot.init0 (face, &shape_plan->key)))
+    goto bail3;
+#endif
+
+  return shape_plan;
+
+  /* Error unwinding: each label releases what was set up before the failing
+   * step and falls through to the next one. */
+#ifndef HB_NO_OT_SHAPE
+bail3:
+#endif
+  shape_plan->key.free ();
+bail2:
+  free (shape_plan);
+bail:
+  return hb_shape_plan_get_empty ();
+}
+
+/**
+ * hb_shape_plan_get_empty:
+ *
+ * Returns the shared null shape plan, which is also what the creation
+ * functions in this file return on failure.
+ *
+ * Return value: (transfer full): the empty shape plan.
+ *
+ * Since: 0.9.7
+ **/
+hb_shape_plan_t *
+hb_shape_plan_get_empty ()
+{
+ return const_cast<hb_shape_plan_t *> (&Null (hb_shape_plan_t));
+}
+
+/**
+ * hb_shape_plan_reference: (skip)
+ * @shape_plan: a shape plan.
+ *
+ * Takes an additional reference on @shape_plan by forwarding to
+ * hb_object_reference().
+ *
+ * Return value: (transfer full): the value returned by hb_object_reference()
+ * (presumably @shape_plan itself -- confirm against its definition).
+ *
+ * Since: 0.9.7
+ **/
+hb_shape_plan_t *
+hb_shape_plan_reference (hb_shape_plan_t *shape_plan)
+{
+ return hb_object_reference (shape_plan);
+}
+
+/**
+ * hb_shape_plan_destroy: (skip)
+ * @shape_plan: a shape plan.
+ *
+ * Drops a reference on @shape_plan.  When hb_object_destroy() reports that
+ * the object should be torn down (presumably when the last reference is
+ * gone), the OpenType plan, the key and the plan itself are released.
+ *
+ * Since: 0.9.7
+ **/
+void
+hb_shape_plan_destroy (hb_shape_plan_t *shape_plan)
+{
+ if (!hb_object_destroy (shape_plan)) return;
+
+#ifndef HB_NO_OT_SHAPE
+ shape_plan->ot.fini ();
+#endif
+ /* Releases the key's copy of the user features. */
+ shape_plan->key.free ();
+ free (shape_plan);
+}
+
+/**
+ * hb_shape_plan_set_user_data: (skip)
+ * @shape_plan: a shape plan.
+ * @key: the key to associate the data with.
+ * @data: the data to attach.
+ * @destroy: callback passed along with @data (presumably invoked when the
+ * data is released -- confirm against hb_object_set_user_data()).
+ * @replace: forwarded unchanged to hb_object_set_user_data().
+ *
+ * Attaches user data to @shape_plan by forwarding all arguments to
+ * hb_object_set_user_data().
+ *
+ * Return value: the result of hb_object_set_user_data().
+ *
+ * Since: 0.9.7
+ **/
+hb_bool_t
+hb_shape_plan_set_user_data (hb_shape_plan_t *shape_plan,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace)
+{
+ return hb_object_set_user_data (shape_plan, key, data, destroy, replace);
+}
+
+/**
+ * hb_shape_plan_get_user_data: (skip)
+ * @shape_plan: a shape plan.
+ * @key: the key the data was stored under.
+ *
+ * Looks up user data on @shape_plan by forwarding to
+ * hb_object_get_user_data().
+ *
+ * Return value: (transfer none): the result of hb_object_get_user_data().
+ *
+ * Since: 0.9.7
+ **/
+void *
+hb_shape_plan_get_user_data (hb_shape_plan_t *shape_plan,
+ hb_user_data_key_t *key)
+{
+ return hb_object_get_user_data (shape_plan, key);
+}
+
+/**
+ * hb_shape_plan_get_shaper:
+ * @shape_plan: a shape plan.
+ *
+ * Returns the name of the shaper that was selected when the plan's key was
+ * initialised.
+ *
+ * Return value: (transfer none): the shaper name stored in the plan's key.
+ *
+ * Since: 0.9.7
+ **/
+const char *
+hb_shape_plan_get_shaper (hb_shape_plan_t *shape_plan)
+{
+ return shape_plan->key.shaper_name;
+}
+
+
+/**
+ * hb_shape_plan_execute:
+ * @shape_plan: a shape plan.
+ * @font: a font.
+ * @buffer: a buffer.
+ * @features: (array length=num_features): features passed through to the
+ * shaper.
+ * @num_features: number of entries in @features.
+ *
+ * Runs the shaper recorded in @shape_plan on @buffer using @font.  The
+ * buffer must be mutable and hold Unicode content, @font must use the face
+ * the plan was created for, and the buffer's segment properties must equal
+ * the plan's; these are enforced with assertions.
+ *
+ * Return value: true if @buffer is empty or the shaper succeeded; false if
+ * @shape_plan is inert, the font has no data for the plan's shaper, the
+ * shaper fails, or no shaper matches.
+ *
+ * Since: 0.9.7
+ **/
+hb_bool_t
+hb_shape_plan_execute (hb_shape_plan_t *shape_plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features)
+{
+ DEBUG_MSG_FUNC (SHAPE_PLAN, shape_plan,
+ "num_features=%d shaper_func=%p, shaper_name=%s",
+ num_features,
+ shape_plan->key.shaper_func,
+ shape_plan->key.shaper_name);
+
+ /* Nothing to shape: succeed before any of the checks below. */
+ if (unlikely (!buffer->len))
+ return true;
+
+ assert (!hb_object_is_immutable (buffer));
+ assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE);
+
+ if (unlikely (hb_object_is_inert (shape_plan)))
+ return false;
+
+ assert (shape_plan->face_unsafe == font->face);
+ assert (hb_segment_properties_equal (&shape_plan->key.props, &buffer->props));
+
+ /* HB_SHAPER_EXECUTE returns from this function with the shaper's result,
+  * or false when the font has no data for that shaper. */
+#define HB_SHAPER_EXECUTE(shaper) \
+ HB_STMT_START { \
+ return font->data.shaper && \
+ _hb_##shaper##_shape (shape_plan, font, buffer, features, num_features); \
+ } HB_STMT_END
+
+ /* Dispatch on the shaper function stored in the plan's key; the else-if
+  * chain is generated from hb-shaper-list.hh. */
+ if (false)
+ ;
+#define HB_SHAPER_IMPLEMENT(shaper) \
+ else if (shape_plan->key.shaper_func == _hb_##shaper##_shape) \
+ HB_SHAPER_EXECUTE (shaper);
+#include "hb-shaper-list.hh"
+#undef HB_SHAPER_IMPLEMENT
+
+#undef HB_SHAPER_EXECUTE
+
+ return false;
+}
+
+
+/*
+ * Caching
+ */
+
+/**
+ * hb_shape_plan_create_cached:
+ * @face: the face the plan is created for.
+ * @props: the segment properties of the text to be shaped.
+ * @user_features: (array length=num_user_features): the user features.
+ * @num_user_features: number of entries in @user_features.
+ * @shaper_list: (array zero-terminated=1): shaper names to try in order, or
+ * %NULL to use the default order.
+ *
+ * Cached variant of hb_shape_plan_create(); equivalent to calling
+ * hb_shape_plan_create_cached2() with no variation coordinates.
+ *
+ * Return value: (transfer full): a shape plan, possibly served from the
+ * face's cache.
+ *
+ * Since: 0.9.7
+ **/
+hb_shape_plan_t *
+hb_shape_plan_create_cached (hb_face_t *face,
+ const hb_segment_properties_t *props,
+ const hb_feature_t *user_features,
+ unsigned int num_user_features,
+ const char * const *shaper_list)
+{
+ return hb_shape_plan_create_cached2 (face, props,
+ user_features, num_user_features,
+ nullptr, 0,
+ shaper_list);
+}
+
+/*
+ * Returns a shape plan for the given parameters, reusing one from the list
+ * of plans cached on `face` when an equal key is found there.  Otherwise a
+ * new plan is created with hb_shape_plan_create2 () and pushed onto the
+ * front of the face's list.  Inert faces are never cached on.
+ *
+ * The caller receives its own reference in every case.
+ *
+ * NOTE(review): `face` is dereferenced unconditionally here, whereas
+ * hb_shape_plan_create2 () substitutes the empty face for null -- confirm
+ * callers never pass a null face.
+ */
+hb_shape_plan_t *
+hb_shape_plan_create_cached2 (hb_face_t *face,
+ const hb_segment_properties_t *props,
+ const hb_feature_t *user_features,
+ unsigned int num_user_features,
+ const int *coords,
+ unsigned int num_coords,
+ const char * const *shaper_list)
+{
+ DEBUG_MSG_FUNC (SHAPE_PLAN, nullptr,
+ "face=%p num_features=%d shaper_list=%p",
+ face,
+ num_user_features,
+ shaper_list);
+
+retry:
+ /* Snapshot of the list head; the compare-and-exchange below only succeeds
+  * if the head is still this value. */
+ hb_face_t::plan_node_t *cached_plan_nodes = face->shape_plans;
+
+ bool dont_cache = hb_object_is_inert (face);
+
+ if (likely (!dont_cache))
+ {
+ /* Lookup key that borrows the caller's feature array (copy == false). */
+ hb_shape_plan_key_t key;
+ if (!key.init (false,
+ face,
+ props,
+ user_features,
+ num_user_features,
+ coords,
+ num_coords,
+ shaper_list))
+ return hb_shape_plan_get_empty ();
+
+ for (hb_face_t::plan_node_t *node = cached_plan_nodes; node; node = node->next)
+ if (node->shape_plan->key.equal (&key))
+ {
+ DEBUG_MSG_FUNC (SHAPE_PLAN, node->shape_plan, "fulfilled from cache");
+ return hb_shape_plan_reference (node->shape_plan);
+ }
+ }
+
+ hb_shape_plan_t *shape_plan = hb_shape_plan_create2 (face, props,
+ user_features, num_user_features,
+ coords, num_coords,
+ shaper_list);
+
+ if (unlikely (dont_cache))
+ return shape_plan;
+
+ /* If the list node cannot be allocated, hand the plan out uncached. */
+ hb_face_t::plan_node_t *node = (hb_face_t::plan_node_t *) calloc (1, sizeof (hb_face_t::plan_node_t));
+ if (unlikely (!node))
+ return shape_plan;
+
+ node->shape_plan = shape_plan;
+ node->next = cached_plan_nodes;
+
+ /* The list head changed since the snapshot: discard this attempt and start
+  * over. */
+ if (unlikely (!face->shape_plans.cmpexch (cached_plan_nodes, node)))
+ {
+ hb_shape_plan_destroy (shape_plan);
+ free (node);
+ goto retry;
+ }
+ DEBUG_MSG_FUNC (SHAPE_PLAN, shape_plan, "inserted into cache");
+
+ /* The cache node keeps the reference from creation; the caller gets a new one. */
+ return hb_shape_plan_reference (shape_plan);
+}
diff --git a/thirdparty/harfbuzz/src/hb-shape-plan.h b/thirdparty/harfbuzz/src/hb-shape-plan.h
new file mode 100644
index 0000000000..b62ae7ca35
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-shape-plan.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_H_IN
+#error "Include <hb.h> instead."
+#endif
+
+#ifndef HB_SHAPE_PLAN_H
+#define HB_SHAPE_PLAN_H
+
+#include "hb-common.h"
+#include "hb-font.h"
+
+HB_BEGIN_DECLS
+
+/* Opaque shape plan object; only handled through the functions below. */
+typedef struct hb_shape_plan_t hb_shape_plan_t;
+
+/* Creates a shape plan without variation coordinates. */
+HB_EXTERN hb_shape_plan_t *
+hb_shape_plan_create (hb_face_t *face,
+ const hb_segment_properties_t *props,
+ const hb_feature_t *user_features,
+ unsigned int num_user_features,
+ const char * const *shaper_list);
+
+/* Like hb_shape_plan_create(), but goes through the cached creation path. */
+HB_EXTERN hb_shape_plan_t *
+hb_shape_plan_create_cached (hb_face_t *face,
+ const hb_segment_properties_t *props,
+ const hb_feature_t *user_features,
+ unsigned int num_user_features,
+ const char * const *shaper_list);
+
+/* Creates a shape plan, additionally taking variation coordinates. */
+HB_EXTERN hb_shape_plan_t *
+hb_shape_plan_create2 (hb_face_t *face,
+ const hb_segment_properties_t *props,
+ const hb_feature_t *user_features,
+ unsigned int num_user_features,
+ const int *coords,
+ unsigned int num_coords,
+ const char * const *shaper_list);
+
+/* Cached variant of hb_shape_plan_create2(). */
+HB_EXTERN hb_shape_plan_t *
+hb_shape_plan_create_cached2 (hb_face_t *face,
+ const hb_segment_properties_t *props,
+ const hb_feature_t *user_features,
+ unsigned int num_user_features,
+ const int *coords,
+ unsigned int num_coords,
+ const char * const *shaper_list);
+
+
+/* Returns the empty shape plan. */
+HB_EXTERN hb_shape_plan_t *
+hb_shape_plan_get_empty (void);
+
+/* Reference counting. */
+HB_EXTERN hb_shape_plan_t *
+hb_shape_plan_reference (hb_shape_plan_t *shape_plan);
+
+HB_EXTERN void
+hb_shape_plan_destroy (hb_shape_plan_t *shape_plan);
+
+/* User data attached to a shape plan. */
+HB_EXTERN hb_bool_t
+hb_shape_plan_set_user_data (hb_shape_plan_t *shape_plan,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace);
+
+HB_EXTERN void *
+hb_shape_plan_get_user_data (hb_shape_plan_t *shape_plan,
+ hb_user_data_key_t *key);
+
+
+/* Shapes a buffer with a font according to the plan. */
+HB_EXTERN hb_bool_t
+hb_shape_plan_execute (hb_shape_plan_t *shape_plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features);
+
+/* Returns the name of the shaper the plan selected. */
+HB_EXTERN const char *
+hb_shape_plan_get_shaper (hb_shape_plan_t *shape_plan);
+
+
+HB_END_DECLS
+
+#endif /* HB_SHAPE_PLAN_H */
diff --git a/thirdparty/harfbuzz/src/hb-shape-plan.hh b/thirdparty/harfbuzz/src/hb-shape-plan.hh
new file mode 100644
index 0000000000..6da7edb2f8
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-shape-plan.hh
@@ -0,0 +1,76 @@
+/*
+ * Copyright © 2012,2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_SHAPE_PLAN_HH
+#define HB_SHAPE_PLAN_HH
+
+#include "hb.hh"
+#include "hb-shaper.hh"
+#include "hb-ot-shape.hh"
+
+
+struct hb_shape_plan_key_t
+{
+ hb_segment_properties_t props;
+
+ const hb_feature_t *user_features;
+ unsigned int num_user_features;
+
+#ifndef HB_NO_OT_SHAPE
+ hb_ot_shape_plan_key_t ot;
+#endif
+
+ hb_shape_func_t *shaper_func;
+ const char *shaper_name;
+
+ HB_INTERNAL bool init (bool copy,
+ hb_face_t *face,
+ const hb_segment_properties_t *props,
+ const hb_feature_t *user_features,
+ unsigned int num_user_features,
+ const int *coords,
+ unsigned int num_coords,
+ const char * const *shaper_list);
+
+ HB_INTERNAL void free () { ::free ((void *) user_features); }
+
+ HB_INTERNAL bool user_features_match (const hb_shape_plan_key_t *other);
+
+ HB_INTERNAL bool equal (const hb_shape_plan_key_t *other);
+};
+
+struct hb_shape_plan_t
+{
+ hb_object_header_t header;
+ hb_face_t *face_unsafe; /* We don't carry a reference to face. */
+ hb_shape_plan_key_t key;
+#ifndef HB_NO_OT_SHAPE
+ hb_ot_shape_plan_t ot;
+#endif
+};
+
+
+#endif /* HB_SHAPE_PLAN_HH */
diff --git a/thirdparty/harfbuzz/src/hb-shape.cc b/thirdparty/harfbuzz/src/hb-shape.cc
new file mode 100644
index 0000000000..017fb91b69
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-shape.cc
@@ -0,0 +1,172 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#include "hb-shaper.hh"
+#include "hb-shape-plan.hh"
+#include "hb-buffer.hh"
+#include "hb-font.hh"
+#include "hb-machinery.hh"
+
+
+/**
+ * SECTION:hb-shape
+ * @title: hb-shape
+ * @short_description: Conversion of text strings into positioned glyphs
+ * @include: hb.h
+ *
+ * Shaping is the central operation of HarfBuzz. Shaping operates on buffers,
+ * which are sequences of Unicode characters that use the same font and have
+ * the same text direction, script, and language. After shaping the buffer
+ * contains the output glyphs and their positions.
+ **/
+
+
+#if HB_USE_ATEXIT
+static void free_static_shaper_list ();
+#endif
+
+static const char *nil_shaper_list[] = {nullptr};
+
+static struct hb_shaper_list_lazy_loader_t : hb_lazy_loader_t<const char *,
+ hb_shaper_list_lazy_loader_t>
+{
+ static const char ** create ()
+ {
+ const char **shaper_list = (const char **) calloc (1 + HB_SHAPERS_COUNT, sizeof (const char *));
+ if (unlikely (!shaper_list))
+ return nullptr;
+
+ const hb_shaper_entry_t *shapers = _hb_shapers_get ();
+ unsigned int i;
+ for (i = 0; i < HB_SHAPERS_COUNT; i++)
+ shaper_list[i] = shapers[i].name;
+ shaper_list[i] = nullptr;
+
+#if HB_USE_ATEXIT
+ atexit (free_static_shaper_list);
+#endif
+
+ return shaper_list;
+ }
+ static void destroy (const char **l)
+ { free (l); }
+ static const char ** get_null ()
+ { return nil_shaper_list; }
+} static_shaper_list;
+
+#if HB_USE_ATEXIT
+static
+void free_static_shaper_list ()
+{
+ static_shaper_list.free_instance ();
+}
+#endif
+
+
+/**
+ * hb_shape_list_shapers:
+ *
+ * Retrieves the list of shapers supported by HarfBuzz.
+ *
+ * Return value: (transfer none) (array zero-terminated=1): an array of
+ * constant strings
+ *
+ * Since: 0.9.2
+ **/
+const char **
+hb_shape_list_shapers ()
+{
+ return static_shaper_list.get_unconst ();
+}
+
+
+/**
+ * hb_shape_full:
+ * @font: an #hb_font_t to use for shaping
+ * @buffer: an #hb_buffer_t to shape
+ * @features: (array length=num_features) (allow-none): an array of user
+ * specified #hb_feature_t or %NULL
+ * @num_features: the length of @features array
+ * @shaper_list: (array zero-terminated=1) (allow-none): a %NULL-terminated
+ * array of shapers to use or %NULL
+ *
+ * See hb_shape() for details. If @shaper_list is not %NULL, the specified
+ * shapers will be used in the given order, otherwise the default shapers list
+ * will be used.
+ *
+ * Return value: false if all shapers failed, true otherwise
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_shape_full (hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features,
+ const char * const *shaper_list)
+{
+ if (unlikely (hb_object_is_immutable (buffer))) return false;
+
+ hb_shape_plan_t *shape_plan = hb_shape_plan_create_cached2 (font->face, &buffer->props,
+ features, num_features,
+ font->coords, font->num_coords,
+ shaper_list);
+ hb_bool_t res = hb_shape_plan_execute (shape_plan, font, buffer, features, num_features);
+ hb_shape_plan_destroy (shape_plan);
+
+ if (res)
+ buffer->content_type = HB_BUFFER_CONTENT_TYPE_GLYPHS;
+ return res;
+}
+
+/**
+ * hb_shape:
+ * @font: an #hb_font_t to use for shaping
+ * @buffer: an #hb_buffer_t to shape
+ * @features: (array length=num_features) (allow-none): an array of user
+ * specified #hb_feature_t or %NULL
+ * @num_features: the length of @features array
+ *
+ * Shapes @buffer using @font, turning its Unicode character content into
+ * positioned glyphs. If @features is not %NULL, it will be used to control the
+ * features applied during shaping. If two @features have the same tag but
+ * overlapping ranges the value of the feature with the higher index takes
+ * precedence.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_shape (hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features)
+{
+ hb_shape_full (font, buffer, features, num_features, nullptr);
+}
diff --git a/thirdparty/harfbuzz/src/hb-shape.h b/thirdparty/harfbuzz/src/hb-shape.h
new file mode 100644
index 0000000000..39507ff744
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-shape.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_H_IN
+#error "Include <hb.h> instead."
+#endif
+
+#ifndef HB_SHAPE_H
+#define HB_SHAPE_H
+
+#include "hb-common.h"
+#include "hb-buffer.h"
+#include "hb-font.h"
+
+HB_BEGIN_DECLS
+
+
+HB_EXTERN void
+hb_shape (hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features);
+
+HB_EXTERN hb_bool_t
+hb_shape_full (hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features,
+ const char * const *shaper_list);
+
+HB_EXTERN const char **
+hb_shape_list_shapers (void);
+
+
+HB_END_DECLS
+
+#endif /* HB_SHAPE_H */
diff --git a/thirdparty/harfbuzz/src/hb-shaper-impl.hh b/thirdparty/harfbuzz/src/hb-shaper-impl.hh
new file mode 100644
index 0000000000..b674fceb6a
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-shaper-impl.hh
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_SHAPER_IMPL_HH
+#define HB_SHAPER_IMPL_HH
+
+#include "hb.hh"
+
+#include "hb-shaper.hh"
+#include "hb-face.hh"
+#include "hb-font.hh"
+#include "hb-shape-plan.hh"
+#include "hb-buffer.hh"
+
+#endif /* HB_SHAPER_IMPL_HH */
diff --git a/thirdparty/harfbuzz/src/hb-shaper-list.hh b/thirdparty/harfbuzz/src/hb-shaper-list.hh
new file mode 100644
index 0000000000..0d63933a76
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-shaper-list.hh
@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_SHAPER_LIST_HH
+#define HB_SHAPER_LIST_HH
+#endif /* HB_SHAPER_LIST_HH */ /* Dummy header guards */
+
+#ifndef HB_NO_SHAPER
+
+
+/* v--- Add new shapers in the right place here. */
+
+#ifdef HAVE_GRAPHITE2
+/* Only picks up fonts that have a "Silf" table. */
+HB_SHAPER_IMPLEMENT (graphite2)
+#endif
+
+#ifndef HB_NO_OT_SHAPE
+HB_SHAPER_IMPLEMENT (ot) /* <--- This is our main OpenType shaper. */
+#endif
+
+#ifdef HAVE_UNISCRIBE
+HB_SHAPER_IMPLEMENT (uniscribe)
+#endif
+#ifdef HAVE_DIRECTWRITE
+HB_SHAPER_IMPLEMENT (directwrite)
+#endif
+#ifdef HAVE_CORETEXT
+HB_SHAPER_IMPLEMENT (coretext)
+#endif
+
+#ifndef HB_NO_FALLBACK_SHAPE
+HB_SHAPER_IMPLEMENT (fallback) /* <--- This should be last. */
+#endif
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-shaper.cc b/thirdparty/harfbuzz/src/hb-shaper.cc
new file mode 100644
index 0000000000..0ea68ad1f5
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-shaper.cc
@@ -0,0 +1,108 @@
+/*
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+#include "hb-shaper.hh"
+#include "hb-machinery.hh"
+
+
+static const hb_shaper_entry_t all_shapers[] = {
+#define HB_SHAPER_IMPLEMENT(name) {#name, _hb_##name##_shape},
+#include "hb-shaper-list.hh"
+#undef HB_SHAPER_IMPLEMENT
+};
+#ifndef HB_NO_SHAPER
+static_assert (0 != ARRAY_LENGTH_CONST (all_shapers), "No shaper enabled.");
+#endif
+
+#if HB_USE_ATEXIT
+static void free_static_shapers ();
+#endif
+
+static struct hb_shapers_lazy_loader_t : hb_lazy_loader_t<const hb_shaper_entry_t,
+ hb_shapers_lazy_loader_t>
+{
+ static hb_shaper_entry_t *create ()
+ {
+ char *env = getenv ("HB_SHAPER_LIST");
+ if (!env || !*env)
+ return nullptr;
+
+ hb_shaper_entry_t *shapers = (hb_shaper_entry_t *) calloc (1, sizeof (all_shapers));
+ if (unlikely (!shapers))
+ return nullptr;
+
+ memcpy (shapers, all_shapers, sizeof (all_shapers));
+
+ /* Reorder shaper list to prefer requested shapers. */
+ unsigned int i = 0;
+ char *end, *p = env;
+ for (;;)
+ {
+ end = strchr (p, ',');
+ if (!end)
+ end = p + strlen (p);
+
+ for (unsigned int j = i; j < ARRAY_LENGTH (all_shapers); j++)
+ if (end - p == (int) strlen (shapers[j].name) &&
+ 0 == strncmp (shapers[j].name, p, end - p))
+ {
+ /* Reorder this shaper to position i */
+ struct hb_shaper_entry_t t = shapers[j];
+ memmove (&shapers[i + 1], &shapers[i], sizeof (shapers[i]) * (j - i));
+ shapers[i] = t;
+ i++;
+ }
+
+ if (!*end)
+ break;
+ else
+ p = end + 1;
+ }
+
+#if HB_USE_ATEXIT
+ atexit (free_static_shapers);
+#endif
+
+ return shapers;
+ }
+ static void destroy (const hb_shaper_entry_t *p) { free ((void *) p); }
+ static const hb_shaper_entry_t *get_null () { return all_shapers; }
+} static_shapers;
+
+#if HB_USE_ATEXIT
+static
+void free_static_shapers ()
+{
+ static_shapers.free_instance ();
+}
+#endif
+
+const hb_shaper_entry_t *
+_hb_shapers_get ()
+{
+ return static_shapers.get_unconst ();
+}
diff --git a/thirdparty/harfbuzz/src/hb-shaper.hh b/thirdparty/harfbuzz/src/hb-shaper.hh
new file mode 100644
index 0000000000..b4138a324f
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-shaper.hh
@@ -0,0 +1,134 @@
+/*
+ * Copyright © 2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_SHAPER_HH
+#define HB_SHAPER_HH
+
+#include "hb.hh"
+#include "hb-machinery.hh"
+
+typedef hb_bool_t hb_shape_func_t (hb_shape_plan_t *shape_plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features);
+
+#define HB_SHAPER_IMPLEMENT(name) \
+ extern "C" HB_INTERNAL hb_shape_func_t _hb_##name##_shape;
+#include "hb-shaper-list.hh"
+#undef HB_SHAPER_IMPLEMENT
+
+struct hb_shaper_entry_t {
+ char name[16];
+ hb_shape_func_t *func;
+};
+
+HB_INTERNAL const hb_shaper_entry_t *
+_hb_shapers_get ();
+
+
+template <typename Data, unsigned int WheresData, typename T>
+struct hb_shaper_lazy_loader_t;
+
+#define HB_SHAPER_ORDER(Shaper) \
+ HB_PASTE (HB_SHAPER_ORDER_, Shaper)
+enum hb_shaper_order_t
+{
+ _HB_SHAPER_ORDER_ORDER_ZERO,
+#define HB_SHAPER_IMPLEMENT(Shaper) \
+ HB_SHAPER_ORDER (Shaper),
+#include "hb-shaper-list.hh"
+#undef HB_SHAPER_IMPLEMENT
+ _HB_SHAPERS_COUNT_PLUS_ONE,
+ HB_SHAPERS_COUNT = _HB_SHAPERS_COUNT_PLUS_ONE - 1,
+};
+
+template <enum hb_shaper_order_t order, typename Object> struct hb_shaper_object_data_type_t;
+
+#define HB_SHAPER_DATA_SUCCEEDED ((void *) +1)
+#define HB_SHAPER_DATA_TYPE(shaper, object) hb_##shaper##_##object##_data_t
+#define HB_SHAPER_DATA_CREATE_FUNC(shaper, object) _hb_##shaper##_shaper_##object##_data_create
+#define HB_SHAPER_DATA_DESTROY_FUNC(shaper, object) _hb_##shaper##_shaper_##object##_data_destroy
+
+#define HB_SHAPER_DATA_INSTANTIATE_SHAPERS(shaper, object) \
+ \
+ struct HB_SHAPER_DATA_TYPE (shaper, object); /* Type forward declaration. */ \
+ extern "C" HB_INTERNAL HB_SHAPER_DATA_TYPE (shaper, object) * \
+ HB_SHAPER_DATA_CREATE_FUNC (shaper, object) (hb_##object##_t *object); \
+ extern "C" HB_INTERNAL void \
+ HB_SHAPER_DATA_DESTROY_FUNC (shaper, object) (HB_SHAPER_DATA_TYPE (shaper, object) *shaper##_##object); \
+ \
+ template <> \
+ struct hb_shaper_object_data_type_t<HB_SHAPER_ORDER (shaper), hb_##object##_t> \
+ { \
+ typedef HB_SHAPER_DATA_TYPE(shaper, object) value; \
+ }; \
+ \
+ template <unsigned int WheresData> \
+ struct hb_shaper_lazy_loader_t<hb_##object##_t, WheresData, HB_SHAPER_DATA_TYPE(shaper, object)> \
+ : hb_lazy_loader_t<HB_SHAPER_DATA_TYPE(shaper, object), \
+ hb_shaper_lazy_loader_t<hb_##object##_t, \
+ WheresData, \
+ HB_SHAPER_DATA_TYPE(shaper, object)>, \
+ hb_##object##_t, WheresData> \
+ { \
+ typedef HB_SHAPER_DATA_TYPE(shaper, object) Type; \
+ static Type* create (hb_##object##_t *data) \
+ { return HB_SHAPER_DATA_CREATE_FUNC (shaper, object) (data); } \
+ static Type *get_null () { return nullptr; } \
+ static void destroy (Type *p) { HB_SHAPER_DATA_DESTROY_FUNC (shaper, object) (p); } \
+ }; \
+ \
+ static_assert (true, "") /* Require semicolon after. */
+
+
+template <typename Object>
+struct hb_shaper_object_dataset_t
+{
+ void init0 (Object *parent_data)
+ {
+ this->parent_data = parent_data;
+#define HB_SHAPER_IMPLEMENT(shaper) shaper.init0 ();
+#include "hb-shaper-list.hh"
+#undef HB_SHAPER_IMPLEMENT
+ }
+ void fini ()
+ {
+#define HB_SHAPER_IMPLEMENT(shaper) shaper.fini ();
+#include "hb-shaper-list.hh"
+#undef HB_SHAPER_IMPLEMENT
+ }
+
+ Object *parent_data; /* MUST be JUST before the lazy loaders. */
+#define HB_SHAPER_IMPLEMENT(shaper) \
+ hb_shaper_lazy_loader_t<Object, HB_SHAPER_ORDER(shaper), \
+ typename hb_shaper_object_data_type_t<HB_SHAPER_ORDER(shaper), Object>::value \
+ > shaper;
+#include "hb-shaper-list.hh"
+#undef HB_SHAPER_IMPLEMENT
+};
+
+#endif /* HB_SHAPER_HH */
diff --git a/thirdparty/harfbuzz/src/hb-static.cc b/thirdparty/harfbuzz/src/hb-static.cc
new file mode 100644
index 0000000000..f5b7fa50a0
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-static.cc
@@ -0,0 +1,112 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#include "hb-open-type.hh"
+#include "hb-face.hh"
+
+#include "hb-aat-layout-common.hh"
+#include "hb-aat-layout-feat-table.hh"
+#include "hb-ot-layout-common.hh"
+#include "hb-ot-cmap-table.hh"
+#include "hb-ot-head-table.hh"
+#include "hb-ot-maxp-table.hh"
+
+#ifndef HB_NO_VISIBILITY
+#include "hb-ot-name-language-static.hh"
+
+uint64_t const _hb_NullPool[(HB_NULL_POOL_SIZE + sizeof (uint64_t) - 1) / sizeof (uint64_t)] = {};
+/*thread_local*/ uint64_t _hb_CrapPool[(HB_NULL_POOL_SIZE + sizeof (uint64_t) - 1) / sizeof (uint64_t)] = {};
+
+DEFINE_NULL_NAMESPACE_BYTES (OT, Index) = {0xFF,0xFF};
+DEFINE_NULL_NAMESPACE_BYTES (OT, LangSys) = {0x00,0x00, 0xFF,0xFF, 0x00,0x00};
+DEFINE_NULL_NAMESPACE_BYTES (OT, RangeRecord) = {0x00,0x01, 0x00,0x00, 0x00, 0x00};
+DEFINE_NULL_NAMESPACE_BYTES (OT, CmapSubtableLongGroup) = {0x00,0x00,0x00,0x01, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00};
+DEFINE_NULL_NAMESPACE_BYTES (AAT, SettingName) = {0xFF,0xFF, 0xFF,0xFF};
+/* Hand-coded because Lookup is a template. Sad. */
+const unsigned char _hb_Null_AAT_Lookup[2] = {0xFF, 0xFF};
+
+
+
+/* hb_face_t */
+
+unsigned int
+hb_face_t::load_num_glyphs () const
+{
+ hb_sanitize_context_t c = hb_sanitize_context_t ();
+ c.set_num_glyphs (0); /* So we don't recurse ad infinitum. */
+ hb_blob_t *maxp_blob = c.reference_table<OT::maxp> (this);
+ const OT::maxp *maxp_table = maxp_blob->as<OT::maxp> ();
+
+ unsigned int ret = maxp_table->get_num_glyphs ();
+ num_glyphs.set_relaxed (ret);
+ hb_blob_destroy (maxp_blob);
+ return ret;
+}
+
+unsigned int
+hb_face_t::load_upem () const
+{
+ unsigned int ret = table.head->get_upem ();
+ upem.set_relaxed (ret);
+ return ret;
+}
+
+
+/* hb_user_data_array_t */
+
+bool
+hb_user_data_array_t::set (hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace)
+{
+ if (!key)
+ return false;
+
+ if (replace) {
+ if (!data && !destroy) {
+ items.remove (key, lock);
+ return true;
+ }
+ }
+ hb_user_data_item_t item = {key, data, destroy};
+ bool ret = !!items.replace_or_insert (item, lock, (bool) replace);
+
+ return ret;
+}
+
+void *
+hb_user_data_array_t::get (hb_user_data_key_t *key)
+{
+ hb_user_data_item_t item = {nullptr, nullptr, nullptr};
+
+ return items.find (key, &item, lock) ? item.data : nullptr;
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-string-array.hh b/thirdparty/harfbuzz/src/hb-string-array.hh
new file mode 100644
index 0000000000..e7ac119232
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-string-array.hh
@@ -0,0 +1,85 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_STRING_ARRAY_HH
+#if 0 /* Make checks happy. */
+#define HB_STRING_ARRAY_HH
+#endif
+
+#include "hb.hh"
+
+/* Based on Bruno Haible's code in Appendix B of Ulrich Drepper's dsohowto.pdf:
+ * https://software.intel.com/sites/default/files/m/a/1/e/dsohowto.pdf */
+
+#define HB_STRING_ARRAY_TYPE_NAME HB_PASTE(HB_STRING_ARRAY_NAME, _msgstr_t)
+#define HB_STRING_ARRAY_POOL_NAME HB_PASTE(HB_STRING_ARRAY_NAME, _msgstr)
+#define HB_STRING_ARRAY_OFFS_NAME HB_PASTE(HB_STRING_ARRAY_NAME, _msgidx)
+#define HB_STRING_ARRAY_LENG_NAME HB_PASTE(HB_STRING_ARRAY_NAME, _length)
+
+static const union HB_STRING_ARRAY_TYPE_NAME {
+ struct {
+/* I like to avoid storing the nul-termination byte since we don't need it,
+ * but C++ does not allow that.
+ * https://stackoverflow.com/q/28433862
+ */
+#define _S(s) char HB_PASTE (str, __LINE__)[sizeof (s)];
+#include HB_STRING_ARRAY_LIST
+#undef _S
+ } st;
+ char str[HB_VAR_ARRAY];
+}
+HB_STRING_ARRAY_POOL_NAME =
+{
+ {
+#define _S(s) s,
+#include HB_STRING_ARRAY_LIST
+#undef _S
+ }
+};
+static const unsigned int HB_STRING_ARRAY_OFFS_NAME[] =
+{
+#define _S(s) offsetof (union HB_STRING_ARRAY_TYPE_NAME, st.HB_PASTE(str, __LINE__)),
+#include HB_STRING_ARRAY_LIST
+#undef _S
+ sizeof (HB_STRING_ARRAY_TYPE_NAME)
+};
+
+static const unsigned int HB_STRING_ARRAY_LENG_NAME = ARRAY_LENGTH_CONST (HB_STRING_ARRAY_OFFS_NAME) - 1;
+
+static inline hb_bytes_t
+HB_STRING_ARRAY_NAME (unsigned int i)
+{
+ assert (i < ARRAY_LENGTH (HB_STRING_ARRAY_OFFS_NAME) - 1);
+ return hb_bytes_t (HB_STRING_ARRAY_POOL_NAME.str + HB_STRING_ARRAY_OFFS_NAME[i],
+ HB_STRING_ARRAY_OFFS_NAME[i + 1] - HB_STRING_ARRAY_OFFS_NAME[i] - 1);
+}
+
+#undef HB_STRING_ARRAY_TYPE_NAME
+#undef HB_STRING_ARRAY_POOL_NAME
+#undef HB_STRING_ARRAY_OFFS_NAME
+#undef HB_STRING_ARRAY_LENG_NAME
+
+#endif /* HB_STRING_ARRAY_HH */
diff --git a/thirdparty/harfbuzz/src/hb-style.cc b/thirdparty/harfbuzz/src/hb-style.cc
new file mode 100644
index 0000000000..86b9f7da5f
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-style.cc
@@ -0,0 +1,135 @@
+/*
+ * Copyright © 2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_STYLE
+#ifdef HB_EXPERIMENTAL_API
+
+#include "hb-ot-var-avar-table.hh"
+#include "hb-ot-var-fvar-table.hh"
+#include "hb-ot-stat-table.hh"
+#include "hb-ot-os2-table.hh"
+#include "hb-ot-head-table.hh"
+#include "hb-ot-post-table.hh"
+#include "hb-ot-face.hh"
+
+/**
+ * hb_style_tag_t:
+ * @HB_STYLE_TAG_ITALIC: Used to vary between non-italic and italic.
+ * A value of 0 can be interpreted as "Roman" (non-italic); a value of 1 can
+ * be interpreted as (fully) italic.
+ * @HB_STYLE_TAG_OPTICAL_SIZE: Used to vary design to suit different text sizes.
+ * Non-zero. Values can be interpreted as text size, in points.
+ * @HB_STYLE_TAG_SLANT: Used to vary between upright and slanted text. Values
+ * must be greater than -90 and less than +90. Values can be interpreted as
+ * the angle, in counter-clockwise degrees, of oblique slant from whatever the
+ * designer considers to be upright for that font design.
+ * @HB_STYLE_TAG_WIDTH: Used to vary width of text from narrower to wider.
+ * Non-zero. Values can be interpreted as a percentage of whatever the font
+ * designer considers "normal width" for that font design.
+ * @HB_STYLE_TAG_WEIGHT: Used to vary stroke thicknesses or other design details
+ * to give variation from lighter to blacker. Values can be interpreted in direct
+ * comparison to values for usWeightClass in the OS/2 table,
+ * or the CSS font-weight property.
+ *
+ * Defined by https://docs.microsoft.com/en-us/typography/opentype/spec/dvaraxisreg
+ *
+ * Since: EXPERIMENTAL
+ **/
+typedef enum {
+ HB_STYLE_TAG_ITALIC = HB_TAG ('i','t','a','l'),
+ HB_STYLE_TAG_OPTICAL_SIZE = HB_TAG ('o','p','s','z'),
+ HB_STYLE_TAG_SLANT = HB_TAG ('s','l','n','t'),
+ HB_STYLE_TAG_WIDTH = HB_TAG ('w','d','t','h'),
+ HB_STYLE_TAG_WEIGHT = HB_TAG ('w','g','h','t'),
+
+ _HB_STYLE_TAG_MAX_VALUE = HB_TAG_MAX_SIGNED /*< skip >*/
+} hb_style_tag_t;
+
+/**
+ * hb_style_get_value:
+ * @font: a #hb_font_t object.
+ * @style_tag: a style tag.
+ *
+ * Searches the variation axes of a #hb_font_t object for a specific axis
+ * first; if the font has no such axis, tries to get a default style value
+ * from different tables of the font.
+ *
+ * Returns: The axis value corresponding to the style tag, or a default value.
+ *
+ * Since: EXPERIMENTAL
+ **/
+float
+hb_style_get_value (hb_font_t *font, hb_tag_t tag)
+{
+ hb_style_tag_t style_tag = (hb_style_tag_t) tag;
+ hb_face_t *face = font->face;
+
+#ifndef HB_NO_VAR
+ hb_ot_var_axis_info_t axis;
+ if (hb_ot_var_find_axis_info (face, style_tag, &axis))
+ {
+ if (axis.axis_index < font->num_coords) return font->design_coords[axis.axis_index];
+ /* If a face is variable, fvar's default_value is better than STAT records */
+ return axis.default_value;
+ }
+#endif
+
+ if (style_tag == HB_STYLE_TAG_OPTICAL_SIZE && font->ptem)
+ return font->ptem;
+
+ /* STAT */
+ float value;
+ if (face->table.STAT->get_value (style_tag, &value))
+ return value;
+
+ switch ((unsigned) style_tag)
+ {
+ case HB_STYLE_TAG_ITALIC:
+ return face->table.OS2->is_italic () || face->table.head->is_italic () ? 1 : 0;
+ case HB_STYLE_TAG_OPTICAL_SIZE:
+ {
+ unsigned int lower, upper;
+ return face->table.OS2->v5 ().get_optical_size (&lower, &upper)
+ ? (float) (lower + upper) / 2.f
+ : 12.f;
+ }
+ case HB_STYLE_TAG_SLANT:
+ return face->table.post->table->italicAngle.to_float ();
+ case HB_STYLE_TAG_WIDTH:
+ return face->table.OS2->has_data ()
+ ? face->table.OS2->get_width ()
+ : (face->table.head->is_condensed () ? 75 : 100);
+ case HB_STYLE_TAG_WEIGHT:
+ return face->table.OS2->has_data ()
+ ? face->table.OS2->usWeightClass
+ : (face->table.head->is_bold () ? 700 : 400);
+ default:
+ return 0;
+ }
+}
+
+#endif
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-style.h b/thirdparty/harfbuzz/src/hb-style.h
new file mode 100644
index 0000000000..1209c79e94
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-style.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright © 2019 Ebrahim Byagowi
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_H_IN
+#error "Include <hb.h> instead."
+#endif
+
+#ifndef HB_STYLE_H
+#define HB_STYLE_H
+
+#include "hb.h"
+
+HB_BEGIN_DECLS
+
+#ifdef HB_EXPERIMENTAL_API
+HB_EXTERN float
+hb_style_get_value (hb_font_t *font, hb_tag_t style_tag);
+#endif
+
+HB_END_DECLS
+
+#endif /* HB_STYLE_H */
diff --git a/thirdparty/harfbuzz/src/hb-subset-cff-common.cc b/thirdparty/harfbuzz/src/hb-subset-cff-common.cc
new file mode 100644
index 0000000000..04e1db24ac
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-subset-cff-common.cc
@@ -0,0 +1,227 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_SUBSET_CFF
+
+#include "hb-ot-cff-common.hh"
+#include "hb-ot-cff2-table.hh"
+#include "hb-subset-cff-common.hh"
+
+/* Disable FDSelect format 0 for compatibility with fonttools, which doesn't seem to choose it.
+ * It is rarely, if ever, much smaller than format 3 anyway. */
+#define CFF_SERIALIZE_FDSELECT_0 0
+
+using namespace CFF;
+
+/**
+ * hb_plan_subset_cff_fdselect
+ * Determine an optimal FDSelect format according to a provided plan.
+ *
+ * Walks the output glyphs in order, records a (fd, first glyph) range each
+ * time the font dict index changes, and remaps the recorded font dict
+ * indices through fdmap.
+ *
+ * Return value: FDSelect format, size, and ranges for the most compact subset FDSelect
+ * along with a font index remapping table. Returns false if the set or a
+ * range could not be allocated, if fdmap does not end up with one entry per
+ * used font dict, or if more than 0xFF font dicts are used while src is not
+ * format 4.
+ **/
+
+bool
+hb_plan_subset_cff_fdselect (const hb_subset_plan_t *plan,
+                             unsigned int fdCount,
+                             const FDSelect &src, /* IN */
+                             unsigned int &subset_fd_count /* OUT */,
+                             unsigned int &subset_fdselect_size /* OUT */,
+                             unsigned int &subset_fdselect_format /* OUT */,
+                             hb_vector_t<code_pair_t> &fdselect_ranges /* OUT */,
+                             hb_inc_bimap_t &fdmap /* OUT */)
+{
+  subset_fd_count = 0;
+  subset_fdselect_size = 0;
+  subset_fdselect_format = 0;
+  unsigned int num_ranges = 0;
+
+  /* nothing to plan for an empty subset; outputs stay zeroed */
+  unsigned int subset_num_glyphs = plan->num_output_glyphs ();
+  if (subset_num_glyphs == 0)
+    return true;
+
+  {
+    /* use hb_set to determine the subset of font dicts */
+    hb_set_t *set = hb_set_create ();
+    if (unlikely (set == &Null (hb_set_t))) return false;
+    hb_codepoint_t prev_fd = CFF_UNDEF_CODE;
+    for (hb_codepoint_t i = 0; i < subset_num_glyphs; i++)
+    {
+      hb_codepoint_t glyph;
+      hb_codepoint_t fd;
+      if (!plan->old_gid_for_new_gid (i, &glyph))
+      {
+        /* fonttools retains FDSelect & font dicts for missing glyphs. do the same */
+        glyph = i;
+      }
+      fd = src.get_fd (glyph);
+      set->add (fd);
+
+      /* start a new range whenever the font dict index changes */
+      if (fd != prev_fd)
+      {
+        num_ranges++;
+        prev_fd = fd;
+        code_pair_t pair = { fd, i };
+        fdselect_ranges.push (pair);
+      }
+    }
+
+    /* A failed push () would leave fewer stored ranges than num_ranges, so the
+     * size planned below would no longer describe what gets serialized. */
+    if (unlikely (fdselect_ranges.in_error ()))
+    {
+      hb_set_destroy (set);
+      return false;
+    }
+
+    subset_fd_count = set->get_population ();
+    if (subset_fd_count == fdCount)
+    {
+      /* all font dicts belong to the subset. no need to subset FDSelect & FDArray */
+      fdmap.identity (fdCount);
+      hb_set_destroy (set);
+    }
+    else
+    {
+      /* create a fdmap */
+      fdmap.reset ();
+
+      hb_codepoint_t fd = CFF_UNDEF_CODE;
+      while (set->next (&fd))
+        fdmap.add (fd);
+      hb_set_destroy (set);
+      if (unlikely (fdmap.get_population () != subset_fd_count))
+        return false;
+    }
+
+    /* update each font dict index stored as "code" in fdselect_ranges */
+    for (unsigned int i = 0; i < fdselect_ranges.length; i++)
+      fdselect_ranges[i].code = fdmap[fdselect_ranges[i].code];
+  }
+
+  /* determine which FDSelect format is most compact */
+  if (subset_fd_count > 0xFF)
+  {
+    /* more than 0xFF font dicts is only accepted when the source is format 4 */
+    if (unlikely (src.format != 4))
+      return false;
+    subset_fdselect_format = 4;
+    subset_fdselect_size = FDSelect::min_size + FDSelect4::min_size + FDSelect4_Range::static_size * num_ranges + HBUINT32::static_size;
+  }
+  else
+  {
+#if CFF_SERIALIZE_FDSELECT_0
+    unsigned int format0_size = FDSelect::min_size + FDSelect0::min_size + HBUINT8::static_size * subset_num_glyphs;
+#endif
+    unsigned int format3_size = FDSelect::min_size + FDSelect3::min_size + FDSelect3_Range::static_size * num_ranges + HBUINT16::static_size;
+
+#if CFF_SERIALIZE_FDSELECT_0
+    if (format0_size <= format3_size)
+    {
+      // subset_fdselect_format = 0;
+      subset_fdselect_size = format0_size;
+    }
+    else
+#endif
+    {
+      subset_fdselect_format = 3;
+      subset_fdselect_size = format3_size;
+    }
+  }
+
+  return true;
+}
+
+/* Writes the payload shared by FDSelect formats 3 and 4: the range count,
+ * one (first glyph, fd) record per planned range, then the glyph-count
+ * sentinel. size is the number of bytes to allocate for the payload. */
+template <typename FDSELECT3_4>
+static inline bool
+serialize_fdselect_3_4 (hb_serialize_context_t *c,
+                        const unsigned int num_glyphs,
+                        const FDSelect &src,
+                        unsigned int size,
+                        const hb_vector_t<code_pair_t> &fdselect_ranges)
+{
+  TRACE_SERIALIZE (this);
+
+  FDSELECT3_4 *table = c->allocate_size<FDSELECT3_4> (size);
+  if (unlikely (table == nullptr)) return_trace (false);
+
+  table->nRanges () = fdselect_ranges.length;
+  for (unsigned int r = 0; r < fdselect_ranges.length; r++)
+  {
+    const code_pair_t &range = fdselect_ranges[r];
+    table->ranges[r].first = range.glyph;
+    table->ranges[r].fd = range.code;
+  }
+  table->sentinel () = num_glyphs;
+
+  return_trace (true);
+}
+
+/**
+ * hb_serialize_cff_fdselect
+ * Serialize a subset FDSelect format planned above.
+ *
+ * Writes the FDSelect header with the requested format, then the
+ * format-specific payload. size is the total planned FDSelect size; the
+ * header size is subtracted before the payload is allocated. src and
+ * fd_count are not consulted by this function. Returns false on allocation
+ * failure or when fdselect_format is not one of the supported formats.
+ **/
+bool
+hb_serialize_cff_fdselect (hb_serialize_context_t *c,
+                           const unsigned int num_glyphs,
+                           const FDSelect &src,
+                           unsigned int fd_count,
+                           unsigned int fdselect_format,
+                           unsigned int size,
+                           const hb_vector_t<code_pair_t> &fdselect_ranges)
+{
+  TRACE_SERIALIZE (this);
+  FDSelect *p = c->allocate_min<FDSelect> ();
+  if (unlikely (!p)) return_trace (false);
+  p->format = fdselect_format;
+  size -= FDSelect::min_size;
+
+  switch (fdselect_format)
+  {
+#if CFF_SERIALIZE_FDSELECT_0
+    case 0:
+    {
+      /* expand the ranges back into one fd per glyph */
+      FDSelect0 *p0 = c->allocate_size<FDSelect0> (size);
+      if (unlikely (!p0)) return_trace (false);
+      unsigned int range_index = 0;
+      unsigned int fd = fdselect_ranges[range_index++].code;
+      for (unsigned int i = 0; i < num_glyphs; i++)
+      {
+        if ((range_index < fdselect_ranges.length) &&
+            (i >= fdselect_ranges[range_index].glyph))
+        {
+          fd = fdselect_ranges[range_index++].code;
+        }
+        p0->fds[i] = fd;
+      }
+      return_trace (true);
+    }
+#endif /* CFF_SERIALIZE_FDSELECT_0 */
+
+    case 3:
+      return serialize_fdselect_3_4<FDSelect3> (c, num_glyphs, src,
+                                                size, fdselect_ranges);
+
+    case 4:
+      return serialize_fdselect_3_4<FDSelect4> (c, num_glyphs, src,
+                                                size, fdselect_ranges);
+
+    default:
+      return_trace (false);
+  }
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-subset-cff-common.hh b/thirdparty/harfbuzz/src/hb-subset-cff-common.hh
new file mode 100644
index 0000000000..422b20b8d0
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-subset-cff-common.hh
@@ -0,0 +1,989 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+
+#ifndef HB_SUBSET_CFF_COMMON_HH
+#define HB_SUBSET_CFF_COMMON_HH
+
+#include "hb.hh"
+
+#include "hb-subset-plan.hh"
+#include "hb-cff-interp-cs-common.hh"
+
+namespace CFF {
+
+/* Used for writing a temporary charstring.
+ * Appends encoded numbers, operators and raw bytes to a caller-supplied
+ * buffer; a failed append sets an error flag reported by is_error (). */
+struct str_encoder_t
+{
+ str_encoder_t (str_buff_t &buff_)
+ : buff (buff_), error (false) {}
+
+ /* Shrinks the buffer to zero length; the error flag is not cleared. */
+ void reset () { buff.resize (0); }
+
+ /* Appends one byte; flags an error when push () hands back the Crap object. */
+ void encode_byte (unsigned char b)
+ {
+ if (unlikely (buff.push (b) == &Crap (unsigned char)))
+ set_error ();
+ }
+
+ /* Appends an integer operand using the shortest of three forms:
+ * -107..107 one byte (v + 139)
+ * 108..1131 two bytes starting at OpCode_TwoBytePosInt0
+ * -1131..-108 two bytes starting at OpCode_TwoByteNegInt0
+ * otherwise OpCode_shortint followed by the high and low byte, with the
+ * value first clamped to -32768..32767. */
+ void encode_int (int v)
+ {
+ if ((-1131 <= v) && (v <= 1131))
+ {
+ if ((-107 <= v) && (v <= 107))
+ encode_byte (v + 139);
+ else if (v > 0)
+ {
+ v -= 108;
+ encode_byte ((v >> 8) + OpCode_TwoBytePosInt0);
+ encode_byte (v & 0xFF);
+ }
+ else
+ {
+ v = -v - 108;
+ encode_byte ((v >> 8) + OpCode_TwoByteNegInt0);
+ encode_byte (v & 0xFF);
+ }
+ }
+ else
+ {
+ if (unlikely (v < -32768))
+ v = -32768;
+ else if (unlikely (v > 32767))
+ v = 32767;
+ encode_byte (OpCode_shortint);
+ encode_byte ((v >> 8) & 0xFF);
+ encode_byte (v & 0xFF);
+ }
+ }
+
+ /* Appends a number: via encode_int () when in_int_range () holds, otherwise
+ * as OpCode_fixedcs followed by the four bytes of to_fixed (), most
+ * significant byte first. */
+ void encode_num (const number_t& n)
+ {
+ if (n.in_int_range ())
+ {
+ encode_int (n.to_int ());
+ }
+ else
+ {
+ int32_t v = n.to_fixed ();
+ encode_byte (OpCode_fixedcs);
+ encode_byte ((v >> 24) & 0xFF);
+ encode_byte ((v >> 16) & 0xFF);
+ encode_byte ((v >> 8) & 0xFF);
+ encode_byte (v & 0xFF);
+ }
+ }
+
+ /* Appends an operator; escaped operators are written as OpCode_escape
+ * followed by the un-escaped code. */
+ void encode_op (op_code_t op)
+ {
+ if (Is_OpCode_ESC (op))
+ {
+ encode_byte (OpCode_escape);
+ encode_byte (Unmake_OpCode_ESC (op));
+ }
+ else
+ encode_byte (op);
+ }
+
+ /* Appends the bytes of str verbatim: grows the buffer by str.length and
+ * copies into the new tail. Flags an error and copies nothing when the
+ * resize fails or the buffer is still too short afterwards. */
+ void copy_str (const byte_str_t &str)
+ {
+ unsigned int offset = buff.length;
+ if (unlikely (!buff.resize (offset + str.length)))
+ {
+ set_error ();
+ return;
+ }
+ if (unlikely (buff.length < offset + str.length))
+ {
+ set_error ();
+ return;
+ }
+ memcpy (&buff[offset], &str[0], str.length);
+ }
+
+ /* True once any append has failed. */
+ bool is_error () const { return error; }
+
+ protected:
+ void set_error () { error = true; }
+
+ str_buff_t &buff;
+ bool error;
+};
+
+/* Links to the sub-tables referenced from the top dict (FDSelect, FDArray,
+ * CharStrings); read by cff_top_dict_op_serializer_t when writing the
+ * corresponding operators. Both link indices start at 0. */
+struct cff_sub_table_info_t {
+ cff_sub_table_info_t ()
+ : fd_array_link (0),
+ char_strings_link (0)
+ {
+ fd_select.init ();
+ }
+
+ table_info_t fd_select;
+ objidx_t fd_array_link;
+ objidx_t char_strings_link;
+};
+
+/* Serializes one top dict operator. CharStrings, FDArray and FDSelect are
+ * rewritten as 4-byte absolute links to the objects recorded in info; every
+ * other operator is copied through unchanged. */
+template <typename OPSTR=op_str_t>
+struct cff_top_dict_op_serializer_t : op_serializer_t
+{
+  bool serialize (hb_serialize_context_t *c,
+                  const OPSTR &opstr,
+                  const cff_sub_table_info_t &info) const
+  {
+    TRACE_SERIALIZE (this);
+
+    /* every case returns, so nothing follows the switch */
+    switch (opstr.op)
+    {
+      case OpCode_CharStrings:
+        return_trace (FontDict::serialize_link4_op(c, opstr.op, info.char_strings_link, whence_t::Absolute));
+
+      case OpCode_FDArray:
+        return_trace (FontDict::serialize_link4_op(c, opstr.op, info.fd_array_link, whence_t::Absolute));
+
+      case OpCode_FDSelect:
+        return_trace (FontDict::serialize_link4_op(c, opstr.op, info.fd_select.link, whence_t::Absolute));
+
+      default:
+        return_trace (copy_opstr (c, opstr));
+    }
+  }
+};
+
+/* Serializes one font dict operator. The Private operator is rewritten with
+ * the subset private dict's size and a link to it; any other operator has
+ * its original bytes copied as-is. */
+struct cff_font_dict_op_serializer_t : op_serializer_t
+{
+  bool serialize (hb_serialize_context_t *c,
+                  const op_str_t &opstr,
+                  const table_info_t &privateDictInfo) const
+  {
+    TRACE_SERIALIZE (this);
+
+    if (opstr.op != OpCode_Private)
+    {
+      /* not Private: duplicate the operator's raw bytes */
+      HBUINT8 *dst = c->allocate_size<HBUINT8> (opstr.str.length);
+      if (unlikely (!dst)) return_trace (false);
+      memcpy (dst, &opstr.str[0], opstr.str.length);
+      return_trace (true);
+    }
+
+    /* serialize the private dict size & offset as 2-byte & 4-byte integers */
+    return_trace (UnsizedByteStr::serialize_int2 (c, privateDictInfo.size) &&
+                  Dict::serialize_link4_op (c, opstr.op, privateDictInfo.link, whence_t::Absolute));
+  }
+};
+
+/* Serializes one private dict operator. Hint operators are omitted when
+ * drop_hints is set; Subrs is omitted when desubroutinizing or when there is
+ * no subrs link, and otherwise written as a 2-byte link; everything else is
+ * copied through unchanged. */
+struct cff_private_dict_op_serializer_t : op_serializer_t
+{
+  cff_private_dict_op_serializer_t (bool desubroutinize_, bool drop_hints_)
+    : desubroutinize (desubroutinize_), drop_hints (drop_hints_) {}
+
+  bool serialize (hb_serialize_context_t *c,
+                  const op_str_t &opstr,
+                  objidx_t subrs_link) const
+  {
+    TRACE_SERIALIZE (this);
+
+    /* a dropped hint operator counts as success; report it through
+     * return_trace like every other exit */
+    if (drop_hints && dict_opset_t::is_hint_op (opstr.op))
+      return_trace (true);
+    if (opstr.op == OpCode_Subrs)
+    {
+      if (desubroutinize || !subrs_link)
+        return_trace (true);
+      else
+        return_trace (FontDict::serialize_link2_op (c, opstr.op, subrs_link));
+    }
+    else
+      return_trace (copy_opstr (c, opstr));
+  }
+
+  protected:
+  const bool desubroutinize;
+  const bool drop_hints;
+};
+
+/* Interpreter parameter used by subr_flattener_t: the output buffer for one
+ * glyph's flattened charstring and whether hints are to be dropped. */
+struct flatten_param_t
+{
+ str_buff_t &flatStr;
+ bool drop_hints;
+};
+
+/* Produces one charstring per output glyph by running the charstring
+ * interpreter over each retained glyph with a flatten_param_t that receives
+ * the output. */
+template <typename ACC, typename ENV, typename OPSET, op_code_t endchar_op=OpCode_Invalid>
+struct subr_flattener_t
+{
+  subr_flattener_t (const ACC &acc_,
+                    const hb_subset_plan_t *plan_)
+    : acc (acc_), plan (plan_) {}
+
+  /* Fills flat_charstrings with one entry per output glyph. Returns false on
+   * allocation failure, an out-of-range font dict index, or an interpreter
+   * failure. */
+  bool flatten (str_buff_vec_t &flat_charstrings)
+  {
+    const unsigned int glyph_count = plan->num_output_glyphs ();
+    if (!flat_charstrings.resize (glyph_count))
+      return false;
+
+    /* initialize every slot up front, before any glyph is interpreted */
+    for (unsigned int gid = 0; gid < glyph_count; gid++)
+      flat_charstrings[gid].init ();
+
+    for (unsigned int gid = 0; gid < glyph_count; gid++)
+    {
+      hb_codepoint_t old_gid;
+      if (!plan->old_gid_for_new_gid (gid, &old_gid))
+      {
+        /* add an endchar only charstring for a missing glyph if CFF1 */
+        if (endchar_op != OpCode_Invalid)
+          flat_charstrings[gid].push (endchar_op);
+        continue;
+      }
+
+      const byte_str_t str = (*acc.charStrings)[old_gid];
+      unsigned int fd = acc.fdSelect->get_fd (old_gid);
+      if (unlikely (fd >= acc.fdCount))
+        return false;
+
+      cs_interpreter_t<ENV, OPSET, flatten_param_t> interp;
+      interp.env.init (str, acc, fd);
+      flatten_param_t param = { flat_charstrings[gid], plan->drop_hints };
+      if (unlikely (!interp.interpret (param)))
+        return false;
+    }
+    return true;
+  }
+
+  const ACC &acc;
+  const hb_subset_plan_t *plan;
+};
+
+/* Sets of subroutine numbers in use: one global set plus one local set per
+ * font dict. */
+struct subr_closures_t
+{
+ subr_closures_t () : valid (false), global_closure (nullptr)
+ { local_closures.init (); }
+
+ /* Creates the global set and fd_count local sets. valid ends up false if
+ * the vector resize fails or any created set is the shared empty set. */
+ void init (unsigned int fd_count)
+ {
+ valid = true;
+ global_closure = hb_set_create ();
+ if (global_closure == hb_set_get_empty ())
+ valid = false;
+ if (!local_closures.resize (fd_count))
+ valid = false;
+
+ for (unsigned int i = 0; i < local_closures.length; i++)
+ {
+ local_closures[i] = hb_set_create ();
+ if (local_closures[i] == hb_set_get_empty ())
+ valid = false;
+ }
+ }
+
+ /* Destroys every set and releases the vector of local sets. */
+ void fini ()
+ {
+ hb_set_destroy (global_closure);
+ for (unsigned int i = 0; i < local_closures.length; i++)
+ hb_set_destroy (local_closures[i]);
+ local_closures.fini ();
+ }
+
+ /* Empties every set while keeping the sets themselves. */
+ void reset ()
+ {
+ hb_set_clear (global_closure);
+ for (unsigned int i = 0; i < local_closures.length; i++)
+ hb_set_clear (local_closures[i]);
+ }
+
+ bool is_valid () const { return valid; }
+ bool valid;
+ hb_set_t *global_closure;
+ hb_vector_t<hb_set_t *> local_closures;
+};
+
+/* One parsed charstring operator plus three flags:
+ * drop - marked for removal (set_drop () is ignored once keep is set)
+ * keep - protects the op from being marked for drop
+ * skip - not written by encode_str (); add_call_op () sets it on the value
+ * preceding a subroutine call
+ * subr_num holds the subroutine number for call operators. */
+struct parsed_cs_op_t : op_str_t
+{
+ void init (unsigned int subr_num_ = 0)
+ {
+ op_str_t::init ();
+ subr_num = subr_num_;
+ drop_flag = false;
+ keep_flag = false;
+ skip_flag = false;
+ }
+
+ void fini () { op_str_t::fini (); }
+
+ bool for_drop () const { return drop_flag; }
+ void set_drop () { if (!for_keep ()) drop_flag = true; }
+
+ bool for_keep () const { return keep_flag; }
+ void set_keep () { keep_flag = true; }
+
+ bool for_skip () const { return skip_flag; }
+ void set_skip () { skip_flag = true; }
+
+ unsigned int subr_num;
+
+ protected:
+ bool drop_flag : 1;
+ bool keep_flag : 1;
+ bool skip_flag : 1;
+};
+
+/* A charstring or subroutine parsed into a sequence of parsed_cs_op_t
+ * values, plus bookkeeping: whether parsing has completed, whether hints or
+ * a vsindex were dropped, and an optional prefix (number and operator) that
+ * encode_str () re-inserts when hints were dropped. */
+struct parsed_cs_str_t : parsed_values_t<parsed_cs_op_t>
+{
+  /* Resets every flag, including vsindex_dropped and the prefix operator, so
+   * no accessor reads an uninitialized member after init (). */
+  void init ()
+  {
+    SUPER::init ();
+    parsed = false;
+    hint_dropped = false;
+    vsindex_dropped = false;
+    has_prefix_ = false;
+    prefix_op_ = OpCode_Invalid;
+  }
+
+  /* Records an operator; ignored once the string is marked parsed. */
+  void add_op (op_code_t op, const byte_str_ref_t& str_ref)
+  {
+    if (!is_parsed ())
+      SUPER::add_op (op, str_ref);
+  }
+
+  /* Records a subroutine call. The previously recorded value (the subroutine
+   * number operand) is marked for skip, since encode_str () writes a
+   * remapped number in its place. */
+  void add_call_op (op_code_t op, const byte_str_ref_t& str_ref, unsigned int subr_num)
+  {
+    if (!is_parsed ())
+    {
+      unsigned int parsed_len = get_count ();
+      if (likely (parsed_len > 0))
+        values[parsed_len-1].set_skip ();
+
+      parsed_cs_op_t val;
+      val.init (subr_num);
+      SUPER::add_op (op, str_ref, val);
+    }
+  }
+
+  void set_prefix (const number_t &num, op_code_t op = OpCode_Invalid)
+  {
+    has_prefix_ = true;
+    prefix_op_ = op;
+    prefix_num_ = num;
+  }
+
+  /* True when pos is the last value, or the next value is a return. */
+  bool at_end (unsigned int pos) const
+  {
+    return ((pos + 1 >= values.length) /* CFF2 */
+            || (values[pos + 1].op == OpCode_return));
+  }
+
+  bool is_parsed () const { return parsed; }
+  void set_parsed () { parsed = true; }
+
+  bool is_hint_dropped () const { return hint_dropped; }
+  void set_hint_dropped () { hint_dropped = true; }
+
+  bool is_vsindex_dropped () const { return vsindex_dropped; }
+  void set_vsindex_dropped () { vsindex_dropped = true; }
+
+  bool has_prefix () const { return has_prefix_; }
+  op_code_t prefix_op () const { return prefix_op_; }
+  const number_t &prefix_num () const { return prefix_num_; }
+
+  protected:
+  bool parsed;
+  bool hint_dropped;
+  bool vsindex_dropped;
+  bool has_prefix_;
+  op_code_t prefix_op_;
+  number_t prefix_num_;
+
+  private:
+  typedef parsed_values_t<parsed_cs_op_t> SUPER;
+};
+
+/* Vector of parsed charstrings or subroutines. */
+struct parsed_cs_str_vec_t : hb_vector_t<parsed_cs_str_t>
+{
+  /* Sizes the vector to len_ entries and initializes each of them; no entry
+   * is initialized if the resize fails. */
+  void init (unsigned int len_ = 0)
+  {
+    SUPER::init ();
+    if (likely (resize (len_)))
+    {
+      for (unsigned int idx = 0; idx < length; idx++)
+        (*this)[idx].init ();
+    }
+  }
+
+  void fini () { SUPER::fini_deep (); }
+
+  private:
+  typedef hb_vector_t<parsed_cs_str_t> SUPER;
+};
+
+/* Per-glyph parameter for subroutine subsetting: the parsed charstring being
+ * processed, the parsed global and local subroutines, the closure sets to
+ * fill, and the drop-hints option. current_parsed_str points at whichever
+ * parsed string matches the current call context. */
+struct subr_subset_param_t
+{
+ void init (parsed_cs_str_t *parsed_charstring_,
+ parsed_cs_str_vec_t *parsed_global_subrs_, parsed_cs_str_vec_t *parsed_local_subrs_,
+ hb_set_t *global_closure_, hb_set_t *local_closure_,
+ bool drop_hints_)
+ {
+ parsed_charstring = parsed_charstring_;
+ current_parsed_str = parsed_charstring;
+ parsed_global_subrs = parsed_global_subrs_;
+ parsed_local_subrs = parsed_local_subrs_;
+ global_closure = global_closure_;
+ local_closure = local_closure_;
+ drop_hints = drop_hints_;
+ }
+
+ /* Maps a call context to its parsed string: the charstring itself, or the
+ * local/global subroutine numbered context.subr_num. Returns nullptr when
+ * the subroutine number is out of range. */
+ parsed_cs_str_t *get_parsed_str_for_context (call_context_t &context)
+ {
+ switch (context.type)
+ {
+ case CSType_CharString:
+ return parsed_charstring;
+
+ case CSType_LocalSubr:
+ if (likely (context.subr_num < parsed_local_subrs->length))
+ return &(*parsed_local_subrs)[context.subr_num];
+ break;
+
+ case CSType_GlobalSubr:
+ if (likely (context.subr_num < parsed_global_subrs->length))
+ return &(*parsed_global_subrs)[context.subr_num];
+ break;
+ }
+ return nullptr;
+ }
+
+ /* Points current_parsed_str at the parsed string for env's context. Sets
+ * the error state on env instead when no such string exists or when a
+ * recursive call is detected. */
+ template <typename ENV>
+ void set_current_str (ENV &env, bool calling)
+ {
+ parsed_cs_str_t *parsed_str = get_parsed_str_for_context (env.context);
+ if (unlikely (!parsed_str))
+ {
+ env.set_error ();
+ return;
+ }
+ /* If the called subroutine is parsed partially but not completely yet,
+ * it must be because we are calling it recursively.
+ * Handle it as an error. */
+ if (unlikely (calling && !parsed_str->is_parsed () && (parsed_str->values.length > 0)))
+ env.set_error ();
+ else
+ current_parsed_str = parsed_str;
+ }
+
+ parsed_cs_str_t *current_parsed_str;
+
+ parsed_cs_str_t *parsed_charstring;
+ parsed_cs_str_vec_t *parsed_global_subrs;
+ parsed_cs_str_vec_t *parsed_local_subrs;
+ hb_set_t *global_closure;
+ hb_set_t *local_closure;
+ bool drop_hints;
+};
+
+/* Old-to-new subroutine number map together with the bias to subtract from
+ * new numbers when they are written as call operands. */
+struct subr_remap_t : hb_inc_bimap_t
+{
+ /* Adds every number in closure to the map, in the order hb_set_next ()
+ * yields them, then picks the bias from the resulting population. */
+ void create (hb_set_t *closure)
+ {
+ /* create a remapping of subroutine numbers from old to new.
+ * no optimization based on usage counts. fonttools doesn't appear to do that either.
+ */
+
+ hb_codepoint_t old_num = HB_SET_VALUE_INVALID;
+ while (hb_set_next (closure, &old_num))
+ add (old_num);
+
+ /* bias is 107 below 1240 subroutines, 1131 below 33900, else 32768 */
+ if (get_population () < 1240)
+ bias = 107;
+ else if (get_population () < 33900)
+ bias = 1131;
+ else
+ bias = 32768;
+ }
+
+ /* New subroutine number for old_num with the bias subtracted.
+ * NOTE(review): bias is only assigned in create (); presumably create ()
+ * always runs before this is called - confirm against callers. */
+ int biased_num (unsigned int old_num) const
+ {
+ hb_codepoint_t new_num = get (old_num);
+ return (int)new_num - bias;
+ }
+
+ protected:
+ int bias;
+};
+
+/* Subroutine remaps for a whole font: one for global subroutines and one
+ * per font dict for local subroutines. */
+struct subr_remaps_t
+{
+ subr_remaps_t ()
+ {
+ global_remap.init ();
+ local_remaps.init ();
+ }
+
+ ~subr_remaps_t () { fini (); }
+
+ /* Allocates and initializes fdCount local remaps; on resize failure it
+ * returns early and in_error () reports the vector's error state. */
+ void init (unsigned int fdCount)
+ {
+ if (unlikely (!local_remaps.resize (fdCount))) return;
+ for (unsigned int i = 0; i < fdCount; i++)
+ local_remaps[i].init ();
+ }
+
+ bool in_error()
+ {
+ return local_remaps.in_error ();
+ }
+
+ /* Builds the global remap from the global closure and each local remap
+ * from the local closure at the same index. */
+ void create (subr_closures_t& closures)
+ {
+ global_remap.create (closures.global_closure);
+ for (unsigned int i = 0; i < local_remaps.length; i++)
+ local_remaps[i].create (closures.local_closures[i]);
+ }
+
+ void fini ()
+ {
+ global_remap.fini ();
+ local_remaps.fini_deep ();
+ }
+
+ subr_remap_t global_remap;
+ hb_vector_t<subr_remap_t> local_remaps;
+};
+
+template <typename SUBSETTER, typename SUBRS, typename ACC, typename ENV, typename OPSET, op_code_t endchar_op=OpCode_Invalid>
+struct subr_subsetter_t
+{
+ /* Stores the accelerator and plan and puts the three parsed-string
+ * containers into their empty initial state; subset () sizes them later. */
+ subr_subsetter_t (ACC &acc_, const hb_subset_plan_t *plan_)
+ : acc (acc_), plan (plan_)
+ {
+ parsed_charstrings.init ();
+ parsed_global_subrs.init ();
+ parsed_local_subrs.init ();
+ }
+
+ /* Releases the closures, the remaps and every parsed string. */
+ ~subr_subsetter_t ()
+ {
+ closures.fini ();
+ remaps.fini ();
+ parsed_charstrings.fini_deep ();
+ parsed_global_subrs.fini_deep ();
+ parsed_local_subrs.fini_deep ();
+ }
+
+ /* Subroutine subsetting with --no-desubroutinize runs in phases:
+ *
+ * 1. execute charstrings/subroutines to determine subroutine closures
+ * 2. parse out all operators and numbers
+ * 3. mark hint operators and operands for removal if --no-hinting
+ * 4. re-encode all charstrings and subroutines with new subroutine numbers
+ *
+ * Phases #1 and #2 are done at the same time in collect_subrs ().
+ * Phase #3 walks charstrings/subroutines forward then backward (hence parsing required),
+ * because we can't tell if a number belongs to a hint op until we see the first moveto.
+ *
+ * Assumption: a callsubr/callgsubr operator must immediately follow a (biased) subroutine number
+ * within the same charstring/subroutine, e.g., not split across a charstring and a subroutine.
+ *
+ * Returns false on allocation failure, an out-of-range font dict index, or
+ * an interpreter failure. On success the remaps have been created from the
+ * final closures.
+ */
+ bool subset (void)
+ {
+ closures.init (acc.fdCount);
+ remaps.init (acc.fdCount);
+
+ parsed_charstrings.init (plan->num_output_glyphs ());
+ parsed_global_subrs.init (acc.globalSubrs->count);
+
+ if (unlikely (remaps.in_error()
+ || parsed_charstrings.in_error ()
+ || parsed_global_subrs.in_error ())) {
+ return false;
+ }
+
+ if (unlikely (!parsed_local_subrs.resize (acc.fdCount))) return false;
+
+ /* one parsed local subroutine vector per font dict */
+ for (unsigned int i = 0; i < acc.fdCount; i++)
+ {
+ parsed_local_subrs[i].init (acc.privateDicts[i].localSubrs->count);
+ if (unlikely (parsed_local_subrs[i].in_error ())) return false;
+ }
+ if (unlikely (!closures.valid))
+ return false;
+
+ /* phase 1 & 2 */
+ for (unsigned int i = 0; i < plan->num_output_glyphs (); i++)
+ {
+ hb_codepoint_t glyph;
+ if (!plan->old_gid_for_new_gid (i, &glyph))
+ continue;
+ const byte_str_t str = (*acc.charStrings)[glyph];
+ unsigned int fd = acc.fdSelect->get_fd (glyph);
+ if (unlikely (fd >= acc.fdCount))
+ return false;
+
+ cs_interpreter_t<ENV, OPSET, subr_subset_param_t> interp;
+ interp.env.init (str, acc, fd);
+
+ subr_subset_param_t param;
+ param.init (&parsed_charstrings[i],
+ &parsed_global_subrs, &parsed_local_subrs[fd],
+ closures.global_closure, closures.local_closures[fd],
+ plan->drop_hints);
+
+ if (unlikely (!interp.interpret (param)))
+ return false;
+
+ /* complete parsed string esp. copy CFF1 width or CFF2 vsindex to the parsed charstring for encoding */
+ SUBSETTER::complete_parsed_str (interp.env, param, parsed_charstrings[i]);
+ }
+
+ if (plan->drop_hints)
+ {
+ /* mark hint ops and arguments for drop */
+ for (unsigned int i = 0; i < plan->num_output_glyphs (); i++)
+ {
+ hb_codepoint_t glyph;
+ if (!plan->old_gid_for_new_gid (i, &glyph))
+ continue;
+ unsigned int fd = acc.fdSelect->get_fd (glyph);
+ if (unlikely (fd >= acc.fdCount))
+ return false;
+ subr_subset_param_t param;
+ param.init (&parsed_charstrings[i],
+ &parsed_global_subrs, &parsed_local_subrs[fd],
+ closures.global_closure, closures.local_closures[fd],
+ plan->drop_hints);
+
+ drop_hints_param_t drop;
+ if (drop_hints_in_str (parsed_charstrings[i], param, drop))
+ {
+ parsed_charstrings[i].set_hint_dropped ();
+ if (drop.vsindex_dropped)
+ parsed_charstrings[i].set_vsindex_dropped ();
+ }
+ }
+
+ /* after dropping hints recreate closures of actually used subrs */
+ closures.reset ();
+ for (unsigned int i = 0; i < plan->num_output_glyphs (); i++)
+ {
+ hb_codepoint_t glyph;
+ if (!plan->old_gid_for_new_gid (i, &glyph))
+ continue;
+ unsigned int fd = acc.fdSelect->get_fd (glyph);
+ if (unlikely (fd >= acc.fdCount))
+ return false;
+ subr_subset_param_t param;
+ param.init (&parsed_charstrings[i],
+ &parsed_global_subrs, &parsed_local_subrs[fd],
+ closures.global_closure, closures.local_closures[fd],
+ plan->drop_hints);
+ collect_subr_refs_in_str (parsed_charstrings[i], param);
+ }
+ }
+
+ /* assign new subroutine numbers from the final closures */
+ remaps.create (closures);
+
+ return true;
+ }
+
+  /* Re-encodes the parsed charstring of every output glyph into buffArray,
+   * one buffer per glyph. Returns false on allocation failure, an
+   * out-of-range font dict index, or an encoding error. */
+  bool encode_charstrings (str_buff_vec_t &buffArray) const
+  {
+    const unsigned int glyph_count = plan->num_output_glyphs ();
+    if (unlikely (!buffArray.resize (glyph_count)))
+      return false;
+
+    for (unsigned int new_gid = 0; new_gid < glyph_count; new_gid++)
+    {
+      hb_codepoint_t old_gid;
+      if (plan->old_gid_for_new_gid (new_gid, &old_gid))
+      {
+        unsigned int fd = acc.fdSelect->get_fd (old_gid);
+        if (unlikely (fd >= acc.fdCount))
+          return false;
+        if (unlikely (!encode_str (parsed_charstrings[new_gid], fd, buffArray[new_gid])))
+          return false;
+      }
+      else if (endchar_op != OpCode_Invalid)
+      {
+        /* add an endchar only charstring for a missing glyph if CFF1 */
+        buffArray[new_gid].push (endchar_op);
+      }
+    }
+    return true;
+  }
+
+  /* Re-encodes every subroutine that has an entry in remap into the slot of
+   * its new number; buffArray is sized to the remap's population. */
+  bool encode_subrs (const parsed_cs_str_vec_t &subrs, const subr_remap_t& remap, unsigned int fd, str_buff_vec_t &buffArray) const
+  {
+    if (unlikely (!buffArray.resize (remap.get_population ())))
+      return false;
+
+    for (unsigned int old_num = 0; old_num < subrs.length; old_num++)
+    {
+      hb_codepoint_t new_num = remap[old_num];
+      if (new_num == CFF_UNDEF_CODE)
+        continue; /* no new number: subroutine is not part of the subset */
+
+      if (unlikely (!encode_str (subrs[old_num], fd, buffArray[new_num])))
+        return false;
+    }
+    return true;
+  }
+
+  /* Re-encodes the retained global subroutines into buffArray. Font dict
+   * index 0 is passed as the fd for any local subroutine call found in a
+   * global subroutine. Const, matching encode_localsubrs (). */
+  bool encode_globalsubrs (str_buff_vec_t &buffArray) const
+  {
+    return encode_subrs (parsed_global_subrs, remaps.global_remap, 0, buffArray);
+  }
+
+ /* Re-encodes the retained local subroutines of font dict fd into buffArray. */
+ bool encode_localsubrs (unsigned int fd, str_buff_vec_t &buffArray) const
+ {
+ return encode_subrs (parsed_local_subrs[fd], remaps.local_remaps[fd], fd, buffArray);
+ }
+
+ protected:
+ /* State threaded through the hint-dropping walk of one charstring and the
+ * subroutines it calls:
+ * seen_moveto - a moveto operator has been encountered
+ * ends_in_hint - the string just walked ends with a hint operator
+ * all_dropped - every op before the return in the string just walked
+ * is marked for drop
+ * vsindex_dropped - a vsindex operator was marked for drop */
+ struct drop_hints_param_t
+ {
+ drop_hints_param_t ()
+ : seen_moveto (false),
+ ends_in_hint (false),
+ all_dropped (false),
+ vsindex_dropped (false) {}
+
+ bool seen_moveto;
+ bool ends_in_hint;
+ bool all_dropped;
+ bool vsindex_dropped;
+ };
+
+ /* Walks subroutine subr_num for hint dropping and decides whether the call
+ * at str.values[pos] should itself be dropped. Returns whatever
+ * drop_hints_in_str () reports for the subroutine. */
+ bool drop_hints_in_subr (parsed_cs_str_t &str, unsigned int pos,
+ parsed_cs_str_vec_t &subrs, unsigned int subr_num,
+ const subr_subset_param_t &param, drop_hints_param_t &drop)
+ {
+ drop.ends_in_hint = false;
+ bool has_hint = drop_hints_in_str (subrs[subr_num], param, drop);
+
+ /* if this subr ends with a stem hint (i.e., not a number; potential argument for moveto),
+ * then this entire subroutine must be a hint. drop its call. */
+ if (drop.ends_in_hint)
+ {
+ str.values[pos].set_drop ();
+ /* if this subr call is at the end of the parent subr, propagate the flag
+ * otherwise reset the flag */
+ if (!str.at_end (pos))
+ drop.ends_in_hint = false;
+ }
+ else if (drop.all_dropped)
+ {
+ /* nothing in the subroutine survives, so the call is not needed */
+ str.values[pos].set_drop ();
+ }
+
+ return has_hint;
+ }
+
+ /* returns true if it sees a hint op before the first moveto
+ *
+ * Marks hint operators in str (and, through calls, in its subroutines) for
+ * drop. When an op reports a hint, the not-yet-dropped values preceding it
+ * are marked too, walking backward until an already-dropped value is met.
+ * On return drop.all_dropped tells whether every op before the return is
+ * marked for drop. */
+ bool drop_hints_in_str (parsed_cs_str_t &str, const subr_subset_param_t &param, drop_hints_param_t &drop)
+ {
+ bool seen_hint = false;
+
+ for (unsigned int pos = 0; pos < str.values.length; pos++)
+ {
+ bool has_hint = false;
+ switch (str.values[pos].op)
+ {
+ case OpCode_callsubr:
+ has_hint = drop_hints_in_subr (str, pos,
+ *param.parsed_local_subrs, str.values[pos].subr_num,
+ param, drop);
+ break;
+
+ case OpCode_callgsubr:
+ has_hint = drop_hints_in_subr (str, pos,
+ *param.parsed_global_subrs, str.values[pos].subr_num,
+ param, drop);
+ break;
+
+ case OpCode_rmoveto:
+ case OpCode_hmoveto:
+ case OpCode_vmoveto:
+ drop.seen_moveto = true;
+ break;
+
+ case OpCode_hintmask:
+ case OpCode_cntrmask:
+ /* after a moveto only the mask op itself is dropped; before one it is
+ * handled like a stem hint below */
+ if (drop.seen_moveto)
+ {
+ str.values[pos].set_drop ();
+ break;
+ }
+ HB_FALLTHROUGH;
+
+ case OpCode_hstemhm:
+ case OpCode_vstemhm:
+ case OpCode_hstem:
+ case OpCode_vstem:
+ has_hint = true;
+ str.values[pos].set_drop ();
+ if (str.at_end (pos))
+ drop.ends_in_hint = true;
+ break;
+
+ case OpCode_dotsection:
+ str.values[pos].set_drop ();
+ break;
+
+ default:
+ /* NONE */
+ break;
+ }
+ if (has_hint)
+ {
+ /* walk backward dropping the values that precede the hint */
+ for (int i = pos - 1; i >= 0; i--)
+ {
+ parsed_cs_op_t &csop = str.values[(unsigned)i];
+ if (csop.for_drop ())
+ break;
+ csop.set_drop ();
+ if (csop.op == OpCode_vsindexcs)
+ drop.vsindex_dropped = true;
+ }
+ seen_hint |= has_hint;
+ }
+ }
+
+ /* Raise all_dropped flag if all operators except return are dropped from a subr.
+ * It may happen even after seeing the first moveto if a subr contains
+ * only (usually one) hintmask operator, then calls to this subr can be dropped.
+ */
+ drop.all_dropped = true;
+ for (unsigned int pos = 0; pos < str.values.length; pos++)
+ {
+ parsed_cs_op_t &csop = str.values[pos];
+ if (csop.op == OpCode_return)
+ break;
+ if (!csop.for_drop ())
+ {
+ drop.all_dropped = false;
+ break;
+ }
+ }
+
+ return seen_hint;
+ }
+
+ /* Adds subr_num to closure and then collects the references made by that
+ * subroutine. str and pos are accepted but not used here. */
+ void collect_subr_refs_in_subr (parsed_cs_str_t &str, unsigned int pos,
+ unsigned int subr_num, parsed_cs_str_vec_t &subrs,
+ hb_set_t *closure,
+ const subr_subset_param_t &param)
+ {
+ closure->add (subr_num);
+ collect_subr_refs_in_str (subrs[subr_num], param);
+ }
+
+  /* Records, in the local or global closure, every subroutine called by an
+   * op of str that is not marked for drop, following calls into the
+   * subroutines themselves. */
+  void collect_subr_refs_in_str (parsed_cs_str_t &str, const subr_subset_param_t &param)
+  {
+    for (unsigned int pos = 0; pos < str.values.length; pos++)
+    {
+      /* dropped ops contribute no references */
+      if (str.values[pos].for_drop ())
+        continue;
+
+      if (str.values[pos].op == OpCode_callsubr)
+      {
+        collect_subr_refs_in_subr (str, pos,
+                                   str.values[pos].subr_num, *param.parsed_local_subrs,
+                                   param.local_closure, param);
+      }
+      else if (str.values[pos].op == OpCode_callgsubr)
+      {
+        collect_subr_refs_in_subr (str, pos,
+                                   str.values[pos].subr_num, *param.parsed_global_subrs,
+                                   param.global_closure, param);
+      }
+    }
+  }
+
+ /* Re-encodes one parsed charstring or subroutine into buff. Ops marked for
+ * drop or skip are omitted; call operators are written with their remapped,
+ * biased subroutine number; everything else is copied byte for byte.
+ * Returns false if any append to buff failed. */
+ bool encode_str (const parsed_cs_str_t &str, const unsigned int fd, str_buff_t &buff) const
+ {
+ buff.init ();
+ str_encoder_t encoder (buff);
+ encoder.reset ();
+ /* if a prefix (CFF1 width or CFF2 vsindex) has been removed along with hints,
+ * re-insert it at the beginning of charstring */
+ if (str.has_prefix () && str.is_hint_dropped ())
+ {
+ encoder.encode_num (str.prefix_num ());
+ if (str.prefix_op () != OpCode_Invalid)
+ encoder.encode_op (str.prefix_op ());
+ }
+ for (unsigned int i = 0; i < str.get_count(); i++)
+ {
+ const parsed_cs_op_t &opstr = str.values[i];
+ if (!opstr.for_drop () && !opstr.for_skip ())
+ {
+ switch (opstr.op)
+ {
+ case OpCode_callsubr:
+ /* local calls use the remap of this string's font dict */
+ encoder.encode_int (remaps.local_remaps[fd].biased_num (opstr.subr_num));
+ encoder.encode_op (OpCode_callsubr);
+ break;
+
+ case OpCode_callgsubr:
+ encoder.encode_int (remaps.global_remap.biased_num (opstr.subr_num));
+ encoder.encode_op (OpCode_callgsubr);
+ break;
+
+ default:
+ encoder.copy_str (opstr.str);
+ break;
+ }
+ }
+ }
+ return !encoder.is_error ();
+ }
+
+ protected:
+ const ACC &acc;
+ const hb_subset_plan_t *plan;
+
+ subr_closures_t closures;
+
+ parsed_cs_str_vec_t parsed_charstrings;
+ parsed_cs_str_vec_t parsed_global_subrs;
+ hb_vector_t<parsed_cs_str_vec_t> parsed_local_subrs;
+
+ subr_remaps_t remaps;
+
+ private:
+ typedef typename SUBRS::count_type subr_count_type;
+};
+
+} /* namespace CFF */
+
+HB_INTERNAL bool
+hb_plan_subset_cff_fdselect (const hb_subset_plan_t *plan,
+ unsigned int fdCount,
+ const CFF::FDSelect &src, /* IN */
+ unsigned int &subset_fd_count /* OUT */,
+ unsigned int &subset_fdselect_size /* OUT */,
+ unsigned int &subset_fdselect_format /* OUT */,
+ hb_vector_t<CFF::code_pair_t> &fdselect_ranges /* OUT */,
+ hb_inc_bimap_t &fdmap /* OUT */);
+
+HB_INTERNAL bool
+hb_serialize_cff_fdselect (hb_serialize_context_t *c,
+ unsigned int num_glyphs,
+ const CFF::FDSelect &src,
+ unsigned int fd_count,
+ unsigned int fdselect_format,
+ unsigned int size,
+ const hb_vector_t<CFF::code_pair_t> &fdselect_ranges);
+
+#endif /* HB_SUBSET_CFF_COMMON_HH */
diff --git a/thirdparty/harfbuzz/src/hb-subset-cff1.cc b/thirdparty/harfbuzz/src/hb-subset-cff1.cc
new file mode 100644
index 0000000000..df322f8451
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-subset-cff1.cc
@@ -0,0 +1,940 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_SUBSET_CFF
+
+#include "hb-open-type.hh"
+#include "hb-ot-cff1-table.hh"
+#include "hb-set.h"
+#include "hb-bimap.hh"
+#include "hb-subset-cff1.hh"
+#include "hb-subset-plan.hh"
+#include "hb-subset-cff-common.hh"
+#include "hb-cff1-interp-cs.hh"
+
+using namespace CFF;
+
+struct remap_sid_t : hb_inc_bimap_t /* SID remapper: SIDs below num_std_strings and CFF_UNDEF_SID pass through, all others are renumbered via the bimap */
+{
+ unsigned int add (unsigned int sid) /* registers sid in the map and returns its remapped value */
+ {
+ if ((sid != CFF_UNDEF_SID) && !is_std_std (sid))
+ return offset_sid (hb_inc_bimap_t::add (unoffset_sid (sid))); /* the bimap stores sid - num_std_strings; shift its result back up */
+ else
+ return sid;
+ }
+
+ unsigned int operator[] (unsigned int sid) const /* lookup only (const); uses get () and never adds an entry */
+ {
+ if (is_std_std (sid) || (sid == CFF_UNDEF_SID))
+ return sid;
+ else
+ return offset_sid (get (unoffset_sid (sid)));
+ }
+
+ static const unsigned int num_std_strings = 391; /* SIDs below this value are treated as standard strings and never remapped */
+
+ static bool is_std_std (unsigned int sid) { return sid < num_std_strings; }
+ static unsigned int offset_sid (unsigned int sid) { return sid + num_std_strings; }
+ static unsigned int unoffset_sid (unsigned int sid) { return sid - num_std_strings; } /* callers above only pass sid >= num_std_strings, so this does not wrap */
+};
+
+struct cff1_sub_table_info_t : cff_sub_table_info_t /* CFF1 additions to the common sub-table info: encoding/charset links and the private dict info */
+{
+ cff1_sub_table_info_t ()
+ : cff_sub_table_info_t (),
+ encoding_link (0), /* 0 is tested as "no link" by cff1_top_dict_op_serializer_t */
+ charset_link (0)
+ {
+ privateDictInfo.init ();
+ }
+
+ objidx_t encoding_link;
+ objidx_t charset_link;
+ table_info_t privateDictInfo;
+};
+
+/* a view over a parsed out cff1_top_dict_values_t (referenced through base, not copied) augmented with additional operators */
+struct cff1_top_dict_values_mod_t : cff1_top_dict_values_t
+{
+ void init (const cff1_top_dict_values_t *base_= &Null (cff1_top_dict_values_t))
+ {
+ SUPER::init ();
+ base = base_; /* pointer is stored as-is; the base object must outlive this one */
+ }
+
+ void fini () { SUPER::fini (); }
+
+ unsigned get_count () const { return base->get_count () + SUPER::get_count (); } /* base values plus the values added to this object */
+ const cff1_top_dict_val_t &get_value (unsigned int i) const
+ {
+ if (i < base->get_count ()) /* indices below the base count address the base values */
+ return (*base)[i];
+ else
+ return SUPER::values[i - base->get_count ()]; /* the rest address this object's own values */
+ }
+ const cff1_top_dict_val_t &operator [] (unsigned int i) const { return get_value (i); }
+
+ void reassignSIDs (const remap_sid_t& sidmap) /* fills this object's nameSIDs with the remapped values of the base's nameSIDs */
+ {
+ for (unsigned int i = 0; i < name_dict_values_t::ValCount; i++)
+ nameSIDs[i] = sidmap[base->nameSIDs[i]];
+ }
+
+ protected:
+ typedef cff1_top_dict_values_t SUPER;
+ const cff1_top_dict_values_t *base;
+};
+
+struct top_dict_modifiers_t /* bundles the sub-table info and the remapped name SIDs handed to cff1_top_dict_op_serializer_t */
+{
+ top_dict_modifiers_t (const cff1_sub_table_info_t &info_,
+ const unsigned int (&nameSIDs_)[name_dict_values_t::ValCount])
+ : info (info_),
+ nameSIDs (nameSIDs_)
+ {}
+
+ const cff1_sub_table_info_t &info; /* held by reference, not copied */
+ const unsigned int (&nameSIDs)[name_dict_values_t::ValCount]; /* reference to the caller's array, indexed by name_dict_values_t index */
+};
+
+struct cff1_top_dict_op_serializer_t : cff_top_dict_op_serializer_t<cff1_top_dict_val_t> /* serializes one CFF1 top dict operator, substituting subset links and remapped SIDs */
+{
+ bool serialize (hb_serialize_context_t *c,
+ const cff1_top_dict_val_t &opstr,
+ const top_dict_modifiers_t &mod) const
+ {
+ TRACE_SERIALIZE (this);
+
+ op_code_t op = opstr.op;
+ switch (op)
+ {
+ case OpCode_charset:
+ if (mod.info.charset_link) /* a zero link means no subset charset was serialized; defer to the base serializer */
+ return_trace (FontDict::serialize_link4_op(c, op, mod.info.charset_link, whence_t::Absolute));
+ else
+ goto fall_back;
+
+ case OpCode_Encoding:
+ if (mod.info.encoding_link) /* same rule as charset: only emit a link when one was recorded */
+ return_trace (FontDict::serialize_link4_op(c, op, mod.info.encoding_link, whence_t::Absolute));
+ else
+ goto fall_back;
+
+ case OpCode_Private: /* operands are the private dict size followed by a link to it */
+ return_trace (UnsizedByteStr::serialize_int2 (c, mod.info.privateDictInfo.size) &&
+ Dict::serialize_link4_op (c, op, mod.info.privateDictInfo.link, whence_t::Absolute));
+
+ case OpCode_version: /* all name operators below emit the SID taken from mod.nameSIDs */
+ case OpCode_Notice:
+ case OpCode_Copyright:
+ case OpCode_FullName:
+ case OpCode_FamilyName:
+ case OpCode_Weight:
+ case OpCode_PostScript:
+ case OpCode_BaseFontName:
+ case OpCode_FontName:
+ return_trace (FontDict::serialize_int2_op (c, op, mod.nameSIDs[name_dict_values_t::name_op_to_index (op)]));
+
+ case OpCode_ROS:
+ {
+ /* for registry & ordering, reassigned SIDs are serialized;
+ * for supplement, the original byte string is copied along with the op code */
+ op_str_t supp_op;
+ supp_op.op = op;
+ if ( unlikely (!(opstr.str.length >= opstr.last_arg_offset + 3))) /* fail unless at least 3 bytes follow last_arg_offset */
+ return_trace (false);
+ supp_op.str = byte_str_t (&opstr.str + opstr.last_arg_offset, opstr.str.length - opstr.last_arg_offset); /* tail of the original bytes starting at the last argument */
+ return_trace (UnsizedByteStr::serialize_int2 (c, mod.nameSIDs[name_dict_values_t::registry]) &&
+ UnsizedByteStr::serialize_int2 (c, mod.nameSIDs[name_dict_values_t::ordering]) &&
+ copy_opstr (c, supp_op));
+ }
+ fall_back:
+ default: /* everything else is handled by the generic top dict serializer */
+ return_trace (cff_top_dict_op_serializer_t<cff1_top_dict_val_t>::serialize (c, opstr, mod.info));
+ }
+ return_trace (true); /* not reached: every switch path above returns */
+ }
+
+};
+
+struct cff1_font_dict_op_serializer_t : cff_font_dict_op_serializer_t /* font dict operator serializer that substitutes mod.fontName for FontName */
+{
+ bool serialize (hb_serialize_context_t *c,
+ const op_str_t &opstr,
+ const cff1_font_dict_values_mod_t &mod) const
+ {
+ TRACE_SERIALIZE (this);
+
+ if (opstr.op == OpCode_FontName) /* FontName operand comes from mod.fontName, not from the original bytes */
+ return_trace (FontDict::serialize_int2_op (c, opstr.op, mod.fontName));
+ else
+ return_trace (SUPER::serialize (c, opstr, mod.privateDictInfo)); /* all other operators go to the base serializer with the private dict info */
+ }
+
+ private:
+ typedef cff_font_dict_op_serializer_t SUPER;
+};
+
+struct cff1_cs_opset_flatten_t : cff1_cs_opset_t<cff1_cs_opset_flatten_t, flatten_param_t> /* charstring opset that re-encodes arguments and operators into param.flatStr */
+{
+ static void flush_args_and_op (op_code_t op, cff1_cs_interp_env_t &env, flatten_param_t& param)
+ {
+ if (env.arg_start > 0) /* presumably a leading width argument precedes arg_start — confirm in the interpreter env */
+ flush_width (env, param);
+
+ switch (op)
+ {
+ case OpCode_hstem: /* hint operators: dropped together with their arguments when drop_hints is set */
+ case OpCode_hstemhm:
+ case OpCode_vstem:
+ case OpCode_vstemhm:
+ case OpCode_hintmask:
+ case OpCode_cntrmask:
+ case OpCode_dotsection:
+ if (param.drop_hints)
+ {
+ env.clear_args ();
+ return;
+ }
+ HB_FALLTHROUGH;
+
+ default:
+ SUPER::flush_args_and_op (op, env, param);
+ break;
+ }
+ }
+ static void flush_args (cff1_cs_interp_env_t &env, flatten_param_t& param)
+ {
+ str_encoder_t encoder (param.flatStr);
+ for (unsigned int i = env.arg_start; i < env.argStack.get_count (); i++) /* encode evaluated arguments from arg_start onward */
+ encoder.encode_num (env.eval_arg (i));
+ SUPER::flush_args (env, param);
+ }
+
+ static void flush_op (op_code_t op, cff1_cs_interp_env_t &env, flatten_param_t& param) /* env is not used here */
+ {
+ str_encoder_t encoder (param.flatStr);
+ encoder.encode_op (op);
+ }
+
+ static void flush_width (cff1_cs_interp_env_t &env, flatten_param_t& param)
+ {
+ assert (env.has_width); /* callers must only flush a width the interpreter actually recorded */
+ str_encoder_t encoder (param.flatStr);
+ encoder.encode_num (env.width);
+ }
+
+ static void flush_hintmask (op_code_t op, cff1_cs_interp_env_t &env, flatten_param_t& param)
+ {
+ SUPER::flush_hintmask (op, env, param);
+ if (!param.drop_hints) /* mask bytes are copied only when hints are kept */
+ {
+ str_encoder_t encoder (param.flatStr);
+ for (unsigned int i = 0; i < env.hintmask_size; i++) /* copy hintmask_size bytes read through env.str_ref */
+ encoder.encode_byte (env.str_ref[i]);
+ }
+ }
+
+ private:
+ typedef cff1_cs_opset_t<cff1_cs_opset_flatten_t, flatten_param_t> SUPER;
+};
+
+struct range_list_t : hb_vector_t<code_pair_t>
+{
+ /* replace the first glyph ID in the "glyph" field of each range with an nLeft value */
+ bool complete (unsigned int last_glyph) /* last_glyph: one past the final glyph of the last range; returns true if any nLeft needs more than one byte */
+ {
+ bool two_byte = false;
+ for (unsigned int i = (*this).length; i > 0; i--) /* walk back to front so each range ends where the following one starts */
+ {
+ code_pair_t &pair = (*this)[i - 1];
+ unsigned int nLeft = last_glyph - pair.glyph - 1; /* number of glyphs in the range after its first one */
+ if (nLeft >= 0x100)
+ two_byte = true;
+ last_glyph = pair.glyph;
+ pair.glyph = nLeft;
+ }
+ return two_byte;
+ }
+};
+
+struct cff1_cs_opset_subr_subset_t : cff1_cs_opset_t<cff1_cs_opset_subr_subset_t, subr_subset_param_t> /* charstring opset that records parsed operators and the subroutines they call */
+{
+ static void process_op (op_code_t op, cff1_cs_interp_env_t &env, subr_subset_param_t& param)
+ {
+ switch (op) {
+
+ case OpCode_return: /* record the op, mark the current string parsed, then resume in the caller */
+ param.current_parsed_str->add_op (op, env.str_ref);
+ param.current_parsed_str->set_parsed ();
+ env.return_from_subr ();
+ param.set_current_str (env, false);
+ break;
+
+ case OpCode_endchar: /* here the op is recorded before SUPER::process_op runs */
+ param.current_parsed_str->add_op (op, env.str_ref);
+ param.current_parsed_str->set_parsed ();
+ SUPER::process_op (op, env, param);
+ break;
+
+ case OpCode_callsubr:
+ process_call_subr (op, CSType_LocalSubr, env, param, env.localSubrs, param.local_closure);
+ break;
+
+ case OpCode_callgsubr:
+ process_call_subr (op, CSType_GlobalSubr, env, param, env.globalSubrs, param.global_closure);
+ break;
+
+ default: /* here SUPER::process_op runs first and the op is recorded afterwards */
+ SUPER::process_op (op, env, param);
+ param.current_parsed_str->add_op (op, env.str_ref);
+ break;
+ }
+ }
+
+ protected:
+ static void process_call_subr (op_code_t op, cs_type_t type,
+ cff1_cs_interp_env_t &env, subr_subset_param_t& param,
+ cff1_biased_subrs_t& subrs, hb_set_t *closure)
+ {
+ byte_str_ref_t str_ref = env.str_ref; /* snapshot taken before call_subr; presumably call_subr repoints env.str_ref — confirm */
+ env.call_subr (subrs, type);
+ param.current_parsed_str->add_call_op (op, str_ref, env.context.subr_num); /* recorded against the calling string, with the called subr number */
+ closure->add (env.context.subr_num); /* remember the subroutine as used */
+ param.set_current_str (env, true);
+ }
+
+ private:
+ typedef cff1_cs_opset_t<cff1_cs_opset_subr_subset_t, subr_subset_param_t> SUPER;
+};
+
+struct cff1_subr_subsetter_t : subr_subsetter_t<cff1_subr_subsetter_t, CFF1Subrs, const OT::cff1::accelerator_subset_t, cff1_cs_interp_env_t, cff1_cs_opset_subr_subset_t, OpCode_endchar>
+{
+ cff1_subr_subsetter_t (const OT::cff1::accelerator_subset_t &acc_, const hb_subset_plan_t *plan_)
+ : subr_subsetter_t (acc_, plan_) {}
+
+ static void complete_parsed_str (cff1_cs_interp_env_t &env, subr_subset_param_t& param, parsed_cs_str_t &charstring) /* finishing step for one parsed charstring */
+ {
+ /* insert width at the beginning of the charstring as necessary */
+ if (env.has_width)
+ charstring.set_prefix (env.width);
+
+ /* subroutines/charstring left on the call stack are legally left
+ * unmarked when a subroutine terminates with endchar. mark them.
+ */
+ param.current_parsed_str->set_parsed ();
+ for (unsigned int i = 0; i < env.callStack.get_count (); i++)
+ {
+ parsed_cs_str_t *parsed_str = param.get_parsed_str_for_context (env.callStack[i]);
+ if (likely (parsed_str))
+ parsed_str->set_parsed ();
+ else
+ env.set_error (); /* a call stack entry with no parsed string is flagged as an error on env */
+ }
+ }
+};
+
+struct cff_subset_plan { /* everything computed up front for subsetting a CFF1 table; filled by create () and consumed by _serialize_cff1 () */
+ cff_subset_plan ()
+ : info (),
+ orig_fdcount (0),
+ subset_fdcount (1),
+ subset_fdselect_format (0),
+ drop_hints (false),
+ desubroutinize(false)
+ { /* NOTE(review): num_glyphs, gid_renum, subset_encoding, subset_charset and the enc/charset format fields are not set here; they are assigned in create () and the plan_* helpers */
+ topdict_mod.init ();
+ subset_fdselect_ranges.init ();
+ fdmap.init ();
+ subset_charstrings.init ();
+ subset_globalsubrs.init ();
+ subset_localsubrs.init ();
+ fontdicts_mod.init ();
+ subset_enc_code_ranges.init ();
+ subset_enc_supp_codes.init ();
+ subset_charset_ranges.init ();
+ sidmap.init ();
+ for (unsigned int i = 0; i < name_dict_values_t::ValCount; i++)
+ topDictModSIDs[i] = CFF_UNDEF_SID; /* every name SID starts out undefined */
+ }
+
+ ~cff_subset_plan ()
+ {
+ topdict_mod.fini ();
+ subset_fdselect_ranges.fini ();
+ fdmap.fini ();
+ subset_charstrings.fini_deep ();
+ subset_globalsubrs.fini_deep ();
+ subset_localsubrs.fini_deep ();
+ fontdicts_mod.fini ();
+ subset_enc_code_ranges.fini ();
+ subset_enc_supp_codes.fini ();
+ subset_charset_ranges.fini ();
+ sidmap.fini ();
+ }
+
+ void plan_subset_encoding (const OT::cff1::accelerator_subset_t &acc, hb_subset_plan_t *plan) /* builds the code ranges and supplement codes and picks subset_enc_format */
+ {
+ const Encoding *encoding = acc.encoding;
+ unsigned int size0, size1, supp_size;
+ hb_codepoint_t code, last_code = CFF_UNDEF_CODE;
+ hb_vector_t<hb_codepoint_t> supp_codes;
+
+ if (unlikely (!subset_enc_code_ranges.resize (0)))
+ {
+ plan->check_success (false);
+ return;
+ }
+
+ supp_size = 0; /* NOTE(review): supp_size is accumulated below but never read in this function */
+ supp_codes.init ();
+
+ subset_enc_num_codes = plan->num_output_glyphs () - 1; /* every output glyph except glyph 0 */
+ unsigned int glyph;
+ for (glyph = 1; glyph < plan->num_output_glyphs (); glyph++) /* glyph 0 is skipped */
+ {
+ hb_codepoint_t old_glyph;
+ if (!plan->old_gid_for_new_gid (glyph, &old_glyph))
+ {
+ /* Retain the code for the old missing glyph ID */
+ old_glyph = glyph;
+ }
+ code = acc.glyph_to_code (old_glyph);
+ if (code == CFF_UNDEF_CODE) /* stop at the first glyph without a code; only the glyphs before it are encoded */
+ {
+ subset_enc_num_codes = glyph - 1;
+ break;
+ }
+
+ if ((last_code == CFF_UNDEF_CODE) || (code != last_code + 1)) /* start a new range whenever the code is not consecutive */
+ {
+ code_pair_t pair = { code, glyph };
+ subset_enc_code_ranges.push (pair);
+ }
+ last_code = code;
+
+ if (encoding != &Null (Encoding))
+ {
+ hb_codepoint_t sid = acc.glyph_to_sid (old_glyph);
+ encoding->get_supplement_codes (sid, supp_codes);
+ for (unsigned int i = 0; i < supp_codes.length; i++)
+ {
+ code_pair_t pair = { supp_codes[i], sid }; /* supplement entries pair a code with the glyph's SID */
+ subset_enc_supp_codes.push (pair);
+ }
+ supp_size += SuppEncoding::static_size * supp_codes.length;
+ }
+ }
+ supp_codes.fini ();
+
+ subset_enc_code_ranges.complete (glyph); /* turns each range's glyph field into nLeft; the returned two-byte flag is ignored here */
+
+ assert (subset_enc_num_codes <= 0xFF);
+ size0 = Encoding0::min_size + HBUINT8::static_size * subset_enc_num_codes;
+ size1 = Encoding1::min_size + Encoding1_Range::static_size * subset_enc_code_ranges.length;
+
+ if (size0 < size1) /* pick the smaller format; format 1 wins ties */
+ subset_enc_format = 0;
+ else
+ subset_enc_format = 1;
+ }
+
+ void plan_subset_charset (const OT::cff1::accelerator_subset_t &acc, hb_subset_plan_t *plan) /* builds the SID ranges and picks subset_charset_format */
+ {
+ unsigned int size0, size_ranges;
+ hb_codepoint_t sid, last_sid = CFF_UNDEF_CODE;
+
+ if (unlikely (!subset_charset_ranges.resize (0)))
+ {
+ plan->check_success (false);
+ return;
+ }
+
+ unsigned int glyph;
+ for (glyph = 1; glyph < plan->num_output_glyphs (); glyph++) /* glyph 0 is skipped */
+ {
+ hb_codepoint_t old_glyph;
+ if (!plan->old_gid_for_new_gid (glyph, &old_glyph))
+ {
+ /* Retain the SID for the old missing glyph ID */
+ old_glyph = glyph;
+ }
+ sid = acc.glyph_to_sid (old_glyph);
+
+ if (!acc.is_CID ()) /* for non-CID fonts the value goes through sidmap; for CID fonts it is used unchanged */
+ sid = sidmap.add (sid);
+
+ if ((last_sid == CFF_UNDEF_CODE) || (sid != last_sid + 1)) /* start a new range whenever the SID is not consecutive */
+ {
+ code_pair_t pair = { sid, glyph };
+ subset_charset_ranges.push (pair);
+ }
+ last_sid = sid;
+ }
+
+ bool two_byte = subset_charset_ranges.complete (glyph); /* true when some range's nLeft does not fit in one byte */
+
+ size0 = Charset0::min_size + HBUINT16::static_size * (plan->num_output_glyphs () - 1);
+ if (!two_byte)
+ size_ranges = Charset1::min_size + Charset1_Range::static_size * subset_charset_ranges.length;
+ else
+ size_ranges = Charset2::min_size + Charset2_Range::static_size * subset_charset_ranges.length;
+
+ if (size0 < size_ranges) /* format 0 only when strictly smaller than the range format */
+ subset_charset_format = 0;
+ else if (!two_byte)
+ subset_charset_format = 1;
+ else
+ subset_charset_format = 2;
+ }
+
+ bool collect_sids_in_dicts (const OT::cff1::accelerator_subset_t &acc) /* registers dict-referenced SIDs in sidmap; as written it always returns true */
+ {
+ sidmap.reset ();
+
+ for (unsigned int i = 0; i < name_dict_values_t::ValCount; i++)
+ {
+ unsigned int sid = acc.topDict.nameSIDs[i];
+ if (sid != CFF_UNDEF_SID)
+ {
+ (void)sidmap.add (sid);
+ topDictModSIDs[i] = sidmap[sid]; /* remembered for the top dict serializer */
+ }
+ }
+
+ if (acc.fdArray != &Null (CFF1FDArray))
+ for (unsigned int i = 0; i < orig_fdcount; i++)
+ if (fdmap.has (i)) /* only font dicts retained in the subset contribute their fontName */
+ (void)sidmap.add (acc.fontDicts[i].fontName);
+
+ return true;
+ }
+
+ bool create (const OT::cff1::accelerator_subset_t &acc,
+ hb_subset_plan_t *plan) /* fills in the whole plan; returns false on any failure */
+ {
+ /* make sure notdef is first */
+ hb_codepoint_t old_glyph;
+ if (!plan->old_gid_for_new_gid (0, &old_glyph) || (old_glyph != 0)) return false;
+
+ num_glyphs = plan->num_output_glyphs ();
+ orig_fdcount = acc.fdCount;
+ drop_hints = plan->drop_hints;
+ desubroutinize = plan->desubroutinize;
+
+ /* check whether the subset renumbers any glyph IDs */
+ gid_renum = false;
+ for (hb_codepoint_t new_glyph = 0; new_glyph < plan->num_output_glyphs (); new_glyph++)
+ {
+ if (!plan->old_gid_for_new_gid(new_glyph, &old_glyph))
+ continue;
+ if (new_glyph != old_glyph) {
+ gid_renum = true;
+ break;
+ }
+ }
+
+ subset_charset = gid_renum || !acc.is_predef_charset (); /* a custom charset is written if glyphs were renumbered or the source one is not predefined */
+ subset_encoding = !acc.is_CID() && !acc.is_predef_encoding (); /* a custom encoding is written only for non-CID fonts without a predefined one */
+
+ /* top dict INDEX */
+ {
+ /* Add encoding/charset to a (copy of) top dict as necessary */
+ topdict_mod.init (&acc.topDict);
+ bool need_to_add_enc = (subset_encoding && !acc.topDict.has_op (OpCode_Encoding));
+ bool need_to_add_set = (subset_charset && !acc.topDict.has_op (OpCode_charset));
+ if (need_to_add_enc || need_to_add_set)
+ {
+ if (need_to_add_enc)
+ topdict_mod.add_op (OpCode_Encoding);
+ if (need_to_add_set)
+ topdict_mod.add_op (OpCode_charset);
+ }
+ }
+
+ /* Determine re-mapping of font index as fdmap among other info */
+ if (acc.fdSelect != &Null (CFF1FDSelect))
+ {
+ if (unlikely (!hb_plan_subset_cff_fdselect (plan,
+ orig_fdcount,
+ *acc.fdSelect,
+ subset_fdcount,
+ info.fd_select.size,
+ subset_fdselect_format,
+ subset_fdselect_ranges,
+ fdmap)))
+ return false;
+ }
+ else
+ fdmap.identity (1); /* no FDSelect: identity map with a single entry */
+
+ /* remove unused SIDs & reassign SIDs */
+ {
+ /* SIDs for name strings in dicts are added before glyph names so they fit in 16-bit int range */
+ if (unlikely (!collect_sids_in_dicts (acc)))
+ return false;
+ if (unlikely (sidmap.get_population () > 0x8000)) /* assumption: a dict won't reference that many strings */
+ return false;
+
+ if (subset_charset) plan_subset_charset (acc, plan);
+
+ topdict_mod.reassignSIDs (sidmap);
+ }
+
+ if (desubroutinize)
+ {
+ /* Flatten global & local subrs */
+ subr_flattener_t<const OT::cff1::accelerator_subset_t, cff1_cs_interp_env_t, cff1_cs_opset_flatten_t, OpCode_endchar>
+ flattener(acc, plan);
+ if (!flattener.flatten (subset_charstrings))
+ return false;
+ }
+ else
+ {
+ cff1_subr_subsetter_t subr_subsetter (acc, plan);
+
+ /* Subset subrs: collect used subroutines, leaving all unused ones behind */
+ if (!subr_subsetter.subset ())
+ return false;
+
+ /* encode charstrings, global subrs, local subrs with new subroutine numbers */
+ if (!subr_subsetter.encode_charstrings (subset_charstrings))
+ return false;
+
+ if (!subr_subsetter.encode_globalsubrs (subset_globalsubrs))
+ return false;
+
+ /* local subrs */
+ if (!subset_localsubrs.resize (orig_fdcount)) /* one slot per original font dict, indexed by the original fd */
+ return false;
+ for (unsigned int fd = 0; fd < orig_fdcount; fd++)
+ {
+ subset_localsubrs[fd].init ();
+ if (fdmap.has (fd)) /* slots of dropped font dicts stay empty */
+ {
+ if (!subr_subsetter.encode_localsubrs (fd, subset_localsubrs[fd]))
+ return false;
+ }
+ }
+ }
+
+ /* Encoding */
+ if (subset_encoding)
+ plan_subset_encoding (acc, plan);
+
+ /* private dicts & local subrs */
+ if (!acc.is_CID ())
+ fontdicts_mod.push (cff1_font_dict_values_mod_t ()); /* non-CID: a single default-constructed entry */
+ else
+ {
+ + hb_iter (acc.fontDicts)
+ | hb_filter ([&] (const cff1_font_dict_values_t &_)
+ { return fdmap.has (&_ - &acc.fontDicts[0]); } ) /* pointer difference gives the font dict's index */
+ | hb_map ([&] (const cff1_font_dict_values_t &_)
+ {
+ cff1_font_dict_values_mod_t mod;
+ mod.init (&_, sidmap[_.fontName]); /* pairs the original dict with its remapped fontName SID */
+ return mod;
+ })
+ | hb_sink (fontdicts_mod)
+ ;
+ }
+
+ return ((subset_charstrings.length == plan->num_output_glyphs ()) /* success needs one charstring per output glyph ... */
+ && (fontdicts_mod.length == subset_fdcount)); /* ... and one font dict per subset FD */
+ }
+
+ cff1_top_dict_values_mod_t topdict_mod;
+ cff1_sub_table_info_t info;
+
+ unsigned int num_glyphs;
+ unsigned int orig_fdcount;
+ unsigned int subset_fdcount;
+ unsigned int subset_fdselect_format;
+ hb_vector_t<code_pair_t> subset_fdselect_ranges;
+
+ /* font dict index remap table from fullset FDArray to subset FDArray.
+ * set to CFF_UNDEF_CODE if excluded from subset */
+ hb_inc_bimap_t fdmap;
+
+ str_buff_vec_t subset_charstrings;
+ str_buff_vec_t subset_globalsubrs;
+ hb_vector_t<str_buff_vec_t> subset_localsubrs; /* indexed by original fd; resized only on the non-desubroutinize path */
+ hb_vector_t<cff1_font_dict_values_mod_t> fontdicts_mod;
+
+ bool drop_hints;
+
+ bool gid_renum;
+ bool subset_encoding;
+ uint8_t subset_enc_format; /* assigned only by plan_subset_encoding () */
+ unsigned int subset_enc_num_codes;
+ range_list_t subset_enc_code_ranges;
+ hb_vector_t<code_pair_t> subset_enc_supp_codes;
+
+ uint8_t subset_charset_format; /* assigned only by plan_subset_charset () */
+ range_list_t subset_charset_ranges;
+ bool subset_charset;
+
+ remap_sid_t sidmap;
+ unsigned int topDictModSIDs[name_dict_values_t::ValCount];
+
+ bool desubroutinize;
+};
+
+static bool _serialize_cff1 (hb_serialize_context_t *c, /* writes the subset CFF1 table described by plan into c; returns false on any failure */
+ cff_subset_plan &plan,
+ const OT::cff1::accelerator_subset_t &acc,
+ unsigned int num_glyphs)
+{
+ /* private dicts & local subrs */
+ for (int i = (int)acc.privateDicts.length; --i >= 0 ;) /* visits indices from length - 1 down to 0 */
+ {
+ if (plan.fdmap.has (i))
+ {
+ objidx_t subrs_link = 0; /* stays 0 when this font dict has no local subrs to write */
+ if (plan.subset_localsubrs[i].length > 0)
+ {
+ CFF1Subrs *dest = c->start_embed <CFF1Subrs> ();
+ if (unlikely (!dest)) return false;
+ c->push ();
+ if (likely (dest && dest->serialize (c, plan.subset_localsubrs[i]))) /* NOTE(review): dest was already null-checked above, so the dest && test is redundant */
+ subrs_link = c->pop_pack ();
+ else
+ {
+ c->pop_discard ();
+ return false;
+ }
+ }
+
+ PrivateDict *pd = c->start_embed<PrivateDict> ();
+ if (unlikely (!pd)) return false;
+ c->push ();
+ cff_private_dict_op_serializer_t privSzr (plan.desubroutinize, plan.drop_hints);
+ /* N.B. local subrs immediately follows its corresponding private dict. i.e., subr offset == private dict size */
+ if (likely (pd->serialize (c, acc.privateDicts[i], privSzr, subrs_link)))
+ {
+ unsigned fd = plan.fdmap[i]; /* subset font dict index for original index i */
+ plan.fontdicts_mod[fd].privateDictInfo.size = c->length (); /* size is read before pop_pack () closes the object */
+ plan.fontdicts_mod[fd].privateDictInfo.link = c->pop_pack ();
+ }
+ else
+ {
+ c->pop_discard ();
+ return false;
+ }
+ }
+ }
+
+ if (!acc.is_CID ())
+ plan.info.privateDictInfo = plan.fontdicts_mod[0].privateDictInfo; /* non-CID: the top dict's Private op uses the single font dict's info */
+
+ /* CharStrings */
+ {
+ CFF1CharStrings *cs = c->start_embed<CFF1CharStrings> ();
+ if (unlikely (!cs)) return false;
+ c->push ();
+ if (likely (cs->serialize (c, plan.subset_charstrings)))
+ plan.info.char_strings_link = c->pop_pack ();
+ else
+ {
+ c->pop_discard ();
+ return false;
+ }
+ }
+
+ /* FDArray (FD Index) */
+ if (acc.fdArray != &Null (CFF1FDArray))
+ {
+ CFF1FDArray *fda = c->start_embed<CFF1FDArray> ();
+ if (unlikely (!fda)) return false;
+ c->push ();
+ cff1_font_dict_op_serializer_t fontSzr;
+ auto it = + hb_zip (+ hb_iter (plan.fontdicts_mod), + hb_iter (plan.fontdicts_mod)); /* fontdicts_mod is zipped with itself: each entry is both the dict and its modifier */
+ if (likely (fda->serialize (c, it, fontSzr)))
+ plan.info.fd_array_link = c->pop_pack (false); /* NOTE(review): meaning of the false argument is not visible here — see hb_serialize_context_t::pop_pack */
+ else
+ {
+ c->pop_discard ();
+ return false;
+ }
+ }
+
+ /* FDSelect */
+ if (acc.fdSelect != &Null (CFF1FDSelect))
+ {
+ c->push ();
+ if (likely (hb_serialize_cff_fdselect (c, num_glyphs, *acc.fdSelect, acc.fdCount,
+ plan.subset_fdselect_format, plan.info.fd_select.size,
+ plan.subset_fdselect_ranges)))
+ plan.info.fd_select.link = c->pop_pack ();
+ else
+ {
+ c->pop_discard ();
+ return false;
+ }
+ }
+
+ /* Charset */
+ if (plan.subset_charset)
+ {
+ Charset *dest = c->start_embed<Charset> ();
+ if (unlikely (!dest)) return false;
+ c->push ();
+ if (likely (dest->serialize (c,
+ plan.subset_charset_format,
+ plan.num_glyphs, /* uses the plan's glyph count, not the num_glyphs parameter */
+ plan.subset_charset_ranges)))
+ plan.info.charset_link = c->pop_pack ();
+ else
+ {
+ c->pop_discard ();
+ return false;
+ }
+ }
+
+ /* Encoding */
+ if (plan.subset_encoding)
+ {
+ Encoding *dest = c->start_embed<Encoding> ();
+ if (unlikely (!dest)) return false;
+ c->push ();
+ if (likely (dest->serialize (c,
+ plan.subset_enc_format,
+ plan.subset_enc_num_codes,
+ plan.subset_enc_code_ranges,
+ plan.subset_enc_supp_codes)))
+ plan.info.encoding_link = c->pop_pack ();
+ else
+ {
+ c->pop_discard ();
+ return false;
+ }
+ }
+
+ /* global subrs */
+ {
+ c->push (); /* NOTE(review): unlike the sections above, push () precedes start_embed () here */
+ CFF1Subrs *dest = c->start_embed <CFF1Subrs> ();
+ if (unlikely (!dest)) return false; /* NOTE(review): this early return has no matching pop_discard () for the push () above */
+ if (likely (dest->serialize (c, plan.subset_globalsubrs)))
+ c->pop_pack (); /* the returned object index is not stored */
+ else
+ {
+ c->pop_discard ();
+ return false;
+ }
+ }
+
+ /* String INDEX */
+ {
+ CFF1StringIndex *dest = c->start_embed<CFF1StringIndex> ();
+ if (unlikely (!dest)) return false;
+ c->push ();
+ if (likely (dest->serialize (c, *acc.stringIndex, plan.sidmap)))
+ c->pop_pack (); /* the returned object index is not stored */
+ else
+ {
+ c->pop_discard ();
+ return false;
+ }
+ }
+
+ OT::cff1 *cff = c->allocate_min<OT::cff1> (); /* the table header is allocated only after all the objects above */
+ if (unlikely (!cff))
+ return false;
+
+ /* header */
+ cff->version.major = 0x01;
+ cff->version.minor = 0x00;
+ cff->nameIndex = cff->min_size; /* name INDEX starts right after the fixed-size header */
+ cff->offSize = 4; /* unused? */
+
+ /* name INDEX */
+ if (unlikely (!(*acc.nameIndex).copy (c))) return false; /* copied unchanged from the source font */
+
+ /* top dict INDEX */
+ {
+ /* serialize singleton TopDict */
+ TopDict *top = c->start_embed<TopDict> ();
+ if (!top) return false;
+ c->push ();
+ cff1_top_dict_op_serializer_t topSzr;
+ unsigned top_size = 0;
+ top_dict_modifiers_t modifier (plan.info, plan.topDictModSIDs);
+ if (likely (top->serialize (c, plan.topdict_mod, topSzr, modifier)))
+ {
+ top_size = c->length (); /* captured before pop_pack (); used for the INDEX header below */
+ c->pop_pack (false);
+ }
+ else
+ {
+ c->pop_discard ();
+ return false;
+ }
+ /* serialize INDEX header for above */
+ CFF1Index *dest = c->start_embed<CFF1Index> ();
+ if (!dest) return false;
+ return dest->serialize_header (c, hb_iter (hb_array_t<unsigned> (&top_size, 1))); /* one-element INDEX whose only entry has size top_size */
+ }
+}
+
+static bool
+_hb_subset_cff1 (const OT::cff1::accelerator_subset_t &acc,
+ hb_subset_context_t *c) /* builds a cff_subset_plan from acc and c->plan, then serializes it through c->serializer */
+{
+ cff_subset_plan cff_plan;
+
+ if (unlikely (!cff_plan.create (acc, c->plan)))
+ {
+ DEBUG_MSG(SUBSET, nullptr, "Failed to generate a cff subsetting plan.");
+ return false;
+ }
+
+ return _serialize_cff1 (c->serializer, cff_plan, acc, c->plan->num_output_glyphs ()); /* result of the serialization step is the overall result */
+}
+
+/**
+ * hb_subset_cff1:
+ * Subsets the CFF table according to a provided plan.
+ *
+ * Return value: true if the source table is valid and was subset successfully, false otherwise.
+ **/
+bool
+hb_subset_cff1 (hb_subset_context_t *c)
+{
+ OT::cff1::accelerator_subset_t acc;
+ acc.init (c->plan->source);
+ bool result = likely (acc.is_valid ()) && _hb_subset_cff1 (acc, c); /* subsetting is skipped when the accelerator is not valid */
+ acc.fini (); /* runs on both the success and the failure path */
+
+ return result;
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-subset-cff1.hh b/thirdparty/harfbuzz/src/hb-subset-cff1.hh
new file mode 100644
index 0000000000..aaf5def1ed
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-subset-cff1.hh
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+
+#ifndef HB_SUBSET_CFF1_HH
+#define HB_SUBSET_CFF1_HH
+
+#include "hb.hh"
+
+#include "hb-subset-plan.hh"
+
+HB_INTERNAL bool
+hb_subset_cff1 (hb_subset_context_t *c);
+
+#endif /* HB_SUBSET_CFF1_HH */
diff --git a/thirdparty/harfbuzz/src/hb-subset-cff2.cc b/thirdparty/harfbuzz/src/hb-subset-cff2.cc
new file mode 100644
index 0000000000..17ee040deb
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-subset-cff2.cc
@@ -0,0 +1,488 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+
+#include "hb.hh"
+
+#ifndef HB_NO_SUBSET_CFF
+
+#include "hb-open-type.hh"
+#include "hb-ot-cff2-table.hh"
+#include "hb-set.h"
+#include "hb-subset-cff2.hh"
+#include "hb-subset-plan.hh"
+#include "hb-subset-cff-common.hh"
+#include "hb-cff2-interp-cs.hh"
+
+using namespace CFF;
+
+/* CFF2 flavour of cff_sub_table_info_t: carries everything the base
+ * struct tracks plus the serializer link of the variation store. */
+struct cff2_sub_table_info_t : cff_sub_table_info_t
+{
+  /* Object index of the serialized variation store; starts out as 0 and
+   * is filled in once the store has been packed. */
+  objidx_t var_store_link;
+
+  cff2_sub_table_info_t () : cff_sub_table_info_t (), var_store_link (0) {}
+};
+
+/* Top DICT operator serializer for CFF2.  Only the vstore operator is
+ * handled here; every other operator is delegated to the generic
+ * cff_top_dict_op_serializer_t. */
+struct cff2_top_dict_op_serializer_t : cff_top_dict_op_serializer_t<>
+{
+  bool serialize (hb_serialize_context_t *c,
+                  const op_str_t &opstr,
+                  const cff2_sub_table_info_t &info) const
+  {
+    TRACE_SERIALIZE (this);
+
+    /* vstore is emitted as a link operator that targets the packed
+     * variation store recorded in info.var_store_link. */
+    if (opstr.op == OpCode_vstore)
+    {
+      return_trace (FontDict::serialize_link4_op(c, opstr.op, info.var_store_link));
+    }
+
+    return_trace (cff_top_dict_op_serializer_t<>::serialize (c, opstr, info));
+  }
+};
+
+/* Charstring opset used when desubroutinizing: every hook re-encodes the
+ * interpreter's current arguments/operators into param.flatStr, so the
+ * output charstring is written out as a single flat string. */
+struct cff2_cs_opset_flatten_t : cff2_cs_opset_t<cff2_cs_opset_flatten_t, flatten_param_t>
+{
+ /* Emits the pending arguments followed by @op, except that
+ *  - return/endchar are skipped entirely, and
+ *  - hint operators are discarded (arguments cleared, nothing written)
+ *    when param.drop_hints is set. */
+ static void flush_args_and_op (op_code_t op, cff2_cs_interp_env_t &env, flatten_param_t& param)
+ {
+ switch (op)
+ {
+ case OpCode_return:
+ case OpCode_endchar:
+ /* dummy opcodes in CFF2. ignore */
+ break;
+
+ case OpCode_hstem:
+ case OpCode_hstemhm:
+ case OpCode_vstem:
+ case OpCode_vstemhm:
+ case OpCode_hintmask:
+ case OpCode_cntrmask:
+ if (param.drop_hints)
+ {
+ env.clear_args ();
+ return;
+ }
+ /* hints are kept: handle them like any other operator */
+ HB_FALLTHROUGH;
+
+ default:
+ SUPER::flush_args_and_op (op, env, param);
+ break;
+ }
+ }
+
+ /* Encodes every entry of the argument stack into param.flatStr, then
+ * lets the base opset finish the flush.  Plain numbers are encoded one
+ * at a time; a blended argument is emitted together with the other
+ * numValues entries of its blend group via flatten_blends().  Sets the
+ * interpreter error and returns early on an inconsistent blend group. */
+ static void flush_args (cff2_cs_interp_env_t &env, flatten_param_t& param)
+ {
+ for (unsigned int i = 0; i < env.argStack.get_count ();)
+ {
+ const blend_arg_t &arg = env.argStack[i];
+ if (arg.blending ())
+ {
+ /* NOTE(review): this compares numValues against the whole stack depth,
+ * not against the entries remaining from index i -- confirm that
+ * flatten_blends' per-entry checks make this sufficient. */
+ if (unlikely (!((arg.numValues > 0) && (env.argStack.get_count () >= arg.numValues))))
+ {
+ env.set_error ();
+ return;
+ }
+ flatten_blends (arg, i, env, param);
+ /* skip the whole blend group that was just written */
+ i += arg.numValues;
+ }
+ else
+ {
+ str_encoder_t encoder (param.flatStr);
+ encoder.encode_num (arg);
+ i++;
+ }
+ }
+ SUPER::flush_args (env, param);
+ }
+
+ /* Writes one blend group starting at stack index @i: the numValues
+ * default values, then the deltas of each value, then numValues itself
+ * and the blend operator.  Every entry of the group must be blending,
+ * agree on numValues, sit at its expected valueIndex and carry one delta
+ * per region; otherwise the interpreter error is set and nothing more
+ * is written. */
+ static void flatten_blends (const blend_arg_t &arg, unsigned int i, cff2_cs_interp_env_t &env, flatten_param_t& param)
+ {
+ /* flatten the default values */
+ str_encoder_t encoder (param.flatStr);
+ for (unsigned int j = 0; j < arg.numValues; j++)
+ {
+ const blend_arg_t &arg1 = env.argStack[i + j];
+ if (unlikely (!((arg1.blending () && (arg.numValues == arg1.numValues) && (arg1.valueIndex == j) &&
+ (arg1.deltas.length == env.get_region_count ())))))
+ {
+ env.set_error ();
+ return;
+ }
+ encoder.encode_num (arg1);
+ }
+ /* flatten deltas for each value */
+ for (unsigned int j = 0; j < arg.numValues; j++)
+ {
+ const blend_arg_t &arg1 = env.argStack[i + j];
+ for (unsigned int k = 0; k < arg1.deltas.length; k++)
+ encoder.encode_num (arg1.deltas[k]);
+ }
+ /* flatten the number of values followed by blend operator */
+ encoder.encode_int (arg.numValues);
+ encoder.encode_op (OpCode_blendcs);
+ }
+
+ /* Appends @op to param.flatStr; return and endchar are never written. */
+ static void flush_op (op_code_t op, cff2_cs_interp_env_t &env, flatten_param_t& param)
+ {
+ switch (op)
+ {
+ case OpCode_return:
+ case OpCode_endchar:
+ return;
+ default:
+ str_encoder_t encoder (param.flatStr);
+ encoder.encode_op (op);
+ }
+ }
+
+ private:
+ /* SUPER is the direct base opset the hooks above delegate to. */
+ typedef cff2_cs_opset_t<cff2_cs_opset_flatten_t, flatten_param_t> SUPER;
+ /* NOTE(review): CSOPSET is not referenced anywhere in this struct. */
+ typedef cs_opset_t<blend_arg_t, cff2_cs_opset_flatten_t, cff2_cs_opset_flatten_t, cff2_cs_interp_env_t, flatten_param_t> CSOPSET;
+};
+
+/* Charstring opset used when subsetting subroutines: while interpreting,
+ * it records each operator into the parsed string currently selected in
+ * param and adds every called subroutine number to the matching closure
+ * set. */
+struct cff2_cs_opset_subr_subset_t : cff2_cs_opset_t<cff2_cs_opset_subr_subset_t, subr_subset_param_t>
+{
+ /* Dispatches one operator.  return/endchar mark the current parsed
+ * string as fully parsed; callsubr/callgsubr go through
+ * process_call_subr(); every other operator is first interpreted by the
+ * base opset and then appended to the current parsed string. */
+ static void process_op (op_code_t op, cff2_cs_interp_env_t &env, subr_subset_param_t& param)
+ {
+ switch (op) {
+
+ case OpCode_return:
+ param.current_parsed_str->set_parsed ();
+ env.return_from_subr ();
+ /* re-select the parsed string for the context we returned to */
+ param.set_current_str (env, false);
+ break;
+
+ case OpCode_endchar:
+ param.current_parsed_str->set_parsed ();
+ SUPER::process_op (op, env, param);
+ break;
+
+ case OpCode_callsubr:
+ process_call_subr (op, CSType_LocalSubr, env, param, env.localSubrs, param.local_closure);
+ break;
+
+ case OpCode_callgsubr:
+ process_call_subr (op, CSType_GlobalSubr, env, param, env.globalSubrs, param.global_closure);
+ break;
+
+ default:
+ SUPER::process_op (op, env, param);
+ param.current_parsed_str->add_op (op, env.str_ref);
+ break;
+ }
+ }
+
+ protected:
+ /* Enters a subroutine of the given @type from @subrs, records the call
+ * (with the subroutine number the interpreter resolved) in the caller's
+ * parsed string, adds that number to @closure and switches param to the
+ * callee's parsed string. */
+ static void process_call_subr (op_code_t op, cs_type_t type,
+ cff2_cs_interp_env_t &env, subr_subset_param_t& param,
+ cff2_biased_subrs_t& subrs, hb_set_t *closure)
+ {
+ /* snapshot the position before call_subr () changes the environment */
+ byte_str_ref_t str_ref = env.str_ref;
+ env.call_subr (subrs, type);
+ param.current_parsed_str->add_call_op (op, str_ref, env.context.subr_num);
+ closure->add (env.context.subr_num);
+ param.set_current_str (env, true);
+ }
+
+ private:
+ typedef cff2_cs_opset_t<cff2_cs_opset_subr_subset_t, subr_subset_param_t> SUPER;
+};
+
+/* subr_subsetter_t specialised for CFF2: binds the CFF2 accelerator,
+ * interpreter environment and subroutine-subsetting opset together. */
+struct cff2_subr_subsetter_t : subr_subsetter_t<cff2_subr_subsetter_t, CFF2Subrs, const OT::cff2::accelerator_subset_t, cff2_cs_interp_env_t, cff2_cs_opset_subr_subset_t>
+{
+  cff2_subr_subsetter_t (const OT::cff2::accelerator_subset_t &acc_, const hb_subset_plan_t *plan_)
+    : subr_subsetter_t (acc_, plan_) {}
+
+  /* Finishing hook for a parsed charstring.  When the interpreter has seen
+   * a vsindex operator, the index it reports is stored as the charstring's
+   * prefix together with the vsindex opcode; otherwise nothing is done. */
+  static void complete_parsed_str (cff2_cs_interp_env_t &env, subr_subset_param_t& param, parsed_cs_str_t &charstring)
+  {
+    if (!env.seen_vsindex ())
+      return;
+
+    number_t vs_index;
+    vs_index.set_int ((int)env.get_ivs ());
+    charstring.set_prefix (vs_index, OpCode_vsindexcs);
+  }
+};
+
+/* Working state for subsetting one CFF2 table: the re-encoded
+ * charstrings and subroutines, the FDSelect/FD remapping and the links
+ * gathered while serializing.  create () fills it in; _serialize_cff2 ()
+ * consumes it. */
+struct cff2_subset_plan {
+ cff2_subset_plan ()
+ : orig_fdcount (0),
+ subset_fdcount(1),
+ subset_fdselect_size (0),
+ subset_fdselect_format (0),
+ drop_hints (false),
+ desubroutinize (false)
+ {
+ subset_fdselect_ranges.init ();
+ fdmap.init ();
+ subset_charstrings.init ();
+ subset_globalsubrs.init ();
+ subset_localsubrs.init ();
+ }
+
+ ~cff2_subset_plan ()
+ {
+ subset_fdselect_ranges.fini ();
+ fdmap.fini ();
+ subset_charstrings.fini_deep ();
+ subset_globalsubrs.fini_deep ();
+ subset_localsubrs.fini_deep ();
+ }
+
+ /* Computes the plan from the source accelerator @acc and the subset
+ * @plan.  Returns false as soon as any step fails; members filled in
+ * before the failure are left as they are and released by the
+ * destructor. */
+ bool create (const OT::cff2::accelerator_subset_t &acc,
+ hb_subset_plan_t *plan)
+ {
+ orig_fdcount = acc.fdArray->count;
+
+ drop_hints = plan->drop_hints;
+ desubroutinize = plan->desubroutinize;
+
+ if (desubroutinize)
+ {
+ /* Flatten global & local subrs */
+ subr_flattener_t<const OT::cff2::accelerator_subset_t, cff2_cs_interp_env_t, cff2_cs_opset_flatten_t>
+ flattener(acc, plan);
+ if (!flattener.flatten (subset_charstrings))
+ return false;
+ /* subset_globalsubrs / subset_localsubrs are not filled on this path */
+ }
+ else
+ {
+ cff2_subr_subsetter_t subr_subsetter (acc, plan);
+
+ /* Subset subrs: collect used subroutines, leaving all unused ones behind */
+ if (!subr_subsetter.subset ())
+ return false;
+
+ /* encode charstrings, global subrs, local subrs with new subroutine numbers */
+ if (!subr_subsetter.encode_charstrings (subset_charstrings))
+ return false;
+
+ if (!subr_subsetter.encode_globalsubrs (subset_globalsubrs))
+ return false;
+
+ /* local subrs */
+ /* one entry per font dict of the source table */
+ if (!subset_localsubrs.resize (orig_fdcount))
+ return false;
+ for (unsigned int fd = 0; fd < orig_fdcount; fd++)
+ {
+ subset_localsubrs[fd].init ();
+ if (!subr_subsetter.encode_localsubrs (fd, subset_localsubrs[fd]))
+ return false;
+ }
+ }
+
+ /* FDSelect */
+ if (acc.fdSelect != &Null (CFF2FDSelect))
+ {
+ if (unlikely (!hb_plan_subset_cff_fdselect (plan,
+ orig_fdcount,
+ *(const FDSelect *)acc.fdSelect,
+ subset_fdcount,
+ subset_fdselect_size,
+ subset_fdselect_format,
+ subset_fdselect_ranges,
+ fdmap)))
+ return false;
+ }
+ else
+ /* no FDSelect in the source: presumably a single font dict mapped to
+ * itself (subset_fdcount keeps its initial value of 1) */
+ fdmap.identity (1);
+
+ return true;
+ }
+
+ /* Links to serialized sub-objects; filled in by _serialize_cff2 (). */
+ cff2_sub_table_info_t info;
+
+ /* Font dict count of the source table, read from acc.fdArray->count. */
+ unsigned int orig_fdcount;
+ /* FDSelect-related outputs of hb_plan_subset_cff_fdselect (). */
+ unsigned int subset_fdcount;
+ unsigned int subset_fdselect_size;
+ unsigned int subset_fdselect_format;
+ hb_vector_t<code_pair_t> subset_fdselect_ranges;
+
+ /* Source font dict index -> subset font dict index. */
+ hb_inc_bimap_t fdmap;
+
+ /* Encoded output buffers; subset_localsubrs is indexed by source fd. */
+ str_buff_vec_t subset_charstrings;
+ str_buff_vec_t subset_globalsubrs;
+ hb_vector_t<str_buff_vec_t> subset_localsubrs;
+
+ /* Copies of the corresponding hb_subset_plan_t options. */
+ bool drop_hints;
+ bool desubroutinize;
+};
+
+/* Serializes a complete CFF2 table from @plan into the serializer @c.
+ *
+ * The sub-objects are written first, each as its own pushed/packed
+ * serializer object whose link is recorded for later reference:
+ * private dicts (with their local subrs), CharStrings, FDSelect,
+ * FDArray and the variation store.  The table header, top dict and
+ * global subrs are then written into the main object.
+ *
+ * @acc is the source table accelerator and @num_glyphs the number of
+ * glyphs in the output font (used for FDSelect).
+ *
+ * Returns false on any failure.  Every failure path that follows a
+ * c->push () discards the pushed object with c->pop_discard () before
+ * returning, so no half-built object is left on the serializer's stack. */
+static bool _serialize_cff2 (hb_serialize_context_t *c,
+                             cff2_subset_plan &plan,
+                             const OT::cff2::accelerator_subset_t &acc,
+                             unsigned int num_glyphs)
+{
+  /* private dicts & local subrs */
+  hb_vector_t<table_info_t> private_dict_infos;
+  if (unlikely (!private_dict_infos.resize (plan.subset_fdcount))) return false;
+
+  /* Walk the source private dicts from last to first, keeping only those
+   * that fdmap retains. */
+  for (int i = (int)acc.privateDicts.length; --i >= 0 ;)
+  {
+    if (plan.fdmap.has (i))
+    {
+      objidx_t subrs_link = 0;
+
+      /* Local subrs of this font dict, if any survived subsetting. */
+      if (plan.subset_localsubrs[i].length > 0)
+      {
+        CFF2Subrs *dest = c->start_embed <CFF2Subrs> ();
+        if (unlikely (!dest)) return false;
+        c->push ();
+        if (likely (dest->serialize (c, plan.subset_localsubrs[i])))
+          subrs_link = c->pop_pack ();
+        else
+        {
+          c->pop_discard ();
+          return false;
+        }
+      }
+      PrivateDict *pd = c->start_embed<PrivateDict> ();
+      if (unlikely (!pd)) return false;
+      c->push ();
+      cff_private_dict_op_serializer_t privSzr (plan.desubroutinize, plan.drop_hints);
+      if (likely (pd->serialize (c, acc.privateDicts[i], privSzr, subrs_link)))
+      {
+        /* Record size and link under the remapped (subset) fd index. */
+        unsigned fd = plan.fdmap[i];
+        private_dict_infos[fd].size = c->length ();
+        private_dict_infos[fd].link = c->pop_pack ();
+      }
+      else
+      {
+        c->pop_discard ();
+        return false;
+      }
+    }
+  }
+
+  /* CharStrings */
+  {
+    CFF2CharStrings *cs = c->start_embed<CFF2CharStrings> ();
+    if (unlikely (!cs)) return false;
+    c->push ();
+    if (likely (cs->serialize (c, plan.subset_charstrings)))
+      plan.info.char_strings_link = c->pop_pack ();
+    else
+    {
+      c->pop_discard ();
+      return false;
+    }
+  }
+
+  /* FDSelect */
+  if (acc.fdSelect != &Null (CFF2FDSelect))
+  {
+    c->push ();
+    if (likely (hb_serialize_cff_fdselect (c, num_glyphs, *(const FDSelect *)acc.fdSelect, plan.orig_fdcount,
+                                           plan.subset_fdselect_format, plan.subset_fdselect_size,
+                                           plan.subset_fdselect_ranges)))
+      plan.info.fd_select.link = c->pop_pack ();
+    else
+    {
+      c->pop_discard ();
+      return false;
+    }
+  }
+
+  /* FDArray (FD Index) */
+  {
+    c->push ();
+    CFF2FDArray *fda = c->start_embed<CFF2FDArray> ();
+    if (unlikely (!fda))
+    {
+      c->pop_discard ();
+      return false;
+    }
+    cff_font_dict_op_serializer_t fontSzr;
+    /* Pair each retained source font dict with the info of the private
+     * dict that was serialized for it above. */
+    auto it =
+    + hb_zip (+ hb_iter (acc.fontDicts)
+              | hb_filter ([&] (const cff2_font_dict_values_t &_)
+                           { return plan.fdmap.has (&_ - &acc.fontDicts[0]); }),
+              hb_iter (private_dict_infos))
+    ;
+    if (unlikely (!fda->serialize (c, it, fontSzr)))
+    {
+      c->pop_discard ();
+      return false;
+    }
+    plan.info.fd_array_link = c->pop_pack ();
+  }
+
+  /* variation store */
+  if (acc.varStore != &Null (CFF2VariationStore))
+  {
+    c->push ();
+    CFF2VariationStore *dest = c->start_embed<CFF2VariationStore> ();
+    if (unlikely (!dest || !dest->serialize (c, acc.varStore)))
+    {
+      c->pop_discard ();
+      return false;
+    }
+    plan.info.var_store_link = c->pop_pack ();
+  }
+
+  OT::cff2 *cff2 = c->allocate_min<OT::cff2> ();
+  if (unlikely (!cff2)) return false;
+
+  /* header */
+  cff2->version.major = 0x02;
+  cff2->version.minor = 0x00;
+  cff2->topDict = OT::cff2::static_size;
+
+  /* top dict */
+  {
+    TopDict &dict = cff2 + cff2->topDict;
+    cff2_top_dict_op_serializer_t topSzr;
+    if (unlikely (!dict.serialize (c, acc.topDict, topSzr, plan.info))) return false;
+    /* The top dict size is whatever the serializer head advanced by. */
+    cff2->topDictSize = c->head - (const char *)&dict;
+  }
+
+  /* global subrs */
+  {
+    CFF2Subrs *dest = c->start_embed <CFF2Subrs> ();
+    if (unlikely (!dest)) return false;
+    return dest->serialize (c, plan.subset_globalsubrs);
+  }
+}
+
+/* Builds a cff2_subset_plan for @acc from the subset context @c and, if
+ * that succeeds, serializes the subset table into c->serializer.
+ * Returns false when either step fails. */
+static bool
+_hb_subset_cff2 (const OT::cff2::accelerator_subset_t &acc,
+                 hb_subset_context_t *c)
+{
+  cff2_subset_plan cff2_plan;
+
+  const bool planned = cff2_plan.create (acc, c->plan);
+  if (unlikely (!planned))
+    return false;
+
+  return _serialize_cff2 (c->serializer,
+                          cff2_plan,
+                          acc,
+                          c->plan->num_output_glyphs ());
+}
+
+/**
+ * hb_subset_cff2:
+ * Subsets the CFF2 table according to a provided subset context.
+ *
+ * A cff2 subset accelerator is initialized from the plan's source face
+ * and always finalized before returning.  The result is false when the
+ * accelerator is not valid or when subsetting fails.
+ **/
+bool
+hb_subset_cff2 (hb_subset_context_t *c)
+{
+  OT::cff2::accelerator_subset_t acc;
+  acc.init (c->plan->source);
+
+  bool result = false;
+  if (likely (acc.is_valid ()))
+    result = _hb_subset_cff2 (acc, c);
+
+  acc.fini ();
+  return result;
+}
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-subset-cff2.hh b/thirdparty/harfbuzz/src/hb-subset-cff2.hh
new file mode 100644
index 0000000000..f10556ddd7
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-subset-cff2.hh
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2018 Adobe Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Adobe Author(s): Michiharu Ariza
+ */
+
+#ifndef HB_SUBSET_CFF2_HH
+#define HB_SUBSET_CFF2_HH
+
+#include "hb.hh"
+
+#include "hb-subset-plan.hh"
+
+/* Entry point for subsetting the CFF2 table using the subset context @c.
+ * Returns false when the cff2 accelerator built from the plan's source
+ * face is not valid or when the subsetting step fails. */
+HB_INTERNAL bool
+hb_subset_cff2 (hb_subset_context_t *c);
+
+#endif /* HB_SUBSET_CFF2_HH */
diff --git a/thirdparty/harfbuzz/src/hb-subset-input.cc b/thirdparty/harfbuzz/src/hb-subset-input.cc
new file mode 100644
index 0000000000..fe9be3ce02
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-subset-input.cc
@@ -0,0 +1,229 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger, Rod Sheeter, Behdad Esfahbod
+ */
+
+#include "hb-subset.hh"
+#include "hb-set.hh"
+
+/**
+ * hb_subset_input_create_or_fail:
+ *
+ * Creates a subset input with default settings: empty unicode and glyph
+ * sets, the name-ID range 0..6, name language 0x0409, all boolean options
+ * disabled, and a default list of tables to drop.
+ *
+ * Return value: New subset input, or a null pointer if the object could
+ * not be created.
+ *
+ * Since: 1.8.0
+ **/
+hb_subset_input_t *
+hb_subset_input_create_or_fail ()
+{
+  hb_subset_input_t *input = hb_object_create<hb_subset_input_t>();
+  if (unlikely (!input))
+    return nullptr;
+
+  /* Boolean options all start out disabled. */
+  input->drop_hints     = false;
+  input->desubroutinize = false;
+  input->retain_gids    = false;
+  input->name_legacy    = false;
+
+  /* Glyph selection starts out empty. */
+  input->unicodes = hb_set_create ();
+  input->glyphs   = hb_set_create ();
+
+  /* Default name records to keep. */
+  input->name_ids = hb_set_create ();
+  hb_set_add_range (input->name_ids, 0, 6);
+  input->name_languages = hb_set_create ();
+  hb_set_add (input->name_languages, 0x0409);
+
+  /* Tables dropped unless the caller edits the set. */
+  input->drop_tables = hb_set_create ();
+  hb_tag_t tables_dropped_by_default[] = {
+    // Layout disabled by default
+    HB_TAG ('G', 'S', 'U', 'B'),
+    HB_TAG ('G', 'P', 'O', 'S'),
+    HB_TAG ('G', 'D', 'E', 'F'),
+    HB_TAG ('m', 'o', 'r', 'x'),
+    HB_TAG ('m', 'o', 'r', 't'),
+    HB_TAG ('k', 'e', 'r', 'x'),
+    HB_TAG ('k', 'e', 'r', 'n'),
+
+    // Copied from fontTools:
+    HB_TAG ('B', 'A', 'S', 'E'),
+    HB_TAG ('J', 'S', 'T', 'F'),
+    HB_TAG ('D', 'S', 'I', 'G'),
+    HB_TAG ('E', 'B', 'D', 'T'),
+    HB_TAG ('E', 'B', 'L', 'C'),
+    HB_TAG ('E', 'B', 'S', 'C'),
+    HB_TAG ('S', 'V', 'G', ' '),
+    HB_TAG ('P', 'C', 'L', 'T'),
+    HB_TAG ('L', 'T', 'S', 'H'),
+    // Graphite tables
+    HB_TAG ('F', 'e', 'a', 't'),
+    HB_TAG ('G', 'l', 'a', 't'),
+    HB_TAG ('G', 'l', 'o', 'c'),
+    HB_TAG ('S', 'i', 'l', 'f'),
+    HB_TAG ('S', 'i', 'l', 'l'),
+  };
+  input->drop_tables->add_array (tables_dropped_by_default,
+                                 ARRAY_LENGTH (tables_dropped_by_default));
+
+  return input;
+}
+
+/**
+ * hb_subset_input_reference: (skip)
+ * @subset_input: a subset_input.
+ *
+ * Takes a reference on @subset_input by forwarding it to
+ * hb_object_reference().
+ *
+ * Return value: whatever hb_object_reference() returns for @subset_input.
+ *
+ * Since: 1.8.0
+ **/
+hb_subset_input_t *
+hb_subset_input_reference (hb_subset_input_t *subset_input)
+{
+ return hb_object_reference (subset_input);
+}
+
+/**
+ * hb_subset_input_destroy:
+ * @subset_input: a subset_input.
+ *
+ * Drops a reference on @subset_input.  When hb_object_destroy() reports
+ * that the object should be torn down, the five sets it holds are
+ * destroyed and the object itself is freed.
+ *
+ * Since: 1.8.0
+ **/
+void
+hb_subset_input_destroy (hb_subset_input_t *subset_input)
+{
+  if (!hb_object_destroy (subset_input)) return;
+
+  /* Sets created in hb_subset_input_create_or_fail (). */
+  hb_set_t *owned_sets[] = {
+    subset_input->unicodes,
+    subset_input->glyphs,
+    subset_input->name_ids,
+    subset_input->name_languages,
+    subset_input->drop_tables,
+  };
+  for (unsigned int i = 0; i < ARRAY_LENGTH (owned_sets); i++)
+    hb_set_destroy (owned_sets[i]);
+
+  free (subset_input);
+}
+
+/**
+ * hb_subset_input_unicode_set:
+ * @subset_input: a subset_input.
+ *
+ * Return value: the set stored in @subset_input's unicodes field.  The
+ * pointer is returned as-is; no additional reference is taken.
+ *
+ * Since: 1.8.0
+ **/
+HB_EXTERN hb_set_t *
+hb_subset_input_unicode_set (hb_subset_input_t *subset_input)
+{
+ return subset_input->unicodes;
+}
+
+/**
+ * hb_subset_input_glyph_set:
+ * @subset_input: a subset_input.
+ *
+ * Return value: the set stored in @subset_input's glyphs field.  The
+ * pointer is returned as-is; no additional reference is taken.
+ *
+ * Since: 1.8.0
+ **/
+HB_EXTERN hb_set_t *
+hb_subset_input_glyph_set (hb_subset_input_t *subset_input)
+{
+ return subset_input->glyphs;
+}
+
+/**
+ * hb_subset_input_nameid_set:
+ * @subset_input: a subset_input.
+ *
+ * Return value: the set stored in @subset_input's name_ids field.  The
+ * pointer is returned as-is; no additional reference is taken.
+ **/
+HB_EXTERN hb_set_t *
+hb_subset_input_nameid_set (hb_subset_input_t *subset_input)
+{
+ return subset_input->name_ids;
+}
+
+/**
+ * hb_subset_input_namelangid_set:
+ * @subset_input: a subset_input.
+ *
+ * Return value: the set stored in @subset_input's name_languages field.
+ * The pointer is returned as-is; no additional reference is taken.
+ **/
+HB_EXTERN hb_set_t *
+hb_subset_input_namelangid_set (hb_subset_input_t *subset_input)
+{
+ return subset_input->name_languages;
+}
+
+/**
+ * hb_subset_input_drop_tables_set:
+ * @subset_input: a subset_input.
+ *
+ * Return value: the set stored in @subset_input's drop_tables field (the
+ * tags of the tables to drop).  The pointer is returned as-is; no
+ * additional reference is taken.
+ **/
+HB_EXTERN hb_set_t *
+hb_subset_input_drop_tables_set (hb_subset_input_t *subset_input)
+{
+ return subset_input->drop_tables;
+}
+
+/**
+ * hb_subset_input_set_drop_hints:
+ * @subset_input: a subset_input.
+ * @drop_hints: new value of the drop_hints option.
+ *
+ * Stores @drop_hints in @subset_input.
+ **/
+HB_EXTERN void
+hb_subset_input_set_drop_hints (hb_subset_input_t *subset_input,
+ hb_bool_t drop_hints)
+{
+ subset_input->drop_hints = drop_hints;
+}
+
+/**
+ * hb_subset_input_get_drop_hints:
+ * @subset_input: a subset_input.
+ *
+ * Returns: value of drop_hints.
+ **/
+HB_EXTERN hb_bool_t
+hb_subset_input_get_drop_hints (hb_subset_input_t *subset_input)
+{
+ return subset_input->drop_hints;
+}
+
+/**
+ * hb_subset_input_set_desubroutinize:
+ * @subset_input: a subset_input.
+ * @desubroutinize: new value of the desubroutinize option.
+ *
+ * Stores @desubroutinize in @subset_input.
+ **/
+HB_EXTERN void
+hb_subset_input_set_desubroutinize (hb_subset_input_t *subset_input,
+ hb_bool_t desubroutinize)
+{
+ subset_input->desubroutinize = desubroutinize;
+}
+
+/**
+ * hb_subset_input_get_desubroutinize:
+ * @subset_input: a subset_input.
+ *
+ * Returns: value of desubroutinize.
+ **/
+HB_EXTERN hb_bool_t
+hb_subset_input_get_desubroutinize (hb_subset_input_t *subset_input)
+{
+ return subset_input->desubroutinize;
+}
+
+/**
+ * hb_subset_input_set_retain_gids:
+ * @subset_input: a subset_input.
+ * @retain_gids: If true the subsetter will not renumber glyph ids.
+ *
+ * Stores @retain_gids in @subset_input.
+ *
+ * Since: 2.4.0
+ **/
+HB_EXTERN void
+hb_subset_input_set_retain_gids (hb_subset_input_t *subset_input,
+ hb_bool_t retain_gids)
+{
+ subset_input->retain_gids = retain_gids;
+}
+
+/**
+ * hb_subset_input_get_retain_gids:
+ * @subset_input: a subset_input.
+ *
+ * Returns: value of retain_gids.
+ *
+ * Since: 2.4.0
+ **/
+HB_EXTERN hb_bool_t
+hb_subset_input_get_retain_gids (hb_subset_input_t *subset_input)
+{
+ return subset_input->retain_gids;
+}
+
+/**
+ * hb_subset_input_set_name_legacy:
+ * @subset_input: a subset_input.
+ * @name_legacy: new value of the name_legacy option.
+ *
+ * Stores @name_legacy in @subset_input.
+ **/
+HB_EXTERN void
+hb_subset_input_set_name_legacy (hb_subset_input_t *subset_input,
+ hb_bool_t name_legacy)
+{
+ subset_input->name_legacy = name_legacy;
+}
+
+/**
+ * hb_subset_input_get_name_legacy:
+ * @subset_input: a subset_input.
+ *
+ * Returns: value of name_legacy.
+ **/
+HB_EXTERN hb_bool_t
+hb_subset_input_get_name_legacy (hb_subset_input_t *subset_input)
+{
+ return subset_input->name_legacy;
+}
diff --git a/thirdparty/harfbuzz/src/hb-subset-input.hh b/thirdparty/harfbuzz/src/hb-subset-input.hh
new file mode 100644
index 0000000000..0aeb96695b
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-subset-input.hh
@@ -0,0 +1,61 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger, Roderick Sheeter
+ */
+
+#ifndef HB_SUBSET_INPUT_HH
+#define HB_SUBSET_INPUT_HH
+
+
+#include "hb.hh"
+
+#include "hb-subset.h"
+
+#include "hb-font.hh"
+
+/* User-supplied description of what a subset should contain.  Created by
+ * hb_subset_input_create_or_fail () and read by hb_subset_plan_create (). */
+struct hb_subset_input_t
+{
+ hb_object_header_t header;
+
+ /* Sets owned by the input; each one is exposed through the matching
+ * hb_subset_input_*_set () accessor and destroyed with the input. */
+ hb_set_t *unicodes; /* code points requested */
+ hb_set_t *glyphs; /* glyph ids requested */
+ hb_set_t *name_ids; /* name ids to retain; seeded with the range 0..6 */
+ hb_set_t *name_languages; /* name languages to retain; seeded with 0x0409 */
+ hb_set_t *drop_tables; /* tags of tables to drop */
+
+ /* Options, all false by default; copied into the plan on creation. */
+ bool drop_hints;
+ bool desubroutinize;
+ bool retain_gids; /* if true the subsetter will not renumber glyph ids */
+ bool name_legacy;
+ /* TODO
+ *
+ * features
+ * lookups
+ * name_ids
+ * ...
+ */
+};
+
+
+#endif /* HB_SUBSET_INPUT_HH */
diff --git a/thirdparty/harfbuzz/src/hb-subset-plan.cc b/thirdparty/harfbuzz/src/hb-subset-plan.cc
new file mode 100644
index 0000000000..24beada3e8
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-subset-plan.cc
@@ -0,0 +1,395 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger, Roderick Sheeter
+ */
+
+#include "hb-subset-plan.hh"
+#include "hb-map.hh"
+#include "hb-set.hh"
+
+#include "hb-ot-cmap-table.hh"
+#include "hb-ot-glyf-table.hh"
+#include "hb-ot-layout-gdef-table.hh"
+#include "hb-ot-layout-gpos-table.hh"
+#include "hb-ot-layout-gsub-table.hh"
+#include "hb-ot-cff1-table.hh"
+#include "hb-ot-color-colr-table.hh"
+#include "hb-ot-var-fvar-table.hh"
+#include "hb-ot-stat-table.hh"
+
+
+#ifndef HB_NO_SUBSET_CFF
+/* When @cff reports seac components for glyph @gid, adds the base and
+ * accent glyph ids to @gids_to_retain; otherwise leaves the set alone. */
+static inline void
+_add_cff_seac_components (const OT::cff1::accelerator_t &cff,
+                          hb_codepoint_t gid,
+                          hb_set_t *gids_to_retain)
+{
+  hb_codepoint_t base_gid, accent_gid;
+  if (!cff.get_seac_components (gid, &base_gid, &accent_gid))
+    return;
+
+  gids_to_retain->add (base_gid);
+  gids_to_retain->add (accent_gid);
+}
+#endif
+
+#ifndef HB_NO_SUBSET_LAYOUT
+/* Assigns consecutive new indexes, starting at 0, to the members of
+ * @indexes in iteration order and records each old -> new pair in
+ * @mapping. */
+static void
+_remap_indexes (const hb_set_t *indexes,
+                hb_map_t *mapping /* OUT */)
+{
+  unsigned new_index = 0;
+  hb_codepoint_t old_index = HB_SET_VALUE_INVALID;
+
+  while (indexes->next (&old_index))
+    mapping->set (old_index, new_index++);
+}
+
+/* GSUB closure for the subset plan.
+ *
+ * Collects the GSUB lookups of @face, grows @gids_to_retain with the
+ * glyphs reachable through them, lets the GSUB table adjust the lookup
+ * set for the final glyph set, and then writes the old -> new index
+ * remapping of the surviving lookups into @gsub_lookups and of the
+ * features computed from them into @gsub_features. */
+static inline void
+_gsub_closure_glyphs_lookups_features (hb_face_t *face,
+ hb_set_t *gids_to_retain,
+ hb_map_t *gsub_lookups,
+ hb_map_t *gsub_features)
+{
+ hb_set_t lookup_indices;
+ /* nullptr filters: no restriction on scripts, languages or features */
+ hb_ot_layout_collect_lookups (face,
+ HB_OT_TAG_GSUB,
+ nullptr,
+ nullptr,
+ nullptr,
+ &lookup_indices);
+ hb_ot_layout_lookups_substitute_closure (face,
+ &lookup_indices,
+ gids_to_retain);
+ hb_blob_ptr_t<OT::GSUB> gsub = hb_sanitize_context_t ().reference_table<OT::GSUB> (face);
+ gsub->closure_lookups (face,
+ gids_to_retain,
+ &lookup_indices);
+ _remap_indexes (&lookup_indices, gsub_lookups);
+
+ //closure features
+ hb_set_t feature_indices;
+ gsub->closure_features (gsub_lookups, &feature_indices);
+ _remap_indexes (&feature_indices, gsub_features);
+ /* release the table reference taken above */
+ gsub.destroy ();
+}
+
+/* GPOS counterpart of the GSUB closure.  The glyph set is read-only here:
+ * the GPOS lookups of @face are collected, adjusted by the GPOS table for
+ * @gids_to_retain, and the old -> new index remapping of the surviving
+ * lookups and of the features computed from them is written into
+ * @gpos_lookups and @gpos_features. */
+static inline void
+_gpos_closure_lookups_features (hb_face_t *face,
+ const hb_set_t *gids_to_retain,
+ hb_map_t *gpos_lookups,
+ hb_map_t *gpos_features)
+{
+ hb_set_t lookup_indices;
+ /* nullptr filters: no restriction on scripts, languages or features */
+ hb_ot_layout_collect_lookups (face,
+ HB_OT_TAG_GPOS,
+ nullptr,
+ nullptr,
+ nullptr,
+ &lookup_indices);
+ hb_blob_ptr_t<OT::GPOS> gpos = hb_sanitize_context_t ().reference_table<OT::GPOS> (face);
+ gpos->closure_lookups (face,
+ gids_to_retain,
+ &lookup_indices);
+ _remap_indexes (&lookup_indices, gpos_lookups);
+
+ //closure features
+ hb_set_t feature_indices;
+ gpos->closure_features (gpos_lookups, &feature_indices);
+ _remap_indexes (&feature_indices, gpos_features);
+ /* release the table reference taken above */
+ gpos.destroy ();
+}
+#endif
+
+#ifndef HB_NO_VAR
+/* Collects the layout variation indices used by GDEF (and by GPOS, when
+ * the face has positioning) for @glyphset / @gpos_lookups into
+ * @layout_variation_indices, then asks GDEF to fill
+ * @layout_variation_idx_map with their remapping.  Does nothing when the
+ * GDEF table has no data. */
+static inline void
+ _collect_layout_variation_indices (hb_face_t *face,
+ const hb_set_t *glyphset,
+ const hb_map_t *gpos_lookups,
+ hb_set_t *layout_variation_indices,
+ hb_map_t *layout_variation_idx_map)
+{
+ hb_blob_ptr_t<OT::GDEF> gdef = hb_sanitize_context_t ().reference_table<OT::GDEF> (face);
+ hb_blob_ptr_t<OT::GPOS> gpos = hb_sanitize_context_t ().reference_table<OT::GPOS> (face);
+
+ if (!gdef->has_data ())
+ {
+ /* both table references must be released on every exit path */
+ gdef.destroy ();
+ gpos.destroy ();
+ return;
+ }
+ OT::hb_collect_variation_indices_context_t c (layout_variation_indices, glyphset, gpos_lookups);
+ gdef->collect_variation_indices (&c);
+
+ if (hb_ot_layout_has_positioning (face))
+ gpos->collect_variation_indices (&c);
+
+ gdef->remap_layout_variation_indices (layout_variation_indices, layout_variation_idx_map);
+
+ gdef.destroy ();
+ gpos.destroy ();
+}
+#endif
+
+/* Runs the cmap table's glyph closure for @unicodes, adding the resulting
+ * glyphs to @glyphset.  A cmap accelerator is set up for @face just for
+ * the duration of the call. */
+static inline void
+_cmap_closure (hb_face_t *face,
+ const hb_set_t *unicodes,
+ hb_set_t *glyphset)
+{
+ OT::cmap::accelerator_t cmap;
+ cmap.init (face);
+ cmap.table->closure_glyphs (unicodes, glyphset);
+ cmap.fini ();
+}
+
+/* Deletes from @glyphs every glyph id that is not below @num_glyphs. */
+static inline void
+_remove_invalid_gids (hb_set_t *glyphs,
+                      unsigned int num_glyphs)
+{
+  for (hb_codepoint_t gid = HB_SET_VALUE_INVALID; glyphs->next (&gid);)
+  {
+    if (gid < num_glyphs)
+      continue;
+    glyphs->del (gid);
+  }
+}
+
+/* Computes the glyph sets of @plan.
+ *
+ * plan->_glyphset_gsub receives glyph 0, the explicitly requested glyphs,
+ * the nominal glyphs of the requested @unicodes, the cmap closure and --
+ * when @close_over_gsub is set -- the GSUB closure.  plan->_glyphset is
+ * then built from it by adding glyf children, CFF seac components and
+ * COLR-related glyphs.  Glyph ids outside the source face's glyph count
+ * are removed from both sets.
+ *
+ * Along the way plan->unicodes and plan->codepoint_to_glyph are filled
+ * for every code point that maps to a glyph, the GSUB/GPOS lookup and
+ * feature maps are filled when the corresponding close_over_* flag is
+ * set, and the layout variation indices are collected when
+ * @close_over_gdef is set. */
+static void
+_populate_gids_to_retain (hb_subset_plan_t* plan,
+ const hb_set_t *unicodes,
+ const hb_set_t *input_glyphs_to_retain,
+ bool close_over_gsub,
+ bool close_over_gpos,
+ bool close_over_gdef)
+{
+ OT::cmap::accelerator_t cmap;
+ OT::glyf::accelerator_t glyf;
+#ifndef HB_NO_SUBSET_CFF
+ OT::cff1::accelerator_t cff;
+#endif
+ OT::COLR::accelerator_t colr;
+ cmap.init (plan->source);
+ glyf.init (plan->source);
+#ifndef HB_NO_SUBSET_CFF
+ cff.init (plan->source);
+#endif
+ colr.init (plan->source);
+
+ plan->_glyphset_gsub->add (0); // Not-def
+ hb_set_union (plan->_glyphset_gsub, input_glyphs_to_retain);
+
+ /* Map each requested code point to its nominal glyph; code points the
+ * cmap does not cover are dropped from the plan. */
+ hb_codepoint_t cp = HB_SET_VALUE_INVALID;
+ while (unicodes->next (&cp))
+ {
+ hb_codepoint_t gid;
+ if (!cmap.get_nominal_glyph (cp, &gid))
+ {
+ DEBUG_MSG(SUBSET, nullptr, "Drop U+%04X; no gid", cp);
+ continue;
+ }
+ plan->unicodes->add (cp);
+ plan->codepoint_to_glyph->set (cp, gid);
+ plan->_glyphset_gsub->add (gid);
+ }
+
+ _cmap_closure (plan->source, plan->unicodes, plan->_glyphset_gsub);
+
+#ifndef HB_NO_SUBSET_LAYOUT
+ if (close_over_gsub)
+ // closure all glyphs/lookups/features needed for GSUB substitutions.
+ _gsub_closure_glyphs_lookups_features (plan->source, plan->_glyphset_gsub, plan->gsub_lookups, plan->gsub_features);
+
+ if (close_over_gpos)
+ _gpos_closure_lookups_features (plan->source, plan->_glyphset_gsub, plan->gpos_lookups, plan->gpos_features);
+#endif
+ _remove_invalid_gids (plan->_glyphset_gsub, plan->source->get_num_glyphs ());
+
+ // Populate a full set of glyphs to retain by adding all referenced
+ // composite glyphs.
+ hb_codepoint_t gid = HB_SET_VALUE_INVALID;
+ while (plan->_glyphset_gsub->next (&gid))
+ {
+ glyf.add_gid_and_children (gid, plan->_glyphset);
+#ifndef HB_NO_SUBSET_CFF
+ if (cff.is_valid ())
+ _add_cff_seac_components (cff, gid, plan->_glyphset);
+#endif
+ if (colr.is_valid ())
+ colr.closure_glyphs (gid, plan->_glyphset);
+ }
+
+ _remove_invalid_gids (plan->_glyphset, plan->source->get_num_glyphs ());
+
+#ifndef HB_NO_VAR
+ if (close_over_gdef)
+ _collect_layout_variation_indices (plan->source, plan->_glyphset, plan->gpos_lookups, plan->layout_variation_indices, plan->layout_variation_idx_map);
+#endif
+
+ /* NOTE(review): colr is init ()'ed above but, unlike cff, glyf and cmap,
+ * gets no explicit fini () here -- confirm that its destructor releases
+ * the table reference. */
+#ifndef HB_NO_SUBSET_CFF
+ cff.fini ();
+#endif
+ glyf.fini ();
+ cmap.fini ();
+}
+
+/* Builds the glyph id mappings for the retained glyphs.
+ *
+ * Without @retain_gids the retained glyphs are numbered consecutively
+ * from 0 in set iteration order and *@num_glyphs is the number of mapped
+ * glyphs.  With @retain_gids every glyph keeps its id and *@num_glyphs is
+ * the largest retained id plus one.
+ *
+ * @reverse_glyph_map receives new -> old pairs and @glyph_map the same
+ * pairs reversed (old -> new).  @face is not used by this function. */
+static void
+_create_old_gid_to_new_gid_map (const hb_face_t *face,
+ bool retain_gids,
+ const hb_set_t *all_gids_to_retain,
+ hb_map_t *glyph_map, /* OUT */
+ hb_map_t *reverse_glyph_map, /* OUT */
+ unsigned int *num_glyphs /* OUT */)
+{
+ if (!retain_gids)
+ {
+ /* enumerate from 0: pairs of (new gid, old gid) */
+ + hb_enumerate (hb_iter (all_gids_to_retain), (hb_codepoint_t) 0)
+ | hb_sink (reverse_glyph_map)
+ ;
+ *num_glyphs = reverse_glyph_map->get_population ();
+ } else {
+ /* identity pairs: every retained glyph maps to itself */
+ + hb_iter (all_gids_to_retain)
+ | hb_map ([] (hb_codepoint_t _) {
+ return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (_, _);
+ })
+ | hb_sink (reverse_glyph_map)
+ ;
+
+ unsigned max_glyph =
+ + hb_iter (all_gids_to_retain)
+ | hb_reduce (hb_max, 0u)
+ ;
+ *num_glyphs = max_glyph + 1;
+ }
+
+ /* glyph_map is reverse_glyph_map with key and value swapped */
+ + reverse_glyph_map->iter ()
+ | hb_map (&hb_pair_t<hb_codepoint_t, hb_codepoint_t>::reverse)
+ | hb_sink (glyph_map)
+ ;
+}
+
+/* Lets the STAT and fvar tables of @face add the name ids they collect
+ * to @nameids.  Each table is consulted only when its feature is
+ * compiled in (HB_NO_STYLE / HB_NO_VAR not defined). */
+static void
+_nameid_closure (hb_face_t *face,
+ hb_set_t *nameids)
+{
+#ifndef HB_NO_STYLE
+ face->table.STAT->collect_name_ids (nameids);
+#endif
+#ifndef HB_NO_VAR
+ face->table.fvar->collect_name_ids (nameids);
+#endif
+}
+
+/**
+ * hb_subset_plan_create:
+ * Computes a plan for subsetting the supplied face according
+ * to a provided input. The plan describes
+ * which tables and glyphs should be retained.
+ *
+ * Return value: New subset plan.  If the plan object cannot be created,
+ * the shared Null plan object is returned instead.
+ *
+ * Since: 1.7.5
+ **/
+hb_subset_plan_t *
+hb_subset_plan_create (hb_face_t *face,
+ hb_subset_input_t *input)
+{
+ hb_subset_plan_t *plan;
+ if (unlikely (!(plan = hb_object_create<hb_subset_plan_t> ())))
+ return const_cast<hb_subset_plan_t *> (&Null (hb_subset_plan_t));
+
+ /* Copy the options and take references on the input's sets. */
+ plan->successful = true;
+ plan->drop_hints = input->drop_hints;
+ plan->desubroutinize = input->desubroutinize;
+ plan->retain_gids = input->retain_gids;
+ plan->name_legacy = input->name_legacy;
+ plan->unicodes = hb_set_create ();
+ plan->name_ids = hb_set_reference (input->name_ids);
+ /* NOTE(review): plan->name_ids is a reference to the input's set, not a
+ * copy, so the name ids added here presumably become visible through
+ * the input as well -- confirm this is intended. */
+ _nameid_closure (face, plan->name_ids);
+ plan->name_languages = hb_set_reference (input->name_languages);
+ plan->glyphs_requested = hb_set_reference (input->glyphs);
+ plan->drop_tables = hb_set_reference (input->drop_tables);
+ plan->source = hb_face_reference (face);
+ plan->dest = hb_face_builder_create ();
+
+ /* Containers filled in by the closure steps below. */
+ plan->_glyphset = hb_set_create ();
+ plan->_glyphset_gsub = hb_set_create ();
+ plan->codepoint_to_glyph = hb_map_create ();
+ plan->glyph_map = hb_map_create ();
+ plan->reverse_glyph_map = hb_map_create ();
+ plan->gsub_lookups = hb_map_create ();
+ plan->gpos_lookups = hb_map_create ();
+ plan->gsub_features = hb_map_create ();
+ plan->gpos_features = hb_map_create ();
+ plan->layout_variation_indices = hb_set_create ();
+ plan->layout_variation_idx_map = hb_map_create ();
+
+ /* A layout table takes part in the closure only if it is not dropped. */
+ _populate_gids_to_retain (plan,
+ input->unicodes,
+ input->glyphs,
+ !input->drop_tables->has (HB_OT_TAG_GSUB),
+ !input->drop_tables->has (HB_OT_TAG_GPOS),
+ !input->drop_tables->has (HB_OT_TAG_GDEF));
+
+ _create_old_gid_to_new_gid_map (face,
+ input->retain_gids,
+ plan->_glyphset,
+ plan->glyph_map,
+ plan->reverse_glyph_map,
+ &plan->_num_output_glyphs);
+
+ return plan;
+}
+
+/**
+ * hb_subset_plan_destroy:
+ * @plan: a subset plan.
+ *
+ * Drops a reference on @plan.  When hb_object_destroy() reports that the
+ * object should be torn down, every set, map and face the plan holds is
+ * destroyed and the plan itself is freed.
+ *
+ * Since: 1.7.5
+ **/
+void
+hb_subset_plan_destroy (hb_subset_plan_t *plan)
+{
+ if (!hb_object_destroy (plan)) return;
+
+ /* One destroy call per object created or referenced in
+ * hb_subset_plan_create (). */
+ hb_set_destroy (plan->unicodes);
+ hb_set_destroy (plan->name_ids);
+ hb_set_destroy (plan->name_languages);
+ hb_set_destroy (plan->glyphs_requested);
+ hb_set_destroy (plan->drop_tables);
+ hb_face_destroy (plan->source);
+ hb_face_destroy (plan->dest);
+ hb_map_destroy (plan->codepoint_to_glyph);
+ hb_map_destroy (plan->glyph_map);
+ hb_map_destroy (plan->reverse_glyph_map);
+ hb_set_destroy (plan->_glyphset);
+ hb_set_destroy (plan->_glyphset_gsub);
+ hb_map_destroy (plan->gsub_lookups);
+ hb_map_destroy (plan->gpos_lookups);
+ hb_map_destroy (plan->gsub_features);
+ hb_map_destroy (plan->gpos_features);
+ hb_set_destroy (plan->layout_variation_indices);
+ hb_map_destroy (plan->layout_variation_idx_map);
+
+
+ free (plan);
+}
diff --git a/thirdparty/harfbuzz/src/hb-subset-plan.hh b/thirdparty/harfbuzz/src/hb-subset-plan.hh
new file mode 100644
index 0000000000..e9f603dd1d
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-subset-plan.hh
@@ -0,0 +1,194 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger, Roderick Sheeter
+ */
+
+#ifndef HB_SUBSET_PLAN_HH
+#define HB_SUBSET_PLAN_HH
+
+#include "hb.hh"
+
+#include "hb-subset.h"
+#include "hb-subset-input.hh"
+
+#include "hb-map.hh"
+#include "hb-set.hh"
+
+struct hb_subset_plan_t
+{
+  hb_object_header_t header;
+
+  /* Status flag (see in_error ()) followed by the option flags, one bit each. */
+  bool successful : 1;
+  bool drop_hints : 1;
+  bool desubroutinize : 1;
+  bool retain_gids : 1;
+  bool name_legacy : 1;
+
+  /* Codepoints we'd like to retain. This is a set; the codepoint -> glyph id
+   * mapping itself lives in codepoint_to_glyph. */
+  hb_set_t *unicodes;
+
+  /* name_ids we would like to retain. */
+  hb_set_t *name_ids;
+
+  /* name_languages we would like to retain. */
+  hb_set_t *name_languages;
+
+  /* Glyph ids requested to retain. */
+  hb_set_t *glyphs_requested;
+
+  /* Tables which should be dropped. */
+  hb_set_t *drop_tables;
+
+  /* Codepoint -> old (source) glyph id, as read by new_gid_for_codepoint (). */
+  hb_map_t *codepoint_to_glyph;
+
+  /* Old -> new glyph id mapping, and its inverse (new -> old). */
+  hb_map_t *glyph_map;
+  hb_map_t *reverse_glyph_map;
+
+  /* A plan is only good for a specific source/dest pair, so keep both with it. */
+  hb_face_t *source;
+  hb_face_t *dest;
+
+  /* Backing storage for num_output_glyphs (), glyphset () and glyphset_gsub (). */
+  unsigned int _num_output_glyphs;
+  hb_set_t *_glyphset;
+  hb_set_t *_glyphset_gsub;
+
+  /* Active lookups we'd like to retain. */
+  hb_map_t *gsub_lookups;
+  hb_map_t *gpos_lookups;
+
+  /* Active features we'd like to retain. */
+  hb_map_t *gsub_features;
+  hb_map_t *gpos_features;
+
+  /* The set of layout item variation store delta set indices to be retained. */
+  hb_set_t *layout_variation_indices;
+  /* Old -> new layout item variation store delta set index mapping. */
+  hb_map_t *layout_variation_idx_map;
+
+ public:
+
+  /* True once the plan has recorded a failure. */
+  bool in_error () const { return successful ? false : true; }
+
+  /* Folds @success into the plan status (a failure is sticky) and returns the
+   * resulting status. */
+  bool check_success (bool success)
+  {
+    if (!success)
+      successful = false;
+    return successful;
+  }
+
+  /*
+   * The set of input glyph ids which will be retained in the subset.
+   * Does NOT include ids kept due to retain_gids. You probably want to use
+   * glyph_map/reverse_glyph_map.
+   */
+  const hb_set_t *glyphset () const { return _glyphset; }
+
+  /*
+   * Accessor for the _glyphset_gsub set.
+   * NOTE(review): presumably the retained input glyph ids as of the GSUB
+   * closure step -- confirm against the code that populates the plan.
+   */
+  const hb_set_t *glyphset_gsub () const { return _glyphset_gsub; }
+
+  /*
+   * The total number of output glyphs in the final subset.
+   */
+  unsigned int num_output_glyphs () const { return _num_output_glyphs; }
+
+  /*
+   * Returns true if @gid is an empty glyph (ie. it's a gid that we are
+   * dropping all data for), which here means it is absent from _glyphset.
+   * NOTE(review): _glyphset is documented above as holding input glyph ids;
+   * confirm which id space callers pass in.
+   */
+  bool is_empty_glyph (hb_codepoint_t gid) const
+  {
+    const bool retained = _glyphset->has (gid);
+    return !retained;
+  }
+
+  /* Looks up the new glyph id for @codepoint by going through its old glyph
+   * id. Returns false, leaving *@new_gid untouched, if either step has no
+   * mapping. */
+  bool new_gid_for_codepoint (hb_codepoint_t codepoint,
+                              hb_codepoint_t *new_gid) const
+  {
+    const hb_codepoint_t source_gid = codepoint_to_glyph->get (codepoint);
+    return source_gid != HB_MAP_VALUE_INVALID
+        && new_gid_for_old_gid (source_gid, new_gid);
+  }
+
+  /* Stores the new glyph id for @old_gid in *@new_gid and returns true, or
+   * returns false without writing if glyph_map has no entry. */
+  bool new_gid_for_old_gid (hb_codepoint_t old_gid,
+                            hb_codepoint_t *new_gid) const
+  {
+    const hb_codepoint_t mapped = glyph_map->get (old_gid);
+    const bool found = (mapped != HB_MAP_VALUE_INVALID);
+    if (found)
+      *new_gid = mapped;
+    return found;
+  }
+
+  /* Stores the old glyph id for @new_gid in *@old_gid and returns true, or
+   * returns false without writing if reverse_glyph_map has no entry. */
+  bool old_gid_for_new_gid (hb_codepoint_t new_gid,
+                            hb_codepoint_t *old_gid) const
+  {
+    const hb_codepoint_t mapped = reverse_glyph_map->get (new_gid);
+    const bool found = (mapped != HB_MAP_VALUE_INVALID);
+    if (found)
+      *old_gid = mapped;
+    return found;
+  }
+
+  /* Adds @contents to the destination face builder under @tag and returns the
+   * builder's result. The source table is referenced only so that its length
+   * can be reported next to the new one in the debug message. */
+  bool add_table (hb_tag_t tag, hb_blob_t *contents)
+  {
+    hb_blob_t *source_blob = source->reference_table (tag);
+    DEBUG_MSG(SUBSET, nullptr, "add table %c%c%c%c, dest %d bytes, source %d bytes",
+              HB_UNTAG(tag),
+              hb_blob_get_length (contents),
+              hb_blob_get_length (source_blob));
+    hb_blob_destroy (source_blob);
+    return hb_face_builder_add_table (dest, tag, contents);
+  }
+};
+
+typedef struct hb_subset_plan_t hb_subset_plan_t;
+
+HB_INTERNAL hb_subset_plan_t *
+hb_subset_plan_create (hb_face_t *face,
+ hb_subset_input_t *input); /* Builds the plan for subsetting @face per @input; release it with hb_subset_plan_destroy (). */
+
+HB_INTERNAL void
+hb_subset_plan_destroy (hb_subset_plan_t *plan); /* Releases @plan along with the sets, maps and faces it holds. */
+
+#endif /* HB_SUBSET_PLAN_HH */
diff --git a/thirdparty/harfbuzz/src/hb-subset.cc b/thirdparty/harfbuzz/src/hb-subset.cc
new file mode 100644
index 0000000000..8b77ecd45a
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-subset.cc
@@ -0,0 +1,269 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger, Rod Sheeter, Behdad Esfahbod
+ */
+
+#include "hb.hh"
+#include "hb-open-type.hh"
+
+#include "hb-subset.hh"
+
+#include "hb-open-file.hh"
+#include "hb-ot-cmap-table.hh"
+#include "hb-ot-glyf-table.hh"
+#include "hb-ot-hdmx-table.hh"
+#include "hb-ot-head-table.hh"
+#include "hb-ot-hhea-table.hh"
+#include "hb-ot-hmtx-table.hh"
+#include "hb-ot-maxp-table.hh"
+#include "hb-ot-color-sbix-table.hh"
+#include "hb-ot-color-colr-table.hh"
+#include "hb-ot-os2-table.hh"
+#include "hb-ot-post-table.hh"
+#include "hb-ot-cff1-table.hh"
+#include "hb-ot-cff2-table.hh"
+#include "hb-ot-vorg-table.hh"
+#include "hb-ot-name-table.hh"
+#include "hb-ot-color-cbdt-table.hh"
+#include "hb-ot-layout-gsub-table.hh"
+#include "hb-ot-layout-gpos-table.hh"
+#include "hb-ot-var-gvar-table.hh"
+#include "hb-ot-var-hvar-table.hh"
+
+
+/* Heuristic initial buffer size for a subset table: 512 bytes of headroom plus
+ * the source table length scaled by sqrt (retained glyphs / source glyphs). */
+static unsigned
+_plan_estimate_subset_table_size (hb_subset_plan_t *plan, unsigned table_len)
+{
+  const unsigned total_glyphs = plan->source->get_num_glyphs ();
+  const unsigned kept_glyphs = plan->glyphset ()->get_population ();
+
+  /* With no source glyphs the ratio is undefined; budget for the whole table. */
+  if (unlikely (total_glyphs == 0))
+    return 512 + table_len;
+
+  const double kept_ratio = (double) kept_glyphs / total_glyphs;
+  return 512 + (unsigned) (table_len * sqrt (kept_ratio));
+}
+
+/* Subsets the TableType table of plan->source and, when the table reports that
+ * it is still needed, adds the serialized result to the plan's destination.
+ *
+ * The output buffer starts at an estimated size; whenever the serializer runs
+ * out of room the buffer is grown and the whole table is subset again from
+ * scratch. Returns false if the source table has no data after sanitizing, if
+ * a buffer (re)allocation fails, if serialization ends in error, or if adding
+ * the table fails. A table that subsets to empty counts as success. */
+template<typename TableType>
+static bool
+_subset (hb_subset_plan_t *plan)
+{
+  bool ok = false;
+  hb_blob_t *source_blob = hb_sanitize_context_t ().reference_table<TableType> (plan->source);
+  const TableType *table = source_blob->as<TableType> ();
+
+  hb_tag_t tag = TableType::tableTag;
+  if (!source_blob->data)
+  {
+    DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset sanitize failed on source table.", HB_UNTAG (tag));
+  }
+  else
+  {
+    hb_vector_t<char> buf;
+    /* TODO Not all tables are glyph-related. 'name' table size for example should not be
+     * affected by number of glyphs. Accommodate that. */
+    unsigned buf_size = _plan_estimate_subset_table_size (plan, source_blob->length);
+    DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c initial estimated table size: %u bytes.", HB_UNTAG (tag), buf_size);
+    if (unlikely (!buf.alloc (buf_size)))
+    {
+      DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c failed to allocate %u bytes.", HB_UNTAG (tag), buf_size);
+      hb_blob_destroy (source_blob);
+      return false;
+    }
+
+    /* Each pass builds a fresh serializer and subset context over the current
+     * buffer; a pass that overflows grows the buffer and starts over. */
+    for (;;)
+    {
+      hb_serialize_context_t serializer ((void *) buf, buf_size);
+      serializer.start_serialize<TableType> ();
+      hb_subset_context_t c (source_blob, plan, &serializer, tag);
+      const bool needed = table->subset (&c);
+
+      if (serializer.ran_out_of_room)
+      {
+        buf_size += (buf_size >> 1) + 32;
+        DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c ran out of room; reallocating to %u bytes.", HB_UNTAG (tag), buf_size);
+        if (unlikely (!buf.alloc (buf_size)))
+        {
+          DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c failed to reallocate %u bytes.", HB_UNTAG (tag), buf_size);
+          hb_blob_destroy (source_blob);
+          return false;
+        }
+        continue;
+      }
+
+      serializer.end_serialize ();
+      ok = !serializer.in_error ();
+
+      if (ok && needed)
+      {
+        hb_blob_t *dest_blob = serializer.copy_blob ();
+        DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c final subset table size: %u bytes.", HB_UNTAG (tag), dest_blob->length);
+        ok = c.plan->add_table (tag, dest_blob);
+        hb_blob_destroy (dest_blob);
+      }
+      else if (ok)
+      {
+        DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset table subsetted to empty.", HB_UNTAG (tag));
+      }
+      break;
+    }
+  }
+
+  hb_blob_destroy (source_blob);
+  DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset %s", HB_UNTAG (tag), ok ? "success" : "FAILED!");
+  return ok;
+}
+
+/* Returns true if @source lists a table tagged @tag. Tags are fetched through
+ * hb_face_get_table_tags () in batches of at most 32 until a batch comes back
+ * empty. */
+static bool
+_is_table_present (hb_face_t *source, hb_tag_t tag)
+{
+  hb_tag_t batch[32];
+  unsigned start = 0;
+  unsigned count = ARRAY_LENGTH (batch);
+
+  for (;;)
+  {
+    /* count goes in as the batch capacity and is read back as the batch size. */
+    hb_face_get_table_tags (source, start, &count, batch);
+    if (!count)
+      break;
+
+    for (unsigned idx = 0; idx != count; idx++)
+    {
+      if (batch[idx] == tag)
+        return true;
+    }
+
+    start += count;
+  }
+
+  return false;
+}
+
+/* Decides whether the table tagged @tag is left out of the subset:
+ *  - always, if the tag is in plan->drop_tables;
+ *  - according to plan->drop_hints for the hint tables;
+ *  - always for the layout tables when built with HB_NO_SUBSET_LAYOUT;
+ *  - never otherwise. */
+static bool
+_should_drop_table (hb_subset_plan_t *plan, hb_tag_t tag)
+{
+  if (plan->drop_tables->has (tag))
+    return true;
+
+  /* Hint tables are kept or dropped as a group. */
+  const bool is_hint_table =
+    tag == HB_TAG ('c','v','a','r') ||
+    tag == HB_TAG ('c','v','t',' ') ||
+    tag == HB_TAG ('f','p','g','m') ||
+    tag == HB_TAG ('p','r','e','p') ||
+    tag == HB_TAG ('h','d','m','x') ||
+    tag == HB_TAG ('V','D','M','X');
+  if (is_hint_table)
+    return plan->drop_hints;
+
+#ifdef HB_NO_SUBSET_LAYOUT
+  /* Layout subsetting is compiled out, so the layout tables are dropped. */
+  const bool is_layout_table =
+    tag == HB_OT_TAG_GDEF ||
+    tag == HB_OT_TAG_GPOS ||
+    tag == HB_OT_TAG_GSUB ||
+    tag == HB_TAG ('m','o','r','x') ||
+    tag == HB_TAG ('m','o','r','t') ||
+    tag == HB_TAG ('k','e','r','x') ||
+    tag == HB_TAG ('k','e','r','n');
+  if (is_layout_table)
+    return true;
+#endif
+
+  return false;
+}
+
+/* Dispatches the table tagged @tag to its subsetter. Tables that are produced
+ * while subsetting a companion table are skipped (reported as success), and
+ * tables without a dedicated subsetter are passed through to the destination
+ * unchanged. Returns false if subsetting or adding the table fails. */
+static bool
+_subset_table (hb_subset_plan_t *plan, hb_tag_t tag)
+{
+  DEBUG_MSG (SUBSET, nullptr, "subset %c%c%c%c", HB_UNTAG (tag));
+  switch (tag)
+  {
+    /* Skipped here: each is handled by the table named alongside it. */
+    case HB_OT_TAG_hhea: /* handled by hmtx */
+    case HB_OT_TAG_vhea: /* handled by vmtx */
+    case HB_OT_TAG_loca: /* handled by glyf */
+    case HB_OT_TAG_CBDT: /* handled by CBLC */
+      return true;
+
+    case HB_OT_TAG_head:
+    {
+      /* head is handled by glyf whenever glyf is present and not dropped. */
+      const bool handled_by_glyf =
+        _is_table_present (plan->source, HB_OT_TAG_glyf) &&
+        !_should_drop_table (plan, HB_OT_TAG_glyf);
+      if (handled_by_glyf)
+        return true;
+      return _subset<const OT::head> (plan);
+    }
+
+    case HB_OT_TAG_glyf: return _subset<const OT::glyf> (plan);
+    case HB_OT_TAG_hdmx: return _subset<const OT::hdmx> (plan);
+    case HB_OT_TAG_name: return _subset<const OT::name> (plan);
+    case HB_OT_TAG_hmtx: return _subset<const OT::hmtx> (plan);
+    case HB_OT_TAG_vmtx: return _subset<const OT::vmtx> (plan);
+    case HB_OT_TAG_maxp: return _subset<const OT::maxp> (plan);
+    case HB_OT_TAG_sbix: return _subset<const OT::sbix> (plan);
+    case HB_OT_TAG_cmap: return _subset<const OT::cmap> (plan);
+    case HB_OT_TAG_OS2 : return _subset<const OT::OS2 > (plan);
+    case HB_OT_TAG_post: return _subset<const OT::post> (plan);
+    case HB_OT_TAG_COLR: return _subset<const OT::COLR> (plan);
+    case HB_OT_TAG_CBLC: return _subset<const OT::CBLC> (plan);
+
+#ifndef HB_NO_SUBSET_CFF
+    case HB_OT_TAG_cff1: return _subset<const OT::cff1> (plan);
+    case HB_OT_TAG_cff2: return _subset<const OT::cff2> (plan);
+    case HB_OT_TAG_VORG: return _subset<const OT::VORG> (plan);
+#endif
+
+#ifndef HB_NO_SUBSET_LAYOUT
+    case HB_OT_TAG_GDEF: return _subset<const OT::GDEF> (plan);
+    case HB_OT_TAG_GSUB: return _subset<const OT::GSUB> (plan);
+    case HB_OT_TAG_GPOS: return _subset<const OT::GPOS> (plan);
+    case HB_OT_TAG_gvar: return _subset<const OT::gvar> (plan);
+    case HB_OT_TAG_HVAR: return _subset<const OT::HVAR> (plan);
+    case HB_OT_TAG_VVAR: return _subset<const OT::VVAR> (plan);
+#endif
+
+    default:
+    {
+      /* No dedicated subsetter: copy the source table through as-is. */
+      hb_blob_t *passthrough = hb_face_reference_table (plan->source, tag);
+      const bool added = plan->add_table (tag, passthrough);
+      hb_blob_destroy (passthrough);
+      return added;
+    }
+  }
+}
+
+/**
+ * hb_subset:
+ * @source: font face data to be subset.
+ * @input: input to use for the subsetting.
+ *
+ * Subsets a font according to provided input.
+ *
+ * Return value: a new reference to the subset face, or the empty face
+ * (hb_face_get_empty ()) if either argument is null, the plan is in error,
+ * or subsetting any table fails.
+ **/
+hb_face_t *
+hb_subset (hb_face_t *source, hb_subset_input_t *input)
+{
+  if (unlikely (!input || !source)) return hb_face_get_empty ();
+
+  hb_subset_plan_t *plan = hb_subset_plan_create (source, input);
+  if (unlikely (plan->in_error ()))
+  {
+    /* Release the plan on this path too; returning without destroying it
+     * would leave the plan and everything it holds unreleased. */
+    hb_subset_plan_destroy (plan);
+    return hb_face_get_empty ();
+  }
+
+  hb_set_t tags_set;
+  bool success = true;
+  hb_tag_t table_tags[32];
+  unsigned offset = 0, num_tables = ARRAY_LENGTH (table_tags);
+  /* Walk the source's table tags in batches of up to 32; num_tables is read
+   * back after every call and an empty batch ends the walk. */
+  while ((hb_face_get_table_tags (source, offset, &num_tables, table_tags), num_tables))
+  {
+    for (unsigned i = 0; i < num_tables; ++i)
+    {
+      hb_tag_t tag = table_tags[i];
+      if (_should_drop_table (plan, tag) && !tags_set.has (tag)) continue;
+      tags_set.add (tag);
+      success = _subset_table (plan, tag);
+      /* The first failing table aborts the whole subset. */
+      if (unlikely (!success)) goto end;
+    }
+    offset += num_tables;
+  }
+end:
+
+  /* Take our own reference on the destination before the plan lets go of it. */
+  hb_face_t *result = success ? hb_face_reference (plan->dest) : hb_face_get_empty ();
+
+  hb_subset_plan_destroy (plan);
+  return result;
+}
diff --git a/thirdparty/harfbuzz/src/hb-subset.h b/thirdparty/harfbuzz/src/hb-subset.h
new file mode 100644
index 0000000000..ddf4409734
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-subset.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Rod Sheeter
+ */
+
+#ifndef HB_SUBSET_H
+#define HB_SUBSET_H
+
+#include "hb.h"
+
+HB_BEGIN_DECLS
+
+/*
+ * hb_subset_input_t
+ *
+ * Opaque object describing what a subset should contain. It exposes the sets
+ * returned by the hb_subset_input_*_set () accessors below (unicodes, glyphs,
+ * name ids, name language ids, tables to drop) and the boolean options that
+ * have get/set pairs (drop_hints, desubroutinize, retain_gids, name_legacy).
+ */
+
+typedef struct hb_subset_input_t hb_subset_input_t;
+
+HB_EXTERN hb_subset_input_t *
+hb_subset_input_create_or_fail (void);
+
+HB_EXTERN hb_subset_input_t *
+hb_subset_input_reference (hb_subset_input_t *subset_input);
+
+HB_EXTERN void
+hb_subset_input_destroy (hb_subset_input_t *subset_input);
+
+HB_EXTERN hb_set_t *
+hb_subset_input_unicode_set (hb_subset_input_t *subset_input);
+
+HB_EXTERN hb_set_t *
+hb_subset_input_glyph_set (hb_subset_input_t *subset_input);
+
+HB_EXTERN hb_set_t *
+hb_subset_input_nameid_set (hb_subset_input_t *subset_input);
+
+HB_EXTERN hb_set_t *
+hb_subset_input_namelangid_set (hb_subset_input_t *subset_input);
+
+HB_EXTERN hb_set_t *
+hb_subset_input_drop_tables_set (hb_subset_input_t *subset_input);
+
+HB_EXTERN void
+hb_subset_input_set_drop_hints (hb_subset_input_t *subset_input,
+ hb_bool_t drop_hints);
+HB_EXTERN hb_bool_t
+hb_subset_input_get_drop_hints (hb_subset_input_t *subset_input);
+
+HB_EXTERN void
+hb_subset_input_set_desubroutinize (hb_subset_input_t *subset_input,
+ hb_bool_t desubroutinize);
+HB_EXTERN hb_bool_t
+hb_subset_input_get_desubroutinize (hb_subset_input_t *subset_input);
+
+HB_EXTERN void
+hb_subset_input_set_retain_gids (hb_subset_input_t *subset_input,
+ hb_bool_t retain_gids);
+HB_EXTERN hb_bool_t
+hb_subset_input_get_retain_gids (hb_subset_input_t *subset_input);
+
+HB_EXTERN void
+hb_subset_input_set_name_legacy (hb_subset_input_t *subset_input,
+ hb_bool_t name_legacy);
+HB_EXTERN hb_bool_t
+hb_subset_input_get_name_legacy (hb_subset_input_t *subset_input);
+
+/*
+ * hb_subset ()
+ *
+ * Subsets @source according to @input and returns the resulting face. The
+ * implementation returns hb_face_get_empty () when either argument is null
+ * or when subsetting fails.
+ */
+HB_EXTERN hb_face_t *
+hb_subset (hb_face_t *source, hb_subset_input_t *input);
+
+
+HB_END_DECLS
+
+#endif /* HB_SUBSET_H */
diff --git a/thirdparty/harfbuzz/src/hb-subset.hh b/thirdparty/harfbuzz/src/hb-subset.hh
new file mode 100644
index 0000000000..c9b01c67f3
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-subset.hh
@@ -0,0 +1,73 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger, Roderick Sheeter
+ */
+
+#ifndef HB_SUBSET_HH
+#define HB_SUBSET_HH
+
+
+#include "hb.hh"
+
+#include "hb-subset.h"
+
+#include "hb-machinery.hh"
+#include "hb-subset-input.hh"
+#include "hb-subset-plan.hh"
+
+struct hb_subset_context_t :
+ hb_dispatch_context_t<hb_subset_context_t, bool, HB_DEBUG_SUBSET>
+{ /* Dispatch context handed to an object's subset () call; bundles the source blob, plan, serializer and table tag. */
+ const char *get_name () { return "SUBSET"; } /* Name of this dispatch context. */
+ static return_t default_return_value () { return true; } /* The default dispatch result is true. */
+
+ private: /* Two _dispatch candidates ranked by hb_priority; presumably <1> is chosen whenever obj.subset (...) is well-formed -- confirm against hb_priority. */
+ template <typename T, typename ...Ts> auto
+ _dispatch (const T &obj, hb_priority<1>, Ts&&... ds) HB_AUTO_RETURN
+ ( obj.subset (this, hb_forward<Ts> (ds)...) ) /* Calls obj.subset () with this context. */
+ template <typename T, typename ...Ts> auto
+ _dispatch (const T &obj, hb_priority<0>, Ts&&... ds) HB_AUTO_RETURN
+ ( obj.dispatch (this, hb_forward<Ts> (ds)...) ) /* Calls obj.dispatch () with this context. */
+ public:
+ template <typename T, typename ...Ts> auto
+ dispatch (const T &obj, Ts&&... ds) HB_AUTO_RETURN
+ ( _dispatch (obj, hb_prioritize, hb_forward<Ts> (ds)...) ) /* Entry point: forwards to _dispatch with hb_prioritize. */
+
+ hb_blob_t *source_blob; /* Blob of the source table being subset. */
+ hb_subset_plan_t *plan; /* Plan driving this subset operation. */
+ hb_serialize_context_t *serializer; /* Serializer that receives the subset output. */
+ hb_tag_t table_tag; /* Tag of the table being subset. */
+
+ hb_subset_context_t (hb_blob_t *source_blob_,
+ hb_subset_plan_t *plan_,
+ hb_serialize_context_t *serializer_,
+ hb_tag_t table_tag_) : /* Stores the four pointers/values as given; no ownership is taken here. */
+ source_blob (source_blob_),
+ plan (plan_),
+ serializer (serializer_),
+ table_tag (table_tag_) {}
+};
+
+
+#endif /* HB_SUBSET_HH */
diff --git a/thirdparty/harfbuzz/src/hb-ucd-table.hh b/thirdparty/harfbuzz/src/hb-ucd-table.hh
new file mode 100644
index 0000000000..88623db338
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ucd-table.hh
@@ -0,0 +1,6780 @@
+/* == Start of generated table == */
+/*
+ * The following table is generated by running:
+ *
+ * ./gen-ucd-table.py ucd.nounihan.grouped.xml
+ *
+ * on file with this description: Unicode 13.0.0
+ */
+
+#ifndef HB_UCD_TABLE_HH
+#define HB_UCD_TABLE_HH
+
+#include "hb.hh"
+
+static const hb_script_t
+_hb_ucd_sc_map[157] =
+{
+ HB_SCRIPT_COMMON, HB_SCRIPT_INHERITED,
+ HB_SCRIPT_UNKNOWN, HB_SCRIPT_ARABIC,
+ HB_SCRIPT_ARMENIAN, HB_SCRIPT_BENGALI,
+ HB_SCRIPT_CYRILLIC, HB_SCRIPT_DEVANAGARI,
+ HB_SCRIPT_GEORGIAN, HB_SCRIPT_GREEK,
+ HB_SCRIPT_GUJARATI, HB_SCRIPT_GURMUKHI,
+ HB_SCRIPT_HANGUL, HB_SCRIPT_HAN,
+ HB_SCRIPT_HEBREW, HB_SCRIPT_HIRAGANA,
+ HB_SCRIPT_KANNADA, HB_SCRIPT_KATAKANA,
+ HB_SCRIPT_LAO, HB_SCRIPT_LATIN,
+ HB_SCRIPT_MALAYALAM, HB_SCRIPT_ORIYA,
+ HB_SCRIPT_TAMIL, HB_SCRIPT_TELUGU,
+ HB_SCRIPT_THAI, HB_SCRIPT_TIBETAN,
+ HB_SCRIPT_BOPOMOFO, HB_SCRIPT_BRAILLE,
+ HB_SCRIPT_CANADIAN_SYLLABICS, HB_SCRIPT_CHEROKEE,
+ HB_SCRIPT_ETHIOPIC, HB_SCRIPT_KHMER,
+ HB_SCRIPT_MONGOLIAN, HB_SCRIPT_MYANMAR,
+ HB_SCRIPT_OGHAM, HB_SCRIPT_RUNIC,
+ HB_SCRIPT_SINHALA, HB_SCRIPT_SYRIAC,
+ HB_SCRIPT_THAANA, HB_SCRIPT_YI,
+ HB_SCRIPT_DESERET, HB_SCRIPT_GOTHIC,
+ HB_SCRIPT_OLD_ITALIC, HB_SCRIPT_BUHID,
+ HB_SCRIPT_HANUNOO, HB_SCRIPT_TAGALOG,
+ HB_SCRIPT_TAGBANWA, HB_SCRIPT_CYPRIOT,
+ HB_SCRIPT_LIMBU, HB_SCRIPT_LINEAR_B,
+ HB_SCRIPT_OSMANYA, HB_SCRIPT_SHAVIAN,
+ HB_SCRIPT_TAI_LE, HB_SCRIPT_UGARITIC,
+ HB_SCRIPT_BUGINESE, HB_SCRIPT_COPTIC,
+ HB_SCRIPT_GLAGOLITIC, HB_SCRIPT_KHAROSHTHI,
+ HB_SCRIPT_NEW_TAI_LUE, HB_SCRIPT_OLD_PERSIAN,
+ HB_SCRIPT_SYLOTI_NAGRI, HB_SCRIPT_TIFINAGH,
+ HB_SCRIPT_BALINESE, HB_SCRIPT_CUNEIFORM,
+ HB_SCRIPT_NKO, HB_SCRIPT_PHAGS_PA,
+ HB_SCRIPT_PHOENICIAN, HB_SCRIPT_CARIAN,
+ HB_SCRIPT_CHAM, HB_SCRIPT_KAYAH_LI,
+ HB_SCRIPT_LEPCHA, HB_SCRIPT_LYCIAN,
+ HB_SCRIPT_LYDIAN, HB_SCRIPT_OL_CHIKI,
+ HB_SCRIPT_REJANG, HB_SCRIPT_SAURASHTRA,
+ HB_SCRIPT_SUNDANESE, HB_SCRIPT_VAI,
+ HB_SCRIPT_AVESTAN, HB_SCRIPT_BAMUM,
+ HB_SCRIPT_EGYPTIAN_HIEROGLYPHS, HB_SCRIPT_IMPERIAL_ARAMAIC,
+ HB_SCRIPT_INSCRIPTIONAL_PAHLAVI, HB_SCRIPT_INSCRIPTIONAL_PARTHIAN,
+ HB_SCRIPT_JAVANESE, HB_SCRIPT_KAITHI,
+ HB_SCRIPT_LISU, HB_SCRIPT_MEETEI_MAYEK,
+ HB_SCRIPT_OLD_SOUTH_ARABIAN, HB_SCRIPT_OLD_TURKIC,
+ HB_SCRIPT_SAMARITAN, HB_SCRIPT_TAI_THAM,
+ HB_SCRIPT_TAI_VIET, HB_SCRIPT_BATAK,
+ HB_SCRIPT_BRAHMI, HB_SCRIPT_MANDAIC,
+ HB_SCRIPT_CHAKMA, HB_SCRIPT_MEROITIC_CURSIVE,
+ HB_SCRIPT_MEROITIC_HIEROGLYPHS, HB_SCRIPT_MIAO,
+ HB_SCRIPT_SHARADA, HB_SCRIPT_SORA_SOMPENG,
+ HB_SCRIPT_TAKRI, HB_SCRIPT_BASSA_VAH,
+ HB_SCRIPT_CAUCASIAN_ALBANIAN, HB_SCRIPT_DUPLOYAN,
+ HB_SCRIPT_ELBASAN, HB_SCRIPT_GRANTHA,
+ HB_SCRIPT_KHOJKI, HB_SCRIPT_KHUDAWADI,
+ HB_SCRIPT_LINEAR_A, HB_SCRIPT_MAHAJANI,
+ HB_SCRIPT_MANICHAEAN, HB_SCRIPT_MENDE_KIKAKUI,
+ HB_SCRIPT_MODI, HB_SCRIPT_MRO,
+ HB_SCRIPT_NABATAEAN, HB_SCRIPT_OLD_NORTH_ARABIAN,
+ HB_SCRIPT_OLD_PERMIC, HB_SCRIPT_PAHAWH_HMONG,
+ HB_SCRIPT_PALMYRENE, HB_SCRIPT_PAU_CIN_HAU,
+ HB_SCRIPT_PSALTER_PAHLAVI, HB_SCRIPT_SIDDHAM,
+ HB_SCRIPT_TIRHUTA, HB_SCRIPT_WARANG_CITI,
+ HB_SCRIPT_AHOM, HB_SCRIPT_ANATOLIAN_HIEROGLYPHS,
+ HB_SCRIPT_HATRAN, HB_SCRIPT_MULTANI,
+ HB_SCRIPT_OLD_HUNGARIAN, HB_SCRIPT_SIGNWRITING,
+ HB_SCRIPT_ADLAM, HB_SCRIPT_BHAIKSUKI,
+ HB_SCRIPT_MARCHEN, HB_SCRIPT_OSAGE,
+ HB_SCRIPT_TANGUT, HB_SCRIPT_NEWA,
+ HB_SCRIPT_MASARAM_GONDI, HB_SCRIPT_NUSHU,
+ HB_SCRIPT_SOYOMBO, HB_SCRIPT_ZANABAZAR_SQUARE,
+ HB_SCRIPT_DOGRA, HB_SCRIPT_GUNJALA_GONDI,
+ HB_SCRIPT_HANIFI_ROHINGYA, HB_SCRIPT_MAKASAR,
+ HB_SCRIPT_MEDEFAIDRIN, HB_SCRIPT_OLD_SOGDIAN,
+ HB_SCRIPT_SOGDIAN, HB_SCRIPT_ELYMAIC,
+ HB_SCRIPT_NANDINAGARI, HB_SCRIPT_NYIAKENG_PUACHUE_HMONG,
+ HB_SCRIPT_WANCHO, HB_SCRIPT_CHORASMIAN,
+ HB_SCRIPT_DIVES_AKURU, HB_SCRIPT_KHITAN_SMALL_SCRIPT,
+ HB_SCRIPT_YEZIDI,
+};
+static const uint16_t
+_hb_ucd_dm1_p0_map[825] =
+{
+ 0x003Bu, 0x004Bu, 0x0060u, 0x00B4u, 0x00B7u, 0x00C5u, 0x02B9u, 0x0300u,
+ 0x0301u, 0x0313u, 0x0385u, 0x0386u, 0x0388u, 0x0389u, 0x038Au, 0x038Cu,
+ 0x038Eu, 0x038Fu, 0x0390u, 0x03A9u, 0x03ACu, 0x03ADu, 0x03AEu, 0x03AFu,
+ 0x03B0u, 0x03B9u, 0x03CCu, 0x03CDu, 0x03CEu, 0x2002u, 0x2003u, 0x3008u,
+ 0x3009u, 0x349Eu, 0x34B9u, 0x34BBu, 0x34DFu, 0x3515u, 0x36EEu, 0x36FCu,
+ 0x3781u, 0x382Fu, 0x3862u, 0x387Cu, 0x38C7u, 0x38E3u, 0x391Cu, 0x393Au,
+ 0x3A2Eu, 0x3A6Cu, 0x3AE4u, 0x3B08u, 0x3B19u, 0x3B49u, 0x3B9Du, 0x3C18u,
+ 0x3C4Eu, 0x3D33u, 0x3D96u, 0x3EACu, 0x3EB8u, 0x3F1Bu, 0x3FFCu, 0x4008u,
+ 0x4018u, 0x4039u, 0x4046u, 0x4096u, 0x40E3u, 0x412Fu, 0x4202u, 0x4227u,
+ 0x42A0u, 0x4301u, 0x4334u, 0x4359u, 0x43D5u, 0x43D9u, 0x440Bu, 0x446Bu,
+ 0x452Bu, 0x455Du, 0x4561u, 0x456Bu, 0x45D7u, 0x45F9u, 0x4635u, 0x46BEu,
+ 0x46C7u, 0x4995u, 0x49E6u, 0x4A6Eu, 0x4A76u, 0x4AB2u, 0x4B33u, 0x4BCEu,
+ 0x4CCEu, 0x4CEDu, 0x4CF8u, 0x4D56u, 0x4E0Du, 0x4E26u, 0x4E32u, 0x4E38u,
+ 0x4E39u, 0x4E3Du, 0x4E41u, 0x4E82u, 0x4E86u, 0x4EAEu, 0x4EC0u, 0x4ECCu,
+ 0x4EE4u, 0x4F60u, 0x4F80u, 0x4F86u, 0x4F8Bu, 0x4FAEu, 0x4FBBu, 0x4FBFu,
+ 0x5002u, 0x502Bu, 0x507Au, 0x5099u, 0x50CFu, 0x50DAu, 0x50E7u, 0x5140u,
+ 0x5145u, 0x514Du, 0x5154u, 0x5164u, 0x5167u, 0x5168u, 0x5169u, 0x516Du,
+ 0x5177u, 0x5180u, 0x518Du, 0x5192u, 0x5195u, 0x5197u, 0x51A4u, 0x51ACu,
+ 0x51B5u, 0x51B7u, 0x51C9u, 0x51CCu, 0x51DCu, 0x51DEu, 0x51F5u, 0x5203u,
+ 0x5207u, 0x5217u, 0x5229u, 0x523Au, 0x523Bu, 0x5246u, 0x5272u, 0x5277u,
+ 0x5289u, 0x529Bu, 0x52A3u, 0x52B3u, 0x52C7u, 0x52C9u, 0x52D2u, 0x52DEu,
+ 0x52E4u, 0x52F5u, 0x52FAu, 0x5305u, 0x5306u, 0x5317u, 0x533Fu, 0x5349u,
+ 0x5351u, 0x535Au, 0x5373u, 0x5375u, 0x537Du, 0x537Fu, 0x53C3u, 0x53CAu,
+ 0x53DFu, 0x53E5u, 0x53EBu, 0x53F1u, 0x5406u, 0x540Fu, 0x541Du, 0x5438u,
+ 0x5442u, 0x5448u, 0x5468u, 0x549Eu, 0x54A2u, 0x54BDu, 0x54F6u, 0x5510u,
+ 0x5553u, 0x5555u, 0x5563u, 0x5584u, 0x5587u, 0x5599u, 0x559Du, 0x55ABu,
+ 0x55B3u, 0x55C0u, 0x55C2u, 0x55E2u, 0x5606u, 0x5651u, 0x5668u, 0x5674u,
+ 0x56F9u, 0x5716u, 0x5717u, 0x578Bu, 0x57CEu, 0x57F4u, 0x580Du, 0x5831u,
+ 0x5832u, 0x5840u, 0x585Au, 0x585Eu, 0x58A8u, 0x58ACu, 0x58B3u, 0x58D8u,
+ 0x58DFu, 0x58EEu, 0x58F2u, 0x58F7u, 0x5906u, 0x591Au, 0x5922u, 0x5944u,
+ 0x5948u, 0x5951u, 0x5954u, 0x5962u, 0x5973u, 0x59D8u, 0x59ECu, 0x5A1Bu,
+ 0x5A27u, 0x5A62u, 0x5A66u, 0x5AB5u, 0x5B08u, 0x5B28u, 0x5B3Eu, 0x5B85u,
+ 0x5BC3u, 0x5BD8u, 0x5BE7u, 0x5BEEu, 0x5BF3u, 0x5BFFu, 0x5C06u, 0x5C22u,
+ 0x5C3Fu, 0x5C60u, 0x5C62u, 0x5C64u, 0x5C65u, 0x5C6Eu, 0x5C8Du, 0x5CC0u,
+ 0x5D19u, 0x5D43u, 0x5D50u, 0x5D6Bu, 0x5D6Eu, 0x5D7Cu, 0x5DB2u, 0x5DBAu,
+ 0x5DE1u, 0x5DE2u, 0x5DFDu, 0x5E28u, 0x5E3Du, 0x5E69u, 0x5E74u, 0x5EA6u,
+ 0x5EB0u, 0x5EB3u, 0x5EB6u, 0x5EC9u, 0x5ECAu, 0x5ED2u, 0x5ED3u, 0x5ED9u,
+ 0x5EECu, 0x5EFEu, 0x5F04u, 0x5F22u, 0x5F53u, 0x5F62u, 0x5F69u, 0x5F6Bu,
+ 0x5F8Bu, 0x5F9Au, 0x5FA9u, 0x5FADu, 0x5FCDu, 0x5FD7u, 0x5FF5u, 0x5FF9u,
+ 0x6012u, 0x601Cu, 0x6075u, 0x6081u, 0x6094u, 0x60C7u, 0x60D8u, 0x60E1u,
+ 0x6108u, 0x6144u, 0x6148u, 0x614Cu, 0x614Eu, 0x6160u, 0x6168u, 0x617Au,
+ 0x618Eu, 0x6190u, 0x61A4u, 0x61AFu, 0x61B2u, 0x61DEu, 0x61F2u, 0x61F6u,
+ 0x6200u, 0x6210u, 0x621Bu, 0x622Eu, 0x6234u, 0x625Du, 0x62B1u, 0x62C9u,
+ 0x62CFu, 0x62D3u, 0x62D4u, 0x62FCu, 0x62FEu, 0x633Du, 0x6350u, 0x6368u,
+ 0x637Bu, 0x6383u, 0x63A0u, 0x63A9u, 0x63C4u, 0x63C5u, 0x63E4u, 0x641Cu,
+ 0x6422u, 0x6452u, 0x6469u, 0x6477u, 0x647Eu, 0x649Au, 0x649Du, 0x64C4u,
+ 0x654Fu, 0x6556u, 0x656Cu, 0x6578u, 0x6599u, 0x65C5u, 0x65E2u, 0x65E3u,
+ 0x6613u, 0x6649u, 0x6674u, 0x6688u, 0x6691u, 0x669Cu, 0x66B4u, 0x66C6u,
+ 0x66F4u, 0x66F8u, 0x6700u, 0x6717u, 0x671Bu, 0x6721u, 0x674Eu, 0x6753u,
+ 0x6756u, 0x675Eu, 0x677Bu, 0x6785u, 0x6797u, 0x67F3u, 0x67FAu, 0x6817u,
+ 0x681Fu, 0x6852u, 0x6881u, 0x6885u, 0x688Eu, 0x68A8u, 0x6914u, 0x6942u,
+ 0x69A3u, 0x69EAu, 0x6A02u, 0x6A13u, 0x6AA8u, 0x6AD3u, 0x6ADBu, 0x6B04u,
+ 0x6B21u, 0x6B54u, 0x6B72u, 0x6B77u, 0x6B79u, 0x6B9Fu, 0x6BAEu, 0x6BBAu,
+ 0x6BBBu, 0x6C4Eu, 0x6C67u, 0x6C88u, 0x6CBFu, 0x6CCCu, 0x6CCDu, 0x6CE5u,
+ 0x6D16u, 0x6D1Bu, 0x6D1Eu, 0x6D34u, 0x6D3Eu, 0x6D41u, 0x6D69u, 0x6D6Au,
+ 0x6D77u, 0x6D78u, 0x6D85u, 0x6DCBu, 0x6DDAu, 0x6DEAu, 0x6DF9u, 0x6E1Au,
+ 0x6E2Fu, 0x6E6Eu, 0x6E9Cu, 0x6EBAu, 0x6EC7u, 0x6ECBu, 0x6ED1u, 0x6EDBu,
+ 0x6F0Fu, 0x6F22u, 0x6F23u, 0x6F6Eu, 0x6FC6u, 0x6FEBu, 0x6FFEu, 0x701Bu,
+ 0x701Eu, 0x7039u, 0x704Au, 0x7070u, 0x7077u, 0x707Du, 0x7099u, 0x70ADu,
+ 0x70C8u, 0x70D9u, 0x7145u, 0x7149u, 0x716Eu, 0x719Cu, 0x71CEu, 0x71D0u,
+ 0x7210u, 0x721Bu, 0x7228u, 0x722Bu, 0x7235u, 0x7250u, 0x7262u, 0x7280u,
+ 0x7295u, 0x72AFu, 0x72C0u, 0x72FCu, 0x732Au, 0x7375u, 0x737Au, 0x7387u,
+ 0x738Bu, 0x73A5u, 0x73B2u, 0x73DEu, 0x7406u, 0x7409u, 0x7422u, 0x7447u,
+ 0x745Cu, 0x7469u, 0x7471u, 0x7485u, 0x7489u, 0x7498u, 0x74CAu, 0x7506u,
+ 0x7524u, 0x753Bu, 0x753Eu, 0x7559u, 0x7565u, 0x7570u, 0x75E2u, 0x7610u,
+ 0x761Du, 0x761Fu, 0x7642u, 0x7669u, 0x76CAu, 0x76DBu, 0x76E7u, 0x76F4u,
+ 0x7701u, 0x771Eu, 0x771Fu, 0x7740u, 0x774Au, 0x778Bu, 0x77A7u, 0x784Eu,
+ 0x786Bu, 0x788Cu, 0x7891u, 0x78CAu, 0x78CCu, 0x78FBu, 0x792Au, 0x793Cu,
+ 0x793Eu, 0x7948u, 0x7949u, 0x7950u, 0x7956u, 0x795Du, 0x795Eu, 0x7965u,
+ 0x797Fu, 0x798Du, 0x798Eu, 0x798Fu, 0x79AEu, 0x79CAu, 0x79EBu, 0x7A1Cu,
+ 0x7A40u, 0x7A4Au, 0x7A4Fu, 0x7A81u, 0x7AB1u, 0x7ACBu, 0x7AEEu, 0x7B20u,
+ 0x7BC0u, 0x7BC6u, 0x7BC9u, 0x7C3Eu, 0x7C60u, 0x7C7Bu, 0x7C92u, 0x7CBEu,
+ 0x7CD2u, 0x7CD6u, 0x7CE3u, 0x7CE7u, 0x7CE8u, 0x7D00u, 0x7D10u, 0x7D22u,
+ 0x7D2Fu, 0x7D5Bu, 0x7D63u, 0x7DA0u, 0x7DBEu, 0x7DC7u, 0x7DF4u, 0x7E02u,
+ 0x7E09u, 0x7E37u, 0x7E41u, 0x7E45u, 0x7F3Eu, 0x7F72u, 0x7F79u, 0x7F7Au,
+ 0x7F85u, 0x7F95u, 0x7F9Au, 0x7FBDu, 0x7FFAu, 0x8001u, 0x8005u, 0x8046u,
+ 0x8060u, 0x806Fu, 0x8070u, 0x807Eu, 0x808Bu, 0x80ADu, 0x80B2u, 0x8103u,
+ 0x813Eu, 0x81D8u, 0x81E8u, 0x81EDu, 0x8201u, 0x8204u, 0x8218u, 0x826Fu,
+ 0x8279u, 0x828Bu, 0x8291u, 0x829Du, 0x82B1u, 0x82B3u, 0x82BDu, 0x82E5u,
+ 0x82E6u, 0x831Du, 0x8323u, 0x8336u, 0x8352u, 0x8353u, 0x8363u, 0x83ADu,
+ 0x83BDu, 0x83C9u, 0x83CAu, 0x83CCu, 0x83DCu, 0x83E7u, 0x83EFu, 0x83F1u,
+ 0x843Du, 0x8449u, 0x8457u, 0x84EEu, 0x84F1u, 0x84F3u, 0x84FCu, 0x8516u,
+ 0x8564u, 0x85CDu, 0x85FAu, 0x8606u, 0x8612u, 0x862Du, 0x863Fu, 0x8650u,
+ 0x865Cu, 0x8667u, 0x8669u, 0x8688u, 0x86A9u, 0x86E2u, 0x870Eu, 0x8728u,
+ 0x876Bu, 0x8779u, 0x8786u, 0x87BAu, 0x87E1u, 0x8801u, 0x881Fu, 0x884Cu,
+ 0x8860u, 0x8863u, 0x88C2u, 0x88CFu, 0x88D7u, 0x88DEu, 0x88E1u, 0x88F8u,
+ 0x88FAu, 0x8910u, 0x8941u, 0x8964u, 0x8986u, 0x898Bu, 0x8996u, 0x8AA0u,
+ 0x8AAAu, 0x8ABFu, 0x8ACBu, 0x8AD2u, 0x8AD6u, 0x8AEDu, 0x8AF8u, 0x8AFEu,
+ 0x8B01u, 0x8B39u, 0x8B58u, 0x8B80u, 0x8B8Au, 0x8C48u, 0x8C55u, 0x8CABu,
+ 0x8CC1u, 0x8CC2u, 0x8CC8u, 0x8CD3u, 0x8D08u, 0x8D1Bu, 0x8D77u, 0x8DBCu,
+ 0x8DCBu, 0x8DEFu, 0x8DF0u, 0x8ECAu, 0x8ED4u, 0x8F26u, 0x8F2Au, 0x8F38u,
+ 0x8F3Bu, 0x8F62u, 0x8F9Eu, 0x8FB0u, 0x8FB6u, 0x9023u, 0x9038u, 0x9072u,
+ 0x907Cu, 0x908Fu, 0x9094u, 0x90CEu, 0x90DEu, 0x90F1u, 0x90FDu, 0x9111u,
+ 0x911Bu, 0x916Au, 0x9199u, 0x91B4u, 0x91CCu, 0x91CFu, 0x91D1u, 0x9234u,
+ 0x9238u, 0x9276u, 0x927Cu, 0x92D7u, 0x92D8u, 0x9304u, 0x934Au, 0x93F9u,
+ 0x9415u, 0x958Bu, 0x95ADu, 0x95B7u, 0x962Eu, 0x964Bu, 0x964Du, 0x9675u,
+ 0x9678u, 0x967Cu, 0x9686u, 0x96A3u, 0x96B7u, 0x96B8u, 0x96C3u, 0x96E2u,
+ 0x96E3u, 0x96F6u, 0x96F7u, 0x9723u, 0x9732u, 0x9748u, 0x9756u, 0x97DBu,
+ 0x97E0u, 0x97FFu, 0x980Bu, 0x9818u, 0x9829u, 0x983Bu, 0x985Eu, 0x98E2u,
+ 0x98EFu, 0x98FCu, 0x9928u, 0x9929u, 0x99A7u, 0x99C2u, 0x99F1u, 0x99FEu,
+ 0x9A6Au, 0x9B12u, 0x9B6Fu, 0x9C40u, 0x9C57u, 0x9CFDu, 0x9D67u, 0x9DB4u,
+ 0x9DFAu, 0x9E1Eu, 0x9E7Fu, 0x9E97u, 0x9E9Fu, 0x9EBBu, 0x9ECEu, 0x9EF9u,
+ 0x9EFEu, 0x9F05u, 0x9F0Fu, 0x9F16u, 0x9F3Bu, 0x9F43u, 0x9F8Du, 0x9F8Eu,
+ 0x9F9Cu,
+};
+static const uint16_t
+_hb_ucd_dm1_p2_map[110] =
+{
+ 0x0122u, 0x051Cu, 0x0525u, 0x054Bu, 0x063Au, 0x0804u, 0x08DEu, 0x0A2Cu,
+ 0x0B63u, 0x14E4u, 0x16A8u, 0x16EAu, 0x19C8u, 0x1B18u, 0x1D0Bu, 0x1DE4u,
+ 0x1DE6u, 0x2183u, 0x219Fu, 0x2331u, 0x26D4u, 0x2844u, 0x284Au, 0x2B0Cu,
+ 0x2BF1u, 0x300Au, 0x32B8u, 0x335Fu, 0x3393u, 0x339Cu, 0x33C3u, 0x33D5u,
+ 0x346Du, 0x36A3u, 0x38A7u, 0x3A8Du, 0x3AFAu, 0x3CBCu, 0x3D1Eu, 0x3ED1u,
+ 0x3F5Eu, 0x3F8Eu, 0x4263u, 0x42EEu, 0x43ABu, 0x4608u, 0x4735u, 0x4814u,
+ 0x4C36u, 0x4C92u, 0x4FA1u, 0x4FB8u, 0x5044u, 0x50F2u, 0x50F3u, 0x5119u,
+ 0x5133u, 0x5249u, 0x541Du, 0x5626u, 0x569Au, 0x56C5u, 0x597Cu, 0x5AA7u,
+ 0x5BABu, 0x5C80u, 0x5CD0u, 0x5F86u, 0x61DAu, 0x6228u, 0x6247u, 0x62D9u,
+ 0x633Eu, 0x64DAu, 0x6523u, 0x65A8u, 0x67A7u, 0x67B5u, 0x6B3Cu, 0x6C36u,
+ 0x6CD5u, 0x6D6Bu, 0x6F2Cu, 0x6FB1u, 0x70D2u, 0x73CAu, 0x7667u, 0x78AEu,
+ 0x7966u, 0x7CA8u, 0x7ED3u, 0x7F2Fu, 0x85D2u, 0x85EDu, 0x872Eu, 0x8BFAu,
+ 0x8D77u, 0x9145u, 0x91DFu, 0x921Au, 0x940Au, 0x9496u, 0x95B6u, 0x9B30u,
+ 0xA0CEu, 0xA105u, 0xA20Eu, 0xA291u, 0xA392u, 0xA600u,
+};
+static const uint32_t
+_hb_ucd_dm2_u32_map[638] =
+{
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x003Cu, 0x0338u, 0x226Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x003Du, 0x0338u, 0x2260u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x003Eu, 0x0338u, 0x226Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0300u, 0x00C0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0301u, 0x00C1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0302u, 0x00C2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0303u, 0x00C3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0304u, 0x0100u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0306u, 0x0102u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0307u, 0x0226u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0308u, 0x00C4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0309u, 0x1EA2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x030Au, 0x00C5u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x030Cu, 0x01CDu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x030Fu, 0x0200u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0311u, 0x0202u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0323u, 0x1EA0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0325u, 0x1E00u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0328u, 0x0104u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0042u, 0x0307u, 0x1E02u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0042u, 0x0323u, 0x1E04u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0042u, 0x0331u, 0x1E06u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0043u, 0x0301u, 0x0106u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0043u, 0x0302u, 0x0108u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0043u, 0x0307u, 0x010Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0043u, 0x030Cu, 0x010Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0043u, 0x0327u, 0x00C7u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0044u, 0x0307u, 0x1E0Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0044u, 0x030Cu, 0x010Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0044u, 0x0323u, 0x1E0Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0044u, 0x0327u, 0x1E10u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0044u, 0x032Du, 0x1E12u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0044u, 0x0331u, 0x1E0Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0300u, 0x00C8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0301u, 0x00C9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0302u, 0x00CAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0303u, 0x1EBCu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0304u, 0x0112u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0306u, 0x0114u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0307u, 0x0116u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0308u, 0x00CBu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0309u, 0x1EBAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x030Cu, 0x011Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x030Fu, 0x0204u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0311u, 0x0206u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0323u, 0x1EB8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0327u, 0x0228u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0328u, 0x0118u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x032Du, 0x1E18u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0330u, 0x1E1Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0046u, 0x0307u, 0x1E1Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0047u, 0x0301u, 0x01F4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0047u, 0x0302u, 0x011Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0047u, 0x0304u, 0x1E20u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0047u, 0x0306u, 0x011Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0047u, 0x0307u, 0x0120u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0047u, 0x030Cu, 0x01E6u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0047u, 0x0327u, 0x0122u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0048u, 0x0302u, 0x0124u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0048u, 0x0307u, 0x1E22u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0048u, 0x0308u, 0x1E26u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0048u, 0x030Cu, 0x021Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0048u, 0x0323u, 0x1E24u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0048u, 0x0327u, 0x1E28u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0048u, 0x032Eu, 0x1E2Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0300u, 0x00CCu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0301u, 0x00CDu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0302u, 0x00CEu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0303u, 0x0128u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0304u, 0x012Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0306u, 0x012Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0307u, 0x0130u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0308u, 0x00CFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0309u, 0x1EC8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x030Cu, 0x01CFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x030Fu, 0x0208u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0311u, 0x020Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0323u, 0x1ECAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0328u, 0x012Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0330u, 0x1E2Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Au, 0x0302u, 0x0134u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Bu, 0x0301u, 0x1E30u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Bu, 0x030Cu, 0x01E8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Bu, 0x0323u, 0x1E32u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Bu, 0x0327u, 0x0136u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Bu, 0x0331u, 0x1E34u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Cu, 0x0301u, 0x0139u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Cu, 0x030Cu, 0x013Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Cu, 0x0323u, 0x1E36u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Cu, 0x0327u, 0x013Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Cu, 0x032Du, 0x1E3Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Cu, 0x0331u, 0x1E3Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Du, 0x0301u, 0x1E3Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Du, 0x0307u, 0x1E40u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Du, 0x0323u, 0x1E42u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x0300u, 0x01F8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x0301u, 0x0143u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x0303u, 0x00D1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x0307u, 0x1E44u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x030Cu, 0x0147u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x0323u, 0x1E46u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x0327u, 0x0145u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x032Du, 0x1E4Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x0331u, 0x1E48u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0300u, 0x00D2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0301u, 0x00D3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0302u, 0x00D4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0303u, 0x00D5u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0304u, 0x014Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0306u, 0x014Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0307u, 0x022Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0308u, 0x00D6u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0309u, 0x1ECEu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x030Bu, 0x0150u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x030Cu, 0x01D1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x030Fu, 0x020Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0311u, 0x020Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x031Bu, 0x01A0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0323u, 0x1ECCu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0328u, 0x01EAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0050u, 0x0301u, 0x1E54u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0050u, 0x0307u, 0x1E56u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x0301u, 0x0154u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x0307u, 0x1E58u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x030Cu, 0x0158u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x030Fu, 0x0210u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x0311u, 0x0212u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x0323u, 0x1E5Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x0327u, 0x0156u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x0331u, 0x1E5Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0053u, 0x0301u, 0x015Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0053u, 0x0302u, 0x015Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0053u, 0x0307u, 0x1E60u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0053u, 0x030Cu, 0x0160u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0053u, 0x0323u, 0x1E62u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0053u, 0x0326u, 0x0218u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0053u, 0x0327u, 0x015Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0054u, 0x0307u, 0x1E6Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0054u, 0x030Cu, 0x0164u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0054u, 0x0323u, 0x1E6Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0054u, 0x0326u, 0x021Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0054u, 0x0327u, 0x0162u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0054u, 0x032Du, 0x1E70u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0054u, 0x0331u, 0x1E6Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0300u, 0x00D9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0301u, 0x00DAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0302u, 0x00DBu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0303u, 0x0168u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0304u, 0x016Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0306u, 0x016Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0308u, 0x00DCu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0309u, 0x1EE6u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x030Au, 0x016Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x030Bu, 0x0170u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x030Cu, 0x01D3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x030Fu, 0x0214u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0311u, 0x0216u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x031Bu, 0x01AFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0323u, 0x1EE4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0324u, 0x1E72u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0328u, 0x0172u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x032Du, 0x1E76u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0330u, 0x1E74u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0056u, 0x0303u, 0x1E7Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0056u, 0x0323u, 0x1E7Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0057u, 0x0300u, 0x1E80u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0057u, 0x0301u, 0x1E82u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0057u, 0x0302u, 0x0174u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0057u, 0x0307u, 0x1E86u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0057u, 0x0308u, 0x1E84u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0057u, 0x0323u, 0x1E88u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0058u, 0x0307u, 0x1E8Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0058u, 0x0308u, 0x1E8Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0300u, 0x1EF2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0301u, 0x00DDu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0302u, 0x0176u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0303u, 0x1EF8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0304u, 0x0232u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0307u, 0x1E8Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0308u, 0x0178u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0309u, 0x1EF6u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0323u, 0x1EF4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x005Au, 0x0301u, 0x0179u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x005Au, 0x0302u, 0x1E90u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x005Au, 0x0307u, 0x017Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x005Au, 0x030Cu, 0x017Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x005Au, 0x0323u, 0x1E92u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x005Au, 0x0331u, 0x1E94u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0300u, 0x00E0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0301u, 0x00E1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0302u, 0x00E2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0303u, 0x00E3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0304u, 0x0101u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0306u, 0x0103u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0307u, 0x0227u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0308u, 0x00E4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0309u, 0x1EA3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x030Au, 0x00E5u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x030Cu, 0x01CEu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x030Fu, 0x0201u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0311u, 0x0203u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0323u, 0x1EA1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0325u, 0x1E01u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0328u, 0x0105u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0062u, 0x0307u, 0x1E03u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0062u, 0x0323u, 0x1E05u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0062u, 0x0331u, 0x1E07u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0063u, 0x0301u, 0x0107u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0063u, 0x0302u, 0x0109u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0063u, 0x0307u, 0x010Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0063u, 0x030Cu, 0x010Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0063u, 0x0327u, 0x00E7u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0064u, 0x0307u, 0x1E0Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0064u, 0x030Cu, 0x010Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0064u, 0x0323u, 0x1E0Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0064u, 0x0327u, 0x1E11u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0064u, 0x032Du, 0x1E13u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0064u, 0x0331u, 0x1E0Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0300u, 0x00E8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0301u, 0x00E9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0302u, 0x00EAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0303u, 0x1EBDu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0304u, 0x0113u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0306u, 0x0115u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0307u, 0x0117u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0308u, 0x00EBu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0309u, 0x1EBBu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x030Cu, 0x011Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x030Fu, 0x0205u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0311u, 0x0207u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0323u, 0x1EB9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0327u, 0x0229u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0328u, 0x0119u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x032Du, 0x1E19u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0330u, 0x1E1Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0066u, 0x0307u, 0x1E1Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0067u, 0x0301u, 0x01F5u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0067u, 0x0302u, 0x011Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0067u, 0x0304u, 0x1E21u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0067u, 0x0306u, 0x011Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0067u, 0x0307u, 0x0121u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0067u, 0x030Cu, 0x01E7u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0067u, 0x0327u, 0x0123u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x0302u, 0x0125u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x0307u, 0x1E23u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x0308u, 0x1E27u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x030Cu, 0x021Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x0323u, 0x1E25u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x0327u, 0x1E29u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x032Eu, 0x1E2Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x0331u, 0x1E96u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0300u, 0x00ECu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0301u, 0x00EDu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0302u, 0x00EEu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0303u, 0x0129u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0304u, 0x012Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0306u, 0x012Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0308u, 0x00EFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0309u, 0x1EC9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x030Cu, 0x01D0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x030Fu, 0x0209u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0311u, 0x020Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0323u, 0x1ECBu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0328u, 0x012Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0330u, 0x1E2Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Au, 0x0302u, 0x0135u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Au, 0x030Cu, 0x01F0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Bu, 0x0301u, 0x1E31u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Bu, 0x030Cu, 0x01E9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Bu, 0x0323u, 0x1E33u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Bu, 0x0327u, 0x0137u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Bu, 0x0331u, 0x1E35u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Cu, 0x0301u, 0x013Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Cu, 0x030Cu, 0x013Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Cu, 0x0323u, 0x1E37u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Cu, 0x0327u, 0x013Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Cu, 0x032Du, 0x1E3Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Cu, 0x0331u, 0x1E3Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Du, 0x0301u, 0x1E3Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Du, 0x0307u, 0x1E41u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Du, 0x0323u, 0x1E43u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x0300u, 0x01F9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x0301u, 0x0144u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x0303u, 0x00F1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x0307u, 0x1E45u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x030Cu, 0x0148u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x0323u, 0x1E47u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x0327u, 0x0146u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x032Du, 0x1E4Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x0331u, 0x1E49u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0300u, 0x00F2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0301u, 0x00F3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0302u, 0x00F4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0303u, 0x00F5u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0304u, 0x014Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0306u, 0x014Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0307u, 0x022Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0308u, 0x00F6u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0309u, 0x1ECFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x030Bu, 0x0151u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x030Cu, 0x01D2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x030Fu, 0x020Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0311u, 0x020Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x031Bu, 0x01A1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0323u, 0x1ECDu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0328u, 0x01EBu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0070u, 0x0301u, 0x1E55u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0070u, 0x0307u, 0x1E57u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x0301u, 0x0155u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x0307u, 0x1E59u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x030Cu, 0x0159u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x030Fu, 0x0211u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x0311u, 0x0213u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x0323u, 0x1E5Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x0327u, 0x0157u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x0331u, 0x1E5Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0073u, 0x0301u, 0x015Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0073u, 0x0302u, 0x015Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0073u, 0x0307u, 0x1E61u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0073u, 0x030Cu, 0x0161u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0073u, 0x0323u, 0x1E63u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0073u, 0x0326u, 0x0219u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0073u, 0x0327u, 0x015Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x0307u, 0x1E6Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x0308u, 0x1E97u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x030Cu, 0x0165u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x0323u, 0x1E6Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x0326u, 0x021Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x0327u, 0x0163u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x032Du, 0x1E71u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x0331u, 0x1E6Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0300u, 0x00F9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0301u, 0x00FAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0302u, 0x00FBu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0303u, 0x0169u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0304u, 0x016Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0306u, 0x016Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0308u, 0x00FCu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0309u, 0x1EE7u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x030Au, 0x016Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x030Bu, 0x0171u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x030Cu, 0x01D4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x030Fu, 0x0215u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0311u, 0x0217u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x031Bu, 0x01B0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0323u, 0x1EE5u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0324u, 0x1E73u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0328u, 0x0173u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x032Du, 0x1E77u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0330u, 0x1E75u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0076u, 0x0303u, 0x1E7Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0076u, 0x0323u, 0x1E7Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0077u, 0x0300u, 0x1E81u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0077u, 0x0301u, 0x1E83u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0077u, 0x0302u, 0x0175u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0077u, 0x0307u, 0x1E87u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0077u, 0x0308u, 0x1E85u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0077u, 0x030Au, 0x1E98u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0077u, 0x0323u, 0x1E89u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0078u, 0x0307u, 0x1E8Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0078u, 0x0308u, 0x1E8Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0300u, 0x1EF3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0301u, 0x00FDu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0302u, 0x0177u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0303u, 0x1EF9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0304u, 0x0233u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0307u, 0x1E8Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0308u, 0x00FFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0309u, 0x1EF7u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x030Au, 0x1E99u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0323u, 0x1EF5u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x007Au, 0x0301u, 0x017Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x007Au, 0x0302u, 0x1E91u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x007Au, 0x0307u, 0x017Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x007Au, 0x030Cu, 0x017Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x007Au, 0x0323u, 0x1E93u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x007Au, 0x0331u, 0x1E95u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00A8u, 0x0300u, 0x1FEDu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00A8u, 0x0301u, 0x0385u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00A8u, 0x0342u, 0x1FC1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00C2u, 0x0300u, 0x1EA6u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00C2u, 0x0301u, 0x1EA4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00C2u, 0x0303u, 0x1EAAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00C2u, 0x0309u, 0x1EA8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00C4u, 0x0304u, 0x01DEu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00C5u, 0x0301u, 0x01FAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00C6u, 0x0301u, 0x01FCu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00C6u, 0x0304u, 0x01E2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00C7u, 0x0301u, 0x1E08u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00CAu, 0x0300u, 0x1EC0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00CAu, 0x0301u, 0x1EBEu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00CAu, 0x0303u, 0x1EC4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00CAu, 0x0309u, 0x1EC2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00CFu, 0x0301u, 0x1E2Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00D4u, 0x0300u, 0x1ED2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00D4u, 0x0301u, 0x1ED0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00D4u, 0x0303u, 0x1ED6u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00D4u, 0x0309u, 0x1ED4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00D5u, 0x0301u, 0x1E4Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00D5u, 0x0304u, 0x022Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00D5u, 0x0308u, 0x1E4Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00D6u, 0x0304u, 0x022Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00D8u, 0x0301u, 0x01FEu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00DCu, 0x0300u, 0x01DBu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00DCu, 0x0301u, 0x01D7u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00DCu, 0x0304u, 0x01D5u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00DCu, 0x030Cu, 0x01D9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00E2u, 0x0300u, 0x1EA7u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00E2u, 0x0301u, 0x1EA5u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00E2u, 0x0303u, 0x1EABu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00E2u, 0x0309u, 0x1EA9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00E4u, 0x0304u, 0x01DFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00E5u, 0x0301u, 0x01FBu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00E6u, 0x0301u, 0x01FDu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00E6u, 0x0304u, 0x01E3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00E7u, 0x0301u, 0x1E09u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00EAu, 0x0300u, 0x1EC1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00EAu, 0x0301u, 0x1EBFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00EAu, 0x0303u, 0x1EC5u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00EAu, 0x0309u, 0x1EC3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00EFu, 0x0301u, 0x1E2Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00F4u, 0x0300u, 0x1ED3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00F4u, 0x0301u, 0x1ED1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00F4u, 0x0303u, 0x1ED7u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00F4u, 0x0309u, 0x1ED5u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00F5u, 0x0301u, 0x1E4Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00F5u, 0x0304u, 0x022Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00F5u, 0x0308u, 0x1E4Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00F6u, 0x0304u, 0x022Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00F8u, 0x0301u, 0x01FFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00FCu, 0x0300u, 0x01DCu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00FCu, 0x0301u, 0x01D8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00FCu, 0x0304u, 0x01D6u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x00FCu, 0x030Cu, 0x01DAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0102u, 0x0300u, 0x1EB0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0102u, 0x0301u, 0x1EAEu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0102u, 0x0303u, 0x1EB4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0102u, 0x0309u, 0x1EB2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0103u, 0x0300u, 0x1EB1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0103u, 0x0301u, 0x1EAFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0103u, 0x0303u, 0x1EB5u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0103u, 0x0309u, 0x1EB3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0112u, 0x0300u, 0x1E14u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0112u, 0x0301u, 0x1E16u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0113u, 0x0300u, 0x1E15u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0113u, 0x0301u, 0x1E17u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x014Cu, 0x0300u, 0x1E50u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x014Cu, 0x0301u, 0x1E52u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x014Du, 0x0300u, 0x1E51u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x014Du, 0x0301u, 0x1E53u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x015Au, 0x0307u, 0x1E64u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x015Bu, 0x0307u, 0x1E65u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0160u, 0x0307u, 0x1E66u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0161u, 0x0307u, 0x1E67u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0168u, 0x0301u, 0x1E78u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0169u, 0x0301u, 0x1E79u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x016Au, 0x0308u, 0x1E7Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x016Bu, 0x0308u, 0x1E7Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x017Fu, 0x0307u, 0x1E9Bu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01A0u, 0x0300u, 0x1EDCu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01A0u, 0x0301u, 0x1EDAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01A0u, 0x0303u, 0x1EE0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01A0u, 0x0309u, 0x1EDEu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01A0u, 0x0323u, 0x1EE2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01A1u, 0x0300u, 0x1EDDu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01A1u, 0x0301u, 0x1EDBu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01A1u, 0x0303u, 0x1EE1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01A1u, 0x0309u, 0x1EDFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01A1u, 0x0323u, 0x1EE3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01AFu, 0x0300u, 0x1EEAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01AFu, 0x0301u, 0x1EE8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01AFu, 0x0303u, 0x1EEEu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01AFu, 0x0309u, 0x1EECu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01AFu, 0x0323u, 0x1EF0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01B0u, 0x0300u, 0x1EEBu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01B0u, 0x0301u, 0x1EE9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01B0u, 0x0303u, 0x1EEFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01B0u, 0x0309u, 0x1EEDu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01B0u, 0x0323u, 0x1EF1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01B7u, 0x030Cu, 0x01EEu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01EAu, 0x0304u, 0x01ECu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x01EBu, 0x0304u, 0x01EDu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0226u, 0x0304u, 0x01E0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0227u, 0x0304u, 0x01E1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0228u, 0x0306u, 0x1E1Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0229u, 0x0306u, 0x1E1Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x022Eu, 0x0304u, 0x0230u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x022Fu, 0x0304u, 0x0231u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0292u, 0x030Cu, 0x01EFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0308u, 0x0301u, 0x0000u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0391u, 0x0300u, 0x1FBAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0391u, 0x0301u, 0x0386u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0391u, 0x0304u, 0x1FB9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0391u, 0x0306u, 0x1FB8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0391u, 0x0313u, 0x1F08u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0391u, 0x0314u, 0x1F09u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0391u, 0x0345u, 0x1FBCu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0395u, 0x0300u, 0x1FC8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0395u, 0x0301u, 0x0388u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0395u, 0x0313u, 0x1F18u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0395u, 0x0314u, 0x1F19u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0397u, 0x0300u, 0x1FCAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0397u, 0x0301u, 0x0389u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0397u, 0x0313u, 0x1F28u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0397u, 0x0314u, 0x1F29u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0397u, 0x0345u, 0x1FCCu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0399u, 0x0300u, 0x1FDAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0399u, 0x0301u, 0x038Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0399u, 0x0304u, 0x1FD9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0399u, 0x0306u, 0x1FD8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0399u, 0x0308u, 0x03AAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0399u, 0x0313u, 0x1F38u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0399u, 0x0314u, 0x1F39u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x039Fu, 0x0300u, 0x1FF8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x039Fu, 0x0301u, 0x038Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x039Fu, 0x0313u, 0x1F48u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x039Fu, 0x0314u, 0x1F49u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03A1u, 0x0314u, 0x1FECu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03A5u, 0x0300u, 0x1FEAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03A5u, 0x0301u, 0x038Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03A5u, 0x0304u, 0x1FE9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03A5u, 0x0306u, 0x1FE8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03A5u, 0x0308u, 0x03ABu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03A5u, 0x0314u, 0x1F59u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03A9u, 0x0300u, 0x1FFAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03A9u, 0x0301u, 0x038Fu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03A9u, 0x0313u, 0x1F68u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03A9u, 0x0314u, 0x1F69u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03A9u, 0x0345u, 0x1FFCu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03ACu, 0x0345u, 0x1FB4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03AEu, 0x0345u, 0x1FC4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0300u, 0x1F70u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0301u, 0x03ACu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0304u, 0x1FB1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0306u, 0x1FB0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0313u, 0x1F00u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0314u, 0x1F01u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0342u, 0x1FB6u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0345u, 0x1FB3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B5u, 0x0300u, 0x1F72u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B5u, 0x0301u, 0x03ADu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B5u, 0x0313u, 0x1F10u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B5u, 0x0314u, 0x1F11u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B7u, 0x0300u, 0x1F74u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B7u, 0x0301u, 0x03AEu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B7u, 0x0313u, 0x1F20u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B7u, 0x0314u, 0x1F21u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B7u, 0x0342u, 0x1FC6u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B7u, 0x0345u, 0x1FC3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0300u, 0x1F76u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0301u, 0x03AFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0304u, 0x1FD1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0306u, 0x1FD0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0308u, 0x03CAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0313u, 0x1F30u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0314u, 0x1F31u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0342u, 0x1FD6u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03BFu, 0x0300u, 0x1F78u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03BFu, 0x0301u, 0x03CCu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03BFu, 0x0313u, 0x1F40u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03BFu, 0x0314u, 0x1F41u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C1u, 0x0313u, 0x1FE4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C1u, 0x0314u, 0x1FE5u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0300u, 0x1F7Au),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0301u, 0x03CDu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0304u, 0x1FE1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0306u, 0x1FE0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0308u, 0x03CBu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0313u, 0x1F50u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0314u, 0x1F51u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0342u, 0x1FE6u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C9u, 0x0300u, 0x1F7Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C9u, 0x0301u, 0x03CEu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C9u, 0x0313u, 0x1F60u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C9u, 0x0314u, 0x1F61u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C9u, 0x0342u, 0x1FF6u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03C9u, 0x0345u, 0x1FF3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03CAu, 0x0300u, 0x1FD2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03CAu, 0x0301u, 0x0390u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03CAu, 0x0342u, 0x1FD7u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03CBu, 0x0300u, 0x1FE2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03CBu, 0x0301u, 0x03B0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03CBu, 0x0342u, 0x1FE7u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03CEu, 0x0345u, 0x1FF4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03D2u, 0x0301u, 0x03D3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x03D2u, 0x0308u, 0x03D4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0406u, 0x0308u, 0x0407u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0410u, 0x0306u, 0x04D0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0410u, 0x0308u, 0x04D2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0413u, 0x0301u, 0x0403u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0415u, 0x0300u, 0x0400u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0415u, 0x0306u, 0x04D6u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0415u, 0x0308u, 0x0401u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0416u, 0x0306u, 0x04C1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0416u, 0x0308u, 0x04DCu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0417u, 0x0308u, 0x04DEu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0418u, 0x0300u, 0x040Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0418u, 0x0304u, 0x04E2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0418u, 0x0306u, 0x0419u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0418u, 0x0308u, 0x04E4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x041Au, 0x0301u, 0x040Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x041Eu, 0x0308u, 0x04E6u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0423u, 0x0304u, 0x04EEu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0423u, 0x0306u, 0x040Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0423u, 0x0308u, 0x04F0u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0423u, 0x030Bu, 0x04F2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0427u, 0x0308u, 0x04F4u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x042Bu, 0x0308u, 0x04F8u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x042Du, 0x0308u, 0x04ECu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0430u, 0x0306u, 0x04D1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0430u, 0x0308u, 0x04D3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0433u, 0x0301u, 0x0453u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0435u, 0x0300u, 0x0450u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0435u, 0x0306u, 0x04D7u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0435u, 0x0308u, 0x0451u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0436u, 0x0306u, 0x04C2u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0436u, 0x0308u, 0x04DDu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0437u, 0x0308u, 0x04DFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0438u, 0x0300u, 0x045Du),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0438u, 0x0304u, 0x04E3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0438u, 0x0306u, 0x0439u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0438u, 0x0308u, 0x04E5u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x043Au, 0x0301u, 0x045Cu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x043Eu, 0x0308u, 0x04E7u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0443u, 0x0304u, 0x04EFu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0443u, 0x0306u, 0x045Eu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0443u, 0x0308u, 0x04F1u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0443u, 0x030Bu, 0x04F3u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0447u, 0x0308u, 0x04F5u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x044Bu, 0x0308u, 0x04F9u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x044Du, 0x0308u, 0x04EDu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0456u, 0x0308u, 0x0457u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0474u, 0x030Fu, 0x0476u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x0475u, 0x030Fu, 0x0477u),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x04D8u, 0x0308u, 0x04DAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x04D9u, 0x0308u, 0x04DBu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x04E8u, 0x0308u, 0x04EAu),
+ HB_CODEPOINT_ENCODE3_11_7_14 (0x04E9u, 0x0308u, 0x04EBu),
+};
+static const uint64_t
+_hb_ucd_dm2_u64_map[388] =
+{
+ HB_CODEPOINT_ENCODE3 (0x05D0u, 0x05B7u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05D0u, 0x05B8u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x05D0u, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05D1u, 0x05BCu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x05D1u, 0x05BFu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05D2u, 0x05BCu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x05D3u, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05D4u, 0x05BCu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x05D5u, 0x05B9u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05D5u, 0x05BCu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x05D6u, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05D8u, 0x05BCu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x05D9u, 0x05B4u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05D9u, 0x05BCu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x05DAu, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05DBu, 0x05BCu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x05DBu, 0x05BFu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05DCu, 0x05BCu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x05DEu, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05E0u, 0x05BCu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x05E1u, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05E3u, 0x05BCu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x05E4u, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05E4u, 0x05BFu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x05E6u, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05E7u, 0x05BCu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x05E8u, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05E9u, 0x05BCu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x05E9u, 0x05C1u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05E9u, 0x05C2u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x05EAu, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05F2u, 0x05B7u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0627u, 0x0653u, 0x0622u), HB_CODEPOINT_ENCODE3 (0x0627u, 0x0654u, 0x0623u),
+ HB_CODEPOINT_ENCODE3 (0x0627u, 0x0655u, 0x0625u), HB_CODEPOINT_ENCODE3 (0x0648u, 0x0654u, 0x0624u),
+ HB_CODEPOINT_ENCODE3 (0x064Au, 0x0654u, 0x0626u), HB_CODEPOINT_ENCODE3 (0x06C1u, 0x0654u, 0x06C2u),
+ HB_CODEPOINT_ENCODE3 (0x06D2u, 0x0654u, 0x06D3u), HB_CODEPOINT_ENCODE3 (0x06D5u, 0x0654u, 0x06C0u),
+ HB_CODEPOINT_ENCODE3 (0x0915u, 0x093Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0916u, 0x093Cu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0917u, 0x093Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x091Cu, 0x093Cu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0921u, 0x093Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0922u, 0x093Cu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0928u, 0x093Cu, 0x0929u), HB_CODEPOINT_ENCODE3 (0x092Bu, 0x093Cu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x092Fu, 0x093Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0930u, 0x093Cu, 0x0931u),
+ HB_CODEPOINT_ENCODE3 (0x0933u, 0x093Cu, 0x0934u), HB_CODEPOINT_ENCODE3 (0x09A1u, 0x09BCu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x09A2u, 0x09BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x09AFu, 0x09BCu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x09C7u, 0x09BEu, 0x09CBu), HB_CODEPOINT_ENCODE3 (0x09C7u, 0x09D7u, 0x09CCu),
+ HB_CODEPOINT_ENCODE3 (0x0A16u, 0x0A3Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0A17u, 0x0A3Cu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0A1Cu, 0x0A3Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0A2Bu, 0x0A3Cu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0A32u, 0x0A3Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0A38u, 0x0A3Cu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0B21u, 0x0B3Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0B22u, 0x0B3Cu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0B47u, 0x0B3Eu, 0x0B4Bu), HB_CODEPOINT_ENCODE3 (0x0B47u, 0x0B56u, 0x0B48u),
+ HB_CODEPOINT_ENCODE3 (0x0B47u, 0x0B57u, 0x0B4Cu), HB_CODEPOINT_ENCODE3 (0x0B92u, 0x0BD7u, 0x0B94u),
+ HB_CODEPOINT_ENCODE3 (0x0BC6u, 0x0BBEu, 0x0BCAu), HB_CODEPOINT_ENCODE3 (0x0BC6u, 0x0BD7u, 0x0BCCu),
+ HB_CODEPOINT_ENCODE3 (0x0BC7u, 0x0BBEu, 0x0BCBu), HB_CODEPOINT_ENCODE3 (0x0C46u, 0x0C56u, 0x0C48u),
+ HB_CODEPOINT_ENCODE3 (0x0CBFu, 0x0CD5u, 0x0CC0u), HB_CODEPOINT_ENCODE3 (0x0CC6u, 0x0CC2u, 0x0CCAu),
+ HB_CODEPOINT_ENCODE3 (0x0CC6u, 0x0CD5u, 0x0CC7u), HB_CODEPOINT_ENCODE3 (0x0CC6u, 0x0CD6u, 0x0CC8u),
+ HB_CODEPOINT_ENCODE3 (0x0CCAu, 0x0CD5u, 0x0CCBu), HB_CODEPOINT_ENCODE3 (0x0D46u, 0x0D3Eu, 0x0D4Au),
+ HB_CODEPOINT_ENCODE3 (0x0D46u, 0x0D57u, 0x0D4Cu), HB_CODEPOINT_ENCODE3 (0x0D47u, 0x0D3Eu, 0x0D4Bu),
+ HB_CODEPOINT_ENCODE3 (0x0DD9u, 0x0DCAu, 0x0DDAu), HB_CODEPOINT_ENCODE3 (0x0DD9u, 0x0DCFu, 0x0DDCu),
+ HB_CODEPOINT_ENCODE3 (0x0DD9u, 0x0DDFu, 0x0DDEu), HB_CODEPOINT_ENCODE3 (0x0DDCu, 0x0DCAu, 0x0DDDu),
+ HB_CODEPOINT_ENCODE3 (0x0F40u, 0x0FB5u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0F42u, 0x0FB7u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0F4Cu, 0x0FB7u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0F51u, 0x0FB7u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0F56u, 0x0FB7u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0F5Bu, 0x0FB7u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0F71u, 0x0F72u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0F71u, 0x0F74u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0F71u, 0x0F80u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0F90u, 0x0FB5u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0F92u, 0x0FB7u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0F9Cu, 0x0FB7u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0FA1u, 0x0FB7u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0FA6u, 0x0FB7u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0FABu, 0x0FB7u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0FB2u, 0x0F80u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x0FB3u, 0x0F80u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x1025u, 0x102Eu, 0x1026u),
+ HB_CODEPOINT_ENCODE3 (0x1B05u, 0x1B35u, 0x1B06u), HB_CODEPOINT_ENCODE3 (0x1B07u, 0x1B35u, 0x1B08u),
+ HB_CODEPOINT_ENCODE3 (0x1B09u, 0x1B35u, 0x1B0Au), HB_CODEPOINT_ENCODE3 (0x1B0Bu, 0x1B35u, 0x1B0Cu),
+ HB_CODEPOINT_ENCODE3 (0x1B0Du, 0x1B35u, 0x1B0Eu), HB_CODEPOINT_ENCODE3 (0x1B11u, 0x1B35u, 0x1B12u),
+ HB_CODEPOINT_ENCODE3 (0x1B3Au, 0x1B35u, 0x1B3Bu), HB_CODEPOINT_ENCODE3 (0x1B3Cu, 0x1B35u, 0x1B3Du),
+ HB_CODEPOINT_ENCODE3 (0x1B3Eu, 0x1B35u, 0x1B40u), HB_CODEPOINT_ENCODE3 (0x1B3Fu, 0x1B35u, 0x1B41u),
+ HB_CODEPOINT_ENCODE3 (0x1B42u, 0x1B35u, 0x1B43u), HB_CODEPOINT_ENCODE3 (0x1E36u, 0x0304u, 0x1E38u),
+ HB_CODEPOINT_ENCODE3 (0x1E37u, 0x0304u, 0x1E39u), HB_CODEPOINT_ENCODE3 (0x1E5Au, 0x0304u, 0x1E5Cu),
+ HB_CODEPOINT_ENCODE3 (0x1E5Bu, 0x0304u, 0x1E5Du), HB_CODEPOINT_ENCODE3 (0x1E62u, 0x0307u, 0x1E68u),
+ HB_CODEPOINT_ENCODE3 (0x1E63u, 0x0307u, 0x1E69u), HB_CODEPOINT_ENCODE3 (0x1EA0u, 0x0302u, 0x1EACu),
+ HB_CODEPOINT_ENCODE3 (0x1EA0u, 0x0306u, 0x1EB6u), HB_CODEPOINT_ENCODE3 (0x1EA1u, 0x0302u, 0x1EADu),
+ HB_CODEPOINT_ENCODE3 (0x1EA1u, 0x0306u, 0x1EB7u), HB_CODEPOINT_ENCODE3 (0x1EB8u, 0x0302u, 0x1EC6u),
+ HB_CODEPOINT_ENCODE3 (0x1EB9u, 0x0302u, 0x1EC7u), HB_CODEPOINT_ENCODE3 (0x1ECCu, 0x0302u, 0x1ED8u),
+ HB_CODEPOINT_ENCODE3 (0x1ECDu, 0x0302u, 0x1ED9u), HB_CODEPOINT_ENCODE3 (0x1F00u, 0x0300u, 0x1F02u),
+ HB_CODEPOINT_ENCODE3 (0x1F00u, 0x0301u, 0x1F04u), HB_CODEPOINT_ENCODE3 (0x1F00u, 0x0342u, 0x1F06u),
+ HB_CODEPOINT_ENCODE3 (0x1F00u, 0x0345u, 0x1F80u), HB_CODEPOINT_ENCODE3 (0x1F01u, 0x0300u, 0x1F03u),
+ HB_CODEPOINT_ENCODE3 (0x1F01u, 0x0301u, 0x1F05u), HB_CODEPOINT_ENCODE3 (0x1F01u, 0x0342u, 0x1F07u),
+ HB_CODEPOINT_ENCODE3 (0x1F01u, 0x0345u, 0x1F81u), HB_CODEPOINT_ENCODE3 (0x1F02u, 0x0345u, 0x1F82u),
+ HB_CODEPOINT_ENCODE3 (0x1F03u, 0x0345u, 0x1F83u), HB_CODEPOINT_ENCODE3 (0x1F04u, 0x0345u, 0x1F84u),
+ HB_CODEPOINT_ENCODE3 (0x1F05u, 0x0345u, 0x1F85u), HB_CODEPOINT_ENCODE3 (0x1F06u, 0x0345u, 0x1F86u),
+ HB_CODEPOINT_ENCODE3 (0x1F07u, 0x0345u, 0x1F87u), HB_CODEPOINT_ENCODE3 (0x1F08u, 0x0300u, 0x1F0Au),
+ HB_CODEPOINT_ENCODE3 (0x1F08u, 0x0301u, 0x1F0Cu), HB_CODEPOINT_ENCODE3 (0x1F08u, 0x0342u, 0x1F0Eu),
+ HB_CODEPOINT_ENCODE3 (0x1F08u, 0x0345u, 0x1F88u), HB_CODEPOINT_ENCODE3 (0x1F09u, 0x0300u, 0x1F0Bu),
+ HB_CODEPOINT_ENCODE3 (0x1F09u, 0x0301u, 0x1F0Du), HB_CODEPOINT_ENCODE3 (0x1F09u, 0x0342u, 0x1F0Fu),
+ HB_CODEPOINT_ENCODE3 (0x1F09u, 0x0345u, 0x1F89u), HB_CODEPOINT_ENCODE3 (0x1F0Au, 0x0345u, 0x1F8Au),
+ HB_CODEPOINT_ENCODE3 (0x1F0Bu, 0x0345u, 0x1F8Bu), HB_CODEPOINT_ENCODE3 (0x1F0Cu, 0x0345u, 0x1F8Cu),
+ HB_CODEPOINT_ENCODE3 (0x1F0Du, 0x0345u, 0x1F8Du), HB_CODEPOINT_ENCODE3 (0x1F0Eu, 0x0345u, 0x1F8Eu),
+ HB_CODEPOINT_ENCODE3 (0x1F0Fu, 0x0345u, 0x1F8Fu), HB_CODEPOINT_ENCODE3 (0x1F10u, 0x0300u, 0x1F12u),
+ HB_CODEPOINT_ENCODE3 (0x1F10u, 0x0301u, 0x1F14u), HB_CODEPOINT_ENCODE3 (0x1F11u, 0x0300u, 0x1F13u),
+ HB_CODEPOINT_ENCODE3 (0x1F11u, 0x0301u, 0x1F15u), HB_CODEPOINT_ENCODE3 (0x1F18u, 0x0300u, 0x1F1Au),
+ HB_CODEPOINT_ENCODE3 (0x1F18u, 0x0301u, 0x1F1Cu), HB_CODEPOINT_ENCODE3 (0x1F19u, 0x0300u, 0x1F1Bu),
+ HB_CODEPOINT_ENCODE3 (0x1F19u, 0x0301u, 0x1F1Du), HB_CODEPOINT_ENCODE3 (0x1F20u, 0x0300u, 0x1F22u),
+ HB_CODEPOINT_ENCODE3 (0x1F20u, 0x0301u, 0x1F24u), HB_CODEPOINT_ENCODE3 (0x1F20u, 0x0342u, 0x1F26u),
+ HB_CODEPOINT_ENCODE3 (0x1F20u, 0x0345u, 0x1F90u), HB_CODEPOINT_ENCODE3 (0x1F21u, 0x0300u, 0x1F23u),
+ HB_CODEPOINT_ENCODE3 (0x1F21u, 0x0301u, 0x1F25u), HB_CODEPOINT_ENCODE3 (0x1F21u, 0x0342u, 0x1F27u),
+ HB_CODEPOINT_ENCODE3 (0x1F21u, 0x0345u, 0x1F91u), HB_CODEPOINT_ENCODE3 (0x1F22u, 0x0345u, 0x1F92u),
+ HB_CODEPOINT_ENCODE3 (0x1F23u, 0x0345u, 0x1F93u), HB_CODEPOINT_ENCODE3 (0x1F24u, 0x0345u, 0x1F94u),
+ HB_CODEPOINT_ENCODE3 (0x1F25u, 0x0345u, 0x1F95u), HB_CODEPOINT_ENCODE3 (0x1F26u, 0x0345u, 0x1F96u),
+ HB_CODEPOINT_ENCODE3 (0x1F27u, 0x0345u, 0x1F97u), HB_CODEPOINT_ENCODE3 (0x1F28u, 0x0300u, 0x1F2Au),
+ HB_CODEPOINT_ENCODE3 (0x1F28u, 0x0301u, 0x1F2Cu), HB_CODEPOINT_ENCODE3 (0x1F28u, 0x0342u, 0x1F2Eu),
+ HB_CODEPOINT_ENCODE3 (0x1F28u, 0x0345u, 0x1F98u), HB_CODEPOINT_ENCODE3 (0x1F29u, 0x0300u, 0x1F2Bu),
+ HB_CODEPOINT_ENCODE3 (0x1F29u, 0x0301u, 0x1F2Du), HB_CODEPOINT_ENCODE3 (0x1F29u, 0x0342u, 0x1F2Fu),
+ HB_CODEPOINT_ENCODE3 (0x1F29u, 0x0345u, 0x1F99u), HB_CODEPOINT_ENCODE3 (0x1F2Au, 0x0345u, 0x1F9Au),
+ HB_CODEPOINT_ENCODE3 (0x1F2Bu, 0x0345u, 0x1F9Bu), HB_CODEPOINT_ENCODE3 (0x1F2Cu, 0x0345u, 0x1F9Cu),
+ HB_CODEPOINT_ENCODE3 (0x1F2Du, 0x0345u, 0x1F9Du), HB_CODEPOINT_ENCODE3 (0x1F2Eu, 0x0345u, 0x1F9Eu),
+ HB_CODEPOINT_ENCODE3 (0x1F2Fu, 0x0345u, 0x1F9Fu), HB_CODEPOINT_ENCODE3 (0x1F30u, 0x0300u, 0x1F32u),
+ HB_CODEPOINT_ENCODE3 (0x1F30u, 0x0301u, 0x1F34u), HB_CODEPOINT_ENCODE3 (0x1F30u, 0x0342u, 0x1F36u),
+ HB_CODEPOINT_ENCODE3 (0x1F31u, 0x0300u, 0x1F33u), HB_CODEPOINT_ENCODE3 (0x1F31u, 0x0301u, 0x1F35u),
+ HB_CODEPOINT_ENCODE3 (0x1F31u, 0x0342u, 0x1F37u), HB_CODEPOINT_ENCODE3 (0x1F38u, 0x0300u, 0x1F3Au),
+ HB_CODEPOINT_ENCODE3 (0x1F38u, 0x0301u, 0x1F3Cu), HB_CODEPOINT_ENCODE3 (0x1F38u, 0x0342u, 0x1F3Eu),
+ HB_CODEPOINT_ENCODE3 (0x1F39u, 0x0300u, 0x1F3Bu), HB_CODEPOINT_ENCODE3 (0x1F39u, 0x0301u, 0x1F3Du),
+ HB_CODEPOINT_ENCODE3 (0x1F39u, 0x0342u, 0x1F3Fu), HB_CODEPOINT_ENCODE3 (0x1F40u, 0x0300u, 0x1F42u),
+ HB_CODEPOINT_ENCODE3 (0x1F40u, 0x0301u, 0x1F44u), HB_CODEPOINT_ENCODE3 (0x1F41u, 0x0300u, 0x1F43u),
+ HB_CODEPOINT_ENCODE3 (0x1F41u, 0x0301u, 0x1F45u), HB_CODEPOINT_ENCODE3 (0x1F48u, 0x0300u, 0x1F4Au),
+ HB_CODEPOINT_ENCODE3 (0x1F48u, 0x0301u, 0x1F4Cu), HB_CODEPOINT_ENCODE3 (0x1F49u, 0x0300u, 0x1F4Bu),
+ HB_CODEPOINT_ENCODE3 (0x1F49u, 0x0301u, 0x1F4Du), HB_CODEPOINT_ENCODE3 (0x1F50u, 0x0300u, 0x1F52u),
+ HB_CODEPOINT_ENCODE3 (0x1F50u, 0x0301u, 0x1F54u), HB_CODEPOINT_ENCODE3 (0x1F50u, 0x0342u, 0x1F56u),
+ HB_CODEPOINT_ENCODE3 (0x1F51u, 0x0300u, 0x1F53u), HB_CODEPOINT_ENCODE3 (0x1F51u, 0x0301u, 0x1F55u),
+ HB_CODEPOINT_ENCODE3 (0x1F51u, 0x0342u, 0x1F57u), HB_CODEPOINT_ENCODE3 (0x1F59u, 0x0300u, 0x1F5Bu),
+ HB_CODEPOINT_ENCODE3 (0x1F59u, 0x0301u, 0x1F5Du), HB_CODEPOINT_ENCODE3 (0x1F59u, 0x0342u, 0x1F5Fu),
+ HB_CODEPOINT_ENCODE3 (0x1F60u, 0x0300u, 0x1F62u), HB_CODEPOINT_ENCODE3 (0x1F60u, 0x0301u, 0x1F64u),
+ HB_CODEPOINT_ENCODE3 (0x1F60u, 0x0342u, 0x1F66u), HB_CODEPOINT_ENCODE3 (0x1F60u, 0x0345u, 0x1FA0u),
+ HB_CODEPOINT_ENCODE3 (0x1F61u, 0x0300u, 0x1F63u), HB_CODEPOINT_ENCODE3 (0x1F61u, 0x0301u, 0x1F65u),
+ HB_CODEPOINT_ENCODE3 (0x1F61u, 0x0342u, 0x1F67u), HB_CODEPOINT_ENCODE3 (0x1F61u, 0x0345u, 0x1FA1u),
+ HB_CODEPOINT_ENCODE3 (0x1F62u, 0x0345u, 0x1FA2u), HB_CODEPOINT_ENCODE3 (0x1F63u, 0x0345u, 0x1FA3u),
+ HB_CODEPOINT_ENCODE3 (0x1F64u, 0x0345u, 0x1FA4u), HB_CODEPOINT_ENCODE3 (0x1F65u, 0x0345u, 0x1FA5u),
+ HB_CODEPOINT_ENCODE3 (0x1F66u, 0x0345u, 0x1FA6u), HB_CODEPOINT_ENCODE3 (0x1F67u, 0x0345u, 0x1FA7u),
+ HB_CODEPOINT_ENCODE3 (0x1F68u, 0x0300u, 0x1F6Au), HB_CODEPOINT_ENCODE3 (0x1F68u, 0x0301u, 0x1F6Cu),
+ HB_CODEPOINT_ENCODE3 (0x1F68u, 0x0342u, 0x1F6Eu), HB_CODEPOINT_ENCODE3 (0x1F68u, 0x0345u, 0x1FA8u),
+ HB_CODEPOINT_ENCODE3 (0x1F69u, 0x0300u, 0x1F6Bu), HB_CODEPOINT_ENCODE3 (0x1F69u, 0x0301u, 0x1F6Du),
+ HB_CODEPOINT_ENCODE3 (0x1F69u, 0x0342u, 0x1F6Fu), HB_CODEPOINT_ENCODE3 (0x1F69u, 0x0345u, 0x1FA9u),
+ HB_CODEPOINT_ENCODE3 (0x1F6Au, 0x0345u, 0x1FAAu), HB_CODEPOINT_ENCODE3 (0x1F6Bu, 0x0345u, 0x1FABu),
+ HB_CODEPOINT_ENCODE3 (0x1F6Cu, 0x0345u, 0x1FACu), HB_CODEPOINT_ENCODE3 (0x1F6Du, 0x0345u, 0x1FADu),
+ HB_CODEPOINT_ENCODE3 (0x1F6Eu, 0x0345u, 0x1FAEu), HB_CODEPOINT_ENCODE3 (0x1F6Fu, 0x0345u, 0x1FAFu),
+ HB_CODEPOINT_ENCODE3 (0x1F70u, 0x0345u, 0x1FB2u), HB_CODEPOINT_ENCODE3 (0x1F74u, 0x0345u, 0x1FC2u),
+ HB_CODEPOINT_ENCODE3 (0x1F7Cu, 0x0345u, 0x1FF2u), HB_CODEPOINT_ENCODE3 (0x1FB6u, 0x0345u, 0x1FB7u),
+ HB_CODEPOINT_ENCODE3 (0x1FBFu, 0x0300u, 0x1FCDu), HB_CODEPOINT_ENCODE3 (0x1FBFu, 0x0301u, 0x1FCEu),
+ HB_CODEPOINT_ENCODE3 (0x1FBFu, 0x0342u, 0x1FCFu), HB_CODEPOINT_ENCODE3 (0x1FC6u, 0x0345u, 0x1FC7u),
+ HB_CODEPOINT_ENCODE3 (0x1FF6u, 0x0345u, 0x1FF7u), HB_CODEPOINT_ENCODE3 (0x1FFEu, 0x0300u, 0x1FDDu),
+ HB_CODEPOINT_ENCODE3 (0x1FFEu, 0x0301u, 0x1FDEu), HB_CODEPOINT_ENCODE3 (0x1FFEu, 0x0342u, 0x1FDFu),
+ HB_CODEPOINT_ENCODE3 (0x2190u, 0x0338u, 0x219Au), HB_CODEPOINT_ENCODE3 (0x2192u, 0x0338u, 0x219Bu),
+ HB_CODEPOINT_ENCODE3 (0x2194u, 0x0338u, 0x21AEu), HB_CODEPOINT_ENCODE3 (0x21D0u, 0x0338u, 0x21CDu),
+ HB_CODEPOINT_ENCODE3 (0x21D2u, 0x0338u, 0x21CFu), HB_CODEPOINT_ENCODE3 (0x21D4u, 0x0338u, 0x21CEu),
+ HB_CODEPOINT_ENCODE3 (0x2203u, 0x0338u, 0x2204u), HB_CODEPOINT_ENCODE3 (0x2208u, 0x0338u, 0x2209u),
+ HB_CODEPOINT_ENCODE3 (0x220Bu, 0x0338u, 0x220Cu), HB_CODEPOINT_ENCODE3 (0x2223u, 0x0338u, 0x2224u),
+ HB_CODEPOINT_ENCODE3 (0x2225u, 0x0338u, 0x2226u), HB_CODEPOINT_ENCODE3 (0x223Cu, 0x0338u, 0x2241u),
+ HB_CODEPOINT_ENCODE3 (0x2243u, 0x0338u, 0x2244u), HB_CODEPOINT_ENCODE3 (0x2245u, 0x0338u, 0x2247u),
+ HB_CODEPOINT_ENCODE3 (0x2248u, 0x0338u, 0x2249u), HB_CODEPOINT_ENCODE3 (0x224Du, 0x0338u, 0x226Du),
+ HB_CODEPOINT_ENCODE3 (0x2261u, 0x0338u, 0x2262u), HB_CODEPOINT_ENCODE3 (0x2264u, 0x0338u, 0x2270u),
+ HB_CODEPOINT_ENCODE3 (0x2265u, 0x0338u, 0x2271u), HB_CODEPOINT_ENCODE3 (0x2272u, 0x0338u, 0x2274u),
+ HB_CODEPOINT_ENCODE3 (0x2273u, 0x0338u, 0x2275u), HB_CODEPOINT_ENCODE3 (0x2276u, 0x0338u, 0x2278u),
+ HB_CODEPOINT_ENCODE3 (0x2277u, 0x0338u, 0x2279u), HB_CODEPOINT_ENCODE3 (0x227Au, 0x0338u, 0x2280u),
+ HB_CODEPOINT_ENCODE3 (0x227Bu, 0x0338u, 0x2281u), HB_CODEPOINT_ENCODE3 (0x227Cu, 0x0338u, 0x22E0u),
+ HB_CODEPOINT_ENCODE3 (0x227Du, 0x0338u, 0x22E1u), HB_CODEPOINT_ENCODE3 (0x2282u, 0x0338u, 0x2284u),
+ HB_CODEPOINT_ENCODE3 (0x2283u, 0x0338u, 0x2285u), HB_CODEPOINT_ENCODE3 (0x2286u, 0x0338u, 0x2288u),
+ HB_CODEPOINT_ENCODE3 (0x2287u, 0x0338u, 0x2289u), HB_CODEPOINT_ENCODE3 (0x2291u, 0x0338u, 0x22E2u),
+ HB_CODEPOINT_ENCODE3 (0x2292u, 0x0338u, 0x22E3u), HB_CODEPOINT_ENCODE3 (0x22A2u, 0x0338u, 0x22ACu),
+ HB_CODEPOINT_ENCODE3 (0x22A8u, 0x0338u, 0x22ADu), HB_CODEPOINT_ENCODE3 (0x22A9u, 0x0338u, 0x22AEu),
+ HB_CODEPOINT_ENCODE3 (0x22ABu, 0x0338u, 0x22AFu), HB_CODEPOINT_ENCODE3 (0x22B2u, 0x0338u, 0x22EAu),
+ HB_CODEPOINT_ENCODE3 (0x22B3u, 0x0338u, 0x22EBu), HB_CODEPOINT_ENCODE3 (0x22B4u, 0x0338u, 0x22ECu),
+ HB_CODEPOINT_ENCODE3 (0x22B5u, 0x0338u, 0x22EDu), HB_CODEPOINT_ENCODE3 (0x2ADDu, 0x0338u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x3046u, 0x3099u, 0x3094u), HB_CODEPOINT_ENCODE3 (0x304Bu, 0x3099u, 0x304Cu),
+ HB_CODEPOINT_ENCODE3 (0x304Du, 0x3099u, 0x304Eu), HB_CODEPOINT_ENCODE3 (0x304Fu, 0x3099u, 0x3050u),
+ HB_CODEPOINT_ENCODE3 (0x3051u, 0x3099u, 0x3052u), HB_CODEPOINT_ENCODE3 (0x3053u, 0x3099u, 0x3054u),
+ HB_CODEPOINT_ENCODE3 (0x3055u, 0x3099u, 0x3056u), HB_CODEPOINT_ENCODE3 (0x3057u, 0x3099u, 0x3058u),
+ HB_CODEPOINT_ENCODE3 (0x3059u, 0x3099u, 0x305Au), HB_CODEPOINT_ENCODE3 (0x305Bu, 0x3099u, 0x305Cu),
+ HB_CODEPOINT_ENCODE3 (0x305Du, 0x3099u, 0x305Eu), HB_CODEPOINT_ENCODE3 (0x305Fu, 0x3099u, 0x3060u),
+ HB_CODEPOINT_ENCODE3 (0x3061u, 0x3099u, 0x3062u), HB_CODEPOINT_ENCODE3 (0x3064u, 0x3099u, 0x3065u),
+ HB_CODEPOINT_ENCODE3 (0x3066u, 0x3099u, 0x3067u), HB_CODEPOINT_ENCODE3 (0x3068u, 0x3099u, 0x3069u),
+ HB_CODEPOINT_ENCODE3 (0x306Fu, 0x3099u, 0x3070u), HB_CODEPOINT_ENCODE3 (0x306Fu, 0x309Au, 0x3071u),
+ HB_CODEPOINT_ENCODE3 (0x3072u, 0x3099u, 0x3073u), HB_CODEPOINT_ENCODE3 (0x3072u, 0x309Au, 0x3074u),
+ HB_CODEPOINT_ENCODE3 (0x3075u, 0x3099u, 0x3076u), HB_CODEPOINT_ENCODE3 (0x3075u, 0x309Au, 0x3077u),
+ HB_CODEPOINT_ENCODE3 (0x3078u, 0x3099u, 0x3079u), HB_CODEPOINT_ENCODE3 (0x3078u, 0x309Au, 0x307Au),
+ HB_CODEPOINT_ENCODE3 (0x307Bu, 0x3099u, 0x307Cu), HB_CODEPOINT_ENCODE3 (0x307Bu, 0x309Au, 0x307Du),
+ HB_CODEPOINT_ENCODE3 (0x309Du, 0x3099u, 0x309Eu), HB_CODEPOINT_ENCODE3 (0x30A6u, 0x3099u, 0x30F4u),
+ HB_CODEPOINT_ENCODE3 (0x30ABu, 0x3099u, 0x30ACu), HB_CODEPOINT_ENCODE3 (0x30ADu, 0x3099u, 0x30AEu),
+ HB_CODEPOINT_ENCODE3 (0x30AFu, 0x3099u, 0x30B0u), HB_CODEPOINT_ENCODE3 (0x30B1u, 0x3099u, 0x30B2u),
+ HB_CODEPOINT_ENCODE3 (0x30B3u, 0x3099u, 0x30B4u), HB_CODEPOINT_ENCODE3 (0x30B5u, 0x3099u, 0x30B6u),
+ HB_CODEPOINT_ENCODE3 (0x30B7u, 0x3099u, 0x30B8u), HB_CODEPOINT_ENCODE3 (0x30B9u, 0x3099u, 0x30BAu),
+ HB_CODEPOINT_ENCODE3 (0x30BBu, 0x3099u, 0x30BCu), HB_CODEPOINT_ENCODE3 (0x30BDu, 0x3099u, 0x30BEu),
+ HB_CODEPOINT_ENCODE3 (0x30BFu, 0x3099u, 0x30C0u), HB_CODEPOINT_ENCODE3 (0x30C1u, 0x3099u, 0x30C2u),
+ HB_CODEPOINT_ENCODE3 (0x30C4u, 0x3099u, 0x30C5u), HB_CODEPOINT_ENCODE3 (0x30C6u, 0x3099u, 0x30C7u),
+ HB_CODEPOINT_ENCODE3 (0x30C8u, 0x3099u, 0x30C9u), HB_CODEPOINT_ENCODE3 (0x30CFu, 0x3099u, 0x30D0u),
+ HB_CODEPOINT_ENCODE3 (0x30CFu, 0x309Au, 0x30D1u), HB_CODEPOINT_ENCODE3 (0x30D2u, 0x3099u, 0x30D3u),
+ HB_CODEPOINT_ENCODE3 (0x30D2u, 0x309Au, 0x30D4u), HB_CODEPOINT_ENCODE3 (0x30D5u, 0x3099u, 0x30D6u),
+ HB_CODEPOINT_ENCODE3 (0x30D5u, 0x309Au, 0x30D7u), HB_CODEPOINT_ENCODE3 (0x30D8u, 0x3099u, 0x30D9u),
+ HB_CODEPOINT_ENCODE3 (0x30D8u, 0x309Au, 0x30DAu), HB_CODEPOINT_ENCODE3 (0x30DBu, 0x3099u, 0x30DCu),
+ HB_CODEPOINT_ENCODE3 (0x30DBu, 0x309Au, 0x30DDu), HB_CODEPOINT_ENCODE3 (0x30EFu, 0x3099u, 0x30F7u),
+ HB_CODEPOINT_ENCODE3 (0x30F0u, 0x3099u, 0x30F8u), HB_CODEPOINT_ENCODE3 (0x30F1u, 0x3099u, 0x30F9u),
+ HB_CODEPOINT_ENCODE3 (0x30F2u, 0x3099u, 0x30FAu), HB_CODEPOINT_ENCODE3 (0x30FDu, 0x3099u, 0x30FEu),
+ HB_CODEPOINT_ENCODE3 (0xFB49u, 0x05C1u, 0x0000u), HB_CODEPOINT_ENCODE3 (0xFB49u, 0x05C2u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x11099u, 0x110BAu, 0x1109Au),HB_CODEPOINT_ENCODE3 (0x1109Bu, 0x110BAu, 0x1109Cu),
+ HB_CODEPOINT_ENCODE3 (0x110A5u, 0x110BAu, 0x110ABu),HB_CODEPOINT_ENCODE3 (0x11131u, 0x11127u, 0x1112Eu),
+ HB_CODEPOINT_ENCODE3 (0x11132u, 0x11127u, 0x1112Fu),HB_CODEPOINT_ENCODE3 (0x11347u, 0x1133Eu, 0x1134Bu),
+ HB_CODEPOINT_ENCODE3 (0x11347u, 0x11357u, 0x1134Cu),HB_CODEPOINT_ENCODE3 (0x114B9u, 0x114B0u, 0x114BCu),
+ HB_CODEPOINT_ENCODE3 (0x114B9u, 0x114BAu, 0x114BBu),HB_CODEPOINT_ENCODE3 (0x114B9u, 0x114BDu, 0x114BEu),
+ HB_CODEPOINT_ENCODE3 (0x115B8u, 0x115AFu, 0x115BAu),HB_CODEPOINT_ENCODE3 (0x115B9u, 0x115AFu, 0x115BBu),
+ HB_CODEPOINT_ENCODE3 (0x11935u, 0x11930u, 0x11938u), HB_CODEPOINT_ENCODE3 (0x1D157u, 0x1D165u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x1D158u, 0x1D165u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x1D15Fu, 0x1D16Eu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x1D15Fu, 0x1D16Fu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x1D15Fu, 0x1D170u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x1D15Fu, 0x1D171u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x1D15Fu, 0x1D172u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x1D1B9u, 0x1D165u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x1D1BAu, 0x1D165u, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x1D1BBu, 0x1D16Eu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x1D1BBu, 0x1D16Fu, 0x0000u),
+ HB_CODEPOINT_ENCODE3 (0x1D1BCu, 0x1D16Eu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x1D1BCu, 0x1D16Fu, 0x0000u),
+};
+
+#ifndef HB_OPTIMIZE_SIZE
+
+static const uint8_t
+_hb_ucd_u8[32480] =
+{
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 28,
+ 29, 26, 30, 31, 32, 33, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 34, 35, 35, 35, 35,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
+ 26, 57, 58, 59, 59, 59, 59, 59, 26, 26, 60, 59, 59, 59, 59, 59,
+ 59, 59, 26, 61, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 26, 62, 59, 63, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 64, 26, 26, 65, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 66, 67, 59, 59, 59, 59, 68, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 69, 70, 71, 72, 73, 74, 59, 59,
+ 75, 76, 59, 59, 77, 59, 78, 79, 80, 81, 73, 82, 83, 84, 59, 59,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 85, 26, 26, 26, 26, 26, 26, 26, 86, 87, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 88, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 89, 59, 59, 59, 59, 59, 59, 26, 90, 59, 59,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 91, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 92, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 93,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 94,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 29, 21, 21, 21, 23, 21, 21, 21, 22, 18, 21, 25, 21, 17, 21, 21,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 21, 21, 25, 25, 25, 21,
+ 21, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, 21, 18, 24, 16,
+ 24, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 22, 25, 18, 25, 0,
+ 29, 21, 23, 23, 23, 23, 26, 21, 24, 26, 7, 20, 25, 1, 26, 24,
+ 26, 25, 15, 15, 24, 5, 21, 21, 24, 15, 7, 19, 15, 15, 15, 21,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 25, 9, 9, 9, 9, 9, 9, 9, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 25, 5, 5, 5, 5, 5, 5, 5, 5,
+ 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5,
+ 9, 5, 9, 5, 9, 5, 9, 5, 5, 9, 5, 9, 5, 9, 5, 9,
+ 5, 9, 5, 9, 5, 9, 5, 9, 5, 5, 9, 5, 9, 5, 9, 5,
+ 9, 5, 9, 5, 9, 5, 9, 5, 9, 9, 5, 9, 5, 9, 5, 5,
+ 5, 9, 9, 5, 9, 5, 9, 9, 5, 9, 9, 9, 5, 5, 9, 9,
+ 9, 9, 5, 9, 9, 5, 9, 9, 9, 5, 5, 5, 9, 9, 5, 9,
+ 9, 5, 9, 5, 9, 5, 9, 9, 5, 9, 5, 5, 9, 5, 9, 9,
+ 5, 9, 9, 9, 5, 9, 5, 9, 9, 5, 5, 7, 9, 5, 5, 5,
+ 7, 7, 7, 7, 9, 8, 5, 9, 8, 5, 9, 8, 5, 9, 5, 9,
+ 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 5, 9, 5,
+ 5, 9, 8, 5, 9, 5, 9, 9, 9, 5, 9, 5, 9, 5, 9, 5,
+ 9, 5, 9, 5, 5, 5, 5, 5, 5, 5, 9, 9, 5, 9, 9, 5,
+ 5, 9, 5, 9, 9, 9, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5,
+ 5, 5, 5, 5, 7, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 24, 24, 24, 24, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 6, 6, 6, 6, 6, 24, 24, 24, 24, 24, 24, 24, 6, 24, 6, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 9, 5, 9, 5, 6, 24, 9, 5, 2, 2, 6, 5, 5, 5, 21, 9,
+ 2, 2, 2, 2, 24, 24, 9, 21, 9, 9, 9, 2, 9, 2, 9, 9,
+ 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 9,
+ 5, 5, 9, 9, 9, 5, 5, 5, 9, 5, 9, 5, 9, 5, 9, 5,
+ 5, 5, 5, 5, 9, 5, 25, 9, 5, 9, 9, 5, 5, 9, 9, 9,
+ 9, 5, 26, 12, 12, 12, 12, 12, 11, 11, 9, 5, 9, 5, 9, 5,
+ 9, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 5,
+ 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 2, 2, 6, 21, 21, 21, 21, 21, 21,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 21, 17, 2, 2, 26, 26, 23,
+ 2, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 17, 12,
+ 21, 12, 12, 21, 12, 12, 21, 12, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 7,
+ 7, 7, 7, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 25, 25, 25, 21, 21, 23, 21, 21, 26, 26,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 21, 1, 2, 21, 21,
+ 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 21, 21, 21, 21, 7, 7,
+ 12, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 21, 7, 12, 12, 12, 12, 12, 12, 12, 1, 26, 12,
+ 12, 12, 12, 12, 12, 6, 6, 12, 12, 26, 12, 12, 12, 12, 7, 7,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 7, 7, 7, 26, 26, 7,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2, 1,
+ 7, 12, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 2, 2, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 6, 6, 26, 21, 21, 21, 6, 2, 2, 12, 23, 23,
+ 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 6, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 6, 12, 12, 12, 6, 12, 12, 12, 12, 12, 2, 2,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 2, 2, 21, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 1, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 10, 12, 7, 10, 10,
+ 10, 12, 12, 12, 12, 12, 12, 12, 12, 10, 10, 10, 10, 12, 10, 10,
+ 7, 12, 12, 12, 12, 12, 12, 12, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 12, 12, 21, 21, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 21, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 12, 10, 10, 2, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 7,
+ 7, 2, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 7,
+ 7, 2, 7, 2, 2, 2, 7, 7, 7, 7, 2, 2, 12, 7, 10, 10,
+ 10, 12, 12, 12, 12, 2, 2, 10, 10, 2, 2, 10, 10, 12, 7, 2,
+ 2, 2, 2, 2, 2, 2, 2, 10, 2, 2, 2, 2, 7, 7, 2, 7,
+ 7, 7, 12, 12, 2, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 7, 7, 23, 23, 15, 15, 15, 15, 15, 15, 26, 23, 7, 21, 12, 2,
+ 2, 12, 12, 10, 2, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 7,
+ 7, 2, 7, 7, 2, 7, 7, 2, 7, 7, 2, 2, 12, 2, 10, 10,
+ 10, 12, 12, 2, 2, 2, 2, 12, 12, 2, 2, 12, 12, 12, 2, 2,
+ 2, 12, 2, 2, 2, 2, 2, 2, 2, 7, 7, 7, 7, 2, 7, 2,
+ 2, 2, 2, 2, 2, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 12, 12, 7, 7, 7, 12, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 12, 12, 10, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7,
+ 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 2, 7, 7, 2, 7, 7, 7, 7, 7, 2, 2, 12, 7, 10, 10,
+ 10, 12, 12, 12, 12, 12, 2, 12, 12, 10, 2, 10, 10, 12, 2, 2,
+ 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 21, 23, 2, 2, 2, 2, 2, 2, 2, 7, 12, 12, 12, 12, 12, 12,
+ 2, 12, 10, 10, 2, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 7,
+ 7, 2, 7, 7, 2, 7, 7, 7, 7, 7, 2, 2, 12, 7, 10, 12,
+ 10, 12, 12, 12, 12, 2, 2, 10, 10, 2, 2, 10, 10, 12, 2, 2,
+ 2, 2, 2, 2, 2, 12, 12, 10, 2, 2, 2, 2, 7, 7, 2, 7,
+ 26, 7, 15, 15, 15, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 12, 7, 2, 7, 7, 7, 7, 7, 7, 2, 2, 2, 7, 7,
+ 7, 2, 7, 7, 7, 7, 2, 2, 2, 7, 7, 2, 7, 2, 7, 7,
+ 2, 2, 2, 7, 7, 2, 2, 2, 7, 7, 7, 2, 2, 2, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 10, 10,
+ 12, 10, 10, 2, 2, 2, 10, 10, 10, 2, 10, 10, 10, 12, 2, 2,
+ 7, 2, 2, 2, 2, 2, 2, 10, 2, 2, 2, 2, 2, 2, 2, 2,
+ 15, 15, 15, 26, 26, 26, 26, 26, 26, 23, 26, 2, 2, 2, 2, 2,
+ 12, 10, 10, 10, 12, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7,
+ 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 7, 12, 12,
+ 12, 10, 10, 10, 10, 2, 12, 12, 12, 2, 12, 12, 12, 12, 2, 2,
+ 2, 2, 2, 2, 2, 12, 12, 2, 7, 7, 7, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 21, 15, 15, 15, 15, 15, 15, 15, 26,
+ 7, 12, 10, 10, 21, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7,
+ 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 2, 2, 12, 7, 10, 12,
+ 10, 10, 10, 10, 10, 2, 12, 10, 10, 2, 10, 10, 12, 12, 2, 2,
+ 2, 2, 2, 2, 2, 10, 10, 2, 2, 2, 2, 2, 2, 2, 7, 2,
+ 2, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 12, 12, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 7, 10, 10,
+ 10, 12, 12, 12, 12, 2, 10, 10, 10, 2, 10, 10, 10, 12, 7, 26,
+ 2, 2, 2, 2, 7, 7, 7, 10, 15, 15, 15, 15, 15, 15, 15, 7,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 26, 7, 7, 7, 7, 7, 7,
+ 2, 12, 10, 10, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 7, 7, 7, 7, 7, 7,
+ 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 12, 2, 2, 2, 2, 10,
+ 10, 10, 12, 12, 12, 2, 12, 2, 10, 10, 10, 10, 10, 10, 10, 10,
+ 2, 2, 10, 10, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 12, 7, 7, 12, 12, 12, 12, 12, 12, 12, 2, 2, 2, 2, 23,
+ 7, 7, 7, 7, 7, 7, 6, 12, 12, 12, 12, 12, 12, 12, 12, 21,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 21, 21, 2, 2, 2, 2,
+ 2, 7, 7, 2, 7, 2, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7,
+ 7, 7, 7, 7, 2, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 12, 7, 7, 12, 12, 12, 12, 12, 12, 12, 12, 12, 7, 2, 2,
+ 7, 7, 7, 7, 7, 2, 6, 2, 12, 12, 12, 12, 12, 12, 2, 2,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 7, 7, 7, 7,
+ 7, 26, 26, 26, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 26, 21, 26, 26, 26, 12, 12, 26, 26, 26, 26, 26, 26,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 26, 12, 26, 12, 26, 12, 22, 18, 22, 18, 10, 10,
+ 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2,
+ 2, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 10,
+ 12, 12, 12, 12, 12, 21, 12, 12, 7, 7, 7, 7, 7, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 2, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 2, 26, 26,
+ 26, 26, 26, 26, 26, 26, 12, 26, 26, 26, 26, 26, 26, 2, 26, 26,
+ 21, 21, 21, 21, 21, 26, 26, 26, 26, 21, 21, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 10, 12, 12, 12,
+ 12, 10, 12, 12, 12, 12, 12, 12, 10, 12, 12, 10, 10, 12, 12, 7,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 21, 21, 21, 21, 21, 21,
+ 7, 7, 7, 7, 7, 7, 10, 10, 12, 12, 7, 7, 7, 7, 12, 12,
+ 12, 7, 10, 10, 10, 7, 7, 10, 10, 10, 10, 10, 10, 10, 7, 7,
+ 7, 12, 12, 12, 12, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 12, 10, 10, 12, 12, 10, 10, 10, 10, 10, 10, 12, 7, 10,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 10, 10, 10, 12, 26, 26,
+ 9, 9, 9, 9, 9, 9, 2, 9, 2, 2, 2, 2, 2, 9, 2, 2,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 21, 6, 5, 5, 5,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 2, 7, 2, 7, 7, 7, 7, 2, 2,
+ 7, 2, 7, 7, 7, 7, 2, 2, 7, 7, 7, 7, 7, 7, 7, 2,
+ 7, 2, 7, 7, 7, 7, 2, 2, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 12, 12, 12,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 2, 2,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 2, 2, 2, 2, 2,
+ 9, 9, 9, 9, 9, 9, 2, 2, 5, 5, 5, 5, 5, 5, 2, 2,
+ 17, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 26, 21, 7,
+ 29, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 22, 18, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 21, 21, 21, 14, 14,
+ 14, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7,
+ 7, 7, 12, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 12, 12, 12, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 2, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 12, 12, 10, 12, 12, 12, 12, 12, 12, 12, 10, 10,
+ 10, 10, 10, 10, 10, 10, 12, 10, 10, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 21, 21, 21, 6, 21, 21, 21, 23, 7, 12, 2, 2,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 2, 2, 2, 2,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 2, 2, 2, 2, 2,
+ 21, 21, 21, 21, 21, 21, 17, 21, 21, 21, 21, 12, 12, 12, 1, 2,
+ 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 12, 12, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 7, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2,
+ 12, 12, 12, 10, 10, 10, 10, 12, 12, 10, 10, 10, 2, 2, 2, 2,
+ 10, 10, 12, 10, 10, 10, 10, 10, 10, 12, 12, 12, 2, 2, 2, 2,
+ 26, 2, 2, 2, 21, 21, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2,
+ 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 15, 2, 2, 2, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 7, 7, 7, 7, 7, 7, 7, 12, 12, 10, 10, 12, 2, 2, 21, 21,
+ 7, 7, 7, 7, 7, 10, 12, 10, 12, 12, 12, 12, 12, 12, 12, 2,
+ 12, 10, 12, 10, 10, 12, 12, 12, 12, 12, 12, 12, 12, 10, 10, 10,
+ 10, 10, 10, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 2, 2, 12,
+ 21, 21, 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 2, 2,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 12,
+ 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 12, 12, 12, 12, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 12, 10, 12, 12, 12, 12, 12, 10, 12, 10, 10, 10,
+ 10, 10, 12, 10, 10, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2,
+ 21, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 2, 2,
+ 12, 12, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 10, 12, 12, 12, 12, 10, 10, 12, 12, 10, 12, 12, 12, 7, 7,
+ 7, 7, 7, 7, 7, 7, 12, 10, 12, 12, 10, 10, 10, 12, 10, 12,
+ 12, 12, 10, 10, 2, 2, 2, 2, 2, 2, 2, 2, 21, 21, 21, 21,
+ 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 12, 12, 12, 12,
+ 12, 12, 12, 12, 10, 10, 12, 12, 2, 2, 2, 21, 21, 21, 21, 21,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 2, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 21, 21,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2, 2, 9, 9, 9,
+ 21, 21, 21, 21, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2,
+ 12, 12, 12, 21, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 10, 12, 12, 12, 12, 12, 12, 12, 7, 7, 7, 7, 12, 7, 7,
+ 7, 7, 7, 7, 12, 7, 7, 10, 12, 12, 7, 2, 2, 2, 2, 2,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 2, 12, 12, 12, 12, 12,
+ 9, 5, 9, 5, 9, 5, 5, 5, 5, 5, 5, 5, 5, 5, 9, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9, 9, 9, 9,
+ 5, 5, 5, 5, 5, 5, 2, 2, 9, 9, 9, 9, 9, 9, 2, 2,
+ 5, 5, 5, 5, 5, 5, 5, 5, 2, 9, 2, 9, 2, 9, 2, 9,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2,
+ 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8,
+ 5, 5, 5, 5, 5, 2, 5, 5, 9, 9, 9, 9, 8, 24, 5, 24,
+ 24, 24, 5, 5, 5, 2, 5, 5, 9, 9, 9, 9, 8, 24, 24, 24,
+ 5, 5, 5, 5, 2, 2, 5, 5, 9, 9, 9, 9, 2, 24, 24, 24,
+ 5, 5, 5, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9, 24, 24, 24,
+ 2, 2, 5, 5, 5, 2, 5, 5, 9, 9, 9, 9, 8, 24, 24, 2,
+ 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 1, 1, 1, 1, 1,
+ 17, 17, 17, 17, 17, 17, 21, 21, 20, 19, 22, 20, 20, 19, 22, 20,
+ 21, 21, 21, 21, 21, 21, 21, 21, 27, 28, 1, 1, 1, 1, 1, 29,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 19, 21, 21, 21, 21, 16,
+ 16, 21, 21, 21, 25, 22, 18, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 25, 21, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 29,
+ 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 15, 6, 2, 2, 15, 15, 15, 15, 15, 15, 25, 25, 25, 22, 18, 6,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 25, 25, 25, 22, 18, 2,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2,
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11,
+ 11, 12, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 26, 26, 9, 26, 26, 26, 26, 9, 26, 26, 5, 9, 9, 9, 5, 5,
+ 9, 9, 9, 5, 26, 9, 26, 26, 25, 9, 9, 9, 9, 9, 26, 26,
+ 26, 26, 26, 26, 9, 26, 9, 26, 9, 26, 9, 9, 9, 9, 26, 5,
+ 9, 9, 9, 9, 5, 7, 7, 7, 7, 5, 26, 26, 5, 5, 9, 9,
+ 25, 25, 25, 25, 25, 9, 5, 5, 5, 5, 26, 25, 26, 26, 5, 26,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 9, 5, 14, 14, 14, 14, 15, 26, 26, 2, 2, 2, 2,
+ 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 25, 25, 26, 26, 26, 26,
+ 25, 26, 26, 25, 26, 26, 25, 26, 26, 26, 26, 26, 26, 26, 25, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 25, 25,
+ 26, 26, 25, 26, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 26, 26, 26, 26, 26, 26, 26, 26, 22, 18, 22, 18, 26, 26, 26, 26,
+ 25, 25, 26, 26, 26, 26, 26, 26, 26, 22, 18, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 25, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, 25, 25,
+ 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 2, 2, 2, 2,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 15, 15, 15, 15, 15, 15,
+ 26, 26, 26, 26, 26, 26, 26, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, 25, 25, 25, 25, 25, 25,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 25,
+ 26, 26, 26, 26, 26, 26, 26, 26, 22, 18, 22, 18, 22, 18, 22, 18,
+ 22, 18, 22, 18, 22, 18, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 25, 25, 25, 25, 25, 22, 18, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 22, 18, 22, 18, 22, 18, 22, 18, 22, 18,
+ 25, 25, 25, 22, 18, 22, 18, 22, 18, 22, 18, 22, 18, 22, 18, 22,
+ 18, 22, 18, 22, 18, 22, 18, 22, 18, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 22, 18, 22, 18, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 22, 18, 25, 25,
+ 25, 25, 25, 25, 25, 26, 26, 25, 25, 25, 25, 25, 25, 26, 26, 26,
+ 26, 26, 26, 26, 2, 2, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 2, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2,
+ 9, 5, 9, 9, 9, 5, 5, 9, 5, 9, 5, 9, 5, 9, 9, 9,
+ 9, 5, 9, 5, 5, 9, 5, 5, 5, 5, 5, 5, 6, 6, 9, 9,
+ 9, 5, 9, 5, 5, 26, 26, 26, 26, 26, 26, 9, 5, 9, 5, 12,
+ 12, 12, 9, 5, 2, 2, 2, 2, 2, 21, 21, 21, 21, 15, 21, 21,
+ 5, 5, 5, 5, 5, 5, 2, 5, 2, 2, 2, 2, 2, 5, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 6,
+ 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 12,
+ 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 2,
+ 21, 21, 20, 19, 20, 19, 21, 21, 21, 20, 19, 21, 20, 19, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 17, 21, 21, 17, 21, 20, 19, 21, 21,
+ 20, 19, 22, 18, 22, 18, 22, 18, 22, 18, 21, 21, 21, 21, 21, 6,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 17, 17, 21, 21, 21, 21,
+ 17, 21, 22, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 26, 26, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 26, 26, 26, 26, 26, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 2, 2, 2,
+ 29, 21, 21, 21, 26, 6, 7, 14, 22, 18, 22, 18, 22, 18, 22, 18,
+ 22, 18, 26, 26, 22, 18, 22, 18, 22, 18, 22, 18, 17, 22, 18, 18,
+ 26, 14, 14, 14, 14, 14, 14, 14, 14, 14, 12, 12, 12, 12, 10, 10,
+ 17, 6, 6, 6, 6, 6, 26, 26, 14, 14, 14, 6, 7, 21, 26, 26,
+ 7, 7, 7, 7, 7, 7, 7, 2, 2, 12, 12, 24, 24, 6, 6, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 21, 6, 6, 6, 7,
+ 2, 2, 2, 2, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 26, 26, 15, 15, 15, 15, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 15, 15, 15, 15, 15, 15, 15, 15,
+ 26, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 21, 21, 21,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 7, 7, 2, 2, 2, 2,
+ 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 7, 12,
+ 11, 11, 11, 21, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 21, 6,
+ 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 6, 6, 12, 12,
+ 7, 7, 7, 7, 7, 7, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 12, 12, 21, 21, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2,
+ 24, 24, 24, 24, 24, 24, 24, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 24, 24, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5,
+ 5, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5,
+ 6, 5, 5, 5, 5, 5, 5, 5, 5, 9, 5, 9, 5, 9, 9, 5,
+ 9, 5, 9, 5, 9, 5, 9, 5, 6, 24, 24, 9, 5, 9, 5, 7,
+ 9, 5, 9, 5, 5, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5,
+ 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 9, 9, 9, 9, 5,
+ 9, 9, 9, 9, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5,
+ 2, 2, 9, 5, 9, 9, 9, 9, 5, 9, 5, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 9, 5, 7, 6, 6, 5, 7, 7, 7, 7, 7,
+ 7, 7, 12, 7, 7, 7, 12, 7, 7, 7, 7, 12, 7, 7, 7, 7,
+ 7, 7, 7, 10, 10, 12, 12, 10, 26, 26, 26, 26, 12, 2, 2, 2,
+ 15, 15, 15, 15, 15, 15, 26, 26, 23, 26, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2,
+ 10, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 21, 21,
+ 12, 12, 7, 7, 7, 7, 7, 7, 21, 21, 21, 7, 21, 7, 7, 12,
+ 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 12, 12, 12, 21, 21,
+ 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 10, 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 21,
+ 7, 7, 7, 12, 10, 10, 12, 12, 12, 12, 10, 10, 12, 12, 10, 10,
+ 10, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2, 6,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 2, 2, 21, 21,
+ 7, 7, 7, 7, 7, 12, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 7, 7, 7, 7, 7, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 12, 10,
+ 10, 12, 12, 10, 10, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 12, 7, 7, 7, 7, 7, 7, 7, 7, 12, 10, 2, 2,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 21, 21, 21, 21,
+ 6, 7, 7, 7, 7, 7, 7, 26, 26, 26, 7, 10, 12, 10, 7, 7,
+ 12, 7, 12, 12, 12, 7, 7, 12, 12, 7, 7, 7, 7, 7, 12, 12,
+ 7, 12, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 7, 6, 21, 21,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 12, 12, 10, 10,
+ 21, 21, 7, 6, 6, 10, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 7, 7, 7, 7, 7, 7, 2, 2, 7, 7, 7, 7, 7, 7, 2,
+ 2, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 24, 6, 6, 6, 6,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 24, 24, 2, 2, 2, 2,
+ 7, 7, 7, 10, 10, 12, 10, 10, 12, 10, 10, 21, 10, 12, 2, 2,
+ 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 7, 7, 7, 7, 7,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 7, 12, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 25, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 2, 7, 2,
+ 7, 7, 2, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 18, 22,
+ 2, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 23, 26, 2, 2,
+ 21, 21, 21, 21, 21, 21, 21, 22, 18, 21, 2, 2, 2, 2, 2, 2,
+ 21, 17, 17, 16, 16, 22, 18, 22, 18, 22, 18, 22, 18, 22, 18, 22,
+ 18, 22, 18, 22, 18, 21, 21, 22, 18, 21, 21, 21, 21, 16, 16, 16,
+ 21, 21, 21, 2, 21, 21, 21, 21, 17, 22, 18, 22, 18, 22, 18, 21,
+ 21, 21, 25, 17, 25, 25, 25, 2, 21, 23, 21, 21, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 1,
+ 2, 21, 21, 21, 23, 21, 21, 21, 22, 18, 21, 25, 21, 17, 21, 21,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 22, 25, 18, 25, 22,
+ 18, 21, 22, 18, 21, 21, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6,
+ 2, 2, 7, 7, 7, 7, 7, 7, 2, 2, 7, 7, 7, 7, 7, 7,
+ 2, 2, 7, 7, 7, 7, 7, 7, 2, 2, 7, 7, 7, 2, 2, 2,
+ 23, 23, 25, 24, 26, 23, 23, 2, 26, 25, 25, 25, 25, 26, 26, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 26, 26, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 2, 7,
+ 21, 21, 21, 2, 2, 2, 2, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 2, 2, 2, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 14, 14, 14, 14, 14, 15, 15, 15, 15, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 15, 15, 26, 26, 26, 2,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 2, 2,
+ 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 12, 2, 2,
+ 12, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 2, 2, 2,
+ 15, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 7, 7,
+ 7, 14, 7, 7, 7, 7, 7, 7, 7, 7, 14, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 21,
+ 7, 7, 7, 7, 2, 2, 2, 2, 7, 7, 7, 7, 7, 7, 7, 7,
+ 21, 14, 14, 14, 14, 14, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5, 5, 5, 5,
+ 9, 9, 9, 9, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2,
+ 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 21,
+ 7, 7, 7, 7, 7, 7, 2, 2, 7, 2, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 2, 7, 7, 2, 2, 2, 7, 2, 2, 7,
+ 7, 7, 7, 7, 7, 7, 2, 21, 15, 15, 15, 15, 15, 15, 15, 15,
+ 7, 7, 7, 7, 7, 7, 7, 26, 26, 15, 15, 15, 15, 15, 15, 15,
+ 2, 2, 2, 2, 2, 2, 2, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 7, 7, 7, 2, 7, 7, 2, 2, 2, 2, 2, 15, 15, 15, 15, 15,
+ 7, 7, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 2, 2, 2, 21,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 21,
+ 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 15, 15, 7, 7,
+ 2, 2, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 7, 12, 12, 12, 2, 12, 12, 2, 2, 2, 2, 2, 12, 12, 12, 12,
+ 7, 7, 7, 7, 2, 7, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 2, 2, 12, 12, 12, 2, 2, 2, 2, 12,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 15, 15, 21,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 15, 15, 15,
+ 7, 7, 7, 7, 7, 7, 7, 7, 26, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 12, 12, 2, 2, 2, 2, 15, 15, 15, 15, 15,
+ 21, 21, 21, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 2, 2, 2, 21, 21, 21, 21, 21, 21, 21,
+ 7, 7, 7, 7, 7, 7, 2, 2, 15, 15, 15, 15, 15, 15, 15, 15,
+ 7, 7, 7, 2, 2, 2, 2, 2, 15, 15, 15, 15, 15, 15, 15, 15,
+ 7, 7, 2, 2, 2, 2, 2, 2, 2, 21, 21, 21, 21, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 15, 15, 15, 15, 15, 15, 15,
+ 9, 9, 9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 15, 15, 15, 15, 15, 15,
+ 7, 7, 7, 7, 12, 12, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 12, 12, 17, 2, 2,
+ 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 15, 15, 15, 15, 15, 15, 15, 7, 2, 2, 2, 2, 2, 2, 2, 2,
+ 12, 15, 15, 15, 15, 21, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 2, 2, 2, 2,
+ 10, 12, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 21, 21, 21, 21, 21, 21, 21, 2, 2,
+ 15, 15, 15, 15, 15, 15, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 12,
+ 10, 10, 10, 12, 12, 12, 12, 10, 10, 12, 12, 21, 21, 1, 21, 21,
+ 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2,
+ 12, 12, 12, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 10, 12, 12, 12,
+ 12, 12, 12, 12, 12, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 21, 21, 21, 21, 7, 10, 10, 7, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 12, 21, 21, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 10, 10, 10, 12, 12, 12, 12, 12, 12, 12, 12, 12, 10,
+ 10, 7, 7, 7, 7, 21, 21, 21, 21, 12, 12, 12, 12, 21, 10, 12,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 7, 21, 7, 21, 21, 21,
+ 2, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 12,
+ 12, 12, 10, 10, 12, 10, 12, 12, 21, 21, 21, 21, 21, 21, 12, 2,
+ 7, 7, 7, 7, 7, 7, 7, 2, 7, 2, 7, 7, 7, 7, 2, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 21, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12,
+ 10, 10, 10, 12, 12, 12, 12, 12, 12, 12, 12, 2, 2, 2, 2, 2,
+ 12, 12, 10, 10, 2, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 7,
+ 7, 2, 7, 7, 2, 7, 7, 7, 7, 7, 2, 12, 12, 7, 10, 10,
+ 12, 10, 10, 10, 10, 2, 2, 10, 10, 2, 2, 10, 10, 10, 2, 2,
+ 7, 2, 2, 2, 2, 2, 2, 10, 2, 2, 2, 2, 2, 7, 7, 7,
+ 7, 7, 10, 10, 2, 2, 12, 12, 12, 12, 12, 12, 12, 2, 2, 2,
+ 12, 12, 12, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 10, 10, 10, 12, 12, 12, 12, 12, 12, 12, 12,
+ 10, 10, 12, 12, 12, 10, 12, 7, 7, 7, 7, 21, 21, 21, 21, 21,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 21, 21, 2, 21, 12, 7,
+ 10, 10, 10, 12, 12, 12, 12, 12, 12, 10, 12, 10, 10, 10, 10, 12,
+ 12, 10, 12, 12, 7, 7, 21, 7, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10,
+ 10, 10, 12, 12, 12, 12, 2, 2, 10, 10, 10, 10, 12, 12, 10, 12,
+ 12, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 7, 7, 7, 7, 12, 12, 2, 2,
+ 10, 10, 10, 12, 12, 12, 12, 12, 12, 12, 12, 10, 10, 12, 10, 12,
+ 12, 21, 21, 21, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 10, 12, 10, 10,
+ 12, 12, 12, 12, 12, 12, 10, 12, 7, 2, 2, 2, 2, 2, 2, 2,
+ 10, 10, 12, 12, 12, 12, 10, 12, 12, 12, 12, 12, 2, 2, 2, 2,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 15, 15, 21, 21, 21, 26,
+ 12, 12, 12, 12, 12, 12, 12, 12, 10, 12, 12, 21, 2, 2, 2, 2,
+ 15, 15, 15, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7,
+ 7, 7, 7, 7, 7, 7, 7, 2, 2, 7, 2, 2, 7, 7, 7, 7,
+ 7, 7, 7, 7, 2, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7,
+ 10, 10, 10, 10, 10, 10, 2, 10, 10, 2, 2, 12, 12, 10, 12, 7,
+ 10, 7, 10, 12, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 7, 7, 7, 7, 7, 7,
+ 7, 10, 10, 10, 12, 12, 12, 12, 2, 2, 12, 12, 10, 10, 10, 10,
+ 12, 7, 21, 7, 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 7, 7, 7, 7, 7,
+ 7, 7, 7, 12, 12, 12, 12, 12, 12, 10, 7, 12, 12, 12, 12, 21,
+ 21, 21, 21, 21, 21, 21, 21, 12, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 12, 12, 12, 12, 12, 12, 10, 10, 12, 12, 12, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 10, 12, 12, 21, 21, 21, 7, 21, 21,
+ 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 12, 12, 12, 12, 12, 12, 12, 2, 12, 12, 12, 12, 12, 12, 10, 12,
+ 7, 21, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 21, 21, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 2, 2, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 2, 10, 12, 12, 12, 12, 12, 12,
+ 12, 10, 12, 12, 10, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 2, 7, 7, 7, 7, 7,
+ 7, 12, 12, 12, 12, 12, 12, 2, 2, 2, 12, 2, 12, 12, 2, 12,
+ 12, 12, 12, 12, 12, 12, 7, 12, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 2, 7, 7, 2, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10, 2,
+ 12, 12, 2, 10, 10, 12, 10, 12, 7, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 12, 12, 10, 10, 21, 21, 2, 2, 2, 2, 2, 2, 2,
+ 15, 15, 15, 15, 15, 26, 26, 26, 26, 26, 26, 26, 26, 23, 23, 23,
+ 23, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 21,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 2,
+ 21, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
+ 12, 12, 12, 12, 12, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 12, 12, 12, 12, 12, 12, 12, 21, 21, 21, 21, 21, 26, 26, 26, 26,
+ 6, 6, 6, 6, 21, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 15, 15, 15, 15, 15,
+ 15, 15, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 7, 7, 7,
+ 15, 15, 15, 15, 15, 15, 15, 21, 21, 21, 21, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 12,
+ 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 2, 2, 2, 2, 2, 2, 2, 12,
+ 12, 12, 12, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 21, 6, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 10, 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 26, 12, 12, 21,
+ 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 26, 26, 26, 26, 26, 26, 26, 2, 2, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 10, 10, 12, 12, 12, 26, 26, 26, 10, 10, 10,
+ 10, 10, 10, 1, 1, 1, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12,
+ 12, 12, 12, 26, 26, 12, 12, 12, 12, 12, 12, 12, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 12, 12, 12, 12, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 2, 2, 2, 2, 2, 2,
+ 26, 26, 12, 12, 12, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 15, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5,
+ 5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 9, 9, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 9, 2, 9, 9,
+ 2, 2, 9, 2, 2, 9, 9, 2, 2, 9, 9, 9, 9, 2, 9, 9,
+ 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 2, 5, 2, 5, 5, 5,
+ 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 9, 9, 2, 9, 9, 9, 9, 2, 2, 9, 9, 9,
+ 9, 9, 9, 9, 9, 2, 9, 9, 9, 9, 9, 9, 9, 2, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 9, 9, 2, 9, 9, 9, 9, 2,
+ 9, 9, 9, 9, 9, 2, 9, 2, 2, 2, 9, 9, 9, 9, 9, 9,
+ 9, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 2, 2, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 25, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 5, 5, 5, 5,
+ 5, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 25, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 25, 5, 5, 5, 5, 5, 5, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 25, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25,
+ 5, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 25,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 5, 5, 5, 5, 5, 5,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 25, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 25, 5, 5, 5, 5, 5, 5, 9, 5, 2, 2, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 12, 12, 12, 12, 12, 12, 12, 26, 26, 26, 26, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 26, 26, 26,
+ 26, 26, 26, 26, 26, 12, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 12, 26, 26, 21, 21, 21, 21, 21, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 2, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 2, 2, 12, 12, 12, 12, 12,
+ 12, 12, 2, 12, 12, 2, 12, 12, 12, 12, 12, 2, 2, 2, 2, 2,
+ 12, 12, 12, 12, 12, 12, 12, 6, 6, 6, 6, 6, 6, 6, 2, 2,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 2, 2, 7, 26,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 2, 2, 2, 23,
+ 7, 7, 7, 7, 7, 2, 2, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 12, 12, 12, 12, 12, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 5, 5, 5, 5, 12, 12, 12, 12, 12, 12, 12, 6, 2, 2, 2, 2,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 26, 15, 15, 15,
+ 23, 15, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 26, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 2,
+ 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 2, 7, 7, 2, 7, 2, 2, 7, 2, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 2, 7, 7, 7, 7, 2, 7, 2, 7, 2, 2, 2, 2,
+ 2, 2, 7, 2, 2, 2, 2, 7, 2, 7, 2, 7, 2, 7, 7, 7,
+ 2, 7, 7, 2, 7, 2, 2, 7, 2, 7, 2, 7, 2, 7, 2, 7,
+ 2, 7, 7, 2, 7, 2, 2, 7, 7, 7, 7, 2, 7, 7, 7, 7,
+ 7, 7, 7, 2, 7, 7, 7, 7, 2, 7, 7, 7, 7, 2, 7, 2,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7,
+ 2, 7, 7, 7, 2, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7,
+ 25, 25, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 2,
+ 2, 2, 2, 2, 2, 2, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 26, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 24, 24, 24, 24, 24,
+ 26, 26, 26, 26, 26, 26, 26, 26, 2, 2, 2, 2, 2, 2, 2, 2,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 26, 26, 26,
+ 26, 26, 26, 26, 26, 2, 2, 2, 26, 26, 26, 2, 2, 2, 2, 2,
+ 26, 26, 26, 2, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0,
+ 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 12, 12, 13, 14, 12, 15, 16, 17, 18, 19, 20,
+ 21, 22, 0, 0, 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 24, 25,
+ 0, 26, 27, 0, 28, 29, 30, 31, 32, 33, 0, 34, 0, 0, 0, 0,
+ 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 37, 38, 0, 0, 0, 0,
+ 39, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 0, 0,
+ 43, 44, 45, 46, 0, 47, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 0, 0, 50, 0, 0, 0,
+ 0, 0, 0, 51, 0, 52, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 54, 55, 0, 0, 0, 0, 56, 0, 0, 57, 58, 0,
+ 59, 60, 61, 62, 63, 64, 65, 0, 66, 67, 0, 68, 69, 70, 71, 0,
+ 60, 0, 72, 73, 74, 75, 0, 0, 69, 0, 76, 77, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 78, 79, 0, 0, 0, 0, 0, 0, 0, 0, 80,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 82, 83, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 85, 0, 79, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 87, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 1, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 14, 15, 16, 17, 18, 19, 20, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 1, 21, 0, 0, 0, 0, 0, 22, 23, 24,
+ 0, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 27,
+ 28, 29, 0, 0, 0, 0, 30, 0, 0, 0, 31, 32, 33, 34, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 13, 35, 36, 0, 0, 26, 37, 38, 39, 0, 0, 0, 0, 0, 40,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 1,
+ 42, 43, 44, 45, 0, 0, 0, 0, 0, 0, 0, 46, 0, 47, 48, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 0, 47, 0, 0,
+ 0, 0, 0, 49, 0, 0, 0, 0, 0, 0, 0, 46, 0, 47, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47, 50, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 0, 47, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 0, 54, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 0, 56, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 58, 59, 0, 0, 0, 0,
+ 0, 0, 60, 61, 62, 0, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 65, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 66,
+ 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 67, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 68,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 70, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 71, 72, 0, 0, 0, 0, 0, 0, 0, 0,
+ 73, 0, 66, 74, 0, 0, 0, 0, 0, 0, 75, 76, 72, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 0, 67, 0, 0, 0,
+ 0, 77, 78, 0, 0, 0, 0, 0, 0, 79, 0, 0, 0, 0, 0, 0,
+ 80, 0, 79, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 82,
+ 83, 84, 85, 86, 0, 0, 0, 0, 0, 0, 0, 0, 87, 88, 89, 1,
+ 1, 1, 90, 91, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92, 93,
+ 94, 95, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 71, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 0, 0, 0, 0, 0, 98, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 99, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 71,100,101, 0, 0, 0, 26, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 86, 0,102, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0,
+ 1, 1, 86, 0, 0, 0, 0, 0, 0,103, 0, 0, 0, 0,104, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,105, 0, 73, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,106,107,108, 0, 0, 0,
+ 0, 0,102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 47, 0, 0, 0, 0, 0,109, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,110,111, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 26,112, 0,113, 0, 0, 0, 0, 0,114, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 115, 0, 0, 0, 0, 0, 0, 0,100, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,116, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,117,118, 72, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,102, 0, 0, 0,
+ 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0,119, 0, 0, 0, 0,
+ 0, 0, 0, 0,112, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0,
+ 0, 0,105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73,120, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,121, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0,122, 0, 0, 0, 0, 0, 0, 0, 0, 0,123, 0, 47, 0, 0,
+ 26,124,124, 0, 0, 0, 0, 0, 0, 0, 0, 0,125, 0, 0, 49,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,126, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97,127, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,128, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,104, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,129,105, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 73, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67, 0, 97, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,130, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,131, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,132, 0, 0, 0, 0, 0, 0, 0,133, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,134, 0, 0, 0, 0,135, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 136,137,138,139,140,141, 0, 0, 0,142, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,143, 0, 0, 0,
+ 0, 0, 0, 0,133, 1, 1,144,145,112, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0,100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,146, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,100,147, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,230,230,230,230,
+ 230,230,230,230,230,230,230,230,230,232,220,220,220,220,232,216,
+ 220,220,220,220,220,202,202,220,220,220,220,202,202,220,220,220,
+ 220,220,220,220,220,220,220,220, 1, 1, 1, 1, 1,220,220,220,
+ 220,230,230,230,230,230,230,230,230,240,230,220,220,220,230,230,
+ 230,220,220, 0,230,230,230,220,220,220,220,230,232,220,220,230,
+ 233,234,234,233,234,234,233,230,230,230,230,230, 0, 0, 0,230,
+ 230,230,230,230, 0,220,230,230,230,230,220,230,230,230,222,220,
+ 230,230,230,230,230,230,220,220,220,220,220,220,230,230,220,230,
+ 230,222,228,230, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 19, 20,
+ 21, 22, 0, 23, 0, 24, 25, 0,230,220, 0, 18, 30, 31, 32, 0,
+ 0, 0, 0, 0, 0, 0, 0, 27, 28, 29, 30, 31, 32, 33, 34,230,
+ 230,220,220,230,230,230,230,230,220,230,230,220, 35, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,230,230,230,230,230,230,
+ 230, 0, 0,230,230,230,230,220,230, 0, 0,230,230, 0,220,230,
+ 230,220, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0,230,220,230,230,
+ 220,230,230,220,220,220,230,220,220,230,220,230,230,230,220,230,
+ 220,230,220,230,220,230,230, 0, 0, 0, 0, 0,230,230,220,230,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,220, 0, 0,230,230, 0,230,
+ 230,230,230,230,230,230,230,230, 0,230,230,230, 0,230,230,230,
+ 230,230, 0, 0, 0,220,220,220, 0, 0, 0, 0, 0, 0, 0,220,
+ 230,230,230,230,230,230, 0,220,230,230,220,230,230,220,230,230,
+ 230,220,220,220, 27, 28, 29,230,230,230,220,230,230,220,220,230,
+ 230,230,230,230, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0,
+ 0, 9, 0, 0, 0,230,220,230,230, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,230, 0, 0, 0, 0, 0, 0, 84, 91, 0, 0, 0, 0, 9,
+ 9, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0,103,103, 9, 0,
+ 0, 0, 0, 0,107,107,107,107, 0, 0, 0, 0,118,118, 9, 0,
+ 0, 0, 0, 0,122,122,122,122, 0, 0, 0, 0,220,220, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,220, 0,220, 0,216, 0, 0,
+ 0, 0, 0, 0, 0,129,130, 0,132, 0, 0, 0, 0, 0,130,130,
+ 130,130, 0, 0,130, 0,230,230, 9, 0,230,230, 0, 0, 0, 0,
+ 0, 0,220, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 9, 9, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,230,230,230, 0, 0, 0, 0,
+ 9, 0, 0, 0, 0, 0, 0, 0, 0,230, 0, 0, 0,228, 0, 0,
+ 0, 0, 0, 0, 0,222,230,220, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,230,220, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0,
+ 0, 0, 0, 0,230,230,230,230,230, 0, 0,220,230,230,230,230,
+ 230,220,220,220,220,220,220,230,230,220, 0,220, 0, 0, 0,230,
+ 220,230,230,230,230,230,230,230, 0, 0, 0, 0, 0, 0, 9, 9,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0,230,230,230, 0,
+ 1,220,220,220,220,220,230,230,220,220,220,220,230, 0, 1, 1,
+ 1, 1, 1, 1, 1, 0, 0, 0, 0,220, 0, 0, 0, 0, 0, 0,
+ 230, 0, 0, 0,230,230, 0, 0, 0, 0, 0, 0,230,230,220,230,
+ 230,230,230,230,230,230,220,230,230,234,214,220,202,230,230,230,
+ 230,230,230,230,230,230,230,230,230,230,232,228,228,220, 0,230,
+ 233,220,230,220,230,230, 1, 1,230,230,230,230, 1, 1, 1,230,
+ 230, 0, 0, 0, 0,230, 0, 0, 0, 1, 1,230,220,230, 1, 1,
+ 220,220,220,220,230, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 9, 0, 0,218,228,232,222,224,224, 0, 8, 8, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,230,230,230,230,230,230,230,230,
+ 230,230, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,220,
+ 220,220, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 7,
+ 0, 0, 0, 0,230, 0,230,230,220, 0, 0,230,230, 0, 0, 0,
+ 0, 0,230,230, 0,230, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 26, 0,230,230,230,230,230,230,230,220,220,220,220,220,
+ 220,220,230,230,230,230,230, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0,220, 0,230,230, 1,220, 0, 0, 0, 0, 9, 0, 0, 0, 0,
+ 0,230,220, 0, 0, 0, 0,230,230, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,220,220,230,230,230,220,230,220,220,220, 0, 9, 7, 0,
+ 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 9, 7, 0, 0, 7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 7,
+ 7, 0, 0, 0,230,230,230,230,230, 0, 0, 0, 0, 0, 9, 0,
+ 0, 0, 7, 0, 0, 0, 9, 7, 0, 0, 0, 0, 7, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 7, 0, 0, 0, 0,
+ 0, 9, 9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0,
+ 9, 9, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0,230,230,230,230,
+ 230,230,230, 0, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 1, 0, 0, 0, 0, 0, 0,216,216, 1, 1, 1, 0, 0,
+ 0,226,216,216,216,216,216, 0, 0, 0, 0, 0, 0, 0, 0,220,
+ 220,220,220,220,220,220,220, 0, 0,230,230,230,230,230,220,220,
+ 0, 0, 0, 0, 0, 0,230,230,230,230, 0, 0, 0, 0,230,230,
+ 230, 0, 0, 0,230, 0, 0,230,230,230,230,230,230,230, 0,230,
+ 230, 0,230,230,220,220,220,220,220,220,220, 0,230,230, 7, 0,
+ 0, 0, 0, 0, 16, 17, 17, 17, 17, 17, 17, 33, 17, 17, 17, 19,
+ 17, 17, 17, 17, 20,101, 17,113,129,169, 17, 27, 28, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17,237, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+ 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 0, 0,
+ 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0,
+ 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 10, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 10, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 11, 12, 0, 13, 0, 14, 15, 16, 0, 0,
+ 0, 0, 0, 1, 17, 18, 0, 19, 7, 1, 0, 0, 0, 20, 20, 7,
+ 20, 20, 20, 20, 20, 20, 20, 8, 21, 0, 22, 0, 7, 23, 24, 0,
+ 20, 20, 25, 0, 0, 0, 26, 27, 1, 7, 20, 20, 20, 20, 20, 1,
+ 28, 29, 30, 31, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 10, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 20,
+ 20, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 8, 21, 32, 4, 0, 10, 0, 33, 7, 20, 20, 20,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 8, 34, 34, 35, 36, 34, 37, 0, 38, 1, 20, 20,
+ 0, 0, 39, 0, 1, 1, 0, 8, 21, 1, 20, 0, 0, 0, 1, 0,
+ 0, 40, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 21,
+ 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
+ 0, 0, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 21, 7, 20, 41,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 21, 0, 42, 43, 44, 0, 45,
+ 0, 8, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 46, 7, 1, 10, 1, 0, 0, 0, 1, 20, 20, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 20, 20, 1, 20, 20, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 21, 0, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0,
+ 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 3, 47, 48, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+ 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 0, 0,
+ 0, 0, 3, 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16, 17, 18, 17, 19, 20, 21, 22, 23, 24, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 27, 28, 28, 29, 30, 31, 32, 33, 33, 33, 33, 33, 33,
+ 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
+ 52, 53, 54, 55, 56, 57, 58, 35, 35, 35, 35, 35, 59, 59, 60, 35,
+ 35, 35, 35, 35, 35, 35, 61, 62, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 63, 64, 35, 65, 66, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 66, 67, 66, 68, 69, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 70, 71, 35, 35,
+ 35, 35, 72, 35, 35, 35, 35, 35, 35, 35, 35, 35, 73, 74, 75, 76,
+ 77, 78, 35, 35, 79, 80, 35, 35, 81, 35, 82, 83, 84, 85, 17, 86,
+ 87, 88, 35, 35, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 89, 25, 25, 25, 25, 25, 25, 25, 90,
+ 91, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 92, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 93, 35, 35, 35, 35, 35, 35,
+ 25, 94, 35, 35, 25, 25, 25, 25, 25, 25, 25, 25, 25, 95, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 19, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0,
+ 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 0, 0, 0, 0, 0,
+ 26, 26, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 0, 9, 9, 9, 2, 2,
+ 9, 9, 9, 9, 0, 9, 2, 2, 2, 2, 9, 0, 9, 0, 9, 9,
+ 9, 2, 9, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 2, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 55, 55, 55, 55, 55, 55, 55, 55,
+ 55, 55, 55, 55, 55, 55, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 1, 1, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 2, 2, 4, 4, 4, 2, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 2, 2,
+ 2, 2, 2, 2, 2, 2, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 2, 2, 2, 2, 14, 14, 14, 14, 14, 14, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3,
+ 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 0, 3, 2, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 0, 3, 3, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+ 37, 37, 37, 37, 2, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+ 37, 2, 2, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 2, 2, 64, 64, 64, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90,
+ 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90,
+ 90, 90, 90, 90, 2, 2, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90,
+ 90, 90, 90, 90, 90, 2, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,
+ 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,
+ 95, 95, 2, 2, 95, 2, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+ 37, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5,
+ 5, 5, 5, 2, 2, 5, 5, 2, 2, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2,
+ 5, 5, 5, 5, 5, 5, 5, 2, 5, 2, 2, 2, 5, 5, 5, 5,
+ 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 5, 5, 2,
+ 2, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 5, 2, 2,
+ 2, 2, 5, 5, 2, 5, 5, 5, 5, 5, 2, 2, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 2, 2, 11, 11, 11, 2, 11, 11, 11, 11, 11,
+ 11, 2, 2, 2, 2, 11, 11, 2, 2, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2,
+ 11, 11, 11, 11, 11, 11, 11, 2, 11, 11, 2, 11, 11, 2, 11, 11,
+ 2, 2, 11, 2, 11, 11, 11, 11, 11, 2, 2, 2, 2, 11, 11, 2,
+ 2, 11, 11, 11, 2, 2, 2, 11, 2, 2, 2, 2, 2, 2, 2, 11,
+ 11, 11, 11, 2, 11, 2, 2, 2, 2, 2, 2, 2, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 2, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 2, 10, 10, 10, 2, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2,
+ 10, 10, 10, 10, 10, 10, 10, 2, 10, 10, 2, 10, 10, 10, 10, 10,
+ 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2, 10, 10, 10,
+ 2, 10, 10, 10, 2, 2, 10, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 2, 2, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 2, 2, 2, 2, 2, 2, 2, 10,
+ 10, 10, 10, 10, 10, 10, 2, 21, 21, 21, 2, 21, 21, 21, 21, 21,
+ 21, 21, 21, 2, 2, 21, 21, 2, 2, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2,
+ 21, 21, 21, 21, 21, 21, 21, 2, 21, 21, 2, 21, 21, 21, 21, 21,
+ 2, 2, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2, 2, 21, 21, 2,
+ 2, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 21, 21, 21, 2, 2,
+ 2, 2, 21, 21, 2, 21, 21, 21, 21, 21, 2, 2, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 22, 22, 2, 22, 22, 22, 22, 22,
+ 22, 2, 2, 2, 22, 22, 22, 2, 22, 22, 22, 22, 2, 2, 2, 22,
+ 22, 2, 22, 2, 22, 22, 2, 2, 2, 22, 22, 2, 2, 2, 22, 22,
+ 22, 2, 2, 2, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 2, 2, 2, 2, 22, 22, 22, 22, 22, 2, 2, 2, 22, 22, 22, 2,
+ 22, 22, 22, 22, 2, 2, 22, 2, 2, 2, 2, 2, 2, 22, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 2, 2, 2, 2, 2, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+ 23, 23, 23, 2, 23, 23, 23, 2, 23, 23, 23, 23, 23, 23, 23, 23,
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 2,
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+ 2, 2, 2, 23, 23, 23, 23, 23, 23, 23, 23, 2, 23, 23, 23, 2,
+ 23, 23, 23, 23, 2, 2, 2, 2, 2, 2, 2, 23, 23, 2, 23, 23,
+ 23, 2, 2, 2, 2, 2, 23, 23, 23, 23, 2, 2, 23, 23, 23, 23,
+ 23, 23, 23, 23, 23, 23, 2, 2, 2, 2, 2, 2, 2, 23, 23, 23,
+ 23, 23, 23, 23, 23, 23, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 2, 16, 16, 16, 2, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 16, 16, 16, 16, 16,
+ 2, 2, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 16, 16, 16, 2,
+ 16, 16, 16, 16, 2, 2, 2, 2, 2, 2, 2, 16, 16, 2, 2, 2,
+ 2, 2, 2, 2, 16, 2, 16, 16, 16, 16, 2, 2, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 2, 16, 16, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 2, 20, 20, 20, 2, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 2, 20, 20, 20, 2,
+ 20, 20, 20, 20, 20, 20, 2, 2, 2, 2, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 2, 2, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 2, 36, 36, 36, 2, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 2, 2, 2,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 2, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 2, 36, 2, 2, 36, 36, 36, 36, 36, 36, 36, 2, 2, 2,
+ 36, 2, 2, 2, 2, 36, 36, 36, 36, 36, 36, 2, 36, 2, 36, 36,
+ 36, 36, 36, 36, 36, 36, 2, 2, 2, 2, 2, 2, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 2, 2, 36, 36, 36, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 2, 2, 2, 2, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 2, 2, 2, 2, 2, 18, 18, 2, 18, 2, 18, 18, 18, 18,
+ 18, 2, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 2, 18, 2, 18, 18, 18,
+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 2, 2, 18, 18, 18, 18, 18, 2, 18, 2, 18, 18,
+ 18, 18, 18, 18, 2, 2, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+ 2, 2, 18, 18, 18, 18, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 2, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 2, 2, 2, 2, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 2, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 25,
+ 25, 2, 2, 2, 2, 2, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
+ 33, 33, 33, 33, 33, 33, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 8, 2, 2,
+ 2, 2, 2, 8, 2, 2, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 0, 8, 8, 8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+ 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 2,
+ 30, 30, 30, 30, 2, 2, 30, 30, 30, 30, 30, 30, 30, 2, 30, 2,
+ 30, 30, 30, 30, 2, 2, 30, 2, 30, 30, 30, 30, 2, 2, 30, 30,
+ 30, 30, 30, 30, 30, 2, 30, 2, 30, 30, 30, 30, 2, 2, 30, 30,
+ 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 2, 30, 30,
+ 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+ 30, 2, 2, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+ 30, 30, 30, 2, 2, 2, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+ 2, 2, 2, 2, 2, 2, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+ 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 2, 2, 29, 29,
+ 29, 29, 29, 29, 2, 2, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+ 28, 28, 28, 28, 28, 28, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 2, 2, 2, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 0, 0, 0, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2,
+ 2, 2, 2, 2, 2, 2, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 2, 45, 45, 45, 45, 45, 45, 45, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 0, 0, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 2, 46, 46, 46, 2, 46, 46, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
+ 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
+ 31, 31, 31, 31, 2, 2, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
+ 2, 2, 2, 2, 2, 2, 32, 32, 0, 0, 32, 0, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 2, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 2, 2, 2, 2, 2, 2, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 2,
+ 2, 2, 2, 2, 2, 2, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 2, 2, 2, 2, 2, 28, 28, 28, 28, 28, 28, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 2, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 2, 2, 2, 2, 48, 2, 2, 2, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
+ 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
+ 52, 52, 52, 52, 2, 2, 52, 52, 52, 52, 52, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
+ 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
+ 58, 58, 2, 2, 2, 2, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
+ 2, 2, 2, 2, 2, 2, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
+ 58, 2, 2, 2, 58, 58, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,
+ 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,
+ 54, 54, 2, 2, 54, 54, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91,
+ 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91,
+ 91, 91, 91, 91, 91, 2, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91,
+ 91, 91, 91, 2, 2, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91,
+ 2, 2, 2, 2, 2, 2, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91,
+ 91, 91, 91, 91, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 2, 2, 2, 2, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 2, 2, 2, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76,
+ 76, 76, 76, 76, 76, 76, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93,
+ 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 2, 2, 2, 2, 2, 2,
+ 2, 2, 93, 93, 93, 93, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
+ 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 2, 2,
+ 2, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
+ 2, 2, 2, 70, 70, 70, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+ 73, 73, 73, 73, 73, 73, 6, 6, 6, 6, 6, 6, 6, 6, 6, 2,
+ 2, 2, 2, 2, 2, 2, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 2, 2, 8, 8, 8, 76, 76, 76, 76, 76, 76, 76, 76, 2, 2,
+ 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
+ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
+ 0, 2, 2, 2, 2, 2, 19, 19, 19, 19, 19, 19, 9, 9, 9, 9,
+ 9, 6, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 9, 9, 9, 9, 9, 19, 19, 19, 19, 9, 9, 9, 9,
+ 9, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 6, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 2, 2, 9, 9,
+ 9, 9, 9, 9, 2, 2, 9, 9, 9, 9, 9, 9, 9, 9, 2, 9,
+ 2, 9, 2, 9, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 2, 2, 9, 9, 9, 9, 9, 2, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2, 2, 9, 9, 9, 9,
+ 9, 9, 2, 9, 9, 9, 2, 2, 9, 9, 9, 2, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 19, 2, 2, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 2, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 2, 2, 2, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0,
+ 19, 19, 0, 0, 0, 0, 0, 0, 19, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 19, 0, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0,
+ 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 2, 2, 2, 2, 2, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 27, 27, 27, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
+ 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
+ 56, 56, 56, 56, 56, 2, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
+ 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 2, 2, 2, 2, 2, 55,
+ 55, 55, 55, 55, 55, 55, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61,
+ 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 2, 2,
+ 2, 2, 2, 2, 2, 61, 61, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 61, 30, 30, 30, 30, 30, 30, 30, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 30, 30, 30, 30, 30, 30, 30, 2, 30, 30,
+ 30, 30, 30, 30, 30, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 13, 13, 13, 13, 13, 13, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 13, 0, 13, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 1, 1, 1, 1, 12, 12, 0, 0, 0, 0, 0, 0, 0, 0, 13, 13,
+ 13, 13, 0, 0, 0, 0, 2, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 2, 1,
+ 1, 0, 0, 15, 15, 15, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 0, 0, 17, 17, 17, 2, 2, 2, 2, 2, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 2, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 0, 17, 17, 17, 17, 17, 17, 17, 17, 0, 0,
+ 0, 0, 0, 0, 0, 0, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 2, 2, 2, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
+ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
+ 39, 39, 39, 2, 2, 2, 39, 39, 39, 39, 39, 39, 39, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86,
+ 86, 86, 86, 86, 86, 86, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77,
+ 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77,
+ 77, 77, 2, 2, 2, 2, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
+ 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 2, 2,
+ 2, 2, 2, 2, 2, 2, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, 0,
+ 0, 19, 19, 19, 19, 19, 2, 2, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 2, 2, 2, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 2, 2,
+ 2, 2, 2, 2, 2, 2, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75,
+ 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 2, 2, 2, 2,
+ 2, 2, 2, 2, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75,
+ 2, 2, 2, 2, 2, 2, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 0, 69, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
+ 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 74, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 2, 2, 2, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 2, 0, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
+ 2, 2, 2, 2, 84, 84, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
+ 33, 33, 33, 33, 33, 2, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+ 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+ 68, 68, 68, 68, 2, 2, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+ 2, 2, 68, 68, 68, 68, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92,
+ 92, 92, 92, 92, 92, 92, 92, 92, 92, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 92, 92, 92, 92, 92, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,
+ 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 30, 30, 30, 30, 30, 30, 2, 2, 30,
+ 30, 30, 30, 30, 30, 2, 2, 30, 30, 30, 30, 30, 30, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 0, 19, 19, 19, 19, 19, 19, 19, 19, 19, 9, 19, 19, 19, 19,
+ 0, 0, 2, 2, 2, 2, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,
+ 87, 87, 87, 87, 2, 2, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,
+ 2, 2, 2, 2, 2, 2, 12, 12, 12, 12, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 12, 12, 12, 12, 12, 12, 12, 2, 2, 2,
+ 2, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 2, 2, 2, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 2, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 2, 2, 2, 2, 2, 2, 19, 19, 19, 19, 19, 19, 19, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 2, 2,
+ 2, 2, 2, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 2, 14, 14,
+ 14, 14, 14, 2, 14, 2, 14, 14, 2, 14, 14, 2, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 0, 0, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 6, 6, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
+ 0, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 2, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 0, 0, 2, 2, 12, 12, 12, 12, 12, 12, 2, 2,
+ 12, 12, 12, 12, 12, 12, 2, 2, 12, 12, 12, 12, 12, 12, 2, 2,
+ 12, 12, 12, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
+ 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0,
+ 0, 0, 0, 0, 2, 2, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 2, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 2, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 2, 49, 49, 2, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 2, 2, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 2, 2, 2, 9, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 1, 2, 2, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
+ 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
+ 71, 71, 71, 2, 2, 2, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67,
+ 67, 67, 67, 67, 67, 67, 67, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+ 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 42, 42, 42, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+ 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+ 41, 2, 2, 2, 2, 2,118,118,118,118,118,118,118,118,118,118,
+ 118,118,118,118,118,118,118,118,118,118,118,118,118,118,118,118,
+ 118, 2, 2, 2, 2, 2, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53,
+ 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53,
+ 53, 53, 53, 53, 2, 53, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 2, 2, 2, 2, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+ 40, 40, 40, 40, 40, 40, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 2, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 2, 2, 2, 2, 2, 2,135,135,135,135,135,135,135,135,135,135,
+ 135,135,135,135,135,135,135,135,135,135, 2, 2, 2, 2,135,135,
+ 135,135,135,135,135,135,135,135,135,135,135,135,135,135,135,135,
+ 135,135, 2, 2, 2, 2,106,106,106,106,106,106,106,106,106,106,
+ 106,106,106,106,106,106,106,106,106,106,106,106,106,106, 2, 2,
+ 2, 2, 2, 2, 2, 2,104,104,104,104,104,104,104,104,104,104,
+ 104,104,104,104,104,104,104,104,104,104, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2,104,110,110,110,110,110,110,110,110,110,110,
+ 110,110,110,110,110,110,110,110,110,110,110,110,110, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,110,110,110,110,110,110, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,110,110,110,110,110,110,110,110, 2, 2,
+ 2, 2, 2, 2, 2, 2, 47, 47, 47, 47, 47, 47, 2, 2, 47, 2,
+ 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
+ 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 2, 47, 47, 2,
+ 2, 2, 47, 2, 2, 47, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81,
+ 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 2, 81, 81, 81,
+ 81, 81, 81, 81, 81, 81,120,120,120,120,120,120,120,120,120,120,
+ 120,120,120,120,120,120,116,116,116,116,116,116,116,116,116,116,
+ 116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+ 116,116,116,116,116, 2, 2, 2, 2, 2, 2, 2, 2,116,116,116,
+ 116,116,116,116,116,116,128,128,128,128,128,128,128,128,128,128,
+ 128,128,128,128,128,128,128,128,128, 2,128,128, 2, 2, 2, 2,
+ 2,128,128,128,128,128, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 2, 2, 2, 66, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 2, 2, 2, 2, 2, 72, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,
+ 98, 98, 98, 98, 98, 98, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 2, 2,
+ 2, 2, 97, 97, 97, 97, 2, 2, 97, 97, 97, 97, 97, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97, 57, 57, 57, 57, 2, 57, 57, 2, 2, 2,
+ 2, 2, 57, 57, 57, 57, 57, 57, 57, 57, 2, 57, 57, 57, 2, 57,
+ 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57,
+ 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 2, 2, 57, 57,
+ 57, 2, 2, 2, 2, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 2,
+ 2, 2, 2, 2, 2, 2, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88,
+ 88, 88, 88, 88, 88, 88,117,117,117,117,117,117,117,117,117,117,
+ 117,117,117,117,117,117,112,112,112,112,112,112,112,112,112,112,
+ 112,112,112,112,112,112,112,112,112,112,112,112,112, 2, 2, 2,
+ 2,112,112,112,112,112,112,112,112,112,112,112,112, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 2, 2, 2, 78,
+ 78, 78, 78, 78, 78, 78, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 2, 2, 83, 83,
+ 83, 83, 83, 83, 83, 83, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82,
+ 82, 82, 82, 82, 82, 82, 82, 82, 82, 2, 2, 2, 2, 2, 82, 82,
+ 82, 82, 82, 82, 82, 82,122,122,122,122,122,122,122,122,122,122,
+ 122,122,122,122,122,122,122,122, 2, 2, 2, 2, 2, 2, 2,122,
+ 122,122,122, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,122,
+ 122,122,122,122,122,122, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89,
+ 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 2,
+ 2, 2, 2, 2, 2, 2,130,130,130,130,130,130,130,130,130,130,
+ 130,130,130,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,130,130,130, 2, 2, 2, 2, 2, 2, 2,
+ 130,130,130,130,130,130,144,144,144,144,144,144,144,144,144,144,
+ 144,144,144,144,144,144,144,144,144,144,144,144,144,144, 2, 2,
+ 2, 2, 2, 2, 2, 2,144,144,144,144,144,144,144,144,144,144,
+ 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 2,156,156,156,156,156,156,156,156,156,156,
+ 156,156,156,156,156,156,156,156,156,156,156,156,156,156,156,156,
+ 2,156,156,156, 2, 2,156,156, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,147,147,147,147,147,147,147,147,147,147,
+ 147,147,147,147,147,147,147,147,147,147,147,147,147,147, 2, 2,
+ 2, 2, 2, 2, 2, 2,148,148,148,148,148,148,148,148,148,148,
+ 148,148,148,148,148,148,148,148,148,148,148,148,148,148,148,148,
+ 2, 2, 2, 2, 2, 2,153,153,153,153,153,153,153,153,153,153,
+ 153,153,153,153,153,153,153,153,153,153,153,153,153,153,153,153,
+ 153,153, 2, 2, 2, 2,149,149,149,149,149,149,149,149,149,149,
+ 149,149,149,149,149,149,149,149,149,149,149,149,149, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94,
+ 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94,
+ 94, 94, 94, 94, 2, 2, 2, 2, 94, 94, 94, 94, 94, 94, 94, 94,
+ 94, 94, 94, 94, 94, 94, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 94, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 85, 85, 85, 85, 85, 85, 85, 85, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 85, 2, 2,101,101,101,101,101,101,101,101,101,101,
+ 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, 2,
+ 2, 2, 2, 2, 2, 2,101,101,101,101,101,101,101,101,101,101,
+ 2, 2, 2, 2, 2, 2, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 2, 96, 96, 96, 96,
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 2, 2,
+ 2, 2, 2, 2, 2, 2,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,100,100,100,100,100,100,100,100,100,100,
+ 100,100,100,100,100,100, 2, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,108,108,108,108,108,108,108,108,108,108,
+ 108,108,108,108,108,108,108,108, 2,108,108,108,108,108,108,108,
+ 108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,
+ 108,108,108,108,108, 2,129,129,129,129,129,129,129, 2,129, 2,
+ 129,129,129,129, 2,129,129,129,129,129,129,129,129,129,129,129,
+ 129,129,129,129, 2,129,129,129,129,129,129,129,129,129,129,129,
+ 2, 2, 2, 2, 2, 2,109,109,109,109,109,109,109,109,109,109,
+ 109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
+ 109, 2, 2, 2, 2, 2,109,109,109,109,109,109,109,109,109,109,
+ 2, 2, 2, 2, 2, 2,107,107,107,107, 2,107,107,107,107,107,
+ 107,107,107, 2, 2,107,107, 2, 2,107,107,107,107,107,107,107,
+ 107,107,107,107,107,107,107,107,107,107,107,107,107,107,107, 2,
+ 107,107,107,107,107,107,107, 2,107,107, 2,107,107,107,107,107,
+ 2, 1,107,107,107,107,107,107,107,107,107, 2, 2,107,107, 2,
+ 2,107,107,107, 2, 2,107, 2, 2, 2, 2, 2, 2,107, 2, 2,
+ 2, 2, 2,107,107,107,107,107,107,107, 2, 2,107,107,107,107,
+ 107,107,107, 2, 2, 2,107,107,107,107,107, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,137,137,137,137,137,137,137,137,137,137,
+ 137,137,137,137,137,137,137,137,137,137,137,137,137,137,137,137,
+ 137,137, 2,137,137,137,137,137, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,124,124,124,124,124,124,124,124,124,124,
+ 124,124,124,124,124,124,124,124,124,124,124,124,124,124, 2, 2,
+ 2, 2, 2, 2, 2, 2,124,124,124,124,124,124,124,124,124,124,
+ 2, 2, 2, 2, 2, 2,123,123,123,123,123,123,123,123,123,123,
+ 123,123,123,123,123,123,123,123,123,123,123,123, 2, 2,123,123,
+ 123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
+ 123,123,123,123, 2, 2,114,114,114,114,114,114,114,114,114,114,
+ 114,114,114,114,114,114,114,114,114,114,114, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,114,114,114,114,114,114,114,114,114,114,
+ 2, 2, 2, 2, 2, 2, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 2, 2, 2,102,102,102,102,102,102,102,102,102,102,
+ 102,102,102,102,102,102,102,102,102,102,102,102,102,102,102, 2,
+ 2, 2, 2, 2, 2, 2,102,102,102,102,102,102,102,102,102,102,
+ 2, 2, 2, 2, 2, 2,126,126,126,126,126,126,126,126,126,126,
+ 126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,
+ 126, 2, 2,126,126,126,126,126,126,126,126,126,126,126,126,126,
+ 126,126, 2, 2, 2, 2,142,142,142,142,142,142,142,142,142,142,
+ 142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,
+ 142,142, 2, 2, 2, 2,125,125,125,125,125,125,125,125,125,125,
+ 125,125,125,125,125,125,125,125,125, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2,125,154,154,154,154,154,154,154, 2, 2,154,
+ 2, 2,154,154,154,154,154,154,154,154, 2,154,154, 2,154,154,
+ 154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,
+ 154,154,154,154,154,154,154,154,154,154,154,154, 2,154,154, 2,
+ 2,154,154,154,154,154,154,154,154,154,154,154,154, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,154,154,154,154,154,154,154,154,154,154,
+ 2, 2, 2, 2, 2, 2,150,150,150,150,150,150,150,150, 2, 2,
+ 150,150,150,150,150,150,150,150,150,150,150,150,150,150,150,150,
+ 150,150,150,150,150,150,150,150,150,150,150, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,141,141,141,141,141,141,141,141,141,141,
+ 141,141,141,141,141,141,141,141,141,141,141,141,141,141, 2, 2,
+ 2, 2, 2, 2, 2, 2,140,140,140,140,140,140,140,140,140,140,
+ 140,140,140,140,140,140,140,140,140, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,121,121,121,121,121,121,121,121,121,121,
+ 121,121,121,121,121,121,121,121,121,121,121,121,121,121,121, 2,
+ 2, 2, 2, 2, 2, 2,133,133,133,133,133,133,133,133,133, 2,
+ 133,133,133,133,133,133,133,133,133,133,133,133,133,133,133,133,
+ 133,133,133,133,133,133,133,133,133,133,133,133,133, 2,133,133,
+ 133,133,133,133,133,133,133,133,133,133,133,133, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,133,133,133,133,133,133,133,133,133,133,
+ 133,133,133, 2, 2, 2,134,134,134,134,134,134,134,134,134,134,
+ 134,134,134,134,134,134, 2, 2,134,134,134,134,134,134,134,134,
+ 134,134,134,134,134,134,134,134,134,134,134,134,134,134, 2,134,
+ 134,134,134,134,134,134,134,134,134,134,134,134,134, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,138,138,138,138,138,138,138, 2,138,138,
+ 2,138,138,138,138,138,138,138,138,138,138,138,138,138,138,138,
+ 138,138,138,138,138,138,138,138,138,138,138,138,138, 2, 2, 2,
+ 138, 2,138,138, 2,138,138,138,138,138,138,138,138,138, 2, 2,
+ 2, 2, 2, 2, 2, 2,138,138,138,138,138,138,138,138,138,138,
+ 2, 2, 2, 2, 2, 2,143,143,143,143,143,143, 2,143,143, 2,
+ 143,143,143,143,143,143,143,143,143,143,143,143,143,143,143,143,
+ 143,143,143,143,143,143,143,143,143,143,143,143,143,143,143,143,
+ 143,143,143,143,143, 2,143,143, 2,143,143,143,143,143,143, 2,
+ 2, 2, 2, 2, 2, 2,143,143,143,143,143,143,143,143,143,143,
+ 2, 2, 2, 2, 2, 2,145,145,145,145,145,145,145,145,145,145,
+ 145,145,145,145,145,145,145,145,145,145,145,145,145,145,145, 2,
+ 2, 2, 2, 2, 2, 2, 86, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 22, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+ 2, 2, 2, 2, 2, 2, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 2, 63, 63, 63, 63, 63, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 63, 63, 63, 63, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 2, 80, 80, 80, 80, 80, 80, 80, 80, 80, 2,
+ 2, 2, 2, 2, 2, 2,127,127,127,127,127,127,127,127,127,127,
+ 127,127,127,127,127,127,127,127,127,127,127,127,127, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 79, 79, 79, 79, 79, 79, 79, 79, 79, 2,
+ 2, 2, 2, 2, 2, 2,115,115,115,115,115,115,115,115,115,115,
+ 115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+ 115,115,115,115,115, 2,115,115,115,115,115,115,115,115,115,115,
+ 2, 2, 2, 2,115,115,103,103,103,103,103,103,103,103,103,103,
+ 103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,
+ 103,103,103,103, 2, 2,103,103,103,103,103,103, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,119,119,119,119,119,119,119,119,119,119,
+ 119,119,119,119,119,119,119,119,119,119,119,119, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,119,119,119,119,119,119,119,119,119,119,
+ 2,119,119,119,119,119,119,119, 2,119,119,119,119,119,119,119,
+ 119,119,119,119,119,119,119,119,119,119,119,119,119,119, 2, 2,
+ 2, 2, 2,119,119,119,146,146,146,146,146,146,146,146,146,146,
+ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,
+ 146, 2, 2, 2, 2, 2, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 2, 2, 2, 2, 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2,
+ 2, 2, 2, 2, 2, 99,136,139, 0, 0,155, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 13, 13, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,136,136,136,136,136,136,136,136,136,136,
+ 136,136,136,136,136,136,136,136,136,136,136,136,136,136, 2, 2,
+ 2, 2, 2, 2, 2, 2,155,155,155,155,155,155,155,155,155,155,
+ 155,155,155,155,155,155,155,155,155,155,155,155, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,136,136,136,136,136,136,136,136,136, 2,
+ 2, 2, 2, 2, 2, 2, 17, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 2, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 17, 17, 17, 17, 2, 2,
+ 2, 2, 2, 2, 2, 2,139,139,139,139,139,139,139,139,139,139,
+ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,
+ 139,139, 2, 2, 2, 2,105,105,105,105,105,105,105,105,105,105,
+ 105,105,105,105,105,105,105,105,105,105,105,105,105,105,105,105,
+ 105, 2, 2, 2, 2, 2,105,105,105,105,105,105,105,105,105,105,
+ 105,105,105, 2, 2, 2,105,105,105,105,105,105,105,105,105, 2,
+ 2, 2, 2, 2, 2, 2,105,105,105,105,105,105,105,105,105,105,
+ 2, 2,105,105,105,105, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
+ 2, 2, 2, 2, 2, 2, 9, 9, 9, 9, 9, 9, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 2, 0, 0, 2, 2, 0, 2, 2, 0, 0, 2, 2, 0,
+ 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
+ 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0,
+ 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2,
+ 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 2, 2, 0, 0,131,131,131,131,131,131,131,131,131,131,
+ 131,131,131,131,131,131,131,131,131,131,131,131,131,131,131,131,
+ 131,131, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2,131,131,131,131,131, 2,131,131,131,131,131,131,131,131,131,
+ 131,131,131,131,131,131, 56, 56, 56, 56, 56, 56, 56, 2, 56, 56,
+ 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 2,
+ 2, 56, 56, 56, 56, 56, 56, 56, 2, 56, 56, 2, 56, 56, 56, 56,
+ 56, 2, 2, 2, 2, 2,151,151,151,151,151,151,151,151,151,151,
+ 151,151,151,151,151,151,151,151,151,151,151,151,151,151,151,151,
+ 151,151,151, 2, 2, 2,151,151,151,151,151,151,151,151,151,151,
+ 151,151,151,151, 2, 2,151,151,151,151,151,151,151,151,151,151,
+ 2, 2, 2, 2,151,151,152,152,152,152,152,152,152,152,152,152,
+ 152,152,152,152,152,152,152,152,152,152,152,152,152,152,152,152,
+ 2, 2, 2, 2, 2,152,113,113,113,113,113,113,113,113,113,113,
+ 113,113,113,113,113,113,113,113,113,113,113, 2, 2,113,113,113,
+ 113,113,113,113,113,113,113,113,113,113,113,113,113, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2,132,132,132,132,132,132,132,132,132,132,
+ 132,132,132,132,132,132,132,132,132,132,132,132,132,132,132,132,
+ 132,132, 2, 2, 2, 2,132,132,132,132,132,132,132,132,132,132,
+ 2, 2, 2, 2,132,132, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 2, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 2, 3, 3, 2, 3, 2, 2, 3, 2, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 2, 3,
+ 2, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 3, 2, 3,
+ 2, 3, 2, 3, 3, 3, 2, 3, 3, 2, 3, 2, 2, 3, 2, 3,
+ 2, 3, 2, 3, 2, 3, 2, 3, 3, 2, 3, 2, 2, 3, 3, 3,
+ 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 2, 3,
+ 3, 3, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 2, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 3, 3,
+ 2, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 15, 0, 0, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2,
+ 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0,
+ 0, 2, 2, 2, 2, 2, 13, 13, 13, 13, 13, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 13, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 2, 3, 4, 5, 6, 0,
+ 0, 0, 0, 7, 8, 9, 10, 11, 0, 12, 0, 0, 0, 0, 13, 0,
+ 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 15, 16, 0, 17, 18, 19,
+ 0, 0, 0, 20, 21, 22, 0, 23, 0, 24, 0, 25, 0, 26, 0, 0,
+ 0, 0, 0, 27, 28, 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 33,
+ 34, 35, 36, 37, 38, 39, 40, 0, 0, 0, 41, 0, 42, 43, 44, 45,
+ 46, 47, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 50, 51, 52, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 54, 55, 56, 57, 58,
+ 59, 60, 61, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 64, 0, 0, 0, 0, 0,
+ 0, 0, 0, 65, 0, 0, 0, 0, 66, 0, 0, 0, 67, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 70, 71, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 72, 73, 74, 75, 76, 77, 78, 79, 80, 0,
+};
+static const uint16_t
+_hb_ucd_u16[11328] =
+{
+ 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 7, 8, 9, 10, 11, 12,
+ 13, 13, 13, 14, 15, 13, 13, 16, 17, 18, 19, 20, 21, 22, 13, 23,
+ 13, 13, 13, 24, 25, 11, 11, 11, 11, 26, 11, 27, 28, 29, 30, 31,
+ 32, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 11, 37, 38, 13, 39,
+ 9, 9, 9, 11, 11, 11, 13, 13, 40, 13, 13, 13, 41, 13, 13, 13,
+ 13, 13, 13, 42, 9, 43, 11, 11, 44, 45, 32, 46, 47, 48, 49, 50,
+ 51, 52, 48, 48, 53, 32, 54, 55, 48, 48, 48, 48, 48, 56, 57, 58,
+ 59, 60, 48, 32, 61, 48, 48, 48, 48, 48, 62, 63, 64, 48, 65, 66,
+ 48, 67, 68, 69, 48, 70, 71, 72, 72, 72, 48, 73, 74, 75, 76, 32,
+ 77, 48, 48, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
+ 91, 84, 85, 92, 93, 94, 95, 96, 97, 98, 85, 99, 100, 101, 89, 102,
+ 103, 84, 85, 104, 105, 106, 89, 107, 108, 109, 110, 111, 112, 113, 95, 114,
+ 115, 116, 85, 117, 118, 119, 89, 120, 121, 116, 85, 122, 123, 124, 89, 125,
+ 126, 116, 48, 127, 128, 129, 89, 130, 131, 132, 48, 133, 134, 135, 95, 136,
+ 137, 48, 48, 138, 139, 140, 72, 72, 141, 48, 142, 143, 144, 145, 72, 72,
+ 146, 147, 148, 149, 150, 48, 151, 152, 153, 154, 32, 155, 156, 157, 72, 72,
+ 48, 48, 158, 159, 160, 161, 162, 163, 164, 165, 9, 9, 166, 11, 11, 167,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 168, 169, 48, 48, 168, 48, 48, 170, 171, 172, 48, 48,
+ 48, 171, 48, 48, 48, 173, 174, 175, 48, 176, 9, 9, 9, 9, 9, 177,
+ 178, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 179, 48, 180, 181, 48, 48, 48, 48, 182, 183,
+ 184, 185, 48, 186, 48, 187, 184, 188, 48, 48, 48, 189, 190, 191, 192, 193,
+ 194, 192, 48, 48, 195, 48, 48, 196, 197, 48, 198, 48, 48, 48, 48, 199,
+ 48, 200, 201, 202, 203, 48, 204, 205, 48, 48, 206, 48, 207, 208, 209, 209,
+ 48, 210, 48, 48, 48, 211, 212, 213, 192, 192, 214, 215, 216, 72, 72, 72,
+ 217, 48, 48, 218, 219, 160, 220, 221, 222, 48, 223, 64, 48, 48, 224, 225,
+ 48, 48, 226, 227, 228, 64, 48, 229, 230, 9, 9, 231, 232, 233, 234, 235,
+ 11, 11, 236, 27, 27, 27, 237, 238, 11, 239, 27, 27, 32, 32, 32, 240,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 241, 13, 13, 13, 13, 13, 13,
+ 242, 243, 242, 242, 243, 244, 242, 245, 246, 246, 246, 247, 248, 249, 250, 251,
+ 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 262, 72, 263, 264, 216,
+ 265, 266, 267, 268, 269, 270, 271, 271, 272, 273, 274, 209, 275, 276, 209, 277,
+ 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
+ 279, 209, 280, 209, 209, 209, 209, 281, 209, 282, 278, 283, 209, 284, 285, 209,
+ 209, 209, 286, 72, 287, 72, 270, 270, 270, 288, 209, 209, 209, 209, 289, 270,
+ 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 290, 291, 209, 209, 292,
+ 209, 209, 209, 209, 209, 209, 293, 209, 209, 209, 209, 209, 209, 209, 209, 209,
+ 209, 209, 209, 209, 209, 209, 294, 295, 270, 296, 209, 209, 297, 278, 298, 278,
+ 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209,
+ 278, 278, 278, 278, 278, 278, 278, 278, 299, 300, 278, 278, 278, 301, 278, 302,
+ 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
+ 209, 209, 209, 278, 303, 209, 209, 304, 209, 305, 209, 209, 209, 209, 209, 209,
+ 9, 9, 306, 11, 11, 307, 308, 309, 13, 13, 13, 13, 13, 13, 310, 311,
+ 11, 11, 312, 48, 48, 48, 313, 314, 48, 315, 316, 316, 316, 316, 32, 32,
+ 317, 318, 319, 320, 321, 322, 72, 72, 209, 323, 209, 209, 209, 209, 209, 324,
+ 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 325, 72, 326,
+ 327, 328, 329, 330, 137, 48, 48, 48, 48, 331, 178, 48, 48, 48, 48, 332,
+ 333, 48, 48, 137, 48, 48, 48, 48, 200, 334, 48, 48, 209, 209, 324, 48,
+ 209, 335, 336, 209, 337, 338, 209, 209, 336, 209, 209, 338, 209, 209, 209, 209,
+ 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 209, 209, 209, 209,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 151,
+ 48, 339, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 151, 209, 209, 209, 286, 48, 48, 229,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 340, 48, 341, 72, 13, 13, 342, 343, 13, 344, 48, 48, 48, 48, 345, 346,
+ 31, 347, 348, 349, 13, 13, 13, 350, 351, 352, 353, 354, 355, 72, 72, 356,
+ 357, 48, 358, 359, 48, 48, 48, 360, 361, 48, 48, 362, 363, 192, 32, 364,
+ 64, 48, 365, 48, 366, 367, 48, 151, 77, 48, 48, 368, 369, 370, 371, 372,
+ 48, 48, 373, 374, 375, 376, 48, 377, 48, 48, 48, 378, 379, 380, 381, 382,
+ 383, 384, 316, 11, 11, 385, 386, 11, 11, 11, 11, 11, 48, 48, 387, 192,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 388, 48, 389, 48, 48, 206,
+ 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390,
+ 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390,
+ 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391,
+ 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391,
+ 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 204, 48, 48, 48, 48, 48, 48, 207, 72, 72,
+ 392, 393, 394, 395, 396, 48, 48, 48, 48, 48, 48, 397, 398, 399, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 400, 72, 48, 48, 48, 48, 401, 48, 48, 74, 72, 72, 402,
+ 32, 403, 32, 404, 405, 406, 407, 73, 48, 48, 48, 48, 48, 48, 48, 408,
+ 409, 2, 3, 4, 5, 410, 411, 412, 48, 413, 48, 200, 414, 415, 416, 417,
+ 418, 48, 172, 419, 204, 204, 72, 72, 48, 48, 48, 48, 48, 48, 48, 71,
+ 420, 270, 270, 421, 271, 271, 271, 422, 423, 424, 425, 72, 72, 209, 209, 426,
+ 72, 72, 72, 72, 72, 72, 72, 72, 48, 151, 48, 48, 48, 101, 427, 428,
+ 48, 48, 429, 48, 430, 48, 48, 431, 48, 432, 48, 48, 433, 434, 72, 72,
+ 9, 9, 435, 11, 11, 48, 48, 48, 48, 204, 192, 9, 9, 436, 11, 437,
+ 48, 48, 74, 48, 48, 48, 438, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 315, 48, 199, 74, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 439, 48, 48, 440, 48, 441, 48, 442, 48, 200, 443, 72, 72, 72, 48, 444,
+ 48, 445, 48, 446, 72, 72, 72, 72, 48, 48, 48, 447, 270, 448, 270, 270,
+ 449, 450, 48, 451, 452, 453, 48, 454, 48, 455, 72, 72, 456, 48, 457, 458,
+ 48, 48, 48, 459, 48, 460, 48, 461, 48, 462, 463, 72, 72, 72, 72, 72,
+ 48, 48, 48, 48, 196, 72, 72, 72, 9, 9, 9, 464, 11, 11, 11, 465,
+ 48, 48, 466, 192, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 72, 72, 72, 72, 72, 72, 270, 467, 48, 48, 468, 469, 72, 72, 72, 72,
+ 48, 455, 470, 48, 62, 471, 72, 72, 72, 72, 72, 48, 472, 72, 48, 315,
+ 473, 48, 48, 474, 475, 448, 476, 477, 222, 48, 48, 478, 479, 48, 196, 192,
+ 480, 48, 481, 482, 483, 48, 48, 484, 222, 48, 48, 485, 486, 487, 488, 489,
+ 48, 98, 490, 491, 72, 72, 72, 72, 492, 493, 494, 48, 48, 495, 496, 192,
+ 497, 84, 85, 498, 499, 500, 501, 502, 72, 72, 72, 72, 72, 72, 72, 72,
+ 48, 48, 48, 503, 504, 505, 469, 72, 48, 48, 48, 506, 507, 192, 72, 72,
+ 72, 72, 72, 72, 72, 72, 72, 72, 48, 48, 508, 509, 510, 511, 72, 72,
+ 48, 48, 48, 512, 513, 192, 514, 72, 48, 48, 515, 516, 192, 72, 72, 72,
+ 48, 173, 517, 518, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 48, 48, 490, 519, 72, 72, 72, 72, 72, 72, 9, 9, 11, 11, 148, 520,
+ 521, 522, 48, 523, 524, 192, 72, 72, 72, 72, 525, 48, 48, 526, 527, 72,
+ 528, 48, 48, 529, 530, 531, 48, 48, 532, 533, 534, 72, 48, 48, 48, 196,
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 85, 48, 508, 535, 536, 148, 175, 537, 48, 538, 539, 540, 72, 72, 72, 72,
+ 541, 48, 48, 542, 543, 192, 544, 48, 545, 546, 192, 72, 72, 72, 72, 72,
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 48, 547,
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 101, 270, 548, 549, 550,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 207, 72, 72, 72, 72, 72, 72,
+ 271, 271, 271, 271, 271, 271, 551, 552, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 388, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 48, 48, 200, 553, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 48, 48, 48, 48, 315, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 48, 48, 48, 196, 48, 200, 370, 72, 72, 72, 72, 72, 72, 48, 204, 554,
+ 48, 48, 48, 555, 556, 557, 558, 559, 48, 72, 72, 72, 72, 72, 72, 72,
+ 72, 72, 72, 72, 9, 9, 11, 11, 270, 560, 72, 72, 72, 72, 72, 72,
+ 48, 48, 48, 48, 561, 562, 563, 563, 564, 565, 72, 72, 72, 72, 566, 567,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 74,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 199, 72, 72,
+ 196, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 200, 72, 72, 72, 568, 569, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 206,
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 48, 48, 48, 48, 48, 48, 71, 151, 196, 570, 571, 72, 72, 72, 72, 72,
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 325,
+ 209, 209, 572, 209, 209, 209, 573, 574, 575, 209, 576, 209, 209, 209, 577, 72,
+ 209, 209, 209, 209, 578, 72, 72, 72, 72, 72, 72, 72, 72, 72, 270, 579,
+ 209, 209, 209, 209, 209, 286, 270, 452, 72, 72, 72, 72, 72, 72, 72, 72,
+ 9, 580, 11, 581, 582, 583, 242, 9, 584, 585, 586, 587, 588, 9, 580, 11,
+ 589, 590, 11, 591, 592, 593, 594, 9, 595, 11, 9, 580, 11, 581, 582, 11,
+ 242, 9, 584, 594, 9, 595, 11, 9, 580, 11, 596, 9, 597, 598, 599, 600,
+ 11, 601, 9, 602, 603, 604, 605, 11, 606, 9, 607, 11, 608, 609, 609, 609,
+ 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209,
+ 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209,
+ 32, 32, 32, 610, 32, 32, 611, 612, 613, 614, 45, 72, 72, 72, 72, 72,
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 615, 616, 617, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 48, 48, 151, 618, 619, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 48, 48, 620, 621,
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 622, 623, 72, 72,
+ 9, 9, 584, 11, 624, 370, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 72, 72, 72, 72, 72, 72, 72, 488, 270, 270, 625, 626, 72, 72, 72, 72,
+ 488, 270, 627, 628, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 629, 48, 630, 631, 632, 633, 634, 635, 636, 206, 637, 206, 72, 72, 72, 638,
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 209, 209, 326, 209, 209, 209, 209, 209, 209, 324, 335, 639, 639, 639, 209, 325,
+ 640, 209, 209, 209, 209, 209, 209, 209, 209, 209, 641, 72, 72, 72, 642, 209,
+ 643, 209, 209, 326, 577, 644, 325, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 645,
+ 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 646, 424, 424,
+ 209, 209, 209, 209, 209, 209, 209, 324, 209, 209, 209, 209, 209, 577, 326, 72,
+ 326, 209, 209, 209, 646, 176, 209, 209, 646, 209, 641, 644, 72, 72, 72, 72,
+ 209, 209, 209, 209, 209, 209, 209, 647, 209, 209, 209, 209, 648, 209, 209, 209,
+ 209, 209, 209, 209, 209, 324, 641, 649, 286, 209, 577, 286, 643, 286, 72, 72,
+ 209, 209, 209, 209, 209, 209, 209, 209, 209, 650, 209, 209, 287, 72, 72, 192,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 204, 72, 72,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 205, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 204, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 469, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 101, 72,
+ 48, 204, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 71, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 651, 72, 652, 652, 652, 652, 652, 652, 72, 72, 72, 72, 72, 72, 72, 72,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 72,
+ 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391,
+ 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 653,
+ 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391,
+ 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 654,
+ 0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 3, 3, 4, 5, 4, 5,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 0, 0, 7, 0,
+ 8, 8, 8, 8, 8, 8, 8, 9, 10, 11, 12, 11, 11, 11, 13, 11,
+ 14, 14, 14, 14, 14, 14, 14, 14, 15, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 16, 17, 18, 17, 17, 19, 20, 21, 21, 22, 21, 23, 24,
+ 25, 26, 27, 27, 28, 29, 27, 30, 27, 27, 27, 27, 27, 31, 27, 27,
+ 32, 33, 33, 33, 34, 27, 27, 27, 35, 35, 35, 36, 37, 37, 37, 38,
+ 39, 39, 40, 41, 42, 43, 44, 45, 45, 45, 27, 46, 47, 48, 49, 27,
+ 50, 50, 50, 50, 50, 51, 52, 50, 53, 54, 55, 56, 57, 58, 59, 60,
+ 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
+ 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92,
+ 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 111, 112, 113, 114, 111, 115, 116, 117, 118, 119, 120, 121, 122,
+ 123, 124, 124, 125, 124, 126, 45, 45, 127, 128, 129, 130, 131, 132, 45, 45,
+ 133, 133, 133, 133, 134, 133, 135, 136, 133, 134, 133, 137, 137, 138, 45, 45,
+ 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 140, 140, 141, 140, 140, 142,
+ 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143,
+ 144, 144, 144, 144, 145, 146, 144, 144, 145, 144, 144, 147, 148, 149, 144, 144,
+ 144, 148, 144, 144, 144, 150, 144, 151, 144, 152, 153, 153, 153, 153, 153, 154,
+ 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155,
+ 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155,
+ 155, 155, 155, 155, 155, 155, 155, 155, 156, 157, 158, 158, 158, 158, 159, 160,
+ 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 169, 169, 169, 170, 171, 171,
+ 172, 173, 174, 174, 174, 174, 174, 175, 174, 174, 176, 155, 155, 155, 155, 177,
+ 178, 179, 180, 180, 181, 182, 183, 184, 185, 185, 186, 185, 187, 188, 169, 169,
+ 189, 190, 191, 191, 191, 192, 191, 193, 194, 194, 195, 8, 196, 45, 45, 45,
+ 197, 197, 197, 197, 198, 197, 197, 199, 200, 200, 200, 200, 201, 201, 201, 202,
+ 203, 203, 203, 204, 205, 206, 206, 206, 207, 140, 140, 208, 209, 210, 211, 212,
+ 4, 4, 213, 4, 4, 214, 215, 216, 4, 4, 4, 217, 8, 8, 8, 218,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 11, 219, 11, 11, 219, 220, 11, 221, 11, 11, 11, 222, 222, 223, 11, 224,
+ 225, 0, 0, 0, 0, 0, 226, 227, 228, 229, 0, 0, 45, 8, 8, 196,
+ 0, 0, 230, 231, 232, 0, 4, 4, 233, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 234, 45, 235, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 237, 0, 238, 0, 0, 0, 0, 0, 0,
+ 239, 239, 240, 239, 239, 240, 4, 4, 241, 241, 241, 241, 241, 241, 241, 242,
+ 140, 140, 141, 243, 243, 243, 244, 245, 144, 246, 247, 247, 247, 247, 14, 14,
+ 0, 0, 0, 0, 0, 248, 45, 45, 249, 250, 249, 249, 249, 249, 249, 251,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 252, 45, 253,
+ 254, 0, 255, 256, 257, 258, 258, 258, 258, 259, 260, 261, 261, 261, 261, 262,
+ 263, 264, 264, 265, 143, 143, 143, 143, 266, 0, 264, 264, 0, 0, 267, 261,
+ 143, 266, 0, 0, 0, 0, 143, 268, 0, 0, 0, 0, 0, 261, 261, 269,
+ 261, 261, 261, 261, 261, 270, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 0, 0, 0, 0,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 271,
+ 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+ 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+ 272, 272, 272, 272, 272, 272, 272, 272, 273, 272, 272, 272, 274, 275, 275, 275,
+ 276, 276, 276, 276, 276, 276, 276, 276, 276, 276, 276, 276, 276, 276, 276, 276,
+ 276, 276, 277, 45, 14, 14, 14, 14, 14, 14, 278, 278, 278, 278, 278, 279,
+ 0, 0, 280, 4, 4, 4, 4, 4, 281, 4, 4, 4, 282, 45, 45, 283,
+ 284, 284, 285, 286, 287, 287, 287, 288, 289, 289, 289, 289, 290, 291, 50, 50,
+ 292, 292, 293, 294, 294, 295, 143, 296, 297, 297, 297, 297, 298, 299, 139, 300,
+ 301, 301, 301, 302, 303, 304, 139, 139, 305, 305, 305, 305, 306, 307, 308, 309,
+ 310, 311, 247, 4, 4, 312, 313, 153, 153, 153, 153, 153, 308, 308, 314, 315,
+ 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143,
+ 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143,
+ 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143,
+ 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 316, 143, 317, 143, 143, 318,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249,
+ 249, 249, 249, 249, 249, 249, 319, 249, 249, 249, 249, 249, 249, 320, 45, 45,
+ 321, 322, 21, 323, 324, 27, 27, 27, 27, 27, 27, 27, 325, 48, 27, 27,
+ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 326, 45, 27, 27, 27, 27, 327, 27, 27, 47, 45, 45, 328,
+ 8, 286, 329, 0, 0, 330, 331, 46, 27, 27, 27, 27, 27, 27, 27, 332,
+ 333, 0, 1, 2, 1, 2, 334, 260, 261, 335, 143, 266, 336, 337, 338, 339,
+ 340, 341, 342, 343, 344, 344, 45, 45, 341, 341, 341, 341, 341, 341, 341, 345,
+ 346, 0, 0, 347, 11, 11, 11, 11, 348, 349, 350, 45, 45, 0, 0, 351,
+ 45, 45, 45, 45, 45, 45, 45, 45, 352, 353, 354, 354, 354, 355, 356, 253,
+ 357, 357, 358, 359, 360, 361, 361, 362, 363, 364, 365, 365, 366, 367, 45, 45,
+ 368, 368, 368, 368, 368, 369, 369, 369, 370, 371, 372, 373, 373, 374, 373, 375,
+ 376, 376, 377, 378, 378, 378, 379, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 380, 380, 380, 380, 380, 380, 380, 380, 380, 380, 380, 380, 380, 380, 380, 380,
+ 380, 380, 380, 381, 380, 382, 383, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 384, 385, 385, 386, 387, 388, 389, 389, 390, 391, 392, 45, 45, 45, 393, 394,
+ 395, 396, 397, 398, 45, 45, 45, 45, 399, 399, 400, 401, 400, 402, 400, 400,
+ 403, 404, 405, 406, 407, 407, 408, 408, 409, 409, 45, 45, 410, 410, 411, 412,
+ 413, 413, 413, 414, 415, 416, 417, 418, 419, 420, 421, 45, 45, 45, 45, 45,
+ 422, 422, 422, 422, 423, 45, 45, 45, 424, 424, 424, 425, 424, 424, 424, 426,
+ 427, 427, 428, 429, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 27, 430, 431, 431, 432, 433, 45, 45, 45, 45,
+ 434, 434, 435, 436, 436, 437, 45, 45, 45, 45, 45, 438, 439, 45, 440, 441,
+ 442, 442, 442, 442, 443, 444, 442, 445, 446, 446, 446, 446, 447, 448, 449, 450,
+ 451, 451, 451, 452, 453, 454, 454, 455, 456, 456, 456, 456, 456, 456, 457, 458,
+ 459, 460, 459, 461, 45, 45, 45, 45, 462, 463, 464, 465, 465, 465, 466, 467,
+ 468, 469, 470, 471, 472, 473, 474, 475, 45, 45, 45, 45, 45, 45, 45, 45,
+ 476, 476, 476, 476, 476, 477, 478, 45, 479, 479, 479, 479, 480, 481, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45, 482, 482, 482, 483, 482, 484, 45, 45,
+ 485, 485, 485, 485, 486, 487, 488, 45, 489, 489, 489, 490, 491, 45, 45, 45,
+ 492, 493, 494, 492, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 495, 495, 495, 496, 45, 45, 45, 45, 45, 45, 497, 497, 497, 497, 497, 498,
+ 499, 500, 501, 502, 503, 504, 45, 45, 45, 45, 505, 506, 506, 505, 507, 45,
+ 508, 508, 508, 508, 509, 510, 510, 510, 510, 510, 511, 45, 512, 512, 512, 513,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 514, 515, 515, 516, 517, 515, 518, 519, 519, 520, 521, 522, 45, 45, 45, 45,
+ 523, 524, 524, 525, 526, 527, 528, 529, 530, 531, 532, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 533, 534,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 535, 536, 536, 536, 537,
+ 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538,
+ 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538,
+ 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538,
+ 538, 538, 538, 538, 538, 538, 538, 538, 538, 539, 45, 45, 45, 45, 45, 45,
+ 538, 538, 538, 538, 538, 538, 540, 541, 538, 538, 538, 538, 538, 538, 538, 538,
+ 538, 538, 538, 538, 542, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543,
+ 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543,
+ 543, 543, 544, 545, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546,
+ 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546,
+ 546, 546, 546, 546, 547, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
+ 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
+ 278, 278, 278, 548, 549, 550, 551, 45, 45, 45, 45, 45, 45, 552, 553, 554,
+ 555, 555, 555, 555, 556, 557, 558, 559, 555, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 560, 560, 560, 560, 560, 561, 45, 45, 45, 45, 45, 45,
+ 562, 562, 562, 562, 563, 562, 562, 562, 564, 562, 45, 45, 45, 45, 565, 566,
+ 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567,
+ 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567,
+ 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567,
+ 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 568,
+ 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567,
+ 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569,
+ 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 570, 45, 45,
+ 571, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 572, 258, 258, 258, 258, 258, 258, 258, 258, 258, 258, 258, 258, 258, 258, 258,
+ 258, 573, 45, 45, 45, 574, 575, 576, 576, 576, 576, 576, 576, 576, 576, 576,
+ 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 577,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 578, 578, 578, 578, 578, 578, 579, 580, 581, 582, 267, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 583,
+ 0, 0, 584, 0, 0, 0, 585, 586, 587, 0, 588, 0, 0, 0, 589, 45,
+ 11, 11, 11, 11, 590, 45, 45, 45, 45, 45, 45, 45, 45, 45, 0, 267,
+ 0, 0, 0, 0, 0, 234, 0, 589, 45, 45, 45, 45, 45, 45, 45, 45,
+ 0, 0, 0, 0, 0, 226, 0, 0, 0, 591, 592, 593, 594, 0, 0, 0,
+ 595, 596, 0, 597, 598, 599, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 600, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 601, 0, 0, 0,
+ 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602,
+ 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602,
+ 602, 602, 602, 602, 602, 602, 602, 602, 603, 604, 605, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 606, 607, 608, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 609, 609, 610, 611, 612, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 613, 613, 613, 614,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 615, 615, 615, 615, 615, 615, 615, 615, 615, 615, 615, 615, 616, 617, 45, 45,
+ 618, 618, 618, 618, 619, 620, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 333, 0, 0, 0, 621, 45, 45, 45, 45,
+ 333, 0, 0, 622, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 623, 27, 624, 625, 626, 627, 628, 629, 630, 631, 632, 631, 45, 45, 45, 325,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 0, 0, 253, 0, 0, 0, 0, 0, 0, 267, 228, 333, 333, 333, 0, 583,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 622, 45, 45, 45, 633, 0,
+ 634, 0, 0, 253, 589, 635, 583, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 636, 349, 349,
+ 0, 0, 0, 0, 0, 0, 0, 267, 0, 0, 0, 0, 0, 589, 253, 45,
+ 253, 0, 0, 0, 636, 286, 0, 0, 636, 0, 622, 635, 45, 45, 45, 45,
+ 0, 0, 0, 0, 0, 0, 0, 637, 0, 0, 0, 0, 638, 0, 0, 0,
+ 0, 0, 0, 0, 0, 267, 622, 639, 234, 0, 589, 234, 248, 234, 45, 45,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 330, 0, 0, 235, 45, 45, 286,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 319, 45, 45,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249,
+ 249, 249, 249, 640, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249,
+ 249, 319, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 566, 249, 249, 249, 249, 249,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 641, 45,
+ 249, 319, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249,
+ 249, 249, 249, 249, 642, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 643, 45, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 45, 45, 45, 45,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 939, 940, 941, 942, 946, 948, 0, 962, 969, 970, 971, 976,1001,1002,1003,1008,
+ 0,1033,1040,1041,1042,1043,1047, 0, 0,1080,1081,1082,1086,1110, 0, 0,
+ 1124,1125,1126,1127,1131,1133, 0,1147,1154,1155,1156,1161,1187,1188,1189,1193,
+ 0,1219,1226,1227,1228,1229,1233, 0, 0,1267,1268,1269,1273,1298, 0,1303,
+ 943,1128, 944,1129, 954,1139, 958,1143, 959,1144, 960,1145, 961,1146, 964,1149,
+ 0, 0, 973,1158, 974,1159, 975,1160, 983,1168, 978,1163, 988,1173, 990,1175,
+ 991,1176, 993,1178, 994,1179, 0, 0,1004,1190,1005,1191,1006,1192,1014,1199,
+ 1007, 0, 0, 0,1016,1201,1020,1206, 0,1022,1208,1025,1211,1023,1209, 0,
+ 0, 0, 0,1032,1218,1037,1223,1035,1221, 0, 0, 0,1044,1230,1045,1231,
+ 1049,1235, 0, 0,1058,1244,1064,1250,1060,1246,1066,1252,1067,1253,1072,1258,
+ 1069,1255,1077,1264,1074,1261, 0, 0,1083,1270,1084,1271,1085,1272,1088,1275,
+ 1089,1276,1096,1283,1103,1290,1111,1299,1115,1118,1307,1120,1309,1121,1310, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1053,1239, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1093,
+ 1280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 949,1134,1010,
+ 1195,1050,1236,1090,1277,1341,1368,1340,1367,1342,1369,1339,1366, 0,1320,1347,
+ 1418,1419,1323,1350, 0, 0, 992,1177,1018,1204,1055,1241,1416,1417,1415,1424,
+ 1202, 0, 0, 0, 987,1172, 0, 0,1031,1217,1321,1348,1322,1349,1338,1365,
+ 950,1135, 951,1136, 979,1164, 980,1165,1011,1196,1012,1197,1051,1237,1052,1238,
+ 1061,1247,1062,1248,1091,1278,1092,1279,1071,1257,1076,1263, 0, 0, 997,1182,
+ 0, 0, 0, 0, 0, 0, 945,1130, 982,1167,1337,1364,1335,1362,1046,1232,
+ 1422,1423,1113,1301, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 8, 9, 0, 10,1425, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+ 0, 0, 0, 0, 0,1314,1427, 5,1434,1438,1443, 0,1450, 0,1455,1461,
+ 1514, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1446,1458,1468,1476,1480,1486,
+ 1517, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1489,1503,1494,1500,1508, 0,
+ 0, 0, 0,1520,1521, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1526,1528, 0,1525, 0, 0, 0,1522, 0, 0, 0, 0,1536,1532,1539, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,1534, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,1556, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1548,1550, 0,1547, 0, 0, 0,1567, 0, 0, 0, 0,1558,1554,1561, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,1568,1569, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0,1529,1551, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1523,1545,1524,1546, 0, 0,1527,1549, 0, 0,1570,1571,1530,1552,1531,1553,
+ 0, 0,1533,1555,1535,1557,1537,1559, 0, 0,1572,1573,1544,1566,1538,1560,
+ 1540,1562,1541,1563,1542,1564, 0, 0,1543,1565, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,1606,1607,1609,1608,1610, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1613, 0,1611, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,1612, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,1620, 0, 0, 0, 0, 0, 0,
+ 0,1623, 0, 0,1624, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,1614,1615,1616,1617,1618,1619,1621,1622,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1628,1629, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1625,1626, 0,1627,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,1634, 0, 0,1635, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,1630,1631,1632, 0, 0,1633, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,1639, 0, 0,1638,1640, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1636,1637, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,1641, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1642,1644,1643, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,1645, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1646, 0, 0, 0, 0, 0, 0,1648,1649, 0,1647,1650, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1651,1653,1652, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1654, 0,1655,1657,1656, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,1659, 0, 0, 0, 0, 0, 0, 0, 0, 0,1660, 0, 0,
+ 0, 0,1661, 0, 0, 0, 0,1662, 0, 0, 0, 0,1663, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,1658, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,1664, 0,1665,1673, 0,1674, 0, 0, 0, 0, 0, 0, 0,
+ 0,1666, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,1668, 0, 0, 0, 0, 0, 0, 0, 0, 0,1669, 0, 0,
+ 0, 0,1670, 0, 0, 0, 0,1671, 0, 0, 0, 0,1672, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,1667, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,1675, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,1676, 0,1677, 0,1678, 0,1679, 0,1680, 0,
+ 0, 0,1681, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1682, 0,1683, 0, 0,
+ 1684,1685, 0,1686, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 953,1138, 955,1140, 956,1141, 957,1142,1324,1351, 963,1148, 965,1150, 968,1153,
+ 966,1151, 967,1152,1378,1380,1379,1381, 984,1169, 985,1170,1420,1421, 986,1171,
+ 989,1174, 995,1180, 998,1183, 996,1181, 999,1184,1000,1185,1015,1200,1329,1356,
+ 1017,1203,1019,1205,1021,1207,1024,1210,1687,1688,1027,1213,1026,1212,1028,1214,
+ 1029,1215,1030,1216,1034,1220,1036,1222,1039,1225,1038,1224,1334,1361,1336,1363,
+ 1382,1384,1383,1385,1056,1242,1057,1243,1059,1245,1063,1249,1689,1690,1065,1251,
+ 1068,1254,1070,1256,1386,1387,1388,1389,1691,1692,1073,1259,1075,1262,1079,1266,
+ 1078,1265,1095,1282,1098,1285,1097,1284,1390,1391,1392,1393,1099,1286,1100,1287,
+ 1101,1288,1102,1289,1105,1292,1104,1291,1106,1294,1107,1295,1108,1296,1114,1302,
+ 1119,1308,1122,1311,1123,1312,1186,1260,1293,1305, 0,1394, 0, 0, 0, 0,
+ 952,1137, 947,1132,1317,1344,1316,1343,1319,1346,1318,1345,1693,1695,1371,1375,
+ 1370,1374,1373,1377,1372,1376,1694,1696, 981,1166, 977,1162, 972,1157,1326,1353,
+ 1325,1352,1328,1355,1327,1354,1697,1698,1009,1194,1013,1198,1054,1240,1048,1234,
+ 1331,1358,1330,1357,1333,1360,1332,1359,1699,1700,1396,1401,1395,1400,1398,1403,
+ 1397,1402,1399,1404,1094,1281,1087,1274,1406,1411,1405,1410,1408,1413,1407,1412,
+ 1409,1414,1109,1297,1117,1306,1116,1304,1112,1300, 0, 0, 0, 0, 0, 0,
+ 1471,1472,1701,1705,1702,1706,1703,1707,1430,1431,1715,1719,1716,1720,1717,1721,
+ 1477,1478,1729,1731,1730,1732, 0, 0,1435,1436,1733,1735,1734,1736, 0, 0,
+ 1481,1482,1737,1741,1738,1742,1739,1743,1439,1440,1751,1755,1752,1756,1753,1757,
+ 1490,1491,1765,1768,1766,1769,1767,1770,1447,1448,1771,1774,1772,1775,1773,1776,
+ 1495,1496,1777,1779,1778,1780, 0, 0,1451,1452,1781,1783,1782,1784, 0, 0,
+ 1504,1505,1785,1788,1786,1789,1787,1790, 0,1459, 0,1791, 0,1792, 0,1793,
+ 1509,1510,1794,1798,1795,1799,1796,1800,1462,1463,1808,1812,1809,1813,1810,1814,
+ 1467, 21,1475, 22,1479, 23,1485, 24,1493, 27,1499, 28,1507, 29, 0, 0,
+ 1704,1708,1709,1710,1711,1712,1713,1714,1718,1722,1723,1724,1725,1726,1727,1728,
+ 1740,1744,1745,1746,1747,1748,1749,1750,1754,1758,1759,1760,1761,1762,1763,1764,
+ 1797,1801,1802,1803,1804,1805,1806,1807,1811,1815,1816,1817,1818,1819,1820,1821,
+ 1470,1469,1822,1474,1465, 0,1473,1825,1429,1428,1426, 12,1432, 0, 26, 0,
+ 0,1315,1823,1484,1466, 0,1483,1829,1433, 13,1437, 14,1441,1826,1827,1828,
+ 1488,1487,1513, 19, 0, 0,1492,1515,1445,1444,1442, 15, 0,1831,1832,1833,
+ 1502,1501,1516, 25,1497,1498,1506,1518,1457,1456,1454, 17,1453,1313, 11, 3,
+ 0, 0,1824,1512,1519, 0,1511,1830,1449, 16,1460, 18,1464, 4, 0, 0,
+ 30, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 2, 6, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1834,1835, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1836, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1837,1839,1838,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,1840, 0, 0, 0, 0,1841, 0, 0,1842, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,1843, 0,1844, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0,1845, 0, 0,1846, 0, 0,1847, 0,1848, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 937, 0,1850, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1849, 936, 938,
+ 1851,1852, 0, 0,1853,1854, 0, 0,1855,1856, 0, 0, 0, 0, 0, 0,
+ 1857,1858, 0, 0,1861,1862, 0, 0,1863,1864, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1867,1868,1869,1870,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1859,1860,1865,1866, 0, 0, 0, 0, 0, 0,1871,1872,1873,1874, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 33, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1875, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1877, 0,1878, 0,
+ 1879, 0,1880, 0,1881, 0,1882, 0,1883, 0,1884, 0,1885, 0,1886, 0,
+ 1887, 0,1888, 0, 0,1889, 0,1890, 0,1891, 0, 0, 0, 0, 0, 0,
+ 1892,1893, 0,1894,1895, 0,1896,1897, 0,1898,1899, 0,1900,1901, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,1876, 0, 0, 0, 0, 0, 0, 0, 0, 0,1902, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1904, 0,1905, 0,
+ 1906, 0,1907, 0,1908, 0,1909, 0,1910, 0,1911, 0,1912, 0,1913, 0,
+ 1914, 0,1915, 0, 0,1916, 0,1917, 0,1918, 0, 0, 0, 0, 0, 0,
+ 1919,1920, 0,1921,1922, 0,1923,1924, 0,1925,1926, 0,1927,1928, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,1903, 0, 0,1929,1930,1931,1932, 0, 0, 0,1933, 0,
+ 710, 385, 724, 715, 455, 103, 186, 825, 825, 242, 751, 205, 241, 336, 524, 601,
+ 663, 676, 688, 738, 411, 434, 474, 500, 649, 746, 799, 108, 180, 416, 482, 662,
+ 810, 275, 462, 658, 692, 344, 618, 679, 293, 388, 440, 492, 740, 116, 146, 168,
+ 368, 414, 481, 527, 606, 660, 665, 722, 781, 803, 809, 538, 553, 588, 642, 758,
+ 811, 701, 233, 299, 573, 612, 487, 540, 714, 779, 232, 267, 412, 445, 457, 585,
+ 594, 766, 167, 613, 149, 148, 560, 589, 648, 768, 708, 345, 411, 704, 105, 259,
+ 313, 496, 518, 174, 542, 120, 307, 101, 430, 372, 584, 183, 228, 529, 650, 697,
+ 424, 732, 428, 349, 632, 355, 517, 110, 135, 147, 403, 580, 624, 700, 750, 170,
+ 193, 245, 297, 374, 463, 543, 763, 801, 812, 815, 162, 384, 420, 730, 287, 330,
+ 337, 366, 459, 476, 509, 558, 591, 610, 726, 652, 734, 759, 154, 163, 198, 473,
+ 683, 697, 292, 311, 353, 423, 572, 494, 113, 217, 259, 280, 314, 499, 506, 603,
+ 608, 752, 778, 782, 788, 117, 557, 748, 774, 320, 109, 126, 260, 265, 373, 411,
+ 479, 523, 655, 737, 823, 380, 765, 161, 395, 398, 438, 451, 502, 516, 537, 583,
+ 791, 136, 340, 769, 122, 273, 446, 727, 305, 322, 400, 496, 771, 155, 190, 269,
+ 377, 391, 406, 432, 501, 519, 599, 684, 687, 749, 776, 175, 452, 191, 480, 510,
+ 659, 772, 805, 813, 397, 444, 619, 566, 568, 575, 491, 471, 707, 111, 636, 156,
+ 153, 288, 346, 578, 256, 435, 383, 729, 680, 767, 694, 295, 128, 210, 0, 0,
+ 227, 0, 379, 0, 0, 150, 493, 525, 544, 551, 552, 556, 783, 576, 604, 0,
+ 661, 0, 703, 0, 0, 735, 743, 0, 0, 0, 793, 794, 795, 808, 741, 773,
+ 118, 127, 130, 166, 169, 177, 207, 213, 215, 226, 229, 268, 270, 317, 327, 329,
+ 335, 369, 375, 381, 404, 441, 448, 458, 477, 484, 503, 539, 545, 547, 546, 548,
+ 549, 550, 554, 555, 561, 564, 569, 591, 593, 595, 598, 607, 620, 625, 625, 651,
+ 690, 695, 705, 706, 716, 717, 733, 735, 777, 786, 790, 315, 869, 623, 0, 0,
+ 102, 145, 134, 115, 129, 138, 165, 171, 207, 202, 206, 212, 227, 231, 240, 243,
+ 250, 254, 294, 296, 303, 308, 319, 325, 321, 329, 326, 335, 341, 357, 360, 362,
+ 370, 379, 388, 389, 393, 421, 424, 438, 456, 454, 458, 465, 477, 535, 485, 490,
+ 493, 507, 512, 514, 521, 522, 525, 526, 528, 533, 532, 541, 565, 569, 574, 586,
+ 591, 597, 607, 637, 647, 674, 691, 693, 695, 698, 703, 699, 705, 704, 702, 706,
+ 709, 717, 728, 736, 747, 754, 770, 777, 783, 784, 786, 787, 790, 802, 825, 848,
+ 847, 857, 55, 65, 66, 883, 892, 916, 822, 824, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1586, 0,1605,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1602,1603,1934,1935,1574,1575,
+ 1576,1577,1579,1580,1581,1583,1584, 0,1585,1587,1588,1589,1591, 0,1592, 0,
+ 1593,1594, 0,1595,1596, 0,1598,1599,1600,1601,1604,1582,1578,1590,1597, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1936, 0,1937, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1938, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1939,1940,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1941,1942, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1944,1943, 0,1945, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1946,1947, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,1948, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1949,1950,
+ 1951,1952,1953,1954,1955, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1956,1957,1958,1960,1959,
+ 1961, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 106, 104, 107, 826, 114, 118, 119, 121, 123, 124, 127, 125, 34, 830, 130, 131,
+ 132, 137, 827, 35, 133, 139, 829, 142, 143, 112, 144, 145, 924, 151, 152, 37,
+ 157, 158, 159, 160, 38, 165, 166, 169, 171, 172, 173, 174, 176, 177, 178, 179,
+ 181, 182, 182, 182, 833, 468, 184, 185, 834, 187, 188, 189, 196, 192, 194, 195,
+ 197, 199, 200, 201, 203, 204, 204, 206, 208, 209, 211, 218, 213, 219, 214, 216,
+ 153, 234, 221, 222, 223, 220, 225, 224, 230, 835, 235, 236, 237, 238, 239, 244,
+ 836, 837, 247, 248, 249, 246, 251, 39, 40, 253, 255, 255, 838, 257, 258, 259,
+ 261, 839, 262, 263, 301, 264, 41, 266, 270, 272, 271, 841, 274, 842, 277, 276,
+ 278, 281, 282, 42, 283, 284, 285, 286, 43, 843, 44, 289, 290, 291, 293, 934,
+ 298, 845, 845, 621, 300, 300, 45, 852, 894, 302, 304, 46, 306, 309, 310, 312,
+ 316, 48, 47, 317, 846, 318, 323, 324, 325, 324, 328, 329, 333, 331, 332, 334,
+ 335, 336, 338, 339, 342, 343, 347, 351, 849, 350, 348, 352, 354, 359, 850, 361,
+ 358, 356, 49, 363, 365, 367, 364, 50, 369, 371, 851, 376, 386, 378, 53, 381,
+ 52, 51, 140, 141, 387, 382, 614, 78, 388, 389, 390, 394, 392, 856, 54, 399,
+ 396, 402, 404, 858, 405, 401, 407, 55, 408, 409, 410, 413, 859, 415, 56, 417,
+ 860, 418, 57, 419, 422, 424, 425, 861, 840, 862, 426, 863, 429, 431, 427, 433,
+ 437, 441, 438, 439, 442, 443, 864, 436, 449, 450, 58, 454, 453, 865, 447, 460,
+ 866, 867, 461, 466, 465, 464, 59, 467, 470, 469, 472, 828, 475, 868, 478, 870,
+ 483, 485, 486, 871, 488, 489, 872, 873, 495, 497, 60, 498, 61, 61, 504, 505,
+ 507, 508, 511, 62, 513, 874, 515, 875, 518, 844, 520, 876, 877, 878, 63, 64,
+ 528, 880, 879, 881, 882, 530, 531, 531, 533, 66, 534, 67, 68, 884, 536, 538,
+ 541, 69, 885, 549, 886, 887, 556, 559, 70, 561, 562, 563, 888, 889, 889, 567,
+ 71, 890, 570, 571, 72, 891, 577, 73, 581, 579, 582, 893, 587, 74, 590, 592,
+ 596, 75, 895, 896, 76, 897, 600, 898, 602, 605, 607, 899, 900, 609, 901, 611,
+ 853, 77, 615, 616, 79, 617, 252, 902, 903, 854, 855, 621, 622, 731, 80, 627,
+ 626, 628, 164, 629, 630, 631, 633, 904, 632, 634, 639, 640, 635, 641, 646, 651,
+ 638, 643, 644, 645, 905, 907, 906, 81, 653, 654, 656, 911, 657, 908, 82, 83,
+ 909, 910, 84, 664, 665, 666, 667, 669, 668, 671, 670, 674, 672, 673, 675, 85,
+ 677, 678, 86, 681, 682, 912, 685, 686, 87, 689, 36, 913, 914, 88, 89, 696,
+ 702, 709, 711, 915, 712, 713, 718, 719, 917, 831, 721, 720, 723, 832, 725, 728,
+ 918, 919, 739, 742, 744, 920, 745, 753, 756, 757, 755, 760, 761, 921, 762, 90,
+ 764, 922, 91, 775, 279, 780, 923, 925, 92, 93, 785, 926, 94, 927, 787, 787,
+ 789, 928, 792, 95, 796, 797, 798, 800, 96, 929, 802, 804, 806, 97, 98, 807,
+ 930, 99, 931, 932, 933, 814, 100, 816, 817, 818, 819, 820, 821, 935, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+static const int16_t // Generated table of signed 16-bit values; do not edit by hand.
+_hb_ucd_i16[196] = // Read by _hb_ucd_bmg(), which indexes it as (block << 2) + (u & 3), i.e. in groups of four entries.
+{
+ 0, 0, 0, 0, 1, -1, 0, 0, 2, 0, -2, 0, 0, 0, 0, 2,
+ 0, -2, 0, 0, 0, 0, 0, 16, 0, 0, 0, -16, 0, 0, 1, -1,
+ 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, -1, 0, 3, 3, 3, -3,
+ -3, -3, 0, 0, 0, 2016, 0, 0, 0, 0, 0, 2527, 1923, 1914, 1918, 0,
+ 2250, 0, 0, 0, 0, 0, 0, 138, 0, 7, 0, 0, -7, 0, 0, 0,
+ 1, -1, 1, -1, -1, 1, -1, 0, 1824, 0, 0, 0, 0, 0, 2104, 0,
+ 2108, 2106, 0, 2106, 1316, 0, 0, 0, 0, 1, -1, 1, -1, -138, 0, 0,
+ 1, -1, 8, 8, 8, 0, 7, 7, 0, 0, -8, -8, -8, -7, -7, 0,
+ 1, -1, 0, 2,-1316, 1, -1, 0, -1, 1, -1, 1, -1, 3, 1, -1,
+ -3, 1, -1, 1, -1, 0, 0,-1914,-1918, 0, 0,-1923,-1824, 0, 0, 0,
+ 0,-2016, 0, 0, 1, -1, 0, 1, 0, 0,-2104, 0, 0, 0, 0,-2106,
+ -2108,-2106, 0, 0, 1, -1,-2250, 0, 0, 0,-2527, 0, 0, -2, 0, 1,
+ -1, 0, 1, -1,
+};
+
+static inline uint_fast8_t // Three-stage packed-table lookup keyed by u. NOTE(review): "gc" presumably means Unicode General_Category -- confirm against the table generator.
+_hb_ucd_gc (unsigned u) // u: lookup key (presumably a code point); keys >= 1114110 skip the tables entirely.
+{
+ return u<1114110u?_hb_ucd_u8[2176+(((_hb_ucd_u16[((_hb_ucd_u8[u>>4>>5])<<5)+((u>>4)&31u)])<<4)+((u)&15u))]:2; // stage 1: _hb_ucd_u8[u>>9]; stage 2: _hb_ucd_u16[(s1<<5) + bits 4..8 of u]; stage 3: _hb_ucd_u8[2176 + (s2<<4) + low 4 bits of u]. Out-of-range keys yield 2.
+}
+static inline uint_fast8_t // Three-stage packed-table lookup keyed by u, all stages stored in _hb_ucd_u8. NOTE(review): "ccc" presumably means Canonical_Combining_Class -- confirm against the table generator.
+_hb_ucd_ccc (unsigned u) // u: lookup key (presumably a code point); keys >= 125259 skip the tables entirely.
+{
+ return u<125259u?_hb_ucd_u8[15060+(((_hb_ucd_u8[13636+(((_hb_ucd_u8[12656+(u>>3>>4)])<<4)+((u>>3)&15u))])<<3)+((u)&7u))]:0; // stage 1: _hb_ucd_u8[12656 + (u>>7)]; stage 2: _hb_ucd_u8[13636 + (s1<<4) + bits 3..6 of u]; stage 3: _hb_ucd_u8[15060 + (s2<<3) + low 3 bits of u]. Out-of-range keys yield 0.
+}
+static inline unsigned // Reads the i-th 4-bit field from a byte array that packs two fields per byte; result is in 0..15.
+_hb_ucd_b4 (const uint8_t* a, unsigned i) // a: packed byte array; i: index of the 4-bit field (not a byte index). No bounds check is performed here.
+{
+ return (a[i>>1]>>((i&1u)<<2))&15u; // byte i/2; even i selects the low nibble (shift 0), odd i the high nibble (shift 4).
+}
+static inline int_fast16_t // Three-stage packed-table lookup keyed by u that ends in the signed table _hb_ucd_i16. NOTE(review): "bmg" presumably means bidi mirroring glyph, with the result used as a signed delta -- confirm against callers.
+_hb_ucd_bmg (unsigned u) // u: lookup key (presumably a code point); keys >= 65380 skip the tables entirely.
+{
+ return u<65380u?_hb_ucd_i16[((_hb_ucd_u8[16372+(((_hb_ucd_b4(16244+_hb_ucd_u8,u>>2>>6))<<6)+((u>>2)&63u))])<<2)+((u)&3u)]:0; // stage 1: 4-bit field (u>>8) of the nibble array at _hb_ucd_u8+16244; stage 2: _hb_ucd_u8[16372 + (s1<<6) + bits 2..7 of u]; stage 3: _hb_ucd_i16[(s2<<2) + low 2 bits of u]. Out-of-range keys yield 0.
+}
+static inline uint_fast8_t // Three-stage packed-table lookup keyed by u. NOTE(review): "sc" presumably means Unicode Script -- confirm against the table generator.
+_hb_ucd_sc (unsigned u) // u: lookup key (presumably a code point); keys >= 918000 skip the tables entirely.
+{
+ return u<918000u?_hb_ucd_u8[19126+(((_hb_ucd_u16[3040+(((_hb_ucd_u8[17332+(u>>4>>5)])<<5)+((u>>4)&31u))])<<4)+((u)&15u))]:2; // stage 1: _hb_ucd_u8[17332 + (u>>9)]; stage 2: _hb_ucd_u16[3040 + (s1<<5) + bits 4..8 of u]; stage 3: _hb_ucd_u8[19126 + (s2<<4) + low 4 bits of u]. Out-of-range keys yield 2.
+}
+static inline uint_fast16_t // Two-stage packed-table lookup keyed by u, returning a 16-bit entry of _hb_ucd_u16. NOTE(review): "dm" presumably means decomposition mapping, with the result an index into other data -- confirm against callers.
+_hb_ucd_dm (unsigned u) // u: lookup key (presumably a code point); keys >= 195102 skip the tables entirely.
+{
+ return u<195102u?_hb_ucd_u16[6144+(((_hb_ucd_u8[29430+(u>>6)])<<6)+((u)&63u))]:0; // stage 1: _hb_ucd_u8[29430 + (u>>6)]; stage 2: _hb_ucd_u16[6144 + (s1<<6) + low 6 bits of u]. Out-of-range keys yield 0.
+}
+
+
+#elif !defined(HB_NO_UCD_UNASSIGNED)
+
+static const uint8_t
+_hb_ucd_u8[17508] =
+{
+ 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 14, 14, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 21, 23, 21, 21, 21, 21, 24, 7, 7,
+ 25, 26, 21, 21, 21, 21, 27, 28, 21, 21, 29, 30, 31, 32, 33, 34,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 35, 7, 36, 37, 7, 38, 7, 7, 7, 39, 21, 40,
+ 7, 7, 41, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 42, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 43,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 44,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 34, 35, 36, 37, 38, 39, 34, 34, 34, 40, 41, 42, 43,
+ 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 64, 65, 66, 67, 68, 69, 70, 71, 69, 72, 73,
+ 69, 69, 64, 74, 64, 64, 75, 76, 77, 78, 79, 80, 81, 82, 69, 83,
+ 84, 85, 86, 87, 88, 89, 69, 69, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 90, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 91,
+ 92, 34, 34, 34, 34, 34, 34, 34, 34, 93, 34, 34, 94, 95, 96, 97,
+ 98, 99,100,101,102,103,104,105, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,106,
+ 107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,
+ 108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,
+ 108,108, 34, 34,109,110,111,112, 34, 34,113,114,115,116,117,118,
+ 119,120,121,122,123,124,125,126,127,128,129,123, 34, 34,130,123,
+ 131,132,133,134,135,136,137,138,139,140,141,123,142,143,144,145,
+ 146,147,148,149,150,151,152,123,153,154,123,155,156,157,158,123,
+ 159,160,161,162,163,164,123,123,165,166,167,168,123,169,123,170,
+ 34, 34, 34, 34, 34, 34, 34,171,172, 34,173,123,123,123,123,123,
+ 123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
+ 34, 34, 34, 34, 34, 34, 34, 34,174,123,123,123,123,123,123,123,
+ 123,123,123,123,123,123,123,123, 34, 34, 34, 34,175,123,123,123,
+ 34, 34, 34, 34,176,177,178,179,123,123,123,123,180,181,182,183,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,184,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34,185,186,123,123,123,123,123,
+ 34, 34,187, 34, 34,188,123,123,123,123,123,123,123,123,123,123,
+ 123,123,123,123,123,123,123,123,189,190,123,123,123,123,123,123,
+ 69,191,192,193,194,195,196,123,197,198,199,200,201,202,203,204,
+ 69, 69, 69, 69,205,206,123,123,123,123,123,123,123,123,123,123,
+ 207,123,208,123,123,209,123,123,123,123,123,123,123,123,123,123,
+ 34,210,211,123,123,123,123,123,212,213,214,123,215,216,123,123,
+ 217,218,219,220,221,123, 69,222, 69, 69, 69, 69, 69,223,224,225,
+ 226,227,228,229,230,231, 69,232,123,123,123,123,123,123,123,123,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,233, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,234, 34,
+ 235, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,236, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34,237,123,123,123,123,123,123,123,123,
+ 34, 34, 34, 34,238,123,123,123,123,123,123,123,123,123,123,123,
+ 34, 34, 34, 34, 34, 34,239,123,123,123,123,123,123,123,123,123,
+ 240,123,241,242,123,123,123,123,123,123,123,123,123,123,123,123,
+ 108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,243,
+ 108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,244,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 2, 4, 5, 6, 2,
+ 7, 7, 7, 7, 7, 2, 8, 9, 10, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 17, 18, 19, 1, 20, 20, 21, 22, 23, 24, 25,
+ 26, 27, 15, 2, 28, 29, 27, 30, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 31, 11, 11, 11, 32, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 33, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 34, 34, 34, 34, 34, 34, 34, 34, 16, 32, 32, 32,
+ 32, 32, 32, 32, 11, 34, 34, 16, 34, 32, 32, 11, 34, 11, 16, 11,
+ 11, 34, 32, 11, 32, 16, 11, 34, 32, 32, 32, 11, 34, 16, 32, 11,
+ 34, 11, 34, 34, 32, 35, 32, 16, 36, 36, 37, 34, 38, 37, 34, 34,
+ 34, 34, 34, 34, 34, 34, 16, 32, 34, 38, 32, 11, 32, 32, 32, 32,
+ 32, 32, 16, 16, 16, 11, 34, 32, 34, 34, 11, 32, 32, 32, 32, 32,
+ 16, 16, 39, 16, 16, 16, 16, 16, 40, 40, 40, 40, 40, 40, 40, 40,
+ 40, 41, 41, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41,
+ 40, 40, 42, 41, 41, 41, 42, 42, 41, 41, 41, 41, 41, 41, 41, 41,
+ 43, 43, 43, 43, 43, 43, 43, 43, 32, 32, 42, 32, 44, 45, 16, 10,
+ 44, 44, 41, 46, 11, 47, 47, 11, 34, 11, 11, 11, 11, 11, 11, 11,
+ 11, 48, 11, 11, 11, 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 34,
+ 16, 11, 32, 16, 32, 32, 32, 32, 16, 16, 32, 49, 34, 32, 34, 11,
+ 32, 50, 43, 43, 51, 32, 32, 32, 11, 34, 34, 34, 34, 34, 34, 16,
+ 48, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 47, 52, 2, 2, 2,
+ 16, 16, 16, 16, 53, 54, 55, 56, 57, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 58, 59, 60, 43, 59, 44, 44, 44, 44,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 61, 44, 62,
+ 36, 63, 64, 44, 44, 44, 44, 44, 65, 65, 65, 8, 9, 66, 2, 67,
+ 43, 43, 43, 43, 43, 60, 68, 2, 69, 36, 36, 36, 36, 70, 43, 43,
+ 7, 7, 7, 7, 7, 2, 2, 36, 71, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 72, 43, 43, 43, 73, 50, 43, 43, 74, 75, 76, 43, 43, 36,
+ 7, 7, 7, 7, 7, 36, 77, 78, 2, 2, 2, 2, 2, 2, 2, 79,
+ 70, 36, 36, 36, 36, 36, 36, 36, 43, 43, 43, 43, 43, 80, 62, 36,
+ 36, 36, 36, 43, 43, 43, 43, 43, 71, 44, 44, 44, 44, 44, 44, 44,
+ 7, 7, 7, 7, 7, 36, 36, 36, 36, 36, 36, 36, 36, 70, 43, 43,
+ 43, 43, 40, 21, 2, 81, 57, 20, 36, 36, 36, 43, 43, 75, 43, 43,
+ 43, 43, 75, 43, 75, 43, 43, 44, 2, 2, 2, 2, 2, 2, 2, 64,
+ 36, 36, 36, 36, 70, 43, 44, 64, 36, 36, 36, 36, 36, 61, 44, 44,
+ 44, 44, 44, 44, 44, 44, 44, 44, 36, 36, 61, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 44, 44, 44, 44, 44, 57, 43, 43, 43, 43, 43, 43,
+ 43, 82, 43, 43, 43, 43, 43, 43, 43, 83, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 83, 71, 84, 85, 43, 43, 43, 83, 84, 85, 84,
+ 70, 43, 43, 43, 36, 36, 36, 36, 36, 43, 2, 7, 7, 7, 7, 7,
+ 86, 36, 36, 36, 36, 36, 36, 36, 70, 84, 62, 36, 36, 36, 61, 62,
+ 61, 62, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 61, 36, 36, 36,
+ 61, 61, 44, 36, 36, 44, 71, 84, 85, 43, 80, 87, 88, 87, 85, 61,
+ 44, 44, 44, 87, 44, 44, 36, 62, 36, 43, 44, 7, 7, 7, 7, 7,
+ 36, 20, 27, 27, 27, 56, 63, 80, 57, 83, 62, 36, 36, 61, 44, 62,
+ 61, 36, 62, 61, 36, 44, 80, 84, 85, 80, 44, 57, 80, 57, 43, 44,
+ 57, 44, 44, 44, 62, 36, 61, 61, 44, 44, 44, 7, 7, 7, 7, 7,
+ 43, 36, 70, 64, 44, 44, 44, 44, 57, 83, 62, 36, 36, 36, 36, 62,
+ 36, 62, 36, 36, 36, 36, 36, 36, 61, 36, 62, 36, 36, 44, 71, 84,
+ 85, 43, 43, 57, 83, 87, 85, 44, 61, 44, 44, 44, 44, 44, 44, 44,
+ 66, 44, 44, 44, 62, 43, 43, 43, 57, 84, 62, 36, 36, 36, 61, 62,
+ 61, 36, 62, 36, 36, 44, 71, 85, 85, 43, 80, 87, 88, 87, 85, 44,
+ 44, 44, 57, 83, 44, 44, 36, 62, 78, 27, 27, 27, 44, 44, 44, 44,
+ 44, 71, 62, 36, 36, 61, 44, 36, 61, 36, 36, 44, 62, 61, 61, 36,
+ 44, 62, 61, 44, 36, 61, 44, 36, 36, 36, 36, 36, 36, 44, 44, 84,
+ 83, 88, 44, 84, 88, 84, 85, 44, 61, 44, 44, 87, 44, 44, 44, 44,
+ 27, 89, 67, 67, 56, 90, 44, 44, 83, 84, 71, 36, 36, 36, 61, 36,
+ 61, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 44, 62, 43,
+ 83, 84, 88, 43, 80, 43, 43, 44, 44, 44, 57, 80, 36, 61, 44, 44,
+ 44, 44, 44, 91, 27, 27, 27, 89, 70, 84, 72, 36, 36, 36, 61, 36,
+ 36, 36, 62, 36, 36, 44, 71, 85, 84, 84, 88, 83, 88, 84, 43, 44,
+ 44, 44, 87, 88, 44, 44, 44, 61, 62, 61, 44, 44, 44, 44, 44, 44,
+ 43, 84, 36, 36, 36, 36, 61, 36, 36, 36, 36, 36, 36, 70, 71, 84,
+ 85, 43, 80, 84, 88, 84, 85, 77, 44, 44, 36, 92, 27, 27, 27, 93,
+ 27, 27, 27, 27, 89, 36, 36, 36, 57, 84, 62, 36, 36, 36, 36, 36,
+ 36, 36, 36, 61, 44, 36, 36, 36, 36, 62, 36, 36, 36, 36, 62, 44,
+ 36, 36, 36, 61, 44, 80, 44, 87, 84, 43, 80, 80, 84, 84, 84, 84,
+ 44, 84, 64, 44, 44, 44, 44, 44, 62, 36, 36, 36, 36, 36, 36, 36,
+ 70, 36, 43, 43, 43, 80, 44, 94, 36, 36, 36, 75, 43, 43, 43, 60,
+ 7, 7, 7, 7, 7, 2, 44, 44, 62, 61, 61, 36, 36, 61, 36, 36,
+ 36, 36, 62, 62, 36, 36, 36, 36, 70, 36, 43, 43, 43, 43, 71, 44,
+ 36, 36, 61, 81, 43, 43, 43, 44, 7, 7, 7, 7, 7, 44, 36, 36,
+ 77, 67, 2, 2, 2, 2, 2, 2, 2, 95, 95, 67, 43, 67, 67, 67,
+ 7, 7, 7, 7, 7, 27, 27, 27, 27, 27, 50, 50, 50, 4, 4, 84,
+ 36, 36, 36, 36, 62, 36, 36, 36, 36, 36, 36, 36, 36, 36, 61, 44,
+ 57, 43, 43, 43, 43, 43, 43, 83, 43, 43, 60, 43, 36, 36, 70, 43,
+ 43, 43, 43, 43, 57, 43, 43, 43, 43, 43, 43, 43, 43, 43, 80, 67,
+ 67, 67, 67, 76, 67, 67, 90, 67, 2, 2, 95, 67, 21, 64, 44, 44,
+ 36, 36, 36, 36, 36, 92, 85, 43, 83, 43, 43, 43, 85, 83, 85, 71,
+ 7, 7, 7, 7, 7, 2, 2, 2, 36, 36, 36, 84, 43, 36, 36, 43,
+ 71, 84, 96, 92, 84, 84, 84, 36, 70, 43, 71, 36, 36, 36, 36, 36,
+ 36, 83, 85, 83, 84, 84, 85, 92, 7, 7, 7, 7, 7, 84, 85, 67,
+ 11, 11, 11, 48, 44, 44, 48, 44, 16, 16, 16, 16, 16, 53, 45, 16,
+ 36, 36, 36, 36, 61, 36, 36, 44, 36, 36, 36, 61, 61, 36, 36, 44,
+ 61, 36, 36, 44, 36, 36, 36, 61, 61, 36, 36, 44, 36, 36, 36, 36,
+ 36, 36, 36, 61, 36, 36, 36, 36, 36, 36, 36, 36, 36, 61, 57, 43,
+ 2, 2, 2, 2, 97, 27, 27, 27, 27, 27, 27, 27, 27, 27, 98, 44,
+ 67, 67, 67, 67, 67, 44, 44, 44, 11, 11, 11, 44, 16, 16, 16, 44,
+ 99, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 77, 72,
+ 100, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,101,102, 44,
+ 36, 36, 36, 36, 36, 63, 2,103,104, 36, 36, 36, 61, 44, 44, 44,
+ 36, 36, 36, 36, 36, 36, 61, 36, 36, 43, 80, 44, 44, 44, 44, 44,
+ 36, 43, 60, 64, 44, 44, 44, 44, 36, 43, 44, 44, 44, 44, 44, 44,
+ 61, 43, 44, 44, 44, 44, 44, 44, 36, 36, 43, 85, 43, 43, 43, 84,
+ 84, 84, 84, 83, 85, 43, 43, 43, 43, 43, 2, 86, 2, 66, 70, 44,
+ 7, 7, 7, 7, 7, 44, 44, 44, 27, 27, 27, 27, 27, 44, 44, 44,
+ 2, 2, 2,105, 2, 59, 43, 68, 36,106, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 61, 44, 44, 44, 36, 36, 70, 71, 36, 36, 36, 36,
+ 36, 36, 36, 36, 70, 61, 44, 44, 36, 36, 36, 44, 44, 44, 44, 44,
+ 36, 36, 36, 36, 36, 36, 36, 61, 43, 83, 84, 85, 83, 84, 44, 44,
+ 84, 83, 84, 84, 85, 43, 44, 44, 90, 44, 2, 7, 7, 7, 7, 7,
+ 36, 36, 36, 36, 36, 36, 36, 44, 36, 36, 61, 44, 44, 44, 44, 44,
+ 36, 36, 36, 36, 36, 36, 44, 44, 36, 36, 36, 36, 36, 44, 44, 44,
+ 7, 7, 7, 7, 7, 98, 44, 67, 67, 67, 67, 67, 67, 67, 67, 67,
+ 36, 36, 36, 70, 83, 85, 44, 2, 36, 36, 92, 83, 43, 43, 43, 80,
+ 83, 83, 85, 43, 43, 43, 83, 84, 84, 85, 43, 43, 43, 43, 80, 57,
+ 2, 2, 2, 86, 2, 2, 2, 44, 43, 43, 43, 43, 43, 43, 43,107,
+ 80, 44, 44, 44, 44, 44, 44, 44, 43, 43, 96, 36, 36, 36, 36, 36,
+ 36, 36, 83, 43, 43, 83, 83, 84, 84, 83, 96, 36, 36, 36, 44, 44,
+ 95, 67, 67, 67, 67, 50, 43, 43, 43, 43, 67, 67, 67, 67, 90, 44,
+ 43, 96, 36, 36, 36, 36, 36, 36, 92, 43, 43, 84, 43, 85, 43, 36,
+ 36, 36, 36, 83, 43, 84, 85, 85, 43, 84, 44, 44, 44, 44, 2, 2,
+ 36, 36, 84, 84, 84, 84, 43, 43, 43, 43, 84, 43, 44, 91, 2, 2,
+ 7, 7, 7, 7, 7, 44, 62, 36, 36, 36, 36, 36, 40, 40, 40, 2,
+ 16, 16, 16, 16,108, 44, 44, 44, 11, 11, 11, 11, 11, 47, 48, 11,
+ 2, 2, 2, 2, 44, 44, 44, 44, 43, 60, 43, 43, 43, 43, 43, 43,
+ 83, 43, 43, 43, 71, 36, 70, 36, 36, 36, 71, 92, 43, 61, 44, 44,
+ 16, 16, 16, 16, 16, 16, 40, 40, 40, 40, 40, 40, 40, 45, 16, 16,
+ 16, 16, 16, 16, 45, 16, 16, 16, 16, 16, 16, 16, 16,109, 40, 40,
+ 43, 43, 43, 43, 43, 57, 43, 43, 32, 32, 32, 16, 16, 16, 16, 32,
+ 16, 16, 16, 16, 11, 11, 11, 11, 16, 16, 16, 44, 11, 11, 11, 44,
+ 16, 16, 16, 16, 48, 48, 48, 48, 16, 16, 16, 16, 16, 16, 16, 44,
+ 16, 16, 16, 16,110,110,110,110, 16, 16,108, 16, 11, 11,111,112,
+ 41, 16,108, 16, 11, 11,111, 41, 16, 16, 44, 16, 11, 11,113, 41,
+ 16, 16, 16, 16, 11, 11,114, 41, 44, 16,108, 16, 11, 11,111,115,
+ 116,116,116,116,116,117, 65, 65,118,118,118, 2,119,120,119,120,
+ 2, 2, 2, 2,121, 65, 65,122, 2, 2, 2, 2,123,124, 2,125,
+ 126, 2,127,128, 2, 2, 2, 2, 2, 9,126, 2, 2, 2, 2,129,
+ 65, 65, 68, 65, 65, 65, 65, 65,130, 44, 27, 27, 27, 8,127,131,
+ 27, 27, 27, 27, 27, 8,127,102, 40, 40, 40, 40, 40, 40, 81, 44,
+ 20, 20, 20, 20, 20, 20, 20, 20, 43, 43, 43, 43, 43, 43,132, 51,
+ 107, 51,107, 43, 43, 43, 43, 43, 67,133, 67,134, 67, 34, 11, 16,
+ 11, 32,134, 67, 49, 11, 11, 67, 67, 67,133,133,133, 11, 11,135,
+ 11, 11, 35, 36, 39, 67, 16, 11, 8, 8, 49, 16, 16, 26, 67,136,
+ 27, 27, 27, 27, 27, 27, 27, 27,103,103,103,103,103,103,103,103,
+ 103,137,138,103,139, 67, 44, 44, 8, 8,140, 67, 67, 8, 67, 67,
+ 140, 26, 67,140, 67, 67, 67,140, 67, 67, 67, 67, 67, 67, 67, 8,
+ 67,140,140, 67, 67, 67, 67, 67, 67, 67, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 67, 67, 67, 67, 4, 4, 67, 67,
+ 8, 67, 67, 67,141,142, 67, 67, 67, 67, 67, 67, 67, 67,140, 67,
+ 67, 67, 67, 67, 67, 26, 8, 8, 8, 8, 67, 67, 67, 67, 67, 67,
+ 67, 67, 67, 67, 67, 67, 8, 8, 8, 67, 67, 67, 67, 67, 67, 67,
+ 67, 67, 67, 90, 44, 44, 44, 44, 67, 67, 67, 67, 67, 90, 44, 44,
+ 27, 27, 27, 27, 27, 27, 67, 67, 67, 67, 67, 67, 67, 27, 27, 27,
+ 67, 67, 67, 26, 67, 67, 67, 67, 26, 67, 67, 67, 67, 67, 67, 67,
+ 67, 67, 67, 67, 8, 8, 8, 8, 67, 67, 67, 67, 67, 67, 67, 26,
+ 67, 67, 67, 67, 4, 4, 4, 4, 4, 4, 4, 27, 27, 27, 27, 27,
+ 27, 27, 67, 67, 67, 67, 67, 67, 8, 8,127,143, 8, 8, 8, 8,
+ 8, 8, 8, 4, 4, 4, 4, 4, 8,127,144,144,144,144,144,144,
+ 144,144,144,144,143, 8, 8, 8, 8, 8, 8, 8, 4, 4, 8, 8,
+ 8, 8, 8, 8, 8, 8, 4, 8, 8, 8,140, 26, 8, 8,140, 67,
+ 67, 67, 44, 67, 67, 67, 67, 67, 67, 67, 67, 55, 67, 67, 67, 67,
+ 11, 11, 11, 11, 11, 11, 11, 47, 16, 16, 16, 16, 16, 16, 16,108,
+ 32, 11, 32, 34, 34, 34, 34, 11, 32, 32, 34, 16, 16, 16, 40, 11,
+ 32, 32,136, 67, 67,134, 34,145, 43, 32, 44, 44, 91, 2, 97, 2,
+ 16, 16, 16,146, 44, 44,146, 44, 36, 36, 36, 36, 44, 44, 44, 52,
+ 64, 44, 44, 44, 44, 44, 44, 57, 36, 36, 36, 61, 44, 44, 44, 44,
+ 36, 36, 36, 61, 36, 36, 36, 61, 2,119,119, 2,123,124,119, 2,
+ 2, 2, 2, 6, 2,105,119, 2,119, 4, 4, 4, 4, 2, 2, 86,
+ 2, 2, 2, 2, 2,118, 2, 2,105,147, 2, 2, 2, 2, 2, 2,
+ 67, 64, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 55, 67, 67,
+ 67, 67, 44, 44, 44, 44, 44, 44, 67, 67, 67, 44, 44, 44, 44, 44,
+ 67, 67, 67, 67, 67, 67, 44, 44, 1, 2,148,149, 4, 4, 4, 4,
+ 4, 67, 4, 4, 4, 4,150,151,152,103,103,103,103, 43, 43, 84,
+ 153, 40, 40, 67,103,154, 63, 67, 36, 36, 36, 61, 57,155,156, 69,
+ 36, 36, 36, 36, 36, 63, 40, 69, 44, 44, 62, 36, 36, 36, 36, 36,
+ 67, 27, 27, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 90,
+ 27, 27, 27, 27, 27, 67, 67, 67, 67, 67, 67, 67, 27, 27, 27, 27,
+ 157, 27, 27, 27, 27, 27, 27, 27, 36, 36,106, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36,158, 2, 7, 7, 7, 7, 7, 36, 44, 44,
+ 32, 32, 32, 32, 32, 32, 32, 70, 51,159, 43, 43, 43, 43, 43, 86,
+ 32, 32, 32, 32, 32, 32, 40, 43, 36, 36, 36,103,103,103,103,103,
+ 43, 2, 2, 2, 44, 44, 44, 44, 41, 41, 41,156, 40, 40, 40, 40,
+ 41, 32, 32, 32, 32, 32, 32, 32, 16, 32, 32, 32, 32, 32, 32, 32,
+ 45, 16, 16, 16, 34, 34, 34, 32, 32, 32, 32, 32, 42,160, 34, 35,
+ 32, 32, 16, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 11, 11, 32,
+ 11, 11, 32, 32, 32, 32, 32, 32, 44, 32, 11, 11, 34,108, 44, 44,
+ 44, 44, 48, 35, 40, 35, 36, 36, 36, 71, 36, 71, 36, 70, 36, 36,
+ 36, 92, 85, 83, 67, 67, 80, 44, 27, 27, 27, 67,161, 44, 44, 44,
+ 36, 36, 2, 2, 44, 44, 44, 44, 84, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 84, 84, 84, 84, 84, 84, 84, 84, 43, 44, 44, 44, 44, 2,
+ 43, 36, 36, 36, 2, 72, 72, 70, 36, 36, 36, 43, 43, 43, 43, 2,
+ 36, 36, 36, 70, 43, 43, 43, 43, 43, 84, 44, 44, 44, 44, 44, 91,
+ 36, 70, 84, 43, 43, 84, 43, 84,162, 2, 2, 2, 2, 2, 2, 52,
+ 7, 7, 7, 7, 7, 44, 44, 2, 36, 36, 70, 69, 36, 36, 36, 36,
+ 7, 7, 7, 7, 7, 36, 36, 61, 36, 36, 36, 36, 70, 43, 43, 83,
+ 85, 83, 85, 80, 44, 44, 44, 44, 36, 70, 36, 36, 36, 36, 83, 44,
+ 7, 7, 7, 7, 7, 44, 2, 2, 69, 36, 36, 77, 67, 92, 83, 36,
+ 71, 43, 71, 70, 71, 36, 36, 43, 70, 61, 44, 44, 44, 44, 44, 44,
+ 44, 44, 44, 44, 44, 62,106, 2, 36, 36, 36, 36, 36, 92, 43, 84,
+ 2,106,163, 80, 44, 44, 44, 44, 62, 36, 36, 61, 62, 36, 36, 61,
+ 62, 36, 36, 61, 44, 44, 44, 44, 16, 16, 16, 16, 16,112, 40, 40,
+ 16, 16, 16, 16,109, 41, 44, 44, 36, 92, 85, 84, 83,162, 85, 44,
+ 36, 36, 44, 44, 44, 44, 44, 44, 36, 36, 36, 61, 44, 62, 36, 36,
+ 164,164,164,164,164,164,164,164,165,165,165,165,165,165,165,165,
+ 16, 16, 16,108, 44, 44, 44, 44, 44,146, 16, 16, 44, 44, 62, 71,
+ 36, 36, 36, 36,166, 36, 36, 36, 36, 36, 36, 61, 36, 36, 61, 61,
+ 36, 62, 61, 36, 36, 36, 36, 36, 36, 41, 41, 41, 41, 41, 41, 41,
+ 41, 44, 44, 44, 44, 44, 44, 44, 44, 62, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36,144, 44, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36,161, 44, 2, 2, 2,167,128, 44, 44, 44,
+ 6,168,169,144,144,144,144,144,144,144,128,167,128, 2,125,170,
+ 2, 64, 2, 2,150,144,144,128, 2,171, 8,172, 66, 2, 44, 44,
+ 36, 36, 36, 36, 36, 36, 61, 79, 91, 2, 3, 2, 4, 5, 6, 2,
+ 16, 16, 16, 16, 16, 17, 18,127,128, 4, 2, 36, 36, 36, 36, 36,
+ 69, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 40,
+ 44, 36, 36, 36, 44, 36, 36, 36, 44, 36, 36, 36, 44, 36, 61, 44,
+ 20,173, 56,174, 26, 8,140, 90, 44, 44, 44, 44, 79, 65, 67, 44,
+ 36, 36, 36, 36, 36, 36, 62, 36, 36, 36, 36, 36, 36, 61, 36, 62,
+ 2, 64, 44,175, 27, 27, 27, 27, 27, 27, 44, 55, 67, 67, 67, 67,
+ 103,103,139, 27, 89, 67, 67, 67, 67, 67, 67, 67, 67, 27, 67, 90,
+ 67, 67, 67, 67, 67, 67, 90, 44, 90, 44, 44, 44, 44, 44, 44, 44,
+ 67, 67, 67, 67, 67, 67, 50, 44,176, 27, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 27, 27, 27, 44, 44, 27, 27, 44, 44, 44, 44, 62, 36,
+ 149, 36, 36, 36, 36,177, 44, 44, 36, 36, 36, 43, 43, 80, 44, 44,
+ 36, 36, 36, 36, 36, 36, 36, 91, 36, 36, 44, 44, 36, 36, 36, 36,
+ 178,103,103, 44, 44, 44, 44, 44, 11, 11, 11, 11, 16, 16, 16, 16,
+ 11, 11, 44, 44, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 44, 44,
+ 36, 36, 44, 44, 44, 44, 44, 91, 36, 36, 36, 44, 61, 36, 36, 36,
+ 36, 36, 36, 62, 61, 44, 61, 62, 36, 36, 36, 91, 27, 27, 27, 27,
+ 36, 36, 36, 77,157, 27, 27, 27, 44, 44, 44,175, 27, 27, 27, 27,
+ 36, 61, 36, 44, 44,175, 27, 27, 36, 36, 36, 27, 27, 27, 44, 91,
+ 36, 36, 36, 36, 36, 44, 44, 91, 36, 36, 36, 36, 44, 44, 27, 36,
+ 44, 27, 27, 27, 27, 27, 27, 27, 70, 43, 57, 80, 44, 44, 43, 43,
+ 36, 36, 62, 36, 62, 36, 36, 36, 36, 36, 36, 44, 43, 80, 44, 57,
+ 27, 27, 27, 27, 98, 44, 44, 44, 2, 2, 2, 2, 64, 44, 44, 44,
+ 36, 36, 36, 36, 36, 36,179, 30, 36, 36, 36, 36, 36, 36,179, 27,
+ 36, 36, 36, 36, 78, 36, 36, 36, 36, 36, 70, 80, 44,175, 27, 27,
+ 2, 2, 2, 64, 44, 44, 44, 44, 36, 36, 36, 44, 91, 2, 2, 2,
+ 36, 36, 36, 44, 27, 27, 27, 27, 36, 61, 44, 44, 27, 27, 27, 27,
+ 36, 44, 44, 44, 91, 2, 64, 44, 44, 44, 44, 44,175, 27, 27, 27,
+ 11, 47, 44, 44, 44, 44, 44, 44, 16,108, 44, 44, 44, 27, 27, 27,
+ 36, 36, 43, 43, 44, 44, 44, 44, 27, 27, 27, 27, 27, 27, 27, 98,
+ 36, 36, 36, 36, 36, 57,180, 44, 36, 44, 44, 44, 44, 44, 44, 44,
+ 27, 27, 27, 93, 44, 44, 44, 44,176, 27, 30, 2, 2, 44, 44, 44,
+ 36, 36,179, 27, 27, 27, 44, 44, 85, 96, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 43, 43, 43, 43, 43, 43, 43, 60, 2, 2, 2, 44,
+ 27, 27, 27, 7, 7, 7, 7, 7, 44, 44, 44, 44, 44, 44, 44, 57,
+ 84, 85, 43, 83, 85, 60,181, 2, 2, 44, 44, 44, 44, 44, 79, 44,
+ 43, 71, 36, 36, 36, 36, 36, 36, 36, 36, 36, 70, 43, 43, 85, 43,
+ 43, 43, 80, 7, 7, 7, 7, 7, 2, 2, 92, 96, 44, 44, 44, 44,
+ 36, 70, 2, 61, 44, 44, 44, 44, 36, 92, 84, 43, 43, 43, 43, 83,
+ 96, 36, 63, 2, 59, 43, 60, 85, 7, 7, 7, 7, 7, 63, 63, 2,
+ 175, 27, 27, 27, 27, 27, 27, 27, 27, 27, 98, 44, 44, 44, 44, 44,
+ 36, 36, 36, 36, 36, 36, 84, 85, 43, 84, 83, 43, 2, 2, 2, 80,
+ 36, 36, 36, 61, 61, 36, 36, 62, 36, 36, 36, 36, 36, 36, 36, 62,
+ 36, 36, 36, 36, 63, 44, 44, 44, 36, 36, 36, 36, 36, 36, 36, 70,
+ 84, 85, 43, 43, 43, 80, 44, 44, 43, 84, 62, 36, 36, 36, 61, 62,
+ 61, 36, 62, 36, 36, 57, 71, 84, 83, 84, 88, 87, 88, 87, 84, 44,
+ 61, 44, 44, 87, 44, 44, 62, 36, 36, 84, 44, 43, 43, 43, 80, 44,
+ 43, 43, 80, 44, 44, 44, 44, 44, 36, 36, 92, 84, 43, 43, 43, 43,
+ 84, 43, 83, 71, 36, 63, 2, 2, 7, 7, 7, 7, 7, 2, 91, 71,
+ 84, 85, 43, 43, 83, 83, 84, 85, 83, 43, 36, 72, 44, 44, 44, 44,
+ 36, 36, 36, 36, 36, 36, 36, 92, 84, 43, 43, 44, 84, 84, 43, 85,
+ 60, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 36, 36, 43, 44,
+ 84, 85, 43, 43, 43, 83, 85, 85, 60, 2, 61, 44, 44, 44, 44, 44,
+ 2, 2, 2, 2, 2, 2, 64, 44, 36, 36, 36, 36, 36, 70, 85, 84,
+ 43, 43, 43, 85, 61, 44, 44, 44, 84, 43, 43, 85, 43, 43, 44, 44,
+ 7, 7, 7, 7, 7, 27, 2, 95, 43, 43, 43, 43, 85, 60, 44, 44,
+ 27, 98, 44, 44, 44, 44, 44, 62, 36, 36, 36, 61, 62, 44, 36, 36,
+ 36, 36, 62, 61, 36, 36, 36, 36, 84, 84, 84, 87, 88, 57, 83, 71,
+ 96, 85, 2, 64, 44, 44, 44, 44, 36, 36, 36, 36, 44, 36, 36, 36,
+ 92, 84, 43, 43, 44, 43, 84, 84, 71, 72, 88, 44, 44, 44, 44, 44,
+ 70, 43, 43, 43, 43, 71, 36, 36, 36, 70, 43, 43, 83, 70, 43, 60,
+ 2, 2, 2, 59, 44, 44, 44, 44, 70, 43, 43, 83, 85, 43, 36, 36,
+ 36, 36, 36, 36, 36, 43, 43, 43, 43, 43, 43, 83, 43, 2, 72, 2,
+ 2, 64, 44, 44, 44, 44, 44, 44, 43, 43, 43, 80, 43, 43, 43, 85,
+ 63, 2, 2, 44, 44, 44, 44, 44, 2, 36, 36, 36, 36, 36, 36, 36,
+ 44, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 87, 43, 43, 43,
+ 83, 43, 85, 80, 44, 44, 44, 44, 36, 36, 36, 61, 36, 62, 36, 36,
+ 70, 43, 43, 80, 44, 80, 43, 57, 43, 43, 43, 70, 44, 44, 44, 44,
+ 36, 36, 36, 62, 61, 36, 36, 36, 36, 36, 36, 36, 36, 84, 84, 88,
+ 43, 87, 85, 85, 61, 44, 44, 44, 36, 70, 83,162, 64, 44, 44, 44,
+ 27, 27, 89, 67, 67, 67, 56, 20,161, 67, 67, 67, 67, 67, 67, 67,
+ 67, 44, 44, 44, 44, 44, 44, 91,103,103,103,103,103,103,103,177,
+ 2, 2, 64, 44, 44, 44, 44, 44, 65, 65, 65, 65, 68, 44, 44, 44,
+ 43, 43, 60, 44, 44, 44, 44, 44, 43, 43, 43, 60, 2, 2, 67, 67,
+ 40, 40, 95, 44, 44, 44, 44, 44, 7, 7, 7, 7, 7,175, 27, 27,
+ 27, 62, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 44, 44, 62, 36,
+ 27, 27, 27, 30, 2, 64, 44, 44, 36, 36, 36, 36, 36, 61, 44, 57,
+ 92, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 44, 44, 44, 57, 43, 74, 40, 40, 40, 40, 40, 40,
+ 40, 86, 80, 44, 44, 44, 44, 44, 84, 44, 44, 44, 44, 44, 44, 44,
+ 36, 61, 44, 44, 44, 44, 44, 44, 44, 44, 36, 36, 44, 44, 44, 44,
+ 36, 36, 36, 36, 36, 44, 50, 60, 65, 65, 44, 44, 44, 44, 44, 44,
+ 67, 67, 67, 90, 55, 67, 67, 67, 67, 67,182, 85, 43, 67,182, 84,
+ 84,183, 65, 65, 65, 82, 43, 43, 43, 76, 50, 43, 43, 43, 67, 67,
+ 67, 67, 67, 67, 67, 43, 43, 67, 67, 67, 67, 67, 90, 44, 44, 44,
+ 67, 43, 76, 44, 44, 44, 44, 44, 27, 27, 44, 44, 44, 44, 44, 44,
+ 11, 11, 11, 11, 11, 16, 16, 16, 16, 16, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 16, 16, 16,108, 16, 16, 16, 16, 16,
+ 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 47, 11,
+ 44, 47, 48, 47, 48, 11, 47, 11, 11, 11, 11, 16, 16,146,146, 16,
+ 16, 16,146, 16, 16, 16, 16, 16, 16, 16, 11, 48, 11, 47, 48, 11,
+ 11, 11, 47, 11, 11, 11, 47, 16, 16, 16, 16, 16, 11, 48, 11, 47,
+ 11, 11, 47, 47, 44, 11, 11, 11, 47, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 11, 11, 11, 11, 11, 16, 16, 16, 16, 16,
+ 16, 16, 16, 44, 11, 11, 11, 11, 31, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 31, 16, 16, 16, 16, 33, 16, 16, 16, 11, 11,
+ 11, 11, 31, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 33,
+ 16, 16, 16, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 31,
+ 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 31, 16, 16, 16,
+ 16, 33, 16, 16, 16, 32, 44, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 43, 43, 43, 76, 67, 50, 43, 43, 43, 43, 43, 43, 43, 43, 76, 67,
+ 67, 67, 50, 67, 67, 67, 67, 67, 67, 67, 76, 21, 2, 2, 44, 44,
+ 44, 44, 44, 44, 44, 57, 43, 43, 43, 43, 43, 80, 43, 43, 43, 43,
+ 43, 43, 43, 43, 80, 57, 43, 43, 43, 57, 80, 43, 43, 80, 44, 44,
+ 43, 43, 43, 74, 40, 40, 40, 44, 7, 7, 7, 7, 7, 44, 44, 77,
+ 36, 36, 36, 36, 36, 36, 43, 43, 7, 7, 7, 7, 7, 44, 44, 94,
+ 36, 36, 61,175, 27, 27, 27, 27, 43, 43, 43, 80, 44, 44, 44, 44,
+ 16, 16, 43, 43, 43, 74, 44, 44, 27, 27, 27, 27, 27, 27,157, 27,
+ 184, 27, 98, 44, 44, 44, 44, 44, 27, 27, 27, 27, 27, 27, 27,157,
+ 27, 27, 27, 27, 27, 27, 27, 44, 36, 36, 62, 36, 36, 36, 36, 36,
+ 62, 61, 61, 62, 62, 36, 36, 36, 36, 61, 36, 36, 62, 62, 44, 44,
+ 44, 61, 44, 62, 62, 62, 62, 36, 62, 61, 61, 62, 62, 62, 62, 62,
+ 62, 61, 61, 62, 36, 61, 36, 36, 36, 61, 36, 36, 62, 36, 61, 61,
+ 36, 36, 36, 36, 36, 62, 36, 36, 62, 36, 62, 36, 36, 62, 36, 36,
+ 8, 44, 44, 44, 44, 44, 44, 44, 55, 67, 67, 67, 67, 67, 67, 67,
+ 27, 27, 27, 27, 27, 27, 89, 67, 67, 67, 67, 67, 67, 67, 67, 44,
+ 44, 44, 44, 67, 67, 67, 67, 67, 67, 90, 44, 44, 44, 44, 44, 44,
+ 67, 44, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 25, 41, 41,
+ 67, 67, 67, 67, 44, 44, 44, 44, 67, 67, 67, 67, 90, 67, 67, 67,
+ 67, 67, 67, 67, 67, 67, 55, 67, 67, 67, 90, 44, 67, 90, 44, 44,
+ 67, 90, 67, 67, 67, 67, 67, 67, 79, 44, 44, 44, 44, 44, 44, 44,
+ 65, 65, 65, 65, 65, 65, 65, 65,165,165,165,165,165,165,165, 44,
+ 165,165,165,165,165,165,165, 0, 0, 0, 29, 21, 21, 21, 23, 21,
+ 22, 18, 21, 25, 21, 17, 13, 13, 25, 25, 25, 21, 21, 9, 9, 9,
+ 9, 22, 21, 18, 24, 16, 24, 5, 5, 5, 5, 22, 25, 18, 25, 0,
+ 23, 23, 26, 21, 24, 26, 7, 20, 25, 1, 26, 24, 26, 25, 15, 15,
+ 24, 15, 7, 19, 15, 21, 9, 25, 9, 5, 5, 25, 5, 9, 5, 7,
+ 7, 7, 9, 8, 8, 5, 7, 5, 6, 6, 24, 24, 6, 24, 12, 12,
+ 2, 2, 6, 5, 9, 21, 9, 2, 2, 9, 25, 9, 26, 12, 11, 11,
+ 2, 6, 5, 21, 17, 2, 2, 26, 26, 23, 2, 12, 17, 12, 21, 12,
+ 12, 21, 7, 2, 2, 7, 7, 21, 21, 2, 1, 1, 21, 23, 26, 26,
+ 1, 2, 6, 7, 7, 12, 12, 7, 21, 7, 12, 1, 12, 6, 6, 12,
+ 12, 26, 7, 26, 26, 7, 2, 1, 12, 2, 6, 2, 1, 12, 12, 10,
+ 10, 10, 10, 12, 21, 6, 2, 10, 10, 2, 15, 26, 26, 2, 2, 21,
+ 7, 10, 15, 7, 2, 23, 21, 26, 10, 7, 21, 15, 15, 2, 17, 7,
+ 29, 7, 7, 22, 18, 2, 14, 14, 14, 7, 17, 21, 7, 6, 11, 12,
+ 5, 2, 5, 6, 8, 8, 8, 24, 5, 24, 2, 24, 9, 24, 24, 2,
+ 29, 29, 29, 1, 17, 17, 20, 19, 22, 20, 27, 28, 1, 29, 21, 20,
+ 19, 21, 21, 16, 16, 21, 25, 22, 18, 21, 21, 29, 15, 6, 18, 6,
+ 12, 11, 9, 26, 26, 9, 26, 5, 5, 26, 14, 9, 5, 14, 14, 15,
+ 25, 26, 26, 22, 18, 26, 18, 25, 18, 22, 5, 12, 2, 5, 22, 21,
+ 26, 6, 7, 14, 17, 22, 18, 18, 26, 14, 17, 6, 14, 6, 12, 24,
+ 24, 6, 26, 15, 6, 21, 11, 21, 24, 9, 23, 26, 10, 21, 6, 10,
+ 4, 4, 3, 3, 7, 25, 21, 22, 17, 16, 16, 22, 16, 16, 25, 17,
+ 25, 2, 25, 24, 23, 2, 2, 15, 12, 15, 14, 2, 21, 14, 7, 15,
+ 12, 17, 21, 1, 26, 10, 10, 1, 23, 15, 0, 1, 2, 3, 4, 5,
+ 6, 7, 8, 9, 0, 10, 11, 12, 13, 0, 14, 0, 0, 0, 0, 0,
+ 15, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 20, 0, 21, 22, 23, 0, 0, 0, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34,
+ 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 37, 38, 0, 0, 0, 0, 0, 0, 39, 40, 0, 0, 41, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0,
+ 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 0,
+ 9, 0, 10, 11, 0, 0, 12, 13, 14, 15, 16, 0, 0, 0, 0, 17,
+ 18, 19, 20, 0, 0, 0, 21, 22, 0, 23, 24, 0, 0, 23, 25, 26,
+ 0, 23, 25, 0, 0, 23, 25, 0, 0, 23, 25, 0, 0, 0, 25, 0,
+ 0, 0, 27, 0, 0, 23, 25, 0, 0, 28, 25, 0, 0, 0, 29, 0,
+ 0, 30, 31, 0, 0, 32, 33, 0, 34, 35, 0, 36, 37, 0, 38, 0,
+ 0, 39, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 0, 0, 0, 0, 43, 0,
+ 0, 0, 0, 0, 0, 44, 0, 0, 0, 45, 0, 0, 0, 0, 0, 0,
+ 46, 0, 0, 47, 0, 48, 49, 0, 0, 50, 51, 52, 0, 53, 0, 54,
+ 0, 55, 0, 0, 0, 0, 56, 57, 0, 0, 0, 0, 0, 0, 58, 59,
+ 0, 0, 0, 0, 0, 0, 60, 61, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 63, 0, 0, 0, 64,
+ 0, 65, 0, 0, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 67, 68, 0, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0,
+ 70, 71, 0, 0, 0, 0, 51, 72, 0, 73, 74, 0, 0, 75, 76, 0,
+ 0, 0, 0, 0, 0, 77, 78, 79, 0, 0, 0, 0, 0, 0, 0, 25,
+ 0, 0, 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0,
+ 0, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82,
+ 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 83, 0, 0, 0, 0,
+ 84, 85, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 87, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 88, 0, 0, 0, 0, 89, 0, 0, 0, 0, 0,
+ 0, 0, 70, 63, 0, 90, 0, 0, 91, 92, 0, 75, 0, 0, 93, 0,
+ 0, 94, 0, 0, 0, 0, 0, 95, 0, 96, 25, 97, 0, 0, 0, 0,
+ 0, 0, 98, 0, 0, 0, 99, 0, 0, 0, 0, 0, 0, 63,100, 0,
+ 0, 63, 0, 0, 0,101, 0, 0, 0,102, 0, 0, 0, 0, 0, 0,
+ 0, 90, 0, 0, 0, 0, 0, 0, 0,103,104, 0, 0, 0, 0, 76,
+ 0, 42,105, 0,106, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 63, 0, 0, 0, 0, 0, 0, 0, 0,107, 0,108, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,109, 0,110, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,111,
+ 0, 0, 0, 0,112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,113,114,115, 0, 0,
+ 0, 0,116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 117,118, 0, 0, 0, 0, 0, 0, 0,110, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,119, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,120, 0, 0, 0,121, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4,
+ 5, 6, 7, 4, 4, 8, 9, 10, 1, 11, 12, 13, 14, 15, 16, 17,
+ 18, 1, 1, 1, 0, 0, 0, 0, 19, 1, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 20, 21, 22, 1, 23, 4, 21, 24, 25, 26, 27, 28,
+ 29, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 31, 0,
+ 0, 0, 32, 33, 34, 35, 1, 36, 0, 0, 0, 0, 37, 0, 0, 0,
+ 0, 0, 0, 0, 0, 38, 1, 39, 14, 39, 40, 41, 0, 0, 0, 0,
+ 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 43, 36, 44, 45,
+ 21, 45, 46, 0, 0, 0, 0, 0, 0, 0, 19, 1, 21, 0, 0, 47,
+ 0, 0, 0, 0, 0, 38, 48, 1, 1, 49, 49, 50, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 52, 1, 1, 1,
+ 53, 21, 43, 54, 55, 21, 35, 1, 0, 0, 0, 0, 0, 0, 0, 56,
+ 0, 0, 0, 57, 58, 59, 0, 0, 0, 0, 0, 57, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 60, 0, 0, 0, 57, 0, 61, 0, 0,
+ 0, 0, 0, 0, 0, 0, 62, 63, 0, 0, 64, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 65, 0, 0, 0, 66, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 68, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 69, 0, 0, 0, 0, 0, 0, 70, 71, 0,
+ 0, 0, 0, 0, 72, 73, 74, 75, 76, 77, 0, 0, 0, 0, 0, 0,
+ 0, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 79, 80, 0,
+ 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49,
+ 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, 0, 64, 0, 0, 81,
+ 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83, 0,
+ 0, 0, 0, 0, 0, 19, 84, 0, 63, 0, 0, 0, 0, 49, 1, 85,
+ 0, 0, 0, 0, 1, 54, 15, 86, 84, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 56, 0, 0, 0, 63, 0, 0, 0, 0, 0, 0,
+ 0, 0, 19, 10, 1, 0, 0, 0, 0, 0, 87, 0, 0, 0, 0, 0,
+ 0, 88, 0, 0, 87, 0, 0, 0, 0, 0, 0, 0, 0, 79, 0, 0,
+ 0, 0, 0, 0, 89, 9, 12, 4, 90, 8, 91, 47, 0, 59, 50, 0,
+ 21, 1, 21, 92, 93, 1, 1, 1, 1, 1, 1, 1, 1, 94, 95, 96,
+ 0, 0, 0, 0, 97, 1, 98, 59, 81, 99,100, 4, 59, 0, 0, 0,
+ 0, 0, 0, 19, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62,
+ 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,101,102, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,103, 0, 0, 0, 0, 19, 0, 1, 1, 50,
+ 0, 0, 0, 0, 0, 0, 0, 38, 0, 0, 0, 0, 50, 0, 0, 0,
+ 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0,
+ 1, 1, 1, 1, 50, 0, 0, 0, 0, 0, 52, 69, 0, 0, 0, 0,
+ 0, 0, 0, 0, 62, 0, 0, 0, 0, 0, 0, 0, 79, 0, 0, 0,
+ 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,104,105, 59, 38,
+ 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,106, 1, 14, 4, 12, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 38, 89, 0,
+ 0, 0, 0,107, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,108, 62,
+ 0,109, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
+ 0, 0, 19, 59, 0, 0, 0, 0, 0,110, 14, 54, 84, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,111, 0, 89, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 62, 63, 0, 0, 63, 0, 88, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0,111, 0, 0, 0, 0,112, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 79, 56, 0, 38, 1, 59, 1, 59, 0, 0,
+ 64, 88, 0, 0, 0, 0, 0, 60,113, 0, 0, 0, 0, 0, 0, 0,
+ 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,113, 0, 0,
+ 0, 0, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80,
+ 79, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 57, 0, 88,114, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 62, 0, 0, 0, 0, 0, 0, 8, 91, 0, 0,
+ 0, 0, 0, 0, 1, 89, 0, 0, 0, 0, 0, 0,115, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,116, 0,117,118,119,120, 0, 52, 4,
+ 121, 49, 23, 0, 0, 0, 0, 0, 0, 0, 38, 50, 0, 0, 0, 0,
+ 38, 59, 0, 0, 0, 0, 0, 0, 1, 89, 1, 1, 1, 1, 39, 1,
+ 48,104, 89, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 4,121, 0, 0, 0, 1,122, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,230,230,230,230,230,232,220,220,220,220,232,216,
+ 220,220,220,220,220,202,202,220,220,220,220,202,202,220,220,220,
+ 1, 1, 1, 1, 1,220,220,220,220,230,230,230,230,240,230,220,
+ 220,220,230,230,230,220,220, 0,230,230,230,220,220,220,220,230,
+ 232,220,220,230,233,234,234,233,234,234,233,230, 0, 0, 0,230,
+ 0,220,230,230,230,230,220,230,230,230,222,220,230,230,220,220,
+ 230,222,228,230, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 19, 20,
+ 21, 22, 0, 23, 0, 24, 25, 0,230,220, 0, 18, 30, 31, 32, 0,
+ 0, 0, 0, 27, 28, 29, 30, 31, 32, 33, 34,230,230,220,220,230,
+ 220,230,230,220, 35, 0, 0, 0, 0, 0,230,230,230, 0, 0,230,
+ 230, 0,220,230,230,220, 0, 0, 0, 36, 0, 0,230,220,230,230,
+ 220,220,230,220,220,230,220,230,220,230,230, 0, 0,220, 0, 0,
+ 230,230, 0,230, 0,230,230,230,230,230, 0, 0, 0,220,220,220,
+ 0, 0, 0,220,230,230, 0,220,230,220,220,220, 27, 28, 29,230,
+ 7, 0, 0, 0, 0, 9, 0, 0, 0,230,220,230,230, 0, 0, 0,
+ 0, 0,230, 0, 0, 84, 91, 0, 0, 0, 0, 9, 9, 0, 0, 0,
+ 0, 0, 9, 0,103,103, 9, 0,107,107,107,107,118,118, 9, 0,
+ 122,122,122,122,220,220, 0, 0, 0,220, 0,220, 0,216, 0, 0,
+ 0,129,130, 0,132, 0, 0, 0, 0, 0,130,130,130,130, 0, 0,
+ 130, 0,230,230, 9, 0,230,230, 0, 0,220, 0, 0, 0, 0, 7,
+ 0, 9, 9, 0, 0,230, 0, 0, 0,228, 0, 0, 0,222,230,220,
+ 220, 0, 0, 0,230, 0, 0,220,230,220, 0,220, 0, 0, 9, 9,
+ 0, 0, 7, 0,230,230,230, 0,230, 0, 1, 1, 1, 0, 0, 0,
+ 230,234,214,220,202,230,230,230,230,230,232,228,228,220, 0,230,
+ 233,220,230,220,230,230, 1, 1, 1, 1, 1,230, 0, 1, 1,230,
+ 220,230, 1, 1, 0, 0,218,228,232,222,224,224, 0, 8, 8, 0,
+ 230, 0,230,230,220, 0, 0,230, 0, 0, 26, 0, 0,220, 0,230,
+ 230, 1,220, 0, 0,230,220, 0, 0, 0,220,220, 0, 9, 7, 0,
+ 0, 7, 9, 0, 0, 0, 9, 7, 9, 9, 0, 0, 6, 6, 0, 0,
+ 0, 0, 1, 0, 0,216,216, 1, 1, 1, 0, 0, 0,226,216,216,
+ 216,216,216, 0,220,220,220, 0,230,230, 7, 0, 16, 17, 17, 17,
+ 17, 17, 17, 33, 17, 17, 17, 19, 17, 17, 17, 17, 20,101, 17,113,
+ 129,169, 17, 27, 28, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,237, 0, 1, 2, 2,
+ 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 6, 7, 8,
+ 9, 0, 0, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 21, 22, 0, 0, 0, 0,
+ 23, 24, 25, 26, 0, 27, 0, 28, 29, 30, 31, 32, 0, 0, 0, 0,
+ 0, 0, 0, 33, 34, 35, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 37, 38, 0, 0, 0, 0, 1, 2, 39, 40,
+ 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+ 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 5, 0,
+ 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 7, 1, 0, 0, 0, 0,
+ 0, 0, 8, 9, 0, 0, 0, 0, 0, 0, 10, 0, 0, 10, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 11, 12, 0, 13, 0, 14, 15, 16, 0, 0,
+ 0, 0, 0, 1, 17, 18, 0, 19, 7, 1, 0, 0, 0, 20, 20, 7,
+ 20, 20, 20, 20, 20, 20, 20, 8, 21, 0, 22, 0, 7, 23, 24, 0,
+ 20, 20, 25, 0, 0, 0, 26, 27, 1, 7, 20, 20, 20, 20, 20, 1,
+ 28, 29, 30, 31, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 10, 0,
+ 0, 0, 0, 0, 0, 0, 20, 20, 20, 1, 0, 0, 8, 21, 32, 4,
+ 0, 10, 0, 33, 7, 20, 20, 20, 0, 0, 0, 0, 8, 34, 34, 35,
+ 36, 34, 37, 0, 38, 1, 20, 20, 0, 0, 39, 0, 1, 1, 0, 8,
+ 21, 1, 20, 0, 0, 0, 1, 0, 0, 40, 1, 1, 0, 0, 8, 21,
+ 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 26, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 21, 7, 20, 41, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 21, 0, 42, 43, 44, 0, 45, 0, 8, 21, 0, 0, 0, 0, 0,
+ 0, 0, 0, 46, 7, 1, 10, 1, 0, 0, 0, 1, 20, 20, 1, 0,
+ 0, 0, 0, 0, 0, 0, 20, 20, 1, 20, 20, 0, 0, 0, 0, 0,
+ 0, 0, 26, 21, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 3, 47, 48, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3,
+ 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9,
+ 10, 11, 12, 12, 12, 12, 13, 14, 14, 14, 14, 15, 16, 17, 18, 19,
+ 20, 14, 21, 14, 22, 14, 14, 14, 14, 23, 24, 24, 25, 26, 14, 14,
+ 14, 14, 27, 28, 14, 14, 29, 30, 31, 32, 33, 34, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 35, 7, 36, 37, 7, 38, 7, 7, 7, 39, 14, 40, 7, 7, 41, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 42, 0, 0, 1,
+ 2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 32, 33, 34, 35, 36, 37, 37, 37, 37, 37, 38, 39, 40, 41, 42,
+ 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 2, 2, 53, 54, 55, 56,
+ 57, 58, 59, 59, 59, 59, 60, 59, 59, 59, 59, 59, 59, 59, 61, 61,
+ 59, 59, 59, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
+ 74, 75, 76, 77, 78, 59, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
+ 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
+ 70, 70, 70, 70, 70, 70, 70, 70, 70, 79, 70, 70, 70, 70, 70, 70,
+ 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 80, 81, 81,
+ 81, 81, 81, 81, 81, 81, 81, 82, 83, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 96, 97, 97,
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+ 70, 70, 98, 99,100,101,102,102,103,104,105,106,107,108,109,110,
+ 111,112, 97,113,114,115,116,117,118, 97,119,119,120, 97,121,122,
+ 123,124,125,126,127,128,129,130,131, 97,132,133,134,135,136,137,
+ 138,139,140,141,142, 97,143,144, 97,145,146,147,148, 97,149,150,
+ 151,152,153,154, 97, 97,155,156,157,158, 97,159, 97,160,161,161,
+ 161,161,161,161,161,162,163,161,164, 97, 97, 97, 97, 97,165,165,
+ 165,165,165,165,165,165,166, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97,167,167,167,167,168, 97, 97, 97,169,169,
+ 169,169,170,171,172,173, 97, 97, 97, 97,174,175,176,177,178,178,
+ 178,178,178,178,178,178,178,178,178,178,178,178,178,178,178,178,
+ 178,178,178,178,178,178,178,178,178,178,178,178,178,179,178,178,
+ 178,178,178,178,180,180,180,181,182, 97, 97, 97, 97, 97,183,184,
+ 185,186,186,187, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97,188,189, 97, 97, 97, 97, 97, 97, 59,190,
+ 191,192,193,194,195, 97,196,197,198, 59, 59,199, 59,200,201,201,
+ 201,201,201,202, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,203, 97,
+ 204, 97, 97,205, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,206,207,
+ 208, 97, 97, 97, 97, 97,209,210,211, 97,212,213, 97, 97,214,215,
+ 59,216,217, 97, 59, 59, 59, 59, 59, 59, 59,218,219,220,221,222,
+ 223,224,225,226, 59,227, 97, 97, 97, 97, 97, 97, 97, 97, 70, 70,
+ 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,228, 70, 70, 70, 70,
+ 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,229, 70,230, 70,
+ 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
+ 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,231, 70, 70, 70, 70,
+ 70, 70, 70, 70, 70,232, 97, 97, 97, 97, 97, 97, 97, 97, 70, 70,
+ 70, 70,233, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 70, 70,
+ 70, 70, 70, 70,234, 97, 97, 97, 97, 97, 97, 97, 97, 97,235, 97,
+ 236,237, 0, 1, 2, 2, 0, 1, 2, 2, 2, 3, 4, 5, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, 0, 0, 0, 0, 0, 0,
+ 19, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 0, 19, 0,
+ 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 0, 0, 0, 0, 0,
+ 26, 26, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9,
+ 9, 9, 0, 9, 9, 9, 2, 2, 9, 9, 9, 9, 0, 9, 2, 2,
+ 2, 2, 9, 0, 9, 0, 9, 9, 9, 2, 9, 2, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 2, 9, 9, 9, 9, 9, 9, 9,
+ 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 1, 1, 6, 2, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 2, 4, 4, 4, 2, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 2, 2,
+ 2, 2, 2, 2, 2, 2, 14, 14, 14, 2, 2, 2, 2, 14, 14, 14,
+ 14, 14, 14, 2, 2, 2, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3,
+ 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 0, 3, 2, 3, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 1, 3,
+ 3, 3, 3, 3, 3, 3, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+ 37, 37, 37, 37, 2, 37, 37, 37, 37, 2, 2, 37, 37, 37, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 2, 2, 2, 2, 2, 2, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 2, 2, 64, 64, 64, 90, 90,
+ 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 2, 2, 90, 90,
+ 90, 90, 90, 90, 90, 2, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,
+ 95, 95, 2, 2, 95, 2, 37, 37, 37, 2, 2, 2, 2, 2, 3, 3,
+ 3, 3, 3, 2, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3,
+ 0, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1,
+ 1, 1, 1, 7, 7, 7, 7, 7, 7, 7, 0, 0, 7, 7, 5, 5,
+ 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 5, 5, 2,
+ 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2,
+ 5, 5, 5, 5, 5, 5, 5, 2, 5, 2, 2, 2, 5, 5, 5, 5,
+ 2, 2, 5, 5, 5, 5, 5, 2, 2, 5, 5, 5, 5, 2, 2, 2,
+ 2, 2, 2, 2, 2, 5, 2, 2, 2, 2, 5, 5, 2, 5, 5, 5,
+ 5, 5, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 11,
+ 11, 11, 2, 11, 11, 11, 11, 11, 11, 2, 2, 2, 2, 11, 11, 2,
+ 2, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2,
+ 11, 11, 11, 11, 11, 11, 11, 2, 11, 11, 2, 11, 11, 2, 11, 11,
+ 2, 2, 11, 2, 11, 11, 11, 2, 2, 11, 11, 11, 2, 2, 2, 11,
+ 2, 2, 2, 2, 2, 2, 2, 11, 11, 11, 11, 2, 11, 2, 2, 2,
+ 2, 2, 2, 2, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2, 2, 10,
+ 10, 10, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2, 10, 10, 10,
+ 2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2,
+ 10, 10, 10, 10, 10, 10, 10, 2, 10, 10, 2, 10, 10, 10, 10, 10,
+ 2, 2, 10, 10, 10, 10, 10, 10, 2, 10, 10, 10, 2, 2, 10, 2,
+ 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 2, 2, 10, 10, 10, 10,
+ 2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 2, 21,
+ 21, 21, 2, 21, 21, 21, 21, 21, 21, 21, 21, 2, 2, 21, 21, 2,
+ 2, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2,
+ 21, 21, 21, 21, 21, 21, 21, 2, 21, 21, 2, 21, 21, 21, 21, 21,
+ 2, 2, 21, 21, 21, 21, 21, 2, 2, 21, 21, 21, 2, 2, 2, 2,
+ 2, 2, 2, 21, 21, 21, 2, 2, 2, 2, 21, 21, 2, 21, 21, 21,
+ 21, 21, 2, 2, 21, 21, 2, 2, 22, 22, 2, 22, 22, 22, 22, 22,
+ 22, 2, 2, 2, 22, 22, 22, 2, 22, 22, 22, 22, 2, 2, 2, 22,
+ 22, 2, 22, 2, 22, 22, 2, 2, 2, 22, 22, 2, 2, 2, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22, 2, 2, 2, 2, 22, 22, 22, 2,
+ 2, 2, 2, 2, 2, 22, 2, 2, 2, 2, 2, 2, 22, 22, 22, 22,
+ 22, 2, 2, 2, 2, 2, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+ 23, 23, 23, 2, 23, 23, 23, 2, 23, 23, 23, 23, 23, 23, 23, 23,
+ 2, 2, 2, 23, 23, 23, 23, 2, 23, 23, 23, 23, 2, 2, 2, 2,
+ 2, 2, 2, 23, 23, 2, 23, 23, 23, 2, 2, 2, 2, 2, 23, 23,
+ 23, 23, 2, 2, 23, 23, 2, 2, 2, 2, 2, 2, 2, 23, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 16, 16, 16, 2,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 16, 16, 16, 16, 16,
+ 2, 2, 16, 16, 16, 16, 16, 2, 16, 16, 16, 16, 2, 2, 2, 2,
+ 2, 2, 2, 16, 16, 2, 2, 2, 2, 2, 2, 2, 16, 2, 16, 16,
+ 16, 16, 2, 2, 16, 16, 2, 16, 16, 2, 2, 2, 2, 2, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 2, 20, 20, 20, 2,
+ 20, 20, 20, 20, 20, 20, 2, 2, 2, 2, 20, 20, 20, 20, 20, 20,
+ 20, 20, 2, 2, 20, 20, 2, 36, 36, 36, 2, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 2, 2, 2,
+ 36, 36, 36, 36, 36, 36, 36, 36, 2, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 2, 36, 2, 2, 2, 2, 36, 2, 2, 2, 2, 36, 36, 36,
+ 36, 36, 36, 2, 36, 2, 2, 2, 2, 2, 2, 2, 36, 36, 2, 2,
+ 36, 36, 36, 2, 2, 2, 2, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 2, 2, 2, 2, 0, 24, 24,
+ 24, 24, 2, 2, 2, 2, 2, 18, 18, 2, 18, 2, 18, 18, 18, 18,
+ 18, 2, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+ 18, 18, 2, 18, 2, 18, 18, 18, 18, 18, 18, 18, 2, 2, 18, 18,
+ 18, 18, 18, 2, 18, 2, 18, 18, 2, 2, 18, 18, 18, 18, 25, 25,
+ 25, 25, 25, 25, 25, 25, 2, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 2, 2, 2, 25, 25, 25, 25, 25, 2, 25, 25, 25, 25,
+ 25, 25, 25, 0, 0, 0, 0, 25, 25, 2, 2, 2, 2, 2, 33, 33,
+ 33, 33, 33, 33, 33, 33, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 2, 8, 2, 2, 2, 2, 2, 8, 2, 2, 8, 8,
+ 8, 0, 8, 8, 8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 30, 30,
+ 30, 30, 30, 30, 30, 30, 30, 2, 30, 30, 30, 30, 2, 2, 30, 30,
+ 30, 30, 30, 30, 30, 2, 30, 30, 30, 2, 2, 30, 30, 30, 30, 30,
+ 30, 30, 30, 2, 2, 2, 30, 30, 2, 2, 2, 2, 2, 2, 29, 29,
+ 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 2, 2, 28, 28,
+ 28, 28, 28, 28, 28, 28, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 2, 2, 2, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 0, 0, 0, 35, 35, 35, 2, 2, 2, 2, 2, 2, 2, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 2, 45, 45, 45, 45,
+ 45, 45, 45, 2, 2, 2, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
+ 44, 44, 44, 0, 0, 2, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 2, 2, 2, 2, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 2, 46, 46, 46, 2, 46, 46, 2, 2, 2, 2, 31, 31,
+ 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 2, 2, 31, 31,
+ 2, 2, 2, 2, 2, 2, 32, 32, 0, 0, 32, 0, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 2, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 2, 2, 2, 2, 2, 2, 32, 2, 2, 2, 2, 2, 2, 2, 32, 32,
+ 32, 2, 2, 2, 2, 2, 28, 28, 28, 28, 28, 28, 2, 2, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 2, 48, 48,
+ 48, 48, 2, 2, 2, 2, 48, 2, 2, 2, 48, 48, 48, 48, 52, 52,
+ 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 2, 2, 52, 52,
+ 52, 52, 52, 2, 2, 2, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
+ 58, 58, 2, 2, 2, 2, 58, 58, 2, 2, 2, 2, 2, 2, 58, 58,
+ 58, 2, 2, 2, 58, 58, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,
+ 54, 54, 2, 2, 54, 54, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91,
+ 91, 91, 91, 91, 91, 2, 91, 91, 91, 91, 91, 2, 2, 91, 91, 91,
+ 2, 2, 2, 2, 2, 2, 91, 91, 91, 91, 91, 91, 2, 2, 1, 2,
+ 2, 2, 2, 2, 2, 2, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 2, 2, 2, 2, 62, 62, 62, 62, 62, 2, 2, 2, 76, 76,
+ 76, 76, 76, 76, 76, 76, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93,
+ 93, 93, 2, 2, 2, 2, 2, 2, 2, 2, 93, 93, 93, 93, 70, 70,
+ 70, 70, 70, 70, 70, 70, 2, 2, 2, 70, 70, 70, 70, 70, 70, 70,
+ 2, 2, 2, 70, 70, 70, 73, 73, 73, 73, 73, 73, 73, 73, 6, 2,
+ 2, 2, 2, 2, 2, 2, 8, 8, 8, 2, 2, 8, 8, 8, 1, 1,
+ 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
+ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
+ 0, 2, 2, 2, 2, 2, 19, 19, 19, 19, 19, 19, 9, 9, 9, 9,
+ 9, 6, 19, 19, 19, 19, 19, 19, 19, 19, 19, 9, 9, 9, 9, 9,
+ 19, 19, 19, 19, 9, 9, 9, 9, 9, 19, 19, 19, 19, 19, 6, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 9, 1, 1,
+ 2, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 2, 2, 2, 9,
+ 2, 9, 2, 9, 2, 9, 9, 9, 9, 9, 9, 2, 9, 9, 9, 9,
+ 9, 9, 2, 2, 9, 9, 9, 9, 9, 9, 2, 9, 9, 9, 2, 2,
+ 9, 9, 9, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2, 0, 0,
+ 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 19,
+ 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, 0,
+ 0, 0, 0, 0, 0, 2, 19, 19, 19, 19, 19, 2, 2, 2, 0, 0,
+ 0, 0, 0, 0, 9, 0, 0, 0, 19, 19, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 19, 0, 19, 0, 0, 0, 2, 2, 2, 2, 0, 0,
+ 0, 2, 2, 2, 2, 2, 27, 27, 27, 27, 27, 27, 27, 27, 0, 0,
+ 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 56, 56,
+ 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 2, 55, 55,
+ 55, 55, 2, 2, 2, 2, 2, 55, 55, 55, 55, 55, 55, 55, 61, 61,
+ 61, 61, 61, 61, 61, 61, 2, 2, 2, 2, 2, 2, 2, 61, 61, 2,
+ 2, 2, 2, 2, 2, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 2, 2, 13, 13,
+ 13, 13, 13, 13, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0,
+ 0, 0, 0, 13, 0, 13, 0, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 1, 1, 1, 1, 12, 12, 13, 13, 13, 13, 0, 0, 0, 0, 2, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 2, 2, 1, 1, 0, 0, 15, 15, 15, 0, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 0, 0, 17, 17, 17, 2, 2, 2, 2, 2, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 2, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 2, 12, 12, 12, 12, 12, 12, 12, 0, 17, 17,
+ 17, 17, 17, 17, 17, 0, 13, 13, 13, 13, 13, 2, 2, 2, 39, 39,
+ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 2, 2, 2, 39, 39,
+ 39, 39, 39, 39, 39, 2, 86, 86, 86, 86, 86, 86, 86, 86, 77, 77,
+ 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 2, 2, 2, 2, 79, 79,
+ 79, 79, 79, 79, 79, 79, 0, 0, 19, 19, 19, 19, 19, 19, 0, 0,
+ 0, 19, 19, 19, 19, 19, 2, 2, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 19, 19, 19, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 2, 2, 2, 0, 0,
+ 2, 2, 2, 2, 2, 2, 65, 65, 65, 65, 65, 65, 65, 65, 75, 75,
+ 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 2, 2, 2, 2,
+ 2, 2, 2, 2, 75, 75, 75, 75, 2, 2, 2, 2, 2, 2, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 0, 69, 74, 74,
+ 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 74, 12, 12, 12, 12, 12, 2, 2, 2, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 2, 0, 84, 84,
+ 2, 2, 2, 2, 84, 84, 33, 33, 33, 33, 33, 33, 33, 2, 68, 68,
+ 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 2, 68, 68,
+ 68, 68, 68, 68, 2, 2, 68, 68, 2, 2, 68, 68, 68, 68, 92, 92,
+ 92, 92, 92, 92, 92, 92, 92, 92, 92, 2, 2, 2, 2, 2, 2, 2,
+ 2, 92, 92, 92, 92, 92, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,
+ 87, 87, 87, 87, 87, 2, 2, 30, 30, 30, 30, 30, 30, 2, 19, 19,
+ 19, 0, 19, 19, 19, 19, 19, 19, 19, 19, 19, 9, 19, 19, 19, 19,
+ 0, 0, 2, 2, 2, 2, 87, 87, 87, 87, 87, 87, 2, 2, 87, 87,
+ 2, 2, 2, 2, 2, 2, 12, 12, 12, 12, 2, 2, 2, 2, 2, 2,
+ 2, 12, 12, 12, 12, 12, 13, 13, 2, 2, 2, 2, 2, 2, 19, 19,
+ 19, 19, 19, 19, 19, 2, 2, 2, 2, 4, 4, 4, 4, 4, 2, 2,
+ 2, 2, 2, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 2, 14, 14,
+ 14, 14, 14, 2, 14, 2, 14, 14, 2, 14, 14, 2, 14, 14, 3, 3,
+ 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 0, 0, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 1, 1,
+ 1, 1, 1, 1, 6, 6, 0, 0, 0, 2, 0, 0, 0, 0, 3, 3,
+ 3, 3, 3, 2, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 0, 0, 2, 2,
+ 12, 12, 12, 12, 12, 12, 2, 2, 12, 12, 12, 2, 2, 2, 2, 0,
+ 0, 0, 0, 0, 2, 2, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 2, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 2, 49, 49,
+ 49, 2, 49, 49, 2, 49, 49, 49, 49, 49, 49, 49, 2, 2, 49, 49,
+ 49, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0,
+ 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 9, 2,
+ 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 1, 2, 2, 71, 71,
+ 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 2, 2, 2, 67, 67,
+ 67, 67, 67, 67, 67, 67, 67, 2, 2, 2, 2, 2, 2, 2, 1, 0,
+ 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+ 42, 42, 2, 2, 2, 2, 2, 2, 2, 2, 2, 42, 42, 42, 41, 41,
+ 41, 41, 41, 41, 41, 41, 41, 41, 41, 2, 2, 2, 2, 2,118,118,
+ 118,118,118,118,118,118,118,118,118, 2, 2, 2, 2, 2, 53, 53,
+ 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 2, 53, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 2, 2, 2, 2, 59, 59,
+ 59, 59, 59, 59, 2, 2, 40, 40, 40, 40, 40, 40, 40, 40, 51, 51,
+ 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 2, 2, 50, 50, 2, 2, 2, 2, 2, 2,135,135,
+ 135,135,135,135,135,135,135,135,135,135, 2, 2, 2, 2,106,106,
+ 106,106,106,106,106,106,104,104,104,104,104,104,104,104,104,104,
+ 104,104, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,104,110,110,
+ 110,110,110,110,110,110,110,110,110,110,110,110,110, 2,110,110,
+ 110,110,110,110, 2, 2, 47, 47, 47, 47, 47, 47, 2, 2, 47, 2,
+ 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
+ 47, 47, 47, 47, 2, 47, 47, 2, 2, 2, 47, 2, 2, 47, 81, 81,
+ 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 2, 81,120,120,
+ 120,120,120,120,120,120,116,116,116,116,116,116,116,116,116,116,
+ 116,116,116,116,116, 2, 2, 2, 2, 2, 2, 2, 2,116,128,128,
+ 128,128,128,128,128,128,128,128,128, 2,128,128, 2, 2, 2, 2,
+ 2,128,128,128,128,128, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 2, 2, 2, 66, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 2, 2, 2, 2, 2, 72, 98, 98, 98, 98, 98, 98, 98, 98, 97, 97,
+ 97, 97, 97, 97, 97, 97, 2, 2, 2, 2, 97, 97, 97, 97, 2, 2,
+ 97, 97, 97, 97, 97, 97, 57, 57, 57, 57, 2, 57, 57, 2, 2, 2,
+ 2, 2, 57, 57, 57, 57, 57, 57, 57, 57, 2, 57, 57, 57, 2, 57,
+ 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57,
+ 57, 57, 57, 57, 2, 2, 57, 57, 57, 2, 2, 2, 2, 57, 57, 2,
+ 2, 2, 2, 2, 2, 2, 88, 88, 88, 88, 88, 88, 88, 88,117,117,
+ 117,117,117,117,117,117,112,112,112,112,112,112,112,112,112,112,
+ 112,112,112,112,112, 2, 2, 2, 2,112,112,112,112,112, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 2, 2, 2, 78,
+ 78, 78, 78, 78, 78, 78, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 2, 2, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82,
+ 82, 2, 2, 2, 2, 2,122,122,122,122,122,122,122,122,122,122,
+ 2, 2, 2, 2, 2, 2, 2,122,122,122,122, 2, 2, 2, 2,122,
+ 122,122,122,122,122,122, 89, 89, 89, 89, 89, 89, 89, 89, 89, 2,
+ 2, 2, 2, 2, 2, 2,130,130,130,130,130,130,130,130,130,130,
+ 130, 2, 2, 2, 2, 2, 2, 2,130,130,130,130,130,130,144,144,
+ 144,144,144,144,144,144,144,144, 2, 2, 2, 2, 2, 2, 3, 3,
+ 3, 3, 3, 3, 3, 2,156,156,156,156,156,156,156,156,156,156,
+ 2,156,156,156, 2, 2,156,156, 2, 2, 2, 2, 2, 2,147,147,
+ 147,147,147,147,147,147,148,148,148,148,148,148,148,148,148,148,
+ 2, 2, 2, 2, 2, 2,153,153,153,153,153,153,153,153,153,153,
+ 153,153, 2, 2, 2, 2,149,149,149,149,149,149,149,149,149,149,
+ 149,149,149,149,149, 2, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94,
+ 94, 94, 94, 94, 2, 2, 2, 2, 94, 94, 94, 94, 94, 94, 2, 2,
+ 2, 2, 2, 2, 2, 94, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 85, 2, 2,101,101,
+ 101,101,101,101,101,101,101, 2, 2, 2, 2, 2, 2, 2,101,101,
+ 2, 2, 2, 2, 2, 2, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
+ 96, 96, 96, 2, 96, 96,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111, 2,100,100,100,100,100,100,100,100, 2, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 2, 2, 2,108,108,
+ 108,108,108,108,108,108,108,108, 2,108,108,108,108,108,108,108,
+ 108,108,108,108,108, 2,129,129,129,129,129,129,129, 2,129, 2,
+ 129,129,129,129, 2,129,129,129,129,129,129,129,129,129,129,129,
+ 129,129,129,129, 2,129,129,129, 2, 2, 2, 2, 2, 2,109,109,
+ 109,109,109,109,109,109,109,109,109, 2, 2, 2, 2, 2,109,109,
+ 2, 2, 2, 2, 2, 2,107,107,107,107, 2,107,107,107,107,107,
+ 107,107,107, 2, 2,107,107, 2, 2,107,107,107,107,107,107,107,
+ 107,107,107,107,107,107,107, 2,107,107,107,107,107,107,107, 2,
+ 107,107, 2,107,107,107,107,107, 2, 1,107,107,107,107,107, 2,
+ 2,107,107,107, 2, 2,107, 2, 2, 2, 2, 2, 2,107, 2, 2,
+ 2, 2, 2,107,107,107,107,107,107,107, 2, 2,107,107,107,107,
+ 107,107,107, 2, 2, 2,137,137,137,137,137,137,137,137,137,137,
+ 137,137, 2,137,137,137,137,137, 2, 2, 2, 2, 2, 2,124,124,
+ 124,124,124,124,124,124,124,124, 2, 2, 2, 2, 2, 2,123,123,
+ 123,123,123,123,123,123,123,123,123,123,123,123, 2, 2,114,114,
+ 114,114,114,114,114,114,114,114,114,114,114, 2, 2, 2,114,114,
+ 2, 2, 2, 2, 2, 2, 32, 32, 32, 32, 32, 2, 2, 2,102,102,
+ 102,102,102,102,102,102,102, 2, 2, 2, 2, 2, 2, 2,102,102,
+ 2, 2, 2, 2, 2, 2,126,126,126,126,126,126,126,126,126,126,
+ 126, 2, 2,126,126,126,126,126,126,126, 2, 2, 2, 2,142,142,
+ 142,142,142,142,142,142,142,142,142,142, 2, 2, 2, 2,125,125,
+ 125,125,125,125,125,125,125,125,125, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2,125,154,154,154,154,154,154,154, 2, 2,154,
+ 2, 2,154,154,154,154,154,154,154,154, 2,154,154, 2,154,154,
+ 154,154,154,154,154,154,154,154,154,154,154,154, 2,154,154, 2,
+ 2,154,154,154,154,154,154,154, 2, 2, 2, 2, 2, 2,150,150,
+ 150,150,150,150,150,150, 2, 2,150,150,150,150,150,150,150,150,
+ 150,150,150, 2, 2, 2,141,141,141,141,141,141,141,141,140,140,
+ 140,140,140,140,140,140,140,140,140, 2, 2, 2, 2, 2,121,121,
+ 121,121,121,121,121,121,121, 2, 2, 2, 2, 2, 2, 2,133,133,
+ 133,133,133,133,133,133,133, 2,133,133,133,133,133,133,133,133,
+ 133,133,133,133,133, 2,133,133,133,133,133,133, 2, 2,133,133,
+ 133,133,133, 2, 2, 2,134,134,134,134,134,134,134,134, 2, 2,
+ 134,134,134,134,134,134, 2,134,134,134,134,134,134,134,134,134,
+ 134,134,134,134,134, 2,138,138,138,138,138,138,138, 2,138,138,
+ 2,138,138,138,138,138,138,138,138,138,138,138,138,138, 2, 2,
+ 138, 2,138,138, 2,138,138,138, 2, 2, 2, 2, 2, 2,143,143,
+ 143,143,143,143, 2,143,143, 2,143,143,143,143,143,143,143,143,
+ 143,143,143,143,143,143,143,143,143,143,143,143,143, 2,143,143,
+ 2,143,143,143,143,143,143, 2, 2, 2, 2, 2, 2, 2,143,143,
+ 2, 2, 2, 2, 2, 2,145,145,145,145,145,145,145,145,145, 2,
+ 2, 2, 2, 2, 2, 2, 86, 2, 2, 2, 2, 2, 2, 2, 22, 22,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 22, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, 2, 2, 2, 2, 2, 2, 63, 63,
+ 63, 63, 63, 63, 63, 2, 63, 63, 63, 63, 63, 2, 2, 2, 63, 63,
+ 63, 63, 2, 2, 2, 2, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 2, 80, 2, 2, 2, 2, 2, 2, 2,127,127,
+ 127,127,127,127,127,127,127,127,127,127,127,127,127, 2, 79, 2,
+ 2, 2, 2, 2, 2, 2,115,115,115,115,115,115,115,115,115,115,
+ 115,115,115,115,115, 2,115,115, 2, 2, 2, 2,115,115,103,103,
+ 103,103,103,103,103,103,103,103,103,103,103,103, 2, 2,119,119,
+ 119,119,119,119,119,119,119,119,119,119,119,119, 2, 2,119,119,
+ 2,119,119,119,119,119, 2, 2, 2, 2, 2,119,119,119,146,146,
+ 146,146,146,146,146,146,146,146,146, 2, 2, 2, 2, 2, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2, 2, 2, 99, 2, 2,
+ 2, 2, 2, 2, 2, 99,136,139, 0, 0,155, 2, 2, 2,136,136,
+ 136,136,136,136,136,136,155,155,155,155,155,155,155,155,155,155,
+ 155,155,155,155, 2, 2,136, 2, 2, 2, 2, 2, 2, 2, 17, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 17, 17, 17, 17,139,139,139,139,139,139,139,139,139,139,
+ 139,139, 2, 2, 2, 2,105,105,105,105,105,105,105,105,105,105,
+ 105, 2, 2, 2, 2, 2,105,105,105,105,105, 2, 2, 2,105, 2,
+ 2, 2, 2, 2, 2, 2,105,105, 2, 2,105,105,105,105, 0, 0,
+ 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
+ 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 0, 2, 2, 0, 0, 2, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0,
+ 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0,
+ 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0,
+ 0, 0, 0, 2, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2,
+ 0, 0, 0, 0, 0, 0,131,131,131,131,131,131,131,131,131,131,
+ 131,131, 2, 2, 2, 2, 2, 2, 2,131,131,131,131,131, 2,131,
+ 131,131,131,131,131,131, 56, 2, 2, 56, 56, 56, 56, 56, 56, 56,
+ 2, 56, 56, 2, 56, 56, 56, 56, 56, 2, 2, 2, 2, 2,151,151,
+ 151,151,151,151,151,151,151,151,151,151,151, 2, 2, 2,151,151,
+ 151,151,151,151, 2, 2,151,151, 2, 2, 2, 2,151,151,152,152,
+ 152,152,152,152,152,152,152,152, 2, 2, 2, 2, 2,152,113,113,
+ 113,113,113,113,113,113,113,113,113,113,113, 2, 2,113,113,113,
+ 113,113,113,113,113, 2,132,132,132,132,132,132,132,132,132,132,
+ 132,132, 2, 2, 2, 2,132,132, 2, 2, 2, 2,132,132, 3, 3,
+ 3, 3, 2, 3, 3, 3, 2, 3, 3, 2, 3, 2, 2, 3, 2, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 2, 3,
+ 2, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 3, 2, 3,
+ 2, 3, 2, 3, 3, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ 3, 3, 3, 2, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 2, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2,
+ 2, 2, 2, 2, 0, 0, 15, 0, 0, 2, 2, 2, 2, 2, 13, 2,
+ 2, 2, 2, 2, 2, 2, 13, 13, 13, 2, 2, 2, 2, 2, 2, 0,
+ 2, 2, 2, 2, 2, 2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 9, 9, 9, 10, 9, 11, 12, 13, 9, 9, 9, 14, 9, 9, 15, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 16, 17, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 18, 19,
+ 20, 9, 21, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 23, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 0, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 23, 0, 0, 24, 25, 26, 27, 28,
+ 29, 30, 0, 0, 31, 32, 0, 33, 0, 34, 0, 35, 0, 0, 0, 0,
+ 36, 37, 38, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 41, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 43, 44, 0, 45, 0, 0, 0, 0, 0, 0,
+ 46, 47, 0, 0, 0, 0, 0, 48, 0, 49, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 50, 51, 0, 0, 0, 52, 0, 0,
+ 53, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0,
+ 55, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0, 0, 0, 0, 0, 0,
+ 0, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 58, 59, 60, 61, 62, 63, 64, 65,
+ 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 67, 68, 0, 69, 70, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82,
+ 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
+ 99,100,101,102,103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,104, 0, 0, 0, 0, 0, 0,105,106, 0,
+ 107, 0, 0, 0,108, 0,109, 0,110, 0,111,112,113, 0,114, 0,
+ 0, 0,115, 0, 0, 0,116, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0,117, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,118,119,120,121, 0,122,123,124,
+ 125,126, 0,127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,128,129,130,131,132,133,134,135,136,137,138,139,
+ 140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,
+ 156,157, 0, 0, 0,158,159,160,161, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,162,163, 0,
+ 0, 0, 0, 0, 0, 0,164, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,165, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,166,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,167,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,168, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,169,170, 0, 0, 0, 0,171,
+ 172, 0, 0, 0,173,174,175,176,177,178,179,180,181,182,183,184,
+ 185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,
+ 201,202,203,204,205,206, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 2, 3, 4,
+};
+static const uint16_t
+_hb_ucd_u16[9080] =
+{
+ 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 7, 8, 9, 10, 11, 12,
+ 13, 13, 13, 14, 15, 13, 13, 16, 17, 18, 19, 20, 21, 22, 13, 23,
+ 13, 13, 13, 24, 25, 11, 11, 11, 11, 26, 11, 27, 28, 29, 30, 31,
+ 32, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 11, 37, 38, 13, 39,
+ 9, 9, 9, 11, 11, 11, 13, 13, 40, 13, 13, 13, 41, 13, 13, 13,
+ 13, 13, 13, 42, 9, 43, 11, 11, 44, 45, 32, 46, 47, 48, 49, 50,
+ 51, 52, 48, 48, 53, 32, 54, 55, 48, 48, 48, 48, 48, 56, 57, 58,
+ 59, 60, 48, 32, 61, 48, 48, 48, 48, 48, 62, 63, 64, 48, 65, 66,
+ 48, 67, 68, 69, 48, 70, 71, 72, 72, 72, 48, 73, 74, 75, 76, 32,
+ 77, 48, 48, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
+ 91, 84, 85, 92, 93, 94, 95, 96, 97, 98, 85, 99, 100, 101, 89, 102,
+ 103, 84, 85, 104, 105, 106, 89, 107, 108, 109, 110, 111, 112, 113, 95, 114,
+ 115, 116, 85, 117, 118, 119, 89, 120, 121, 116, 85, 122, 123, 124, 89, 125,
+ 126, 116, 48, 127, 128, 129, 89, 130, 131, 132, 48, 133, 134, 135, 95, 136,
+ 137, 48, 48, 138, 139, 140, 72, 72, 141, 48, 142, 143, 144, 145, 72, 72,
+ 146, 147, 148, 149, 150, 48, 151, 152, 153, 154, 32, 155, 156, 157, 72, 72,
+ 48, 48, 158, 159, 160, 161, 162, 163, 164, 165, 9, 9, 166, 11, 11, 167,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 168, 169, 48, 48,
+ 168, 48, 48, 170, 171, 172, 48, 48, 48, 171, 48, 48, 48, 173, 174, 175,
+ 48, 176, 9, 9, 9, 9, 9, 177, 178, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 179, 48, 180, 181, 48, 48, 48, 48, 182, 183,
+ 184, 185, 48, 186, 48, 187, 184, 188, 48, 48, 48, 189, 190, 191, 192, 193,
+ 194, 192, 48, 48, 195, 48, 48, 196, 197, 48, 198, 48, 48, 48, 48, 199,
+ 48, 200, 201, 202, 203, 48, 204, 205, 48, 48, 206, 48, 207, 208, 209, 209,
+ 48, 210, 48, 48, 48, 211, 212, 213, 192, 192, 214, 215, 216, 72, 72, 72,
+ 217, 48, 48, 218, 219, 160, 220, 221, 222, 48, 223, 64, 48, 48, 224, 225,
+ 48, 48, 226, 227, 228, 64, 48, 229, 230, 9, 9, 231, 232, 233, 234, 235,
+ 11, 11, 236, 27, 27, 27, 237, 238, 11, 239, 27, 27, 32, 32, 32, 240,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 241, 13, 13, 13, 13, 13, 13,
+ 242, 243, 242, 242, 243, 244, 242, 245, 246, 246, 246, 247, 248, 249, 250, 251,
+ 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 262, 72, 263, 264, 216,
+ 265, 266, 267, 268, 269, 270, 271, 271, 272, 273, 274, 209, 275, 276, 209, 277,
+ 278, 278, 278, 278, 278, 278, 278, 278, 279, 209, 280, 209, 209, 209, 209, 281,
+ 209, 282, 278, 283, 209, 284, 285, 209, 209, 209, 286, 72, 287, 72, 270, 270,
+ 270, 288, 209, 209, 209, 209, 289, 270, 209, 209, 209, 209, 209, 209, 209, 209,
+ 209, 209, 209, 290, 291, 209, 209, 292, 209, 209, 209, 209, 209, 209, 293, 209,
+ 209, 209, 209, 209, 209, 209, 294, 295, 270, 296, 209, 209, 297, 278, 298, 278,
+ 299, 300, 278, 278, 278, 301, 278, 302, 209, 209, 209, 278, 303, 209, 209, 304,
+ 209, 305, 209, 209, 209, 209, 209, 209, 9, 9, 306, 11, 11, 307, 308, 309,
+ 13, 13, 13, 13, 13, 13, 310, 311, 11, 11, 312, 48, 48, 48, 313, 314,
+ 48, 315, 316, 316, 316, 316, 32, 32, 317, 318, 319, 320, 321, 322, 72, 72,
+ 209, 323, 209, 209, 209, 209, 209, 324, 209, 209, 209, 209, 209, 325, 72, 326,
+ 327, 328, 329, 330, 137, 48, 48, 48, 48, 331, 178, 48, 48, 48, 48, 332,
+ 333, 48, 48, 137, 48, 48, 48, 48, 200, 334, 48, 48, 209, 209, 324, 48,
+ 209, 335, 336, 209, 337, 338, 209, 209, 336, 209, 209, 338, 209, 209, 209, 209,
+ 48, 48, 48, 48, 209, 209, 209, 209, 48, 48, 48, 48, 48, 48, 48, 151,
+ 48, 339, 48, 48, 48, 48, 48, 48, 151, 209, 209, 209, 286, 48, 48, 229,
+ 340, 48, 341, 72, 13, 13, 342, 343, 13, 344, 48, 48, 48, 48, 345, 346,
+ 31, 347, 348, 349, 13, 13, 13, 350, 351, 352, 353, 354, 355, 72, 72, 356,
+ 357, 48, 358, 359, 48, 48, 48, 360, 361, 48, 48, 362, 363, 192, 32, 364,
+ 64, 48, 365, 48, 366, 367, 48, 151, 77, 48, 48, 368, 369, 370, 371, 372,
+ 48, 48, 373, 374, 375, 376, 48, 377, 48, 48, 48, 378, 379, 380, 381, 382,
+ 383, 384, 316, 11, 11, 385, 386, 11, 11, 11, 11, 11, 48, 48, 387, 192,
+ 48, 48, 388, 48, 389, 48, 48, 206, 390, 390, 390, 390, 390, 390, 390, 390,
+ 391, 391, 391, 391, 391, 391, 391, 391, 48, 48, 48, 48, 48, 48, 204, 48,
+ 48, 48, 48, 48, 48, 207, 72, 72, 392, 393, 394, 395, 396, 48, 48, 48,
+ 48, 48, 48, 397, 398, 399, 48, 48, 48, 48, 48, 400, 72, 48, 48, 48,
+ 48, 401, 48, 48, 74, 72, 72, 402, 32, 403, 32, 404, 405, 406, 407, 73,
+ 48, 48, 48, 48, 48, 48, 48, 408, 409, 2, 3, 4, 5, 410, 411, 412,
+ 48, 413, 48, 200, 414, 415, 416, 417, 418, 48, 172, 419, 204, 204, 72, 72,
+ 48, 48, 48, 48, 48, 48, 48, 71, 420, 270, 270, 421, 271, 271, 271, 422,
+ 423, 424, 425, 72, 72, 209, 209, 426, 72, 72, 72, 72, 72, 72, 72, 72,
+ 48, 151, 48, 48, 48, 101, 427, 428, 48, 48, 429, 48, 430, 48, 48, 431,
+ 48, 432, 48, 48, 433, 434, 72, 72, 9, 9, 435, 11, 11, 48, 48, 48,
+ 48, 204, 192, 9, 9, 436, 11, 437, 48, 48, 74, 48, 48, 48, 438, 72,
+ 48, 48, 48, 315, 48, 199, 74, 72, 439, 48, 48, 440, 48, 441, 48, 442,
+ 48, 200, 443, 72, 72, 72, 48, 444, 48, 445, 48, 446, 72, 72, 72, 72,
+ 48, 48, 48, 447, 270, 448, 270, 270, 449, 450, 48, 451, 452, 453, 48, 454,
+ 48, 455, 72, 72, 456, 48, 457, 458, 48, 48, 48, 459, 48, 460, 48, 461,
+ 48, 462, 463, 72, 72, 72, 72, 72, 48, 48, 48, 48, 196, 72, 72, 72,
+ 9, 9, 9, 464, 11, 11, 11, 465, 48, 48, 466, 192, 72, 72, 72, 72,
+ 72, 72, 72, 72, 72, 72, 270, 467, 48, 48, 468, 469, 72, 72, 72, 72,
+ 48, 455, 470, 48, 62, 471, 72, 72, 72, 72, 72, 48, 472, 72, 48, 315,
+ 473, 48, 48, 474, 475, 448, 476, 477, 222, 48, 48, 478, 479, 48, 196, 192,
+ 480, 48, 481, 482, 483, 48, 48, 484, 222, 48, 48, 485, 486, 487, 488, 489,
+ 48, 98, 490, 491, 72, 72, 72, 72, 492, 493, 494, 48, 48, 495, 496, 192,
+ 497, 84, 85, 498, 499, 500, 501, 502, 48, 48, 48, 503, 504, 505, 469, 72,
+ 48, 48, 48, 506, 507, 192, 72, 72, 48, 48, 508, 509, 510, 511, 72, 72,
+ 48, 48, 48, 512, 513, 192, 514, 72, 48, 48, 515, 516, 192, 72, 72, 72,
+ 48, 173, 517, 518, 72, 72, 72, 72, 48, 48, 490, 519, 72, 72, 72, 72,
+ 72, 72, 9, 9, 11, 11, 148, 520, 521, 522, 48, 523, 524, 192, 72, 72,
+ 72, 72, 525, 48, 48, 526, 527, 72, 528, 48, 48, 529, 530, 531, 48, 48,
+ 532, 533, 534, 72, 48, 48, 48, 196, 85, 48, 508, 535, 536, 148, 175, 537,
+ 48, 538, 539, 540, 72, 72, 72, 72, 541, 48, 48, 542, 543, 192, 544, 48,
+ 545, 546, 192, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 48, 547,
+ 72, 72, 72, 101, 270, 548, 549, 550, 48, 207, 72, 72, 72, 72, 72, 72,
+ 271, 271, 271, 271, 271, 271, 551, 552, 48, 48, 48, 48, 388, 72, 72, 72,
+ 48, 48, 200, 553, 72, 72, 72, 72, 48, 48, 48, 48, 315, 72, 72, 72,
+ 48, 48, 48, 196, 48, 200, 370, 72, 72, 72, 72, 72, 72, 48, 204, 554,
+ 48, 48, 48, 555, 556, 557, 558, 559, 48, 72, 72, 72, 72, 72, 72, 72,
+ 72, 72, 72, 72, 9, 9, 11, 11, 270, 560, 72, 72, 72, 72, 72, 72,
+ 48, 48, 48, 48, 561, 562, 563, 563, 564, 565, 72, 72, 72, 72, 566, 567,
+ 48, 48, 48, 48, 48, 48, 48, 74, 48, 48, 48, 48, 48, 199, 72, 72,
+ 196, 72, 72, 72, 72, 72, 72, 72, 48, 200, 72, 72, 72, 568, 569, 48,
+ 48, 48, 48, 48, 48, 48, 48, 206, 48, 48, 48, 48, 48, 48, 71, 151,
+ 196, 570, 571, 72, 72, 72, 72, 72, 209, 209, 209, 209, 209, 209, 209, 325,
+ 209, 209, 572, 209, 209, 209, 573, 574, 575, 209, 576, 209, 209, 209, 577, 72,
+ 209, 209, 209, 209, 578, 72, 72, 72, 72, 72, 72, 72, 72, 72, 270, 579,
+ 209, 209, 209, 209, 209, 286, 270, 452, 9, 580, 11, 581, 582, 583, 242, 9,
+ 584, 585, 586, 587, 588, 9, 580, 11, 589, 590, 11, 591, 592, 593, 594, 9,
+ 595, 11, 9, 580, 11, 581, 582, 11, 242, 9, 584, 594, 9, 595, 11, 9,
+ 580, 11, 596, 9, 597, 598, 599, 600, 11, 601, 9, 602, 603, 604, 605, 11,
+ 606, 9, 607, 11, 608, 609, 609, 609, 32, 32, 32, 610, 32, 32, 611, 612,
+ 613, 614, 45, 72, 72, 72, 72, 72, 615, 616, 617, 72, 72, 72, 72, 72,
+ 48, 48, 151, 618, 619, 72, 72, 72, 72, 72, 72, 72, 48, 48, 620, 621,
+ 48, 48, 48, 48, 622, 623, 72, 72, 9, 9, 584, 11, 624, 370, 72, 72,
+ 72, 72, 72, 72, 72, 72, 72, 488, 270, 270, 625, 626, 72, 72, 72, 72,
+ 488, 270, 627, 628, 72, 72, 72, 72, 629, 48, 630, 631, 632, 633, 634, 635,
+ 636, 206, 637, 206, 72, 72, 72, 638, 209, 209, 326, 209, 209, 209, 209, 209,
+ 209, 324, 335, 639, 639, 639, 209, 325, 640, 209, 209, 209, 209, 209, 209, 209,
+ 209, 209, 641, 72, 72, 72, 642, 209, 643, 209, 209, 326, 577, 644, 325, 72,
+ 209, 209, 209, 209, 209, 209, 209, 645, 209, 209, 209, 209, 209, 646, 424, 424,
+ 209, 209, 209, 209, 209, 209, 209, 324, 209, 209, 209, 209, 209, 577, 326, 72,
+ 326, 209, 209, 209, 646, 176, 209, 209, 646, 209, 641, 644, 72, 72, 72, 72,
+ 209, 209, 209, 209, 209, 209, 209, 647, 209, 209, 209, 209, 648, 209, 209, 209,
+ 209, 209, 209, 209, 209, 324, 641, 649, 286, 209, 577, 286, 643, 286, 72, 72,
+ 209, 650, 209, 209, 287, 72, 72, 192, 48, 48, 48, 48, 48, 204, 72, 72,
+ 48, 48, 48, 205, 48, 48, 48, 48, 48, 204, 48, 48, 48, 48, 48, 48,
+ 48, 48, 469, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 101, 72,
+ 48, 204, 72, 72, 72, 72, 72, 72, 48, 48, 48, 48, 71, 72, 72, 72,
+ 651, 72, 652, 652, 652, 652, 652, 652, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 72, 391, 391, 391, 391, 391, 391, 391, 653,
+ 391, 391, 391, 391, 391, 391, 391, 654, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 2, 2, 3, 1, 2, 2, 3, 0, 0, 0, 0, 0, 4, 0, 4,
+ 2, 2, 5, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6,
+ 0, 0, 0, 0, 7, 8, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 10, 11, 12, 13, 14, 14, 15, 14, 14, 14,
+ 14, 14, 14, 14, 16, 17, 14, 14, 18, 18, 18, 18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18, 18, 18, 18, 19, 18, 18, 18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 20, 21,
+ 21, 21, 22, 20, 21, 21, 21, 21, 21, 23, 24, 25, 25, 25, 25, 25,
+ 25, 26, 25, 25, 25, 27, 28, 26, 29, 30, 31, 32, 31, 31, 31, 31,
+ 33, 34, 35, 31, 31, 31, 36, 31, 31, 31, 31, 31, 31, 31, 31, 31,
+ 31, 31, 31, 29, 31, 31, 31, 31, 37, 38, 37, 37, 37, 37, 37, 37,
+ 37, 39, 31, 31, 31, 31, 31, 31, 40, 40, 40, 40, 40, 40, 41, 26,
+ 42, 42, 42, 42, 42, 42, 42, 43, 44, 44, 44, 44, 44, 45, 44, 46,
+ 47, 47, 47, 48, 37, 49, 26, 26, 26, 26, 26, 26, 31, 31, 50, 31,
+ 31, 26, 51, 31, 52, 31, 31, 31, 53, 53, 53, 53, 53, 53, 53, 53,
+ 53, 53, 54, 53, 55, 53, 53, 53, 56, 57, 58, 59, 59, 60, 61, 62,
+ 57, 63, 64, 65, 66, 59, 59, 67, 68, 69, 70, 71, 71, 72, 73, 74,
+ 69, 75, 76, 77, 78, 71, 79, 26, 80, 81, 82, 83, 83, 84, 85, 86,
+ 81, 87, 88, 26, 89, 83, 90, 91, 92, 93, 94, 95, 95, 96, 97, 98,
+ 93, 99, 100, 101, 102, 95, 95, 26, 103, 104, 105, 106, 107, 104, 108, 109,
+ 104, 105, 110, 26, 111, 108, 108, 112, 113, 114, 115, 113, 113, 115, 113, 116,
+ 114, 117, 118, 119, 120, 113, 121, 113, 122, 123, 124, 122, 122, 124, 125, 126,
+ 123, 127, 128, 129, 130, 122, 131, 26, 132, 133, 134, 132, 132, 132, 132, 132,
+ 133, 134, 135, 132, 136, 132, 132, 132, 137, 138, 139, 140, 138, 138, 141, 142,
+ 139, 143, 144, 138, 145, 138, 146, 26, 147, 148, 148, 148, 148, 148, 148, 149,
+ 148, 148, 148, 150, 26, 26, 26, 26, 151, 152, 153, 153, 154, 153, 153, 155,
+ 156, 155, 153, 157, 26, 26, 26, 26, 158, 158, 158, 158, 158, 158, 158, 158,
+ 158, 159, 158, 158, 158, 160, 159, 158, 158, 158, 158, 159, 158, 158, 158, 161,
+ 158, 161, 162, 163, 26, 26, 26, 26, 164, 164, 164, 164, 164, 164, 164, 164,
+ 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 165, 165, 165, 165,
+ 166, 167, 165, 165, 165, 165, 165, 168, 169, 169, 169, 169, 169, 169, 169, 169,
+ 169, 169, 169, 169, 169, 169, 169, 169, 170, 170, 170, 170, 170, 170, 170, 170,
+ 170, 171, 172, 171, 170, 170, 170, 170, 170, 171, 170, 170, 170, 170, 171, 172,
+ 171, 170, 172, 170, 170, 170, 170, 170, 170, 170, 171, 170, 170, 170, 170, 170,
+ 170, 170, 170, 173, 170, 170, 170, 174, 170, 170, 170, 175, 176, 176, 176, 176,
+ 176, 176, 176, 176, 176, 176, 177, 177, 178, 178, 178, 178, 178, 178, 178, 178,
+ 178, 178, 178, 178, 178, 178, 178, 178, 179, 179, 179, 180, 181, 181, 181, 181,
+ 181, 181, 181, 181, 181, 182, 181, 183, 184, 185, 186, 26, 187, 187, 188, 26,
+ 189, 189, 190, 26, 191, 192, 193, 26, 194, 194, 194, 194, 194, 194, 194, 194,
+ 194, 194, 194, 195, 194, 196, 194, 196, 197, 198, 199, 200, 199, 199, 199, 199,
+ 199, 199, 199, 199, 199, 199, 199, 201, 199, 199, 199, 199, 199, 202, 178, 178,
+ 178, 178, 178, 178, 178, 178, 203, 26, 204, 204, 204, 205, 204, 206, 204, 206,
+ 207, 204, 208, 208, 208, 209, 210, 26, 211, 211, 211, 211, 211, 212, 211, 211,
+ 211, 213, 211, 214, 194, 194, 194, 194, 215, 215, 215, 216, 217, 217, 217, 217,
+ 217, 217, 217, 218, 217, 217, 217, 219, 217, 220, 217, 220, 217, 221, 9, 9,
+ 222, 26, 26, 26, 26, 26, 26, 26, 223, 223, 223, 223, 223, 223, 223, 223,
+ 223, 224, 223, 223, 223, 223, 223, 225, 226, 226, 226, 226, 226, 226, 226, 226,
+ 227, 227, 227, 227, 227, 227, 228, 229, 230, 230, 230, 230, 230, 230, 230, 231,
+ 230, 232, 233, 233, 233, 233, 233, 233, 18, 234, 165, 165, 165, 165, 165, 235,
+ 226, 26, 236, 9, 237, 238, 239, 240, 2, 2, 2, 2, 241, 242, 2, 2,
+ 2, 2, 2, 243, 244, 245, 2, 246, 2, 2, 2, 2, 2, 2, 2, 247,
+ 9, 9, 9, 9, 9, 9, 9, 248, 14, 14, 249, 249, 14, 14, 14, 14,
+ 249, 249, 14, 250, 14, 14, 14, 249, 14, 14, 14, 14, 14, 14, 251, 14,
+ 251, 14, 252, 253, 14, 14, 254, 255, 0, 256, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 257, 0, 258, 259, 0, 260, 2, 261, 0, 0, 0, 0,
+ 26, 26, 9, 9, 9, 9, 222, 26, 0, 0, 0, 0, 262, 263, 4, 0,
+ 0, 264, 0, 0, 2, 2, 2, 2, 2, 265, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 260, 26, 26, 26,
+ 0, 266, 26, 26, 0, 0, 0, 0, 267, 267, 267, 267, 267, 267, 267, 267,
+ 267, 267, 267, 267, 267, 267, 267, 267, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 268, 0, 0, 0, 269, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 270, 270, 270, 270, 270, 271, 270, 270,
+ 270, 270, 270, 271, 2, 2, 2, 2, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 272, 273, 165, 165, 165, 165, 166, 167, 274, 274,
+ 274, 274, 274, 274, 274, 275, 276, 275, 170, 170, 172, 26, 172, 172, 172, 172,
+ 172, 172, 172, 172, 18, 18, 18, 18, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 266, 26, 26, 26, 26, 26, 277, 277, 277, 278, 277, 277, 277, 277,
+ 277, 277, 277, 277, 277, 277, 279, 26, 277, 277, 277, 277, 277, 277, 277, 277,
+ 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277,
+ 277, 277, 280, 26, 26, 26, 0, 281, 282, 0, 0, 0, 283, 284, 0, 285,
+ 286, 287, 287, 287, 287, 287, 287, 287, 287, 287, 288, 289, 290, 291, 291, 291,
+ 291, 291, 291, 291, 291, 291, 291, 292, 293, 294, 294, 294, 294, 294, 295, 169,
+ 169, 169, 169, 169, 169, 169, 169, 169, 169, 296, 0, 0, 294, 294, 294, 294,
+ 0, 0, 0, 0, 281, 26, 291, 291, 169, 169, 169, 296, 0, 0, 0, 0,
+ 0, 0, 0, 0, 169, 169, 169, 297, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 291, 291, 291, 291, 291, 298, 291, 291, 291, 291, 291, 291, 291, 291,
+ 291, 291, 291, 0, 0, 0, 0, 0, 277, 277, 277, 277, 277, 277, 277, 277,
+ 0, 0, 0, 0, 0, 0, 0, 0, 277, 277, 277, 277, 277, 277, 277, 277,
+ 277, 277, 277, 277, 277, 277, 277, 299, 300, 300, 300, 300, 300, 300, 300, 300,
+ 300, 300, 300, 300, 300, 300, 300, 300, 300, 301, 300, 300, 300, 300, 300, 300,
+ 302, 26, 303, 303, 303, 303, 303, 303, 304, 304, 304, 304, 304, 304, 304, 304,
+ 304, 304, 304, 304, 304, 304, 304, 304, 304, 304, 304, 304, 304, 305, 26, 26,
+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306, 306, 306, 26, 0, 0, 0, 0, 307, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 308, 2, 2, 2, 2, 2, 2,
+ 309, 310, 26, 26, 26, 26, 311, 2, 312, 312, 312, 312, 312, 313, 0, 314,
+ 315, 315, 315, 315, 315, 315, 315, 26, 316, 316, 316, 316, 316, 316, 316, 316,
+ 317, 318, 316, 319, 53, 53, 53, 53, 320, 320, 320, 320, 320, 321, 322, 322,
+ 322, 322, 323, 324, 169, 169, 169, 325, 326, 326, 326, 326, 326, 326, 326, 326,
+ 326, 327, 326, 328, 164, 164, 164, 329, 330, 330, 330, 330, 330, 330, 331, 26,
+ 330, 332, 330, 333, 164, 164, 164, 164, 334, 334, 334, 334, 334, 334, 334, 334,
+ 335, 26, 26, 336, 337, 337, 338, 26, 339, 339, 339, 26, 172, 172, 2, 2,
+ 2, 2, 2, 340, 341, 342, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176,
+ 337, 337, 337, 337, 337, 343, 337, 344, 169, 169, 169, 169, 345, 26, 169, 169,
+ 296, 346, 169, 169, 169, 169, 169, 345, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 277, 277, 277, 277, 277, 277, 277, 277,
+ 277, 277, 277, 277, 277, 280, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277,
+ 277, 277, 277, 347, 26, 26, 26, 26, 348, 26, 349, 350, 25, 25, 351, 352,
+ 353, 25, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
+ 354, 26, 51, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
+ 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 355,
+ 26, 26, 31, 31, 31, 31, 31, 31, 31, 31, 356, 31, 31, 31, 31, 31,
+ 31, 26, 26, 26, 26, 26, 31, 357, 9, 9, 0, 314, 9, 358, 0, 0,
+ 0, 0, 359, 0, 260, 281, 50, 31, 31, 31, 31, 31, 31, 31, 31, 31,
+ 31, 31, 31, 31, 31, 31, 31, 360, 361, 0, 0, 0, 1, 2, 2, 3,
+ 1, 2, 2, 3, 362, 291, 290, 291, 291, 291, 291, 363, 169, 169, 169, 296,
+ 364, 364, 364, 365, 260, 260, 26, 366, 367, 368, 367, 367, 369, 367, 367, 370,
+ 367, 371, 367, 371, 26, 26, 26, 26, 367, 367, 367, 367, 367, 367, 367, 367,
+ 367, 367, 367, 367, 367, 367, 367, 372, 373, 0, 0, 0, 0, 0, 374, 0,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 255, 0, 375, 376, 26, 26, 26,
+ 26, 26, 0, 0, 0, 0, 0, 377, 378, 378, 378, 379, 380, 380, 380, 380,
+ 380, 380, 381, 26, 382, 0, 0, 281, 383, 383, 383, 383, 384, 385, 386, 386,
+ 386, 387, 388, 388, 388, 388, 388, 389, 390, 390, 390, 391, 392, 392, 392, 392,
+ 393, 392, 394, 26, 26, 26, 26, 26, 395, 395, 395, 395, 395, 395, 395, 395,
+ 395, 395, 396, 396, 396, 396, 396, 396, 397, 397, 397, 398, 397, 399, 400, 400,
+ 400, 400, 401, 400, 400, 400, 400, 401, 402, 402, 402, 402, 402, 26, 403, 403,
+ 403, 403, 403, 403, 404, 405, 26, 26, 406, 406, 406, 406, 406, 406, 406, 406,
+ 406, 406, 406, 406, 406, 406, 406, 406, 406, 406, 406, 406, 406, 406, 407, 26,
+ 406, 406, 408, 26, 406, 26, 26, 26, 409, 410, 411, 411, 411, 411, 412, 413,
+ 414, 414, 415, 414, 416, 416, 416, 416, 417, 417, 417, 418, 419, 417, 26, 26,
+ 26, 26, 26, 26, 420, 420, 421, 422, 423, 423, 423, 424, 425, 425, 425, 426,
+ 26, 26, 26, 26, 26, 26, 26, 26, 427, 427, 427, 427, 428, 428, 428, 429,
+ 428, 428, 430, 428, 428, 428, 428, 428, 431, 432, 433, 434, 435, 435, 436, 437,
+ 435, 438, 435, 438, 439, 439, 439, 439, 440, 440, 440, 440, 26, 26, 26, 26,
+ 441, 441, 441, 441, 442, 443, 442, 26, 444, 444, 444, 444, 444, 444, 445, 446,
+ 447, 447, 448, 447, 449, 449, 450, 449, 451, 451, 452, 453, 26, 454, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 455, 455, 455, 455, 455, 455, 455, 455,
+ 455, 456, 26, 26, 26, 26, 26, 26, 457, 457, 457, 457, 457, 457, 458, 26,
+ 457, 457, 457, 457, 457, 457, 458, 459, 460, 460, 460, 460, 460, 26, 460, 461,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 31, 31, 31, 462, 463, 463, 463, 463, 463, 464, 465, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 466, 466, 466, 466, 466, 26, 467, 467,
+ 467, 467, 467, 468, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 469, 469,
+ 469, 470, 26, 26, 471, 471, 472, 26, 473, 473, 473, 473, 473, 473, 473, 473,
+ 473, 474, 475, 473, 473, 473, 26, 476, 477, 477, 477, 477, 477, 477, 477, 477,
+ 478, 479, 480, 480, 480, 481, 480, 482, 483, 483, 483, 483, 483, 483, 484, 483,
+ 483, 26, 485, 485, 485, 485, 486, 26, 487, 487, 487, 487, 487, 487, 487, 487,
+ 487, 487, 487, 487, 488, 138, 489, 26, 490, 490, 491, 490, 490, 490, 490, 492,
+ 26, 26, 26, 26, 26, 26, 26, 26, 493, 494, 495, 496, 495, 497, 498, 498,
+ 498, 498, 498, 498, 498, 499, 498, 500, 501, 502, 503, 504, 504, 505, 506, 507,
+ 502, 508, 509, 510, 511, 512, 512, 26, 513, 513, 513, 513, 513, 513, 513, 513,
+ 513, 513, 513, 514, 515, 26, 26, 26, 516, 516, 516, 516, 516, 516, 516, 516,
+ 516, 26, 516, 517, 26, 26, 26, 26, 518, 518, 518, 518, 518, 518, 519, 518,
+ 518, 518, 518, 519, 26, 26, 26, 26, 520, 520, 520, 520, 520, 520, 520, 520,
+ 521, 26, 520, 522, 199, 523, 26, 26, 524, 524, 524, 524, 524, 524, 524, 525,
+ 524, 526, 26, 26, 26, 26, 26, 26, 527, 527, 527, 528, 527, 529, 527, 527,
+ 26, 26, 26, 26, 26, 26, 26, 26, 530, 530, 530, 530, 530, 530, 530, 531,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 532, 532, 532, 532,
+ 532, 532, 532, 532, 532, 532, 533, 534, 535, 536, 537, 538, 538, 538, 539, 540,
+ 535, 26, 538, 541, 26, 26, 26, 26, 26, 26, 26, 26, 542, 543, 542, 542,
+ 542, 542, 542, 543, 544, 26, 26, 26, 545, 545, 545, 545, 545, 545, 545, 545,
+ 545, 26, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 547, 26, 26, 26,
+ 548, 548, 548, 548, 548, 548, 548, 549, 550, 551, 550, 550, 550, 550, 552, 550,
+ 553, 26, 550, 550, 550, 554, 555, 555, 555, 555, 556, 555, 555, 557, 558, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 559, 560, 561, 561, 561, 561, 559, 562,
+ 561, 26, 561, 563, 564, 565, 566, 566, 566, 567, 568, 569, 566, 570, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 571, 571, 571, 572, 26, 26, 26, 26, 26, 26, 573, 26,
+ 108, 108, 108, 108, 108, 108, 574, 575, 576, 576, 576, 576, 576, 576, 576, 576,
+ 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 577, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 576, 576, 576, 576, 576, 576, 576, 576,
+ 576, 576, 576, 576, 576, 578, 579, 26, 576, 576, 576, 576, 576, 576, 576, 576,
+ 580, 26, 26, 26, 26, 26, 26, 26, 581, 581, 581, 581, 581, 581, 581, 581,
+ 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 582, 581, 583,
+ 26, 26, 26, 26, 26, 26, 26, 26, 584, 584, 584, 584, 584, 584, 584, 584,
+ 584, 584, 584, 584, 584, 584, 584, 584, 584, 584, 584, 584, 584, 584, 584, 584,
+ 585, 26, 26, 26, 26, 26, 26, 26, 306, 306, 306, 306, 306, 306, 306, 306,
+ 306, 306, 306, 306, 306, 306, 306, 306, 306, 306, 306, 306, 306, 306, 306, 586,
+ 587, 587, 587, 588, 587, 589, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 590, 590, 590, 591, 591, 26, 592, 592, 592, 592, 592, 592, 592, 592,
+ 593, 26, 592, 594, 594, 592, 592, 595, 592, 592, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 596, 596, 596, 596, 596, 596, 596, 596, 596, 596, 596, 597, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 598, 598, 598, 598, 598, 598, 598, 598,
+ 598, 599, 598, 598, 598, 598, 598, 598, 598, 600, 598, 598, 26, 26, 26, 26,
+ 26, 26, 26, 26, 601, 26, 347, 26, 602, 602, 602, 602, 602, 602, 602, 602,
+ 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602,
+ 602, 602, 602, 602, 602, 602, 602, 26, 603, 603, 603, 603, 603, 603, 603, 603,
+ 603, 603, 603, 603, 603, 603, 603, 603, 603, 603, 603, 603, 603, 603, 603, 603,
+ 603, 603, 604, 26, 26, 26, 26, 26, 602, 605, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 606, 287, 287, 287, 287, 287, 287, 287,
+ 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 287,
+ 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 288, 26, 26, 26, 26,
+ 26, 26, 607, 26, 608, 26, 609, 609, 609, 609, 609, 609, 609, 609, 609, 609,
+ 609, 609, 609, 609, 609, 609, 609, 609, 609, 609, 609, 609, 609, 609, 609, 609,
+ 609, 609, 609, 609, 609, 609, 609, 610, 611, 611, 611, 611, 611, 611, 611, 611,
+ 611, 611, 611, 611, 611, 612, 611, 613, 611, 614, 611, 615, 281, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 616, 26, 0, 0, 0, 0, 260, 361, 0, 0,
+ 0, 0, 0, 0, 617, 618, 0, 619, 620, 621, 0, 0, 0, 622, 0, 0,
+ 0, 0, 0, 0, 0, 623, 26, 26, 14, 14, 14, 14, 14, 14, 14, 14,
+ 249, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 0, 0, 281, 26, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 260, 26, 0, 0, 0, 623, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 257, 0, 0, 0, 0, 0, 0, 0, 0, 257, 624, 625, 0, 626,
+ 627, 0, 0, 0, 0, 0, 0, 0, 269, 628, 257, 257, 0, 0, 0, 629,
+ 630, 631, 632, 0, 0, 0, 0, 0, 0, 0, 0, 0, 616, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 268, 0, 0, 0, 0, 0, 0, 633, 633, 633, 633, 633, 633, 633, 633,
+ 633, 633, 633, 633, 633, 633, 633, 633, 633, 634, 26, 635, 636, 633, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 271, 270, 270, 637, 638, 639, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 640, 640, 640, 640, 640, 641, 640, 642,
+ 640, 643, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 644, 644, 644, 644, 644, 644, 644, 645, 646, 646, 646, 646, 646, 646, 646, 646,
+ 646, 646, 646, 646, 646, 646, 646, 646, 646, 646, 646, 646, 646, 646, 646, 646,
+ 647, 646, 648, 26, 26, 26, 26, 26, 649, 649, 649, 649, 649, 649, 649, 649,
+ 649, 650, 649, 651, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 361, 0, 0, 0, 0, 0, 0, 0, 375, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 361, 0, 0, 0, 0, 0, 0, 616,
+ 26, 26, 26, 26, 26, 26, 26, 26, 652, 31, 31, 31, 653, 654, 655, 656,
+ 657, 658, 653, 659, 653, 655, 655, 660, 31, 661, 31, 662, 663, 661, 31, 662,
+ 26, 26, 26, 26, 26, 26, 354, 26, 0, 0, 0, 0, 0, 281, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 281, 26, 0, 260, 361, 0,
+ 361, 0, 361, 0, 0, 0, 616, 26, 0, 0, 0, 0, 0, 616, 26, 26,
+ 26, 26, 26, 26, 664, 0, 0, 0, 665, 26, 0, 0, 0, 0, 0, 281,
+ 0, 623, 314, 26, 616, 26, 26, 26, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 26, 0, 375, 0, 375, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 281, 26, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 623, 0, 281, 26, 26, 0, 281, 0, 0, 0, 0, 0, 0,
+ 0, 26, 0, 314, 0, 0, 0, 0, 0, 26, 0, 0, 0, 616, 314, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 632, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 627, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 281, 26, 0, 616, 375, 266, 260, 26, 0, 0, 0, 623, 260, 26,
+ 266, 26, 260, 26, 26, 26, 26, 26, 0, 0, 359, 0, 0, 0, 0, 0,
+ 0, 266, 26, 26, 26, 26, 0, 314, 277, 277, 277, 277, 277, 277, 277, 277,
+ 277, 277, 277, 280, 26, 26, 26, 26, 277, 277, 277, 277, 277, 277, 299, 26,
+ 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 280, 277, 277, 277, 277,
+ 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 347, 26, 277, 277,
+ 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277,
+ 277, 277, 277, 277, 666, 26, 26, 26, 277, 277, 277, 280, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 277, 277, 277, 277, 277, 277, 277, 277,
+ 277, 667, 26, 26, 26, 26, 26, 26, 668, 26, 26, 26, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 939, 940, 941, 942, 946, 948, 0, 962,
+ 969, 970, 971, 976,1001,1002,1003,1008, 0,1033,1040,1041,1042,1043,1047, 0,
+ 0,1080,1081,1082,1086,1110, 0, 0,1124,1125,1126,1127,1131,1133, 0,1147,
+ 1154,1155,1156,1161,1187,1188,1189,1193, 0,1219,1226,1227,1228,1229,1233, 0,
+ 0,1267,1268,1269,1273,1298, 0,1303, 943,1128, 944,1129, 954,1139, 958,1143,
+ 959,1144, 960,1145, 961,1146, 964,1149, 0, 0, 973,1158, 974,1159, 975,1160,
+ 983,1168, 978,1163, 988,1173, 990,1175, 991,1176, 993,1178, 994,1179, 0, 0,
+ 1004,1190,1005,1191,1006,1192,1014,1199,1007, 0, 0, 0,1016,1201,1020,1206,
+ 0,1022,1208,1025,1211,1023,1209, 0, 0, 0, 0,1032,1218,1037,1223,1035,
+ 1221, 0, 0, 0,1044,1230,1045,1231,1049,1235, 0, 0,1058,1244,1064,1250,
+ 1060,1246,1066,1252,1067,1253,1072,1258,1069,1255,1077,1264,1074,1261, 0, 0,
+ 1083,1270,1084,1271,1085,1272,1088,1275,1089,1276,1096,1283,1103,1290,1111,1299,
+ 1115,1118,1307,1120,1309,1121,1310, 0,1053,1239, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,1093,1280, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 949,1134,1010,1195,1050,1236,1090,1277,1341,1368,1340,
+ 1367,1342,1369,1339,1366, 0,1320,1347,1418,1419,1323,1350, 0, 0, 992,1177,
+ 1018,1204,1055,1241,1416,1417,1415,1424,1202, 0, 0, 0, 987,1172, 0, 0,
+ 1031,1217,1321,1348,1322,1349,1338,1365, 950,1135, 951,1136, 979,1164, 980,1165,
+ 1011,1196,1012,1197,1051,1237,1052,1238,1061,1247,1062,1248,1091,1278,1092,1279,
+ 1071,1257,1076,1263, 0, 0, 997,1182, 0, 0, 0, 0, 0, 0, 945,1130,
+ 982,1167,1337,1364,1335,1362,1046,1232,1422,1423,1113,1301, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 0, 10,1425, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,1314,1427, 5,
+ 1434,1438,1443, 0,1450, 0,1455,1461,1514, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,1446,1458,1468,1476,1480,1486,1517, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,1489,1503,1494,1500,1508, 0, 0, 0, 0,1520,1521, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,1526,1528, 0,1525, 0, 0, 0,1522,
+ 0, 0, 0, 0,1536,1532,1539, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0,1534, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0,1556, 0, 0, 0, 0, 0, 0,1548,1550, 0,1547, 0, 0, 0,1567,
+ 0, 0, 0, 0,1558,1554,1561, 0, 0, 0, 0, 0, 0, 0,1568,1569,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,1529,1551, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,1523,1545,1524,1546, 0, 0,1527,1549,
+ 0, 0,1570,1571,1530,1552,1531,1553, 0, 0,1533,1555,1535,1557,1537,1559,
+ 0, 0,1572,1573,1544,1566,1538,1560,1540,1562,1541,1563,1542,1564, 0, 0,
+ 1543,1565, 0, 0, 0, 0, 0, 0, 0, 0,1606,1607,1609,1608,1610, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,1613, 0,1611, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1612, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0,1620, 0, 0, 0, 0, 0, 0, 0,1623, 0, 0,1624, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1614,1615,1616,1617,1618,1619,1621,1622, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,1628,1629, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,1625,1626, 0,1627, 0, 0, 0,1634, 0, 0,1635, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0,1630,1631,1632, 0, 0,1633, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1639, 0, 0,1638,1640, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,1636,1637, 0, 0, 0, 0, 0, 0,1641, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,1642,1644,1643, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1645, 0, 0, 0, 0, 0, 0, 0,1646, 0, 0, 0, 0, 0, 0,1648,
+ 1649, 0,1647,1650, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,1651,1653,1652, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,1654, 0,1655,1657,1656, 0, 0, 0, 0,1659, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0,1660, 0, 0, 0, 0,1661, 0, 0, 0, 0,1662,
+ 0, 0, 0, 0,1663, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0,1658, 0, 0, 0, 0, 0, 0, 0, 0, 0,1664, 0,1665,1673, 0,
+ 1674, 0, 0, 0, 0, 0, 0, 0, 0,1666, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1668, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0,1669, 0, 0, 0, 0,1670, 0, 0, 0, 0,1671,
+ 0, 0, 0, 0,1672, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0,1667, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1675, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1676, 0,
+ 1677, 0,1678, 0,1679, 0,1680, 0, 0, 0,1681, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,1682, 0,1683, 0, 0,1684,1685, 0,1686, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 953,1138, 955,1140, 956,1141, 957,1142,
+ 1324,1351, 963,1148, 965,1150, 968,1153, 966,1151, 967,1152,1378,1380,1379,1381,
+ 984,1169, 985,1170,1420,1421, 986,1171, 989,1174, 995,1180, 998,1183, 996,1181,
+ 999,1184,1000,1185,1015,1200,1329,1356,1017,1203,1019,1205,1021,1207,1024,1210,
+ 1687,1688,1027,1213,1026,1212,1028,1214,1029,1215,1030,1216,1034,1220,1036,1222,
+ 1039,1225,1038,1224,1334,1361,1336,1363,1382,1384,1383,1385,1056,1242,1057,1243,
+ 1059,1245,1063,1249,1689,1690,1065,1251,1068,1254,1070,1256,1386,1387,1388,1389,
+ 1691,1692,1073,1259,1075,1262,1079,1266,1078,1265,1095,1282,1098,1285,1097,1284,
+ 1390,1391,1392,1393,1099,1286,1100,1287,1101,1288,1102,1289,1105,1292,1104,1291,
+ 1106,1294,1107,1295,1108,1296,1114,1302,1119,1308,1122,1311,1123,1312,1186,1260,
+ 1293,1305, 0,1394, 0, 0, 0, 0, 952,1137, 947,1132,1317,1344,1316,1343,
+ 1319,1346,1318,1345,1693,1695,1371,1375,1370,1374,1373,1377,1372,1376,1694,1696,
+ 981,1166, 977,1162, 972,1157,1326,1353,1325,1352,1328,1355,1327,1354,1697,1698,
+ 1009,1194,1013,1198,1054,1240,1048,1234,1331,1358,1330,1357,1333,1360,1332,1359,
+ 1699,1700,1396,1401,1395,1400,1398,1403,1397,1402,1399,1404,1094,1281,1087,1274,
+ 1406,1411,1405,1410,1408,1413,1407,1412,1409,1414,1109,1297,1117,1306,1116,1304,
+ 1112,1300, 0, 0, 0, 0, 0, 0,1471,1472,1701,1705,1702,1706,1703,1707,
+ 1430,1431,1715,1719,1716,1720,1717,1721,1477,1478,1729,1731,1730,1732, 0, 0,
+ 1435,1436,1733,1735,1734,1736, 0, 0,1481,1482,1737,1741,1738,1742,1739,1743,
+ 1439,1440,1751,1755,1752,1756,1753,1757,1490,1491,1765,1768,1766,1769,1767,1770,
+ 1447,1448,1771,1774,1772,1775,1773,1776,1495,1496,1777,1779,1778,1780, 0, 0,
+ 1451,1452,1781,1783,1782,1784, 0, 0,1504,1505,1785,1788,1786,1789,1787,1790,
+ 0,1459, 0,1791, 0,1792, 0,1793,1509,1510,1794,1798,1795,1799,1796,1800,
+ 1462,1463,1808,1812,1809,1813,1810,1814,1467, 21,1475, 22,1479, 23,1485, 24,
+ 1493, 27,1499, 28,1507, 29, 0, 0,1704,1708,1709,1710,1711,1712,1713,1714,
+ 1718,1722,1723,1724,1725,1726,1727,1728,1740,1744,1745,1746,1747,1748,1749,1750,
+ 1754,1758,1759,1760,1761,1762,1763,1764,1797,1801,1802,1803,1804,1805,1806,1807,
+ 1811,1815,1816,1817,1818,1819,1820,1821,1470,1469,1822,1474,1465, 0,1473,1825,
+ 1429,1428,1426, 12,1432, 0, 26, 0, 0,1315,1823,1484,1466, 0,1483,1829,
+ 1433, 13,1437, 14,1441,1826,1827,1828,1488,1487,1513, 19, 0, 0,1492,1515,
+ 1445,1444,1442, 15, 0,1831,1832,1833,1502,1501,1516, 25,1497,1498,1506,1518,
+ 1457,1456,1454, 17,1453,1313, 11, 3, 0, 0,1824,1512,1519, 0,1511,1830,
+ 1449, 16,1460, 18,1464, 4, 0, 0, 30, 31, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0,
+ 0, 0, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,1834,1835, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,1836, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0,1837,1839,1838, 0, 0, 0, 0,1840, 0, 0, 0,
+ 0,1841, 0, 0,1842, 0, 0, 0, 0, 0, 0, 0,1843, 0,1844, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,1845, 0, 0,1846, 0, 0,1847,
+ 0,1848, 0, 0, 0, 0, 0, 0, 937, 0,1850, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0,1849, 936, 938,1851,1852, 0, 0,1853,1854, 0, 0,
+ 1855,1856, 0, 0, 0, 0, 0, 0,1857,1858, 0, 0,1861,1862, 0, 0,
+ 1863,1864, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,1867,1868,1869,1870,1859,1860,1865,1866, 0, 0, 0, 0,
+ 0, 0,1871,1872,1873,1874, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 32, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,1875, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,1877, 0,1878, 0,1879, 0,1880, 0,1881, 0,1882, 0,
+ 1883, 0,1884, 0,1885, 0,1886, 0,1887, 0,1888, 0, 0,1889, 0,1890,
+ 0,1891, 0, 0, 0, 0, 0, 0,1892,1893, 0,1894,1895, 0,1896,1897,
+ 0,1898,1899, 0,1900,1901, 0, 0, 0, 0, 0, 0,1876, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,1902, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,1904, 0,1905, 0,1906, 0,1907, 0,1908, 0,1909, 0,
+ 1910, 0,1911, 0,1912, 0,1913, 0,1914, 0,1915, 0, 0,1916, 0,1917,
+ 0,1918, 0, 0, 0, 0, 0, 0,1919,1920, 0,1921,1922, 0,1923,1924,
+ 0,1925,1926, 0,1927,1928, 0, 0, 0, 0, 0, 0,1903, 0, 0,1929,
+ 1930,1931,1932, 0, 0, 0,1933, 0, 710, 385, 724, 715, 455, 103, 186, 825,
+ 825, 242, 751, 205, 241, 336, 524, 601, 663, 676, 688, 738, 411, 434, 474, 500,
+ 649, 746, 799, 108, 180, 416, 482, 662, 810, 275, 462, 658, 692, 344, 618, 679,
+ 293, 388, 440, 492, 740, 116, 146, 168, 368, 414, 481, 527, 606, 660, 665, 722,
+ 781, 803, 809, 538, 553, 588, 642, 758, 811, 701, 233, 299, 573, 612, 487, 540,
+ 714, 779, 232, 267, 412, 445, 457, 585, 594, 766, 167, 613, 149, 148, 560, 589,
+ 648, 768, 708, 345, 411, 704, 105, 259, 313, 496, 518, 174, 542, 120, 307, 101,
+ 430, 372, 584, 183, 228, 529, 650, 697, 424, 732, 428, 349, 632, 355, 517, 110,
+ 135, 147, 403, 580, 624, 700, 750, 170, 193, 245, 297, 374, 463, 543, 763, 801,
+ 812, 815, 162, 384, 420, 730, 287, 330, 337, 366, 459, 476, 509, 558, 591, 610,
+ 726, 652, 734, 759, 154, 163, 198, 473, 683, 697, 292, 311, 353, 423, 572, 494,
+ 113, 217, 259, 280, 314, 499, 506, 603, 608, 752, 778, 782, 788, 117, 557, 748,
+ 774, 320, 109, 126, 260, 265, 373, 411, 479, 523, 655, 737, 823, 380, 765, 161,
+ 395, 398, 438, 451, 502, 516, 537, 583, 791, 136, 340, 769, 122, 273, 446, 727,
+ 305, 322, 400, 496, 771, 155, 190, 269, 377, 391, 406, 432, 501, 519, 599, 684,
+ 687, 749, 776, 175, 452, 191, 480, 510, 659, 772, 805, 813, 397, 444, 619, 566,
+ 568, 575, 491, 471, 707, 111, 636, 156, 153, 288, 346, 578, 256, 435, 383, 729,
+ 680, 767, 694, 295, 128, 210, 0, 0, 227, 0, 379, 0, 0, 150, 493, 525,
+ 544, 551, 552, 556, 783, 576, 604, 0, 661, 0, 703, 0, 0, 735, 743, 0,
+ 0, 0, 793, 794, 795, 808, 741, 773, 118, 127, 130, 166, 169, 177, 207, 213,
+ 215, 226, 229, 268, 270, 317, 327, 329, 335, 369, 375, 381, 404, 441, 448, 458,
+ 477, 484, 503, 539, 545, 547, 546, 548, 549, 550, 554, 555, 561, 564, 569, 591,
+ 593, 595, 598, 607, 620, 625, 625, 651, 690, 695, 705, 706, 716, 717, 733, 735,
+ 777, 786, 790, 315, 869, 623, 0, 0, 102, 145, 134, 115, 129, 138, 165, 171,
+ 207, 202, 206, 212, 227, 231, 240, 243, 250, 254, 294, 296, 303, 308, 319, 325,
+ 321, 329, 326, 335, 341, 357, 360, 362, 370, 379, 388, 389, 393, 421, 424, 438,
+ 456, 454, 458, 465, 477, 535, 485, 490, 493, 507, 512, 514, 521, 522, 525, 526,
+ 528, 533, 532, 541, 565, 569, 574, 586, 591, 597, 607, 637, 647, 674, 691, 693,
+ 695, 698, 703, 699, 705, 704, 702, 706, 709, 717, 728, 736, 747, 754, 770, 777,
+ 783, 784, 786, 787, 790, 802, 825, 848, 847, 857, 55, 65, 66, 883, 892, 916,
+ 822, 824, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0,1586, 0,1605, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,1602,1603,1934,1935,1574,1575,1576,1577,1579,1580,1581,1583,1584, 0,
+ 1585,1587,1588,1589,1591, 0,1592, 0,1593,1594, 0,1595,1596, 0,1598,1599,
+ 1600,1601,1604,1582,1578,1590,1597, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,1936, 0,1937, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,1938, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,1939,1940, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,1941,1942, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,1944,1943, 0,1945, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,1946,1947, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1948, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,1949,1950,1951,1952,1953,1954,1955, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,1956,1957,1958,1960,1959,1961, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 106, 104, 107, 826, 114, 118, 119, 121,
+ 123, 124, 127, 125, 34, 830, 130, 131, 132, 137, 827, 35, 133, 139, 829, 142,
+ 143, 112, 144, 145, 924, 151, 152, 37, 157, 158, 159, 160, 38, 165, 166, 169,
+ 171, 172, 173, 174, 176, 177, 178, 179, 181, 182, 182, 182, 833, 468, 184, 185,
+ 834, 187, 188, 189, 196, 192, 194, 195, 197, 199, 200, 201, 203, 204, 204, 206,
+ 208, 209, 211, 218, 213, 219, 214, 216, 153, 234, 221, 222, 223, 220, 225, 224,
+ 230, 835, 235, 236, 237, 238, 239, 244, 836, 837, 247, 248, 249, 246, 251, 39,
+ 40, 253, 255, 255, 838, 257, 258, 259, 261, 839, 262, 263, 301, 264, 41, 266,
+ 270, 272, 271, 841, 274, 842, 277, 276, 278, 281, 282, 42, 283, 284, 285, 286,
+ 43, 843, 44, 289, 290, 291, 293, 934, 298, 845, 845, 621, 300, 300, 45, 852,
+ 894, 302, 304, 46, 306, 309, 310, 312, 316, 48, 47, 317, 846, 318, 323, 324,
+ 325, 324, 328, 329, 333, 331, 332, 334, 335, 336, 338, 339, 342, 343, 347, 351,
+ 849, 350, 348, 352, 354, 359, 850, 361, 358, 356, 49, 363, 365, 367, 364, 50,
+ 369, 371, 851, 376, 386, 378, 53, 381, 52, 51, 140, 141, 387, 382, 614, 78,
+ 388, 389, 390, 394, 392, 856, 54, 399, 396, 402, 404, 858, 405, 401, 407, 55,
+ 408, 409, 410, 413, 859, 415, 56, 417, 860, 418, 57, 419, 422, 424, 425, 861,
+ 840, 862, 426, 863, 429, 431, 427, 433, 437, 441, 438, 439, 442, 443, 864, 436,
+ 449, 450, 58, 454, 453, 865, 447, 460, 866, 867, 461, 466, 465, 464, 59, 467,
+ 470, 469, 472, 828, 475, 868, 478, 870, 483, 485, 486, 871, 488, 489, 872, 873,
+ 495, 497, 60, 498, 61, 61, 504, 505, 507, 508, 511, 62, 513, 874, 515, 875,
+ 518, 844, 520, 876, 877, 878, 63, 64, 528, 880, 879, 881, 882, 530, 531, 531,
+ 533, 66, 534, 67, 68, 884, 536, 538, 541, 69, 885, 549, 886, 887, 556, 559,
+ 70, 561, 562, 563, 888, 889, 889, 567, 71, 890, 570, 571, 72, 891, 577, 73,
+ 581, 579, 582, 893, 587, 74, 590, 592, 596, 75, 895, 896, 76, 897, 600, 898,
+ 602, 605, 607, 899, 900, 609, 901, 611, 853, 77, 615, 616, 79, 617, 252, 902,
+ 903, 854, 855, 621, 622, 731, 80, 627, 626, 628, 164, 629, 630, 631, 633, 904,
+ 632, 634, 639, 640, 635, 641, 646, 651, 638, 643, 644, 645, 905, 907, 906, 81,
+ 653, 654, 656, 911, 657, 908, 82, 83, 909, 910, 84, 664, 665, 666, 667, 669,
+ 668, 671, 670, 674, 672, 673, 675, 85, 677, 678, 86, 681, 682, 912, 685, 686,
+ 87, 689, 36, 913, 914, 88, 89, 696, 702, 709, 711, 915, 712, 713, 718, 719,
+ 917, 831, 721, 720, 723, 832, 725, 728, 918, 919, 739, 742, 744, 920, 745, 753,
+ 756, 757, 755, 760, 761, 921, 762, 90, 764, 922, 91, 775, 279, 780, 923, 925,
+ 92, 93, 785, 926, 94, 927, 787, 787, 789, 928, 792, 95, 796, 797, 798, 800,
+ 96, 929, 802, 804, 806, 97, 98, 807, 930, 99, 931, 932, 933, 814, 100, 816,
+ 817, 818, 819, 820, 821, 935, 0, 0,
+};
+static const int16_t
+_hb_ucd_i16[196] =
+{
+ 0, 0, 0, 0, 1, -1, 0, 0, 2, 0, -2, 0, 0, 0, 0, 2,
+ 0, -2, 0, 0, 0, 0, 0, 16, 0, 0, 0, -16, 0, 0, 1, -1,
+ 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, -1, 0, 3, 3, 3, -3,
+ -3, -3, 0, 0, 0, 2016, 0, 0, 0, 0, 0, 2527, 1923, 1914, 1918, 0,
+ 2250, 0, 0, 0, 0, 0, 0, 138, 0, 7, 0, 0, -7, 0, 0, 0,
+ 1, -1, 1, -1, -1, 1, -1, 0, 1824, 0, 0, 0, 0, 0, 2104, 0,
+ 2108, 2106, 0, 2106, 1316, 0, 0, 0, 0, 1, -1, 1, -1, -138, 0, 0,
+ 1, -1, 8, 8, 8, 0, 7, 7, 0, 0, -8, -8, -8, -7, -7, 0,
+ 1, -1, 0, 2,-1316, 1, -1, 0, -1, 1, -1, 1, -1, 3, 1, -1,
+ -3, 1, -1, 1, -1, 0, 0,-1914,-1918, 0, 0,-1923,-1824, 0, 0, 0,
+ 0,-2016, 0, 0, 1, -1, 0, 1, 0, 0,-2104, 0, 0, 0, 0,-2106,
+ -2108,-2106, 0, 0, 1, -1,-2250, 0, 0, 0,-2527, 0, 0, -2, 0, 1,
+ -1, 0, 1, -1,
+};
+
+static inline uint_fast8_t
+_hb_ucd_gc (unsigned u)
+{
+ return u<1114110u?_hb_ucd_u8[6504+(((_hb_ucd_u8[1264+(((_hb_ucd_u16[((_hb_ucd_u8[544+(((_hb_ucd_u8[u>>1>>3>>3>>4])<<4)+((u>>1>>3>>3)&15u))])<<3)+((u>>1>>3)&7u)])<<3)+((u>>1)&7u))])<<1)+((u)&1u))]:2;
+}
+static inline uint_fast8_t
+_hb_ucd_ccc (unsigned u)
+{
+ return u<125259u?_hb_ucd_u8[8768+(((_hb_ucd_u8[7792+(((_hb_ucd_u8[7120+(((_hb_ucd_u8[6874+(u>>2>>3>>4)])<<4)+((u>>2>>3)&15u))])<<3)+((u>>2)&7u))])<<2)+((u)&3u))]:0;
+}
+static inline unsigned
+_hb_ucd_b4 (const uint8_t* a, unsigned i)
+{
+ return (a[i>>1]>>((i&1u)<<2))&15u;
+}
+static inline int_fast16_t
+_hb_ucd_bmg (unsigned u)
+{
+ return u<65380u?_hb_ucd_i16[((_hb_ucd_u8[9508+(((_hb_ucd_u8[9388+(((_hb_ucd_b4(9260+_hb_ucd_u8,u>>2>>3>>3))<<3)+((u>>2>>3)&7u))])<<3)+((u>>2)&7u))])<<2)+((u)&3u)]:0;
+}
+static inline uint_fast8_t
+_hb_ucd_sc (unsigned u)
+{
+ return u<918000u?_hb_ucd_u8[10974+(((_hb_ucd_u16[1960+(((_hb_ucd_u8[10286+(((_hb_ucd_u8[9836+(u>>3>>4>>4)])<<4)+((u>>3>>4)&15u))])<<4)+((u>>3)&15u))])<<3)+((u)&7u))]:2;
+}
+static inline uint_fast16_t
+_hb_ucd_dm (unsigned u)
+{
+ return u<195102u?_hb_ucd_u16[5768+(((_hb_ucd_u8[16708+(((_hb_ucd_u8[16326+(u>>4>>5)])<<5)+((u>>4)&31u))])<<4)+((u)&15u))]:0;
+}
+
+
+#else
+
+static const uint8_t
+_hb_ucd_u8[13344] =
+{
+ 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 5, 5, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 16, 5, 17, 15, 15, 18, 15, 19, 20, 21,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 22, 23,
+ 5, 24, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 25, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 34, 34, 34, 35, 36, 37, 34, 34, 34, 38, 39, 40, 41,
+ 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
+ 58, 59, 60, 61, 62, 62, 63, 64, 65, 66, 67, 68, 69, 67, 70, 71,
+ 67, 67, 62, 72, 62, 62, 73, 67, 74, 75, 76, 77, 78, 67, 67, 67,
+ 79, 80, 34, 81, 82, 83, 67, 67, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 84, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 85, 34, 34, 34, 34, 34, 34, 34, 34, 86, 34, 34, 87, 88, 89, 90,
+ 91, 92, 93, 94, 95, 96, 97, 98, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
+ 100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,
+ 100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,
+ 100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,
+ 100,100, 34, 34, 34, 34,101,102, 34, 34,103,104,105,106,107,108,
+ 34, 34,109,110,111,112,113,114,115,116,117,111, 34, 34, 34,111,
+ 118,119,120,121,122,123,124,125, 34,126,127,111,128,129,130,131,
+ 132,133,134,135,136,137,138,111,139,140,111,141,142,143,144,111,
+ 145,146,147,148,149,150,111,111,151,152,153,154,111,155,111,156,
+ 34, 34, 34, 34, 34, 34, 34, 34,157, 34, 34,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 34, 34, 34, 34, 34, 34, 34, 34,158,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111, 34, 34, 34, 34, 34,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 34, 34, 34, 34,159,160,161, 34,111,111,111,111,162,163,164,165,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,111,111,111,111,111,
+ 34, 34, 34, 34, 34, 34,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111, 34,166,111,111,111,111,111,111,
+ 67, 67,167,168,169,128, 65,111,170,171,172,173,174,175,176,177,
+ 67, 67, 67, 67,178,179,111,111,111,111,111,111,111,111,111,111,
+ 180,111,181,111,111,182,111,111,111,111,111,111,111,111,111,111,
+ 34,183,184,111,111,111,111,111,128,185,186,111, 34,187,111,111,
+ 67, 67,188, 67, 67,111, 67,189, 67, 67, 67, 67, 67, 67, 67, 67,
+ 67, 67, 67, 67, 67, 67, 67,190,111,111,111,111,111,111,111,111,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 34, 34, 34, 34, 34,111,111,111,111,111,111,111,111,111,111,111,
+ 34, 34, 34, 34, 34, 34, 34,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 191,111,180,180,111,111,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 2, 4, 5, 6, 2,
+ 7, 7, 7, 7, 7, 2, 8, 9, 10, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 17, 18, 19, 1, 20, 20, 21, 22, 23, 24, 25,
+ 26, 27, 15, 2, 28, 29, 27, 30, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 31, 11, 11, 11, 32, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 33, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 34, 34, 34, 34, 34, 34, 34, 34, 16, 32, 32, 32,
+ 32, 32, 32, 32, 11, 34, 34, 16, 34, 32, 32, 11, 34, 11, 16, 11,
+ 11, 34, 32, 11, 32, 16, 11, 34, 32, 32, 32, 11, 34, 16, 32, 11,
+ 34, 11, 34, 34, 32, 35, 32, 16, 36, 36, 37, 34, 38, 37, 34, 34,
+ 34, 34, 34, 34, 34, 34, 16, 32, 34, 38, 32, 11, 32, 32, 32, 32,
+ 32, 32, 16, 16, 16, 11, 34, 32, 34, 34, 11, 32, 32, 32, 32, 32,
+ 16, 16, 39, 16, 16, 16, 16, 16, 40, 40, 40, 40, 40, 40, 40, 40,
+ 40, 41, 41, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41,
+ 40, 40, 42, 41, 41, 41, 42, 42, 41, 41, 41, 41, 41, 41, 41, 41,
+ 43, 43, 43, 43, 43, 43, 43, 43, 32, 32, 42, 32, 16, 44, 16, 10,
+ 41, 41, 41, 45, 11, 11, 11, 11, 34, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 34,
+ 16, 11, 32, 16, 32, 32, 32, 32, 16, 16, 32, 46, 34, 32, 34, 11,
+ 32, 47, 43, 43, 48, 32, 32, 32, 11, 34, 34, 34, 34, 34, 34, 16,
+ 11, 11, 11, 11, 49, 2, 2, 2, 16, 16, 16, 16, 50, 51, 52, 53,
+ 54, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 55,
+ 56, 57, 43, 56, 43, 43, 43, 43, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 58, 2, 2, 2, 2, 2, 2, 59, 59, 59, 8, 9, 60, 2, 61,
+ 43, 43, 43, 43, 43, 57, 59, 2, 62, 36, 36, 36, 36, 63, 43, 43,
+ 7, 7, 7, 7, 7, 2, 2, 36, 64, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 65, 43, 43, 43, 66, 47, 43, 43, 67, 68, 69, 43, 43, 36,
+ 7, 7, 7, 7, 7, 36, 70, 71, 2, 2, 2, 2, 2, 2, 2, 72,
+ 63, 36, 36, 36, 36, 36, 36, 36, 43, 43, 43, 43, 43, 43, 64, 36,
+ 36, 36, 36, 43, 43, 43, 43, 43, 7, 7, 7, 7, 7, 36, 36, 36,
+ 36, 36, 36, 36, 36, 63, 43, 43, 43, 43, 40, 21, 2, 40, 68, 20,
+ 36, 36, 36, 43, 43, 68, 43, 43, 43, 43, 68, 43, 68, 43, 43, 43,
+ 2, 2, 2, 2, 2, 2, 2, 2, 36, 36, 36, 36, 63, 43, 43, 2,
+ 36, 63, 43, 43, 43, 43, 43, 43, 43, 73, 43, 43, 43, 43, 43, 43,
+ 43, 74, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 74, 64, 75,
+ 76, 43, 43, 43, 74, 75, 76, 75, 63, 43, 43, 43, 36, 36, 36, 36,
+ 36, 43, 2, 7, 7, 7, 7, 7, 77, 36, 36, 36, 36, 36, 36, 36,
+ 63, 75, 78, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 64, 75,
+ 76, 43, 43, 74, 75, 75, 76, 36, 36, 36, 36, 79, 75, 75, 36, 36,
+ 36, 43, 43, 7, 7, 7, 7, 7, 36, 20, 27, 27, 27, 53, 58, 43,
+ 43, 74, 78, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 43, 75,
+ 76, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 64, 36, 36, 36,
+ 36, 36, 36, 7, 7, 7, 7, 7, 43, 36, 63, 2, 2, 2, 2, 2,
+ 76, 43, 43, 43, 74, 75, 76, 43, 60, 20, 20, 20, 80, 43, 43, 43,
+ 43, 75, 78, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 64, 76,
+ 76, 43, 43, 74, 75, 75, 76, 43, 43, 43, 43, 74, 75, 75, 36, 36,
+ 71, 27, 27, 27, 27, 27, 27, 27, 43, 64, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 75, 74, 75, 75, 75, 75, 75, 76, 43,
+ 36, 36, 36, 79, 75, 75, 75, 75, 75, 75, 75, 7, 7, 7, 7, 7,
+ 27, 81, 61, 61, 53, 61, 61, 61, 74, 75, 64, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 43, 74, 75, 75, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 36, 36, 36, 36, 7, 7, 7, 82, 27, 27, 27, 81,
+ 63, 75, 65, 36, 36, 36, 36, 36, 75, 75, 75, 74, 75, 75, 43, 43,
+ 43, 43, 74, 75, 75, 75, 75, 36, 83, 36, 36, 36, 36, 36, 36, 36,
+ 43, 75, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 63, 64, 75,
+ 76, 43, 43, 75, 75, 75, 76, 70, 61, 61, 36, 79, 27, 27, 27, 84,
+ 27, 27, 27, 27, 81, 36, 36, 36, 36, 36, 36, 36, 36, 43, 43, 74,
+ 75, 43, 43, 43, 75, 75, 75, 75, 7, 75, 2, 2, 2, 2, 2, 2,
+ 63, 36, 43, 43, 43, 43, 43, 85, 36, 36, 36, 68, 43, 43, 43, 57,
+ 7, 7, 7, 7, 7, 2, 2, 2, 63, 36, 43, 43, 43, 43, 64, 36,
+ 36, 36, 36, 40, 43, 43, 43, 43, 7, 7, 7, 7, 7, 7, 36, 36,
+ 70, 61, 2, 2, 2, 2, 2, 2, 2, 86, 86, 61, 43, 61, 61, 61,
+ 7, 7, 7, 7, 7, 27, 27, 27, 27, 27, 47, 47, 47, 4, 4, 75,
+ 63, 43, 43, 43, 43, 43, 43, 74, 43, 43, 57, 43, 36, 36, 63, 43,
+ 43, 43, 43, 43, 43, 43, 43, 61, 61, 61, 61, 69, 61, 61, 61, 61,
+ 2, 2, 86, 61, 21, 2, 2, 2, 36, 36, 36, 36, 36, 79, 76, 43,
+ 74, 43, 43, 43, 76, 74, 76, 64, 36, 36, 36, 75, 43, 36, 36, 43,
+ 64, 75, 78, 79, 75, 75, 75, 36, 63, 43, 64, 36, 36, 36, 36, 36,
+ 36, 74, 76, 74, 75, 75, 76, 79, 7, 7, 7, 7, 7, 75, 76, 61,
+ 16, 16, 16, 16, 16, 50, 44, 16, 36, 36, 36, 36, 36, 36, 63, 43,
+ 2, 2, 2, 2, 87, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+ 61, 61, 61, 61, 61, 61, 61, 61, 11, 11, 11, 11, 16, 16, 16, 16,
+ 88, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 70, 65,
+ 89, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 90, 91, 91,
+ 36, 36, 36, 36, 36, 58, 2, 92, 93, 36, 36, 36, 36, 36, 36, 36,
+ 36, 43, 43, 43, 43, 43, 43, 43, 36, 43, 57, 2, 2, 2, 2, 2,
+ 36, 36, 43, 76, 43, 43, 43, 75, 75, 75, 75, 74, 76, 43, 43, 43,
+ 43, 43, 2, 77, 2, 60, 63, 43, 7, 7, 7, 7, 7, 7, 7, 7,
+ 2, 2, 2, 94, 2, 56, 43, 59, 36, 95, 36, 36, 36, 36, 36, 36,
+ 36, 36, 63, 64, 36, 36, 36, 36, 36, 36, 36, 36, 63, 36, 36, 36,
+ 43, 74, 75, 76, 74, 75, 75, 75, 75, 74, 75, 75, 76, 43, 43, 43,
+ 61, 61, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 27, 27, 61,
+ 36, 36, 36, 63, 74, 76, 43, 2, 36, 36, 79, 74, 43, 43, 43, 43,
+ 74, 74, 76, 43, 43, 43, 74, 75, 75, 76, 43, 43, 43, 43, 43, 43,
+ 2, 2, 2, 77, 2, 2, 2, 2, 43, 43, 43, 43, 43, 43, 43, 96,
+ 43, 43, 78, 36, 36, 36, 36, 36, 36, 36, 74, 43, 43, 74, 74, 75,
+ 75, 74, 78, 36, 36, 36, 36, 36, 86, 61, 61, 61, 61, 47, 43, 43,
+ 43, 43, 61, 61, 61, 61, 61, 61, 43, 78, 36, 36, 36, 36, 36, 36,
+ 79, 43, 43, 75, 43, 76, 43, 36, 36, 36, 36, 74, 43, 75, 76, 76,
+ 43, 75, 75, 75, 75, 75, 2, 2, 36, 36, 75, 75, 75, 75, 43, 43,
+ 43, 43, 75, 43, 43, 57, 2, 2, 7, 7, 7, 7, 7, 7, 83, 36,
+ 36, 36, 36, 36, 40, 40, 40, 2, 43, 57, 43, 43, 43, 43, 43, 43,
+ 74, 43, 43, 43, 64, 36, 63, 36, 36, 36, 64, 79, 43, 36, 36, 36,
+ 16, 16, 16, 16, 16, 16, 40, 40, 40, 40, 40, 40, 40, 44, 16, 16,
+ 16, 16, 16, 16, 44, 16, 16, 16, 16, 16, 16, 16, 16, 97, 40, 40,
+ 32, 32, 32, 16, 16, 16, 16, 32, 16, 16, 16, 16, 11, 11, 11, 11,
+ 16, 16, 16, 16, 34, 11, 11, 11, 16, 16, 16, 16, 98, 98, 98, 98,
+ 16, 16, 16, 16, 11, 11, 99,100, 41, 16, 16, 16, 11, 11, 99, 41,
+ 16, 16, 16, 16, 11, 11,101, 41,102,102,102,102,102,103, 59, 59,
+ 51, 51, 51, 2,104,105,104,105, 2, 2, 2, 2,106, 59, 59,107,
+ 2, 2, 2, 2,108,109, 2,110,111, 2,112,113, 2, 2, 2, 2,
+ 2, 9,111, 2, 2, 2, 2,114, 59, 59, 59, 59, 59, 59, 59, 59,
+ 115, 40, 27, 27, 27, 8,112,116, 27, 27, 27, 27, 27, 8,112, 91,
+ 20, 20, 20, 20, 20, 20, 20, 20, 43, 43, 43, 43, 43, 43,117, 48,
+ 96, 48, 96, 43, 43, 43, 43, 43, 61,118, 61,119, 61, 34, 11, 16,
+ 11, 32,119, 61, 46, 11, 11, 61, 61, 61,118,118,118, 11, 11,120,
+ 11, 11, 35, 36, 39, 61, 16, 11, 8, 8, 46, 16, 16, 26, 61,121,
+ 92, 92, 92, 92, 92, 92, 92, 92, 92,122,123, 92,124, 61, 61, 61,
+ 8, 8,125, 61, 61, 8, 61, 61,125, 26, 61,125, 61, 61, 61,125,
+ 61, 61, 61, 61, 61, 61, 61, 8, 61,125,125, 61, 61, 61, 61, 61,
+ 61, 61, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 61, 61, 61, 61, 4, 4, 61, 61, 8, 61, 61, 61,126,127, 61, 61,
+ 61, 61, 61, 61, 61, 61,125, 61, 61, 61, 61, 61, 61, 26, 8, 8,
+ 8, 8, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 8, 8,
+ 8, 61, 61, 61, 61, 61, 61, 61, 27, 27, 27, 27, 27, 27, 61, 61,
+ 61, 61, 61, 61, 61, 27, 27, 27, 61, 61, 61, 26, 61, 61, 61, 61,
+ 26, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 8, 8, 8, 8,
+ 61, 61, 61, 61, 61, 61, 61, 26, 61, 61, 61, 61, 4, 4, 4, 4,
+ 4, 4, 4, 27, 27, 27, 27, 27, 27, 27, 61, 61, 61, 61, 61, 61,
+ 8, 8,112,128, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4,
+ 8,112,129,129,129,129,129,129,129,129,129,129,128, 8, 8, 8,
+ 8, 8, 8, 8, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8,
+ 8, 8,125, 26, 8, 8,125, 61, 32, 11, 32, 34, 34, 34, 34, 11,
+ 32, 32, 34, 16, 16, 16, 40, 11, 32, 32,121, 61, 61,119, 34,130,
+ 43, 32, 16, 16, 50, 2, 87, 2, 36, 36, 36, 36, 36, 36, 36, 95,
+ 2, 2, 2, 2, 2, 2, 2, 56, 2,104,104, 2,108,109,104, 2,
+ 2, 2, 2, 6, 2, 94,104, 2,104, 4, 4, 4, 4, 2, 2, 77,
+ 2, 2, 2, 2, 2, 51, 2, 2, 94,131, 2, 2, 2, 2, 2, 2,
+ 61, 2, 2, 2, 2, 2, 2, 2, 1, 2,132,133, 4, 4, 4, 4,
+ 4, 61, 4, 4, 4, 4,134, 91,135, 92, 92, 92, 92, 43, 43, 75,
+ 136, 40, 40, 61, 92,137, 58, 61, 71, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 63,138,139, 62, 36, 36, 36, 36, 36, 58, 40, 62,
+ 61, 27, 27, 61, 61, 61, 61, 61, 27, 27, 27, 27, 27, 61, 61, 61,
+ 61, 61, 61, 61, 27, 27, 27, 27,140, 27, 27, 27, 27, 27, 27, 27,
+ 36, 36, 95, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,141, 2,
+ 32, 32, 32, 32, 32, 32, 32, 63, 48,142, 43, 43, 43, 43, 43, 77,
+ 32, 32, 32, 32, 32, 32, 40, 43, 36, 36, 36, 92, 92, 92, 92, 92,
+ 43, 2, 2, 2, 2, 2, 2, 2, 41, 41, 41,139, 40, 40, 40, 40,
+ 41, 32, 32, 32, 32, 32, 32, 32, 16, 32, 32, 32, 32, 32, 32, 32,
+ 44, 16, 16, 16, 34, 34, 34, 32, 32, 32, 32, 32, 42,143, 34, 35,
+ 32, 32, 16, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 11, 11, 32,
+ 11, 11, 32, 32, 32, 32, 32, 32, 16, 32, 11, 11, 34, 16, 16, 16,
+ 16, 16, 34, 35, 40, 35, 36, 36, 36, 64, 36, 64, 36, 63, 36, 36,
+ 36, 79, 76, 74, 61, 61, 43, 43, 27, 27, 27, 61,144, 61, 61, 61,
+ 36, 36, 2, 2, 2, 2, 2, 2, 75, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 75, 75, 75, 75, 75, 75, 75, 75, 43, 43, 43, 43, 43, 2,
+ 43, 36, 36, 36, 2, 65, 65, 63, 36, 36, 36, 43, 43, 43, 43, 2,
+ 36, 36, 36, 63, 43, 43, 43, 43, 43, 75, 75, 75, 75, 75, 75,145,
+ 36, 63, 75, 43, 43, 75, 43, 75,145, 2, 2, 2, 2, 2, 2, 77,
+ 7, 7, 7, 7, 7, 7, 7, 2, 36, 36, 63, 62, 36, 36, 36, 36,
+ 36, 36, 36, 36, 63, 43, 43, 74, 76, 74, 76, 43, 43, 43, 43, 43,
+ 36, 63, 36, 36, 36, 36, 74, 75, 7, 7, 7, 7, 7, 7, 2, 2,
+ 62, 36, 36, 70, 61, 79, 74, 36, 64, 43, 64, 63, 64, 36, 36, 43,
+ 36, 36, 36, 36, 36, 36, 95, 2, 36, 36, 36, 36, 36, 79, 43, 75,
+ 2, 95,146, 43, 43, 43, 43, 43, 16, 16, 16, 16, 16,100, 40, 40,
+ 16, 16, 16, 16, 97, 41, 41, 41, 36, 79, 76, 75, 74,145, 76, 43,
+ 147,147,147,147,147,147,147,147,148,148,148,148,148,148,148,148,
+ 16, 16, 16, 16, 16, 16, 35, 64, 36, 36, 36, 36,149, 36, 36, 36,
+ 36, 41, 41, 41, 41, 41, 41, 41, 41,150, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36,129,151,151,151,151,151,151,151,151,
+ 36, 36, 36, 36, 36, 36,144, 61, 2, 2, 2,152,113, 2, 2, 2,
+ 6,153,154,129,129,129,129,129,129,129,113,152,113, 2,110,155,
+ 2, 2, 2, 2,134,129,129,113, 2,156, 8, 8, 60, 2, 2, 2,
+ 36, 36, 36, 36, 36, 36, 36,157, 2, 2, 3, 2, 4, 5, 6, 2,
+ 16, 16, 16, 16, 16, 17, 18,112,113, 4, 2, 36, 36, 36, 36, 36,
+ 62, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 40,
+ 20,158, 53, 20, 26, 8,125, 61, 61, 61, 61, 61,159, 59, 61, 61,
+ 2, 2, 2, 87, 27, 27, 27, 27, 27, 27, 27, 81, 61, 61, 61, 61,
+ 92, 92,124, 27, 81, 61, 61, 61, 61, 61, 61, 61, 61, 27, 61, 61,
+ 61, 61, 61, 61, 61, 61, 47, 43,160,160,160,160,160,160,160,160,
+ 161, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 84, 36,
+ 133, 36, 36, 36, 36, 92, 92, 92, 36, 36, 36, 36, 36, 36, 36, 58,
+ 162, 92, 92, 92, 92, 92, 92, 92, 36, 36, 36, 58, 27, 27, 27, 27,
+ 36, 36, 36, 70,140, 27, 27, 27, 36, 36, 36,163, 27, 27, 27, 27,
+ 36, 36, 36, 36, 36,163, 27, 27, 36, 36, 36, 27, 27, 27, 27, 30,
+ 36, 36, 36, 36, 36, 36, 27, 36, 63, 43, 43, 43, 43, 43, 43, 43,
+ 36, 36, 36, 36, 43, 43, 43, 43, 36, 36, 36, 36, 36, 36,163, 30,
+ 36, 36, 36, 36, 36, 36,163, 27, 36, 36, 36, 36, 71, 36, 36, 36,
+ 36, 36, 63, 43, 43,161, 27, 27, 36, 36, 36, 36, 58, 2, 2, 2,
+ 36, 36, 36, 36, 27, 27, 27, 27, 16, 16, 16, 16, 16, 27, 27, 27,
+ 36, 36, 43, 43, 43, 43, 43, 43, 36, 36, 36, 36, 36, 63,164, 51,
+ 27, 27, 27, 84, 36, 36, 36, 36,161, 27, 30, 2, 2, 2, 2, 2,
+ 36, 36,163, 27, 27, 27, 27, 27, 76, 78, 36, 36, 36, 36, 36, 36,
+ 43, 43, 43, 57, 2, 2, 2, 2, 2, 27, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,165,
+ 75, 76, 43, 74, 76, 57, 72, 2, 2, 2, 2, 2, 2, 2, 72, 59,
+ 36, 36, 36, 63, 43, 43, 76, 43, 43, 43, 43, 7, 7, 7, 7, 7,
+ 2, 2, 79, 78, 36, 36, 36, 36, 36, 63, 2, 36, 36, 36, 36, 36,
+ 36, 79, 75, 43, 43, 43, 43, 74, 78, 36, 58, 2, 56, 43, 57, 76,
+ 7, 7, 7, 7, 7, 58, 58, 2, 87, 27, 27, 27, 27, 27, 27, 27,
+ 36, 36, 36, 36, 36, 36, 75, 76, 43, 75, 74, 43, 2, 2, 2, 43,
+ 36, 36, 36, 36, 36, 36, 36, 63, 74, 75, 75, 75, 75, 75, 75, 75,
+ 36, 36, 36, 79, 75, 75, 78, 36, 36, 75, 75, 43, 43, 43, 43, 43,
+ 36, 36, 79, 75, 43, 43, 43, 43, 75, 43, 74, 64, 36, 58, 2, 2,
+ 7, 7, 7, 7, 7, 2, 2, 64, 75, 76, 43, 43, 74, 74, 75, 76,
+ 74, 43, 36, 65, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 79,
+ 75, 43, 43, 43, 75, 75, 43, 76, 57, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 36, 36, 43, 43, 75, 76, 43, 43, 43, 74, 76, 76,
+ 57, 2, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 63, 76, 75,
+ 43, 43, 43, 76, 36, 36, 36, 36, 75, 43, 43, 76, 43, 43, 43, 43,
+ 7, 7, 7, 7, 7, 27, 2, 86, 43, 43, 43, 43, 76, 57, 2, 2,
+ 27, 27, 27, 27, 27, 27, 27, 84, 75, 75, 75, 75, 75, 76, 74, 64,
+ 78, 76, 2, 2, 2, 2, 2, 2, 79, 75, 43, 43, 43, 43, 75, 75,
+ 64, 65, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75,
+ 63, 43, 43, 43, 43, 64, 36, 36, 36, 63, 43, 43, 74, 63, 43, 57,
+ 2, 2, 2, 56, 43, 43, 43, 43, 63, 43, 43, 74, 76, 43, 36, 36,
+ 36, 36, 36, 36, 36, 43, 43, 43, 43, 43, 43, 74, 43, 2, 65, 2,
+ 43, 43, 43, 43, 43, 43, 43, 76, 58, 2, 2, 2, 2, 2, 2, 2,
+ 2, 36, 36, 36, 36, 36, 36, 36, 43, 43, 43, 43, 74, 43, 43, 43,
+ 74, 43, 76, 43, 43, 43, 43, 43, 43, 43, 43, 63, 43, 43, 43, 43,
+ 36, 36, 36, 36, 36, 75, 75, 75, 43, 74, 76, 76, 36, 36, 36, 36,
+ 36, 63, 74,145, 2, 2, 2, 2, 27, 27, 81, 61, 61, 61, 53, 20,
+ 144, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 21,
+ 43, 43, 57, 2, 2, 2, 2, 2, 43, 43, 43, 57, 2, 2, 61, 61,
+ 40, 40, 86, 61, 61, 61, 61, 61, 7, 7, 7, 7, 7,166, 27, 27,
+ 27, 84, 36, 36, 36, 36, 36, 36, 27, 27, 27, 30, 2, 2, 2, 2,
+ 79, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76,
+ 43, 67, 40, 40, 40, 40, 40, 40, 40, 77, 43, 43, 43, 43, 43, 43,
+ 36, 36, 36, 36, 36, 36, 47, 57, 61, 61,167, 76, 43, 61,167, 75,
+ 75,168, 59, 59, 59, 73, 43, 43, 43, 69, 47, 43, 43, 43, 61, 61,
+ 61, 61, 61, 61, 61, 43, 43, 61, 61, 43, 69, 61, 61, 61, 61, 61,
+ 11, 11, 11, 11, 11, 16, 16, 16, 16, 16, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 16, 11, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 11, 11, 11, 11, 11, 16, 16, 16, 16, 16,
+ 31, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 33, 16, 16,
+ 16, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 31, 16, 16,
+ 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 31, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 31, 16, 16, 16, 16, 33, 16, 16, 16,
+ 11, 11, 11, 11, 31, 16, 16, 16, 16, 33, 16, 16, 16, 32, 16, 7,
+ 43, 43, 43, 69, 61, 47, 43, 43, 43, 43, 43, 43, 43, 43, 69, 61,
+ 61, 61, 47, 61, 61, 61, 61, 61, 61, 61, 69, 21, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 56, 43, 43, 43, 43, 43, 67, 40, 40, 40, 40,
+ 7, 7, 7, 7, 7, 7, 7, 70, 36, 36, 36, 36, 36, 36, 43, 43,
+ 7, 7, 7, 7, 7, 7, 7,169, 16, 16, 43, 43, 43, 67, 40, 40,
+ 27, 27, 27, 27, 27, 27,140, 27,170, 27, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 27, 27, 27, 27,140, 27, 27, 27, 27, 27, 27, 81, 61,
+ 61, 61, 61, 61, 61, 25, 41, 41, 0, 0, 29, 21, 21, 21, 23, 21,
+ 22, 18, 21, 25, 21, 17, 13, 13, 25, 25, 25, 21, 21, 9, 9, 9,
+ 9, 22, 21, 18, 24, 16, 24, 5, 5, 5, 5, 22, 25, 18, 25, 0,
+ 23, 23, 26, 21, 24, 26, 7, 20, 25, 1, 26, 24, 26, 25, 15, 15,
+ 24, 15, 7, 19, 15, 21, 9, 25, 9, 5, 5, 25, 5, 9, 5, 7,
+ 7, 7, 9, 8, 8, 5, 7, 5, 6, 6, 24, 24, 6, 24, 12, 12,
+ 6, 5, 9, 21, 25, 9, 26, 12, 11, 11, 9, 6, 5, 21, 17, 17,
+ 17, 26, 26, 23, 23, 12, 17, 12, 21, 12, 12, 21, 7, 21, 1, 1,
+ 21, 23, 26, 26, 6, 7, 7, 12, 12, 7, 21, 7, 12, 1, 12, 6,
+ 6, 12, 12, 26, 7, 26, 26, 7, 21, 1, 1, 12, 12, 10, 10, 10,
+ 10, 12, 21, 6, 10, 7, 7, 10, 23, 7, 15, 26, 13, 21, 13, 7,
+ 15, 7, 12, 23, 21, 26, 21, 15, 17, 7, 29, 7, 7, 22, 18, 18,
+ 14, 14, 14, 7, 17, 21, 7, 6, 11, 12, 5, 6, 8, 8, 8, 24,
+ 5, 24, 9, 24, 29, 29, 29, 1, 20, 19, 22, 20, 27, 28, 1, 29,
+ 21, 20, 19, 21, 21, 16, 16, 21, 25, 22, 18, 21, 21, 29, 15, 6,
+ 18, 6, 12, 11, 9, 26, 26, 9, 26, 5, 5, 26, 14, 9, 5, 14,
+ 14, 15, 25, 26, 26, 22, 18, 26, 18, 25, 18, 22, 5, 12, 22, 21,
+ 26, 6, 7, 14, 17, 22, 26, 14, 17, 6, 14, 6, 12, 24, 24, 6,
+ 26, 15, 6, 21, 11, 21, 24, 9, 23, 26, 10, 21, 6, 10, 4, 4,
+ 3, 3, 7, 25, 24, 7, 22, 22, 21, 22, 17, 16, 16, 22, 16, 16,
+ 25, 17, 7, 1, 25, 24, 26, 1, 2, 2, 12, 15, 21, 14, 7, 15,
+ 12, 17, 13, 12, 13, 15, 26, 10, 10, 1, 13, 23, 23, 15, 0, 1,
+ 2, 3, 4, 5, 6, 7, 8, 9, 0, 10, 11, 12, 13, 0, 14, 0,
+ 0, 0, 0, 0, 15, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 21, 22, 23,
+ 0, 0, 0, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 34, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 37, 38, 0, 0, 0, 0, 0, 0, 39, 40,
+ 0, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 2, 0, 0, 0, 0, 3, 0, 0, 0, 4, 5, 6, 7, 0, 8,
+ 9, 10, 0, 11, 12, 13, 0, 14, 15, 16, 15, 17, 15, 18, 15, 18,
+ 15, 18, 0, 18, 0, 19, 15, 18, 20, 18, 0, 21, 22, 23, 24, 25,
+ 26, 27, 28, 29, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 32, 0, 0, 0, 0, 0, 0, 33, 0, 0, 34, 0, 0, 35, 0,
+ 36, 0, 0, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 0, 46,
+ 0, 0, 0, 47, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 49,
+ 0, 50, 0, 51, 52, 0, 53, 0, 0, 0, 0, 0, 0, 54, 55, 56,
+ 0, 0, 0, 0, 57, 0, 0, 58, 59, 60, 61, 62, 0, 0, 63, 64,
+ 0, 0, 0, 65, 0, 0, 0, 0, 66, 0, 0, 0, 67, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68, 0, 0, 0, 69,
+ 0, 70, 0, 0, 71, 0, 0, 72, 0, 0, 0, 0, 0, 0, 0, 0,
+ 73, 0, 0, 0, 0, 0, 74, 0, 0, 75, 0, 0, 0, 76, 77, 0,
+ 78, 61, 0, 79, 80, 0, 0, 81, 82, 83, 0, 0, 0, 84, 0, 85,
+ 0, 0, 50, 86, 50, 0, 87, 0, 88, 0, 0, 0, 77, 0, 0, 0,
+ 89, 90, 0, 91, 92, 93, 94, 0, 0, 0, 0, 0, 50, 0, 0, 0,
+ 0, 95, 96, 0, 0, 0, 0, 97, 98, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 99, 0, 0,100, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0,101,102, 0, 0,103, 0, 0, 0, 0, 0, 0,104, 0, 0, 0,
+ 98, 0, 0, 0, 0, 0, 0,105, 0, 0, 0, 0, 0, 0, 0,106,
+ 0,107, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 0,
+ 8, 0, 0, 0, 0, 9, 10, 11, 12, 0, 0, 0, 0, 13, 0, 0,
+ 14, 15, 0, 16, 0, 17, 18, 0, 0, 19, 0, 20, 21, 0, 0, 0,
+ 0, 0, 22, 23, 0, 24, 25, 0, 0, 26, 0, 0, 0, 27, 28, 29,
+ 0, 0, 0, 30, 31, 32, 0, 0, 31, 0, 0, 33, 31, 0, 0, 0,
+ 31, 34, 0, 0, 0, 0, 0, 35, 36, 0, 0, 0, 0, 0, 0, 37,
+ 38, 0, 0, 0, 0, 0, 0, 39, 40, 0, 0, 0, 0, 41, 0, 42,
+ 0, 0, 0, 43, 44, 0, 0, 0, 45, 0, 0, 0, 0, 0, 0, 46,
+ 47, 0, 0, 0, 0, 48, 0, 0, 0, 49, 0, 49, 0, 50, 0, 0,
+ 0, 0, 51, 0, 0, 0, 0, 52, 0, 53, 0, 0, 0, 0, 54, 55,
+ 0, 0, 0, 56, 57, 0, 0, 0, 0, 0, 0, 58, 49, 0, 59, 60,
+ 0, 0, 61, 0, 0, 0, 62, 63, 0, 0, 0, 64, 0, 65, 66, 67,
+ 68, 69, 1, 70, 0, 71, 72, 73, 0, 0, 74, 75, 0, 0, 0, 76,
+ 0, 0, 1, 1, 0, 0, 77, 0, 0, 78, 0, 0, 0, 0, 74, 79,
+ 0, 80, 0, 0, 0, 0, 0, 75, 81, 0, 82, 0, 49, 0, 1, 75,
+ 0, 0, 83, 0, 0, 84, 0, 0, 0, 0, 0, 85, 54, 0, 0, 0,
+ 0, 0, 0, 86, 87, 0, 0, 81, 0, 0, 31, 0, 0, 88, 0, 0,
+ 0, 0, 89, 0, 0, 0, 0, 47, 0, 0, 57, 0, 0, 0, 0, 90,
+ 91, 0, 0, 92, 0, 0, 93, 0, 0, 0, 94, 0, 0, 0, 95, 0,
+ 96, 57, 0, 0, 81, 0, 0, 76, 0, 0, 0, 97, 98, 0, 0, 99,
+ 100, 0, 0, 0, 0, 0, 0,101, 0, 0,102, 0, 0, 0, 0,103,
+ 31, 0,104,105,106, 33, 0, 0,107, 0, 0, 0,108, 0, 0, 0,
+ 0, 0, 0,109, 0, 0,110, 0, 0, 0, 0,111, 85, 0, 0, 0,
+ 0, 0, 54, 0, 0, 0, 0, 49,112, 0, 0, 0, 0,113, 0, 0,
+ 114, 0, 0, 0, 0,112, 0, 0, 0, 0, 0,115, 0, 0, 0,116,
+ 0, 0, 0,117, 0,118, 0, 0, 0, 0,119,120,121, 0,122, 0,
+ 123, 0, 0, 0,124,125,126, 0, 0, 0,127, 0, 0,128, 0, 0,
+ 129, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4,
+ 5, 6, 7, 4, 4, 8, 9, 10, 1, 11, 12, 13, 14, 15, 16, 17,
+ 18, 1, 1, 1, 19, 1, 0, 0, 20, 21, 22, 1, 23, 4, 21, 24,
+ 25, 26, 27, 28, 29, 30, 0, 0, 1, 1, 31, 0, 0, 0, 32, 33,
+ 34, 35, 1, 36, 37, 0, 0, 0, 0, 38, 1, 39, 14, 39, 40, 41,
+ 42, 0, 0, 0, 43, 36, 44, 45, 21, 45, 46, 0, 0, 0, 19, 1,
+ 21, 0, 0, 47, 0, 38, 48, 1, 1, 49, 49, 50, 0, 0, 51, 0,
+ 52, 1, 1, 1, 53, 21, 43, 54, 55, 21, 35, 1, 0, 0, 0, 56,
+ 0, 0, 0, 57, 58, 59, 0, 0, 0, 0, 0, 60, 0, 61, 0, 0,
+ 0, 0, 62, 63, 0, 0, 64, 0, 0, 0, 65, 0, 0, 0, 66, 0,
+ 0, 0, 67, 0, 0, 0, 68, 0, 0, 0, 69, 0, 0, 70, 71, 0,
+ 72, 73, 74, 75, 76, 77, 0, 0, 0, 78, 0, 0, 0, 79, 80, 0,
+ 0, 0, 0, 47, 0, 0, 0, 49, 0, 63, 0, 0, 64, 0, 0, 81,
+ 0, 0, 82, 0, 0, 0, 83, 0, 0, 19, 84, 0, 63, 0, 0, 0,
+ 0, 49, 1, 85, 1, 54, 15, 86, 84, 0, 0, 0, 0, 56, 0, 0,
+ 0, 0, 19, 10, 1, 0, 0, 0, 0, 0, 87, 0, 0, 88, 0, 0,
+ 87, 0, 0, 0, 0, 79, 0, 0, 89, 9, 12, 4, 90, 8, 91, 47,
+ 0, 59, 50, 0, 21, 1, 21, 92, 93, 1, 1, 1, 1, 94, 95, 96,
+ 97, 1, 98, 59, 81, 99,100, 4, 59, 0, 0, 0, 0, 0, 0, 19,
+ 50, 0, 0, 0, 0, 0, 0, 62, 0, 0,101,102, 0, 0,103, 0,
+ 0, 1, 1, 50, 0, 0, 0, 38, 0, 64, 0, 0, 0, 0, 0, 63,
+ 0, 0, 52, 69, 62, 0, 0, 0, 79, 0, 0, 0,104,105, 59, 38,
+ 81, 0, 0, 0, 0, 0, 0,106, 1, 14, 4, 12, 0, 38, 89, 0,
+ 0, 0, 0,107, 0, 0,108, 62, 0,109, 0, 0, 0, 1, 0, 0,
+ 0, 0, 19, 59, 0,110, 14, 54, 0, 0,111, 0, 89, 0, 0, 0,
+ 62, 63, 0, 0, 63, 0, 88, 0, 0,111, 0, 0, 0, 0,112, 0,
+ 0, 0, 79, 56, 0, 38, 1, 59, 1, 59, 0, 0, 64, 88, 0, 0,
+ 113, 0, 0, 0, 56, 0, 0, 0, 0,113, 0, 0, 0, 0, 62, 0,
+ 0, 0, 0, 80, 0, 62, 0, 0, 0, 0, 57, 0, 88,114, 0, 0,
+ 8, 91, 0, 0, 1, 89, 0, 0,115, 0, 0, 0, 0, 0, 0,116,
+ 0,117,118,119,120, 0, 52, 4,121, 49, 23, 0, 0, 0, 38, 50,
+ 38, 59, 0, 0, 1, 89, 1, 1, 1, 1, 39, 1, 48,104, 89, 0,
+ 0, 0, 0, 1, 4,121, 0, 0, 0, 1,122, 0, 0, 0, 0, 0,
+ 230,230,230,230,230,232,220,220,220,220,232,216,220,220,220,220,
+ 220,202,202,220,220,220,220,202,202,220,220,220, 1, 1, 1, 1,
+ 1,220,220,220,220,230,230,230,230,240,230,220,220,220,230,230,
+ 230,220,220, 0,230,230,230,220,220,220,220,230,232,220,220,230,
+ 233,234,234,233,234,234,233,230, 0, 0, 0,230, 0,220,230,230,
+ 230,230,220,230,230,230,222,220,230,230,220,220,230,222,228,230,
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 19, 20, 21, 22, 0, 23,
+ 0, 24, 25, 0,230,220, 0, 18, 30, 31, 32, 0, 0, 0, 0, 27,
+ 28, 29, 30, 31, 32, 33, 34,230,230,220,220,230,220,230,230,220,
+ 35, 0, 0, 0, 0, 0,230,230,230, 0, 0,230,230, 0,220,230,
+ 230,220, 0, 0, 0, 36, 0, 0,230,220,230,230,220,220,230,220,
+ 220,230,220,230,220,230,230, 0, 0,220, 0, 0,230,230, 0,230,
+ 0,230,230,230,230,230, 0, 0, 0,220,220,220, 0, 0, 0,220,
+ 230,230, 0,220,230,220,220,220, 27, 28, 29,230, 7, 0, 0, 0,
+ 0, 9, 0, 0, 0,230,220,230,230, 0, 0, 0, 0, 0,230, 0,
+ 0, 84, 91, 0, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 9, 0,
+ 103,103, 9, 0,107,107,107,107,118,118, 9, 0,122,122,122,122,
+ 220,220, 0, 0, 0,220, 0,220, 0,216, 0, 0, 0,129,130, 0,
+ 132, 0, 0, 0, 0, 0,130,130,130,130, 0, 0,130, 0,230,230,
+ 9, 0,230,230, 0, 0,220, 0, 0, 0, 0, 7, 0, 9, 9, 0,
+ 0,230, 0, 0, 0,228, 0, 0, 0,222,230,220,220, 0, 0, 0,
+ 230, 0, 0,220,230,220, 0,220, 0, 0, 9, 9, 0, 0, 7, 0,
+ 230,230,230, 0,230, 0, 1, 1, 1, 0, 0, 0,230,234,214,220,
+ 202,230,230,230,230,230,232,228,228,220, 0,230,233,220,230,220,
+ 230,230, 1, 1, 1, 1, 1,230, 0, 1, 1,230,220,230, 1, 1,
+ 0, 0,218,228,232,222,224,224, 0, 8, 8, 0,230, 0,230,230,
+ 220, 0, 0,230, 0, 0, 26, 0, 0,220, 0,230,230, 1,220, 0,
+ 0,230,220, 0, 0, 0,220,220, 0, 9, 7, 0, 0, 7, 9, 0,
+ 0, 0, 9, 7, 9, 9, 0, 0, 6, 6, 0, 0, 0, 0, 1, 0,
+ 0,216,216, 1, 1, 1, 0, 0, 0,226,216,216,216,216,216, 0,
+ 220,220,220, 0,230,230, 7, 0, 16, 17, 17, 33, 17, 49, 17, 17,
+ 84, 97,135,145, 26, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17,177, 0, 1, 2, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3,
+ 3, 3, 5, 3, 3, 3, 3, 3, 6, 7, 8, 3, 3, 3, 3, 3,
+ 9, 10, 11, 12, 13, 3, 3, 3, 3, 3, 3, 3, 3, 14, 3, 15,
+ 3, 3, 3, 3, 3, 3, 16, 17, 18, 19, 20, 21, 3, 3, 3, 22,
+ 23, 3, 3, 3, 3, 3, 3, 3, 24, 3, 3, 3, 3, 3, 3, 3,
+ 3, 25, 3, 3, 26, 27, 0, 1, 0, 0, 0, 0, 0, 1, 0, 2,
+ 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 4, 0, 5,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
+ 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,
+ 9, 0, 0, 0, 0, 0, 0, 9, 0, 9, 0, 0, 0, 0, 0, 0,
+ 0, 10, 11, 12, 13, 0, 0, 14, 15, 16, 6, 0, 17, 18, 19, 19,
+ 19, 20, 21, 22, 23, 24, 19, 25, 0, 26, 27, 19, 19, 28, 29, 30,
+ 0, 31, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 19, 28, 0,
+ 32, 33, 9, 34, 35, 19, 0, 0, 36, 37, 38, 39, 40, 19, 0, 41,
+ 42, 43, 44, 31, 0, 1, 45, 42, 0, 0, 0, 0, 0, 32, 14, 14,
+ 0, 0, 0, 0, 14, 0, 0, 46, 47, 47, 47, 47, 48, 49, 47, 47,
+ 47, 47, 50, 51, 52, 53, 43, 21, 0, 0, 0, 0, 0, 0, 0, 54,
+ 6, 55, 0, 14, 19, 1, 0, 0, 0, 19, 56, 31, 0, 0, 0, 0,
+ 0, 0, 0, 57, 14, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 3,
+ 0, 0, 0, 58, 59, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+ 0, 0, 2, 3, 0, 4, 5, 0, 0, 6, 0, 0, 0, 7, 0, 0,
+ 0, 1, 1, 0, 0, 8, 9, 0, 8, 9, 0, 0, 0, 0, 8, 9,
+ 10, 11, 12, 0, 0, 0, 13, 0, 0, 0, 0, 14, 15, 16, 17, 0,
+ 0, 0, 1, 0, 0, 18, 19, 0, 0, 0, 20, 0, 0, 0, 1, 1,
+ 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 8, 21, 9, 0, 0,
+ 22, 0, 0, 0, 0, 1, 0, 23, 24, 25, 0, 0, 26, 0, 0, 0,
+ 8, 21, 27, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 28, 29, 30,
+ 0, 31, 32, 20, 1, 1, 0, 0, 0, 8, 21, 9, 1, 4, 5, 0,
+ 0, 0, 33, 9, 0, 1, 1, 1, 0, 8, 21, 21, 21, 21, 34, 1,
+ 35, 21, 21, 21, 9, 36, 0, 0, 37, 38, 1, 0, 39, 0, 0, 0,
+ 1, 0, 1, 0, 0, 0, 0, 8, 21, 9, 1, 0, 0, 0, 40, 0,
+ 8, 21, 21, 21, 21, 21, 21, 21, 21, 9, 0, 1, 1, 1, 1, 8,
+ 21, 21, 21, 9, 0, 0, 0, 41, 0, 42, 43, 0, 0, 0, 1, 44,
+ 0, 0, 0, 45, 8, 9, 1, 0, 1, 0, 1, 1, 8, 21, 21, 9,
+ 0, 4, 5, 8, 9, 1, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7,
+ 7, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 10, 11, 11,
+ 11, 11, 11, 12, 12, 12, 12, 13, 14, 15, 16, 17, 18, 12, 19, 12,
+ 20, 12, 12, 12, 12, 21, 22, 22, 22, 23, 12, 12, 12, 12, 24, 25,
+ 12, 12, 26, 27, 28, 29, 30, 31, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 32, 12, 33, 7, 7, 34, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 35, 0, 0, 1, 2, 2, 2, 3,
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 32, 33, 33,
+ 33, 34, 35, 35, 35, 35, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 2, 2, 51, 51, 52, 53, 54, 55, 56, 56,
+ 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 56, 56, 56, 56,
+ 56, 56, 58, 59, 60, 61, 56, 62, 62, 63, 64, 65, 66, 67, 68, 69,
+ 70, 56, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 71, 62, 62, 62, 62, 72, 72, 72, 72, 72, 72,
+ 72, 72, 72, 73, 74, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
+ 85, 86, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 87, 87, 87, 87, 87, 87,
+ 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 62, 62, 62, 62,
+ 88, 89, 89, 89, 90, 89, 91, 92, 93, 94, 95, 95, 96, 97, 87, 98,
+ 99,100,101,102,103, 87,104,104,104, 87,105,106,107,108,109,110,
+ 111,112,113,114,115, 87, 89,116,117,118,119,120,121,122,123,124,
+ 125, 87,126,127, 87,128,129,130,131, 87,132,133,134,135,136,137,
+ 87, 87,138,139,140,141, 87,142, 87,143,144,144,144,144,144,144,
+ 144,144,144,144,144, 87, 87, 87, 87, 87,145,145,145,145,145,145,
+ 145,145,145, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,
+ 87, 87,146,146,146,146,146, 87, 87, 87,147,147,147,147,148,149,
+ 150,150, 87, 87, 87, 87,151,151,152,153,154,154,154,154,154,154,
+ 154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,
+ 155,155,155,155,154, 87, 87, 87, 87, 87,156,157,158,159,159,159,
+ 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,
+ 87, 87,160,161, 87, 87, 87, 87, 87, 87, 56, 56,162,163, 51, 56,
+ 56, 87, 56, 56, 56, 56, 56, 56, 56, 56,164,164,164,164,164,164,
+ 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,165, 87,166, 87, 87,167,
+ 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,168,168,169, 87, 87, 87,
+ 87, 87, 56, 56, 56, 87, 89, 89, 87, 87, 56, 56, 56, 56,170, 87,
+ 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
+ 56, 56, 87, 87, 87, 87, 87, 87, 87, 87, 62, 62, 62, 62, 62, 62,
+ 62, 62, 87, 87, 87, 87, 87, 87, 87, 87, 62, 62, 62, 62, 62, 87,
+ 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 62, 62, 62, 62, 62, 62,
+ 62, 87, 87, 87, 87, 87, 87, 87, 87, 87, 56, 87,171,171, 0, 1,
+ 2, 2, 0, 1, 2, 2, 2, 3, 4, 5, 0, 0, 0, 0, 1, 2,
+ 1, 2, 0, 0, 3, 3, 4, 5, 4, 5, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 6, 0, 0, 7, 0, 8, 8, 8, 8, 8, 8,
+ 8, 9, 10, 11, 11, 11, 11, 11, 12, 11, 13, 13, 13, 13, 13, 13,
+ 13, 13, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 15, 16, 16,
+ 16, 16, 16, 17, 18, 18, 18, 18, 18, 18, 19, 20, 21, 21, 22, 23,
+ 21, 24, 21, 21, 21, 21, 21, 25, 21, 21, 26, 26, 26, 26, 26, 21,
+ 21, 21, 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 29, 30, 30,
+ 26, 26, 21, 21, 21, 21, 21, 21, 31, 21, 32, 32, 32, 32, 32, 33,
+ 34, 32, 35, 35, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36,
+ 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38,
+ 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40,
+ 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42,
+ 42, 42, 43, 43, 43, 43, 43, 43, 43, 43, 44, 44, 44, 45, 44, 44,
+ 44, 44, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47,
+ 47, 47, 47, 47, 47, 47, 47, 48, 47, 47, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 50, 50, 50, 50, 50, 51, 52, 52, 52, 52, 52, 52,
+ 52, 52, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54,
+ 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 56, 56, 57, 57, 57, 57,
+ 58, 57, 59, 59, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64, 64,
+ 64, 64, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 55, 55, 55,
+ 55, 55, 67, 67, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 69,
+ 64, 64, 70, 70, 71, 71, 71, 71, 71, 71, 71, 71, 71, 8, 8, 8,
+ 8, 8, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73, 74, 74,
+ 74, 74, 75, 75, 75, 75, 75, 76, 76, 76, 13, 50, 50, 50, 73, 77,
+ 78, 79, 4, 4, 80, 4, 4, 81, 82, 83, 4, 4, 4, 84, 8, 8,
+ 8, 8, 11, 11, 11, 11, 11, 11, 11, 11, 85, 0, 0, 0, 0, 0,
+ 0, 86, 0, 4, 0, 0, 0, 8, 8, 8, 0, 0, 87, 88, 89, 0,
+ 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 90, 90, 90, 90, 90, 90, 90, 90, 91, 91, 91, 91, 91, 91,
+ 4, 4, 92, 92, 92, 92, 92, 92, 92, 92, 50, 50, 50, 93, 93, 93,
+ 93, 93, 53, 53, 53, 53, 53, 53, 13, 13, 94, 94, 94, 94, 94, 94,
+ 94, 94, 94, 94, 94, 94, 94, 94, 94, 0, 95, 0, 96, 97, 98, 99,
+ 99, 99, 99,100,101,102,102,102,102,103,104,104,104,105, 52, 52,
+ 52, 52, 52, 0,104,104, 0, 0, 0,102, 52, 52, 0, 0, 0, 0,
+ 52,106, 0, 0, 0, 0, 0,102,102,107,102,102,102,102,102,108,
+ 0, 0, 94, 94, 94, 94, 0, 0, 0, 0,109,109,109,109,109,109,
+ 109,109,109,109,109,109,109,110,110,110,111,111,111,111,111,111,
+ 111,111,111,111,111,111, 13, 13, 13, 13, 13, 13,112,112,112,112,
+ 112,112, 0, 0,113, 4, 4, 4, 4, 4,114, 4, 4, 4, 4, 4,
+ 4, 4,115,115,115, 0,116,116,116,116,117,117,117,117,117,117,
+ 32, 32,118,118,119,120,120,120, 52, 52,121,121,121,121,122,121,
+ 49, 49,123,123,123,123,123,123, 49, 49,124,124,124,124,124,124,
+ 125,125, 53, 53, 53, 4, 4,126,127, 54, 54, 54, 54, 54,125,125,
+ 125,125,128,128,128,128,128,128,128,128, 4,129, 18, 18, 18, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,130, 0, 21,
+ 21, 21, 8, 0,131, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21,
+ 21,132, 0, 0, 1, 2, 1, 2,133,101,102,134, 52, 52, 52, 52,
+ 0, 0,135,135,135,135,135,135,135,135, 0, 0, 0, 0, 11, 11,
+ 11, 11, 11, 0, 11, 11, 11, 0, 0,136,137,137,138,138,138,138,
+ 139, 0,140,140,140,141,141,142,142,142,143,143,144,144,144,144,
+ 144,144,145,145,145,145,145,146,146,146,147,147,147,148,148,148,
+ 148,148,149,149,149,150,150,150,150,150,151,151,151,151,151,151,
+ 151,151,152,152,152,152,153,153,154,154,155,155,155,155,155,155,
+ 156,156,157,157,158,158,158,158,158,158,159,159,160,160,160,160,
+ 160,160,161,161,161,161,161,161,162,162,163,163,163,163,164,164,
+ 164,164,165,165,165,165,166,166,167,167,168,168,168,168,168,168,
+ 168,168,169,169,169,169,169,169,169,169,170,170,170,170,170,170,
+ 170,170,171,171,171,171,171,171,171,171,172,172,172,172,172,172,
+ 172,172,173,173,173,174,174,174,174,174,175,175,175,175,175,175,
+ 176,176,177,177,177,177,177,177,177,177,178,178,178,178,178,179,
+ 179,179,180,180,180,180,180,181,181,181,182,182,182,182,182,182,
+ 183, 43,184,184,184,184,184,184,184,184,185,185,185,186,186,186,
+ 186,186,187,187,187,188,187,187,187,187,189,189,189,189,189,189,
+ 189,189,190,190,190,190,190,190,190,190,191,191,191,191,191,191,
+ 191,191,192,192,192,192,192,192, 66, 66,193,193,193,193,193,193,
+ 193,193,194,194,194,194,194,194,194,194,195,195,195,195,195,195,
+ 195,195,196,196,196,196,196,196,196,196,197,197,197,197,197,197,
+ 197,197,198,198,198,198,198,198,198,198,199,199,199,199,199,200,
+ 200,200,200,200,200,200,201,201,201,201,202,202,202,202,202,202,
+ 202,203,203,203,203,203,203,203,203,203,204,204,204,204,204,204,
+ 205,205,205,205,205,205,205,205,205,205,206,206,206,206,206,206,
+ 206,206,110,110,110,110, 39, 39, 39, 39,207,207,207,207,207,207,
+ 207,207,208,208,208,208,208,208,208,208,209,209,209,209,209,209,
+ 209,209,112,112,112,112,112,112,112,112,112,112,112,112,210,210,
+ 210,210,211,211,211,211,211,211,211,211,212,212,212,212,212,212,
+ 212,212,213,213,213,213,213,213,213,213,214,214,214,214,214,214,
+ 214,214,214,214,214,214,214,214,215, 94,216,216,216,216,216,216,
+ 216,216,217,217,217,217,217,217,217,217,218, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
+ 219,220,220,220,220,220,220,220,220,220,221,221,221,221,221,221,
+ 221,221,221,221, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 222,223,224, 0,225, 0, 0, 0, 0, 0,226,226,226,226,226,226,
+ 226,226, 91, 91, 91, 91, 91, 91, 91, 91,227,227,227,227,227,227,
+ 227,227,228,228,228,228,228,228,228,228,229,229,229,229,229,229,
+ 229,229,230,230,230,230,230,230,230,230,231, 0, 0, 0, 0, 0,
+ 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 1, 2,
+ 2, 2, 2, 2, 3, 0, 0, 0, 4, 0, 2, 2, 2, 2, 2, 3,
+ 2, 2, 2, 2, 5, 0, 2, 5, 6, 0, 7, 7, 7, 7, 8, 9,
+ 8, 10, 8, 11, 8, 8, 8, 8, 8, 8, 12, 13, 13, 13, 14, 14,
+ 14, 14, 14, 15, 14, 14, 16, 17, 17, 17, 17, 17, 17, 17, 18, 19,
+ 19, 19, 19, 19, 19, 19, 20, 21, 20, 22, 20, 20, 23, 23, 20, 20,
+ 20, 20, 22, 20, 24, 7, 7, 25, 20, 20, 26, 20, 20, 20, 20, 20,
+ 20, 21, 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 29, 30, 30,
+ 30, 30, 31, 31, 31, 31, 32, 20, 20, 20, 33, 33, 33, 33, 34, 35,
+ 33, 33, 33, 36, 33, 33, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39,
+ 39, 39, 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43,
+ 43, 43, 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46,
+ 46, 47, 48, 48, 48, 48, 49, 49, 49, 49, 49, 50, 51, 49, 52, 52,
+ 52, 52, 53, 53, 53, 53, 53, 53, 54, 53, 55, 55, 55, 55, 56, 56,
+ 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, 60, 60,
+ 60, 60, 60, 60, 61, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 65,
+ 0, 0, 66, 66, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 69, 70,
+ 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, 73, 73, 73, 73, 74, 74,
+ 74, 74, 75, 75, 75, 75, 76, 76, 76, 76, 77, 77, 77, 77, 78, 78,
+ 78, 78, 79, 79, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 82, 7,
+ 7, 7, 83, 7, 84, 85, 0, 84, 86, 0, 2, 87, 88, 2, 2, 2,
+ 2, 89, 90, 87, 91, 2, 2, 2, 92, 2, 2, 2, 2, 93, 0, 0,
+ 0, 86, 1, 0, 0, 94, 0, 95, 96, 0, 4, 0, 0, 0, 0, 0,
+ 0, 4, 97, 97, 97, 97, 98, 98, 98, 98, 13, 13, 13, 13, 99, 99,
+ 99, 99,100,100,100,100, 0,101, 0, 0,102,100,103,104, 0, 0,
+ 100, 0,105,106,106,106,106,106,106,106,106,106,107,105,108,109,
+ 109,109,109,109,109,109,109,109,110,108,111,111,111,111,112, 55,
+ 55, 55, 55, 55, 55,113,109,109,109,110,109,109, 0, 0,114,114,
+ 114,114,115,115,115,115,116,116,116,116,117,117,117,117, 96, 2,
+ 2, 2, 2, 2, 94, 2,118,118,118,118,119,119,119,119,120,120,
+ 120,120,121,121,121,121,121,121,121,122,123,123,123,123,124,124,
+ 124,124,124,124,124,125,126,126,126,126,127,127,127,127,128,128,
+ 128,128, 2, 2, 3, 2, 2,129,130, 0,131,131,131,131,132, 17,
+ 17, 18, 20, 20, 20,133, 7, 7, 7,134, 20, 20, 20, 23, 0,135,
+ 109,109,109,109,109,136,137,137,137,137, 0, 0, 0,138,139,139,
+ 139,139,140,140,140,140, 84, 0, 0, 0,141,141,141,141,142,142,
+ 142,142,143,143,143,143,144,144,144,144,145,145,145,145,146,146,
+ 146,146,147,147,147,147,148,148,148,148,149,149,149,149,150,150,
+ 150,150,151,151,151,151,152,152,152,152,153,153,153,153,154,154,
+ 154,154,155,155,155,155,156,156,156,156,157,157,157,157,158,158,
+ 158,158,159,159,159,159,160,160,160,160,161,161,161,161,162,162,
+ 162,162,163,163,163,163,164,164,164,164,165,165,165,165,166,166,
+ 166,166,167,167,167,167,168,168,168,168,169,169,169,169,170,170,
+ 170,170,171,171,171,171,172,172,172,172,173,173,173,173,174,174,
+ 174,174,175,175,175,175,176,176,176,176,177,177,177,177,178,178,
+ 178,178,179,179,179,179,180,180,180,180,181,181,181,181,182,182,
+ 182,182,183,183,183,183,184, 45, 45, 45,185,185,185,185,186,186,
+ 186,186,187,187,187,187,188,188,188,188,188,188,189,188,190,190,
+ 190,190,191,191,191,191,192,192,192,192,193,193,193,193,194,194,
+ 194,194,195,195,195,195,196,196,196,196,197,197,197,197,198,198,
+ 198,198,199,199,199,199,200,200,200,200,201,201,201,201,202,202,
+ 202,202,203,203,203,203,204,204,204,204,205,205,205,205,206,206,
+ 206,206,207,207,207,207,208,208,208,208,209,209,209,209,210,210,
+ 210,210,211,211,211,211,212,212,212,212,213,213,213,213,214,214,
+ 214,214,215,215,215,215,216,217,217,217,218,218,218,218,217,217,
+ 217,217,219,106,106,106,106,109,109,109,220,220,220,220,221,221,
+ 221,221, 0,222, 86, 0, 0, 0,222, 7, 82,138, 7, 0, 0, 0,
+ 223, 86,224,224,224,224,225,225,225,225,226,226,226,226,227,227,
+ 227,227,228,228,228,228,229, 0, 0, 0, 0, 0, 0, 0, 0, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, 0, 0, 19, 0, 19, 0,
+ 0, 0, 0, 0, 26, 26, 1, 1, 1, 1, 9, 9, 9, 9, 0, 9,
+ 9, 9, 9, 9, 0, 9, 9, 0, 9, 0, 9, 9, 55, 55, 55, 55,
+ 55, 55, 6, 6, 6, 6, 6, 1, 1, 6, 6, 4, 4, 4, 4, 4,
+ 4, 4, 4, 14, 14, 14, 14, 14, 14, 14, 3, 3, 3, 3, 3, 0,
+ 3, 3, 0, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 1, 1, 1,
+ 3, 3, 1, 3, 3, 3, 37, 37, 37, 37, 38, 38, 38, 38, 64, 64,
+ 64, 64, 90, 90, 90, 90, 95, 95, 95, 95, 3, 3, 0, 3, 7, 7,
+ 7, 7, 7, 1, 1, 1, 1, 7, 7, 7, 0, 0, 7, 7, 5, 5,
+ 5, 5, 11, 11, 11, 11, 10, 10, 10, 10, 21, 21, 21, 21, 22, 22,
+ 22, 22, 23, 23, 23, 23, 16, 16, 16, 16, 20, 20, 20, 20, 36, 36,
+ 36, 36, 24, 24, 24, 24, 24, 24, 24, 0, 18, 18, 18, 18, 25, 25,
+ 25, 25, 25, 0, 0, 0, 0, 25, 25, 25, 33, 33, 33, 33, 8, 8,
+ 8, 8, 8, 8, 8, 0, 12, 12, 12, 12, 30, 30, 30, 30, 29, 29,
+ 29, 29, 28, 28, 28, 28, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35,
+ 35, 0, 0, 0, 35, 35, 45, 45, 45, 45, 44, 44, 44, 44, 44, 0,
+ 0, 0, 43, 43, 43, 43, 46, 46, 46, 46, 31, 31, 31, 31, 32, 32,
+ 0, 0, 32, 0, 32, 32, 32, 32, 32, 32, 48, 48, 48, 48, 52, 52,
+ 52, 52, 58, 58, 58, 58, 54, 54, 54, 54, 91, 91, 91, 91, 62, 62,
+ 62, 62, 76, 76, 76, 76, 93, 93, 93, 93, 70, 70, 70, 70, 73, 73,
+ 73, 73, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1,
+ 0, 0, 1, 1, 0, 0, 19, 19, 9, 9, 9, 9, 9, 6, 19, 9,
+ 9, 9, 9, 9, 19, 19, 9, 9, 9, 19, 6, 19, 19, 19, 19, 19,
+ 19, 9, 0, 0, 0, 19, 0, 0, 9, 0, 0, 0, 19, 19, 27, 27,
+ 27, 27, 56, 56, 56, 56, 61, 61, 61, 61, 13, 13, 13, 13, 0, 13,
+ 0, 13, 0, 13, 13, 13, 13, 13, 1, 1, 1, 1, 12, 12, 0, 15,
+ 15, 15, 15, 15, 15, 15, 15, 1, 1, 0, 0, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 0, 26, 26, 26, 26, 26, 12, 12, 12, 12, 12,
+ 12, 0, 39, 39, 39, 39, 86, 86, 86, 86, 77, 77, 77, 77, 79, 79,
+ 79, 79, 60, 60, 60, 60, 65, 65, 65, 65, 75, 75, 75, 75, 69, 69,
+ 69, 69, 69, 69, 0, 69, 74, 74, 74, 74, 84, 84, 84, 84, 84, 84,
+ 84, 0, 68, 68, 68, 68, 92, 92, 92, 92, 87, 87, 87, 87, 19, 9,
+ 19, 19, 19, 19, 0, 0, 2, 2, 2, 2, 19, 19, 19, 4, 3, 3,
+ 0, 0, 1, 1, 6, 6, 0, 0, 17, 17, 17, 17, 0, 0, 49, 49,
+ 49, 49, 0, 1, 1, 1, 71, 71, 71, 71, 67, 67, 67, 67, 42, 42,
+ 42, 42, 41, 41, 41, 41,118,118,118,118, 53, 53, 53, 53, 59, 59,
+ 59, 59, 40, 40, 40, 40, 51, 51, 51, 51, 50, 50, 50, 50,135,135,
+ 135,135,106,106,106,106,104,104,104,104,110,110,110,110, 47, 47,
+ 47, 47, 81, 81, 81, 81,120,120,120,120,116,116,116,116,128,128,
+ 128,128, 66, 66, 66, 66, 72, 72, 72, 72, 98, 98, 98, 98, 97, 97,
+ 97, 97, 57, 57, 57, 57, 88, 88, 88, 88,117,117,117,117,112,112,
+ 112,112, 78, 78, 78, 78, 83, 83, 83, 83, 82, 82, 82, 82,122,122,
+ 122,122, 89, 89, 89, 89,130,130,130,130,144,144,144,144,156,156,
+ 156,156,147,147,147,147,148,148,148,148,153,153,153,153,149,149,
+ 149,149, 94, 94, 94, 94, 85, 85, 85, 85,101,101,101,101, 96, 96,
+ 96, 96,111,111,111,111,100,100,100,100,100, 36, 36, 36,108,108,
+ 108,108,129,129,129,129,109,109,109,109,107,107,107,107,107,107,
+ 107, 1,137,137,137,137,124,124,124,124,123,123,123,123,114,114,
+ 114,114,102,102,102,102,126,126,126,126,142,142,142,142,125,125,
+ 125,125,154,154,154,154,150,150,150,150,141,141,141,141,140,140,
+ 140,140,121,121,121,121,133,133,133,133,134,134,134,134,138,138,
+ 138,138,143,143,143,143,145,145,145,145, 63, 63, 63, 63, 80, 80,
+ 80, 80,127,127,127,127,115,115,115,115,103,103,103,103,119,119,
+ 119,119,146,146,146,146, 99, 99, 99, 99,136,139, 0, 0,155,155,
+ 155,155,136,136,136,136, 17, 15, 15, 15,139,139,139,139,105,105,
+ 105,105, 0, 0, 0, 1, 0, 0, 1, 1,131,131,131,131,151,151,
+ 151,151,152,152,152,152,113,113,113,113,132,132,132,132, 15, 0,
+ 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 9, 10,
+ 9, 11, 12, 13, 9, 9, 9, 14, 9, 9, 15, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 16, 17,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 18, 19, 20, 9, 21, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 23, 24,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4,
+ 5, 6, 7, 8, 9, 10, 11, 12, 0, 0, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 23, 0, 0, 24, 25, 26, 27, 28, 29, 30, 0, 0,
+ 31, 32, 0, 33, 0, 34, 0, 35, 0, 0, 0, 0, 36, 37, 38, 39,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 43, 44, 0, 45, 0, 0, 0, 0, 0, 0, 46, 47, 0, 0,
+ 0, 0, 0, 48, 0, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 50, 51, 0, 0, 0, 52, 0, 0, 53, 0, 0, 0,
+ 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0,
+ 0, 0, 0, 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 57, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 58, 59, 60, 61, 62, 63, 64, 65, 0, 0, 0, 0,
+ 0, 0, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 67, 68, 0, 69, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86,
+ 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,
+ 103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0,104, 0, 0, 0, 0, 0, 0,105,106, 0,107, 0, 0, 0,
+ 108, 0,109, 0,110, 0,111,112,113, 0,114, 0, 0, 0,115, 0,
+ 0, 0,116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,117, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,118,119,120,121, 0,122,123,124,125,126, 0,127,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157, 0, 0,
+ 0,158,159,160,161, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,162,163, 0, 0, 0, 0, 0,
+ 0, 0,164, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,166, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,167, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,168, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0,169,170, 0, 0, 0, 0,171,172, 0, 0, 0,
+ 173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,
+ 189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,
+ 205,206, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4,
+};
+static const uint16_t
+_hb_ucd_u16[4848] =
+{
+ 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 7, 8, 9, 10, 11, 12,
+ 13, 13, 13, 14, 15, 13, 13, 16, 17, 18, 19, 20, 21, 22, 13, 23,
+ 13, 13, 13, 24, 25, 11, 11, 11, 11, 26, 11, 27, 28, 29, 30, 31,
+ 32, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 11, 37, 38, 13, 39,
+ 9, 9, 9, 11, 11, 11, 13, 13, 40, 13, 13, 13, 41, 13, 13, 13,
+ 13, 13, 13, 35, 9, 42, 11, 11, 43, 44, 32, 45, 46, 47, 47, 48,
+ 49, 50, 47, 47, 51, 32, 52, 53, 47, 47, 47, 47, 47, 54, 55, 56,
+ 57, 58, 47, 32, 59, 47, 47, 47, 47, 47, 60, 53, 61, 47, 62, 63,
+ 47, 64, 65, 66, 47, 67, 47, 47, 47, 47, 47, 47, 47, 68, 69, 32,
+ 70, 47, 47, 71, 72, 73, 74, 75, 76, 47, 47, 77, 78, 79, 80, 81,
+ 82, 47, 47, 83, 84, 85, 86, 87, 82, 47, 47, 77, 88, 47, 80, 89,
+ 90, 47, 47, 91, 92, 93, 80, 94, 95, 47, 47, 96, 97, 98, 99, 100,
+ 101, 47, 47, 102, 103, 104, 80, 105, 106, 47, 47, 91, 107, 108, 80, 109,
+ 110, 47, 47, 111, 112, 113, 80, 114, 90, 47, 47, 47, 115, 116, 99, 117,
+ 47, 47, 47, 118, 119, 120, 66, 66, 47, 47, 47, 121, 122, 123, 47, 47,
+ 124, 125, 126, 127, 47, 47, 47, 128, 129, 32, 32, 130, 131, 132, 66, 66,
+ 47, 47, 133, 134, 120, 135, 136, 137, 138, 139, 9, 9, 9, 11, 11, 140,
+ 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 141, 142, 143,
+ 47, 144, 9, 9, 9, 9, 9, 145, 146, 47, 47, 47, 47, 47, 47, 47,
+ 47, 47, 47, 47, 47, 47, 147, 47, 148, 149, 47, 47, 47, 47, 150, 151,
+ 47, 152, 47, 153, 47, 152, 47, 152, 47, 47, 47, 154, 155, 156, 157, 143,
+ 158, 157, 47, 47, 159, 47, 47, 47, 160, 47, 161, 47, 47, 47, 47, 47,
+ 47, 47, 162, 163, 164, 47, 47, 47, 47, 47, 47, 47, 47, 165, 144, 144,
+ 47, 166, 47, 47, 47, 167, 168, 169, 157, 157, 170, 171, 32, 32, 32, 32,
+ 172, 47, 47, 173, 174, 120, 175, 176, 177, 47, 178, 61, 47, 47, 179, 180,
+ 47, 47, 181, 182, 183, 61, 47, 184, 11, 9, 9, 9, 66, 185, 186, 187,
+ 11, 11, 188, 27, 27, 27, 189, 190, 11, 191, 27, 27, 32, 32, 32, 32,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 192, 13, 13, 13, 13, 13, 13,
+ 193, 193, 193, 193, 193, 194, 193, 11, 195, 195, 195, 196, 197, 198, 198, 197,
+ 199, 200, 201, 202, 203, 204, 205, 206, 207, 27, 208, 208, 208, 209, 210, 32,
+ 211, 212, 213, 214, 215, 143, 216, 216, 217, 218, 219, 144, 220, 221, 144, 222,
+ 223, 223, 223, 223, 223, 223, 223, 223, 224, 144, 225, 144, 144, 144, 144, 226,
+ 144, 227, 223, 228, 144, 229, 230, 144, 144, 144, 144, 144, 144, 144, 143, 143,
+ 143, 231, 144, 144, 144, 144, 232, 143, 144, 144, 144, 144, 144, 144, 144, 144,
+ 144, 144, 144, 233, 234, 144, 144, 235, 144, 144, 144, 144, 144, 144, 236, 144,
+ 144, 144, 144, 144, 144, 144, 237, 238, 143, 239, 144, 144, 240, 223, 241, 223,
+ 242, 243, 223, 223, 223, 244, 223, 245, 144, 144, 144, 223, 246, 144, 144, 144,
+ 9, 9, 9, 11, 11, 11, 247, 248, 13, 13, 13, 13, 13, 13, 249, 250,
+ 11, 11, 11, 47, 47, 47, 251, 252, 47, 47, 47, 47, 47, 47, 32, 32,
+ 253, 254, 255, 256, 257, 258, 66, 66, 259, 260, 261, 262, 263, 47, 47, 47,
+ 47, 264, 146, 47, 47, 47, 47, 265, 47, 266, 47, 47, 144, 144, 144, 47,
+ 144, 144, 267, 144, 268, 269, 144, 144, 267, 144, 144, 269, 144, 144, 144, 144,
+ 47, 47, 47, 47, 144, 144, 144, 144, 47, 270, 47, 47, 47, 47, 47, 47,
+ 47, 144, 144, 144, 144, 47, 47, 184, 271, 47, 61, 47, 13, 13, 272, 273,
+ 13, 274, 47, 47, 47, 47, 275, 276, 31, 277, 278, 279, 13, 13, 13, 280,
+ 281, 282, 283, 284, 285, 11, 11, 286, 287, 47, 288, 289, 47, 47, 47, 290,
+ 291, 47, 47, 292, 293, 157, 32, 294, 61, 47, 295, 47, 296, 297, 47, 47,
+ 70, 47, 47, 298, 299, 300, 301, 61, 47, 47, 302, 303, 304, 305, 47, 306,
+ 47, 47, 47, 307, 58, 308, 309, 310, 47, 47, 47, 11, 11, 311, 312, 11,
+ 11, 11, 11, 11, 47, 47, 313, 157, 314, 314, 314, 314, 314, 314, 314, 314,
+ 315, 315, 315, 315, 315, 315, 315, 315, 11, 316, 317, 47, 47, 47, 47, 47,
+ 47, 47, 47, 318, 31, 319, 47, 47, 47, 47, 47, 320, 321, 47, 47, 47,
+ 47, 47, 47, 47, 47, 47, 47, 322, 32, 323, 32, 324, 325, 326, 327, 47,
+ 47, 47, 47, 47, 47, 47, 47, 328, 329, 2, 3, 4, 5, 330, 331, 332,
+ 47, 333, 47, 47, 47, 47, 334, 335, 336, 143, 143, 337, 216, 216, 216, 338,
+ 339, 144, 144, 144, 144, 144, 144, 340, 341, 341, 341, 341, 341, 341, 341, 341,
+ 47, 47, 47, 47, 47, 47, 342, 143, 47, 47, 343, 47, 344, 47, 47, 60,
+ 47, 345, 47, 47, 47, 346, 216, 216, 9, 9, 145, 11, 11, 47, 47, 47,
+ 47, 47, 157, 9, 9, 145, 11, 11, 47, 47, 47, 47, 47, 47, 345, 66,
+ 47, 47, 47, 47, 47, 347, 47, 348, 47, 47, 349, 143, 143, 143, 47, 350,
+ 47, 351, 47, 345, 66, 66, 66, 66, 47, 47, 47, 352, 143, 143, 143, 143,
+ 353, 47, 47, 354, 143, 66, 47, 355, 47, 356, 143, 143, 357, 47, 358, 66,
+ 47, 47, 47, 359, 47, 360, 47, 360, 47, 359, 142, 143, 143, 143, 143, 143,
+ 9, 9, 9, 9, 11, 11, 11, 361, 47, 47, 362, 157, 157, 157, 157, 157,
+ 143, 143, 143, 143, 143, 143, 143, 143, 47, 47, 363, 47, 47, 47, 47, 47,
+ 47, 356, 364, 47, 60, 365, 66, 66, 47, 47, 47, 47, 366, 143, 47, 47,
+ 367, 47, 47, 354, 368, 369, 370, 371, 177, 47, 47, 372, 373, 47, 47, 157,
+ 95, 47, 374, 375, 376, 47, 47, 377, 177, 47, 47, 378, 379, 380, 381, 143,
+ 47, 47, 382, 383, 32, 32, 32, 32, 47, 47, 359, 47, 47, 384, 169, 157,
+ 90, 47, 47, 111, 385, 386, 387, 32, 47, 47, 47, 388, 389, 390, 47, 47,
+ 47, 47, 47, 391, 392, 157, 157, 157, 47, 47, 393, 394, 395, 396, 32, 32,
+ 47, 47, 47, 397, 398, 157, 66, 66, 47, 47, 399, 400, 157, 157, 157, 157,
+ 47, 141, 401, 402, 144, 144, 144, 144, 47, 47, 382, 403, 66, 66, 66, 66,
+ 9, 9, 9, 9, 11, 11, 126, 404, 47, 47, 47, 405, 406, 157, 157, 157,
+ 47, 47, 47, 47, 47, 407, 408, 409, 410, 47, 47, 411, 412, 413, 47, 47,
+ 414, 415, 66, 66, 47, 47, 47, 47, 47, 47, 393, 416, 417, 126, 143, 418,
+ 47, 152, 419, 420, 32, 32, 32, 32, 47, 47, 47, 353, 421, 157, 47, 47,
+ 422, 423, 157, 157, 157, 157, 157, 157, 47, 47, 47, 47, 47, 47, 47, 424,
+ 47, 47, 47, 47, 143, 425, 426, 427, 216, 216, 216, 216, 216, 216, 216, 66,
+ 47, 47, 47, 205, 205, 205, 205, 205, 47, 47, 47, 47, 47, 47, 300, 66,
+ 47, 47, 47, 47, 47, 47, 47, 428, 47, 47, 47, 429, 430, 431, 432, 47,
+ 9, 9, 9, 9, 9, 9, 11, 11, 143, 433, 66, 66, 66, 66, 66, 66,
+ 47, 47, 47, 47, 384, 434, 409, 409, 435, 436, 27, 27, 27, 27, 437, 409,
+ 47, 438, 205, 205, 205, 205, 205, 205, 144, 144, 144, 144, 144, 144, 439, 440,
+ 441, 144, 442, 144, 144, 144, 144, 144, 144, 144, 144, 144, 443, 144, 144, 144,
+ 9, 444, 11, 445, 446, 11, 193, 9, 447, 448, 9, 449, 11, 9, 444, 11,
+ 445, 446, 11, 193, 9, 447, 448, 9, 449, 11, 9, 444, 11, 445, 446, 11,
+ 193, 9, 447, 448, 9, 449, 11, 9, 444, 11, 193, 9, 450, 451, 452, 453,
+ 11, 454, 9, 455, 456, 457, 458, 11, 459, 9, 460, 11, 461, 157, 157, 157,
+ 32, 32, 32, 462, 32, 32, 463, 464, 465, 466, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 47, 47, 47, 467, 468, 144, 144, 144,
+ 47, 47, 47, 47, 47, 47, 469, 470, 47, 47, 47, 47, 349, 32, 32, 32,
+ 9, 9, 447, 11, 471, 300, 66, 66, 143, 143, 472, 473, 143, 143, 143, 143,
+ 143, 143, 474, 143, 143, 143, 143, 143, 47, 47, 47, 47, 47, 47, 47, 223,
+ 475, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 476,
+ 144, 144, 144, 144, 144, 144, 144, 157, 205, 205, 205, 205, 205, 205, 205, 205,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 939, 940, 941, 942, 946, 948, 0, 962, 969, 970, 971, 976,1001,1002,1003,1008,
+ 0,1033,1040,1041,1042,1043,1047, 0, 0,1080,1081,1082,1086,1110, 0, 0,
+ 1124,1125,1126,1127,1131,1133, 0,1147,1154,1155,1156,1161,1187,1188,1189,1193,
+ 0,1219,1226,1227,1228,1229,1233, 0, 0,1267,1268,1269,1273,1298, 0,1303,
+ 943,1128, 944,1129, 954,1139, 958,1143, 959,1144, 960,1145, 961,1146, 964,1149,
+ 0, 0, 973,1158, 974,1159, 975,1160, 983,1168, 978,1163, 988,1173, 990,1175,
+ 991,1176, 993,1178, 994,1179, 0, 0,1004,1190,1005,1191,1006,1192,1014,1199,
+ 1007, 0, 0, 0,1016,1201,1020,1206, 0,1022,1208,1025,1211,1023,1209, 0,
+ 0, 0, 0,1032,1218,1037,1223,1035,1221, 0, 0, 0,1044,1230,1045,1231,
+ 1049,1235, 0, 0,1058,1244,1064,1250,1060,1246,1066,1252,1067,1253,1072,1258,
+ 1069,1255,1077,1264,1074,1261, 0, 0,1083,1270,1084,1271,1085,1272,1088,1275,
+ 1089,1276,1096,1283,1103,1290,1111,1299,1115,1118,1307,1120,1309,1121,1310, 0,
+ 1053,1239, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1093,
+ 1280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 949,1134,1010,
+ 1195,1050,1236,1090,1277,1341,1368,1340,1367,1342,1369,1339,1366, 0,1320,1347,
+ 1418,1419,1323,1350, 0, 0, 992,1177,1018,1204,1055,1241,1416,1417,1415,1424,
+ 1202, 0, 0, 0, 987,1172, 0, 0,1031,1217,1321,1348,1322,1349,1338,1365,
+ 950,1135, 951,1136, 979,1164, 980,1165,1011,1196,1012,1197,1051,1237,1052,1238,
+ 1061,1247,1062,1248,1091,1278,1092,1279,1071,1257,1076,1263, 0, 0, 997,1182,
+ 0, 0, 0, 0, 0, 0, 945,1130, 982,1167,1337,1364,1335,1362,1046,1232,
+ 1422,1423,1113,1301, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 8, 9, 0, 10,1425, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+ 0, 0, 0, 0, 0,1314,1427, 5,1434,1438,1443, 0,1450, 0,1455,1461,
+ 1514, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1446,1458,1468,1476,1480,1486,
+ 1517, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1489,1503,1494,1500,1508, 0,
+ 0, 0, 0,1520,1521, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1526,1528, 0,1525, 0, 0, 0,1522, 0, 0, 0, 0,1536,1532,1539, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,1534, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,1556, 0, 0, 0, 0, 0, 0,
+ 1548,1550, 0,1547, 0, 0, 0,1567, 0, 0, 0, 0,1558,1554,1561, 0,
+ 0, 0, 0, 0, 0, 0,1568,1569, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0,1529,1551, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1523,1545,1524,1546, 0, 0,1527,1549, 0, 0,1570,1571,1530,1552,1531,1553,
+ 0, 0,1533,1555,1535,1557,1537,1559, 0, 0,1572,1573,1544,1566,1538,1560,
+ 1540,1562,1541,1563,1542,1564, 0, 0,1543,1565, 0, 0, 0, 0, 0, 0,
+ 0, 0,1606,1607,1609,1608,1610, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1613, 0,1611, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,1612, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,1620, 0, 0, 0, 0, 0, 0,
+ 0,1623, 0, 0,1624, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,1614,1615,1616,1617,1618,1619,1621,1622,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1628,1629, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1625,1626, 0,1627,
+ 0, 0, 0,1634, 0, 0,1635, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,1630,1631,1632, 0, 0,1633, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,1639, 0, 0,1638,1640, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1636,1637, 0, 0,
+ 0, 0, 0, 0,1641, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1642,1644,1643, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,1645, 0, 0, 0, 0, 0, 0, 0,
+ 1646, 0, 0, 0, 0, 0, 0,1648,1649, 0,1647,1650, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1651,1653,1652, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1654, 0,1655,1657,1656, 0,
+ 0, 0, 0,1659, 0, 0, 0, 0, 0, 0, 0, 0, 0,1660, 0, 0,
+ 0, 0,1661, 0, 0, 0, 0,1662, 0, 0, 0, 0,1663, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,1658, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,1664, 0,1665,1673, 0,1674, 0, 0, 0, 0, 0, 0, 0,
+ 0,1666, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,1668, 0, 0, 0, 0, 0, 0, 0, 0, 0,1669, 0, 0,
+ 0, 0,1670, 0, 0, 0, 0,1671, 0, 0, 0, 0,1672, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,1667, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,1675, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,1676, 0,1677, 0,1678, 0,1679, 0,1680, 0,
+ 0, 0,1681, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1682, 0,1683, 0, 0,
+ 1684,1685, 0,1686, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 953,1138, 955,1140, 956,1141, 957,1142,1324,1351, 963,1148, 965,1150, 968,1153,
+ 966,1151, 967,1152,1378,1380,1379,1381, 984,1169, 985,1170,1420,1421, 986,1171,
+ 989,1174, 995,1180, 998,1183, 996,1181, 999,1184,1000,1185,1015,1200,1329,1356,
+ 1017,1203,1019,1205,1021,1207,1024,1210,1687,1688,1027,1213,1026,1212,1028,1214,
+ 1029,1215,1030,1216,1034,1220,1036,1222,1039,1225,1038,1224,1334,1361,1336,1363,
+ 1382,1384,1383,1385,1056,1242,1057,1243,1059,1245,1063,1249,1689,1690,1065,1251,
+ 1068,1254,1070,1256,1386,1387,1388,1389,1691,1692,1073,1259,1075,1262,1079,1266,
+ 1078,1265,1095,1282,1098,1285,1097,1284,1390,1391,1392,1393,1099,1286,1100,1287,
+ 1101,1288,1102,1289,1105,1292,1104,1291,1106,1294,1107,1295,1108,1296,1114,1302,
+ 1119,1308,1122,1311,1123,1312,1186,1260,1293,1305, 0,1394, 0, 0, 0, 0,
+ 952,1137, 947,1132,1317,1344,1316,1343,1319,1346,1318,1345,1693,1695,1371,1375,
+ 1370,1374,1373,1377,1372,1376,1694,1696, 981,1166, 977,1162, 972,1157,1326,1353,
+ 1325,1352,1328,1355,1327,1354,1697,1698,1009,1194,1013,1198,1054,1240,1048,1234,
+ 1331,1358,1330,1357,1333,1360,1332,1359,1699,1700,1396,1401,1395,1400,1398,1403,
+ 1397,1402,1399,1404,1094,1281,1087,1274,1406,1411,1405,1410,1408,1413,1407,1412,
+ 1409,1414,1109,1297,1117,1306,1116,1304,1112,1300, 0, 0, 0, 0, 0, 0,
+ 1471,1472,1701,1705,1702,1706,1703,1707,1430,1431,1715,1719,1716,1720,1717,1721,
+ 1477,1478,1729,1731,1730,1732, 0, 0,1435,1436,1733,1735,1734,1736, 0, 0,
+ 1481,1482,1737,1741,1738,1742,1739,1743,1439,1440,1751,1755,1752,1756,1753,1757,
+ 1490,1491,1765,1768,1766,1769,1767,1770,1447,1448,1771,1774,1772,1775,1773,1776,
+ 1495,1496,1777,1779,1778,1780, 0, 0,1451,1452,1781,1783,1782,1784, 0, 0,
+ 1504,1505,1785,1788,1786,1789,1787,1790, 0,1459, 0,1791, 0,1792, 0,1793,
+ 1509,1510,1794,1798,1795,1799,1796,1800,1462,1463,1808,1812,1809,1813,1810,1814,
+ 1467, 21,1475, 22,1479, 23,1485, 24,1493, 27,1499, 28,1507, 29, 0, 0,
+ 1704,1708,1709,1710,1711,1712,1713,1714,1718,1722,1723,1724,1725,1726,1727,1728,
+ 1740,1744,1745,1746,1747,1748,1749,1750,1754,1758,1759,1760,1761,1762,1763,1764,
+ 1797,1801,1802,1803,1804,1805,1806,1807,1811,1815,1816,1817,1818,1819,1820,1821,
+ 1470,1469,1822,1474,1465, 0,1473,1825,1429,1428,1426, 12,1432, 0, 26, 0,
+ 0,1315,1823,1484,1466, 0,1483,1829,1433, 13,1437, 14,1441,1826,1827,1828,
+ 1488,1487,1513, 19, 0, 0,1492,1515,1445,1444,1442, 15, 0,1831,1832,1833,
+ 1502,1501,1516, 25,1497,1498,1506,1518,1457,1456,1454, 17,1453,1313, 11, 3,
+ 0, 0,1824,1512,1519, 0,1511,1830,1449, 16,1460, 18,1464, 4, 0, 0,
+ 30, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 2, 6, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1834,1835, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1836, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1837,1839,1838,
+ 0, 0, 0, 0,1840, 0, 0, 0, 0,1841, 0, 0,1842, 0, 0, 0,
+ 0, 0, 0, 0,1843, 0,1844, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0,1845, 0, 0,1846, 0, 0,1847, 0,1848, 0, 0, 0, 0, 0, 0,
+ 937, 0,1850, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1849, 936, 938,
+ 1851,1852, 0, 0,1853,1854, 0, 0,1855,1856, 0, 0, 0, 0, 0, 0,
+ 1857,1858, 0, 0,1861,1862, 0, 0,1863,1864, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1867,1868,1869,1870,
+ 1859,1860,1865,1866, 0, 0, 0, 0, 0, 0,1871,1872,1873,1874, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 33, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1875, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1877, 0,1878, 0,
+ 1879, 0,1880, 0,1881, 0,1882, 0,1883, 0,1884, 0,1885, 0,1886, 0,
+ 1887, 0,1888, 0, 0,1889, 0,1890, 0,1891, 0, 0, 0, 0, 0, 0,
+ 1892,1893, 0,1894,1895, 0,1896,1897, 0,1898,1899, 0,1900,1901, 0, 0,
+ 0, 0, 0, 0,1876, 0, 0, 0, 0, 0, 0, 0, 0, 0,1902, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1904, 0,1905, 0,
+ 1906, 0,1907, 0,1908, 0,1909, 0,1910, 0,1911, 0,1912, 0,1913, 0,
+ 1914, 0,1915, 0, 0,1916, 0,1917, 0,1918, 0, 0, 0, 0, 0, 0,
+ 1919,1920, 0,1921,1922, 0,1923,1924, 0,1925,1926, 0,1927,1928, 0, 0,
+ 0, 0, 0, 0,1903, 0, 0,1929,1930,1931,1932, 0, 0, 0,1933, 0,
+ 710, 385, 724, 715, 455, 103, 186, 825, 825, 242, 751, 205, 241, 336, 524, 601,
+ 663, 676, 688, 738, 411, 434, 474, 500, 649, 746, 799, 108, 180, 416, 482, 662,
+ 810, 275, 462, 658, 692, 344, 618, 679, 293, 388, 440, 492, 740, 116, 146, 168,
+ 368, 414, 481, 527, 606, 660, 665, 722, 781, 803, 809, 538, 553, 588, 642, 758,
+ 811, 701, 233, 299, 573, 612, 487, 540, 714, 779, 232, 267, 412, 445, 457, 585,
+ 594, 766, 167, 613, 149, 148, 560, 589, 648, 768, 708, 345, 411, 704, 105, 259,
+ 313, 496, 518, 174, 542, 120, 307, 101, 430, 372, 584, 183, 228, 529, 650, 697,
+ 424, 732, 428, 349, 632, 355, 517, 110, 135, 147, 403, 580, 624, 700, 750, 170,
+ 193, 245, 297, 374, 463, 543, 763, 801, 812, 815, 162, 384, 420, 730, 287, 330,
+ 337, 366, 459, 476, 509, 558, 591, 610, 726, 652, 734, 759, 154, 163, 198, 473,
+ 683, 697, 292, 311, 353, 423, 572, 494, 113, 217, 259, 280, 314, 499, 506, 603,
+ 608, 752, 778, 782, 788, 117, 557, 748, 774, 320, 109, 126, 260, 265, 373, 411,
+ 479, 523, 655, 737, 823, 380, 765, 161, 395, 398, 438, 451, 502, 516, 537, 583,
+ 791, 136, 340, 769, 122, 273, 446, 727, 305, 322, 400, 496, 771, 155, 190, 269,
+ 377, 391, 406, 432, 501, 519, 599, 684, 687, 749, 776, 175, 452, 191, 480, 510,
+ 659, 772, 805, 813, 397, 444, 619, 566, 568, 575, 491, 471, 707, 111, 636, 156,
+ 153, 288, 346, 578, 256, 435, 383, 729, 680, 767, 694, 295, 128, 210, 0, 0,
+ 227, 0, 379, 0, 0, 150, 493, 525, 544, 551, 552, 556, 783, 576, 604, 0,
+ 661, 0, 703, 0, 0, 735, 743, 0, 0, 0, 793, 794, 795, 808, 741, 773,
+ 118, 127, 130, 166, 169, 177, 207, 213, 215, 226, 229, 268, 270, 317, 327, 329,
+ 335, 369, 375, 381, 404, 441, 448, 458, 477, 484, 503, 539, 545, 547, 546, 548,
+ 549, 550, 554, 555, 561, 564, 569, 591, 593, 595, 598, 607, 620, 625, 625, 651,
+ 690, 695, 705, 706, 716, 717, 733, 735, 777, 786, 790, 315, 869, 623, 0, 0,
+ 102, 145, 134, 115, 129, 138, 165, 171, 207, 202, 206, 212, 227, 231, 240, 243,
+ 250, 254, 294, 296, 303, 308, 319, 325, 321, 329, 326, 335, 341, 357, 360, 362,
+ 370, 379, 388, 389, 393, 421, 424, 438, 456, 454, 458, 465, 477, 535, 485, 490,
+ 493, 507, 512, 514, 521, 522, 525, 526, 528, 533, 532, 541, 565, 569, 574, 586,
+ 591, 597, 607, 637, 647, 674, 691, 693, 695, 698, 703, 699, 705, 704, 702, 706,
+ 709, 717, 728, 736, 747, 754, 770, 777, 783, 784, 786, 787, 790, 802, 825, 848,
+ 847, 857, 55, 65, 66, 883, 892, 916, 822, 824, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1586, 0,1605,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1602,1603,1934,1935,1574,1575,
+ 1576,1577,1579,1580,1581,1583,1584, 0,1585,1587,1588,1589,1591, 0,1592, 0,
+ 1593,1594, 0,1595,1596, 0,1598,1599,1600,1601,1604,1582,1578,1590,1597, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1936, 0,1937, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1938, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1939,1940,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1941,1942, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1944,1943, 0,1945, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1946,1947, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,1948, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1949,1950,
+ 1951,1952,1953,1954,1955, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1956,1957,1958,1960,1959,
+ 1961, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 106, 104, 107, 826, 114, 118, 119, 121, 123, 124, 127, 125, 34, 830, 130, 131,
+ 132, 137, 827, 35, 133, 139, 829, 142, 143, 112, 144, 145, 924, 151, 152, 37,
+ 157, 158, 159, 160, 38, 165, 166, 169, 171, 172, 173, 174, 176, 177, 178, 179,
+ 181, 182, 182, 182, 833, 468, 184, 185, 834, 187, 188, 189, 196, 192, 194, 195,
+ 197, 199, 200, 201, 203, 204, 204, 206, 208, 209, 211, 218, 213, 219, 214, 216,
+ 153, 234, 221, 222, 223, 220, 225, 224, 230, 835, 235, 236, 237, 238, 239, 244,
+ 836, 837, 247, 248, 249, 246, 251, 39, 40, 253, 255, 255, 838, 257, 258, 259,
+ 261, 839, 262, 263, 301, 264, 41, 266, 270, 272, 271, 841, 274, 842, 277, 276,
+ 278, 281, 282, 42, 283, 284, 285, 286, 43, 843, 44, 289, 290, 291, 293, 934,
+ 298, 845, 845, 621, 300, 300, 45, 852, 894, 302, 304, 46, 306, 309, 310, 312,
+ 316, 48, 47, 317, 846, 318, 323, 324, 325, 324, 328, 329, 333, 331, 332, 334,
+ 335, 336, 338, 339, 342, 343, 347, 351, 849, 350, 348, 352, 354, 359, 850, 361,
+ 358, 356, 49, 363, 365, 367, 364, 50, 369, 371, 851, 376, 386, 378, 53, 381,
+ 52, 51, 140, 141, 387, 382, 614, 78, 388, 389, 390, 394, 392, 856, 54, 399,
+ 396, 402, 404, 858, 405, 401, 407, 55, 408, 409, 410, 413, 859, 415, 56, 417,
+ 860, 418, 57, 419, 422, 424, 425, 861, 840, 862, 426, 863, 429, 431, 427, 433,
+ 437, 441, 438, 439, 442, 443, 864, 436, 449, 450, 58, 454, 453, 865, 447, 460,
+ 866, 867, 461, 466, 465, 464, 59, 467, 470, 469, 472, 828, 475, 868, 478, 870,
+ 483, 485, 486, 871, 488, 489, 872, 873, 495, 497, 60, 498, 61, 61, 504, 505,
+ 507, 508, 511, 62, 513, 874, 515, 875, 518, 844, 520, 876, 877, 878, 63, 64,
+ 528, 880, 879, 881, 882, 530, 531, 531, 533, 66, 534, 67, 68, 884, 536, 538,
+ 541, 69, 885, 549, 886, 887, 556, 559, 70, 561, 562, 563, 888, 889, 889, 567,
+ 71, 890, 570, 571, 72, 891, 577, 73, 581, 579, 582, 893, 587, 74, 590, 592,
+ 596, 75, 895, 896, 76, 897, 600, 898, 602, 605, 607, 899, 900, 609, 901, 611,
+ 853, 77, 615, 616, 79, 617, 252, 902, 903, 854, 855, 621, 622, 731, 80, 627,
+ 626, 628, 164, 629, 630, 631, 633, 904, 632, 634, 639, 640, 635, 641, 646, 651,
+ 638, 643, 644, 645, 905, 907, 906, 81, 653, 654, 656, 911, 657, 908, 82, 83,
+ 909, 910, 84, 664, 665, 666, 667, 669, 668, 671, 670, 674, 672, 673, 675, 85,
+ 677, 678, 86, 681, 682, 912, 685, 686, 87, 689, 36, 913, 914, 88, 89, 696,
+ 702, 709, 711, 915, 712, 713, 718, 719, 917, 831, 721, 720, 723, 832, 725, 728,
+ 918, 919, 739, 742, 744, 920, 745, 753, 756, 757, 755, 760, 761, 921, 762, 90,
+ 764, 922, 91, 775, 279, 780, 923, 925, 92, 93, 785, 926, 94, 927, 787, 787,
+ 789, 928, 792, 95, 796, 797, 798, 800, 96, 929, 802, 804, 806, 97, 98, 807,
+ 930, 99, 931, 932, 933, 814, 100, 816, 817, 818, 819, 820, 821, 935, 0, 0,
+};
+static const int16_t
+_hb_ucd_i16[92] = /* Signed values selected by _hb_ucd_bmg(); hb_ucd_mirroring() adds the selected value to the code point. */
+{
+ 0, 0, 1, -1, 2, 0, -2, 0, 0, 2, 0, -2, 0, 16, 0, -16,
+ 0, 1, -1, 0, 3, 3, 3, -3, -3, -3, 0, 2016, 0, 2527, 1923, 1914,
+ 1918, 0, 2250, 0, 0, 138, 0, 7, -7, 0, -1, 1, 1824, 0, 2104, 0,
+ 2108, 2106, 0, 2106, 1316, 0, -1, -138, 8, 8, 8, 0, 7, 7, -8, -8,
+ -8, -7,-1316, 1, -1, 3, -3, 1, 0,-1914,-1918, 0, 0,-1923,-1824, 0,
+ 0,-2016,-2104, 0, 0,-2106,-2108,-2106,-2250, 0,-2527, 0,
+};
+
+static inline uint_fast8_t
+_hb_ucd_gc (unsigned u) /* Table lookup whose result hb_ucd_general_category() casts to hb_unicode_general_category_t. */
+{ /* Five chained lookups: u>>12 selects the first index, then 5, 3, 3 and 1 further bits of `u` pick the entry at each level. Any u >= 1114112 (0x110000) yields 2. */
+ return u<1114112u?_hb_ucd_u8[4920+(((_hb_ucd_u8[1104+(((_hb_ucd_u16[((_hb_ucd_u8[272+(((_hb_ucd_u8[u>>1>>3>>3>>5])<<5)+((u>>1>>3>>3)&31u))])<<3)+((u>>1>>3)&7u)])<<3)+((u>>1)&7u))])<<1)+((u)&1u))]:2;
+}
+static inline uint_fast8_t
+_hb_ucd_ccc (unsigned u) /* Table lookup whose result hb_ucd_combining_class() casts to hb_unicode_combining_class_t. */
+{ /* Five chained lookups inside _hb_ucd_u8: u>>9 selects the first index, then 3, 2, 2 and 2 further bits of `u` pick the entry at each level. Any u >= 125259 yields 0. */
+ return u<125259u?_hb_ucd_u8[6796+(((_hb_ucd_u8[6276+(((_hb_ucd_u8[5844+(((_hb_ucd_u8[5508+(((_hb_ucd_u8[5262+(u>>2>>2>>2>>3)])<<3)+((u>>2>>2>>2)&7u))])<<2)+((u>>2>>2)&3u))])<<2)+((u>>2)&3u))])<<2)+((u)&3u))]:0;
+}
+static inline unsigned
+_hb_ucd_b4 (const uint8_t* a, unsigned i) /* Reads the i-th 4-bit value from byte array `a`, two values per byte. */
+{ /* Even `i` takes the low nibble of a[i/2]; odd `i` shifts right by 4 to take the high nibble. */
+ return (a[i>>1]>>((i&1u)<<2))&15u;
+}
+static inline int_fast16_t
+_hb_ucd_bmg (unsigned u) /* Returns the signed value that hb_ucd_mirroring() adds to `u`. */
+{ /* First level is a 4-bit entry read with _hb_ucd_b4() at _hb_ucd_u8+7288 using u>>9; then 3, 3, 2 and 1 further bits of `u` walk down to an index into _hb_ucd_i16. Any u >= 65380 yields 0. */
+ return u<65380u?_hb_ucd_i16[((_hb_ucd_u8[7672+(((_hb_ucd_u8[7448+(((_hb_ucd_u8[7352+(((_hb_ucd_b4(7288+_hb_ucd_u8,u>>1>>2>>3>>3))<<3)+((u>>1>>2>>3)&7u))])<<3)+((u>>1>>2)&7u))])<<2)+((u>>1)&3u))])<<1)+((u)&1u)]:0;
+}
+static inline uint_fast8_t
+_hb_ucd_sc (unsigned u) /* Returns the index that hb_ucd_script() uses to subscript _hb_ucd_sc_map. */
+{ /* Five chained lookups inside _hb_ucd_u8: u>>11 selects the first index, then 4, 3, 2 and 2 further bits of `u` pick the entry at each level. Any u >= 918016 yields 2. */
+ return u<918016u?_hb_ucd_u8[11242+(((_hb_ucd_u8[10314+(((_hb_ucd_u8[8938+(((_hb_ucd_u8[8362+(((_hb_ucd_u8[7912+(u>>2>>2>>3>>4)])<<4)+((u>>2>>2>>3)&15u))])<<3)+((u>>2>>2)&7u))])<<2)+((u>>2)&3u))])<<2)+((u)&3u))]:2;
+}
+static inline uint_fast16_t
+_hb_ucd_dm (unsigned u) /* Returns the value hb_ucd_decompose() treats as 0 = no entry, otherwise a 1-based index into its mapping tables. */
+{ /* Three chained lookups: u>>9 indexes _hb_ucd_u8 at 12162, 5 more bits index at 12544, and the low 4 bits select the _hb_ucd_u16 entry from offset 1536. Any u >= 195102 yields 0. */
+ return u<195102u?_hb_ucd_u16[1536+(((_hb_ucd_u8[12544+(((_hb_ucd_u8[12162+(u>>4>>5)])<<5)+((u>>4)&31u))])<<4)+((u)&15u))]:0;
+}
+
+#endif
+
+
+#endif /* HB_UCD_TABLE_HH */
+
+/* == End of generated table == */
diff --git a/thirdparty/harfbuzz/src/hb-ucd.cc b/thirdparty/harfbuzz/src/hb-ucd.cc
new file mode 100644
index 0000000000..ad72a26c04
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-ucd.cc
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "hb.hh"
+#include "hb-unicode.hh"
+#include "hb-machinery.hh"
+
+#include "hb-ucd-table.hh"
+
+static hb_unicode_combining_class_t
+hb_ucd_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+                        hb_codepoint_t unicode, void *user_data HB_UNUSED)
+{
+  /* Callback: the _hb_ucd_ccc() table value for `unicode`, converted to the enum type. */
+  return static_cast<hb_unicode_combining_class_t> (_hb_ucd_ccc (unicode));
+}
+
+static hb_unicode_general_category_t
+hb_ucd_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+                         hb_codepoint_t unicode, void *user_data HB_UNUSED)
+{
+  /* Callback: the _hb_ucd_gc() table value for `unicode`, converted to the enum type. */
+  return static_cast<hb_unicode_general_category_t> (_hb_ucd_gc (unicode));
+}
+
+static hb_codepoint_t
+hb_ucd_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+                  hb_codepoint_t unicode, void *user_data HB_UNUSED)
+{ /* Callback: `unicode` plus the signed value _hb_ucd_bmg() returns for it. */
+  const hb_codepoint_t mirrored = unicode + _hb_ucd_bmg (unicode);
+  return mirrored;
+}
+
+static hb_script_t
+hb_ucd_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+               hb_codepoint_t unicode, void *user_data HB_UNUSED)
+{ /* Callback: _hb_ucd_sc() yields an index; the result is that entry of _hb_ucd_sc_map. */
+  const unsigned script_index = _hb_ucd_sc (unicode);
+  return _hb_ucd_sc_map[script_index];
+}
+
+
+#define SBASE 0xAC00u /* Start of the SCOUNT-long range that _hb_ucd_decompose_hangul() splits. */
+#define LBASE 0x1100u /* Start of the range accepted as `a` in the "L,V" case. */
+#define VBASE 0x1161u /* Start of the range accepted as `b` in the "L,V" case. */
+#define TBASE 0x11A7u /* In the "LV,T" case `b` must be strictly greater than this, so TBASE itself is rejected. */
+#define SCOUNT 11172u /* Length of the SBASE range (19 * 21 * 28). */
+#define LCOUNT 19u /* Length of the LBASE range. */
+#define VCOUNT 21u /* Length of the VBASE range. */
+#define TCOUNT 28u /* Stride between consecutive "LV" values inside the SBASE range. */
+#define NCOUNT (VCOUNT * TCOUNT) /* SBASE-range entries per L value (588). */
+
+static inline bool
+_hb_ucd_decompose_hangul (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b)
+{
+  /* Offset from SBASE; anything below SBASE wraps around and fails the range test. */
+  const unsigned syllable = ab - SBASE;
+  if (syllable >= SCOUNT)
+    return false;
+  const unsigned trailing = syllable % TCOUNT;
+  if (!trailing)
+  {
+    /* L,V */
+    *a = LBASE + syllable / NCOUNT;
+    *b = VBASE + (syllable % NCOUNT) / TCOUNT;
+    return true;
+  }
+
+  /* LV,T */
+  *a = SBASE + (syllable - trailing);
+  *b = TBASE + trailing;
+  return true;
+}
+
+static inline bool
+_hb_ucd_compose_hangul (hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab)
+{
+  /* LV,T: `a` sits on a TCOUNT boundary inside the SBASE range and `b` lies
+   * strictly between TBASE and TBASE + TCOUNT. */
+  const bool a_is_lv = a >= SBASE && a < (SBASE + SCOUNT) && (a - SBASE) % TCOUNT == 0;
+  if (a_is_lv && b > TBASE && b < (TBASE + TCOUNT))
+  {
+    *ab = a + (b - TBASE);
+    return true;
+  }
+
+  /* L,V: `a` in [LBASE, LBASE + LCOUNT) and `b` in [VBASE, VBASE + VCOUNT). */
+  const bool a_is_l = a >= LBASE && a < (LBASE + LCOUNT);
+  const bool b_is_v = b >= VBASE && b < (VBASE + VCOUNT);
+  if (!(a_is_l && b_is_v))
+    return false;
+
+  *ab = SBASE + (a - LBASE) * NCOUNT + (b - VBASE) * TCOUNT;
+  return true;
+}
+
+static int
+_cmp_pair (const void *_key, const void *_item)
+{ /* Three-way compare of the 64-bit key against the item masked with ENCODE3 (0x1FFFFF, 0x1FFFFF, 0). */
+  const uint64_t key = * (const uint64_t*) _key;
+  const uint64_t entry = (* (const uint64_t*) _item) & HB_CODEPOINT_ENCODE3(0x1FFFFFu, 0x1FFFFFu, 0);
+  if (key == entry) return 0;
+  return key < entry ? -1 : +1;
+}
+static int
+_cmp_pair_11_7_14 (const void *_key, const void *_item)
+{ /* Three-way compare of the 32-bit key against the item masked with ENCODE3_11_7_14 (0x1FFFFF, 0x1FFFFF, 0). */
+  const uint32_t key = * (const uint32_t*) _key;
+  const uint32_t entry = (* (const uint32_t*) _item) & HB_CODEPOINT_ENCODE3_11_7_14(0x1FFFFFu, 0x1FFFFFu, 0);
+  if (key == entry) return 0;
+  return key < entry ? -1 : +1;
+}
+
+static hb_bool_t
+hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+                hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab,
+                void *user_data HB_UNUSED)
+{
+  /* Hangul pairs are handled arithmetically by the helper; all other pairs
+   * are looked up with hb_bsearch() in one of two packed pair tables. */
+  if (_hb_ucd_compose_hangul (a, b, ab))
+    return true;
+  hb_codepoint_t composed = 0;
+  /* Pairs with no bits of `a` above the low 11 and `b` in 0x0300..0x037F use
+   * the 32-bit (11/7/14) encoding; everything else uses the 64-bit one. */
+  const bool compact = !(a & 0xFFFFF800u) && (b & 0xFFFFFF80) == 0x0300u;
+  if (!compact)
+  {
+    uint64_t key = HB_CODEPOINT_ENCODE3 (a, b, 0);
+    const uint64_t *hit = hb_bsearch (key, _hb_ucd_dm2_u64_map,
+                                      ARRAY_LENGTH (_hb_ucd_dm2_u64_map),
+                                      sizeof (*_hb_ucd_dm2_u64_map), _cmp_pair);
+    if (likely (!hit)) return false;
+    composed = HB_CODEPOINT_DECODE3_3 (*hit);
+  }
+  else
+  {
+    uint32_t key = HB_CODEPOINT_ENCODE3_11_7_14 (a, b, 0);
+    const uint32_t *hit = hb_bsearch (key, _hb_ucd_dm2_u32_map,
+                                      ARRAY_LENGTH (_hb_ucd_dm2_u32_map),
+                                      sizeof (*_hb_ucd_dm2_u32_map), _cmp_pair_11_7_14);
+    if (likely (!hit)) return false;
+    composed = HB_CODEPOINT_DECODE3_11_7_14_3 (*hit);
+  }
+  /* A decoded value of zero is reported as "no composition". */
+  if (unlikely (!composed)) return false;
+  *ab = composed;
+  return true;
+}
+
+static hb_bool_t
+hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+                  hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b,
+                  void *user_data HB_UNUSED)
+{
+  if (_hb_ucd_decompose_hangul (ab, a, b))
+    return true;
+  /* _hb_ucd_dm() returns 0 for "nothing stored", otherwise a 1-based index into
+   * the concatenation dm1_p0 | dm1_p2 | dm2_u32 | dm2_u64.  Peel the ranges off
+   * one at a time; the two dm1 tables give a single value and set *b to 0. */
+  unsigned slot = _hb_ucd_dm (ab);
+  if (likely (!slot)) return false;
+  slot--;
+  if (slot < ARRAY_LENGTH (_hb_ucd_dm1_p0_map))
+  {
+    *a = _hb_ucd_dm1_p0_map[slot];
+    *b = 0;
+    return true;
+  }
+  slot -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map);
+  if (slot < ARRAY_LENGTH (_hb_ucd_dm1_p2_map))
+  {
+    *a = 0x20000 | _hb_ucd_dm1_p2_map[slot];
+    *b = 0;
+    return true;
+  }
+  slot -= ARRAY_LENGTH (_hb_ucd_dm1_p2_map);
+  if (slot < ARRAY_LENGTH (_hb_ucd_dm2_u32_map))
+  {
+    const uint32_t packed = _hb_ucd_dm2_u32_map[slot];
+    *a = HB_CODEPOINT_DECODE3_11_7_14_1 (packed);
+    *b = HB_CODEPOINT_DECODE3_11_7_14_2 (packed);
+    return true;
+  }
+  slot -= ARRAY_LENGTH (_hb_ucd_dm2_u32_map);
+  const uint64_t packed = _hb_ucd_dm2_u64_map[slot];
+  *a = HB_CODEPOINT_DECODE3_1 (packed);
+  *b = HB_CODEPOINT_DECODE3_2 (packed);
+  return true;
+}
+
+
+#if HB_USE_ATEXIT
+static void free_static_ucd_funcs (); /* Declared ahead of its definition so create() below can pass it to atexit(). */
+#endif
+
+static struct hb_ucd_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucd_unicode_funcs_lazy_loader_t>
+{ /* File-local holder for the table-backed hb_unicode_funcs_t. NOTE(review): the base template is defined elsewhere and presumably invokes create() on first use — confirm there. */
+ static hb_unicode_funcs_t *create () /* Builds the funcs object and wires in this file's callbacks. */
+ {
+ hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
+/* Register the six callbacks defined in this file; each call passes nullptr for both trailing arguments. */
+ hb_unicode_funcs_set_combining_class_func (funcs, hb_ucd_combining_class, nullptr, nullptr);
+ hb_unicode_funcs_set_general_category_func (funcs, hb_ucd_general_category, nullptr, nullptr);
+ hb_unicode_funcs_set_mirroring_func (funcs, hb_ucd_mirroring, nullptr, nullptr);
+ hb_unicode_funcs_set_script_func (funcs, hb_ucd_script, nullptr, nullptr);
+ hb_unicode_funcs_set_compose_func (funcs, hb_ucd_compose, nullptr, nullptr);
+ hb_unicode_funcs_set_decompose_func (funcs, hb_ucd_decompose, nullptr, nullptr);
+/* Freeze the object only after every callback above has been installed. */
+ hb_unicode_funcs_make_immutable (funcs);
+
+#if HB_USE_ATEXIT /* Optional process-exit cleanup. */
+ atexit (free_static_ucd_funcs);
+#endif
+
+ return funcs;
+ }
+} static_ucd_funcs; /* The single instance used by hb_ucd_get_unicode_funcs() and free_static_ucd_funcs(). */
+
+#if HB_USE_ATEXIT /* Compiled only when the atexit() hook in create() is compiled. */
+static
+void free_static_ucd_funcs () /* Exit handler registered from create(). */
+{
+ static_ucd_funcs.free_instance (); /* Delegates to the loader; what is released is defined by the base template, not shown here. */
+}
+#endif
+
+hb_unicode_funcs_t *hb_ucd_get_unicode_funcs ()
+{
+  /* With HB_NO_UCD defined the empty funcs object is returned; otherwise the one held by static_ucd_funcs. */
+#ifdef HB_NO_UCD
+  return hb_unicode_funcs_get_empty ();
+#else
+  return static_ucd_funcs.get_unconst ();
+#endif
+}
diff --git a/thirdparty/harfbuzz/src/hb-unicode-emoji-table.hh b/thirdparty/harfbuzz/src/hb-unicode-emoji-table.hh
new file mode 100644
index 0000000000..eb7776eecb
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-unicode-emoji-table.hh
@@ -0,0 +1,78 @@
+/* == Start of generated table == */
+/*
+ * The following tables are generated by running:
+ *
+ * ./gen-emoji-table.py emoji-data.txt
+ *
+ * on file with this header:
+ *
+ * # emoji-data.txt
+ * # Date: 2020-01-28, 20:52:38 GMT
+ * # © 2020 Unicode®, Inc.
+ * # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+ * # For terms of use, see http://www.unicode.org/terms_of_use.html
+ * #
+ * # Emoji Data for UTS #51
+ * # Version: 13.0
+ * #
+ * # For documentation and usage, see http://www.unicode.org/reports/tr51
+ */
+
+#ifndef HB_UNICODE_EMOJI_TABLE_HH
+#define HB_UNICODE_EMOJI_TABLE_HH
+
+#include "hb-unicode.hh"
+
+static const uint8_t
+_hb_emoji_u8[448] = /* packed data read by _hb_emoji_is_Extended_Pictographic(): bytes 0..63 hold 4-bit first-level indices, bytes 64..191 hold 8-bit second-level indices, bytes 192..447 are a bitmap */
+{
+ 0, 0, 0, 0, 33, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84,118,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 3,
+ 0, 0, 0, 0, 0, 0, 4, 5, 6, 7, 8, 7, 9, 10, 11, 0,
+ 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0,
+ 7, 7, 7, 14, 15, 16, 17, 18, 19, 20, 7, 7, 7, 7, 7, 21,
+ 7, 7, 7, 7, 22, 23, 7, 7, 7, 24, 7, 14, 0, 25, 0, 26,
+ 27, 28, 29, 14, 30, 31, 7, 7, 7, 7, 7, 14, 0, 0, 0, 0,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 22,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,240, 1, 0, 2, 0, 0,
+ 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,254, 7, 3,
+ 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56,
+ 159,255,243,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 31, 0,255,255,255,255,255,255, 31,255, 3, 0, 0, 0, 8, 0,
+ 0, 0, 24, 0,120, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 16, 0, 96, 0, 0, 8, 0, 0, 0, 0,
+ 255,255,255,255,255,255,255,127, 0, 96, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0,240, 1, 64, 0, 0,254, 3, 0,224,255,255,
+ 255,255,255,255, 31, 0, 0, 0,254,127, 0, 0, 0, 0,252,115,
+ 0,254,255,255,255,255,255,255,255,255,255,255,255,255,255, 3,
+ 255,255,255,255,255,255,255, 31,192,255,255,255,255,255,255,255,
+ 255,127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,240,127,
+ 0, 0,224,255,255,255,255,127, 0,112, 0, 0, 0, 0, 0, 0,
+ 0,127, 0,124, 0, 0, 0, 0, 0,127, 0, 0, 0,192,255,255,
+ 0,240,255,255,255,255,255,243,159,255,255,255,255,255,255,255,
+};
+
+static inline unsigned
+_hb_emoji_b4 (const uint8_t* a, unsigned i)
+{
+ return (a[i>>1]>>((i&1u)<<2))&15u; /* i-th 4-bit field of a: byte i/2, low nibble when i is even, high nibble when i is odd */
+}
+static inline unsigned
+_hb_emoji_b1 (const uint8_t* a, unsigned i)
+{
+ return (a[i>>3]>>((i&7u)<<0))&1u; /* i-th bit of a: byte i/8, bit (i mod 8) counted from the least significant bit */
+}
+static inline uint_fast8_t
+_hb_emoji_is_Extended_Pictographic (unsigned u)
+{ /* Returns the table bit (0 or 1) for code point u; any u >= 131069 yields 0. Lookup: 4-bit entry for u>>10, then byte 64 + entry*16 + ((u>>6)&15), then bit (byte*64 + (u&63)) of the bitmap starting at offset 192. */
+ return u<131069u?_hb_emoji_b1(192+_hb_emoji_u8,((_hb_emoji_u8[64+(((_hb_emoji_b4(_hb_emoji_u8,u>>6>>4))<<4)+((u>>6)&15u))])<<6)+((u)&63u)):0;
+}
+
+
+#endif /* HB_UNICODE_EMOJI_TABLE_HH */
+
+/* == End of generated table == */
diff --git a/thirdparty/harfbuzz/src/hb-unicode.cc b/thirdparty/harfbuzz/src/hb-unicode.cc
new file mode 100644
index 0000000000..36070a7f18
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-unicode.cc
@@ -0,0 +1,586 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2011 Codethink Limited
+ * Copyright © 2010,2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Codethink Author(s): Ryan Lortie
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#include "hb-unicode.hh"
+
+
+/**
+ * SECTION: hb-unicode
+ * @title: hb-unicode
+ * @short_description: Unicode character property access
+ * @include: hb.h
+ *
+ * Unicode functions are used to access Unicode character properties.
+ * Clients can pass their own Unicode functions to HarfBuzz, or access
+ * the built-in Unicode functions that come with HarfBuzz.
+ *
+ * With the Unicode functions, one can query various Unicode character
+ * properties, such as General Category, Script, Combining Class, etc.
+ **/
+
+
+/*
+ * hb_unicode_funcs_t
+ */
+
+static hb_unicode_combining_class_t
+hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+ hb_codepoint_t unicode HB_UNUSED,
+ void *user_data HB_UNUSED)
+{
+ return HB_UNICODE_COMBINING_CLASS_NOT_REORDERED; /* nil callback: ignores its arguments and reports class 0 for every code point */
+}
+
+#ifndef HB_DISABLE_DEPRECATED
+static unsigned int
+hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+ hb_codepoint_t unicode HB_UNUSED,
+ void *user_data HB_UNUSED)
+{
+ return 1; /* nil callback: ignores its arguments and reports a constant 1 for every code point */
+}
+#endif
+
+static hb_unicode_general_category_t
+hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+ hb_codepoint_t unicode HB_UNUSED,
+ void *user_data HB_UNUSED)
+{
+ return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER; /* nil callback: every code point is reported as Lo */
+}
+
+static hb_codepoint_t
+hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+ hb_codepoint_t unicode,
+ void *user_data HB_UNUSED)
+{
+ return unicode; /* nil callback: identity mapping, the input code point is returned unchanged */
+}
+
+static hb_script_t
+hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+ hb_codepoint_t unicode HB_UNUSED,
+ void *user_data HB_UNUSED)
+{
+ return HB_SCRIPT_UNKNOWN; /* nil callback: no script information for any code point */
+}
+
+static hb_bool_t
+hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+ hb_codepoint_t a HB_UNUSED,
+ hb_codepoint_t b HB_UNUSED,
+ hb_codepoint_t *ab HB_UNUSED,
+ void *user_data HB_UNUSED)
+{
+ return false; /* nil callback: never composes; *ab is left untouched */
+}
+
+static hb_bool_t
+hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+ hb_codepoint_t ab HB_UNUSED,
+ hb_codepoint_t *a HB_UNUSED,
+ hb_codepoint_t *b HB_UNUSED,
+ void *user_data HB_UNUSED)
+{
+ return false; /* nil callback: never decomposes; *a and *b are left untouched */
+}
+
+
+#ifndef HB_DISABLE_DEPRECATED
+static unsigned int
+hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+ hb_codepoint_t u HB_UNUSED,
+ hb_codepoint_t *decomposed HB_UNUSED,
+ void *user_data HB_UNUSED)
+{
+ return 0; /* nil callback: returns 0 and writes nothing to decomposed */
+}
+#endif
+
+#if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
+#include "hb-glib.h"
+#endif
+#if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
+#include "hb-icu.h"
+#endif
+
+hb_unicode_funcs_t *
+hb_unicode_funcs_get_default ()
+{ /* Compile-time selection of the default implementation: UCD first, then GLib, then built-in ICU; otherwise the empty funcs. */
+#if !defined(HB_NO_UNICODE_FUNCS) && !defined(HB_NO_UCD)
+ return hb_ucd_get_unicode_funcs ();
+#elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
+ return hb_glib_get_unicode_funcs ();
+#elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
+ return hb_icu_get_unicode_funcs ();
+#else
+#define HB_UNICODE_FUNCS_NIL 1 /* records that the nil fallback was chosen; tested by the #error guard after this function */
+ return hb_unicode_funcs_get_empty ();
+#endif
+}
+/* The nil fallback is only accepted when HB_NO_UNICODE_FUNCS was requested; otherwise the build is failed on purpose. */
+#if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL)
+#error "Could not find any Unicode functions implementation, you have to provide your own"
+#error "Consider building hb-ucd.cc. If you absolutely want to build without any, check the code."
+#endif
+
+/**
+ * hb_unicode_funcs_create: (Xconstructor)
+ * @parent: (nullable): Unicode functions to inherit from, or NULL to inherit from the empty Unicode functions
+ *
+ * Creates a Unicode-functions object that starts with copies of @parent's callbacks and callback user data; @parent is made immutable and referenced.
+ *
+ * Return value: (transfer full): The new object, or the empty Unicode functions if object creation fails
+ *
+ * Since: 0.9.2
+ **/
+hb_unicode_funcs_t *
+hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
+{
+ hb_unicode_funcs_t *ufuncs;
+
+ if (!(ufuncs = hb_object_create<hb_unicode_funcs_t> ()))
+ return hb_unicode_funcs_get_empty ();
+
+ if (!parent)
+ parent = hb_unicode_funcs_get_empty ();
+
+ hb_unicode_funcs_make_immutable (parent);
+ ufuncs->parent = hb_unicode_funcs_reference (parent);
+
+ ufuncs->func = parent->func;
+
+ /* We can safely copy user_data from parent since we hold a reference
+ * onto it and it's immutable. We should not copy the destroy notifiers
+ * though. */
+ ufuncs->user_data = parent->user_data;
+
+ return ufuncs;
+}
+
+
+DEFINE_NULL_INSTANCE (hb_unicode_funcs_t) =
+{
+ HB_OBJECT_HEADER_STATIC,
+ /* Static null object: no parent, and a callback table that expands to one hb_unicode_<name>_nil entry per callback. */
+ nullptr, /* parent */
+ {
+#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
+ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_UNICODE_FUNC_IMPLEMENT
+ }
+};
+
+/**
+ * hb_unicode_funcs_get_empty:
+ *
+ * Fetches the shared empty Unicode-functions object, i.e. the static null instance of hb_unicode_funcs_t with its const qualifier cast away.
+ *
+ * Return value: (transfer full): The empty Unicode-functions object
+ *
+ * Since: 0.9.2
+ **/
+hb_unicode_funcs_t *
+hb_unicode_funcs_get_empty ()
+{
+ return const_cast<hb_unicode_funcs_t *> (&Null (hb_unicode_funcs_t));
+}
+
+/**
+ * hb_unicode_funcs_reference: (skip)
+ * @ufuncs: Unicode functions.
+ *
+ * Takes a reference on @ufuncs by delegating to hb_object_reference().
+ *
+ * Return value: (transfer full): The result of hb_object_reference() (presumably @ufuncs itself; helper not visible here)
+ *
+ * Since: 0.9.2
+ **/
+hb_unicode_funcs_t *
+hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs)
+{
+ return hb_object_reference (ufuncs);
+}
+
+/**
+ * hb_unicode_funcs_destroy: (skip)
+ * @ufuncs: Unicode functions.
+ *
+ * Releases a reference via hb_object_destroy(); when that call returns true, runs every callback's destroy notifier on its user data, destroys the parent reference and frees @ufuncs.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs)
+{
+ if (!hb_object_destroy (ufuncs)) return;
+
+#define HB_UNICODE_FUNC_IMPLEMENT(name) \
+ if (ufuncs->destroy.name) ufuncs->destroy.name (ufuncs->user_data.name);
+ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_UNICODE_FUNC_IMPLEMENT
+
+ hb_unicode_funcs_destroy (ufuncs->parent);
+
+ free (ufuncs);
+}
+
+/**
+ * hb_unicode_funcs_set_user_data: (skip)
+ * @ufuncs: Unicode functions.
+ * @key: Key under which @data is stored
+ * @data: Pointer to attach to @ufuncs
+ * @destroy: Notifier associated with @data
+ * @replace: Forwarded unchanged to hb_object_set_user_data()
+ *
+ * Attaches @data to @ufuncs under @key by forwarding every argument to hb_object_set_user_data().
+ *
+ * Return value: The result of hb_object_set_user_data()
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace)
+{
+ return hb_object_set_user_data (ufuncs, key, data, destroy, replace);
+}
+
+/**
+ * hb_unicode_funcs_get_user_data: (skip)
+ * @ufuncs: Unicode functions.
+ * @key: Key to look up
+ *
+ * Fetches the pointer stored on @ufuncs under @key by forwarding to hb_object_get_user_data().
+ *
+ * Return value: (transfer none): The result of hb_object_get_user_data()
+ *
+ * Since: 0.9.2
+ **/
+void *
+hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
+ hb_user_data_key_t *key)
+{
+ return hb_object_get_user_data (ufuncs, key);
+}
+
+
+/**
+ * hb_unicode_funcs_make_immutable:
+ * @ufuncs: Unicode functions.
+ *
+ * Marks @ufuncs immutable; does nothing if it already is. The callback setters return early on immutable objects.
+ *
+ * Since: 0.9.2
+ **/
+void
+hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs)
+{
+ if (hb_object_is_immutable (ufuncs))
+ return;
+
+ hb_object_make_immutable (ufuncs);
+}
+
+/**
+ * hb_unicode_funcs_is_immutable:
+ * @ufuncs: Unicode functions.
+ *
+ * Reports whether @ufuncs is immutable, as determined by hb_object_is_immutable().
+ *
+ * Return value: The result of hb_object_is_immutable()
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs)
+{
+ return hb_object_is_immutable (ufuncs);
+}
+
+/**
+ * hb_unicode_funcs_get_parent:
+ * @ufuncs: Unicode functions.
+ *
+ * Fetches the parent of @ufuncs.
+ *
+ * Return value: The parent, or the empty Unicode functions if @ufuncs has no parent
+ *
+ * Since: 0.9.2
+ **/
+hb_unicode_funcs_t *
+hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs)
+{
+ return ufuncs->parent ? ufuncs->parent : hb_unicode_funcs_get_empty ();
+}
+
+
+#define HB_UNICODE_FUNC_IMPLEMENT(name) /* expands to one hb_unicode_funcs_set_<name>_func() definition per listed callback */ \
+ \
+void \
+hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \
+ hb_unicode_##name##_func_t func, \
+ void *user_data, \
+ hb_destroy_func_t destroy) \
+{ \
+ if (hb_object_is_immutable (ufuncs)) /* immutable objects are left untouched */ \
+ return; /* NOTE(review): the incoming destroy is not invoked on this path, so user_data stays with the caller -- confirm intended */ \
+ \
+ if (ufuncs->destroy.name) /* release the user data of the callback being replaced */ \
+ ufuncs->destroy.name (ufuncs->user_data.name); \
+ \
+ if (func) { \
+ ufuncs->func.name = func; \
+ ufuncs->user_data.name = user_data; \
+ ufuncs->destroy.name = destroy; \
+ } else { /* NULL func: fall back to the parent's callback and user data; the passed user_data and destroy are not stored */ \
+ ufuncs->func.name = ufuncs->parent->func.name; \
+ ufuncs->user_data.name = ufuncs->parent->user_data.name; \
+ ufuncs->destroy.name = nullptr; /* the parent's user data is not ours to destroy */ \
+ } \
+}
+
+HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_UNICODE_FUNC_IMPLEMENT
+
+
+#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \
+ /* one public hb_unicode_<name>() wrapper per entry of the SIMPLE list; each forwards to the same-named method of ufuncs */ \
+return_type \
+hb_unicode_##name (hb_unicode_funcs_t *ufuncs, \
+ hb_codepoint_t unicode) \
+{ \
+ return ufuncs->name (unicode); \
+}
+HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
+#undef HB_UNICODE_FUNC_IMPLEMENT
+
+/**
+ * hb_unicode_compose:
+ * @ufuncs: Unicode functions.
+ * @a: First code point of the pair
+ * @b: Second code point of the pair
+ * @ab: (out): Receives the composed code point
+ *
+ * Asks @ufuncs to compose @a and @b into a single code point by calling its compose() method.
+ *
+ * Return value: The result of the compose() method
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t a,
+ hb_codepoint_t b,
+ hb_codepoint_t *ab)
+{
+ return ufuncs->compose (a, b, ab);
+}
+
+/**
+ * hb_unicode_decompose:
+ * @ufuncs: Unicode functions.
+ * @ab: Code point to decompose
+ * @a: (out): Receives the first code point of the decomposition
+ * @b: (out): Receives the second code point of the decomposition
+ *
+ * Asks @ufuncs to decompose @ab into @a and @b by calling its decompose() method.
+ *
+ * Return value: The result of the decompose() method
+ *
+ * Since: 0.9.2
+ **/
+hb_bool_t
+hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t ab,
+ hb_codepoint_t *a,
+ hb_codepoint_t *b)
+{
+ return ufuncs->decompose (ab, a, b);
+}
+
+#ifndef HB_DISABLE_DEPRECATED
+/**
+ * hb_unicode_decompose_compatibility:
+ * @ufuncs: Unicode functions.
+ * @u: Code point to decompose
+ * @decomposed: (out): Output location handed to the callback
+ *
+ * Calls the decompose_compatibility() method of @ufuncs for @u.
+ *
+ * Return value: The result of the decompose_compatibility() method
+ *
+ * Since: 0.9.2
+ * Deprecated: 2.0.0
+ **/
+unsigned int
+hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t u,
+ hb_codepoint_t *decomposed)
+{
+ return ufuncs->decompose_compatibility (u, decomposed);
+}
+#endif
+
+
+#ifndef HB_NO_OT_SHAPE
+/* See hb-unicode.hh for details.
+ * The array has one entry per combining-class value 0..255. Entry k is k itself,
+ * except where a HB_MODIFIED_COMBINING_CLASS_CCCk macro (defined outside this file) supplies the value. */
+const uint8_t
+_hb_modified_combining_class[256] =
+{
+ 0, /* HB_UNICODE_COMBINING_CLASS_NOT_REORDERED */
+ 1, /* HB_UNICODE_COMBINING_CLASS_OVERLAY */
+ 2, 3, 4, 5, 6,
+ 7, /* HB_UNICODE_COMBINING_CLASS_NUKTA */
+ 8, /* HB_UNICODE_COMBINING_CLASS_KANA_VOICING */
+ 9, /* HB_UNICODE_COMBINING_CLASS_VIRAMA */
+
+ /* Hebrew */
+ HB_MODIFIED_COMBINING_CLASS_CCC10,
+ HB_MODIFIED_COMBINING_CLASS_CCC11,
+ HB_MODIFIED_COMBINING_CLASS_CCC12,
+ HB_MODIFIED_COMBINING_CLASS_CCC13,
+ HB_MODIFIED_COMBINING_CLASS_CCC14,
+ HB_MODIFIED_COMBINING_CLASS_CCC15,
+ HB_MODIFIED_COMBINING_CLASS_CCC16,
+ HB_MODIFIED_COMBINING_CLASS_CCC17,
+ HB_MODIFIED_COMBINING_CLASS_CCC18,
+ HB_MODIFIED_COMBINING_CLASS_CCC19,
+ HB_MODIFIED_COMBINING_CLASS_CCC20,
+ HB_MODIFIED_COMBINING_CLASS_CCC21,
+ HB_MODIFIED_COMBINING_CLASS_CCC22,
+ HB_MODIFIED_COMBINING_CLASS_CCC23,
+ HB_MODIFIED_COMBINING_CLASS_CCC24,
+ HB_MODIFIED_COMBINING_CLASS_CCC25,
+ HB_MODIFIED_COMBINING_CLASS_CCC26,
+
+ /* Arabic */
+ HB_MODIFIED_COMBINING_CLASS_CCC27,
+ HB_MODIFIED_COMBINING_CLASS_CCC28,
+ HB_MODIFIED_COMBINING_CLASS_CCC29,
+ HB_MODIFIED_COMBINING_CLASS_CCC30,
+ HB_MODIFIED_COMBINING_CLASS_CCC31,
+ HB_MODIFIED_COMBINING_CLASS_CCC32,
+ HB_MODIFIED_COMBINING_CLASS_CCC33,
+ HB_MODIFIED_COMBINING_CLASS_CCC34,
+ HB_MODIFIED_COMBINING_CLASS_CCC35,
+
+ /* Syriac */
+ HB_MODIFIED_COMBINING_CLASS_CCC36,
+
+ 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83,
+
+ /* Telugu */
+ HB_MODIFIED_COMBINING_CLASS_CCC84,
+ 85, 86, 87, 88, 89, 90,
+ HB_MODIFIED_COMBINING_CLASS_CCC91,
+ 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,
+
+ /* Thai */
+ HB_MODIFIED_COMBINING_CLASS_CCC103,
+ 104, 105, 106,
+ HB_MODIFIED_COMBINING_CLASS_CCC107,
+ 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
+
+ /* Lao */
+ HB_MODIFIED_COMBINING_CLASS_CCC118,
+ 119, 120, 121,
+ HB_MODIFIED_COMBINING_CLASS_CCC122,
+ 123, 124, 125, 126, 127, 128,
+
+ /* Tibetan */
+ HB_MODIFIED_COMBINING_CLASS_CCC129,
+ HB_MODIFIED_COMBINING_CLASS_CCC130,
+ 131,
+ HB_MODIFIED_COMBINING_CLASS_CCC132,
+ 133, 134, 135, 136, 137, 138, 139,
+
+
+ 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
+ 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
+ 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
+
+ 200, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT */
+ 201,
+ 202, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW */
+ 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
+ 214, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE */
+ 215,
+ 216, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT */
+ 217,
+ 218, /* HB_UNICODE_COMBINING_CLASS_BELOW_LEFT */
+ 219,
+ 220, /* HB_UNICODE_COMBINING_CLASS_BELOW */
+ 221,
+ 222, /* HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT */
+ 223,
+ 224, /* HB_UNICODE_COMBINING_CLASS_LEFT */
+ 225,
+ 226, /* HB_UNICODE_COMBINING_CLASS_RIGHT */
+ 227,
+ 228, /* HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT */
+ 229,
+ 230, /* HB_UNICODE_COMBINING_CLASS_ABOVE */
+ 231,
+ 232, /* HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT */
+ 233, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW */
+ 234, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE */
+ 235, 236, 237, 238, 239,
+ 240, /* HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT */
+ 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+ 255, /* HB_UNICODE_COMBINING_CLASS_INVALID */
+};
+#endif
+
+
+/*
+ * Emoji
+ */
+#ifndef HB_NO_EMOJI_SEQUENCES
+
+#include "hb-unicode-emoji-table.hh"
+
+bool
+_hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp)
+{
+ return _hb_emoji_is_Extended_Pictographic (cp); /* out-of-line wrapper over the generated table lookup; its integer result converts to bool */
+}
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-unicode.h b/thirdparty/harfbuzz/src/hb-unicode.h
new file mode 100644
index 0000000000..61b1b0ba1f
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-unicode.h
@@ -0,0 +1,404 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2011 Codethink Limited
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Codethink Author(s): Ryan Lortie
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_H_IN
+#error "Include <hb.h> instead."
+#endif
+
+#ifndef HB_UNICODE_H
+#define HB_UNICODE_H
+
+#include "hb-common.h"
+
+HB_BEGIN_DECLS
+
+
+/**
+ * HB_UNICODE_MAX
+ *
+ * Upper bound of the Unicode code point range, U+10FFFF, as an unsigned constant.
+ *
+ * Since: 1.9.0
+ **/
+#define HB_UNICODE_MAX 0x10FFFFu
+
+
+/* hb_unicode_general_category_t */
+
+/* Unicode Character Database property: General_Category (gc).
+ * Enumerators are listed alphabetically by their two-letter gc abbreviation and take the implicit values 0..29. */
+typedef enum
+{
+ HB_UNICODE_GENERAL_CATEGORY_CONTROL, /* Cc */
+ HB_UNICODE_GENERAL_CATEGORY_FORMAT, /* Cf */
+ HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED, /* Cn */
+ HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE, /* Co */
+ HB_UNICODE_GENERAL_CATEGORY_SURROGATE, /* Cs */
+ HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER, /* Ll */
+ HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER, /* Lm */
+ HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER, /* Lo */
+ HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER, /* Lt */
+ HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER, /* Lu */
+ HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK, /* Mc */
+ HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK, /* Me */
+ HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK, /* Mn */
+ HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER, /* Nd */
+ HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER, /* Nl */
+ HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER, /* No */
+ HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION, /* Pc */
+ HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION, /* Pd */
+ HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION, /* Pe */
+ HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION, /* Pf */
+ HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION, /* Pi */
+ HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION, /* Po */
+ HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION, /* Ps */
+ HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL, /* Sc */
+ HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL, /* Sk */
+ HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL, /* Sm */
+ HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL, /* So */
+ HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR, /* Zl */
+ HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR, /* Zp */
+ HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR /* Zs */
+} hb_unicode_general_category_t;
+
+/* hb_unicode_combining_class_t */
+
+/* Note: newer versions of Unicode may add new values. Clients should be ready to handle
+ * any value in the 0..254 range being returned from hb_unicode_combining_class().
+ */
+
+/* Unicode Character Database property: Canonical_Combining_Class (ccc).
+ * Each enumerator's value is the numeric ccc value it names; only selected classes have a named enumerator. */
+typedef enum
+{
+ HB_UNICODE_COMBINING_CLASS_NOT_REORDERED = 0,
+ HB_UNICODE_COMBINING_CLASS_OVERLAY = 1,
+ HB_UNICODE_COMBINING_CLASS_NUKTA = 7,
+ HB_UNICODE_COMBINING_CLASS_KANA_VOICING = 8,
+ HB_UNICODE_COMBINING_CLASS_VIRAMA = 9,
+
+ /* Hebrew */
+ HB_UNICODE_COMBINING_CLASS_CCC10 = 10,
+ HB_UNICODE_COMBINING_CLASS_CCC11 = 11,
+ HB_UNICODE_COMBINING_CLASS_CCC12 = 12,
+ HB_UNICODE_COMBINING_CLASS_CCC13 = 13,
+ HB_UNICODE_COMBINING_CLASS_CCC14 = 14,
+ HB_UNICODE_COMBINING_CLASS_CCC15 = 15,
+ HB_UNICODE_COMBINING_CLASS_CCC16 = 16,
+ HB_UNICODE_COMBINING_CLASS_CCC17 = 17,
+ HB_UNICODE_COMBINING_CLASS_CCC18 = 18,
+ HB_UNICODE_COMBINING_CLASS_CCC19 = 19,
+ HB_UNICODE_COMBINING_CLASS_CCC20 = 20,
+ HB_UNICODE_COMBINING_CLASS_CCC21 = 21,
+ HB_UNICODE_COMBINING_CLASS_CCC22 = 22,
+ HB_UNICODE_COMBINING_CLASS_CCC23 = 23,
+ HB_UNICODE_COMBINING_CLASS_CCC24 = 24,
+ HB_UNICODE_COMBINING_CLASS_CCC25 = 25,
+ HB_UNICODE_COMBINING_CLASS_CCC26 = 26,
+
+ /* Arabic */
+ HB_UNICODE_COMBINING_CLASS_CCC27 = 27,
+ HB_UNICODE_COMBINING_CLASS_CCC28 = 28,
+ HB_UNICODE_COMBINING_CLASS_CCC29 = 29,
+ HB_UNICODE_COMBINING_CLASS_CCC30 = 30,
+ HB_UNICODE_COMBINING_CLASS_CCC31 = 31,
+ HB_UNICODE_COMBINING_CLASS_CCC32 = 32,
+ HB_UNICODE_COMBINING_CLASS_CCC33 = 33,
+ HB_UNICODE_COMBINING_CLASS_CCC34 = 34,
+ HB_UNICODE_COMBINING_CLASS_CCC35 = 35,
+
+ /* Syriac */
+ HB_UNICODE_COMBINING_CLASS_CCC36 = 36,
+
+ /* Telugu */
+ HB_UNICODE_COMBINING_CLASS_CCC84 = 84,
+ HB_UNICODE_COMBINING_CLASS_CCC91 = 91,
+
+ /* Thai */
+ HB_UNICODE_COMBINING_CLASS_CCC103 = 103,
+ HB_UNICODE_COMBINING_CLASS_CCC107 = 107,
+
+ /* Lao */
+ HB_UNICODE_COMBINING_CLASS_CCC118 = 118,
+ HB_UNICODE_COMBINING_CLASS_CCC122 = 122,
+
+ /* Tibetan */
+ HB_UNICODE_COMBINING_CLASS_CCC129 = 129,
+ HB_UNICODE_COMBINING_CLASS_CCC130 = 130,
+ HB_UNICODE_COMBINING_CLASS_CCC132 = 132,
+ HB_UNICODE_COMBINING_CLASS_CCC133 = 132, /* misnamed legacy spelling of CCC132 (the value is 132, not 133); kept so existing callers still compile */
+
+ HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT = 200,
+ HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW = 202,
+ HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE = 214,
+ HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT = 216,
+ HB_UNICODE_COMBINING_CLASS_BELOW_LEFT = 218,
+ HB_UNICODE_COMBINING_CLASS_BELOW = 220,
+ HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT = 222,
+ HB_UNICODE_COMBINING_CLASS_LEFT = 224,
+ HB_UNICODE_COMBINING_CLASS_RIGHT = 226,
+ HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT = 228,
+ HB_UNICODE_COMBINING_CLASS_ABOVE = 230,
+ HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT = 232,
+ HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW = 233,
+ HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE = 234,
+
+ HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT = 240,
+
+ HB_UNICODE_COMBINING_CLASS_INVALID = 255
+} hb_unicode_combining_class_t;
+
+
+/*
+ * hb_unicode_funcs_t
+ */
+
+typedef struct hb_unicode_funcs_t hb_unicode_funcs_t;
+
+
+/*
+ * just give me the best implementation you've got there.
+ */
+HB_EXTERN hb_unicode_funcs_t *
+hb_unicode_funcs_get_default (void);
+
+
+HB_EXTERN hb_unicode_funcs_t *
+hb_unicode_funcs_create (hb_unicode_funcs_t *parent);
+
+HB_EXTERN hb_unicode_funcs_t *
+hb_unicode_funcs_get_empty (void);
+
+HB_EXTERN hb_unicode_funcs_t *
+hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs);
+
+HB_EXTERN void
+hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs);
+
+HB_EXTERN hb_bool_t
+hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
+ hb_user_data_key_t *key,
+ void * data,
+ hb_destroy_func_t destroy,
+ hb_bool_t replace);
+
+
+HB_EXTERN void *
+hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
+ hb_user_data_key_t *key);
+
+
+HB_EXTERN void
+hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs);
+
+HB_EXTERN hb_bool_t
+hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs);
+
+HB_EXTERN hb_unicode_funcs_t *
+hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs);
+
+
+/*
+ * funcs
+ */
+
+/* typedefs: signatures of the callbacks installed through the setters declared later in this header.
+ * Every callback receives the funcs object, the code point(s) involved and a user_data pointer
+ * (presumably the one passed to the matching setter -- the call site is not visible here). */
+
+typedef hb_unicode_combining_class_t (*hb_unicode_combining_class_func_t) (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t unicode,
+ void *user_data);
+typedef hb_unicode_general_category_t (*hb_unicode_general_category_func_t) (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t unicode,
+ void *user_data);
+typedef hb_codepoint_t (*hb_unicode_mirroring_func_t) (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t unicode,
+ void *user_data);
+typedef hb_script_t (*hb_unicode_script_func_t) (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t unicode,
+ void *user_data);
+
+typedef hb_bool_t (*hb_unicode_compose_func_t) (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t a,
+ hb_codepoint_t b,
+ hb_codepoint_t *ab,
+ void *user_data);
+typedef hb_bool_t (*hb_unicode_decompose_func_t) (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t ab,
+ hb_codepoint_t *a,
+ hb_codepoint_t *b,
+ void *user_data);
+
+/* setters */
+
+/**
+ * hb_unicode_funcs_set_combining_class_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified): Callback to install, or NULL to fall back to the parent's callback
+ * @user_data: Pointer stored together with @func
+ * @destroy: Notifier stored with @user_data; run on it when the callback is replaced or @ufuncs is destroyed
+ *
+ * Installs the combining-class callback on @ufuncs; the call is ignored if @ufuncs is immutable.
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN void
+hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs,
+ hb_unicode_combining_class_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
+/**
+ * hb_unicode_funcs_set_general_category_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified): Callback to install, or NULL to fall back to the parent's callback
+ * @user_data: Pointer stored together with @func
+ * @destroy: Notifier stored with @user_data; run on it when the callback is replaced or @ufuncs is destroyed
+ *
+ * Installs the general-category callback on @ufuncs; the call is ignored if @ufuncs is immutable.
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN void
+hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs,
+ hb_unicode_general_category_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
+/**
+ * hb_unicode_funcs_set_mirroring_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified): Callback to install, or NULL to fall back to the parent's callback
+ * @user_data: Pointer stored together with @func
+ * @destroy: Notifier stored with @user_data; run on it when the callback is replaced or @ufuncs is destroyed
+ *
+ * Installs the mirroring callback on @ufuncs; the call is ignored if @ufuncs is immutable.
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN void
+hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs,
+ hb_unicode_mirroring_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
+/**
+ * hb_unicode_funcs_set_script_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified): Callback to install, or NULL to fall back to the parent's callback
+ * @user_data: Pointer stored together with @func
+ * @destroy: Notifier stored with @user_data; run on it when the callback is replaced or @ufuncs is destroyed
+ *
+ * Installs the script callback on @ufuncs; the call is ignored if @ufuncs is immutable.
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN void
+hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs,
+ hb_unicode_script_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
+/**
+ * hb_unicode_funcs_set_compose_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified): Callback to install, or NULL to fall back to the parent's callback
+ * @user_data: Pointer stored together with @func
+ * @destroy: Notifier stored with @user_data; run on it when the callback is replaced or @ufuncs is destroyed
+ *
+ * Installs the compose callback on @ufuncs; the call is ignored if @ufuncs is immutable.
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN void
+hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs,
+ hb_unicode_compose_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
+/**
+ * hb_unicode_funcs_set_decompose_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified): Callback to install, or NULL to fall back to the parent's callback
+ * @user_data: Pointer stored together with @func
+ * @destroy: Notifier stored with @user_data; run on it when the callback is replaced or @ufuncs is destroyed
+ *
+ * Installs the decompose callback on @ufuncs; the call is ignored if @ufuncs is immutable.
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN void
+hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs,
+ hb_unicode_decompose_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
+
+/* accessors */
+
+/**
+ * hb_unicode_combining_class:
+ * @ufuncs: Unicode functions to query
+ * @unicode: Code point to look up
+ *
+ * Queries @ufuncs for the Canonical_Combining_Class of @unicode.
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN hb_unicode_combining_class_t
+hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t unicode);
+
+/**
+ * hb_unicode_general_category:
+ * @ufuncs: Unicode functions to query
+ * @unicode: Code point to look up
+ *
+ * Queries @ufuncs for the General_Category of @unicode.
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN hb_unicode_general_category_t
+hb_unicode_general_category (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t unicode);
+
+/**
+ * hb_unicode_mirroring:
+ * @ufuncs: Unicode functions to query
+ * @unicode: Code point to look up
+ *
+ * Queries @ufuncs for the code point its mirroring callback maps @unicode to.
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN hb_codepoint_t
+hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t unicode);
+
+/**
+ * hb_unicode_script:
+ * @ufuncs: Unicode functions to query
+ * @unicode: Code point to look up
+ *
+ * Queries @ufuncs for the script of @unicode.
+ *
+ * Since: 0.9.2
+ **/
+HB_EXTERN hb_script_t
+hb_unicode_script (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t unicode);
+
+HB_EXTERN hb_bool_t
+hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t a,
+ hb_codepoint_t b,
+ hb_codepoint_t *ab);
+
+HB_EXTERN hb_bool_t
+hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t ab,
+ hb_codepoint_t *a,
+ hb_codepoint_t *b);
+
+HB_END_DECLS
+
+#endif /* HB_UNICODE_H */
diff --git a/thirdparty/harfbuzz/src/hb-unicode.hh b/thirdparty/harfbuzz/src/hb-unicode.hh
new file mode 100644
index 0000000000..34d66d7aa3
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-unicode.hh
@@ -0,0 +1,398 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2011 Codethink Limited
+ * Copyright © 2010,2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Codethink Author(s): Ryan Lortie
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_UNICODE_HH
+#define HB_UNICODE_HH
+
+#include "hb.hh"
+
+
+extern HB_INTERNAL const uint8_t _hb_modified_combining_class[256];
+
+/*
+ * hb_unicode_funcs_t
+ */
+
+#define HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS \
+ HB_UNICODE_FUNC_IMPLEMENT (combining_class) \
+ HB_IF_NOT_DEPRECATED (HB_UNICODE_FUNC_IMPLEMENT (eastasian_width)) \
+ HB_UNICODE_FUNC_IMPLEMENT (general_category) \
+ HB_UNICODE_FUNC_IMPLEMENT (mirroring) \
+ HB_UNICODE_FUNC_IMPLEMENT (script) \
+ HB_UNICODE_FUNC_IMPLEMENT (compose) \
+ HB_UNICODE_FUNC_IMPLEMENT (decompose) \
+ HB_IF_NOT_DEPRECATED (HB_UNICODE_FUNC_IMPLEMENT (decompose_compatibility)) \
+ /* ^--- Add new callbacks here */
+
+/* Simple callbacks are those taking a single hb_codepoint_t and returning a single value; the return type is the first macro argument below. */
+#define HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE \
+ HB_UNICODE_FUNC_IMPLEMENT (hb_unicode_combining_class_t, combining_class) \
+ HB_IF_NOT_DEPRECATED (HB_UNICODE_FUNC_IMPLEMENT (unsigned int, eastasian_width)) \
+ HB_UNICODE_FUNC_IMPLEMENT (hb_unicode_general_category_t, general_category) \
+ HB_UNICODE_FUNC_IMPLEMENT (hb_codepoint_t, mirroring) \
+ HB_UNICODE_FUNC_IMPLEMENT (hb_script_t, script) \
+ /* ^--- Add new simple callbacks here */
+
+struct hb_unicode_funcs_t
+{
+ hb_object_header_t header;
+
+ hb_unicode_funcs_t *parent;
+
+#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \
+ return_type name (hb_codepoint_t unicode) { return func.name (this, unicode, user_data.name); }
+HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
+#undef HB_UNICODE_FUNC_IMPLEMENT
+
+ /* Asks the compose callback to combine @a and @b into *@ab.
+  * *@ab is always reset to 0 first; if either input is 0 the callback is
+  * not consulted and false is returned. */
+ hb_bool_t compose (hb_codepoint_t a, hb_codepoint_t b,
+                    hb_codepoint_t *ab)
+ {
+   *ab = 0;
+
+   const bool both_present = a && b;
+   if (likely (both_present))
+     return func.compose (this, a, b, ab, user_data.compose);
+
+   return false;
+ }
+
+ /* Asks the decompose callback to split @ab into *@a and *@b.
+  * Before the callback runs the outputs are preset to (@ab, 0). */
+ hb_bool_t decompose (hb_codepoint_t ab,
+                      hb_codepoint_t *a, hb_codepoint_t *b)
+ {
+   *a = ab;
+   *b = 0;
+
+   const hb_bool_t decomposed = func.decompose (this, ab, a, b, user_data.decompose);
+   return decomposed;
+ }
+
+ /* Writes the compatibility decomposition of @u into @decomposed and
+  * returns its length. The output is always 0-terminated. A result that
+  * consists solely of @u itself is reported as length 0. When
+  * HB_DISABLE_DEPRECATED is defined the callback is never invoked and the
+  * length is always 0. */
+ unsigned int decompose_compatibility (hb_codepoint_t u,
+                                       hb_codepoint_t *decomposed)
+ {
+   unsigned int len;
+#ifdef HB_DISABLE_DEPRECATED
+   len = 0;
+#else
+   len = func.decompose_compatibility (this, u, decomposed, user_data.decompose_compatibility);
+#endif
+
+   const bool maps_to_itself = (len == 1 && u == decomposed[0]);
+   if (maps_to_itself)
+     len = 0;
+
+   decomposed[len] = 0;
+   return len;
+ }
+
+ /* Returns the modified combining class of @u: three hard-coded
+  * per-codepoint overrides, otherwise the table entry for the
+  * codepoint's regular combining class. */
+ unsigned int
+ modified_combining_class (hb_codepoint_t u)
+ {
+   switch (u)
+   {
+     /* XXX This hack belongs to the USE shaper (for Tai Tham):
+      * Reorder SAKOT to ensure it comes after any tone marks. */
+     case 0x1A60u: return 254;
+
+     /* XXX This hack belongs to the Tibetan shaper:
+      * Reorder PADMA to ensure it comes after any vowel marks. */
+     case 0x0FC6u: return 254;
+
+     /* Reorder TSA -PHRU so that it sorts before U+0F74. */
+     case 0x0F39u: return 127;
+
+     default: break;
+   }
+
+   return _hb_modified_combining_class[combining_class (u)];
+ }
+
+ /* True for VARIATION SELECTOR-1..16 and VARIATION SELECTOR-17..256.
+  *
+  * U+180B..180D MONGOLIAN FREE VARIATION SELECTORs are handled in the
+  * Arabic shaper. No need to match them here. */
+ static hb_bool_t
+ is_variation_selector (hb_codepoint_t unicode)
+ {
+   const bool vs_1_to_16 = hb_in_range<hb_codepoint_t> (unicode, 0xFE00u, 0xFE0Fu);
+   const bool vs_17_to_256 = hb_in_range<hb_codepoint_t> (unicode, 0xE0100u, 0xE01EFu);
+   return unlikely (vs_1_to_16 || vs_17_to_256);
+ }
+
+ /* Default_Ignorable codepoints:
+ *
+ * Note: While U+115F, U+1160, U+3164 and U+FFA0 are Default_Ignorable,
+ * we do NOT want to hide them, as the way Uniscribe has implemented them
+ * is with regular spacing glyphs, and that's the way fonts are made to work.
+ * As such, we make exceptions for those four.
+ * We likewise do NOT hide U+1BCA0..1BCA3; see https://github.com/harfbuzz/harfbuzz/issues/503
+ *
+ * Unicode 7.0:
+ * $ grep '; Default_Ignorable_Code_Point ' DerivedCoreProperties.txt | sed 's/;.*#/#/'
+ * 00AD # Cf SOFT HYPHEN
+ * 034F # Mn COMBINING GRAPHEME JOINER
+ * 061C # Cf ARABIC LETTER MARK
+ * 115F..1160 # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER
+ * 17B4..17B5 # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
+ * 180B..180D # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+ * 180E # Cf MONGOLIAN VOWEL SEPARATOR
+ * 200B..200F # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK
+ * 202A..202E # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
+ * 2060..2064 # Cf [5] WORD JOINER..INVISIBLE PLUS
+ * 2065 # Cn <reserved-2065>
+ * 2066..206F # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
+ * 3164 # Lo HANGUL FILLER
+ * FE00..FE0F # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
+ * FEFF # Cf ZERO WIDTH NO-BREAK SPACE
+ * FFA0 # Lo HALFWIDTH HANGUL FILLER
+ * FFF0..FFF8 # Cn [9] <reserved-FFF0>..<reserved-FFF8>
+ * 1BCA0..1BCA3 # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
+ * 1D173..1D17A # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
+ * E0000 # Cn <reserved-E0000>
+ * E0001 # Cf LANGUAGE TAG
+ * E0002..E001F # Cn [30] <reserved-E0002>..<reserved-E001F>
+ * E0020..E007F # Cf [96] TAG SPACE..CANCEL TAG
+ * E0080..E00FF # Cn [128] <reserved-E0080>..<reserved-E00FF>
+ * E0100..E01EF # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
+ * E01F0..E0FFF # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
+ */
+ static hb_bool_t
+ is_default_ignorable (hb_codepoint_t ch)
+ {
+   const hb_codepoint_t plane = ch >> 16;
+
+   /* Supplementary planes: only two of them carry ignorable ranges. */
+   if (unlikely (plane != 0))
+   {
+     if (plane == 0x01)
+       return hb_in_range<hb_codepoint_t> (ch, 0x1D173u, 0x1D17Au);
+     if (plane == 0x0E)
+       return hb_in_range<hb_codepoint_t> (ch, 0xE0000u, 0xE0FFFu);
+     return false;
+   }
+
+   /* BMP: dispatch on the 256-codepoint page. */
+   const hb_codepoint_t page = ch >> 8;
+   switch (page)
+   {
+     case 0x00:
+       return unlikely (ch == 0x00ADu);
+     case 0x03:
+       return unlikely (ch == 0x034Fu);
+     case 0x06:
+       return unlikely (ch == 0x061Cu);
+     case 0x17:
+       return hb_in_range<hb_codepoint_t> (ch, 0x17B4u, 0x17B5u);
+     case 0x18:
+       return hb_in_range<hb_codepoint_t> (ch, 0x180Bu, 0x180Eu);
+     case 0x20:
+       return hb_in_ranges<hb_codepoint_t> (ch, 0x200Bu, 0x200Fu,
+                                            0x202Au, 0x202Eu,
+                                            0x2060u, 0x206Fu);
+     case 0xFE:
+       return hb_in_range<hb_codepoint_t> (ch, 0xFE00u, 0xFE0Fu) || ch == 0xFEFFu;
+     case 0xFF:
+       return hb_in_range<hb_codepoint_t> (ch, 0xFFF0u, 0xFFF8u);
+     default:
+       return false;
+   }
+ }
+
+ /* Space estimates based on:
+ * https://unicode.org/charts/PDF/U2000.pdf
+ * https://docs.microsoft.com/en-us/typography/develop/character-design-standards/whitespace
+ */
+ enum space_t {
+ NOT_SPACE = 0,
+ SPACE_EM = 1,
+ SPACE_EM_2 = 2,
+ SPACE_EM_3 = 3,
+ SPACE_EM_4 = 4,
+ SPACE_EM_5 = 5,
+ SPACE_EM_6 = 6,
+ SPACE_EM_16 = 16,
+ SPACE_4_EM_18, /* 4/18th of an EM! */
+ SPACE,
+ SPACE_FIGURE,
+ SPACE_PUNCTUATION,
+ SPACE_NARROW,
+ };
+ /* Maps a codepoint to the kind of fallback space to use for it.
+  * Covers all GC=Zs chars that can use a fallback; everything else
+  * (U+1680 OGHAM SPACE MARK included) yields NOT_SPACE. */
+ static space_t
+ space_fallback_type (hb_codepoint_t u)
+ {
+   switch (u)
+   {
+     case 0x0020u: /* U+0020 SPACE */
+     case 0x00A0u: /* U+00A0 NO-BREAK SPACE */
+       return SPACE;
+
+     case 0x2001u: /* U+2001 EM QUAD */
+     case 0x2003u: /* U+2003 EM SPACE */
+     case 0x3000u: /* U+3000 IDEOGRAPHIC SPACE */
+       return SPACE_EM;
+
+     case 0x2000u: /* U+2000 EN QUAD */
+     case 0x2002u: /* U+2002 EN SPACE */
+       return SPACE_EM_2;
+
+     case 0x2004u: /* U+2004 THREE-PER-EM SPACE */
+       return SPACE_EM_3;
+     case 0x2005u: /* U+2005 FOUR-PER-EM SPACE */
+       return SPACE_EM_4;
+     case 0x2009u: /* U+2009 THIN SPACE */
+       return SPACE_EM_5;
+     case 0x2006u: /* U+2006 SIX-PER-EM SPACE */
+       return SPACE_EM_6;
+     case 0x200Au: /* U+200A HAIR SPACE */
+       return SPACE_EM_16;
+     case 0x205Fu: /* U+205F MEDIUM MATHEMATICAL SPACE */
+       return SPACE_4_EM_18;
+
+     case 0x2007u: /* U+2007 FIGURE SPACE */
+       return SPACE_FIGURE;
+     case 0x2008u: /* U+2008 PUNCTUATION SPACE */
+       return SPACE_PUNCTUATION;
+     case 0x202Fu: /* U+202F NARROW NO-BREAK SPACE */
+       return SPACE_NARROW;
+
+     default:
+       return NOT_SPACE;
+   }
+ }
+
+ struct {
+#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_func_t name;
+ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_UNICODE_FUNC_IMPLEMENT
+ } func;
+
+ struct {
+#define HB_UNICODE_FUNC_IMPLEMENT(name) void *name;
+ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_UNICODE_FUNC_IMPLEMENT
+ } user_data;
+
+ struct {
+#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_destroy_func_t name;
+ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_UNICODE_FUNC_IMPLEMENT
+ } destroy;
+};
+DECLARE_NULL_INSTANCE (hb_unicode_funcs_t);
+
+
+/*
+ * Modified combining marks
+ */
+
+/* Hebrew
+ *
+ * We permute the "fixed-position" classes 10-26 into the order
+ * described in the SBL Hebrew manual:
+ *
+ * https://www.sbl-site.org/Fonts/SBLHebrewUserManual1.5x.pdf
+ *
+ * (as recommended by:
+ * https://forum.fontlab.com/archive-old-microsoft-volt-group/vista-and-diacritic-ordering/msg22823/)
+ *
+ * More details here:
+ * https://bugzilla.mozilla.org/show_bug.cgi?id=662055
+ */
+#define HB_MODIFIED_COMBINING_CLASS_CCC10 22 /* sheva */
+#define HB_MODIFIED_COMBINING_CLASS_CCC11 15 /* hataf segol */
+#define HB_MODIFIED_COMBINING_CLASS_CCC12 16 /* hataf patah */
+#define HB_MODIFIED_COMBINING_CLASS_CCC13 17 /* hataf qamats */
+#define HB_MODIFIED_COMBINING_CLASS_CCC14 23 /* hiriq */
+#define HB_MODIFIED_COMBINING_CLASS_CCC15 18 /* tsere */
+#define HB_MODIFIED_COMBINING_CLASS_CCC16 19 /* segol */
+#define HB_MODIFIED_COMBINING_CLASS_CCC17 20 /* patah */
+#define HB_MODIFIED_COMBINING_CLASS_CCC18 21 /* qamats */
+#define HB_MODIFIED_COMBINING_CLASS_CCC19 14 /* holam */
+#define HB_MODIFIED_COMBINING_CLASS_CCC20 24 /* qubuts */
+#define HB_MODIFIED_COMBINING_CLASS_CCC21 12 /* dagesh */
+#define HB_MODIFIED_COMBINING_CLASS_CCC22 25 /* meteg */
+#define HB_MODIFIED_COMBINING_CLASS_CCC23 13 /* rafe */
+#define HB_MODIFIED_COMBINING_CLASS_CCC24 10 /* shin dot */
+#define HB_MODIFIED_COMBINING_CLASS_CCC25 11 /* sin dot */
+#define HB_MODIFIED_COMBINING_CLASS_CCC26 26 /* point varika */
+
+/*
+ * Arabic
+ *
+ * Modify to move Shadda (ccc=33) before other marks. See:
+ * https://unicode.org/faq/normalization.html#8
+ * https://unicode.org/faq/normalization.html#9
+ */
+#define HB_MODIFIED_COMBINING_CLASS_CCC27 28 /* fathatan */
+#define HB_MODIFIED_COMBINING_CLASS_CCC28 29 /* dammatan */
+#define HB_MODIFIED_COMBINING_CLASS_CCC29 30 /* kasratan */
+#define HB_MODIFIED_COMBINING_CLASS_CCC30 31 /* fatha */
+#define HB_MODIFIED_COMBINING_CLASS_CCC31 32 /* damma */
+#define HB_MODIFIED_COMBINING_CLASS_CCC32 33 /* kasra */
+#define HB_MODIFIED_COMBINING_CLASS_CCC33 27 /* shadda */
+#define HB_MODIFIED_COMBINING_CLASS_CCC34 34 /* sukun */
+#define HB_MODIFIED_COMBINING_CLASS_CCC35 35 /* superscript alef */
+
+/* Syriac */
+#define HB_MODIFIED_COMBINING_CLASS_CCC36 36 /* superscript alaph */
+
+/* Telugu
+ *
+ * Modify Telugu length marks (ccc=84, ccc=91).
+ * These are the only matras in the main Indic scripts range that have
+ * a non-zero ccc. That makes them reorder with the Halant (ccc=9).
+ * Assign 4 and 5, which are otherwise unassigned.
+ */
+#define HB_MODIFIED_COMBINING_CLASS_CCC84 4 /* length mark */
+#define HB_MODIFIED_COMBINING_CLASS_CCC91 5 /* ai length mark */
+
+/* Thai
+ *
+ * Modify U+0E38 and U+0E39 (ccc=103) to be reordered before U+0E3A (ccc=9).
+ * Assign 3, which is unassigned otherwise.
+ * Uniscribe does this reordering too.
+ */
+#define HB_MODIFIED_COMBINING_CLASS_CCC103 3 /* sara u / sara uu */
+#define HB_MODIFIED_COMBINING_CLASS_CCC107 107 /* mai * */
+
+/* Lao */
+#define HB_MODIFIED_COMBINING_CLASS_CCC118 118 /* sign u / sign uu */
+#define HB_MODIFIED_COMBINING_CLASS_CCC122 122 /* mai * */
+
+/* Tibetan
+ *
+ * In case of multiple vowel-signs, use u first (but after achung)
+ * this allows Dzongkha multi-vowel shortcuts to render correctly
+ */
+#define HB_MODIFIED_COMBINING_CLASS_CCC129 129 /* sign aa */
+#define HB_MODIFIED_COMBINING_CLASS_CCC130 132 /* sign i */
+#define HB_MODIFIED_COMBINING_CLASS_CCC132 131 /* sign u */
+
+/* Misc */
+
+#define HB_UNICODE_GENERAL_CATEGORY_IS_MARK(gen_cat) \
+ (FLAG_UNSAFE (gen_cat) & \
+ (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | \
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
+
+
+/*
+ * Ranges, used for bsearch tables.
+ */
+
+struct hb_unicode_range_t
+{
+  /* bsearch-style comparator: @_key points at a hb_codepoint_t, @_item at
+   * a hb_unicode_range_t. Returns -1 / 0 / +1 when the codepoint lies
+   * below / inside / above the inclusive range [start, end]. */
+  static int
+  cmp (const void *_key, const void *_item)
+  {
+    const hb_codepoint_t needle = *static_cast<const hb_codepoint_t *> (_key);
+    const hb_unicode_range_t &candidate = *static_cast<const hb_unicode_range_t *> (_item);
+
+    if (needle < candidate.start)
+      return -1;
+    if (needle > candidate.end)
+      return +1;
+    return 0;
+  }
+
+  hb_codepoint_t start;
+  hb_codepoint_t end;
+};
+
+/*
+ * Emoji.
+ */
+
+HB_INTERNAL bool
+_hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp);
+
+
+extern "C" HB_INTERNAL hb_unicode_funcs_t *hb_ucd_get_unicode_funcs ();
+
+
+#endif /* HB_UNICODE_HH */
diff --git a/thirdparty/harfbuzz/src/hb-uniscribe.cc b/thirdparty/harfbuzz/src/hb-uniscribe.cc
new file mode 100644
index 0000000000..48a5dc50ad
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-uniscribe.cc
@@ -0,0 +1,1047 @@
+/*
+ * Copyright © 2011,2012,2013 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb.hh"
+
+#ifdef HAVE_UNISCRIBE
+
+#ifdef HB_NO_OT_TAG
+#error "Cannot compile 'uniscribe' shaper with HB_NO_OT_TAG."
+#endif
+
+#include "hb-shaper-impl.hh"
+
+#include <windows.h>
+#include <usp10.h>
+#include <rpc.h>
+
+#ifndef E_NOT_SUFFICIENT_BUFFER
+#define E_NOT_SUFFICIENT_BUFFER HRESULT_FROM_WIN32 (ERROR_INSUFFICIENT_BUFFER)
+#endif
+
+#include "hb-uniscribe.h"
+
+#include "hb-open-file.hh"
+#include "hb-ot-name-table.hh"
+#include "hb-ot-layout.h"
+
+
+/**
+ * SECTION:hb-uniscribe
+ * @title: hb-uniscribe
+ * @short_description: Windows integration
+ * @include: hb-uniscribe.h
+ *
+ * Functions for using HarfBuzz with Windows fonts.
+ **/
+
+typedef HRESULT (WINAPI *SIOT) /*ScriptItemizeOpenType*/(
+ const WCHAR *pwcInChars,
+ int cInChars,
+ int cMaxItems,
+ const SCRIPT_CONTROL *psControl,
+ const SCRIPT_STATE *psState,
+ SCRIPT_ITEM *pItems,
+ OPENTYPE_TAG *pScriptTags,
+ int *pcItems
+);
+
+typedef HRESULT (WINAPI *SSOT) /*ScriptShapeOpenType*/(
+ HDC hdc,
+ SCRIPT_CACHE *psc,
+ SCRIPT_ANALYSIS *psa,
+ OPENTYPE_TAG tagScript,
+ OPENTYPE_TAG tagLangSys,
+ int *rcRangeChars,
+ TEXTRANGE_PROPERTIES **rpRangeProperties,
+ int cRanges,
+ const WCHAR *pwcChars,
+ int cChars,
+ int cMaxGlyphs,
+ WORD *pwLogClust,
+ SCRIPT_CHARPROP *pCharProps,
+ WORD *pwOutGlyphs,
+ SCRIPT_GLYPHPROP *pOutGlyphProps,
+ int *pcGlyphs
+);
+
+typedef HRESULT (WINAPI *SPOT) /*ScriptPlaceOpenType*/(
+ HDC hdc,
+ SCRIPT_CACHE *psc,
+ SCRIPT_ANALYSIS *psa,
+ OPENTYPE_TAG tagScript,
+ OPENTYPE_TAG tagLangSys,
+ int *rcRangeChars,
+ TEXTRANGE_PROPERTIES **rpRangeProperties,
+ int cRanges,
+ const WCHAR *pwcChars,
+ WORD *pwLogClust,
+ SCRIPT_CHARPROP *pCharProps,
+ int cChars,
+ const WORD *pwGlyphs,
+ const SCRIPT_GLYPHPROP *pGlyphProps,
+ int cGlyphs,
+ int *piAdvance,
+ GOFFSET *pGoffset,
+ ABC *pABC
+);
+
+
+/* Fallback implementations. */
+
+/* Fallback with the ScriptItemizeOpenType signature that forwards to
+ * ScriptItemize(). pScriptTags is accepted but not used. */
+static HRESULT WINAPI
+hb_ScriptItemizeOpenType(
+  const WCHAR *pwcInChars,
+  int cInChars,
+  int cMaxItems,
+  const SCRIPT_CONTROL *psControl,
+  const SCRIPT_STATE *psState,
+  SCRIPT_ITEM *pItems,
+  OPENTYPE_TAG *pScriptTags,
+  int *pcItems
+)
+{
+  const HRESULT hr = ScriptItemize (pwcInChars, cInChars, cMaxItems,
+                                    psControl, psState,
+                                    pItems, pcItems);
+  return hr;
+}
+
+/* Fallback with the ScriptShapeOpenType signature that forwards to
+ * ScriptShape(). The script/language tags, the range arguments and
+ * pCharProps are accepted but not used; pOutGlyphProps is passed on
+ * reinterpreted as a SCRIPT_VISATTR array. */
+static HRESULT WINAPI
+hb_ScriptShapeOpenType(
+  HDC hdc,
+  SCRIPT_CACHE *psc,
+  SCRIPT_ANALYSIS *psa,
+  OPENTYPE_TAG tagScript,
+  OPENTYPE_TAG tagLangSys,
+  int *rcRangeChars,
+  TEXTRANGE_PROPERTIES **rpRangeProperties,
+  int cRanges,
+  const WCHAR *pwcChars,
+  int cChars,
+  int cMaxGlyphs,
+  WORD *pwLogClust,
+  SCRIPT_CHARPROP *pCharProps,
+  WORD *pwOutGlyphs,
+  SCRIPT_GLYPHPROP *pOutGlyphProps,
+  int *pcGlyphs
+)
+{
+  return ScriptShape (hdc, psc,
+                      pwcChars, cChars, cMaxGlyphs,
+                      psa,
+                      pwOutGlyphs, pwLogClust,
+                      reinterpret_cast<SCRIPT_VISATTR *> (pOutGlyphProps),
+                      pcGlyphs);
+}
+
+/* Fallback with the ScriptPlaceOpenType signature that forwards to
+ * ScriptPlace(). The script/language tags, the range arguments, the
+ * character arguments and pCharProps are accepted but not used;
+ * pGlyphProps is passed on reinterpreted as a SCRIPT_VISATTR array. */
+static HRESULT WINAPI
+hb_ScriptPlaceOpenType(
+  HDC hdc,
+  SCRIPT_CACHE *psc,
+  SCRIPT_ANALYSIS *psa,
+  OPENTYPE_TAG tagScript,
+  OPENTYPE_TAG tagLangSys,
+  int *rcRangeChars,
+  TEXTRANGE_PROPERTIES **rpRangeProperties,
+  int cRanges,
+  const WCHAR *pwcChars,
+  WORD *pwLogClust,
+  SCRIPT_CHARPROP *pCharProps,
+  int cChars,
+  const WORD *pwGlyphs,
+  const SCRIPT_GLYPHPROP *pGlyphProps,
+  int cGlyphs,
+  int *piAdvance,
+  GOFFSET *pGoffset,
+  ABC *pABC
+)
+{
+  /* Same conversion as the former C-style cast: drop const, then reinterpret. */
+  SCRIPT_VISATTR *vis_attrs =
+    reinterpret_cast<SCRIPT_VISATTR *> (const_cast<SCRIPT_GLYPHPROP *> (pGlyphProps));
+
+  return ScriptPlace (hdc, psc,
+                      pwGlyphs, cGlyphs,
+                      vis_attrs,
+                      psa,
+                      piAdvance, pGoffset, pABC);
+}
+
+
+/* Table of the three OpenType-aware Uniscribe entry points used by the
+ * shaper, resolved at run time. */
+struct hb_uniscribe_shaper_funcs_t
+{
+  SIOT ScriptItemizeOpenType;
+  SSOT ScriptShapeOpenType;
+  SPOT ScriptPlaceOpenType;
+
+  /* Looks the three functions up in an already-loaded usp10.dll. If the
+   * module is not loaded, or any one of the three symbols is missing,
+   * all three slots are pointed at the local fallbacks instead. */
+  void init ()
+  {
+    ScriptItemizeOpenType = nullptr;
+    ScriptShapeOpenType = nullptr;
+    ScriptPlaceOpenType = nullptr;
+
+    HMODULE usp10 = GetModuleHandle (TEXT ("usp10.dll"));
+    if (usp10)
+    {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-function-type"
+      ScriptItemizeOpenType = (SIOT) GetProcAddress (usp10, "ScriptItemizeOpenType");
+      ScriptShapeOpenType = (SSOT) GetProcAddress (usp10, "ScriptShapeOpenType");
+      ScriptPlaceOpenType = (SPOT) GetProcAddress (usp10, "ScriptPlaceOpenType");
+#pragma GCC diagnostic pop
+    }
+
+    const bool all_resolved = ScriptItemizeOpenType &&
+                              ScriptShapeOpenType &&
+                              ScriptPlaceOpenType;
+    if (all_resolved)
+      return;
+
+    DEBUG_MSG (UNISCRIBE, nullptr, "OpenType versions of functions not found; falling back.");
+    ScriptItemizeOpenType = hb_ScriptItemizeOpenType;
+    ScriptShapeOpenType = hb_ScriptShapeOpenType;
+    ScriptPlaceOpenType = hb_ScriptPlaceOpenType;
+  }
+};
+
+#if HB_USE_ATEXIT
+static void free_static_uniscribe_shaper_funcs ();
+#endif
+
+static struct hb_uniscribe_shaper_funcs_lazy_loader_t : hb_lazy_loader_t<hb_uniscribe_shaper_funcs_t,
+                                                                          hb_uniscribe_shaper_funcs_lazy_loader_t>
+{
+  /* Allocates a zeroed function table and resolves its entry points.
+   * With HB_USE_ATEXIT set, also registers the exit-time cleanup hook.
+   * Returns nullptr if the allocation fails. */
+  static hb_uniscribe_shaper_funcs_t *create ()
+  {
+    void *storage = calloc (1, sizeof (hb_uniscribe_shaper_funcs_t));
+    if (unlikely (!storage))
+      return nullptr;
+
+    hb_uniscribe_shaper_funcs_t *table = static_cast<hb_uniscribe_shaper_funcs_t *> (storage);
+    table->init ();
+
+#if HB_USE_ATEXIT
+    atexit (free_static_uniscribe_shaper_funcs);
+#endif
+
+    return table;
+  }
+
+  /* Releases a table obtained from create(). */
+  static void destroy (hb_uniscribe_shaper_funcs_t *p)
+  {
+    free (static_cast<void *> (p));
+  }
+
+  /* There is no null object for this loader. */
+  static hb_uniscribe_shaper_funcs_t *get_null ()
+  {
+    return nullptr;
+  }
+} static_uniscribe_shaper_funcs;
+
+#if HB_USE_ATEXIT
+static
+void free_static_uniscribe_shaper_funcs ()
+{
+ static_uniscribe_shaper_funcs.free_instance (); /* exit-time hook; registered with atexit() from the lazy loader's create() */
+}
+#endif
+
+/* Returns the function table held by static_uniscribe_shaper_funcs. */
+static hb_uniscribe_shaper_funcs_t *
+hb_uniscribe_shaper_get_funcs ()
+{
+  hb_uniscribe_shaper_funcs_t *table = static_uniscribe_shaper_funcs.get_unconst ();
+  return table;
+}
+
+
+/* A feature record together with the position at which the caller
+ * supplied it. */
+struct active_feature_t {
+  OPENTYPE_FEATURE_RECORD rec;
+  unsigned int order;
+
+  /* Three-way comparison: by feature tag, then by order, then by
+   * parameter value. */
+  HB_INTERNAL static int cmp (const void *pa, const void *pb) {
+    const active_feature_t &lhs = *static_cast<const active_feature_t *> (pa);
+    const active_feature_t &rhs = *static_cast<const active_feature_t *> (pb);
+
+    if (lhs.rec.tagFeature != rhs.rec.tagFeature)
+      return lhs.rec.tagFeature < rhs.rec.tagFeature ? -1 : 1;
+    if (lhs.order != rhs.order)
+      return lhs.order < rhs.order ? -1 : 1;
+    if (lhs.rec.lParameter != rhs.rec.lParameter)
+      return lhs.rec.lParameter < rhs.rec.lParameter ? -1 : 1;
+    return 0;
+  }
+
+  /* Equality against a pointed-to feature, defined through cmp(). */
+  bool operator== (const active_feature_t *f)
+  { return cmp (this, f) == 0; }
+};
+
+/* A point at which a feature starts (start == true) or ends. */
+struct feature_event_t {
+  unsigned int index;
+  bool start;
+  active_feature_t feature;
+
+  /* Three-way comparison: by index, then end events before start
+   * events, then by the feature itself. */
+  HB_INTERNAL static int cmp (const void *pa, const void *pb)
+  {
+    const feature_event_t &lhs = *static_cast<const feature_event_t *> (pa);
+    const feature_event_t &rhs = *static_cast<const feature_event_t *> (pb);
+
+    if (lhs.index != rhs.index)
+      return lhs.index < rhs.index ? -1 : 1;
+    if (lhs.start != rhs.start)
+      return lhs.start < rhs.start ? -1 : 1;
+    return active_feature_t::cmp (&lhs.feature, &rhs.feature);
+  }
+};
+
+struct range_record_t {
+ TEXTRANGE_PROPERTIES props;
+ unsigned int index_first; /* == start */
+ unsigned int index_last; /* == end - 1 */
+};
+
+
+/*
+ * shaper face data
+ */
+
+struct hb_uniscribe_face_data_t {
+ HANDLE fh; /* handle returned by AddFontMemResourceEx(); passed to RemoveFontMemResourceEx() on destroy */
+ hb_uniscribe_shaper_funcs_t *funcs; /* table obtained from hb_uniscribe_shaper_get_funcs() */
+ wchar_t face_name[LF_FACESIZE]; /* private face name filled in by _hb_rename_font() */
+};
+
+/* Writes a NUL-terminated private face name of the form "F_" followed by
+ * 24 characters derived from a freshly created UUID.
+ *
+ * face_name should point to a wchar_t[LF_FACESIZE] object.
+ * If plen is non-null it receives the name length, excluding the NUL. */
+static void
+_hb_generate_unique_face_name (wchar_t *face_name, unsigned int *plen)
+{
+  /* A simple, somewhat base64-like alphabet of 64 symbols. */
+  const char *alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-";
+
+  UUID uuid;
+  UuidCreate ((UUID*) &uuid);
+  static_assert ((2 + 3 * (16/2) < LF_FACESIZE), "");
+
+  const unsigned char *bytes = (const unsigned char *) &uuid;
+  wchar_t *out = face_name;
+
+  *out++ = 'F';
+  *out++ = '_';
+
+  /* Each pair of UUID bytes (16 bits) becomes three characters, using the
+   * bits in groups of 5,5,6 as alphabet indices. Eight pairs give 24
+   * characters; with the "F_" prefix the name is 26 chars plus the NUL,
+   * which the static_assert above guarantees fits in LF_FACESIZE. */
+  for (unsigned int pair = 0; pair < 16 / 2; pair++)
+  {
+    const unsigned char first = bytes[2 * pair];
+    const unsigned char second = bytes[2 * pair + 1];
+
+    *out++ = alphabet[first >> 3];
+    *out++ = alphabet[((first << 2) | (second >> 6)) & 0x1f];
+    *out++ = alphabet[second & 0x3f];
+  }
+  *out = 0;
+
+  if (plen)
+    *plen = (unsigned int) (out - face_name);
+}
+
+/* Destroys blob.
+ *
+ * Builds a renamed copy of the font in @blob and returns it as a new
+ * blob; @new_name receives the generated private face name.
+ *
+ * Returns nullptr if the copy cannot be allocated or if the first face
+ * has no 'name' table. The (sanitized) input blob is destroyed on every
+ * return path, and the returned blob owns its buffer (freed with free).
+ */
+static hb_blob_t *
+_hb_rename_font (hb_blob_t *blob, wchar_t *new_name)
+{
+ /* Create a copy of the font data, with the 'name' table replaced by a
+ * table that names the font with our private F_* name created above.
+ * For simplicity, we just append a new 'name' table and update the
+ * sfnt directory; the original table is left in place, but unused.
+ *
+ * The new table will contain just 5 name IDs: family, style, unique,
+ * full, PS. All of them point to the same name data with our unique name.
+ */
+
+ blob = hb_sanitize_context_t ().sanitize_blob<OT::OpenTypeFontFile> (blob);
+
+ unsigned int length, new_length, name_str_len;
+ const char *orig_sfnt_data = hb_blob_get_data (blob, &length);
+
+ _hb_generate_unique_face_name (new_name, &name_str_len);
+
+ static const uint16_t name_IDs[] = { 1, 2, 3, 4, 6 };
+
+ unsigned int name_table_length = OT::name::min_size +
+ ARRAY_LENGTH (name_IDs) * OT::NameRecord::static_size +
+ name_str_len * 2; /* for name data in UTF16BE form */
+ unsigned int padded_name_table_length = ((name_table_length + 3) & ~3); /* rounded up to a multiple of 4 */
+ unsigned int name_table_offset = (length + 3) & ~3; /* new table goes at the 4-byte-aligned end of the original data */
+
+ new_length = name_table_offset + padded_name_table_length;
+ void *new_sfnt_data = calloc (1, new_length); /* zero-filled, so the alignment padding is zero */
+ if (!new_sfnt_data)
+ {
+ hb_blob_destroy (blob);
+ return nullptr;
+ }
+
+ memcpy(new_sfnt_data, orig_sfnt_data, length);
+
+ OT::name &name = StructAtOffset<OT::name> (new_sfnt_data, name_table_offset);
+ name.format = 0;
+ name.count = ARRAY_LENGTH (name_IDs);
+ name.stringOffset = name.get_size ();
+ for (unsigned int i = 0; i < ARRAY_LENGTH (name_IDs); i++)
+ {
+ OT::NameRecord &record = name.nameRecordZ[i];
+ record.platformID = 3;
+ record.encodingID = 1;
+ record.languageID = 0x0409u; /* English */
+ record.nameID = name_IDs[i];
+ record.length = name_str_len * 2;
+ record.offset = 0; /* every record shares the one string written below */
+ }
+
+ /* Copy string data from new_name, converting wchar_t to UTF16BE. */
+ unsigned char *p = &StructAfter<unsigned char> (name);
+ for (unsigned int i = 0; i < name_str_len; i++)
+ {
+ *p++ = new_name[i] >> 8;
+ *p++ = new_name[i] & 0xff;
+ }
+
+ /* Adjust name table entry to point to new name table */
+ const OT::OpenTypeFontFile &file = * (OT::OpenTypeFontFile *) (new_sfnt_data);
+ unsigned int face_count = file.get_face_count ();
+ for (unsigned int face_index = 0; face_index < face_count; face_index++)
+ {
+ /* Note: doing multiple edits (ie. TTC) can be unsafe. There may be
+ * toe-stepping. But we don't really care. */
+ const OT::OpenTypeFontFace &face = file.get_face (face_index);
+ unsigned int index;
+ if (face.find_table_index (HB_OT_TAG_name, &index))
+ {
+ OT::TableRecord &record = const_cast<OT::TableRecord &> (face.get_table (index));
+ record.checkSum.set_for_data (&name, padded_name_table_length);
+ record.offset = name_table_offset;
+ record.length = name_table_length;
+ }
+ else if (face_index == 0) /* Fail if first face doesn't have 'name' table. */
+ {
+ free (new_sfnt_data);
+ hb_blob_destroy (blob);
+ return nullptr;
+ }
+ }
+
+ /* The checkSumAdjustment field in the 'head' table is now wrong,
+ * but that doesn't actually seem to cause any problems so we don't
+ * bother. */
+
+ hb_blob_destroy (blob);
+ return hb_blob_create ((const char *) new_sfnt_data, new_length,
+ HB_MEMORY_MODE_WRITABLE, new_sfnt_data, free);
+}
+
+/* Creates the Uniscribe shaper data for @face.
+ *
+ * Takes a reference to the face's blob, has _hb_rename_font() produce a
+ * copy carrying a private face name (stored in data->face_name), and
+ * registers that copy with AddFontMemResourceEx().
+ *
+ * Returns the new data, or nullptr if allocation fails, the function
+ * table is unavailable, the rename fails, or the font cannot be
+ * registered. On failure nothing allocated here is left behind.
+ */
+hb_uniscribe_face_data_t *
+_hb_uniscribe_shaper_face_data_create (hb_face_t *face)
+{
+  hb_uniscribe_face_data_t *data = (hb_uniscribe_face_data_t *) calloc (1, sizeof (hb_uniscribe_face_data_t));
+  if (unlikely (!data))
+    return nullptr;
+
+  data->funcs = hb_uniscribe_shaper_get_funcs ();
+  if (unlikely (!data->funcs))
+  {
+    free (data);
+    return nullptr;
+  }
+
+  hb_blob_t *blob = hb_face_reference_blob (face);
+  if (unlikely (!hb_blob_get_length (blob)))
+    DEBUG_MSG (UNISCRIBE, face, "Face has empty blob");
+
+  /* _hb_rename_font() destroys the blob passed in and returns a new blob
+   * that this function owns from here on. */
+  blob = _hb_rename_font (blob, data->face_name);
+  if (unlikely (!blob))
+  {
+    free (data);
+    return nullptr;
+  }
+
+  DWORD num_fonts_installed;
+  data->fh = AddFontMemResourceEx ((void *) hb_blob_get_data (blob, nullptr),
+                                   hb_blob_get_length (blob),
+                                   0, &num_fonts_installed);
+
+  /* The renamed blob is only needed for the call above: the system keeps
+   * its own copy of the font data once the call succeeds, and on failure
+   * there is nothing left to use it for. Release it on both paths so it
+   * does not leak. */
+  hb_blob_destroy (blob);
+
+  if (unlikely (!data->fh))
+  {
+    DEBUG_MSG (UNISCRIBE, face, "Face AddFontMemResourceEx() failed");
+    free (data);
+    return nullptr;
+  }
+
+  return data;
+}
+
+void
+_hb_uniscribe_shaper_face_data_destroy (hb_uniscribe_face_data_t *data)
+{
+ RemoveFontMemResourceEx (data->fh); /* unregister the memory font added by _hb_uniscribe_shaper_face_data_create() */
+ free (data); /* data was allocated with calloc() in _hb_uniscribe_shaper_face_data_create() */
+}
+
+
+/*
+ * shaper font data
+ */
+
+struct hb_uniscribe_font_data_t
+{
+ HDC hdc; /* obtained with GetDC (nullptr); released with ReleaseDC() on destroy */
+ mutable LOGFONTW log_font; /* filled in by populate_log_font() */
+ HFONT hfont; /* created from log_font with CreateFontIndirectW() and selected into hdc */
+ mutable SCRIPT_CACHE script_cache; /* released with ScriptFreeCache() on destroy */
+ double x_mult, y_mult; /* From LOGFONT space to HB space. */
+};
+
+/* Fills @lf for @font at @font_size: every field is zeroed, then the
+ * height is set to minus @font_size, the charset to DEFAULT_CHARSET, and
+ * the face name is copied from the face's Uniscribe data.
+ * Always returns true. */
+static bool
+populate_log_font (LOGFONTW *lf,
+                   hb_font_t *font,
+                   unsigned int font_size)
+{
+  memset (lf, 0, sizeof (*lf));
+
+  lf->lfCharSet = DEFAULT_CHARSET;
+  lf->lfHeight = - (int) font_size;
+  memcpy (lf->lfFaceName,
+          font->face->data.uniscribe->face_name,
+          sizeof (lf->lfFaceName));
+
+  return true;
+}
+
+/* Creates the Uniscribe shaper data for @font: picks a font size,
+ * records the LOGFONT-to-HB scale factors, acquires a DC, then builds a
+ * GDI font from the populated LOGFONTW and selects it into that DC.
+ * Returns nullptr if allocation or any of the GDI steps fails. */
+hb_uniscribe_font_data_t *
+_hb_uniscribe_shaper_font_data_create (hb_font_t *font)
+{
+  void *storage = calloc (1, sizeof (hb_uniscribe_font_data_t));
+  if (unlikely (!storage))
+    return nullptr;
+  hb_uniscribe_font_data_t *data = static_cast<hb_uniscribe_font_data_t *> (storage);
+
+  /* Default to the face's upem; a non-zero y_ppem overrides it.
+   * (No idea if the following is even a good idea.) */
+  int font_size = font->face->get_upem ();
+  if (font->y_ppem)
+    font_size = font->y_ppem;
+  if (font_size < 0)
+    font_size = -font_size;
+
+  data->x_mult = (double) font->x_scale / font_size;
+  data->y_mult = (double) font->y_scale / font_size;
+
+  data->hdc = GetDC (nullptr);
+
+  /* Run the remaining steps in order, stopping at the first failure. */
+  bool ok = populate_log_font (&data->log_font, font, font_size);
+  if (unlikely (!ok))
+  {
+    DEBUG_MSG (UNISCRIBE, font, "Font populate_log_font() failed");
+  }
+
+  if (ok)
+  {
+    data->hfont = CreateFontIndirectW (&data->log_font);
+    ok = data->hfont != nullptr;
+    if (unlikely (!ok))
+    {
+      DEBUG_MSG (UNISCRIBE, font, "Font CreateFontIndirectW() failed");
+    }
+  }
+
+  if (ok)
+  {
+    ok = SelectObject (data->hdc, data->hfont) != nullptr;
+    if (!ok)
+    {
+      DEBUG_MSG (UNISCRIBE, font, "Font SelectObject() failed");
+    }
+  }
+
+  if (!ok)
+  {
+    _hb_uniscribe_shaper_font_data_destroy (data);
+    return nullptr;
+  }
+
+  return data;
+}
+
+void
+_hb_uniscribe_shaper_font_data_destroy (hb_uniscribe_font_data_t *data)
+{
+ if (data->hdc) /* each member is released only if set, so a partially built object from the create() failure paths is handled too */
+ ReleaseDC (nullptr, data->hdc);
+ if (data->hfont)
+ DeleteObject (data->hfont);
+ if (data->script_cache)
+ ScriptFreeCache (&data->script_cache);
+ free (data); /* data was allocated with calloc() in _hb_uniscribe_shaper_font_data_create() */
+}
+
+/**
+ * hb_uniscribe_font_get_logfontw:
+ * @font: The #hb_font_t to work upon
+ *
+ * Fetches the LOGFONTW structure that corresponds to the
+ * specified #hb_font_t font.
+ *
+ * Return value: a pointer to the LOGFONTW stored in the font's
+ * Uniscribe data, or %NULL if the font has no such data
+ *
+ **/
+LOGFONTW *
+hb_uniscribe_font_get_logfontw (hb_font_t *font)
+{
+  const hb_uniscribe_font_data_t *data = font->data.uniscribe;
+  if (!data)
+    return nullptr;
+  return &data->log_font;
+}
+
+/**
+ * hb_uniscribe_font_get_hfont:
+ * @font: The #hb_font_t to work upon
+ *
+ * Fetches the HFONT handle that corresponds to the
+ * specified #hb_font_t font.
+ *
+ * Return value: the HFONT retrieved, or %NULL if the font has no
+ * Uniscribe data
+ *
+ **/
+HFONT
+hb_uniscribe_font_get_hfont (hb_font_t *font)
+{
+  const hb_uniscribe_font_data_t *data = font->data.uniscribe;
+  if (!data)
+    return nullptr;
+  return data->hfont;
+}
+
+
+/*
+ * shaper
+ */
+
+
+/*
+ * Shapes the contents of `buffer` with the Uniscribe entry points reached
+ * through face_data->funcs (ScriptItemizeOpenType, ScriptShapeOpenType and
+ * ScriptPlaceOpenType).
+ *
+ * The work is done in these steps:
+ *  1. turn the user features into per-range OPENTYPE_FEATURE_RECORD lists;
+ *  2. convert the buffer's codepoints to UTF-16 inside the scratch buffer;
+ *  3. itemize the text, then shape and place every item;
+ *  4. write glyph ids, clusters, advances and offsets back into `buffer`.
+ *
+ * `shape_plan` is not referenced by this function body.
+ *
+ * Returns true on success. Every FAIL() site returns false; those are
+ * reached before the buffer contents are rewritten in step 4.
+ */
+hb_bool_t
+_hb_uniscribe_shape (hb_shape_plan_t *shape_plan,
+ hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features)
+{
+ hb_face_t *face = font->face;
+ const hb_uniscribe_face_data_t *face_data = face->data.uniscribe;
+ const hb_uniscribe_font_data_t *font_data = font->data.uniscribe;
+ hb_uniscribe_shaper_funcs_t *funcs = face_data->funcs;
+
+ /*
+ * Set up features.
+ */
+ hb_vector_t<OPENTYPE_FEATURE_RECORD> feature_records;
+ hb_vector_t<range_record_t> range_records;
+ if (num_features)
+ {
+ /* Sort features by start/end events. */
+ hb_vector_t<feature_event_t> feature_events;
+ for (unsigned int i = 0; i < num_features; i++)
+ {
+ active_feature_t feature;
+ feature.rec.tagFeature = hb_uint32_swap (features[i].tag);
+ feature.rec.lParameter = features[i].value;
+ feature.order = i;
+
+ feature_event_t *event;
+
+ /* Each feature contributes two events: one where it starts applying
+ * and one where it stops. */
+ event = feature_events.push ();
+ event->index = features[i].start;
+ event->start = true;
+ event->feature = feature;
+
+ event = feature_events.push ();
+ event->index = features[i].end;
+ event->start = false;
+ event->feature = feature;
+ }
+ feature_events.qsort ();
+ /* Add a strategic final event. */
+ {
+ active_feature_t feature;
+ feature.rec.tagFeature = 0;
+ feature.rec.lParameter = 0;
+ feature.order = num_features + 1;
+
+ feature_event_t *event = feature_events.push ();
+ event->index = 0; /* This value does magic. */
+ event->start = false;
+ event->feature = feature;
+ }
+
+ /* Scan events and save features for each range. */
+ hb_vector_t<active_feature_t> active_features;
+ unsigned int last_index = 0;
+ for (unsigned int i = 0; i < feature_events.length; i++)
+ {
+ feature_event_t *event = &feature_events[i];
+
+ if (event->index != last_index)
+ {
+ /* Save a snapshot of active features and the range. */
+ range_record_t *range = range_records.push ();
+
+ unsigned int offset = feature_records.length;
+
+ active_features.qsort ();
+ for (unsigned int j = 0; j < active_features.length; j++)
+ {
+ if (!j || active_features[j].rec.tagFeature != feature_records[feature_records.length - 1].tagFeature)
+ {
+ feature_records.push (active_features[j].rec);
+ }
+ else
+ {
+ /* Overrides value for existing feature. */
+ feature_records[feature_records.length - 1].lParameter = active_features[j].rec.lParameter;
+ }
+ }
+
+ /* Will convert to pointer after all is ready, since feature_records.array
+ * may move as we grow it. */
+ range->props.potfRecords = reinterpret_cast<OPENTYPE_FEATURE_RECORD *> (offset);
+ range->props.cotfRecords = feature_records.length - offset;
+ range->index_first = last_index;
+ range->index_last = event->index - 1;
+
+ last_index = event->index;
+ }
+
+ if (event->start)
+ {
+ active_features.push (event->feature);
+ }
+ else
+ {
+ active_feature_t *feature = active_features.find (&event->feature);
+ if (feature)
+ active_features.remove (feature - active_features.arrayZ);
+ }
+ }
+
+ if (!range_records.length) /* No active feature found. */
+ num_features = 0;
+
+ /* Fixup the pointers. */
+ for (unsigned int i = 0; i < range_records.length; i++)
+ {
+ range_record_t *range = &range_records[i];
+ range->props.potfRecords = (OPENTYPE_FEATURE_RECORD *) feature_records + reinterpret_cast<uintptr_t> (range->props.potfRecords);
+ }
+ }
+
+#define FAIL(...) \
+ HB_STMT_START { \
+ DEBUG_MSG (UNISCRIBE, nullptr, __VA_ARGS__); \
+ return false; \
+ } HB_STMT_END
+
+ HRESULT hr;
+
+ /* Re-entry point: jumped to after the buffer has been grown because
+ * ScriptShapeOpenType reported insufficient space. Everything below is
+ * recomputed from scratch on each pass. */
+retry:
+
+ unsigned int scratch_size;
+ hb_buffer_t::scratch_buffer_t *scratch = buffer->get_scratch_buffer (&scratch_size);
+
+ /* Carves `len` elements of `Type` off the front of the scratch buffer,
+ * rounding the consumed size up to whole scratch elements, and asserts
+ * that enough scratch space remains. */
+#define ALLOCATE_ARRAY(Type, name, len) \
+ Type *name = (Type *) scratch; \
+ do { \
+ unsigned int _consumed = DIV_CEIL ((len) * sizeof (Type), sizeof (*scratch)); \
+ assert (_consumed <= scratch_size); \
+ scratch += _consumed; \
+ scratch_size -= _consumed; \
+ } while (0)
+
+ /* Per-glyph-info slot used to remember where each input character starts
+ * in the UTF-16 text. */
+#define utf16_index() var1.u32
+
+ /* Two UTF-16 units per character is the worst case (surrogate pairs). */
+ ALLOCATE_ARRAY (WCHAR, pchars, buffer->len * 2);
+
+ /* Encode the buffer as UTF-16; codepoints above 0x10FFFF become U+FFFD. */
+ unsigned int chars_len = 0;
+ for (unsigned int i = 0; i < buffer->len; i++)
+ {
+ hb_codepoint_t c = buffer->info[i].codepoint;
+ buffer->info[i].utf16_index() = chars_len;
+ if (likely (c <= 0xFFFFu))
+ pchars[chars_len++] = c;
+ else if (unlikely (c > 0x10FFFFu))
+ pchars[chars_len++] = 0xFFFDu;
+ else {
+ pchars[chars_len++] = 0xD800u + ((c - 0x10000u) >> 10);
+ pchars[chars_len++] = 0xDC00u + ((c - 0x10000u) & ((1u << 10) - 1));
+ }
+ }
+
+ ALLOCATE_ARRAY (WORD, log_clusters, chars_len);
+ ALLOCATE_ARRAY (SCRIPT_CHARPROP, char_props, chars_len);
+
+ if (num_features)
+ {
+ /* Need log_clusters to assign features. */
+ chars_len = 0;
+ for (unsigned int i = 0; i < buffer->len; i++)
+ {
+ hb_codepoint_t c = buffer->info[i].codepoint;
+ unsigned int cluster = buffer->info[i].cluster;
+ log_clusters[chars_len++] = cluster;
+ if (hb_in_range (c, 0x10000u, 0x10FFFFu))
+ log_clusters[chars_len++] = cluster; /* Surrogates. */
+ }
+ }
+
+ /* The -2 in the following is to compensate for possible
+ * alignment needed after the WORD array. sizeof(WORD) == 2. */
+ unsigned int glyphs_size = (scratch_size * sizeof (int) - 2)
+ / (sizeof (WORD) +
+ sizeof (SCRIPT_GLYPHPROP) +
+ sizeof (int) +
+ sizeof (GOFFSET) +
+ sizeof (uint32_t));
+
+ ALLOCATE_ARRAY (WORD, glyphs, glyphs_size);
+ ALLOCATE_ARRAY (SCRIPT_GLYPHPROP, glyph_props, glyphs_size);
+ ALLOCATE_ARRAY (int, advances, glyphs_size);
+ ALLOCATE_ARRAY (GOFFSET, offsets, glyphs_size);
+ ALLOCATE_ARRAY (uint32_t, vis_clusters, glyphs_size);
+
+ /* Note:
+ * We can't touch the contents of glyph_props. Our fallback
+ * implementations of Shape and Place functions use that buffer
+ * by casting it to a different type. It works because they
+ * both agree about it, but if we want to access it here we
+ * need address that issue first.
+ */
+
+#undef ALLOCATE_ARRAY
+
+#define MAX_ITEMS 256
+
+ SCRIPT_ITEM items[MAX_ITEMS + 1];
+ SCRIPT_CONTROL bidi_control = {0};
+ SCRIPT_STATE bidi_state = {0};
+ ULONG script_tags[MAX_ITEMS];
+ int item_count;
+
+ /* MinGW32 doesn't define fMergeNeutralItems, so we bruteforce */
+ //bidi_control.fMergeNeutralItems = true;
+ *(uint32_t*)&bidi_control |= 1u<<24;
+
+ bidi_state.uBidiLevel = HB_DIRECTION_IS_FORWARD (buffer->props.direction) ? 0 : 1;
+ bidi_state.fOverrideDirection = 1;
+
+ /* Itemize the UTF-16 text; each resulting item is shaped and placed
+ * separately in the loop further down. */
+ hr = funcs->ScriptItemizeOpenType (pchars,
+ chars_len,
+ MAX_ITEMS,
+ &bidi_control,
+ &bidi_state,
+ items,
+ script_tags,
+ &item_count);
+ if (unlikely (FAILED (hr)))
+ FAIL ("ScriptItemizeOpenType() failed: 0x%08lx", hr);
+
+#undef MAX_ITEMS
+
+ hb_tag_t lang_tag;
+ unsigned int lang_count = 1;
+ hb_ot_tags_from_script_and_language (buffer->props.script,
+ buffer->props.language,
+ nullptr, nullptr,
+ &lang_count, &lang_tag);
+ OPENTYPE_TAG language_tag = hb_uint32_swap (lang_count ? lang_tag : HB_TAG_NONE);
+ hb_vector_t<TEXTRANGE_PROPERTIES*> range_properties;
+ hb_vector_t<int> range_char_counts;
+
+ unsigned int glyphs_offset = 0;
+ unsigned int glyphs_len;
+ bool backward = HB_DIRECTION_IS_BACKWARD (buffer->props.direction);
+ for (int i = 0; i < item_count; i++)
+ {
+ unsigned int chars_offset = items[i].iCharPos;
+ unsigned int item_chars_len = items[i + 1].iCharPos - chars_offset;
+
+ if (num_features)
+ {
+ /* Build, for this item, the sequence of feature ranges it crosses and
+ * how many UTF-16 units fall into each of them. */
+ range_properties.shrink (0);
+ range_char_counts.shrink (0);
+
+ range_record_t *last_range = &range_records[0];
+
+ for (unsigned int k = chars_offset; k < chars_offset + item_chars_len; k++)
+ {
+ range_record_t *range = last_range;
+ while (log_clusters[k] < range->index_first)
+ range--;
+ while (log_clusters[k] > range->index_last)
+ range++;
+ if (!range_properties.length ||
+ &range->props != range_properties[range_properties.length - 1])
+ {
+ TEXTRANGE_PROPERTIES **props = range_properties.push ();
+ int *c = range_char_counts.push ();
+ if (unlikely (!props || !c))
+ {
+ range_properties.shrink (0);
+ range_char_counts.shrink (0);
+ break;
+ }
+ *props = &range->props;
+ *c = 1;
+ }
+ else
+ {
+ range_char_counts[range_char_counts.length - 1]++;
+ }
+
+ last_range = range;
+ }
+ }
+
+ /* Asking for glyphs in logical order circumvents at least
+ * one bug in Uniscribe. */
+ items[i].a.fLogicalOrder = true;
+
+ retry_shape:
+ hr = funcs->ScriptShapeOpenType (font_data->hdc,
+ &font_data->script_cache,
+ &items[i].a,
+ script_tags[i],
+ language_tag,
+ range_char_counts.arrayZ,
+ range_properties.arrayZ,
+ range_properties.length,
+ pchars + chars_offset,
+ item_chars_len,
+ glyphs_size - glyphs_offset,
+ /* out */
+ log_clusters + chars_offset,
+ char_props + chars_offset,
+ glyphs + glyphs_offset,
+ glyph_props + glyphs_offset,
+ (int *) &glyphs_len);
+
+ if (unlikely (items[i].a.fNoGlyphIndex))
+ FAIL ("ScriptShapeOpenType() set fNoGlyphIndex");
+ if (unlikely (hr == E_OUTOFMEMORY || hr == E_NOT_SUFFICIENT_BUFFER))
+ {
+ /* Not enough room for the output: grow the buffer and start the
+ * whole pass over from the `retry` label. */
+ if (unlikely (!buffer->ensure (buffer->allocated * 2)))
+ FAIL ("Buffer resize failed");
+ goto retry;
+ }
+ if (unlikely (hr == USP_E_SCRIPT_NOT_IN_FONT))
+ {
+ /* Retry this item once with the script set to SCRIPT_UNDEFINED; give
+ * up if that was already the script being tried. */
+ if (items[i].a.eScript == SCRIPT_UNDEFINED)
+ FAIL ("ScriptShapeOpenType() failed: Font doesn't support script");
+ items[i].a.eScript = SCRIPT_UNDEFINED;
+ goto retry_shape;
+ }
+ if (unlikely (FAILED (hr)))
+ {
+ FAIL ("ScriptShapeOpenType() failed: 0x%08lx", hr);
+ }
+
+ /* Rebase this item's cluster map by the number of glyphs already
+ * produced by earlier items. */
+ for (unsigned int j = chars_offset; j < chars_offset + item_chars_len; j++)
+ log_clusters[j] += glyphs_offset;
+
+ hr = funcs->ScriptPlaceOpenType (font_data->hdc,
+ &font_data->script_cache,
+ &items[i].a,
+ script_tags[i],
+ language_tag,
+ range_char_counts.arrayZ,
+ range_properties.arrayZ,
+ range_properties.length,
+ pchars + chars_offset,
+ log_clusters + chars_offset,
+ char_props + chars_offset,
+ item_chars_len,
+ glyphs + glyphs_offset,
+ glyph_props + glyphs_offset,
+ glyphs_len,
+ /* out */
+ advances + glyphs_offset,
+ offsets + glyphs_offset,
+ nullptr);
+ if (unlikely (FAILED (hr)))
+ FAIL ("ScriptPlaceOpenType() failed: 0x%08lx", hr);
+
+ if (DEBUG_ENABLED (UNISCRIBE))
+ fprintf (stderr, "Item %d RTL %d LayoutRTL %d LogicalOrder %d ScriptTag %c%c%c%c\n",
+ i,
+ items[i].a.fRTL,
+ items[i].a.fLayoutRTL,
+ items[i].a.fLogicalOrder,
+ HB_UNTAG (hb_uint32_swap (script_tags[i])));
+
+ glyphs_offset += glyphs_len;
+ }
+ glyphs_len = glyphs_offset;
+
+ /* Ok, we've got everything we need, now compose output buffer,
+ * very, *very*, carefully! */
+
+ /* Calculate visual-clusters. That's what we ship. */
+ for (unsigned int i = 0; i < glyphs_len; i++)
+ vis_clusters[i] = (uint32_t) -1;
+ for (unsigned int i = 0; i < buffer->len; i++) {
+ uint32_t *p = &vis_clusters[log_clusters[buffer->info[i].utf16_index()]];
+ *p = hb_min (*p, buffer->info[i].cluster);
+ }
+ /* Glyphs no character mapped to inherit the cluster of the previous glyph. */
+ for (unsigned int i = 1; i < glyphs_len; i++)
+ if (vis_clusters[i] == (uint32_t) -1)
+ vis_clusters[i] = vis_clusters[i - 1];
+
+#undef utf16_index
+
+ if (unlikely (!buffer->ensure (glyphs_len)))
+ FAIL ("Buffer in error");
+
+#undef FAIL
+
+ /* Set glyph infos */
+ buffer->len = 0;
+ for (unsigned int i = 0; i < glyphs_len; i++)
+ {
+ hb_glyph_info_t *info = &buffer->info[buffer->len++];
+
+ info->codepoint = glyphs[i];
+ info->cluster = vis_clusters[i];
+
+ /* The rest is crap. Let's store position info there for now. */
+ info->mask = advances[i];
+ info->var1.i32 = offsets[i].du;
+ info->var2.i32 = offsets[i].dv;
+ }
+
+ /* Set glyph positions */
+ buffer->clear_positions ();
+ double x_mult = font_data->x_mult, y_mult = font_data->y_mult;
+ for (unsigned int i = 0; i < glyphs_len; i++)
+ {
+ hb_glyph_info_t *info = &buffer->info[i];
+ hb_glyph_position_t *pos = &buffer->pos[i];
+
+ /* TODO vertical */
+ pos->x_advance = x_mult * (int32_t) info->mask;
+ pos->x_offset = x_mult * (backward ? -info->var1.i32 : info->var1.i32);
+ pos->y_offset = y_mult * info->var2.i32;
+ }
+
+ /* Glyphs were requested in logical order (fLogicalOrder above), so a
+ * backward run is reversed here. */
+ if (backward)
+ hb_buffer_reverse (buffer);
+
+ buffer->unsafe_to_break_all ();
+
+ /* Wow, done! */
+ return true;
+}
+
+
+#endif
diff --git a/thirdparty/harfbuzz/src/hb-uniscribe.h b/thirdparty/harfbuzz/src/hb-uniscribe.h
new file mode 100644
index 0000000000..4e4ef9986a
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-uniscribe.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright © 2011 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_UNISCRIBE_H
+#define HB_UNISCRIBE_H
+
+#include "hb.h"
+
+#include <windows.h>
+
+HB_BEGIN_DECLS
+
+
+HB_EXTERN LOGFONTW *
+hb_uniscribe_font_get_logfontw (hb_font_t *font);
+
+HB_EXTERN HFONT
+hb_uniscribe_font_get_hfont (hb_font_t *font);
+
+
+HB_END_DECLS
+
+#endif /* HB_UNISCRIBE_H */
diff --git a/thirdparty/harfbuzz/src/hb-utf.hh b/thirdparty/harfbuzz/src/hb-utf.hh
new file mode 100644
index 0000000000..ff5712d16d
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-utf.hh
@@ -0,0 +1,453 @@
+/*
+ * Copyright © 2011,2012,2014 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_UTF_HH
+#define HB_UTF_HH
+
+#include "hb.hh"
+
+#include "hb-open-type.hh"
+
+
+/* UTF-8 codec: decodes one code point forwards (next) or backwards (prev),
+ * measures NUL-terminated strings and encodes single code points. */
+struct hb_utf8_t
+{
+ typedef uint8_t codepoint_t;
+
+ /* Decodes the sequence starting at `text` into `*unicode` and returns the
+ * position just past the bytes consumed. The first byte is read
+ * unconditionally; `end` only bounds the trailing bytes. On any ill-formed
+ * input `*unicode` is set to `replacement` and only the first byte is
+ * consumed. */
+ static const codepoint_t *
+ next (const codepoint_t *text,
+ const codepoint_t *end,
+ hb_codepoint_t *unicode,
+ hb_codepoint_t replacement)
+ {
+ /* Written to only accept well-formed sequences.
+ * Based on ideas from ICU's U8_NEXT.
+ * Generates one "replacement" for each ill-formed byte. */
+
+ hb_codepoint_t c = *text++;
+
+ if (c > 0x7Fu)
+ {
+ /* Lead bytes 0xC0 and 0xC1 are not in this range, so they fall through
+ * to the final error branch. */
+ if (hb_in_range<hb_codepoint_t> (c, 0xC2u, 0xDFu)) /* Two-byte */
+ {
+ unsigned int t1;
+ /* Subtracting 0x80 and comparing against 0x3F accepts exactly the
+ * trailing bytes 0x80..0xBF; smaller bytes wrap around to a large
+ * unsigned value. */
+ if (likely (text < end &&
+ (t1 = text[0] - 0x80u) <= 0x3Fu))
+ {
+ c = ((c&0x1Fu)<<6) | t1;
+ text++;
+ }
+ else
+ goto error;
+ }
+ else if (hb_in_range<hb_codepoint_t> (c, 0xE0u, 0xEFu)) /* Three-byte */
+ {
+ unsigned int t1, t2;
+ if (likely (1 < end - text &&
+ (t1 = text[0] - 0x80u) <= 0x3Fu &&
+ (t2 = text[1] - 0x80u) <= 0x3Fu))
+ {
+ c = ((c&0xFu)<<12) | (t1<<6) | t2;
+ /* Reject values that fit in fewer bytes, and surrogates. */
+ if (unlikely (c < 0x0800u || hb_in_range<hb_codepoint_t> (c, 0xD800u, 0xDFFFu)))
+ goto error;
+ text += 2;
+ }
+ else
+ goto error;
+ }
+ else if (hb_in_range<hb_codepoint_t> (c, 0xF0u, 0xF4u)) /* Four-byte */
+ {
+ unsigned int t1, t2, t3;
+ if (likely (2 < end - text &&
+ (t1 = text[0] - 0x80u) <= 0x3Fu &&
+ (t2 = text[1] - 0x80u) <= 0x3Fu &&
+ (t3 = text[2] - 0x80u) <= 0x3Fu))
+ {
+ c = ((c&0x7u)<<18) | (t1<<12) | (t2<<6) | t3;
+ /* Only U+10000..U+10FFFF may be encoded with four bytes. */
+ if (unlikely (!hb_in_range<hb_codepoint_t> (c, 0x10000u, 0x10FFFFu)))
+ goto error;
+ text += 3;
+ }
+ else
+ goto error;
+ }
+ else
+ goto error;
+ }
+
+ *unicode = c;
+ return text;
+
+ error:
+ *unicode = replacement;
+ return text;
+ }
+
+ /* Decodes the code point that ends just before `text` and returns the
+ * position where it starts. Steps back over at most three trailing bytes
+ * (10xxxxxx), never past `start`, then re-decodes forwards with next(). If
+ * that does not consume exactly up to the original position, `*unicode` is
+ * set to `replacement` and only one byte is stepped back. */
+ static const codepoint_t *
+ prev (const codepoint_t *text,
+ const codepoint_t *start,
+ hb_codepoint_t *unicode,
+ hb_codepoint_t replacement)
+ {
+ const codepoint_t *end = text--;
+ while (start < text && (*text & 0xc0) == 0x80 && end - text < 4)
+ text--;
+
+ if (likely (next (text, end, unicode, replacement) == end))
+ return text;
+
+ *unicode = replacement;
+ return end - 1;
+ }
+
+ /* Length in bytes of a NUL-terminated string (delegates to ::strlen). */
+ static unsigned int
+ strlen (const codepoint_t *text)
+ { return ::strlen ((const char *) text); }
+
+ /* Number of bytes encode() needs for `unicode`. Values of 0x110000 and
+ * above count as 3, the size of the U+FFFD that encode() writes for them. */
+ static unsigned int
+ encode_len (hb_codepoint_t unicode)
+ {
+ if (unicode < 0x0080u) return 1;
+ if (unicode < 0x0800u) return 2;
+ if (unicode < 0x10000u) return 3;
+ if (unicode < 0x110000u) return 4;
+ return 3;
+ }
+
+ /* Writes `unicode` as UTF-8 at `text` and returns the position past the
+ * bytes written. Surrogates and values above 0x10FFFF are written as
+ * U+FFFD. A multi-byte sequence that does not fit before `end` is not
+ * written at all and `text` is returned unchanged. The single-byte case
+ * writes without consulting `end`. */
+ static codepoint_t *
+ encode (codepoint_t *text,
+ const codepoint_t *end,
+ hb_codepoint_t unicode)
+ {
+ if (unlikely (unicode >= 0xD800u && (unicode <= 0xDFFFu || unicode > 0x10FFFFu)))
+ unicode = 0xFFFDu;
+ if (unicode < 0x0080u)
+ *text++ = unicode;
+ else if (unicode < 0x0800u)
+ {
+ if (end - text >= 2)
+ {
+ *text++ = 0xC0u + (0x1Fu & (unicode >> 6));
+ *text++ = 0x80u + (0x3Fu & (unicode ));
+ }
+ }
+ else if (unicode < 0x10000u)
+ {
+ if (end - text >= 3)
+ {
+ *text++ = 0xE0u + (0x0Fu & (unicode >> 12));
+ *text++ = 0x80u + (0x3Fu & (unicode >> 6));
+ *text++ = 0x80u + (0x3Fu & (unicode ));
+ }
+ }
+ else
+ {
+ if (end - text >= 4)
+ {
+ *text++ = 0xF0u + (0x07u & (unicode >> 18));
+ *text++ = 0x80u + (0x3Fu & (unicode >> 12));
+ *text++ = 0x80u + (0x3Fu & (unicode >> 6));
+ *text++ = 0x80u + (0x3Fu & (unicode ));
+ }
+ }
+ return text;
+ }
+};
+
+
+/* UTF-16 codec over 16-bit code units of type TCodepoint. */
+template <typename TCodepoint>
+struct hb_utf16_xe_t
+{
+  static_assert (sizeof (TCodepoint) == 2, "");
+  typedef TCodepoint codepoint_t;
+
+  /* Decodes the code point starting at `text` into `*unicode` and returns
+   * the position after the units consumed. A surrogate that is not the
+   * first half of a well-formed pair yields `replacement` and consumes a
+   * single unit. */
+  static const codepoint_t *
+  next (const codepoint_t *text,
+        const codepoint_t *end,
+        hb_codepoint_t *unicode,
+        hb_codepoint_t replacement)
+  {
+    const hb_codepoint_t lead = *text++;
+    const bool lead_is_surrogate = hb_in_range<hb_codepoint_t> (lead, 0xD800u, 0xDFFFu);
+
+    if (likely (!lead_is_surrogate))
+    {
+      *unicode = lead;
+      return text;
+    }
+
+    /* Only a high surrogate with a unit following it can open a pair. */
+    if (likely (lead <= 0xDBFFu && text < end))
+    {
+      const hb_codepoint_t trail = *text;
+      if (likely (hb_in_range<hb_codepoint_t> (trail, 0xDC00u, 0xDFFFu)))
+      {
+        const hb_codepoint_t surrogate_bias = (0xD800u << 10) - 0x10000u + 0xDC00u;
+        *unicode = (lead << 10) + trail - surrogate_bias;
+        return text + 1;
+      }
+    }
+
+    /* Unpaired or misordered surrogate. */
+    *unicode = replacement;
+    return text;
+  }
+
+  /* Decodes the code point ending just before `text` into `*unicode` and
+   * returns the position where it starts, never reading before `start`
+   * for the first half of a pair. Unpaired surrogates yield `replacement`
+   * and step back a single unit. */
+  static const codepoint_t *
+  prev (const codepoint_t *text,
+        const codepoint_t *start,
+        hb_codepoint_t *unicode,
+        hb_codepoint_t replacement)
+  {
+    const codepoint_t *cursor = text - 1;
+    const hb_codepoint_t trail = *cursor;
+    const bool trail_is_surrogate = hb_in_range<hb_codepoint_t> (trail, 0xD800u, 0xDFFFu);
+
+    if (likely (!trail_is_surrogate))
+    {
+      *unicode = trail;
+      return cursor;
+    }
+
+    /* Only a low surrogate with a unit preceding it can close a pair. */
+    if (likely (trail >= 0xDC00u && start < cursor))
+    {
+      const hb_codepoint_t lead = cursor[-1];
+      if (likely (hb_in_range<hb_codepoint_t> (lead, 0xD800u, 0xDBFFu)))
+      {
+        const hb_codepoint_t surrogate_bias = (0xD800u << 10) - 0x10000u + 0xDC00u;
+        *unicode = (lead << 10) + trail - surrogate_bias;
+        return cursor - 1;
+      }
+    }
+
+    /* Unpaired or misordered surrogate. */
+    *unicode = replacement;
+    return cursor;
+  }
+
+
+  /* Number of code units before the terminating zero unit. */
+  static unsigned int
+  strlen (const codepoint_t *text)
+  {
+    unsigned int count = 0;
+    for (; *text; text++)
+      count++;
+    return count;
+  }
+
+  /* Code units needed for `unicode`: one below 0x10000, otherwise two. */
+  static unsigned int
+  encode_len (hb_codepoint_t unicode)
+  {
+    if (unicode < 0x10000)
+      return 1;
+    return 2;
+  }
+
+  /* Writes `unicode` as UTF-16 at `text` and returns the position after the
+   * units written. Surrogates and values above 0x10FFFF are written as
+   * U+FFFD. A surrogate pair that does not fit before `end` is not written;
+   * the single-unit case writes without consulting `end`. */
+  static codepoint_t *
+  encode (codepoint_t *text,
+          const codepoint_t *end,
+          hb_codepoint_t unicode)
+  {
+    if (unlikely (unicode >= 0xD800u && (unicode <= 0xDFFFu || unicode > 0x10FFFFu)))
+      unicode = 0xFFFDu;
+
+    if (unicode < 0x10000u)
+    {
+      *text++ = unicode;
+      return text;
+    }
+
+    if (end - text >= 2)
+    {
+      const hb_codepoint_t offset = unicode - 0x10000u;
+      *text++ = 0xD800u + (offset >> 10);
+      *text++ = 0xDC00u + (offset & 0x03FFu);
+    }
+    return text;
+  }
+};
+
+typedef hb_utf16_xe_t<uint16_t> hb_utf16_t;
+typedef hb_utf16_xe_t<OT::HBUINT16> hb_utf16_be_t;
+
+
+/* UTF-32 codec over 32-bit units of type TCodepoint. When `validate` is
+ * true, surrogates and values above 0x10FFFF are substituted on both
+ * decode and encode; when false, values pass through untouched. */
+template <typename TCodepoint, bool validate=true>
+struct hb_utf32_xe_t
+{
+  static_assert (sizeof (TCodepoint) == 4, "");
+  typedef TCodepoint codepoint_t;
+
+  /* Reads the unit at `text` into `*unicode` (or `replacement` if it fails
+   * validation) and returns the next position. `end` is not consulted. */
+  static const TCodepoint *
+  next (const TCodepoint *text,
+        const TCodepoint *end HB_UNUSED,
+        hb_codepoint_t *unicode,
+        hb_codepoint_t replacement)
+  {
+    const hb_codepoint_t value = *text;
+    if (validate && unlikely (value >= 0xD800u && (value <= 0xDFFFu || value > 0x10FFFFu)))
+      *unicode = replacement;
+    else
+      *unicode = value;
+    return text + 1;
+  }
+
+  /* Reads the unit just before `text` into `*unicode` (or `replacement` if
+   * it fails validation) and returns its position. `start` is not
+   * consulted. */
+  static const TCodepoint *
+  prev (const TCodepoint *text,
+        const TCodepoint *start HB_UNUSED,
+        hb_codepoint_t *unicode,
+        hb_codepoint_t replacement)
+  {
+    const TCodepoint *previous = text - 1;
+    const hb_codepoint_t value = *previous;
+    if (validate && unlikely (value >= 0xD800u && (value <= 0xDFFFu || value > 0x10FFFFu)))
+      *unicode = replacement;
+    else
+      *unicode = value;
+    return previous;
+  }
+
+  /* Number of units before the terminating zero unit. */
+  static unsigned int
+  strlen (const TCodepoint *text)
+  {
+    unsigned int count = 0;
+    for (; *text; text++)
+      count++;
+    return count;
+  }
+
+  /* Every code point occupies exactly one unit. */
+  static unsigned int
+  encode_len (hb_codepoint_t unicode HB_UNUSED)
+  {
+    return 1;
+  }
+
+  /* Stores `unicode` (or U+FFFD if it fails validation) at `text` and
+   * returns the next position. `end` is not consulted. */
+  static codepoint_t *
+  encode (codepoint_t *text,
+          const codepoint_t *end HB_UNUSED,
+          hb_codepoint_t unicode)
+  {
+    hb_codepoint_t value = unicode;
+    if (validate && unlikely (value >= 0xD800u && (value <= 0xDFFFu || value > 0x10FFFFu)))
+      value = 0xFFFDu;
+    *text = value;
+    return text + 1;
+  }
+};
+
+typedef hb_utf32_xe_t<uint32_t> hb_utf32_t;
+typedef hb_utf32_xe_t<uint32_t, false> hb_utf32_novalidate_t;
+
+
+/* Latin-1 codec: every byte maps directly to the code point of the same
+ * value. */
+struct hb_latin1_t
+{
+  typedef uint8_t codepoint_t;
+
+  /* Reads the byte at `text` into `*unicode` and returns the next position.
+   * Neither `end` nor `replacement` is consulted. */
+  static const codepoint_t *
+  next (const codepoint_t *text,
+        const codepoint_t *end HB_UNUSED,
+        hb_codepoint_t *unicode,
+        hb_codepoint_t replacement HB_UNUSED)
+  {
+    *unicode = text[0];
+    return text + 1;
+  }
+
+  /* Reads the byte just before `text` into `*unicode` and returns its
+   * position. Neither `start` nor `replacement` is consulted. */
+  static const codepoint_t *
+  prev (const codepoint_t *text,
+        const codepoint_t *start HB_UNUSED,
+        hb_codepoint_t *unicode,
+        hb_codepoint_t replacement HB_UNUSED)
+  {
+    const codepoint_t *previous = text - 1;
+    *unicode = *previous;
+    return previous;
+  }
+
+  /* Number of bytes before the terminating zero byte. */
+  static unsigned int
+  strlen (const codepoint_t *text)
+  {
+    unsigned int count = 0;
+    for (; *text; text++)
+      count++;
+    return count;
+  }
+
+  /* Every code point occupies exactly one byte. */
+  static unsigned int
+  encode_len (hb_codepoint_t unicode HB_UNUSED)
+  {
+    return 1;
+  }
+
+  /* Stores `unicode` at `text`, writing '?' for values of 0x100 and above,
+   * and returns the next position. `end` is not consulted. */
+  static codepoint_t *
+  encode (codepoint_t *text,
+          const codepoint_t *end HB_UNUSED,
+          hb_codepoint_t unicode)
+  {
+    hb_codepoint_t value = unicode;
+    if (unlikely (value >= 0x0100u))
+      value = '?';
+    *text = value;
+    return text + 1;
+  }
+};
+
+
+/* ASCII codec: bytes below 0x80 map to themselves; anything else is
+ * substituted on decode and written as '?' on encode. */
+struct hb_ascii_t
+{
+  typedef uint8_t codepoint_t;
+
+  /* Reads the byte at `text` into `*unicode`, substituting `replacement`
+   * for bytes of 0x80 and above, and returns the next position. `end` is
+   * not consulted.
+   *
+   * `replacement` is deliberately not marked HB_UNUSED: it is used here,
+   * matching the declaration of prev(). */
+  static const codepoint_t *
+  next (const codepoint_t *text,
+        const codepoint_t *end HB_UNUSED,
+        hb_codepoint_t *unicode,
+        hb_codepoint_t replacement)
+  {
+    *unicode = *text++;
+    if (*unicode >= 0x0080u)
+      *unicode = replacement;
+    return text;
+  }
+
+  /* Reads the byte just before `text` into `*unicode`, substituting
+   * `replacement` for bytes of 0x80 and above, and returns its position.
+   * `start` is not consulted. */
+  static const codepoint_t *
+  prev (const codepoint_t *text,
+        const codepoint_t *start HB_UNUSED,
+        hb_codepoint_t *unicode,
+        hb_codepoint_t replacement)
+  {
+    *unicode = *--text;
+    if (*unicode >= 0x0080u)
+      *unicode = replacement;
+    return text;
+  }
+
+  /* Number of bytes before the terminating zero byte. */
+  static unsigned int
+  strlen (const codepoint_t *text)
+  {
+    unsigned int l = 0;
+    while (*text++) l++;
+    return l;
+  }
+
+  /* Every code point occupies exactly one byte. */
+  static unsigned int
+  encode_len (hb_codepoint_t unicode HB_UNUSED)
+  {
+    return 1;
+  }
+
+  /* Stores `unicode` at `text`, writing '?' for values of 0x80 and above,
+   * and returns the next position. `end` is not consulted. */
+  static codepoint_t *
+  encode (codepoint_t *text,
+          const codepoint_t *end HB_UNUSED,
+          hb_codepoint_t unicode)
+  {
+    if (unlikely (unicode >= 0x0080u))
+      unicode = '?';
+    *text++ = unicode;
+    return text;
+  }
+};
+
+#endif /* HB_UTF_HH */
diff --git a/thirdparty/harfbuzz/src/hb-vector.hh b/thirdparty/harfbuzz/src/hb-vector.hh
new file mode 100644
index 0000000000..079b94a6b4
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-vector.hh
@@ -0,0 +1,313 @@
+/*
+ * Copyright © 2017,2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_VECTOR_HH
+#define HB_VECTOR_HH
+
+#include "hb.hh"
+#include "hb-array.hh"
+#include "hb-null.hh"
+
+
+/* Growable array of `Type`.
+ *
+ * Storage is grown with realloc() and released with free(); new slots are
+ * zero-filled with memset() and elements are shifted with memmove(). No
+ * constructors or destructors are run on elements by this class itself
+ * (fini_deep() calls each element's fini() explicitly).
+ *
+ * A failed allocation is remembered in `allocated` (set to -1); from then
+ * on alloc()/resize() fail and push() hands out the shared Crap(Type)
+ * object. Check in_error() to detect this. */
+template <typename Type>
+struct hb_vector_t
+{
+  typedef Type item_t;
+  static constexpr unsigned item_size = hb_static_size (Type);
+
+  hb_vector_t () { init (); }
+  hb_vector_t (const hb_vector_t &o)
+  {
+    init ();
+    alloc (o.length);
+    hb_copy (o, *this);
+  }
+  /* Takes over o's storage and leaves o empty. */
+  hb_vector_t (hb_vector_t &&o)
+  {
+    allocated = o.allocated;
+    length = o.length;
+    arrayZ = o.arrayZ;
+    o.init ();
+  }
+  ~hb_vector_t () { fini (); }
+
+  private:
+  int allocated; /* == -1 means allocation failed. */
+  public:
+  unsigned int length;
+  public:
+  Type *arrayZ;
+
+  /* Resets the fields to the empty state without freeing anything. */
+  void init ()
+  {
+    allocated = length = 0;
+    arrayZ = nullptr;
+  }
+
+  /* Frees the storage and returns to the empty state. */
+  void fini ()
+  {
+    free (arrayZ);
+    init ();
+  }
+  /* Like fini(), but first calls fini() on every element. */
+  void fini_deep ()
+  {
+    unsigned int count = length;
+    for (unsigned int i = 0; i < count; i++)
+      arrayZ[i].fini ();
+    fini ();
+  }
+
+  /* Drops all elements; the allocation is kept. */
+  void reset () { resize (0); }
+
+  hb_vector_t& operator = (const hb_vector_t &o)
+  {
+    /* Self-assignment must leave the contents alone: reset() below would
+     * otherwise set our length to zero before the copy reads it back from
+     * the very same object, silently emptying the vector. */
+    if (unlikely (this == &o))
+      return *this;
+    reset ();
+    alloc (o.length);
+    hb_copy (o, *this);
+    return *this;
+  }
+  /* Releases our storage, takes over o's and leaves o empty. */
+  hb_vector_t& operator = (hb_vector_t &&o)
+  {
+    fini ();
+    allocated = o.allocated;
+    length = o.length;
+    arrayZ = o.arrayZ;
+    o.init ();
+    return *this;
+  }
+
+  hb_bytes_t as_bytes () const
+  { return hb_bytes_t ((const char *) arrayZ, length * item_size); }
+
+  bool operator == (const hb_vector_t &o) const { return as_array () == o.as_array (); }
+  bool operator != (const hb_vector_t &o) const { return !(*this == o); }
+  uint32_t hash () const { return as_array ().hash (); }
+
+  /* Indexing never reads out of bounds: an index outside [0, length)
+   * (including negative values, which wrap to large unsigned ones) yields
+   * Crap(Type) for the mutable overload and Null(Type) for the const one. */
+  Type& operator [] (int i_)
+  {
+    unsigned int i = (unsigned int) i_;
+    if (unlikely (i >= length))
+      return Crap (Type);
+    return arrayZ[i];
+  }
+  const Type& operator [] (int i_) const
+  {
+    unsigned int i = (unsigned int) i_;
+    if (unlikely (i >= length))
+      return Null (Type);
+    return arrayZ[i];
+  }
+
+  /* Last element, via the bounds-checked operator []. */
+  Type& tail () { return (*this)[length - 1]; }
+  const Type& tail () const { return (*this)[length - 1]; }
+
+  /* True when the vector holds at least one element. */
+  explicit operator bool () const { return length; }
+  unsigned get_size () const { return length * item_size; }
+
+  /* Sink interface. */
+  template <typename T>
+  hb_vector_t& operator << (T&& v) { push (hb_forward<T> (v)); return *this; }
+
+  hb_array_t<Type> as_array () { return hb_array (arrayZ, length); }
+  hb_array_t<const Type> as_array () const { return hb_array (arrayZ, length); }
+
+  /* Iterator. */
+  typedef hb_array_t<const Type> iter_t;
+  typedef hb_array_t<Type> writer_t;
+  iter_t iter () const { return as_array (); }
+  writer_t writer () { return as_array (); }
+  operator iter_t () const { return iter (); }
+  operator writer_t () { return writer (); }
+
+  hb_array_t<const Type> sub_array (unsigned int start_offset, unsigned int count) const
+  { return as_array ().sub_array (start_offset, count); }
+  hb_array_t<const Type> sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */) const
+  { return as_array ().sub_array (start_offset, count); }
+  hb_array_t<Type> sub_array (unsigned int start_offset, unsigned int count)
+  { return as_array ().sub_array (start_offset, count); }
+  hb_array_t<Type> sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */)
+  { return as_array ().sub_array (start_offset, count); }
+
+  hb_sorted_array_t<Type> as_sorted_array ()
+  { return hb_sorted_array (arrayZ, length); }
+  hb_sorted_array_t<const Type> as_sorted_array () const
+  { return hb_sorted_array (arrayZ, length); }
+
+  template <typename T> explicit operator T * () { return arrayZ; }
+  template <typename T> explicit operator const T * () const { return arrayZ; }
+
+  Type * operator + (unsigned int i) { return arrayZ + i; }
+  const Type * operator + (unsigned int i) const { return arrayZ + i; }
+
+  /* Appends one zero-filled element and returns a pointer to it. If the
+   * vector cannot grow, returns a pointer to Crap(Type) instead, so the
+   * result is always writable. */
+  Type *push ()
+  {
+    if (unlikely (!resize (length + 1)))
+      return &Crap (Type);
+    return &arrayZ[length - 1];
+  }
+  /* Appends one element assigned from v; see push () for the failure case. */
+  template <typename T>
+  Type *push (T&& v)
+  {
+    Type *p = push ();
+    *p = hb_forward<T> (v);
+    return p;
+  }
+
+  /* True once an allocation has failed. */
+  bool in_error () const { return allocated < 0; }
+
+  /* Allocate for size but don't adjust length. */
+  bool alloc (unsigned int size)
+  {
+    if (unlikely (allocated < 0))
+      return false;
+
+    if (likely (size <= (unsigned) allocated))
+      return true;
+
+    /* Reallocate */
+
+    /* Grow by half the current capacity plus 8 until `size` fits. */
+    unsigned int new_allocated = allocated;
+    while (size >= new_allocated)
+      new_allocated += (new_allocated >> 1) + 8;
+
+    Type *new_array = nullptr;
+    /* The capacity is stored in an int and multiplied by the element size,
+     * so reject anything that would overflow either. */
+    bool overflows =
+      (int) new_allocated < 0 ||
+      (new_allocated < (unsigned) allocated) ||
+      hb_unsigned_mul_overflows (new_allocated, sizeof (Type));
+    if (likely (!overflows))
+      new_array = (Type *) realloc (arrayZ, new_allocated * sizeof (Type));
+
+    if (unlikely (!new_array))
+    {
+      /* Sticky error state; arrayZ still points at the old storage. */
+      allocated = -1;
+      return false;
+    }
+
+    arrayZ = new_array;
+    allocated = new_allocated;
+
+    return true;
+  }
+
+  /* Sets the length to size_ (negative values count as 0), zero-filling any
+   * newly exposed elements. Returns false if the storage could not grow. */
+  bool resize (int size_)
+  {
+    unsigned int size = size_ < 0 ? 0u : (unsigned int) size_;
+    if (!alloc (size))
+      return false;
+
+    if (size > length)
+      memset (arrayZ + length, 0, (size - length) * sizeof (*arrayZ));
+
+    length = size;
+    return true;
+  }
+
+  /* Removes and returns the last element, or Null(Type) when empty. */
+  Type pop ()
+  {
+    if (!length) return Null (Type);
+    return hb_move (arrayZ[--length]); /* Does this move actually work? */
+  }
+
+  /* Removes element i by shifting the tail down; out-of-range i is ignored. */
+  void remove (unsigned int i)
+  {
+    if (unlikely (i >= length))
+      return;
+    memmove (static_cast<void *> (&arrayZ[i]),
+             static_cast<void *> (&arrayZ[i + 1]),
+             (length - i - 1) * sizeof (Type));
+    length--;
+  }
+
+  /* Truncates to size_ elements (negative counts as 0); never grows. */
+  void shrink (int size_)
+  {
+    unsigned int size = size_ < 0 ? 0u : (unsigned int) size_;
+    if (size < length)
+      length = size;
+  }
+
+  /* Linear search with operator ==; returns nullptr when nothing matches. */
+  template <typename T>
+  Type *find (T v)
+  {
+    for (unsigned int i = 0; i < length; i++)
+      if (arrayZ[i] == v)
+        return &arrayZ[i];
+    return nullptr;
+  }
+  template <typename T>
+  const Type *find (T v) const
+  {
+    for (unsigned int i = 0; i < length; i++)
+      if (arrayZ[i] == v)
+        return &arrayZ[i];
+    return nullptr;
+  }
+
+  void qsort (int (*cmp)(const void*, const void*))
+  { as_array ().qsort (cmp); }
+  void qsort (unsigned int start = 0, unsigned int end = (unsigned int) -1)
+  { as_array ().qsort (start, end); }
+
+  template <typename T>
+  Type *lsearch (const T &x, Type *not_found = nullptr)
+  { return as_array ().lsearch (x, not_found); }
+  template <typename T>
+  const Type *lsearch (const T &x, const Type *not_found = nullptr) const
+  { return as_array ().lsearch (x, not_found); }
+  template <typename T>
+  bool lfind (const T &x, unsigned *pos = nullptr) const
+  { return as_array ().lfind (x, pos); }
+};
+
+/* An hb_vector_t whose array views are hb_sorted_array_t, which makes the
+ * binary-search helpers below available. Nothing in this struct itself
+ * sorts the elements. */
+template <typename Type>
+struct hb_sorted_vector_t : hb_vector_t<Type>
+{
+  /* Iterator types. */
+  using const_iter_t = hb_sorted_array_t<const Type>;
+  using iter_t = hb_sorted_array_t<Type>;
+
+  /* Sorted-array views over the current contents. */
+  hb_sorted_array_t<Type> as_array ()
+  { return hb_sorted_array (this->arrayZ, this->length); }
+  hb_sorted_array_t<const Type> as_array () const
+  { return hb_sorted_array (this->arrayZ, this->length); }
+
+  /* Iteration entry points and the matching implicit conversions. */
+  iter_t iter ()
+  { return as_array (); }
+  const_iter_t iter () const
+  { return as_array (); }
+  const_iter_t citer () const
+  { return as_array (); }
+  operator iter_t ()
+  { return iter (); }
+  operator const_iter_t () const
+  { return iter (); }
+
+  /* Binary searches, forwarded to the sorted-array view. */
+  template <typename T>
+  Type *bsearch (const T &x, Type *not_found = nullptr)
+  {
+    return as_array ().bsearch (x, not_found);
+  }
+  template <typename T>
+  const Type *bsearch (const T &x, const Type *not_found = nullptr) const
+  {
+    return as_array ().bsearch (x, not_found);
+  }
+  template <typename T>
+  bool bfind (const T &x, unsigned int *i = nullptr,
+              hb_bfind_not_found_t not_found = HB_BFIND_NOT_FOUND_DONT_STORE,
+              unsigned int to_store = (unsigned int) -1) const
+  {
+    return as_array ().bfind (x, i, not_found, to_store);
+  }
+};
+
+#endif /* HB_VECTOR_HH */
diff --git a/thirdparty/harfbuzz/src/hb-version.h b/thirdparty/harfbuzz/src/hb-version.h
new file mode 100644
index 0000000000..92d61b8cdb
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-version.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright © 2011 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_H_IN
+#error "Include <hb.h> instead."
+#endif
+
+#ifndef HB_VERSION_H
+#define HB_VERSION_H
+
+#include "hb-common.h"
+
+HB_BEGIN_DECLS
+
+
+#define HB_VERSION_MAJOR 2
+#define HB_VERSION_MINOR 7
+#define HB_VERSION_MICRO 2
+
+#define HB_VERSION_STRING "2.7.2"
+
+/* Compile-time version check: true when the requested major.minor.micro is
+ * not newer than the HB_VERSION_* values defined above. Both sides are
+ * encoded as major*10000 + minor*100 + micro, so the comparison is only
+ * meaningful while minor and micro stay below 100. */
+#define HB_VERSION_ATLEAST(major,minor,micro) \
+ ((major)*10000+(minor)*100+(micro) <= \
+ HB_VERSION_MAJOR*10000+HB_VERSION_MINOR*100+HB_VERSION_MICRO)
+
+
+HB_EXTERN void
+hb_version (unsigned int *major,
+ unsigned int *minor,
+ unsigned int *micro);
+
+HB_EXTERN const char *
+hb_version_string (void);
+
+HB_EXTERN hb_bool_t
+hb_version_atleast (unsigned int major,
+ unsigned int minor,
+ unsigned int micro);
+
+
+HB_END_DECLS
+
+#endif /* HB_VERSION_H */
diff --git a/thirdparty/harfbuzz/src/hb.h b/thirdparty/harfbuzz/src/hb.h
new file mode 100644
index 0000000000..360686ca68
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_H
+#define HB_H
+#define HB_H_IN
+
+#include "hb-blob.h"
+#include "hb-buffer.h"
+#include "hb-common.h"
+#include "hb-deprecated.h"
+#include "hb-draw.h"
+#include "hb-face.h"
+#include "hb-font.h"
+#include "hb-map.h"
+#include "hb-set.h"
+#include "hb-shape.h"
+#include "hb-shape-plan.h"
+#include "hb-style.h"
+#include "hb-unicode.h"
+#include "hb-version.h"
+
+HB_BEGIN_DECLS
+HB_END_DECLS
+
+#undef HB_H_IN
+#endif /* HB_H */
diff --git a/thirdparty/harfbuzz/src/hb.hh b/thirdparty/harfbuzz/src/hb.hh
new file mode 100644
index 0000000000..274a0e98db
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb.hh
@@ -0,0 +1,634 @@
+/*
+ * Copyright © 2007,2008,2009 Red Hat, Inc.
+ * Copyright © 2011,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Red Hat Author(s): Behdad Esfahbod
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_HH
+#define HB_HH
+
+
+#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC
+#ifdef _MSC_VER
+#pragma warning( disable: 4068 ) /* Unknown pragma */
+#endif
+#if defined(__GNUC__) || defined(__clang__)
+/* Rules:
+ *
+ * - All pragmas are declared GCC even if they are clang ones. Otherwise GCC
+ * nags, even though we instruct it to ignore -Wunknown-pragmas. ¯\_(ツ)_/¯
+ *
+ * - Within each category, keep sorted.
+ *
+ * - Warnings whose scope can be expanded in future compiler versions shall
+ * be declared as "warning". Otherwise, either ignored or error.
+ */
+
+/* Setup. Don't sort order within this category. */
+#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC_WARNING
+#pragma GCC diagnostic warning "-Wall"
+#pragma GCC diagnostic warning "-Wextra"
+#endif
+#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC_IGNORED
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wunknown-pragmas"
+#pragma GCC diagnostic ignored "-Wunknown-warning-option"
+#endif
+#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC_WARNING
+//#pragma GCC diagnostic warning "-Weverything"
+#endif
+
+/* Error. Should never happen. */
+#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC_ERROR
+#pragma GCC diagnostic error "-Wc++11-narrowing"
+#pragma GCC diagnostic error "-Wcast-align"
+#pragma GCC diagnostic error "-Wcast-function-type"
+#pragma GCC diagnostic error "-Wdelete-non-virtual-dtor"
+#pragma GCC diagnostic error "-Wembedded-directive"
+#pragma GCC diagnostic error "-Wextra-semi-stmt"
+#pragma GCC diagnostic error "-Wformat-security"
+#pragma GCC diagnostic error "-Wimplicit-function-declaration"
+#pragma GCC diagnostic error "-Winit-self"
+#pragma GCC diagnostic error "-Winjected-class-name"
+#pragma GCC diagnostic error "-Wmissing-braces"
+#pragma GCC diagnostic error "-Wmissing-declarations"
+#pragma GCC diagnostic error "-Wmissing-prototypes"
+#pragma GCC diagnostic error "-Wnested-externs"
+#pragma GCC diagnostic error "-Wold-style-definition"
+#pragma GCC diagnostic error "-Wpointer-arith"
+#pragma GCC diagnostic error "-Wredundant-decls"
+#pragma GCC diagnostic error "-Wreorder"
+#pragma GCC diagnostic error "-Wsign-compare"
+#pragma GCC diagnostic error "-Wstrict-prototypes"
+#pragma GCC diagnostic error "-Wstring-conversion"
+#pragma GCC diagnostic error "-Wswitch-enum"
+#pragma GCC diagnostic error "-Wtautological-overlap-compare"
+#pragma GCC diagnostic error "-Wunneeded-internal-declaration"
+#pragma GCC diagnostic error "-Wunused"
+#pragma GCC diagnostic error "-Wunused-local-typedefs"
+#pragma GCC diagnostic error "-Wunused-value"
+#pragma GCC diagnostic error "-Wunused-variable"
+#pragma GCC diagnostic error "-Wvla"
+#pragma GCC diagnostic error "-Wwrite-strings"
+#endif
+
+/* Warning. To be investigated if happens. */
+#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC_WARNING
+#pragma GCC diagnostic warning "-Wbuiltin-macro-redefined"
+#pragma GCC diagnostic warning "-Wdeprecated"
+#pragma GCC diagnostic warning "-Wdeprecated-declarations"
+#pragma GCC diagnostic warning "-Wdisabled-optimization"
+#pragma GCC diagnostic warning "-Wdouble-promotion"
+#pragma GCC diagnostic warning "-Wformat=2"
+#pragma GCC diagnostic warning "-Wignored-pragma-optimize"
+#pragma GCC diagnostic warning "-Wlogical-op"
+#pragma GCC diagnostic warning "-Wmaybe-uninitialized"
+#pragma GCC diagnostic warning "-Wmissing-format-attribute"
+#pragma GCC diagnostic warning "-Wundef"
+#pragma GCC diagnostic warning "-Wunused-but-set-variable"
+#endif
+
+/* Ignored currently, but should be fixed at some point. */
+#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC_IGNORED
+#pragma GCC diagnostic ignored "-Wconversion" // TODO fix
+#pragma GCC diagnostic ignored "-Wformat-signedness" // TODO fix
+#pragma GCC diagnostic ignored "-Wshadow" // TODO fix
+#pragma GCC diagnostic ignored "-Wunsafe-loop-optimizations" // TODO fix
+#pragma GCC diagnostic ignored "-Wunused-parameter" // TODO fix
+#endif
+
+/* Ignored intentionally. */
+#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC_IGNORED
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#pragma GCC diagnostic ignored "-Wformat-zero-length"
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#pragma GCC diagnostic ignored "-Wpacked" // Erratic impl in clang
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wtype-limits"
+#pragma GCC diagnostic ignored "-Wc++11-compat" // only gcc raises it
+#endif
+
+#endif
+#endif
+
+
+#include "hb-config.hh"
+
+
+/*
+ * Following added based on what AC_USE_SYSTEM_EXTENSIONS adds to
+ * config.h.in. Copied here for the convenience of those embedding
+ * HarfBuzz and not using our build system.
+ */
+/* Enable extensions on AIX 3, Interix. */
+#ifndef _ALL_SOURCE
+# define _ALL_SOURCE 1
+#endif
+/* Enable GNU extensions on systems that have them. */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+/* Enable threading extensions on Solaris. */
+#ifndef _POSIX_PTHREAD_SEMANTICS
+# define _POSIX_PTHREAD_SEMANTICS 1
+#endif
+/* Enable extensions on HP NonStop. */
+#ifndef _TANDEM_SOURCE
+# define _TANDEM_SOURCE 1
+#endif
+/* Enable general extensions on Solaris. */
+#ifndef __EXTENSIONS__
+# define __EXTENSIONS__ 1
+#endif
+
+#if defined (_MSC_VER) && defined (HB_DLL_EXPORT)
+#define HB_EXTERN __declspec (dllexport) extern
+#endif
+
+#include "hb.h"
+#define HB_H_IN
+#include "hb-ot.h"
+#define HB_OT_H_IN
+#include "hb-aat.h"
+#define HB_AAT_H_IN
+
+#include <limits.h>
+#include <math.h>
+#include <float.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+#if (defined(_MSC_VER) && _MSC_VER >= 1500) || defined(__MINGW32__)
+#ifdef __MINGW32_VERSION
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
+#endif
+#else
+#include <intrin.h>
+#endif
+#endif
+
+#ifdef _WIN32
+#include <windows.h>
+#include <winapifamily.h>
+#endif
+
+#define HB_PASTE1(a,b) a##b
+#define HB_PASTE(a,b) HB_PASTE1(a,b)
+
+
+/* Compile-time custom allocator support. */
+
+#if !defined(HB_CUSTOM_MALLOC) \
+ && defined(hb_malloc_impl) \
+ && defined(hb_calloc_impl) \
+ && defined(hb_realloc_impl) \
+ && defined(hb_free_impl)
+#define HB_CUSTOM_MALLOC
+#endif
+
+#ifdef HB_CUSTOM_MALLOC
+extern "C" void* hb_malloc_impl(size_t size);
+extern "C" void* hb_calloc_impl(size_t nmemb, size_t size);
+extern "C" void* hb_realloc_impl(void *ptr, size_t size);
+extern "C" void hb_free_impl(void *ptr);
+#define malloc hb_malloc_impl
+#define calloc hb_calloc_impl
+#define realloc hb_realloc_impl
+#define free hb_free_impl
+#endif
+
+
+/*
+ * Compiler attributes
+ */
+
+#if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE__)
+#define likely(expr) (__builtin_expect (!!(expr), 1))
+#define unlikely(expr) (__builtin_expect (!!(expr), 0))
+#else
+#define likely(expr) (expr)
+#define unlikely(expr) (expr)
+#endif
+
+#if !defined(__GNUC__) && !defined(__clang__)
+#undef __attribute__
+#define __attribute__(x)
+#endif
+
+#if defined(__GNUC__) && (__GNUC__ >= 3)
+#define HB_PURE_FUNC __attribute__((pure))
+#define HB_CONST_FUNC __attribute__((const))
+#define HB_PRINTF_FUNC(format_idx, arg_idx) __attribute__((__format__ (__printf__, format_idx, arg_idx)))
+#else
+#define HB_PURE_FUNC
+#define HB_CONST_FUNC
+#define HB_PRINTF_FUNC(format_idx, arg_idx)
+#endif
+/* HB_UNUSED: marks an entity as intentionally unused so the compiler does
+ * not warn about it. __clang__ is tested with defined() because evaluating
+ * an undefined identifier in #if triggers -Wundef, which this header turns
+ * on; the explicit parentheses spell out the && / || grouping. */
+#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__)
+#define HB_UNUSED __attribute__((unused))
+#elif defined(_MSC_VER) /* https://github.com/harfbuzz/harfbuzz/issues/635 */
+#define HB_UNUSED __pragma(warning(suppress: 4100 4101))
+#else
+#define HB_UNUSED
+#endif
+
+#ifndef HB_INTERNAL
+# if !defined(HB_NO_VISIBILITY) && !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(_MSC_VER) && !defined(__SUNPRO_CC)
+# define HB_INTERNAL __attribute__((__visibility__("hidden")))
+# elif defined(__MINGW32__)
+ /* We use -export-symbols on mingw32, since it does not support visibility attributes. */
+# define HB_INTERNAL
+# elif defined (_MSC_VER) && defined (HB_DLL_EXPORT)
+ /* We do not try to export internal symbols on Visual Studio */
+# define HB_INTERNAL
+#else
+# define HB_INTERNAL
+# define HB_NO_VISIBILITY 1
+# endif
+#endif
+
+/* https://github.com/harfbuzz/harfbuzz/issues/1651 */
+#if defined(__clang__) && __clang_major__ < 10
+#define static_const static
+#else
+#define static_const static const
+#endif
+
+#if defined(__GNUC__) && (__GNUC__ >= 3)
+#define HB_FUNC __PRETTY_FUNCTION__
+#elif defined(_MSC_VER)
+#define HB_FUNC __FUNCSIG__
+#else
+#define HB_FUNC __func__
+#endif
+
+#if defined(__SUNPRO_CC) && (__SUNPRO_CC < 0x5140)
+/* https://github.com/harfbuzz/harfbuzz/issues/630 */
+#define __restrict
+#endif
+
+/*
+ * Borrowed from https://bugzilla.mozilla.org/show_bug.cgi?id=1215411
+ * HB_FALLTHROUGH is an annotation to suppress compiler warnings about switch
+ * cases that fall through without a break or return statement. HB_FALLTHROUGH
+ * is only needed on cases that have code:
+ *
+ * switch (foo) {
+ * case 1: // These cases have no code. No fallthrough annotations are needed.
+ * case 2:
+ * case 3:
+ * foo = 4; // This case has code, so a fallthrough annotation is needed:
+ * HB_FALLTHROUGH;
+ * default:
+ * return foo;
+ * }
+ */
+#if defined(__clang__) && __cplusplus >= 201103L
+ /* clang's fallthrough annotations are only available starting in C++11. */
+# define HB_FALLTHROUGH [[clang::fallthrough]]
+#elif defined(__GNUC__) && (__GNUC__ >= 7)
+ /* GNU fallthrough attribute is available from GCC7 */
+# define HB_FALLTHROUGH __attribute__((fallthrough))
+#elif defined(_MSC_VER)
+ /*
+ * MSVC's __fallthrough annotations are checked by /analyze (Code Analysis):
+ * https://msdn.microsoft.com/en-us/library/ms235402%28VS.80%29.aspx
+ */
+# include <sal.h>
+# define HB_FALLTHROUGH __fallthrough
+#else
+# define HB_FALLTHROUGH /* FALLTHROUGH */
+#endif
+
+/* A tag to enforce use of return value for a function */
+#if __cplusplus >= 201703L
+# define HB_NODISCARD [[nodiscard]]
+#elif defined(__GNUC__) || defined(__clang__)
+# define HB_NODISCARD __attribute__((warn_unused_result))
+#elif defined(_MSC_VER)
+# define HB_NODISCARD _Check_return_
+#else
+# define HB_NODISCARD
+#endif
+#define hb_success_t HB_NODISCARD bool
+
+/* https://github.com/harfbuzz/harfbuzz/issues/1852 */
+#if defined(__clang__) && !(defined(_AIX) && (defined(__IBMCPP__) || defined(__ibmxl__)))
+/* Disable certain sanitizer errors. */
+/* https://github.com/harfbuzz/harfbuzz/issues/1247 */
+#define HB_NO_SANITIZE_SIGNED_INTEGER_OVERFLOW __attribute__((no_sanitize("signed-integer-overflow")))
+#else
+#define HB_NO_SANITIZE_SIGNED_INTEGER_OVERFLOW
+#endif
+
+
+#ifdef _WIN32
+ /* We need Windows Vista for both Uniscribe backend and for
+ * MemoryBarrier. We don't support compiling on Windows XP,
+ * though we run on it fine.
+ *
+ * NOTE(review): <windows.h> is already included earlier in this header,
+ * so the _WIN32_WINNT / WIN32_LEAN_AND_MEAN / STRICT values set below are
+ * not visible to that include -- confirm the ordering is intended. */
+# if defined(_WIN32_WINNT) && _WIN32_WINNT < 0x0600
+# undef _WIN32_WINNT
+# endif
+# ifndef _WIN32_WINNT
+# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+# define _WIN32_WINNT 0x0600
+# endif
+# endif
+# ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN 1
+# endif
+# ifndef STRICT
+# define STRICT 1
+# endif
+
+# if defined(_WIN32_WCE)
+ /* Some things not defined on Windows CE. */
+# define vsnprintf _vsnprintf
+# ifndef HB_NO_GETENV
+# define HB_NO_GETENV
+# endif
+# if _WIN32_WCE < 0x800
+# define HB_NO_SETLOCALE
+# define HB_NO_ERRNO
+# endif
+ /* NOTE(review): this branch turns getenv off when building for the
+ * desktop API partition. getenv is normally the call that is missing on
+ * the non-desktop (app) partitions, so the test looks inverted --
+ * confirm against upstream before relying on HB_NO_GETENV here. */
+# elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+# ifndef HB_NO_GETENV
+# define HB_NO_GETENV
+# endif
+# endif
+ /* Before _MSC_VER 1900 snprintf is mapped to _snprintf. */
+# if defined(_MSC_VER) && _MSC_VER < 1900
+# define snprintf _snprintf
+# endif
+#endif
+
+#ifdef HB_NO_GETENV
+#define getenv(Name) nullptr
+#endif
+
+#ifndef HB_NO_ERRNO
+# include <errno.h>
+#else
+static int HB_UNUSED _hb_errno = 0;
+# undef errno
+# define errno _hb_errno
+#endif
+
+#if defined(HAVE_ATEXIT) && !defined(HB_USE_ATEXIT)
+/* atexit() is only safe to be called from shared libraries on certain
+ * platforms. Whitelist.
+ * https://bugs.freedesktop.org/show_bug.cgi?id=82246 */
+# if defined(__linux) && defined(__GLIBC_PREREQ)
+# if __GLIBC_PREREQ(2,3)
+/* From atexit() manpage, it's safe since glibc 2.2.3 on Linux; __GLIBC_PREREQ only takes (major, minor), so 2.3 is required here. */
+# define HB_USE_ATEXIT 1
+# endif
+# elif defined(_MSC_VER) || defined(__MINGW32__)
+/* For MSVC:
+ * https://msdn.microsoft.com/en-us/library/tze57ck3.aspx
+ * https://msdn.microsoft.com/en-us/library/zk17ww08.aspx
+ * mingw32 headers say atexit is safe to use in shared libraries.
+ */
+# define HB_USE_ATEXIT 1
+# elif defined(__ANDROID__)
+/* This is available since Android NDK r8 or r8b:
+ * https://issuetracker.google.com/code/p/android/issues/detail?id=6455
+ */
+# define HB_USE_ATEXIT 1
+# elif defined(__APPLE__)
+/* For macOS and related platforms, the atexit man page indicates
+ * that it will be invoked when the library is unloaded, not only
+ * at application exit.
+ */
+# define HB_USE_ATEXIT 1
+# endif
+#endif
+#ifdef HB_NO_ATEXIT
+# undef HB_USE_ATEXIT
+#endif
+#ifndef HB_USE_ATEXIT
+# define HB_USE_ATEXIT 0
+#endif
+
+#define HB_STMT_START do
+#define HB_STMT_END while (0)
+
+/* Static-assert as expression.
+ * hb_assert_constant_t is only defined for the argument 1, so taking sizeof
+ * of any other instantiation is a compile error. ASSERT_STATIC_EXPR_ZERO
+ * multiplies that sizeof by 0, yielding an unsigned 0 that can be added to
+ * another expression without changing its value. */
+template <unsigned int cond> class hb_assert_constant_t;
+template <> class hb_assert_constant_t<1> {};
+#define ASSERT_STATIC_EXPR_ZERO(_cond) (0 * (unsigned int) sizeof (hb_assert_constant_t<_cond>))
+
+/* Let's assert int types. Saves trouble down the road. */
+static_assert ((sizeof (int8_t) == 1), "");
+static_assert ((sizeof (uint8_t) == 1), "");
+static_assert ((sizeof (int16_t) == 2), "");
+static_assert ((sizeof (uint16_t) == 2), "");
+static_assert ((sizeof (int32_t) == 4), "");
+static_assert ((sizeof (uint32_t) == 4), "");
+static_assert ((sizeof (int64_t) == 8), "");
+static_assert ((sizeof (uint64_t) == 8), "");
+static_assert ((sizeof (hb_codepoint_t) == 4), "");
+static_assert ((sizeof (hb_position_t) == 4), "");
+static_assert ((sizeof (hb_mask_t) == 4), "");
+static_assert ((sizeof (hb_var_int_t) == 4), "");
+
+#define HB_DELETE_COPY_ASSIGN(TypeName) \
+ TypeName(const TypeName&) = delete; \
+ void operator=(const TypeName&) = delete
+#define HB_DELETE_CREATE_COPY_ASSIGN(TypeName) \
+ TypeName() = delete; \
+ TypeName(const TypeName&) = delete; \
+ void operator=(const TypeName&) = delete
+
+
+/* Flags */
+
+/* Enable bitwise ops on enums marked as flags_t */
+/* To my surprise, looks like the function resolver is happy to silently cast
+ * one enum to another... So this doesn't provide the type-checking that I
+ * originally had in mind... :(.
+ *
+ * For MSVC warnings, see: https://github.com/harfbuzz/harfbuzz/pull/163
+ */
+#ifdef _MSC_VER
+# pragma warning(disable:4200)
+# pragma warning(disable:4800)
+#endif
+#define HB_MARK_AS_FLAG_T(T) \
+ extern "C++" { \
+ static inline T operator | (T l, T r) { return T ((unsigned) l | (unsigned) r); } \
+ static inline T operator & (T l, T r) { return T ((unsigned) l & (unsigned) r); } \
+ static inline T operator ^ (T l, T r) { return T ((unsigned) l ^ (unsigned) r); } \
+ static inline T operator ~ (T r) { return T (~(unsigned int) r); } \
+ static inline T& operator |= (T &l, T r) { l = l | r; return l; } \
+ static inline T& operator &= (T& l, T r) { l = l & r; return l; } \
+ static inline T& operator ^= (T& l, T r) { l = l ^ r; return l; } \
+ } \
+ static_assert (true, "")
+
+/* Useful for set-operations on small enums.
+ * For example, for testing "x ∈ {x1, x2, x3}" use:
+ * (FLAG_UNSAFE(x) & (FLAG(x1) | FLAG(x2) | FLAG(x3)))
+ */
+#define FLAG(x) (ASSERT_STATIC_EXPR_ZERO ((unsigned)(x) < 32) + (((uint32_t) 1U) << (unsigned)(x)))
+#define FLAG_UNSAFE(x) ((unsigned)(x) < 32 ? (((uint32_t) 1U) << (unsigned)(x)) : 0)
+#define FLAG_RANGE(x,y) (ASSERT_STATIC_EXPR_ZERO ((x) < (y)) + FLAG(y+1) - FLAG(x))
+#define FLAG64(x) (ASSERT_STATIC_EXPR_ZERO ((unsigned)(x) < 64) + (((uint64_t) 1ULL) << (unsigned)(x)))
+#define FLAG64_UNSAFE(x) ((unsigned)(x) < 64 ? (((uint64_t) 1ULL) << (unsigned)(x)) : 0)
+
+
+/* Size signifying variable-sized array */
+#ifndef HB_VAR_ARRAY
+#define HB_VAR_ARRAY 1
+#endif
+
+/* Round to nearest by flooring x + 0.5, so exact halves go toward positive
+ * infinity (e.g. -2.5 gives -2), unlike the C library roundf, which rounds
+ * halves away from zero. The macro below redirects every later use of
+ * roundf() to this helper. */
+static inline float
+_hb_roundf (float x) { return floorf (x + .5f); }
+#define roundf(x) _hb_roundf(x)
+
+/* Endian swap, used in Windows related backends */
+
+/* Returns v with its two bytes exchanged. */
+static inline uint16_t hb_uint16_swap (const uint16_t v)
+{ return (uint16_t) ((v >> 8) | (v << 8)); }
+
+/* Returns v with its four bytes reversed.
+ * Each swapped half is widened to uint32_t before it is shifted: a uint16_t
+ * otherwise promotes to (signed) int, and shifting a set bit into the sign
+ * position of an int is not portable. */
+static inline uint32_t hb_uint32_swap (const uint32_t v)
+{
+ return ((uint32_t) hb_uint16_swap ((uint16_t) v) << 16)
+ | (uint32_t) hb_uint16_swap ((uint16_t) (v >> 16));
+}
+
+/*
+ * Big-endian integers. Here because fundamental.
+ */
+
+/* BEInt<Type, Bytes>: an integer kept as Bytes individual bytes, most
+ * significant byte first. Assigning a Type encodes it into the bytes;
+ * converting back to Type decodes it. Only widths 1 to 4 are defined. */
+template <typename Type, int Bytes> struct BEInt;
+
+template <typename Type>
+struct BEInt<Type, 1>
+{
+ public:
+ /* Store the single byte as is. */
+ BEInt<Type, 1>& operator = (Type V)
+ {
+ v = V;
+ return *this;
+ }
+ operator Type () const { return v; }
+ private: uint8_t v;
+};
+template <typename Type>
+struct BEInt<Type, 2>
+{
+ public:
+ /* Encode V as two bytes, high byte first. */
+ BEInt<Type, 2>& operator = (Type V)
+ {
+ v[0] = (V >> 8) & 0xFF;
+ v[1] = (V) & 0xFF;
+ return *this;
+ }
+ /* Decode the two stored bytes back into a Type. */
+ operator Type () const
+ {
+#if ((defined(__GNUC__) && __GNUC__ >= 5) || defined(__clang__)) && \
+ defined(__BYTE_ORDER) && \
+ (__BYTE_ORDER == __LITTLE_ENDIAN || __BYTE_ORDER == __BIG_ENDIAN)
+ /* Spoon-feed the compiler a big-endian integer with alignment 1.
+ * https://github.com/harfbuzz/harfbuzz/pull/1398 */
+ struct __attribute__((packed)) packed_uint16_t { uint16_t v; };
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ return __builtin_bswap16 (((const packed_uint16_t *) this)->v);
+#else /* __BYTE_ORDER == __BIG_ENDIAN */
+ return ((const packed_uint16_t *) this)->v;
+#endif
+#else
+ /* Portable byte-by-byte decode; only compiled when the fast path above is
+ * unavailable, so no unreachable statement is left behind it. */
+ return (v[0] << 8)
+ + (v[1]);
+#endif
+ }
+ private: uint8_t v[2];
+};
+template <typename Type>
+struct BEInt<Type, 3>
+{
+ public:
+ /* Encode the low 24 bits of V as three bytes, high byte first. */
+ BEInt<Type, 3>& operator = (Type V)
+ {
+ v[0] = (V >> 16) & 0xFF;
+ v[1] = (V >> 8) & 0xFF;
+ v[2] = (V) & 0xFF;
+ return *this;
+ }
+ /* Decode the three stored bytes; the largest value (0xFFFFFF) still fits
+ * in the int the bytes promote to. */
+ operator Type () const
+ {
+ return (v[0] << 16)
+ + (v[1] << 8)
+ + (v[2]);
+ }
+ private: uint8_t v[3];
+};
+template <typename Type>
+struct BEInt<Type, 4>
+{
+ public:
+ /* Encode V as four bytes, high byte first. */
+ BEInt<Type, 4>& operator = (Type V)
+ {
+ v[0] = (V >> 24) & 0xFF;
+ v[1] = (V >> 16) & 0xFF;
+ v[2] = (V >> 8) & 0xFF;
+ v[3] = (V) & 0xFF;
+ return *this;
+ }
+ /* Decode the four stored bytes. Each byte is widened to uint32_t before
+ * shifting: a uint8_t otherwise promotes to (signed) int, and shifting a
+ * byte >= 0x80 left by 24 would overflow that int. */
+ operator Type () const
+ {
+ return ((uint32_t) v[0] << 24)
+ + ((uint32_t) v[1] << 16)
+ + ((uint32_t) v[2] << 8)
+ + ((uint32_t) v[3]);
+ }
+ private: uint8_t v[4];
+};
+
+
+/*
+ * For lack of a better place, put Zawgyi script hack here.
+ * https://github.com/harfbuzz/harfbuzz/issues/1162
+ */
+
+#define HB_SCRIPT_MYANMAR_ZAWGYI ((hb_script_t) HB_TAG ('Q','a','a','g'))
+
+
+/* Headers we include for everyone. Keep topologically sorted by dependency.
+ * They express dependency amongst themselves, but no other file should include
+ * them directly.*/
+#include "hb-meta.hh"
+#include "hb-mutex.hh"
+#include "hb-number.hh"
+#include "hb-atomic.hh" // Requires: hb-meta
+#include "hb-null.hh" // Requires: hb-meta
+#include "hb-algs.hh" // Requires: hb-meta hb-null hb-number
+#include "hb-iter.hh" // Requires: hb-algs hb-meta
+#include "hb-debug.hh" // Requires: hb-algs hb-atomic
+#include "hb-array.hh" // Requires: hb-algs hb-iter hb-null
+#include "hb-vector.hh" // Requires: hb-array hb-null
+#include "hb-object.hh" // Requires: hb-atomic hb-mutex hb-vector
+
+#endif /* HB_HH */
diff --git a/thirdparty/icu4c/APIChangeReport.md b/thirdparty/icu4c/APIChangeReport.md
new file mode 100644
index 0000000000..0cf9ed5bfc
--- /dev/null
+++ b/thirdparty/icu4c/APIChangeReport.md
@@ -0,0 +1,380 @@
+
+
+<!--
+ Copyright © 2019 and later: Unicode, Inc. and others.
+ License & terms of use: http://www.unicode.org/copyright.html
+-->
+
+# ICU4C API Comparison: ICU 67 with ICU 68
+
+> _Note:_ The Markdown format of this document was introduced in ICU 65.
+
+- [Removed from ICU 67](#removed)
+- [Deprecated or Obsoleted in ICU 68](#deprecated)
+- [Changed in ICU 68](#changed)
+- [Promoted to stable in ICU 68](#promoted)
+- [Added in ICU 68](#added)
+- [Other existing drafts in ICU 68](#other)
+- [Signature Simplifications](#simplifications)
+
+## Removed
+
+Removed from ICU 67
+
+| File | API | ICU 67 | ICU 68 |
+|---|---|---|---|
+| measunit.h | LocalArray&lt;MeasureUnit&gt; icu::MeasureUnit::splitToSingleUnits(int32_t&amp;, UErrorCode&amp;) const | InternalICU 67 | (missing)
+| measunit.h | int32_t icu::MeasureUnit::getIndex() const | Internal | (missing)
+| measunit.h | <tt>static</tt> int32_t icu::MeasureUnit::getIndexCount() | Internal | (missing)
+| measunit.h | <tt>static</tt> int32_t icu::MeasureUnit::internalGetIndexForTypeAndSubtype(const char*, const char*) | Internal | (missing)
+| nounit.h | UClassID icu::NoUnit::getDynamicClassID() const | DraftICU 60 | (missing)
+| nounit.h | icu::NoUnit::NoUnit(const NoUnit&amp;) | DraftICU 60 | (missing)
+| nounit.h | icu::NoUnit::~NoUnit() | DraftICU 60 | (missing)
+| nounit.h | <tt>static</tt> NoUnit icu::NoUnit::base() | DraftICU 60 | (missing)
+| nounit.h | <tt>static</tt> NoUnit icu::NoUnit::percent() | DraftICU 60 | (missing)
+| nounit.h | <tt>static</tt> NoUnit icu::NoUnit::permille() | DraftICU 60 | (missing)
+| nounit.h | <tt>static</tt> UClassID icu::NoUnit::getStaticClassID() | DraftICU 60 | (missing)
+| nounit.h | void* icu::NoUnit::clone() const | DraftICU 60 | (missing)
+
+## Deprecated
+
+Deprecated or Obsoleted in ICU 68
+
+| File | API | ICU 67 | ICU 68 |
+|---|---|---|---|
+| numberrangeformatter.h | UnicodeString icu::number::FormattedNumberRange::getFirstDecimal(UErrorCode&amp;) const | DraftICU 63 | DeprecatedICU 68
+| numberrangeformatter.h | UnicodeString icu::number::FormattedNumberRange::getSecondDecimal(UErrorCode&amp;) const | DraftICU 63 | DeprecatedICU 68
+| umachine.h | <tt>#define</tt> FALSE | StableICU 2.0 | DeprecatedICU 68
+| umachine.h | <tt>#define</tt> TRUE | StableICU 2.0 | DeprecatedICU 68
+
+## Changed
+
+Changed in ICU 68 (old, new)
+
+
+
+| File | API | ICU 67 | ICU 68 |
+|---|---|---|---|
+| bytestrie.h | BytesTrie&amp; icu::BytesTrie::resetToState64(uint64_t) | Draft→StableICU 65
+| bytestrie.h | uint64_t icu::BytesTrie::getState64() const | Draft→StableICU 65
+| localebuilder.h | UBool icu::LocaleBuilder::copyErrorTo(UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::addSupportedLocale(const Locale&amp;) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::operator=(Builder&amp;&amp;) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setDefaultLocale(const Locale*) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setSupportedLocales(Iter, Iter) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator&amp;) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setSupportedLocalesFromListString(StringPiece) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setSupportedLocalesViaConverter(Iter, Iter, Conv) | Draft→StableICU 65
+| localematcher.h | Locale icu::LocaleMatcher::Result::makeResolvedLocale(UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | LocaleMatcher icu::LocaleMatcher::Builder::build(UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | LocaleMatcher&amp; icu::LocaleMatcher::operator=(LocaleMatcher&amp;&amp;) | Draft→StableICU 65
+| localematcher.h | Result icu::LocaleMatcher::getBestMatchResult(Locale::Iterator&amp;, UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | Result icu::LocaleMatcher::getBestMatchResult(const Locale&amp;, UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | Result&amp; icu::LocaleMatcher::Result::operator=(Result&amp;&amp;) | Draft→StableICU 65
+| localematcher.h | UBool icu::LocaleMatcher::Builder::copyErrorTo(UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | const Locale* icu::LocaleMatcher::Result::getDesiredLocale() const | Draft→StableICU 65
+| localematcher.h | const Locale* icu::LocaleMatcher::Result::getSupportedLocale() const | Draft→StableICU 65
+| localematcher.h | const Locale* icu::LocaleMatcher::getBestMatch(Locale::Iterator&amp;, UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | const Locale* icu::LocaleMatcher::getBestMatch(const Locale&amp;, UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | const Locale* icu::LocaleMatcher::getBestMatchForListString(StringPiece, UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | <tt>enum</tt> ULocMatchDemotion::ULOCMATCH_DEMOTION_NONE | Draft→StableICU 65
+| localematcher.h | <tt>enum</tt> ULocMatchDemotion::ULOCMATCH_DEMOTION_REGION | Draft→StableICU 65
+| localematcher.h | <tt>enum</tt> ULocMatchFavorSubtag::ULOCMATCH_FAVOR_LANGUAGE | Draft→StableICU 65
+| localematcher.h | <tt>enum</tt> ULocMatchFavorSubtag::ULOCMATCH_FAVOR_SCRIPT | Draft→StableICU 65
+| localematcher.h | icu::LocaleMatcher::Builder::Builder() | Draft→StableICU 65
+| localematcher.h | icu::LocaleMatcher::Builder::Builder(Builder&amp;&amp;) | Draft→StableICU 65
+| localematcher.h | icu::LocaleMatcher::Builder::~Builder() | Draft→StableICU 65
+| localematcher.h | icu::LocaleMatcher::LocaleMatcher(LocaleMatcher&amp;&amp;) | Draft→StableICU 65
+| localematcher.h | icu::LocaleMatcher::Result::Result(Result&amp;&amp;) | Draft→StableICU 65
+| localematcher.h | icu::LocaleMatcher::Result::~Result() | Draft→StableICU 65
+| localematcher.h | icu::LocaleMatcher::~LocaleMatcher() | Draft→StableICU 65
+| localematcher.h | int32_t icu::LocaleMatcher::Result::getDesiredIndex() const | Draft→StableICU 65
+| localematcher.h | int32_t icu::LocaleMatcher::Result::getSupportedIndex() const | Draft→StableICU 65
+| locid.h | UBool icu::Locale::ConvertingIterator&lt; Iter, Conv &gt;::hasNext() const override | Draft→StableICU 65
+| locid.h | UBool icu::Locale::Iterator::hasNext() const | Draft→StableICU 65
+| locid.h | UBool icu::Locale::RangeIterator&lt; Iter &gt;::hasNext() const override | Draft→StableICU 65
+| locid.h | const Locale&amp; icu::Locale::ConvertingIterator&lt; Iter, Conv &gt;::next() override | Draft→StableICU 65
+| locid.h | const Locale&amp; icu::Locale::Iterator::next() | Draft→StableICU 65
+| locid.h | const Locale&amp; icu::Locale::RangeIterator&lt; Iter &gt;::next() override | Draft→StableICU 65
+| locid.h | icu::Locale::ConvertingIterator&lt; Iter, Conv &gt;::ConvertingIterator(Iter, Iter, Conv) | Draft→StableICU 65
+| locid.h | icu::Locale::Iterator::~Iterator() | Draft→StableICU 65
+| locid.h | icu::Locale::RangeIterator&lt; Iter &gt;::RangeIterator(Iter, Iter) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getBar() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDecade() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDotPerCentimeter() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDotPerInch() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getEm() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getMegapixel() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPascal() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPixel() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPixelPerCentimeter() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPixelPerInch() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getThermUs() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createBar(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDecade(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDotPerCentimeter(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDotPerInch(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createEm(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createMegapixel(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPascal(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPixel(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPixelPerCentimeter(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPixelPerInch(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createThermUs(UErrorCode&amp;) | Draft→StableICU 65
+| numberformatter.h | StringClass icu::number::FormattedNumber::toDecimalNumber(UErrorCode&amp;) const | Draft→StableICU 65
+| numberrangeformatter.h | UnicodeString icu::number::FormattedNumberRange::getFirstDecimal(UErrorCode&amp;) const | DraftICU 63 | DeprecatedICU 68
+| numberrangeformatter.h | UnicodeString icu::number::FormattedNumberRange::getSecondDecimal(UErrorCode&amp;) const | DraftICU 63 | DeprecatedICU 68
+| reldatefmt.h | <tt>enum</tt> UDateAbsoluteUnit::UDAT_ABSOLUTE_HOUR | Draft→StableICU 65
+| reldatefmt.h | <tt>enum</tt> UDateAbsoluteUnit::UDAT_ABSOLUTE_MINUTE | Draft→StableICU 65
+| stringpiece.h | icu::StringPiece::StringPiece(T) | Draft→StableICU 65
+| ucal.h | int32_t ucal_getHostTimeZone(UChar*, int32_t, UErrorCode*) | Draft→StableICU 65
+| ucharstrie.h | UCharsTrie&amp; icu::UCharsTrie::resetToState64(uint64_t) | Draft→StableICU 65
+| ucharstrie.h | uint64_t icu::UCharsTrie::getState64() const | Draft→StableICU 65
+| uloc.h | UEnumeration* uloc_openAvailableByType(ULocAvailableType, UErrorCode*) | Draft→StableICU 65
+| uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_DEFAULT | Draft→StableICU 65
+| uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_ONLY_LEGACY_ALIASES | Draft→StableICU 65
+| uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_WITH_LEGACY_ALIASES | Draft→StableICU 65
+| umachine.h | <tt>#define</tt> FALSE | StableICU 2.0 | DeprecatedICU 68
+| umachine.h | <tt>#define</tt> TRUE | StableICU 2.0 | DeprecatedICU 68
+| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_BUNDLE | Draft→StableICU 65
+| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_DATA_FILE | Draft→StableICU 65
+| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_RES_FILE | Draft→StableICU 65
+| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_START | Draft→StableICU 65
+
+## Promoted
+
+Promoted to stable in ICU 68
+
+| File | API | ICU 67 | ICU 68 |
+|---|---|---|---|
+| bytestrie.h | BytesTrie&amp; icu::BytesTrie::resetToState64(uint64_t) | Draft→StableICU 65
+| bytestrie.h | uint64_t icu::BytesTrie::getState64() const | Draft→StableICU 65
+| localebuilder.h | UBool icu::LocaleBuilder::copyErrorTo(UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::addSupportedLocale(const Locale&amp;) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::operator=(Builder&amp;&amp;) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setDefaultLocale(const Locale*) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setSupportedLocales(Iter, Iter) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator&amp;) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setSupportedLocalesFromListString(StringPiece) | Draft→StableICU 65
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setSupportedLocalesViaConverter(Iter, Iter, Conv) | Draft→StableICU 65
+| localematcher.h | Locale icu::LocaleMatcher::Result::makeResolvedLocale(UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | LocaleMatcher icu::LocaleMatcher::Builder::build(UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | LocaleMatcher&amp; icu::LocaleMatcher::operator=(LocaleMatcher&amp;&amp;) | Draft→StableICU 65
+| localematcher.h | Result icu::LocaleMatcher::getBestMatchResult(Locale::Iterator&amp;, UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | Result icu::LocaleMatcher::getBestMatchResult(const Locale&amp;, UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | Result&amp; icu::LocaleMatcher::Result::operator=(Result&amp;&amp;) | Draft→StableICU 65
+| localematcher.h | UBool icu::LocaleMatcher::Builder::copyErrorTo(UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | const Locale* icu::LocaleMatcher::Result::getDesiredLocale() const | Draft→StableICU 65
+| localematcher.h | const Locale* icu::LocaleMatcher::Result::getSupportedLocale() const | Draft→StableICU 65
+| localematcher.h | const Locale* icu::LocaleMatcher::getBestMatch(Locale::Iterator&amp;, UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | const Locale* icu::LocaleMatcher::getBestMatch(const Locale&amp;, UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | const Locale* icu::LocaleMatcher::getBestMatchForListString(StringPiece, UErrorCode&amp;) const | Draft→StableICU 65
+| localematcher.h | <tt>enum</tt> ULocMatchDemotion::ULOCMATCH_DEMOTION_NONE | Draft→StableICU 65
+| localematcher.h | <tt>enum</tt> ULocMatchDemotion::ULOCMATCH_DEMOTION_REGION | Draft→StableICU 65
+| localematcher.h | <tt>enum</tt> ULocMatchFavorSubtag::ULOCMATCH_FAVOR_LANGUAGE | Draft→StableICU 65
+| localematcher.h | <tt>enum</tt> ULocMatchFavorSubtag::ULOCMATCH_FAVOR_SCRIPT | Draft→StableICU 65
+| localematcher.h | icu::LocaleMatcher::Builder::Builder() | Draft→StableICU 65
+| localematcher.h | icu::LocaleMatcher::Builder::Builder(Builder&amp;&amp;) | Draft→StableICU 65
+| localematcher.h | icu::LocaleMatcher::Builder::~Builder() | Draft→StableICU 65
+| localematcher.h | icu::LocaleMatcher::LocaleMatcher(LocaleMatcher&amp;&amp;) | Draft→StableICU 65
+| localematcher.h | icu::LocaleMatcher::Result::Result(Result&amp;&amp;) | Draft→StableICU 65
+| localematcher.h | icu::LocaleMatcher::Result::~Result() | Draft→StableICU 65
+| localematcher.h | icu::LocaleMatcher::~LocaleMatcher() | Draft→StableICU 65
+| localematcher.h | int32_t icu::LocaleMatcher::Result::getDesiredIndex() const | Draft→StableICU 65
+| localematcher.h | int32_t icu::LocaleMatcher::Result::getSupportedIndex() const | Draft→StableICU 65
+| locid.h | UBool icu::Locale::ConvertingIterator&lt; Iter, Conv &gt;::hasNext() const override | Draft→StableICU 65
+| locid.h | UBool icu::Locale::Iterator::hasNext() const | Draft→StableICU 65
+| locid.h | UBool icu::Locale::RangeIterator&lt; Iter &gt;::hasNext() const override | Draft→StableICU 65
+| locid.h | const Locale&amp; icu::Locale::ConvertingIterator&lt; Iter, Conv &gt;::next() override | Draft→StableICU 65
+| locid.h | const Locale&amp; icu::Locale::Iterator::next() | Draft→StableICU 65
+| locid.h | const Locale&amp; icu::Locale::RangeIterator&lt; Iter &gt;::next() override | Draft→StableICU 65
+| locid.h | icu::Locale::ConvertingIterator&lt; Iter, Conv &gt;::ConvertingIterator(Iter, Iter, Conv) | Draft→StableICU 65
+| locid.h | icu::Locale::Iterator::~Iterator() | Draft→StableICU 65
+| locid.h | icu::Locale::RangeIterator&lt; Iter &gt;::RangeIterator(Iter, Iter) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getBar() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDecade() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDotPerCentimeter() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDotPerInch() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getEm() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getMegapixel() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPascal() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPixel() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPixelPerCentimeter() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPixelPerInch() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getThermUs() | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createBar(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDecade(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDotPerCentimeter(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDotPerInch(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createEm(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createMegapixel(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPascal(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPixel(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPixelPerCentimeter(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPixelPerInch(UErrorCode&amp;) | Draft→StableICU 65
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createThermUs(UErrorCode&amp;) | Draft→StableICU 65
+| numberformatter.h | StringClass icu::number::FormattedNumber::toDecimalNumber(UErrorCode&amp;) const | Draft→StableICU 65
+| reldatefmt.h | <tt>enum</tt> UDateAbsoluteUnit::UDAT_ABSOLUTE_HOUR | Draft→StableICU 65
+| reldatefmt.h | <tt>enum</tt> UDateAbsoluteUnit::UDAT_ABSOLUTE_MINUTE | Draft→StableICU 65
+| stringpiece.h | icu::StringPiece::StringPiece(T) | Draft→StableICU 65
+| ucal.h | int32_t ucal_getHostTimeZone(UChar*, int32_t, UErrorCode*) | Draft→StableICU 65
+| ucharstrie.h | UCharsTrie&amp; icu::UCharsTrie::resetToState64(uint64_t) | Draft→StableICU 65
+| ucharstrie.h | uint64_t icu::UCharsTrie::getState64() const | Draft→StableICU 65
+| uloc.h | UEnumeration* uloc_openAvailableByType(ULocAvailableType, UErrorCode*) | Draft→StableICU 65
+| uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_DEFAULT | Draft→StableICU 65
+| uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_ONLY_LEGACY_ALIASES | Draft→StableICU 65
+| uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_WITH_LEGACY_ALIASES | Draft→StableICU 65
+| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_BUNDLE | Draft→StableICU 65
+| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_DATA_FILE | Draft→StableICU 65
+| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_RES_FILE | Draft→StableICU 65
+| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_START | Draft→StableICU 65
+
+## Added
+
+Added in ICU 68
+
+| File | API | ICU 67 | ICU 68 |
+|---|---|---|---|
+| dtitvfmt.h | UDisplayContext icu::DateIntervalFormat::getContext(UDisplayContextType, UErrorCode&amp;) const | (missing) | DraftICU 68
+| dtitvfmt.h | void icu::DateIntervalFormat::setContext(UDisplayContext, UErrorCode&amp;) | (missing) | DraftICU 68
+| dtptngen.h | <tt>static</tt> DateTimePatternGenerator* icu::DateTimePatternGenerator::createInstanceNoStdPat(const Locale&amp;, UErrorCode&amp;) | (missing) | Internal
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setMaxDistance(const Locale&amp;, const Locale&amp;) | (missing) | DraftICU 68
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setNoDefaultLocale() | (missing) | DraftICU 68
+| localematcher.h | UBool icu::LocaleMatcher::isMatch(const Locale&amp;, const Locale&amp;, UErrorCode&amp;) const | (missing) | DraftICU 68
+| measunit.h | int32_t icu::MeasureUnit::getOffset() const | (missing) | Internal
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getCandela() | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDessertSpoon() | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDessertSpoonImperial() | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDot() | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDram() | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDrop() | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getEarthRadius() | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getGrain() | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getJigger() | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getLumen() | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPinch() | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getQuartImperial() | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createCandela(UErrorCode&amp;) | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDessertSpoon(UErrorCode&amp;) | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDessertSpoonImperial(UErrorCode&amp;) | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDot(UErrorCode&amp;) | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDram(UErrorCode&amp;) | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDrop(UErrorCode&amp;) | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createEarthRadius(UErrorCode&amp;) | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createGrain(UErrorCode&amp;) | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createJigger(UErrorCode&amp;) | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createLumen(UErrorCode&amp;) | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPinch(UErrorCode&amp;) | (missing) | DraftICU 68
+| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createQuartImperial(UErrorCode&amp;) | (missing) | DraftICU 68
+| measunit.h | std::pair&lt; LocalArray&lt; MeasureUnit &gt;, int32_t &gt; icu::MeasureUnit::splitToSingleUnits(UErrorCode&amp;) const | (missing) | DraftICU 68
+| numberformatter.h | Derived icu::number::NumberFormatterSettings&lt; Derived &gt;::usage(StringPiece) const&amp; | (missing) | DraftICU 68
+| numberformatter.h | Derived icu::number::NumberFormatterSettings&lt; Derived &gt;::usage(StringPiece)&amp;&amp; | (missing) | DraftICU 68
+| numberformatter.h | MeasureUnit icu::number::FormattedNumber::getOutputUnit(UErrorCode&amp;) const | (missing) | DraftICU 68
+| numberformatter.h | Usage&amp; icu::number::impl::Usage::operator=(Usage&amp;&amp;) | (missing) | Internal
+| numberformatter.h | Usage&amp; icu::number::impl::Usage::operator=(const Usage&amp;) | (missing) | Internal
+| numberformatter.h | bool icu::number::impl::Usage::isSet() const | (missing) | Internal
+| numberformatter.h | icu::number::impl::Usage::Usage(Usage&amp;&amp;) | (missing) | Internal
+| numberformatter.h | icu::number::impl::Usage::Usage(const Usage&amp;) | (missing) | Internal
+| numberformatter.h | icu::number::impl::Usage::~Usage() | (missing) | Internal
+| numberformatter.h | int16_t icu::number::impl::Usage::length() const | (missing) | Internal
+| numberformatter.h | void icu::number::impl::Usage::set(StringPiece) | (missing) | Internal
+| numberrangeformatter.h | std::pair&lt; StringClass, StringClass &gt; icu::number::FormattedNumberRange::getDecimalNumbers(UErrorCode&amp;) const | (missing) | DraftICU 68
+| plurrule.h | UnicodeString icu::PluralRules::select(const number::FormattedNumberRange&amp;, UErrorCode&amp;) const | (missing) | DraftICU 68
+| plurrule.h | UnicodeString icu::PluralRules::select(const number::impl::UFormattedNumberRangeData*, UErrorCode&amp;) const | (missing) | Internal
+| timezone.h | <tt>static</tt> TimeZone* icu::TimeZone::forLocaleOrDefault(const Locale&amp;) | (missing) | Internal
+| ucurr.h | <tt>enum</tt> UCurrNameStyle::UCURR_FORMAL_SYMBOL_NAME | (missing) | DraftICU 68
+| ucurr.h | <tt>enum</tt> UCurrNameStyle::UCURR_VARIANT_SYMBOL_NAME | (missing) | DraftICU 68
+| udateintervalformat.h | UDisplayContext udtitvfmt_getContext(const UDateIntervalFormat*, UDisplayContextType, UErrorCode*) | (missing) | DraftICU 68
+| udateintervalformat.h | void udtitvfmt_setContext(UDateIntervalFormat*, UDisplayContext, UErrorCode*) | (missing) | DraftICU 68
+| umachine.h | <tt>#define</tt> U_DEFINE_FALSE_AND_TRUE | (missing) | InternalICU 68
+| unum.h | <tt>enum</tt> UNumberFormatMinimumGroupingDigits::UNUM_MINIMUM_GROUPING_DIGITS_AUTO | (missing) | DraftICU 68
+| unum.h | <tt>enum</tt> UNumberFormatMinimumGroupingDigits::UNUM_MINIMUM_GROUPING_DIGITS_MIN2 | (missing) | DraftICU 68
+| unumberformatter.h | <tt>enum</tt> UNumberUnitWidth::UNUM_UNIT_WIDTH_FORMAL | (missing) | DraftICU 68
+| unumberformatter.h | <tt>enum</tt> UNumberUnitWidth::UNUM_UNIT_WIDTH_VARIANT | (missing) | DraftICU 68
+| unumberformatter.h | int32_t unumf_resultToDecimalNumber(const UFormattedNumber*, char*, int32_t, UErrorCode*) | (missing) | DraftICU 68
+| unumberrangeformatter.h | UFormattedNumberRange* unumrf_openResult(UErrorCode*) | (missing) | DraftICU 68
+| unumberrangeformatter.h | UNumberRangeFormatter* unumrf_openForSkeletonWithCollapseAndIdentityFallback(const UChar*, int32_t, UNumberRangeCollapse, UNumberRangeIdentityFallback, const char*, UParseError*, UErrorCode*) | (missing) | DraftICU 68
+| unumberrangeformatter.h | UNumberRangeIdentityResult unumrf_resultGetIdentityResult(const UFormattedNumberRange*, UErrorCode*) | (missing) | DraftICU 68
+| unumberrangeformatter.h | const UFormattedValue* unumrf_resultAsValue(const UFormattedNumberRange*, UErrorCode*) | (missing) | DraftICU 68
+| unumberrangeformatter.h | int32_t unumrf_resultGetFirstDecimalNumber(const UFormattedNumberRange*, char*, int32_t, UErrorCode*) | (missing) | DraftICU 68
+| unumberrangeformatter.h | int32_t unumrf_resultGetSecondDecimalNumber(const UFormattedNumberRange*, char*, int32_t, UErrorCode*) | (missing) | DraftICU 68
+| unumberrangeformatter.h | void unumrf_close(UNumberRangeFormatter*) | (missing) | DraftICU 68
+| unumberrangeformatter.h | void unumrf_closeResult(UFormattedNumberRange*) | (missing) | DraftICU 68
+| unumberrangeformatter.h | void unumrf_formatDecimalRange(const UNumberRangeFormatter*, const char*, int32_t, const char*, int32_t, UFormattedNumberRange*, UErrorCode*) | (missing) | DraftICU 68
+| unumberrangeformatter.h | void unumrf_formatDoubleRange(const UNumberRangeFormatter*, double, double, UFormattedNumberRange*, UErrorCode*) | (missing) | DraftICU 68
+| upluralrules.h | int32_t uplrules_selectForRange(const UPluralRules*, const struct UFormattedNumberRange*, UChar*, int32_t, UErrorCode*) | (missing) | DraftICU 68
+
+## Other
+
+Other existing drafts in ICU 68
+
+| File | API | ICU 67 | ICU 68 |
+|---|---|---|---|
+| bytestream.h | void icu::ByteSink::AppendU8(const char*, int32_t) | DraftICU 67 |
+| bytestream.h | void icu::ByteSink::AppendU8(const char8_t*, int32_t) | DraftICU 67 |
+| dtptngen.h | UDateFormatHourCycle icu::DateTimePatternGenerator::getDefaultHourCycle(UErrorCode&amp;) const | DraftICU 67 |
+| listformatter.h | <tt>static</tt> ListFormatter* icu::ListFormatter::createInstance(const Locale&amp;, UListFormatterType, UListFormatterWidth, UErrorCode&amp;) | DraftICU 67 |
+| localematcher.h | Builder&amp; icu::LocaleMatcher::Builder::setDirection(ULocMatchDirection) | DraftICU 67 |
+| localematcher.h | <tt>enum</tt> ULocMatchDirection::ULOCMATCH_DIRECTION_ONLY_TWO_WAY | DraftICU 67 |
+| localematcher.h | <tt>enum</tt> ULocMatchDirection::ULOCMATCH_DIRECTION_WITH_ONE_WAY | DraftICU 67 |
+| locid.h | void icu::Locale::canonicalize(UErrorCode&amp;) | DraftICU 67 |
+| measfmt.h | void icu::MeasureFormat::parseObject(const UnicodeString&amp;, Formattable&amp;, ParsePosition&amp;) const | DraftICU 53 |
+| measunit.h | MeasureUnit icu::MeasureUnit::product(const MeasureUnit&amp;, UErrorCode&amp;) const | DraftICU 67 |
+| measunit.h | MeasureUnit icu::MeasureUnit::reciprocal(UErrorCode&amp;) const | DraftICU 67 |
+| measunit.h | MeasureUnit icu::MeasureUnit::withDimensionality(int32_t, UErrorCode&amp;) const | DraftICU 67 |
+| measunit.h | MeasureUnit icu::MeasureUnit::withSIPrefix(UMeasureSIPrefix, UErrorCode&amp;) const | DraftICU 67 |
+| measunit.h | MeasureUnit&amp; icu::MeasureUnit::operator=(MeasureUnit&amp;&amp;) noexcept | DraftICU 67 |
+| measunit.h | UMeasureSIPrefix icu::MeasureUnit::getSIPrefix(UErrorCode&amp;) const | DraftICU 67 |
+| measunit.h | UMeasureUnitComplexity icu::MeasureUnit::getComplexity(UErrorCode&amp;) const | DraftICU 67 |
+| measunit.h | const char* icu::MeasureUnit::getIdentifier() const | DraftICU 67 |
+| measunit.h | icu::MeasureUnit::MeasureUnit(MeasureUnit&amp;&amp;) noexcept | DraftICU 67 |
+| measunit.h | int32_t icu::MeasureUnit::getDimensionality(UErrorCode&amp;) const | DraftICU 67 |
+| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::forIdentifier(StringPiece, UErrorCode&amp;) | DraftICU 67 |
+| stringpiece.h | icu::StringPiece::StringPiece(const char8_t*) | DraftICU 67 |
+| stringpiece.h | icu::StringPiece::StringPiece(const char8_t*, int32_t) | DraftICU 67 |
+| stringpiece.h | icu::StringPiece::StringPiece(const std::u8string&amp;) | DraftICU 67 |
+| stringpiece.h | icu::StringPiece::StringPiece(std::nullptr_t) | DraftICU 67 |
+| stringpiece.h | int32_t icu::StringPiece::compare(StringPiece) | DraftICU 67 |
+| stringpiece.h | int32_t icu::StringPiece::find(StringPiece, int32_t) | DraftICU 67 |
+| stringpiece.h | void icu::StringPiece::set(const char8_t*) | DraftICU 67 |
+| stringpiece.h | void icu::StringPiece::set(const char8_t*, int32_t) | DraftICU 67 |
+| udat.h | <tt>enum</tt> UDateFormatHourCycle::UDAT_HOUR_CYCLE_11 | DraftICU 67 |
+| udat.h | <tt>enum</tt> UDateFormatHourCycle::UDAT_HOUR_CYCLE_12 | DraftICU 67 |
+| udat.h | <tt>enum</tt> UDateFormatHourCycle::UDAT_HOUR_CYCLE_23 | DraftICU 67 |
+| udat.h | <tt>enum</tt> UDateFormatHourCycle::UDAT_HOUR_CYCLE_24 | DraftICU 67 |
+| udateintervalformat.h | void udtitvfmt_formatCalendarToResult(const UDateIntervalFormat*, UCalendar*, UCalendar*, UFormattedDateInterval*, UErrorCode*) | DraftICU 67 |
+| udateintervalformat.h | void udtitvfmt_formatToResult(const UDateIntervalFormat*, UDate, UDate, UFormattedDateInterval*, UErrorCode*) | DraftICU 67 |
+| udatpg.h | UDateFormatHourCycle udatpg_getDefaultHourCycle(const UDateTimePatternGenerator*, UErrorCode*) | DraftICU 67 |
+| ulistformatter.h | UListFormatter* ulistfmt_openForType(const char*, UListFormatterType, UListFormatterWidth, UErrorCode*) | DraftICU 67 |
+| ulistformatter.h | <tt>enum</tt> UListFormatterType::ULISTFMT_TYPE_AND | DraftICU 67 |
+| ulistformatter.h | <tt>enum</tt> UListFormatterType::ULISTFMT_TYPE_OR | DraftICU 67 |
+| ulistformatter.h | <tt>enum</tt> UListFormatterType::ULISTFMT_TYPE_UNITS | DraftICU 67 |
+| ulistformatter.h | <tt>enum</tt> UListFormatterWidth::ULISTFMT_WIDTH_NARROW | DraftICU 67 |
+| ulistformatter.h | <tt>enum</tt> UListFormatterWidth::ULISTFMT_WIDTH_SHORT | DraftICU 67 |
+| ulistformatter.h | <tt>enum</tt> UListFormatterWidth::ULISTFMT_WIDTH_WIDE | DraftICU 67 |
+| uregex.h | <tt>enum</tt> URegexpFlag::UREGEX_CANON_EQ | DraftICU 2.4 |
+| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_CREATE_BREAK_ENGINE | DraftICU 67 |
+| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_CREATE_CHARACTER | DraftICU 67 |
+| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_CREATE_LINE | DraftICU 67 |
+| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_CREATE_SENTENCE | DraftICU 67 |
+| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_CREATE_TITLE | DraftICU 67 |
+| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_CREATE_WORD | DraftICU 67 |
+| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_START | DraftICU 67 |
+
+## Simplifications
+
+This section shows cases where the signature was "simplified" for the sake of comparison. The simplified form is in bold, followed by
+ all possible variations in "original" form.
+
+
+## Colophon
+
+Contents generated by StableAPI tool on Wed Sep 30 17:44:26 PDT 2020
+
+Copyright © 2019 and later: Unicode, Inc. and others.
+License & terms of use: http://www.unicode.org/copyright.html
+ \ No newline at end of file
diff --git a/thirdparty/icu4c/LICENSE b/thirdparty/icu4c/LICENSE
new file mode 100644
index 0000000000..5d664a083b
--- /dev/null
+++ b/thirdparty/icu4c/LICENSE
@@ -0,0 +1,414 @@
+COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
+
+Copyright © 1991-2020 Unicode, Inc. All rights reserved.
+Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Unicode data files and any associated documentation
+(the "Data Files") or Unicode software and any associated documentation
+(the "Software") to deal in the Data Files or Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of
+the Data Files or Software, and to permit persons to whom the Data Files
+or Software are furnished to do so, provided that either
+(a) this copyright and permission notice appear with all copies
+of the Data Files or Software, or
+(b) this copyright and permission notice appear in associated
+Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale,
+use or other dealings in these Data Files or Software without prior
+written authorization of the copyright holder.
+
+---------------------
+
+Third-Party Software Licenses
+
+This section contains third-party software notices and/or additional
+terms for licensed third-party software components included within ICU
+libraries.
+
+1. ICU License - ICU 1.8.1 to ICU 57.1
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright (c) 1995-2016 International Business Machines Corporation and others
+All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, and/or sell copies of the Software, and to permit persons
+to whom the Software is furnished to do so, provided that the above
+copyright notice(s) and this permission notice appear in all copies of
+the Software and that both the above copyright notice(s) and this
+permission notice appear in supporting documentation.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY
+SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
+RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
+CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale, use
+or other dealings in this Software without prior written authorization
+of the copyright holder.
+
+All trademarks and registered trademarks mentioned herein are the
+property of their respective owners.
+
+2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
+
+ # The Google Chrome software developed by Google is licensed under
+ # the BSD license. Other software included in this distribution is
+ # provided under other licenses, as set forth below.
+ #
+ # The BSD License
+ # http://opensource.org/licenses/bsd-license.php
+ # Copyright (C) 2006-2008, Google Inc.
+ #
+ # All rights reserved.
+ #
+ # Redistribution and use in source and binary forms, with or without
+ # modification, are permitted provided that the following conditions are met:
+ #
+ # Redistributions of source code must retain the above copyright notice,
+ # this list of conditions and the following disclaimer.
+ # Redistributions in binary form must reproduce the above
+ # copyright notice, this list of conditions and the following
+ # disclaimer in the documentation and/or other materials provided with
+ # the distribution.
+ # Neither the name of Google Inc. nor the names of its
+ # contributors may be used to endorse or promote products derived from
+ # this software without specific prior written permission.
+ #
+ #
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ #
+ #
+ # The word list in cjdict.txt are generated by combining three word lists
+ # listed below with further processing for compound word breaking. The
+ # frequency is generated with an iterative training against Google web
+ # corpora.
+ #
+ # * Libtabe (Chinese)
+ # - https://sourceforge.net/project/?group_id=1519
+ # - Its license terms and conditions are shown below.
+ #
+ # * IPADIC (Japanese)
+ # - http://chasen.aist-nara.ac.jp/chasen/distribution.html
+ # - Its license terms and conditions are shown below.
+ #
+ # ---------COPYING.libtabe ---- BEGIN--------------------
+ #
+ # /*
+ # * Copyright (c) 1999 TaBE Project.
+ # * Copyright (c) 1999 Pai-Hsiang Hsiao.
+ # * All rights reserved.
+ # *
+ # * Redistribution and use in source and binary forms, with or without
+ # * modification, are permitted provided that the following conditions
+ # * are met:
+ # *
+ # * . Redistributions of source code must retain the above copyright
+ # * notice, this list of conditions and the following disclaimer.
+ # * . Redistributions in binary form must reproduce the above copyright
+ # * notice, this list of conditions and the following disclaimer in
+ # * the documentation and/or other materials provided with the
+ # * distribution.
+ # * . Neither the name of the TaBE Project nor the names of its
+ # * contributors may be used to endorse or promote products derived
+ # * from this software without specific prior written permission.
+ # *
+ # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ # * OF THE POSSIBILITY OF SUCH DAMAGE.
+ # */
+ #
+ # /*
+ # * Copyright (c) 1999 Computer Systems and Communication Lab,
+ # * Institute of Information Science, Academia
+ # * Sinica. All rights reserved.
+ # *
+ # * Redistribution and use in source and binary forms, with or without
+ # * modification, are permitted provided that the following conditions
+ # * are met:
+ # *
+ # * . Redistributions of source code must retain the above copyright
+ # * notice, this list of conditions and the following disclaimer.
+ # * . Redistributions in binary form must reproduce the above copyright
+ # * notice, this list of conditions and the following disclaimer in
+ # * the documentation and/or other materials provided with the
+ # * distribution.
+ # * . Neither the name of the Computer Systems and Communication Lab
+ # * nor the names of its contributors may be used to endorse or
+ # * promote products derived from this software without specific
+ # * prior written permission.
+ # *
+ # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ # * OF THE POSSIBILITY OF SUCH DAMAGE.
+ # */
+ #
+ # Copyright 1996 Chih-Hao Tsai @ Beckman Institute,
+ # University of Illinois
+ # c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4
+ #
+ # ---------------COPYING.libtabe-----END--------------------------------
+ #
+ #
+ # ---------------COPYING.ipadic-----BEGIN-------------------------------
+ #
+ # Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
+ # and Technology. All Rights Reserved.
+ #
+ # Use, reproduction, and distribution of this software is permitted.
+ # Any copy of this software, whether in its original form or modified,
+ # must include both the above copyright notice and the following
+ # paragraphs.
+ #
+ # Nara Institute of Science and Technology (NAIST),
+ # the copyright holders, disclaims all warranties with regard to this
+ # software, including all implied warranties of merchantability and
+ # fitness, in no event shall NAIST be liable for
+ # any special, indirect or consequential damages or any damages
+ # whatsoever resulting from loss of use, data or profits, whether in an
+ # action of contract, negligence or other tortuous action, arising out
+ # of or in connection with the use or performance of this software.
+ #
+ # A large portion of the dictionary entries
+ # originate from ICOT Free Software. The following conditions for ICOT
+ # Free Software applies to the current dictionary as well.
+ #
+ # Each User may also freely distribute the Program, whether in its
+ # original form or modified, to any third party or parties, PROVIDED
+ # that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
+ # on, or be attached to, the Program, which is distributed substantially
+ # in the same form as set out herein and that such intended
+ # distribution, if actually made, will neither violate or otherwise
+ # contravene any of the laws and regulations of the countries having
+ # jurisdiction over the User or the intended distribution itself.
+ #
+ # NO WARRANTY
+ #
+ # The program was produced on an experimental basis in the course of the
+ # research and development conducted during the project and is provided
+ # to users as so produced on an experimental basis. Accordingly, the
+ # program is provided without any warranty whatsoever, whether express,
+ # implied, statutory or otherwise. The term "warranty" used herein
+ # includes, but is not limited to, any warranty of the quality,
+ # performance, merchantability and fitness for a particular purpose of
+ # the program and the nonexistence of any infringement or violation of
+ # any right of any third party.
+ #
+ # Each user of the program will agree and understand, and be deemed to
+ # have agreed and understood, that there is no warranty whatsoever for
+ # the program and, accordingly, the entire risk arising from or
+ # otherwise connected with the program is assumed by the user.
+ #
+ # Therefore, neither ICOT, the copyright holder, or any other
+ # organization that participated in or was otherwise related to the
+ # development of the program and their respective officials, directors,
+ # officers and other employees shall be held liable for any and all
+ # damages, including, without limitation, general, special, incidental
+ # and consequential damages, arising out of or otherwise in connection
+ # with the use or inability to use the program or any product, material
+ # or result produced or otherwise obtained by using the program,
+ # regardless of whether they have been advised of, or otherwise had
+ # knowledge of, the possibility of such damages at any time during the
+ # project or thereafter. Each user will be deemed to have agreed to the
+ # foregoing by his or her commencement of use of the program. The term
+ # "use" as used herein includes, but is not limited to, the use,
+ # modification, copying and distribution of the program and the
+ # production of secondary products from the program.
+ #
+ # In the case where the program, whether in its original form or
+ # modified, was distributed or delivered to or received by a user from
+ # any person, organization or entity other than ICOT, unless it makes or
+ # grants independently of ICOT any specific warranty to the user in
+ # writing, such person, organization or entity, will also be exempted
+ # from and not be held liable to the user for any such damages as noted
+ # above as far as the program is concerned.
+ #
+ # ---------------COPYING.ipadic-----END----------------------------------
+
+3. Lao Word Break Dictionary Data (laodict.txt)
+
+ # Copyright (c) 2013 International Business Machines Corporation
+ # and others. All Rights Reserved.
+ #
+ # Project: https://github.com/veer66/lao-dictionary
+ # Dictionary: https://github.com/veer66/lao-dictionary/blob/master/Lao-Dictionary.txt
+ # License: https://github.com/veer66/lao-dictionary/blob/master/Lao-Dictionary-LICENSE.txt
+ # (copied below)
+ #
+ # This file is derived from the above dictionary, with slight
+ # modifications.
+ # ----------------------------------------------------------------------
+ # Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
+ # All rights reserved.
+ #
+ # Redistribution and use in source and binary forms, with or without
+ # modification,
+ # are permitted provided that the following conditions are met:
+ #
+ #
+ # Redistributions of source code must retain the above copyright notice, this
+ # list of conditions and the following disclaimer. Redistributions in
+ # binary form must reproduce the above copyright notice, this list of
+ # conditions and the following disclaimer in the documentation and/or
+ # other materials provided with the distribution.
+ #
+ #
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ # OF THE POSSIBILITY OF SUCH DAMAGE.
+ # --------------------------------------------------------------------------
+
+4. Burmese Word Break Dictionary Data (burmesedict.txt)
+
+ # Copyright (c) 2014 International Business Machines Corporation
+ # and others. All Rights Reserved.
+ #
+ # This list is part of a project hosted at:
+ # github.com/kanyawtech/myanmar-karen-word-lists
+ #
+ # --------------------------------------------------------------------------
+ # Copyright (c) 2013, LeRoy Benjamin Sharon
+ # All rights reserved.
+ #
+ # Redistribution and use in source and binary forms, with or without
+ # modification, are permitted provided that the following conditions
+ # are met: Redistributions of source code must retain the above
+ # copyright notice, this list of conditions and the following
+ # disclaimer. Redistributions in binary form must reproduce the
+ # above copyright notice, this list of conditions and the following
+ # disclaimer in the documentation and/or other materials provided
+ # with the distribution.
+ #
+ # Neither the name Myanmar Karen Word Lists, nor the names of its
+ # contributors may be used to endorse or promote products derived
+ # from this software without specific prior written permission.
+ #
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+ # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ # SUCH DAMAGE.
+ # --------------------------------------------------------------------------
+
+5. Time Zone Database
+
+ ICU uses the public domain data and code derived from Time Zone
+Database for its time zone support. The ownership of the TZ database
+is explained in BCP 175: Procedure for Maintaining the Time Zone
+Database section 7.
+
+ # 7. Database Ownership
+ #
+ # The TZ database itself is not an IETF Contribution or an IETF
+ # document. Rather it is a pre-existing and regularly updated work
+ # that is in the public domain, and is intended to remain in the
+ # public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do
+ # not apply to the TZ Database or contributions that individuals make
+ # to it. Should any claims be made and substantiated against the TZ
+ # Database, the organization that is providing the IANA
+ # Considerations defined in this RFC, under the memorandum of
+ # understanding with the IETF, currently ICANN, may act in accordance
+ # with all competent court orders. No ownership claims will be made
+ # by ICANN or the IETF Trust on the database or the code. Any person
+ # making a contribution to the database or code waives all rights to
+ # future claims in that contribution or in the TZ Database.
+
+6. Google double-conversion
+
+Copyright 2006-2011, the V8 project authors. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+ * Neither the name of Google Inc. nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/thirdparty/icu4c/common/appendable.cpp b/thirdparty/icu4c/common/appendable.cpp
new file mode 100644
index 0000000000..fca3c1e413
--- /dev/null
+++ b/thirdparty/icu4c/common/appendable.cpp
@@ -0,0 +1,74 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2011-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: appendable.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010dec07
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/appendable.h"
+#include "unicode/utf16.h"
+
+U_NAMESPACE_BEGIN
+
+Appendable::~Appendable() {}  // Destructor defined out of line with an empty body.
+
+UBool  // Appends code point c through appendCodeUnit() as one or two code units; returns the combined result of those calls.
+Appendable::appendCodePoint(UChar32 c) {
+ if(c<=0xffff) {  // c is emitted as a single code unit (cast to UChar).
+ return appendCodeUnit((UChar)c);
+ } else {  // c is emitted as the U16_LEAD/U16_TRAIL pair; && skips the trail unit if appending the lead fails.
+ return appendCodeUnit(U16_LEAD(c)) && appendCodeUnit(U16_TRAIL(c));
+ }
+}
+
+UBool  // Appends the code units of s one at a time via appendCodeUnit(); returns FALSE at the first failed append, else TRUE.
+Appendable::appendString(const UChar *s, int32_t length) {
+ if(length<0) {  // Negative length: read s up to, but not including, a terminating 0 unit.
+ UChar c;
+ while((c=*s++)!=0) {
+ if(!appendCodeUnit(c)) {
+ return FALSE;
+ }
+ }
+ } else if(length>0) {  // Positive length: append exactly length units; length==0 appends nothing.
+ const UChar *limit=s+length;
+ do {  // do/while is fine here because length>0 guarantees at least one unit.
+ if(!appendCodeUnit(*s++)) {
+ return FALSE;
+ }
+ } while(s<limit);
+ }
+ return TRUE;
+}
+
+UBool  // This implementation ignores the requested capacity and always reports TRUE.
+Appendable::reserveAppendCapacity(int32_t /*appendCapacity*/) {
+ return TRUE;
+}
+
+UChar *  // This implementation returns the caller's scratch buffer, or NULL (with *resultCapacity=0) if the request cannot be met.
+Appendable::getAppendBuffer(int32_t minCapacity,
+ int32_t /*desiredCapacityHint*/,
+ UChar *scratch, int32_t scratchCapacity,
+ int32_t *resultCapacity) {
+ if(minCapacity<1 || scratchCapacity<minCapacity) {  // Reject a non-positive request or a scratch buffer smaller than minCapacity.
+ *resultCapacity=0;
+ return NULL;
+ }
+ *resultCapacity=scratchCapacity;  // Report the whole scratch capacity, not just minCapacity.
+ return scratch;
+}
+
+// UnicodeStringAppendable is implemented in unistr.cpp.
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/bmpset.cpp b/thirdparty/icu4c/common/bmpset.cpp
new file mode 100644
index 0000000000..bc79f5e5a6
--- /dev/null
+++ b/thirdparty/icu4c/common/bmpset.cpp
@@ -0,0 +1,741 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2007-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: bmpset.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2007jan29
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uniset.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "bmpset.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :  // Builds all lookup tables from the given list; list/listLength are initialized from the arguments.
+ list(parentList), listLength(parentListLength) {
+ uprv_memset(latin1Contains, 0, sizeof(latin1Contains));  // Zero the three tables; initBits() and overrideIllegal() only OR/mask bits afterwards.
+ uprv_memset(table7FF, 0, sizeof(table7FF));
+ uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));
+
+ /*
+ * Set the list indexes for binary searches for
+ * U+0800, U+1000, U+2000, .., U+F000, U+10000.
+ * U+0800 is the first 3-byte-UTF-8 code point. Lower code points are
+ * looked up in the bit tables.
+ * The last pair of indexes is for finding supplementary code points.
+ */
+ list4kStarts[0]=findCodePoint(0x800, 0, listLength-1);
+ int32_t i;
+ for(i=1; i<=0x10; ++i) {  // Entry i is the list index for code point i<<12; each search starts at the previous entry.
+ list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1);
+ }
+ list4kStarts[0x11]=listLength-1;  // Upper search bound used for supplementary code points.
+ containsFFFD=containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10]);  // Cached membership of U+FFFD; read by overrideIllegal().
+
+ initBits();
+ overrideIllegal();  // Must follow initBits(): it adjusts the bits that initBits() produced.
+}
+
+BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :  // Copies the precomputed tables from otherBMPSet but takes list/listLength from the new arguments.
+ containsFFFD(otherBMPSet.containsFFFD),
+ list(newParentList), listLength(newParentListLength) {
+ uprv_memcpy(latin1Contains, otherBMPSet.latin1Contains, sizeof(latin1Contains));  // No recomputation: all four tables are copied verbatim.
+ uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
+ uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
+ uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));  // NOTE(review): copied indexes presumably require the new list to have the same contents as the other set's list - confirm with callers.
+}
+
+BMPSet::~BMPSet() {  // Empty body: nothing is released here.
+}
+
+/*
+ * Set the bits for values start..limit-1 in a bit rectangle in "vertical" bit organization:
+ * value v maps to bit (v>>6) of table[v&0x3f]. Requires start<limit<=0x800 (asserted below).
+ */
+static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
+ U_ASSERT(start<limit);
+ U_ASSERT(limit<=0x800);
+
+ int32_t lead=start>>6; // Named for UTF-8 2-byte lead byte with upper 5 bits.
+ int32_t trail=start&0x3f; // Named for UTF-8 2-byte trail byte with lower 6 bits.
+
+ // Set one bit indicating an all-one block.
+ uint32_t bits=(uint32_t)1<<lead;
+ if((start+1)==limit) { // Single-character shortcut.
+ table[trail]|=bits;
+ return;
+ }
+
+ int32_t limitLead=limit>>6;
+ int32_t limitTrail=limit&0x3f;
+
+ if(lead==limitLead) {
+ // Partial vertical bit column.
+ while(trail<limitTrail) {
+ table[trail++]|=bits;
+ }
+ } else {
+ // Partial vertical bit column,
+ // followed by a bit rectangle,
+ // followed by another partial vertical bit column.
+ if(trail>0) {
+ do {
+ table[trail++]|=bits;
+ } while(trail<64);
+ ++lead;  // The first column is finished; the rectangle starts at the next column.
+ }
+ if(lead<limitLead) {
+ bits=~(((unsigned)1<<lead)-1);  // All bits from position lead upward...
+ if(limitLead<0x20) {
+ bits&=((unsigned)1<<limitLead)-1;  // ...restricted to positions below limitLead (no restriction needed when limitLead==0x20).
+ }
+ for(trail=0; trail<64; ++trail) {
+ table[trail]|=bits;
+ }
+ }
+ // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
+ // In that case, 1<<limitLead would be undefined, so the shift count is clamped to 31;
+ // the resulting bits value is unused because trail<limitTrail is already false.
+ bits=(uint32_t)1<<((limitLead == 0x20) ? (limitLead - 1) : limitLead);
+ for(trail=0; trail<limitTrail; ++trail) {
+ table[trail]|=bits;
+ }
+ }
+}
+
+void BMPSet::initBits() {  // Fills latin1Contains[], table7FF[] and bmpBlockBits[] from the start/limit pairs read out of list[].
+ UChar32 start, limit;
+ int32_t listIndex=0;
+
+ // Set latin1Contains[].
+ do {
+ start=list[listIndex++];
+ if(listIndex<listLength) {
+ limit=list[listIndex++];
+ } else {
+ limit=0x110000;  // No further entry in list[]: treat the range as running to 0x110000.
+ }
+ if(start>=0x100) {  // This range starts above U+00FF; nothing more to record in latin1Contains[].
+ break;
+ }
+ do {
+ latin1Contains[start++]=1;
+ } while(start<limit && start<0x100);  // Clip the range at U+00FF.
+ } while(limit<=0x100);  // Continue only while the range just processed ended within 0..0x100.
+
+ // Find the first range overlapping with (or after) 80..FF again,
+ // to include them in table7FF as well.
+ for(listIndex=0;;) {
+ start=list[listIndex++];
+ if(listIndex<listLength) {
+ limit=list[listIndex++];
+ } else {
+ limit=0x110000;
+ }
+ if(limit>0x80) {  // First range that reaches past U+007F.
+ if(start<0x80) {
+ start=0x80;  // Clip: values below 0x80 are not entered into table7FF[].
+ }
+ break;
+ }
+ }
+
+ // Set table7FF[].
+ while(start<0x800) {
+ set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800);  // Clip the range at U+07FF.
+ if(limit>0x800) {
+ start=0x800;  // The rest of this range is handled by the bmpBlockBits[] loop below.
+ break;
+ }
+
+ start=list[listIndex++];
+ if(listIndex<listLength) {
+ limit=list[listIndex++];
+ } else {
+ limit=0x110000;
+ }
+ }
+
+ // Set bmpBlockBits[].
+ int32_t minStart=0x800;  // Ranges are clipped to start no lower than this; raised past each block already marked mixed.
+ while(start<0x10000) {
+ if(limit>0x10000) {
+ limit=0x10000;  // Clip at the end of the BMP.
+ }
+
+ if(start<minStart) {
+ start=minStart;
+ }
+ if(start<limit) { // Else: Another range entirely in a known mixed-value block.
+ if(start&0x3f) {
+ // Mixed-value block of 64 code points.
+ start>>=6;  // start is now a 64-code-point block index.
+ bmpBlockBits[start&0x3f]|=0x10001<<(start>>6);  // Set both bits for this block; contains() then sees twoBits>1 and uses containsSlow().
+ start=(start+1)<<6; // Round up to the next block boundary.
+ minStart=start; // Ignore further ranges in this block.
+ }
+ if(start<limit) {
+ if(start<(limit&~0x3f)) {
+ // Multiple all-ones blocks of 64 code points each.
+ set32x64Bits(bmpBlockBits, start>>6, limit>>6);  // Called with block indexes, so one bit is set per whole block.
+ }
+
+ if(limit&0x3f) {
+ // Mixed-value block of 64 code points.
+ limit>>=6;  // limit is now a 64-code-point block index.
+ bmpBlockBits[limit&0x3f]|=0x10001<<(limit>>6);
+ limit=(limit+1)<<6; // Round up to the next block boundary.
+ minStart=limit; // Ignore further ranges in this block.
+ }
+ }
+ }
+
+ if(limit==0x10000) {  // The current range reached the end of the BMP; done.
+ break;
+ }
+
+ start=list[listIndex++];
+ if(listIndex<listLength) {
+ limit=list[listIndex++];
+ } else {
+ limit=0x110000;
+ }
+ }
+}
+
+/*
+ * Override some bits and bytes to the result of contains(FFFD)
+ * for faster validity checking at runtime.
+ * No need to set 0 values where they were reset to 0 in the constructor
+ * and not modified by initBits().
+ * (table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
+ * Need to set 0 values for surrogates D800..DFFF.
+ */
+void BMPSet::overrideIllegal() {
+ uint32_t bits, mask;
+ int32_t i;
+
+ if(containsFFFD) {
+ bits=3; // Lead bytes 0xC0 and 0xC1.
+ for(i=0; i<64; ++i) {  // Set bits 0 and 1 in every row of table7FF[].
+ table7FF[i]|=bits;
+ }
+
+ bits=1; // Lead byte 0xE0.
+ for(i=0; i<32; ++i) { // First half of 4k block.
+ bmpBlockBits[i]|=bits;
+ }
+
+ mask= static_cast<uint32_t>(~(0x10001<<0xd)); // Lead byte 0xED.
+ bits=1<<0xd;
+ for(i=32; i<64; ++i) { // Second half of 4k block.
+ bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;  // Clear both bits for lead 0xd, then set only the low one.
+ }
+ } else {
+ mask= static_cast<uint32_t>(~(0x10001<<0xd)); // Lead byte 0xED.
+ for(i=32; i<64; ++i) { // Second half of 4k block.
+ bmpBlockBits[i]&=mask;  // Clear both bits for lead 0xd.
+ }
+ }
+}
+
+int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {  // Binary search in list[] bounded by the indexes lo and hi; see the return-value comment below.
+ /* Examples:
+ findCodePoint(c)
+ set list[] c=0 1 3 4 7 8
+ === ============== ===========
+ [] [110000] 0 0 0 0 0 0
+ [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2
+ [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2
+ [:Any:] [0, 110000] 1 1 1 1 1 1
+ */
+
+ // Return the smallest i such that c < list[i]. Assume
+ // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
+ if (c < list[lo])
+ return lo;
+ // High runner test. c is often after the last range, so an
+ // initial check for this condition pays off.
+ if (lo >= hi || c >= list[hi-1])
+ return hi;
+ // invariant: c >= list[lo]
+ // invariant: c < list[hi]
+ for (;;) {
+ int32_t i = (lo + hi) >> 1;  // Midpoint of the current bracket.
+ if (i == lo) {  // lo and hi are adjacent, so hi is the answer.
+ break; // Found!
+ } else if (c < list[i]) {
+ hi = i;
+ } else {
+ lo = i;
+ }
+ }
+ return hi;
+}
+
+UBool  // Tests whether code point c is in the set, using the bit tables where possible and containsSlow() otherwise.
+BMPSet::contains(UChar32 c) const {
+ if((uint32_t)c<=0xff) {  // The unsigned casts make a negative c fail every range test and reach the final else.
+ return (UBool)latin1Contains[c];
+ } else if((uint32_t)c<=0x7ff) {
+ return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
+ } else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {  // BMP code point outside the surrogate range.
+ int lead=c>>12;
+ uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;  // 0: no code point of the block is in the set; 1: all are; larger: mixed block.
+ if(twoBits<=1) {
+ // All 64 code points with the same bits 15..6
+ // are either in the set or not.
+ return (UBool)twoBits;
+ } else {
+ // Look up the code point in its 4k block of code points.
+ return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
+ }
+ } else if((uint32_t)c<=0x10ffff) {
+ // surrogate or supplementary code point
+ return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
+ } else {
+ // Out-of-range code points get FALSE, consistent with long-standing
+ // behavior of UnicodeSet::contains(c).
+ return FALSE;
+ }
+}
+
+/*
+ * Check for sufficient length for trail unit for each surrogate pair.
+ * Handle single surrogates as surrogate code points as usual in ICU.
+ */
+const UChar *  // Scans forward from s and returns the position where the scan stopped (limit if it never stopped early).
+BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {  // NOTE(review): both loops read *s before comparing with limit, so callers presumably guarantee s<limit - confirm.
+ UChar c, c2;
+
+ if(spanCondition) {  // Nonzero condition: advance while code points are in the set.
+ // span
+ do {
+ c=*s;
+ if(c<=0xff) {
+ if(!latin1Contains[c]) {
+ break;
+ }
+ } else if(c<=0x7ff) {
+ if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
+ break;
+ }
+ } else if(c<0xd800 || c>=0xe000) {
+ int lead=c>>12;
+ uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;  // Same two-bit lookup as in contains().
+ if(twoBits<=1) {
+ // All 64 code points with the same bits 15..6
+ // are either in the set or not.
+ if(twoBits==0) {
+ break;
+ }
+ } else {
+ // Look up the code point in its 4k block of code points.
+ if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
+ break;
+ }
+ }
+ } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {  // Unpaired: c is a trail unit, or a lead unit at the end or not followed by a trail unit.
+ // surrogate code point
+ if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
+ break;
+ }
+ } else {
+ // surrogate pair
+ if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
+ break;
+ }
+ ++s;  // Step over the lead unit; the loop increment then steps over the trail unit.
+ }
+ } while(++s<limit);
+ } else {  // Zero condition: advance while code points are NOT in the set (tests below are inverted).
+ // span not
+ do {
+ c=*s;
+ if(c<=0xff) {
+ if(latin1Contains[c]) {
+ break;
+ }
+ } else if(c<=0x7ff) {
+ if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
+ break;
+ }
+ } else if(c<0xd800 || c>=0xe000) {
+ int lead=c>>12;
+ uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
+ if(twoBits<=1) {
+ // All 64 code points with the same bits 15..6
+ // are either in the set or not.
+ if(twoBits!=0) {
+ break;
+ }
+ } else {
+ // Look up the code point in its 4k block of code points.
+ if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
+ break;
+ }
+ }
+ } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
+ // surrogate code point
+ if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
+ break;
+ }
+ } else {
+ // surrogate pair
+ if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
+ break;
+ }
+ ++s;  // Step over the lead unit; the loop increment then steps over the trail unit.
+ }
+ } while(++s<limit);
+ }
+ return s;
+}
+
+/* Symmetrical with span(). */
+// Backward span over UTF-16 code units in [s, limit).
+// Walks from limit towards s while the membership of each code point agrees
+// with spanCondition (nonzero: must be in the set; zero: must not be).
+// The unit before limit is read before any bounds check, so callers must
+// guarantee s<limit.
+// Returns s if every code point matched, otherwise the pointer just past the
+// code point that ended the span.
+const UChar *
+BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
+ UChar c, c2;
+
+ if(spanCondition) {
+ // span
+ // Stop (break) at the first code point, going backwards, that is NOT in the set.
+ for(;;) {
+ c=*(--limit);
+ if(c<=0xff) {
+ if(!latin1Contains[c]) {
+ break;
+ }
+ } else if(c<=0x7ff) {
+ if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
+ break;
+ }
+ } else if(c<0xd800 || c>=0xe000) {
+ int lead=c>>12;
+ uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
+ if(twoBits<=1) {
+ // All 64 code points with the same bits 15..6
+ // are either in the set or not.
+ if(twoBits==0) {
+ break;
+ }
+ } else {
+ // Look up the code point in its 4k block of code points.
+ if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
+ break;
+ }
+ }
+ // c is a surrogate here. It is treated as a single surrogate code point if it
+ // is a lead surrogate, is the first unit of the buffer, or is not preceded by
+ // a lead surrogate. c2 is only read when limit-1 is still inside the buffer.
+ } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
+ // surrogate code point
+ if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
+ break;
+ }
+ } else {
+ // surrogate pair
+ if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
+ break;
+ }
+ // Also step over the lead surrogate.
+ --limit;
+ }
+ // Reached the start of the buffer: everything matched.
+ if(s==limit) {
+ return s;
+ }
+ }
+ } else {
+ // span not
+ // Mirror image of the loop above: stop at the first code point that IS in the set.
+ for(;;) {
+ c=*(--limit);
+ if(c<=0xff) {
+ if(latin1Contains[c]) {
+ break;
+ }
+ } else if(c<=0x7ff) {
+ if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
+ break;
+ }
+ } else if(c<0xd800 || c>=0xe000) {
+ int lead=c>>12;
+ uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
+ if(twoBits<=1) {
+ // All 64 code points with the same bits 15..6
+ // are either in the set or not.
+ if(twoBits!=0) {
+ break;
+ }
+ } else {
+ // Look up the code point in its 4k block of code points.
+ if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
+ break;
+ }
+ }
+ // Same single-surrogate test as in the "span" loop above.
+ } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
+ // surrogate code point
+ if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
+ break;
+ }
+ } else {
+ // surrogate pair
+ if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
+ break;
+ }
+ // Also step over the lead surrogate.
+ --limit;
+ }
+ // Reached the start of the buffer: everything matched.
+ if(s==limit) {
+ return s;
+ }
+ }
+ }
+ // Reached only via break: limit points at the last unit of the offending
+ // code point (the pair branch breaks before its extra --limit), so the span
+ // starts one unit after it.
+ return limit+1;
+}
+
+/*
+ * Precheck for sufficient trail bytes at end of string only once per span.
+ * Check validity.
+ */
+// Forward span over UTF-8 bytes in [s, s+length).
+// Advances while the membership of each code point agrees with spanCondition
+// (not USET_SPAN_NOT_CONTAINED: must be in the set; otherwise must not be).
+// Ill-formed bytes are given the value of containsFFFD.
+// Both *s and *(limit-1) are read unconditionally, so callers must
+// guarantee length>0.
+// Returns the pointer to the first byte of the code point that ended the
+// span, or limit0 (see below) if the whole input matched.
+const uint8_t *
+BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
+ const uint8_t *limit=s+length;
+ uint8_t b=*s;
+ if(U8_IS_SINGLE(b)) {
+ // Initial all-ASCII span.
+ if(spanCondition) {
+ do {
+ if(!latin1Contains[b] || ++s==limit) {
+ return s;
+ }
+ b=*s;
+ } while(U8_IS_SINGLE(b));
+ } else {
+ do {
+ if(latin1Contains[b] || ++s==limit) {
+ return s;
+ }
+ b=*s;
+ } while(U8_IS_SINGLE(b));
+ }
+ // Remaining length after the ASCII prefix; used by the trailing-sequence checks below.
+ length=(int32_t)(limit-s);
+ }
+
+ if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+ spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
+ }
+
+ // limit0 is the value returned when the whole (possibly shortened) input matched.
+ const uint8_t *limit0=limit;
+
+ /*
+ * Make sure that the last 1/2/3/4-byte sequence before limit is complete
+ * or runs into a lead byte.
+ * In the span loop compare s with limit only once
+ * per multi-byte character.
+ *
+ * Give a trailing illegal sequence the same value as the result of contains(FFFD),
+ * including it if that is part of the span, otherwise set limit0 to before
+ * the truncated sequence.
+ */
+ b=*(limit-1);
+ if((int8_t)b<0) {
+ // b>=0x80: lead or trail byte
+ if(b<0xc0) {
+ // single trail byte, check for preceding 3- or 4-byte lead byte
+ if(length>=2 && (b=*(limit-2))>=0xe0) {
+ limit-=2;
+ if(containsFFFD!=spanCondition) {
+ limit0=limit;
+ }
+ // If length<2, b is still the last byte here; otherwise it is the byte at limit-2.
+ } else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
+ // 4-byte lead byte with only two trail bytes
+ limit-=3;
+ if(containsFFFD!=spanCondition) {
+ limit0=limit;
+ }
+ }
+ } else {
+ // lead byte with no trail bytes
+ --limit;
+ if(containsFFFD!=spanCondition) {
+ limit0=limit;
+ }
+ }
+ }
+
+ // Trail bytes minus 0x80; a value <=0x3f means a well-formed trail byte.
+ uint8_t t1, t2, t3;
+
+ while(s<limit) {
+ b=*s;
+ if(U8_IS_SINGLE(b)) {
+ // ASCII
+ if(spanCondition) {
+ do {
+ if(!latin1Contains[b]) {
+ return s;
+ } else if(++s==limit) {
+ return limit0;
+ }
+ b=*s;
+ } while(U8_IS_SINGLE(b));
+ } else {
+ do {
+ if(latin1Contains[b]) {
+ return s;
+ } else if(++s==limit) {
+ return limit0;
+ }
+ b=*s;
+ } while(U8_IS_SINGLE(b));
+ }
+ }
+ ++s; // Advance past the lead byte.
+ // From here on s-1 is the lead byte position, which is what gets returned on a mismatch.
+ if(b>=0xe0) {
+ if(b<0xf0) {
+ if( /* handle U+0000..U+FFFF inline */
+ (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
+ (t2=(uint8_t)(s[1]-0x80)) <= 0x3f
+ ) {
+ b&=0xf;
+ uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
+ if(twoBits<=1) {
+ // All 64 code points with this lead byte and middle trail byte
+ // are either in the set or not.
+ if(twoBits!=(uint32_t)spanCondition) {
+ return s-1;
+ }
+ } else {
+ // Look up the code point in its 4k block of code points.
+ UChar32 c=(b<<12)|(t1<<6)|t2;
+ if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) {
+ return s-1;
+ }
+ }
+ s+=2;
+ continue;
+ }
+ } else if( /* handle U+10000..U+10FFFF inline */
+ (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
+ (t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
+ (t3=(uint8_t)(s[2]-0x80)) <= 0x3f
+ ) {
+ // Give an illegal sequence the same value as the result of contains(FFFD).
+ UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
+ if( ( (0x10000<=c && c<=0x10ffff) ?
+ containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
+ containsFFFD
+ ) != spanCondition
+ ) {
+ return s-1;
+ }
+ s+=3;
+ continue;
+ }
+ } else {
+ if( /* handle U+0000..U+07FF inline */
+ b>=0xc0 &&
+ (t1=(uint8_t)(*s-0x80)) <= 0x3f
+ ) {
+ if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
+ return s-1;
+ }
+ ++s;
+ continue;
+ }
+ }
+
+ // Give an illegal sequence the same value as the result of contains(FFFD).
+ // Handle each byte of an illegal sequence separately to simplify the code;
+ // no need to optimize error handling.
+ if(containsFFFD!=spanCondition) {
+ return s-1;
+ }
+ }
+
+ return limit0;
+}
+
+/*
+ * While going backwards through UTF-8 optimize only for ASCII.
+ * Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not
+ * possible to tell from the last byte in a multi-byte sequence how many
+ * preceding bytes there should be. Therefore, going backwards through UTF-8
+ * is much harder than going forward.
+ */
+// Backward span over the UTF-8 bytes s[0..length).
+// Walks from the end towards index 0 while the membership of each code point
+// agrees with spanCondition (not USET_SPAN_NOT_CONTAINED: must be in the set;
+// otherwise must not be).
+// s[length-1] is read before any bounds check, so callers must guarantee length>0.
+// Returns the index at which the span starts: 0 if everything matched,
+// otherwise the index just past the last byte of the code point that ended it.
+int32_t
+BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
+ if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+ spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
+ }
+
+ uint8_t b;
+
+ do {
+ b=s[--length];
+ if(U8_IS_SINGLE(b)) {
+ // ASCII sub-span
+ if(spanCondition) {
+ do {
+ if(!latin1Contains[b]) {
+ return length+1;
+ } else if(length==0) {
+ return 0;
+ }
+ b=s[--length];
+ } while(U8_IS_SINGLE(b));
+ } else {
+ do {
+ if(latin1Contains[b]) {
+ return length+1;
+ } else if(length==0) {
+ return 0;
+ }
+ b=s[--length];
+ } while(U8_IS_SINGLE(b));
+ }
+ }
+
+ // prev is the index of the last byte of the (possibly multi-byte) character;
+ // it is kept because the call below moves length back to the character's start.
+ int32_t prev=length;
+ UChar32 c;
+ // trail byte: collect a multi-byte character
+ // (or lead byte in last-trail position)
+ // NOTE(review): the -3 "strict" argument presumably makes ill-formed input
+ // come back as U+FFFD -- confirm against utf8_prevCharSafeBody's definition.
+ c=utf8_prevCharSafeBody(s, 0, &length, b, -3);
+ // c is a valid code point, not ASCII, not a surrogate
+ if(c<=0x7ff) {
+ if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
+ return prev+1;
+ }
+ } else if(c<=0xffff) {
+ int lead=c>>12;
+ uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
+ if(twoBits<=1) {
+ // All 64 code points with the same bits 15..6
+ // are either in the set or not.
+ if(twoBits!=(uint32_t)spanCondition) {
+ return prev+1;
+ }
+ } else {
+ // Look up the code point in its 4k block of code points.
+ if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) {
+ return prev+1;
+ }
+ }
+ } else {
+ // Supplementary code point: search the part of the list for U+10000 and above.
+ if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) {
+ return prev+1;
+ }
+ }
+ } while(length>0);
+ return 0;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/bmpset.h b/thirdparty/icu4c/common/bmpset.h
new file mode 100644
index 0000000000..e1982ac669
--- /dev/null
+++ b/thirdparty/icu4c/common/bmpset.h
@@ -0,0 +1,164 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: bmpset.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2007jan29
+* created by: Markus W. Scherer
+*/
+
+#ifndef __BMPSET_H__
+#define __BMPSET_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uniset.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Helper class for frozen UnicodeSets, implements contains() and span()
+ * optimized for BMP code points. Structured to be UTF-8-friendly.
+ *
+ * Latin-1: Look up bytes.
+ * 2-byte characters: Bits organized vertically.
+ * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF,
+ * with mixed for illegal ranges.
+ * Supplementary characters: Binary search over
+ * the supplementary part of the parent set's inversion list.
+ */
+class BMPSet : public UMemory {
+public:
+ // parentList/parentListLength: the parent set's inversion list (see the
+ // `list` member below).
+ // NOTE(review): the list is presumably referenced, not copied -- confirm
+ // in the constructor definitions.
+ BMPSet(const int32_t *parentList, int32_t parentListLength);
+ BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength);
+ virtual ~BMPSet();
+
+ // Tests a single code point. Values outside 0..0x10ffff yield FALSE.
+ virtual UBool contains(UChar32 c) const;
+
+ /*
+ * Span the initial substring for which each character c has spanCondition==contains(c).
+ * It must be s<limit and spanCondition==0 or 1.
+ * @return The string pointer which limits the span.
+ */
+ const UChar *span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
+
+ /*
+ * Span the trailing substring for which each character c has spanCondition==contains(c).
+ * It must be s<limit and spanCondition==0 or 1.
+ * @return The string pointer which starts the span.
+ */
+ const UChar *spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
+
+ /*
+ * Span the initial substring for which each character c has spanCondition==contains(c).
+ * It must be length>0 and spanCondition==0 or 1.
+ * @return The string pointer which limits the span.
+ */
+ const uint8_t *spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ /*
+ * Span the trailing substring for which each character c has spanCondition==contains(c).
+ * It must be length>0 and spanCondition==0 or 1.
+ * @return The start of the span.
+ */
+ int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
+
+private:
+ // NOTE(review): presumably these fill the lookup tables below
+ // (latin1Contains, table7FF, bmpBlockBits) -- confirm in bmpset.cpp.
+ void initBits();
+ void overrideIllegal();
+
+ /**
+ * Same as UnicodeSet::findCodePoint(UChar32 c) const except that the
+ * binary search is restricted for finding code points in a certain range.
+ *
+ * For restricting the search for finding in the range start..end,
+ * pass in
+ * lo=findCodePoint(start) and
+ * hi=findCodePoint(end)
+ * with 0<=lo<=hi<len.
+ * findCodePoint(c) defaults to lo=0 and hi=len-1.
+ *
+ * @param c a character in a subrange of MIN_VALUE..MAX_VALUE
+ * @param lo The lowest index to be returned.
+ * @param hi The highest index to be returned.
+ * @return the smallest integer i in the range lo..hi,
+ * inclusive, such that c < list[i]
+ */
+ int32_t findCodePoint(UChar32 c, int32_t lo, int32_t hi) const;
+
+ // Returns the low bit of findCodePoint(c, lo, hi) as the containment result.
+ inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const;
+
+ /*
+ * One byte 0 or 1 per Latin-1 character.
+ */
+ UBool latin1Contains[0x100];
+
+ /* true if contains(U+FFFD). */
+ UBool containsFFFD;
+
+ /*
+ * One bit per code point from U+0000..U+07FF.
+ * The bits are organized vertically; consecutive code points
+ * correspond to the same bit positions in consecutive table words.
+ * With code point parts
+ * lead=c{10..6}
+ * trail=c{5..0}
+ * it is set.contains(c)==(table7FF[trail] bit lead)
+ *
+ * Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD)
+ * for faster validity checking at runtime.
+ */
+ uint32_t table7FF[64];
+
+ /*
+ * One bit per 64 BMP code points.
+ * The bits are organized vertically; consecutive 64-code point blocks
+ * correspond to the same bit position in consecutive table words.
+ * With code point parts
+ * lead=c{15..12}
+ * t1=c{11..6}
+ * test bits (lead+16) and lead in bmpBlockBits[t1].
+ * If the upper bit is 0, then the lower bit indicates if contains(c)
+ * for all code points in the 64-block.
+ * If the upper bit is 1, then the block is mixed and set.contains(c)
+ * must be called.
+ *
+ * Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to
+ * the result of contains(FFFD) for faster validity checking at runtime.
+ */
+ uint32_t bmpBlockBits[64];
+
+ /*
+ * Inversion list indexes for restricted binary searches in
+ * findCodePoint(), from
+ * findCodePoint(U+0800, U+1000, U+2000, .., U+F000, U+10000).
+ * U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are
+ * always looked up in the bit tables.
+ * The last pair of indexes is for finding supplementary code points.
+ *
+ * The lookup code indexes this array with lead=c>>12 and lead+1 for BMP
+ * code points, and with 0x10 and 0x11 for supplementary code points.
+ */
+ int32_t list4kStarts[18];
+
+ /*
+ * The inversion list of the parent set, for the slower contains() implementation
+ * for mixed BMP blocks and for supplementary code points.
+ * The list is terminated with list[listLength-1]=0x110000.
+ */
+ const int32_t *list;
+ int32_t listLength;
+};
+
+// Containment test via the restricted binary search: the low bit of the
+// index returned by findCodePoint() is the result.
+inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {
+    const int32_t index = findCodePoint(c, lo, hi);
+    return (UBool)(index & 1);
+}
+
+U_NAMESPACE_END
+
+#endif
diff --git a/thirdparty/icu4c/common/brkeng.cpp b/thirdparty/icu4c/common/brkeng.cpp
new file mode 100644
index 0000000000..78492db662
--- /dev/null
+++ b/thirdparty/icu4c/common/brkeng.cpp
@@ -0,0 +1,284 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ ************************************************************************************
+ * Copyright (C) 2006-2016, International Business Machines Corporation
+ * and others. All Rights Reserved.
+ ************************************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/uchar.h"
+#include "unicode/uniset.h"
+#include "unicode/chariter.h"
+#include "unicode/ures.h"
+#include "unicode/udata.h"
+#include "unicode/putil.h"
+#include "unicode/ustring.h"
+#include "unicode/uscript.h"
+#include "unicode/ucharstrie.h"
+#include "unicode/bytestrie.h"
+
+#include "brkeng.h"
+#include "cmemory.h"
+#include "dictbe.h"
+#include "charstr.h"
+#include "dictionarydata.h"
+#include "mutex.h"
+#include "uvector.h"
+#include "umutex.h"
+#include "uresimp.h"
+#include "ubrkimpl.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ ******************************************************************
+ */
+
+// Nothing to initialize; defined out of line in this file.
+LanguageBreakEngine::LanguageBreakEngine() = default;
+
+// Nothing to release; defined out of line in this file.
+LanguageBreakEngine::~LanguageBreakEngine() = default;
+
+/*
+ ******************************************************************
+ */
+
+// Nothing to initialize; defined out of line in this file.
+LanguageBreakFactory::LanguageBreakFactory() = default;
+
+// Nothing to release; defined out of line in this file.
+LanguageBreakFactory::~LanguageBreakFactory() = default;
+
+/*
+ ******************************************************************
+ */
+
+// Starts with no handled-character set; it is allocated on demand by
+// handleCharacter(). The status argument is not used.
+UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/)
+        : fHandled(nullptr) {
+}
+
+// Releases the set of handled characters, if one was ever allocated.
+UnhandledEngine::~UnhandledEngine() {
+    UnicodeSet *handled = fHandled;
+    fHandled = nullptr;
+    delete handled;
+}
+
+// Reports whether c is in the set of handled characters; false while no
+// set has been allocated yet.
+UBool
+UnhandledEngine::handles(UChar32 c) const {
+    if (fHandled == nullptr) {
+        return (UBool)false;
+    }
+    return (UBool)(fHandled->contains(c) != 0);
+}
+
+// Advances the text iterator over the run of characters this engine handles,
+// stopping at endPos or at the first character outside the handled set.
+// No breaks are ever recorded: startPos and foundBreaks are unused and the
+// return value is always 0.
+int32_t
+UnhandledEngine::findBreaks( UText *text,
+                             int32_t /* startPos */,
+                             int32_t endPos,
+                             UVector32 &/*foundBreaks*/ ) const {
+    // fHandled stays null until handleCharacter() has successfully allocated
+    // it. Mirror the null check in handles() instead of dereferencing a null
+    // pointer; with no handled characters there is nothing to skip.
+    if (fHandled == nullptr) {
+        return 0;
+    }
+    UChar32 c = utext_current32(text);
+    while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
+        utext_next32(text);            // TODO:  recast loop to work with post-increment operations.
+        c = utext_current32(text);
+    }
+    return 0;
+}
+
+// Registers c with this engine. If c is not covered yet, the script of c is
+// applied to the handled set. Errors from the set operation are not reported.
+void
+UnhandledEngine::handleCharacter(UChar32 c) {
+    // Allocate the set lazily on first use.
+    if (fHandled == nullptr) {
+        fHandled = new UnicodeSet();
+    }
+    // Nothing to do if the allocation failed or c is already covered.
+    if (fHandled == nullptr || fHandled->contains(c)) {
+        return;
+    }
+    // Apply the entire script of the character.
+    UErrorCode status = U_ZERO_ERROR;
+    const int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
+    fHandled->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
+}
+
+/*
+ ******************************************************************
+ */
+
+// The engine stack is created on demand by getEngineFor(); the status
+// argument is not used.
+ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/)
+        : fEngines(nullptr) {
+}
+
+// Releases the engine stack. Deleting a null pointer is a no-op, so no
+// explicit check is needed.
+ICULanguageBreakFactory::~ICULanguageBreakFactory() {
+    delete fEngines;
+}
+
+U_NAMESPACE_END
+U_CDECL_BEGIN
+// Element deleter handed to the UStack of engines in getEngineFor().
+static void U_CALLCONV _deleteEngine(void *obj) {
+    const icu::LanguageBreakEngine *engine =
+        static_cast<const icu::LanguageBreakEngine *>(obj);
+    delete engine;
+}
+U_CDECL_END
+U_NAMESPACE_BEGIN
+
+// Returns an engine that handles c: an already created one if any of them
+// handles c, otherwise a newly loaded one (which is then pushed onto the
+// engine stack). Returns NULL if no engine is available or the stack cannot
+// be created. The whole lookup runs under a function-local static mutex.
+const LanguageBreakEngine *
+ICULanguageBreakFactory::getEngineFor(UChar32 c) {
+    UErrorCode status = U_ZERO_ERROR;
+
+    static UMutex gBreakEngineMutex;
+    Mutex m(&gBreakEngineMutex);
+
+    if (fEngines != NULL) {
+        // Search the existing engines, most recently added first.
+        for (int32_t i = fEngines->size() - 1; i >= 0; --i) {
+            const LanguageBreakEngine *cached =
+                (const LanguageBreakEngine *)(fEngines->elementAt(i));
+            if (cached != NULL && cached->handles(c)) {
+                return cached;
+            }
+        }
+    } else {
+        // First call: create the stack that holds the engines.
+        UStack *engines = new UStack(_deleteEngine, NULL, status);
+        if (U_FAILURE(status) || engines == NULL) {
+            // Note: no way to return error code to caller.
+            delete engines;
+            return NULL;
+        }
+        fEngines = engines;
+    }
+
+    // We didn't find an engine. Create one.
+    const LanguageBreakEngine *created = loadEngineFor(c);
+    if (created != NULL) {
+        fEngines->push((void *)created, status);
+    }
+    return created;
+}
+
+// Creates a dictionary-based break engine for the script of c.
+// Returns NULL if the script cannot be determined, no dictionary matcher can
+// be loaded for it, the script has no engine in the switch below, or engine
+// construction fails.
+const LanguageBreakEngine *
+ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
+    UErrorCode status = U_ZERO_ERROR;
+    const UScriptCode code = uscript_getScript(c, &status);
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+
+    DictionaryMatcher *matcher = loadDictionaryMatcherFor(code);
+    if (matcher == NULL) {
+        return NULL;
+    }
+
+    const LanguageBreakEngine *engine = NULL;
+    switch(code) {
+    case USCRIPT_THAI:
+        engine = new ThaiBreakEngine(matcher, status);
+        break;
+    case USCRIPT_LAO:
+        engine = new LaoBreakEngine(matcher, status);
+        break;
+    case USCRIPT_MYANMAR:
+        engine = new BurmeseBreakEngine(matcher, status);
+        break;
+    case USCRIPT_KHMER:
+        engine = new KhmerBreakEngine(matcher, status);
+        break;
+
+#if !UCONFIG_NO_NORMALIZATION
+    // CJK not available w/o normalization
+    case USCRIPT_HANGUL:
+        engine = new CjkBreakEngine(matcher, kKorean, status);
+        break;
+
+    // use same BreakEngine and dictionary for both Chinese and Japanese
+    case USCRIPT_HIRAGANA:
+    case USCRIPT_KATAKANA:
+    case USCRIPT_HAN:
+        engine = new CjkBreakEngine(matcher, kChineseJapanese, status);
+        break;
+#if 0
+    // TODO: Have to get some characters with script=common handled
+    // by CjkBreakEngine (e.g. U+309B). Simply subjecting
+    // them to CjkBreakEngine does not work. The engine has to
+    // special-case them.
+    case USCRIPT_COMMON:
+    {
+        UBlockCode block = ublock_getCode(code);
+        if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA)
+            engine = new CjkBreakEngine(dict, kChineseJapanese, status);
+        break;
+    }
+#endif
+#endif
+
+    default:
+        break;
+    }
+
+    if (engine == NULL) {
+        // No engine was created for this script (or allocation failed),
+        // so the matcher still belongs to us.
+        delete matcher;
+        return NULL;
+    }
+    if (U_FAILURE(status)) {
+        // The engine was allocated but its constructor reported an error.
+        // NOTE(review): the matcher is presumably released by the engine's
+        // destructor -- confirm in dictbe.cpp.
+        delete engine;
+        return NULL;
+    }
+    return engine;
+}
+
+// Looks up the dictionary file name for the script in the "dictionaries"
+// table of the root brkitr resource bundle, opens that data file and wraps
+// its trie in a matching DictionaryMatcher.
+// Returns NULL if the resource lookup fails, the data file cannot be opened,
+// the trie type is neither bytes nor UChars, or allocation fails.
+DictionaryMatcher *
+ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
+    UErrorCode status = U_ZERO_ERROR;
+    // open root from brkitr tree.
+    UResourceBundle *bundle = ures_open(U_ICUDATA_BRKITR, "", &status);
+    bundle = ures_getByKeyWithFallback(bundle, "dictionaries", bundle, &status);
+    int32_t nameLength = 0;
+    const UChar *fileName =
+        ures_getStringByKeyWithFallback(bundle, uscript_getShortName(script), &nameLength, &status);
+    if (U_FAILURE(status)) {
+        ures_close(bundle);
+        return NULL;
+    }
+
+    // Split "name.ext" at the last dot into base name and extension.
+    CharString baseName;
+    CharString extension;
+    const UChar *lastDot = u_memrchr(fileName, 0x002e, nameLength);
+    if (lastDot != NULL) {
+        const int32_t baseLength = (int32_t)(lastDot - fileName);
+        extension.appendInvariantChars(
+            UnicodeString(FALSE, lastDot + 1, nameLength - baseLength - 1), status);
+        nameLength = baseLength;
+    }
+    baseName.appendInvariantChars(UnicodeString(FALSE, fileName, nameLength), status);
+    // Close the bundle only after the last use of fileName.
+    ures_close(bundle);
+
+    UDataMemory *file = udata_open(U_ICUDATA_BRKITR, extension.data(), baseName.data(), &status);
+    if (U_FAILURE(status)) {
+        // we don't have a dictionary matcher.
+        // returning NULL here will cause us to fail to find a dictionary break engine, as expected
+        return NULL;
+    }
+
+    // build trie
+    const uint8_t *data = (const uint8_t *)udata_getMemory(file);
+    const int32_t *indexes = (const int32_t *)data;
+    const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
+    const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
+    DictionaryMatcher *matcher = NULL;
+    if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
+        const int32_t transform = indexes[DictionaryData::IX_TRANSFORM];
+        const char *characters = (const char *)(data + offset);
+        matcher = new BytesDictionaryMatcher(characters, transform, file);
+    } else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
+        const UChar *characters = (const UChar *)(data + offset);
+        matcher = new UCharsDictionaryMatcher(characters, file);
+    }
+    if (matcher == NULL) {
+        // no matcher exists to take ownership - either we are an invalid
+        // type or memory allocation failed
+        udata_close(file);
+    }
+    return matcher;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/thirdparty/icu4c/common/brkeng.h b/thirdparty/icu4c/common/brkeng.h
new file mode 100644
index 0000000000..155433b89a
--- /dev/null
+++ b/thirdparty/icu4c/common/brkeng.h
@@ -0,0 +1,271 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ ************************************************************************************
+ * Copyright (C) 2006-2012, International Business Machines Corporation and others. *
+ * All Rights Reserved. *
+ ************************************************************************************
+ */
+
+#ifndef BRKENG_H
+#define BRKENG_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/utext.h"
+#include "unicode/uscript.h"
+
+U_NAMESPACE_BEGIN
+
+class UnicodeSet;
+class UStack;
+class UVector32;
+class DictionaryMatcher;
+
+/*******************************************************************
+ * LanguageBreakEngine
+ */
+
+/**
+ * <p>LanguageBreakEngines implement language-specific knowledge for
+ * finding text boundaries within a run of characters belonging to a
+ * specific set. The boundaries will be of a specific kind, e.g. word,
+ * line, etc.</p>
+ *
+ * <p>LanguageBreakEngines should normally be implemented so as to
+ * be shared between threads without locking.</p>
+ */
+class LanguageBreakEngine : public UMemory {
+ public:
+ // Abstract interface: handles() and findBreaks() are pure virtual and must
+ // be implemented by every concrete engine.
+
+ /**
+ * <p>Default constructor.</p>
+ *
+ */
+ LanguageBreakEngine();
+
+ /**
+ * <p>Virtual destructor.</p>
+ */
+ virtual ~LanguageBreakEngine();
+
+ /**
+ * <p>Indicate whether this engine handles a particular character for
+ * a particular kind of break.</p>
+ *
+ * @param c A character which begins a run that the engine might handle
+ * @return true if this engine handles the particular character and break
+ * type.
+ */
+ virtual UBool handles(UChar32 c) const = 0;
+
+ /**
+ * <p>Find any breaks within a run in the supplied text.</p>
+ *
+ * @param text A UText representing the text. The
+ * iterator is left at the end of the run of characters which the engine
+ * is capable of handling.
+ * @param startPos The start of the run within the supplied text.
+ * @param endPos The end of the run within the supplied text.
+ * @param foundBreaks A Vector of int32_t to receive the breaks.
+ * @return The number of breaks found.
+ */
+ virtual int32_t findBreaks( UText *text,
+ int32_t startPos,
+ int32_t endPos,
+ UVector32 &foundBreaks ) const = 0;
+
+};
+
+/*******************************************************************
+ * LanguageBreakFactory
+ */
+
+/**
+ * <p>LanguageBreakFactorys find and return a LanguageBreakEngine
+ * that can determine breaks for characters in a specific set, if
+ * such an object can be found.</p>
+ *
+ * <p>If a LanguageBreakFactory is to be shared between threads,
+ * appropriate synchronization must be used; there is none internal
+ * to the factory.</p>
+ *
+ * <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
+ * normally be shared between threads without synchronization, unless
+ * the specific subclass of LanguageBreakFactory indicates otherwise.</p>
+ *
+ * <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
+ * it returns when it itself is deleted, unless the specific subclass of
+ * LanguageBreakFactory indicates otherwise. Naturally, the factory should
+ * not be deleted until the LanguageBreakEngines it has returned are no
+ * longer needed.</p>
+ */
+class LanguageBreakFactory : public UMemory {
+ public:
+ // Abstract interface: getEngineFor() is pure virtual and must be
+ // implemented by every concrete factory.
+
+ /**
+ * <p>Default constructor.</p>
+ *
+ */
+ LanguageBreakFactory();
+
+ /**
+ * <p>Virtual destructor.</p>
+ */
+ virtual ~LanguageBreakFactory();
+
+ /**
+ * <p>Find and return a LanguageBreakEngine that can find the desired
+ * kind of break for the set of characters to which the supplied
+ * character belongs. It is up to the set of available engines to
+ * determine what the sets of characters are.</p>
+ *
+ * @param c A character that begins a run for which a LanguageBreakEngine is
+ * sought.
+ * @return A LanguageBreakEngine with the desired characteristics, or 0.
+ */
+ virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0;
+
+};
+
+/*******************************************************************
+ * UnhandledEngine
+ */
+
+/**
+ * <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
+ * handles characters that no other LanguageBreakEngine is available to
+ * handle. It is told the character and the type of break; at its
+ * discretion it may handle more than the specified character (e.g.,
+ * the entire script to which that character belongs).</p>
+ *
+ * <p>UnhandledEngines may not be shared between threads without
+ * external synchronization.</p>
+ */
+
+class UnhandledEngine : public LanguageBreakEngine {
+ private:
+
+ /**
+ * The sets of characters handled.
+ * Null until handleCharacter() allocates it; owned by this engine and
+ * deleted in the destructor.
+ * @internal
+ */
+
+ UnicodeSet *fHandled;
+
+ public:
+
+ /**
+ * <p>Constructor.</p>
+ *
+ * @param status Not used by the current implementation.
+ */
+ UnhandledEngine(UErrorCode &status);
+
+ /**
+ * <p>Virtual destructor.</p>
+ */
+ virtual ~UnhandledEngine();
+
+ /**
+ * <p>Indicate whether this engine handles a particular character for
+ * a particular kind of break.</p>
+ *
+ * @param c A character which begins a run that the engine might handle
+ * @return true if this engine handles the particular character and break
+ * type. Always false before the first handleCharacter() call.
+ */
+ virtual UBool handles(UChar32 c) const;
+
+ /**
+ * <p>Skip over a run of handled characters in the supplied text.
+ * This engine never records any breaks.</p>
+ *
+ * @param text A UText representing the text. The
+ * iterator is left at the end of the run of characters which the engine
+ * is capable of handling.
+ * @param startPos The start of the run within the supplied text (unused).
+ * @param endPos The end of the run within the supplied text.
+ * @param foundBreaks A UVector32 to receive the breaks (unused).
+ * @return The number of breaks found, which is always 0.
+ */
+ virtual int32_t findBreaks( UText *text,
+ int32_t startPos,
+ int32_t endPos,
+ UVector32 &foundBreaks ) const;
+
+ /**
+ * <p>Tell the engine to handle a particular character and break type.</p>
+ * <p>If the character is not handled yet, the implementation applies the
+ * character's entire script to the handled set.</p>
+ *
+ * @param c A character which the engine should handle
+ */
+ virtual void handleCharacter(UChar32 c);
+
+};
+
+/*******************************************************************
+ * ICULanguageBreakFactory
+ */
+
+/**
+ * <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
+ * ICU. It creates dictionary-based LanguageBreakEngines from dictionary
+ * data in the ICU data file.</p>
+ */
+class ICULanguageBreakFactory : public LanguageBreakFactory {
+ private:
+
+ /**
+ * The stack of break engines created by this factory.
+ * Null until the first getEngineFor() call creates it; deleted in the
+ * destructor.
+ * @internal
+ */
+
+ UStack *fEngines;
+
+ public:
+
+ /**
+ * <p>Standard constructor.</p>
+ *
+ * @param status Not used by the current implementation.
+ */
+ ICULanguageBreakFactory(UErrorCode &status);
+
+ /**
+ * <p>Virtual destructor.</p>
+ */
+ virtual ~ICULanguageBreakFactory();
+
+ /**
+ * <p>Find and return a LanguageBreakEngine that can find the desired
+ * kind of break for the set of characters to which the supplied
+ * character belongs. It is up to the set of available engines to
+ * determine what the sets of characters are.</p>
+ *
+ * <p>Engines created earlier are searched first; only if none of them
+ * handles the character is loadEngineFor() called. The implementation
+ * holds a mutex for the duration of the call.</p>
+ *
+ * @param c A character that begins a run for which a LanguageBreakEngine is
+ * sought.
+ * @return A LanguageBreakEngine with the desired characteristics, or 0.
+ */
+ virtual const LanguageBreakEngine *getEngineFor(UChar32 c);
+
+protected:
+ /**
+ * <p>Create a LanguageBreakEngine for the set of characters to which
+ * the supplied character belongs, for the specified break type.</p>
+ *
+ * @param c A character that begins a run for which a LanguageBreakEngine is
+ * sought.
+ * @return A LanguageBreakEngine with the desired characteristics, or 0.
+ */
+ virtual const LanguageBreakEngine *loadEngineFor(UChar32 c);
+
+ /**
+ * <p>Create a DictionaryMatcher for the specified script and break type.</p>
+ * @param script An ISO 15924 script code that identifies the dictionary to be
+ * created.
+ * @return A DictionaryMatcher with the desired characteristics, or NULL.
+ */
+ virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);
+};
+
+U_NAMESPACE_END
+
+ /* BRKENG_H */
+#endif
diff --git a/thirdparty/icu4c/common/brkiter.cpp b/thirdparty/icu4c/common/brkiter.cpp
new file mode 100644
index 0000000000..b9b6ca65cd
--- /dev/null
+++ b/thirdparty/icu4c/common/brkiter.cpp
@@ -0,0 +1,527 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 1997-2015, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*
+* File brkiter.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 02/18/97 aliu Converted from OpenClass. Added DONE.
+* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
+*****************************************************************************************
+*/
+
+// *****************************************************************************
+// This file was generated from the java source file BreakIterator.java
+// *****************************************************************************
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/rbbi.h"
+#include "unicode/brkiter.h"
+#include "unicode/udata.h"
+#include "unicode/ures.h"
+#include "unicode/ustring.h"
+#include "unicode/filteredbrk.h"
+#include "ucln_cmn.h"
+#include "cstring.h"
+#include "umutex.h"
+#include "servloc.h"
+#include "locbased.h"
+#include "uresimp.h"
+#include "uassert.h"
+#include "ubrkimpl.h"
+#include "utracimp.h"
+#include "charstr.h"
+
+// *****************************************************************************
+// class BreakIterator
+// This class implements methods for finding the location of boundaries in text.
+// Instances of BreakIterator maintain a current position and scan over text
+// returning the index of characters where boundaries occur.
+// *****************************************************************************
+
+U_NAMESPACE_BEGIN
+
+// -------------------------------------
+
+BreakIterator*
+BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status)
+{ /* Creates a RuleBasedBreakIterator for loc. Looks up the key `type` inside
+ * the "boundaries" table of the locale's break-iterator resource bundle,
+ * splits the string found there at its first '.' into a data file name and
+ * an extension, opens that data file and passes it to the new iterator.
+ * Returns NULL whenever status is a failure on entry or on exit.
+ */
+ char fnbuff[256]; // data file name (text before the '.'), converted to char
+ char ext[4]={'\0'}; // data file extension (text after the '.'); stays empty if there is no '.'
+ CharString actualLocale; // locale ID reported by ures_getLocaleInternal() for the name entry
+ int32_t size;
+ const UChar* brkfname = NULL;
+ UResourceBundle brkRulesStack;
+ UResourceBundle brkNameStack;
+ UResourceBundle *brkRules = &brkRulesStack;
+ UResourceBundle *brkName = &brkNameStack;
+ RuleBasedBreakIterator *result = NULL;
+
+ if (U_FAILURE(status))
+ return NULL;
+
+ ures_initStackObject(brkRules);
+ ures_initStackObject(brkName);
+
+ // Open the break-iterator resource bundle for the requested locale
+ UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status);
+
+ // Get the "boundaries" array.
+ if (U_SUCCESS(status)) {
+ brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
+ // Get the string object naming the rules file
+ brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
+ // Get the actual string
+ brkfname = ures_getString(brkName, &size, &status);
+ U_ASSERT((size_t)size<sizeof(fnbuff));
+ if ((size_t)size>=sizeof(fnbuff)) { // same bound as the assertion above, enforced at run time
+ size=0;
+ if (U_SUCCESS(status)) {
+ status = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+
+ // Use the string if we found it
+ if (U_SUCCESS(status) && brkfname) {
+ actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status);
+
+ UChar* extStart=u_strchr(brkfname, 0x002e); // 0x002e is '.'
+ int len = 0;
+ if(extStart!=NULL){
+ len = (int)(extStart-brkfname);
+ u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff (NOTE(review): presumably only when the extension is shorter than sizeof(ext) -- confirm)
+ u_UCharsToChars(brkfname, fnbuff, len);
+ }
+ fnbuff[len]=0; // nul terminate
+ }
+ }
+
+ ures_close(brkRules);
+ ures_close(brkName);
+
+ UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); // NOTE(review): fnbuff is uninitialized here if an earlier step failed; presumably udata_open() returns early on a failed status -- confirm
+ if (U_FAILURE(status)) {
+ ures_close(b);
+ return NULL;
+ }
+
+ // Create a RuleBasedBreakIterator
+ result = new RuleBasedBreakIterator(file, status);
+
+ // If there is a result, set the valid locale and actual locale
+ if (U_SUCCESS(status) && result != NULL) {
+ U_LOCALE_BASED(locBased, *(BreakIterator*)result);
+ locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
+ actualLocale.data());
+ }
+
+ ures_close(b);
+
+ if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple
+ delete result;
+ return NULL;
+ }
+
+ if (result == NULL) { // no iterator exists to take the data file, so it is closed here
+ udata_close(file);
+ if (U_SUCCESS(status)) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ }
+
+ return result;
+}
+
+// Creates a break iterator for word breaks in the locale `key`.
+BreakIterator* U_EXPORT2
+BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
+{
+ return createInstance(key, UBRK_WORD, status); // locale and status are forwarded unchanged; only the kind is fixed here
+}
+
+// -------------------------------------
+
+// Creates a break iterator for line breaks in the locale `key`.
+BreakIterator* U_EXPORT2
+BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
+{
+ return createInstance(key, UBRK_LINE, status); // locale and status are forwarded unchanged; only the kind is fixed here
+}
+
+// -------------------------------------
+
+// Creates a break iterator for character breaks in the locale `key`.
+BreakIterator* U_EXPORT2
+BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status)
+{
+ return createInstance(key, UBRK_CHARACTER, status); // locale and status are forwarded unchanged; only the kind is fixed here
+}
+
+// -------------------------------------
+
+// Creates a break iterator for sentence breaks in the locale `key`.
+BreakIterator* U_EXPORT2
+BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status)
+{
+ return createInstance(key, UBRK_SENTENCE, status); // locale and status are forwarded unchanged; only the kind is fixed here
+}
+
+// -------------------------------------
+
+// Creates a break iterator for title casing breaks in the locale `key`.
+BreakIterator* U_EXPORT2
+BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status)
+{
+ return createInstance(key, UBRK_TITLE, status); // locale and status are forwarded unchanged; only the kind is fixed here
+}
+
+// -------------------------------------
+
+// Gets the available locales by delegating to Locale::getAvailableLocales(); `count` is passed straight through to that call.
+const Locale* U_EXPORT2
+BreakIterator::getAvailableLocales(int32_t& count)
+{
+ return Locale::getAvailableLocales(count); // no filtering for break-iterator data is applied here
+}
+
+// ------------------------------------------
+//
+// Constructors, destructor and assignment operator
+//
+//-------------------------------------------
+
+// Default constructor: starts out with empty actual and valid locale IDs.
+BreakIterator::BreakIterator()
+{
+    // Store a terminating 0 in the first element of each locale buffer.
+    actualLocale[0] = 0;
+    validLocale[0] = 0;
+}
+
+// Copy constructor: duplicates the two locale IDs held by `other`.
+BreakIterator::BreakIterator(const BreakIterator &other)
+    : UObject(other)
+{
+    // Each buffer is copied using its own declared size as the limit.
+    uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
+    uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
+}
+
+// Copy assignment: takes over the two locale IDs of `other`.
+// Assigning an object to itself leaves it untouched.
+BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
+    if (this == &other) {
+        return *this;
+    }
+    uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
+    uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
+    return *this;
+}
+
+BreakIterator::~BreakIterator()
+{ // intentionally empty: this class releases nothing in its destructor
+}
+
+// ------------------------------------------
+//
+// Registration
+//
+//-------------------------------------------
+#if !UCONFIG_NO_SERVICE
+
+// -------------------------------------
+
+class ICUBreakIteratorFactory : public ICUResourceBundleFactory { // service factory whose handleCreate() builds iterators through BreakIterator::makeInstance()
+public:
+ virtual ~ICUBreakIteratorFactory();
+protected:
+ virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { // the service argument is unused
+ return BreakIterator::makeInstance(loc, kind, status);
+ }
+};
+
+ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} // defined out of line; empty body
+
+// -------------------------------------
+
+class ICUBreakIteratorService : public ICULocaleService { // locale service named "Break Iterator" that hands out BreakIterator objects
+public:
+ ICUBreakIteratorService()
+ : ICULocaleService(UNICODE_STRING("Break Iterator", 14))
+ {
+ UErrorCode status = U_ZERO_ERROR;
+ registerFactory(new ICUBreakIteratorFactory(), status); // NOTE(review): status is not examined after this call
+ }
+
+ virtual ~ICUBreakIteratorService();
+
+ virtual UObject* cloneInstance(UObject* instance) const { // assumes instance is a BreakIterator (unchecked cast)
+ return ((BreakIterator*)instance)->clone();
+ }
+
+ virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { // builds an iterator directly from the key's current locale and kind
+ LocaleKey& lkey = (LocaleKey&)key; // assumes key is a LocaleKey; the cast is unchecked and also drops const
+ int32_t kind = lkey.kind();
+ Locale loc;
+ lkey.currentLocale(loc);
+ return BreakIterator::makeInstance(loc, kind, status);
+ }
+
+ virtual UBool isDefault() const { // true when exactly one factory is registered
+ return countFactories() == 1;
+ }
+};
+
+ICUBreakIteratorService::~ICUBreakIteratorService() {} // defined out of line; empty body
+
+// -------------------------------------
+
+// defined in ucln_cmn.h
+U_NAMESPACE_END
+
+static icu::UInitOnce gInitOnceBrkiter = U_INITONCE_INITIALIZER; // init-once flag passed to umtx_initOnce() in getService()
+static icu::ICULocaleService* gService = NULL; // the break iterator service; NULL until initService() has assigned it
+
+
+
+/**
+ * Release all static memory held by breakiterator.
+ */
+U_CDECL_BEGIN
+// Cleanup callback for the break iterator service: frees the global service
+// object and resets the init-once flag. Always reports TRUE.
+static UBool U_CALLCONV breakiterator_cleanup(void) {
+#if !UCONFIG_NO_SERVICE
+    // Deleting a null pointer does nothing, so no explicit check is needed.
+    delete gService;
+    gService = NULL;
+    gInitOnceBrkiter.reset();
+#endif
+    return TRUE;
+}
+U_CDECL_END
+U_NAMESPACE_BEGIN
+
+static void U_CALLCONV
+initService(void) { // one-time initializer; invoked through umtx_initOnce() in getService()
+ gService = new ICUBreakIteratorService();
+ ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); // registers the callback that later deletes gService
+}
+
+static ICULocaleService*
+getService(void)
+{ // returns the shared service, running initService() on first use; callers in this file treat NULL as a failure
+ umtx_initOnce(gInitOnceBrkiter, &initService);
+ return gService;
+}
+
+
+// -------------------------------------
+
+static inline UBool
+hasService(void)
+{ // reports whether the service already exists without creating it as a side effect
+ return !gInitOnceBrkiter.isReset() && getService() != NULL; // getService() is only reached once the init-once flag is no longer in its reset state
+}
+
+// -------------------------------------
+
+// Registers `toAdopt` with the break iterator service for the given locale
+// and kind, and returns the key produced by the service.
+//
+// toAdopt is adopted: the caller gives up ownership by making this call. If
+// the service cannot be obtained, the iterator is therefore deleted here
+// rather than leaked, status is set to U_MEMORY_ALLOCATION_ERROR and NULL is
+// returned.
+URegistryKey U_EXPORT2
+BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
+{
+    ICULocaleService *service = getService();
+    if (service == NULL) {
+        // Nobody else will ever see this pointer, so free it now.
+        delete toAdopt;
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    return service->registerInstance(toAdopt, locale, kind, status);
+}
+
+// -------------------------------------
+
+// Removes a previous registration identified by `key`.
+// Returns FALSE without touching status if status is already a failure, and
+// FALSE with U_MEMORY_ALLOCATION_ERROR if no service exists; otherwise
+// returns whatever the service's unregister() reports.
+UBool U_EXPORT2
+BreakIterator::unregister(URegistryKey key, UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return FALSE;
+    }
+    if (!hasService()) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return FALSE;
+    }
+    return gService->unregister(key, status);
+}
+
+// -------------------------------------
+
+// Returns the service's enumeration of available locales, or NULL when the
+// service could not be obtained.
+StringEnumeration* U_EXPORT2
+BreakIterator::getAvailableLocales(void)
+{
+    ICULocaleService *const svc = getService();
+    return svc != NULL ? svc->getAvailableLocales() : NULL;
+}
+#endif /* UCONFIG_NO_SERVICE */
+
+// -------------------------------------
+
+BreakIterator*
+BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status)
+{ // creates an iterator of the given kind for loc: through the service if one already exists, otherwise directly through makeInstance()
+ if (U_FAILURE(status)) {
+ return NULL;
+ }
+
+#if !UCONFIG_NO_SERVICE
+ if (hasService()) {
+ Locale actualLoc("");
+ BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status);
+ // TODO: The way the service code works in ICU 2.8 is that if
+ // there is a real registered break iterator, the actualLoc
+ // will be populated, but if the handleDefault path is taken
+ // (because nothing is registered that can handle the
+ // requested locale) then the actualLoc comes back empty. In
+ // that case, the returned object already has its actual/valid
+ // locale data populated (by makeInstance, which is what
+ // handleDefault calls), so we don't touch it. YES, A COMMENT
+ // THIS LONG is a sign of bad code -- so the action item is to
+ // revisit this in ICU 3.0 and clean it up/fix it/remove it.
+ if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) {
+ U_LOCALE_BASED(locBased, *result);
+ locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); // the same ID is passed for both arguments
+ }
+ return result;
+ }
+ else
+#endif
+ { // reached when no service exists, or unconditionally when services are compiled out
+ return makeInstance(loc, kind, status);
+ }
+}
+
+// -------------------------------------
+enum { kKeyValueLenMax = 32 }; // size of the rule-type and keyword-value buffers used in makeInstance()
+
+BreakIterator*
+BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
+{ // maps kind to a rule-type name and builds the iterator with buildInstance(); an unknown kind sets U_ILLEGAL_ARGUMENT_ERROR
+
+ if (U_FAILURE(status)) {
+ return NULL;
+ }
+ char lbType[kKeyValueLenMax]; // only used by the UBRK_LINE case
+
+ BreakIterator *result = NULL;
+ switch (kind) {
+ case UBRK_CHARACTER:
+ {
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_CHARACTER);
+ result = BreakIterator::buildInstance(loc, "grapheme", status);
+ UTRACE_EXIT_STATUS(status);
+ }
+ break;
+ case UBRK_WORD:
+ {
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_WORD);
+ result = BreakIterator::buildInstance(loc, "word", status);
+ UTRACE_EXIT_STATUS(status);
+ }
+ break;
+ case UBRK_LINE:
+ {
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
+ uprv_strcpy(lbType, "line");
+ char lbKeyValue[kKeyValueLenMax] = {0};
+ UErrorCode kvStatus = U_ZERO_ERROR; // keyword lookup errors are kept separate from the caller's status
+ int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus);
+ if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) { // only these three values select a variant; the type becomes "line_" followed by the value
+ uprv_strcat(lbType, "_");
+ uprv_strcat(lbType, lbKeyValue);
+ }
+ result = BreakIterator::buildInstance(loc, lbType, status);
+
+ UTRACE_DATA1(UTRACE_INFO, "lb=%s", lbKeyValue);
+ UTRACE_EXIT_STATUS(status);
+ }
+ break;
+ case UBRK_SENTENCE:
+ {
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_SENTENCE);
+ result = BreakIterator::buildInstance(loc, "sentence", status);
+#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
+ char ssKeyValue[kKeyValueLenMax] = {0};
+ UErrorCode kvStatus = U_ZERO_ERROR; // keyword and builder errors are kept separate from the caller's status
+ int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus);
+ if (U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(ssKeyValue,"standard")==0) { // the locale asks for ss=standard
+ FilteredBreakIteratorBuilder* fbiBuilder = FilteredBreakIteratorBuilder::createInstance(loc, kvStatus);
+ if (U_SUCCESS(kvStatus)) {
+ result = fbiBuilder->build(result, status); // result is replaced by whatever build() returns
+ delete fbiBuilder;
+ }
+ }
+#endif
+ UTRACE_EXIT_STATUS(status);
+ }
+ break;
+ case UBRK_TITLE:
+ {
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_TITLE);
+ result = BreakIterator::buildInstance(loc, "title", status);
+ UTRACE_EXIT_STATUS(status);
+ }
+ break;
+ default:
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+
+ if (U_FAILURE(status)) { // NOTE(review): result is not deleted on this path; presumably the callee has already returned NULL or released it -- confirm
+ return NULL;
+ }
+
+ return result;
+}
+
+Locale
+BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { // returns the locale of the requested type as answered by the U_LOCALE_BASED helper
+ U_LOCALE_BASED(locBased, *this);
+ return locBased.getLocale(type, status);
+}
+
+const char *
+BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { // same lookup as getLocale(), but returns the ID as a C string
+ U_LOCALE_BASED(locBased, *this);
+ return locBased.getLocaleID(type, status);
+}
+
+
+// This implementation of getRuleStatus is a do-nothing stub, here to
+// provide a default implementation for any derived BreakIterator classes that
+// do not implement it themselves. It always reports 0.
+int32_t BreakIterator::getRuleStatus() const {
+ return 0; // constant: no state is consulted
+}
+
+// Default getRuleStatusVec for derived BreakIterator classes that do not
+// supply their own: there is exactly one status value, and it is 0.
+// Returns 0 if status is already a failure, otherwise 1.
+int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return 0;
+    }
+    if (capacity >= 1) {
+        *fillInVec = 0;
+    } else {
+        // No room for the single value: flag the overflow, but still report
+        // how many values there are.
+        status = U_BUFFER_OVERFLOW_ERROR;
+    }
+    return 1;
+}
+
+BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) { // initializes this iterator's locale IDs from the two given locales
+ U_LOCALE_BASED(locBased, (*this));
+ locBased.setLocaleIDs(valid, actual);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+//eof
diff --git a/thirdparty/icu4c/common/bytesinkutil.cpp b/thirdparty/icu4c/common/bytesinkutil.cpp
new file mode 100644
index 0000000000..c64a845f87
--- /dev/null
+++ b/thirdparty/icu4c/common/bytesinkutil.cpp
@@ -0,0 +1,161 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// bytesinkutil.cpp
+// created: 2017sep14 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/bytestream.h"
+#include "unicode/edits.h"
+#include "unicode/stringoptions.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include "bytesinkutil.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+UBool
+ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
+ ByteSink &sink, Edits *edits, UErrorCode &errorCode) { // converts s16 to UTF-8 in chunks, appends it to sink, then records in edits that `length` units were replaced by the bytes written
+ if (U_FAILURE(errorCode)) { return FALSE; }
+ char scratch[200]; // fallback buffer offered to the sink
+ int32_t s8Length = 0; // total number of UTF-8 bytes appended so far
+ for (int32_t i = 0; i < s16Length;) {
+ int32_t capacity;
+ int32_t desiredCapacity = s16Length - i; // remaining UTF-16 units; scaled below without overflowing int32_t
+ if (desiredCapacity < (INT32_MAX / 3)) {
+ desiredCapacity *= 3; // max 3 UTF-8 bytes per UTF-16 code unit
+ } else if (desiredCapacity < (INT32_MAX / 2)) {
+ desiredCapacity *= 2;
+ } else {
+ desiredCapacity = INT32_MAX;
+ }
+ char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity,
+ scratch, UPRV_LENGTHOF(scratch), &capacity);
+ capacity -= U8_MAX_LENGTH - 1; // keep headroom so a code point started at j < capacity still fits in the buffer
+ int32_t j = 0;
+ for (; i < s16Length && j < capacity;) { // no increment clause: the two macros below move i and j forward
+ UChar32 c;
+ U16_NEXT_UNSAFE(s16, i, c);
+ U8_APPEND_UNSAFE(buffer, j, c);
+ }
+ if (j > (INT32_MAX - s8Length)) { // s8Length + j would not fit in int32_t
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return FALSE;
+ }
+ sink.Append(buffer, j);
+ s8Length += j;
+ }
+ if (edits != nullptr) { // edits is optional
+ edits->addReplace(length, s8Length);
+ }
+ return TRUE;
+}
+
+// Pointer-range overload: the replaced source text is [s, limit).
+// Fails with U_INDEX_OUTOFBOUNDS_ERROR when that range is longer than
+// INT32_MAX bytes; otherwise defers to the length-based overload.
+UBool
+ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
+                           const char16_t *s16, int32_t s16Length,
+                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    if ((limit - s) <= INT32_MAX) {
+        return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode);
+    }
+    errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+    return FALSE;
+}
+
+// Encodes the code point c as UTF-8, records (when edits is non-null) that
+// `length` source units were replaced by the encoded bytes, and appends those
+// bytes to the sink.
+void
+ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) {
+    char utf8[U8_MAX_LENGTH];
+    int32_t utf8Length = 0;
+    U8_APPEND_UNSAFE(utf8, utf8Length, c);
+    if (edits != nullptr) {
+        edits->addReplace(length, utf8Length);
+    }
+    sink.Append(utf8, utf8Length);
+}
+
+namespace {
+
+// Lead and trail byte of the two-byte UTF-8 form of c.
+// See unicode/utf8.h U8_APPEND_UNSAFE().
+inline uint8_t getTwoByteLead(UChar32 c) {
+    return static_cast<uint8_t>(0xc0 | (c >> 6));
+}
+
+inline uint8_t getTwoByteTrail(UChar32 c) {
+    return static_cast<uint8_t>(0x80 | (c & 0x3f));
+}
+
+}  // namespace
+
+// Appends the two-byte UTF-8 encoding of c to the sink.
+// c is asserted to lie in 0x80..0x7ff.
+void
+ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
+    U_ASSERT(0x80 <= c && c <= 0x7ff);  // 2-byte UTF-8
+    char encoded[2];
+    encoded[0] = (char)getTwoByteLead(c);
+    encoded[1] = (char)getTwoByteTrail(c);
+    sink.Append(encoded, 2);
+}
+
+// Handles `length` (asserted > 0) unchanged source bytes starting at s:
+// records them in edits when edits is non-null, and copies them to the sink
+// unless options contains U_OMIT_UNCHANGED_TEXT.
+void
+ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
+                                      ByteSink &sink, uint32_t options, Edits *edits) {
+    U_ASSERT(length > 0);
+    if (edits != nullptr) {
+        edits->addUnchanged(length);
+    }
+    const bool omitUnchanged = (options & U_OMIT_UNCHANGED_TEXT) != 0;
+    if (!omitUnchanged) {
+        sink.Append(reinterpret_cast<const char *>(s), length);
+    }
+}
+
+// Pointer-range form of appendUnchanged: the unchanged text is [s, limit).
+// Fails with U_INDEX_OUTOFBOUNDS_ERROR when the range is longer than
+// INT32_MAX bytes. A range that is empty (or reversed) is accepted and does
+// nothing.
+UBool
+ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
+                              ByteSink &sink, uint32_t options, Edits *edits,
+                              UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    if ((limit - s) > INT32_MAX) {
+        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return FALSE;
+    }
+    const int32_t length = (int32_t)(limit - s);
+    if (length <= 0) {
+        return TRUE;
+    }
+    appendNonEmptyUnchanged(s, length, sink, options, edits);
+    return TRUE;
+}
+
+CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) { // binds to *dest; dest is dereferenced without a null check, so it must be non-null
+}
+
+CharStringByteSink::~CharStringByteSink() = default; // compiler-generated; this class adds no cleanup of its own
+
+// Appends n bytes to the wrapped CharString.
+void
+CharStringByteSink::Append(const char* bytes, int32_t n) {
+    // Append() returns void, so there is nowhere to report a failure:
+    // the error code from dest_.append() is deliberately dropped.
+    UErrorCode ignoredStatus = U_ZERO_ERROR;
+    dest_.append(bytes, n, ignoredStatus);
+}
+
+// Returns a buffer to write into before calling Append().
+// Invalid requests (min_capacity < 1, or a scratch buffer smaller than
+// min_capacity) yield NULL with *result_capacity set to 0. Otherwise the
+// wrapped CharString is asked for an append buffer; if that fails, the
+// caller's scratch buffer is handed back instead.
+char*
+CharStringByteSink::GetAppendBuffer(int32_t min_capacity,
+                                    int32_t desired_capacity_hint,
+                                    char* scratch,
+                                    int32_t scratch_capacity,
+                                    int32_t* result_capacity) {
+    if (min_capacity < 1 || scratch_capacity < min_capacity) {
+        *result_capacity = 0;
+        return nullptr;
+    }
+
+    UErrorCode status = U_ZERO_ERROR;
+    char* appendBuffer = dest_.getAppendBuffer(
+            min_capacity, desired_capacity_hint, *result_capacity, status);
+    if (U_FAILURE(status)) {
+        *result_capacity = scratch_capacity;
+        return scratch;
+    }
+    return appendBuffer;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/bytesinkutil.h b/thirdparty/icu4c/common/bytesinkutil.h
new file mode 100644
index 0000000000..ab2516432d
--- /dev/null
+++ b/thirdparty/icu4c/common/bytesinkutil.h
@@ -0,0 +1,83 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// bytesinkutil.h
+// created: 2017sep14 Markus W. Scherer
+
+#ifndef BYTESINKUTIL_H
+#define BYTESINKUTIL_H
+
+// Include guard: this header defines classes, so including it twice in one
+// translation unit would otherwise be a redefinition error.
+
+#include "unicode/utypes.h"
+#include "unicode/bytestream.h"
+#include "unicode/edits.h"
+#include "cmemory.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+class ByteSink;
+class CharString;
+class Edits;
+
+/**
+ * Static helpers for writing UTF-8 output to a ByteSink while optionally
+ * recording the corresponding changes in an Edits object.
+ */
+class U_COMMON_API ByteSinkUtil {
+public:
+    ByteSinkUtil() = delete;  // all static
+
+    /** (length) bytes were mapped to valid (s16, s16Length). */
+    static UBool appendChange(int32_t length,
+                              const char16_t *s16, int32_t s16Length,
+                              ByteSink &sink, Edits *edits, UErrorCode &errorCode);
+
+    /** The bytes at [s, limit[ were mapped to valid (s16, s16Length). */
+    static UBool appendChange(const uint8_t *s, const uint8_t *limit,
+                              const char16_t *s16, int32_t s16Length,
+                              ByteSink &sink, Edits *edits, UErrorCode &errorCode);
+
+    /** (length) bytes were mapped/changed to valid code point c. */
+    static void appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits = nullptr);
+
+    /** The few bytes at [src, nextSrc[ were mapped/changed to valid code point c. */
+    static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c,
+                                       ByteSink &sink, Edits *edits = nullptr) {
+        appendCodePoint((int32_t)(nextSrc - src), c, sink, edits);
+    }
+
+    /** Append the two-byte character (U+0080..U+07FF). */
+    static void appendTwoBytes(UChar32 c, ByteSink &sink);
+
+    /**
+     * The (length) bytes at s are unchanged. Does nothing when length <= 0.
+     * Returns false only if errorCode is already a failure on entry.
+     */
+    static UBool appendUnchanged(const uint8_t *s, int32_t length,
+                                 ByteSink &sink, uint32_t options, Edits *edits,
+                                 UErrorCode &errorCode) {
+        if (U_FAILURE(errorCode)) { return false; }
+        if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); }
+        return true;
+    }
+
+    /** The bytes at [s, limit[ are unchanged. */
+    static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
+                                 ByteSink &sink, uint32_t options, Edits *edits,
+                                 UErrorCode &errorCode);
+
+private:
+    /** Shared worker for both appendUnchanged() overloads; requires length > 0. */
+    static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
+                                        ByteSink &sink, uint32_t options, Edits *edits);
+};
+
+/**
+ * ByteSink that writes into a CharString held by reference. The sink is
+ * neither default-constructible nor copyable.
+ */
+class U_COMMON_API CharStringByteSink : public ByteSink {
+public:
+    /** dest is dereferenced and stored as a reference (see dest_). */
+    CharStringByteSink(CharString* dest);
+    ~CharStringByteSink() override;
+
+    CharStringByteSink() = delete;
+    CharStringByteSink(const CharStringByteSink&) = delete;
+    CharStringByteSink& operator=(const CharStringByteSink&) = delete;
+
+    void Append(const char* bytes, int32_t n) override;
+
+    char* GetAppendBuffer(int32_t min_capacity,
+                          int32_t desired_capacity_hint,
+                          char* scratch,
+                          int32_t scratch_capacity,
+                          int32_t* result_capacity) override;
+
+private:
+    CharString& dest_;  // target string; referenced, not owned by value
+};
+
+U_NAMESPACE_END
+
+#endif  // BYTESINKUTIL_H
diff --git a/thirdparty/icu4c/common/bytestream.cpp b/thirdparty/icu4c/common/bytestream.cpp
new file mode 100644
index 0000000000..0d0e4dda39
--- /dev/null
+++ b/thirdparty/icu4c/common/bytestream.cpp
@@ -0,0 +1,85 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+// Copyright (C) 2009-2011, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// Copyright 2007 Google Inc. All Rights Reserved.
+// Author: sanjay@google.com (Sanjay Ghemawat)
+
+#include "unicode/utypes.h"
+#include "unicode/bytestream.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+ByteSink::~ByteSink() {} // out-of-line destructor with an empty body
+
+// Default implementation: hands back the caller's scratch buffer when
+// min_capacity is at least 1 and the scratch buffer can hold min_capacity
+// bytes; otherwise reports a capacity of 0 and returns NULL.
+// The capacity hint is ignored.
+char* ByteSink::GetAppendBuffer(int32_t min_capacity,
+                                int32_t /*desired_capacity_hint*/,
+                                char* scratch, int32_t scratch_capacity,
+                                int32_t* result_capacity) {
+    const bool usable = min_capacity >= 1 && scratch_capacity >= min_capacity;
+    if (usable) {
+        *result_capacity = scratch_capacity;
+        return scratch;
+    }
+    *result_capacity = 0;
+    return NULL;
+}
+
+void ByteSink::Flush() {} // default implementation does nothing
+
+CheckedArrayByteSink::CheckedArrayByteSink(char* outbuf, int32_t capacity)
+ : outbuf_(outbuf), capacity_(capacity < 0 ? 0 : capacity), // a negative capacity is treated as 0
+ size_(0), appended_(0), overflowed_(FALSE) { // starts empty, with no overflow recorded
+}
+
+CheckedArrayByteSink::~CheckedArrayByteSink() {} // empty body: outbuf_ is not freed here
+
+// Returns the sink to its initial state: nothing written, nothing counted,
+// no overflow recorded. The buffer and its capacity are left as they are.
+CheckedArrayByteSink& CheckedArrayByteSink::Reset() {
+    overflowed_ = FALSE;
+    appended_ = 0;
+    size_ = 0;
+    return *this;
+}
+
+void CheckedArrayByteSink::Append(const char* bytes, int32_t n) { // copies as many of the n bytes as still fit, and tracks both the requested and the stored totals
+ if (n <= 0) {
+ return;
+ }
+ if (n > (INT32_MAX - appended_)) { // appended_ + n would not fit in int32_t
+ // TODO: Report as integer overflow, not merely buffer overflow.
+ appended_ = INT32_MAX;
+ overflowed_ = TRUE;
+ return;
+ }
+ appended_ += n; // counts the full request, before any truncation below
+ int32_t available = capacity_ - size_;
+ if (n > available) {
+ n = available; // truncate to the remaining space
+ overflowed_ = TRUE;
+ }
+ if (n > 0 && bytes != (outbuf_ + size_)) { // skip the copy when the caller already wrote at the current output position
+ uprv_memcpy(outbuf_ + size_, bytes, n);
+ }
+ size_ += n;
+}
+
+// Offers the unused tail of the output array when it can hold at least
+// min_capacity bytes; otherwise falls back to the caller's scratch buffer.
+// Invalid requests (min_capacity < 1, or a scratch buffer smaller than
+// min_capacity) yield NULL with *result_capacity set to 0.
+// The capacity hint is ignored.
+char* CheckedArrayByteSink::GetAppendBuffer(int32_t min_capacity,
+                                            int32_t /*desired_capacity_hint*/,
+                                            char* scratch,
+                                            int32_t scratch_capacity,
+                                            int32_t* result_capacity) {
+    if (min_capacity < 1 || scratch_capacity < min_capacity) {
+        *result_capacity = 0;
+        return NULL;
+    }
+    const int32_t remaining = capacity_ - size_;
+    if (remaining < min_capacity) {
+        *result_capacity = scratch_capacity;
+        return scratch;
+    }
+    *result_capacity = remaining;
+    return outbuf_ + size_;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/bytestrie.cpp b/thirdparty/icu4c/common/bytestrie.cpp
new file mode 100644
index 0000000000..c4d498c4bf
--- /dev/null
+++ b/thirdparty/icu4c/common/bytestrie.cpp
@@ -0,0 +1,441 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: bytestrie.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010sep25
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/bytestream.h"
+#include "unicode/bytestrie.h"
+#include "unicode/uobject.h"
+#include "cmemory.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+BytesTrie::~BytesTrie() {
+ uprv_free(ownedArray_); // NOTE(review): presumably uprv_free() accepts NULL for a trie that owns no array -- confirm
+}
+
+// lead byte already shifted right by 1 (callers in this file pass node>>1).
+int32_t
+BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) { // pos points at the bytes that follow the lead byte; the lead byte selects how many of them are read
+ int32_t value;
+ if(leadByte<kMinTwoByteValueLead) {
+ value=leadByte-kMinOneByteValueLead; // the value is stored entirely in the lead byte
+ } else if(leadByte<kMinThreeByteValueLead) {
+ value=((leadByte-kMinTwoByteValueLead)<<8)|*pos; // lead byte supplies the high bits, one more byte the low 8
+ } else if(leadByte<kFourByteValueLead) {
+ value=((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1]; // lead byte plus two bytes, most significant first
+ } else if(leadByte==kFourByteValueLead) {
+ value=(pos[0]<<16)|(pos[1]<<8)|pos[2]; // lead byte contributes no bits; three bytes, most significant first
+ } else {
+ value=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3]; // four bytes, most significant first
+ }
+ return value;
+}
+
+const uint8_t *
+BytesTrie::jumpByDelta(const uint8_t *pos) { // decodes the variable-length delta stored at pos and returns the address that many bytes past the end of the delta
+ int32_t delta=*pos++;
+ if(delta<kMinTwoByteDeltaLead) {
+ // nothing to do
+ } else if(delta<kMinThreeByteDeltaLead) {
+ delta=((delta-kMinTwoByteDeltaLead)<<8)|*pos++; // lead byte plus one more byte
+ } else if(delta<kFourByteDeltaLead) {
+ delta=((delta-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1]; // lead byte plus two more bytes
+ pos+=2;
+ } else if(delta==kFourByteDeltaLead) {
+ delta=(pos[0]<<16)|(pos[1]<<8)|pos[2]; // three bytes; the lead byte contributes no bits
+ pos+=3;
+ } else {
+ delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3]; // four bytes
+ pos+=4;
+ }
+ return pos+delta;
+}
+
+// Reports the result for the current state without consuming any input:
+// NO_MATCH once the trie has been stopped (pos_ is NULL), NO_VALUE while in
+// the middle of a linear-match node or when the node at pos_ is not a value
+// node, and otherwise the value result for that node.
+UStringTrieResult
+BytesTrie::current() const {
+    const uint8_t *p=pos_;
+    if(p==NULL) {
+        return USTRINGTRIE_NO_MATCH;
+    }
+    if(remainingMatchLength_>=0) {
+        // Still inside a linear-match node: the byte at p is not a node lead.
+        return USTRINGTRIE_NO_VALUE;
+    }
+    int32_t node=*p;
+    if(node>=kMinValueLead) {
+        return valueResult(node);
+    }
+    return USTRINGTRIE_NO_VALUE;
+}
+
+UStringTrieResult
+BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) { /* Selects the branch for inByte.
+ * On a match pos_ is updated and the result says whether a value is
+ * available at the new position; when no branch byte equals inByte the trie
+ * is stopped and USTRINGTRIE_NO_MATCH is returned.
+ */
+ // Branch according to the current byte.
+ if(length==0) {
+ length=*pos++; // a zero length in the node means the real length is stored in the next byte
+ }
+ ++length;
+ // The length of the branch is the number of bytes to select from.
+ // The data structure encodes a binary search.
+ while(length>kMaxBranchLinearSubNodeLength) {
+ if(inByte<*pos++) {
+ length>>=1;
+ pos=jumpByDelta(pos); // continue in the half reached through the stored delta
+ } else {
+ length=length-(length>>1);
+ pos=skipDelta(pos); // continue in the half that follows the delta in place
+ }
+ }
+ // Drop down to linear search for the last few bytes.
+ // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
+ // and divides length by 2.
+ do {
+ if(inByte==*pos++) {
+ UStringTrieResult result;
+ int32_t node=*pos;
+ U_ASSERT(node>=kMinValueLead);
+ if(node&kValueIsFinal) {
+ // Leave the final value for getValue() to read.
+ result=USTRINGTRIE_FINAL_VALUE;
+ } else {
+ // Use the non-final value as the jump delta.
+ ++pos;
+ // int32_t delta=readValue(pos, node>>1);
+ node>>=1;
+ int32_t delta; // decoded inline with the same cases as readValue(), but also advancing pos past the bytes read
+ if(node<kMinTwoByteValueLead) {
+ delta=node-kMinOneByteValueLead;
+ } else if(node<kMinThreeByteValueLead) {
+ delta=((node-kMinTwoByteValueLead)<<8)|*pos++;
+ } else if(node<kFourByteValueLead) {
+ delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
+ pos+=2;
+ } else if(node==kFourByteValueLead) {
+ delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
+ pos+=3;
+ } else {
+ delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
+ pos+=4;
+ }
+ // end readValue()
+ pos+=delta;
+ node=*pos;
+ result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
+ }
+ pos_=pos;
+ return result;
+ }
+ --length;
+ pos=skipValue(pos);
+ } while(length>1);
+ if(inByte==*pos++) { // last candidate byte: it has no value or delta of its own; the next node follows it directly
+ pos_=pos;
+ int32_t node=*pos;
+ return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
+ } else {
+ stop();
+ return USTRINGTRIE_NO_MATCH;
+ }
+}
+
+UStringTrieResult
+BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) { // matches inByte against the node starting at pos, skipping intermediate (non-final) value nodes first
+ for(;;) {
+ int32_t node=*pos++;
+ if(node<kMinLinearMatch) { // branch node
+ return branchNext(pos, node, inByte);
+ } else if(node<kMinValueLead) { // linear-match node
+ // Match the first of length+1 bytes.
+ int32_t length=node-kMinLinearMatch; // Actual match length minus 1.
+ if(inByte==*pos++) {
+ remainingMatchLength_=--length;
+ pos_=pos;
+ return (length<0 && (node=*pos)>=kMinValueLead) ?
+ valueResult(node) : USTRINGTRIE_NO_VALUE;
+ } else {
+ // No match.
+ break;
+ }
+ } else if(node&kValueIsFinal) {
+ // No further matching bytes.
+ break;
+ } else {
+ // Skip intermediate value.
+ pos=skipValue(pos, node);
+ // The next node must not also be a value node.
+ U_ASSERT(*pos<kMinValueLead);
+ }
+ }
+ stop(); // reached only through the two `break`s above: the input byte cannot continue any stored sequence
+ return USTRINGTRIE_NO_MATCH;
+}
+
+UStringTrieResult
+BytesTrie::next(int32_t inByte) { // advances the trie state by one input byte
+ const uint8_t *pos=pos_;
+ if(pos==NULL) { // the trie has been stopped
+ return USTRINGTRIE_NO_MATCH;
+ }
+ if(inByte<0) {
+ inByte+=0x100; // map a negative (signed char) value into 0..0xff for comparison with uint8_t trie bytes
+ }
+ int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
+ if(length>=0) {
+ // Remaining part of a linear-match node.
+ if(inByte==*pos++) {
+ remainingMatchLength_=--length;
+ pos_=pos;
+ int32_t node;
+ return (length<0 && (node=*pos)>=kMinValueLead) ?
+ valueResult(node) : USTRINGTRIE_NO_VALUE;
+ } else {
+ stop();
+ return USTRINGTRIE_NO_MATCH;
+ }
+ }
+ return nextImpl(pos, inByte); // not inside a linear-match node: interpret the node at pos
+}
+
+// Advances the trie state by the byte sequence s.
+//   s        input bytes
+//   sLength  number of bytes in s, or negative if s is NUL-terminated
+// Returns the result for the state reached after the last input byte, or
+// USTRINGTRIE_NO_MATCH (with the trie stopped) as soon as a byte cannot be
+// matched. Empty input returns current().
+//
+// Every input byte is read as an unsigned value 0..0xff. Trie bytes are
+// uint8_t, so reading through plain (possibly signed) char would turn bytes
+// >= 0x80 into negative ints that can never compare equal; next(int32_t)
+// already normalizes negative inputs in the same way.
+UStringTrieResult
+BytesTrie::next(const char *s, int32_t sLength) {
+    if(sLength<0 ? *s==0 : sLength==0) {
+        // Empty input.
+        return current();
+    }
+    const uint8_t *pos=pos_;
+    if(pos==NULL) {
+        return USTRINGTRIE_NO_MATCH;
+    }
+    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
+    for(;;) {
+        // Fetch the next input byte, if there is one.
+        // Continue a linear-match node without rechecking sLength<0.
+        int32_t inByte;
+        if(sLength<0) {
+            for(;;) {
+                if((inByte=(uint8_t)*s++)==0) {
+                    remainingMatchLength_=length;
+                    pos_=pos;
+                    int32_t node;
+                    return (length<0 && (node=*pos)>=kMinValueLead) ?
+                            valueResult(node) : USTRINGTRIE_NO_VALUE;
+                }
+                if(length<0) {
+                    remainingMatchLength_=length;
+                    break;
+                }
+                if(inByte!=*pos) {
+                    stop();
+                    return USTRINGTRIE_NO_MATCH;
+                }
+                ++pos;
+                --length;
+            }
+        } else {
+            for(;;) {
+                if(sLength==0) {
+                    remainingMatchLength_=length;
+                    pos_=pos;
+                    int32_t node;
+                    return (length<0 && (node=*pos)>=kMinValueLead) ?
+                            valueResult(node) : USTRINGTRIE_NO_VALUE;
+                }
+                inByte=(uint8_t)*s++;
+                --sLength;
+                if(length<0) {
+                    remainingMatchLength_=length;
+                    break;
+                }
+                if(inByte!=*pos) {
+                    stop();
+                    return USTRINGTRIE_NO_MATCH;
+                }
+                ++pos;
+                --length;
+            }
+        }
+        // inByte now holds an input byte that still has to be matched against
+        // the node starting at pos.
+        for(;;) {
+            int32_t node=*pos++;
+            if(node<kMinLinearMatch) {
+                UStringTrieResult result=branchNext(pos, node, inByte);
+                if(result==USTRINGTRIE_NO_MATCH) {
+                    return USTRINGTRIE_NO_MATCH;
+                }
+                // Fetch the next input byte, if there is one.
+                if(sLength<0) {
+                    if((inByte=(uint8_t)*s++)==0) {
+                        return result;
+                    }
+                } else {
+                    if(sLength==0) {
+                        return result;
+                    }
+                    inByte=(uint8_t)*s++;
+                    --sLength;
+                }
+                if(result==USTRINGTRIE_FINAL_VALUE) {
+                    // No further matching bytes.
+                    stop();
+                    return USTRINGTRIE_NO_MATCH;
+                }
+                pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
+            } else if(node<kMinValueLead) {
+                // Match length+1 bytes.
+                length=node-kMinLinearMatch;  // Actual match length minus 1.
+                if(inByte!=*pos) {
+                    stop();
+                    return USTRINGTRIE_NO_MATCH;
+                }
+                ++pos;
+                --length;
+                break;
+            } else if(node&kValueIsFinal) {
+                // No further matching bytes.
+                stop();
+                return USTRINGTRIE_NO_MATCH;
+            } else {
+                // Skip intermediate value.
+                pos=skipValue(pos, node);
+                // The next node must not also be a value node.
+                U_ASSERT(*pos<kMinValueLead);
+            }
+        }
+    }
+}
+
+const uint8_t *
+BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
+ UBool haveUniqueValue, int32_t &uniqueValue) { // checks that every value reachable through this branch equals uniqueValue; returns NULL as soon as a different value is found
+ while(length>kMaxBranchLinearSubNodeLength) {
+ ++pos; // ignore the comparison byte
+ if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) { // recurse into the half reached through the delta
+ return NULL;
+ }
+ length=length-(length>>1);
+ pos=skipDelta(pos);
+ }
+ do {
+ ++pos; // ignore a comparison byte
+ // handle its value
+ int32_t node=*pos++;
+ UBool isFinal=(UBool)(node&kValueIsFinal);
+ int32_t value=readValue(pos, node>>1);
+ pos=skipValue(pos, node);
+ if(isFinal) {
+ if(haveUniqueValue) {
+ if(value!=uniqueValue) {
+ return NULL;
+ }
+ } else {
+ uniqueValue=value; // first value seen: it becomes the candidate
+ haveUniqueValue=TRUE;
+ }
+ } else {
+ if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) { // a non-final value is a jump delta to a sub-node
+ return NULL;
+ }
+ haveUniqueValue=TRUE;
+ }
+ } while(--length>1);
+ return pos+1; // ignore the last comparison byte
+}
+
+UBool
+BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) { // walks the nodes from pos; TRUE if all values found are equal (stored in uniqueValue), FALSE on the first mismatch
+ for(;;) {
+ int32_t node=*pos++;
+ if(node<kMinLinearMatch) { // branch node
+ if(node==0) {
+ node=*pos++; // a zero length means the real length is stored in the next byte
+ }
+ pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
+ if(pos==NULL) {
+ return FALSE;
+ }
+ haveUniqueValue=TRUE;
+ } else if(node<kMinValueLead) {
+ // linear-match node
+ pos+=node-kMinLinearMatch+1; // Ignore the match bytes.
+ } else { // value node
+ UBool isFinal=(UBool)(node&kValueIsFinal);
+ int32_t value=readValue(pos, node>>1);
+ if(haveUniqueValue) {
+ if(value!=uniqueValue) {
+ return FALSE;
+ }
+ } else {
+ uniqueValue=value;
+ haveUniqueValue=TRUE;
+ }
+ if(isFinal) { // nothing follows a final value
+ return TRUE;
+ }
+ pos=skipValue(pos, node);
+ }
+ }
+}
+
+int32_t
+BytesTrie::getNextBytes(ByteSink &out) const { // appends to out the byte(s) that can follow the current state; returns 0, 1, or the branch length
+ const uint8_t *pos=pos_;
+ if(pos==NULL) { // the trie has been stopped
+ return 0;
+ }
+ if(remainingMatchLength_>=0) {
+ append(out, *pos); // Next byte of a pending linear-match node.
+ return 1;
+ }
+ int32_t node=*pos++;
+ if(node>=kMinValueLead) {
+ if(node&kValueIsFinal) { // a final value has no continuation
+ return 0;
+ } else {
+ pos=skipValue(pos, node);
+ node=*pos++;
+ U_ASSERT(node<kMinValueLead);
+ }
+ }
+ if(node<kMinLinearMatch) { // branch node
+ if(node==0) {
+ node=*pos++; // a zero length means the real length is stored in the next byte
+ }
+ getNextBranchBytes(pos, ++node, out);
+ return node;
+ } else {
+ // First byte of the linear-match node.
+ append(out, *pos);
+ return 1;
+ }
+}
+
+void
+BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) { // appends the selectable bytes of the branch starting at pos to out
+ while(length>kMaxBranchLinearSubNodeLength) {
+ ++pos; // ignore the comparison byte
+ getNextBranchBytes(jumpByDelta(pos), length>>1, out); // recurse into the half reached through the delta
+ length=length-(length>>1);
+ pos=skipDelta(pos);
+ }
+ do {
+ append(out, *pos++);
+ pos=skipValue(pos); // step over the value stored after this byte
+ } while(--length>1);
+ append(out, *pos); // the last byte is not followed by a value that needs skipping here
+}
+
+// Writes c, converted to char, to the sink as a single byte.
+void
+BytesTrie::append(ByteSink &out, int c) {
+    const char oneByte=(char)c;
+    out.Append(&oneByte, 1);
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/bytestriebuilder.cpp b/thirdparty/icu4c/common/bytestriebuilder.cpp
new file mode 100644
index 0000000000..ec1ab7d8f5
--- /dev/null
+++ b/thirdparty/icu4c/common/bytestriebuilder.cpp
@@ -0,0 +1,504 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: bytestriebuilder.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010sep25
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/bytestriebuilder.h"
+#include "unicode/stringpiece.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "uhash.h"
+#include "uarrsort.h"
+#include "uassert.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Note: This builder implementation stores (bytes, value) pairs with full copies
+ * of the byte sequences, until the BytesTrie is built.
+ * It might(!) take less memory if we collected the data in a temporary, dynamic trie.
+ */
+
+// One (byte string, value) pair; the bytes live in a shared CharString pool
+// and are located through stringOffset.
+class BytesTrieElement : public UMemory {
+public:
+    // Relies on the implicit default constructor, which initializes nothing.
+
+    void setTo(StringPiece s, int32_t val, CharString &strings, UErrorCode &errorCode);
+
+    // Returns a view of this element's bytes inside |strings|.
+    StringPiece getString(const CharString &strings) const {
+        return StringPiece(data(strings), getStringLength(strings));
+    }
+    // Decodes the one-byte or two-byte (big-endian) length prefix.
+    int32_t getStringLength(const CharString &strings) const {
+        if(stringOffset>=0) {
+            return (uint8_t)strings[stringOffset];
+        }
+        const int32_t prefixStart=~stringOffset;
+        const int32_t highByte=(uint8_t)strings[prefixStart];
+        const int32_t lowByte=(uint8_t)strings[prefixStart+1];
+        return (highByte<<8)|lowByte;
+    }
+
+    char charAt(int32_t index, const CharString &strings) const {
+        return data(strings)[index];
+    }
+
+    int32_t getValue() const { return value; }
+
+    int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const;
+
+private:
+    // Pointer to the first content byte, just past the length prefix.
+    const char *data(const CharString &strings) const {
+        const int32_t bytesStart= stringOffset>=0 ? stringOffset+1 : ~stringOffset+2;
+        return strings.data()+bytesStart;
+    }
+
+    // Non-negative stringOffset: one length byte precedes the string bytes.
+    // Negative stringOffset: bit-invert it to get the real offset, where two
+    // length bytes (big-endian) precede the string bytes.
+    // (Compared with a stringLength field here, this saves 3 bytes per string for most strings.)
+    int32_t stringOffset;
+    int32_t value;
+};
+
+// Copies |s| (with a length prefix) to the end of |strings| and records its
+// offset together with |val|. Strings longer than 0xffff bytes are rejected.
+void
+BytesTrieElement::setTo(StringPiece s, int32_t val,
+                        CharString &strings, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    const int32_t byteCount=s.length();
+    if(byteCount>0xffff) {
+        // The length prefix has at most two bytes.
+        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return;
+    }
+    const int32_t start=strings.length();
+    const UBool needsTwoLengthBytes=byteCount>0xff;
+    if(needsTwoLengthBytes) {
+        strings.append((char)(byteCount>>8), errorCode);
+    }
+    strings.append((char)byteCount, errorCode);
+    // A bit-inverted (negative) offset marks the two-byte length form.
+    stringOffset= needsTwoLengthBytes ? ~start : start;
+    value=val;
+    strings.append(s, errorCode);
+}
+
+// Byte-wise comparison of two elements' strings; on a common prefix the
+// shorter string sorts first.
+int32_t
+BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const {
+    // TODO: add StringPiece::compare(), see ticket #8187
+    const StringPiece lhs=getString(strings);
+    const StringPiece rhs=other.getString(strings);
+    const int32_t lengthDiff=lhs.length()-rhs.length();
+    const int32_t commonLength= lengthDiff<=0 ? lhs.length() : rhs.length();
+    const int32_t diff=uprv_memcmp(lhs.data(), rhs.data(), commonLength);
+    if(diff!=0) {
+        return diff;
+    }
+    return lengthDiff;
+}
+
+// Creates an empty builder; only the string pool is allocated up front.
+BytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode)
+        : strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0),
+          bytes(NULL), bytesCapacity(0), bytesLength(0) {
+    if(U_SUCCESS(errorCode)) {
+        strings=new CharString();
+        if(strings==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+// Releases the serialized bytes, the element array and the string pool.
+BytesTrieBuilder::~BytesTrieBuilder() {
+    uprv_free(bytes);
+    delete[] elements;
+    delete strings;
+}
+
+// Adds a (byte string, value) pair. Fails with U_NO_WRITE_PERMISSION once
+// the trie bytes have been built.
+BytesTrieBuilder &
+BytesTrieBuilder::add(StringPiece s, int32_t value, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return *this;
+    }
+    if(bytesLength>0) {
+        // Cannot add elements after building.
+        errorCode=U_NO_WRITE_PERMISSION;
+        return *this;
+    }
+    if(elementsLength==elementsCapacity) {
+        // Grow: start at 1024 elements, then quadruple.
+        const int32_t grownCapacity= elementsCapacity==0 ? 1024 : 4*elementsCapacity;
+        BytesTrieElement *grown=new BytesTrieElement[grownCapacity];
+        if(grown==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return *this; // error instead of dereferencing null
+        }
+        if(elementsLength>0) {
+            uprv_memcpy(grown, elements, (size_t)elementsLength*sizeof(BytesTrieElement));
+        }
+        delete[] elements;
+        elements=grown;
+        elementsCapacity=grownCapacity;
+    }
+    BytesTrieElement &slot=elements[elementsLength];
+    ++elementsLength;
+    slot.setTo(s, value, *strings, errorCode);
+    return *this;
+}
+
+U_CDECL_BEGIN
+
+// Sort callback: |context| is the CharString pool, |left| and |right| are
+// BytesTrieElement objects compared by their strings.
+static int32_t U_CALLCONV
+compareElementStrings(const void *context, const void *left, const void *right) {
+    const CharString &pool=*static_cast<const CharString *>(context);
+    const BytesTrieElement &lhs=*static_cast<const BytesTrieElement *>(left);
+    const BytesTrieElement &rhs=*static_cast<const BytesTrieElement *>(right);
+    return lhs.compareStringTo(rhs, pool);
+}
+
+U_CDECL_END
+
+// Builds the trie and returns a new BytesTrie that takes over the byte
+// array; returns NULL on failure.
+BytesTrie *
+BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
+    buildBytes(buildOption, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    // The serialized trie occupies the last bytesLength bytes of the array.
+    BytesTrie *trie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength));
+    if(trie==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    bytes=NULL;  // The new trie now owns the array.
+    bytesCapacity=0;
+    return trie;
+}
+
+// Builds the trie and returns a view of the serialized bytes, which stay
+// owned by this builder. The view is empty on failure.
+StringPiece
+BytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
+    buildBytes(buildOption, errorCode);
+    StringPiece view;
+    if(U_FAILURE(errorCode)) {
+        return view;
+    }
+    view.set(bytes+(bytesCapacity-bytesLength), bytesLength);
+    return view;
+}
+
+void // Sorts and validates the elements, then serializes them into bytes (no-op if already built).
+BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+ if(bytes!=NULL && bytesLength>0) {
+ // Already built.
+ return;
+ }
+ if(bytesLength==0) {
+ if(elementsLength==0) { // nothing was added: reported as an index error
+ errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return;
+ }
+ uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
+ compareElementStrings, strings, // strings is passed through as the comparator context
+ FALSE, // need not be a stable sort
+ &errorCode);
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+ // Duplicate strings are not allowed.
+ StringPiece prev=elements[0].getString(*strings);
+ for(int32_t i=1; i<elementsLength; ++i) { // after sorting, duplicates are adjacent
+ StringPiece current=elements[i].getString(*strings);
+ if(prev==current) {
+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ prev=current;
+ }
+ }
+ // Create and byte-serialize the trie for the elements.
+ bytesLength=0;
+ int32_t capacity=strings->length(); // initial size: total stored string bytes, at least 1024
+ if(capacity<1024) {
+ capacity=1024;
+ }
+ if(bytesCapacity<capacity) { // reallocate only when the existing array is too small
+ uprv_free(bytes);
+ bytes=static_cast<char *>(uprv_malloc(capacity));
+ if(bytes==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ bytesCapacity=0;
+ return;
+ }
+ bytesCapacity=capacity;
+ }
+ StringTrieBuilder::build(buildOption, elementsLength, errorCode);
+ if(bytes==NULL) { // ensureCapacity() frees and nulls bytes when growing fails
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ }
+}
+
+// Forgets all added strings and any built bytes; allocated capacity is kept.
+BytesTrieBuilder &
+BytesTrieBuilder::clear() {
+    bytesLength=0;
+    elementsLength=0;
+    strings->clear();
+    return *this;
+}
+
+// Length in bytes of the i-th element's string.
+int32_t
+BytesTrieBuilder::getElementStringLength(int32_t i) const {
+    const BytesTrieElement &element=elements[i];
+    return element.getStringLength(*strings);
+}
+
+// Byte at |byteIndex| of the i-th element's string, as an unsigned unit.
+UChar
+BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const {
+    const char rawByte=elements[i].charAt(byteIndex, *strings);
+    return (uint8_t)rawByte;
+}
+
+// Value stored with the i-th element.
+int32_t
+BytesTrieBuilder::getElementValue(int32_t i) const {
+    const BytesTrieElement &element=elements[i];
+    return element.getValue();
+}
+
+// Starting after |byteIndex|, returns the first index at which the first and
+// last elements differ, or the first element's length if they do not.
+int32_t
+BytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const {
+    const BytesTrieElement &lowest=elements[first];
+    const BytesTrieElement &highest=elements[last];
+    const int32_t limit=lowest.getStringLength(*strings);
+    for(++byteIndex; byteIndex<limit; ++byteIndex) {
+        if(lowest.charAt(byteIndex, *strings)!=highest.charAt(byteIndex, *strings)) {
+            break;
+        }
+    }
+    return byteIndex;
+}
+
+// Counts the runs of equal bytes at |byteIndex| among elements [start, limit).
+int32_t
+BytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const {
+    int32_t distinct=0;
+    int32_t i=start;
+    do {
+        const char unit=elements[i].charAt(byteIndex, *strings);
+        // Skip the rest of the run that shares this byte.
+        for(++i; i<limit && unit==elements[i].charAt(byteIndex, *strings); ++i) {}
+        ++distinct;
+    } while(i<limit);
+    return distinct;
+}
+
+// Advances |i| past |count| runs of equal bytes at |byteIndex| and returns
+// the index of the first element of the following run.
+int32_t
+BytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const {
+    for(;;) {
+        const char unit=elements[i++].charAt(byteIndex, *strings);
+        while(unit==elements[i].charAt(byteIndex, *strings)) {
+            ++i;
+        }
+        if(--count<=0) {
+            break;
+        }
+    }
+    return i;
+}
+
+// Returns the index of the first element at or after |i| whose byte at
+// |byteIndex| differs from |byte|.
+int32_t
+BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const {
+    const char target=(char)byte;
+    for(; target==elements[i].charAt(byteIndex, *strings); ++i) {}
+    return i;
+}
+
+// Folds the hash of the match bytes into the hash set up by the base class.
+BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
+        : LinearMatchNode(len, nextNode), s(bytes) {
+    const uint32_t baseHash=static_cast<uint32_t>(hash);
+    const uint32_t bytesHash=static_cast<uint32_t>(ustr_hashCharsN(bytes, len));
+    hash=static_cast<int32_t>(baseHash*37u + bytesHash);
+}
+
+// Equal when the base-class comparison succeeds and the match bytes are identical.
+UBool
+BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
+    if(this==&other) {
+        return TRUE;
+    }
+    if(!LinearMatchNode::operator==(other)) {
+        return FALSE;
+    }
+    const BTLinearMatchNode &that=(const BTLinearMatchNode &)other;
+    return uprv_memcmp(s, that.s, length)==0;
+}
+
+// Writes the next node first, then the match bytes, then the lead byte,
+// and records the resulting offset.
+void
+BytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) {
+    BytesTrieBuilder &bytesBuilder=(BytesTrieBuilder &)builder;
+    next->write(builder);
+    bytesBuilder.write(s, length);
+    const int32_t leadByte=bytesBuilder.getMinLinearMatch()+length-1;
+    offset=bytesBuilder.write(leadByte);
+}
+
+// Creates a linear-match node for |length| bytes of element |i| starting at |byteIndex|.
+StringTrieBuilder::Node *
+BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
+                                        Node *nextNode) const {
+    const char *matchBytes=elements[i].getString(*strings).data()+byteIndex;
+    return new BTLinearMatchNode(matchBytes, length, nextNode);
+}
+
+// Makes sure the byte array can hold |length| bytes. The content sits at the
+// end of the array, so growing copies it to the end of the new array.
+// Returns FALSE (and leaves bytes==NULL) if memory could not be allocated.
+UBool
+BytesTrieBuilder::ensureCapacity(int32_t length) {
+    if(bytes==NULL) {
+        return FALSE;  // previous memory allocation had failed
+    }
+    if(length<=bytesCapacity) {
+        return TRUE;
+    }
+    int32_t grownCapacity=bytesCapacity;
+    do {
+        grownCapacity*=2;
+    } while(grownCapacity<=length);
+    char *replacement=static_cast<char *>(uprv_malloc(grownCapacity));
+    if(replacement==NULL) {
+        // unable to allocate memory
+        uprv_free(bytes);
+        bytes=NULL;
+        bytesCapacity=0;
+        return FALSE;
+    }
+    uprv_memcpy(replacement+(grownCapacity-bytesLength),
+                bytes+(bytesCapacity-bytesLength), bytesLength);
+    uprv_free(bytes);
+    bytes=replacement;
+    bytesCapacity=grownCapacity;
+    return TRUE;
+}
+
+// Prepends one byte to the serialized content and returns the new length
+// (unchanged if the array could not be grown).
+int32_t
+BytesTrieBuilder::write(int32_t byte) {
+    const int32_t newLength=bytesLength+1;
+    if(!ensureCapacity(newLength)) {
+        return bytesLength;
+    }
+    bytesLength=newLength;
+    bytes[bytesCapacity-bytesLength]=(char)byte;
+    return bytesLength;
+}
+
+// Prepends |length| bytes from |b| to the serialized content and returns the
+// new length (unchanged if the array could not be grown).
+int32_t
+BytesTrieBuilder::write(const char *b, int32_t length) {
+    const int32_t newLength=bytesLength+length;
+    if(!ensureCapacity(newLength)) {
+        return bytesLength;
+    }
+    bytesLength=newLength;
+    uprv_memcpy(bytes+(bytesCapacity-bytesLength), b, length);
+    return bytesLength;
+}
+
+// Writes |length| bytes of element |i|, starting at |byteIndex|.
+int32_t
+BytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) {
+    const char *unitBytes=elements[i].getString(*strings).data()+byteIndex;
+    return write(unitBytes, length);
+}
+
+int32_t // Encodes value i in 1..5 bytes with isFinal in the low bit of the lead byte; returns the offset from write().
+BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) {
+ if(0<=i && i<=BytesTrie::kMaxOneByteValue) { // fast path: the value fits into the lead byte itself
+ return write(((BytesTrie::kMinOneByteValueLead+i)<<1)|isFinal);
+ }
+ char intBytes[5];
+ int32_t length=1; // intBytes[0] is always the lead byte
+ if(i<0 || i>0xffffff) { // negative or more than 24 bits: lead byte plus all four value bytes
+ intBytes[0]=(char)BytesTrie::kFiveByteValueLead;
+ intBytes[1]=(char)((uint32_t)i>>24);
+ intBytes[2]=(char)((uint32_t)i>>16);
+ intBytes[3]=(char)((uint32_t)i>>8);
+ intBytes[4]=(char)i;
+ length=5;
+ // } else if(i<=BytesTrie::kMaxOneByteValue) {
+ // intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i);
+ } else {
+ if(i<=BytesTrie::kMaxTwoByteValue) { // top bits of the value are folded into the lead byte
+ intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8));
+ } else {
+ if(i<=BytesTrie::kMaxThreeByteValue) {
+ intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16));
+ } else {
+ intBytes[0]=(char)BytesTrie::kFourByteValueLead;
+ intBytes[1]=(char)(i>>16);
+ length=2;
+ }
+ intBytes[length++]=(char)(i>>8); // shared by the three- and four-byte forms
+ }
+ intBytes[length++]=(char)i; // lowest byte, shared by the two- to four-byte forms
+ }
+ intBytes[0]=(char)((intBytes[0]<<1)|isFinal); // shift the lead byte and put the final flag into bit 0
+ return write(intBytes, length);
+}
+
+// Writes |node| and then, if |hasValue|, a non-final |value|.
+// Returns the offset reported by the last write.
+int32_t
+BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
+    const int32_t nodeOffset=write(node);
+    return hasValue ? writeValueAndFinal(value, FALSE) : nodeOffset;
+}
+
+// Writes the distance from the current write position back to |jumpTarget|
+// as a 1..5 byte delta and returns the offset reported by write().
+//
+// Each middle byte is appended at intBytes[length++]. Earlier code stored
+// every middle byte into intBytes[1], which corrupted the four- and five-byte
+// encodings: the earlier bytes were overwritten and uninitialized bytes were
+// written out. One- to three-byte deltas are encoded exactly as before.
+int32_t
+BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
+    int32_t i=bytesLength-jumpTarget;
+    U_ASSERT(i>=0);
+    if(i<=BytesTrie::kMaxOneByteDelta) {
+        return write(i);
+    }
+    char intBytes[5];
+    int32_t length=1;  // intBytes[0] is always the lead byte
+    if(i<=BytesTrie::kMaxTwoByteDelta) {
+        intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
+    } else {
+        if(i<=BytesTrie::kMaxThreeByteDelta) {
+            intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
+        } else {
+            if(i<=0xffffff) {
+                intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
+            } else {
+                intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
+                intBytes[length++]=(char)(i>>24);
+            }
+            intBytes[length++]=(char)(i>>16);
+        }
+        intBytes[length++]=(char)(i>>8);
+    }
+    intBytes[length++]=(char)i;
+    return write(intBytes, length);
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/bytestrieiterator.cpp b/thirdparty/icu4c/common/bytestrieiterator.cpp
new file mode 100644
index 0000000000..e64961a1f1
--- /dev/null
+++ b/thirdparty/icu4c/common/bytestrieiterator.cpp
@@ -0,0 +1,214 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: bytestrieiterator.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010nov03
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/stringpiece.h"
+#include "charstr.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+// Iterates over all strings of the trie stored at |trieBytes|, from its root.
+BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
+                              UErrorCode &errorCode)
+        : bytes_(static_cast<const uint8_t *>(trieBytes)),
+          pos_(bytes_), initialPos_(bytes_),
+          remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
+          str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    // str_ and stack_ are held by pointer so that bytestrie.h could become a
+    // public API header depending only on other public headers.
+    // The Iterator allocates memory anyway through CharString and UVector32,
+    // so the extra cost of these two allocations is minimal.
+    str_=new CharString();
+    stack_=new UVector32(errorCode);
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    if(str_==NULL || stack_==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+// Iterates over the strings reachable from the current state of |trie|.
+BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
+                              UErrorCode &errorCode)
+        : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
+          remainingMatchLength_(trie.remainingMatchLength_),
+          initialRemainingMatchLength_(trie.remainingMatchLength_),
+          str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    str_=new CharString();
+    stack_=new UVector32(errorCode);
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    if(str_==NULL || stack_==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    if(remainingMatchLength_<0) {
+        return;  // not inside a linear-match node
+    }
+    // Pending linear-match node: remainingMatchLength_ is the real remaining
+    // length minus 1. Copy the remaining bytes (capped at maxLength_) to str_.
+    int32_t pendingLength=remainingMatchLength_+1;
+    if(maxLength_>0 && pendingLength>maxLength_) {
+        pendingLength=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
+    }
+    str_->append(reinterpret_cast<const char *>(pos_), pendingLength, errorCode);
+    pos_+=pendingLength;
+    remainingMatchLength_-=pendingLength;
+}
+
+// Frees the heap-allocated branch stack and string buffer.
+BytesTrie::Iterator::~Iterator() {
+    delete stack_;
+    delete str_;
+}
+
+// Returns the iterator to the state it had right after construction.
+BytesTrie::Iterator &
+BytesTrie::Iterator::reset() {
+    // Bytes of a pending linear-match node that were copied at construction
+    // time stay in str_; everything after them is dropped.
+    int32_t prefixLength=initialRemainingMatchLength_+1;
+    if(maxLength_>0 && prefixLength>maxLength_) {
+        prefixLength=maxLength_;
+    }
+    str_->truncate(prefixLength);
+    pos_=initialPos_+prefixLength;
+    remainingMatchLength_=initialRemainingMatchLength_-prefixLength;
+    stack_->setSize(0);
+    return *this;
+}
+
+// TRUE while there is a current position or pending branch state on the stack.
+UBool
+BytesTrie::Iterator::hasNext() const {
+    if(pos_!=NULL) {
+        return TRUE;
+    }
+    return !stack_->isEmpty();
+}
+
+UBool // Advances to the next string; returns FALSE on a failed errorCode or when nothing is left.
+BytesTrie::Iterator::next(UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return FALSE;
+ }
+ const uint8_t *pos=pos_;
+ if(pos==NULL) { // the previous call ended a path; resume from saved branch state
+ if(stack_->isEmpty()) {
+ return FALSE;
+ }
+ // Pop the state off the stack and continue with the next outbound edge of
+ // the branch node.
+ int32_t stackSize=stack_->size();
+ int32_t length=stack_->elementAti(stackSize-1); // packed: (remaining branch length<<16) | str_ length at push time
+ pos=bytes_+stack_->elementAti(stackSize-2); // saved position, stored as an offset from bytes_
+ stack_->setSize(stackSize-2);
+ str_->truncate(length&0xffff); // restore str_ to its length when the state was pushed
+ length=(int32_t)((uint32_t)length>>16);
+ if(length>1) {
+ pos=branchNext(pos, length, errorCode);
+ if(pos==NULL) {
+ return TRUE; // Reached a final value.
+ }
+ } else {
+ str_->append((char)*pos++, errorCode); // last edge of the branch: append its key byte and continue below
+ }
+ }
+ if(remainingMatchLength_>=0) {
+ // We only get here if we started in a pending linear-match node
+ // with more than maxLength remaining bytes.
+ return truncateAndStop();
+ }
+ for(;;) {
+ int32_t node=*pos++;
+ if(node>=kMinValueLead) {
+ // Deliver value for the byte sequence so far.
+ UBool isFinal=(UBool)(node&kValueIsFinal);
+ value_=readValue(pos, node>>1);
+ if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
+ pos_=NULL; // nothing more on this path; the next call pops the stack
+ } else {
+ pos_=skipValue(pos, node); // the next call continues after this intermediate value
+ }
+ return TRUE;
+ }
+ if(maxLength_>0 && str_->length()==maxLength_) { // length limit reached before a value was found
+ return truncateAndStop();
+ }
+ if(node<kMinLinearMatch) {
+ if(node==0) { // zero lead byte: the branch length is in the next byte
+ node=*pos++;
+ }
+ pos=branchNext(pos, node+1, errorCode);
+ if(pos==NULL) {
+ return TRUE; // Reached a final value.
+ }
+ } else {
+ // Linear-match node, append length bytes to str_.
+ int32_t length=node-kMinLinearMatch+1;
+ if(maxLength_>0 && str_->length()+length>maxLength_) { // append only as many bytes as the limit allows
+ str_->append(reinterpret_cast<const char *>(pos),
+ maxLength_-str_->length(), errorCode);
+ return truncateAndStop();
+ }
+ str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
+ pos+=length;
+ }
+ }
+}
+
+// Returns the current string, or an empty StringPiece if str_ was never allocated.
+StringPiece
+BytesTrie::Iterator::getString() const {
+    if(str_ == NULL) {
+        return StringPiece();
+    }
+    return str_->toStringPiece();
+}
+
+// Ends the current path with a truncated string: the value is set to -1
+// and the position is cleared. Always returns TRUE.
+UBool
+BytesTrie::Iterator::truncateAndStop() {
+    value_=-1;  // no real value for str
+    pos_=NULL;
+    return TRUE;
+}
+
+// Branch node, needs to take the first outbound edge and push state for the rest.
+const uint8_t * // Returns the position to continue from, or NULL when the first edge ends in a final value.
+BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
+ while(length>kMaxBranchLinearSubNodeLength) {
+ ++pos; // ignore the comparison byte
+ // Push state for the greater-or-equal edge.
+ stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode); // position, as an offset from bytes_
+ stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode); // (remaining length<<16) | current str_ length
+ // Follow the less-than edge.
+ length>>=1;
+ pos=jumpByDelta(pos);
+ }
+ // List of key-value pairs where values are either final values or jump deltas.
+ // Read the first (key, value) pair.
+ uint8_t trieByte=*pos++;
+ int32_t node=*pos++;
+ UBool isFinal=(UBool)(node&kValueIsFinal);
+ int32_t value=readValue(pos, node>>1);
+ pos=skipValue(pos, node);
+ stack_->addElement((int32_t)(pos-bytes_), errorCode); // the remaining pairs start right after this value
+ stack_->addElement(((length-1)<<16)|str_->length(), errorCode); // one pair consumed; str_ length before appending the key
+ str_->append((char)trieByte, errorCode);
+ if(isFinal) {
+ pos_=NULL;
+ value_=value;
+ return NULL;
+ } else {
+ return pos+value; // non-final: value is a jump delta from the position after it
+ }
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/caniter.cpp b/thirdparty/icu4c/common/caniter.cpp
new file mode 100644
index 0000000000..b28acfc84e
--- /dev/null
+++ b/thirdparty/icu4c/common/caniter.cpp
@@ -0,0 +1,586 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *****************************************************************************
+ * Copyright (C) 1996-2015, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *****************************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/caniter.h"
+#include "unicode/normalizer2.h"
+#include "unicode/uchar.h"
+#include "unicode/uniset.h"
+#include "unicode/usetiter.h"
+#include "unicode/ustring.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "hash.h"
+#include "normalizer2impl.h"
+
+/**
+ * This class allows one to iterate through all the strings that are canonically equivalent to a given
+ * string. For example, here are some sample results:
+Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+1: \u0041\u030A\u0064\u0307\u0327
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+2: \u0041\u030A\u0064\u0327\u0307
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
+3: \u0041\u030A\u1E0B\u0327
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
+4: \u0041\u030A\u1E11\u0307
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
+5: \u00C5\u0064\u0307\u0327
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+6: \u00C5\u0064\u0327\u0307
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
+7: \u00C5\u1E0B\u0327
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
+8: \u00C5\u1E11\u0307
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
+9: \u212B\u0064\u0307\u0327
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+10: \u212B\u0064\u0327\u0307
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
+11: \u212B\u1E0B\u0327
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
+12: \u212B\u1E11\u0307
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
+ *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
+ * since it has not been optimized for that situation.
+ *@author M. Davis
+ *@draft
+ */
+
+// public
+
+U_NAMESPACE_BEGIN
+
+// TODO: add boilerplate methods.
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)
+
+/**
+ *@param source string to get results for
+ */
+CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode &status) :
+    pieces(NULL),
+    pieces_length(0),
+    pieces_lengths(NULL),
+    current(NULL),
+    current_length(0),
+    nfd(*Normalizer2::getNFDInstance(status)),
+    nfcImpl(*Normalizer2Factory::getNFCImpl(status))
+{
+    if(U_FAILURE(status)) {
+        return;
+    }
+    // The source is only set once the canonical-iterator data is available.
+    if(nfcImpl.ensureCanonIterData(status)) {
+        setSource(sourceStr, status);
+    }
+}
+
+CanonicalIterator::~CanonicalIterator() {
+ cleanPieces(); // frees pieces, pieces_lengths and current
+}
+
+// Frees the per-segment string arrays and the index arrays, and resets the
+// corresponding members.
+void CanonicalIterator::cleanPieces() {
+    if(pieces != NULL) {
+        for(int32_t segment = 0; segment < pieces_length; ++segment) {
+            delete[] pieces[segment];  // delete[] of a null pointer does nothing
+        }
+        uprv_free(pieces);
+        pieces = NULL;
+        pieces_length = 0;
+    }
+    if(pieces_lengths != NULL) {
+        uprv_free(pieces_lengths);
+        pieces_lengths = NULL;
+    }
+    if(current != NULL) {
+        uprv_free(current);
+        current = NULL;
+        current_length = 0;
+    }
+}
+
+/**
+ *@return gets the source: NOTE: it is the NFD form of source
+ */
+UnicodeString CanonicalIterator::getSource() {
+ return source; // returned by value; setSource() fills source via nfd.normalize()
+}
+
+/**
+ * Resets the iterator so that one can start again from the beginning.
+ */
+void CanonicalIterator::reset() {
+    done = FALSE;
+    // Point every segment back at its first alternative.
+    int32_t remaining = current_length;
+    while (remaining > 0) {
+        current[--remaining] = 0;
+    }
+}
+
+/**
+ *@return the next string that is canonically equivalent. A bogus string
+ * (isBogus() is true) is returned when the iteration is done.
+ */
+UnicodeString CanonicalIterator::next() {
+    if (done) {
+        buffer.setToBogus();
+        return buffer;
+    }
+
+    // Rebuild the result from the currently selected alternative of each segment.
+    buffer.remove();
+    for (int32_t segment = 0; segment < pieces_length; ++segment) {
+        buffer.append(pieces[segment][current[segment]]);
+    }
+
+    // Advance the per-segment indexes like an odometer, last segment first,
+    // so that the next call produces the next combination.
+    int32_t digit = current_length - 1;
+    while (digit >= 0) {
+        if (++current[digit] < pieces_lengths[digit]) {
+            break; // got sequence
+        }
+        current[digit] = 0;
+        --digit;
+    }
+    if (digit < 0) {
+        // Every index wrapped around: all combinations have been produced.
+        done = TRUE;
+    }
+    return buffer;
+}
+
+/**
+ *@param set the source string to iterate against. This allows the same iterator to be used
+ * while changing the source string, saving object creation.
+ */
+void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &status) {
+    int32_t list_length = 0;
+    UChar32 cp = 0;
+    int32_t start = 0;
+    int32_t i = 0;
+    UnicodeString *list = NULL;
+
+    // All further analysis is done on the NFD form, which is stored in source.
+    nfd.normalize(newSource, source, status);
+    if(U_FAILURE(status)) {
+        return;
+    }
+    done = FALSE;
+
+    cleanPieces();
+
+    // catch degenerate case
+    if (newSource.length() == 0) {
+        pieces = (UnicodeString **)uprv_malloc(sizeof(UnicodeString *));
+        pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
+        pieces_length = 1;
+        current = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
+        current_length = 1;
+        if (pieces != NULL) {
+            // uprv_malloc() does not clear memory; cleanPieces() must never
+            // see an uninitialized slot if we bail out below.
+            pieces[0] = NULL;
+        }
+        if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            goto CleanPartialInitialization;
+        }
+        current[0] = 0;
+        pieces[0] = new UnicodeString[1];
+        pieces_lengths[0] = 1;
+        if (pieces[0] == 0) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            goto CleanPartialInitialization;
+        }
+        return;
+    }
+
+
+    list = new UnicodeString[source.length()];
+    if (list == 0) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        goto CleanPartialInitialization;
+    }
+
+    // i should initially be the number of code units at the
+    // start of the string
+    i = U16_LENGTH(source.char32At(0));
+    //int32_t i = 1;
+    // find the segments
+    // This code iterates through the source string and
+    // extracts segments that end up on a codepoint that
+    // doesn't start any decompositions. (Analysis is done
+    // on the NFD form - see above).
+    for (; i < source.length(); i += U16_LENGTH(cp)) {
+        cp = source.char32At(i);
+        if (nfcImpl.isCanonSegmentStarter(cp)) {
+            source.extract(start, i-start, list[list_length++]); // add up to i
+            start = i;
+        }
+    }
+    source.extract(start, i-start, list[list_length++]); // add last one
+
+
+    // allocate the arrays, and find the strings that are CE to each segment
+    pieces = (UnicodeString **)uprv_malloc(list_length * sizeof(UnicodeString *));
+    pieces_length = list_length;
+    pieces_lengths = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
+    current = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
+    current_length = list_length;
+    if (pieces != NULL) {
+        // Clear every slot so that cleanPieces() only ever deletes NULL or
+        // arrays that getEquivalents() really returned.
+        for (i = 0; i < list_length; i++) {
+            pieces[i] = NULL;
+        }
+    }
+    if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        goto CleanPartialInitialization;
+    }
+
+    for (i = 0; i < current_length; i++) {
+        current[i] = 0;
+    }
+    // for each segment, get all the combinations that can produce
+    // it after NFD normalization
+    for (i = 0; i < pieces_length; ++i) {
+        //if (PROGRESS) printf("SEGMENT\n");
+        pieces[i] = getEquivalents(list[i], pieces_lengths[i], status);
+    }
+
+    delete[] list;
+    return;
+// Common section to cleanup all local variables and reset object variables.
+CleanPartialInitialization:
+    if (list != NULL) {
+        delete[] list;
+    }
+    cleanPieces();
+    // cleanPieces() resets a length only when the matching array was
+    // allocated; make sure no stale length survives next to a NULL array.
+    pieces_length = 0;
+    current_length = 0;
+}
+
+/**
+ * Dumb recursive implementation of permutation.
+ * TODO: optimize
+ * @param source the string to find permutations for
+ * @return the results in a set.
+ */
+void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
+ if(U_FAILURE(status)) {
+ return;
+ }
+ //if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
+ int32_t i = 0;
+
+ // optimization:
+ // if zero or one character, just return a set with it
+ // we check for length <= 2 first to keep from counting code points all the time
+ if (source.length() <= 2 && source.countChar32() <= 1) {
+ UnicodeString *toPut = new UnicodeString(source);
+ /* test for NULL */
+ if (toPut == 0) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ result->put(source, toPut, status); // the string is both key and value
+ return;
+ }
+
+ // otherwise iterate through the string, and recursively permute all the other characters
+ UChar32 cp;
+ Hashtable subpermute(status);
+ if(U_FAILURE(status)) {
+ return;
+ }
+ subpermute.setValueDeleter(uprv_deleteUObject);
+
+ for (i = 0; i < source.length(); i += U16_LENGTH(cp)) {
+ cp = source.char32At(i);
+ const UHashElement *ne = NULL;
+ int32_t el = UHASH_FIRST;
+ UnicodeString subPermuteString = source; // working copy; cp is removed from it below
+
+ // optimization:
+ // if the character is canonical combining class zero,
+ // don't permute it
+ if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) {
+ //System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i)));
+ continue;
+ }
+
+ subpermute.removeAll(); // the table is reused for every candidate first character
+
+ // see what the permutations of the characters before and after this one are
+ //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
+ permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status);
+ /* Test for buffer overflows */
+ if(U_FAILURE(status)) {
+ return;
+ }
+ // The upper remove is destructive. The question is do we have to make a copy, or we don't care about the contents
+ // of source at this point.
+
+ // prefix this character to all of them
+ ne = subpermute.nextElement(el);
+ while (ne != NULL) {
+ UnicodeString *permRes = (UnicodeString *)(ne->value.pointer);
+ UnicodeString *chStr = new UnicodeString(cp);
+ //test for NULL
+ if (chStr == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ chStr->append(*permRes); //*((UnicodeString *)(ne->value.pointer));
+ //if (PROGRESS) printf(" Piece: %s\n", UToS(*chStr));
+ result->put(*chStr, chStr, status); // chStr is handed to result as the value
+ ne = subpermute.nextElement(el);
+ }
+ }
+ //return result;
+}
+
+// privates
+
+// we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
+UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status) {
+ Hashtable result(status);
+ Hashtable permutations(status);
+ Hashtable basic(status);
+ if (U_FAILURE(status)) {
+ return 0;
+ }
+ result.setValueDeleter(uprv_deleteUObject);
+ permutations.setValueDeleter(uprv_deleteUObject);
+ basic.setValueDeleter(uprv_deleteUObject);
+
+ UChar USeg[256]; // fixed-size copy of the segment for getEquivalents2()
+ int32_t segLen = segment.extract(USeg, 256, status); // NOTE(review): presumably sets an error status when the segment does not fit in 256 units - confirm
+ getEquivalents2(&basic, USeg, segLen, status);
+
+ // now get all the permutations
+ // add only the ones that are canonically equivalent
+ // TODO: optimize by not permuting any class zero.
+
+ const UHashElement *ne = NULL;
+ int32_t el = UHASH_FIRST;
+ //Iterator it = basic.iterator();
+ ne = basic.nextElement(el);
+ //while (it.hasNext())
+ while (ne != NULL) {
+ //String item = (String) it.next();
+ UnicodeString item = *((UnicodeString *)(ne->value.pointer));
+
+ permutations.removeAll(); // the table is reused for each basic form
+ permute(item, CANITER_SKIP_ZEROES, &permutations, status);
+ const UHashElement *ne2 = NULL;
+ int32_t el2 = UHASH_FIRST;
+ //Iterator it2 = permutations.iterator();
+ ne2 = permutations.nextElement(el2);
+ //while (it2.hasNext())
+ while (ne2 != NULL) {
+ //String possible = (String) it2.next();
+ //UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
+ UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
+ UnicodeString attempt;
+ nfd.normalize(possible, attempt, status); // keep the permutation only if its NFD form equals the segment
+
+ // TODO: check if operator == is semantically the same as attempt.equals(segment)
+ if (attempt==segment) {
+ //if (PROGRESS) printf("Adding Permutation: %s\n", UToS(Tr(*possible)));
+ // TODO: use the hashtable just to catch duplicates - store strings directly (somehow).
+ result.put(possible, new UnicodeString(possible), status); //add(possible);
+ } else {
+ //if (PROGRESS) printf("-Skipping Permutation: %s\n", UToS(Tr(*possible)));
+ }
+
+ ne2 = permutations.nextElement(el2);
+ }
+ ne = basic.nextElement(el);
+ }
+
+ /* Test for buffer overflows */
+ if(U_FAILURE(status)) {
+ return 0;
+ }
+ // convert into a String[] to clean up storage
+ //String[] finalResult = new String[result.size()];
+ UnicodeString *finalResult = NULL;
+ int32_t resultCount;
+ if((resultCount = result.count()) != 0) {
+ finalResult = new UnicodeString[resultCount];
+ if (finalResult == 0) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ }
+ else { // no equivalent string was found at all: reported as an illegal argument
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return NULL;
+ }
+ //result.toArray(finalResult);
+ result_len = 0; // result_len is only written on this success path
+ el = UHASH_FIRST;
+ ne = result.nextElement(el);
+ while(ne != NULL) {
+ finalResult[result_len++] = *((UnicodeString *)(ne->value.pointer)); // copied by value; result still owns its entries
+ ne = result.nextElement(el);
+ }
+
+
+ return finalResult; // new[]-allocated array; cleanPieces() releases stored arrays with delete[]
+}
+
+ /**
+  * Adds the segment itself, plus every variant obtained by replacing a matched
+  * decomposition inside the segment with its composite, to fillinResult.
+  *
+  * @param fillinResult table receiving the variants (keyed by the string itself)
+  * @param segment      UTF-16 text of the segment
+  * @param segLen       number of UChars in segment
+  * @param status       ICU error code
+  * @return fillinResult, or NULL if status indicates a failure
+  */
+ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) {
+
+     if (U_FAILURE(status)) {
+         return NULL;
+     }
+
+     // The unmodified segment is always one of the results.
+     UnicodeString whole(segment, segLen);
+     fillinResult->put(whole, new UnicodeString(whole), status);
+
+     UnicodeSet starts;
+
+     // Walk the segment one code point at a time.
+     UChar32 cp;
+     for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
+         U16_GET(segment, 0, i, segLen, cp);
+         // Only code points that start some decomposition are interesting.
+         if (nfcImpl.getCanonStartSet(cp, starts)) {
+             // Try every composite whose decomposition starts with cp.
+             UnicodeSetIterator candidates(starts);
+             while (candidates.next()) {
+                 UChar32 composite = candidates.getCodepoint();
+                 Hashtable tails(status);
+                 tails.setValueDeleter(uprv_deleteUObject);
+                 if (extract(&tails, composite, segment, segLen, i, status) != NULL) {
+                     // There were matches: head = text before i + the composite.
+                     UnicodeString head(segment, i);
+                     head += composite;
+
+                     int32_t cursor = UHASH_FIRST;
+                     for (const UHashElement *entry = tails.nextElement(cursor);
+                          entry != NULL;
+                          entry = tails.nextElement(cursor)) {
+                         UnicodeString tail = *((UnicodeString *)(entry->value.pointer));
+                         UnicodeString *combined = new UnicodeString(head);
+                         /* test for NULL */
+                         if (combined == 0) {
+                             status = U_MEMORY_ALLOCATION_ERROR;
+                             return NULL;
+                         }
+                         *combined += tail;
+                         fillinResult->put(*combined, combined, status);
+                     }
+                 }
+             }
+         }
+     }
+
+     /* Test for buffer overflows */
+     if (U_FAILURE(status)) {
+         return NULL;
+     }
+     return fillinResult;
+ }
+
+/**
+ * See if the decomposition of cp2 is at segment starting at segmentPos
+ * (with canonical rearrangment!)
+ * If so, take the remainder, and return the equivalents
+ */
+Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
+//Hashtable *CanonicalIterator::extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
+ //if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp))));
+ //if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos);
+
+ if (U_FAILURE(status)) {
+ return NULL;
+ }
+
+ UnicodeString temp(comp);
+ int32_t inputLen=temp.length();
+ UnicodeString decompString;
+ nfd.normalize(temp, decompString, status);
+ if (U_FAILURE(status)) {
+ return NULL;
+ }
+ if (decompString.isBogus()) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ const UChar *decomp=decompString.getBuffer();
+ int32_t decompLen=decompString.length();
+
+ // See if it matches the start of segment (at segmentPos)
+ UBool ok = FALSE;
+ UChar32 cp;
+ int32_t decompPos = 0;
+ UChar32 decompCp;
+ U16_NEXT(decomp, decompPos, decompLen, decompCp);
+
+ int32_t i = segmentPos;
+ while(i < segLen) {
+ U16_NEXT(segment, i, segLen, cp);
+
+ if (cp == decompCp) { // if equal, eat another cp from decomp
+
+ //if (PROGRESS) printf(" matches: %s\n", UToS(Tr(UnicodeString(cp))));
+
+ if (decompPos == decompLen) { // done, have all decomp characters!
+ temp.append(segment+i, segLen-i);
+ ok = TRUE;
+ break;
+ }
+ U16_NEXT(decomp, decompPos, decompLen, decompCp);
+ } else {
+ //if (PROGRESS) printf(" buffer: %s\n", UToS(Tr(UnicodeString(cp))));
+
+ // brute force approach
+ temp.append(cp);
+
+ /* TODO: optimize
+ // since we know that the classes are monotonically increasing, after zero
+ // e.g. 0 5 7 9 0 3
+ // we can do an optimization
+ // there are only a few cases that work: zero, less, same, greater
+ // if both classes are the same, we fail
+ // if the decomp class < the segment class, we fail
+
+ segClass = getClass(cp);
+ if (decompClass <= segClass) return null;
+ */
+ }
+ }
+ if (!ok)
+ return NULL; // we failed, characters left over
+
+ //if (PROGRESS) printf("Matches\n");
+
+ if (inputLen == temp.length()) {
+ fillinResult->put(UnicodeString(), new UnicodeString(), status);
+ return fillinResult; // succeed, but no remainder
+ }
+
+ // brute force approach
+ // check to make sure result is canonically equivalent
+ UnicodeString trial;
+ nfd.normalize(temp, trial, status);
+ if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
+ return NULL;
+ }
+
+ return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
diff --git a/thirdparty/icu4c/common/capi_helper.h b/thirdparty/icu4c/common/capi_helper.h
new file mode 100644
index 0000000000..54b1db9e33
--- /dev/null
+++ b/thirdparty/icu4c/common/capi_helper.h
@@ -0,0 +1,97 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef __CAPI_HELPER_H__
+#define __CAPI_HELPER_H__
+
+#include "unicode/utypes.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * An internal helper class to help convert between C and C++ APIs.
+ */
+template<typename CType, typename CPPType, int32_t kMagic>
+class IcuCApiHelper {
+ public:
+ /**
+ * Convert from the C type to the C++ type (const version).
+ */
+ static const CPPType* validate(const CType* input, UErrorCode& status);
+
+ /**
+ * Convert from the C type to the C++ type (non-const version).
+ */
+ static CPPType* validate(CType* input, UErrorCode& status);
+
+ /**
+ * Convert from the C++ type to the C type (const version).
+ */
+ const CType* exportConstForC() const;
+
+ /**
+ * Convert from the C++ type to the C type (non-const version).
+ */
+ CType* exportForC();
+
+ /**
+ * Invalidates the object.
+ */
+ ~IcuCApiHelper();
+
+ private:
+ /**
+ * While the object is valid, fMagic equals kMagic.
+ */
+ int32_t fMagic = kMagic;
+};
+
+
+ /**
+  * Const overload: checks a C handle and returns it as the C++ type.
+  *
+  * Returns nullptr if status already indicates a failure. Sets
+  * U_ILLEGAL_ARGUMENT_ERROR for a null input and U_INVALID_FORMAT_ERROR
+  * when the stored magic number does not equal kMagic.
+  */
+ template<typename CType, typename CPPType, int32_t kMagic>
+ const CPPType*
+ IcuCApiHelper<CType, CPPType, kMagic>::validate(const CType* input, UErrorCode& status) {
+     if (U_FAILURE(status)) {
+         return nullptr;
+     }
+     if (input == nullptr) {
+         status = U_ILLEGAL_ARGUMENT_ERROR;
+         return nullptr;
+     }
+     using Helper = IcuCApiHelper<CType, CPPType, kMagic>;
+     const CPPType* candidate = reinterpret_cast<const CPPType*>(input);
+     const Helper* asHelper = static_cast<const Helper*>(candidate);
+     if (asHelper->fMagic == kMagic) {
+         return candidate;
+     }
+     status = U_INVALID_FORMAT_ERROR;
+     return nullptr;
+ }
+
+ /**
+  * Non-const overload: delegates to the const overload and strips the
+  * const qualifier from the result.
+  */
+ template<typename CType, typename CPPType, int32_t kMagic>
+ CPPType*
+ IcuCApiHelper<CType, CPPType, kMagic>::validate(CType* input, UErrorCode& status) {
+     // The explicit cast to a pointer-to-const selects the const overload.
+     return const_cast<CPPType*>(validate(static_cast<const CType*>(input), status));
+ }
+
+ /**
+  * Returns this object viewed as a pointer to the const C type.
+  */
+ template<typename CType, typename CPPType, int32_t kMagic>
+ const CType*
+ IcuCApiHelper<CType, CPPType, kMagic>::exportConstForC() const {
+     // First step to the C++ implementation type, then reinterpret as the C type.
+     const CPPType* self = static_cast<const CPPType*>(this);
+     return reinterpret_cast<const CType*>(self);
+ }
+
+ /**
+  * Returns this object viewed as a pointer to the C type.
+  */
+ template<typename CType, typename CPPType, int32_t kMagic>
+ CType*
+ IcuCApiHelper<CType, CPPType, kMagic>::exportForC() {
+     // First step to the C++ implementation type, then reinterpret as the C type.
+     CPPType* self = static_cast<CPPType*>(this);
+     return reinterpret_cast<CType*>(self);
+ }
+
+ // Destructor: clears the magic number so that a later validate() call on this
+ // memory fails its fMagic check.
+ template<typename CType, typename CPPType, int32_t kMagic>
+ IcuCApiHelper<CType, CPPType, kMagic>::~IcuCApiHelper() {
+ // head off application errors by preventing use of deleted objects.
+ fMagic = 0;
+ }
+
+
+U_NAMESPACE_END
+
+#endif // __CAPI_HELPER_H__
diff --git a/thirdparty/icu4c/common/characterproperties.cpp b/thirdparty/icu4c/common/characterproperties.cpp
new file mode 100644
index 0000000000..7b50a4e205
--- /dev/null
+++ b/thirdparty/icu4c/common/characterproperties.cpp
@@ -0,0 +1,383 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// characterproperties.cpp
+// created: 2018sep03 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/localpointer.h"
+#include "unicode/uchar.h"
+#include "unicode/ucpmap.h"
+#include "unicode/ucptrie.h"
+#include "unicode/umutablecptrie.h"
+#include "unicode/uniset.h"
+#include "unicode/uscript.h"
+#include "unicode/uset.h"
+#include "cmemory.h"
+#include "mutex.h"
+#include "normalizer2impl.h"
+#include "uassert.h"
+#include "ubidi_props.h"
+#include "ucase.h"
+#include "ucln_cmn.h"
+#include "umutex.h"
+#include "uprops.h"
+
+using icu::LocalPointer;
+#if !UCONFIG_NO_NORMALIZATION
+using icu::Normalizer2Factory;
+using icu::Normalizer2Impl;
+#endif
+using icu::UInitOnce;
+using icu::UnicodeSet;
+
+namespace {
+
+ // Forward declaration: the init functions below register this cleanup callback.
+ UBool U_CALLCONV characterproperties_cleanup();
+
+ // One cache slot per property source, followed by one slot per integer property.
+ constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START;
+
+ // A lazily initialized inclusion set together with its once-only init state.
+ struct Inclusion {
+ UnicodeSet *fSet = nullptr;
+ UInitOnce fInitOnce = U_INITONCE_INITIALIZER;
+ };
+ // Indexed by UPropertySource for the first UPROPS_SRC_COUNT entries and by
+ // UPROPS_SRC_COUNT + prop - UCHAR_INT_START for integer properties.
+ Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions()
+
+ // Cache of binary property sets, indexed by the property; filled by u_getBinaryPropertySet().
+ UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {};
+
+ // Cache of integer property maps, indexed by property - UCHAR_INT_START; filled by u_getIntPropertyMap().
+ UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {};
+
+ // Held by u_getBinaryPropertySet() and u_getIntPropertyMap() while they read or fill the caches above.
+ icu::UMutex cpMutex;
+
+//----------------------------------------------------------------
+// Inclusions list
+//----------------------------------------------------------------
+
+// USetAdder implementation
+// Does not use uset.h to reduce code dependencies
+ // USetAdder callback: adds a single code point to the UnicodeSet behind the USet handle.
+ void U_CALLCONV
+ _set_add(USet *set, UChar32 c) {
+     UnicodeSet *target = (UnicodeSet *)set;
+     target->add(c);
+ }
+
+ // USetAdder callback: adds the range start..end to the UnicodeSet behind the USet handle.
+ void U_CALLCONV
+ _set_addRange(USet *set, UChar32 start, UChar32 end) {
+     UnicodeSet *target = (UnicodeSet *)set;
+     target->add(start, end);
+ }
+
+ // USetAdder callback: adds a string to the UnicodeSet behind the USet handle.
+ // The first constructor argument is true exactly when length is negative.
+ void U_CALLCONV
+ _set_addString(USet *set, const UChar *str, int32_t length) {
+     UnicodeSet *target = (UnicodeSet *)set;
+     const icu::UnicodeString text((UBool)(length<0), str, length);
+     target->add(text);
+ }
+
+ // Cleanup callback: frees every cached inclusion set, binary property set and
+ // integer property map, and resets the caches to their initial state.
+ UBool U_CALLCONV characterproperties_cleanup() {
+     for (int32_t i = 0; i < NUM_INCLUSIONS; ++i) {
+         Inclusion &entry = gInclusions[i];
+         delete entry.fSet;
+         entry.fSet = nullptr;
+         entry.fInitOnce.reset();
+     }
+     for (UnicodeSet *&cachedSet : sets) {
+         delete cachedSet;
+         cachedSet = nullptr;
+     }
+     for (UCPMap *&cachedMap : maps) {
+         ucptrie_close(reinterpret_cast<UCPTrie *>(cachedMap));
+         cachedMap = nullptr;
+     }
+     return TRUE;
+ }
+
+ // Builds the inclusion set for one property source and stores it in
+ // gInclusions[src]. On any failure errorCode is set and the slot stays null.
+ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
+ // This function is invoked only via umtx_initOnce().
+ U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT);
+ if (src == UPROPS_SRC_NONE) {
+ errorCode = U_INTERNAL_PROGRAM_ERROR;
+ return;
+ }
+ U_ASSERT(gInclusions[src].fSet == nullptr);
+
+ LocalPointer<UnicodeSet> incl(new UnicodeSet());
+ if (incl.isNull()) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ // Adapter that lets the C-style addPropertyStarts functions write into incl.
+ USetAdder sa = {
+ (USet *)incl.getAlias(),
+ _set_add,
+ _set_addRange,
+ _set_addString,
+ nullptr, // don't need remove()
+ nullptr // don't need removeRange()
+ };
+
+ // Collect the property starts from the data source(s) that back src.
+ switch(src) {
+ case UPROPS_SRC_CHAR:
+ uchar_addPropertyStarts(&sa, &errorCode);
+ break;
+ case UPROPS_SRC_PROPSVEC:
+ upropsvec_addPropertyStarts(&sa, &errorCode);
+ break;
+ case UPROPS_SRC_CHAR_AND_PROPSVEC:
+ uchar_addPropertyStarts(&sa, &errorCode);
+ upropsvec_addPropertyStarts(&sa, &errorCode);
+ break;
+ #if !UCONFIG_NO_NORMALIZATION
+ case UPROPS_SRC_CASE_AND_NORM: {
+ const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
+ if(U_SUCCESS(errorCode)) {
+ impl->addPropertyStarts(&sa, errorCode);
+ }
+ ucase_addPropertyStarts(&sa, &errorCode);
+ break;
+ }
+ case UPROPS_SRC_NFC: {
+ const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
+ if(U_SUCCESS(errorCode)) {
+ impl->addPropertyStarts(&sa, errorCode);
+ }
+ break;
+ }
+ case UPROPS_SRC_NFKC: {
+ const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(errorCode);
+ if(U_SUCCESS(errorCode)) {
+ impl->addPropertyStarts(&sa, errorCode);
+ }
+ break;
+ }
+ case UPROPS_SRC_NFKC_CF: {
+ const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(errorCode);
+ if(U_SUCCESS(errorCode)) {
+ impl->addPropertyStarts(&sa, errorCode);
+ }
+ break;
+ }
+ case UPROPS_SRC_NFC_CANON_ITER: {
+ const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
+ if(U_SUCCESS(errorCode)) {
+ impl->addCanonIterPropertyStarts(&sa, errorCode);
+ }
+ break;
+ }
+ #endif
+ case UPROPS_SRC_CASE:
+ ucase_addPropertyStarts(&sa, &errorCode);
+ break;
+ case UPROPS_SRC_BIDI:
+ ubidi_addPropertyStarts(&sa, &errorCode);
+ break;
+ case UPROPS_SRC_INPC:
+ case UPROPS_SRC_INSC:
+ case UPROPS_SRC_VO:
+ uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode);
+ break;
+ default:
+ // Unknown source (including the normalization sources when normalization is compiled out).
+ errorCode = U_INTERNAL_PROGRAM_ERROR;
+ break;
+ }
+
+ if (U_FAILURE(errorCode)) {
+ return;
+ }
+ if (incl->isBogus()) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ // Compact for caching.
+ incl->compact();
+ // Ownership moves from the LocalPointer to the global cache.
+ gInclusions[src].fSet = incl.orphan();
+ ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
+ }
+
+ // Returns the cached inclusion set for a property source, building it on first use.
+ // Sets U_ILLEGAL_ARGUMENT_ERROR and returns nullptr for an out-of-range source.
+ const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorCode) {
+     if (U_FAILURE(errorCode)) {
+         return nullptr;
+     }
+     const bool inRange = 0 <= src && src < UPROPS_SRC_COUNT;
+     if (!inRange) {
+         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+         return nullptr;
+     }
+     Inclusion &entry = gInclusions[src];
+     umtx_initOnce(entry.fInitOnce, &initInclusion, src, errorCode);
+     return entry.fSet;
+ }
+
+ // Builds the inclusion set for one integer property: the code points, taken
+ // from the inclusion set of the property's source, at which the property value
+ // differs from the value seen just before. The set is stored in gInclusions.
+ void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) {
+     // This function is invoked only via umtx_initOnce().
+     U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT);
+     const int32_t slot = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
+     U_ASSERT(gInclusions[slot].fSet == nullptr);
+     const UnicodeSet *sourceStarts = getInclusionsForSource(uprops_getSource(prop), errorCode);
+     if (U_FAILURE(errorCode)) {
+         return;
+     }
+
+     LocalPointer<UnicodeSet> valueStarts(new UnicodeSet(0, 0));
+     if (valueStarts.isNull()) {
+         errorCode = U_MEMORY_ALLOCATION_ERROR;
+         return;
+     }
+     const int32_t rangeCount = sourceStarts->getRangeCount();
+     int32_t lastValue = 0;
+     for (int32_t r = 0; r < rangeCount; ++r) {
+         const UChar32 last = sourceStarts->getRangeEnd(r);
+         UChar32 c = sourceStarts->getRangeStart(r);
+         while (c <= last) {
+             // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
+             const int32_t current = u_getIntPropertyValue(c, prop);
+             if (current != lastValue) {
+                 // The value changes here, so c starts a new run.
+                 valueStarts->add(c);
+                 lastValue = current;
+             }
+             ++c;
+         }
+     }
+
+     if (valueStarts->isBogus()) {
+         errorCode = U_MEMORY_ALLOCATION_ERROR;
+         return;
+     }
+     // Compact for caching.
+     valueStarts->compact();
+     gInclusions[slot].fSet = valueStarts.orphan();
+     ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
+ }
+
+} // namespace
+
+U_NAMESPACE_BEGIN
+
+ // Returns the inclusion set for a property. Integer properties have their own
+ // lazily built per-property set; all other properties use the set of their source.
+ const UnicodeSet *CharacterProperties::getInclusionsForProperty(
+         UProperty prop, UErrorCode &errorCode) {
+     if (U_FAILURE(errorCode)) {
+         return nullptr;
+     }
+     const bool isIntProperty = UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT;
+     if (!isIntProperty) {
+         return getInclusionsForSource(uprops_getSource(prop), errorCode);
+     }
+     Inclusion &entry = gInclusions[UPROPS_SRC_COUNT + prop - UCHAR_INT_START];
+     umtx_initOnce(entry.fInitOnce, &initIntPropInclusion, prop, errorCode);
+     return entry.fSet;
+ }
+
+U_NAMESPACE_END
+
+namespace {
+
+ // Builds a frozen UnicodeSet of all code points that have the given binary
+ // property. Only the code points in the property's inclusion set are tested;
+ // a run that is still open after the last one is extended to U+10FFFF.
+ // Returns a newly allocated set, or nullptr with errorCode set.
+ UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
+     if (U_FAILURE(errorCode)) {
+         return nullptr;
+     }
+     LocalPointer<UnicodeSet> result(new UnicodeSet());
+     if (result.isNull()) {
+         errorCode = U_MEMORY_ALLOCATION_ERROR;
+         return nullptr;
+     }
+     const UnicodeSet *starts =
+         icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
+     if (U_FAILURE(errorCode)) {
+         return nullptr;
+     }
+     const int32_t rangeCount = starts->getRangeCount();
+     // First code point of the current run of "true" values, or -1 outside a run.
+     UChar32 runStart = -1;
+
+     for (int32_t r = 0; r < rangeCount; ++r) {
+         const UChar32 last = starts->getRangeEnd(r);
+         for (UChar32 c = starts->getRangeStart(r); c <= last; ++c) {
+             // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch.
+             const bool has = u_hasBinaryProperty(c, property);
+             if (has && runStart < 0) {
+                 // Transition from false to true.
+                 runStart = c;
+             } else if (!has && runStart >= 0) {
+                 // Transition from true to false.
+                 result->add(runStart, c - 1);
+                 runStart = -1;
+             }
+         }
+     }
+     if (runStart >= 0) {
+         result->add(runStart, 0x10FFFF);
+     }
+     result->freeze();
+     return result.orphan();
+ }
+
+ /**
+  * Builds an immutable code point map with the values of an integer property.
+  *
+  * The property is sampled at the code points of its inclusion set; each run of
+  * equal values is written into a mutable trie as one range, and the trie is
+  * then frozen with a value width chosen from the property's maximum value.
+  *
+  * @param property  the integer property to map
+  * @param errorCode ICU error code
+  * @return the new map; nullptr if errorCode indicated a failure before the
+  *         trie was built
+  */
+ UCPMap *makeMap(UProperty property, UErrorCode &errorCode) {
+     if (U_FAILURE(errorCode)) { return nullptr; }
+     // Value the trie is opened with; runs that carry this value are not written.
+     uint32_t nullValue = property == UCHAR_SCRIPT ? USCRIPT_UNKNOWN : 0;
+     icu::LocalUMutableCPTriePointer mutableTrie(
+         umutablecptrie_open(nullValue, nullValue, &errorCode));
+     const UnicodeSet *inclusions =
+         icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
+     if (U_FAILURE(errorCode)) { return nullptr; }
+     int32_t numRanges = inclusions->getRangeCount();
+     UChar32 start = 0;
+     uint32_t value = nullValue;
+
+     for (int32_t i = 0; i < numRanges; ++i) {
+         UChar32 rangeEnd = inclusions->getRangeEnd(i);
+         for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) {
+             // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
+             uint32_t nextValue = u_getIntPropertyValue(c, property);
+             if (value != nextValue) {
+                 // The run [start, c - 1] has ended; store it unless it is the null value.
+                 if (value != nullValue) {
+                     umutablecptrie_setRange(mutableTrie.getAlias(), start, c - 1, value, &errorCode);
+                 }
+                 start = c;
+                 value = nextValue;
+             }
+         }
+     }
+     // Store the final run up to U+10FFFF. The test must use nullValue, like the
+     // test inside the loop: when nullValue is not 0 (UCHAR_SCRIPT), a final run
+     // with value 0 still has to be written.
+     if (value != nullValue) {
+         umutablecptrie_setRange(mutableTrie.getAlias(), start, 0x10FFFF, value, &errorCode);
+     }
+
+     UCPTrieType type;
+     if (property == UCHAR_BIDI_CLASS || property == UCHAR_GENERAL_CATEGORY) {
+         type = UCPTRIE_TYPE_FAST;
+     } else {
+         type = UCPTRIE_TYPE_SMALL;
+     }
+     // Pick the narrowest value width that can hold the property's maximum value.
+     UCPTrieValueWidth valueWidth;
+     // TODO: UCharacterProperty.IntProperty
+     int32_t max = u_getIntPropertyMaxValue(property);
+     if (max <= 0xff) {
+         valueWidth = UCPTRIE_VALUE_BITS_8;
+     } else if (max <= 0xffff) {
+         valueWidth = UCPTRIE_VALUE_BITS_16;
+     } else {
+         valueWidth = UCPTRIE_VALUE_BITS_32;
+     }
+     return reinterpret_cast<UCPMap *>(
+         umutablecptrie_buildImmutable(mutableTrie.getAlias(), type, valueWidth, &errorCode));
+ }
+
+} // namespace
+
+U_NAMESPACE_USE
+
+ // C API: returns the cached set for a binary property, building it on first use
+ // while holding cpMutex. Sets U_ILLEGAL_ARGUMENT_ERROR for a property outside
+ // the binary property range.
+ U_CAPI const USet * U_EXPORT2
+ u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) {
+     if (U_FAILURE(*pErrorCode)) {
+         return nullptr;
+     }
+     if (property < 0 || UCHAR_BINARY_LIMIT <= property) {
+         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+         return nullptr;
+     }
+     Mutex m(&cpMutex);
+     UnicodeSet *&cached = sets[property];
+     if (cached == nullptr) {
+         cached = makeSet(property, *pErrorCode);
+     }
+     if (U_FAILURE(*pErrorCode)) {
+         return nullptr;
+     }
+     return cached->toUSet();
+ }
+
+ // C API: returns the cached map for an integer property, building it on first
+ // use while holding cpMutex. Sets U_ILLEGAL_ARGUMENT_ERROR for a property
+ // outside the integer property range.
+ U_CAPI const UCPMap * U_EXPORT2
+ u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) {
+     if (U_FAILURE(*pErrorCode)) {
+         return nullptr;
+     }
+     if (property < UCHAR_INT_START || UCHAR_INT_LIMIT <= property) {
+         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+         return nullptr;
+     }
+     Mutex m(&cpMutex);
+     UCPMap *&cached = maps[property - UCHAR_INT_START];
+     if (cached == nullptr) {
+         cached = makeMap(property, *pErrorCode);
+     }
+     return cached;
+ }
diff --git a/thirdparty/icu4c/common/chariter.cpp b/thirdparty/icu4c/common/chariter.cpp
new file mode 100644
index 0000000000..887119a0eb
--- /dev/null
+++ b/thirdparty/icu4c/common/chariter.cpp
@@ -0,0 +1,100 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#include "unicode/chariter.h"
+
+U_NAMESPACE_BEGIN
+
+ // Out-of-line special members of ForwardCharacterIterator.
+ // The destructor and the default constructor have empty bodies.
+ ForwardCharacterIterator::~ForwardCharacterIterator() {}
+ ForwardCharacterIterator::ForwardCharacterIterator()
+ : UObject()
+ {}
+ // The copy constructor only forwards to the UObject copy constructor.
+ ForwardCharacterIterator::ForwardCharacterIterator(const ForwardCharacterIterator &other)
+ : UObject(other)
+ {}
+
+
+ // Default constructor: an iterator over empty text, with every index at 0.
+ CharacterIterator::CharacterIterator()
+ : textLength(0), pos(0), begin(0), end(0) {
+ }
+
+ // Constructs an iterator over text of the given length, covering 0..length and
+ // positioned at 0. A negative length is treated as 0.
+ CharacterIterator::CharacterIterator(int32_t length)
+ : textLength(length < 0 ? 0 : length), pos(0), begin(0), end(length < 0 ? 0 : length) {
+ }
+
+ // Constructs an iterator over text of the given length, covering 0..length.
+ // A negative length is treated as 0 and the position is pinned into 0..end.
+ CharacterIterator::CharacterIterator(int32_t length, int32_t position)
+ : textLength(length), pos(position), begin(0), end(length) {
+     if (length < 0) {
+         textLength = 0;
+         end = 0;
+     }
+     pos = (position < 0) ? 0 : ((position > end) ? end : position);
+ }
+
+ // Constructs an iterator over the sub-range textBegin..textEnd of text with the
+ // given length. Every argument is pinned into its valid range, in dependency
+ // order: length >= 0, begin in 0..textLength, end in begin..textLength,
+ // pos in begin..end.
+ CharacterIterator::CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position)
+ : textLength(length), pos(position), begin(textBegin), end(textEnd) {
+     textLength = (length < 0) ? 0 : length;
+     begin = (textBegin < 0) ? 0 : ((textBegin > textLength) ? textLength : textBegin);
+     end = (textEnd < begin) ? begin : ((textEnd > textLength) ? textLength : textEnd);
+     pos = (position < begin) ? begin : ((position > end) ? end : position);
+ }
+
+ // Destructor with an empty body.
+ CharacterIterator::~CharacterIterator() {}
+
+ // Copy constructor: copies the base class and all four index fields.
+ CharacterIterator::CharacterIterator(const CharacterIterator &that) :
+ ForwardCharacterIterator(that),
+ textLength(that.textLength), pos(that.pos), begin(that.begin), end(that.end)
+ {
+ }
+
+ // Copy assignment: assigns the base class part, then copies the range
+ // (begin, end), the position and the text length from the other iterator.
+ CharacterIterator &
+ CharacterIterator::operator=(const CharacterIterator &that) {
+     ForwardCharacterIterator::operator=(that);
+     begin = that.begin;
+     end = that.end;
+     pos = that.pos;
+     textLength = that.textLength;
+     return *this;
+ }
+
+ // implementing first[32]PostInc() directly in a subclass should be faster
+ // but these implementations make subclassing a little easier
+ //
+ // Resets the iterator with setToStart() and returns the result of nextPostInc().
+ UChar
+ CharacterIterator::firstPostInc(void) {
+ setToStart();
+ return nextPostInc();
+ }
+
+ // Resets the iterator with setToStart() and returns the result of next32PostInc().
+ UChar32
+ CharacterIterator::first32PostInc(void) {
+ setToStart();
+ return next32PostInc();
+ }
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/charstr.cpp b/thirdparty/icu4c/common/charstr.cpp
new file mode 100644
index 0000000000..318a185b3f
--- /dev/null
+++ b/thirdparty/icu4c/common/charstr.cpp
@@ -0,0 +1,239 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: charstr.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010may19
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uinvchar.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_BEGIN
+
+ // Move constructor: takes over src's buffer and length, then resets src's length to 0.
+ CharString::CharString(CharString&& src) U_NOEXCEPT
+ : buffer(std::move(src.buffer)), len(src.len) {
+ src.len = 0; // not strictly necessary because we make no guarantees on the source string
+ }
+
+ // Move assignment: takes over src's buffer and length, then resets src's length to 0.
+ CharString& CharString::operator=(CharString&& src) U_NOEXCEPT {
+ buffer = std::move(src.buffer);
+ // Read src.len before it is cleared on the next line.
+ len = src.len;
+ src.len = 0; // not strictly necessary because we make no guarantees on the source string
+ return *this;
+ }
+
+ // Returns a uprv_malloc()-allocated copy of the string's len + 1 bytes
+ // (the contents plus the terminating NUL), or nullptr on failure.
+ // Sets U_MEMORY_ALLOCATION_ERROR if the allocation fails.
+ char *CharString::cloneData(UErrorCode &errorCode) const {
+     if (U_FAILURE(errorCode)) {
+         return nullptr;
+     }
+     char *copy = static_cast<char *>(uprv_malloc(len + 1));
+     if (copy != nullptr) {
+         uprv_memcpy(copy, buffer.getAlias(), len + 1);
+         return copy;
+     }
+     errorCode = U_MEMORY_ALLOCATION_ERROR;
+     return nullptr;
+ }
+
+ // Copies the string into dest when it fits into capacity, and lets
+ // u_terminateChars() handle termination and the return value.
+ // Returns len when errorCode is already a failure or the arguments are invalid
+ // (negative capacity, or a null dest with a positive capacity).
+ int32_t CharString::extract(char *dest, int32_t capacity, UErrorCode &errorCode) const {
+     if (U_FAILURE(errorCode)) {
+         return len;
+     }
+     const bool badArguments = capacity < 0 || (capacity > 0 && dest == nullptr);
+     if (badArguments) {
+         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+         return len;
+     }
+     const char *src = buffer.getAlias();
+     // Skip the copy for an empty string, a too-small buffer, or dest being our own buffer.
+     const bool shouldCopy = 0 < len && len <= capacity && src != dest;
+     if (shouldCopy) {
+         uprv_memcpy(dest, src, len);
+     }
+     return u_terminateChars(dest, capacity, len, &errorCode);
+ }
+
+ // Replaces this string's contents with a copy of s (including the NUL).
+ // Does nothing on a prior failure, on self-copy, or if the buffer cannot grow.
+ CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
+     if (U_FAILURE(errorCode) || this == &s) {
+         return *this;
+     }
+     if (!ensureCapacity(s.len+1, 0, errorCode)) {
+         return *this;
+     }
+     len = s.len;
+     uprv_memcpy(buffer.getAlias(), s.buffer.getAlias(), len+1);
+     return *this;
+ }
+
+ // Returns the index of the last occurrence of c, or -1 if c does not occur.
+ int32_t CharString::lastIndexOf(char c) const {
+     for (int32_t i = len - 1; i >= 0; --i) {
+         if (buffer[i] == c) {
+             return i;
+         }
+     }
+     return -1;
+ }
+
+ // Returns true if s occurs somewhere in this string.
+ // An empty s is never considered to be contained.
+ bool CharString::contains(StringPiece s) const {
+     if (s.empty()) {
+         return false;
+     }
+     const char *text = buffer.getAlias();
+     // Last index at which s could still fit; negative when s is longer than this string.
+     const int32_t lastStart = len - s.length();
+     int32_t i = 0;
+     while (i <= lastStart) {
+         if (uprv_memcmp(text + i, s.data(), s.length()) == 0) {
+             return true;
+         }
+         ++i;
+     }
+     return false;
+ }
+
+ // Shortens the string to newLength bytes (a negative value means 0).
+ // Has no effect if the string is not longer than newLength.
+ CharString &CharString::truncate(int32_t newLength) {
+     const int32_t target = (newLength < 0) ? 0 : newLength;
+     if (target < len) {
+         len = target;
+         buffer[len] = 0;
+     }
+     return *this;
+ }
+
+ // Appends one char and re-terminates the string.
+ // Does nothing if room for the char plus the NUL cannot be ensured.
+ CharString &CharString::append(char c, UErrorCode &errorCode) {
+     if (!ensureCapacity(len+2, 0, errorCode)) {
+         return *this;
+     }
+     buffer[len] = c;
+     ++len;
+     buffer[len] = 0;
+     return *this;
+ }
+
+ // Appends sLength bytes from s; sLength == -1 means s is NUL-terminated.
+ // Sets U_ILLEGAL_ARGUMENT_ERROR for sLength < -1 or a null s with a non-zero
+ // length. Handles two aliasing cases: s pointing at the append position of
+ // this string's own buffer, and s pointing into this string's current contents.
+ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return *this;
+ }
+ if(sLength<-1 || (s==NULL && sLength!=0)) {
+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return *this;
+ }
+ if(sLength<0) {
+ sLength= static_cast<int32_t>(uprv_strlen(s));
+ }
+ if(sLength>0) {
+ if(s==(buffer.getAlias()+len)) {
+ // The caller wrote into the getAppendBuffer().
+ if(sLength>=(buffer.getCapacity()-len)) {
+ // The caller wrote too much.
+ errorCode=U_INTERNAL_PROGRAM_ERROR;
+ } else {
+ // The bytes are already in place; only the length and the NUL need updating.
+ buffer[len+=sLength]=0;
+ }
+ } else if(buffer.getAlias()<=s && s<(buffer.getAlias()+len) &&
+ sLength>=(buffer.getCapacity()-len)
+ ) {
+ // (Part of) this string is appended to itself which requires reallocation,
+ // so we have to make a copy of the substring and append that.
+ return append(CharString(s, sLength, errorCode), errorCode);
+ } else if(ensureCapacity(len+sLength+1, 0, errorCode)) {
+ // Regular case: copy the bytes behind the current contents and re-terminate.
+ uprv_memcpy(buffer.getAlias()+len, s, sLength);
+ buffer[len+=sLength]=0;
+ }
+ }
+ return *this;
+ }
+
+ // Returns a pointer to the append position of the buffer with room for at
+ // least minCapacity bytes (plus the NUL), growing the buffer if needed.
+ // resultCapacity receives the available room. On failure the function
+ // returns NULL and sets resultCapacity to 0.
+ char *CharString::getAppendBuffer(int32_t minCapacity,
+                                   int32_t desiredCapacityHint,
+                                   int32_t &resultCapacity,
+                                   UErrorCode &errorCode) {
+     if (U_FAILURE(errorCode)) {
+         resultCapacity = 0;
+         return NULL;
+     }
+     int32_t spare = buffer.getCapacity()-len-1; // -1 for NUL
+     if (spare < minCapacity) {
+         if (!ensureCapacity(len+minCapacity+1, len+desiredCapacityHint+1, errorCode)) {
+             resultCapacity = 0;
+             return NULL;
+         }
+         // Recompute the room after the buffer has grown.
+         spare = buffer.getCapacity()-len-1;
+     }
+     resultCapacity = spare;
+     return buffer.getAlias()+len;
+ }
+
+ // Convenience overload: forwards the UnicodeString's buffer and length to the
+ // UChar-pointer overload.
+ CharString &CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode) {
+ return appendInvariantChars(s.getBuffer(), s.length(), errorCode);
+ }
+
+ // Appends ucharsLen UChars converted with u_UCharsToChars().
+ // Sets U_INVARIANT_CONVERSION_ERROR and appends nothing if
+ // uprv_isInvariantUString() rejects the input.
+ CharString &CharString::appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode &errorCode) {
+     if (U_FAILURE(errorCode)) {
+         return *this;
+     }
+     if (uprv_isInvariantUString(uchars, ucharsLen)) {
+         if (ensureCapacity(len+ucharsLen+1, 0, errorCode)) {
+             char *tail = buffer.getAlias()+len;
+             u_UCharsToChars(uchars, tail, ucharsLen);
+             len += ucharsLen;
+             buffer[len] = 0;
+         }
+     } else {
+         errorCode = U_INVARIANT_CONVERSION_ERROR;
+     }
+     return *this;
+ }
+
+ // Makes sure the buffer holds at least `capacity` bytes, preserving the
+ // current contents (len + 1 bytes). A desiredCapacityHint of 0 means
+ // "capacity plus the current capacity". If resizing to the hint is not
+ // possible, the exact capacity is tried before giving up.
+ // Returns FALSE (and sets U_MEMORY_ALLOCATION_ERROR on allocation failure)
+ // if the capacity cannot be ensured.
+ UBool CharString::ensureCapacity(int32_t capacity,
+                                  int32_t desiredCapacityHint,
+                                  UErrorCode &errorCode) {
+     if (U_FAILURE(errorCode)) {
+         return FALSE;
+     }
+     if (capacity <= buffer.getCapacity()) {
+         return TRUE;
+     }
+     if (desiredCapacityHint == 0) {
+         desiredCapacityHint = capacity+buffer.getCapacity();
+     }
+     // First try the (larger) hinted size, then fall back to the minimum.
+     bool grown = desiredCapacityHint > capacity &&
+                  buffer.resize(desiredCapacityHint, len+1) != NULL;
+     if (!grown) {
+         grown = buffer.resize(capacity, len+1) != NULL;
+     }
+     if (!grown) {
+         errorCode = U_MEMORY_ALLOCATION_ERROR;
+         return FALSE;
+     }
+     return TRUE;
+ }
+
+ // Appends s as a path component. A directory separator is inserted first when
+ // this string is non-empty and does not already end with one.
+ // An empty s leaves the string unchanged.
+ CharString &CharString::appendPathPart(StringPiece s, UErrorCode &errorCode) {
+     if (U_FAILURE(errorCode) || s.length() == 0) {
+         return *this;
+     }
+     if (len > 0) {
+         const char last = buffer[len-1];
+         if (last != U_FILE_SEP_CHAR && last != U_FILE_ALT_SEP_CHAR) {
+             append(getDirSepChar(), errorCode);
+         }
+     }
+     append(s, errorCode);
+     return *this;
+ }
+
+ // Appends a directory separator unless the string is empty or already ends
+ // with U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR.
+ CharString &CharString::ensureEndsWithFileSeparator(UErrorCode &errorCode) {
+     if (U_FAILURE(errorCode) || len <= 0) {
+         return *this;
+     }
+     const char last = buffer[len-1];
+     if (last != U_FILE_SEP_CHAR && last != U_FILE_ALT_SEP_CHAR) {
+         append(getDirSepChar(), errorCode);
+     }
+     return *this;
+ }
+
+ // Returns the directory separator to use when extending this path:
+ // U_FILE_SEP_CHAR by default. On platforms where the alternate separator
+ // differs, U_FILE_ALT_SEP_CHAR is returned when the string contains it but
+ // does not contain U_FILE_SEP_CHAR.
+ char CharString::getDirSepChar() const {
+ char dirSepChar = U_FILE_SEP_CHAR;
+ #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
+ // We may need to return a different directory separator when building for Cygwin or MSYS2.
+ if(len>0 && !uprv_strchr(data(), U_FILE_SEP_CHAR) && uprv_strchr(data(), U_FILE_ALT_SEP_CHAR))
+ dirSepChar = U_FILE_ALT_SEP_CHAR;
+ #endif
+ return dirSepChar;
+ }
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/charstr.h b/thirdparty/icu4c/common/charstr.h
new file mode 100644
index 0000000000..6619faac61
--- /dev/null
+++ b/thirdparty/icu4c/common/charstr.h
@@ -0,0 +1,190 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2001-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 11/19/2001 aliu Creation.
+* 05/19/2010 markus Rewritten from scratch
+**********************************************************************
+*/
+
+#ifndef CHARSTRING_H
+#define CHARSTRING_H
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/uobject.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+// Windows needs us to DLL-export the MaybeStackArray template specialization,
+// but MacOS X cannot handle it. Same as in digitlst.h.
+#if !U_PLATFORM_IS_DARWIN_BASED
+template class U_COMMON_API MaybeStackArray<char, 40>;
+#endif
+
+/**
+ * ICU-internal char * string class.
+ * This class does not assume or enforce any particular character encoding.
+ * Raw bytes can be stored. The string object owns its characters.
+ * A terminating NUL is stored, but the class does not prevent embedded NUL characters.
+ *
+ * This class wants to be convenient but is also deliberately minimalist.
+ * Please do not add methods if they only add minor convenience.
+ * For example:
+ * cs.data()[5]='a'; // no need for setCharAt(5, 'a')
+ *
+ * The characters live in the private MaybeStackArray<char, 40> "buffer";
+ * "len" is the current length, not counting the terminating NUL.
+ */
+class U_COMMON_API CharString : public UMemory {
+public:
+ CharString() : len(0) { buffer[0]=0; } // empty string: length 0, NUL stored at index 0
+ CharString(StringPiece s, UErrorCode &errorCode) : len(0) { // starts empty, then appends s
+ buffer[0]=0;
+ append(s, errorCode);
+ }
+ CharString(const CharString &s, UErrorCode &errorCode) : len(0) { // explicit copy that reports errors via errorCode
+ buffer[0]=0;
+ append(s, errorCode);
+ }
+ CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) { // starts empty, then appends s/sLength
+ buffer[0]=0;
+ append(s, sLength, errorCode);
+ }
+ ~CharString() {} // nothing to do here; the buffer member's own destructor runs
+
+ /**
+ * Move constructor; might leave src in an undefined state.
+ * This string will have the same contents and state that the source string had.
+ */
+ CharString(CharString &&src) U_NOEXCEPT;
+ /**
+ * Move assignment operator; might leave src in an undefined state.
+ * This string will have the same contents and state that the source string had.
+ * The behavior is undefined if *this and src are the same object.
+ */
+ CharString &operator=(CharString &&src) U_NOEXCEPT;
+
+ /**
+ * Replaces this string's contents with the other string's contents.
+ * CharString does not support the standard copy constructor nor
+ * the assignment operator, to make copies explicit and to
+ * use a UErrorCode where memory allocations might be needed.
+ */
+ CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
+
+ UBool isEmpty() const { return len==0; }
+ int32_t length() const { return len; } // number of chars, excluding the terminating NUL
+ char operator[](int32_t index) const { return buffer[index]; } // read-only access; index is not range-checked
+ StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); } // non-owning view of the current contents
+
+ const char *data() const { return buffer.getAlias(); }
+ char *data() { return buffer.getAlias(); } // writable pointer into the internal buffer
+ /**
+ * Allocates length()+1 chars and copies the NUL-terminated data().
+ * The caller must uprv_free() the result.
+ */
+ char *cloneData(UErrorCode &errorCode) const;
+ /**
+ * Copies the contents of the string into dest.
+ * Checks if there is enough space in dest, extracts the entire string if possible,
+ * and NUL-terminates dest if possible.
+ *
+ * If the string fits into dest but cannot be NUL-terminated (length()==capacity),
+ * then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
+ * If the string itself does not fit into dest (length()>capacity),
+ * then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+ *
+ * @param dest Destination string buffer.
+ * @param capacity Size of the dest buffer (number of chars).
+ * @param errorCode ICU error code.
+ * @return length()
+ */
+ int32_t extract(char *dest, int32_t capacity, UErrorCode &errorCode) const;
+
+ bool operator==(StringPiece other) const { // equal when lengths match and, if non-empty, the bytes compare equal
+ return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
+ }
+ bool operator!=(StringPiece other) const {
+ return !operator==(other);
+ }
+
+ /** @return last index of c, or -1 if c is not in this string */
+ int32_t lastIndexOf(char c) const;
+
+ bool contains(StringPiece s) const; // NOTE(review): defined outside this header; presumably a substring test -- confirm
+
+ CharString &clear() { len=0; buffer[0]=0; return *this; } // resets to the empty string; does not touch buffer capacity
+ CharString &truncate(int32_t newLength); // NOTE(review): defined outside this header; handling of out-of-range newLength not visible here
+
+ CharString &append(char c, UErrorCode &errorCode);
+ CharString &append(StringPiece s, UErrorCode &errorCode) { // forwards to append(const char *, int32_t, UErrorCode &)
+ return append(s.data(), s.length(), errorCode);
+ }
+ CharString &append(const CharString &s, UErrorCode &errorCode) { // forwards to append(const char *, int32_t, UErrorCode &)
+ return append(s.data(), s.length(), errorCode);
+ }
+ CharString &append(const char *s, int32_t sLength, UErrorCode &status);
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
+ * There will additionally be space for a terminating NUL right at resultCapacity.
+ * (This function is similar to ByteSink.GetAppendBuffer().)
+ *
+ * The returned buffer is only valid until the next write operation
+ * on this string.
+ *
+ * After writing at most resultCapacity bytes, call append() with the
+ * pointer returned from this function and the number of bytes written.
+ *
+ * @param minCapacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desiredCapacityHint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param resultCapacity will be set to the capacity of the returned buffer
+ * @param errorCode in/out error code
+ * @return a buffer with resultCapacity>=min_capacity
+ */
+ char *getAppendBuffer(int32_t minCapacity,
+ int32_t desiredCapacityHint,
+ int32_t &resultCapacity,
+ UErrorCode &errorCode);
+
+ CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
+ CharString &appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode& errorCode); // sets U_INVARIANT_CONVERSION_ERROR if uchars is not all invariant characters
+
+ /**
+ * Appends a filename/path part, e.g., a directory name.
+ * First appends a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR if necessary.
+ * Does nothing if s is empty.
+ */
+ CharString &appendPathPart(StringPiece s, UErrorCode &errorCode);
+
+ /**
+ * Appends a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR if this string is not empty
+ * and does not already end with a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR.
+ */
+ CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode);
+
+private:
+ MaybeStackArray<char, 40> buffer; // character storage, including the terminating NUL
+ int32_t len; // current string length, excluding the terminating NUL
+
+ UBool ensureCapacity(int32_t capacity, int32_t desiredCapacityHint, UErrorCode &errorCode); // grows buffer to >= capacity; FALSE and U_MEMORY_ALLOCATION_ERROR on failure
+
+ CharString(const CharString &other); // forbid copying of this class
+ CharString &operator=(const CharString &other); // forbid copying of this class
+
+ /**
+ * Returns U_FILE_ALT_SEP_CHAR if found in string, and U_FILE_SEP_CHAR is not found.
+ * Otherwise returns U_FILE_SEP_CHAR.
+ */
+ char getDirSepChar() const;
+};
+
+U_NAMESPACE_END
+
+#endif
+//eof
diff --git a/thirdparty/icu4c/common/charstrmap.h b/thirdparty/icu4c/common/charstrmap.h
new file mode 100644
index 0000000000..3320a46208
--- /dev/null
+++ b/thirdparty/icu4c/common/charstrmap.h
@@ -0,0 +1,55 @@
+// © 2020 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// charstrmap.h
+// created: 2020sep01 Frank Yung-Fong Tang
+
+#ifndef __CHARSTRMAP_H__
+#define __CHARSTRMAP_H__
+
+#include <utility>
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Map of const char * keys & values.
+ * Stores pointers as is: Does not own/copy/adopt/release strings.
+ *
+ * The map owns only the underlying UHashtable, which is closed in the
+ * destructor and when a new table is move-assigned over it.
+ * Move-only: copying is deleted.
+ */
+class CharStringMap final : public UMemory {
+public:
+    /** Constructs an unusable non-map. */
+    CharStringMap() : map(nullptr) {}
+    /**
+     * Opens a hashtable keyed and valued by NUL-terminated char strings.
+     * @param size initial size passed to uhash_openSize()
+     * @param errorCode in/out error code, passed through to uhash_openSize()
+     */
+    CharStringMap(int32_t size, UErrorCode &errorCode) {
+        map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars,
+                             size, &errorCode);
+    }
+    /** Move constructor: takes over other's table and leaves other as a non-map. */
+    CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) {
+        other.map = nullptr;
+    }
+    CharStringMap(const CharStringMap &other) = delete;
+    ~CharStringMap() {
+        uhash_close(map);
+    }
+
+    /**
+     * Move assignment: closes the table this object currently owns (if any),
+     * takes over other's table, and leaves other as a non-map.
+     * Self-assignment is a no-op.
+     */
+    CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT {
+        if (this != &other) {
+            // Release our own table first; without this it would leak.
+            // A null map is passed to uhash_close() the same way the destructor does.
+            uhash_close(map);
+            map = other.map;
+            other.map = nullptr;
+        }
+        return *this;
+    }
+    CharStringMap &operator=(const CharStringMap &other) = delete;
+
+    /** @return the value pointer stored for key, as returned by uhash_get() */
+    const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); }
+    /** Stores the key and value pointers as is; the strings are not copied. */
+    void put(const char *key, const char *value, UErrorCode &errorCode) {
+        uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode);
+    }
+
+private:
+    UHashtable *map;
+};
+
+U_NAMESPACE_END
+
+#endif // __CHARSTRMAP_H__
diff --git a/thirdparty/icu4c/common/cmemory.cpp b/thirdparty/icu4c/common/cmemory.cpp
new file mode 100644
index 0000000000..663c1411e4
--- /dev/null
+++ b/thirdparty/icu4c/common/cmemory.cpp
@@ -0,0 +1,138 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2002-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File cmemory.c ICU Heap allocation.
+* All ICU heap allocation, both for C and C++ new of ICU
+* class types, comes through these functions.
+*
+* If you have a need to replace ICU allocation, this is the
+* place to do it.
+*
+* Note that uprv_malloc(0) returns a non-NULL pointer, and
+* that a subsequent free of that pointer value is a NOP.
+*
+******************************************************************************
+*/
+#include "unicode/uclean.h"
+#include "cmemory.h"
+#include "putilimp.h"
+#include "uassert.h"
+#include <stdlib.h>
+
+/* uprv_malloc(0) returns a pointer to this read-only data. */
+static const int32_t zeroMem[] = {0, 0, 0, 0, 0, 0}; /* uprv_realloc() and uprv_free() compare against this address and never pass it to the heap functions */
+
+/* Function Pointers for user-supplied heap functions */
+static const void *pContext; /* passed as the first argument to pAlloc/pRealloc/pFree */
+static UMemAllocFn *pAlloc; /* when NULL, uprv_malloc() uses uprv_default_malloc() */
+static UMemReallocFn *pRealloc; /* when NULL, uprv_realloc() uses uprv_default_realloc() */
+static UMemFreeFn *pFree; /* when NULL, uprv_free()/uprv_realloc() use uprv_default_free() */
+
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+#include <stdio.h>
+static int n=0; /* debug only: allocation counter, used by the disabled fprintf in uprv_malloc() */
+static long b=0; /* debug only: running byte total, used by the disabled fprintf in uprv_malloc() */
+#endif
+
+/*
+ * Allocates s bytes through the user-supplied allocator if one is installed,
+ * otherwise through uprv_default_malloc().
+ * A request for 0 bytes returns the shared zeroMem sentinel, not heap memory.
+ */
+U_CAPI void * U_EXPORT2
+uprv_malloc(size_t s) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+#if 1
+    putchar('>');
+    fflush(stdout);
+#else
+    fprintf(stderr,"MALLOC\t#%d\t%ul bytes\t%ul total\n", ++n,s,(b+=s)); fflush(stderr);
+#endif
+#endif
+    if (s == 0) {
+        return (void *)zeroMem;
+    }
+    return pAlloc ? (*pAlloc)(pContext, s) : uprv_default_malloc(s);
+}
+
+/*
+ * Resizes a block obtained from uprv_malloc()/uprv_realloc().
+ * - buffer is the zeroMem sentinel: behaves like uprv_malloc(size).
+ * - size is 0: frees buffer and returns the zeroMem sentinel.
+ * - otherwise: delegates to the user-supplied or default realloc.
+ */
+U_CAPI void * U_EXPORT2
+uprv_realloc(void * buffer, size_t size) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+    putchar('~');
+    fflush(stdout);
+#endif
+    if (buffer == zeroMem) {
+        /* The sentinel was never heap-allocated, so there is nothing to resize. */
+        return uprv_malloc(size);
+    }
+    if (size != 0) {
+        return pRealloc ? (*pRealloc)(pContext, buffer, size)
+                        : uprv_default_realloc(buffer, size);
+    }
+    /* Shrinking to zero: release the block and hand back the sentinel. */
+    if (pFree) {
+        (*pFree)(pContext, buffer);
+    } else {
+        uprv_default_free(buffer);
+    }
+    return (void *)zeroMem;
+}
+
+/*
+ * Releases a block obtained from uprv_malloc()/uprv_realloc()/uprv_calloc().
+ * The zeroMem sentinel is recognized and ignored.
+ */
+U_CAPI void U_EXPORT2
+uprv_free(void *buffer) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+    putchar('<');
+    fflush(stdout);
+#endif
+    if (buffer == zeroMem) {
+        return;  /* static data, not a heap block */
+    }
+    if (pFree) {
+        (*pFree)(pContext, buffer);
+    } else {
+        uprv_default_free(buffer);
+    }
+}
+
+/*
+ * Allocates zero-filled memory for num items of size bytes each,
+ * via uprv_malloc().
+ * Returns NULL if the allocation fails or if num*size does not fit in a
+ * size_t (previously the product silently wrapped around, so a block
+ * smaller than requested could be returned).
+ */
+U_CAPI void * U_EXPORT2
+uprv_calloc(size_t num, size_t size) {
+    /* Overflow check without needing SIZE_MAX: (size_t)-1 is the largest size_t. */
+    if (num != 0 && size > ((size_t)-1) / num) {
+        return NULL;
+    }
+    const size_t total = num * size;
+    void *mem = uprv_malloc(total);
+    if (mem) {
+        /* For total==0 this is the zeroMem sentinel; a 0-byte memset writes nothing. */
+        uprv_memset(mem, 0, total);
+    }
+    return mem;
+}
+
+/*
+ * Installs user-supplied heap functions.
+ * Does nothing if *status is already a failure.
+ * All three function pointers are required; if any is NULL, nothing is
+ * installed and *status is set to U_ILLEGAL_ARGUMENT_ERROR.
+ */
+U_CAPI void U_EXPORT2
+u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, UErrorCode *status)
+{
+    if (U_FAILURE(*status)) {
+        return;
+    }
+    if (a!=NULL && r!=NULL && f!=NULL) {
+        pContext = context;
+        pAlloc = a;
+        pRealloc = r;
+        pFree = f;
+    } else {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+}
+
+
+/*
+ * Forgets any user-supplied heap functions and their context, so that the
+ * uprv_ allocation functions fall back to the defaults. Always returns TRUE.
+ */
+U_CFUNC UBool cmemory_cleanup(void) {
+    pAlloc = NULL;
+    pRealloc = NULL;
+    pFree = NULL;
+    pContext = NULL;
+    return TRUE;
+}
diff --git a/thirdparty/icu4c/common/cmemory.h b/thirdparty/icu4c/common/cmemory.h
new file mode 100644
index 0000000000..210bc7645e
--- /dev/null
+++ b/thirdparty/icu4c/common/cmemory.h
@@ -0,0 +1,849 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File CMEMORY.H
+*
+* Contains stdlib.h/string.h memory functions
+*
+* @author Bertrand A. Damiba
+*
+* Modification History:
+*
+* Date Name Description
+* 6/20/98 Bertrand Created.
+* 05/03/99 stephen Changed from functions to macros.
+*
+******************************************************************************
+*/
+
+#ifndef CMEMORY_H
+#define CMEMORY_H
+
+#include "unicode/utypes.h"
+
+#include <stddef.h>
+#include <string.h>
+#include "unicode/localpointer.h"
+
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+#include <stdio.h>
+#endif
+
+
+#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size) /* expands to a plain memcpy() call */
+#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size) /* expands to a plain memmove() call */
+
+/**
+ * \def UPRV_LENGTHOF
+ * Convenience macro to determine the length of a fixed array at compile-time.
+ * @param array A fixed length array
+ * @return The length of the array, in elements
+ * @internal
+ */
+#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+#define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size) /* expands to a plain memset() call */
+#define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size) /* expands to a plain memcmp() call */
+#define uprv_memchr(ptr, value, num) U_STANDARD_CPP_NAMESPACE memchr(ptr, value, num) /* expands to a plain memchr() call */
+
+U_CAPI void * U_EXPORT2
+uprv_malloc(size_t s) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR(1); /* s==0 yields a shared non-NULL sentinel, see cmemory.cpp */
+
+U_CAPI void * U_EXPORT2
+uprv_realloc(void *mem, size_t size) U_ALLOC_SIZE_ATTR(2); /* size==0 frees mem and yields the sentinel, see cmemory.cpp */
+
+U_CAPI void U_EXPORT2
+uprv_free(void *mem); /* ignores the zero-size sentinel, see cmemory.cpp */
+
+U_CAPI void * U_EXPORT2
+uprv_calloc(size_t num, size_t size) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR2(1,2); /* uprv_malloc(num*size) followed by a zero fill, see cmemory.cpp */
+
+/**
+ * Get the least significant bits of a pointer (a memory address).
+ * For example, with a mask of 3, the macro gets the 2 least significant bits,
+ * which will be 0 if the pointer is 32-bit (4-byte) aligned.
+ *
+ * uintptr_t is the most appropriate integer type to cast to.
+ */
+#define U_POINTER_MASK_LSB(ptr, mask) ((uintptr_t)(ptr) & (mask))
+
+/**
+ * Create & return an instance of "type" in statically allocated storage.
+ * e.g.
+ * static std::mutex *myMutex = STATIC_NEW(std::mutex);
+ * To destroy an object created in this way, invoke the destructor explicitly, e.g.
+ * myMutex->~mutex();
+ * DO NOT use delete.
+ * DO NOT use with class UMutex, which has specific support for static instances.
+ *
+ * STATIC_NEW is intended for use when
+ * - We want a static (or global) object.
+ * - We don't want it to ever be destructed, or to explicitly control destruction,
+ * to avoid use-after-destruction problems.
+ * - We want to avoid an ordinary heap allocated object,
+ * to avoid the possibility of memory allocation failures, and
+ * to avoid memory leak reports, from valgrind, for example.
+ * This is defined as a macro rather than a template function because each invocation
+ * must define distinct static storage for the object being returned.
+ */
+#define STATIC_NEW(type) [] () { \
+ alignas(type) static char storage[sizeof(type)]; \
+ return new(storage) type();} ()
+
+/**
+ * Heap clean up function, called from u_cleanup()
+ * Clears any user heap functions from u_setMemoryFunctions()
+ * Does NOT deallocate any remaining allocated memory.
+ */
+U_CFUNC UBool
+cmemory_cleanup(void);
+
+/**
+ * A function called by <TT>uhash_remove</TT>,
+ * <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete
+ * an existing key or value.
+ * @param obj A key or value stored in a hashtable
+ * @see uprv_deleteUObject
+ */
+typedef void U_CALLCONV UObjectDeleter(void* obj);
+
+/**
+ * Deleter for UObject instances.
+ * Works for all subclasses of UObject because it has a virtual destructor.
+ */
+U_CAPI void U_EXPORT2
+uprv_deleteUObject(void *obj);
+
+#ifdef __cplusplus
+
+#include <utility>
+#include "unicode/uobject.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * "Smart pointer" class, deletes memory via uprv_free().
+ * For most methods see the LocalPointerBase base class.
+ * Adds operator[] for array item access.
+ *
+ * @see LocalPointerBase
+ */
+template<typename T>
+class LocalMemory : public LocalPointerBase<T> {
+public:
+    using LocalPointerBase<T>::operator*;
+    using LocalPointerBase<T>::operator->;
+    /**
+     * Adopting constructor.
+     * @param p pointer to an array of T items; this object becomes its owner
+     *          and releases it with uprv_free()
+     */
+    explicit LocalMemory(T *p=NULL) : LocalPointerBase<T>(p) {}
+    /**
+     * Move constructor; afterwards src holds NULL.
+     * @param src smart pointer to take the array from
+     */
+    LocalMemory(LocalMemory<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
+        src.ptr=NULL;
+    }
+    /**
+     * Releases the owned memory with uprv_free().
+     */
+    ~LocalMemory() {
+        uprv_free(this->ptr);
+    }
+    /**
+     * Move assignment; frees the current array, takes src's, and leaves src
+     * holding NULL.
+     * The behavior is undefined if *this and src are the same object.
+     * @param src smart pointer to take the array from
+     * @return *this
+     */
+    LocalMemory<T> &operator=(LocalMemory<T> &&src) U_NOEXCEPT {
+        uprv_free(this->ptr);
+        this->ptr=src.ptr;
+        src.ptr=NULL;
+        return *this;
+    }
+    /**
+     * Exchanges the owned pointers of this object and other.
+     * @param other the smart pointer to swap with
+     */
+    void swap(LocalMemory<T> &other) U_NOEXCEPT {
+        T *mine=this->ptr;
+        this->ptr=other.ptr;
+        other.ptr=mine;
+    }
+    /**
+     * Non-member swap, forwards to the member swap().
+     * @param p1 receives p2's pointer
+     * @param p2 receives p1's pointer
+     */
+    friend inline void swap(LocalMemory<T> &p1, LocalMemory<T> &p2) U_NOEXCEPT {
+        p1.swap(p2);
+    }
+    /**
+     * Frees the currently owned array and adopts p in its place.
+     * @param p pointer to an array of T items; this object becomes its owner
+     */
+    void adoptInstead(T *p) {
+        uprv_free(this->ptr);
+        this->ptr=p;
+    }
+    /**
+     * Replaces the owned array with a newly allocated, zero-filled one.
+     * On allocation failure the current array is kept and NULL is returned.
+     * @param newCapacity number of T items to allocate; must be >0
+     * @return the new array pointer, or NULL if the allocation failed
+     */
+    inline T *allocateInsteadAndReset(int32_t newCapacity=1);
+    /**
+     * Replaces the owned array with a newly allocated one and copies
+     * length T items from the old array into it.
+     * On allocation failure the current array is kept and NULL is returned.
+     * @param newCapacity number of T items to allocate; must be >0
+     * @param length number of T items to copy from the old array;
+     *               must not exceed the old array's capacity, which the
+     *               caller has to track because LocalMemory does not
+     * @return the new array pointer, or NULL if the allocation failed
+     */
+    inline T *allocateInsteadAndCopy(int32_t newCapacity=1, int32_t length=0);
+    /**
+     * Writable array item access, without bounds check.
+     * @param i array index
+     * @return reference to item i
+     */
+    T &operator[](ptrdiff_t i) const { return this->ptr[i]; }
+};
+
+// Allocates newCapacity zero-filled T items; on success the old array is
+// freed and replaced. Returns NULL (old array untouched) if newCapacity<=0
+// or the allocation fails.
+template<typename T>
+inline T *LocalMemory<T>::allocateInsteadAndReset(int32_t newCapacity) {
+    if(newCapacity<=0) {
+        return NULL;
+    }
+    const size_t byteCount=newCapacity*sizeof(T);
+    T *fresh=(T *)uprv_malloc(byteCount);
+    if(fresh==NULL) {
+        return NULL;
+    }
+    uprv_memset(fresh, 0, byteCount);
+    uprv_free(LocalPointerBase<T>::ptr);
+    LocalPointerBase<T>::ptr=fresh;
+    return fresh;
+}
+
+
+// Allocates newCapacity T items and copies up to "length" items (clamped to
+// newCapacity) from the old array; on success the old array is freed and
+// replaced. Returns NULL (old array untouched) if newCapacity<=0 or the
+// allocation fails.
+template<typename T>
+inline T *LocalMemory<T>::allocateInsteadAndCopy(int32_t newCapacity, int32_t length) {
+    if(newCapacity<=0) {
+        return NULL;
+    }
+    T *fresh=(T *)uprv_malloc(newCapacity*sizeof(T));
+    if(fresh==NULL) {
+        return NULL;
+    }
+    if(length>0) {
+        const int32_t itemsToCopy=(length>newCapacity) ? newCapacity : length;
+        uprv_memcpy(fresh, LocalPointerBase<T>::ptr, (size_t)itemsToCopy*sizeof(T));
+    }
+    uprv_free(LocalPointerBase<T>::ptr);
+    LocalPointerBase<T>::ptr=fresh;
+    return fresh;
+}
+
+/**
+ * Simple array/buffer management class using uprv_malloc() and uprv_free().
+ * Provides an internal array with fixed capacity. Can alias another array
+ * or allocate one.
+ *
+ * The array address is properly aligned for type T. It might not be properly
+ * aligned for types larger than T (or larger than the largest subtype of T).
+ *
+ * Unlike LocalMemory and LocalArray, this class never adopts
+ * (takes ownership of) another array.
+ *
+ * WARNING: MaybeStackArray only works with primitive (plain-old data) types.
+ * It does NOT know how to call a destructor! If you work with classes with
+ * destructors, consider:
+ *
+ * - LocalArray in localpointer.h if you know the length ahead of time
+ * - MaybeStackVector if you know the length at runtime
+ */
+template<typename T, int32_t stackCapacity>
+class MaybeStackArray {
+public:
+ // No heap allocation. Use only on the stack.
+ static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
+ static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
+#if U_HAVE_PLACEMENT_NEW
+ static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
+#endif
+
+ /**
+ * Default constructor initializes with internal T[stackCapacity] buffer.
+ */
+ MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(false) {}
+ /**
+ * Automatically allocates the heap array if the argument is larger than the stack capacity.
+ * Intended for use when an approximate capacity is known at compile time but the true
+ * capacity is not known until runtime.
+ */
+ MaybeStackArray(int32_t newCapacity, UErrorCode status) : MaybeStackArray() {
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if (capacity < newCapacity) {
+ if (resize(newCapacity) == nullptr) {
+ status = U_MEMORY_ALLOCATION_ERROR; // NOTE(review): status is a by-value parameter, so the caller never sees this error; callers would have to check getCapacity() -- confirm intent
+ }
+ }
+ }
+ /**
+ * Destructor deletes the array (if owned).
+ */
+ ~MaybeStackArray() { releaseArray(); }
+ /**
+ * Move constructor: transfers ownership or copies the stack array.
+ */
+ MaybeStackArray(MaybeStackArray<T, stackCapacity> &&src) U_NOEXCEPT;
+ /**
+ * Move assignment: transfers ownership or copies the stack array.
+ */
+ MaybeStackArray<T, stackCapacity> &operator=(MaybeStackArray<T, stackCapacity> &&src) U_NOEXCEPT;
+ /**
+ * Returns the array capacity (number of T items).
+ * @return array capacity
+ */
+ int32_t getCapacity() const { return capacity; }
+ /**
+ * Access without ownership change.
+ * @return the array pointer
+ */
+ T *getAlias() const { return ptr; }
+ /**
+ * Returns the array limit. Simple convenience method.
+ * @return getAlias()+getCapacity()
+ */
+ T *getArrayLimit() const { return getAlias()+capacity; }
+ // No "operator T *() const" because that can make
+ // expressions like mbs[index] ambiguous for some compilers.
+ /**
+ * Array item access (const).
+ * No index bounds check.
+ * @param i array index
+ * @return reference to the array item
+ */
+ const T &operator[](ptrdiff_t i) const { return ptr[i]; }
+ /**
+ * Array item access (writable).
+ * No index bounds check.
+ * @param i array index
+ * @return reference to the array item
+ */
+ T &operator[](ptrdiff_t i) { return ptr[i]; }
+ /**
+ * Deletes the array (if owned) and aliases another one, no transfer of ownership.
+ * If the arguments are illegal, then the current array is unchanged.
+ * @param otherArray must not be NULL
+ * @param otherCapacity must be >0
+ */
+ void aliasInstead(T *otherArray, int32_t otherCapacity) {
+ if(otherArray!=NULL && otherCapacity>0) {
+ releaseArray();
+ ptr=otherArray;
+ capacity=otherCapacity;
+ needToRelease=false; // aliased memory is never freed by this object
+ }
+ }
+ /**
+ * Deletes the array (if owned) and allocates a new one, copying length T items.
+ * Returns the new array pointer.
+ * If the allocation fails, then the current array is unchanged and
+ * this method returns NULL.
+ * @param newCapacity can be less than or greater than the current capacity;
+ * must be >0
+ * @param length number of T items to be copied from the old array to the new one
+ * @return the allocated array pointer, or NULL if the allocation failed
+ */
+ inline T *resize(int32_t newCapacity, int32_t length=0);
+ /**
+ * Gives up ownership of the array if owned, or else clones it,
+ * copying length T items; resets itself to the internal stack array.
+ * Returns NULL if the allocation failed.
+ * @param length number of T items to copy when cloning,
+ * and capacity of the clone when cloning
+ * @param resultCapacity will be set to the returned array's capacity (output-only)
+ * @return the array pointer;
+ * caller becomes responsible for deleting the array
+ */
+ inline T *orphanOrClone(int32_t length, int32_t &resultCapacity);
+
+protected:
+ // Resizes the array to the size of src, then copies the contents of src.
+ void copyFrom(const MaybeStackArray &src, UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if (this->resize(src.capacity, 0) == NULL) { // always allocates a heap array, even if src uses its stack array
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ uprv_memcpy(this->ptr, src.ptr, (size_t)capacity * sizeof(T)); // capacity equals src.capacity after the successful resize
+ }
+
+private:
+ T *ptr; // current array: stackArray, a heap block, or an aliased array
+ int32_t capacity; // number of T items available at ptr
+ UBool needToRelease; // true only when ptr is a heap block this object must uprv_free()
+ T stackArray[stackCapacity];
+ void releaseArray() {
+ if(needToRelease) {
+ uprv_free(ptr);
+ }
+ }
+ void resetToStackArray() {
+ ptr=stackArray;
+ capacity=stackCapacity;
+ needToRelease=false;
+ }
+ /* No comparison operators with other MaybeStackArray's. */
+ bool operator==(const MaybeStackArray & /*other*/) = delete;
+ bool operator!=(const MaybeStackArray & /*other*/) = delete;
+ /* No ownership transfer: No copy constructor, no assignment operator. */
+ MaybeStackArray(const MaybeStackArray & /*other*/) = delete;
+ void operator=(const MaybeStackArray & /*other*/) = delete;
+};
+
+// Move constructor. A heap (or aliased) array is taken over as is and src is
+// reset to its own stack array; a stack-resident array cannot be taken over,
+// so its items are copied into this object's stack array instead.
+template<typename T, int32_t stackCapacity>
+icu::MaybeStackArray<T, stackCapacity>::MaybeStackArray(
+        MaybeStackArray <T, stackCapacity>&& src) U_NOEXCEPT
+        : ptr(src.ptr), capacity(src.capacity), needToRelease(src.needToRelease) {
+    if (src.ptr != src.stackArray) {
+        src.resetToStackArray();  // take ownership away from src
+        return;
+    }
+    ptr = stackArray;
+    uprv_memcpy(stackArray, src.stackArray, sizeof(T) * src.capacity);
+}
+
+// Move assignment. Frees this object's heap array (if any), then either takes
+// over src's array and resets src to its stack array, or, when src is using
+// its stack array, copies the items into this object's stack array.
+template<typename T, int32_t stackCapacity>
+inline MaybeStackArray <T, stackCapacity>&
+MaybeStackArray<T, stackCapacity>::operator=(MaybeStackArray <T, stackCapacity>&& src) U_NOEXCEPT {
+    releaseArray();  // in case this instance had its own memory allocated
+    capacity = src.capacity;
+    needToRelease = src.needToRelease;
+    const bool srcUsesStackArray = (src.ptr == src.stackArray);
+    if (!srcUsesStackArray) {
+        ptr = src.ptr;
+        src.resetToStackArray();  // take ownership away from src
+    } else {
+        ptr = stackArray;
+        uprv_memcpy(stackArray, src.stackArray, sizeof(T) * src.capacity);
+    }
+    return *this;
+}
+
+// Allocates a heap array of newCapacity items, copies up to "length" items
+// (clamped to both the old and the new capacity) from the current array,
+// releases the old array if owned, and switches to the new one.
+// Returns NULL and changes nothing if newCapacity<=0 or the allocation fails.
+template<typename T, int32_t stackCapacity>
+inline T *MaybeStackArray<T, stackCapacity>::resize(int32_t newCapacity, int32_t length) {
+    if(newCapacity<=0) {
+        return NULL;
+    }
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+    ::fprintf(::stderr, "MaybeStackArray (resize) alloc %d * %lu\n", newCapacity, sizeof(T));
+#endif
+    T *newArray=(T *)uprv_malloc(newCapacity*sizeof(T));
+    if(newArray==NULL) {
+        return NULL;
+    }
+    if(length>0) {
+        int32_t itemsToCopy=length;
+        if(itemsToCopy>capacity) {
+            itemsToCopy=capacity;
+        }
+        if(itemsToCopy>newCapacity) {
+            itemsToCopy=newCapacity;
+        }
+        uprv_memcpy(newArray, ptr, (size_t)itemsToCopy*sizeof(T));
+    }
+    releaseArray();
+    ptr=newArray;
+    capacity=newCapacity;
+    needToRelease=true;
+    return newArray;
+}
+
+// Hands the array to the caller: an owned heap array is returned as is;
+// otherwise a heap copy of the first "length" items (clamped to the current
+// capacity) is made. In both cases resultCapacity is set to "length" (after
+// clamping when cloning) and this object goes back to its stack array.
+// Returns NULL, without touching resultCapacity or this object, if cloning is
+// needed and length<=0 or the allocation fails.
+template<typename T, int32_t stackCapacity>
+inline T *MaybeStackArray<T, stackCapacity>::orphanOrClone(int32_t length, int32_t &resultCapacity) {
+    T *result;
+    if(needToRelease) {
+        result=ptr;
+    } else {
+        if(length<=0) {
+            return NULL;
+        }
+        if(length>capacity) {
+            length=capacity;
+        }
+        result=(T *)uprv_malloc(length*sizeof(T));
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+        ::fprintf(::stderr,"MaybeStacArray (orphan) alloc %d * %lu\n", length,sizeof(T));
+#endif
+        if(result==NULL) {
+            return NULL;
+        }
+        uprv_memcpy(result, ptr, (size_t)length*sizeof(T));
+    }
+    resultCapacity=length;
+    resetToStackArray();
+    return result;
+}
+
+/**
+ * Variant of MaybeStackArray that allocates a header struct and an array
+ * in one contiguous memory block, using uprv_malloc() and uprv_free().
+ * Provides internal memory with fixed array capacity. Can alias another memory
+ * block or allocate one.
+ * The stackCapacity is the number of T items in the internal memory,
+ * not counting the H header.
+ * Unlike LocalMemory and LocalArray, this class never adopts
+ * (takes ownership of) another memory block.
+ */
+template<typename H, typename T, int32_t stackCapacity>
+class MaybeStackHeaderAndArray {
+public:
+ // No heap allocation. Use only on the stack.
+ static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
+ static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
+#if U_HAVE_PLACEMENT_NEW
+ static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
+#endif
+
+ /**
+ * Default constructor initializes with internal H+T[stackCapacity] buffer.
+ */
+ MaybeStackHeaderAndArray() : ptr(&stackHeader), capacity(stackCapacity), needToRelease(false) {}
+ /**
+ * Destructor deletes the memory (if owned).
+ */
+ ~MaybeStackHeaderAndArray() { releaseMemory(); }
+ /**
+ * Returns the array capacity (number of T items).
+ * @return array capacity
+ */
+ int32_t getCapacity() const { return capacity; }
+ /**
+ * Access without ownership change.
+ * @return the header pointer
+ */
+ H *getAlias() const { return ptr; }
+ /**
+ * Returns the array start.
+ * @return array start, same address as getAlias()+1
+ */
+ T *getArrayStart() const { return reinterpret_cast<T *>(getAlias()+1); }
+ /**
+ * Returns the array limit.
+ * @return array limit
+ */
+ T *getArrayLimit() const { return getArrayStart()+capacity; }
+ /**
+ * Access without ownership change. Same as getAlias().
+ * A class instance can be used directly in expressions that take a T *.
+ * @return the header pointer
+ */
+ operator H *() const { return ptr; }
+ /**
+ * Array item access (writable).
+ * No index bounds check.
+ * @param i array index
+ * @return reference to the array item
+ */
+ T &operator[](ptrdiff_t i) { return getArrayStart()[i]; }
+ /**
+ * Deletes the memory block (if owned) and aliases another one, no transfer of ownership.
+ * If the arguments are illegal, then the current memory is unchanged.
+ * @param otherArray must not be NULL
+ * @param otherCapacity must be >0
+ */
+ void aliasInstead(H *otherMemory, int32_t otherCapacity) {
+ if(otherMemory!=NULL && otherCapacity>0) {
+ releaseMemory();
+ ptr=otherMemory;
+ capacity=otherCapacity;
+ needToRelease=false;
+ }
+ }
+ /**
+ * Deletes the memory block (if owned) and allocates a new one,
+ * copying the header and length T array items.
+ * Returns the new header pointer.
+ * If the allocation fails, then the current memory is unchanged and
+ * this method returns NULL.
+ * @param newCapacity can be less than or greater than the current capacity;
+ * must be >0
+ * @param length number of T items to be copied from the old array to the new one
+ * @return the allocated pointer, or NULL if the allocation failed
+ */
+ inline H *resize(int32_t newCapacity, int32_t length=0);
+ /**
+ * Gives up ownership of the memory if owned, or else clones it,
+ * copying the header and length T array items; resets itself to the internal memory.
+ * Returns NULL if the allocation failed.
+ * @param length number of T items to copy when cloning,
+ * and array capacity of the clone when cloning
+ * @param resultCapacity will be set to the returned array's capacity (output-only)
+ * @return the header pointer;
+ * caller becomes responsible for deleting the array
+ */
+ inline H *orphanOrClone(int32_t length, int32_t &resultCapacity);
+private:
+ H *ptr;
+ int32_t capacity;
+ UBool needToRelease;
+ // stackHeader must precede stackArray immediately.
+ H stackHeader;
+ T stackArray[stackCapacity];
+ /** Frees the current block with uprv_free(), but only when needToRelease is set. */
+ void releaseMemory() {
+ if(needToRelease) {
+ uprv_free(ptr);
+ }
+ }
+ /* No comparison operators with other MaybeStackHeaderAndArray's. */
+ bool operator==(const MaybeStackHeaderAndArray & /*other*/) {return false;}
+ bool operator!=(const MaybeStackHeaderAndArray & /*other*/) {return true;}
+ /* No ownership transfer: No copy constructor, no assignment operator. */
+ MaybeStackHeaderAndArray(const MaybeStackHeaderAndArray & /*other*/) {}
+ void operator=(const MaybeStackHeaderAndArray & /*other*/) {}
+};
+
+/**
+ * Out-of-line definition of MaybeStackHeaderAndArray::resize().
+ * Allocates a block for one H header plus newCapacity T items, copies the header
+ * and up to length items from the current block, releases the old block if it was
+ * owned, and switches this object over to the new block.
+ * @param newCapacity capacity of the new array; negative values are rejected
+ * @param length number of T items to copy; limited to the old and the new capacity
+ * @return the new header pointer, or NULL if newCapacity<0 or the allocation failed
+ *         (the current memory is unchanged in both failure cases)
+ */
+template<typename H, typename T, int32_t stackCapacity>
+inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::resize(int32_t newCapacity,
+                                                                int32_t length) {
+    if(newCapacity<0) {
+        return NULL;
+    }
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+    // sizeof() yields size_t; cast so that the arguments really match the %lu conversions.
+    ::fprintf(::stderr,"MaybeStackHeaderAndArray alloc %lu + %d * %lu\n", (unsigned long)sizeof(H),newCapacity,(unsigned long)sizeof(T));
+#endif
+    H *p=(H *)uprv_malloc(sizeof(H)+newCapacity*sizeof(T));
+    if(p==NULL) {
+        return NULL;
+    }
+    if(length<0) {
+        length=0;
+    } else if(length>0) {
+        if(length>capacity) {
+            length=capacity;
+        }
+        if(length>newCapacity) {
+            length=newCapacity;
+        }
+    }
+    // The header is always copied, followed by the first length array items.
+    uprv_memcpy(p, ptr, sizeof(H)+(size_t)length*sizeof(T));
+    releaseMemory();
+    ptr=p;
+    capacity=newCapacity;
+    needToRelease=true;
+    return p;
+}
+
+/**
+ * Out-of-line definition of MaybeStackHeaderAndArray::orphanOrClone().
+ * If the current block is owned, it is handed to the caller as is; otherwise a new
+ * block holding the header and length items is allocated and filled from the
+ * current one. On success this object is reset to its internal stack storage.
+ * @param length number of T items to copy when cloning; limited to [0, capacity]
+ * @param resultCapacity set to the (limited) length on success; untouched on failure
+ * @return the header pointer now owned by the caller, or NULL if the allocation failed
+ */
+template<typename H, typename T, int32_t stackCapacity>
+inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::orphanOrClone(int32_t length,
+                                                                       int32_t &resultCapacity) {
+    // Limit length before branching so that the capacity reported to the caller
+    // can never be negative or exceed the block that is actually returned.
+    if(length<0) {
+        length=0;
+    } else if(length>capacity) {
+        length=capacity;
+    }
+    H *p;
+    if(needToRelease) {
+        p=ptr;
+    } else {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+        // sizeof() yields size_t; cast so that the arguments really match the %lu conversions.
+        ::fprintf(::stderr,"MaybeStackHeaderAndArray (orphan) alloc %lu + %d * %lu\n", (unsigned long)sizeof(H),length,(unsigned long)sizeof(T));
+#endif
+        p=(H *)uprv_malloc(sizeof(H)+length*sizeof(T));
+        if(p==NULL) {
+            return NULL;
+        }
+        uprv_memcpy(p, ptr, sizeof(H)+(size_t)length*sizeof(T));
+    }
+    resultCapacity=length;
+    ptr=&stackHeader;
+    capacity=stackCapacity;
+    needToRelease=false;
+    return p;
+}
+
+/**
+ * A simple memory management class that creates new heap allocated objects (of
+ * any class that has a public constructor), keeps track of them and eventually
+ * deletes them all in its own destructor.
+ *
+ * A typical use-case would be code like this:
+ *
+ * MemoryPool<MyType> pool;
+ *
+ * MyType* o1 = pool.create();
+ * if (o1 != nullptr) {
+ * foo(o1);
+ * }
+ *
+ * MyType* o2 = pool.create(1, 2, 3);
+ * if (o2 != nullptr) {
+ * bar(o2);
+ * }
+ *
+ * // MemoryPool will take care of deleting the MyType objects.
+ *
+ * It doesn't do anything more than that, and is intentionally kept minimalist.
+ */
+template<typename T, int32_t stackCapacity = 8>
+class MemoryPool : public UMemory {
+public:
+    MemoryPool() : fCount(0), fPool() {}
+
+    /** Deletes every object that was created through this pool. */
+    ~MemoryPool() {
+        for (int32_t i = 0; i < fCount; ++i) {
+            delete fPool[i];
+        }
+    }
+
+    MemoryPool(const MemoryPool&) = delete;
+    MemoryPool& operator=(const MemoryPool&) = delete;
+
+    /** Takes over the objects tracked by other and leaves other empty. */
+    MemoryPool(MemoryPool&& other) U_NOEXCEPT : fCount(other.fCount),
+                                                fPool(std::move(other.fPool)) {
+        other.fCount = 0;
+    }
+
+    /**
+     * Move assignment. This pool may already track objects that have to be
+     * deleted, so its contents are swapped with other instead of being
+     * overwritten: the destructor of other then deletes the objects that
+     * used to belong to this pool.
+     */
+    MemoryPool& operator=(MemoryPool&& other) U_NOEXCEPT {
+        std::swap(fCount, other.fCount);
+        std::swap(fPool, other.fPool);
+        return *this;
+    }
+
+    /**
+     * Creates a new object of typename T, by forwarding any and all arguments
+     * to the typename T constructor.
+     *
+     * @param args Arguments to be forwarded to the typename T constructor.
+     * @return A pointer to the newly created object, or nullptr on error.
+     */
+    template<typename... Args>
+    T* create(Args&&... args) {
+        int32_t capacity = fPool.getCapacity();
+        // Grow the pointer array when it is full: 4x on the first growth
+        // (capacity still equals stackCapacity), 2x afterwards.
+        if (fCount == capacity &&
+            fPool.resize(capacity == stackCapacity ? 4 * capacity : 2 * capacity,
+                         capacity) == nullptr) {
+            return nullptr;
+        }
+        return fPool[fCount++] = new T(std::forward<Args>(args)...);
+    }
+
+    /**
+     * Like create(), but does nothing if status already indicates a failure and
+     * sets status to U_MEMORY_ALLOCATION_ERROR if no object could be created.
+     *
+     * @param status In/out error code.
+     * @param args Arguments to be forwarded to the typename T constructor.
+     * @return A pointer to the newly created object, or nullptr on error.
+     */
+    template <typename... Args>
+    T* createAndCheckErrorCode(UErrorCode &status, Args &&... args) {
+        if (U_FAILURE(status)) {
+            return nullptr;
+        }
+        // Forward (rather than copy) so that rvalue arguments stay rvalues.
+        T *pointer = this->create(std::forward<Args>(args)...);
+        if (U_SUCCESS(status) && pointer == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        return pointer;
+    }
+
+    /**
+     * @return Number of elements that have been allocated.
+     */
+    int32_t count() const {
+        return fCount;
+    }
+
+protected:
+    int32_t fCount;
+    MaybeStackArray<T*, stackCapacity> fPool;
+};
+
+/**
+ * An internal Vector-like implementation based on MemoryPool.
+ *
+ * Heap-allocates each element and stores pointers.
+ *
+ * To append an item to the vector, use emplaceBack.
+ *
+ * MaybeStackVector<MyType> vector;
+ * MyType* element = vector.emplaceBack();
+ * if (!element) {
+ * status = U_MEMORY_ALLOCATION_ERROR;
+ * }
+ * // do stuff with element
+ *
+ * To loop over the vector, use a for loop with indices:
+ *
+ * for (int32_t i = 0; i < vector.length(); i++) {
+ * MyType* element = vector[i];
+ * }
+ */
+template<typename T, int32_t stackCapacity = 8>
+class MaybeStackVector : protected MemoryPool<T, stackCapacity> {
+public:
+    using MemoryPool<T, stackCapacity>::MemoryPool;
+    using MemoryPool<T, stackCapacity>::operator=;
+
+    /**
+     * Appends a new element, forwarding all arguments to the T constructor.
+     * @return pointer to the new element, or nullptr on error
+     */
+    template<typename... Args>
+    T* emplaceBack(Args&&... args) {
+        return this->create(std::forward<Args>(args)...);
+    }
+
+    /**
+     * Same as emplaceBack(), with the error reporting of
+     * MemoryPool::createAndCheckErrorCode().
+     * @return pointer to the new element, or nullptr on error
+     */
+    template <typename... Args>
+    T *emplaceBackAndCheckErrorCode(UErrorCode &status, Args &&... args) {
+        return this->createAndCheckErrorCode(status, std::forward<Args>(args)...);
+    }
+
+    /** @return number of elements in the vector */
+    int32_t length() const {
+        return this->fCount;
+    }
+
+    /** @return the underlying array of element pointers, without ownership change */
+    T** getAlias() {
+        return this->fPool.getAlias();
+    }
+
+    /** @return the underlying array of element pointers (read-only), without ownership change */
+    const T *const *getAlias() const {
+        return this->fPool.getAlias();
+    }
+
+    /**
+     * Array item access (read-only).
+     * No index bounds check.
+     * @param i array index
+     * @return pointer to the element at index i
+     */
+    const T* operator[](ptrdiff_t i) const {
+        return this->fPool[i];
+    }
+
+    /**
+     * Array item access (writable).
+     * No index bounds check.
+     * @param i array index
+     * @return pointer to the element at index i
+     */
+    T* operator[](ptrdiff_t i) {
+        return this->fPool[i];
+    }
+};
+
+
+U_NAMESPACE_END
+
+#endif /* __cplusplus */
+#endif /* CMEMORY_H */
diff --git a/thirdparty/icu4c/common/cpputils.h b/thirdparty/icu4c/common/cpputils.h
new file mode 100644
index 0000000000..307e570486
--- /dev/null
+++ b/thirdparty/icu4c/common/cpputils.h
@@ -0,0 +1,97 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: cpputils.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*/
+
+#ifndef CPPUTILS_H
+#define CPPUTILS_H
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "cmemory.h"
+
+/*==========================================================================*/
+/* Array copy utility functions */
+/*==========================================================================*/
+
+/*
+ * Overloads for arrays of double, int8_t, int16_t, int32_t and UChar.
+ * Each one copies count elements from src to dst with uprv_memcpy(); the
+ * overloads taking srcStart/dstStart first advance src and dst by those offsets.
+ * count is cast to size_t, so it is presumably expected to be non-negative.
+ */
+static
+inline void uprv_arrayCopy(const double* src, double* dst, int32_t count)
+{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); }
+
+static
+inline void uprv_arrayCopy(const double* src, int32_t srcStart,
+ double* dst, int32_t dstStart, int32_t count)
+{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }
+
+static
+inline void uprv_arrayCopy(const int8_t* src, int8_t* dst, int32_t count)
+ { uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); }
+
+static
+inline void uprv_arrayCopy(const int8_t* src, int32_t srcStart,
+ int8_t* dst, int32_t dstStart, int32_t count)
+{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }
+
+static
+inline void uprv_arrayCopy(const int16_t* src, int16_t* dst, int32_t count)
+{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); }
+
+static
+inline void uprv_arrayCopy(const int16_t* src, int32_t srcStart,
+ int16_t* dst, int32_t dstStart, int32_t count)
+{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }
+
+static
+inline void uprv_arrayCopy(const int32_t* src, int32_t* dst, int32_t count)
+{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); }
+
+static
+inline void uprv_arrayCopy(const int32_t* src, int32_t srcStart,
+ int32_t* dst, int32_t dstStart, int32_t count)
+{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }
+
+static
+inline void
+uprv_arrayCopy(const UChar *src, int32_t srcStart,
+ UChar *dst, int32_t dstStart, int32_t count)
+{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }
+
+/**
+ * Copy an array of UnicodeString OBJECTS (not pointers).
+ * The elements are assigned one by one, in ascending index order.
+ * @internal
+ */
+static inline void
+uprv_arrayCopy(const icu::UnicodeString *src, icu::UnicodeString *dst, int32_t count)
+{
+    for(int32_t i=0; i<count; ++i) {
+        dst[i] = src[i];
+    }
+}
+
+/**
+ * Copy an array of UnicodeString OBJECTS (not pointers),
+ * starting at the given offsets into the source and destination arrays.
+ * @internal
+ */
+static inline void
+uprv_arrayCopy(const icu::UnicodeString *src, int32_t srcStart,
+               icu::UnicodeString *dst, int32_t dstStart, int32_t count)
+{
+    const icu::UnicodeString *from = src+srcStart;
+    icu::UnicodeString *to = dst+dstStart;
+    uprv_arrayCopy(from, to, count);
+}
+
+/**
+ * Checks that the string is readable and writable.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR if the string isBogus() or has an open getBuffer().
+ * An errorCode that already indicates a failure is left untouched.
+ * NOTE(review): only isBogus() is tested here; whether that also covers a string
+ * with an open getBuffer() depends on UnicodeString internals not visible in
+ * this file - confirm.
+ */
+inline void
+uprv_checkCanGetBuffer(const icu::UnicodeString &s, UErrorCode &errorCode) {
+ if(U_SUCCESS(errorCode) && s.isBogus()) {
+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ }
+}
+
+#endif /* CPPUTILS_H */
diff --git a/thirdparty/icu4c/common/cstr.cpp b/thirdparty/icu4c/common/cstr.cpp
new file mode 100644
index 0000000000..24654f8fc2
--- /dev/null
+++ b/thirdparty/icu4c/common/cstr.cpp
@@ -0,0 +1,54 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2015-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: cstr.cpp
+*/
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/unistr.h"
+
+#include "cstr.h"
+
+#include "charstr.h"
+#include "uinvchar.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Converts the UnicodeString into the internal CharString s.
+ * Errors are collected in a local UErrorCode and are not reported to the caller.
+ */
+CStr::CStr(const UnicodeString &in) {
+ UErrorCode status = U_ZERO_ERROR;
+#if !UCONFIG_NO_CONVERSION || U_CHARSET_IS_UTF8
+ // First extract() call: NULL buffer with capacity 0, used only for its return
+ // value (presumably the number of chars needed - confirm against UnicodeString::extract).
+ int32_t length = in.extract(0, in.length(), static_cast<char *>(NULL), static_cast<uint32_t>(0));
+ int32_t resultCapacity = 0;
+ char *buf = s.getAppendBuffer(length, length, resultCapacity, status);
+ if (U_SUCCESS(status)) {
+ // Second extract() call writes into the append buffer, which is then committed with append().
+ in.extract(0, in.length(), buf, resultCapacity);
+ s.append(buf, length, status);
+ }
+#else
+ // No conversion available. Convert any invariant characters; substitute '?' for the rest.
+ // Note: can't just call u_UCharsToChars() or CharString.appendInvariantChars() on the
+ // whole string because they require that the entire input be invariant.
+ char buf[2];
+ // moveIndex32(i, 1) advances by one code point, so one char is appended per code point.
+ for (int i=0; i<in.length(); i = in.moveIndex32(i, 1)) {
+ if (uprv_isInvariantUString(in.getBuffer()+i, 1)) {
+ u_UCharsToChars(in.getBuffer()+i, buf, 1);
+ } else {
+ buf[0] = '?';
+ }
+ s.append(buf, 1, status);
+ }
+#endif
+}
+
+/** Nothing to do explicitly; the CharString member cleans up after itself. */
+CStr::~CStr() {
+}
+
+/** Returns the data pointer of the internal CharString holding the converted string. */
+const char * CStr::operator ()() const {
+ return s.data();
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/cstr.h b/thirdparty/icu4c/common/cstr.h
new file mode 100644
index 0000000000..c33f487ea1
--- /dev/null
+++ b/thirdparty/icu4c/common/cstr.h
@@ -0,0 +1,60 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File: cstr.h
+*/
+
+#ifndef CSTR_H
+#define CSTR_H
+
+#include "unicode/unistr.h"
+#include "unicode/uobject.h"
+#include "unicode/utypes.h"
+
+#include "charstr.h"
+
+/**
+ * ICU-internal class CStr, a small helper class to facilitate passing UnicodeStrings
+ * to functions needing (const char *) strings, such as printf().
+ *
+ * It is intended primarily for use in debugging or in tests. Uses platform
+ * default code page conversion, which will do the best job possible,
+ * but may be lossy, depending on the platform.
+ *
+ * If no other conversion is available, use invariant conversion and substitute
+ * '?' for non-invariant characters.
+ *
+ * Example Usage:
+ * UnicodeString s = whatever;
+ * printf("%s", CStr(s)());
+ *
+ * The explicit call to the CStr() constructor creates a temporary object.
+ * Operator () on the temporary object returns a (const char *) pointer.
+ * The lifetime of the (const char *) data is that of the temporary object,
+ * which works well when passing it as a parameter to another function, such as printf.
+ */
+
+U_NAMESPACE_BEGIN
+
+class U_COMMON_API CStr : public UMemory {
+ public:
+ /** Converts the given UnicodeString and stores the result in this object. */
+ CStr(const UnicodeString &in);
+ ~CStr();
+ /** Returns the converted (const char *) string; it is owned by this object. */
+ const char * operator ()() const;
+
+ private:
+ // Holds the converted string returned by operator ()().
+ CharString s;
+ CStr(const CStr &other); // Forbid copying of this class.
+ CStr &operator =(const CStr &other); // Forbid assignment.
+};
+
+U_NAMESPACE_END
+
+#endif
diff --git a/thirdparty/icu4c/common/cstring.cpp b/thirdparty/icu4c/common/cstring.cpp
new file mode 100644
index 0000000000..06275c4b56
--- /dev/null
+++ b/thirdparty/icu4c/common/cstring.cpp
@@ -0,0 +1,341 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File CSTRING.C
+*
+* @author Helena Shih
+*
+* Modification History:
+*
+* Date Name Description
+* 6/18/98 hshih Created
+* 09/08/98 stephen Added include for ctype, for Mac Port
+* 11/15/99 helena Integrated S/390 IEEE changes.
+******************************************************************************
+*/
+
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uassert.h"
+
+/*
+ * We hardcode case conversion for invariant characters to match our expectation
+ * and the compiler execution charset.
+ * This prevents problems on systems
+ * - with non-default casing behavior, like Turkish system locales where
+ * tolower('I') maps to dotless i and toupper('i') maps to dotted I
+ * - where there are no lowercase Latin characters at all, or using different
+ * codes (some old EBCDIC codepages)
+ *
+ * This works because the compiler usually runs on a platform where the execution
+ * charset includes all of the invariant characters at their expected
+ * code positions, so that the char * string literals in ICU code match
+ * the char literals here.
+ *
+ * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
+ * and the set of uppercase Latin letters is discontiguous as well.
+ */
+
+/* Returns true if c is one of the letters a-z or A-Z in the compiled-in charset family. */
+U_CAPI UBool U_EXPORT2
+uprv_isASCIILetter(char c) {
+#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+    /* The EBCDIC letters form three separate ranges per case. */
+    if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
+        return true;
+    }
+    return ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
+#else
+    if('a'<=c && c<='z') {
+        return true;
+    }
+    return 'A'<=c && c<='Z';
+#endif
+}
+
+/* Maps the lowercase letters a-z to A-Z; every other char is returned unchanged. */
+U_CAPI char U_EXPORT2
+uprv_toupper(char c) {
+#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+    /* The EBCDIC lowercase letters form three separate ranges. */
+    const bool isLower=('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z');
+#else
+    const bool isLower='a'<=c && c<='z';
+#endif
+    return isLower ? (char)(c+('A'-'a')) : c;
+}
+
+
+#if 0
+/*
+ * Commented out because cstring.h defines uprv_tolower() to be
+ * the same as either uprv_asciitolower() or uprv_ebcdictolower()
+ * to reduce the amount of code to cover with tests.
+ *
+ * Note that this uprv_tolower() definition is likely to work for most
+ * charset families, not just ASCII and EBCDIC, because its #else branch
+ * is written generically.
+ */
+U_CAPI char U_EXPORT2
+uprv_tolower(char c) {
+#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+ if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
+ c=(char)(c+('a'-'A'));
+ }
+#else
+ if('A'<=c && c<='Z') {
+ c=(char)(c+('a'-'A'));
+ }
+#endif
+ return c;
+}
+#endif
+
+/* Maps the ASCII codes 0x41..0x5a (A-Z) to 0x61..0x7a (a-z); other values are unchanged. */
+U_CAPI char U_EXPORT2
+uprv_asciitolower(char c) {
+    const bool isUpper=(0x41<=c && c<=0x5a);
+    return isUpper ? (char)(c+0x20) : c;
+}
+
+/*
+ * Subtracts 0x40 from byte values in 0xc1..0xc9, 0xd1..0xd9 and 0xe2..0xe9
+ * (the EBCDIC uppercase letter ranges); other values are unchanged.
+ */
+U_CAPI char U_EXPORT2
+uprv_ebcdictolower(char c) {
+    /* Compare as unsigned so that the result does not depend on the signedness of char. */
+    const uint8_t b=(uint8_t)c;
+    const bool isUpper=
+        (0xc1<=b && b<=0xc9) ||
+        (0xd1<=b && b<=0xd9) ||
+        (0xe2<=b && b<=0xe9);
+    return isUpper ? (char)(c-0x40) : c;
+}
+
+
+/* Lowercases the string in place with uprv_tolower() and returns str; NULL is passed through. */
+U_CAPI char* U_EXPORT2
+T_CString_toLowerCase(char* str)
+{
+    if (str == NULL) {
+        return str;
+    }
+
+    /* The terminating NUL goes through the mapping as well, then ends the loop. */
+    char *cursor = str;
+    do {
+        *cursor = (char)uprv_tolower(*cursor);
+    } while (*(cursor++));
+
+    return str;
+}
+
+/* Uppercases the string in place with uprv_toupper() and returns str; NULL is passed through. */
+U_CAPI char* U_EXPORT2
+T_CString_toUpperCase(char* str)
+{
+    if (str == NULL) {
+        return str;
+    }
+
+    /* The terminating NUL goes through the mapping as well, then ends the loop. */
+    char *cursor = str;
+    do {
+        *cursor = (char)uprv_toupper(*cursor);
+    } while (*(cursor++));
+
+    return str;
+}
+
+/*
+ * Takes a int32_t and fills in a char* string with that number "radix"-based
+ * (radix 2..16).
+ * In base 10 a negative value is written with a leading '-'; in every other
+ * radix the value is formatted as its unsigned 32-bit bit pattern.
+ * Writes at most 33 chars (32 binary digits plus NUL);
+ * in base 10 at most 12 chars ("-2147483648" plus NUL).
+ * Returns the length of the string (not including the NUL).
+ */
+U_CAPI int32_t U_EXPORT2
+T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
+{
+    /* Sized for the longest possible output: 32 binary digits plus the NUL. */
+    char tbuf[33];
+    int32_t tbx;
+    uint8_t digit;
+    int32_t length = 0;
+    uint32_t uval;
+
+    U_ASSERT(radix>=2 && radix<=16);
+    uval = (uint32_t) v;
+    if(v<0 && radix == 10) {
+        /* Only in base 10 do we consider numbers to be signed. */
+        /* Negate in unsigned arithmetic: -v would overflow for INT32_MIN. */
+        uval = 0u - uval;
+        buffer[length++] = '-';
+    }
+
+    tbx = sizeof(tbuf)-1;
+    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
+    do {
+        digit = (uint8_t)(uval % radix);
+        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
+        uval  = uval / radix;
+    } while (uval != 0);
+
+    /* copy converted number into user buffer  */
+    uprv_strcpy(buffer+length, tbuf+tbx);
+    length += sizeof(tbuf) - tbx -1;
+    return length;
+}
+
+
+
+/*
+ * Takes a int64_t and fills in a char* string with that number "radix"-based
+ * (radix 2..16).
+ * In base 10 a negative value is written with a leading '-'; in every other
+ * radix the value is formatted as its unsigned 64-bit bit pattern.
+ * Writes at most 65 chars (64 binary digits plus NUL);
+ * in base 10 at most 21 chars ("-9223372036854775808" plus NUL).
+ * Returns the length of the string, not including the terminating NUL.
+ */
+U_CAPI int32_t U_EXPORT2
+T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
+{
+    /* Sized for the longest possible output: 64 binary digits plus the NUL. */
+    char tbuf[65];
+    int32_t tbx;
+    uint8_t digit;
+    int32_t length = 0;
+    uint64_t uval;
+
+    U_ASSERT(radix>=2 && radix<=16);
+    uval = (uint64_t) v;
+    if(v<0 && radix == 10) {
+        /* Only in base 10 do we consider numbers to be signed. */
+        /* Negate in unsigned arithmetic: -v would overflow for INT64_MIN. */
+        uval = (uint64_t)0 - uval;
+        buffer[length++] = '-';
+    }
+
+    tbx = sizeof(tbuf)-1;
+    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
+    do {
+        digit = (uint8_t)(uval % radix);
+        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
+        uval  = uval / radix;
+    } while (uval != 0);
+
+    /* copy converted number into user buffer  */
+    uprv_strcpy(buffer+length, tbuf+tbx);
+    length += sizeof(tbuf) - tbx -1;
+    return length;
+}
+
+
+/* Parses integerString with uprv_strtoul() in the given radix and returns the value as int32_t. */
+U_CAPI int32_t U_EXPORT2
+T_CString_stringToInteger(const char *integerString, int32_t radix)
+{
+    /* The end-of-parse position is not used, so no end pointer is requested. */
+    return (int32_t)uprv_strtoul(integerString, NULL, radix);
+}
+
+/*
+ * Compares two strings after mapping each char with uprv_tolower().
+ * A NULL string sorts before any non-NULL string; two NULLs compare equal.
+ * Returns 0 if equal, a negative value if str1 sorts first, a positive value otherwise.
+ */
+U_CAPI int U_EXPORT2
+uprv_stricmp(const char *str1, const char *str2) {
+    if(str1==NULL) {
+        return str2==NULL ? 0 : -1;
+    }
+    if(str2==NULL) {
+        return 1;
+    }
+
+    /* compare non-NULL strings lexically with lowercase */
+    for(;; ++str1, ++str2) {
+        const unsigned char left=(unsigned char)*str1;
+        const unsigned char right=(unsigned char)*str2;
+        if(left==0) {
+            /* str1 ended: equal if str2 ended too, otherwise the shorter string sorts first */
+            return right==0 ? 0 : -1;
+        }
+        if(right==0) {
+            return 1;
+        }
+        /* compare non-zero characters with lowercase */
+        const int diff=(int)(unsigned char)uprv_tolower(left)-(int)(unsigned char)uprv_tolower(right);
+        if(diff!=0) {
+            return diff;
+        }
+    }
+}
+
+/*
+ * Like uprv_stricmp(), but compares at most n chars.
+ * A NULL string sorts before any non-NULL string; two NULLs compare equal.
+ * Returns 0 if the first n chars are equal or both strings end before that.
+ */
+U_CAPI int U_EXPORT2
+uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
+    if(str1==NULL) {
+        return str2==NULL ? 0 : -1;
+    }
+    if(str2==NULL) {
+        return 1;
+    }
+
+    /* compare non-NULL strings lexically with lowercase */
+    while(n--!=0) {
+        const unsigned char left=(unsigned char)*str1;
+        const unsigned char right=(unsigned char)*str2;
+        if(left==0) {
+            /* str1 ended: equal if str2 ended too, otherwise the shorter string sorts first */
+            return right==0 ? 0 : -1;
+        }
+        if(right==0) {
+            return 1;
+        }
+        /* compare non-zero characters with lowercase */
+        const int diff=(int)(unsigned char)uprv_tolower(left)-(int)(unsigned char)uprv_tolower(right);
+        if(diff!=0) {
+            return diff;
+        }
+        ++str1;
+        ++str2;
+    }
+
+    /* n chars compared without finding a difference */
+    return 0;
+}
+
+/*
+ * Returns a uprv_malloc()ed copy of src, including its terminating NUL,
+ * or NULL if the allocation fails.
+ * src must not be NULL: it is passed straight to uprv_strlen().
+ */
+U_CAPI char* U_EXPORT2
+uprv_strdup(const char *src) {
+ /* +1 so that the terminating NUL is copied as well */
+ size_t len = uprv_strlen(src) + 1;
+ char *dup = (char *) uprv_malloc(len);
+
+ if (dup) {
+ uprv_memcpy(dup, src, len);
+ }
+
+ return dup;
+}
+
+/*
+ * Copies n bytes of src into a newly uprv_malloc()ed buffer of n+1 bytes and
+ * NUL-terminates it. A negative n duplicates the whole string via uprv_strdup().
+ * Returns NULL if the allocation fails.
+ */
+U_CAPI char* U_EXPORT2
+uprv_strndup(const char *src, int32_t n) {
+    if(n < 0) {
+        return uprv_strdup(src);
+    }
+
+    char *copy = (char*)uprv_malloc(n+1);
+    if (copy) {
+        uprv_memcpy(copy, src, n);
+        copy[n] = 0;
+    }
+    return copy;
+}
diff --git a/thirdparty/icu4c/common/cstring.h b/thirdparty/icu4c/common/cstring.h
new file mode 100644
index 0000000000..3a14e4216c
--- /dev/null
+++ b/thirdparty/icu4c/common/cstring.h
@@ -0,0 +1,126 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File CSTRING.H
+*
+* Contains CString interface
+*
+* @author Helena Shih
+*
+* Modification History:
+*
+* Date Name Description
+* 6/17/98 hshih Created.
+* 05/03/99 stephen Changed from functions to macros.
+* 06/14/99 stephen Added icu_strncat, icu_strncmp, icu_tolower
+*
+******************************************************************************
+*/
+
+#ifndef CSTRING_H
+#define CSTRING_H 1
+
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#define uprv_strcpy(dst, src) U_STANDARD_CPP_NAMESPACE strcpy(dst, src)
+#define uprv_strlen(str) U_STANDARD_CPP_NAMESPACE strlen(str)
+#define uprv_strcmp(s1, s2) U_STANDARD_CPP_NAMESPACE strcmp(s1, s2)
+#define uprv_strcat(dst, src) U_STANDARD_CPP_NAMESPACE strcat(dst, src)
+#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c)
+#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c)
+#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c)
+#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)
+#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)
+#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)
+
+/**
+ * Is c an ASCII-repertoire letter a-z or A-Z?
+ * Note: The implementation is specific to whether ICU is compiled for
+ * an ASCII-based or EBCDIC-based machine. There just does not seem to be a better name for this.
+ */
+U_CAPI UBool U_EXPORT2
+uprv_isASCIILetter(char c);
+
+// NOTE: For u_asciiToUpper that takes a UChar, see ustr_imp.h
+
+U_CAPI char U_EXPORT2
+uprv_toupper(char c);
+
+
+U_CAPI char U_EXPORT2
+uprv_asciitolower(char c);
+
+U_CAPI char U_EXPORT2
+uprv_ebcdictolower(char c);
+
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define uprv_tolower uprv_asciitolower
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define uprv_tolower uprv_ebcdictolower
+#else
+# error U_CHARSET_FAMILY is not valid
+#endif
+
+#define uprv_strtod(source, end) U_STANDARD_CPP_NAMESPACE strtod(source, end)
+#define uprv_strtoul(str, end, base) U_STANDARD_CPP_NAMESPACE strtoul(str, end, base)
+#define uprv_strtol(str, end, base) U_STANDARD_CPP_NAMESPACE strtol(str, end, base)
+
+/* Conversion from a digit to the character with radix base from 2-19 */
+/* May need to use U_UPPER_ORDINAL*/
+#define T_CString_itosOffset(a) ((a)<=9?('0'+(a)):('A'+(a)-10))
+
+U_CAPI char* U_EXPORT2
+uprv_strdup(const char *src);
+
+/**
+ * uprv_malloc n+1 bytes, and copy n bytes from src into the new string.
+ * Terminate with a null at offset n. If n is negative (e.g. -1), works like uprv_strdup
+ * @param src
+ * @param n length of the input string, not including null.
+ * @return new string (owned by caller, use uprv_free to free).
+ * @internal
+ */
+U_CAPI char* U_EXPORT2
+uprv_strndup(const char *src, int32_t n);
+
+U_CAPI char* U_EXPORT2
+T_CString_toLowerCase(char* str);
+
+U_CAPI char* U_EXPORT2
+T_CString_toUpperCase(char* str);
+
+U_CAPI int32_t U_EXPORT2
+T_CString_integerToString(char *buffer, int32_t n, int32_t radix);
+
+U_CAPI int32_t U_EXPORT2
+T_CString_int64ToString(char *buffer, int64_t n, uint32_t radix);
+
+U_CAPI int32_t U_EXPORT2
+T_CString_stringToInteger(const char *integerString, int32_t radix);
+
+/**
+ * Case-insensitive, language-independent string comparison
+ * limited to the ASCII character repertoire.
+ */
+U_CAPI int U_EXPORT2
+uprv_stricmp(const char *str1, const char *str2);
+
+/**
+ * Case-insensitive, language-independent string comparison
+ * limited to the ASCII character repertoire.
+ */
+U_CAPI int U_EXPORT2
+uprv_strnicmp(const char *str1, const char *str2, uint32_t n);
+
+#endif /* ! CSTRING_H */
diff --git a/thirdparty/icu4c/common/cwchar.cpp b/thirdparty/icu4c/common/cwchar.cpp
new file mode 100644
index 0000000000..20c7d71e0f
--- /dev/null
+++ b/thirdparty/icu4c/common/cwchar.cpp
@@ -0,0 +1,55 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2001, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: cwchar.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001may25
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !U_HAVE_WCSCPY
+
+#include "cwchar.h"
+
+/*
+ * Appends src, including its terminating 0, to the end of dst and returns dst.
+ * Declared with U_EXPORT2 to match the prototype in cwchar.h.
+ */
+U_CAPI wchar_t* U_EXPORT2
+uprv_wcscat(wchar_t *dst, const wchar_t *src) {
+    wchar_t *start=dst;
+    /* find the terminator of dst */
+    while(*dst!=0) {
+        ++dst;
+    }
+    /* copy src over it, up to and including the terminating 0 */
+    while((*dst=*src)!=0) {
+        ++dst;
+        ++src;
+    }
+    return start;
+}
+
+/*
+ * Copies src, including its terminating 0, to dst and returns dst.
+ * Declared with U_EXPORT2 to match the prototype in cwchar.h.
+ */
+U_CAPI wchar_t* U_EXPORT2
+uprv_wcscpy(wchar_t *dst, const wchar_t *src) {
+    wchar_t *start=dst;
+    while((*dst=*src)!=0) {
+        ++dst;
+        ++src;
+    }
+    return start;
+}
+
+/*
+ * Returns the number of wchar_t units in src before the terminating 0.
+ * Declared with U_EXPORT2 to match the prototype in cwchar.h.
+ */
+U_CAPI size_t U_EXPORT2
+uprv_wcslen(const wchar_t *src) {
+    const wchar_t *start=src;
+    while(*src!=0) {
+        ++src;
+    }
+    /* src never moves backwards, so the pointer difference is non-negative */
+    return (size_t)(src-start);
+}
+
+#endif
+
diff --git a/thirdparty/icu4c/common/cwchar.h b/thirdparty/icu4c/common/cwchar.h
new file mode 100644
index 0000000000..8fd041a1b9
--- /dev/null
+++ b/thirdparty/icu4c/common/cwchar.h
@@ -0,0 +1,58 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2001, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: cwchar.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001may25
+* created by: Markus W. Scherer
+*
+* This file contains ICU-internal definitions of wchar_t operations.
+* These definitions were moved here from cstring.h so that fewer
+* ICU implementation files include wchar.h.
+*/
+
+#ifndef __CWCHAR_H__
+#define __CWCHAR_H__
+
+#include <string.h>
+#include <stdlib.h>
+#include "unicode/utypes.h"
+
+/* Do this after utypes.h so that we have U_HAVE_WCHAR_H . */
+#if U_HAVE_WCHAR_H
+# include <wchar.h>
+#endif
+
+/*===========================================================================*/
+/* Wide-character functions */
+/*===========================================================================*/
+
+/* The following are not available on all systems, defined in wchar.h or string.h. */
+#if U_HAVE_WCSCPY
+# define uprv_wcscpy wcscpy
+# define uprv_wcscat wcscat
+# define uprv_wcslen wcslen
+#else
+U_CAPI wchar_t* U_EXPORT2
+uprv_wcscpy(wchar_t *dst, const wchar_t *src);
+U_CAPI wchar_t* U_EXPORT2
+uprv_wcscat(wchar_t *dst, const wchar_t *src);
+U_CAPI size_t U_EXPORT2
+uprv_wcslen(const wchar_t *src);
+#endif
+
+/* The following are part of the ANSI C standard, defined in stdlib.h . */
+#define uprv_wcstombs(mbstr, wcstr, count) U_STANDARD_CPP_NAMESPACE wcstombs(mbstr, wcstr, count)
+#define uprv_mbstowcs(wcstr, mbstr, count) U_STANDARD_CPP_NAMESPACE mbstowcs(wcstr, mbstr, count)
+
+
+#endif
diff --git a/thirdparty/icu4c/common/dictbe.cpp b/thirdparty/icu4c/common/dictbe.cpp
new file mode 100644
index 0000000000..b42cdf03fa
--- /dev/null
+++ b/thirdparty/icu4c/common/dictbe.cpp
@@ -0,0 +1,1410 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2016, International Business Machines Corporation
+ * and others. All Rights Reserved.
+ *******************************************************************************
+ */
+
+#include <utility>
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "brkeng.h"
+#include "dictbe.h"
+#include "unicode/uniset.h"
+#include "unicode/chariter.h"
+#include "unicode/ubrk.h"
+#include "utracimp.h"
+#include "uvectr32.h"
+#include "uvector.h"
+#include "uassert.h"
+#include "unicode/normlzr.h"
+#include "cmemory.h"
+#include "dictionarydata.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ ******************************************************************
+ */
+
+DictionaryBreakEngine::DictionaryBreakEngine() { // no explicit member set-up; fSet is assigned by setCharacters()
+}
+
+DictionaryBreakEngine::~DictionaryBreakEngine() { // nothing to release at this level
+}
+
+UBool
+DictionaryBreakEngine::handles(UChar32 c) const {
+ return fSet.contains(c); // membership test against the set stored by setCharacters()
+}
+
+int32_t
+DictionaryBreakEngine::findBreaks( UText *text,
+ int32_t startPos,
+ int32_t endPos,
+ UVector32 &foundBreaks ) const {
+ (void)startPos; // TODO: remove this param?
+ int32_t result = 0;
+
+ // Find the span of characters included in the set.
+ // The span to break begins at the current position in the text and
+ // extends forward until endPos or the first character that is not in fSet.
+
+ int32_t start = (int32_t)utext_getNativeIndex(text);
+ int32_t current;
+ int32_t rangeStart;
+ int32_t rangeEnd;
+ UChar32 c = utext_current32(text);
+ while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
+ utext_next32(text); // TODO: recast loop for postincrement
+ c = utext_current32(text);
+ }
+ rangeStart = start;
+ rangeEnd = current;
+ result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks); // the engine-specific work happens here
+ utext_setNativeIndex(text, current); // leave the text positioned at the end of the scanned span
+
+ return result; // whatever divideUpDictionaryRange() reported for the span
+}
+
+void
+DictionaryBreakEngine::setCharacters( const UnicodeSet &set ) {
+ fSet = set; // this is the set consulted by handles() and findBreaks()
+ // Compact for caching
+ fSet.compact();
+}
+
+/*
+ ******************************************************************
+ * PossibleWord
+ */
+
+// Helper class for improving readability of the Thai/Lao/Burmese/Khmer word break
+// algorithms. Short accessors are inline; the other members are defined below the class.
+
+// List size, limited by the maximum number of words in the dictionary
+// that form a nested sequence.
+static const int32_t POSSIBLE_WORD_LIST_MAX = 20;
+
+class PossibleWord {
+private:
+ // list of word candidate lengths, in increasing length order
+ // TODO: bytes would be sufficient for word lengths.
+ int32_t count; // Count of candidates
+ int32_t prefix; // The longest match with a dictionary word
+ int32_t offset; // Offset in the text of these candidates
+ int32_t mark; // Index (into the length arrays) of the preferred candidate
+ int32_t current; // Index (into the length arrays) of the candidate we're currently looking at
+ int32_t cuLengths[POSSIBLE_WORD_LIST_MAX]; // Word Lengths, in code units.
+ int32_t cpLengths[POSSIBLE_WORD_LIST_MAX]; // Word Lengths, in code points.
+
+public:
+ PossibleWord() : count(0), prefix(0), offset(-1), mark(0), current(0) {} // offset -1: presumably never a real text index, so the first candidates() call does a lookup
+ ~PossibleWord() {}
+
+ // Fill the list of candidates if needed, select the longest, and return the number found
+ int32_t candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd );
+
+ // Select the currently marked candidate, point the text after it, and return its length in code units
+ int32_t acceptMarked( UText *text );
+
+ // Back up from the current candidate to the next shorter one; return TRUE if that exists
+ // and point the text after it
+ UBool backUp( UText *text );
+
+ // Return the longest prefix this candidate location shares with a dictionary word
+ // Return value is in code points.
+ int32_t longestPrefix() { return prefix; }
+
+ // Mark the current candidate as the one we like
+ void markCurrent() { mark = current; }
+
+ // Get length in code points of the marked word.
+ int32_t markedCPLength() { return cpLengths[mark]; }
+};
+
+
+int32_t PossibleWord::candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd ) {
+ // TODO: If getIndex is too slow, use offset < 0 and add discardAll()
+ int32_t start = (int32_t)utext_getNativeIndex(text);
+ if (start != offset) { // results from a previous call at the same text position are reused
+ offset = start;
+ count = dict->matches(text, rangeEnd-start, UPRV_LENGTHOF(cuLengths), cuLengths, cpLengths, NULL, &prefix);
+ // Dictionary leaves text after longest prefix, not longest word. Back up.
+ if (count <= 0) {
+ utext_setNativeIndex(text, start);
+ }
+ }
+ if (count > 0) {
+ utext_setNativeIndex(text, start+cuLengths[count-1]); // position the text after the last listed candidate
+ }
+ current = count-1; // last entry of the list; -1 when nothing matched
+ mark = current; // the marked candidate is reset on every call, cached or not
+ return count;
+}
+
+int32_t
+PossibleWord::acceptMarked( UText *text ) {
+ utext_setNativeIndex(text, offset + cuLengths[mark]); // move the text to just past the marked candidate
+ return cuLengths[mark]; // length of the accepted word in code units
+}
+
+
+UBool
+PossibleWord::backUp( UText *text ) {
+ if (current > 0) { // an earlier entry in the length arrays is still available
+ utext_setNativeIndex(text, offset + cuLengths[--current]); // step to it and point the text after that word
+ return TRUE;
+ }
+ return FALSE; // already at the first entry (or the list is empty)
+}
+
+/*
+ ******************************************************************
+ * ThaiBreakEngine
+ */
+
+// How many words in a row are "good enough"?
+static const int32_t THAI_LOOKAHEAD = 3;
+
+// Will not combine a non-word with a preceding dictionary word of this many code points or more
+static const int32_t THAI_ROOT_COMBINE_THRESHOLD = 3;
+
+// Will not combine a non-word that shares at least this much prefix with a
+// dictionary word, with a preceding word
+static const int32_t THAI_PREFIX_COMBINE_THRESHOLD = 3;
+
+// Elision character
+static const int32_t THAI_PAIYANNOI = 0x0E2F;
+
+// Repeat character
+static const int32_t THAI_MAIYAMOK = 0x0E46;
+
+// Minimum word size
+static const int32_t THAI_MIN_WORD = 2;
+
+// Minimum number of characters for two words
+static const int32_t THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2;
+
+ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
+ : DictionaryBreakEngine(),
+ fDictionary(adoptDictionary)
+{
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+ UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Thai");
+ fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
+ if (U_SUCCESS(status)) {
+ setCharacters(fThaiWordSet); // only a successfully built set becomes the handled set
+ }
+ fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status); // NOTE(review): the rest of the set-up runs even if status already reports a failure
+ fMarkSet.add(0x0020); // U+0020 SPACE is treated like a mark as well
+ fEndWordSet = fThaiWordSet;
+ fEndWordSet.remove(0x0E31); // MAI HAN-AKAT
+ fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
+ fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK
+ fBeginWordSet.add(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
+ fSuffixSet.add(THAI_PAIYANNOI);
+ fSuffixSet.add(THAI_MAIYAMOK);
+
+ // Compact for caching.
+ fMarkSet.compact();
+ fEndWordSet.compact();
+ fBeginWordSet.compact();
+ fSuffixSet.compact();
+ UTRACE_EXIT_STATUS(status);
+}
+
+ThaiBreakEngine::~ThaiBreakEngine() {
+ delete fDictionary; // the matcher handed to the constructor (adoptDictionary) is freed here
+}
+
+int32_t
+ThaiBreakEngine::divideUpDictionaryRange( UText *text,
+ int32_t rangeStart, // native index where the dictionary run begins
+ int32_t rangeEnd, // native index at which the run ends (exclusive)
+ UVector32 &foundBreaks ) const { // receives the native index of each word end found
+ utext_setNativeIndex(text, rangeStart);
+ utext_moveIndex32(text, THAI_MIN_WORD_SPAN); // the minimum span is measured in code points here
+ if (utext_getNativeIndex(text) >= rangeEnd) {
+ return 0; // Not enough characters for two words
+ }
+ utext_setNativeIndex(text, rangeStart);
+
+
+ uint32_t wordsFound = 0;
+ int32_t cpWordLength = 0; // Word Length in Code Points.
+ int32_t cuWordLength = 0; // Word length in code units (UText native indexing)
+ int32_t current;
+ UErrorCode status = U_ZERO_ERROR;
+ PossibleWord words[THAI_LOOKAHEAD];
+
+ utext_setNativeIndex(text, rangeStart); // repeats the reset done a few lines above
+
+ while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
+ cpWordLength = 0;
+ cuWordLength = 0;
+
+ // Look for candidate words at the current position
+ int32_t candidates = words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
+
+ // If we found exactly one, use that
+ if (candidates == 1) {
+ cuWordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(text);
+ cpWordLength = words[wordsFound % THAI_LOOKAHEAD].markedCPLength();
+ wordsFound += 1;
+ }
+ // If there was more than one, see which one can take us forward the most words
+ else if (candidates > 1) {
+ // If we're already at the end of the range, we're done
+ if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+ goto foundBest;
+ }
+ do {
+ int32_t wordsMatched = 1; // reset on every pass, so the '< 2' test below always succeeds
+ if (words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
+ if (wordsMatched < 2) {
+ // Followed by another dictionary word; mark first word as a good candidate
+ words[wordsFound%THAI_LOOKAHEAD].markCurrent();
+ wordsMatched = 2;
+ }
+
+ // If we're already at the end of the range, we're done
+ if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+ goto foundBest;
+ }
+
+ // See if any of the possible second words is followed by a third word
+ do {
+ // If we find a third word, stop right away
+ if (words[(wordsFound + 2) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
+ words[wordsFound % THAI_LOOKAHEAD].markCurrent();
+ goto foundBest;
+ }
+ }
+ while (words[(wordsFound + 1) % THAI_LOOKAHEAD].backUp(text));
+ }
+ }
+ while (words[wordsFound % THAI_LOOKAHEAD].backUp(text));
+foundBest:
+ // Set UText position to after the accepted word.
+ cuWordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(text);
+ cpWordLength = words[wordsFound % THAI_LOOKAHEAD].markedCPLength();
+ wordsFound += 1;
+ }
+
+ // We come here after having either found a word or not. We look ahead to the
+ // next word. If it's not a dictionary word, we will combine it with the word we
+ // just found (if there is one), but only if the preceding word does not exceed
+ // the threshold.
+ // The text iterator should now be positioned at the end of the word we found.
+
+ UChar32 uc = 0;
+ if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < THAI_ROOT_COMBINE_THRESHOLD) {
+ // if it is a dictionary word, do nothing. If it isn't, then if there is
+ // no preceding word, or the non-word shares less than the minimum threshold
+ // of characters with a dictionary word, then scan to resynchronize
+ if (words[wordsFound % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
+ && (cuWordLength == 0
+ || words[wordsFound%THAI_LOOKAHEAD].longestPrefix() < THAI_PREFIX_COMBINE_THRESHOLD)) {
+ // Look for a plausible word boundary
+ int32_t remaining = rangeEnd - (current+cuWordLength);
+ UChar32 pc;
+ int32_t chars = 0; // code units skipped so far by the scan below
+ for (;;) {
+ int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
+ pc = utext_next32(text);
+ int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
+ chars += pcSize;
+ remaining -= pcSize;
+ if (remaining <= 0) {
+ break;
+ }
+ uc = utext_current32(text);
+ if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
+ // Maybe. See if it's in the dictionary.
+ // NOTE: In the original Apple code, checked that the next
+ // two characters after uc were not 0x0E4C THANTHAKHAT before
+ // checking the dictionary. That is just a performance filter,
+ // but it's not clear it's faster than checking the trie.
+ int32_t num_candidates = words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
+ utext_setNativeIndex(text, current + cuWordLength + chars); // undo the movement caused by the lookup
+ if (num_candidates > 0) {
+ break;
+ }
+ }
+ }
+
+ // Bump the word count if there wasn't already one
+ if (cuWordLength <= 0) {
+ wordsFound += 1;
+ }
+
+ // Update the length with the passed-over characters
+ cuWordLength += chars;
+ }
+ else {
+ // Back up to where we were for next iteration
+ utext_setNativeIndex(text, current+cuWordLength);
+ }
+ }
+
+ // Never stop before a combining mark.
+ int32_t currPos;
+ while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
+ utext_next32(text);
+ cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
+ }
+
+ // Look ahead for possible suffixes if a dictionary word does not follow.
+ // We do this in code rather than using a rule so that the heuristic
+ // resynch continues to function. For example, one of the suffix characters
+ // could be a typo in the middle of a word.
+ if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cuWordLength > 0) {
+ if (words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
+ && fSuffixSet.contains(uc = utext_current32(text))) {
+ if (uc == THAI_PAIYANNOI) {
+ if (!fSuffixSet.contains(utext_previous32(text))) {
+ // Skip over previous end and PAIYANNOI
+ utext_next32(text);
+ int32_t paiyannoiIndex = (int32_t)utext_getNativeIndex(text);
+ utext_next32(text);
+ cuWordLength += (int32_t)utext_getNativeIndex(text) - paiyannoiIndex; // Add PAIYANNOI to word
+ uc = utext_current32(text); // Fetch next character
+ }
+ else {
+ // Restore prior position
+ utext_next32(text);
+ }
+ }
+ if (uc == THAI_MAIYAMOK) {
+ if (utext_previous32(text) != THAI_MAIYAMOK) {
+ // Skip over previous end and MAIYAMOK
+ utext_next32(text);
+ int32_t maiyamokIndex = (int32_t)utext_getNativeIndex(text);
+ utext_next32(text);
+ cuWordLength += (int32_t)utext_getNativeIndex(text) - maiyamokIndex; // Add MAIYAMOK to word
+ }
+ else {
+ // Restore prior position
+ utext_next32(text);
+ }
+ }
+ }
+ else {
+ utext_setNativeIndex(text, current+cuWordLength);
+ }
+ }
+
+ // Did we find a word on this iteration? If so, push it on the break stack
+ if (cuWordLength > 0) {
+ foundBreaks.push((current+cuWordLength), status);
+ }
+ }
+
+ // Don't return a break for the end of the dictionary range if there is one there.
+ if (foundBreaks.peeki() >= rangeEnd) {
+ (void) foundBreaks.popi();
+ wordsFound -= 1;
+ }
+
+ return wordsFound;
+}
+
+/*
+ ******************************************************************
+ * LaoBreakEngine
+ */
+
+// How many words in a row are "good enough"?
+static const int32_t LAO_LOOKAHEAD = 3;
+
+// Will not combine a non-word with a preceding dictionary word of this many code points or more
+static const int32_t LAO_ROOT_COMBINE_THRESHOLD = 3;
+
+// Will not combine a non-word that shares at least this much prefix with a
+// dictionary word, with a preceding word
+static const int32_t LAO_PREFIX_COMBINE_THRESHOLD = 3;
+
+// Minimum word size
+static const int32_t LAO_MIN_WORD = 2;
+
+// Minimum length of a range worth dividing (two minimum-size words); compared against rangeEnd - rangeStart
+static const int32_t LAO_MIN_WORD_SPAN = LAO_MIN_WORD * 2;
+
+LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
+ : DictionaryBreakEngine(),
+ fDictionary(adoptDictionary)
+{
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+ UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Laoo");
+ fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
+ if (U_SUCCESS(status)) {
+ setCharacters(fLaoWordSet); // only a successfully built set becomes the handled set
+ }
+ fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status); // NOTE(review): the rest of the set-up runs even if status already reports a failure
+ fMarkSet.add(0x0020); // U+0020 SPACE is treated like a mark as well
+ fEndWordSet = fLaoWordSet;
+ fEndWordSet.remove(0x0EC0, 0x0EC4); // prefix vowels
+ fBeginWordSet.add(0x0E81, 0x0EAE); // basic consonants (including holes for corresponding Thai characters)
+ fBeginWordSet.add(0x0EDC, 0x0EDD); // digraph consonants (no Thai equivalent)
+ fBeginWordSet.add(0x0EC0, 0x0EC4); // prefix vowels
+
+ // Compact for caching.
+ fMarkSet.compact();
+ fEndWordSet.compact();
+ fBeginWordSet.compact();
+ UTRACE_EXIT_STATUS(status);
+}
+
+LaoBreakEngine::~LaoBreakEngine() {
+ delete fDictionary; // the matcher handed to the constructor (adoptDictionary) is freed here
+}
+
+int32_t
+LaoBreakEngine::divideUpDictionaryRange( UText *text,
+ int32_t rangeStart, // native index where the dictionary run begins
+ int32_t rangeEnd, // native index at which the run ends (exclusive)
+ UVector32 &foundBreaks ) const { // receives the native index of each word end found
+ if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) {
+ return 0; // Not enough characters for two words
+ }
+
+ uint32_t wordsFound = 0;
+ int32_t cpWordLength = 0; // length of the word just found, in code points
+ int32_t cuWordLength = 0; // length of the word just found, in code units
+ int32_t current;
+ UErrorCode status = U_ZERO_ERROR;
+ PossibleWord words[LAO_LOOKAHEAD];
+
+ utext_setNativeIndex(text, rangeStart);
+
+ while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
+ cuWordLength = 0;
+ cpWordLength = 0;
+
+ // Look for candidate words at the current position
+ int32_t candidates = words[wordsFound%LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
+
+ // If we found exactly one, use that
+ if (candidates == 1) {
+ cuWordLength = words[wordsFound % LAO_LOOKAHEAD].acceptMarked(text);
+ cpWordLength = words[wordsFound % LAO_LOOKAHEAD].markedCPLength();
+ wordsFound += 1;
+ }
+ // If there was more than one, see which one can take us forward the most words
+ else if (candidates > 1) {
+ // If we're already at the end of the range, we're done
+ if (utext_getNativeIndex(text) >= rangeEnd) {
+ goto foundBest;
+ }
+ do {
+ int32_t wordsMatched = 1; // reset on every pass, so the '< 2' test below always succeeds
+ if (words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
+ if (wordsMatched < 2) {
+ // Followed by another dictionary word; mark first word as a good candidate
+ words[wordsFound%LAO_LOOKAHEAD].markCurrent();
+ wordsMatched = 2;
+ }
+
+ // If we're already at the end of the range, we're done
+ if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+ goto foundBest;
+ }
+
+ // See if any of the possible second words is followed by a third word
+ do {
+ // If we find a third word, stop right away
+ if (words[(wordsFound + 2) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
+ words[wordsFound % LAO_LOOKAHEAD].markCurrent();
+ goto foundBest;
+ }
+ }
+ while (words[(wordsFound + 1) % LAO_LOOKAHEAD].backUp(text));
+ }
+ }
+ while (words[wordsFound % LAO_LOOKAHEAD].backUp(text));
+foundBest:
+ cuWordLength = words[wordsFound % LAO_LOOKAHEAD].acceptMarked(text);
+ cpWordLength = words[wordsFound % LAO_LOOKAHEAD].markedCPLength();
+ wordsFound += 1;
+ }
+
+ // We come here after having either found a word or not. We look ahead to the
+ // next word. If it's not a dictionary word, we will combine it with the word we
+ // just found (if there is one), but only if the preceding word does not exceed
+ // the threshold.
+ // The text iterator should now be positioned at the end of the word we found.
+ if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < LAO_ROOT_COMBINE_THRESHOLD) {
+ // if it is a dictionary word, do nothing. If it isn't, then if there is
+ // no preceding word, or the non-word shares less than the minimum threshold
+ // of characters with a dictionary word, then scan to resynchronize
+ if (words[wordsFound % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
+ && (cuWordLength == 0
+ || words[wordsFound%LAO_LOOKAHEAD].longestPrefix() < LAO_PREFIX_COMBINE_THRESHOLD)) {
+ // Look for a plausible word boundary
+ int32_t remaining = rangeEnd - (current + cuWordLength);
+ UChar32 pc;
+ UChar32 uc;
+ int32_t chars = 0; // code units skipped so far by the scan below
+ for (;;) {
+ int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
+ pc = utext_next32(text);
+ int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
+ chars += pcSize;
+ remaining -= pcSize;
+ if (remaining <= 0) {
+ break;
+ }
+ uc = utext_current32(text);
+ if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
+ // Maybe. See if it's in the dictionary.
+ // TODO: this looks iffy; compare with old code.
+ int32_t num_candidates = words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
+ utext_setNativeIndex(text, current + cuWordLength + chars); // undo the movement caused by the lookup
+ if (num_candidates > 0) {
+ break;
+ }
+ }
+ }
+
+ // Bump the word count if there wasn't already one
+ if (cuWordLength <= 0) {
+ wordsFound += 1;
+ }
+
+ // Update the length with the passed-over characters
+ cuWordLength += chars;
+ }
+ else {
+ // Back up to where we were for next iteration
+ utext_setNativeIndex(text, current + cuWordLength);
+ }
+ }
+
+ // Never stop before a combining mark.
+ int32_t currPos;
+ while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
+ utext_next32(text);
+ cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
+ }
+
+ // Look ahead for possible suffixes if a dictionary word does not follow.
+ // We do this in code rather than using a rule so that the heuristic
+ // resynch continues to function. For example, one of the suffix characters
+ // could be a typo in the middle of a word.
+ // NOT CURRENTLY APPLICABLE TO LAO
+
+ // Did we find a word on this iteration? If so, push it on the break stack
+ if (cuWordLength > 0) {
+ foundBreaks.push((current+cuWordLength), status);
+ }
+ }
+
+ // Don't return a break for the end of the dictionary range if there is one there.
+ if (foundBreaks.peeki() >= rangeEnd) {
+ (void) foundBreaks.popi();
+ wordsFound -= 1;
+ }
+
+ return wordsFound;
+}
+
+/*
+ ******************************************************************
+ * BurmeseBreakEngine
+ */
+
+// How many words in a row are "good enough"?
+static const int32_t BURMESE_LOOKAHEAD = 3;
+
+// Will not combine a non-word with a preceding dictionary word of this many code points or more
+static const int32_t BURMESE_ROOT_COMBINE_THRESHOLD = 3;
+
+// Will not combine a non-word that shares at least this much prefix with a
+// dictionary word, with a preceding word
+static const int32_t BURMESE_PREFIX_COMBINE_THRESHOLD = 3;
+
+// Minimum word size
+static const int32_t BURMESE_MIN_WORD = 2;
+
+// Minimum length of a range worth dividing (two minimum-size words); compared against rangeEnd - rangeStart
+static const int32_t BURMESE_MIN_WORD_SPAN = BURMESE_MIN_WORD * 2;
+
+BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
+ : DictionaryBreakEngine(),
+ fDictionary(adoptDictionary)
+{
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+ UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Mymr");
+ fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
+ if (U_SUCCESS(status)) {
+ setCharacters(fBurmeseWordSet); // only a successfully built set becomes the handled set
+ }
+ fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status); // NOTE(review): the rest of the set-up runs even if status already reports a failure
+ fMarkSet.add(0x0020); // U+0020 SPACE is treated like a mark as well
+ fEndWordSet = fBurmeseWordSet; // unlike the Thai and Lao engines, nothing is removed from the end-of-word set
+ fBeginWordSet.add(0x1000, 0x102A); // basic consonants and independent vowels
+
+ // Compact for caching.
+ fMarkSet.compact();
+ fEndWordSet.compact();
+ fBeginWordSet.compact();
+ UTRACE_EXIT_STATUS(status);
+}
+
+BurmeseBreakEngine::~BurmeseBreakEngine() {
+ delete fDictionary; // the matcher handed to the constructor (adoptDictionary) is freed here
+}
+
+int32_t
+BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
+ int32_t rangeStart, // native index where the dictionary run begins
+ int32_t rangeEnd, // native index at which the run ends (exclusive)
+ UVector32 &foundBreaks ) const { // receives the native index of each word end found
+ if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) {
+ return 0; // Not enough characters for two words
+ }
+
+ uint32_t wordsFound = 0;
+ int32_t cpWordLength = 0; // length of the word just found, in code points
+ int32_t cuWordLength = 0; // length of the word just found, in code units
+ int32_t current;
+ UErrorCode status = U_ZERO_ERROR;
+ PossibleWord words[BURMESE_LOOKAHEAD];
+
+ utext_setNativeIndex(text, rangeStart);
+
+ while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
+ cuWordLength = 0;
+ cpWordLength = 0;
+
+ // Look for candidate words at the current position
+ int32_t candidates = words[wordsFound%BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
+
+ // If we found exactly one, use that
+ if (candidates == 1) {
+ cuWordLength = words[wordsFound % BURMESE_LOOKAHEAD].acceptMarked(text);
+ cpWordLength = words[wordsFound % BURMESE_LOOKAHEAD].markedCPLength();
+ wordsFound += 1;
+ }
+ // If there was more than one, see which one can take us forward the most words
+ else if (candidates > 1) {
+ // If we're already at the end of the range, we're done
+ if (utext_getNativeIndex(text) >= rangeEnd) {
+ goto foundBest;
+ }
+ do {
+ int32_t wordsMatched = 1; // reset on every pass, so the '< 2' test below always succeeds
+ if (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
+ if (wordsMatched < 2) {
+ // Followed by another dictionary word; mark first word as a good candidate
+ words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
+ wordsMatched = 2;
+ }
+
+ // If we're already at the end of the range, we're done
+ if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+ goto foundBest;
+ }
+
+ // See if any of the possible second words is followed by a third word
+ do {
+ // If we find a third word, stop right away
+ if (words[(wordsFound + 2) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
+ words[wordsFound % BURMESE_LOOKAHEAD].markCurrent();
+ goto foundBest;
+ }
+ }
+ while (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].backUp(text));
+ }
+ }
+ while (words[wordsFound % BURMESE_LOOKAHEAD].backUp(text));
+foundBest:
+ cuWordLength = words[wordsFound % BURMESE_LOOKAHEAD].acceptMarked(text);
+ cpWordLength = words[wordsFound % BURMESE_LOOKAHEAD].markedCPLength();
+ wordsFound += 1;
+ }
+
+ // We come here after having either found a word or not. We look ahead to the
+ // next word. If it's not a dictionary word, we will combine it with the word we
+ // just found (if there is one), but only if the preceding word does not exceed
+ // the threshold.
+ // The text iterator should now be positioned at the end of the word we found.
+ if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < BURMESE_ROOT_COMBINE_THRESHOLD) {
+ // if it is a dictionary word, do nothing. If it isn't, then if there is
+ // no preceding word, or the non-word shares less than the minimum threshold
+ // of characters with a dictionary word, then scan to resynchronize
+ if (words[wordsFound % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
+ && (cuWordLength == 0
+ || words[wordsFound%BURMESE_LOOKAHEAD].longestPrefix() < BURMESE_PREFIX_COMBINE_THRESHOLD)) {
+ // Look for a plausible word boundary
+ int32_t remaining = rangeEnd - (current + cuWordLength);
+ UChar32 pc;
+ UChar32 uc;
+ int32_t chars = 0; // code units skipped so far by the scan below
+ for (;;) {
+ int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
+ pc = utext_next32(text);
+ int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
+ chars += pcSize;
+ remaining -= pcSize;
+ if (remaining <= 0) {
+ break;
+ }
+ uc = utext_current32(text);
+ if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
+ // Maybe. See if it's in the dictionary.
+ // TODO: this looks iffy; compare with old code.
+ int32_t num_candidates = words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
+ utext_setNativeIndex(text, current + cuWordLength + chars); // undo the movement caused by the lookup
+ if (num_candidates > 0) {
+ break;
+ }
+ }
+ }
+
+ // Bump the word count if there wasn't already one
+ if (cuWordLength <= 0) {
+ wordsFound += 1;
+ }
+
+ // Update the length with the passed-over characters
+ cuWordLength += chars;
+ }
+ else {
+ // Back up to where we were for next iteration
+ utext_setNativeIndex(text, current + cuWordLength);
+ }
+ }
+
+ // Never stop before a combining mark.
+ int32_t currPos;
+ while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
+ utext_next32(text);
+ cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
+ }
+
+ // Look ahead for possible suffixes if a dictionary word does not follow.
+ // We do this in code rather than using a rule so that the heuristic
+ // resynch continues to function. For example, one of the suffix characters
+ // could be a typo in the middle of a word.
+ // NOT CURRENTLY APPLICABLE TO BURMESE
+
+ // Did we find a word on this iteration? If so, push it on the break stack
+ if (cuWordLength > 0) {
+ foundBreaks.push((current+cuWordLength), status);
+ }
+ }
+
+ // Don't return a break for the end of the dictionary range if there is one there.
+ if (foundBreaks.peeki() >= rangeEnd) {
+ (void) foundBreaks.popi();
+ wordsFound -= 1;
+ }
+
+ return wordsFound;
+}
+
+/*
+ ******************************************************************
+ * KhmerBreakEngine
+ */
+
+// How many words in a row are "good enough"?
+static const int32_t KHMER_LOOKAHEAD = 3;
+
+// Will not combine a non-word with a preceding dictionary word of this many code points or more
+static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 3;
+
+// Will not combine a non-word that shares at least this much prefix with a
+// dictionary word, with a preceding word
+static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 3;
+
+// Minimum word size
+static const int32_t KHMER_MIN_WORD = 2;
+
+// Minimum length of a range worth dividing (two minimum-size words); compared against rangeEnd - rangeStart
+static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2;
+
+KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
+ : DictionaryBreakEngine(),
+ fDictionary(adoptDictionary)
+{
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+ UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
+ fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
+ if (U_SUCCESS(status)) {
+ setCharacters(fKhmerWordSet); // only a successfully built set becomes the handled set
+ }
+ fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status); // NOTE(review): the rest of the set-up runs even if status already reports a failure
+ fMarkSet.add(0x0020); // U+0020 SPACE is treated like a mark as well
+ fEndWordSet = fKhmerWordSet;
+ fBeginWordSet.add(0x1780, 0x17B3);
+ //fBeginWordSet.add(0x17A3, 0x17A4); // deprecated vowels
+ //fEndWordSet.remove(0x17A5, 0x17A9); // Khmer independent vowels that can't end a word
+ //fEndWordSet.remove(0x17B2); // Khmer independent vowel that can't end a word
+ fEndWordSet.remove(0x17D2); // KHMER SIGN COENG that combines some following characters
+ //fEndWordSet.remove(0x17B6, 0x17C5); // Remove dependent vowels
+// fEndWordSet.remove(0x0E31); // MAI HAN-AKAT
+// fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
+// fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK
+// fBeginWordSet.add(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
+// fSuffixSet.add(THAI_PAIYANNOI);
+// fSuffixSet.add(THAI_MAIYAMOK);
+
+ // Compact for caching.
+ fMarkSet.compact();
+ fEndWordSet.compact();
+ fBeginWordSet.compact();
+// fSuffixSet.compact();
+ UTRACE_EXIT_STATUS(status);
+}
+
+KhmerBreakEngine::~KhmerBreakEngine() {
+ delete fDictionary; // the matcher handed to the constructor (adoptDictionary) is freed here
+}
+
+int32_t
+KhmerBreakEngine::divideUpDictionaryRange( UText *text,
+ int32_t rangeStart,
+ int32_t rangeEnd,
+ UVector32 &foundBreaks ) const {
+ if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
+ return 0; // Not enough characters for two words
+ }
+
+ uint32_t wordsFound = 0;
+ int32_t cpWordLength = 0;
+ int32_t cuWordLength = 0;
+ int32_t current;
+ UErrorCode status = U_ZERO_ERROR;
+ PossibleWord words[KHMER_LOOKAHEAD];
+
+ utext_setNativeIndex(text, rangeStart);
+
+ while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
+ cuWordLength = 0;
+ cpWordLength = 0;
+
+ // Look for candidate words at the current position
+ int32_t candidates = words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
+
+ // If we found exactly one, use that
+ if (candidates == 1) {
+ cuWordLength = words[wordsFound % KHMER_LOOKAHEAD].acceptMarked(text);
+ cpWordLength = words[wordsFound % KHMER_LOOKAHEAD].markedCPLength();
+ wordsFound += 1;
+ }
+
+ // If there was more than one, see which one can take us forward the most words
+ else if (candidates > 1) {
+ // If we're already at the end of the range, we're done
+ if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+ goto foundBest;
+ }
+ do {
+ int32_t wordsMatched = 1;
+ if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
+ if (wordsMatched < 2) {
+ // Followed by another dictionary word; mark first word as a good candidate
+ words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
+ wordsMatched = 2;
+ }
+
+ // If we're already at the end of the range, we're done
+ if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+ goto foundBest;
+ }
+
+ // See if any of the possible second words is followed by a third word
+ do {
+ // If we find a third word, stop right away
+ if (words[(wordsFound + 2) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
+ words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
+ goto foundBest;
+ }
+ }
+ while (words[(wordsFound + 1) % KHMER_LOOKAHEAD].backUp(text));
+ }
+ }
+ while (words[wordsFound % KHMER_LOOKAHEAD].backUp(text));
+foundBest:
+ cuWordLength = words[wordsFound % KHMER_LOOKAHEAD].acceptMarked(text);
+ cpWordLength = words[wordsFound % KHMER_LOOKAHEAD].markedCPLength();
+ wordsFound += 1;
+ }
+
+ // We come here after having either found a word or not. We look ahead to the
+ // next word. If it's not a dictionary word, we will combine it with the word we
+ // just found (if there is one), but only if the preceding word does not exceed
+ // the threshold.
+ // The text iterator should now be positioned at the end of the word we found.
+ if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < KHMER_ROOT_COMBINE_THRESHOLD) {
+ // if it is a dictionary word, do nothing. If it isn't, then if there is
+ // no preceding word, or the non-word shares less than the minimum threshold
+ // of characters with a dictionary word, then scan to resynchronize
+ if (words[wordsFound % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
+ && (cuWordLength == 0
+ || words[wordsFound % KHMER_LOOKAHEAD].longestPrefix() < KHMER_PREFIX_COMBINE_THRESHOLD)) {
+ // Look for a plausible word boundary
+ int32_t remaining = rangeEnd - (current+cuWordLength);
+ UChar32 pc;
+ UChar32 uc;
+ int32_t chars = 0;
+ for (;;) {
+ int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
+ pc = utext_next32(text);
+ int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
+ chars += pcSize;
+ remaining -= pcSize;
+ if (remaining <= 0) {
+ break;
+ }
+ uc = utext_current32(text);
+ if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
+ // Maybe. See if it's in the dictionary.
+ int32_t num_candidates = words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
+ utext_setNativeIndex(text, current+cuWordLength+chars);
+ if (num_candidates > 0) {
+ break;
+ }
+ }
+ }
+
+ // Bump the word count if there wasn't already one
+ if (cuWordLength <= 0) {
+ wordsFound += 1;
+ }
+
+ // Update the length with the passed-over characters
+ cuWordLength += chars;
+ }
+ else {
+ // Back up to where we were for next iteration
+ utext_setNativeIndex(text, current+cuWordLength);
+ }
+ }
+
+ // Never stop before a combining mark.
+ int32_t currPos;
+ while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
+ utext_next32(text);
+ cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
+ }
+
+ // Look ahead for possible suffixes if a dictionary word does not follow.
+ // We do this in code rather than using a rule so that the heuristic
+ // resynch continues to function. For example, one of the suffix characters
+ // could be a typo in the middle of a word.
+// if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength > 0) {
+// if (words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
+// && fSuffixSet.contains(uc = utext_current32(text))) {
+// if (uc == KHMER_PAIYANNOI) {
+// if (!fSuffixSet.contains(utext_previous32(text))) {
+// // Skip over previous end and PAIYANNOI
+// utext_next32(text);
+// utext_next32(text);
+// wordLength += 1; // Add PAIYANNOI to word
+// uc = utext_current32(text); // Fetch next character
+// }
+// else {
+// // Restore prior position
+// utext_next32(text);
+// }
+// }
+// if (uc == KHMER_MAIYAMOK) {
+// if (utext_previous32(text) != KHMER_MAIYAMOK) {
+// // Skip over previous end and MAIYAMOK
+// utext_next32(text);
+// utext_next32(text);
+// wordLength += 1; // Add MAIYAMOK to word
+// }
+// else {
+// // Restore prior position
+// utext_next32(text);
+// }
+// }
+// }
+// else {
+// utext_setNativeIndex(text, current+wordLength);
+// }
+// }
+
+ // Did we find a word on this iteration? If so, push it on the break stack
+ if (cuWordLength > 0) {
+ foundBreaks.push((current+cuWordLength), status);
+ }
+ }
+
+ // Don't return a break for the end of the dictionary range if there is one there.
+ if (foundBreaks.peeki() >= rangeEnd) {
+ (void) foundBreaks.popi();
+ wordsFound -= 1;
+ }
+
+ return wordsFound;
+}
+
+#if !UCONFIG_NO_NORMALIZATION
+/*
+ ******************************************************************
+ * CjkBreakEngine
+ */
+static const uint32_t kuint32max = 0xFFFFFFFF; // marks a bestSnlp entry for which no segmentation has been found yet (used by CjkBreakEngine::divideUpDictionaryRange)
+// Adopts the dictionary and registers the characters handled for the requested language type.
+CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
+: DictionaryBreakEngine(), fDictionary(adoptDictionary) {
+    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Hani");
+
+    // Per-script sets. The Hangul set is just U+AC00..U+D7A3 because the Korean
+    // dictionary only includes Hangul syllables.
+    fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
+    fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
+    fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status);
+    fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status);
+    nfkcNorm2 = Normalizer2::getNFKCInstance(status);
+
+    // Korean and Chinese/Japanese use different dictionaries, so the set of
+    // characters this engine claims depends on the requested language type.
+    if (U_SUCCESS(status) && type == kKorean) {
+        setCharacters(fHangulWordSet);
+    } else if (U_SUCCESS(status)) {
+        UnicodeSet cjSet;
+        cjSet.addAll(fHanWordSet).addAll(fKatakanaWordSet).addAll(fHiraganaWordSet);
+        cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+        cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK
+        setCharacters(cjSet);
+    }
+    UTRACE_EXIT_STATUS(status);
+}
+
+CjkBreakEngine::~CjkBreakEngine(){
+ delete fDictionary; // the matcher was adopted by the constructor, so this engine deletes it
+}
+
+// The katakanaCost values below are based on the length frequencies of all
+// katakana phrases in the dictionary
+static const int32_t kMaxKatakanaLength = 8; // longest run length that has its own entry in the katakanaCost table
+static const int32_t kMaxKatakanaGroupLength = 20; // a katakana run that reaches this many code points gets no run candidate
+static const uint32_t maxSnlp = 255; // cost assigned to a fallback one-character word that has no dictionary match
+
+static inline uint32_t getKatakanaCost(int32_t wordLength){
+    // Cost of a katakana run, indexed by its length in code points.
+    //TODO: fill array with actual values from dictionary!
+    static const uint32_t costByLength[kMaxKatakanaLength + 1] = {8192, 984, 408, 240, 204, 252, 300, 372, 480};
+    if (wordLength > kMaxKatakanaLength) { return 8192; } else { return costByLength[wordLength]; }
+}
+
+static inline bool isKatakana(UChar32 value) {
+    const bool inMainRange = value >= 0x30A1 && value <= 0x30FE && value != 0x30FB; // U+30FB itself is excluded
+    return inMainRange || (value >= 0xFF66 && value <= 0xFF9f);
+}
+
+
+// Function for accessing internal utext flags.
+// Replicates an internal UText function.
+
+static inline int32_t utext_i32_flag(int32_t bitIndex) {
+    return static_cast<int32_t>(1) << bitIndex; // mask with only bit number bitIndex set
+}
+
+
+/*
+ * Segments a run of CJK text by picking the lowest-total-cost sequence of candidate words.
+ * @param inText A UText representing the text
+ * @param rangeStart The start of the range of dictionary characters
+ * @param rangeEnd The end of the range of dictionary characters
+ * @param foundBreaks vector<int32> to receive the break positions
+ * @return The number of breaks found; 0 if the range is empty or an error occurs during setup
+ */
+int32_t
+CjkBreakEngine::divideUpDictionaryRange( UText *inText,
+        int32_t rangeStart,
+        int32_t rangeEnd,
+        UVector32 &foundBreaks ) const {
+    if (rangeStart >= rangeEnd) {
+        return 0;
+    }
+
+    // UnicodeString version of input UText, NFKC normalized if necessary.
+    UnicodeString inString;
+
+    // inputMap[inStringIndex] = corresponding native index from UText inText.
+    // If NULL then mapping is 1:1
+    LocalPointer<UVector32> inputMap;
+
+    UErrorCode status = U_ZERO_ERROR;
+
+    // if UText has the input string as one contiguous UTF-16 chunk
+    if ((inText->providerProperties & utext_i32_flag(UTEXT_PROVIDER_STABLE_CHUNKS)) &&
+            inText->chunkNativeStart <= rangeStart &&
+            inText->chunkNativeLimit >= rangeEnd &&
+            inText->nativeIndexingLimit >= rangeEnd - inText->chunkNativeStart) {
+        // Input UText is in one contiguous UTF-16 chunk.
+        // Use Read-only aliasing UnicodeString.
+        inString.setTo(FALSE,
+                       inText->chunkContents + rangeStart - inText->chunkNativeStart,
+                       rangeEnd - rangeStart);
+    } else {
+        // Copy the text from the original inText (UText) to inString (UnicodeString).
+        // Create a map from UnicodeString indices -> UText offsets.
+        utext_setNativeIndex(inText, rangeStart);
+        int32_t limit = rangeEnd;
+        U_ASSERT(limit <= utext_nativeLength(inText));
+        if (limit > utext_nativeLength(inText)) {
+            limit = (int32_t)utext_nativeLength(inText);
+        }
+        inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status);
+        if (U_FAILURE(status)) {
+            return 0;
+        }
+        while (utext_getNativeIndex(inText) < limit) {
+            int32_t nativePosition = (int32_t)utext_getNativeIndex(inText);
+            UChar32 c = utext_next32(inText);
+            U_ASSERT(c != U_SENTINEL);
+            inString.append(c);
+            while (inputMap->size() < inString.length()) {
+                inputMap->addElement(nativePosition, status);
+            }
+        }
+        inputMap->addElement(limit, status);
+    }
+
+    if (!nfkcNorm2->isNormalized(inString, status)) {
+        UnicodeString normalizedInput;
+        // normalizedMap[normalizedInput position] == original UText position.
+        LocalPointer<UVector32> normalizedMap(new UVector32(status), status);
+        if (U_FAILURE(status)) {
+            return 0;
+        }
+
+        UnicodeString fragment;
+        UnicodeString normalizedFragment;
+        for (int32_t srcI = 0; srcI < inString.length();) { // Once per normalization chunk
+            fragment.remove();
+            int32_t fragmentStartI = srcI;
+            UChar32 c = inString.char32At(srcI);
+            for (;;) {
+                fragment.append(c);
+                srcI = inString.moveIndex32(srcI, 1);
+                if (srcI == inString.length()) {
+                    break;
+                }
+                c = inString.char32At(srcI);
+                if (nfkcNorm2->hasBoundaryBefore(c)) {
+                    break;
+                }
+            }
+            nfkcNorm2->normalize(fragment, normalizedFragment, status);
+            normalizedInput.append(normalizedFragment);
+
+            // Map every position in the normalized chunk to the start of the chunk
+            // in the original input.
+            int32_t fragmentOriginalStart = inputMap.isValid() ?
+                    inputMap->elementAti(fragmentStartI) : fragmentStartI+rangeStart;
+            while (normalizedMap->size() < normalizedInput.length()) {
+                normalizedMap->addElement(fragmentOriginalStart, status);
+                if (U_FAILURE(status)) {
+                    break;
+                }
+            }
+        }
+        U_ASSERT(normalizedMap->size() == normalizedInput.length());
+        int32_t nativeEnd = inputMap.isValid() ?
+                inputMap->elementAti(inString.length()) : inString.length()+rangeStart;
+        normalizedMap->addElement(nativeEnd, status);
+
+        inputMap = std::move(normalizedMap);
+        inString = std::move(normalizedInput);
+    }
+
+    int32_t numCodePts = inString.countChar32();
+    if (numCodePts != inString.length()) {
+        // There are supplementary characters in the input.
+        // The dictionary will produce boundary positions in terms of code point indexes,
+        // not in terms of code unit string indexes.
+        // Use the inputMap mechanism to take care of this in addition to indexing differences
+        // from normalization and/or UTF-8 input.
+        UBool hadExistingMap = inputMap.isValid();
+        if (!hadExistingMap) {
+            inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status);
+            if (U_FAILURE(status)) {
+                return 0;
+            }
+        }
+        int32_t cpIdx = 0;
+        for (int32_t cuIdx = 0; ; cuIdx = inString.moveIndex32(cuIdx, 1)) {
+            U_ASSERT(cuIdx >= cpIdx);
+            if (hadExistingMap) {
+                inputMap->setElementAt(inputMap->elementAti(cuIdx), cpIdx);
+            } else {
+                inputMap->addElement(cuIdx+rangeStart, status);
+            }
+            cpIdx++;
+            if (cuIdx == inString.length()) {
+                break;
+            }
+        }
+    }
+
+    // bestSnlp[i] is the snlp of the best segmentation of the first i
+    // code points in the range to be matched.
+    UVector32 bestSnlp(numCodePts + 1, status);
+    bestSnlp.addElement(0, status);
+    for(int32_t i = 1; i <= numCodePts; i++) {
+        bestSnlp.addElement(kuint32max, status);
+    }
+
+    // prev[i] is the index of the last CJK code point in the previous word in
+    // the best segmentation of the first i characters.
+    UVector32 prev(numCodePts + 1, status);
+    for(int32_t i = 0; i <= numCodePts; i++){
+        prev.addElement(-1, status);
+    }
+
+    const int32_t maxWordSize = 20;
+    UVector32 values(numCodePts, status);
+    values.setSize(numCodePts);
+    UVector32 lengths(numCodePts, status);
+    lengths.setSize(numCodePts);
+
+    UText fu = UTEXT_INITIALIZER;
+    utext_openUnicodeString(&fu, &inString, &status);
+    if (U_FAILURE(status)) { // normalization, a map/table allocation or the open failed: do not run the DP on partial data
+        utext_close(&fu);
+        return 0;
+    }
+
+    // Dynamic programming to find the best segmentation.
+
+    // In outer loop, i is the code point index,
+    // ix is the corresponding string (code unit) index.
+    // They differ when the string contains supplementary characters.
+    int32_t ix = 0;
+    bool is_prev_katakana = false;
+    for (int32_t i = 0; i < numCodePts; ++i, ix = inString.moveIndex32(ix, 1)) {
+        if ((uint32_t)bestSnlp.elementAti(i) == kuint32max) {
+            continue;
+        }
+
+        int32_t count;
+        utext_setNativeIndex(&fu, ix);
+        count = fDictionary->matches(&fu, maxWordSize, numCodePts,
+                NULL, lengths.getBuffer(), values.getBuffer(), NULL);
+        // Note: lengths is filled with code point lengths
+        // The NULL parameter is the ignored code unit lengths.
+
+        // if there are no single character matches found in the dictionary
+        // starting with this character, treat character as a 1-character word
+        // with the highest value possible, i.e. the least likely to occur.
+        // Exclude Korean characters from this treatment, as they should be left
+        // together by default.
+        if ((count == 0 || lengths.elementAti(0) != 1) &&
+                !fHangulWordSet.contains(inString.char32At(ix))) {
+            values.setElementAt(maxSnlp, count); // 255
+            lengths.setElementAt(1, count++);
+        }
+
+        for (int32_t j = 0; j < count; j++) {
+            uint32_t newSnlp = (uint32_t)bestSnlp.elementAti(i) + (uint32_t)values.elementAti(j);
+            int32_t ln_j_i = lengths.elementAti(j) + i;
+            if (newSnlp < (uint32_t)bestSnlp.elementAti(ln_j_i)) {
+                bestSnlp.setElementAt(newSnlp, ln_j_i);
+                prev.setElementAt(i, ln_j_i);
+            }
+        }
+
+        // In Japanese,
+        // Katakana word in single character is pretty rare. So we apply
+        // the following heuristic to Katakana: any continuous run of Katakana
+        // characters is considered a candidate word with a default cost
+        // specified in the katakanaCost table according to its length.
+
+        bool is_katakana = isKatakana(inString.char32At(ix));
+        int32_t katakanaRunLength = 1;
+        if (!is_prev_katakana && is_katakana) {
+            int32_t j = inString.moveIndex32(ix, 1);
+            // Find the end of the continuous run of Katakana characters
+            while (j < inString.length() && katakanaRunLength < kMaxKatakanaGroupLength &&
+                    isKatakana(inString.char32At(j))) {
+                j = inString.moveIndex32(j, 1);
+                katakanaRunLength++;
+            }
+            if (katakanaRunLength < kMaxKatakanaGroupLength) {
+                uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(katakanaRunLength);
+                if (newSnlp < (uint32_t)bestSnlp.elementAti(i+katakanaRunLength)) {
+                    bestSnlp.setElementAt(newSnlp, i+katakanaRunLength);
+                    prev.setElementAt(i, i+katakanaRunLength); // prev[j] = i;
+                }
+            }
+        }
+        is_prev_katakana = is_katakana;
+    }
+    utext_close(&fu);
+
+    // Start pushing the optimal offset index into t_boundary (t for tentative).
+    // prev[numCodePts] is guaranteed to be meaningful.
+    // We'll first push in the reverse order, i.e.,
+    // t_boundary[0] = numCodePts, and afterwards do a swap.
+    UVector32 t_boundary(numCodePts+1, status);
+
+    int32_t numBreaks = 0;
+    // No segmentation found, set boundary to end of range
+    if ((uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) {
+        t_boundary.addElement(numCodePts, status);
+        numBreaks++;
+    } else {
+        for (int32_t i = numCodePts; i > 0; i = prev.elementAti(i)) {
+            t_boundary.addElement(i, status);
+            numBreaks++;
+        }
+        U_ASSERT(prev.elementAti(t_boundary.elementAti(numBreaks - 1)) == 0);
+    }
+
+    // Add a break for the start of the dictionary range if there is not one
+    // there already.
+    if (foundBreaks.size() == 0 || foundBreaks.peeki() < rangeStart) {
+        t_boundary.addElement(0, status);
+        numBreaks++;
+    }
+
+    // Now that we're done, convert positions in t_boundary[] (indices in
+    // the normalized input string) back to indices in the original input UText
+    // while reversing t_boundary and pushing values to foundBreaks.
+    int32_t prevCPPos = -1;
+    int32_t prevUTextPos = -1;
+    for (int32_t i = numBreaks-1; i >= 0; i--) {
+        int32_t cpPos = t_boundary.elementAti(i);
+        U_ASSERT(cpPos > prevCPPos);
+        int32_t utextPos = inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart;
+        U_ASSERT(utextPos >= prevUTextPos);
+        if (utextPos > prevUTextPos) {
+            // Boundaries are added to foundBreaks output in ascending order.
+            U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos);
+            foundBreaks.push(utextPos, status);
+        } else {
+            // Normalization expanded the input text, the dictionary found a boundary
+            // within the expansion, giving two boundaries with the same index in the
+            // original text. Ignore the second. See ticket #12918.
+            --numBreaks;
+        }
+        prevCPPos = cpPos;
+        prevUTextPos = utextPos;
+    }
+    (void)prevCPPos; // suppress compiler warnings about unused variable
+
+    // inString and inputMap go out of scope here.
+    return numBreaks;
+}
+#endif
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
diff --git a/thirdparty/icu4c/common/dictbe.h b/thirdparty/icu4c/common/dictbe.h
new file mode 100644
index 0000000000..4ea676fc71
--- /dev/null
+++ b/thirdparty/icu4c/common/dictbe.h
@@ -0,0 +1,402 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2014, International Business Machines Corporation *
+ * and others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+#ifndef DICTBE_H
+#define DICTBE_H
+
+#include "unicode/utypes.h"
+#include "unicode/uniset.h"
+#include "unicode/utext.h"
+
+#include "brkeng.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+class DictionaryMatcher;
+class Normalizer2;
+
+/*******************************************************************
+ * DictionaryBreakEngine
+ */
+
+/**
+ * <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a
+ * dictionary to determine language-specific breaks.</p>
+ *
+ * <p>After it is constructed a DictionaryBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class DictionaryBreakEngine : public LanguageBreakEngine {
+ private:
+ /**
+ * The set of characters handled by this engine (supplied through setCharacters())
+ * @internal
+ */
+
+ UnicodeSet fSet;
+
+ public:
+
+ /**
+ * <p>Constructor. Takes no arguments; subclasses such as CjkBreakEngine call setCharacters().</p>
+ */
+ DictionaryBreakEngine();
+
+ /**
+ * <p>Virtual destructor.</p>
+ */
+ virtual ~DictionaryBreakEngine();
+
+ /**
+ * <p>Indicate whether this engine handles a particular character.
+ * Only the character is passed in; the signature carries no break type.</p>
+ *
+ * @param c A character which begins a run that the engine might handle
+ * @return true if this engine handles the particular character.
+ * NOTE(review): presumably decided from the set given to setCharacters() - confirm in dictbe.cpp.
+ */
+ virtual UBool handles(UChar32 c) const;
+
+ /**
+ * <p>Find any breaks within a run in the supplied text.</p>
+ *
+ * @param text A UText representing the text. The iterator is left at
+ * the end of the run of characters which the engine is capable of handling
+ * that starts from the first character in the range.
+ * @param startPos The start of the run within the supplied text.
+ * @param endPos The end of the run within the supplied text.
+ * @param foundBreaks vector of int32_t to receive the break positions
+ * @return The number of breaks found.
+ */
+ virtual int32_t findBreaks( UText *text,
+ int32_t startPos,
+ int32_t endPos,
+ UVector32 &foundBreaks ) const;
+
+ protected:
+
+ /**
+ * <p>Set the character set handled by this engine.</p>
+ *
+ * @param set A UnicodeSet of the set of characters handled by the engine
+ */
+ virtual void setCharacters( const UnicodeSet &set );
+
+ /**
+ * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
+ * Pure virtual: every concrete engine declared in this header provides its own version.
+ * @param text A UText representing the text
+ * @param rangeStart The start of the range of dictionary characters
+ * @param rangeEnd The end of the range of dictionary characters
+ * @param foundBreaks Vector of int32_t that receives the break positions
+ * @return The number of breaks found
+ */
+ virtual int32_t divideUpDictionaryRange( UText *text,
+ int32_t rangeStart,
+ int32_t rangeEnd,
+ UVector32 &foundBreaks ) const = 0;
+
+};
+
+/*******************************************************************
+ * ThaiBreakEngine
+ */
+
+/**
+ * <p>ThaiBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * dictionary and heuristics to determine Thai-specific breaks.</p>
+ *
+ * <p>After it is constructed a ThaiBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class ThaiBreakEngine : public DictionaryBreakEngine {
+ private:
+    /**
+     * Character sets and dictionary used by this engine
+     * @internal
+     */
+
+    UnicodeSet fThaiWordSet;
+    UnicodeSet fEndWordSet;
+    UnicodeSet fBeginWordSet;
+    UnicodeSet fSuffixSet;
+    UnicodeSet fMarkSet;
+    DictionaryMatcher *fDictionary;
+
+ public:
+
+    /**
+     * <p>Constructor.</p>
+     *
+     * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the engine is deleted.
+     * @param status ICU error code reference supplied by the caller.
+     */
+    ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
+
+    /**
+     * <p>Virtual destructor.</p>
+     */
+    virtual ~ThaiBreakEngine();
+
+ protected:
+    /**
+     * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
+     * Overrides the pure virtual declared in DictionaryBreakEngine.
+     * @param text A UText representing the text
+     * @param rangeStart The start of the range of dictionary characters
+     * @param rangeEnd The end of the range of dictionary characters
+     * @param foundBreaks Vector of int32_t that receives the break positions
+     * @return The number of breaks found
+     */
+    int32_t divideUpDictionaryRange( UText *text,
+                                     int32_t rangeStart,
+                                     int32_t rangeEnd,
+                                     UVector32 &foundBreaks ) const override;
+
+};
+
+/*******************************************************************
+ * LaoBreakEngine
+ */
+
+/**
+ * <p>LaoBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * dictionary and heuristics to determine Lao-specific breaks.</p>
+ *
+ * <p>After it is constructed a LaoBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class LaoBreakEngine : public DictionaryBreakEngine {
+ private:
+    /**
+     * Character sets and dictionary used by this engine
+     * @internal
+     */
+
+    UnicodeSet fLaoWordSet;
+    UnicodeSet fEndWordSet;
+    UnicodeSet fBeginWordSet;
+    UnicodeSet fMarkSet;
+    DictionaryMatcher *fDictionary;
+
+ public:
+
+    /**
+     * <p>Constructor.</p>
+     *
+     * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the engine is deleted.
+     * @param status ICU error code reference supplied by the caller.
+     */
+    LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
+
+    /**
+     * <p>Virtual destructor.</p>
+     */
+    virtual ~LaoBreakEngine();
+
+ protected:
+    /**
+     * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
+     * Overrides the pure virtual declared in DictionaryBreakEngine.
+     * @param text A UText representing the text
+     * @param rangeStart The start of the range of dictionary characters
+     * @param rangeEnd The end of the range of dictionary characters
+     * @param foundBreaks Vector of int32_t that receives the break positions
+     * @return The number of breaks found
+     */
+    int32_t divideUpDictionaryRange( UText *text,
+                                     int32_t rangeStart,
+                                     int32_t rangeEnd,
+                                     UVector32 &foundBreaks ) const override;
+
+};
+
+/*******************************************************************
+ * BurmeseBreakEngine
+ */
+
+/**
+ * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p>
+ *
+ * <p>After it is constructed a BurmeseBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class BurmeseBreakEngine : public DictionaryBreakEngine {
+ private:
+    /**
+     * Character sets and dictionary used by this engine
+     * @internal
+     */
+
+    UnicodeSet fBurmeseWordSet;
+    UnicodeSet fEndWordSet;
+    UnicodeSet fBeginWordSet;
+    UnicodeSet fMarkSet;
+    DictionaryMatcher *fDictionary;
+
+ public:
+
+    /**
+     * <p>Constructor.</p>
+     *
+     * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the engine is deleted.
+     * @param status ICU error code reference supplied by the caller.
+     */
+    BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
+
+    /**
+     * <p>Virtual destructor.</p>
+     */
+    virtual ~BurmeseBreakEngine();
+
+ protected:
+    /**
+     * <p>Divide up a range of known dictionary characters.</p>
+     * Overrides the pure virtual declared in DictionaryBreakEngine.
+     * @param text A UText representing the text
+     * @param rangeStart The start of the range of dictionary characters
+     * @param rangeEnd The end of the range of dictionary characters
+     * @param foundBreaks Vector of int32_t that receives the break positions
+     * @return The number of breaks found
+     */
+    int32_t divideUpDictionaryRange( UText *text,
+                                     int32_t rangeStart,
+                                     int32_t rangeEnd,
+                                     UVector32 &foundBreaks ) const override;
+
+};
+
+/*******************************************************************
+ * KhmerBreakEngine
+ */
+
+/**
+ * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p>
+ *
+ * <p>After it is constructed a KhmerBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class KhmerBreakEngine : public DictionaryBreakEngine {
+ private:
+    /**
+     * Character sets and dictionary used by this engine
+     * @internal
+     */
+
+    UnicodeSet fKhmerWordSet;
+    UnicodeSet fEndWordSet;
+    UnicodeSet fBeginWordSet;
+    UnicodeSet fMarkSet;
+    DictionaryMatcher *fDictionary;
+
+ public:
+
+    /**
+     * <p>Constructor.</p>
+     *
+     * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the engine is deleted.
+     * @param status ICU error code reference supplied by the caller.
+     */
+    KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
+
+    /**
+     * <p>Virtual destructor.</p>
+     */
+    virtual ~KhmerBreakEngine();
+
+ protected:
+    /**
+     * <p>Divide up a range of known dictionary characters.</p>
+     * Overrides the pure virtual declared in DictionaryBreakEngine.
+     * @param text A UText representing the text
+     * @param rangeStart The start of the range of dictionary characters
+     * @param rangeEnd The end of the range of dictionary characters
+     * @param foundBreaks Vector of int32_t that receives the break positions
+     * @return The number of breaks found
+     */
+    int32_t divideUpDictionaryRange( UText *text,
+                                     int32_t rangeStart,
+                                     int32_t rangeEnd,
+                                     UVector32 &foundBreaks ) const override;
+
+};
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/*******************************************************************
+ * CjkBreakEngine
+ */
+
+//indicates language/script that the CjkBreakEngine will handle
+enum LanguageType {
+ kKorean, // the CjkBreakEngine constructor registers only the Hangul word set for this type
+ kChineseJapanese // any non-Korean type gets the Han, Katakana and Hiragana sets in that constructor
+};
+
+/**
+ * <p>CjkBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * dictionary with costs associated with each word and
+ * Viterbi decoding to determine CJK-specific breaks.</p>
+ */
+class CjkBreakEngine : public DictionaryBreakEngine {
+ protected:
+    /**
+     * Per-script character sets, the adopted dictionary and the NFKC normalizer used by this engine
+     * @internal
+     */
+    UnicodeSet fHangulWordSet;
+    UnicodeSet fHanWordSet;
+    UnicodeSet fKatakanaWordSet;
+    UnicodeSet fHiraganaWordSet;
+
+    DictionaryMatcher *fDictionary;
+    const Normalizer2 *nfkcNorm2;
+
+ public:
+
+    /**
+     * <p>Constructor.</p>
+     * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the engine is deleted.
+     *        It must contain costs for each word in order for the dictionary to work properly.
+     * @param type Selects the handled characters: Hangul for kKorean, otherwise Han/Katakana/Hiragana.
+     * @param status ICU error code reference supplied by the caller.
+     */
+    CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status);
+
+    /**
+     * <p>Virtual destructor.</p>
+     */
+    virtual ~CjkBreakEngine();
+
+ protected:
+    /**
+     * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
+     * Overrides the pure virtual declared in DictionaryBreakEngine.
+     * @param text A UText representing the text
+     * @param rangeStart The start of the range of dictionary characters
+     * @param rangeEnd The end of the range of dictionary characters
+     * @param foundBreaks Vector of int32_t that receives the break positions
+     * @return The number of breaks found
+     */
+    int32_t divideUpDictionaryRange( UText *text,
+                                     int32_t rangeStart,
+                                     int32_t rangeEnd,
+                                     UVector32 &foundBreaks ) const override;
+
+};
+
+#endif
+
+U_NAMESPACE_END
+
+ /* DICTBE_H */
+#endif
diff --git a/thirdparty/icu4c/common/dictionarydata.cpp b/thirdparty/icu4c/common/dictionarydata.cpp
new file mode 100644
index 0000000000..6e2dbee5b6
--- /dev/null
+++ b/thirdparty/icu4c/common/dictionarydata.cpp
@@ -0,0 +1,242 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2014-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* dictionarydata.cpp
+*
+* created on: 2012may31
+* created by: Markus W. Scherer & Maxime Serrano
+*/
+
+#include "dictionarydata.h"
+#include "unicode/ucharstrie.h"
+#include "unicode/bytestrie.h"
+#include "unicode/udata.h"
+#include "cmemory.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+U_NAMESPACE_BEGIN
+
+const int32_t DictionaryData::TRIE_TYPE_BYTES = 0; // returned by BytesDictionaryMatcher::getType()
+const int32_t DictionaryData::TRIE_TYPE_UCHARS = 1; // returned by UCharsDictionaryMatcher::getType()
+const int32_t DictionaryData::TRIE_TYPE_MASK = 7; // udict_swap() masks indexes[IX_TRIE_TYPE] with this to get the trie type
+const int32_t DictionaryData::TRIE_HAS_VALUES = 8;
+
+const int32_t DictionaryData::TRANSFORM_NONE = 0;
+const int32_t DictionaryData::TRANSFORM_TYPE_OFFSET = 0x1000000; // transform type checked by BytesDictionaryMatcher::transform()
+const int32_t DictionaryData::TRANSFORM_TYPE_MASK = 0x7f000000; // bits of the transform constant that hold the transform type
+const int32_t DictionaryData::TRANSFORM_OFFSET_MASK = 0x1fffff; // bits of the transform constant that hold the base code point
+
+DictionaryMatcher::~DictionaryMatcher() { // intentionally empty: nothing is released at this level
+}
+
+UCharsDictionaryMatcher::~UCharsDictionaryMatcher() {
+ udata_close(file); // close the data handle held in `file`
+}
+
+int32_t UCharsDictionaryMatcher::getType() const {
+ return DictionaryData::TRIE_TYPE_UCHARS; // this matcher walks a UCharsTrie in matches()
+}
+
+int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
+                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
+                            int32_t *prefix) const {
+    // Walks the UChars trie one code point at a time from the current UText position and
+    // records every dictionary word that is a prefix of the text (at most `limit` of them).
+    //   lengths[k]   - native-index length of the k-th word (skipped when NULL)
+    //   cpLengths[k] - code point length of the k-th word (skipped when NULL)
+    //   values[k]    - trie value of the k-th word (skipped when NULL)
+    //   *prefix      - number of code points examined before stopping (skipped when NULL)
+    // Returns the number of words recorded.
+    UCharsTrie trie(characters);
+    const int32_t startIndex = (int32_t)utext_getNativeIndex(text);
+    int32_t numWords = 0;
+    int32_t numCodePoints = 0;
+    for (;;) {
+        const UChar32 cp = utext_next32(text);
+        if (cp < 0) {
+            break; // negative result: no further code point to read
+        }
+        const UStringTrieResult step = (numCodePoints == 0) ? trie.first(cp) : trie.next(cp);
+        const int32_t consumed = (int32_t)utext_getNativeIndex(text) - startIndex;
+        ++numCodePoints;
+        if (step == USTRINGTRIE_NO_MATCH) {
+            break;
+        }
+        if (USTRINGTRIE_HAS_VALUE(step)) {
+            if (numWords < limit) {
+                if (values != NULL) { values[numWords] = trie.getValue(); }
+                if (lengths != NULL) { lengths[numWords] = consumed; }
+                if (cpLengths != NULL) { cpLengths[numWords] = numCodePoints; }
+                ++numWords;
+            }
+            if (step == USTRINGTRIE_FINAL_VALUE) {
+                break;
+            }
+        }
+        if (consumed >= maxLength) {
+            break;
+        }
+    }
+    if (prefix != NULL) { *prefix = numCodePoints; }
+    return numWords;
+}
+
+BytesDictionaryMatcher::~BytesDictionaryMatcher() {
+ udata_close(file); // close the data handle held in `file`
+}
+
+UChar32 BytesDictionaryMatcher::transform(UChar32 c) const {
+    // Maps a code point to the value fed to the bytes trie by matches().
+    if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) != DictionaryData::TRANSFORM_TYPE_OFFSET) {
+        return c; // not an offset transform: the code point is passed through unchanged
+    }
+    switch (c) {
+        case 0x200D: return 0xFF; // U+200D is mapped to a fixed value
+        case 0x200C: return 0xFE; // U+200C is mapped to a fixed value
+        default: break;
+    }
+    // Everything else is expressed relative to the base held in the low bits of the constant.
+    const int32_t offsetFromBase = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK);
+    const bool representable = (0 <= offsetFromBase) && (offsetFromBase <= 0xFD);
+    return representable ? (UChar32)offsetFromBase : U_SENTINEL;
+}
+
+int32_t BytesDictionaryMatcher::getType() const {
+ return DictionaryData::TRIE_TYPE_BYTES; // this matcher walks a BytesTrie in matches()
+}
+
+int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
+                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
+                            int32_t *prefix) const {
+    // Walks the bytes trie from the current UText position; each code point goes through transform() first.
+    // Records every dictionary word that is a prefix of the text (at most `limit`) and returns how many:
+    //   lengths[k]   - native-index length of the k-th word (skipped when NULL)
+    //   cpLengths[k] - code point length of the k-th word (skipped when NULL)
+    //   values[k]    - trie value of the k-th word (skipped when NULL)
+    //   *prefix      - number of code points examined before stopping (skipped when NULL)
+    BytesTrie trie(characters);
+    const int32_t startIndex = (int32_t)utext_getNativeIndex(text);
+    int32_t numWords = 0;
+    int32_t numCodePoints = 0;
+    for (;;) {
+        const UChar32 cp = utext_next32(text);
+        if (cp < 0) {
+            break; // negative result: no further code point to read
+        }
+        const UStringTrieResult step = (numCodePoints == 0) ? trie.first(transform(cp)) : trie.next(transform(cp));
+        const int32_t consumed = (int32_t)utext_getNativeIndex(text) - startIndex;
+        ++numCodePoints;
+        if (step == USTRINGTRIE_NO_MATCH) {
+            break;
+        }
+        if (USTRINGTRIE_HAS_VALUE(step)) {
+            if (numWords < limit) {
+                if (values != NULL) { values[numWords] = trie.getValue(); }
+                if (lengths != NULL) { lengths[numWords] = consumed; }
+                if (cpLengths != NULL) { cpLengths[numWords] = numCodePoints; }
+                ++numWords;
+            }
+            if (step == USTRINGTRIE_FINAL_VALUE) {
+                break;
+            }
+        }
+        if (consumed >= maxLength) {
+            break;
+        }
+    }
+    if (prefix != NULL) { *prefix = numCodePoints; }
+    return numWords;
+}
+
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
+U_CAPI int32_t U_EXPORT2
+udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
+           void *outData, UErrorCode *pErrorCode) {
+    // Swaps one dictionary data item; with length < 0 this function swaps nothing itself and only computes the size.
+    // Returns headerSize + the item's total size, or 0 with *pErrorCode set on failure.
+    // Sizes read from the item are range-checked before they are used as copy/swap lengths,
+    // so damaged data is reported as an error instead of being trusted.
+    int32_t indexes[DictionaryData::IX_COUNT];
+    const int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+    if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0;
+
+    // Accept only data format 0x44 0x69 0x63 0x74 ("Dict") with format version 1.
+    const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4);
+    if (!(pInfo->dataFormat[0] == 0x44 &&
+          pInfo->dataFormat[1] == 0x69 &&
+          pInfo->dataFormat[2] == 0x63 &&
+          pInfo->dataFormat[3] == 0x74 &&
+          pInfo->formatVersion[0] == 1)) {
+        udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",
+                         pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]);
+        *pErrorCode = U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
+    uint8_t *outBytes = (uint8_t *)outData + headerSize;
+    const int32_t *inIndexes = (const int32_t *)inBytes;
+
+    if (length >= 0) {
+        length -= headerSize;
+        if (length < (int32_t)(sizeof(indexes))) {
+            udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length);
+            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+    }
+
+    for (int32_t i = 0; i < DictionaryData::IX_COUNT; i++) {
+        indexes[i] = udata_readInt32(ds, inIndexes[i]);
+    }
+
+    // The total size has to cover at least the index array that was just read.
+    const int32_t size = indexes[DictionaryData::IX_TOTAL_SIZE];
+    if (size < (int32_t)sizeof(indexes)) {
+        udata_printError(ds, "udict_swap(): invalid total size %d in dictionary data\n", size);
+        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    if (length >= 0) {
+        if (length < size) {
+            udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length);
+            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        // The trie occupies [sizeof(indexes), IX_RESERVED1_OFFSET) and must lie inside the item.
+        const int32_t offset = (int32_t)sizeof(indexes);
+        const int32_t nextOffset = indexes[DictionaryData::IX_RESERVED1_OFFSET];
+        if (nextOffset < offset || size < nextOffset) {
+            udata_printError(ds, "udict_swap(): invalid trie limit %d in dictionary data\n", nextOffset);
+            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        if (inBytes != outBytes) {
+            uprv_memcpy(outBytes, inBytes, size);
+        }
+        ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode);
+        const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
+        if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
+            ds->swapArray16(ds, inBytes + offset, nextOffset - offset, outBytes + offset, pErrorCode);
+        } else if (trieType != DictionaryData::TRIE_TYPE_BYTES) {
+            udata_printError(ds, "udict_swap(): unknown trie type!\n");
+            *pErrorCode = U_UNSUPPORTED_ERROR;
+            return 0;
+        }
+        // A bytes trie needs no swapping; the two reserved sections after the trie are empty in the current format.
+    }
+    return headerSize + size;
+}
+#endif
diff --git a/thirdparty/icu4c/common/dictionarydata.h b/thirdparty/icu4c/common/dictionarydata.h
new file mode 100644
index 0000000000..0d303d9a8d
--- /dev/null
+++ b/thirdparty/icu4c/common/dictionarydata.h
@@ -0,0 +1,191 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* dictionarydata.h
+*
+* created on: 2012may31
+* created by: Markus W. Scherer & Maxime Serrano
+*/
+
+#ifndef __DICTIONARYDATA_H__
+#define __DICTIONARYDATA_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/utext.h"
+#include "unicode/udata.h"
+#include "udataswp.h"
+#include "unicode/uobject.h"
+#include "unicode/ustringtrie.h"
+
+U_NAMESPACE_BEGIN
+
+class UCharsTrie;
+class BytesTrie;
+
+// Holder for the constants that describe a dictionary (.dict) data file:
+// trie-type flags, transform flags, and the slots of the indexes[] array
+// that follows the generic data header. The static constants are only declared
+// here; the trailing comments record their intended values.
+class U_COMMON_API DictionaryData : public UMemory {
+public:
+ // Trie type values and flags stored in indexes[IX_TRIE_TYPE].
+ static const int32_t TRIE_TYPE_BYTES; // = 0;
+ static const int32_t TRIE_TYPE_UCHARS; // = 1;
+ static const int32_t TRIE_TYPE_MASK; // = 7;
+ static const int32_t TRIE_HAS_VALUES; // = 8;
+
+ // Transform specification values and masks stored in indexes[IX_TRANSFORM].
+ static const int32_t TRANSFORM_NONE; // = 0;
+ static const int32_t TRANSFORM_TYPE_OFFSET; // = 0x1000000;
+ static const int32_t TRANSFORM_TYPE_MASK; // = 0x7f000000;
+ static const int32_t TRANSFORM_OFFSET_MASK; // = 0x1fffff;
+
+ // Positions within the indexes[] array; IX_COUNT is the number of slots.
+ enum {
+ // Byte offsets from the start of the data, after the generic header.
+ IX_STRING_TRIE_OFFSET,
+ IX_RESERVED1_OFFSET,
+ IX_RESERVED2_OFFSET,
+ IX_TOTAL_SIZE,
+
+ // Trie type: TRIE_HAS_VALUES | TRIE_TYPE_BYTES etc.
+ IX_TRIE_TYPE,
+ // Transform specification: TRANSFORM_TYPE_OFFSET | 0xe00 etc.
+ IX_TRANSFORM,
+
+ IX_RESERVED6,
+ IX_RESERVED7,
+ IX_COUNT
+ };
+};
+
+/**
+ * Wrapper class around generic dictionaries, implementing matches().
+ * getType() should return a TRIE_TYPE_??? constant from DictionaryData.
+ *
+ * All implementations of this interface must be thread-safe if they are to be used inside of the
+ * dictionary-based break iteration code.
+ */
+class U_COMMON_API DictionaryMatcher : public UMemory {
+public:
+ DictionaryMatcher() {}
+ virtual ~DictionaryMatcher();
+ // this should emulate CompactTrieDictionary::matches()
+ /* Looks for dictionary words in the text, starting at its current position.
+ *
+ * @param text The text in which to look for matching words. Matching begins
+ * at the current position of the UText.
+ * @param maxLength The max length of match to consider. Units are the native indexing
+ * units of the UText.
+ * @param limit Capacity of output arrays, which is also the maximum number of
+ * matching words to be found.
+ * @param lengths output array, filled with the lengths of the matches, in order,
+ * from shortest to longest. Lengths are in native indexing units
+ * of the UText. May be NULL.
+ * @param cpLengths output array, filled with the lengths of the matches, in order,
+ * from shortest to longest. Lengths are the number of Unicode code points.
+ * May be NULL.
+ * @param values Output array, filled with the values associated with the words found.
+ * May be NULL.
+ * @param prefix Output parameter, the code point length of the prefix match, even if that
+ * prefix didn't lead to a complete word. Will always be >= the cpLength
+ * of the longest complete word matched. May be NULL.
+ * @return Number of matching words found.
+ */
+ virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
+ int32_t *lengths, int32_t *cpLengths, int32_t *values,
+ int32_t *prefix) const = 0;
+
+ /** @return DictionaryData::TRIE_TYPE_XYZ */
+ virtual int32_t getType() const = 0;
+};
+
+// Implementation of the DictionaryMatcher interface for a UCharsTrie dictionary.
+// matches() and getType() override the pure virtuals declared in DictionaryMatcher;
+// their definitions are not part of this header.
+class U_COMMON_API UCharsDictionaryMatcher : public DictionaryMatcher {
+public:
+ // constructs a new UCharsDictionaryMatcher.
+ // The UDataMemory * will be closed on this object's destruction.
+ UCharsDictionaryMatcher(const UChar *c, UDataMemory *f) : characters(c), file(f) { }
+ virtual ~UCharsDictionaryMatcher();
+ virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
+ int32_t *lengths, int32_t *cpLengths, int32_t *values,
+ int32_t *prefix) const;
+ virtual int32_t getType() const;
+private:
+ // Pointer handed to the constructor; presumably the serialized UCharsTrie data -- confirm in dictionarydata.cpp.
+ const UChar *characters;
+ // Data memory handed to the constructor; closed when this object is destroyed (see constructor comment).
+ UDataMemory *file;
+};
+
+// Implementation of the DictionaryMatcher interface for a BytesTrie dictionary.
+// matches() and getType() override the pure virtuals declared in DictionaryMatcher;
+// their definitions are not part of this header.
+class U_COMMON_API BytesDictionaryMatcher : public DictionaryMatcher {
+public:
+ // constructs a new BytesDictionaryMatcher
+ // the transform constant should be the constant read from the file, not a masked version!
+ // the UDataMemory * fed in here will be closed on this object's destruction
+ BytesDictionaryMatcher(const char *c, int32_t t, UDataMemory *f)
+ : characters(c), transformConstant(t), file(f) { }
+ virtual ~BytesDictionaryMatcher();
+ virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
+ int32_t *lengths, int32_t *cpLengths, int32_t *values,
+ int32_t *prefix) const;
+ virtual int32_t getType() const;
+private:
+ // Maps a code point through the transform described by transformConstant
+ // (presumably to the byte value used in the trie -- confirm in dictionarydata.cpp).
+ UChar32 transform(UChar32 c) const;
+
+ // Pointer handed to the constructor; presumably the serialized BytesTrie data -- confirm in dictionarydata.cpp.
+ const char *characters;
+ // Unmasked transform value as read from the data file (see constructor comment).
+ int32_t transformConstant;
+ // Data memory handed to the constructor; closed when this object is destroyed.
+ UDataMemory *file;
+};
+
+U_NAMESPACE_END
+
+// Swaps the byte order of dictionary (.dict) data via the given UDataSwapper.
+// NOTE(review): per the visible end of the implementation, it returns the header size plus
+// the dictionary data size on success, and returns 0 with *pErrorCode set
+// (U_INDEX_OUTOFBOUNDS_ERROR for too few bytes, U_UNSUPPORTED_ERROR for an unknown trie type)
+// on failure -- confirm the header handling against dictionarydata.cpp.
+U_CAPI int32_t U_EXPORT2
+udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode);
+
+/**
+ * Format of dictionary .dict data files.
+ * Format version 1.0.
+ *
+ * A dictionary .dict data file contains a byte-serialized BytesTrie or
+ * a UChars-serialized UCharsTrie.
+ * Such files are used in dictionary-based break iteration (DBBI).
+ *
+ * For a BytesTrie, a transformation type is specified for
+ * transforming Unicode strings into byte sequences.
+ *
+ * A .dict file begins with a standard ICU data file header
+ * (DataHeader, see ucmndata.h and unicode/udata.h).
+ * The UDataInfo.dataVersion field is currently unused (set to 0.0.0.0).
+ *
+ * After the header, the file contains the following parts.
+ * Constants are defined in the DictionaryData class.
+ *
+ * For the data structure of BytesTrie & UCharsTrie see
+ * http://site.icu-project.org/design/struct/tries
+ * and the bytestrie.h and ucharstrie.h header files.
+ *
+ * int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_STRING_TRIE_OFFSET]/4;
+ *
+ * The first four indexes are byte offsets in ascending order.
+ * Each byte offset marks the start of the next part in the data file,
+ * and the end of the previous one.
+ * When two consecutive byte offsets are the same, then the corresponding part is empty.
+ * Byte offsets are offsets from after the header,
+ * that is, from the beginning of the indexes[].
+ * Each part starts at an offset with proper alignment for its data.
+ * If necessary, the previous part may include padding bytes to achieve this alignment.
+ *
+ * trieType=indexes[IX_TRIE_TYPE] defines the trie type.
+ * transform=indexes[IX_TRANSFORM] defines the Unicode-to-bytes transformation.
+ * If the transformation type is TRANSFORM_TYPE_OFFSET,
+ * then the lower 21 bits contain the offset code point.
+ * Each code point c is mapped to byte b = (c - offset).
+ * Code points outside the range offset..(offset+0xff) cannot be mapped
+ * and do not occur in the dictionary.
+ *
+ * stringTrie; -- a serialized BytesTrie or UCharsTrie
+ *
+ * The dictionary maps strings to specific values (TRIE_HAS_VALUES bit set in trieType),
+ * or it maps all strings to 0 (TRIE_HAS_VALUES bit not set).
+ */
+
+#endif /* !UCONFIG_NO_BREAK_ITERATION */
+#endif /* __DICTIONARYDATA_H__ */
diff --git a/thirdparty/icu4c/common/dtintrv.cpp b/thirdparty/icu4c/common/dtintrv.cpp
new file mode 100644
index 0000000000..80bb5d6dbd
--- /dev/null
+++ b/thirdparty/icu4c/common/dtintrv.cpp
@@ -0,0 +1,63 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*******************************************************************************
+* Copyright (C) 2008, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*
+* File DTINTRV.CPP
+*
+*******************************************************************************
+*/
+
+
+
+#include "unicode/dtintrv.h"
+
+
+U_NAMESPACE_BEGIN
+
+// Expands the ICU RTTI boilerplate for DateInterval
+// (presumably getStaticClassID()/getDynamicClassID() -- confirm against unicode/uobject.h).
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateInterval)
+
+//DateInterval::DateInterval(){}
+
+
+// Stores the two end points as given; no ordering check is performed here.
+DateInterval::DateInterval(UDate from, UDate to)
+    : fromDate(from), toDate(to) {
+}
+
+
+// Nothing to release: the interval only holds two UDate values.
+DateInterval::~DateInterval() {
+}
+
+
+// Copy constructor: copy-constructs the UObject base, then lets the
+// assignment operator transfer both dates.
+DateInterval::DateInterval(const DateInterval& other)
+    : UObject(other)
+{
+    operator=(other);
+}
+
+
+// Copies both end points from other; self-assignment is a no-op.
+DateInterval&
+DateInterval::operator=(const DateInterval& other) {
+    if (this == &other) {
+        return *this;
+    }
+    fromDate = other.fromDate;
+    toDate = other.toDate;
+    return *this;
+}
+
+
+// Returns a heap-allocated copy of this interval, created with operator new.
+DateInterval*
+DateInterval::clone() const {
+    DateInterval* copy = new DateInterval(*this);
+    return copy;
+}
+
+
+// Two intervals are equal when both their start and end dates compare equal.
+UBool
+DateInterval::operator==(const DateInterval& other) const {
+    if (fromDate != other.fromDate) {
+        return false;
+    }
+    return toDate == other.toDate;
+}
+
+
+U_NAMESPACE_END
+
diff --git a/thirdparty/icu4c/common/edits.cpp b/thirdparty/icu4c/common/edits.cpp
new file mode 100644
index 0000000000..95f0c19a72
--- /dev/null
+++ b/thirdparty/icu4c/common/edits.cpp
@@ -0,0 +1,803 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// edits.cpp
+// created: 2017feb08 Markus W. Scherer
+
+#include "unicode/edits.h"
+#include "unicode/unistr.h"
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "uassert.h"
+#include "util.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+// Unit layout 0000uuuuuuuuuuuu: a run of u+1 unchanged text units.
+constexpr int32_t MAX_UNCHANGED_LENGTH = 0x1000;
+constexpr int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
+
+// Unit layout 0mmmnnnccccccccc (m=1..6): ccc+1 consecutive replacements,
+// each of m old text units by n new text units.
+constexpr int32_t MAX_SHORT_CHANGE_OLD_LENGTH = 6;
+constexpr int32_t MAX_SHORT_CHANGE_NEW_LENGTH = 7;
+constexpr int32_t SHORT_CHANGE_NUM_MASK = 0x1ff;
+constexpr int32_t MAX_SHORT_CHANGE = 0x6fff;
+
+// Unit layout 0111mmmmmmnnnnnn: one replacement of m text units with n.
+// A length field of 61 means the actual length is in the next array unit;
+// 62..63 means it is in the next two array units, with bit 30 of the
+// actual length kept in the head unit.
+// Trailing units always have bit 15 set.
+constexpr int32_t LENGTH_IN_1TRAIL = 61;
+constexpr int32_t LENGTH_IN_2TRAIL = 62;
+
+}  // namespace
+
+// Frees the edits array unless it is the object's own stackArray.
+void Edits::releaseArray() U_NOEXCEPT {
+    if (array == stackArray) {
+        return;
+    }
+    uprv_free(array);
+}
+
+// Copies other's array contents into this object. The caller has already
+// copied length and errorCode_; on a failure state or an allocation failure
+// the counters are zeroed instead.
+Edits &Edits::copyArray(const Edits &other) {
+    if (U_FAILURE(errorCode_)) {
+        length = delta = numChanges = 0;
+        return *this;
+    }
+    const size_t numBytes = (size_t)length * 2;
+    if (capacity < length) {
+        // The current buffer is too small: allocate one that fits exactly.
+        uint16_t *bigger = (uint16_t *)uprv_malloc(numBytes);
+        if (bigger == nullptr) {
+            length = delta = numChanges = 0;
+            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        releaseArray();
+        array = bigger;
+        capacity = length;
+    }
+    if (length > 0) {
+        uprv_memcpy(array, other.array, numBytes);
+    }
+    return *this;
+}
+
+// Takes over src's array contents. The caller has already copied length and
+// errorCode_. A heap array is stolen from src (which is then reset); contents
+// that fit into the stack array are copied instead.
+Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
+    if (U_FAILURE(errorCode_)) {
+        length = delta = numChanges = 0;
+        return *this;
+    }
+    releaseArray();
+    if (length <= STACK_CAPACITY) {
+        array = stackArray;
+        capacity = STACK_CAPACITY;
+        if (length > 0) {
+            uprv_memcpy(array, src.array, (size_t)length * 2);
+        }
+    } else {
+        // Steal the larger array and leave src pointing at its own stack array.
+        array = src.array;
+        capacity = src.capacity;
+        src.array = src.stackArray;
+        src.capacity = STACK_CAPACITY;
+        src.reset();
+    }
+    return *this;
+}
+
+// Copy assignment: copies the counters and error state, then the array contents.
+// Returns *this. On allocation failure copyArray() leaves this object empty with
+// errorCode_ set to U_MEMORY_ALLOCATION_ERROR.
+Edits &Edits::operator=(const Edits &other) {
+    // Self-assignment must be a no-op: otherwise copyArray() would call
+    // uprv_memcpy() with identical source and destination buffers.
+    if (this == &other) {
+        return *this;
+    }
+    length = other.length;
+    delta = other.delta;
+    numChanges = other.numChanges;
+    errorCode_ = other.errorCode_;
+    return copyArray(other);
+}
+
+// Move assignment: copies the counters and error state, then lets moveArray()
+// take over (or copy) the array contents of src.
+Edits &Edits::operator=(Edits &&src) U_NOEXCEPT {
+    errorCode_ = src.errorCode_;
+    numChanges = src.numChanges;
+    delta = src.delta;
+    length = src.length;
+    return moveArray(src);
+}
+
+// Releases the edits array if it is not the object's own stackArray.
+Edits::~Edits()
+{
+    releaseArray();
+}
+
+// Clears all recorded edits and the error state; the array and its capacity are kept.
+void Edits::reset() U_NOEXCEPT {
+    errorCode_ = U_ZERO_ERROR;
+    numChanges = 0;
+    delta = 0;
+    length = 0;
+}
+
+// Records a run of unchangedLength unchanged text units.
+// A zero length is ignored; a negative length sets U_ILLEGAL_ARGUMENT_ERROR.
+// Nothing is recorded while errorCode_ holds a failure.
+void Edits::addUnchanged(int32_t unchangedLength) {
+    if (U_FAILURE(errorCode_)) { return; }
+    if (unchangedLength == 0) { return; }
+    if (unchangedLength < 0) {
+        errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    // First try to extend a preceding unchanged-text unit that still has room.
+    const int32_t previous = lastUnit();
+    if (previous < MAX_UNCHANGED) {
+        const int32_t room = MAX_UNCHANGED - previous;
+        if (unchangedLength <= room) {
+            setLastUnit(previous + unchangedLength);
+            return;
+        }
+        setLastUnit(MAX_UNCHANGED);
+        unchangedLength -= room;
+    }
+    // Emit as many full-size units as needed...
+    for (; unchangedLength >= MAX_UNCHANGED_LENGTH; unchangedLength -= MAX_UNCHANGED_LENGTH) {
+        append(MAX_UNCHANGED);
+    }
+    // ...followed by one unit for whatever is left (stored as length-1).
+    if (unchangedLength > 0) {
+        append(unchangedLength - 1);
+    }
+}
+
+// Records the replacement of oldLength text units by newLength text units.
+// Negative lengths set U_ILLEGAL_ARGUMENT_ERROR; a 0:0 replacement is ignored.
+// The running delta (new minus old) is updated with an overflow check that sets
+// U_INDEX_OUTOFBOUNDS_ERROR. The edit is then stored in one of three forms:
+// a short-change unit (merged with an identical preceding one where possible),
+// a single head unit, or a head unit followed by up to four trailing length units.
+void Edits::addReplace(int32_t oldLength, int32_t newLength) {
+ if(U_FAILURE(errorCode_)) { return; }
+ if(oldLength < 0 || newLength < 0) {
+ errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ if (oldLength == 0 && newLength == 0) {
+ return;
+ }
+ // Note: the change is counted before the overflow check and before storage is ensured.
+ ++numChanges;
+ int32_t newDelta = newLength - oldLength;
+ if (newDelta != 0) {
+ if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
+ (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
+ // Integer overflow or underflow.
+ errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
+ return;
+ }
+ delta += newDelta;
+ }
+
+ if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH &&
+ newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) {
+ // Merge into previous same-lengths short-replacement record, if any.
+ int32_t u = (oldLength << 12) | (newLength << 9);
+ int32_t last = lastUnit();
+ if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
+ (last & ~SHORT_CHANGE_NUM_MASK) == u &&
+ (last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) {
+ setLastUnit(last + 1);
+ return;
+ }
+ append(u);
+ return;
+ }
+
+ // Long form: head unit 0111mmmmmmnnnnnn, optionally followed by trailing length units.
+ int32_t head = 0x7000;
+ if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
+ head |= oldLength << 6;
+ head |= newLength;
+ append(head);
+ } else if ((capacity - length) >= 5 || growArray()) {
+ // Room for the head plus at most two trailing units per length is guaranteed here.
+ int32_t limit = length + 1;
+ if(oldLength < LENGTH_IN_1TRAIL) {
+ head |= oldLength << 6;
+ } else if(oldLength <= 0x7fff) {
+ head |= LENGTH_IN_1TRAIL << 6;
+ array[limit++] = (uint16_t)(0x8000 | oldLength);
+ } else {
+ head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
+ array[limit++] = (uint16_t)(0x8000 | (oldLength >> 15));
+ array[limit++] = (uint16_t)(0x8000 | oldLength);
+ }
+ if(newLength < LENGTH_IN_1TRAIL) {
+ head |= newLength;
+ } else if(newLength <= 0x7fff) {
+ head |= LENGTH_IN_1TRAIL;
+ array[limit++] = (uint16_t)(0x8000 | newLength);
+ } else {
+ head |= LENGTH_IN_2TRAIL + (newLength >> 30);
+ array[limit++] = (uint16_t)(0x8000 | (newLength >> 15));
+ array[limit++] = (uint16_t)(0x8000 | newLength);
+ }
+ // The head is written last, once both length fields are known.
+ array[length] = (uint16_t)head;
+ length = limit;
+ }
+}
+
+// Appends one 16-bit unit, growing the array first when it is full.
+// If growing fails, nothing is appended (growArray() records the error).
+void Edits::append(int32_t r) {
+    if (length >= capacity && !growArray()) {
+        return;
+    }
+    array[length++] = (uint16_t)r;
+}
+
+// Replaces the array with a larger heap allocation and copies the existing units.
+// Growth: 2000 units when leaving the stack array, otherwise double, capped at INT32_MAX.
+// Returns FALSE and sets errorCode_ when the array cannot grow by at least 5 units
+// (U_INDEX_OUTOFBOUNDS_ERROR) or the allocation fails (U_MEMORY_ALLOCATION_ERROR).
+UBool Edits::growArray() {
+    int32_t grown;
+    if (array == stackArray) {
+        grown = 2000;
+    } else {
+        if (capacity == INT32_MAX) {
+            // Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
+            // with a result-string-buffer overflow.
+            errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
+            return FALSE;
+        }
+        grown = (capacity >= (INT32_MAX / 2)) ? INT32_MAX : 2 * capacity;
+    }
+    // A maximal change record needs 5 units, so insist on at least that much extra room.
+    if ((grown - capacity) < 5) {
+        errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
+        return FALSE;
+    }
+    uint16_t *bigger = (uint16_t *)uprv_malloc((size_t)grown * 2);
+    if (bigger == NULL) {
+        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+        return FALSE;
+    }
+    uprv_memcpy(bigger, array, (size_t)length * 2);
+    releaseArray();
+    array = bigger;
+    capacity = grown;
+    return TRUE;
+}
+
+// Propagates this object's failure, if any, into outErrorCode.
+// Returns TRUE if outErrorCode is (or already was) a failure, FALSE otherwise.
+// An already-failed outErrorCode is never overwritten.
+UBool Edits::copyErrorTo(UErrorCode &outErrorCode) const {
+    if (U_FAILURE(outErrorCode)) {
+        return TRUE;
+    }
+    if (U_FAILURE(errorCode_)) {
+        outErrorCode = errorCode_;
+        return TRUE;
+    }
+    return FALSE;
+}
+
+// Composes two edit sequences and appends the result to this object.
+// ab describes the changes from string a to string b, bc those from b to c;
+// the appended edits describe a -> c.
+// Returns *this without doing anything if errorCode or this object is already in a failure state.
+// Sets errorCode to U_ILLEGAL_ARGUMENT_ERROR when the output length of ab and the
+// input length of bc (the intermediate string b) do not match, unless this object
+// already carries its own failure, which is reported instead.
+Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) {
+ if (copyErrorTo(errorCode)) { return *this; }
+ // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
+ // Parallel iteration over both Edits.
+ Iterator abIter = ab.getFineIterator();
+ Iterator bcIter = bc.getFineIterator();
+ UBool abHasNext = TRUE, bcHasNext = TRUE;
+ // Copy iterator state into local variables, so that we can modify and subdivide spans.
+ // ab old & new length, bc old & new length
+ int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
+ // When we have different-intermediate-length changes, we accumulate a larger change.
+ int32_t pending_aLength = 0, pending_cLength = 0;
+ for (;;) {
+ // At this point, for each of the two iterators:
+ // Either we are done with the locally cached current edit,
+ // and its intermediate-string length has been reset,
+ // or we will continue to work with a truncated remainder of this edit.
+ //
+ // If the current edit is done, and the iterator has not yet reached the end,
+ // then we fetch the next edit. This is true for at least one of the iterators.
+ //
+ // Normally it does not matter whether we fetch from ab and then bc or vice versa.
+ // However, the result is observably different when
+ // ab deletions meet bc insertions at the same intermediate-string index.
+ // Some users expect the bc insertions to come first, so we fetch from bc first.
+ if (bc_bLength == 0) {
+ if (bcHasNext && (bcHasNext = bcIter.next(errorCode)) != 0) {
+ bc_bLength = bcIter.oldLength();
+ cLength = bcIter.newLength();
+ if (bc_bLength == 0) {
+ // insertion
+ if (ab_bLength == 0 || !abIter.hasChange()) {
+ addReplace(pending_aLength, pending_cLength + cLength);
+ pending_aLength = pending_cLength = 0;
+ } else {
+ pending_cLength += cLength;
+ }
+ continue;
+ }
+ }
+ // else see if the other iterator is done, too.
+ }
+ if (ab_bLength == 0) {
+ if (abHasNext && (abHasNext = abIter.next(errorCode)) != 0) {
+ aLength = abIter.oldLength();
+ ab_bLength = abIter.newLength();
+ if (ab_bLength == 0) {
+ // deletion
+ if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
+ addReplace(pending_aLength + aLength, pending_cLength);
+ pending_aLength = pending_cLength = 0;
+ } else {
+ pending_aLength += aLength;
+ }
+ continue;
+ }
+ } else if (bc_bLength == 0) {
+ // Both iterators are done at the same time:
+ // The intermediate-string lengths match.
+ break;
+ } else {
+ // The ab output string is shorter than the bc input string.
+ if (!copyErrorTo(errorCode)) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+ return *this;
+ }
+ }
+ if (bc_bLength == 0) {
+ // The bc input string is shorter than the ab output string.
+ if (!copyErrorTo(errorCode)) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+ return *this;
+ }
+ // Done fetching: ab_bLength > 0 && bc_bLength > 0
+
+ // The current state has two parts:
+ // - Past: We accumulate a longer ac edit in the "pending" variables.
+ // - Current: We have copies of the current ab/bc edits in local variables.
+ // At least one side is newly fetched.
+ // One side might be a truncated remainder of an edit we fetched earlier.
+
+ if (!abIter.hasChange() && !bcIter.hasChange()) {
+ // An unchanged span all the way from string a to string c.
+ if (pending_aLength != 0 || pending_cLength != 0) {
+ addReplace(pending_aLength, pending_cLength);
+ pending_aLength = pending_cLength = 0;
+ }
+ int32_t unchangedLength = aLength <= cLength ? aLength : cLength;
+ addUnchanged(unchangedLength);
+ ab_bLength = aLength -= unchangedLength;
+ bc_bLength = cLength -= unchangedLength;
+ // At least one of the unchanged spans is now empty.
+ continue;
+ }
+ if (!abIter.hasChange() && bcIter.hasChange()) {
+ // Unchanged a->b but changed b->c.
+ if (ab_bLength >= bc_bLength) {
+ // Split the longer unchanged span into change + remainder.
+ addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
+ pending_aLength = pending_cLength = 0;
+ aLength = ab_bLength -= bc_bLength;
+ bc_bLength = 0;
+ continue;
+ }
+ // Handle the shorter unchanged span below like a change.
+ } else if (abIter.hasChange() && !bcIter.hasChange()) {
+ // Changed a->b and then unchanged b->c.
+ if (ab_bLength <= bc_bLength) {
+ // Split the longer unchanged span into change + remainder.
+ addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
+ pending_aLength = pending_cLength = 0;
+ cLength = bc_bLength -= ab_bLength;
+ ab_bLength = 0;
+ continue;
+ }
+ // Handle the shorter unchanged span below like a change.
+ } else { // both abIter.hasChange() && bcIter.hasChange()
+ if (ab_bLength == bc_bLength) {
+ // Changes on both sides up to the same position. Emit & reset.
+ addReplace(pending_aLength + aLength, pending_cLength + cLength);
+ pending_aLength = pending_cLength = 0;
+ ab_bLength = bc_bLength = 0;
+ continue;
+ }
+ }
+ // Accumulate the a->c change, reset the shorter side,
+ // keep a remainder of the longer one.
+ pending_aLength += aLength;
+ pending_cLength += cLength;
+ if (ab_bLength < bc_bLength) {
+ bc_bLength -= ab_bLength;
+ cLength = ab_bLength = 0;
+ } else { // ab_bLength > bc_bLength
+ ab_bLength -= bc_bLength;
+ aLength = bc_bLength = 0;
+ }
+ }
+ // Flush whatever change is still accumulated after both iterators finished.
+ if (pending_aLength != 0 || pending_cLength != 0) {
+ addReplace(pending_aLength, pending_cLength);
+ }
+ // Report any failure recorded by addReplace()/addUnchanged() to the caller.
+ copyErrorTo(errorCode);
+ return *this;
+}
+
+// Creates an iterator over len edit units starting at a.
+// oc selects only-changes iteration, crs selects coarse iteration;
+// all positions and the current span start out as zero/empty.
+Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs)
+    : array(a),
+      index(0),
+      length(len),
+      remaining(0),
+      onlyChanges_(oc),
+      coarse(crs),
+      dir(0),
+      changed(FALSE),
+      oldLength_(0),
+      newLength_(0),
+      srcIndex(0),
+      replIndex(0),
+      destIndex(0) {
+}
+
+// Decodes one length field of a long-form change.
+// head is the 6-bit field from the head unit: values below LENGTH_IN_1TRAIL are
+// the length itself; otherwise the length is read from one or two trailing units
+// at the current index, and index is advanced past them.
+int32_t Edits::Iterator::readLength(int32_t head) {
+    if (head < LENGTH_IN_1TRAIL) {
+        return head;
+    }
+    if (head < LENGTH_IN_2TRAIL) {
+        U_ASSERT(index < length);
+        U_ASSERT(array[index] >= 0x8000);
+        const int32_t value = array[index] & 0x7fff;
+        ++index;
+        return value;
+    }
+    U_ASSERT((index + 2) <= length);
+    U_ASSERT(array[index] >= 0x8000);
+    U_ASSERT(array[index + 1] >= 0x8000);
+    // Bit 30 comes from the head field, bits 29..15 and 14..0 from the two trailing units.
+    const int32_t high = (int32_t)(array[index] & 0x7fff) << 15;
+    const int32_t low = array[index + 1] & 0x7fff;
+    index += 2;
+    return ((head & 1) << 30) | high | low;
+}
+
+// Moves the string indexes forward past the current span.
+// The replacement index only advances for change spans.
+void Edits::Iterator::updateNextIndexes() {
+    destIndex += newLength_;
+    if (changed) {
+        replIndex += newLength_;
+    }
+    srcIndex += oldLength_;
+}
+
+// Moves the string indexes backward to the start of the current span.
+// The replacement index only moves for change spans.
+void Edits::Iterator::updatePreviousIndexes() {
+    destIndex -= newLength_;
+    if (changed) {
+        replIndex -= newLength_;
+    }
+    srcIndex -= oldLength_;
+}
+
+// Puts the iterator into the "no span" state used before or beyond the string:
+// no direction, no change, empty lengths. Always returns FALSE.
+UBool Edits::Iterator::noNext() {
+    oldLength_ = 0;
+    newLength_ = 0;
+    changed = FALSE;
+    dir = 0;
+    return FALSE;
+}
+
+// Advances the iterator to the next span.
+// @param onlyChanges if TRUE, unchanged spans are skipped (their lengths are still
+// added to the string indexes).
+// @param errorCode if already a failure, the function returns FALSE immediately.
+// @return TRUE if there is another span; FALSE at the end of the edits or on prior failure.
+// A coarse iterator merges adjacent changes into one span; a fine-grained iterator
+// returns each change of a compressed sequence separately, tracked via `remaining`.
+UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
+ // Forward iteration: Update the string indexes to the limit of the current span,
+ // and post-increment-read array units to assemble a new span.
+ // Leaves the array index one after the last unit of that span.
+ if (U_FAILURE(errorCode)) { return FALSE; }
+ // We have an errorCode in case we need to start guarding against integer overflows.
+ // It is also convenient for caller loops if we bail out when an error was set elsewhere.
+ if (dir > 0) {
+ updateNextIndexes();
+ } else {
+ if (dir < 0) {
+ // Turn around from previous() to next().
+ // Post-increment-read the same span again.
+ if (remaining > 0) {
+ // Fine-grained iterator:
+ // Stay on the current one of a sequence of compressed changes.
+ ++index; // next() rests on the index after the sequence unit.
+ dir = 1;
+ return TRUE;
+ }
+ }
+ dir = 1;
+ }
+ if (remaining >= 1) {
+ // Fine-grained iterator: Continue a sequence of compressed changes.
+ if (remaining > 1) {
+ --remaining;
+ return TRUE;
+ }
+ remaining = 0;
+ }
+ if (index >= length) {
+ return noNext();
+ }
+ int32_t u = array[index++];
+ if (u <= MAX_UNCHANGED) {
+ // Combine adjacent unchanged ranges.
+ changed = FALSE;
+ oldLength_ = u + 1;
+ while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
+ ++index;
+ oldLength_ += u + 1;
+ }
+ newLength_ = oldLength_;
+ if (onlyChanges) {
+ // Skip the unchanged span but account for it in the string indexes.
+ updateNextIndexes();
+ if (index >= length) {
+ return noNext();
+ }
+ // already fetched u > MAX_UNCHANGED at index
+ ++index;
+ } else {
+ return TRUE;
+ }
+ }
+ changed = TRUE;
+ if (u <= MAX_SHORT_CHANGE) {
+ // Short-change unit: old length, new length and repeat count packed into u.
+ int32_t oldLen = u >> 12;
+ int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
+ int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
+ if (coarse) {
+ oldLength_ = num * oldLen;
+ newLength_ = num * newLen;
+ } else {
+ // Split a sequence of changes that was compressed into one unit.
+ oldLength_ = oldLen;
+ newLength_ = newLen;
+ if (num > 1) {
+ remaining = num; // This is the first of two or more changes.
+ }
+ return TRUE;
+ }
+ } else {
+ // Long-form change: lengths come from the head unit and possibly trailing units.
+ U_ASSERT(u <= 0x7fff);
+ oldLength_ = readLength((u >> 6) & 0x3f);
+ newLength_ = readLength(u & 0x3f);
+ if (!coarse) {
+ return TRUE;
+ }
+ }
+ // Combine adjacent changes.
+ while (index < length && (u = array[index]) > MAX_UNCHANGED) {
+ ++index;
+ if (u <= MAX_SHORT_CHANGE) {
+ int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
+ oldLength_ += (u >> 12) * num;
+ newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
+ } else {
+ U_ASSERT(u <= 0x7fff);
+ oldLength_ += readLength((u >> 6) & 0x3f);
+ newLength_ += readLength(u & 0x3f);
+ }
+ }
+ return TRUE;
+}
+
+// Moves the iterator back to the previous span.
+// @param errorCode if already a failure, the function returns FALSE immediately.
+// @return TRUE if there is a previous span; FALSE at the start of the edits or on prior failure.
+// Mirrors next(): a coarse iterator merges adjacent changes, a fine-grained iterator
+// steps through the individual changes of a compressed sequence via `remaining`.
+UBool Edits::Iterator::previous(UErrorCode &errorCode) {
+ // Backward iteration: Pre-decrement-read array units to assemble a new span,
+ // then update the string indexes to the start of that span.
+ // Leaves the array index on the head unit of that span.
+ if (U_FAILURE(errorCode)) { return FALSE; }
+ // We have an errorCode in case we need to start guarding against integer overflows.
+ // It is also convenient for caller loops if we bail out when an error was set elsewhere.
+ if (dir >= 0) {
+ if (dir > 0) {
+ // Turn around from next() to previous().
+ // Set the string indexes to the span limit and
+ // pre-decrement-read the same span again.
+ if (remaining > 0) {
+ // Fine-grained iterator:
+ // Stay on the current one of a sequence of compressed changes.
+ --index; // previous() rests on the sequence unit.
+ dir = -1;
+ return TRUE;
+ }
+ updateNextIndexes();
+ }
+ dir = -1;
+ }
+ if (remaining > 0) {
+ // Fine-grained iterator: Continue a sequence of compressed changes.
+ int32_t u = array[index];
+ U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
+ if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {
+ ++remaining;
+ updatePreviousIndexes();
+ return TRUE;
+ }
+ remaining = 0;
+ }
+ if (index <= 0) {
+ return noNext();
+ }
+ int32_t u = array[--index];
+ if (u <= MAX_UNCHANGED) {
+ // Combine adjacent unchanged ranges.
+ changed = FALSE;
+ oldLength_ = u + 1;
+ while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
+ --index;
+ oldLength_ += u + 1;
+ }
+ newLength_ = oldLength_;
+ // No need to handle onlyChanges as long as previous() is called only from findIndex().
+ updatePreviousIndexes();
+ return TRUE;
+ }
+ changed = TRUE;
+ if (u <= MAX_SHORT_CHANGE) {
+ // Short-change unit: old length, new length and repeat count packed into u.
+ int32_t oldLen = u >> 12;
+ int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
+ int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
+ if (coarse) {
+ oldLength_ = num * oldLen;
+ newLength_ = num * newLen;
+ } else {
+ // Split a sequence of changes that was compressed into one unit.
+ oldLength_ = oldLen;
+ newLength_ = newLen;
+ if (num > 1) {
+ remaining = 1; // This is the last of two or more changes.
+ }
+ updatePreviousIndexes();
+ return TRUE;
+ }
+ } else {
+ if (u <= 0x7fff) {
+ // The change is encoded in u alone.
+ oldLength_ = readLength((u >> 6) & 0x3f);
+ newLength_ = readLength(u & 0x3f);
+ } else {
+ // Back up to the head of the change, read the lengths,
+ // and reset the index to the head again.
+ U_ASSERT(index > 0);
+ while ((u = array[--index]) > 0x7fff) {}
+ U_ASSERT(u > MAX_SHORT_CHANGE);
+ int32_t headIndex = index++;
+ oldLength_ = readLength((u >> 6) & 0x3f);
+ newLength_ = readLength(u & 0x3f);
+ index = headIndex;
+ }
+ if (!coarse) {
+ updatePreviousIndexes();
+ return TRUE;
+ }
+ }
+ // Combine adjacent changes.
+ while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) {
+ --index;
+ if (u <= MAX_SHORT_CHANGE) {
+ int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
+ oldLength_ += (u >> 12) * num;
+ newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
+ } else if (u <= 0x7fff) {
+ // Read the lengths, and reset the index to the head again.
+ int32_t headIndex = index++;
+ oldLength_ += readLength((u >> 6) & 0x3f);
+ newLength_ += readLength(u & 0x3f);
+ index = headIndex;
+ }
+ // Units above 0x7fff are trailing length units; they are stepped over here
+ // and consumed by readLength() once the head unit is reached.
+ }
+ updatePreviousIndexes();
+ return TRUE;
+}
+
+// Positions the iterator on the span that contains string index i.
+// @param i the index to find; negative values are rejected.
+// @param findSource TRUE to interpret i as a source-string index,
+// FALSE to interpret it as a destination-string index.
+// @param errorCode if already a failure, the function returns -1 immediately.
+// @return -1 on prior failure or i < 0; 0 if the iterator now rests on the span
+// containing i; 1 if i is at or beyond the end of the string.
+// The search starts from the current span: it walks backward when i is closer to the
+// current span than to the start, otherwise it restarts from the beginning and walks forward.
+int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode) || i < 0) { return -1; }
+ int32_t spanStart, spanLength;
+ if (findSource) { // find source index
+ spanStart = srcIndex;
+ spanLength = oldLength_;
+ } else { // find destination index
+ spanStart = destIndex;
+ spanLength = newLength_;
+ }
+ if (i < spanStart) {
+ if (i >= (spanStart / 2)) {
+ // Search backwards.
+ for (;;) {
+ UBool hasPrevious = previous(errorCode);
+ U_ASSERT(hasPrevious); // because i>=0 and the first span starts at 0
+ (void)hasPrevious; // avoid unused-variable warning
+ spanStart = findSource ? srcIndex : destIndex;
+ if (i >= spanStart) {
+ // The index is in the current span.
+ return 0;
+ }
+ if (remaining > 0) {
+ // Is the index in one of the remaining compressed edits?
+ // spanStart is the start of the current span, first of the remaining ones.
+ spanLength = findSource ? oldLength_ : newLength_;
+ int32_t u = array[index];
+ U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
+ int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining;
+ int32_t len = num * spanLength;
+ if (i >= (spanStart - len)) {
+ int32_t n = ((spanStart - i - 1) / spanLength) + 1;
+ // 1 <= n <= num
+ srcIndex -= n * oldLength_;
+ replIndex -= n * newLength_;
+ destIndex -= n * newLength_;
+ remaining += n;
+ return 0;
+ }
+ // Skip all of these edits at once.
+ srcIndex -= num * oldLength_;
+ replIndex -= num * newLength_;
+ destIndex -= num * newLength_;
+ remaining = 0;
+ }
+ }
+ }
+ // Reset the iterator to the start.
+ dir = 0;
+ index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
+ } else if (i < (spanStart + spanLength)) {
+ // The index is in the current span.
+ return 0;
+ }
+ // Search forwards from the current position (or from the start after a reset).
+ while (next(FALSE, errorCode)) {
+ if (findSource) {
+ spanStart = srcIndex;
+ spanLength = oldLength_;
+ } else {
+ spanStart = destIndex;
+ spanLength = newLength_;
+ }
+ if (i < (spanStart + spanLength)) {
+ // The index is in the current span.
+ return 0;
+ }
+ if (remaining > 1) {
+ // Is the index in one of the remaining compressed edits?
+ // spanStart is the start of the current span, first of the remaining ones.
+ int32_t len = remaining * spanLength;
+ if (i < (spanStart + len)) {
+ int32_t n = (i - spanStart) / spanLength; // 1 <= n <= remaining - 1
+ srcIndex += n * oldLength_;
+ replIndex += n * newLength_;
+ destIndex += n * newLength_;
+ remaining -= n;
+ return 0;
+ }
+ // Make next() skip all of these edits at once.
+ oldLength_ *= remaining;
+ newLength_ *= remaining;
+ remaining = 0;
+ }
+ }
+ // Ran off the end of the edits: i is at or beyond the string length.
+ return 1;
+}
+
+// Maps source-string index i to the corresponding destination-string index.
+// Returns 0 on error or for a negative index. An index inside a change span
+// maps to the end of that span's replacement; inside an unchanged span it maps 1:1.
+int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) {
+    const int32_t where = findIndex(i, TRUE, errorCode);
+    if (where < 0) {
+        // Error or before the string.
+        return 0;
+    }
+    const UBool atSpanStartOrPastEnd = (where > 0 || i == srcIndex);
+    if (atSpanStartOrPastEnd) {
+        return destIndex;
+    }
+    // Strictly inside the found span.
+    return changed ? destIndex + newLength_ : destIndex + (i - srcIndex);
+}
+
+// Maps destination-string index i to the corresponding source-string index.
+// Returns 0 on error or for a negative index. An index inside a change span
+// maps to the end of that span's source text; inside an unchanged span it maps 1:1.
+int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) {
+    const int32_t where = findIndex(i, FALSE, errorCode);
+    if (where < 0) {
+        // Error or before the string.
+        return 0;
+    }
+    const UBool atSpanStartOrPastEnd = (where > 0 || i == destIndex);
+    if (atSpanStartOrPastEnd) {
+        return srcIndex;
+    }
+    // Strictly inside the found span.
+    return changed ? srcIndex + oldLength_ : srcIndex + (i - destIndex);
+}
+
+// Appends a human-readable description of the current span to sb and returns sb.
+// Format for a change:    { src[a..b] ⇝ dest[c..d], repl[e..f] }
+// Format for no change:   { src[a..b] ≡ dest[c..d] (no-change) }
+UnicodeString& Edits::Iterator::toString(UnicodeString& sb) const {
+    sb.append(u"{ src[", -1);
+    ICU_Utility::appendNumber(sb, srcIndex);
+    sb.append(u"..", -1);
+    ICU_Utility::appendNumber(sb, srcIndex + oldLength_);
+    if (changed) {
+        // U+21DD (rightwards squiggle arrow) with a space on each side,
+        // parallel to the "≡" separator used for unchanged spans.
+        sb.append(u"] ⇝ dest[", -1);
+    } else {
+        sb.append(u"] ≡ dest[", -1);
+    }
+    ICU_Utility::appendNumber(sb, destIndex);
+    sb.append(u"..", -1);
+    ICU_Utility::appendNumber(sb, destIndex + newLength_);
+    if (changed) {
+        sb.append(u"], repl[", -1);
+        ICU_Utility::appendNumber(sb, replIndex);
+        sb.append(u"..", -1);
+        ICU_Utility::appendNumber(sb, replIndex + newLength_);
+        sb.append(u"] }", -1);
+    } else {
+        sb.append(u"] (no-change) }", -1);
+    }
+    return sb;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/errorcode.cpp b/thirdparty/icu4c/common/errorcode.cpp
new file mode 100644
index 0000000000..e7ac43b527
--- /dev/null
+++ b/thirdparty/icu4c/common/errorcode.cpp
@@ -0,0 +1,42 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: errorcode.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009mar10
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/errorcode.h"
+
+U_NAMESPACE_BEGIN
+
+// Out-of-line destructor with an empty body.
+ErrorCode::~ErrorCode() {
+}
+
+// Sets the stored code back to U_ZERO_ERROR and returns the value it held before.
+UErrorCode ErrorCode::reset() {
+    const UErrorCode previous = errorCode;
+    errorCode = U_ZERO_ERROR;
+    return previous;
+}
+
+// Invokes handleFailure() when the stored code is a failure; otherwise does nothing.
+void ErrorCode::assertSuccess() const {
+    if(!isFailure()) {
+        return;
+    }
+    handleFailure();
+}
+
+const char* ErrorCode::errorName() const {
+ return u_errorName(errorCode);
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/filteredbrk.cpp b/thirdparty/icu4c/common/filteredbrk.cpp
new file mode 100644
index 0000000000..c07128cbce
--- /dev/null
+++ b/thirdparty/icu4c/common/filteredbrk.cpp
@@ -0,0 +1,710 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2014-2015, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
+
+#include "cmemory.h"
+
+#include "unicode/filteredbrk.h"
+#include "unicode/ucharstriebuilder.h"
+#include "unicode/ures.h"
+
+#include "uresimp.h" // ures_getByKeyWithFallback
+#include "ubrkimpl.h" // U_ICUDATA_BRKITR
+#include "uvector.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+#ifndef FB_DEBUG
+#define FB_DEBUG 0
+#endif
+
+#if FB_DEBUG
+#include <stdio.h>
+// Debug-only trace helper (compiled when FB_DEBUG is non-zero).
+// Prints one line to stderr containing the call site (f:l), the message m,
+// the string s (or "NULL"), its address, the flag b as 'T'/'F', and d.
+static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d, const char *f, int l) {
+ char buf[2048];
+ if(s) {
+ // NOTE(review): whether buf is NUL-terminated when the string needs
+ // 2048 bytes or more depends on extract()'s contract - confirm.
+ s->extract(0,s->length(),buf,2048);
+ } else {
+ strcpy(buf,"NULL");
+ }
+ fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n",
+ f, l, m, buf, (const void*)s, b?'T':'F',(int)d);
+}
+
+#define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__)
+#else
+#define FB_TRACE(m,s,b,d)
+#endif
+
+/**
+ * Ordering callback passed to sortedInsert().
+ * Treats both elements' pointers as UnicodeString objects and returns
+ * the result of UnicodeString::compare() on them.
+ */
+static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
+  const UnicodeString *lhs = (const UnicodeString*)t1.pointer;
+  const UnicodeString *rhs = (const UnicodeString*)t2.pointer;
+  return lhs->compare(*rhs);
+}
+
+/**
+ * A UVector used as a set of UnicodeString objects.
+ * Elements are inserted in sorted order and deleted through uprv_deleteUObject.
+ */
+class U_COMMON_API UStringSet : public UVector {
+ public:
+  UStringSet(UErrorCode &status)
+      : UVector(uprv_deleteUObject, uhash_compareUnicodeString, 1, status) {}
+  virtual ~UStringSet();
+
+  // Keep the inherited overloads visible next to the UnicodeString one below.
+  using UVector::contains;
+  /**
+   * Is this UnicodeString contained?
+   */
+  inline UBool contains(const UnicodeString& s) {
+    return contains((void*) &s);
+  }
+  /**
+   * Return an alias to the ith UnicodeString.
+   */
+  inline const UnicodeString* getStringAt(int32_t i) const {
+    return (const UnicodeString*)elementAt(i);
+  }
+  /**
+   * Adopt the UnicodeString unless an equal one is already contained.
+   * The caller gives up ownership of str in every case.
+   * @return true if adopted successfully, false otherwise (error, or else duplicate)
+   */
+  inline UBool adopt(UnicodeString *str, UErrorCode &status) {
+    if(U_SUCCESS(status) && !contains(*str)) {
+      sortedInsert(str, compareUnicodeString, status);
+      if(U_SUCCESS(status)) {
+        return true;
+      }
+    }
+    // Prior error, duplicate, or failed insertion: dispose of the string.
+    delete str;
+    return false;
+  }
+  /**
+   * Add a copy of str.
+   * @return true if the copy was successfully adopted.
+   */
+  inline UBool add(const UnicodeString& str, UErrorCode &status) {
+    if(U_FAILURE(status)) return false;
+    UnicodeString *copy = new UnicodeString(str);
+    if(copy!=NULL) {
+      return adopt(copy, status);
+    }
+    status = U_MEMORY_ALLOCATION_ERROR;
+    return false;
+  }
+  /**
+   * Remove this string.
+   * @return true if successfully removed, false otherwise (error, or else it wasn't there)
+   */
+  inline UBool remove(const UnicodeString &s, UErrorCode &status) {
+    if(U_SUCCESS(status)) {
+      return removeElement((void*) &s);
+    }
+    return false;
+  }
+};
+
+/**
+ * Defined out of line: it is virtual, so it won't be inlined.
+ */
+UStringSet::~UStringSet() {
+}
+
+/* ----------------------------------------------------------- */
+
+
+/* Filtered Break constants */
+// Values stored in the tries: kPARTIAL is attached to reversed prefixes in the
+// backwards trie, kMATCH to complete entries (see build() and breakExceptionAt()).
+static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie
+static const int32_t kMATCH = (1<<1); //< exact match - skip this one.
+// Flags kept per string in the partials[] array while build() runs.
+static const int32_t kSuppressInReverse = (1<<0);
+static const int32_t kAddToForward = (1<<1);
+// The character build() searches for when looking for partial abbreviations.
+static const UChar kFULLSTOP = 0x002E; // '.'
+
+/**
+ * Shared, reference-counted data for SimpleFilteredSentenceBreakIterator:
+ * the two tries, each held in a LocalPointer member.
+ */
+class SimpleFilteredSentenceBreakData : public UMemory {
+public:
+  SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
+      : fForwardsPartialTrie(forwards),
+        fBackwardsTrie(backwards),
+        refcount(1) {
+  }
+  /** Adds one reference and returns this object. */
+  SimpleFilteredSentenceBreakData *incr() {
+    ++refcount;
+    return this;
+  }
+  /** Drops one reference, deleting this object once none remain. Always returns a null pointer. */
+  SimpleFilteredSentenceBreakData *decr() {
+    refcount -= 1;
+    if(refcount <= 0) {
+      delete this;
+    }
+    return 0;
+  }
+  virtual ~SimpleFilteredSentenceBreakData();
+
+  LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
+  LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
+  int32_t refcount;
+};
+
+// Empty body; the LocalPointer members clean up the tries themselves.
+SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {
+}
+
+/**
+ * Concrete implementation: wraps a delegate BreakIterator and skips the
+ * delegate's breaks that breakExceptionAt() reports as exceptions.
+ */
+class SimpleFilteredSentenceBreakIterator : public BreakIterator {
+public:
+  SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status);
+  SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other);
+  virtual ~SimpleFilteredSentenceBreakIterator();
+private:
+  SimpleFilteredSentenceBreakData *fData;   // shared, reference-counted tries
+  LocalPointer<BreakIterator> fDelegate;    // the wrapped iterator
+  LocalUTextPointer fText;                  // refreshed by resetState()
+
+  /* -- subclass interface -- */
+public:
+  /* -- cloning and other subclass stuff -- */
+  virtual BreakIterator * createBufferClone(void * /*stackBuffer*/,
+                                            int32_t &/*BufferSize*/,
+                                            UErrorCode &status) {
+    // for now - always deep clone
+    status = U_SAFECLONE_ALLOCATED_WARNING;
+    return clone();
+  }
+  virtual SimpleFilteredSentenceBreakIterator* clone() const {
+    return new SimpleFilteredSentenceBreakIterator(*this);
+  }
+  virtual UClassID getDynamicClassID(void) const {
+    return NULL;
+  }
+  // Equality is identity: only the very same object compares equal.
+  virtual UBool operator==(const BreakIterator& o) const {
+    return this==&o;
+  }
+
+  /* -- text modifying -- */
+  virtual void setText(UText *text, UErrorCode &status) {
+    fDelegate->setText(text,status);
+  }
+  virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) {
+    fDelegate->refreshInputText(input,status);
+    return *this;
+  }
+  virtual void adoptText(CharacterIterator* it) {
+    fDelegate->adoptText(it);
+  }
+  virtual void setText(const UnicodeString &text) {
+    fDelegate->setText(text);
+  }
+
+  /* -- other functions that are just delegated -- */
+  virtual UText *getUText(UText *fillIn, UErrorCode &status) const {
+    return fDelegate->getUText(fillIn,status);
+  }
+  virtual CharacterIterator& getText(void) const {
+    return fDelegate->getText();
+  }
+
+  /* -- ITERATION -- */
+  virtual int32_t first(void);
+  virtual int32_t preceding(int32_t offset);
+  virtual int32_t previous(void);
+  virtual UBool isBoundary(int32_t offset);
+  // we keep the delegate current, so this should be correct.
+  virtual int32_t current(void) const {
+    return fDelegate->current();
+  }
+
+  virtual int32_t next(void);
+
+  virtual int32_t next(int32_t n);
+  virtual int32_t following(int32_t offset);
+  virtual int32_t last(void);
+
+private:
+  /**
+   * Given that the fDelegate has already given its "initial" answer,
+   * find the NEXT actual (non-excepted) break.
+   * @param n initial position from delegate
+   * @return new break position or UBRK_DONE
+   */
+  int32_t internalNext(int32_t n);
+  /**
+   * Given that the fDelegate has already given its "initial" answer,
+   * find the PREV actual (non-excepted) break.
+   * @param n initial position from delegate
+   * @return new break position or UBRK_DONE
+   */
+  int32_t internalPrev(int32_t n);
+  /**
+   * Set up the UText with the value of the fDelegate.
+   * Call this before calling breakExceptionAt.
+   * May be able to avoid excess calls.
+   */
+  void resetState(UErrorCode &status);
+  /**
+   * Is there a match (exception) at this spot?
+   */
+  enum EFBMatchResult { kNoExceptionHere, kExceptionHere };
+  /**
+   * Determine if there is an exception at this spot
+   * @param n spot to check
+   * @return kNoExceptionHere or kExceptionHere
+   **/
+  enum EFBMatchResult breakExceptionAt(int32_t n);
+};
+
+// Copy constructor: shares other's trie data (bumping its reference count)
+// and clones other's delegate.
+SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other)
+  : BreakIterator(other),
+    fData(other.fData->incr()),
+    fDelegate(other.fDelegate->clone())
+{
+  // fText is not copied; it starts out default-constructed.
+}
+
+
+// Builds a filtered iterator around the delegate `adopt`.
+// @param adopt     delegate iterator; stored in fDelegate
+// @param forwards  forward (partial) trie, may be NULL; handed to the shared data object
+// @param backwards backward trie, may be NULL; handed to the shared data object
+// @param status    receives U_MEMORY_ALLOCATION_ERROR if the shared data object
+//                  cannot be allocated
+SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) :
+  BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC_ACTUAL_LOCALE,status)),
+  fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
+  fDelegate(adopt)
+{
+  if(fData == NULL) {
+    // The data object would have owned the tries; without it they must be
+    // released here, and the failure has to be reported to the caller.
+    delete forwards;
+    delete backwards;
+    if(U_SUCCESS(status)) {
+      status = U_MEMORY_ALLOCATION_ERROR;
+    }
+  }
+}
+
+// Releases this iterator's reference to the shared trie data.
+SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
+  // fData is NULL when its allocation failed in the constructor;
+  // decr() must not be called through a null pointer.
+  if(fData != NULL) {
+    fData = fData->decr(); // decr() always returns a null pointer
+  }
+}
+
+// Refreshes fText from the delegate, offering the previously held UText as
+// the fill-in object.
+void SimpleFilteredSentenceBreakIterator::resetState(UErrorCode &status) {
+  UText *fillIn = fText.orphan();
+  UText *refreshed = fDelegate->getUText(fillIn, status);
+  fText.adoptInstead(refreshed);
+}
+
+// Decides whether the break that the delegate reported at native index n
+// should be suppressed.
+// The text before n is walked backwards through the backwards trie. A kMATCH
+// value means the break is an exception. A kPARTIAL value triggers a second,
+// forward walk through the forwards trie starting at the recorded position;
+// only a full forward match counts as an exception.
+// Callers are expected to have run resetState() first so that fText is current.
+// @param n native index of the candidate break
+// @return kExceptionHere if the break should be skipped, else kNoExceptionHere
+SimpleFilteredSentenceBreakIterator::EFBMatchResult
+SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
+ int64_t bestPosn = -1;
+ int32_t bestValue = -1;
+ // loops while 'n' points to an exception.
+ utext_setNativeIndex(fText.getAlias(), n); // from n..
+ fData->fBackwardsTrie->reset();
+ UChar32 uch;
+
+ //if(debug2) u_printf(" n@ %d\n", n);
+ // Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
+ if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here??
+ // TODO only do this the 1st time?
+ //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
+ } else {
+ //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
+ // Not a space: step forward again so the character is not consumed.
+ uch = utext_next32(fText.getAlias());
+ //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
+ }
+
+ UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
+
+ // Feed characters, moving backwards, into the backwards trie until the text
+ // or the trie runs out.
+ while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and..
+ USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
+ if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
+ bestPosn = utext_getNativeIndex(fText.getAlias());
+ bestValue = fData->fBackwardsTrie->getValue();
+ }
+ //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
+ }
+
+ // The loop can stop on a result that matches but has no continuation;
+ // record that match as the best one.
+ if(USTRINGTRIE_MATCHES(r)) { // exact match?
+ //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
+ bestValue = fData->fBackwardsTrie->getValue();
+ bestPosn = utext_getNativeIndex(fText.getAlias());
+ //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
+ }
+
+ if(bestPosn>=0) {
+ //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
+
+ //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what?
+ //int32_t bestValue = fBackwardsTrie->getValue();
+ ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue);
+
+ if(bestValue == kMATCH) { // exact match!
+ //if(debug2) u_printf(" exact backward match\n");
+ return kExceptionHere; // See if the next is another exception.
+ } else if(bestValue == kPARTIAL
+ && fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
+ //if(debug2) u_printf(" partial backward match\n");
+ // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
+ // to see if it matches something going forward.
+ fData->fForwardsPartialTrie->reset();
+ UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
+ utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
+ //if(debug2) u_printf("Retrying at %d\n", bestPosn);
+ while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
+ USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) {
+ //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
+ }
+ if(USTRINGTRIE_MATCHES(rfwd)) {
+ //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch);
+ // only full matches here, nothing to check
+ // skip the next:
+ return kExceptionHere;
+ } else {
+ //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch);
+ // no match (no exception) -return the 'underlying' break
+ return kNoExceptionHere;
+ }
+ } else {
+ return kNoExceptionHere; // internal error and/or no forwards trie
+ }
+ } else {
+ //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // no best match
+ return kNoExceptionHere; // No match - so exit. Not an exception.
+ }
+}
+
+// The workhorse for forward iteration: starting from the delegate's break n,
+// keep advancing the delegate while the current break is an exception.
+// Returns the first break that is not an exception, the end of the text, or
+// UBRK_DONE (also returned if the text cannot be refreshed).
+int32_t
+SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
+  if(n == UBRK_DONE || // at end or
+     fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
+    return n;
+  }
+  // Refresh the text before probing for exceptions.
+  UErrorCode status = U_ZERO_ERROR;
+  resetState(status);
+  if(U_FAILURE(status)) {
+    return UBRK_DONE; // bail out
+  }
+  const int64_t textEnd = utext_nativeLength(fText.getAlias());
+
+  //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
+  // One pass per underlying break reported by fDelegate.
+  for(;;) {
+    if(n == UBRK_DONE || n == textEnd) {
+      break;
+    }
+    if(breakExceptionAt(n) != kExceptionHere) {
+      break; // a genuine break
+    }
+    n = fDelegate->next(); // skip this one. Find the next lowerlevel break.
+  }
+  return n;
+}
+
+// Backward counterpart of internalNext(): starting from the delegate's break n,
+// keep moving the delegate backwards while the current break is an exception.
+// Returns the first break that is not an exception, 0, or UBRK_DONE (also
+// returned if the text cannot be refreshed).
+int32_t
+SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
+  if(n == 0 || n == UBRK_DONE || // at end or
+     fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
+    return n;
+  }
+  // Refresh the text before probing for exceptions.
+  UErrorCode status = U_ZERO_ERROR;
+  resetState(status);
+  if(U_FAILURE(status)) {
+    return UBRK_DONE; // bail out
+  }
+
+  //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
+  // One pass per underlying break reported by fDelegate.
+  for(;;) {
+    if(n == UBRK_DONE || n == 0) {
+      break;
+    }
+    if(breakExceptionAt(n) != kExceptionHere) {
+      break; // a genuine break
+    }
+    n = fDelegate->previous(); // skip this one. Find the next lowerlevel break.
+  }
+  return n;
+}
+
+
+// Advances the delegate once, then filters that candidate through internalNext().
+int32_t
+SimpleFilteredSentenceBreakIterator::next() {
+  const int32_t candidate = fDelegate->next();
+  return internalNext(candidate);
+}
+
+// Returns the delegate's first boundary unfiltered: a break opportunity at
+// the beginning of the text is never suppressed.
+int32_t
+SimpleFilteredSentenceBreakIterator::first(void) {
+  const int32_t start = fDelegate->first();
+  return start;
+}
+
+// Asks the delegate for the boundary preceding offset, then filters it
+// through internalPrev().
+int32_t
+SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) {
+  const int32_t candidate = fDelegate->preceding(offset);
+  return internalPrev(candidate);
+}
+
+// Moves the delegate back once, then filters that candidate through internalPrev().
+int32_t
+SimpleFilteredSentenceBreakIterator::previous(void) {
+  const int32_t candidate = fDelegate->previous();
+  return internalPrev(candidate);
+}
+
+// Reports whether offset is a boundary once suppressions are applied.
+// @param offset position to test
+// @return false if the delegate sees no boundary at offset, or if the boundary
+//         is suppressed by an exception; true otherwise
+UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
+  if (!fDelegate->isBoundary(offset)) return false; // no break to suppress
+
+  if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions
+
+  UErrorCode status = U_ZERO_ERROR;
+  resetState(status);
+  if (U_FAILURE(status)) {
+    // The text could not be refreshed, so fText must not be used to look for
+    // an exception. Fall back to the delegate's (unfiltered) answer.
+    return true;
+  }
+
+  // A boundary survives unless breakExceptionAt() flags it as an exception.
+  return breakExceptionAt(offset) != kExceptionHere;
+}
+
+// Forwards the argument to the delegate's next(int32_t), then filters the
+// resulting position through internalNext().
+int32_t
+SimpleFilteredSentenceBreakIterator::next(int32_t offset) {
+  const int32_t candidate = fDelegate->next(offset);
+  return internalNext(candidate);
+}
+
+// Asks the delegate for the boundary following offset, then filters it
+// through internalNext().
+int32_t
+SimpleFilteredSentenceBreakIterator::following(int32_t offset) {
+  const int32_t candidate = fDelegate->following(offset);
+  return internalNext(candidate);
+}
+
+// Returns the delegate's last boundary unfiltered: a break opportunity at
+// the end of the text is never suppressed.
+int32_t
+SimpleFilteredSentenceBreakIterator::last(void) {
+  const int32_t end = fDelegate->last();
+  return end;
+}
+
+
+/**
+ * Concrete implementation of builder class.
+ * Collects the exception strings in fSet; build() turns them into the tries
+ * used by SimpleFilteredSentenceBreakIterator.
+ */
+class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
+public:
+ virtual ~SimpleFilteredBreakIteratorBuilder();
+ // Creates a builder pre-populated from the locale's "exceptions/SentenceBreak" resource.
+ SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status);
+ // Creates a builder with an empty exception set.
+ SimpleFilteredBreakIteratorBuilder(UErrorCode &status);
+ // Adds `exception` to the set; returns the result of UStringSet::add().
+ virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
+ // Removes `exception` from the set; returns the result of UStringSet::remove().
+ virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
+ // Wraps adoptBreakIterator in a filtered iterator built from the current set.
+ virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status);
+private:
+ UStringSet fSet; // the exception strings collected so far
+};
+
+// Empty body; fSet is destroyed as a member.
+SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder() {
+}
+
+// Creates a builder whose exception set starts out empty.
+SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCode &status)
+  : fSet(status) {
+}
+
+// Creates a builder and loads the sentence-break exceptions for fromLocale
+// from the break-iterator resource bundle ("exceptions" -> "SentenceBreak").
+// Every string found there is registered through suppressBreakAfter().
+// If a lookup fails, or only resolves with U_USING_DEFAULT_WARNING, that code
+// is copied into status and the builder is left empty.
+SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status)
+ : fSet(status)
+{
+ if(U_SUCCESS(status)) {
+ // Lookups run on a separate status so warnings can be inspected
+ // before they are propagated.
+ UErrorCode subStatus = U_ZERO_ERROR;
+ LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &subStatus));
+ if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {
+ status = subStatus; // copy the failing status
+#if FB_DEBUG
+ fprintf(stderr, "open BUNDLE %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
+#endif
+ return; // leaves the builder empty, if you try to use it.
+ }
+ LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", NULL, &subStatus));
+ if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {
+ status = subStatus; // copy the failing status
+#if FB_DEBUG
+ fprintf(stderr, "open EXCEPTIONS %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
+#endif
+ return; // leaves the builder empty, if you try to use it.
+ }
+ LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", NULL, &subStatus));
+
+#if FB_DEBUG
+ {
+ UErrorCode subsub = subStatus;
+ fprintf(stderr, "open SentenceBreak %s => %s, %s\n", fromLocale.getBaseName(), ures_getLocale(breaks.getAlias(), &subsub), u_errorName(subStatus));
+ }
+#endif
+
+ if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {
+ status = subStatus; // copy the failing status
+#if FB_DEBUG
+ fprintf(stderr, "open %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
+#endif
+ return; // leaves the builder empty, if you try to use it.
+ }
+
+ LocalUResourceBundlePointer strs;
+ subStatus = status; // Pick up inherited warning status now
+ // Walk the resources under "SentenceBreak"; the previous resource object is
+ // handed back to ures_getNextResource() as the fill-in on each round.
+ do {
+ strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &subStatus));
+ if(strs.isValid() && U_SUCCESS(subStatus)) {
+ // Note: string extraction and insertion report into status, not subStatus.
+ UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status));
+ suppressBreakAfter(str, status); // load the string
+ }
+ } while (strs.isValid() && U_SUCCESS(subStatus));
+ // U_INDEX_OUTOFBOUNDS_ERROR is not propagated as an error here
+ // (presumably it marks the end of the iteration - confirm against ures_getNextResource).
+ if(U_FAILURE(subStatus)&&subStatus!=U_INDEX_OUTOFBOUNDS_ERROR&&U_SUCCESS(status)) {
+ status = subStatus;
+ }
+ }
+}
+
+// Adds `exception` to the set of strings after which breaks are suppressed.
+// Returns whatever UStringSet::add() reports.
+UBool
+SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
+{
+  const UBool added = fSet.add(exception, status);
+  FB_TRACE("suppressBreakAfter",&exception,added,0);
+  return added;
+}
+
+// Removes `exception` from the set of strings after which breaks are suppressed.
+// Returns whatever UStringSet::remove() reports.
+UBool
+SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
+{
+  const UBool removed = fSet.remove(exception, status);
+  FB_TRACE("unsuppressBreakAfter",&exception,removed,0);
+  return removed;
+}
+
+/**
+ * Allocates an array of UnicodeString with at least one element.
+ *
+ * Jitterbug 2974: MSVC has a bug whereby new X[0] behaves badly, so a
+ * request for zero elements is turned into a request for one.
+ *
+ * Note: "new UnicodeString[subCount]" ends up calling global operator new
+ * on MSVC2012 for some reason.
+ */
+static inline UnicodeString* newUnicodeStringArray(size_t count) {
+    const size_t slots = (count == 0) ? 1 : count;
+    return new UnicodeString[slots];
+}
+
+// Builds a filtered break iterator around adoptBreakIterator from the strings
+// currently in fSet.
+//
+// Strings with a '.' before their last character are "partials": their prefix
+// up to and including the first '.' goes, reversed, into the backwards trie
+// with value kPARTIAL, and the whole string goes into the forwards trie.
+// All other strings go, reversed, into the backwards trie with value kMATCH.
+//
+// @param adoptBreakIterator delegate iterator; held in a LocalPointer for the
+//                           duration of the call and handed to the new iterator
+// @param status             set to U_MEMORY_ALLOCATION_ERROR if a working array
+//                           cannot be allocated, or to a trie-builder error
+// @return the new iterator, or NULL on failure
+BreakIterator *
+SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) {
+  LocalPointer<BreakIterator> adopt(adoptBreakIterator);
+
+  LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status);
+  LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status);
+  if(U_FAILURE(status)) {
+    return NULL;
+  }
+
+  int32_t revCount = 0;
+  int32_t fwdCount = 0;
+
+  int32_t subCount = fSet.size();
+
+  UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount);
+
+  LocalArray<UnicodeString> ustrs(ustrs_ptr);
+  if(ustrs.isNull()) {
+    // The array is indexed below; do not continue without it.
+    status = U_MEMORY_ALLOCATION_ERROR;
+    return NULL;
+  }
+
+  LocalMemory<int> partials;
+  // Nothing is requested when the set is empty, so a NULL result only
+  // signals failure when subCount is positive.
+  if(subCount > 0 && partials.allocateInsteadAndReset(subCount) == NULL) {
+    status = U_MEMORY_ALLOCATION_ERROR;
+    return NULL;
+  }
+
+  LocalPointer<UCharsTrie> backwardsTrie; // i.e. ".srM" for Mrs.
+  LocalPointer<UCharsTrie> forwardsPartialTrie; // Has ".a" for "a.M."
+
+  // Copy the set's strings into the working array.
+  int n=0;
+  for ( int32_t i = 0;
+        i<fSet.size();
+        i++) {
+    const UnicodeString *abbr = fSet.getStringAt(i);
+    if(abbr) {
+      FB_TRACE("build",abbr,TRUE,i);
+      ustrs[n] = *abbr; // copy by value
+      FB_TRACE("ustrs[n]",&ustrs[n],TRUE,i);
+    } else {
+      FB_TRACE("build",abbr,FALSE,i);
+      status = U_MEMORY_ALLOCATION_ERROR;
+      return NULL;
+    }
+    partials[n] = 0; // default: not partial
+    n++;
+  }
+  // first pass - find partials.
+  for(int i=0;i<subCount;i++) {
+    int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations
+    if(nn>-1 && (nn+1)!=ustrs[i].length()) {
+      FB_TRACE("partial",&ustrs[i],FALSE,i);
+      // is partial.
+      // is it unique?
+      int sameAs = -1;
+      for(int j=0;j<subCount;j++) {
+        if(j==i) continue;
+        if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) {
+          FB_TRACE("prefix",&ustrs[j],FALSE,nn+1);
+          //UBool otherIsPartial = ((nn+1)!=ustrs[j].length()); // true if ustrs[j] doesn't end at nn
+          if(partials[j]==0) { // hasn't been processed yet
+            partials[j] = kSuppressInReverse | kAddToForward;
+            FB_TRACE("suppressing",&ustrs[j],FALSE,j);
+          } else if(partials[j] & kSuppressInReverse) {
+            sameAs = j; // the other entry is already in the reverse table.
+          }
+        }
+      }
+      FB_TRACE("for partial same-",&ustrs[i],FALSE,sameAs);
+      FB_TRACE(" == partial #",&ustrs[i],FALSE,partials[i]);
+      UnicodeString prefix(ustrs[i], 0, nn+1);
+      if(sameAs == -1 && partials[i] == 0) {
+        // first one - add the prefix to the reverse table.
+        prefix.reverse();
+        builder->add(prefix, kPARTIAL, status);
+        revCount++;
+        FB_TRACE("Added partial",&prefix,FALSE, i);
+        FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
+        partials[i] = kSuppressInReverse | kAddToForward;
+      } else {
+        FB_TRACE("NOT adding partial",&prefix,FALSE, i);
+        FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
+      }
+    }
+  }
+  // second pass - distribute the strings over the two trie builders.
+  for(int i=0;i<subCount;i++) {
+    if(partials[i]==0) {
+      ustrs[i].reverse();
+      builder->add(ustrs[i], kMATCH, status);
+      revCount++;
+      FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i);
+    } else {
+      FB_TRACE("Adding fwd",&ustrs[i], FALSE, i);
+
+      // an optimization would be to only add the portion after the '.'
+      // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
+      // instead of "Ph.D." since we already know the "Ph." part is a match.
+      // would need the trie to be able to hold 0-length strings, though.
+      builder2->add(ustrs[i], kMATCH, status); // forward
+      fwdCount++;
+      //ustrs[i].reverse();
+      ////if(debug2) u_printf("SUPPRESS- not Added(%d): /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status));
+    }
+  }
+  FB_TRACE("AbbrCount",NULL,FALSE, subCount);
+
+  // A trie is only built when at least one string was added to its builder.
+  if(revCount>0) {
+    backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
+    if(U_FAILURE(status)) {
+      FB_TRACE(u_errorName(status),NULL,FALSE, -1);
+      return NULL;
+    }
+  }
+
+  if(fwdCount>0) {
+    forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
+    if(U_FAILURE(status)) {
+      FB_TRACE(u_errorName(status),NULL,FALSE, -1);
+      return NULL;
+    }
+  }
+
+  return new SimpleFilteredSentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status);
+}
+
+
+// ----------- Base class implementation
+
+// Base-class constructor; nothing to initialize.
+FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder()
+{
+}
+
+// Base-class destructor with an empty body.
+FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder()
+{
+}
+
+// Creates a builder pre-loaded with the exceptions for locale `where`.
+// Returns NULL if status is, or becomes, a failure.
+FilteredBreakIteratorBuilder *
+FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) {
+  if(U_FAILURE(status)) {
+    return NULL;
+  }
+  LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(where, status), status);
+  if(U_SUCCESS(status)) {
+    return ret.orphan();
+  }
+  // On failure the LocalPointer disposes of whatever was created.
+  return NULL;
+}
+
+// Equivalent to createEmptyInstance().
+FilteredBreakIteratorBuilder *
+FilteredBreakIteratorBuilder::createInstance(UErrorCode &status) {
+  FilteredBreakIteratorBuilder *emptyBuilder = createEmptyInstance(status);
+  return emptyBuilder;
+}
+
+// Creates a builder with no exceptions loaded.
+// Returns NULL if status is, or becomes, a failure.
+FilteredBreakIteratorBuilder *
+FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) {
+  if(U_FAILURE(status)) {
+    return NULL;
+  }
+  LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status);
+  if(U_SUCCESS(status)) {
+    return ret.orphan();
+  }
+  // On failure the LocalPointer disposes of whatever was created.
+  return NULL;
+}
+
+U_NAMESPACE_END
+
+#endif //#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
diff --git a/thirdparty/icu4c/common/filterednormalizer2.cpp b/thirdparty/icu4c/common/filterednormalizer2.cpp
new file mode 100644
index 0000000000..1a0914d3f7
--- /dev/null
+++ b/thirdparty/icu4c/common/filterednormalizer2.cpp
@@ -0,0 +1,363 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: filterednormalizer2.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009dec10
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/edits.h"
+#include "unicode/normalizer2.h"
+#include "unicode/stringoptions.h"
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+#include "unicode/unorm.h"
+#include "cpputils.h"
+
+U_NAMESPACE_BEGIN
+
+// Out-of-line destructor with an empty body.
+FilteredNormalizer2::~FilteredNormalizer2() {
+}
+
+// Normalizes src into dest (dest is cleared first) and returns dest.
+// dest is set to bogus if src's buffer cannot be read or errorCode already
+// indicates failure; passing the same object as src and dest yields
+// U_ILLEGAL_ARGUMENT_ERROR.
+UnicodeString &
+FilteredNormalizer2::normalize(const UnicodeString &src,
+                               UnicodeString &dest,
+                               UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(src, errorCode);
+    if(U_SUCCESS(errorCode)) {
+        if(&dest!=&src) {
+            dest.remove();
+            return normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
+        }
+        // In-place normalization is not supported.
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+    } else {
+        dest.setToBogus();
+    }
+    return dest;
+}
+
+// Internal: No argument checking, and appends to dest.
+// Pass as input spanCondition the one that is likely to yield a non-zero
+// span length at the start of src.
+// For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
+// USET_SPAN_SIMPLE should be passed in for the start of src
+// and USET_SPAN_NOT_CONTAINED should be passed in if we continue after
+// an in-filter prefix.
+// The string is processed as alternating spans: spans outside the filter set
+// are copied verbatim, spans inside it are normalized with norm2.
+UnicodeString &
+FilteredNormalizer2::normalize(const UnicodeString &src,
+                               UnicodeString &dest,
+                               USetSpanCondition spanCondition,
+                               UErrorCode &errorCode) const {
+    UnicodeString tempDest;  // Don't throw away destination buffer between iterations.
+    int32_t prevSpanLimit=0;
+    while(prevSpanLimit<src.length()) {
+        const int32_t spanLimit=set.span(src, prevSpanLimit, spanCondition);
+        const int32_t spanLength=spanLimit-prevSpanLimit;
+        const UBool outsideFilter=(spanCondition==USET_SPAN_NOT_CONTAINED);
+        if(spanLength!=0) {
+            if(outsideFilter) {
+                dest.append(src, prevSpanLimit, spanLength);
+            } else {
+                // Not norm2.normalizeSecondAndAppend() because we do not want
+                // to modify the non-filter part of dest.
+                dest.append(norm2.normalize(src.tempSubStringBetween(prevSpanLimit, spanLimit),
+                                            tempDest, errorCode));
+                if(U_FAILURE(errorCode)) {
+                    break;
+                }
+            }
+        }
+        // The next span is of the opposite kind.
+        spanCondition= outsideFilter ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED;
+        prevSpanLimit=spanLimit;
+    }
+    return dest;
+}
+
+// Normalizes the UTF-8 text in src into sink, optionally recording edits.
+// edits is reset first unless U_EDITS_NO_RESET is set in options.
+void
+FilteredNormalizer2::normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
+                                   Edits *edits, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    const UBool keepExistingEdits = (options & U_EDITS_NO_RESET) != 0;
+    if (edits != nullptr && !keepExistingEdits) {
+        edits->reset();
+    }
+    // Do not reset for each span.
+    options |= U_EDITS_NO_RESET;
+    const char *bytes = src.data();
+    const int32_t byteLength = src.length();
+    normalizeUTF8(options, bytes, byteLength, sink, edits, USET_SPAN_SIMPLE, errorCode);
+}
+
+// Internal UTF-8 worker: processes src as alternating spans, starting with
+// the kind given by spanCondition. Spans outside the filter set are passed
+// through (recorded as unchanged, and written to sink unless
+// U_OMIT_UNCHANGED_TEXT is set); spans inside it are normalized with norm2.
+void
+FilteredNormalizer2::normalizeUTF8(uint32_t options, const char *src, int32_t length,
+                                   ByteSink &sink, Edits *edits,
+                                   USetSpanCondition spanCondition,
+                                   UErrorCode &errorCode) const {
+    while (length > 0) {
+        const int32_t spanLength = set.spanUTF8(src, length, spanCondition);
+        const UBool outsideFilter = (spanCondition == USET_SPAN_NOT_CONTAINED);
+        if (spanLength != 0) {
+            if (outsideFilter) {
+                if (edits != nullptr) {
+                    edits->addUnchanged(spanLength);
+                }
+                if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
+                    sink.Append(src, spanLength);
+                }
+            } else {
+                // Not norm2.normalizeSecondAndAppend() because we do not want
+                // to modify the non-filter part of dest.
+                norm2.normalizeUTF8(options, StringPiece(src, spanLength), sink, edits, errorCode);
+                if (U_FAILURE(errorCode)) {
+                    break;
+                }
+            }
+        }
+        // The next span is of the opposite kind.
+        spanCondition = outsideFilter ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED;
+        src += spanLength;
+        length -= spanLength;
+    }
+}
+
+// Appends the normalized form of second to first and returns first.
+UnicodeString &
+FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
+                                              const UnicodeString &second,
+                                              UErrorCode &errorCode) const {
+    const UBool doNormalize = TRUE;
+    return normalizeSecondAndAppend(first, second, doNormalize, errorCode);
+}
+
+// Appends second to first without normalizing second as a whole, and
+// returns first.
+UnicodeString &
+FilteredNormalizer2::append(UnicodeString &first,
+                            const UnicodeString &second,
+                            UErrorCode &errorCode) const {
+    const UBool doNormalize = FALSE;
+    return normalizeSecondAndAppend(first, second, doNormalize, errorCode);
+}
+
+// Shared worker for normalizeSecondAndAppend() (doNormalize=TRUE) and
+// append() (doNormalize=FALSE).
+// The in-filter suffix of first and the in-filter prefix of second are joined
+// through norm2; the remainder of second is then appended, normalized
+// span-wise when doNormalize is set and copied verbatim otherwise.
+// Returns first. Passing the same object for both arguments sets
+// U_ILLEGAL_ARGUMENT_ERROR.
+UnicodeString &
+FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
+ const UnicodeString &second,
+ UBool doNormalize,
+ UErrorCode &errorCode) const {
+ uprv_checkCanGetBuffer(first, errorCode);
+ uprv_checkCanGetBuffer(second, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return first;
+ }
+ if(&first==&second) {
+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return first;
+ }
+ // With an empty first there is nothing to merge with.
+ if(first.isEmpty()) {
+ if(doNormalize) {
+ return normalize(second, first, errorCode);
+ } else {
+ return first=second;
+ }
+ }
+ // merge the in-filter suffix of the first string with the in-filter prefix of the second
+ int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);
+ if(prefixLimit!=0) {
+ UnicodeString prefix(second.tempSubString(0, prefixLimit));
+ int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
+ if(suffixStart==0) {
+ // All of first is in the filter: norm2 can work on first directly.
+ if(doNormalize) {
+ norm2.normalizeSecondAndAppend(first, prefix, errorCode);
+ } else {
+ norm2.append(first, prefix, errorCode);
+ }
+ } else {
+ // Work on a copy of the in-filter suffix, then splice the result back
+ // so the part of first before suffixStart stays untouched.
+ UnicodeString middle(first, suffixStart, INT32_MAX);
+ if(doNormalize) {
+ norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
+ } else {
+ norm2.append(middle, prefix, errorCode);
+ }
+ first.replace(suffixStart, INT32_MAX, middle);
+ }
+ }
+ // Handle whatever follows the in-filter prefix of second; it starts with
+ // a span that is outside the filter.
+ if(prefixLimit<second.length()) {
+ UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));
+ if(doNormalize) {
+ normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);
+ } else {
+ first.append(rest);
+ }
+ }
+ return first;
+}
+
+// Delegates to norm2 only for code points in the filter set; returns false
+// for all others without consulting norm2.
+UBool
+FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {
+    const UBool inFilter = set.contains(c);
+    return inFilter && norm2.getDecomposition(c, decomposition);
+}
+
+// Delegates to norm2 only for code points in the filter set; returns false
+// for all others without consulting norm2.
+UBool
+FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
+    const UBool inFilter = set.contains(c);
+    return inFilter && norm2.getRawDecomposition(c, decomposition);
+}
+
+UChar32
+FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
+ return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : U_SENTINEL;
+}
+
+uint8_t
+FilteredNormalizer2::getCombiningClass(UChar32 c) const {
+ return set.contains(c) ? norm2.getCombiningClass(c) : 0;
+}
+
+UBool
+FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
+ uprv_checkCanGetBuffer(s, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return FALSE;
+ }
+ USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
+ for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
+ int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
+ if(spanCondition==USET_SPAN_NOT_CONTAINED) {
+ spanCondition=USET_SPAN_SIMPLE;
+ } else {
+ if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) ||
+ U_FAILURE(errorCode)
+ ) {
+ return FALSE;
+ }
+ spanCondition=USET_SPAN_NOT_CONTAINED;
+ }
+ prevSpanLimit=spanLimit;
+ }
+ return TRUE;
+}
+
+UBool
+FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const {
+    if(U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    // Alternate between in-filter runs (checked by norm2) and out-of-filter runs (skipped), in-filter first.
+    const char *cursor = sp.data();
+    int32_t remaining = sp.length();
+    UBool inFilter = TRUE;
+    for (; remaining > 0; inFilter = (UBool)!inFilter) {
+        int32_t runLength = set.spanUTF8(cursor, remaining, inFilter ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED);
+        if (inFilter) {
+            if (!norm2.isNormalizedUTF8(StringPiece(cursor, runLength), errorCode)) {
+                return FALSE;
+            }
+            if (U_FAILURE(errorCode)) {
+                return FALSE;
+            }
+        }
+        cursor += runLength;
+        remaining -= runLength;
+    }
+    return TRUE;
+}
+
+UNormalizationCheckResult
+FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
+ uprv_checkCanGetBuffer(s, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return UNORM_MAYBE;
+ }
+ UNormalizationCheckResult result=UNORM_YES;
+ USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
+ for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
+ int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
+ if(spanCondition==USET_SPAN_NOT_CONTAINED) {
+ spanCondition=USET_SPAN_SIMPLE;
+ } else {
+ UNormalizationCheckResult qcResult=
+ norm2.quickCheck(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
+ if(U_FAILURE(errorCode) || qcResult==UNORM_NO) {
+ return qcResult;
+ } else if(qcResult==UNORM_MAYBE) {
+ result=qcResult;
+ }
+ spanCondition=USET_SPAN_NOT_CONTAINED;
+ }
+ prevSpanLimit=spanLimit;
+ }
+ return result;
+}
+
+int32_t
+FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
+ uprv_checkCanGetBuffer(s, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
+ for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
+ int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
+ if(spanCondition==USET_SPAN_NOT_CONTAINED) {
+ spanCondition=USET_SPAN_SIMPLE;
+ } else {
+ int32_t yesLimit=
+ prevSpanLimit+
+ norm2.spanQuickCheckYes(
+ s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
+ if(U_FAILURE(errorCode) || yesLimit<spanLimit) {
+ return yesLimit;
+ }
+ spanCondition=USET_SPAN_NOT_CONTAINED;
+ }
+ prevSpanLimit=spanLimit;
+ }
+ return s.length();
+}
+
+UBool
+FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
+ return !set.contains(c) || norm2.hasBoundaryBefore(c);
+}
+
+UBool
+FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
+ return !set.contains(c) || norm2.hasBoundaryAfter(c);
+}
+
+UBool
+FilteredNormalizer2::isInert(UChar32 c) const {
+ return !set.contains(c) || norm2.isInert(c);
+}
+
+U_NAMESPACE_END
+
+// C API ------------------------------------------------------------------- ***
+
+U_NAMESPACE_USE
+
+U_CAPI UNormalizer2 * U_EXPORT2
+unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
+    /* Returns a newly allocated FilteredNormalizer2 built from norm2 and filterSet, or NULL on error. */
+    if(U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    if(norm2==NULL || filterSet==NULL) { /* both pointers are dereferenced below */
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2, *UnicodeSet::fromUSet(filterSet));
+    if(fn2==NULL) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+    }
+    return (UNormalizer2 *)fn2;
+}
+
+#endif // !UCONFIG_NO_NORMALIZATION
diff --git a/thirdparty/icu4c/common/hash.h b/thirdparty/icu4c/common/hash.h
new file mode 100644
index 0000000000..f02cb7087a
--- /dev/null
+++ b/thirdparty/icu4c/common/hash.h
@@ -0,0 +1,248 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1997-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* Date Name Description
+* 03/28/00 aliu Creation.
+******************************************************************************
+*/
+
+#ifndef HASH_H
+#define HASH_H
+
+#include "unicode/unistr.h"
+#include "unicode/uobject.h"
+#include "cmemory.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Hashtable is a thin C++ wrapper around UHashtable, a general-purpose void*
+ * hashtable implemented in C. Hashtable is designed to be idiomatic and
+ * easy-to-use in C++.
+ *
+ * Hashtable is an INTERNAL CLASS.
+ */
+class U_COMMON_API Hashtable : public UMemory {
+ UHashtable* hash;
+ UHashtable hashObj;
+
+ inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
+
+ inline void initSize(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t size, UErrorCode& status);
+
+public:
+ /**
+ * Construct a hashtable
+ * @param ignoreKeyCase If true, keys are case insensitive.
+ * @param status Error code
+ */
+ inline Hashtable(UBool ignoreKeyCase, UErrorCode& status);
+
+ /**
+ * Construct a hashtable
+ * @param ignoreKeyCase If true, keys are case insensitive.
+ * @param size initial size allocation
+ * @param status Error code
+ */
+ inline Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status);
+
+ /**
+ * Construct a hashtable
+ * @param keyComp Comparator for comparing the keys
+ * @param valueComp Comparator for comparing the values
+ * @param status Error code
+ */
+ inline Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
+
+ /**
+ * Construct a hashtable
+ * @param status Error code
+ */
+ inline Hashtable(UErrorCode& status);
+
+ /**
+ * Construct a hashtable, _disregarding any error_. Use this constructor
+ * with caution.
+ */
+ inline Hashtable();
+
+ /**
+ * Non-virtual destructor; make this virtual if Hashtable is subclassed
+ * in the future.
+ */
+ inline ~Hashtable();
+
+ inline UObjectDeleter *setValueDeleter(UObjectDeleter *fn);
+
+ inline int32_t count() const;
+
+ inline void* put(const UnicodeString& key, void* value, UErrorCode& status);
+
+ inline int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
+
+ inline void* get(const UnicodeString& key) const;
+
+ inline int32_t geti(const UnicodeString& key) const;
+
+ inline void* remove(const UnicodeString& key);
+
+ inline int32_t removei(const UnicodeString& key);
+
+ inline void removeAll(void);
+
+ inline const UHashElement* find(const UnicodeString& key) const;
+
+ /**
+ * @param pos - must be UHASH_FIRST on first call, and untouched afterwards.
+ * @see uhash_nextElement
+ */
+ inline const UHashElement* nextElement(int32_t& pos) const;
+
+ inline UKeyComparator* setKeyComparator(UKeyComparator*keyComp);
+
+ inline UValueComparator* setValueComparator(UValueComparator* valueComp);
+
+ inline UBool equals(const Hashtable& that) const;
+private:
+ Hashtable(const Hashtable &other); // forbid copying of this class
+ Hashtable &operator=(const Hashtable &other); // forbid copying of this class
+};
+
+/*********************************************************************
+ * Implementation
+ ********************************************************************/
+
+inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
+                            UValueComparator *valueComp, UErrorCode& status) {
+    // Set up the embedded table; `hash` stays as it was unless that succeeds.
+    if (U_SUCCESS(status)) {
+        uhash_init(&hashObj, keyHash, keyComp, valueComp, &status);
+        if (U_SUCCESS(status)) {
+            hash = &hashObj;
+            uhash_setKeyDeleter(hash, uprv_deleteUObject);
+        }
+    }
+}
+
+inline void Hashtable::initSize(UHashFunction *keyHash, UKeyComparator *keyComp,
+                                UValueComparator *valueComp, int32_t size, UErrorCode& status) {
+    // Same as init(), but passes the requested initial size through to uhash_initSize().
+    if (U_SUCCESS(status)) {
+        uhash_initSize(&hashObj, keyHash, keyComp, valueComp, size, &status);
+        if (U_SUCCESS(status)) {
+            hash = &hashObj;
+            uhash_setKeyDeleter(hash, uprv_deleteUObject);
+        }
+    }
+}
+
+inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
+ UErrorCode& status) : hash(0) {
+ init( uhash_hashUnicodeString, keyComp, valueComp, status);
+}
+
+inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
+ : hash(0)
+{
+ init(ignoreKeyCase ? uhash_hashCaselessUnicodeString
+ : uhash_hashUnicodeString,
+ ignoreKeyCase ? uhash_compareCaselessUnicodeString
+ : uhash_compareUnicodeString,
+ NULL,
+ status);
+}
+
+inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status)
+ : hash(0)
+{
+ initSize(ignoreKeyCase ? uhash_hashCaselessUnicodeString
+ : uhash_hashUnicodeString,
+ ignoreKeyCase ? uhash_compareCaselessUnicodeString
+ : uhash_compareUnicodeString,
+ NULL, size,
+ status);
+}
+
+inline Hashtable::Hashtable(UErrorCode& status)
+ : hash(0)
+{
+ init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
+}
+
+inline Hashtable::Hashtable()
+ : hash(0)
+{
+ UErrorCode status = U_ZERO_ERROR;
+ init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
+}
+
+inline Hashtable::~Hashtable() {
+ if (hash != NULL) {
+ uhash_close(hash);
+ }
+}
+
+inline UObjectDeleter *Hashtable::setValueDeleter(UObjectDeleter *fn) {
+ return uhash_setValueDeleter(hash, fn);
+}
+
+inline int32_t Hashtable::count() const {
+ return uhash_count(hash);
+}
+
+inline void* Hashtable::put(const UnicodeString& key, void* value, UErrorCode& status) {
+ return uhash_put(hash, new UnicodeString(key), value, &status);
+}
+
+inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCode& status) {
+ return uhash_puti(hash, new UnicodeString(key), value, &status);
+}
+
+inline void* Hashtable::get(const UnicodeString& key) const {
+ return uhash_get(hash, &key);
+}
+
+inline int32_t Hashtable::geti(const UnicodeString& key) const {
+ return uhash_geti(hash, &key);
+}
+
+inline void* Hashtable::remove(const UnicodeString& key) {
+ return uhash_remove(hash, &key);
+}
+
+inline int32_t Hashtable::removei(const UnicodeString& key) {
+ return uhash_removei(hash, &key);
+}
+
+inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
+ return uhash_find(hash, &key);
+}
+
+inline const UHashElement* Hashtable::nextElement(int32_t& pos) const {
+ return uhash_nextElement(hash, &pos);
+}
+
+inline void Hashtable::removeAll(void) {
+ uhash_removeAll(hash);
+}
+
+inline UKeyComparator* Hashtable::setKeyComparator(UKeyComparator*keyComp){
+ return uhash_setKeyComparator(hash, keyComp);
+}
+
+inline UValueComparator* Hashtable::setValueComparator(UValueComparator* valueComp){
+ return uhash_setValueComparator(hash, valueComp);
+}
+
+inline UBool Hashtable::equals(const Hashtable& that)const{
+ return uhash_equals(hash, that.hash);
+}
+U_NAMESPACE_END
+
+#endif
+
diff --git a/thirdparty/icu4c/common/icudataver.cpp b/thirdparty/icu4c/common/icudataver.cpp
new file mode 100644
index 0000000000..d314411374
--- /dev/null
+++ b/thirdparty/icu4c/common/icudataver.cpp
@@ -0,0 +1,31 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2009-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/icudataver.h"
+#include "unicode/ures.h"
+#include "uresimp.h" /* for ures_getVersionByKey */
+
+U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status) {
+    UResourceBundle *versionBundle = NULL;
+
+    /* Nothing to do after an earlier failure, or when the caller gave no output buffer. */
+    if (U_FAILURE(*status) || dataVersionFillin == NULL) {
+        return;
+    }
+
+    /* Read the U_ICU_DATA_KEY version entry from the U_ICU_VERSION_BUNDLE bundle. */
+    versionBundle = ures_openDirect(NULL, U_ICU_VERSION_BUNDLE , status);
+    if (U_SUCCESS(*status)) {
+        ures_getVersionByKey(versionBundle, U_ICU_DATA_KEY, dataVersionFillin, status);
+    }
+    ures_close(versionBundle); /* called whether or not the open succeeded */
+}
diff --git a/thirdparty/icu4c/common/icuplug.cpp b/thirdparty/icu4c/common/icuplug.cpp
new file mode 100644
index 0000000000..4ab8c66ebe
--- /dev/null
+++ b/thirdparty/icu4c/common/icuplug.cpp
@@ -0,0 +1,884 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2009-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : icuplug.c
+*
+* Date Name Description
+* 10/29/2009 sl New.
+******************************************************************************
+*/
+
+#include "unicode/icuplug.h"
+
+
+#if UCONFIG_ENABLE_PLUGINS
+
+
+#include "icuplugimp.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "putilimp.h"
+#include "ucln.h"
+#include <stdio.h>
+#ifdef __MVS__ /* defined by z/OS compiler */
+#define _POSIX_SOURCE
+#include <cics.h> /* 12 Nov 2011 JAM iscics() function */
+#endif
+#include "charstr.h"
+
+using namespace icu;
+
+#ifndef UPLUG_TRACE
+#define UPLUG_TRACE 0
+#endif
+
+#if UPLUG_TRACE
+#include <stdio.h>
+#define DBG(x) fprintf(stderr, "%s:%d: ",__FILE__,__LINE__); fprintf x
+#endif
+
+/**
+ * Internal structure of an ICU plugin.
+ */
+
+struct UPlugData {
+ UPlugEntrypoint *entrypoint; /**< plugin entrypoint */
+ uint32_t structSize; /**< initialized to the size of this structure */
+ uint32_t token; /**< must be U_PLUG_TOKEN */
+ void *lib; /**< plugin library, or NULL */
+ char libName[UPLUG_NAME_MAX]; /**< library name */
+ char sym[UPLUG_NAME_MAX]; /**< plugin symbol, or NULL */
+ char config[UPLUG_NAME_MAX]; /**< configuration data */
+ void *context; /**< user context data */
+ char name[UPLUG_NAME_MAX]; /**< name of plugin */
+ UPlugLevel level; /**< level of plugin */
+ UBool awaitingLoad; /**< TRUE if the plugin is awaiting a load call */
+ UBool dontUnload; /**< TRUE if plugin must stay resident (leak plugin and lib) */
+ UErrorCode pluginStatus; /**< status code of plugin */
+};
+
+
+
+#define UPLUG_LIBRARY_INITIAL_COUNT 8
+#define UPLUG_PLUGIN_INITIAL_COUNT 12
+
+/**
+ * Remove one item from a packed array, closing the gap by shifting every later entry down.
+ * @param list the full list
+ * @param listSize the number of entries in the list
+ * @param memberSize the size of one member, in bytes
+ * @param itemToRemove the zero-based index of the member to remove
+ * @return the new list size; listSize unchanged if the list is empty or the index is out of range
+ */
+static int32_t uplug_removeEntryAt(void *list, int32_t listSize, int32_t memberSize, int32_t itemToRemove) {
+    uint8_t *bytePtr = (uint8_t *)list;
+
+    /* get rid of some bad cases first: nothing to remove, or no such entry */
+    if(listSize<1 || itemToRemove<0 || itemToRemove>=listSize) {
+        return listSize;
+    }
+
+    /* move ALL trailing entries, not just the next one, so none are lost or duplicated */
+    if(listSize > itemToRemove+1) {
+        memmove(bytePtr+(itemToRemove*memberSize), bytePtr+((itemToRemove+1)*memberSize), (size_t)(listSize-itemToRemove-1)*memberSize);
+    }
+
+    return listSize-1;
+}
+
+
+
+
+#if U_ENABLE_DYLOAD
+/**
+ * Library management. Internal.
+ * @internal
+ */
+struct UPlugLibrary;
+
+/**
+ * Library management. Internal.
+ * @internal
+ */
+typedef struct UPlugLibrary {
+ void *lib; /**< library ptr */
+ char name[UPLUG_NAME_MAX]; /**< library name */
+ uint32_t ref; /**< reference count */
+} UPlugLibrary;
+
+static UPlugLibrary staticLibraryList[UPLUG_LIBRARY_INITIAL_COUNT];
+static UPlugLibrary * libraryList = staticLibraryList;
+static int32_t libraryCount = 0;
+static int32_t libraryMax = UPLUG_LIBRARY_INITIAL_COUNT;
+
+/**
+ * Search for a library by name. Doesn't lock
+ * @param libName libname to search for
+ * @return the index of the first matching entry in libraryList, or -1 if there is none
+ */
+static int32_t searchForLibraryName(const char *libName) {
+    int32_t found = -1;
+    int32_t idx = 0;
+    /* stop at the first entry whose name compares equal */
+    while(found == -1 && idx < libraryCount) {
+        if(uprv_strcmp(libName, libraryList[idx].name) == 0) { found = idx; }
+        ++idx;
+    }
+    return found;
+}
+
+static int32_t searchForLibrary(void *lib) {
+    /* Find the lowest index in libraryList whose handle equals lib; -1 means no such entry. */
+    int32_t k, match = -1;
+    for(k=libraryCount-1; k>=0; k--) {
+        if(libraryList[k].lib == lib) {
+            match = k; /* keep going: an earlier entry takes precedence */
+        }
+    }
+    return match;
+}
+
+U_CAPI char * U_EXPORT2
+uplug_findLibrary(void *lib, UErrorCode *status) {
+    int32_t slot;
+
+    /* Map a library handle back to the name recorded for it in libraryList. */
+    if(U_FAILURE(*status)) {
+        return NULL;
+    }
+    slot = searchForLibrary(lib);
+    if(slot == -1) {
+        *status = U_MISSING_RESOURCE_ERROR; /* handle is not in libraryList */
+        return NULL;
+    }
+    return libraryList[slot].name;
+}
+
+U_CAPI void * U_EXPORT2
+uplug_openLibrary(const char *libName, UErrorCode *status) {
+    int32_t libEntry = -1;
+    void *lib = NULL;
+    /* Open libName, or add a reference if it is already listed. Returns the handle, or NULL on failure. */
+    if(U_FAILURE(*status)) return NULL;
+
+    libEntry = searchForLibraryName(libName);
+    if(libEntry == -1) {
+        if(libraryCount >= libraryMax) {
+            /* Ran out of library slots. Statically allocated because we can't depend on allocating memory.. */
+            *status = U_MEMORY_ALLOCATION_ERROR;
+#if UPLUG_TRACE
+            DBG((stderr, "uplug_openLibrary() - out of library slots (max %d)\n", libraryMax));
+#endif
+            return NULL;
+        }
+        libEntry = libraryCount++; /* claim a slot only after the bounds check, so the count never exceeds libraryMax */
+        /* Some operating systems don't want
+           DL operations from multiple threads. */
+        libraryList[libEntry].lib = uprv_dl_open(libName, status);
+#if UPLUG_TRACE
+        DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib));
+#endif
+
+        if(libraryList[libEntry].lib == NULL || U_FAILURE(*status)) {
+            /* cleanup. */
+            libraryList[libEntry].lib = NULL; /* failure with open */
+            libraryList[libEntry].name[0] = 0;
+#if UPLUG_TRACE
+            DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib));
+#endif
+            /* no need to free - just give the slot back. */
+            libraryCount--;
+        } else { /* is it still there? */
+            /* link it in; strncpy does not terminate on truncation, so terminate by hand */
+            uprv_strncpy(libraryList[libEntry].name,libName,UPLUG_NAME_MAX-1);
+            libraryList[libEntry].name[UPLUG_NAME_MAX-1]=0;
+            libraryList[libEntry].ref=1;
+            lib = libraryList[libEntry].lib;
+        }
+    } else {
+        lib = libraryList[libEntry].lib;
+        libraryList[libEntry].ref++;
+    }
+    return lib;
+}
+
+U_CAPI void U_EXPORT2
+uplug_closeLibrary(void *lib, UErrorCode *status) {
+    int32_t slot;
+#if UPLUG_TRACE
+    DBG((stderr, "uplug_closeLibrary(%p,%s) list %p\n", lib, u_errorName(*status), (void*)libraryList));
+#endif
+    if(U_FAILURE(*status)) return;
+    /* find the first entry holding this handle */
+    for(slot=0; slot<libraryCount && libraryList[slot].lib!=lib; slot++) {}
+    if(slot>=libraryCount) {
+        *status = U_INTERNAL_PROGRAM_ERROR; /* could not find the entry! */
+        return;
+    }
+    /* drop one reference; the last one closes the library and removes its entry */
+    libraryList[slot].ref--;
+    if(libraryList[slot].ref == 0) {
+        uprv_dl_close(libraryList[slot].lib, status);
+        libraryCount = uplug_removeEntryAt(libraryList, libraryCount, sizeof(*libraryList), slot);
+    }
+}
+
+#endif
+
+static UPlugData pluginList[UPLUG_PLUGIN_INITIAL_COUNT];
+static int32_t pluginCount = 0;
+
+
+
+
+static int32_t uplug_pluginNumber(UPlugData* d) { /* index of d within pluginList, clamped to [0, pluginCount] */
+    UPlugData *pastPlug = &pluginList[pluginCount];
+    if(d<=pluginList) {
+        return 0;
+    } else if(d>=pastPlug) {
+        return pluginCount;
+    } else {
+        return (int32_t)(d-pluginList); /* pointer difference is already in elements, not bytes */
+    }
+}
+
+
+U_CAPI UPlugData * U_EXPORT2
+uplug_nextPlug(UPlugData *prior) {
+    UPlugData *candidate;
+    /* NULL starts the iteration at the first slot of the list. */
+    if(prior==NULL) {
+        return pluginList;
+    }
+
+    /* Otherwise step to the following slot, stopping once it is past the last used entry. */
+    candidate = prior + 1;
+    if(candidate >= &pluginList[pluginCount]) {
+        return NULL;
+    }
+    return candidate;
+}
+
+
+
+/**
+ * Call the plugin with some params
+ */
+static void uplug_callPlug(UPlugData *plug, UPlugReason reason, UErrorCode *status) {
+ UPlugTokenReturn token;
+ if(plug==NULL||U_FAILURE(*status)) {
+ return;
+ }
+ token = (*(plug->entrypoint))(plug, reason, status);
+ if(token!=UPLUG_TOKEN) {
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ }
+}
+
+
+static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) { /* Send the UNLOAD call to a plug whose load status is a success. */
+ if(plug->awaitingLoad) { /* shouldn't happen. Plugin hasn't been loaded yet. */
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ return;
+ }
+ if(U_SUCCESS(plug->pluginStatus)) {
+ /* Don't unload a plug which has a failing load status - means it didn't actually load. */
+ uplug_callPlug(plug, UPLUG_REASON_UNLOAD, status);
+ }
+}
+
+static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) { /* Send the QUERY call so the plugin can report its level. */
+ if(!plug->awaitingLoad || !(plug->level == UPLUG_LEVEL_UNKNOWN) ) { /* shouldn't happen. Plug must still be awaiting load with its level unset. */
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ return;
+ }
+ plug->level = UPLUG_LEVEL_INVALID; /* marker: still INVALID afterwards means the plugin never set a level */
+ uplug_callPlug(plug, UPLUG_REASON_QUERY, status);
+ if(U_SUCCESS(*status)) {
+ if(plug->level == UPLUG_LEVEL_INVALID) {
+ plug->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
+ plug->awaitingLoad = FALSE;
+ }
+ } else {
+ plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR; /* the query call itself failed */
+ plug->awaitingLoad = FALSE;
+ }
+}
+
+
+static void uplug_loadPlug(UPlugData *plug, UErrorCode *status) {
+    if(plug==NULL || U_FAILURE(*status)) { /* same guard as uplug_callPlug: tolerate a plug that was never created */
+        return;
+    }
+    if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) { /* shouldn't happen: not awaiting load, or level below UPLUG_LEVEL_LOW */
+        *status = U_INTERNAL_PROGRAM_ERROR;
+        return;
+    }
+    uplug_callPlug(plug, UPLUG_REASON_LOAD, status); /* send the LOAD call to the plugin's entrypoint */
+    plug->awaitingLoad = FALSE;
+    if(U_FAILURE(*status)) {
+        plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
+    }
+}
+
+static UPlugData *uplug_allocateEmptyPlug(UErrorCode *status)
+{
+ UPlugData *plug = NULL;
+
+ if(U_FAILURE(*status)) {
+ return NULL;
+ }
+
+ if(pluginCount == UPLUG_PLUGIN_INITIAL_COUNT) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+
+ plug = &pluginList[pluginCount++];
+
+ plug->token = UPLUG_TOKEN;
+ plug->structSize = sizeof(UPlugData);
+ plug->name[0]=0;
+ plug->level = UPLUG_LEVEL_UNKNOWN; /* initialize to null state */
+ plug->awaitingLoad = TRUE;
+ plug->dontUnload = FALSE;
+ plug->pluginStatus = U_ZERO_ERROR;
+ plug->libName[0] = 0;
+ plug->config[0]=0;
+ plug->sym[0]=0;
+ plug->lib=NULL;
+ plug->entrypoint=NULL;
+
+
+ return plug;
+}
+
+static UPlugData *uplug_allocatePlug(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *symName,
+                                     UErrorCode *status) {
+    UPlugData *plug = uplug_allocateEmptyPlug(status);
+    if(U_FAILURE(*status)) {
+        return NULL;
+    }
+    /* Fill in the new plug. strncpy does not terminate on truncation, so the copies are terminated by hand. */
+    plug->config[0] = 0;
+    if(config!=NULL) {
+        uprv_strncpy(plug->config, config, UPLUG_NAME_MAX-1);
+        plug->config[UPLUG_NAME_MAX-1] = 0;
+    }
+
+    plug->sym[0] = 0;
+    if(symName!=NULL) {
+        uprv_strncpy(plug->sym, symName, UPLUG_NAME_MAX-1);
+        plug->sym[UPLUG_NAME_MAX-1] = 0;
+    }
+
+    plug->entrypoint = entrypoint;
+    plug->lib = lib;
+    uplug_queryPlug(plug, status); /* the plug is returned even when the query sets a failure in *status */
+
+    return plug;
+}
+
+static void uplug_deallocatePlug(UPlugData *plug, UErrorCode *status) { /* Release the plug's library, then remove it from pluginList or keep it as an error record. */
+    UErrorCode subStatus = U_ZERO_ERROR;
+    if(!plug->dontUnload && plug->lib!=NULL) { /* entrypoint-only plugs have no library; closing NULL would always fail */
+#if U_ENABLE_DYLOAD
+        uplug_closeLibrary(plug->lib, &subStatus);
+#endif
+    }
+    plug->lib = NULL;
+    if(U_SUCCESS(*status) && U_FAILURE(subStatus)) {
+        *status = subStatus; /* report the close error only if nothing failed earlier */
+    }
+    /* shift plugins up and decrement count. */
+    if(U_SUCCESS(*status)) {
+        /* all ok- remove. */
+        pluginCount = uplug_removeEntryAt(pluginList, pluginCount, sizeof(plug[0]), uplug_pluginNumber(plug));
+    } else {
+        /* not ok- leave as a message. */
+        plug->awaitingLoad=FALSE;
+        plug->entrypoint=0;
+        plug->dontUnload=TRUE;
+    }
+}
+
+static void uplug_doUnloadPlug(UPlugData *plugToRemove, UErrorCode *status) {
+ if(plugToRemove != NULL) {
+ uplug_unloadPlug(plugToRemove, status);
+ uplug_deallocatePlug(plugToRemove, status);
+ }
+}
+
+U_CAPI void U_EXPORT2
+uplug_removePlug(UPlugData *plug, UErrorCode *status) {
+    UPlugData *cursor = NULL;
+
+    if(U_FAILURE(*status)) return;
+
+    /* Only a pointer that is reachable by walking the plugin list is acted upon; */
+    /* anything else (including NULL) is ignored. */
+    for(cursor=pluginList; cursor!=NULL && cursor!=plug; cursor=uplug_nextPlug(cursor)) {
+        /* keep walking */
+    }
+
+    /* cursor is NULL when the walk ran off the end without meeting plug */
+    if(cursor!=NULL) {
+        uplug_doUnloadPlug(cursor, status);
+    }
+}
+
+
+
+
+U_CAPI void U_EXPORT2
+uplug_setPlugNoUnload(UPlugData *data, UBool dontUnload)
+{
+ data->dontUnload = dontUnload;
+}
+
+
+U_CAPI void U_EXPORT2
+uplug_setPlugLevel(UPlugData *data, UPlugLevel level) {
+ data->level = level;
+}
+
+
+U_CAPI UPlugLevel U_EXPORT2
+uplug_getPlugLevel(UPlugData *data) {
+ return data->level;
+}
+
+
+U_CAPI void U_EXPORT2
+uplug_setPlugName(UPlugData *data, const char *name) {
+    uprv_strncpy(data->name, name, UPLUG_NAME_MAX-1);
+    data->name[UPLUG_NAME_MAX-1] = 0; /* strncpy leaves the buffer unterminated when name is too long */
+}
+
+U_CAPI const char * U_EXPORT2
+uplug_getPlugName(UPlugData *data) {
+ return data->name;
+}
+
+
+U_CAPI const char * U_EXPORT2
+uplug_getSymbolName(UPlugData *data) {
+ return data->sym;
+}
+
+U_CAPI const char * U_EXPORT2
+uplug_getLibraryName(UPlugData *data, UErrorCode *status) {
+ if(data->libName[0]) {
+ return data->libName;
+ } else {
+#if U_ENABLE_DYLOAD
+ return uplug_findLibrary(data->lib, status);
+#else
+ return NULL;
+#endif
+ }
+}
+
+U_CAPI void * U_EXPORT2
+uplug_getLibrary(UPlugData *data) {
+ return data->lib;
+}
+
+U_CAPI void * U_EXPORT2
+uplug_getContext(UPlugData *data) {
+ return data->context;
+}
+
+
+U_CAPI void U_EXPORT2
+uplug_setContext(UPlugData *data, void *context) {
+ data->context = context;
+}
+
+U_CAPI const char* U_EXPORT2
+uplug_getConfiguration(UPlugData *data) {
+ return data->config;
+}
+
+U_CAPI UPlugData* U_EXPORT2
+uplug_getPlugInternal(int32_t n) {
+    /* Valid indices are 0 .. pluginCount-1; anything else yields NULL. */
+    if(n >= 0 && n < pluginCount) {
+        return &(pluginList[n]);
+    }
+    return NULL;
+}
+
+
+U_CAPI UErrorCode U_EXPORT2
+uplug_getPlugLoadStatus(UPlugData *plug) {
+ return plug->pluginStatus;
+}
+
+
+
+
+/**
+ * Initialize a plugin from an entrypoint and library - but don't load it. Returns NULL if *status is a failure.
+ */
+static UPlugData* uplug_initPlugFromEntrypointAndLibrary(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *sym,
+                                                         UErrorCode *status) {
+    UPlugData *plug = NULL;
+
+    plug = uplug_allocatePlug(entrypoint, config, lib, sym, status);
+
+    if(U_SUCCESS(*status)) {
+        return plug;
+    } else if(plug!=NULL) { /* plug is NULL when no slot could be allocated; nothing to undo then */
+        uplug_deallocatePlug(plug, status);
+    }
+    return NULL;
+}
+
+U_CAPI UPlugData* U_EXPORT2
+uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status) {
+ UPlugData* plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, NULL, NULL, status);
+ uplug_loadPlug(plug, status);
+ return plug;
+}
+
+#if U_ENABLE_DYLOAD
+
+static UPlugData*
+uplug_initErrorPlug(const char *libName, const char *sym, const char *config, const char *nameOrError, UErrorCode loadStatus, UErrorCode *status)
+{
+    UPlugData *plug = uplug_allocateEmptyPlug(status);
+    if(U_FAILURE(*status)) return NULL;
+    /* Placeholder entry that records loadStatus and the given strings for a plugin that could not be set up. */
+    plug->pluginStatus = loadStatus;
+    plug->awaitingLoad = FALSE; /* Won't load. */
+    plug->dontUnload = TRUE; /* cannot unload. */
+    /* strncpy does not terminate on truncation, so each copy below is terminated by hand. */
+    if(sym!=NULL) {
+        uprv_strncpy(plug->sym, sym, UPLUG_NAME_MAX-1);
+        plug->sym[UPLUG_NAME_MAX-1] = 0;
+    }
+    if(libName!=NULL) {
+        uprv_strncpy(plug->libName, libName, UPLUG_NAME_MAX-1);
+        plug->libName[UPLUG_NAME_MAX-1] = 0;
+    }
+    if(nameOrError!=NULL) {
+        uprv_strncpy(plug->name, nameOrError, UPLUG_NAME_MAX-1);
+        plug->name[UPLUG_NAME_MAX-1] = 0;
+    }
+    if(config!=NULL) {
+        uprv_strncpy(plug->config, config, UPLUG_NAME_MAX-1);
+        plug->config[UPLUG_NAME_MAX-1] = 0;
+    }
+    return plug;
+}
+
+/**
+ * Fetch a plugin from DLL, and then initialize it from a library- but don't load it.
+ */
+static UPlugData*
+uplug_initPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) {
+ void *lib = NULL;
+ UPlugData *plug = NULL;
+ if(U_FAILURE(*status)) { return NULL; }
+ lib = uplug_openLibrary(libName, status);
+ if(lib!=NULL && U_SUCCESS(*status)) {
+ UPlugEntrypoint *entrypoint = NULL;
+ entrypoint = (UPlugEntrypoint*)uprv_dlsym_func(lib, sym, status);
+
+ if(entrypoint!=NULL&&U_SUCCESS(*status)) {
+ plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, lib, sym, status);
+ if(plug!=NULL&&U_SUCCESS(*status)) {
+ plug->lib = lib; /* plug takes ownership of library */
+ lib = NULL; /* library is now owned by plugin. */
+ }
+ } else {
+ UErrorCode subStatus = U_ZERO_ERROR;
+ plug = uplug_initErrorPlug(libName,sym,config,"ERROR: Could not load entrypoint",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
+ }
+ if(lib!=NULL) { /* still need to close the lib */
+ UErrorCode subStatus = U_ZERO_ERROR;
+ uplug_closeLibrary(lib, &subStatus); /* don't care here */
+ }
+ } else {
+ UErrorCode subStatus = U_ZERO_ERROR;
+ plug = uplug_initErrorPlug(libName,sym,config,"ERROR: could not load library",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
+ }
+ return plug;
+}
+
+U_CAPI UPlugData* U_EXPORT2
+uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) {
+ UPlugData *plug = NULL;
+ if(U_FAILURE(*status)) { return NULL; }
+ plug = uplug_initPlugFromLibrary(libName, sym, config, status);
+ uplug_loadPlug(plug, status);
+
+ return plug;
+}
+
+#endif
+
+static UPlugLevel gCurrentLevel = UPLUG_LEVEL_LOW;
+
+U_CAPI UPlugLevel U_EXPORT2 uplug_getCurrentLevel() {
+ return gCurrentLevel;
+}
+
+static UBool U_CALLCONV uplug_cleanup(void)
+{
+    int32_t i;
+    /* Walk backwards: a successful unload removes the entry and shrinks pluginCount, so a forward walk skips plugs. */
+    UPlugData *pluginToRemove;
+    /* cleanup plugs */
+    for(i=pluginCount-1;i>=0;i--) {
+        UErrorCode subStatus = U_ZERO_ERROR;
+        pluginToRemove = &pluginList[i];
+        /* unload and deallocate */
+        uplug_doUnloadPlug(pluginToRemove, &subStatus);
+    }
+    /* close other held libs? */
+    gCurrentLevel = UPLUG_LEVEL_LOW;
+    return TRUE;
+}
+
+#if U_ENABLE_DYLOAD
+
+static void uplug_loadWaitingPlugs(UErrorCode *status) {
+ int32_t i;
+ UPlugLevel currentLevel = uplug_getCurrentLevel();
+
+ if(U_FAILURE(*status)) {
+ return;
+ }
+#if UPLUG_TRACE
+ DBG((stderr, "uplug_loadWaitingPlugs() Level: %d\n", currentLevel));
+#endif
+ /* pass #1: low level plugs */
+ for(i=0;i<pluginCount;i++) {
+ UErrorCode subStatus = U_ZERO_ERROR;
+ UPlugData *pluginToLoad = &pluginList[i];
+ if(pluginToLoad->awaitingLoad) {
+ if(pluginToLoad->level == UPLUG_LEVEL_LOW) {
+ if(currentLevel > UPLUG_LEVEL_LOW) {
+ pluginToLoad->pluginStatus = U_PLUGIN_TOO_HIGH;
+ } else {
+ UPlugLevel newLevel;
+ uplug_loadPlug(pluginToLoad, &subStatus);
+ newLevel = uplug_getCurrentLevel();
+ if(newLevel > currentLevel) {
+ pluginToLoad->pluginStatus = U_PLUGIN_CHANGED_LEVEL_WARNING;
+ currentLevel = newLevel;
+ }
+ }
+ pluginToLoad->awaitingLoad = FALSE;
+ }
+ }
+ }
+ for(i=0;i<pluginCount;i++) {
+ UErrorCode subStatus = U_ZERO_ERROR;
+ UPlugData *pluginToLoad = &pluginList[i];
+
+ if(pluginToLoad->awaitingLoad) {
+ if(pluginToLoad->level == UPLUG_LEVEL_INVALID) {
+ pluginToLoad->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
+ } else if(pluginToLoad->level == UPLUG_LEVEL_UNKNOWN) {
+ pluginToLoad->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
+ } else {
+ uplug_loadPlug(pluginToLoad, &subStatus);
+ }
+ pluginToLoad->awaitingLoad = FALSE;
+ }
+ }
+
+#if UPLUG_TRACE
+ DBG((stderr, " Done Loading Plugs. Level: %d\n", (int32_t)uplug_getCurrentLevel()));
+#endif
+}
+
+/* Name of the plugin config file */
+static char plugin_file[2048] = "";
+#endif
+
+U_CAPI const char* U_EXPORT2
+uplug_getPluginFile() {
+#if U_ENABLE_DYLOAD && !UCONFIG_NO_FILE_IO
+ return plugin_file;
+#else
+ return NULL;
+#endif
+}
+
+
+// uplug_init() is called first thing from u_init().
+// With dynamic loading and file I/O enabled: reads the plugin config file, initializes each listed plugin, loads the waiting plugs.
+U_CAPI void U_EXPORT2
+uplug_init(UErrorCode *status) {
+#if !U_ENABLE_DYLOAD
+    (void)status; /* unused */
+#elif !UCONFIG_NO_FILE_IO
+    CharString plugin_dir;
+    const char *env = getenv("ICU_PLUGINS");
+
+    if(U_FAILURE(*status)) return;
+    if(env != NULL) {
+        plugin_dir.append(env, -1, *status);
+    }
+    if(U_FAILURE(*status)) return;
+
+#if defined(DEFAULT_ICU_PLUGINS)
+    if(plugin_dir.isEmpty()) {
+        plugin_dir.append(DEFAULT_ICU_PLUGINS, -1, *status);
+    }
+#endif
+
+#if UPLUG_TRACE
+    DBG((stderr, "ICU_PLUGINS=%s\n", plugin_dir.data()));
+#endif
+
+    if(!plugin_dir.isEmpty()) {
+        FILE *f;
+
+        CharString pluginFile;
+#ifdef OS390BATCH
+/* There are potentially a lot of ways to implement a plugin directory on OS390/zOS */
+/* Keeping in mind that unauthorized file access is logged, monitored, and enforced */
+/* I've chosen to open a DDNAME if BATCH and leave it alone for (presumably) UNIX */
+/* System Services. Alternative techniques might be allocating a member in */
+/* SYS1.PARMLIB or setting an environment variable "ICU_PLUGIN_PATH" (?). The */
+/* DDNAME can be connected to a file in the HFS if need be. */
+
+        pluginFile.append("//DD:ICUPLUG", -1, *status); /* JAM 20 Oct 2011 */
+#else
+        pluginFile.append(plugin_dir, *status);
+        pluginFile.append(U_FILE_SEP_STRING, -1, *status);
+        pluginFile.append("icuplugins", -1, *status);
+        pluginFile.append(U_ICU_VERSION_SHORT, -1, *status);
+        pluginFile.append(".txt", -1, *status);
+#endif
+
+#if UPLUG_TRACE
+        DBG((stderr, "status=%s\n", u_errorName(*status)));
+#endif
+
+        if(U_FAILURE(*status)) {
+            return;
+        }
+        if((size_t)pluginFile.length() > (sizeof(plugin_file)-1)) {
+            *status = U_BUFFER_OVERFLOW_ERROR;
+#if UPLUG_TRACE
+            DBG((stderr, "status=%s\n", u_errorName(*status)));
+#endif
+            return;
+        }
+
+        /* plugin_file is not used for processing - it is only used
+           so that uplug_getPluginFile() works (i.e. icuinfo)
+        */
+        pluginFile.extract(plugin_file, sizeof(plugin_file), *status);
+
+#if UPLUG_TRACE
+        DBG((stderr, "pluginfile= %s len %d/%d\n", plugin_file, (int)strlen(plugin_file), (int)sizeof(plugin_file)));
+#endif
+
+#ifdef __MVS__
+        if (iscics()) /* 12 Nov 2011 JAM */
+        {
+            f = NULL;
+        }
+        else
+#endif
+        {
+            f = fopen(pluginFile.data(), "r");
+        }
+
+        if(f != NULL) {
+            char linebuf[1024];
+            char *p, *libName=NULL, *symName=NULL, *config=NULL;
+            int32_t line = 0;
+
+            /* Each line is: libname, whitespace, symbol name, then optional config text; '#' starts a comment line. */
+            while(fgets(linebuf,1023,f)) {
+                line++;
+                config = NULL; /* start every line without a config, or a stale pointer from an earlier line is reused */
+                if(!*linebuf || *linebuf=='#') {
+                    continue;
+                } else {
+                    p = linebuf;
+                    while(*p&&isspace((unsigned char)*p)) /* isspace needs an unsigned char value */
+                        p++;
+                    if(!*p || *p=='#') continue;
+                    libName = p;
+                    while(*p&&!isspace((unsigned char)*p)) {
+                        p++;
+                    }
+                    if(!*p || *p=='#') continue; /* no tab after libname */
+                    *p=0; /* end of libname */
+                    p++;
+                    while(*p&&isspace((unsigned char)*p)) {
+                        p++;
+                    }
+                    if(!*p||*p=='#') continue; /* no symname after libname +tab */
+                    symName = p;
+                    while(*p&&!isspace((unsigned char)*p)) {
+                        p++;
+                    }
+
+                    if(*p) { /* has config */
+                        *p=0;
+                        ++p;
+                        while(*p&&isspace((unsigned char)*p)) {
+                            p++;
+                        }
+                        if(*p) {
+                            config = p;
+                        }
+                    }
+
+                    /* chop whitespace at the end of the config */
+                    if(config!=NULL&&*config!=0) {
+                        p = config+strlen(config);
+                        while(p>config&&isspace((unsigned char)*(--p))) {
+                            *p=0;
+                        }
+                    }
+
+                    /* OK, we're good. */
+                    {
+                        UErrorCode subStatus = U_ZERO_ERROR;
+                        UPlugData *plug = uplug_initPlugFromLibrary(libName, symName, config, &subStatus);
+                        if(U_FAILURE(subStatus) && U_SUCCESS(*status)) {
+                            *status = subStatus;
+                        }
+#if UPLUG_TRACE
+                        DBG((stderr, "PLUGIN libName=[%s], sym=[%s], config=[%s]\n", libName, symName, config));
+                        DBG((stderr, " -> %p, %s\n", (void*)plug, u_errorName(subStatus)));
+#else
+                        (void)plug; /* unused */
+#endif
+                    }
+                }
+            }
+            fclose(f);
+        } else {
+#if UPLUG_TRACE
+            DBG((stderr, "Can't open plugin file %s\n", plugin_file));
+#endif
+        }
+    }
+    uplug_loadWaitingPlugs(status);
+#endif /* U_ENABLE_DYLOAD */
+    gCurrentLevel = UPLUG_LEVEL_HIGH;
+    ucln_registerCleanup(UCLN_UPLUG, uplug_cleanup);
+}
+
+#endif
+
+
diff --git a/thirdparty/icu4c/common/icuplugimp.h b/thirdparty/icu4c/common/icuplugimp.h
new file mode 100644
index 0000000000..9df309204e
--- /dev/null
+++ b/thirdparty/icu4c/common/icuplugimp.h
@@ -0,0 +1,93 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2009-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : icuplugimp.h
+*
+* Internal functions for the ICU plugin system
+*
+* Date Name Description
+* 10/29/2009 sl New.
+******************************************************************************
+*/
+
+
+#ifndef ICUPLUGIMP_H
+#define ICUPLUGIMP_H
+
+#include "unicode/icuplug.h"
+
+#if UCONFIG_ENABLE_PLUGINS
+
+/*========================*/
+/** @{ Library Manipulation 
+ */
+
+/**
+ * Open a library, adding a reference count if needed.
+ * @param libName library name to load
+ * @param status error code
+ * @return the library pointer, or NULL
+ * @internal internal use only
+ */
+U_CAPI void * U_EXPORT2
+uplug_openLibrary(const char *libName, UErrorCode *status);
+
+/**
+ * Close a library, if its reference count is 0
+ * @param lib the library to close
+ * @param status error code
+ * @internal internal use only
+ */
+U_CAPI void U_EXPORT2
+uplug_closeLibrary(void *lib, UErrorCode *status);
+
+/**
+ * Get a library's name, or NULL if not found.
+ * @param lib the library pointer whose name is looked up
+ * @param status error code
+ * @return the library name, or NULL if not found.
+ * @internal internal use only
+ */
+U_CAPI char * U_EXPORT2
+uplug_findLibrary(void *lib, UErrorCode *status);
+
+/** @} */
+
+/*========================*/
+/** @{ ICU Plugin internal interfaces
+ */
+
+/**
+ * Initialize the plugins 
+ * @param status error result
+ * @internal - Internal use only.
+ */
+U_CAPI void U_EXPORT2
+uplug_init(UErrorCode *status);
+
+/**
+ * Get raw plug N
+ * @param n index of the plugin to return
+ * @internal - Internal use only
+ */ 
+U_CAPI UPlugData* U_EXPORT2
+uplug_getPlugInternal(int32_t n);
+
+/**
+ * Get the name of the plugin file. 
+ * @internal - Internal use only.
+ */
+U_CAPI const char* U_EXPORT2
+uplug_getPluginFile(void);
+
+/** @} */
+
+#endif /* UCONFIG_ENABLE_PLUGINS */
+
+#endif /* ICUPLUGIMP_H */
diff --git a/thirdparty/icu4c/common/loadednormalizer2impl.cpp b/thirdparty/icu4c/common/loadednormalizer2impl.cpp
new file mode 100644
index 0000000000..e4b36f1055
--- /dev/null
+++ b/thirdparty/icu4c/common/loadednormalizer2impl.cpp
@@ -0,0 +1,418 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* loadednormalizer2impl.cpp
+*
+* created on: 2014sep03
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/udata.h"
+#include "unicode/localpointer.h"
+#include "unicode/normalizer2.h"
+#include "unicode/ucptrie.h"
+#include "unicode/unistr.h"
+#include "unicode/unorm.h"
+#include "cstring.h"
+#include "mutex.h"
+#include "norm2allmodes.h"
+#include "normalizer2impl.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+// Normalizer2Impl subclass whose data is opened at runtime by load().
+// It keeps the opened data handle and the trie it created so that the
+// destructor can close both.
+class LoadedNormalizer2Impl : public Normalizer2Impl {
+public:
+ // Starts with no data loaded: both handles are NULL until load() runs.
+ LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
+ virtual ~LoadedNormalizer2Impl();
+
+ // Opens the "nrm" data item `name` in `packageName` and initializes this object from it.
+ void load(const char *packageName, const char *name, UErrorCode &errorCode);
+
+private:
+ // Acceptance callback passed to udata_openChoice() by load().
+ static UBool U_CALLCONV
+ isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
+
+ UDataMemory *memory;   // handle returned by udata_openChoice(); closed in the destructor
+ UCPTrie *ownedTrie;    // trie returned by ucptrie_openFromBinary(); closed in the destructor
+};
+
+// Closes what load() opened: first the data handle, then the trie.
+LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
+    udata_close(memory);       // opened by udata_openChoice() in load()
+    ucptrie_close(ownedTrie);  // opened by ucptrie_openFromBinary() in load()
+}
+
+// Acceptance callback for udata_openChoice(): returns TRUE only when the data
+// header is large enough, matches this platform's endianness and charset
+// family, carries the "Nrm2" data format tag and has format version 4.
+// The context, type and name arguments are not used.
+UBool U_CALLCONV
+LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
+                                    const char * /* type */, const char * /*name*/,
+                                    const UDataInfo *pInfo) {
+    if(pInfo->size<20) {
+        return FALSE;
+    }
+    if(pInfo->isBigEndian!=U_IS_BIG_ENDIAN || pInfo->charsetFamily!=U_CHARSET_FAMILY) {
+        return FALSE;
+    }
+    // dataFormat must be the ASCII bytes of "Nrm2".
+    const int32_t expectedFormat[4]={ 0x4e, 0x72, 0x6d, 0x32 };
+    for(int32_t i=0; i<4; ++i) {
+        if(pInfo->dataFormat[i]!=expectedFormat[i]) {
+            return FALSE;
+        }
+    }
+    if(pInfo->formatVersion[0]!=4) {
+        return FALSE;
+    }
+    // The data version is intentionally not copied out of pInfo here.
+    return TRUE;
+}
+
+// Opens the "nrm" data item `name` from `packageName` and initializes this
+// Normalizer2Impl from its contents. Does nothing if errorCode already
+// indicates a failure. Sets U_INVALID_FORMAT_ERROR when the index array is
+// too short; otherwise propagates errors from the data and trie open calls.
+void
+LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+
+    // The data starts with an array of int32_t indexes; the trie offset
+    // divided by 4 is taken as the number of indexes present.
+    const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
+    const int32_t *inIndexes=(const int32_t *)inBytes;
+    if(inIndexes[IX_NORM_TRIE_OFFSET]/4<=IX_MIN_LCCC_CP) {
+        errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
+        return;
+    }
+
+    // The trie occupies the bytes between the trie offset and the extra-data offset.
+    const int32_t trieOffset=inIndexes[IX_NORM_TRIE_OFFSET];
+    const int32_t extraDataOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
+    ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
+                                     inBytes+trieOffset, extraDataOffset-trieOffset, NULL,
+                                     &errorCode);
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+
+    // smallFCD: new in formatVersion 2
+    const int32_t smallFCDOffset=inIndexes[IX_SMALL_FCD_OFFSET];
+    const uint16_t *inExtraData=(const uint16_t *)(inBytes+extraDataOffset);
+    const uint8_t *inSmallFCD=inBytes+smallFCDOffset;
+
+    init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
+}
+
+// instance cache ---------------------------------------------------------- ***
+
+// Allocates a LoadedNormalizer2Impl, loads the named data into it and hands it
+// to the createInstance(impl, errorCode) overload. Returns NULL if errorCode
+// already indicates failure or if the allocation fails
+// (U_MEMORY_ALLOCATION_ERROR).
+Norm2AllModes *
+Norm2AllModes::createInstance(const char *packageName,
+                              const char *name,
+                              UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    LoadedNormalizer2Impl *loaded=new LoadedNormalizer2Impl;
+    if(loaded!=NULL) {
+        loaded->load(packageName, name, errorCode);
+        // The overload is called even when load() failed; it receives the error code as well.
+        return createInstance(loaded, errorCode);
+    }
+    errorCode=U_MEMORY_ALLOCATION_ERROR;
+    return NULL;
+}
+
+// Forward declaration: the cleanup function is registered before its definition below.
+U_CDECL_BEGIN
+static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
+U_CDECL_END
+
+// Lazily created singletons, each paired with the UInitOnce that guards its
+// creation in initSingletons(). All of them are deleted and reset by
+// uprv_loaded_normalizer2_cleanup().
+#if !NORM2_HARDCODE_NFC_DATA
+static Norm2AllModes *nfcSingleton;
+static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
+#endif
+
+static Norm2AllModes *nfkcSingleton;
+static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
+
+static Norm2AllModes *nfkc_cfSingleton;
+static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
+
+// Name -> Norm2AllModes cache used by Normalizer2::getInstance(); created on first use.
+static UHashtable *cache=NULL;
+
+// UInitOnce singleton initialization function
+// UInitOnce singleton initialization function.
+// `what` selects which singleton to create ("nfc" when NFC data is not
+// hardcoded, "nfkc" or "nfkc_cf"); any other value is treated as unreachable.
+// The cleanup function is registered on every call, whichever singleton was requested.
+static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
+#if !NORM2_HARDCODE_NFC_DATA
+ if (uprv_strcmp(what, "nfc") == 0) {
+ nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
+ } else
+#endif
+ // When the #if block above is compiled in, this `if` is the else-branch of the "nfc" test.
+ if (uprv_strcmp(what, "nfkc") == 0) {
+ nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
+ } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
+ nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
+ } else {
+ UPRV_UNREACHABLE; // Unknown singleton
+ }
+ ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
+}
+
+U_CDECL_BEGIN
+
+// Value deleter installed on the cache hashtable: destroys one Norm2AllModes.
+static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
+    Norm2AllModes *modes = static_cast<Norm2AllModes *>(allModes);
+    delete modes;
+}
+
+// Library cleanup hook: deletes every singleton, resets the matching
+// UInitOnce so it can be created again, and closes the instance cache.
+// Always returns TRUE.
+static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
+#if !NORM2_HARDCODE_NFC_DATA
+    // NFC singleton (only present when NFC data is not hardcoded)
+    delete nfcSingleton;
+    nfcSingleton = nullptr;
+    nfcInitOnce.reset();
+#endif
+
+    // NFKC singleton
+    delete nfkcSingleton;
+    nfkcSingleton = nullptr;
+    nfkcInitOnce.reset();
+
+    // NFKC_Casefold singleton
+    delete nfkc_cfSingleton;
+    nfkc_cfSingleton = nullptr;
+    nfkc_cfInitOnce.reset();
+
+    // Named-instance cache
+    uhash_close(cache);
+    cache = nullptr;
+    return TRUE;
+}
+
+U_CDECL_END
+
+#if !NORM2_HARDCODE_NFC_DATA
+// Returns the lazily created NFC singleton, or NULL if errorCode already
+// indicates failure. The result may also be NULL if creation failed.
+const Norm2AllModes *
+Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
+    if(U_SUCCESS(errorCode)) {
+        umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
+        return nfcSingleton;
+    }
+    return NULL;
+}
+#endif
+
+// Returns the lazily created NFKC singleton, or NULL if errorCode already
+// indicates failure.
+const Norm2AllModes *
+Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
+    if(U_SUCCESS(errorCode)) {
+        umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
+        return nfkcSingleton;
+    }
+    return NULL;
+}
+
+// Returns the lazily created NFKC_Casefold singleton, or NULL if errorCode
+// already indicates failure.
+const Norm2AllModes *
+Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
+    if(U_SUCCESS(errorCode)) {
+        umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
+        return nfkc_cfSingleton;
+    }
+    return NULL;
+}
+
+#if !NORM2_HARDCODE_NFC_DATA
+// Returns the `comp` normalizer of the NFC Norm2AllModes, or NULL if that
+// instance is not available.
+const Normalizer2 *
+Normalizer2::getNFCInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
+    if(nfcModes==NULL) {
+        return NULL;
+    }
+    return &nfcModes->comp;
+}
+
+// Returns the `decomp` normalizer of the NFC Norm2AllModes, or NULL if that
+// instance is not available.
+const Normalizer2 *
+Normalizer2::getNFDInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
+    if(nfcModes==NULL) {
+        return NULL;
+    }
+    return &nfcModes->decomp;
+}
+
+// Returns the `fcd` normalizer of the NFC Norm2AllModes, or NULL if that
+// instance is not available.
+const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
+    if(nfcModes==NULL) {
+        return NULL;
+    }
+    return &nfcModes->fcd;
+}
+
+// Returns the `fcc` normalizer of the NFC Norm2AllModes, or NULL if that
+// instance is not available.
+const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
+    if(nfcModes==NULL) {
+        return NULL;
+    }
+    return &nfcModes->fcc;
+}
+
+// Returns the `impl` pointer of the NFC Norm2AllModes, or NULL if that
+// instance is not available.
+const Normalizer2Impl *
+Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
+    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
+    if(nfcModes==NULL) {
+        return NULL;
+    }
+    return nfcModes->impl;
+}
+#endif
+
+// Returns the `comp` normalizer of the NFKC Norm2AllModes, or NULL if that
+// instance is not available.
+const Normalizer2 *
+Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *nfkcModes=Norm2AllModes::getNFKCInstance(errorCode);
+    if(nfkcModes==NULL) {
+        return NULL;
+    }
+    return &nfkcModes->comp;
+}
+
+// Returns the `decomp` normalizer of the NFKC Norm2AllModes, or NULL if that
+// instance is not available.
+const Normalizer2 *
+Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *nfkcModes=Norm2AllModes::getNFKCInstance(errorCode);
+    if(nfkcModes==NULL) {
+        return NULL;
+    }
+    return &nfkcModes->decomp;
+}
+
+// Returns the `comp` normalizer of the NFKC_Casefold Norm2AllModes, or NULL
+// if that instance is not available.
+const Normalizer2 *
+Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *casefoldModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
+    if(casefoldModes==NULL) {
+        return NULL;
+    }
+    return &casefoldModes->comp;
+}
+
+// Returns the normalizer for the given data (`packageName`, `name`) and `mode`.
+// - Sets U_ILLEGAL_ARGUMENT_ERROR and returns NULL for a NULL or empty name.
+// - With a NULL packageName, "nfc", "nfkc" and "nfkc_cf" map to the built-in singletons.
+// - Anything else is looked up in, or created and added to, the file-level cache.
+// Returns NULL on any failure or for a mode not handled by the final switch.
+const Normalizer2 *
+Normalizer2::getInstance(const char *packageName,
+ const char *name,
+ UNormalization2Mode mode,
+ UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return NULL;
+ }
+ if(name==NULL || *name==0) {
+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return NULL;
+ }
+ const Norm2AllModes *allModes=NULL;
+ if(packageName==NULL) {
+ if(0==uprv_strcmp(name, "nfc")) {
+ allModes=Norm2AllModes::getNFCInstance(errorCode);
+ } else if(0==uprv_strcmp(name, "nfkc")) {
+ allModes=Norm2AllModes::getNFKCInstance(errorCode);
+ } else if(0==uprv_strcmp(name, "nfkc_cf")) {
+ allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
+ }
+ }
+ if(allModes==NULL && U_SUCCESS(errorCode)) {
+ // First look in the cache while holding the lock.
+ // NOTE(review): the cache is keyed by `name` only; packageName is not part of the key.
+ {
+ Mutex lock;
+ if(cache!=NULL) {
+ allModes=(Norm2AllModes *)uhash_get(cache, name);
+ }
+ }
+ if(allModes==NULL) {
+ // Not cached: create the instance outside the lock, then publish it under the lock.
+ ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
+ LocalPointer<Norm2AllModes> localAllModes(
+ Norm2AllModes::createInstance(packageName, name, errorCode));
+ if(U_SUCCESS(errorCode)) {
+ Mutex lock;
+ if(cache==NULL) {
+ cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ return NULL;
+ }
+ // The table owns both keys (uprv_malloc'ed copies) and values.
+ uhash_setKeyDeleter(cache, uprv_free);
+ uhash_setValueDeleter(cache, deleteNorm2AllModes);
+ }
+ // Re-check: the entry may have been added since the first lookup released the lock.
+ void *temp=uhash_get(cache, name);
+ if(temp==NULL) {
+ int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
+ char *nameCopy=(char *)uprv_malloc(keyLength);
+ if(nameCopy==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ uprv_memcpy(nameCopy, name, keyLength);
+ // Keep an alias for the return value, then transfer ownership to the table.
+ allModes=localAllModes.getAlias();
+ uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
+ } else {
+ // race condition
+ // The cached entry is used; localAllModes deletes the new instance when it goes out of scope.
+ allModes=(Norm2AllModes *)temp;
+ }
+ }
+ }
+ }
+ // Select the requested mode; the U_SUCCESS check also covers a failed uhash_put() above.
+ if(allModes!=NULL && U_SUCCESS(errorCode)) {
+ switch(mode) {
+ case UNORM2_COMPOSE:
+ return &allModes->comp;
+ case UNORM2_DECOMPOSE:
+ return &allModes->decomp;
+ case UNORM2_FCD:
+ return &allModes->fcd;
+ case UNORM2_COMPOSE_CONTIGUOUS:
+ return &allModes->fcc;
+ default:
+ break; // do nothing
+ }
+ }
+ return NULL;
+}
+
+// Maps a legacy UNormalizationMode to the matching Normalizer2 instance.
+// Any mode not listed here (including UNORM_NONE) yields the no-op instance.
+// Returns NULL if errorCode already indicates failure.
+const Normalizer2 *
+Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    if(mode==UNORM_NFD) {
+        return Normalizer2::getNFDInstance(errorCode);
+    }
+    if(mode==UNORM_NFKD) {
+        return Normalizer2::getNFKDInstance(errorCode);
+    }
+    if(mode==UNORM_NFC) {
+        return Normalizer2::getNFCInstance(errorCode);
+    }
+    if(mode==UNORM_NFKC) {
+        return Normalizer2::getNFKCInstance(errorCode);
+    }
+    if(mode==UNORM_FCD) {
+        return getFCDInstance(errorCode);
+    }
+    // UNORM_NONE and everything else
+    return getNoopInstance(errorCode);
+}
+
+// Returns the `impl` pointer of the NFKC Norm2AllModes, or NULL if that
+// instance is not available.
+const Normalizer2Impl *
+Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
+    const Norm2AllModes *nfkcModes=Norm2AllModes::getNFKCInstance(errorCode);
+    if(nfkcModes==NULL) {
+        return NULL;
+    }
+    return nfkcModes->impl;
+}
+
+// Returns the `impl` pointer of the NFKC_Casefold Norm2AllModes, or NULL if
+// that instance is not available.
+const Normalizer2Impl *
+Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
+    const Norm2AllModes *casefoldModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
+    if(casefoldModes==NULL) {
+        return NULL;
+    }
+    return casefoldModes->impl;
+}
+
+U_NAMESPACE_END
+
+// C API ------------------------------------------------------------------- ***
+
+U_NAMESPACE_USE
+
+// C wrapper around Normalizer2::getNFKCInstance(); pErrorCode is dereferenced unconditionally.
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
+    const Normalizer2 *norm2=Normalizer2::getNFKCInstance(*pErrorCode);
+    return (const UNormalizer2 *)norm2;
+}
+
+// C wrapper around Normalizer2::getNFKDInstance(); pErrorCode is dereferenced unconditionally.
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
+    const Normalizer2 *norm2=Normalizer2::getNFKDInstance(*pErrorCode);
+    return (const UNormalizer2 *)norm2;
+}
+
+// C wrapper around Normalizer2::getNFKCCasefoldInstance(); pErrorCode is dereferenced unconditionally.
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
+    const Normalizer2 *norm2=Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
+    return (const UNormalizer2 *)norm2;
+}
+
+// C wrapper around Normalizer2::getInstance(); all arguments are forwarded
+// unchanged and pErrorCode is dereferenced unconditionally.
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getInstance(const char *packageName,
+                   const char *name,
+                   UNormalization2Mode mode,
+                   UErrorCode *pErrorCode) {
+    const Normalizer2 *norm2=Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
+    return (const UNormalizer2 *)norm2;
+}
+
+// Quick-check value of code point c for a legacy normalization mode.
+// Modes at or below UNORM_NONE, or at or above UNORM_FCD, always yield
+// UNORM_YES. If the normalizer cannot be obtained, UNORM_MAYBE is returned.
+U_CFUNC UNormalizationCheckResult
+unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
+    if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
+        return UNORM_YES;
+    }
+    UErrorCode errorCode=U_ZERO_ERROR;
+    const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return UNORM_MAYBE;
+    }
+    const Normalizer2WithImpl *withImpl=(const Normalizer2WithImpl *)norm2;
+    return withImpl->getQuickCheck(c);
+}
+
+#endif // !UCONFIG_NO_NORMALIZATION
diff --git a/thirdparty/icu4c/common/localebuilder.cpp b/thirdparty/icu4c/common/localebuilder.cpp
new file mode 100644
index 0000000000..1dd8131e58
--- /dev/null
+++ b/thirdparty/icu4c/common/localebuilder.cpp
@@ -0,0 +1,468 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <utility>
+
+#include "bytesinkutil.h" // CharStringByteSink
+#include "charstr.h"
+#include "cstring.h"
+#include "ulocimp.h"
+#include "unicode/localebuilder.h"
+#include "unicode/locid.h"
+
+U_NAMESPACE_BEGIN
+
+// ASCII classification helpers. Both macros evaluate their argument more than
+// once, so do not pass expressions with side effects.
+#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
+#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
+
+// Keyword name under which the Unicode locale attributes are stored.
+// NOTE(review): declared without `static` or a const pointer, so the symbol
+// is mutable and has external linkage - confirm whether that is intended.
+const char* kAttributeKey = "attribute";
+
+// Validates the subtags of one extension. The singleton `key` (compared
+// case-insensitively) selects the rule set: 'u' Unicode extension,
+// 't' transformed extension, 'x' private use, anything else generic.
+static bool _isExtensionSubtags(char key, const char* s, int32_t len) {
+    const auto singleton = uprv_tolower(key);
+    if (singleton == 'u') {
+        return ultag_isUnicodeExtensionSubtags(s, len);
+    }
+    if (singleton == 't') {
+        return ultag_isTransformedExtensionSubtags(s, len);
+    }
+    if (singleton == 'x') {
+        return ultag_isPrivateuseValueSubtags(s, len);
+    }
+    return ultag_isExtensionSubtags(s, len);
+}
+
+LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
+ script_(), region_(), variant_(nullptr), extensions_(nullptr)
+{
+ language_[0] = 0;
+ script_[0] = 0;
+ region_[0] = 0;
+}
+
+// Frees the two heap-allocated parts of the builder (either may be nullptr).
+LocaleBuilder::~LocaleBuilder()
+{
+    delete variant_;     // CharString holding the variant subtags
+    delete extensions_;  // Locale used as the container for extensions
+}
+
+// Replaces the whole builder state with the fields of `locale`.
+// clear() runs first, so a previously recorded error is discarded. The locale
+// is cloned to serve as the extensions container; a failed clone records
+// U_MEMORY_ALLOCATION_ERROR.
+LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
+{
+    clear();
+
+    // Each setter validates its field and records any error in status_.
+    setLanguage(locale.getLanguage());
+    setScript(locale.getScript());
+    setRegion(locale.getCountry());
+    setVariant(locale.getVariant());
+
+    extensions_ = locale.clone();
+    if (extensions_ == nullptr) {
+        status_ = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return *this;
+}
+
+// Parses a BCP 47 language tag and, on success, loads the result via setLocale().
+LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
+{
+    Locale parsed = Locale::forLanguageTag(tag, status_);
+    // setLocale() resets status_, so it must only run when parsing succeeded;
+    // otherwise the parse error would be lost.
+    if (U_SUCCESS(status_)) {
+        setLocale(parsed);
+    }
+    return *this;
+}
+
+// Copies `input` into the NUL-terminated buffer `dest` after validating it
+// with `test`. An empty input clears the field; an input rejected by `test`
+// sets U_ILLEGAL_ARGUMENT_ERROR and leaves `dest` untouched. No-op when
+// errorCode already indicates failure.
+// NOTE(review): the size of `dest` is not checked here; presumably `test`
+// bounds the accepted length - confirm against the field sizes.
+static void setField(StringPiece input, char* dest, UErrorCode& errorCode,
+                     UBool (*test)(const char*, int32_t)) {
+    if (U_FAILURE(errorCode)) { return; }
+    if (input.empty()) {
+        dest[0] = '\0';
+        return;
+    }
+    if (!test(input.data(), input.length())) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    uprv_memcpy(dest, input.data(), input.length());
+    dest[input.length()] = '\0';
+}
+
+// Sets (or, for empty input, clears) the language subtag; invalid input is
+// recorded in status_.
+LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
+{
+    UBool (*const validator)(const char*, int32_t) = &ultag_isLanguageSubtag;
+    setField(language, language_, status_, validator);
+    return *this;
+}
+
+// Sets (or, for empty input, clears) the script subtag; invalid input is
+// recorded in status_.
+LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
+{
+    UBool (*const validator)(const char*, int32_t) = &ultag_isScriptSubtag;
+    setField(script, script_, status_, validator);
+    return *this;
+}
+
+// Sets (or, for empty input, clears) the region subtag; invalid input is
+// recorded in status_.
+LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
+{
+    UBool (*const validator)(const char*, int32_t) = &ultag_isRegionSubtag;
+    setField(region, region_, status_, validator);
+    return *this;
+}
+
+// Normalizes `len` characters of `data` in place: every '_' becomes '-',
+// every other character is lowercased with uprv_tolower().
+static void transform(char* data, int32_t len) {
+    for (int32_t i = 0; i < len; ++i) {
+        char& c = data[i];
+        if (c != '_') {
+            c = uprv_tolower(c);
+        } else {
+            c = '-';
+        }
+    }
+}
+
+// Sets the variant subtags. An empty `variant` removes the current variant.
+// Otherwise the input is normalized with transform() ('_' -> '-', lowercase)
+// and validated; invalid input records U_ILLEGAL_ARGUMENT_ERROR and keeps the
+// previous variant. No-op when the builder already holds an error.
+LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
+{
+    if (U_FAILURE(status_)) { return *this; }
+    if (variant.empty()) {
+        delete variant_;
+        variant_ = nullptr;
+        return *this;
+    }
+    // Hold the new string in a LocalPointer so that every early return frees
+    // it. Previously it leaked when the CharString constructor reported a
+    // failure through status_.
+    LocalPointer<CharString> new_variant(new CharString(variant, status_));
+    if (new_variant.isNull()) {
+        status_ = U_MEMORY_ALLOCATION_ERROR;
+        return *this;
+    }
+    if (U_FAILURE(status_)) { return *this; }
+    transform(new_variant->data(), new_variant->length());
+    if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    // Success: replace the old variant and take ownership of the new one.
+    delete variant_;
+    variant_ = new_variant.orphan();
+    return *this;
+}
+
+// Checks that `value` is well-formed for the legacy keyword `key`:
+// - one-character key: an alphanumeric extension singleton whose value must
+//   pass _isExtensionSubtags();
+// - the attribute key: value must be valid Unicode locale attributes;
+// - anything else: key and value are converted to their Unicode (BCP 47)
+//   forms, and both conversions must succeed and be well-formed.
+static bool
+_isKeywordValue(const char* key, const char* value, int32_t value_len)
+{
+    if (key[1] == '\0') {
+        // one char key
+        return (UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
+                _isExtensionSubtags(key[0], value, value_len));
+    }
+    if (uprv_strcmp(key, kAttributeKey) == 0) {
+        // unicode attributes
+        return ultag_isUnicodeLocaleAttributes(value, value_len);
+    }
+    // otherwise: unicode extension value, converted from legacy key/value
+    const char* bcpKey = uloc_toUnicodeLocaleKey(key);
+    const char* bcpType = uloc_toUnicodeLocaleType(key, value);
+    if (!bcpKey || !bcpType) {
+        return false;
+    }
+    return ultag_isUnicodeLocaleKey(bcpKey, -1) &&
+           ultag_isUnicodeLocaleType(bcpType, -1);
+}
+
+// Copies keyword values from `from` to `to`.
+// `keywords` is the enumeration to walk; when it is nullptr one is created
+// from `from` and owned for the duration of the call. Attribute values are
+// normalized with transform(). When `validate` is true, a value rejected by
+// _isKeywordValue() aborts the copy with U_ILLEGAL_ARGUMENT_ERROR.
+static void
+_copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
+                Locale& to, bool validate, UErrorCode& errorCode)
+{
+    if (U_FAILURE(errorCode)) { return; }
+    LocalPointer<icu::StringEnumeration> ownedKeywords;
+    if (keywords == nullptr) {
+        ownedKeywords.adoptInstead(from.createKeywords(errorCode));
+        if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
+        keywords = ownedKeywords.getAlias();
+    }
+    for (const char* key = keywords->next(nullptr, errorCode);
+         key != nullptr;
+         key = keywords->next(nullptr, errorCode)) {
+        CharString value;
+        CharStringByteSink sink(&value);
+        from.getKeywordValue(key, sink, errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+
+        const bool isAttributeKey = (uprv_strcmp(key, kAttributeKey) == 0);
+        if (isAttributeKey) {
+            transform(value.data(), value.length());
+        }
+        if (validate && !_isKeywordValue(key, value.data(), value.length())) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        to.setKeywordValue(key, value.data(), errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+    }
+}
+
+// Removes everything belonging to the Unicode ('u') extension from `locale`:
+// the attribute keyword is set to "" and every Unicode keyword is set to nullptr.
+static void
+_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
+{
+    // Clear Unicode attributes
+    locale.setKeywordValue(kAttributeKey, "", errorCode);
+
+    // Clear all Unicode keyword values
+    LocalPointer<icu::StringEnumeration> unicodeKeys(locale.createUnicodeKeywords(errorCode));
+    if (U_FAILURE(errorCode) || unicodeKeys.isNull()) { return; }
+    for (const char* key = unicodeKeys->next(nullptr, errorCode);
+         key != nullptr;
+         key = unicodeKeys->next(nullptr, errorCode)) {
+        locale.setUnicodeKeywordValue(key, nullptr, errorCode);
+    }
+}
+
+// Adds the Unicode extension subtags in `value` to `locale`: the value is
+// parsed as part of the language tag "und-u-<value>" and the resulting
+// keywords are copied over without validation.
+static void
+_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
+{
+    CharString tag("und-u-", errorCode);
+    tag.append(value, errorCode);
+    const Locale parsed = Locale::forLanguageTag(tag.data(), errorCode);
+    _copyExtensions(parsed, nullptr, locale, false, errorCode);
+}
+
+// Sets the extension identified by the singleton `key`.
+// The value is normalized with transform() and, unless empty, validated for
+// that singleton. For 'u'/'U' the existing Unicode attributes and keywords
+// are cleared first and then rebuilt from the value (an empty value only
+// clears). Any other singleton is stored as a plain keyword. Errors are
+// recorded in status_; no-op when the builder already holds an error.
+LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
+{
+    if (U_FAILURE(status_)) { return *this; }
+    if (!UPRV_ISALPHANUM(key)) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    CharString value_str(value, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(value_str.data(), value_str.length());
+    const bool hasValue = !value_str.isEmpty();
+    if (hasValue &&
+        !_isExtensionSubtags(key, value_str.data(), value_str.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (extensions_ == nullptr) {
+        extensions_ = new Locale();
+        if (extensions_ == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+    }
+    if (uprv_tolower(key) == 'u') {
+        // Unicode extension: replace all attributes and keywords.
+        _clearUAttributesAndKeyType(*extensions_, status_);
+        if (U_FAILURE(status_)) { return *this; }
+        if (!value.empty()) {
+            _setUnicodeExtensions(*extensions_, value_str, status_);
+        }
+    } else {
+        // for t, x and others extension.
+        extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),
+                                     status_);
+    }
+    return *this;
+}
+
+// Sets one Unicode locale keyword. The key must be a well-formed Unicode
+// locale key and a non-empty type must be a well-formed Unicode locale type;
+// otherwise U_ILLEGAL_ARGUMENT_ERROR is recorded. The extensions container is
+// created on demand. No-op when the builder already holds an error.
+LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
+      StringPiece key, StringPiece type)
+{
+    if (U_FAILURE(status_)) { return *this; }
+    const bool keyOk = ultag_isUnicodeLocaleKey(key.data(), key.length());
+    if (!keyOk ||
+        (!type.empty() &&
+         !ultag_isUnicodeLocaleType(type.data(), type.length()))) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (extensions_ == nullptr) {
+        extensions_ = new Locale();
+        if (extensions_ == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+    }
+    extensions_->setUnicodeKeywordValue(key, type, status_);
+    return *this;
+}
+
+// Adds one Unicode locale attribute to the builder.
+// The value is normalized with transform() and must be a well-formed
+// attribute, otherwise U_ILLEGAL_ARGUMENT_ERROR is recorded. The attribute
+// is merged into the existing list in sorted position; adding an attribute
+// that is already present changes nothing. The merged list is stored under
+// the attribute keyword, joined with '_'.
+LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
+    StringPiece value)
+{
+    CharString value_str(value, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(value_str.data(), value_str.length());
+    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (extensions_ == nullptr) {
+        // No extensions yet: the new attribute is the whole list.
+        extensions_ = new Locale();
+        if (extensions_ == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
+        return *this;
+    }
+
+    CharString attributes;
+    CharStringByteSink sink(&attributes);
+    UErrorCode localErrorCode = U_ZERO_ERROR;
+    extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
+    if (U_FAILURE(localErrorCode)) {
+        CharString new_attributes(value_str.data(), status_);
+        // No attributes, set the attribute.
+        extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
+        return *this;
+    }
+
+    // Split the existing list in place: replace each separator ('_' or '-')
+    // with a NUL terminator and lowercase everything else, so that the loop
+    // below can walk the attributes one by one with uprv_strcmp/uprv_strlen.
+    // This is the same approach removeUnicodeLocaleAttribute() uses. Without
+    // the split the whole list would be compared as a single token, and
+    // duplicate detection and sorted insertion would not work for lists
+    // holding more than one attribute.
+    char* p = attributes.data();
+    for (int32_t i = 0; i < attributes.length(); i++, p++) {
+        *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
+    }
+
+    const char* start = attributes.data();
+    const char* limit = attributes.data() + attributes.length();
+    CharString new_attributes;
+    bool inserted = false;
+    while (start < limit) {
+        if (!inserted) {
+            int cmp = uprv_strcmp(start, value_str.data());
+            if (cmp == 0) { return *this; }  // Found it in attributes: Just return
+            if (cmp > 0) {
+                // First existing attribute that sorts after the new one:
+                // insert the new attribute in front of it.
+                if (!new_attributes.isEmpty()) new_attributes.append('_', status_);
+                new_attributes.append(value_str.data(), status_);
+                inserted = true;
+            }
+        }
+        if (!new_attributes.isEmpty()) {
+            new_attributes.append('_', status_);
+        }
+        new_attributes.append(start, status_);
+        start += uprv_strlen(start) + 1;  // skip this attribute and its terminator
+    }
+    if (!inserted) {
+        // The new attribute sorts after every existing one (or the list was empty).
+        if (!new_attributes.isEmpty()) {
+            new_attributes.append('_', status_);
+        }
+        new_attributes.append(value_str.data(), status_);
+    }
+    // Not yet in the attributes, set the attribute.
+    extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
+    return *this;
+}
+
+// Removes one Unicode locale attribute from the builder.
+// The value is normalized with transform() and must be a well-formed
+// attribute, otherwise U_ILLEGAL_ARGUMENT_ERROR is recorded. Nothing happens
+// when there are no extensions, the attribute list cannot be read or is
+// empty, or the attribute is not in the list.
+LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
+    StringPiece value)
+{
+    CharString value_str(value, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(value_str.data(), value_str.length());
+    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (extensions_ == nullptr) { return *this; }
+
+    UErrorCode localErrorCode = U_ZERO_ERROR;
+    CharString attributes;
+    CharStringByteSink sink(&attributes);
+    extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
+    // Lookup failed, or there are no attributes at all: nothing to remove.
+    if (U_FAILURE(localErrorCode) || attributes.isEmpty()) { return *this; }
+
+    // Replace the separators '_' and '-' with null terminators in place
+    // (lowercasing everything else) so that each attribute can be compared
+    // with uprv_strcmp below.
+    char* const buffer = attributes.data();
+    const int32_t length = attributes.length();
+    for (int32_t i = 0; i < length; ++i) {
+        const char c = buffer[i];
+        buffer[i] = (c == '_' || c == '-') ? '\0' : uprv_tolower(c);
+    }
+
+    // Rebuild the list without the attribute to remove.
+    const char* const limit = buffer + length;
+    CharString new_attributes;
+    bool found = false;
+    for (const char* token = buffer; token < limit;
+         token += uprv_strlen(token) + 1) {
+        if (uprv_strcmp(token, value_str.data()) == 0) {
+            found = true;
+            continue;
+        }
+        if (!new_attributes.isEmpty()) {
+            new_attributes.append('_', status_);
+        }
+        new_attributes.append(token, status_);
+    }
+    // Only write back when the attribute was actually present.
+    if (found) {
+        extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
+    }
+    return *this;
+}
+
+// Resets the builder to its freshly constructed state, including the
+// recorded error status.
+LocaleBuilder& LocaleBuilder::clear()
+{
+    status_ = U_ZERO_ERROR;
+
+    // Empty the three fixed-size fields.
+    language_[0] = 0;
+    script_[0] = 0;
+    region_[0] = 0;
+
+    // Drop the heap-allocated variant, then the extensions.
+    delete variant_;
+    variant_ = nullptr;
+    return clearExtensions();
+}
+
+// Discards all extensions; the other fields and status_ are left untouched.
+LocaleBuilder& LocaleBuilder::clearExtensions()
+{
+    Locale* const discarded = extensions_;
+    extensions_ = nullptr;
+    delete discarded;
+    return *this;
+}
+
+// Returns a Locale that has been marked bogus; build() returns it on failure.
+Locale makeBogusLocale() {
+    Locale result;
+    result.setToBogus();
+    return result;
+}
+
+// Copies all keywords of `src` into the builder's extensions, creating the
+// extensions container on demand. Returns without changes when errorCode
+// already indicates failure, when the keyword enumeration cannot be created,
+// or when `src` has no keywords.
+// NOTE(review): an allocation failure is recorded in status_ only, not in
+// errorCode - confirm callers check the builder status as well.
+void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
+{
+    if (U_FAILURE(errorCode)) { return; }
+    LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
+    if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
+        // Error, or no extensions to copy.
+        return;
+    }
+    if (extensions_ == nullptr) {
+        Locale* const created = new Locale();
+        extensions_ = created;
+        if (created == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+    }
+    _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
+}
+
+// Builds a Locale from the current builder state.
+// Returns a bogus Locale when errorCode already indicates failure, when the
+// builder holds a recorded error (copied into errorCode), or when assembling
+// the locale or copying the validated extensions fails.
+Locale LocaleBuilder::build(UErrorCode& errorCode)
+{
+    if (U_FAILURE(errorCode)) {
+        return makeBogusLocale();
+    }
+    if (U_FAILURE(status_)) {
+        errorCode = status_;
+        return makeBogusLocale();
+    }
+
+    // Assemble "language[-script][-region][-variant]".
+    CharString locale_str(language_, errorCode);
+    if (script_[0] != 0) {
+        locale_str.append('-', errorCode).append(StringPiece(script_), errorCode);
+    }
+    if (region_[0] != 0) {
+        locale_str.append('-', errorCode).append(StringPiece(region_), errorCode);
+    }
+    if (variant_ != nullptr) {
+        locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
+    }
+    if (U_FAILURE(errorCode)) {
+        return makeBogusLocale();
+    }
+
+    Locale product(locale_str.data());
+    if (extensions_ != nullptr) {
+        // Extensions are validated while they are copied into the result.
+        _copyExtensions(*extensions_, nullptr, product, true, errorCode);
+    }
+    if (U_FAILURE(errorCode)) {
+        return makeBogusLocale();
+    }
+    return product;
+}
+
+// Copies the builder's status into outErrorCode unless outErrorCode already
+// holds a failure. Returns TRUE when outErrorCode indicates a failure
+// afterwards (either the older one or the one just copied).
+UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {
+    if (U_SUCCESS(outErrorCode)) {
+        outErrorCode = status_;
+        return U_FAILURE(outErrorCode);
+    }
+    // Do not overwrite the older error code
+    return TRUE;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/localematcher.cpp b/thirdparty/icu4c/common/localematcher.cpp
new file mode 100644
index 0000000000..5795cbf87e
--- /dev/null
+++ b/thirdparty/icu4c/common/localematcher.cpp
@@ -0,0 +1,846 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// localematcher.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/localebuilder.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uloc.h"
+#include "unicode/uobject.h"
+#include "cstring.h"
+#include "localeprioritylist.h"
+#include "loclikelysubtags.h"
+#include "locdistance.h"
+#include "lsr.h"
+#include "uassert.h"
+#include "uhash.h"
+#include "ustr_imp.h"
+#include "uvector.h"
+
+#define UND_LSR LSR("und", "", "", LSR::EXPLICIT_LSR)
+
+// Declared in this .cpp file, ahead of U_NAMESPACE_BEGIN; it is consumed below
+// by LocaleLsrIterator to decide whether a remembered locale is copied or aliased.
+/**
+ * Indicator for the lifetime of desired-locale objects passed into the LocaleMatcher.
+ *
+ * @draft ICU 65
+ */
+enum ULocMatchLifetime {
+ /**
+ * Locale objects are temporary.
+ * The matcher will make a copy of a locale that will be used beyond one function call.
+ *
+ * @draft ICU 65
+ */
+ ULOCMATCH_TEMPORARY_LOCALES,
+ /**
+ * Locale objects are stored at least as long as the matcher is used.
+ * The matcher will keep only a pointer to a locale that will be used beyond one function call,
+ * avoiding a copy.
+ *
+ * @draft ICU 65
+ */
+ ULOCMATCH_STORED_LOCALES // TODO: permanent? cached? clone?
+};
+// The typedef below is skipped when U_IN_DOXYGEN is defined.
+#ifndef U_IN_DOXYGEN
+typedef enum ULocMatchLifetime ULocMatchLifetime;
+#endif
+
+U_NAMESPACE_BEGIN
+
+// Move constructor: copies every field, then detaches src from the desired
+// locale if (and only if) that locale was owned.
+LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) U_NOEXCEPT :
+        desiredLocale(src.desiredLocale),
+        supportedLocale(src.supportedLocale),
+        desiredIndex(src.desiredIndex),
+        supportedIndex(src.supportedIndex),
+        desiredIsOwned(src.desiredIsOwned) {
+    if (!desiredIsOwned) {
+        // Borrowed pointer: src is left untouched.
+        return;
+    }
+    // Ownership now lives here; src must not delete the locale in its destructor.
+    src.desiredIsOwned = FALSE;
+    src.desiredIndex = -1;
+    src.desiredLocale = nullptr;
+}
+
+// Deletes the desired locale only when this Result owns it.
+LocaleMatcher::Result::~Result() {
+    if (!desiredIsOwned) {
+        return;
+    }
+    delete desiredLocale;
+}
+
+// Move assignment: releases the currently owned desired locale (if any), takes
+// over all of src's fields, and detaches src from an owned desired locale.
+LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&src) U_NOEXCEPT {
+    // Same cleanup as the destructor performs.
+    if (desiredIsOwned) {
+        delete desiredLocale;
+    }
+
+    desiredIsOwned = src.desiredIsOwned;
+    desiredLocale = src.desiredLocale;
+    desiredIndex = src.desiredIndex;
+    supportedLocale = src.supportedLocale;
+    supportedIndex = src.supportedIndex;
+
+    if (desiredIsOwned) {
+        src.desiredIsOwned = FALSE;
+        src.desiredIndex = -1;
+        src.desiredLocale = nullptr;
+    }
+    return *this;
+}
+
+// Builds a locale that starts from the supported locale and takes over the
+// region, variants and extensions of the best desired locale.
+// Returns the root locale on failure or when there is no supported locale.
+Locale LocaleMatcher::Result::makeResolvedLocale(UErrorCode &errorCode) const {
+    if (supportedLocale == nullptr || U_FAILURE(errorCode)) {
+        return Locale::getRoot();
+    }
+    const Locale *desired = getDesiredLocale();
+    if (desired == nullptr) {
+        return *supportedLocale;
+    }
+    if (*supportedLocale == *desired) {
+        return *supportedLocale;
+    }
+    LocaleBuilder resolved;
+    resolved.setLocale(*supportedLocale);
+
+    // Region: taken from the desired locale when it has one.
+    const char *desiredRegion = desired->getCountry();
+    if (desiredRegion[0] != 0) {
+        resolved.setRegion(desiredRegion);
+    }
+
+    // Variants: taken from the desired locale when it has any.
+    // This overrides any supportedLocale variants.
+    // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
+    const char *desiredVariants = desired->getVariant();
+    if (desiredVariants[0] != 0) {
+        resolved.setVariant(desiredVariants);
+    }
+
+    // Extensions: copied from the desired locale when it has any.
+    // C++ note: The following note, copied from Java, may not be true,
+    // as long as C++ copies by legacy ICU keyword, not by extension singleton.
+    // Note that this will override any supportedLocale extensions.
+    // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
+    // (replacing calendar).
+    resolved.copyExtensionsFrom(*desired, errorCode);
+    return resolved.build(errorCode);
+}
+
+// Move constructor: takes over all settings and every owned pointer of src.
+// src is left without ownership of the supported-locale vector, the default
+// locale, or the max-distance locale pair.
+LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) U_NOEXCEPT :
+        errorCode_(src.errorCode_),
+        supportedLocales_(src.supportedLocales_),
+        thresholdDistance_(src.thresholdDistance_),
+        demotion_(src.demotion_),
+        defaultLocale_(src.defaultLocale_),
+        withDefault_(src.withDefault_),
+        favor_(src.favor_),
+        direction_(src.direction_) {
+    // The max-distance pair set via setMaxDistance() must move as well;
+    // otherwise the new builder silently loses that setting.
+    // Assigned in the body so the member declaration order does not matter.
+    maxDistanceDesired_ = src.maxDistanceDesired_;
+    maxDistanceSupported_ = src.maxDistanceSupported_;
+
+    src.supportedLocales_ = nullptr;
+    src.defaultLocale_ = nullptr;
+    src.maxDistanceDesired_ = nullptr;
+    src.maxDistanceSupported_ = nullptr;
+}
+
+// Deletes the max-distance locale pair, the default locale and the
+// supported-locale vector held by this builder.
+LocaleMatcher::Builder::~Builder() {
+    // Locale pair stored by setMaxDistance().
+    delete maxDistanceSupported_;
+    delete maxDistanceDesired_;
+    // Default locale and the vector of supported locales.
+    delete defaultLocale_;
+    delete supportedLocales_;
+}
+
+// Move assignment: releases everything this builder owns, then takes over all
+// settings and owned pointers of src.
+LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) U_NOEXCEPT {
+    // Runs the destructor body, which deletes all four owned pointers.
+    // Every one of them must therefore be reassigned below; a pointer left
+    // untouched would dangle and be deleted a second time at destruction.
+    this->~Builder();
+
+    errorCode_ = src.errorCode_;
+    supportedLocales_ = src.supportedLocales_;
+    thresholdDistance_ = src.thresholdDistance_;
+    demotion_ = src.demotion_;
+    defaultLocale_ = src.defaultLocale_;
+    withDefault_ = src.withDefault_;
+    favor_ = src.favor_;
+    direction_ = src.direction_;
+    maxDistanceDesired_ = src.maxDistanceDesired_;
+    maxDistanceSupported_ = src.maxDistanceSupported_;
+
+    // src no longer owns any of the transferred objects.
+    src.supportedLocales_ = nullptr;
+    src.defaultLocale_ = nullptr;
+    src.maxDistanceDesired_ = nullptr;
+    src.maxDistanceSupported_ = nullptr;
+    return *this;
+}
+
+// Empties the supported-locale vector if it has been created.
+void LocaleMatcher::Builder::clearSupportedLocales() {
+    if (supportedLocales_ == nullptr) {
+        return;
+    }
+    supportedLocales_->removeAllElements();
+}
+
+// Lazily creates supportedLocales_. Returns false if the builder is already in
+// an error state or the vector could not be created (errorCode_ is set then).
+bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
+    if (U_FAILURE(errorCode_)) {
+        return false;
+    }
+    if (supportedLocales_ == nullptr) {
+        supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_);
+        if (U_FAILURE(errorCode_)) {
+            return false;
+        }
+        if (supportedLocales_ == nullptr) {
+            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+            return false;
+        }
+    }
+    return true;
+}
+
+// Replaces the supported locales with those parsed from a locale list string.
+// Entries for which orphanLocaleAt() returns nullptr are skipped.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListString(
+        StringPiece locales) {
+    LocalePriorityList parsed(locales, errorCode_);
+    if (U_FAILURE(errorCode_)) { return *this; }
+    clearSupportedLocales();
+    if (!ensureSupportedLocaleVector()) { return *this; }
+    const int32_t count = parsed.getLengthIncludingRemoved();
+    for (int32_t idx = 0; idx < count; ++idx) {
+        Locale *adopted = parsed.orphanLocaleAt(idx);
+        if (adopted != nullptr) {
+            supportedLocales_->addElement(adopted, errorCode_);
+            if (U_FAILURE(errorCode_)) {
+                // The vector did not take the element; free it here.
+                delete adopted;
+                break;
+            }
+        }
+    }
+    return *this;
+}
+
+// Replaces the supported locales with clones of the locales from the iterator.
+// Stops at the first allocation or insertion failure, leaving errorCode_ set.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
+    if (U_FAILURE(errorCode_)) { return *this; }
+    clearSupportedLocales();
+    if (!ensureSupportedLocaleVector()) { return *this; }
+    while (locales.hasNext()) {
+        Locale *copy = locales.next().clone();
+        if (copy == nullptr) {
+            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+            break;
+        }
+        supportedLocales_->addElement(copy, errorCode_);
+        if (U_FAILURE(errorCode_)) {
+            delete copy;
+            break;
+        }
+    }
+    return *this;
+}
+
+// Appends a clone of locale to the supported locales, creating the vector if needed.
+LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
+    if (!ensureSupportedLocaleVector()) { return *this; }
+    Locale *copy = locale.clone();
+    if (copy != nullptr) {
+        supportedLocales_->addElement(copy, errorCode_);
+        if (U_FAILURE(errorCode_)) {
+            delete copy;
+        }
+    } else {
+        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return *this;
+}
+
+// Drops any stored default locale and clears withDefault_.
+// No-op when the builder is already in an error state.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setNoDefaultLocale() {
+    if (U_SUCCESS(errorCode_)) {
+        delete defaultLocale_;
+        defaultLocale_ = nullptr;
+        withDefault_ = false;
+    }
+    return *this;
+}
+
+// Stores a clone of defaultLocale (or nullptr if none was given) and sets
+// withDefault_. The previous default is only deleted once the clone succeeded.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setDefaultLocale(const Locale *defaultLocale) {
+    if (U_FAILURE(errorCode_)) { return *this; }
+    Locale *copy = (defaultLocale != nullptr) ? defaultLocale->clone() : nullptr;
+    if (defaultLocale != nullptr && copy == nullptr) {
+        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+        return *this;
+    }
+    delete defaultLocale_;
+    defaultLocale_ = copy;
+    withDefault_ = true;
+    return *this;
+}
+
+LocaleMatcher::Builder &LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag subtag) {
+ if (U_FAILURE(errorCode_)) { return *this; }
+ favor_ = subtag;
+ return *this;
+}
+
+LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion demotion) {
+ if (U_FAILURE(errorCode_)) { return *this; }
+ demotion_ = demotion;
+ return *this;
+}
+
+// Stores clones of the (desired, supported) pair, replacing any previous pair.
+// If either clone fails, nothing is replaced and errorCode_ is set.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setMaxDistance(const Locale &desired,
+                                                               const Locale &supported) {
+    if (U_FAILURE(errorCode_)) { return *this; }
+    Locale *newDesired = desired.clone();
+    Locale *newSupported = supported.clone();
+    if (newDesired != nullptr && newSupported != nullptr) {
+        delete maxDistanceDesired_;
+        delete maxDistanceSupported_;
+        maxDistanceDesired_ = newDesired;
+        maxDistanceSupported_ = newSupported;
+        return *this;
+    }
+    // At least one clone failed; release whichever one did succeed.
+    delete newDesired;
+    delete newSupported;
+    errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+    return *this;
+}
+
+#if 0
+// NOTE: everything up to the matching #endif is excluded from compilation.
+// The "@Deprecated" line below is a Java annotation, not valid C++.
+/**
+ * <i>Internal only!</i>
+ *
+ * @param thresholdDistance the thresholdDistance to set, with -1 = default
+ * @return this Builder object
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+@Deprecated
+LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int32_t thresholdDistance) {
+ if (U_FAILURE(errorCode_)) { return *this; }
+ // Values above 100 are clamped to 100.
+ if (thresholdDistance > 100) {
+ thresholdDistance = 100;
+ }
+ thresholdDistance_ = thresholdDistance;
+ return *this;
+}
+#endif
+
+// Copies the builder's failure code into outErrorCode unless outErrorCode is
+// already a failure. Returns TRUE if outErrorCode is a failure afterwards.
+UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const {
+    if (U_FAILURE(outErrorCode)) {
+        return TRUE;
+    }
+    if (U_FAILURE(errorCode_)) {
+        outErrorCode = errorCode_;
+        return TRUE;
+    }
+    return FALSE;
+}
+
+// Constructs a LocaleMatcher from this builder. A failure recorded in the
+// builder is propagated to errorCode first, unless errorCode already failed.
+LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode_) && U_SUCCESS(errorCode)) {
+        errorCode = errorCode_;
+    }
+    return LocaleMatcher(*this, errorCode);
+}
+
+namespace {
+
+// Returns the maximized LSR for locale, or UND_LSR when errorCode has failed,
+// the locale is bogus, or its name is empty.
+LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale,
+                       UErrorCode &errorCode) {
+    const bool usable =
+        U_SUCCESS(errorCode) && !locale.isBogus() && *locale.getName() != 0 /* not "und" */;
+    if (usable) {
+        return likelySubtags.makeMaximizedLsrFrom(locale, errorCode);
+    }
+    return UND_LSR;
+}
+
+// Hash callback for the LSR table: returns the hashCode stored in the LSR.
+int32_t hashLSR(const UHashTok token) {
+    return static_cast<const LSR *>(token.pointer)->hashCode;
+}
+
+// Key-comparison callback for the LSR table: compares the pointed-to LSRs with ==.
+UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
+    const LSR &left = *static_cast<const LSR *>(t1.pointer);
+    const LSR &right = *static_cast<const LSR *>(t2.pointer);
+    return left == right;
+}
+
+}  // namespace
+
+// Adds lsr (with supported index i) to the hash table and the parallel
+// supportedLSRs/supportedIndexes arrays if the table has no entry for it yet.
+// Returns the new number of used array slots.
+int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength,
+                                   UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return suppLength; }
+    if (uhash_geti(supportedLsrToIndex, &lsr) != 0) {
+        // Values are stored as index+1, so a lookup result of 0 means "absent".
+        return suppLength;
+    }
+    uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), i + 1, &errorCode);
+    if (U_FAILURE(errorCode)) { return suppLength; }
+    supportedLSRs[suppLength] = &lsr;
+    supportedIndexes[suppLength] = i;
+    return suppLength + 1;
+}
+
+/**
+ * Builds a matcher from the builder's settings.
+ *
+ * Clones the builder's default locale and supported locales, computes a
+ * maximized LSR per supported locale, and fills the LSR hash table plus the
+ * ordered (LSR, supported index) arrays. On any failure errorCode is set and
+ * the constructor returns early; the object must stay destructible then.
+ *
+ * @param builder   source of the supported locales, default locale and options
+ * @param errorCode in/out ICU error code
+ */
+LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
+        likelySubtags(*XLikelySubtags::getSingleton(errorCode)),
+        localeDistance(*LocaleDistance::getSingleton(errorCode)),
+        thresholdDistance(builder.thresholdDistance_),
+        demotionPerDesiredLocale(0),
+        favorSubtag(builder.favor_),
+        direction(builder.direction_),
+        supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0),
+        supportedLsrToIndex(nullptr),
+        supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
+        ownedDefaultLocale(nullptr), defaultLocale(nullptr) {
+    if (U_FAILURE(errorCode)) { return; }
+    const Locale *def = builder.defaultLocale_;
+    LSR builderDefaultLSR;
+    const LSR *defLSR = nullptr;
+    if (def != nullptr) {
+        ownedDefaultLocale = def->clone();
+        if (ownedDefaultLocale == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        def = ownedDefaultLocale;
+        builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        defLSR = &builderDefaultLSR;
+    }
+    supportedLocalesLength = builder.supportedLocales_ != nullptr ?
+        builder.supportedLocales_->size() : 0;
+    if (supportedLocalesLength > 0) {
+        // Store the supported locales in input order,
+        // so that when different types are used (e.g., language tag strings)
+        // we can return those by parallel index.
+        supportedLocales = static_cast<const Locale **>(
+            uprv_malloc(supportedLocalesLength * sizeof(const Locale *)));
+        // Supported LSRs in input order.
+        // In C++, we store these permanently to simplify ownership management
+        // in the hash tables. Duplicate LSRs (if any) are unused overhead.
+        lsrs = new LSR[supportedLocalesLength];
+        if (supportedLocales == nullptr || lsrs == nullptr) {
+            // Keep the object destructible: the destructor walks
+            // supportedLocales[0..supportedLocalesLength), so it must never see
+            // a null array or one whose slots have not been zeroed yet.
+            uprv_free(supportedLocales);
+            supportedLocales = nullptr;
+            supportedLocalesLength = 0;
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        // If the constructor fails partway, we need null pointers for destructibility.
+        uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *));
+        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
+            const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i));
+            supportedLocales[i] = locale.clone();
+            if (supportedLocales[i] == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            const Locale &supportedLocale = *supportedLocales[i];
+            LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode);
+            lsr.setHashCode();
+            if (U_FAILURE(errorCode)) { return; }
+        }
+
+        // We need an unordered map from LSR to first supported locale with that LSR,
+        // and an ordered list of (LSR, supported index) for
+        // the supported locales in the following order:
+        // 1. Default locale, if it is supported.
+        // 2. Priority locales (aka "paradigm locales") in builder order.
+        // 3. Remaining locales in builder order.
+        supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
+                                             supportedLocalesLength, &errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        supportedLSRs = static_cast<const LSR **>(
+            uprv_malloc(supportedLocalesLength * sizeof(const LSR *)));
+        supportedIndexes = static_cast<int32_t *>(
+            uprv_malloc(supportedLocalesLength * sizeof(int32_t)));
+        if (supportedLSRs == nullptr || supportedIndexes == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        int32_t suppLength = 0;
+        // Determine insertion order.
+        // Add locales immediately that are equivalent to the default.
+        MaybeStackArray<int8_t, 100> order(supportedLocalesLength, errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        int32_t numParadigms = 0;
+        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
+            const Locale &locale = *supportedLocales[i];
+            const LSR &lsr = lsrs[i];
+            if (defLSR == nullptr && builder.withDefault_) {
+                // Implicit default locale = first supported locale, if not turned off.
+                U_ASSERT(i == 0);
+                def = &locale;
+                defLSR = &lsr;
+                order[i] = 1;
+                suppLength = putIfAbsent(lsr, 0, suppLength, errorCode);
+            } else if (defLSR != nullptr && lsr.isEquivalentTo(*defLSR)) {
+                order[i] = 1;
+                suppLength = putIfAbsent(lsr, i, suppLength, errorCode);
+            } else if (localeDistance.isParadigmLSR(lsr)) {
+                order[i] = 2;
+                ++numParadigms;
+            } else {
+                order[i] = 3;
+            }
+            if (U_FAILURE(errorCode)) { return; }
+        }
+        // Add supported paradigm locales.
+        int32_t paradigmLimit = suppLength + numParadigms;
+        for (int32_t i = 0; i < supportedLocalesLength && suppLength < paradigmLimit; ++i) {
+            if (order[i] == 2) {
+                suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
+            }
+        }
+        // Add remaining supported locales.
+        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
+            if (order[i] == 3) {
+                suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
+            }
+        }
+        supportedLSRsLength = suppLength;
+        // If supportedLSRsLength < supportedLocalesLength then
+        // we waste as many array slots as there are duplicate supported LSRs,
+        // but the amount of wasted space is small as long as there are few duplicates.
+    }
+
+    defaultLocale = def;
+
+    if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
+        demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
+    }
+
+    if (thresholdDistance >= 0) {
+        // already copied
+    } else if (builder.maxDistanceDesired_ != nullptr) {
+        // Derive the threshold from the distance between the builder's
+        // max-distance (desired, supported) pair.
+        LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceSupported_, errorCode);
+        const LSR *pSuppLSR = &suppLSR;
+        int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
+            getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceDesired_, errorCode),
+            &pSuppLSR, 1,
+            LocaleDistance::shiftDistance(100), favorSubtag, direction);
+        if (U_SUCCESS(errorCode)) {
+            // +1 for an exclusive threshold from an inclusive max.
+            thresholdDistance = LocaleDistance::getDistanceFloor(indexAndDistance) + 1;
+        } else {
+            thresholdDistance = 0;
+        }
+    } else {
+        thresholdDistance = localeDistance.getDefaultScriptDistance();
+    }
+}
+
+// Move constructor: copies all fields from src, then resets src's pointers and
+// lengths so that src's destructor has nothing left to release.
+LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT :
+        likelySubtags(src.likelySubtags),
+        localeDistance(src.localeDistance),
+        thresholdDistance(src.thresholdDistance),
+        demotionPerDesiredLocale(src.demotionPerDesiredLocale),
+        favorSubtag(src.favorSubtag),
+        direction(src.direction),
+        supportedLocales(src.supportedLocales), lsrs(src.lsrs),
+        supportedLocalesLength(src.supportedLocalesLength),
+        supportedLsrToIndex(src.supportedLsrToIndex),
+        supportedLSRs(src.supportedLSRs),
+        supportedIndexes(src.supportedIndexes),
+        supportedLSRsLength(src.supportedLSRsLength),
+        ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale) {
+    // Supported locales and their LSRs.
+    src.supportedLocalesLength = 0;
+    src.supportedLocales = nullptr;
+    src.lsrs = nullptr;
+    // Hash table and the ordered LSR/index arrays.
+    src.supportedLsrToIndex = nullptr;
+    src.supportedLSRsLength = 0;
+    src.supportedLSRs = nullptr;
+    src.supportedIndexes = nullptr;
+    // Default locale.
+    src.ownedDefaultLocale = nullptr;
+    src.defaultLocale = nullptr;
+}
+
+// Releases the cloned supported locales, the LSR array, the hash table, the
+// ordered LSR/index arrays and the owned default locale.
+LocaleMatcher::~LocaleMatcher() {
+    // The constructor can return early on an allocation failure after it has
+    // set supportedLocalesLength but while supportedLocales is still null,
+    // so the array must be checked before its slots are read.
+    if (supportedLocales != nullptr) {
+        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
+            delete supportedLocales[i];
+        }
+    }
+    uprv_free(supportedLocales);
+    delete[] lsrs;
+    uhash_close(supportedLsrToIndex);
+    uprv_free(supportedLSRs);
+    uprv_free(supportedIndexes);
+    delete ownedDefaultLocale;
+}
+
+// Move assignment: releases this matcher's resources via the destructor, then
+// takes over src's fields and resets src. The reference members likelySubtags
+// and localeDistance are not assigned here.
+LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT {
+    this->~LocaleMatcher();
+
+    // Plain settings.
+    thresholdDistance = src.thresholdDistance;
+    demotionPerDesiredLocale = src.demotionPerDesiredLocale;
+    favorSubtag = src.favorSubtag;
+    direction = src.direction;
+
+    // Each owned resource is taken over and src's copy is reset right away.
+    supportedLocales = src.supportedLocales;
+    src.supportedLocales = nullptr;
+    lsrs = src.lsrs;
+    src.lsrs = nullptr;
+    supportedLocalesLength = src.supportedLocalesLength;
+    src.supportedLocalesLength = 0;
+    supportedLsrToIndex = src.supportedLsrToIndex;
+    src.supportedLsrToIndex = nullptr;
+    supportedLSRs = src.supportedLSRs;
+    src.supportedLSRs = nullptr;
+    supportedIndexes = src.supportedIndexes;
+    src.supportedIndexes = nullptr;
+    supportedLSRsLength = src.supportedLSRsLength;
+    src.supportedLSRsLength = 0;
+    ownedDefaultLocale = src.ownedDefaultLocale;
+    src.ownedDefaultLocale = nullptr;
+    defaultLocale = src.defaultLocale;
+    src.defaultLocale = nullptr;
+    return *this;
+}
+
+// Wraps a Locale::Iterator and yields the maximized LSR of each locale.
+// It can also remember the current locale together with its desired index:
+// as a copy for ULOCMATCH_TEMPORARY_LOCALES, as a plain pointer otherwise.
+class LocaleLsrIterator {
+public:
+    LocaleLsrIterator(const XLikelySubtags &likelySubtags, Locale::Iterator &locales,
+                      ULocMatchLifetime lifetime) :
+            likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {}
+
+    ~LocaleLsrIterator() {
+        // Only the temporary-locales mode stores a copy that has to be freed.
+        if (lifetime != ULOCMATCH_TEMPORARY_LOCALES) {
+            return;
+        }
+        delete remembered;
+    }
+
+    bool hasNext() const {
+        return locales.hasNext();
+    }
+
+    // Advances the wrapped iterator and returns the LSR for the new current locale.
+    LSR next(UErrorCode &errorCode) {
+        current = &locales.next();
+        return getMaximalLsrOrUnd(likelySubtags, *current, errorCode);
+    }
+
+    // Records desiredIndex and the current locale as the best one seen so far.
+    void rememberCurrent(int32_t desiredIndex, UErrorCode &errorCode) {
+        if (U_FAILURE(errorCode)) { return; }
+        bestDesiredIndex = desiredIndex;
+        if (lifetime != ULOCMATCH_STORED_LOCALES) {
+            // ULOCMATCH_TEMPORARY_LOCALES: replace the previous copy with a new one.
+            delete remembered;
+            remembered = new Locale(*current);
+            if (remembered == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+            }
+            return;
+        }
+        remembered = current;
+    }
+
+    // Hands the remembered locale to the caller and forgets it here.
+    const Locale *orphanRemembered() {
+        const Locale *released = remembered;
+        remembered = nullptr;
+        return released;
+    }
+
+    int32_t getBestDesiredIndex() const {
+        return bestDesiredIndex;
+    }
+
+private:
+    const XLikelySubtags &likelySubtags;
+    Locale::Iterator &locales;
+    ULocMatchLifetime lifetime;
+    const Locale *current = nullptr;
+    const Locale *remembered = nullptr;
+    int32_t bestDesiredIndex = -1;
+};
+
+// Returns the supported locale that best matches desiredLocale, or the default
+// locale if there is no match or an error occurred during matching.
+// Returns nullptr if errorCode is already a failure on entry.
+const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    const int32_t bestIndex = getBestSuppIndex(
+        getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
+        nullptr, errorCode);
+    if (U_FAILURE(errorCode) || bestIndex < 0) {
+        return defaultLocale;
+    }
+    return supportedLocales[bestIndex];
+}
+
+// Iterator overload: returns the best supported locale for the desired locales,
+// or the default locale if the iterator is empty, nothing matches, or matching
+// fails. Returns nullptr if errorCode is already a failure on entry.
+const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
+                                          UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    if (!desiredLocales.hasNext()) {
+        return defaultLocale;
+    }
+    LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
+    const int32_t bestIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
+    if (U_FAILURE(errorCode) || bestIndex < 0) {
+        return defaultLocale;
+    }
+    return supportedLocales[bestIndex];
+}
+
+// Parses desiredLocaleList into a priority list and delegates to the
+// iterator overload of getBestMatch().
+const Locale *LocaleMatcher::getBestMatchForListString(
+        StringPiece desiredLocaleList, UErrorCode &errorCode) const {
+    LocalePriorityList parsed(desiredLocaleList, errorCode);
+    LocalePriorityList::Iterator desired = parsed.iterator();
+    return getBestMatch(desired, errorCode);
+}
+
+// Single-locale overload. On a match the Result refers to desiredLocale itself
+// (not owned) with desired index 0; otherwise it carries the default locale
+// and -1 for both indexes.
+LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
+        const Locale &desiredLocale, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) {
+        return Result(nullptr, defaultLocale, -1, -1, FALSE);
+    }
+    const int32_t bestIndex = getBestSuppIndex(
+        getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
+        nullptr, errorCode);
+    if (U_SUCCESS(errorCode) && bestIndex >= 0) {
+        return Result(&desiredLocale, supportedLocales[bestIndex], 0, bestIndex, FALSE);
+    }
+    return Result(nullptr, defaultLocale, -1, -1, FALSE);
+}
+
+// Iterator overload. On a match the Result takes ownership of the copy of the
+// best desired locale remembered by the LSR iterator; otherwise it carries the
+// default locale and -1 for both indexes.
+LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
+        Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode) || !desiredLocales.hasNext()) {
+        return Result(nullptr, defaultLocale, -1, -1, FALSE);
+    }
+    LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
+    const int32_t bestIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
+    if (U_SUCCESS(errorCode) && bestIndex >= 0) {
+        return Result(lsrIter.orphanRemembered(), supportedLocales[bestIndex],
+                      lsrIter.getBestDesiredIndex(), bestIndex, TRUE);
+    }
+    return Result(nullptr, defaultLocale, -1, -1, FALSE);
+}
+
+// Finds the index (into supportedLocales) of the best match for desiredLSR and,
+// when remainingIter is non-null, for the further desired LSRs it yields.
+// Returns -1 if errorCode is or becomes a failure in the loop, or if no match was found.
+// When remainingIter is non-null, the desired locale that produced the match is
+// recorded through remainingIter->rememberCurrent().
+int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter,
+ UErrorCode &errorCode) const {
+ if (U_FAILURE(errorCode)) { return -1; }
+ int32_t desiredIndex = 0;
+ int32_t bestSupportedLsrIndex = -1;
+ // One loop iteration per desired LSR; the distance budget starts at the
+ // shifted thresholdDistance.
+ for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
+ // Quick check for exact maximized LSR.
+ // Returns suppIndex+1 where 0 means not found.
+ if (supportedLsrToIndex != nullptr) {
+ desiredLSR.setHashCode();
+ int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR);
+ if (index != 0) {
+ int32_t suppIndex = index - 1;
+ if (remainingIter != nullptr) {
+ remainingIter->rememberCurrent(desiredIndex, errorCode);
+ }
+ // NOTE(review): errorCode is not re-checked here; the callers in this file
+ // test it after this function returns.
+ return suppIndex;
+ }
+ }
+ int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance(
+ desiredLSR, supportedLSRs, supportedLSRsLength,
+ bestShiftedDistance, favorSubtag, direction);
+ // presumably a negative result means no supported LSR was within
+ // bestShiftedDistance -- TODO confirm against LocaleDistance
+ if (bestIndexAndDistance >= 0) {
+ bestShiftedDistance = LocaleDistance::getShiftedDistance(bestIndexAndDistance);
+ if (remainingIter != nullptr) {
+ remainingIter->rememberCurrent(desiredIndex, errorCode);
+ if (U_FAILURE(errorCode)) { return -1; }
+ }
+ bestSupportedLsrIndex = LocaleDistance::getIndex(bestIndexAndDistance);
+ }
+ // Reduce the budget by the per-desired-locale demotion; stop once nothing is left.
+ if ((bestShiftedDistance -= LocaleDistance::shiftDistance(demotionPerDesiredLocale)) <= 0) {
+ break;
+ }
+ if (remainingIter == nullptr || !remainingIter->hasNext()) {
+ break;
+ }
+ desiredLSR = remainingIter->next(errorCode);
+ if (U_FAILURE(errorCode)) { return -1; }
+ ++desiredIndex;
+ }
+ if (bestSupportedLsrIndex < 0) {
+ // no good match
+ return -1;
+ }
+ // Map the position in the ordered LSR list back to the input-order index.
+ return supportedIndexes[bestSupportedLsrIndex];
+}
+
+// Returns whether getBestIndexAndDistance() finds `supported` for `desired`
+// under this matcher's thresholdDistance (i.e. its result is non-negative).
+UBool LocaleMatcher::isMatch(const Locale &desired, const Locale &supported,
+                             UErrorCode &errorCode) const {
+    LSR supportedLsr = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
+    if (U_FAILURE(errorCode)) { return 0; }
+    const LSR *supportedLsrPtr = &supportedLsr;
+    const int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
+        getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
+        &supportedLsrPtr, 1,
+        LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
+    return indexAndDistance >= 0;
+}
+
+// Returns the inverse of the distance, i.e. 1 - distance(desired, supported),
+// with the distance scaled from 0..100 to 0..1. Returns 0 on failure.
+double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
+    LSR supportedLsr = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
+    if (U_FAILURE(errorCode)) { return 0; }
+    const LSR *supportedLsrPtr = &supportedLsr;
+    const int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
+        getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
+        &supportedLsrPtr, 1,
+        LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
+    const double distance = LocaleDistance::getDistanceDouble(indexAndDistance);
+    return (100.0 - distance) / 100.0;
+}
+
+U_NAMESPACE_END
+
+// uloc_acceptLanguage() --------------------------------------------------- ***
+
+U_NAMESPACE_USE
+
+namespace {
+
+// Converter functor from a locale tag string to a Locale reference.
+class LocaleFromTag {
+public:
+    LocaleFromTag() : locale(Locale::getRoot()) {}
+
+    // Overwrites the stored locale with Locale(tag) and returns a reference to it.
+    const Locale &operator()(const char *tag) {
+        locale = Locale(tag);
+        return locale;
+    }
+
+private:
+    // Store the locale in the converter, rather than return a reference to a temporary,
+    // or a value which could go out of scope with the caller's reference to it.
+    Locale locale;
+};
+
+// Shared implementation of the uloc_acceptLanguage*() functions.
+// Builds a LocaleMatcher from the supportedLocales enumeration, matches the
+// desired locales against it and writes the name of the best supported locale
+// to dest. Returns the result of u_terminateChars(), or 0 on failure.
+// *acceptResult (if non-null) is set to VALID, FALLBACK or FAILED.
+int32_t acceptLanguage(UEnumeration &supportedLocales, Locale::Iterator &desiredLocales,
+                       char *dest, int32_t capacity, UAcceptResult *acceptResult,
+                       UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return 0; }
+    LocaleMatcher::Builder builder;
+    const char *locString;
+    while ((locString = uenum_next(&supportedLocales, nullptr, &errorCode)) != nullptr) {
+        Locale loc(locString);
+        if (loc.isBogus()) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+        builder.addSupportedLocale(loc);
+    }
+    LocaleMatcher matcher = builder.build(errorCode);
+    LocaleMatcher::Result result = matcher.getBestMatchResult(desiredLocales, errorCode);
+    if (U_FAILURE(errorCode)) { return 0; }
+    if (result.getDesiredIndex() >= 0) {
+        if (acceptResult != nullptr) {
+            *acceptResult = *result.getDesiredLocale() == *result.getSupportedLocale() ?
+                ULOC_ACCEPT_VALID : ULOC_ACCEPT_FALLBACK;
+        }
+        const char *bestStr = result.getSupportedLocale()->getName();
+        int32_t bestLength = (int32_t)uprv_strlen(bestStr);
+        // Skip the copy for an empty name: the public entry points allow a null
+        // dest together with capacity 0 (preflighting), and memcpy must not be
+        // handed a null pointer even for a zero-length copy.
+        if (bestLength > 0 && bestLength <= capacity) {
+            uprv_memcpy(dest, bestStr, bestLength);
+        }
+        return u_terminateChars(dest, capacity, bestLength, &errorCode);
+    } else {
+        if (acceptResult != nullptr) {
+            *acceptResult = ULOC_ACCEPT_FAILED;
+        }
+        return u_terminateChars(dest, capacity, 0, &errorCode);
+    }
+}
+
+} // namespace
+
+// C API: matches an array of desired locale tags against availableLocales.
+// Argument validation: a null buffer/list is only accepted together with a
+// size/count of 0; a non-null one needs a non-negative size/count.
+U_CAPI int32_t U_EXPORT2
+uloc_acceptLanguage(char *result, int32_t resultAvailable,
+                    UAcceptResult *outResult,
+                    const char **acceptList, int32_t acceptListCount,
+                    UEnumeration *availableLocales,
+                    UErrorCode *status) {
+    if (U_FAILURE(*status)) { return 0; }
+    const bool badResultBuffer =
+        (result == nullptr) ? (resultAvailable != 0) : (resultAvailable < 0);
+    const bool badAcceptList =
+        (acceptList == nullptr) ? (acceptListCount != 0) : (acceptListCount < 0);
+    if (badResultBuffer || badAcceptList || availableLocales == nullptr) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    LocaleFromTag converter;
+    Locale::ConvertingIterator<const char **, LocaleFromTag> desiredLocales(
+        acceptList, acceptList + acceptListCount, converter);
+    return acceptLanguage(*availableLocales, desiredLocales,
+                          result, resultAvailable, outResult, *status);
+}
+
+// C API: parses httpAcceptLanguage into a priority list and matches it against
+// availableLocales. A null result buffer is only accepted with size 0.
+U_CAPI int32_t U_EXPORT2
+uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable,
+                            UAcceptResult *outResult,
+                            const char *httpAcceptLanguage,
+                            UEnumeration *availableLocales,
+                            UErrorCode *status) {
+    if (U_FAILURE(*status)) { return 0; }
+    const bool badResultBuffer =
+        (result == nullptr) ? (resultAvailable != 0) : (resultAvailable < 0);
+    if (badResultBuffer || httpAcceptLanguage == nullptr || availableLocales == nullptr) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    LocalePriorityList parsed(httpAcceptLanguage, *status);
+    LocalePriorityList::Iterator desiredLocales = parsed.iterator();
+    return acceptLanguage(*availableLocales, desiredLocales,
+                          result, resultAvailable, outResult, *status);
+}
diff --git a/thirdparty/icu4c/common/localeprioritylist.cpp b/thirdparty/icu4c/common/localeprioritylist.cpp
new file mode 100644
index 0000000000..8916b121be
--- /dev/null
+++ b/thirdparty/icu4c/common/localeprioritylist.cpp
@@ -0,0 +1,239 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// localeprioritylist.cpp
+// created: 2019jul11 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/localpointer.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "localeprioritylist.h"
+#include "uarrsort.h"
+#include "uassert.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+ // Hash callback for the UHashtable: the key token points to a Locale.
+ int32_t hashLocale(const UHashTok token) {
+     const Locale &key = *static_cast<const Locale *>(token.pointer);
+     return key.hashCode();
+ }
+
+ // Key-equality callback for the UHashtable: both tokens point to Locale objects.
+ UBool compareLocales(const UHashTok t1, const UHashTok t2) {
+     const Locale &lhs = *static_cast<const Locale *>(t1.pointer);
+     const Locale &rhs = *static_cast<const Locale *>(t2.pointer);
+     return lhs == rhs;
+ }
+
+constexpr int32_t WEIGHT_ONE = 1000;
+
+ // One list entry: a locale pointer, its qvalue weight and its insertion index.
+ struct LocaleAndWeight {
+     Locale *locale;
+     int32_t weight;  // 0..1000 = 0.0..1.0
+     int32_t index;  // force stable sort
+
+     // Orders by descending weight; equal weights keep ascending index order.
+     int32_t compare(const LocaleAndWeight &other) const {
+         if (weight != other.weight) {
+             return other.weight - weight;  // descending: other-this
+         }
+         return index - other.index;
+     }
+ };
+
+ // Comparator passed to uprv_sortArray(); the context argument is unused.
+ int32_t U_CALLCONV
+ compareLocaleAndWeight(const void * /*context*/, const void *left, const void *right) {
+     const LocaleAndWeight &lhs = *static_cast<const LocaleAndWeight *>(left);
+     const LocaleAndWeight &rhs = *static_cast<const LocaleAndWeight *>(right);
+     return lhs.compare(rhs);
+ }
+
+ // Returns the first position in [p, limit) that is not a U+0020 space, or limit.
+ const char *skipSpaces(const char *p, const char *limit) {
+     for (; p < limit; ++p) {
+         if (*p != ' ') { break; }
+     }
+     return p;
+ }
+
+ // Returns the number of chars from p up to (not including) the first
+ // accept-language delimiter (space, comma, semicolon) or up to limit.
+ // Other validation is left to the Locale constructor.
+ int32_t findTagLength(const char *p, const char *limit) {
+     const char *end = p;
+     while (end < limit && *end != ' ' && *end != ',' && *end != ';') {
+         ++end;
+     }
+     return static_cast<int32_t>(end - p);
+ }
+
+/**
+ * Parses and returns a qvalue weight in millis.
+ * Advances p to after the parsed substring.
+ * Returns a negative value if parsing fails.
+ */
+int32_t parseWeight(const char *&p, const char *limit) {
+ p = skipSpaces(p, limit);
+ char c;
+ if (p == limit || ((c = *p) != '0' && c != '1')) { return -1; }
+ int32_t weight = (c - '0') * 1000;
+ if (++p == limit || *p != '.') { return weight; }
+ int32_t multiplier = 100;
+ while (++p != limit && '0' <= (c = *p) && c <= '9') {
+ c -= '0';
+ if (multiplier > 0) {
+ weight += c * multiplier;
+ multiplier /= 10;
+ } else if (multiplier == 0) {
+ // round up
+ if (c >= 5) { ++weight; }
+ multiplier = -1;
+ } // else ignore further fraction digits
+ }
+ return weight <= WEIGHT_ONE ? weight : -1; // bad if > 1.0
+}
+
+} // namespace
+
+/**
+ * Nothing but a wrapper over a MaybeStackArray of LocaleAndWeight.
+ *
+ * This wrapper exists (and is not in an anonymous namespace)
+ * so that we can forward-declare it in the header file and
+ * don't have to expose the MaybeStackArray specialization and
+ * the LocaleAndWeight to code (like the test) that #includes localeprioritylist.h.
+ * Also, otherwise we would have to do a platform-specific
+ * template export declaration of some kind for the MaybeStackArray specialization
+ * to be properly exported from the common DLL.
+ */
+struct LocaleAndWeightArray : public UMemory {
+ MaybeStackArray<LocaleAndWeight, 20> array;
+};
+
+ /**
+ * Parses an accept-language style string: comma-separated language tags,
+ * each optionally followed by ";q=<weight>". Spaces are allowed around the
+ * tokens and empty fields (consecutive commas) are skipped.
+ * Every parsed entry is handed to add(); the list is sorted at the end.
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR for an empty tag, a bogus Locale,
+ * a malformed ";q=" part or trailing junk after an entry, and
+ * U_MEMORY_ALLOCATION_ERROR if the backing array cannot be allocated.
+ * In both cases parsing stops immediately.
+ */
+ LocalePriorityList::LocalePriorityList(StringPiece s, UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode)) { return; }
+ list = new LocaleAndWeightArray();
+ if (list == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ const char *p = s.data();
+ const char *limit = p + s.length();
+ while ((p = skipSpaces(p, limit)) != limit) {
+ if (*p == ',') { // empty range field
+ ++p;
+ continue;
+ }
+ int32_t tagLength = findTagLength(p, limit);
+ if (tagLength == 0) {
+ // The entry starts with a delimiter (here only ';' is possible).
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ // Copy the tag into a NUL-terminated string for the Locale constructor.
+ CharString tag(p, tagLength, errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ Locale locale = Locale(tag.data());
+ if (locale.isBogus()) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ // Without an explicit qvalue the weight is 1.0.
+ int32_t weight = WEIGHT_ONE;
+ if ((p = skipSpaces(p + tagLength, limit)) != limit && *p == ';') {
+ // Expect 'q', '=', then a weight; each step advances p.
+ if ((p = skipSpaces(p + 1, limit)) == limit || *p != 'q' ||
+ (p = skipSpaces(p + 1, limit)) == limit || *p != '=' ||
+ (++p, (weight = parseWeight(p, limit)) < 0)) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ p = skipSpaces(p, limit);
+ }
+ if (p != limit && *p != ',') { // trailing junk
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ // The return value is not checked here; a failure is visible in errorCode.
+ add(locale, weight, errorCode);
+ if (p == limit) { break; }
+ ++p;
+ }
+ sort(errorCode);
+ }
+
+ // Deletes every Locale still stored in the list (removed or orphaned slots
+ // hold nullptr), then the array wrapper, then closes the hash table.
+ LocalePriorityList::~LocalePriorityList() {
+     if (list != nullptr) {
+         int32_t i = 0;
+         while (i < listLength) {
+             delete list->array[i].locale;
+             ++i;
+         }
+         delete list;
+     }
+     uhash_close(map);
+ }
+
+ // Returns the locale stored at array position i; may be nullptr for a
+ // removed or orphaned slot. No bounds or null-list check is performed.
+ const Locale *LocalePriorityList::localeAt(int32_t i) const {
+     const LocaleAndWeight &entry = list->array[i];
+     return entry.locale;
+ }
+
+ // Hands ownership of the locale at array position i to the caller and
+ // leaves nullptr in the slot. Returns nullptr if there is no list.
+ Locale *LocalePriorityList::orphanLocaleAt(int32_t i) {
+     if (list == nullptr) {
+         return nullptr;
+     }
+     Locale *&slot = list->array[i].locale;
+     Locale *released = slot;
+     slot = nullptr;
+     return released;
+ }
+
+ /**
+ * Appends a copy of locale with the given weight (0..1000).
+ * If the locale is already in the list, its old slot is emptied and counted
+ * in numRemoved, and the existing Locale object is reused for the new entry.
+ * A weight <= 0 (q=0) adds nothing; it only removes an existing entry.
+ *
+ * Returns false if errorCode is a failure on entry or becomes one
+ * (hash table or allocation error); otherwise true.
+ */
+ bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode)) { return false; }
+ if (map == nullptr) {
+ // The hash table is created lazily, on the first entry that is really added.
+ if (weight <= 0) { return true; } // do not add q=0
+ map = uhash_open(hashLocale, compareLocales, uhash_compareLong, &errorCode);
+ if (U_FAILURE(errorCode)) { return false; }
+ }
+ LocalPointer<Locale> clone;
+ // The map stores (array index + 1), so 0 means "not found".
+ int32_t index = uhash_geti(map, &locale);
+ if (index != 0) {
+ // Duplicate: Remove the old item and append it anew.
+ LocaleAndWeight &lw = list->array[index - 1];
+ clone.adoptInstead(lw.locale);
+ lw.locale = nullptr;
+ lw.weight = 0;
+ ++numRemoved;
+ }
+ if (weight <= 0) { // do not add q=0
+ if (index != 0) {
+ // Not strictly necessary but cleaner.
+ uhash_removei(map, &locale);
+ }
+ // An adopted duplicate is deleted when clone goes out of scope.
+ return true;
+ }
+ if (clone.isNull()) {
+ clone.adoptInstead(locale.clone());
+ // A bogus clone of a non-bogus locale is treated as an allocation failure.
+ if (clone.isNull() || (clone->isBogus() && !locale.isBogus())) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return false;
+ }
+ }
+ if (listLength == list->array.getCapacity()) {
+ // Array is full: grow to 100 entries, or 4x once there are 50 or more.
+ int32_t newCapacity = listLength < 50 ? 100 : 4 * listLength;
+ if (list->array.resize(newCapacity, listLength) == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return false;
+ }
+ }
+ // Register the locale under its new 1-based position before storing it.
+ uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode);
+ if (U_FAILURE(errorCode)) { return false; }
+ LocaleAndWeight &lw = list->array[listLength];
+ lw.locale = clone.orphan();
+ lw.weight = weight;
+ lw.index = listLength++;
+ if (weight < WEIGHT_ONE) { hasWeights = true; }
+ U_ASSERT(uhash_count(map) == getLength());
+ return true;
+ }
+
+ // Sorts the entries by descending weight. Nothing is done on a failure
+ // code, for fewer than two live entries, or when every weight is 1.0.
+ // The comparator breaks ties by item index, which makes the sort stable.
+ void LocalePriorityList::sort(UErrorCode &errorCode) {
+     if (U_FAILURE(errorCode)) { return; }
+     if (getLength() <= 1 || !hasWeights) { return; }
+     LocaleAndWeight *items = list->array.getAlias();
+     uprv_sortArray(items, listLength, sizeof(LocaleAndWeight),
+                    compareLocaleAndWeight, nullptr, FALSE, &errorCode);
+ }
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/localeprioritylist.h b/thirdparty/icu4c/common/localeprioritylist.h
new file mode 100644
index 0000000000..41e9d3ea08
--- /dev/null
+++ b/thirdparty/icu4c/common/localeprioritylist.h
@@ -0,0 +1,115 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// localeprioritylist.h
+// created: 2019jul11 Markus W. Scherer
+
+#ifndef __LOCALEPRIORITYLIST_H__
+#define __LOCALEPRIORITYLIST_H__
+
+#include "unicode/utypes.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+
+struct UHashtable;
+
+U_NAMESPACE_BEGIN
+
+struct LocaleAndWeightArray;
+
+/**
+ * Parses a list of locales from an accept-language string.
+ * We are a bit more lenient than the spec:
+ * We accept extra whitespace in more places, empty range fields,
+ * and any number of qvalue fraction digits.
+ *
+ * https://tools.ietf.org/html/rfc2616#section-14.4
+ * 14.4 Accept-Language
+ *
+ * Accept-Language = "Accept-Language" ":"
+ * 1#( language-range [ ";" "q" "=" qvalue ] )
+ * language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" )
+ *
+ * Each language-range MAY be given an associated quality value which
+ * represents an estimate of the user's preference for the languages
+ * specified by that range. The quality value defaults to "q=1". For
+ * example,
+ *
+ * Accept-Language: da, en-gb;q=0.8, en;q=0.7
+ *
+ * https://tools.ietf.org/html/rfc2616#section-3.9
+ * 3.9 Quality Values
+ *
+ * HTTP content negotiation (section 12) uses short "floating point"
+ * numbers to indicate the relative importance ("weight") of various
+ * negotiable parameters. A weight is normalized to a real number in
+ * the range 0 through 1, where 0 is the minimum and 1 the maximum
+ * value. If a parameter has a quality value of 0, then content with
+ * this parameter is `not acceptable' for the client. HTTP/1.1
+ * applications MUST NOT generate more than three digits after the
+ * decimal point. User configuration of these values SHOULD also be
+ * limited in this fashion.
+ *
+ * qvalue = ( "0" [ "." 0*3DIGIT ] )
+ * | ( "1" [ "." 0*3("0") ] )
+ */
+ class U_COMMON_API LocalePriorityList : public UMemory {
+ public:
+ /**
+ * Iterates over the non-null locales of a LocalePriorityList.
+ * Holds a reference to the list, and captures its length at construction.
+ */
+ class Iterator : public Locale::Iterator {
+ public:
+ UBool hasNext() const override { return count < length; }
+
+ // Skips slots whose locale pointer is null (removed or orphaned entries).
+ // NOTE(review): the loop has no bounds check; it appears to rely on
+ // callers checking hasNext() first -- confirm.
+ const Locale &next() override {
+ for(;;) {
+ const Locale *locale = list.localeAt(index++);
+ if (locale != nullptr) {
+ ++count;
+ return *locale;
+ }
+ }
+ }
+
+ private:
+ friend class LocalePriorityList;
+
+ Iterator(const LocalePriorityList &list) : list(list), length(list.getLength()) {}
+
+ const LocalePriorityList &list;
+ int32_t index = 0; // next array slot to inspect
+ int32_t count = 0; // number of locales returned so far
+ const int32_t length; // number of live entries when the iterator was created
+ };
+
+ /** Parses the accept-language string s; errors are reported via errorCode. */
+ LocalePriorityList(StringPiece s, UErrorCode &errorCode);
+
+ ~LocalePriorityList();
+
+ /** Number of entries, not counting removed ones. */
+ int32_t getLength() const { return listLength - numRemoved; }
+
+ /** Number of array slots in use, including removed entries. */
+ int32_t getLengthIncludingRemoved() const { return listLength; }
+
+ Iterator iterator() const { return Iterator(*this); }
+
+ /** Returns the locale in array slot i, or nullptr for an emptied slot. */
+ const Locale *localeAt(int32_t i) const;
+
+ /** Transfers ownership of the locale in array slot i to the caller. */
+ Locale *orphanLocaleAt(int32_t i);
+
+ private:
+ LocalePriorityList(const LocalePriorityList &) = delete;
+ LocalePriorityList &operator=(const LocalePriorityList &) = delete;
+
+ bool add(const Locale &locale, int32_t weight, UErrorCode &errorCode);
+
+ void sort(UErrorCode &errorCode);
+
+ LocaleAndWeightArray *list = nullptr;
+ int32_t listLength = 0; // slots in use, including removed entries
+ int32_t numRemoved = 0; // slots emptied because of duplicates
+ bool hasWeights = false; // other than 1.0
+ UHashtable *map = nullptr; // Locale -> (array index + 1), created lazily by add()
+ };
+
+U_NAMESPACE_END
+
+#endif // __LOCALEPRIORITYLIST_H__
diff --git a/thirdparty/icu4c/common/localsvc.h b/thirdparty/icu4c/common/localsvc.h
new file mode 100644
index 0000000000..3364019513
--- /dev/null
+++ b/thirdparty/icu4c/common/localsvc.h
@@ -0,0 +1,27 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+***************************************************************************
+* Copyright (C) 2006 International Business Machines Corporation *
+* and others. All rights reserved. *
+***************************************************************************
+*/
+
+#ifndef LOCALSVC_H
+#define LOCALSVC_H
+
+#include "unicode/utypes.h"
+
+#if defined(U_LOCAL_SERVICE_HOOK) && U_LOCAL_SERVICE_HOOK
+/**
+ * Prototype for user-supplied service hook. This function is expected to return
+ * a type of factory object specific to the requested service.
+ *
+ * @param what service-specific string identifying the specific user hook
+ * @param status error status
+ * @return a service-specific hook, or NULL on failure.
+ */
+U_CAPI void* uprv_svc_hook(const char *what, UErrorCode *status);
+#endif
+
+#endif
diff --git a/thirdparty/icu4c/common/locavailable.cpp b/thirdparty/icu4c/common/locavailable.cpp
new file mode 100644
index 0000000000..e8ec512e37
--- /dev/null
+++ b/thirdparty/icu4c/common/locavailable.cpp
@@ -0,0 +1,270 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1997-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: locavailable.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010feb25
+* created by: Markus W. Scherer
+*
+* Code for available locales, separated out from other .cpp files
+* that then do not depend on resource bundle code and res_index bundles.
+*/
+
+#include "unicode/errorcode.h"
+#include "unicode/utypes.h"
+#include "unicode/locid.h"
+#include "unicode/uloc.h"
+#include "unicode/ures.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "ucln_cmn.h"
+#include "uassert.h"
+#include "umutex.h"
+#include "uresimp.h"
+
+// C++ API ----------------------------------------------------------------- ***
+
+U_NAMESPACE_BEGIN
+
+static icu::Locale* availableLocaleList = NULL;
+static int32_t availableLocaleListCount;
+static icu::UInitOnce gInitOnceLocale = U_INITONCE_INITIALIZER;
+
+U_NAMESPACE_END
+
+U_CDECL_BEGIN
+
+ // Library cleanup callback: frees the cached Locale array and re-arms the
+ // init-once guard so that the list can be rebuilt later.
+ static UBool U_CALLCONV locale_available_cleanup(void)
+ {
+     U_NAMESPACE_USE
+
+     if (availableLocaleList != NULL) {
+         delete [] availableLocaleList;
+         availableLocaleList = NULL;
+     }
+     availableLocaleListCount = 0;
+     gInitOnceLocale.reset();
+
+     return TRUE;
+ }
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+ // Builds the cached array of available Locale objects from the C API list.
+ // This function is a friend of class Locale and is only invoked via
+ // umtx_initOnce().
+ void U_CALLCONV locale_available_init() {
+     availableLocaleListCount = uloc_countAvailable();
+     if (availableLocaleListCount != 0) {
+         availableLocaleList = new Locale[availableLocaleListCount];
+     }
+     // No array (nothing installed, or allocation failed): report zero locales.
+     if (availableLocaleList == NULL) {
+         availableLocaleListCount = 0;
+     }
+     // Fill from the last entry down to the first.
+     for (int32_t i = availableLocaleListCount; i-- > 0;) {
+         availableLocaleList[i].setFromPOSIXID(uloc_getAvailable(i));
+     }
+     ucln_common_registerCleanup(UCLN_COMMON_LOCALE_AVAILABLE, locale_available_cleanup);
+ }
+
+ // Returns the cached array of available locales and stores its size in count.
+ // The array is built on first use through the init-once guard; the returned
+ // pointer is NULL (and count 0) when no list could be built.
+ const Locale* U_EXPORT2
+ Locale::getAvailableLocales(int32_t& count)
+ {
+ umtx_initOnce(gInitOnceLocale, &locale_available_init);
+ count = availableLocaleListCount;
+ return availableLocaleList;
+ }
+
+
+U_NAMESPACE_END
+
+// C API ------------------------------------------------------------------- ***
+
+U_NAMESPACE_USE
+
+/* ### Constants **************************************************/
+
+namespace {
+
+// Enough capacity for the two lists in the res_index.res file
+const char** gAvailableLocaleNames[2] = {};
+int32_t gAvailableLocaleCounts[2] = {};
+icu::UInitOnce ginstalledLocalesInitOnce = U_INITONCE_INITIALIZER;
+
+ // Resource sink that reads the "InstalledLocales" and "AliasLocales" tables
+ // and records their keys in gAvailableLocaleNames / gAvailableLocaleCounts.
+ class AvailableLocalesSink : public ResourceSink {
+   public:
+     void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE {
+         ResourceTable resIndexTable = value.getTable(status);
+         if (U_FAILURE(status)) {
+             return;
+         }
+         for (int32_t row = 0; resIndexTable.getKeyAndValue(row, key, value); ++row) {
+             const bool isInstalled = uprv_strcmp(key, "InstalledLocales") == 0;
+             if (!isInstalled && uprv_strcmp(key, "AliasLocales") != 0) {
+                 // CLDRVersion, etc.
+                 continue;
+             }
+             const ULocAvailableType type =
+                 isInstalled ? ULOC_AVAILABLE_DEFAULT : ULOC_AVAILABLE_ONLY_LEGACY_ALIASES;
+             ResourceTable localesTable = value.getTable(status);
+             if (U_FAILURE(status)) {
+                 return;
+             }
+             // Publish the count first, then the freshly allocated name array.
+             gAvailableLocaleCounts[type] = localesTable.getSize();
+             gAvailableLocaleNames[type] = static_cast<const char**>(
+                 uprv_malloc(gAvailableLocaleCounts[type] * sizeof(const char*)));
+             const char **names = gAvailableLocaleNames[type];
+             if (names == nullptr) {
+                 status = U_MEMORY_ALLOCATION_ERROR;
+                 return;
+             }
+             // Only the keys are stored; key and value are reused as scratch.
+             for (int32_t j = 0; localesTable.getKeyAndValue(j, key, value); ++j) {
+                 names[j] = key;
+             }
+         }
+     }
+ };
+
+ // StringEnumeration over one of the loaded locale-name lists, or over both
+ // lists back to back for ULOC_AVAILABLE_WITH_LEGACY_ALIASES.
+ class AvailableLocalesStringEnumeration : public StringEnumeration {
+   public:
+     AvailableLocalesStringEnumeration(ULocAvailableType type) : fType(type) {
+     }
+
+     const char* next(int32_t *resultLength, UErrorCode&) override {
+         int32_t position = fIndex++;
+         ULocAvailableType listType = fType;
+
+         // The "combined" list is the default list followed by the alias list.
+         if (fType == ULOC_AVAILABLE_WITH_LEGACY_ALIASES) {
+             const int32_t defaultCount = gAvailableLocaleCounts[ULOC_AVAILABLE_DEFAULT];
+             if (position >= defaultCount) {
+                 position -= defaultCount;
+                 listType = ULOC_AVAILABLE_ONLY_LEGACY_ALIASES;
+             } else {
+                 listType = ULOC_AVAILABLE_DEFAULT;
+             }
+         }
+
+         // Past the end: nullptr with length 0.
+         const bool inRange = position < gAvailableLocaleCounts[listType];
+         const char *name = inRange ? gAvailableLocaleNames[listType][position] : nullptr;
+         if (resultLength != nullptr) {
+             *resultLength = inRange ? static_cast<int32_t>(uprv_strlen(name)) : 0;
+         }
+         return name;
+     }
+
+     void reset(UErrorCode&) override {
+         fIndex = 0;
+     }
+
+     int32_t count(UErrorCode&) const override {
+         return fType == ULOC_AVAILABLE_WITH_LEGACY_ALIASES
+             ? gAvailableLocaleCounts[ULOC_AVAILABLE_DEFAULT]
+                 + gAvailableLocaleCounts[ULOC_AVAILABLE_ONLY_LEGACY_ALIASES]
+             : gAvailableLocaleCounts[fType];
+     }
+
+   private:
+     ULocAvailableType fType;
+     int32_t fIndex = 0;
+ };
+
+/* ### Get available **************************************************/
+
+ // Library cleanup callback: frees both name arrays, zeroes the counts and
+ // re-arms the init-once guard.
+ static UBool U_CALLCONV uloc_cleanup(void) {
+     const int32_t listCount = UPRV_LENGTHOF(gAvailableLocaleNames);
+     for (int32_t slot = 0; slot != listCount; ++slot) {
+         uprv_free(gAvailableLocaleNames[slot]);
+         gAvailableLocaleNames[slot] = nullptr;
+         gAvailableLocaleCounts[slot] = 0;
+     }
+     ginstalledLocalesInitOnce.reset();
+     return TRUE;
+ }
+
+ // Loads the installed-locale lists from the "res_index" bundle.
+ // Invoked exactly once through the initOnce mechanism.
+ // The cleanup callback is registered before any data is loaded.
+
+ static void U_CALLCONV loadInstalledLocales(UErrorCode& status) {
+     ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
+
+     icu::LocalUResourceBundlePointer resIndex(ures_openDirect(NULL, "res_index", &status));
+     AvailableLocalesSink localeSink;
+     ures_getAllItemsWithFallback(resIndex.getAlias(), "", localeSink, status);
+ }
+
+ // Ensures the installed-locale lists are loaded, running loadInstalledLocales()
+ // through the init-once guard; load errors are reported via status.
+ void _load_installedLocales(UErrorCode& status) {
+ umtx_initOnce(ginstalledLocalesInitOnce, &loadInstalledLocales, status);
+ }
+
+} // namespace
+
+ /**
+  * Returns the name of the installed locale at the given index.
+  *
+  * @param offset index into the installed-locales list; valid values are
+  *               0 .. uloc_countAvailable()-1
+  * @return the locale name, or nullptr if the list could not be loaded or
+  *         offset is out of range
+  */
+ U_CAPI const char* U_EXPORT2
+ uloc_getAvailable(int32_t offset) {
+     icu::ErrorCode status;
+     _load_installedLocales(status);
+     if (status.isFailure()) {
+         return nullptr;
+     }
+     // Reject negative offsets and offset == count as well: the previous
+     // "offset > count" test let both index outside the name array (and
+     // dereferenced a null array when the list was never filled).
+     if (offset < 0 || offset >= gAvailableLocaleCounts[0]) {
+         // *status = U_ILLEGAL_ARGUMENT_ERROR;
+         return nullptr;
+     }
+     return gAvailableLocaleNames[0][offset];
+ }
+
+ // Returns the number of installed locales, or 0 if the list cannot be loaded.
+ U_CAPI int32_t U_EXPORT2
+ uloc_countAvailable() {
+     icu::ErrorCode status;
+     _load_installedLocales(status);
+     return status.isFailure() ? 0 : gAvailableLocaleCounts[0];
+ }
+
+ // Opens a UEnumeration over the locale names of the requested list type.
+ // Returns nullptr with U_ILLEGAL_ARGUMENT_ERROR for an unknown type, and
+ // nullptr with the pending error if loading or allocation fails.
+ U_CAPI UEnumeration* U_EXPORT2
+ uloc_openAvailableByType(ULocAvailableType type, UErrorCode* status) {
+     if (U_FAILURE(*status)) {
+         return nullptr;
+     }
+     const bool knownType = type >= 0 && type < ULOC_AVAILABLE_COUNT;
+     if (!knownType) {
+         *status = U_ILLEGAL_ARGUMENT_ERROR;
+         return nullptr;
+     }
+     _load_installedLocales(*status);
+     if (U_FAILURE(*status)) {
+         return nullptr;
+     }
+     LocalPointer<AvailableLocalesStringEnumeration> enumeration(
+         new AvailableLocalesStringEnumeration(type), *status);
+     if (U_FAILURE(*status)) {
+         return nullptr;
+     }
+     // Ownership moves into the UEnumeration wrapper.
+     return uenum_openFromStringEnumeration(enumeration.orphan(), status);
+ }
+
diff --git a/thirdparty/icu4c/common/locbased.cpp b/thirdparty/icu4c/common/locbased.cpp
new file mode 100644
index 0000000000..ff378b4cc7
--- /dev/null
+++ b/thirdparty/icu4c/common/locbased.cpp
@@ -0,0 +1,55 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2004-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: January 16 2004
+* Since: ICU 2.8
+**********************************************************************
+*/
+#include "locbased.h"
+#include "cstring.h"
+
+U_NAMESPACE_BEGIN
+
+Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
+ const char* id = getLocaleID(type, status);
+ return Locale((id != 0) ? id : "");
+}
+
+ // Returns the aliased valid or actual locale ID buffer. Returns NULL if
+ // status is already a failure, and NULL with U_ILLEGAL_ARGUMENT_ERROR for
+ // any other type.
+ const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
+     if (U_FAILURE(status)) {
+         return NULL;
+     }
+     if (type == ULOC_VALID_LOCALE) {
+         return valid;
+     }
+     if (type == ULOC_ACTUAL_LOCALE) {
+         return actual;
+     }
+     status = U_ILLEGAL_ARGUMENT_ERROR;
+     return NULL;
+ }
+
+ // Copies src into dest, which must have room for ULOC_FULLNAME_CAPACITY
+ // chars; longer IDs are truncated and the result is always NUL-terminated.
+ static void locbased_copyID(char* dest, const char* src) {
+     uprv_strncpy(dest, src, ULOC_FULLNAME_CAPACITY);
+     dest[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
+ }
+
+ // Stores the given IDs in the aliased buffers; a null parameter leaves the
+ // corresponding buffer untouched.
+ void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) {
+     if (validID != 0) {
+         locbased_copyID(valid, validID);
+     }
+     if (actualID != 0) {
+         locbased_copyID(actual, actualID);
+     }
+ }
+
+ // Stores the names of the given locales in the aliased buffers.
+ // The copy is bounded like in the const char* overload: the previous
+ // unbounded uprv_strcpy() could write past the ULOC_FULLNAME_CAPACITY-sized
+ // buffers when a locale name is longer than that.
+ void LocaleBased::setLocaleIDs(const Locale& validID, const Locale& actualID) {
+     locbased_copyID(valid, validID.getName());
+     locbased_copyID(actual, actualID.getName());
+ }
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/locbased.h b/thirdparty/icu4c/common/locbased.h
new file mode 100644
index 0000000000..45738863b5
--- /dev/null
+++ b/thirdparty/icu4c/common/locbased.h
@@ -0,0 +1,107 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2004-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: January 16 2004
+* Since: ICU 2.8
+**********************************************************************
+*/
+#ifndef LOCBASED_H
+#define LOCBASED_H
+
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+
+/**
+ * Macro to declare a locale LocaleBased wrapper object for the given
+ * object, which must have two members named `validLocale' and
+ * `actualLocale' of size ULOC_FULLNAME_CAPACITY
+ */
+#define U_LOCALE_BASED(varname, objname) \
+ LocaleBased varname((objname).validLocale, (objname).actualLocale)
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A utility class that unifies the implementation of getLocale() by
+ * various ICU services. This class is likely to be removed in the
+ * ICU 3.0 time frame in favor of an integrated approach with the
+ * services framework.
+ * @since ICU 2.8
+ */
+ class U_COMMON_API LocaleBased : public UMemory {
+
+ public:
+
+ /**
+ * Construct a LocaleBased wrapper around the two pointers. These
+ * will be aliased for the lifetime of this object.
+ */
+ inline LocaleBased(char* validAlias, char* actualAlias);
+
+ /**
+ * Construct a LocaleBased wrapper around the two const pointers.
+ * These will be aliased for the lifetime of this object.
+ */
+ inline LocaleBased(const char* validAlias, const char* actualAlias);
+
+ /**
+ * Return locale meta-data for the service object wrapped by this
+ * object. Either the valid or the actual locale may be
+ * retrieved.
+ * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
+ * @param status input-output error code
+ * @return the indicated locale
+ */
+ Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
+
+ /**
+ * Return the locale ID for the service object wrapped by this
+ * object. Either the valid or the actual locale may be
+ * retrieved.
+ * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
+ * @param status input-output error code
+ * @return the indicated locale ID
+ */
+ const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
+
+ /**
+ * Set the locale meta-data for the service object wrapped by this
+ * object. If either parameter is zero, it is ignored.
+ * @param valid the ID of the valid locale
+ * @param actual the ID of the actual locale
+ */
+ void setLocaleIDs(const char* valid, const char* actual);
+
+ /**
+ * Set the locale meta-data for the service object wrapped by this
+ * object.
+ * @param valid the ID of the valid locale
+ * @param actual the ID of the actual locale
+ */
+ void setLocaleIDs(const Locale& valid, const Locale& actual);
+
+ private:
+
+ // Aliased (not owned) buffer holding the valid locale ID.
+ char* valid;
+
+ // Aliased (not owned) buffer holding the actual locale ID.
+ char* actual;
+ };
+
+ // Stores the two buffer pointers as aliases; nothing is copied.
+ inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias) :
+ valid(validAlias), actual(actualAlias) {
+ }
+
+ // Stores the two const buffer pointers as aliases; nothing is copied.
+ inline LocaleBased::LocaleBased(const char* validAlias,
+                                 const char* actualAlias) :
+     // ugh: cast away const
+     valid(const_cast<char*>(validAlias)), actual(const_cast<char*>(actualAlias)) {
+ }
+
+U_NAMESPACE_END
+
+#endif
diff --git a/thirdparty/icu4c/common/locdispnames.cpp b/thirdparty/icu4c/common/locdispnames.cpp
new file mode 100644
index 0000000000..47c0667417
--- /dev/null
+++ b/thirdparty/icu4c/common/locdispnames.cpp
@@ -0,0 +1,890 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: locdispnames.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010feb25
+* created by: Markus W. Scherer
+*
+* Code for locale display names, separated out from other .cpp files
+* that then do not depend on resource bundle code and display name data.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/brkiter.h"
+#include "unicode/locid.h"
+#include "unicode/uenum.h"
+#include "unicode/uloc.h"
+#include "unicode/ures.h"
+#include "unicode/ustring.h"
+#include "bytesinkutil.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "putilimp.h"
+#include "ulocimp.h"
+#include "uresimp.h"
+#include "ureslocs.h"
+#include "ustr_imp.h"
+
+// C++ API ----------------------------------------------------------------- ***
+
+U_NAMESPACE_BEGIN
+
+ namespace {
+
+ /*
+  * We cannot make any assumptions about the size of the output display strings.
+  * Yet, since we are calling through to a C API, we need to set limits on the
+  * buffer size. All of the following getDisplay functions therefore share this
+  * helper: it first lets the C function fill a buffer of ULOC_FULLNAME_CAPACITY
+  * UChars inside the UnicodeString. If that is too small, it asks for a buffer
+  * of exactly the reported length and calls the C function a second time.
+  *
+  * getDisplay is one of the uloc_getDisplay*() functions. On any error other
+  * than the first-pass overflow the result is left empty.
+  */
+ template<typename GetDisplayFn>
+ UnicodeString&
+ fillDisplayString(GetDisplayFn getDisplay,
+                   const char *localeID, const char *displayLocaleID,
+                   UnicodeString &result) {
+     UErrorCode errorCode=U_ZERO_ERROR;
+
+     UChar *buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
+     if(buffer==NULL) {
+         result.truncate(0);
+         return result;
+     }
+
+     int32_t length=getDisplay(localeID, displayLocaleID,
+                               buffer, result.getCapacity(),
+                               &errorCode);
+     result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
+
+     if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
+         // Second pass with the length reported by the first one.
+         buffer=result.getBuffer(length);
+         if(buffer==NULL) {
+             result.truncate(0);
+             return result;
+         }
+         errorCode=U_ZERO_ERROR;
+         length=getDisplay(localeID, displayLocaleID,
+                           buffer, result.getCapacity(),
+                           &errorCode);
+         result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
+     }
+
+     return result;
+ }
+
+ }  // namespace
+
+ // Display language of this locale, localized for the default locale.
+ UnicodeString&
+ Locale::getDisplayLanguage(UnicodeString& dispLang) const
+ {
+     return this->getDisplayLanguage(getDefault(), dispLang);
+ }
+
+ // Display language of this locale, localized for displayLocale.
+ UnicodeString&
+ Locale::getDisplayLanguage(const Locale &displayLocale,
+                            UnicodeString &result) const {
+     return fillDisplayString(uloc_getDisplayLanguage,
+                              fullName, displayLocale.fullName, result);
+ }
+
+ // Display script of this locale, localized for the default locale.
+ UnicodeString&
+ Locale::getDisplayScript(UnicodeString& dispScript) const
+ {
+     return this->getDisplayScript(getDefault(), dispScript);
+ }
+
+ // Display script of this locale, localized for displayLocale.
+ UnicodeString&
+ Locale::getDisplayScript(const Locale &displayLocale,
+                          UnicodeString &result) const {
+     return fillDisplayString(uloc_getDisplayScript,
+                              fullName, displayLocale.fullName, result);
+ }
+
+ // Display country of this locale, localized for the default locale.
+ UnicodeString&
+ Locale::getDisplayCountry(UnicodeString& dispCntry) const
+ {
+     return this->getDisplayCountry(getDefault(), dispCntry);
+ }
+
+ // Display country of this locale, localized for displayLocale.
+ UnicodeString&
+ Locale::getDisplayCountry(const Locale &displayLocale,
+                           UnicodeString &result) const {
+     return fillDisplayString(uloc_getDisplayCountry,
+                              fullName, displayLocale.fullName, result);
+ }
+
+ // Display variant of this locale, localized for the default locale.
+ UnicodeString&
+ Locale::getDisplayVariant(UnicodeString& dispVar) const
+ {
+     return this->getDisplayVariant(getDefault(), dispVar);
+ }
+
+ // Display variant of this locale, localized for displayLocale.
+ UnicodeString&
+ Locale::getDisplayVariant(const Locale &displayLocale,
+                           UnicodeString &result) const {
+     return fillDisplayString(uloc_getDisplayVariant,
+                              fullName, displayLocale.fullName, result);
+ }
+
+ // Full display name of this locale, localized for the default locale.
+ UnicodeString&
+ Locale::getDisplayName( UnicodeString& name ) const
+ {
+     return this->getDisplayName(getDefault(), name);
+ }
+
+ // Full display name of this locale, localized for displayLocale.
+ UnicodeString&
+ Locale::getDisplayName(const Locale &displayLocale,
+                        UnicodeString &result) const {
+     return fillDisplayString(uloc_getDisplayName,
+                              fullName, displayLocale.fullName, result);
+ }
+
+#if ! UCONFIG_NO_BREAK_ITERATION
+
+// -------------------------------------
+// Gets the objectLocale display name in the default locale language.
+UnicodeString& U_EXPORT2
+BreakIterator::getDisplayName(const Locale& objectLocale,
+ UnicodeString& name)
+{
+ return objectLocale.getDisplayName(name);
+}
+
+// -------------------------------------
+// Returns the display name of objectLocale in the language of displayLocale,
+// written into `name`, by forwarding to Locale::getDisplayName().
+UnicodeString& U_EXPORT2
+BreakIterator::getDisplayName(const Locale &objectLocale,
+                              const Locale &displayLocale,
+                              UnicodeString &name) {
+    UnicodeString &displayName = objectLocale.getDisplayName(displayLocale, name);
+    return displayName;
+}
+
+#endif
+
+
+U_NAMESPACE_END
+
+// C API ------------------------------------------------------------------- ***
+
+U_NAMESPACE_USE
+
+/* ### Constants **************************************************/
+
+/* These strings describe the resources we attempt to load from
+ the locale ResourceBundle data file.
+
+ Note: _getDisplayNameForComponent() compares its `tag` argument against
+ _kCountries by pointer (tag == _kCountries), so callers must pass these
+ arrays themselves rather than equal string literals.*/
+static const char _kLanguages[] = "Languages";
+static const char _kScripts[] = "Scripts";
+static const char _kScriptsStandAlone[] = "Scripts%stand-alone";
+static const char _kCountries[] = "Countries";
+static const char _kVariants[] = "Variants";
+static const char _kKeys[] = "Keys";
+static const char _kTypes[] = "Types";
+//static const char _kRootName[] = "root";
+static const char _kCurrency[] = "currency";
+static const char _kCurrencies[] = "Currencies";
+static const char _kLocaleDisplayPattern[] = "localeDisplayPattern";
+static const char _kPattern[] = "pattern";
+static const char _kSeparator[] = "separator";
+
+/* ### Display name **************************************************/
+
+/*
+ * Fetches a display string from the resource bundle `path` for `locale` and
+ * copies as much of it as fits into dest.
+ *
+ * - itemKey == NULL: tableKey names a top-level string in the bundle.
+ * - itemKey != NULL: the string is looked up with
+ *   uloc_getTableStringWithFallback(tableKey, subTableKey, itemKey), except
+ *   that an itemKey which parses as a non-zero number in a "Languages" table
+ *   is treated as a missing resource.
+ *
+ * If the lookup leaves *pErrorCode in a failure state, `substitute` is
+ * converted into dest instead and *pErrorCode becomes
+ * U_USING_DEFAULT_WARNING. The return value is whatever u_terminateUChars()
+ * returns for the full (untruncated) string length.
+ */
+static int32_t
+_getStringOrCopyKey(const char *path, const char *locale,
+                    const char *tableKey,
+                    const char* subTableKey,
+                    const char *itemKey,
+                    const char *substitute,
+                    UChar *dest, int32_t destCapacity,
+                    UErrorCode *pErrorCode) {
+    const UChar *text = NULL;
+    int32_t textLength = 0;
+
+    if(itemKey!=NULL) {
+        /* Language code should not be a number. If it is, set the error code. */
+        const bool numericLanguage =
+            uprv_strncmp(tableKey, "Languages", 9)==0 && uprv_strtol(itemKey, NULL, 10)!=0;
+        if(numericLanguage) {
+            *pErrorCode = U_MISSING_RESOURCE_ERROR;
+        } else {
+            /* second-level item, use special fallback */
+            text=uloc_getTableStringWithFallback(path, locale,
+                                                 tableKey,
+                                                 subTableKey,
+                                                 itemKey,
+                                                 &textLength,
+                                                 pErrorCode);
+        }
+    } else {
+        /* top-level item: normal resource bundle access */
+        icu::LocalUResourceBundlePointer bundle(ures_open(path, locale, pErrorCode));
+        if(U_SUCCESS(*pErrorCode)) {
+            text=ures_getStringByKey(bundle.getAlias(), tableKey, &textLength, pErrorCode);
+        }
+        /* NOTE(review): `text` is still read after `bundle` is closed at the end of
+           this scope; see the comment about closing rb near "return item;" in
+           _res_getTableStringWithFallback() for why that is acceptable. */
+    }
+
+    if(U_FAILURE(*pErrorCode)) {
+        /* no string from a resource bundle: convert the substitute */
+        textLength=(int32_t)uprv_strlen(substitute);
+        u_charsToUChars(substitute, dest, uprv_min(textLength, destCapacity));
+        *pErrorCode=U_USING_DEFAULT_WARNING;
+    } else if(text!=NULL) {
+        const int32_t copyLength=uprv_min(textLength, destCapacity);
+        if(copyLength>0) {
+            u_memcpy(dest, text, copyLength);
+        }
+    }
+
+    return u_terminateUChars(dest, destCapacity, textLength, pErrorCode);
+}
+
+/* Signature of the subtag getters handed to _getDisplayNameForComponent(), which
+ * calls them as getter(locale, buffer, bufferCapacity, &status) and uses the
+ * returned value as the length of the extracted subtag. */
+typedef int32_t U_CALLCONV UDisplayNameGetter(const char *, char *, int32_t, UErrorCode *);
+
+/*
+ * Shared implementation of the per-component display-name getters.
+ *
+ * Extracts one subtag of `locale` with `getter`, then looks it up in resource
+ * table `tag` for `displayLocale`, falling back to the raw subtag text. The
+ * region data (U_ICUDATA_REGION) is used when tag is _kCountries, the language
+ * data (U_ICUDATA_LANG) otherwise.
+ *
+ * An empty subtag yields an empty result, except for the language getter,
+ * where "und" is looked up instead (ICU-20273).
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a bad dest/destCapacity pair
+ * or when the getter fails or cannot NUL-terminate its output.
+ */
+static int32_t
+_getDisplayNameForComponent(const char *locale,
+                            const char *displayLocale,
+                            UChar *dest, int32_t destCapacity,
+                            UDisplayNameGetter *getter,
+                            const char *tag,
+                            UErrorCode *pErrorCode) {
+    /* argument checking */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    char subtag[ULOC_FULLNAME_CAPACITY*4];
+    UErrorCode getterStatus = U_ZERO_ERROR;
+    const int32_t subtagLength=(*getter)(locale, subtag, sizeof(subtag), &getterStatus);
+    if(U_FAILURE(getterStatus) || getterStatus==U_STRING_NOT_TERMINATED_WARNING) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if(subtagLength==0) {
+        if (getter != uloc_getLanguage) {
+            return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
+        }
+        // For the display name, we treat this as unknown language (ICU-20273).
+        uprv_strcpy(subtag, "und");
+    }
+
+    const char *bundlePath;
+    if(tag == _kCountries) {
+        bundlePath = U_ICUDATA_REGION;
+    } else {
+        bundlePath = U_ICUDATA_LANG;
+    }
+
+    /* The subtag serves both as the item key and as the fallback text. */
+    return _getStringOrCopyKey(bundlePath, displayLocale,
+                               tag, NULL, subtag,
+                               subtag,
+                               dest, destCapacity,
+                               pErrorCode);
+}
+
+/*
+ * Writes the display name of the language subtag of `locale`, localized for
+ * `displayLocale`, into dest. Delegates to _getDisplayNameForComponent() with
+ * uloc_getLanguage and the "Languages" table, and returns its result.
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayLanguage(const char *locale,
+                        const char *displayLocale,
+                        UChar *dest, int32_t destCapacity,
+                        UErrorCode *pErrorCode) {
+    const int32_t nameLength =
+        _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
+                                    uloc_getLanguage, _kLanguages, pErrorCode);
+    return nameLength;
+}
+
+/*
+ * Writes the display name of the script subtag of `locale`, localized for
+ * `displayLocale`, into dest and returns its length.
+ *
+ * The "Scripts%stand-alone" table is consulted first. If that lookup only
+ * produced the fallback text (U_USING_DEFAULT_WARNING), the plain "Scripts"
+ * table is used instead. When preflighting (destCapacity == 0) and the first
+ * lookup overflowed, both tables are measured and the larger length is
+ * returned.
+ *
+ * Like the other entry points in this file, the function does nothing and
+ * returns 0 when pErrorCode is NULL or already indicates a failure.
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayScript(const char* locale,
+                      const char* displayLocale,
+                      UChar *dest, int32_t destCapacity,
+                      UErrorCode *pErrorCode)
+{
+    /* Without this guard a NULL pErrorCode is dereferenced below, and an
+       incoming failure would be ignored and then overwritten by `err`. */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* Use a private status for the stand-alone lookup so that its warning can
+       be inspected without disturbing the caller's status yet. */
+    UErrorCode err = U_ZERO_ERROR;
+    int32_t res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
+                                              uloc_getScript, _kScriptsStandAlone, &err);
+
+    if (destCapacity == 0 && err == U_BUFFER_OVERFLOW_ERROR) {
+        // For preflight, return the max of the value and the fallback.
+        int32_t fallback_res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
+                                                           uloc_getScript, _kScripts, pErrorCode);
+        return (fallback_res > res) ? fallback_res : res;
+    }
+    if ( err == U_USING_DEFAULT_WARNING ) {
+        return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
+                                           uloc_getScript, _kScripts, pErrorCode);
+    } else {
+        *pErrorCode = err;
+        return res;
+    }
+}
+
+/*
+ * Variant of the script display-name lookup that reads only the "Scripts"
+ * table (no "Scripts%stand-alone" attempt). Used by uloc_getDisplayName()
+ * when the script is one component of a longer name.
+ */
+static int32_t
+uloc_getDisplayScriptInContext(const char* locale,
+                               const char* displayLocale,
+                               UChar *dest, int32_t destCapacity,
+                               UErrorCode *pErrorCode)
+{
+    const int32_t nameLength =
+        _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
+                                    uloc_getScript, _kScripts, pErrorCode);
+    return nameLength;
+}
+
+/*
+ * Writes the display name of the country subtag of `locale`, localized for
+ * `displayLocale`, into dest. Delegates to _getDisplayNameForComponent() with
+ * uloc_getCountry and the "Countries" table, and returns its result.
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayCountry(const char *locale,
+                       const char *displayLocale,
+                       UChar *dest, int32_t destCapacity,
+                       UErrorCode *pErrorCode) {
+    const int32_t nameLength =
+        _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
+                                    uloc_getCountry, _kCountries, pErrorCode);
+    return nameLength;
+}
+
+/*
+ * TODO separate variant1_variant2_variant3...
+ * by getting each tag's display string and concatenating them with ", "
+ * in between - similar to uloc_getDisplayName()
+ */
+/*
+ * Writes the display name of the variant subtag of `locale`, localized for
+ * `displayLocale`, into dest. The whole variant string is looked up as a
+ * single key in the "Variants" table via _getDisplayNameForComponent().
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayVariant(const char *locale,
+                       const char *displayLocale,
+                       UChar *dest, int32_t destCapacity,
+                       UErrorCode *pErrorCode) {
+    const int32_t nameLength =
+        _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
+                                    uloc_getVariant, _kVariants, pErrorCode);
+    return nameLength;
+}
+
+/* Instead of having a separate pass for 'special' patterns, reintegrate the two
+ * so we don't get bitten by preflight bugs again. We can be reasonably efficient
+ * without two separate code paths, this code isn't that performance-critical.
+ *
+ * This code is general enough to deal with patterns that have a prefix or swap the
+ * language and remainder components, since we gave developers enough rope to do such
+ * things if they futz with the pattern data. But since we don't give them a way to
+ * specify a pattern for arbitrary combinations of components, there's not much use in
+ * that. I don't think our data includes such patterns, the only variable I know of is
+ * whether there is a space before the open paren, or not. Oh, and zh uses different
+ * chars than the standard open/close paren (which ja and ko use, btw).
+ */
+/*
+ * Builds the full display name of `locale`, localized for `displayLocale`,
+ * directly in dest.
+ *
+ * The result is formed from a two-placeholder pattern ("{0} ({1})" by
+ * default, or the "localeDisplayPattern"/"pattern" resource): one placeholder
+ * receives the display language, the other receives the remaining components
+ * (script, country, variant, then each keyword as key=value) joined by the
+ * text found between {0} and {1} of the separator pattern. Parentheses inside
+ * the remaining components are replaced by square brackets.
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a bad dest/destCapacity pair
+ * or when the separator/pattern data lacks a {0} or {1} placeholder.
+ * Otherwise returns the result of u_terminateUChars() for the full length,
+ * so a too-small buffer can be used to preflight the required size.
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayName(const char *locale,
+ const char *displayLocale,
+ UChar *dest, int32_t destCapacity,
+ UErrorCode *pErrorCode)
+{
+ static const UChar defaultSeparator[9] = { 0x007b, 0x0030, 0x007d, 0x002c, 0x0020, 0x007b, 0x0031, 0x007d, 0x0000 }; /* "{0}, {1}" */
+ static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */
+ static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */
+ static const int32_t subLen = 3;
+ static const UChar defaultPattern[10] = {
+ 0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000
+ }; /* {0} ({1}) */
+ static const int32_t defaultPatLen = 9;
+ static const int32_t defaultSub0Pos = 0;
+ static const int32_t defaultSub1Pos = 5;
+
+ int32_t length; /* of formatted result */
+
+ const UChar *separator;
+ int32_t sepLen = 0;
+ const UChar *pattern;
+ int32_t patLen = 0;
+ int32_t sub0Pos, sub1Pos;
+
+ UChar formatOpenParen = 0x0028; // (
+ UChar formatReplaceOpenParen = 0x005B; // [
+ UChar formatCloseParen = 0x0029; // )
+ UChar formatReplaceCloseParen = 0x005D; // ]
+
+ UBool haveLang = TRUE; /* assume true, set false if we find we don't have
+ a lang component in the locale */
+ UBool haveRest = TRUE; /* assume true, set false if we find we don't have
+ any other component in the locale */
+ UBool retry = FALSE; /* set true if we need to retry, see below */
+
+ int32_t langi = 0; /* index of the language substitution (0 or 1), virtually always 0 */
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
+ if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ {
+ /* Load separator and pattern with a private status: a failed lookup is not
+ reported to the caller, it just leaves sepLen/patLen at 0 so that the
+ defaults below are used. */
+ UErrorCode status = U_ZERO_ERROR;
+
+ icu::LocalUResourceBundlePointer locbundle(
+ ures_open(U_ICUDATA_LANG, displayLocale, &status));
+ icu::LocalUResourceBundlePointer dspbundle(
+ ures_getByKeyWithFallback(locbundle.getAlias(), _kLocaleDisplayPattern, NULL, &status));
+
+ separator=ures_getStringByKeyWithFallback(dspbundle.getAlias(), _kSeparator, &sepLen, &status);
+ pattern=ures_getStringByKeyWithFallback(dspbundle.getAlias(), _kPattern, &patLen, &status);
+ }
+
+ /* If we couldn't find any data, then use the defaults */
+ if(sepLen == 0) {
+ separator = defaultSeparator;
+ }
+ /* #10244: Even though separator is now a pattern, it is awkward to handle it as such
+ * here since we are trying to build the display string in place in the dest buffer,
+ * and to handle it as a pattern would entail having separate storage for the
+ * substrings that need to be combined (the first of which may be the result of
+ * previous such combinations). So for now we continue to treat the portion between
+ * {0} and {1} as a string to be appended when joining substrings, ignoring anything
+ * that is before {0} or after {1} (no existing separator pattern has any such thing).
+ * This is similar to how pattern is handled below.
+ */
+ {
+ UChar *p0=u_strstr(separator, sub0);
+ UChar *p1=u_strstr(separator, sub1);
+ if (p0==NULL || p1==NULL || p1<p0) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ separator = (const UChar *)p0 + subLen;
+ sepLen = static_cast<int32_t>(p1 - separator);
+ }
+
+ if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
+ pattern=defaultPattern;
+ patLen=defaultPatLen;
+ sub0Pos=defaultSub0Pos;
+ sub1Pos=defaultSub1Pos;
+ // use default formatOpenParen etc. set above
+ } else { /* non-default pattern */
+ UChar *p0=u_strstr(pattern, sub0);
+ UChar *p1=u_strstr(pattern, sub1);
+ if (p0==NULL || p1==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ sub0Pos = static_cast<int32_t>(p0-pattern);
+ sub1Pos = static_cast<int32_t>(p1-pattern);
+ if (sub1Pos < sub0Pos) { /* a very odd pattern */
+ int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
+ langi=1;
+ }
+ if (u_strchr(pattern, 0xFF08) != NULL) {
+ formatOpenParen = 0xFF08; // fullwidth (
+ formatReplaceOpenParen = 0xFF3B; // fullwidth [
+ formatCloseParen = 0xFF09; // fullwidth )
+ formatReplaceCloseParen = 0xFF3D; // fullwidth ]
+ }
+ }
+
+ /* We loop here because there is one case in which after the first pass we could need to
+ * reextract the data. If there's initial padding before the first element, we put in
+ * the padding and then write that element. If it turns out there's no second element,
+ * we didn't need the padding. If we do need the data (no preflight), and the first element
+ * would have fit but for the padding, we need to reextract. In this case (only) we
+ * adjust the parameters so padding is not added, and repeat.
+ */
+ do {
+ UChar* p=dest;
+ int32_t patPos=0; /* position in the pattern, used for non-substitution portions */
+ int32_t langLen=0; /* length of language substitution */
+ int32_t langPos=0; /* position in output of language substitution */
+ int32_t restLen=0; /* length of 'everything else' substitution */
+ int32_t restPos=0; /* position in output of 'everything else' substitution */
+ icu::LocalUEnumerationPointer kenum; /* keyword enumeration */
+
+ /* prefix of pattern, extremely likely to be empty */
+ if(sub0Pos) {
+ if(destCapacity >= sub0Pos) {
+ while (patPos < sub0Pos) {
+ *p++ = pattern[patPos++];
+ }
+ } else {
+ patPos=sub0Pos;
+ }
+ length=sub0Pos;
+ } else {
+ length=0;
+ }
+
+ /* subi advances only when a substitution is complete (subdone); resti counts
+ the 'everything else' components fetched so far and selects the next one. */
+ for(int32_t subi=0,resti=0;subi<2;) { /* iterate through patterns 0 and 1*/
+ UBool subdone = FALSE; /* set true when ready to move to next substitution */
+
+ /* prep p and cap for calls to get display components, pin cap to 0 since
+ they complain if cap is negative */
+ int32_t cap=destCapacity-length;
+ if (cap <= 0) {
+ cap=0;
+ } else {
+ p=dest+length;
+ }
+
+ if (subi == langi) { /* {0}*/
+ if(haveLang) {
+ langPos=length;
+ langLen=uloc_getDisplayLanguage(locale, displayLocale, p, cap, pErrorCode);
+ length+=langLen;
+ haveLang=langLen>0;
+ }
+ subdone=TRUE;
+ } else { /* {1} */
+ if(!haveRest) {
+ subdone=TRUE;
+ } else {
+ int32_t len; /* length of component (plus other stuff) we just fetched */
+ switch(resti++) {
+ case 0:
+ restPos=length;
+ len=uloc_getDisplayScriptInContext(locale, displayLocale, p, cap, pErrorCode);
+ break;
+ case 1:
+ len=uloc_getDisplayCountry(locale, displayLocale, p, cap, pErrorCode);
+ break;
+ case 2:
+ len=uloc_getDisplayVariant(locale, displayLocale, p, cap, pErrorCode);
+ break;
+ case 3:
+ kenum.adoptInstead(uloc_openKeywords(locale, pErrorCode));
+ U_FALLTHROUGH;
+ default: {
+ const char* kw=uenum_next(kenum.getAlias(), &len, pErrorCode);
+ if (kw == NULL) {
+ len=0; /* mark that we didn't add a component */
+ subdone=TRUE;
+ } else {
+ /* incorporating this behavior into the loop made it even more complex,
+ so just special case it here */
+ len = uloc_getDisplayKeyword(kw, displayLocale, p, cap, pErrorCode);
+ if(len) {
+ if(len < cap) {
+ p[len]=0x3d; /* '=', assume we'll need it */
+ }
+ len+=1;
+
+ /* adjust for call to get keyword */
+ cap-=len;
+ if(cap <= 0) {
+ cap=0;
+ } else {
+ p+=len;
+ }
+ }
+ /* reset for call below */
+ if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
+ *pErrorCode=U_ZERO_ERROR;
+ }
+ int32_t vlen = uloc_getDisplayKeywordValue(locale, kw, displayLocale,
+ p, cap, pErrorCode);
+ if(len) {
+ if(vlen==0) {
+ --len; /* remove unneeded '=' */
+ }
+ /* restore cap and p to what they were at start */
+ cap=destCapacity-length;
+ if(cap <= 0) {
+ cap=0;
+ } else {
+ p=dest+length;
+ }
+ }
+ len+=vlen; /* total we added for key + '=' + value */
+ }
+ } break;
+ } /* end switch */
+
+ if (len>0) {
+ /* we added a component, so add separator and write it if there's room. */
+ if(len+sepLen<=cap) {
+ const UChar * plimit = p + len;
+ for (; p < plimit; p++) {
+ if (*p == formatOpenParen) {
+ *p = formatReplaceOpenParen;
+ } else if (*p == formatCloseParen) {
+ *p = formatReplaceCloseParen;
+ }
+ }
+ for(int32_t i=0;i<sepLen;++i) {
+ *p++=separator[i];
+ }
+ }
+ length+=len+sepLen;
+ } else if(subdone) {
+ /* remove separator if we added it */
+ if (length!=restPos) {
+ length-=sepLen;
+ }
+ restLen=length-restPos;
+ haveRest=restLen>0;
+ }
+ }
+ }
+
+ /* Overflow is expected while building/preflighting; the final
+ u_terminateUChars() call reports it based on the total length. */
+ if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
+ *pErrorCode=U_ZERO_ERROR;
+ }
+
+ if(subdone) {
+ if(haveLang && haveRest) {
+ /* append internal portion of pattern, the first time,
+ or last portion of pattern the second time */
+ int32_t padLen;
+ patPos+=subLen;
+ padLen=(subi==0 ? sub1Pos : patLen)-patPos;
+ if(length+padLen <= destCapacity) {
+ p=dest+length;
+ for(int32_t i=0;i<padLen;++i) {
+ *p++=pattern[patPos++];
+ }
+ } else {
+ patPos+=padLen;
+ }
+ length+=padLen;
+ } else if(subi==0) {
+ /* don't have first component, reset for second component */
+ sub0Pos=0;
+ length=0;
+ } else if(length>0) {
+ /* true length is the length of just the component we got. */
+ length=haveLang?langLen:restLen;
+ if(dest && sub0Pos!=0) {
+ if (sub0Pos+length<=destCapacity) {
+ /* first component not at start of result,
+ but we have full component in buffer. */
+ u_memmove(dest, dest+(haveLang?langPos:restPos), length);
+ } else {
+ /* would have fit, but didn't because of pattern prefix. */
+ sub0Pos=0; /* stops initial padding (and a second retry,
+ so we won't end up here again) */
+ retry=TRUE;
+ }
+ }
+ }
+
+ ++subi; /* move on to next substitution */
+ }
+ }
+ } while(retry);
+
+ return u_terminateUChars(dest, destCapacity, length, pErrorCode);
+}
+
+/*
+ * Writes the display name of a locale keyword (the key, not its value),
+ * localized for `displayLocale`, into dest.
+ *
+ * The keyword is looked up as an item of the "Keys" table in the language
+ * data; the keyword text itself is the fallback when no entry is found.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a bad dest/destCapacity
+ * pair.
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayKeyword(const char* keyword,
+                       const char* displayLocale,
+                       UChar* dest,
+                       int32_t destCapacity,
+                       UErrorCode* status){
+
+    /* argument checking */
+    if(status==NULL || U_FAILURE(*status)) {
+        return 0;
+    }
+
+    const bool capacityIsNegative = destCapacity<0;
+    const bool bufferIsMissing = destCapacity>0 && dest==NULL;
+    if(capacityIsNegative || bufferIsMissing) {
+        *status=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* subTableKey is NULL: "Keys" has no sub-table level; `keyword` is both the
+       item key and the substitute text. */
+    const int32_t nameLength = _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
+                                                   _kKeys, NULL,
+                                                   keyword,
+                                                   keyword,
+                                                   dest, destCapacity,
+                                                   status);
+    return nameLength;
+}
+
+
+/* Index passed to ures_getStringByIndex() to read the display name out of a
+ * "Currencies" entry. */
+#define UCURRENCY_DISPLAY_NAME_INDEX 1
+
+/*
+ * Writes the display name of the value that `keyword` has in `locale`,
+ * localized for `displayLocale`, into dest.
+ *
+ * For the "currency" keyword (compared case-insensitively) the name is read
+ * from the "Currencies" table of the currency data; a missing resource is
+ * downgraded to U_USING_DEFAULT_WARNING and the raw keyword value is copied
+ * instead. For every other keyword the value is looked up in the "Types"
+ * table, with `keyword` as sub-table and the raw value as fallback.
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a bad dest/destCapacity
+ * pair. In the currency branch, U_BUFFER_OVERFLOW_ERROR is set and the
+ * required length returned when the text does not fit.
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayKeywordValue( const char* locale,
+ const char* keyword,
+ const char* displayLocale,
+ UChar* dest,
+ int32_t destCapacity,
+ UErrorCode* status){
+
+
+ /* argument checking */
+ if(status==NULL || U_FAILURE(*status)) {
+ return 0;
+ }
+
+ if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
+ *status=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* get the keyword value */
+ CharString keywordValue;
+ {
+ CharStringByteSink sink(&keywordValue);
+ ulocimp_getKeywordValue(locale, keyword, sink, status);
+ }
+
+ /*
+ * if the keyword is equal to currency .. then to get the display name
+ * we need to do the fallback ourselves
+ */
+ if(uprv_stricmp(keyword, _kCurrency)==0){
+
+ int32_t dispNameLen = 0;
+ const UChar *dispName = NULL;
+
+ /* The lookups below are chained through *status; only the final state is
+ inspected after ures_getStringByIndex(). */
+ icu::LocalUResourceBundlePointer bundle(
+ ures_open(U_ICUDATA_CURR, displayLocale, status));
+ icu::LocalUResourceBundlePointer currencies(
+ ures_getByKey(bundle.getAlias(), _kCurrencies, NULL, status));
+ icu::LocalUResourceBundlePointer currency(
+ ures_getByKeyWithFallback(currencies.getAlias(), keywordValue.data(), NULL, status));
+
+ dispName = ures_getStringByIndex(currency.getAlias(), UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);
+
+ if(U_FAILURE(*status)){
+ if(*status == U_MISSING_RESOURCE_ERROR){
+ /* we just want to write the value over if nothing is available */
+ *status = U_USING_DEFAULT_WARNING;
+ }else{
+ return 0;
+ }
+ }
+
+ /* now copy the dispName over if not NULL */
+ if(dispName != NULL){
+ if(dispNameLen <= destCapacity){
+ u_memcpy(dest, dispName, dispNameLen);
+ return u_terminateUChars(dest, destCapacity, dispNameLen, status);
+ }else{
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ return dispNameLen;
+ }
+ }else{
+ /* we have not found the display name for the value .. just copy over */
+ if(keywordValue.length() <= destCapacity){
+ u_charsToUChars(keywordValue.data(), dest, keywordValue.length());
+ return u_terminateUChars(dest, destCapacity, keywordValue.length(), status);
+ }else{
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ return keywordValue.length();
+ }
+ }
+
+
+ }else{
+
+ /* keyword selects the sub-table of "Types"; the raw value is both the item
+ key and the substitute text. */
+ return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
+ _kTypes, keyword,
+ keywordValue.data(),
+ keywordValue.data(),
+ dest, destCapacity,
+ status);
+ }
+}
diff --git a/thirdparty/icu4c/common/locdistance.cpp b/thirdparty/icu4c/common/locdistance.cpp
new file mode 100644
index 0000000000..ff8892791b
--- /dev/null
+++ b/thirdparty/icu4c/common/locdistance.cpp
@@ -0,0 +1,415 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// locdistance.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "cstring.h"
+#include "locdistance.h"
+#include "loclikelysubtags.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uinvchar.h"
+#include "umutex.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+/**
+ * Bit flag used on the last character of a subtag in the trie.
+ * Must be set consistently by the builder and the lookup code.
+ */
+constexpr int32_t END_OF_SUBTAG = 0x80;
+/** Distance value bit flag, set by the builder. */
+constexpr int32_t DISTANCE_SKIP_SCRIPT = 0x80;
+/** Distance value bit flag, set by trieNext(). */
+constexpr int32_t DISTANCE_IS_FINAL = 0x100;
+constexpr int32_t DISTANCE_IS_FINAL_OR_SKIP_SCRIPT = DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;
+
+/** Distance reported by getBestIndexAndDistance() when no supported LSR qualifies. */
+constexpr int32_t ABOVE_THRESHOLD = 100;
+
+// Indexes into array of distances.
+// (Read from LocaleDistanceData::distances by the LocaleDistance constructor.)
+enum {
+ IX_DEF_LANG_DISTANCE,
+ IX_DEF_SCRIPT_DISTANCE,
+ IX_DEF_REGION_DISTANCE,
+ IX_MIN_REGION_DISTANCE,
+ IX_LIMIT
+};
+
+// Singleton instance: created by LocaleDistance::initLocaleDistance() under
+// gInitOnce, destroyed by cleanup().
+LocaleDistance *gLocaleDistance = nullptr;
+UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
+
+// Library cleanup callback, registered via ucln_common_registerCleanup():
+// deletes the singleton and resets gInitOnce so it can be initialized again.
+UBool U_CALLCONV cleanup() {
+ delete gLocaleDistance;
+ gLocaleDistance = nullptr;
+ gInitOnce.reset();
+ return TRUE;
+}
+
+} // namespace
+
+/**
+ * One-time initializer for the LocaleDistance singleton.
+ *
+ * Obtains the XLikelySubtags singleton, validates that its distance data is
+ * complete, allocates gLocaleDistance and registers cleanup().
+ *
+ * Sets errorCode to U_MISSING_RESOURCE_ERROR when required distance data is
+ * absent and to U_MEMORY_ALLOCATION_ERROR when allocation fails; any failure
+ * from XLikelySubtags::getSingleton() is passed through unchanged.
+ */
+void U_CALLCONV LocaleDistance::initLocaleDistance(UErrorCode &errorCode) {
+    // This function is invoked only via umtx_initOnce().
+    U_ASSERT(gLocaleDistance == nullptr);
+    // Check errorCode before touching the pointer: dereferencing the result
+    // first would dereference a null pointer when getSingleton() fails.
+    const XLikelySubtags *likely = XLikelySubtags::getSingleton(errorCode);
+    if (U_FAILURE(errorCode)) { return; }
+    U_ASSERT(likely != nullptr);
+    const LocaleDistanceData &data = likely->getDistanceData();
+    if (data.distanceTrieBytes == nullptr ||
+            data.regionToPartitions == nullptr || data.partitions == nullptr ||
+            // ok if no paradigms
+            data.distances == nullptr) {
+        errorCode = U_MISSING_RESOURCE_ERROR;
+        return;
+    }
+    gLocaleDistance = new LocaleDistance(data, *likely);
+    if (gLocaleDistance == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_DISTANCE, cleanup);
+}
+
+/**
+ * Returns the shared LocaleDistance instance, running initLocaleDistance()
+ * through umtx_initOnce() on first use. Returns nullptr without doing
+ * anything when errorCode already indicates a failure.
+ */
+const LocaleDistance *LocaleDistance::getSingleton(UErrorCode &errorCode) {
+    if (U_SUCCESS(errorCode)) {
+        umtx_initOnce(gInitOnce, &LocaleDistance::initLocaleDistance, errorCode);
+        return gLocaleDistance;
+    }
+    return nullptr;
+}
+
+/**
+ * Wraps the prebuilt distance data: stores references/pointers into `data`
+ * and `likely`, copies the four default distances out of data.distances, and
+ * derives defaultDemotionPerDesiredLocale from the en-US vs. en-GB distance.
+ */
+LocaleDistance::LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely) :
+ likelySubtags(likely),
+ trie(data.distanceTrieBytes),
+ regionToPartitionsIndex(data.regionToPartitions), partitionArrays(data.partitions),
+ paradigmLSRs(data.paradigms), paradigmLSRsLength(data.paradigmsLength),
+ defaultLanguageDistance(data.distances[IX_DEF_LANG_DISTANCE]),
+ defaultScriptDistance(data.distances[IX_DEF_SCRIPT_DISTANCE]),
+ defaultRegionDistance(data.distances[IX_DEF_REGION_DISTANCE]),
+ minRegionDistance(data.distances[IX_MIN_REGION_DISTANCE]) {
+ // For the default demotion value, use the
+ // default region distance between unrelated Englishes.
+ // Thus, unless demotion is turned off,
+ // a mere region difference for one desired locale
+ // is as good as a perfect match for the next following desired locale.
+ // As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
+ LSR en("en", "Latn", "US", LSR::EXPLICIT_LSR);
+ LSR enGB("en", "Latn", "GB", LSR::EXPLICIT_LSR);
+ const LSR *p_enGB = &enGB;
+ // Single-element "supported" list; threshold 50 is passed in shifted form.
+ int32_t indexAndDistance = getBestIndexAndDistance(en, &p_enGB, 1,
+ shiftDistance(50), ULOCMATCH_FAVOR_LANGUAGE, ULOCMATCH_DIRECTION_WITH_ONE_WAY);
+ defaultDemotionPerDesiredLocale = getDistanceFloor(indexAndDistance);
+}
+
+/**
+ * Scans supportedLSRs for the entry with the smallest distance from `desired`
+ * that is below shiftedThreshold.
+ *
+ * The distance is accumulated as language + script + region; a candidate is
+ * dropped as soon as the running (unshifted) total exceeds the rounded-up
+ * threshold. Each accepted candidate lowers shiftedThreshold to its own
+ * shifted distance; ties with the current best are resolved with
+ * likelySubtags.compareLikely(). With ULOCMATCH_DIRECTION_ONLY_TWO_WAY a
+ * candidate is accepted only if isMatch(supported, desired, ...) also holds.
+ *
+ * @return slIndex << INDEX_SHIFT immediately for a zero-distance match;
+ *         otherwise (bestIndex << INDEX_SHIFT) | shifted distance of the best
+ *         candidate, or INDEX_NEG_1 | shiftDistance(ABOVE_THRESHOLD) if none.
+ */
+int32_t LocaleDistance::getBestIndexAndDistance(
+ const LSR &desired,
+ const LSR **supportedLSRs, int32_t supportedLSRsLength,
+ int32_t shiftedThreshold,
+ ULocMatchFavorSubtag favorSubtag, ULocMatchDirection direction) const {
+ BytesTrie iter(trie);
+ // Look up the desired language only once for all supported LSRs.
+ // Its "distance" is either a match point value of 0, or a non-match negative value.
+ // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
+ int32_t desLangDistance = trieNext(iter, desired.language, false);
+ uint64_t desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0;
+ // Index of the supported LSR with the lowest distance.
+ int32_t bestIndex = -1;
+ // Cached lookup info from XLikelySubtags.compareLikely().
+ int32_t bestLikelyInfo = -1;
+ for (int32_t slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) {
+ const LSR &supported = *supportedLSRs[slIndex];
+ bool star = false;
+ int32_t distance = desLangDistance;
+ if (distance >= 0) {
+ U_ASSERT((distance & DISTANCE_IS_FINAL) == 0);
+ if (slIndex != 0) {
+ iter.resetToState64(desLangState);
+ }
+ distance = trieNext(iter, supported.language, true);
+ }
+ // Note: The data builder verifies that there are no rules with "any" (*) language and
+ // real (non *) script or region subtags.
+ // This means that if the lookup for either language fails we can use
+ // the default distances without further lookups.
+ int32_t flags;
+ if (distance >= 0) {
+ flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
+ distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
+ } else { // <*, *>
+ if (uprv_strcmp(desired.language, supported.language) == 0) {
+ distance = 0;
+ } else {
+ distance = defaultLanguageDistance;
+ }
+ flags = 0;
+ star = true;
+ }
+ U_ASSERT(0 <= distance && distance <= 100);
+ // Round up the shifted threshold (if fraction bits are not 0)
+ // for comparison with un-shifted distances until we need fraction bits.
+ // (If we simply shifted non-zero fraction bits away, then we might ignore a language
+ // when it's really still a micro distance below the threshold.)
+ int32_t roundedThreshold = (shiftedThreshold + DISTANCE_FRACTION_MASK) >> DISTANCE_SHIFT;
+ // We implement "favor subtag" by reducing the language subtag distance
+ // (unscientifically reducing it to a quarter of the normal value),
+ // so that the script distance is relatively more important.
+ // For example, given a default language distance of 80, we reduce it to 20,
+ // which is below the default threshold of 50, which is the default script distance.
+ if (favorSubtag == ULOCMATCH_FAVOR_SCRIPT) {
+ distance >>= 2;
+ }
+ // Let distance == roundedThreshold pass until the tie-breaker logic
+ // at the end of the loop.
+ if (distance > roundedThreshold) {
+ continue;
+ }
+
+ int32_t scriptDistance;
+ if (star || flags != 0) {
+ if (uprv_strcmp(desired.script, supported.script) == 0) {
+ scriptDistance = 0;
+ } else {
+ scriptDistance = defaultScriptDistance;
+ }
+ } else {
+ scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(),
+ desired.script, supported.script);
+ flags = scriptDistance & DISTANCE_IS_FINAL;
+ scriptDistance &= ~DISTANCE_IS_FINAL;
+ }
+ distance += scriptDistance;
+ if (distance > roundedThreshold) {
+ continue;
+ }
+
+ if (uprv_strcmp(desired.region, supported.region) == 0) {
+ // regionDistance = 0
+ } else if (star || (flags & DISTANCE_IS_FINAL) != 0) {
+ distance += defaultRegionDistance;
+ } else {
+ int32_t remainingThreshold = roundedThreshold - distance;
+ if (minRegionDistance > remainingThreshold) {
+ continue;
+ }
+
+ // From here on we know the regions are not equal.
+ // Map each region to zero or more partitions. (zero = one non-matching string)
+ // (Each array of single-character partition strings is encoded as one string.)
+ // If either side has more than one, then we find the maximum distance.
+ // This could be optimized by adding some more structure, but probably not worth it.
+ distance += getRegionPartitionsDistance(
+ iter, iter.getState64(),
+ partitionsForRegion(desired),
+ partitionsForRegion(supported),
+ remainingThreshold);
+ }
+ int32_t shiftedDistance = shiftDistance(distance);
+ if (shiftedDistance == 0) {
+ // Distinguish between equivalent but originally unequal locales via an
+ // additional micro distance.
+ shiftedDistance |= (desired.flags ^ supported.flags);
+ if (shiftedDistance < shiftedThreshold) {
+ if (direction != ULOCMATCH_DIRECTION_ONLY_TWO_WAY ||
+ // Is there also a match when we swap desired/supported?
+ isMatch(supported, desired, shiftedThreshold, favorSubtag)) {
+ if (shiftedDistance == 0) {
+ return slIndex << INDEX_SHIFT;
+ }
+ bestIndex = slIndex;
+ shiftedThreshold = shiftedDistance;
+ bestLikelyInfo = -1;
+ }
+ }
+ } else {
+ if (shiftedDistance < shiftedThreshold) {
+ if (direction != ULOCMATCH_DIRECTION_ONLY_TWO_WAY ||
+ // Is there also a match when we swap desired/supported?
+ isMatch(supported, desired, shiftedThreshold, favorSubtag)) {
+ bestIndex = slIndex;
+ shiftedThreshold = shiftedDistance;
+ bestLikelyInfo = -1;
+ }
+ } else if (shiftedDistance == shiftedThreshold && bestIndex >= 0) {
+ if (direction != ULOCMATCH_DIRECTION_ONLY_TWO_WAY ||
+ // Is there also a match when we swap desired/supported?
+ isMatch(supported, desired, shiftedThreshold, favorSubtag)) {
+ bestLikelyInfo = likelySubtags.compareLikely(
+ supported, *supportedLSRs[bestIndex], bestLikelyInfo);
+ if ((bestLikelyInfo & 1) != 0) {
+ // This supported locale matches as well as the previous best match,
+ // and neither matches perfectly,
+ // but this one is "more likely" (has more-default subtags).
+ bestIndex = slIndex;
+ }
+ }
+ }
+ }
+ }
+ return bestIndex >= 0 ?
+ (bestIndex << INDEX_SHIFT) | shiftedThreshold :
+ INDEX_NEG_1 | shiftDistance(ABOVE_THRESHOLD);
+}
+
+/**
+ * Looks up the distance for the <desired, supported> script pair, continuing
+ * in the trie from the current position of `iter`.
+ *
+ * If the pair has no entry, `iter` is reset to startState and advanced over
+ * the <*, *> entry: the result is then 0 for identical scripts and the <*, *>
+ * value otherwise, with DISTANCE_IS_FINAL or-ed in when that entry is a final
+ * value. A direct hit is returned exactly as trieNext() produced it.
+ */
+int32_t LocaleDistance::getDesSuppScriptDistance(
+        BytesTrie &iter, uint64_t startState, const char *desired, const char *supported) {
+    // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
+    int32_t pairDistance = trieNext(iter, desired, false);
+    if (pairDistance >= 0) {
+        pairDistance = trieNext(iter, supported, true);
+    }
+    if (pairDistance >= 0) {
+        return pairDistance;
+    }
+
+    // No specific rule: fall back to <*, *>.
+    const UStringTrieResult starResult = iter.resetToState64(startState).next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_VALUE(starResult));
+    int32_t fallbackDistance = 0;  // same script
+    if (uprv_strcmp(desired, supported) != 0) {
+        fallbackDistance = iter.getValue();
+        U_ASSERT(fallbackDistance >= 0);
+    }
+    return starResult == USTRINGTRIE_FINAL_VALUE ?
+        (fallbackDistance | DISTANCE_IS_FINAL) :
+        fallbackDistance;
+}
+
+/**
+ * Computes the region distance between two sets of region partitions, each
+ * given as a NUL-terminated string of single-character partition IDs.
+ *
+ * With exactly one partition on each side, the pair is looked up directly
+ * (falling back to <*, *>). Otherwise every (desired, supported) pair is
+ * examined and the maximum distance is returned, except that the first
+ * distance found to exceed `threshold` is returned immediately. The <*, *>
+ * fallback is consulted at most once per call.
+ *
+ * `iter` is repositioned repeatedly; startState is the trie state to which
+ * it is reset before each desired-partition lookup and before each fallback.
+ */
+int32_t LocaleDistance::getRegionPartitionsDistance(
+ BytesTrie &iter, uint64_t startState,
+ const char *desiredPartitions, const char *supportedPartitions, int32_t threshold) {
+ char desired = *desiredPartitions++;
+ char supported = *supportedPartitions++;
+ U_ASSERT(desired != 0 && supported != 0);
+ // See if we have single desired/supported partitions, from NUL-terminated
+ // partition strings without explicit length.
+ bool suppLengthGt1 = *supportedPartitions != 0; // gt1: more than 1 character
+ // equivalent to: if (desLength == 1 && suppLength == 1)
+ if (*desiredPartitions == 0 && !suppLengthGt1) {
+ // Fastpath for single desired/supported partitions.
+ UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
+ if (USTRINGTRIE_HAS_NEXT(result)) {
+ result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
+ if (USTRINGTRIE_HAS_VALUE(result)) {
+ return iter.getValue();
+ }
+ }
+ return getFallbackRegionDistance(iter, startState);
+ }
+
+ const char *supportedStart = supportedPartitions - 1; // for restart of inner loop
+ int32_t regionDistance = 0;
+ // Fall back to * only once, not for each pair of partition strings.
+ bool star = false;
+ for (;;) {
+ // Look up each desired-partition string only once,
+ // not for each (desired, supported) pair.
+ UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
+ if (USTRINGTRIE_HAS_NEXT(result)) {
+ // The saved state is only needed when the inner loop runs more than once.
+ uint64_t desState = suppLengthGt1 ? iter.getState64() : 0;
+ for (;;) {
+ result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
+ int32_t d;
+ if (USTRINGTRIE_HAS_VALUE(result)) {
+ d = iter.getValue();
+ } else if (star) {
+ // Fallback distance was already folded into regionDistance earlier.
+ d = 0;
+ } else {
+ d = getFallbackRegionDistance(iter, startState);
+ star = true;
+ }
+ if (d > threshold) {
+ return d;
+ } else if (regionDistance < d) {
+ regionDistance = d;
+ }
+ if ((supported = *supportedPartitions++) != 0) {
+ iter.resetToState64(desState);
+ } else {
+ break;
+ }
+ }
+ } else if (!star) {
+ int32_t d = getFallbackRegionDistance(iter, startState);
+ if (d > threshold) {
+ return d;
+ } else if (regionDistance < d) {
+ regionDistance = d;
+ }
+ star = true;
+ }
+ if ((desired = *desiredPartitions++) != 0) {
+ iter.resetToState64(startState);
+ supportedPartitions = supportedStart;
+ supported = *supportedPartitions++;
+ } else {
+ break;
+ }
+ }
+ return regionDistance;
+}
+
+/**
+ * Resets `iter` to startState, advances it over the <*, *> entry and returns
+ * the value stored there.
+ */
+int32_t LocaleDistance::getFallbackRegionDistance(BytesTrie &iter, uint64_t startState) {
+// The trie result is captured only in debug builds, where U_ASSERT reads it;
+// the #if splits a single statement so the lookup itself always runs.
+#if U_DEBUG
+ UStringTrieResult result =
+#endif
+ iter.resetToState64(startState).next(u'*'); // <*, *>
+ U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
+ int32_t distance = iter.getValue();
+ U_ASSERT(distance >= 0);
+ return distance;
+}
+
+/**
+ * Advances `iter` over the subtag `s`, one byte per character, with
+ * END_OF_SUBTAG or-ed onto the last character.
+ *
+ * @param wantValue if true, the subtag must end on a trie value, which is
+ *        returned (with DISTANCE_IS_FINAL or-ed in for a final value);
+ *        if false, the trie must continue after the subtag and 0 is returned.
+ * @return the value or 0 as described above, or -1 when `s` is empty or the
+ *         trie does not contain the subtag in the required form.
+ */
+int32_t LocaleDistance::trieNext(BytesTrie &iter, const char *s, bool wantValue) {
+    uint8_t current = *s;
+    if (current == 0) {
+        return -1;  // no empty subtags in the distance data
+    }
+
+    // Feed every character that is followed by another one.
+    // EBCDIC: If a character is not an invariant character,
+    // then its converted value is 0 and will simply not match anything, which is harmless.
+    for (uint8_t following = *++s; following != 0; following = *++s) {
+        const uint8_t ascii = uprv_invCharToAscii(current);
+        if (!USTRINGTRIE_HAS_NEXT(iter.next(ascii))) {
+            return -1;
+        }
+        current = following;
+    }
+
+    // last character of this subtag
+    const uint8_t lastAscii = uprv_invCharToAscii(current);
+    const UStringTrieResult result = iter.next(lastAscii | END_OF_SUBTAG);
+    if (!wantValue) {
+        return USTRINGTRIE_HAS_NEXT(result) ? 0 : -1;
+    }
+    if (!USTRINGTRIE_HAS_VALUE(result)) {
+        return -1;
+    }
+    int32_t value = iter.getValue();
+    if (result == USTRINGTRIE_FINAL_VALUE) {
+        value |= DISTANCE_IS_FINAL;
+    }
+    return value;
+}
+
+/**
+ * Reports whether `lsr` is equivalent (per LSR::isEquivalentTo) to one of
+ * the paradigm LSRs held by this object.
+ */
+UBool LocaleDistance::isParadigmLSR(const LSR &lsr) const {
+    // Linear search for a very short list (length 6 as of 2019),
+    // because we look for equivalence not equality, and
+    // because it's easy.
+    // If there are many paradigm LSRs we should use a hash set
+    // with custom comparator and hasher.
+    U_ASSERT(paradigmLSRsLength <= 15);
+    int32_t index = 0;
+    while (index < paradigmLSRsLength && !lsr.isEquivalentTo(paradigmLSRs[index])) {
+        ++index;
+    }
+    // Stopped before the end only if an equivalent entry was found.
+    return index < paradigmLSRsLength;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/locdistance.h b/thirdparty/icu4c/common/locdistance.h
new file mode 100644
index 0000000000..51b777e627
--- /dev/null
+++ b/thirdparty/icu4c/common/locdistance.h
@@ -0,0 +1,151 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// locdistance.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCDISTANCE_H__
+#define __LOCDISTANCE_H__
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "lsr.h"
+
+U_NAMESPACE_BEGIN
+
+struct LocaleDistanceData;
+
+/**
+ * Offline-built data for LocaleMatcher.
+ * Mostly but not only the data for mapping locales to their maximized forms.
+ */
+class LocaleDistance final : public UMemory {
+public:
+ static const LocaleDistance *getSingleton(UErrorCode &errorCode);
+
+ static int32_t shiftDistance(int32_t distance) {
+ return distance << DISTANCE_SHIFT;
+ }
+
+ static int32_t getShiftedDistance(int32_t indexAndDistance) {
+ return indexAndDistance & DISTANCE_MASK;
+ }
+
+ static double getDistanceDouble(int32_t indexAndDistance) {
+ double shiftedDistance = getShiftedDistance(indexAndDistance);
+ return shiftedDistance / (1 << DISTANCE_SHIFT);
+ }
+
+ static int32_t getDistanceFloor(int32_t indexAndDistance) {
+ return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;
+ }
+
+ static int32_t getIndex(int32_t indexAndDistance) {
+ // assert indexAndDistance >= 0;
+ return indexAndDistance >> INDEX_SHIFT;
+ }
+
+ /**
+ * Finds the supported LSR with the smallest distance from the desired one.
+ * Equivalent LSR subtags must be normalized into a canonical form.
+ *
+ * <p>Returns the index of the lowest-distance supported LSR in the high bits
+ * (negative if none has a distance below the threshold),
+ * and its distance (0..ABOVE_THRESHOLD) in the low bits.
+ */
+ int32_t getBestIndexAndDistance(const LSR &desired,
+ const LSR **supportedLSRs, int32_t supportedLSRsLength,
+ int32_t shiftedThreshold,
+ ULocMatchFavorSubtag favorSubtag,
+ ULocMatchDirection direction) const;
+
+ UBool isParadigmLSR(const LSR &lsr) const;
+
+ int32_t getDefaultScriptDistance() const {
+ return defaultScriptDistance;
+ }
+
+ int32_t getDefaultDemotionPerDesiredLocale() const {
+ return defaultDemotionPerDesiredLocale;
+ }
+
+private:
+ // The distance is shifted left to gain some fraction bits.
+ static constexpr int32_t DISTANCE_SHIFT = 3;
+ static constexpr int32_t DISTANCE_FRACTION_MASK = 7;
+ // 7 bits for 0..100
+ static constexpr int32_t DISTANCE_INT_SHIFT = 7;
+ static constexpr int32_t INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT;
+ static constexpr int32_t DISTANCE_MASK = 0x3ff;
+ // tic constexpr int32_t MAX_INDEX = 0x1fffff; // avoids sign bit
+ static constexpr int32_t INDEX_NEG_1 = 0xfffffc00;
+
+ LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely);
+ LocaleDistance(const LocaleDistance &other) = delete;
+ LocaleDistance &operator=(const LocaleDistance &other) = delete;
+
+ static void initLocaleDistance(UErrorCode &errorCode);
+
+ UBool isMatch(const LSR &desired, const LSR &supported,
+ int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const {
+ const LSR *pSupp = &supported;
+ return getBestIndexAndDistance(
+ desired, &pSupp, 1,
+ shiftedThreshold, favorSubtag, ULOCMATCH_DIRECTION_WITH_ONE_WAY) >= 0;
+ }
+
+ static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
+ const char *desired, const char *supported);
+
+ static int32_t getRegionPartitionsDistance(
+ BytesTrie &iter, uint64_t startState,
+ const char *desiredPartitions, const char *supportedPartitions,
+ int32_t threshold);
+
+ static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);
+
+ static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);
+
+ const char *partitionsForRegion(const LSR &lsr) const {
+ // ill-formed region -> one non-matching string
+ int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
+ return partitionArrays[pIndex];
+ }
+
+ int32_t getDefaultRegionDistance() const {
+ return defaultRegionDistance;
+ }
+
+ const XLikelySubtags &likelySubtags;
+
+ // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
+ // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
+ // There is also a trie value for each subsequence of whole subtags.
+ // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
+ BytesTrie trie;
+
+ /**
+ * Maps each region to zero or more single-character partitions.
+ */
+ const uint8_t *regionToPartitionsIndex;
+ const char **partitionArrays;
+
+ /**
+ * Used to get the paradigm region for a cluster, if there is one.
+ */
+ const LSR *paradigmLSRs;
+ int32_t paradigmLSRsLength;
+
+ int32_t defaultLanguageDistance;
+ int32_t defaultScriptDistance;
+ int32_t defaultRegionDistance;
+ int32_t minRegionDistance;
+ int32_t defaultDemotionPerDesiredLocale;
+};
+
+U_NAMESPACE_END
+
+#endif // __LOCDISTANCE_H__
diff --git a/thirdparty/icu4c/common/locdspnm.cpp b/thirdparty/icu4c/common/locdspnm.cpp
new file mode 100644
index 0000000000..43334f5196
--- /dev/null
+++ b/thirdparty/icu4c/common/locdspnm.cpp
@@ -0,0 +1,1110 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2016, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/locdspnm.h"
+#include "unicode/simpleformatter.h"
+#include "unicode/ucasemap.h"
+#include "unicode/ures.h"
+#include "unicode/udisplaycontext.h"
+#include "unicode/brkiter.h"
+#include "unicode/ucurr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "mutex.h"
+#include "ulocimp.h"
+#include "umutex.h"
+#include "ureslocs.h"
+#include "uresimp.h"
+
+#include <stdarg.h>
+
+/**
+ * Concatenate a number of null-terminated strings to buffer, leaving a
+ * null-terminated string. The last argument should be the null pointer.
+ * Return the length of the string in the buffer, not counting the trailing
+ * null. Return -1 if there is an error (buffer is null, or buflen < 1).
+ */
+static int32_t ncat(char *buffer, uint32_t buflen, ...) {
+ va_list args;
+ char *str;
+ char *p = buffer;
+ const char* e = buffer + buflen - 1;
+
+ if (buffer == NULL || buflen < 1) {
+ return -1;
+ }
+
+ va_start(args, buflen);
+ while ((str = va_arg(args, char *)) != 0) {
+ char c;
+ while (p != e && (c = *str++) != 0) {
+ *p++ = c;
+ }
+ }
+ *p = 0;
+ va_end(args);
+
+ return static_cast<int32_t>(p - buffer);
+}
+
+U_NAMESPACE_BEGIN
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ // Access resource data for locale components.
+ // Wrap code in uloc.c for now.
+ //
+ // Holds a privately allocated copy of the resource bundle path plus the locale
+ // to look names up in. get() falls back to the item key when nothing is found;
+ // getNoFallback() marks the result bogus instead.
+ class ICUDataTable {
+     const char* path;   // owned copy, released in the destructor; NULL if unset
+     Locale locale;
+
+     // The destructor frees path, so a member-wise copy would lead to a double
+     // free. Nothing needs to copy this class; forbid it outright.
+     ICUDataTable(const ICUDataTable&) = delete;
+     ICUDataTable& operator=(const ICUDataTable&) = delete;
+
+ public:
+     ICUDataTable(const char* path, const Locale& locale);
+     ~ICUDataTable();
+
+     const Locale& getLocale();
+
+     UnicodeString& get(const char* tableKey, const char* itemKey,
+                        UnicodeString& result) const;
+     UnicodeString& get(const char* tableKey, const char* subTableKey, const char* itemKey,
+                        UnicodeString& result) const;
+
+     UnicodeString& getNoFallback(const char* tableKey, const char* itemKey,
+                                  UnicodeString &result) const;
+     UnicodeString& getNoFallback(const char* tableKey, const char* subTableKey, const char* itemKey,
+                                  UnicodeString &result) const;
+ };
+
+ // Two-key lookup: same as the three-key get() with no sub-table.
+ inline UnicodeString &
+ ICUDataTable::get(const char* tableKey, const char* itemKey, UnicodeString& result) const {
+     const char* const noSubTable = NULL;
+     return get(tableKey, noSubTable, itemKey, result);
+ }
+
+ // Two-key lookup: same as the three-key getNoFallback() with no sub-table.
+ inline UnicodeString &
+ ICUDataTable::getNoFallback(const char* tableKey, const char* itemKey, UnicodeString& result) const {
+     const char* const noSubTable = NULL;
+     return getNoFallback(tableKey, noSubTable, itemKey, result);
+ }
+
+ // Stores a private copy of path together with locale. If path is NULL or the
+ // copy cannot be allocated, the table keeps a NULL path and the root locale.
+ ICUDataTable::ICUDataTable(const char* path, const Locale& locale)
+     : path(NULL), locale(Locale::getRoot())
+ {
+     if (path == NULL) {
+         return;
+     }
+     const int32_t pathLength = static_cast<int32_t>(uprv_strlen(path));
+     char *copy = (char *) uprv_malloc(pathLength + 1);
+     if (copy == NULL) {
+         return;
+     }
+     uprv_strcpy(copy, path);
+     this->path = copy;
+     this->locale = locale;
+ }
+
+ // Releases the path copy made by the constructor, if there is one.
+ ICUDataTable::~ICUDataTable() {
+     if (path != NULL) {
+         uprv_free((void*) path);
+         path = NULL;
+     }
+ }
+
+ // Returns the locale this table looks names up in.
+ const Locale&
+ ICUDataTable::getLocale() {
+     return this->locale;
+ }
+
+ // Looks up tableKey/subTableKey/itemKey for this table's locale.
+ // If the lookup fails or yields an empty string, result is set to itemKey itself.
+ UnicodeString &
+ ICUDataTable::get(const char* tableKey, const char* subTableKey, const char* itemKey,
+                   UnicodeString &result) const {
+     int32_t length = 0;
+     UErrorCode errorCode = U_ZERO_ERROR;
+
+     const UChar *text = uloc_getTableStringWithFallback(path, locale.getName(),
+                                                         tableKey, subTableKey, itemKey,
+                                                         &length, &errorCode);
+     if (U_FAILURE(errorCode) || length <= 0) {
+         return result.setTo(UnicodeString(itemKey, -1, US_INV));
+     }
+     return result.setTo(text, length);
+ }
+
+ // Looks up tableKey/subTableKey/itemKey for this table's locale.
+ // Unlike get(), a failed lookup marks result as bogus rather than
+ // substituting itemKey.
+ UnicodeString &
+ ICUDataTable::getNoFallback(const char* tableKey, const char* subTableKey, const char* itemKey,
+                             UnicodeString& result) const {
+     int32_t length = 0;
+     UErrorCode errorCode = U_ZERO_ERROR;
+
+     const UChar *text = uloc_getTableStringWithFallback(path, locale.getName(),
+                                                         tableKey, subTableKey, itemKey,
+                                                         &length, &errorCode);
+     if (U_FAILURE(errorCode)) {
+         result.setToBogus();
+         return result;
+     }
+     return result.setTo(text, length);
+ }
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ // Out-of-line definition of the base class destructor; nothing to release.
+ LocaleDisplayNames::~LocaleDisplayNames() {
+ }
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ #if 0 // currently unused
+
+ // Compiled-out implementation of LocaleDisplayNames that uses no resource data:
+ // every *DisplayName() method returns its input code unchanged as the name,
+ // and getLocale() reports the root locale.
+ class DefaultLocaleDisplayNames : public LocaleDisplayNames {
+ UDialectHandling dialectHandling;
+
+ public:
+ // constructor
+ DefaultLocaleDisplayNames(UDialectHandling dialectHandling);
+
+ virtual ~DefaultLocaleDisplayNames();
+
+ virtual const Locale& getLocale() const;
+ virtual UDialectHandling getDialectHandling() const;
+
+ virtual UnicodeString& localeDisplayName(const Locale& locale,
+ UnicodeString& result) const;
+ virtual UnicodeString& localeDisplayName(const char* localeId,
+ UnicodeString& result) const;
+ virtual UnicodeString& languageDisplayName(const char* lang,
+ UnicodeString& result) const;
+ virtual UnicodeString& scriptDisplayName(const char* script,
+ UnicodeString& result) const;
+ virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode,
+ UnicodeString& result) const;
+ virtual UnicodeString& regionDisplayName(const char* region,
+ UnicodeString& result) const;
+ virtual UnicodeString& variantDisplayName(const char* variant,
+ UnicodeString& result) const;
+ virtual UnicodeString& keyDisplayName(const char* key,
+ UnicodeString& result) const;
+ virtual UnicodeString& keyValueDisplayName(const char* key,
+ const char* value,
+ UnicodeString& result) const;
+ };
+
+ DefaultLocaleDisplayNames::DefaultLocaleDisplayNames(UDialectHandling dialectHandling)
+ : dialectHandling(dialectHandling) {
+ }
+
+ DefaultLocaleDisplayNames::~DefaultLocaleDisplayNames() {
+ }
+
+ const Locale&
+ DefaultLocaleDisplayNames::getLocale() const {
+ return Locale::getRoot();
+ }
+
+ UDialectHandling
+ DefaultLocaleDisplayNames::getDialectHandling() const {
+ return dialectHandling;
+ }
+
+ // Returns the locale's own name (ID) as its display name.
+ UnicodeString&
+ DefaultLocaleDisplayNames::localeDisplayName(const Locale& locale,
+ UnicodeString& result) const {
+ return result = UnicodeString(locale.getName(), -1, US_INV);
+ }
+
+ UnicodeString&
+ DefaultLocaleDisplayNames::localeDisplayName(const char* localeId,
+ UnicodeString& result) const {
+ return result = UnicodeString(localeId, -1, US_INV);
+ }
+
+ UnicodeString&
+ DefaultLocaleDisplayNames::languageDisplayName(const char* lang,
+ UnicodeString& result) const {
+ return result = UnicodeString(lang, -1, US_INV);
+ }
+
+ UnicodeString&
+ DefaultLocaleDisplayNames::scriptDisplayName(const char* script,
+ UnicodeString& result) const {
+ return result = UnicodeString(script, -1, US_INV);
+ }
+
+ // Uses the name reported by uscript_getName(); yields an empty string when
+ // that function returns no name for scriptCode.
+ UnicodeString&
+ DefaultLocaleDisplayNames::scriptDisplayName(UScriptCode scriptCode,
+ UnicodeString& result) const {
+ const char* name = uscript_getName(scriptCode);
+ if (name) {
+ return result = UnicodeString(name, -1, US_INV);
+ }
+ return result.remove();
+ }
+
+ UnicodeString&
+ DefaultLocaleDisplayNames::regionDisplayName(const char* region,
+ UnicodeString& result) const {
+ return result = UnicodeString(region, -1, US_INV);
+ }
+
+ UnicodeString&
+ DefaultLocaleDisplayNames::variantDisplayName(const char* variant,
+ UnicodeString& result) const {
+ return result = UnicodeString(variant, -1, US_INV);
+ }
+
+ UnicodeString&
+ DefaultLocaleDisplayNames::keyDisplayName(const char* key,
+ UnicodeString& result) const {
+ return result = UnicodeString(key, -1, US_INV);
+ }
+
+ // The key is ignored; the value itself is returned as the display name.
+ UnicodeString&
+ DefaultLocaleDisplayNames::keyValueDisplayName(const char* /* key */,
+ const char* value,
+ UnicodeString& result) const {
+ return result = UnicodeString(value, -1, US_INV);
+ }
+
+ #endif // currently unused class DefaultLocaleDisplayNames
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ // Resource-backed implementation of LocaleDisplayNames.
+ // Names are read from two ICUDataTable instances (language data and region data)
+ // and combined using patterns loaded in initialize().
+ class LocaleDisplayNamesImpl : public LocaleDisplayNames {
+ // Locale reported by getLocale(); assigned in initialize().
+ Locale locale;
+ UDialectHandling dialectHandling;
+ ICUDataTable langData;
+ ICUDataTable regionData;
+ // Joins two elements of the parenthesized remainder (default "{0}, {1}").
+ SimpleFormatter separatorFormat;
+ // Combines the base name with the remainder (default "{0} ({1})").
+ SimpleFormatter format;
+ // Combines a keyword display name with its value display name.
+ SimpleFormatter keyTypeFormat;
+ UDisplayContext capitalizationContext;
+ // Sentence break iterator used for titlecasing; NULL when not needed or unavailable.
+ #if !UCONFIG_NO_BREAK_ITERATION
+ BreakIterator* capitalizationBrkIter;
+ #else
+ UObject* capitalizationBrkIter;
+ #endif
+ // Parenthesis characters used by the pattern, and the brackets that replace
+ // them when they occur inside the remainder.
+ UnicodeString formatOpenParen;
+ UnicodeString formatReplaceOpenParen;
+ UnicodeString formatCloseParen;
+ UnicodeString formatReplaceCloseParen;
+ UDisplayContext nameLength;
+ UDisplayContext substitute;
+
+ // Constants for capitalization context usage types.
+ enum CapContextUsage {
+ kCapContextUsageLanguage,
+ kCapContextUsageScript,
+ kCapContextUsageTerritory,
+ kCapContextUsageVariant,
+ kCapContextUsageKey,
+ kCapContextUsageKeyValue,
+ kCapContextUsageCount
+ };
+ // Capitalization transforms. For each usage type, indicates whether to titlecase for
+ // the context specified in capitalizationContext (which we know at construction time)
+ UBool fCapitalization[kCapContextUsageCount];
+
+ public:
+ // constructor
+ LocaleDisplayNamesImpl(const Locale& locale, UDialectHandling dialectHandling);
+ LocaleDisplayNamesImpl(const Locale& locale, UDisplayContext *contexts, int32_t length);
+ virtual ~LocaleDisplayNamesImpl();
+
+ virtual const Locale& getLocale() const;
+ virtual UDialectHandling getDialectHandling() const;
+ virtual UDisplayContext getContext(UDisplayContextType type) const;
+
+ virtual UnicodeString& localeDisplayName(const Locale& locale,
+ UnicodeString& result) const;
+ virtual UnicodeString& localeDisplayName(const char* localeId,
+ UnicodeString& result) const;
+ virtual UnicodeString& languageDisplayName(const char* lang,
+ UnicodeString& result) const;
+ virtual UnicodeString& scriptDisplayName(const char* script,
+ UnicodeString& result) const;
+ virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode,
+ UnicodeString& result) const;
+ virtual UnicodeString& regionDisplayName(const char* region,
+ UnicodeString& result) const;
+ virtual UnicodeString& variantDisplayName(const char* variant,
+ UnicodeString& result) const;
+ virtual UnicodeString& keyDisplayName(const char* key,
+ UnicodeString& result) const;
+ virtual UnicodeString& keyValueDisplayName(const char* key,
+ const char* value,
+ UnicodeString& result) const;
+ private:
+ UnicodeString& localeIdName(const char* localeId,
+ UnicodeString& result, bool substitute) const;
+ UnicodeString& appendWithSep(UnicodeString& buffer, const UnicodeString& src) const;
+ UnicodeString& adjustForUsageAndContext(CapContextUsage usage, UnicodeString& result) const;
+ // The skipAdjust overloads let localeDisplayName() obtain component names
+ // without the per-component capitalization adjustment.
+ UnicodeString& scriptDisplayName(const char* script, UnicodeString& result, UBool skipAdjust) const;
+ UnicodeString& regionDisplayName(const char* region, UnicodeString& result, UBool skipAdjust) const;
+ UnicodeString& variantDisplayName(const char* variant, UnicodeString& result, UBool skipAdjust) const;
+ UnicodeString& keyDisplayName(const char* key, UnicodeString& result, UBool skipAdjust) const;
+ UnicodeString& keyValueDisplayName(const char* key, const char* value,
+ UnicodeString& result, UBool skipAdjust) const;
+ // Shared tail of both constructors: loads patterns and capitalization data.
+ void initialize(void);
+
+ struct CapitalizationContextSink;
+ };
+
+ // Builds display names for locale with the given dialect handling; all other
+ // display contexts take their defaults (no capitalization, full length,
+ // substitution enabled).
+ LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale,
+                                                UDialectHandling dialectHandling)
+     : dialectHandling(dialectHandling),
+       langData(U_ICUDATA_LANG, locale),
+       regionData(U_ICUDATA_REGION, locale),
+       capitalizationContext(UDISPCTX_CAPITALIZATION_NONE),
+       capitalizationBrkIter(NULL),
+       nameLength(UDISPCTX_LENGTH_FULL),
+       substitute(UDISPCTX_SUBSTITUTE)
+ {
+     initialize();
+ }
+
+ // Builds display names for locale from an array of display context values.
+ // Each value overrides the default of the context type encoded in its bits
+ // above the low 8; values of an unrecognized type are ignored.
+ LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale,
+                                                UDisplayContext *contexts, int32_t length)
+     : dialectHandling(ULDN_STANDARD_NAMES),
+       langData(U_ICUDATA_LANG, locale),
+       regionData(U_ICUDATA_REGION, locale),
+       capitalizationContext(UDISPCTX_CAPITALIZATION_NONE),
+       capitalizationBrkIter(NULL),
+       nameLength(UDISPCTX_LENGTH_FULL),
+       substitute(UDISPCTX_SUBSTITUTE)
+ {
+     for (int32_t i = 0; i < length; ++i) {
+         const UDisplayContext value = contexts[i];
+         const UDisplayContextType selector = (UDisplayContextType)((uint32_t)value >> 8);
+         if (selector == UDISPCTX_TYPE_DIALECT_HANDLING) {
+             dialectHandling = (UDialectHandling)value;
+         } else if (selector == UDISPCTX_TYPE_CAPITALIZATION) {
+             capitalizationContext = value;
+         } else if (selector == UDISPCTX_TYPE_DISPLAY_LENGTH) {
+             nameLength = value;
+         } else if (selector == UDISPCTX_TYPE_SUBSTITUTE_HANDLING) {
+             substitute = value;
+         }
+     }
+     initialize();
+ }
+
+ // Resource sink for the "contextTransforms" table. For every usage it
+ // recognizes, it records in parent.fCapitalization whether names of that
+ // usage are to be titlecased in the parent's capitalization context.
+ struct LocaleDisplayNamesImpl::CapitalizationContextSink : public ResourceSink {
+     UBool hasCapitalizationUsage;   // TRUE once any usage requested titlecasing
+     LocaleDisplayNamesImpl& parent;
+
+     CapitalizationContextSink(LocaleDisplayNamesImpl& _parent)
+         : hasCapitalizationUsage(FALSE), parent(_parent) {}
+     virtual ~CapitalizationContextSink();
+
+     virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/,
+                      UErrorCode &errorCode) {
+         // Resource keys handled here and the usage slot each one controls.
+         static const struct {
+             const char *name;
+             CapContextUsage usage;
+         } kUsages[] = {
+             { "key", kCapContextUsageKey },
+             { "keyValue", kCapContextUsageKeyValue },
+             { "languages", kCapContextUsageLanguage },
+             { "script", kCapContextUsageScript },
+             { "territory", kCapContextUsageTerritory },
+             { "variant", kCapContextUsageVariant }
+         };
+         const int32_t usageCount = (int32_t)(sizeof(kUsages) / sizeof(kUsages[0]));
+
+         ResourceTable contexts = value.getTable(errorCode);
+         if (U_FAILURE(errorCode)) { return; }
+         for (int i = 0; contexts.getKeyAndValue(i, key, value); ++i) {
+             int32_t match = 0;
+             while (match < usageCount && uprv_strcmp(key, kUsages[match].name) != 0) {
+                 ++match;
+             }
+             if (match == usageCount) {
+                 continue;  // not a usage we track
+             }
+
+             int32_t len = 0;
+             const int32_t* intVector = value.getIntVector(len, errorCode);
+             if (U_FAILURE(errorCode)) { return; }
+             if (len >= 2) {
+                 // Element 0 applies to UI list/menu context, element 1 otherwise.
+                 const int32_t titlecaseInt =
+                     (parent.capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU)
+                         ? intVector[0] : intVector[1];
+                 if (titlecaseInt != 0) {
+                     parent.fCapitalization[kUsages[match].usage] = TRUE;
+                     hasCapitalizationUsage = TRUE;
+                 }
+             }
+         }
+     }
+ };
+
+ // Defined out of line, as virtual destructors must be; nothing to release.
+ LocaleDisplayNamesImpl::CapitalizationContextSink::~CapitalizationContextSink() {
+ }
+
+ // Completes construction; called by both constructors once the display
+ // contexts are known:
+ //  - picks the locale reported by getLocale(),
+ //  - loads the separator, locale display and key/type patterns, each with a
+ //    built-in default when the resource data has none,
+ //  - determines which parenthesis characters the display pattern uses,
+ //  - loads capitalization (contextTransforms) data and, if titlecasing can be
+ //    needed, creates a sentence break iterator.
+ // Errors are not reported to the caller; on failure the affected feature is
+ // simply left unset.
+ void
+ LocaleDisplayNamesImpl::initialize(void) {
+     // Use the language data's locale unless it is root; then use the region data's.
+     locale = langData.getLocale() == Locale::getRoot()
+         ? regionData.getLocale()
+         : langData.getLocale();
+
+     UnicodeString sep;
+     langData.getNoFallback("localeDisplayPattern", "separator", sep);
+     if (sep.isBogus()) {
+         sep = UnicodeString("{0}, {1}", -1, US_INV);
+     }
+     UErrorCode status = U_ZERO_ERROR;
+     separatorFormat.applyPatternMinMaxArguments(sep, 2, 2, status);
+
+     UnicodeString pattern;
+     langData.getNoFallback("localeDisplayPattern", "pattern", pattern);
+     if (pattern.isBogus()) {
+         pattern = UnicodeString("{0} ({1})", -1, US_INV);
+     }
+     format.applyPatternMinMaxArguments(pattern, 2, 2, status);
+     if (pattern.indexOf((UChar)0xFF08) >= 0) {
+         formatOpenParen.setTo((UChar)0xFF08);         // fullwidth (
+         formatReplaceOpenParen.setTo((UChar)0xFF3B);  // fullwidth [
+         formatCloseParen.setTo((UChar)0xFF09);        // fullwidth )
+         formatReplaceCloseParen.setTo((UChar)0xFF3D); // fullwidth ]
+     } else {
+         formatOpenParen.setTo((UChar)0x0028);         // (
+         formatReplaceOpenParen.setTo((UChar)0x005B);  // [
+         formatCloseParen.setTo((UChar)0x0029);        // )
+         formatReplaceCloseParen.setTo((UChar)0x005D); // ]
+     }
+
+     // Must be getNoFallback(): get() substitutes the item key on a miss and never
+     // returns a bogus string, which would make the default below unreachable.
+     UnicodeString ktPattern;
+     langData.getNoFallback("localeDisplayPattern", "keyTypePattern", ktPattern);
+     if (ktPattern.isBogus()) {
+         ktPattern = UnicodeString("{0}={1}", -1, US_INV);
+     }
+     keyTypeFormat.applyPatternMinMaxArguments(ktPattern, 2, 2, status);
+
+     uprv_memset(fCapitalization, 0, sizeof(fCapitalization));
+ #if !UCONFIG_NO_BREAK_ITERATION
+     // Only get the context data if we need it! This is a const object so we know now...
+     // Also check whether we will need a break iterator (depends on the data)
+     UBool needBrkIter = FALSE;
+     if (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE) {
+         LocalUResourceBundlePointer resource(ures_open(NULL, locale.getName(), &status));
+         if (U_FAILURE(status)) { return; }
+         CapitalizationContextSink sink(*this);
+         ures_getAllItemsWithFallback(resource.getAlias(), "contextTransforms", sink, status);
+         if (status == U_MISSING_RESOURCE_ERROR) {
+             // Silently ignore. Not every locale has contextTransforms.
+             status = U_ZERO_ERROR;
+         } else if (U_FAILURE(status)) {
+             return;
+         }
+         needBrkIter = sink.hasCapitalizationUsage;
+     }
+     // Get a sentence break iterator if we will need it
+     if (needBrkIter || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE) {
+         status = U_ZERO_ERROR;
+         capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
+         if (U_FAILURE(status)) {
+             // Without a usable iterator no titlecasing is applied later.
+             delete capitalizationBrkIter;
+             capitalizationBrkIter = NULL;
+         }
+     }
+ #endif
+ }
+
+ // Deletes the sentence break iterator created by initialize(), if any.
+ // When break iteration is compiled out there is nothing to delete.
+ LocaleDisplayNamesImpl::~LocaleDisplayNamesImpl() {
+ #if !UCONFIG_NO_BREAK_ITERATION
+ delete capitalizationBrkIter;
+ #endif
+ }
+
+ // Returns the locale chosen in initialize().
+ const Locale&
+ LocaleDisplayNamesImpl::getLocale() const {
+     return this->locale;
+ }
+
+ // Returns the dialect handling this instance was constructed with.
+ UDialectHandling
+ LocaleDisplayNamesImpl::getDialectHandling() const {
+     return this->dialectHandling;
+ }
+
+ // Returns the display context value in effect for the given context type,
+ // or (UDisplayContext)0 for a type this class does not know.
+ UDisplayContext
+ LocaleDisplayNamesImpl::getContext(UDisplayContextType type) const {
+     UDisplayContext context = (UDisplayContext)0;
+     switch (type) {
+     case UDISPCTX_TYPE_DIALECT_HANDLING:
+         context = (UDisplayContext)dialectHandling;
+         break;
+     case UDISPCTX_TYPE_CAPITALIZATION:
+         context = capitalizationContext;
+         break;
+     case UDISPCTX_TYPE_DISPLAY_LENGTH:
+         context = nameLength;
+         break;
+     case UDISPCTX_TYPE_SUBSTITUTE_HANDLING:
+         context = substitute;
+         break;
+     default:
+         break;
+     }
+     return context;
+ }
+
+ // Titlecases result in place when the capitalization context asks for it for
+ // this usage, then returns result. Does nothing when result is empty, does not
+ // start with a lowercase character, or no break iterator is available.
+ UnicodeString&
+ LocaleDisplayNamesImpl::adjustForUsageAndContext(CapContextUsage usage,
+                                                  UnicodeString& result) const {
+ #if !UCONFIG_NO_BREAK_ITERATION
+     if (result.length() <= 0 || !u_islower(result.char32At(0)) || capitalizationBrkIter == NULL) {
+         return result;
+     }
+     // note fCapitalization[usage] won't be set unless capitalizationContext is UI_LIST_OR_MENU or STANDALONE
+     if (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || fCapitalization[usage]) {
+         // The lock is held for the duration of the toTitle() call on the shared iterator.
+         static UMutex capitalizationBrkIterLock;
+         Mutex lock(&capitalizationBrkIterLock);
+         result.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
+     }
+ #endif
+     return result;
+ }
+
+ // Builds the display name of loc into result and returns result.
+ //
+ // The name consists of a base name (language, or a dialect name covering
+ // language plus script and/or country) and, if anything is left over, a
+ // remainder (script, country, variant, keywords) joined with the separator
+ // pattern and attached to the base name with the locale display pattern.
+ // result is set to bogus when loc is bogus or when a needed component name
+ // comes back bogus.
+ UnicodeString&
+ LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc,
+ UnicodeString& result) const {
+ if (loc.isBogus()) {
+ result.setToBogus();
+ return result;
+ }
+ UnicodeString resultName;
+
+ const char* lang = loc.getLanguage();
+ if (uprv_strlen(lang) == 0) {
+ lang = "root";
+ }
+ const char* script = loc.getScript();
+ const char* country = loc.getCountry();
+ const char* variant = loc.getVariant();
+
+ UBool hasScript = uprv_strlen(script) > 0;
+ UBool hasCountry = uprv_strlen(country) > 0;
+ UBool hasVariant = uprv_strlen(variant) > 0;
+
+ // Dialect names: try lang_script_country, then lang_script, then lang_country.
+ // On a hit, clear the flags of the parts the dialect name already covers so
+ // they are not added to the remainder below.
+ if (dialectHandling == ULDN_DIALECT_NAMES) {
+ char buffer[ULOC_FULLNAME_CAPACITY];
+ do { // loop construct is so we can break early out of search
+ if (hasScript && hasCountry) {
+ ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, "_", country, (char *)0);
+ localeIdName(buffer, resultName, false);
+ if (!resultName.isBogus()) {
+ hasScript = FALSE;
+ hasCountry = FALSE;
+ break;
+ }
+ }
+ if (hasScript) {
+ ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, (char *)0);
+ localeIdName(buffer, resultName, false);
+ if (!resultName.isBogus()) {
+ hasScript = FALSE;
+ break;
+ }
+ }
+ if (hasCountry) {
+ ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", country, (char*)0);
+ localeIdName(buffer, resultName, false);
+ if (!resultName.isBogus()) {
+ hasCountry = FALSE;
+ break;
+ }
+ }
+ } while (FALSE);
+ }
+ // No dialect name found (or dialect names not requested): use the plain language name.
+ if (resultName.isBogus() || resultName.isEmpty()) {
+ localeIdName(lang, resultName, substitute == UDISPCTX_SUBSTITUTE);
+ if (resultName.isBogus()) {
+ result.setToBogus();
+ return result;
+ }
+ }
+
+ UnicodeString resultRemainder;
+ UnicodeString temp;
+ UErrorCode status = U_ZERO_ERROR;
+
+ // Component names are fetched with skipAdjust == TRUE; capitalization is
+ // applied once, to the complete name, at the end.
+ if (hasScript) {
+ UnicodeString script_str = scriptDisplayName(script, temp, TRUE);
+ if (script_str.isBogus()) {
+ result.setToBogus();
+ return result;
+ }
+ resultRemainder.append(script_str);
+ }
+ if (hasCountry) {
+ UnicodeString region_str = regionDisplayName(country, temp, TRUE);
+ if (region_str.isBogus()) {
+ result.setToBogus();
+ return result;
+ }
+ appendWithSep(resultRemainder, region_str);
+ }
+ if (hasVariant) {
+ UnicodeString variant_str = variantDisplayName(variant, temp, TRUE);
+ if (variant_str.isBogus()) {
+ result.setToBogus();
+ return result;
+ }
+ appendWithSep(resultRemainder, variant_str);
+ }
+ // Swap the pattern's parenthesis characters for brackets inside the remainder
+ // (presumably so they do not clash with the parentheses the pattern adds).
+ resultRemainder.findAndReplace(formatOpenParen, formatReplaceOpenParen);
+ resultRemainder.findAndReplace(formatCloseParen, formatReplaceCloseParen);
+
+ // Append one element per locale keyword.
+ LocalPointer<StringEnumeration> e(loc.createKeywords(status));
+ if (e.isValid() && U_SUCCESS(status)) {
+ UnicodeString temp2;
+ char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY
+ const char* key;
+ while ((key = e->next((int32_t *)0, status)) != NULL) {
+ value[0] = 0;
+ loc.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
+ // NOTE(review): on failure or truncation, result is returned as the caller passed it in.
+ if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
+ return result;
+ }
+ keyDisplayName(key, temp, TRUE);
+ temp.findAndReplace(formatOpenParen, formatReplaceOpenParen);
+ temp.findAndReplace(formatCloseParen, formatReplaceCloseParen);
+ keyValueDisplayName(key, value, temp2, TRUE);
+ temp2.findAndReplace(formatOpenParen, formatReplaceOpenParen);
+ temp2.findAndReplace(formatCloseParen, formatReplaceCloseParen);
+ // Three cases, decided by whether a lookup returned something other than
+ // its input: value name differs -> append the value name alone; else key
+ // name differs -> key/type pattern; else raw "key=value".
+ if (temp2 != UnicodeString(value, -1, US_INV)) {
+ appendWithSep(resultRemainder, temp2);
+ } else if (temp != UnicodeString(key, -1, US_INV)) {
+ UnicodeString temp3;
+ keyTypeFormat.format(temp, temp2, temp3, status);
+ appendWithSep(resultRemainder, temp3);
+ } else {
+ appendWithSep(resultRemainder, temp)
+ .append((UChar)0x3d /* = */)
+ .append(temp2);
+ }
+ }
+ }
+
+ // Combine base name and remainder with the locale display pattern.
+ if (!resultRemainder.isEmpty()) {
+ format.format(resultName, resultRemainder, result.remove(), status);
+ return adjustForUsageAndContext(kCapContextUsageLanguage, result);
+ }
+
+ result = resultName;
+ return adjustForUsageAndContext(kCapContextUsageLanguage, result);
+ }
+
+ // Appends src to buffer. An empty buffer simply becomes src; otherwise buffer
+ // and src are combined in place through the separator pattern.
+ UnicodeString&
+ LocaleDisplayNamesImpl::appendWithSep(UnicodeString& buffer, const UnicodeString& src) const {
+     if (!buffer.isEmpty()) {
+         UErrorCode status = U_ZERO_ERROR;
+         const UnicodeString *values[2] = { &buffer, &src };
+         separatorFormat.formatAndReplace(values, 2, buffer, NULL, 0, status);
+         return buffer;
+     }
+     buffer.setTo(src);
+     return buffer;
+ }
+
+ // Convenience overload: builds a Locale from localeId and delegates to the
+ // Locale-based overload.
+ UnicodeString&
+ LocaleDisplayNamesImpl::localeDisplayName(const char* localeId,
+                                           UnicodeString& result) const {
+     const Locale parsed(localeId);
+     return localeDisplayName(parsed, result);
+ }
+
+ // private
+ // Looks up the "Languages" name for localeId. With short name length the
+ // "Languages%short" table is tried first. The substitute parameter selects
+ // between get() (falls back to the ID itself) and getNoFallback() (bogus on miss).
+ UnicodeString&
+ LocaleDisplayNamesImpl::localeIdName(const char* localeId,
+                                      UnicodeString& result, bool substitute) const {
+     if (nameLength == UDISPCTX_LENGTH_SHORT &&
+             !langData.getNoFallback("Languages%short", localeId, result).isBogus()) {
+         return result;
+     }
+     return substitute
+         ? langData.get("Languages", localeId, result)
+         : langData.getNoFallback("Languages", localeId, result);
+ }
+
+ // Returns the display name of a language code. "root" and any code containing
+ // '_' are returned unchanged and unadjusted; otherwise the name is looked up
+ // (short table first when short names are requested) and capitalization-adjusted.
+ UnicodeString&
+ LocaleDisplayNamesImpl::languageDisplayName(const char* lang,
+                                             UnicodeString& result) const {
+     const UBool isRoot = uprv_strcmp("root", lang) == 0;
+     if (isRoot || uprv_strchr(lang, '_') != NULL) {
+         result = UnicodeString(lang, -1, US_INV);
+         return result;
+     }
+     UBool found = FALSE;
+     if (nameLength == UDISPCTX_LENGTH_SHORT) {
+         langData.getNoFallback("Languages%short", lang, result);
+         found = !result.isBogus();
+     }
+     if (!found) {
+         if (substitute == UDISPCTX_SUBSTITUTE) {
+             langData.get("Languages", lang, result);
+         } else {
+             langData.getNoFallback("Languages", lang, result);
+         }
+     }
+     return adjustForUsageAndContext(kCapContextUsageLanguage, result);
+ }
+
+ // Looks up the display name of a script code: "Scripts%short" first when short
+ // names are requested, then "Scripts" (with or without fallback to the code,
+ // per the substitute context). Capitalization adjustment is applied unless
+ // skipAdjust is set.
+ UnicodeString&
+ LocaleDisplayNamesImpl::scriptDisplayName(const char* script,
+                                           UnicodeString& result,
+                                           UBool skipAdjust) const {
+     UBool found = FALSE;
+     if (nameLength == UDISPCTX_LENGTH_SHORT) {
+         langData.getNoFallback("Scripts%short", script, result);
+         found = !result.isBogus();
+     }
+     if (!found) {
+         if (substitute == UDISPCTX_SUBSTITUTE) {
+             langData.get("Scripts", script, result);
+         } else {
+             langData.getNoFallback("Scripts", script, result);
+         }
+     }
+     if (skipAdjust) {
+         return result;
+     }
+     return adjustForUsageAndContext(kCapContextUsageScript, result);
+ }
+
+ // Public overload: script name with capitalization adjustment applied.
+ UnicodeString&
+ LocaleDisplayNamesImpl::scriptDisplayName(const char* script,
+                                           UnicodeString& result) const {
+     const UBool skipAdjust = FALSE;
+     return scriptDisplayName(script, result, skipAdjust);
+ }
+
+ // Public overload taking a UScriptCode: resolves the code to its script name
+ // via uscript_getName() and delegates to the string-based lookup, with
+ // capitalization adjustment applied.
+ //
+ // uscript_getName() may return NULL for an invalid code. Such a code has no
+ // string form to look up or to substitute, so the result is empty when
+ // substitution is enabled and bogus otherwise.
+ UnicodeString&
+ LocaleDisplayNamesImpl::scriptDisplayName(UScriptCode scriptCode,
+                                           UnicodeString& result) const {
+     const char* name = uscript_getName(scriptCode);
+     if (name == NULL) {
+         if (substitute == UDISPCTX_SUBSTITUTE) {
+             result.remove();
+         } else {
+             result.setToBogus();
+         }
+         return result;
+     }
+     return scriptDisplayName(name, result, FALSE);
+ }
+
+ // Looks up the display name of a region code in the region data:
+ // "Countries%short" first when short names are requested, then "Countries"
+ // (with or without fallback to the code, per the substitute context).
+ // Capitalization adjustment is applied unless skipAdjust is set.
+ UnicodeString&
+ LocaleDisplayNamesImpl::regionDisplayName(const char* region,
+                                           UnicodeString& result,
+                                           UBool skipAdjust) const {
+     UBool found = FALSE;
+     if (nameLength == UDISPCTX_LENGTH_SHORT) {
+         regionData.getNoFallback("Countries%short", region, result);
+         found = !result.isBogus();
+     }
+     if (!found) {
+         if (substitute == UDISPCTX_SUBSTITUTE) {
+             regionData.get("Countries", region, result);
+         } else {
+             regionData.getNoFallback("Countries", region, result);
+         }
+     }
+     if (skipAdjust) {
+         return result;
+     }
+     return adjustForUsageAndContext(kCapContextUsageTerritory, result);
+ }
+
+ // Public overload: region name with capitalization adjustment applied.
+ UnicodeString&
+ LocaleDisplayNamesImpl::regionDisplayName(const char* region,
+                                           UnicodeString& result) const {
+     const UBool skipAdjust = FALSE;
+     return regionDisplayName(region, result, skipAdjust);
+ }
+
+
+ // Looks up the display name of a variant in the "Variants" table, with or
+ // without fallback to the code per the substitute context. Capitalization
+ // adjustment is applied unless skipAdjust is set.
+ UnicodeString&
+ LocaleDisplayNamesImpl::variantDisplayName(const char* variant,
+                                            UnicodeString& result,
+                                            UBool skipAdjust) const {
+     // don't have a resource for short variant names
+     const UBool allowSubstitute = substitute == UDISPCTX_SUBSTITUTE;
+     if (allowSubstitute) {
+         langData.get("Variants", variant, result);
+     } else {
+         langData.getNoFallback("Variants", variant, result);
+     }
+     if (skipAdjust) {
+         return result;
+     }
+     return adjustForUsageAndContext(kCapContextUsageVariant, result);
+ }
+
+ // Public overload: variant name with capitalization adjustment applied.
+ UnicodeString&
+ LocaleDisplayNamesImpl::variantDisplayName(const char* variant,
+                                            UnicodeString& result) const {
+     const UBool skipAdjust = FALSE;
+     return variantDisplayName(variant, result, skipAdjust);
+ }
+
+ // Looks up the display name of a locale keyword in the "Keys" table, with or
+ // without fallback to the key per the substitute context. Capitalization
+ // adjustment is applied unless skipAdjust is set.
+ UnicodeString&
+ LocaleDisplayNamesImpl::keyDisplayName(const char* key,
+                                        UnicodeString& result,
+                                        UBool skipAdjust) const {
+     // don't have a resource for short key names
+     const UBool allowSubstitute = substitute == UDISPCTX_SUBSTITUTE;
+     if (allowSubstitute) {
+         langData.get("Keys", key, result);
+     } else {
+         langData.getNoFallback("Keys", key, result);
+     }
+     if (skipAdjust) {
+         return result;
+     }
+     return adjustForUsageAndContext(kCapContextUsageKey, result);
+ }
+
+// Convenience overload: same as the three-argument form with skipAdjust
+// set to FALSE.
+UnicodeString&
+LocaleDisplayNamesImpl::keyDisplayName(const char* key,
+                                       UnicodeString& result) const {
+    const UBool skipAdjust = FALSE;
+    return keyDisplayName(key, result, skipAdjust);
+}
+
+// Looks up the display name for a keyword value.
+//   key        - keyword; "currency" is special-cased and resolved through
+//                ucurr_getName() instead of the "Types" tables.
+//   value      - keyword value to name.
+//   result     - receives the name; also the returned reference.
+//   skipAdjust - when true, result is returned without the
+//                adjustForUsageAndContext() pass.
+// If the currency lookup fails, the raw value is returned unadjusted.
+UnicodeString&
+LocaleDisplayNamesImpl::keyValueDisplayName(const char* key,
+                                            const char* value,
+                                            UnicodeString& result,
+                                            UBool skipAdjust) const {
+    const UBool isCurrency = (uprv_strcmp(key, "currency") == 0);
+    if (isCurrency) {
+        // ICU4C does not have ICU4J CurrencyDisplayInfo equivalent for now.
+        UErrorCode sts = U_ZERO_ERROR;
+        UnicodeString ustrValue(value, -1, US_INV);
+        int32_t len;
+        const UChar *currencyName = ucurr_getName(ustrValue.getTerminatedBuffer(),
+            locale.getBaseName(), UCURR_LONG_NAME, nullptr /* isChoiceFormat */, &len, &sts);
+        if (U_FAILURE(sts)) {
+            // Return the value as is on failure
+            result = ustrValue;
+            return result;
+        }
+        result.setTo(currencyName, len);
+    } else {
+        // Prefer the short form when requested; otherwise (or when the short
+        // lookup left result bogus) use the regular "Types" table.
+        UBool haveShortName = FALSE;
+        if (nameLength == UDISPCTX_LENGTH_SHORT) {
+            langData.getNoFallback("Types%short", key, value, result);
+            haveShortName = !result.isBogus();
+        }
+        if (!haveShortName) {
+            if (substitute == UDISPCTX_SUBSTITUTE) {
+                langData.get("Types", key, value, result);
+            } else {
+                langData.getNoFallback("Types", key, value, result);
+            }
+        }
+    }
+    if (skipAdjust) {
+        return result;
+    }
+    return adjustForUsageAndContext(kCapContextUsageKeyValue, result);
+}
+
+// Convenience overload: same as the four-argument form with skipAdjust
+// set to FALSE.
+UnicodeString&
+LocaleDisplayNamesImpl::keyValueDisplayName(const char* key,
+                                            const char* value,
+                                            UnicodeString& result) const {
+    const UBool skipAdjust = FALSE;
+    return keyValueDisplayName(key, value, result, skipAdjust);
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Factory: allocates a LocaleDisplayNamesImpl for the given locale and
+// dialect handling. The caller receives the raw pointer from `new`.
+LocaleDisplayNames*
+LocaleDisplayNames::createInstance(const Locale& locale,
+                                   UDialectHandling dialectHandling) {
+    LocaleDisplayNames* instance = new LocaleDisplayNamesImpl(locale, dialectHandling);
+    return instance;
+}
+
+// Factory: allocates a LocaleDisplayNamesImpl configured from an array of
+// display contexts. A NULL contexts array is treated as having length 0,
+// whatever length was passed in.
+LocaleDisplayNames*
+LocaleDisplayNames::createInstance(const Locale& locale,
+                                   UDisplayContext *contexts, int32_t length) {
+    const int32_t usableLength = (contexts == NULL) ? 0 : length;
+    return new LocaleDisplayNamesImpl(locale, contexts, usableLength);
+}
+
+U_NAMESPACE_END
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+U_NAMESPACE_USE
+
+// C API: opens a ULocaleDisplayNames for `locale` with the given dialect
+// handling.
+//   locale          - locale ID; NULL selects uloc_getDefault().
+//   dialectHandling - forwarded to LocaleDisplayNames::createInstance().
+//   pErrorCode      - in/out error code; nothing is done if it already
+//                     indicates failure.
+// Returns the new object, or NULL on failure. If the instance could not be
+// allocated, *pErrorCode is set to U_MEMORY_ALLOCATION_ERROR so that callers
+// checking only the error code do not go on to use a NULL handle.
+U_CAPI ULocaleDisplayNames * U_EXPORT2
+uldn_open(const char * locale,
+          UDialectHandling dialectHandling,
+          UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if (locale == NULL) {
+        locale = uloc_getDefault();
+    }
+    LocaleDisplayNames *names =
+        LocaleDisplayNames::createInstance(Locale(locale), dialectHandling);
+    if (names == NULL) {
+        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return (ULocaleDisplayNames *)names;
+}
+
+// C API: opens a ULocaleDisplayNames for `locale` configured by an array of
+// display contexts.
+//   locale     - locale ID; NULL selects uloc_getDefault().
+//   contexts   - display context values, forwarded to createInstance().
+//   length     - number of entries in contexts.
+//   pErrorCode - in/out error code; nothing is done if it already indicates
+//                failure.
+// Returns the new object, or NULL on failure. If the instance could not be
+// allocated, *pErrorCode is set to U_MEMORY_ALLOCATION_ERROR so that callers
+// checking only the error code do not go on to use a NULL handle.
+U_CAPI ULocaleDisplayNames * U_EXPORT2
+uldn_openForContext(const char * locale,
+                    UDisplayContext *contexts, int32_t length,
+                    UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if (locale == NULL) {
+        locale = uloc_getDefault();
+    }
+    LocaleDisplayNames *names =
+        LocaleDisplayNames::createInstance(Locale(locale), contexts, length);
+    if (names == NULL) {
+        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return (ULocaleDisplayNames *)names;
+}
+
+
+// C API: destroys an object returned by one of the uldn_open functions.
+// The handle is cast back to LocaleDisplayNames and deleted; a NULL handle
+// is a no-op, as with any delete of a null pointer.
+U_CAPI void U_EXPORT2
+uldn_close(ULocaleDisplayNames *ldn) {
+    LocaleDisplayNames *names = (LocaleDisplayNames *)ldn;
+    delete names;
+}
+
+// C API: returns the name of the locale the display names object was
+// opened for, or NULL when ldn is NULL.
+U_CAPI const char * U_EXPORT2
+uldn_getLocale(const ULocaleDisplayNames *ldn) {
+    if (ldn == NULL) {
+        return NULL;
+    }
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    return names->getLocale().getName();
+}
+
+// C API: returns the dialect handling setting of the display names object,
+// or ULDN_STANDARD_NAMES when ldn is NULL.
+U_CAPI UDialectHandling U_EXPORT2
+uldn_getDialectHandling(const ULocaleDisplayNames *ldn) {
+    if (ldn == NULL) {
+        return ULDN_STANDARD_NAMES;
+    }
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    return names->getDialectHandling();
+}
+
+// C API: returns the display context value of the requested type.
+//   ldn        - display names object; must not be NULL.
+//   type       - which context setting to query.
+//   pErrorCode - in/out error code; nothing is done if it already indicates
+//                failure.
+// Returns (UDisplayContext)0 on failure. A NULL ldn is reported as
+// U_ILLEGAL_ARGUMENT_ERROR instead of being dereferenced, matching the
+// argument checks in the other uldn_ functions.
+U_CAPI UDisplayContext U_EXPORT2
+uldn_getContext(const ULocaleDisplayNames *ldn,
+                UDisplayContextType type,
+                UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return (UDisplayContext)0;
+    }
+    if (ldn == NULL) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return (UDisplayContext)0;
+    }
+    return ((const LocaleDisplayNames *)ldn)->getContext(type);
+}
+
+// C API: writes the display name of `locale` into result.
+//   result / maxResultSize - destination buffer and its capacity in UChars;
+//                            result may be NULL only when the capacity is 0.
+//   pErrorCode             - in/out error code.
+// Returns the value of UnicodeString::extract(), or 0 on error. Invalid
+// arguments and a bogus lookup result both yield U_ILLEGAL_ARGUMENT_ERROR.
+U_CAPI int32_t U_EXPORT2
+uldn_localeDisplayName(const ULocaleDisplayNames *ldn,
+                       const char *locale,
+                       UChar *result,
+                       int32_t maxResultSize,
+                       UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    const UBool badBuffer = maxResultSize < 0 || (maxResultSize > 0 && result == NULL);
+    if (ldn == NULL || locale == NULL || badBuffer) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    // Wrap the caller's buffer so the name can be built in place when it fits.
+    UnicodeString temp(result, 0, maxResultSize);
+    names->localeDisplayName(locale, temp);
+    if (temp.isBogus()) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    return temp.extract(result, maxResultSize, *pErrorCode);
+}
+
+// C API: writes the display name of language code `lang` into result.
+//   result / maxResultSize - destination buffer and its capacity in UChars;
+//                            result may be NULL only when the capacity is 0.
+//   pErrorCode             - in/out error code; invalid arguments yield
+//                            U_ILLEGAL_ARGUMENT_ERROR.
+// Returns the value of UnicodeString::extract(), or 0 on error.
+U_CAPI int32_t U_EXPORT2
+uldn_languageDisplayName(const ULocaleDisplayNames *ldn,
+                         const char *lang,
+                         UChar *result,
+                         int32_t maxResultSize,
+                         UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    const UBool badBuffer = maxResultSize < 0 || (maxResultSize > 0 && result == NULL);
+    if (ldn == NULL || lang == NULL || badBuffer) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    UnicodeString temp(result, 0, maxResultSize);
+    names->languageDisplayName(lang, temp);
+    return temp.extract(result, maxResultSize, *pErrorCode);
+}
+
+// C API: writes the display name of script code `script` into result.
+//   result / maxResultSize - destination buffer and its capacity in UChars;
+//                            result may be NULL only when the capacity is 0.
+//   pErrorCode             - in/out error code; invalid arguments yield
+//                            U_ILLEGAL_ARGUMENT_ERROR.
+// Returns the value of UnicodeString::extract(), or 0 on error.
+U_CAPI int32_t U_EXPORT2
+uldn_scriptDisplayName(const ULocaleDisplayNames *ldn,
+                       const char *script,
+                       UChar *result,
+                       int32_t maxResultSize,
+                       UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    const UBool badBuffer = maxResultSize < 0 || (maxResultSize > 0 && result == NULL);
+    if (ldn == NULL || script == NULL || badBuffer) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    UnicodeString temp(result, 0, maxResultSize);
+    names->scriptDisplayName(script, temp);
+    return temp.extract(result, maxResultSize, *pErrorCode);
+}
+
+// C API: like uldn_scriptDisplayName(), but takes a UScriptCode. The code is
+// converted with uscript_getName() and everything else, including argument
+// validation, is delegated to uldn_scriptDisplayName().
+U_CAPI int32_t U_EXPORT2
+uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn,
+                           UScriptCode scriptCode,
+                           UChar *result,
+                           int32_t maxResultSize,
+                           UErrorCode *pErrorCode) {
+    const char *scriptName = uscript_getName(scriptCode);
+    return uldn_scriptDisplayName(ldn, scriptName, result, maxResultSize, pErrorCode);
+}
+
+// C API: writes the display name of region code `region` into result.
+//   result / maxResultSize - destination buffer and its capacity in UChars;
+//                            result may be NULL only when the capacity is 0.
+//   pErrorCode             - in/out error code; invalid arguments yield
+//                            U_ILLEGAL_ARGUMENT_ERROR.
+// Returns the value of UnicodeString::extract(), or 0 on error.
+U_CAPI int32_t U_EXPORT2
+uldn_regionDisplayName(const ULocaleDisplayNames *ldn,
+                       const char *region,
+                       UChar *result,
+                       int32_t maxResultSize,
+                       UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    const UBool badBuffer = maxResultSize < 0 || (maxResultSize > 0 && result == NULL);
+    if (ldn == NULL || region == NULL || badBuffer) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    UnicodeString temp(result, 0, maxResultSize);
+    names->regionDisplayName(region, temp);
+    return temp.extract(result, maxResultSize, *pErrorCode);
+}
+
+// C API: writes the display name of variant code `variant` into result.
+//   result / maxResultSize - destination buffer and its capacity in UChars;
+//                            result may be NULL only when the capacity is 0.
+//   pErrorCode             - in/out error code; invalid arguments yield
+//                            U_ILLEGAL_ARGUMENT_ERROR.
+// Returns the value of UnicodeString::extract(), or 0 on error.
+U_CAPI int32_t U_EXPORT2
+uldn_variantDisplayName(const ULocaleDisplayNames *ldn,
+                        const char *variant,
+                        UChar *result,
+                        int32_t maxResultSize,
+                        UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    const UBool badBuffer = maxResultSize < 0 || (maxResultSize > 0 && result == NULL);
+    if (ldn == NULL || variant == NULL || badBuffer) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    UnicodeString temp(result, 0, maxResultSize);
+    names->variantDisplayName(variant, temp);
+    return temp.extract(result, maxResultSize, *pErrorCode);
+}
+
+// C API: writes the display name of locale keyword `key` into result.
+//   result / maxResultSize - destination buffer and its capacity in UChars;
+//                            result may be NULL only when the capacity is 0.
+//   pErrorCode             - in/out error code; invalid arguments yield
+//                            U_ILLEGAL_ARGUMENT_ERROR.
+// Returns the value of UnicodeString::extract(), or 0 on error.
+U_CAPI int32_t U_EXPORT2
+uldn_keyDisplayName(const ULocaleDisplayNames *ldn,
+                    const char *key,
+                    UChar *result,
+                    int32_t maxResultSize,
+                    UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    const UBool badBuffer = maxResultSize < 0 || (maxResultSize > 0 && result == NULL);
+    if (ldn == NULL || key == NULL || badBuffer) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    UnicodeString temp(result, 0, maxResultSize);
+    names->keyDisplayName(key, temp);
+    return temp.extract(result, maxResultSize, *pErrorCode);
+}
+
+// C API: writes the display name of the keyword value `value` (for keyword
+// `key`) into result.
+//   result / maxResultSize - destination buffer and its capacity in UChars;
+//                            result may be NULL only when the capacity is 0.
+//   pErrorCode             - in/out error code; invalid arguments yield
+//                            U_ILLEGAL_ARGUMENT_ERROR.
+// Returns the value of UnicodeString::extract(), or 0 on error.
+U_CAPI int32_t U_EXPORT2
+uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn,
+                         const char *key,
+                         const char *value,
+                         UChar *result,
+                         int32_t maxResultSize,
+                         UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    const UBool badBuffer = maxResultSize < 0 || (maxResultSize > 0 && result == NULL);
+    if (ldn == NULL || key == NULL || value == NULL || badBuffer) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    UnicodeString temp(result, 0, maxResultSize);
+    names->keyValueDisplayName(key, value, temp);
+    return temp.extract(result, maxResultSize, *pErrorCode);
+}
+
+#endif
diff --git a/thirdparty/icu4c/common/locid.cpp b/thirdparty/icu4c/common/locid.cpp
new file mode 100644
index 0000000000..2804e36bf6
--- /dev/null
+++ b/thirdparty/icu4c/common/locid.cpp
@@ -0,0 +1,2536 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ **********************************************************************
+ * Copyright (C) 1997-2016, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+*
+* File locid.cpp
+*
+* Created by: Richard Gillam
+*
+* Modification History:
+*
+* Date Name Description
+* 02/11/97 aliu Changed gLocPath to fgDataDirectory and added
+* methods to get and set it.
+* 04/02/97 aliu Made operator!= inline; fixed return value
+* of getName().
+* 04/15/97 aliu Cleanup for AIX/Win32.
+* 04/24/97 aliu Numerous changes per code review.
+* 08/18/98 stephen Changed getDisplayName()
+* Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE
+* Added getISOCountries(), getISOLanguages(),
+* getLanguagesForCountry()
+* 03/16/99 bertrand rehaul.
+* 07/21/99 stephen Added U_CFUNC setDefault
+* 11/09/99 weiv Added const char * getName() const;
+* 04/12/00 srl removing unicodestring api's and cached hash code
+* 08/10/01 grhoten Change the static Locales to accessor functions
+******************************************************************************
+*/
+
+#include <utility>
+
+#include "unicode/bytestream.h"
+#include "unicode/locid.h"
+#include "unicode/strenum.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uloc.h"
+#include "unicode/ures.h"
+
+#include "bytesinkutil.h"
+#include "charstr.h"
+#include "charstrmap.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "mutex.h"
+#include "putilimp.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uhash.h"
+#include "ulocimp.h"
+#include "umutex.h"
+#include "uniquecharstr.h"
+#include "ustr_imp.h"
+#include "uvector.h"
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV locale_cleanup(void);
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+static Locale *gLocaleCache = NULL;
+static UInitOnce gLocaleCacheInitOnce = U_INITONCE_INITIALIZER;
+
+// gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale.
+static UMutex gDefaultLocaleMutex;
+static UHashtable *gDefaultLocalesHashT = NULL;
+static Locale *gDefaultLocale = NULL;
+
+/**
+ * \def ULOC_STRING_LIMIT
+ * strings beyond this value crash in CharString
+ */
+#define ULOC_STRING_LIMIT 357913941
+
+U_NAMESPACE_END
+
+// Slot indexes into gLocaleCache, the array of predefined Locale objects
+// filled in by locale_init(). eMAX_LOCALES is the number of slots and is
+// used as the array size.
+typedef enum ELocalePos {
+    // Language-only locales.
+    eENGLISH,
+    eFRENCH,
+    eGERMAN,
+    eITALIAN,
+    eJAPANESE,
+    eKOREAN,
+    eCHINESE,
+
+    // Language + country locales.
+    eFRANCE,
+    eGERMANY,
+    eITALY,
+    eJAPAN,
+    eKOREA,
+    eCHINA, /* Alias for PRC */
+    eTAIWAN,
+    eUK,
+    eUS,
+    eCANADA,
+    eCANADA_FRENCH,
+    eROOT,
+
+
+    //eDEFAULT,
+    eMAX_LOCALES  // count of entries above; must stay last
+} ELocalePos;
+
+U_CDECL_BEGIN
+//
+// Deleter function for Locales owned by the default Locale hash table.
+//
+// Value deleter installed on gDefaultLocalesHashT: destroys one Locale that
+// the table owns.
+static void U_CALLCONV
+deleteLocale(void *obj) {
+    icu::Locale *owned = (icu::Locale *) obj;
+    delete owned;
+}
+
+// Library cleanup callback registered via ucln_common_registerCleanup().
+// Releases the predefined-locale cache and the default-locale hash table and
+// resets the associated globals so that initialization can run again.
+// Always returns TRUE.
+static UBool U_CALLCONV locale_cleanup(void)
+{
+    U_NAMESPACE_USE
+
+    // Default-locale table: closing it deletes every Locale it owns through
+    // the value deleter, which includes the object gDefaultLocale points at.
+    if (gDefaultLocalesHashT != NULL) {
+        uhash_close(gDefaultLocalesHashT);
+        gDefaultLocalesHashT = NULL;
+    }
+    gDefaultLocale = NULL;
+
+    // Predefined-locale cache.
+    delete [] gLocaleCache;
+    gLocaleCache = NULL;
+    gLocaleCacheInitOnce.reset();
+
+    return TRUE;
+}
+
+
+// Allocates gLocaleCache and fills every ELocalePos slot with its predefined
+// Locale. Sets status to U_MEMORY_ALLOCATION_ERROR if the array cannot be
+// allocated. Registers locale_cleanup() so the cache is released at library
+// cleanup.
+static void U_CALLCONV locale_init(UErrorCode &status) {
+    U_NAMESPACE_USE
+
+    // One row per cache slot; a NULL country means a language-only locale.
+    static const struct {
+        ELocalePos  slot;
+        const char *language;
+        const char *country;
+    } kPredefined[] = {
+        { eROOT,          "",   NULL },
+        { eENGLISH,       "en", NULL },
+        { eFRENCH,        "fr", NULL },
+        { eGERMAN,        "de", NULL },
+        { eITALIAN,       "it", NULL },
+        { eJAPANESE,      "ja", NULL },
+        { eKOREAN,        "ko", NULL },
+        { eCHINESE,       "zh", NULL },
+        { eFRANCE,        "fr", "FR" },
+        { eGERMANY,       "de", "DE" },
+        { eITALY,         "it", "IT" },
+        { eJAPAN,         "ja", "JP" },
+        { eKOREA,         "ko", "KR" },
+        { eCHINA,         "zh", "CN" },
+        { eTAIWAN,        "zh", "TW" },
+        { eUK,            "en", "GB" },
+        { eUS,            "en", "US" },
+        { eCANADA,        "en", "CA" },
+        { eCANADA_FRENCH, "fr", "CA" },
+    };
+
+    U_ASSERT(gLocaleCache == NULL);
+    gLocaleCache = new Locale[(int)eMAX_LOCALES];
+    if (gLocaleCache == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
+    for (const auto &entry : kPredefined) {
+        gLocaleCache[entry.slot] = Locale(entry.language, entry.country);
+    }
+}
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+// Sets the process-wide default locale to `id` and returns the resulting
+// default Locale.
+//   id     - locale ID to install; NULL means "use the host's default
+//            locale", which is always canonicalized.
+//   status - in/out error code. On any failure the previous gDefaultLocale
+//            is returned unchanged (it may be NULL if none was ever set).
+// Locale objects created here are owned by gDefaultLocalesHashT and are
+// reused when the same name is set again. The whole function runs under
+// gDefaultLocaleMutex.
+Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
+    // Synchronize this entire function.
+    Mutex lock(&gDefaultLocaleMutex);
+
+    UBool canonicalize = FALSE;
+
+    // If given a NULL string for the locale id, grab the default
+    //   name from the system.
+    //   (Different from most other locale APIs, where a null name means use
+    //    the current ICU default locale.)
+    if (id == NULL) {
+        id = uprv_getDefaultLocaleID();   // This function not thread safe? TODO: verify.
+        canonicalize = TRUE; // always canonicalize host ID
+    }
+
+    // Normalize the requested ID into localeNameBuf; the sink is scoped so
+    // it is finished with the buffer before the buffer is read below.
+    CharString localeNameBuf;
+    {
+        CharStringByteSink sink(&localeNameBuf);
+        if (canonicalize) {
+            ulocimp_canonicalize(id, sink, &status);
+        } else {
+            ulocimp_getName(id, sink, &status);
+        }
+    }
+
+    if (U_FAILURE(status)) {
+        return gDefaultLocale;
+    }
+
+    // Lazily create the table that owns every default Locale ever set.
+    if (gDefaultLocalesHashT == NULL) {
+        gDefaultLocalesHashT = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
+        if (U_FAILURE(status)) {
+            return gDefaultLocale;
+        }
+        uhash_setValueDeleter(gDefaultLocalesHashT, deleteLocale);
+        ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
+    }
+
+    // Reuse an existing Locale for this name, or create and register one.
+    Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf.data());
+    if (newDefault == NULL) {
+        newDefault = new Locale(Locale::eBOGUS);
+        if (newDefault == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return gDefaultLocale;
+        }
+        newDefault->init(localeNameBuf.data(), FALSE);
+        // The key is the name string stored inside the Locale itself, so the
+        // key stays valid for as long as the table holds the value.
+        uhash_put(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status);
+        if (U_FAILURE(status)) {
+            // NOTE(review): presumably uhash_put() disposes of newDefault via
+            // the value deleter on failure - confirm against uhash.
+            return gDefaultLocale;
+        }
+    }
+    gDefaultLocale = newDefault;
+    return gDefaultLocale;
+}
+
+U_NAMESPACE_END
+
+/* sfb 07/21/99 */
+// C-linkage entry point for setting the default locale. Forwards to
+// locale_set_default_internal() and discards the resulting error code and
+// Locale pointer.
+U_CFUNC void
+locale_set_default(const char *id)
+{
+    U_NAMESPACE_USE
+    UErrorCode ignoredStatus = U_ZERO_ERROR;
+    (void)locale_set_default_internal(id, ignoredStatus);
+}
+/* end */
+
+// C-linkage accessor: returns the name of Locale::getDefault().
+U_CFUNC const char *
+locale_get_default(void)
+{
+    U_NAMESPACE_USE
+    const Locale &current = Locale::getDefault();
+    return current.getName();
+}
+
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
+
+/*Character separating the posix id fields*/
+// '_'
+// In the platform codepage.
+#define SEP_CHAR '_'
+#define NULL_CHAR '\0'
+
+// Releases the heap storage this Locale owns. baseName is a separate
+// allocation only when it does not alias fullName; fullName is a separate
+// allocation only when it does not point at the inline fullNameBuffer.
+Locale::~Locale()
+{
+    const UBool baseIsSeparate = (baseName != fullName);
+    if (baseIsSeparate) {
+        uprv_free(baseName);
+    }
+    baseName = NULL;
+
+    /* if fullName is on the heap, we free it */
+    const UBool fullIsOnHeap = (fullName != fullNameBuffer);
+    if (fullIsOnHeap) {
+        uprv_free(fullName);
+        fullName = NULL;
+    }
+}
+
+// Default constructor: starts with the inline name buffer and no base name,
+// then initializes via init(NULL, FALSE).
+Locale::Locale()
+    : UObject(), fullName(fullNameBuffer), baseName(NULL)
+{
+    this->init(NULL, FALSE);
+}
+
+/*
+ * Internal constructor to allow construction of a locale object with
+ * NO side effects. (Default constructor tries to get
+ * the default locale.)
+ */
+// Tag constructor: the ELocaleType argument is unused and only selects this
+// overload. The object is put into the bogus state without calling init().
+Locale::Locale(Locale::ELocaleType)
+    : UObject(), fullName(fullNameBuffer), baseName(NULL)
+{
+    this->setToBogus();
+}
+
+
+// Builds a Locale from separate language, country, variant and keyword
+// strings by assembling a single locale ID and parsing it with init().
+//   newLanguage - language code, or possibly a complete locale string
+//                 (the assembled ID is re-parsed, so this is allowed).
+//   newCountry  - country/region code, may be NULL.
+//   newVariant  - variant; leading and trailing '_' are stripped.
+//   newKeywords - keywords ("key=value...") or, when it contains no '=',
+//                 extra variant text.
+// If language, country and variant are all NULL the default locale is used.
+// The object is set to bogus when any component is longer than
+// ULOC_STRING_LIMIT or when assembling the ID fails.
+Locale::Locale( const char * newLanguage,
+                const char * newCountry,
+                const char * newVariant,
+                const char * newKeywords)
+    : UObject(), fullName(fullNameBuffer), baseName(NULL)
+{
+    if( (newLanguage==NULL) && (newCountry == NULL) && (newVariant == NULL) )
+    {
+        init(NULL, FALSE); /* shortcut */
+        return;
+    }
+
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t lsize = 0;
+    int32_t csize = 0;
+    int32_t vsize = 0;
+    int32_t ksize = 0;
+
+    // Measure each component, rejecting anything CharString cannot hold.
+
+    // Language
+    if ( newLanguage != NULL )
+    {
+        lsize = (int32_t)uprv_strlen(newLanguage);
+        if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
+            setToBogus();
+            return;
+        }
+    }
+
+    CharString togo(newLanguage, lsize, status); // start with newLanguage
+
+    // _Country
+    if ( newCountry != NULL )
+    {
+        csize = (int32_t)uprv_strlen(newCountry);
+        if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
+            setToBogus();
+            return;
+        }
+    }
+
+    // _Variant
+    if ( newVariant != NULL )
+    {
+        // remove leading _'s
+        while(newVariant[0] == SEP_CHAR)
+        {
+            newVariant++;
+        }
+
+        // remove trailing _'s
+        vsize = (int32_t)uprv_strlen(newVariant);
+        if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
+            setToBogus();
+            return;
+        }
+        while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) )
+        {
+            vsize--;
+        }
+    }
+
+    if ( newKeywords != NULL)
+    {
+        ksize = (int32_t)uprv_strlen(newKeywords);
+        if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
+            setToBogus();
+            return;
+        }
+    }
+
+    // Assemble the ID. newLanguage is already in togo.
+
+    // Separator before the country field; it is emitted even when the
+    // country is empty but a variant follows (giving "lang__variant").
+    if ( ( vsize != 0 ) || (csize != 0) )
+    {
+        togo.append(SEP_CHAR, status);
+    }
+
+    if ( csize != 0 )
+    {
+        togo.append(newCountry, status);
+    }
+
+    if ( vsize != 0)
+    {
+        togo.append(SEP_CHAR, status)
+            .append(newVariant, vsize, status);
+    }
+
+    if ( ksize != 0)
+    {
+        if (uprv_strchr(newKeywords, '=')) {
+            togo.append('@', status); /* keyword parsing */
+        }
+        else {
+            togo.append('_', status); /* Variant parsing with a script */
+            if ( vsize == 0) {
+                togo.append('_', status); /* No country found */
+            }
+        }
+        togo.append(newKeywords, status);
+    }
+
+    if (U_FAILURE(status)) {
+        // Something went wrong with appending, etc.
+        setToBogus();
+        return;
+    }
+    // Parse it, because for example 'language' might really be a complete
+    // string.
+    init(togo.data(), FALSE);
+}
+
+// Copy constructor: starts from an empty state that uses the inline buffer
+// and then delegates all copying to the copy assignment operator.
+Locale::Locale(const Locale &other)
+    : UObject(other), fullName(fullNameBuffer), baseName(NULL)
+{
+    this->operator=(other);
+}
+
+// Move constructor: starts with both name pointers aimed at the inline
+// buffer (so move assignment has nothing to free) and then delegates to the
+// move assignment operator.
+Locale::Locale(Locale&& other) U_NOEXCEPT
+    : UObject(other), fullName(fullNameBuffer), baseName(fullName) {
+    this->operator=(std::move(other));
+}
+
+// Copy assignment. Self-assignment is a no-op. Otherwise this object is
+// first reset with setToBogus(), and then the names and subtags of `other`
+// are duplicated. If a string duplication fails, the function returns early
+// and the fields assigned after that point keep the values left by
+// setToBogus().
+Locale& Locale::operator=(const Locale& other) {
+    if (this == &other) {
+        return *this;
+    }
+
+    // Drop the current contents before copying.
+    setToBogus();
+
+    // fullName: copy into the inline buffer when the source uses its own
+    // inline buffer, otherwise duplicate the source's string.
+    if (other.fullName == other.fullNameBuffer) {
+        uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
+    } else if (other.fullName == nullptr) {
+        fullName = nullptr;
+    } else {
+        fullName = uprv_strdup(other.fullName);
+        if (fullName == nullptr) return *this;
+    }
+
+    // baseName: keep the aliasing relationship when the source's baseName
+    // aliases its fullName; otherwise make a separate copy.
+    if (other.baseName == other.fullName) {
+        baseName = fullName;
+    } else if (other.baseName != nullptr) {
+        baseName = uprv_strdup(other.baseName);
+        if (baseName == nullptr) return *this;
+    }
+
+    uprv_strcpy(language, other.language);
+    uprv_strcpy(script, other.script);
+    uprv_strcpy(country, other.country);
+
+    variantBegin = other.variantBegin;
+    fIsBogus = other.fIsBogus;
+
+    return *this;
+}
+
+// Move assignment. Frees any heap strings this object owns, then takes over
+// the heap strings of `other` (or copies its inline buffer), copies the
+// subtags, and leaves `other` pointing at its own inline buffer so that its
+// destructor frees nothing.
+Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
+    // Self-move must be a no-op: otherwise the frees below would release the
+    // very strings that are about to be "taken over" from other.
+    if (this == &other) {
+        return *this;
+    }
+
+    if (baseName != fullName) uprv_free(baseName);
+    if (fullName != fullNameBuffer) uprv_free(fullName);
+
+    if (other.fullName == other.fullNameBuffer) {
+        // Inline storage cannot be stolen; copy the characters instead.
+        uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
+        fullName = fullNameBuffer;
+    } else {
+        fullName = other.fullName;
+    }
+
+    // Preserve the aliasing relationship between baseName and fullName.
+    if (other.baseName == other.fullName) {
+        baseName = fullName;
+    } else {
+        baseName = other.baseName;
+    }
+
+    uprv_strcpy(language, other.language);
+    uprv_strcpy(script, other.script);
+    uprv_strcpy(country, other.country);
+
+    variantBegin = other.variantBegin;
+    fIsBogus = other.fIsBogus;
+
+    // Detach other from the transferred storage.
+    other.baseName = other.fullName = other.fullNameBuffer;
+
+    return *this;
+}
+
+// Returns a heap-allocated copy of this Locale made with the copy
+// constructor; the caller receives the raw pointer from `new`.
+Locale *
+Locale::clone() const {
+    Locale *copy = new Locale(*this);
+    return copy;
+}
+
+// Two Locales compare equal exactly when their full name strings are equal.
+UBool
+Locale::operator==( const Locale& other) const
+{
+    const int cmp = uprv_strcmp(other.fullName, fullName);
+    return cmp == 0;
+}
+
+namespace {
+
+UInitOnce gKnownCanonicalizedInitOnce = U_INITONCE_INITIALIZER;
+UHashtable *gKnownCanonicalized = nullptr;
+
+// Locale IDs that are known to be in canonical form already. The entries are
+// loaded as keys into the gKnownCanonicalized hash table by
+// loadKnownCanonicalized().
+static const char* const KNOWN_CANONICALIZED[] = {
+    "c",
+    // Commonly used locales that are known to be already canonicalized
+    "af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ",
+    "be", "be_BY", "bg", "bg_BG", "bn", "bn_IN", "bs", "bs_BA", "ca", "ca_ES",
+    "cs", "cs_CZ", "cy", "cy_GB", "da", "da_DK", "de", "de_DE", "el", "el_GR",
+    "en", "en_GB", "en_US", "es", "es_419", "es_ES", "et", "et_EE", "eu",
+    "eu_ES", "fa", "fa_IR", "fi", "fi_FI", "fil", "fil_PH", "fr", "fr_FR",
+    "ga", "ga_IE", "gl", "gl_ES", "gu", "gu_IN", "he", "he_IL", "hi", "hi_IN",
+    "hr", "hr_HR", "hu", "hu_HU", "hy", "hy_AM", "id", "id_ID", "is", "is_IS",
+    "it", "it_IT", "ja", "ja_JP", "jv", "jv_ID", "ka", "ka_GE", "kk", "kk_KZ",
+    "km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA",
+    "lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN",
+    "mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP",
+    "nl", "nl_NL", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
+    "pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si",
+    "si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr",
+    "sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta",
+    "ta_IN", "te", "te_IN", "th", "th_TH", "tk", "tk_TM", "tr", "tr_TR", "uk",
+    "uk_UA", "ur", "ur_PK", "uz", "uz_UZ", "vi", "vi_VN", "yue", "yue_Hant",
+    "yue_Hant_HK", "yue_HK", "zh", "zh_CN", "zh_Hans", "zh_Hans_CN", "zh_Hant",
+    "zh_Hant_TW", "zh_TW", "zu", "zu_ZA"
+};
+
+// Library cleanup callback for the known-canonicalized table. Resets the
+// init-once guard, closes the table and clears the global pointer so that no
+// dangling pointer survives cleanup (for example if a later reload fails
+// before assigning a new table). Always returns TRUE.
+static UBool U_CALLCONV cleanupKnownCanonicalized() {
+    gKnownCanonicalizedInitOnce.reset();
+    if (gKnownCanonicalized) {
+        uhash_close(gKnownCanonicalized);
+        gKnownCanonicalized = nullptr;
+    }
+    return TRUE;
+}
+
+// Builds the gKnownCanonicalized table: every KNOWN_CANONICALIZED entry is
+// inserted as a key with the integer value 1. The table is published to the
+// global only if every step succeeded; on failure the partially built table
+// is released by the local smart pointer and status carries the error.
+static void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) {
+    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
+                                cleanupKnownCanonicalized);
+    LocalUHashtablePointer table(
+        uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &status));
+
+    const int32_t count = UPRV_LENGTHOF(KNOWN_CANONICALIZED);
+    int32_t idx = 0;
+    while (U_SUCCESS(status) && idx < count) {
+        uhash_puti(table.getAlias(), (void*)KNOWN_CANONICALIZED[idx], 1, &status);
+        ++idx;
+    }
+
+    if (U_SUCCESS(status)) {
+        gKnownCanonicalized = table.orphan();
+    }
+}
+
+class AliasData;
+
+/**
+ * A Builder class to build the alias data.
+ */
+class AliasDataBuilder {
+public:
+    AliasDataBuilder() {
+    }
+
+    // Build the AliasData from resource.
+    AliasData* build(UErrorCode &status);
+
+private:
+    // Shared worker for the read*Alias() methods below.
+    // Reads every entry of the alias resource: the entry key goes into
+    // types, and the entry's "replacement" string is added to strings with
+    // its index stored in replacementIndexes. length receives the number of
+    // entries. checkType and checkReplacement are invoked on each key and
+    // replacement (used for debug-build assertions by the callers).
+    void readAlias(UResourceBundle* alias,
+                   UniqueCharStrings* strings,
+                   LocalMemory<const char*>& types,
+                   LocalMemory<int32_t>& replacementIndexes,
+                   int32_t &length,
+                   void (*checkType)(const char* type),
+                   void (*checkReplacement)(const UnicodeString& replacement),
+                   UErrorCode &status);
+
+    // Read the languageAlias data from alias to
+    // strings+types+replacementIndexes
+    // The number of record will be stored into length.
+    // Allocate length items for types, to store the type field.
+    // Allocate length items for replacementIndexes,
+    // to store the index in the strings for the replacement language.
+    void readLanguageAlias(UResourceBundle* alias,
+                           UniqueCharStrings* strings,
+                           LocalMemory<const char*>& types,
+                           LocalMemory<int32_t>& replacementIndexes,
+                           int32_t &length,
+                           UErrorCode &status);
+
+    // Read the scriptAlias data from alias to
+    // strings+types+replacementIndexes
+    // Allocate length items for types, to store the type field.
+    // Allocate length items for replacementIndexes,
+    // to store the index in the strings for the replacement script.
+    void readScriptAlias(UResourceBundle* alias,
+                         UniqueCharStrings* strings,
+                         LocalMemory<const char*>& types,
+                         LocalMemory<int32_t>& replacementIndexes,
+                         int32_t &length, UErrorCode &status);
+
+    // Read the territoryAlias data from alias to
+    // strings+types+replacementIndexes
+    // Allocate length items for types, to store the type field.
+    // Allocate length items for replacementIndexes,
+    // to store the index in the strings for the replacement regions.
+    void readTerritoryAlias(UResourceBundle* alias,
+                            UniqueCharStrings* strings,
+                            LocalMemory<const char*>& types,
+                            LocalMemory<int32_t>& replacementIndexes,
+                            int32_t &length, UErrorCode &status);
+
+    // Read the variantAlias data from alias to
+    // strings+types+replacementIndexes
+    // Allocate length items for types, to store the type field.
+    // Allocate length items for replacementIndexes,
+    // to store the index in the strings for the replacement variant.
+    void readVariantAlias(UResourceBundle* alias,
+                          UniqueCharStrings* strings,
+                          LocalMemory<const char*>& types,
+                          LocalMemory<int32_t>& replacementIndexes,
+                          int32_t &length, UErrorCode &status);
+};
+
+/**
+ * A class to hold the Alias Data.
+ */
+class AliasData : public UMemory {
+public:
+    // Returns the process-wide AliasData, loading it on first use through
+    // umtx_initOnce(). Returns nullptr if status already indicates failure
+    // on entry; otherwise returns gSingleton as set by loadData().
+    static const AliasData* singleton(UErrorCode& status) {
+        if (U_FAILURE(status)) {
+            // Do not get into loadData if the status already has error.
+            return nullptr;
+        }
+        umtx_initOnce(AliasData::gInitOnce, &AliasData::loadData, status);
+        return gSingleton;
+    }
+
+    // Read-only access to the four alias maps.
+    const CharStringMap& languageMap() const { return language; }
+    const CharStringMap& scriptMap() const { return script; }
+    const CharStringMap& territoryMap() const { return territory; }
+    const CharStringMap& variantMap() const { return variant; }
+
+    // Init-once callback that builds gSingleton, and the matching library
+    // cleanup callback.
+    static void U_CALLCONV loadData(UErrorCode &status);
+    static UBool U_CALLCONV cleanup();
+
+    static UInitOnce gInitOnce;
+
+private:
+    // Takes the four maps by move and takes ownership of `strings`, which
+    // is deleted in the destructor.
+    AliasData(CharStringMap languageMap,
+              CharStringMap scriptMap,
+              CharStringMap territoryMap,
+              CharStringMap variantMap,
+              CharString* strings)
+        : language(std::move(languageMap)),
+          script(std::move(scriptMap)),
+          territory(std::move(territoryMap)),
+          variant(std::move(variantMap)),
+          strings(strings) {
+    }
+
+    ~AliasData() {
+        delete strings;
+    }
+
+    static const AliasData* gSingleton;
+
+    CharStringMap language;
+    CharStringMap script;
+    CharStringMap territory;
+    CharStringMap variant;
+    // Owned; released in the destructor.
+    // NOTE(review): presumably the backing storage for the strings the maps
+    // refer to - confirm in AliasDataBuilder::build().
+    CharString* strings;
+
+    // The builder needs the private constructor.
+    friend class AliasDataBuilder;
+};
+
+
+const AliasData* AliasData::gSingleton = nullptr;
+UInitOnce AliasData::gInitOnce = U_INITONCE_INITIALIZER;
+
+// Library cleanup callback for the alias data. Resets the init-once guard,
+// destroys the singleton and clears the global pointer so that nothing can
+// observe a dangling gSingleton after cleanup. Always returns TRUE.
+UBool U_CALLCONV
+AliasData::cleanup()
+{
+    gInitOnce.reset();
+    delete gSingleton;
+    gSingleton = nullptr;
+    return TRUE;
+}
+
+// Reads every entry of one alias table.
+//   alias              - resource whose children are the alias entries.
+//   strings            - pool that receives each "replacement" string.
+//   types              - (re)allocated to `length` items; receives the key
+//                        of each entry.
+//   replacementIndexes - (re)allocated to `length` items; receives the index
+//                        returned by strings->add() for each replacement.
+//   length             - set to ures_getSize(alias).
+//   checkType / checkReplacement - callbacks invoked on each key and
+//                        replacement before they are stored.
+//   status             - in/out error code. Nothing is done if it already
+//                        indicates failure; reading stops at the first entry
+//                        that fails, so failed entries are never passed to
+//                        the callbacks or stored.
+void
+AliasDataBuilder::readAlias(
+        UResourceBundle* alias,
+        UniqueCharStrings* strings,
+        LocalMemory<const char*>& types,
+        LocalMemory<int32_t>& replacementIndexes,
+        int32_t &length,
+        void (*checkType)(const char* type),
+        void (*checkReplacement)(const UnicodeString& replacement),
+        UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    length = ures_getSize(alias);
+    const char** rawTypes = types.allocateInsteadAndCopy(length);
+    if (rawTypes == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    int32_t* rawIndexes = replacementIndexes.allocateInsteadAndCopy(length);
+    if (rawIndexes == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    // The index is bounded by length so the arrays cannot be overrun even if
+    // the iterator were to yield more entries than ures_getSize() reported.
+    for (int32_t i = 0; i < length && ures_hasNext(alias); i++) {
+        LocalUResourceBundlePointer res(
+            ures_getNextResource(alias, nullptr, &status));
+        if (U_FAILURE(status)) {
+            return;
+        }
+        const char* aliasFrom = ures_getKey(res.getAlias());
+        UnicodeString aliasTo =
+            ures_getUnicodeStringByKey(res.getAlias(), "replacement", &status);
+        if (U_FAILURE(status)) {
+            return;
+        }
+
+        checkType(aliasFrom);
+        checkReplacement(aliasTo);
+
+        rawTypes[i] = aliasFrom;
+        rawIndexes[i] = strings->add(aliasTo, status);
+        if (U_FAILURE(status)) {
+            return;
+        }
+    }
+}
+
+/**
+ * Read the languageAlias data from alias to strings+types+replacementIndexes.
+ * Allocate length items for types, to store the type field. Allocate length
+ * items for replacementIndexes, to store the index in the strings for the
+ * replacement language.
+ */
+// Thin wrapper over readAlias() for the language alias table. In debug
+// builds each alias key is additionally sanity-checked; replacements are
+// not checked.
+void
+AliasDataBuilder::readLanguageAlias(
+        UResourceBundle* alias,
+        UniqueCharStrings* strings,
+        LocalMemory<const char*>& types,
+        LocalMemory<int32_t>& replacementIndexes,
+        int32_t &length,
+        UErrorCode &status)
+{
+#if U_DEBUG
+    auto verifyType = [](const char* type) {
+        // Assert the aliasFrom only contains the following possibilities
+        // language_REGION_variant
+        // language_REGION
+        // language_variant
+        // language
+        // und_variant
+        Locale test(type);
+        // Assert no script in aliasFrom
+        U_ASSERT(test.getScript()[0] == '\0');
+        // Assert when language is und, no REGION in aliasFrom.
+        U_ASSERT(test.getLanguage()[0] != '\0' || test.getCountry()[0] == '\0');
+    };
+#else
+    auto verifyType = [](const char*) {};
+#endif
+    auto verifyReplacement = [](const UnicodeString&) {};
+
+    readAlias(alias, strings, types, replacementIndexes, length,
+              verifyType, verifyReplacement, status);
+}
+
+/**
+ * Read the scriptAlias data from alias to strings+types+replacementIndexes.
+ * Allocate length items for types, to store the type field. Allocate length
+ * items for replacementIndexes, to store the index in the strings for the
+ * replacement script.
+ */
+// Thin wrapper over readAlias() for the script alias table. In debug builds
+// both the alias key and its replacement are asserted to be 4 characters
+// long.
+void
+AliasDataBuilder::readScriptAlias(
+        UResourceBundle* alias,
+        UniqueCharStrings* strings,
+        LocalMemory<const char*>& types,
+        LocalMemory<int32_t>& replacementIndexes,
+        int32_t &length,
+        UErrorCode &status)
+{
+#if U_DEBUG
+    auto verifyType = [](const char* type) {
+        U_ASSERT(uprv_strlen(type) == 4);
+    };
+    auto verifyReplacement = [](const UnicodeString& replacement) {
+        U_ASSERT(replacement.length() == 4);
+    };
+#else
+    auto verifyType = [](const char*) {};
+    auto verifyReplacement = [](const UnicodeString&) { };
+#endif
+
+    readAlias(alias, strings, types, replacementIndexes, length,
+              verifyType, verifyReplacement, status);
+}
+
+/**
+ * Read the territoryAlias data from alias to strings+types+replacementIndexes.
+ * Allocate length items for types, to store the type field. Allocate length
+ * items for replacementIndexes, to store the index in the strings for the
+ * replacement regions.
+ */
+// Thin wrapper over readAlias() for the territory alias table. In debug
+// builds each alias key is asserted to be 2 or 3 characters long;
+// replacements are not checked.
+void
+AliasDataBuilder::readTerritoryAlias(
+        UResourceBundle* alias,
+        UniqueCharStrings* strings,
+        LocalMemory<const char*>& types,
+        LocalMemory<int32_t>& replacementIndexes,
+        int32_t &length,
+        UErrorCode &status)
+{
+#if U_DEBUG
+    auto verifyType = [](const char* type) {
+        U_ASSERT(uprv_strlen(type) == 2 || uprv_strlen(type) == 3);
+    };
+#else
+    auto verifyType = [](const char*) {};
+#endif
+    auto verifyReplacement = [](const UnicodeString&) { };
+
+    readAlias(alias, strings, types, replacementIndexes, length,
+              verifyType, verifyReplacement, status);
+}
+
+/**
+ * Read the variantAlias data from alias to strings+types+replacementIndexes.
+ * Allocate length items for types, to store the type field. Allocate length
+ * items for replacementIndexes, to store the index in the strings for the
+ * replacement variant.
+ */
+// Thin wrapper over readAlias() for the variant alias table. In debug builds
+// both the alias key and its replacement are asserted to be 4 to 8
+// characters long, and to start with a digit when exactly 4 long.
+void
+AliasDataBuilder::readVariantAlias(
+        UResourceBundle* alias,
+        UniqueCharStrings* strings,
+        LocalMemory<const char*>& types,
+        LocalMemory<int32_t>& replacementIndexes,
+        int32_t &length,
+        UErrorCode &status)
+{
+#if U_DEBUG
+    auto verifyType = [](const char* type) {
+        U_ASSERT(uprv_strlen(type) >= 4 && uprv_strlen(type) <= 8);
+        U_ASSERT(uprv_strlen(type) != 4 ||
+                 (type[0] >= '0' && type[0] <= '9'));
+    };
+    auto verifyReplacement = [](const UnicodeString& replacement) {
+        U_ASSERT(replacement.length() >= 4 && replacement.length() <= 8);
+        U_ASSERT(replacement.length() != 4 ||
+                 (replacement.charAt(0) >= u'0' &&
+                  replacement.charAt(0) <= u'9'));
+    };
+#else
+    auto verifyType = [](const char*) {};
+    auto verifyReplacement = [](const UnicodeString&) { };
+#endif
+
+    readAlias(alias, strings, types, replacementIndexes, length,
+              verifyType, verifyReplacement, status);
+}
+
+/**
+ * Initializes the alias data from the ICU resource bundles. The alias data
+ * contains aliases for languages, countries, scripts and variants.
+ *
+ * If the alias data has already been loaded, then this method simply returns
+ * without doing anything meaningful.
+ */
+// Init-once callback used by singleton(): registers the cleanup hook and
+// stores the result of AliasDataBuilder::build() in gSingleton. When
+// LOCALE_CANONICALIZATION_DEBUG is defined, the elapsed time is printed.
+void U_CALLCONV
+AliasData::loadData(UErrorCode &status)
+{
+#ifdef LOCALE_CANONICALIZATION_DEBUG
+    UDate start = uprv_getRawUTCtime();
+#endif  // LOCALE_CANONICALIZATION_DEBUG
+    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_ALIAS, cleanup);
+    gSingleton = AliasDataBuilder().build(status);
+#ifdef LOCALE_CANONICALIZATION_DEBUG
+    UDate end = uprv_getRawUTCtime();
+    printf("AliasData::loadData took total %f ms\n", end - start);
+#endif  // LOCALE_CANONICALIZATION_DEBUG
+}
+
+/**
+ * Build the alias data from resources.
+ *
+ * Opens the "metadata" bundle, reads its alias/language, alias/script,
+ * alias/territory and alias/variant tables, and moves the four resulting
+ * maps plus the backing strings into a newly allocated AliasData.
+ *
+ * @param status Checked after the resource lookups, after the read*Alias
+ *               calls and after the maps are filled; also set to
+ *               U_MEMORY_ALLOCATION_ERROR when the allocation yields nullptr.
+ * @return The new AliasData, or nullptr when one of those checks fails.
+ */
+AliasData*
+AliasDataBuilder::build(UErrorCode &status) {
+ LocalUResourceBundlePointer metadata(
+ ures_openDirect(nullptr, "metadata", &status));
+ LocalUResourceBundlePointer metadataAlias(
+ ures_getByKey(metadata.getAlias(), "alias", nullptr, &status));
+ LocalUResourceBundlePointer languageAlias(
+ ures_getByKey(metadataAlias.getAlias(), "language", nullptr, &status));
+ LocalUResourceBundlePointer scriptAlias(
+ ures_getByKey(metadataAlias.getAlias(), "script", nullptr, &status));
+ LocalUResourceBundlePointer territoryAlias(
+ ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
+ LocalUResourceBundlePointer variantAlias(
+ ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
+
+ if (U_FAILURE(status)) {
+ return nullptr;
+ }
+ int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
+ variantLength = 0;
+
+ // Read the languageAlias into languageTypes, languageReplacementIndexes
+ // and strings
+ UniqueCharStrings strings(status);
+ LocalMemory<const char*> languageTypes;
+ LocalMemory<int32_t> languageReplacementIndexes;
+ readLanguageAlias(languageAlias.getAlias(),
+ &strings,
+ languageTypes,
+ languageReplacementIndexes,
+ languagesLength,
+ status);
+
+ // Read the scriptAlias into scriptTypes, scriptReplacementIndexes
+ // and strings
+ LocalMemory<const char*> scriptTypes;
+ LocalMemory<int32_t> scriptReplacementIndexes;
+ readScriptAlias(scriptAlias.getAlias(),
+ &strings,
+ scriptTypes,
+ scriptReplacementIndexes,
+ scriptLength,
+ status);
+
+ // Read the territoryAlias into territoryTypes, territoryReplacementIndexes
+ // and strings
+ LocalMemory<const char*> territoryTypes;
+ LocalMemory<int32_t> territoryReplacementIndexes;
+ readTerritoryAlias(territoryAlias.getAlias(),
+ &strings,
+ territoryTypes,
+ territoryReplacementIndexes,
+ territoryLength, status);
+
+ // Read the variantAlias into variantTypes, variantReplacementIndexes
+ // and strings
+ LocalMemory<const char*> variantTypes;
+ LocalMemory<int32_t> variantReplacementIndexes;
+ readVariantAlias(variantAlias.getAlias(),
+ &strings,
+ variantTypes,
+ variantReplacementIndexes,
+ variantLength, status);
+
+ if (U_FAILURE(status)) {
+ return nullptr;
+ }
+
+ // The strings may only be read back (strings.get) after they are frozen.
+ strings.freeze();
+
+ // Build the languageMap from languageTypes & languageReplacementIndexes
+ // NOTE(review): the first CharStringMap argument (490, 1, 650, 2 below) is
+ // presumably an initial size hint - confirm against CharStringMap.
+ CharStringMap languageMap(490, status);
+ for (int32_t i = 0; U_SUCCESS(status) && i < languagesLength; i++) {
+ languageMap.put(languageTypes[i],
+ strings.get(languageReplacementIndexes[i]),
+ status);
+ }
+
+ // Build the scriptMap from scriptTypes & scriptReplacementIndexes
+ CharStringMap scriptMap(1, status);
+ for (int32_t i = 0; U_SUCCESS(status) && i < scriptLength; i++) {
+ scriptMap.put(scriptTypes[i],
+ strings.get(scriptReplacementIndexes[i]),
+ status);
+ }
+
+ // Build the territoryMap from territoryTypes & territoryReplacementIndexes
+ CharStringMap territoryMap(650, status);
+ for (int32_t i = 0; U_SUCCESS(status) && i < territoryLength; i++) {
+ territoryMap.put(territoryTypes[i],
+ strings.get(territoryReplacementIndexes[i]),
+ status);
+ }
+
+ // Build the variantMap from variantTypes & variantReplacementIndexes.
+ CharStringMap variantMap(2, status);
+ for (int32_t i = 0; U_SUCCESS(status) && i < variantLength; i++) {
+ variantMap.put(variantTypes[i],
+ strings.get(variantReplacementIndexes[i]),
+ status);
+ }
+
+ if (U_FAILURE(status)) {
+ return nullptr;
+ }
+
+ // Move the four maps and the orphaned strings into the new AliasData.
+ auto *data = new AliasData(
+ std::move(languageMap),
+ std::move(scriptMap),
+ std::move(territoryMap),
+ std::move(variantMap),
+ strings.orphanCharStrings());
+
+ if (data == nullptr) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ return data;
+}
+
+/**
+ * A class that finds the replacement values of locale fields by using
+ * AliasData.
+ *
+ * The language/script/region/extensions members are borrowed C strings (they
+ * are only ever assigned, never freed, by this class); nullptr means "field
+ * absent". The variants vector holds borrowed const char* entries.
+ */
+class AliasReplacer {
+public:
+    /**
+     * @param status Taken by reference so that a failure while constructing
+     *               the variants UVector reaches the caller instead of being
+     *               written to a discarded by-value copy.
+     */
+    AliasReplacer(UErrorCode& status) :
+            language(nullptr), script(nullptr), region(nullptr),
+            extensions(nullptr), variants(status),
+            data(nullptr) {
+    }
+    ~AliasReplacer() {
+    }
+
+    // Check the fields inside locale; if any field needs to be replaced,
+    // place the replaced locale ID in out and return true.
+    // Otherwise return false for no replacement or error.
+    // NOTE(review): status is taken by value here (and in outputToString and
+    // generateKey), so failures raised inside are not visible to the caller.
+    // Changing that requires updating the out-of-line definitions in step.
+    bool replace(
+        const Locale& locale, CharString& out, UErrorCode status);
+
+private:
+    const char* language;
+    const char* script;
+    const char* region;
+    const char* extensions;
+    UVector variants;
+
+    const AliasData* data;
+
+    // True when str is neither nullptr nor the empty string.
+    inline bool notEmpty(const char* str) {
+        return str && str[0] != NULL_CHAR;
+    }
+
+    /**
+     * Decide which value a field takes after a languageAlias replacement.
+     *
+     *   replacement      input     type      result
+     *   non-empty        nullptr   (any)     replacement
+     *   non-empty        non-null  (any)     input
+     *   nullptr or ""    (any)     nullptr   input
+     *   nullptr or ""    (any)     non-null  nullptr (field is deleted)
+     *
+     * Only input == nullptr counts as "no input"; an empty-string input is
+     * returned as-is.
+     */
+    inline const char* deleteOrReplace(
+            const char* input, const char* type, const char* replacement) {
+        return notEmpty(replacement) ?
+            ((input == nullptr) ? replacement : input) :
+            ((type == nullptr) ? input : nullptr);
+    }
+
+    // Null-tolerant string equality: two nullptr compare equal, a nullptr
+    // never equals a non-null string, otherwise the contents are compared.
+    inline bool same(const char* a, const char* b) {
+        if (a == nullptr || b == nullptr) {
+            return a == b;
+        }
+        return uprv_strcmp(a, b) == 0;
+    }
+
+    // Gather fields and generate locale ID into out.
+    CharString& outputToString(CharString& out, UErrorCode status);
+
+    // Generate the lookup key.
+    CharString& generateKey(const char* language, const char* region,
+                            const char* variant, CharString& out,
+                            UErrorCode status);
+
+    // Split a languageAlias replacement value into its fields. Any storage
+    // allocated for the split is appended to toBeFreed.
+    void parseLanguageReplacement(const char* replacement,
+                                  const char*& replaceLanguage,
+                                  const char*& replaceScript,
+                                  const char*& replaceRegion,
+                                  const char*& replaceVariant,
+                                  const char*& replaceExtensions,
+                                  UVector& toBeFreed,
+                                  UErrorCode& status);
+
+    // Replace by using languageAlias.
+    bool replaceLanguage(bool checkLanguage, bool checkRegion,
+                         bool checkVariants, UVector& toBeFreed,
+                         UErrorCode& status);
+
+    // Replace by using territoryAlias.
+    bool replaceTerritory(UVector& toBeFreed, UErrorCode& status);
+
+    // Replace by using scriptAlias.
+    bool replaceScript(UErrorCode& status);
+
+    // Replace by using variantAlias.
+    bool replaceVariant(UErrorCode& status);
+};
+
+// Builds the languageAlias lookup key in out: the language, followed by
+// SEP_CHAR + region and SEP_CHAR + variant for each of those that is
+// non-empty. Returns out.
+CharString&
+AliasReplacer::generateKey(
+    const char* language, const char* region, const char* variant,
+    CharString& out, UErrorCode status)
+{
+    out.append(language, status);
+    // Region first, then variant; empty or null fields contribute nothing.
+    const char* const optionalFields[] = { region, variant };
+    for (const char* field : optionalFields) {
+        if (!notEmpty(field)) {
+            continue;
+        }
+        out.append(SEP_CHAR, status);
+        out.append(field, status);
+    }
+    return out;
+}
+
+/**
+ * Splits a languageAlias replacement value of the form
+ * language[_script][_region][_variant][_extensions] into its fields.
+ *
+ * When the replacement holds no '_', replacedLanguage simply aliases it.
+ * Otherwise a heap CharString copy is made, appended to toBeFreed, and cut
+ * in place at each '_'; the output pointers then point into that copy.
+ *
+ * @param replacement        The replacement value to split.
+ * @param replacedLanguage   Receives the first field.
+ * @param replacedScript     Receives a 4-character field starting with an
+ *                           ASCII letter, else nullptr.
+ * @param replacedRegion     Receives a 2- or 3-character field, else nullptr.
+ * @param replacedVariant    Receives a field of 4 or more characters, else
+ *                           nullptr.
+ * @param replacedExtensions Receives whatever follows the recognised fields,
+ *                           else nullptr.
+ * @param toBeFreed          Receives the allocated copy, if any.
+ * @param status             Nothing is done when it is already a failure;
+ *                           set on allocation failure.
+ */
+void
+AliasReplacer::parseLanguageReplacement(
+    const char* replacement,
+    const char*& replacedLanguage,
+    const char*& replacedScript,
+    const char*& replacedRegion,
+    const char*& replacedVariant,
+    const char*& replacedExtensions,
+    UVector& toBeFreed,
+    UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    replacedScript = replacedRegion = replacedVariant
+        = replacedExtensions = nullptr;
+    if (uprv_strchr(replacement, '_') == nullptr) {
+        replacedLanguage = replacement;
+        // reach the end, just return it.
+        return;
+    }
+    // We have multiple fields so we have to allocate and parse
+    CharString* str = new CharString(
+        replacement, (int32_t)uprv_strlen(replacement), status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (str == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    toBeFreed.addElement(str, status);
+    char* data = str->data();
+    replacedLanguage = (const char*) data;
+    char* endOfField = uprv_strchr(data, '_');
+    *endOfField = '\0'; // null terminate it.
+    endOfField++;
+    const char* start = endOfField;
+    endOfField = (char*) uprv_strchr(start, '_');
+    size_t len = 0;
+    if (endOfField == nullptr) {
+        len = uprv_strlen(start);
+    } else {
+        len = endOfField - start;
+        *endOfField = '\0'; // null terminate it.
+    }
+    if (len == 4 && uprv_isASCIILetter(*start)) {
+        // Got a script
+        replacedScript = start;
+        if (endOfField == nullptr) {
+            return;
+        }
+        // Step past the terminator just written. A post-increment here would
+        // leave start on the '\0' and make every later field look empty.
+        start = ++endOfField;
+        endOfField = (char*)uprv_strchr(start, '_');
+        if (endOfField == nullptr) {
+            len = uprv_strlen(start);
+        } else {
+            len = endOfField - start;
+            *endOfField = '\0'; // null terminate it.
+        }
+    }
+    if (len >= 2 && len <= 3) {
+        // Got a region
+        replacedRegion = start;
+        if (endOfField == nullptr) {
+            return;
+        }
+        start = ++endOfField;
+        endOfField = (char*)uprv_strchr(start, '_');
+        if (endOfField == nullptr) {
+            len = uprv_strlen(start);
+        } else {
+            len = endOfField - start;
+            *endOfField = '\0'; // null terminate it.
+        }
+    }
+    if (len >= 4) {
+        // Got a variant
+        replacedVariant = start;
+        if (endOfField == nullptr) {
+            return;
+        }
+        start = ++endOfField;
+    }
+    // Whatever is left after the recognised fields is the extensions part.
+    replacedExtensions = start;
+}
+
+// Replace by using languageAlias.
+// The three flags select which fields form the lookup key: the language (or
+// "und" when checkLanguage is false), the region, and each variant in turn.
+// On the first lookup whose replacement actually changes a field, the
+// language/script/region members and the matching variants entry are updated
+// and true is returned. Returns false when status is already a failure, when
+// a requested field is absent, or when no lookup changes anything.
+bool
+AliasReplacer::replaceLanguage(
+ bool checkLanguage, bool checkRegion,
+ bool checkVariants, UVector& toBeFreed, UErrorCode& status)
+{
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ if ( (checkRegion && region == nullptr) ||
+ (checkVariants && variants.size() == 0)) {
+ // Nothing to search.
+ return false;
+ }
+ int32_t variant_size = checkVariants ? variants.size() : 1;
+ // Since we may have more than one variant, we need to loop through them.
+ const char* searchLanguage = checkLanguage ? language : "und";
+ const char* searchRegion = checkRegion ? region : nullptr;
+ const char* searchVariant = nullptr;
+ for (int32_t variant_index = 0;
+ variant_index < variant_size;
+ variant_index++) {
+ if (checkVariants) {
+ U_ASSERT(variant_index < variant_size);
+ searchVariant = (const char*)(variants.elementAt(variant_index));
+ }
+
+ if (searchVariant != nullptr && uprv_strlen(searchVariant) < 4) {
+ // Do not consider ill-formed variant subtag.
+ searchVariant = nullptr;
+ }
+ CharString typeKey;
+ generateKey(searchLanguage, searchRegion, searchVariant, typeKey,
+ status);
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ const char *replacement = data->languageMap().get(typeKey.data());
+ if (replacement == nullptr) {
+ // Found no replacement data.
+ continue;
+ }
+
+ const char* replacedLanguage = nullptr;
+ const char* replacedScript = nullptr;
+ const char* replacedRegion = nullptr;
+ const char* replacedVariant = nullptr;
+ const char* replacedExtensions = nullptr;
+ parseLanguageReplacement(replacement,
+ replacedLanguage,
+ replacedScript,
+ replacedRegion,
+ replacedVariant,
+ replacedExtensions,
+ toBeFreed,
+ status);
+ // NOTE(review): status is not re-checked after the parse; on failure the
+ // replaced* pointers may still be nullptr when used below.
+ // A replacement language of "und" keeps the current language.
+ replacedLanguage =
+ (replacedLanguage != nullptr && uprv_strcmp(replacedLanguage, "und") == 0) ?
+ language : replacedLanguage;
+ replacedScript = deleteOrReplace(script, nullptr, replacedScript);
+ replacedRegion = deleteOrReplace(region, searchRegion, replacedRegion);
+ replacedVariant = deleteOrReplace(
+ searchVariant, searchVariant, replacedVariant);
+
+ if ( same(language, replacedLanguage) &&
+ same(script, replacedScript) &&
+ same(region, replacedRegion) &&
+ same(searchVariant, replacedVariant) &&
+ replacedExtensions == nullptr) {
+ // Replacement produce no changes.
+ continue;
+ }
+
+ language = replacedLanguage;
+ region = replacedRegion;
+ script = replacedScript;
+ if (searchVariant != nullptr) {
+ if (notEmpty(replacedVariant)) {
+ variants.setElementAt((void*)replacedVariant, variant_index);
+ } else {
+ variants.removeElementAt(variant_index);
+ }
+ }
+ if (replacedExtensions != nullptr) {
+ // TODO(ICU-21292)
+ // DO NOTHING
+ // UTS35 does not specify what we should do if we have extensions in the
+ // replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
+ // extensions in their languageAlias:
+ // i_default => en_x_i_default
+ // i_enochian => und_x_i_enochian
+ // i_mingo => see_x_i_mingo
+ // zh_min => nan_x_zh_min
+ // But all of them are already changed by code inside ultag_parse() before
+ // hitting this code.
+ }
+
+ // Something changed by language alias data.
+ return true;
+ }
+ // Nothing changed by language alias data.
+ return false;
+}
+
+/**
+ * Replace the region by using territoryAlias.
+ *
+ * When the replacement lists several regions separated by spaces, the one
+ * equal to the likely region of the current language/script is chosen if it
+ * appears in the list; otherwise the first listed region is used. The chosen
+ * region is copied into a heap CharString that is appended to toBeFreed.
+ *
+ * @param toBeFreed Receives any CharString allocated here.
+ * @param status    Nothing is done when it is already a failure; set on
+ *                  likely-subtag or allocation failure.
+ * @return true when the region member was changed, false otherwise.
+ */
+bool
+AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return false;
+    }
+    if (region == nullptr) {
+        // No region to search.
+        return false;
+    }
+    const char *replacement = data->territoryMap().get(region);
+    if (replacement == nullptr) {
+        // Found no replacement data for this region.
+        return false;
+    }
+    const char* replacedRegion = replacement;
+    const char* firstSpace = uprv_strchr(replacement, ' ');
+    if (firstSpace != nullptr) {
+        // If there is more than one region in the replacement, we need to
+        // check which one matches based on the language.
+        // Cannot use nullptr for language because that will construct
+        // the default locale, in that case, use "und" to get the correct
+        // locale.
+        Locale l(language == nullptr ? "und" : language, nullptr, script);
+        l.addLikelySubtags(status);
+        if (U_FAILURE(status)) {
+            // Nothing has been allocated yet; bail out before doing so.
+            return false;
+        }
+        const char* likelyRegion = l.getCountry();
+        CharString* item = nullptr;
+        if (likelyRegion != nullptr && uprv_strlen(likelyRegion) > 0) {
+            size_t len = uprv_strlen(likelyRegion);
+            const char* foundInReplacement = uprv_strstr(replacement,
+                                                         likelyRegion);
+            if (foundInReplacement != nullptr) {
+                // Assuming the case there are no three letter region code in
+                // the replacement of territoryAlias
+                U_ASSERT(foundInReplacement == replacement ||
+                         *(foundInReplacement-1) == ' ');
+                U_ASSERT(foundInReplacement[len] == ' ' ||
+                         foundInReplacement[len] == '\0');
+                item = new CharString(foundInReplacement, (int32_t)len, status);
+            }
+        }
+        if (item == nullptr) {
+            // No likely region in the list: fall back to the first entry.
+            item = new CharString(replacement,
+                                  (int32_t)(firstSpace - replacement), status);
+        }
+        if (U_FAILURE(status)) {
+            // The CharString is not yet owned by toBeFreed; release it here
+            // so that it does not leak.
+            delete item;
+            return false;
+        }
+        if (item == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return false;
+        }
+        replacedRegion = item->data();
+        toBeFreed.addElement(item, status);
+    }
+    U_ASSERT(!same(region, replacedRegion));
+    region = replacedRegion;
+    // The region is changed by data in territory alias.
+    return true;
+}
+
+// Replace by using scriptAlias.
+// Looks the current script up in the script map and, when an entry exists,
+// stores it in the script member and returns true. Returns false when status
+// is already a failure, there is no script, or the map has no entry.
+bool
+AliasReplacer::replaceScript(UErrorCode& status)
+{
+    if (U_FAILURE(status) || script == nullptr) {
+        // Prior failure, or no script to search.
+        return false;
+    }
+    const char* const canonicalScript = data->scriptMap().get(script);
+    if (canonicalScript != nullptr) {
+        U_ASSERT(!same(script, canonicalScript));
+        // The script is changed by data in script alias.
+        script = canonicalScript;
+        return true;
+    }
+    // Found no replacement data for this script.
+    return false;
+}
+
+// Replace by using variantAlias.
+// Replaces the first variant that has a differing entry in the variant map
+// and returns true; returns false when status is already a failure or no
+// variant is changed.
+bool
+AliasReplacer::replaceVariant(UErrorCode& status)
+{
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ // Since we may have more than one variant, we need to loop through them.
+ for (int32_t i = 0; i < variants.size(); i++) {
+ const char *variant = (const char*)(variants.elementAt(i));
+ const char *replacement = data->variantMap().get(variant);
+ if (replacement == nullptr) {
+ // Found no replacement data for this variant.
+ continue;
+ }
+ U_ASSERT((uprv_strlen(replacement) >= 5 &&
+ uprv_strlen(replacement) <= 8) ||
+ (uprv_strlen(replacement) == 4 &&
+ replacement[0] >= '0' &&
+ replacement[0] <= '9'));
+ if (!same(variant, replacement)) {
+ variants.setElementAt((void*)replacement, i);
+ // Special hack to handle hepburn-heploc => alalc97
+ // NOTE(review): j is not adjusted after removeElementAt(j), so the element
+ // that shifts into slot j is skipped; two adjacent "hepburn" entries would
+ // leave one behind.
+ if (uprv_strcmp(variant, "heploc") == 0) {
+ for (int32_t j = 0; j < variants.size(); j++) {
+ if (uprv_strcmp((const char*)(variants.elementAt(j)),
+ "hepburn") == 0) {
+ variants.removeElementAt(j);
+ }
+ }
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+// Gather fields and generate locale ID into out.
+// Appends the language, then script and region when non-empty, then the
+// sorted variants (upper-cased), then the extensions when non-empty.
+// NOTE(review): status is taken by value, so append/sort failures are not
+// reported back to the caller.
+CharString&
+AliasReplacer::outputToString(
+ CharString& out, UErrorCode status)
+{
+ out.append(language, status);
+ if (notEmpty(script)) {
+ out.append(SEP_CHAR, status)
+ .append(script, status);
+ }
+ if (notEmpty(region)) {
+ out.append(SEP_CHAR, status)
+ .append(region, status);
+ }
+ if (variants.size() > 0) {
+ // With neither script nor region, one extra separator is emitted here, so
+ // the first variant ends up preceded by two separators.
+ if (!notEmpty(script) && !notEmpty(region)) {
+ out.append(SEP_CHAR, status);
+ }
+ variants.sort([](UElement e1, UElement e2) -> int8_t {
+ return uprv_strcmp(
+ (const char*)e1.pointer, (const char*)e2.pointer);
+ }, status);
+ int32_t variantsStart = out.length();
+ for (int32_t i = 0; i < variants.size(); i++) {
+ out.append(SEP_CHAR, status)
+ .append((const char*)((UVector*)variants.elementAt(i)),
+ status);
+ }
+ // Upper-case everything appended from variantsStart onward.
+ T_CString_toUpperCase(out.data() + variantsStart);
+ }
+ if (notEmpty(extensions)) {
+ // NOTE(review): in the code visible in this chunk, extensions is only ever
+ // initialized to nullptr, so this branch may be unreachable - confirm.
+ CharString tmp("und_", status);
+ tmp.append(extensions, status);
+ Locale tmpLocale(tmp.data());
+ // only support x extension inside CLDR for now.
+ U_ASSERT(extensions[0] == 'x');
+ // Appends tmpLocale's name without its first character.
+ out.append(tmpLocale.getName() + 1, status);
+ }
+ return out;
+}
+
+// Check the fields inside locale; if any field needs to be replaced, place
+// the replaced locale ID in out and return true. Otherwise clear/leave out
+// empty and return false (no replacement, or error).
+// NOTE(review): status is taken by value, so a failure detected here is not
+// visible to the caller.
+// NOTE(review): the variants member keeps pointers into the local
+// variantsBuff, which is destroyed when this function returns.
+bool
+AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
+{
+ data = AliasData::singleton(status);
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ U_ASSERT(data != nullptr);
+ out.clear();
+ // Normalize empty fields to nullptr.
+ language = locale.getLanguage();
+ if (!notEmpty(language)) {
+ language = nullptr;
+ }
+ script = locale.getScript();
+ if (!notEmpty(script)) {
+ script = nullptr;
+ }
+ region = locale.getCountry();
+ if (!notEmpty(region)) {
+ region = nullptr;
+ }
+ const char* variantsStr = locale.getVariant();
+ const char* extensionsStr = locale_getKeywordsStart(locale.getName());
+ CharString variantsBuff(variantsStr, -1, status);
+ if (!variantsBuff.isEmpty()) {
+ if (U_FAILURE(status)) { return false; }
+ // Lower-case the copy and split it in place at each SEP_CHAR.
+ char* start = variantsBuff.data();
+ T_CString_toLowerCase(start);
+ char* end;
+ while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr &&
+ U_SUCCESS(status)) {
+ *end = NULL_CHAR; // null terminate inside variantsBuff
+ variants.addElement(start, status);
+ start = end + 1;
+ }
+ variants.addElement(start, status);
+ }
+ if (U_FAILURE(status)) { return false; }
+
+ // Sort the variants
+ variants.sort([](UElement e1, UElement e2) -> int8_t {
+ return uprv_strcmp(
+ (const char*)e1.pointer, (const char*)e2.pointer);
+ }, status);
+
+ // A changed count to assert when loop too many times.
+ int changed = 0;
+ // A UVector to hold CharString objects allocated by the replace* methods;
+ // they are freed when it goes out of scope at the end of this function.
+ UVector stringsToBeFreed([](void *obj){ delete ((CharString*) obj); },
+ nullptr, 10, status);
+ while (U_SUCCESS(status)) {
+ // Something is wrong with the data if we have already looped here
+ // 5 times.
+ U_ASSERT(changed < 5);
+ // From observation of key in data/misc/metadata.txt
+ // we know currently we only need to search in the following combination
+ // of fields for type in languageAlias:
+ // * lang_region_variant
+ // * lang_region
+ // * lang_variant
+ // * lang
+ // * und_variant
+ // This assumption is ensured by the U_ASSERT in readLanguageAlias
+ //
+ // lang REGION variant
+ if ( replaceLanguage(true, true, true, stringsToBeFreed, status) ||
+ replaceLanguage(true, true, false, stringsToBeFreed, status) ||
+ replaceLanguage(true, false, true, stringsToBeFreed, status) ||
+ replaceLanguage(true, false, false, stringsToBeFreed, status) ||
+ replaceLanguage(false,false, true, stringsToBeFreed, status) ||
+ replaceTerritory(stringsToBeFreed, status) ||
+ replaceScript(status) ||
+ replaceVariant(status)) {
+ // Some values in data is changed, try to match from the beginning
+ // again.
+ changed++;
+ continue;
+ }
+ // Nothing changed. Break out.
+ break;
+ } // while(1)
+
+ if (U_FAILURE(status)) { return false; }
+ // Nothing changed and we know the order of the variants is not changed
+ // because we have no variant or only one.
+ if (changed == 0 && variants.size() <= 1) {
+ return false;
+ }
+ outputToString(out, status);
+ if (extensionsStr != nullptr) {
+ out.append(extensionsStr, status);
+ }
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ // If the tag is not changed, return.
+ if (uprv_strcmp(out.data(), locale.getName()) == 0) {
+ U_ASSERT(changed == 0);
+ U_ASSERT(variants.size() > 1);
+ out.clear();
+ return false;
+ }
+ return true;
+}
+
+// Return true if the locale is changed during canonicalization.
+// The replaced value then will be put into out.
+// Creates a one-shot AliasReplacer and forwards to its replace().
+// NOTE(review): AliasReplacer's constructor and replace() are declared with a
+// by-value UErrorCode, so status may not reflect failures raised inside them.
+bool
+canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
+{
+ AliasReplacer replacer(status);
+ return replacer.replace(locale, out, status);
+}
+
+// Fast path for locale IDs that are known to be canonical already, so that
+// loading the alias resources can be skipped at startup until they are
+// really needed.
+bool
+isKnownCanonicalizedLocale(const char* locale, UErrorCode& status)
+{
+    // The most frequent IDs are answered without touching the hash table.
+    static const char* const kAlwaysCanonical[] = { "c", "en", "en_US" };
+    for (const char* known : kAlwaysCanonical) {
+        if (uprv_strcmp(locale, known) == 0) {
+            return true;
+        }
+    }
+
+    // Otherwise consult the lazily built table of well-known canonical IDs.
+    umtx_initOnce(gKnownCanonicalizedInitOnce,
+                  &loadKnownCanonicalized, status);
+    if (U_SUCCESS(status)) {
+        U_ASSERT(gKnownCanonicalized != nullptr);
+        return uhash_geti(gKnownCanonicalized, locale) != 0;
+    }
+    return false;
+}
+
+} // namespace
+
+// Function for testing.
+// Returns the KNOWN_CANONICALIZED array and stores its element count in
+// *length. length must not be null (it is dereferenced unconditionally).
+U_CAPI const char* const*
+ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length)
+{
+ *length = UPRV_LENGTHOF(KNOWN_CANONICALIZED);
+ return KNOWN_CANONICALIZED;
+}
+
+// Function for testing.
+// Returns true when canonicalizeLocale() reports no change for localeName
+// and leaves the error code in a success state.
+U_CAPI bool
+ulocimp_isCanonicalizedLocaleForTest(const char* localeName)
+{
+    Locale candidate(localeName);
+    UErrorCode errorCode = U_ZERO_ERROR;
+    CharString scratch;
+    const bool wasChanged = canonicalizeLocale(candidate, scratch, errorCode);
+    if (wasChanged) {
+        return false;
+    }
+    return U_SUCCESS(errorCode);
+}
+
+/* This function initializes a Locale from a C locale ID.
+ *
+ * localeID     The ID to parse; NULL assigns the default locale.
+ * canonicalize When true, uloc_canonicalize() is used instead of
+ *              uloc_getName(), and alias canonicalization is applied
+ *              afterwards unless the name is a known canonical one.
+ *
+ * Returns *this. On any error the object is set to bogus (there is no
+ * UErrorCode parameter here).
+ */
+Locale& Locale::init(const char* localeID, UBool canonicalize)
+{
+ fIsBogus = FALSE;
+ /* Free our current storage */
+ if (baseName != fullName) {
+ uprv_free(baseName);
+ }
+ baseName = NULL;
+ if(fullName != fullNameBuffer) {
+ uprv_free(fullName);
+ fullName = fullNameBuffer;
+ }
+
+ // not a loop:
+ // just an easy way to have a common error-exit
+ // without goto and without another function
+ do {
+ char *separator;
+ char *field[5] = {0};
+ int32_t fieldLen[5] = {0};
+ int32_t fieldIdx;
+ int32_t variantField;
+ int32_t length;
+ UErrorCode err;
+
+ if(localeID == NULL) {
+ // not an error, just set the default locale
+ return *this = getDefault();
+ }
+
+ /* preset all fields to empty */
+ language[0] = script[0] = country[0] = 0;
+
+ // "canonicalize" the locale ID to ICU/Java format
+ err = U_ZERO_ERROR;
+ length = canonicalize ?
+ uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) :
+ uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
+
+ if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
+ /*Go to heap for the fullName if necessary*/
+ fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
+ if(fullName == 0) {
+ fullName = fullNameBuffer;
+ break; // error: out of memory
+ }
+ // Second pass into the heap buffer sized from the first pass.
+ err = U_ZERO_ERROR;
+ length = canonicalize ?
+ uloc_canonicalize(localeID, fullName, length+1, &err) :
+ uloc_getName(localeID, fullName, length+1, &err);
+ }
+ if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
+ /* should never occur */
+ break;
+ }
+
+ variantBegin = length;
+
+ /* after uloc_getName/canonicalize() we know that only '_' are separators */
+ /* But _ could also appear in a timezone such as "en@timezone=America/Los_Angeles" */
+ separator = field[0] = fullName;
+ fieldIdx = 1;
+ char* at = uprv_strchr(fullName, '@');
+ // Record the start and length of up to four '_'-separated fields that lie
+ // before any '@'.
+ while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != 0 &&
+ fieldIdx < UPRV_LENGTHOF(field)-1 &&
+ (at == nullptr || separator < at)) {
+ field[fieldIdx] = separator + 1;
+ fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
+ fieldIdx++;
+ }
+ // variant may contain @foo or .foo POSIX cruft; remove it
+ separator = uprv_strchr(field[fieldIdx-1], '@');
+ char* sep2 = uprv_strchr(field[fieldIdx-1], '.');
+ if (separator!=NULL || sep2!=NULL) {
+ // Cut at whichever of '@' and '.' comes first.
+ if (separator==NULL || (sep2!=NULL && separator > sep2)) {
+ separator = sep2;
+ }
+ fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
+ } else {
+ fieldLen[fieldIdx-1] = length - (int32_t)(field[fieldIdx-1] - fullName);
+ }
+
+ if (fieldLen[0] >= (int32_t)(sizeof(language)))
+ {
+ break; // error: the language field is too long
+ }
+
+ variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */
+ if (fieldLen[0] > 0) {
+ /* We have a language */
+ uprv_memcpy(language, fullName, fieldLen[0]);
+ language[fieldLen[0]] = 0;
+ }
+ if (fieldLen[1] == 4 && uprv_isASCIILetter(field[1][0]) &&
+ uprv_isASCIILetter(field[1][1]) && uprv_isASCIILetter(field[1][2]) &&
+ uprv_isASCIILetter(field[1][3])) {
+ /* We have at least a script */
+ uprv_memcpy(script, field[1], fieldLen[1]);
+ script[fieldLen[1]] = 0;
+ variantField++;
+ }
+
+ if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) {
+ /* We have a country */
+ uprv_memcpy(country, field[variantField], fieldLen[variantField]);
+ country[fieldLen[variantField]] = 0;
+ variantField++;
+ } else if (fieldLen[variantField] == 0) {
+ variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
+ }
+
+ if (fieldLen[variantField] > 0) {
+ /* We have a variant */
+ variantBegin = (int32_t)(field[variantField] - fullName);
+ }
+
+ err = U_ZERO_ERROR;
+ initBaseName(err);
+ if (U_FAILURE(err)) {
+ break;
+ }
+
+ if (canonicalize) {
+ if (!isKnownCanonicalizedLocale(fullName, err)) {
+ CharString replaced;
+ // Not sure it is already canonicalized
+ if (canonicalizeLocale(*this, replaced, err)) {
+ U_ASSERT(U_SUCCESS(err));
+ // If need replacement, call init again.
+ // (canonicalize is false on this recursive call, so it does not
+ // recurse further.)
+ init(replaced.data(), false);
+ }
+ if (U_FAILURE(err)) {
+ break;
+ }
+ }
+ } // if (canonicalize) {
+
+ // successful end of init()
+ return *this;
+ } while(0); /*loop doesn't iterate*/
+
+ // when an error occurs, then set this object to "bogus" (there is no UErrorCode here)
+ setToBogus();
+
+ return *this;
+}
+
+/*
+ * Derives baseName from fullName.
+ * Without keywords, baseName simply aliases fullName. With keywords (an '@'
+ * that precedes an '='), baseName becomes a heap copy of fullName cut at the
+ * '@'. Must run both from init() and after a keyword has been set.
+ */
+void
+Locale::initBaseName(UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    U_ASSERT(baseName==NULL || baseName==fullName);
+    const char *keywordStart = uprv_strchr(fullName, '@');
+    const char *firstEquals = uprv_strchr(fullName, '=');
+    const bool hasKeywords =
+        keywordStart && firstEquals && keywordStart < firstEquals;
+    if (!hasKeywords) {
+        baseName = fullName;
+        return;
+    }
+
+    // Key words exist: copy everything before the '@'.
+    int32_t prefixLength = (int32_t)(keywordStart - fullName);
+    baseName = (char *)uprv_malloc(prefixLength + 1);
+    if (baseName == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    uprv_strncpy(baseName, fullName, prefixLength);
+    baseName[prefixLength] = 0;
+
+    // init() leaves variantBegin equal to the length of fullName when there
+    // is no variant; clamp it to the length of the baseName instead.
+    if (variantBegin > prefixLength) {
+        variantBegin = prefixLength;
+    }
+}
+
+
+// Returns ustr_hashCharsN() computed over the whole of fullName.
+int32_t
+Locale::hashCode() const
+{
+ return ustr_hashCharsN(fullName, static_cast<int32_t>(uprv_strlen(fullName)));
+}
+
+// Puts this Locale into the "bogus" state: heap storage is released, all
+// name fields become empty strings and fIsBogus is set.
+void
+Locale::setToBogus() {
+    // Release baseName only when it is a separate allocation.
+    if (baseName != fullName) {
+        uprv_free(baseName);
+    }
+    baseName = NULL;
+    // Fall back to the inline buffer for fullName.
+    if (fullName != fullNameBuffer) {
+        uprv_free(fullName);
+        fullName = fullNameBuffer;
+    }
+    fullNameBuffer[0] = 0;
+    language[0] = 0;
+    script[0] = 0;
+    country[0] = 0;
+    fIsBogus = TRUE;
+    variantBegin = 0;
+}
+
+// Returns the current default locale. When gDefaultLocale is still NULL, the
+// default is established through locale_set_default_internal(NULL, ...).
+const Locale& U_EXPORT2
+Locale::getDefault()
+{
+ {
+ // The lock is held only while gDefaultLocale is read; it is released at
+ // the end of this scope, before locale_set_default_internal() is called.
+ Mutex lock(&gDefaultLocaleMutex);
+ if (gDefaultLocale != NULL) {
+ return *gDefaultLocale;
+ }
+ }
+ UErrorCode status = U_ZERO_ERROR;
+ return *locale_set_default_internal(NULL, status);
+}
+
+
+
+// Makes newLocale the default locale. Does nothing when status already
+// indicates a failure.
+void U_EXPORT2
+Locale::setDefault( const Locale& newLocale,
+                    UErrorCode& status)
+{
+    if (U_SUCCESS(status)) {
+        /* The default is set from the full name string of the supplied
+         * locale, which is a convenient way to go through the default
+         * locale caching mechanisms.
+         */
+        locale_set_default_internal(newLocale.getName(), status);
+    }
+}
+
+// Replaces this locale with the result of ulocimp_addLikelySubtags() applied
+// to fullName. Sets status to U_ILLEGAL_ARGUMENT_ERROR when re-initializing
+// from that result leaves the locale bogus.
+void
+Locale::addLikelySubtags(UErrorCode& status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ CharString maximizedLocaleID;
+ {
+ // The sink lives only for this scope; it is destroyed before
+ // maximizedLocaleID is read.
+ CharStringByteSink sink(&maximizedLocaleID);
+ ulocimp_addLikelySubtags(fullName, sink, &status);
+ }
+
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ init(maximizedLocaleID.data(), /*canonicalize=*/FALSE);
+ if (isBogus()) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+}
+
+// Replaces this locale with the result of ulocimp_minimizeSubtags() applied
+// to fullName. Sets status to U_ILLEGAL_ARGUMENT_ERROR when re-initializing
+// from that result leaves the locale bogus.
+void
+Locale::minimizeSubtags(UErrorCode& status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ CharString minimizedLocaleID;
+ {
+ // The sink lives only for this scope; it is destroyed before
+ // minimizedLocaleID is read.
+ CharStringByteSink sink(&minimizedLocaleID);
+ ulocimp_minimizeSubtags(fullName, sink, &status);
+ }
+
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ init(minimizedLocaleID.data(), /*canonicalize=*/FALSE);
+ if (isBogus()) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+}
+
+// Re-initializes this locale from its own name with canonicalization on.
+// Sets status to U_ILLEGAL_ARGUMENT_ERROR when the locale is bogus before or
+// after the re-initialization.
+void
+Locale::canonicalize(UErrorCode& status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if (isBogus()) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ // Copy the name first: init() frees/overwrites fullName.
+ CharString uncanonicalized(fullName, status);
+ if (U_FAILURE(status)) {
+ return;
+ }
+ init(uncanonicalized.data(), /*canonicalize=*/TRUE);
+ if (isBogus()) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+}
+
+// Creates a Locale from a BCP 47 language tag.
+// Returns a bogus Locale when status is already a failure, when parsing
+// fails, or when only part of the tag could be parsed; in the last case, and
+// when init() leaves the result bogus, status is set to
+// U_ILLEGAL_ARGUMENT_ERROR.
+Locale U_EXPORT2
+Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
+{
+ Locale result(Locale::eBOGUS);
+
+ if (U_FAILURE(status)) {
+ return result;
+ }
+
+ // If a BCP 47 language tag is passed as the language parameter to the
+ // normal Locale constructor, it will actually fall back to invoking
+ // uloc_forLanguageTag() to parse it if it somehow is able to detect that
+ // the string actually is BCP 47. This works well for things like strings
+ // using BCP 47 extensions, but it does not at all work for things like
+ // legacy language tags (marked as "Type: grandfathered" in BCP 47,
+ // e.g., "en-GB-oed") which are possible to also
+ // interpret as ICU locale IDs and because of that won't trigger the BCP 47
+ // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
+ // and then Locale::init(), instead of just calling the normal constructor.
+
+ CharString localeID;
+ int32_t parsedLength;
+ {
+ CharStringByteSink sink(&localeID);
+ ulocimp_forLanguageTag(
+ tag.data(),
+ tag.length(),
+ sink,
+ &parsedLength,
+ &status);
+ }
+
+ if (U_FAILURE(status)) {
+ return result;
+ }
+
+ // Reject tags that were only partially consumed by the parser.
+ if (parsedLength != tag.size()) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return result;
+ }
+
+ result.init(localeID.data(), /*canonicalize=*/FALSE);
+ if (result.isBogus()) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+ return result;
+}
+
+// Writes the language tag for this locale to sink via ulocimp_toLanguageTag()
+// in non-strict mode. A bogus locale yields U_ILLEGAL_ARGUMENT_ERROR.
+void
+Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
+{
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ if (fIsBogus) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ ulocimp_toLanguageTag(fullName, sink, /*strict=*/FALSE, &status);
+}
+
+// Builds a Locale from name without canonicalization; a null name yields a
+// copy of the default locale.
+Locale U_EXPORT2
+Locale::createFromName (const char *name)
+{
+    if (!name) {
+        return getDefault();
+    }
+    Locale parsed("");
+    parsed.init(name, FALSE);
+    return parsed;
+}
+
+// Builds a Locale from name with canonicalization enabled (init(name, TRUE)).
+Locale U_EXPORT2
+Locale::createCanonical(const char* name) {
+ Locale loc("");
+ loc.init(name, TRUE);
+ return loc;
+}
+
+// Returns uloc_getISO3Language() for this locale's full name.
+const char *
+Locale::getISO3Language() const
+{
+ return uloc_getISO3Language(fullName);
+}
+
+
+// Returns uloc_getISO3Country() for this locale's full name.
+const char *
+Locale::getISO3Country() const
+{
+ return uloc_getISO3Country(fullName);
+}
+
+/**
+ * Return the LCID value as specified in the "LocaleID" resource for this
+ * locale. The LocaleID must be expressed as a hexadecimal number, from
+ * one to four digits. If the LocaleID resource is not present, or is
+ * in an incorrect format, 0 is returned. The LocaleID is for use in
+ * Windows (it is an LCID), but is available on all platforms.
+ *
+ * Implemented as a direct call to uloc_getLCID(fullName).
+ */
+uint32_t
+Locale::getLCID() const
+{
+ return uloc_getLCID(fullName);
+}
+
+// Returns the array provided by uloc_getISOCountries().
+const char* const* U_EXPORT2 Locale::getISOCountries()
+{
+ return uloc_getISOCountries();
+}
+
+// Returns the array provided by uloc_getISOLanguages().
+const char* const* U_EXPORT2 Locale::getISOLanguages()
+{
+ return uloc_getISOLanguages();
+}
+
+// Re-initializes this locale from a POSIX ID, with canonicalization enabled.
+void
+Locale::setFromPOSIXID(const char *posixID)
+{
+    const UBool canonicalize = TRUE;
+    init(posixID, canonicalize);
+}
+
+// Shared Locale stored in the eROOT slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getRoot(void) {
+    return getLocale(eROOT);
+}
+
+// Shared Locale stored in the eENGLISH slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getEnglish(void) {
+    return getLocale(eENGLISH);
+}
+
+// Shared Locale stored in the eFRENCH slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getFrench(void) {
+    return getLocale(eFRENCH);
+}
+
+// Shared Locale stored in the eGERMAN slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getGerman(void) {
+    return getLocale(eGERMAN);
+}
+
+// Shared Locale stored in the eITALIAN slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getItalian(void) {
+    return getLocale(eITALIAN);
+}
+
+// Shared Locale stored in the eJAPANESE slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getJapanese(void) {
+    return getLocale(eJAPANESE);
+}
+
+// Shared Locale stored in the eKOREAN slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getKorean(void) {
+    return getLocale(eKOREAN);
+}
+
+// Shared Locale stored in the eCHINESE slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getChinese(void) {
+    return getLocale(eCHINESE);
+}
+
+// Simplified Chinese is served from the eCHINA slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getSimplifiedChinese(void) {
+    return getLocale(eCHINA);
+}
+
+// Traditional Chinese is served from the eTAIWAN slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getTraditionalChinese(void) {
+    return getLocale(eTAIWAN);
+}
+
+
+// Shared Locale stored in the eFRANCE slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getFrance(void) {
+    return getLocale(eFRANCE);
+}
+
+// Shared Locale stored in the eGERMANY slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getGermany(void) {
+    return getLocale(eGERMANY);
+}
+
+// Shared Locale stored in the eITALY slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getItaly(void) {
+    return getLocale(eITALY);
+}
+
+// Shared Locale stored in the eJAPAN slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getJapan(void) {
+    return getLocale(eJAPAN);
+}
+
+// Shared Locale stored in the eKOREA slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getKorea(void) {
+    return getLocale(eKOREA);
+}
+
+// Shared Locale stored in the eCHINA slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getChina(void) {
+    return getLocale(eCHINA);
+}
+
+// PRC resolves to the same eCHINA cache slot that getChina() uses.
+const Locale & U_EXPORT2 Locale::getPRC(void) {
+    return getLocale(eCHINA);
+}
+
+// Shared Locale stored in the eTAIWAN slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getTaiwan(void) {
+    return getLocale(eTAIWAN);
+}
+
+// Shared Locale stored in the eUK slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getUK(void) {
+    return getLocale(eUK);
+}
+
+// Shared Locale stored in the eUS slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getUS(void) {
+    return getLocale(eUS);
+}
+
+// Shared Locale stored in the eCANADA slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getCanada(void) {
+    return getLocale(eCANADA);
+}
+
+// Shared Locale stored in the eCANADA_FRENCH slot of the locale cache.
+const Locale & U_EXPORT2 Locale::getCanadaFrench(void) {
+    return getLocale(eCANADA_FRENCH);
+}
+
+/**
+ * Returns the predefined locale stored at index `locid` of the locale cache.
+ *
+ * `locid` is asserted to lie in [0, eMAX_LOCALES).
+ *
+ * NOTE: when the cache pointer is NULL the function still indexes it (at
+ * slot 0), so the returned reference is formed from a NULL pointer.
+ */
+const Locale &
+Locale::getLocale(int locid)
+{
+    Locale *cache = getLocaleCache();
+    U_ASSERT((locid >= 0) && (locid < eMAX_LOCALES));
+    if (cache == NULL) {
+        // The locale cache could not be allocated.
+        // All that is left is to hand back a NULL-based reference.
+        locid = 0;
+    }
+    return cache[locid]; /* possibly operating on NULL */
+}
+
+/*
+ * Returns the global locale cache, running locale_init through
+ * umtx_initOnce() first. The cache is reached through this function in
+ * order to get around static initialization and static destruction.
+ * The status produced by the one-time initialization is not reported.
+ */
+Locale *
+Locale::getLocaleCache(void)
+{
+    UErrorCode initStatus = U_ZERO_ERROR;
+    umtx_initOnce(gLocaleCacheInitOnce, locale_init, initStatus);
+    return gLocaleCache;
+}
+
+/**
+ * StringEnumeration over a private copy of a keyword list. The list is
+ * walked as consecutive NUL-terminated strings; an empty string (a lone NUL
+ * byte) marks the end.
+ */
+class KeywordEnumeration : public StringEnumeration {
+private:
+    char *keywords;          // start of the keyword list (or &fgClassID when nothing was copied)
+    char *current;           // next keyword to hand out
+    int32_t length;          // number of bytes copied from the caller's list
+    UnicodeString currUSKey;
+    static const char fgClassID;/* Warning this is used beyond the typical RTTI usage. */
+
+public:
+    static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; }
+    virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
+public:
+    /**
+     * Copies `keywordLen` bytes of `keys` and positions the enumeration at
+     * byte offset `currentIndex` inside the copy.
+     *
+     * With a failing `status` or a zero `keywordLen` nothing is copied and
+     * both pointers keep referring to the NUL byte fgClassID. A NULL `keys`
+     * or negative `keywordLen` sets U_ILLEGAL_ARGUMENT_ERROR; a failed
+     * allocation sets U_MEMORY_ALLOCATION_ERROR.
+     */
+    KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
+        : keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) {
+        if (U_FAILURE(status) || keywordLen == 0) {
+            return;
+        }
+        if (keys == NULL || keywordLen < 0) {
+            status = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        keywords = (char *)uprv_malloc(keywordLen+1);
+        if (keywords == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        uprv_memcpy(keywords, keys, keywordLen);
+        keywords[keywordLen] = 0;
+        length = keywordLen;
+        current = keywords + currentIndex;
+    }
+
+    virtual ~KeywordEnumeration();
+
+    /** Creates a new KeywordEnumeration over the same list, at the same position. */
+    virtual StringEnumeration * clone() const
+    {
+        UErrorCode cloneStatus = U_ZERO_ERROR;
+        const int32_t offset = (int32_t)(current - keywords);
+        return new KeywordEnumeration(keywords, length, offset, cloneStatus);
+    }
+
+    /** Counts the keywords in the whole list, independent of the current position. */
+    virtual int32_t count(UErrorCode &/*status*/) const {
+        int32_t total = 0;
+        for (char *cursor = keywords; *cursor; cursor += uprv_strlen(cursor)+1) {
+            total++;
+        }
+        return total;
+    }
+
+    /**
+     * Returns the next keyword and advances past it, or NULL (with a zero
+     * length) when `status` is a failure or the list is exhausted.
+     */
+    virtual const char* next(int32_t* resultLength, UErrorCode& status) {
+        if (U_FAILURE(status) || *current == 0) {
+            if (resultLength != NULL) {
+                *resultLength = 0;
+            }
+            return NULL;
+        }
+
+        const char* keyword = current;
+        const int32_t keywordLength = (int32_t)uprv_strlen(current);
+        current += keywordLength+1;
+        if (resultLength != NULL) {
+            *resultLength = keywordLength;
+        }
+        return keyword;
+    }
+
+    /** UnicodeString flavour of next(), converted through setChars(). */
+    virtual const UnicodeString* snext(UErrorCode& status) {
+        int32_t keywordLength = 0;
+        const char *keyword = next(&keywordLength, status);
+        return setChars(keyword, keywordLength, status);
+    }
+
+    /** Rewinds the enumeration to the first keyword. */
+    virtual void reset(UErrorCode& /*status*/) {
+        current = keywords;
+    }
+};
+
+// Storage behind the class ID. The constructor also points `keywords` and
+// `current` at this NUL byte, where it acts as an empty keyword list for
+// enumerations that own no buffer.
+const char KeywordEnumeration::fgClassID = '\0';
+
+/**
+ * Releases the private copy of the keyword list.
+ *
+ * `keywords` still refers to the static fgClassID byte when the constructor
+ * never allocated a buffer (failing status, zero length, or illegal
+ * arguments). That static byte was not obtained from uprv_malloc() and must
+ * not be handed to uprv_free().
+ */
+KeywordEnumeration::~KeywordEnumeration() {
+    if (keywords != &fgClassID) {
+        uprv_free(keywords);
+    }
+}
+
+// KeywordEnumeration variant whose next() passes every keyword through
+// uloc_toUnicodeLocaleKey() before returning it.
+class UnicodeKeywordEnumeration : public KeywordEnumeration {
+public:
+    using KeywordEnumeration::KeywordEnumeration;
+    virtual ~UnicodeKeywordEnumeration();
+
+    /**
+     * Returns the converted form of the next keyword, or nullptr (with a
+     * zero length) at the end of the list or on failure. A keyword that
+     * uloc_toUnicodeLocaleKey() cannot convert sets U_ILLEGAL_ARGUMENT_ERROR.
+     */
+    virtual const char* next(int32_t* resultLength, UErrorCode& status) {
+        const char* unicodeKey = nullptr;
+        const char* legacyKey = KeywordEnumeration::next(nullptr, status);
+        if (U_SUCCESS(status) && legacyKey != nullptr) {
+            unicodeKey = uloc_toUnicodeLocaleKey(legacyKey);
+            if (unicodeKey == nullptr) {
+                status = U_ILLEGAL_ARGUMENT_ERROR;
+            }
+        }
+
+        if (resultLength != nullptr) {
+            *resultLength = (unicodeKey == nullptr)
+                ? 0
+                : static_cast<int32_t>(uprv_strlen(unicodeKey));
+        }
+        return unicodeKey;
+    }
+};
+
+// The virtual destructor is defined out of line so that it can serve as the
+// "key function" of the class; it has nothing of its own to release.
+UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() {}
+
+/**
+ * Creates an enumeration over the keywords of this locale.
+ *
+ * @param status  In/out error code. Nothing is done when it already holds a
+ *                failure. Set to U_INVALID_FORMAT_ERROR when the name has an
+ *                '@' that is not followed by a '=', and to
+ *                U_MEMORY_ALLOCATION_ERROR when the enumeration cannot be
+ *                allocated.
+ * @return A new enumeration owned by the caller, or NULL when the locale has
+ *         no keywords or an error occurred. A non-NULL result is only ever
+ *         returned together with a non-failing status.
+ */
+StringEnumeration *
+Locale::createKeywords(UErrorCode &status) const
+{
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+
+    const char* variantStart = uprv_strchr(fullName, '@');
+    if (variantStart == NULL) {
+        // No keyword section: nothing to enumerate, and not an error.
+        return NULL;
+    }
+
+    // Test for NULL explicitly rather than comparing a possibly-NULL pointer
+    // relationally against variantStart.
+    const char* assignment = uprv_strchr(fullName, '=');
+    if (assignment == NULL || assignment <= variantStart) {
+        status = U_INVALID_FORMAT_ERROR;
+        return NULL;
+    }
+
+    CharString keywords;
+    CharStringByteSink sink(&keywords);
+    ulocimp_getKeywords(variantStart+1, '@', sink, FALSE, &status);
+    if (U_FAILURE(status) || keywords.isEmpty()) {
+        return NULL;
+    }
+
+    StringEnumeration *result =
+        new KeywordEnumeration(keywords.data(), keywords.length(), 0, status);
+    if (result == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    } else if (U_FAILURE(status)) {
+        // The constructor reports its own failures (for example a failed
+        // buffer allocation) through status. Do not hand out such an object
+        // alongside an error code.
+        delete result;
+        result = NULL;
+    }
+    return result;
+}
+
+/**
+ * Creates an enumeration over the keywords of this locale whose next()
+ * converts each keyword with uloc_toUnicodeLocaleKey().
+ *
+ * @param status  In/out error code. Nothing is done when it already holds a
+ *                failure. Set to U_INVALID_FORMAT_ERROR when the name has an
+ *                '@' that is not followed by a '=', and to
+ *                U_MEMORY_ALLOCATION_ERROR when the enumeration cannot be
+ *                allocated.
+ * @return A new enumeration owned by the caller, or NULL when the locale has
+ *         no keywords or an error occurred. A non-NULL result is only ever
+ *         returned together with a non-failing status.
+ */
+StringEnumeration *
+Locale::createUnicodeKeywords(UErrorCode &status) const
+{
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+
+    const char* variantStart = uprv_strchr(fullName, '@');
+    if (variantStart == NULL) {
+        // No keyword section: nothing to enumerate, and not an error.
+        return NULL;
+    }
+
+    // Test for NULL explicitly rather than comparing a possibly-NULL pointer
+    // relationally against variantStart.
+    const char* assignment = uprv_strchr(fullName, '=');
+    if (assignment == NULL || assignment <= variantStart) {
+        status = U_INVALID_FORMAT_ERROR;
+        return NULL;
+    }
+
+    CharString keywords;
+    CharStringByteSink sink(&keywords);
+    ulocimp_getKeywords(variantStart+1, '@', sink, FALSE, &status);
+    if (U_FAILURE(status) || keywords.isEmpty()) {
+        return NULL;
+    }
+
+    StringEnumeration *result =
+        new UnicodeKeywordEnumeration(keywords.data(), keywords.length(), 0, status);
+    if (result == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    } else if (U_FAILURE(status)) {
+        // The constructor reports its own failures (for example a failed
+        // buffer allocation) through status. Do not hand out such an object
+        // alongside an error code.
+        delete result;
+        result = NULL;
+    }
+    return result;
+}
+
+// Buffer-based keyword lookup: forwards to uloc_getKeywordValue() with this
+// locale's full name and returns whatever length that call reports.
+int32_t
+Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const
+{
+    const int32_t valueLength =
+        uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
+    return valueLength;
+}
+
+/**
+ * Writes the value of keyword `keywordName` to `sink`.
+ *
+ * Does nothing when `status` already holds a failure. A bogus locale is
+ * rejected with U_ILLEGAL_ARGUMENT_ERROR.
+ */
+void
+Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (fIsBogus) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // TODO: Remove the need for a const char* to a NUL terminated buffer.
+    const CharString terminatedName(keywordName, status);
+    if (U_SUCCESS(status)) {
+        ulocimp_getKeywordValue(fullName, terminatedName.data(), sink, &status);
+    }
+}
+
+/**
+ * Writes the Unicode-style value of the Unicode keyword `keywordName` to
+ * `sink`.
+ *
+ * The key is mapped with uloc_toLegacyKey(), the stored value is fetched
+ * with getKeywordValue(), and that value is mapped back with
+ * uloc_toUnicodeLocaleType(). Either mapping returning nullptr sets
+ * U_ILLEGAL_ARGUMENT_ERROR, and nothing is appended to `sink`.
+ */
+void
+Locale::getUnicodeKeywordValue(StringPiece keywordName,
+                               ByteSink& sink,
+                               UErrorCode& status) const {
+    // TODO: Remove the need for a const char* to a NUL terminated buffer.
+    const CharString terminatedName(keywordName, status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    const char* legacyKey = uloc_toLegacyKey(terminatedName.data());
+    if (legacyKey == nullptr) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // Collect the legacy value in a scratch string first; the caller's sink
+    // only ever receives the converted value.
+    CharString legacyValue;
+    {
+        CharStringByteSink legacySink(&legacyValue);
+        getKeywordValue(legacyKey, legacySink, status);
+    }
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    const char* unicodeValue =
+        uloc_toUnicodeLocaleType(terminatedName.data(), legacyValue.data());
+    if (unicodeValue == nullptr) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    const int32_t unicodeValueLength = static_cast<int32_t>(uprv_strlen(unicodeValue));
+    sink.Append(unicodeValue, unicodeValueLength);
+}
+
+/**
+ * Sets, replaces or removes a keyword in this locale's full name by way of
+ * uloc_setKeywordValue().
+ *
+ * The update is first attempted in the current fullName storage. When that
+ * reports U_BUFFER_OVERFLOW_ERROR, a buffer of the required size is
+ * allocated, the current name is copied into it and the update is repeated.
+ *
+ * @param keywordName   Keyword to change.
+ * @param keywordValue  New value, passed through to uloc_setKeywordValue().
+ * @param status        In/out error code. Nothing is done when it already
+ *                      holds a failure. Set to U_MEMORY_ALLOCATION_ERROR
+ *                      when the larger buffer cannot be allocated.
+ */
+void
+Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    int32_t bufferLength = uprv_max((int32_t)(uprv_strlen(fullName) + 1), ULOC_FULLNAME_CAPACITY);
+    int32_t newLength = uloc_setKeywordValue(keywordName, keywordValue, fullName,
+                                             bufferLength, &status) + 1;
+    /* Handle the case the current buffer is not enough to hold the new id */
+    if (status == U_BUFFER_OVERFLOW_ERROR) {
+        U_ASSERT(newLength > bufferLength);
+        char* newFullName = (char *)uprv_malloc(newLength);
+        if (newFullName == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        uprv_strcpy(newFullName, fullName);
+
+        // baseName may simply alias fullName (the check at the end of this
+        // function relies on that). Record it before the old storage is
+        // released, so baseName is not left pointing at the old buffer.
+        const UBool baseNameAliasedFullName = (baseName == fullName);
+
+        if (fullName != fullNameBuffer) {
+            // if full Name is already on the heap, need to free it.
+            uprv_free(fullName);
+        }
+        fullName = newFullName;
+        if (baseNameAliasedFullName) {
+            // Keep the alias intact so initBaseName() below recomputes the
+            // base name from the new storage.
+            baseName = fullName;
+        }
+        status = U_ZERO_ERROR;
+        uloc_setKeywordValue(keywordName, keywordValue, fullName, newLength, &status);
+    } else {
+        U_ASSERT(newLength <= bufferLength);
+    }
+    if (U_SUCCESS(status) && baseName == fullName) {
+        // May have added the first keyword, meaning that the fullName is no longer also the baseName.
+        initBaseName(status);
+    }
+}
+
+/**
+ * StringPiece overload: copies both arguments into NUL-terminated strings
+ * and delegates to the const char* overload, which checks `status` itself.
+ */
+void
+Locale::setKeywordValue(StringPiece keywordName,
+                        StringPiece keywordValue,
+                        UErrorCode& status) {
+    // TODO: Remove the need for a const char* to a NUL terminated buffer.
+    const CharString terminatedName(keywordName, status);
+    const CharString terminatedValue(keywordValue, status);
+    const char* name = terminatedName.data();
+    const char* value = terminatedValue.data();
+    setKeywordValue(name, value, status);
+}
+
+/**
+ * Sets a keyword given in Unicode form.
+ *
+ * The key is mapped with uloc_toLegacyKey(). A non-empty value is mapped
+ * with uloc_toLegacyType(); an empty value is passed on as a null pointer.
+ * Either mapping returning nullptr sets U_ILLEGAL_ARGUMENT_ERROR. The mapped
+ * pair is then handed to setKeywordValue().
+ */
+void
+Locale::setUnicodeKeywordValue(StringPiece keywordName,
+                               StringPiece keywordValue,
+                               UErrorCode& status) {
+    // TODO: Remove the need for a const char* to a NUL terminated buffer.
+    const CharString terminatedName(keywordName, status);
+    const CharString terminatedValue(keywordValue, status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    const char* legacyKey = uloc_toLegacyKey(terminatedName.data());
+    if (legacyKey == nullptr) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    const UBool hasValue = !terminatedValue.isEmpty();
+    const char* legacyValue = hasValue
+        ? uloc_toLegacyType(terminatedName.data(), terminatedValue.data())
+        : nullptr;
+    if (hasValue && legacyValue == nullptr) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    setKeywordValue(legacyKey, legacyValue, status);
+}
+
+// Plain accessor for the stored baseName pointer.
+const char * Locale::getBaseName() const
+{
+    return baseName;
+}
+
+// Out-of-line destructor for the nested Iterator type; it releases nothing itself.
+Locale::Iterator::~Iterator() {}
+
+//eof
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/loclikely.cpp b/thirdparty/icu4c/common/loclikely.cpp
new file mode 100644
index 0000000000..94a60aba3e
--- /dev/null
+++ b/thirdparty/icu4c/common/loclikely.cpp
@@ -0,0 +1,1410 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: loclikely.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010feb25
+* created by: Markus W. Scherer
+*
+* Code for likely and minimized locale subtags, separated out from other .cpp files
+* that then do not depend on resource bundle code and likely-subtags data.
+*/
+
+#include "unicode/bytestream.h"
+#include "unicode/utypes.h"
+#include "unicode/locid.h"
+#include "unicode/putil.h"
+#include "unicode/uchar.h"
+#include "unicode/uloc.h"
+#include "unicode/ures.h"
+#include "unicode/uscript.h"
+#include "bytesinkutil.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "ulocimp.h"
+#include "ustr_imp.h"
+
+/**
+ * These are the canonical strings for unknown languages, scripts and regions.
+ **/
+// Substituted by findLikelySubtags() for an empty language, and stripped
+// again from the front of the entry it finds.
+static const char* const unknownLanguage = "und";
+// parseTagString() reports a script that matches this value as "no script".
+static const char* const unknownScript = "Zzzz";
+// parseTagString() reports a region that matches this value as "no region".
+static const char* const unknownRegion = "ZZ";
+
+/**
+ * Looks up localeID in the "likelySubtags" resource bundle.
+ *
+ * An empty localeID is looked up as "und", and one that starts with '_' is
+ * looked up with "und" put in front of it. A leading "und" in the entry that
+ * is found is removed again before the entry is returned.
+ *
+ * @param localeID The tag to find.
+ * @param buffer A buffer to hold the matching entry.
+ * @param bufferLength The length of the output buffer.
+ * @param err In/out error code. A missing entry is not reported as an
+ *            error. An entry that does not fit into the buffer is reported
+ *            as U_INTERNAL_PROGRAM_ERROR.
+ * @return A pointer to "buffer" if found, or a null pointer if not.
+ */
+static const char* U_CALLCONV
+findLikelySubtags(const char* localeID,
+                  char* buffer,
+                  int32_t bufferLength,
+                  UErrorCode* err) {
+    if (U_FAILURE(*err)) {
+        return NULL;
+    }
+
+    UErrorCode lookupStatus = U_ZERO_ERROR;
+    icu::LocalUResourceBundlePointer likelyData(
+        ures_openDirect(NULL, "likelySubtags", &lookupStatus));
+    if (U_FAILURE(lookupStatus)) {
+        *err = lookupStatus;
+        return NULL;
+    }
+
+    /* Backing storage for a key that needs the "und" prefix. */
+    icu::CharString prefixedID;
+    if (localeID != NULL) {
+        if (*localeID == '\0') {
+            localeID = unknownLanguage;
+        } else if (*localeID == '_') {
+            prefixedID.append(unknownLanguage, *err);
+            prefixedID.append(localeID, *err);
+            if (U_FAILURE(*err)) {
+                return NULL;
+            }
+            localeID = prefixedID.data();
+        }
+    }
+
+    int32_t entryLength = 0;
+    const UChar* entry =
+        ures_getStringByKey(likelyData.getAlias(), localeID, &entryLength, &lookupStatus);
+
+    if (U_FAILURE(lookupStatus)) {
+        /*
+         * A missing resource only means there is no data for this
+         * particular locale ID; everything else is a real error.
+         */
+        if (lookupStatus != U_MISSING_RESOURCE_ERROR) {
+            *err = lookupStatus;
+        }
+        return NULL;
+    }
+
+    if (entryLength >= bufferLength) {
+        /* The buffer should never overflow. */
+        *err = U_INTERNAL_PROGRAM_ERROR;
+        return NULL;
+    }
+
+    u_UCharsToChars(entry, buffer, entryLength + 1);
+
+    const UBool startsWithUnknownLanguage =
+        entryLength >= 3 &&
+        uprv_strnicmp(buffer, unknownLanguage, 3) == 0 &&
+        (entryLength == 3 || buffer[3] == '_');
+    if (startsWithUnknownLanguage) {
+        /* Shift the remainder, terminator included, over the "und". */
+        uprv_memmove(buffer, buffer + 3, entryLength - 3 + 1);
+    }
+    return buffer;
+}
+
+/**
+ * Appends a tag to a buffer, writing a '_' separator in front of it when
+ * requested. The buffer must be large enough to contain the resulting tag
+ * plus any separator. The tag must not be a zero-length string. No
+ * terminator is written.
+ *
+ * @param tag The tag to add.
+ * @param tagLength The length of the tag.
+ * @param buffer The output buffer.
+ * @param bufferLength The number of bytes already in the buffer. This is an
+ *                     input/output parameter.
+ * @param withSeparator Whether to write '_' before the tag.
+ **/
+static void U_CALLCONV
+appendTag(
+    const char* tag,
+    int32_t tagLength,
+    char* buffer,
+    int32_t* bufferLength,
+    UBool withSeparator) {
+
+    int32_t writeOffset = *bufferLength;
+
+    if (withSeparator) {
+        buffer[writeOffset++] = '_';
+    }
+
+    uprv_memmove(buffer + writeOffset, tag, tagLength);
+
+    *bufferLength = writeOffset + tagLength;
+}
+
+/**
+ * Failure exit of createTagStringWithAlternates(): an overflow, or a failure
+ * path reached without any error recorded, means the locale ID passed in is
+ * ill-formed, so both are reported as U_ILLEGAL_ARGUMENT_ERROR. Any other
+ * failure code is left untouched.
+ **/
+static void
+reportIllFormedTag(UErrorCode* err) {
+    if (U_SUCCESS(*err) || *err == U_BUFFER_OVERFLOW_ERROR) {
+        *err = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+}
+
+/**
+ * Create a tag string from the supplied parameters and append it to the
+ * sink. The lang, script and region parameters may be NULL pointers. If they
+ * are, their corresponding length parameters must be less than or equal to 0.
+ *
+ * If any of the language, script or region parameters are empty, and the
+ * alternateTags parameter is not NULL, alternateTags is parsed for the
+ * missing subtag. A subtag found in neither place is left out; in
+ * particular an unknown language is represented by the empty string.
+ *
+ * Trailing data that does not start with '@' is separated from the tag by
+ * '_', and by a second '_' when no region was written.
+ *
+ * If a subtag is too long for its ULOC_*_CAPACITY, or parsing alternateTags
+ * fails, nothing is appended and the function returns an error; overflow is
+ * reported as U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * @param lang The language tag to use.
+ * @param langLength The length of the language tag.
+ * @param script The script tag to use.
+ * @param scriptLength The length of the script tag.
+ * @param region The region tag to use.
+ * @param regionLength The length of the region tag.
+ * @param trailing Any trailing data to append to the new tag.
+ * @param trailingLength The length of the trailing data.
+ * @param alternateTags A string containing any alternate tags.
+ * @param sink The output sink receiving the tag string.
+ * @param err A pointer to a UErrorCode for error reporting.
+ **/
+static void U_CALLCONV
+createTagStringWithAlternates(
+    const char* lang,
+    int32_t langLength,
+    const char* script,
+    int32_t scriptLength,
+    const char* region,
+    int32_t regionLength,
+    const char* trailing,
+    int32_t trailingLength,
+    const char* alternateTags,
+    icu::ByteSink& sink,
+    UErrorCode* err) {
+
+    if (U_FAILURE(*err) ||
+        langLength >= ULOC_LANG_CAPACITY ||
+        scriptLength >= ULOC_SCRIPT_CAPACITY ||
+        regionLength >= ULOC_COUNTRY_CAPACITY) {
+        reportIllFormedTag(err);
+        return;
+    }
+
+    /**
+     * ULOC_FULLNAME_CAPACITY is comfortably more than the language, script
+     * and region codes (each checked against its own capacity) can occupy
+     * together with their separators.
+     **/
+    char tagBuffer[ULOC_FULLNAME_CAPACITY];
+    int32_t tagLength = 0;
+    UBool regionAppended = FALSE;
+
+    /* Language: explicit value first, then alternateTags, else empty. */
+    if (langLength > 0) {
+        appendTag(lang, langLength, tagBuffer, &tagLength, /*withSeparator=*/FALSE);
+    }
+    else if (alternateTags != NULL) {
+        char alternateLang[ULOC_LANG_CAPACITY];
+
+        const int32_t alternateLangLength =
+            uloc_getLanguage(
+                alternateTags,
+                alternateLang,
+                sizeof(alternateLang),
+                err);
+        if (U_FAILURE(*err) ||
+            alternateLangLength >= ULOC_LANG_CAPACITY) {
+            reportIllFormedTag(err);
+            return;
+        }
+        if (alternateLangLength != 0) {
+            appendTag(
+                alternateLang,
+                alternateLangLength,
+                tagBuffer,
+                &tagLength,
+                /*withSeparator=*/FALSE);
+        }
+        /* Otherwise the empty string stands for the unknown language. */
+    }
+
+    /* Script: explicit value first, then alternateTags. */
+    if (scriptLength > 0) {
+        appendTag(script, scriptLength, tagBuffer, &tagLength, /*withSeparator=*/TRUE);
+    }
+    else if (alternateTags != NULL) {
+        char alternateScript[ULOC_SCRIPT_CAPACITY];
+
+        const int32_t alternateScriptLength =
+            uloc_getScript(
+                alternateTags,
+                alternateScript,
+                sizeof(alternateScript),
+                err);
+        if (U_FAILURE(*err) ||
+            alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
+            reportIllFormedTag(err);
+            return;
+        }
+        if (alternateScriptLength > 0) {
+            appendTag(
+                alternateScript,
+                alternateScriptLength,
+                tagBuffer,
+                &tagLength,
+                /*withSeparator=*/TRUE);
+        }
+    }
+
+    /* Region: explicit value first, then alternateTags. */
+    if (regionLength > 0) {
+        appendTag(region, regionLength, tagBuffer, &tagLength, /*withSeparator=*/TRUE);
+        regionAppended = TRUE;
+    }
+    else if (alternateTags != NULL) {
+        char alternateRegion[ULOC_COUNTRY_CAPACITY];
+
+        const int32_t alternateRegionLength =
+            uloc_getCountry(
+                alternateTags,
+                alternateRegion,
+                sizeof(alternateRegion),
+                err);
+        if (U_FAILURE(*err) ||
+            alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
+            reportIllFormedTag(err);
+            return;
+        }
+        if (alternateRegionLength > 0) {
+            appendTag(
+                alternateRegion,
+                alternateRegionLength,
+                tagBuffer,
+                &tagLength,
+                /*withSeparator=*/TRUE);
+            regionAppended = TRUE;
+        }
+    }
+
+    /* Hand the assembled language/script/region part to the sink. */
+    sink.Append(tagBuffer, tagLength);
+
+    if (trailingLength > 0) {
+        if (*trailing != '@') {
+            sink.Append("_", 1);
+            if (!regionAppended) {
+                /* extra separator is required */
+                sink.Append("_", 1);
+            }
+        }
+
+        /* The trailing data follows unchanged. */
+        sink.Append(trailing, trailingLength);
+    }
+}
+
+/**
+ * Create a tag string from the supplied parameters and append it to the
+ * sink. The lang, script and region parameters may be NULL pointers. If they
+ * are, their corresponding length parameters must be less than or equal to 0.
+ *
+ * This is createTagStringWithAlternates() called without an alternate tag
+ * string, so a subtag that is not supplied is simply left out.
+ *
+ * @param lang The language tag to use.
+ * @param langLength The length of the language tag.
+ * @param script The script tag to use.
+ * @param scriptLength The length of the script tag.
+ * @param region The region tag to use.
+ * @param regionLength The length of the region tag.
+ * @param trailing Any trailing data to append to the new tag.
+ * @param trailingLength The length of the trailing data.
+ * @param sink The output sink receiving the tag string.
+ * @param err A pointer to a UErrorCode for error reporting.
+ **/
+static void U_CALLCONV
+createTagString(
+    const char* lang,
+    int32_t langLength,
+    const char* script,
+    int32_t scriptLength,
+    const char* region,
+    int32_t regionLength,
+    const char* trailing,
+    int32_t trailingLength,
+    icu::ByteSink& sink,
+    UErrorCode* err)
+{
+    const char* const noAlternateTags = NULL;
+
+    createTagStringWithAlternates(
+        lang, langLength,
+        script, scriptLength,
+        region, regionLength,
+        trailing, trailingLength,
+        noAlternateTags,
+        sink,
+        err);
+}
+
+/**
+ * Parse the language, script, and region subtags from a tag string, and copy
+ * the results into the corresponding output buffers.
+ *
+ * The langLength, scriptLength, and regionLength parameters are input/output
+ * parameters, and must contain the capacity of their corresponding buffers on
+ * input. On output, they will contain the length of the subtag that was
+ * parsed. A script equal to "Zzzz" or a region equal to "ZZ" is reported with
+ * length 0.
+ *
+ * Parsing stops at the first subtag whose extraction leaves a failure code in
+ * err; only failure codes are checked, warnings do not stop parsing.
+ *
+ * If an illegal argument is provided (any NULL pointer), the function returns
+ * the error U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * @param localeID The locale ID to parse.
+ * @param lang The language tag buffer.
+ * @param langLength The length of the language tag.
+ * @param script The script tag buffer.
+ * @param scriptLength The length of the script tag.
+ * @param region The region tag buffer.
+ * @param regionLength The length of the region tag.
+ * @param err A pointer to a UErrorCode for error reporting.
+ * @return The number of chars of the localeID parameter consumed.
+ **/
+static int32_t U_CALLCONV
+parseTagString(
+    const char* localeID,
+    char* lang,
+    int32_t* langLength,
+    char* script,
+    int32_t* scriptLength,
+    char* region,
+    int32_t* regionLength,
+    UErrorCode* err)
+{
+    /* A pending failure is passed through; nothing has been consumed. */
+    if (U_FAILURE(*err)) {
+        return 0;
+    }
+
+    if (localeID == NULL ||
+        lang == NULL || langLength == NULL ||
+        script == NULL || scriptLength == NULL ||
+        region == NULL || regionLength == NULL) {
+        *err = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    const char* cursor = localeID;
+
+    /* Language. */
+    const int32_t parsedLangLength =
+        ulocimp_getLanguage(cursor, &cursor, *err).extract(lang, *langLength, *err);
+    if (U_FAILURE(*err)) {
+        return (int32_t)(cursor - localeID);
+    }
+    *langLength = parsedLangLength;
+
+    /* Step over the separator that follows the (possibly empty) language. */
+    if (_isIDSeparator(*cursor)) {
+        ++cursor;
+    }
+
+    /* Script. */
+    const int32_t parsedScriptLength =
+        ulocimp_getScript(cursor, &cursor, *err).extract(script, *scriptLength, *err);
+    if (U_FAILURE(*err)) {
+        return (int32_t)(cursor - localeID);
+    }
+    *scriptLength = parsedScriptLength;
+
+    if (parsedScriptLength > 0) {
+        /* The "unknown" script is not returned to the caller. */
+        if (uprv_strnicmp(script, unknownScript, parsedScriptLength) == 0) {
+            *scriptLength = 0;
+        }
+
+        /* A script was consumed either way, so move past its separator. */
+        if (_isIDSeparator(*cursor)) {
+            ++cursor;
+        }
+    }
+
+    /* Region. */
+    const int32_t parsedRegionLength =
+        ulocimp_getCountry(cursor, &cursor, *err).extract(region, *regionLength, *err);
+    if (U_FAILURE(*err)) {
+        return (int32_t)(cursor - localeID);
+    }
+    *regionLength = parsedRegionLength;
+
+    if (parsedRegionLength > 0) {
+        /* The "unknown" region is not returned to the caller. */
+        if (uprv_strnicmp(region, unknownRegion, parsedRegionLength) == 0) {
+            *regionLength = 0;
+        }
+    } else if (*cursor != 0 && *cursor != '@') {
+        /* back up over consumed trailing separator */
+        --cursor;
+    }
+
+    return (int32_t)(cursor - localeID);
+}
+
+/**
+ * Maximizes a language/script/region triple using the likely-subtags data
+ * and appends the result, followed by the variants, to the sink.
+ *
+ * Lookup keys are tried from most to least specific:
+ *   1. language + script + region  (only if script and region were supplied)
+ *   2. language + script           (only if a script was supplied)
+ *   3. language + region           (only if a region was supplied)
+ *   4. language alone
+ * The first key with an entry wins. The language is always taken from that
+ * entry, since it may be more specific than the one provided. A supplied
+ * script or region that was not part of the winning key is kept as is, and
+ * whatever is still missing is filled in from the entry.
+ *
+ * @param lang The language tag to use.
+ * @param langLength The length of the language tag.
+ * @param script The script tag to use.
+ * @param scriptLength The length of the script tag.
+ * @param region The region tag to use.
+ * @param regionLength The length of the region tag.
+ * @param variants Trailing data to append after the maximized tag.
+ * @param variantsLength The length of the trailing data.
+ * @param sink The output sink receiving the tag string.
+ * @param err A pointer to a UErrorCode for error reporting.
+ * @return TRUE if an entry was found and the tag was written to the sink.
+ *         FALSE if no key matched (nothing is written) or an error occurred
+ *         before a match was found.
+ **/
+static UBool U_CALLCONV
+createLikelySubtagsString(
+    const char* lang,
+    int32_t langLength,
+    const char* script,
+    int32_t scriptLength,
+    const char* region,
+    int32_t regionLength,
+    const char* variants,
+    int32_t variantsLength,
+    icu::ByteSink& sink,
+    UErrorCode* err) {
+    /* Which optional subtags take part in each lookup key, in lookup order. */
+    static const struct {
+        UBool useScript;
+        UBool useRegion;
+    } lookups[] = {
+        { TRUE,  TRUE  },
+        { TRUE,  FALSE },
+        { FALSE, TRUE  },
+        { FALSE, FALSE }
+    };
+    const int32_t lookupCount = (int32_t)(sizeof(lookups) / sizeof(lookups[0]));
+
+    /**
+     * ULOC_FULLNAME_CAPACITY will provide enough capacity
+     * that we can hold an entry that contains the language,
+     * script and region code.
+     **/
+    char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
+
+    if (U_FAILURE(*err)) {
+        return FALSE;
+    }
+
+    for (int32_t i = 0; i < lookupCount; ++i) {
+        const UBool useScript = lookups[i].useScript;
+        const UBool useRegion = lookups[i].useRegion;
+
+        /* A key can only be built from subtags that were actually supplied. */
+        if ((useScript && scriptLength <= 0) ||
+            (useRegion && regionLength <= 0)) {
+            continue;
+        }
+
+        icu::CharString lookupKey;
+        {
+            icu::CharStringByteSink keySink(&lookupKey);
+            createTagString(
+                lang,
+                langLength,
+                useScript ? script : NULL,
+                useScript ? scriptLength : 0,
+                useRegion ? region : NULL,
+                useRegion ? regionLength : 0,
+                NULL,
+                0,
+                keySink,
+                err);
+        }
+        if (U_FAILURE(*err)) {
+            return FALSE;
+        }
+
+        const char* likelySubtags =
+            findLikelySubtags(
+                lookupKey.data(),
+                likelySubtagsBuffer,
+                sizeof(likelySubtagsBuffer),
+                err);
+        if (U_FAILURE(*err)) {
+            return FALSE;
+        }
+
+        if (likelySubtags != NULL) {
+            /* Always use the language tag from the
+               maximal string, since it may be more
+               specific than the one provided. Subtags
+               that were part of the key come from the
+               maximal string as well; the others are
+               passed through as supplied. */
+            createTagStringWithAlternates(
+                NULL,
+                0,
+                useScript ? NULL : script,
+                useScript ? 0 : scriptLength,
+                useRegion ? NULL : region,
+                useRegion ? 0 : regionLength,
+                variants,
+                variantsLength,
+                likelySubtags,
+                sink,
+                err);
+            return TRUE;
+        }
+    }
+
+    return FALSE;
+}
+
+/**
+ * Scans the trailing part of a locale ID up to the first '@' and jumps to
+ * the enclosing function's `error` label when a run of characters between
+ * '-' / '_' separators is too long.
+ *
+ * The counter is tested before it is incremented, so the jump is taken on
+ * the tenth character of a run; runs of up to nine characters pass.
+ *
+ * Requirements on the expansion site: a label named `error` must be in
+ * scope, and no initialized declaration may sit between the macro and that
+ * label.
+ */
+#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
+    int32_t count = 0; \
+    int32_t i; \
+    for (i = 0; i < trailingLength; i++) { \
+        if (trailing[i] == '-' || trailing[i] == '_') { \
+            count = 0; \
+        } else if (trailing[i] == '@') { \
+            break; \
+        } else if (count > 8) { \
+            goto error; \
+        } else { \
+            count++; \
+        } \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adds likely subtags to localeID and appends the result to the sink.
+ *
+ * The ID is split into language, script, region and a trailing part. The
+ * trailing part is checked with CHECK_TRAILING_VARIANT_SIZE, and the rest is
+ * maximized by createLikelySubtagsString(). When that finds no match,
+ * localeID itself is appended unchanged.
+ *
+ * @param localeID The locale ID to maximize; NULL is an illegal argument.
+ * @param sink The output sink receiving the result.
+ * @param err A pointer to a UErrorCode for error reporting. A buffer
+ *            overflow while parsing is reported as U_ILLEGAL_ARGUMENT_ERROR.
+ * @return TRUE if the sink received a maximized tag, FALSE if it received
+ *         the input unchanged or an error occurred.
+ */
+static UBool
+_uloc_addLikelySubtags(const char* localeID,
+                       icu::ByteSink& sink,
+                       UErrorCode* err) {
+    /* Everything is declared up front so that no `goto error` (including
+       the one inside CHECK_TRAILING_VARIANT_SIZE) skips an initialization. */
+    char lang[ULOC_LANG_CAPACITY];
+    char script[ULOC_SCRIPT_CAPACITY];
+    char region[ULOC_COUNTRY_CAPACITY];
+    int32_t langLength = sizeof(lang);
+    int32_t scriptLength = sizeof(script);
+    int32_t regionLength = sizeof(region);
+    const char* trailing = "";
+    int32_t trailingLength = 0;
+    int32_t trailingIndex = 0;
+    UBool success = FALSE;
+
+    if (U_FAILURE(*err) || localeID == NULL) {
+        goto error;
+    }
+
+    trailingIndex = parseTagString(
+        localeID,
+        lang, &langLength,
+        script, &scriptLength,
+        region, &regionLength,
+        err);
+    if (U_FAILURE(*err)) {
+        /* Overflow indicates an illegal argument error */
+        if (*err == U_BUFFER_OVERFLOW_ERROR) {
+            *err = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+
+        goto error;
+    }
+
+    /* Skip the separators in front of the trailing portion, then measure it. */
+    for (; _isIDSeparator(localeID[trailingIndex]); trailingIndex++) {
+    }
+    trailing = localeID + trailingIndex;
+    trailingLength = (int32_t)uprv_strlen(trailing);
+
+    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
+
+    success =
+        createLikelySubtagsString(
+            lang, langLength,
+            script, scriptLength,
+            region, regionLength,
+            trailing, trailingLength,
+            sink,
+            err);
+
+    if (!success) {
+        /*
+         * No match was found, so localeID itself is the result.
+         */
+        sink.Append(localeID, (int32_t)uprv_strlen(localeID));
+    }
+
+    return success;
+
+error:
+
+    if (!U_FAILURE(*err)) {
+        *err = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+    return FALSE;
+}
+
+// Adds likely subtags and writes the result to the sink.
+// Returns true if the value in the sink was produced by a match during the lookup.
+// Returns false if the value in the sink is the same as the input because there
+// was no match during the lookup.
+static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*);
+
+// Removes subtags from localeID that likely-subtags maximization would add
+// back, and writes the minimized ID to sink.
+//
+// Steps:
+//  1. Parse localeID into language/script/region plus a "trailing" remainder.
+//  2. Maximize the language/script/region part via _ulocimp_addLikelySubtags().
+//     If that reports no match, append localeID unchanged and return.
+//  3. Re-parse the maximized tag, then try progressively larger candidates
+//     (language; language+region; language+script). The first candidate whose
+//     own maximization matches the maximized tag is emitted with the trailing
+//     part re-attached.
+//  4. If none matches, emit the fully maximized tag plus the trailing part.
+//
+// On the error path *err is set to U_ILLEGAL_ARGUMENT_ERROR unless it already
+// holds a failure code.
+static void
+_uloc_minimizeSubtags(const char* localeID,
+ icu::ByteSink& sink,
+ UErrorCode* err) {
+ icu::CharString maximizedTagBuffer;
+
+ char lang[ULOC_LANG_CAPACITY];
+ int32_t langLength = sizeof(lang);
+ char script[ULOC_SCRIPT_CAPACITY];
+ int32_t scriptLength = sizeof(script);
+ char region[ULOC_COUNTRY_CAPACITY];
+ int32_t regionLength = sizeof(region);
+ const char* trailing = "";
+ int32_t trailingLength = 0;
+ int32_t trailingIndex = 0;
+ UBool successGetMax = FALSE;
+
+ if(U_FAILURE(*err)) {
+ goto error;
+ }
+ else if (localeID == NULL) {
+ goto error;
+ }
+
+ trailingIndex =
+ parseTagString(
+ localeID,
+ lang,
+ &langLength,
+ script,
+ &scriptLength,
+ region,
+ &regionLength,
+ err);
+ if(U_FAILURE(*err)) {
+
+ /* Overflow indicates an illegal argument error */
+ if (*err == U_BUFFER_OVERFLOW_ERROR) {
+ *err = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+
+ goto error;
+ }
+
+ /* Find the spot where the variants or the keywords begin, if any. */
+ while (_isIDSeparator(localeID[trailingIndex])) {
+ trailingIndex++;
+ }
+ trailing = &localeID[trailingIndex];
+ trailingLength = (int32_t)uprv_strlen(trailing);
+
+ // May `goto error` when a trailing subtag is too long.
+ CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
+
+ {
+ // Rebuild the ID from language/script/region only (no trailing part).
+ icu::CharString base;
+ {
+ icu::CharStringByteSink baseSink(&base);
+ createTagString(
+ lang,
+ langLength,
+ script,
+ scriptLength,
+ region,
+ regionLength,
+ NULL,
+ 0,
+ baseSink,
+ err);
+ }
+
+ /**
+ * First, we need to first get the maximization
+ * from AddLikelySubtags.
+ **/
+ {
+ icu::CharStringByteSink maxSink(&maximizedTagBuffer);
+ successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
+ }
+ }
+
+ // *err is first checked here, after both createTagString() and the
+ // maximization call above.
+ if(U_FAILURE(*err)) {
+ goto error;
+ }
+
+ if (!successGetMax) {
+ /**
+ * If we got here, return the locale ID parameter unchanged.
+ **/
+ const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
+ sink.Append(localeID, localeIDLength);
+ return;
+ }
+
+ // In the following, the lang, script, region are referring to those in
+ // the maximizedTagBuffer, not the one in the localeID.
+ // Reset the in/out lengths to the buffer capacities before re-parsing.
+ langLength = sizeof(lang);
+ scriptLength = sizeof(script);
+ regionLength = sizeof(region);
+ parseTagString(
+ maximizedTagBuffer.data(),
+ lang,
+ &langLength,
+ script,
+ &scriptLength,
+ region,
+ &regionLength,
+ err);
+ if(U_FAILURE(*err)) {
+ goto error;
+ }
+
+ /**
+ * Start first with just the language.
+ **/
+ {
+ icu::CharString tagBuffer;
+ {
+ icu::CharStringByteSink tagSink(&tagBuffer);
+ createLikelySubtagsString(
+ lang,
+ langLength,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ tagSink,
+ err);
+ }
+
+ if(U_FAILURE(*err)) {
+ goto error;
+ }
+ // Case-insensitive comparison over the first tagBuffer.length()
+ // characters only.
+ else if (!tagBuffer.isEmpty() &&
+ uprv_strnicmp(
+ maximizedTagBuffer.data(),
+ tagBuffer.data(),
+ tagBuffer.length()) == 0) {
+
+ createTagString(
+ lang,
+ langLength,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ trailing,
+ trailingLength,
+ sink,
+ err);
+ return;
+ }
+ }
+
+ /**
+ * Next, try the language and region.
+ **/
+ if (regionLength > 0) {
+
+ icu::CharString tagBuffer;
+ {
+ icu::CharStringByteSink tagSink(&tagBuffer);
+ createLikelySubtagsString(
+ lang,
+ langLength,
+ NULL,
+ 0,
+ region,
+ regionLength,
+ NULL,
+ 0,
+ tagSink,
+ err);
+ }
+
+ if(U_FAILURE(*err)) {
+ goto error;
+ }
+ else if (!tagBuffer.isEmpty() &&
+ uprv_strnicmp(
+ maximizedTagBuffer.data(),
+ tagBuffer.data(),
+ tagBuffer.length()) == 0) {
+
+ createTagString(
+ lang,
+ langLength,
+ NULL,
+ 0,
+ region,
+ regionLength,
+ trailing,
+ trailingLength,
+ sink,
+ err);
+ return;
+ }
+ }
+
+ /**
+ * Finally, try the language and script. This is our last chance,
+ * since trying with all three subtags would only yield the
+ * maximal version that we already have.
+ **/
+ if (scriptLength > 0) {
+ icu::CharString tagBuffer;
+ {
+ icu::CharStringByteSink tagSink(&tagBuffer);
+ createLikelySubtagsString(
+ lang,
+ langLength,
+ script,
+ scriptLength,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ tagSink,
+ err);
+ }
+
+ if(U_FAILURE(*err)) {
+ goto error;
+ }
+ else if (!tagBuffer.isEmpty() &&
+ uprv_strnicmp(
+ maximizedTagBuffer.data(),
+ tagBuffer.data(),
+ tagBuffer.length()) == 0) {
+
+ createTagString(
+ lang,
+ langLength,
+ script,
+ scriptLength,
+ NULL,
+ 0,
+ trailing,
+ trailingLength,
+ sink,
+ err);
+ return;
+ }
+ }
+
+ {
+ /**
+ * If we got here, return the max + trail.
+ **/
+ createTagString(
+ lang,
+ langLength,
+ script,
+ scriptLength,
+ region,
+ regionLength,
+ trailing,
+ trailingLength,
+ sink,
+ err);
+ return;
+ }
+
+error:
+
+ // Make sure callers always see a failure code on this path.
+ if (!U_FAILURE(*err)) {
+ *err = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+}
+
+// Canonicalizes localeID into buffer via uloc_canonicalize().
+//
+// Returns TRUE when canonicalization succeeded and the result fit, terminated,
+// into buffer. A result that did not fit (U_STRING_NOT_TERMINATED_WARNING or
+// U_BUFFER_OVERFLOW_ERROR) is reported as U_ILLEGAL_ARGUMENT_ERROR; any other
+// failure code is left in *err untouched. Both cases return FALSE.
+static UBool
+do_canonicalize(const char* localeID,
+                char* buffer,
+                int32_t bufferCapacity,
+                UErrorCode* err)
+{
+    uloc_canonicalize(localeID, buffer, bufferCapacity, err);
+
+    const UErrorCode outcome = *err;
+    if (outcome == U_STRING_NOT_TERMINATED_WARNING ||
+        outcome == U_BUFFER_OVERFLOW_ERROR) {
+        // The canonical ID does not fit: treat the input as invalid.
+        *err = U_ILLEGAL_ARGUMENT_ERROR;
+        return FALSE;
+    }
+
+    if (U_FAILURE(outcome)) {
+        return FALSE;
+    }
+    return TRUE;
+}
+
+// C API entry point: adds likely subtags to localeID, writing into the
+// caller-provided buffer maximizedLocaleID of size maximizedLocaleIDCapacity.
+//
+// Returns 0 immediately if *status already holds a failure. Otherwise returns
+// the number of bytes the sink reports as appended, except that a failure
+// without sink overflow returns -1. An overflow without another failure sets
+// U_BUFFER_OVERFLOW_ERROR; otherwise the output is terminated with
+// u_terminateChars().
+U_CAPI int32_t U_EXPORT2
+uloc_addLikelySubtags(const char* localeID,
+                      char* maximizedLocaleID,
+                      int32_t maximizedLocaleIDCapacity,
+                      UErrorCode* status) {
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    icu::CheckedArrayByteSink sink(maximizedLocaleID, maximizedLocaleIDCapacity);
+    ulocimp_addLikelySubtags(localeID, sink, status);
+
+    const int32_t appended = sink.NumberOfBytesAppended();
+    const UBool overflowed = sink.Overflowed();
+
+    if (U_FAILURE(*status)) {
+        if (overflowed) {
+            return appended;
+        }
+        return -1;
+    }
+
+    if (!overflowed) {
+        u_terminateChars(maximizedLocaleID, maximizedLocaleIDCapacity, appended, status);
+    } else {
+        *status = U_BUFFER_OVERFLOW_ERROR;
+    }
+    return appended;
+}
+
+// Canonicalizes localeID into a stack buffer and, if that succeeds, runs the
+// likely-subtags maximization on the canonical form.
+// Returns FALSE when canonicalization fails; otherwise returns whatever
+// _uloc_addLikelySubtags() returns.
+static UBool
+_ulocimp_addLikelySubtags(const char* localeID,
+                          icu::ByteSink& sink,
+                          UErrorCode* status) {
+    char canonicalID[ULOC_FULLNAME_CAPACITY];
+
+    if (!do_canonicalize(localeID, canonicalID, sizeof canonicalID, status)) {
+        return FALSE;
+    }
+    return _uloc_addLikelySubtags(canonicalID, sink, status);
+}
+
+// Exported, void-returning wrapper around _ulocimp_addLikelySubtags().
+// The UBool "was there a match" result of the internal function is discarded;
+// callers only see the sink contents and *status.
+U_CAPI void U_EXPORT2
+ulocimp_addLikelySubtags(const char* localeID,
+ icu::ByteSink& sink,
+ UErrorCode* status) {
+ _ulocimp_addLikelySubtags(localeID, sink, status);
+}
+
+// C API entry point: minimizes the subtags of localeID, writing into the
+// caller-provided buffer minimizedLocaleID of size minimizedLocaleIDCapacity.
+//
+// Returns 0 immediately if *status already holds a failure. Otherwise returns
+// the number of bytes the sink reports as appended, except that a failure
+// without sink overflow returns -1. An overflow without another failure sets
+// U_BUFFER_OVERFLOW_ERROR; otherwise the output is terminated with
+// u_terminateChars().
+U_CAPI int32_t U_EXPORT2
+uloc_minimizeSubtags(const char* localeID,
+                     char* minimizedLocaleID,
+                     int32_t minimizedLocaleIDCapacity,
+                     UErrorCode* status) {
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    icu::CheckedArrayByteSink sink(minimizedLocaleID, minimizedLocaleIDCapacity);
+    ulocimp_minimizeSubtags(localeID, sink, status);
+
+    const int32_t appended = sink.NumberOfBytesAppended();
+    const UBool overflowed = sink.Overflowed();
+
+    if (U_FAILURE(*status)) {
+        if (overflowed) {
+            return appended;
+        }
+        return -1;
+    }
+
+    if (!overflowed) {
+        u_terminateChars(minimizedLocaleID, minimizedLocaleIDCapacity, appended, status);
+    } else {
+        *status = U_BUFFER_OVERFLOW_ERROR;
+    }
+    return appended;
+}
+
+// Canonicalizes localeID into a stack buffer and, if that succeeds, writes the
+// minimized form of the canonical ID to sink. When canonicalization fails,
+// nothing is written and *status carries the error set by do_canonicalize().
+U_CAPI void U_EXPORT2
+ulocimp_minimizeSubtags(const char* localeID,
+                        icu::ByteSink& sink,
+                        UErrorCode* status) {
+    char canonicalID[ULOC_FULLNAME_CAPACITY];
+
+    if (!do_canonicalize(localeID, canonicalID, sizeof canonicalID, status)) {
+        return;
+    }
+    _uloc_minimizeSubtags(canonicalID, sink, status);
+}
+
+// Fast-path table: each language subtag is followed by a direction marker,
+// '-' for left-to-right and '+' for right-to-left.
+static const char LANG_DIR_STRING[] =
+        "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
+
+// Reports whether the locale's script is written right-to-left.
+//
+// If the locale carries no usable script subtag, the language is first looked
+// up in LANG_DIR_STRING; failing that, the script is taken from the
+// likely-subtags maximization of the locale. Any error along the way yields
+// FALSE. Lives in this file because it calls ulocimp_addLikelySubtags().
+U_CAPI UBool U_EXPORT2
+uloc_isRightToLeft(const char *locale) {
+    UErrorCode errorCode = U_ZERO_ERROR;
+    char script[8];
+    int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
+
+    const bool haveExplicitScript =
+            !U_FAILURE(errorCode) &&
+            errorCode != U_STRING_NOT_TERMINATED_WARNING &&
+            scriptLength != 0;
+
+    if (!haveExplicitScript) {
+        // Fast path: direction is known directly for some common languages.
+        errorCode = U_ZERO_ERROR;
+        char lang[8];
+        int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
+        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
+            return FALSE;
+        }
+
+        const char* tableHit = NULL;
+        if (langLength > 0) {
+            tableHit = uprv_strstr(LANG_DIR_STRING, lang);
+        }
+        if (tableHit != NULL) {
+            const char marker = tableHit[langLength];
+            if (marker == '-') {
+                return FALSE;
+            }
+            if (marker == '+') {
+                return TRUE;
+            }
+            // Any other character: lang only matched part of a longer code.
+        }
+
+        // Slow path: derive the script from the maximized locale.
+        errorCode = U_ZERO_ERROR;
+        icu::CharString maximized;
+        {
+            icu::CharStringByteSink sink(&maximized);
+            ulocimp_addLikelySubtags(locale, sink, &errorCode);
+        }
+        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
+            return FALSE;
+        }
+
+        scriptLength = uloc_getScript(maximized.data(), script, UPRV_LENGTHOF(script), &errorCode);
+        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
+                scriptLength == 0) {
+            return FALSE;
+        }
+    }
+
+    UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
+    return uscript_isRightToLeft(scriptCode);
+}
+
+U_NAMESPACE_BEGIN
+
+// C++ convenience wrapper: forwards this locale's base name (getBaseName())
+// to uloc_isRightToLeft().
+UBool
+Locale::isRightToLeft() const {
+ return uloc_isRightToLeft(getBaseName());
+}
+
+U_NAMESPACE_END
+
+// The following must at least allow for rg key value (6) plus terminator (1).
+#define ULOC_RG_BUFLEN 8
+
+// Determines the region code to use for supplemental-data lookups and copies
+// it into `region` (capacity `regionCapacity`).
+//
+// Sources, in order:
+//  1. The "rg" keyword value, if it is exactly 6 characters long and, after
+//     uppercasing, ends in "ZZZZ"; its first two characters are used.
+//  2. The country subtag of localeID (uloc_getCountry()).
+//  3. If there is no country and inferRegion is TRUE, the country of the
+//     likely-subtags maximization of localeID.
+//
+// Returns 0 immediately if *status already holds a failure; otherwise returns
+// the result of u_terminateChars(region, regionCapacity, rgLen, status).
+// Errors from the "rg" keyword lookup and from maximization are tracked in the
+// local rgStatus and are not propagated to *status.
+U_CAPI int32_t U_EXPORT2
+ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
+ char *region, int32_t regionCapacity, UErrorCode* status) {
+ if (U_FAILURE(*status)) {
+ return 0;
+ }
+ char rgBuf[ULOC_RG_BUFLEN];
+ UErrorCode rgStatus = U_ZERO_ERROR;
+
+ // First check for rg keyword value
+ int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
+ if (U_FAILURE(rgStatus) || rgLen != 6) {
+ rgLen = 0;
+ } else {
+ // rgBuf guaranteed to be zero terminated here, with text len 6
+ char *rgPtr = rgBuf;
+ for (; *rgPtr!= 0; rgPtr++) {
+ *rgPtr = uprv_toupper(*rgPtr);
+ }
+ // Keep only the two leading characters when the suffix is "ZZZZ".
+ rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
+ }
+
+ if (rgLen == 0) {
+ // No valid rg keyword value, try for unicode_region_subtag
+ rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
+ if (U_FAILURE(*status)) {
+ rgLen = 0;
+ } else if (rgLen == 0 && inferRegion) {
+ // no unicode_region_subtag but inferRegion TRUE, try likely subtags
+ rgStatus = U_ZERO_ERROR;
+ icu::CharString locBuf;
+ {
+ icu::CharStringByteSink sink(&locBuf);
+ ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
+ }
+ if (U_SUCCESS(rgStatus)) {
+ rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
+ if (U_FAILURE(*status)) {
+ rgLen = 0;
+ }
+ }
+ }
+ }
+
+ // Truncate rgBuf to the chosen length before copying it out.
+ // NOTE(review): this relies on rgLen < ULOC_RG_BUFLEN after a non-failing
+ // uloc_getCountry() call - confirm against that function's contract.
+ rgBuf[rgLen] = 0;
+ uprv_strncpy(region, rgBuf, regionCapacity);
+ return u_terminateChars(region, regionCapacity, rgLen, status);
+}
+
diff --git a/thirdparty/icu4c/common/loclikelysubtags.cpp b/thirdparty/icu4c/common/loclikelysubtags.cpp
new file mode 100644
index 0000000000..a031bfa587
--- /dev/null
+++ b/thirdparty/icu4c/common/loclikelysubtags.cpp
@@ -0,0 +1,682 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// loclikelysubtags.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include <utility>
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localpointer.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "charstr.h"
+#include "cstring.h"
+#include "loclikelysubtags.h"
+#include "lsr.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uhash.h"
+#include "uinvchar.h"
+#include "umutex.h"
+#include "uniquecharstr.h"
+#include "uresdata.h"
+#include "uresimp.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+// Prefix characters passed as the first LSR constructor argument for
+// pseudolocales; the trailing comment on each line names the region code and
+// variant that select it.
+constexpr char PSEUDO_ACCENTS_PREFIX = '\''; // -XA, -PSACCENT
+constexpr char PSEUDO_BIDI_PREFIX = '+'; // -XB, -PSBIDI
+constexpr char PSEUDO_CRACKED_PREFIX = ','; // -XC, -PSCRACK
+
+} // namespace
+
+// Move constructor: copies every field from `data`, then clears the two
+// pointers that the destructor releases (partitions, paradigms) in the source
+// object so they are released only once. The remaining fields of `data`,
+// including paradigmsLength, are left as they were.
+LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) :
+ distanceTrieBytes(data.distanceTrieBytes),
+ regionToPartitions(data.regionToPartitions),
+ partitions(data.partitions),
+ paradigms(data.paradigms), paradigmsLength(data.paradigmsLength),
+ distances(data.distances) {
+ data.partitions = nullptr;
+ data.paradigms = nullptr;
+}
+
+// Releases the two arrays this object owns. The release calls mirror the
+// allocations in XLikelySubtagsData::load(): uprv_malloc() for partitions
+// and new[] for paradigms.
+LocaleDistanceData::~LocaleDistanceData() {
+ uprv_free(partitions);
+ delete[] paradigms;
+}
+
+// TODO(ICU-20777): Rename to just LikelySubtagsData.
+// Temporary holder for everything read from the "langInfo" resource bundle.
+// load() fills the fields; the XLikelySubtags constructor then takes over
+// langInfoBundle, the strings, the alias maps, lsrs and distanceData.
+// The destructor releases langInfoBundle and lsrs (which are null after such a
+// hand-over).
+struct XLikelySubtagsData {
+ UResourceBundle *langInfoBundle = nullptr;
+ UniqueCharStrings strings;
+ CharStringMap languageAliases;
+ CharStringMap regionAliases;
+ const uint8_t *trieBytes = nullptr;
+ LSR *lsrs = nullptr;
+ int32_t lsrsLength = 0;
+
+ LocaleDistanceData distanceData;
+
+ XLikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {}
+
+ ~XLikelySubtagsData() {
+ ures_close(langInfoBundle);
+ delete[] lsrs;
+ }
+
+ // Opens the "langInfo" bundle and reads the "likely" table (required) and
+ // the "match" table (optional: a missing resource is tolerated, any other
+ // error is reported). On any failure errorCode is set and the function
+ // returns early, leaving the object partially filled.
+ void load(UErrorCode &errorCode) {
+ langInfoBundle = ures_openDirect(nullptr, "langInfo", &errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ StackUResourceBundle stackTempBundle;
+ ResourceDataValue value;
+ ures_getValueWithFallback(langInfoBundle, "likely", stackTempBundle.getAlias(),
+ value, errorCode);
+ ResourceTable likelyTable = value.getTable(errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+
+ // Read all strings in the resource bundle and convert them to invariant char *.
+ LocalMemory<int32_t> languageIndexes, regionIndexes, lsrSubtagIndexes;
+ int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0;
+ if (!readStrings(likelyTable, "languageAliases", value,
+ languageIndexes, languagesLength, errorCode) ||
+ !readStrings(likelyTable, "regionAliases", value,
+ regionIndexes, regionsLength, errorCode) ||
+ !readStrings(likelyTable, "lsrs", value,
+ lsrSubtagIndexes,lsrSubtagsLength, errorCode)) {
+ return;
+ }
+ // Aliases come in (alias, canonical) pairs; LSRs in (language, script,
+ // region) triples.
+ if ((languagesLength & 1) != 0 ||
+ (regionsLength & 1) != 0 ||
+ (lsrSubtagsLength % 3) != 0) {
+ errorCode = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+ if (lsrSubtagsLength == 0) {
+ errorCode = U_MISSING_RESOURCE_ERROR;
+ return;
+ }
+
+ if (!likelyTable.findValue("trie", value)) {
+ errorCode = U_MISSING_RESOURCE_ERROR;
+ return;
+ }
+ int32_t length;
+ trieBytes = value.getBinary(length, errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+
+ // Also read distance/matcher data if available,
+ // to open & keep only one resource bundle pointer
+ // and to use one single UniqueCharStrings.
+ UErrorCode matchErrorCode = U_ZERO_ERROR;
+ ures_getValueWithFallback(langInfoBundle, "match", stackTempBundle.getAlias(),
+ value, matchErrorCode);
+ LocalMemory<int32_t> partitionIndexes, paradigmSubtagIndexes;
+ int32_t partitionsLength = 0, paradigmSubtagsLength = 0;
+ if (U_SUCCESS(matchErrorCode)) {
+ ResourceTable matchTable = value.getTable(errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+
+ if (matchTable.findValue("trie", value)) {
+ distanceData.distanceTrieBytes = value.getBinary(length, errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ }
+
+ if (matchTable.findValue("regionToPartitions", value)) {
+ distanceData.regionToPartitions = value.getBinary(length, errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ if (length < LSR::REGION_INDEX_LIMIT) {
+ errorCode = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+ }
+
+ if (!readStrings(matchTable, "partitions", value,
+ partitionIndexes, partitionsLength, errorCode) ||
+ !readStrings(matchTable, "paradigms", value,
+ paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) {
+ return;
+ }
+ if ((paradigmSubtagsLength % 3) != 0) {
+ errorCode = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+
+ if (matchTable.findValue("distances", value)) {
+ distanceData.distances = value.getIntVector(length, errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ if (length < 4) { // LocaleDistance IX_LIMIT
+ errorCode = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+ }
+ } else if (matchErrorCode == U_MISSING_RESOURCE_ERROR) {
+ // ok for likely subtags
+ } else { // error other than missing resource
+ errorCode = matchErrorCode;
+ return;
+ }
+
+ // Fetch & store invariant-character versions of strings
+ // only after we have collected and de-duplicated all of them.
+ strings.freeze();
+
+ languageAliases = CharStringMap(languagesLength / 2, errorCode);
+ for (int32_t i = 0; i < languagesLength; i += 2) {
+ languageAliases.put(strings.get(languageIndexes[i]),
+ strings.get(languageIndexes[i + 1]), errorCode);
+ }
+
+ regionAliases = CharStringMap(regionsLength / 2, errorCode);
+ for (int32_t i = 0; i < regionsLength; i += 2) {
+ regionAliases.put(strings.get(regionIndexes[i]),
+ strings.get(regionIndexes[i + 1]), errorCode);
+ }
+ // One check covers both alias maps.
+ if (U_FAILURE(errorCode)) { return; }
+
+ lsrsLength = lsrSubtagsLength / 3;
+ lsrs = new LSR[lsrsLength];
+ if (lsrs == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ for (int32_t i = 0, j = 0; i < lsrSubtagsLength; i += 3, ++j) {
+ lsrs[j] = LSR(strings.get(lsrSubtagIndexes[i]),
+ strings.get(lsrSubtagIndexes[i + 1]),
+ strings.get(lsrSubtagIndexes[i + 2]),
+ LSR::IMPLICIT_LSR);
+ }
+
+ if (partitionsLength > 0) {
+ // Allocated with uprv_malloc(); ~LocaleDistanceData() releases it with
+ // uprv_free().
+ distanceData.partitions = static_cast<const char **>(
+ uprv_malloc(partitionsLength * sizeof(const char *)));
+ if (distanceData.partitions == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ for (int32_t i = 0; i < partitionsLength; ++i) {
+ distanceData.partitions[i] = strings.get(partitionIndexes[i]);
+ }
+ }
+
+ if (paradigmSubtagsLength > 0) {
+ distanceData.paradigmsLength = paradigmSubtagsLength / 3;
+ // Built through a non-const local because distanceData.paradigms
+ // points to const LSR.
+ LSR *paradigms = new LSR[distanceData.paradigmsLength];
+ if (paradigms == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ for (int32_t i = 0, j = 0; i < paradigmSubtagsLength; i += 3, ++j) {
+ paradigms[j] = LSR(strings.get(paradigmSubtagIndexes[i]),
+ strings.get(paradigmSubtagIndexes[i + 1]),
+ strings.get(paradigmSubtagIndexes[i + 2]),
+ LSR::DONT_CARE_FLAGS);
+ }
+ distanceData.paradigms = paradigms;
+ }
+ }
+
+private:
+ // Reads the string array stored under `key` in `table`, adds each string to
+ // `strings`, and stores the index returned by strings.add() for each one in
+ // `indexes` (`length` receives the array size). A missing key is not an
+ // error: `length` is left unchanged and true is returned. Returns false
+ // with errorCode set on failure. `value` is scratch storage and is
+ // overwritten.
+ bool readStrings(const ResourceTable &table, const char *key, ResourceValue &value,
+ LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {
+ if (table.findValue(key, value)) {
+ ResourceArray stringArray = value.getArray(errorCode);
+ if (U_FAILURE(errorCode)) { return false; }
+ length = stringArray.getSize();
+ if (length == 0) { return true; }
+ int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length);
+ if (rawIndexes == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return false;
+ }
+ for (int i = 0; i < length; ++i) {
+ stringArray.getValue(i, value); // returns TRUE because i < length
+ rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode);
+ if (U_FAILURE(errorCode)) { return false; }
+ }
+ }
+ return true;
+ }
+};
+
+namespace {
+
+// Singleton instance and the once-flag guarding its creation in
+// XLikelySubtags::getSingleton().
+XLikelySubtags *gLikelySubtags = nullptr;
+UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
+
+// Cleanup callback registered via ucln_common_registerCleanup(): destroys the
+// singleton and resets the once-flag so that it can be initialized again.
+UBool U_CALLCONV cleanup() {
+ delete gLikelySubtags;
+ gLikelySubtags = nullptr;
+ gInitOnce.reset();
+ return TRUE;
+}
+
+} // namespace
+
+// One-time initializer for the gLikelySubtags singleton; only ever invoked
+// through umtx_initOnce(). Loads the resource data, builds the singleton from
+// it and registers the cleanup callback. On a load failure nothing is
+// created; on allocation failure errorCode is set to U_MEMORY_ALLOCATION_ERROR.
+void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
+    U_ASSERT(gLikelySubtags == nullptr);
+
+    XLikelySubtagsData loaded(errorCode);
+    loaded.load(errorCode);
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+
+    gLikelySubtags = new XLikelySubtags(loaded);
+    if (gLikelySubtags != nullptr) {
+        ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup);
+    } else {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+// Returns the shared XLikelySubtags instance, creating it on first use via
+// umtx_initOnce(). Returns nullptr without initializing anything when
+// errorCode already holds a failure on entry.
+const XLikelySubtags *XLikelySubtags::getSingleton(UErrorCode &errorCode) {
+    if (!U_FAILURE(errorCode)) {
+        umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode);
+        return gLikelySubtags;
+    }
+    return nullptr;
+}
+
+// Builds the lookup object from loaded data, taking over the resource bundle,
+// the strings, the alias maps, the LSR array and the distance data. The
+// pointers in `data` that its destructor would release (langInfoBundle, lsrs)
+// are nulled afterwards. Also pre-computes trie states for the "und" and
+// "und-Zzzz" prefixes, the default LSR index, and one state per first
+// language letter.
+XLikelySubtags::XLikelySubtags(XLikelySubtagsData &data) :
+ langInfoBundle(data.langInfoBundle),
+ strings(data.strings.orphanCharStrings()),
+ languageAliases(std::move(data.languageAliases)),
+ regionAliases(std::move(data.regionAliases)),
+ trie(data.trieBytes),
+ lsrs(data.lsrs),
+#if U_DEBUG
+ lsrsLength(data.lsrsLength),
+#endif
+ distanceData(std::move(data.distanceData)) {
+ data.langInfoBundle = nullptr;
+ data.lsrs = nullptr;
+
+ // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
+ UStringTrieResult result = trie.next(u'*');
+ U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
+ trieUndState = trie.getState64();
+ result = trie.next(u'*');
+ U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
+ trieUndZzzzState = trie.getState64();
+ // A third '*' must yield a value; it is stored as the default LSR index.
+ result = trie.next(u'*');
+ U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
+ defaultLsrIndex = trie.getValue();
+ trie.reset();
+
+ // Record the trie state after each first letter 'a'..'z'. Entries are only
+ // written when the letter is a prefix without a value of its own; the
+ // lookup code treats a state of 0 as "no cached state".
+ for (char16_t c = u'a'; c <= u'z'; ++c) {
+ result = trie.next(c);
+ if (result == USTRINGTRIE_NO_VALUE) {
+ trieFirstLetterStates[c - u'a'] = trie.getState64();
+ }
+ trie.reset();
+ }
+}
+
+// Releases what the constructor took over from XLikelySubtagsData: the
+// resource bundle, the string storage and the LSR array.
+XLikelySubtags::~XLikelySubtags() {
+ ures_close(langInfoBundle);
+ delete strings;
+ delete[] lsrs;
+}
+
+// Produces the maximized language/script/region triple for a Locale.
+// A locale whose name starts with "@x=" (a private-use language tag) is
+// returned as-is in the language slot with empty script and region; every
+// other locale is maximized from its individual subtags.
+LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
+    const char *name = locale.getName();
+    const bool isPrivateUse =
+            uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=';
+    if (!isPrivateUse) {
+        return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
+                                locale.getVariant(), errorCode);
+    }
+    // Private use language tag x-subtag-subtag...
+    return LSR(name, "", "", LSR::EXPLICIT_LSR);
+}
+
+namespace {
+
+// Maps `alias` to its canonical form using `aliases`; a subtag without an
+// entry in the map is returned unchanged.
+const char *getCanonical(const CharStringMap &aliases, const char *alias) {
+    const char *mapped = aliases.get(alias);
+    if (mapped != nullptr) {
+        return mapped;
+    }
+    return alias;
+}
+
+}  // namespace
+
+// Maximizes the given subtags, with special handling for pseudolocales.
+//
+// Pseudolocales (regions XA/XB/XC, or variants PSACCENT/PSBIDI/PSCRACK) get a
+// distinguishing prefix character so that they match only themselves and not
+// other locales that merely share language and script. All other input is
+// canonicalized through the language and region alias maps and then passed
+// to maximize().
+LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
+                                     const char *variant, UErrorCode &errorCode) const {
+    // Two-letter region "X?" : en-XA, ar-XB, ...-XC.
+    if (region[0] == 'X' && region[1] != 0 && region[2] == 0) {
+        const char pseudoKind = region[1];
+        if (pseudoKind == 'A') {
+            return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region,
+                       LSR::EXPLICIT_LSR, errorCode);
+        }
+        if (pseudoKind == 'B') {
+            return LSR(PSEUDO_BIDI_PREFIX, language, script, region,
+                       LSR::EXPLICIT_LSR, errorCode);
+        }
+        if (pseudoKind == 'C') {
+            return LSR(PSEUDO_CRACKED_PREFIX, language, script, region,
+                       LSR::EXPLICIT_LSR, errorCode);
+        }
+        // Any other X? region is a normal locale.
+    }
+
+    // Variant "PS..." : fr-PSCRACK and friends.
+    if (variant[0] == 'P' && variant[1] == 'S') {
+        const bool regionMissing = (*region == 0);
+        int32_t lsrFlags;
+        if (regionMissing) {
+            lsrFlags = LSR::EXPLICIT_LANGUAGE | LSR::EXPLICIT_SCRIPT;
+        } else {
+            lsrFlags = LSR::EXPLICIT_LSR;
+        }
+
+        if (uprv_strcmp(variant, "PSACCENT") == 0) {
+            return LSR(PSEUDO_ACCENTS_PREFIX, language, script,
+                       regionMissing ? "XA" : region, lsrFlags, errorCode);
+        }
+        if (uprv_strcmp(variant, "PSBIDI") == 0) {
+            return LSR(PSEUDO_BIDI_PREFIX, language, script,
+                       regionMissing ? "XB" : region, lsrFlags, errorCode);
+        }
+        if (uprv_strcmp(variant, "PSCRACK") == 0) {
+            return LSR(PSEUDO_CRACKED_PREFIX, language, script,
+                       regionMissing ? "XC" : region, lsrFlags, errorCode);
+        }
+        // Any other PS... variant is a normal locale.
+    }
+
+    // Normal locale: resolve aliases first. (There are no script mappings.)
+    const char *canonicalLanguage = getCanonical(languageAliases, language);
+    const char *canonicalRegion = getCanonical(regionAliases, region);
+    return maximize(canonicalLanguage, script, canonicalRegion);
+}
+
+// Fills in missing language/script/region subtags from the likely-subtags trie.
+//
+// "und", "Zzzz" and "ZZ" are treated as empty subtags. If all three subtags
+// are present the input is returned as already maximized. Otherwise the trie
+// is walked language -> script -> region; whenever a given subtag does not
+// match, the walk falls back to the wildcard ("*") entry for that level.
+//
+// retainOldMask records which input subtags are kept in the result instead of
+// being replaced by the looked-up LSR: bit 4 = language, 2 = script,
+// 1 = region. It is also passed as the flags of the returned LSR.
+LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region) const {
+ if (uprv_strcmp(language, "und") == 0) {
+ language = "";
+ }
+ if (uprv_strcmp(script, "Zzzz") == 0) {
+ script = "";
+ }
+ if (uprv_strcmp(region, "ZZ") == 0) {
+ region = "";
+ }
+ if (*script != 0 && *region != 0 && *language != 0) {
+ return LSR(language, script, region, LSR::EXPLICIT_LSR); // already maximized
+ }
+
+ uint32_t retainOldMask = 0;
+ BytesTrie iter(trie);
+ uint64_t state;
+ int32_t value;
+ // Small optimization: Array lookup for first language letter.
+ int32_t c0;
+ if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
+ language[1] != 0 && // language.length() >= 2
+ (state = trieFirstLetterStates[c0]) != 0) {
+ value = trieNext(iter.resetToState64(state), language, 1);
+ } else {
+ value = trieNext(iter, language, 0);
+ }
+ // trieNext() result: <0 = no match, 0 = matched without a value yet,
+ // >0 = a value (SKIP_SCRIPT or an lsrs index).
+ if (value >= 0) {
+ if (*language != 0) {
+ retainOldMask |= 4;
+ }
+ state = iter.getState64();
+ } else {
+ // Unknown language: keep it and continue from the "und" entry.
+ // state == 0 marks that the walk is on the wildcard path.
+ retainOldMask |= 4;
+ iter.resetToState64(trieUndState); // "und" ("*")
+ state = 0;
+ }
+
+ if (value > 0) {
+ // Intermediate or final value from just language.
+ if (value == SKIP_SCRIPT) {
+ value = 0;
+ }
+ if (*script != 0) {
+ retainOldMask |= 2;
+ }
+ } else {
+ value = trieNext(iter, script, 0);
+ if (value >= 0) {
+ if (*script != 0) {
+ retainOldMask |= 2;
+ }
+ state = iter.getState64();
+ } else {
+ retainOldMask |= 2;
+ if (state == 0) {
+ iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
+ } else {
+ // Known language, unknown script: retry with the empty script.
+ iter.resetToState64(state);
+ value = trieNext(iter, "", 0);
+ U_ASSERT(value >= 0);
+ state = iter.getState64();
+ }
+ }
+ }
+
+ if (value > 0) {
+ // Final value from just language or language+script.
+ if (*region != 0) {
+ retainOldMask |= 1;
+ }
+ } else {
+ value = trieNext(iter, region, 0);
+ if (value >= 0) {
+ if (*region != 0) {
+ retainOldMask |= 1;
+ }
+ } else {
+ retainOldMask |= 1;
+ if (state == 0) {
+ value = defaultLsrIndex;
+ } else {
+ // Unknown region: retry with the empty region.
+ iter.resetToState64(state);
+ value = trieNext(iter, "", 0);
+ U_ASSERT(value > 0);
+ }
+ }
+ }
+ U_ASSERT(value < lsrsLength);
+ const LSR &result = lsrs[value];
+
+ if (*language == 0) {
+ language = "und";
+ }
+
+ if (retainOldMask == 0) {
+ // Quickly return a copy of the lookup-result LSR
+ // without new allocation of the subtags.
+ return LSR(result.language, result.script, result.region, result.flags);
+ }
+ if ((retainOldMask & 4) == 0) {
+ language = result.language;
+ }
+ if ((retainOldMask & 2) == 0) {
+ script = result.script;
+ }
+ if ((retainOldMask & 1) == 0) {
+ region = result.region;
+ }
+ // retainOldMask flags = LSR explicit-subtag flags
+ return LSR(language, script, region, retainOldMask);
+}
+
+// Tests whether lsr is "more likely" than other, reusing lookup work that a
+// previous call cached in likelyInfo.
+//
+// likelyInfo encoding, as produced by this function:
+//   - negative: nothing cached yet;
+//   - bits 2 and up: index into lsrs of the cached likely-subtags entry;
+//   - bit 1: set if that entry came from a language+script lookup (the
+//     previous comparison was for equal language and script), clear if it
+//     came from a language-only lookup (the scripts differed);
+//   - bit 0 (in the return value): set if lsr is the more likely one.
+//
+// Returns the updated likelyInfo. If the languages differ, returns -4
+// (negative, bit 0 clear).
+//
+// Fix relative to the previous version: the region branch looked up
+// getLikelyIndex(lsr.language, lsr.region), i.e. passed the region where the
+// script is expected. A region string does not match a script in the trie, so
+// the lookup fell back to the language's default script and compared against
+// the wrong default region for languages with several scripts. It now passes
+// lsr.script, matching the "equal language and script" meaning of bit 1.
+int32_t XLikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const {
+    if (uprv_strcmp(lsr.language, other.language) != 0) {
+        return -4;  // same bits as 0xfffffffc: negative, lsr not better than other
+    }
+    if (uprv_strcmp(lsr.script, other.script) != 0) {
+        int32_t index;
+        if (likelyInfo >= 0 && (likelyInfo & 2) == 0) {
+            // Reuse the cached language-only lookup.
+            index = likelyInfo >> 2;
+        } else {
+            index = getLikelyIndex(lsr.language, "");
+            likelyInfo = index << 2;
+        }
+        const LSR &likely = lsrs[index];
+        if (uprv_strcmp(lsr.script, likely.script) == 0) {
+            return likelyInfo | 1;
+        } else {
+            return likelyInfo & ~1;
+        }
+    }
+    if (uprv_strcmp(lsr.region, other.region) != 0) {
+        int32_t index;
+        if (likelyInfo >= 0 && (likelyInfo & 2) != 0) {
+            // Reuse the cached language+script lookup.
+            index = likelyInfo >> 2;
+        } else {
+            index = getLikelyIndex(lsr.language, lsr.script);
+            likelyInfo = (index << 2) | 2;
+        }
+        const LSR &likely = lsrs[index];
+        if (uprv_strcmp(lsr.region, likely.region) == 0) {
+            return likelyInfo | 1;
+        } else {
+            return likelyInfo & ~1;
+        }
+    }
+    return likelyInfo & ~1;  // lsr not better than other
+}
+
+// Subset of maximize().
+// Returns the index into lsrs of the likely-subtags entry for the given
+// language and script, always using the empty (wildcard) region. "und" and
+// "Zzzz" are treated as empty subtags. A subtag that does not match falls
+// back to the wildcard entry of its level, as in maximize().
+int32_t XLikelySubtags::getLikelyIndex(const char *language, const char *script) const {
+ if (uprv_strcmp(language, "und") == 0) {
+ language = "";
+ }
+ if (uprv_strcmp(script, "Zzzz") == 0) {
+ script = "";
+ }
+
+ BytesTrie iter(trie);
+ uint64_t state;
+ int32_t value;
+ // Small optimization: Array lookup for first language letter.
+ int32_t c0;
+ if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
+ language[1] != 0 && // language.length() >= 2
+ (state = trieFirstLetterStates[c0]) != 0) {
+ value = trieNext(iter.resetToState64(state), language, 1);
+ } else {
+ value = trieNext(iter, language, 0);
+ }
+ if (value >= 0) {
+ state = iter.getState64();
+ } else {
+ // Unknown language: continue from "und"; state == 0 marks this path.
+ iter.resetToState64(trieUndState); // "und" ("*")
+ state = 0;
+ }
+
+ if (value > 0) {
+ // Intermediate or final value from just language.
+ if (value == SKIP_SCRIPT) {
+ value = 0;
+ }
+ } else {
+ value = trieNext(iter, script, 0);
+ if (value >= 0) {
+ state = iter.getState64();
+ } else {
+ if (state == 0) {
+ iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
+ } else {
+ // Known language, unknown script: retry with the empty script.
+ iter.resetToState64(state);
+ value = trieNext(iter, "", 0);
+ U_ASSERT(value >= 0);
+ state = iter.getState64();
+ }
+ }
+ }
+
+ if (value > 0) {
+ // Final value from just language or language+script.
+ } else {
+ // Finish with the empty (wildcard) region.
+ value = trieNext(iter, "", 0);
+ U_ASSERT(value > 0);
+ }
+ U_ASSERT(value < lsrsLength);
+ return value;
+}
+
+// Advances `iter` over one subtag: the characters of `s` starting at index `i`.
+//
+// An empty subtag (s[i] == 0) is fed to the trie as '*'. Otherwise each
+// character is converted with uprv_invCharToAscii(); the last character of
+// the subtag is fed with bit 0x80 set to mark the subtag end.
+//
+// Returns -1 if there is no match, 0 if the subtag matched but carries no
+// value, SKIP_SCRIPT for an intermediate value, or the trie's final value.
+int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
+ UStringTrieResult result;
+ uint8_t c;
+ if ((c = s[i]) == 0) {
+ result = iter.next(u'*');
+ } else {
+ for (;;) {
+ c = uprv_invCharToAscii(c);
+ // EBCDIC: If s[i] is not an invariant character,
+ // then c is now 0 and will simply not match anything, which is harmless.
+ uint8_t next = s[++i];
+ if (next != 0) {
+ // Not the last character: the trie must be able to continue.
+ if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
+ return -1;
+ }
+ } else {
+ // last character of this subtag
+ result = iter.next(c | 0x80);
+ break;
+ }
+ c = next;
+ }
+ }
+ switch (result) {
+ case USTRINGTRIE_NO_MATCH: return -1;
+ case USTRINGTRIE_NO_VALUE: return 0;
+ case USTRINGTRIE_INTERMEDIATE_VALUE:
+ U_ASSERT(iter.getValue() == SKIP_SCRIPT);
+ return SKIP_SCRIPT;
+ case USTRINGTRIE_FINAL_VALUE: return iter.getValue();
+ default: return -1;
+ }
+}
+
+// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
+// in loclikely.cpp to this new code, including activating this
+// minimizeSubtags() function. The LocaleMatcher does not minimize.
+// NOTE: The disabled body below still contains Java syntax (`new BytesTrie`,
+// `boolean`, `.equals()`, `LSR.DONT_CARE_FLAGS`, `ULocale.Minimize`) and would
+// need porting to C++ before the `#if 0` guard can be removed.
+#if 0
+LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn,
+ const char *regionIn, ULocale.Minimize fieldToFavor,
+ UErrorCode &errorCode) const {
+ LSR result = maximize(languageIn, scriptIn, regionIn);
+
+ // We could try just a series of checks, like:
+ // LSR result2 = addLikelySubtags(languageIn, "", "");
+ // if result.equals(result2) return result2;
+ // However, we can optimize 2 of the cases:
+ // (languageIn, "", "")
+ // (languageIn, "", regionIn)
+
+ // value00 = lookup(result.language, "", "")
+ BytesTrie iter = new BytesTrie(trie);
+ int value = trieNext(iter, result.language, 0);
+ U_ASSERT(value >= 0);
+ if (value == 0) {
+ value = trieNext(iter, "", 0);
+ U_ASSERT(value >= 0);
+ if (value == 0) {
+ value = trieNext(iter, "", 0);
+ }
+ }
+ U_ASSERT(value > 0);
+ LSR value00 = lsrs[value];
+ boolean favorRegionOk = false;
+ if (result.script.equals(value00.script)) { //script is default
+ if (result.region.equals(value00.region)) {
+ return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS);
+ } else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
+ return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
+ } else {
+ favorRegionOk = true;
+ }
+ }
+
+ // The last case is not as easy to optimize.
+ // Maybe do later, but for now use the straightforward code.
+ LSR result2 = maximize(languageIn, scriptIn, "");
+ if (result2.equals(result)) {
+ return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS);
+ } else if (favorRegionOk) {
+ return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
+ }
+ return result;
+}
+#endif
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/loclikelysubtags.h b/thirdparty/icu4c/common/loclikelysubtags.h
new file mode 100644
index 0000000000..14a01a5eac
--- /dev/null
+++ b/thirdparty/icu4c/common/loclikelysubtags.h
@@ -0,0 +1,121 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// loclikelysubtags.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCLIKELYSUBTAGS_H__
+#define __LOCLIKELYSUBTAGS_H__
+
+#include <utility>
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "charstrmap.h"
+#include "lsr.h"
+
+U_NAMESPACE_BEGIN
+
+struct XLikelySubtagsData;
+
+struct LocaleDistanceData { // bundle of pointers to locale-distance data; every member defaults to nullptr/0
+ LocaleDistanceData() = default;
+ LocaleDistanceData(LocaleDistanceData &&data); // move constructor, defined out of line
+ ~LocaleDistanceData(); // defined out of line
+
+ const uint8_t *distanceTrieBytes = nullptr;
+ const uint8_t *regionToPartitions = nullptr;
+ const char **partitions = nullptr;
+ const LSR *paradigms = nullptr;
+ int32_t paradigmsLength = 0; // presumably the number of elements in paradigms — TODO confirm
+ const int32_t *distances = nullptr;
+
+private:
+ LocaleDistanceData &operator=(const LocaleDistanceData &) = delete; // copy assignment is disabled
+};
+
+// TODO(ICU-20777): Rename to just LikelySubtags.
+class XLikelySubtags final : public UMemory { // final: cannot be subclassed
+public:
+ ~XLikelySubtags();
+
+ static constexpr int32_t SKIP_SCRIPT = 1; // NOTE(review): meaning is not shown in this header; see its users in the .cpp
+
+ // VisibleForTesting
+ static const XLikelySubtags *getSingleton(UErrorCode &errorCode); // presumably reports failure through errorCode — TODO confirm
+
+ // VisibleForTesting
+ LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const;
+
+ /**
+ * Tests whether lsr is "more likely" than other.
+ * For example, fr-Latn-FR is more likely than fr-Latn-CH because
+ * FR is the default region for fr-Latn.
+ *
+ * The likelyInfo caches lookup information between calls.
+ * The return value is an updated likelyInfo value,
+ * with bit 0 set if lsr is "more likely".
+ * The initial value of likelyInfo must be negative.
+ */
+ int32_t compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const;
+
+ // TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
+ // in loclikely.cpp to this new code, including activating this
+ // minimizeSubtags() function. The LocaleMatcher does not minimize.
+#if 0
+ LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn,
+ ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const; // NOTE(review): "ULocale.Minimize" is not valid C++ spelling; fix before enabling
+#endif
+
+ // visible for LocaleDistance
+ const LocaleDistanceData &getDistanceData() const { return distanceData; } // returns a reference to the member, no copy
+
+private:
+ XLikelySubtags(XLikelySubtagsData &data); // private: instances cannot be constructed by outside code
+ XLikelySubtags(const XLikelySubtags &other) = delete; // not copyable
+ XLikelySubtags &operator=(const XLikelySubtags &other) = delete; // not copy-assignable
+
+ static void initLikelySubtags(UErrorCode &errorCode);
+
+ LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
+ const char *variant, UErrorCode &errorCode) const;
+
+ /**
+ * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
+ */
+ LSR maximize(const char *language, const char *script, const char *region) const;
+
+ int32_t getLikelyIndex(const char *language, const char *script) const;
+
+ static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);
+
+ UResourceBundle *langInfoBundle;
+ // We could store the strings by value, except that if there were few enough strings,
+ // moving the contents could copy it to a different array,
+ // invalidating the pointers stored in the maps.
+ CharString *strings;
+ CharStringMap languageAliases;
+ CharStringMap regionAliases;
+
+ // The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs.
+ // There is also a trie value for each intermediate lang and lang+script.
+ // '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"".
+ BytesTrie trie;
+ uint64_t trieUndState;
+ uint64_t trieUndZzzzState;
+ int32_t defaultLsrIndex;
+ uint64_t trieFirstLetterStates[26]; // presumably one saved trie state per first letter 'a'..'z' — TODO confirm
+ const LSR *lsrs; // array indexed by the trie values described above
+#if U_DEBUG
+ int32_t lsrsLength; // present only in U_DEBUG builds
+#endif
+
+ // distance/matcher data: see comment in XLikelySubtagsData::load()
+ LocaleDistanceData distanceData;
+};
+
+U_NAMESPACE_END
+
+#endif // __LOCLIKELYSUBTAGS_H__
diff --git a/thirdparty/icu4c/common/locmap.cpp b/thirdparty/icu4c/common/locmap.cpp
new file mode 100644
index 0000000000..29a5646385
--- /dev/null
+++ b/thirdparty/icu4c/common/locmap.cpp
@@ -0,0 +1,1315 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ **********************************************************************
+ * Copyright (C) 1996-2016, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ *
+ * Provides functionality for mapping between
+ * LCID and Posix IDs or ICU locale to codepage
+ *
+ * Note: All classes and code in this file are
+ * intended for internal use only.
+ *
+ * Methods of interest:
+ * unsigned long convertToLCID(const char*);
+ * const char* convertToPosix(unsigned long);
+ *
+ * Kathleen Wilson, 4/30/96
+ *
+ * Date Name Description
+ * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
+ * setId() method and safety check against
+ * MAX_ID_LENGTH.
+ * 04/23/99 stephen Added C wrapper for convertToPosix.
+ * 09/18/00 george Removed the memory leaks.
+ * 08/23/01 george Convert to C
+ */
+
+#include "locmap.h"
+#include "bytesinkutil.h"
+#include "charstr.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "ulocimp.h"
+#include "unicode/uloc.h"
+
+#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
+#include <windows.h>
+#include <winnls.h> // LCIDToLocaleName and LocaleNameToLCID
+#endif
+
+/*
+ * Note:
+ * The mapping from Win32 locale ID numbers to POSIX locale strings should
+ * be the faster one.
+ *
+ * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
+ * [MS-LCID] Windows Language Code Identifier (LCID) Reference
+ */
+
+/*
+////////////////////////////////////////////////
+//
+// Internal Classes for LCID <--> POSIX Mapping
+//
+/////////////////////////////////////////////////
+*/
+
+typedef struct ILcidPosixElement /* one (Windows LCID, POSIX locale ID) pair; the row type of the locmap_* tables */
+{
+ const uint32_t hostID; /* Windows LCID value, e.g. 0x0409 */
+ const char * const posixID; /* POSIX-style locale ID, e.g. "en_US"; may carry an @collation=... keyword */
+} ILcidPosixElement;
+
+typedef struct ILcidPosixMap /* one language group: a locmap_* array and its length, as built by ILCID_POSIX_MAP */
+{
+ const uint32_t numRegions; /* number of elements in regionMaps */
+ const struct ILcidPosixElement* const regionMaps; /* points to the first element of the group's table */
+} ILcidPosixMap;
+
+
+/*
+/////////////////////////////////////////////////
+//
+// Easy macros to make the LCID <--> POSIX Mapping
+//
+/////////////////////////////////////////////////
+*/
+
+/**
+ * The standard one language/one country mapping for LCID.
+ * The first element must be the language, and the following
+ * elements are the language with the country.
+ *
+ * Expands to a static array named locmap_<languageID> with exactly two
+ * entries: {LANGUAGE_LCID(hostID), "<languageID>"} followed by
+ * {hostID, "<posixID>"}. Both IDs are stringized by the macro, so pass them
+ * as bare tokens rather than string literals.
+ * @param hostID LCID in host format such as 0x044d
+ * @param languageID posix ID of just the language such as 'de'
+ * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
+ */
+#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
+static const ILcidPosixElement locmap_ ## languageID [] = { \
+ {LANGUAGE_LCID(hostID), #languageID}, /* parent locale */ \
+ {hostID, #posixID}, \
+};
+
+/**
+ * Define a subtable by ID
+ *
+ * Expands only to the declaration head
+ * "static const ILcidPosixElement locmap_<id> [] =". The brace-enclosed
+ * list of {hostID, "posixID"} entries and the closing semicolon are written
+ * by the caller right after the macro invocation.
+ * @param id the POSIX ID, either a language or language_TERRITORY
+ */
+#define ILCID_POSIX_SUBTABLE(id) \
+static const ILcidPosixElement locmap_ ## id [] =
+
+
+/**
+ * Create the map for the posixID. This macro supposes that the language string
+ * name is the same as the global variable name, and that the first element
+ * in the ILcidPosixElement is just the language.
+ *
+ * Expands to an ILcidPosixMap initializer: the element count of the
+ * locmap_<_posixID> array followed by the array itself.
+ * @param _posixID the full POSIX ID for this entry.
+ */
+#define ILCID_POSIX_MAP(_posixID) \
+ {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID}
+
+/*
+////////////////////////////////////////////
+//
+// Create the table of LCID to POSIX Mapping
+// None of it should be dynamically created.
+//
+// Keep static locale variables inside the function so that
+// it can be created properly during static init.
+//
+// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier
+// (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
+//
+// Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be
+// maintained for support of older Windows versions.
+// Update: Windows 7 (091130)
+//
+// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
+// @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
+// called from uloc_getLCID(), keywords other than collation are already removed. If we really need
+// to support other keywords in this mapping data, we must update the implementation.
+////////////////////////////////////////////
+*/
+
+// TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as
+// LocaleNameToLCID and LCIDToLocaleName provide 90% of these.
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
+
+ILCID_POSIX_SUBTABLE(ar) {
+ {0x01, "ar"},
+ {0x3801, "ar_AE"},
+ {0x3c01, "ar_BH"},
+ {0x1401, "ar_DZ"},
+ {0x0c01, "ar_EG"},
+ {0x0801, "ar_IQ"},
+ {0x2c01, "ar_JO"},
+ {0x3401, "ar_KW"},
+ {0x3001, "ar_LB"},
+ {0x1001, "ar_LY"},
+ {0x1801, "ar_MA"},
+ {0x1801, "ar_MO"},
+ {0x2001, "ar_OM"},
+ {0x4001, "ar_QA"},
+ {0x0401, "ar_SA"},
+ {0x2801, "ar_SY"},
+ {0x1c01, "ar_TN"},
+ {0x2401, "ar_YE"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
+ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
+ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
+
+ILCID_POSIX_SUBTABLE(az) {
+ {0x2c, "az"},
+ {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
+ {0x742c, "az_Cyrl"}, /* Cyrillic based */
+ {0x042c, "az_Latn_AZ"}, /* Latin based */
+ {0x782c, "az_Latn"}, /* Latin based */
+ {0x042c, "az_AZ"} /* Latin based */
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
+ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
+
+/*ILCID_POSIX_SUBTABLE(ber) {
+ {0x5f, "ber"},
+ {0x045f, "ber_Arab_DZ"},
+ {0x045f, "ber_Arab"},
+ {0x085f, "ber_Latn_DZ"},
+ {0x085f, "ber_Latn"}
+};*/
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
+
+ILCID_POSIX_SUBTABLE(bin) {
+ {0x66, "bin"},
+ {0x0466, "bin_NG"}
+};
+
+ILCID_POSIX_SUBTABLE(bn) {
+ {0x45, "bn"},
+ {0x0845, "bn_BD"},
+ {0x0445, "bn_IN"}
+};
+
+ILCID_POSIX_SUBTABLE(bo) {
+ {0x51, "bo"},
+ {0x0851, "bo_BT"},
+ {0x0451, "bo_CN"},
+ {0x0c51, "dz_BT"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
+
+ILCID_POSIX_SUBTABLE(ca) {
+ {0x03, "ca"},
+ {0x0403, "ca_ES"},
+ {0x0803, "ca_ES_VALENCIA"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
+
+ILCID_POSIX_SUBTABLE(chr) {
+ {0x05c, "chr"},
+ {0x7c5c, "chr_Cher"},
+ {0x045c, "chr_Cher_US"},
+ {0x045c, "chr_US"}
+};
+
+// ICU has chosen different names for these.
+ILCID_POSIX_SUBTABLE(ckb) {
+ {0x92, "ckb"},
+ {0x7c92, "ckb_Arab"},
+ {0x0492, "ckb_Arab_IQ"}
+};
+
+/* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
+ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
+ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
+
+// Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
+ILCID_POSIX_SUBTABLE(de) {
+ {0x07, "de"},
+ {0x0c07, "de_AT"},
+ {0x0807, "de_CH"},
+ {0x0407, "de_DE"},
+ {0x1407, "de_LI"},
+ {0x1007, "de_LU"},
+ {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
+ {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
+ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
+
+// Windows uses an empty string for 'invariant'
+ILCID_POSIX_SUBTABLE(en) {
+ {0x09, "en"},
+ {0x0c09, "en_AU"},
+ {0x2809, "en_BZ"},
+ {0x1009, "en_CA"},
+ {0x0809, "en_GB"},
+ {0x3c09, "en_HK"},
+ {0x3809, "en_ID"},
+ {0x1809, "en_IE"},
+ {0x4009, "en_IN"},
+ {0x2009, "en_JM"},
+ {0x4409, "en_MY"},
+ {0x1409, "en_NZ"},
+ {0x3409, "en_PH"},
+ {0x4809, "en_SG"},
+ {0x2C09, "en_TT"},
+ {0x0409, "en_US"},
+ {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
+ {0x2409, "en_029"},
+ {0x1c09, "en_ZA"},
+ {0x3009, "en_ZW"},
+ {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
+ {0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
+ {0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
+ {0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
+ {0x0409, "en_MP"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
+ {0x0409, "en_UM"} /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
+};
+
+ILCID_POSIX_SUBTABLE(en_US_POSIX) {
+ {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
+};
+
+// Windows doesn't know POSIX or BCP47 Unicode traditional sort names
+ILCID_POSIX_SUBTABLE(es) {
+ {0x0a, "es"},
+ {0x2c0a, "es_AR"},
+ {0x400a, "es_BO"},
+ {0x340a, "es_CL"},
+ {0x240a, "es_CO"},
+ {0x140a, "es_CR"},
+ {0x5c0a, "es_CU"},
+ {0x1c0a, "es_DO"},
+ {0x300a, "es_EC"},
+ {0x0c0a, "es_ES"}, /*Modern sort.*/
+ {0x100a, "es_GT"},
+ {0x480a, "es_HN"},
+ {0x080a, "es_MX"},
+ {0x4c0a, "es_NI"},
+ {0x180a, "es_PA"},
+ {0x280a, "es_PE"},
+ {0x500a, "es_PR"},
+ {0x3c0a, "es_PY"},
+ {0x440a, "es_SV"},
+ {0x540a, "es_US"},
+ {0x380a, "es_UY"},
+ {0x200a, "es_VE"},
+ {0x580a, "es_419"},
+ {0x040a, "es_ES@collation=traditional"},
+ {0x040a, "es@collation=traditional"} // Windows will treat this as es-ES@collation=traditional
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
+ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
+
+/* ISO-639 doesn't distinguish between Persian and Dari.*/
+ILCID_POSIX_SUBTABLE(fa) {
+ {0x29, "fa"},
+ {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
+ {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
+};
+
+
+/* duplicate for roundtripping */
+ILCID_POSIX_SUBTABLE(fa_AF) {
+ {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
+ {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
+};
+
+ILCID_POSIX_SUBTABLE(ff) {
+ {0x67, "ff"},
+ {0x7c67, "ff_Latn"},
+ {0x0867, "ff_Latn_SN"},
+ {0x0467, "ff_NG"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
+ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
+ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
+
+ILCID_POSIX_SUBTABLE(fr) {
+ {0x0c, "fr"},
+ {0x080c, "fr_BE"},
+ {0x0c0c, "fr_CA"},
+ {0x240c, "fr_CD"},
+ {0x240c, "fr_CG"},
+ {0x100c, "fr_CH"},
+ {0x300c, "fr_CI"},
+ {0x2c0c, "fr_CM"},
+ {0x040c, "fr_FR"},
+ {0x3c0c, "fr_HT"},
+ {0x140c, "fr_LU"},
+ {0x380c, "fr_MA"},
+ {0x180c, "fr_MC"},
+ {0x340c, "fr_ML"},
+ {0x200c, "fr_RE"},
+ {0x280c, "fr_SN"},
+ {0xe40c, "fr_015"},
+ {0x1c0c, "fr_029"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
+
+ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
+ {0x3c, "ga"},
+ {0x083c, "ga_IE"},
+ {0x043c, "gd_GB"}
+};
+
+ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
+ {0x91, "gd"},
+ {0x0491, "gd_GB"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
+ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
+ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
+ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
+
+ILCID_POSIX_SUBTABLE(ha) {
+ {0x68, "ha"},
+ {0x7c68, "ha_Latn"},
+ {0x0468, "ha_Latn_NG"},
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
+ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
+ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
+
+/* This LCID is really four different locales.*/
+ILCID_POSIX_SUBTABLE(hr) {
+ {0x1a, "hr"},
+ {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
+ {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
+ {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
+ {0x781a, "bs"}, /* Bosnian */
+ {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
+ {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
+ {0x101a, "hr_BA"}, /* Croatian in Bosnia */
+ {0x041a, "hr_HR"}, /* Croatian*/
+ {0x2c1a, "sr_Latn_ME"},
+ {0x241a, "sr_Latn_RS"},
+ {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
+ {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
+ {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
+ {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
+ {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
+ {0x301a, "sr_Cyrl_ME"},
+ {0x281a, "sr_Cyrl_RS"},
+ {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
+ {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
+};
+
+ILCID_POSIX_SUBTABLE(hsb) {
+ {0x2E, "hsb"},
+ {0x042E, "hsb_DE"},
+ {0x082E, "dsb_DE"},
+ {0x7C2E, "dsb"},
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
+ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
+
+ILCID_POSIX_SUBTABLE(ibb) {
+ {0x69, "ibb"},
+ {0x0469, "ibb_NG"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
+ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
+ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
+ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
+
+ILCID_POSIX_SUBTABLE(it) {
+ {0x10, "it"},
+ {0x0810, "it_CH"},
+ {0x0410, "it_IT"}
+};
+
+ILCID_POSIX_SUBTABLE(iu) {
+ {0x5d, "iu"},
+ {0x045d, "iu_Cans_CA"},
+ {0x785d, "iu_Cans"},
+ {0x085d, "iu_Latn_CA"},
+ {0x7c5d, "iu_Latn"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
+ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
+ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
+ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
+ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
+ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
+ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
+
+ILCID_POSIX_SUBTABLE(ko) {
+ {0x12, "ko"},
+ {0x0812, "ko_KP"},
+ {0x0412, "ko_KR"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
+ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
+
+ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */
+ {0x60, "ks"},
+ {0x0460, "ks_Arab_IN"},
+ {0x0860, "ks_Deva_IN"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
+
+ILCID_POSIX_SUBTABLE(la) {
+ {0x76, "la"},
+ {0x0476, "la_001"},
+ {0x0476, "la_IT"} /*Left in for compatibility*/
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
+ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
+ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
+ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
+ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
+ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
+ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
+
+ILCID_POSIX_SUBTABLE(mn) {
+ {0x50, "mn"},
+ {0x0450, "mn_MN"},
+ {0x7c50, "mn_Mong"},
+ {0x0850, "mn_Mong_CN"},
+ {0x0850, "mn_CN"},
+ {0x7850, "mn_Cyrl"},
+ {0x0c50, "mn_Mong_MN"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
+ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
+ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
+
+ILCID_POSIX_SUBTABLE(ms) {
+ {0x3e, "ms"},
+ {0x083e, "ms_BN"}, /* Brunei Darussalam*/
+ {0x043e, "ms_MY"} /* Malaysia*/
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
+ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
+
+ILCID_POSIX_SUBTABLE(ne) {
+ {0x61, "ne"},
+ {0x0861, "ne_IN"}, /* India*/
+ {0x0461, "ne_NP"} /* Nepal*/
+};
+
+ILCID_POSIX_SUBTABLE(nl) {
+ {0x13, "nl"},
+ {0x0813, "nl_BE"},
+ {0x0413, "nl_NL"}
+};
+
+/* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
+// TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
+ILCID_POSIX_SUBTABLE(no) {
+ {0x14, "no"}, /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */
+ {0x7c14, "nb"}, /* really nb */
+ {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
+ {0x0414, "no_NO"}, /* really nb_NO */
+ {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
+ {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
+ {0x0814, "no_NO_NY"}/* really nn_NO */
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */
+ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
+
+ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
+ {0x72, "om"},
+ {0x0472, "om_ET"},
+ {0x0472, "gaz_ET"}
+};
+
+/* Declared as or_IN to get around compiler errors*/
+ILCID_POSIX_SUBTABLE(or_IN) {
+ {0x48, "or"},
+ {0x0448, "or_IN"},
+};
+
+ILCID_POSIX_SUBTABLE(pa) {
+ {0x46, "pa"},
+ {0x0446, "pa_IN"},
+ {0x0846, "pa_Arab_PK"},
+ {0x0846, "pa_PK"}
+};
+
+ILCID_POSIX_SUBTABLE(pap) {
+ {0x79, "pap"},
+ {0x0479, "pap_029"},
+ {0x0479, "pap_AN"} /*Left in for compatibility*/
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
+ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
+
+ILCID_POSIX_SUBTABLE(pt) {
+ {0x16, "pt"},
+ {0x0416, "pt_BR"},
+ {0x0816, "pt_PT"}
+};
+
+ILCID_POSIX_SUBTABLE(qu) {
+ {0x6b, "qu"},
+ {0x046b, "qu_BO"},
+ {0x086b, "qu_EC"},
+ {0x0C6b, "qu_PE"},
+ {0x046b, "quz_BO"},
+ {0x086b, "quz_EC"},
+ {0x0C6b, "quz_PE"}
+};
+
+ILCID_POSIX_SUBTABLE(quc) {
+ {0x93, "quc"},
+ {0x0493, "quc_CO"},
+ /*
+ "quc_Latn_GT" is an exceptional case. Language ID of "quc"
+ is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
+ under the group of "qut". "qut" is a retired ISO 639-3 language
+ code for West Central Quiche, and merged to "quc".
+ It looks like Windows previously reserved "qut" for K'iche', but,
+ decided to use "quc" when adding a locale for K'iche' (Guatemala).
+
+ This data structure used here assumes language ID bits in
+ LCID is unique for alphabetic language code. But this is not true
+ for "quc_Latn_GT". If we don't have the data below, LCID look up
+ by alphabetic locale ID (POSIX) will fail. The same entry is found
+ under "qut" below, which is required for reverse look up.
+ */
+ {0x0486, "quc_Latn_GT"}
+};
+
+ILCID_POSIX_SUBTABLE(qut) {
+ {0x86, "qut"},
+ {0x0486, "qut_GT"},
+ /*
+ See the note in "quc" above.
+ */
+ {0x0486, "quc_Latn_GT"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
+
+ILCID_POSIX_SUBTABLE(ro) {
+ {0x18, "ro"},
+ {0x0418, "ro_RO"},
+ {0x0818, "ro_MD"}
+};
+
+// TODO: This is almost certainly 'wrong'. 0 in Windows is a synonym for LOCALE_USER_DEFAULT.
+// More likely this is a similar concept to the Windows 0x7f Invariant locale ""
+// (Except that it's not invariant in ICU)
+ILCID_POSIX_SUBTABLE(root) {
+ {0x00, "root"}
+};
+
+ILCID_POSIX_SUBTABLE(ru) {
+ {0x19, "ru"},
+ {0x0419, "ru_RU"},
+ {0x0819, "ru_MD"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
+ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
+ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
+
+ILCID_POSIX_SUBTABLE(sd) {
+ {0x59, "sd"},
+ {0x0459, "sd_Deva_IN"},
+ {0x0459, "sd_IN"},
+ {0x0859, "sd_Arab_PK"},
+ {0x0859, "sd_PK"},
+ {0x7c59, "sd_Arab"}
+};
+
+ILCID_POSIX_SUBTABLE(se) {
+ {0x3b, "se"},
+ {0x0c3b, "se_FI"},
+ {0x043b, "se_NO"},
+ {0x083b, "se_SE"},
+ {0x783b, "sma"},
+ {0x183b, "sma_NO"},
+ {0x1c3b, "sma_SE"},
+ {0x7c3b, "smj"},
+ {0x703b, "smn"},
+ {0x743b, "sms"},
+ {0x103b, "smj_NO"},
+ {0x143b, "smj_SE"},
+ {0x243b, "smn_FI"},
+ {0x203b, "sms_FI"},
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
+ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
+ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
+
+ILCID_POSIX_SUBTABLE(so) {
+ {0x77, "so"},
+ {0x0477, "so_SO"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
+ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
+
+ILCID_POSIX_SUBTABLE(sv) {
+ {0x1d, "sv"},
+ {0x081d, "sv_FI"},
+ {0x041d, "sv_SE"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
+ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
+
+ILCID_POSIX_SUBTABLE(ta) {
+ {0x49, "ta"},
+ {0x0449, "ta_IN"},
+ {0x0849, "ta_LK"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
+
+/* Cyrillic based by default */
+ILCID_POSIX_SUBTABLE(tg) {
+ {0x28, "tg"},
+ {0x7c28, "tg_Cyrl"},
+ {0x0428, "tg_Cyrl_TJ"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
+
+ILCID_POSIX_SUBTABLE(ti) {
+ {0x73, "ti"},
+ {0x0873, "ti_ER"},
+ {0x0473, "ti_ET"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
+
+ILCID_POSIX_SUBTABLE(tn) {
+ {0x32, "tn"},
+ {0x0832, "tn_BW"},
+ {0x0432, "tn_ZA"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
+ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
+ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
+
+ILCID_POSIX_SUBTABLE(tzm) {
+ {0x5f, "tzm"},
+ {0x7c5f, "tzm_Latn"},
+ {0x085f, "tzm_Latn_DZ"},
+ {0x105f, "tzm_Tfng_MA"},
+ {0x045f, "tzm_Arab_MA"},
+ {0x045f, "tmz"}
+};
+
+ILCID_POSIX_SUBTABLE(ug) {
+ {0x80, "ug"},
+ {0x0480, "ug_CN"},
+ {0x0480, "ug_Arab_CN"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
+
+ILCID_POSIX_SUBTABLE(ur) {
+ {0x20, "ur"},
+ {0x0820, "ur_IN"},
+ {0x0420, "ur_PK"}
+};
+
+ILCID_POSIX_SUBTABLE(uz) {
+ {0x43, "uz"},
+ {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
+ {0x7843, "uz_Cyrl"}, /* Cyrillic based */
+ {0x0843, "uz_UZ"}, /* Cyrillic based */
+ {0x0443, "uz_Latn_UZ"}, /* Latin based */
+ {0x7c43, "uz_Latn"} /* Latin based */
+};
+
+ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
+ {0x33, "ve"},
+ {0x0433, "ve_ZA"},
+ {0x0433, "ven_ZA"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
+ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
+ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
+
+ILCID_POSIX_SUBTABLE(yi) {
+ {0x003d, "yi"},
+ {0x043d, "yi_001"}
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
+
+// Windows & ICU tend to use different names for some of these
+// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
+ILCID_POSIX_SUBTABLE(zh) {
+ {0x0004, "zh_Hans"},
+ {0x7804, "zh"},
+ {0x0804, "zh_CN"},
+ {0x0804, "zh_Hans_CN"},
+ {0x0c04, "zh_Hant_HK"},
+ {0x0c04, "zh_HK"},
+ {0x1404, "zh_Hant_MO"},
+ {0x1404, "zh_MO"},
+ {0x1004, "zh_Hans_SG"},
+ {0x1004, "zh_SG"},
+ {0x0404, "zh_Hant_TW"},
+ {0x7c04, "zh_Hant"},
+ {0x0404, "zh_TW"},
+ {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
+ {0x30404,"zh_TW"}, /* Bopomofo order */
+ {0x20004,"zh@collation=stroke"},
+ {0x20404,"zh_Hant@collation=stroke"},
+ {0x20404,"zh_Hant_TW@collation=stroke"},
+ {0x20404,"zh_TW@collation=stroke"},
+ {0x20804,"zh_Hans@collation=stroke"},
+ {0x20804,"zh_Hans_CN@collation=stroke"},
+ {0x20804,"zh_CN@collation=stroke"}
+ // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
+};
+
+ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
+
+/* This must be static and grouped by LCID. */
+static const ILcidPosixMap gPosixIDmap[] = {
+ ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
+ ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
+ ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
+ ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */
+ ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
+ ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
+ ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */
+ ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */
+/* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */
+ ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
+ ILCID_POSIX_MAP(bin), /* bin Edo 0x66 */
+ ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
+ ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */
+ ILCID_POSIX_MAP(br), /* br Breton 0x7e */
+ ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
+ ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */
+ ILCID_POSIX_MAP(ckb), /* ckb Sorani (Central Kurdish) 0x92 */
+ ILCID_POSIX_MAP(co), /* co Corsican 0x83 */
+ ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */
+ ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */
+ ILCID_POSIX_MAP(da), /* da Danish 0x06 */
+ ILCID_POSIX_MAP(de), /* de German 0x07 */
+ ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
+ ILCID_POSIX_MAP(el), /* el Greek 0x08 */
+ ILCID_POSIX_MAP(en), /* en English 0x09 */
+ ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
+ ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
+ ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
+ ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
+ ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */
+ ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */
+ ILCID_POSIX_MAP(ff), /* ff Fula 0x67 */
+ ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
+ ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */
+ ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
+ ILCID_POSIX_MAP(fr), /* fr French 0x0c */
+ ILCID_POSIX_MAP(fuv), /* fuv Fulfulde - Nigeria 0x67 */
+ ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */
+ ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */
+ ILCID_POSIX_MAP(gd), /* gd Gaelic (United Kingdom) 0x91 */
+ ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
+ ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */
+ ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
+ ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
+ ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */
+ ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */
+ ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
+ ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
+ ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */
+ ILCID_POSIX_MAP(hsb), /* hsb Upper Sorbian 0x2e */
+ ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
+ ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
+ ILCID_POSIX_MAP(ibb), /* ibb Ibibio - Nigeria 0x69 */
+ ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
+ ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */
+ ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */
+ ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
+ ILCID_POSIX_MAP(it), /* it Italian 0x10 */
+ ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */
+ ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
+ ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
+ ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
+ ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
+ ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */
+ ILCID_POSIX_MAP(km), /* km Khmer 0x53 */
+ ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
+ ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
+ ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
+ ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */
+ ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
+ ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
+ ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */
+ ILCID_POSIX_MAP(la), /* la Latin 0x76 */
+ ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */
+ ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
+ ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
+ ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */
+ ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
+ ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
+ ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
+ ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
+ ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */
+ ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
+ ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
+ ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
+ ILCID_POSIX_MAP(my), /* my Burmese 0x55 */
+/* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
+ ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
+ ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
+/* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
+ ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */
+ ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
+ ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */
+ ILCID_POSIX_MAP(om), /* om Oromo 0x72 */
+ ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
+ ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
+ ILCID_POSIX_MAP(pap), /* pap Papiamentu 0x79 */
+ ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
+ ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */
+ ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
+ ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */
+ ILCID_POSIX_MAP(quc), /* quc K'iche 0x93 */
+ ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */
+ ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */
+ ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
+ ILCID_POSIX_MAP(root), /* root 0x00 */
+ ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
+ ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */
+ ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
+ ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */
+ ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
+ ILCID_POSIX_MAP(se), /* se Sami 0x3b */
+/* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
+ ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */
+ ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
+ ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
+ ILCID_POSIX_MAP(so), /* so Somali 0x77 */
+ ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
+/* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
+ ILCID_POSIX_MAP(st), /* st Sutu 0x30 */
+ ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
+ ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
+ ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
+ ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
+ ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
+ ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */
+ ILCID_POSIX_MAP(th), /* th Thai 0x1e */
+ ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */
+ ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */
+ ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */
+ ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
+ ILCID_POSIX_MAP(ts), /* ts Tsonga 0x31 */
+ ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
+ ILCID_POSIX_MAP(tzm), /* tzm Tamazight 0x5f */
+ ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */
+ ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
+ ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
+ ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
+ ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */
+ ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
+ ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */
+ ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */
+ ILCID_POSIX_MAP(yi), /* yi Yiddish 0x3d */
+ ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */
+ ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
+ ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
+};
+
+/* Number of per-language entries in gPosixIDmap; used as the bound for every search over that table. */
+static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
+
+/**
+ * Measures how many leading characters two NUL-terminated IDs have in common.
+ * File-local helper used by getHostID() to rank candidate region entries.
+ *
+ * @param id1 first ID.
+ * @param id2 second ID.
+ * @return the length of the common prefix; counting stops at the first
+ *         mismatch or at the terminator of either string.
+ */
+static int32_t
+idCmp(const char* id1, const char* id2)
+{
+    int32_t matched = 0;
+    for (; id1[matched] != 0 && id1[matched] == id2[matched]; matched++) {
+        /* all work is done in the loop header */
+    }
+    return matched;
+}
+
+/**
+ * Searches one language's map for the Windows LCID of a POSIX locale ID.
+ *
+ * @param this_0  the per-language map whose region entries are searched.
+ * @param posixID the Posix style locale id.
+ * @param status  left untouched on an exact match; set to
+ *                U_USING_FALLBACK_WARNING when only a shorter entry matched
+ *                (the next character of posixID being '_' or '@'), and to
+ *                U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has no equivalent
+ *                Windows LCID in this map.
+ * @return the LCID of the exact or best-prefix entry; the LCID of the map's
+ *         first entry when nothing matched.
+ */
+static uint32_t
+getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
+{
+    const int32_t requestedLen = (int32_t)uprv_strlen(posixID);
+    int32_t bestIdx = 0;
+    int32_t bestLen = 0;
+
+    for (uint32_t idx = 0; idx < this_0->numRegions; idx++) {
+        const char *entryID = this_0->regionMaps[idx].posixID;
+        const int32_t sameChars = idCmp(posixID, entryID);
+
+        /* Only an entry that is consumed completely and is longer than the
+           current best can replace it. */
+        if (sameChars <= bestLen || entryID[sameChars] != 0) {
+            continue;
+        }
+        if (requestedLen == sameChars) {
+            /* Exact match */
+            return this_0->regionMaps[idx].hostID;
+        }
+        bestLen = sameChars;
+        bestIdx = idx;
+    }
+
+    /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
+    /* We also have to make sure that sid and si and similar string subsets don't match. */
+    const char nextChar = posixID[bestLen];
+    if ((nextChar == '_' || nextChar == '@')
+        && this_0->regionMaps[bestIdx].posixID[bestLen] == 0)
+    {
+        *status = U_USING_FALLBACK_WARNING;
+        return this_0->regionMaps[bestIdx].hostID;
+    }
+
+    /* no match found */
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    return this_0->regionMaps->hostID;
+}
+
+/**
+ * Finds the POSIX ID registered for an LCID within one language's map.
+ *
+ * @param this_0 the per-language map to search.
+ * @param hostID the LCID to look for.
+ * @return the POSIX ID of the first entry whose LCID equals hostID; when no
+ *         entry matches, the POSIX ID of the map's first entry (the language
+ *         id with the wild card region).
+ */
+static const char*
+getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
+{
+    uint32_t pos = 0;
+    while (pos < this_0->numRegions && this_0->regionMaps[pos].hostID != hostID) {
+        pos++;
+    }
+    if (pos == this_0->numRegions) {
+        /* No matching region was found: fall back to the first entry. */
+        pos = 0;
+    }
+    return this_0->regionMaps[pos].posixID;
+}
+
+/*
+//////////////////////////////////////
+//
+// LCID --> POSIX
+//
+/////////////////////////////////////
+*/
+#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
+/*
+ * Various language tags needs to be changed:
+ * quz -> qu
+ * prs -> fa
+ */
+#define FIX_LANGUAGE_ID_TAG(buffer, len) \
+ if (len >= 3) { \
+ if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {\
+ buffer[2] = 0; \
+ uprv_strcat(buffer, buffer+3); \
+ } else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {\
+ buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0; \
+ uprv_strcat(buffer, buffer+3); \
+ } \
+ }
+
+#endif
+
+/**
+ * Converts a Windows LCID into a POSIX-style locale ID.
+ *
+ * When the Windows LCID mapping API is compiled in, LCIDToLocaleName() is
+ * tried first (except for primary language ID 0x92). The hardcoded
+ * gPosixIDmap table is consulted when that yields no name or when the
+ * Windows name carries a '_' sort variant.
+ *
+ * @param hostid the Windows LCID.
+ * @param posixID output buffer receiving the locale ID.
+ * @param posixIDCapacity capacity of posixID in bytes.
+ * @param status set to U_STRING_NOT_TERMINATED_WARNING when the result
+ * exactly fills the buffer, to U_BUFFER_OVERFLOW_ERROR when it does not
+ * fit, and to U_ILLEGAL_ARGUMENT_ERROR when no mapping exists.
+ * @return the full length of the locale ID (even when it was truncated), or
+ * -1 when no mapping was found.
+ */
+U_CAPI int32_t
+uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
+{
+ uint16_t langID;
+ uint32_t localeIndex;
+ UBool bLookup = TRUE;
+ const char *pPosixID = NULL;
+
+#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
+ static_assert(ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH, "Windows locale names have smaller length than ICU locale names.");
+
+ char locName[LOCALE_NAME_MAX_LENGTH] = {};
+
+ // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
+ // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
+ // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
+ // use the Windows API to resolve locale ID for this specific case.
+ if ((hostid & 0x3FF) != 0x92) {
+ int32_t tmpLen = 0;
+ char16_t windowsLocaleName[LOCALE_NAME_MAX_LENGTH] = {};
+
+ // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
+ tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
+ if (tmpLen > 1) {
+ int32_t i = 0;
+ // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
+ bLookup = FALSE;
+ for (i = 0; i < UPRV_LENGTHOF(locName); i++)
+ {
+ locName[i] = (char)(windowsLocaleName[i]);
+
+ // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
+ // In such cases, we need special mapping data found in the hardcoded table
+ // in this source file.
+ if (windowsLocaleName[i] == L'_')
+ {
+ // Keep the base locale, without variant
+ // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
+ locName[i] = '\0';
+ tmpLen = i;
+ bLookup = TRUE;
+ break;
+ }
+ else if (windowsLocaleName[i] == L'-')
+ {
+ // Windows names use -, ICU uses _
+ locName[i] = '_';
+ }
+ else if (windowsLocaleName[i] == L'\0')
+ {
+ // No point in doing more work than necessary
+ break;
+ }
+ }
+ // TODO: Need to understand this better, why isn't it an alias?
+ FIX_LANGUAGE_ID_TAG(locName, tmpLen);
+ pPosixID = locName;
+ }
+ }
+#endif
+
+ if (bLookup) {
+ const char *pCandidate = NULL;
+ langID = LANGUAGE_LCID(hostid);
+
+ // Find the language entry by primary language ID, then the region entry by full LCID.
+ for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
+ if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
+ pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
+ break;
+ }
+ }
+
+ /* On Windows, when locale name has a variant, we still look up the hardcoded table.
+ If a match in the hardcoded table is longer than the Windows locale name without
+ variant, we use the one as the result */
+ if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
+ pPosixID = pCandidate;
+ }
+ }
+
+ if (pPosixID) {
+ int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
+ // Copy as much as fits; the terminator is only written when there is room for it.
+ int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
+ uprv_memcpy(posixID, pPosixID, copyLen);
+ if (resLen < posixIDCapacity) {
+ posixID[resLen] = 0;
+ if (*status == U_STRING_NOT_TERMINATED_WARNING) {
+ *status = U_ZERO_ERROR;
+ }
+ } else if (resLen == posixIDCapacity) {
+ *status = U_STRING_NOT_TERMINATED_WARNING;
+ } else {
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ }
+ return resLen;
+ }
+
+ /* no match found */
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return -1;
+}
+
+/*
+//////////////////////////////////////
+//
+// POSIX --> LCID
+// This should only be called from uloc_getLCID.
+// The locale ID must be in canonical form.
+//
+/////////////////////////////////////
+*/
+/**
+ * Asks the Windows platform API for the LCID of a locale ID.
+ *
+ * Without the Windows LCID mapping API this function always returns 0.
+ *
+ * @param localeID the locale ID to convert.
+ * @param status ICU error code; nothing is done when it already indicates a
+ * failure on entry.
+ * @return the LCID reported by LocaleNameToLCID(); 0 when the locale ID has
+ * a collation keyword, when the conversion to a language tag or the platform
+ * lookup fails, when the platform answers LOCALE_CUSTOM_UNSPECIFIED, or when
+ * the platform API is not compiled in.
+ */
+U_CAPI uint32_t
+uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
+{
+ if (U_FAILURE(*status)) {
+ return 0;
+ }
+
+ // The purpose of this function is to leverage the Windows platform name->lcid
+ // conversion functionality when available.
+#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
+ int32_t len;
+ char baseName[ULOC_FULLNAME_CAPACITY] = {};
+ const char * mylocaleID = localeID;
+
+ // Check any for keywords.
+ if (uprv_strchr(localeID, '@'))
+ {
+ icu::CharString collVal;
+ {
+ icu::CharStringByteSink sink(&collVal);
+ ulocimp_getKeywordValue(localeID, "collation", sink, status);
+ }
+ if (U_SUCCESS(*status) && !collVal.isEmpty())
+ {
+ // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
+ return 0;
+ }
+ else
+ {
+ // If the locale ID contains keywords other than collation, just use the base name.
+ len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, status);
+
+ if (U_SUCCESS(*status) && len > 0)
+ {
+ baseName[len] = 0;
+ mylocaleID = baseName;
+ }
+ }
+ }
+
+ char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
+ // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
+ (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, status);
+
+ if (U_SUCCESS(*status))
+ {
+ // Need it to be UTF-16, not 8-bit
+ wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
+ int32_t i;
+ for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
+ {
+ if (asciiBCP47Tag[i] == '\0')
+ {
+ break;
+ }
+ else
+ {
+ // Copy the character
+ bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]);
+ }
+ }
+
+ // Only query the platform when the tag left room for its terminator.
+ if (i < (UPRV_LENGTHOF(bcp47Tag) - 1))
+ {
+ // Ensure it's null terminated
+ bcp47Tag[i] = L'\0';
+ LCID lcid = LocaleNameToLCID(bcp47Tag, LOCALE_ALLOW_NEUTRAL_NAMES);
+ if (lcid > 0)
+ {
+ // Found LCID from windows, return that one, unless its completely ambiguous
+ // LOCALE_USER_DEFAULT and transients are OK because they will round trip
+ // for this process.
+ if (lcid != LOCALE_CUSTOM_UNSPECIFIED)
+ {
+ return lcid;
+ }
+ }
+ }
+ }
+#else
+ (void) localeID; // Suppress unused variable warning.
+#endif
+
+ // Nothing found, or not implemented.
+ return 0;
+}
+
+/**
+ * Table-based POSIX -> LCID conversion, used when native platform name->lcid
+ * conversion isn't available or for locales that don't follow patterns the
+ * platform expects.
+ *
+ * @param langID  language part of the locale; used to find the language's map.
+ * @param posixID full POSIX locale ID looked up inside that map.
+ * @param status  receives U_USING_FALLBACK_WARNING when only a fallback entry
+ *                matched and U_ILLEGAL_ARGUMENT_ERROR when nothing matched.
+ * @return the LCID, or 0 for incomplete IDs (NULL or shorter than two
+ *         characters) and when no entry matched.
+ */
+U_CAPI uint32_t
+uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
+{
+    /* Check for incomplete id. */
+    if (langID == NULL || posixID == NULL) {
+        return 0;
+    }
+    if (uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
+        return 0;
+    }
+
+    /* Binary search for the map entry for normal cases. The search stops as
+       soon as the probe position no longer moves. */
+    uint32_t lower = 0;
+    uint32_t upper = gLocaleCount;
+    uint32_t prevProbe = 0;
+    while (upper > lower) {
+        const uint32_t probe = (upper + lower) >> 1;
+        if (probe == prevProbe) {
+            break;
+        }
+
+        const int32_t order = uprv_strcmp(langID, gPosixIDmap[probe].regionMaps->posixID);
+        if (order == 0) {
+            /* we found it */
+            return getHostID(&gPosixIDmap[probe], posixID, status);
+        }
+        if (order < 0) {
+            upper = probe;
+        } else {
+            lower = probe;
+        }
+        prevProbe = probe;
+    }
+
+    /*
+     * Sometimes we can't do a binary search on posixID because some LCIDs
+     * go to different locales. We hit one of those special cases.
+     */
+    uint32_t fallbackValue = (uint32_t)-1;
+    for (uint32_t mapIdx = 0; mapIdx < gLocaleCount; mapIdx++) {
+        UErrorCode lookupStatus = U_ZERO_ERROR;
+        const uint32_t candidate = getHostID(&gPosixIDmap[mapIdx], posixID, &lookupStatus);
+        if (lookupStatus == U_ZERO_ERROR) {
+            return candidate;
+        }
+        if (lookupStatus == U_USING_FALLBACK_WARNING) {
+            /* remember the most recent fallback in case no exact match follows */
+            fallbackValue = candidate;
+        }
+    }
+
+    if (fallbackValue != (uint32_t)-1) {
+        *status = U_USING_FALLBACK_WARNING;
+        return fallbackValue;
+    }
+
+    /* no match found */
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    return 0;   /* return international (root) */
+}
diff --git a/thirdparty/icu4c/common/locmap.h b/thirdparty/icu4c/common/locmap.h
new file mode 100644
index 0000000000..e669873a14
--- /dev/null
+++ b/thirdparty/icu4c/common/locmap.h
@@ -0,0 +1,40 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1996-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File locmap.h : Locale Mapping Classes
+*
+*
+* Created by: Helena Shih
+*
+* Modification History:
+*
+* Date Name Description
+* 3/11/97 aliu Added setId().
+* 4/20/99 Madhu Added T_convertToPosix()
+* 09/18/00 george Removed the memory leaks.
+* 08/23/01 george Convert to C
+*============================================================================
+*/
+
+#ifndef LOCMAP_H
+#define LOCMAP_H
+
+#include "unicode/utypes.h"
+
+/* Extracts the primary language ID (low 10 bits) of a Windows LCID. The
+ * argument and the whole expansion are parenthesized so the macro stays
+ * correct when given an expression or used inside a larger one. */
+#define LANGUAGE_LCID(hostID) ((uint16_t)(0x03FF & (hostID)))
+
+/* Converts a Windows LCID to a POSIX-style locale ID written into posixID.
+ * Returns the full length of the ID, or -1 (with U_ILLEGAL_ARGUMENT_ERROR)
+ * when no mapping exists. */
+U_CAPI int32_t uprv_convertToPosix(uint32_t hostid, char* posixID, int32_t posixIDCapacity, UErrorCode* status);
+
+/* Don't call these functions directly. Use uloc_getLCID instead. */
+/* Both return 0 when no LCID could be determined. */
+U_CAPI uint32_t uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status); // Leverage platform conversion if possible
+/* Table lookup: langID selects the language map, posixID is searched inside it. */
+U_CAPI uint32_t uprv_convertToLCID(const char* langID, const char* posixID, UErrorCode* status);
+
+#endif /* LOCMAP_H */
+
diff --git a/thirdparty/icu4c/common/locresdata.cpp b/thirdparty/icu4c/common/locresdata.cpp
new file mode 100644
index 0000000000..d1d9a4729f
--- /dev/null
+++ b/thirdparty/icu4c/common/locresdata.cpp
@@ -0,0 +1,220 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1997-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: locresdata.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010feb25
+* created by: Markus W. Scherer
+*
+* Code for miscellaneous locale-related resource bundle data access,
+* separated out from other .cpp files
+* that then do not depend on resource bundle code and this data.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/uloc.h"
+#include "unicode/ures.h"
+#include "cstring.h"
+#include "ulocimp.h"
+#include "uresimp.h"
+
+/*
+ * Lookup a resource bundle table item with fallback on the table level.
+ * Regular resource bundle lookups perform fallback to parent locale bundles
+ * and eventually the root bundle, but only for top-level items.
+ * This function takes the name of a top-level table and of an item in that table
+ * and performs a lookup of both, falling back until a bundle contains a table
+ * with this item.
+ *
+ * Note: Only the opening of entire bundles falls back through the default locale
+ * before root. Once a bundle is open, item lookups do not go through the
+ * default locale because that would result in a mix of languages that is
+ * unpredictable to the programmer and most likely useless.
+ *
+ * @param path        package path handed to ures_open().
+ * @param locale      locale whose bundle is opened first.
+ * @param tableKey    name of the top-level table.
+ * @param subTableKey name of a table nested inside tableKey, or NULL.
+ * @param itemKey     key of the string inside the (sub)table.
+ * @param pLength     receives the length of the returned string.
+ * @param pErrorCode  receives the warning or error of the lookup;
+ *                    U_INTERNAL_PROGRAM_ERROR for a self-referencing
+ *                    "Fallback" entry and U_BUFFER_OVERFLOW_ERROR for a
+ *                    "Fallback" name too long to be a locale ID.
+ * @return the item string; NULL when not even the first bundle could be opened.
+ */
+U_CAPI const UChar * U_EXPORT2
+uloc_getTableStringWithFallback(const char *path, const char *locale,
+                                const char *tableKey, const char *subTableKey,
+                                const char *itemKey,
+                                int32_t *pLength,
+                                UErrorCode *pErrorCode)
+{
+    const UChar *item=NULL;
+    UErrorCode errorCode;
+    char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};
+
+    /*
+     * open the bundle for the current locale
+     * this falls back through the locale's chain to root
+     */
+    errorCode=U_ZERO_ERROR;
+    icu::LocalUResourceBundlePointer rb(ures_open(path, locale, &errorCode));
+
+    if(U_FAILURE(errorCode)) {
+        /* total failure, not even root could be opened */
+        *pErrorCode=errorCode;
+        return NULL;
+    } else if(errorCode==U_USING_DEFAULT_WARNING ||
+                (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
+    ) {
+        /* set the "strongest" error code (success->fallback->default->failure) */
+        *pErrorCode=errorCode;
+    }
+
+    for(;;){
+        icu::StackUResourceBundle table;
+        ures_getByKeyWithFallback(rb.getAlias(), tableKey, table.getAlias(), &errorCode);
+
+        if (subTableKey != NULL) {
+            /* descend in place: the sub-table replaces the table handle */
+            ures_getByKeyWithFallback(table.getAlias(), subTableKey, table.getAlias(), &errorCode);
+        }
+        if(U_SUCCESS(errorCode)){
+            item = ures_getStringByKeyWithFallback(table.getAlias(), itemKey, pLength, &errorCode);
+            if(U_FAILURE(errorCode)){
+                const char* replacement = NULL;
+                *pErrorCode = errorCode; /*save the errorCode*/
+                errorCode = U_ZERO_ERROR;
+                /* may be a deprecated code */
+                if(uprv_strcmp(tableKey, "Countries")==0){
+                    replacement =  uloc_getCurrentCountryID(itemKey);
+                }else if(uprv_strcmp(tableKey, "Languages")==0){
+                    replacement =  uloc_getCurrentLanguageID(itemKey);
+                }
+                /*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself if a replacement is not found*/
+                if(replacement!=NULL && itemKey != replacement){
+                    item = ures_getStringByKeyWithFallback(table.getAlias(), replacement, pLength, &errorCode);
+                    if(U_SUCCESS(errorCode)){
+                        *pErrorCode = errorCode;
+                        break;
+                    }
+                }
+            }else{
+                break;
+            }
+        }
+
+        if(U_FAILURE(errorCode)){
+
+            /* still can't figure out ?.. try the fallback mechanism */
+            int32_t len = 0;
+            const UChar* fallbackLocale =  NULL;
+            *pErrorCode = errorCode;
+            errorCode = U_ZERO_ERROR;
+
+            fallbackLocale = ures_getStringByKeyWithFallback(table.getAlias(), "Fallback", &len, &errorCode);
+            if(U_FAILURE(errorCode)){
+               *pErrorCode = errorCode;
+                break;
+            }
+
+            /*
+             * The name goes into a fixed-size buffer: reject anything that does
+             * not fit together with its terminator, and terminate explicitly so
+             * that a shorter name from a later iteration never keeps the tail
+             * of an earlier, longer one.
+             */
+            if(len < 0 || len >= (int32_t)sizeof(explicitFallbackName)){
+                *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
+                break;
+            }
+            u_UCharsToChars(fallbackLocale, explicitFallbackName, len);
+            explicitFallbackName[len] = 0;
+
+            /* guard against recursive fallback */
+            if(uprv_strcmp(explicitFallbackName, locale)==0){
+                *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
+                break;
+            }
+            rb.adoptInstead(ures_open(path, explicitFallbackName, &errorCode));
+            if(U_FAILURE(errorCode)){
+                *pErrorCode = errorCode;
+                break;
+            }
+            /* succeeded in opening the fallback bundle .. continue and try to fetch the item */
+        }else{
+            break;
+        }
+    }
+
+    return item;
+}
+
+/**
+ * Shared implementation of the orientation getters: canonicalizes the locale
+ * and decodes the first character of the "layout" table item named by key.
+ *
+ * @param localeId locale name; canonicalized with uloc_canonicalize().
+ * @param key      item key inside the "layout" table.
+ * @param status   Error status. Set to U_INTERNAL_PROGRAM_ERROR when the value
+ *                 starts with an unexpected character and to
+ *                 U_BUFFER_OVERFLOW_ERROR when the canonical name leaves no
+ *                 room for its terminator in the local buffer.
+ * @return the decoded layout type; ULOC_LAYOUT_UNKNOWN on failure or when the
+ *         value is empty.
+ */
+static ULayoutType
+_uloc_getOrientationHelper(const char* localeId,
+                           const char* key,
+                           UErrorCode *status)
+{
+    ULayoutType result = ULOC_LAYOUT_UNKNOWN;
+
+    if (!U_FAILURE(*status)) {
+        int32_t length = 0;
+        char localeBuffer[ULOC_FULLNAME_CAPACITY];
+
+        uloc_canonicalize(localeId, localeBuffer, sizeof(localeBuffer), status);
+
+        /* A name that exactly fills the buffer is only reported as a warning
+           and is not NUL-terminated, so it must not be used as a C string. */
+        if (*status == U_STRING_NOT_TERMINATED_WARNING) {
+            *status = U_BUFFER_OVERFLOW_ERROR;
+        }
+
+        if (!U_FAILURE(*status)) {
+            const UChar* const value =
+                uloc_getTableStringWithFallback(
+                    NULL,
+                    localeBuffer,
+                    "layout",
+                    NULL,
+                    key,
+                    &length,
+                    status);
+
+            if (!U_FAILURE(*status) && length != 0) {
+                switch(value[0])
+                {
+                case 0x0062: /* 'b' */
+                    result = ULOC_LAYOUT_BTT;
+                    break;
+                case 0x006C: /* 'l' */
+                    result = ULOC_LAYOUT_LTR;
+                    break;
+                case 0x0072: /* 'r' */
+                    result = ULOC_LAYOUT_RTL;
+                    break;
+                case 0x0074: /* 't' */
+                    result = ULOC_LAYOUT_TTB;
+                    break;
+                default:
+                    *status = U_INTERNAL_PROGRAM_ERROR;
+                    break;
+                }
+            }
+        }
+    }
+
+    return result;
+}
+
+/**
+ * Get the layout character orientation for the specified locale.
+ *
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for characters.
+ */
+U_CAPI ULayoutType U_EXPORT2
+uloc_getCharacterOrientation(const char* localeId,
+ UErrorCode *status)
+{
+ return _uloc_getOrientationHelper(localeId, "characters", status);
+}
+
+/**
+ * Get the layout line orientation for the specified locale.
+ *
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for lines.
+ */
+U_CAPI ULayoutType U_EXPORT2
+uloc_getLineOrientation(const char* localeId,
+ UErrorCode *status)
+{
+ return _uloc_getOrientationHelper(localeId, "lines", status);
+}
diff --git a/thirdparty/icu4c/common/locutil.cpp b/thirdparty/icu4c/common/locutil.cpp
new file mode 100644
index 0000000000..3d9d69ff7e
--- /dev/null
+++ b/thirdparty/icu4c/common/locutil.cpp
@@ -0,0 +1,275 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *******************************************************************************
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
+
+#include "unicode/resbund.h"
+#include "unicode/uenum.h"
+#include "cmemory.h"
+#include "ustrfmt.h"
+#include "locutil.h"
+#include "charstr.h"
+#include "ucln_cmn.h"
+#include "uassert.h"
+#include "umutex.h"
+
+// see LocaleUtility::getAvailableLocaleNames
+static icu::UInitOnce LocaleUtilityInitOnce = U_INITONCE_INITIALIZER;
+static icu::Hashtable * LocaleUtility_cache = NULL;
+
+#define UNDERSCORE_CHAR ((UChar)0x005f)
+#define AT_SIGN_CHAR ((UChar)64)
+#define PERIOD_CHAR ((UChar)46)
+
+/*
+ ******************************************************************
+ */
+
+/**
+ * Release all static memory held by Locale Utility.
+ */
+U_CDECL_BEGIN
+/* Cleanup callback (registered in locale_utility_init): frees the cache
+ * table and clears the pointer. Always reports TRUE. */
+static UBool U_CALLCONV service_cleanup(void) {
+    /* delete on a null pointer is a no-op, so no guard is required */
+    delete LocaleUtility_cache;
+    LocaleUtility_cache = NULL;
+    return TRUE;
+}
+
+
+/* One-time initializer for LocaleUtility_cache: registers the cleanup
+ * callback, creates the top-level table and makes it own its values.
+ * On failure the cache pointer is left NULL and status reports the error. */
+static void U_CALLCONV locale_utility_init(UErrorCode &status) {
+    using namespace icu;
+    U_ASSERT(LocaleUtility_cache == NULL);
+    ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup);
+
+    LocaleUtility_cache = new Hashtable(status);
+    if (U_FAILURE(status)) {
+        /* construction reported an error: discard whatever was allocated */
+        delete LocaleUtility_cache;
+        LocaleUtility_cache = NULL;
+    } else if (LocaleUtility_cache == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    } else {
+        /* the values are second-level hashtables owned by the cache */
+        LocaleUtility_cache->setValueDeleter(uhash_deleteHashtable);
+    }
+}
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Copies *id into result and fixes its case only (no other changes) up to
+ * the first '@' or '.' or the end of the string, whichever comes first:
+ * everything before the first '_' is lowercased, everything from there up
+ * to that limit is uppercased. Only ASCII letters are affected.
+ *
+ * @param id     the locale ID to canonicalize; may be NULL.
+ * @param result receives the canonical string; set to bogus when id is NULL.
+ * @return result
+ */
+UnicodeString&
+LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result)
+{
+    if (id == NULL) {
+        result.setToBogus();
+        return result;
+    }
+
+    // In 3.0 this was changed to stop at the first '@' or '.'; it used to run
+    // out to the end of string. A full level 2 canonicalization through
+    // uloc_canonicalize() was tried instead but made some tests fail, so the
+    // case-only fix was kept. [alan 3.0]
+
+    // TODO: Doug, you might want to revise this...
+    result = *id;
+
+    // Limit of the case-fixed part: the first '@' or '.', whichever comes
+    // first. A '.' must also count when the string has no '@' at all.
+    int32_t end = result.indexOf(AT_SIGN_CHAR);
+    int32_t n = result.indexOf(PERIOD_CHAR);
+    if (n >= 0 && (end < 0 || n < end)) {
+        end = n;
+    }
+    if (end < 0) {
+        end = result.length();
+    }
+
+    // The lowercase part stops at the first '_'; an underscore beyond the
+    // limit (inside keywords or a charset) must not extend it.
+    n = result.indexOf(UNDERSCORE_CHAR);
+    if (n < 0 || n > end) {
+        n = end;
+    }
+
+    int32_t i = 0;
+    for (; i < n; ++i) {
+        UChar c = result.charAt(i);
+        if (c >= 0x0041 && c <= 0x005a) {   // 'A'..'Z'
+            c += 0x20;
+            result.setCharAt(i, c);
+        }
+    }
+    for (; i < end; ++i) {
+        UChar c = result.charAt(i);
+        if (c >= 0x0061 && c <= 0x007a) {   // 'a'..'z'
+            c -= 0x20;
+            result.setCharAt(i, c);
+        }
+    }
+    return result;
+}
+
+/**
+ * Builds a Locale from a locale ID held in a UnicodeString.
+ *
+ * @param id     the locale ID; bogus strings and IDs of BUFLEN or more
+ *               characters make the result bogus.
+ * @param result receives the Locale.
+ * @return result
+ */
+Locale&
+LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
+{
+    enum { BUFLEN = 128 };  // larger than ever needed
+
+    if (id.isBogus() || id.length() >= BUFLEN) {
+        result.setToBogus();
+        return result;
+    }
+
+    /*
+     * A Locale is created from a char *, so the UnicodeString has to be
+     * converted first.
+     *
+     * Problem: Locale ID strings may contain '@' which is a variant
+     * character and cannot be handled by invariant-character conversion.
+     *
+     * Hack: Since ICU code can handle locale IDs with multiple encodings
+     * of '@' (at least for EBCDIC; it's not known to be a problem for
+     * ASCII-based systems), every stretch of text without '@' goes through
+     * regular invariant-character conversion and each U+0040 is written
+     * by hand as the compiler-char-constant '@'.
+     * While this compilation-time constant may not match the runtime
+     * encoding of '@', it should be one of the encodings which ICU
+     * recognizes.
+     *
+     * There should be only at most one '@' in a locale ID.
+     */
+    char buffer[BUFLEN];
+    int32_t segStart = 0;
+    int32_t atPos = id.indexOf((UChar)0x40, segStart);
+    while (atPos >= 0) {
+        // normal invariant-character conversion for text between @s
+        id.extract(segStart, atPos - segStart, buffer + segStart, BUFLEN - segStart, US_INV);
+        // manually "convert" U+0040 at id[atPos] into '@' at buffer[atPos]
+        buffer[atPos] = '@';
+        segStart = atPos + 1;
+        atPos = id.indexOf((UChar)0x40, segStart);
+    }
+    // no @ between segStart and the rest of the string
+    id.extract(segStart, INT32_MAX, buffer + segStart, BUFLEN - segStart, US_INV);
+
+    result = Locale::createFromName(buffer);
+    return result;
+}
+
+/**
+ * Appends the name of a Locale to a UnicodeString.
+ *
+ * @param locale the locale whose name is wanted.
+ * @param result receives the name (appended to its current contents); set to
+ *               bogus when the locale is bogus.
+ * @return result
+ */
+UnicodeString&
+LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result)
+{
+    if (locale.isBogus()) {
+        result.setToBogus();
+        return result;
+    }
+    const UnicodeString name(locale.getName(), -1, US_INV);
+    return result.append(name);
+}
+
+/**
+ * Returns the set of locale names available for a bundle path, creating and
+ * caching it on first use.
+ *
+ * @param bundleID path string passed to ures_openAvailableLocales(); an
+ *                 empty string selects the NULL path.
+ * @return a table whose keys are the available locale names (the values are
+ *         meaningless), owned by the cache; NULL on any failure.
+ */
+const Hashtable*
+LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
+{
+    // LocaleUtility_cache is a hash-of-hashes.  The top-level keys
+    // are path strings ('bundleID') passed to
+    // ures_openAvailableLocales.  The top-level values are
+    // second-level hashes.  The second-level keys are result strings
+    // from ures_openAvailableLocales.  The second-level values are
+    // garbage ((void*)1 or other random pointer).
+
+    UErrorCode status = U_ZERO_ERROR;
+    umtx_initOnce(LocaleUtilityInitOnce, locale_utility_init, status);
+    Hashtable *cache = LocaleUtility_cache;
+    if (cache == NULL) {
+        // Catastrophic failure.
+        return NULL;
+    }
+
+    Hashtable* htp;
+    umtx_lock(NULL);
+    htp = (Hashtable*) cache->get(bundleID);
+    umtx_unlock(NULL);
+    if (htp != NULL) {
+        return htp;
+    }
+
+    htp = new Hashtable(status);
+    if (htp == NULL || U_FAILURE(status)) {
+        // Never hand out (or leak) a table whose construction failed.
+        delete htp;
+        return NULL;
+    }
+
+    CharString cbundleID;
+    cbundleID.appendInvariantChars(bundleID, status);
+    const char* path = cbundleID.isEmpty() ? NULL : cbundleID.data();
+    icu::LocalUEnumerationPointer uenum(ures_openAvailableLocales(path, &status));
+    for (;;) {
+        const UChar* id = uenum_unext(uenum.getAlias(), NULL, &status);
+        if (id == NULL) {
+            break;
+        }
+        // only the key matters; any non-NULL value will do
+        htp->put(UnicodeString(id), (void*)htp, status);
+    }
+    if (U_FAILURE(status)) {
+        delete htp;
+        return NULL;
+    }
+
+    umtx_lock(NULL);
+    Hashtable *t = static_cast<Hashtable *>(cache->get(bundleID));
+    if (t != NULL) {
+        // Another thread raced through this code, creating the cache entry first.
+        // Discard ours and return theirs.
+        umtx_unlock(NULL);
+        delete htp;
+        htp = t;
+    } else {
+        cache->put(bundleID, (void*)htp, status);
+        umtx_unlock(NULL);
+        if (U_FAILURE(status)) {
+            // The cache has a value deleter (see locale_utility_init), so a
+            // failed put has already disposed of the table; do not return it.
+            htp = NULL;
+        }
+    }
+    return htp;
+}
+
+/**
+ * Tests whether root is child itself or a prefix of child that ends right
+ * before an underscore.
+ *
+ * @param root  the candidate fallback ID.
+ * @param child the more specific ID.
+ * @return true when child starts with root and either has the same length or
+ *         continues with '_'.
+ */
+UBool
+LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
+{
+    if (child.indexOf(root) != 0) {
+        return false;
+    }
+    const int32_t rootLen = root.length();
+    return child.length() == rootLen || child.charAt(rootLen) == UNDERSCORE_CHAR;
+}
+
+U_NAMESPACE_END
+
+/* !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION */
+#endif
+
+
diff --git a/thirdparty/icu4c/common/locutil.h b/thirdparty/icu4c/common/locutil.h
new file mode 100644
index 0000000000..31bfffd7a5
--- /dev/null
+++ b/thirdparty/icu4c/common/locutil.h
@@ -0,0 +1,39 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ *******************************************************************************
+ * Copyright (C) 2002-2005, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ *******************************************************************************
+ */
+#ifndef LOCUTIL_H
+#define LOCUTIL_H
+
+#include "unicode/utypes.h"
+#include "hash.h"
+
+#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
+
+
+U_NAMESPACE_BEGIN
+
+// temporary utility functions, till I know where to find them
+// in header so tests can also access them
+
+// Static helpers for converting between UnicodeString locale IDs and Locale
+// objects and for querying the locales available in a resource bundle path.
+class U_COMMON_API LocaleUtility {
+public:
+ // Copies *id into result with case fixed (language part lowercase, the rest
+ // up to '@' uppercase); result becomes bogus when id is NULL.
+ static UnicodeString& canonicalLocaleString(const UnicodeString* id, UnicodeString& result);
+ // Creates a Locale from id; result becomes bogus for bogus or overlong IDs.
+ static Locale& initLocaleFromName(const UnicodeString& id, Locale& result);
+ // Appends locale's name to result; result becomes bogus for a bogus locale.
+ static UnicodeString& initNameFromLocale(const Locale& locale, UnicodeString& result);
+ // Returns the cached table of locale names available under bundleID, or NULL on failure.
+ static const Hashtable* getAvailableLocaleNames(const UnicodeString& bundleID);
+ // True when child equals root or starts with root followed by '_'.
+ static UBool isFallbackOf(const UnicodeString& root, const UnicodeString& child);
+};
+
+U_NAMESPACE_END
+
+
+#endif
+
+#endif
diff --git a/thirdparty/icu4c/common/lsr.cpp b/thirdparty/icu4c/common/lsr.cpp
new file mode 100644
index 0000000000..b81808f2c4
--- /dev/null
+++ b/thirdparty/icu4c/common/lsr.cpp
@@ -0,0 +1,114 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// lsr.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "lsr.h"
+#include "uinvchar.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_BEGIN
+
+// Prefixing constructor: stores prefix+lang and prefix+scr back to back (each
+// NUL-terminated) in one owned allocation and aliases the region string.
+// When errorCode is, or becomes, a failure, language and script stay nullptr.
+LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f,
+         UErrorCode &errorCode) :
+        language(nullptr), script(nullptr), region(r),
+        regionIndex(indexForRegion(region)), flags(f) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+
+    CharString packed;
+    packed.append(prefix, errorCode);
+    packed.append(lang, errorCode);
+    packed.append('\0', errorCode);
+    // the script copy starts right behind the language's terminator
+    const int32_t scriptOffset = packed.length();
+    packed.append(prefix, errorCode);
+    packed.append(scr, errorCode);
+
+    owned = packed.cloneData(errorCode);
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    language = owned;
+    script = owned + scriptOffset;
+}
+
+// Move constructor: takes over all fields. When the source owned its strings,
+// it is left with empty language/script, no allocation and a cleared hash
+// code; a source that merely aliased its strings is left unchanged.
+LSR::LSR(LSR &&other) U_NOEXCEPT :
+        language(other.language),
+        script(other.script),
+        region(other.region),
+        owned(other.owned),
+        regionIndex(other.regionIndex),
+        flags(other.flags),
+        hashCode(other.hashCode) {
+    if (owned == nullptr) {
+        return;
+    }
+    other.owned = nullptr;
+    other.language = "";
+    other.script = "";
+    other.hashCode = 0;
+}
+
+// Releases the owned language/script storage with uprv_free(). The pointer
+// itself is not reset here.
+void LSR::deleteOwned() {
+ uprv_free(owned);
+}
+
+// Move assignment: releases this object's owned storage, then takes over all
+// fields of other. When other owned its strings it is left with empty
+// language/script, no allocation and a cleared hash code.
+// Self-assignment is a no-op, and the storage is released directly instead
+// of through an explicit destructor call, so the object is never used after
+// its lifetime has formally ended.
+LSR &LSR::operator=(LSR &&other) U_NOEXCEPT {
+    if (this == &other) {
+        return *this;
+    }
+    if (owned != nullptr) {
+        deleteOwned();
+    }
+    language = other.language;
+    script = other.script;
+    region = other.region;
+    regionIndex = other.regionIndex;
+    flags = other.flags;
+    owned = other.owned;
+    hashCode = other.hashCode;
+    if (owned != nullptr) {
+        other.language = other.script = "";
+        other.owned = nullptr;
+        other.hashCode = 0;
+    }
+    return *this;
+}
+
+// True when language, script and region agree; the flags are not compared.
+UBool LSR::isEquivalentTo(const LSR &other) const {
+    if (uprv_strcmp(language, other.language) != 0) {
+        return false;
+    }
+    if (uprv_strcmp(script, other.script) != 0) {
+        return false;
+    }
+    if (regionIndex != other.regionIndex) {
+        return false;
+    }
+    // Compare regions if both are ill-formed (and their indexes are 0).
+    return regionIndex > 0 || uprv_strcmp(region, other.region) == 0;
+}
+
+// Full equality: equivalent language/script/region and identical flags.
+UBool LSR::operator==(const LSR &other) const {
+    return isEquivalentTo(other) && flags == other.flags;
+}
+
+// Maps a region code to a positive index: three digits ("419") map to
+// 1..1000, two letters ("DE") map to 1001 and up. Anything else yields 0.
+int32_t LSR::indexForRegion(const char *region) {
+    const int32_t first = region[0];
+    const int32_t digit0 = first - '0';
+    if (0 <= digit0 && digit0 <= 9) {  // digits: "419"
+        const int32_t digit1 = region[1] - '0';
+        if (digit1 < 0 || 9 < digit1) {
+            return 0;
+        }
+        const int32_t digit2 = region[2] - '0';
+        if (digit2 < 0 || 9 < digit2 || region[3] != 0) {
+            return 0;
+        }
+        return (10 * digit0 + digit1) * 10 + digit2 + 1;
+    }
+
+    // letters: "DE"
+    const int32_t letter0 = uprv_upperOrdinal(first);
+    if (letter0 < 0 || 25 < letter0) {
+        return 0;
+    }
+    const int32_t letter1 = uprv_upperOrdinal(region[1]);
+    if (letter1 < 0 || 25 < letter1 || region[2] != 0) {
+        return 0;
+    }
+    return 26 * letter0 + letter1 + 1001;
+}
+
+// Computes and stores the hash code from language, script, region index and
+// flags, unless a non-zero hash code is already present. Returns *this.
+LSR &LSR::setHashCode() {
+    if (hashCode != 0) {
+        return *this;
+    }
+    const int32_t languageLen = static_cast<int32_t>(uprv_strlen(language));
+    const int32_t scriptLen = static_cast<int32_t>(uprv_strlen(script));
+
+    uint32_t h = ustr_hashCharsN(language, languageLen);
+    h = h * 37 + ustr_hashCharsN(script, scriptLen);
+    h = h * 37 + regionIndex;
+    hashCode = h * 37 + flags;
+    return *this;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/lsr.h b/thirdparty/icu4c/common/lsr.h
new file mode 100644
index 0000000000..a33f855245
--- /dev/null
+++ b/thirdparty/icu4c/common/lsr.h
@@ -0,0 +1,82 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// lsr.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LSR_H__
+#define __LSR_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "cstring.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Language/script/region triple together with a numeric region index,
+ * flags and an optional cached hash code.
+ * Instances can be moved but not copied: the copy constructor and the
+ * copy assignment operator are deleted.
+ */
+struct LSR final : public UMemory {
+ // Exclusive upper bound for region indexes (see indexForRegion()).
+ static constexpr int32_t REGION_INDEX_LIMIT = 1001 + 26 * 26;
+
+ // Flag values. EXPLICIT_LSR (7) is the bitwise OR of EXPLICIT_LANGUAGE (4),
+ // EXPLICIT_SCRIPT (2) and EXPLICIT_REGION (1).
+ // IMPLICIT_LSR and DONT_CARE_FLAGS are both 0.
+ static constexpr int32_t EXPLICIT_LSR = 7;
+ static constexpr int32_t EXPLICIT_LANGUAGE = 4;
+ static constexpr int32_t EXPLICIT_SCRIPT = 2;
+ static constexpr int32_t EXPLICIT_REGION = 1;
+ static constexpr int32_t IMPLICIT_LSR = 0;
+ static constexpr int32_t DONT_CARE_FLAGS = 0;
+
+ // Subtag strings. The default and the aliasing constructor store the
+ // given pointers as-is, without copying the characters.
+ const char *language;
+ const char *script;
+ const char *region;
+ // Non-null only if this object owns memory; the destructor then calls
+ // deleteOwned(). Presumably the buffer filled by the prefix constructor
+ // below -- its definition is not in this header.
+ char *owned = nullptr;
+ /** Index for region, 0 if ill-formed. @see indexForRegion */
+ int32_t regionIndex = 0;
+ // Combination of the EXPLICIT_* constants above.
+ int32_t flags = 0;
+ /** Only set for LSRs that will be used in a hash table. */
+ int32_t hashCode = 0;
+
+ /** Default constructor: language "und", empty script and region. */
+ LSR() : language("und"), script(""), region("") {}
+
+ /** Constructor which aliases all subtag pointers. */
+ LSR(const char *lang, const char *scr, const char *r, int32_t f) :
+ language(lang), script(scr), region(r),
+ regionIndex(indexForRegion(region)), flags(f) {}
+ /**
+ * Constructor which prepends the prefix to the language and script,
+ * copies those into owned memory, and aliases the region.
+ */
+ LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f,
+ UErrorCode &errorCode);
+ LSR(LSR &&other) U_NOEXCEPT;
+ LSR(const LSR &other) = delete;
+ inline ~LSR() {
+ // Pure inline code for almost all instances.
+ if (owned != nullptr) {
+ deleteOwned();
+ }
+ }
+
+ LSR &operator=(LSR &&other) U_NOEXCEPT;
+ LSR &operator=(const LSR &other) = delete;
+
+ /**
+ * Returns a positive index (>0) for a well-formed region code.
+ * Do not rely on a particular region->index mapping; it may change.
+ * Returns 0 for ill-formed strings.
+ */
+ static int32_t indexForRegion(const char *region);
+
+ /** Declared here, defined out of line (lsr.cpp). */
+ UBool isEquivalentTo(const LSR &other) const;
+ UBool operator==(const LSR &other) const;
+
+ /** Negation of operator==. */
+ inline UBool operator!=(const LSR &other) const {
+ return !operator==(other);
+ }
+
+ /** Returns a reference to an LSR so that calls can be chained. */
+ LSR &setHashCode();
+
+private:
+ /** Called by the destructor when owned is non-null. */
+ void deleteOwned();
+};
+
+U_NAMESPACE_END
+
+#endif // __LSR_H__
diff --git a/thirdparty/icu4c/common/messageimpl.h b/thirdparty/icu4c/common/messageimpl.h
new file mode 100644
index 0000000000..a56479066b
--- /dev/null
+++ b/thirdparty/icu4c/common/messageimpl.h
@@ -0,0 +1,65 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: messageimpl.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011apr04
+* created by: Markus W. Scherer
+*/
+
+#ifndef __MESSAGEIMPL_H__
+#define __MESSAGEIMPL_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/messagepattern.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Helper functions for use of MessagePattern.
+ * In Java, these are package-private methods in MessagePattern itself.
+ * In C++, they are declared here and implemented in messagepattern.cpp.
+ */
+class U_COMMON_API MessageImpl {
+public:
+ /**
+ * @return true if getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED
+ */
+ static UBool jdkAposMode(const MessagePattern &msgPattern) {
+ return msgPattern.getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED;
+ }
+
+ /**
+ * Appends the s[start, limit[ substring to sb, but with only half of the apostrophes
+ * according to JDK pattern behavior.
+ * @param s the source string
+ * @param start start index of the substring in s (inclusive)
+ * @param limit limit index of the substring in s (exclusive)
+ * @param sb the string that is appended to
+ */
+ static void appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit,
+ UnicodeString &sb);
+
+ /**
+ * Appends the sub-message to the result string.
+ * Omits SKIP_SYNTAX and appends whole arguments using appendReducedApostrophes().
+ * @return a UnicodeString reference (presumably result, for chaining -- the
+ * definition is not in this header)
+ */
+ static UnicodeString &appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern,
+ int32_t msgStart,
+ UnicodeString &result);
+
+private:
+ // Private constructor: code outside this class cannot create instances.
+ MessageImpl(); // no constructor: all static methods
+};
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_FORMATTING
+
+#endif // __MESSAGEIMPL_H__
diff --git a/thirdparty/icu4c/common/messagepattern.cpp b/thirdparty/icu4c/common/messagepattern.cpp
new file mode 100644
index 0000000000..f223d06711
--- /dev/null
+++ b/thirdparty/icu4c/common/messagepattern.cpp
@@ -0,0 +1,1233 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2011-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: messagepattern.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011mar14
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/messagepattern.h"
+#include "unicode/unistr.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "messageimpl.h"
+#include "patternprops.h"
+#include "putilimp.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+// Unicode character/code point constants ---------------------------------- ***
+
+// Each constant holds the UTF-16 code unit of the character that its name
+// describes, e.g. u_pound is U+0023 '#' and u_apos is U+0027 (apostrophe).
+static const UChar u_pound=0x23;
+static const UChar u_apos=0x27;
+static const UChar u_plus=0x2B;
+static const UChar u_comma=0x2C;
+static const UChar u_minus=0x2D;
+static const UChar u_dot=0x2E;
+static const UChar u_colon=0x3A;
+static const UChar u_lessThan=0x3C;
+static const UChar u_equal=0x3D;
+static const UChar u_A=0x41;
+static const UChar u_C=0x43;
+static const UChar u_D=0x44;
+static const UChar u_E=0x45;
+static const UChar u_H=0x48;
+static const UChar u_I=0x49;
+static const UChar u_L=0x4C;
+static const UChar u_N=0x4E;
+static const UChar u_O=0x4F;
+static const UChar u_P=0x50;
+static const UChar u_R=0x52;
+static const UChar u_S=0x53;
+static const UChar u_T=0x54;
+static const UChar u_U=0x55;
+static const UChar u_Z=0x5A;
+static const UChar u_a=0x61;
+static const UChar u_c=0x63;
+static const UChar u_d=0x64;
+static const UChar u_e=0x65;
+static const UChar u_f=0x66;
+static const UChar u_h=0x68;
+static const UChar u_i=0x69;
+static const UChar u_l=0x6C;
+static const UChar u_n=0x6E;
+static const UChar u_o=0x6F;
+static const UChar u_p=0x70;
+static const UChar u_r=0x72;
+static const UChar u_s=0x73;
+static const UChar u_t=0x74;
+static const UChar u_u=0x75;
+static const UChar u_z=0x7A;
+static const UChar u_leftCurlyBrace=0x7B;
+static const UChar u_pipe=0x7C;
+static const UChar u_rightCurlyBrace=0x7D;
+static const UChar u_lessOrEqual=0x2264; // U+2264 is <=
+
+// The two keyword arrays below are not NUL-terminated: kOffsetColon has
+// 7 code units and kOther has 5, and users must pass those lengths explicitly.
+static const UChar kOffsetColon[]={ // "offset:"
+ u_o, u_f, u_f, u_s, u_e, u_t, u_colon
+};
+
+static const UChar kOther[]={ // "other"
+ u_o, u_t, u_h, u_e, u_r
+};
+
+// MessagePatternList ------------------------------------------------------ ***
+
+/**
+ * Thin wrapper around a MaybeStackArray<T, stackCapacity>.
+ * The list does not store how many elements are in use; callers keep the
+ * length themselves and pass it to every operation.
+ */
+template<typename T, int32_t stackCapacity>
+class MessagePatternList : public UMemory {
+public:
+ MessagePatternList() {}
+ // Copies the first length elements of other into this list.
+ void copyFrom(const MessagePatternList<T, stackCapacity> &other,
+ int32_t length,
+ UErrorCode &errorCode);
+ // Makes sure that there is room for an element at index oldLength.
+ UBool ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode);
+ // Element-wise comparison of the first length elements using T's operator!=.
+ // Both lists must hold at least length elements.
+ UBool equals(const MessagePatternList<T, stackCapacity> &other, int32_t length) const {
+ for(int32_t i=0; i<length; ++i) {
+ if(a[i]!=other.a[i]) { return FALSE; }
+ }
+ return TRUE;
+ }
+
+ // The underlying storage; accessed directly by MessagePattern.
+ MaybeStackArray<T, stackCapacity> a;
+};
+
+/**
+ * Copies the first length elements of other into this list, growing the
+ * array first if its capacity is too small.
+ * Does nothing if errorCode already indicates a failure or if length<=0.
+ * Sets U_MEMORY_ALLOCATION_ERROR if the array cannot be resized.
+ */
+template<typename T, int32_t stackCapacity>
+void
+MessagePatternList<T, stackCapacity>::copyFrom(
+        const MessagePatternList<T, stackCapacity> &other,
+        int32_t length,
+        UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode) || length<=0) {
+        return;
+    }
+    if(length>a.getCapacity() && NULL==a.resize(length)) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    // Elements are copied as raw bytes.
+    uprv_memcpy(a.getAlias(), other.a.getAlias(), (size_t)length*sizeof(T));
+}
+
+/**
+ * Makes sure that the array can hold more than oldLength elements.
+ * If it cannot, the array is resized to 2*oldLength while keeping the
+ * first oldLength elements.
+ * @return TRUE on success; FALSE if errorCode was already a failure or if
+ *         the resize failed (then errorCode=U_MEMORY_ALLOCATION_ERROR)
+ */
+template<typename T, int32_t stackCapacity>
+UBool
+MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    if(a.getCapacity()<=oldLength && a.resize(2*oldLength, oldLength)==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return FALSE;
+    }
+    return TRUE;
+}
+
+// MessagePatternList specializations -------------------------------------- ***
+
+// List of double values with a stack capacity of 8 elements.
+class MessagePatternDoubleList : public MessagePatternList<double, 8> {
+};
+
+// List of MessagePattern::Part objects with a stack capacity of 32 elements.
+class MessagePatternPartsList : public MessagePatternList<MessagePattern::Part, 32> {
+};
+
+// MessagePattern constructors etc. ---------------------------------------- ***
+
+/**
+ * Constructs an empty pattern that uses the default apostrophe mode
+ * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
+ * init() allocates the parts list and reports allocation failure via errorCode.
+ */
+MessagePattern::MessagePattern(UErrorCode &errorCode)
+ : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
+ partsList(NULL), parts(NULL), partsLength(0),
+ numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
+ hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) {
+ init(errorCode);
+}
+
+/**
+ * Constructs an empty pattern that uses the given apostrophe mode.
+ * init() allocates the parts list and reports allocation failure via errorCode.
+ */
+MessagePattern::MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode)
+ : aposMode(mode),
+ partsList(NULL), parts(NULL), partsLength(0),
+ numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
+ hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) {
+ init(errorCode);
+}
+
+/**
+ * Constructs a pattern with the default apostrophe mode and parses the
+ * given pattern string.
+ * The string is parsed only if init() succeeded.
+ */
+MessagePattern::MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
+ : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
+ partsList(NULL), parts(NULL), partsLength(0),
+ numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
+ hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) {
+ if(init(errorCode)) {
+ parse(pattern, parseError, errorCode);
+ }
+}
+
+/**
+ * Allocates the parts list and points parts at its storage.
+ * @return TRUE on success; FALSE if errorCode was already a failure or if
+ *         the allocation failed (then errorCode=U_MEMORY_ALLOCATION_ERROR)
+ */
+UBool
+MessagePattern::init(UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    partsList=new MessagePatternPartsList();
+    if(partsList!=NULL) {
+        parts=partsList->a.getAlias();
+        return TRUE;
+    }
+    errorCode=U_MEMORY_ALLOCATION_ERROR;
+    return FALSE;
+}
+
+/**
+ * Copy constructor.
+ * There is no UErrorCode parameter, so a failure in copyStorage() cannot be
+ * reported to the caller; in that case the new object is reset with clear().
+ */
+MessagePattern::MessagePattern(const MessagePattern &other)
+ : UObject(other), aposMode(other.aposMode), msg(other.msg),
+ partsList(NULL), parts(NULL), partsLength(0),
+ numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
+ hasArgNames(other.hasArgNames), hasArgNumbers(other.hasArgNumbers),
+ needsAutoQuoting(other.needsAutoQuoting) {
+ // Local error code; its value is not visible to the caller.
+ UErrorCode errorCode=U_ZERO_ERROR;
+ if(!copyStorage(other, errorCode)) {
+ clear();
+ }
+}
+
+/**
+ * Copy assignment. Self-assignment is a no-op.
+ * There is no UErrorCode parameter, so a failure in copyStorage() cannot be
+ * reported to the caller; in that case this object is reset with clear().
+ */
+MessagePattern &
+MessagePattern::operator=(const MessagePattern &other) {
+ if(this==&other) {
+ return *this;
+ }
+ aposMode=other.aposMode;
+ msg=other.msg;
+ hasArgNames=other.hasArgNames;
+ hasArgNumbers=other.hasArgNumbers;
+ needsAutoQuoting=other.needsAutoQuoting;
+ // Local error code; its value is not visible to the caller.
+ UErrorCode errorCode=U_ZERO_ERROR;
+ if(!copyStorage(other, errorCode)) {
+ clear();
+ }
+ return *this;
+}
+
+/**
+ * Copies the parts and the numeric values of other into this object,
+ * allocating the two lists on demand.
+ * The parts/numericValues pointers and both lengths are reset first, so
+ * after a failure this object holds no parts and no numeric values.
+ * @return TRUE on success; FALSE if errorCode was already a failure or an
+ *         allocation failed (then errorCode=U_MEMORY_ALLOCATION_ERROR)
+ */
+UBool
+MessagePattern::copyStorage(const MessagePattern &other, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return FALSE;
+ }
+ parts=NULL;
+ partsLength=0;
+ numericValues=NULL;
+ numericValuesLength=0;
+ if(partsList==NULL) {
+ partsList=new MessagePatternPartsList();
+ if(partsList==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return FALSE;
+ }
+ parts=partsList->a.getAlias();
+ }
+ // NOTE: if partsList already existed and other has no parts, parts stays
+ // NULL here (with partsLength==0); postParse() sets it again later.
+ if(other.partsLength>0) {
+ partsList->copyFrom(*other.partsList, other.partsLength, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return FALSE;
+ }
+ // copyFrom() may have resized the array, so fetch the pointer again.
+ parts=partsList->a.getAlias();
+ partsLength=other.partsLength;
+ }
+ // The numeric values list is only allocated if other has numeric values.
+ if(other.numericValuesLength>0) {
+ if(numericValuesList==NULL) {
+ numericValuesList=new MessagePatternDoubleList();
+ if(numericValuesList==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return FALSE;
+ }
+ numericValues=numericValuesList->a.getAlias();
+ }
+ numericValuesList->copyFrom(
+ *other.numericValuesList, other.numericValuesLength, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return FALSE;
+ }
+ numericValues=numericValuesList->a.getAlias();
+ numericValuesLength=other.numericValuesLength;
+ }
+ return TRUE;
+}
+
+/**
+ * Destructor: deletes both lists. Either pointer may be NULL.
+ */
+MessagePattern::~MessagePattern() {
+ delete partsList;
+ delete numericValuesList;
+}
+
+// MessagePattern API ------------------------------------------------------ ***
+
+/**
+ * Parses a whole message pattern string.
+ * Resets this object (preParse), parses the top-level message at nesting
+ * level 0 with no parent argument type, then refreshes the array pointers
+ * (postParse). Errors are reported through errorCode and parseError.
+ * @return *this
+ */
+MessagePattern &
+MessagePattern::parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) {
+ preParse(pattern, parseError, errorCode);
+ parseMessage(0, 0, 0, UMSGPAT_ARG_TYPE_NONE, parseError, errorCode);
+ postParse();
+ return *this;
+}
+
+/**
+ * Parses the pattern string as a standalone choice style, starting at
+ * index 0 and nesting level 0.
+ * Errors are reported through errorCode and parseError.
+ * @return *this
+ */
+MessagePattern &
+MessagePattern::parseChoiceStyle(const UnicodeString &pattern,
+ UParseError *parseError, UErrorCode &errorCode) {
+ preParse(pattern, parseError, errorCode);
+ parseChoiceStyle(0, 0, parseError, errorCode);
+ postParse();
+ return *this;
+}
+
+/**
+ * Parses the pattern string as a standalone plural style
+ * (UMSGPAT_ARG_TYPE_PLURAL), starting at index 0 and nesting level 0.
+ * Errors are reported through errorCode and parseError.
+ * @return *this
+ */
+MessagePattern &
+MessagePattern::parsePluralStyle(const UnicodeString &pattern,
+ UParseError *parseError, UErrorCode &errorCode) {
+ preParse(pattern, parseError, errorCode);
+ parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_PLURAL, 0, 0, parseError, errorCode);
+ postParse();
+ return *this;
+}
+
+/**
+ * Parses the pattern string as a standalone select style
+ * (UMSGPAT_ARG_TYPE_SELECT), starting at index 0 and nesting level 0.
+ * Errors are reported through errorCode and parseError.
+ * @return *this
+ */
+MessagePattern &
+MessagePattern::parseSelectStyle(const UnicodeString &pattern,
+ UParseError *parseError, UErrorCode &errorCode) {
+ preParse(pattern, parseError, errorCode);
+ parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_SELECT, 0, 0, parseError, errorCode);
+ postParse();
+ return *this;
+}
+
+/**
+ * Resets this object to an empty pattern: empty message string, no parts,
+ * no numeric values, all flags cleared. The allocated lists are kept.
+ */
+void
+MessagePattern::clear() {
+    // Mostly the same as preParse().
+    msg.remove();
+    partsLength=0;
+    numericValuesLength=0;
+    hasArgNames=FALSE;
+    hasArgNumbers=FALSE;
+    needsAutoQuoting=FALSE;
+}
+
+/**
+ * Two patterns are equal if they have the same apostrophe mode, the same
+ * message string and the same sequence of parts.
+ */
+UBool
+MessagePattern::operator==(const MessagePattern &other) const {
+    if(this==&other) {
+        return TRUE;
+    }
+    if(aposMode!=other.aposMode || !(msg==other.msg) || partsLength!=other.partsLength) {
+        return FALSE;
+    }
+    // parts.equals(o.parts)
+    // No need to compare numericValues if msg and parts are the same.
+    return partsLength==0 || partsList->equals(*other.partsList, partsLength);
+}
+
+/**
+ * Combines the apostrophe mode, the message string's hash code, the number
+ * of parts and each part's hash code, multiplying by 37 at every step.
+ */
+int32_t
+MessagePattern::hashCode() const {
+    int32_t result=aposMode*37+msg.hashCode();
+    result=result*37+partsLength;
+    int32_t partIndex=0;
+    while(partIndex<partsLength) {
+        result=result*37+parts[partIndex].hashCode();
+        ++partIndex;
+    }
+    return result;
+}
+
+/**
+ * Validates an argument name or number.
+ * @return UMSGPAT_ARG_NAME_NOT_VALID if name is not a pattern identifier;
+ *         otherwise the result of parseArgNumber() for the whole string
+ */
+int32_t
+MessagePattern::validateArgumentName(const UnicodeString &name) {
+    const int32_t nameLength=name.length();
+    if(PatternProps::isIdentifier(name.getBuffer(), nameLength)) {
+        return parseArgNumber(name, 0, nameLength);
+    }
+    return UMSGPAT_ARG_NAME_NOT_VALID;
+}
+
+/**
+ * Returns the message string with the characters from all INSERT_CHAR
+ * parts inserted at their indexes. Returns a copy of the unmodified
+ * message if no auto-quoting is needed.
+ */
+UnicodeString
+MessagePattern::autoQuoteApostropheDeep() const {
+    if(!needsAutoQuoting) {
+        return msg;
+    }
+    UnicodeString modified(msg);
+    // Walk the parts from last to first so that earlier insertion
+    // indexes stay valid while characters are being inserted.
+    for(int32_t i=countParts()-1; i>=0; --i) {
+        const Part &part=getPart(i);
+        if(part.getType()!=UMSGPAT_PART_TYPE_INSERT_CHAR) {
+            continue;
+        }
+        modified.insert(part.index, (UChar)part.value);
+    }
+    return modified;
+}
+
+/**
+ * Returns the numeric value of a part: the part's own value for ARG_INT,
+ * the numericValues entry that the part's value indexes for ARG_DOUBLE,
+ * and UMSGPAT_NO_NUMERIC_VALUE for every other part type.
+ */
+double
+MessagePattern::getNumericValue(const Part &part) const {
+    switch(part.type) {
+    case UMSGPAT_PART_TYPE_ARG_INT:
+        return part.value;
+    case UMSGPAT_PART_TYPE_ARG_DOUBLE:
+        return numericValues[part.value];
+    default:
+        return UMSGPAT_NO_NUMERIC_VALUE;
+    }
+}
+
+/**
+ * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
+ * The offset is present if the part at pluralStart carries a numeric value.
+ * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
+ * @return the "offset:" value.
+ * @draft ICU 4.8
+ */
+double
+MessagePattern::getPluralOffset(int32_t pluralStart) const {
+    const Part &firstPart=getPart(pluralStart);
+    return Part::hasNumericValue(firstPart.type) ? getNumericValue(firstPart) : 0;
+}
+
+// MessagePattern::Part ---------------------------------------------------- ***
+
+/**
+ * Two parts are equal if type, index, length, value and limitPartIndex
+ * all match.
+ */
+UBool
+MessagePattern::Part::operator==(const Part &other) const {
+    if(this==&other) {
+        return TRUE;
+    }
+    if(type!=other.type || index!=other.index) {
+        return FALSE;
+    }
+    if(length!=other.length || value!=other.value) {
+        return FALSE;
+    }
+    return limitPartIndex==other.limitPartIndex;
+}
+
+// MessagePattern parser --------------------------------------------------- ***
+
+/**
+ * Prepares this object for parsing a new pattern: clears the parse error
+ * (if one was passed in), stores the pattern string and resets the flags
+ * and the parts/numeric values lengths.
+ * Does nothing if errorCode already indicates a failure.
+ */
+void
+MessagePattern::preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) {
+    if(U_SUCCESS(errorCode)) {
+        if(parseError!=NULL) {
+            parseError->line=0;
+            parseError->offset=0;
+            parseError->preContext[0]=0;
+            parseError->postContext[0]=0;
+        }
+        msg=pattern;
+        partsLength=0;
+        numericValuesLength=0;
+        hasArgNames=FALSE;
+        hasArgNumbers=FALSE;
+        needsAutoQuoting=FALSE;
+    }
+}
+
+/**
+ * Refreshes the parts and numericValues pointers from their lists after
+ * parsing. A list that has not been allocated leaves its pointer unchanged.
+ */
+void
+MessagePattern::postParse() {
+ if(partsList!=NULL) {
+ parts=partsList->a.getAlias();
+ }
+ if(numericValuesList!=NULL) {
+ numericValues=numericValuesList->a.getAlias();
+ }
+}
+
+/**
+ * Parses one message (the whole pattern or a nested sub-message) and adds
+ * its parts, from MSG_START to MSG_LIMIT.
+ * @param index start index of the message in msg
+ * @param msgStartLength length of the MSG_START part; the text is scanned
+ *        from index+msgStartLength
+ * @param nestingLevel nesting depth, stored as the value of MSG_START and
+ *        MSG_LIMIT; must not exceed Part::MAX_VALUE
+ * @param parentType type of the enclosing argument; decides whether '|'
+ *        and '#' are syntax characters
+ * @return for a choice sub-message the index of its terminator ('}' or '|'),
+ *         for other nested messages the index after the closing '}',
+ *         at the end of the string msg.length(); 0 after any failure
+ */
+int32_t
+MessagePattern::parseMessage(int32_t index, int32_t msgStartLength,
+ int32_t nestingLevel, UMessagePatternArgType parentType,
+ UParseError *parseError, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ if(nestingLevel>Part::MAX_VALUE) {
+ errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ // Remember the MSG_START part index for the matching MSG_LIMIT part.
+ int32_t msgStart=partsLength;
+ addPart(UMSGPAT_PART_TYPE_MSG_START, index, msgStartLength, nestingLevel, errorCode);
+ index+=msgStartLength;
+ for(;;) { // while(index<msg.length()) with U_FAILURE(errorCode) check
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ if(index>=msg.length()) {
+ break;
+ }
+ UChar c=msg.charAt(index++);
+ if(c==u_apos) {
+ if(index==msg.length()) {
+ // The apostrophe is the last character in the pattern.
+ // Add a Part for auto-quoting.
+ addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
+ u_apos, errorCode); // value=char to be inserted
+ needsAutoQuoting=TRUE;
+ } else {
+ c=msg.charAt(index);
+ if(c==u_apos) {
+ // double apostrophe, skip the second one
+ addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode);
+ } else if(
+ aposMode==UMSGPAT_APOS_DOUBLE_REQUIRED ||
+ c==u_leftCurlyBrace || c==u_rightCurlyBrace ||
+ (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe) ||
+ (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound)
+ ) {
+ // skip the quote-starting apostrophe
+ addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index-1, 1, 0, errorCode);
+ // find the end of the quoted literal text
+ for(;;) {
+ index=msg.indexOf(u_apos, index+1);
+ if(index>=0) {
+ if(/*(index+1)<msg.length() &&*/ msg.charAt(index+1)==u_apos) {
+ // double apostrophe inside quoted literal text
+ // still encodes a single apostrophe, skip the second one
+ addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, ++index, 1, 0, errorCode);
+ } else {
+ // skip the quote-ending apostrophe
+ addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode);
+ break;
+ }
+ } else {
+ // The quoted text reaches to the end of the message.
+ index=msg.length();
+ // Add a Part for auto-quoting.
+ addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
+ u_apos, errorCode); // value=char to be inserted
+ needsAutoQuoting=TRUE;
+ break;
+ }
+ }
+ } else {
+ // Interpret the apostrophe as literal text.
+ // Add a Part for auto-quoting.
+ addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
+ u_apos, errorCode); // value=char to be inserted
+ needsAutoQuoting=TRUE;
+ }
+ }
+ } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) {
+ // The unquoted # in a plural message fragment will be replaced
+ // with the (number-offset).
+ addPart(UMSGPAT_PART_TYPE_REPLACE_NUMBER, index-1, 1, 0, errorCode);
+ } else if(c==u_leftCurlyBrace) {
+ // Start of an argument; parseArg() returns the index after its '}'.
+ index=parseArg(index-1, 1, nestingLevel, parseError, errorCode);
+ } else if((nestingLevel>0 && c==u_rightCurlyBrace) ||
+ (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe)) {
+ // Finish the message before the terminator.
+ // In a choice style, report the "}" substring only for the following ARG_LIMIT,
+ // not for this MSG_LIMIT.
+ int32_t limitLength=(parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_rightCurlyBrace) ? 0 : 1;
+ addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index-1, limitLength,
+ nestingLevel, errorCode);
+ if(parentType==UMSGPAT_ARG_TYPE_CHOICE) {
+ // Let the choice style parser see the '}' or '|'.
+ return index-1;
+ } else {
+ // continue parsing after the '}'
+ return index;
+ }
+ } // else: c is part of literal text
+ }
+ // The end of the string was reached without a terminator.
+ if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
+ setParseError(parseError, 0); // Unmatched '{' braces in message.
+ errorCode=U_UNMATCHED_BRACES;
+ return 0;
+ }
+ addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index, 0, nestingLevel, errorCode);
+ return index;
+}
+
+/**
+ * Parses one argument, from its opening '{' to its closing '}', and adds
+ * its parts: ARG_START, then ARG_NUMBER or ARG_NAME, optionally ARG_TYPE
+ * and the style parts, and finally ARG_LIMIT.
+ * @param index index of the argument start in msg
+ * @param argStartLength length of the ARG_START part
+ * @param nestingLevel nesting depth, passed on to the style parsers
+ * @return the index after the closing '}', or 0 where the function
+ *         detects an error itself
+ */
+int32_t
+MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
+ UParseError *parseError, UErrorCode &errorCode) {
+ // Remember the ARG_START part index: its value is patched once the
+ // argument type is known, and ARG_LIMIT is linked to it.
+ int32_t argStart=partsLength;
+ UMessagePatternArgType argType=UMSGPAT_ARG_TYPE_NONE;
+ addPart(UMSGPAT_PART_TYPE_ARG_START, index, argStartLength, argType, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ int32_t nameIndex=index=skipWhiteSpace(index+argStartLength);
+ if(index==msg.length()) {
+ setParseError(parseError, 0); // Unmatched '{' braces in message.
+ errorCode=U_UNMATCHED_BRACES;
+ return 0;
+ }
+ // parse argument name or number
+ index=skipIdentifier(index);
+ int32_t number=parseArgNumber(nameIndex, index);
+ if(number>=0) {
+ int32_t length=index-nameIndex;
+ if(length>Part::MAX_LENGTH || number>Part::MAX_VALUE) {
+ setParseError(parseError, nameIndex); // Argument number too large.
+ errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ hasArgNumbers=TRUE;
+ addPart(UMSGPAT_PART_TYPE_ARG_NUMBER, nameIndex, length, number, errorCode);
+ } else if(number==UMSGPAT_ARG_NAME_NOT_NUMBER) {
+ int32_t length=index-nameIndex;
+ if(length>Part::MAX_LENGTH) {
+ setParseError(parseError, nameIndex); // Argument name too long.
+ errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ hasArgNames=TRUE;
+ addPart(UMSGPAT_PART_TYPE_ARG_NAME, nameIndex, length, 0, errorCode);
+ } else { // number<-1 (ARG_NAME_NOT_VALID)
+ setParseError(parseError, nameIndex); // Bad argument syntax.
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ }
+ index=skipWhiteSpace(index);
+ if(index==msg.length()) {
+ setParseError(parseError, 0); // Unmatched '{' braces in message.
+ errorCode=U_UNMATCHED_BRACES;
+ return 0;
+ }
+ UChar c=msg.charAt(index);
+ if(c==u_rightCurlyBrace) {
+ // all done
+ } else if(c!=u_comma) {
+ setParseError(parseError, nameIndex); // Bad argument syntax.
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ } else /* ',' */ {
+ // parse argument type: case-sensitive a-zA-Z
+ int32_t typeIndex=index=skipWhiteSpace(index+1);
+ while(index<msg.length() && isArgTypeChar(msg.charAt(index))) {
+ ++index;
+ }
+ int32_t length=index-typeIndex;
+ index=skipWhiteSpace(index);
+ if(index==msg.length()) {
+ setParseError(parseError, 0); // Unmatched '{' braces in message.
+ errorCode=U_UNMATCHED_BRACES;
+ return 0;
+ }
+ // The type name must not be empty and must be followed by ',' or '}'.
+ if(length==0 || ((c=msg.charAt(index))!=u_comma && c!=u_rightCurlyBrace)) {
+ setParseError(parseError, nameIndex); // Bad argument syntax.
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ }
+ if(length>Part::MAX_LENGTH) {
+ setParseError(parseError, nameIndex); // Argument type name too long.
+ errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ // Every type name is SIMPLE unless it matches a complex-type keyword below.
+ argType=UMSGPAT_ARG_TYPE_SIMPLE;
+ if(length==6) {
+ // case-insensitive comparisons for complex-type names
+ if(isChoice(typeIndex)) {
+ argType=UMSGPAT_ARG_TYPE_CHOICE;
+ } else if(isPlural(typeIndex)) {
+ argType=UMSGPAT_ARG_TYPE_PLURAL;
+ } else if(isSelect(typeIndex)) {
+ argType=UMSGPAT_ARG_TYPE_SELECT;
+ }
+ } else if(length==13) {
+ // 13 code units: "select" (6) followed by "ordinal" (7).
+ if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) {
+ argType=UMSGPAT_ARG_TYPE_SELECTORDINAL;
+ }
+ }
+ // change the ARG_START type from NONE to argType
+ partsList->a[argStart].value=(int16_t)argType;
+ if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
+ addPart(UMSGPAT_PART_TYPE_ARG_TYPE, typeIndex, length, 0, errorCode);
+ }
+ // look for an argument style (pattern)
+ if(c==u_rightCurlyBrace) {
+ if(argType!=UMSGPAT_ARG_TYPE_SIMPLE) {
+ setParseError(parseError, nameIndex); // No style field for complex argument.
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ }
+ } else /* ',' */ {
+ ++index;
+ if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
+ index=parseSimpleStyle(index, parseError, errorCode);
+ } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) {
+ index=parseChoiceStyle(index, nestingLevel, parseError, errorCode);
+ } else {
+ index=parsePluralOrSelectStyle(argType, index, nestingLevel, parseError, errorCode);
+ }
+ }
+ }
+ // Argument parsing stopped on the '}'.
+ addLimitPart(argStart, UMSGPAT_PART_TYPE_ARG_LIMIT, index, 1, argType, errorCode);
+ return index+1;
+}
+
+/**
+ * Parses the style text of a simple argument up to, but not including,
+ * the '}' that closes the argument, and adds one ARG_STYLE part for it.
+ * Nested '{'...'}' pairs and apostrophe-quoted text belong to the style.
+ * @param index start index of the style text in msg
+ * @return the index of the closing '}', or 0 on failure
+ */
+int32_t
+MessagePattern::parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ int32_t start=index;
+ int32_t nestedBraces=0;
+ while(index<msg.length()) {
+ UChar c=msg.charAt(index++);
+ if(c==u_apos) {
+ // Treat apostrophe as quoting but include it in the style part.
+ // Find the end of the quoted literal text.
+ index=msg.indexOf(u_apos, index);
+ if(index<0) {
+ // Quoted literal argument style text reaches to the end of the message.
+ setParseError(parseError, start);
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ }
+ // skip the quote-ending apostrophe
+ ++index;
+ } else if(c==u_leftCurlyBrace) {
+ ++nestedBraces;
+ } else if(c==u_rightCurlyBrace) {
+ if(nestedBraces>0) {
+ --nestedBraces;
+ } else {
+ // Unnested '}': step back onto it; the style text is [start, index[.
+ int32_t length=--index-start;
+ if(length>Part::MAX_LENGTH) {
+ setParseError(parseError, start); // Argument style text too long.
+ errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ addPart(UMSGPAT_PART_TYPE_ARG_STYLE, start, length, 0, errorCode);
+ return index;
+ }
+ } // c is part of literal text
+ }
+ setParseError(parseError, 0); // Unmatched '{' braces in message.
+ errorCode=U_UNMATCHED_BRACES;
+ return 0;
+}
+
+/**
+ * Parses a choice style: |-separated (number, separator, message) triples.
+ * For each triple it adds a numeric part (via parseDouble), an ARG_SELECTOR
+ * part for the separator, and the parts of the message fragment.
+ * @param index start index of the choice style in msg
+ * @param nestingLevel nesting depth of the enclosing message; the message
+ *        fragments are parsed at nestingLevel+1
+ * @return the index of the terminating '}' or msg.length(); 0 on failure
+ */
+int32_t
+MessagePattern::parseChoiceStyle(int32_t index, int32_t nestingLevel,
+ UParseError *parseError, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ int32_t start=index;
+ index=skipWhiteSpace(index);
+ if(index==msg.length() || msg.charAt(index)==u_rightCurlyBrace) {
+ setParseError(parseError, 0); // Missing choice argument pattern.
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ }
+ for(;;) {
+ // The choice argument style contains |-separated (number, separator, message) triples.
+ // Parse the number.
+ int32_t numberIndex=index;
+ index=skipDouble(index);
+ int32_t length=index-numberIndex;
+ if(length==0) {
+ setParseError(parseError, start); // Bad choice pattern syntax.
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ }
+ if(length>Part::MAX_LENGTH) {
+ setParseError(parseError, numberIndex); // Choice number too long.
+ errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ // TRUE: the infinity sign is allowed in choice numbers.
+ parseDouble(numberIndex, index, TRUE, parseError, errorCode); // adds ARG_INT or ARG_DOUBLE
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ // Parse the separator.
+ index=skipWhiteSpace(index);
+ if(index==msg.length()) {
+ setParseError(parseError, start); // Bad choice pattern syntax.
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ }
+ UChar c=msg.charAt(index);
+ if(!(c==u_pound || c==u_lessThan || c==u_lessOrEqual)) { // U+2264 is <=
+ setParseError(parseError, start); // Expected choice separator (#<\u2264) instead of c.
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ }
+ addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, index, 1, 0, errorCode);
+ // Parse the message fragment.
+ index=parseMessage(++index, 0, nestingLevel+1, UMSGPAT_ARG_TYPE_CHOICE, parseError, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length().
+ if(index==msg.length()) {
+ return index;
+ }
+ if(msg.charAt(index)==u_rightCurlyBrace) {
+ // A '}' terminator is an error unless inMessageFormatPattern() is true.
+ if(!inMessageFormatPattern(nestingLevel)) {
+ setParseError(parseError, start); // Bad choice pattern syntax.
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ }
+ return index;
+ } // else the terminator is '|'
+ index=skipWhiteSpace(index+1);
+ }
+}
+
+/**
+ * Parses a plural, selectordinal or select style: an optional "offset:"
+ * value (plural styles only) followed by (selector, {message}) pairs.
+ * The style must contain an "other" selector.
+ * @param argType the argument type; decides whether "=number" selectors
+ *        and "offset:" are recognized
+ * @param index start index of the style in msg
+ * @param nestingLevel nesting depth of the enclosing message; the message
+ *        fragments are parsed at nestingLevel+1
+ * @return the index of the terminating '}' or msg.length(); 0 on failure
+ */
+int32_t
+MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType,
+ int32_t index, int32_t nestingLevel,
+ UParseError *parseError, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ int32_t start=index;
+ // isEmpty: nothing parsed yet (neither an offset nor a selector-message pair).
+ UBool isEmpty=TRUE;
+ // hasOther: an "other" selector has been seen.
+ UBool hasOther=FALSE;
+ for(;;) {
+ // First, collect the selector looking for a small set of terminators.
+ // It would be a little faster to consider the syntax of each possible
+ // token right here, but that makes the code too complicated.
+ index=skipWhiteSpace(index);
+ UBool eos=index==msg.length();
+ if(eos || msg.charAt(index)==u_rightCurlyBrace) {
+ // Error if the string ends while inMessageFormatPattern() is true,
+ // or if a '}' appears while it is false.
+ if(eos==inMessageFormatPattern(nestingLevel)) {
+ setParseError(parseError, start); // Bad plural/select pattern syntax.
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ }
+ if(!hasOther) {
+ setParseError(parseError, 0); // Missing 'other' keyword in plural/select pattern.
+ errorCode=U_DEFAULT_KEYWORD_MISSING;
+ return 0;
+ }
+ return index;
+ }
+ int32_t selectorIndex=index;
+ if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && msg.charAt(selectorIndex)==u_equal) {
+ // explicit-value plural selector: =double
+ index=skipDouble(index+1);
+ int32_t length=index-selectorIndex;
+ // length includes the '='; 1 means that no number follows it.
+ if(length==1) {
+ setParseError(parseError, start); // Bad plural/select pattern syntax.
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ }
+ if(length>Part::MAX_LENGTH) {
+ setParseError(parseError, selectorIndex); // Argument selector too long.
+ errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode);
+ parseDouble(selectorIndex+1, index, FALSE,
+ parseError, errorCode); // adds ARG_INT or ARG_DOUBLE
+ } else {
+ index=skipIdentifier(index);
+ int32_t length=index-selectorIndex;
+ if(length==0) {
+ setParseError(parseError, start); // Bad plural/select pattern syntax.
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ }
+ // Note: The ':' in "offset:" is just beyond the skipIdentifier() range.
+ if( UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && length==6 && index<msg.length() &&
+ 0==msg.compare(selectorIndex, 7, kOffsetColon, 0, 7)
+ ) {
+ // plural offset, not a selector
+ if(!isEmpty) {
+ // Plural argument 'offset:' (if present) must precede key-message pairs.
+ setParseError(parseError, start);
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ }
+ // allow whitespace between offset: and its value
+ int32_t valueIndex=skipWhiteSpace(index+1); // The ':' is at index.
+ index=skipDouble(valueIndex);
+ if(index==valueIndex) {
+ setParseError(parseError, start); // Missing value for plural 'offset:'.
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ }
+ if((index-valueIndex)>Part::MAX_LENGTH) {
+ setParseError(parseError, valueIndex); // Plural offset value too long.
+ errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ parseDouble(valueIndex, index, FALSE,
+ parseError, errorCode); // adds ARG_INT or ARG_DOUBLE
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ // Setting isEmpty here also rejects a second "offset:".
+ isEmpty=FALSE;
+ continue; // no message fragment after the offset
+ } else {
+ // normal selector word
+ if(length>Part::MAX_LENGTH) {
+ setParseError(parseError, selectorIndex); // Argument selector too long.
+ errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode);
+ if(0==msg.compare(selectorIndex, length, kOther, 0, 5)) {
+ hasOther=TRUE;
+ }
+ }
+ }
+ // Catches failures from the addPart()/parseDouble() calls above.
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+
+ // parse the message fragment following the selector
+ index=skipWhiteSpace(index);
+ if(index==msg.length() || msg.charAt(index)!=u_leftCurlyBrace) {
+ setParseError(parseError, selectorIndex); // No message fragment after plural/select selector.
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return 0;
+ }
+ index=parseMessage(index, 1, nestingLevel+1, argType, parseError, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ isEmpty=FALSE;
+ }
+}
+
+/**
+ * Interprets the identifier s[start, limit) as an argument number.
+ *
+ * If the identifier contains only ASCII digits, then it is an argument _number_
+ * and must not have leading zeros (except "0" itself) and must not be too large.
+ * Otherwise it is an argument _name_.
+ *
+ * @param s     the string containing the identifier
+ * @param start index of the first identifier character
+ * @param limit index just past the last identifier character
+ * @return the argument number (>=0) for a well-formed number;
+ *         UMSGPAT_ARG_NAME_NOT_NUMBER as soon as a non-digit is seen;
+ *         UMSGPAT_ARG_NAME_NOT_VALID for an empty range, or for an all-digit
+ *         identifier with a leading zero or an out-of-range value.
+ */
+int32_t
+MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit) {
+    if(start>=limit) {
+        return UMSGPAT_ARG_NAME_NOT_VALID;
+    }
+    int32_t number;
+    // Defer numeric errors until we know there are only digits.
+    UBool badNumber;
+    UChar c=s.charAt(start++);
+    if(c==0x30) {
+        if(start==limit) {
+            return 0;
+        } else {
+            number=0;
+            badNumber=TRUE;  // leading zero
+        }
+    } else if(0x31<=c && c<=0x39) {
+        number=c-0x30;
+        badNumber=FALSE;
+    } else {
+        return UMSGPAT_ARG_NAME_NOT_NUMBER;
+    }
+    while(start<limit) {
+        c=s.charAt(start++);
+        if(0x30<=c && c<=0x39) {
+            if(number>=INT32_MAX/10) {
+                // Overflow: remember the error but stop accumulating.
+                // Evaluating number*10+digit here would overflow a signed
+                // 32-bit integer, which is undefined behavior. The value is
+                // never returned once badNumber is set, and number stays at or
+                // above the threshold so later digits take this branch too.
+                badNumber=TRUE;
+            } else {
+                number=number*10+(c-0x30);
+            }
+        } else {
+            return UMSGPAT_ARG_NAME_NOT_NUMBER;
+        }
+    }
+    // There are only ASCII digits.
+    if(badNumber) {
+        return UMSGPAT_ARG_NAME_NOT_VALID;
+    } else {
+        return number;
+    }
+}
+
+/**
+ * Parses msg[start, limit) as a numeric value and appends one part for it.
+ *
+ * - An optionally signed run of ASCII digits whose magnitude stays within
+ *   Part::MAX_VALUE (one more for a negative value) becomes an ARG_INT part.
+ * - An optionally signed U+221E is accepted only if allowInfinity is set and
+ *   the symbol is the last character; it becomes an ARG_DOUBLE part.
+ * - Anything else is copied to a char buffer and parsed with uprv_strtod();
+ *   the whole range must be consumed, and the result becomes an ARG_DOUBLE part.
+ *
+ * On any failure the parse error is set at start and errorCode becomes
+ * U_PATTERN_SYNTAX_ERROR. Does nothing if errorCode already indicates failure.
+ * The caller must pass a non-empty range (asserted).
+ */
+void
+MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
+ UParseError *parseError, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+ U_ASSERT(start<limit);
+ // Fake loop: "break" jumps to the single error exit after the loop,
+ // every successful path returns from inside it.
+ for(;;) { /*loop doesn't iterate*/
+ // fast path for small integers and infinity
+ int32_t value=0;
+ int32_t isNegative=0; // not boolean so that we can easily add it to value
+ int32_t index=start;
+ UChar c=msg.charAt(index++);
+ // Consume one optional sign; a sign alone is not a number.
+ if(c==u_minus) {
+ isNegative=1;
+ if(index==limit) {
+ break; // no number
+ }
+ c=msg.charAt(index++);
+ } else if(c==u_plus) {
+ if(index==limit) {
+ break; // no number
+ }
+ c=msg.charAt(index++);
+ }
+ if(c==0x221e) { // infinity
+ // Valid only when permitted by the caller and nothing follows the symbol.
+ if(allowInfinity && index==limit) {
+ double infinity=uprv_getInfinity();
+ addArgDoublePart(
+ isNegative!=0 ? -infinity : infinity,
+ start, limit-start, errorCode);
+ return;
+ } else {
+ break;
+ }
+ }
+ // try to parse the number as a small integer but fall back to a double
+ while('0'<=c && c<='9') {
+ value=value*10+(c-'0');
+ // A negative value may have a magnitude one larger than a positive one.
+ if(value>(Part::MAX_VALUE+isNegative)) {
+ break; // not a small-enough integer
+ }
+ if(index==limit) {
+ // Only digits up to the limit: store the value directly in the part.
+ addPart(UMSGPAT_PART_TYPE_ARG_INT, start, limit-start,
+ isNegative!=0 ? -value : value, errorCode);
+ return;
+ }
+ c=msg.charAt(index++);
+ }
+ // Not a small integer: re-parse the whole range, including the sign,
+ // as a double from an invariant-character copy.
+ char numberChars[128];
+ int32_t capacity=(int32_t)sizeof(numberChars);
+ int32_t length=limit-start;
+ if(length>=capacity) {
+ break; // number too long
+ }
+ msg.extract(start, length, numberChars, capacity, US_INV);
+ if((int32_t)uprv_strlen(numberChars)<length) {
+ break; // contains non-invariant character that was turned into NUL
+ }
+ char *end;
+ double numericValue=uprv_strtod(numberChars, &end);
+ // The conversion must have consumed every character of the range.
+ if(end!=(numberChars+length)) {
+ break; // parsing error
+ }
+ addArgDoublePart(numericValue, start, length, errorCode);
+ return;
+ }
+ // Common error exit for every "break" above.
+ setParseError(parseError, start /*, limit*/); // Bad syntax for numeric value.
+ errorCode=U_PATTERN_SYNTAX_ERROR;
+ return;
+}
+
+/**
+ * Skips Pattern_White_Space in msg starting at index.
+ * @return the index of the first character that PatternProps::skipWhiteSpace()
+ *         does not skip (msg.length() if it skips to the end).
+ */
+int32_t
+MessagePattern::skipWhiteSpace(int32_t index) {
+    const UChar *const base=msg.getBuffer();
+    const int32_t remaining=msg.length()-index;
+    // Convert the returned pointer back into an index relative to the buffer start.
+    return (int32_t)(PatternProps::skipWhiteSpace(base+index, remaining)-base);
+}
+
+/**
+ * Skips an identifier in msg starting at index.
+ * @return the index of the first character that PatternProps::skipIdentifier()
+ *         does not skip (msg.length() if it skips to the end).
+ */
+int32_t
+MessagePattern::skipIdentifier(int32_t index) {
+    const UChar *const base=msg.getBuffer();
+    const int32_t remaining=msg.length()-index;
+    // Convert the returned pointer back into an index relative to the buffer start.
+    return (int32_t)(PatternProps::skipIdentifier(base+index, remaining)-base);
+}
+
+/**
+ * Skips the characters that may occur in a numeric value, starting at index:
+ * ASCII digits, '+', '-', '.', 'e', 'E' and U+221E (the infinity symbol,
+ * allowed for ChoiceFormat patterns). No syntax validation is done here.
+ * @return the index of the first character that is not in that set,
+ *         or msg.length() if there is none.
+ */
+int32_t
+MessagePattern::skipDouble(int32_t index) {
+    const int32_t msgLength=msg.length();
+    for(; index<msgLength; ++index) {
+        const UChar c=msg.charAt(index);
+        // Below '0' only the signs and the decimal point are accepted ...
+        const UBool lowOk= c>=0x30 || c==u_plus || c==u_minus || c==u_dot;
+        // ... and above '9' only the exponent letters and the infinity symbol.
+        const UBool highOk= c<=0x39 || c==u_e || c==u_E || c==0x221e;
+        if(!(lowOk && highOk)) {
+            break;
+        }
+    }
+    return index;
+}
+
+/**
+ * @return TRUE if c is an ASCII letter (a-z or A-Z).
+ */
+UBool
+MessagePattern::isArgTypeChar(UChar32 c) {
+    if(u_a<=c && c<=u_z) {
+        return TRUE;
+    }
+    return u_A<=c && c<=u_Z;
+}
+
+/**
+ * Tests whether the six characters of msg starting at index spell "choice",
+ * accepting the lowercase or uppercase ASCII letter at each position.
+ * Stops at the first mismatch.
+ */
+UBool
+MessagePattern::isChoice(int32_t index) {
+    UChar ch=msg.charAt(index);
+    if(ch!=u_c && ch!=u_C) { return FALSE; }
+    ch=msg.charAt(index+1);
+    if(ch!=u_h && ch!=u_H) { return FALSE; }
+    ch=msg.charAt(index+2);
+    if(ch!=u_o && ch!=u_O) { return FALSE; }
+    ch=msg.charAt(index+3);
+    if(ch!=u_i && ch!=u_I) { return FALSE; }
+    ch=msg.charAt(index+4);
+    if(ch!=u_c && ch!=u_C) { return FALSE; }
+    ch=msg.charAt(index+5);
+    return ch==u_e || ch==u_E;
+}
+
+/**
+ * Tests whether the six characters of msg starting at index spell "plural",
+ * accepting the lowercase or uppercase ASCII letter at each position.
+ * Stops at the first mismatch.
+ */
+UBool
+MessagePattern::isPlural(int32_t index) {
+    UChar ch=msg.charAt(index);
+    if(ch!=u_p && ch!=u_P) { return FALSE; }
+    ch=msg.charAt(index+1);
+    if(ch!=u_l && ch!=u_L) { return FALSE; }
+    ch=msg.charAt(index+2);
+    if(ch!=u_u && ch!=u_U) { return FALSE; }
+    ch=msg.charAt(index+3);
+    if(ch!=u_r && ch!=u_R) { return FALSE; }
+    ch=msg.charAt(index+4);
+    if(ch!=u_a && ch!=u_A) { return FALSE; }
+    ch=msg.charAt(index+5);
+    return ch==u_l || ch==u_L;
+}
+
+/**
+ * Tests whether the six characters of msg starting at index spell "select",
+ * accepting the lowercase or uppercase ASCII letter at each position.
+ * Stops at the first mismatch.
+ */
+UBool
+MessagePattern::isSelect(int32_t index) {
+    UChar ch=msg.charAt(index);
+    if(ch!=u_s && ch!=u_S) { return FALSE; }
+    ch=msg.charAt(index+1);
+    if(ch!=u_e && ch!=u_E) { return FALSE; }
+    ch=msg.charAt(index+2);
+    if(ch!=u_l && ch!=u_L) { return FALSE; }
+    ch=msg.charAt(index+3);
+    if(ch!=u_e && ch!=u_E) { return FALSE; }
+    ch=msg.charAt(index+4);
+    if(ch!=u_c && ch!=u_C) { return FALSE; }
+    ch=msg.charAt(index+5);
+    return ch==u_t || ch==u_T;
+}
+
+/**
+ * Tests whether the seven characters of msg starting at index spell "ordinal",
+ * accepting the lowercase or uppercase ASCII letter at each position.
+ * Stops at the first mismatch.
+ */
+UBool
+MessagePattern::isOrdinal(int32_t index) {
+    UChar ch=msg.charAt(index);
+    if(ch!=u_o && ch!=u_O) { return FALSE; }
+    ch=msg.charAt(index+1);
+    if(ch!=u_r && ch!=u_R) { return FALSE; }
+    ch=msg.charAt(index+2);
+    if(ch!=u_d && ch!=u_D) { return FALSE; }
+    ch=msg.charAt(index+3);
+    if(ch!=u_i && ch!=u_I) { return FALSE; }
+    ch=msg.charAt(index+4);
+    if(ch!=u_n && ch!=u_N) { return FALSE; }
+    ch=msg.charAt(index+5);
+    if(ch!=u_a && ch!=u_A) { return FALSE; }
+    ch=msg.charAt(index+6);
+    return ch==u_l || ch==u_L;
+}
+
+/**
+ * @return TRUE if nestingLevel is above zero, or if the first part in the
+ *         parts list is a MSG_START part.
+ */
+UBool
+MessagePattern::inMessageFormatPattern(int32_t nestingLevel) {
+    if(nestingLevel>0) {
+        return TRUE;
+    }
+    return partsList->a[0].type==UMSGPAT_PART_TYPE_MSG_START;
+}
+
+/**
+ * @return TRUE only if nestingLevel is exactly 1, the parent argument type is
+ *         CHOICE, and the first part in the parts list is not a MSG_START part.
+ */
+UBool
+MessagePattern::inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType) {
+    if(nestingLevel!=1 || parentType!=UMSGPAT_ARG_TYPE_CHOICE) {
+        return FALSE;
+    }
+    return partsList->a[0].type!=UMSGPAT_PART_TYPE_MSG_START;
+}
+
+/**
+ * Appends a new part to the parts list.
+ * Nothing is appended if the list cannot make room for one more entry;
+ * ensureCapacityForOneMore() receives errorCode for reporting that.
+ * length is stored as uint16_t and value as int16_t; limitPartIndex starts at 0.
+ */
+void
+MessagePattern::addPart(UMessagePatternPartType type, int32_t index, int32_t length,
+                        int32_t value, UErrorCode &errorCode) {
+    if(!partsList->ensureCapacityForOneMore(partsLength, errorCode)) {
+        return;
+    }
+    Part &newPart=partsList->a[partsLength];
+    ++partsLength;
+    newPart.type=type;
+    newPart.index=index;
+    newPart.length=(uint16_t)length;
+    newPart.value=(int16_t)value;
+    newPart.limitPartIndex=0;
+}
+
+/**
+ * Appends a limit part and links the part at index `start` to it:
+ * the start part's limitPartIndex is set to the position the new part
+ * is about to occupy, then the new part is added via addPart().
+ */
+void
+MessagePattern::addLimitPart(int32_t start,
+                             UMessagePatternPartType type, int32_t index, int32_t length,
+                             int32_t value, UErrorCode &errorCode) {
+    // Record the link before appending: addPart() advances partsLength.
+    const int32_t limitIndex=partsLength;
+    partsList->a[start].limitPartIndex=limitIndex;
+    addPart(type, index, length, value, errorCode);
+}
+
+/**
+ * Stores numericValue in the numeric-values list (creating the list on first
+ * use) and appends an ARG_DOUBLE part whose value is the index of that entry.
+ *
+ * Sets U_MEMORY_ALLOCATION_ERROR if the list cannot be allocated, and
+ * U_INDEX_OUTOFBOUNDS_ERROR if the entry index would exceed Part::MAX_VALUE.
+ * Does nothing if errorCode already indicates failure.
+ */
+void
+MessagePattern::addArgDoublePart(double numericValue, int32_t start, int32_t length,
+                                 UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    const int32_t slot=numericValuesLength;
+    if(numericValuesList==NULL) {
+        // First numeric value: allocate the list lazily.
+        numericValuesList=new MessagePatternDoubleList();
+        if(numericValuesList==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+    } else {
+        if(!numericValuesList->ensureCapacityForOneMore(numericValuesLength, errorCode)) {
+            return;
+        }
+        // The slot number must fit into a part's value field.
+        if(slot>Part::MAX_VALUE) {
+            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return;
+        }
+    }
+    numericValuesList->a[numericValuesLength]=numericValue;
+    ++numericValuesLength;
+    addPart(UMSGPAT_PART_TYPE_ARG_DOUBLE, start, length, slot, errorCode);
+}
+
+/**
+ * Fills in *parseError for an error at msg[index]: stores the offset and
+ * copies up to U_PARSE_CONTEXT_LEN-1 code units of msg before and after
+ * index into preContext/postContext, each NUL-terminated.
+ * A truncated context is shortened by one unit rather than splitting a
+ * surrogate pair. Does nothing if parseError is NULL.
+ */
+void
+MessagePattern::setParseError(UParseError *parseError, int32_t index) {
+    if(parseError!=NULL) {
+        parseError->offset=index;
+
+        // preContext: the text immediately before index.
+        int32_t preLength=index;
+        if(preLength>=U_PARSE_CONTEXT_LEN) {
+            preLength=U_PARSE_CONTEXT_LEN-1;
+            // Do not start in the middle of a surrogate pair.
+            if(preLength>0 && U16_IS_TRAIL(msg[index-preLength])) {
+                --preLength;
+            }
+        }
+        msg.extract(index-preLength, preLength, parseError->preContext);
+        parseError->preContext[preLength]=0;
+
+        // postContext: the text starting at index.
+        int32_t postLength=msg.length()-index;
+        if(postLength>=U_PARSE_CONTEXT_LEN) {
+            postLength=U_PARSE_CONTEXT_LEN-1;
+            // Do not end in the middle of a surrogate pair.
+            if(postLength>0 && U16_IS_LEAD(msg[index+postLength-1])) {
+                --postLength;
+            }
+        }
+        msg.extract(index, postLength, parseError->postContext);
+        parseError->postContext[postLength]=0;
+    }
+}
+
+// MessageImpl ------------------------------------------------------------- ***
+
+/**
+ * Appends s[start, limit) to sb with apostrophes reduced:
+ * a single apostrophe is dropped, and an apostrophe that immediately
+ * follows a dropped one (a doubled apostrophe) is appended once.
+ */
+void
+MessageImpl::appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit,
+                                      UnicodeString &sb) {
+    // Index right after the most recently skipped apostrophe, or -1 if none is pending.
+    int32_t afterApos=-1;
+    for(;;) {
+        const int32_t aposIndex=s.indexOf(u_apos, start);
+        if(aposIndex<0 || aposIndex>=limit) {
+            // No more apostrophes in range: copy the rest and finish.
+            sb.append(s, start, limit-start);
+            return;
+        }
+        if(aposIndex!=afterApos) {
+            // Append text between apostrophes and skip this one.
+            sb.append(s, start, aposIndex-start);
+            start=aposIndex+1;
+            afterApos=start;
+        } else {
+            // Second apostrophe of a pair (start==aposIndex): keep one.
+            sb.append(u_apos);
+            ++start;
+            afterApos=-1;
+        }
+    }
+}
+
+// Ported from second half of ICU4J SelectFormat.format(String).
+/**
+ * Appends the text of the sub-message that starts at part msgStart to result,
+ * walking the parts up to the next MSG_LIMIT part:
+ * - SKIP_SYNTAX parts are left out of the output;
+ * - for an ARG_START part, the text from the argument start to the limit of
+ *   its matching limit part is appended via appendReducedApostrophes();
+ * - all other pattern text in between is copied unchanged.
+ * @return result
+ */
+UnicodeString &
+MessageImpl::appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern,
+                                               int32_t msgStart,
+                                               UnicodeString &result) {
+    const UnicodeString &pattern=msgPattern.getPatternString();
+    // Start of the pattern text that has not been copied yet.
+    int32_t pending=msgPattern.getPart(msgStart).getLimit();
+    for(int32_t i=msgStart+1;; ++i) {
+        const MessagePattern::Part &part=msgPattern.getPart(i);
+        const int32_t partIndex=part.getIndex();
+        switch(part.getType()) {
+        case UMSGPAT_PART_TYPE_MSG_LIMIT:
+            return result.append(pattern, pending, partIndex-pending);
+        case UMSGPAT_PART_TYPE_SKIP_SYNTAX:
+            result.append(pattern, pending, partIndex-pending);
+            pending=part.getLimit();
+            break;
+        case UMSGPAT_PART_TYPE_ARG_START: {
+            result.append(pattern, pending, partIndex-pending);
+            // Jump to the argument's limit part and emit the whole argument.
+            i=msgPattern.getLimitPartIndex(i);
+            const int32_t argLimit=msgPattern.getPart(i).getLimit();
+            appendReducedApostrophes(pattern, partIndex, argLimit, result);
+            pending=argLimit;
+            break;
+        }
+        default:
+            break;
+        }
+    }
+}
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_FORMATTING
diff --git a/thirdparty/icu4c/common/msvcres.h b/thirdparty/icu4c/common/msvcres.h
new file mode 100644
index 0000000000..0cace85e74
--- /dev/null
+++ b/thirdparty/icu4c/common/msvcres.h
@@ -0,0 +1,25 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//{{NO_DEPENDENCIES}}
+// Copyright (c) 2003-2010 International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// Used by common.rc and other .rc files.
+//Do not edit with Microsoft Developer Studio because it will modify this
+//header the wrong way. This is here to prevent Visual Studio .NET from
+//unnecessarily building the resource files when it's not needed.
+//
+
+/*
+These are defined before unicode/uversion.h in order to prevent
+STLPort's broken stddef.h from being used when rc.exe parses this file.
+*/
+#define _STLP_OUTERMOST_HEADER_ID 0
+#define _STLP_WINCE 1
+
+#include "unicode/uversion.h"
+
+#define ICU_WEBSITE "http://icu-project.org"
+#define ICU_COMPANY "The ICU Project"
+#define ICU_PRODUCT_PREFIX "ICU"
+#define ICU_PRODUCT "International Components for Unicode"
diff --git a/thirdparty/icu4c/common/mutex.h b/thirdparty/icu4c/common/mutex.h
new file mode 100644
index 0000000000..44b1f90ba0
--- /dev/null
+++ b/thirdparty/icu4c/common/mutex.h
@@ -0,0 +1,77 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*/
+//----------------------------------------------------------------------------
+// File: mutex.h
+//
+// Lightweight C++ wrapper for umtx_ C mutex functions
+//
+// Author: Alan Liu 1/31/97
+// History:
+// 06/04/97 helena Updated setImplementation as per feedback from 5/21 drop.
+// 04/07/1999 srl refocused as a thin wrapper
+//
+//----------------------------------------------------------------------------
+#ifndef MUTEX_H
+#define MUTEX_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "umutex.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Mutex is a helper class for convenient locking and unlocking of a UMutex.
+ *
+ * Creating a local scope Mutex will lock a UMutex, holding the lock until the Mutex
+ * goes out of scope.
+ *
+ * If no UMutex is specified, the ICU global mutex is implied.
+ *
+ * For example:
+ *
+ * static UMutex myMutex;
+ *
+ * void Function(int arg1, int arg2)
+ * {
+ * static Object* foo; // Shared read-write object
+ * Mutex mutex(&myMutex); // or no args for the global lock
+ * foo->Method();
+ * // When 'mutex' goes out of scope and gets destroyed here, the lock is released
+ * }
+ *
+ * Note: Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function
+ * returning a Mutex. This is a common mistake which silently slips through the
+ * compiler!!
+ */
+
+class U_COMMON_API Mutex : public UMemory {
+public:
+ Mutex(UMutex *mutex = nullptr) : fMutex(mutex) {
+ umtx_lock(fMutex); // the lock is taken on construction ...
+ }
+ ~Mutex() {
+ umtx_unlock(fMutex); // ... and released on destruction
+ }
+
+ Mutex(const Mutex &other) = delete; // forbid copying of this class
+ Mutex &operator=(const Mutex &other) = delete; // forbid assigning of this class
+ void *operator new(size_t s) = delete; // forbid heap allocation. Locals only.
+
+private:
+ UMutex *fMutex; // the mutex passed to the constructor (nullptr by default)
+};
+
+
+U_NAMESPACE_END
+
+#endif // MUTEX_H
+//eof
diff --git a/thirdparty/icu4c/common/norm2_nfc_data.h b/thirdparty/icu4c/common/norm2_nfc_data.h
new file mode 100644
index 0000000000..455cc0c428
--- /dev/null
+++ b/thirdparty/icu4c/common/norm2_nfc_data.h
@@ -0,0 +1,1149 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// Copyright (C) 1999-2016, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// file name: norm2_nfc_data.h
+//
+// machine-generated by: icu/source/tools/gennorm2/n2builder.cpp
+
+
+#ifdef INCLUDED_FROM_NORMALIZER2_CPP
+
+static const UVersionInfo norm2_nfc_data_formatVersion={4,0,0,0};
+static const UVersionInfo norm2_nfc_data_dataVersion={0xd,0,0,0};
+
+static const int32_t norm2_nfc_data_indexes[Normalizer2Impl::IX_COUNT]={
+0x50,0x4bac,0x8814,0x8914,0x8914,0x8914,0x8914,0x8914,0xc0,0x300,0xae2,0x29e0,0x3c66,0xfc00,0x1288,0x3b9c,
+0x3c34,0x3c66,0x300,0
+};
+
+static const uint16_t norm2_nfc_data_trieIndex[1746]={
+0,0x40,0x7b,0xbb,0xfb,0x13a,0x17a,0x1b2,0x1f2,0x226,0x254,0x226,0x294,0x2d4,0x313,0x353,
+0x393,0x3d2,0x40f,0x44e,0x226,0x226,0x488,0x4c8,0x4f8,0x530,0x226,0x570,0x59f,0x5de,0x226,0x5f3,
+0x631,0x65f,0x226,0x68c,0x6cc,0x709,0x729,0x768,0x7a7,0x7e4,0x803,0x840,0x729,0x879,0x8a7,0x8e6,
+0x226,0x920,0x937,0x977,0x98e,0x9cd,0x226,0xa03,0xa23,0xa5e,0xa6a,0xaa5,0xacd,0xb0a,0xb4a,0xb84,
+0xb9f,0x226,0xbda,0x226,0xc1a,0xc39,0xc6f,0xcac,0x226,0x226,0x226,0x226,0x226,0xccf,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xcfb,0x226,0x226,0xd30,
+0x226,0x226,0xd4e,0x226,0xd78,0x226,0x226,0x226,0xdb4,0xdd4,0xe14,0xe53,0xe8e,0xece,0xf02,0xf2e,
+0x808,0x226,0x226,0xf62,0x226,0x226,0x226,0xfa2,0xfe2,0x1022,0x1062,0x10a2,0x10e2,0x1122,0x1162,0x11a2,
+0x11e2,0x226,0x226,0x1212,0x1243,0x226,0x1273,0x12a6,0x12e3,0x1322,0x1362,0x1398,0x13c6,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x13f1,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0xcbd,0x226,0x140e,0x226,0x144e,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x148e,0x14c8,0x1506,0x1546,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1585,0x15c3,0x15e3,0x226,0x226,0x226,0x226,
+0x161d,0x226,0x226,0x1645,0x1677,0x16a5,0x80c,0x16b8,0x226,0x226,0x16c8,0x1708,0x226,0x226,0x226,0x1420,
+0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,
+0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,
+0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,
+0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,
+0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,
+0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,
+0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,
+0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,
+0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,
+0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,
+0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x1794,0x226,
+0x17d4,0x180f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x184f,0x188f,0x18cf,0x190f,0x194f,0x198f,0x19cf,0x1a0f,0x1a32,0x1a72,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1a92,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x655,0x664,0x67c,0x69b,0x6b0,0x6b0,0x6b0,0x6b4,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xbda,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x54f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x40c,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ac5,0x226,0x226,0x1ad5,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0xdc6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ae5,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x15d6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x1aef,0x54f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x7eb,0x226,0x226,
+0x9ba,0x226,0x1aff,0x1b0c,0x1b18,0x226,0x226,0x226,0x226,0x414,0x226,0x1b23,0x1b33,0x226,0x226,0x226,
+0x7e0,0x226,0x226,0x226,0x226,0x1b43,0x226,0x226,0x226,0x1b4e,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x1b55,0x226,0x226,0x226,0x226,0x1b60,0x1b6f,0x8f6,0x1b7d,0x412,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x1b8b,0x798,0x226,0x226,0x226,0x226,0x226,0x1b9b,0x1baa,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x8d6,0x1bb2,0x1bc2,0x226,
+0x226,0x226,0x9ba,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bcc,0x226,0x226,0x226,0x226,0x226,
+0x226,0x7e6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bc9,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bdc,
+0x7e0,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x84d,0x226,0x226,0x226,0x7ed,0x7ea,
+0x226,0x226,0x226,0x226,0x7e8,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x9ba,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xbd4,0x226,0x226,0x226,
+0x226,0x7ea,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x1bec,0x226,0x226,0x226,0xefb,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x1bfc,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bfe,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x1c0d,0x1c1d,0x1c2b,0x1c38,0x226,0x1c44,0x1c52,0x1c62,0x226,0x226,
+0x226,0x226,0xcea,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c72,0x1c7a,
+0x1c88,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0xefb,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x4fc,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x1c98,0x226,0x226,0x226,0x226,0x226,0x226,0x1ca4,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x1cb4,0x1cc4,0x1cd4,0x1ce4,0x1cf4,0x1d04,0x1d14,0x1d24,0x1d34,0x1d44,0x1d54,
+0x1d64,0x1d74,0x1d84,0x1d94,0x1da4,0x1db4,0x1dc4,0x1dd4,0x1de4,0x1df4,0x1e04,0x1e14,0x1e24,0x1e34,0x1e44,0x1e54,
+0x1e64,0x1e74,0x1e84,0x1e94,0x1ea4,0x1eb4,0x1ec4,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x408,0x428,0xc4,0xc4,0xc4,0x448,0x457,0x46d,0x489,0x4a6,0x4c2,
+0x4df,0x4fc,0x51b,0x538,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
+0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x552,0xc4,0x566,0xc4,0xc4,0xc4,0xc4,
+0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
+0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x586,0xc4,0xc4,0xc4,0xc4,0xc4,
+0xc4,0xc4,0xc4,0x591,0x5ae,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x5ce,0x5e2,0xc4,0xc4,0x5f5,
+0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
+0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
+0x615,0x635
+};
+
+static const uint16_t norm2_nfc_data_trieData[7892]={
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,4,8,0xc,1,
+1,0x10,0x50,0x5c,0x70,0x88,0xcc,0xd0,0xec,0x108,0x144,0x148,0x15c,0x174,0x180,0x1a4,
+0x1e4,1,0x1ec,0x20c,0x228,0x244,0x290,0x298,0x2b0,0x2b8,0x2dc,1,1,1,1,1,
+1,0x2f4,0x334,0x340,0x354,0x36c,0x3b0,0x3b4,0x3d0,0x3f0,0x428,0x430,0x444,0x45c,0x468,0x48c,
+0x4cc,1,0x4d4,0x4f4,0x510,0x530,0x57c,0x584,0x5a0,0x5a8,0x5d0,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0x5e8,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,0x128a,0x1290,0xae4,0x1296,0xafa,
+0xb04,0x5f4,0xb0e,0x129c,0x12a2,0xb18,0x12a8,0x12ae,0x12b4,0x12ba,0xb2e,1,0x12c0,0x12c6,0x12cc,0xb38,
+0xb4e,0xb60,1,0x5fc,0x12d2,0x12d8,0x12de,0xb6a,0x12e4,1,1,0x12ea,0x12f0,0xb80,0x12f6,0xb96,
+0xba0,0x600,0xbaa,0x12fc,0x1302,0xbb4,0x1308,0x130e,0x1314,0x131a,0xbca,1,0x1320,0x1326,0x132c,0xbd4,
+0xbea,0xbfc,1,0x608,0x1332,0x1338,0x133e,0xc06,0x1344,1,0x134a,0x1350,0x1356,0xc1c,0xc32,0x135d,
+0x1363,0x1368,0x136e,0x1374,0x137a,0x1380,0x1386,0x138c,0x1392,0x1398,0x139e,1,1,0xc48,0xc56,0x13a4,
+0x13aa,0x13b0,0x13b6,0x13bd,0x13c3,0x13c8,0x13ce,0x13d4,0x13da,0x13e0,0x13e6,0x13ec,0x13f2,0x13f9,0x13ff,0x1404,
+0x140a,1,1,0x1410,0x1416,0x141c,0x1422,0x1428,0x142e,0x1435,0x143b,0x1440,1,1,1,0x1447,
+0x144d,0x1453,0x1459,1,0x145e,0x1464,0x146b,0x1471,0x1476,0x147c,1,1,1,0x1482,0x1488,0x148f,
+0x1495,0x149a,0x14a0,1,1,1,0xc64,0xc72,0x14a6,0x14ac,0x14b2,0x14b8,1,1,0x14be,0x14c4,
+0x14cb,0x14d1,0x14d6,0x14dc,0xc80,0xc8a,0x14e2,0x14e8,0x14ef,0x14f5,0xc94,0xc9e,0x14fb,0x1501,0x1506,0x150c,
+1,1,0xca8,0xcb2,0xcbc,0xcc6,0x1512,0x1518,0x151e,0x1524,0x152a,0x1530,0x1537,0x153d,0x1542,0x1548,
+0x154e,0x1554,0x155a,0x1560,0x1566,0x156c,0x1572,0x1578,0x157e,0x60c,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,0xcd0,0xcea,1,1,1,1,
+1,1,1,1,1,1,1,1,1,0xd04,0xd1e,1,1,1,1,1,
+1,0x610,1,1,1,1,1,1,1,1,1,1,1,1,1,0x1584,
+0x158a,0x1590,0x1596,0x159c,0x15a2,0x15a8,0x15ae,0x15b6,0x15c0,0x15ca,0x15d4,0x15de,0x15e8,0x15f2,0x15fc,1,
+0x1606,0x1610,0x161a,0x1624,0x162d,0x1633,1,1,0x1638,0x163e,0x1644,0x164a,0xd38,0xd42,0x1653,0x165d,
+0x1665,0x166b,0x1671,1,1,1,0x1676,0x167c,1,1,0x1682,0x1688,0x1690,0x169a,0x16a3,0x16a9,
+0x16af,0x16b5,0x16ba,0x16c0,0x16c6,0x16cc,0x16d2,0x16d8,0x16de,0x16e4,0x16ea,0x16f0,0x16f6,0x16fc,0x1702,0x1708,
+0x170e,0x1714,0x171a,0x1720,0x1726,0x172c,0x1732,0x1738,0x173e,0x1744,0x174a,0x1750,0x1756,0x175c,1,1,
+0x1762,0x1768,1,1,1,1,1,1,0xd4c,0xd56,0xd60,0xd6a,0x1770,0x177a,0x1784,0x178e,
+0xd74,0xd7e,0x1798,0x17a2,0x17aa,0x17b0,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,0x614,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xffcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,
+0xfdcc,0xffcc,0xffcc,0xfdcc,0xffcc,0xfdcc,0xffcc,0xfdcc,0xfdcc,0xffd0,0xffb8,0xffb8,0xffb8,0xffb8,0xffd0,0xfdb0,
+0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xff94,0xff94,0xfdb8,0xfdb8,0xfdb8,0xfdb8,0xfd94,0xfd94,0xffb8,0xffb8,0xffb8,
+0xffb8,0xfdb8,0xfdb8,0xffb8,0xfdb8,0xfdb8,0xffb8,0xffb8,0xfe02,0xfe02,0xfe02,0xfe02,0xfc02,0xffb8,0xffb8,0xffb8,
+0xffb8,0xffcc,0xffcc,0xffcc,0x3c36,0x3c3c,0xfdcc,0x3c42,0x3c48,0xfde0,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,
+0xffcc,0xffb8,0xffb8,1,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffd0,0xffb8,0xffb8,0xffcc,
+0xffd2,0xffd4,0xffd4,0xffd2,0xffd4,0xffd4,0xffd2,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,0x29e1,1,1,1,1,1,1,1,
+1,1,0x29e5,1,1,1,1,1,0x17b7,0x17bd,0x29e9,0x17c3,0x17c9,0x17cf,1,0x17d5,
+1,0x17db,0x17e1,0x17e9,0x618,1,1,1,0x634,1,0x644,1,0x658,1,1,1,
+1,1,0x674,1,0x684,1,1,1,0x688,1,1,1,0x6a0,0x17f1,0x17f7,0xd88,
+0x17fd,0xd92,0x1803,0x180b,0x6b4,1,1,1,0x6d4,1,0x6e4,1,0x6fc,1,1,1,
+1,1,0x71c,1,0x72c,1,1,1,0x734,1,1,1,0x754,0xd9c,0xdae,0x1813,
+0x1819,0xdc0,1,1,1,0x76c,0x181f,0x1825,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0x182b,0x1831,1,0x1837,1,1,0x774,0x183d,1,1,1,1,0x1843,
+0x1849,0x184f,1,0x778,1,1,0x780,1,0x784,0x790,0x798,0x79c,0x1855,0x7ac,1,1,
+1,0x7b0,1,1,1,1,0x7b4,1,1,1,0x7c4,1,1,1,0x7c8,1,
+0x7cc,1,1,0x7d0,1,1,0x7d8,1,0x7dc,0x7e8,0x7f0,0x7f4,0x185b,0x804,1,1,
+1,0x808,1,1,1,0x80c,1,1,1,0x81c,1,1,1,0x820,1,0x824,
+1,1,0x1861,0x1867,1,0x186d,1,1,0x828,0x1873,1,1,1,1,0x1879,0x187f,
+0x1885,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,0x82c,0x830,0x188b,0x1891,1,1,1,1,1,1,
+1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x1897,
+0x189d,1,1,1,1,1,1,1,1,1,1,1,1,1,0x18a3,0x18a9,
+0x18af,0x18b5,1,1,0x18bb,0x18c1,0x834,0x838,0x18c7,0x18cd,0x18d3,0x18d9,0x18df,0x18e5,1,1,
+0x18eb,0x18f1,0x18f7,0x18fd,0x1903,0x1909,0x83c,0x840,0x190f,0x1915,0x191b,0x1921,0x1927,0x192d,0x1933,0x1939,
+0x193f,0x1945,0x194b,0x1951,1,1,0x1957,0x195d,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,
+0xffcc,0xffcc,0xffbc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,
+0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffbc,0xffc8,0xffcc,0xfe14,0xfe16,0xfe18,0xfe1a,0xfe1c,0xfe1e,0xfe20,0xfe22,
+0xfe24,0xfe26,0xfe26,0xfe28,0xfe2a,0xfe2c,1,0xfe2e,1,0xfe30,0xfe32,1,0xffcc,0xffb8,1,0xfe24,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+0xfe3c,0xfe3e,0xfe40,1,1,1,1,1,1,1,0x1962,0x1968,0x196f,0x1975,0x197b,0x844,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,0x850,1,0x854,0xfe36,0xfe38,0xfe3a,0xfe3c,0xfe3e,
+0xfe40,0xfe42,0xfe44,0xfdcc,0xfdcc,0xfdb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+0xfe46,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+0x1981,0x858,0x1987,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0x85c,0x198d,1,0x860,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,0xffcc,
+0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,1,1,0xffcc,0xffcc,1,0xffb8,0xffcc,0xffcc,0xffb8,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+0xfe48,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,
+0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffb8,0xffb8,0xffcc,0xffb8,0xffcc,0xffcc,
+0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffcc,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,1,1,1,1,1,1,1,1,1,
+0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffb8,
+0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffb8,
+0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xfe36,0xfe38,0xfe3a,0xffcc,
+0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0x864,0x1993,1,1,1,1,1,1,0x868,0x1999,1,0x86c,
+0x199f,1,1,1,1,1,1,1,0xfc0e,1,1,1,1,1,1,1,
+1,1,1,1,1,1,0xfe12,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc,1,1,
+1,0x29ec,0x29f2,0x29f8,0x29fe,0x2a04,0x2a0a,0x2a10,0x2a16,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,0xfe0e,1,0xfc00,1,1,1,1,1,1,1,0x870,
+1,1,1,0x19a5,0x19ab,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00,
+1,1,1,1,0x2a1c,0x2a22,1,0x2a28,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,0xffcc,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,0x2a2e,1,1,0x2a34,1,1,
+1,1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1,
+1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,0x2a3a,0x2a40,0x2a46,
+1,1,0x2a4c,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe0e,
+1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+0x878,0x19b1,1,1,0x19b7,0x19bd,0xfe12,1,1,1,1,1,1,1,1,0xfc00,
+0xfc00,1,1,1,1,0x2a52,0x2a58,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,0x884,1,0x19c3,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,0xfc00,1,1,1,1,1,1,0x888,0x890,1,1,
+0x19c9,0x19cf,0x19d5,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,0x894,1,0x19db,1,1,1,1,0xfe12,1,1,
+1,1,1,1,1,0xfea8,0xfcb6,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0xfe0e,1,1,0x898,0x19e1,1,0xfc00,1,1,1,0x89c,0x19e7,0x19ed,
+1,0xdca,0x19f5,1,0xfe12,1,1,1,1,1,1,1,0xfc00,0xfc00,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,0xfc00,1,1,1,
+1,1,1,0x8a8,0x8b0,1,1,0x19fd,0x1a03,0x1a09,0xfe12,1,1,1,1,1,
+1,1,1,1,0xfc00,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,0xfc12,1,1,
+1,1,0xfc00,1,1,1,1,1,1,1,1,1,0x8b4,0x1a0f,1,0xdd4,
+0x1a17,0x1a1f,0xfc00,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,0xfece,0xfece,0xfe12,1,1,
+1,1,1,1,1,1,0xfed6,0xfed6,0xfed6,0xfed6,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0xfeec,0xfeec,0xfe12,1,1,1,1,1,1,1,1,0xfef4,0xfef4,0xfef4,
+0xfef4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0xffb8,1,0xffb8,1,0xffb0,1,1,1,1,1,1,0x2a5f,1,1,
+1,1,1,1,1,1,1,0x2a65,1,1,1,1,0x2a6b,1,1,1,
+1,0x2a71,1,1,1,1,0x2a77,1,1,1,1,1,1,1,1,1,
+1,1,1,0x2a7d,1,1,1,1,1,1,1,0xff02,0xff04,0x3c50,0xff08,0x3c58,
+0x2a82,1,0x2a88,1,0xff04,0xff04,0xff04,0xff04,1,1,0xff04,0x3c60,0xffcc,0xffcc,0xfe12,1,
+0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,0x2a8f,1,1,
+1,1,1,1,1,1,1,0x2a95,1,1,1,1,0x2a9b,1,1,1,
+1,0x2aa1,1,1,1,1,0x2aa7,1,1,1,1,1,1,1,1,1,
+1,1,1,0x2aad,1,1,1,1,1,1,0xffb8,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0x8c0,0x1a25,1,1,1,1,1,1,1,0xfc00,1,1,
+1,1,1,1,1,1,0xfe0e,1,0xfe12,0xfe12,1,1,1,1,1,1,
+1,1,1,1,1,1,1,0xffb8,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,
+0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,
+0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,0xffcc,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,0xffc8,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,0xffbc,0xffcc,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,1,1,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,
+0xffcc,0xffb8,1,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0x8c4,0x1a2b,0x8c8,0x1a31,0x8cc,0x1a37,0x8d0,0x1a3d,0x8d4,0x1a43,1,1,0x8d8,
+0x1a49,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0xfe0e,0xfc00,1,1,1,1,0x8dc,0x1a4f,0x8e0,0x1a55,0x8e4,0x8e8,0x1a5b,0x1a61,
+0x8ec,0x1a67,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,
+0xfe12,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0xffcc,0xffcc,0xffcc,1,0xfe02,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,
+0xffb8,0xffb8,0xffcc,1,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,1,1,1,1,0xffb8,
+1,1,1,1,1,1,0xffcc,1,1,1,0xffcc,0xffcc,1,1,1,1,
+1,1,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffd4,
+0xffac,0xffb8,0xff94,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffd0,0xffc8,0xffc8,0xffb8,1,0xffcc,0xffd2,0xffb8,
+0xffcc,0xffb8,0x1a6c,0x1a72,0x1a78,0x1a7e,0x1a85,0x1a8b,0x1a91,0x1a97,0x1a9f,0x1aa9,0x1ab0,0x1ab6,0x1abc,0x1ac2,
+0x1ac8,0x1ace,0x1ad5,0x1adb,0x1ae0,0x1ae6,0x1aee,0x1af8,0x1b02,0x1b0c,0x1b14,0x1b1a,0x1b20,0x1b26,0x1b2f,0x1b39,
+0x1b41,0x1b47,0x1b4c,0x1b52,0x1b58,0x1b5e,0x1b64,0x1b6a,0x1b70,0x1b76,0x1b7d,0x1b83,0x1b88,0x1b8e,0x1b94,0x1b9a,
+0x1ba2,0x1bac,0x1bb4,0x1bba,0x1bc0,0x1bc6,0x1bcc,0x1bd2,0xdde,0xde8,0x1bda,0x1be4,0x1bec,0x1bf2,0x1bf8,0x1bfe,
+0x1c04,0x1c0a,0x1c10,0x1c16,0x1c1d,0x1c23,0x1c28,0x1c2e,0x1c34,0x1c3a,0x1c40,0x1c46,0x1c4c,0x1c52,0x1c5a,0x1c64,
+0x1c6e,0x1c78,0x1c82,0x1c8c,0x1c96,0x1ca0,0x1ca9,0x1caf,0x1cb5,0x1cbb,0x1cc0,0x1cc6,0xdf2,0xdfc,0x1cce,0x1cd8,
+0x1ce0,0x1ce6,0x1cec,0x1cf2,0xe06,0xe10,0x1cfa,0x1d04,0x1d0e,0x1d18,0x1d22,0x1d2c,0x1d34,0x1d3a,0x1d40,0x1d46,
+0x1d4c,0x1d52,0x1d58,0x1d5e,0x1d64,0x1d6a,0x1d70,0x1d76,0x1d7c,0x1d82,0x1d8a,0x1d94,0x1d9e,0x1da8,0x1db0,0x1db6,
+0x1dbd,0x1dc3,0x1dc8,0x1dce,0x1dd4,0x1dda,0x1de0,0x1de6,0x1dec,0x1df2,0x1df9,0x1dff,0x1e05,0x1e0b,0x1e11,0x1e17,
+0x1e1c,0x1e22,0x1e28,0x1e2e,0x1e35,0x1e3b,0x1e41,0x1e47,0x1e4c,0x1e52,0x1e58,0x1e5e,1,0x1e65,1,1,
+1,1,0xe1a,0xe28,0x1e6a,0x1e70,0x1e78,0x1e82,0x1e8c,0x1e96,0x1ea0,0x1eaa,0x1eb4,0x1ebe,0x1ec8,0x1ed2,
+0x1edc,0x1ee6,0x1ef0,0x1efa,0x1f04,0x1f0e,0x1f18,0x1f22,0x1f2c,0x1f36,0xe36,0xe40,0x1f3e,0x1f44,0x1f4a,0x1f50,
+0x1f58,0x1f62,0x1f6c,0x1f76,0x1f80,0x1f8a,0x1f94,0x1f9e,0x1fa8,0x1fb2,0x1fba,0x1fc0,0x1fc6,0x1fcc,0xe4a,0xe54,
+0x1fd2,0x1fd8,0x1fe0,0x1fea,0x1ff4,0x1ffe,0x2008,0x2012,0x201c,0x2026,0x2030,0x203a,0x2044,0x204e,0x2058,0x2062,
+0x206c,0x2076,0x2080,0x208a,0x2094,0x209e,0x20a6,0x20ac,0x20b2,0x20b8,0x20c0,0x20ca,0x20d4,0x20de,0x20e8,0x20f2,
+0x20fc,0x2106,0x2110,0x211a,0x2122,0x2128,0x212f,0x2135,0x213a,0x2140,0x2146,0x214c,1,1,1,1,
+1,1,0xe5e,0xe74,0xe8c,0xe9a,0xea8,0xeb6,0xec4,0xed2,0xede,0xef4,0xf0c,0xf1a,0xf28,0xf36,
+0xf44,0xf52,0xf5e,0xf6c,0x2155,0x215f,0x2169,0x2173,1,1,0xf7a,0xf88,0x217d,0x2187,0x2191,0x219b,
+1,1,0xf96,0xfac,0xfc4,0xfd2,0xfe0,0xfee,0xffc,0x100a,0x1016,0x102c,0x1044,0x1052,0x1060,0x106e,
+0x107c,0x108a,0x1096,0x10a8,0x21a5,0x21af,0x21b9,0x21c3,0x21cd,0x21d7,0x10ba,0x10cc,0x21e1,0x21eb,0x21f5,0x21ff,
+0x2209,0x2213,0x10de,0x10ec,0x221d,0x2227,0x2231,0x223b,1,1,0x10fa,0x1108,0x2245,0x224f,0x2259,0x2263,
+1,1,0x1116,0x1128,0x226d,0x2277,0x2281,0x228b,0x2295,0x229f,1,0x113a,1,0x22a9,1,0x22b3,
+1,0x22bd,0x114c,0x1162,0x117a,0x1188,0x1196,0x11a4,0x11b2,0x11c0,0x11cc,0x11e2,0x11fa,0x1208,0x1216,0x1224,
+0x1232,0x1240,0x124c,0x3b9e,0x22c5,0x3ba6,0x1256,0x3bae,0x22cb,0x3bb6,0x22d1,0x3bbe,0x22d7,0x3bc6,0x1260,0x3bce,
+1,1,0x22de,0x22e8,0x22f7,0x2307,0x2317,0x2327,0x2337,0x2347,0x2352,0x235c,0x236b,0x237b,0x238b,0x239b,
+0x23ab,0x23bb,0x23c6,0x23d0,0x23df,0x23ef,0x23ff,0x240f,0x241f,0x242f,0x243a,0x2444,0x2453,0x2463,0x2473,0x2483,
+0x2493,0x24a3,0x24ae,0x24b8,0x24c7,0x24d7,0x24e7,0x24f7,0x2507,0x2517,0x2522,0x252c,0x253b,0x254b,0x255b,0x256b,
+0x257b,0x258b,0x2595,0x259b,0x25a3,0x25aa,0x25b3,1,0x126a,0x25bd,0x25c5,0x25cb,0x25d1,0x3bd6,0x25d6,1,
+0x2ab2,0x8f0,1,0x25dd,0x25e5,0x25ec,0x25f5,1,0x1274,0x25ff,0x2607,0x3bde,0x260d,0x3be6,0x2612,0x2619,
+0x261f,0x2625,0x262b,0x2631,0x2639,0x3bf0,1,1,0x2641,0x2649,0x2651,0x2657,0x265d,0x3bfa,1,0x2663,
+0x2669,0x266f,0x2675,0x267b,0x2683,0x3c04,0x268b,0x2691,0x2697,0x269f,0x26a7,0x26ad,0x26b3,0x3c0e,0x26b9,0x26bf,
+0x3c16,0x2ab7,1,1,0x26c7,0x26ce,0x26d7,1,0x127e,0x26e1,0x26e9,0x3c1e,0x26ef,0x3c26,0x26f4,0x2abb,
+0x8fc,1,0xfa09,0xfa09,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0xffcc,0xffcc,0xfe02,0xfe02,0xffcc,0xffcc,0xffcc,0xffcc,0xfe02,0xfe02,0xfe02,0xffcc,0xffcc,1,
+1,1,1,0xffcc,1,1,1,0xfe02,0xfe02,0xffcc,0xffb8,0xffcc,0xfe02,0xfe02,0xffb8,0xffb8,
+0xffb8,0xffb8,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,0x2abe,1,1,1,0x2ac2,0x3c2e,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0x908,1,0x90c,1,0x910,1,1,1,1,1,0x26fb,0x2701,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,0x2707,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0x270d,0x2713,0x2719,0x914,1,0x918,1,0x91c,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,0x920,0x271f,1,1,1,0x924,0x2725,1,0x928,0x272b,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,0x92c,0x2731,0x930,0x2737,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x934,
+1,1,1,0x273d,1,0x938,0x2743,0x93c,1,0x2749,0x940,0x274f,1,1,1,0x944,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0x2755,0x948,0x275b,1,0x94c,0x950,1,1,1,1,1,1,1,0x2761,
+0x2767,0x276d,0x2773,0x2779,0x954,0x958,0x277f,0x2785,0x95c,0x960,0x278b,0x2791,0x964,0x968,0x96c,0x970,
+1,1,0x2797,0x279d,0x974,0x978,0x27a3,0x27a9,0x97c,0x980,0x27af,0x27b5,1,1,1,1,
+1,1,1,0x984,0x988,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0x98c,1,1,1,1,1,0x990,0x994,1,0x998,0x27bb,0x27c1,
+0x27c7,0x27cd,1,1,0x99c,0x9a0,0x9a4,0x9a8,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,0x27d3,0x27d9,0x27df,0x27e5,1,1,1,1,
+1,1,0x27eb,0x27f1,0x27f7,0x27fd,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x2ac7,
+0x2acb,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,0x2acf,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,0xffb4,0xffc8,0xffd0,0xffbc,0xffc0,0xffc0,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x9ac,1,
+1,1,1,0x9b0,0x2803,0x9b4,0x2809,0x9b8,0x280f,0x9bc,0x2815,0x9c0,0x281b,0x9c4,0x2821,0x9c8,
+0x2827,0x9cc,0x282d,0x9d0,0x2833,0x9d4,0x2839,0x9d8,0x283f,0x9dc,0x2845,1,0x9e0,0x284b,0x9e4,0x2851,
+0x9e8,0x2857,1,1,1,1,1,0x9ec,0x285d,0x2863,0x9f4,0x2869,0x286f,0x9fc,0x2875,0x287b,
+0xa04,0x2881,0x2887,0xa0c,0x288d,0x2893,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,0x2899,1,1,1,1,0xfc10,
+0xfc10,1,1,0xa14,0x289f,1,1,1,1,1,1,1,0xa18,1,1,1,
+1,0xa1c,0x28a5,0xa20,0x28ab,0xa24,0x28b1,0xa28,0x28b7,0xa2c,0x28bd,0xa30,0x28c3,0xa34,0x28c9,0xa38,
+0x28cf,0xa3c,0x28d5,0xa40,0x28db,0xa44,0x28e1,0xa48,0x28e7,1,0xa4c,0x28ed,0xa50,0x28f3,0xa54,0x28f9,
+1,1,1,1,1,0xa58,0x28ff,0x2905,0xa60,0x290b,0x2911,0xa68,0x2917,0x291d,0xa70,0x2923,
+0x2929,0xa78,0x292f,0x2935,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,0xa80,0xa84,0xa88,0xa8c,1,0x293b,1,1,0x2941,0x2947,0x294d,
+0x2953,1,1,0xa90,0x2959,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0xffcc,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0xffb8,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,0xffcc,1,0xffcc,0xffcc,0xffb8,1,1,0xffcc,
+0xffcc,1,1,1,1,1,0xffcc,0xffcc,1,0xffcc,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,
+1,1,1,1,1,1,1,1,0xae2,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
+0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
+0x1289,0x1289,0x1289,0x1289,0xae2,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
+0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
+0xae2,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
+0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0xae2,0x1289,0x1289,0x1289,
+0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
+0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0x3c66,1,0x3c66,0x3c66,0x3c66,0x3c66,0x3c66,0x3c66,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x3c66,0x3c66,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0x3c66,1,1,1,1,0x3c66,1,1,1,0x3c66,1,0x3c66,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,0x3b97,1,0x2ad5,
+0x2ad9,0x2add,0x2ae1,0x2ae5,0x2ae9,0x2aed,0x2af1,0x2af1,0x2af5,0x2af9,0x2afd,0x2b01,0x2b05,0x2b09,0x2b0d,0x2b11,
+0x2b15,0x2b19,0x2b1d,0x2b21,0x2b25,0x2b29,0x2b2d,0x2b31,0x2b35,0x2b39,0x2b3d,0x2b41,0x2b45,0x2b49,0x2b4d,0x2b51,
+0x2b55,0x2b59,0x2b5d,0x2b61,0x2b65,0x2b69,0x2b6d,0x2b71,0x2b75,0x2b79,0x2b7d,0x2b81,0x2b85,0x2b89,0x2b8d,0x2b91,
+0x2b95,0x2b99,0x2b9d,0x2ba1,0x2ba5,0x2ba9,0x2bad,0x2bb1,0x2bb5,0x2bb9,0x2bbd,0x2bc1,0x2bc5,0x2bc9,0x2bcd,0x2bd1,
+0x2bd5,0x2bd9,0x2bdd,0x2be1,0x2be5,0x2be9,0x2bed,0x2bf1,0x2bf5,0x2bf9,0x2bfd,0x2c01,0x2c05,0x2c09,0x2c0d,0x2c11,
+0x2c15,0x2c19,0x2c1d,0x2c21,0x2c25,0x2c29,0x2c2d,0x2c31,0x2c35,0x2c39,0x2c3d,0x2b21,0x2c41,0x2c45,0x2c49,0x2c4d,
+0x2c51,0x2c55,0x2c59,0x2c5d,0x2c61,0x2c65,0x2c69,0x2c6d,0x2c71,0x2c75,0x2c79,0x2c7d,0x2c81,0x2c85,0x2c89,0x2c8d,
+0x2c91,0x2c95,0x2c99,0x2c9d,0x2ca1,0x2ca5,0x2ca9,0x2cad,0x2cb1,0x2cb5,0x2cb9,0x2cbd,0x2cc1,0x2cc5,0x2cc9,0x2ccd,
+0x2cd1,0x2cd5,0x2cd9,0x2cdd,0x2ce1,0x2ce5,0x2ce9,0x2ced,0x2cf1,0x2cf5,0x2cf9,0x2cfd,0x2d01,0x2d05,0x2d09,0x2d0d,
+0x2d11,0x2d15,0x2d19,0x2d1d,0x2d21,0x2d25,0x2d29,0x2d2d,0x2d31,0x2d35,0x2d39,0x2d3d,0x2d41,0x2d45,0x2d49,0x2d4d,
+0x2c89,0x2d51,0x2d55,0x2d59,0x2d5d,0x2d61,0x2d65,0x2d69,0x2d6d,0x2c49,0x2d71,0x2d75,0x2d79,0x2d7d,0x2d81,0x2d85,
+0x2d89,0x2d8d,0x2d91,0x2d95,0x2d99,0x2d9d,0x2da1,0x2da5,0x2da9,0x2dad,0x2db1,0x2db5,0x2db9,0x2dbd,0x2b21,0x2dc1,
+0x2dc5,0x2dc9,0x2dcd,0x2dd1,0x2dd5,0x2dd9,0x2ddd,0x2de1,0x2de5,0x2de9,0x2ded,0x2df1,0x2df5,0x2df9,0x2dfd,0x2e01,
+0x2e05,0x2e09,0x2e0d,0x2e11,0x2e15,0x2e19,0x2e1d,0x2e21,0x2e25,0x2e29,0x2c51,0x2e2d,0x2e31,0x2e35,0x2e39,0x2e3d,
+0x2e41,0x2e45,0x2e49,0x2e4d,0x2e51,0x2e55,0x2e59,0x2e5d,0x2e61,0x2e65,0x2e69,0x2e6d,0x2e71,0x2e75,0x2e79,0x2e7d,
+0x2e81,0x2e85,0x2e89,0x2e8d,0x2e91,0x2e95,0x2e99,0x2e9d,0x2ea1,0x2ea5,0x2ea9,0x2ead,0x2eb1,0x2eb5,0x2eb9,0x2ebd,
+0x2ec1,0x2ec5,0x2ec9,0x2ecd,0x2ed1,0x2ed5,0x2ed9,0x2edd,0x2ee1,0x2ee5,0x2ee9,0x2eed,0x2ef1,1,1,0x2ef5,
+1,0x2ef9,1,1,0x2efd,0x2f01,0x2f05,0x2f09,0x2f0d,0x2f11,0x2f15,0x2f19,0x2f1d,0x2f21,1,0x2f25,
+1,0x2f29,1,1,0x2f2d,0x2f31,1,1,1,0x2f35,0x2f39,0x2f3d,0x2f41,0x2f45,0x2f49,0x2f4d,
+0x2f51,0x2f55,0x2f59,0x2f5d,0x2f61,0x2f65,0x2f69,0x2f6d,0x2f71,0x2f75,0x2f79,0x2f7d,0x2f81,0x2f85,0x2f89,0x2f8d,
+0x2f91,0x2f95,0x2f99,0x2f9d,0x2fa1,0x2fa5,0x2fa9,0x2fad,0x2fb1,0x2fb5,0x2fb9,0x2fbd,0x2fc1,0x2fc5,0x2fc9,0x2fcd,
+0x2fd1,0x2fd5,0x2fd9,0x2fdd,0x2fe1,0x2fe5,0x2d25,0x2fe9,0x2fed,0x2ff1,0x2ff5,0x2ff9,0x2ffd,0x2ffd,0x3001,0x3005,
+0x3009,0x300d,0x3011,0x3015,0x3019,0x301d,0x2f2d,0x3021,0x3025,0x3029,0x302d,0x3031,0x3037,1,1,0x303b,
+0x303f,0x3043,0x3047,0x304b,0x304f,0x3053,0x3057,0x2f65,0x305b,0x305f,0x3063,0x2ef5,0x3067,0x306b,0x306f,0x3073,
+0x3077,0x307b,0x307f,0x3083,0x3087,0x308b,0x308f,0x3093,0x2f89,0x3097,0x2f8d,0x309b,0x309f,0x30a3,0x30a7,0x30ab,
+0x2ef9,0x2b75,0x30af,0x30b3,0x30b7,0x2c8d,0x2de9,0x30bb,0x30bf,0x2fa9,0x30c3,0x2fad,0x30c7,0x30cb,0x30cf,0x2f01,
+0x30d3,0x30d7,0x30db,0x30df,0x30e3,0x2f05,0x30e7,0x30eb,0x30ef,0x30f3,0x30f7,0x30fb,0x2fe5,0x30ff,0x3103,0x2d25,
+0x3107,0x2ff5,0x310b,0x310f,0x3113,0x3117,0x311b,0x3009,0x311f,0x2f29,0x3123,0x300d,0x2c41,0x3127,0x3011,0x312b,
+0x3019,0x312f,0x3133,0x3137,0x313b,0x313f,0x3021,0x2f19,0x3143,0x3025,0x3147,0x3029,0x314b,0x2af1,0x314f,0x3155,
+0x315b,0x3161,0x3165,0x3169,0x316d,0x3173,0x3179,0x317f,0x3183,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x3186,
+0xfe34,0x318c,1,1,1,1,1,1,1,1,1,1,0x3192,0x3198,0x31a0,0x31aa,
+0x31b2,0x31b8,0x31be,0x31c4,0x31ca,0x31d0,0x31d6,0x31dc,0x31e2,1,0x31e8,0x31ee,0x31f4,0x31fa,0x3200,1,
+0x3206,1,0x320c,0x3212,1,0x3218,0x321e,1,0x3224,0x322a,0x3230,0x3236,0x323c,0x3242,0x3248,0x324e,
+0x3254,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,
+0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0xffb8,1,0xffcc,1,1,1,1,1,1,1,1,0xffcc,0xfe02,0xffb8,
+1,1,1,1,0xfe12,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,
+1,1,1,1,1,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8,1,
+1,1,1,1,1,1,1,1,0xa94,0x295f,0xa9a,0x2969,1,1,1,1,
+1,0xaa0,1,1,1,1,1,0x2973,1,1,1,1,1,1,1,1,
+1,0xfe12,0xfc0e,1,1,1,1,1,1,1,0xfc00,1,1,1,1,1,
+1,0x297d,0x2987,1,0xaa6,0xaac,0xfe12,0xfe12,1,1,1,1,1,1,1,1,
+1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,0xfe0e,1,1,
+1,1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,0xfe0e,0xfe12,
+1,1,1,1,1,1,1,1,1,1,1,0xfe0e,0xfe0e,1,0xfc00,1,
+1,1,1,1,1,1,0xab2,1,1,1,0x2991,0x299b,0xfe12,1,1,1,
+1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,0xfe12,1,1,
+1,0xfe0e,1,1,1,1,1,1,1,1,1,0xfc00,1,1,1,1,
+1,1,1,1,0xabe,0xfc00,0x29a5,0x29af,0xfc00,0x29b9,1,1,0xfe12,0xfe0e,1,1,
+1,1,1,1,1,1,1,1,1,1,0xad0,0xad6,0x29c3,0x29cd,1,1,
+1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,0xfc00,1,1,1,
+1,0xadc,1,1,0x29d7,1,1,1,1,0xfe12,0xfe12,1,0xfe02,0xfe02,0xfe02,0xfe02,
+0xfe02,1,1,1,1,1,1,1,1,1,1,1,0xfe0c,0xfe0c,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,0xfe02,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,0x325a,0x3264,0x3278,0x3290,0x32a8,
+0x32c0,0x32d8,0xffb0,0xffb0,0xfe02,0xfe02,0xfe02,1,1,1,0xffc4,0xffb0,0xffb0,0xffb0,1,1,
+1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,
+0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,0x32e6,0x32f0,0x3304,
+0x331c,0x3334,0x334c,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,1,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,1,1,1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,
+1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xfe0e,1,
+1,1,1,1,0x335b,0x335f,0x3363,0x3367,0x336d,0x2f4d,0x3371,0x3375,0x3379,0x337d,0x2f51,0x3381,
+0x3385,0x3389,0x2f55,0x338f,0x3393,0x3397,0x339b,0x33a1,0x33a5,0x33a9,0x33ad,0x33b3,0x33b7,0x33bb,0x33bf,0x303f,
+0x33c3,0x33c9,0x33cd,0x33d1,0x33d5,0x33d9,0x33dd,0x33e1,0x33e5,0x3053,0x2f59,0x2f5d,0x3057,0x33e9,0x33ed,0x2c59,
+0x33f1,0x2f61,0x33f5,0x33f9,0x33fd,0x3401,0x3401,0x3401,0x3405,0x340b,0x340f,0x3413,0x3417,0x341d,0x3421,0x3425,
+0x3429,0x342d,0x3431,0x3435,0x3439,0x343d,0x3441,0x3445,0x3449,0x344d,0x344d,0x305f,0x3451,0x3455,0x3459,0x345d,
+0x2f69,0x3461,0x3465,0x3469,0x2ebd,0x346d,0x3471,0x3475,0x3479,0x347d,0x3481,0x3485,0x3489,0x348d,0x3493,0x3497,
+0x349b,0x349f,0x34a3,0x34a7,0x34ab,0x34b1,0x34b7,0x34bb,0x34bf,0x34c3,0x34c7,0x34cb,0x34cf,0x34d3,0x34d7,0x34d7,
+0x34db,0x34e1,0x34e5,0x2c49,0x34e9,0x34ed,0x34f3,0x34f7,0x34fb,0x34ff,0x3503,0x3507,0x2f7d,0x350b,0x350f,0x3513,
+0x3519,0x351d,0x3523,0x3527,0x352b,0x352f,0x3533,0x3537,0x353b,0x353f,0x3543,0x3547,0x354b,0x354f,0x3555,0x3559,
+0x355d,0x3561,0x2b71,0x3565,0x356b,0x356f,0x356f,0x3575,0x3579,0x3579,0x357d,0x3581,0x3587,0x358d,0x3591,0x3595,
+0x3599,0x359d,0x35a1,0x35a5,0x35a9,0x35ad,0x35b1,0x2f81,0x35b5,0x35bb,0x35bf,0x35c3,0x308f,0x35c3,0x35c7,0x2f89,
+0x35cb,0x35cf,0x35d3,0x35d7,0x2f8d,0x2b05,0x35db,0x35df,0x35e3,0x35e7,0x35eb,0x35ef,0x35f3,0x35f9,0x35fd,0x3601,
+0x3605,0x3609,0x360d,0x3613,0x3617,0x361b,0x361f,0x3623,0x3627,0x362b,0x362f,0x3633,0x2f91,0x3637,0x363b,0x3641,
+0x3645,0x3649,0x364d,0x2f99,0x3651,0x3655,0x3659,0x365d,0x3661,0x3665,0x3669,0x366d,0x2b75,0x30af,0x3671,0x3675,
+0x3679,0x367d,0x3683,0x3687,0x368b,0x368f,0x2f9d,0x3693,0x3699,0x369d,0x36a1,0x3161,0x36a5,0x36a9,0x36ad,0x36b1,
+0x36b5,0x36bb,0x36bf,0x36c3,0x36c7,0x36cd,0x36d1,0x36d5,0x36d9,0x2c8d,0x36dd,0x36e1,0x36e7,0x36ed,0x36f3,0x36f7,
+0x36fd,0x3701,0x3705,0x3709,0x370d,0x2fa1,0x2de9,0x3711,0x3715,0x3719,0x371d,0x3723,0x3727,0x372b,0x372f,0x30bf,
+0x3733,0x3737,0x373d,0x3741,0x3745,0x374b,0x3751,0x3755,0x30c3,0x3759,0x375d,0x3761,0x3765,0x3769,0x376d,0x3771,
+0x3777,0x377b,0x3781,0x3785,0x378b,0x30cb,0x378f,0x3793,0x3799,0x379d,0x37a1,0x37a7,0x37ad,0x37b1,0x37b5,0x37b9,
+0x37bd,0x37bd,0x37c1,0x37c5,0x30d3,0x37c9,0x37cd,0x37d1,0x37d5,0x37d9,0x37df,0x37e3,0x2c55,0x37e9,0x37ef,0x37f3,
+0x37f9,0x37ff,0x3805,0x3809,0x30eb,0x380d,0x3813,0x3819,0x381f,0x3825,0x3829,0x3829,0x30ef,0x3169,0x382d,0x3831,
+0x3835,0x3839,0x383f,0x2bbd,0x30f7,0x3843,0x3847,0x2fcd,0x384d,0x3853,0x2f15,0x3859,0x385d,0x2fdd,0x3861,0x3865,
+0x3869,0x386f,0x386f,0x3875,0x3879,0x387d,0x3883,0x3887,0x388b,0x388f,0x3895,0x3899,0x389d,0x38a1,0x38a5,0x38a9,
+0x38af,0x38b3,0x38b7,0x38bb,0x38bf,0x38c3,0x38c7,0x38cd,0x38d3,0x38d7,0x38dd,0x38e1,0x38e7,0x38eb,0x2ff5,0x38ef,
+0x38f5,0x38fb,0x38ff,0x3905,0x3909,0x390f,0x3913,0x3917,0x391b,0x391f,0x3923,0x3927,0x392d,0x3933,0x3939,0x3575,
+0x393f,0x3943,0x3947,0x394b,0x394f,0x3953,0x3957,0x395b,0x395f,0x3963,0x3967,0x396b,0x2c9d,0x3971,0x3975,0x3979,
+0x397d,0x3981,0x3985,0x3001,0x3989,0x398d,0x3991,0x3995,0x3999,0x399f,0x39a5,0x39ab,0x39af,0x39b3,0x39b7,0x39bb,
+0x39c1,0x39c5,0x39cb,0x39cf,0x39d3,0x39d9,0x39df,0x39e3,0x2ba9,0x39e7,0x39eb,0x39ef,0x39f3,0x39f7,0x39fb,0x3113,
+0x39ff,0x3a03,0x3a07,0x3a0b,0x3a0f,0x3a13,0x3a17,0x3a1b,0x3a1f,0x3a23,0x3a29,0x3a2d,0x3a31,0x3a35,0x3a39,0x3a3d,
+0x3a43,0x3a49,0x3a4d,0x3a51,0x3127,0x312b,0x3a55,0x3a59,0x3a5f,0x3a63,0x3a67,0x3a6b,0x3a6f,0x3a75,0x3a7b,0x3a7f,
+0x3a83,0x3a87,0x3a8d,0x312f,0x3a91,0x3a97,0x3a9d,0x3aa1,0x3aa5,0x3aa9,0x3aaf,0x3ab3,0x3ab7,0x3abb,0x3abf,0x3ac3,
+0x3ac7,0x3acb,0x3ad1,0x3ad5,0x3ad9,0x3add,0x3ae3,0x3ae7,0x3aeb,0x3aef,0x3af3,0x3af9,0x3aff,0x3b03,0x3b07,0x3b0b,
+0x3b11,0x3b15,0x3147,0x3147,0x3b1b,0x3b1f,0x3b25,0x3b29,0x3b2d,0x3b31,0x3b35,0x3b39,0x3b3d,0x3b41,0x314b,0x3b47,
+0x3b4b,0x3b4f,0x3b53,0x3b57,0x3b5b,0x3b61,0x3b65,0x3b6b,0x3b71,0x3b77,0x3b7b,0x3b7f,0x3b83,0x3b87,0x3b8b,0x3b8f,
+0x3b93,0x3b97,1,1
+};
+
+static const UCPTrie norm2_nfc_data_trie={
+ norm2_nfc_data_trieIndex,
+ { norm2_nfc_data_trieData },
+ 1746, 7892,
+ 0x2fc00, 0x30,
+ 0, 0,
+ 0, 0,
+ 0xc4, 0x226,
+ 0x1,
+};
+
+static const uint16_t norm2_nfc_data_extraData[7732]={
+0xffff,0xffff,0x8670,0x44dc,0x8670,0x44c0,0x8670,0x44de,0x600,0x180,0x602,0x182,0x604,0x185,0x606,0x186,
+0x608,0x200,0x60c,0x205,0x60e,0x44d,0x610,0x189,0x612,0x3d44,0x614,0x18b,0x618,0x39a,0x61e,0x400,
+0x622,0x404,0x646,0x3d41,0x64a,0x3c00,0x8650,0x208,0x60e,0x3c04,0x646,0x3c08,0x8662,0x3c0c,0x602,0x20c,
+0x604,0x210,0x60e,0x214,0x618,0x218,0x864e,0x18f,0x60e,0x3c14,0x618,0x21c,0x646,0x3c18,0x64e,0x3c20,
+0x65a,0x3c24,0x8662,0x3c1c,0x600,0x190,0x602,0x192,0x604,0x195,0x606,0x3d78,0x608,0x225,0x60c,0x228,
+0x60e,0x22c,0x610,0x196,0x612,0x3d74,0x618,0x234,0x61e,0x408,0x622,0x40c,0x646,0x3d71,0x64e,0x451,
+0x650,0x230,0x65a,0x3c30,0x8660,0x3c34,0x860e,0x3c3c,0x602,0x3e8,0x604,0x238,0x608,0x3c40,0x60c,0x23c,
+0x60e,0x240,0x618,0x3cc,0x864e,0x244,0x604,0x248,0x60e,0x3c44,0x610,0x3c4c,0x618,0x43c,0x646,0x3c48,
+0x64e,0x3c50,0x865c,0x3c54,0x600,0x198,0x602,0x19a,0x604,0x19c,0x606,0x250,0x608,0x254,0x60c,0x258,
+0x60e,0x260,0x610,0x19f,0x612,0x3d90,0x618,0x39e,0x61e,0x410,0x622,0x414,0x646,0x3d94,0x650,0x25c,
+0x8660,0x3c58,0x8604,0x268,0x602,0x3c60,0x618,0x3d0,0x646,0x3c64,0x64e,0x26c,0x8662,0x3c68,0x602,0x272,
+0x618,0x27a,0x646,0x3c6d,0x64e,0x276,0x65a,0x3c78,0x8662,0x3c74,0x602,0x3c7c,0x60e,0x3c80,0x8646,0x3c84,
+0x600,0x3f0,0x602,0x286,0x606,0x1a2,0x60e,0x3c88,0x618,0x28e,0x646,0x3c8c,0x64e,0x28a,0x65a,0x3c94,
+0x8662,0x3c90,0x600,0x1a4,0x602,0x1a6,0x604,0x1a9,0x606,0x1ab,0x608,0x299,0x60c,0x29c,0x60e,0x45d,
+0x610,0x1ad,0x612,0x3d9c,0x616,0x2a0,0x618,0x3a2,0x61e,0x418,0x622,0x41c,0x636,0x341,0x646,0x3d99,
+0x8650,0x3d5,0x602,0x3ca8,0x860e,0x3cac,0x602,0x2a8,0x60e,0x3cb0,0x618,0x2b0,0x61e,0x420,0x622,0x424,
+0x646,0x3cb5,0x64e,0x2ac,0x8662,0x3cbc,0x602,0x2b5,0x604,0x2b8,0x60e,0x3cc0,0x618,0x2c1,0x646,0x3cc5,
+0x64c,0x430,0x864e,0x2bc,0x60e,0x3cd4,0x618,0x2c8,0x646,0x3cd8,0x64c,0x434,0x64e,0x2c4,0x65a,0x3ce0,
+0x8662,0x3cdc,0x600,0x1b2,0x602,0x1b4,0x604,0x1b6,0x606,0x2d1,0x608,0x2d5,0x60c,0x2d8,0x610,0x1b9,
+0x612,0x3dcc,0x614,0x2dc,0x616,0x2e0,0x618,0x3a6,0x61e,0x428,0x622,0x42c,0x636,0x35f,0x646,0x3dc8,
+0x648,0x3ce4,0x650,0x2e4,0x65a,0x3cec,0x8660,0x3ce8,0x606,0x3cf8,0x8646,0x3cfc,0x600,0x3d00,0x602,0x3d04,
+0x604,0x2e8,0x60e,0x3d0c,0x610,0x3d08,0x8646,0x3d10,0x60e,0x3d14,0x8610,0x3d18,0x600,0x3de4,0x602,0x1ba,
+0x604,0x2ec,0x606,0x3df0,0x608,0x464,0x60e,0x3d1c,0x610,0x2f0,0x612,0x3dec,0x8646,0x3de8,0x602,0x2f2,
+0x604,0x3d20,0x60e,0x2f6,0x618,0x2fa,0x646,0x3d24,0x8662,0x3d28,0x600,0x1c0,0x602,0x1c2,0x604,0x1c5,
+0x606,0x1c6,0x608,0x202,0x60c,0x207,0x60e,0x44f,0x610,0x1c9,0x612,0x3d46,0x614,0x1cb,0x618,0x39c,
+0x61e,0x402,0x622,0x406,0x646,0x3d43,0x64a,0x3c02,0x8650,0x20a,0x60e,0x3c06,0x646,0x3c0a,0x8662,0x3c0e,
+0x602,0x20e,0x604,0x212,0x60e,0x216,0x618,0x21a,0x864e,0x1cf,0x60e,0x3c16,0x618,0x21e,0x646,0x3c1a,
+0x64e,0x3c22,0x65a,0x3c26,0x8662,0x3c1e,0x600,0x1d0,0x602,0x1d2,0x604,0x1d5,0x606,0x3d7a,0x608,0x227,
+0x60c,0x22a,0x60e,0x22e,0x610,0x1d6,0x612,0x3d76,0x618,0x236,0x61e,0x40a,0x622,0x40e,0x646,0x3d73,
+0x64e,0x453,0x650,0x232,0x65a,0x3c32,0x8660,0x3c36,0x860e,0x3c3e,0x602,0x3ea,0x604,0x23a,0x608,0x3c42,
+0x60c,0x23e,0x60e,0x242,0x618,0x3ce,0x864e,0x246,0x604,0x24a,0x60e,0x3c46,0x610,0x3c4e,0x618,0x43e,
+0x646,0x3c4a,0x64e,0x3c52,0x65c,0x3c56,0x8662,0x3d2c,0x600,0x1d8,0x602,0x1da,0x604,0x1dc,0x606,0x252,
+0x608,0x256,0x60c,0x25a,0x610,0x1df,0x612,0x3d92,0x618,0x3a0,0x61e,0x412,0x622,0x416,0x646,0x3d96,
+0x650,0x25e,0x8660,0x3c5a,0x604,0x26a,0x8618,0x3e0,0x602,0x3c62,0x618,0x3d2,0x646,0x3c66,0x64e,0x26e,
+0x8662,0x3c6a,0x602,0x274,0x618,0x27c,0x646,0x3c6f,0x64e,0x278,0x65a,0x3c7a,0x8662,0x3c76,0x602,0x3c7e,
+0x60e,0x3c82,0x8646,0x3c86,0x600,0x3f2,0x602,0x288,0x606,0x1e2,0x60e,0x3c8a,0x618,0x290,0x646,0x3c8e,
+0x64e,0x28c,0x65a,0x3c96,0x8662,0x3c92,0x600,0x1e4,0x602,0x1e6,0x604,0x1e9,0x606,0x1eb,0x608,0x29b,
+0x60c,0x29e,0x60e,0x45f,0x610,0x1ed,0x612,0x3d9e,0x616,0x2a2,0x618,0x3a4,0x61e,0x41a,0x622,0x41e,
+0x636,0x343,0x646,0x3d9b,0x8650,0x3d7,0x602,0x3caa,0x860e,0x3cae,0x602,0x2aa,0x60e,0x3cb2,0x618,0x2b2,
+0x61e,0x422,0x622,0x426,0x646,0x3cb7,0x64e,0x2ae,0x8662,0x3cbe,0x602,0x2b7,0x604,0x2ba,0x60e,0x3cc2,
+0x618,0x2c3,0x646,0x3cc7,0x64c,0x432,0x864e,0x2be,0x60e,0x3cd6,0x610,0x3d2e,0x618,0x2ca,0x646,0x3cda,
+0x64c,0x436,0x64e,0x2c6,0x65a,0x3ce2,0x8662,0x3cde,0x600,0x1f2,0x602,0x1f4,0x604,0x1f6,0x606,0x2d3,
+0x608,0x2d7,0x60c,0x2da,0x610,0x1f9,0x612,0x3dce,0x614,0x2de,0x616,0x2e2,0x618,0x3a8,0x61e,0x42a,
+0x622,0x42e,0x636,0x361,0x646,0x3dca,0x648,0x3ce6,0x650,0x2e6,0x65a,0x3cee,0x8660,0x3cea,0x606,0x3cfa,
+0x8646,0x3cfe,0x600,0x3d02,0x602,0x3d06,0x604,0x2ea,0x60e,0x3d0e,0x610,0x3d0a,0x614,0x3d30,0x8646,0x3d12,
+0x60e,0x3d16,0x8610,0x3d1a,0x600,0x3de6,0x602,0x1fa,0x604,0x2ee,0x606,0x3df2,0x608,0x466,0x60e,0x3d1e,
+0x610,0x1fe,0x612,0x3dee,0x614,0x3d32,0x8646,0x3dea,0x602,0x2f4,0x604,0x3d22,0x60e,0x2f8,0x618,0x2fc,
+0x646,0x3d26,0x8662,0x3d2a,0x600,0x3fda,0x602,0x70a,0x8684,0x3f82,0x602,0x3f8,0x8608,0x3c4,0x8602,0x3fc,
+0x602,0x3fa,0x8608,0x3c6,0x8602,0x3fe,0x860e,0x3d36,0x8618,0x3dc,0x8618,0x3de,0x600,0x3f74,0x602,0x70c,
+0x608,0x3f72,0x60c,0x3f70,0x626,0x3e11,0x628,0x3e13,0x868a,0x3f78,0x600,0x3f90,0x602,0x710,0x626,0x3e31,
+0x8628,0x3e33,0x600,0x3f94,0x602,0x712,0x626,0x3e51,0x628,0x3e53,0x868a,0x3f98,0x600,0x3fb4,0x602,0x714,
+0x608,0x3fb2,0x60c,0x3fb0,0x610,0x754,0x626,0x3e71,0x8628,0x3e73,0x600,0x3ff0,0x602,0x718,0x626,0x3e91,
+0x8628,0x3e93,0x8628,0x3fd8,0x600,0x3fd4,0x602,0x71c,0x608,0x3fd2,0x60c,0x3fd0,0x610,0x756,0x8628,0x3eb3,
+0x600,0x3ff4,0x602,0x71e,0x626,0x3ed1,0x628,0x3ed3,0x868a,0x3ff8,0x600,0x3ee1,0x602,0x759,0x608,0x3f62,
+0x60c,0x3f60,0x626,0x3e01,0x628,0x3e03,0x684,0x3f6d,0x868a,0x3f66,0x600,0x3ee4,0x602,0x75a,0x626,0x3e21,
+0x8628,0x3e23,0x600,0x3ee9,0x602,0x75d,0x626,0x3e41,0x628,0x3e43,0x684,0x3f8d,0x868a,0x3f86,0x600,0x3eec,
+0x602,0x75e,0x608,0x3fa2,0x60c,0x3fa0,0x610,0x795,0x626,0x3e61,0x628,0x3e63,0x8684,0x3fac,0x600,0x3ef0,
+0x602,0x798,0x626,0x3e81,0x8628,0x3e83,0x626,0x3fc8,0x8628,0x3fca,0x600,0x3ef4,0x602,0x79a,0x608,0x3fc2,
+0x60c,0x3fc0,0x610,0x797,0x626,0x3ea1,0x628,0x3ea3,0x8684,0x3fcc,0x600,0x3ef9,0x602,0x79d,0x626,0x3ec1,
+0x628,0x3ec3,0x684,0x3fed,0x868a,0x3fe6,0x602,0x7a6,0x8610,0x7a8,0x8610,0x80e,0x60c,0x9a0,0x8610,0x9a4,
+0x8602,0x806,0x600,0x800,0x60c,0x9ac,0x8610,0x802,0x60c,0x982,0x8610,0x9b8,0x8610,0x9bc,0x600,0x81a,
+0x608,0x9c4,0x60c,0x832,0x8610,0x9c8,0x8602,0x818,0x8610,0x9cc,0x608,0x9dc,0x60c,0x81c,0x610,0x9e0,
+0x8616,0x9e4,0x8610,0x9e8,0x8610,0x9f0,0x8610,0x9d8,0x60c,0x9a2,0x8610,0x9a6,0x8602,0x8a6,0x600,0x8a0,
+0x60c,0x9ae,0x8610,0x8a2,0x60c,0x984,0x8610,0x9ba,0x8610,0x9be,0x600,0x8ba,0x608,0x9c6,0x60c,0x872,
+0x8610,0x9ca,0x8602,0x8b8,0x8610,0x9ce,0x608,0x9de,0x60c,0x8bc,0x610,0x9e2,0x8616,0x9e6,0x8610,0x9ea,
+0x8610,0x9f2,0x8610,0x9da,0x8610,0x8ae,0x861e,0x8ec,0x861e,0x8ee,0x8610,0x9b4,0x8610,0x9b6,0x8610,0x9d4,
+0x8610,0x9d6,0xca6,0xc44,0xca8,0xc46,0x8caa,0xc4a,0x8ca8,0xc48,0x8ca8,0xc4c,0x8ca8,0xd84,0x8ca8,0xda6,
+0x8ca8,0xd80,0x9278,0x1252,0x9278,0x1262,0x9278,0x1268,0x137c,0x1396,0x93ae,0x1398,0x167c,0x1696,0x16ac,0x1690,
+0x96ae,0x1698,0x97ae,0x1728,0x177c,0x1794,0x97ae,0x1798,0x977c,0x1796,0x98ac,0x1890,0x99aa,0x1980,0x1984,0x1995,
+0x19aa,0x198e,0x99ac,0x1990,0x1a7c,0x1a94,0x9aae,0x1a98,0x9a7c,0x1a96,0x1b94,0x1bb4,0x1b9e,0x1bb9,0x9bbe,0x1bbc,
+0xa05c,0x204c,0xb66a,0x360c,0xb66a,0x3610,0xb66a,0x3614,0xb66a,0x3618,0xb66a,0x361c,0xb66a,0x3624,0xb66a,0x3676,
+0xb66a,0x367a,0xb66a,0x3680,0xb66a,0x3682,0xb66a,0x3686,0x600,0x3f9a,0x602,0x3f9c,0x8684,0x3f9e,0x600,0x3fba,
+0x602,0x3fbc,0x8684,0x3fbe,0x8670,0x4334,0x8670,0x4336,0x8670,0x435c,0x8670,0x439a,0x8670,0x439e,0x8670,0x439c,
+0x8670,0x4408,0x8670,0x4412,0x8670,0x4418,0x8670,0x4448,0x8670,0x444c,0x8670,0x4482,0x8670,0x4488,0x8670,0x448e,
+0x8670,0x4492,0x8670,0x44da,0x8670,0x44c4,0x8670,0x44e0,0x8670,0x44e2,0x8670,0x44e8,0x8670,0x44ea,0x8670,0x44f0,
+0x8670,0x44f2,0x8670,0x4500,0x8670,0x4502,0x8670,0x45c0,0x8670,0x45c2,0x8670,0x4508,0x8670,0x450a,0x8670,0x4510,
+0x8670,0x4512,0x8670,0x45c4,0x8670,0x45c6,0x8670,0x4558,0x8670,0x455a,0x8670,0x455c,0x8670,0x455e,0x8670,0x45d4,
+0x8670,0x45d6,0x8670,0x45d8,0x8670,0x45da,0xe132,0x6128,0xe132,0x6098,0xe132,0x609c,0xe132,0x60a0,0xe132,0x60a4,
+0xe132,0x60a8,0xe132,0x60ac,0xe132,0x60b0,0xe132,0x60b4,0xe132,0x60b8,0xe132,0x60bc,0xe132,0x60c0,0xe132,0x60c4,
+0xe132,0x60ca,0xe132,0x60ce,0xe132,0x60d2,0x6132,0x60e0,0xe134,0x60e2,0x6132,0x60e6,0xe134,0x60e8,0x6132,0x60ec,
+0xe134,0x60ee,0x6132,0x60f2,0xe134,0x60f4,0x6132,0x60f8,0xe134,0x60fa,0xe132,0x613c,0xe132,0x61e8,0xe132,0x6158,
+0xe132,0x615c,0xe132,0x6160,0xe132,0x6164,0xe132,0x6168,0xe132,0x616c,0xe132,0x6170,0xe132,0x6174,0xe132,0x6178,
+0xe132,0x617c,0xe132,0x6180,0xe132,0x6184,0xe132,0x618a,0xe132,0x618e,0xe132,0x6192,0x6132,0x61a0,0xe134,0x61a2,
+0x6132,0x61a6,0xe134,0x61a8,0x6132,0x61ac,0xe134,0x61ae,0x6132,0x61b2,0xe134,0x61b4,0x6132,0x61b8,0xe134,0x61ba,
+0xe132,0x61ee,0xe132,0x61f0,0xe132,0x61f2,0xe132,0x61f4,0xe132,0x61fc,0xb489,0x2e82,0x2134,0xb489,0x2e82,0x2138,
+0xb489,0x2e82,0x2156,0xb489,0x49c2,0x225c,0xb489,0x49c2,0x225e,0x3489,0xcf82,0x2696,0xb489,0xd5c2,0x2698,0x348b,
+0x2c02,0x2978,0x348b,0x2e82,0x2976,0xb48b,0x2f42,0x297c,0xb48b,0x6bc2,0x2b74,0xb48b,0x6bc2,0x2b76,0xb48d,0x4c02,
+0x3270,2,0xe602,0x41,0x302,0x600,0x3d4c,0x602,0x3d48,0x606,0x3d54,0x8612,0x3d50,0xe602,0x41,0x308,
+0x8608,0x3bc,0xe602,0x41,0x30a,0x8602,0x3f4,0xca02,0x43,0x327,0x8602,0x3c10,0xe602,0x45,0x302,0x600,
+0x3d80,0x602,0x3d7c,0x606,0x3d88,0x8612,0x3d84,0xe602,0x49,0x308,0x8602,0x3c5c,0xe602,0x4f,0x302,0x600,
+0x3da4,0x602,0x3da0,0x606,0x3dac,0x8612,0x3da8,0xe602,0x4f,0x303,0x602,0x3c98,0x608,0x458,0x8610,0x3c9c,
+0xe602,0x4f,0x308,0x8608,0x454,0xe602,0x55,0x308,0x600,0x3b6,0x602,0x3ae,0x608,0x3aa,0x8618,0x3b2,
+0xe602,0x61,0x302,0x600,0x3d4e,0x602,0x3d4a,0x606,0x3d56,0x8612,0x3d52,0xe602,0x61,0x308,0x8608,0x3be,
+0xe602,0x61,0x30a,0x8602,0x3f6,0xca02,0x63,0x327,0x8602,0x3c12,0xe602,0x65,0x302,0x600,0x3d82,0x602,
+0x3d7e,0x606,0x3d8a,0x8612,0x3d86,0xe602,0x69,0x308,0x8602,0x3c5e,0xe602,0x6f,0x302,0x600,0x3da6,0x602,
+0x3da2,0x606,0x3dae,0x8612,0x3daa,0xe602,0x6f,0x303,0x602,0x3c9a,0x608,0x45a,0x8610,0x3c9e,0xe602,0x6f,
+0x308,0x8608,0x456,0xe602,0x75,0x308,0x600,0x3b8,0x602,0x3b0,0x608,0x3ac,0x8618,0x3b4,0xe602,0x41,
+0x306,0x600,0x3d60,0x602,0x3d5c,0x606,0x3d68,0x8612,0x3d64,0xe602,0x61,0x306,0x600,0x3d62,0x602,0x3d5e,
+0x606,0x3d6a,0x8612,0x3d66,0xe602,0x45,0x304,0x600,0x3c28,0x8602,0x3c2c,0xe602,0x65,0x304,0x600,0x3c2a,
+0x8602,0x3c2e,0xe602,0x4f,0x304,0x600,0x3ca0,0x8602,0x3ca4,0xe602,0x6f,0x304,0x600,0x3ca2,0x8602,0x3ca6,
+0xe602,0x53,0x301,0x860e,0x3cc8,0xe602,0x73,0x301,0x860e,0x3cca,0xe602,0x53,0x30c,0x860e,0x3ccc,0xe602,
+0x73,0x30c,0x860e,0x3cce,0xe602,0x55,0x303,0x8602,0x3cf0,0xe602,0x75,0x303,0x8602,0x3cf2,0xe602,0x55,
+0x304,0x8610,0x3cf4,0xe602,0x75,0x304,0x8610,0x3cf6,0xd802,0x4f,0x31b,0x600,0x3db8,0x602,0x3db4,0x606,
+0x3dc0,0x612,0x3dbc,0x8646,0x3dc4,0xd802,0x6f,0x31b,0x600,0x3dba,0x602,0x3db6,0x606,0x3dc2,0x612,0x3dbe,
+0x8646,0x3dc6,0xd802,0x55,0x31b,0x600,0x3dd4,0x602,0x3dd0,0x606,0x3ddc,0x612,0x3dd8,0x8646,0x3de0,0xd802,
+0x75,0x31b,0x600,0x3dd6,0x602,0x3dd2,0x606,0x3dde,0x612,0x3dda,0x8646,0x3de2,0xca02,0x4f,0x328,0x8608,
+0x3d8,0xca02,0x6f,0x328,0x8608,0x3da,0xe602,0x41,0x307,0x8608,0x3c0,0xe602,0x61,0x307,0x8608,0x3c2,
+0xca02,0x45,0x327,0x860c,0x3c38,0xca02,0x65,0x327,0x860c,0x3c3a,0xe602,0x4f,0x307,0x8608,0x460,0xe602,
+0x6f,0x307,0x8608,0x462,0xe602,0x3b1,0x301,0x868a,0x3f68,0xe602,0x3b7,0x301,0x868a,0x3f88,0xe602,0x3b9,
+0x308,0x600,0x3fa4,0x602,0x720,0x8684,0x3fae,0xe602,0x3c5,0x308,0x600,0x3fc4,0x602,0x760,0x8684,0x3fce,
+0xe602,0x3c9,0x301,0x868a,0x3fe8,2,0xcc6,0xcc2,0x99aa,0x1996,2,0xdd9,0xdcf,0x9b94,0x1bba,0xdc02,
+0x4c,0x323,0x8608,0x3c70,0xdc02,0x6c,0x323,0x8608,0x3c72,0xdc02,0x52,0x323,0x8608,0x3cb8,0xdc02,0x72,
+0x323,0x8608,0x3cba,0xdc02,0x53,0x323,0x860e,0x3cd0,0xdc02,0x73,0x323,0x860e,0x3cd2,0xdc02,0x41,0x323,
+0x604,0x3d58,0x860c,0x3d6c,0xdc02,0x61,0x323,0x604,0x3d5a,0x860c,0x3d6e,0xdc02,0x45,0x323,0x8604,0x3d8c,
+0xdc02,0x65,0x323,0x8604,0x3d8e,0xdc02,0x4f,0x323,0x8604,0x3db0,0xdc02,0x6f,0x323,0x8604,0x3db2,0xe602,
+0x3b1,0x313,0x600,0x3e05,0x602,0x3e09,0x684,0x3e0d,0x868a,0x3f00,0xe602,0x3b1,0x314,0x600,0x3e07,0x602,
+0x3e0b,0x684,0x3e0f,0x868a,0x3f02,0x1f00,0xe643,0x3b1,0x313,0x300,0x868a,0x3f04,0x1f01,0xe643,0x3b1,0x314,
+0x300,0x868a,0x3f06,0x1f00,0xe643,0x3b1,0x313,0x301,0x868a,0x3f08,0x1f01,0xe643,0x3b1,0x314,0x301,0x868a,
+0x3f0a,0x1f00,0xe643,0x3b1,0x313,0x342,0x868a,0x3f0c,0x1f01,0xe643,0x3b1,0x314,0x342,0x868a,0x3f0e,0xe602,
+0x391,0x313,0x600,0x3e15,0x602,0x3e19,0x684,0x3e1d,0x868a,0x3f10,0xe602,0x391,0x314,0x600,0x3e17,0x602,
+0x3e1b,0x684,0x3e1f,0x868a,0x3f12,0x1f08,0xe643,0x391,0x313,0x300,0x868a,0x3f14,0x1f09,0xe643,0x391,0x314,
+0x300,0x868a,0x3f16,0x1f08,0xe643,0x391,0x313,0x301,0x868a,0x3f18,0x1f09,0xe643,0x391,0x314,0x301,0x868a,
+0x3f1a,0x1f08,0xe643,0x391,0x313,0x342,0x868a,0x3f1c,0x1f09,0xe643,0x391,0x314,0x342,0x868a,0x3f1e,0xe602,
+0x3b5,0x313,0x600,0x3e24,0x8602,0x3e28,0xe602,0x3b5,0x314,0x600,0x3e26,0x8602,0x3e2a,0xe602,0x395,0x313,
+0x600,0x3e34,0x8602,0x3e38,0xe602,0x395,0x314,0x600,0x3e36,0x8602,0x3e3a,0xe602,0x3b7,0x313,0x600,0x3e45,
+0x602,0x3e49,0x684,0x3e4d,0x868a,0x3f20,0xe602,0x3b7,0x314,0x600,0x3e47,0x602,0x3e4b,0x684,0x3e4f,0x868a,
+0x3f22,0x1f20,0xe643,0x3b7,0x313,0x300,0x868a,0x3f24,0x1f21,0xe643,0x3b7,0x314,0x300,0x868a,0x3f26,0x1f20,
+0xe643,0x3b7,0x313,0x301,0x868a,0x3f28,0x1f21,0xe643,0x3b7,0x314,0x301,0x868a,0x3f2a,0x1f20,0xe643,0x3b7,
+0x313,0x342,0x868a,0x3f2c,0x1f21,0xe643,0x3b7,0x314,0x342,0x868a,0x3f2e,0xe602,0x397,0x313,0x600,0x3e55,
+0x602,0x3e59,0x684,0x3e5d,0x868a,0x3f30,0xe602,0x397,0x314,0x600,0x3e57,0x602,0x3e5b,0x684,0x3e5f,0x868a,
+0x3f32,0x1f28,0xe643,0x397,0x313,0x300,0x868a,0x3f34,0x1f29,0xe643,0x397,0x314,0x300,0x868a,0x3f36,0x1f28,
+0xe643,0x397,0x313,0x301,0x868a,0x3f38,0x1f29,0xe643,0x397,0x314,0x301,0x868a,0x3f3a,0x1f28,0xe643,0x397,
+0x313,0x342,0x868a,0x3f3c,0x1f29,0xe643,0x397,0x314,0x342,0x868a,0x3f3e,0xe602,0x3b9,0x313,0x600,0x3e64,
+0x602,0x3e68,0x8684,0x3e6c,0xe602,0x3b9,0x314,0x600,0x3e66,0x602,0x3e6a,0x8684,0x3e6e,0xe602,0x399,0x313,
+0x600,0x3e74,0x602,0x3e78,0x8684,0x3e7c,0xe602,0x399,0x314,0x600,0x3e76,0x602,0x3e7a,0x8684,0x3e7e,0xe602,
+0x3bf,0x313,0x600,0x3e84,0x8602,0x3e88,0xe602,0x3bf,0x314,0x600,0x3e86,0x8602,0x3e8a,0xe602,0x39f,0x313,
+0x600,0x3e94,0x8602,0x3e98,0xe602,0x39f,0x314,0x600,0x3e96,0x8602,0x3e9a,0xe602,0x3c5,0x313,0x600,0x3ea4,
+0x602,0x3ea8,0x8684,0x3eac,0xe602,0x3c5,0x314,0x600,0x3ea6,0x602,0x3eaa,0x8684,0x3eae,0xe602,0x3a5,0x314,
+0x600,0x3eb6,0x602,0x3eba,0x8684,0x3ebe,0xe602,0x3c9,0x313,0x600,0x3ec5,0x602,0x3ec9,0x684,0x3ecd,0x868a,
+0x3f40,0xe602,0x3c9,0x314,0x600,0x3ec7,0x602,0x3ecb,0x684,0x3ecf,0x868a,0x3f42,0x1f60,0xe643,0x3c9,0x313,
+0x300,0x868a,0x3f44,0x1f61,0xe643,0x3c9,0x314,0x300,0x868a,0x3f46,0x1f60,0xe643,0x3c9,0x313,0x301,0x868a,
+0x3f48,0x1f61,0xe643,0x3c9,0x314,0x301,0x868a,0x3f4a,0x1f60,0xe643,0x3c9,0x313,0x342,0x868a,0x3f4c,0x1f61,
+0xe643,0x3c9,0x314,0x342,0x868a,0x3f4e,0xe602,0x3a9,0x313,0x600,0x3ed5,0x602,0x3ed9,0x684,0x3edd,0x868a,
+0x3f50,0xe602,0x3a9,0x314,0x600,0x3ed7,0x602,0x3edb,0x684,0x3edf,0x868a,0x3f52,0x1f68,0xe643,0x3a9,0x313,
+0x300,0x868a,0x3f54,0x1f69,0xe643,0x3a9,0x314,0x300,0x868a,0x3f56,0x1f68,0xe643,0x3a9,0x313,0x301,0x868a,
+0x3f58,0x1f69,0xe643,0x3a9,0x314,0x301,0x868a,0x3f5a,0x1f68,0xe643,0x3a9,0x313,0x342,0x868a,0x3f5c,0x1f69,
+0xe643,0x3a9,0x314,0x342,0x868a,0x3f5e,0xe602,0x3b1,0x300,0x868a,0x3f64,0xe602,0x3b7,0x300,0x868a,0x3f84,
+0xe602,0x3c9,0x300,0x868a,0x3fe4,0xe602,0x3b1,0x342,0x868a,0x3f6e,0xe602,0x3b7,0x342,0x868a,0x3f8e,0xe602,
+0x3c9,0x342,0x868a,0x3fee,3,0xe602,0x41,0x300,0xe602,0x41,0x301,0xe602,0x41,0x303,0xe602,0x45,
+0x300,0xe602,0x45,0x301,0xe602,0x45,0x308,0xe602,0x49,0x300,0xe602,0x49,0x301,0xe602,0x49,0x302,
+0xe602,0x4e,0x303,0xe602,0x4f,0x300,0xe602,0x4f,0x301,0xe602,0x55,0x300,0xe602,0x55,0x301,0xe602,
+0x55,0x302,0xe602,0x59,0x301,0xe602,0x61,0x300,0xe602,0x61,0x301,0xe602,0x61,0x303,0xe602,0x65,
+0x300,0xe602,0x65,0x301,0xe602,0x65,0x308,0xe602,0x69,0x300,0xe602,0x69,0x301,0xe602,0x69,0x302,
+0xe602,0x6e,0x303,0xe602,0x6f,0x300,0xe602,0x6f,0x301,0xe602,0x75,0x300,0xe602,0x75,0x301,0xe602,
+0x75,0x302,0xe602,0x79,0x301,0xe602,0x79,0x308,0xe602,0x41,0x304,0xe602,0x61,0x304,0xca02,0x41,
+0x328,0xca02,0x61,0x328,0xe602,0x43,0x301,0xe602,0x63,0x301,0xe602,0x43,0x302,0xe602,0x63,0x302,
+0xe602,0x43,0x307,0xe602,0x63,0x307,0xe602,0x43,0x30c,0xe602,0x63,0x30c,0xe602,0x44,0x30c,0xe602,
+0x64,0x30c,0xe602,0x45,0x306,0xe602,0x65,0x306,0xe602,0x45,0x307,0xe602,0x65,0x307,0xca02,0x45,
+0x328,0xca02,0x65,0x328,0xe602,0x45,0x30c,0xe602,0x65,0x30c,0xe602,0x47,0x302,0xe602,0x67,0x302,
+0xe602,0x47,0x306,0xe602,0x67,0x306,0xe602,0x47,0x307,0xe602,0x67,0x307,0xca02,0x47,0x327,0xca02,
+0x67,0x327,0xe602,0x48,0x302,0xe602,0x68,0x302,0xe602,0x49,0x303,0xe602,0x69,0x303,0xe602,0x49,
+0x304,0xe602,0x69,0x304,0xe602,0x49,0x306,0xe602,0x69,0x306,0xca02,0x49,0x328,0xca02,0x69,0x328,
+0xe602,0x49,0x307,0xe602,0x4a,0x302,0xe602,0x6a,0x302,0xca02,0x4b,0x327,0xca02,0x6b,0x327,0xe602,
+0x4c,0x301,0xe602,0x6c,0x301,0xca02,0x4c,0x327,0xca02,0x6c,0x327,0xe602,0x4c,0x30c,0xe602,0x6c,
+0x30c,0xe602,0x4e,0x301,0xe602,0x6e,0x301,0xca02,0x4e,0x327,0xca02,0x6e,0x327,0xe602,0x4e,0x30c,
+0xe602,0x6e,0x30c,0xe602,0x4f,0x306,0xe602,0x6f,0x306,0xe602,0x4f,0x30b,0xe602,0x6f,0x30b,0xe602,
+0x52,0x301,0xe602,0x72,0x301,0xca02,0x52,0x327,0xca02,0x72,0x327,0xe602,0x52,0x30c,0xe602,0x72,
+0x30c,0xe602,0x53,0x302,0xe602,0x73,0x302,0xca02,0x53,0x327,0xca02,0x73,0x327,0xca02,0x54,0x327,
+0xca02,0x74,0x327,0xe602,0x54,0x30c,0xe602,0x74,0x30c,0xe602,0x55,0x306,0xe602,0x75,0x306,0xe602,
+0x55,0x30a,0xe602,0x75,0x30a,0xe602,0x55,0x30b,0xe602,0x75,0x30b,0xca02,0x55,0x328,0xca02,0x75,
+0x328,0xe602,0x57,0x302,0xe602,0x77,0x302,0xe602,0x59,0x302,0xe602,0x79,0x302,0xe602,0x59,0x308,
+0xe602,0x5a,0x301,0xe602,0x7a,0x301,0xe602,0x5a,0x307,0xe602,0x7a,0x307,0xe602,0x5a,0x30c,0xe602,
+0x7a,0x30c,0xe602,0x41,0x30c,0xe602,0x61,0x30c,0xe602,0x49,0x30c,0xe602,0x69,0x30c,0xe602,0x4f,
+0x30c,0xe602,0x6f,0x30c,0xe602,0x55,0x30c,0xe602,0x75,0x30c,0xdc,0xe643,0x55,0x308,0x304,0xfc,
+0xe643,0x75,0x308,0x304,0xdc,0xe643,0x55,0x308,0x301,0xfc,0xe643,0x75,0x308,0x301,0xdc,0xe643,
+0x55,0x308,0x30c,0xfc,0xe643,0x75,0x308,0x30c,0xdc,0xe643,0x55,0x308,0x300,0xfc,0xe643,0x75,
+0x308,0x300,0xc4,0xe643,0x41,0x308,0x304,0xe4,0xe643,0x61,0x308,0x304,0x226,0xe643,0x41,0x307,
+0x304,0x227,0xe643,0x61,0x307,0x304,0xe602,0xc6,0x304,0xe602,0xe6,0x304,0xe602,0x47,0x30c,0xe602,
+0x67,0x30c,0xe602,0x4b,0x30c,0xe602,0x6b,0x30c,0x1ea,0xe643,0x4f,0x328,0x304,0x1eb,0xe643,0x6f,
+0x328,0x304,0xe602,0x1b7,0x30c,0xe602,0x292,0x30c,0xe602,0x6a,0x30c,0xe602,0x47,0x301,0xe602,0x67,
+0x301,0xe602,0x4e,0x300,0xe602,0x6e,0x300,0xc5,0xe643,0x41,0x30a,0x301,0xe5,0xe643,0x61,0x30a,
+0x301,0xe602,0xc6,0x301,0xe602,0xe6,0x301,0xe602,0xd8,0x301,0xe602,0xf8,0x301,0xe602,0x41,0x30f,
+0xe602,0x61,0x30f,0xe602,0x41,0x311,0xe602,0x61,0x311,0xe602,0x45,0x30f,0xe602,0x65,0x30f,0xe602,
+0x45,0x311,0xe602,0x65,0x311,0xe602,0x49,0x30f,0xe602,0x69,0x30f,0xe602,0x49,0x311,0xe602,0x69,
+0x311,0xe602,0x4f,0x30f,0xe602,0x6f,0x30f,0xe602,0x4f,0x311,0xe602,0x6f,0x311,0xe602,0x52,0x30f,
+0xe602,0x72,0x30f,0xe602,0x52,0x311,0xe602,0x72,0x311,0xe602,0x55,0x30f,0xe602,0x75,0x30f,0xe602,
+0x55,0x311,0xe602,0x75,0x311,0xdc02,0x53,0x326,0xdc02,0x73,0x326,0xdc02,0x54,0x326,0xdc02,0x74,
+0x326,0xe602,0x48,0x30c,0xe602,0x68,0x30c,0xd6,0xe643,0x4f,0x308,0x304,0xf6,0xe643,0x6f,0x308,
+0x304,0xd5,0xe643,0x4f,0x303,0x304,0xf5,0xe643,0x6f,0x303,0x304,0x22e,0xe643,0x4f,0x307,0x304,
+0x22f,0xe643,0x6f,0x307,0x304,0xe602,0x59,0x304,0xe602,0x79,0x304,0xe602,0xa8,0x301,0xe602,0x391,
+0x301,0xe602,0x395,0x301,0xe602,0x397,0x301,0xe602,0x399,0x301,0xe602,0x39f,0x301,0xe602,0x3a5,0x301,
+0xe602,0x3a9,0x301,0x3ca,0xe643,0x3b9,0x308,0x301,0xe602,0x399,0x308,0xe602,0x3a5,0x308,0xe602,0x3b5,
+0x301,0xe602,0x3b9,0x301,0x3cb,0xe643,0x3c5,0x308,0x301,0xe602,0x3bf,0x301,0xe602,0x3c5,0x301,0xe602,
+0x3d2,0x301,0xe602,0x3d2,0x308,0xe602,0x415,0x300,0xe602,0x415,0x308,0xe602,0x413,0x301,0xe602,0x406,
+0x308,0xe602,0x41a,0x301,0xe602,0x418,0x300,0xe602,0x423,0x306,0xe602,0x418,0x306,0xe602,0x438,0x306,
+0xe602,0x435,0x300,0xe602,0x435,0x308,0xe602,0x433,0x301,0xe602,0x456,0x308,0xe602,0x43a,0x301,0xe602,
+0x438,0x300,0xe602,0x443,0x306,0xe602,0x474,0x30f,0xe602,0x475,0x30f,0xe602,0x416,0x306,0xe602,0x436,
+0x306,0xe602,0x410,0x306,0xe602,0x430,0x306,0xe602,0x410,0x308,0xe602,0x430,0x308,0xe602,0x415,0x306,
+0xe602,0x435,0x306,0xe602,0x4d8,0x308,0xe602,0x4d9,0x308,0xe602,0x416,0x308,0xe602,0x436,0x308,0xe602,
+0x417,0x308,0xe602,0x437,0x308,0xe602,0x418,0x304,0xe602,0x438,0x304,0xe602,0x418,0x308,0xe602,0x438,
+0x308,0xe602,0x41e,0x308,0xe602,0x43e,0x308,0xe602,0x4e8,0x308,0xe602,0x4e9,0x308,0xe602,0x42d,0x308,
+0xe602,0x44d,0x308,0xe602,0x423,0x304,0xe602,0x443,0x304,0xe602,0x423,0x308,0xe602,0x443,0x308,0xe602,
+0x423,0x30b,0xe602,0x443,0x30b,0xe602,0x427,0x308,0xe602,0x447,0x308,0xe602,0x42b,0x308,0xe602,0x44b,
+0x308,0xe602,0x627,0x653,0xe602,0x627,0x654,0xe602,0x648,0x654,0xdc02,0x627,0x655,0xe602,0x64a,0x654,
+0xe602,0x6d5,0x654,0xe602,0x6c1,0x654,0xe602,0x6d2,0x654,0x702,0x928,0x93c,0x702,0x930,0x93c,0x702,
+0x933,0x93c,2,0x9c7,0x9be,2,0x9c7,0x9d7,2,0xb47,0xb56,2,0xb47,0xb3e,2,0xb47,
+0xb57,2,0xb92,0xbd7,2,0xbc6,0xbbe,2,0xbc7,0xbbe,2,0xbc6,0xbd7,0x5b02,0xc46,0xc56,
+2,0xcbf,0xcd5,2,0xcc6,0xcd5,2,0xcc6,0xcd6,0xcca,0x43,0xcc6,0xcc2,0xcd5,2,0xd46,
+0xd3e,2,0xd47,0xd3e,2,0xd46,0xd57,0x902,0xdd9,0xdca,0xddc,0x943,0xdd9,0xdcf,0xdca,2,
+0xdd9,0xddf,2,0x1025,0x102e,2,0x1b05,0x1b35,2,0x1b07,0x1b35,2,0x1b09,0x1b35,2,0x1b0b,
+0x1b35,2,0x1b0d,0x1b35,2,0x1b11,0x1b35,2,0x1b3a,0x1b35,2,0x1b3c,0x1b35,2,0x1b3e,0x1b35,
+2,0x1b3f,0x1b35,2,0x1b42,0x1b35,0xdc02,0x41,0x325,0xdc02,0x61,0x325,0xe602,0x42,0x307,0xe602,
+0x62,0x307,0xdc02,0x42,0x323,0xdc02,0x62,0x323,0xdc02,0x42,0x331,0xdc02,0x62,0x331,0xc7,0xe643,
+0x43,0x327,0x301,0xe7,0xe643,0x63,0x327,0x301,0xe602,0x44,0x307,0xe602,0x64,0x307,0xdc02,0x44,
+0x323,0xdc02,0x64,0x323,0xdc02,0x44,0x331,0xdc02,0x64,0x331,0xca02,0x44,0x327,0xca02,0x64,0x327,
+0xdc02,0x44,0x32d,0xdc02,0x64,0x32d,0x112,0xe643,0x45,0x304,0x300,0x113,0xe643,0x65,0x304,0x300,
+0x112,0xe643,0x45,0x304,0x301,0x113,0xe643,0x65,0x304,0x301,0xdc02,0x45,0x32d,0xdc02,0x65,0x32d,
+0xdc02,0x45,0x330,0xdc02,0x65,0x330,0x228,0xe643,0x45,0x327,0x306,0x229,0xe643,0x65,0x327,0x306,
+0xe602,0x46,0x307,0xe602,0x66,0x307,0xe602,0x47,0x304,0xe602,0x67,0x304,0xe602,0x48,0x307,0xe602,
+0x68,0x307,0xdc02,0x48,0x323,0xdc02,0x68,0x323,0xe602,0x48,0x308,0xe602,0x68,0x308,0xca02,0x48,
+0x327,0xca02,0x68,0x327,0xdc02,0x48,0x32e,0xdc02,0x68,0x32e,0xdc02,0x49,0x330,0xdc02,0x69,0x330,
+0xcf,0xe643,0x49,0x308,0x301,0xef,0xe643,0x69,0x308,0x301,0xe602,0x4b,0x301,0xe602,0x6b,0x301,
+0xdc02,0x4b,0x323,0xdc02,0x6b,0x323,0xdc02,0x4b,0x331,0xdc02,0x6b,0x331,0x1e36,0xe643,0x4c,0x323,
+0x304,0x1e37,0xe643,0x6c,0x323,0x304,0xdc02,0x4c,0x331,0xdc02,0x6c,0x331,0xdc02,0x4c,0x32d,0xdc02,
+0x6c,0x32d,0xe602,0x4d,0x301,0xe602,0x6d,0x301,0xe602,0x4d,0x307,0xe602,0x6d,0x307,0xdc02,0x4d,
+0x323,0xdc02,0x6d,0x323,0xe602,0x4e,0x307,0xe602,0x6e,0x307,0xdc02,0x4e,0x323,0xdc02,0x6e,0x323,
+0xdc02,0x4e,0x331,0xdc02,0x6e,0x331,0xdc02,0x4e,0x32d,0xdc02,0x6e,0x32d,0xd5,0xe643,0x4f,0x303,
+0x301,0xf5,0xe643,0x6f,0x303,0x301,0xd5,0xe643,0x4f,0x303,0x308,0xf5,0xe643,0x6f,0x303,0x308,
+0x14c,0xe643,0x4f,0x304,0x300,0x14d,0xe643,0x6f,0x304,0x300,0x14c,0xe643,0x4f,0x304,0x301,0x14d,
+0xe643,0x6f,0x304,0x301,0xe602,0x50,0x301,0xe602,0x70,0x301,0xe602,0x50,0x307,0xe602,0x70,0x307,
+0xe602,0x52,0x307,0xe602,0x72,0x307,0x1e5a,0xe643,0x52,0x323,0x304,0x1e5b,0xe643,0x72,0x323,0x304,
+0xdc02,0x52,0x331,0xdc02,0x72,0x331,0xe602,0x53,0x307,0xe602,0x73,0x307,0x15a,0xe643,0x53,0x301,
+0x307,0x15b,0xe643,0x73,0x301,0x307,0x160,0xe643,0x53,0x30c,0x307,0x161,0xe643,0x73,0x30c,0x307,
+0x1e62,0xe643,0x53,0x323,0x307,0x1e63,0xe643,0x73,0x323,0x307,0xe602,0x54,0x307,0xe602,0x74,0x307,
+0xdc02,0x54,0x323,0xdc02,0x74,0x323,0xdc02,0x54,0x331,0xdc02,0x74,0x331,0xdc02,0x54,0x32d,0xdc02,
+0x74,0x32d,0xdc02,0x55,0x324,0xdc02,0x75,0x324,0xdc02,0x55,0x330,0xdc02,0x75,0x330,0xdc02,0x55,
+0x32d,0xdc02,0x75,0x32d,0x168,0xe643,0x55,0x303,0x301,0x169,0xe643,0x75,0x303,0x301,0x16a,0xe643,
+0x55,0x304,0x308,0x16b,0xe643,0x75,0x304,0x308,0xe602,0x56,0x303,0xe602,0x76,0x303,0xdc02,0x56,
+0x323,0xdc02,0x76,0x323,0xe602,0x57,0x300,0xe602,0x77,0x300,0xe602,0x57,0x301,0xe602,0x77,0x301,
+0xe602,0x57,0x308,0xe602,0x77,0x308,0xe602,0x57,0x307,0xe602,0x77,0x307,0xdc02,0x57,0x323,0xdc02,
+0x77,0x323,0xe602,0x58,0x307,0xe602,0x78,0x307,0xe602,0x58,0x308,0xe602,0x78,0x308,0xe602,0x59,
+0x307,0xe602,0x79,0x307,0xe602,0x5a,0x302,0xe602,0x7a,0x302,0xdc02,0x5a,0x323,0xdc02,0x7a,0x323,
+0xdc02,0x5a,0x331,0xdc02,0x7a,0x331,0xdc02,0x68,0x331,0xe602,0x74,0x308,0xe602,0x77,0x30a,0xe602,
+0x79,0x30a,0xe602,0x17f,0x307,0xe602,0x41,0x309,0xe602,0x61,0x309,0xc2,0xe643,0x41,0x302,0x301,
+0xe2,0xe643,0x61,0x302,0x301,0xc2,0xe643,0x41,0x302,0x300,0xe2,0xe643,0x61,0x302,0x300,0xc2,
+0xe643,0x41,0x302,0x309,0xe2,0xe643,0x61,0x302,0x309,0xc2,0xe643,0x41,0x302,0x303,0xe2,0xe643,
+0x61,0x302,0x303,0x1ea0,0xe643,0x41,0x323,0x302,0x1ea1,0xe643,0x61,0x323,0x302,0x102,0xe643,0x41,
+0x306,0x301,0x103,0xe643,0x61,0x306,0x301,0x102,0xe643,0x41,0x306,0x300,0x103,0xe643,0x61,0x306,
+0x300,0x102,0xe643,0x41,0x306,0x309,0x103,0xe643,0x61,0x306,0x309,0x102,0xe643,0x41,0x306,0x303,
+0x103,0xe643,0x61,0x306,0x303,0x1ea0,0xe643,0x41,0x323,0x306,0x1ea1,0xe643,0x61,0x323,0x306,0xe602,
+0x45,0x309,0xe602,0x65,0x309,0xe602,0x45,0x303,0xe602,0x65,0x303,0xca,0xe643,0x45,0x302,0x301,
+0xea,0xe643,0x65,0x302,0x301,0xca,0xe643,0x45,0x302,0x300,0xea,0xe643,0x65,0x302,0x300,0xca,
+0xe643,0x45,0x302,0x309,0xea,0xe643,0x65,0x302,0x309,0xca,0xe643,0x45,0x302,0x303,0xea,0xe643,
+0x65,0x302,0x303,0x1eb8,0xe643,0x45,0x323,0x302,0x1eb9,0xe643,0x65,0x323,0x302,0xe602,0x49,0x309,
+0xe602,0x69,0x309,0xdc02,0x49,0x323,0xdc02,0x69,0x323,0xe602,0x4f,0x309,0xe602,0x6f,0x309,0xd4,
+0xe643,0x4f,0x302,0x301,0xf4,0xe643,0x6f,0x302,0x301,0xd4,0xe643,0x4f,0x302,0x300,0xf4,0xe643,
+0x6f,0x302,0x300,0xd4,0xe643,0x4f,0x302,0x309,0xf4,0xe643,0x6f,0x302,0x309,0xd4,0xe643,0x4f,
+0x302,0x303,0xf4,0xe643,0x6f,0x302,0x303,0x1ecc,0xe643,0x4f,0x323,0x302,0x1ecd,0xe643,0x6f,0x323,
+0x302,0x1a0,0xe643,0x4f,0x31b,0x301,0x1a1,0xe643,0x6f,0x31b,0x301,0x1a0,0xe643,0x4f,0x31b,0x300,
+0x1a1,0xe643,0x6f,0x31b,0x300,0x1a0,0xe643,0x4f,0x31b,0x309,0x1a1,0xe643,0x6f,0x31b,0x309,0x1a0,
+0xe643,0x4f,0x31b,0x303,0x1a1,0xe643,0x6f,0x31b,0x303,0x1a0,0xdc43,0x4f,0x31b,0x323,0x1a1,0xdc43,
+0x6f,0x31b,0x323,0xdc02,0x55,0x323,0xdc02,0x75,0x323,0xe602,0x55,0x309,0xe602,0x75,0x309,0x1af,
+0xe643,0x55,0x31b,0x301,0x1b0,0xe643,0x75,0x31b,0x301,0x1af,0xe643,0x55,0x31b,0x300,0x1b0,0xe643,
+0x75,0x31b,0x300,0x1af,0xe643,0x55,0x31b,0x309,0x1b0,0xe643,0x75,0x31b,0x309,0x1af,0xe643,0x55,
+0x31b,0x303,0x1b0,0xe643,0x75,0x31b,0x303,0x1af,0xdc43,0x55,0x31b,0x323,0x1b0,0xdc43,0x75,0x31b,
+0x323,0xe602,0x59,0x300,0xe602,0x79,0x300,0xdc02,0x59,0x323,0xdc02,0x79,0x323,0xe602,0x59,0x309,
+0xe602,0x79,0x309,0xe602,0x59,0x303,0xe602,0x79,0x303,0x1f10,0xe643,0x3b5,0x313,0x300,0x1f11,0xe643,
+0x3b5,0x314,0x300,0x1f10,0xe643,0x3b5,0x313,0x301,0x1f11,0xe643,0x3b5,0x314,0x301,0x1f18,0xe643,0x395,
+0x313,0x300,0x1f19,0xe643,0x395,0x314,0x300,0x1f18,0xe643,0x395,0x313,0x301,0x1f19,0xe643,0x395,0x314,
+0x301,0x1f30,0xe643,0x3b9,0x313,0x300,0x1f31,0xe643,0x3b9,0x314,0x300,0x1f30,0xe643,0x3b9,0x313,0x301,
+0x1f31,0xe643,0x3b9,0x314,0x301,0x1f30,0xe643,0x3b9,0x313,0x342,0x1f31,0xe643,0x3b9,0x314,0x342,0x1f38,
+0xe643,0x399,0x313,0x300,0x1f39,0xe643,0x399,0x314,0x300,0x1f38,0xe643,0x399,0x313,0x301,0x1f39,0xe643,
+0x399,0x314,0x301,0x1f38,0xe643,0x399,0x313,0x342,0x1f39,0xe643,0x399,0x314,0x342,0x1f40,0xe643,0x3bf,
+0x313,0x300,0x1f41,0xe643,0x3bf,0x314,0x300,0x1f40,0xe643,0x3bf,0x313,0x301,0x1f41,0xe643,0x3bf,0x314,
+0x301,0x1f48,0xe643,0x39f,0x313,0x300,0x1f49,0xe643,0x39f,0x314,0x300,0x1f48,0xe643,0x39f,0x313,0x301,
+0x1f49,0xe643,0x39f,0x314,0x301,0x1f50,0xe643,0x3c5,0x313,0x300,0x1f51,0xe643,0x3c5,0x314,0x300,0x1f50,
+0xe643,0x3c5,0x313,0x301,0x1f51,0xe643,0x3c5,0x314,0x301,0x1f50,0xe643,0x3c5,0x313,0x342,0x1f51,0xe643,
+0x3c5,0x314,0x342,0x1f59,0xe643,0x3a5,0x314,0x300,0x1f59,0xe643,0x3a5,0x314,0x301,0x1f59,0xe643,0x3a5,
+0x314,0x342,0xe602,0x3b5,0x300,0xe602,0x3b9,0x300,0xe602,0x3bf,0x300,0xe602,0x3c5,0x300,0x1f00,0xf043,
+0x3b1,0x313,0x345,0x1f01,0xf043,0x3b1,0x314,0x345,0x1f02,0x345,2,0xf044,0x3b1,0x313,0x300,0x345,
+0x1f03,0x345,2,0xf044,0x3b1,0x314,0x300,0x345,0x1f04,0x345,2,0xf044,0x3b1,0x313,0x301,0x345,
+0x1f05,0x345,2,0xf044,0x3b1,0x314,0x301,0x345,0x1f06,0x345,2,0xf044,0x3b1,0x313,0x342,0x345,
+0x1f07,0x345,2,0xf044,0x3b1,0x314,0x342,0x345,0x1f08,0xf043,0x391,0x313,0x345,0x1f09,0xf043,0x391,
+0x314,0x345,0x1f0a,0x345,2,0xf044,0x391,0x313,0x300,0x345,0x1f0b,0x345,2,0xf044,0x391,0x314,
+0x300,0x345,0x1f0c,0x345,2,0xf044,0x391,0x313,0x301,0x345,0x1f0d,0x345,2,0xf044,0x391,0x314,
+0x301,0x345,0x1f0e,0x345,2,0xf044,0x391,0x313,0x342,0x345,0x1f0f,0x345,2,0xf044,0x391,0x314,
+0x342,0x345,0x1f20,0xf043,0x3b7,0x313,0x345,0x1f21,0xf043,0x3b7,0x314,0x345,0x1f22,0x345,2,0xf044,
+0x3b7,0x313,0x300,0x345,0x1f23,0x345,2,0xf044,0x3b7,0x314,0x300,0x345,0x1f24,0x345,2,0xf044,
+0x3b7,0x313,0x301,0x345,0x1f25,0x345,2,0xf044,0x3b7,0x314,0x301,0x345,0x1f26,0x345,2,0xf044,
+0x3b7,0x313,0x342,0x345,0x1f27,0x345,2,0xf044,0x3b7,0x314,0x342,0x345,0x1f28,0xf043,0x397,0x313,
+0x345,0x1f29,0xf043,0x397,0x314,0x345,0x1f2a,0x345,2,0xf044,0x397,0x313,0x300,0x345,0x1f2b,0x345,
+2,0xf044,0x397,0x314,0x300,0x345,0x1f2c,0x345,2,0xf044,0x397,0x313,0x301,0x345,0x1f2d,0x345,
+2,0xf044,0x397,0x314,0x301,0x345,0x1f2e,0x345,2,0xf044,0x397,0x313,0x342,0x345,0x1f2f,0x345,
+2,0xf044,0x397,0x314,0x342,0x345,0x1f60,0xf043,0x3c9,0x313,0x345,0x1f61,0xf043,0x3c9,0x314,0x345,
+0x1f62,0x345,2,0xf044,0x3c9,0x313,0x300,0x345,0x1f63,0x345,2,0xf044,0x3c9,0x314,0x300,0x345,
+0x1f64,0x345,2,0xf044,0x3c9,0x313,0x301,0x345,0x1f65,0x345,2,0xf044,0x3c9,0x314,0x301,0x345,
+0x1f66,0x345,2,0xf044,0x3c9,0x313,0x342,0x345,0x1f67,0x345,2,0xf044,0x3c9,0x314,0x342,0x345,
+0x1f68,0xf043,0x3a9,0x313,0x345,0x1f69,0xf043,0x3a9,0x314,0x345,0x1f6a,0x345,2,0xf044,0x3a9,0x313,
+0x300,0x345,0x1f6b,0x345,2,0xf044,0x3a9,0x314,0x300,0x345,0x1f6c,0x345,2,0xf044,0x3a9,0x313,
+0x301,0x345,0x1f6d,0x345,2,0xf044,0x3a9,0x314,0x301,0x345,0x1f6e,0x345,2,0xf044,0x3a9,0x313,
+0x342,0x345,0x1f6f,0x345,2,0xf044,0x3a9,0x314,0x342,0x345,0xe602,0x3b1,0x306,0xe602,0x3b1,0x304,
+0x1f70,0xf043,0x3b1,0x300,0x345,0xf002,0x3b1,0x345,0x3ac,0xf043,0x3b1,0x301,0x345,0x1fb6,0xf043,0x3b1,
+0x342,0x345,0xe602,0x391,0x306,0xe602,0x391,0x304,0xe602,0x391,0x300,0xf002,0x391,0x345,0xe602,0xa8,
+0x342,0x1f74,0xf043,0x3b7,0x300,0x345,0xf002,0x3b7,0x345,0x3ae,0xf043,0x3b7,0x301,0x345,0x1fc6,0xf043,
+0x3b7,0x342,0x345,0xe602,0x395,0x300,0xe602,0x397,0x300,0xf002,0x397,0x345,0xe602,0x1fbf,0x300,0xe602,
+0x1fbf,0x301,0xe602,0x1fbf,0x342,0xe602,0x3b9,0x306,0xe602,0x3b9,0x304,0x3ca,0xe643,0x3b9,0x308,0x300,
+0xe602,0x3b9,0x342,0x3ca,0xe643,0x3b9,0x308,0x342,0xe602,0x399,0x306,0xe602,0x399,0x304,0xe602,0x399,
+0x300,0xe602,0x1ffe,0x300,0xe602,0x1ffe,0x301,0xe602,0x1ffe,0x342,0xe602,0x3c5,0x306,0xe602,0x3c5,0x304,
+0x3cb,0xe643,0x3c5,0x308,0x300,0xe602,0x3c1,0x313,0xe602,0x3c1,0x314,0xe602,0x3c5,0x342,0x3cb,0xe643,
+0x3c5,0x308,0x342,0xe602,0x3a5,0x306,0xe602,0x3a5,0x304,0xe602,0x3a5,0x300,0xe602,0x3a1,0x314,0xe602,
+0xa8,0x300,0x1f7c,0xf043,0x3c9,0x300,0x345,0xf002,0x3c9,0x345,0x3ce,0xf043,0x3c9,0x301,0x345,0x1ff6,
+0xf043,0x3c9,0x342,0x345,0xe602,0x39f,0x300,0xe602,0x3a9,0x300,0xf002,0x3a9,0x345,0x102,0x2190,0x338,
+0x102,0x2192,0x338,0x102,0x2194,0x338,0x102,0x21d0,0x338,0x102,0x21d4,0x338,0x102,0x21d2,0x338,0x102,
+0x2203,0x338,0x102,0x2208,0x338,0x102,0x220b,0x338,0x102,0x2223,0x338,0x102,0x2225,0x338,0x102,0x223c,
+0x338,0x102,0x2243,0x338,0x102,0x2245,0x338,0x102,0x2248,0x338,0x102,0x3d,0x338,0x102,0x2261,0x338,
+0x102,0x224d,0x338,0x102,0x3c,0x338,0x102,0x3e,0x338,0x102,0x2264,0x338,0x102,0x2265,0x338,0x102,
+0x2272,0x338,0x102,0x2273,0x338,0x102,0x2276,0x338,0x102,0x2277,0x338,0x102,0x227a,0x338,0x102,0x227b,
+0x338,0x102,0x2282,0x338,0x102,0x2283,0x338,0x102,0x2286,0x338,0x102,0x2287,0x338,0x102,0x22a2,0x338,
+0x102,0x22a8,0x338,0x102,0x22a9,0x338,0x102,0x22ab,0x338,0x102,0x227c,0x338,0x102,0x227d,0x338,0x102,
+0x2291,0x338,0x102,0x2292,0x338,0x102,0x22b2,0x338,0x102,0x22b3,0x338,0x102,0x22b4,0x338,0x102,0x22b5,
+0x338,0x802,0x304b,0x3099,0x802,0x304d,0x3099,0x802,0x304f,0x3099,0x802,0x3051,0x3099,0x802,0x3053,0x3099,
+0x802,0x3055,0x3099,0x802,0x3057,0x3099,0x802,0x3059,0x3099,0x802,0x305b,0x3099,0x802,0x305d,0x3099,0x802,
+0x305f,0x3099,0x802,0x3061,0x3099,0x802,0x3064,0x3099,0x802,0x3066,0x3099,0x802,0x3068,0x3099,0x802,0x306f,
+0x3099,0x802,0x306f,0x309a,0x802,0x3072,0x3099,0x802,0x3072,0x309a,0x802,0x3075,0x3099,0x802,0x3075,0x309a,
+0x802,0x3078,0x3099,0x802,0x3078,0x309a,0x802,0x307b,0x3099,0x802,0x307b,0x309a,0x802,0x3046,0x3099,0x802,
+0x309d,0x3099,0x802,0x30ab,0x3099,0x802,0x30ad,0x3099,0x802,0x30af,0x3099,0x802,0x30b1,0x3099,0x802,0x30b3,
+0x3099,0x802,0x30b5,0x3099,0x802,0x30b7,0x3099,0x802,0x30b9,0x3099,0x802,0x30bb,0x3099,0x802,0x30bd,0x3099,
+0x802,0x30bf,0x3099,0x802,0x30c1,0x3099,0x802,0x30c4,0x3099,0x802,0x30c6,0x3099,0x802,0x30c8,0x3099,0x802,
+0x30cf,0x3099,0x802,0x30cf,0x309a,0x802,0x30d2,0x3099,0x802,0x30d2,0x309a,0x802,0x30d5,0x3099,0x802,0x30d5,
+0x309a,0x802,0x30d8,0x3099,0x802,0x30d8,0x309a,0x802,0x30db,0x3099,0x802,0x30db,0x309a,0x802,0x30a6,0x3099,
+0x802,0x30ef,0x3099,0x802,0x30f0,0x3099,0x802,0x30f1,0x3099,0x802,0x30f2,0x3099,0x802,0x30fd,0x3099,0x704,
+0xd804,0xdc99,0xd804,0xdcba,0x704,0xd804,0xdc9b,0xd804,0xdcba,0x704,0xd804,0xdca5,0xd804,0xdcba,4,0xd804,
+0xdd31,0xd804,0xdd27,4,0xd804,0xdd32,0xd804,0xdd27,4,0xd804,0xdf47,0xd804,0xdf3e,4,0xd804,0xdf47,
+0xd804,0xdf57,4,0xd805,0xdcb9,0xd805,0xdcba,4,0xd805,0xdcb9,0xd805,0xdcb0,4,0xd805,0xdcb9,0xd805,
+0xdcbd,4,0xd805,0xddb8,0xd805,0xddaf,4,0xd805,0xddb9,0xd805,0xddaf,4,0xd806,0xdd35,0xd806,0xdd30,
+1,0x2b9,1,0x3b,1,0xb7,0x702,0x915,0x93c,0x702,0x916,0x93c,0x702,0x917,0x93c,0x702,
+0x91c,0x93c,0x702,0x921,0x93c,0x702,0x922,0x93c,0x702,0x92b,0x93c,0x702,0x92f,0x93c,0x702,0x9a1,
+0x9bc,0x702,0x9a2,0x9bc,0x702,0x9af,0x9bc,0x702,0xa32,0xa3c,0x702,0xa38,0xa3c,0x702,0xa16,0xa3c,
+0x702,0xa17,0xa3c,0x702,0xa1c,0xa3c,0x702,0xa2b,0xa3c,0x702,0xb21,0xb3c,0x702,0xb22,0xb3c,2,
+0xf42,0xfb7,2,0xf4c,0xfb7,2,0xf51,0xfb7,2,0xf56,0xfb7,2,0xf5b,0xfb7,2,0xf40,
+0xfb5,0x8202,0xfb2,0xf80,0x8202,0xfb3,0xf80,2,0xf92,0xfb7,2,0xf9c,0xfb7,2,0xfa1,0xfb7,
+2,0xfa6,0xfb7,2,0xfab,0xfb7,2,0xf90,0xfb5,1,0x3b9,1,0x60,1,0xb4,1,
+0x3a9,1,0x4b,1,0x3008,1,0x3009,0x102,0x2add,0x338,1,0x8c48,1,0x66f4,1,0x8eca,
+1,0x8cc8,1,0x6ed1,1,0x4e32,1,0x53e5,1,0x9f9c,1,0x5951,1,0x91d1,1,0x5587,
+1,0x5948,1,0x61f6,1,0x7669,1,0x7f85,1,0x863f,1,0x87ba,1,0x88f8,1,0x908f,
+1,0x6a02,1,0x6d1b,1,0x70d9,1,0x73de,1,0x843d,1,0x916a,1,0x99f1,1,0x4e82,
+1,0x5375,1,0x6b04,1,0x721b,1,0x862d,1,0x9e1e,1,0x5d50,1,0x6feb,1,0x85cd,
+1,0x8964,1,0x62c9,1,0x81d8,1,0x881f,1,0x5eca,1,0x6717,1,0x6d6a,1,0x72fc,
+1,0x90ce,1,0x4f86,1,0x51b7,1,0x52de,1,0x64c4,1,0x6ad3,1,0x7210,1,0x76e7,
+1,0x8001,1,0x8606,1,0x865c,1,0x8def,1,0x9732,1,0x9b6f,1,0x9dfa,1,0x788c,
+1,0x797f,1,0x7da0,1,0x83c9,1,0x9304,1,0x9e7f,1,0x8ad6,1,0x58df,1,0x5f04,
+1,0x7c60,1,0x807e,1,0x7262,1,0x78ca,1,0x8cc2,1,0x96f7,1,0x58d8,1,0x5c62,
+1,0x6a13,1,0x6dda,1,0x6f0f,1,0x7d2f,1,0x7e37,1,0x964b,1,0x52d2,1,0x808b,
+1,0x51dc,1,0x51cc,1,0x7a1c,1,0x7dbe,1,0x83f1,1,0x9675,1,0x8b80,1,0x62cf,
+1,0x8afe,1,0x4e39,1,0x5be7,1,0x6012,1,0x7387,1,0x7570,1,0x5317,1,0x78fb,
+1,0x4fbf,1,0x5fa9,1,0x4e0d,1,0x6ccc,1,0x6578,1,0x7d22,1,0x53c3,1,0x585e,
+1,0x7701,1,0x8449,1,0x8aaa,1,0x6bba,1,0x8fb0,1,0x6c88,1,0x62fe,1,0x82e5,
+1,0x63a0,1,0x7565,1,0x4eae,1,0x5169,1,0x51c9,1,0x6881,1,0x7ce7,1,0x826f,
+1,0x8ad2,1,0x91cf,1,0x52f5,1,0x5442,1,0x5973,1,0x5eec,1,0x65c5,1,0x6ffe,
+1,0x792a,1,0x95ad,1,0x9a6a,1,0x9e97,1,0x9ece,1,0x529b,1,0x66c6,1,0x6b77,
+1,0x8f62,1,0x5e74,1,0x6190,1,0x6200,1,0x649a,1,0x6f23,1,0x7149,1,0x7489,
+1,0x79ca,1,0x7df4,1,0x806f,1,0x8f26,1,0x84ee,1,0x9023,1,0x934a,1,0x5217,
+1,0x52a3,1,0x54bd,1,0x70c8,1,0x88c2,1,0x5ec9,1,0x5ff5,1,0x637b,1,0x6bae,
+1,0x7c3e,1,0x7375,1,0x4ee4,1,0x56f9,1,0x5dba,1,0x601c,1,0x73b2,1,0x7469,
+1,0x7f9a,1,0x8046,1,0x9234,1,0x96f6,1,0x9748,1,0x9818,1,0x4f8b,1,0x79ae,
+1,0x91b4,1,0x96b8,1,0x60e1,1,0x4e86,1,0x50da,1,0x5bee,1,0x5c3f,1,0x6599,
+1,0x71ce,1,0x7642,1,0x84fc,1,0x907c,1,0x9f8d,1,0x6688,1,0x962e,1,0x5289,
+1,0x677b,1,0x67f3,1,0x6d41,1,0x6e9c,1,0x7409,1,0x7559,1,0x786b,1,0x7d10,
+1,0x985e,1,0x516d,1,0x622e,1,0x9678,1,0x502b,1,0x5d19,1,0x6dea,1,0x8f2a,
+1,0x5f8b,1,0x6144,1,0x6817,1,0x9686,1,0x5229,1,0x540f,1,0x5c65,1,0x6613,
+1,0x674e,1,0x68a8,1,0x6ce5,1,0x7406,1,0x75e2,1,0x7f79,1,0x88cf,1,0x88e1,
+1,0x91cc,1,0x96e2,1,0x533f,1,0x6eba,1,0x541d,1,0x71d0,1,0x7498,1,0x85fa,
+1,0x96a3,1,0x9c57,1,0x9e9f,1,0x6797,1,0x6dcb,1,0x81e8,1,0x7acb,1,0x7b20,
+1,0x7c92,1,0x72c0,1,0x7099,1,0x8b58,1,0x4ec0,1,0x8336,1,0x523a,1,0x5207,
+1,0x5ea6,1,0x62d3,1,0x7cd6,1,0x5b85,1,0x6d1e,1,0x66b4,1,0x8f3b,1,0x884c,
+1,0x964d,1,0x898b,1,0x5ed3,1,0x5140,1,0x55c0,1,0x585a,1,0x6674,1,0x51de,
+1,0x732a,1,0x76ca,1,0x793c,1,0x795e,1,0x7965,1,0x798f,1,0x9756,1,0x7cbe,
+1,0x7fbd,1,0x8612,1,0x8af8,1,0x9038,1,0x90fd,1,0x98ef,1,0x98fc,1,0x9928,
+1,0x9db4,1,0x90de,1,0x96b7,1,0x4fae,1,0x50e7,1,0x514d,1,0x52c9,1,0x52e4,
+1,0x5351,1,0x559d,1,0x5606,1,0x5668,1,0x5840,1,0x58a8,1,0x5c64,1,0x5c6e,
+1,0x6094,1,0x6168,1,0x618e,1,0x61f2,1,0x654f,1,0x65e2,1,0x6691,1,0x6885,
+1,0x6d77,1,0x6e1a,1,0x6f22,1,0x716e,1,0x722b,1,0x7422,1,0x7891,1,0x793e,
+1,0x7949,1,0x7948,1,0x7950,1,0x7956,1,0x795d,1,0x798d,1,0x798e,1,0x7a40,
+1,0x7a81,1,0x7bc0,1,0x7e09,1,0x7e41,1,0x7f72,1,0x8005,1,0x81ed,1,0x8279,
+1,0x8457,1,0x8910,1,0x8996,1,0x8b01,1,0x8b39,1,0x8cd3,1,0x8d08,1,0x8fb6,
+1,0x96e3,1,0x97ff,1,0x983b,1,0x6075,2,0xd850,0xdeee,1,0x8218,1,0x4e26,1,
+0x51b5,1,0x5168,1,0x4f80,1,0x5145,1,0x5180,1,0x52c7,1,0x52fa,1,0x5555,1,
+0x5599,1,0x55e2,1,0x58b3,1,0x5944,1,0x5954,1,0x5a62,1,0x5b28,1,0x5ed2,1,
+0x5ed9,1,0x5f69,1,0x5fad,1,0x60d8,1,0x614e,1,0x6108,1,0x6160,1,0x6234,1,
+0x63c4,1,0x641c,1,0x6452,1,0x6556,1,0x671b,1,0x6756,1,0x6b79,1,0x6edb,1,
+0x6ecb,1,0x701e,1,0x77a7,1,0x7235,1,0x72af,1,0x7471,1,0x7506,1,0x753b,1,
+0x761d,1,0x761f,1,0x76db,1,0x76f4,1,0x774a,1,0x7740,1,0x78cc,1,0x7ab1,1,
+0x7c7b,1,0x7d5b,1,0x7f3e,1,0x8352,1,0x83ef,1,0x8779,1,0x8941,1,0x8986,1,
+0x8abf,1,0x8acb,1,0x8aed,1,0x8b8a,1,0x8f38,1,0x9072,1,0x9199,1,0x9276,1,
+0x967c,1,0x97db,1,0x980b,1,0x9b12,2,0xd84a,0xdc4a,2,0xd84a,0xdc44,2,0xd84c,0xdfd5,
+1,0x3b9d,1,0x4018,1,0x4039,2,0xd854,0xde49,2,0xd857,0xdcd0,2,0xd85f,0xded3,1,
+0x9f43,1,0x9f8e,0xe02,0x5d9,0x5b4,0x1102,0x5f2,0x5b7,0x1802,0x5e9,0x5c1,0x1902,0x5e9,0x5c2,0xfb49,
+0x1843,0x5e9,0x5bc,0x5c1,0xfb49,0x1943,0x5e9,0x5bc,0x5c2,0x1102,0x5d0,0x5b7,0x1202,0x5d0,0x5b8,0x1502,
+0x5d0,0x5bc,0x1502,0x5d1,0x5bc,0x1502,0x5d2,0x5bc,0x1502,0x5d3,0x5bc,0x1502,0x5d4,0x5bc,0x1502,0x5d5,
+0x5bc,0x1502,0x5d6,0x5bc,0x1502,0x5d8,0x5bc,0x1502,0x5d9,0x5bc,0x1502,0x5da,0x5bc,0x1502,0x5db,0x5bc,
+0x1502,0x5dc,0x5bc,0x1502,0x5de,0x5bc,0x1502,0x5e0,0x5bc,0x1502,0x5e1,0x5bc,0x1502,0x5e3,0x5bc,0x1502,
+0x5e4,0x5bc,0x1502,0x5e6,0x5bc,0x1502,0x5e7,0x5bc,0x1502,0x5e8,0x5bc,0x1502,0x5e9,0x5bc,0x1502,0x5ea,
+0x5bc,0x1302,0x5d5,0x5b9,0x1702,0x5d1,0x5bf,0x1702,0x5db,0x5bf,0x1702,0x5e4,0x5bf,0xd804,0xd834,0xdd57,
+0xd834,0xdd65,0xd804,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd5f,0xd834,0xdd6e,4,0xd846,0xd834,0xdd58,0xd834,
+0xdd65,0xd834,0xdd6e,0xd834,0xdd5f,0xd834,0xdd6f,4,0xd846,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd6f,0xd834,
+0xdd5f,0xd834,0xdd70,4,0xd846,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd70,0xd834,0xdd5f,0xd834,0xdd71,4,
+0xd846,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd71,0xd834,0xdd5f,0xd834,0xdd72,4,0xd846,0xd834,0xdd58,0xd834,
+0xdd65,0xd834,0xdd72,0xd804,0xd834,0xddb9,0xd834,0xdd65,0xd804,0xd834,0xddba,0xd834,0xdd65,0xd834,0xddbb,0xd834,
+0xdd6e,4,0xd846,0xd834,0xddb9,0xd834,0xdd65,0xd834,0xdd6e,0xd834,0xddbc,0xd834,0xdd6e,4,0xd846,0xd834,
+0xddba,0xd834,0xdd65,0xd834,0xdd6e,0xd834,0xddbb,0xd834,0xdd6f,4,0xd846,0xd834,0xddb9,0xd834,0xdd65,0xd834,
+0xdd6f,0xd834,0xddbc,0xd834,0xdd6f,4,0xd846,0xd834,0xddba,0xd834,0xdd65,0xd834,0xdd6f,1,0x4e3d,1,
+0x4e38,1,0x4e41,2,0xd840,0xdd22,1,0x4f60,1,0x4fbb,1,0x5002,1,0x507a,1,0x5099,
+1,0x50cf,1,0x349e,2,0xd841,0xde3a,1,0x5154,1,0x5164,1,0x5177,2,0xd841,0xdd1c,
+1,0x34b9,1,0x5167,1,0x518d,2,0xd841,0xdd4b,1,0x5197,1,0x51a4,1,0x4ecc,1,
+0x51ac,2,0xd864,0xdddf,1,0x51f5,1,0x5203,1,0x34df,1,0x523b,1,0x5246,1,0x5272,
+1,0x5277,1,0x3515,1,0x5305,1,0x5306,1,0x5349,1,0x535a,1,0x5373,1,0x537d,
+1,0x537f,2,0xd842,0xde2c,1,0x7070,1,0x53ca,1,0x53df,2,0xd842,0xdf63,1,0x53eb,
+1,0x53f1,1,0x5406,1,0x549e,1,0x5438,1,0x5448,1,0x5468,1,0x54a2,1,0x54f6,
+1,0x5510,1,0x5553,1,0x5563,1,0x5584,1,0x55ab,1,0x55b3,1,0x55c2,1,0x5716,
+1,0x5717,1,0x5651,1,0x5674,1,0x58ee,1,0x57ce,1,0x57f4,1,0x580d,1,0x578b,
+1,0x5832,1,0x5831,1,0x58ac,2,0xd845,0xdce4,1,0x58f2,1,0x58f7,1,0x5906,1,
+0x591a,1,0x5922,1,0x5962,2,0xd845,0xdea8,2,0xd845,0xdeea,1,0x59ec,1,0x5a1b,1,
+0x5a27,1,0x59d8,1,0x5a66,1,0x36ee,1,0x36fc,1,0x5b08,1,0x5b3e,2,0xd846,0xddc8,
+1,0x5bc3,1,0x5bd8,1,0x5bf3,2,0xd846,0xdf18,1,0x5bff,1,0x5c06,1,0x5f53,1,
+0x5c22,1,0x3781,1,0x5c60,1,0x5cc0,1,0x5c8d,2,0xd847,0xdde4,1,0x5d43,2,0xd847,
+0xdde6,1,0x5d6e,1,0x5d6b,1,0x5d7c,1,0x5de1,1,0x5de2,1,0x382f,1,0x5dfd,1,
+0x5e28,1,0x5e3d,1,0x5e69,1,0x3862,2,0xd848,0xdd83,1,0x387c,1,0x5eb0,1,0x5eb3,
+1,0x5eb6,2,0xd868,0xdf92,1,0x5efe,2,0xd848,0xdf31,1,0x8201,1,0x5f22,1,0x38c7,
+2,0xd84c,0xdeb8,2,0xd858,0xddda,1,0x5f62,1,0x5f6b,1,0x38e3,1,0x5f9a,1,0x5fcd,
+1,0x5fd7,1,0x5ff9,1,0x6081,1,0x393a,1,0x391c,2,0xd849,0xded4,1,0x60c7,1,
+0x6148,1,0x614c,1,0x617a,1,0x61b2,1,0x61a4,1,0x61af,1,0x61de,1,0x6210,1,
+0x621b,1,0x625d,1,0x62b1,1,0x62d4,1,0x6350,2,0xd84a,0xdf0c,1,0x633d,1,0x62fc,
+1,0x6368,1,0x6383,1,0x63e4,2,0xd84a,0xdff1,1,0x6422,1,0x63c5,1,0x63a9,1,
+0x3a2e,1,0x6469,1,0x647e,1,0x649d,1,0x6477,1,0x3a6c,1,0x656c,2,0xd84c,0xdc0a,
+1,0x65e3,1,0x66f8,1,0x6649,1,0x3b19,1,0x3b08,1,0x3ae4,1,0x5192,1,0x5195,
+1,0x6700,1,0x669c,1,0x80ad,1,0x43d9,1,0x6721,1,0x675e,1,0x6753,2,0xd84c,
+0xdfc3,1,0x3b49,1,0x67fa,1,0x6785,1,0x6852,2,0xd84d,0xdc6d,1,0x688e,1,0x681f,
+1,0x6914,1,0x6942,1,0x69a3,1,0x69ea,1,0x6aa8,2,0xd84d,0xdea3,1,0x6adb,1,
+0x3c18,1,0x6b21,2,0xd84e,0xdca7,1,0x6b54,1,0x3c4e,1,0x6b72,1,0x6b9f,1,0x6bbb,
+2,0xd84e,0xde8d,2,0xd847,0xdd0b,2,0xd84e,0xdefa,1,0x6c4e,2,0xd84f,0xdcbc,1,0x6cbf,
+1,0x6ccd,1,0x6c67,1,0x6d16,1,0x6d3e,1,0x6d69,1,0x6d78,1,0x6d85,2,0xd84f,
+0xdd1e,1,0x6d34,1,0x6e2f,1,0x6e6e,1,0x3d33,1,0x6ec7,2,0xd84f,0xded1,1,0x6df9,
+1,0x6f6e,2,0xd84f,0xdf5e,2,0xd84f,0xdf8e,1,0x6fc6,1,0x7039,1,0x701b,1,0x3d96,
+1,0x704a,1,0x707d,1,0x7077,1,0x70ad,2,0xd841,0xdd25,1,0x7145,2,0xd850,0xde63,
+1,0x719c,2,0xd850,0xdfab,1,0x7228,1,0x7250,2,0xd851,0xde08,1,0x7280,1,0x7295,
+2,0xd851,0xdf35,2,0xd852,0xdc14,1,0x737a,1,0x738b,1,0x3eac,1,0x73a5,1,0x3eb8,
+1,0x7447,1,0x745c,1,0x7485,1,0x74ca,1,0x3f1b,1,0x7524,2,0xd853,0xdc36,1,
+0x753e,2,0xd853,0xdc92,2,0xd848,0xdd9f,1,0x7610,2,0xd853,0xdfa1,2,0xd853,0xdfb8,2,
+0xd854,0xdc44,1,0x3ffc,1,0x4008,2,0xd854,0xdcf3,2,0xd854,0xdcf2,2,0xd854,0xdd19,2,
+0xd854,0xdd33,1,0x771e,1,0x771f,1,0x778b,1,0x4046,1,0x4096,2,0xd855,0xdc1d,1,
+0x784e,1,0x40e3,2,0xd855,0xde26,2,0xd855,0xde9a,2,0xd855,0xdec5,1,0x79eb,1,0x412f,
+1,0x7a4a,1,0x7a4f,2,0xd856,0xdd7c,2,0xd856,0xdea7,1,0x7aee,1,0x4202,2,0xd856,
+0xdfab,1,0x7bc6,1,0x7bc9,1,0x4227,2,0xd857,0xdc80,1,0x7cd2,1,0x42a0,1,0x7ce8,
+1,0x7ce3,1,0x7d00,2,0xd857,0xdf86,1,0x7d63,1,0x4301,1,0x7dc7,1,0x7e02,1,
+0x7e45,1,0x4334,2,0xd858,0xde28,2,0xd858,0xde47,1,0x4359,2,0xd858,0xded9,1,0x7f7a,
+2,0xd858,0xdf3e,1,0x7f95,1,0x7ffa,2,0xd859,0xdcda,2,0xd859,0xdd23,1,0x8060,2,
+0xd859,0xdda8,1,0x8070,2,0xd84c,0xdf5f,1,0x43d5,1,0x80b2,1,0x8103,1,0x440b,1,
+0x813e,1,0x5ab5,2,0xd859,0xdfa7,2,0xd859,0xdfb5,2,0xd84c,0xdf93,2,0xd84c,0xdf9c,1,
+0x8204,1,0x8f9e,1,0x446b,1,0x8291,1,0x828b,1,0x829d,1,0x52b3,1,0x82b1,1,
+0x82b3,1,0x82bd,1,0x82e6,2,0xd85a,0xdf3c,1,0x831d,1,0x8363,1,0x83ad,1,0x8323,
+1,0x83bd,1,0x83e7,1,0x8353,1,0x83ca,1,0x83cc,1,0x83dc,2,0xd85b,0xdc36,2,
+0xd85b,0xdd6b,2,0xd85b,0xdcd5,1,0x452b,1,0x84f1,1,0x84f3,1,0x8516,2,0xd85c,0xdfca,
+1,0x8564,2,0xd85b,0xdf2c,1,0x455d,1,0x4561,2,0xd85b,0xdfb1,2,0xd85c,0xdcd2,1,
+0x456b,1,0x8650,1,0x8667,1,0x8669,1,0x86a9,1,0x8688,1,0x870e,1,0x86e2,1,
+0x8728,1,0x876b,1,0x8786,1,0x45d7,1,0x87e1,1,0x8801,1,0x45f9,1,0x8860,1,
+0x8863,2,0xd85d,0xde67,1,0x88d7,1,0x88de,1,0x4635,1,0x88fa,1,0x34bb,2,0xd85e,
+0xdcae,2,0xd85e,0xdd66,1,0x46be,1,0x46c7,1,0x8aa0,1,0x8c55,2,0xd85f,0xdca8,1,
+0x8cab,1,0x8cc1,1,0x8d1b,1,0x8d77,2,0xd85f,0xdf2f,2,0xd842,0xdc04,1,0x8dcb,1,
+0x8dbc,1,0x8df0,2,0xd842,0xdcde,1,0x8ed4,2,0xd861,0xddd2,2,0xd861,0xdded,1,0x9094,
+1,0x90f1,1,0x9111,2,0xd861,0xdf2e,1,0x911b,1,0x9238,1,0x92d7,1,0x92d8,1,
+0x927c,1,0x93f9,1,0x9415,2,0xd862,0xdffa,1,0x958b,1,0x4995,1,0x95b7,2,0xd863,
+0xdd77,1,0x49e6,1,0x96c3,1,0x5db2,1,0x9723,2,0xd864,0xdd45,2,0xd864,0xde1a,1,
+0x4a6e,1,0x4a76,1,0x97e0,2,0xd865,0xdc0a,1,0x4ab2,2,0xd865,0xdc96,1,0x9829,2,
+0xd865,0xddb6,1,0x98e2,1,0x4b33,1,0x9929,1,0x99a7,1,0x99c2,1,0x99fe,1,0x4bce,
+2,0xd866,0xdf30,1,0x9c40,1,0x9cfd,1,0x4cce,1,0x4ced,1,0x9d67,2,0xd868,0xdcce,
+1,0x4cf8,2,0xd868,0xdd05,2,0xd868,0xde0e,2,0xd868,0xde91,1,0x9ebb,1,0x4d56,1,
+0x9ef9,1,0x9efe,1,0x9f05,1,0x9f0f,1,0x9f16,1,0x9f3b,2,0xd869,0xde00,0x3ac,0xe642,
+0x3b1,0x301,0x3ad,0xe642,0x3b5,0x301,0x3ae,0xe642,0x3b7,0x301,0x3af,0xe642,0x3b9,0x301,0x3cc,0xe642,
+0x3bf,0x301,0x3cd,0xe642,0x3c5,0x301,0x3ce,0xe642,0x3c9,0x301,0x386,0xe642,0x391,0x301,0x388,0xe642,
+0x395,0x301,0x389,0xe642,0x397,0x301,0x390,1,0xe643,0x3b9,0x308,0x301,0x38a,0xe642,0x399,0x301,
+0x3b0,1,0xe643,0x3c5,0x308,0x301,0x38e,0xe642,0x3a5,0x301,0x385,0xe642,0xa8,0x301,0x38c,0xe642,
+0x39f,0x301,0x38f,0xe642,0x3a9,0x301,0xc5,0xe642,0x41,0x30a,0xe6e6,0xe681,0x300,0xe6e6,0xe681,0x301,
+0xe6e6,0xe681,0x313,0xe6e6,0xe682,0x308,0x301,0x8100,0x8282,0xf71,0xf72,0x8100,0x8482,0xf71,0xf74,0x8100,
+0x8282,0xf71,0xf80,0
+};
+
+// 256-byte table named "smallFCD". normalizer2.cpp passes it as the last
+// argument of Normalizer2Impl::init() when NORM2_HARDCODE_NFC_DATA is enabled.
+// NOTE(review): presumably machine-generated (gennorm2 --csource) flag bits used by
+// the FCD code paths; confirm against normalizer2impl.h before editing by hand.
+static const uint8_t norm2_nfc_data_smallFCD[256]={
+0xc0,0xef,3,0x7f,0xdf,0x70,0xcf,0x87,0xc7,0xe6,0x66,0x46,0x64,0x46,0x66,0x5b,
+0x12,0,0,4,0,0,0,0x43,0x20,2,0x69,0xae,0xc2,0xc0,0xff,0xff,
+0xc0,0x72,0xbf,0,0,0,0,0,0,0,0x40,0,0x80,0x88,0,0,
+0xfe,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0x98,0,0xc3,0x66,0xe0,0x80,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,7,0,0,2,0
+};
+
+#endif // INCLUDED_FROM_NORMALIZER2_CPP
diff --git a/thirdparty/icu4c/common/norm2allmodes.h b/thirdparty/icu4c/common/norm2allmodes.h
new file mode 100644
index 0000000000..e8bd52c6ae
--- /dev/null
+++ b/thirdparty/icu4c/common/norm2allmodes.h
@@ -0,0 +1,369 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* norm2allmodes.h
+*
+* created on: 2014sep07
+* created by: Markus W. Scherer
+*/
+
+#ifndef __NORM2ALLMODES_H__
+#define __NORM2ALLMODES_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/edits.h"
+#include "unicode/normalizer2.h"
+#include "unicode/stringoptions.h"
+#include "unicode/unistr.h"
+#include "cpputils.h"
+#include "normalizer2impl.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Intermediate class between the public Normalizer2 API and the concrete
+ * normalization modes.
+ *
+ * Holds a reference to a Normalizer2Impl and does the boilerplate argument
+ * checking and ReorderingBuffer setup for the UnicodeString-based entry points.
+ * Subclasses supply the mode-specific work through the pure virtual functions
+ * normalize(), normalizeAndAppend() and spanQuickCheckYes() that take raw
+ * UChar ranges.
+ *
+ * Functions that override a Normalizer2 virtual are marked U_OVERRIDE so that a
+ * signature mismatch with the base class is reported at compile time.
+ */
+class Normalizer2WithImpl : public Normalizer2 {
+public:
+    Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
+    virtual ~Normalizer2WithImpl();
+
+    // normalize
+
+    /**
+     * Normalizes src into dest and returns dest.
+     * If errorCode already indicates a failure, dest is set to bogus.
+     * If dest is the same object as src, or src.getBuffer() returns NULL,
+     * errorCode is set to U_ILLEGAL_ARGUMENT_ERROR and dest is set to bogus.
+     */
+    virtual UnicodeString &
+    normalize(const UnicodeString &src,
+              UnicodeString &dest,
+              UErrorCode &errorCode) const U_OVERRIDE {
+        if(U_FAILURE(errorCode)) {
+            dest.setToBogus();
+            return dest;
+        }
+        const UChar *sArray=src.getBuffer();
+        if(&dest==&src || sArray==nullptr) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            dest.setToBogus();
+            return dest;
+        }
+        dest.remove();
+        ReorderingBuffer buffer(impl, dest);
+        if(buffer.init(src.length(), errorCode)) {
+            normalize(sArray, sArray+src.length(), buffer, errorCode);
+        }
+        return dest;
+    }
+    /** Mode-specific hook: normalizes [src, limit) into buffer. */
+    virtual void
+    normalize(const UChar *src, const UChar *limit,
+              ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
+
+    // normalize and append
+
+    /** Appends the normalized form of second to first (doNormalize=true). */
+    virtual UnicodeString &
+    normalizeSecondAndAppend(UnicodeString &first,
+                             const UnicodeString &second,
+                             UErrorCode &errorCode) const U_OVERRIDE {
+        return normalizeSecondAndAppend(first, second, true, errorCode);
+    }
+    /** Appends second to first via the shared helper with doNormalize=false. */
+    virtual UnicodeString &
+    append(UnicodeString &first,
+           const UnicodeString &second,
+           UErrorCode &errorCode) const U_OVERRIDE {
+        return normalizeSecondAndAppend(first, second, false, errorCode);
+    }
+    /**
+     * Shared implementation of normalizeSecondAndAppend() and append().
+     * Returns first. Sets U_ILLEGAL_ARGUMENT_ERROR if first and second are the
+     * same object or second.getBuffer() returns NULL.
+     * If the append fails, the suffix of first that was modified is restored
+     * from safeMiddle.
+     */
+    UnicodeString &
+    normalizeSecondAndAppend(UnicodeString &first,
+                             const UnicodeString &second,
+                             UBool doNormalize,
+                             UErrorCode &errorCode) const {
+        uprv_checkCanGetBuffer(first, errorCode);
+        if(U_FAILURE(errorCode)) {
+            return first;
+        }
+        const UChar *secondArray=second.getBuffer();
+        if(&first==&second || secondArray==nullptr) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return first;
+        }
+        int32_t firstLength=first.length();
+        UnicodeString safeMiddle;
+        {
+            ReorderingBuffer buffer(impl, first);
+            if(buffer.init(firstLength+second.length(), errorCode)) {
+                normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
+                                   safeMiddle, buffer, errorCode);
+            }
+        }  // The ReorderingBuffer destructor finalizes the first string.
+        if(U_FAILURE(errorCode)) {
+            // Restore the modified suffix of the first string.
+            first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
+        }
+        return first;
+    }
+    /** Mode-specific hook: appends [src, limit) to the text held by buffer. */
+    virtual void
+    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
+                       UnicodeString &safeMiddle,
+                       ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
+    /**
+     * Stores the decomposition of c in decomposition and returns true, or
+     * returns false if impl.getDecomposition() yields NULL.
+     */
+    virtual UBool
+    getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE {
+        UChar buffer[4];
+        int32_t length;
+        const UChar *d=impl.getDecomposition(c, buffer, length);
+        if(d==nullptr) {
+            return false;
+        }
+        if(d==buffer) {
+            decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
+        } else {
+            decomposition.setTo(false, d, length);  // read-only alias
+        }
+        return true;
+    }
+    /**
+     * Same as getDecomposition() but based on impl.getRawDecomposition().
+     */
+    virtual UBool
+    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE {
+        UChar buffer[30];
+        int32_t length;
+        const UChar *d=impl.getRawDecomposition(c, buffer, length);
+        if(d==nullptr) {
+            return false;
+        }
+        if(d==buffer) {
+            decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
+        } else {
+            decomposition.setTo(false, d, length);  // read-only alias
+        }
+        return true;
+    }
+    /** Forwards to impl.composePair(a, b). */
+    virtual UChar32
+    composePair(UChar32 a, UChar32 b) const U_OVERRIDE {
+        return impl.composePair(a, b);
+    }
+
+    /** Returns impl.getCC() of the norm16 value looked up for c. */
+    virtual uint8_t
+    getCombiningClass(UChar32 c) const U_OVERRIDE {
+        return impl.getCC(impl.getNorm16(c));
+    }
+
+    // quick checks
+
+    /**
+     * Returns true if spanQuickCheckYes() covers all of s.
+     * Returns false on a prior failure, or with U_ILLEGAL_ARGUMENT_ERROR if
+     * s.getBuffer() returns NULL.
+     */
+    virtual UBool
+    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
+        if(U_FAILURE(errorCode)) {
+            return false;
+        }
+        const UChar *sArray=s.getBuffer();
+        if(sArray==nullptr) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return false;
+        }
+        const UChar *sLimit=sArray+s.length();
+        return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
+    }
+    /**
+     * Returns UNORM_YES or UNORM_NO based on this class's own isNormalized();
+     * the call is qualified, so a subclass override of isNormalized() is not used.
+     */
+    virtual UNormalizationCheckResult
+    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
+        return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
+    }
+    /**
+     * Returns the length of the prefix of s reported by the pointer-based
+     * spanQuickCheckYes(); 0 on a prior failure or if s.getBuffer() returns NULL.
+     */
+    virtual int32_t
+    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
+        if(U_FAILURE(errorCode)) {
+            return 0;
+        }
+        const UChar *sArray=s.getBuffer();
+        if(sArray==nullptr) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+        return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
+    }
+    /** Mode-specific hook: returns a pointer into [src, limit]. */
+    virtual const UChar *
+    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
+
+    /** Default per-code-point quick check result: always UNORM_YES. */
+    virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
+        return UNORM_YES;
+    }
+
+    const Normalizer2Impl &impl;
+};
+
+/**
+ * Normalizer2 that forwards to the decompose*() functions of Normalizer2Impl.
+ * All overrides are marked U_OVERRIDE, matching ComposeNormalizer2.
+ */
+class DecomposeNormalizer2 : public Normalizer2WithImpl {
+public:
+    DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
+    virtual ~DecomposeNormalizer2();
+
+private:
+    // Decomposes [src, limit) into buffer.
+    virtual void
+    normalize(const UChar *src, const UChar *limit,
+              ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
+        impl.decompose(src, limit, &buffer, errorCode);
+    }
+    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
+    // Appends [src, limit) to the text held by buffer.
+    virtual void
+    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
+                       UnicodeString &safeMiddle,
+                       ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
+        impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
+    }
+    // Calls decompose() without an output buffer and returns its result pointer.
+    virtual const UChar *
+    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE {
+        return impl.decompose(src, limit, nullptr, errorCode);
+    }
+    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
+    // UNORM_YES if impl.isDecompYes() holds for the norm16 value of c, else UNORM_NO.
+    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
+        return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
+    }
+    virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE { return impl.hasDecompBoundaryBefore(c); }
+    virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE { return impl.hasDecompBoundaryAfter(c); }
+    virtual UBool isInert(UChar32 c) const U_OVERRIDE { return impl.isDecompInert(c); }
+};
+
+/**
+ * Normalizer2 that forwards to the compose*() functions of Normalizer2Impl.
+ * The onlyContiguous flag given at construction is passed along on every call;
+ * Norm2AllModes creates one instance with the flag off (comp) and one with it
+ * on (fcc).
+ */
+class ComposeNormalizer2 : public Normalizer2WithImpl {
+public:
+    ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
+        Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
+    virtual ~ComposeNormalizer2();
+
+private:
+    // Composes [src, limit) into buffer.
+    virtual void
+    normalize(const UChar *src, const UChar *limit,
+              ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
+        impl.compose(src, limit, onlyContiguous, true, buffer, errorCode);
+    }
+    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
+
+    // UTF-8 entry point: optionally resets edits, composes src straight into
+    // sink, then flushes sink. Does nothing if errorCode is already a failure.
+    void
+    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
+                  Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
+        if (U_SUCCESS(errorCode)) {
+            const bool resetEdits = edits != nullptr && (options & U_EDITS_NO_RESET) == 0;
+            if (resetEdits) {
+                edits->reset();
+            }
+            const uint8_t *bytes = reinterpret_cast<const uint8_t *>(src.data());
+            const uint8_t *bytesLimit = bytes + src.length();
+            impl.composeUTF8(options, onlyContiguous, bytes, bytesLimit,
+                             &sink, edits, errorCode);
+            sink.Flush();
+        }
+    }
+
+    // Appends [src, limit) to the text held by buffer.
+    virtual void
+    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
+                       UnicodeString &safeMiddle,
+                       ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
+        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
+    }
+
+    // Runs compose() with its fourth argument false and a small scratch buffer,
+    // and returns compose()'s result.
+    virtual UBool
+    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
+        if(U_FAILURE(errorCode)) {
+            return false;
+        }
+        const UChar *text=s.getBuffer();
+        if(text==nullptr) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return false;
+        }
+        UnicodeString scratch;
+        ReorderingBuffer scratchBuffer(impl, scratch);
+        // small destCapacity for substring normalization
+        if(!scratchBuffer.init(5, errorCode)) {
+            return false;
+        }
+        const UChar *textLimit=text+s.length();
+        return impl.compose(text, textLimit, onlyContiguous, false, scratchBuffer, errorCode);
+    }
+    // UTF-8 counterpart: composeUTF8() with neither a sink nor edits.
+    virtual UBool
+    isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
+        if(U_FAILURE(errorCode)) {
+            return false;
+        }
+        const uint8_t *bytes = reinterpret_cast<const uint8_t *>(sp.data());
+        const uint8_t *bytesLimit = bytes + sp.length();
+        return impl.composeUTF8(0, onlyContiguous, bytes, bytesLimit, nullptr, nullptr, errorCode);
+    }
+    // Returns the result reported by composeQuickCheck(); UNORM_MAYBE on a
+    // prior failure or when s.getBuffer() returns NULL.
+    virtual UNormalizationCheckResult
+    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
+        UNormalizationCheckResult result=UNORM_MAYBE;
+        if(U_SUCCESS(errorCode)) {
+            const UChar *text=s.getBuffer();
+            if(text!=nullptr) {
+                result=UNORM_YES;
+                impl.composeQuickCheck(text, text+s.length(), onlyContiguous, &result);
+            } else {
+                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            }
+        }
+        return result;
+    }
+    // Returns the pointer produced by composeQuickCheck(); the error code is unused.
+    virtual const UChar *
+    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE {
+        return impl.composeQuickCheck(src, limit, onlyContiguous, nullptr);
+    }
+    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
+    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
+        const uint16_t norm16=impl.getNorm16(c);
+        return impl.getCompQuickCheck(norm16);
+    }
+    virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
+        return impl.hasCompBoundaryBefore(c);
+    }
+    virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
+        return impl.hasCompBoundaryAfter(c, onlyContiguous);
+    }
+    virtual UBool isInert(UChar32 c) const U_OVERRIDE {
+        return impl.isCompInert(c, onlyContiguous);
+    }
+
+    const UBool onlyContiguous;
+};
+
+/**
+ * Normalizer2 that forwards to the makeFCD*() functions of Normalizer2Impl.
+ * All overrides are marked U_OVERRIDE, matching ComposeNormalizer2.
+ */
+class FCDNormalizer2 : public Normalizer2WithImpl {
+public:
+    FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
+    virtual ~FCDNormalizer2();
+
+private:
+    // Runs makeFCD() on [src, limit), writing into buffer.
+    virtual void
+    normalize(const UChar *src, const UChar *limit,
+              ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
+        impl.makeFCD(src, limit, &buffer, errorCode);
+    }
+    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
+    // Appends [src, limit) to the text held by buffer.
+    virtual void
+    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
+                       UnicodeString &safeMiddle,
+                       ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
+        impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
+    }
+    // Calls makeFCD() without an output buffer and returns its result pointer.
+    virtual const UChar *
+    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE {
+        return impl.makeFCD(src, limit, nullptr, errorCode);
+    }
+    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
+    virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE { return impl.hasFCDBoundaryBefore(c); }
+    virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE { return impl.hasFCDBoundaryAfter(c); }
+    virtual UBool isInert(UChar32 c) const U_OVERRIDE { return impl.isFCDInert(c); }
+};
+
+/**
+ * Bundles one Normalizer2Impl with a Normalizer2 object for each mode served
+ * from it: comp and fcc (ComposeNormalizer2 with onlyContiguous off and on,
+ * respectively), decomp and fcd.
+ * The destructor (defined in normalizer2.cpp) deletes impl.
+ */
+struct Norm2AllModes : public UMemory {
+    // Dereferences ni right away to bind the per-mode objects to it.
+    Norm2AllModes(Normalizer2Impl *ni)
+            : impl(ni),
+              comp(*ni, false),
+              decomp(*ni),
+              fcd(*ni),
+              fcc(*ni, true) {}
+    ~Norm2AllModes();
+
+    // Factory functions; each reports problems through errorCode.
+    static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
+    static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
+    static Norm2AllModes *createInstance(const char *packageName,
+                                         const char *name,
+                                         UErrorCode &errorCode);
+
+    // Accessors for shared instances.
+    static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
+    static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
+    static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
+
+    Normalizer2Impl *impl;
+    ComposeNormalizer2 comp;
+    DecomposeNormalizer2 decomp;
+    FCDNormalizer2 fcd;
+    ComposeNormalizer2 fcc;
+};
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_NORMALIZATION
+#endif // __NORM2ALLMODES_H__
diff --git a/thirdparty/icu4c/common/normalizer2.cpp b/thirdparty/icu4c/common/normalizer2.cpp
new file mode 100644
index 0000000000..6be7e0b21a
--- /dev/null
+++ b/thirdparty/icu4c/common/normalizer2.cpp
@@ -0,0 +1,572 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: normalizer2.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009nov22
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/edits.h"
+#include "unicode/normalizer2.h"
+#include "unicode/stringoptions.h"
+#include "unicode/unistr.h"
+#include "unicode/unorm.h"
+#include "cstring.h"
+#include "mutex.h"
+#include "norm2allmodes.h"
+#include "normalizer2impl.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+
+using icu::Normalizer2Impl;
+
+#if NORM2_HARDCODE_NFC_DATA
+// NFC/NFD data machine-generated by gennorm2 --csource
+#define INCLUDED_FROM_NORMALIZER2_CPP
+#include "norm2_nfc_data.h"
+#endif
+
+U_NAMESPACE_BEGIN
+
+// Public API dispatch via Normalizer2 subclasses -------------------------- ***
+
+// Out-of-line destructor; nothing to release at this level.
+Normalizer2::~Normalizer2() {
+}
+
+// Default UTF-8 normalization: converts src to a UnicodeString, normalizes it
+// with normalize(), and writes the result to sink as UTF-8.
+// The options argument is ignored. Recording edits is not supported here:
+// a non-null edits pointer yields U_UNSUPPORTED_ERROR.
+void
+Normalizer2::normalizeUTF8(uint32_t /*options*/, StringPiece src, ByteSink &sink,
+                           Edits *edits, UErrorCode &errorCode) const {
+    if (U_SUCCESS(errorCode)) {
+        if (edits == nullptr) {
+            UnicodeString utf16 = UnicodeString::fromUTF8(src);
+            normalize(utf16, errorCode).toUTF8(sink);
+        } else {
+            errorCode = U_UNSUPPORTED_ERROR;
+        }
+    }
+}
+
+// Default implementation: ignores both arguments and returns FALSE.
+UBool Normalizer2::getRawDecomposition(UChar32 /*c*/, UnicodeString & /*decomposition*/) const {
+    return FALSE;
+}
+
+// Default implementation: ignores both code points and returns U_SENTINEL.
+UChar32 Normalizer2::composePair(UChar32 /*a*/, UChar32 /*b*/) const {
+    return U_SENTINEL;
+}
+
+// Default implementation: ignores the code point and returns 0.
+uint8_t Normalizer2::getCombiningClass(UChar32 /*c*/) const {
+    return 0;
+}
+
+// Default UTF-8 check: converts s to a UnicodeString and asks isNormalized().
+// Returns FALSE without doing any work if errorCode is already a failure.
+UBool
+Normalizer2::isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const {
+    if(U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    return isNormalized(UnicodeString::fromUTF8(s), errorCode) != 0;
+}
+
+// Normalizer2 implementation for the old UNORM_NONE:
+// every operation passes text through unchanged.
+class NoopNormalizer2 : public Normalizer2 {
+    virtual ~NoopNormalizer2();
+
+    // Copies src to dest. Passing the same object for both is an error.
+    virtual UnicodeString &
+    normalize(const UnicodeString &src,
+              UnicodeString &dest,
+              UErrorCode &errorCode) const U_OVERRIDE {
+        if(U_FAILURE(errorCode)) {
+            return dest;
+        }
+        if(&dest==&src) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        } else {
+            dest=src;
+        }
+        return dest;
+    }
+    // Records the whole input as unchanged in edits (if given), copies src to
+    // sink unless U_OMIT_UNCHANGED_TEXT is set, and flushes sink.
+    virtual void
+    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
+                  Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
+        if(U_FAILURE(errorCode)) {
+            return;
+        }
+        if (edits != nullptr) {
+            const bool resetEdits = (options & U_EDITS_NO_RESET) == 0;
+            if (resetEdits) {
+                edits->reset();
+            }
+            edits->addUnchanged(src.length());
+        }
+        const bool copyText = (options & U_OMIT_UNCHANGED_TEXT) == 0;
+        if (copyText) {
+            sink.Append(src.data(), src.length());
+        }
+        sink.Flush();
+    }
+
+    // Plain append of second to first. Passing the same object for both is an error.
+    virtual UnicodeString &
+    normalizeSecondAndAppend(UnicodeString &first,
+                             const UnicodeString &second,
+                             UErrorCode &errorCode) const U_OVERRIDE {
+        if(U_FAILURE(errorCode)) {
+            return first;
+        }
+        if(&first==&second) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        } else {
+            first.append(second);
+        }
+        return first;
+    }
+    // Same behavior as normalizeSecondAndAppend().
+    virtual UnicodeString &
+    append(UnicodeString &first,
+           const UnicodeString &second,
+           UErrorCode &errorCode) const U_OVERRIDE {
+        if(U_FAILURE(errorCode)) {
+            return first;
+        }
+        if(&first==&second) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        } else {
+            first.append(second);
+        }
+        return first;
+    }
+    // Never reports a decomposition.
+    virtual UBool
+    getDecomposition(UChar32, UnicodeString &) const U_OVERRIDE {
+        return FALSE;
+    }
+    // No need to U_OVERRIDE the default getRawDecomposition().
+
+    // Any string counts as normalized as long as errorCode is not a failure.
+    virtual UBool
+    isNormalized(const UnicodeString &, UErrorCode &errorCode) const U_OVERRIDE {
+        return U_SUCCESS(errorCode);
+    }
+    virtual UBool
+    isNormalizedUTF8(StringPiece, UErrorCode &errorCode) const U_OVERRIDE {
+        return U_SUCCESS(errorCode);
+    }
+    // Always UNORM_YES; the error code is not consulted.
+    virtual UNormalizationCheckResult
+    quickCheck(const UnicodeString &, UErrorCode &) const U_OVERRIDE {
+        return UNORM_YES;
+    }
+    // The whole string passes the quick check; the error code is not consulted.
+    virtual int32_t
+    spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const U_OVERRIDE {
+        return s.length();
+    }
+    virtual UBool hasBoundaryBefore(UChar32) const U_OVERRIDE {
+        return TRUE;
+    }
+    virtual UBool hasBoundaryAfter(UChar32) const U_OVERRIDE {
+        return TRUE;
+    }
+    virtual UBool isInert(UChar32) const U_OVERRIDE {
+        return TRUE;
+    }
+};
+
+// Out-of-line definitions of the destructors declared above and in
+// norm2allmodes.h; none of these classes has anything to release itself.
+NoopNormalizer2::~NoopNormalizer2() = default;
+
+Normalizer2WithImpl::~Normalizer2WithImpl() = default;
+
+DecomposeNormalizer2::~DecomposeNormalizer2() = default;
+
+ComposeNormalizer2::~ComposeNormalizer2() = default;
+
+FCDNormalizer2::~FCDNormalizer2() = default;
+
+// instance cache ---------------------------------------------------------- ***
+
+// Forward declaration of the cleanup callback defined further down;
+// it is needed by the init functions that register it.
+U_CDECL_BEGIN
+static UBool U_CALLCONV uprv_normalizer2_cleanup();
+U_CDECL_END
+
+// Cached no-op normalizer, created by initNoopSingleton() under noopInitOnce.
+static Normalizer2 *noopSingleton = nullptr;
+static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
+
+// Init function passed to umtx_initOnce(): allocates the no-op normalizer and,
+// on success, registers the cleanup callback. Sets U_MEMORY_ALLOCATION_ERROR if
+// the allocation yields NULL; does nothing if errorCode is already a failure.
+static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) {
+    if(U_SUCCESS(errorCode)) {
+        noopSingleton=new NoopNormalizer2;
+        if(noopSingleton!=nullptr) {
+            ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
+        } else {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+// Returns the shared no-op normalizer, creating it on first use.
+// Returns NULL if errorCode is already a failure on entry.
+const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
+    if(U_SUCCESS(errorCode)) {
+        umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode);
+        return noopSingleton;
+    }
+    return nullptr;
+}
+
+// Returns the Normalizer2Impl behind norm2.
+// The downcast is unchecked: norm2 must really be a Normalizer2WithImpl.
+const Normalizer2Impl *
+Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
+    const Normalizer2WithImpl *withImpl=static_cast<const Normalizer2WithImpl *>(norm2);
+    return &withImpl->impl;
+}
+
+// Deletes the Normalizer2Impl that was handed to the constructor.
+Norm2AllModes::~Norm2AllModes() { delete impl; }
+
+// Wraps impl in a new Norm2AllModes and returns it.
+// If errorCode is already a failure, or the allocation yields NULL (which sets
+// U_MEMORY_ALLOCATION_ERROR), impl is deleted and NULL is returned.
+Norm2AllModes *
+Norm2AllModes::createInstance(Normalizer2Impl *impl, UErrorCode &errorCode) {
+    Norm2AllModes *result=nullptr;
+    if(U_SUCCESS(errorCode)) {
+        result=new Norm2AllModes(impl);
+        if(result==nullptr) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+    if(result==nullptr) {
+        // Nothing took ownership of impl, so release it here.
+        delete impl;
+    }
+    return result;
+}
+
+#if NORM2_HARDCODE_NFC_DATA
+// Builds a Norm2AllModes on top of the hardcoded norm2_nfc_data_* tables.
+// Returns NULL on a prior failure, or with U_MEMORY_ALLOCATION_ERROR if the
+// Normalizer2Impl allocation yields NULL.
+Norm2AllModes *
+Norm2AllModes::createNFCInstance(UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return nullptr;
+    }
+    Normalizer2Impl *nfcImpl=new Normalizer2Impl;
+    if(nfcImpl!=nullptr) {
+        nfcImpl->init(norm2_nfc_data_indexes, &norm2_nfc_data_trie,
+                      norm2_nfc_data_extraData, norm2_nfc_data_smallFCD);
+        return createInstance(nfcImpl, errorCode);
+    }
+    errorCode=U_MEMORY_ALLOCATION_ERROR;
+    return nullptr;
+}
+
+// Cached NFC instance, created by initNFCSingleton() under nfcInitOnce.
+static Norm2AllModes *nfcSingleton = nullptr;
+static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
+
+// Init function passed to umtx_initOnce(): creates the NFC instance and
+// registers the cleanup callback. The callback is registered whether or not
+// creation succeeded.
+static void U_CALLCONV
+initNFCSingleton(UErrorCode &errorCode) {
+    nfcSingleton=Norm2AllModes::createNFCInstance(errorCode);
+    ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
+}
+
+// Returns the shared NFC instance, creating it on first use.
+// Returns NULL if errorCode is already a failure on entry.
+const Norm2AllModes *
+Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
+    if(U_SUCCESS(errorCode)) {
+        umtx_initOnce(nfcInitOnce, &initNFCSingleton, errorCode);
+        return nfcSingleton;
+    }
+    return nullptr;
+}
+
+// Returns the comp member of the shared NFC instance, or NULL if that
+// instance is not available.
+const Normalizer2 *
+Normalizer2::getNFCInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *modes=Norm2AllModes::getNFCInstance(errorCode);
+    if(modes==nullptr) {
+        return nullptr;
+    }
+    return &modes->comp;
+}
+
+// Returns the decomp member of the shared NFC instance, or NULL if that
+// instance is not available.
+const Normalizer2 *
+Normalizer2::getNFDInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *modes=Norm2AllModes::getNFCInstance(errorCode);
+    if(modes==nullptr) {
+        return nullptr;
+    }
+    return &modes->decomp;
+}
+
+// Returns the fcd member of the shared NFC instance, or NULL if that
+// instance is not available.
+const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *modes=Norm2AllModes::getNFCInstance(errorCode);
+    if(modes==nullptr) {
+        return nullptr;
+    }
+    return &modes->fcd;
+}
+
+// Returns the fcc member of the shared NFC instance, or NULL if that
+// instance is not available.
+const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *modes=Norm2AllModes::getNFCInstance(errorCode);
+    if(modes==nullptr) {
+        return nullptr;
+    }
+    return &modes->fcc;
+}
+
+// Returns the Normalizer2Impl of the shared NFC instance, or NULL if that
+// instance is not available.
+const Normalizer2Impl *
+Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
+    const Norm2AllModes *modes=Norm2AllModes::getNFCInstance(errorCode);
+    if(modes==nullptr) {
+        return nullptr;
+    }
+    return modes->impl;
+}
+#endif // NORM2_HARDCODE_NFC_DATA
+
+U_CDECL_BEGIN
+
+// Cleanup callback registered through ucln_common_registerCleanup():
+// deletes the cached singletons, clears their pointers and resets the
+// matching UInitOnce objects. Always returns TRUE.
+static UBool U_CALLCONV uprv_normalizer2_cleanup() {
+    // No-op normalizer.
+    delete noopSingleton;
+    noopSingleton = nullptr;
+    noopInitOnce.reset();
+#if NORM2_HARDCODE_NFC_DATA
+    // NFC instance built from the hardcoded data.
+    delete nfcSingleton;
+    nfcSingleton = nullptr;
+    nfcInitOnce.reset();
+#endif
+    return TRUE;
+}
+
+U_CDECL_END
+
+U_NAMESPACE_END
+
+// C API ------------------------------------------------------------------- ***
+
+U_NAMESPACE_USE
+
+// C wrapper for Normalizer2::getNFCInstance(); pErrorCode is dereferenced unchecked.
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFCInstance(UErrorCode *pErrorCode) {
+    const Normalizer2 *nfc=Normalizer2::getNFCInstance(*pErrorCode);
+    return reinterpret_cast<const UNormalizer2 *>(nfc);
+}
+
+// C wrapper for Normalizer2::getNFDInstance(); pErrorCode is dereferenced unchecked.
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFDInstance(UErrorCode *pErrorCode) {
+    const Normalizer2 *nfd=Normalizer2::getNFDInstance(*pErrorCode);
+    return reinterpret_cast<const UNormalizer2 *>(nfd);
+}
+
+// Deletes the Normalizer2 object behind the C handle.
+U_CAPI void U_EXPORT2
+unorm2_close(UNormalizer2 *norm2) {
+    Normalizer2 *n2=reinterpret_cast<Normalizer2 *>(norm2);
+    delete n2;
+}
+
+// C API: normalizes src[0..length) into dest[0..capacity) and returns the
+// value of UnicodeString::extract() for the result.
+// length may be -1 for NUL-terminated input.
+// Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 if:
+// - src is NULL with length!=0, or length<-1;
+// - dest is NULL with capacity!=0, or capacity<0;
+// - src and dest are the same non-NULL pointer.
+U_CAPI int32_t U_EXPORT2
+unorm2_normalize(const UNormalizer2 *norm2,
+                 const UChar *src, int32_t length,
+                 UChar *dest, int32_t capacity,
+                 UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    const bool badSrc=(src==nullptr) ? (length!=0) : (length<-1);
+    const bool badDest=(dest==nullptr) ? (capacity!=0) : (capacity<0);
+    const bool aliased=(src!=nullptr && src==dest);
+    if(badSrc || badDest || aliased) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    UnicodeString destString(dest, 0, capacity);
+    // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
+    if(length==0) {
+        return destString.extract(dest, capacity, *pErrorCode);
+    }
+    const Normalizer2 *n2=reinterpret_cast<const Normalizer2 *>(norm2);
+    const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
+    if(n2wi==nullptr) {
+        // Generic path through the public UnicodeString API.
+        UnicodeString srcString(length<0, src, length);
+        n2->normalize(srcString, destString, *pErrorCode);
+    } else {
+        // Avoid duplicate argument checking and support NUL-terminated src.
+        ReorderingBuffer buffer(n2wi->impl, destString);
+        if(buffer.init(length, *pErrorCode)) {
+            const UChar *srcLimit=(length>=0) ? src+length : nullptr;
+            n2wi->normalize(src, srcLimit, buffer, *pErrorCode);
+        }
+    }
+    return destString.extract(dest, capacity, *pErrorCode);
+}
+
+// Shared helper for the C API append functions: appends second to the text in
+// first[0..firstCapacity), normalizing it first when doNormalize is set, and
+// returns the value of UnicodeString::extract() for the result.
+// Either length may be -1 for NUL-terminated input.
+// Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 if:
+// - second is NULL with secondLength!=0, or secondLength<-1;
+// - first is NULL with a non-zero capacity or length, or firstCapacity<0,
+//   or firstLength<-1;
+// - first and second are the same non-NULL pointer.
+static int32_t
+normalizeSecondAndAppend(const UNormalizer2 *norm2,
+                         UChar *first, int32_t firstLength, int32_t firstCapacity,
+                         const UChar *second, int32_t secondLength,
+                         UBool doNormalize,
+                         UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    const bool badSecond=(second==nullptr) ? (secondLength!=0) : (secondLength<-1);
+    const bool badFirst=(first==nullptr) ?
+        (firstCapacity!=0 || firstLength!=0) :
+        (firstCapacity<0 || firstLength<-1);
+    const bool aliased=(first!=nullptr && first==second);
+    if(badSecond || badFirst || aliased) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    UnicodeString firstString(first, firstLength, firstCapacity);
+    firstLength=firstString.length();  // In case it was -1.
+    // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
+    if(secondLength==0) {
+        return firstString.extract(first, firstCapacity, *pErrorCode);
+    }
+    const Normalizer2 *n2=reinterpret_cast<const Normalizer2 *>(norm2);
+    const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
+    if(n2wi==nullptr) {
+        // Generic path through the public UnicodeString API.
+        UnicodeString secondString(secondLength<0, second, secondLength);
+        if(doNormalize) {
+            n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
+        } else {
+            n2->append(firstString, secondString, *pErrorCode);
+        }
+        return firstString.extract(first, firstCapacity, *pErrorCode);
+    }
+    // Avoid duplicate argument checking and support NUL-terminated src.
+    UnicodeString safeMiddle;
+    {
+        ReorderingBuffer buffer(n2wi->impl, firstString);
+        if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
+            const UChar *secondLimit=(secondLength>=0) ? second+secondLength : nullptr;
+            n2wi->normalizeAndAppend(second, secondLimit,
+                                     doNormalize, safeMiddle, buffer, *pErrorCode);
+        }
+    }  // The ReorderingBuffer destructor finalizes firstString.
+    const bool mustRestore=U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity;
+    if(mustRestore && first!=nullptr) {  /* don't dereference NULL */
+        // Restore the modified suffix of the first string.
+        // This does not restore first[] array contents between firstLength and firstCapacity.
+        // (That might be uninitialized memory, as far as we know.)
+        safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
+        if(firstLength<firstCapacity) {
+            first[firstLength]=0;  // NUL-terminate in case it was originally.
+        }
+    }
+    return firstString.extract(first, firstCapacity, *pErrorCode);
+}
+
+U_CAPI int32_t U_EXPORT2
+unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
+ UChar *first, int32_t firstLength, int32_t firstCapacity,
+ const UChar *second, int32_t secondLength,
+ UErrorCode *pErrorCode) { // C API: forwards to the static normalizeSecondAndAppend() helper.
+ return normalizeSecondAndAppend(norm2,
+ first, firstLength, firstCapacity,
+ second, secondLength,
+ TRUE, pErrorCode); // doNormalize=TRUE: second is normalized while being appended.
+}
+
+U_CAPI int32_t U_EXPORT2
+unorm2_append(const UNormalizer2 *norm2,
+ UChar *first, int32_t firstLength, int32_t firstCapacity,
+ const UChar *second, int32_t secondLength,
+ UErrorCode *pErrorCode) { // C API: forwards to the static normalizeSecondAndAppend() helper.
+ return normalizeSecondAndAppend(norm2,
+ first, firstLength, firstCapacity,
+ second, secondLength,
+ FALSE, pErrorCode); // doNormalize=FALSE selects the append-only path of the helper.
+}
+
+U_CAPI int32_t U_EXPORT2
+unorm2_getDecomposition(const UNormalizer2 *norm2,
+ UChar32 c, UChar *decomposition, int32_t capacity,
+ UErrorCode *pErrorCode) { // C API wrapper for Normalizer2::getDecomposition(); returns -1 when that call returns false.
+ if(U_FAILURE(*pErrorCode)) { // An incoming failure code short-circuits the call.
+ return 0;
+ }
+ if(decomposition==NULL ? capacity!=0 : capacity<0) { // NULL output is only allowed with capacity 0; otherwise capacity must be >= 0.
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ UnicodeString destString(decomposition, 0, capacity); // Constructed over the caller's output array with initial length 0.
+ if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
+ return destString.extract(decomposition, capacity, *pErrorCode);
+ } else {
+ return -1; // Distinguishes "no decomposition" from a zero-length result; pErrorCode is left untouched.
+ }
+}
+
+U_CAPI int32_t U_EXPORT2
+unorm2_getRawDecomposition(const UNormalizer2 *norm2,
+ UChar32 c, UChar *decomposition, int32_t capacity,
+ UErrorCode *pErrorCode) { // C API wrapper for Normalizer2::getRawDecomposition(); returns -1 when that call returns false.
+ if(U_FAILURE(*pErrorCode)) { // An incoming failure code short-circuits the call.
+ return 0;
+ }
+ if(decomposition==NULL ? capacity!=0 : capacity<0) { // NULL output is only allowed with capacity 0; otherwise capacity must be >= 0.
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ UnicodeString destString(decomposition, 0, capacity); // Constructed over the caller's output array with initial length 0.
+ if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
+ return destString.extract(decomposition, capacity, *pErrorCode);
+ } else {
+ return -1; // Distinguishes "no decomposition" from a zero-length result; pErrorCode is left untouched.
+ }
+}
+
+U_CAPI UChar32 U_EXPORT2
+unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) { // C API wrapper for Normalizer2::composePair().
+ return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b); // norm2 is dereferenced without a NULL check.
+}
+
+U_CAPI uint8_t U_EXPORT2
+unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) { // C API wrapper for Normalizer2::getCombiningClass().
+ return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c); // norm2 is dereferenced without a NULL check.
+}
+
+U_CAPI UBool U_EXPORT2
+unorm2_isNormalized(const UNormalizer2 *norm2,
+ const UChar *s, int32_t length,
+ UErrorCode *pErrorCode) { // C API wrapper for Normalizer2::isNormalized().
+ if(U_FAILURE(*pErrorCode)) { // An incoming failure code short-circuits the call.
+ return 0;
+ }
+ if((s==NULL && length!=0) || length<-1) { // NULL s is only allowed with length 0; length must be >= -1.
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ UnicodeString sString(length<0, s, length); // First argument is true when length is -1; NOTE(review): presumably the read-only aliasing constructor -- confirm.
+ return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
+}
+
+U_CAPI UNormalizationCheckResult U_EXPORT2
+unorm2_quickCheck(const UNormalizer2 *norm2,
+ const UChar *s, int32_t length,
+ UErrorCode *pErrorCode) { // C API wrapper for Normalizer2::quickCheck(); UNORM_NO is returned on any error path.
+ if(U_FAILURE(*pErrorCode)) { // An incoming failure code short-circuits the call.
+ return UNORM_NO;
+ }
+ if((s==NULL && length!=0) || length<-1) { // NULL s is only allowed with length 0; length must be >= -1.
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return UNORM_NO;
+ }
+ UnicodeString sString(length<0, s, length); // First argument is true when length is -1; NOTE(review): presumably the read-only aliasing constructor -- confirm.
+ return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
+}
+
+U_CAPI int32_t U_EXPORT2
+unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
+ const UChar *s, int32_t length,
+ UErrorCode *pErrorCode) { // C API wrapper for Normalizer2::spanQuickCheckYes(); 0 is returned on any error path.
+ if(U_FAILURE(*pErrorCode)) { // An incoming failure code short-circuits the call.
+ return 0;
+ }
+ if((s==NULL && length!=0) || length<-1) { // NULL s is only allowed with length 0; length must be >= -1.
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ UnicodeString sString(length<0, s, length); // First argument is true when length is -1; NOTE(review): presumably the read-only aliasing constructor -- confirm.
+ return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
+}
+
+U_CAPI UBool U_EXPORT2
+unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { // C API wrapper for Normalizer2::hasBoundaryBefore().
+ return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); // norm2 is dereferenced without a NULL check.
+}
+
+U_CAPI UBool U_EXPORT2
+unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { // C API wrapper for Normalizer2::hasBoundaryAfter().
+ return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); // norm2 is dereferenced without a NULL check.
+}
+
+U_CAPI UBool U_EXPORT2
+unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { // C API wrapper for Normalizer2::isInert().
+ return ((const Normalizer2 *)norm2)->isInert(c); // norm2 is dereferenced without a NULL check.
+}
+
+// Some properties APIs ---------------------------------------------------- ***
+
+U_CAPI uint8_t U_EXPORT2
+u_getCombiningClass(UChar32 c) { // Returns the combining class of c as reported by the NFD Normalizer2 instance.
+ UErrorCode errorCode=U_ZERO_ERROR; // Local error code: failures are not reported to the caller.
+ const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode);
+ if(U_SUCCESS(errorCode)) {
+ return nfd->getCombiningClass(c);
+ } else {
+ return 0; // Fallback value when the NFD instance could not be obtained.
+ }
+}
+
+U_CFUNC uint16_t
+unorm_getFCD16(UChar32 c) { // Returns getFCD16(c) from the NFC Normalizer2Impl.
+ UErrorCode errorCode=U_ZERO_ERROR; // Local error code: failures are not reported to the caller.
+ const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
+ if(U_SUCCESS(errorCode)) {
+ return impl->getFCD16(c);
+ } else {
+ return 0; // Fallback value when the NFC implementation could not be obtained.
+ }
+}
+
+#endif // !UCONFIG_NO_NORMALIZATION
diff --git a/thirdparty/icu4c/common/normalizer2impl.cpp b/thirdparty/icu4c/common/normalizer2impl.cpp
new file mode 100644
index 0000000000..cbf6b4d980
--- /dev/null
+++ b/thirdparty/icu4c/common/normalizer2impl.cpp
@@ -0,0 +1,2669 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: normalizer2impl.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009nov22
+* created by: Markus W. Scherer
+*/
+
+// #define UCPTRIE_DEBUG
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/bytestream.h"
+#include "unicode/edits.h"
+#include "unicode/normalizer2.h"
+#include "unicode/stringoptions.h"
+#include "unicode/ucptrie.h"
+#include "unicode/udata.h"
+#include "unicode/umutablecptrie.h"
+#include "unicode/ustring.h"
+#include "unicode/utf16.h"
+#include "unicode/utf8.h"
+#include "bytesinkutil.h"
+#include "cmemory.h"
+#include "mutex.h"
+#include "normalizer2impl.h"
+#include "putilimp.h"
+#include "uassert.h"
+#include "ucptrie_impl.h"
+#include "uset_imp.h"
+#include "uvector.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+/**
+ * UTF-8 lead byte for minNoMaybeCP.
+ * Can be lower than the actual lead byte for c.
+ * Typically U+0300 for NFC/NFD, U+00A0 for NFKC/NFKD, U+0041 for NFKC_Casefold.
+ */
+inline uint8_t leadByteForCP(UChar32 c) {
+ if (c <= 0x7f) { // Single-byte range: the byte is the code point itself.
+ return (uint8_t)c;
+ } else if (c <= 0x7ff) { // Two-byte range: 0xC0 plus the bits above the low six.
+ return (uint8_t)(0xc0+(c>>6));
+ } else {
+ // Should not occur because ccc(U+0300)!=0.
+ return 0xe0; // Fixed value for everything above U+07FF; it can be lower than the real lead byte.
+ }
+}
+
+/**
+ * Returns the code point from one single well-formed UTF-8 byte sequence
+ * between cpStart and cpLimit.
+ *
+ * Trie UTF-8 macros do not assemble whole code points (for efficiency).
+ * When we do need the code point, we call this function.
+ * We should not need it for normalization-inert data (norm16==0).
+ * Illegal sequences yield the error value norm16==0 just like real normalization-inert code points.
+ */
+UChar32 codePointFromValidUTF8(const uint8_t *cpStart, const uint8_t *cpLimit) {
+ // Similar to U8_NEXT_UNSAFE(s, i, c).
+ U_ASSERT(cpStart < cpLimit); // The sequence must contain at least one byte.
+ uint8_t c = *cpStart; // Lead byte.
+ switch(cpLimit-cpStart) { // Dispatch on the byte length of the sequence.
+ case 1:
+ return c;
+ case 2:
+ return ((c&0x1f)<<6) | (cpStart[1]&0x3f); // 5 payload bits from the lead byte, 6 from the trail byte.
+ case 3:
+ // no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar)
+ return (UChar)((c<<12) | ((cpStart[1]&0x3f)<<6) | (cpStart[2]&0x3f));
+ case 4:
+ return ((c&7)<<18) | ((cpStart[1]&0x3f)<<12) | ((cpStart[2]&0x3f)<<6) | (cpStart[3]&0x3f); // 3 payload bits from the lead byte, 6 from each trail byte.
+ default:
+ UPRV_UNREACHABLE; // Should not occur.
+ }
+}
+
+/**
+ * Returns the last code point in [start, p[ if it is valid and in U+1000..U+D7FF.
+ * Otherwise returns a negative value.
+ */
+UChar32 previousHangulOrJamo(const uint8_t *start, const uint8_t *p) {
+ if ((p - start) >= 3) { // Only three-byte sequences are considered.
+ p -= 3;
+ uint8_t l = *p; // Candidate lead byte.
+ uint8_t t1, t2; // Trail bytes with the 0x80 marker subtracted.
+ if (0xe1 <= l && l <= 0xed && // Lead byte range E1..ED.
+ (t1 = (uint8_t)(p[1] - 0x80)) <= 0x3f && // Unsigned wrap-around makes this a 0x80..0xBF range check.
+ (t2 = (uint8_t)(p[2] - 0x80)) <= 0x3f &&
+ (l < 0xed || t1 <= 0x1f)) { // With lead ED the first trail byte is capped at 0x9F, so the result stays <= U+D7FF.
+ return ((l & 0xf) << 12) | (t1 << 6) | t2;
+ }
+ }
+ return U_SENTINEL; // Negative "no match" result.
+}
+
+/**
+ * Returns the offset from the Jamo T base if [src, limit[ starts with a single Jamo T code point.
+ * Otherwise returns a negative value.
+ */
+int32_t getJamoTMinusBase(const uint8_t *src, const uint8_t *limit) {
+ // Jamo T: E1 86 A8..E1 87 82
+ if ((limit - src) >= 3 && *src == 0xe1) { // Need three bytes starting with lead byte E1.
+ if (src[1] == 0x86) {
+ uint8_t t = src[2];
+ // The first Jamo T is U+11A8 but JAMO_T_BASE is 11A7.
+ // Offset 0 does not correspond to any conjoining Jamo.
+ if (0xa8 <= t && t <= 0xbf) {
+ return t - 0xa7; // Yields 1..0x18 for trail bytes A8..BF.
+ }
+ } else if (src[1] == 0x87) {
+ uint8_t t = src[2];
+ if ((int8_t)t <= (int8_t)0x82u) { // Signed comparison: with a two's-complement int8_t this accepts only bytes 0x80..0x82.
+ return t - (0xa7 - 0x40); // Middle byte 87 is 0x40 code points above middle byte 86.
+ }
+ }
+ }
+ return -1; // Not a Jamo T at src.
+}
+
+void
+appendCodePointDelta(const uint8_t *cpStart, const uint8_t *cpLimit, int32_t delta,
+ ByteSink &sink, Edits *edits) { // Appends to sink the UTF-8 form of (code point in [cpStart, cpLimit[) + delta; records a replacement in edits if non-null.
+ char buffer[U8_MAX_LENGTH];
+ int32_t length; // Number of bytes written to buffer.
+ int32_t cpLength = (int32_t)(cpLimit - cpStart); // Byte length of the input sequence.
+ if (cpLength == 1) {
+ // The builder makes ASCII map to ASCII.
+ buffer[0] = (uint8_t)(*cpStart + delta);
+ length = 1;
+ } else {
+ int32_t trail = *(cpLimit-1) + delta; // Try applying the delta to the last byte only.
+ if (0x80 <= trail && trail <= 0xbf) { // Still a valid trail byte, so no re-encoding is needed.
+ // The delta only changes the last trail byte.
+ --cpLimit;
+ length = 0;
+ do { buffer[length++] = *cpStart++; } while (cpStart < cpLimit); // Copy every byte except the last unchanged.
+ buffer[length++] = (uint8_t)trail;
+ } else {
+ // Decode the code point, add the delta, re-encode.
+ UChar32 c = codePointFromValidUTF8(cpStart, cpLimit) + delta;
+ length = 0;
+ U8_APPEND_UNSAFE(buffer, length, c);
+ }
+ }
+ if (edits != nullptr) {
+ edits->addReplace(cpLength, length); // Old byte length, new byte length.
+ }
+ sink.Append(buffer, length);
+}
+
+} // namespace
+
+// ReorderingBuffer -------------------------------------------------------- ***
+
+ReorderingBuffer::ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest,
+ UErrorCode &errorCode) : // Binds the buffer to dest and obtains a writable array from it.
+ impl(ni), str(dest),
+ start(str.getBuffer(8)), reorderStart(start), limit(start), // getBuffer(8) is called with a minimum capacity of 8; the buffer starts out empty (limit==start).
+ remainingCapacity(str.getCapacity()), lastCC(0) {
+ if (start == nullptr && U_SUCCESS(errorCode)) { // Does not overwrite an earlier failure code.
+ // getBuffer() already did str.setToBogus()
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ }
+}
+
+UBool ReorderingBuffer::init(int32_t destCapacity, UErrorCode &errorCode) { // Acquires a buffer of at least destCapacity and positions the state after str's existing contents; FALSE on allocation failure.
+ int32_t length=str.length(); // Existing contents are kept; appending continues after them.
+ start=str.getBuffer(destCapacity);
+ if(start==NULL) {
+ // getBuffer() already did str.setToBogus()
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return FALSE;
+ }
+ limit=start+length;
+ remainingCapacity=str.getCapacity()-length;
+ reorderStart=start;
+ if(start==limit) { // Empty string: nothing precedes the append position.
+ lastCC=0;
+ } else {
+ setIterator();
+ lastCC=previousCC(); // Combining class of the last existing code point.
+ // Set reorderStart after the last code point with cc<=1 if there is one.
+ if(lastCC>1) {
+ while(previousCC()>1) {} // Walk backwards until a code point with cc<=1 (or the iteration limit) is reached.
+ }
+ reorderStart=codePointLimit;
+ }
+ return TRUE;
+}
+
+UBool ReorderingBuffer::equals(const UChar *otherStart, const UChar *otherLimit) const { // Compares the buffer contents [start, limit[ with a UTF-16 range.
+ int32_t length=(int32_t)(limit-start);
+ return
+ length==(int32_t)(otherLimit-otherStart) && // Lengths must match before the contents are compared.
+ 0==u_memcmp(start, otherStart, length);
+}
+
+UBool ReorderingBuffer::equals(const uint8_t *otherStart, const uint8_t *otherLimit) const { // Compares the UTF-16 buffer contents with a UTF-8 range, code point by code point.
+ U_ASSERT((otherLimit - otherStart) <= INT32_MAX); // ensured by caller
+ int32_t length = (int32_t)(limit - start); // UTF-16 length of the buffer contents.
+ int32_t otherLength = (int32_t)(otherLimit - otherStart); // UTF-8 length of the other string.
+ // For equal strings, UTF-8 is at least as long as UTF-16, and at most three times as long.
+ if (otherLength < length || (otherLength / 3) > length) { // Cheap length-based rejection before decoding anything.
+ return FALSE;
+ }
+ // Compare valid strings from between normalization boundaries.
+ // (Invalid sequences are normalization-inert.)
+ for (int32_t i = 0, j = 0;;) { // i indexes the UTF-16 buffer, j the UTF-8 string.
+ if (i >= length) {
+ return j >= otherLength; // Equal only if both strings end together.
+ } else if (j >= otherLength) {
+ return FALSE;
+ }
+ // Not at the end of either string yet.
+ UChar32 c, other;
+ U16_NEXT_UNSAFE(start, i, c);
+ U8_NEXT_UNSAFE(otherStart, j, other);
+ if (c != other) {
+ return FALSE;
+ }
+ }
+}
+
+UBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode) { // Adds c as a surrogate pair, placing it according to its combining class cc.
+ if(remainingCapacity<2 && !resize(2, errorCode)) { // Two code units are needed; grow the buffer if necessary.
+ return FALSE;
+ }
+ if(lastCC<=cc || cc==0) { // Already in order (or cc==0): write at the end.
+ limit[0]=U16_LEAD(c);
+ limit[1]=U16_TRAIL(c);
+ limit+=2;
+ lastCC=cc;
+ if(cc<=1) {
+ reorderStart=limit; // Nothing appended later is inserted before this point.
+ }
+ } else {
+ insert(c, cc); // Out of order: insert before the last character(s).
+ }
+ remainingCapacity-=2;
+ return TRUE;
+}
+
+UBool ReorderingBuffer::append(const UChar *s, int32_t length, UBool isNFD,
+ uint8_t leadCC, uint8_t trailCC,
+ UErrorCode &errorCode) { // Appends the UTF-16 string s; leadCC/trailCC are the combining classes of its first and last code points.
+ if(length==0) {
+ return TRUE;
+ }
+ if(remainingCapacity<length && !resize(length, errorCode)) { // Grow the buffer if s does not fit.
+ return FALSE;
+ }
+ remainingCapacity-=length;
+ if(lastCC<=leadCC || leadCC==0) { // No reordering needed against the existing contents: copy s verbatim.
+ if(trailCC<=1) {
+ reorderStart=limit+length;
+ } else if(leadCC<=1) {
+ reorderStart=limit+1; // Ok if not a code point boundary.
+ }
+ const UChar *sLimit=s+length;
+ do { *limit++=*s++; } while(s!=sLimit);
+ lastCC=trailCC;
+ } else {
+ int32_t i=0;
+ UChar32 c;
+ U16_NEXT(s, i, length, c);
+ insert(c, leadCC); // insert first code point
+ while(i<length) { // Remaining code points go through the single-code-point append().
+ U16_NEXT(s, i, length, c);
+ if(i<length) {
+ if (isNFD) { // Which combining-class lookup is used depends on isNFD.
+ leadCC = Normalizer2Impl::getCCFromYesOrMaybe(impl.getRawNorm16(c));
+ } else {
+ leadCC = impl.getCC(impl.getNorm16(c));
+ }
+ } else {
+ leadCC=trailCC; // The last code point's class was supplied by the caller.
+ }
+ append(c, leadCC, errorCode);
+ }
+ }
+ return TRUE;
+}
+
+UBool ReorderingBuffer::appendZeroCC(UChar32 c, UErrorCode &errorCode) { // Appends one code point and resets lastCC to 0; no reordering is performed.
+ int32_t cpLength=U16_LENGTH(c); // 1 or 2 UTF-16 code units.
+ if(remainingCapacity<cpLength && !resize(cpLength, errorCode)) {
+ return FALSE;
+ }
+ remainingCapacity-=cpLength;
+ if(cpLength==1) {
+ *limit++=(UChar)c;
+ } else {
+ limit[0]=U16_LEAD(c);
+ limit[1]=U16_TRAIL(c);
+ limit+=2;
+ }
+ lastCC=0;
+ reorderStart=limit; // Later insertions cannot move before this code point.
+ return TRUE;
+}
+
+UBool ReorderingBuffer::appendZeroCC(const UChar *s, const UChar *sLimit, UErrorCode &errorCode) { // Bulk-appends [s, sLimit[ and resets lastCC to 0; no reordering is performed.
+ if(s==sLimit) { // Empty range: nothing to do, state unchanged.
+ return TRUE;
+ }
+ int32_t length=(int32_t)(sLimit-s);
+ if(remainingCapacity<length && !resize(length, errorCode)) {
+ return FALSE;
+ }
+ u_memcpy(limit, s, length);
+ limit+=length;
+ remainingCapacity-=length;
+ lastCC=0;
+ reorderStart=limit; // Later insertions cannot move before the copied text.
+ return TRUE;
+}
+
+void ReorderingBuffer::remove() { // Empties the buffer without releasing its storage.
+ reorderStart=limit=start;
+ remainingCapacity=str.getCapacity(); // The whole capacity is available again.
+ lastCC=0;
+}
+
+void ReorderingBuffer::removeSuffix(int32_t suffixLength) { // Drops the last suffixLength code units, or everything if the buffer is not longer than that.
+ if(suffixLength<(limit-start)) {
+ limit-=suffixLength;
+ remainingCapacity+=suffixLength;
+ } else {
+ limit=start; // Suffix covers the whole contents: clear the buffer.
+ remainingCapacity=str.getCapacity();
+ }
+ lastCC=0;
+ reorderStart=limit;
+}
+
+UBool ReorderingBuffer::resize(int32_t appendLength, UErrorCode &errorCode) { // Grows the buffer to fit appendLength more code units; FALSE on allocation failure.
+ int32_t reorderStartIndex=(int32_t)(reorderStart-start); // Saved as an index because start may change below.
+ int32_t length=(int32_t)(limit-start);
+ str.releaseBuffer(length); // Hand the current contents back to str before requesting a new buffer.
+ int32_t newCapacity=length+appendLength; // Minimum required size.
+ int32_t doubleCapacity=2*str.getCapacity();
+ if(newCapacity<doubleCapacity) { // Grow to at least double the current capacity.
+ newCapacity=doubleCapacity;
+ }
+ if(newCapacity<256) { // Floor of 256 code units.
+ newCapacity=256;
+ }
+ start=str.getBuffer(newCapacity);
+ if(start==NULL) {
+ // getBuffer() already did str.setToBogus()
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return FALSE;
+ }
+ reorderStart=start+reorderStartIndex; // Re-derive the pointers against the new array.
+ limit=start+length;
+ remainingCapacity=str.getCapacity()-length;
+ return TRUE;
+}
+
+void ReorderingBuffer::skipPrevious() { // Moves the backward iterator over one code point without looking up its combining class.
+ codePointLimit=codePointStart;
+ UChar c=*--codePointStart;
+ if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(*(codePointStart-1))) { // Step over the lead surrogate too when c completes a pair.
+ --codePointStart;
+ }
+}
+
+uint8_t ReorderingBuffer::previousCC() { // Steps the backward iterator by one code point and returns its combining class; 0 once reorderStart is reached.
+ codePointLimit=codePointStart;
+ if(reorderStart>=codePointStart) { // Do not iterate before reorderStart.
+ return 0;
+ }
+ UChar32 c=*--codePointStart;
+ UChar c2;
+ if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(c2=*(codePointStart-1))) { // Combine a surrogate pair into one supplementary code point.
+ --codePointStart;
+ c=U16_GET_SUPPLEMENTARY(c2, c);
+ }
+ return impl.getCCFromYesOrMaybeCP(c);
+}
+
+// Inserts c somewhere before the last character.
+// Requires 0<cc<lastCC which implies reorderStart<limit.
+void ReorderingBuffer::insert(UChar32 c, uint8_t cc) {
+ for(setIterator(), skipPrevious(); previousCC()>cc;) {} // Skip the last character, then walk back while the combining class is greater than cc.
+ // insert c at codePointLimit, after the character with prevCC<=cc
+ UChar *q=limit; // Old end of text.
+ UChar *r=limit+=U16_LENGTH(c); // New end of text; limit grows by the length of c.
+ do {
+ *--r=*--q; // Shift the tail right, one code unit at a time, to open a gap.
+ } while(codePointLimit!=q);
+ writeCodePoint(q, c);
+ if(cc<=1) {
+ reorderStart=r; // r now points just past the inserted code point.
+ }
+}
+
+// Normalizer2Impl --------------------------------------------------------- ***
+
+struct CanonIterData : public UMemory { // Data owned by Normalizer2Impl via fCanonIterData; member functions are defined outside this span.
+ CanonIterData(UErrorCode &errorCode);
+ ~CanonIterData();
+ void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode);
+ UMutableCPTrie *mutableTrie; // NOTE(review): presumably the build-time form of trie -- confirm against the constructor.
+ UCPTrie *trie; // Passed to ucptrie_getRange() in addCanonIterPropertyStarts().
+ UVector canonStartSets; // contains UnicodeSet *
+};
+
+Normalizer2Impl::~Normalizer2Impl() { // Releases the canonical-iterator data held in fCanonIterData.
+ delete fCanonIterData; // delete on a null pointer is a no-op in C++.
+}
+
+void
+Normalizer2Impl::init(const int32_t *inIndexes, const UCPTrie *inTrie,
+ const uint16_t *inExtraData, const uint8_t *inSmallFCD) { // Copies thresholds out of inIndexes and stores the trie and data pointers; the pointed-to data is not copied.
+ minDecompNoCP = static_cast<UChar>(inIndexes[IX_MIN_DECOMP_NO_CP]); // The three *CP values are narrowed to UChar (16 bits).
+ minCompNoMaybeCP = static_cast<UChar>(inIndexes[IX_MIN_COMP_NO_MAYBE_CP]);
+ minLcccCP = static_cast<UChar>(inIndexes[IX_MIN_LCCC_CP]);
+
+ minYesNo = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO]); // norm16 thresholds; each is compared against norm16 values elsewhere in this file.
+ minYesNoMappingsOnly = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY]);
+ minNoNo = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO]);
+ minNoNoCompBoundaryBefore = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE]);
+ minNoNoCompNoMaybeCC = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC]);
+ minNoNoEmpty = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_EMPTY]);
+ limitNoNo = static_cast<uint16_t>(inIndexes[IX_LIMIT_NO_NO]);
+ minMaybeYes = static_cast<uint16_t>(inIndexes[IX_MIN_MAYBE_YES]);
+ U_ASSERT((minMaybeYes & 7) == 0); // 8-aligned for noNoDelta bit fields
+ centerNoNoDelta = (minMaybeYes >> DELTA_SHIFT) - MAX_DELTA - 1; // Derived from minMaybeYes; NOTE(review): presumably the norm16 value for delta 0 -- confirm in mapAlgorithmic().
+
+ normTrie=inTrie;
+
+ maybeYesCompositions=inExtraData;
+ extraData=maybeYesCompositions+((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT); // extraData starts at an offset into inExtraData computed from the minMaybeYes..MIN_NORMAL_MAYBE_YES distance.
+
+ smallFCD=inSmallFCD;
+}
+
+U_CDECL_BEGIN
+
+static uint32_t U_CALLCONV
+segmentStarterMapper(const void * /*context*/, uint32_t value) { // Value filter passed to ucptrie_getRange() in addCanonIterPropertyStarts(); context is unused.
+ return value&CANON_NOT_SEGMENT_STARTER; // Keeps only the CANON_NOT_SEGMENT_STARTER bit(s) of the trie value.
+}
+
+U_CDECL_END
+
+void
+Normalizer2Impl::addLcccChars(UnicodeSet &set) const { // Adds to set the trie ranges selected by the two norm16 tests below.
+ UChar32 start = 0, end;
+ uint32_t norm16;
+ while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT, // Iterate over same-value ranges until getRange returns a negative end.
+ nullptr, nullptr, &norm16)) >= 0) {
+ if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES &&
+ norm16 != Normalizer2Impl::JAMO_VT) { // Values above MIN_NORMAL_MAYBE_YES, excluding JAMO_VT, are added as a whole range.
+ set.add(start, end);
+ } else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) {
+ uint16_t fcd16 = getFCD16(start); // Only the range's first code point is inspected.
+ if (fcd16 > 0xff) { set.add(start, end); } // A non-zero upper byte of fcd16 selects the range.
+ }
+ start = end + 1;
+ }
+}
+
+void
+Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const { // Reports range-start code points through sa->add(); the error code parameter is unused.
+ // Add the start code point of each same-value range of the trie.
+ UChar32 start = 0, end;
+ uint32_t value;
+ while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
+ nullptr, nullptr, &value)) >= 0) {
+ sa->add(sa->set, start);
+ if (start != end && isAlgorithmicNoNo((uint16_t)value) && // Multi-code-point range with an algorithmic mapping...
+ (value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) { // ...whose DELTA_TCCC bits are above DELTA_TCCC_1.
+ // Range of code points with same-norm16-value algorithmic decompositions.
+ // They might have different non-zero FCD16 values.
+ uint16_t prevFCD16 = getFCD16(start);
+ while (++start <= end) {
+ uint16_t fcd16 = getFCD16(start);
+ if (fcd16 != prevFCD16) { // Add a boundary wherever FCD16 changes inside the range.
+ sa->add(sa->set, start);
+ prevFCD16 = fcd16;
+ }
+ }
+ }
+ start = end + 1;
+ }
+
+ /* add Hangul LV syllables and LV+1 because of skippables */
+ for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) { // Step from one LV syllable to the next.
+ sa->add(sa->set, c);
+ sa->add(sa->set, c+1);
+ }
+ sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */
+}
+
+void
+Normalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const { // Reports range starts of the canonical-iterator trie through sa->add().
+ // Add the start code point of each same-value range of the canonical iterator data trie.
+ if (!ensureCanonIterData(errorCode)) { return; } // Nothing is added if the canonical-iterator data is unavailable.
+ // Currently only used for the SEGMENT_STARTER property.
+ UChar32 start = 0, end;
+ uint32_t value;
+ while ((end = ucptrie_getRange(fCanonIterData->trie, start, UCPMAP_RANGE_NORMAL, 0,
+ segmentStarterMapper, nullptr, &value)) >= 0) { // Values are passed through segmentStarterMapper before ranges are formed.
+ sa->add(sa->set, start);
+ start = end + 1;
+ }
+}
+
+const UChar *
+Normalizer2Impl::copyLowPrefixFromNulTerminated(const UChar *src,
+ UChar32 minNeedDataCP,
+ ReorderingBuffer *buffer,
+ UErrorCode &errorCode) const { // Returns a pointer to the first code unit that is NUL or >= minNeedDataCP; the skipped prefix is appended to buffer if non-NULL.
+ // Make some effort to support NUL-terminated strings reasonably.
+ // Take the part of the fast quick check loop that does not look up
+ // data and check the first part of the string.
+ // After this prefix, determine the string length to simplify the rest
+ // of the code.
+ const UChar *prevSrc=src;
+ UChar c;
+ while((c=*src++)<minNeedDataCP && c!=0) {} // Stops one past the first unit that is NUL or needs data lookup.
+ // Back out the last character for full processing.
+ // Copy this prefix.
+ if(--src!=prevSrc) {
+ if(buffer!=NULL) {
+ buffer->appendZeroCC(prevSrc, src, errorCode); // The return value is not checked here; a failure is visible only through errorCode.
+ }
+ }
+ return src;
+}
+
+UnicodeString &
+Normalizer2Impl::decompose(const UnicodeString &src, UnicodeString &dest,
+ UErrorCode &errorCode) const { // Decomposes src into dest and returns dest; dest is set to bogus on the error paths handled here.
+ if(U_FAILURE(errorCode)) {
+ dest.setToBogus();
+ return dest;
+ }
+ const UChar *sArray=src.getBuffer();
+ if(&dest==&src || sArray==NULL) { // In-place operation and a src without a buffer are rejected.
+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ dest.setToBogus();
+ return dest;
+ }
+ decompose(sArray, sArray+src.length(), dest, src.length(), errorCode); // src.length() doubles as the destination length estimate.
+ return dest;
+}
+
+void
+Normalizer2Impl::decompose(const UChar *src, const UChar *limit,
+ UnicodeString &dest,
+ int32_t destLengthEstimate,
+ UErrorCode &errorCode) const { // Clears dest, then decomposes [src, limit[ into it through a ReorderingBuffer.
+ if(destLengthEstimate<0 && limit!=NULL) { // No estimate given: use the source length when it is known.
+ destLengthEstimate=(int32_t)(limit-src);
+ }
+ dest.remove();
+ ReorderingBuffer buffer(*this, dest);
+ if(buffer.init(destLengthEstimate, errorCode)) {
+ decompose(src, limit, &buffer, errorCode);
+ }
+}
+
+// Dual functionality:
+// buffer!=NULL: normalize
+// buffer==NULL: isNormalized/spanQuickCheckYes
+const UChar *
+Normalizer2Impl::decompose(const UChar *src, const UChar *limit,
+ ReorderingBuffer *buffer,
+ UErrorCode &errorCode) const {
+ UChar32 minNoCP=minDecompNoCP; // Code units below this are copied/skipped without a trie lookup.
+ if(limit==NULL) { // NUL-terminated input: handle the low prefix, then find the end.
+ src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return src;
+ }
+ limit=u_strchr(src, 0);
+ }
+
+ const UChar *prevSrc;
+ UChar32 c=0;
+ uint16_t norm16=0;
+
+ // only for quick check
+ const UChar *prevBoundary=src;
+ uint8_t prevCC=0;
+
+ for(;;) {
+ // count code units below the minimum or with irrelevant data for the quick check
+ for(prevSrc=src; src!=limit;) {
+ if( (c=*src)<minNoCP ||
+ isMostDecompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
+ ) {
+ ++src;
+ } else if(!U16_IS_LEAD(c)) {
+ break; // A BMP code unit that needs the slow path.
+ } else {
+ UChar c2;
+ if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) { // Complete surrogate pair: look up the supplementary code point.
+ c=U16_GET_SUPPLEMENTARY(c, c2);
+ norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
+ if(isMostDecompYesAndZeroCC(norm16)) {
+ src+=2;
+ } else {
+ break; // c and norm16 now describe the code point to process below.
+ }
+ } else {
+ ++src; // unpaired lead surrogate: inert
+ }
+ }
+ }
+ // copy these code units all at once
+ if(src!=prevSrc) {
+ if(buffer!=NULL) {
+ if(!buffer->appendZeroCC(prevSrc, src, errorCode)) {
+ break;
+ }
+ } else {
+ prevCC=0; // Quick-check mode: the skipped run ends with cc==0, so it forms a boundary.
+ prevBoundary=src;
+ }
+ }
+ if(src==limit) {
+ break;
+ }
+
+ // Check one above-minimum, relevant code point.
+ src+=U16_LENGTH(c); // Advance past c (1 or 2 code units).
+ if(buffer!=NULL) {
+ if(!decompose(c, norm16, *buffer, errorCode)) {
+ break;
+ }
+ } else {
+ if(isDecompYes(norm16)) {
+ uint8_t cc=getCCFromYesOrMaybe(norm16);
+ if(prevCC<=cc || cc==0) { // Combining classes still in order.
+ prevCC=cc;
+ if(cc<=1) {
+ prevBoundary=src;
+ }
+ continue;
+ }
+ }
+ return prevBoundary; // "no" or cc out of order
+ }
+ }
+ return src; // Reached on normal completion, and also after a failed append/decompose (see errorCode).
+}
+
+// Decompose a short piece of text which is likely to contain characters that
+// fail the quick check loop and/or where the quick check loop's overhead
+// is unlikely to be amortized.
+// Called by the compose() and makeFCD() implementations.
+const UChar *
+Normalizer2Impl::decomposeShort(const UChar *src, const UChar *limit,
+ UBool stopAtCompBoundary, UBool onlyContiguous,
+ ReorderingBuffer &buffer, UErrorCode &errorCode) const { // Returns where decomposition stopped, or nullptr on failure.
+ if (U_FAILURE(errorCode)) {
+ return nullptr;
+ }
+ while(src<limit) {
+ if (stopAtCompBoundary && *src < minCompNoMaybeCP) { // Cheap boundary test that needs no trie lookup.
+ return src;
+ }
+ const UChar *prevSrc = src; // Start of the current code point, returned if a boundary lies before it.
+ UChar32 c;
+ uint16_t norm16;
+ UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16); // Reads one code point and its norm16, advancing src.
+ if (stopAtCompBoundary && norm16HasCompBoundaryBefore(norm16)) {
+ return prevSrc;
+ }
+ if(!decompose(c, norm16, buffer, errorCode)) {
+ return nullptr;
+ }
+ if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
+ return src; // Boundary after c: c itself has already been appended.
+ }
+ }
+ return src;
+}
+
+UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
+ ReorderingBuffer &buffer,
+ UErrorCode &errorCode) const { // Appends the decomposition of the single code point c (with trie value norm16) to buffer.
+ // get the decomposition and the lead and trail cc's
+ if (norm16 >= limitNoNo) {
+ if (isMaybeOrNonZeroCC(norm16)) {
+ return buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode); // No mapping: append c itself with its combining class.
+ }
+ // Maps to an isCompYesAndZeroCC.
+ c=mapAlgorithmic(c, norm16);
+ norm16=getRawNorm16(c); // Continue below with the mapped code point's own data.
+ }
+ if (norm16 < minYesNo) {
+ // c does not decompose
+ return buffer.append(c, 0, errorCode);
+ } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
+ // Hangul syllable: decompose algorithmically
+ UChar jamos[3];
+ return buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode); // Hangul::decompose() fills jamos and returns the count used as the range end.
+ }
+ // c decomposes, get everything from the variable-length extra data
+ const uint16_t *mapping=getMapping(norm16);
+ uint16_t firstUnit=*mapping; // Header word: length in the low bits, trail cc in the high byte, plus flags.
+ int32_t length=firstUnit&MAPPING_LENGTH_MASK;
+ uint8_t leadCC, trailCC;
+ trailCC=(uint8_t)(firstUnit>>8);
+ if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
+ leadCC=(uint8_t)(*(mapping-1)>>8); // Lead cc comes from the high byte of the word before the header.
+ } else {
+ leadCC=0;
+ }
+ return buffer.append((const UChar *)mapping+1, length, TRUE, leadCC, trailCC, errorCode); // The mapping text follows the header word; TRUE is the isNFD argument.
+}
+
+const uint8_t *
+Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
+ UBool stopAtCompBoundary, UBool onlyContiguous,
+ ReorderingBuffer &buffer, UErrorCode &errorCode) const { // UTF-8 input variant: returns where decomposition stopped, or nullptr on failure.
+ if (U_FAILURE(errorCode)) {
+ return nullptr;
+ }
+ while (src < limit) {
+ const uint8_t *prevSrc = src; // Start of the current byte sequence.
+ uint16_t norm16;
+ UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); // Advances src and yields norm16; the code point is not assembled here.
+ // Get the decomposition and the lead and trail cc's.
+ UChar32 c = U_SENTINEL; // Negative until the code point is actually decoded.
+ if (norm16 >= limitNoNo) {
+ if (isMaybeOrNonZeroCC(norm16)) {
+ // No boundaries around this character.
+ c = codePointFromValidUTF8(prevSrc, src);
+ if (!buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode)) {
+ return nullptr;
+ }
+ continue;
+ }
+ // Maps to an isCompYesAndZeroCC.
+ if (stopAtCompBoundary) {
+ return prevSrc;
+ }
+ c = codePointFromValidUTF8(prevSrc, src);
+ c = mapAlgorithmic(c, norm16);
+ norm16 = getRawNorm16(c); // Continue below with the mapped code point's own data.
+ } else if (stopAtCompBoundary && norm16 < minNoNoCompNoMaybeCC) {
+ return prevSrc; // Boundary before this character; nothing is appended for it.
+ }
+ // norm16!=INERT guarantees that [prevSrc, src[ is valid UTF-8.
+ // We do not see invalid UTF-8 here because
+ // its norm16==INERT is normalization-inert,
+ // so it gets copied unchanged in the fast path,
+ // and we stop the slow path where invalid UTF-8 begins.
+ U_ASSERT(norm16 != INERT);
+ if (norm16 < minYesNo) {
+ if (c < 0) { // Decode lazily, only if not already done above.
+ c = codePointFromValidUTF8(prevSrc, src);
+ }
+ // does not decompose
+ if (!buffer.append(c, 0, errorCode)) {
+ return nullptr;
+ }
+ } else if (isHangulLV(norm16) || isHangulLVT(norm16)) {
+ // Hangul syllable: decompose algorithmically
+ if (c < 0) { // Decode lazily, only if not already done above.
+ c = codePointFromValidUTF8(prevSrc, src);
+ }
+ char16_t jamos[3];
+ if (!buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode)) {
+ return nullptr;
+ }
+ } else {
+ // The character decomposes, get everything from the variable-length extra data.
+ const uint16_t *mapping = getMapping(norm16);
+ uint16_t firstUnit = *mapping; // Header word: length in the low bits, trail cc in the high byte, plus flags.
+ int32_t length = firstUnit & MAPPING_LENGTH_MASK;
+ uint8_t trailCC = (uint8_t)(firstUnit >> 8);
+ uint8_t leadCC;
+ if (firstUnit & MAPPING_HAS_CCC_LCCC_WORD) {
+ leadCC = (uint8_t)(*(mapping-1) >> 8); // Lead cc comes from the high byte of the word before the header.
+ } else {
+ leadCC = 0;
+ }
+ if (!buffer.append((const char16_t *)mapping+1, length, TRUE, leadCC, trailCC, errorCode)) {
+ return nullptr;
+ }
+ }
+ if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
+ return src; // Boundary after this character, which has already been appended.
+ }
+ }
+ return src;
+}
+
+const UChar *
+Normalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const { // Returns c's decomposition (in buffer or in the extra data) and sets length; nullptr if c does not decompose.
+ uint16_t norm16;
+ if(c<minDecompNoCP || isMaybeOrNonZeroCC(norm16=getNorm16(c))) {
+ // c does not decompose
+ return nullptr; // length is left unmodified on this path.
+ }
+ const UChar *decomp = nullptr;
+ if(isDecompNoAlgorithmic(norm16)) {
+ // Maps to an isCompYesAndZeroCC.
+ c=mapAlgorithmic(c, norm16);
+ decomp=buffer; // Provisional result: the mapped code point itself.
+ length=0;
+ U16_APPEND_UNSAFE(buffer, length, c);
+ // The mapping might decompose further.
+ norm16 = getRawNorm16(c);
+ }
+ if (norm16 < minYesNo) {
+ return decomp; // nullptr unless the algorithmic mapping above produced a result.
+ } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
+ // Hangul syllable: decompose algorithmically
+ length=Hangul::decompose(c, buffer);
+ return buffer;
+ }
+ // c decomposes, get everything from the variable-length extra data
+ const uint16_t *mapping=getMapping(norm16);
+ length=*mapping&MAPPING_LENGTH_MASK;
+ return (const UChar *)mapping+1; // Points into the extra data, not into buffer.
+}
+
+// The capacity of the buffer must be 30=MAPPING_LENGTH_MASK-1
+// so that a raw mapping fits that consists of one unit ("rm0")
+// plus all but the first two code units of the normal mapping.
+// The maximum length of a normal mapping is 31=MAPPING_LENGTH_MASK.
+const UChar *
+Normalizer2Impl::getRawDecomposition(UChar32 c, UChar buffer[30], int32_t &length) const { // Returns c's raw mapping (in buffer or in the extra data) and sets length; NULL if c does not decompose.
+ uint16_t norm16;
+ if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
+ // c does not decompose
+ return NULL; // length is left unmodified on this path.
+ } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
+ // Hangul syllable: decompose algorithmically
+ Hangul::getRawDecomposition(c, buffer);
+ length=2; // The raw Hangul result is always reported as two code units.
+ return buffer;
+ } else if(isDecompNoAlgorithmic(norm16)) {
+ c=mapAlgorithmic(c, norm16);
+ length=0;
+ U16_APPEND_UNSAFE(buffer, length, c); // The result is the single mapped code point.
+ return buffer;
+ }
+ // c decomposes, get everything from the variable-length extra data
+ const uint16_t *mapping=getMapping(norm16);
+ uint16_t firstUnit=*mapping;
+ int32_t mLength=firstUnit&MAPPING_LENGTH_MASK; // length of normal mapping
+ if(firstUnit&MAPPING_HAS_RAW_MAPPING) {
+ // Read the raw mapping from before the firstUnit and before the optional ccc/lccc word.
+ // Bit 7=MAPPING_HAS_CCC_LCCC_WORD
+ const uint16_t *rawMapping=mapping-((firstUnit>>7)&1)-1; // Steps back one word, plus one more if the ccc/lccc word is present.
+ uint16_t rm0=*rawMapping;
+ if(rm0<=MAPPING_LENGTH_MASK) { // rm0 is a length: the raw mapping text sits just before rawMapping.
+ length=rm0;
+ return (const UChar *)rawMapping-rm0;
+ } else {
+ // Copy the normal mapping and replace its first two code units with rm0.
+ buffer[0]=(UChar)rm0;
+ u_memcpy(buffer+1, (const UChar *)mapping+1+2, mLength-2);
+ length=mLength-1;
+ return buffer;
+ }
+ } else {
+ length=mLength; // No separate raw mapping: the normal mapping is returned.
+ return (const UChar *)mapping+1;
+ }
+}
+
+// Appends [src, limit[ to buffer, either decomposing it (doDecompose) or merely
+// merging it with the buffer contents at the boundary.
+// safeMiddle receives a copy of the buffer's reorderable suffix before anything is appended.
+void Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit,
+                                         UBool doDecompose,
+                                         UnicodeString &safeMiddle,
+                                         ReorderingBuffer &buffer,
+                                         UErrorCode &errorCode) const {
+    buffer.copyReorderableSuffixTo(safeMiddle);
+    if (doDecompose) {
+        decompose(src, limit, &buffer, errorCode);
+        return;
+    }
+    // Just merge the strings at the boundary:
+    // walk over the leading characters with ccc!=0 and remember the first and last ccc.
+    const UChar *mark = src;
+    uint8_t leadCC = 0;
+    uint8_t trailCC = 0;
+    while (mark != limit) {
+        const UChar *const start = mark;
+        UChar32 cp;
+        uint16_t props;
+        UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, mark, limit, cp, props);
+        const uint8_t ccc = getCC(props);
+        if (ccc == 0) {
+            // Back up to before the first ccc==0 character.
+            mark = start;
+            break;
+        }
+        if (leadCC == 0) {
+            // ccc is non-zero here, so leadCC==0 means "no mark seen yet".
+            leadCC = ccc;
+        }
+        trailCC = ccc;
+    }
+    if (limit == NULL) {  // appendZeroCC() needs limit!=NULL
+        limit = u_strchr(mark, 0);
+    }
+
+    const int32_t prefixLength = (int32_t)(mark - src);
+    if (buffer.append(src, prefixLength, FALSE, leadCC, trailCC, errorCode)) {
+        buffer.appendZeroCC(mark, limit, errorCode);
+    }
+}
+
+// Tests for a decomposition boundary before c: cheap code point checks first,
+// then the norm16-based test.
+UBool Normalizer2Impl::hasDecompBoundaryBefore(UChar32 c) const {
+    if (c < minLcccCP) {
+        return TRUE;
+    }
+    if (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) {
+        return TRUE;
+    }
+    return norm16HasDecompBoundaryBefore(getNorm16(c));
+}
+
+// norm16-based test for a decomposition boundary before the character.
+UBool Normalizer2Impl::norm16HasDecompBoundaryBefore(uint16_t norm16) const {
+    if (norm16 < minNoNoCompNoMaybeCC) {
+        return TRUE;
+    }
+    if (norm16 >= limitNoNo) {
+        return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
+    }
+    // The character decomposes: inspect its mapping in the variable-length extra data.
+    const uint16_t *mapping = getMapping(norm16);
+    if ((mapping[0] & MAPPING_HAS_CCC_LCCC_WORD) == 0) {
+        // No ccc/lccc word stored.
+        return TRUE;
+    }
+    // TRUE if leadCC==0 (hasFCDBoundaryBefore()); lccc is the high byte of the word
+    // stored just before the mapping's first unit.
+    return (mapping[-1] & 0xff00) == 0;
+}
+
+// Tests for a decomposition boundary after c: cheap code point checks first,
+// then the norm16-based test.
+UBool Normalizer2Impl::hasDecompBoundaryAfter(UChar32 c) const {
+    return c < minDecompNoCP ||
+        (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) ||
+        norm16HasDecompBoundaryAfter(getNorm16(c));
+}
+
+// norm16-based test for a decomposition boundary after the character.
+UBool Normalizer2Impl::norm16HasDecompBoundaryAfter(uint16_t norm16) const {
+    if (norm16 <= minYesNo) {
+        return TRUE;
+    }
+    if (isHangulLVT(norm16)) {
+        return TRUE;
+    }
+    if (norm16 >= limitNoNo) {
+        if (!isMaybeOrNonZeroCC(norm16)) {
+            // Maps to an isCompYesAndZeroCC.
+            return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1;
+        }
+        return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
+    }
+    // The character decomposes: inspect its mapping in the variable-length extra data.
+    const uint16_t *mapping = getMapping(norm16);
+    const uint16_t header = mapping[0];
+    // decomp after-boundary: same as hasFCDBoundaryAfter(),
+    // fcd16<=1 || trailCC==0
+    if (header <= 0xff) {
+        return TRUE;  // trailCC==0
+    }
+    if (header > 0x1ff) {
+        return FALSE;  // trailCC>1
+    }
+    // trailCC==1: there is a boundary only if leadCC==0,
+    // which is the same test as for a before-boundary (hasFCDBoundaryBefore()).
+    if ((header & MAPPING_HAS_CCC_LCCC_WORD) == 0) {
+        return TRUE;
+    }
+    return (mapping[-1] & 0xff00) == 0;
+}
+
+/*
+ * Finds the recomposition result for
+ * a forward-combining "lead" character,
+ * specified with a pointer to its compositions list,
+ * and a backward-combining "trail" character.
+ *
+ * If the lead and trail characters combine, then this function returns
+ * the following "compositeAndFwd" value:
+ * Bits 21..1 composite character
+ * Bit 0 set if the composite is a forward-combining starter
+ * otherwise it returns -1.
+ *
+ * The compositions list has (trail, compositeAndFwd) pair entries,
+ * encoded as either pairs or triples of 16-bit units.
+ * The last entry has the high bit of its first unit set.
+ *
+ * The list is sorted by ascending trail characters (there are no duplicates).
+ * A linear search is used.
+ *
+ * See normalizer2impl.h for a more detailed description
+ * of the compositions list format.
+ *
+ * @param list pointer to the first 16-bit unit of the lead character's compositions list
+ * @param trail the trail code point to look up; composePair() range-checks it to
+ * 0..10FFFF before calling because this function requires a valid code point
+ * @return the compositeAndFwd value of the matching entry, or -1 if no entry matches
+ */
+int32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) {
+ uint16_t key1, firstUnit;
+ if(trail<COMP_1_TRAIL_LIMIT) {
+ // trail character is 0..33FF
+ // result entry may have 2 or 3 units
+ key1=(uint16_t)(trail<<1); // the trail is stored shifted left by one in the first unit
+ while(key1>(firstUnit=*list)) { // skip entries that sort before the key
+ list+=2+(firstUnit&COMP_1_TRIPLE); // 2 units per entry, plus the value of the COMP_1_TRIPLE bit
+ }
+ if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
+ if(firstUnit&COMP_1_TRIPLE) {
+ return ((int32_t)list[1]<<16)|list[2]; // triple: compositeAndFwd spans units 1 and 2
+ } else {
+ return list[1]; // pair: compositeAndFwd is the second unit
+ }
+ }
+ } else {
+ // trail character is 3400..10FFFF
+ // result entry has 3 units
+ key1=(uint16_t)(COMP_1_TRAIL_LIMIT+
+ (((trail>>COMP_1_TRAIL_SHIFT))&
+ ~COMP_1_TRIPLE));
+ uint16_t key2=(uint16_t)(trail<<COMP_2_TRAIL_SHIFT); // remaining trail bits, matched against the second unit
+ uint16_t secondUnit;
+ for(;;) {
+ if(key1>(firstUnit=*list)) {
+ list+=2+(firstUnit&COMP_1_TRIPLE); // entry sorts before the key: skip it
+ } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
+ if(key2>(secondUnit=list[1])) {
+ if(firstUnit&COMP_1_LAST_TUPLE) {
+ break; // end of list, no match
+ } else {
+ list+=3; // same key1 but smaller second unit: try the next triple
+ }
+ } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) {
+ return ((int32_t)(secondUnit&~COMP_2_TRAIL_MASK)<<16)|list[2]; // drop the trail bits, keep the composite bits
+ } else {
+ break; // passed the sorted position without a match
+ }
+ } else {
+ break; // first unit is greater than the key: no match
+ }
+ }
+ }
+ return -1;
+}
+
+/**
+ * Adds every composite listed in a compositions list to a set; for composites
+ * that themselves combine forward, their own compositions lists are visited too.
+ * @param list some character's compositions list
+ * @param set recursively receives the composites from these compositions
+ */
+void Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const {
+    for (;;) {
+        const uint16_t head = list[0];
+        int32_t packed;  // composite<<1 | forward-combining flag
+        if (head & COMP_1_TRIPLE) {
+            // Three-unit entry: mask the trail bits out of the second unit.
+            packed = (((int32_t)list[1] & ~COMP_2_TRAIL_MASK) << 16) | list[2];
+            list += 3;
+        } else {
+            // Two-unit entry.
+            packed = list[1];
+            list += 2;
+        }
+        const UChar32 composite = packed >> 1;
+        if (packed & 1) {
+            // The composite combines forward as well: collect its composites first.
+            addComposites(getCompositionsListForComposite(getRawNorm16(composite)), set);
+        }
+        set.add(composite);
+        if (head & COMP_1_LAST_TUPLE) {
+            break;
+        }
+    }
+}
+
+/*
+ * Recomposes the buffer text starting at recomposeStartIndex
+ * (which is in NFD - decomposed and canonically ordered),
+ * and truncates the buffer contents.
+ *
+ * Note that recomposition never lengthens the text:
+ * Any character consists of either one or two code units;
+ * a composition may contain at most one more code unit than the original starter,
+ * while the combining mark that is removed has at least one code unit.
+ *
+ * @param buffer the text to recompose in place; its limit is reset at the end
+ * via setReorderingLimit()
+ * @param recomposeStartIndex offset from buffer.getStart() where recomposition begins
+ * @param onlyContiguous if TRUE (FCC), a non-combining character with ccc!=0
+ * blocks further compositions with the current starter
+ */
+void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
+ UBool onlyContiguous) const {
+ UChar *p=buffer.getStart()+recomposeStartIndex;
+ UChar *limit=buffer.getLimit();
+ if(p==limit) {
+ return; // nothing to recompose
+ }
+
+ UChar *starter, *pRemove, *q, *r; // q/r are the write/read cursors used when shifting text
+ const uint16_t *compositionsList;
+ UChar32 c, compositeAndFwd;
+ uint16_t norm16;
+ uint8_t cc, prevCC;
+ UBool starterIsSupplementary;
+
+ // Some of the following variables are not used until we have a forward-combining starter
+ // and are only initialized now to avoid compiler warnings.
+ compositionsList=NULL; // used as indicator for whether we have a forward-combining starter
+ starter=NULL;
+ starterIsSupplementary=FALSE;
+ prevCC=0;
+
+ for(;;) {
+ UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); // reads c and advances p past it
+ cc=getCCFromYesOrMaybe(norm16);
+ if( // this character combines backward and
+ isMaybe(norm16) &&
+ // we have seen a starter that combines forward and
+ compositionsList!=NULL &&
+ // the backward-combining character is not blocked
+ (prevCC<cc || prevCC==0)
+ ) {
+ if(isJamoVT(norm16)) {
+ // c is a Jamo V/T, see if we can compose it with the previous character.
+ if(c<Hangul::JAMO_T_BASE) {
+ // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
+ UChar prev=(UChar)(*starter-Hangul::JAMO_L_BASE); // unsigned wrap makes a non-Jamo-L starter fail the range test below
+ if(prev<Hangul::JAMO_L_COUNT) {
+ pRemove=p-1; // start of the Jamo V that will be removed
+ UChar syllable=(UChar)
+ (Hangul::HANGUL_BASE+
+ (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))*
+ Hangul::JAMO_T_COUNT);
+ UChar t;
+ if(p!=limit && (t=(UChar)(*p-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) {
+ ++p;
+ syllable+=t; // The next character was a Jamo T.
+ }
+ *starter=syllable; // overwrite the Jamo L with the composed syllable
+ // remove the Jamo V/T
+ q=pRemove;
+ r=p;
+ while(r<limit) {
+ *q++=*r++;
+ }
+ limit=q;
+ p=pRemove;
+ }
+ }
+ /*
+ * No "else" for Jamo T:
+ * Since the input is in NFD, there are no Hangul LV syllables that
+ * a Jamo T could combine with.
+ * All Jamo Ts are combined above when handling Jamo Vs.
+ */
+ if(p==limit) {
+ break;
+ }
+ compositionsList=NULL; // no forward-combining starter is pending after the Jamo handling
+ continue;
+ } else if((compositeAndFwd=combine(compositionsList, c))>=0) {
+ // The starter and the combining mark (c) do combine.
+ UChar32 composite=compositeAndFwd>>1;
+
+ // Replace the starter with the composite, remove the combining mark.
+ pRemove=p-U16_LENGTH(c); // pRemove & p: start & limit of the combining mark
+ if(starterIsSupplementary) {
+ if(U_IS_SUPPLEMENTARY(composite)) {
+ // both are supplementary
+ starter[0]=U16_LEAD(composite);
+ starter[1]=U16_TRAIL(composite);
+ } else {
+ *starter=(UChar)composite;
+ // The composite is shorter than the starter,
+ // move the intermediate characters forward one.
+ starterIsSupplementary=FALSE;
+ q=starter+1;
+ r=q+1;
+ while(r<pRemove) {
+ *q++=*r++;
+ }
+ --pRemove; // the gap to close is now one unit longer
+ }
+ } else if(U_IS_SUPPLEMENTARY(composite)) {
+ // The composite is longer than the starter,
+ // move the intermediate characters back one.
+ starterIsSupplementary=TRUE;
+ ++starter; // temporarily increment for the loop boundary
+ q=pRemove;
+ r=++pRemove; // one unit of the mark's space is used up by the longer composite
+ while(starter<q) {
+ *--r=*--q;
+ }
+ *starter=U16_TRAIL(composite);
+ *--starter=U16_LEAD(composite); // undo the temporary increment
+ } else {
+ // both are on the BMP
+ *starter=(UChar)composite;
+ }
+
+ /* remove the combining mark by moving the following text over it */
+ if(pRemove<p) {
+ q=pRemove;
+ r=p;
+ while(r<limit) {
+ *q++=*r++;
+ }
+ limit=q;
+ p=pRemove;
+ }
+ // Keep prevCC because we removed the combining mark.
+
+ if(p==limit) {
+ break;
+ }
+ // Is the composite a starter that combines forward?
+ if(compositeAndFwd&1) {
+ compositionsList=
+ getCompositionsListForComposite(getRawNorm16(composite));
+ } else {
+ compositionsList=NULL;
+ }
+
+ // We combined; continue with looking for compositions.
+ continue;
+ }
+ }
+
+ // no combination this time
+ prevCC=cc;
+ if(p==limit) {
+ break;
+ }
+
+ // If c did not combine, then check if it is a starter.
+ if(cc==0) {
+ // Found a new starter.
+ if((compositionsList=getCompositionsListForDecompYes(norm16))!=NULL) {
+ // It may combine with something, prepare for it.
+ if(U_IS_BMP(c)) {
+ starterIsSupplementary=FALSE;
+ starter=p-1; // p is already past c, which is one code unit
+ } else {
+ starterIsSupplementary=TRUE;
+ starter=p-2; // p is already past c, which is a surrogate pair
+ }
+ }
+ } else if(onlyContiguous) {
+ // FCC: no discontiguous compositions; any intervening character blocks.
+ compositionsList=NULL;
+ }
+ }
+ buffer.setReorderingLimit(limit); // publish the (possibly shortened) end of the text
+}
+
+// Composes the pair (a, b): returns the composite code point,
+// or U_SENTINEL if the two do not combine.
+UChar32
+Normalizer2Impl::composePair(UChar32 a, UChar32 b) const {
+    const uint16_t props = getNorm16(a);  // maps an out-of-range 'a' to inert norm16
+    if (isInert(props)) {
+        return U_SENTINEL;
+    }
+    const uint16_t *compositions;
+    if (props >= minYesNoMappingsOnly) {
+        if (props < minMaybeYes || MIN_NORMAL_MAYBE_YES <= props) {
+            return U_SENTINEL;
+        }
+        compositions = getCompositionsListForMaybe(props);
+    } else if (isJamoL(props)) {
+        // a combines forward: Jamo L + Jamo V -> Hangul LV syllable.
+        b -= Hangul::JAMO_V_BASE;
+        if (b < 0 || Hangul::JAMO_V_COUNT <= b) {
+            return U_SENTINEL;
+        }
+        return
+            (Hangul::HANGUL_BASE+
+             ((a-Hangul::JAMO_L_BASE)*Hangul::JAMO_V_COUNT+b)*
+             Hangul::JAMO_T_COUNT);
+    } else if (isHangulLV(props)) {
+        // a combines forward: Hangul LV + Jamo T -> Hangul LVT syllable.
+        b -= Hangul::JAMO_T_BASE;
+        if (b <= 0 || Hangul::JAMO_T_COUNT <= b) {  // not b==0!
+            return U_SENTINEL;
+        }
+        return a + b;
+    } else {
+        // 'a' has a compositions list in extraData
+        compositions = getMapping(props);
+        if (props > minYesNo) {  // composite 'a' has both mapping & compositions list
+            // Skip the first unit (which holds the mapping length) and the mapping itself.
+            compositions += 1 + (*compositions & MAPPING_LENGTH_MASK);
+        }
+    }
+    if (b < 0 || 0x10ffff < b) {  // combine(list, b) requires a valid code point b
+        return U_SENTINEL;
+    }
+#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
+    return combine(compositions, b) >> 1;
+#else
+    const int32_t packed = combine(compositions, b);
+    if (packed < 0) {
+        return U_SENTINEL;
+    }
+    return packed >> 1;
+#endif
+}
+
+// Composes [src, limit[ (NUL-terminated if limit==NULL) into buffer, or only tests whether it is composed.
+// Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
+// doCompose: normalize; !doCompose: isNormalized (buffer must be empty and initialized), returning FALSE as soon as a change would be needed
+UBool
+Normalizer2Impl::compose(const UChar *src, const UChar *limit,
+ UBool onlyContiguous, // TRUE selects the FCC ("contiguous composition") behavior
+ UBool doCompose,
+ ReorderingBuffer &buffer,
+ UErrorCode &errorCode) const {
+ const UChar *prevBoundary=src; // start of the span that has been scanned but not yet appended to buffer
+ UChar32 minNoMaybeCP=minCompNoMaybeCP;
+ if(limit==NULL) {
+ src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP,
+ doCompose ? &buffer : NULL,
+ errorCode);
+ if(U_FAILURE(errorCode)) {
+ return FALSE;
+ }
+ limit=u_strchr(src, 0);
+ if (prevBoundary != src) {
+ if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) {
+ prevBoundary = src;
+ } else {
+ buffer.removeSuffix(1); // take the last copied unit back out; it is rescanned from prevBoundary
+ prevBoundary = --src;
+ }
+ }
+ }
+
+ for (;;) {
+ // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
+ // or with (compYes && ccc==0) properties.
+ const UChar *prevSrc; // start of the character that ended the fast-path scan
+ UChar32 c = 0;
+ uint16_t norm16 = 0;
+ for (;;) {
+ if (src == limit) {
+ if (prevBoundary != limit && doCompose) {
+ buffer.appendZeroCC(prevBoundary, limit, errorCode);
+ }
+ return TRUE;
+ }
+ if( (c=*src)<minNoMaybeCP ||
+ isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
+ ) {
+ ++src;
+ } else {
+ prevSrc = src++;
+ if(!U16_IS_LEAD(c)) {
+ break;
+ } else {
+ UChar c2;
+ if(src!=limit && U16_IS_TRAIL(c2=*src)) {
+ ++src;
+ c=U16_GET_SUPPLEMENTARY(c, c2);
+ norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
+ if(!isCompYesAndZeroCC(norm16)) {
+ break;
+ }
+ }
+ }
+ }
+ }
+ // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
+ // The current character is either a "noNo" (has a mapping)
+ // or a "maybeYes" (combines backward)
+ // or a "yesYes" with ccc!=0.
+ // It is not a Hangul syllable or Jamo L because those have "yes" properties.
+
+ // Medium-fast path: Handle cases that do not require full decomposition and recomposition.
+ if (!isMaybeOrNonZeroCC(norm16)) { // minNoNo <= norm16 < minMaybeYes
+ if (!doCompose) {
+ return FALSE;
+ }
+ // Fast path for mapping a character that is immediately surrounded by boundaries.
+ // In this case, we need not decompose around the current character.
+ if (isDecompNoAlgorithmic(norm16)) {
+ // Maps to a single isCompYesAndZeroCC character
+ // which also implies hasCompBoundaryBefore.
+ if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
+ hasCompBoundaryBefore(src, limit)) {
+ if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
+ break;
+ }
+ if(!buffer.append(mapAlgorithmic(c, norm16), 0, errorCode)) {
+ break;
+ }
+ prevBoundary = src;
+ continue;
+ }
+ } else if (norm16 < minNoNoCompBoundaryBefore) {
+ // The mapping is comp-normalized which also implies hasCompBoundaryBefore.
+ if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
+ hasCompBoundaryBefore(src, limit)) {
+ if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
+ break;
+ }
+ const UChar *mapping = reinterpret_cast<const UChar *>(getMapping(norm16));
+ int32_t length = *mapping++ & MAPPING_LENGTH_MASK; // first unit holds the length; the mapping text follows
+ if(!buffer.appendZeroCC(mapping, mapping + length, errorCode)) {
+ break;
+ }
+ prevBoundary = src;
+ continue;
+ }
+ } else if (norm16 >= minNoNoEmpty) {
+ // The current character maps to nothing.
+ // Simply omit it from the output if there is a boundary before _or_ after it.
+ // The character itself implies no boundaries.
+ if (hasCompBoundaryBefore(src, limit) ||
+ hasCompBoundaryAfter(prevBoundary, prevSrc, onlyContiguous)) {
+ if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
+ break;
+ }
+ prevBoundary = src;
+ continue;
+ }
+ }
+ // Other "noNo" type, or need to examine more text around this character:
+ // Fall through to the slow path.
+ } else if (isJamoVT(norm16) && prevBoundary != prevSrc) {
+ UChar prev=*(prevSrc-1);
+ if(c<Hangul::JAMO_T_BASE) {
+ // The current character is a Jamo Vowel,
+ // compose with previous Jamo L and following Jamo T.
+ UChar l = (UChar)(prev-Hangul::JAMO_L_BASE);
+ if(l<Hangul::JAMO_L_COUNT) {
+ if (!doCompose) {
+ return FALSE;
+ }
+ int32_t t; // Jamo T index to add to the syllable; -1 means "use the slow path"
+ if (src != limit &&
+ 0 < (t = ((int32_t)*src - Hangul::JAMO_T_BASE)) &&
+ t < Hangul::JAMO_T_COUNT) {
+ // The next character is a Jamo T.
+ ++src;
+ } else if (hasCompBoundaryBefore(src, limit)) {
+ // No Jamo T follows, not even via decomposition.
+ t = 0;
+ } else {
+ t = -1;
+ }
+ if (t >= 0) {
+ UChar32 syllable = Hangul::HANGUL_BASE +
+ (l*Hangul::JAMO_V_COUNT + (c-Hangul::JAMO_V_BASE)) *
+ Hangul::JAMO_T_COUNT + t;
+ --prevSrc; // Replace the Jamo L as well.
+ if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
+ break;
+ }
+ if(!buffer.appendBMP((UChar)syllable, 0, errorCode)) {
+ break;
+ }
+ prevBoundary = src;
+ continue;
+ }
+ // If we see L+V+x where x!=T then we drop to the slow path,
+ // decompose and recompose.
+ // This is to deal with NFKC finding normal L and V but a
+ // compatibility variant of a T.
+ // We need to either fully compose that combination here
+ // (which would complicate the code and may not work with strange custom data)
+ // or use the slow path.
+ }
+ } else if (Hangul::isHangulLV(prev)) {
+ // The current character is a Jamo Trailing consonant,
+ // compose with previous Hangul LV that does not contain a Jamo T.
+ if (!doCompose) {
+ return FALSE;
+ }
+ UChar32 syllable = prev + c - Hangul::JAMO_T_BASE;
+ --prevSrc; // Replace the Hangul LV as well.
+ if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
+ break;
+ }
+ if(!buffer.appendBMP((UChar)syllable, 0, errorCode)) {
+ break;
+ }
+ prevBoundary = src;
+ continue;
+ }
+ // No matching context, or may need to decompose surrounding text first:
+ // Fall through to the slow path.
+ } else if (norm16 > JAMO_VT) { // norm16 >= MIN_YES_YES_WITH_CC
+ // One or more combining marks that do not combine-back:
+ // Check for canonical order, copy unchanged if ok and
+ // if followed by a character with a boundary-before.
+ uint8_t cc = getCCFromNormalYesOrMaybe(norm16); // cc!=0
+ if (onlyContiguous /* FCC */ && getPreviousTrailCC(prevBoundary, prevSrc) > cc) {
+ // Fails FCD test, need to decompose and contiguously recompose.
+ if (!doCompose) {
+ return FALSE;
+ }
+ } else {
+ // If !onlyContiguous (not FCC), then we ignore the tccc of
+ // the previous character which passed the quick check "yes && ccc==0" test.
+ const UChar *nextSrc;
+ uint16_t n16;
+ for (;;) {
+ if (src == limit) {
+ if (doCompose) {
+ buffer.appendZeroCC(prevBoundary, limit, errorCode);
+ }
+ return TRUE;
+ }
+ uint8_t prevCC = cc;
+ nextSrc = src; // look ahead one character without committing src
+ UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, n16);
+ if (n16 >= MIN_YES_YES_WITH_CC) {
+ cc = getCCFromNormalYesOrMaybe(n16);
+ if (prevCC > cc) { // combining marks out of canonical order
+ if (!doCompose) {
+ return FALSE;
+ }
+ break;
+ }
+ } else {
+ break;
+ }
+ src = nextSrc;
+ }
+ // src is after the last in-order combining mark.
+ // If there is a boundary here, then we continue with no change.
+ if (norm16HasCompBoundaryBefore(n16)) {
+ if (isCompYesAndZeroCC(n16)) {
+ src = nextSrc;
+ }
+ continue;
+ }
+ // Use the slow path. There is no boundary in [prevSrc, src[.
+ }
+ }
+
+ // Slow path: Find the nearest boundaries around the current character,
+ // decompose and recompose.
+ if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
+ const UChar *p = prevSrc;
+ UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, norm16); // reads the character before prevSrc; overwrites c and norm16
+ if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
+ prevSrc = p; // include the previous character in the span to recompose
+ }
+ }
+ if (doCompose && prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
+ break;
+ }
+ int32_t recomposeStartIndex=buffer.length();
+ // We know there is not a boundary here.
+ decomposeShort(prevSrc, src, FALSE /* !stopAtCompBoundary */, onlyContiguous,
+ buffer, errorCode);
+ // Decompose until the next boundary.
+ src = decomposeShort(src, limit, TRUE /* stopAtCompBoundary */, onlyContiguous,
+ buffer, errorCode);
+ if (U_FAILURE(errorCode)) {
+ break;
+ }
+ if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals()
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return TRUE; // the failure is reported through errorCode, not the return value
+ }
+ recompose(buffer, recomposeStartIndex, onlyContiguous);
+ if(!doCompose) {
+ if(!buffer.equals(prevSrc, src)) {
+ return FALSE;
+ }
+ buffer.remove(); // isNormalized mode: discard the scratch text before the next segment
+ }
+ prevBoundary=src;
+ }
+ return TRUE; // reached only via break, i.e. after a buffer operation failed or errorCode was set
+}
+
+// Quick-check scan of [src, limit[ (NUL-terminated if limit==NULL); returns src once it reaches limit, otherwise prevBoundary. Very similar to compose(): Make the same changes in both places if relevant.
+// pQCResult==NULL: spanQuickCheckYes
+// pQCResult!=NULL: quickCheck (*pQCResult must be UNORM_YES); set to UNORM_MAYBE or UNORM_NO below as needed
+const UChar *
+Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
+ UBool onlyContiguous, // TRUE selects the FCC ("contiguous composition") behavior
+ UNormalizationCheckResult *pQCResult) const {
+ const UChar *prevBoundary=src;
+ UChar32 minNoMaybeCP=minCompNoMaybeCP;
+ if(limit==NULL) {
+ UErrorCode errorCode=U_ZERO_ERROR; // local only; it is not inspected after the call below
+ src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, NULL, errorCode);
+ limit=u_strchr(src, 0);
+ if (prevBoundary != src) {
+ if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) {
+ prevBoundary = src;
+ } else {
+ prevBoundary = --src; // rescan the last skipped unit
+ }
+ }
+ }
+
+ for(;;) {
+ // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
+ // or with (compYes && ccc==0) properties.
+ const UChar *prevSrc; // start of the character that ended the fast-path scan
+ UChar32 c = 0;
+ uint16_t norm16 = 0;
+ for (;;) {
+ if(src==limit) {
+ return src;
+ }
+ if( (c=*src)<minNoMaybeCP ||
+ isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
+ ) {
+ ++src;
+ } else {
+ prevSrc = src++;
+ if(!U16_IS_LEAD(c)) {
+ break;
+ } else {
+ UChar c2;
+ if(src!=limit && U16_IS_TRAIL(c2=*src)) {
+ ++src;
+ c=U16_GET_SUPPLEMENTARY(c, c2);
+ norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
+ if(!isCompYesAndZeroCC(norm16)) {
+ break;
+ }
+ }
+ }
+ }
+ }
+ // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
+ // The current character is either a "noNo" (has a mapping)
+ // or a "maybeYes" (combines backward)
+ // or a "yesYes" with ccc!=0.
+ // It is not a Hangul syllable or Jamo L because those have "yes" properties.
+
+ uint16_t prevNorm16 = INERT; // norm16 of the preceding character; replaced below only if no boundary separates it
+ if (prevBoundary != prevSrc) {
+ if (norm16HasCompBoundaryBefore(norm16)) {
+ prevBoundary = prevSrc;
+ } else {
+ const UChar *p = prevSrc;
+ uint16_t n16;
+ UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, n16); // reads the character before prevSrc; overwrites c
+ if (norm16HasCompBoundaryAfter(n16, onlyContiguous)) {
+ prevBoundary = prevSrc;
+ } else {
+ prevBoundary = p;
+ prevNorm16 = n16;
+ }
+ }
+ }
+
+ if(isMaybeOrNonZeroCC(norm16)) {
+ uint8_t cc=getCCFromYesOrMaybe(norm16);
+ if (onlyContiguous /* FCC */ && cc != 0 &&
+ getTrailCCFromCompYesAndZeroCC(prevNorm16) > cc) {
+ // The [prevBoundary..prevSrc[ character
+ // passed the quick check "yes && ccc==0" test
+ // but is out of canonical order with the current combining mark.
+ } else {
+ // If !onlyContiguous (not FCC), then we ignore the tccc of
+ // the previous character which passed the quick check "yes && ccc==0" test.
+ const UChar *nextSrc;
+ for (;;) {
+ if (norm16 < MIN_YES_YES_WITH_CC) {
+ if (pQCResult != nullptr) {
+ *pQCResult = UNORM_MAYBE; // quickCheck mode: record "maybe" and keep scanning
+ } else {
+ return prevBoundary; // spanQuickCheckYes mode: the span ends at the last boundary
+ }
+ }
+ if (src == limit) {
+ return src;
+ }
+ uint8_t prevCC = cc;
+ nextSrc = src; // look ahead one character without committing src
+ UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, norm16);
+ if (isMaybeOrNonZeroCC(norm16)) {
+ cc = getCCFromYesOrMaybe(norm16);
+ if (!(prevCC <= cc || cc == 0)) { // combining marks out of canonical order
+ break;
+ }
+ } else {
+ break;
+ }
+ src = nextSrc;
+ }
+ // src is after the last in-order combining mark.
+ if (isCompYesAndZeroCC(norm16)) {
+ prevBoundary = src;
+ src = nextSrc;
+ continue;
+ }
+ }
+ }
+ if(pQCResult!=NULL) {
+ *pQCResult=UNORM_NO;
+ }
+ return prevBoundary;
+ }
+}
+
+// Appends [src, limit[ to buffer. If buffer is not empty and src does not start at a
+// composition boundary, the text between the last boundary in buffer and the first
+// boundary in src is composed as one piece. The rest is composed (doCompose)
+// or appended unchanged.
+// safeMiddle receives a copy of the buffer suffix that gets removed and recomposed.
+void Normalizer2Impl::composeAndAppend(const UChar *src, const UChar *limit,
+                                       UBool doCompose,
+                                       UBool onlyContiguous,
+                                       UnicodeString &safeMiddle,
+                                       ReorderingBuffer &buffer,
+                                       UErrorCode &errorCode) const {
+    if (!buffer.isEmpty()) {
+        const UChar *srcBoundary = findNextCompBoundary(src, limit, onlyContiguous);
+        if (srcBoundary != src) {
+            const UChar *destBoundary = findPreviousCompBoundary(
+                buffer.getStart(), buffer.getLimit(), onlyContiguous);
+            const int32_t suffixLength = (int32_t)(buffer.getLimit() - destBoundary);
+            // Move the buffer suffix out, remember it, then glue the src prefix onto it.
+            UnicodeString joined(destBoundary, suffixLength);
+            buffer.removeSuffix(suffixLength);
+            safeMiddle = joined;
+            joined.append(src, (int32_t)(srcBoundary - src));
+            const UChar *joinedStart = joined.getBuffer();
+            const UChar *joinedLimit = joinedStart + joined.length();
+            compose(joinedStart, joinedLimit, onlyContiguous, TRUE, buffer, errorCode);
+            if (U_FAILURE(errorCode)) {
+                return;
+            }
+            src = srcBoundary;
+        }
+    }
+    if (doCompose) {
+        compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
+        return;
+    }
+    if (limit == NULL) {  // appendZeroCC() needs limit!=NULL
+        limit = u_strchr(src, 0);
+    }
+    buffer.appendZeroCC(src, limit, errorCode);
+}
+
+UBool
+Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous, // UTF-8 counterpart of compose(); onlyContiguous selects FCC
+ const uint8_t *src, const uint8_t *limit, // limit must not be null (asserted below)
+ ByteSink *sink, Edits *edits, UErrorCode &errorCode) const { // sink==nullptr: only test; FALSE is returned as soon as a change would be needed
+ U_ASSERT(limit != nullptr);
+ UnicodeString s16; // backing string for the slow-path ReorderingBuffer
+ uint8_t minNoMaybeLead = leadByteForCP(minCompNoMaybeCP);
+ const uint8_t *prevBoundary = src; // start of the span that has been scanned but not yet written to sink
+
+ for (;;) {
+ // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
+ // or with (compYes && ccc==0) properties.
+ const uint8_t *prevSrc; // start of the character that ended the fast-path scan
+ uint16_t norm16 = 0;
+ for (;;) {
+ if (src == limit) {
+ if (prevBoundary != limit && sink != nullptr) {
+ ByteSinkUtil::appendUnchanged(prevBoundary, limit,
+ *sink, options, edits, errorCode);
+ }
+ return TRUE;
+ }
+ if (*src < minNoMaybeLead) {
+ ++src;
+ } else {
+ prevSrc = src;
+ UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); // advances src past one character
+ if (!isCompYesAndZeroCC(norm16)) {
+ break;
+ }
+ }
+ }
+ // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
+ // The current character is either a "noNo" (has a mapping)
+ // or a "maybeYes" (combines backward)
+ // or a "yesYes" with ccc!=0.
+ // It is not a Hangul syllable or Jamo L because those have "yes" properties.
+
+ // Medium-fast path: Handle cases that do not require full decomposition and recomposition.
+ if (!isMaybeOrNonZeroCC(norm16)) { // minNoNo <= norm16 < minMaybeYes
+ if (sink == nullptr) {
+ return FALSE;
+ }
+ // Fast path for mapping a character that is immediately surrounded by boundaries.
+ // In this case, we need not decompose around the current character.
+ if (isDecompNoAlgorithmic(norm16)) {
+ // Maps to a single isCompYesAndZeroCC character
+ // which also implies hasCompBoundaryBefore.
+ if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
+ hasCompBoundaryBefore(src, limit)) {
+ if (prevBoundary != prevSrc &&
+ !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+ *sink, options, edits, errorCode)) {
+ break;
+ }
+ appendCodePointDelta(prevSrc, src, getAlgorithmicDelta(norm16), *sink, edits);
+ prevBoundary = src;
+ continue;
+ }
+ } else if (norm16 < minNoNoCompBoundaryBefore) {
+ // The mapping is comp-normalized which also implies hasCompBoundaryBefore.
+ if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
+ hasCompBoundaryBefore(src, limit)) {
+ if (prevBoundary != prevSrc &&
+ !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+ *sink, options, edits, errorCode)) {
+ break;
+ }
+ const uint16_t *mapping = getMapping(norm16);
+ int32_t length = *mapping++ & MAPPING_LENGTH_MASK; // first unit holds the length; the mapping text follows
+ if (!ByteSinkUtil::appendChange(prevSrc, src, (const UChar *)mapping, length,
+ *sink, edits, errorCode)) {
+ break;
+ }
+ prevBoundary = src;
+ continue;
+ }
+ } else if (norm16 >= minNoNoEmpty) {
+ // The current character maps to nothing.
+ // Simply omit it from the output if there is a boundary before _or_ after it.
+ // The character itself implies no boundaries.
+ if (hasCompBoundaryBefore(src, limit) ||
+ hasCompBoundaryAfter(prevBoundary, prevSrc, onlyContiguous)) {
+ if (prevBoundary != prevSrc &&
+ !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+ *sink, options, edits, errorCode)) {
+ break;
+ }
+ if (edits != nullptr) {
+ edits->addReplace((int32_t)(src - prevSrc), 0); // record the removal of the character's bytes
+ }
+ prevBoundary = src;
+ continue;
+ }
+ }
+ // Other "noNo" type, or need to examine more text around this character:
+ // Fall through to the slow path.
+ } else if (isJamoVT(norm16)) {
+ // Jamo L: E1 84 80..92
+ // Jamo V: E1 85 A1..B5
+ // Jamo T: E1 86 A8..E1 87 82
+ U_ASSERT((src - prevSrc) == 3 && *prevSrc == 0xe1);
+ UChar32 prev = previousHangulOrJamo(prevBoundary, prevSrc);
+ if (prevSrc[1] == 0x85) {
+ // The current character is a Jamo Vowel,
+ // compose with previous Jamo L and following Jamo T.
+ UChar32 l = prev - Hangul::JAMO_L_BASE;
+ if ((uint32_t)l < Hangul::JAMO_L_COUNT) {
+ if (sink == nullptr) {
+ return FALSE;
+ }
+ int32_t t = getJamoTMinusBase(src, limit);
+ if (t >= 0) {
+ // The next character is a Jamo T.
+ src += 3;
+ } else if (hasCompBoundaryBefore(src, limit)) {
+ // No Jamo T follows, not even via decomposition.
+ t = 0;
+ }
+ if (t >= 0) { // t is still negative when neither case above applied: use the slow path
+ UChar32 syllable = Hangul::HANGUL_BASE +
+ (l*Hangul::JAMO_V_COUNT + (prevSrc[2]-0xa1)) *
+ Hangul::JAMO_T_COUNT + t;
+ prevSrc -= 3; // Replace the Jamo L as well.
+ if (prevBoundary != prevSrc &&
+ !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+ *sink, options, edits, errorCode)) {
+ break;
+ }
+ ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits);
+ prevBoundary = src;
+ continue;
+ }
+ // If we see L+V+x where x!=T then we drop to the slow path,
+ // decompose and recompose.
+ // This is to deal with NFKC finding normal L and V but a
+ // compatibility variant of a T.
+ // We need to either fully compose that combination here
+ // (which would complicate the code and may not work with strange custom data)
+ // or use the slow path.
+ }
+ } else if (Hangul::isHangulLV(prev)) {
+ // The current character is a Jamo Trailing consonant,
+ // compose with previous Hangul LV that does not contain a Jamo T.
+ if (sink == nullptr) {
+ return FALSE;
+ }
+ UChar32 syllable = prev + getJamoTMinusBase(prevSrc, src);
+ prevSrc -= 3; // Replace the Hangul LV as well.
+ if (prevBoundary != prevSrc &&
+ !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+ *sink, options, edits, errorCode)) {
+ break;
+ }
+ ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits);
+ prevBoundary = src;
+ continue;
+ }
+ // No matching context, or may need to decompose surrounding text first:
+ // Fall through to the slow path.
+ } else if (norm16 > JAMO_VT) { // norm16 >= MIN_YES_YES_WITH_CC
+ // One or more combining marks that do not combine-back:
+ // Check for canonical order, copy unchanged if ok and
+ // if followed by a character with a boundary-before.
+ uint8_t cc = getCCFromNormalYesOrMaybe(norm16); // cc!=0
+ if (onlyContiguous /* FCC */ && getPreviousTrailCC(prevBoundary, prevSrc) > cc) {
+ // Fails FCD test, need to decompose and contiguously recompose.
+ if (sink == nullptr) {
+ return FALSE;
+ }
+ } else {
+ // If !onlyContiguous (not FCC), then we ignore the tccc of
+ // the previous character which passed the quick check "yes && ccc==0" test.
+ const uint8_t *nextSrc;
+ uint16_t n16;
+ for (;;) {
+ if (src == limit) {
+ if (sink != nullptr) {
+ ByteSinkUtil::appendUnchanged(prevBoundary, limit,
+ *sink, options, edits, errorCode);
+ }
+ return TRUE;
+ }
+ uint8_t prevCC = cc;
+ nextSrc = src; // look ahead one character without committing src
+ UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, n16);
+ if (n16 >= MIN_YES_YES_WITH_CC) {
+ cc = getCCFromNormalYesOrMaybe(n16);
+ if (prevCC > cc) { // combining marks out of canonical order
+ if (sink == nullptr) {
+ return FALSE;
+ }
+ break;
+ }
+ } else {
+ break;
+ }
+ src = nextSrc;
+ }
+ // src is after the last in-order combining mark.
+ // If there is a boundary here, then we continue with no change.
+ if (norm16HasCompBoundaryBefore(n16)) {
+ if (isCompYesAndZeroCC(n16)) {
+ src = nextSrc;
+ }
+ continue;
+ }
+ // Use the slow path. There is no boundary in [prevSrc, src[.
+ }
+ }
+
+ // Slow path: Find the nearest boundaries around the current character,
+ // decompose and recompose.
+ if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
+ const uint8_t *p = prevSrc;
+ UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, prevBoundary, p, norm16); // reads the character before prevSrc; overwrites norm16
+ if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
+ prevSrc = p; // include the previous character in the span to recompose
+ }
+ }
+ ReorderingBuffer buffer(*this, s16, errorCode);
+ if (U_FAILURE(errorCode)) {
+ break;
+ }
+ // We know there is not a boundary here.
+ decomposeShort(prevSrc, src, FALSE /* !stopAtCompBoundary */, onlyContiguous,
+ buffer, errorCode);
+ // Decompose until the next boundary.
+ src = decomposeShort(src, limit, TRUE /* stopAtCompBoundary */, onlyContiguous,
+ buffer, errorCode);
+ if (U_FAILURE(errorCode)) {
+ break;
+ }
+ if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals()
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return TRUE; // the failure is reported through errorCode, not the return value
+ }
+ recompose(buffer, 0, onlyContiguous);
+ if (!buffer.equals(prevSrc, src)) { // write output only if recomposition changed the text
+ if (sink == nullptr) {
+ return FALSE;
+ }
+ if (prevBoundary != prevSrc &&
+ !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+ *sink, options, edits, errorCode)) {
+ break;
+ }
+ if (!ByteSinkUtil::appendChange(prevSrc, src, buffer.getStart(), buffer.length(),
+ *sink, edits, errorCode)) {
+ break;
+ }
+ prevBoundary = src;
+ }
+ }
+ return TRUE; // reached only via break, i.e. after a sink/buffer operation failed or errorCode was set
+}
+
+UBool Normalizer2Impl::hasCompBoundaryBefore(const UChar *src, const UChar *limit) const { // UTF-16: is there a composition boundary before the code point at src?
+ if (src == limit || *src < minCompNoMaybeCP) { // empty range, or first code unit below minCompNoMaybeCP: TRUE without a trie lookup
+ return TRUE;
+ }
+ UChar32 c;
+ uint16_t norm16;
+ UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16); // reads the code point at src into c and its trie value into norm16
+ return norm16HasCompBoundaryBefore(norm16);
+}
+
+UBool Normalizer2Impl::hasCompBoundaryBefore(const uint8_t *src, const uint8_t *limit) const { // UTF-8 variant: boundary test for the sequence starting at src
+ if (src == limit) { // an empty range counts as a boundary
+ return TRUE;
+ }
+ uint16_t norm16;
+ UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); // looks up the trie value for the UTF-8 sequence at src
+ return norm16HasCompBoundaryBefore(norm16);
+}
+
+UBool Normalizer2Impl::hasCompBoundaryAfter(const UChar *start, const UChar *p, // UTF-16: is there a composition boundary after the code point that ends at p?
+ UBool onlyContiguous) const {
+ if (start == p) { // nothing precedes p: treated as a boundary
+ return TRUE;
+ }
+ UChar32 c;
+ uint16_t norm16;
+ UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16); // looks up the code point just before p; c itself is not used afterwards
+ return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
+}
+
+UBool Normalizer2Impl::hasCompBoundaryAfter(const uint8_t *start, const uint8_t *p, // UTF-8 variant: boundary test after the sequence that ends at p
+ UBool onlyContiguous) const {
+ if (start == p) { // nothing precedes p: treated as a boundary
+ return TRUE;
+ }
+ uint16_t norm16;
+ UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, start, p, norm16); // looks up the trie value for the UTF-8 sequence just before p
+ return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
+}
+
+const UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p, // Scans backward from p; returns the nearest composition boundary at or before p, or start if none is found.
+ UBool onlyContiguous) const {
+ while (p != start) {
+ const UChar *codePointLimit = p; // end of the code point about to be read
+ UChar32 c;
+ uint16_t norm16;
+ UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16); // steps p back over one code point
+ if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
+ return codePointLimit; // boundary after this code point
+ }
+ if (hasCompBoundaryBefore(c, norm16)) {
+ return p; // boundary before this code point
+ }
+ }
+ return p; // reached start
+}
+
+const UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit, // Scans forward from p; returns the nearest composition boundary at or after p, or limit if none is found.
+ UBool onlyContiguous) const {
+ while (p != limit) {
+ const UChar *codePointStart = p; // start of the code point about to be read
+ UChar32 c;
+ uint16_t norm16;
+ UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); // advances p past one code point
+ if (hasCompBoundaryBefore(c, norm16)) {
+ return codePointStart; // boundary before this code point
+ }
+ if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
+ return p; // boundary after this code point
+ }
+ }
+ return p; // reached limit
+}
+
+uint8_t Normalizer2Impl::getPreviousTrailCC(const UChar *start, const UChar *p) const { // UTF-16: low byte of getFCD16() for the code point ending at p, or 0 if start==p
+ if (start == p) {
+ return 0;
+ }
+ int32_t i = (int32_t)(p - start); // offset of p from start; NOTE(review): the cast assumes that the distance fits into int32_t
+ UChar32 c;
+ U16_PREV(start, 0, i, c); // reads the code point that ends at offset i
+ return (uint8_t)getFCD16(c); // the cast keeps only bits 7..0 of the FCD value
+}
+
+uint8_t Normalizer2Impl::getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const { // UTF-8: low byte of getFCD16() for the code point ending at p, or 0 if start==p
+ if (start == p) {
+ return 0;
+ }
+ int32_t i = (int32_t)(p - start); // offset of p from start; NOTE(review): the cast assumes that the distance fits into int32_t
+ UChar32 c;
+ U8_PREV(start, 0, i, c); // reads the code point that ends at offset i
+ return (uint8_t)getFCD16(c); // the cast keeps only bits 7..0 of the FCD value
+}
+
+// Note: normalizer2impl.cpp r30982 (2011-nov-27)
+// still had getFCDTrie() which built and cached an FCD trie.
+// That provided faster access to FCD data than getFCD16FromNormData()
+// but required synchronization and consumed some 10kB of heap memory
+// in any process that uses FCD (e.g., via collation).
+// minDecompNoCP etc. and smallFCD[] are intended to help with any loss of performance,
+// at least for ASCII & CJK.
+
+// Ticket 20907 - The optimizer in MSVC/Visual Studio versions below 16.4 has trouble with this
+// function on Windows ARM64. As a work-around, we disable optimizations for this function.
+// This work-around could/should be removed once the following versions of Visual Studio are no
+// longer supported: All versions of VS2017, and versions of VS2019 below 16.4.
+#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
+#pragma optimize( "", off )
+#endif
+// Gets the FCD value from the regular normalization data. Result layout as documented for getFCD16(): lccc in bits 15..8, tccc in bits 7..0.
+uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const {
+ uint16_t norm16=getNorm16(c);
+ if (norm16 >= limitNoNo) {
+ if(norm16>=MIN_NORMAL_MAYBE_YES) {
+ // combining mark
+ norm16=getCCFromNormalYesOrMaybe(norm16);
+ return norm16|(norm16<<8); // the same cc goes into both the lccc byte and the tccc byte
+ } else if(norm16>=minMaybeYes) {
+ return 0;
+ } else { // isDecompNoAlgorithmic(norm16)
+ uint16_t deltaTrailCC = norm16 & DELTA_TCCC_MASK;
+ if (deltaTrailCC <= DELTA_TCCC_1) {
+ return deltaTrailCC >> OFFSET_SHIFT;
+ }
+ // Maps to an isCompYesAndZeroCC.
+ c=mapAlgorithmic(c, norm16);
+ norm16=getRawNorm16(c); // continue below with the data of the mapped code point
+ }
+ }
+ if(norm16<=minYesNo || isHangulLVT(norm16)) {
+ // no decomposition or Hangul syllable, all zeros
+ return 0;
+ }
+ // c decomposes, get everything from the variable-length extra data
+ const uint16_t *mapping=getMapping(norm16);
+ uint16_t firstUnit=*mapping;
+ norm16=firstUnit>>8; // tccc
+ if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
+ norm16|=*(mapping-1)&0xff00; // lccc
+ }
+ return norm16;
+}
+#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
+#pragma optimize( "", on )
+#endif
+
+// Dual functionality (limit==NULL means that src is NUL-terminated):
+// buffer!=NULL: normalize; the processed text is appended to *buffer and the end of the processed input is returned
+// buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes; returns prevBoundary at the first FCD violation, otherwise the end of the input
+const UChar *
+Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit,
+ ReorderingBuffer *buffer,
+ UErrorCode &errorCode) const {
+ // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
+ // Similar to the prevBoundary in the compose() implementation.
+ const UChar *prevBoundary=src;
+ int32_t prevFCD16=0;
+ if(limit==NULL) {
+ src=copyLowPrefixFromNulTerminated(src, minLcccCP, buffer, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return src;
+ }
+ if(prevBoundary<src) {
+ prevBoundary=src;
+ // We know that the previous character's lccc==0.
+ // Fetching the fcd16 value was deferred for this below-U+0300 code point.
+ prevFCD16=getFCD16(*(src-1));
+ if(prevFCD16>1) {
+ --prevBoundary;
+ }
+ }
+ limit=u_strchr(src, 0); // locate the terminating NUL so that the loops below can compare against limit
+ }
+
+ // Note: In this function we use buffer->appendZeroCC() because we track
+ // the lead and trail combining classes here, rather than leaving it to
+ // the ReorderingBuffer.
+ // The exception is the call to decomposeShort() which uses the buffer
+ // in the normal way.
+
+ const UChar *prevSrc;
+ UChar32 c=0;
+ uint16_t fcd16=0;
+
+ for(;;) {
+ // count code units with lccc==0
+ for(prevSrc=src; src!=limit;) {
+ if((c=*src)<minLcccCP) {
+ prevFCD16=~c; // negative marker: remembers the code unit so that its FCD lookup can be deferred (decoded with ~prevFCD16 below)
+ ++src;
+ } else if(!singleLeadMightHaveNonZeroFCD16(c)) {
+ prevFCD16=0;
+ ++src;
+ } else {
+ if(U16_IS_LEAD(c)) {
+ UChar c2;
+ if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
+ c=U16_GET_SUPPLEMENTARY(c, c2);
+ }
+ }
+ if((fcd16=getFCD16FromNormData(c))<=0xff) { // high byte (lccc) is 0
+ prevFCD16=fcd16;
+ src+=U16_LENGTH(c);
+ } else {
+ break; // c has lccc!=0; src still points at the start of c
+ }
+ }
+ }
+ // copy these code units all at once
+ if(src!=prevSrc) {
+ if(buffer!=NULL && !buffer->appendZeroCC(prevSrc, src, errorCode)) {
+ break;
+ }
+ if(src==limit) {
+ break;
+ }
+ prevBoundary=src;
+ // We know that the previous character's lccc==0.
+ if(prevFCD16<0) {
+ // Fetching the fcd16 value was deferred for this below-minLcccCP code point.
+ UChar32 prev=~prevFCD16;
+ if(prev<minDecompNoCP) {
+ prevFCD16=0;
+ } else {
+ prevFCD16=getFCD16FromNormData(prev);
+ if(prevFCD16>1) {
+ --prevBoundary;
+ }
+ }
+ } else {
+ const UChar *p=src-1;
+ if(U16_IS_TRAIL(*p) && prevSrc<p && U16_IS_LEAD(*(p-1))) {
+ --p;
+ // Need to fetch the previous character's FCD value because
+ // prevFCD16 was just for the trail surrogate code point.
+ prevFCD16=getFCD16FromNormData(U16_GET_SUPPLEMENTARY(p[0], p[1]));
+ // Still known to have lccc==0 because its lead surrogate unit had lccc==0.
+ }
+ if(prevFCD16>1) {
+ prevBoundary=p; // the boundary moves back to before the previous character
+ }
+ }
+ // The start of the current character (c).
+ prevSrc=src;
+ } else if(src==limit) {
+ break;
+ }
+
+ src+=U16_LENGTH(c);
+ // The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
+ // Check for proper order, and decompose locally if necessary.
+ if((prevFCD16&0xff)<=(fcd16>>8)) {
+ // proper order: prev tccc <= current lccc
+ if((fcd16&0xff)<=1) {
+ prevBoundary=src;
+ }
+ if(buffer!=NULL && !buffer->appendZeroCC(c, errorCode)) {
+ break;
+ }
+ prevFCD16=fcd16;
+ continue;
+ } else if(buffer==NULL) {
+ return prevBoundary; // quick check "no"
+ } else {
+ /*
+ * Back out the part of the source that we copied or appended
+ * already but is now going to be decomposed.
+ * prevSrc is set to after what was copied/appended.
+ */
+ buffer->removeSuffix((int32_t)(prevSrc-prevBoundary));
+ /*
+ * Find the part of the source that needs to be decomposed,
+ * up to the next safe boundary.
+ */
+ src=findNextFCDBoundary(src, limit);
+ /*
+ * The source text does not fulfill the conditions for FCD.
+ * Decompose and reorder a limited piece of the text.
+ */
+ decomposeShort(prevBoundary, src, FALSE, FALSE, *buffer, errorCode);
+ if (U_FAILURE(errorCode)) {
+ break;
+ }
+ prevBoundary=src;
+ prevFCD16=0; // restart the tracking after the decomposed piece
+ }
+ }
+ return src;
+}
+
+void Normalizer2Impl::makeFCDAndAppend(const UChar *src, const UChar *limit, // Appends [src, limit[ to buffer, first reprocessing the text around the seam between the buffer contents and src.
+ UBool doMakeFCD,
+ UnicodeString &safeMiddle,
+ ReorderingBuffer &buffer,
+ UErrorCode &errorCode) const {
+ if(!buffer.isEmpty()) {
+ const UChar *firstBoundaryInSrc=findNextFCDBoundary(src, limit);
+ if(src!=firstBoundaryInSrc) { // src does not start at a boundary, so the seam needs to be reprocessed
+ const UChar *lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStart(),
+ buffer.getLimit());
+ int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest);
+ UnicodeString middle(lastBoundaryInDest, destSuffixLength);
+ buffer.removeSuffix(destSuffixLength);
+ safeMiddle=middle; // hands a copy of the removed buffer suffix to the caller, before src text is appended to middle
+ middle.append(src, (int32_t)(firstBoundaryInSrc-src));
+ const UChar *middleStart=middle.getBuffer();
+ makeFCD(middleStart, middleStart+middle.length(), &buffer, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+ src=firstBoundaryInSrc; // the prefix of src was already consumed through middle
+ }
+ }
+ if(doMakeFCD) {
+ makeFCD(src, limit, &buffer, errorCode);
+ } else {
+ if(limit==NULL) { // appendZeroCC() needs limit!=NULL
+ limit=u_strchr(src, 0);
+ }
+ buffer.appendZeroCC(src, limit, errorCode); // append the rest of src without further processing
+ }
+}
+
+const UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const UChar *p) const { // Scans backward from p; returns the nearest FCD boundary at or before p, or start if none is found.
+ while(start<p) {
+ const UChar *codePointLimit = p; // end of the code point about to be read
+ UChar32 c;
+ uint16_t norm16;
+ UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16); // steps p back over one code point
+ if (c < minDecompNoCP || norm16HasDecompBoundaryAfter(norm16)) {
+ return codePointLimit; // boundary after this code point
+ }
+ if (norm16HasDecompBoundaryBefore(norm16)) {
+ return p; // boundary before this code point
+ }
+ }
+ return p;
+}
+
+const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *limit) const { // Scans forward from p; returns the nearest FCD boundary at or after p, or limit if none is found.
+ while(p<limit) {
+ const UChar *codePointStart=p; // start of the code point about to be read
+ UChar32 c;
+ uint16_t norm16;
+ UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); // advances p past one code point
+ if (c < minLcccCP || norm16HasDecompBoundaryBefore(norm16)) {
+ return codePointStart; // boundary before this code point
+ }
+ if (norm16HasDecompBoundaryAfter(norm16)) {
+ return p; // boundary after this code point
+ }
+ }
+ return p;
+}
+
+// CanonicalIterator data -------------------------------------------------- ***
+
+CanonIterData::CanonIterData(UErrorCode &errorCode) : // Opens the mutable trie; the immutable trie starts out as nullptr and is built by InitCanonIterData::doInit().
+ mutableTrie(umutablecptrie_open(0, 0, &errorCode)), trie(nullptr),
+ canonStartSets(uprv_deleteUObject, NULL, errorCode) {}
+
+CanonIterData::~CanonIterData() { // Closes both tries.
+ umutablecptrie_close(mutableTrie); // NOTE(review): doInit() sets mutableTrie to nullptr after building the immutable trie; presumably close() accepts nullptr - confirm
+ ucptrie_close(trie);
+}
+
+void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode) { // Records origin as a character whose decomposition starts with decompLead.
+ uint32_t canonValue = umutablecptrie_get(mutableTrie, decompLead);
+ if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) {
+ // origin is the first character whose decomposition starts with
+ // the character for which we are setting the value.
+ umutablecptrie_set(mutableTrie, decompLead, canonValue|origin, &errorCode); // a single origin is stored directly in the value bits
+ } else {
+ // origin is not the first character, or it is U+0000.
+ UnicodeSet *set;
+ if((canonValue&CANON_HAS_SET)==0) {
+ set=new UnicodeSet;
+ if(set==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK); // the origin that was stored directly so far, if any
+ canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size(); // the value bits now hold the index of the new set
+ umutablecptrie_set(mutableTrie, decompLead, canonValue, &errorCode);
+ canonStartSets.addElement(set, errorCode); // NOTE(review): errorCode is not checked here and set is still used below if adding failed - confirm ownership on failure
+ if(firstOrigin!=0) {
+ set->add(firstOrigin);
+ }
+ } else {
+ set=(UnicodeSet *)canonStartSets[(int32_t)(canonValue&CANON_VALUE_MASK)]; // look up the existing set by its stored index
+ }
+ set->add(origin);
+ }
+}
+
+// C++ class for friend access to private Normalizer2Impl members; its only member is the static doInit().
+class InitCanonIterData {
+public:
+ static void doInit(Normalizer2Impl *impl, UErrorCode &errorCode); // creates and fills impl->fCanonIterData
+};
+
+U_CDECL_BEGIN
+
+// UInitOnce instantiation function for CanonIterData; passed to umtx_initOnce() by ensureCanonIterData().
+static void U_CALLCONV
+initCanonIterData(Normalizer2Impl *impl, UErrorCode &errorCode) {
+ InitCanonIterData::doInit(impl, errorCode); // plain forwarder to the C++ helper class
+}
+
+U_CDECL_END
+
+void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) { // Builds impl->fCanonIterData from the normalization trie; on failure fCanonIterData is left NULL.
+ U_ASSERT(impl->fCanonIterData == NULL);
+ impl->fCanonIterData = new CanonIterData(errorCode);
+ if (impl->fCanonIterData == NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ }
+ if (U_SUCCESS(errorCode)) {
+ UChar32 start = 0, end;
+ uint32_t value;
+ while ((end = ucptrie_getRange(impl->normTrie, start,
+ UCPMAP_RANGE_FIXED_LEAD_SURROGATES, Normalizer2Impl::INERT,
+ nullptr, nullptr, &value)) >= 0) {
+ // Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
+ if (value != Normalizer2Impl::INERT) {
+ impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode);
+ }
+ start = end + 1; // continue with the range that follows this one
+ }
+#ifdef UCPTRIE_DEBUG
+ umutablecptrie_setName(impl->fCanonIterData->mutableTrie, "CanonIterData");
+#endif
+ impl->fCanonIterData->trie = umutablecptrie_buildImmutable(
+ impl->fCanonIterData->mutableTrie, UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_32, &errorCode);
+ umutablecptrie_close(impl->fCanonIterData->mutableTrie); // the mutable trie is not used any more after this point
+ impl->fCanonIterData->mutableTrie = nullptr; // so that ~CanonIterData() does not close the same trie again
+ }
+ if (U_FAILURE(errorCode)) {
+ delete impl->fCanonIterData; // discard partially built data
+ impl->fCanonIterData = NULL;
+ }
+}
+
+void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16, // Adds CanonicalIterator data to newData for every code point in start..end; all of them share norm16.
+ CanonIterData &newData,
+ UErrorCode &errorCode) const {
+ if(isInert(norm16) || (minYesNo<=norm16 && norm16<minNoNo)) {
+ // Inert, or 2-way mapping (including Hangul syllable).
+ // We do not write a canonStartSet for any yesNo character.
+ // Composites from 2-way mappings are added at runtime from the
+ // starter's compositions list, and the other characters in
+ // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are
+ // "maybe" characters.
+ return;
+ }
+ for(UChar32 c=start; c<=end; ++c) {
+ uint32_t oldValue = umutablecptrie_get(newData.mutableTrie, c);
+ uint32_t newValue=oldValue; // flags are accumulated here and written back only if something changed
+ if(isMaybeOrNonZeroCC(norm16)) {
+ // not a segment starter if it occurs in a decomposition or has cc!=0
+ newValue|=CANON_NOT_SEGMENT_STARTER;
+ if(norm16<MIN_NORMAL_MAYBE_YES) {
+ newValue|=CANON_HAS_COMPOSITIONS;
+ }
+ } else if(norm16<minYesNo) {
+ newValue|=CANON_HAS_COMPOSITIONS;
+ } else {
+ // c has a one-way decomposition
+ UChar32 c2=c;
+ // Do not modify the whole-range norm16 value.
+ uint16_t norm16_2=norm16;
+ if (isDecompNoAlgorithmic(norm16_2)) {
+ // Maps to an isCompYesAndZeroCC.
+ c2 = mapAlgorithmic(c2, norm16_2);
+ norm16_2 = getRawNorm16(c2);
+ // No compatibility mappings for the CanonicalIterator.
+ U_ASSERT(!(isHangulLV(norm16_2) || isHangulLVT(norm16_2)));
+ }
+ if (norm16_2 > minYesNo) {
+ // c decomposes, get everything from the variable-length extra data
+ const uint16_t *mapping=getMapping(norm16_2);
+ uint16_t firstUnit=*mapping;
+ int32_t length=firstUnit&MAPPING_LENGTH_MASK;
+ if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
+ if(c==c2 && (*(mapping-1)&0xff)!=0) { // c==c2: only when no algorithmic mapping was applied above
+ newValue|=CANON_NOT_SEGMENT_STARTER; // original c has cc!=0
+ }
+ }
+ // Skip empty mappings (no characters in the decomposition).
+ if(length!=0) {
+ ++mapping; // skip over the firstUnit
+ // add c to first code point's start set
+ int32_t i=0;
+ U16_NEXT_UNSAFE(mapping, i, c2);
+ newData.addToStartSet(c, c2, errorCode);
+ // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a
+ // one-way mapping. A 2-way mapping is possible here after
+ // intermediate algorithmic mapping.
+ if(norm16_2>=minNoNo) {
+ while(i<length) {
+ U16_NEXT_UNSAFE(mapping, i, c2);
+ uint32_t c2Value = umutablecptrie_get(newData.mutableTrie, c2);
+ if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) { // write only when the flag is not set yet
+ umutablecptrie_set(newData.mutableTrie, c2,
+ c2Value|CANON_NOT_SEGMENT_STARTER, &errorCode);
+ }
+ }
+ }
+ }
+ } else {
+ // c decomposed to c2 algorithmically; c has cc==0
+ newData.addToStartSet(c, c2, errorCode);
+ }
+ }
+ if(newValue!=oldValue) {
+ umutablecptrie_set(newData.mutableTrie, c, newValue, &errorCode);
+ }
+ }
+}
+
+UBool Normalizer2Impl::ensureCanonIterData(UErrorCode &errorCode) const { // Makes sure that fCanonIterData has been built; returns whether errorCode indicates success.
+ // Logically const: Synchronized instantiation.
+ Normalizer2Impl *me=const_cast<Normalizer2Impl *>(this);
+ umtx_initOnce(me->fCanonIterDataInitOnce, &initCanonIterData, me, errorCode); // initCanonIterData(me, errorCode) does the actual work
+ return U_SUCCESS(errorCode);
+}
+
+int32_t Normalizer2Impl::getCanonValue(UChar32 c) const { // Returns the raw CanonIterData trie value for c.
+ return (int32_t)ucptrie_get(fCanonIterData->trie, c); // NOTE(review): no NULL check on fCanonIterData; presumably callers run ensureCanonIterData() first - confirm
+}
+
+const UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const { // Returns the n-th set stored in fCanonIterData->canonStartSets.
+ return *(const UnicodeSet *)fCanonIterData->canonStartSets[n]; // NOTE(review): n is not range-checked here; the visible caller passes an index taken from the trie value
+}
+
+UBool Normalizer2Impl::isCanonSegmentStarter(UChar32 c) const { // TRUE if the canon value of c is non-negative.
+ return getCanonValue(c)>=0; // presumably CANON_NOT_SEGMENT_STARTER is the sign bit of the value - confirm against its definition
+}
+
+UBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const { // Fills set from the canon data of c; returns FALSE, leaving set untouched, if there is no data for c.
+ int32_t canonValue=getCanonValue(c)&~CANON_NOT_SEGMENT_STARTER; // the segment-starter flag is irrelevant here
+ if(canonValue==0) {
+ return FALSE;
+ }
+ set.clear();
+ int32_t value=canonValue&CANON_VALUE_MASK;
+ if((canonValue&CANON_HAS_SET)!=0) {
+ set.addAll(getCanonStartSet(value)); // value is an index into the stored sets
+ } else if(value!=0) {
+ set.add(value); // value is a single code point
+ }
+ if((canonValue&CANON_HAS_COMPOSITIONS)!=0) {
+ uint16_t norm16=getRawNorm16(c);
+ if(norm16==JAMO_L) {
+ UChar32 syllable=
+ (UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT);
+ set.add(syllable, syllable+Hangul::JAMO_VT_COUNT-1); // the whole range of JAMO_VT_COUNT syllables for this lead jamo
+ } else {
+ addComposites(getCompositionsList(norm16), set);
+ }
+ }
+ return TRUE;
+}
+
+U_NAMESPACE_END
+
+// Normalizer2 data swapping ----------------------------------------------- ***
+
+U_NAMESPACE_USE
+
+U_CAPI int32_t U_EXPORT2
+unorm2_swap(const UDataSwapper *ds, // Swaps Normalizer2 ("Nrm2") data via ds; returns headerSize+size, or 0 on error. With length<0 nothing is copied or swapped.
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const UDataInfo *pInfo;
+ int32_t headerSize;
+
+ const uint8_t *inBytes;
+ uint8_t *outBytes;
+
+ const int32_t *inIndexes;
+ int32_t indexes[Normalizer2Impl::IX_TOTAL_SIZE+1]; // only the leading indexes, up to and including IX_TOTAL_SIZE, are read
+
+ int32_t i, offset, nextOffset, size;
+
+ /* udata_swapDataHeader checks the arguments */
+ headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
+ /* check data format and format version */
+ pInfo=(const UDataInfo *)((const char *)inData+4);
+ uint8_t formatVersion0=pInfo->formatVersion[0];
+ if(!(
+ pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
+ pInfo->dataFormat[1]==0x72 &&
+ pInfo->dataFormat[2]==0x6d &&
+ pInfo->dataFormat[3]==0x32 &&
+ (1<=formatVersion0 && formatVersion0<=4)
+ )) {
+ udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n",
+ pInfo->dataFormat[0], pInfo->dataFormat[1],
+ pInfo->dataFormat[2], pInfo->dataFormat[3],
+ pInfo->formatVersion[0]);
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ inBytes=(const uint8_t *)inData+headerSize;
+ outBytes=(uint8_t *)outData+headerSize;
+
+ inIndexes=(const int32_t *)inBytes;
+ int32_t minIndexesLength; // minimum number of indexes, depending on the format version
+ if(formatVersion0==1) {
+ minIndexesLength=Normalizer2Impl::IX_MIN_MAYBE_YES+1;
+ } else if(formatVersion0==2) {
+ minIndexesLength=Normalizer2Impl::IX_MIN_YES_NO_MAPPINGS_ONLY+1;
+ } else {
+ minIndexesLength=Normalizer2Impl::IX_MIN_LCCC_CP+1;
+ }
+
+ if(length>=0) {
+ length-=headerSize; // from here on, length counts only the bytes after the header
+ if(length<minIndexesLength*4) {
+ udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for Normalizer2 data\n",
+ length);
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ }
+
+ /* read the first few indexes */
+ for(i=0; i<UPRV_LENGTHOF(indexes); ++i) {
+ indexes[i]=udata_readInt32(ds, inIndexes[i]);
+ }
+
+ /* get the total length of the data */
+ size=indexes[Normalizer2Impl::IX_TOTAL_SIZE];
+
+ if(length>=0) {
+ if(length<size) {
+ udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for all of Normalizer2 data\n",
+ length);
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ /* copy the data for inaccessible bytes */
+ if(inBytes!=outBytes) {
+ uprv_memcpy(outBytes, inBytes, size);
+ }
+
+ offset=0;
+
+ /* swap the int32_t indexes[] */
+ nextOffset=indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET];
+ ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode);
+ offset=nextOffset;
+
+ /* swap the trie */
+ nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET];
+ utrie_swapAnyVersion(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
+ offset=nextOffset;
+
+ /* swap the uint16_t extraData[] */
+ nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET];
+ ds->swapArray16(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
+ offset=nextOffset;
+
+ /* no need to swap the uint8_t smallFCD[] (new in formatVersion 2) */
+ nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET+1];
+ offset=nextOffset;
+
+ U_ASSERT(offset==size); // the sections are expected to cover exactly size bytes
+ }
+
+ return headerSize+size;
+}
+
+#endif // !UCONFIG_NO_NORMALIZATION
diff --git a/thirdparty/icu4c/common/normalizer2impl.h b/thirdparty/icu4c/common/normalizer2impl.h
new file mode 100644
index 0000000000..4218a30a34
--- /dev/null
+++ b/thirdparty/icu4c/common/normalizer2impl.h
@@ -0,0 +1,978 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: normalizer2impl.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009nov22
+* created by: Markus W. Scherer
+*/
+
+#ifndef __NORMALIZER2IMPL_H__
+#define __NORMALIZER2IMPL_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/normalizer2.h"
+#include "unicode/ucptrie.h"
+#include "unicode/unistr.h"
+#include "unicode/unorm.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
+#include "mutex.h"
+#include "udataswp.h"
+#include "uset_imp.h"
+
+// When the nfc.nrm data is *not* hardcoded into the common library
+// (with this constant set to 0),
+// then it needs to be built into the data package:
+// Add nfc.nrm to icu4c/source/data/Makefile.in DAT_FILES_SHORT
+#define NORM2_HARDCODE_NFC_DATA 1
+
+U_NAMESPACE_BEGIN
+
+struct CanonIterData;
+
+class ByteSink;
+class Edits;
+class InitCanonIterData;
+class LcccContext;
+
+class U_COMMON_API Hangul { // Static-only helper: Hangul/Jamo constants, range tests and arithmetic syllable decomposition.
+public:
+ /* Korean Hangul and Jamo constants */
+ enum {
+ JAMO_L_BASE=0x1100, /* "lead" jamo */
+ JAMO_L_END=0x1112,
+ JAMO_V_BASE=0x1161, /* "vowel" jamo */
+ JAMO_V_END=0x1175,
+ JAMO_T_BASE=0x11a7, /* "trail" jamo */
+ JAMO_T_END=0x11c2,
+
+ HANGUL_BASE=0xac00,
+ HANGUL_END=0xd7a3,
+
+ JAMO_L_COUNT=19,
+ JAMO_V_COUNT=21,
+ JAMO_T_COUNT=28,
+
+ JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT,
+
+ HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT,
+ HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT
+ };
+
+ static inline UBool isHangul(UChar32 c) { // TRUE for HANGUL_BASE<=c<HANGUL_LIMIT
+ return HANGUL_BASE<=c && c<HANGUL_LIMIT;
+ }
+ static inline UBool
+ isHangulLV(UChar32 c) { // TRUE for a Hangul syllable whose trail index (offset modulo JAMO_T_COUNT) is 0
+ c-=HANGUL_BASE;
+ return 0<=c && c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
+ }
+ static inline UBool isJamoL(UChar32 c) {
+ return (uint32_t)(c-JAMO_L_BASE)<JAMO_L_COUNT; // the unsigned comparison also rejects c<JAMO_L_BASE
+ }
+ static inline UBool isJamoV(UChar32 c) {
+ return (uint32_t)(c-JAMO_V_BASE)<JAMO_V_COUNT; // the unsigned comparison also rejects c<JAMO_V_BASE
+ }
+ static inline UBool isJamoT(UChar32 c) {
+ int32_t t=c-JAMO_T_BASE;
+ return 0<t && t<JAMO_T_COUNT; // not JAMO_T_BASE itself
+ }
+ static UBool isJamo(UChar32 c) { // TRUE if c is in one of the L, V or T ranges (T again excluding JAMO_T_BASE itself)
+ return JAMO_L_BASE<=c && c<=JAMO_T_END &&
+ (c<=JAMO_L_END || (JAMO_V_BASE<=c && c<=JAMO_V_END) || JAMO_T_BASE<c);
+ }
+
+ /**
+ * Decomposes c, which must be a Hangul syllable, into buffer
+ * and returns the length of the decomposition (2 or 3).
+ */
+ static inline int32_t decompose(UChar32 c, UChar buffer[3]) {
+ c-=HANGUL_BASE;
+ UChar32 c2=c%JAMO_T_COUNT; // trail index; 0 means that there is no trail jamo
+ c/=JAMO_T_COUNT; // combined lead/vowel index
+ buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT);
+ buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT);
+ if(c2==0) {
+ return 2;
+ } else {
+ buffer[2]=(UChar)(JAMO_T_BASE+c2);
+ return 3;
+ }
+ }
+
+ /**
+ * Decomposes c, which must be a Hangul syllable, into buffer.
+ * This is the raw, not recursive, decomposition. Its length is always 2.
+ */
+ static inline void getRawDecomposition(UChar32 c, UChar buffer[2]) {
+ UChar32 orig=c;
+ c-=HANGUL_BASE;
+ UChar32 c2=c%JAMO_T_COUNT; // trail index; 0 means that c is an LV syllable
+ if(c2==0) {
+ c/=JAMO_T_COUNT;
+ buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT);
+ buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT);
+ } else {
+ buffer[0]=(UChar)(orig-c2); // LV syllable
+ buffer[1]=(UChar)(JAMO_T_BASE+c2);
+ }
+ }
+private:
+ Hangul(); // no instantiation
+};
+
+class Normalizer2Impl;
+
+class U_COMMON_API ReorderingBuffer : public UMemory { // Output buffer that writes into a UnicodeString and tracks the last combining class for reordering.
+public:
+ /** Constructs only; init() should be called. */
+ ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) :
+ impl(ni), str(dest),
+ start(NULL), reorderStart(NULL), limit(NULL),
+ remainingCapacity(0), lastCC(0) {}
+ /** Constructs, removes the string contents, and initializes for a small initial capacity. */
+ ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest, UErrorCode &errorCode);
+ ~ReorderingBuffer() {
+ if(start!=NULL) {
+ str.releaseBuffer((int32_t)(limit-start)); // hand the buffer back to str with the length that was written
+ }
+ }
+ UBool init(int32_t destCapacity, UErrorCode &errorCode);
+
+ UBool isEmpty() const { return start==limit; }
+ int32_t length() const { return (int32_t)(limit-start); }
+ UChar *getStart() { return start; }
+ UChar *getLimit() { return limit; }
+ uint8_t getLastCC() const { return lastCC; }
+
+ UBool equals(const UChar *start, const UChar *limit) const;
+ UBool equals(const uint8_t *otherStart, const uint8_t *otherLimit) const;
+
+ UBool append(UChar32 c, uint8_t cc, UErrorCode &errorCode) { // dispatches on whether c fits into a single UChar
+ return (c<=0xffff) ?
+ appendBMP((UChar)c, cc, errorCode) :
+ appendSupplementary(c, cc, errorCode);
+ }
+ UBool append(const UChar *s, int32_t length, UBool isNFD,
+ uint8_t leadCC, uint8_t trailCC,
+ UErrorCode &errorCode);
+ UBool appendBMP(UChar c, uint8_t cc, UErrorCode &errorCode) {
+ if(remainingCapacity==0 && !resize(1, errorCode)) { // grow only when the buffer is full
+ return false;
+ }
+ if(lastCC<=cc || cc==0) { // c is in order, or has cc==0: append at the end
+ *limit++=c;
+ lastCC=cc;
+ if(cc<=1) {
+ reorderStart=limit;
+ }
+ } else {
+ insert(c, cc); // out of order; insert() is defined outside this header section
+ }
+ --remainingCapacity; // one UChar was added in either branch
+ return true;
+ }
+ UBool appendZeroCC(UChar32 c, UErrorCode &errorCode);
+ UBool appendZeroCC(const UChar *s, const UChar *sLimit, UErrorCode &errorCode);
+ void remove();
+ void removeSuffix(int32_t suffixLength);
+ void setReorderingLimit(UChar *newLimit) {
+ remainingCapacity+=(int32_t)(limit-newLimit); // adjust the capacity by the distance between the old and the new limit
+ reorderStart=limit=newLimit;
+ lastCC=0;
+ }
+ void copyReorderableSuffixTo(UnicodeString &s) const { // copies [reorderStart, limit[ into s
+ s.setTo(ConstChar16Ptr(reorderStart), (int32_t)(limit-reorderStart));
+ }
+private:
+ /*
+ * TODO: Revisit whether it makes sense to track reorderStart.
+ * It is set to after the last known character with cc<=1,
+ * which stops previousCC() before it reads that character and looks up its cc.
+ * previousCC() is normally only called from insert().
+ * In other words, reorderStart speeds up the insertion of a combining mark
+ * into a multi-combining mark sequence where it does not belong at the end.
+ * This might not be worth the trouble.
+ * On the other hand, it's not a huge amount of trouble.
+ *
+ * We probably need it for UNORM_SIMPLE_APPEND.
+ */
+
+ UBool appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode);
+ void insert(UChar32 c, uint8_t cc);
+ static void writeCodePoint(UChar *p, UChar32 c) { // writes c at p as one UChar, or as a lead/trail surrogate pair
+ if(c<=0xffff) {
+ *p=(UChar)c;
+ } else {
+ p[0]=U16_LEAD(c);
+ p[1]=U16_TRAIL(c);
+ }
+ }
+ UBool resize(int32_t appendLength, UErrorCode &errorCode);
+
+ const Normalizer2Impl &impl;
+ UnicodeString &str;
+ UChar *start, *reorderStart, *limit;
+ int32_t remainingCapacity;
+ uint8_t lastCC;
+
+ // private backward iterator
+ void setIterator() { codePointStart=limit; }
+ void skipPrevious(); // Requires start<codePointStart.
+ uint8_t previousCC(); // Returns 0 if there is no previous character.
+
+ UChar *codePointStart, *codePointLimit;
+};
+
+/**
+ * Low-level implementation of the Unicode Normalization Algorithm.
+ * For the data structure and details see the documentation at the end of
+ * this normalizer2impl.h and in the design doc at
+ * http://site.icu-project.org/design/normalization/custom
+ */
+class U_COMMON_API Normalizer2Impl : public UObject {
+public:
+ Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) { }
+ virtual ~Normalizer2Impl();
+
+ void init(const int32_t *inIndexes, const UCPTrie *inTrie,
+ const uint16_t *inExtraData, const uint8_t *inSmallFCD);
+
+ void addLcccChars(UnicodeSet &set) const;
+ void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
+ void addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
+
+ // low-level properties ------------------------------------------------ ***
+
+ UBool ensureCanonIterData(UErrorCode &errorCode) const;
+
+ // The trie stores values for lead surrogate code *units*.
+ // Surrogate code *points* are inert.
+ uint16_t getNorm16(UChar32 c) const {
+ return U_IS_LEAD(c) ?
+ static_cast<uint16_t>(INERT) :
+ UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c);
+ }
+ uint16_t getRawNorm16(UChar32 c) const { return UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); }
+
+ UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const {
+ if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) {
+ return UNORM_YES;
+ } else if(minMaybeYes<=norm16) {
+ return UNORM_MAYBE;
+ } else {
+ return UNORM_NO;
+ }
+ }
+ UBool isAlgorithmicNoNo(uint16_t norm16) const { return limitNoNo<=norm16 && norm16<minMaybeYes; }
+ UBool isCompNo(uint16_t norm16) const { return minNoNo<=norm16 && norm16<minMaybeYes; }
+ UBool isDecompYes(uint16_t norm16) const { return norm16<minYesNo || minMaybeYes<=norm16; }
+
+ /**
+  * Returns the combining class for a norm16 value:
+  * taken from the norm16 bits at or above MIN_NORMAL_MAYBE_YES,
+  * read via getCCFromNoNo() for minNoNo<=norm16<limitNoNo, and 0 otherwise.
+  */
+ uint8_t getCC(uint16_t norm16) const {
+     if(norm16>=MIN_NORMAL_MAYBE_YES) {
+         return getCCFromNormalYesOrMaybe(norm16);
+     }
+     if(minNoNo<=norm16 && norm16<limitNoNo) {
+         return getCCFromNoNo(norm16);
+     }
+     return 0;
+ }
+ static uint8_t getCCFromNormalYesOrMaybe(uint16_t norm16) {
+ return (uint8_t)(norm16 >> OFFSET_SHIFT);
+ }
+ static uint8_t getCCFromYesOrMaybe(uint16_t norm16) {
+ return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0;
+ }
+ uint8_t getCCFromYesOrMaybeCP(UChar32 c) const {
+ if (c < minCompNoMaybeCP) { return 0; }
+ return getCCFromYesOrMaybe(getNorm16(c));
+ }
+
+ /**
+  * Looks up the FCD data for code point c.
+  * @param c A Unicode code point.
+  * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
+  */
+ uint16_t getFCD16(UChar32 c) const {
+     // Fast exits: c is below minDecompNoCP, or c is a BMP code point
+     // whose smallFCD bit says that its FCD value is zero.
+     if(c<minDecompNoCP || (c<=0xffff && !singleLeadMightHaveNonZeroFCD16(c))) {
+         return 0;
+     }
+     return getFCD16FromNormData(c);
+ }
+ /**
+ * Returns the FCD data for the next code point (post-increment).
+ * Might skip only a lead surrogate rather than the whole surrogate pair if none of
+ * the supplementary code points associated with the lead surrogate have non-zero FCD data.
+ * @param s A valid pointer into a string. Requires s!=limit.
+ * On return, s has been advanced past the code unit(s) that were consumed.
+ * @param limit The end of the string, or NULL.
+ * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
+ */
+ uint16_t nextFCD16(const UChar *&s, const UChar *limit) const {
+ UChar32 c=*s++;
+ // Quick exit based on the single-or-lead code unit alone;
+ // in this case a trail surrogate following a lead surrogate is not consumed.
+ if(c<minDecompNoCP || !singleLeadMightHaveNonZeroFCD16(c)) {
+ return 0;
+ }
+ UChar c2;
+ // Combine a lead surrogate with a directly following trail surrogate, if there is one.
+ if(U16_IS_LEAD(c) && s!=limit && U16_IS_TRAIL(c2=*s)) {
+ c=U16_GET_SUPPLEMENTARY(c, c2);
+ ++s;
+ }
+ return getFCD16FromNormData(c);
+ }
+ /**
+ * Returns the FCD data for the previous code point (pre-decrement).
+ * @param start The start of the string.
+ * @param s A valid pointer into a string. Requires start<s.
+ * On return, s has been moved back before the code unit(s) that were consumed.
+ * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
+ */
+ uint16_t previousFCD16(const UChar *start, const UChar *&s) const {
+ UChar32 c=*--s;
+ if(c<minDecompNoCP) {
+ return 0;
+ }
+ if(!U16_IS_TRAIL(c)) {
+ // Not a trail surrogate: the smallFCD bit decides whether a full lookup is needed.
+ if(!singleLeadMightHaveNonZeroFCD16(c)) {
+ return 0;
+ }
+ } else {
+ // Trail surrogate: if a lead surrogate precedes it, step back over the lead
+ // and look up the supplementary code point instead.
+ UChar c2;
+ if(start<s && U16_IS_LEAD(c2=*(s-1))) {
+ c=U16_GET_SUPPLEMENTARY(c2, c);
+ --s;
+ }
+ }
+ return getFCD16FromNormData(c);
+ }
+
+ /**
+  * Tests the smallFCD bit set for a single-or-lead code unit.
+  * @param lead code unit, requires 0<=lead<=0xffff
+  * @return true if lead might have non-zero FCD data
+  */
+ UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const {
+     // One smallFCD byte per 256 code units, one bit per 32 code units.
+     const uint8_t blockBits=smallFCD[lead>>8];
+     if(blockBits!=0) {
+         return (UBool)((blockBits>>((lead>>5)&7))&1);
+     }
+     return false;
+ }
+ /** Returns the FCD value from the regular normalization data. */
+ uint16_t getFCD16FromNormData(UChar32 c) const;
+
+ /**
+ * Gets the decomposition for one code point.
+ * @param c code point
+ * @param buffer out-only buffer for algorithmic decompositions
+ * @param length out-only, takes the length of the decomposition, if any
+ * @return pointer to the decomposition, or NULL if none
+ */
+ const UChar *getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const;
+
+ /**
+ * Gets the raw decomposition for one code point.
+ * @param c code point
+ * @param buffer out-only buffer for algorithmic decompositions
+ * @param length out-only, takes the length of the decomposition, if any
+ * @return pointer to the decomposition, or NULL if none
+ */
+ const UChar *getRawDecomposition(UChar32 c, UChar buffer[30], int32_t &length) const;
+
+ UChar32 composePair(UChar32 a, UChar32 b) const;
+
+ UBool isCanonSegmentStarter(UChar32 c) const;
+ UBool getCanonStartSet(UChar32 c, UnicodeSet &set) const;
+
+ enum {
+ // Fixed norm16 values.
+ MIN_YES_YES_WITH_CC=0xfe02,
+ JAMO_VT=0xfe00,
+ MIN_NORMAL_MAYBE_YES=0xfc00,
+ JAMO_L=2, // offset=1 hasCompBoundaryAfter=false
+ INERT=1, // offset=0 hasCompBoundaryAfter=true
+
+ // norm16 bit 0 is comp-boundary-after.
+ HAS_COMP_BOUNDARY_AFTER=1,
+ OFFSET_SHIFT=1,
+
+ // For algorithmic one-way mappings, norm16 bits 2..1 indicate the
+ // tccc (0, 1, >1) for quick FCC boundary-after tests.
+ DELTA_TCCC_0=0,
+ DELTA_TCCC_1=2,
+ DELTA_TCCC_GT_1=4,
+ DELTA_TCCC_MASK=6,
+ DELTA_SHIFT=3,
+
+ MAX_DELTA=0x40
+ };
+
+ enum {
+ // Byte offsets from the start of the data, after the generic header.
+ IX_NORM_TRIE_OFFSET,
+ IX_EXTRA_DATA_OFFSET,
+ IX_SMALL_FCD_OFFSET,
+ IX_RESERVED3_OFFSET,
+ IX_RESERVED4_OFFSET,
+ IX_RESERVED5_OFFSET,
+ IX_RESERVED6_OFFSET,
+ IX_TOTAL_SIZE,
+
+ // Code point thresholds for quick check codes.
+ IX_MIN_DECOMP_NO_CP,
+ IX_MIN_COMP_NO_MAYBE_CP,
+
+ // Norm16 value thresholds for quick check combinations and types of extra data.
+
+ /** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */
+ IX_MIN_YES_NO,
+ /** Mappings are comp-normalized. */
+ IX_MIN_NO_NO,
+ IX_LIMIT_NO_NO,
+ IX_MIN_MAYBE_YES,
+
+ /** Mappings only in [minYesNoMappingsOnly..minNoNo[. */
+ IX_MIN_YES_NO_MAPPINGS_ONLY,
+ /** Mappings are not comp-normalized but have a comp boundary before. */
+ IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE,
+ /** Mappings do not have a comp boundary before. */
+ IX_MIN_NO_NO_COMP_NO_MAYBE_CC,
+ /** Mappings to the empty string. */
+ IX_MIN_NO_NO_EMPTY,
+
+ IX_MIN_LCCC_CP,
+ IX_RESERVED19,
+ IX_COUNT
+ };
+
+ enum {
+ MAPPING_HAS_CCC_LCCC_WORD=0x80,
+ MAPPING_HAS_RAW_MAPPING=0x40,
+ // unused bit 0x20,
+ MAPPING_LENGTH_MASK=0x1f
+ };
+
+ enum {
+ COMP_1_LAST_TUPLE=0x8000,
+ COMP_1_TRIPLE=1,
+ COMP_1_TRAIL_LIMIT=0x3400,
+ COMP_1_TRAIL_MASK=0x7ffe,
+ COMP_1_TRAIL_SHIFT=9, // 10-1 for the "triple" bit
+ COMP_2_TRAIL_SHIFT=6,
+ COMP_2_TRAIL_MASK=0xffc0
+ };
+
+ // higher-level functionality ------------------------------------------ ***
+
+ // NFD without an NFD Normalizer2 instance.
+ UnicodeString &decompose(const UnicodeString &src, UnicodeString &dest,
+ UErrorCode &errorCode) const;
+ /**
+ * Decomposes [src, limit[ and writes the result to dest.
+ * limit can be NULL if src is NUL-terminated.
+ * destLengthEstimate is the initial dest buffer capacity and can be -1.
+ */
+ void decompose(const UChar *src, const UChar *limit,
+ UnicodeString &dest, int32_t destLengthEstimate,
+ UErrorCode &errorCode) const;
+
+ const UChar *decompose(const UChar *src, const UChar *limit,
+ ReorderingBuffer *buffer, UErrorCode &errorCode) const;
+ void decomposeAndAppend(const UChar *src, const UChar *limit,
+ UBool doDecompose,
+ UnicodeString &safeMiddle,
+ ReorderingBuffer &buffer,
+ UErrorCode &errorCode) const;
+ UBool compose(const UChar *src, const UChar *limit,
+ UBool onlyContiguous,
+ UBool doCompose,
+ ReorderingBuffer &buffer,
+ UErrorCode &errorCode) const;
+ const UChar *composeQuickCheck(const UChar *src, const UChar *limit,
+ UBool onlyContiguous,
+ UNormalizationCheckResult *pQCResult) const;
+ void composeAndAppend(const UChar *src, const UChar *limit,
+ UBool doCompose,
+ UBool onlyContiguous,
+ UnicodeString &safeMiddle,
+ ReorderingBuffer &buffer,
+ UErrorCode &errorCode) const;
+
+ /** sink==nullptr: isNormalized() */
+ UBool composeUTF8(uint32_t options, UBool onlyContiguous,
+ const uint8_t *src, const uint8_t *limit,
+ ByteSink *sink, icu::Edits *edits, UErrorCode &errorCode) const;
+
+ const UChar *makeFCD(const UChar *src, const UChar *limit,
+ ReorderingBuffer *buffer, UErrorCode &errorCode) const;
+ void makeFCDAndAppend(const UChar *src, const UChar *limit,
+ UBool doMakeFCD,
+ UnicodeString &safeMiddle,
+ ReorderingBuffer &buffer,
+ UErrorCode &errorCode) const;
+
+ UBool hasDecompBoundaryBefore(UChar32 c) const;
+ UBool norm16HasDecompBoundaryBefore(uint16_t norm16) const;
+ UBool hasDecompBoundaryAfter(UChar32 c) const;
+ UBool norm16HasDecompBoundaryAfter(uint16_t norm16) const;
+ UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); }
+
+ UBool hasCompBoundaryBefore(UChar32 c) const {
+ return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(getNorm16(c));
+ }
+ UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous) const {
+ return norm16HasCompBoundaryAfter(getNorm16(c), onlyContiguous);
+ }
+ /**
+  * Tests whether c is inert for composition: its norm16 must pass isCompYesAndZeroCC()
+  * and have the comp-boundary-after bit set.
+  * With onlyContiguous, norm16 must additionally be INERT or
+  * the first unit of its mapping must be at most 0x1ff.
+  */
+ UBool isCompInert(UChar32 c, UBool onlyContiguous) const {
+     const uint16_t norm16=getNorm16(c);
+     if(!isCompYesAndZeroCC(norm16) || (norm16 & HAS_COMP_BOUNDARY_AFTER) == 0) {
+         return false;
+     }
+     // getMapping() is only dereferenced once the checks above have passed.
+     return !onlyContiguous || isInert(norm16) || *getMapping(norm16) <= 0x1ff;
+ }
+
+ UBool hasFCDBoundaryBefore(UChar32 c) const { return hasDecompBoundaryBefore(c); }
+ UBool hasFCDBoundaryAfter(UChar32 c) const { return hasDecompBoundaryAfter(c); }
+ UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; }
+private:
+ friend class InitCanonIterData;
+ friend class LcccContext;
+
+ UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
+ UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; }
+ static UBool isInert(uint16_t norm16) { return norm16==INERT; }
+ static UBool isJamoL(uint16_t norm16) { return norm16==JAMO_L; }
+ static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; }
+ uint16_t hangulLVT() const { return minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER; }
+ UBool isHangulLV(uint16_t norm16) const { return norm16==minYesNo; }
+ UBool isHangulLVT(uint16_t norm16) const {
+ return norm16==hangulLVT();
+ }
+ UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16<minNoNo; }
+ // UBool isCompYes(uint16_t norm16) const {
+ // return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
+ // }
+ // UBool isCompYesOrMaybe(uint16_t norm16) const {
+ // return norm16<minNoNo || minMaybeYes<=norm16;
+ // }
+ // UBool hasZeroCCFromDecompYes(uint16_t norm16) const {
+ // return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
+ // }
+ UBool isDecompYesAndZeroCC(uint16_t norm16) const {
+ return norm16<minYesNo ||
+ norm16==JAMO_VT ||
+ (minMaybeYes<=norm16 && norm16<=MIN_NORMAL_MAYBE_YES);
+ }
+ /**
+ * A little faster and simpler than isDecompYesAndZeroCC() but does not include
+ * the MaybeYes which combine-forward and have ccc=0.
+ * (Standard Unicode 10 normalization does not have such characters.)
+ */
+ UBool isMostDecompYesAndZeroCC(uint16_t norm16) const {
+ return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
+ }
+ UBool isDecompNoAlgorithmic(uint16_t norm16) const { return norm16>=limitNoNo; }
+
+ // For use with isCompYes().
+ // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
+ // static uint8_t getCCFromYes(uint16_t norm16) {
+ // return norm16>=MIN_YES_YES_WITH_CC ? getCCFromNormalYesOrMaybe(norm16) : 0;
+ // }
+ // Returns the low byte of the ccc/lccc word of the mapping for norm16,
+ // or 0 if the mapping's first unit does not have MAPPING_HAS_CCC_LCCC_WORD set.
+ uint8_t getCCFromNoNo(uint16_t norm16) const {
+     const uint16_t *firstUnit=getMapping(norm16);
+     if((*firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0) {
+         return 0;
+     }
+     // The ccc/lccc word is stored directly before the first unit of the mapping.
+     return (uint8_t)*(firstUnit-1);
+ }
+ // Returns the tccc for norm16; requires that norm16 passes isCompYesAndZeroCC().
+ uint8_t getTrailCCFromCompYesAndZeroCC(uint16_t norm16) const {
+ if(norm16<=minYesNo) {
+ return 0; // yesYes and Hangul LV have ccc=tccc=0
+ } else {
+ // For Hangul LVT we harmlessly fetch a firstUnit with tccc=0 here.
+ return (uint8_t)(*getMapping(norm16)>>8); // tccc from yesNo
+ }
+ }
+ uint8_t getPreviousTrailCC(const UChar *start, const UChar *p) const;
+ uint8_t getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const;
+
+ // Requires algorithmic-NoNo.
+ UChar32 mapAlgorithmic(UChar32 c, uint16_t norm16) const {
+ return c+(norm16>>DELTA_SHIFT)-centerNoNoDelta;
+ }
+ UChar32 getAlgorithmicDelta(uint16_t norm16) const {
+ return (norm16>>DELTA_SHIFT)-centerNoNoDelta;
+ }
+
+ // Requires minYesNo<norm16<limitNoNo.
+ const uint16_t *getMapping(uint16_t norm16) const { return extraData+(norm16>>OFFSET_SHIFT); }
+ /**
+  * Returns the compositions list for a norm16 value; callers are expected to pass
+  * a value for which isDecompYes() is true.
+  * @param norm16 norm16 trie value
+  * @return the compositions list, or NULL if norm16 is below JAMO_L
+  *         or at/above MIN_NORMAL_MAYBE_YES
+  */
+ const uint16_t *getCompositionsListForDecompYes(uint16_t norm16) const {
+     if(norm16<JAMO_L || MIN_NORMAL_MAYBE_YES<=norm16) {
+         return NULL;
+     } else if(norm16<minMaybeYes) {
+         return getMapping(norm16);  // for yesYes; if Jamo L: harmless empty list
+     } else {
+         // minMaybeYes<=norm16<MIN_NORMAL_MAYBE_YES:
+         // norm16 offsets are shifted left by OFFSET_SHIFT, so the difference has to be
+         // shifted back before indexing, exactly as in getCompositionsListForMaybe().
+         return maybeYesCompositions+((norm16-minMaybeYes)>>OFFSET_SHIFT);
+     }
+ }
+ const uint16_t *getCompositionsListForComposite(uint16_t norm16) const {
+ // A composite has both mapping & compositions list.
+ const uint16_t *list=getMapping(norm16);
+ return list+ // mapping pointer
+ 1+ // +1 to skip the first unit with the mapping length
+ (*list&MAPPING_LENGTH_MASK); // + mapping length
+ }
+ const uint16_t *getCompositionsListForMaybe(uint16_t norm16) const {
+ // minMaybeYes<=norm16<MIN_NORMAL_MAYBE_YES
+ return maybeYesCompositions+((norm16-minMaybeYes)>>OFFSET_SHIFT);
+ }
+ /**
+ * Returns the compositions list for a norm16 value, choosing between the
+ * decomp-yes and the composite lookup based on isDecompYes(norm16).
+ * @param norm16 norm16 value of a code point that must have compositions
+ * @return compositions list pointer
+ */
+ const uint16_t *getCompositionsList(uint16_t norm16) const {
+ return isDecompYes(norm16) ?
+ getCompositionsListForDecompYes(norm16) :
+ getCompositionsListForComposite(norm16);
+ }
+
+ const UChar *copyLowPrefixFromNulTerminated(const UChar *src,
+ UChar32 minNeedDataCP,
+ ReorderingBuffer *buffer,
+ UErrorCode &errorCode) const;
+ const UChar *decomposeShort(const UChar *src, const UChar *limit,
+ UBool stopAtCompBoundary, UBool onlyContiguous,
+ ReorderingBuffer &buffer, UErrorCode &errorCode) const;
+ UBool decompose(UChar32 c, uint16_t norm16,
+ ReorderingBuffer &buffer, UErrorCode &errorCode) const;
+
+ const uint8_t *decomposeShort(const uint8_t *src, const uint8_t *limit,
+ UBool stopAtCompBoundary, UBool onlyContiguous,
+ ReorderingBuffer &buffer, UErrorCode &errorCode) const;
+
+ static int32_t combine(const uint16_t *list, UChar32 trail);
+ void addComposites(const uint16_t *list, UnicodeSet &set) const;
+ void recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
+ UBool onlyContiguous) const;
+
+ UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const {
+ return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(norm16);
+ }
+ UBool norm16HasCompBoundaryBefore(uint16_t norm16) const {
+ return norm16 < minNoNoCompNoMaybeCC || isAlgorithmicNoNo(norm16);
+ }
+ UBool hasCompBoundaryBefore(const UChar *src, const UChar *limit) const;
+ UBool hasCompBoundaryBefore(const uint8_t *src, const uint8_t *limit) const;
+ UBool hasCompBoundaryAfter(const UChar *start, const UChar *p,
+ UBool onlyContiguous) const;
+ UBool hasCompBoundaryAfter(const uint8_t *start, const uint8_t *p,
+ UBool onlyContiguous) const;
+ /**
+  * Tests the comp-boundary-after bit of norm16;
+  * with onlyContiguous the value must also pass isTrailCC01ForCompBoundaryAfter().
+  */
+ UBool norm16HasCompBoundaryAfter(uint16_t norm16, UBool onlyContiguous) const {
+     if((norm16 & HAS_COMP_BOUNDARY_AFTER) == 0) {
+         return false;
+     }
+     return !onlyContiguous || isTrailCC01ForCompBoundaryAfter(norm16);
+ }
+ /** For FCC: Given norm16 HAS_COMP_BOUNDARY_AFTER, does it have tccc<=1? */
+ UBool isTrailCC01ForCompBoundaryAfter(uint16_t norm16) const {
+     if(isInert(norm16)) {
+         return true;
+     }
+     if(isDecompNoAlgorithmic(norm16)) {
+         // Algorithmic mappings carry the tccc class (0, 1, >1) in the norm16 delta bits.
+         return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1;
+     }
+     // Otherwise test the first unit of the mapping.
+     return *getMapping(norm16) <= 0x1ff;
+ }
+
+ const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p, UBool onlyContiguous) const;
+ const UChar *findNextCompBoundary(const UChar *p, const UChar *limit, UBool onlyContiguous) const;
+
+ const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const;
+ const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const;
+
+ void makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16,
+ CanonIterData &newData, UErrorCode &errorCode) const;
+
+ int32_t getCanonValue(UChar32 c) const;
+ const UnicodeSet &getCanonStartSet(int32_t n) const;
+
+ // UVersionInfo dataVersion;
+
+ // BMP code point thresholds for quick check loops looking at single UTF-16 code units.
+ UChar minDecompNoCP;
+ UChar minCompNoMaybeCP;
+ UChar minLcccCP;
+
+ // Norm16 value thresholds for quick check combinations and types of extra data.
+ uint16_t minYesNo;
+ uint16_t minYesNoMappingsOnly;
+ uint16_t minNoNo;
+ uint16_t minNoNoCompBoundaryBefore;
+ uint16_t minNoNoCompNoMaybeCC;
+ uint16_t minNoNoEmpty;
+ uint16_t limitNoNo;
+ uint16_t centerNoNoDelta;
+ uint16_t minMaybeYes;
+
+ const UCPTrie *normTrie;
+ const uint16_t *maybeYesCompositions;
+ const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters
+ const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
+
+ UInitOnce fCanonIterDataInitOnce = U_INITONCE_INITIALIZER;
+ CanonIterData *fCanonIterData;
+};
+
+// bits in canonIterData
+#define CANON_NOT_SEGMENT_STARTER 0x80000000
+#define CANON_HAS_COMPOSITIONS 0x40000000
+#define CANON_HAS_SET 0x200000
+#define CANON_VALUE_MASK 0x1fffff
+
+/**
+ * ICU-internal shortcut for quick access to standard Unicode normalization.
+ * All member functions are static; the private constructor prevents instantiation.
+ */
+class U_COMMON_API Normalizer2Factory {
+public:
+ // Normalizer2 instances (FCD, FCC and no-op, judging by the names).
+ static const Normalizer2 *getFCDInstance(UErrorCode &errorCode);
+ static const Normalizer2 *getFCCInstance(UErrorCode &errorCode);
+ static const Normalizer2 *getNoopInstance(UErrorCode &errorCode);
+
+ // Returns the Normalizer2 instance selected by the UNormalizationMode value.
+ static const Normalizer2 *getInstance(UNormalizationMode mode, UErrorCode &errorCode);
+
+ // Low-level Normalizer2Impl objects
+ // (presumably for the NFC, NFKC and NFKC_Casefold data; confirm in the implementation).
+ static const Normalizer2Impl *getNFCImpl(UErrorCode &errorCode);
+ static const Normalizer2Impl *getNFKCImpl(UErrorCode &errorCode);
+ static const Normalizer2Impl *getNFKC_CFImpl(UErrorCode &errorCode);
+
+ // Get the Impl instance of the Normalizer2.
+ // Must be used only when it is known that norm2 is a Normalizer2WithImpl instance.
+ static const Normalizer2Impl *getImpl(const Normalizer2 *norm2);
+private:
+ Normalizer2Factory(); // No instantiation.
+};
+
+U_NAMESPACE_END
+
+U_CAPI int32_t U_EXPORT2
+unorm2_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Get the NF*_QC property for a code point, for u_getIntPropertyValue().
+ * @internal
+ */
+U_CFUNC UNormalizationCheckResult
+unorm_getQuickCheck(UChar32 c, UNormalizationMode mode);
+
+/**
+ * Gets the 16-bit FCD value (lead & trail CCs) for a code point, for u_getIntPropertyValue().
+ * @internal
+ */
+U_CFUNC uint16_t
+unorm_getFCD16(UChar32 c);
+
+/**
+ * Format of Normalizer2 .nrm data files.
+ * Format version 4.0.
+ *
+ * Normalizer2 .nrm data files provide data for the Unicode Normalization algorithms.
+ * ICU ships with data files for standard Unicode Normalization Forms
+ * NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm) and NFKC_Casefold (nfkc_cf.nrm).
+ * Custom (application-specific) data can be built into additional .nrm files
+ * with the gennorm2 build tool.
+ * ICU ships with one such file, uts46.nrm, for the implementation of UTS #46.
+ *
+ * Normalizer2.getInstance() causes a .nrm file to be loaded, unless it has been
+ * cached already. Internally, Normalizer2Impl.load() reads the .nrm file.
+ *
+ * A .nrm file begins with a standard ICU data file header
+ * (DataHeader, see ucmndata.h and unicode/udata.h).
+ * The UDataInfo.dataVersion field usually contains the Unicode version
+ * for which the data was generated.
+ *
+ * After the header, the file contains the following parts.
+ * Constants are defined as enum values of the Normalizer2Impl class.
+ *
+ * Many details of the data structures are described in the design doc
+ * which is at http://site.icu-project.org/design/normalization/custom
+ *
+ * int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_NORM_TRIE_OFFSET]/4;
+ *
+ * The first eight indexes are byte offsets in ascending order.
+ * Each byte offset marks the start of the next part in the data file,
+ * and the end of the previous one.
+ * When two consecutive byte offsets are the same, then the corresponding part is empty.
+ * Byte offsets are offsets from after the header,
+ * that is, from the beginning of the indexes[].
+ * Each part starts at an offset with proper alignment for its data.
+ * If necessary, the previous part may include padding bytes to achieve this alignment.
+ *
+ * minDecompNoCP=indexes[IX_MIN_DECOMP_NO_CP] is the lowest code point
+ * with a decomposition mapping, that is, with NF*D_QC=No.
+ * minCompNoMaybeCP=indexes[IX_MIN_COMP_NO_MAYBE_CP] is the lowest code point
+ * with NF*C_QC=No (has a one-way mapping) or Maybe (combines backward).
+ * minLcccCP=indexes[IX_MIN_LCCC_CP] (index 18, new in formatVersion 3)
+ * is the lowest code point with lccc!=0.
+ *
+ * The next eight indexes are thresholds of 16-bit trie values for ranges of
+ * values indicating multiple normalization properties.
+ * They are listed here in threshold order, not in the order they are stored in the indexes.
+ * minYesNo=indexes[IX_MIN_YES_NO];
+ * minYesNoMappingsOnly=indexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
+ * minNoNo=indexes[IX_MIN_NO_NO];
+ * minNoNoCompBoundaryBefore=indexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE];
+ * minNoNoCompNoMaybeCC=indexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC];
+ * minNoNoEmpty=indexes[IX_MIN_NO_NO_EMPTY];
+ * limitNoNo=indexes[IX_LIMIT_NO_NO];
+ * minMaybeYes=indexes[IX_MIN_MAYBE_YES];
+ * See the normTrie description below and the design doc for details.
+ *
+ * UCPTrie normTrie; -- see ucptrie_impl.h and ucptrie.h, same as Java CodePointTrie
+ *
+ * The trie holds the main normalization data. Each code point is mapped to a 16-bit value.
+ * Rather than using independent bits in the value (which would require more than 16 bits),
+ * information is extracted primarily via range checks.
+ * Except, format version 3 uses bit 0 for hasCompBoundaryAfter().
+ * For example, a 16-bit value norm16 in the range minYesNo<=norm16<minNoNo
+ * means that the character has NF*C_QC=Yes and NF*D_QC=No properties,
+ * which means it has a two-way (round-trip) decomposition mapping.
+ * Values in the range 2<=norm16<limitNoNo are also directly indexes into the extraData
+ * pointing to mappings, compositions lists, or both.
+ * Value norm16==INERT (0 in versions 1 & 2, 1 in version 3)
+ * means that the character is normalization-inert, that is,
+ * it does not have a mapping, does not participate in composition, has a zero
+ * canonical combining class, and forms a boundary where text before it and after it
+ * can be normalized independently.
+ * For details about how multiple properties are encoded in 16-bit values
+ * see the design doc.
+ * Note that the encoding cannot express all combinations of the properties involved;
+ * it only supports those combinations that are allowed by
+ * the Unicode Normalization algorithms. Details are in the design doc as well.
+ * The gennorm2 tool only builds .nrm files for data that conforms to the limitations.
+ *
+ * The trie has a value for each lead surrogate code unit representing the "worst case"
+ * properties of the 1024 supplementary characters whose UTF-16 form starts with
+ * the lead surrogate. If all of the 1024 supplementary characters are normalization-inert,
+ * then their lead surrogate code unit has the trie value INERT.
+ * When the lead surrogate unit's value exceeds the quick check minimum during processing,
+ * the properties for the full supplementary code point need to be looked up.
+ *
+ * uint16_t maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes];
+ * uint16_t extraData[];
+ *
+ * There is only one byte offset for the end of these two arrays.
+ * The split between them is given by the constant and variable mentioned above.
+ * In version 3, the difference must be shifted right by OFFSET_SHIFT.
+ *
+ * The maybeYesCompositions array contains compositions lists for characters that
+ * combine both forward (as starters in composition pairs)
+ * and backward (as trailing characters in composition pairs).
+ * Such characters do not occur in Unicode 5.2 but are allowed by
+ * the Unicode Normalization algorithms.
+ * If there are no such characters, then minMaybeYes==MIN_NORMAL_MAYBE_YES
+ * and the maybeYesCompositions array is empty.
+ * If there are such characters, then minMaybeYes is subtracted from their norm16 values
+ * to get the index into this array.
+ *
+ * The extraData array contains compositions lists for "YesYes" characters,
+ * followed by mappings and optional compositions lists for "YesNo" characters,
+ * followed by only mappings for "NoNo" characters.
+ * (Referring to pairs of NFC/NFD quick check values.)
+ * The norm16 values of those characters are directly indexes into the extraData array.
+ * In version 3, the norm16 values must be shifted right by OFFSET_SHIFT
+ * for accessing extraData.
+ *
+ * The data structures for compositions lists and mappings are described in the design doc.
+ *
+ * uint8_t smallFCD[0x100]; -- new in format version 2
+ *
+ * This is a bit set to help speed up FCD value lookups in the absence of a full
+ * UTrie2 or other large data structure with the full FCD value mapping.
+ *
+ * Each smallFCD bit is set if any of the corresponding 32 BMP code points
+ * has a non-zero FCD value (lccc!=0 or tccc!=0).
+ * Bit 0 of smallFCD[0] is for U+0000..U+001F. Bit 7 of smallFCD[0xff] is for U+FFE0..U+FFFF.
+ * A bit for 32 lead surrogates is set if any of the 32k corresponding
+ * _supplementary_ code points has a non-zero FCD value.
+ *
+ * This bit set is most useful for the large blocks of CJK characters with FCD=0.
+ *
+ * Changes from format version 1 to format version 2 ---------------------------
+ *
+ * - Addition of data for raw (not recursively decomposed) mappings.
+ * + The MAPPING_NO_COMP_BOUNDARY_AFTER bit in the extraData is now also set when
+ * the mapping is to an empty string or when the character combines-forward.
+ * This subsumes the one actual use of the MAPPING_PLUS_COMPOSITION_LIST bit which
+ * is then repurposed for the MAPPING_HAS_RAW_MAPPING bit.
+ * + For details see the design doc.
+ * - Addition of indexes[IX_MIN_YES_NO_MAPPINGS_ONLY] and separation of the yesNo extraData into
+ * distinct ranges (combines-forward vs. not)
+ * so that a range check can be used to find out if there is a compositions list.
+ * This is fully equivalent with formatVersion 1's MAPPING_PLUS_COMPOSITION_LIST flag.
+ * It is needed for the new (in ICU 49) composePair(), not for other normalization.
+ * - Addition of the smallFCD[] bit set.
+ *
+ * Changes from format version 2 to format version 3 (ICU 60) ------------------
+ *
+ * - norm16 bit 0 indicates hasCompBoundaryAfter(),
+ * except that for contiguous composition (FCC) the tccc must be checked as well.
+ * Data indexes and ccc values are shifted left by one (OFFSET_SHIFT).
+ * Thresholds like minNoNo are tested before shifting.
+ *
+ * - Algorithmic mapping deltas are shifted left by two more bits (total DELTA_SHIFT),
+ * to make room for two bits (three values) indicating whether the tccc is 0, 1, or greater.
+ * See DELTA_TCCC_MASK etc.
+ * This helps with fetching tccc/FCD values and FCC hasCompBoundaryAfter().
+ * minMaybeYes is 8-aligned so that the DELTA_TCCC_MASK bits can be tested directly.
+ *
+ * - Algorithmic mappings are only used for mapping to "comp yes and ccc=0" characters,
+ * and ASCII characters are mapped algorithmically only to other ASCII characters.
+ * This helps with hasCompBoundaryBefore() and compose() fast paths.
+ * It is never necessary any more to loop for algorithmic mappings.
+ *
+ * - Addition of indexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE],
+ * indexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC], and indexes[IX_MIN_NO_NO_EMPTY],
+ * and separation of the noNo extraData into distinct ranges.
+ * With this, the noNo norm16 value indicates whether the mapping is
+ * compose-normalized, not normalized but hasCompBoundaryBefore(),
+ * not even that, or maps to an empty string.
+ * hasCompBoundaryBefore() can be determined solely from the norm16 value.
+ *
+ * - The norm16 value for Hangul LVT is now different from that for Hangul LV,
+ * so that hasCompBoundaryAfter() need not check for the syllable type.
+ * For Hangul LV, minYesNo continues to be used (no comp-boundary-after).
+ * For Hangul LVT, minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER is used.
+ * The extraData units at these indexes are set to firstUnit=2 and firstUnit=3, respectively,
+ * to simplify some code.
+ *
+ * - The extraData firstUnit bit 5 is no longer necessary
+ * (norm16 bit 0 used instead of firstUnit MAPPING_NO_COMP_BOUNDARY_AFTER),
+ * is reserved again, and always set to 0.
+ *
+ * - Addition of indexes[IX_MIN_LCCC_CP], the first code point where lccc!=0.
+ * This used to be hardcoded to U+0300, but in data like NFKC_Casefold it is lower:
+ * U+00AD Soft Hyphen maps to an empty string,
+ * which is artificially assigned "worst case" values lccc=1 and tccc=255.
+ *
+ * - A mapping to an empty string has explicit lccc=1 and tccc=255 values.
+ *
+ * Changes from format version 3 to format version 4 (ICU 63) ------------------
+ *
+ * Switched from UTrie2 to UCPTrie/CodePointTrie.
+ *
+ * The new trie no longer stores different values for surrogate code *units* vs.
+ * surrogate code *points*.
+ * Lead surrogates still have values for optimized UTF-16 string processing.
+ * When looking up code point properties, the code now checks for lead surrogates and
+ * treats them as inert.
+ *
+ * gennorm2 now has to reject mappings for surrogate code points.
+ * UTS #46 maps unpaired surrogates to U+FFFD in code rather than via its
+ * custom normalization data file.
+ */
+
+#endif /* !UCONFIG_NO_NORMALIZATION */
+#endif /* __NORMALIZER2IMPL_H__ */
diff --git a/thirdparty/icu4c/common/normlzr.cpp b/thirdparty/icu4c/common/normlzr.cpp
new file mode 100644
index 0000000000..2dea0ffc33
--- /dev/null
+++ b/thirdparty/icu4c/common/normlzr.cpp
@@ -0,0 +1,529 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *************************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 1996-2012, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *************************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+#include "unicode/chariter.h"
+#include "unicode/schriter.h"
+#include "unicode/uchriter.h"
+#include "unicode/normlzr.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "normalizer2impl.h"
+#include "uprops.h" // for uniset_getUnicode32Instance()
+
+#if defined(move32)
+ // System can define move32 intrinsics, but the char iters define move32 method
+ // using same undef trick in headers, so undef here to re-enable the method.
+#undef move32
+#endif
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)
+
+//-------------------------------------------------------------------------
+// Constructors and other boilerplate
+//-------------------------------------------------------------------------
+
+Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode)
+        : UObject(),
+          fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
+          text(new StringCharacterIterator(str)),  // owned; deleted in the destructor
+          currentIndex(0), nextIndex(0),
+          buffer(), bufferPos(0) {
+    init();  // selects the Normalizer2 instance for fUMode
+}
+
+Normalizer::Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode)
+        : UObject(),
+          fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
+          text(new UCharCharacterIterator(str, length)),  // owned; deleted in the destructor
+          currentIndex(0), nextIndex(0),
+          buffer(), bufferPos(0) {
+    init();  // selects the Normalizer2 instance for fUMode
+}
+
+Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode)
+        : UObject(),
+          fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
+          text(iter.clone()),  // private clone; the caller's iterator is not retained
+          currentIndex(0), nextIndex(0),
+          buffer(), bufferPos(0) {
+    init();  // selects the Normalizer2 instance for fUMode
+}
+
+Normalizer::Normalizer(const Normalizer &copy)
+        : UObject(copy),
+          fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions),
+          text(copy.text->clone()),  // separately owned clone of the source's iterator
+          currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
+          buffer(copy.buffer), bufferPos(copy.bufferPos) {
+    init();  // the Normalizer2 pointers are not copied; they are derived again from mode/options
+}
+
+void
+Normalizer::init() {  // (re)derives fNorm2 from fUMode and fOptions; falls back to the no-op instance
+    UErrorCode errorCode=U_ZERO_ERROR;
+    fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode);
+    const UnicodeSet *u32=(fOptions&UNORM_UNICODE_3_2) ? uniset_getUnicode32Instance(errorCode) : NULL;
+    if(U_SUCCESS(errorCode) && fNorm2!=NULL && u32!=NULL) {  // dereference only after both lookups succeeded
+        delete fFilteredNorm2;
+        fNorm2=fFilteredNorm2=new FilteredNormalizer2(*fNorm2, *u32);
+    }
+    if(U_FAILURE(errorCode) || fNorm2==NULL) {  // lookup failure, or the allocation above returned NULL
+        errorCode=U_ZERO_ERROR;
+        fNorm2=Normalizer2Factory::getNoopInstance(errorCode);
+    }
+}
+
+Normalizer::~Normalizer()
+{
+    delete text;            // iterator created or cloned by this object
+    delete fFilteredNorm2;  // NULL unless init() created a filtered normalizer
+}
+
+Normalizer*
+Normalizer::clone() const {
+    Normalizer *duplicate=new Normalizer(*this);  // heap-allocated copy made by the copy constructor
+    return duplicate;
+}
+
+/**
+ * Sums the hash codes of the text iterator and the buffer with the mode, options and position fields.
+ */
+int32_t Normalizer::hashCode() const {
+    const int32_t settingsHash=text->hashCode() + fUMode + fOptions;
+    return settingsHash + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
+}
+
+UBool Normalizer::operator==(const Normalizer& that) const {
+    if(this==&that) {
+        return TRUE;
+    }
+    // Distinct objects are equal when mode, options, text iterator,
+    // buffer contents, buffer position and nextIndex all match.
+    if(fUMode!=that.fUMode || fOptions!=that.fOptions) {
+        return FALSE;
+    }
+    return *text==*that.text && buffer==that.buffer &&
+           bufferPos==that.bufferPos && nextIndex==that.nextIndex;
+}
+
+//-------------------------------------------------------------------------
+// Static utility methods
+//-------------------------------------------------------------------------
+
+void U_EXPORT2
+Normalizer::normalize(const UnicodeString& source,
+                      UNormalizationMode mode, int32_t options,
+                      UnicodeString& result,
+                      UErrorCode &status) {
+    // A bogus source or an already-failed status makes the result bogus.
+    if(source.isBogus() || U_FAILURE(status)) {
+        result.setToBogus();
+        if(U_SUCCESS(status)) {
+            status=U_ILLEGAL_ARGUMENT_ERROR;
+        }
+        return;
+    }
+
+    // If source and result are the same object, normalize into a
+    // temporary string and copy it back only on success.
+    const UBool aliased= (&source==&result);
+    UnicodeString scratch;
+    UnicodeString &target= aliased ? scratch : result;
+
+    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
+    if(U_SUCCESS(status)) {
+        if((options&UNORM_UNICODE_3_2)==0) {
+            n2->normalize(source, target, status);
+        } else {
+            FilteredNormalizer2 filtered(*n2, *uniset_getUnicode32Instance(status));
+            filtered.normalize(source, target, status);
+        }
+    }
+
+    if(aliased && U_SUCCESS(status)) {
+        result=target;
+    }
+}
+
+void U_EXPORT2
+Normalizer::compose(const UnicodeString& source,
+                    UBool compat, int32_t options,
+                    UnicodeString& result, UErrorCode &status) {
+    const UNormalizationMode mode= compat ? UNORM_NFKC : UNORM_NFC;  // compat selects NFKC
+    normalize(source, mode, options, result, status);
+}
+
+void U_EXPORT2
+Normalizer::decompose(const UnicodeString& source,
+                      UBool compat, int32_t options,
+                      UnicodeString& result, UErrorCode &status) {
+    const UNormalizationMode mode= compat ? UNORM_NFKD : UNORM_NFD;  // compat selects NFKD
+    normalize(source, mode, options, result, status);
+}
+
+UNormalizationCheckResult
+Normalizer::quickCheck(const UnicodeString& source,
+                       UNormalizationMode mode, int32_t options,
+                       UErrorCode &status) {
+    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
+    if(U_FAILURE(status)) {
+        return UNORM_MAYBE;  // no normalizer could be obtained
+    }
+    if((options&UNORM_UNICODE_3_2)==0) {
+        return n2->quickCheck(source, status);
+    }
+    // UNORM_UNICODE_3_2 requested: check through a FilteredNormalizer2
+    // built from n2 and the set from uniset_getUnicode32Instance().
+    FilteredNormalizer2 filtered(*n2, *uniset_getUnicode32Instance(status));
+    return filtered.quickCheck(source, status);
+}
+
+UBool
+Normalizer::isNormalized(const UnicodeString& source,
+                         UNormalizationMode mode, int32_t options,
+                         UErrorCode &status) {
+    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
+    if(U_FAILURE(status)) {
+        return FALSE;  // no normalizer could be obtained
+    }
+    if((options&UNORM_UNICODE_3_2)==0) {
+        return n2->isNormalized(source, status);
+    }
+    // UNORM_UNICODE_3_2 requested: check through a FilteredNormalizer2
+    // built from n2 and the set from uniset_getUnicode32Instance().
+    FilteredNormalizer2 filtered(*n2, *uniset_getUnicode32Instance(status));
+    return filtered.isNormalized(source, status);
+}
+
+UnicodeString & U_EXPORT2
+Normalizer::concatenate(const UnicodeString &left, const UnicodeString &right,
+                        UnicodeString &result,
+                        UNormalizationMode mode, int32_t options,
+                        UErrorCode &errorCode) {
+    // Bogus operands or an already-failed error code make the result bogus.
+    if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) {
+        result.setToBogus();
+        if(U_SUCCESS(errorCode)) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        }
+        return result;
+    }
+
+    // If right and result are the same object, build the concatenation in
+    // a temporary string and copy it back only on success.
+    const UBool aliased= (&right==&result);
+    UnicodeString scratch;
+    UnicodeString &target= aliased ? scratch : result;
+    target=left;
+
+    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode);
+    if(U_SUCCESS(errorCode)) {
+        if((options&UNORM_UNICODE_3_2)==0) {
+            n2->append(target, right, errorCode);
+        } else {
+            FilteredNormalizer2 filtered(*n2, *uniset_getUnicode32Instance(errorCode));
+            filtered.append(target, right, errorCode);
+        }
+    }
+
+    if(aliased && U_SUCCESS(errorCode)) {
+        result=target;
+    }
+    return result;
+}
+
+//-------------------------------------------------------------------------
+// Iteration API
+//-------------------------------------------------------------------------
+
+/**
+ * Returns the code point at the current buffer position without advancing.
+ * An exhausted buffer is refilled via nextNormalize(); DONE is returned if that fails.
+ */
+UChar32 Normalizer::current() {
+    if(bufferPos>=buffer.length() && !nextNormalize()) {
+        return DONE;
+    }
+    return buffer.char32At(bufferPos);
+}
+
+/**
+ * Returns the code point at the buffer position and moves the position
+ * past it. An exhausted buffer is refilled via nextNormalize(); if that
+ * fails, {@link #DONE} is returned.
+ */
+UChar32 Normalizer::next() {
+    if(bufferPos>=buffer.length() && !nextNormalize()) {
+        return DONE;
+    }
+    const UChar32 cp=buffer.char32At(bufferPos);
+    // Step over the whole code point (U16_LENGTH code units).
+    bufferPos+=U16_LENGTH(cp);
+    return cp;
+}
+
+/**
+ * Returns the code point just before the buffer position and moves the
+ * position back over it. At the start of the buffer, previousNormalize()
+ * is asked to refill it; if that fails, {@link #DONE} is returned.
+ */
+UChar32 Normalizer::previous() {
+    if(bufferPos<=0 && !previousNormalize()) {
+        return DONE;
+    }
+    const UChar32 cp=buffer.char32At(bufferPos-1);
+    // Step back over the whole code point (U16_LENGTH code units).
+    bufferPos-=U16_LENGTH(cp);
+    return cp;
+}
+
+void Normalizer::reset() {
+    clearBuffer();  // discard buffered output
+    currentIndex=nextIndex=text->setToStart();  // both indexes follow the rewound iterator
+}
+
+void
+Normalizer::setIndexOnly(int32_t index) {
+    clearBuffer();  // discard buffered output
+    text->setIndex(index);  // pins index
+    currentIndex=nextIndex=text->getIndex();  // adopt the pinned position
+}
+
+/**
+ * Rewinds to the beginning of the input text via reset() and returns
+ * the result of next(), i.e. the first character of the normalized text.
+ */
+UChar32 Normalizer::first() {
+    reset();  // back to the start of the input, buffer cleared
+    return next();
+}
+
+/**
+ * Return the last character in the normalized text. This moves
+ * the <tt>Normalizer's</tt> position to the end of the input text,
+ * clears the buffer, and then steps back once via previous().
+ */
+UChar32 Normalizer::last() {
+    clearBuffer();
+    currentIndex=nextIndex=text->setToEnd();
+    return previous();
+}
+
+/**
+ * Retrieve the current iteration position in the input text that is
+ * being normalized. This method is useful in applications such as
+ * searching, where you need to be able to determine the position in
+ * the input text that corresponds to a given normalized output character.
+ * <p>
+ * <b>Note:</b> This method returns a position in the <em>input</em>, while
+ * {@link #next} and {@link #previous} iterate through characters in the
+ * <em>output</em>. This means that there is not necessarily a one-to-one
+ * correspondence between characters returned by <tt>next</tt> and
+ * <tt>previous</tt> and the indices passed to and returned from
+ * <tt>setIndex</tt> and {@link #getIndex}.
+ *
+ */
+int32_t Normalizer::getIndex() const {
+    // With unread output left in the buffer, report currentIndex;
+    // once the buffer has been fully consumed (or is empty),
+    // report nextIndex.
+    const UBool hasUnread= bufferPos<buffer.length();
+    return hasUnread ? currentIndex : nextIndex;
+}
+
+/**
+ * Returns the begin index reported by the underlying <tt>CharacterIterator</tt>,
+ * i.e. the index at which the input text of this <tt>Normalizer</tt> starts
+ * (the start, i.e. 0, when iterating over a <tt>String</tt>).
+ */
+int32_t Normalizer::startIndex() const {
+    return text->startIndex();  // delegated to the text iterator
+}
+
+/**
+ * Returns the end index reported by the underlying <tt>CharacterIterator</tt>,
+ * i.e. the index at which the input text of this <tt>Normalizer</tt> ends
+ * (the length when iterating over a <tt>String</tt>).
+ */
+int32_t Normalizer::endIndex() const {
+    return text->endIndex();  // delegated to the text iterator
+}
+
+//-------------------------------------------------------------------------
+// Property access methods
+//-------------------------------------------------------------------------
+
+void
+Normalizer::setMode(UNormalizationMode newMode) {
+    // Record the mode first: init() reads fUMode to choose the normalizer.
+    fUMode = newMode;
+    init();
+}
+
+UNormalizationMode
+Normalizer::getUMode() const {
+    // Plain accessor for the mode stored by the constructors or setMode().
+    return fUMode;
+}
+
+void
+Normalizer::setOption(int32_t option,
+                      UBool value)
+{
+    // A true value switches the given option bit(s) on, a false value
+    // switches them off.
+    fOptions = value ? (fOptions | option) : (fOptions & (~option));
+
+    // init() consults fOptions, so the normalizer is derived again.
+    init();
+}
+
+UBool
+Normalizer::getOption(int32_t option) const {
+    const int32_t selected = fOptions & option;  // bits of option that are currently set
+    return selected != 0;
+}
+
+/**
+ * Replaces the input text with an iterator over newText and rewinds to its
+ * beginning. Does nothing if status already indicates a failure.
+ */
+void
+Normalizer::setText(const UnicodeString& newText,
+                    UErrorCode &status)
+{
+    if (U_SUCCESS(status)) {
+        CharacterIterator *replacement = new StringCharacterIterator(newText);
+        if (replacement != NULL) {
+            // Drop the old iterator only once the new one exists, then rewind.
+            delete text;
+            text = replacement;
+            reset();
+        } else {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+/**
+ * Replaces the input text with a clone of newText and rewinds to its
+ * beginning. Does nothing if status already indicates a failure.
+ */
+void
+Normalizer::setText(const CharacterIterator& newText,
+                    UErrorCode &status)
+{
+    if (U_SUCCESS(status)) {
+        CharacterIterator *replacement = newText.clone();
+        if (replacement != NULL) {
+            // Drop the old iterator only once the new one exists, then rewind.
+            delete text;
+            text = replacement;
+            reset();
+        } else {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+void
+Normalizer::setText(ConstChar16Ptr newText,
+                    int32_t length,
+                    UErrorCode &status)
+{
+    if (U_SUCCESS(status)) {
+        CharacterIterator *replacement = new UCharCharacterIterator(newText, length);
+        if (replacement != NULL) {
+            // Drop the old iterator only once the new one exists, then rewind.
+            delete text;
+            text = replacement;
+            reset();
+        } else {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+/**
+ * Hands "result" to the text iterator's getText() so that it receives the text under iteration.
+ * @param result Receives the text under iteration.
+ */
+void
+Normalizer::getText(UnicodeString& result) {
+    // Delegates to the underlying CharacterIterator.
+    text->getText(result);
+}
+
+//-------------------------------------------------------------------------
+// Private utility methods
+//-------------------------------------------------------------------------
+
+void Normalizer::clearBuffer() {
+    bufferPos=0;
+    buffer.remove();  // drop the buffered normalized output
+}
+
+UBool
+Normalizer::nextNormalize() {
+    clearBuffer();
+    currentIndex=nextIndex;
+    text->setIndex(nextIndex);
+    if(!text->hasNext()) {
+        return FALSE;  // input exhausted
+    }
+    // Always take one code point so that each call makes progress; then collect up to the next boundary.
+    UnicodeString segment(text->next32PostInc());
+    while(text->hasNext()) {
+        const UChar32 c=text->next32PostInc();
+        if(fNorm2->hasBoundaryBefore(c)) {
+            text->move32(-1, CharacterIterator::kCurrent);  // un-read c; it belongs to the next segment
+            break;
+        }
+        segment.append(c);
+    }
+    nextIndex=text->getIndex();
+    UErrorCode errorCode=U_ZERO_ERROR;
+    fNorm2->normalize(segment, buffer, errorCode);
+    return U_SUCCESS(errorCode) && !buffer.isEmpty();
+}
+
+UBool
+Normalizer::previousNormalize() {
+    clearBuffer();
+    nextIndex=currentIndex;
+    text->setIndex(currentIndex);
+    if(!text->hasPrevious()) {
+        return FALSE;  // already at the start of the input
+    }
+    // Walk backwards, prepending code points, until one with a boundary before it has been included.
+    UnicodeString segment;
+    UBool atBoundary=FALSE;
+    while(!atBoundary && text->hasPrevious()) {
+        const UChar32 c=text->previous32();
+        segment.insert(0, c);
+        atBoundary=fNorm2->hasBoundaryBefore(c);
+    }
+    currentIndex=text->getIndex();
+    UErrorCode errorCode=U_ZERO_ERROR;
+    fNorm2->normalize(segment, buffer, errorCode);
+    bufferPos=buffer.length();  // start at the end of the buffer for backward iteration
+    return U_SUCCESS(errorCode) && !buffer.isEmpty();
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
diff --git a/thirdparty/icu4c/common/parsepos.cpp b/thirdparty/icu4c/common/parsepos.cpp
new file mode 100644
index 0000000000..56c6c78813
--- /dev/null
+++ b/thirdparty/icu4c/common/parsepos.cpp
@@ -0,0 +1,23 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2003-2003, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#include "unicode/parsepos.h"
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ParsePosition)
+
+ParsePosition::~ParsePosition() { /* intentionally empty */ }
+
+ParsePosition *ParsePosition::clone() const {
+    ParsePosition *duplicate = new ParsePosition(*this);  // heap-allocated copy
+    return duplicate;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/patternprops.cpp b/thirdparty/icu4c/common/patternprops.cpp
new file mode 100644
index 0000000000..c38a7e276d
--- /dev/null
+++ b/thirdparty/icu4c/common/patternprops.cpp
@@ -0,0 +1,230 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: patternprops.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011mar13
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "patternprops.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ * One byte per Latin-1 character (U+0000..U+00FF), indexed by code point.
+ * Bit 0 is set if either Pattern property is true,
+ * bit 1 if Pattern_Syntax is true,
+ * bit 2 if Pattern_White_Space is true.
+ * That is, Pattern_Syntax is encoded as 3 and Pattern_White_Space as 5; 0 means neither.
+ */
+static const uint8_t latin1[256]={
+ // WS: 9..D
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // WS: 20 Syntax: 21..2F
+ 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ // Syntax: 3A..40
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3,
+ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // Syntax: 5B..5E
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
+ // Syntax: 60
+ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // Syntax: 7B..7E
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
+ // WS: 85
+ 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // Syntax: A1..A7, A9, AB, AC, AE
+ 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 0,
+ // Syntax: B0, B1, B6, BB, BF
+ 3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 3,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // Syntax: D7
+ 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // Syntax: F7
+ 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/*
+ * One byte per 32 characters from U+2000..U+303F, looked up as (c-0x2000)>>5, indexing into
+ * a small table of 32-bit data words (syntax2000 or syntaxOrWhiteSpace2000).
+ * The first two data words are all-zeros and all-ones.
+ */
+static const uint8_t index2000[130]={
+ 2, 3, 4, 0, 0, 0, 0, 0, // 20xx
+ 0, 0, 0, 0, 5, 1, 1, 1, // 21xx
+ 1, 1, 1, 1, 1, 1, 1, 1, // 22xx
+ 1, 1, 1, 1, 1, 1, 1, 1, // 23xx
+ 1, 1, 1, 0, 0, 0, 0, 0, // 24xx
+ 1, 1, 1, 1, 1, 1, 1, 1, // 25xx
+ 1, 1, 1, 1, 1, 1, 1, 1, // 26xx
+ 1, 1, 1, 6, 7, 1, 1, 1, // 27xx
+ 1, 1, 1, 1, 1, 1, 1, 1, // 28xx
+ 1, 1, 1, 1, 1, 1, 1, 1, // 29xx
+ 1, 1, 1, 1, 1, 1, 1, 1, // 2Axx
+ 1, 1, 1, 1, 1, 1, 1, 1, // 2Bxx
+ 0, 0, 0, 0, 0, 0, 0, 0, // 2Cxx
+ 0, 0, 0, 0, 0, 0, 0, 0, // 2Dxx
+ 1, 1, 1, 1, 0, 0, 0, 0, // 2Exx
+ 0, 0, 0, 0, 0, 0, 0, 0, // 2Fxx
+ 8, 9 // 3000..303F
+};
+
+/*
+ * One 32-bit integer per 32 characters; bit (c&0x1f) of a word stands for code point c. Ranges of all-false and all-true
+ * are mapped to the first two values, other ranges map to appropriate bit patterns.
+ */
+static const uint32_t syntax2000[]={
+ 0,
+ 0xffffffff,
+ 0xffff0000, // 2: 2010..201F
+ 0x7fff00ff, // 3: 2020..2027, 2030..203E
+ 0x7feffffe, // 4: 2041..2053, 2055..205E
+ 0xffff0000, // 5: 2190..219F
+ 0x003fffff, // 6: 2760..2775
+ 0xfff00000, // 7: 2794..279F
+ 0xffffff0e, // 8: 3001..3003, 3008..301F
+ 0x00010001 // 9: 3020, 3030
+};
+
+/*
+ * Same layout and index values as syntax2000, but with additional bits set for the
+ * Pattern_White_Space characters 200E 200F 2028 2029 (only words 2 and 3 differ).
+ */
+static const uint32_t syntaxOrWhiteSpace2000[]={
+ 0,
+ 0xffffffff,
+ 0xffffc000, // 2: 200E..201F
+ 0x7fff03ff, // 3: 2020..2029, 2030..203E
+ 0x7feffffe, // 4: 2041..2053, 2055..205E
+ 0xffff0000, // 5: 2190..219F
+ 0x003fffff, // 6: 2760..2775
+ 0xfff00000, // 7: 2794..279F
+ 0xffffff0e, // 8: 3001..3003, 3008..301F
+ 0x00010001 // 9: 3020, 3030
+};
+
+UBool
+PatternProps::isSyntax(UChar32 c) {
+    if(c<0) {
+        return FALSE;
+    }
+    if(c<=0xff) {
+        return (UBool)((latin1[c]>>1)&1);  // bit 1 of the Latin-1 entry
+    }
+    if(c<0x2010 || 0xfe46<c) {
+        return FALSE;
+    }
+    if(c<=0x3030) {  // one bit per code point; the 32-bit word is selected through index2000
+        const uint32_t bits=syntax2000[index2000[(c-0x2000)>>5]];
+        return (UBool)((bits>>(c&0x1f))&1);
+    }
+    return c==0xfd3e || c==0xfd3f || c==0xfe45 || c==0xfe46;
+}
+
+UBool
+PatternProps::isSyntaxOrWhiteSpace(UChar32 c) {
+    if(c<0) {
+        return FALSE;
+    }
+    if(c<=0xff) {
+        return (UBool)(latin1[c]&1);  // bit 0 of the Latin-1 entry
+    }
+    if(c<0x200e || 0xfe46<c) {
+        return FALSE;
+    }
+    if(c<=0x3030) {  // one bit per code point; the 32-bit word is selected through index2000
+        const uint32_t bits=syntaxOrWhiteSpace2000[index2000[(c-0x2000)>>5]];
+        return (UBool)((bits>>(c&0x1f))&1);
+    }
+    return c==0xfd3e || c==0xfd3f || c==0xfe45 || c==0xfe46;
+}
+
+UBool
+PatternProps::isWhiteSpace(UChar32 c) {
+    if(c<0) {
+        return FALSE;
+    }
+    if(c<=0xff) {
+        return (UBool)((latin1[c]>>2)&1);  // bit 2 of the Latin-1 entry
+    }
+    // Above Latin-1 the only accepted code points are
+    // U+200E, U+200F, U+2028 and U+2029.
+    return c==0x200e || c==0x200f || c==0x2028 || c==0x2029;
+}
+
+const UChar *
+PatternProps::skipWhiteSpace(const UChar *s, int32_t length) {
+    // Advance while units remain and the current one is Pattern_White_Space.
+    for(; length>0 && isWhiteSpace(*s); --length) {
+        ++s;
+    }
+    return s;
+}
+
+int32_t
+PatternProps::skipWhiteSpace(const UnicodeString& s, int32_t start) {
+    const int32_t length = s.length();
+    int32_t pos = start;
+    for(; pos<length; ++pos) {
+        if(!isWhiteSpace(s.charAt(pos))) { break; }  // first non-white space unit
+    }
+    return pos;
+}
+
+const UChar *
+PatternProps::trimWhiteSpace(const UChar *s, int32_t &length) {
+    if(length<=0) {
+        return s;
+    }
+    if(!isWhiteSpace(s[0]) && !isWhiteSpace(s[length-1])) {
+        return s;  // nothing to trim at either end
+    }
+    const UChar *first=s;
+    const UChar *end=s+length;
+    while(first<end && isWhiteSpace(*first)) {
+        ++first;
+    }
+    if(first<end) {
+        // *first is not white space, so the backward scan stops there at the latest.
+        while(isWhiteSpace(end[-1])) { --end; }
+    }
+    length=(int32_t)(end-first);
+    return first;
+}
+
+UBool
+PatternProps::isIdentifier(const UChar *s, int32_t length) {
+    if(length<=0) {
+        return FALSE;  // an empty string is not an identifier
+    }
+    // Reject on the first Pattern_Syntax or Pattern_White_Space unit.
+    for(int32_t i=0; i<length; ++i) {
+        if(isSyntaxOrWhiteSpace(s[i])) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+const UChar *
+PatternProps::skipIdentifier(const UChar *s, int32_t length) {
+    // Advance while units remain and the current one is neither syntax nor white space.
+    for(; length>0 && !isSyntaxOrWhiteSpace(*s); --length) {
+        ++s;
+    }
+    return s;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/patternprops.h b/thirdparty/icu4c/common/patternprops.h
new file mode 100644
index 0000000000..95898d580c
--- /dev/null
+++ b/thirdparty/icu4c/common/patternprops.h
@@ -0,0 +1,98 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: patternprops.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011mar13
+* created by: Markus W. Scherer
+*/
+
+#ifndef __PATTERNPROPS_H__
+#define __PATTERNPROPS_H__
+
+#include "unicode/unistr.h"
+#include "unicode/utypes.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Implements the immutable Unicode properties Pattern_Syntax and Pattern_White_Space.
+ * Hardcodes these properties, does not load data, does not depend on other ICU classes.
+ * <p>
+ * Note: Both properties include ASCII as well as non-ASCII, non-Latin-1 code points,
+ * and both properties only include BMP code points (no supplementary ones).
+ * Pattern_Syntax includes some unassigned code points.
+ * <p>
+ * [:Pattern_White_Space:] =
+ * [\u0009-\u000D\ \u0085\u200E\u200F\u2028\u2029]
+ * <p>
+ * [:Pattern_Syntax:] =
+ * [!-/\:-@\[-\^`\{-~\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE
+ * \u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7
+ * \u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E
+ * \u2190-\u245F\u2500-\u2775\u2794-\u2BFF\u2E00-\u2E7F
+ * \u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46]
+ * @author mscherer
+ */
+class U_COMMON_API PatternProps {
+public:
+ /**
+ * @return true if c is a Pattern_Syntax code point.
+ */
+ static UBool isSyntax(UChar32 c);
+
+ /**
+ * @return true if c is a Pattern_Syntax or Pattern_White_Space code point.
+ */
+ static UBool isSyntaxOrWhiteSpace(UChar32 c);
+
+ /**
+ * @return true if c is a Pattern_White_Space character.
+ */
+ static UBool isWhiteSpace(UChar32 c);
+
+ /**
+ * Skips over Pattern_White_Space starting at s, examining at most length units.
+ * @return The smallest pointer at or after s with a non-white space character, or s+length if there is none.
+ */
+ static const UChar *skipWhiteSpace(const UChar *s, int32_t length);
+
+ /**
+ * Skips over Pattern_White_Space starting at index start in s.
+ * @return The smallest index at or after start with a non-white space character, or the string length if there is none.
+ */
+ static int32_t skipWhiteSpace(const UnicodeString &s, int32_t start);
+
+ /**
+ * @return s except with leading and trailing Pattern_White_Space removed and length adjusted (length is in/out).
+ */
+ static const UChar *trimWhiteSpace(const UChar *s, int32_t &length);
+
+ /**
+ * Tests whether the string contains a "pattern identifier", that is,
+ * whether it is non-empty and contains only non-Pattern_White_Space, non-Pattern_Syntax characters.
+ * @return true if length>0 and there are no Pattern_White_Space or Pattern_Syntax characters in s.
+ */
+ static UBool isIdentifier(const UChar *s, int32_t length);
+
+ /**
+ * Skips over a "pattern identifier" starting at pointer s, examining at most length units.
+ * @return The smallest pointer at or after s with
+ * a Pattern_White_Space or Pattern_Syntax character, or s+length if there is none.
+ */
+ static const UChar *skipIdentifier(const UChar *s, int32_t length);
+
+private:
+ PatternProps(); // no constructor: all static methods
+};
+
+U_NAMESPACE_END
+
+#endif // __PATTERNPROPS_H__
diff --git a/thirdparty/icu4c/common/pluralmap.cpp b/thirdparty/icu4c/common/pluralmap.cpp
new file mode 100644
index 0000000000..ec87f0198e
--- /dev/null
+++ b/thirdparty/icu4c/common/pluralmap.cpp
@@ -0,0 +1,44 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ * Copyright (C) 2015, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ */
+
+#include "unicode/unistr.h"
+#include "charstr.h"
+#include "cstring.h"
+#include "pluralmap.h"
+
+U_NAMESPACE_BEGIN
+
+static const char * const gPluralForms[] = { // array index is used as the PluralMapBase::Category value
+ "other", "zero", "one", "two", "few", "many"};
+
+PluralMapBase::Category
+PluralMapBase::toCategory(const char *pluralForm) {
+    // Linear scan; the array index of a matching name is the Category value.
+    int32_t i = 0;
+    while (i < UPRV_LENGTHOF(gPluralForms) && uprv_strcmp(pluralForm, gPluralForms[i]) != 0) {
+        ++i;
+    }
+    return i < UPRV_LENGTHOF(gPluralForms) ? static_cast<Category>(i) : NONE;
+}
+
+PluralMapBase::Category
+PluralMapBase::toCategory(const UnicodeString &pluralForm) {
+    UErrorCode status = U_ZERO_ERROR;
+    CharString cCategory;  // receives the name as invariant chars
+    cCategory.appendInvariantChars(pluralForm, status);
+    return U_SUCCESS(status) ? toCategory(cCategory.data()) : NONE;
+}
+
+const char *PluralMapBase::getCategoryName(Category c) {
+    const int32_t index = c;
+    if (index >= 0 && index < UPRV_LENGTHOF(gPluralForms)) { return gPluralForms[index]; }
+    return NULL;  // NONE, CATEGORY_COUNT or any other out-of-range value
+}
+
+
+U_NAMESPACE_END
+
diff --git a/thirdparty/icu4c/common/pluralmap.h b/thirdparty/icu4c/common/pluralmap.h
new file mode 100644
index 0000000000..d898ac4671
--- /dev/null
+++ b/thirdparty/icu4c/common/pluralmap.h
@@ -0,0 +1,292 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2015, International Business Machines Corporation and
+* others. All Rights Reserved.
+******************************************************************************
+*
+* File pluralmap.h - PluralMap class that maps plural categories to values.
+******************************************************************************
+*/
+
+#ifndef __PLURAL_MAP_H__
+#define __PLURAL_MAP_H__
+
+#include "unicode/uobject.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+class U_COMMON_API PluralMapBase : public UMemory {
+public:
+ /**
+ * The names of all the plural categories. NONE is not an actual plural
+ * category, but rather represents the absence of a plural category.
+ */
+ enum Category {
+ NONE = -1,
+ OTHER,
+ ZERO,
+ ONE,
+ TWO,
+ FEW,
+ MANY,
+ CATEGORY_COUNT
+ };
+
+ /**
+ * Converts a category name such as "zero", "one", "two", "few", "many"
+ * or "other" to a category enum. Returns NONE for an unrecognized
+ * category name.
+ */
+ static Category toCategory(const char *categoryName);
+
+ /**
+ * Converts a category name such as "zero", "one", "two", "few", "many"
+ * or "other" to a category enum. Returns NONE for an unrecognized
+ * category name.
+ */
+ static Category toCategory(const UnicodeString &categoryName);
+
+ /**
+ * Converts a category to a name.
+ * Passing NONE or CATEGORY_COUNT for category returns NULL.
+ */
+ static const char *getCategoryName(Category category);
+};
+
+/**
+ * A Map of plural categories to values. It maintains ownership of the
+ * values.
+ *
+ * Type T is the value type. T must provide the following:
+ * 1) Default constructor
+ * 2) Copy constructor
+ * 3) Assignment operator
+ * 4) Must extend UMemory
+ */
+template<typename T>
+class PluralMap : public PluralMapBase {
+public:
+ /**
+ * The 'other' category maps to a default-constructed value.
+ */
+ PluralMap() : fOtherVariant() {
+ initializeNew();
+ }
+
+ /**
+ * Other category is mapped to otherVariant.
+ */
+ PluralMap(const T &otherVariant) : fOtherVariant(otherVariant) {
+ initializeNew();
+ }
+
+ PluralMap(const PluralMap<T> &other) : fOtherVariant(other.fOtherVariant) {
+ fVariants[0] = &fOtherVariant;
+ for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) {
+ fVariants[i] = other.fVariants[i] ?
+ new T(*other.fVariants[i]) : NULL;
+ }
+ }
+
+ PluralMap<T> &operator=(const PluralMap<T> &other) {
+ if (this == &other) {
+ return *this;
+ }
+ for (int32_t i = 0; i < UPRV_LENGTHOF(fVariants); ++i) {
+ if (fVariants[i] != NULL && other.fVariants[i] != NULL) {
+ *fVariants[i] = *other.fVariants[i];
+ } else if (fVariants[i] != NULL) {
+ delete fVariants[i];
+ fVariants[i] = NULL;
+ } else if (other.fVariants[i] != NULL) {
+ fVariants[i] = new T(*other.fVariants[i]);
+ } else {
+ // do nothing
+ }
+ }
+ return *this;
+ }
+
+ ~PluralMap() {
+ for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) {
+ delete fVariants[i];
+ }
+ }
+
+ /**
+ * Removes all mappings and resets 'other' to a default-constructed value.
+ */
+ void clear() {
+ *fVariants[0] = T();
+ for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) {
+ delete fVariants[i];
+ fVariants[i] = NULL;
+ }
+ }
+
+ /**
+ * Iterates through the mappings in this instance, set index to NONE
+ * prior to using. Call next repeatedly to get the values until it
+ * returns NULL. Each time next returns, caller may pass index
+ * to getCategoryName() to get the name of the plural category.
+ * When this function returns NULL, index is CATEGORY_COUNT
+ */
+ const T *next(Category &index) const {
+ int32_t idx = index;
+ ++idx;
+ for (; idx < UPRV_LENGTHOF(fVariants); ++idx) {
+ if (fVariants[idx] != NULL) {
+ index = static_cast<Category>(idx);
+ return fVariants[idx];
+ }
+ }
+ index = static_cast<Category>(idx);
+ return NULL;
+ }
+
+ /**
+ * Non-const version of next().
+ */
+ T *nextMutable(Category &index) {
+ const T *result = next(index);
+ return const_cast<T *>(result);
+ }
+
+ /**
+ * Returns the 'other' variant.
+ * Same as calling get(OTHER).
+ */
+ const T &getOther() const {
+ return get(OTHER);
+ }
+
+ /**
+ * Returns the value associated with a category.
+ * If no value found, or v is NONE or CATEGORY_COUNT, falls
+ * back to returning the value for the 'other' category.
+ */
+ const T &get(Category v) const {
+ int32_t index = v;
+ if (index < 0 || index >= UPRV_LENGTHOF(fVariants) || fVariants[index] == NULL) {
+ return *fVariants[0];
+ }
+ return *fVariants[index];
+ }
+
+ /**
+ * Convenience routine to get the value by category name. Otherwise
+ * works just like get(Category).
+ */
+ const T &get(const char *category) const {
+ return get(toCategory(category));
+ }
+
+ /**
+ * Convenience routine to get the value by category name as a
+ * UnicodeString. Otherwise works just like get(category).
+ */
+ const T &get(const UnicodeString &category) const {
+ return get(toCategory(category));
+ }
+
+ /**
+ * Returns a pointer to the value associated with a category
+ * that caller can safely modify. If the value was defaulting to the 'other'
+ * variant because no explicit value was stored, this method creates a
+ * new value using the default constructor at the returned pointer.
+ *
+ * @param category the category with the value to change.
+ * @param status error returned here if index is NONE or CATEGORY_COUNT
+ * or memory could not be allocated, or any other error happens.
+ */
+ T *getMutable(
+ Category category,
+ UErrorCode &status) {
+ return getMutable(category, NULL, status);
+ }
+
+ /**
+ * Convenience routine to get a mutable pointer to a value by category name.
+ * Otherwise works just like getMutable(Category, UErrorCode &).
+ * Reports an error if the category name is invalid.
+ */
+ T *getMutable(
+ const char *category,
+ UErrorCode &status) {
+ return getMutable(toCategory(category), NULL, status);
+ }
+
+ /**
+ * Just like getMutable(Category, UErrorCode &) but copies defaultValue to
+ * returned pointer if it was defaulting to the 'other' variant
+ * because no explicit value was stored.
+ */
+ T *getMutableWithDefault(
+ Category category,
+ const T &defaultValue,
+ UErrorCode &status) {
+ return getMutable(category, &defaultValue, status);
+ }
+
+ /**
+ * Returns true if, for every category, both maps have no value or both have values that eqFunc reports as equal.
+ */
+ UBool equals(
+ const PluralMap<T> &rhs,
+ UBool (*eqFunc)(const T &, const T &)) const {
+ for (int32_t i = 0; i < UPRV_LENGTHOF(fVariants); ++i) {
+ if (fVariants[i] == rhs.fVariants[i]) {
+ continue;
+ }
+ if (fVariants[i] == NULL || rhs.fVariants[i] == NULL) {
+ return false;
+ }
+ if (!eqFunc(*fVariants[i], *rhs.fVariants[i])) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+private:
+ T fOtherVariant;
+ T* fVariants[6]; // slot 0 points at fOtherVariant (the destructor and clear() skip it); slots 1..5 are heap-allocated or NULL
+
+ T *getMutable(
+ Category category,
+ const T *defaultValue,
+ UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return NULL;
+ }
+ int32_t index = category;
+ if (index < 0 || index >= UPRV_LENGTHOF(fVariants)) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return NULL;
+ }
+ if (fVariants[index] == NULL) {
+ fVariants[index] = defaultValue == NULL ?
+ new T() : new T(*defaultValue);
+ }
+ if (!fVariants[index]) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ return fVariants[index];
+ }
+
+ void initializeNew() {
+ fVariants[0] = &fOtherVariant;
+ for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) {
+ fVariants[i] = NULL;
+ }
+ }
+};
+
+U_NAMESPACE_END
+
+#endif
diff --git a/thirdparty/icu4c/common/propname.cpp b/thirdparty/icu4c/common/propname.cpp
new file mode 100644
index 0000000000..a12eb7d913
--- /dev/null
+++ b/thirdparty/icu4c/common/propname.cpp
@@ -0,0 +1,328 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2002-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: October 30 2002
+* Since: ICU 2.4
+* 2010nov19 Markus Scherer Rewrite for formatVersion 2.
+**********************************************************************
+*/
+#include "propname.h"
+#include "unicode/uchar.h"
+#include "unicode/udata.h"
+#include "unicode/uscript.h"
+#include "umutex.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uarrsort.h"
+#include "uinvchar.h"
+
+#define INCLUDED_FROM_PROPNAME_CPP
+#include "propname_data.h"
+
+U_CDECL_BEGIN
+
+/**
+ * Scans an ASCII property name for its next significant character.
+ * Steps over '-' (0x2d), '_' (0x5f), space (0x20) and the controls
+ * 0x09..0x0d, then lowercases the character that ended the scan.
+ * @return ((advance count for the name)<<8)|character;
+ *         the low byte is 0 when the end of the string was reached
+ */
+static inline int32_t
+getASCIIPropertyNameChar(const char *name) {
+    int32_t consumed = 0;
+    char ch;
+
+    /* Step over delimiters '-', '_', and ASCII White_Space */
+    do {
+        ch = name[consumed++];
+    } while(ch==0x2d || ch==0x5f ||
+            ch==0x20 || (0x09<=ch && ch<=0x0d));
+
+    if(ch==0) {
+        return consumed<<8;
+    }
+    return (consumed<<8)|(uint8_t)uprv_asciitolower(ch);
+}
+
+/**
+ * Scans an EBCDIC property name for its next significant character.
+ * Steps over the EBCDIC codes for '-', '_' and White_Space, then
+ * lowercases the character that ended the scan.
+ * @return ((advance count for the name)<<8)|character;
+ *         the low byte is 0 when the end of the string was reached
+ */
+static inline int32_t
+getEBCDICPropertyNameChar(const char *name) {
+    int32_t consumed = 0;
+    char ch;
+
+    /* Step over delimiters '-', '_', and EBCDIC White_Space */
+    for(;;) {
+        ch = name[consumed++];
+        switch(ch) {
+        case 0x60: case 0x6d:
+        case 0x40: case 0x05: case 0x15: case 0x25:
+        case 0x0b: case 0x0c: case 0x0d:
+            continue;   /* ignorable: fetch the next char */
+        default:
+            break;
+        }
+        break;
+    }
+
+    if(ch==0) {
+        return consumed<<8;
+    }
+    return (consumed<<8)|(uint8_t)uprv_ebcdictolower(ch);
+}
+
+/**
+ * Loose comparison of two ASCII property (value) names.
+ *
+ * UCD.html 4.0.1 prescribes, for all property names, property value names,
+ * and property values of Enumerated, Binary, or Catalog properties:
+ *
+ * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
+ *
+ * Both names are walked in parallel, one significant lowercased character
+ * at a time. Nearly the same as ucnv_compareNames(), except that
+ * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC)
+ * are ignored as well.
+ *
+ * @return 0 if the names match loosely, otherwise the difference between
+ *         the first pair of differing lowercased characters (name1 - name2)
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
+    for(;;) {
+        const int32_t left=getASCIIPropertyNameChar(name1);
+        const int32_t right=getASCIIPropertyNameChar(name2);
+        const int32_t leftChar=left&0xff;
+        const int32_t rightChar=right&0xff;
+
+        /* Both strings ended at the same time: they match */
+        if(leftChar==0 && rightChar==0) {
+            return 0;
+        }
+
+        /* Different lowercased characters decide the order */
+        if(leftChar!=rightChar) {
+            return leftChar-rightChar;
+        }
+
+        /* Same character: advance each name by what its scan consumed */
+        name1+=left>>8;
+        name2+=right>>8;
+    }
+}
+
+/**
+ * Loose comparison of two EBCDIC property (value) names: case, '-', '_'
+ * and EBCDIC White_Space are ignored.
+ *
+ * @return 0 if the names match loosely, otherwise the difference between
+ *         the first pair of differing lowercased characters (name1 - name2)
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
+    for(;;) {
+        const int32_t left=getEBCDICPropertyNameChar(name1);
+        const int32_t right=getEBCDICPropertyNameChar(name2);
+        const int32_t leftChar=left&0xff;
+        const int32_t rightChar=right&0xff;
+
+        /* Both strings ended at the same time: they match */
+        if(leftChar==0 && rightChar==0) {
+            return 0;
+        }
+
+        /* Different lowercased characters decide the order */
+        if(leftChar!=rightChar) {
+            return leftChar-rightChar;
+        }
+
+        /* Same character: advance each name by what its scan consumed */
+        name1+=left>>8;
+        name2+=right>>8;
+    }
+}
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+// Locates a property in the range table at the start of valueMaps[].
+// Returns the valueMaps[] index of the property's two-word entry,
+// or 0 if no range contains the property.
+int32_t PropNameData::findProperty(int32_t property) {
+    int32_t cursor=1;  // valueMaps[0] holds the range count; ranges follow
+    int32_t rangesLeft=valueMaps[0];
+    while(rangesLeft-- >0) {
+        // Consume this range's start and limit.
+        const int32_t start=valueMaps[cursor++];
+        const int32_t limit=valueMaps[cursor++];
+        if(property<start) {
+            return 0;  // Ranges are sorted; no later range can match.
+        }
+        if(property<limit) {
+            return cursor+2*(property-start);
+        }
+        cursor+=2*(limit-start);  // Step over all entries of this range.
+    }
+    return 0;
+}
+
+// Looks up the name group of one value of a property.
+// valueMapIndex is the valueMaps[] index of the property's value map
+// (0 means the property has no value map); value is the property value.
+// Returns the offset read from valueMaps[] for that value, or 0 if the
+// value is not found in the map.
+int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
+ if(valueMapIndex==0) {
+ return 0; // The property does not have named values.
+ }
+ ++valueMapIndex; // Skip the BytesTrie offset.
+ int32_t numRanges=valueMaps[valueMapIndex++];
+ if(numRanges<0x10) {
+ // Ranges of values.
+ for(; numRanges>0; --numRanges) {
+ // Read and skip the start and limit of this range.
+ int32_t start=valueMaps[valueMapIndex];
+ int32_t limit=valueMaps[valueMapIndex+1];
+ valueMapIndex+=2;
+ if(value<start) {
+ break;
+ }
+ if(value<limit) {
+ // One entry per value in [start, limit), directly after the range header.
+ return valueMaps[valueMapIndex+value-start];
+ }
+ valueMapIndex+=limit-start; // Skip all entries for this range.
+ }
+ } else {
+ // List of values.
+ // (numRanges-0x10) values are followed by the same number of offsets.
+ int32_t valuesStart=valueMapIndex;
+ int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
+ // Note: do/while reads the first slot before the bound is checked.
+ do {
+ int32_t v=valueMaps[valueMapIndex];
+ if(value<v) {
+ break;
+ }
+ if(value==v) {
+ // The offset sits at the same position in the second half of the list.
+ return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
+ }
+ } while(++valueMapIndex<nameGroupOffsetsStart);
+ }
+ return 0;
+}
+
+// Picks the nameIndex-th name out of a name group.
+// The group starts with a count byte followed by that many NUL-terminated
+// strings. Returns NULL if nameIndex is out of range or the selected
+// string is empty.
+const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
+    const int32_t nameCount=*nameGroup++;
+    if(nameIndex<0 || nameCount<=nameIndex) {
+        return NULL;
+    }
+    // Walk past the names that precede the requested one.
+    for(int32_t skipped=0; skipped<nameIndex; ++skipped) {
+        nameGroup=uprv_strchr(nameGroup, 0)+1;
+    }
+    if(*nameGroup!=0) {
+        return nameGroup;
+    }
+    return NULL;  // no name (Property[Value]Aliases.txt has "n/a")
+}
+
+// Feeds the loosely-normalized name into the trie, byte by byte.
+// Each character is lowercased first; '-', '_' and ASCII White_Space are
+// dropped. Returns FALSE for a NULL name or as soon as the trie cannot
+// continue; otherwise reports whether the final trie state has a value.
+UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
+    if(name==NULL) {
+        return FALSE;
+    }
+    UStringTrieResult result=USTRINGTRIE_NO_VALUE;
+    for(; *name!=0; ++name) {
+        const char folded=uprv_invCharToLowercaseAscii(*name);
+        // Delimiters '-', '_', and ASCII White_Space do not take part in matching.
+        const UBool ignorable=
+            folded==0x2d || folded==0x5f || folded==0x20 || (0x09<=folded && folded<=0x0d);
+        if(ignorable) {
+            continue;
+        }
+        if(!USTRINGTRIE_HAS_NEXT(result)) {
+            return FALSE;
+        }
+        result=trie.next((uint8_t)folded);
+    }
+    return USTRINGTRIE_HAS_VALUE(result);
+}
+
+// Returns the nameChoice-th name of a property, or NULL if the property is
+// unknown or has no such name.
+const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
+    const int32_t propertyEntry=findProperty(property);
+    if(propertyEntry==0) {
+        return NULL;  // Not a known property.
+    }
+    // The first word of the entry is the property's name group offset.
+    const char *group=nameGroups+valueMaps[propertyEntry];
+    return getName(group, nameChoice);
+}
+
+// Returns the nameChoice-th name of a property value, or NULL if the
+// property is unknown, the value has no name group, or there is no such name.
+const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
+    const int32_t propertyEntry=findProperty(property);
+    if(propertyEntry==0) {
+        return NULL;  // Not a known property.
+    }
+    // The second word of the entry is the index of the property's value map.
+    const int32_t valueMapStart=valueMaps[propertyEntry+1];
+    const int32_t groupOffset=findPropertyValueNameGroup(valueMapStart, value);
+    if(groupOffset==0) {
+        return NULL;
+    }
+    return getName(nameGroups+groupOffset, nameChoice);
+}
+
+// Maps an alias to its enum value using the BytesTrie that starts at
+// bytesTrieOffset inside bytesTries[]. Returns UCHAR_INVALID_CODE if the
+// alias is not in the trie.
+int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
+    BytesTrie trie(bytesTries+bytesTrieOffset);
+    if(!containsName(trie, alias)) {
+        return UCHAR_INVALID_CODE;
+    }
+    return trie.getValue();
+}
+
+// Maps a property alias to its enum value, or UCHAR_INVALID_CODE if unknown.
+int32_t PropNameData::getPropertyEnum(const char *alias) {
+    // The trie for property names is the one at offset 0 of bytesTries[].
+    const int32_t propertyNamesTrieOffset=0;
+    return getPropertyOrValueEnum(propertyNamesTrieOffset, alias);
+}
+
+// Maps a value alias of the given property to its enum value.
+// Returns UCHAR_INVALID_CODE if the property is unknown, has no named
+// values, or the alias is not found.
+int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
+    const int32_t propertyEntry=findProperty(property);
+    if(propertyEntry==0) {
+        return UCHAR_INVALID_CODE;  // Not a known property.
+    }
+    const int32_t valueMapStart=valueMaps[propertyEntry+1];
+    if(valueMapStart==0) {
+        return UCHAR_INVALID_CODE;  // The property does not have named values.
+    }
+    // The first word of the property's valueMap is its BytesTrie offset.
+    const int32_t trieOffset=valueMaps[valueMapStart];
+    return getPropertyOrValueEnum(trieOffset, alias);
+}
+U_NAMESPACE_END
+
+//----------------------------------------------------------------------
+// Public API implementation
+
+/* C entry point; forwards to PropNameData::getPropertyName(). */
+U_CAPI const char* U_EXPORT2
+u_getPropertyName(UProperty property,
+                  UPropertyNameChoice nameChoice) {
+    U_NAMESPACE_USE
+    const char *propertyName=PropNameData::getPropertyName(property, nameChoice);
+    return propertyName;
+}
+
+/* C entry point; forwards to PropNameData::getPropertyEnum() and casts the result. */
+U_CAPI UProperty U_EXPORT2
+u_getPropertyEnum(const char* alias) {
+    U_NAMESPACE_USE
+    const int32_t propertyCode=PropNameData::getPropertyEnum(alias);
+    return (UProperty)propertyCode;
+}
+
+/* C entry point; forwards to PropNameData::getPropertyValueName(). */
+U_CAPI const char* U_EXPORT2
+u_getPropertyValueName(UProperty property,
+                       int32_t value,
+                       UPropertyNameChoice nameChoice) {
+    U_NAMESPACE_USE
+    const char *valueName=PropNameData::getPropertyValueName(property, value, nameChoice);
+    return valueName;
+}
+
+/* C entry point; forwards to PropNameData::getPropertyValueEnum(). */
+U_CAPI int32_t U_EXPORT2
+u_getPropertyValueEnum(UProperty property,
+                       const char* alias) {
+    U_NAMESPACE_USE
+    const int32_t valueCode=PropNameData::getPropertyValueEnum(property, alias);
+    return valueCode;
+}
+
+/* Long name of a script: the UCHAR_SCRIPT value name for U_LONG_PROPERTY_NAME. */
+U_CAPI const char* U_EXPORT2
+uscript_getName(UScriptCode scriptCode){
+    const char *longName=
+        u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, U_LONG_PROPERTY_NAME);
+    return longName;
+}
+
+/* Short name of a script: the UCHAR_SCRIPT value name for U_SHORT_PROPERTY_NAME. */
+U_CAPI const char* U_EXPORT2
+uscript_getShortName(UScriptCode scriptCode){
+    const char *shortName=
+        u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, U_SHORT_PROPERTY_NAME);
+    return shortName;
+}
diff --git a/thirdparty/icu4c/common/propname.h b/thirdparty/icu4c/common/propname.h
new file mode 100644
index 0000000000..1a8ced5b87
--- /dev/null
+++ b/thirdparty/icu4c/common/propname.h
@@ -0,0 +1,212 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2002-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: October 30 2002
+* Since: ICU 2.4
+* 2010nov19 Markus Scherer Rewrite for formatVersion 2.
+**********************************************************************
+*/
+#ifndef PROPNAME_H
+#define PROPNAME_H
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/uchar.h"
+#include "udataswp.h"
+#include "uprops.h"
+
+/*
+ * This header defines the in-memory layout of the property names data
+ * structure representing the UCD data files PropertyAliases.txt and
+ * PropertyValueAliases.txt. It is used by:
+ * propname.cpp - reads data
+ * genpname - creates data
+ */
+
+/* low-level char * property name comparison -------------------------------- */
+
+U_CDECL_BEGIN
+
+/**
+ * \var uprv_comparePropertyNames
+ * Unicode property names and property value names are compared "loosely".
+ *
+ * UCD.html 4.0.1 says:
+ * For all property names, property value names, and for property values for
+ * Enumerated, Binary, or Catalog properties, use the following
+ * loose matching rule:
+ *
+ * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
+ *
+ * This function does just that, for (char *) name strings.
+ * It is almost identical to ucnv_compareNames() but also ignores
+ * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
+ *
+ * @internal
+ */
+
+U_CAPI int32_t U_EXPORT2
+uprv_compareASCIIPropertyNames(const char *name1, const char *name2);
+
+U_CAPI int32_t U_EXPORT2
+uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);
+
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
+#else
+# error U_CHARSET_FAMILY is not valid
+#endif
+
+U_CDECL_END
+
+/* UDataMemory structure and signatures ------------------------------------- */
+
+#define PNAME_DATA_NAME "pnames"
+#define PNAME_DATA_TYPE "icu"
+
+/* Fields in UDataInfo: */
+
+/* PNAME_SIG[] is encoded as numeric literals for compatibility with the HP compiler */
+#define PNAME_SIG_0 ((uint8_t)0x70) /* p */
+#define PNAME_SIG_1 ((uint8_t)0x6E) /* n */
+#define PNAME_SIG_2 ((uint8_t)0x61) /* a */
+#define PNAME_SIG_3 ((uint8_t)0x6D) /* m */
+
+U_NAMESPACE_BEGIN
+
+// Static-only accessor for the property names data tables
+// (indexes[], valueMaps[], bytesTries[], nameGroups[]).
+class PropNameData {
+public:
+ // Positions within indexes[]; the enumerator order defines the slot numbers.
+ enum {
+ // Byte offsets from the start of the data, after the generic header.
+ IX_VALUE_MAPS_OFFSET,
+ IX_BYTE_TRIES_OFFSET,
+ IX_NAME_GROUPS_OFFSET,
+ IX_RESERVED3_OFFSET,
+ IX_RESERVED4_OFFSET,
+ IX_TOTAL_SIZE,
+
+ // Other values.
+ IX_MAX_NAME_LENGTH,
+ IX_RESERVED7,
+ IX_COUNT
+ };
+
+ // Name lookup by enum; both return NULL when nothing is found.
+ static const char *getPropertyName(int32_t property, int32_t nameChoice);
+ static const char *getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice);
+
+ // Enum lookup by alias; both return UCHAR_INVALID_CODE when nothing is found.
+ static int32_t getPropertyEnum(const char *alias);
+ static int32_t getPropertyValueEnum(int32_t property, const char *alias);
+
+private:
+ // Returns the valueMaps[] index of the property's entry, or 0 if unknown.
+ static int32_t findProperty(int32_t property);
+ // Returns the name group offset for a value in the given value map, or 0.
+ static int32_t findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value);
+ // Returns the nameIndex-th string of a name group, or NULL.
+ static const char *getName(const char *nameGroup, int32_t nameIndex);
+ // Matches name against the trie, ignoring case, '-', '_' and ASCII white space.
+ static UBool containsName(BytesTrie &trie, const char *name);
+
+ // Shared alias lookup in the trie at bytesTrieOffset within bytesTries[].
+ static int32_t getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias);
+
+ // Data tables; defined in propname_data.h.
+ static const int32_t indexes[];
+ static const int32_t valueMaps[];
+ static const uint8_t bytesTries[];
+ static const char nameGroups[];
+};
+
+/*
+ * pnames.icu formatVersion 2
+ *
+ * formatVersion 2 is new in ICU 4.8.
+ * In ICU 4.8, the pnames.icu data file is used only in ICU4J.
+ * ICU4C 4.8 has the same data structures hardcoded in source/common/propname_data.h.
+ *
+ * For documentation of pnames.icu formatVersion 1 see ICU4C 4.6 (2010-dec-01)
+ * or earlier versions of this header file (source/common/propname.h).
+ *
+ * The pnames.icu begins with the standard ICU DataHeader/UDataInfo.
+ * After that:
+ *
+ * int32_t indexes[8];
+ *
+ * (See the PropNameData::IX_... constants.)
+ *
+ * The first 6 indexes are byte offsets from the beginning of the data
+ * (beginning of indexes[]) to following structures.
+ * The length of each structure is the difference between its offset
+ * and the next one.
+ * All offsets are filled in: Where there is no data between two offsets,
+ * those two offsets are the same.
+ * The last offset (indexes[PropNameData::IX_TOTAL_SIZE]) indicates the
+ * total number of bytes in the file. (Not counting the standard headers.)
+ *
+ * The seventh index (indexes[PropNameData::IX_MAX_NAME_LENGTH]) has the
+ * maximum length of any Unicode property (or property value) alias.
+ * (Without normalization, that is, including underscores etc.)
+ *
+ * int32_t valueMaps[];
+ *
+ * The valueMaps[] begins with a map from UProperty enums to properties,
+ * followed by the per-property value maps from property values to names,
+ * for those properties that have named values.
+ * (Binary & enumerated, plus General_Category_Mask.)
+ *
+ * valueMaps[0] contains the number of UProperty enum ranges.
+ * For each range:
+ * int32_t start, limit -- first and last+1 UProperty enum of a dense range
+ * Followed by (limit-start) pairs of
+ * int32_t nameGroupOffset;
+ * Offset into nameGroups[] for the property's names/aliases.
+ * int32_t valueMapIndex;
+ * Offset of the property's value map in the valueMaps[] array.
+ * If the valueMapIndex is 0, then the property does not have named values.
+ *
+ * For each property's value map:
+ * int32_t bytesTrieOffset; -- Offset into bytesTries[] for name->value mapping.
+ * int32_t numRanges;
+ * If numRanges is in the range 1..15, then that many ranges of values follow.
+ * Per range:
+ * int32_t start, limit -- first and last+1 property value of a range
+ * Followed by (limit-start) entries of
+ * int32_t nameGroupOffset;
+ * Offset into nameGroups[] for the property value's names/aliases.
+ * If the nameGroupOffset is 0, then this is not a named value for this property.
+ * (That is, the ranges need not be dense.)
+ * If numRanges is >=0x10, then (numRanges-0x10) sorted values
+ * and then (numRanges-0x10) corresponding nameGroupOffsets follow.
+ * Values are sorted as signed integers.
+ * In this case, the set of values is dense; no nameGroupOffset will be 0.
+ *
+ * For both properties and property values, ranges are sorted by their start/limit values.
+ *
+ * uint8_t bytesTries[];
+ *
+ * This is a sequence of BytesTrie structures, byte-serialized tries for
+ * mapping from names/aliases to values.
+ * The first one maps from property names/aliases to UProperty enum constants.
+ * The following ones are indexed by property value map bytesTrieOffsets
+ * for mapping each property's names/aliases to their property values.
+ *
+ * char nameGroups[];
+ *
+ * This is a sequence of property name groups.
+ * Each group is a list of names/aliases (invariant-character strings) for
+ * one property or property value, in the order of UPropertyNameChoice.
+ * The first byte of each group is the number of names in the group.
+ * It is followed by that many NUL-terminated strings.
+ * The first string is for the short name; if there is no short name,
+ * then the first string is empty.
+ * The second string is the long name. Further strings are additional aliases.
+ *
+ * The first name group is for a property rather than a property value,
+ * so that a nameGroupOffset of 0 can be used to indicate "no value"
+ * in a property's sparse value ranges.
+ */
+
+U_NAMESPACE_END
+
+#endif
diff --git a/thirdparty/icu4c/common/propname_data.h b/thirdparty/icu4c/common/propname_data.h
new file mode 100644
index 0000000000..6f63e9cdd4
--- /dev/null
+++ b/thirdparty/icu4c/common/propname_data.h
@@ -0,0 +1,1919 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// Copyright (C) 1999-2016, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// file name: propname_data.h
+//
+// machine-generated by: icu/tools/unicode/c/genprops/pnamesbuilder.cpp
+
+
+#ifdef INCLUDED_FROM_PROPNAME_CPP
+
+U_NAMESPACE_BEGIN
+
+const int32_t PropNameData::indexes[8]={0x20,0x15b8,0x5048,0xa69a,0xa69a,0xa69a,0x2f,0};
+
+const int32_t PropNameData::valueMaps[1382]={
+6,0,0x41,0,0xe3,0x368,0xe3,0x37e,0xe3,0x393,0xe3,0x3a9,0xe3,0x3b4,0xe3,0x3d5,
+0xe3,0x3e5,0xe3,0x3f4,0xe3,0x402,0xe3,0x426,0xe3,0x43d,0xe3,0x455,0xe3,0x46c,0xe3,0x47b,
+0xe3,0x48a,0xe3,0x49b,0xe3,0x4a9,0xe3,0x4bb,0xe3,0x4d5,0xe3,0x4f0,0xe3,0x505,0xe3,0x522,
+0xe3,0x533,0xe3,0x53e,0xe3,0x55d,0xe3,0x573,0xe3,0x584,0xe3,0x594,0xe3,0x5af,0xe3,0x5c8,
+0xe3,0x5d9,0xe3,0x5f3,0xe3,0x606,0xe3,0x616,0xe3,0x630,0xe3,0x649,0xe3,0x660,0xe3,0x674,
+0xe3,0x68a,0xe3,0x69e,0xe3,0x6b4,0xe3,0x6ce,0xe3,0x6e6,0xe3,0x702,0xe3,0x70a,0xe3,0x712,
+0xe3,0x71a,0xe3,0x722,0xe3,0x72b,0xe3,0x738,0xe3,0x74b,0xe3,0x768,0xe3,0x785,0xe3,0x7a2,
+0xe3,0x7c0,0xe3,0x7de,0xe3,0x802,0xe3,0x80f,0xe3,0x829,0xe3,0x83e,0xe3,0x859,0xe3,0x870,
+0xe3,0x887,0xe3,0x8a9,0xe3,0x1000,0x1019,0x8c8,0x15f,0xae8,0x17a,0x2f11,0xe9,0x2f30,0x2b3,0x306e,
+0x2c9,0x30c8,0x2d3,0x3325,0x2f5,0x3c20,0x35f,0x3c90,0x369,0x3f2a,0x398,0x3f68,0x3a0,0x4a5b,0x465,0x4ad9,
+0x46f,0x4afe,0x475,0x4b18,0x47b,0x4b39,0x482,0x4b53,0xe9,0x4b78,0xe9,0x4b9e,0x489,0x4c48,0x49f,0x4cc1,
+0x4b2,0x4d73,0x4cd,0x4daa,0x4d4,0x4f8a,0x4e8,0x540a,0x510,0x2000,0x2001,0x5469,0x518,0x3000,0x3001,0x54f5,
+0,0x4000,0x400e,0x5507,0,0x5510,0,0x552a,0,0x553b,0,0x554c,0,0x5562,0,0x556b,
+0,0x5588,0,0x55a6,0,0x55c4,0,0x55e2,0,0x55f8,0,0x560c,0,0x5622,0,0x7000,
+0x7001,0x563b,0,0x7d6,0x12,0,1,0x12,0x20,0x7f4,0x4a,0,1,6,7,8,
+9,0xa,0xb,0xc,0xd,0xe,0xf,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,
+0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x54,0x5b,0x67,0x6b,
+0x76,0x7a,0x81,0x82,0x84,0x85,0xc8,0xca,0xd6,0xd8,0xda,0xdc,0xde,0xe0,0xe2,0xe4,
+0xe6,0xe8,0xe9,0xea,0xf0,0x2e,0x40,0x4c,0x5e,0x68,0x79,0x84,0x91,0x9e,0xab,0xb8,
+0xc5,0xd2,0xdf,0xec,0xf9,0x106,0x113,0x120,0x12d,0x13a,0x147,0x154,0x161,0x16e,0x17b,0x188,
+0x195,0x1a2,0x1af,0x1bc,0x1c9,0x1d6,0x1e3,0x1f0,0x1fd,0x20c,0x21b,0x22a,0x239,0x248,0x257,0x266,
+0x275,0x28f,0x2a3,0x2b7,0x2d2,0x2e1,0x2ea,0x2fa,0x302,0x30b,0x31a,0x323,0x333,0x344,0x355,0x995,
+1,0,0x17,0x8d7,0x8e8,0x8f9,0x90d,0x924,0x93c,0x94e,0x963,0x97a,0x98f,0x99f,0x9b1,0x9ce,
+0x9ea,0x9fc,0xa19,0xa35,0xa51,0xa66,0xa7b,0xa95,0xab0,0xacb,0xb37,1,0,0x135,0xaf3,0xb00,
+0xb13,0xb3b,0xb59,0xb77,0xb8f,0xbba,0xbe4,0xbfc,0xc0f,0xc22,0xc31,0xc40,0xc4f,0xc5e,0xc75,0xc86,
+0xc99,0xcac,0xcb9,0xcc6,0xcd5,0xce6,0xcfb,0xd0c,0xd17,0xd20,0xd31,0xd42,0xd55,0xd67,0xd7a,0xd8d,
+0xdcc,0xdd9,0xde6,0xdf3,0xe08,0xe38,0xe52,0xe73,0xe9e,0xec1,0xf1f,0xf46,0xf61,0xf70,0xf97,0xfbf,
+0xfe2,0x1005,0x102f,0x1048,0x1067,0x108a,0x10ae,0x10c1,0x10db,0x1105,0x111d,0x1145,0x116e,0x1181,0x1194,0x11a7,
+0x11ce,0x11dd,0x11fd,0x122b,0x1249,0x1277,0x1293,0x12ae,0x12c7,0x12e0,0x1301,0x1331,0x1350,0x1372,0x13a6,0x13d3,
+0x1418,0x1439,0x1463,0x1484,0x14ad,0x14c0,0x14f3,0x150a,0x1519,0x152a,0x1555,0x156c,0x159d,0x15cb,0x160e,0x1619,
+0x1652,0x1663,0x1674,0x1681,0x1694,0x16ce,0x16f2,0x1716,0x1750,0x1788,0x17b3,0x17cb,0x17f7,0x1823,0x1830,0x183f,
+0x185c,0x187e,0x18ac,0x18cc,0x18f3,0x191a,0x1939,0x194c,0x195d,0x196e,0x1993,0x19b8,0x19df,0x1a13,0x1a40,0x1a5e,
+0x1a71,0x1a8a,0x1ac3,0x1ad2,0x1af2,0x1b14,0x1b36,0x1b4d,0x1b64,0x1b91,0x1baa,0x1bc3,0x1bf4,0x1c1e,0x1c39,0x1c4c,
+0x1c6b,0x1c74,0x1c87,0x1ca5,0x1cc3,0x1cd6,0x1ced,0x1d02,0x1d37,0x1d5b,0x1d70,0x1d7f,0x1d92,0x1db6,0x1dbf,0x1de3,
+0x1dfa,0x1e0d,0x1e1c,0x1e27,0x1e48,0x1e60,0x1e6f,0x1e7e,0x1e8d,0x1ea4,0x1eb9,0x1ece,0x1f07,0x1f1a,0x1f36,0x1f41,
+0x1f4e,0x1f7c,0x1fa0,0x1fc3,0x1fd6,0x1ff8,0x200b,0x2026,0x2049,0x206c,0x2091,0x20a2,0x20d1,0x20fe,0x2115,0x2130,
+0x213f,0x216a,0x21a2,0x21dc,0x220a,0x221b,0x2228,0x224c,0x225b,0x2277,0x2291,0x22ae,0x22e6,0x22fb,0x2328,0x2347,
+0x2375,0x2395,0x23c9,0x23d8,0x2402,0x2425,0x2450,0x245b,0x246c,0x2487,0x24ab,0x24b8,0x24cd,0x24f4,0x251f,0x2556,
+0x2569,0x257a,0x25aa,0x25bb,0x25ca,0x25df,0x25fd,0x2610,0x2623,0x263a,0x2657,0x2662,0x266b,0x268d,0x26a2,0x26c7,
+0x26de,0x2707,0x2722,0x2737,0x2750,0x2771,0x27a6,0x27b7,0x27e8,0x280c,0x281d,0x2836,0x2841,0x286e,0x2890,0x28be,
+0x28f1,0x2900,0x2911,0x292e,0x2970,0x2997,0x29a4,0x29b9,0x29dd,0x2a03,0x2a3c,0x2a4d,0x2a71,0x2a7c,0x2a89,0x2a98,
+0x2abd,0x2aeb,0x2b07,0x2b24,0x2b31,0x2b42,0x2b60,0x2b83,0x2ba0,0x2bad,0x2bcd,0x2bea,0x2c0b,0x2c34,0x2c45,0x2c64,
+0x2c7d,0x2c96,0x2ca7,0x2cf0,0x2d01,0x2d1a,0x2d49,0x2d76,0x2d9b,0x2ddd,0x2df9,0x2e08,0x2e1f,0x2e4d,0x2e66,0x2e8f,
+0x2ea9,0x2ee4,0x2f02,0x1e85,1,0,0x12,0x2f47,0x2f57,0x2f6a,0x2f7a,0x2f8a,0x2f99,0x2fa9,0x2fbb,0x2fce,
+0x2fe0,0x2ff0,0x3000,0x300f,0x301e,0x302e,0x303b,0x304a,0x305e,0x1f43,1,0,6,0x3083,0x308e,0x309b,
+0x30a8,0x30b5,0x30c0,0x1f87,1,0,0x1e,0x30dd,0x30ec,0x3101,0x3116,0x312b,0x313f,0x3150,0x3164,0x3177,
+0x3188,0x31a1,0x31b3,0x31c4,0x31d8,0x31eb,0x3203,0x3215,0x3220,0x3230,0x323e,0x3253,0x3268,0x327e,0x3298,0x32ae,
+0x32be,0x32d2,0x32e6,0x32f7,0x330f,0x21b2,1,0,0x66,0x3337,0x335a,0x3363,0x3370,0x337b,0x3384,0x338f,
+0x3398,0x33b1,0x33b6,0x33bf,0x33dc,0x33e5,0x33f2,0x33fb,0x341f,0x3426,0x342f,0x3442,0x344d,0x3456,0x3461,0x347a,
+0x3483,0x3492,0x349d,0x34a6,0x34b1,0x34ba,0x34c1,0x34ca,0x34d5,0x34de,0x34f7,0x3500,0x350d,0x3518,0x3529,0x3534,
+0x3549,0x3560,0x3569,0x3572,0x358b,0x3596,0x359f,0x35a8,0x35bf,0x35dc,0x35e7,0x35f8,0x3603,0x360a,0x3617,0x3624,
+0x3651,0x3666,0x366f,0x368a,0x36ad,0x36ce,0x36ef,0x3714,0x373b,0x375c,0x377f,0x37a0,0x37c7,0x37e8,0x380d,0x382c,
+0x384b,0x386a,0x3887,0x38a8,0x38c9,0x38ec,0x3911,0x3930,0x394f,0x3970,0x3997,0x39bc,0x39db,0x39fc,0x3a1f,0x3a3a,
+0x3a53,0x3a6e,0x3a87,0x3aa4,0x3abf,0x3adc,0x3afb,0x3b18,0x3b35,0x3b54,0x3b71,0x3b8c,0x3ba9,0x3bc6,0x3bf9,0x24f7,
+1,0,6,0x3c31,0x3c40,0x3c50,0x3c60,0x3c70,0x3c81,0x2555,1,0,0x2b,0x3c9f,0x3cab,0x3cb9,
+0x3cc8,0x3cd7,0x3ce7,0x3cf8,0x3d0c,0x3d21,0x3d37,0x3d4a,0x3d5e,0x3d6e,0x3d77,0x3d82,0x3d92,0x3dae,0x3dc0,0x3dce,
+0x3ddd,0x3de9,0x3dfe,0x3e12,0x3e25,0x3e33,0x3e47,0x3e55,0x3e5f,0x3e71,0x3e7d,0x3e8b,0x3e9b,0x3ea2,0x3ea9,0x3eb0,
+0x3eb7,0x3ebe,0x3ed4,0x3ef5,0x870,0x3f07,0x3f12,0x3f21,0x27ae,1,0,4,0x3f3b,0x3f46,0x3f52,0x3f5c,
+0x27d4,1,0,0xc1,0x3f73,0x3f80,0x3f95,0x3fa2,0x3fb1,0x3fbf,0x3fce,0x3fdd,0x3fef,0x3ffe,0x400c,0x401d,
+0x402c,0x403b,0x4048,0x4054,0x4063,0x4072,0x407c,0x4089,0x4096,0x40a5,0x40b3,0x40c2,0x40ce,0x40d8,0x40e4,0x40f4,
+0x4104,0x4112,0x411e,0x412f,0x413b,0x4147,0x4155,0x4162,0x416e,0x417b,0xd0c,0x4188,0x4196,0x41b0,0x41b9,0x41c7,
+0x41d5,0x41e1,0x41f0,0x41fe,0x420c,0x4218,0x4227,0x4235,0x4243,0x4250,0x425f,0x427a,0x4289,0x429a,0x42ab,0x42be,
+0x42d0,0x42df,0x42f1,0x4300,0x430c,0x4317,0x1e1c,0x4324,0x432f,0x433a,0x4345,0x4350,0x436b,0x4376,0x4381,0x438c,
+0x439f,0x43b3,0x43be,0x43cd,0x43dc,0x43e7,0x43f2,0x43ff,0x440e,0x441c,0x4427,0x4442,0x444c,0x445d,0x446e,0x447d,
+0x448e,0x4499,0x44a4,0x44af,0x44ba,0x44c5,0x44d0,0x44db,0x44e5,0x44f0,0x4500,0x450b,0x4519,0x4526,0x4531,0x4540,
+0x454d,0x455a,0x4569,0x4576,0x4587,0x4599,0x45a9,0x45b4,0x45c7,0x45de,0x45ec,0x45f9,0x4604,0x4611,0x4622,0x463e,
+0x4654,0x465f,0x467c,0x468c,0x469b,0x46a6,0x46b1,0x1f36,0x46bd,0x46c8,0x46e0,0x46f0,0x46ff,0x470d,0x471b,0x4726,
+0x4731,0x4745,0x475c,0x4774,0x4784,0x4794,0x47a4,0x47b6,0x47c1,0x47cc,0x47d6,0x47e2,0x47f0,0x4803,0x480f,0x481c,
+0x4827,0x4843,0x4850,0x485e,0x4877,0x2836,0x4886,0x2657,0x4893,0x48a1,0x48b3,0x48c1,0x48cd,0x48dd,0x2a71,0x48eb,
+0x48f7,0x4902,0x490d,0x4918,0x492c,0x493a,0x4951,0x495d,0x4971,0x497f,0x4991,0x49a7,0x49b5,0x49c7,0x49d5,0x49f2,
+0x4a04,0x4a11,0x4a22,0x4a34,0x4a4e,0x31cc,1,0,6,0x4a75,0x4a88,0x4a98,0x4aa6,0x4ab7,0x4ac7,0x3228,
+0x12,0,1,0x4af1,0x4af7,0x3235,0x12,0,1,0x4af1,0x4af7,0x3242,1,0,3,0x4af1,
+0x4af7,0x4b30,0x3258,1,0,3,0x4af1,0x4af7,0x4b30,0x326e,1,0,0x12,0x4bba,0x4bc4,0x4bd0,
+0x4bd7,0x4be2,0x4be7,0x4bee,0x4bf5,0x4bfe,0x4c03,0x4c08,0x4c18,0x870,0x3f07,0x4c24,0x3f12,0x4c34,0x3f21,0x3317,
+1,0,0xf,0x4bba,0x4c5b,0x4c65,0x4c6f,0x4c7a,0x3ddd,0x4c84,0x4c90,0x4c98,0x4c9f,0x4ca9,0x4bd0,0x4bd7,
+0x4be7,0x4cb3,0x339e,1,0,0x17,0x4bba,0x4cd0,0x4c6f,0x4cdc,0x4ce9,0x4cf7,0x3ddd,0x4d02,0x4bd0,0x4d13,
+0x4be7,0x4d22,0x4d30,0x870,0x3ef5,0x4d3c,0x4d4d,0x3f07,0x4c24,0x3f12,0x4c34,0x3f21,0x4d5e,0x34bb,1,0,
+3,0x4d91,0x4d99,0x4da1,0x34d4,1,0,0x10,0x4dca,0x4dd1,0x4de0,0x4e01,0x4e24,0x4e2f,0x4e4e,0x4e65,
+0x4e72,0x4e7b,0x4e9a,0x4ecd,0x4ee8,0x4f17,0x4f34,0x4f59,0x356d,1,0,0x24,0x4fa8,0x4fb5,0x4fc8,0x4fd5,
+0x5002,0x5027,0x503c,0x505b,0x507c,0x50a9,0x50e2,0x5105,0x5128,0x5155,0x518a,0x51b1,0x51da,0x5211,0x5240,0x5261,
+0x5286,0x5295,0x52b8,0x52cf,0x52dc,0x52eb,0x5308,0x5321,0x5344,0x5369,0x5382,0x5397,0x53a6,0x53b7,0x53c4,0x53e5,
+0x373d,1,0,4,0x5423,0x542e,0x5446,0x545e,0x3779,0x36,1,2,4,8,0xe,0x10,
+0x20,0x3e,0x40,0x80,0x100,0x1c0,0x200,0x400,0x800,0xe00,0x1000,0x2000,0x4000,0x7000,0x8000,0x10000,
+0x20000,0x40000,0x78001,0x80000,0x100000,0x200000,0x400000,0x800000,0x1000000,0x2000000,0x4000000,0x8000000,0xf000000,0x10000000,0x20000000,0x30f80000,
+0x30dd,0x30ec,0x3101,0x3116,0x5497,0x312b,0x313f,0x548d,0x3150,0x3164,0x3177,0x54a8,0x3188,0x31a1,0x31b3,0x54bf,
+0x31c4,0x31d8,0x31eb,0x54e8,0x3203,0x3215,0x3220,0x3230,0x5484,0x323e,0x3253,0x3268,0x327e,0x3298,0x32ae,0x32be,
+0x32d2,0x32e6,0x54de,0x32f7,0x330f,0x54c9
+};
+
+const uint8_t PropNameData::bytesTries[14992]={
+0,0x15,0x6d,0xc3,0x78,0x73,0xc2,0x12,0x76,0x7a,0x76,0x6a,0x77,0xa2,0x52,0x78,
+1,0x64,0x50,0x69,0x10,0x64,1,0x63,0x30,0x73,0x62,0x13,0x74,0x61,0x72,0x74,
+0x63,0x60,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x61,0x13,0x69,0x67,0x69,0x74,
+0x81,3,0x61,0x2e,0x65,0x4c,0x6f,0xc3,0x18,0x73,0x69,0x1e,0x72,0x69,0x61,0x74,
+0x69,0x6f,0x6e,0x73,0x65,0x6c,0x65,0x63,0x74,0x6f,0x72,0x69,0x10,0x72,0x1f,0x74,
+0x69,0x63,0x61,0x6c,0x6f,0x72,0x69,0x65,0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,0xc3,
+0x18,3,0x62,0xc3,0x14,0x68,0x32,0x6f,0x42,0x73,0x13,0x70,0x61,0x63,0x65,0x5f,
+0x17,0x69,0x74,0x65,0x73,0x70,0x61,0x63,0x65,0x5f,0x16,0x72,0x64,0x62,0x72,0x65,
+0x61,0x6b,0xc3,0x14,0x73,0xa2,0x49,0x74,0xa4,0x3b,0x75,3,0x63,0xd9,0x40,0xc,
+0x69,0x52,0x6e,0x58,0x70,0x12,0x70,0x65,0x72,0x5c,0x13,0x63,0x61,0x73,0x65,0x5c,
+0x16,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,0xc,0x12,0x64,0x65,0x6f,0x5b,
+0x10,0x69,1,0x63,0x3e,0x66,0x1b,0x69,0x65,0x64,0x69,0x64,0x65,0x6f,0x67,0x72,
+0x61,0x70,0x68,0x5b,0x17,0x6f,0x64,0x65,0x31,0x6e,0x61,0x6d,0x65,0xd9,0x40,0xb,
+0xa,0x69,0x84,0x70,0x19,0x70,0x30,0x74,0x36,0x75,0x10,0x63,0xd9,0x40,9,0x12,
+0x61,0x63,0x65,0x5f,1,0x63,0xd9,0x40,8,0x65,0x11,0x72,0x6d,0x67,0x69,0x3c,
+0x6c,0xa2,0x5f,0x6f,0x17,0x66,0x74,0x64,0x6f,0x74,0x74,0x65,0x64,0x57,0x13,0x6d,
+0x70,0x6c,0x65,3,0x63,0x50,0x6c,0x68,0x74,0x8a,0x75,0x1e,0x70,0x70,0x65,0x72,
+0x63,0x61,0x73,0x65,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,9,0x19,0x61,
+0x73,0x65,0x66,0x6f,0x6c,0x64,0x69,0x6e,0x67,0xd9,0x40,6,0x1e,0x6f,0x77,0x65,
+0x72,0x63,0x61,0x73,0x65,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,7,0x1e,
+0x69,0x74,0x6c,0x65,0x63,0x61,0x73,0x65,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,
+0x40,8,0x10,0x63,0xd9,0x40,7,0x62,0xc3,0x13,0x63,0x34,0x64,0x57,0x65,0x6e,
+0x66,0x10,0x63,0xd9,0x40,6,0xc2,0xa,2,0x66,0xd9,0x40,6,0x72,0x28,0x78,
+0xd9,0x70,0,0x12,0x69,0x70,0x74,0xc2,0xa,0x19,0x65,0x78,0x74,0x65,0x6e,0x73,
+0x69,0x6f,0x6e,0x73,0xd9,0x70,0,1,0x67,0x6a,0x6e,1,0x73,0x54,0x74,0x13,
+0x65,0x6e,0x63,0x65,1,0x62,0x34,0x74,0x16,0x65,0x72,0x6d,0x69,0x6e,0x61,0x6c,
+0x67,0x13,0x72,0x65,0x61,0x6b,0xc3,0x13,0x14,0x69,0x74,0x69,0x76,0x65,0x65,1,
+0x6d,0x2e,0x73,0x13,0x74,0x61,0x72,0x74,0x73,0x19,0x65,0x6e,0x74,0x73,0x74,0x61,
+0x72,0x74,0x65,0x72,0x73,3,0x63,0x66,0x65,0x72,0x69,0x98,0x72,0x19,0x61,0x69,
+0x6c,0x63,0x61,0x6e,0x6f,0x6e,0x69,0x63,0x1f,0x61,0x6c,0x63,0x6f,0x6d,0x62,0x69,
+0x6e,0x69,0x6e,0x67,0x63,0x6c,0x61,0x73,0x73,0xc3,0x11,0xd8,0x40,0xa,0x11,0x63,
+0x63,0xc3,0x11,0x11,0x72,0x6d,0x58,0x1e,0x69,0x6e,0x61,0x6c,0x70,0x75,0x6e,0x63,
+0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x59,0x1d,0x74,0x6c,0x65,0x63,0x61,0x73,0x65,
+0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,0xa,0x6d,0x70,0x6e,0x76,0x70,0xa2,
+0xf1,0x71,0xa4,0x43,0x72,2,0x61,0x28,0x65,0x32,0x69,0x9d,0x14,0x64,0x69,0x63,
+0x61,0x6c,0x55,0x1e,0x67,0x69,0x6f,0x6e,0x61,0x6c,0x69,0x6e,0x64,0x69,0x63,0x61,
+0x74,0x6f,0x72,0x9d,0x12,0x61,0x74,0x68,0x4f,6,0x6f,0x39,0x6f,0x32,0x74,0xc3,
+9,0x75,0x54,0x76,0xd9,0x30,0,0x12,0x6e,0x63,0x68,0x1f,0x61,0x72,0x61,0x63,
+0x74,0x65,0x72,0x63,0x6f,0x64,0x65,0x70,0x6f,0x69,0x6e,0x74,0x51,0x14,0x6d,0x65,
+0x72,0x69,0x63,1,0x74,0x32,0x76,0x13,0x61,0x6c,0x75,0x65,0xd9,0x30,0,0x12,
+0x79,0x70,0x65,0xc3,9,0x61,0xa2,0x77,0x63,0xa2,0x82,0x66,2,0x63,0x98,0x64,
+0xa2,0x53,0x6b,1,0x63,0x56,0x64,1,0x69,0x42,0x71,1,0x63,0xc3,0xd,0x75,
+0x17,0x69,0x63,0x6b,0x63,0x68,0x65,0x63,0x6b,0xc3,0xd,0x13,0x6e,0x65,0x72,0x74,
+0x6d,1,0x69,0x42,0x71,1,0x63,0xc3,0xf,0x75,0x17,0x69,0x63,0x6b,0x63,0x68,
+0x65,0x63,0x6b,0xc3,0xf,0x13,0x6e,0x65,0x72,0x74,0x71,1,0x69,0x42,0x71,1,
+0x63,0xc3,0xe,0x75,0x17,0x69,0x63,0x6b,0x63,0x68,0x65,0x63,0x6b,0xc3,0xe,0x13,
+0x6e,0x65,0x72,0x74,0x6f,1,0x69,0x42,0x71,1,0x63,0xc3,0xc,0x75,0x17,0x69,
+0x63,0x6b,0x63,0x68,0x65,0x63,0x6b,0xc3,0xc,0x13,0x6e,0x65,0x72,0x74,0x6b,0xd8,
+0x40,5,1,0x31,0xd9,0x40,0xb,0x6d,0x10,0x65,0xd9,0x40,5,0x12,0x68,0x61,
+0x72,0x51,2,0x61,0x6c,0x63,0xa2,0x4c,0x72,1,0x65,0x2a,0x69,0x11,0x6e,0x74,
+0x7f,0x16,0x70,0x65,0x6e,0x64,0x65,0x64,0x63,0x1f,0x6f,0x6e,0x63,0x61,0x74,0x65,
+0x6e,0x61,0x74,0x69,0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x9f,0x10,0x74,2,0x73,0x2c,
+0x74,0x30,0x77,0x10,0x73,0x77,0x11,0x79,0x6e,0x75,0x12,0x65,0x72,0x6e,1,0x73,
+0x38,0x77,0x18,0x68,0x69,0x74,0x65,0x73,0x70,0x61,0x63,0x65,0x77,0x14,0x79,0x6e,
+0x74,0x61,0x78,0x75,0x10,0x6d,0x9f,1,0x6d,0x3c,0x75,0x1a,0x6f,0x74,0x61,0x74,
+0x69,0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x53,0x12,0x61,0x72,0x6b,0x53,0x66,0xc1,0xf8,
+0x69,0xc1,0x3c,0x69,0xa2,0x6f,0x6a,0xa4,9,0x6c,4,0x62,0xc3,8,0x63,0x8c,
+0x65,0x98,0x69,0xa2,0x56,0x6f,2,0x65,0x4b,0x67,0x4c,0x77,0x11,0x65,0x72,0x4c,
+0x13,0x63,0x61,0x73,0x65,0x4c,0x16,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,
+4,0x11,0x69,0x63,0x1f,0x61,0x6c,0x6f,0x72,0x64,0x65,0x72,0x65,0x78,0x63,0x65,
+0x70,0x74,0x69,0x6f,0x6e,0x4b,0xd8,0x40,4,0x11,0x63,0x63,0xc3,0x10,0x18,0x61,
+0x64,0x63,0x61,0x6e,0x6f,0x6e,0x69,0x63,0x1f,0x61,0x6c,0x63,0x6f,0x6d,0x62,0x69,
+0x6e,0x69,0x6e,0x67,0x63,0x6c,0x61,0x73,0x73,0xc3,0x10,0x16,0x6e,0x65,0x62,0x72,
+0x65,0x61,0x6b,0xc3,8,2,0x64,0x4a,0x6e,0xa2,0x5b,0x73,1,0x63,0xd9,0x40,
+3,0x6f,0x16,0x63,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0xd9,0x40,3,2,0x63,0x80,
+0x65,0x90,0x73,0x40,1,0x62,0x52,0x74,0x46,1,0x61,0x40,0x72,0x1c,0x69,0x6e,
+0x61,0x72,0x79,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x47,0x11,0x72,0x74,0x41,
+0x44,0x1c,0x69,0x6e,0x61,0x72,0x79,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x45,
+0x3e,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x3f,0x10,0x6f,0x42,0x16,0x67,0x72,
+0x61,0x70,0x68,0x69,0x63,0x43,2,0x64,0x2e,0x70,0x86,0x73,0x10,0x63,0xc3,0x17,
+0x11,0x69,0x63,1,0x70,0x46,0x73,0x1e,0x79,0x6c,0x6c,0x61,0x62,0x69,0x63,0x63,
+0x61,0x74,0x65,0x67,0x6f,0x72,0x79,0xc3,0x17,0x10,0x6f,0x1f,0x73,0x69,0x74,0x69,
+0x6f,0x6e,0x61,0x6c,0x63,0x61,0x74,0x65,0x67,0x6f,0x72,0x79,0xc3,0x16,0x10,0x63,
+0xc3,0x16,2,0x67,0xc3,6,0x6f,0x26,0x74,0xc3,7,0x11,0x69,0x6e,1,0x63,
+0x4a,0x69,0x11,0x6e,0x67,1,0x67,0x2e,0x74,0x12,0x79,0x70,0x65,0xc3,7,0x13,
+0x72,0x6f,0x75,0x70,0xc3,6,0x48,0x15,0x6f,0x6e,0x74,0x72,0x6f,0x6c,0x49,0x66,
+0x86,0x67,0xa2,0x4a,0x68,3,0x61,0x36,0x65,0x58,0x73,0x68,0x79,0x13,0x70,0x68,
+0x65,0x6e,0x3d,0x1f,0x6e,0x67,0x75,0x6c,0x73,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,
+0x74,0x79,0x70,0x65,0xc3,0xb,0x10,0x78,0x3a,0x14,0x64,0x69,0x67,0x69,0x74,0x3b,
+0x10,0x74,0xc3,0xb,0x16,0x75,0x6c,0x6c,0x63,0x6f,0x6d,0x70,0x1f,0x6f,0x73,0x69,
+0x74,0x69,0x6f,0x6e,0x65,0x78,0x63,0x6c,0x75,0x73,0x69,0x6f,0x6e,0x33,2,0x63,
+0xa2,0x44,0x65,0xa2,0x4b,0x72,3,0x61,0x34,0x62,0x84,0x65,0x8a,0x6c,0x12,0x69,
+0x6e,0x6b,0x39,0x11,0x70,0x68,0x7c,0x12,0x65,0x6d,0x65,3,0x62,0x5e,0x63,0x30,
+0x65,0x48,0x6c,0x12,0x69,0x6e,0x6b,0x39,0x1a,0x6c,0x75,0x73,0x74,0x65,0x72,0x62,
+0x72,0x65,0x61,0x6b,0xc3,0x12,0x14,0x78,0x74,0x65,0x6e,0x64,0x37,0x12,0x61,0x73,
+0x65,0x35,0x11,0x78,0x74,0x37,0xc2,5,1,0x62,0xc3,0x12,0x6d,0xd9,0x20,0,
+0x1c,0x6e,0x65,0x72,0x61,0x6c,0x63,0x61,0x74,0x65,0x67,0x6f,0x72,0x79,0xc2,5,
+0x13,0x6d,0x61,0x73,0x6b,0xd9,0x20,0,0x61,0xa2,0x90,0x62,0xa2,0xbe,0x63,0xa4,
+0x30,0x64,0xa4,0xfd,0x65,5,0x6d,0x63,0x6d,0x6e,0x70,0xa2,0x59,0x78,0x10,0x74,
+0x30,1,0x65,0x2c,0x70,0x12,0x69,0x63,0x74,0xa1,0x12,0x6e,0x64,0x65,1,0x64,
+0x24,0x72,0x31,0x1b,0x70,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x69,0x63,
+0xa1,0x10,0x6f,1,0x64,0x97,0x6a,0x10,0x69,0x92,2,0x63,0x40,0x6d,0x50,0x70,
+0x1a,0x72,0x65,0x73,0x65,0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,0x95,0x17,0x6f,0x6d,
+0x70,0x6f,0x6e,0x65,0x6e,0x74,0x9b,0x16,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x96,
+0x13,0x62,0x61,0x73,0x65,0x99,0x12,0x72,0x65,0x73,0x95,0x61,0x30,0x62,0x4e,0x63,
+0x12,0x6f,0x6d,0x70,0x9b,0xc2,4,0x1b,0x73,0x74,0x61,0x73,0x69,0x61,0x6e,0x77,
+0x69,0x64,0x74,0x68,0xc3,4,0x12,0x61,0x73,0x65,0x99,3,0x67,0x44,0x68,0x4a,
+0x6c,0x4e,0x73,0x1a,0x63,0x69,0x69,0x68,0x65,0x78,0x64,0x69,0x67,0x69,0x74,0x23,
+0x10,0x65,0xd9,0x40,0,0x11,0x65,0x78,0x23,1,0x6e,0x38,0x70,0x11,0x68,0x61,
+0x20,0x14,0x62,0x65,0x74,0x69,0x63,0x21,0x11,0x75,0x6d,0x79,4,0x63,0xc3,0,
+0x69,0x3e,0x6c,0xa2,0x57,0x6d,0xa2,0x64,0x70,1,0x62,0xd9,0x40,0xd,0x74,0xc3,
+0x15,0x11,0x64,0x69,2,0x63,0x54,0x6d,0x74,0x70,0x1b,0x61,0x69,0x72,0x65,0x64,
+0x62,0x72,0x61,0x63,0x6b,0x65,0x74,0xd8,0x40,0xd,0x13,0x74,0x79,0x70,0x65,0xc3,
+0x15,0x24,1,0x6c,0x30,0x6f,0x14,0x6e,0x74,0x72,0x6f,0x6c,0x25,0x12,0x61,0x73,
+0x73,0xc3,0,0x26,0x14,0x69,0x72,0x72,0x6f,0x72,1,0x65,0x38,0x69,0x16,0x6e,
+0x67,0x67,0x6c,0x79,0x70,0x68,0xd9,0x40,1,0x10,0x64,0x27,2,0x61,0x32,0x6b,
+0xc3,1,0x6f,0x11,0x63,0x6b,0xc3,1,0x11,0x6e,0x6b,0x7b,0x10,0x67,0xd9,0x40,
+1,6,0x68,0x7c,0x68,0x54,0x69,0x85,0x6f,0xa2,0x6f,0x77,4,0x63,0x30,0x6b,
+0x36,0x6c,0x87,0x74,0x8b,0x75,0x89,1,0x66,0x8d,0x6d,0x8f,0x11,0x63,0x66,0x91,
+0x18,0x61,0x6e,0x67,0x65,0x73,0x77,0x68,0x65,0x6e,4,0x63,0x44,0x6c,0x6c,0x6e,
+0x7e,0x74,0x98,0x75,0x18,0x70,0x70,0x65,0x72,0x63,0x61,0x73,0x65,0x64,0x89,0x12,
+0x61,0x73,0x65,1,0x66,0x30,0x6d,0x14,0x61,0x70,0x70,0x65,0x64,0x8f,0x14,0x6f,
+0x6c,0x64,0x65,0x64,0x8d,0x18,0x6f,0x77,0x65,0x72,0x63,0x61,0x73,0x65,0x64,0x87,
+0x1c,0x66,0x6b,0x63,0x63,0x61,0x73,0x65,0x66,0x6f,0x6c,0x64,0x65,0x64,0x91,0x18,
+0x69,0x74,0x6c,0x65,0x63,0x61,0x73,0x65,0x64,0x8b,0x13,0x6d,0x70,0x65,0x78,0x33,
+0x61,0x2e,0x63,0xa2,0x48,0x66,0xd9,0x40,2,1,0x6e,0x72,0x73,0x10,0x65,3,
+0x64,0x83,0x66,0x3a,0x69,0x4a,0x73,0x17,0x65,0x6e,0x73,0x69,0x74,0x69,0x76,0x65,
+0x65,0x15,0x6f,0x6c,0x64,0x69,0x6e,0x67,0xd9,0x40,2,0x17,0x67,0x6e,0x6f,0x72,
+0x61,0x62,0x6c,0x65,0x85,0x13,0x6f,0x6e,0x69,0x63,0x1f,0x61,0x6c,0x63,0x6f,0x6d,
+0x62,0x69,0x6e,0x69,0x6e,0x67,0x63,0x6c,0x61,0x73,0x73,0xc3,2,0x10,0x63,0xc3,
+2,3,0x61,0x30,0x65,0x34,0x69,0xa2,0x41,0x74,0xc3,3,0x11,0x73,0x68,0x29,
+2,0x63,0x3a,0x66,0x58,0x70,0x2c,0x16,0x72,0x65,0x63,0x61,0x74,0x65,0x64,0x2d,
+0x1d,0x6f,0x6d,0x70,0x6f,0x73,0x69,0x74,0x69,0x6f,0x6e,0x74,0x79,0x70,0x65,0xc3,
+3,0x15,0x61,0x75,0x6c,0x74,0x69,0x67,0x1f,0x6e,0x6f,0x72,0x61,0x62,0x6c,0x65,
+0x63,0x6f,0x64,0x65,0x70,0x6f,0x69,0x6e,0x74,0x2b,0x2a,0x10,0x61,0x2e,0x15,0x63,
+0x72,0x69,0x74,0x69,0x63,0x2f,3,0x66,0x34,0x6e,0x3e,0x74,0x42,0x79,0x22,0x11,
+0x65,0x73,0x23,0x20,0x13,0x61,0x6c,0x73,0x65,0x21,0x20,0x10,0x6f,0x21,0x22,0x12,
+0x72,0x75,0x65,0x23,0xb,0x6b,0x5b,0x6f,0x23,0x6f,0x3c,0x72,0x4c,0x76,1,0x69,
+0x24,0x72,0x33,0x13,0x72,0x61,0x6d,0x61,0x33,0x10,0x76,0x22,0x14,0x65,0x72,0x6c,
+0x61,0x79,0x23,0xa2,0xe2,0x13,0x69,0x67,0x68,0x74,0xa3,0xe2,0x6b,0x58,0x6c,0x74,
+0x6e,3,0x6b,0x2f,0x6f,0x30,0x72,0x21,0x75,0x12,0x6b,0x74,0x61,0x2f,0x19,0x74,
+0x72,0x65,0x6f,0x72,0x64,0x65,0x72,0x65,0x64,0x21,1,0x61,0x24,0x76,0x31,0x18,
+0x6e,0x61,0x76,0x6f,0x69,0x63,0x69,0x6e,0x67,0x31,0xa2,0xe0,0x12,0x65,0x66,0x74,
+0xa3,0xe0,0x64,0x45,0x64,0x4e,0x68,0x88,0x69,1,0x6f,0x26,0x73,0xa3,0xf0,0x1a,
+0x74,0x61,0x73,0x75,0x62,0x73,0x63,0x72,0x69,0x70,0x74,0xa3,0xf0,2,0x61,0xa3,
+0xea,0x62,0xa3,0xe9,0x6f,0x13,0x75,0x62,0x6c,0x65,1,0x61,0x30,0x62,0x13,0x65,
+0x6c,0x6f,0x77,0xa3,0xe9,0x13,0x62,0x6f,0x76,0x65,0xa3,0xea,0x12,0x61,0x6e,0x72,
+0x2c,0x15,0x65,0x61,0x64,0x69,0x6e,0x67,0x2d,0x61,0xa2,0x7b,0x62,0xa2,0xd4,0x63,
+0x11,0x63,0x63,4,0x31,0x3c,0x32,0xa2,0x42,0x33,0xa2,0x56,0x38,0xa2,0x64,0x39,
+0x10,0x31,0xa3,0x5b,9,0x35,0xa,0x35,0x3f,0x36,0x41,0x37,0x43,0x38,0x45,0x39,
+0x47,0x30,0x30,0x31,0x3c,0x32,0x42,0x33,0x4e,0x34,0x3d,0x34,1,0x33,0xa3,0x67,
+0x37,0xa3,0x6b,0x36,0x10,0x38,0xa3,0x76,0x38,1,0x32,0xa3,0x7a,0x39,0xa3,0x81,
+0x3a,2,0x30,0xa3,0x82,0x32,0xa3,0x84,0x33,0xa3,0x85,9,0x35,0xa,0x35,0x53,
+0x36,0x55,0x37,0x57,0x38,0x59,0x39,0x5b,0x30,0x49,0x31,0x4b,0x32,0x4d,0x33,0x4f,
+0x34,0x51,6,0x33,8,0x33,0x63,0x34,0x65,0x35,0x67,0x36,0x69,0x30,0x5d,0x31,
+0x5f,0x32,0x61,0x10,0x34,0xa3,0x54,0xa2,0xe6,3,0x62,0xa0,0x6c,0xa3,0xe4,0x72,
+0xa3,0xe8,0x74,2,0x61,0x74,0x62,0x7c,0x74,0x14,0x61,0x63,0x68,0x65,0x64,1,
+0x61,0x3e,0x62,0x13,0x65,0x6c,0x6f,0x77,0xa2,0xca,0x13,0x6c,0x65,0x66,0x74,0xa3,
+0xc8,0x13,0x62,0x6f,0x76,0x65,0xa2,0xd6,0x14,0x72,0x69,0x67,0x68,0x74,0xa3,0xd8,
+0xa2,0xd6,0x10,0x72,0xa3,0xd8,0xa2,0xca,0x10,0x6c,0xa3,0xc8,0x12,0x6f,0x76,0x65,
+0xa2,0xe6,1,0x6c,0x30,0x72,0x13,0x69,0x67,0x68,0x74,0xa3,0xe8,0x12,0x65,0x66,
+0x74,0xa3,0xe4,0xa2,0xdc,2,0x65,0x2c,0x6c,0xa3,0xda,0x72,0xa3,0xde,0x12,0x6c,
+0x6f,0x77,0xa2,0xdc,1,0x6c,0x30,0x72,0x13,0x69,0x67,0x68,0x74,0xa3,0xde,0x12,
+0x65,0x66,0x74,0xa3,0xda,0xb,0x6e,0xc0,0xca,0x72,0x5f,0x72,0x46,0x73,0xa2,0x48,
+0x77,1,0x68,0x24,0x73,0x33,0x17,0x69,0x74,0x65,0x73,0x70,0x61,0x63,0x65,0x33,
+0x22,1,0x69,0x30,0x6c,2,0x65,0x3d,0x69,0x4b,0x6f,0x3f,0x18,0x67,0x68,0x74,
+0x74,0x6f,0x6c,0x65,0x66,0x74,0x22,2,0x65,0x38,0x69,0x48,0x6f,0x16,0x76,0x65,
+0x72,0x72,0x69,0x64,0x65,0x3f,0x17,0x6d,0x62,0x65,0x64,0x64,0x69,0x6e,0x67,0x3d,
+0x15,0x73,0x6f,0x6c,0x61,0x74,0x65,0x4b,0x30,0x1e,0x65,0x67,0x6d,0x65,0x6e,0x74,
+0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x31,0x6e,0xa2,0x41,0x6f,0xa2,0x53,
+0x70,2,0x61,0x66,0x64,0x86,0x6f,0x1b,0x70,0x64,0x69,0x72,0x65,0x63,0x74,0x69,
+0x6f,0x6e,0x61,0x6c,1,0x66,0x32,0x69,0x15,0x73,0x6f,0x6c,0x61,0x74,0x65,0x4d,
+0x14,0x6f,0x72,0x6d,0x61,0x74,0x41,0x1f,0x72,0x61,0x67,0x72,0x61,0x70,0x68,0x73,
+0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x2f,1,0x66,0x41,0x69,0x4d,1,0x6f,
+0x28,0x73,0x10,0x6d,0x43,0x1b,0x6e,0x73,0x70,0x61,0x63,0x69,0x6e,0x67,0x6d,0x61,
+0x72,0x6b,0x43,1,0x6e,0x35,0x74,0x19,0x68,0x65,0x72,0x6e,0x65,0x75,0x74,0x72,
+0x61,0x6c,0x35,0x65,0x88,0x65,0x98,0x66,0xa2,0x6a,0x6c,0x20,1,0x65,0x30,0x72,
+2,0x65,0x37,0x69,0x49,0x6f,0x39,0x18,0x66,0x74,0x74,0x6f,0x72,0x69,0x67,0x68,
+0x74,0x20,2,0x65,0x38,0x69,0x48,0x6f,0x16,0x76,0x65,0x72,0x72,0x69,0x64,0x65,
+0x39,0x17,0x6d,0x62,0x65,0x64,0x64,0x69,0x6e,0x67,0x37,0x15,0x73,0x6f,0x6c,0x61,
+0x74,0x65,0x49,3,0x6e,0x25,0x73,0x27,0x74,0x29,0x75,0x15,0x72,0x6f,0x70,0x65,
+0x61,0x6e,2,0x6e,0x3c,0x73,0x46,0x74,0x18,0x65,0x72,0x6d,0x69,0x6e,0x61,0x74,
+0x6f,0x72,0x29,0x14,0x75,0x6d,0x62,0x65,0x72,0x25,0x17,0x65,0x70,0x61,0x72,0x61,
+0x74,0x6f,0x72,0x27,1,0x69,0x28,0x73,0x10,0x69,0x47,0x1f,0x72,0x73,0x74,0x73,
+0x74,0x72,0x6f,0x6e,0x67,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x47,0x61,0x4e,0x62,
+0x84,0x63,1,0x6f,0x24,0x73,0x2d,0x1c,0x6d,0x6d,0x6f,0x6e,0x73,0x65,0x70,0x61,
+0x72,0x61,0x74,0x6f,0x72,0x2d,2,0x6c,0x3b,0x6e,0x2b,0x72,0x13,0x61,0x62,0x69,
+0x63,1,0x6c,0x30,0x6e,0x14,0x75,0x6d,0x62,0x65,0x72,0x2b,0x14,0x65,0x74,0x74,
+0x65,0x72,0x3b,0x2e,1,0x6e,0x45,0x6f,0x1c,0x75,0x6e,0x64,0x61,0x72,0x79,0x6e,
+0x65,0x75,0x74,0x72,0x61,0x6c,0x45,0,0x16,0x6d,0xc8,0xc8,0x74,0xc1,0xee,0x77,
+0x6a,0x77,0x48,0x79,0x70,0x7a,0x1d,0x61,0x6e,0x61,0x62,0x61,0x7a,0x61,0x72,0x73,
+0x71,0x75,0x61,0x72,0x65,0xa5,0x18,0x10,0x61,1,0x6e,0x36,0x72,0x16,0x61,0x6e,
+0x67,0x63,0x69,0x74,0x69,0xa3,0xfc,0x12,0x63,0x68,0x6f,0xa5,0x2c,1,0x65,0x88,
+0x69,2,0x6a,0x3c,0x72,0x68,0x73,0x17,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x73,
+0xa3,0x48,0x12,0x69,0x6e,0x67,0xa2,0x74,0x1e,0x68,0x65,0x78,0x61,0x67,0x72,0x61,
+0x6d,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x74,0x16,0x61,0x64,0x69,0x63,0x61,
+0x6c,0x73,0xa3,0x49,0x13,0x7a,0x69,0x64,0x69,0xa5,0x34,0x74,0xa2,0x59,0x75,0xa4,
+0x35,0x76,2,0x61,0x36,0x65,0x7a,0x73,0xa2,0x6c,0x12,0x73,0x75,0x70,0xa3,0x7d,
+1,0x69,0xa3,0x9f,0x72,0x1e,0x69,0x61,0x74,0x69,0x6f,0x6e,0x73,0x65,0x6c,0x65,
+0x63,0x74,0x6f,0x72,0x73,0xa2,0x6c,0x19,0x73,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65,
+0x6e,0x74,0xa3,0x7d,1,0x64,0x3c,0x72,0x19,0x74,0x69,0x63,0x61,0x6c,0x66,0x6f,
+0x72,0x6d,0x73,0xa3,0x91,0x14,0x69,0x63,0x65,0x78,0x74,0xa2,0xaf,0x16,0x65,0x6e,
+0x73,0x69,0x6f,0x6e,0x73,0xa3,0xaf,4,0x61,0x68,0x65,0xa2,0xad,0x68,0xa2,0xb0,
+0x69,0xa2,0xb8,0x72,0x1c,0x61,0x6e,0x73,0x70,0x6f,0x72,0x74,0x61,0x6e,0x64,0x6d,
+0x61,0x70,0xa2,0xcf,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xcf,4,0x67,
+0x7e,0x69,0xa2,0x41,0x6b,0xa2,0x6a,0x6d,0xa2,0x6c,0x6e,0x12,0x67,0x75,0x74,0xa4,
+0x10,1,0x63,0x40,0x73,0x11,0x75,0x70,0xa4,0x33,0x16,0x70,0x6c,0x65,0x6d,0x65,
+0x6e,0x74,0xa5,0x33,0x18,0x6f,0x6d,0x70,0x6f,0x6e,0x65,0x6e,0x74,0x73,0xa5,0x11,
+2,0x61,0x2a,0x62,0x32,0x73,0xa3,0x60,0x12,0x6c,0x6f,0x67,0xa3,0x62,0x13,0x61,
+0x6e,0x77,0x61,0xa3,0x65,3,0x6c,0x52,0x74,0x56,0x76,0x5e,0x78,0x16,0x75,0x61,
+0x6e,0x6a,0x69,0x6e,0x67,0xa2,0x7c,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,
+0x7c,0x10,0x65,0xa3,0x70,0x12,0x68,0x61,0x6d,0xa3,0xae,0x12,0x69,0x65,0x74,0xa3,
+0xb7,0x11,0x72,0x69,0xa3,0xdc,0x11,0x69,0x6c,0x48,0x12,0x73,0x75,0x70,0xa4,0x2b,
+0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0x2b,0x13,0x6c,0x75,0x67,0x75,0x4b,
+0x10,0x61,1,0x61,0x24,0x69,0x53,0x11,0x6e,0x61,0x3d,2,0x62,0x34,0x66,0x3c,
+0x72,0x13,0x68,0x75,0x74,0x61,0xa3,0xfb,0x13,0x65,0x74,0x61,0x6e,0x57,0x14,0x69,
+0x6e,0x61,0x67,0x68,0xa3,0x90,2,0x63,0x82,0x67,0x92,0x6e,0x1f,0x69,0x66,0x69,
+0x65,0x64,0x63,0x61,0x6e,0x61,0x64,0x69,0x61,0x6e,0x61,0x62,0x6f,0x1f,0x72,0x69,
+0x67,0x69,0x6e,0x61,0x6c,0x73,0x79,0x6c,0x6c,0x61,0x62,0x69,0x63,0x73,0x62,0x17,
+0x65,0x78,0x74,0x65,0x6e,0x64,0x65,0x64,0xa3,0xad,0x11,0x61,0x73,0x62,0x12,0x65,
+0x78,0x74,0xa3,0xad,0x15,0x61,0x72,0x69,0x74,0x69,0x63,0xa3,0x78,0x70,0xc3,0x4b,
+0x70,0xa6,0x61,0x72,0xa8,0x1d,0x73,7,0x6f,0xc1,0xbe,0x6f,0xa2,0x69,0x70,0xa2,
+0x85,0x75,0xa2,0xa4,0x79,2,0x6c,0x50,0x6d,0x62,0x72,0x12,0x69,0x61,0x63,0x3a,
+0x12,0x73,0x75,0x70,0xa4,0x17,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0x17,
+0x17,0x6f,0x74,0x69,0x6e,0x61,0x67,0x72,0x69,0xa3,0x8f,0x13,0x62,0x6f,0x6c,0x73,
+1,0x61,0x4c,0x66,0x10,0x6f,0x1f,0x72,0x6c,0x65,0x67,0x61,0x63,0x79,0x63,0x6f,
+0x6d,0x70,0x75,0x74,0x69,0x6e,0x67,0xa5,0x32,0x1f,0x6e,0x64,0x70,0x69,0x63,0x74,
+0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0x65,0x78,0x74,1,0x61,0xa5,0x2a,0x65,0x14,
+0x6e,0x64,0x65,0x64,0x61,0xa5,0x2a,2,0x67,0x34,0x72,0x3e,0x79,0x13,0x6f,0x6d,
+0x62,0x6f,0xa5,0x16,0x13,0x64,0x69,0x61,0x6e,0xa5,0x23,0x17,0x61,0x73,0x6f,0x6d,
+0x70,0x65,0x6e,0x67,0xa3,0xda,1,0x61,0x32,0x65,0x14,0x63,0x69,0x61,0x6c,0x73,
+0xa3,0x56,0x12,0x63,0x69,0x6e,0x1f,0x67,0x6d,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,
+0x6c,0x65,0x74,0x74,0x65,0x72,0x73,0x2d,2,0x6e,0x48,0x70,0x76,0x74,0x1d,0x74,
+0x6f,0x6e,0x73,0x69,0x67,0x6e,0x77,0x72,0x69,0x74,0x69,0x6e,0x67,0xa5,6,0x15,
+0x64,0x61,0x6e,0x65,0x73,0x65,0xa2,0x9b,0x12,0x73,0x75,0x70,0xa2,0xdb,0x16,0x70,
+0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xdb,4,0x61,0xa2,0xa8,0x65,0x5c,0x6d,0x9e,
+0x70,0xa2,0x4b,0x73,0x13,0x79,0x6d,0x62,0x6f,0x1f,0x6c,0x73,0x61,0x6e,0x64,0x70,
+0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa5,5,0x10,0x72,1,0x61,
+0x4e,0x73,0x12,0x63,0x72,0x69,0x1f,0x70,0x74,0x73,0x61,0x6e,0x64,0x73,0x75,0x62,
+0x73,0x63,0x72,0x69,0x70,0x74,0x73,0x73,0x14,0x6e,0x64,0x73,0x75,0x62,0x73,0x1b,
+0x61,0x74,0x68,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x73,0xa3,0x6a,1,0x6c,
+0x40,0x75,1,0x61,0x6e,0x6e,0x17,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa3,
+0x8e,0x15,0x65,0x6d,0x65,0x6e,0x74,0x61,1,0x6c,0x50,0x72,0x1e,0x79,0x70,0x72,
+0x69,0x76,0x61,0x74,0x65,0x75,0x73,0x65,0x61,0x72,0x65,0x61,1,0x61,0xa3,0x6d,
+0x62,0xa3,0x6e,3,0x61,0x5c,0x6d,0x78,0x70,0xa2,0x41,0x73,0x13,0x79,0x6d,0x62,
+0x6f,0x1f,0x6c,0x73,0x61,0x6e,0x64,0x70,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,
+0x68,0x73,0xa5,5,0x14,0x72,0x72,0x6f,0x77,0x73,2,0x61,0xa3,0x67,0x62,0xa3,
+0x68,0x63,0xa3,0xfa,0x13,0x61,0x74,0x68,0x65,0x1f,0x6d,0x61,0x74,0x69,0x63,0x61,
+0x6c,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x73,0xa3,0x6a,0x19,0x75,0x6e,0x63,
+0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa3,0x8e,0x61,0x88,0x68,0xa2,0x48,0x69,0xa2,
+0x71,0x6d,0x12,0x61,0x6c,0x6c,1,0x66,0x46,0x6b,0x15,0x61,0x6e,0x61,0x65,0x78,
+0x74,0xa4,0x29,0x15,0x65,0x6e,0x73,0x69,0x6f,0x6e,0xa5,0x29,0x12,0x6f,0x72,0x6d,
+1,0x73,0xa3,0x54,0x76,0x16,0x61,0x72,0x69,0x61,0x6e,0x74,0x73,0xa3,0x54,1,
+0x6d,0x36,0x75,0x16,0x72,0x61,0x73,0x68,0x74,0x72,0x61,0xa3,0xa1,0x15,0x61,0x72,
+0x69,0x74,0x61,0x6e,0xa3,0xac,1,0x61,0x52,0x6f,0x13,0x72,0x74,0x68,0x61,0x1f,
+0x6e,0x64,0x66,0x6f,0x72,0x6d,0x61,0x74,0x63,0x6f,0x6e,0x74,0x72,0x6f,0x6c,0x73,
+0xa3,0xf7,1,0x72,0x2e,0x76,0x12,0x69,0x61,0x6e,0xa3,0x79,0x12,0x61,0x64,0x61,
+0xa3,0xd9,1,0x64,0x50,0x6e,0x13,0x68,0x61,0x6c,0x61,0x50,0x1d,0x61,0x72,0x63,
+0x68,0x61,0x69,0x63,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa3,0xf9,0x13,0x64,0x68,
+0x61,0x6d,0xa3,0xf8,5,0x72,0x35,0x72,0x44,0x73,0x64,0x75,1,0x61,0xa3,0x4e,
+0x6e,0x17,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x71,0x17,0x69,0x76,0x61,0x74,
+0x65,0x75,0x73,0x65,0xa2,0x4e,0x13,0x61,0x72,0x65,0x61,0xa3,0x4e,0x1b,0x61,0x6c,
+0x74,0x65,0x72,0x70,0x61,0x68,0x6c,0x61,0x76,0x69,0xa3,0xf6,0x61,0x40,0x68,0x82,
+0x6c,0x19,0x61,0x79,0x69,0x6e,0x67,0x63,0x61,0x72,0x64,0x73,0xa3,0xcc,2,0x68,
+0x38,0x6c,0x4a,0x75,0x15,0x63,0x69,0x6e,0x68,0x61,0x75,0xa3,0xf5,0x17,0x61,0x77,
+0x68,0x68,0x6d,0x6f,0x6e,0x67,0xa3,0xf3,0x15,0x6d,0x79,0x72,0x65,0x6e,0x65,0xa3,
+0xf4,1,0x61,0x8e,0x6f,1,0x65,0x74,0x6e,0x16,0x65,0x74,0x69,0x63,0x65,0x78,
+0x74,0xa2,0x72,1,0x65,0x2c,0x73,0x11,0x75,0x70,0xa3,0x8d,0x15,0x6e,0x73,0x69,
+0x6f,0x6e,0x73,0xa2,0x72,0x19,0x73,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,
+0xa3,0x8d,0x15,0x6e,0x69,0x63,0x69,0x61,0x6e,0xa3,0x97,1,0x67,0x3e,0x69,0x13,
+0x73,0x74,0x6f,0x73,0xa2,0xa6,0x13,0x64,0x69,0x73,0x63,0xa3,0xa6,0x12,0x73,0x70,
+0x61,0xa3,0x96,1,0x65,0x5c,0x75,1,0x6d,0x2a,0x6e,0x11,0x69,0x63,0x67,0x10,
+0x69,0xa2,0xc0,0x1d,0x6e,0x75,0x6d,0x65,0x72,0x61,0x6c,0x73,0x79,0x6d,0x62,0x6f,
+0x6c,0x73,0xa3,0xc0,0x13,0x6a,0x61,0x6e,0x67,0xa3,0xa3,0x6d,0xa2,0xe6,0x6e,0xa8,
+0x19,0x6f,6,0x70,0x63,0x70,0x56,0x72,0x8a,0x73,0xa2,0x4c,0x74,0x10,0x74,0x1f,
+0x6f,0x6d,0x61,0x6e,0x73,0x69,0x79,0x61,0x71,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,
+0xa5,0x28,0x18,0x74,0x69,0x63,0x61,0x6c,0x63,0x68,0x61,0x72,0x1f,0x61,0x63,0x74,
+0x65,0x72,0x72,0x65,0x63,0x6f,0x67,0x6e,0x69,0x74,0x69,0x6f,0x6e,0x85,1,0x69,
+0x46,0x6e,0x1e,0x61,0x6d,0x65,0x6e,0x74,0x61,0x6c,0x64,0x69,0x6e,0x67,0x62,0x61,
+0x74,0x73,0xa3,0xf2,0x11,0x79,0x61,0x47,1,0x61,0x30,0x6d,0x13,0x61,0x6e,0x79,
+0x61,0xa3,0x7a,0x11,0x67,0x65,0xa5,0xf,0x63,0xa2,0x71,0x67,0xa2,0x71,0x6c,1,
+0x63,0xa2,0x62,0x64,5,0x70,0x38,0x70,0x36,0x73,0x56,0x74,0x14,0x75,0x72,0x6b,
+0x69,0x63,0xa3,0xbf,0x11,0x65,0x72,1,0x6d,0x2e,0x73,0x12,0x69,0x61,0x6e,0xa3,
+0x8c,0x11,0x69,0x63,0xa3,0xf1,0x10,0x6f,1,0x67,0x3a,0x75,0x18,0x74,0x68,0x61,
+0x72,0x61,0x62,0x69,0x61,0x6e,0xa3,0xbb,0x13,0x64,0x69,0x61,0x6e,0xa5,0x22,0x68,
+0x42,0x69,0x54,0x6e,0x1a,0x6f,0x72,0x74,0x68,0x61,0x72,0x61,0x62,0x69,0x61,0x6e,
+0xa3,0xf0,0x17,0x75,0x6e,0x67,0x61,0x72,0x69,0x61,0x6e,0xa5,4,0x14,0x74,0x61,
+0x6c,0x69,0x63,0xa3,0x58,0x13,0x68,0x69,0x6b,0x69,0xa3,0x9d,0x10,0x72,0x85,0x12,
+0x68,0x61,0x6d,0x65,6,0x6f,0x86,0x6f,0x6c,0x72,0xa2,0x61,0x75,0xa2,0x62,0x79,
+0x14,0x61,0x6e,0x6d,0x61,0x72,0x58,0x12,0x65,0x78,0x74,2,0x61,0xa3,0xb6,0x62,
+0xa3,0xee,0x65,0x13,0x6e,0x64,0x65,0x64,1,0x61,0xa3,0xb6,0x62,0xa3,0xee,1,
+0x64,0x52,0x6e,0x15,0x67,0x6f,0x6c,0x69,0x61,0x6e,0x6a,0x12,0x73,0x75,0x70,0xa4,
+0xd,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0xd,0x10,0x69,0xa2,0xec,0x13,
+0x66,0x69,0x65,0x72,1,0x6c,0x3c,0x74,0x19,0x6f,0x6e,0x65,0x6c,0x65,0x74,0x74,
+0x65,0x72,0x73,0xa3,0x8a,0x15,0x65,0x74,0x74,0x65,0x72,0x73,0x2d,0x10,0x6f,0xa3,
+0xed,1,0x6c,0x44,0x73,0x11,0x69,0x63,0xa2,0x5c,0x18,0x61,0x6c,0x73,0x79,0x6d,
+0x62,0x6f,0x6c,0x73,0xa3,0x5c,0x13,0x74,0x61,0x6e,0x69,0xa5,3,0x61,0xa2,0x9b,
+0x65,0xa4,0x4c,0x69,1,0x61,0xa2,0x8f,0x73,0x10,0x63,5,0x70,0x18,0x70,0xa2,
+0x71,0x73,0x36,0x74,0x17,0x65,0x63,0x68,0x6e,0x69,0x63,0x61,0x6c,0x81,0x15,0x79,
+0x6d,0x62,0x6f,0x6c,0x73,0x8f,0x61,0xa2,0x66,0x65,0x46,0x6d,0x19,0x61,0x74,0x68,
+0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,1,0x61,0xa3,0x66,0x62,0xa3,0x69,0x17,0x6c,
+0x6c,0x61,0x6e,0x65,0x6f,0x75,0x73,2,0x6d,0x3a,0x73,0x6c,0x74,0x17,0x65,0x63,
+0x68,0x6e,0x69,0x63,0x61,0x6c,0x81,0x11,0x61,0x74,0x1f,0x68,0x65,0x6d,0x61,0x74,
+0x69,0x63,0x61,0x6c,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,1,0x61,0xa3,0x66,0x62,
+0xa3,0x69,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x8e,0x12,0x61,0x6e,0x64,1,0x61,
+0x3c,0x70,0x19,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa3,0xcd,0x14,
+0x72,0x72,0x6f,0x77,0x73,0xa3,0x73,0x10,0x6f,0xa3,0xd8,7,0x72,0x6f,0x72,0x44,
+0x73,0x4e,0x74,0x62,0x79,0x19,0x61,0x6e,0x6e,0x75,0x6d,0x65,0x72,0x61,0x6c,0x73,
+0xa5,0x20,0x13,0x63,0x68,0x65,0x6e,0xa5,0xc,0x18,0x61,0x72,0x61,0x6d,0x67,0x6f,
+0x6e,0x64,0x69,0xa5,0x14,0x10,0x68,2,0x61,0x3a,0x65,0x4a,0x6f,0x17,0x70,0x65,
+0x72,0x61,0x74,0x6f,0x72,0x73,0x7f,0x16,0x6c,0x70,0x68,0x61,0x6e,0x75,0x6d,0xa3,
+0x5d,0x16,0x6d,0x61,0x74,0x69,0x63,0x61,0x6c,1,0x61,0x36,0x6f,0x17,0x70,0x65,
+0x72,0x61,0x74,0x6f,0x72,0x73,0x7f,0x11,0x6c,0x70,0x1f,0x68,0x61,0x6e,0x75,0x6d,
+0x65,0x72,0x69,0x63,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x5d,0x68,0x50,0x6b,
+0x7e,0x6c,0x88,0x6e,1,0x64,0x34,0x69,0x15,0x63,0x68,0x61,0x65,0x61,0x6e,0xa3,
+0xea,0x12,0x61,0x69,0x63,0xa3,0xc6,1,0x61,0x3e,0x6a,0x12,0x6f,0x6e,0x67,0xa2,
+0xaa,0x14,0x74,0x69,0x6c,0x65,0x73,0xa3,0xaa,0x13,0x6a,0x61,0x6e,0x69,0xa3,0xe9,
+0x13,0x61,0x73,0x61,0x72,0xa5,0x1f,0x15,0x61,0x79,0x61,0x6c,0x61,0x6d,0x4f,3,
+0x64,0x6c,0x65,0x7e,0x6e,0xa2,0x47,0x72,0x14,0x6f,0x69,0x74,0x69,0x63,1,0x63,
+0x3c,0x68,0x19,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0xd7,0x15,
+0x75,0x72,0x73,0x69,0x76,0x65,0xa3,0xd6,0x17,0x65,0x66,0x61,0x69,0x64,0x72,0x69,
+0x6e,0xa5,0x21,0x17,0x74,0x65,0x69,0x6d,0x61,0x79,0x65,0x6b,0xa2,0xb8,0x12,0x65,
+0x78,0x74,0xa2,0xd5,0x16,0x65,0x6e,0x73,0x69,0x6f,0x6e,0x73,0xa3,0xd5,0x18,0x64,
+0x65,0x6b,0x69,0x6b,0x61,0x6b,0x75,0x69,0xa3,0xeb,6,0x6b,0x3b,0x6b,0x56,0x6f,
+0x5a,0x75,0x64,0x79,0x11,0x69,0x61,0x1f,0x6b,0x65,0x6e,0x67,0x70,0x75,0x61,0x63,
+0x68,0x75,0x65,0x68,0x6d,0x6f,0x6e,0x67,0xa5,0x27,0x10,0x6f,0xa3,0x92,0x14,0x62,
+0x6c,0x6f,0x63,0x6b,0x21,1,0x6d,0x2c,0x73,0x11,0x68,0x75,0xa5,0x15,0x17,0x62,
+0x65,0x72,0x66,0x6f,0x72,0x6d,0x73,0x7b,0x61,0x44,0x62,0x21,0x65,0x10,0x77,1,
+0x61,0xa5,0xe,0x74,0x14,0x61,0x69,0x6c,0x75,0x65,0xa3,0x8b,1,0x62,0x38,0x6e,
+0x17,0x64,0x69,0x6e,0x61,0x67,0x61,0x72,0x69,0xa5,0x26,0x15,0x61,0x74,0x61,0x65,
+0x61,0x6e,0xa3,0xef,0x67,0xc4,0x32,0x6a,0xc1,0xb9,0x6a,0xa2,0xd5,0x6b,0xa2,0xee,
+0x6c,4,0x61,0x54,0x65,0xa2,0x61,0x69,0xa2,0x78,0x6f,0xa2,0xb7,0x79,1,0x63,
+0x2e,0x64,0x12,0x69,0x61,0x6e,0xa3,0xa9,0x12,0x69,0x61,0x6e,0xa3,0xa7,1,0x6f,
+0x55,0x74,0x11,0x69,0x6e,1,0x31,0x82,0x65,0x11,0x78,0x74,4,0x61,0x5c,0x62,
+0x29,0x63,0xa3,0x94,0x64,0xa3,0x95,0x65,0xa2,0xe7,0x13,0x6e,0x64,0x65,0x64,4,
+0x61,0x36,0x62,0x29,0x63,0xa3,0x94,0x64,0xa3,0x95,0x65,0xa3,0xe7,0x26,0x18,0x64,
+0x64,0x69,0x74,0x69,0x6f,0x6e,0x61,0x6c,0x6d,0x24,0x12,0x73,0x75,0x70,0x24,0x16,
+0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0x25,1,0x70,0x42,0x74,0x1d,0x74,0x65,0x72,
+0x6c,0x69,0x6b,0x65,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x79,0x12,0x63,0x68,0x61,
+0xa3,0x9c,2,0x6d,0x4e,0x6e,0x54,0x73,0x10,0x75,0xa2,0xb0,0x12,0x73,0x75,0x70,
+0xa4,0x31,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0x31,0x11,0x62,0x75,0xa3,
+0x6f,0x12,0x65,0x61,0x72,1,0x61,0xa3,0xe8,0x62,1,0x69,0x38,0x73,0x17,0x79,
+0x6c,0x6c,0x61,0x62,0x61,0x72,0x79,0xa3,0x75,0x17,0x64,0x65,0x6f,0x67,0x72,0x61,
+0x6d,0x73,0xa3,0x76,0x1a,0x77,0x73,0x75,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x73,
+0xa3,0x4d,0x10,0x61,1,0x6d,0x32,0x76,0x14,0x61,0x6e,0x65,0x73,0x65,0xa3,0xb5,
+0x10,0x6f,0x5c,0x12,0x65,0x78,0x74,1,0x61,0xa3,0xb4,0x62,0xa3,0xb9,1,0x61,
+0xa2,0x43,0x68,4,0x61,0x40,0x69,0x50,0x6d,0x6e,0x6f,0x86,0x75,0x15,0x64,0x61,
+0x77,0x61,0x64,0x69,0xa3,0xe6,0x16,0x72,0x6f,0x73,0x68,0x74,0x68,0x69,0xa3,0x89,
+0x1d,0x74,0x61,0x6e,0x73,0x6d,0x61,0x6c,0x6c,0x73,0x63,0x72,0x69,0x70,0x74,0xa5,
+0x30,0x11,0x65,0x72,0x68,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x71,0x12,
+0x6a,0x6b,0x69,0xa3,0xe5,3,0x69,0x3a,0x6e,0x42,0x74,0xa2,0x51,0x79,0x13,0x61,
+0x68,0x6c,0x69,0xa3,0xa2,0x12,0x74,0x68,0x69,0xa3,0xc1,3,0x61,0x34,0x62,0x76,
+0x67,0x7c,0x6e,0x12,0x61,0x64,0x61,0x4d,1,0x65,0x40,0x73,0x11,0x75,0x70,0xa2,
+0xcb,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xcb,0x11,0x78,0x74,1,0x61,
+0xa5,0x13,0x65,0x14,0x6e,0x64,0x65,0x64,0x61,0xa5,0x13,0x11,0x75,0x6e,0xa3,0x42,
+0x11,0x78,0x69,0x96,0x17,0x72,0x61,0x64,0x69,0x63,0x61,0x6c,0x73,0x97,0x14,0x61,
+0x6b,0x61,0x6e,0x61,0x9e,1,0x65,0x4c,0x70,0x10,0x68,0x1f,0x6f,0x6e,0x65,0x74,
+0x69,0x63,0x65,0x78,0x74,0x65,0x6e,0x73,0x69,0x6f,0x6e,0x73,0xa3,0x6b,0x11,0x78,
+0x74,0xa3,0x6b,0x67,0xa2,0xb5,0x68,0xa4,0x84,0x69,3,0x64,0x4c,0x6d,0xa2,0x55,
+0x6e,0xa2,0x62,0x70,0x13,0x61,0x65,0x78,0x74,0x2a,0x16,0x65,0x6e,0x73,0x69,0x6f,
+0x6e,0x73,0x2b,1,0x63,0x99,0x65,0x17,0x6f,0x67,0x72,0x61,0x70,0x68,0x69,0x63,
+1,0x64,0x56,0x73,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa4,0xb,0x1d,0x61,0x6e,
+0x64,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa5,0xb,0x13,0x65,
+0x73,0x63,0x72,0x1f,0x69,0x70,0x74,0x69,0x6f,0x6e,0x63,0x68,0x61,0x72,0x61,0x63,
+0x74,0x65,0x72,0x73,0x99,0x1c,0x70,0x65,0x72,0x69,0x61,0x6c,0x61,0x72,0x61,0x6d,
+0x61,0x69,0x63,0xa3,0xba,1,0x64,0x62,0x73,0x1b,0x63,0x72,0x69,0x70,0x74,0x69,
+0x6f,0x6e,0x61,0x6c,0x70,0x61,1,0x68,0x32,0x72,0x14,0x74,0x68,0x69,0x61,0x6e,
+0xa3,0xbd,0x13,0x6c,0x61,0x76,0x69,0xa3,0xbe,0x11,0x69,0x63,1,0x6e,0x3e,0x73,
+0x1a,0x69,0x79,0x61,0x71,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa5,0x1e,0x19,0x75,
+0x6d,0x62,0x65,0x72,0x66,0x6f,0x72,0x6d,0x73,0xa3,0xb2,4,0x65,0x74,0x6c,0xa2,
+0x82,0x6f,0xa2,0x9a,0x72,0xa2,0x9e,0x75,2,0x6a,0x34,0x6e,0x3e,0x72,0x14,0x6d,
+0x75,0x6b,0x68,0x69,0x43,0x14,0x61,0x72,0x61,0x74,0x69,0x45,0x18,0x6a,0x61,0x6c,
+0x61,0x67,0x6f,0x6e,0x64,0x69,0xa5,0x1c,1,0x6e,0xa2,0x46,0x6f,1,0x6d,0x6e,
+0x72,0x13,0x67,0x69,0x61,0x6e,0x5a,1,0x65,0x40,0x73,0x11,0x75,0x70,0xa2,0x87,
+0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x87,0x11,0x78,0x74,0xa4,0x1b,0x14,
+0x65,0x6e,0x64,0x65,0x64,0xa5,0x1b,0x1a,0x65,0x74,0x72,0x69,0x63,0x73,0x68,0x61,
+0x70,0x65,0x73,0x8c,0x12,0x65,0x78,0x74,0xa2,0xe3,0x14,0x65,0x6e,0x64,0x65,0x64,
+0xa3,0xe3,0x1e,0x65,0x72,0x61,0x6c,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,
+0x6f,0x6e,0x71,0x17,0x61,0x67,0x6f,0x6c,0x69,0x74,0x69,0x63,0xa2,0x88,0x12,0x73,
+0x75,0x70,0xa4,0xa,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0xa,0x13,0x74,
+0x68,0x69,0x63,0xa3,0x59,1,0x61,0x5c,0x65,0x11,0x65,0x6b,0x30,1,0x61,0x38,
+0x65,0x11,0x78,0x74,0x6e,0x14,0x65,0x6e,0x64,0x65,0x64,0x6f,0x17,0x6e,0x64,0x63,
+0x6f,0x70,0x74,0x69,0x63,0x31,0x13,0x6e,0x74,0x68,0x61,0xa3,0xe4,2,0x61,0xa2,
+0x48,0x65,0xa2,0xdf,0x69,1,0x67,0x30,0x72,0x14,0x61,0x67,0x61,0x6e,0x61,0x9d,
+0x10,0x68,1,0x70,0x3a,0x73,0x18,0x75,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x73,
+0xa3,0x4b,1,0x72,0x3c,0x75,0x19,0x73,0x75,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,
+0x73,0xa3,0x4c,0x11,0x69,0x76,0x1f,0x61,0x74,0x65,0x75,0x73,0x65,0x73,0x75,0x72,
+0x72,0x6f,0x67,0x61,0x74,0x65,0x73,0xa3,0x4c,2,0x6c,0x32,0x6e,0x9a,0x74,0x12,
+0x72,0x61,0x6e,0xa5,2,0x10,0x66,2,0x61,0x58,0x6d,0x70,0x77,0x14,0x69,0x64,
+0x74,0x68,0x61,0x1f,0x6e,0x64,0x66,0x75,0x6c,0x6c,0x77,0x69,0x64,0x74,0x68,0x66,
+0x6f,0x72,0x6d,0x73,0xa3,0x57,0x1a,0x6e,0x64,0x66,0x75,0x6c,0x6c,0x66,0x6f,0x72,
+0x6d,0x73,0xa3,0x57,0x13,0x61,0x72,0x6b,0x73,0xa3,0x52,2,0x67,0x34,0x69,0xa2,
+0x45,0x75,0x12,0x6e,0x6f,0x6f,0xa3,0x63,0x11,0x75,0x6c,0xa2,0x4a,2,0x63,0x3c,
+0x6a,0x5e,0x73,0x17,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x73,0xa3,0x4a,0x1f,0x6f,
+0x6d,0x70,0x61,0x74,0x69,0x62,0x69,0x6c,0x69,0x74,0x79,0x6a,0x61,0x6d,0x6f,0xa3,
+0x41,0x12,0x61,0x6d,0x6f,0x5c,0x17,0x65,0x78,0x74,0x65,0x6e,0x64,0x65,0x64,1,
+0x61,0xa3,0xb4,0x62,0xa3,0xb9,0x19,0x66,0x69,0x72,0x6f,0x68,0x69,0x6e,0x67,0x79,
+0x61,0xa5,0x1d,0x13,0x62,0x72,0x65,0x77,0x37,0x61,0xa4,5,0x62,0xa6,0x45,0x63,
+0xa8,0x1a,0x64,0xac,0xb8,0x65,5,0x6d,0xa2,0x6d,0x86,0x6e,0x96,0x74,0x15,0x68,
+0x69,0x6f,0x70,0x69,0x63,0x5e,1,0x65,0x40,0x73,0x11,0x75,0x70,0xa2,0x86,0x16,
+0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x86,0x11,0x78,0x74,0xa2,0x85,1,0x61,
+0xa3,0xc8,0x65,0x13,0x6e,0x64,0x65,0x64,0xa2,0x85,0x10,0x61,0xa3,0xc8,0x16,0x6f,
+0x74,0x69,0x63,0x6f,0x6e,0x73,0xa3,0xce,0x15,0x63,0x6c,0x6f,0x73,0x65,0x64,2,
+0x61,0x5a,0x63,0x9e,0x69,0x1c,0x64,0x65,0x6f,0x67,0x72,0x61,0x70,0x68,0x69,0x63,
+0x73,0x75,0x70,0xa2,0xc4,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xc4,0x16,
+0x6c,0x70,0x68,0x61,0x6e,0x75,0x6d,0x86,1,0x65,0x2c,0x73,0x11,0x75,0x70,0xa3,
+0xc3,0x13,0x72,0x69,0x63,0x73,0x86,0x18,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65,0x6e,
+0x74,0xa3,0xc3,0x11,0x6a,0x6b,0xa2,0x44,0x1f,0x6c,0x65,0x74,0x74,0x65,0x72,0x73,
+0x61,0x6e,0x64,0x6d,0x6f,0x6e,0x74,0x68,0x73,0xa3,0x44,0x61,0x4a,0x67,0x76,0x6c,
+1,0x62,0x30,0x79,0x13,0x6d,0x61,0x69,0x63,0xa5,0x25,0x13,0x61,0x73,0x61,0x6e,
+0xa3,0xe2,0x13,0x72,0x6c,0x79,0x64,0x1f,0x79,0x6e,0x61,0x73,0x74,0x69,0x63,0x63,
+0x75,0x6e,0x65,0x69,0x66,0x6f,0x72,0x6d,0xa5,1,0x1f,0x79,0x70,0x74,0x69,0x61,
+0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,1,0x66,0x26,0x73,0xa3,
+0xc2,0x1c,0x6f,0x72,0x6d,0x61,0x74,0x63,0x6f,0x6e,0x74,0x72,0x6f,0x6c,0x73,0xa5,
+0x24,7,0x6e,0xc0,0xe5,0x6e,0x3e,0x72,0xa2,0x5d,0x73,0xa2,0xd8,0x76,0x14,0x65,
+0x73,0x74,0x61,0x6e,0xa3,0xbc,1,0x61,0x92,0x63,0x13,0x69,0x65,0x6e,0x74,1,
+0x67,0x34,0x73,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xa5,0x13,0x72,0x65,0x65,
+0x6b,1,0x6d,0x34,0x6e,0x15,0x75,0x6d,0x62,0x65,0x72,0x73,0xa3,0x7f,0x13,0x75,
+0x73,0x69,0x63,0xa2,0x7e,0x19,0x61,0x6c,0x6e,0x6f,0x74,0x61,0x74,0x69,0x6f,0x6e,
+0xa3,0x7e,0x10,0x74,0x1f,0x6f,0x6c,0x69,0x61,0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,
+0x6c,0x79,0x70,0x68,0x73,0xa3,0xfe,2,0x61,0x32,0x6d,0xa2,0x71,0x72,0x12,0x6f,
+0x77,0x73,0x7d,0x12,0x62,0x69,0x63,0x38,3,0x65,0x4a,0x6d,0x66,0x70,0xa2,0x43,
+0x73,0x11,0x75,0x70,0xa2,0x80,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x80,
+0x11,0x78,0x74,1,0x61,0xa3,0xd2,0x65,0x14,0x6e,0x64,0x65,0x64,0x61,0xa3,0xd2,
+0x12,0x61,0x74,0x68,0xa2,0xd3,0x18,0x65,0x6d,0x61,0x74,0x69,0x63,0x61,0x6c,0x61,
+0x1f,0x6c,0x70,0x68,0x61,0x62,0x65,0x74,0x69,0x63,0x73,0x79,0x6d,0x62,0x6f,0x6c,
+0x73,0xa3,0xd3,1,0x66,0x42,0x72,0x1e,0x65,0x73,0x65,0x6e,0x74,0x61,0x74,0x69,
+0x6f,0x6e,0x66,0x6f,0x72,0x6d,0x73,1,0x61,0xa3,0x51,0x62,0xa3,0x55,0x14,0x65,
+0x6e,0x69,0x61,0x6e,0x35,0x12,0x63,0x69,0x69,0x23,0x64,0x9e,0x65,0xa2,0x42,0x68,
+0xa2,0x4d,0x6c,1,0x63,0x62,0x70,0x17,0x68,0x61,0x62,0x65,0x74,0x69,0x63,0x70,
+1,0x66,0xa3,0x50,0x72,0x1e,0x65,0x73,0x65,0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,
+0x66,0x6f,0x72,0x6d,0x73,0xa3,0x50,0x16,0x68,0x65,0x6d,0x69,0x63,0x61,0x6c,0xa2,
+0xd0,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xd0,0x12,0x6c,0x61,0x6d,0xa5,
+7,0x1a,0x67,0x65,0x61,0x6e,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa3,0x77,0x11,
+0x6f,0x6d,0xa3,0xfd,7,0x6f,0x71,0x6f,0x64,0x72,0xa2,0x41,0x75,0xa2,0x58,0x79,
+0x1b,0x7a,0x61,0x6e,0x74,0x69,0x6e,0x65,0x6d,0x75,0x73,0x69,0x63,0xa2,0x5b,0x18,
+0x61,0x6c,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x5b,1,0x70,0x34,0x78,0x16,
+0x64,0x72,0x61,0x77,0x69,0x6e,0x67,0x89,0x14,0x6f,0x6d,0x6f,0x66,0x6f,0xa0,0x12,
+0x65,0x78,0x74,0xa2,0x43,0x14,0x65,0x6e,0x64,0x65,0x64,0xa3,0x43,0x10,0x61,1,
+0x68,0x40,0x69,0x12,0x6c,0x6c,0x65,0x92,0x17,0x70,0x61,0x74,0x74,0x65,0x72,0x6e,
+0x73,0x93,0x11,0x6d,0x69,0xa3,0xc9,1,0x67,0x2c,0x68,0x11,0x69,0x64,0xa3,0x64,
+0x14,0x69,0x6e,0x65,0x73,0x65,0xa3,0x81,0x61,0x48,0x65,0xa2,0x4e,0x68,0xa2,0x52,
+0x6c,0x1a,0x6f,0x63,0x6b,0x65,0x6c,0x65,0x6d,0x65,0x6e,0x74,0x73,0x8b,3,0x6c,
+0x34,0x6d,0x40,0x73,0x66,0x74,0x11,0x61,0x6b,0xa3,0xc7,0x14,0x69,0x6e,0x65,0x73,
+0x65,0xa3,0x93,0x11,0x75,0x6d,0xa2,0xb1,0x12,0x73,0x75,0x70,0xa2,0xca,0x16,0x70,
+0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xca,1,0x69,0x30,0x73,0x13,0x61,0x76,0x61,
+0x68,0xa3,0xdd,0x15,0x63,0x6c,0x61,0x74,0x69,0x6e,0x23,0x14,0x6e,0x67,0x61,0x6c,
+0x69,0x41,0x16,0x61,0x69,0x6b,0x73,0x75,0x6b,0x69,0xa5,8,5,0x6f,0xc1,0x4c,
+0x6f,0xa2,0x55,0x75,0xa4,0x10,0x79,1,0x70,0x9c,0x72,0x14,0x69,0x6c,0x6c,0x69,
+0x63,0x32,1,0x65,0x4c,0x73,0x11,0x75,0x70,0xa2,0x61,0x16,0x70,0x6c,0x65,0x6d,
+0x65,0x6e,0x74,0xa2,0x61,0x12,0x61,0x72,0x79,0xa3,0x61,0x11,0x78,0x74,3,0x61,
+0xa3,0x9e,0x62,0xa3,0xa0,0x63,0xa5,9,0x65,0x13,0x6e,0x64,0x65,0x64,2,0x61,
+0xa3,0x9e,0x62,0xa3,0xa0,0x63,0xa5,9,0x1c,0x72,0x69,0x6f,0x74,0x73,0x79,0x6c,
+0x6c,0x61,0x62,0x61,0x72,0x79,0xa3,0x7b,3,0x6d,0x5a,0x6e,0xa2,0x95,0x70,0xa2,
+0xa0,0x75,0x17,0x6e,0x74,0x69,0x6e,0x67,0x72,0x6f,0x64,0xa2,0x9a,0x17,0x6e,0x75,
+0x6d,0x65,0x72,0x61,0x6c,0x73,0xa3,0x9a,2,0x62,0x3a,0x6d,0xa2,0x5f,0x70,0x15,
+0x61,0x74,0x6a,0x61,0x6d,0x6f,0xa3,0x41,0x14,0x69,0x6e,0x69,0x6e,0x67,2,0x64,
+0x46,0x68,0x9e,0x6d,0x1d,0x61,0x72,0x6b,0x73,0x66,0x6f,0x72,0x73,0x79,0x6d,0x62,
+0x6f,0x6c,0x73,0x77,0x1e,0x69,0x61,0x63,0x72,0x69,0x74,0x69,0x63,0x61,0x6c,0x6d,
+0x61,0x72,0x6b,0x73,0x2e,2,0x65,0x40,0x66,0xa6,0x41,0x73,0x18,0x75,0x70,0x70,
+0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x83,0x16,0x78,0x74,0x65,0x6e,0x64,0x65,0x64,
+0xa3,0xe0,0x17,0x61,0x6c,0x66,0x6d,0x61,0x72,0x6b,0x73,0xa3,0x52,0x11,0x6f,0x6e,
+0x1f,0x69,0x6e,0x64,0x69,0x63,0x6e,0x75,0x6d,0x62,0x65,0x72,0x66,0x6f,0x72,0x6d,
+0x73,0xa3,0xb2,0x1b,0x74,0x72,0x6f,0x6c,0x70,0x69,0x63,0x74,0x75,0x72,0x65,0x73,
+0x83,0x12,0x74,0x69,0x63,0xa2,0x84,0x1b,0x65,0x70,0x61,0x63,0x74,0x6e,0x75,0x6d,
+0x62,0x65,0x72,0x73,0xa3,0xdf,1,0x6e,0x3e,0x72,0x1b,0x72,0x65,0x6e,0x63,0x79,
+0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x75,0x15,0x65,0x69,0x66,0x6f,0x72,0x6d,0xa2,
+0x98,0x16,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa2,0x99,0x1d,0x61,0x6e,0x64,0x70,
+0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa3,0x99,0x61,0xa2,0xe1,0x68,
+0xa4,0xb,0x6a,0x10,0x6b,0xa2,0x47,4,0x63,0x8c,0x65,0xa2,0x80,0x72,0xa2,0x98,
+0x73,0xa2,0xaa,0x75,0x1f,0x6e,0x69,0x66,0x69,0x65,0x64,0x69,0x64,0x65,0x6f,0x67,
+0x72,0x61,0x70,0x68,0x73,0xa2,0x47,0x18,0x65,0x78,0x74,0x65,0x6e,0x73,0x69,0x6f,
+0x6e,6,0x64,0x6b,0x64,0xa3,0xd1,0x65,0xa5,0,0x66,0xa5,0x12,0x67,0xa5,0x2e,
+0x14,0x6f,0x6d,0x70,0x61,0x74,0xa2,0x45,1,0x66,0x96,0x69,1,0x62,0x44,0x64,
+0x17,0x65,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa2,0x4f,0x12,0x73,0x75,0x70,0xa3,
+0x5f,0x14,0x69,0x6c,0x69,0x74,0x79,0xa2,0x45,1,0x66,0x54,0x69,0x18,0x64,0x65,
+0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa2,0x4f,0x19,0x73,0x75,0x70,0x70,0x6c,0x65,
+0x6d,0x65,0x6e,0x74,0xa3,0x5f,0x13,0x6f,0x72,0x6d,0x73,0xa3,0x53,0x11,0x78,0x74,
+6,0x64,0xc,0x64,0xa3,0xd1,0x65,0xa5,0,0x66,0xa5,0x12,0x67,0xa5,0x2e,0x61,
+0xa3,0x46,0x62,0xa3,0x5e,0x63,0xa3,0xc5,0x19,0x61,0x64,0x69,0x63,0x61,0x6c,0x73,
+0x73,0x75,0x70,0x94,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0x95,1,0x74,0x50,
+0x79,0x14,0x6d,0x62,0x6f,0x6c,0x73,0x9a,0x1d,0x61,0x6e,0x64,0x70,0x75,0x6e,0x63,
+0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x9b,0x14,0x72,0x6f,0x6b,0x65,0x73,0xa3,0x82,
+2,0x6e,0x48,0x72,0x64,0x75,0x1d,0x63,0x61,0x73,0x69,0x61,0x6e,0x61,0x6c,0x62,
+0x61,0x6e,0x69,0x61,0x6e,0xa3,0xde,0x1d,0x61,0x64,0x69,0x61,0x6e,0x73,0x79,0x6c,
+0x6c,0x61,0x62,0x69,0x63,0x73,0x63,0x12,0x69,0x61,0x6e,0xa3,0xa8,2,0x61,0x3a,
+0x65,0x4c,0x6f,0x16,0x72,0x61,0x73,0x6d,0x69,0x61,0x6e,0xa5,0x2d,1,0x6b,0x26,
+0x6d,0xa3,0xa4,0x11,0x6d,0x61,0xa3,0xd4,1,0x72,0x38,0x73,0x17,0x73,0x73,0x79,
+0x6d,0x62,0x6f,0x6c,0x73,0xa5,0x19,0x13,0x6f,0x6b,0x65,0x65,0x60,0x12,0x73,0x75,
+0x70,0xa2,0xff,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xff,3,0x65,0x3e,
+0x69,0x7e,0x6f,0xa2,0x69,0x75,0x15,0x70,0x6c,0x6f,0x79,0x61,0x6e,0xa3,0xe1,1,
+0x73,0x50,0x76,0x16,0x61,0x6e,0x61,0x67,0x61,0x72,0x69,0x3e,0x12,0x65,0x78,0x74,
+0xa2,0xb3,0x14,0x65,0x6e,0x64,0x65,0x64,0xa3,0xb3,0x13,0x65,0x72,0x65,0x74,0xa3,
+0x5a,2,0x61,0x3a,0x6e,0x82,0x76,0x16,0x65,0x73,0x61,0x6b,0x75,0x72,0x75,0xa5,
+0x2f,0x18,0x63,0x72,0x69,0x74,0x69,0x63,0x61,0x6c,0x73,0x2e,2,0x65,0x30,0x66,
+0x36,0x73,0x11,0x75,0x70,0xa3,0x83,0x11,0x78,0x74,0xa3,0xe0,0x18,0x6f,0x72,0x73,
+0x79,0x6d,0x62,0x6f,0x6c,0x73,0x77,0x14,0x67,0x62,0x61,0x74,0x73,0x91,1,0x67,
+0x3e,0x6d,0x12,0x69,0x6e,0x6f,0xa2,0xab,0x14,0x74,0x69,0x6c,0x65,0x73,0xa3,0xab,
+0x11,0x72,0x61,0xa5,0x1a,8,0x6d,0x5f,0x6d,0x3a,0x6e,0x48,0x73,0x7a,0x76,0xa2,
+0x4b,0x77,0x12,0x69,0x64,0x65,0x43,0x11,0x65,0x64,0x32,0x12,0x69,0x61,0x6c,0x33,
+2,0x61,0x40,0x62,0x37,0x6f,1,0x62,0x28,0x6e,0x10,0x65,0x21,0x13,0x72,0x65,
+0x61,0x6b,0x37,0x10,0x72,0x34,0x12,0x72,0x6f,0x77,0x35,2,0x6d,0x38,0x71,0x46,
+0x75,1,0x62,0x3d,0x70,0x3e,0x11,0x65,0x72,0x3f,1,0x61,0x24,0x6c,0x39,0x11,
+0x6c,0x6c,0x39,1,0x72,0x3b,0x75,0x12,0x61,0x72,0x65,0x3b,0x12,0x65,0x72,0x74,
+0x40,0x13,0x69,0x63,0x61,0x6c,0x41,0x63,0x58,0x65,0x92,0x66,0x96,0x69,1,0x6e,
+0x36,0x73,0x10,0x6f,0x30,0x14,0x6c,0x61,0x74,0x65,0x64,0x31,0x11,0x69,0x74,0x2e,
+0x12,0x69,0x61,0x6c,0x2f,2,0x61,0x36,0x69,0x48,0x6f,0x10,0x6d,0x24,0x12,0x70,
+0x61,0x74,0x25,0x10,0x6e,0x22,0x15,0x6f,0x6e,0x69,0x63,0x61,0x6c,0x23,0x13,0x72,
+0x63,0x6c,0x65,0x27,0x11,0x6e,0x63,0x27,2,0x69,0x3a,0x6f,0x44,0x72,0x10,0x61,
+0x2c,0x14,0x63,0x74,0x69,0x6f,0x6e,0x2d,0x10,0x6e,0x28,0x11,0x61,0x6c,0x29,0x11,
+0x6e,0x74,0x2b,4,0x61,0x3a,0x66,0x4c,0x68,0x5e,0x6e,0x70,0x77,0x2a,0x12,0x69,
+0x64,0x65,0x2b,0x22,0x17,0x6d,0x62,0x69,0x67,0x75,0x6f,0x75,0x73,0x23,0x26,0x17,
+0x75,0x6c,0x6c,0x77,0x69,0x64,0x74,0x68,0x27,0x24,0x17,0x61,0x6c,0x66,0x77,0x69,
+0x64,0x74,0x68,0x25,0x20,1,0x61,0x30,0x65,0x14,0x75,0x74,0x72,0x61,0x6c,0x21,
+0x28,0x13,0x72,0x72,0x6f,0x77,0x29,0xd,0x6e,0xc0,0xfb,0x73,0x6d,0x73,0x3a,0x74,
+0x98,0x75,0xa2,0x49,0x7a,2,0x6c,0x3b,0x70,0x3d,0x73,0x39,5,0x6f,0x28,0x6f,
+0x57,0x70,0x34,0x75,0x16,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x45,0x11,0x61,0x63,
+1,0x65,0x32,0x69,0x15,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x31,0x18,0x73,0x65,0x70,
+0x61,0x72,0x61,0x74,0x6f,0x72,0x39,0x63,0x53,0x6b,0x55,0x6d,0x51,0x1d,0x69,0x74,
+0x6c,0x65,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x27,1,0x6e,0x40,
+0x70,0x1c,0x70,0x65,0x72,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x23,
+0x17,0x61,0x73,0x73,0x69,0x67,0x6e,0x65,0x64,0x21,0x6e,0x8a,0x6f,0xa2,0x47,0x70,
+8,0x66,0x14,0x66,0x5b,0x69,0x59,0x6f,0x4f,0x72,0x24,0x73,0x49,0x17,0x69,0x76,
+0x61,0x74,0x65,0x75,0x73,0x65,0x43,0x61,0x2c,0x63,0x4d,0x64,0x47,0x65,0x4b,0x1f,
+0x72,0x61,0x67,0x72,0x61,0x70,0x68,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,
+0x3d,2,0x64,0x33,0x6c,0x35,0x6f,0x36,0x1b,0x6e,0x73,0x70,0x61,0x63,0x69,0x6e,
+0x67,0x6d,0x61,0x72,0x6b,0x2d,1,0x70,0x7c,0x74,0x12,0x68,0x65,0x72,3,0x6c,
+0x38,0x6e,0x42,0x70,0x4c,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0x57,0x14,0x65,0x74,
+0x74,0x65,0x72,0x2b,0x14,0x75,0x6d,0x62,0x65,0x72,0x37,0x19,0x75,0x6e,0x63,0x74,
+0x75,0x61,0x74,0x69,0x6f,0x6e,0x4f,0x1c,0x65,0x6e,0x70,0x75,0x6e,0x63,0x74,0x75,
+0x61,0x74,0x69,0x6f,0x6e,0x49,0x66,0x9e,0x66,0x88,0x69,0xa2,0x4b,0x6c,0xa2,0x5c,
+0x6d,4,0x61,0x60,0x63,0x31,0x65,0x2f,0x6e,0x2d,0x6f,0x15,0x64,0x69,0x66,0x69,
+0x65,0x72,1,0x6c,0x30,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0x55,0x14,0x65,0x74,
+0x74,0x65,0x72,0x29,0x17,0x74,0x68,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x51,1,0x69,
+0x2e,0x6f,0x13,0x72,0x6d,0x61,0x74,0x41,0x1d,0x6e,0x61,0x6c,0x70,0x75,0x6e,0x63,
+0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x5b,0x10,0x6e,0x1f,0x69,0x74,0x69,0x61,0x6c,
+0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x59,6,0x6d,0x18,0x6d,
+0x29,0x6f,0x28,0x74,0x27,0x75,0x23,0x2a,0x1c,0x77,0x65,0x72,0x63,0x61,0x73,0x65,
+0x6c,0x65,0x74,0x74,0x65,0x72,0x25,0x65,0x28,0x69,0x3c,0x6c,0x25,0x19,0x74,0x74,
+0x65,0x72,0x6e,0x75,0x6d,0x62,0x65,0x72,0x35,0x1a,0x6e,0x65,0x73,0x65,0x70,0x61,
+0x72,0x61,0x74,0x6f,0x72,0x3b,0x63,0x44,0x64,0xa2,0x60,0x65,0x1b,0x6e,0x63,0x6c,
+0x6f,0x73,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x2f,6,0x6e,0x39,0x6e,0x46,0x6f,
+0x4e,0x73,0x45,0x75,0x1b,0x72,0x72,0x65,0x6e,0x63,0x79,0x73,0x79,0x6d,0x62,0x6f,
+0x6c,0x53,0x20,0x12,0x74,0x72,0x6c,0x3f,0x42,0x10,0x6e,1,0x6e,0x2c,0x74,0x12,
+0x72,0x6f,0x6c,0x3f,0x1f,0x65,0x63,0x74,0x6f,0x72,0x70,0x75,0x6e,0x63,0x74,0x75,
+0x61,0x74,0x69,0x6f,0x6e,0x4d,0x63,0x3f,0x66,0x41,0x6c,0x1d,0x6f,0x73,0x65,0x70,
+0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x4b,2,0x61,0x30,0x65,0x4a,
+0x69,0x12,0x67,0x69,0x74,0x33,0x1c,0x73,0x68,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,
+0x74,0x69,0x6f,0x6e,0x47,0x1a,0x63,0x69,0x6d,0x61,0x6c,0x6e,0x75,0x6d,0x62,0x65,
+0x72,0x33,0,0x12,0x6d,0xc2,0x3f,0x73,0xa1,0x73,0x4e,0x74,0xa2,0x56,0x77,0xa2,
+0x72,0x79,0xa2,0x73,0x7a,1,0x61,0x2c,0x68,0x12,0x61,0x69,0x6e,0x8b,0x11,0x69,
+0x6e,0x85,5,0x74,0x22,0x74,0x38,0x77,0x4c,0x79,0x16,0x72,0x69,0x61,0x63,0x77,
+0x61,0x77,0x6f,0x18,0x72,0x61,0x69,0x67,0x68,0x74,0x77,0x61,0x77,0xa3,0x55,0x15,
+0x61,0x73,0x68,0x6b,0x61,0x66,0x6d,0x61,0x2e,0x65,0x38,0x68,0x11,0x69,0x6e,0x6b,
+0x10,0x64,0x62,0x11,0x68,0x65,0x65,1,0x65,0x2e,0x6d,0x13,0x6b,0x61,0x74,0x68,
+0x69,0x10,0x6e,0x67,1,0x61,0x4e,0x65,1,0x68,0x28,0x74,0x10,0x68,0x77,0x16,
+0x6d,0x61,0x72,0x62,0x75,0x74,0x61,0x74,0x13,0x67,0x6f,0x61,0x6c,0x3d,1,0x68,
+0x71,0x77,0x73,0x11,0x61,0x77,0x79,1,0x65,0x32,0x75,0x11,0x64,0x68,0x80,0x11,
+0x68,0x65,0x83,0x10,0x68,0x7a,1,0x62,0x34,0x77,0x16,0x69,0x74,0x68,0x74,0x61,
+0x69,0x6c,0x7f,0x14,0x61,0x72,0x72,0x65,0x65,0x7d,0x6d,0x6c,0x6e,0xa4,0x6b,0x70,
+0xa4,0x88,0x71,0xa4,0x88,0x72,1,0x65,0x38,0x6f,0x18,0x68,0x69,0x6e,0x67,0x79,
+0x61,0x79,0x65,0x68,0x93,1,0x68,0x5f,0x76,0x16,0x65,0x72,0x73,0x65,0x64,0x70,
+0x65,0x61,2,0x61,0x2e,0x65,0xa4,0x3e,0x69,0x10,0x6d,0x53,1,0x6c,0xa2,0xe7,
+0x6e,0x16,0x69,0x63,0x68,0x61,0x65,0x61,0x6e,0,0x12,0x6e,0x76,0x73,0x51,0x73,
+0x3e,0x74,0x5c,0x77,0xa0,0x79,0xa2,0x42,0x7a,0x13,0x61,0x79,0x69,0x6e,0xa3,0x54,
+0x10,0x61,1,0x64,0x2e,0x6d,0x12,0x65,0x6b,0x68,0xa3,0x4c,0x11,0x68,0x65,0xa3,
+0x4b,3,0x61,0x38,0x65,0x3c,0x68,0x4a,0x77,0x13,0x65,0x6e,0x74,0x79,0xa3,0x51,
+0x10,0x77,0xa3,0x4d,1,0x6e,0xa3,0x4e,0x74,0x10,0x68,0xa3,0x4f,0x14,0x61,0x6d,
+0x65,0x64,0x68,0xa3,0x50,0x11,0x61,0x77,0xa3,0x52,0x12,0x6f,0x64,0x68,0xa3,0x53,
+0x6e,0x3a,0x6f,0x40,0x70,0x46,0x71,0x4a,0x72,0x12,0x65,0x73,0x68,0xa3,0x4a,0x11,
+0x75,0x6e,0xa3,0x46,0x11,0x6e,0x65,0xa3,0x47,0x10,0x65,0xa3,0x48,0x12,0x6f,0x70,
+0x68,0xa3,0x49,0x67,0x33,0x67,0x38,0x68,0x40,0x6b,0x5e,0x6c,0x66,0x6d,0x11,0x65,
+0x6d,0xa3,0x45,0x13,0x69,0x6d,0x65,0x6c,0xa1,1,0x65,0x32,0x75,0x14,0x6e,0x64,
+0x72,0x65,0x64,0xa3,0x42,0x11,0x74,0x68,0xa3,0x41,0x12,0x61,0x70,0x68,0xa3,0x43,
+0x14,0x61,0x6d,0x65,0x64,0x68,0xa3,0x44,0x61,0x34,0x62,0x4a,0x64,0x50,0x66,0x12,
+0x69,0x76,0x65,0x9f,1,0x6c,0x2a,0x79,0x11,0x69,0x6e,0x97,0x12,0x65,0x70,0x68,
+0x95,0x12,0x65,0x74,0x68,0x99,1,0x61,0x30,0x68,0x14,0x61,0x6d,0x65,0x64,0x68,
+0x9d,0x13,0x6c,0x65,0x74,0x68,0x9b,0x15,0x61,0x79,0x61,0x6c,0x61,0x6d,6,0x6e,
+0x2c,0x6e,0x34,0x72,0x5e,0x73,0x62,0x74,0x11,0x74,0x61,0xa3,0x63,2,0x67,0x2e,
+0x6e,0x32,0x79,0x10,0x61,0xa3,0x60,0x10,0x61,0xa3,0x5d,1,0x61,0xa3,0x5e,0x6e,
+0x10,0x61,0xa3,0x5f,0x10,0x61,0xa3,0x61,0x11,0x73,0x61,0xa3,0x62,0x62,0x3c,0x6a,
+0x42,0x6c,0x10,0x6c,1,0x61,0xa3,0x5b,0x6c,0x10,0x61,0xa3,0x5c,0x11,0x68,0x61,
+0xa3,0x59,0x10,0x61,0xa3,0x5a,0x11,0x65,0x6d,0x51,2,0x6f,0x2c,0x75,0x50,0x79,
+0x10,0x61,0x91,1,0x6a,0x28,0x6f,0x10,0x6e,0x55,0x1a,0x6f,0x69,0x6e,0x69,0x6e,
+0x67,0x67,0x72,0x6f,0x75,0x70,0x21,0x10,0x6e,0x57,0x10,0x65,0x59,0x10,0x61,1,
+0x66,0x5b,0x70,0x10,0x68,0x5d,0x66,0x9a,0x66,0x42,0x67,0x7a,0x68,0x8a,0x6b,0xa2,
+0x75,0x6c,0x11,0x61,0x6d,0x4c,0x12,0x61,0x64,0x68,0x4f,2,0x61,0x3e,0x65,0x4a,
+0x69,0x19,0x6e,0x61,0x6c,0x73,0x65,0x6d,0x6b,0x61,0x74,0x68,0x35,0x15,0x72,0x73,
+0x69,0x79,0x65,0x68,0x8f,0x86,0x10,0x68,0x33,0x10,0x61,1,0x66,0x37,0x6d,0x11,
+0x61,0x6c,0x39,1,0x61,0x40,0x65,0x3e,1,0x68,0x28,0x74,0x10,0x68,0x45,0x40,
+0x13,0x67,0x6f,0x61,0x6c,0x43,2,0x68,0x3b,0x6d,0x5c,0x6e,0x1a,0x69,0x66,0x69,
+0x72,0x6f,0x68,0x69,0x6e,0x67,0x79,0x61,1,0x6b,0x2a,0x70,0x10,0x61,0xa3,0x65,
+0x15,0x69,0x6e,0x6e,0x61,0x79,0x61,0xa3,0x64,0x1a,0x7a,0x61,0x6f,0x6e,0x68,0x65,
+0x68,0x67,0x6f,0x61,0x6c,0x3d,2,0x61,0x3a,0x68,0x44,0x6e,0x17,0x6f,0x74,0x74,
+0x65,0x64,0x68,0x65,0x68,0x4b,1,0x66,0x47,0x70,0x10,0x68,0x49,0x12,0x61,0x70,
+0x68,0x89,0x61,0x2e,0x62,0x8a,0x64,0xa2,0x51,0x65,0x31,2,0x66,0x3c,0x69,0x70,
+0x6c,1,0x61,0x28,0x65,0x10,0x66,0x27,0x11,0x70,0x68,0x25,0x14,0x72,0x69,0x63,
+0x61,0x6e,2,0x66,0x30,0x6e,0x36,0x71,0x11,0x61,0x66,0xa3,0x58,0x11,0x65,0x68,
+0xa3,0x56,0x12,0x6f,0x6f,0x6e,0xa3,0x57,0x10,0x6e,0x23,1,0x65,0x4a,0x75,0x10,
+0x72,0x1f,0x75,0x73,0x68,0x61,0x73,0x6b,0x69,0x79,0x65,0x68,0x62,0x61,0x72,0x72,
+0x65,0x65,0x8d,1,0x68,0x29,0x74,0x10,0x68,0x2b,0x11,0x61,0x6c,0x2c,0x16,0x61,
+0x74,0x68,0x72,0x69,0x73,0x68,0x2f,7,0x6e,0x2e,0x6e,0x2c,0x72,0x3e,0x74,0x56,
+0x75,0x21,0x18,0x6f,0x6e,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x21,0x28,0x1a,0x69,
+0x67,0x68,0x74,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x29,0x2a,0x19,0x72,0x61,0x6e,
+0x73,0x70,0x61,0x72,0x65,0x6e,0x74,0x2b,0x63,0x23,0x64,0x40,0x6a,0x56,0x6c,0x26,
+0x19,0x65,0x66,0x74,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x27,0x24,0x19,0x75,0x61,
+0x6c,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x25,0x19,0x6f,0x69,0x6e,0x63,0x61,0x75,
+0x73,0x69,0x6e,0x67,0x23,0,0x13,0x6e,0xc0,0xd0,0x73,0x49,0x73,0x48,0x75,0x78,
+0x77,0x84,0x78,0x9c,0x7a,0x10,0x77,0x58,1,0x6a,0x75,0x73,0x13,0x70,0x61,0x63,
+0x65,0x59,4,0x61,0x51,0x67,0x53,0x70,0x28,0x75,0x30,0x79,0x57,0x54,0x12,0x61,
+0x63,0x65,0x55,0x16,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x53,0x15,0x6e,0x6b,0x6e,
+0x6f,0x77,0x6e,0x21,1,0x6a,0x5d,0x6f,0x17,0x72,0x64,0x6a,0x6f,0x69,0x6e,0x65,
+0x72,0x5d,0x10,0x78,0x21,0x6e,0x60,0x6f,0xa2,0x41,0x70,0xa2,0x50,0x71,0xa2,0x6e,
+0x72,1,0x65,0x24,0x69,0x6f,0x1e,0x67,0x69,0x6f,0x6e,0x61,0x6c,0x69,0x6e,0x64,
+0x69,0x63,0x61,0x74,0x6f,0x72,0x6f,4,0x65,0x3e,0x6c,0x5b,0x6f,0x46,0x73,0x45,
+0x75,0x46,0x14,0x6d,0x65,0x72,0x69,0x63,0x47,0x15,0x78,0x74,0x6c,0x69,0x6e,0x65,
+0x5b,0x17,0x6e,0x73,0x74,0x61,0x72,0x74,0x65,0x72,0x45,0x10,0x70,0x48,0x1c,0x65,
+0x6e,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x49,1,0x6f,0x3e,
+0x72,0x4c,0x1a,0x65,0x66,0x69,0x78,0x6e,0x75,0x6d,0x65,0x72,0x69,0x63,0x4d,0x4a,
+0x1b,0x73,0x74,0x66,0x69,0x78,0x6e,0x75,0x6d,0x65,0x72,0x69,0x63,0x4b,0x10,0x75,
+0x4e,0x16,0x6f,0x74,0x61,0x74,0x69,0x6f,0x6e,0x4f,0x68,0x7b,0x68,0x50,0x69,0x86,
+0x6a,0xa2,0x61,0x6c,0xa2,0x65,0x6d,0x1c,0x61,0x6e,0x64,0x61,0x74,0x6f,0x72,0x79,
+0x62,0x72,0x65,0x61,0x6b,0x2d,4,0x32,0x5f,0x33,0x61,0x65,0x34,0x6c,0x6d,0x79,
+0x3a,0x13,0x70,0x68,0x65,0x6e,0x3b,0x19,0x62,0x72,0x65,0x77,0x6c,0x65,0x74,0x74,
+0x65,0x72,0x6d,2,0x64,0x28,0x6e,0x3c,0x73,0x41,0x3c,0x18,0x65,0x6f,0x67,0x72,
+0x61,0x70,0x68,0x69,0x63,0x3d,0x3e,1,0x66,0x3e,0x73,0x11,0x65,0x70,1,0x61,
+0x22,0x65,0x14,0x72,0x61,0x62,0x6c,0x65,0x3f,0x18,0x69,0x78,0x6e,0x75,0x6d,0x65,
+0x72,0x69,0x63,0x41,2,0x6c,0x63,0x74,0x65,0x76,0x67,1,0x66,0x43,0x69,0x15,
+0x6e,0x65,0x66,0x65,0x65,0x64,0x43,0x61,0x40,0x62,0x70,0x63,0xa2,0x55,0x65,0xa2,
+0xdb,0x67,0x10,0x6c,0x38,0x11,0x75,0x65,0x39,2,0x69,0x23,0x6c,0x34,0x6d,0x16,
+0x62,0x69,0x67,0x75,0x6f,0x75,0x73,0x23,0x24,0x17,0x70,0x68,0x61,0x62,0x65,0x74,
+0x69,0x63,0x25,4,0x32,0x27,0x61,0x29,0x62,0x2b,0x6b,0x2d,0x72,0x12,0x65,0x61,
+0x6b,2,0x61,0x36,0x62,0x3e,0x73,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x57,0x13,
+0x66,0x74,0x65,0x72,0x29,1,0x65,0x2a,0x6f,0x11,0x74,0x68,0x27,0x13,0x66,0x6f,
+0x72,0x65,0x2b,7,0x6d,0x51,0x6d,0x33,0x6f,0x28,0x70,0x69,0x72,0x35,1,0x6d,
+0x76,0x6e,1,0x64,0x3c,0x74,0x1a,0x69,0x6e,0x67,0x65,0x6e,0x74,0x62,0x72,0x65,
+0x61,0x6b,0x2f,0x15,0x69,0x74,0x69,0x6f,0x6e,0x61,0x1f,0x6c,0x6a,0x61,0x70,0x61,
+0x6e,0x65,0x73,0x65,0x73,0x74,0x61,0x72,0x74,0x65,0x72,0x6b,1,0x62,0x3a,0x70,
+0x19,0x6c,0x65,0x78,0x63,0x6f,0x6e,0x74,0x65,0x78,0x74,0x51,0x18,0x69,0x6e,0x69,
+0x6e,0x67,0x6d,0x61,0x72,0x6b,0x33,0x61,0x6a,0x62,0x2f,0x6a,0x6b,0x6c,0x30,0x13,
+0x6f,0x73,0x65,0x70,1,0x61,0x38,0x75,0x18,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,
+0x6f,0x6e,0x31,0x18,0x72,0x65,0x6e,0x74,0x68,0x65,0x73,0x69,0x73,0x69,0x1b,0x72,
+0x72,0x69,0x61,0x67,0x65,0x72,0x65,0x74,0x75,0x72,0x6e,0x35,2,0x62,0x3e,0x6d,
+0x46,0x78,0x36,0x18,0x63,0x6c,0x61,0x6d,0x61,0x74,0x69,0x6f,0x6e,0x37,0x70,0x12,
+0x61,0x73,0x65,0x71,0x72,0x16,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x73,1,0x64,
+0x42,0x6e,1,0x6f,0x32,0x75,0x26,0x14,0x6d,0x65,0x72,0x69,0x63,0x27,0x11,0x6e,
+0x65,0x21,1,0x65,0x2e,0x69,0x24,0x12,0x67,0x69,0x74,0x25,0x22,0x14,0x63,0x69,
+0x6d,0x61,0x6c,0x23,0,0x18,0x6e,0xc4,0x2a,0x74,0xc1,0x6d,0x77,0x96,0x77,0xa2,
+0x4c,0x78,0xa2,0x70,0x79,0xa2,0x7a,0x7a,6,0x73,0x1e,0x73,0x34,0x78,0x42,0x79,
+0x48,0x7a,0x11,0x7a,0x7a,0xa3,0x67,0x10,0x79,1,0x65,0xa3,0xae,0x6d,0xa3,0x81,
+0x11,0x78,0x78,0xa3,0x66,0x11,0x79,0x79,0x21,0x61,0x30,0x69,0x58,0x6d,0x11,0x74,
+0x68,0xa3,0x80,0x10,0x6e,1,0x61,0x26,0x62,0xa3,0xb1,0x1a,0x62,0x61,0x7a,0x61,
+0x72,0x73,0x71,0x75,0x61,0x72,0x65,0xa3,0xb1,0x11,0x6e,0x68,0x23,2,0x61,0x30,
+0x63,0x5a,0x6f,0x11,0x6c,0x65,0xa3,0x9b,1,0x6e,0x3c,0x72,0x10,0x61,0xa2,0x92,
+0x15,0x6e,0x67,0x63,0x69,0x74,0x69,0xa3,0x92,0x12,0x63,0x68,0x6f,0xa3,0xbc,0x11,
+0x68,0x6f,0xa3,0xbc,1,0x70,0x2c,0x73,0x11,0x75,0x78,0xa3,0x65,0x11,0x65,0x6f,
+0x9b,1,0x65,0x2c,0x69,0x72,0x11,0x69,0x69,0x73,0x11,0x7a,0x69,0xa2,0xc0,0x11,
+0x64,0x69,0xa3,0xc0,0x74,0x4a,0x75,0xa2,0xba,0x76,1,0x61,0x2c,0x69,0x11,0x73,
+0x70,0xa3,0x64,0x10,0x69,0xa2,0x63,0x10,0x69,0xa3,0x63,5,0x67,0x36,0x67,0x68,
+0x68,0x6c,0x69,2,0x62,0x3a,0x66,0x4a,0x72,0x10,0x68,0xa2,0x9e,0x12,0x75,0x74,
+0x61,0xa3,0x9e,1,0x65,0x24,0x74,0x6f,0x12,0x74,0x61,0x6e,0x6f,0x14,0x69,0x6e,
+0x61,0x67,0x68,0x99,0x11,0x6c,0x67,0x75,0x10,0x61,1,0x61,0x24,0x69,0x6d,0x6a,
+0x11,0x6e,0x61,0x6b,0x61,0x30,0x65,0xa2,0x5b,0x66,0x11,0x6e,0x67,0x99,6,0x6c,
+0x21,0x6c,0x32,0x6d,0x38,0x6e,0x44,0x76,0x10,0x74,0xa3,0x7f,1,0x65,0x89,0x75,
+0x97,1,0x69,0x24,0x6c,0x67,0x10,0x6c,0x67,0x10,0x67,0xa2,0x9a,0x11,0x75,0x74,
+0xa3,0x9a,0x67,0x36,0x69,0x52,0x6b,0x10,0x72,0xa2,0x99,0x10,0x69,0xa3,0x99,1,
+0x61,0x30,0x62,0x7a,0x13,0x61,0x6e,0x77,0x61,0x7b,0x12,0x6c,0x6f,0x67,0x75,2,
+0x6c,0x32,0x74,0x34,0x76,0x12,0x69,0x65,0x74,0xa3,0x7f,0x10,0x65,0x89,0x12,0x68,
+0x61,0x6d,0xa3,0x6a,1,0x6c,0x2a,0x6e,0x10,0x67,0xa3,0x62,0x10,0x75,0x68,0x11,
+0x67,0x75,0x69,1,0x67,0x32,0x6e,0x14,0x6b,0x6e,0x6f,0x77,0x6e,0xa3,0x67,0x11,
+0x61,0x72,0x8a,0x13,0x69,0x74,0x69,0x63,0x8b,0x71,0xc1,0x13,0x71,0xa2,0xde,0x72,
+0xa2,0xe3,0x73,6,0x69,0x8a,0x69,0x72,0x6f,0xa2,0x4c,0x75,0xa2,0x75,0x79,1,
+0x6c,0x46,0x72,4,0x63,0x65,0x65,0xa3,0x5f,0x69,0x2c,0x6a,0xa3,0x60,0x6e,0xa3,
+0x61,0x11,0x61,0x63,0x65,0x10,0x6f,0x94,0x16,0x74,0x69,0x6e,0x61,0x67,0x72,0x69,
+0x95,2,0x64,0x3c,0x67,0x4c,0x6e,1,0x64,0xa3,0x91,0x68,0x62,0x12,0x61,0x6c,
+0x61,0x63,0x10,0x64,0xa2,0xa6,0x12,0x68,0x61,0x6d,0xa3,0xa6,0x17,0x6e,0x77,0x72,
+0x69,0x74,0x69,0x6e,0x67,0xa3,0x70,2,0x67,0x3a,0x72,0x52,0x79,0x10,0x6f,0xa2,
+0xb0,0x12,0x6d,0x62,0x6f,0xa3,0xb0,1,0x64,0x26,0x6f,0xa3,0xb8,0xa2,0xb7,0x12,
+0x69,0x61,0x6e,0xa3,0xb7,0x10,0x61,0xa2,0x98,0x16,0x73,0x6f,0x6d,0x70,0x65,0x6e,
+0x67,0xa3,0x98,0x11,0x6e,0x64,0xa2,0x71,0x14,0x61,0x6e,0x65,0x73,0x65,0xa3,0x71,
+0x61,0x5c,0x67,0xa2,0x43,0x68,1,0x61,0x2a,0x72,0x10,0x64,0xa3,0x97,2,0x72,
+0x28,0x76,0x30,0x77,0x87,0x12,0x61,0x64,0x61,0xa3,0x97,0x12,0x69,0x61,0x6e,0x87,
+2,0x6d,0x40,0x72,0x58,0x75,0x10,0x72,0xa2,0x6f,0x15,0x61,0x73,0x68,0x74,0x72,
+0x61,0xa3,0x6f,1,0x61,0x26,0x72,0xa3,0x7e,0x14,0x72,0x69,0x74,0x61,0x6e,0xa3,
+0x7e,1,0x61,0xa3,0x5e,0x62,0xa3,0x85,0x11,0x6e,0x77,0xa3,0x70,0x11,0x61,0x61,
+1,0x63,0x2f,0x69,0x23,3,0x65,0x3e,0x6a,0x48,0x6f,0x4e,0x75,0x10,0x6e,1,
+0x69,0x24,0x72,0x61,0x10,0x63,0x61,0x13,0x6a,0x61,0x6e,0x67,0xa3,0x6e,0x11,0x6e,
+0x67,0xa3,0x6e,1,0x68,0x2a,0x72,0x10,0x6f,0xa3,0x5d,0x10,0x67,0xa3,0xb6,0x6e,
+0xa2,0x83,0x6f,0xa2,0xf2,0x70,5,0x6c,0x1e,0x6c,0x44,0x72,0x4a,0x73,0x1b,0x61,
+0x6c,0x74,0x65,0x72,0x70,0x61,0x68,0x6c,0x61,0x76,0x69,0xa3,0x7b,0x11,0x72,0x64,
+0xa3,0x5c,0x11,0x74,0x69,0xa3,0x7d,0x61,0x7c,0x65,0xa2,0x54,0x68,3,0x61,0x3e,
+0x6c,0x4e,0x6e,0x5e,0x6f,0x16,0x65,0x6e,0x69,0x63,0x69,0x61,0x6e,0xa3,0x5b,0x10,
+0x67,0xa2,0x5a,0x12,0x73,0x70,0x61,0xa3,0x5a,2,0x69,0xa3,0x7a,0x70,0xa3,0x7b,
+0x76,0xa3,0x7c,0x10,0x78,0xa3,0x5b,2,0x68,0x3e,0x6c,0x50,0x75,0x10,0x63,0xa2,
+0xa5,0x14,0x69,0x6e,0x68,0x61,0x75,0xa3,0xa5,0x17,0x61,0x77,0x68,0x68,0x6d,0x6f,
+0x6e,0x67,0xa3,0x4b,0x10,0x6d,0xa2,0x90,0x14,0x79,0x72,0x65,0x6e,0x65,0xa3,0x90,
+0x11,0x72,0x6d,0xa3,0x59,6,0x6b,0x36,0x6b,0x56,0x73,0x6e,0x75,0x74,0x79,0x11,
+0x69,0x61,0x1f,0x6b,0x65,0x6e,0x67,0x70,0x75,0x61,0x63,0x68,0x75,0x65,0x68,0x6d,
+0x6f,0x6e,0x67,0xa3,0xba,1,0x67,0x2e,0x6f,0xa2,0x57,0x10,0x6f,0xa3,0x57,0x10,
+0x62,0xa3,0x84,0x11,0x68,0x75,0xa3,0x96,0x12,0x73,0x68,0x75,0xa3,0x96,0x61,0x42,
+0x62,0x80,0x65,0x10,0x77,1,0x61,0xa3,0xaa,0x74,0x14,0x61,0x69,0x6c,0x75,0x65,
+0x97,2,0x62,0x2e,0x6e,0x3c,0x72,0x10,0x62,0xa3,0x8e,0x15,0x61,0x74,0x61,0x65,
+0x61,0x6e,0xa3,0x8f,0x10,0x64,0xa2,0xbb,0x16,0x69,0x6e,0x61,0x67,0x61,0x72,0x69,
+0xa3,0xbb,0x11,0x61,0x74,0xa3,0x8f,3,0x67,0x5a,0x6c,0x6c,0x72,0xa2,0x93,0x73,
+2,0x61,0x36,0x67,0x3c,0x6d,0x10,0x61,0x84,0x12,0x6e,0x79,0x61,0x85,0x11,0x67,
+0x65,0xa3,0xab,0x10,0x65,0xa3,0xab,1,0x61,0x2a,0x68,0x11,0x61,0x6d,0x5b,0x10,
+0x6d,0x5b,1,0x63,0xa2,0x60,0x64,5,0x70,0x37,0x70,0x36,0x73,0x54,0x74,0x14,
+0x75,0x72,0x6b,0x69,0x63,0xa3,0x58,0x11,0x65,0x72,1,0x6d,0x2c,0x73,0x12,0x69,
+0x61,0x6e,0x9b,0x11,0x69,0x63,0xa3,0x59,0x10,0x6f,1,0x67,0x3a,0x75,0x18,0x74,
+0x68,0x61,0x72,0x61,0x62,0x69,0x61,0x6e,0xa3,0x85,0x13,0x64,0x69,0x61,0x6e,0xa3,
+0xb8,0x68,0x42,0x69,0x54,0x6e,0x1a,0x6f,0x72,0x74,0x68,0x61,0x72,0x61,0x62,0x69,
+0x61,0x6e,0xa3,0x8e,0x17,0x75,0x6e,0x67,0x61,0x72,0x69,0x61,0x6e,0xa3,0x4c,0x14,
+0x74,0x61,0x6c,0x69,0x63,0x5d,1,0x68,0x26,0x6b,0xa3,0x6d,0x12,0x69,0x6b,0x69,
+0xa3,0x6d,2,0x69,0x2c,0x6b,0x30,0x79,0x10,0x61,0x5f,0x11,0x79,0x61,0x5f,0x10,
+0x68,0xa3,0x58,0x68,0xc3,0xd,0x6b,0xc2,0x24,0x6b,0xa4,0x17,0x6c,0xa4,0xb2,0x6d,
+8,0x6f,0x46,0x6f,0x48,0x72,0x74,0x74,0x80,0x75,0x86,0x79,1,0x61,0x28,0x6d,
+0x10,0x72,0x59,0x13,0x6e,0x6d,0x61,0x72,0x59,2,0x64,0x2e,0x6e,0x32,0x6f,0x10,
+0x6e,0xa3,0x72,0x10,0x69,0xa3,0xa3,0x10,0x67,0x56,0x14,0x6f,0x6c,0x69,0x61,0x6e,
+0x57,0x10,0x6f,0xa2,0x95,0x10,0x6f,0xa3,0x95,0x11,0x65,0x69,0xa3,0x73,0x11,0x6c,
+0x74,0xa2,0xa4,0x12,0x61,0x6e,0x69,0xa3,0xa4,0x61,0x36,0x65,0xa2,0x67,0x69,0xa2,
+0xbd,0x6c,0x11,0x79,0x6d,0x55,6,0x6e,0x38,0x6e,0x32,0x72,0x5c,0x73,0x6c,0x79,
+0x10,0x61,0xa3,0x55,1,0x64,0x38,0x69,0xa2,0x79,0x15,0x63,0x68,0x61,0x65,0x61,
+0x6e,0xa3,0x79,0xa2,0x54,0x12,0x61,0x69,0x63,0xa3,0x54,0x10,0x63,0xa2,0xa9,0x12,
+0x68,0x65,0x6e,0xa3,0xa9,0x18,0x61,0x72,0x61,0x6d,0x67,0x6f,0x6e,0x64,0x69,0xa3,
+0xaf,0x68,0x36,0x6b,0x4c,0x6c,0x15,0x61,0x79,0x61,0x6c,0x61,0x6d,0x55,1,0x61,
+0x26,0x6a,0xa3,0xa0,0x13,0x6a,0x61,0x6e,0x69,0xa3,0xa0,0x10,0x61,0xa2,0xb4,0x12,
+0x73,0x61,0x72,0xa3,0xb4,3,0x64,0x78,0x65,0x94,0x6e,0xa2,0x42,0x72,1,0x63,
+0xa3,0x8d,0x6f,0xa2,0x56,0x13,0x69,0x74,0x69,0x63,1,0x63,0x3c,0x68,0x19,0x69,
+0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0x56,0x15,0x75,0x72,0x73,0x69,
+0x76,0x65,0xa3,0x8d,1,0x65,0x26,0x66,0xa3,0xb5,0x16,0x66,0x61,0x69,0x64,0x72,
+0x69,0x6e,0xa3,0xb5,0x17,0x74,0x65,0x69,0x6d,0x61,0x79,0x65,0x6b,0xa3,0x73,0x10,
+0x64,0xa2,0x8c,0x17,0x65,0x6b,0x69,0x6b,0x61,0x6b,0x75,0x69,0xa3,0x8c,0x11,0x61,
+0x6f,0xa3,0x5c,6,0x6e,0x1a,0x6e,0x34,0x6f,0x38,0x70,0x3e,0x74,0x11,0x68,0x69,
+0xa3,0x78,0x11,0x64,0x61,0x4b,0x11,0x72,0x65,0xa3,0x77,0x11,0x65,0x6c,0xa3,0x8a,
+0x61,0x30,0x68,0x9a,0x69,0x11,0x74,0x73,0xa3,0xbf,4,0x69,0x3c,0x6c,0x44,0x6e,
+0x48,0x74,0x56,0x79,0x13,0x61,0x68,0x6c,0x69,0xa3,0x4f,0x12,0x74,0x68,0x69,0xa3,
+0x78,0x10,0x69,0xa3,0x4f,1,0x61,0x4d,0x6e,0x12,0x61,0x64,0x61,0x4b,0x14,0x61,
+0x6b,0x61,0x6e,0x61,0x4c,0x19,0x6f,0x72,0x68,0x69,0x72,0x61,0x67,0x61,0x6e,0x61,
+0x8d,4,0x61,0x40,0x69,0x52,0x6d,0x70,0x6f,0x7c,0x75,0x15,0x64,0x61,0x77,0x61,
+0x64,0x69,0xa3,0x91,0x10,0x72,0x92,0x15,0x6f,0x73,0x68,0x74,0x68,0x69,0x93,0x1d,
+0x74,0x61,0x6e,0x73,0x6d,0x61,0x6c,0x6c,0x73,0x63,0x72,0x69,0x70,0x74,0xa3,0xbf,
+1,0x65,0x24,0x72,0x4f,0x10,0x72,0x4f,0x10,0x6a,0xa2,0x9d,0x11,0x6b,0x69,0xa3,
+0x9d,4,0x61,0x5c,0x65,0x90,0x69,0xa0,0x6f,0xa2,0x5d,0x79,1,0x63,0x34,0x64,
+0x10,0x69,0xa2,0x6c,0x11,0x61,0x6e,0xa3,0x6c,0x10,0x69,0xa2,0x6b,0x11,0x61,0x6e,
+0xa3,0x6b,2,0x6e,0x42,0x6f,0x46,0x74,3,0x66,0xa3,0x50,0x67,0xa3,0x51,0x69,
+0x24,0x6e,0x53,0x10,0x6e,0x53,0x10,0x61,0xa3,0x6a,0x50,0x10,0x6f,0x51,0x11,0x70,
+0x63,0xa2,0x52,0x11,0x68,0x61,0xa3,0x52,2,0x6d,0x2e,0x6e,0x36,0x73,0x10,0x75,
+0xa3,0x83,0x10,0x62,0x80,0x10,0x75,0x81,2,0x61,0xa3,0x53,0x62,0x83,0x65,0x11,
+0x61,0x72,1,0x61,0xa3,0x53,0x62,0x83,0x11,0x6d,0x61,0xa3,0x8b,0x68,0x6e,0x69,
+0xa2,0x95,0x6a,2,0x61,0x30,0x70,0x52,0x75,0x11,0x72,0x63,0xa3,0x94,1,0x6d,
+0x38,0x76,0x10,0x61,0xa2,0x4e,0x13,0x6e,0x65,0x73,0x65,0xa3,0x4e,0x10,0x6f,0xa3,
+0xad,0x11,0x61,0x6e,0xa3,0x69,6,0x6c,0x1e,0x6c,0x34,0x6d,0x3a,0x72,0x48,0x75,
+0x11,0x6e,0x67,0xa3,0x4c,0x11,0x75,0x77,0xa3,0x9c,0x10,0x6e,1,0x67,0xa3,0x4b,
+0x70,0xa3,0xba,0x11,0x6b,0x74,0x8d,0x61,0x3c,0x65,0xa2,0x43,0x69,0x11,0x72,0x61,
+0x48,0x13,0x67,0x61,0x6e,0x61,0x49,1,0x6e,0x34,0x74,0x10,0x72,0xa2,0xa2,0x11,
+0x61,0x6e,0xa3,0xa2,0x42,6,0x6f,0xe,0x6f,0x77,0x73,0xa3,0x49,0x74,0xa3,0x4a,
+0x75,0x12,0x6e,0x6f,0x6f,0x77,0x62,0xa3,0xac,0x67,0x3e,0x69,0x42,0x19,0x66,0x69,
+0x72,0x6f,0x68,0x69,0x6e,0x67,0x79,0x61,0xa3,0xb6,0x44,0x11,0x75,0x6c,0x45,0x11,
+0x62,0x72,0x46,0x11,0x65,0x77,0x47,2,0x6d,0x2e,0x6e,0x4a,0x74,0x11,0x61,0x6c,
+0x5d,0x1c,0x70,0x65,0x72,0x69,0x61,0x6c,0x61,0x72,0x61,0x6d,0x61,0x69,0x63,0xa3,
+0x74,2,0x64,0x66,0x68,0x6a,0x73,0x1b,0x63,0x72,0x69,0x70,0x74,0x69,0x6f,0x6e,
+0x61,0x6c,0x70,0x61,1,0x68,0x32,0x72,0x14,0x74,0x68,0x69,0x61,0x6e,0xa3,0x7d,
+0x13,0x6c,0x61,0x76,0x69,0xa3,0x7a,0x10,0x73,0xa3,0x4d,0x15,0x65,0x72,0x69,0x74,
+0x65,0x64,0x23,0x64,0xc1,0xd,0x64,0xa2,0x7a,0x65,0xa2,0xc1,0x67,4,0x65,0x82,
+0x6c,0x9a,0x6f,0xa2,0x46,0x72,0xa2,0x55,0x75,2,0x6a,0x3c,0x6e,0x4e,0x72,1,
+0x6d,0x24,0x75,0x41,0x13,0x75,0x6b,0x68,0x69,0x41,1,0x61,0x24,0x72,0x3f,0x13,
+0x72,0x61,0x74,0x69,0x3f,0x18,0x6a,0x61,0x6c,0x61,0x67,0x6f,0x6e,0x64,0x69,0xa3,
+0xb3,0x10,0x6f,1,0x6b,0xa3,0x48,0x72,0x38,0x13,0x67,0x69,0x61,0x6e,0x39,0x11,
+0x61,0x67,0x90,0x15,0x6f,0x6c,0x69,0x74,0x69,0x63,0x91,1,0x6e,0x30,0x74,0x10,
+0x68,0x3a,0x11,0x69,0x63,0x3b,1,0x67,0xa3,0xb3,0x6d,0xa3,0xaf,1,0x61,0x32,
+0x65,1,0x65,0x24,0x6b,0x3d,0x10,0x6b,0x3d,0x10,0x6e,0xa2,0x89,0x12,0x74,0x68,
+0x61,0xa3,0x89,4,0x65,0x46,0x69,0x6c,0x6f,0x8c,0x73,0x9a,0x75,0x11,0x70,0x6c,
+0xa2,0x87,0x13,0x6f,0x79,0x61,0x6e,0xa3,0x87,1,0x73,0x38,0x76,0x10,0x61,0x34,
+0x15,0x6e,0x61,0x67,0x61,0x72,0x69,0x35,0x13,0x65,0x72,0x65,0x74,0x33,1,0x61,
+0x36,0x76,0x16,0x65,0x73,0x61,0x6b,0x75,0x72,0x75,0xa3,0xbe,0x10,0x6b,0xa3,0xbe,
+0x11,0x67,0x72,0xa2,0xb2,0x10,0x61,0xa3,0xb2,0x11,0x72,0x74,0x33,2,0x67,0x3a,
+0x6c,0x72,0x74,0x11,0x68,0x69,0x36,0x13,0x6f,0x70,0x69,0x63,0x37,0x10,0x79,2,
+0x64,0xa3,0x45,0x68,0xa3,0x46,0x70,0xa2,0x47,0x1e,0x74,0x69,0x61,0x6e,0x68,0x69,
+0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0x47,1,0x62,0x36,0x79,0x10,
+0x6d,0xa2,0xb9,0x12,0x61,0x69,0x63,0xa3,0xb9,0x10,0x61,0xa2,0x88,0x12,0x73,0x61,
+0x6e,0xa3,0x88,0x61,0xa2,0xb4,0x62,0xa4,0x19,0x63,6,0x6f,0x3d,0x6f,0x5a,0x70,
+0x76,0x75,0x7a,0x79,1,0x70,0x3e,0x72,2,0x69,0x2a,0x6c,0x31,0x73,0xa3,0x44,
+0x13,0x6c,0x6c,0x69,0x63,0x31,0x13,0x72,0x69,0x6f,0x74,0x7f,1,0x6d,0x30,0x70,
+0x10,0x74,0x2e,0x11,0x69,0x63,0x2f,0x12,0x6d,0x6f,0x6e,0x21,0x11,0x72,0x74,0x7f,
+0x16,0x6e,0x65,0x69,0x66,0x6f,0x72,0x6d,0xa3,0x65,0x61,0x32,0x68,0xa2,0x41,0x69,
+0x11,0x72,0x74,0xa3,0x43,3,0x6b,0x4c,0x6e,0x50,0x72,0x76,0x75,0x1d,0x63,0x61,
+0x73,0x69,0x61,0x6e,0x61,0x6c,0x62,0x61,0x6e,0x69,0x61,0x6e,0xa3,0x9f,0x10,0x6d,
+0xa3,0x76,1,0x61,0x24,0x73,0x71,0x1d,0x64,0x69,0x61,0x6e,0x61,0x62,0x6f,0x72,
+0x69,0x67,0x69,0x6e,0x61,0x6c,0x71,0x10,0x69,0xa2,0x68,0x11,0x61,0x6e,0xa3,0x68,
+3,0x61,0x32,0x65,0x44,0x6f,0x52,0x72,0x10,0x73,0xa3,0xbd,1,0x6b,0x26,0x6d,
+0xa3,0x42,0x11,0x6d,0x61,0xa3,0x76,0x10,0x72,0x2c,0x13,0x6f,0x6b,0x65,0x65,0x2d,
+0x16,0x72,0x61,0x73,0x6d,0x69,0x61,0x6e,0xa3,0xbd,6,0x68,0x4a,0x68,0x48,0x6e,
+0x4e,0x72,0x76,0x76,1,0x65,0x2a,0x73,0x10,0x74,0xa3,0x75,0x13,0x73,0x74,0x61,
+0x6e,0xa3,0x75,0x11,0x6f,0x6d,0xa3,0xa1,0x11,0x61,0x74,0x1f,0x6f,0x6c,0x69,0x61,
+0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0x9c,1,0x61,
+0x3e,0x6d,2,0x65,0x2a,0x69,0xa3,0x74,0x6e,0x27,0x13,0x6e,0x69,0x61,0x6e,0x27,
+0x10,0x62,0x24,0x11,0x69,0x63,0x25,0x64,0x30,0x66,0x44,0x67,0x11,0x68,0x62,0xa3,
+0x9f,0x10,0x6c,1,0x61,0x26,0x6d,0xa3,0xa7,0x10,0x6d,0xa3,0xa7,0x11,0x61,0x6b,
+0xa3,0x93,6,0x6c,0x3c,0x6c,0x52,0x6f,0x56,0x72,0x66,0x75,1,0x67,0x30,0x68,
+1,0x64,0x79,0x69,0x10,0x64,0x79,0x10,0x69,0x8e,0x13,0x6e,0x65,0x73,0x65,0x8f,
+0x11,0x69,0x73,0xa1,0x11,0x70,0x6f,0x2a,0x13,0x6d,0x6f,0x66,0x6f,0x2b,0x10,0x61,
+1,0x68,0x2e,0x69,0x7c,0x12,0x6c,0x6c,0x65,0x7d,0xa2,0x41,0x11,0x6d,0x69,0xa3,
+0x41,0x61,0x48,0x65,0x9c,0x68,1,0x61,0x2a,0x6b,0x10,0x73,0xa3,0xa8,0x15,0x69,
+0x6b,0x73,0x75,0x6b,0x69,0xa3,0xa8,3,0x6c,0x3a,0x6d,0x48,0x73,0x54,0x74,1,
+0x61,0x24,0x6b,0x9f,0x10,0x6b,0x9f,0x10,0x69,0x9c,0x13,0x6e,0x65,0x73,0x65,0x9d,
+0x10,0x75,0xa2,0x82,0x10,0x6d,0xa3,0x82,0x10,0x73,0xa2,0x86,0x13,0x61,0x76,0x61,
+0x68,0xa3,0x86,0x11,0x6e,0x67,0x28,0x12,0x61,0x6c,0x69,0x29,3,0x6c,0x42,0x6e,
+0x90,0x74,0xa2,0x46,0x76,0x24,0x17,0x6f,0x77,0x65,0x6c,0x6a,0x61,0x6d,0x6f,0x25,
+0x22,1,0x65,0x54,0x76,0x28,1,0x73,0x38,0x74,0x2a,0x17,0x73,0x79,0x6c,0x6c,
+0x61,0x62,0x6c,0x65,0x2b,0x16,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x29,0x18,0x61,
+0x64,0x69,0x6e,0x67,0x6a,0x61,0x6d,0x6f,0x23,1,0x61,0x21,0x6f,0x1a,0x74,0x61,
+0x70,0x70,0x6c,0x69,0x63,0x61,0x62,0x6c,0x65,0x21,0x26,0x1a,0x72,0x61,0x69,0x6c,
+0x69,0x6e,0x67,0x6a,0x61,0x6d,0x6f,0x27,1,0x6e,0x2c,0x79,0x22,0x11,0x65,0x73,
+0x23,0x20,0x10,0x6f,0x21,1,0x6e,0x2c,0x79,0x22,0x11,0x65,0x73,0x23,0x20,0x10,
+0x6f,0x21,2,0x6d,0x30,0x6e,0x3a,0x79,0x22,0x11,0x65,0x73,0x23,0x24,0x13,0x61,
+0x79,0x62,0x65,0x25,0x20,0x10,0x6f,0x21,2,0x6d,0x30,0x6e,0x3a,0x79,0x22,0x11,
+0x65,0x73,0x23,0x24,0x13,0x61,0x79,0x62,0x65,0x25,0x20,0x10,0x6f,0x21,0xb,0x72,
+0x39,0x76,0xc,0x76,0x33,0x78,0x2a,0x7a,0x11,0x77,0x6a,0x43,0x10,0x78,0x21,0x72,
+0x28,0x73,0x50,0x74,0x31,1,0x65,0x24,0x69,0x39,0x1e,0x67,0x69,0x6f,0x6e,0x61,
+0x6c,0x69,0x6e,0x64,0x69,0x63,0x61,0x74,0x6f,0x72,0x39,1,0x6d,0x35,0x70,0x18,
+0x61,0x63,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x35,0x6c,0x1f,0x6c,0x3c,0x6f,0x4a,
+0x70,1,0x70,0x37,0x72,0x14,0x65,0x70,0x65,0x6e,0x64,0x37,0x28,1,0x66,0x2b,
+0x76,0x2c,0x10,0x74,0x2f,0x13,0x74,0x68,0x65,0x72,0x21,0x63,0x4c,0x65,0x64,0x67,
+1,0x61,0x3a,0x6c,0x19,0x75,0x65,0x61,0x66,0x74,0x65,0x72,0x7a,0x77,0x6a,0x41,
+0x10,0x7a,0x41,2,0x6e,0x23,0x6f,0x24,0x72,0x25,0x14,0x6e,0x74,0x72,0x6f,0x6c,
+0x23,2,0x62,0x34,0x6d,0x4e,0x78,0x26,0x13,0x74,0x65,0x6e,0x64,0x27,0x3a,1,
+0x61,0x24,0x67,0x3d,0x11,0x73,0x65,0x3a,0x12,0x67,0x61,0x7a,0x3d,0x3e,0x16,0x6f,
+0x64,0x69,0x66,0x69,0x65,0x72,0x3f,9,0x6e,0x4a,0x6e,0x34,0x6f,0x44,0x73,0x60,
+0x75,0x94,0x78,0x10,0x78,0x21,0x10,0x75,0x2a,0x14,0x6d,0x65,0x72,0x69,0x63,0x2b,
+1,0x6c,0x2c,0x74,0x12,0x68,0x65,0x72,0x21,0x14,0x65,0x74,0x74,0x65,0x72,0x2d,
+3,0x63,0x36,0x65,0x46,0x70,0x31,0x74,0x32,0x12,0x65,0x72,0x6d,0x33,0x3c,0x16,
+0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x3d,0x2e,0x10,0x70,0x2f,0x10,0x70,0x34,0x12,
+0x70,0x65,0x72,0x35,0x61,0x46,0x63,0x52,0x65,0x64,0x66,0x72,0x6c,2,0x65,0x2d,
+0x66,0x3b,0x6f,0x28,0x12,0x77,0x65,0x72,0x29,0x10,0x74,0x22,0x12,0x65,0x72,0x6d,
+0x23,1,0x6c,0x24,0x72,0x37,0x24,0x12,0x6f,0x73,0x65,0x25,0x10,0x78,0x38,0x13,
+0x74,0x65,0x6e,0x64,0x39,0x10,0x6f,0x26,0x13,0x72,0x6d,0x61,0x74,0x27,0,0x10,
+0x6c,0x88,0x72,0x40,0x72,0x36,0x73,0x5e,0x77,0x7a,0x78,0x8a,0x7a,0x11,0x77,0x6a,
+0x4b,1,0x65,0x24,0x69,0x3b,0x1e,0x67,0x69,0x6f,0x6e,0x61,0x6c,0x69,0x6e,0x64,
+0x69,0x63,0x61,0x74,0x6f,0x72,0x3b,1,0x69,0x24,0x71,0x3f,0x18,0x6e,0x67,0x6c,
+0x65,0x71,0x75,0x6f,0x74,0x65,0x3f,0x17,0x73,0x65,0x67,0x73,0x70,0x61,0x63,0x65,
+0x4d,0x10,0x78,0x21,0x6c,0x36,0x6d,0x3c,0x6e,0x76,0x6f,0x13,0x74,0x68,0x65,0x72,
+0x21,1,0x65,0x23,0x66,0x35,3,0x62,0x37,0x69,0x28,0x6c,0x29,0x6e,0x2b,0x10,
+0x64,1,0x6c,0x34,0x6e,0x11,0x75,0x6d,0x2a,0x12,0x6c,0x65,0x74,0x37,0x14,0x65,
+0x74,0x74,0x65,0x72,0x29,2,0x65,0x36,0x6c,0x39,0x75,0x2c,0x14,0x6d,0x65,0x72,
+0x69,0x63,0x2d,0x14,0x77,0x6c,0x69,0x6e,0x65,0x39,0x66,0x3f,0x66,0x40,0x67,0x4e,
+0x68,0x70,0x6b,0x10,0x61,0x26,0x15,0x74,0x61,0x6b,0x61,0x6e,0x61,0x27,0x10,0x6f,
+0x24,0x13,0x72,0x6d,0x61,0x74,0x25,1,0x61,0x3a,0x6c,0x19,0x75,0x65,0x61,0x66,
+0x74,0x65,0x72,0x7a,0x77,0x6a,0x49,0x10,0x7a,0x49,1,0x65,0x24,0x6c,0x3d,0x19,
+0x62,0x72,0x65,0x77,0x6c,0x65,0x74,0x74,0x65,0x72,0x3d,0x61,0x86,0x63,0x92,0x64,
+0x94,0x65,2,0x62,0x44,0x6d,0x5e,0x78,0x2e,0x13,0x74,0x65,0x6e,0x64,0x32,0x15,
+0x6e,0x75,0x6d,0x6c,0x65,0x74,0x2f,0x42,1,0x61,0x24,0x67,0x45,0x11,0x73,0x65,
+0x42,0x12,0x67,0x61,0x7a,0x45,0x46,0x16,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x47,
+0x15,0x6c,0x65,0x74,0x74,0x65,0x72,0x23,0x10,0x72,0x31,1,0x6f,0x24,0x71,0x41,
+0x18,0x75,0x62,0x6c,0x65,0x71,0x75,0x6f,0x74,0x65,0x41,2,0x63,0x32,0x6e,0x3c,
+0x6f,0x22,0x12,0x70,0x65,0x6e,0x23,0x24,0x13,0x6c,0x6f,0x73,0x65,0x25,0x20,0x12,
+0x6f,0x6e,0x65,0x21,6,0x6f,0x65,0x6f,0x4a,0x72,0x5c,0x74,0x64,0x76,0x1d,0x69,
+0x73,0x75,0x61,0x6c,0x6f,0x72,0x64,0x65,0x72,0x6c,0x65,0x66,0x74,0x3d,0x18,0x76,
+0x65,0x72,0x73,0x74,0x72,0x75,0x63,0x6b,0x2d,0x13,0x69,0x67,0x68,0x74,0x2f,0x11,
+0x6f,0x70,0x30,0x12,0x61,0x6e,0x64,2,0x62,0x32,0x6c,0x62,0x72,0x13,0x69,0x67,
+0x68,0x74,0x3b,0x14,0x6f,0x74,0x74,0x6f,0x6d,0x32,0x12,0x61,0x6e,0x64,1,0x6c,
+0x2e,0x72,0x13,0x69,0x67,0x68,0x74,0x35,0x12,0x65,0x66,0x74,0x3f,0x12,0x65,0x66,
+0x74,0x36,0x17,0x61,0x6e,0x64,0x72,0x69,0x67,0x68,0x74,0x39,0x62,0x2c,0x6c,0x5c,
+0x6e,0x10,0x61,0x21,0x14,0x6f,0x74,0x74,0x6f,0x6d,0x22,0x12,0x61,0x6e,0x64,1,
+0x6c,0x2e,0x72,0x13,0x69,0x67,0x68,0x74,0x27,0x12,0x65,0x66,0x74,0x25,0x12,0x65,
+0x66,0x74,0x28,0x17,0x61,0x6e,0x64,0x72,0x69,0x67,0x68,0x74,0x2b,0xd,0x6e,0xaa,
+0x72,0x70,0x72,0x92,0x73,0xa2,0x46,0x74,0xa2,0x54,0x76,1,0x69,0x60,0x6f,0x12,
+0x77,0x65,0x6c,0x62,1,0x64,0x3a,0x69,0x19,0x6e,0x64,0x65,0x70,0x65,0x6e,0x64,
+0x65,0x6e,0x74,0x67,0x17,0x65,0x70,0x65,0x6e,0x64,0x65,0x6e,0x74,0x65,1,0x72,
+0x2e,0x73,0x13,0x61,0x72,0x67,0x61,0x61,0x12,0x61,0x6d,0x61,0x5f,0x1d,0x65,0x67,
+0x69,0x73,0x74,0x65,0x72,0x73,0x68,0x69,0x66,0x74,0x65,0x72,0x57,0x1e,0x79,0x6c,
+0x6c,0x61,0x62,0x6c,0x65,0x6d,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x59,0x12,0x6f,
+0x6e,0x65,1,0x6c,0x2c,0x6d,0x12,0x61,0x72,0x6b,0x5d,0x14,0x65,0x74,0x74,0x65,
+0x72,0x5b,0x6e,0x3c,0x6f,0x7c,0x70,0x18,0x75,0x72,0x65,0x6b,0x69,0x6c,0x6c,0x65,
+0x72,0x55,1,0x6f,0x4c,0x75,1,0x6b,0x3c,0x6d,0x12,0x62,0x65,0x72,0x50,0x15,
+0x6a,0x6f,0x69,0x6e,0x65,0x72,0x53,0x11,0x74,0x61,0x4f,0x16,0x6e,0x6a,0x6f,0x69,
+0x6e,0x65,0x72,0x4d,0x13,0x74,0x68,0x65,0x72,0x21,0x67,0x3e,0x67,0x4a,0x69,0x64,
+0x6a,0x82,0x6d,0x1d,0x6f,0x64,0x69,0x66,0x79,0x69,0x6e,0x67,0x6c,0x65,0x74,0x74,
+0x65,0x72,0x4b,0x1c,0x65,0x6d,0x69,0x6e,0x61,0x74,0x69,0x6f,0x6e,0x6d,0x61,0x72,
+0x6b,0x45,0x1e,0x6e,0x76,0x69,0x73,0x69,0x62,0x6c,0x65,0x73,0x74,0x61,0x63,0x6b,
+0x65,0x72,0x47,0x14,0x6f,0x69,0x6e,0x65,0x72,0x49,0x61,0xa2,0xba,0x62,0xa2,0xc0,
+0x63,1,0x61,0xa2,0xa2,0x6f,0x16,0x6e,0x73,0x6f,0x6e,0x61,0x6e,0x74,0x2a,8,
+0x6b,0x67,0x6b,0x48,0x6d,0x52,0x70,0x5c,0x73,0xa2,0x42,0x77,0x19,0x69,0x74,0x68,
+0x73,0x74,0x61,0x63,0x6b,0x65,0x72,0x43,0x14,0x69,0x6c,0x6c,0x65,0x72,0x35,0x14,
+0x65,0x64,0x69,0x61,0x6c,0x37,1,0x6c,0x52,0x72,0x10,0x65,1,0x63,0x2e,0x66,
+0x13,0x69,0x78,0x65,0x64,0x3d,0x19,0x65,0x64,0x69,0x6e,0x67,0x72,0x65,0x70,0x68,
+0x61,0x3b,0x18,0x61,0x63,0x65,0x68,0x6f,0x6c,0x64,0x65,0x72,0x39,0x10,0x75,1,
+0x62,0x3e,0x63,0x1b,0x63,0x65,0x65,0x64,0x69,0x6e,0x67,0x72,0x65,0x70,0x68,0x61,
+0x41,0x15,0x6a,0x6f,0x69,0x6e,0x65,0x64,0x3f,0x64,0x4c,0x66,0x52,0x68,0x5a,0x69,
+0x1e,0x6e,0x69,0x74,0x69,0x61,0x6c,0x70,0x6f,0x73,0x74,0x66,0x69,0x78,0x65,0x64,
+0x33,0x12,0x65,0x61,0x64,0x2d,0x13,0x69,0x6e,0x61,0x6c,0x2f,0x18,0x65,0x61,0x64,
+0x6c,0x65,0x74,0x74,0x65,0x72,0x31,0x1d,0x6e,0x74,0x69,0x6c,0x6c,0x61,0x74,0x69,
+0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x29,0x16,0x76,0x61,0x67,0x72,0x61,0x68,0x61,0x23,
+1,0x69,0x4a,0x72,0x10,0x61,0x1f,0x68,0x6d,0x69,0x6a,0x6f,0x69,0x6e,0x69,0x6e,
+0x67,0x6e,0x75,0x6d,0x62,0x65,0x72,0x27,0x12,0x6e,0x64,0x75,0x25,2,0x72,0x38,
+0x74,0x46,0x75,0x26,0x15,0x70,0x72,0x69,0x67,0x68,0x74,0x27,0x20,0x15,0x6f,0x74,
+0x61,0x74,0x65,0x64,0x21,1,0x72,0x24,0x75,0x25,0x22,0x18,0x61,0x6e,0x73,0x66,
+0x6f,0x72,0x6d,0x65,0x64,1,0x72,0x32,0x75,0x15,0x70,0x72,0x69,0x67,0x68,0x74,
+0x25,0x15,0x6f,0x74,0x61,0x74,0x65,0x64,0x23,0xd,0x6e,0xc1,0x86,0x73,0xa8,0x73,
+0x4c,0x74,0xa2,0x76,0x75,0xa2,0x83,0x7a,0xd8,0x70,0,2,0x6c,0xd9,0x20,0,
+0x70,0xd9,0x40,0,0x73,0xc3,0,0xfe,0xf,0,0,0,7,0x6f,0x3c,0x6f,
+0xff,8,0,0,0,0x70,0x3a,0x75,0x6e,0x79,0x13,0x6d,0x62,0x6f,0x6c,0xff,
+0xf,0,0,0,0x11,0x61,0x63,1,0x65,0x34,0x69,0x15,0x6e,0x67,0x6d,0x61,
+0x72,0x6b,0xa5,0,0x18,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xc3,0,
+0x16,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0xe1,0,0,0x63,0xff,2,0,0,
+0,0x65,0x38,0x6b,0xff,4,0,0,0,0x6d,0xff,1,0,0,0,0x16,
+0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xd9,0x70,0,0x1d,0x69,0x74,0x6c,0x65,0x63,
+0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x31,1,0x6e,0x40,0x70,0x1c,0x70,
+0x65,0x72,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x25,0x17,0x61,0x73,
+0x73,0x69,0x67,0x6e,0x65,0x64,0x23,0x6e,0xa2,0x69,0x6f,0xa2,0x89,0x70,0xfe,0x30,
+0xf8,0,0,9,0x69,0x33,0x69,0xff,0x10,0,0,0,0x6f,0xfd,0x80,0,
+0,0x72,0x54,0x73,0xf9,0,0,0x75,0x12,0x6e,0x63,0x74,0xfe,0x30,0xf8,0,
+0,0x15,0x75,0x61,0x74,0x69,0x6f,0x6e,0xff,0x30,0xf8,0,0,0x17,0x69,0x76,
+0x61,0x74,0x65,0x75,0x73,0x65,0xdd,0,0,0x61,0x48,0x63,0xfd,0x40,0,0,
+0x64,0xe9,0,0,0x65,0xfd,0x20,0,0,0x66,0xff,0x20,0,0,0,0x1f,
+0x72,0x61,0x67,0x72,0x61,0x70,0x68,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,
+0xd9,0x40,0,0xbe,0,3,0x64,0xa7,0,0x6c,0xab,0,0x6f,0x30,0x75,0x13,
+0x6d,0x62,0x65,0x72,0xbf,0,0xb2,0,0x1b,0x6e,0x73,0x70,0x61,0x63,0x69,0x6e,
+0x67,0x6d,0x61,0x72,0x6b,0xa1,1,0x70,0x92,0x74,0x12,0x68,0x65,0x72,0xe6,0x80,
+1,3,0x6c,0x40,0x6e,0x4a,0x70,0x56,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0xff,
+8,0,0,0,0x14,0x65,0x74,0x74,0x65,0x72,0x61,0x14,0x75,0x6d,0x62,0x65,
+0x72,0xb3,0,0x19,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xfd,0x80,
+0,0,0x1c,0x65,0x6e,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,
+0xf9,0,0,0x66,0xc0,0xc4,0x66,0xa2,0x47,0x69,0xa2,0x64,0x6c,0xa2,0x79,0x6d,
+0xa4,0xc0,4,0x61,0x6c,0x63,0xa5,0,0x65,0xa3,0x80,0x6e,0xa1,0x6f,0x15,0x64,
+0x69,0x66,0x69,0x65,0x72,1,0x6c,0x38,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0xff,
+4,0,0,0,0x14,0x65,0x74,0x74,0x65,0x72,0x41,1,0x72,0x3c,0x74,0x16,
+0x68,0x73,0x79,0x6d,0x62,0x6f,0x6c,0xff,1,0,0,0,0x10,0x6b,0xa5,0xc0,
+1,0x69,0x32,0x6f,0x13,0x72,0x6d,0x61,0x74,0xdb,0,0,0x1d,0x6e,0x61,0x6c,
+0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xff,0x20,0,0,0,
+0x10,0x6e,0x1f,0x69,0x74,0x69,0x61,0x6c,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,
+0x69,0x6f,0x6e,0xff,0x10,0,0,0,0x9c,7,0x6d,0x18,0x6d,0x41,0x6f,0x28,
+0x74,0x31,0x75,0x25,0x60,0x1c,0x77,0x65,0x72,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,
+0x74,0x65,0x72,0x29,0x63,0x3d,0x65,0x28,0x69,0x42,0x6c,0x29,0x13,0x74,0x74,0x65,
+0x72,0x9c,0x15,0x6e,0x75,0x6d,0x62,0x65,0x72,0xab,0,0x1a,0x6e,0x65,0x73,0x65,
+0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xd9,0x20,0,0x63,0x46,0x64,0xa2,0x96,0x65,
+0x1b,0x6e,0x63,0x6c,0x6f,0x73,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0xa3,0x80,0xe6,
+0x80,1,7,0x6e,0x57,0x6e,0x52,0x6f,0x5e,0x73,0xe1,0,0,0x75,0x1b,0x72,
+0x72,0x65,0x6e,0x63,0x79,0x73,0x79,0x6d,0x62,0x6f,0x6c,0xff,2,0,0,0,
+0x22,0x12,0x74,0x72,0x6c,0xd9,0x80,0,0xdc,0,0,1,0x6d,0x62,0x6e,1,
+0x6e,0x30,0x74,0x12,0x72,0x6f,0x6c,0xd9,0x80,0,0x1f,0x65,0x63,0x74,0x6f,0x72,
+0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xfd,0x40,0,0,0x19,
+0x62,0x69,0x6e,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0xa5,0xc0,0x61,0x58,0x63,0xd9,
+0x80,0,0x66,0xdb,0,0,0x6c,0x1d,0x6f,0x73,0x65,0x70,0x75,0x6e,0x63,0x74,
+0x75,0x61,0x74,0x69,0x6f,0x6e,0xfd,0x20,0,0,0x18,0x73,0x65,0x64,0x6c,0x65,
+0x74,0x74,0x65,0x72,0x3d,2,0x61,0x32,0x65,0x50,0x69,0x12,0x67,0x69,0x74,0xa7,
+0,0x1c,0x73,0x68,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xe9,
+0,0,0x1a,0x63,0x69,0x6d,0x61,0x6c,0x6e,0x75,0x6d,0x62,0x65,0x72,0xa7,0
+};
+
+const char PropNameData::nameGroups[22098]={
+2,'A','l','p','h','a',0,'A','l','p','h','a','b','e','t','i','c',0,
+4,'N',0,'N','o',0,'F',0,'F','a','l','s','e',0,4,'Y',0,'Y','e','s',0,'T',0,'T','r','u','e',0,
+2,'N','R',0,'N','o','t','_','R','e','o','r','d','e','r','e','d',0,
+2,'O','V',0,'O','v','e','r','l','a','y',0,2,'H','A','N','R',0,'H','a','n','_','R','e','a','d','i','n','g',0,
+2,'N','K',0,'N','u','k','t','a',0,2,'K','V',0,'K','a','n','a','_','V','o','i','c','i','n','g',0,
+2,'V','R',0,'V','i','r','a','m','a',0,2,'C','C','C','1','0',0,'C','C','C','1','0',0,
+2,'C','C','C','1','1',0,'C','C','C','1','1',0,2,'C','C','C','1','2',0,'C','C','C','1','2',0,
+2,'C','C','C','1','3',0,'C','C','C','1','3',0,2,'C','C','C','1','4',0,'C','C','C','1','4',0,
+2,'C','C','C','1','5',0,'C','C','C','1','5',0,2,'C','C','C','1','6',0,'C','C','C','1','6',0,
+2,'C','C','C','1','7',0,'C','C','C','1','7',0,2,'C','C','C','1','8',0,'C','C','C','1','8',0,
+2,'C','C','C','1','9',0,'C','C','C','1','9',0,2,'C','C','C','2','0',0,'C','C','C','2','0',0,
+2,'C','C','C','2','1',0,'C','C','C','2','1',0,2,'C','C','C','2','2',0,'C','C','C','2','2',0,
+2,'C','C','C','2','3',0,'C','C','C','2','3',0,2,'C','C','C','2','4',0,'C','C','C','2','4',0,
+2,'C','C','C','2','5',0,'C','C','C','2','5',0,2,'C','C','C','2','6',0,'C','C','C','2','6',0,
+2,'C','C','C','2','7',0,'C','C','C','2','7',0,2,'C','C','C','2','8',0,'C','C','C','2','8',0,
+2,'C','C','C','2','9',0,'C','C','C','2','9',0,2,'C','C','C','3','0',0,'C','C','C','3','0',0,
+2,'C','C','C','3','1',0,'C','C','C','3','1',0,2,'C','C','C','3','2',0,'C','C','C','3','2',0,
+2,'C','C','C','3','3',0,'C','C','C','3','3',0,2,'C','C','C','3','4',0,'C','C','C','3','4',0,
+2,'C','C','C','3','5',0,'C','C','C','3','5',0,2,'C','C','C','3','6',0,'C','C','C','3','6',0,
+2,'C','C','C','8','4',0,'C','C','C','8','4',0,2,'C','C','C','9','1',0,'C','C','C','9','1',0,
+2,'C','C','C','1','0','3',0,'C','C','C','1','0','3',0,2,'C','C','C','1','0','7',0,'C','C','C','1','0','7',0,
+2,'C','C','C','1','1','8',0,'C','C','C','1','1','8',0,2,'C','C','C','1','2','2',0,'C','C','C','1','2','2',0,
+2,'C','C','C','1','2','9',0,'C','C','C','1','2','9',0,2,'C','C','C','1','3','0',0,'C','C','C','1','3','0',0,
+2,'C','C','C','1','3','2',0,'C','C','C','1','3','2',0,2,'C','C','C','1','3','3',0,'C','C','C','1','3','3',0,
+2,'A','T','B','L',0,'A','t','t','a','c','h','e','d','_','B','e','l','o','w','_','L','e','f','t',0,
+2,'A','T','B',0,'A','t','t','a','c','h','e','d','_','B','e','l','o','w',0,
+2,'A','T','A',0,'A','t','t','a','c','h','e','d','_','A','b','o','v','e',0,
+2,'A','T','A','R',0,'A','t','t','a','c','h','e','d','_','A','b','o','v','e','_','R','i','g','h','t',0,
+2,'B','L',0,'B','e','l','o','w','_','L','e','f','t',0,2,'B',0,'B','e','l','o','w',0,
+2,'B','R',0,'B','e','l','o','w','_','R','i','g','h','t',0,
+2,'L',0,'L','e','f','t',0,2,'R',0,'R','i','g','h','t',0,
+2,'A','L',0,'A','b','o','v','e','_','L','e','f','t',0,2,'A',0,'A','b','o','v','e',0,
+2,'A','R',0,'A','b','o','v','e','_','R','i','g','h','t',0,
+2,'D','B',0,'D','o','u','b','l','e','_','B','e','l','o','w',0,
+2,'D','A',0,'D','o','u','b','l','e','_','A','b','o','v','e',0,
+2,'I','S',0,'I','o','t','a','_','S','u','b','s','c','r','i','p','t',0,
+2,'A','H','e','x',0,'A','S','C','I','I','_','H','e','x','_','D','i','g','i','t',0,
+2,'B','i','d','i','_','C',0,'B','i','d','i','_','C','o','n','t','r','o','l',0,
+2,'B','i','d','i','_','M',0,'B','i','d','i','_','M','i','r','r','o','r','e','d',0,
+2,'D','a','s','h',0,'D','a','s','h',0,2,'D','I',0,'D','e','f','a','u','l','t','_','I','g','n','o','r','a','b','l','e',
+'_','C','o','d','e','_','P','o','i','n','t',0,2,'D','e','p',0,'D','e','p','r','e','c','a','t','e','d',0,
+2,'D','i','a',0,'D','i','a','c','r','i','t','i','c',0,2,'E','x','t',0,'E','x','t','e','n','d','e','r',0,
+2,'C','o','m','p','_','E','x',0,'F','u','l','l','_','C','o','m','p','o','s','i','t','i','o','n','_','E','x','c','l','u','s',
+'i','o','n',0,2,'G','r','_','B','a','s','e',0,'G','r','a','p','h','e','m','e','_','B','a','s','e',0,
+2,'G','r','_','E','x','t',0,'G','r','a','p','h','e','m','e','_','E','x','t','e','n','d',0,
+2,'G','r','_','L','i','n','k',0,'G','r','a','p','h','e','m','e','_','L','i','n','k',0,
+2,'H','e','x',0,'H','e','x','_','D','i','g','i','t',0,2,'H','y','p','h','e','n',0,'H','y','p','h','e','n',0,
+2,'I','D','C',0,'I','D','_','C','o','n','t','i','n','u','e',0,
+2,'I','D','S',0,'I','D','_','S','t','a','r','t',0,2,'I','d','e','o',0,'I','d','e','o','g','r','a','p','h','i','c',0,
+2,'I','D','S','B',0,'I','D','S','_','B','i','n','a','r','y','_','O','p','e','r','a','t','o','r',0,
+2,'I','D','S','T',0,'I','D','S','_','T','r','i','n','a','r','y','_','O','p','e','r','a','t','o','r',0,
+2,'J','o','i','n','_','C',0,'J','o','i','n','_','C','o','n','t','r','o','l',0,
+2,'L','O','E',0,'L','o','g','i','c','a','l','_','O','r','d','e','r','_','E','x','c','e','p','t','i','o','n',0,
+2,'L','o','w','e','r',0,'L','o','w','e','r','c','a','s','e',0,
+2,'M','a','t','h',0,'M','a','t','h',0,2,'N','C','h','a','r',0,'N','o','n','c','h','a','r','a','c','t','e','r','_','C',
+'o','d','e','_','P','o','i','n','t',0,2,'Q','M','a','r','k',0,'Q','u','o','t','a','t','i','o','n','_','M','a','r','k',0,
+2,'R','a','d','i','c','a','l',0,'R','a','d','i','c','a','l',0,
+2,'S','D',0,'S','o','f','t','_','D','o','t','t','e','d',0,
+2,'T','e','r','m',0,'T','e','r','m','i','n','a','l','_','P','u','n','c','t','u','a','t','i','o','n',0,
+2,'U','I','d','e','o',0,'U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h',0,
+2,'U','p','p','e','r',0,'U','p','p','e','r','c','a','s','e',0,
+3,'W','S','p','a','c','e',0,'W','h','i','t','e','_','S','p','a','c','e',0,'s','p','a','c','e',0,
+2,'X','I','D','C',0,'X','I','D','_','C','o','n','t','i','n','u','e',0,
+2,'X','I','D','S',0,'X','I','D','_','S','t','a','r','t',0,
+2,'S','e','n','s','i','t','i','v','e',0,'C','a','s','e','_','S','e','n','s','i','t','i','v','e',0,
+2,'S','T','e','r','m',0,'S','e','n','t','e','n','c','e','_','T','e','r','m','i','n','a','l',0,
+2,'V','S',0,'V','a','r','i','a','t','i','o','n','_','S','e','l','e','c','t','o','r',0,
+2,'n','f','d','i','n','e','r','t',0,'N','F','D','_','I','n','e','r','t',0,
+2,'n','f','k','d','i','n','e','r','t',0,'N','F','K','D','_','I','n','e','r','t',0,
+2,'n','f','c','i','n','e','r','t',0,'N','F','C','_','I','n','e','r','t',0,
+2,'n','f','k','c','i','n','e','r','t',0,'N','F','K','C','_','I','n','e','r','t',0,
+2,'s','e','g','s','t','a','r','t',0,'S','e','g','m','e','n','t','_','S','t','a','r','t','e','r',0,
+2,'P','a','t','_','S','y','n',0,'P','a','t','t','e','r','n','_','S','y','n','t','a','x',0,
+2,'P','a','t','_','W','S',0,'P','a','t','t','e','r','n','_','W','h','i','t','e','_','S','p','a','c','e',0,
+2,0,'a','l','n','u','m',0,2,0,'b','l','a','n','k',0,
+2,0,'g','r','a','p','h',0,2,0,'p','r','i','n','t',0,
+2,0,'x','d','i','g','i','t',0,2,'C','a','s','e','d',0,'C','a','s','e','d',0,
+2,'C','I',0,'C','a','s','e','_','I','g','n','o','r','a','b','l','e',0,
+2,'C','W','L',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','L','o','w','e','r','c','a','s','e','d',0,
+2,'C','W','U',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','U','p','p','e','r','c','a','s','e','d',0,
+2,'C','W','T',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','T','i','t','l','e','c','a','s','e','d',0,
+2,'C','W','C','F',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','C','a','s','e','f','o','l','d','e','d',0,
+2,'C','W','C','M',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','C','a','s','e','m','a','p','p','e','d',0,
+2,'C','W','K','C','F',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','N','F','K','C','_','C','a','s','e','f','o','l',
+'d','e','d',0,2,'E','m','o','j','i',0,'E','m','o','j','i',0,
+2,'E','P','r','e','s',0,'E','m','o','j','i','_','P','r','e','s','e','n','t','a','t','i','o','n',0,
+2,'E','M','o','d',0,'E','m','o','j','i','_','M','o','d','i','f','i','e','r',0,
+2,'E','B','a','s','e',0,'E','m','o','j','i','_','M','o','d','i','f','i','e','r','_','B','a','s','e',0,
+2,'E','C','o','m','p',0,'E','m','o','j','i','_','C','o','m','p','o','n','e','n','t',0,
+2,'R','I',0,'R','e','g','i','o','n','a','l','_','I','n','d','i','c','a','t','o','r',0,
+2,'P','C','M',0,'P','r','e','p','e','n','d','e','d','_','C','o','n','c','a','t','e','n','a','t','i','o','n','_','M','a','r',
+'k',0,2,'E','x','t','P','i','c','t',0,'E','x','t','e','n','d','e','d','_','P','i','c','t','o','g','r','a','p','h','i','c',
+0,2,'b','c',0,'B','i','d','i','_','C','l','a','s','s',0,
+2,'L',0,'L','e','f','t','_','T','o','_','R','i','g','h','t',0,
+2,'R',0,'R','i','g','h','t','_','T','o','_','L','e','f','t',0,
+2,'E','N',0,'E','u','r','o','p','e','a','n','_','N','u','m','b','e','r',0,
+2,'E','S',0,'E','u','r','o','p','e','a','n','_','S','e','p','a','r','a','t','o','r',0,
+2,'E','T',0,'E','u','r','o','p','e','a','n','_','T','e','r','m','i','n','a','t','o','r',0,
+2,'A','N',0,'A','r','a','b','i','c','_','N','u','m','b','e','r',0,
+2,'C','S',0,'C','o','m','m','o','n','_','S','e','p','a','r','a','t','o','r',0,
+2,'B',0,'P','a','r','a','g','r','a','p','h','_','S','e','p','a','r','a','t','o','r',0,
+2,'S',0,'S','e','g','m','e','n','t','_','S','e','p','a','r','a','t','o','r',0,
+2,'W','S',0,'W','h','i','t','e','_','S','p','a','c','e',0,
+2,'O','N',0,'O','t','h','e','r','_','N','e','u','t','r','a','l',0,
+2,'L','R','E',0,'L','e','f','t','_','T','o','_','R','i','g','h','t','_','E','m','b','e','d','d','i','n','g',0,
+2,'L','R','O',0,'L','e','f','t','_','T','o','_','R','i','g','h','t','_','O','v','e','r','r','i','d','e',0,
+2,'A','L',0,'A','r','a','b','i','c','_','L','e','t','t','e','r',0,
+2,'R','L','E',0,'R','i','g','h','t','_','T','o','_','L','e','f','t','_','E','m','b','e','d','d','i','n','g',0,
+2,'R','L','O',0,'R','i','g','h','t','_','T','o','_','L','e','f','t','_','O','v','e','r','r','i','d','e',0,
+2,'P','D','F',0,'P','o','p','_','D','i','r','e','c','t','i','o','n','a','l','_','F','o','r','m','a','t',0,
+2,'N','S','M',0,'N','o','n','s','p','a','c','i','n','g','_','M','a','r','k',0,
+2,'B','N',0,'B','o','u','n','d','a','r','y','_','N','e','u','t','r','a','l',0,
+2,'F','S','I',0,'F','i','r','s','t','_','S','t','r','o','n','g','_','I','s','o','l','a','t','e',0,
+2,'L','R','I',0,'L','e','f','t','_','T','o','_','R','i','g','h','t','_','I','s','o','l','a','t','e',0,
+2,'R','L','I',0,'R','i','g','h','t','_','T','o','_','L','e','f','t','_','I','s','o','l','a','t','e',0,
+2,'P','D','I',0,'P','o','p','_','D','i','r','e','c','t','i','o','n','a','l','_','I','s','o','l','a','t','e',0,
+2,'b','l','k',0,'B','l','o','c','k',0,2,'N','B',0,'N','o','_','B','l','o','c','k',0,
+2,'A','S','C','I','I',0,'B','a','s','i','c','_','L','a','t','i','n',0,
+3,'L','a','t','i','n','_','1','_','S','u','p',0,'L','a','t','i','n','_','1','_','S','u','p','p','l','e','m','e','n','t',0,
+'L','a','t','i','n','_','1',0,2,'L','a','t','i','n','_','E','x','t','_','A',0,'L','a','t','i','n','_','E','x','t','e','n',
+'d','e','d','_','A',0,2,'L','a','t','i','n','_','E','x','t','_','B',0,'L','a','t','i','n','_','E','x','t','e','n','d','e',
+'d','_','B',0,2,'I','P','A','_','E','x','t',0,'I','P','A','_','E','x','t','e','n','s','i','o','n','s',0,
+2,'M','o','d','i','f','i','e','r','_','L','e','t','t','e','r','s',0,'S','p','a','c','i','n','g','_','M','o','d','i','f','i',
+'e','r','_','L','e','t','t','e','r','s',0,2,'D','i','a','c','r','i','t','i','c','a','l','s',0,
+'C','o','m','b','i','n','i','n','g','_','D','i','a','c','r','i','t','i','c','a','l','_','M','a','r','k','s',0,
+2,'G','r','e','e','k',0,'G','r','e','e','k','_','A','n','d','_','C','o','p','t','i','c',0,
+2,'C','y','r','i','l','l','i','c',0,'C','y','r','i','l','l','i','c',0,
+2,'A','r','m','e','n','i','a','n',0,'A','r','m','e','n','i','a','n',0,
+2,'H','e','b','r','e','w',0,'H','e','b','r','e','w',0,2,'A','r','a','b','i','c',0,'A','r','a','b','i','c',0,
+2,'S','y','r','i','a','c',0,'S','y','r','i','a','c',0,2,'T','h','a','a','n','a',0,'T','h','a','a','n','a',0,
+2,'D','e','v','a','n','a','g','a','r','i',0,'D','e','v','a','n','a','g','a','r','i',0,
+2,'B','e','n','g','a','l','i',0,'B','e','n','g','a','l','i',0,
+2,'G','u','r','m','u','k','h','i',0,'G','u','r','m','u','k','h','i',0,
+2,'G','u','j','a','r','a','t','i',0,'G','u','j','a','r','a','t','i',0,
+2,'O','r','i','y','a',0,'O','r','i','y','a',0,2,'T','a','m','i','l',0,'T','a','m','i','l',0,
+2,'T','e','l','u','g','u',0,'T','e','l','u','g','u',0,2,'K','a','n','n','a','d','a',0,
+'K','a','n','n','a','d','a',0,2,'M','a','l','a','y','a','l','a','m',0,'M','a','l','a','y','a','l','a','m',0,
+2,'S','i','n','h','a','l','a',0,'S','i','n','h','a','l','a',0,
+2,'T','h','a','i',0,'T','h','a','i',0,2,'L','a','o',0,'L','a','o',0,
+2,'T','i','b','e','t','a','n',0,'T','i','b','e','t','a','n',0,
+2,'M','y','a','n','m','a','r',0,'M','y','a','n','m','a','r',0,
+2,'G','e','o','r','g','i','a','n',0,'G','e','o','r','g','i','a','n',0,
+2,'J','a','m','o',0,'H','a','n','g','u','l','_','J','a','m','o',0,
+2,'E','t','h','i','o','p','i','c',0,'E','t','h','i','o','p','i','c',0,
+2,'C','h','e','r','o','k','e','e',0,'C','h','e','r','o','k','e','e',0,
+3,'U','C','A','S',0,'U','n','i','f','i','e','d','_','C','a','n','a','d','i','a','n','_','A','b','o','r','i','g','i','n','a',
+'l','_','S','y','l','l','a','b','i','c','s',0,'C','a','n','a','d','i','a','n','_','S','y','l','l','a','b','i','c','s',0,
+2,'O','g','h','a','m',0,'O','g','h','a','m',0,2,'R','u','n','i','c',0,'R','u','n','i','c',0,
+2,'K','h','m','e','r',0,'K','h','m','e','r',0,2,'M','o','n','g','o','l','i','a','n',0,
+'M','o','n','g','o','l','i','a','n',0,2,'L','a','t','i','n','_','E','x','t','_','A','d','d','i','t','i','o','n','a','l',0,
+'L','a','t','i','n','_','E','x','t','e','n','d','e','d','_','A','d','d','i','t','i','o','n','a','l',0,
+2,'G','r','e','e','k','_','E','x','t',0,'G','r','e','e','k','_','E','x','t','e','n','d','e','d',0,
+2,'P','u','n','c','t','u','a','t','i','o','n',0,'G','e','n','e','r','a','l','_','P','u','n','c','t','u','a','t','i','o','n',
+0,2,'S','u','p','e','r','_','A','n','d','_','S','u','b',0,'S','u','p','e','r','s','c','r','i','p','t','s','_','A','n','d',
+'_','S','u','b','s','c','r','i','p','t','s',0,2,'C','u','r','r','e','n','c','y','_','S','y','m','b','o','l','s',0,
+'C','u','r','r','e','n','c','y','_','S','y','m','b','o','l','s',0,
+3,'D','i','a','c','r','i','t','i','c','a','l','s','_','F','o','r','_','S','y','m','b','o','l','s',0,
+'C','o','m','b','i','n','i','n','g','_','D','i','a','c','r','i','t','i','c','a','l','_','M','a','r','k','s','_','F','o','r','_',
+'S','y','m','b','o','l','s',0,'C','o','m','b','i','n','i','n','g','_','M','a','r','k','s','_','F','o','r','_','S','y','m','b',
+'o','l','s',0,2,'L','e','t','t','e','r','l','i','k','e','_','S','y','m','b','o','l','s',0,
+'L','e','t','t','e','r','l','i','k','e','_','S','y','m','b','o','l','s',0,
+2,'N','u','m','b','e','r','_','F','o','r','m','s',0,'N','u','m','b','e','r','_','F','o','r','m','s',0,
+2,'A','r','r','o','w','s',0,'A','r','r','o','w','s',0,2,'M','a','t','h','_','O','p','e','r','a','t','o','r','s',0,
+'M','a','t','h','e','m','a','t','i','c','a','l','_','O','p','e','r','a','t','o','r','s',0,
+2,'M','i','s','c','_','T','e','c','h','n','i','c','a','l',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_','T','e',
+'c','h','n','i','c','a','l',0,2,'C','o','n','t','r','o','l','_','P','i','c','t','u','r','e','s',0,
+'C','o','n','t','r','o','l','_','P','i','c','t','u','r','e','s',0,
+2,'O','C','R',0,'O','p','t','i','c','a','l','_','C','h','a','r','a','c','t','e','r','_','R','e','c','o','g','n','i','t','i',
+'o','n',0,2,'E','n','c','l','o','s','e','d','_','A','l','p','h','a','n','u','m',0,'E','n','c','l','o','s','e','d','_','A',
+'l','p','h','a','n','u','m','e','r','i','c','s',0,2,'B','o','x','_','D','r','a','w','i','n','g',0,
+'B','o','x','_','D','r','a','w','i','n','g',0,2,'B','l','o','c','k','_','E','l','e','m','e','n','t','s',0,
+'B','l','o','c','k','_','E','l','e','m','e','n','t','s',0,2,'G','e','o','m','e','t','r','i','c','_','S','h','a','p','e','s',
+0,'G','e','o','m','e','t','r','i','c','_','S','h','a','p','e','s',0,
+2,'M','i','s','c','_','S','y','m','b','o','l','s',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_','S','y','m','b',
+'o','l','s',0,2,'D','i','n','g','b','a','t','s',0,'D','i','n','g','b','a','t','s',0,
+2,'B','r','a','i','l','l','e',0,'B','r','a','i','l','l','e','_','P','a','t','t','e','r','n','s',0,
+2,'C','J','K','_','R','a','d','i','c','a','l','s','_','S','u','p',0,'C','J','K','_','R','a','d','i','c','a','l','s','_','S',
+'u','p','p','l','e','m','e','n','t',0,2,'K','a','n','g','x','i',0,'K','a','n','g','x','i','_','R','a','d','i','c','a','l',
+'s',0,2,'I','D','C',0,'I','d','e','o','g','r','a','p','h','i','c','_','D','e','s','c','r','i','p','t','i','o','n','_','C',
+'h','a','r','a','c','t','e','r','s',0,2,'C','J','K','_','S','y','m','b','o','l','s',0,'C','J','K','_','S','y','m','b','o',
+'l','s','_','A','n','d','_','P','u','n','c','t','u','a','t','i','o','n',0,
+2,'H','i','r','a','g','a','n','a',0,'H','i','r','a','g','a','n','a',0,
+2,'K','a','t','a','k','a','n','a',0,'K','a','t','a','k','a','n','a',0,
+2,'B','o','p','o','m','o','f','o',0,'B','o','p','o','m','o','f','o',0,
+2,'C','o','m','p','a','t','_','J','a','m','o',0,'H','a','n','g','u','l','_','C','o','m','p','a','t','i','b','i','l','i','t',
+'y','_','J','a','m','o',0,2,'K','a','n','b','u','n',0,'K','a','n','b','u','n',0,
+2,'B','o','p','o','m','o','f','o','_','E','x','t',0,'B','o','p','o','m','o','f','o','_','E','x','t','e','n','d','e','d',0,
+2,'E','n','c','l','o','s','e','d','_','C','J','K',0,'E','n','c','l','o','s','e','d','_','C','J','K','_','L','e','t','t','e',
+'r','s','_','A','n','d','_','M','o','n','t','h','s',0,2,'C','J','K','_','C','o','m','p','a','t',0,
+'C','J','K','_','C','o','m','p','a','t','i','b','i','l','i','t','y',0,
+2,'C','J','K','_','E','x','t','_','A',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h',
+'s','_','E','x','t','e','n','s','i','o','n','_','A',0,2,'C','J','K',0,'C','J','K','_','U','n','i','f','i','e','d','_','I',
+'d','e','o','g','r','a','p','h','s',0,2,'Y','i','_','S','y','l','l','a','b','l','e','s',0,
+'Y','i','_','S','y','l','l','a','b','l','e','s',0,2,'Y','i','_','R','a','d','i','c','a','l','s',0,
+'Y','i','_','R','a','d','i','c','a','l','s',0,2,'H','a','n','g','u','l',0,'H','a','n','g','u','l','_','S','y','l','l','a',
+'b','l','e','s',0,2,'H','i','g','h','_','S','u','r','r','o','g','a','t','e','s',0,'H','i','g','h','_','S','u','r','r','o',
+'g','a','t','e','s',0,2,'H','i','g','h','_','P','U','_','S','u','r','r','o','g','a','t','e','s',0,
+'H','i','g','h','_','P','r','i','v','a','t','e','_','U','s','e','_','S','u','r','r','o','g','a','t','e','s',0,
+2,'L','o','w','_','S','u','r','r','o','g','a','t','e','s',0,'L','o','w','_','S','u','r','r','o','g','a','t','e','s',0,
+3,'P','U','A',0,'P','r','i','v','a','t','e','_','U','s','e','_','A','r','e','a',0,'P','r','i','v','a','t','e','_','U','s',
+'e',0,2,'C','J','K','_','C','o','m','p','a','t','_','I','d','e','o','g','r','a','p','h','s',0,
+'C','J','K','_','C','o','m','p','a','t','i','b','i','l','i','t','y','_','I','d','e','o','g','r','a','p','h','s',0,
+2,'A','l','p','h','a','b','e','t','i','c','_','P','F',0,'A','l','p','h','a','b','e','t','i','c','_','P','r','e','s','e','n',
+'t','a','t','i','o','n','_','F','o','r','m','s',0,3,'A','r','a','b','i','c','_','P','F','_','A',0,
+'A','r','a','b','i','c','_','P','r','e','s','e','n','t','a','t','i','o','n','_','F','o','r','m','s','_','A',0,
+'A','r','a','b','i','c','_','P','r','e','s','e','n','t','a','t','i','o','n','_','F','o','r','m','s','-','A',0,
+2,'H','a','l','f','_','M','a','r','k','s',0,'C','o','m','b','i','n','i','n','g','_','H','a','l','f','_','M','a','r','k','s',
+0,2,'C','J','K','_','C','o','m','p','a','t','_','F','o','r','m','s',0,'C','J','K','_','C','o','m','p','a','t','i','b','i',
+'l','i','t','y','_','F','o','r','m','s',0,2,'S','m','a','l','l','_','F','o','r','m','s',0,
+'S','m','a','l','l','_','F','o','r','m','_','V','a','r','i','a','n','t','s',0,
+2,'A','r','a','b','i','c','_','P','F','_','B',0,'A','r','a','b','i','c','_','P','r','e','s','e','n','t','a','t','i','o','n',
+'_','F','o','r','m','s','_','B',0,2,'S','p','e','c','i','a','l','s',0,'S','p','e','c','i','a','l','s',0,
+2,'H','a','l','f','_','A','n','d','_','F','u','l','l','_','F','o','r','m','s',0,'H','a','l','f','w','i','d','t','h','_','A',
+'n','d','_','F','u','l','l','w','i','d','t','h','_','F','o','r','m','s',0,
+2,'O','l','d','_','I','t','a','l','i','c',0,'O','l','d','_','I','t','a','l','i','c',0,
+2,'G','o','t','h','i','c',0,'G','o','t','h','i','c',0,2,'D','e','s','e','r','e','t',0,
+'D','e','s','e','r','e','t',0,2,'B','y','z','a','n','t','i','n','e','_','M','u','s','i','c',0,
+'B','y','z','a','n','t','i','n','e','_','M','u','s','i','c','a','l','_','S','y','m','b','o','l','s',0,
+2,'M','u','s','i','c',0,'M','u','s','i','c','a','l','_','S','y','m','b','o','l','s',0,
+2,'M','a','t','h','_','A','l','p','h','a','n','u','m',0,'M','a','t','h','e','m','a','t','i','c','a','l','_','A','l','p','h',
+'a','n','u','m','e','r','i','c','_','S','y','m','b','o','l','s',0,
+2,'C','J','K','_','E','x','t','_','B',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h',
+'s','_','E','x','t','e','n','s','i','o','n','_','B',0,2,'C','J','K','_','C','o','m','p','a','t','_','I','d','e','o','g','r',
+'a','p','h','s','_','S','u','p',0,'C','J','K','_','C','o','m','p','a','t','i','b','i','l','i','t','y','_','I','d','e','o','g',
+'r','a','p','h','s','_','S','u','p','p','l','e','m','e','n','t',0,
+2,'T','a','g','s',0,'T','a','g','s',0,3,'C','y','r','i','l','l','i','c','_','S','u','p',0,
+'C','y','r','i','l','l','i','c','_','S','u','p','p','l','e','m','e','n','t',0,'C','y','r','i','l','l','i','c','_','S','u','p',
+'p','l','e','m','e','n','t','a','r','y',0,2,'T','a','g','a','l','o','g',0,'T','a','g','a','l','o','g',0,
+2,'H','a','n','u','n','o','o',0,'H','a','n','u','n','o','o',0,
+2,'B','u','h','i','d',0,'B','u','h','i','d',0,2,'T','a','g','b','a','n','w','a',0,'T','a','g','b','a','n','w','a',0,
+2,'M','i','s','c','_','M','a','t','h','_','S','y','m','b','o','l','s','_','A',0,'M','i','s','c','e','l','l','a','n','e','o',
+'u','s','_','M','a','t','h','e','m','a','t','i','c','a','l','_','S','y','m','b','o','l','s','_','A',0,
+2,'S','u','p','_','A','r','r','o','w','s','_','A',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','A','r','r','o','w',
+'s','_','A',0,2,'S','u','p','_','A','r','r','o','w','s','_','B',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','A',
+'r','r','o','w','s','_','B',0,2,'M','i','s','c','_','M','a','t','h','_','S','y','m','b','o','l','s','_','B',0,
+'M','i','s','c','e','l','l','a','n','e','o','u','s','_','M','a','t','h','e','m','a','t','i','c','a','l','_','S','y','m','b','o',
+'l','s','_','B',0,2,'S','u','p','_','M','a','t','h','_','O','p','e','r','a','t','o','r','s',0,
+'S','u','p','p','l','e','m','e','n','t','a','l','_','M','a','t','h','e','m','a','t','i','c','a','l','_','O','p','e','r','a','t',
+'o','r','s',0,2,'K','a','t','a','k','a','n','a','_','E','x','t',0,'K','a','t','a','k','a','n','a','_','P','h','o','n','e',
+'t','i','c','_','E','x','t','e','n','s','i','o','n','s',0,2,'V','S',0,'V','a','r','i','a','t','i','o','n','_','S','e','l',
+'e','c','t','o','r','s',0,2,'S','u','p','_','P','U','A','_','A',0,'S','u','p','p','l','e','m','e','n','t','a','r','y','_',
+'P','r','i','v','a','t','e','_','U','s','e','_','A','r','e','a','_','A',0,
+2,'S','u','p','_','P','U','A','_','B',0,'S','u','p','p','l','e','m','e','n','t','a','r','y','_','P','r','i','v','a','t','e',
+'_','U','s','e','_','A','r','e','a','_','B',0,2,'L','i','m','b','u',0,'L','i','m','b','u',0,
+2,'T','a','i','_','L','e',0,'T','a','i','_','L','e',0,2,'K','h','m','e','r','_','S','y','m','b','o','l','s',0,
+'K','h','m','e','r','_','S','y','m','b','o','l','s',0,2,'P','h','o','n','e','t','i','c','_','E','x','t',0,
+'P','h','o','n','e','t','i','c','_','E','x','t','e','n','s','i','o','n','s',0,
+2,'M','i','s','c','_','A','r','r','o','w','s',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_','S','y','m','b','o',
+'l','s','_','A','n','d','_','A','r','r','o','w','s',0,2,'Y','i','j','i','n','g',0,'Y','i','j','i','n','g','_','H','e','x',
+'a','g','r','a','m','_','S','y','m','b','o','l','s',0,2,'L','i','n','e','a','r','_','B','_','S','y','l','l','a','b','a','r',
+'y',0,'L','i','n','e','a','r','_','B','_','S','y','l','l','a','b','a','r','y',0,
+2,'L','i','n','e','a','r','_','B','_','I','d','e','o','g','r','a','m','s',0,'L','i','n','e','a','r','_','B','_','I','d','e',
+'o','g','r','a','m','s',0,2,'A','e','g','e','a','n','_','N','u','m','b','e','r','s',0,'A','e','g','e','a','n','_','N','u',
+'m','b','e','r','s',0,2,'U','g','a','r','i','t','i','c',0,'U','g','a','r','i','t','i','c',0,
+2,'S','h','a','v','i','a','n',0,'S','h','a','v','i','a','n',0,
+2,'O','s','m','a','n','y','a',0,'O','s','m','a','n','y','a',0,
+2,'C','y','p','r','i','o','t','_','S','y','l','l','a','b','a','r','y',0,'C','y','p','r','i','o','t','_','S','y','l','l','a',
+'b','a','r','y',0,2,'T','a','i','_','X','u','a','n','_','J','i','n','g',0,'T','a','i','_','X','u','a','n','_','J','i','n',
+'g','_','S','y','m','b','o','l','s',0,2,'V','S','_','S','u','p',0,'V','a','r','i','a','t','i','o','n','_','S','e','l','e',
+'c','t','o','r','s','_','S','u','p','p','l','e','m','e','n','t',0,
+2,'A','n','c','i','e','n','t','_','G','r','e','e','k','_','M','u','s','i','c',0,'A','n','c','i','e','n','t','_','G','r','e',
+'e','k','_','M','u','s','i','c','a','l','_','N','o','t','a','t','i','o','n',0,
+2,'A','n','c','i','e','n','t','_','G','r','e','e','k','_','N','u','m','b','e','r','s',0,'A','n','c','i','e','n','t','_','G',
+'r','e','e','k','_','N','u','m','b','e','r','s',0,2,'A','r','a','b','i','c','_','S','u','p',0,
+'A','r','a','b','i','c','_','S','u','p','p','l','e','m','e','n','t',0,
+2,'B','u','g','i','n','e','s','e',0,'B','u','g','i','n','e','s','e',0,
+2,'C','J','K','_','S','t','r','o','k','e','s',0,'C','J','K','_','S','t','r','o','k','e','s',0,
+2,'D','i','a','c','r','i','t','i','c','a','l','s','_','S','u','p',0,'C','o','m','b','i','n','i','n','g','_','D','i','a','c',
+'r','i','t','i','c','a','l','_','M','a','r','k','s','_','S','u','p','p','l','e','m','e','n','t',0,
+2,'C','o','p','t','i','c',0,'C','o','p','t','i','c',0,2,'E','t','h','i','o','p','i','c','_','E','x','t',0,
+'E','t','h','i','o','p','i','c','_','E','x','t','e','n','d','e','d',0,
+2,'E','t','h','i','o','p','i','c','_','S','u','p',0,'E','t','h','i','o','p','i','c','_','S','u','p','p','l','e','m','e','n',
+'t',0,2,'G','e','o','r','g','i','a','n','_','S','u','p',0,'G','e','o','r','g','i','a','n','_','S','u','p','p','l','e','m',
+'e','n','t',0,2,'G','l','a','g','o','l','i','t','i','c',0,'G','l','a','g','o','l','i','t','i','c',0,
+2,'K','h','a','r','o','s','h','t','h','i',0,'K','h','a','r','o','s','h','t','h','i',0,
+2,'M','o','d','i','f','i','e','r','_','T','o','n','e','_','L','e','t','t','e','r','s',0,'M','o','d','i','f','i','e','r','_',
+'T','o','n','e','_','L','e','t','t','e','r','s',0,2,'N','e','w','_','T','a','i','_','L','u','e',0,
+'N','e','w','_','T','a','i','_','L','u','e',0,2,'O','l','d','_','P','e','r','s','i','a','n',0,
+'O','l','d','_','P','e','r','s','i','a','n',0,2,'P','h','o','n','e','t','i','c','_','E','x','t','_','S','u','p',0,
+'P','h','o','n','e','t','i','c','_','E','x','t','e','n','s','i','o','n','s','_','S','u','p','p','l','e','m','e','n','t',0,
+2,'S','u','p','_','P','u','n','c','t','u','a','t','i','o','n',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','P','u',
+'n','c','t','u','a','t','i','o','n',0,2,'S','y','l','o','t','i','_','N','a','g','r','i',0,
+'S','y','l','o','t','i','_','N','a','g','r','i',0,2,'T','i','f','i','n','a','g','h',0,'T','i','f','i','n','a','g','h',0,
+2,'V','e','r','t','i','c','a','l','_','F','o','r','m','s',0,'V','e','r','t','i','c','a','l','_','F','o','r','m','s',0,
+2,'N','K','o',0,'N','K','o',0,2,'B','a','l','i','n','e','s','e',0,'B','a','l','i','n','e','s','e',0,
+2,'L','a','t','i','n','_','E','x','t','_','C',0,'L','a','t','i','n','_','E','x','t','e','n','d','e','d','_','C',0,
+2,'L','a','t','i','n','_','E','x','t','_','D',0,'L','a','t','i','n','_','E','x','t','e','n','d','e','d','_','D',0,
+2,'P','h','a','g','s','_','P','a',0,'P','h','a','g','s','_','P','a',0,
+2,'P','h','o','e','n','i','c','i','a','n',0,'P','h','o','e','n','i','c','i','a','n',0,
+2,'C','u','n','e','i','f','o','r','m',0,'C','u','n','e','i','f','o','r','m',0,
+2,'C','u','n','e','i','f','o','r','m','_','N','u','m','b','e','r','s',0,'C','u','n','e','i','f','o','r','m','_','N','u','m',
+'b','e','r','s','_','A','n','d','_','P','u','n','c','t','u','a','t','i','o','n',0,
+2,'C','o','u','n','t','i','n','g','_','R','o','d',0,'C','o','u','n','t','i','n','g','_','R','o','d','_','N','u','m','e','r',
+'a','l','s',0,2,'S','u','n','d','a','n','e','s','e',0,'S','u','n','d','a','n','e','s','e',0,
+2,'L','e','p','c','h','a',0,'L','e','p','c','h','a',0,2,'O','l','_','C','h','i','k','i',0,
+'O','l','_','C','h','i','k','i',0,2,'C','y','r','i','l','l','i','c','_','E','x','t','_','A',0,
+'C','y','r','i','l','l','i','c','_','E','x','t','e','n','d','e','d','_','A',0,
+2,'V','a','i',0,'V','a','i',0,2,'C','y','r','i','l','l','i','c','_','E','x','t','_','B',0,
+'C','y','r','i','l','l','i','c','_','E','x','t','e','n','d','e','d','_','B',0,
+2,'S','a','u','r','a','s','h','t','r','a',0,'S','a','u','r','a','s','h','t','r','a',0,
+2,'K','a','y','a','h','_','L','i',0,'K','a','y','a','h','_','L','i',0,
+2,'R','e','j','a','n','g',0,'R','e','j','a','n','g',0,2,'C','h','a','m',0,'C','h','a','m',0,
+2,'A','n','c','i','e','n','t','_','S','y','m','b','o','l','s',0,'A','n','c','i','e','n','t','_','S','y','m','b','o','l','s',
+0,2,'P','h','a','i','s','t','o','s',0,'P','h','a','i','s','t','o','s','_','D','i','s','c',0,
+2,'L','y','c','i','a','n',0,'L','y','c','i','a','n',0,2,'C','a','r','i','a','n',0,'C','a','r','i','a','n',0,
+2,'L','y','d','i','a','n',0,'L','y','d','i','a','n',0,2,'M','a','h','j','o','n','g',0,
+'M','a','h','j','o','n','g','_','T','i','l','e','s',0,2,'D','o','m','i','n','o',0,'D','o','m','i','n','o','_','T','i','l',
+'e','s',0,2,'S','a','m','a','r','i','t','a','n',0,'S','a','m','a','r','i','t','a','n',0,
+2,'U','C','A','S','_','E','x','t',0,'U','n','i','f','i','e','d','_','C','a','n','a','d','i','a','n','_','A','b','o','r','i',
+'g','i','n','a','l','_','S','y','l','l','a','b','i','c','s','_','E','x','t','e','n','d','e','d',0,
+2,'T','a','i','_','T','h','a','m',0,'T','a','i','_','T','h','a','m',0,
+2,'V','e','d','i','c','_','E','x','t',0,'V','e','d','i','c','_','E','x','t','e','n','s','i','o','n','s',0,
+2,'L','i','s','u',0,'L','i','s','u',0,2,'B','a','m','u','m',0,'B','a','m','u','m',0,
+2,'I','n','d','i','c','_','N','u','m','b','e','r','_','F','o','r','m','s',0,'C','o','m','m','o','n','_','I','n','d','i','c',
+'_','N','u','m','b','e','r','_','F','o','r','m','s',0,2,'D','e','v','a','n','a','g','a','r','i','_','E','x','t',0,
+'D','e','v','a','n','a','g','a','r','i','_','E','x','t','e','n','d','e','d',0,
+2,'J','a','m','o','_','E','x','t','_','A',0,'H','a','n','g','u','l','_','J','a','m','o','_','E','x','t','e','n','d','e','d',
+'_','A',0,2,'J','a','v','a','n','e','s','e',0,'J','a','v','a','n','e','s','e',0,
+2,'M','y','a','n','m','a','r','_','E','x','t','_','A',0,'M','y','a','n','m','a','r','_','E','x','t','e','n','d','e','d','_',
+'A',0,2,'T','a','i','_','V','i','e','t',0,'T','a','i','_','V','i','e','t',0,
+2,'M','e','e','t','e','i','_','M','a','y','e','k',0,'M','e','e','t','e','i','_','M','a','y','e','k',0,
+2,'J','a','m','o','_','E','x','t','_','B',0,'H','a','n','g','u','l','_','J','a','m','o','_','E','x','t','e','n','d','e','d',
+'_','B',0,2,'I','m','p','e','r','i','a','l','_','A','r','a','m','a','i','c',0,'I','m','p','e','r','i','a','l','_','A','r',
+'a','m','a','i','c',0,2,'O','l','d','_','S','o','u','t','h','_','A','r','a','b','i','a','n',0,
+'O','l','d','_','S','o','u','t','h','_','A','r','a','b','i','a','n',0,
+2,'A','v','e','s','t','a','n',0,'A','v','e','s','t','a','n',0,
+2,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','r','t','h','i','a','n',0,
+'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','r','t','h','i','a','n',0,
+2,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','h','l','a','v','i',0,'I','n','s','c','r','i','p','t','i',
+'o','n','a','l','_','P','a','h','l','a','v','i',0,2,'O','l','d','_','T','u','r','k','i','c',0,
+'O','l','d','_','T','u','r','k','i','c',0,2,'R','u','m','i',0,'R','u','m','i','_','N','u','m','e','r','a','l','_','S','y',
+'m','b','o','l','s',0,2,'K','a','i','t','h','i',0,'K','a','i','t','h','i',0,
+2,'E','g','y','p','t','i','a','n','_','H','i','e','r','o','g','l','y','p','h','s',0,'E','g','y','p','t','i','a','n','_','H',
+'i','e','r','o','g','l','y','p','h','s',0,2,'E','n','c','l','o','s','e','d','_','A','l','p','h','a','n','u','m','_','S','u',
+'p',0,'E','n','c','l','o','s','e','d','_','A','l','p','h','a','n','u','m','e','r','i','c','_','S','u','p','p','l','e','m','e',
+'n','t',0,2,'E','n','c','l','o','s','e','d','_','I','d','e','o','g','r','a','p','h','i','c','_','S','u','p',0,
+'E','n','c','l','o','s','e','d','_','I','d','e','o','g','r','a','p','h','i','c','_','S','u','p','p','l','e','m','e','n','t',0,
+2,'C','J','K','_','E','x','t','_','C',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h',
+'s','_','E','x','t','e','n','s','i','o','n','_','C',0,2,'M','a','n','d','a','i','c',0,'M','a','n','d','a','i','c',0,
+2,'B','a','t','a','k',0,'B','a','t','a','k',0,2,'E','t','h','i','o','p','i','c','_','E','x','t','_','A',0,
+'E','t','h','i','o','p','i','c','_','E','x','t','e','n','d','e','d','_','A',0,
+2,'B','r','a','h','m','i',0,'B','r','a','h','m','i',0,2,'B','a','m','u','m','_','S','u','p',0,
+'B','a','m','u','m','_','S','u','p','p','l','e','m','e','n','t',0,
+2,'K','a','n','a','_','S','u','p',0,'K','a','n','a','_','S','u','p','p','l','e','m','e','n','t',0,
+2,'P','l','a','y','i','n','g','_','C','a','r','d','s',0,'P','l','a','y','i','n','g','_','C','a','r','d','s',0,
+2,'M','i','s','c','_','P','i','c','t','o','g','r','a','p','h','s',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_',
+'S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o','g','r','a','p','h','s',0,
+2,'E','m','o','t','i','c','o','n','s',0,'E','m','o','t','i','c','o','n','s',0,
+2,'T','r','a','n','s','p','o','r','t','_','A','n','d','_','M','a','p',0,'T','r','a','n','s','p','o','r','t','_','A','n','d',
+'_','M','a','p','_','S','y','m','b','o','l','s',0,2,'A','l','c','h','e','m','i','c','a','l',0,
+'A','l','c','h','e','m','i','c','a','l','_','S','y','m','b','o','l','s',0,
+2,'C','J','K','_','E','x','t','_','D',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h',
+'s','_','E','x','t','e','n','s','i','o','n','_','D',0,2,'A','r','a','b','i','c','_','E','x','t','_','A',0,
+'A','r','a','b','i','c','_','E','x','t','e','n','d','e','d','_','A',0,
+2,'A','r','a','b','i','c','_','M','a','t','h',0,'A','r','a','b','i','c','_','M','a','t','h','e','m','a','t','i','c','a','l',
+'_','A','l','p','h','a','b','e','t','i','c','_','S','y','m','b','o','l','s',0,
+2,'C','h','a','k','m','a',0,'C','h','a','k','m','a',0,2,'M','e','e','t','e','i','_','M','a','y','e','k','_','E','x','t',
+0,'M','e','e','t','e','i','_','M','a','y','e','k','_','E','x','t','e','n','s','i','o','n','s',0,
+2,'M','e','r','o','i','t','i','c','_','C','u','r','s','i','v','e',0,'M','e','r','o','i','t','i','c','_','C','u','r','s','i',
+'v','e',0,2,'M','e','r','o','i','t','i','c','_','H','i','e','r','o','g','l','y','p','h','s',0,
+'M','e','r','o','i','t','i','c','_','H','i','e','r','o','g','l','y','p','h','s',0,
+2,'M','i','a','o',0,'M','i','a','o',0,2,'S','h','a','r','a','d','a',0,'S','h','a','r','a','d','a',0,
+2,'S','o','r','a','_','S','o','m','p','e','n','g',0,'S','o','r','a','_','S','o','m','p','e','n','g',0,
+2,'S','u','n','d','a','n','e','s','e','_','S','u','p',0,'S','u','n','d','a','n','e','s','e','_','S','u','p','p','l','e','m',
+'e','n','t',0,2,'T','a','k','r','i',0,'T','a','k','r','i',0,
+2,'B','a','s','s','a','_','V','a','h',0,'B','a','s','s','a','_','V','a','h',0,
+2,'C','a','u','c','a','s','i','a','n','_','A','l','b','a','n','i','a','n',0,'C','a','u','c','a','s','i','a','n','_','A','l',
+'b','a','n','i','a','n',0,2,'C','o','p','t','i','c','_','E','p','a','c','t','_','N','u','m','b','e','r','s',0,
+'C','o','p','t','i','c','_','E','p','a','c','t','_','N','u','m','b','e','r','s',0,
+2,'D','i','a','c','r','i','t','i','c','a','l','s','_','E','x','t',0,'C','o','m','b','i','n','i','n','g','_','D','i','a','c',
+'r','i','t','i','c','a','l','_','M','a','r','k','s','_','E','x','t','e','n','d','e','d',0,
+2,'D','u','p','l','o','y','a','n',0,'D','u','p','l','o','y','a','n',0,
+2,'E','l','b','a','s','a','n',0,'E','l','b','a','s','a','n',0,
+2,'G','e','o','m','e','t','r','i','c','_','S','h','a','p','e','s','_','E','x','t',0,'G','e','o','m','e','t','r','i','c','_',
+'S','h','a','p','e','s','_','E','x','t','e','n','d','e','d',0,
+2,'G','r','a','n','t','h','a',0,'G','r','a','n','t','h','a',0,
+2,'K','h','o','j','k','i',0,'K','h','o','j','k','i',0,2,'K','h','u','d','a','w','a','d','i',0,
+'K','h','u','d','a','w','a','d','i',0,2,'L','a','t','i','n','_','E','x','t','_','E',0,'L','a','t','i','n','_','E','x','t',
+'e','n','d','e','d','_','E',0,2,'L','i','n','e','a','r','_','A',0,'L','i','n','e','a','r','_','A',0,
+2,'M','a','h','a','j','a','n','i',0,'M','a','h','a','j','a','n','i',0,
+2,'M','a','n','i','c','h','a','e','a','n',0,'M','a','n','i','c','h','a','e','a','n',0,
+2,'M','e','n','d','e','_','K','i','k','a','k','u','i',0,'M','e','n','d','e','_','K','i','k','a','k','u','i',0,
+2,'M','o','d','i',0,'M','o','d','i',0,2,'M','r','o',0,'M','r','o',0,
+2,'M','y','a','n','m','a','r','_','E','x','t','_','B',0,'M','y','a','n','m','a','r','_','E','x','t','e','n','d','e','d','_',
+'B',0,2,'N','a','b','a','t','a','e','a','n',0,'N','a','b','a','t','a','e','a','n',0,
+2,'O','l','d','_','N','o','r','t','h','_','A','r','a','b','i','a','n',0,'O','l','d','_','N','o','r','t','h','_','A','r','a',
+'b','i','a','n',0,2,'O','l','d','_','P','e','r','m','i','c',0,'O','l','d','_','P','e','r','m','i','c',0,
+2,'O','r','n','a','m','e','n','t','a','l','_','D','i','n','g','b','a','t','s',0,'O','r','n','a','m','e','n','t','a','l','_',
+'D','i','n','g','b','a','t','s',0,2,'P','a','h','a','w','h','_','H','m','o','n','g',0,'P','a','h','a','w','h','_','H','m',
+'o','n','g',0,2,'P','a','l','m','y','r','e','n','e',0,'P','a','l','m','y','r','e','n','e',0,
+2,'P','a','u','_','C','i','n','_','H','a','u',0,'P','a','u','_','C','i','n','_','H','a','u',0,
+2,'P','s','a','l','t','e','r','_','P','a','h','l','a','v','i',0,'P','s','a','l','t','e','r','_','P','a','h','l','a','v','i',
+0,2,'S','h','o','r','t','h','a','n','d','_','F','o','r','m','a','t','_','C','o','n','t','r','o','l','s',0,
+'S','h','o','r','t','h','a','n','d','_','F','o','r','m','a','t','_','C','o','n','t','r','o','l','s',0,
+2,'S','i','d','d','h','a','m',0,'S','i','d','d','h','a','m',0,
+2,'S','i','n','h','a','l','a','_','A','r','c','h','a','i','c','_','N','u','m','b','e','r','s',0,
+'S','i','n','h','a','l','a','_','A','r','c','h','a','i','c','_','N','u','m','b','e','r','s',0,
+2,'S','u','p','_','A','r','r','o','w','s','_','C',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','A','r','r','o','w',
+'s','_','C',0,2,'T','i','r','h','u','t','a',0,'T','i','r','h','u','t','a',0,
+2,'W','a','r','a','n','g','_','C','i','t','i',0,'W','a','r','a','n','g','_','C','i','t','i',0,
+2,'A','h','o','m',0,'A','h','o','m',0,2,'A','n','a','t','o','l','i','a','n','_','H','i','e','r','o','g','l','y','p','h',
+'s',0,'A','n','a','t','o','l','i','a','n','_','H','i','e','r','o','g','l','y','p','h','s',0,
+2,'C','h','e','r','o','k','e','e','_','S','u','p',0,'C','h','e','r','o','k','e','e','_','S','u','p','p','l','e','m','e','n',
+'t',0,2,'C','J','K','_','E','x','t','_','E',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a',
+'p','h','s','_','E','x','t','e','n','s','i','o','n','_','E',0,
+2,'E','a','r','l','y','_','D','y','n','a','s','t','i','c','_','C','u','n','e','i','f','o','r','m',0,
+'E','a','r','l','y','_','D','y','n','a','s','t','i','c','_','C','u','n','e','i','f','o','r','m',0,
+2,'H','a','t','r','a','n',0,'H','a','t','r','a','n',0,2,'M','u','l','t','a','n','i',0,
+'M','u','l','t','a','n','i',0,2,'O','l','d','_','H','u','n','g','a','r','i','a','n',0,'O','l','d','_','H','u','n','g','a',
+'r','i','a','n',0,2,'S','u','p','_','S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o','g','r','a','p','h',
+'s',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o',
+'g','r','a','p','h','s',0,2,'S','u','t','t','o','n','_','S','i','g','n','W','r','i','t','i','n','g',0,
+'S','u','t','t','o','n','_','S','i','g','n','W','r','i','t','i','n','g',0,
+2,'A','d','l','a','m',0,'A','d','l','a','m',0,2,'B','h','a','i','k','s','u','k','i',0,
+'B','h','a','i','k','s','u','k','i',0,2,'C','y','r','i','l','l','i','c','_','E','x','t','_','C',0,
+'C','y','r','i','l','l','i','c','_','E','x','t','e','n','d','e','d','_','C',0,
+2,'G','l','a','g','o','l','i','t','i','c','_','S','u','p',0,'G','l','a','g','o','l','i','t','i','c','_','S','u','p','p','l',
+'e','m','e','n','t',0,2,'I','d','e','o','g','r','a','p','h','i','c','_','S','y','m','b','o','l','s',0,
+'I','d','e','o','g','r','a','p','h','i','c','_','S','y','m','b','o','l','s','_','A','n','d','_','P','u','n','c','t','u','a','t',
+'i','o','n',0,2,'M','a','r','c','h','e','n',0,'M','a','r','c','h','e','n',0,
+2,'M','o','n','g','o','l','i','a','n','_','S','u','p',0,'M','o','n','g','o','l','i','a','n','_','S','u','p','p','l','e','m',
+'e','n','t',0,2,'N','e','w','a',0,'N','e','w','a',0,2,'O','s','a','g','e',0,'O','s','a','g','e',0,
+2,'T','a','n','g','u','t',0,'T','a','n','g','u','t',0,2,'T','a','n','g','u','t','_','C','o','m','p','o','n','e','n','t',
+'s',0,'T','a','n','g','u','t','_','C','o','m','p','o','n','e','n','t','s',0,
+2,'C','J','K','_','E','x','t','_','F',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h',
+'s','_','E','x','t','e','n','s','i','o','n','_','F',0,2,'K','a','n','a','_','E','x','t','_','A',0,
+'K','a','n','a','_','E','x','t','e','n','d','e','d','_','A',0,
+2,'M','a','s','a','r','a','m','_','G','o','n','d','i',0,'M','a','s','a','r','a','m','_','G','o','n','d','i',0,
+2,'N','u','s','h','u',0,'N','u','s','h','u',0,2,'S','o','y','o','m','b','o',0,'S','o','y','o','m','b','o',0,
+2,'S','y','r','i','a','c','_','S','u','p',0,'S','y','r','i','a','c','_','S','u','p','p','l','e','m','e','n','t',0,
+2,'Z','a','n','a','b','a','z','a','r','_','S','q','u','a','r','e',0,'Z','a','n','a','b','a','z','a','r','_','S','q','u','a',
+'r','e',0,2,'C','h','e','s','s','_','S','y','m','b','o','l','s',0,'C','h','e','s','s','_','S','y','m','b','o','l','s',0,
+2,'D','o','g','r','a',0,'D','o','g','r','a',0,2,'G','e','o','r','g','i','a','n','_','E','x','t',0,
+'G','e','o','r','g','i','a','n','_','E','x','t','e','n','d','e','d',0,
+2,'G','u','n','j','a','l','a','_','G','o','n','d','i',0,'G','u','n','j','a','l','a','_','G','o','n','d','i',0,
+2,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a',0,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a',
+0,2,'I','n','d','i','c','_','S','i','y','a','q','_','N','u','m','b','e','r','s',0,'I','n','d','i','c','_','S','i','y','a',
+'q','_','N','u','m','b','e','r','s',0,2,'M','a','k','a','s','a','r',0,'M','a','k','a','s','a','r',0,
+2,'M','a','y','a','n','_','N','u','m','e','r','a','l','s',0,'M','a','y','a','n','_','N','u','m','e','r','a','l','s',0,
+2,'M','e','d','e','f','a','i','d','r','i','n',0,'M','e','d','e','f','a','i','d','r','i','n',0,
+2,'O','l','d','_','S','o','g','d','i','a','n',0,'O','l','d','_','S','o','g','d','i','a','n',0,
+2,'S','o','g','d','i','a','n',0,'S','o','g','d','i','a','n',0,
+2,'E','g','y','p','t','i','a','n','_','H','i','e','r','o','g','l','y','p','h','_','F','o','r','m','a','t','_','C','o','n','t',
+'r','o','l','s',0,'E','g','y','p','t','i','a','n','_','H','i','e','r','o','g','l','y','p','h','_','F','o','r','m','a','t','_',
+'C','o','n','t','r','o','l','s',0,2,'E','l','y','m','a','i','c',0,'E','l','y','m','a','i','c',0,
+2,'N','a','n','d','i','n','a','g','a','r','i',0,'N','a','n','d','i','n','a','g','a','r','i',0,
+2,'N','y','i','a','k','e','n','g','_','P','u','a','c','h','u','e','_','H','m','o','n','g',0,
+'N','y','i','a','k','e','n','g','_','P','u','a','c','h','u','e','_','H','m','o','n','g',0,
+2,'O','t','t','o','m','a','n','_','S','i','y','a','q','_','N','u','m','b','e','r','s',0,'O','t','t','o','m','a','n','_','S',
+'i','y','a','q','_','N','u','m','b','e','r','s',0,2,'S','m','a','l','l','_','K','a','n','a','_','E','x','t',0,
+'S','m','a','l','l','_','K','a','n','a','_','E','x','t','e','n','s','i','o','n',0,
+2,'S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o','g','r','a','p','h','s','_','E','x','t','_','A',0,
+'S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o','g','r','a','p','h','s','_','E','x','t','e','n','d','e','d',
+'_','A',0,2,'T','a','m','i','l','_','S','u','p',0,'T','a','m','i','l','_','S','u','p','p','l','e','m','e','n','t',0,
+2,'W','a','n','c','h','o',0,'W','a','n','c','h','o',0,2,'C','h','o','r','a','s','m','i','a','n',0,
+'C','h','o','r','a','s','m','i','a','n',0,2,'C','J','K','_','E','x','t','_','G',0,'C','J','K','_','U','n','i','f','i','e',
+'d','_','I','d','e','o','g','r','a','p','h','s','_','E','x','t','e','n','s','i','o','n','_','G',0,
+2,'D','i','v','e','s','_','A','k','u','r','u',0,'D','i','v','e','s','_','A','k','u','r','u',0,
+2,'K','h','i','t','a','n','_','S','m','a','l','l','_','S','c','r','i','p','t',0,'K','h','i','t','a','n','_','S','m','a','l',
+'l','_','S','c','r','i','p','t',0,2,'L','i','s','u','_','S','u','p',0,'L','i','s','u','_','S','u','p','p','l','e','m','e',
+'n','t',0,2,'S','y','m','b','o','l','s','_','F','o','r','_','L','e','g','a','c','y','_','C','o','m','p','u','t','i','n','g',
+0,'S','y','m','b','o','l','s','_','F','o','r','_','L','e','g','a','c','y','_','C','o','m','p','u','t','i','n','g',0,
+2,'T','a','n','g','u','t','_','S','u','p',0,'T','a','n','g','u','t','_','S','u','p','p','l','e','m','e','n','t',0,
+2,'Y','e','z','i','d','i',0,'Y','e','z','i','d','i',0,2,'c','c','c',0,'C','a','n','o','n','i','c','a','l','_','C','o',
+'m','b','i','n','i','n','g','_','C','l','a','s','s',0,2,'d','t',0,'D','e','c','o','m','p','o','s','i','t','i','o','n','_',
+'T','y','p','e',0,3,'N','o','n','e',0,'N','o','n','e',0,'n','o','n','e',0,
+3,'C','a','n',0,'C','a','n','o','n','i','c','a','l',0,'c','a','n',0,
+3,'C','o','m',0,'C','o','m','p','a','t',0,'c','o','m',0,
+3,'E','n','c',0,'C','i','r','c','l','e',0,'e','n','c',0,
+3,'F','i','n',0,'F','i','n','a','l',0,'f','i','n',0,3,'F','o','n','t',0,'F','o','n','t',0,
+'f','o','n','t',0,3,'F','r','a',0,'F','r','a','c','t','i','o','n',0,'f','r','a',0,
+3,'I','n','i','t',0,'I','n','i','t','i','a','l',0,'i','n','i','t',0,
+3,'I','s','o',0,'I','s','o','l','a','t','e','d',0,'i','s','o',0,
+3,'M','e','d',0,'M','e','d','i','a','l',0,'m','e','d',0,
+3,'N','a','r',0,'N','a','r','r','o','w',0,'n','a','r',0,
+3,'N','b',0,'N','o','b','r','e','a','k',0,'n','b',0,3,'S','m','l',0,'S','m','a','l','l',0,
+'s','m','l',0,3,'S','q','r',0,'S','q','u','a','r','e',0,'s','q','r',0,
+3,'S','u','b',0,'S','u','b',0,'s','u','b',0,3,'S','u','p',0,'S','u','p','e','r',0,
+'s','u','p',0,3,'V','e','r','t',0,'V','e','r','t','i','c','a','l',0,'v','e','r','t',0,
+3,'W','i','d','e',0,'W','i','d','e',0,'w','i','d','e',0,
+2,'e','a',0,'E','a','s','t','_','A','s','i','a','n','_','W','i','d','t','h',0,
+2,'N',0,'N','e','u','t','r','a','l',0,2,'A',0,'A','m','b','i','g','u','o','u','s',0,
+2,'H',0,'H','a','l','f','w','i','d','t','h',0,2,'F',0,'F','u','l','l','w','i','d','t','h',0,
+2,'N','a',0,'N','a','r','r','o','w',0,2,'W',0,'W','i','d','e',0,
+2,'g','c',0,'G','e','n','e','r','a','l','_','C','a','t','e','g','o','r','y',0,
+2,'C','n',0,'U','n','a','s','s','i','g','n','e','d',0,2,'L','u',0,'U','p','p','e','r','c','a','s','e','_','L','e','t',
+'t','e','r',0,2,'L','l',0,'L','o','w','e','r','c','a','s','e','_','L','e','t','t','e','r',0,
+2,'L','t',0,'T','i','t','l','e','c','a','s','e','_','L','e','t','t','e','r',0,
+2,'L','m',0,'M','o','d','i','f','i','e','r','_','L','e','t','t','e','r',0,
+2,'L','o',0,'O','t','h','e','r','_','L','e','t','t','e','r',0,
+2,'M','n',0,'N','o','n','s','p','a','c','i','n','g','_','M','a','r','k',0,
+2,'M','e',0,'E','n','c','l','o','s','i','n','g','_','M','a','r','k',0,
+2,'M','c',0,'S','p','a','c','i','n','g','_','M','a','r','k',0,
+3,'N','d',0,'D','e','c','i','m','a','l','_','N','u','m','b','e','r',0,'d','i','g','i','t',0,
+2,'N','l',0,'L','e','t','t','e','r','_','N','u','m','b','e','r',0,
+2,'N','o',0,'O','t','h','e','r','_','N','u','m','b','e','r',0,
+2,'Z','s',0,'S','p','a','c','e','_','S','e','p','a','r','a','t','o','r',0,
+2,'Z','l',0,'L','i','n','e','_','S','e','p','a','r','a','t','o','r',0,
+2,'Z','p',0,'P','a','r','a','g','r','a','p','h','_','S','e','p','a','r','a','t','o','r',0,
+3,'C','c',0,'C','o','n','t','r','o','l',0,'c','n','t','r','l',0,
+2,'C','f',0,'F','o','r','m','a','t',0,2,'C','o',0,'P','r','i','v','a','t','e','_','U','s','e',0,
+2,'C','s',0,'S','u','r','r','o','g','a','t','e',0,2,'P','d',0,'D','a','s','h','_','P','u','n','c','t','u','a','t','i',
+'o','n',0,2,'P','s',0,'O','p','e','n','_','P','u','n','c','t','u','a','t','i','o','n',0,
+2,'P','e',0,'C','l','o','s','e','_','P','u','n','c','t','u','a','t','i','o','n',0,
+2,'P','c',0,'C','o','n','n','e','c','t','o','r','_','P','u','n','c','t','u','a','t','i','o','n',0,
+2,'P','o',0,'O','t','h','e','r','_','P','u','n','c','t','u','a','t','i','o','n',0,
+2,'S','m',0,'M','a','t','h','_','S','y','m','b','o','l',0,
+2,'S','c',0,'C','u','r','r','e','n','c','y','_','S','y','m','b','o','l',0,
+2,'S','k',0,'M','o','d','i','f','i','e','r','_','S','y','m','b','o','l',0,
+2,'S','o',0,'O','t','h','e','r','_','S','y','m','b','o','l',0,
+2,'P','i',0,'I','n','i','t','i','a','l','_','P','u','n','c','t','u','a','t','i','o','n',0,
+2,'P','f',0,'F','i','n','a','l','_','P','u','n','c','t','u','a','t','i','o','n',0,
+2,'j','g',0,'J','o','i','n','i','n','g','_','G','r','o','u','p',0,
+2,'N','o','_','J','o','i','n','i','n','g','_','G','r','o','u','p',0,'N','o','_','J','o','i','n','i','n','g','_','G','r','o',
+'u','p',0,2,'A','i','n',0,'A','i','n',0,2,'A','l','a','p','h',0,'A','l','a','p','h',0,
+2,'A','l','e','f',0,'A','l','e','f',0,2,'B','e','h',0,'B','e','h',0,
+2,'B','e','t','h',0,'B','e','t','h',0,2,'D','a','l',0,'D','a','l',0,
+2,'D','a','l','a','t','h','_','R','i','s','h',0,'D','a','l','a','t','h','_','R','i','s','h',0,
+2,'E',0,'E',0,2,'F','e','h',0,'F','e','h',0,2,'F','i','n','a','l','_','S','e','m','k','a','t','h',0,
+'F','i','n','a','l','_','S','e','m','k','a','t','h',0,2,'G','a','f',0,'G','a','f',0,
+2,'G','a','m','a','l',0,'G','a','m','a','l',0,2,'H','a','h',0,'H','a','h',0,
+2,'T','e','h','_','M','a','r','b','u','t','a','_','G','o','a','l',0,'H','a','m','z','a','_','O','n','_','H','e','h','_','G',
+'o','a','l',0,2,'H','e',0,'H','e',0,2,'H','e','h',0,'H','e','h',0,
+2,'H','e','h','_','G','o','a','l',0,'H','e','h','_','G','o','a','l',0,
+2,'H','e','t','h',0,'H','e','t','h',0,2,'K','a','f',0,'K','a','f',0,
+2,'K','a','p','h',0,'K','a','p','h',0,2,'K','n','o','t','t','e','d','_','H','e','h',0,
+'K','n','o','t','t','e','d','_','H','e','h',0,2,'L','a','m',0,'L','a','m',0,
+2,'L','a','m','a','d','h',0,'L','a','m','a','d','h',0,2,'M','e','e','m',0,'M','e','e','m',0,
+2,'M','i','m',0,'M','i','m',0,2,'N','o','o','n',0,'N','o','o','n',0,
+2,'N','u','n',0,'N','u','n',0,2,'P','e',0,'P','e',0,
+2,'Q','a','f',0,'Q','a','f',0,2,'Q','a','p','h',0,'Q','a','p','h',0,
+2,'R','e','h',0,'R','e','h',0,2,'R','e','v','e','r','s','e','d','_','P','e',0,'R','e','v','e','r','s','e','d','_','P',
+'e',0,2,'S','a','d',0,'S','a','d',0,2,'S','a','d','h','e',0,'S','a','d','h','e',0,
+2,'S','e','e','n',0,'S','e','e','n',0,2,'S','e','m','k','a','t','h',0,'S','e','m','k','a','t','h',0,
+2,'S','h','i','n',0,'S','h','i','n',0,2,'S','w','a','s','h','_','K','a','f',0,'S','w','a','s','h','_','K','a','f',0,
+2,'S','y','r','i','a','c','_','W','a','w',0,'S','y','r','i','a','c','_','W','a','w',0,
+2,'T','a','h',0,'T','a','h',0,2,'T','a','w',0,'T','a','w',0,
+2,'T','e','h','_','M','a','r','b','u','t','a',0,'T','e','h','_','M','a','r','b','u','t','a',0,
+2,'T','e','t','h',0,'T','e','t','h',0,2,'W','a','w',0,'W','a','w',0,
+2,'Y','e','h',0,'Y','e','h',0,2,'Y','e','h','_','B','a','r','r','e','e',0,'Y','e','h','_','B','a','r','r','e','e',0,
+2,'Y','e','h','_','W','i','t','h','_','T','a','i','l',0,'Y','e','h','_','W','i','t','h','_','T','a','i','l',0,
+2,'Y','u','d','h',0,'Y','u','d','h',0,2,'Y','u','d','h','_','H','e',0,'Y','u','d','h','_','H','e',0,
+2,'Z','a','i','n',0,'Z','a','i','n',0,2,'F','e',0,'F','e',0,
+2,'K','h','a','p','h',0,'K','h','a','p','h',0,2,'Z','h','a','i','n',0,'Z','h','a','i','n',0,
+2,'B','u','r','u','s','h','a','s','k','i','_','Y','e','h','_','B','a','r','r','e','e',0,'B','u','r','u','s','h','a','s','k',
+'i','_','Y','e','h','_','B','a','r','r','e','e',0,2,'F','a','r','s','i','_','Y','e','h',0,
+'F','a','r','s','i','_','Y','e','h',0,2,'N','y','a',0,'N','y','a',0,
+2,'R','o','h','i','n','g','y','a','_','Y','e','h',0,'R','o','h','i','n','g','y','a','_','Y','e','h',0,
+2,'M','a','n','i','c','h','a','e','a','n','_','A','l','e','p','h',0,'M','a','n','i','c','h','a','e','a','n','_','A','l','e',
+'p','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','A','y','i','n',0,'M','a','n','i','c','h','a','e','a','n','_','A',
+'y','i','n',0,2,'M','a','n','i','c','h','a','e','a','n','_','B','e','t','h',0,'M','a','n','i','c','h','a','e','a','n','_',
+'B','e','t','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','D','a','l','e','t','h',0,
+'M','a','n','i','c','h','a','e','a','n','_','D','a','l','e','t','h',0,
+2,'M','a','n','i','c','h','a','e','a','n','_','D','h','a','m','e','d','h',0,'M','a','n','i','c','h','a','e','a','n','_','D',
+'h','a','m','e','d','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','F','i','v','e',0,
+'M','a','n','i','c','h','a','e','a','n','_','F','i','v','e',0,
+2,'M','a','n','i','c','h','a','e','a','n','_','G','i','m','e','l',0,'M','a','n','i','c','h','a','e','a','n','_','G','i','m',
+'e','l',0,2,'M','a','n','i','c','h','a','e','a','n','_','H','e','t','h',0,'M','a','n','i','c','h','a','e','a','n','_','H',
+'e','t','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','H','u','n','d','r','e','d',0,
+'M','a','n','i','c','h','a','e','a','n','_','H','u','n','d','r','e','d',0,
+2,'M','a','n','i','c','h','a','e','a','n','_','K','a','p','h',0,'M','a','n','i','c','h','a','e','a','n','_','K','a','p','h',
+0,2,'M','a','n','i','c','h','a','e','a','n','_','L','a','m','e','d','h',0,'M','a','n','i','c','h','a','e','a','n','_','L',
+'a','m','e','d','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','M','e','m',0,'M','a','n','i','c','h','a','e','a','n',
+'_','M','e','m',0,2,'M','a','n','i','c','h','a','e','a','n','_','N','u','n',0,'M','a','n','i','c','h','a','e','a','n','_',
+'N','u','n',0,2,'M','a','n','i','c','h','a','e','a','n','_','O','n','e',0,'M','a','n','i','c','h','a','e','a','n','_','O',
+'n','e',0,2,'M','a','n','i','c','h','a','e','a','n','_','P','e',0,'M','a','n','i','c','h','a','e','a','n','_','P','e',0,
+2,'M','a','n','i','c','h','a','e','a','n','_','Q','o','p','h',0,'M','a','n','i','c','h','a','e','a','n','_','Q','o','p','h',
+0,2,'M','a','n','i','c','h','a','e','a','n','_','R','e','s','h',0,'M','a','n','i','c','h','a','e','a','n','_','R','e','s',
+'h',0,2,'M','a','n','i','c','h','a','e','a','n','_','S','a','d','h','e',0,'M','a','n','i','c','h','a','e','a','n','_','S',
+'a','d','h','e',0,2,'M','a','n','i','c','h','a','e','a','n','_','S','a','m','e','k','h',0,
+'M','a','n','i','c','h','a','e','a','n','_','S','a','m','e','k','h',0,
+2,'M','a','n','i','c','h','a','e','a','n','_','T','a','w',0,'M','a','n','i','c','h','a','e','a','n','_','T','a','w',0,
+2,'M','a','n','i','c','h','a','e','a','n','_','T','e','n',0,'M','a','n','i','c','h','a','e','a','n','_','T','e','n',0,
+2,'M','a','n','i','c','h','a','e','a','n','_','T','e','t','h',0,'M','a','n','i','c','h','a','e','a','n','_','T','e','t','h',
+0,2,'M','a','n','i','c','h','a','e','a','n','_','T','h','a','m','e','d','h',0,'M','a','n','i','c','h','a','e','a','n','_',
+'T','h','a','m','e','d','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','T','w','e','n','t','y',0,
+'M','a','n','i','c','h','a','e','a','n','_','T','w','e','n','t','y',0,
+2,'M','a','n','i','c','h','a','e','a','n','_','W','a','w',0,'M','a','n','i','c','h','a','e','a','n','_','W','a','w',0,
+2,'M','a','n','i','c','h','a','e','a','n','_','Y','o','d','h',0,'M','a','n','i','c','h','a','e','a','n','_','Y','o','d','h',
+0,2,'M','a','n','i','c','h','a','e','a','n','_','Z','a','y','i','n',0,'M','a','n','i','c','h','a','e','a','n','_','Z','a',
+'y','i','n',0,2,'S','t','r','a','i','g','h','t','_','W','a','w',0,'S','t','r','a','i','g','h','t','_','W','a','w',0,
+2,'A','f','r','i','c','a','n','_','F','e','h',0,'A','f','r','i','c','a','n','_','F','e','h',0,
+2,'A','f','r','i','c','a','n','_','N','o','o','n',0,'A','f','r','i','c','a','n','_','N','o','o','n',0,
+2,'A','f','r','i','c','a','n','_','Q','a','f',0,'A','f','r','i','c','a','n','_','Q','a','f',0,
+2,'M','a','l','a','y','a','l','a','m','_','B','h','a',0,'M','a','l','a','y','a','l','a','m','_','B','h','a',0,
+2,'M','a','l','a','y','a','l','a','m','_','J','a',0,'M','a','l','a','y','a','l','a','m','_','J','a',0,
+2,'M','a','l','a','y','a','l','a','m','_','L','l','a',0,'M','a','l','a','y','a','l','a','m','_','L','l','a',0,
+2,'M','a','l','a','y','a','l','a','m','_','L','l','l','a',0,'M','a','l','a','y','a','l','a','m','_','L','l','l','a',0,
+2,'M','a','l','a','y','a','l','a','m','_','N','g','a',0,'M','a','l','a','y','a','l','a','m','_','N','g','a',0,
+2,'M','a','l','a','y','a','l','a','m','_','N','n','a',0,'M','a','l','a','y','a','l','a','m','_','N','n','a',0,
+2,'M','a','l','a','y','a','l','a','m','_','N','n','n','a',0,'M','a','l','a','y','a','l','a','m','_','N','n','n','a',0,
+2,'M','a','l','a','y','a','l','a','m','_','N','y','a',0,'M','a','l','a','y','a','l','a','m','_','N','y','a',0,
+2,'M','a','l','a','y','a','l','a','m','_','R','a',0,'M','a','l','a','y','a','l','a','m','_','R','a',0,
+2,'M','a','l','a','y','a','l','a','m','_','S','s','a',0,'M','a','l','a','y','a','l','a','m','_','S','s','a',0,
+2,'M','a','l','a','y','a','l','a','m','_','T','t','a',0,'M','a','l','a','y','a','l','a','m','_','T','t','a',0,
+2,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a','_','K','i','n','n','a','_','Y','a',0,
+'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a','_','K','i','n','n','a','_','Y','a',0,
+2,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a','_','P','a',0,'H','a','n','i','f','i','_','R','o','h','i','n',
+'g','y','a','_','P','a',0,2,'j','t',0,'J','o','i','n','i','n','g','_','T','y','p','e',0,
+2,'U',0,'N','o','n','_','J','o','i','n','i','n','g',0,2,'C',0,'J','o','i','n','_','C','a','u','s','i','n','g',0,
+2,'D',0,'D','u','a','l','_','J','o','i','n','i','n','g',0,
+2,'L',0,'L','e','f','t','_','J','o','i','n','i','n','g',0,
+2,'R',0,'R','i','g','h','t','_','J','o','i','n','i','n','g',0,
+2,'T',0,'T','r','a','n','s','p','a','r','e','n','t',0,2,'l','b',0,'L','i','n','e','_','B','r','e','a','k',0,
+2,'X','X',0,'U','n','k','n','o','w','n',0,2,'A','I',0,'A','m','b','i','g','u','o','u','s',0,
+2,'A','L',0,'A','l','p','h','a','b','e','t','i','c',0,2,'B','2',0,'B','r','e','a','k','_','B','o','t','h',0,
+2,'B','A',0,'B','r','e','a','k','_','A','f','t','e','r',0,
+2,'B','B',0,'B','r','e','a','k','_','B','e','f','o','r','e',0,
+2,'B','K',0,'M','a','n','d','a','t','o','r','y','_','B','r','e','a','k',0,
+2,'C','B',0,'C','o','n','t','i','n','g','e','n','t','_','B','r','e','a','k',0,
+2,'C','L',0,'C','l','o','s','e','_','P','u','n','c','t','u','a','t','i','o','n',0,
+2,'C','M',0,'C','o','m','b','i','n','i','n','g','_','M','a','r','k',0,
+2,'C','R',0,'C','a','r','r','i','a','g','e','_','R','e','t','u','r','n',0,
+2,'E','X',0,'E','x','c','l','a','m','a','t','i','o','n',0,
+2,'G','L',0,'G','l','u','e',0,2,'H','Y',0,'H','y','p','h','e','n',0,
+2,'I','D',0,'I','d','e','o','g','r','a','p','h','i','c',0,
+3,'I','N',0,'I','n','s','e','p','a','r','a','b','l','e',0,'I','n','s','e','p','e','r','a','b','l','e',0,
+2,'I','S',0,'I','n','f','i','x','_','N','u','m','e','r','i','c',0,
+2,'L','F',0,'L','i','n','e','_','F','e','e','d',0,2,'N','S',0,'N','o','n','s','t','a','r','t','e','r',0,
+2,'N','U',0,'N','u','m','e','r','i','c',0,2,'O','P',0,'O','p','e','n','_','P','u','n','c','t','u','a','t','i','o','n',
+0,2,'P','O',0,'P','o','s','t','f','i','x','_','N','u','m','e','r','i','c',0,
+2,'P','R',0,'P','r','e','f','i','x','_','N','u','m','e','r','i','c',0,
+2,'Q','U',0,'Q','u','o','t','a','t','i','o','n',0,2,'S','A',0,'C','o','m','p','l','e','x','_','C','o','n','t','e','x',
+'t',0,2,'S','G',0,'S','u','r','r','o','g','a','t','e',0,
+2,'S','P',0,'S','p','a','c','e',0,2,'S','Y',0,'B','r','e','a','k','_','S','y','m','b','o','l','s',0,
+2,'Z','W',0,'Z','W','S','p','a','c','e',0,2,'N','L',0,'N','e','x','t','_','L','i','n','e',0,
+2,'W','J',0,'W','o','r','d','_','J','o','i','n','e','r',0,
+2,'H','2',0,'H','2',0,2,'H','3',0,'H','3',0,2,'J','L',0,'J','L',0,
+2,'J','T',0,'J','T',0,2,'J','V',0,'J','V',0,2,'C','P',0,'C','l','o','s','e','_','P','a','r','e','n','t','h','e',
+'s','i','s',0,2,'C','J',0,'C','o','n','d','i','t','i','o','n','a','l','_','J','a','p','a','n','e','s','e','_','S','t','a',
+'r','t','e','r',0,2,'H','L',0,'H','e','b','r','e','w','_','L','e','t','t','e','r',0,
+2,'E','B',0,'E','_','B','a','s','e',0,2,'E','M',0,'E','_','M','o','d','i','f','i','e','r',0,
+2,'Z','W','J',0,'Z','W','J',0,2,'n','t',0,'N','u','m','e','r','i','c','_','T','y','p','e',0,
+2,'N','o','n','e',0,'N','o','n','e',0,2,'D','e',0,'D','e','c','i','m','a','l',0,
+2,'D','i',0,'D','i','g','i','t',0,2,'N','u',0,'N','u','m','e','r','i','c',0,
+2,'s','c',0,'S','c','r','i','p','t',0,2,'Z','y','y','y',0,'C','o','m','m','o','n',0,
+3,'Z','i','n','h',0,'I','n','h','e','r','i','t','e','d',0,'Q','a','a','i',0,
+2,'A','r','a','b',0,'A','r','a','b','i','c',0,2,'A','r','m','n',0,'A','r','m','e','n','i','a','n',0,
+2,'B','e','n','g',0,'B','e','n','g','a','l','i',0,2,'B','o','p','o',0,'B','o','p','o','m','o','f','o',0,
+2,'C','h','e','r',0,'C','h','e','r','o','k','e','e',0,3,'C','o','p','t',0,'C','o','p','t','i','c',0,
+'Q','a','a','c',0,2,'C','y','r','l',0,'C','y','r','i','l','l','i','c',0,
+2,'D','s','r','t',0,'D','e','s','e','r','e','t',0,2,'D','e','v','a',0,'D','e','v','a','n','a','g','a','r','i',0,
+2,'E','t','h','i',0,'E','t','h','i','o','p','i','c',0,2,'G','e','o','r',0,'G','e','o','r','g','i','a','n',0,
+2,'G','o','t','h',0,'G','o','t','h','i','c',0,2,'G','r','e','k',0,'G','r','e','e','k',0,
+2,'G','u','j','r',0,'G','u','j','a','r','a','t','i',0,2,'G','u','r','u',0,'G','u','r','m','u','k','h','i',0,
+2,'H','a','n','i',0,'H','a','n',0,2,'H','a','n','g',0,'H','a','n','g','u','l',0,
+2,'H','e','b','r',0,'H','e','b','r','e','w',0,2,'H','i','r','a',0,'H','i','r','a','g','a','n','a',0,
+2,'K','n','d','a',0,'K','a','n','n','a','d','a',0,2,'K','a','n','a',0,'K','a','t','a','k','a','n','a',0,
+2,'K','h','m','r',0,'K','h','m','e','r',0,2,'L','a','o','o',0,'L','a','o',0,
+2,'L','a','t','n',0,'L','a','t','i','n',0,2,'M','l','y','m',0,'M','a','l','a','y','a','l','a','m',0,
+2,'M','o','n','g',0,'M','o','n','g','o','l','i','a','n',0,
+2,'M','y','m','r',0,'M','y','a','n','m','a','r',0,2,'O','g','a','m',0,'O','g','h','a','m',0,
+2,'I','t','a','l',0,'O','l','d','_','I','t','a','l','i','c',0,
+2,'O','r','y','a',0,'O','r','i','y','a',0,2,'R','u','n','r',0,'R','u','n','i','c',0,
+2,'S','i','n','h',0,'S','i','n','h','a','l','a',0,2,'S','y','r','c',0,'S','y','r','i','a','c',0,
+2,'T','a','m','l',0,'T','a','m','i','l',0,2,'T','e','l','u',0,'T','e','l','u','g','u',0,
+2,'T','h','a','a',0,'T','h','a','a','n','a',0,2,'T','i','b','t',0,'T','i','b','e','t','a','n',0,
+2,'C','a','n','s',0,'C','a','n','a','d','i','a','n','_','A','b','o','r','i','g','i','n','a','l',0,
+2,'Y','i','i','i',0,'Y','i',0,2,'T','g','l','g',0,'T','a','g','a','l','o','g',0,
+2,'H','a','n','o',0,'H','a','n','u','n','o','o',0,2,'B','u','h','d',0,'B','u','h','i','d',0,
+2,'T','a','g','b',0,'T','a','g','b','a','n','w','a',0,2,'B','r','a','i',0,'B','r','a','i','l','l','e',0,
+2,'C','p','r','t',0,'C','y','p','r','i','o','t',0,2,'L','i','m','b',0,'L','i','m','b','u',0,
+2,'L','i','n','b',0,'L','i','n','e','a','r','_','B',0,2,'O','s','m','a',0,'O','s','m','a','n','y','a',0,
+2,'S','h','a','w',0,'S','h','a','v','i','a','n',0,2,'T','a','l','e',0,'T','a','i','_','L','e',0,
+2,'U','g','a','r',0,'U','g','a','r','i','t','i','c',0,2,'H','r','k','t',0,'K','a','t','a','k','a','n','a','_','O','r',
+'_','H','i','r','a','g','a','n','a',0,2,'B','u','g','i',0,'B','u','g','i','n','e','s','e',0,
+2,'G','l','a','g',0,'G','l','a','g','o','l','i','t','i','c',0,
+2,'K','h','a','r',0,'K','h','a','r','o','s','h','t','h','i',0,
+2,'S','y','l','o',0,'S','y','l','o','t','i','_','N','a','g','r','i',0,
+2,'T','a','l','u',0,'N','e','w','_','T','a','i','_','L','u','e',0,
+2,'T','f','n','g',0,'T','i','f','i','n','a','g','h',0,2,'X','p','e','o',0,'O','l','d','_','P','e','r','s','i','a','n',
+0,2,'B','a','l','i',0,'B','a','l','i','n','e','s','e',0,
+2,'B','a','t','k',0,'B','a','t','a','k',0,2,'B','l','i','s',0,'B','l','i','s',0,
+2,'B','r','a','h',0,'B','r','a','h','m','i',0,2,'C','i','r','t',0,'C','i','r','t',0,
+2,'C','y','r','s',0,'C','y','r','s',0,2,'E','g','y','d',0,'E','g','y','d',0,
+2,'E','g','y','h',0,'E','g','y','h',0,2,'E','g','y','p',0,'E','g','y','p','t','i','a','n','_','H','i','e','r','o','g',
+'l','y','p','h','s',0,2,'G','e','o','k',0,'G','e','o','k',0,
+2,'H','a','n','s',0,'H','a','n','s',0,2,'H','a','n','t',0,'H','a','n','t',0,
+2,'H','m','n','g',0,'P','a','h','a','w','h','_','H','m','o','n','g',0,
+2,'H','u','n','g',0,'O','l','d','_','H','u','n','g','a','r','i','a','n',0,
+2,'I','n','d','s',0,'I','n','d','s',0,2,'J','a','v','a',0,'J','a','v','a','n','e','s','e',0,
+2,'K','a','l','i',0,'K','a','y','a','h','_','L','i',0,2,'L','a','t','f',0,'L','a','t','f',0,
+2,'L','a','t','g',0,'L','a','t','g',0,2,'L','e','p','c',0,'L','e','p','c','h','a',0,
+2,'L','i','n','a',0,'L','i','n','e','a','r','_','A',0,2,'M','a','n','d',0,'M','a','n','d','a','i','c',0,
+2,'M','a','y','a',0,'M','a','y','a',0,2,'M','e','r','o',0,'M','e','r','o','i','t','i','c','_','H','i','e','r','o','g',
+'l','y','p','h','s',0,2,'N','k','o','o',0,'N','k','o',0,
+2,'O','r','k','h',0,'O','l','d','_','T','u','r','k','i','c',0,
+2,'P','e','r','m',0,'O','l','d','_','P','e','r','m','i','c',0,
+2,'P','h','a','g',0,'P','h','a','g','s','_','P','a',0,2,'P','h','n','x',0,'P','h','o','e','n','i','c','i','a','n',0,
+2,'P','l','r','d',0,'M','i','a','o',0,2,'R','o','r','o',0,'R','o','r','o',0,
+2,'S','a','r','a',0,'S','a','r','a',0,2,'S','y','r','e',0,'S','y','r','e',0,
+2,'S','y','r','j',0,'S','y','r','j',0,2,'S','y','r','n',0,'S','y','r','n',0,
+2,'T','e','n','g',0,'T','e','n','g',0,2,'V','a','i','i',0,'V','a','i',0,
+2,'V','i','s','p',0,'V','i','s','p',0,2,'X','s','u','x',0,'C','u','n','e','i','f','o','r','m',0,
+2,'Z','x','x','x',0,'Z','x','x','x',0,2,'Z','z','z','z',0,'U','n','k','n','o','w','n',0,
+2,'C','a','r','i',0,'C','a','r','i','a','n',0,2,'J','p','a','n',0,'J','p','a','n',0,
+2,'L','a','n','a',0,'T','a','i','_','T','h','a','m',0,2,'L','y','c','i',0,'L','y','c','i','a','n',0,
+2,'L','y','d','i',0,'L','y','d','i','a','n',0,2,'O','l','c','k',0,'O','l','_','C','h','i','k','i',0,
+2,'R','j','n','g',0,'R','e','j','a','n','g',0,2,'S','a','u','r',0,'S','a','u','r','a','s','h','t','r','a',0,
+2,'S','g','n','w',0,'S','i','g','n','W','r','i','t','i','n','g',0,
+2,'S','u','n','d',0,'S','u','n','d','a','n','e','s','e',0,
+2,'M','o','o','n',0,'M','o','o','n',0,2,'M','t','e','i',0,'M','e','e','t','e','i','_','M','a','y','e','k',0,
+2,'A','r','m','i',0,'I','m','p','e','r','i','a','l','_','A','r','a','m','a','i','c',0,
+2,'A','v','s','t',0,'A','v','e','s','t','a','n',0,2,'C','a','k','m',0,'C','h','a','k','m','a',0,
+2,'K','o','r','e',0,'K','o','r','e',0,2,'K','t','h','i',0,'K','a','i','t','h','i',0,
+2,'M','a','n','i',0,'M','a','n','i','c','h','a','e','a','n',0,
+2,'P','h','l','i',0,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','h','l','a','v','i',0,
+2,'P','h','l','p',0,'P','s','a','l','t','e','r','_','P','a','h','l','a','v','i',0,
+2,'P','h','l','v',0,'P','h','l','v',0,2,'P','r','t','i',0,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P',
+'a','r','t','h','i','a','n',0,2,'S','a','m','r',0,'S','a','m','a','r','i','t','a','n',0,
+2,'T','a','v','t',0,'T','a','i','_','V','i','e','t',0,2,'Z','m','t','h',0,'Z','m','t','h',0,
+2,'Z','s','y','m',0,'Z','s','y','m',0,2,'B','a','m','u',0,'B','a','m','u','m',0,
+2,'N','k','g','b',0,'N','k','g','b',0,2,'S','a','r','b',0,'O','l','d','_','S','o','u','t','h','_','A','r','a','b','i',
+'a','n',0,2,'B','a','s','s',0,'B','a','s','s','a','_','V','a','h',0,
+2,'D','u','p','l',0,'D','u','p','l','o','y','a','n',0,2,'E','l','b','a',0,'E','l','b','a','s','a','n',0,
+2,'G','r','a','n',0,'G','r','a','n','t','h','a',0,2,'K','p','e','l',0,'K','p','e','l',0,
+2,'L','o','m','a',0,'L','o','m','a',0,2,'M','e','n','d',0,'M','e','n','d','e','_','K','i','k','a','k','u','i',0,
+2,'M','e','r','c',0,'M','e','r','o','i','t','i','c','_','C','u','r','s','i','v','e',0,
+2,'N','a','r','b',0,'O','l','d','_','N','o','r','t','h','_','A','r','a','b','i','a','n',0,
+2,'N','b','a','t',0,'N','a','b','a','t','a','e','a','n',0,
+2,'P','a','l','m',0,'P','a','l','m','y','r','e','n','e',0,
+2,'S','i','n','d',0,'K','h','u','d','a','w','a','d','i',0,
+2,'W','a','r','a',0,'W','a','r','a','n','g','_','C','i','t','i',0,
+2,'A','f','a','k',0,'A','f','a','k',0,2,'J','u','r','c',0,'J','u','r','c',0,
+2,'M','r','o','o',0,'M','r','o',0,2,'N','s','h','u',0,'N','u','s','h','u',0,
+2,'S','h','r','d',0,'S','h','a','r','a','d','a',0,2,'S','o','r','a',0,'S','o','r','a','_','S','o','m','p','e','n','g',
+0,2,'T','a','k','r',0,'T','a','k','r','i',0,2,'T','a','n','g',0,'T','a','n','g','u','t',0,
+2,'W','o','l','e',0,'W','o','l','e',0,2,'H','l','u','w',0,'A','n','a','t','o','l','i','a','n','_','H','i','e','r','o',
+'g','l','y','p','h','s',0,2,'K','h','o','j',0,'K','h','o','j','k','i',0,
+2,'T','i','r','h',0,'T','i','r','h','u','t','a',0,2,'A','g','h','b',0,'C','a','u','c','a','s','i','a','n','_','A','l',
+'b','a','n','i','a','n',0,2,'M','a','h','j',0,'M','a','h','a','j','a','n','i',0,
+2,'H','a','t','r',0,'H','a','t','r','a','n',0,2,'M','u','l','t',0,'M','u','l','t','a','n','i',0,
+2,'P','a','u','c',0,'P','a','u','_','C','i','n','_','H','a','u',0,
+2,'S','i','d','d',0,'S','i','d','d','h','a','m',0,2,'A','d','l','m',0,'A','d','l','a','m',0,
+2,'B','h','k','s',0,'B','h','a','i','k','s','u','k','i',0,
+2,'M','a','r','c',0,'M','a','r','c','h','e','n',0,2,'O','s','g','e',0,'O','s','a','g','e',0,
+2,'H','a','n','b',0,'H','a','n','b',0,2,'J','a','m','o',0,'J','a','m','o',0,
+2,'Z','s','y','e',0,'Z','s','y','e',0,2,'G','o','n','m',0,'M','a','s','a','r','a','m','_','G','o','n','d','i',0,
+2,'S','o','y','o',0,'S','o','y','o','m','b','o',0,2,'Z','a','n','b',0,'Z','a','n','a','b','a','z','a','r','_','S','q',
+'u','a','r','e',0,2,'D','o','g','r',0,'D','o','g','r','a',0,
+2,'G','o','n','g',0,'G','u','n','j','a','l','a','_','G','o','n','d','i',0,
+2,'M','a','k','a',0,'M','a','k','a','s','a','r',0,2,'M','e','d','f',0,'M','e','d','e','f','a','i','d','r','i','n',0,
+2,'R','o','h','g',0,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a',0,
+2,'S','o','g','d',0,'S','o','g','d','i','a','n',0,2,'S','o','g','o',0,'O','l','d','_','S','o','g','d','i','a','n',0,
+2,'E','l','y','m',0,'E','l','y','m','a','i','c',0,2,'H','m','n','p',0,'N','y','i','a','k','e','n','g','_','P','u','a',
+'c','h','u','e','_','H','m','o','n','g',0,2,'N','a','n','d',0,'N','a','n','d','i','n','a','g','a','r','i',0,
+2,'W','c','h','o',0,'W','a','n','c','h','o',0,2,'C','h','r','s',0,'C','h','o','r','a','s','m','i','a','n',0,
+2,'D','i','a','k',0,'D','i','v','e','s','_','A','k','u','r','u',0,
+2,'K','i','t','s',0,'K','h','i','t','a','n','_','S','m','a','l','l','_','S','c','r','i','p','t',0,
+2,'Y','e','z','i',0,'Y','e','z','i','d','i',0,2,'h','s','t',0,'H','a','n','g','u','l','_','S','y','l','l','a','b','l',
+'e','_','T','y','p','e',0,2,'N','A',0,'N','o','t','_','A','p','p','l','i','c','a','b','l','e',0,
+2,'L',0,'L','e','a','d','i','n','g','_','J','a','m','o',0,
+2,'V',0,'V','o','w','e','l','_','J','a','m','o',0,2,'T',0,'T','r','a','i','l','i','n','g','_','J','a','m','o',0,
+2,'L','V',0,'L','V','_','S','y','l','l','a','b','l','e',0,
+2,'L','V','T',0,'L','V','T','_','S','y','l','l','a','b','l','e',0,
+2,'N','F','D','_','Q','C',0,'N','F','D','_','Q','u','i','c','k','_','C','h','e','c','k',0,
+2,'N',0,'N','o',0,2,'Y',0,'Y','e','s',0,2,'N','F','K','D','_','Q','C',0,'N','F','K','D','_','Q','u','i','c','k',
+'_','C','h','e','c','k',0,2,'N','F','C','_','Q','C',0,'N','F','C','_','Q','u','i','c','k','_','C','h','e','c','k',0,
+2,'M',0,'M','a','y','b','e',0,2,'N','F','K','C','_','Q','C',0,'N','F','K','C','_','Q','u','i','c','k','_','C','h','e',
+'c','k',0,2,'l','c','c','c',0,'L','e','a','d','_','C','a','n','o','n','i','c','a','l','_','C','o','m','b','i','n','i','n',
+'g','_','C','l','a','s','s',0,2,'t','c','c','c',0,'T','r','a','i','l','_','C','a','n','o','n','i','c','a','l','_','C','o',
+'m','b','i','n','i','n','g','_','C','l','a','s','s',0,2,'G','C','B',0,'G','r','a','p','h','e','m','e','_','C','l','u','s',
+'t','e','r','_','B','r','e','a','k',0,2,'X','X',0,'O','t','h','e','r',0,
+2,'C','N',0,'C','o','n','t','r','o','l',0,2,'C','R',0,'C','R',0,
+2,'E','X',0,'E','x','t','e','n','d',0,2,'L',0,'L',0,
+2,'L','F',0,'L','F',0,2,'L','V',0,'L','V',0,2,'L','V','T',0,'L','V','T',0,
+2,'T',0,'T',0,2,'V',0,'V',0,2,'S','M',0,'S','p','a','c','i','n','g','M','a','r','k',0,
+2,'P','P',0,'P','r','e','p','e','n','d',0,2,'E','B','G',0,'E','_','B','a','s','e','_','G','A','Z',0,
+2,'G','A','Z',0,'G','l','u','e','_','A','f','t','e','r','_','Z','w','j',0,
+2,'S','B',0,'S','e','n','t','e','n','c','e','_','B','r','e','a','k',0,
+2,'A','T',0,'A','T','e','r','m',0,2,'C','L',0,'C','l','o','s','e',0,
+2,'F','O',0,'F','o','r','m','a','t',0,2,'L','O',0,'L','o','w','e','r',0,
+2,'L','E',0,'O','L','e','t','t','e','r',0,2,'S','E',0,'S','e','p',0,
+2,'S','P',0,'S','p',0,2,'S','T',0,'S','T','e','r','m',0,
+2,'U','P',0,'U','p','p','e','r',0,2,'S','C',0,'S','C','o','n','t','i','n','u','e',0,
+2,'W','B',0,'W','o','r','d','_','B','r','e','a','k',0,2,'L','E',0,'A','L','e','t','t','e','r',0,
+2,'K','A',0,'K','a','t','a','k','a','n','a',0,2,'M','L',0,'M','i','d','L','e','t','t','e','r',0,
+2,'M','N',0,'M','i','d','N','u','m',0,2,'E','X',0,'E','x','t','e','n','d','N','u','m','L','e','t',0,
+2,'E','x','t','e','n','d',0,'E','x','t','e','n','d',0,2,'M','B',0,'M','i','d','N','u','m','L','e','t',0,
+2,'N','L',0,'N','e','w','l','i','n','e',0,2,'S','Q',0,'S','i','n','g','l','e','_','Q','u','o','t','e',0,
+2,'D','Q',0,'D','o','u','b','l','e','_','Q','u','o','t','e',0,
+2,'W','S','e','g','S','p','a','c','e',0,'W','S','e','g','S','p','a','c','e',0,
+2,'b','p','t',0,'B','i','d','i','_','P','a','i','r','e','d','_','B','r','a','c','k','e','t','_','T','y','p','e',0,
+2,'n',0,'N','o','n','e',0,2,'o',0,'O','p','e','n',0,
+2,'c',0,'C','l','o','s','e',0,2,'I','n','P','C',0,'I','n','d','i','c','_','P','o','s','i','t','i','o','n','a','l','_',
+'C','a','t','e','g','o','r','y',0,2,'N','A',0,'N','A',0,
+2,'B','o','t','t','o','m',0,'B','o','t','t','o','m',0,2,'B','o','t','t','o','m','_','A','n','d','_','L','e','f','t',0,
+'B','o','t','t','o','m','_','A','n','d','_','L','e','f','t',0,
+2,'B','o','t','t','o','m','_','A','n','d','_','R','i','g','h','t',0,'B','o','t','t','o','m','_','A','n','d','_','R','i','g',
+'h','t',0,2,'L','e','f','t',0,'L','e','f','t',0,2,'L','e','f','t','_','A','n','d','_','R','i','g','h','t',0,
+'L','e','f','t','_','A','n','d','_','R','i','g','h','t',0,2,'O','v','e','r','s','t','r','u','c','k',0,
+'O','v','e','r','s','t','r','u','c','k',0,2,'R','i','g','h','t',0,'R','i','g','h','t',0,
+2,'T','o','p',0,'T','o','p',0,2,'T','o','p','_','A','n','d','_','B','o','t','t','o','m',0,
+'T','o','p','_','A','n','d','_','B','o','t','t','o','m',0,2,'T','o','p','_','A','n','d','_','B','o','t','t','o','m','_','A',
+'n','d','_','R','i','g','h','t',0,'T','o','p','_','A','n','d','_','B','o','t','t','o','m','_','A','n','d','_','R','i','g','h',
+'t',0,2,'T','o','p','_','A','n','d','_','L','e','f','t',0,'T','o','p','_','A','n','d','_','L','e','f','t',0,
+2,'T','o','p','_','A','n','d','_','L','e','f','t','_','A','n','d','_','R','i','g','h','t',0,
+'T','o','p','_','A','n','d','_','L','e','f','t','_','A','n','d','_','R','i','g','h','t',0,
+2,'T','o','p','_','A','n','d','_','R','i','g','h','t',0,'T','o','p','_','A','n','d','_','R','i','g','h','t',0,
+2,'V','i','s','u','a','l','_','O','r','d','e','r','_','L','e','f','t',0,'V','i','s','u','a','l','_','O','r','d','e','r','_',
+'L','e','f','t',0,2,'T','o','p','_','A','n','d','_','B','o','t','t','o','m','_','A','n','d','_','L','e','f','t',0,
+'T','o','p','_','A','n','d','_','B','o','t','t','o','m','_','A','n','d','_','L','e','f','t',0,
+2,'I','n','S','C',0,'I','n','d','i','c','_','S','y','l','l','a','b','i','c','_','C','a','t','e','g','o','r','y',0,
+2,'O','t','h','e','r',0,'O','t','h','e','r',0,2,'A','v','a','g','r','a','h','a',0,'A','v','a','g','r','a','h','a',0,
+2,'B','i','n','d','u',0,'B','i','n','d','u',0,2,'B','r','a','h','m','i','_','J','o','i','n','i','n','g','_','N','u','m',
+'b','e','r',0,'B','r','a','h','m','i','_','J','o','i','n','i','n','g','_','N','u','m','b','e','r',0,
+2,'C','a','n','t','i','l','l','a','t','i','o','n','_','M','a','r','k',0,'C','a','n','t','i','l','l','a','t','i','o','n','_',
+'M','a','r','k',0,2,'C','o','n','s','o','n','a','n','t',0,'C','o','n','s','o','n','a','n','t',0,
+2,'C','o','n','s','o','n','a','n','t','_','D','e','a','d',0,'C','o','n','s','o','n','a','n','t','_','D','e','a','d',0,
+2,'C','o','n','s','o','n','a','n','t','_','F','i','n','a','l',0,'C','o','n','s','o','n','a','n','t','_','F','i','n','a','l',
+0,2,'C','o','n','s','o','n','a','n','t','_','H','e','a','d','_','L','e','t','t','e','r',0,
+'C','o','n','s','o','n','a','n','t','_','H','e','a','d','_','L','e','t','t','e','r',0,
+2,'C','o','n','s','o','n','a','n','t','_','I','n','i','t','i','a','l','_','P','o','s','t','f','i','x','e','d',0,
+'C','o','n','s','o','n','a','n','t','_','I','n','i','t','i','a','l','_','P','o','s','t','f','i','x','e','d',0,
+2,'C','o','n','s','o','n','a','n','t','_','K','i','l','l','e','r',0,'C','o','n','s','o','n','a','n','t','_','K','i','l','l',
+'e','r',0,2,'C','o','n','s','o','n','a','n','t','_','M','e','d','i','a','l',0,'C','o','n','s','o','n','a','n','t','_','M',
+'e','d','i','a','l',0,2,'C','o','n','s','o','n','a','n','t','_','P','l','a','c','e','h','o','l','d','e','r',0,
+'C','o','n','s','o','n','a','n','t','_','P','l','a','c','e','h','o','l','d','e','r',0,
+2,'C','o','n','s','o','n','a','n','t','_','P','r','e','c','e','d','i','n','g','_','R','e','p','h','a',0,
+'C','o','n','s','o','n','a','n','t','_','P','r','e','c','e','d','i','n','g','_','R','e','p','h','a',0,
+2,'C','o','n','s','o','n','a','n','t','_','P','r','e','f','i','x','e','d',0,'C','o','n','s','o','n','a','n','t','_','P','r',
+'e','f','i','x','e','d',0,2,'C','o','n','s','o','n','a','n','t','_','S','u','b','j','o','i','n','e','d',0,
+'C','o','n','s','o','n','a','n','t','_','S','u','b','j','o','i','n','e','d',0,
+2,'C','o','n','s','o','n','a','n','t','_','S','u','c','c','e','e','d','i','n','g','_','R','e','p','h','a',0,
+'C','o','n','s','o','n','a','n','t','_','S','u','c','c','e','e','d','i','n','g','_','R','e','p','h','a',0,
+2,'C','o','n','s','o','n','a','n','t','_','W','i','t','h','_','S','t','a','c','k','e','r',0,
+'C','o','n','s','o','n','a','n','t','_','W','i','t','h','_','S','t','a','c','k','e','r',0,
+2,'G','e','m','i','n','a','t','i','o','n','_','M','a','r','k',0,'G','e','m','i','n','a','t','i','o','n','_','M','a','r','k',
+0,2,'I','n','v','i','s','i','b','l','e','_','S','t','a','c','k','e','r',0,'I','n','v','i','s','i','b','l','e','_','S','t',
+'a','c','k','e','r',0,2,'J','o','i','n','e','r',0,'J','o','i','n','e','r',0,
+2,'M','o','d','i','f','y','i','n','g','_','L','e','t','t','e','r',0,'M','o','d','i','f','y','i','n','g','_','L','e','t','t',
+'e','r',0,2,'N','o','n','_','J','o','i','n','e','r',0,'N','o','n','_','J','o','i','n','e','r',0,
+2,'N','u','k','t','a',0,'N','u','k','t','a',0,2,'N','u','m','b','e','r',0,'N','u','m','b','e','r',0,
+2,'N','u','m','b','e','r','_','J','o','i','n','e','r',0,'N','u','m','b','e','r','_','J','o','i','n','e','r',0,
+2,'P','u','r','e','_','K','i','l','l','e','r',0,'P','u','r','e','_','K','i','l','l','e','r',0,
+2,'R','e','g','i','s','t','e','r','_','S','h','i','f','t','e','r',0,'R','e','g','i','s','t','e','r','_','S','h','i','f','t',
+'e','r',0,2,'S','y','l','l','a','b','l','e','_','M','o','d','i','f','i','e','r',0,'S','y','l','l','a','b','l','e','_','M',
+'o','d','i','f','i','e','r',0,2,'T','o','n','e','_','L','e','t','t','e','r',0,'T','o','n','e','_','L','e','t','t','e','r',
+0,2,'T','o','n','e','_','M','a','r','k',0,'T','o','n','e','_','M','a','r','k',0,
+2,'V','i','r','a','m','a',0,'V','i','r','a','m','a',0,2,'V','i','s','a','r','g','a',0,
+'V','i','s','a','r','g','a',0,2,'V','o','w','e','l',0,'V','o','w','e','l',0,
+2,'V','o','w','e','l','_','D','e','p','e','n','d','e','n','t',0,'V','o','w','e','l','_','D','e','p','e','n','d','e','n','t',
+0,2,'V','o','w','e','l','_','I','n','d','e','p','e','n','d','e','n','t',0,'V','o','w','e','l','_','I','n','d','e','p','e',
+'n','d','e','n','t',0,2,'v','o',0,'V','e','r','t','i','c','a','l','_','O','r','i','e','n','t','a','t','i','o','n',0,
+2,'R',0,'R','o','t','a','t','e','d',0,2,'T','r',0,'T','r','a','n','s','f','o','r','m','e','d','_','R','o','t','a','t',
+'e','d',0,2,'T','u',0,'T','r','a','n','s','f','o','r','m','e','d','_','U','p','r','i','g','h','t',0,
+2,'U',0,'U','p','r','i','g','h','t',0,2,'g','c','m',0,'G','e','n','e','r','a','l','_','C','a','t','e','g','o','r','y',
+'_','M','a','s','k',0,2,'C',0,'O','t','h','e','r',0,2,'L',0,'L','e','t','t','e','r',0,
+2,'L','C',0,'C','a','s','e','d','_','L','e','t','t','e','r',0,
+3,'M',0,'M','a','r','k',0,'C','o','m','b','i','n','i','n','g','_','M','a','r','k',0,
+2,'N',0,'N','u','m','b','e','r',0,3,'P',0,'P','u','n','c','t','u','a','t','i','o','n',0,
+'p','u','n','c','t',0,2,'S',0,'S','y','m','b','o','l',0,
+2,'Z',0,'S','e','p','a','r','a','t','o','r',0,2,'n','v',0,'N','u','m','e','r','i','c','_','V','a','l','u','e',0,
+2,'a','g','e',0,'A','g','e',0,2,'b','m','g',0,'B','i','d','i','_','M','i','r','r','o','r','i','n','g','_','G','l','y',
+'p','h',0,2,'c','f',0,'C','a','s','e','_','F','o','l','d','i','n','g',0,
+2,'i','s','c',0,'I','S','O','_','C','o','m','m','e','n','t',0,
+2,'l','c',0,'L','o','w','e','r','c','a','s','e','_','M','a','p','p','i','n','g',0,
+2,'n','a',0,'N','a','m','e',0,3,'s','c','f',0,'S','i','m','p','l','e','_','C','a','s','e','_','F','o','l','d','i','n',
+'g',0,'s','f','c',0,2,'s','l','c',0,'S','i','m','p','l','e','_','L','o','w','e','r','c','a','s','e','_','M','a','p','p',
+'i','n','g',0,2,'s','t','c',0,'S','i','m','p','l','e','_','T','i','t','l','e','c','a','s','e','_','M','a','p','p','i','n',
+'g',0,2,'s','u','c',0,'S','i','m','p','l','e','_','U','p','p','e','r','c','a','s','e','_','M','a','p','p','i','n','g',0,
+2,'t','c',0,'T','i','t','l','e','c','a','s','e','_','M','a','p','p','i','n','g',0,
+2,'n','a','1',0,'U','n','i','c','o','d','e','_','1','_','N','a','m','e',0,
+2,'u','c',0,'U','p','p','e','r','c','a','s','e','_','M','a','p','p','i','n','g',0,
+2,'b','p','b',0,'B','i','d','i','_','P','a','i','r','e','d','_','B','r','a','c','k','e','t',0,
+2,'s','c','x',0,'S','c','r','i','p','t','_','E','x','t','e','n','s','i','o','n','s',0
+};
+
+U_NAMESPACE_END
+
+#endif // INCLUDED_FROM_PROPNAME_CPP
diff --git a/thirdparty/icu4c/common/propsvec.cpp b/thirdparty/icu4c/common/propsvec.cpp
new file mode 100644
index 0000000000..056fcda9cf
--- /dev/null
+++ b/thirdparty/icu4c/common/propsvec.cpp
@@ -0,0 +1,529 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: propsvec.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002feb22
+* created by: Markus W. Scherer
+*
+* Store bits (Unicode character properties) in bit set vectors.
+*/
+
+#include <stdlib.h>
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "utrie.h"
+#include "utrie2.h"
+#include "uarrsort.h"
+#include "propsvec.h"
+#include "uassert.h"
+
+/*
+ * Builder state for a table of properties vectors.
+ * Rows are stored back to back in v; each row has `columns` uint32_t cells,
+ * where cell 0 is the range start and cell 1 is the range limit.
+ */
+struct UPropsVectors {
+ uint32_t *v; /* row storage, allocated with uprv_malloc() */
+ int32_t columns; /* number of columns, plus two for start & limit values */
+ int32_t maxRows; /* number of rows that fit into v */
+ int32_t rows; /* number of rows in use; after upvec_compact(), the number of unique vectors */
+ int32_t prevRow; /* search optimization: remember last row seen */
+ UBool isCompacted; /* set by upvec_compact(); upvec_setValue() fails with U_NO_WRITE_PERMISSION afterwards */
+};
+
+/*
+ * Row capacities of the vectors array: upvec_open() allocates UPVEC_INITIAL_ROWS,
+ * upvec_setValue() grows to UPVEC_MEDIUM_ROWS and then to UPVEC_MAX_ROWS.
+ */
+#define UPVEC_INITIAL_ROWS (1<<12)
+#define UPVEC_MEDIUM_ROWS ((int32_t)1<<16)
+#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1)
+
+/*
+ * Create a properties-vectors builder.
+ *
+ * @param columns    Number of value cells (uint32_t) per row; must be at least 1.
+ *                   Two more cells per row are added internally for range start and limit.
+ * @param pErrorCode In/out ICU error code. Nothing is done if it already indicates a failure.
+ *                   Set to U_ILLEGAL_ARGUMENT_ERROR for columns<1 and to
+ *                   U_MEMORY_ALLOCATION_ERROR if an allocation fails.
+ * @return The new builder, or NULL on error. Release it with upvec_close().
+ *
+ * The new builder holds one all-zero row for [0..0x110000[ followed by one
+ * all-zero row for each special code point UPVEC_FIRST_SPECIAL_CP..UPVEC_MAX_CP.
+ */
+U_CAPI UPropsVectors * U_EXPORT2
+upvec_open(int32_t columns, UErrorCode *pErrorCode) {
+    UPropsVectors *pv;
+    uint32_t *v, *row;
+    uint32_t cp;
+
+    if(U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    if(columns<1) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    columns+=2; /* count range start and limit columns */
+
+    pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors));
+    /* compute the byte count in size_t so that a large columns value cannot overflow int32_t */
+    v=(uint32_t *)uprv_malloc((size_t)UPVEC_INITIAL_ROWS*columns*4);
+    if(pv==NULL || v==NULL) {
+        /* one of the two allocations may have succeeded; free both */
+        uprv_free(pv);
+        uprv_free(v);
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    uprv_memset(pv, 0, sizeof(UPropsVectors));
+    pv->v=v;
+    pv->columns=columns;
+    pv->maxRows=UPVEC_INITIAL_ROWS;
+    /* one row for all of Unicode plus one row per special code point */
+    pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP);
+
+    /* set the all-Unicode row and the special-value rows */
+    row=pv->v;
+    uprv_memset(row, 0, (size_t)pv->rows*columns*4);
+    row[0]=0;
+    row[1]=0x110000;
+    row+=columns;
+    for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) {
+        /* each special row covers exactly one pseudo code point */
+        row[0]=cp;
+        row[1]=cp+1;
+        row+=columns;
+    }
+    return pv;
+}
+
+/*
+ * Free a builder created by upvec_open(), including its vectors array.
+ * A NULL pv is accepted and ignored.
+ */
+U_CAPI void U_EXPORT2
+upvec_close(UPropsVectors *pv) {
+    if(pv==NULL) {
+        return;
+    }
+    uprv_free(pv->v);
+    uprv_free(pv);
+}
+
+/*
+ * Return the row whose range [row[0], row[1][ contains rangeStart.
+ *
+ * First probes the row remembered in pv->prevRow and the rows following it,
+ * then falls back to a binary search over all rows.
+ * On return, pv->prevRow is the index of the returned row.
+ *
+ * The probes do not compare against pv->rows; they rely on rangeStart being
+ * at most UPVEC_MAX_CP, which upvec_setValue() and upvec_getValue() check
+ * before calling this function.
+ */
+static uint32_t *
+_findRow(UPropsVectors *pv, UChar32 rangeStart) {
+ uint32_t *row;
+ int32_t columns, i, start, limit, prevRow;
+
+ columns=pv->columns;
+ limit=pv->rows;
+ prevRow=pv->prevRow;
+
+ /* check the vicinity of the last-seen row (start searching with an unrolled loop) */
+ row=pv->v+prevRow*columns;
+ if(rangeStart>=(UChar32)row[0]) {
+ if(rangeStart<(UChar32)row[1]) {
+ /* same row as last seen */
+ return row;
+ } else if(rangeStart<(UChar32)(row+=columns)[1]) {
+ /* next row after the last one (the condition itself advanced row) */
+ pv->prevRow=prevRow+1;
+ return row;
+ } else if(rangeStart<(UChar32)(row+=columns)[1]) {
+ /* second row after the last one (row was advanced again by the condition) */
+ pv->prevRow=prevRow+2;
+ return row;
+ } else if((rangeStart-(UChar32)row[1])<10) {
+ /* we are close, continue looping */
+ prevRow+=2;
+ do {
+ ++prevRow;
+ row+=columns;
+ } while(rangeStart>=(UChar32)row[1]);
+ pv->prevRow=prevRow;
+ return row;
+ }
+ } else if(rangeStart<(UChar32)pv->v[1]) {
+ /* the very first row */
+ pv->prevRow=0;
+ return pv->v;
+ }
+
+ /* do a binary search for the start of the range */
+ start=0;
+ while(start<limit-1) {
+ i=(start+limit)/2;
+ row=pv->v+i*columns;
+ if(rangeStart<(UChar32)row[0]) {
+ /* target is in a row before i */
+ limit=i;
+ } else if(rangeStart<(UChar32)row[1]) {
+ /* row i contains the target */
+ pv->prevRow=i;
+ return row;
+ } else {
+ /* target is in a row after i */
+ start=i;
+ }
+ }
+
+ /* must be found because all ranges together always cover all of Unicode */
+ pv->prevRow=start;
+ return pv->v+start*columns;
+}
+
+/*
+ * For all code points in [start..end], replace the bits selected by mask
+ * in the given value column with the corresponding bits of value.
+ *
+ * Rows that only partially overlap the input range are split first, but only
+ * if their current masked value differs from the new one. The vectors array
+ * grows (to UPVEC_MEDIUM_ROWS, then to UPVEC_MAX_ROWS) when a split needs more rows.
+ *
+ * @param pv         The builder; must not be NULL and must not have been compacted.
+ * @param start      First code point of the range, 0..end.
+ * @param end        Last code point of the range (inclusive), at most UPVEC_MAX_CP.
+ * @param column     Zero-based value column, less than the columns value given to upvec_open().
+ * @param value      New bits; ANDed with mask before use.
+ * @param mask       Selects the bits of the column that are replaced.
+ * @param pErrorCode In/out ICU error code. Nothing is done if it already indicates a failure.
+ *                   Set to U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
+ *                   U_NO_WRITE_PERMISSION after upvec_compact(),
+ *                   U_MEMORY_ALLOCATION_ERROR if growing the array fails, and
+ *                   U_INTERNAL_PROGRAM_ERROR if the array is already at UPVEC_MAX_ROWS.
+ */
+U_CAPI void U_EXPORT2
+upvec_setValue(UPropsVectors *pv,
+               UChar32 start, UChar32 end,
+               int32_t column,
+               uint32_t value, uint32_t mask,
+               UErrorCode *pErrorCode) {
+    uint32_t *firstRow, *lastRow;
+    int32_t columns;
+    UChar32 limit;
+    UBool splitFirstRow, splitLastRow;
+
+    /* argument checking */
+    if(U_FAILURE(*pErrorCode)) {
+        return;
+    }
+    if( pv==NULL ||
+        start<0 || start>end || end>UPVEC_MAX_CP ||
+        column<0 || column>=(pv->columns-2)
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    if(pv->isCompacted) {
+        *pErrorCode=U_NO_WRITE_PERMISSION;
+        return;
+    }
+    limit=end+1;
+
+    /* initialize */
+    columns=pv->columns;
+    column+=2; /* skip range start and limit columns */
+    value&=mask;
+
+    /* find the rows whose ranges overlap with the input range */
+
+    /* find the first and last rows, always successful */
+    firstRow=_findRow(pv, start);
+    lastRow=_findRow(pv, end);
+
+    /*
+     * Rows need to be split if they partially overlap with the
+     * input range (only possible for the first and last rows)
+     * and if their value differs from the input value.
+     */
+    splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask));
+    splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask));
+
+    /* split first/last rows if necessary */
+    if(splitFirstRow || splitLastRow) {
+        int32_t count, rows;
+
+        rows=pv->rows;
+        if((rows+splitFirstRow+splitLastRow)>pv->maxRows) {
+            uint32_t *newVectors;
+            int32_t newMaxRows;
+
+            if(pv->maxRows<UPVEC_MEDIUM_ROWS) {
+                newMaxRows=UPVEC_MEDIUM_ROWS;
+            } else if(pv->maxRows<UPVEC_MAX_ROWS) {
+                newMaxRows=UPVEC_MAX_ROWS;
+            } else {
+                /* Implementation bug, or UPVEC_MAX_ROWS too low. */
+                *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
+                return;
+            }
+            /* compute the byte count in size_t: newMaxRows*columns*4 can exceed INT32_MAX */
+            newVectors=(uint32_t *)uprv_malloc((size_t)newMaxRows*columns*4);
+            if(newVectors==NULL) {
+                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            uprv_memcpy(newVectors, pv->v, (size_t)rows*columns*4);
+            /* rebase the row pointers onto the new array before freeing the old one */
+            firstRow=newVectors+(firstRow-pv->v);
+            lastRow=newVectors+(lastRow-pv->v);
+            uprv_free(pv->v);
+            pv->v=newVectors;
+            pv->maxRows=newMaxRows;
+        }
+
+        /* count the number of row cells to move after the last row, and move them */
+        count = (int32_t)((pv->v+rows*columns)-(lastRow+columns));
+        if(count>0) {
+            uprv_memmove(
+                lastRow+(1+splitFirstRow+splitLastRow)*columns,
+                lastRow+columns,
+                (size_t)count*4);
+        }
+        pv->rows=rows+splitFirstRow+splitLastRow;
+
+        /* split the first row, and move the firstRow pointer to the second part */
+        if(splitFirstRow) {
+            /* copy all affected rows up one and move the lastRow pointer */
+            count = (int32_t)((lastRow-firstRow)+columns);
+            uprv_memmove(firstRow+columns, firstRow, (size_t)count*4);
+            lastRow+=columns;
+
+            /* split the range and move the firstRow pointer */
+            firstRow[1]=firstRow[columns]=(uint32_t)start;
+            firstRow+=columns;
+        }
+
+        /* split the last row */
+        if(splitLastRow) {
+            /* copy the last row data */
+            uprv_memcpy(lastRow+columns, lastRow, (size_t)columns*4);
+
+            /* split the range; lastRow stays on the first part, which ends at limit */
+            lastRow[1]=lastRow[columns]=(uint32_t)limit;
+        }
+    }
+
+    /* set the "row last seen" to the last row for the range */
+    pv->prevRow=(int32_t)((lastRow-(pv->v))/columns);
+
+    /* set the input value in all remaining rows */
+    firstRow+=column;
+    lastRow+=column;
+    mask=~mask; /* from here on, mask selects the bits to keep */
+    for(;;) {
+        *firstRow=(*firstRow&mask)|value;
+        if(firstRow==lastRow) {
+            break;
+        }
+        firstRow+=columns;
+    }
+}
+
+/*
+ * Look up the value stored for code point c in the given value column.
+ * Returns 0 after upvec_compact(), for c outside 0..UPVEC_MAX_CP,
+ * and for a column index outside the value columns.
+ */
+U_CAPI uint32_t U_EXPORT2
+upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) {
+    if(pv->isCompacted) {
+        return 0;
+    }
+    if(c<0 || c>UPVEC_MAX_CP) {
+        return 0;
+    }
+    if(column<0 || column>=(pv->columns-2)) {
+        return 0;
+    }
+
+    /* _findRow() updates the prevRow search cache, so the const must be cast away */
+    uint32_t *found=_findRow((UPropsVectors *)pv, c);
+    return found[2+column];
+}
+
+/*
+ * Return a pointer to the value cells of row rowIndex and optionally its range.
+ * pRangeStart and pRangeEnd may be NULL; *pRangeEnd receives the inclusive end.
+ * Returns NULL after upvec_compact() or if rowIndex is not in 0..rows-1.
+ */
+U_CAPI uint32_t * U_EXPORT2
+upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
+             UChar32 *pRangeStart, UChar32 *pRangeEnd) {
+    if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) {
+        return NULL;
+    }
+
+    uint32_t *cells=pv->v+rowIndex*pv->columns;
+    if(pRangeStart!=NULL) {
+        *pRangeStart=(UChar32)cells[0];
+    }
+    if(pRangeEnd!=NULL) {
+        /* the stored limit is exclusive */
+        *pRangeEnd=(UChar32)cells[1]-1;
+    }
+    /* skip the start and limit cells */
+    return cells+2;
+}
+
+/*
+ * Row comparator for uprv_sortArray(); context is the UPropsVectors.
+ * Compares the value cells first (indexes 2..columns-1) and then wraps
+ * around to the start and limit cells (indexes 0 and 1).
+ * Returns -1, 0 or 1.
+ */
+static int32_t U_CALLCONV
+upvec_compareRows(const void *context, const void *l, const void *r) {
+    const UPropsVectors *pv=(const UPropsVectors *)context;
+    const uint32_t *left=(const uint32_t *)l;
+    const uint32_t *right=(const uint32_t *)r;
+    const int32_t columns=pv->columns; /* includes start/limit columns */
+    int32_t remaining=columns;
+    int32_t cell=2; /* start comparing after start/limit */
+
+    for(;;) {
+        if(left[cell]!=right[cell]) {
+            if(left[cell]<right[cell]) {
+                return -1;
+            } else {
+                return 1;
+            }
+        }
+        if(++cell==columns) {
+            cell=0; /* wrap around to the start/limit cells */
+        }
+        if(--remaining<=0) {
+            break;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Compact the vectors in place and report every row to the handler.
+ *
+ * Steps:
+ * 1. Mark pv as compacted and sort the rows with upvec_compareRows().
+ * 2. First pass: for each special row (start>=UPVEC_FIRST_SPECIAL_CP), call the
+ *    handler with the cell offset that the row's vector will have after compaction.
+ * 3. Call the handler once with UPVEC_START_REAL_VALUES_CP and the total
+ *    length (in cells) of the compacted array.
+ * 4. Second pass: move each unique vector to the front of pv->v and call the
+ *    handler for each real row (start<UPVEC_FIRST_SPECIAL_CP) with [start..limit-1].
+ * 5. Set pv->rows to the number of unique vectors.
+ *
+ * Does nothing if *pErrorCode already indicates a failure or if pv was already compacted.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR if handler is NULL.
+ * Returns early if sorting or the handler reports an error; isCompacted stays set in that case.
+ *
+ * NOTE(review): pv is dereferenced without a NULL check.
+ */
+U_CAPI void U_EXPORT2
+upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
+ uint32_t *row;
+ int32_t i, columns, valueColumns, rows, count;
+ UChar32 start, limit;
+
+ /* argument checking */
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+ if(handler==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ if(pv->isCompacted) {
+ return;
+ }
+
+ /* Set the flag now: Sorting and compacting destroys the builder data structure. */
+ pv->isCompacted=TRUE;
+
+ rows=pv->rows;
+ columns=pv->columns;
+ U_ASSERT(columns>=3); /* upvec_open() rejects columns<1 and then adds 2 */
+ valueColumns=columns-2; /* not counting start & limit */
+
+ /* sort the properties vectors to find unique vector values */
+ uprv_sortArray(pv->v, rows, columns*4,
+ upvec_compareRows, pv, FALSE, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+
+ /*
+ * Find and set the special values.
+ * This has to do almost the same work as the compaction below,
+ * to find the indexes where the special-value rows will move.
+ */
+ row=pv->v;
+ count=-valueColumns; /* negative until the first vector has been counted */
+ for(i=0; i<rows; ++i) {
+ start=(UChar32)row[0];
+
+ /* count a new values vector if it is different from the current one */
+ /* (row-valueColumns is where the previous row's value cells start) */
+ if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) {
+ count+=valueColumns;
+ }
+
+ if(start>=UPVEC_FIRST_SPECIAL_CP) {
+ handler(context, start, start, count, row+2, valueColumns, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+ }
+
+ row+=columns;
+ }
+
+ /* count is at the beginning of the last vector, add valueColumns to include that last vector */
+ count+=valueColumns;
+
+ /* Call the handler once more to signal the start of delivering real values. */
+ handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP,
+ count, row-valueColumns, valueColumns, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+
+ /*
+ * Move vector contents up to a contiguous array with only unique
+ * vector values, and call the handler function for each vector.
+ *
+ * This destroys the Properties Vector structure and replaces it
+ * with an array of just vector values.
+ */
+ row=pv->v;
+ count=-valueColumns;
+ for(i=0; i<rows; ++i) {
+ /* fetch these first before memmove() may overwrite them */
+ start=(UChar32)row[0];
+ limit=(UChar32)row[1];
+
+ /* add a new values vector if it is different from the current one */
+ /* (pv->v+count is the most recently stored unique vector) */
+ if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) {
+ count+=valueColumns;
+ uprv_memmove(pv->v+count, row+2, (size_t)valueColumns*4);
+ }
+
+ if(start<UPVEC_FIRST_SPECIAL_CP) {
+ handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+ }
+
+ row+=columns;
+ }
+
+ /* count is at the beginning of the last vector, add one to include that last vector */
+ pv->rows=count/valueColumns+1;
+}
+
+/*
+ * Return the compacted vectors array, still owned by pv.
+ * Optionally stores the number of rows and of value columns through
+ * pRows and pColumns (either may be NULL).
+ * Returns NULL, without touching the outputs, before upvec_compact().
+ */
+U_CAPI const uint32_t * U_EXPORT2
+upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) {
+    if(pv->isCompacted) {
+        if(pRows!=NULL) {
+            *pRows=pv->rows;
+        }
+        if(pColumns!=NULL) {
+            /* report value columns only, without start & limit */
+            *pColumns=pv->columns-2;
+        }
+        return pv->v;
+    }
+    return NULL;
+}
+
+/*
+ * Return a uprv_malloc()ed copy of the compacted vectors array.
+ * Optionally stores the number of rows and of value columns through
+ * pRows and pColumns (either may be NULL).
+ *
+ * Returns NULL if *pErrorCode already indicates a failure, sets
+ * U_ILLEGAL_ARGUMENT_ERROR before upvec_compact(), and sets
+ * U_MEMORY_ALLOCATION_ERROR if the copy cannot be allocated.
+ */
+U_CAPI uint32_t * U_EXPORT2
+upvec_cloneArray(const UPropsVectors *pv,
+                 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    if(!pv->isCompacted) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    /* size of the compacted array: rows times value columns, 4 bytes per cell */
+    int32_t byteLength=pv->rows*(pv->columns-2)*4;
+    uint32_t *copy=(uint32_t *)uprv_malloc(byteLength);
+    if(copy==NULL) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    uprv_memcpy(copy, pv->v, byteLength);
+
+    if(pRows!=NULL) {
+        *pRows=pv->rows;
+    }
+    if(pColumns!=NULL) {
+        *pColumns=pv->columns-2;
+    }
+    return copy;
+}
+
+/*
+ * Compact pv with upvec_compactToUTrie2Handler(), which builds a UTrie2 that
+ * maps code points to row indexes, then freeze the trie with 16-bit values.
+ * Returns the frozen trie, or NULL (after closing the trie) on failure.
+ */
+U_CAPI UTrie2 * U_EXPORT2
+upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) {
+    UPVecToUTrie2Context toUTrie2={ NULL, 0, 0, 0 };
+
+    upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode);
+    utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode);
+    if(!U_FAILURE(*pErrorCode)) {
+        return toUTrie2.trie;
+    }
+
+    /* failure: discard whatever the handler may have built */
+    utrie2_close(toUTrie2.trie);
+    toUTrie2.trie=NULL;
+    return toUTrie2.trie;
+}
+
+/*
+ * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts
+ * some 16-bit field and builds and returns a UTrie2.
+ */
+
+/*
+ * UPVecCompactHandler that builds a UTrie2 of row indexes.
+ * context must point to a UPVecToUTrie2Context.
+ *
+ * - Real ranges (start<UPVEC_FIRST_SPECIAL_CP) are stored in the trie with rowIndex as the value.
+ * - UPVEC_INITIAL_VALUE_CP and UPVEC_ERROR_VALUE_CP record rowIndex in the context.
+ * - UPVEC_START_REAL_VALUES_CP records rowIndex as maxValue and opens the trie,
+ *   or sets U_INDEX_OUTOFBOUNDS_ERROR if rowIndex does not fit into 16 bits.
+ * - Any other special code point is ignored.
+ *
+ * The row and columns arguments are unused.
+ */
+U_CAPI void U_CALLCONV
+upvec_compactToUTrie2Handler(void *context,
+                             UChar32 start, UChar32 end,
+                             int32_t rowIndex, uint32_t *row, int32_t columns,
+                             UErrorCode *pErrorCode) {
+    (void)row;
+    (void)columns;
+    UPVecToUTrie2Context *ctx=(UPVecToUTrie2Context *)context;
+
+    if(start<UPVEC_FIRST_SPECIAL_CP) {
+        /* real values: map the whole range to this row index */
+        utrie2_setRange32(ctx->trie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode);
+        return;
+    }
+
+    if(start==UPVEC_INITIAL_VALUE_CP) {
+        ctx->initialValue=rowIndex;
+    } else if(start==UPVEC_ERROR_VALUE_CP) {
+        ctx->errorValue=rowIndex;
+    } else if(start==UPVEC_START_REAL_VALUES_CP) {
+        ctx->maxValue=rowIndex;
+        if(rowIndex>0xffff) {
+            /* too many rows for a 16-bit trie */
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        } else {
+            ctx->trie=utrie2_open(ctx->initialValue,
+                                  ctx->errorValue, pErrorCode);
+        }
+    }
+}
diff --git a/thirdparty/icu4c/common/propsvec.h b/thirdparty/icu4c/common/propsvec.h
new file mode 100644
index 0000000000..39080615ea
--- /dev/null
+++ b/thirdparty/icu4c/common/propsvec.h
@@ -0,0 +1,178 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: propsvec.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002feb22
+* created by: Markus W. Scherer
+*
+* Store bits (Unicode character properties) in bit set vectors.
+*/
+
+#ifndef __UPROPSVEC_H__
+#define __UPROPSVEC_H__
+
+#include "unicode/utypes.h"
+#include "utrie.h"
+#include "utrie2.h"
+
+U_CDECL_BEGIN
+
+/**
+ * Unicode Properties Vectors associated with code point ranges.
+ *
+ * Rows of uint32_t integers in a contiguous array store
+ * the range limits and the properties vectors.
+ *
+ * Logically, each row has a certain number of uint32_t values,
+ * which is set via the upvec_open() "columns" parameter.
+ *
+ * Internally, two additional columns are stored.
+ * In each internal row,
+ * row[0] contains the start code point and
+ * row[1] contains the limit code point,
+ * which is the start of the next range.
+ *
+ * Initially, there is only one "normal" row for
+ * range [0..0x110000[ with values 0.
+ * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
+ *
+ * It would be possible to store only one range boundary per row,
+ * but self-contained rows allow to later sort them by contents.
+ */
+struct UPropsVectors;
+typedef struct UPropsVectors UPropsVectors;
+
+/*
+ * Special pseudo code points for storing the initialValue and the errorValue,
+ * which are used to initialize a UTrie2 or similar.
+ */
+#define UPVEC_FIRST_SPECIAL_CP 0x110000
+#define UPVEC_INITIAL_VALUE_CP 0x110000
+#define UPVEC_ERROR_VALUE_CP 0x110001
+#define UPVEC_MAX_CP 0x110001
+
+/*
+ * Special pseudo code point used in upvec_compact() signalling the end of
+ * delivering special values and the beginning of delivering real ones.
+ * Stable value, unlike UPVEC_MAX_CP which might grow over time.
+ */
+#define UPVEC_START_REAL_VALUES_CP 0x200000
+
+/*
+ * Open a UPropsVectors object.
+ * @param columns Number of value integers (uint32_t) per row.
+ */
+U_CAPI UPropsVectors * U_EXPORT2
+upvec_open(int32_t columns, UErrorCode *pErrorCode);
+
+U_CAPI void U_EXPORT2
+upvec_close(UPropsVectors *pv);
+
+/*
+ * In rows for code points [start..end], select the column,
+ * reset the mask bits and set the value bits (ANDed with the mask).
+ *
+ * Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
+ */
+U_CAPI void U_EXPORT2
+upvec_setValue(UPropsVectors *pv,
+ UChar32 start, UChar32 end,
+ int32_t column,
+ uint32_t value, uint32_t mask,
+ UErrorCode *pErrorCode);
+
+/*
+ * Logically const but must not be used on the same pv concurrently!
+ * Always returns 0 if called after upvec_compact().
+ */
+U_CAPI uint32_t U_EXPORT2
+upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
+
+/*
+ * pRangeStart and pRangeEnd can be NULL.
+ * @return NULL if rowIndex out of range and for illegal arguments,
+ * or if called after upvec_compact()
+ */
+U_CAPI uint32_t * U_EXPORT2
+upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
+ UChar32 *pRangeStart, UChar32 *pRangeEnd);
+
+/*
+ * Compact the vectors:
+ * - modify the memory
+ * - keep only unique vectors
+ * - store them contiguously from the beginning of the memory
+ * - for each (non-unique) row, call the handler function
+ *
+ * The handler's rowIndex is the index of the row in the compacted
+ * memory block.
+ * (Therefore, it starts at 0 and increases in increments of the columns value.)
+ *
+ * In a first phase, only special values are delivered (each exactly once),
+ * with start==end both equalling a special pseudo code point.
+ * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP
+ * where rowIndex is the length of the compacted array,
+ * and the row is arbitrary (but not NULL).
+ * Then, in the second phase, the handler is called for each row of real values.
+ */
+typedef void U_CALLCONV
+UPVecCompactHandler(void *context,
+ UChar32 start, UChar32 end,
+ int32_t rowIndex, uint32_t *row, int32_t columns,
+ UErrorCode *pErrorCode);
+
+U_CAPI void U_EXPORT2
+upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
+
+/*
+ * Get the vectors array after calling upvec_compact().
+ * The caller must not modify nor release the returned array.
+ * Returns NULL if called before upvec_compact().
+ */
+U_CAPI const uint32_t * U_EXPORT2
+upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
+
+/*
+ * Get a clone of the vectors array after calling upvec_compact().
+ * The caller owns the returned array and must uprv_free() it.
+ * Returns NULL if called before upvec_compact().
+ */
+U_CAPI uint32_t * U_EXPORT2
+upvec_cloneArray(const UPropsVectors *pv,
+ int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
+
+/*
+ * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
+ * vectors array, and freeze the trie.
+ */
+U_CAPI UTrie2 * U_EXPORT2
+upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
+
+/*
+ * Context object for upvec_compactToUTrie2Handler().
+ * Initialize all fields to NULL/0 before passing it to upvec_compact().
+ */
+struct UPVecToUTrie2Context {
+ UTrie2 *trie; /* opened by the handler when it receives UPVEC_START_REAL_VALUES_CP */
+ int32_t initialValue; /* rowIndex received for UPVEC_INITIAL_VALUE_CP */
+ int32_t errorValue; /* rowIndex received for UPVEC_ERROR_VALUE_CP */
+ int32_t maxValue; /* rowIndex received for UPVEC_START_REAL_VALUES_CP */
+};
+typedef struct UPVecToUTrie2Context UPVecToUTrie2Context;
+
+/* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */
+U_CAPI void U_CALLCONV
+upvec_compactToUTrie2Handler(void *context,
+ UChar32 start, UChar32 end,
+ int32_t rowIndex, uint32_t *row, int32_t columns,
+ UErrorCode *pErrorCode);
+
+U_CDECL_END
+
+#endif
diff --git a/thirdparty/icu4c/common/punycode.cpp b/thirdparty/icu4c/common/punycode.cpp
new file mode 100644
index 0000000000..4832938ff7
--- /dev/null
+++ b/thirdparty/icu4c/common/punycode.cpp
@@ -0,0 +1,590 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: punycode.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jan31
+* created by: Markus W. Scherer
+*/
+
+
+/* This ICU code derived from: */
+/*
+punycode.c 0.4.0 (2001-Nov-17-Sat)
+http://www.cs.berkeley.edu/~amc/idn/
+Adam M. Costello
+http://www.nicemice.net/amc/
+
+Disclaimer and license
+
+ Regarding this entire document or any portion of it (including
+ the pseudocode and C code), the author makes no guarantees and
+ is not responsible for any damage resulting from its use. The
+ author grants irrevocable permission to anyone to use, modify,
+ and distribute it in any way that does not diminish the rights
+ of anyone else to use, modify, and distribute it, provided that
+ redistributed derivative works do not contain misleading author or
+ version information. Derivative works need not be licensed under
+ similar terms.
+*/
+/*
+ * ICU modifications:
+ * - ICU data types and coding conventions
+ * - ICU string buffer handling with implicit source lengths
+ * and destination preflighting
+ * - UTF-16 handling
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_IDNA
+
+#include "unicode/ustring.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
+#include "ustr_imp.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "punycode.h"
+#include "uassert.h"
+
+
+/* Punycode ----------------------------------------------------------------- */
+
+/* Punycode parameters for Bootstring */
+#define BASE 36 /* number of digit values: a..z (0..25) and 0..9 (26..35) */
+#define TMIN 1 /* lower clamp for the per-digit threshold t */
+#define TMAX 26 /* upper clamp for the per-digit threshold t */
+#define SKEW 38 /* used by adaptBias() */
+#define DAMP 700 /* divisor applied to the first delta in adaptBias() */
+#define INITIAL_BIAS 72 /* starting bias of encoder and decoder */
+#define INITIAL_N 0x80 /* first non-basic code point, see IS_BASIC() */
+
+/* "Basic" Unicode/ASCII code points */
+#define _HYPHEN 0X2d /* '-' */
+#define DELIMITER _HYPHEN /* separates the basic code points from the encoded deltas */
+
+#define _ZERO_ 0X30 /* '0' */
+#define _NINE 0x39 /* '9' */
+
+#define _SMALL_A 0X61 /* 'a' */
+#define _SMALL_Z 0X7a /* 'z' */
+
+#define _CAPITAL_A 0X41 /* 'A' */
+#define _CAPITAL_Z 0X5a /* 'Z' */
+
+#define IS_BASIC(c) ((c)<0x80) /* true for ASCII; note that c is not checked for being negative */
+#define IS_BASIC_UPPERCASE(c) (_CAPITAL_A<=(c) && (c)<=_CAPITAL_Z) /* true for A..Z only */
+
+/**
+ * digitToBasic() returns the basic code point whose value
+ * (when used for representing integers) is digit, which must be in the
+ * range 0 to BASE-1; the range is not checked here.
+ * The lowercase form is used unless the uppercase flag is
+ * nonzero, in which case the uppercase form is used.
+ * The uppercase flag only affects the letters: values 26..35 always map
+ * to the ASCII digits 0..9.
+ *
+ * @param digit value to represent, 0..BASE-1
+ * @param uppercase nonzero to select A..Z instead of a..z for 0..25
+ * @return the ASCII character that represents digit
+ */
+static inline char
+digitToBasic(int32_t digit, UBool uppercase) {
+ /* 0..25 map to ASCII a..z or A..Z */
+ /* 26..35 map to ASCII 0..9 */
+ if(digit<26) {
+ if(uppercase) {
+ return (char)(_CAPITAL_A+digit);
+ } else {
+ return (char)(_SMALL_A+digit);
+ }
+ } else {
+ return (char)((_ZERO_-26)+digit); /* same as '0'+(digit-26) */
+ }
+}
+
+/**
+ * Inverse of digitToBasic(): maps a..z and A..Z to 0..25 and 0..9 to 26..35.
+ * Code points in the gaps between those ranges are not tested separately;
+ * for them the subtraction from the next range start yields a negative
+ * number, which is the "invalid" result.
+ *
+ * @param cp the code point to decode
+ * @return the numeric value of a basic code point (for use in representing integers)
+ * in the range 0 to BASE-1, or a negative value if cp is invalid.
+ */
+static int32_t decodeDigit(int32_t cp) {
+ if(cp<=u'Z') {
+ if(cp<=u'9') {
+ if(cp<u'0') {
+ return -1;
+ } else {
+ return cp-u'0'+26; // 0..9 -> 26..35
+ }
+ } else {
+ return cp-u'A'; // A-Z -> 0..25; 0x3A..0x40 -> negative (invalid)
+ }
+ } else if(cp<=u'z') {
+ return cp-'a'; // a..z -> 0..25; 0x5B..0x60 -> negative (invalid)
+ } else {
+ return -1;
+ }
+}
+
+static inline char
+asciiCaseMap(char b, UBool uppercase) { /* ASCII-only case mapping of b; anything but a..z/A..Z is returned unchanged */
+ if(uppercase) {
+ if(_SMALL_A<=b && b<=_SMALL_Z) {
+ b-=(_SMALL_A-_CAPITAL_A); /* a..z -> A..Z */
+ }
+ } else {
+ if(_CAPITAL_A<=b && b<=_CAPITAL_Z) {
+ b+=(_SMALL_A-_CAPITAL_A); /* A..Z -> a..z */
+ }
+ }
+ return b;
+}
+
+/* Punycode-specific Bootstring code ---------------------------------------- */
+
+/*
+ * The following code omits the {parts} of the pseudo-algorithm in the spec
+ * that are not used with the Punycode parameter set.
+ */
+
+/*
+ * Bias adaptation function.
+ * Computes the bias to use for the next variable-length integer from the
+ * delta that was just encoded or decoded.
+ *
+ * delta: the delta just processed; scaled down by DAMP on the first call
+ * and by 2 on every later call
+ * length: number of code points handled so far including the current one;
+ * used as a divisor, so it must not be 0 (both callers in this file
+ * pass a count that has already been incremented)
+ * firstTime: nonzero for the first delta of a string
+ * Returns the new bias.
+ */
+static int32_t
+adaptBias(int32_t delta, int32_t length, UBool firstTime) {
+ int32_t count;
+
+ if(firstTime) {
+ delta/=DAMP;
+ } else {
+ delta/=2;
+ }
+
+ delta+=delta/length;
+ for(count=0; delta>((BASE-TMIN)*TMAX)/2; count+=BASE) { /* count grows by BASE for each division needed to bring delta under the limit */
+ delta/=(BASE-TMIN);
+ }
+
+ return count+(((BASE-TMIN+1)*delta)/(delta+SKEW));
+}
+
+namespace {
+
+// ICU-13727: Limit input length for n^2 algorithm
+// where well-formed strings are at most 59 characters long.
+constexpr int32_t ENCODE_MAX_CODE_UNITS=1000; // max UTF-16 code units accepted by u_strToPunycode(); also the size of its cpBuffer
+constexpr int32_t DECODE_MAX_CHARS=2000; // max input length accepted by u_strFromPunycode()
+
+} // namespace
+
+/*
+ * encode
+ *
+ * Converts the UTF-16 string src to Punycode in dest.
+ * Basic (ASCII) code units are copied to dest first, case-mapped according to
+ * caseFlags if that is not NULL; a delimiter follows if there were any.
+ * The remaining code points are then written, in increasing code point order,
+ * as generalized variable-length integers.
+ * Output that does not fit into destCapacity is counted but not stored
+ * (preflighting); the total length is passed to u_terminateUChars().
+ *
+ * Errors set in *pErrorCode (the function then returns 0):
+ * - U_ILLEGAL_ARGUMENT_ERROR: src==NULL, srcLength<-1,
+ * or dest==NULL with destCapacity!=0
+ * - U_INPUT_TOO_LONG_ERROR: more than ENCODE_MAX_CODE_UNITS input code units
+ * - U_INVALID_CHAR_FOUND: unmatched surrogate in src
+ * - U_INTERNAL_PROGRAM_ERROR: delta would overflow
+ * Returns 0 without doing anything if pErrorCode is NULL or already a failure.
+ */
+U_CAPI int32_t
+u_strToPunycode(const UChar *src, int32_t srcLength,
+ UChar *dest, int32_t destCapacity,
+ const UBool *caseFlags,
+ UErrorCode *pErrorCode) {
+
+ int32_t cpBuffer[ENCODE_MAX_CODE_UNITS];
+ int32_t n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
+ UChar c, c2;
+
+ /* argument checking */
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
+ if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ if (srcLength>ENCODE_MAX_CODE_UNITS) {
+ *pErrorCode=U_INPUT_TOO_LONG_ERROR;
+ return 0;
+ }
+
+ /*
+ * Handle the basic code points and
+ * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
+ */
+ srcCPCount=destLength=0;
+ if(srcLength==-1) {
+ /* NUL-terminated input */
+ for(j=0; /* no condition */; ++j) {
+ if((c=src[j])==0) {
+ break;
+ }
+ if(j>=ENCODE_MAX_CODE_UNITS) { /* the length is unknown up front, so the limit is enforced while scanning */
+ *pErrorCode=U_INPUT_TOO_LONG_ERROR;
+ return 0;
+ }
+ if(IS_BASIC(c)) {
+ cpBuffer[srcCPCount++]=0; /* placeholder: 0 is below INITIAL_N, so it is never picked as m but still counts toward delta */
+ if(destLength<destCapacity) {
+ dest[destLength]=
+ caseFlags!=NULL ?
+ asciiCaseMap((char)c, caseFlags[j]) :
+ (char)c;
+ }
+ ++destLength;
+ } else {
+ n=(caseFlags!=NULL && caseFlags[j])<<31L; /* case flag goes into the sign bit */
+ if(U16_IS_SINGLE(c)) {
+ n|=c;
+ } else if(U16_IS_LEAD(c) && U16_IS_TRAIL(c2=src[j+1])) {
+ ++j;
+ n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2);
+ } else {
+ /* error: unmatched surrogate */
+ *pErrorCode=U_INVALID_CHAR_FOUND;
+ return 0;
+ }
+ cpBuffer[srcCPCount++]=n;
+ }
+ }
+ } else {
+ /* length-specified input */
+ for(j=0; j<srcLength; ++j) {
+ c=src[j];
+ if(IS_BASIC(c)) {
+ cpBuffer[srcCPCount++]=0; /* placeholder: 0 is below INITIAL_N, so it is never picked as m but still counts toward delta */
+ if(destLength<destCapacity) {
+ dest[destLength]=
+ caseFlags!=NULL ?
+ asciiCaseMap((char)c, caseFlags[j]) :
+ (char)c;
+ }
+ ++destLength;
+ } else {
+ n=(caseFlags!=NULL && caseFlags[j])<<31L; /* case flag goes into the sign bit */
+ if(U16_IS_SINGLE(c)) {
+ n|=c;
+ } else if(U16_IS_LEAD(c) && (j+1)<srcLength && U16_IS_TRAIL(c2=src[j+1])) {
+ ++j;
+ n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2);
+ } else {
+ /* error: unmatched surrogate */
+ *pErrorCode=U_INVALID_CHAR_FOUND;
+ return 0;
+ }
+ cpBuffer[srcCPCount++]=n;
+ }
+ }
+ }
+
+ /* Finish the basic string - if it is not empty - with a delimiter. */
+ basicLength=destLength;
+ if(basicLength>0) {
+ if(destLength<destCapacity) {
+ dest[destLength]=DELIMITER;
+ }
+ ++destLength;
+ }
+
+ /*
+ * handledCPCount is the number of code points that have been handled
+ * basicLength is the number of basic code points
+ * destLength is the number of chars that have been output
+ */
+
+ /* Initialize the state: */
+ n=INITIAL_N;
+ delta=0;
+ bias=INITIAL_BIAS;
+
+ /* Main encoding loop: */
+ for(handledCPCount=basicLength; handledCPCount<srcCPCount; /* no op */) {
+ /*
+ * All non-basic code points < n have been handled already.
+ * Find the next larger one:
+ */
+ for(m=0x7fffffff, j=0; j<srcCPCount; ++j) {
+ q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
+ if(n<=q && q<m) {
+ m=q;
+ }
+ }
+
+ /*
+ * Increase delta enough to advance the decoder's
+ * <n,i> state to <m,0>, but guard against overflow:
+ */
+ if(m-n>(0x7fffffff-handledCPCount-delta)/(handledCPCount+1)) {
+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
+ return 0;
+ }
+ delta+=(m-n)*(handledCPCount+1);
+ n=m;
+
+ /* Encode a sequence of same code points n */
+ for(j=0; j<srcCPCount; ++j) {
+ q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
+ if(q<n) {
+ ++delta;
+ } else if(q==n) {
+ /* Represent delta as a generalized variable-length integer: */
+ for(q=delta, k=BASE; /* no condition */; k+=BASE) {
+
+ /** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt
+
+ t=k-bias;
+ if(t<TMIN) {
+ t=TMIN;
+ } else if(t>TMAX) {
+ t=TMAX;
+ }
+ */
+
+ t=k-bias;
+ if(t<TMIN) {
+ t=TMIN;
+ } else if(k>=(bias+TMAX)) {
+ t=TMAX;
+ }
+
+ if(q<t) {
+ break;
+ }
+
+ if(destLength<destCapacity) {
+ dest[destLength]=digitToBasic(t+(q-t)%(BASE-t), 0); /* all but the last digit are written in lowercase */
+ }
+ ++destLength;
+ q=(q-t)/(BASE-t);
+ }
+
+ if(destLength<destCapacity) {
+ dest[destLength]=digitToBasic(q, (UBool)(cpBuffer[j]<0)); /* the last digit carries the case flag (sign bit of cpBuffer[j]) */
+ }
+ ++destLength;
+ bias=adaptBias(delta, handledCPCount+1, (UBool)(handledCPCount==basicLength));
+ delta=0;
+ ++handledCPCount;
+ }
+ }
+
+ ++delta;
+ ++n;
+ }
+
+ return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
+}
+
+/*
+ * decode
+ *
+ * Converts the Punycode string src to UTF-16 in dest.
+ * Everything before the last delimiter is copied as basic code points; the
+ * rest is decoded as a sequence of generalized variable-length integers, each
+ * of which inserts one code point into the output.
+ * If caseFlags is not NULL, it receives one flag per output code unit.
+ * Output that does not fit into destCapacity is counted but not stored
+ * (preflighting); the total length is passed to u_terminateUChars().
+ *
+ * Errors set in *pErrorCode (the function then returns 0):
+ * - U_ILLEGAL_ARGUMENT_ERROR: src==NULL, srcLength<-1,
+ * or dest==NULL with destCapacity!=0
+ * - U_INPUT_TOO_LONG_ERROR: input longer than DECODE_MAX_CHARS
+ * - U_INVALID_CHAR_FOUND: non-ASCII before the last delimiter,
+ * or an invalid digit after it
+ * - U_ILLEGAL_CHAR_FOUND: truncated or overflowing delta sequence, or a
+ * resulting code point that is a surrogate or above U+10FFFF
+ * Returns 0 without doing anything if pErrorCode is NULL or already a failure.
+ */
+U_CAPI int32_t
+u_strFromPunycode(const UChar *src, int32_t srcLength,
+ UChar *dest, int32_t destCapacity,
+ UBool *caseFlags,
+ UErrorCode *pErrorCode) {
+ int32_t n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
+ destCPCount, firstSupplementaryIndex, cpLength;
+ UChar b;
+
+ /* argument checking */
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
+ if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ if(srcLength==-1) {
+ srcLength=u_strlen(src);
+ }
+ if (srcLength>DECODE_MAX_CHARS) {
+ *pErrorCode=U_INPUT_TOO_LONG_ERROR;
+ return 0;
+ }
+
+ /*
+ * Handle the basic code points:
+ * Let basicLength be the number of input code points
+ * before the last delimiter, or 0 if there is none,
+ * then copy the first basicLength code points to the output.
+ *
+ * The two following loops iterate backward.
+ */
+ for(j=srcLength; j>0;) {
+ if(src[--j]==DELIMITER) {
+ break;
+ }
+ }
+ destLength=basicLength=destCPCount=j;
+ U_ASSERT(destLength>=0);
+
+ while(j>0) {
+ b=src[--j];
+ if(!IS_BASIC(b)) {
+ *pErrorCode=U_INVALID_CHAR_FOUND;
+ return 0;
+ }
+
+ if(j<destCapacity) {
+ dest[j]=(UChar)b;
+
+ if(caseFlags!=NULL) {
+ caseFlags[j]=IS_BASIC_UPPERCASE(b);
+ }
+ }
+ }
+
+ /* Initialize the state: */
+ n=INITIAL_N;
+ i=0;
+ bias=INITIAL_BIAS;
+ firstSupplementaryIndex=1000000000; /* sentinel: no supplementary code point has been inserted yet */
+
+ /*
+ * Main decoding loop:
+ * Start just after the last delimiter if any
+ * basic code points were copied; start at the beginning otherwise.
+ */
+ for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) {
+ /*
+ * in is the index of the next character to be consumed, and
+ * destCPCount is the number of code points in the output array.
+ *
+ * Decode a generalized variable-length integer into delta,
+ * which gets added to i. The overflow checking is easier
+ * if we increase i as we go, then subtract off its starting
+ * value at the end to obtain delta.
+ */
+ for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) {
+ if(in>=srcLength) { /* input ended in the middle of a variable-length integer */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ return 0;
+ }
+
+ digit=decodeDigit(src[in++]);
+ if(digit<0) {
+ *pErrorCode=U_INVALID_CHAR_FOUND;
+ return 0;
+ }
+ if(digit>(0x7fffffff-i)/w) {
+ /* integer overflow */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ return 0;
+ }
+
+ i+=digit*w;
+ /** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt
+ t=k-bias;
+ if(t<TMIN) {
+ t=TMIN;
+ } else if(t>TMAX) {
+ t=TMAX;
+ }
+ */
+ t=k-bias;
+ if(t<TMIN) {
+ t=TMIN;
+ } else if(k>=(bias+TMAX)) {
+ t=TMAX;
+ }
+ if(digit<t) { /* a digit below the threshold terminates the integer */
+ break;
+ }
+
+ if(w>0x7fffffff/(BASE-t)) {
+ /* integer overflow */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ return 0;
+ }
+ w*=BASE-t;
+ }
+
+ /*
+ * Modification from sample code:
+ * Increments destCPCount here,
+ * where needed instead of in for() loop tail.
+ */
+ ++destCPCount;
+ bias=adaptBias(i-oldi, destCPCount, (UBool)(oldi==0));
+
+ /*
+ * i was supposed to wrap around from (incremented) destCPCount to 0,
+ * incrementing n each time, so we'll fix that now:
+ */
+ if(i/destCPCount>(0x7fffffff-n)) {
+ /* integer overflow */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ return 0;
+ }
+
+ n+=i/destCPCount;
+ i%=destCPCount;
+ /* not needed for Punycode: */
+ /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */
+
+ if(n>0x10ffff || U_IS_SURROGATE(n)) {
+ /* Unicode code point overflow */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ return 0;
+ }
+
+ /* Insert n at position i of the output: */
+ cpLength=U16_LENGTH(n);
+ if(dest!=NULL && ((destLength+cpLength)<=destCapacity)) { /* if the code point does not fit, only destLength is advanced below */
+ int32_t codeUnitIndex;
+
+ /*
+ * Handle indexes when supplementary code points are present.
+ *
+ * In almost all cases, there will be only BMP code points before i
+ * and even in the entire string.
+ * This is handled with the same efficiency as with UTF-32.
+ *
+ * Only the rare cases with supplementary code points are handled
+ * more slowly - but not too bad since this is an insertion anyway.
+ */
+ if(i<=firstSupplementaryIndex) {
+ codeUnitIndex=i;
+ if(cpLength>1) {
+ firstSupplementaryIndex=codeUnitIndex;
+ } else {
+ ++firstSupplementaryIndex;
+ }
+ } else {
+ codeUnitIndex=firstSupplementaryIndex;
+ U16_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex);
+ }
+
+ /* use the UChar index codeUnitIndex instead of the code point index i */
+ if(codeUnitIndex<destLength) {
+ uprv_memmove(dest+codeUnitIndex+cpLength,
+ dest+codeUnitIndex,
+ (destLength-codeUnitIndex)*U_SIZEOF_UCHAR);
+ if(caseFlags!=NULL) {
+ uprv_memmove(caseFlags+codeUnitIndex+cpLength,
+ caseFlags+codeUnitIndex,
+ destLength-codeUnitIndex);
+ }
+ }
+ if(cpLength==1) {
+ /* BMP, insert one code unit */
+ dest[codeUnitIndex]=(UChar)n;
+ } else {
+ /* supplementary character, insert two code units */
+ dest[codeUnitIndex]=U16_LEAD(n);
+ dest[codeUnitIndex+1]=U16_TRAIL(n);
+ }
+ if(caseFlags!=NULL) {
+ /* Case of last character determines uppercase flag: */
+ caseFlags[codeUnitIndex]=IS_BASIC_UPPERCASE(src[in-1]);
+ if(cpLength==2) {
+ caseFlags[codeUnitIndex+1]=FALSE;
+ }
+ }
+ }
+ destLength+=cpLength;
+ U_ASSERT(destLength>=0);
+ ++i;
+ }
+
+ return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
+}
+
+/* ### check notes on overflow handling - only necessary if not IDNA? are these Punycode functions to be public? */
+
+#endif /* #if !UCONFIG_NO_IDNA */
diff --git a/thirdparty/icu4c/common/punycode.h b/thirdparty/icu4c/common/punycode.h
new file mode 100644
index 0000000000..9e28f770c4
--- /dev/null
+++ b/thirdparty/icu4c/common/punycode.h
@@ -0,0 +1,120 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2003, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: punycode.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jan31
+* created by: Markus W. Scherer
+*/
+
+/* This ICU code derived from: */
+/*
+punycode.c 0.4.0 (2001-Nov-17-Sat)
+http://www.cs.berkeley.edu/~amc/idn/
+Adam M. Costello
+http://www.nicemice.net/amc/
+*/
+
+#ifndef __PUNYCODE_H__
+#define __PUNYCODE_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_IDNA
+
+/**
+ * u_strToPunycode() converts Unicode to Punycode.
+ *
+ * The input string must not contain single, unpaired surrogates.
+ * The output will be represented as an array of ASCII code points.
+ *
+ * The output string is NUL-terminated according to normal ICU
+ * string output rules.
+ *
+ * @param src Input Unicode string.
+ * This function handles a limited number of UTF-16 code units
+ * (the limit is >=64).
+ * U_INPUT_TOO_LONG_ERROR is set if the limit is exceeded.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output Punycode array.
+ * @param destCapacity Size of dest.
+ * @param caseFlags Vector of boolean values, one per input UChar,
+ * indicating that the corresponding character is to be
+ * marked for the decoder optionally
+ * uppercasing (true) or lowercasing (false)
+ * the character.
+ * ASCII characters are output directly in the case as marked.
+ * Flags corresponding to trail surrogates are ignored.
+ * If caseFlags==NULL then input characters are not
+ * case-mapped.
+ * @param pErrorCode ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INPUT_TOO_LONG_ERROR if src contains
+ * too many code units.
+ * @return Number of ASCII characters in puny.
+ *
+ * @see u_strFromPunycode
+ */
+U_CAPI int32_t
+u_strToPunycode(const UChar *src, int32_t srcLength,
+ UChar *dest, int32_t destCapacity,
+ const UBool *caseFlags,
+ UErrorCode *pErrorCode);
+
+/**
+ * u_strFromPunycode() converts Punycode to Unicode.
+ * The Unicode string will be at most as long (in UChars)
+ * as the Punycode string (in chars).
+ *
+ * @param src Input Punycode string.
+ * @param srcLength Length of puny, or -1 if NUL-terminated
+ * @param dest Output Unicode string buffer.
+ * @param destCapacity Size of dest in number of UChars,
+ * and of caseFlags in numbers of UBools.
+ * @param caseFlags Output array for case flags as
+ * defined by the Punycode string.
+ * The caller should uppercase (true) or lowercase (false)
+ * the corresponding character in dest.
+ * For supplementary characters, only the lead surrogate
+ * is marked, and false is stored for the trail surrogate.
+ * This is redundant and not necessary for ASCII characters
+ * because they are already in the case indicated.
+ * Can be NULL if the case flags are not needed.
+ * @param pErrorCode ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if a non-ASCII character
+ * precedes the last delimiter ('-'),
+ * or if an invalid character (not a-zA-Z0-9) is found
+ * after the last delimiter.
+ * U_ILLEGAL_CHAR_FOUND if the delta sequence is ill-formed.
+ * U_INPUT_TOO_LONG_ERROR if src is longer than the
+ * implementation limit.
+ * @return Number of UChars written to dest.
+ *
+ * @see u_strToPunycode
+ */
+U_CAPI int32_t
+u_strFromPunycode(const UChar *src, int32_t srcLength,
+ UChar *dest, int32_t destCapacity,
+ UBool *caseFlags,
+ UErrorCode *pErrorCode);
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+#endif
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
diff --git a/thirdparty/icu4c/common/putil.cpp b/thirdparty/icu4c/common/putil.cpp
new file mode 100644
index 0000000000..3ed6a05d22
--- /dev/null
+++ b/thirdparty/icu4c/common/putil.cpp
@@ -0,0 +1,2482 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
+*
+* Date Name Description
+* 04/14/97 aliu Creation.
+* 04/24/97 aliu Added getDefaultDataDirectory() and
+* getDefaultLocaleID().
+* 04/28/97 aliu Rewritten to assume Unix and apply general methods
+* for assumed case. Non-UNIX platforms must be
+* special-cased. Rewrote numeric methods dealing
+* with NaN and Infinity to be platform independent
+* over all IEEE 754 platforms.
+* 05/13/97 aliu Restored sign of timezone
+* (semantics are hours West of GMT)
+* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
+* nextDouble..
+* 07/22/98 stephen Added remainder, max, min, trunc
+* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
+* 08/24/98 stephen Added longBitsFromDouble
+* 09/08/98 stephen Minor changes for Mac Port
+* 03/02/99 stephen Removed openFile(). Added AS400 support.
+* Fixed EBCDIC tables
+* 04/15/99 stephen Converted to C.
+* 06/28/99 stephen Removed mutex locking in u_isBigEndian().
+* 08/04/99 jeffrey R. Added OS/2 changes
+* 11/15/99 helena Integrated S/390 IEEE support.
+* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
+* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
+* 01/03/08 Steven L. Fake Time Support
+******************************************************************************
+*/
+
+// Defines _XOPEN_SOURCE for access to POSIX functions.
+// Must be before any other #includes.
+#include "uposixdefs.h"
+
+// First, the platform type. Need this for U_PLATFORM.
+#include "unicode/platform.h"
+
+#if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
+/* tzset isn't defined in strict ANSI on MinGW. */
+#undef __STRICT_ANSI__
+#endif
+
+/*
+ * Cygwin with GCC requires time.h to be included after the statement above that disables strict ANSI mode.
+ */
+#include <time.h>
+
+#if !U_PLATFORM_USES_ONLY_WIN32_API
+#include <sys/time.h>
+#endif
+
+/* include the rest of the ICU headers */
+#include "unicode/putil.h"
+#include "unicode/ustring.h"
+#include "putilimp.h"
+#include "uassert.h"
+#include "umutex.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "locmap.h"
+#include "ucln_cmn.h"
+#include "charstr.h"
+
+/* Include standard headers. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <locale.h>
+#include <float.h>
+
+#ifndef U_COMMON_IMPLEMENTATION
+#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/howtouseicu
+#endif
+
+
+/* include system headers */
+#if U_PLATFORM_USES_ONLY_WIN32_API
+ /*
+ * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
+ * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
+ * to use native APIs as much as possible?
+ */
+#ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+#endif
+# define VC_EXTRALEAN
+# define NOUSER
+# define NOSERVICE
+# define NOIME
+# define NOMCX
+# include <windows.h>
+# include "unicode/uloc.h"
+# include "wintz.h"
+#elif U_PLATFORM == U_PF_OS400
+# include <float.h>
+# include <qusec.h> /* error code structure */
+# include <qusrjobi.h>
+# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
+# include <mih/testptr.h> /* For uprv_maximumPtr */
+#elif U_PLATFORM == U_PF_OS390
+# include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
+#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
+# include <limits.h>
+# include <unistd.h>
+# if U_PLATFORM == U_PF_SOLARIS
+# ifndef _XPG4_2
+# define _XPG4_2
+# endif
+# elif U_PLATFORM == U_PF_ANDROID
+# include <sys/system_properties.h>
+# include <dlfcn.h>
+# endif
+#elif U_PLATFORM == U_PF_QNX
+# include <sys/neutrino.h>
+#endif
+
+
+/*
+ * Only include langinfo.h if we have a way to get the codeset. If we later
+ * depend on more features, we can test on U_HAVE_NL_LANGINFO.
+ *
+ */
+
+#if U_HAVE_NL_LANGINFO_CODESET
+#include <langinfo.h>
+#endif
+
+/**
+ * Simple things (presence of functions, etc) should just go in configure.in and be added to
+ * icucfg.h via autoheader.
+ */
+#if U_PLATFORM_IMPLEMENTS_POSIX
+# if U_PLATFORM == U_PF_OS400
+# define HAVE_DLFCN_H 0
+# define HAVE_DLOPEN 0
+# else
+# ifndef HAVE_DLFCN_H
+# define HAVE_DLFCN_H 1
+# endif
+# ifndef HAVE_DLOPEN
+# define HAVE_DLOPEN 1
+# endif
+# endif
+# ifndef HAVE_GETTIMEOFDAY
+# define HAVE_GETTIMEOFDAY 1
+# endif
+#else
+# define HAVE_DLFCN_H 0
+# define HAVE_DLOPEN 0
+# define HAVE_GETTIMEOFDAY 0
+#endif
+
+U_NAMESPACE_USE
+
+/* Define the extension for data files, again... */
+#define DATA_TYPE "dat"
+
+/* Leave this copyright notice here! */
+static const char copyright[] = U_COPYRIGHT_STRING;
+
+/* floating point implementations ------------------------------------------- */
+
+/* We return QNAN rather than SNAN*/
+#define SIGN 0x80000000U /* sign bit within the high 32-bit word of a double; used by the non-IEEE_754 code paths */
+
+/* Make it easy to define certain types of constants */
+typedef union {
+ int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
+ double d64; /* the same 64 bits viewed as a double */
+} BitPatternConversion;
+static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) }; /* IEEE 754 quiet NaN: exponent all ones, top mantissa bit set */
+static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) }; /* IEEE 754 +infinity: exponent all ones, mantissa zero */
+
+/*---------------------------------------------------------------------------
+ Platform utilities
+ Our general strategy is to assume we're on a POSIX platform. Platforms which
+ are non-POSIX must declare themselves so. The default POSIX implementation
+ will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
+ functions).
+ ---------------------------------------------------------------------------*/
+
+#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
+# undef U_POSIX_LOCALE
+#else
+# define U_POSIX_LOCALE 1
+#endif
+
+/*
+ WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
+ can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
+*/
+#if !IEEE_754
+static char*
+u_topNBytesOfDouble(double* d, int n) /* returns a pointer to the n highest-order bytes of *d as laid out in memory */
+{
+#if U_IS_BIG_ENDIAN
+ return (char*)d; /* big-endian: the high-order bytes come first */
+#else
+ return (char*)(d + 1) - n; /* little-endian: the high-order bytes are the last n bytes */
+#endif
+}
+
+static char*
+u_bottomNBytesOfDouble(double* d, int n) /* returns a pointer to the n lowest-order bytes of *d as laid out in memory */
+{
+#if U_IS_BIG_ENDIAN
+ return (char*)(d + 1) - n; /* big-endian: the low-order bytes are the last n bytes */
+#else
+ return (char*)d; /* little-endian: the low-order bytes come first */
+#endif
+}
+#endif /* !IEEE_754 */
+
+#if IEEE_754
+static UBool
+u_signBit(double d) { /* returns true if the sign bit of d is set; this distinguishes -0.0 from +0.0 */
+ uint8_t hiByte; /* the byte of d that holds the sign bit */
+#if U_IS_BIG_ENDIAN
+ hiByte = *(uint8_t *)&d;
+#else
+ hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
+#endif
+ return (hiByte & 0x80) != 0;
+}
+#endif
+
+
+
+#if defined (U_DEBUG_FAKETIME)
+/* Override the clock to test things without having to move the system clock.
+ * Assumes POSIX gettimeofday() will function
+ */
+UDate fakeClock_t0 = 0; /** Time to start the clock from **/
+UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
+UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
+
+static UDate getUTCtime_real() { /* real wall-clock time from gettimeofday(), in milliseconds */
+ struct timeval posixTime;
+ gettimeofday(&posixTime, NULL);
+ return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); /* seconds and microseconds both converted to milliseconds */
+}
+
+static UDate getUTCtime_fake() { /* real time shifted by fakeClock_dt, which is set up once on the first call */
+ static UMutex fakeClockMutex;
+ umtx_lock(&fakeClockMutex); /* guards the one-time initialization below */
+ if(!fakeClock_set) {
+ UDate real = getUTCtime_real();
+ const char *fake_start = getenv("U_FAKETIME_START");
+ if((fake_start!=NULL) && (fake_start[0]!=0)) {
+ sscanf(fake_start,"%lf",&fakeClock_t0); /* the sscanf result is not checked; fakeClock_t0 keeps its previous value if parsing fails */
+ fakeClock_dt = fakeClock_t0 - real;
+ fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
+ "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
+ fakeClock_t0, fake_start, fakeClock_dt, real);
+ } else {
+ fakeClock_dt = 0; /* no start time given: the fake clock equals the real clock */
+ fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
+ "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
+ }
+ fakeClock_set = TRUE;
+ }
+ umtx_unlock(&fakeClockMutex);
+
+ return getUTCtime_real() + fakeClock_dt;
+}
+#endif
+
+#if U_PLATFORM_USES_ONLY_WIN32_API
+typedef union {
+ int64_t int64; /* the FILETIME value read as one 64-bit integer */
+ FILETIME fileTime; /* filled in by GetSystemTimeAsFileTime() in uprv_getRawUTCtime() */
+} FileTimeConversion; /* This is like a ULARGE_INTEGER */
+
+/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
+#define EPOCH_BIAS INT64_C(116444736000000000)
+#define HECTONANOSECOND_PER_MILLISECOND 10000 /* 100 ns units per millisecond */
+
+#endif
+
+/*---------------------------------------------------------------------------
+ Universal Implementations
+ These are designed to work on all platforms. Try these, and if they
+ don't work on your platform, then special case your platform with new
+ implementations.
+---------------------------------------------------------------------------*/
+
+U_CAPI UDate U_EXPORT2
+uprv_getUTCtime() /* current time; same as uprv_getRawUTCtime() unless U_DEBUG_FAKETIME is defined */
+{
+#if defined(U_DEBUG_FAKETIME)
+ return getUTCtime_fake(); /* Hook for overriding the clock */
+#else
+ return uprv_getRawUTCtime();
+#endif
+}
+
+/*
+ * Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.
+ * Uses GetSystemTimeAsFileTime() on Win32-only platforms, gettimeofday()
+ * where HAVE_GETTIMEOFDAY is set, and time() otherwise. The time() fallback
+ * only has a resolution of whole seconds.
+ */
+U_CAPI UDate U_EXPORT2
+uprv_getRawUTCtime()
+{
+#if U_PLATFORM_USES_ONLY_WIN32_API
+
+ FileTimeConversion winTime;
+ GetSystemTimeAsFileTime(&winTime.fileTime);
+ return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); /* rebase from 1601 to 1970, then convert 100 ns units to ms */
+#else
+
+#if HAVE_GETTIMEOFDAY
+ struct timeval posixTime;
+ gettimeofday(&posixTime, NULL);
+ return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
+#else
+ time_t epochtime;
+ time(&epochtime);
+ return (UDate)epochtime * U_MILLIS_PER_SECOND;
+#endif
+
+#endif
+}
+
+/*-----------------------------------------------------------------------------
+ IEEE 754
+ These methods detect and return NaN and infinity values for doubles
+ conforming to IEEE 754. Platforms which support this standard include X86,
+ Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
+ If this doesn't work on your platform, you have non-IEEE floating-point, and
+ will need to code your own versions. A naive implementation is to return 0.0
+ for getNaN and getInfinity, and false for isNaN and isInfinite.
+ ---------------------------------------------------------------------------*/
+
+U_CAPI UBool U_EXPORT2
+uprv_isNaN(double number) /* returns true if number is a NaN */
+{
+#if IEEE_754
+ BitPatternConversion convertedNumber;
+ convertedNumber.d64 = number;
+ /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
+ return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); /* the mask drops the sign bit so that the sign of the NaN does not matter */
+
+#elif U_PLATFORM == U_PF_OS390
+ uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
+ sizeof(uint32_t));
+ uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
+ sizeof(uint32_t));
+
+ return ((highBits & 0x7F080000L) == 0x7F080000L) &&
+ (lowBits == 0x00000000L);
+
+#else
+ /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
+ /* you'll need to replace this default implementation with what's correct*/
+ /* for your platform.*/
+ return number != number;
+#endif
+}
+
+U_CAPI UBool U_EXPORT2
+uprv_isInfinite(double number) /* returns true if number is positive or negative infinity */
+{
+#if IEEE_754
+ BitPatternConversion convertedNumber;
+ convertedNumber.d64 = number;
+ /* Infinity is exactly 0x7FF0000000000000U. */
+ return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); /* the mask drops the sign bit so that -infinity matches too */
+#elif U_PLATFORM == U_PF_OS390
+ uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
+ sizeof(uint32_t));
+ uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
+ sizeof(uint32_t));
+
+ return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
+
+#else
+ /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
+ /* value, you'll need to replace this default implementation with what's*/
+ /* correct for your platform.*/
+ return number == (2.0 * number); /* NOTE(review): this is also true for 0.0 - confirm that is acceptable for the fallback */
+#endif
+}
+
+U_CAPI UBool U_EXPORT2
+uprv_isPositiveInfinity(double number) /* returns true if number is +infinity */
+{
+#if IEEE_754 || U_PLATFORM == U_PF_OS390
+ return (UBool)(number > 0 && uprv_isInfinite(number));
+#else
+ return uprv_isInfinite(number); /* NOTE(review): the fallback does not look at the sign, so it also accepts -infinity */
+#endif
+}
+
+U_CAPI UBool U_EXPORT2
+uprv_isNegativeInfinity(double number) /* returns true if number is -infinity */
+{
+#if IEEE_754 || U_PLATFORM == U_PF_OS390
+ return (UBool)(number < 0 && uprv_isInfinite(number));
+
+#else
+ uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
+ sizeof(uint32_t));
+ return((highBits & SIGN) && uprv_isInfinite(number)); /* the sign is read from the top bit of the high word */
+
+#endif
+}
+
+U_CAPI double U_EXPORT2
+uprv_getNaN() /* returns the quiet NaN constant gNan, or 0.0 on platforms without a known NaN */
+{
+#if IEEE_754 || U_PLATFORM == U_PF_OS390
+ return gNan.d64;
+#else
+ /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
+ /* you'll need to replace this default implementation with what's correct*/
+ /* for your platform.*/
+ return 0.0;
+#endif
+}
+
+U_CAPI double U_EXPORT2
+uprv_getInfinity() /* returns the +infinity constant gInf, or 0.0 on platforms without a known infinity */
+{
+#if IEEE_754 || U_PLATFORM == U_PF_OS390
+ return gInf.d64;
+#else
+ /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
+ /* value, you'll need to replace this default implementation with what's*/
+ /* correct for your platform.*/
+ return 0.0;
+#endif
+}
+
+U_CAPI double U_EXPORT2
+uprv_floor(double x) /* thin wrapper around the C library floor() */
+{
+ return floor(x);
+}
+
+U_CAPI double U_EXPORT2
+uprv_ceil(double x) /* thin wrapper around the C library ceil() */
+{
+ return ceil(x);
+}
+
+/** Rounds by adding one half and taking uprv_floor() of the sum. */
+U_CAPI double U_EXPORT2
+uprv_round(double x)
+{
+    const double shifted = x + 0.5;
+    return uprv_floor(shifted);
+}
+
+/** Thin wrapper around the C library fabs(). */
+U_CAPI double U_EXPORT2
+uprv_fabs(double x)
+{
+    const double magnitude = fabs(x);
+    return magnitude;
+}
+
+/** Thin wrapper around the C library modf(); y is passed straight through to it. */
+U_CAPI double U_EXPORT2
+uprv_modf(double x, double* y)
+{
+    const double fraction = modf(x, y);
+    return fraction;
+}
+
+/** Thin wrapper around the C library fmod(). */
+U_CAPI double U_EXPORT2
+uprv_fmod(double x, double y)
+{
+    const double remainder = fmod(x, y);
+    return remainder;
+}
+
+/** Thin wrapper around the C library "double pow(double x, double y)". */
+U_CAPI double U_EXPORT2
+uprv_pow(double x, double y)
+{
+    const double power = pow(x, y);
+    return power;
+}
+
+/** Returns 10 raised to the integer power x, computed with pow(). */
+U_CAPI double U_EXPORT2
+uprv_pow10(int32_t x)
+{
+    const double exponent = (double)x;
+    return pow(10.0, exponent);
+}
+
+/**
+ * Returns the larger of x and y.
+ * On IEEE 754 builds a NaN operand yields uprv_getNaN(), and when both
+ * operands are zero and x carries the sign bit, y is returned.
+ */
+U_CAPI double U_EXPORT2
+uprv_fmax(double x, double y)
+{
+#if IEEE_754
+    /* NaN in either operand wins */
+    if (uprv_isNaN(x)) {
+        return uprv_getNaN();
+    }
+    if (uprv_isNaN(y)) {
+        return uprv_getNaN();
+    }
+
+    /* -0 versus 0: x is the negative one, so y is the maximum */
+    const bool bothZero = (x == 0.0) && (y == 0.0);
+    if (bothZero && u_signBit(x)) {
+        return y;
+    }
+
+#endif
+
+    /* plain comparison covers every remaining case */
+    if (x > y) {
+        return x;
+    }
+    return y;
+}
+
+/**
+ * Returns the smaller of x and y.
+ * On IEEE 754 builds a NaN operand yields uprv_getNaN(), and when both
+ * operands are zero and y carries the sign bit, y is returned.
+ */
+U_CAPI double U_EXPORT2
+uprv_fmin(double x, double y)
+{
+#if IEEE_754
+    /* NaN in either operand wins */
+    if (uprv_isNaN(x)) {
+        return uprv_getNaN();
+    }
+    if (uprv_isNaN(y)) {
+        return uprv_getNaN();
+    }
+
+    /* -0 versus 0: y is the negative one, so y is the minimum */
+    const bool bothZero = (x == 0.0) && (y == 0.0);
+    if (bothZero && u_signBit(y)) {
+        return y;
+    }
+
+#endif
+
+    /* plain comparison covers every remaining case */
+    if (x > y) {
+        return y;
+    }
+    return x;
+}
+
+/**
+ * Adds a and b, stores the sum truncated to 32 bits in *res, and reports
+ * whether the exact sum did not fit in an int32_t.
+ */
+U_CAPI UBool U_EXPORT2
+uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
+    // NOTE: GCC and Clang offer __builtin_add_overflow, which could replace
+    // this widening arithmetic as an optimization.
+    const int64_t wide = static_cast<int64_t>(a) + static_cast<int64_t>(b);
+    const int32_t narrow = static_cast<int32_t>(wide);
+    *res = narrow;
+    return wide != narrow;
+}
+
+/**
+ * Multiplies a and b, stores the product truncated to 32 bits in *res, and
+ * reports whether the exact product did not fit in an int32_t.
+ */
+U_CAPI UBool U_EXPORT2
+uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) {
+    // NOTE: GCC and Clang offer __builtin_mul_overflow, which could replace
+    // this widening arithmetic as an optimization.
+    const int64_t wide = static_cast<int64_t>(a) * static_cast<int64_t>(b);
+    const int32_t narrow = static_cast<int32_t>(wide);
+    *res = narrow;
+    return wide != narrow;
+}
+
+/**
+ * Truncates the given double toward zero.
+ * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
+ * This is different than calling floor() or ceil():
+ * floor(3.3) = 3, floor(-3.3) = -4
+ * ceil(3.3) = 4, ceil(-3.3) = -3
+ *
+ * On IEEE 754 builds a NaN input yields uprv_getNaN() and an infinite input
+ * is returned unchanged, so -Infinity stays negative.
+ */
+U_CAPI double U_EXPORT2
+uprv_trunc(double d)
+{
+#if IEEE_754
+    /* handle error cases*/
+    if(uprv_isNaN(d))
+        return uprv_getNaN();
+    /* Infinity truncates to itself. Returning d instead of uprv_getInfinity()
+       preserves the sign of -Infinity. */
+    if(uprv_isInfinite(d))
+        return d;
+
+    if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */
+        return ceil(d);
+    else
+        return floor(d);
+
+#else
+    return d >= 0 ? floor(d) : ceil(d);
+
+#endif
+}
+
+/**
+ * Returns 2 raised to (DBL_MANT_DIG + 1), minus one, evaluated in double
+ * arithmetic.
+ */
+U_CAPI double U_EXPORT2
+uprv_maxMantissa(void)
+{
+    const double exponent = DBL_MANT_DIG + 1.0;
+    return pow(2.0, exponent) - 1.0;
+}
+
+/** Thin wrapper around the C library log(). */
+U_CAPI double U_EXPORT2
+uprv_log(double d)
+{
+    const double result = log(d);
+    return result;
+}
+
+U_CAPI void * U_EXPORT2
+uprv_maximumPtr(void * base) /* Returns an upper-bound pointer derived from base: the OS/400 segment limit described below, or U_MAX_PTR(base) elsewhere. */
+{
+#if U_PLATFORM == U_PF_OS400
+ /*
+ * With the provided function we should never be out of range of a given segment
+ * (a traditional/typical segment that is). Our segments have 5 bytes for the
+ * id and 3 bytes for the offset. The key is that the casting takes care of
+ * only retrieving the offset portion minus x1000. Hence, the smallest offset
+ * seen in a program is x001000 and when casted to an int would be 0.
+ * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
+ *
+ * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
+ * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
+ * This function determines the activation based on the pointer that is passed in and
+ * calculates the appropriate maximum available size for
+ * each pointer type (TERASPACE and non-TERASPACE)
+ *
+ * Unlike other operating systems, the pointer model isn't determined at
+ * compile time on i5/OS.
+ */
+ if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
+ /* if it is a TERASPACE pointer the max is 2GB - 4k */
+ return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); /* subtract the 32-bit offset part of base, then add the limit */
+ }
+ /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
+ return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
+
+#else
+ return U_MAX_PTR(base); /* every other platform defers to the U_MAX_PTR macro */
+#endif
+}
+
+/*---------------------------------------------------------------------------
+ Platform-specific Implementations
+ Try these, and if they don't work on your platform, then special case your
+ platform with new implementations.
+ ---------------------------------------------------------------------------*/
+
+/* Generic time zone layer -------------------------------------------------- */
+
+/* Time zone utilities */
+/** Runs the platform's U_TZSET() hook when the build defines one; otherwise does nothing. */
+U_CAPI void U_EXPORT2
+uprv_tzset()
+{
+#if !defined(U_TZSET)
+    /* no initialization*/
+#else
+    U_TZSET();
+#endif
+}
+
+U_CAPI int32_t U_EXPORT2
+uprv_timezone() /* Returns U_TIMEZONE when defined; otherwise GMT minus local time in seconds, measured via mktime(). */
+{
+#ifdef U_TIMEZONE
+ return U_TIMEZONE;
+#else
+ time_t t, t1, t2;
+ struct tm tmrec;
+ int32_t tdiff = 0;
+
+ time(&t);
+ uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); /* copy out at once: the result is replaced by the gmtime() copy below. NOTE(review): a NULL return from localtime() is not checked */
+#if U_PLATFORM != U_PF_IPHONE
+ UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
+#endif
+ t1 = mktime(&tmrec); /* local time in seconds*/
+ uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); /* same instant broken down as GMT, then re-read by mktime() as if it were local */
+ t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
+ tdiff = t2 - t1; /* time_t difference narrowed to int32_t */
+
+#if U_PLATFORM != U_PF_IPHONE
+ /* imitate NT behaviour, which returns same timezone offset to GMT for
+ winter and summer.
+ This does not work on all platforms. For instance, on glibc on Linux
+ and on Mac OS 10.5, tdiff calculated above remains the same
+ regardless of whether DST is in effect or not. iOS is another
+ platform where this does not work. Linux + glibc and Mac OS 10.5
+ have U_TIMEZONE defined so that this code is not reached.
+ */
+ if (dst_checked)
+ tdiff += 3600;
+#endif
+ return tdiff;
+#endif
+}
+
+/* Note that U_TZNAME does *not* have to be tzname, but if it is,
+ some platforms need to have it declared here. */
+
+#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
+/* RS6000 and others reject char **tzname. */
+extern U_IMPORT char *U_TZNAME[];
+#endif
+
+#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
+/* These platforms are likely to use Olson timezone IDs. */
+/* common targets of the symbolic link at TZDEFAULT are:
+ * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
+ * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
+ * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
+ * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
+ * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
+ * To avoid checking lots of paths, just check that the target path
+ * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
+ */
+
+#define CHECK_LOCALTIME_LINK 1
+#if U_PLATFORM_IS_DARWIN_BASED
+#include <tzfile.h>
+#define TZZONEINFO (TZDIR "/")
+#elif U_PLATFORM == U_PF_SOLARIS
+#define TZDEFAULT "/etc/localtime"
+#define TZZONEINFO "/usr/share/lib/zoneinfo/"
+#define TZ_ENV_CHECK "localtime"
+#else
+#define TZDEFAULT "/etc/localtime"
+#define TZZONEINFO "/usr/share/zoneinfo/"
+#endif
+#define TZZONEINFOTAIL "/zoneinfo/"
+#if U_HAVE_DIRENT_H
+#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
+/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
+ symlinked to /etc/localtime, which makes searchForTZFile return
+ 'localtime' when it's the first match. */
+#define TZFILE_SKIP2 "localtime"
+#define SEARCH_TZFILE
+#include <dirent.h> /* Needed to search through system timezone files */
+#endif
+static char gTimeZoneBuffer[PATH_MAX];
+static char *gTimeZoneBufferPtr = NULL;
+#endif
+
+#if !U_PLATFORM_USES_ONLY_WIN32_API
+#define isNonDigit(ch) (ch < '0' || '9' < ch)
+/*
+ * Decides whether id could be an Olson time zone ID (such as "Iceland")
+ * rather than a POSIX-style rule (such as "AST4ADT").
+ * An ID is accepted when it contains no digit and no comma, or when it is one
+ * of the four legacy US names listed below.
+ */
+static UBool isValidOlsonID(const char *id) {
+    static const char * const kLegacyIDs[] = {
+        "PST8PDT", "MST7MDT", "CST6CDT", "EST5EDT"
+    };
+    const char *cursor = id;
+
+    /* Advance to the first digit, comma, or the terminating NUL. */
+    for (; *cursor != 0; ++cursor) {
+        if (!isNonDigit(*cursor) || *cursor == ',') {
+            break;
+        }
+    }
+
+    /* Reaching the end means nothing suspicious was found. Values such as
+       "CST-7CDT", "CST6CDT5,J129,J131/19:30" or "GRNLNDST3GRNLNDDT" stop
+       early and are rejected unless they match a legacy name exactly. */
+    if (*cursor == 0) {
+        return (UBool)TRUE;
+    }
+    for (int32_t i = 0; i < (int32_t)(sizeof(kLegacyIDs) / sizeof(kLegacyIDs[0])); ++i) {
+        if (uprv_strcmp(id, kLegacyIDs[i]) == 0) {
+            return (UBool)TRUE;
+        }
+    }
+    return (UBool)FALSE;
+}
+
+/* Some Unix-like systems ship 'posix' and 'right' subdirectories under
+   /usr/share/zoneinfo. 'posix' replicates the top-level files (UTC based),
+   while 'right' holds the same set of names with different contents
+   (TAI, Int'l Atomic Time, based).
+   When the first match for /etc/localtime lands in one of them (usually
+   'posix', because the 'right' contents differ), or the TZ environment
+   variable points into one of them, createTimeZone fails: for example
+   'posix/America/New_York' is not an Olson timezone id, whereas
+   'America/New_York' is. So a leading 'posix/' or 'right/' is stepped over. */
+static void skipZoneIDPrefix(const char** id) {
+    const char *start = *id;
+    if (uprv_strncmp(start, "posix/", 6) != 0
+        && uprv_strncmp(start, "right/", 6) != 0)
+    {
+        return;
+    }
+    *id = start + 6;
+}
+#endif
+
+#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
+
+#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
+typedef struct OffsetZoneMapping { /* one row of OFFSET_ZONE_MAPPINGS: an abbreviation pair plus offset, mapped to an Olson ID */
+ int32_t offsetSeconds; /* matched against the offset argument of remapShortTimeZone() */
+ int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight time in June (U_DAYLIGHT_JUNE), 2=daylight time in December (U_DAYLIGHT_DECEMBER) */
+ const char *stdID; /* standard-time abbreviation */
+ const char *dstID; /* daylight-time abbreviation */
+ const char *olsonID; /* Olson ID returned on a match */
+} OffsetZoneMapping;
+
+enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 }; /* values stored in OffsetZoneMapping::daylightType */
+
+/*
+This list tries to disambiguate a set of abbreviated timezone IDs and offsets
+and maps it to an Olson ID.
+Before adding anything to this list, take a look at
+icu/source/tools/tzcode/tz.alias
+Sometimes no daylight savings (0) is important to define due to aliases.
+This list can be tested with icu/source/test/compat/tzone.pl
+More values could be added to daylightType to increase precision.
+
+Columns: offsetSeconds, daylightType, stdID, dstID, olsonID.
+Offsets are in seconds and positive for zones west of GMT (e.g. 18000 for US/Eastern).
+remapShortTimeZone() returns the first row whose first four columns all match,
+so row order matters where rows are otherwise identical.
+*/
+static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
+ {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
+ {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
+ {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
+ {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
+ {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
+ {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
+ {-36000, 2, "EST", "EST", "Australia/Sydney"},
+ {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
+ {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
+ {-34200, 2, "CST", "CST", "Australia/South"},
+ {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
+ {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
+ {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
+ {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
+ {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
+ {-28800, 2, "WST", "WST", "Australia/West"},
+ {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
+ {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
+ {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
+ {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
+ {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
+ {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
+ {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
+ {-14400, 1, "AZT", "AZST", "Asia/Baku"},
+ {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
+ {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
+ {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
+ {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
+ {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
+ {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
+ {-3600, 0, "CET", "WEST", "Africa/Algiers"},
+ {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
+ {0, 1, "GMT", "IST", "Europe/Dublin"},
+ {0, 1, "GMT", "BST", "Europe/London"},
+ {0, 0, "WET", "WEST", "Africa/Casablanca"},
+ {0, 0, "WET", "WET", "Africa/El_Aaiun"},
+ {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
+ {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
+ {10800, 1, "PMST", "PMDT", "America/Miquelon"},
+ {10800, 2, "UYT", "UYST", "America/Montevideo"},
+ {10800, 1, "WGT", "WGST", "America/Godthab"},
+ {10800, 2, "BRT", "BRST", "Brazil/East"},
+ {12600, 1, "NST", "NDT", "America/St_Johns"},
+ {14400, 1, "AST", "ADT", "Canada/Atlantic"},
+ {14400, 2, "AMT", "AMST", "America/Cuiaba"},
+ {14400, 2, "CLT", "CLST", "Chile/Continental"},
+ {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
+ {14400, 2, "PYT", "PYST", "America/Asuncion"},
+ {18000, 1, "CST", "CDT", "America/Havana"},
+ {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
+ {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
+ {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
+ {21600, 0, "CST", "CDT", "America/Guatemala"},
+ {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
+ {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
+ {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
+ {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
+ {32400, 1, "AKST", "AKDT", "US/Alaska"},
+ {36000, 1, "HAST", "HADT", "US/Aleutian"}
+};
+
+/*#define DEBUG_TZNAME*/
+
+/*
+ * Looks up the Olson ID for a pair of time zone abbreviations.
+ * Returns the olsonID of the first OFFSET_ZONE_MAPPINGS row whose offset,
+ * daylight type, standard abbreviation and daylight abbreviation all match,
+ * or NULL when no row matches.
+ */
+static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
+{
+#ifdef DEBUG_TZNAME
+    fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
+#endif
+    const struct OffsetZoneMapping *entry = OFFSET_ZONE_MAPPINGS;
+    const struct OffsetZoneMapping * const tableEnd =
+        OFFSET_ZONE_MAPPINGS + UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS);
+
+    for (; entry != tableEnd; ++entry) {
+        if (offset != entry->offsetSeconds) {
+            continue;
+        }
+        if (daylightType != entry->daylightType) {
+            continue;
+        }
+        if (strcmp(entry->stdID, stdID) != 0) {
+            continue;
+        }
+        if (strcmp(entry->dstID, dstID) != 0) {
+            continue;
+        }
+        return entry->olsonID;
+    }
+    return NULL;
+}
+#endif
+
+#ifdef SEARCH_TZFILE
+#define MAX_READ_SIZE 512
+
+typedef struct DefaultTZInfo { /* state for the default TZ file, reused across compareBinaryFiles() calls */
+ char* defaultTZBuffer; /* contents of the default TZ file; allocated on first need by compareBinaryFiles() */
+ int64_t defaultTZFileSize; /* size of the default TZ file from ftell(); 0 means not measured yet */
+ FILE* defaultTZFilePtr; /* handle to the default TZ file; opened by compareBinaryFiles() when NULL */
+ UBool defaultTZstatus; /* NOTE(review): initialized by the caller but not read by compareBinaryFiles() */
+ int32_t defaultTZPosition; /* offset into defaultTZBuffer of the next chunk to compare */
+} DefaultTZInfo;
+
+/*
+ * This method compares the two files given to see if they are a match.
+ * It is currently used to compare two TZ files.
+ *
+ * defaultTZFileName is opened only when tzInfo->defaultTZFilePtr is NULL; its
+ * handle, size and contents are kept in tzInfo so that later calls can reuse
+ * them. The caller is responsible for closing the handle and freeing the buffer.
+ * Returns TRUE when both files have the same size and the same bytes, and
+ * FALSE on any mismatch, open failure, allocation failure or read failure.
+ */
+static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
+    FILE* file;
+    int64_t sizeFile;
+    int64_t sizeFileLeft;
+    int32_t sizeFileRead;
+    int32_t sizeFileToRead;
+    char bufferFile[MAX_READ_SIZE];
+    UBool result = TRUE;
+
+    if (tzInfo->defaultTZFilePtr == NULL) {
+        tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
+    }
+    file = fopen(TZFileName, "r");
+
+    tzInfo->defaultTZPosition = 0; /* reset position to begin search */
+
+    if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
+        /* First check that the file sizes are equal. */
+        if (tzInfo->defaultTZFileSize == 0) {
+            fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
+            tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
+        }
+        fseek(file, 0, SEEK_END);
+        sizeFile = ftell(file);
+        sizeFileLeft = sizeFile;
+
+        /* ftell() reports failure as -1; never treat that as a usable size. */
+        if (sizeFile < 0 || sizeFile != tzInfo->defaultTZFileSize) {
+            result = FALSE;
+        } else {
+            /* Store the data from the files in separate buffers and
+             * compare each byte to determine equality.
+             */
+            if (tzInfo->defaultTZBuffer == NULL) {
+                char* defaultData = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
+                if (defaultData != NULL) {
+                    rewind(tzInfo->defaultTZFilePtr);
+                    size_t defaultRead = fread(defaultData, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
+                    if ((int64_t)defaultRead != tzInfo->defaultTZFileSize) {
+                        /* Short read: do not cache a partially filled buffer. */
+                        uprv_free(defaultData);
+                        defaultData = NULL;
+                    }
+                }
+                tzInfo->defaultTZBuffer = defaultData;
+            }
+
+            if (tzInfo->defaultTZBuffer == NULL) {
+                /* Allocation or read of the default file failed. */
+                result = FALSE;
+            } else {
+                rewind(file);
+                while(sizeFileLeft > 0) {
+                    uprv_memset(bufferFile, 0, MAX_READ_SIZE);
+                    sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? (int32_t)sizeFileLeft : MAX_READ_SIZE;
+
+                    sizeFileRead = (int32_t)fread(bufferFile, 1, sizeFileToRead, file);
+                    /* A zero-length read would otherwise loop forever, since
+                       sizeFileLeft would never shrink. */
+                    if (sizeFileRead <= 0
+                        || memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
+                        result = FALSE;
+                        break;
+                    }
+                    sizeFileLeft -= sizeFileRead;
+                    tzInfo->defaultTZPosition += sizeFileRead;
+                }
+            }
+        }
+    } else {
+        result = FALSE;
+    }
+
+    if (file != NULL) {
+        fclose(file);
+    }
+
+    return result;
+}
+
+
+/* dirent also lists two entries: "." and ".." that we can safely ignore. */
+#define SKIP1 "."
+#define SKIP2 ".."
+static UBool U_CALLCONV putil_cleanup(void);
+static CharString *gSearchTZFileResult = NULL;
+
+/*
+ * This method recursively traverses the directory given for a matching TZ file and returns the first match.
+ * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
+ * path must end with '/': entry names are appended to it without adding a separator.
+ * tzInfo carries the default TZ file state shared by all compareBinaryFiles() calls.
+ * Returns a pointer into gSearchTZFileResult holding the matching path with its first
+ * sizeof(TZZONEINFO)-1 characters removed and skipZoneIDPrefix() applied, or NULL when nothing matched.
+ */
+static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
+ DIR* dirp = NULL;
+ struct dirent* dirEntry = NULL;
+ char* result = NULL;
+ UErrorCode status = U_ZERO_ERROR;
+
+ /* Save the current path */
+ CharString curpath(path, -1, status);
+ if (U_FAILURE(status)) {
+ goto cleanupAndReturn;
+ }
+
+ dirp = opendir(path);
+ if (dirp == NULL) {
+ goto cleanupAndReturn;
+ }
+
+ if (gSearchTZFileResult == NULL) { /* first use: create the global result holder and register its cleanup */
+ gSearchTZFileResult = new CharString;
+ if (gSearchTZFileResult == NULL) {
+ goto cleanupAndReturn;
+ }
+ ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
+ }
+
+ /* Check each entry in the directory. */
+ while((dirEntry = readdir(dirp)) != NULL) {
+ const char* dirName = dirEntry->d_name;
+ if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0
+ && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
+ /* Create a newpath with the new entry to test each entry in the directory. */
+ CharString newpath(curpath, status);
+ newpath.append(dirName, -1, status);
+ if (U_FAILURE(status)) {
+ break;
+ }
+
+ DIR* subDirp = NULL;
+ if ((subDirp = opendir(newpath.data())) != NULL) { /* opendir() succeeding is the directory test */
+ /* If this new path is a directory, make a recursive call with the newpath. */
+ closedir(subDirp);
+ newpath.append('/', status);
+ if (U_FAILURE(status)) {
+ break;
+ }
+ result = searchForTZFile(newpath.data(), tzInfo);
+ /*
+ Have to get out here. Otherwise, we'd keep looking
+ and return the first match in the top-level directory
+ if there's a match in the top-level. If not, this function
+ would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
+ It worked without this in most cases because we have a fallback of calling
+ localtime_r to figure out the default timezone.
+ */
+ if (result != NULL)
+ break;
+ } else {
+ if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
+ int32_t amountToSkip = sizeof(TZZONEINFO) - 1; /* length of the zoneinfo root prefix to drop */
+ if (amountToSkip > newpath.length()) {
+ amountToSkip = newpath.length();
+ }
+ const char* zoneid = newpath.data() + amountToSkip;
+ skipZoneIDPrefix(&zoneid);
+ gSearchTZFileResult->clear();
+ gSearchTZFileResult->append(zoneid, -1, status); /* copy: newpath goes out of scope at the end of this iteration */
+ if (U_FAILURE(status)) {
+ break;
+ }
+ result = gSearchTZFileResult->data();
+ /* Get out after the first one found. */
+ break;
+ }
+ }
+ }
+ }
+
+ cleanupAndReturn:
+ if (dirp) {
+ closedir(dirp);
+ }
+ return result;
+}
+#endif
+
+#if U_PLATFORM == U_PF_ANDROID
+typedef int(system_property_read_callback)(const prop_info* info, /* signature used for the dlsym'd "__system_property_read_callback" */
+ void (*callback)(void* cookie,
+ const char* name,
+ const char* value,
+ uint32_t serial),
+ void* cookie);
+typedef int(system_property_get)(const char*, char*); /* signature used for the dlsym'd "__system_property_get" */
+
+static char gAndroidTimeZone[PROP_VALUE_MAX] = { '\0' }; /* cached value of the "persist.sys.timezone" property; empty when unknown */
+
+static void u_property_read(void* cookie, const char* name, const char* value, /* property callback: copies value into the buffer passed as cookie */
+ uint32_t serial) {
+ uprv_strcpy((char* )cookie, value); /* NOTE(review): unbounded copy - assumes value fits the PROP_VALUE_MAX buffer; confirm */
+}
+#endif
+
+U_CAPI void U_EXPORT2
+uprv_tzname_clear_cache(void) /* Resets the cached time zone state consulted by uprv_tzname(); on Android it also re-reads the system property. */
+{
+#if U_PLATFORM == U_PF_ANDROID
+ /* Android's timezone is stored in system property. */
+ gAndroidTimeZone[0] = '\0';
+ void* libc = dlopen("libc.so", RTLD_NOLOAD);
+ if (libc) {
+ /* Android API 26+ has new API to get system property and old API
+ * (__system_property_get) is deprecated */
+ system_property_read_callback* property_read_callback =
+ (system_property_read_callback*)dlsym(
+ libc, "__system_property_read_callback");
+ if (property_read_callback) {
+ const prop_info* info =
+ __system_property_find("persist.sys.timezone");
+ if (info) {
+ property_read_callback(info, &u_property_read, gAndroidTimeZone); /* u_property_read() fills gAndroidTimeZone */
+ }
+ } else { /* symbol not found: fall back to the older API */
+ system_property_get* property_get =
+ (system_property_get*)dlsym(libc, "__system_property_get");
+ if (property_get) {
+ property_get("persist.sys.timezone", gAndroidTimeZone);
+ }
+ }
+ dlclose(libc);
+ }
+#endif
+
+#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
+ gTimeZoneBufferPtr = NULL; /* forces uprv_tzname() to re-examine TZDEFAULT on its next call */
+#endif
+}
+
+U_CAPI const char* U_EXPORT2
+uprv_tzname(int n) /* Returns a best-effort ID for the default time zone.
+ * Sources are tried in this order, depending on the platform macros:
+ * Windows detection; the TZ environment variable (or the Android property cache);
+ * the target of the TZDEFAULT symlink; a search of TZZONEINFO for a file identical to TZDEFAULT;
+ * and finally U_TZNAME remapped through remapShortTimeZone().
+ * n is only used to index U_TZNAME in the last fallback.
+ * On Windows the caller frees the result (see the comments below); elsewhere the result
+ * points to static, global or environment storage.
+ */
+{
+ (void)n; // Avoid unreferenced parameter warning.
+ const char *tzid = NULL;
+#if U_PLATFORM_USES_ONLY_WIN32_API
+ tzid = uprv_detectWindowsTimeZone();
+
+ if (tzid != NULL) {
+ return tzid;
+ }
+
+#ifndef U_TZNAME
+ // The return value is free'd in timezone.cpp on Windows because
+ // the other code path returns a pointer to a heap location.
+ // If we don't have a name already, then tzname wouldn't be any
+ // better, so just fall back.
+ return uprv_strdup("");
+#endif // !U_TZNAME
+
+#else
+
+/*#if U_PLATFORM_IS_DARWIN_BASED
+ int ret;
+
+ tzid = getenv("TZFILE");
+ if (tzid != NULL) {
+ return tzid;
+ }
+#endif*/
+
+/* This code can be temporarily disabled to test tzname resolution later on. */
+#ifndef DEBUG_TZNAME
+#if U_PLATFORM == U_PF_ANDROID
+ tzid = gAndroidTimeZone;
+#else
+ tzid = getenv("TZ");
+#endif
+ if (tzid != NULL && isValidOlsonID(tzid)
+#if U_PLATFORM == U_PF_SOLARIS
+ /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
+ && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
+#endif
+ ) {
+ /* The colon forces tzset() to treat the remainder as zoneinfo path */
+ if (tzid[0] == ':') {
+ tzid++;
+ }
+ /* This might be a good Olson ID. */
+ skipZoneIDPrefix(&tzid);
+ return tzid;
+ }
+ /* else U_TZNAME will give a better result. */
+#endif
+
+#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
+ /* Caller must handle threading issues */
+ if (gTimeZoneBufferPtr == NULL) {
+ /*
+ This is a trick to look at the name of the link to get the Olson ID
+ because the tzfile contents is underspecified.
+ This isn't guaranteed to work because it may not be a symlink.
+ */
+ int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1); /* one byte is reserved for the terminator written below */
+ if (0 < ret) {
+ int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
+ gTimeZoneBuffer[ret] = 0; /* readlink() does not NUL-terminate */
+ char * tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
+
+ if (tzZoneInfoTailPtr != NULL
+ && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen))
+ {
+ return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen); /* cache and return the text after "/zoneinfo/" */
+ }
+ } else {
+#if defined(SEARCH_TZFILE)
+ DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
+ if (tzInfo != NULL) {
+ tzInfo->defaultTZBuffer = NULL;
+ tzInfo->defaultTZFileSize = 0;
+ tzInfo->defaultTZFilePtr = NULL;
+ tzInfo->defaultTZstatus = FALSE;
+ tzInfo->defaultTZPosition = 0;
+
+ gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
+
+ /* Free previously allocated memory */
+ if (tzInfo->defaultTZBuffer != NULL) {
+ uprv_free(tzInfo->defaultTZBuffer);
+ }
+ if (tzInfo->defaultTZFilePtr != NULL) {
+ fclose(tzInfo->defaultTZFilePtr);
+ }
+ uprv_free(tzInfo);
+ }
+
+ if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
+ return gTimeZoneBufferPtr;
+ }
+#endif
+ }
+ }
+ else {
+ return gTimeZoneBufferPtr; /* result cached by an earlier call */
+ }
+#endif
+#endif
+
+#ifdef U_TZNAME
+#if U_PLATFORM_USES_ONLY_WIN32_API
+ /* The return value is free'd in timezone.cpp on Windows because
+ * the other code path returns a pointer to a heap location. */
+ return uprv_strdup(U_TZNAME[n]);
+#else
+ /*
+ U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
+ So we remap the abbreviation to an olson ID.
+
+ Since Windows exposes a little more timezone information,
+ we normally don't use this code on Windows because
+ uprv_detectWindowsTimeZone should have already given the correct answer.
+ */
+ {
+ struct tm juneSol, decemberSol;
+ int daylightType;
+ static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
+ static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
+
+ /* This probing will tell us when daylight savings occurs. */
+ localtime_r(&juneSolstice, &juneSol);
+ localtime_r(&decemberSolstice, &decemberSol);
+ if(decemberSol.tm_isdst > 0) { /* December is tested first, so it wins if both report daylight time */
+ daylightType = U_DAYLIGHT_DECEMBER;
+ } else if(juneSol.tm_isdst > 0) {
+ daylightType = U_DAYLIGHT_JUNE;
+ } else {
+ daylightType = U_DAYLIGHT_NONE;
+ }
+ tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
+ if (tzid != NULL) {
+ return tzid;
+ }
+ }
+ return U_TZNAME[n]; /* no mapping found: fall back to the raw abbreviation */
+#endif
+#else
+ return "";
+#endif
+}
+
+/* Get and set the ICU data directory --------------------------------------- */
+
+static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER;
+static char *gDataDirectory = NULL;
+
+UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
+static CharString *gTimeZoneFilesDirectory = NULL;
+
+#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
+ static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */
+ static bool gCorrectedPOSIXLocaleHeapAllocated = false;
+#endif
+
+/*
+ * Cleanup callback registered with ucln_common_registerCleanup().
+ * Releases the data directory string, the time zone files directory, the
+ * TZ file search result and the corrected POSIX locale, and resets the
+ * init-once guards. Always returns TRUE.
+ */
+static UBool U_CALLCONV putil_cleanup(void)
+{
+    /* Only a non-empty data directory string is freed. */
+    if (gDataDirectory != NULL && gDataDirectory[0] != 0) {
+        uprv_free(gDataDirectory);
+    }
+    gDataDirectory = NULL;
+    gDataDirInitOnce.reset();
+
+    delete gTimeZoneFilesDirectory;
+    gTimeZoneFilesDirectory = NULL;
+    gTimeZoneFilesInitOnce.reset();
+
+#ifdef SEARCH_TZFILE
+    delete gSearchTZFileResult;
+    gSearchTZFileResult = NULL;
+#endif
+
+#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
+    /* The locale string is released only when it was heap allocated. */
+    if (gCorrectedPOSIXLocale != NULL && gCorrectedPOSIXLocaleHeapAllocated) {
+        char *ownedLocale = const_cast<char *>(gCorrectedPOSIXLocale);
+        uprv_free(ownedLocale);
+        gCorrectedPOSIXLocale = NULL;
+        gCorrectedPOSIXLocaleHeapAllocated = false;
+    }
+#endif
+    return TRUE;
+}
+
+/*
+ * Set the data directory.
+ * A copy of the passed string becomes the global data directory, with every
+ * alternate file separator replaced by the primary one. NULL or "" selects a
+ * shared empty string, so the global is never left NULL by this function.
+ * If the copy cannot be allocated the current setting is left untouched.
+ */
+U_CAPI void U_EXPORT2
+u_setDataDirectory(const char *directory) {
+    char *newDataDir = NULL;
+
+    if (directory != NULL && *directory != 0) {
+        const int32_t length = (int32_t)uprv_strlen(directory);
+        newDataDir = (char *)uprv_malloc(length + 2);
+        /* Exit out if newDataDir could not be created. */
+        if (newDataDir == NULL) {
+            return;
+        }
+        uprv_strcpy(newDataDir, directory);
+
+#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
+        for (char *sep = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR);
+             sep != NULL;
+             sep = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
+            *sep = U_FILE_SEP_CHAR;
+        }
+#endif
+    }
+    else {
+        /* A small optimization to prevent the malloc and copy when the
+           shared library is used; it also guarantees a non-NULL value. */
+        newDataDir = (char *)"";
+    }
+
+    /* The empty string is never heap allocated, so it is never freed. */
+    if (gDataDirectory != NULL && gDataDirectory[0] != 0) {
+        uprv_free(gDataDirectory);
+    }
+    gDataDirectory = newDataDir;
+    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
+}
+
+/**
+ * Reports whether path is absolute: it starts with a file separator (primary
+ * or alternate) or, on Win32-only platforms, with a drive letter and ':'.
+ * NULL and empty paths are not absolute.
+ */
+U_CAPI UBool U_EXPORT2
+uprv_pathIsAbsolute(const char *path)
+{
+    if (path == NULL || path[0] == 0) {
+        return FALSE;
+    }
+
+    const char first = path[0];
+    if (first == U_FILE_SEP_CHAR) {
+        return TRUE;
+    }
+
+#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
+    if (first == U_FILE_ALT_SEP_CHAR) {
+        return TRUE;
+    }
+#endif
+
+#if U_PLATFORM_USES_ONLY_WIN32_API
+    const bool isDriveLetter = ((first >= 'A') && (first <= 'Z')) ||
+                               ((first >= 'a') && (first <= 'z'));
+    if (isDriveLetter && path[1] == ':') {
+        return TRUE;
+    }
+#endif
+
+    return FALSE;
+}
+
+/* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
+ (needed for some Darwin ICU build environments) */
+#if U_PLATFORM_IS_DARWIN_BASED && TARGET_OS_SIMULATOR
+# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
+# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
+# endif
+#endif
+
+#if defined(ICU_DATA_DIR_WINDOWS)
+// Helper function to get the ICU Data Directory under the Windows directory location.
+// Writes "<windows dir><separator>" followed by ICU_DATA_DIR_WINDOWS into directoryBuffer.
+// Returns TRUE on success; returns FALSE, leaving directoryBuffer unwritten, when the
+// Windows path cannot be obtained or converted, or the result would not fit in bufferLength.
+static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
+{
+ wchar_t windowsPath[MAX_PATH];
+ char windowsPathUtf8[MAX_PATH];
+
+ UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath));
+ if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) {
+ // Convert UTF-16 to a UTF-8 string.
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t windowsPathUtf8Len = 0;
+ u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)),
+ &windowsPathUtf8Len, reinterpret_cast<const UChar*>(windowsPath), -1, &status);
+
+ if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) && // the length check leaves room for one separator plus the terminator
+ (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) {
+ // Ensure it always has a separator, so we can append the ICU data path.
+ if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) {
+ windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR;
+ windowsPathUtf8[windowsPathUtf8Len] = '\0';
+ }
+ // Check if the concatenated string will fit.
+ if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) {
+ uprv_strcpy(directoryBuffer, windowsPathUtf8);
+ uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
+ return TRUE;
+ }
+ }
+ }
+
+ return FALSE;
+}
+#endif
+
+static void U_CALLCONV dataDirectoryInitFn() { /* Init-once body run by u_getDataDirectory(): picks a data directory and installs it with u_setDataDirectory(). */
+ /* If we already have the directory, then return immediately. Will happen if user called
+ * u_setDataDirectory().
+ */
+ if (gDataDirectory) {
+ return;
+ }
+
+ const char *path = NULL;
+#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
+ char datadir_path_buffer[PATH_MAX];
+#endif
+
+ /*
+ When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
+ override ICU's data with the ICU_DATA environment variable. This prevents
+ problems where multiple custom copies of ICU's specific version of data
+ are installed on a system. Either the application must define the data
+ directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
+ ICU, set the data with udata_setCommonData or trust that all of the
+ required data is contained in ICU's data library that contains
+ the entry point defined by U_ICUDATA_ENTRY_POINT.
+
+ There may also be some platforms where environment variables
+ are not allowed.
+ */
+# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
+ /* First try to get the environment variable */
+# if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv
+ path=getenv("ICU_DATA");
+# endif
+# endif
+
+ /* ICU_DATA_DIR may be set as a compile option.
+ * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
+ * and is used only when data is built in archive mode eliminating the need
+ * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
+ * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
+ * set their own path.
+ */
+#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
+ if(path==NULL || *path==0) { /* the environment gave nothing usable: use the compiled-in default */
+# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
+ const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
+# endif
+# ifdef ICU_DATA_DIR
+ path=ICU_DATA_DIR;
+# else
+ path=U_ICU_DATA_DEFAULT_DIR;
+# endif
+# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
+ if (prefix != NULL) {
+ snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path); /* prepend the prefix; snprintf() truncates at PATH_MAX */
+ path=datadir_path_buffer;
+ }
+# endif
+ }
+#endif
+
+#if defined(ICU_DATA_DIR_WINDOWS)
+ char datadir_path_buffer[MAX_PATH]; /* NOTE(review): same name as the buffer declared above when ICU_DATA_DIR_PREFIX_ENV_VAR is also defined - confirm the two are never enabled together */
+ if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
+ path = datadir_path_buffer; /* overrides any path chosen above */
+ }
+#endif
+
+ if(path==NULL) {
+ /* It looks really bad, set it to something. */
+ path = "";
+ }
+
+ u_setDataDirectory(path); /* makes its own copy, so the stack buffers above may go out of scope */
+ return;
+}
+
+/**
+ * Returns the global data directory, running dataDirectoryInitFn() through
+ * the gDataDirInitOnce guard first.
+ */
+U_CAPI const char * U_EXPORT2
+u_getDataDirectory(void) {
+    umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
+    const char *currentDir = gDataDirectory;
+    return currentDir;
+}
+
+ /*
+ * Replaces the contents of gTimeZoneFilesDirectory with `path`.
+ * Does nothing when `status` already indicates a failure. On platforms whose
+ * alternate file separator differs from the primary one, every alternate
+ * separator in the stored string is rewritten to the primary separator.
+ */
+ static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
+     if (U_FAILURE(status)) {
+         return;
+     }
+     gTimeZoneFilesDirectory->clear();
+     gTimeZoneFilesDirectory->append(path, status);
+ #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
+     /* Normalize separators in place, one occurrence per iteration. */
+     for (char *sep = uprv_strchr(gTimeZoneFilesDirectory->data(), U_FILE_ALT_SEP_CHAR);
+          sep != NULL;
+          sep = uprv_strchr(sep, U_FILE_ALT_SEP_CHAR)) {
+         *sep = U_FILE_SEP_CHAR;
+     }
+ #endif
+ }
+
+#define TO_STRING(x) TO_STRING_2(x)
+#define TO_STRING_2(x) #x
+
+ /*
+ * Initializer for gTimeZoneFilesDirectory; it is handed to umtx_initOnce by
+ * u_getTimeZoneFilesDirectory() and u_setTimeZoneFilesDirectory().
+ *
+ * Allocates the CharString, registers putil_cleanup, and then chooses the
+ * directory:
+ * - UWP builds: starts from "" and, when ICU_DATA_DIR_WINDOWS is defined and
+ * the lookup succeeds, uses the ICU data directory under the Windows directory.
+ * - other builds: uses the ICU_TIMEZONE_FILES_DIR environment variable.
+ * - if the result is NULL and U_TIMEZONE_FILES_DIR is defined, uses that
+ * build-time value; if it is still NULL, uses "".
+ * When ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR is defined and that environment
+ * variable is set, its value is prepended to the chosen directory.
+ *
+ * status is set to U_MEMORY_ALLOCATION_ERROR if the CharString cannot be
+ * allocated; otherwise it receives whatever setTimeZoneFilesDir reports.
+ */
+ static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
+ U_ASSERT(gTimeZoneFilesDirectory == NULL);
+ ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
+ gTimeZoneFilesDirectory = new CharString();
+ if (gTimeZoneFilesDirectory == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+
+ const char *dir = "";
+
+ #if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
+ // Scratch space for "<prefix><dir>"; declared at function scope because
+ // dir may point into it until setTimeZoneFilesDir() has copied the string.
+ char timezonefilesdir_path_buffer[PATH_MAX];
+ const char *prefix = getenv(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR);
+ #endif
+
+ #if U_PLATFORM_HAS_WINUWP_API == 1
+ // The UWP version does not support the environment variable setting.
+
+ # if defined(ICU_DATA_DIR_WINDOWS)
+ // When using the Windows system data, we can possibly pick up time zone data from the Windows directory.
+ char datadir_path_buffer[MAX_PATH];
+ if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
+ dir = datadir_path_buffer;
+ }
+ # endif
+
+ #else
+ dir = getenv("ICU_TIMEZONE_FILES_DIR");
+ #endif // U_PLATFORM_HAS_WINUWP_API
+
+ #if defined(U_TIMEZONE_FILES_DIR)
+ if (dir == NULL) {
+ // Build time configuration setting.
+ dir = TO_STRING(U_TIMEZONE_FILES_DIR);
+ }
+ #endif
+
+ if (dir == NULL) {
+ dir = "";
+ }
+
+ #if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
+ if (prefix != NULL) {
+ // snprintf bounds the write to PATH_MAX bytes, so an over-long
+ // prefix+dir combination is truncated rather than overflowing.
+ snprintf(timezonefilesdir_path_buffer, PATH_MAX, "%s%s", prefix, dir);
+ dir = timezonefilesdir_path_buffer;
+ }
+ #endif
+
+ setTimeZoneFilesDir(dir, status);
+ }
+
+
+ /*
+ * Returns the time zone files directory, initializing it on first use.
+ * An empty string is returned when *status indicates a failure.
+ */
+ U_CAPI const char * U_EXPORT2
+ u_getTimeZoneFilesDirectory(UErrorCode *status) {
+     umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
+     if (U_FAILURE(*status)) {
+         return "";
+     }
+     return gTimeZoneFilesDirectory->data();
+ }
+
+ /*
+ * Sets the time zone files directory to `path`.
+ * The default initialization is run first (if it has not happened yet) so
+ * that gTimeZoneFilesDirectory exists before setTimeZoneFilesDir() is called.
+ * Errors are reported through *status.
+ */
+ U_CAPI void U_EXPORT2
+ u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
+ umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
+ setTimeZoneFilesDir(path, *status);
+
+ // Note: this function does some extra churn, first setting based on the
+ // environment, then immediately replacing with the value passed in.
+ // The logic is simpler that way, and performance shouldn't be an issue.
+ }
+
+
+#if U_POSIX_LOCALE
+ /* A helper function used by uprv_getPOSIXIDForDefaultLocale and
+ * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
+ * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories:
+ * for any other category no lookup is done and "en_US_POSIX" is returned.
+ *
+ * Lookup order: setlocale(category, NULL); if that yields NULL, "C" or
+ * "POSIX", the LC_ALL environment variable, then the variable named after
+ * the category (LC_MESSAGES or LC_CTYPE), then LANG. A final NULL, "C" or
+ * "POSIX" result is mapped to "en_US_POSIX", so the return value is never NULL.
+ */
+ static const char *uprv_getPOSIXIDForCategory(int category)
+ {
+ const char* posixID = NULL;
+ if (category == LC_MESSAGES || category == LC_CTYPE) {
+ /*
+ * On Solaris two different calls to setlocale can result in
+ * different values. Only get this value once.
+ *
+ * We must check this first because an application can set this.
+ *
+ * LC_ALL can't be used because it's platform dependent. The LANG
+ * environment variable seems to affect LC_CTYPE variable by default.
+ * Here is what setlocale(LC_ALL, NULL) can return.
+ * HPUX can return 'C C C C C C C'
+ * Solaris can return /en_US/C/C/C/C/C on the second try.
+ * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
+ *
+ * The default codepage detection also needs to use LC_CTYPE.
+ *
+ * Do not call setlocale(LC_*, "")! Using an empty string instead
+ * of NULL, will modify the libc behavior.
+ */
+ posixID = setlocale(category, NULL);
+ if ((posixID == 0)
+ || (uprv_strcmp("C", posixID) == 0)
+ || (uprv_strcmp("POSIX", posixID) == 0))
+ {
+ /* Maybe we got some garbage. Try something more reasonable */
+ posixID = getenv("LC_ALL");
+ /* Solaris speaks POSIX - See IEEE Std 1003.1-2008
+ * This is needed to properly handle empty env. variables
+ */
+ /* Both preprocessor branches below open the same two nested if-blocks;
+ * the Solaris variant additionally treats an empty string as unset.
+ * The closing braces after the #endif are shared by both variants.
+ */
+ #if U_PLATFORM == U_PF_SOLARIS
+ if ((posixID == 0) || (posixID[0] == '\0')) {
+ posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
+ if ((posixID == 0) || (posixID[0] == '\0')) {
+ #else
+ if (posixID == 0) {
+ posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
+ if (posixID == 0) {
+ #endif
+ posixID = getenv("LANG");
+ }
+ }
+ }
+ }
+ if ((posixID==0)
+ || (uprv_strcmp("C", posixID) == 0)
+ || (uprv_strcmp("POSIX", posixID) == 0))
+ {
+ /* Nothing worked. Give it a nice POSIX default value. */
+ posixID = "en_US_POSIX";
+ // Note: this test will not catch 'C.UTF-8',
+ // that will be handled in uprv_getDefaultLocaleID().
+ // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
+ // caller which expects to see "en_US_POSIX" in many branches.
+ }
+ return posixID;
+ }
+
+ /* Returns the POSIX id of the default locale as reported for LC_MESSAGES
+ * (and, indirectly, LC_ALL and LANG). The value is looked up on the first
+ * call and remembered in a function-local static for later calls.
+ */
+ static const char *uprv_getPOSIXIDForDefaultLocale(void)
+ {
+     static const char* cachedMessagesID = NULL;
+     if (cachedMessagesID != NULL) {
+         return cachedMessagesID;
+     }
+     cachedMessagesID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
+     return cachedMessagesID;
+ }
+
+#if !U_CHARSET_IS_UTF8
+ /* Returns the POSIX id used for default codepage detection, as reported for
+ * LC_CTYPE (and, indirectly, LC_ALL and LANG). The value is looked up on the
+ * first call and remembered in a function-local static for later calls.
+ */
+ static const char *uprv_getPOSIXIDForDefaultCodepage(void)
+ {
+     static const char* cachedCtypeID = NULL;
+     if (cachedCtypeID != NULL) {
+         return cachedCtypeID;
+     }
+     cachedCtypeID = uprv_getPOSIXIDForCategory(LC_CTYPE);
+     return cachedCtypeID;
+ }
+#endif
+#endif
+
+ /*
+ * Returns the platform's default locale as an ICU-style locale ID string.
+ *
+ * - U_POSIX_LOCALE: converts the POSIX id (ll[_CC][.MM][@VV]) by dropping the
+ * charmap and appending the variant; the result is cached in
+ * gCorrectedPOSIXLocale. Returns nullptr if the working copy cannot be allocated.
+ * - Windows: converts the user default locale name with uloc_canonicalize and
+ * caches it; falls back to "en_US".
+ * - OS/400: derives the ID from LC_ALL, LANG or setlocale() into a static buffer.
+ *
+ * NOTE: The caller should handle thread safety
+ */
+ U_CAPI const char* U_EXPORT2
+ uprv_getDefaultLocaleID()
+ {
+ #if U_POSIX_LOCALE
+ /*
+ Note that: (a '!' means the ID is improper somehow)
+ LC_ALL ----> default_loc codepage
+ --------------------------------------------------------
+ ab.CD ab CD
+ ab@CD ab__CD -
+ ab@CD.EF ab__CD EF
+
+ ab_CD.EF@GH ab_CD_GH EF
+
+ Some 'improper' ways to do the same as above:
+ ! ab_CD@GH.EF ab_CD_GH EF
+ ! ab_CD.EF@GH.IJ ab_CD_GH EF
+ ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
+
+ _CD@GH _CD_GH -
+ _CD.EF@GH _CD_GH EF
+
+ The variant cannot have dots in it.
+ The 'rightmost' variant (@xxx) wins.
+ The leftmost codepage (.xxx) wins.
+ */
+ const char* posixID = uprv_getPOSIXIDForDefaultLocale();
+
+ /* Format: (no spaces)
+ ll [ _CC ] [ . MM ] [ @ VV]
+
+ l = lang, C = ctry, M = charmap, V = variant
+ */
+
+ if (gCorrectedPOSIXLocale != nullptr) {
+ return gCorrectedPOSIXLocale;
+ }
+
+ // Copy the ID into owned memory.
+ // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination
+ char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1));
+ if (correctedPOSIXLocale == nullptr) {
+ return nullptr;
+ }
+ uprv_strcpy(correctedPOSIXLocale, posixID);
+
+ char *limit;
+ if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
+ *limit = 0;
+ }
+ if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
+ *limit = 0;
+ }
+
+ if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
+ || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
+ // Raw input was C.* or POSIX.*, Give it a nice POSIX default value.
+ // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
+ uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
+ }
+
+ /* Note that we scan the *uncorrected* ID. */
+ const char *p;
+ if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
+ p++;
+
+ /* Take care of any special cases here.. */
+ if (!uprv_strcmp(p, "nynorsk")) {
+ p = "NY";
+ /* Don't worry about no__NY. In practice, it won't appear. */
+ }
+
+ if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) {
+ uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
+ }
+ else {
+ uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
+ }
+
+ const char *q;
+ if ((q = uprv_strchr(p, '.')) != nullptr) {
+ /* How big will the resulting string be? */
+ int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
+ uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
+ correctedPOSIXLocale[len] = 0;
+ }
+ else {
+ /* Anything following the @ sign */
+ uprv_strcat(correctedPOSIXLocale, p);
+ }
+
+ /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
+ * How about 'russian' -> 'ru'?
+ * Many of the other locales using ISO codes will be handled by the
+ * canonicalization functions in uloc_getDefault.
+ */
+ }
+
+ if (gCorrectedPOSIXLocale == nullptr) {
+ gCorrectedPOSIXLocale = correctedPOSIXLocale;
+ gCorrectedPOSIXLocaleHeapAllocated = true;
+ ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
+ correctedPOSIXLocale = nullptr;
+ }
+ posixID = gCorrectedPOSIXLocale;
+
+ if (correctedPOSIXLocale != nullptr) { /* Was already set - clean up. */
+ uprv_free(correctedPOSIXLocale);
+ }
+
+ return posixID;
+
+ #elif U_PLATFORM_USES_ONLY_WIN32_API
+ #define POSIX_LOCALE_CAPACITY 64
+ UErrorCode status = U_ZERO_ERROR;
+ char *correctedPOSIXLocale = nullptr;
+
+ // If we have already figured this out just use the cached value
+ if (gCorrectedPOSIXLocale != nullptr) {
+ return gCorrectedPOSIXLocale;
+ }
+
+ // No cached value, need to determine the current value
+ static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
+ int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH);
+
+ // Now we should have a Windows locale name that needs to be converted to the POSIX style.
+ if (length > 0) // If length is 0, then the GetLocaleInfoEx failed.
+ {
+ // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
+ char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
+
+ int32_t i;
+ for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
+ {
+ if (windowsLocale[i] == '_')
+ {
+ modifiedWindowsLocale[i] = '-';
+ }
+ else
+ {
+ modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]);
+ }
+
+ if (modifiedWindowsLocale[i] == '\0')
+ {
+ break;
+ }
+ }
+
+ if (i >= UPRV_LENGTHOF(modifiedWindowsLocale))
+ {
+ // Ran out of room, can't really happen, maybe we'll be lucky about a matching
+ // locale when tags are dropped
+ modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0';
+ }
+
+ // Now normalize the resulting name
+ correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
+ /* TODO: Should we just exit on memory allocation failure? */
+ if (correctedPOSIXLocale)
+ {
+ int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
+ if (U_SUCCESS(status))
+ {
+ *(correctedPOSIXLocale + posixLen) = 0;
+ gCorrectedPOSIXLocale = correctedPOSIXLocale;
+ gCorrectedPOSIXLocaleHeapAllocated = true;
+ ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
+ }
+ else
+ {
+ uprv_free(correctedPOSIXLocale);
+ }
+ }
+ }
+
+ // If unable to find a locale we can agree upon, use en-US by default
+ if (gCorrectedPOSIXLocale == nullptr) {
+ gCorrectedPOSIXLocale = "en_US";
+ }
+ return gCorrectedPOSIXLocale;
+
+ #elif U_PLATFORM == U_PF_OS400
+ /* locales are process scoped and are by definition thread safe */
+ static char correctedLocale[64];
+ const char *localeID = getenv("LC_ALL");
+ char *p;
+
+ if (localeID == NULL)
+ localeID = getenv("LANG");
+ if (localeID == NULL)
+ localeID = setlocale(LC_ALL, NULL);
+ /* Make sure we have something... */
+ if (localeID == NULL)
+ return "en_US_POSIX";
+
+ /* Extract the locale name from the path. */
+ if((p = uprv_strrchr(localeID, '/')) != NULL)
+ {
+ /* Increment p to start of locale name. */
+ p++;
+ localeID = p;
+ }
+
+ /* Copy to work location. The source comes from the environment, so the
+ * copy is bounded: an over-long value is truncated instead of overflowing
+ * the fixed-size buffer.
+ */
+ uprv_strncpy(correctedLocale, localeID, sizeof(correctedLocale) - 1);
+ correctedLocale[sizeof(correctedLocale) - 1] = 0;
+
+ /* Strip off the '.locale' extension. */
+ if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
+ *p = 0;
+ }
+
+ /* Upper case the locale name. */
+ T_CString_toUpperCase(correctedLocale);
+
+ /* See if we are using the POSIX locale. Any of the
+ * following are equivalent and use the same QLGPGCMA
+ * (POSIX) locale.
+ * QLGPGCMA2 means UCS2
+ * QLGPGCMA_4 means UTF-32
+ * QLGPGCMA_8 means UTF-8
+ */
+ if ((uprv_strcmp("C", correctedLocale) == 0) ||
+ (uprv_strcmp("POSIX", correctedLocale) == 0) ||
+ (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
+ {
+ uprv_strcpy(correctedLocale, "en_US_POSIX");
+ }
+ else
+ {
+ int16_t LocaleLen;
+
+ /* Lower case the lang portion. */
+ for(p = correctedLocale; *p != 0 && *p != '_'; p++)
+ {
+ *p = uprv_tolower(*p);
+ }
+
+ /* Adjust for Euro. After '_E' add 'URO'.
+ * The suffix tests need at least two characters; without the length
+ * guard an empty or one-character name would index before the buffer.
+ */
+ LocaleLen = uprv_strlen(correctedLocale);
+ if (LocaleLen >= 2 &&
+ correctedLocale[LocaleLen - 2] == '_' &&
+ correctedLocale[LocaleLen - 1] == 'E')
+ {
+ /* Append only when "URO" and the terminator still fit. */
+ if ((size_t)LocaleLen + 3 < sizeof(correctedLocale)) {
+ uprv_strcat(correctedLocale, "URO");
+ }
+ }
+
+ /* If using Lotus-based locale then convert to
+ * equivalent non Lotus.
+ */
+ else if (LocaleLen >= 2 &&
+ correctedLocale[LocaleLen - 2] == '_' &&
+ correctedLocale[LocaleLen - 1] == 'L')
+ {
+ correctedLocale[LocaleLen - 2] = 0;
+ }
+
+ /* There are separate simplified and traditional
+ * locales called zh_HK_S and zh_HK_T.
+ */
+ else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
+ {
+ uprv_strcpy(correctedLocale, "zh_HK");
+ }
+
+ /* A special zh_CN_GBK locale...
+ */
+ else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
+ {
+ uprv_strcpy(correctedLocale, "zh_CN");
+ }
+
+ }
+
+ return correctedLocale;
+ #endif
+
+ }
+
+#if !U_CHARSET_IS_UTF8
+#if U_POSIX_LOCALE
+/*
+Due to various platform differences, one platform may specify a charset,
+when they really mean a different charset. Remap the names so that they are
+compatible with ICU. Only conflicting/ambiguous aliases should be resolved
+here. Before adding anything to this function, please consider adding unique
+names to the ICU alias table in the data directory.
+*/
+static const char*
+remapPlatformDependentCodepage(const char *locale, const char *name) {
+ if (locale != NULL && *locale == 0) {
+ /* Make sure that an empty locale is handled the same way. */
+ locale = NULL;
+ }
+ if (name == NULL) {
+ return NULL;
+ }
+#if U_PLATFORM == U_PF_AIX
+ if (uprv_strcmp(name, "IBM-943") == 0) {
+ /* Use the ASCII compatible ibm-943 */
+ name = "Shift-JIS";
+ }
+ else if (uprv_strcmp(name, "IBM-1252") == 0) {
+ /* Use the windows-1252 that contains the Euro */
+ name = "IBM-5348";
+ }
+#elif U_PLATFORM == U_PF_SOLARIS
+ if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
+ /* Solaris underspecifies the "EUC" name. */
+ if (uprv_strcmp(locale, "zh_CN") == 0) {
+ name = "EUC-CN";
+ }
+ else if (uprv_strcmp(locale, "zh_TW") == 0) {
+ name = "EUC-TW";
+ }
+ else if (uprv_strcmp(locale, "ko_KR") == 0) {
+ name = "EUC-KR";
+ }
+ }
+ else if (uprv_strcmp(name, "eucJP") == 0) {
+ /*
+ ibm-954 is the best match.
+ ibm-33722 is the default for eucJP (similar to Windows).
+ */
+ name = "eucjis";
+ }
+ else if (uprv_strcmp(name, "646") == 0) {
+ /*
+ * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
+ * ISO-8859-1 instead of US-ASCII(646).
+ */
+ name = "ISO-8859-1";
+ }
+#elif U_PLATFORM_IS_DARWIN_BASED
+ if (locale == NULL && *name == 0) {
+ /*
+ No locale was specified, and an empty name was passed in.
+ This usually indicates that nl_langinfo didn't return valid information.
+ Mac OS X uses UTF-8 by default (especially the locale data and console).
+ */
+ name = "UTF-8";
+ }
+ else if (uprv_strcmp(name, "CP949") == 0) {
+ /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
+ name = "EUC-KR";
+ }
+ else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
+ /*
+ * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
+ */
+ name = "UTF-8";
+ }
+#elif U_PLATFORM == U_PF_BSD
+ if (uprv_strcmp(name, "CP949") == 0) {
+ /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
+ name = "EUC-KR";
+ }
+#elif U_PLATFORM == U_PF_HPUX
+ if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
+ /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
+ /* zh_TW.big5 is not the same charset as zh_HK.big5! */
+ name = "hkbig5";
+ }
+ else if (uprv_strcmp(name, "eucJP") == 0) {
+ /*
+ ibm-1350 is the best match, but unavailable.
+ ibm-954 is mostly a superset of ibm-1350.
+ ibm-33722 is the default for eucJP (similar to Windows).
+ */
+ name = "eucjis";
+ }
+#elif U_PLATFORM == U_PF_LINUX
+ if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
+ /* Linux underspecifies the "EUC" name. */
+ if (uprv_strcmp(locale, "korean") == 0) {
+ name = "EUC-KR";
+ }
+ else if (uprv_strcmp(locale, "japanese") == 0) {
+ /* See comment below about eucJP */
+ name = "eucjis";
+ }
+ }
+ else if (uprv_strcmp(name, "eucjp") == 0) {
+ /*
+ ibm-1350 is the best match, but unavailable.
+ ibm-954 is mostly a superset of ibm-1350.
+ ibm-33722 is the default for eucJP (similar to Windows).
+ */
+ name = "eucjis";
+ }
+ else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
+ (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
+ /*
+ * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
+ */
+ name = "UTF-8";
+ }
+ /*
+ * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
+ * it by falling back to 'US-ASCII' when NULL is returned from this
+ * function. So, we don't have to worry about it here.
+ */
+#endif
+ /* return NULL when "" is passed in */
+ if (*name == 0) {
+ name = NULL;
+ }
+ return name;
+}
+
+ /*
+ * Extracts the codepage part of a POSIX locale id of the form
+ * "locale.codepage[@variant]".
+ *
+ * localeName: POSIX locale id; may be NULL.
+ * buffer: receives the codepage text (always NUL-terminated when written).
+ * buffCapacity: size of buffer in bytes.
+ * Returns the result of remapPlatformDependentCodepage() for the extracted
+ * locale and codepage, or NULL when localeName is NULL or contains no '.'.
+ */
+ static const char*
+ getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
+ {
+     if (localeName == NULL) {
+         return NULL;
+     }
+     const char *dot = uprv_strchr(localeName, '.');
+     if (dot == NULL) {
+         return NULL;
+     }
+
+     /* Copy the part in front of the '.' (truncated to the local buffer). */
+     char localePart[100];
+     size_t localePartCapacity = uprv_min(sizeof(localePart), (dot-localeName)+1);
+     uprv_strncpy(localePart, localeName, localePartCapacity);
+     localePart[localePartCapacity-1] = 0; /* ensure NULL termination */
+
+     /* Copy the part behind the '.' into the caller's buffer. */
+     uprv_strncpy(buffer, dot+1, buffCapacity);
+     buffer[buffCapacity-1] = 0; /* ensure NULL termination */
+
+     /* Cut off a trailing "@variant", if any. */
+     char *at = uprv_strchr(buffer, '@');
+     if (at != NULL) {
+         *at = 0;
+     }
+     return remapPlatformDependentCodepage(localePart, buffer);
+ }
+#endif
+
+ /*
+ * Determines the name of the platform's default codepage. Exactly one of the
+ * platform sections below is compiled in:
+ * - OS/400: "ibm-<ccsid>" built from the job's CCSID (37 when unavailable).
+ * - OS/390: nl_langinfo(CODESET) with UCNV_SWAP_LFNL_OPTION_STRING appended.
+ * - Windows: "UTF-8" for codepage 65001, otherwise "windows-<n>" for
+ * 0 < n < 20000, otherwise "UTF-8".
+ * - POSIX: nl_langinfo (when usable), then the codepage part of the POSIX
+ * locale id, then "US-ASCII".
+ * - anything else: "US-ASCII".
+ * The returned pointer is either a string literal or a function-local static buffer.
+ */
+ static const char*
+ int_getDefaultCodepage()
+ {
+ #if U_PLATFORM == U_PF_OS400
+ uint32_t ccsid = 37; /* Default to ibm-37 */
+ static char codepage[64];
+ Qwc_JOBI0400_t jobinfo;
+ Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
+
+ EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
+ "* ", " ", &error);
+
+ if (error.Bytes_Available == 0) {
+ if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
+ ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
+ }
+ else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
+ ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
+ }
+ /* else use the default */
+ }
+ sprintf(codepage,"ibm-%d", ccsid);
+ return codepage;
+
+ #elif U_PLATFORM == U_PF_OS390
+ static char codepage[64];
+
+ /* Leave room for the option string that is appended right after. */
+ strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
+ strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
+ codepage[63] = 0; /* NULL terminate */
+
+ return codepage;
+
+ #elif U_PLATFORM_USES_ONLY_WIN32_API
+ static char codepage[64];
+ DWORD codepageNumber = 0;
+
+ #if U_PLATFORM_HAS_WINUWP_API == 1
+ // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
+ // have folks use Unicode than a "system" code page, however this is the same
+ // codepage as the system default locale codepage. (FWIW, the system locale is
+ // ONLY used for codepage, it should never be used for anything else)
+ GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
+ (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR));
+ #else
+ // Win32 apps can call GetACP
+ codepageNumber = GetACP();
+ #endif
+ // Special case for UTF-8
+ if (codepageNumber == 65001)
+ {
+ return "UTF-8";
+ }
+ // Windows codepages can look like windows-1252, so format the found number
+ // the numbers are eclectic, however all valid system code pages, besides UTF-8
+ // are between 3 and 19999
+ if (codepageNumber > 0 && codepageNumber < 20000)
+ {
+ sprintf(codepage, "windows-%ld", codepageNumber);
+ return codepage;
+ }
+ // If the codepage number call failed then return UTF-8
+ return "UTF-8";
+
+ #elif U_POSIX_LOCALE
+ static char codesetName[100];
+ const char *localeName = NULL;
+ const char *name = NULL;
+
+ localeName = uprv_getPOSIXIDForDefaultCodepage();
+ uprv_memset(codesetName, 0, sizeof(codesetName));
+ /* On Solaris nl_langinfo returns C locale values unless setlocale
+ * was called earlier.
+ */
+ #if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
+ /* When available, check nl_langinfo first because it usually gives more
+ useful names. It depends on LC_CTYPE.
+ nl_langinfo may use the same buffer as setlocale. */
+ {
+ const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
+ #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
+ /*
+ * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
+ * instead of ASCII.
+ */
+ if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
+ codeset = remapPlatformDependentCodepage(localeName, codeset);
+ } else
+ #endif
+ {
+ /* Reached directly on other platforms, or as the else-branch above. */
+ codeset = remapPlatformDependentCodepage(NULL, codeset);
+ }
+
+ if (codeset != NULL) {
+ uprv_strncpy(codesetName, codeset, sizeof(codesetName));
+ codesetName[sizeof(codesetName)-1] = 0;
+ return codesetName;
+ }
+ }
+ #endif
+
+ /* Use setlocale in a nice way, and then check some environment variables.
+ Maybe the application used setlocale already.
+ */
+ uprv_memset(codesetName, 0, sizeof(codesetName));
+ name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
+ if (name) {
+ /* if we can find the codeset name from setlocale, return that. */
+ return name;
+ }
+
+ if (*codesetName == 0)
+ {
+ /* Everything failed. Return US ASCII (ISO 646). */
+ (void)uprv_strcpy(codesetName, "US-ASCII");
+ }
+ return codesetName;
+ #else
+ return "US-ASCII";
+ #endif
+ }
+
+
+ /*
+ * Returns the name of the default codepage.
+ * The name is computed by int_getDefaultCodepage() on the first call and kept
+ * in a function-local static; the check-and-assign runs between
+ * umtx_lock(NULL) and umtx_unlock(NULL).
+ */
+ U_CAPI const char* U_EXPORT2
+ uprv_getDefaultCodepage()
+ {
+ static char const *name = NULL;
+ umtx_lock(NULL);
+ if (name == NULL) {
+ name = int_getDefaultCodepage();
+ }
+ umtx_unlock(NULL);
+ return name;
+ }
+#endif /* !U_CHARSET_IS_UTF8 */
+
+
+/* end of platform-specific implementation -------------- */
+
+/* version handling --------------------------------------------------------- */
+
+ /*
+ * Parses a version string of decimal fields separated by U_VERSION_DELIMITER
+ * into versionArray. Parsing stops at the first field that has no digits, at
+ * the first character that is not the delimiter, or after
+ * U_MAX_VERSION_LENGTH fields; all remaining fields are set to 0.
+ * Does nothing when versionArray is NULL; a NULL versionString yields all zeros.
+ */
+ U_CAPI void U_EXPORT2
+ u_versionFromString(UVersionInfo versionArray, const char *versionString) {
+     if(versionArray==NULL) {
+         return;
+     }
+
+     uint16_t filled=0;
+     if(versionString!=NULL) {
+         const char *cursor=versionString;
+         char *stop;
+         while(true) {
+             versionArray[filled]=(uint8_t)uprv_strtoul(cursor, &stop, 10);
+             if(stop==cursor) {
+                 /* no digits consumed: this slot is zeroed again below */
+                 break;
+             }
+             ++filled;
+             if(filled==U_MAX_VERSION_LENGTH || *stop!=U_VERSION_DELIMITER) {
+                 break;
+             }
+             cursor=stop+1;
+         }
+     }
+
+     /* zero-fill whatever was not parsed */
+     for(; filled<U_MAX_VERSION_LENGTH; ++filled) {
+         versionArray[filled]=0;
+     }
+ }
+
+ /*
+ * UChar variant of u_versionFromString(): converts at most
+ * U_MAX_VERSION_STRING_LENGTH characters of versionString to chars and parses
+ * them. Does nothing when either argument is NULL.
+ */
+ U_CAPI void U_EXPORT2
+ u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
+     if(versionArray==NULL || versionString==NULL) {
+         return;
+     }
+
+     char narrow[U_MAX_VERSION_STRING_LENGTH+1];
+     const int32_t fullLength = u_strlen(versionString);
+     const int32_t usedLength =
+         fullLength>U_MAX_VERSION_STRING_LENGTH ? U_MAX_VERSION_STRING_LENGTH : fullLength;
+     u_UCharsToChars(versionString, narrow, usedLength);
+     narrow[usedLength]=0;
+     u_versionFromString(versionArray, narrow);
+ }
+
+ /*
+ * Writes one version field (0..255) as decimal digits without leading zeros
+ * and returns the position just past the last digit written.
+ * Once a hundreds digit has been written the tens digit is always written
+ * too, so that e.g. 105 is rendered as "105" and not as "15".
+ */
+ static char *
+ writeVersionField(char *dest, uint8_t field) {
+     if(field>=100) {
+         *dest++=(char)('0'+field/100);
+         field%=100;
+         *dest++=(char)('0'+field/10);
+         field%=10;
+     } else if(field>=10) {
+         *dest++=(char)('0'+field/10);
+         field%=10;
+     }
+     *dest++=(char)('0'+field);
+     return dest;
+ }
+
+ /*
+ * Formats versionArray as decimal fields separated by U_VERSION_DELIMITER and
+ * NUL-terminates the result. Trailing zero fields are omitted, but at least
+ * two fields are always written.
+ *
+ * versionString must have room for the longest result (four three-digit
+ * fields, three delimiters and the terminator).
+ * Does nothing when versionString is NULL; writes an empty string when
+ * versionArray is NULL.
+ */
+ U_CAPI void U_EXPORT2
+ u_versionToString(const UVersionInfo versionArray, char *versionString) {
+     uint16_t count, part;
+
+     if(versionString==NULL) {
+         return;
+     }
+
+     if(versionArray==NULL) {
+         versionString[0]=0;
+         return;
+     }
+
+     /* count how many fields need to be written */
+     for(count=U_MAX_VERSION_LENGTH; count>0 && versionArray[count-1]==0; --count) {
+     }
+
+     if(count <= 1) {
+         count = 2;
+     }
+
+     /* write the first part */
+     versionString=writeVersionField(versionString, versionArray[0]);
+
+     /* write the following parts, each preceded by the delimiter */
+     for(part=1; part<count; ++part) {
+         *versionString++=U_VERSION_DELIMITER;
+         versionString=writeVersionField(versionString, versionArray[part]);
+     }
+
+     /* NUL-terminate */
+     *versionString=0;
+ }
+
+ /*
+ * Fills versionArray with the library version by parsing U_ICU_VERSION
+ * with u_versionFromString().
+ */
+ U_CAPI void U_EXPORT2
+ u_getVersion(UVersionInfo versionArray) {
+ (void)copyright; // Suppress unused variable warning from clang.
+ u_versionFromString(versionArray, U_ICU_VERSION);
+ }
+
+/**
+ * icucfg.h dependent code
+ */
+
+#if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
+
+#if HAVE_DLFCN_H
+#ifdef __MVS__
+#ifndef __SUSV3
+#define __SUSV3 1
+#endif
+#endif
+#include <dlfcn.h>
+#endif /* HAVE_DLFCN_H */
+
+ /*
+ * Opens the shared library libName with dlopen(RTLD_NOW|RTLD_GLOBAL).
+ * Returns the library handle, or NULL when *status already indicates a
+ * failure or when dlopen fails (in which case *status is set to
+ * U_MISSING_RESOURCE_ERROR).
+ */
+ U_CAPI void * U_EXPORT2
+ uprv_dl_open(const char *libName, UErrorCode *status) {
+     if(U_FAILURE(*status)) {
+         return NULL;
+     }
+     void *handle = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
+     if(handle!=NULL) {
+         return handle;
+     }
+ #ifdef U_TRACE_DYLOAD
+     printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
+ #endif
+     *status = U_MISSING_RESOURCE_ERROR;
+     return NULL;
+ }
+
+ /*
+ * Closes a library handle with dlclose(). Does nothing when *status already
+ * indicates a failure. The return value of dlclose() is not examined.
+ */
+ U_CAPI void U_EXPORT2
+ uprv_dl_close(void *lib, UErrorCode *status) {
+ if(U_FAILURE(*status)) return;
+ dlclose(lib);
+ }
+
+ /*
+ * Looks up the symbol sym in the library lib with dlsym() and returns it as
+ * a function pointer. Returns NULL when *status already indicates a failure
+ * or when the symbol is not found (then *status is set to
+ * U_MISSING_RESOURCE_ERROR).
+ */
+ U_CAPI UVoidFunction* U_EXPORT2
+ uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
+ /* dlsym() returns void*; the union hands that value back as a function
+ * pointer without a direct cast between the two pointer kinds. */
+ union {
+ UVoidFunction *fp;
+ void *vp;
+ } uret;
+ uret.fp = NULL;
+ if(U_FAILURE(*status)) return uret.fp;
+ uret.vp = dlsym(lib, sym);
+ if(uret.vp == NULL) {
+ #ifdef U_TRACE_DYLOAD
+ printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
+ #endif
+ *status = U_MISSING_RESOURCE_ERROR;
+ }
+ return uret.fp;
+ }
+
+#elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
+
+/* Windows API implementation. */
+ // Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation.
+
+ /*
+ * Loads the library libName with LoadLibraryA().
+ * Returns the module handle, or NULL when *status already indicates a
+ * failure or when loading fails (then *status is set to
+ * U_MISSING_RESOURCE_ERROR).
+ */
+ U_CAPI void * U_EXPORT2
+ uprv_dl_open(const char *libName, UErrorCode *status) {
+     if(U_FAILURE(*status)) {
+         return NULL;
+     }
+
+     HMODULE module = LoadLibraryA(libName);
+     if(module==NULL) {
+         *status = U_MISSING_RESOURCE_ERROR;
+     }
+     return (void*)module;
+ }
+
+ /*
+ * Releases a module handle with FreeLibrary(). Does nothing when *status
+ * already indicates a failure.
+ */
+ U_CAPI void U_EXPORT2
+ uprv_dl_close(void *lib, UErrorCode *status) {
+     if(U_SUCCESS(*status)) {
+         FreeLibrary((HMODULE)lib);
+     }
+ }
+
+ /*
+ * Looks up the symbol sym in the module lib with GetProcAddress().
+ * Returns NULL when *status already indicates a failure or lib is NULL.
+ * When the lookup fails, *status becomes U_MISSING_RESOURCE_ERROR if the last
+ * error is ERROR_PROC_NOT_FOUND and U_UNSUPPORTED_ERROR otherwise.
+ */
+ U_CAPI UVoidFunction* U_EXPORT2
+ uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
+     if(U_FAILURE(*status) || lib==NULL) {
+         return NULL;
+     }
+
+     UVoidFunction* found = (UVoidFunction*)GetProcAddress((HMODULE)lib, sym);
+     if(found!=NULL) {
+         return found;
+     }
+
+     /* distinguish "no such symbol" from any other failure */
+     *status = (GetLastError() == ERROR_PROC_NOT_FOUND)
+         ? U_MISSING_RESOURCE_ERROR
+         : U_UNSUPPORTED_ERROR; /* other unknown error. */
+     return NULL;
+ }
+
+#else
+
+/* No dynamic loading, null (nonexistent) implementation. */
+
+ /*
+ * Null implementation: always returns NULL. When *status does not already
+ * indicate a failure it is set to U_UNSUPPORTED_ERROR.
+ */
+ U_CAPI void * U_EXPORT2
+ uprv_dl_open(const char *libName, UErrorCode *status) {
+     (void)libName;
+     if(U_SUCCESS(*status)) {
+         *status = U_UNSUPPORTED_ERROR;
+     }
+     return NULL;
+ }
+
+ /*
+ * Null implementation: closes nothing. When *status does not already
+ * indicate a failure it is set to U_UNSUPPORTED_ERROR.
+ */
+ U_CAPI void U_EXPORT2
+ uprv_dl_close(void *lib, UErrorCode *status) {
+     (void)lib;
+     if(U_SUCCESS(*status)) {
+         *status = U_UNSUPPORTED_ERROR;
+     }
+ }
+
+ /*
+ * Null implementation: always returns NULL. When *status does not already
+ * indicate a failure it is set to U_UNSUPPORTED_ERROR.
+ */
+ U_CAPI UVoidFunction* U_EXPORT2
+ uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
+     (void)lib;
+     (void)sym;
+     if(U_FAILURE(*status)) {
+         return (UVoidFunction*)NULL;
+     }
+     *status = U_UNSUPPORTED_ERROR;
+     return (UVoidFunction*)NULL;
+ }
+
+#endif
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
diff --git a/thirdparty/icu4c/common/putilimp.h b/thirdparty/icu4c/common/putilimp.h
new file mode 100644
index 0000000000..a325c6c359
--- /dev/null
+++ b/thirdparty/icu4c/common/putilimp.h
@@ -0,0 +1,615 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : putilimp.h
+*
+* Date Name Description
+* 10/17/04 grhoten Move internal functions from putil.h to this file.
+******************************************************************************
+*/
+
+#ifndef PUTILIMP_H
+#define PUTILIMP_H
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+
+/**
+ * \def U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
+ * Nearly all CPUs and compilers implement a right-shift of a signed integer
+ * as an Arithmetic Shift Right which copies the sign bit (the Most Significant Bit (MSB))
+ * into the vacated bits (sign extension).
+ * For example, (int32_t)0xfff5fff3>>4 becomes 0xffff5fff and -1>>1=-1.
+ *
+ * This can be useful for storing a signed value in the upper bits
+ * and another bit field in the lower bits.
+ * The signed value can be retrieved by simple right-shifting.
+ *
+ * This is consistent with the Java language.
+ *
+ * However, the C standard allows compilers to implement a right-shift of a signed integer
+ * as a Logical Shift Right which copies a 0 into the vacated bits.
+ * For example, (int32_t)0xfff5fff3>>4 becomes 0x0fff5fff and -1>>1=0x7fffffff.
+ *
+ * Code that depends on the natural behavior should be guarded with this macro,
+ * with an alternate path for unusual platforms.
+ * @internal
+ */
+#ifdef U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
+ /* Use the predefined value. */
+#else
+ /*
+ * Nearly all CPUs & compilers implement a right-shift of a signed integer
+ * as an Arithmetic Shift Right (with sign extension).
+ */
+# define U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC 1
+#endif
+
+/** Define this to 1 if your platform supports IEEE 754 floating point,
+ to 0 if it does not. */
+#ifndef IEEE_754
+# define IEEE_754 1
+#endif
+
+/**
+ * uintptr_t is an optional part of the standard definitions in stdint.h.
+ * The opengroup.org documentation for stdint.h says
+ * "On XSI-conformant systems, the intptr_t and uintptr_t types are required;
+ * otherwise, they are optional."
+ * We assume that when uintptr_t is defined, UINTPTR_MAX is defined as well.
+ *
+ * Do not use ptrdiff_t since it is signed. size_t is unsigned.
+ */
+/* TODO: This check fails on some z environments. Filed a ticket #9357 for this. */
+#if !defined(__intptr_t_defined) && !defined(UINTPTR_MAX) && (U_PLATFORM != U_PF_OS390)
+typedef size_t uintptr_t;
+#endif
+
+/*===========================================================================*/
+/** @{ Information about POSIX support */
+/*===========================================================================*/
+
+/**
+ * \def U_HAVE_NL_LANGINFO_CODESET
+ * Unless predefined, this is 0 on platforms that use only the Win32 API,
+ * on Android and on QNX, and 1 everywhere else.
+ * @internal
+ */
+#ifdef U_HAVE_NL_LANGINFO_CODESET
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_ANDROID || U_PLATFORM == U_PF_QNX
+# define U_HAVE_NL_LANGINFO_CODESET 0
+#else
+# define U_HAVE_NL_LANGINFO_CODESET 1
+#endif
+
+/**
+ * \def U_NL_LANGINFO_CODESET
+ * Unless predefined, this is -1 when U_HAVE_NL_LANGINFO_CODESET is 0,
+ * is left undefined on U_PF_OS400, and is CODESET otherwise
+ * (presumably the nl_langinfo() item constant; confirm at the use site).
+ * @internal
+ */
+#ifdef U_NL_LANGINFO_CODESET
+ /* Use the predefined value. */
+#elif !U_HAVE_NL_LANGINFO_CODESET
+# define U_NL_LANGINFO_CODESET -1
+#elif U_PLATFORM == U_PF_OS400
+ /* not defined */
+#else
+# define U_NL_LANGINFO_CODESET CODESET
+#endif
+
+/**
+ * \def U_TZSET
+ * Skipped entirely when U_TZSET or U_HAVE_TZSET is predefined. Otherwise it
+ * is _tzset on Win32-API-only platforms where U_PLATFORM_HAS_WINUWP_API is 0,
+ * is left undefined on UWP and on U_PF_OS400, and is tzset everywhere else.
+ * @internal
+ */
+#if defined(U_TZSET) || defined(U_HAVE_TZSET)
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+ // UWP doesn't support tzset or environment variables for tz
+#if U_PLATFORM_HAS_WINUWP_API == 0
+# define U_TZSET _tzset
+#endif
+#elif U_PLATFORM == U_PF_OS400
+ /* not defined */
+#else
+# define U_TZSET tzset
+#endif
+
+/**
+ * \def U_TIMEZONE
+ * Skipped entirely when U_TIMEZONE or U_HAVE_TIMEZONE is predefined.
+ * Otherwise the first matching branch below wins, so the order matters:
+ *   - Android: timezone
+ *   - uClibc: left undefined
+ *   - newlib: _timezone
+ *   - glibc: __timezone
+ *   - any other Linux-based platform: left undefined
+ *   - Win32-API-only platforms: _timezone
+ *   - BSD other than NetBSD, U_PF_OS400, U_PF_IPHONE: left undefined
+ *   - everything else: timezone
+ * @internal
+ */
+#if defined(U_TIMEZONE) || defined(U_HAVE_TIMEZONE)
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_ANDROID
+# define U_TIMEZONE timezone
+#elif defined(__UCLIBC__)
+ // uClibc does not have __timezone or _timezone.
+#elif defined(_NEWLIB_VERSION)
+# define U_TIMEZONE _timezone
+#elif defined(__GLIBC__)
+ // glibc
+# define U_TIMEZONE __timezone
+#elif U_PLATFORM_IS_LINUX_BASED
+ // not defined
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# define U_TIMEZONE _timezone
+#elif U_PLATFORM == U_PF_BSD && !defined(__NetBSD__)
+ /* not defined */
+#elif U_PLATFORM == U_PF_OS400
+ /* not defined */
+#elif U_PLATFORM == U_PF_IPHONE
+ /* not defined */
+#else
+# define U_TIMEZONE timezone
+#endif
+
+/**
+ * \def U_TZNAME
+ * Skipped entirely when U_TZNAME or U_HAVE_TZNAME is predefined. Otherwise it
+ * is _tzname on Win32-API-only platforms where U_PLATFORM_HAS_WINUWP_API is 0,
+ * is left undefined on UWP and on U_PF_OS400, and is tzname everywhere else.
+ * @internal
+ */
+#if defined(U_TZNAME) || defined(U_HAVE_TZNAME)
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+ /* not usable on all windows platforms */
+#if U_PLATFORM_HAS_WINUWP_API == 0
+# define U_TZNAME _tzname
+#endif
+#elif U_PLATFORM == U_PF_OS400
+ /* not defined */
+#else
+# define U_TZNAME tzname
+#endif
+
+/**
+ * \def U_HAVE_MMAP
+ * Unless predefined, this is 0 on platforms that use only the Win32 API
+ * and 1 everywhere else.
+ * @internal
+ */
+#ifdef U_HAVE_MMAP
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# define U_HAVE_MMAP 0
+#else
+# define U_HAVE_MMAP 1
+#endif
+
+/**
+ * \def U_HAVE_POPEN
+ * Unless predefined, this is 0 on platforms that use only the Win32 API
+ * and on U_PF_OS400, and 1 everywhere else.
+ * @internal
+ */
+#ifdef U_HAVE_POPEN
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# define U_HAVE_POPEN 0
+#elif U_PLATFORM == U_PF_OS400
+# define U_HAVE_POPEN 0
+#else
+# define U_HAVE_POPEN 1
+#endif
+
+/**
+ * \def U_HAVE_DIRENT_H
+ * Defines whether dirent.h is available.
+ * @internal
+ */
+#ifdef U_HAVE_DIRENT_H
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# define U_HAVE_DIRENT_H 0
+#else
+# define U_HAVE_DIRENT_H 1
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ Programs used by ICU code */
+/*===========================================================================*/
+
+/**
+ * \def U_MAKE_IS_NMAKE
+ * Defines whether the "make" program is Windows nmake.
+ */
+#ifdef U_MAKE_IS_NMAKE
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_WINDOWS
+# define U_MAKE_IS_NMAKE 1
+#else
+# define U_MAKE_IS_NMAKE 0
+#endif
+
+/** @} */
+
+/*==========================================================================*/
+/* Platform utilities */
+/*==========================================================================*/
+
+/**
+ * Platform utilities isolates the platform dependencies of the
+ * library. For each platform which this code is ported to, these
+ * functions may have to be re-implemented.
+ */
+
+/**
+ * Floating point utility to determine if a double is Not a Number (NaN).
+ * @internal
+ */
+U_CAPI UBool U_EXPORT2 uprv_isNaN(double d);
+/**
+ * Floating point utility to determine if a double has an infinite value.
+ * @internal
+ */
+U_CAPI UBool U_EXPORT2 uprv_isInfinite(double d);
+/**
+ * Floating point utility to determine if a double has a positive infinite value.
+ * @internal
+ */
+U_CAPI UBool U_EXPORT2 uprv_isPositiveInfinity(double d);
+/**
+ * Floating point utility to determine if a double has a negative infinite value.
+ * @internal
+ */
+U_CAPI UBool U_EXPORT2 uprv_isNegativeInfinity(double d);
+/**
+ * Floating point utility that returns a Not a Number (NaN) value.
+ * @internal
+ */
+U_CAPI double U_EXPORT2 uprv_getNaN(void);
+/**
+ * Floating point utility that returns an infinite value.
+ * @internal
+ */
+U_CAPI double U_EXPORT2 uprv_getInfinity(void);
+
+/**
+ * Floating point utility to truncate a double.
+ * @internal
+ */
+U_CAPI double U_EXPORT2 uprv_trunc(double d);
+/**
+ * Floating point utility to calculate the floor of a double.
+ * @internal
+ */
+U_CAPI double U_EXPORT2 uprv_floor(double d);
+/**
+ * Floating point utility to calculate the ceiling of a double.
+ * @internal
+ */
+U_CAPI double U_EXPORT2 uprv_ceil(double d);
+/**
+ * Floating point utility to calculate the absolute value of a double.
+ * @internal
+ */
+U_CAPI double U_EXPORT2 uprv_fabs(double d);
+/**
+ * Floating point utility to calculate the fractional and integer parts of a double.
+ * @internal
+ */
+U_CAPI double U_EXPORT2 uprv_modf(double d, double* pinteger);
+/**
+ * Floating point utility to calculate the remainder of a double divided by another double.
+ * @internal
+ */
+U_CAPI double U_EXPORT2 uprv_fmod(double d, double y);
+/**
+ * Floating point utility to calculate d to the power of exponent (d^exponent).
+ * @internal
+ */
+U_CAPI double U_EXPORT2 uprv_pow(double d, double exponent);
+/**
+ * Floating point utility to calculate 10 to the power of exponent (10^exponent).
+ * @internal
+ */
+U_CAPI double U_EXPORT2 uprv_pow10(int32_t exponent);
+/**
+ * Floating point utility to calculate the maximum value of two doubles.
+ * @internal
+ */
+U_CAPI double U_EXPORT2 uprv_fmax(double d, double y);
+/**
+ * Floating point utility to calculate the minimum value of two doubles.
+ * @internal
+ */
+U_CAPI double U_EXPORT2 uprv_fmin(double d, double y);
+/**
+ * Private utility to calculate the maximum value of two integers.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2 uprv_max(int32_t d, int32_t y);
+/**
+ * Private utility to calculate the minimum value of two integers.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2 uprv_min(int32_t d, int32_t y);
+
+/**
+ * \def uprv_isNegative
+ * Tests the sign of `number` by reading one of its bytes as a signed char:
+ * the first byte when U_IS_BIG_ENDIAN is set, otherwise the last byte
+ * (offset sizeof(number)-1). The macro takes the address of its argument,
+ * so `number` must be an lvalue.
+ * @internal
+ */
+#if U_IS_BIG_ENDIAN
+# define uprv_isNegative(number) (*((signed char *)&(number))<0)
+#else
+# define uprv_isNegative(number) (*((signed char *)&(number)+sizeof(number)-1)<0)
+#endif
+
+/**
+ * Return the largest positive number that can be represented by an integer
+ * type of arbitrary bit length.
+ * @internal
+ */
+U_CAPI double U_EXPORT2 uprv_maxMantissa(void);
+
+/**
+ * Floating point utility to calculate the logarithm of a double.
+ * @internal
+ */
+U_CAPI double U_EXPORT2 uprv_log(double d);
+
+/**
+ * Does common notion of rounding e.g. uprv_floor(x + 0.5);
+ * @param x the double number
+ * @return the rounded double
+ * @internal
+ */
+U_CAPI double U_EXPORT2 uprv_round(double x);
+
+/**
+ * Adds the signed integers a and b, storing the result in res.
+ * Checks for signed integer overflow.
+ * Similar to the GCC/Clang extension __builtin_add_overflow
+ *
+ * @param a The first operand.
+ * @param b The second operand.
+ * @param res a + b
+ * @return true if overflow occurred; false if no overflow occurred.
+ * @internal
+ */
+U_CAPI UBool U_EXPORT2 uprv_add32_overflow(int32_t a, int32_t b, int32_t* res);
+
+/**
+ * Multiplies the signed integers a and b, storing the result in res.
+ * Checks for signed integer overflow.
+ * Similar to the GCC/Clang extension __builtin_mul_overflow
+ *
+ * @param a The first multiplicand.
+ * @param b The second multiplicand.
+ * @param res a * b
+ * @return true if overflow occurred; false if no overflow occurred.
+ * @internal
+ */
+U_CAPI UBool U_EXPORT2 uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res);
+
+#if 0
+/**
+ * Returns the number of digits after the decimal point in a double number x.
+ *
+ * @param x the double number
+ * @return the number of digits after the decimal point in a double number x.
+ * @internal
+ */
+/*U_CAPI int32_t U_EXPORT2 uprv_digitsAfterDecimal(double x);*/
+#endif
+
+#if !U_CHARSET_IS_UTF8
+/**
+ * Please use ucnv_getDefaultName() instead.
+ * Return the default codepage for this platform and locale.
+ * This function can call setlocale() on Unix platforms. Please read the
+ * platform documentation on setlocale() before calling this function.
+ * @return the default codepage for this platform
+ * @internal
+ */
+U_CAPI const char* U_EXPORT2 uprv_getDefaultCodepage(void);
+#endif
+
+/**
+ * Please use uloc_getDefault() instead.
+ * Return the default locale ID string by querying the system, or
+ * zero if one cannot be found.
+ * This function can call setlocale() on Unix platforms. Please read the
+ * platform documentation on setlocale() before calling this function.
+ * @return the default locale ID string
+ * @internal
+ */
+U_CAPI const char* U_EXPORT2 uprv_getDefaultLocaleID(void);
+
+/**
+ * Time zone utilities
+ *
+ * Wrappers for C runtime library functions relating to timezones.
+ * The uprv_tzset() function (similar to tzset) uses the current setting
+ * of the environment variable TZ to assign values to three global
+ * variables: daylight, timezone, and tzname. These variables have the
+ * following meanings, and are declared in &lt;time.h&gt;.
+ *
+ * daylight Nonzero if daylight-saving-time zone (DST) is specified
+ * in TZ; otherwise, 0. Default value is 1.
+ * timezone Difference in seconds between coordinated universal
+ * time and local time. E.g., -28,800 for PST (GMT-8hrs)
+ * tzname(0) Three-letter time-zone name derived from TZ environment
+ * variable. E.g., "PST".
+ * tzname(1) Three-letter DST zone name derived from TZ environment
+ * variable. E.g., "PDT". If DST zone is omitted from TZ,
+ * tzname(1) is an empty string.
+ *
+ * Notes: For example, to set the TZ environment variable to correspond
+ * to the current time zone in Germany, you can use one of the
+ * following statements:
+ *
+ * set TZ=GST1GDT
+ * set TZ=GST+1GDT
+ *
+ * If the TZ value is not set, uprv_tzset() attempts to use the time zone
+ * information specified by the operating system. Under Windows NT
+ * and Windows 95, this information is specified in the Control Panel's
+ * Date/Time application.
+ * @internal
+ */
+U_CAPI void U_EXPORT2 uprv_tzset(void);
+
+/**
+ * Difference in seconds between coordinated universal
+ * time and local time. E.g., -28,800 for PST (GMT-8hrs)
+ * @return the difference in seconds between coordinated universal time and local time.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2 uprv_timezone(void);
+
+/**
+ * tzname(0) Three-letter time-zone name derived from TZ environment
+ * variable. E.g., "PST".
+ * tzname(1) Three-letter DST zone name derived from TZ environment
+ * variable. E.g., "PDT". If DST zone is omitted from TZ,
+ * tzname(1) is an empty string.
+ * @internal
+ */
+U_CAPI const char* U_EXPORT2 uprv_tzname(int n);
+
+/**
+ * Reset the global tzname cache.
+ * @internal
+ */
+U_CAPI void uprv_tzname_clear_cache(void);
+
+/**
+ * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
+ * This function is affected by 'faketime' and should be the bottleneck for all user-visible ICU time functions.
+ * @return the UTC time measured in milliseconds
+ * @internal
+ */
+U_CAPI UDate U_EXPORT2 uprv_getUTCtime(void);
+
+/**
+ * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
+ * This function is not affected by 'faketime', so it should only be used by low level test functions- not by anything that
+ * exposes time to the end user.
+ * @return the UTC time measured in milliseconds
+ * @internal
+ */
+U_CAPI UDate U_EXPORT2 uprv_getRawUTCtime(void);
+
+/**
+ * Determine whether a pathname is absolute or not, as defined by the platform.
+ * @param path Pathname to test
+ * @return true if the path is absolute
+ * @internal (ICU 3.0)
+ */
+U_CAPI UBool U_EXPORT2 uprv_pathIsAbsolute(const char *path);
+
+/**
+ * Use U_MAX_PTR instead of this function.
+ * @param base pointer to test
+ * @return the largest possible pointer greater than the base
+ * @internal (ICU 3.8)
+ */
+U_CAPI void * U_EXPORT2 uprv_maximumPtr(void *base);
+
+/**
+ * Maximum value of a (void*) - use to indicate the limit of an 'infinite' buffer.
+ * In fact, buffer sizes must not exceed 2GB so that the difference between
+ * the buffer limit and the buffer start can be expressed in an int32_t.
+ *
+ * The definition of U_MAX_PTR must fulfill the following conditions:
+ * - return the largest possible pointer greater than base
+ * - return a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
+ * - avoid wrapping around at high addresses
+ * - make sure that the returned pointer is not farther from base than 0x7fffffff bytes
+ *
+ * @param base The beginning of a buffer to find the maximum offset from
+ * @internal
+ */
+#ifndef U_MAX_PTR
+# if U_PLATFORM == U_PF_OS390 && !defined(_LP64)
+    /* We have 31-bit pointers. */
+#   define U_MAX_PTR(base) ((void *)0x7fffffff)
+# elif U_PLATFORM == U_PF_OS400
+    /*
+     * The argument is parenthesized so that an expression such as
+     * U_MAX_PTR(p + 1) casts the whole expression rather than only `p`.
+     */
+#   define U_MAX_PTR(base) uprv_maximumPtr((void *)(base))
+# elif 0
+    /*
+     * For platforms where pointers are scalar values (which is normal, but unlike i5/OS)
+     * but that do not define uintptr_t.
+     *
+     * However, this does not work on modern compilers:
+     * The C++ standard does not define pointer overflow, and allows compilers to
+     * assume that p+u>p for any pointer p and any integer u>0.
+     * Thus, modern compilers optimize away the ">" comparison.
+     * (See ICU tickets #7187 and #8096.)
+     */
+#   define U_MAX_PTR(base) \
+    ((void *)(((char *)(base)+0x7fffffffu) > (char *)(base) \
+        ? ((char *)(base)+0x7fffffffu) \
+        : (char *)-1))
+# else
+    /*
+     * Default version. C++ standard compliant for scalar pointers.
+     * The arithmetic is done on uintptr_t: if adding 0x7fffffff wraps around,
+     * the result is pinned to the highest address instead.
+     */
+#   define U_MAX_PTR(base) \
+    ((void *)(((uintptr_t)(base)+0x7fffffffu) > (uintptr_t)(base) \
+        ? ((uintptr_t)(base)+0x7fffffffu) \
+        : (uintptr_t)-1))
+# endif
+#endif
+
+
+#ifdef __cplusplus
+/**
+ * Clamps a buffer capacity so that pointer arithmetic on the destination
+ * pointer plus the capacity cannot overflow.
+ *
+ * For positive capacities the returned value satisfies:
+ * - dest + capacity is a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
+ * - (dest + capacity) >= dest
+ * - The size (in bytes) of T[capacity] does not exceed 0x7fffffff
+ *
+ * Zero and negative capacities are returned unchanged.
+ *
+ * @param dest the destination buffer pointer.
+ * @param capacity the requested buffer capacity, in units of type T.
+ * @return the pinned capacity.
+ * @internal
+ */
+template <typename T>
+inline int32_t pinCapacity(T *dest, int32_t capacity) {
+    if (capacity <= 0) { return capacity; }
+
+    const uintptr_t start = (uintptr_t)dest;
+
+# if U_PLATFORM == U_PF_OS390 && !defined(_LP64)
+    // We have 31-bit pointers.
+    const uintptr_t limit = 0x7fffffff;
+# elif U_PLATFORM == U_PF_OS400
+    const uintptr_t limit = (uintptr_t)uprv_maximumPtr((void *)dest);
+# else
+    // If start + 2GB wraps, there is less than 2GB left before the end of
+    // the address space; pin to the highest address instead.
+    const uintptr_t candidate = start + 0x7fffffffu;
+    const uintptr_t limit = (candidate < start) ? (uintptr_t)-1 : candidate;
+# endif
+
+    const uintptr_t roomInBytes = limit - start;  // max. 2GB
+    const int32_t roomInUnits = (int32_t)(roomInBytes / sizeof(T));
+    return (capacity > roomInUnits) ? roomInUnits : capacity;
+}
+#endif  // __cplusplus
+
+/* Dynamic Library Functions */
+
+typedef void (UVoidFunction)(void);
+
+#if U_ENABLE_DYLOAD
+/**
+ * Load a library
+ * @internal (ICU 4.4)
+ */
+U_CAPI void * U_EXPORT2 uprv_dl_open(const char *libName, UErrorCode *status);
+
+/**
+ * Close a library
+ * @internal (ICU 4.4)
+ */
+U_CAPI void U_EXPORT2 uprv_dl_close( void *lib, UErrorCode *status);
+
+/**
+ * Extract a symbol from a library (function)
+ * @internal (ICU 4.8)
+ */
+U_CAPI UVoidFunction* U_EXPORT2 uprv_dlsym_func( void *lib, const char *symbolName, UErrorCode *status);
+
+/**
+ * Extract a symbol from a library (data)
+ * Not implemented, no clients.
+ * @internal
+ */
+/* U_CAPI void * U_EXPORT2 uprv_dlsym_data( void *lib, const char *symbolName, UErrorCode *status); */
+
+#endif
+
+/**
+ * Define malloc and related functions
+ * @internal
+ */
+#if U_PLATFORM == U_PF_OS400
+# define uprv_default_malloc(x) _C_TS_malloc(x)
+# define uprv_default_realloc(x,y) _C_TS_realloc(x,y)
+# define uprv_default_free(x) _C_TS_free(x)
+/* also _C_TS_calloc(x) */
+#else
+/* C defaults */
+# define uprv_default_malloc(x) malloc(x)
+# define uprv_default_realloc(x,y) realloc(x,y)
+# define uprv_default_free(x) free(x)
+#endif
+
+
+#endif
diff --git a/thirdparty/icu4c/common/rbbi.cpp b/thirdparty/icu4c/common/rbbi.cpp
new file mode 100644
index 0000000000..9b7e70c3cf
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbi.cpp
@@ -0,0 +1,1301 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+***************************************************************************
+* Copyright (C) 1999-2016 International Business Machines Corporation
+* and others. All rights reserved.
+***************************************************************************
+*/
+//
+// file: rbbi.cpp Contains the implementation of the rule based break iterator
+// runtime engine and the API implementation for
+// class RuleBasedBreakIterator
+//
+
+#include "utypeinfo.h" // for 'typeid' to work
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include <cinttypes>
+
+#include "unicode/rbbi.h"
+#include "unicode/schriter.h"
+#include "unicode/uchriter.h"
+#include "unicode/uclean.h"
+#include "unicode/udata.h"
+
+#include "brkeng.h"
+#include "ucln_cmn.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "localsvc.h"
+#include "rbbidata.h"
+#include "rbbi_cache.h"
+#include "rbbirb.h"
+#include "uassert.h"
+#include "umutex.h"
+#include "uvectr32.h"
+
+#ifdef RBBI_DEBUG
+static UBool gTrace = FALSE;
+#endif
+
+U_NAMESPACE_BEGIN
+
+// The state number of the starting state
+constexpr int32_t START_STATE = 1;
+
+// The state-transition value indicating "stop"
+constexpr int32_t STOP_STATE = 0;
+
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator)
+
+
+//=======================================================================
+// constructors
+//=======================================================================
+
+/**
+ * Constructs a RuleBasedBreakIterator from an already-created rule data
+ * header, which is handed to a newly allocated RBBIDataWrapper.
+ *
+ * @param data   the rule tables.
+ * @param status in/out error code; set to U_MEMORY_ALLOCATION_ERROR if the
+ *               wrapper or the look-ahead result array cannot be allocated.
+ */
+RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status)
+    : fSCharIter(UnicodeString())
+{
+    init(status);
+    fData = new RBBIDataWrapper(data, status); // status checked in constructor
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (fData == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    // The look-ahead array is only needed when the forward table asks for one.
+    const auto lookAheadCount = fData->fForwardTable->fLookAheadResultsSize;
+    if (lookAheadCount > 0) {
+        fLookAheadMatches =
+            static_cast<int32_t *>(uprv_malloc(lookAheadCount * sizeof(int32_t)));
+        if (fLookAheadMatches == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+//
+//  Construct from precompiled binary rules (tables). This constructor is public API,
+//  taking the rules as a (const uint8_t *) to match the type produced by getBinaryRules().
+//
+//  The rules are wrapped with RBBIDataWrapper::kDontAdopt.
+//  status becomes U_ILLEGAL_ARGUMENT_ERROR when compiledRules is null, when
+//  ruleLength is smaller than an RBBIDataHeader, or when the header's fLength
+//  exceeds ruleLength; it becomes U_MEMORY_ALLOCATION_ERROR on allocation failure.
+//
+RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
+                                               uint32_t       ruleLength,
+                                               UErrorCode     &status)
+    : fSCharIter(UnicodeString())
+{
+    init(status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // The buffer must at least hold a complete header before it is read.
+    if (compiledRules == nullptr || ruleLength < sizeof(RBBIDataHeader)) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    const RBBIDataHeader *header = reinterpret_cast<const RBBIDataHeader *>(compiledRules);
+    if (header->fLength > ruleLength) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    fData = new RBBIDataWrapper(header, RBBIDataWrapper::kDontAdopt, status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (fData == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    if (fData->fForwardTable->fLookAheadResultsSize > 0) {
+        void *storage = uprv_malloc(fData->fForwardTable->fLookAheadResultsSize * sizeof(int32_t));
+        fLookAheadMatches = static_cast<int32_t *>(storage);
+        if (fLookAheadMatches == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+    }
+}
+
+
+//-------------------------------------------------------------------------------
+//
+//   Constructor from a UDataMemory handle to precompiled break rules
+//   stored in an ICU data file.
+//
+//   status becomes U_MEMORY_ALLOCATION_ERROR if the data wrapper or the
+//   look-ahead result array cannot be allocated.
+//
+//-------------------------------------------------------------------------------
+RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status)
+    : fSCharIter(UnicodeString())
+{
+    init(status);
+    fData = new RBBIDataWrapper(udm, status); // status checked in constructor
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (fData == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    const auto resultSlots = fData->fForwardTable->fLookAheadResultsSize;
+    if (resultSlots > 0) {
+        fLookAheadMatches = static_cast<int32_t *>(uprv_malloc(resultSlots * sizeof(int32_t)));
+        if (fLookAheadMatches == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+    }
+}
+
+
+
+//-------------------------------------------------------------------------------
+//
+//   Constructor from a set of rules supplied as a string.
+//
+//   The rules are compiled by RBBIRuleBuilder::createRuleBasedBreakIterator();
+//   parse problems are reported through parseError and status.
+//
+//-------------------------------------------------------------------------------
+RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString  &rules,
+                                                UParseError          &parseError,
+                                                UErrorCode           &status)
+    : fSCharIter(UnicodeString())
+{
+    init(status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // Note:  This is a bit awkward. The RBBI ruleBuilder has a factory method that
+    //        creates and returns a complete RBBI. From here, in a constructor, we
+    //        can't just return the object created by the builder factory, hence
+    //        the assignment of the factory created object to "this".
+    RuleBasedBreakIterator *built = (RuleBasedBreakIterator *)
+        RBBIRuleBuilder::createRuleBasedBreakIterator(rules, &parseError, status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+    *this = *built;
+    delete built;
+}
+
+
+//-------------------------------------------------------------------------------
+//
+// Default Constructor.      Create an empty shell that can be set up later.
+//                           Used when creating a RuleBasedBreakIterator from a set
+//                           of rules.
+//                           The status produced by init() is local and is not
+//                           reported to the caller.
+//-------------------------------------------------------------------------------
+RuleBasedBreakIterator::RuleBasedBreakIterator()
+    : fSCharIter(UnicodeString())
+{
+    UErrorCode initStatus = U_ZERO_ERROR;
+    init(initStatus);
+}
+
+
+//-------------------------------------------------------------------------------
+//
+//   Copy constructor.  Will produce a break iterator with the same behavior,
+//                      and which iterates over the same text, as the one passed in.
+//                      Implemented as init() followed by the assignment operator;
+//                      the status produced by init() is not reported.
+//
+//-------------------------------------------------------------------------------
+RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other)
+    : BreakIterator(other),
+      fSCharIter(UnicodeString())
+{
+    UErrorCode initStatus = U_ZERO_ERROR;
+    init(initStatus);
+    operator=(other);
+}
+
+
+/**
+ * Destructor. Releases the adopted character iterator (if any), the UText,
+ * the reference on the shared rule data, both caches, the break engines
+ * and the look-ahead result array.
+ */
+RuleBasedBreakIterator::~RuleBasedBreakIterator() {
+    // fSCharIter is a member object; only an iterator adopted from the
+    // outside is deleted here.
+    if (fCharIter != &fSCharIter) {
+        delete fCharIter;
+    }
+    fCharIter = nullptr;
+
+    utext_close(&fText);
+
+    // The rule data is reference counted, so drop our reference rather than delete.
+    if (fData != nullptr) {
+        fData->removeReference();
+        fData = nullptr;
+    }
+
+    delete fBreakCache;
+    fBreakCache = nullptr;
+
+    delete fDictionaryCache;
+    fDictionaryCache = nullptr;
+
+    delete fLanguageBreakEngines;
+    fLanguageBreakEngines = nullptr;
+
+    delete fUnhandledBreakEngine;
+    fUnhandledBreakEngine = nullptr;
+
+    uprv_free(fLookAheadMatches);
+    fLookAheadMatches = nullptr;
+}
+
+/**
+ * Assignment operator.  Sets this iterator to have the same behavior,
+ * and iterate over the same text, as the one passed in.
+ *
+ * The rule data is shared by reference; the language break engines are
+ * discarded and both caches are reset rather than copied.
+ *
+ * TODO:  needs better handling of memory allocation errors. Failures from
+ *        utext_clone() and from allocating fLookAheadMatches cannot be
+ *        reported from here and are silently ignored.
+ *
+ * @param that the iterator to copy from.
+ * @return a reference to this iterator.
+ */
+RuleBasedBreakIterator&
+RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
+    if (this == &that) {
+        return *this;
+    }
+    BreakIterator::operator=(that);
+
+    if (fLanguageBreakEngines != NULL) {
+        delete fLanguageBreakEngines;
+        fLanguageBreakEngines = NULL;   // Just rebuild for now
+    }
+    // TODO: clone fLanguageBreakEngines from "that"
+    UErrorCode status = U_ZERO_ERROR;
+    utext_clone(&fText, &that.fText, FALSE, TRUE, &status);
+
+    if (fCharIter != &fSCharIter) {
+        delete fCharIter;
+    }
+    fCharIter = &fSCharIter;
+
+    if (that.fCharIter != NULL && that.fCharIter != &that.fSCharIter) {
+        // This is a little bit tricky - it will initially appear that
+        //  this->fCharIter is adopted, even if that->fCharIter was
+        //  not adopted.  That's ok.
+        fCharIter = that.fCharIter->clone();
+    }
+    fSCharIter = that.fSCharIter;
+    if (fCharIter == NULL) {
+        // clone() failed; fall back to the member iterator.
+        fCharIter = &fSCharIter;
+    }
+
+    if (fData != NULL) {
+        fData->removeReference();
+        fData = NULL;
+    }
+    if (that.fData != NULL) {
+        fData = that.fData->addReference();
+    }
+
+    uprv_free(fLookAheadMatches);
+    fLookAheadMatches = nullptr;
+    if (fData && fData->fForwardTable->fLookAheadResultsSize > 0) {
+        fLookAheadMatches = static_cast<int32_t *>(
+            uprv_malloc(fData->fForwardTable->fLookAheadResultsSize * sizeof(int32_t)));
+    }
+
+
+    fPosition = that.fPosition;
+    fRuleStatusIndex = that.fRuleStatusIndex;
+    fDone = that.fDone;
+
+    // TODO: both the dictionary and the main cache need to be copied.
+    //       Current position could be within a dictionary range. Trying to continue
+    //       the iteration without the caches present would go to the rules, with
+    //       the assumption that the current position is on a rule boundary.
+    //
+    // init() leaves either cache null when its allocation fails (or when it is
+    // entered with a failing status), and the default and copy constructors
+    // do not report that status. Guard the resets so that assigning into such
+    // an object does not dereference a null pointer.
+    if (fBreakCache != nullptr) {
+        fBreakCache->reset(fPosition, fRuleStatusIndex);
+    }
+    if (fDictionaryCache != nullptr) {
+        fDictionaryCache->reset();
+    }
+
+    return *this;
+}
+
+
+
+//-----------------------------------------------------------------------------
+//
+//    init()      Shared initialization routine.   Used by all the constructors.
+//                Initializes all fields, leaving the object in a consistent state.
+//
+//                Every member is first set to an empty value. If status is
+//                already a failure nothing else happens; otherwise an empty
+//                UText is opened and both caches are allocated. status becomes
+//                U_MEMORY_ALLOCATION_ERROR if either cache allocation fails.
+//
+//-----------------------------------------------------------------------------
+void RuleBasedBreakIterator::init(UErrorCode &status) {
+    // Pointer members.
+    fCharIter             = nullptr;
+    fData                 = nullptr;
+    fLanguageBreakEngines = nullptr;
+    fUnhandledBreakEngine = nullptr;
+    fBreakCache           = nullptr;
+    fDictionaryCache      = nullptr;
+    fLookAheadMatches     = nullptr;
+
+    // Iteration state.
+    fPosition            = 0;
+    fRuleStatusIndex     = 0;
+    fDone                = false;
+    fDictionaryCharCount = 0;
+
+    // Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER.
+    // fText                 = UTEXT_INITIALIZER;
+    static const UText initializedUText = UTEXT_INITIALIZER;
+    uprv_memcpy(&fText, &initializedUText, sizeof(UText));
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    utext_openUChars(&fText, NULL, 0, &status);
+    fDictionaryCache = new DictionaryCache(this, status);
+    fBreakCache      = new BreakCache(this, status);
+    const bool cacheMissing = (fDictionaryCache == NULL || fBreakCache == NULL);
+    if (U_SUCCESS(status) && cacheMissing) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+
+#ifdef RBBI_DEBUG
+    // Read the U_RBBIDEBUG environment variable once; "trace" enables tracing.
+    static UBool debugInitDone = FALSE;
+    if (debugInitDone == FALSE) {
+        char *debugEnv = getenv("U_RBBIDEBUG");
+        if (debugEnv && uprv_strstr(debugEnv, "trace")) {
+            gTrace = TRUE;
+        }
+        debugInitDone = TRUE;
+    }
+#endif
+}
+
+
+
+//-----------------------------------------------------------------------------
+//
+//    clone - Returns a newly-constructed RuleBasedBreakIterator with the same
+//            behavior, and iterating over the same text, as this one.
+//            Virtual function: does the right thing with subclasses.
+//            The copy is made with the copy constructor.
+//
+//-----------------------------------------------------------------------------
+RuleBasedBreakIterator*
+RuleBasedBreakIterator::clone() const {
+    RuleBasedBreakIterator *copy = new RuleBasedBreakIterator(*this);
+    return copy;
+}
+
+/**
+ * Equality operator.  Returns TRUE if both BreakIterators are of the
+ * same class, have the same behavior, and iterate over the same text.
+ *
+ * Compared in order: dynamic type, identity, text (via utext_equals),
+ * position / rule status index / done flag, and finally the rule data.
+ */
+UBool
+RuleBasedBreakIterator::operator==(const BreakIterator& that) const {
+    if (typeid(*this) != typeid(that)) {
+        return FALSE;
+    }
+    if (this == &that) {
+        return TRUE;
+    }
+
+    // The base class BreakIterator carries no state that participates in equality,
+    // and does not implement an equality function that would otherwise be
+    // checked at this point.
+
+    // The typeid check above established that the dynamic types match.
+    const RuleBasedBreakIterator& other = static_cast<const RuleBasedBreakIterator&>(that);
+
+    if (!utext_equals(&fText, &other.fText)) {
+        // The two break iterators are operating on different text,
+        // or have a different iteration position.
+        // Note that fText's position is always the same as the break iterator's position.
+        return FALSE;
+    }
+
+    if (fPosition != other.fPosition ||
+        fRuleStatusIndex != other.fRuleStatusIndex ||
+        fDone != other.fDone) {
+        return FALSE;
+    }
+
+    // Same wrapper object (or both null): the rules are trivially the same.
+    if (other.fData == fData) {
+        return TRUE;
+    }
+    // Otherwise both wrappers must exist and compare equal.
+    if (fData != NULL && other.fData != NULL && *other.fData == *fData) {
+        return TRUE;
+    }
+    return FALSE;
+}
+
+/**
+ * Compute a hash code for this BreakIterator.
+ * The value is the hash code of the rule data, or 0 when there is none.
+ * @return A hash code
+ */
+int32_t
+RuleBasedBreakIterator::hashCode(void) const {
+    if (fData == NULL) {
+        return 0;
+    }
+    int32_t rulesHash = fData->hashCode();
+    return rulesHash;
+}
+
+
+void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+ fBreakCache->reset();
+ fDictionaryCache->reset();
+ utext_clone(&fText, ut, FALSE, TRUE, &status);
+
+ // Set up a dummy CharacterIterator to be returned if anyone
+ // calls getText(). With input from UText, there is no reasonable
+ // way to return a characterIterator over the actual input text.
+ // Return one over an empty string instead - this is the closest
+ // we can come to signaling a failure.
+ // (getText() is obsolete, so this failure mode is sort of OK.)
+ fSCharIter.setText(UnicodeString());
+
+ if (fCharIter != &fSCharIter) {
+ // existing fCharIter was adopted from the outside. Delete it now.
+ delete fCharIter;
+ }
+ fCharIter = &fSCharIter;
+
+ this->first();
+}
+
+
+UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const {
+ UText *result = utext_clone(fillIn, &fText, FALSE, TRUE, &status);
+ return result;
+}
+
+
+//=======================================================================
+// BreakIterator overrides
+//=======================================================================
+
+/**
+ * Return a CharacterIterator over the text being analyzed.
+ */
+CharacterIterator&
+RuleBasedBreakIterator::getText() const {
+ return *fCharIter;
+}
+
+/**
+ * Set the iterator to analyze a new piece of text. This function resets
+ * the current iteration position to the beginning of the text.
+ * @param newText An iterator over the text to analyze.
+ */
+void
+RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
+ // If we are holding a CharacterIterator adopted from a
+ // previous call to this function, delete it now.
+ if (fCharIter != &fSCharIter) {
+ delete fCharIter;
+ }
+
+ fCharIter = newText;
+ UErrorCode status = U_ZERO_ERROR;
+ fBreakCache->reset();
+ fDictionaryCache->reset();
+ if (newText==NULL || newText->startIndex() != 0) {
+ // startIndex !=0 wants to be an error, but there's no way to report it.
+ // Make the iterator text be an empty string.
+ utext_openUChars(&fText, NULL, 0, &status);
+ } else {
+ utext_openCharacterIterator(&fText, newText, &status);
+ }
+ this->first();
+}
+
+/**
+ * Set the iterator to analyze a new piece of text. This function resets
+ * the current iteration position to the beginning of the text.
+ * @param newText The text to analyze.
+ */
+void
+RuleBasedBreakIterator::setText(const UnicodeString& newText) {
+ UErrorCode status = U_ZERO_ERROR;
+ fBreakCache->reset();
+ fDictionaryCache->reset();
+ utext_openConstUnicodeString(&fText, &newText, &status);
+
+ // Set up a character iterator on the string.
+ // Needed in case someone calls getText().
+ // Can not, unfortunately, do this lazily on the (probably never)
+ // call to getText(), because getText is const.
+ fSCharIter.setText(newText);
+
+ if (fCharIter != &fSCharIter) {
+ // old fCharIter was adopted from the outside. Delete it.
+ delete fCharIter;
+ }
+ fCharIter = &fSCharIter;
+
+ this->first();
+}
+
+
+/**
+ * Provide a new UText for the input text. Must reference text with contents identical
+ * to the original.
+ * Intended for use with text data originating in Java (garbage collected) environments
+ * where the data may be moved in memory at arbitrary times.
+ */
+RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return *this;
+ }
+ if (input == NULL) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return *this;
+ }
+ int64_t pos = utext_getNativeIndex(&fText);
+ // Shallow read-only clone of the new UText into the existing input UText
+ utext_clone(&fText, input, FALSE, TRUE, &status);
+ if (U_FAILURE(status)) {
+ return *this;
+ }
+ utext_setNativeIndex(&fText, pos);
+ if (utext_getNativeIndex(&fText) != pos) {
+ // Sanity check. The new input utext is supposed to have the exact same
+ // contents as the old. If we can't set to the same position, it doesn't.
+ // The contents underlying the old utext might be invalid at this point,
+ // so it's not safe to check directly.
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+ return *this;
+}
+
+
+/**
+ * Sets the current iteration position to the beginning of the text, position zero.
+ * @return The new iterator position, which is zero.
+ */
+int32_t RuleBasedBreakIterator::first(void) {
+ UErrorCode status = U_ZERO_ERROR;
+ if (!fBreakCache->seek(0)) {
+ fBreakCache->populateNear(0, status);
+ }
+ fBreakCache->current();
+ U_ASSERT(fPosition == 0);
+ return 0;
+}
+
+/**
+ * Sets the current iteration position to the end of the text.
+ * @return The text's past-the-end offset.
+ */
+int32_t RuleBasedBreakIterator::last(void) {
+ int32_t endPos = (int32_t)utext_nativeLength(&fText);
+ UBool endShouldBeBoundary = isBoundary(endPos); // Has side effect of setting iterator position.
+ (void)endShouldBeBoundary;
+ U_ASSERT(endShouldBeBoundary);
+ U_ASSERT(fPosition == endPos);
+ return endPos;
+}
+
+/**
+ * Advances the iterator either forward or backward the specified number of steps.
+ * Negative values move backward, and positive values move forward. This is
+ * equivalent to repeatedly calling next() or previous().
+ * @param n The number of steps to move. The sign indicates the direction
+ * (negative is backwards, and positive is forwards).
+ * @return The character offset of the boundary position n boundaries away from
+ * the current one.
+ */
+int32_t RuleBasedBreakIterator::next(int32_t n) {
+ int32_t result = 0;
+ if (n > 0) {
+ for (; n > 0 && result != UBRK_DONE; --n) {
+ result = next();
+ }
+ } else if (n < 0) {
+ for (; n < 0 && result != UBRK_DONE; ++n) {
+ result = previous();
+ }
+ } else {
+ result = current();
+ }
+ return result;
+}
+
+/**
+ * Advances the iterator to the next boundary position.
+ * @return The position of the first boundary after this one.
+ */
+int32_t RuleBasedBreakIterator::next(void) {
+ fBreakCache->next();
+ return fDone ? UBRK_DONE : fPosition;
+}
+
+/**
+ * Move the iterator backwards, to the boundary preceding the current one.
+ *
+ * Starts from the current position within fText.
+ * Starting position need not be on a boundary.
+ *
+ * @return The position of the boundary immediately preceding the starting position.
+ */
+int32_t RuleBasedBreakIterator::previous(void) {
+ UErrorCode status = U_ZERO_ERROR;
+ fBreakCache->previous(status);
+ return fDone ? UBRK_DONE : fPosition;
+}
+
+/**
+ * Sets the iterator to refer to the first boundary position following
+ * the specified position.
+ * @param startPos The position from which to begin searching for a break position.
+ * @return The position of the first break after the specified position.
+ */
+int32_t RuleBasedBreakIterator::following(int32_t startPos) {
+ // if the supplied position is before the beginning, return the
+ // text's starting offset
+ if (startPos < 0) {
+ return first();
+ }
+
+ // Move requested offset to a code point start. It might be on a trail surrogate,
+ // or on a trail byte if the input is UTF-8. Or it may be beyond the end of the text.
+ utext_setNativeIndex(&fText, startPos);
+ startPos = (int32_t)utext_getNativeIndex(&fText);
+
+ UErrorCode status = U_ZERO_ERROR;
+ fBreakCache->following(startPos, status);
+ return fDone ? UBRK_DONE : fPosition;
+}
+
+/**
+ * Sets the iterator to refer to the last boundary position before the
+ * specified position.
+ * @param offset The position to begin searching for a break from.
+ * @return The position of the last boundary before the starting position.
+ */
+int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
+ if (offset > utext_nativeLength(&fText)) {
+ return last();
+ }
+
+ // Move requested offset to a code point start. It might be on a trail surrogate,
+ // or on a trail byte if the input is UTF-8.
+
+ utext_setNativeIndex(&fText, offset);
+ int32_t adjustedOffset = static_cast<int32_t>(utext_getNativeIndex(&fText));
+
+ UErrorCode status = U_ZERO_ERROR;
+ fBreakCache->preceding(adjustedOffset, status);
+ return fDone ? UBRK_DONE : fPosition;
+}
+
+/**
+ * Returns true if the specified position is a boundary position. As a side
+ * effect, leaves the iterator pointing to the first boundary position at
+ * or after "offset".
+ *
+ * @param offset the offset to check.
+ * @return True if "offset" is a boundary position.
+ */
+UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
+ // out-of-range indexes are never boundary positions
+ if (offset < 0) {
+ first(); // For side effects on current position, tag values.
+ return FALSE;
+ }
+
+ // Adjust offset to be on a code point boundary and not beyond the end of the text.
+ // Note that isBoundary() is always false for offsets that are not on code point boundaries.
+ // But we still need the side effect of leaving iteration at the following boundary.
+
+ utext_setNativeIndex(&fText, offset);
+ int32_t adjustedOffset = static_cast<int32_t>(utext_getNativeIndex(&fText));
+
+ bool result = false;
+ UErrorCode status = U_ZERO_ERROR;
+ if (fBreakCache->seek(adjustedOffset) || fBreakCache->populateNear(adjustedOffset, status)) {
+ result = (fBreakCache->current() == offset);
+ }
+
+ if (result && adjustedOffset < offset && utext_char32At(&fText, offset) == U_SENTINEL) {
+ // Original offset is beyond the end of the text. Return FALSE, it's not a boundary,
+ // but the iteration position remains set to the end of the text, which is a boundary.
+ return FALSE;
+ }
+ if (!result) {
+ // Not on a boundary. isBoundary() must leave iterator on the following boundary.
+ // Cache->seek(), above, left us on the preceding boundary, so advance one.
+ next();
+ }
+ return result;
+}
+
+
+/**
+ * Returns the current iteration position.
+ * @return The current iteration position.
+ */
+int32_t RuleBasedBreakIterator::current(void) const {
+ return fPosition;
+}
+
+
+//=======================================================================
+// implementation
+//=======================================================================
+
+//
+// RBBIRunMode - the state machine runs an extra iteration at the beginning and end
+// of user text. A variable with this enum type keeps track of where we
+// are. The state machine only fetches user input while in the RUN mode.
+//
+enum RBBIRunMode {
+ RBBI_START, // state machine processing is before first char of input
+ RBBI_RUN, // state machine processing is in the user text
+ RBBI_END // state machine processing is after end of user text.
+};
+
+
+// Wrapper functions to select the appropriate handleNext() or handleSafePrevious()
+// instantiation, based on whether an 8 or 16 bit table is required.
+//
+// These Trie access functions will be inlined within the handleNext()/Previous() instantiations.
+static inline uint16_t TrieFunc8(const UCPTrie *trie, UChar32 c) {
+ return UCPTRIE_FAST_GET(trie, UCPTRIE_8, c);
+}
+
+static inline uint16_t TrieFunc16(const UCPTrie *trie, UChar32 c) {
+ return UCPTRIE_FAST_GET(trie, UCPTRIE_16, c);
+}
+
+int32_t RuleBasedBreakIterator::handleNext() {
+ const RBBIStateTable *statetable = fData->fForwardTable;
+ bool use8BitsTrie = ucptrie_getValueWidth(fData->fTrie) == UCPTRIE_VALUE_BITS_8;
+ if (statetable->fFlags & RBBI_8BITS_ROWS) {
+ if (use8BitsTrie) {
+ return handleNext<RBBIStateTableRow8, TrieFunc8>();
+ } else {
+ return handleNext<RBBIStateTableRow8, TrieFunc16>();
+ }
+ } else {
+ if (use8BitsTrie) {
+ return handleNext<RBBIStateTableRow16, TrieFunc8>();
+ } else {
+ return handleNext<RBBIStateTableRow16, TrieFunc16>();
+ }
+ }
+}
+
+int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) {
+ const RBBIStateTable *statetable = fData->fReverseTable;
+ bool use8BitsTrie = ucptrie_getValueWidth(fData->fTrie) == UCPTRIE_VALUE_BITS_8;
+ if (statetable->fFlags & RBBI_8BITS_ROWS) {
+ if (use8BitsTrie) {
+ return handleSafePrevious<RBBIStateTableRow8, TrieFunc8>(fromPosition);
+ } else {
+ return handleSafePrevious<RBBIStateTableRow8, TrieFunc16>(fromPosition);
+ }
+ } else {
+ if (use8BitsTrie) {
+ return handleSafePrevious<RBBIStateTableRow16, TrieFunc8>(fromPosition);
+ } else {
+ return handleSafePrevious<RBBIStateTableRow16, TrieFunc16>(fromPosition);
+ }
+ }
+}
+
+
+//-----------------------------------------------------------------------------------
+//
+// handleNext()
+// Run the state machine to find a boundary
+//
+//-----------------------------------------------------------------------------------
+template <typename RowType, RuleBasedBreakIterator::PTrieFunc trieFunc>
+int32_t RuleBasedBreakIterator::handleNext() {
+ int32_t state;
+ uint16_t category = 0;
+ RBBIRunMode mode;
+
+ RowType *row;
+ UChar32 c;
+ int32_t result = 0;
+ int32_t initialPosition = 0;
+ const RBBIStateTable *statetable = fData->fForwardTable;
+ const char *tableData = statetable->fTableData;
+ uint32_t tableRowLen = statetable->fRowLen;
+ uint32_t dictStart = statetable->fDictCategoriesStart;
+ #ifdef RBBI_DEBUG
+ if (gTrace) {
+ RBBIDebugPuts("Handle Next pos char state category");
+ }
+ #endif
+
+ // handleNext always sets the break tag value.
+ // Set the default for it.
+ fRuleStatusIndex = 0;
+
+ fDictionaryCharCount = 0;
+
+ // if we're already at the end of the text, return DONE.
+ initialPosition = fPosition;
+ UTEXT_SETNATIVEINDEX(&fText, initialPosition);
+ result = initialPosition;
+ c = UTEXT_NEXT32(&fText);
+ if (c==U_SENTINEL) {
+ fDone = TRUE;
+ return UBRK_DONE;
+ }
+
+ // Set the initial state for the state machine
+ state = START_STATE;
+ row = (RowType *)
+ //(statetable->fTableData + (statetable->fRowLen * state));
+ (tableData + tableRowLen * state);
+
+
+ mode = RBBI_RUN;
+ if (statetable->fFlags & RBBI_BOF_REQUIRED) {
+ category = 2;
+ mode = RBBI_START;
+ }
+
+
+ // loop until we reach the end of the text or transition to state 0
+ //
+ for (;;) {
+ if (c == U_SENTINEL) {
+ // Reached end of input string.
+ if (mode == RBBI_END) {
+ // We have already run the loop one last time with the
+ // character set to the pseudo {eof} value. Now it is time
+ // to unconditionally bail out.
+ break;
+ }
+ // Run the loop one last time with the fake end-of-input character category.
+ mode = RBBI_END;
+ category = 1;
+ }
+
+ //
+ // Get the char category. An incoming category of 1 or 2 means that
+ // we are preset for doing the beginning or end of input, and
+ // that we shouldn't get a category from an actual text input character.
+ //
+ if (mode == RBBI_RUN) {
+ // look up the current character's character category, which tells us
+ // which column in the state table to look at.
+ category = trieFunc(fData->fTrie, c);
+ fDictionaryCharCount += (category >= dictStart);
+ }
+
+ #ifdef RBBI_DEBUG
+ if (gTrace) {
+ RBBIDebugPrintf(" %4" PRId64 " ", utext_getNativeIndex(&fText));
+ if (0x20<=c && c<0x7f) {
+ RBBIDebugPrintf("\"%c\" ", c);
+ } else {
+ RBBIDebugPrintf("%5x ", c);
+ }
+ RBBIDebugPrintf("%3d %3d\n", state, category);
+ }
+ #endif
+
+ // State Transition - move machine to its next state
+ //
+
+ // fNextState is a variable-length array.
+ U_ASSERT(category<fData->fHeader->fCatCount);
+ state = row->fNextState[category]; /*Not accessing beyond memory*/
+ row = (RowType *)
+ // (statetable->fTableData + (statetable->fRowLen * state));
+ (tableData + tableRowLen * state);
+
+
+ uint16_t accepting = row->fAccepting;
+ if (accepting == ACCEPTING_UNCONDITIONAL) {
+ // Match found, common case.
+ if (mode != RBBI_START) {
+ result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
+ }
+ fRuleStatusIndex = row->fTagsIdx; // Remember the break status (tag) values.
+ } else if (accepting > ACCEPTING_UNCONDITIONAL) {
+ // Lookahead match is completed.
+ U_ASSERT(accepting < fData->fForwardTable->fLookAheadResultsSize);
+ int32_t lookaheadResult = fLookAheadMatches[accepting];
+ if (lookaheadResult >= 0) {
+ fRuleStatusIndex = row->fTagsIdx;
+ fPosition = lookaheadResult;
+ return lookaheadResult;
+ }
+ }
+
+ // If we are at the position of the '/' in a look-ahead (hard break) rule;
+ // record the current position, to be returned later, if the full rule matches.
+ // TODO: Move this check before the previous check of fAccepting.
+ // This would enable hard-break rules with no following context.
+ // But there are line break test failures when trying this. Investigate.
+ // Issue ICU-20837
+ uint16_t rule = row->fLookAhead;
+ U_ASSERT(rule == 0 || rule > ACCEPTING_UNCONDITIONAL);
+ U_ASSERT(rule == 0 || rule < fData->fForwardTable->fLookAheadResultsSize);
+ if (rule > ACCEPTING_UNCONDITIONAL) {
+ int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
+ fLookAheadMatches[rule] = pos;
+ }
+
+ if (state == STOP_STATE) {
+ // This is the normal exit from the lookup state machine.
+ // We have advanced through the string until it is certain that no
+ // longer match is possible, no matter what characters follow.
+ break;
+ }
+
+ // Advance to the next character.
+ // If this is a beginning-of-input loop iteration, don't advance
+ // the input position. The next iteration will be processing the
+ // first real input character.
+ if (mode == RBBI_RUN) {
+ c = UTEXT_NEXT32(&fText);
+ } else {
+ if (mode == RBBI_START) {
+ mode = RBBI_RUN;
+ }
+ }
+ }
+
+ // The state machine is done. Check whether it found a match...
+
+ // If the iterator failed to advance in the match engine, force it ahead by one.
+ // (This really indicates a defect in the break rules. They should always match
+ // at least one character.)
+ if (result == initialPosition) {
+ utext_setNativeIndex(&fText, initialPosition);
+ utext_next32(&fText);
+ result = (int32_t)utext_getNativeIndex(&fText);
+ fRuleStatusIndex = 0;
+ }
+
+ // Leave the iterator at our result position.
+ fPosition = result;
+ #ifdef RBBI_DEBUG
+ if (gTrace) {
+ RBBIDebugPrintf("result = %d\n\n", result);
+ }
+ #endif
+ return result;
+}
+
+
+//-----------------------------------------------------------------------------------
+//
+// handleSafePrevious()
+//
+// Iterate backwards using the safe reverse rules.
+// The logic of this function is similar to handleNext(), but simpler
+// because the safe table does not require as many options.
+//
+//-----------------------------------------------------------------------------------
+template <typename RowType, RuleBasedBreakIterator::PTrieFunc trieFunc>
+int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) {
+
+ int32_t state;
+ uint16_t category = 0;
+ RowType *row;
+ UChar32 c;
+ int32_t result = 0;
+
+ const RBBIStateTable *stateTable = fData->fReverseTable;
+ UTEXT_SETNATIVEINDEX(&fText, fromPosition);
+ #ifdef RBBI_DEBUG
+ if (gTrace) {
+ RBBIDebugPuts("Handle Previous pos char state category");
+ }
+ #endif
+
+ // if we're already at the start of the text, return DONE.
+ if (fData == NULL || UTEXT_GETNATIVEINDEX(&fText)==0) {
+ return BreakIterator::DONE;
+ }
+
+ // Set the initial state for the state machine
+ c = UTEXT_PREVIOUS32(&fText);
+ state = START_STATE;
+ row = (RowType *)
+ (stateTable->fTableData + (stateTable->fRowLen * state));
+
+ // loop until we reach the start of the text or transition to state 0
+ //
+ for (; c != U_SENTINEL; c = UTEXT_PREVIOUS32(&fText)) {
+
+ // look up the current character's character category, which tells us
+ // which column in the state table to look at.
+ //
+ // No dictionary flag handling is done here; reverse iteration does not use it.
+ category = trieFunc(fData->fTrie, c);
+
+ #ifdef RBBI_DEBUG
+ if (gTrace) {
+ RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(&fText));
+ if (0x20<=c && c<0x7f) {
+ RBBIDebugPrintf("\"%c\" ", c);
+ } else {
+ RBBIDebugPrintf("%5x ", c);
+ }
+ RBBIDebugPrintf("%3d %3d\n", state, category);
+ }
+ #endif
+
+ // State Transition - move machine to its next state
+ //
+ // fNextState is a variable-length array.
+ U_ASSERT(category<fData->fHeader->fCatCount);
+ state = row->fNextState[category]; /*Not accessing beyond memory*/
+ row = (RowType *)
+ (stateTable->fTableData + (stateTable->fRowLen * state));
+
+ if (state == STOP_STATE) {
+ // This is the normal exit from the lookup state machine.
+ // Transition to state zero means we have found a safe point.
+ break;
+ }
+ }
+
+ // The state machine is done. Check whether it found a match...
+ result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
+ #ifdef RBBI_DEBUG
+ if (gTrace) {
+ RBBIDebugPrintf("result = %d\n\n", result);
+ }
+ #endif
+ return result;
+}
+
+
+//-------------------------------------------------------------------------------
+//
+// getRuleStatus() Return the break rule tag associated with the current
+// iterator position. If the iterator arrived at its current
+// position by iterating forwards, the value will have been
+// cached by the handleNext() function.
+//
+//-------------------------------------------------------------------------------
+
+int32_t RuleBasedBreakIterator::getRuleStatus() const {
+
+ // fRuleStatusIndex indexes to the start of the appropriate status record
+ // (the number of status values.)
+ // This function returns the last (largest) of the array of status values.
+ int32_t idx = fRuleStatusIndex + fData->fRuleStatusTable[fRuleStatusIndex];
+ int32_t tagVal = fData->fRuleStatusTable[idx];
+
+ return tagVal;
+}
+
+
+int32_t RuleBasedBreakIterator::getRuleStatusVec(
+ int32_t *fillInVec, int32_t capacity, UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return 0;
+ }
+
+ int32_t numVals = fData->fRuleStatusTable[fRuleStatusIndex];
+ int32_t numValsToCopy = numVals;
+ if (numVals > capacity) {
+ status = U_BUFFER_OVERFLOW_ERROR;
+ numValsToCopy = capacity;
+ }
+ int i;
+ for (i=0; i<numValsToCopy; i++) {
+ fillInVec[i] = fData->fRuleStatusTable[fRuleStatusIndex + i + 1];
+ }
+ return numVals;
+}
+
+
+
+//-------------------------------------------------------------------------------
+//
+// getBinaryRules Access to the compiled form of the rules,
+// for use by build system tools that save the data
+// for standard iterator types.
+//
+//-------------------------------------------------------------------------------
+const uint8_t *RuleBasedBreakIterator::getBinaryRules(uint32_t &length) {
+ const uint8_t *retPtr = NULL;
+ length = 0;
+
+ if (fData != NULL) {
+ retPtr = (const uint8_t *)fData->fHeader;
+ length = fData->fHeader->fLength;
+ }
+ return retPtr;
+}
+
+
+RuleBasedBreakIterator *RuleBasedBreakIterator::createBufferClone(
+ void * /*stackBuffer*/, int32_t &bufferSize, UErrorCode &status) {
+ if (U_FAILURE(status)){
+ return NULL;
+ }
+
+ if (bufferSize == 0) {
+ bufferSize = 1; // preflighting for deprecated functionality
+ return NULL;
+ }
+
+ BreakIterator *clonedBI = clone();
+ if (clonedBI == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ } else {
+ status = U_SAFECLONE_ALLOCATED_WARNING;
+ }
+ return (RuleBasedBreakIterator *)clonedBI;
+}
+
+U_NAMESPACE_END
+
+
+static icu::UStack *gLanguageBreakFactories = nullptr;
+static const icu::UnicodeString *gEmptyString = nullptr;
+static icu::UInitOnce gLanguageBreakFactoriesInitOnce = U_INITONCE_INITIALIZER;
+static icu::UInitOnce gRBBIInitOnce = U_INITONCE_INITIALIZER;
+
+/**
+ * Release all static memory held by breakiterator.
+ */
+U_CDECL_BEGIN
+UBool U_CALLCONV rbbi_cleanup(void) {
+ delete gLanguageBreakFactories;
+ gLanguageBreakFactories = nullptr;
+ delete gEmptyString;
+ gEmptyString = nullptr;
+ gLanguageBreakFactoriesInitOnce.reset();
+ gRBBIInitOnce.reset();
+ return TRUE;
+}
+U_CDECL_END
+
+U_CDECL_BEGIN
+static void U_CALLCONV _deleteFactory(void *obj) {
+ delete (icu::LanguageBreakFactory *) obj;
+}
+U_CDECL_END
+U_NAMESPACE_BEGIN
+
+static void U_CALLCONV rbbiInit() {
+ gEmptyString = new UnicodeString();
+ ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup);
+}
+
+static void U_CALLCONV initLanguageFactories() {
+ UErrorCode status = U_ZERO_ERROR;
+ U_ASSERT(gLanguageBreakFactories == NULL);
+ gLanguageBreakFactories = new UStack(_deleteFactory, NULL, status);
+ if (gLanguageBreakFactories != NULL && U_SUCCESS(status)) {
+ ICULanguageBreakFactory *builtIn = new ICULanguageBreakFactory(status);
+ gLanguageBreakFactories->push(builtIn, status);
+#ifdef U_LOCAL_SERVICE_HOOK
+ LanguageBreakFactory *extra = (LanguageBreakFactory *)uprv_svc_hook("languageBreakFactory", &status);
+ if (extra != NULL) {
+ gLanguageBreakFactories->push(extra, status);
+ }
+#endif
+ }
+ ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup);
+}
+
+
+static const LanguageBreakEngine*
+getLanguageBreakEngineFromFactory(UChar32 c)
+{
+ umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories);
+ if (gLanguageBreakFactories == NULL) {
+ return NULL;
+ }
+
+ int32_t i = gLanguageBreakFactories->size();
+ const LanguageBreakEngine *lbe = NULL;
+ while (--i >= 0) {
+ LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i));
+ lbe = factory->getEngineFor(c);
+ if (lbe != NULL) {
+ break;
+ }
+ }
+ return lbe;
+}
+
+
+//-------------------------------------------------------------------------------
+//
+// getLanguageBreakEngine Find an appropriate LanguageBreakEngine for the
+// character c.
+//
+//-------------------------------------------------------------------------------
+const LanguageBreakEngine *
+RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
+ const LanguageBreakEngine *lbe = NULL;
+ UErrorCode status = U_ZERO_ERROR;
+
+ if (fLanguageBreakEngines == NULL) {
+ fLanguageBreakEngines = new UStack(status);
+ if (fLanguageBreakEngines == NULL || U_FAILURE(status)) {
+ delete fLanguageBreakEngines;
+ fLanguageBreakEngines = 0;
+ return NULL;
+ }
+ }
+
+ int32_t i = fLanguageBreakEngines->size();
+ while (--i >= 0) {
+ lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i));
+ if (lbe->handles(c)) {
+ return lbe;
+ }
+ }
+
+ // No existing dictionary took the character. See if a factory wants to
+ // give us a new LanguageBreakEngine for this character.
+ lbe = getLanguageBreakEngineFromFactory(c);
+
+ // If we got one, use it and push it on our stack.
+ if (lbe != NULL) {
+ fLanguageBreakEngines->push((void *)lbe, status);
+ // Even if we can't remember it, we can keep looking it up, so
+ // return it even if the push fails.
+ return lbe;
+ }
+
+ // No engine is forthcoming for this character. Add it to the
+ // reject set. Create the reject break engine if needed.
+ if (fUnhandledBreakEngine == NULL) {
+ fUnhandledBreakEngine = new UnhandledEngine(status);
+ if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return nullptr;
+ }
+ // Put it last so that scripts for which we have an engine get tried
+ // first.
+ fLanguageBreakEngines->insertElementAt(fUnhandledBreakEngine, 0, status);
+ // If we can't insert it, or creation failed, get rid of it
+ if (U_FAILURE(status)) {
+ delete fUnhandledBreakEngine;
+ fUnhandledBreakEngine = 0;
+ return NULL;
+ }
+ }
+
+ // Tell the reject engine about the character; at its discretion, it may
+ // add more than just the one character.
+ fUnhandledBreakEngine->handleCharacter(c);
+
+ return fUnhandledBreakEngine;
+}
+
+void RuleBasedBreakIterator::dumpCache() {
+ fBreakCache->dumpCache();
+}
+
+void RuleBasedBreakIterator::dumpTables() {
+ fData->printData();
+}
+
+/**
+ * Returns the description used to create this iterator
+ */
+
+const UnicodeString&
+RuleBasedBreakIterator::getRules() const {
+ if (fData != NULL) {
+ return fData->getRuleSourceString();
+ } else {
+ umtx_initOnce(gRBBIInitOnce, &rbbiInit);
+ return *gEmptyString;
+ }
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/thirdparty/icu4c/common/rbbi_cache.cpp b/thirdparty/icu4c/common/rbbi_cache.cpp
new file mode 100644
index 0000000000..63ff3001c7
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbi_cache.cpp
@@ -0,0 +1,655 @@
+// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// file: rbbi_cache.cpp
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/ubrk.h"
+#include "unicode/rbbi.h"
+
+#include "rbbi_cache.h"
+
+#include "brkeng.h"
+#include "cmemory.h"
+#include "rbbidata.h"
+#include "rbbirb.h"
+#include "uassert.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ * DictionaryCache implementation
+ */
+
+RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
+ fBI(bi), fBreaks(status), fPositionInCache(-1), // status is only handed to the fBreaks vector; -1 means "no current position"
+ fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) { // same initial values that reset() assigns
+}
+
+RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() { // empty body: no explicit cleanup is performed here
+}
+
+void RuleBasedBreakIterator::DictionaryCache::reset() { // Return the cache to its empty state.
+ fPositionInCache = -1; // no current position in fBreaks
+ fStart = 0; // with fStart == fLimit == 0 the range checks in following() and preceding() reject every fromPos
+ fLimit = 0;
+ fFirstRuleStatusIndex = 0;
+ fOtherRuleStatusIndex = 0;
+ fBreaks.removeAllElements();
+}
+
+UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) { // Look up the cached boundary after fromPos. TRUE: *result and *statusIndex are set. FALSE: outputs untouched.
+ if (fromPos >= fLimit || fromPos < fStart) { // fromPos is outside [fStart, fLimit): this cache cannot answer
+ fPositionInCache = -1;
+ return FALSE;
+ }
+
+ // Sequential iteration, move from previous boundary to the following
+
+ int32_t r = 0;
+ if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
+ ++fPositionInCache;
+ if (fPositionInCache >= fBreaks.size()) { // stepped past the last cached boundary
+ fPositionInCache = -1;
+ return FALSE;
+ }
+ r = fBreaks.elementAti(fPositionInCache);
+ U_ASSERT(r > fromPos);
+ *result = r;
+ *statusIndex = fOtherRuleStatusIndex; // r > fromPos >= fStart, so r is never the first boundary
+ return TRUE;
+ }
+
+ // Random indexing. Linear search for the boundary following the given position.
+
+ for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
+ r= fBreaks.elementAti(fPositionInCache);
+ if (r > fromPos) {
+ *result = r;
+ *statusIndex = fOtherRuleStatusIndex;
+ return TRUE;
+ }
+ }
+ UPRV_UNREACHABLE; // the search is expected to succeed: fromPos < fLimit, and populateDictionary() sets fLimit to fBreaks.peeki()
+}
+
+
+UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) { // Look up the cached boundary before fromPos. TRUE: *result and *statusIndex are set. FALSE: outputs untouched.
+ if (fromPos <= fStart || fromPos > fLimit) { // fromPos is outside (fStart, fLimit]: this cache cannot answer
+ fPositionInCache = -1;
+ return FALSE;
+ }
+
+ if (fromPos == fLimit) { // asking from the very end: position on the last entry so the sequential path below applies
+ fPositionInCache = fBreaks.size() - 1;
+ if (fPositionInCache >= 0) {
+ U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
+ }
+ }
+
+ int32_t r;
+ if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) { // sequential iteration: step back one entry
+ --fPositionInCache;
+ r = fBreaks.elementAti(fPositionInCache);
+ U_ASSERT(r < fromPos);
+ *result = r;
+ *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex; // only the boundary at fStart carries the "first" status
+ return TRUE;
+ }
+
+ if (fPositionInCache == 0) { // cache position is on the first entry: give up and invalidate the position
+ fPositionInCache = -1;
+ return FALSE;
+ }
+
+ for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) { // random access: linear search from the end
+ r = fBreaks.elementAti(fPositionInCache);
+ if (r < fromPos) {
+ *result = r;
+ *statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
+ return TRUE;
+ }
+ }
+ UPRV_UNREACHABLE; // the search is expected to succeed: fromPos > fStart, and populateDictionary() sets fStart to fBreaks.elementAti(0)
+}
+
+void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
+ int32_t firstRuleStatus, int32_t otherRuleStatus) { // Rebuild fBreaks with the dictionary-based boundaries found in the text range startPos..endPos.
+ if ((endPos - startPos) <= 1) { // range too short to subdivide: return before reset(), leaving the existing cache contents untouched
+ return;
+ }
+
+ reset();
+ fFirstRuleStatusIndex = firstRuleStatus;
+ fOtherRuleStatusIndex = otherRuleStatus;
+
+ int32_t rangeStart = startPos;
+ int32_t rangeEnd = endPos;
+
+ uint16_t category;
+ int32_t current;
+ UErrorCode status = U_ZERO_ERROR; // local status only: failures are not reported to the caller (the function returns void)
+ int32_t foundBreakCount = 0;
+ UText *text = &fBI->fText;
+
+ // Loop through the text, looking for ranges of dictionary characters.
+ // For each span, find the appropriate break engine, and ask it to find
+ // any breaks within the span.
+
+ utext_setNativeIndex(text, rangeStart);
+ UChar32 c = utext_current32(text);
+ category = ucptrie_get(fBI->fData->fTrie, c);
+ uint32_t dictStart = fBI->fData->fForwardTable->fDictCategoriesStart; // categories >= dictStart are treated as dictionary characters below
+
+ while(U_SUCCESS(status)) {
+ while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd
+ && (category < dictStart)) {
+ utext_next32(text); // TODO: cleaner loop structure.
+ c = utext_current32(text);
+ category = ucptrie_get(fBI->fData->fTrie, c);
+ }
+ if (current >= rangeEnd) {
+ break;
+ }
+
+ // We now have a dictionary character. Get the appropriate language object
+ // to deal with it.
+ const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);
+
+ // Ask the language object if there are any breaks. It will add them to the cache and
+ // leave the text pointer on the other side of its range, ready to search for the next one.
+ if (lbe != NULL) {
+ foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
+ }
+
+ // Reload the loop variables for the next go-round
+ c = utext_current32(text);
+ category = ucptrie_get(fBI->fData->fTrie, c);
+ }
+
+ // If we found breaks, ensure that the first and last entries are
+ // the original starting and ending position. And initialize the
+ // cache iteration position to the first entry.
+
+ // printf("foundBreakCount = %d\n", foundBreakCount);
+ if (foundBreakCount > 0) {
+ U_ASSERT(foundBreakCount == fBreaks.size());
+ if (startPos < fBreaks.elementAti(0)) {
+ // The dictionary did not place a boundary at the start of the segment of text.
+ // Add one now. This should not commonly happen, but it would be easy for interactions
+ // of the rules for dictionary segments and the break engine implementations to
+ // inadvertently cause it. Cover it here, just in case.
+ fBreaks.insertElementAt(startPos, 0, status);
+ }
+ if (endPos > fBreaks.peeki()) {
+ fBreaks.push(endPos, status);
+ }
+ fPositionInCache = 0;
+ // Note: Dictionary matching may extend beyond the original limit.
+ fStart = fBreaks.elementAti(0);
+ fLimit = fBreaks.peeki();
+ } else {
+ // there were no language-based breaks, even though the segment contained
+ // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
+ // for this range will fail, and the calling code will fall back to the rule based boundaries.
+ }
+}
+
+
+/*
+ * BreakCache implementation
+ */
+
+RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
+ fBI(bi), fSideBuffer(status) { // status is only handed to the fSideBuffer vector
+ reset(); // uses the defaults declared in rbbi_cache.h: pos = 0, ruleStatus = 0
+}
+
+
+RuleBasedBreakIterator::BreakCache::~BreakCache() { // empty body: no explicit cleanup is performed here
+}
+
+
+void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) { // Empty the circular buffer and seed it with one boundary at pos.
+ fStartBufIdx = 0; // start == end == 0: the buffer holds exactly one entry, in slot 0
+ fEndBufIdx = 0;
+ fTextIdx = pos;
+ fBufIdx = 0; // the iteration position is that single entry
+ fBoundaries[0] = pos;
+ fStatuses[0] = (uint16_t)ruleStatus; // statuses are stored as 16-bit values
+}
+
+
+int32_t RuleBasedBreakIterator::BreakCache::current() { // Copy the cache's current position and rule status into the owning iterator; returns that position.
+ fBI->fPosition = fTextIdx;
+ fBI->fRuleStatusIndex = fStatuses[fBufIdx];
+ fBI->fDone = FALSE; // a position is being reported, so the iterator is not "done"
+ return fTextIdx;
+}
+
+
+void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) { // Move the iterator to the boundary after startPos. No-op if status already holds a failure.
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) { // tried in order: already there, already cached, extend the cache
+ // startPos is in the cache. Do a next() from that position.
+ // TODO: an awkward set of interactions with bi->fDone
+ // seek() does not clear it; it can't because of interactions with populateNear().
+ // next() does not clear it in the fast-path case, where everything matters. Maybe it should.
+ // So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end.
+ fBI->fDone = false;
+ next();
+ }
+ return;
+}
+
+
+void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) { // Move the iterator to the boundary before startPos. No-op if status already holds a failure.
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
+ if (startPos == fTextIdx) { // the cache position is exactly startPos: step back one boundary
+ previous(status);
+ } else {
+ // seek() leaves the BreakCache positioned at the preceding boundary
+ // if the requested position is between two boundaries.
+ // current() pushes the BreakCache position out to the BreakIterator itself.
+ U_ASSERT(startPos > fTextIdx);
+ current();
+ }
+ }
+ return;
+}
+
+
+/*
+ * Out-of-line code for BreakCache::next().
+ * Cache does not already contain the boundary; try to extend it with populateFollowing().
+ */
+void RuleBasedBreakIterator::BreakCache::nextOL() {
+ fBI->fDone = !populateFollowing(); // done when no following boundary could be added
+ fBI->fPosition = fTextIdx;
+ fBI->fRuleStatusIndex = fStatuses[fBufIdx];
+ return;
+}
+
+
+void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) { // Step the iterator back one boundary, extending the cache backwards if needed.
+ if (U_FAILURE(status)) {
+ return;
+ }
+ int32_t initialBufIdx = fBufIdx;
+ if (fBufIdx == fStartBufIdx) {
+ // At start of cache. Prepend to it.
+ populatePreceding(status);
+ } else {
+ // Cache already holds the preceding boundary
+ fBufIdx = modChunkSize(fBufIdx - 1);
+ fTextIdx = fBoundaries[fBufIdx];
+ }
+ fBI->fDone = (fBufIdx == initialBufIdx); // buffer index did not move: nothing precedes, report done
+ fBI->fPosition = fTextIdx;
+ fBI->fRuleStatusIndex = fStatuses[fBufIdx];
+ return;
+}
+
+
+UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) { // Position the cache on pos, or on the cached boundary just before it. FALSE if pos is outside the cached range.
+ if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) {
+ return FALSE;
+ }
+ if (pos == fBoundaries[fStartBufIdx]) {
+ // Common case: seek(0), from BreakIterator::first()
+ fBufIdx = fStartBufIdx;
+ fTextIdx = fBoundaries[fBufIdx];
+ return TRUE;
+ }
+ if (pos == fBoundaries[fEndBufIdx]) {
+ fBufIdx = fEndBufIdx;
+ fTextIdx = fBoundaries[fBufIdx];
+ return TRUE;
+ }
+
+ int32_t min = fStartBufIdx; // binary search over the circular buffer for the first entry greater than pos
+ int32_t max = fEndBufIdx;
+ while (min != max) {
+ int32_t probe = (min + max + (min>max ? CACHE_SIZE : 0)) / 2; // min > max means the live range wraps; add CACHE_SIZE before halving
+ probe = modChunkSize(probe);
+ if (fBoundaries[probe] > pos) {
+ max = probe;
+ } else {
+ min = modChunkSize(probe + 1);
+ }
+ }
+ U_ASSERT(fBoundaries[max] > pos);
+ fBufIdx = modChunkSize(max - 1); // the entry just before the first one greater than pos
+ fTextIdx = fBoundaries[fBufIdx];
+ U_ASSERT(fTextIdx <= pos);
+ return TRUE;
+}
+
+
+UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) { // Extend (or restart) the cache until it covers position; leaves the cache position at or just before it.
+ if (U_FAILURE(status)) {
+ return FALSE;
+ }
+ U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]); // callers try seek() first, so position should lie outside the cached range
+
+ // Find a boundary somewhere in the vicinity of the requested position.
+ // Depending on the safe rules and the text data, it could be either before, at, or after
+ // the requested position.
+
+
+ // If the requested position is not near already cached positions, clear the existing cache,
+ // find a near-by boundary and begin new cache contents there.
+
+ if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) { // "near" here means within 15 index units of the cached range
+ int32_t aBoundary = 0;
+ int32_t ruleStatusIndex = 0;
+ if (position > 20) { // for position <= 20 the cache simply restarts from boundary 0 with status 0
+ int32_t backupPos = fBI->handleSafePrevious(position);
+
+ if (backupPos > 0) {
+ // Advance to the boundary following the backup position.
+ // There is a complication: the safe reverse rules identify pairs of code points
+ // that are safe. If advancing from the safe point moves forwards by less than
+ // two code points, we need to advance one more time to ensure that the boundary
+ // is good, including a correct rules status value.
+ //
+ fBI->fPosition = backupPos;
+ aBoundary = fBI->handleNext();
+ if (aBoundary <= backupPos + 4) {
+ // +4 is a quick test for possibly having advanced only one codepoint.
+ // Four being the length of the longest potential code point, a supplementary in UTF-8
+ utext_setNativeIndex(&fBI->fText, aBoundary);
+ if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) {
+ // The initial handleNext() only advanced by a single code point. Go again.
+ aBoundary = fBI->handleNext(); // Safe rules identify safe pairs.
+ }
+ }
+ ruleStatusIndex = fBI->fRuleStatusIndex;
+ }
+ }
+ reset(aBoundary, ruleStatusIndex); // Reset cache to hold aBoundary as a single starting point.
+ }
+
+ // Fill in boundaries between existing cache content and the new requested position.
+
+ if (fBoundaries[fEndBufIdx] < position) {
+ // The last position in the cache precedes the requested position.
+ // Add following position(s) to the cache.
+ while (fBoundaries[fEndBufIdx] < position) {
+ if (!populateFollowing()) {
+ UPRV_UNREACHABLE; // running out of text before reaching position is treated as impossible
+ }
+ }
+ fBufIdx = fEndBufIdx; // Set iterator position to the end of the buffer.
+ fTextIdx = fBoundaries[fBufIdx]; // Required because populateFollowing may add extra boundaries.
+ while (fTextIdx > position) { // Move backwards to a position at or preceding the requested pos.
+ previous(status);
+ }
+ return true;
+ }
+
+ if (fBoundaries[fStartBufIdx] > position) {
+ // The first position in the cache is beyond the requested position.
+ // back up more until we get a boundary <= the requested position.
+ while (fBoundaries[fStartBufIdx] > position) {
+ populatePreceding(status);
+ }
+ fBufIdx = fStartBufIdx; // Set iterator position to the start of the buffer.
+ fTextIdx = fBoundaries[fBufIdx]; // Required because populatePreceding may add extra boundaries.
+ while (fTextIdx < position) { // Move forwards to a position at or following the requested pos.
+ next();
+ }
+ if (fTextIdx > position) {
+ // If position is not itself a boundary, the next() loop above will overshoot.
+ // Back up one, leaving cache position at the boundary preceding the requested position.
+ previous(status);
+ }
+ return true;
+ }
+
+ U_ASSERT(fTextIdx == position); // neither branch ran: the (possibly reset) cache already sits exactly on position
+ return true;
+}
+
+
+
+UBool RuleBasedBreakIterator::BreakCache::populateFollowing() { // Append at least one boundary after the last cached one. FALSE when handleNext() reports UBRK_DONE.
+ int32_t fromPosition = fBoundaries[fEndBufIdx];
+ int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx];
+ int32_t pos = 0;
+ int32_t ruleStatusIdx = 0;
+
+ if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) { // first choice: a boundary already held by the dictionary cache
+ addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
+ return TRUE;
+ }
+
+ fBI->fPosition = fromPosition; // otherwise run the rules forward from the last cached boundary
+ pos = fBI->handleNext();
+ if (pos == UBRK_DONE) {
+ return FALSE;
+ }
+
+ ruleStatusIdx = fBI->fRuleStatusIndex;
+ if (fBI->fDictionaryCharCount > 0) {
+ // The text segment obtained from the rules includes dictionary characters.
+ // Subdivide it, with subdivided results going into the dictionary cache.
+ fBI->fDictionaryCache->populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx);
+ if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
+ addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
+ return TRUE;
+ // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
+ // But be careful with interactions with populateNear().
+ }
+ }
+
+ // Rule based segment did not include dictionary characters.
+ // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them,
+ // meaning that we didn't take the return, above.
+ // Add its end point to the cache.
+ addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
+
+ // Add several non-dictionary boundaries at this point, to optimize straight forward iteration.
+ // (Subsequent calls to BreakIterator::next() will take the fast path, getting cached results.)
+ //
+ for (int count=0; count<6; ++count) { // read ahead at most 6 more boundaries
+ pos = fBI->handleNext();
+ if (pos == UBRK_DONE || fBI->fDictionaryCharCount > 0) { // stop at end of text or at a segment containing dictionary characters
+ break;
+ }
+ addFollowing(pos, fBI->fRuleStatusIndex, RetainCachePosition);
+ }
+
+ return TRUE;
+}
+
+
+UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) { // Prepend one or more boundaries before the first cached one. FALSE if none could be added.
+ if (U_FAILURE(status)) {
+ return FALSE;
+ }
+
+ int32_t fromPosition = fBoundaries[fStartBufIdx];
+ if (fromPosition == 0) { // the first cached boundary is position 0: nothing precedes it
+ return FALSE;
+ }
+
+ int32_t position = 0;
+ int32_t positionStatusIdx = 0;
+
+ if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) { // first choice: a boundary already held by the dictionary cache
+ addPreceding(position, positionStatusIdx, UpdateCachePosition);
+ return TRUE;
+ }
+
+ int32_t backupPosition = fromPosition;
+
+ // Find a boundary somewhere preceding the first already-cached boundary
+ do {
+ backupPosition = backupPosition - 30; // step back 30 index units per attempt
+ if (backupPosition <= 0) {
+ backupPosition = 0;
+ } else {
+ backupPosition = fBI->handleSafePrevious(backupPosition);
+ }
+ if (backupPosition == UBRK_DONE || backupPosition == 0) {
+ position = 0;
+ positionStatusIdx = 0;
+ } else {
+ // Advance to the boundary following the backup position.
+ // There is a complication: the safe reverse rules identify pairs of code points
+ // that are safe. If advancing from the safe point moves forwards by less than
+ // two code points, we need to advance one more time to ensure that the boundary
+ // is good, including a correct rules status value.
+ //
+ fBI->fPosition = backupPosition;
+ position = fBI->handleNext();
+ if (position <= backupPosition + 4) {
+ // +4 is a quick test for possibly having advanced only one codepoint.
+ // Four being the length of the longest potential code point, a supplementary in UTF-8
+ utext_setNativeIndex(&fBI->fText, position);
+ if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) {
+ // The initial handleNext() only advanced by a single code point. Go again.
+ position = fBI->handleNext(); // Safe rules identify safe pairs.
+ }
+ }
+ positionStatusIdx = fBI->fRuleStatusIndex;
+ }
+ } while (position >= fromPosition); // retry from further back until the boundary found really precedes fromPosition
+
+ // Find boundaries between the one we just located and the first already-cached boundary
+ // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer.
+
+ fSideBuffer.removeAllElements();
+ fSideBuffer.addElement(position, status); // entries are stored as (position, status) pairs, position first
+ fSideBuffer.addElement(positionStatusIdx, status);
+
+ do {
+ int32_t prevPosition = fBI->fPosition = position;
+ int32_t prevStatusIdx = positionStatusIdx;
+ position = fBI->handleNext();
+ positionStatusIdx = fBI->fRuleStatusIndex;
+ if (position == UBRK_DONE) {
+ break;
+ }
+
+ UBool segmentHandledByDictionary = FALSE;
+ if (fBI->fDictionaryCharCount != 0) {
+ // Segment from the rules includes dictionary characters.
+ // Subdivide it, with subdivided results going into the dictionary cache.
+ int32_t dictSegEndPosition = position;
+ fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
+ while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
+ segmentHandledByDictionary = true;
+ U_ASSERT(position > prevPosition);
+ if (position >= fromPosition) {
+ break;
+ }
+ U_ASSERT(position <= dictSegEndPosition);
+ fSideBuffer.addElement(position, status);
+ fSideBuffer.addElement(positionStatusIdx, status);
+ prevPosition = position;
+ }
+ U_ASSERT(position==dictSegEndPosition || position>=fromPosition);
+ }
+
+ if (!segmentHandledByDictionary && position < fromPosition) { // plain rule-based boundary that is still before fromPosition
+ fSideBuffer.addElement(position, status);
+ fSideBuffer.addElement(positionStatusIdx, status);
+ }
+ } while (position < fromPosition);
+
+ // Move boundaries from the side buffer to the main circular buffer.
+ UBool success = FALSE;
+ if (!fSideBuffer.isEmpty()) {
+ positionStatusIdx = fSideBuffer.popi(); // pairs come off in reverse order: status first, then position
+ position = fSideBuffer.popi();
+ addPreceding(position, positionStatusIdx, UpdateCachePosition); // the first pair popped becomes the new cache position
+ success = TRUE;
+ }
+
+ while (!fSideBuffer.isEmpty()) {
+ positionStatusIdx = fSideBuffer.popi();
+ position = fSideBuffer.popi();
+ if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
+ // No space in circular buffer to hold a new preceding result while
+ // also retaining the current cache (iteration) position.
+ // Bailing out is safe; the cache will refill again if needed.
+ break;
+ }
+ }
+
+ return success;
+}
+
+
+void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) { // Append one boundary after the current last entry of the circular buffer.
+ U_ASSERT(position > fBoundaries[fEndBufIdx]);
+ U_ASSERT(ruleStatusIdx <= UINT16_MAX); // fStatuses stores 16-bit values
+ int32_t nextIdx = modChunkSize(fEndBufIdx + 1);
+ if (nextIdx == fStartBufIdx) { // buffer is full: give up the oldest entries to make room
+ fStartBufIdx = modChunkSize(fStartBufIdx + 6); // TODO: experiment. Probably revert to 1.
+ }
+ fBoundaries[nextIdx] = position;
+ fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
+ fEndBufIdx = nextIdx;
+ if (update == UpdateCachePosition) {
+ // Set current position to the newly added boundary.
+ fBufIdx = nextIdx;
+ fTextIdx = position;
+ } else {
+ // Retaining the original cache position.
+ // Check if the added boundary wraps around the buffer, and would over-write the original position.
+ // It's the responsibility of callers of this function to not add too many.
+ U_ASSERT(nextIdx != fBufIdx);
+ }
+}
+
+bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) { // Prepend one boundary before the current first entry; false if that would overwrite a retained position.
+ U_ASSERT(position < fBoundaries[fStartBufIdx]);
+ U_ASSERT(ruleStatusIdx <= UINT16_MAX); // fStatuses stores 16-bit values
+ int32_t nextIdx = modChunkSize(fStartBufIdx - 1);
+ if (nextIdx == fEndBufIdx) { // buffer is full: the new entry would land on the current last slot
+ if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
+ // Failure. The insertion of the new boundary would claim the buffer position that is the
+ // current iteration position. And we also want to retain the current iteration position.
+ // (The buffer is already completely full of entries that precede the iteration position.)
+ return false;
+ }
+ fEndBufIdx = modChunkSize(fEndBufIdx - 1); // drop the last cached boundary to make room
+ }
+ fBoundaries[nextIdx] = position;
+ fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
+ fStartBufIdx = nextIdx;
+ if (update == UpdateCachePosition) { // move the iteration position onto the newly added boundary
+ fBufIdx = nextIdx;
+ fTextIdx = position;
+ }
+ return true;
+}
+
+
+void RuleBasedBreakIterator::BreakCache::dumpCache() { // Debug aid: prints the cache position and every cached boundary. The body exists only when RBBI_DEBUG is defined.
+#ifdef RBBI_DEBUG
+ RBBIDebugPrintf("fTextIdx:%d fBufIdx:%d\n", fTextIdx, fBufIdx);
+ for (int32_t i=fStartBufIdx; ; i=modChunkSize(i+1)) { // walk the circular buffer from first to last entry, inclusive
+ RBBIDebugPrintf("%d %d\n", i, fBoundaries[i]);
+ if (i == fEndBufIdx) {
+ break;
+ }
+ }
+#endif
+}
+
+U_NAMESPACE_END
+
+#endif // #if !UCONFIG_NO_BREAK_ITERATION
diff --git a/thirdparty/icu4c/common/rbbi_cache.h b/thirdparty/icu4c/common/rbbi_cache.h
new file mode 100644
index 0000000000..597312e85c
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbi_cache.h
@@ -0,0 +1,203 @@
+// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// file: rbbi_cache.h
+//
+#ifndef RBBI_CACHE_H
+#define RBBI_CACHE_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/rbbi.h"
+#include "unicode/uobject.h"
+
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+/* DictionaryCache stores the boundaries obtained from a run of dictionary characters.
+ * Dictionary boundaries are moved first to this cache, then from here
+ * to the main BreakCache, where they may inter-leave with non-dictionary
+ * boundaries. The public BreakIterator API always fetches directly
+ * from the main BreakCache, not from here.
+ *
+ * In common situations, the number of boundaries in a single dictionary run
+ * should be quite small, it will be terminated by punctuation, spaces,
+ * or any other non-dictionary characters. The main BreakCache may end
+ * up with boundaries from multiple dictionary based runs.
+ *
+ * The boundaries are stored in a simple vector (UVector32 fBreaks), with the
+ * assumption that they will be accessed sequentially.
+ */
+class RuleBasedBreakIterator::DictionaryCache: public UMemory {
+ public:
+ DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status);
+ ~DictionaryCache();
+
+ void reset(); // Empty the cache; afterwards following() and preceding() return false for every position.
+
+ UBool following(int32_t fromPos, int32_t *pos, int32_t *statusIndex); // Cached boundary after fromPos; false if this cache cannot supply one.
+ UBool preceding(int32_t fromPos, int32_t *pos, int32_t *statusIndex); // Cached boundary before fromPos; false if this cache cannot supply one.
+
+ /**
+ * Populate the cache with the dictionary based boundaries within a region of text.
+ * @param startPos The start position of a range of text
+ * @param endPos The end position of a range of text
+ * @param firstRuleStatus The rule status index that applies to the break at startPos
+ * @param otherRuleStatus The rule status index that applies to boundaries other than startPos
+ * @internal
+ */
+ void populateDictionary(int32_t startPos, int32_t endPos,
+ int32_t firstRuleStatus, int32_t otherRuleStatus);
+
+
+
+ RuleBasedBreakIterator *fBI; // The break iterator passed to the constructor.
+
+ UVector32 fBreaks; // A vector containing the boundaries.
+ int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following()
+ // or preceding(). Optimizes sequential access. -1 when there is none.
+ int32_t fStart; // Text position of first boundary in cache.
+ int32_t fLimit; // Last boundary in cache. Which is the limit of the
+ // text segment being handled by the dictionary.
+ int32_t fFirstRuleStatusIndex; // Rule status info for first boundary.
+ int32_t fOtherRuleStatusIndex; // Rule status info for 2nd through last boundaries.
+};
+
+
+/*
+ * class BreakCache
+ *
+ * Cache of break boundary positions and rule status values.
+ * Break iterator API functions, next(), previous(), etc., will use cached results
+ * when possible, and otherwise cache new results as they are obtained.
+ *
+ * Uniformly caches both dictionary and rule based (non-dictionary) boundaries.
+ *
+ * The cache is implemented as a single circular buffer.
+ */
+
+/*
+ * The size of the circular cache buffer is the CACHE_SIZE constant declared inside BreakCache.
+ */
+
+class RuleBasedBreakIterator::BreakCache: public UMemory {
+ public:
+ BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status);
+ virtual ~BreakCache();
+ void reset(int32_t pos = 0, int32_t ruleStatus = 0); // Empty the cache, leaving pos as its single boundary.
+ void next() { if (fBufIdx == fEndBufIdx) { // At the end of the cached boundaries: take the out-of-line path.
+ nextOL();
+ } else { // Fast path: the next boundary is already cached.
+ fBufIdx = modChunkSize(fBufIdx + 1);
+ fTextIdx = fBI->fPosition = fBoundaries[fBufIdx];
+ fBI->fRuleStatusIndex = fStatuses[fBufIdx];
+ }
+ }
+
+
+ void nextOL(); // Out-of-line part of next(), used when the cache has to be extended.
+ void previous(UErrorCode &status); // Step back one boundary, extending the cache backwards if needed.
+
+ // Move the iteration state to the position following the startPosition.
+ // Input position must be pinned to the input length.
+ void following(int32_t startPosition, UErrorCode &status);
+
+ void preceding(int32_t startPosition, UErrorCode &status); // Move the iteration state to the boundary preceding startPosition.
+
+ /*
+ * Update the state of the public BreakIterator (fBI) to reflect the
+ * current state of the break iterator cache (this).
+ */
+ int32_t current();
+
+ /**
+ * Add boundaries to the cache near the specified position.
+ * The given position need not be a boundary itself.
+ * The input position must be within the range of the text, and
+ * on a code point boundary.
+ * If the requested position is a break boundary, leave the iteration
+ * position on it.
+ * If the requested position is not a boundary, leave the iteration
+ * position on the preceding boundary and include both the
+ * preceding and following boundaries in the cache.
+ * Additional boundaries, either preceding or following, may be added
+ * to the cache as a side effect.
+ *
+ * Return false if the operation failed.
+ */
+ UBool populateNear(int32_t position, UErrorCode &status);
+
+ /**
+ * Add boundary(s) to the cache following the current last boundary.
+ * Return false if at the end of the text, and no more boundaries can be added.
+ * Leave iteration position at the first newly added boundary, or unchanged if no boundary was added.
+ */
+ UBool populateFollowing();
+
+ /**
+ * Add one or more boundaries to the cache preceding the first currently cached boundary.
+ * Leave the iteration position on the first added boundary.
+ * Return false if no boundaries could be added (if at the start of the text.)
+ */
+ UBool populatePreceding(UErrorCode &status);
+
+ enum UpdatePositionValues {
+ RetainCachePosition = 0,
+ UpdateCachePosition = 1
+ };
+
+ /*
+ * Add the boundary following the current position.
+ * The current position can be left as it was, or changed to the newly added boundary,
+ * as specified by the update parameter.
+ */
+ void addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
+
+
+ /*
+ * Add the boundary preceding the current position.
+ * The current position can be left as it was, or changed to the newly added boundary,
+ * as specified by the update parameter.
+ */
+ bool addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
+
+ /**
+ * Set the cache position to the specified position, or, if the position
+ * falls between two cached boundaries, to the preceding boundary.
+ * Fails if the requested position is outside of the range of boundaries currently held by the cache.
+ * The startPosition must be on a code point boundary.
+ *
+ * Return true if successful, false if the specified position is after
+ * the last cached boundary or before the first.
+ */
+ UBool seek(int32_t startPosition);
+
+ void dumpCache();
+
+ private:
+ static inline int32_t modChunkSize(int index) { return index & (CACHE_SIZE - 1); } // Wrap an index into the circular buffer; the mask works because CACHE_SIZE is a power of two.
+
+ static constexpr int32_t CACHE_SIZE = 128; // Number of slots in the circular buffer.
+ static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two.");
+
+ RuleBasedBreakIterator *fBI; // The break iterator passed to the constructor.
+ int32_t fStartBufIdx; // Index in fBoundaries of the first cached boundary.
+ int32_t fEndBufIdx; // inclusive
+
+ int32_t fTextIdx; // Text position of the current iteration position (fBoundaries[fBufIdx]).
+ int32_t fBufIdx; // Index in fBoundaries of the current iteration position.
+
+ int32_t fBoundaries[CACHE_SIZE]; // Circular buffer of boundary positions.
+ uint16_t fStatuses[CACHE_SIZE]; // Rule status index for the matching slot of fBoundaries.
+
+ UVector32 fSideBuffer; // Scratch list of (position, status) pairs used by populatePreceding().
+};
+
+U_NAMESPACE_END
+
+#endif // #if !UCONFIG_NO_BREAK_ITERATION
+
+#endif // RBBI_CACHE_H
diff --git a/thirdparty/icu4c/common/rbbidata.cpp b/thirdparty/icu4c/common/rbbidata.cpp
new file mode 100644
index 0000000000..193acafc44
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbidata.cpp
@@ -0,0 +1,476 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+***************************************************************************
+* Copyright (C) 1999-2014 International Business Machines Corporation *
+* and others. All rights reserved. *
+***************************************************************************
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/ucptrie.h"
+#include "unicode/utypes.h"
+#include "rbbidata.h"
+#include "rbbirb.h"
+#include "udatamem.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "umutex.h"
+
+#include "uassert.h"
+
+
+U_NAMESPACE_BEGIN
+
+//-----------------------------------------------------------------------------
+//
+// Constructors.
+//
+//-----------------------------------------------------------------------------
+RBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status) {
+ init0(); // give every member a known value before anything can fail
+ init(data, status); // validate the header, set up the section pointers; on success data is freed by the destructor
+}
+
+RBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt, UErrorCode &status) {
+ init0();
+ init(data, status);
+ fDontFreeData = TRUE; // init() clears this flag on success; set it again so the destructor never frees data
+}
+
+RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
+ init0();
+ if (U_FAILURE(status)) {
+ return;
+ }
+ const DataHeader *dh = udm->pHeader; // generic ICU data header; the RBBI data follows it
+ int32_t headerSize = dh->dataHeader.headerSize;
+ if ( !(headerSize >= 20 &&
+ dh->info.isBigEndian == U_IS_BIG_ENDIAN &&
+ dh->info.charsetFamily == U_CHARSET_FAMILY &&
+ dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk "
+ dh->info.dataFormat[1] == 0x72 &&
+ dh->info.dataFormat[2] == 0x6b &&
+ dh->info.dataFormat[3] == 0x20 &&
+ isDataVersionAcceptable(dh->info.formatVersion))
+ ) {
+ status = U_INVALID_FORMAT_ERROR; // fUDataMem is still NULL here, so the destructor will not close udm
+ return;
+ }
+ const char *dataAsBytes = reinterpret_cast<const char *>(dh);
+ const RBBIDataHeader *rbbidh = reinterpret_cast<const RBBIDataHeader *>(dataAsBytes + headerSize); // skip the ICU header
+ init(rbbidh, status);
+ fUDataMem = udm; // from now on the destructor calls udata_close(udm) instead of freeing fHeader
+}
+
+UBool RBBIDataWrapper::isDataVersionAcceptable(const UVersionInfo version) {
+ return RBBI_DATA_FORMAT_VERSION[0] == version[0]; // only the first version byte is compared
+}
+
+
+//-----------------------------------------------------------------------------
+//
+// init0(), init(). Do most of the work of construction; shared by the constructors.
+//
+//-----------------------------------------------------------------------------
+void RBBIDataWrapper::init0() {
+ fHeader = NULL;
+ fForwardTable = NULL;
+ fReverseTable = NULL;
+ fRuleSource = NULL;
+ fRuleStatusTable = NULL;
+ fStatusMaxIdx = 0; // init() only sets this on success; without this line it stayed indeterminate on failure
+ fTrie = NULL;
+ fUDataMem = NULL;
+ fRefCount = 0;
+ fDontFreeData = TRUE; // nothing is adopted until init() has accepted the header
+}
+
+void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+ fHeader = data;
+ if (fHeader->fMagic != 0xb1a0 || !isDataVersionAcceptable(fHeader->fFormatVersion)) {
+ status = U_INVALID_FORMAT_ERROR; // fDontFreeData is still TRUE here, so rejected data is not freed
+ return;
+ }
+ // Note: in ICU version 3.2 and earlier, there was a formatVersion 1
+ // that is no longer supported. At that time fFormatVersion was
+ // an int32_t field, rather than an array of 4 bytes.
+
+ fDontFreeData = FALSE; // header accepted: the destructor may free it unless a constructor resets this flag
+ if (data->fFTableLen != 0) {
+ fForwardTable = (RBBIStateTable *)((char *)data + fHeader->fFTable); // section offsets are bytes from the header start
+ }
+ if (data->fRTableLen != 0) {
+ fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable); // stays NULL when the table is empty
+ }
+
+ fTrie = ucptrie_openFromBinary(UCPTRIE_TYPE_FAST,
+ UCPTRIE_VALUE_BITS_ANY,
+ (uint8_t *)data + fHeader->fTrie,
+ fHeader->fTrieLen,
+ nullptr, // *actual length
+ &status);
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ UCPTrieValueWidth width = ucptrie_getValueWidth(fTrie);
+ if (!(width == UCPTRIE_VALUE_BITS_8 || width == UCPTRIE_VALUE_BITS_16)) { // only 8- and 16-bit trie values are accepted
+ status = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+
+ fRuleSource = ((char *)data + fHeader->fRuleSource);
+ fRuleString = UnicodeString::fromUTF8(StringPiece(fRuleSource, fHeader->fRuleSourceLen)); // rule text is stored as UTF-8
+ U_ASSERT(data->fRuleSourceLen > 0); // NOTE(review): asserted only after the length has already been used above
+
+ fRuleStatusTable = (int32_t *)((char *)data + fHeader->fStatusTable);
+ fStatusMaxIdx = data->fStatusTableLen / sizeof(int32_t); // byte length -> number of int32_t entries
+
+ fRefCount = 1; // reached only when every check above has passed
+
+#ifdef RBBI_DEBUG
+ char *debugEnv = getenv("U_RBBIDEBUG");
+ if (debugEnv && uprv_strstr(debugEnv, "data")) {this->printData();}
+#endif
+}
+
+
+//-----------------------------------------------------------------------------
+//
+// Destructor. Don't call this - use removeReference() instead.
+//
+//-----------------------------------------------------------------------------
+RBBIDataWrapper::~RBBIDataWrapper() {
+ U_ASSERT(fRefCount == 0); // expected to run only via removeReference()
+ ucptrie_close(fTrie);
+ fTrie = nullptr;
+ if (fUDataMem) {
+ udata_close(fUDataMem); // data was supplied through a UDataMemory: hand it back with udata_close()
+ } else if (!fDontFreeData) {
+ uprv_free((void *)fHeader); // adopted raw data block
+ }
+}
+
+
+
+//-----------------------------------------------------------------------------
+//
+// Operator == Consider two RBBIDataWrappers to be equal if they
+// refer to the same underlying data. Although
+// the data wrappers are normally shared between
+// iterator instances, it's possible to independently
+// open the same data twice, and get two instances, which
+// should still be ==.
+//
+//-----------------------------------------------------------------------------
+UBool RBBIDataWrapper::operator ==(const RBBIDataWrapper &other) const {
+ const RBBIDataHeader *lhs = fHeader;
+ const RBBIDataHeader *rhs = other.fHeader;
+ if (lhs == rhs) {
+ return TRUE; // both wrappers point at the very same data block
+ }
+ // Separately opened copies are equal when the total lengths match and every byte agrees.
+ if (lhs->fLength == rhs->fLength && uprv_memcmp(lhs, rhs, lhs->fLength) == 0) {
+ return TRUE;
+ }
+ return FALSE;
+}
+
+int32_t RBBIDataWrapper::hashCode() {
+ return fHeader->fFTableLen; // the byte length of the forward state table is used as the hash value
+}
+
+
+
+//-----------------------------------------------------------------------------
+//
+// Reference Counting. A single RBBIDataWrapper object is shared among
+// however many RuleBasedBreakIterator instances are
+// referencing the same data.
+//
+//-----------------------------------------------------------------------------
+void RBBIDataWrapper::removeReference() {
+ if (umtx_atomic_dec(&fRefCount) == 0) { // count reached zero: no references remain
+ delete this; // the object must not be touched after this call returns
+ }
+}
+
+
+RBBIDataWrapper *RBBIDataWrapper::addReference() {
+ umtx_atomic_inc(&fRefCount); // balanced by a later removeReference()
+ return this; // returned so the caller can store the shared pointer directly
+}
+
+
+
+//-----------------------------------------------------------------------------
+//
+// getRuleSourceString
+//
+//-----------------------------------------------------------------------------
+const UnicodeString &RBBIDataWrapper::getRuleSourceString() const {
+ return fRuleString; // built by init() from the UTF-8 rule source; left at its default if init() did not get that far
+}
+
+
+//-----------------------------------------------------------------------------
+//
+// print - debugging function to dump the runtime data tables.
+//
+//-----------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *table) {
+ uint32_t c;
+ uint32_t s;
+
+ RBBIDebugPrintf("%s\n", heading);
+
+ // init() leaves a table pointer NULL when its length is 0, so test before the first dereference.
+ if (table == NULL) {
+ RBBIDebugPrintf(" N U L L T A B L E\n\n");
+ return;
+ }
+ RBBIDebugPrintf(" fDictCategoriesStart: %d\n", table->fDictCategoriesStart);
+ RBBIDebugPrintf(" fLookAheadResultsSize: %d\n", table->fLookAheadResultsSize);
+ RBBIDebugPrintf(" Flags: %4x RBBI_LOOKAHEAD_HARD_BREAK=%s RBBI_BOF_REQUIRED=%s RBBI_8BITS_ROWS=%s\n",
+ table->fFlags,
+ table->fFlags & RBBI_LOOKAHEAD_HARD_BREAK ? "T" : "F",
+ table->fFlags & RBBI_BOF_REQUIRED ? "T" : "F",
+ table->fFlags & RBBI_8BITS_ROWS ? "T" : "F");
+ RBBIDebugPrintf("\nState | Acc LA TagIx"); // column header: one column per character category
+ for (c=0; c<fHeader->fCatCount; c++) {RBBIDebugPrintf("%3d ", c);}
+ RBBIDebugPrintf("\n------|---------------"); for (c=0;c<fHeader->fCatCount; c++) {
+ RBBIDebugPrintf("----");
+ }
+ RBBIDebugPrintf("\n");
+ UBool use8Bits = table->fFlags & RBBI_8BITS_ROWS; // selects the r8 or the r16 view of each row
+ for (s=0; s<table->fNumStates; s++) {
+ RBBIStateTableRow *row = (RBBIStateTableRow *)
+ (table->fTableData + (table->fRowLen * s)); // rows are fRowLen bytes apart
+ if (use8Bits) {
+ RBBIDebugPrintf("%4d | %3d %3d %3d ", s, row->r8.fAccepting, row->r8.fLookAhead, row->r8.fTagsIdx);
+ for (c=0; c<fHeader->fCatCount; c++) {
+ RBBIDebugPrintf("%3d ", row->r8.fNextState[c]);
+ }
+ } else {
+ RBBIDebugPrintf("%4d | %3d %3d %3d ", s, row->r16.fAccepting, row->r16.fLookAhead, row->r16.fTagsIdx);
+ for (c=0; c<fHeader->fCatCount; c++) {
+ RBBIDebugPrintf("%3d ", row->r16.fNextState[c]);
+ }
+ }
+ RBBIDebugPrintf("\n");
+ }
+ RBBIDebugPrintf("\n");
+}
+#endif
+
+
+void RBBIDataWrapper::printData() {
+#ifdef RBBI_DEBUG
+ RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader); // the whole body is compiled out unless RBBI_DEBUG is defined
+ RBBIDebugPrintf(" Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1],
+ fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]);
+ RBBIDebugPrintf(" total length of data = %d\n", fHeader->fLength);
+ RBBIDebugPrintf(" number of character categories = %d\n\n", fHeader->fCatCount);
+
+ printTable("Forward State Transition Table", fForwardTable); // either table pointer may be NULL
+ printTable("Reverse State Transition Table", fReverseTable);
+
+ RBBIDebugPrintf("\nOrignal Rules source:\n");
+ for (int32_t c=0; fRuleSource[c] != 0; c++) { // NOTE(review): stops at a NUL byte rather than at fRuleSourceLen
+ RBBIDebugPrintf("%c", fRuleSource[c]);
+ }
+ RBBIDebugPrintf("\n\n");
+#endif
+}
+
+
+U_NAMESPACE_END
+U_NAMESPACE_USE
+
+//-----------------------------------------------------------------------------
+//
+// ubrk_swap - byte swap and char encoding swap of RBBI data
+//
+//-----------------------------------------------------------------------------
+
+U_CAPI int32_t U_EXPORT2
+ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
+ UErrorCode *status) {
+ // Returns the total size in bytes (ICU data header + RBBI data), or 0 on failure. length < 0 only preflights.
+ if (status == NULL || U_FAILURE(*status)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
+ *status=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ //
+ // Check that the data header is for break data.
+ // (Header contents are defined in genbrk.cpp)
+ //
+ const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4);
+ if(!( pInfo->dataFormat[0]==0x42 && /* dataFormat="Brk " */
+ pInfo->dataFormat[1]==0x72 &&
+ pInfo->dataFormat[2]==0x6b &&
+ pInfo->dataFormat[3]==0x20 &&
+ RBBIDataWrapper::isDataVersionAcceptable(pInfo->formatVersion) )) {
+ udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
+ pInfo->dataFormat[0], pInfo->dataFormat[1],
+ pInfo->dataFormat[2], pInfo->dataFormat[3],
+ pInfo->formatVersion[0]);
+ *status=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ //
+ // Swap the data header. (This is the generic ICU Data Header, not the RBBI Specific
+ // RBBIDataHeader). This swap also conveniently gets us
+ // the size of the ICU d.h., which lets us locate the start
+ // of the RBBI specific data.
+ //
+ int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
+ if (U_FAILURE(*status)) {
+ return 0; // keep the header error; without a valid headerSize the RBBI header cannot be located
+ }
+
+ // Get the RBBI Data Header, and check that it appears to be OK.
+ const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
+ RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
+ if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
+ !RBBIDataWrapper::isDataVersionAcceptable(rbbiDH->fFormatVersion) ||
+ ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) {
+ udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
+ *status=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ //
+ // Preflight operation? Just return the size
+ //
+ int32_t breakDataLength = ds->readUInt32(rbbiDH->fLength);
+ int32_t totalSize = headerSize + breakDataLength;
+ if (length < 0) {
+ return totalSize;
+ }
+
+ //
+ // Check that length passed in is consistent with length from RBBI data header.
+ //
+ if (length < totalSize) {
+ udata_printError(ds, "ubrk_swap(): too few bytes (%d after ICU Data header) for break data.\n",
+ breakDataLength);
+ *status=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+
+ //
+ // Swap the Data. Do the data itself first, then the RBBI Data Header, because
+ // we need to reference the header to locate the data, and an
+ // inplace swap of the header leaves it unusable.
+ //
+ uint8_t *outBytes = (uint8_t *)outData + headerSize;
+ RBBIDataHeader *outputDH = (RBBIDataHeader *)outBytes;
+
+ int32_t tableStartOffset;
+ int32_t tableLength;
+
+ //
+ // If not swapping in place, zero out the output buffer before starting.
+ // Individual tables and other data items within are aligned to 8 byte boundaries
+ // when originally created. Any unused space between items needs to be zero.
+ //
+ if (inBytes != outBytes) {
+ uprv_memset(outBytes, 0, breakDataLength);
+ }
+
+ //
+ // Each state table begins with several 32 bit fields. Calculate the size
+ // in bytes of these.
+ //
+ int32_t topSize = offsetof(RBBIStateTable, fTableData);
+
+ // Forward state table.
+ tableStartOffset = ds->readUInt32(rbbiDH->fFTable);
+ tableLength = ds->readUInt32(rbbiDH->fFTableLen);
+
+ if (tableLength > 0) {
+ RBBIStateTable *rbbiST = (RBBIStateTable *)(inBytes+tableStartOffset);
+ UBool use8Bits = ds->readUInt32(rbbiST->fFlags) & RBBI_8BITS_ROWS; // read the flags before they are swapped below
+
+ ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
+ outBytes+tableStartOffset, status);
+
+ // Rows of 8-bit cells are plain bytes and are only copied; rows of 16-bit cells are swapped.
+ if (use8Bits) {
+ if (outBytes != inBytes) {
+ uprv_memmove(outBytes+tableStartOffset+topSize,
+ inBytes+tableStartOffset+topSize,
+ tableLength-topSize);
+ }
+ } else {
+ ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
+ outBytes+tableStartOffset+topSize, status);
+ }
+ }
+
+ // Reverse state table. Same layout as forward table, above.
+ tableStartOffset = ds->readUInt32(rbbiDH->fRTable);
+ tableLength = ds->readUInt32(rbbiDH->fRTableLen);
+
+ if (tableLength > 0) {
+ RBBIStateTable *rbbiST = (RBBIStateTable *)(inBytes+tableStartOffset);
+ UBool use8Bits = ds->readUInt32(rbbiST->fFlags) & RBBI_8BITS_ROWS; // read the flags before they are swapped below
+
+ ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
+ outBytes+tableStartOffset, status);
+
+ // Rows of 8-bit cells are plain bytes and are only copied; rows of 16-bit cells are swapped.
+ if (use8Bits) {
+ if (outBytes != inBytes) {
+ uprv_memmove(outBytes+tableStartOffset+topSize,
+ inBytes+tableStartOffset+topSize,
+ tableLength-topSize);
+ }
+ } else {
+ ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
+ outBytes+tableStartOffset+topSize, status);
+ }
+ }
+
+ // Trie table for character categories
+ ucptrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
+ outBytes+ds->readUInt32(rbbiDH->fTrie), status);
+
+ // Source Rules Text. It's UTF8 data
+ if (outBytes != inBytes) {
+ uprv_memmove(outBytes+ds->readUInt32(rbbiDH->fRuleSource),
+ inBytes+ds->readUInt32(rbbiDH->fRuleSource),
+ ds->readUInt32(rbbiDH->fRuleSourceLen));
+ }
+
+ // Table of rule status values. It's all int_32 values
+ ds->swapArray32(ds, inBytes+ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen),
+ outBytes+ds->readUInt32(rbbiDH->fStatusTable), status);
+
+ // And, last, the header.
+ // It is all int32_t values except for fFormatVersion, which is an array of four bytes.
+ // Swap the whole thing as int32_t, then re-swap the one field.
+ //
+ ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status);
+ ds->swapArray32(ds, outputDH->fFormatVersion, 4, outputDH->fFormatVersion, status);
+
+ return totalSize;
+}
+
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/thirdparty/icu4c/common/rbbidata.h b/thirdparty/icu4c/common/rbbidata.h
new file mode 100644
index 0000000000..3749f16799
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbidata.h
@@ -0,0 +1,212 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2014 International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: rbbidata.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* RBBI data formats Includes
+*
+* Structs that describes the format of the Binary RBBI data,
+* as it is stored in ICU's data file.
+*
+* RBBIDataWrapper - Instances of this class sit between the
+* raw data structs and the RuleBasedBreakIterator objects
+* that are created by applications. The wrapper class
+* provides reference counting for the underlying data,
+* and direct pointers to data that would not otherwise
+* be accessible without ugly pointer arithmetic. The
+* wrapper does not attempt to provide any higher level
+* abstractions for the data itself.
+*
+* There will be only one instance of RBBIDataWrapper for any
+* set of RBBI run time data being shared by instances
+* (clones) of RuleBasedBreakIterator.
+*/
+
+#ifndef __RBBIDATA_H__
+#define __RBBIDATA_H__
+
+#include "unicode/utypes.h"
+#include "unicode/udata.h"
+#include "udataswp.h"
+
+/**
+ * Swap RBBI data. See udataswp.h.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+#ifdef __cplusplus
+
+#include "unicode/ucptrie.h"
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/uversion.h"
+#include "umutex.h"
+
+
+U_NAMESPACE_BEGIN
+
+// The current RBBI data format version.
+static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {6, 0, 0, 0};
+
+/*
+ * The following structs map exactly onto the raw data from ICU common data file.
+ */
+struct RBBIDataHeader {
+ uint32_t fMagic; /* == 0xb1a0 */
+ UVersionInfo fFormatVersion; /* Data Format. Same as the value in struct UDataInfo */
+ /* if there is one associated with this data. */
+ /* (version originates in rbbi, is copied to UDataInfo) */
+ uint32_t fLength; /* Total length in bytes of this RBBI Data, */
+ /* including all sections, not just the header. */
+ uint32_t fCatCount; /* Number of character categories. */
+
+ /* */
+ /* Offsets and sizes of each of the subsections within the RBBI data. */
+ /* All offsets are bytes from the start of the RBBIDataHeader. */
+ /* All sizes are in bytes. */
+ /* */
+ uint32_t fFTable; /* Offset to the forward state transition table. */
+ uint32_t fFTableLen;
+ uint32_t fRTable; /* Offset to the reverse state transition table. */
+ uint32_t fRTableLen;
+ uint32_t fTrie; /* Offset to Trie data for character categories */
+ uint32_t fTrieLen;
+ uint32_t fRuleSource; /* Offset to the source for the break */
+ uint32_t fRuleSourceLen; /* rules. Stored as UTF-8. */
+ uint32_t fStatusTable; /* Offset to the table of rule status values */
+ uint32_t fStatusTableLen;
+
+ uint32_t fReserved[6]; /* Reserved for expansion */
+
+};
+
+
+
+template <typename T>
+struct RBBIStateTableRowT {
+ T fAccepting; // Non-zero if this row is for an accepting state.
+ // Value 0: not an accepting state.
+ // 1: (ACCEPTING_UNCONDITIONAL) Unconditional Accepting state.
+ // >1: Look-ahead match has completed.
+ // Actual boundary position happened earlier.
+ // Value here == fLookAhead in earlier
+ // state, at actual boundary pos.
+ T fLookAhead; // Non-zero if this row is for a state that
+ // corresponds to a '/' in the rule source.
+ // Value is the same as the fAccepting
+ // value for the rule (which will appear
+ // in a different state).
+ T fTagsIdx; // Non-zero if this row covers a {tagged} position
+ // from a rule. Value is the index in the
+ // StatusTable of the set of matching
+ // tags (rule status values)
+ T fNextState[1]; // Next State, indexed by char category.
+ // Variable-length array declared with length 1
+ // to disable bounds checkers.
+ // Array Size is actually fData->fHeader->fCatCount
+ // CAUTION: see RBBITableBuilder::getTableSize()
+ // before changing anything here.
+};
+
+typedef RBBIStateTableRowT<uint8_t> RBBIStateTableRow8;
+typedef RBBIStateTableRowT<uint16_t> RBBIStateTableRow16;
+
+constexpr uint16_t ACCEPTING_UNCONDITIONAL = 1; // Value constant for RBBIStateTableRow::fAccepting
+
+union RBBIStateTableRow {
+ RBBIStateTableRow16 r16; // view of a row whose cells are uint16_t
+ RBBIStateTableRow8 r8; // view of a row whose cells are uint8_t
+};
+
+struct RBBIStateTable {
+ uint32_t fNumStates; // Number of states.
+ uint32_t fRowLen; // Length of a state table row, in bytes.
+ uint32_t fDictCategoriesStart; // Char category number of the first dictionary
+ // char class, or the largest category number + 1
+ // if there are no dictionary categories.
+ uint32_t fLookAheadResultsSize; // Size of run-time array required for holding
+ // look-ahead results. Indexed by row.fLookAhead.
+ uint32_t fFlags; // Option Flags for this state table.
+ char fTableData[1]; // First RBBIStateTableRow begins here.
+ // Variable-length array declared with length 1
+ // to disable bounds checkers.
+ // (making it char[] simplifies ugly address
+ // arithmetic for indexing variable length rows.)
+};
+
+constexpr uint32_t RBBI_LOOKAHEAD_HARD_BREAK = 1;
+constexpr uint32_t RBBI_BOF_REQUIRED = 2;
+constexpr uint32_t RBBI_8BITS_ROWS = 4;
+
+
+/* */
+/* The reference counting wrapper class */
+/* */
+class RBBIDataWrapper : public UMemory {
+public:
+ enum EDontAdopt {
+ kDontAdopt
+ };
+ RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status); // adopts data once init() accepts it
+ RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status); // data is never freed by the wrapper
+ RBBIDataWrapper(UDataMemory* udm, UErrorCode &status); // once the ICU header is accepted, udm is closed by the destructor
+ ~RBBIDataWrapper(); // do not call directly; use removeReference()
+
+ static UBool isDataVersionAcceptable(const UVersionInfo version);
+
+ void init0(); // reset members to a known state
+ void init(const RBBIDataHeader *data, UErrorCode &status); // validate data and set up the pointers below
+ RBBIDataWrapper *addReference();
+ void removeReference(); // deletes this object when the count reaches zero
+ UBool operator ==(const RBBIDataWrapper &other) const; // same header pointer, or same length and bytes
+ int32_t hashCode();
+ const UnicodeString &getRuleSourceString() const;
+ void printData(); // no-op unless RBBI_DEBUG is defined
+ void printTable(const char *heading, const RBBIStateTable *table); // defined only when RBBI_DEBUG is defined
+
+ /* */
+ /* Pointers to items within the data */
+ /* */
+ const RBBIDataHeader *fHeader;
+ const RBBIStateTable *fForwardTable; // NULL when the header gives the table a length of 0
+ const RBBIStateTable *fReverseTable; // NULL when the header gives the table a length of 0
+ const char *fRuleSource; // UTF-8 rule text inside the data
+ const int32_t *fRuleStatusTable;
+
+ /* number of int32_t values in the rule status table. Used to sanity check indexing */
+ int32_t fStatusMaxIdx;
+
+ UCPTrie *fTrie; // opened by init(), closed by the destructor
+
+private:
+ u_atomic_int32_t fRefCount;
+ UDataMemory *fUDataMem; // non-NULL when the data came from a UDataMemory
+ UnicodeString fRuleString; // fRuleSource converted from UTF-8
+ UBool fDontFreeData; // TRUE: the destructor must not uprv_free() fHeader
+
+ RBBIDataWrapper(const RBBIDataWrapper &other) = delete; /* forbid copying of this class */
+ RBBIDataWrapper &operator=(const RBBIDataWrapper &other) = delete; /* forbid copying of this class */
+};
+
+
+
+U_NAMESPACE_END
+
+U_CFUNC UBool rbbi_cleanup(void);
+
+#endif /* C++ */
+
+#endif
diff --git a/thirdparty/icu4c/common/rbbinode.cpp b/thirdparty/icu4c/common/rbbinode.cpp
new file mode 100644
index 0000000000..69d84151fe
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbinode.cpp
@@ -0,0 +1,372 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+***************************************************************************
+* Copyright (C) 2002-2016 International Business Machines Corporation *
+* and others. All rights reserved. *
+***************************************************************************
+*/
+
+//
+// File: rbbinode.cpp
+//
+// Implementation of class RBBINode, which represents a node in the
+// tree generated when parsing the Rules Based Break Iterator rules.
+//
+// This "Class" is actually closer to a struct.
+// Code using it is expected to directly access fields much of the time.
+//
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/unistr.h"
+#include "unicode/uniset.h"
+#include "unicode/uchar.h"
+#include "unicode/parsepos.h"
+
+#include "cstr.h"
+#include "uvector.h"
+
+#include "rbbirb.h"
+#include "rbbinode.h"
+
+#include "uassert.h"
+
+
+U_NAMESPACE_BEGIN
+
+#ifdef RBBI_DEBUG
+static int gLastSerial = 0;
+#endif
+
+
+//-------------------------------------------------------------------------
+//
+// Constructor. Just set the fields to reasonable default values.
+//
+//-------------------------------------------------------------------------
+RBBINode::RBBINode(NodeType t) : UMemory() {
+#ifdef RBBI_DEBUG
+ fSerialNum = ++gLastSerial;
+#endif
+ fType = t;
+ fParent = NULL; // a new node is not linked into any tree yet
+ fLeftChild = NULL;
+ fRightChild = NULL;
+ fInputSet = NULL;
+ fFirstPos = 0;
+ fLastPos = 0;
+ fVal = 0;
+ fNullable = FALSE;
+ fLookAheadEnd = FALSE;
+ fRuleRoot = FALSE;
+ fChainIn = FALSE;
+ // Only these four node types carry a non-zero precedence.
+ switch (t) {
+ case opCat: fPrecedence = precOpCat; break;
+ case opOr: fPrecedence = precOpOr; break;
+ case opStart: fPrecedence = precStart; break;
+ case opLParen: fPrecedence = precLParen; break;
+ default: fPrecedence = precZero; break;
+ }
+ UErrorCode status = U_ZERO_ERROR;
+ fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
+ fLastPosSet = new UVector(status);
+ fFollowPos = new UVector(status);
+}
+
+RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
+#ifdef RBBI_DEBUG
+ fSerialNum = ++gLastSerial;
+#endif
+ fType = other.fType;
+ fParent = NULL; // the copy is not linked into any tree; cloneTree() attaches children
+ fLeftChild = NULL;
+ fRightChild = NULL;
+ fInputSet = other.fInputSet;
+ fPrecedence = other.fPrecedence;
+ fText = other.fText;
+ fFirstPos = other.fFirstPos;
+ fLastPos = other.fLastPos;
+ fNullable = other.fNullable;
+ fLookAheadEnd = other.fLookAheadEnd; // was left uninitialized in the copy
+ fVal = other.fVal;
+ fRuleRoot = FALSE; // callers set this on the copy themselves where needed
+ fChainIn = other.fChainIn;
+ UErrorCode status = U_ZERO_ERROR;
+ fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
+ fLastPosSet = new UVector(status);
+ fFollowPos = new UVector(status);
+}
+
+//-------------------------------------------------------------------------
+//
+// Destructor. Deletes both this node AND any child nodes,
+// except in the case of variable reference nodes. For
+// these, the l. child points back to the definition, which
+// is common for all references to the variable, meaning
+// it can't be deleted here.
+//
+//-------------------------------------------------------------------------
+RBBINode::~RBBINode() {
+ // printf("deleting node %8x serial %4d\n", this, this->fSerialNum);
+ delete fInputSet;
+ fInputSet = NULL;
+
+ switch (this->fType) {
+ case varRef:
+ case setRef:
+ // for these node types, multiple instances point to the same "children"
+ // Storage ownership of children handled elsewhere. Don't delete here.
+ break;
+
+ default:
+ delete fLeftChild; // recursively destroys the owned subtree
+ fLeftChild = NULL;
+ delete fRightChild;
+ fRightChild = NULL;
+ }
+
+
+ delete fFirstPosSet; // the three position vectors are allocated by the constructors
+ delete fLastPosSet;
+ delete fFollowPos;
+
+}
+
+
+//-------------------------------------------------------------------------
+//
+// cloneTree Make a copy of the subtree rooted at this node.
+// Discard any variable references encountered along the way,
+// and replace with copies of the variable's definitions.
+// Used to replicate the expression underneath variable
+// references in preparation for generating the DFA tables.
+//
+//-------------------------------------------------------------------------
+RBBINode *RBBINode::cloneTree() {
+ RBBINode *n;
+
+ if (fType == RBBINode::varRef) {
+ // If the current node is a variable reference, skip over it
+ // and clone the definition of the variable instead.
+ n = fLeftChild->cloneTree();
+ } else if (fType == RBBINode::uset) {
+ n = this; // uset nodes are not copied; the same node is returned
+ } else {
+ n = new RBBINode(*this); // the copy constructor leaves parent and children NULL
+ // Check for null pointer.
+ if (n != NULL) {
+ if (fLeftChild != NULL) {
+ n->fLeftChild = fLeftChild->cloneTree();
+ n->fLeftChild->fParent = n; // NOTE(review): the child clone is dereferenced without a NULL check
+ }
+ if (fRightChild != NULL) {
+ n->fRightChild = fRightChild->cloneTree();
+ n->fRightChild->fParent = n; // NOTE(review): same unchecked dereference as for the left child
+ }
+ }
+ }
+ return n;
+}
+
+
+
+//-------------------------------------------------------------------------
+//
+// flattenVariables Walk a parse tree, replacing any variable
+// references with a copy of the variable's definition.
+// Aside from variables, the tree is not changed.
+//
+// Return the root of the tree. If the root was not a variable
+// reference, it remains unchanged - the root we started with
+// is the root we return. If, however, the root was a variable
+// reference, the root of the newly cloned replacement tree will
+// be returned, and the original tree deleted.
+//
+// This function works by recursively walking the tree
+// without doing anything until a variable reference is
+// found, then calling cloneTree() at that point. Any
+// nested references are handled by cloneTree(), not here.
+//
+//-------------------------------------------------------------------------
+RBBINode *RBBINode::flattenVariables() {
+ if (fType == varRef) {
+ RBBINode *retNode = fLeftChild->cloneTree(); // copy of the variable's definition
+ if (retNode != NULL) {
+ retNode->fRuleRoot = this->fRuleRoot; // the replacement takes over this node's flags
+ retNode->fChainIn = this->fChainIn;
+ }
+ delete this; // TODO: undefined behavior. Fix.
+ return retNode; // no member may be touched after the delete above
+ }
+
+ if (fLeftChild != NULL) {
+ fLeftChild = fLeftChild->flattenVariables(); // the child may have been replaced, so store the result
+ fLeftChild->fParent = this;
+ }
+ if (fRightChild != NULL) {
+ fRightChild = fRightChild->flattenVariables();
+ fRightChild->fParent = this;
+ }
+ return this;
+}
+
+
+//-------------------------------------------------------------------------
+//
+// flattenSets Walk the parse tree, replacing any nodes of type setRef
+// with a copy of the expression tree for the set. A set's
+// equivalent expression tree is precomputed and saved as
+// the left child of the uset node.
+//
+//-------------------------------------------------------------------------
+void RBBINode::flattenSets() {
+ U_ASSERT(fType != setRef); // setRef nodes are replaced from their parent, below
+
+ if (fLeftChild != NULL) {
+ if (fLeftChild->fType==setRef) {
+ RBBINode *setRefNode = fLeftChild;
+ RBBINode *usetNode = setRefNode->fLeftChild;
+ RBBINode *replTree = usetNode->fLeftChild; // the set's precomputed expression tree
+ fLeftChild = replTree->cloneTree();
+ fLeftChild->fParent = this;
+ delete setRefNode; // the destructor leaves a setRef node's children alone
+ } else {
+ fLeftChild->flattenSets();
+ }
+ }
+
+ if (fRightChild != NULL) { // same treatment for the right child
+ if (fRightChild->fType==setRef) {
+ RBBINode *setRefNode = fRightChild;
+ RBBINode *usetNode = setRefNode->fLeftChild;
+ RBBINode *replTree = usetNode->fLeftChild;
+ fRightChild = replTree->cloneTree();
+ fRightChild->fParent = this;
+ delete setRefNode;
+ } else {
+ fRightChild->flattenSets();
+ }
+ }
+}
+
+
+
+//-------------------------------------------------------------------------
+//
+// findNodes() Locate all the nodes of the specified type, starting
+// at the specified root.
+//
+//-------------------------------------------------------------------------
+void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
+ /* do nothing once an error has been recorded in status */
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if (fType == kind) {
+ dest->addElement(this, status); // this node is added before any matches from its subtrees
+ }
+ if (fLeftChild != NULL) {
+ fLeftChild->findNodes(dest, kind, status);
+ }
+ if (fRightChild != NULL) {
+ fRightChild->findNodes(dest, kind, status);
+ }
+}
+
+
+//-------------------------------------------------------------------------
+//
+// print. Print out a single node, for debugging.
+//
+//-------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+
+static int32_t serial(const RBBINode *node) {
+ return (node == NULL? -1 : node->fSerialNum); // -1 stands for "no node" in the debug dump
+}
+
+
+void RBBINode::printNode(const RBBINode *node) {
+ static const char * const nodeTypeNames[] = { // indexed by fType; keep in NodeType declaration order
+ "setRef",
+ "uset",
+ "varRef",
+ "leafChar",
+ "lookAhead",
+ "tag",
+ "endMark",
+ "opStart",
+ "opCat",
+ "opOr",
+ "opStar",
+ "opPlus",
+ "opQuestion",
+ "opBreak",
+ "opReverse",
+ "opLParen"
+ };
+
+ if (node==NULL) {
+ RBBIDebugPrintf("%10p", (void *)node); // a NULL node prints only its (null) address
+ } else {
+ RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ",
+ (void *)node, node->fSerialNum, nodeTypeNames[node->fType],
+ node->fRuleRoot?'R':' ', node->fChainIn?'C':' ',
+ serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent),
+ node->fFirstPos, node->fVal);
+ if (node->fType == varRef) {
+ RBBI_DEBUG_printUnicodeString(node->fText); // fText is printed only for variable references
+ }
+ }
+ RBBIDebugPrintf("\n");
+}
+#endif
+
+
+#ifdef RBBI_DEBUG
+U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) {
+ RBBIDebugPrintf("%*s", minWidth, CStr(s)()); // minWidth is passed as the printf field width
+}
+#endif
+
+
+//-------------------------------------------------------------------------
+//
+// print. Print out the tree of nodes rooted at "this"
+//
+//-------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+void RBBINode::printNodeHeader() {
+ RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n"); // column titles for printNode() output
+}
+
+void RBBINode::printTree(const RBBINode *node, UBool printHeading) {
+ if (printHeading) {
+ printNodeHeader();
+ }
+ printNode(node); // a NULL node is handled by printNode()
+ if (node != NULL) {
+ // Do not descend into the definition under a variable reference.
+ // Unconditionally dump children of all other node types.
+ if (node->fType != varRef) {
+ if (node->fLeftChild != NULL) {
+ printTree(node->fLeftChild, FALSE); // the heading is printed once, at the root only
+ }
+
+ if (node->fRightChild != NULL) {
+ printTree(node->fRightChild, FALSE);
+ }
+ }
+ }
+}
+#endif
+
+
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/thirdparty/icu4c/common/rbbinode.h b/thirdparty/icu4c/common/rbbinode.h
new file mode 100644
index 0000000000..cff3ba70c4
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbinode.h
@@ -0,0 +1,127 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 2001-2016, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+
+#ifndef RBBINODE_H
+#define RBBINODE_H
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/uobject.h"
+
+//
+// class RBBINode
+//
+// Represents a node in the parse tree generated when reading
+// a rule file.
+//
+
+U_NAMESPACE_BEGIN
+
+class UnicodeSet;
+class UVector;
+
+// One node of the parse tree that is generated when reading a rule file.
+// All data members are public; the copy-assignment and equality operators are
+// declared private and left undefined so they cannot be used by accident.
+class RBBINode : public UMemory {
+ public:
+ // Kind of parse-tree node.
+ // NOTE: the debug printer (printNode) looks up a node's display name with
+ // nodeTypeNames[fType], so that name table must list the same entries in
+ // the same order as this enum.
+ enum NodeType {
+ setRef,
+ uset,
+ varRef,
+ leafChar,
+ lookAhead,
+ tag,
+ endMark,
+ opStart,
+ opCat,
+ opOr,
+ opStar,
+ opPlus,
+ opQuestion,
+ opBreak,
+ opReverse,
+ opLParen
+ };
+
+ // Operator precedence levels; stored in fPrecedence (binary ops only).
+ // NOTE(review): presumably listed from lowest to highest binding strength - confirm against the scanner.
+ enum OpPrecedence {
+ precZero,
+ precStart,
+ precLParen,
+ precOpOr,
+ precOpCat
+ };
+
+ NodeType fType;
+ RBBINode *fParent;
+ RBBINode *fLeftChild;
+ RBBINode *fRightChild;
+ UnicodeSet *fInputSet; // For uset nodes only.
+ OpPrecedence fPrecedence; // For binary ops only.
+
+ UnicodeString fText; // Text corresponding to this node.
+ // May be lazily evaluated when (if) needed
+ // for some node types.
+ int fFirstPos; // Position in the rule source string of the
+ // first text associated with the node.
+ // If there's a left child, this will be the same
+ // as that child's first position.
+ int fLastPos; // Last position in the rule source string
+ // of any text associated with this node.
+ // If there's a right child, this will be the same
+ // as that child's last position.
+
+ UBool fNullable; // See Aho.
+ int32_t fVal; // For leafChar nodes, the value.
+ // Values are the character category,
+ // corresponds to columns in the final
+ // state transition table.
+
+ UBool fLookAheadEnd; // For endMark nodes, set true if
+ // marking the end of a look-ahead rule.
+
+ UBool fRuleRoot; // True if this node is the root of a rule.
+ UBool fChainIn; // True if chaining into this rule is allowed
+ // (no '^' present).
+
+ // Position sets attached to the node. Note that fFirstPosSet/fLastPosSet
+ // are unrelated to the source-string offsets fFirstPos/fLastPos above.
+ UVector *fFirstPosSet;
+ UVector *fLastPosSet; // TODO: rename fFirstPos & fLastPos to avoid confusion.
+ UVector *fFollowPos;
+
+
+ RBBINode(NodeType t);
+ RBBINode(const RBBINode &other);
+ ~RBBINode();
+
+ RBBINode *cloneTree();
+ RBBINode *flattenVariables();
+ void flattenSets();
+ void findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status);
+
+ // Debug printers; only compiled when RBBI_DEBUG is defined.
+#ifdef RBBI_DEBUG
+ static void printNodeHeader();
+ static void printNode(const RBBINode *n);
+ static void printTree(const RBBINode *n, UBool withHeading);
+#endif
+
+ private:
+ RBBINode &operator = (const RBBINode &other); // No defs.
+ UBool operator == (const RBBINode &other); // Private, so these functions won't accidentally be used.
+
+#ifdef RBBI_DEBUG
+ public:
+ int fSerialNum; // Debugging aids. Printed in the "serial" column by printNode().
+#endif
+};
+
+#ifdef RBBI_DEBUG
+// Debug-only helper (RBBI_DEBUG builds): prints s through RBBIDebugPrintf with
+// a minimum field width of minWidth, which defaults to 0.
+U_CFUNC void
+RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth=0);
+#endif
+
+U_NAMESPACE_END
+
+#endif
+
diff --git a/thirdparty/icu4c/common/rbbirb.cpp b/thirdparty/icu4c/common/rbbirb.cpp
new file mode 100644
index 0000000000..e5c250dfe4
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbirb.cpp
@@ -0,0 +1,361 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// file: rbbirb.cpp
+//
+// Copyright (C) 2002-2011, International Business Machines Corporation and others.
+// All Rights Reserved.
+//
+// This file contains the RBBIRuleBuilder class implementation. This is the main class for
+// building (compiling) break rules into the tables required by the runtime
+// RBBI engine.
+//
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/rbbi.h"
+#include "unicode/ubrk.h"
+#include "unicode/unistr.h"
+#include "unicode/uniset.h"
+#include "unicode/uchar.h"
+#include "unicode/uchriter.h"
+#include "unicode/ustring.h"
+#include "unicode/parsepos.h"
+#include "unicode/parseerr.h"
+
+#include "cmemory.h"
+#include "cstring.h"
+#include "rbbirb.h"
+#include "rbbinode.h"
+#include "rbbiscan.h"
+#include "rbbisetb.h"
+#include "rbbitblb.h"
+#include "rbbidata.h"
+#include "uassert.h"
+
+
+U_NAMESPACE_BEGIN
+
+
+//----------------------------------------------------------------------------------------
+//
+// Constructor.
+//
+//----------------------------------------------------------------------------------------
+RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
+                                 UParseError *parseErr,
+                                 UErrorCode &status)
+ : fRules(rules), fStrippedRules(rules)
+{
+    // Remember where to report errors; the incoming status is examined further down.
+    fStatus     = &status;
+    fParseError = parseErr;
+    if (parseErr != NULL) {
+        uprv_memset(parseErr, 0, sizeof(UParseError));
+    }
+
+    // Debug tracing is driven by an environment variable, in debug builds only.
+    fDebugEnv = NULL;
+#ifdef RBBI_DEBUG
+    fDebugEnv = getenv("U_RBBIDEBUG");
+#endif
+
+    // Parse trees; unqualified rules go to the forward tree by default.
+    fForwardTree = NULL;
+    fReverseTree = NULL;
+    fSafeFwdTree = NULL;
+    fSafeRevTree = NULL;
+    fDefaultTree = &fForwardTree;
+
+    // Rule options.
+    fChainRules         = FALSE;
+    fLBCMNoChain        = FALSE;
+    fLookAheadHardBreak = FALSE;
+
+    // Owned helper objects; they stay NULL if construction stops early.
+    fForwardTable   = NULL;
+    fUSetNodes      = NULL;
+    fRuleStatusVals = NULL;
+    fScanner        = NULL;
+    fSetBuilder     = NULL;
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // The UVector constructors may overwrite status, hence the check above.
+    fUSetNodes      = new UVector(status);
+    fRuleStatusVals = new UVector(status);
+    fScanner        = new RBBIRuleScanner(this);
+    fSetBuilder     = new RBBISetBuilder(this);
+
+    // Report an allocation failure only when nothing else has failed already.
+    const UBool missingHelper =
+        fSetBuilder == NULL || fScanner == NULL || fUSetNodes == NULL || fRuleStatusVals == NULL;
+    if (U_SUCCESS(status) && missingHelper) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+
+
+//----------------------------------------------------------------------------------------
+//
+// Destructor
+//
+//----------------------------------------------------------------------------------------
+RBBIRuleBuilder::~RBBIRuleBuilder() {
+
+    // fUSetNodes is still NULL when the constructor returned early (failure
+    // status on entry) or when its allocation failed, so it must be checked
+    // before it is walked.
+    if (fUSetNodes != NULL) {
+        // Delete every node held in the vector before the vector itself.
+        // The walk ends at the first NULL handed back by elementAt().
+        int i;
+        for (i=0; ; i++) {
+            RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i);
+            if (n==NULL) {
+                break;
+            }
+            delete n;
+        }
+    }
+
+    // Deleting a NULL pointer is a no-op, so the remaining members need no guards.
+    delete fUSetNodes;
+    delete fSetBuilder;
+    delete fForwardTable;
+    delete fForwardTree;
+    delete fReverseTree;
+    delete fSafeFwdTree;
+    delete fSafeRevTree;
+    delete fScanner;
+    delete fRuleStatusVals;
+}
+
+
+
+
+
+//----------------------------------------------------------------------------------------
+//
+// flattenData() - Collect up the compiled RBBI rule data and put it into
+// the format for saving in ICU data files,
+// which is also the format needed by the RBBI runtime engine.
+//
+//----------------------------------------------------------------------------------------
+// Round i up to the next multiple of 8; values already on a multiple of 8 are unchanged.
+static int32_t align8(int32_t i) {
+    const int32_t padded = i + 7;
+    return padded & ~7;
+}
+
+// Serialize the compiled rules into one contiguous memory image:
+//   header | forward table | safe reverse table | trie | status table | UTF-8 rule source
+//
+// Returns a block obtained from uprv_malloc() that the caller becomes
+// responsible for, or NULL on failure. NULL is returned when *fStatus is
+// already a failure on entry, when the allocation fails
+// (U_MEMORY_ALLOCATION_ERROR), or when the rule source cannot be converted to
+// UTF-8; in the last case the partially filled block is released first.
+RBBIDataHeader *RBBIRuleBuilder::flattenData() {
+    if (U_FAILURE(*fStatus)) {
+        return NULL;
+    }
+
+    // Remove whitespace from the rules to make it smaller.
+    // The rule parser has already removed comments.
+    fStrippedRules = fScanner->stripRules(fStrippedRules);
+
+    // Calculate the size of each section in the data.
+    // Every size is rounded up to a multiple of 8 for better memory alignment.
+    int32_t headerSize        = align8(sizeof(RBBIDataHeader));
+    int32_t forwardTableSize  = align8(fForwardTable->getTableSize());
+    int32_t reverseTableSize  = align8(fForwardTable->getSafeTableSize());
+    int32_t trieSize          = align8(fSetBuilder->getTrieSize());
+    int32_t statusTableSize   = align8(fRuleStatusVals->size() * sizeof(int32_t));
+
+    // Pre-flight the UTF-8 conversion with an empty destination just to learn
+    // the required length. Whatever that call stored in *fStatus is discarded
+    // afterwards (a pre-flight is expected to report a buffer overflow).
+    int32_t rulesLengthInUTF8 = 0;
+    u_strToUTF8WithSub(0, 0, &rulesLengthInUTF8,
+                       fStrippedRules.getBuffer(), fStrippedRules.length(),
+                       0xfffd, nullptr, fStatus);
+    *fStatus = U_ZERO_ERROR;
+
+    // +1 leaves room for a terminating NUL after the rule text.
+    int32_t rulesSize = align8((rulesLengthInUTF8+1));
+
+    int32_t totalSize = headerSize
+                        + forwardTableSize
+                        + reverseTableSize
+                        + statusTableSize + trieSize + rulesSize;
+
+#ifdef RBBI_DEBUG
+    if (fDebugEnv && uprv_strstr(fDebugEnv, "size")) {
+        RBBIDebugPrintf("Header Size: %8d\n", headerSize);
+        RBBIDebugPrintf("Forward Table Size: %8d\n", forwardTableSize);
+        RBBIDebugPrintf("Reverse Table Size: %8d\n", reverseTableSize);
+        RBBIDebugPrintf("Trie Size: %8d\n", trieSize);
+        RBBIDebugPrintf("Status Table Size: %8d\n", statusTableSize);
+        RBBIDebugPrintf("Rules Size: %8d\n", rulesSize);
+        RBBIDebugPrintf("-----------------------------\n");
+        RBBIDebugPrintf("Total Size: %8d\n", totalSize);
+    }
+#endif
+
+    RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize);
+    if (data == NULL) {
+        *fStatus = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    // Zero everything so that the alignment padding between sections is defined.
+    uprv_memset(data, 0, totalSize);
+
+
+    data->fMagic            = 0xb1a0;
+    data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0];
+    data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1];
+    data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2];
+    data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3];
+    data->fLength           = totalSize;
+    data->fCatCount         = fSetBuilder->getNumCharCategories();
+
+    // Section offsets are byte offsets from the start of the header; each
+    // section begins where the previous one ends.
+    data->fFTable        = headerSize;
+    data->fFTableLen     = forwardTableSize;
+
+    data->fRTable        = data->fFTable + data->fFTableLen;
+    data->fRTableLen     = reverseTableSize;
+
+    data->fTrie          = data->fRTable + data->fRTableLen;
+    data->fTrieLen       = trieSize;
+    data->fStatusTable   = data->fTrie + data->fTrieLen;
+    data->fStatusTableLen= statusTableSize;
+    data->fRuleSource    = data->fStatusTable + statusTableSize;
+    data->fRuleSourceLen = rulesLengthInUTF8;   // unpadded length, without the NUL
+
+    uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
+
+    fForwardTable->exportTable((uint8_t *)data + data->fFTable);
+    fForwardTable->exportSafeTable((uint8_t *)data + data->fRTable);
+    fSetBuilder->serializeTrie((uint8_t *)data + data->fTrie);
+
+    int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
+    for (int32_t i=0; i<fRuleStatusVals->size(); i++) {
+        ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
+    }
+
+    // Second pass: the real conversion, straight into the rule-source section.
+    u_strToUTF8WithSub((char *)data+data->fRuleSource, rulesSize, &rulesLengthInUTF8,
+                       fStrippedRules.getBuffer(), fStrippedRules.length(),
+                       0xfffd, nullptr, fStatus);
+    if (U_FAILURE(*fStatus)) {
+        // The block has not been handed to anybody yet; release it so that
+        // the failure path does not leak it.
+        uprv_free(data);
+        return NULL;
+    }
+
+    return data;
+}
+
+
+//----------------------------------------------------------------------------------------
+//
+// createRuleBasedBreakIterator construct from source rules that are passed in
+// in a UnicodeString
+//
+//----------------------------------------------------------------------------------------
+// Compile "rules" and wrap the result in a new RuleBasedBreakIterator.
+//
+//   rules       source text of the break rules
+//   parseError  passed on to the builder; may be NULL
+//   status      in/out error code; nothing is built if it is already a failure
+//
+// Returns the new iterator, or NULL with status set to a failure code.
+BreakIterator *
+RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
+                                               UParseError *parseError,
+                                               UErrorCode &status)
+{
+    //
+    // Read the input rules, generate a parse tree, symbol table,
+    // and list of all Unicode Sets referenced by the rules.
+    //
+    RBBIRuleBuilder builder(rules, parseError, status);
+    if (U_FAILURE(status)) { // status checked here because build below doesn't
+        return NULL;
+    }
+
+    RBBIDataHeader *data = builder.build(status);
+
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+
+    //
+    // Create a break iterator from the compiled rules.
+    // (Identical to creation from stored pre-compiled rules)
+    //
+    // status is checked after init in construction.
+    // NOTE(review): a constructed iterator presumably takes over "data" and
+    // releases it from its destructor - confirm against RuleBasedBreakIterator.
+    RuleBasedBreakIterator *This = new RuleBasedBreakIterator(data, status);
+    if (U_FAILURE(status)) {
+        delete This;
+        This = NULL;
+    }
+    else if(This == NULL) { // test for NULL
+        // No iterator object exists, so nothing has taken over the compiled
+        // data; free it here instead of leaking it.
+        uprv_free(data);
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return This;
+}
+
+// Run the whole compilation pipeline and return the flattened data image,
+// or nullptr if status reports a failure at one of the checkpoints.
+RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+
+    // Step 1: scan the rule source.
+    fScanner->parse();
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+
+    // Step 2: UnicodeSet processing - munge the sets to create an initial
+    // set of character categories.
+    fSetBuilder->buildRanges();
+
+    // Step 3: generate the DFA state transition table.
+    fForwardTable = new RBBITableBuilder(this, &fForwardTree, status);
+    if (!fForwardTable) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    fForwardTable->buildForwardTable();
+
+    // Step 4: merge equivalent rows and columns. This alters the initial set
+    // of character categories, which invalidates the representation of the
+    // UnicodeSets in the parse tree.
+    optimizeTables();
+    fForwardTable->buildSafeReverseTable(status);
+
+#ifdef RBBI_DEBUG
+    const bool traceStates = fDebugEnv && uprv_strstr(fDebugEnv, "states");
+    if (traceStates) {
+        fForwardTable->printStates();
+        fForwardTable->printRuleStatusTable();
+        fForwardTable->printReverseTable();
+    }
+#endif
+
+    // Step 5: generate the mapping tables (TRIE) from input code points to
+    // the character categories.
+    fSetBuilder->buildTrie();
+
+    // Step 6: package the compiled data into a memory image in the run-time
+    // format. flattenData() hands back NULL when it fails.
+    RBBIDataHeader *flattened = flattenData();
+    return U_FAILURE(status) ? nullptr : flattened;
+}
+
+// Repeatedly merge duplicate character classes (columns) and duplicate states
+// (rows) of the forward table until a complete pass changes nothing.
+void RBBIRuleBuilder::optimizeTables() {
+    for (bool changed = true; changed; ) {
+        changed = false;
+
+        // Classes 0, 1 and 2 are special (unused, {bof} and {eof}) and must
+        // not have other categories merged into them, so the search for
+        // duplicate columns starts at class 3.
+        IntPair classPair = {3, 0};
+        while (fForwardTable->findDuplCharClassFrom(&classPair)) {
+            fSetBuilder->mergeCategories(classPair);
+            fForwardTable->removeColumn(classPair.second);
+            changed = true;
+        }
+
+        // Keep folding duplicate states for as long as some are removed.
+        while (fForwardTable->removeDuplicateStates() > 0) {
+            changed = true;
+        }
+    }
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/thirdparty/icu4c/common/rbbirb.h b/thirdparty/icu4c/common/rbbirb.h
new file mode 100644
index 0000000000..037c1dc2ce
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbirb.h
@@ -0,0 +1,237 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// rbbirb.h
+//
+// Copyright (C) 2002-2008, International Business Machines Corporation and others.
+// All Rights Reserved.
+//
+// This file contains declarations for several classes from the
+// Rule Based Break Iterator rule builder.
+//
+
+
+#ifndef RBBIRB_H
+#define RBBIRB_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include <utility>
+
+#include "unicode/uobject.h"
+#include "unicode/rbbi.h"
+#include "unicode/uniset.h"
+#include "unicode/parseerr.h"
+#include "uhash.h"
+#include "uvector.h"
+#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
+ // looks up references to $variables within a set.
+
+
+U_NAMESPACE_BEGIN
+
+class RBBIRuleScanner;
+struct RBBIRuleTableEl;
+class RBBISetBuilder;
+class RBBINode;
+class RBBITableBuilder;
+
+
+
+//--------------------------------------------------------------------------------
+//
+// RBBISymbolTable. Implements SymbolTable interface that is used by the
+// UnicodeSet parser to resolve references to $variables.
+//
+//--------------------------------------------------------------------------------
+// A key/value record: a variable name (key) paired with a parse-tree node (val).
+// Copy construction and copy assignment are declared private to forbid copying.
+class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one
+public: // of these structs for each entry.
+ RBBISymbolTableEntry();
+ UnicodeString key;
+ RBBINode *val;
+ ~RBBISymbolTableEntry();
+
+private:
+ RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class
+ RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class
+};
+
+
+// Symbol table for $variables. Implements the SymbolTable interface so that
+// the UnicodeSet parser can resolve $variable references, and adds
+// lookupNode()/addEntry() for the rule builder's own use.
+class RBBISymbolTable : public UMemory, public SymbolTable {
+private:
+ // Held by reference, not copied.
+ // NOTE(review): presumably bound to the constructor's fRules argument, which must then outlive this table - confirm.
+ const UnicodeString &fRules;
+ UHashtable *fHashTable;
+ RBBIRuleScanner *fRuleScanner;
+
+ // These next two fields are part of the mechanism for passing references to
+ // already-constructed UnicodeSets back to the UnicodeSet constructor
+ // when the pattern includes $variable references.
+ // NOTE(review): the "/uffff" below presumably means the single character U+FFFF - confirm against the constructor.
+ const UnicodeString ffffString; // = "/uffff"
+ UnicodeSet *fCachedSetLookup;
+
+public:
+ // API inherited from class SymbolTable
+ virtual const UnicodeString* lookup(const UnicodeString& s) const;
+ virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
+ virtual UnicodeString parseReference(const UnicodeString& text,
+ ParsePosition& pos, int32_t limit) const;
+
+ // Additional Functions
+ RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status);
+ virtual ~RBBISymbolTable();
+
+ virtual RBBINode *lookupNode(const UnicodeString &key) const;
+ virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err);
+
+#ifdef RBBI_DEBUG
+ virtual void rbbiSymtablePrint() const;
+#else
+ // A do-nothing inline function for non-debug builds. Member funcs can't be empty
+ // or the call sites won't compile.
+ // In this configuration rbbiSymtablePrint() is a macro that merely assigns 0
+ // to fFakeField, so fFakeField exists only to give the macro something to do.
+ int32_t fFakeField;
+ #define rbbiSymtablePrint() fFakeField=0;
+#endif
+
+private:
+ RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class
+ RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class
+};
+
+
+//--------------------------------------------------------------------------------
+//
+// class RBBIRuleBuilder The top-level class handling RBBI rule compiling.
+//
+//--------------------------------------------------------------------------------
+class RBBIRuleBuilder : public UMemory {
+public:
+
+ // Create a rule based break iterator from a set of rules.
+ // This function is the main entry point into the rule builder. The
+ // public ICU API for creating RBBIs uses this function to do the actual work.
+ // Returns NULL, with status set to a failure code, if anything goes wrong.
+ //
+ static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules,
+ UParseError *parseError,
+ UErrorCode &status);
+
+public:
+ // The "public" functions and data members that appear below are accessed
+ // (and shared) by the various parts that make up the rule builder. They
+ // are NOT intended to be accessed by anything outside of the
+ // rule builder implementation.
+ //
+ // The constructor keeps a reference to "rules" and a pointer to "status"
+ // (see fRules and fStatus), so both must stay alive as long as the builder.
+ // parseErr may be NULL; when it is not, it is zeroed by the constructor.
+ RBBIRuleBuilder(const UnicodeString &rules,
+ UParseError *parseErr,
+ UErrorCode &status
+ );
+
+ // Deletes the uset nodes, the parse trees, the table builder, the scanner,
+ // the set builder and the two vectors held by this builder.
+ virtual ~RBBIRuleBuilder();
+
+ /**
+ * Build the state tables and char class Trie from the source rules.
+ * Returns the flattened data image produced by flattenData(), or nullptr
+ * when status reports a failure.
+ */
+ RBBIDataHeader *build(UErrorCode &status);
+
+
+ /**
+ * Fold together redundant character classes (table columns) and
+ * redundant states (table rows). Done after initial table generation,
+ * before serializing the result.
+ */
+ void optimizeTables();
+
+ char *fDebugEnv; // controls debug trace output
+ // (value of U_RBBIDEBUG in RBBI_DEBUG builds, else NULL)
+ UErrorCode *fStatus; // Error reporting. Keeping status
+ UParseError *fParseError; // here avoids passing it everywhere.
+ const UnicodeString &fRules; // The rule string that we are compiling
+ UnicodeString fStrippedRules; // The rule string, with comments stripped.
+ // flattenData() also passes it through the
+ // scanner's stripRules() before serializing.
+
+ RBBIRuleScanner *fScanner; // The scanner.
+ RBBINode *fForwardTree; // The parse trees, generated by the scanner,
+ RBBINode *fReverseTree; // then manipulated by subsequent steps.
+ RBBINode *fSafeFwdTree;
+ RBBINode *fSafeRevTree;
+
+ RBBINode **fDefaultTree; // For rules not qualified with a !
+ // the tree to which they belong to.
+ // Initialized to &fForwardTree.
+
+ UBool fChainRules; // True for chained Unicode TR style rules.
+ // False for traditional regexp rules.
+
+ UBool fLBCMNoChain; // True: suppress chaining of rules on
+ // chars with LineBreak property == CM.
+
+ UBool fLookAheadHardBreak; // True: Look ahead matches cause an
+ // immediate break, no continuing for the
+ // longest match.
+
+ RBBISetBuilder *fSetBuilder; // Set and Character Category builder.
+ UVector *fUSetNodes; // Vector of all uset nodes.
+
+ RBBITableBuilder *fForwardTable; // State transition table, build time form.
+ // NULL until build() creates it.
+
+ UVector *fRuleStatusVals; // The values that can be returned
+ // from getRuleStatus().
+
+ RBBIDataHeader *flattenData(); // Create the flattened (runtime format)
+ // data tables. Returns a uprv_malloc'd
+ // block, or NULL on failure.
+private:
+ RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class
+ RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class
+};
+
+
+
+
+//----------------------------------------------------------------------------
+//
+// RBBISetTableEl is an entry in the hash table of UnicodeSets that have
+// been encountered. The val Node will be of nodetype uset
+// and contain pointers to the actual UnicodeSets.
+// The Key is the source string for initializing the set.
+//
+// The hash table is used to avoid creating duplicate
+// unnamed (not $var references) UnicodeSets.
+//
+// Memory Management:
+// The Hash Table owns these RBBISetTableEl structs and
+// the key strings. It does NOT own the val nodes.
+//
+//----------------------------------------------------------------------------
+struct RBBISetTableEl {
+ UnicodeString *key; // Source string the set was initialized from; owned by the hash table.
+ RBBINode *val; // Node of type uset for that set; NOT owned by the hash table.
+};
+
+/**
+ * A pair of ints, used to bundle pairs of states or pairs of character classes.
+ */
+using IntPair = std::pair<int32_t, int32_t>;
+
+
+//----------------------------------------------------------------------------
+//
+// RBBIDebugPrintf Printf equivalent, for debugging output.
+// Conditional compilation of the implementation lets us
+// get rid of the stdio dependency in environments where it
+// is unavailable.
+//
+//----------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+#include <stdio.h>
+#define RBBIDebugPrintf printf
+#define RBBIDebugPuts puts
+#else
+#undef RBBIDebugPrintf
+#define RBBIDebugPuts(arg)
+#endif
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif
+
+
+
diff --git a/thirdparty/icu4c/common/rbbirpt.h b/thirdparty/icu4c/common/rbbirpt.h
new file mode 100644
index 0000000000..586953c90c
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbirpt.h
@@ -0,0 +1,296 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//---------------------------------------------------------------------------------
+//
+// Generated Header File. Do not edit by hand.
+// This file contains the state table for the ICU Rule Based Break Iterator
+// rule parser.
+// It is generated by the Perl script "rbbicst.pl" from
+// the rule parser state definitions file "rbbirpt.txt".
+//
+// Copyright (C) 2002-2016 International Business Machines Corporation
+// and others. All rights reserved.
+//
+//---------------------------------------------------------------------------------
+#ifndef RBBIRPT_H
+#define RBBIRPT_H
+
+#include "unicode/utypes.h"
+
+U_NAMESPACE_BEGIN
+//
+// Character classes for RBBI rule scanning.
+//
+ static const uint8_t kRuleSet_digit_char = 128;
+ static const uint8_t kRuleSet_name_char = 129;
+ static const uint8_t kRuleSet_name_start_char = 130;
+ static const uint8_t kRuleSet_rule_char = 131;
+ static const uint8_t kRuleSet_white_space = 132;
+
+
+enum RBBI_RuleParseAction {
+ doCheckVarDef,
+ doDotAny,
+ doEndAssign,
+ doEndOfRule,
+ doEndVariableName,
+ doExit,
+ doExprCatOperator,
+ doExprFinished,
+ doExprOrOperator,
+ doExprRParen,
+ doExprStart,
+ doLParen,
+ doNOP,
+ doNoChain,
+ doOptionEnd,
+ doOptionStart,
+ doReverseDir,
+ doRuleChar,
+ doRuleError,
+ doRuleErrorAssignExpr,
+ doScanUnicodeSet,
+ doSlash,
+ doStartAssign,
+ doStartTagValue,
+ doStartVariableName,
+ doTagDigit,
+ doTagExpectedError,
+ doTagValue,
+ doUnaryOpPlus,
+ doUnaryOpQuestion,
+ doUnaryOpStar,
+ doVariableNameExpectedErr,
+ rbbiLastAction};
+
+//-------------------------------------------------------------------------------
+//
+// RBBIRuleTableEl represents the structure of a row in the transition table
+// for the rule parser state machine.
+//-------------------------------------------------------------------------------
+struct RBBIRuleTableEl {
+ RBBI_RuleParseAction fAction;
+ uint8_t fCharClass; // 0-127: an individual ASCII character
+ // 128-255: character class index
+ uint8_t fNextState; // 0-250: normal next-stat numbers
+ // 255: pop next-state from stack.
+ uint8_t fPushState;
+ UBool fNextChar;
+};
+
+static const struct RBBIRuleTableEl gRuleParseStateTable[] = {
+ {doNOP, 0, 0, 0, TRUE}
+ , {doExprStart, 254, 29, 9, FALSE} // 1 start
+ , {doNOP, 132, 1,0, TRUE} // 2
+ , {doNoChain, 94 /* ^ */, 12, 9, TRUE} // 3
+ , {doExprStart, 36 /* $ */, 88, 98, FALSE} // 4
+ , {doNOP, 33 /* ! */, 19,0, TRUE} // 5
+ , {doNOP, 59 /* ; */, 1,0, TRUE} // 6
+ , {doNOP, 252, 0,0, FALSE} // 7
+ , {doExprStart, 255, 29, 9, FALSE} // 8
+ , {doEndOfRule, 59 /* ; */, 1,0, TRUE} // 9 break-rule-end
+ , {doNOP, 132, 9,0, TRUE} // 10
+ , {doRuleError, 255, 103,0, FALSE} // 11
+ , {doExprStart, 254, 29,0, FALSE} // 12 start-after-caret
+ , {doNOP, 132, 12,0, TRUE} // 13
+ , {doRuleError, 94 /* ^ */, 103,0, FALSE} // 14
+ , {doExprStart, 36 /* $ */, 88, 37, FALSE} // 15
+ , {doRuleError, 59 /* ; */, 103,0, FALSE} // 16
+ , {doRuleError, 252, 103,0, FALSE} // 17
+ , {doExprStart, 255, 29,0, FALSE} // 18
+ , {doNOP, 33 /* ! */, 21,0, TRUE} // 19 rev-option
+ , {doReverseDir, 255, 28, 9, FALSE} // 20
+ , {doOptionStart, 130, 23,0, TRUE} // 21 option-scan1
+ , {doRuleError, 255, 103,0, FALSE} // 22
+ , {doNOP, 129, 23,0, TRUE} // 23 option-scan2
+ , {doOptionEnd, 255, 25,0, FALSE} // 24
+ , {doNOP, 59 /* ; */, 1,0, TRUE} // 25 option-scan3
+ , {doNOP, 132, 25,0, TRUE} // 26
+ , {doRuleError, 255, 103,0, FALSE} // 27
+ , {doExprStart, 255, 29, 9, FALSE} // 28 reverse-rule
+ , {doRuleChar, 254, 38,0, TRUE} // 29 term
+ , {doNOP, 132, 29,0, TRUE} // 30
+ , {doRuleChar, 131, 38,0, TRUE} // 31
+ , {doNOP, 91 /* [ */, 94, 38, FALSE} // 32
+ , {doLParen, 40 /* ( */, 29, 38, TRUE} // 33
+ , {doNOP, 36 /* $ */, 88, 37, FALSE} // 34
+ , {doDotAny, 46 /* . */, 38,0, TRUE} // 35
+ , {doRuleError, 255, 103,0, FALSE} // 36
+ , {doCheckVarDef, 255, 38,0, FALSE} // 37 term-var-ref
+ , {doNOP, 132, 38,0, TRUE} // 38 expr-mod
+ , {doUnaryOpStar, 42 /* * */, 43,0, TRUE} // 39
+ , {doUnaryOpPlus, 43 /* + */, 43,0, TRUE} // 40
+ , {doUnaryOpQuestion, 63 /* ? */, 43,0, TRUE} // 41
+ , {doNOP, 255, 43,0, FALSE} // 42
+ , {doExprCatOperator, 254, 29,0, FALSE} // 43 expr-cont
+ , {doNOP, 132, 43,0, TRUE} // 44
+ , {doExprCatOperator, 131, 29,0, FALSE} // 45
+ , {doExprCatOperator, 91 /* [ */, 29,0, FALSE} // 46
+ , {doExprCatOperator, 40 /* ( */, 29,0, FALSE} // 47
+ , {doExprCatOperator, 36 /* $ */, 29,0, FALSE} // 48
+ , {doExprCatOperator, 46 /* . */, 29,0, FALSE} // 49
+ , {doExprCatOperator, 47 /* / */, 55,0, FALSE} // 50
+ , {doExprCatOperator, 123 /* { */, 67,0, TRUE} // 51
+ , {doExprOrOperator, 124 /* | */, 29,0, TRUE} // 52
+ , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 53
+ , {doExprFinished, 255, 255,0, FALSE} // 54
+ , {doSlash, 47 /* / */, 57,0, TRUE} // 55 look-ahead
+ , {doNOP, 255, 103,0, FALSE} // 56
+ , {doExprCatOperator, 254, 29,0, FALSE} // 57 expr-cont-no-slash
+ , {doNOP, 132, 43,0, TRUE} // 58
+ , {doExprCatOperator, 131, 29,0, FALSE} // 59
+ , {doExprCatOperator, 91 /* [ */, 29,0, FALSE} // 60
+ , {doExprCatOperator, 40 /* ( */, 29,0, FALSE} // 61
+ , {doExprCatOperator, 36 /* $ */, 29,0, FALSE} // 62
+ , {doExprCatOperator, 46 /* . */, 29,0, FALSE} // 63
+ , {doExprOrOperator, 124 /* | */, 29,0, TRUE} // 64
+ , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 65
+ , {doExprFinished, 255, 255,0, FALSE} // 66
+ , {doNOP, 132, 67,0, TRUE} // 67 tag-open
+ , {doStartTagValue, 128, 70,0, FALSE} // 68
+ , {doTagExpectedError, 255, 103,0, FALSE} // 69
+ , {doNOP, 132, 74,0, TRUE} // 70 tag-value
+ , {doNOP, 125 /* } */, 74,0, FALSE} // 71
+ , {doTagDigit, 128, 70,0, TRUE} // 72
+ , {doTagExpectedError, 255, 103,0, FALSE} // 73
+ , {doNOP, 132, 74,0, TRUE} // 74 tag-close
+ , {doTagValue, 125 /* } */, 77,0, TRUE} // 75
+ , {doTagExpectedError, 255, 103,0, FALSE} // 76
+ , {doExprCatOperator, 254, 29,0, FALSE} // 77 expr-cont-no-tag
+ , {doNOP, 132, 77,0, TRUE} // 78
+ , {doExprCatOperator, 131, 29,0, FALSE} // 79
+ , {doExprCatOperator, 91 /* [ */, 29,0, FALSE} // 80
+ , {doExprCatOperator, 40 /* ( */, 29,0, FALSE} // 81
+ , {doExprCatOperator, 36 /* $ */, 29,0, FALSE} // 82
+ , {doExprCatOperator, 46 /* . */, 29,0, FALSE} // 83
+ , {doExprCatOperator, 47 /* / */, 55,0, FALSE} // 84
+ , {doExprOrOperator, 124 /* | */, 29,0, TRUE} // 85
+ , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 86
+ , {doExprFinished, 255, 255,0, FALSE} // 87
+ , {doStartVariableName, 36 /* $ */, 90,0, TRUE} // 88 scan-var-name
+ , {doNOP, 255, 103,0, FALSE} // 89
+ , {doNOP, 130, 92,0, TRUE} // 90 scan-var-start
+ , {doVariableNameExpectedErr, 255, 103,0, FALSE} // 91
+ , {doNOP, 129, 92,0, TRUE} // 92 scan-var-body
+ , {doEndVariableName, 255, 255,0, FALSE} // 93
+ , {doScanUnicodeSet, 91 /* [ */, 255,0, TRUE} // 94 scan-unicode-set
+ , {doScanUnicodeSet, 112 /* p */, 255,0, TRUE} // 95
+ , {doScanUnicodeSet, 80 /* P */, 255,0, TRUE} // 96
+ , {doNOP, 255, 103,0, FALSE} // 97
+ , {doNOP, 132, 98,0, TRUE} // 98 assign-or-rule
+ , {doStartAssign, 61 /* = */, 29, 101, TRUE} // 99
+ , {doNOP, 255, 37, 9, FALSE} // 100
+ , {doEndAssign, 59 /* ; */, 1,0, TRUE} // 101 assign-end
+ , {doRuleErrorAssignExpr, 255, 103,0, FALSE} // 102
+ , {doExit, 255, 103,0, TRUE} // 103 errorDeath
+ };
+#ifdef RBBI_DEBUG
+static const char * const RBBIRuleStateNames[] = { 0,
+ "start",
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ "break-rule-end",
+ 0,
+ 0,
+ "start-after-caret",
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ "rev-option",
+ 0,
+ "option-scan1",
+ 0,
+ "option-scan2",
+ 0,
+ "option-scan3",
+ 0,
+ 0,
+ "reverse-rule",
+ "term",
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ "term-var-ref",
+ "expr-mod",
+ 0,
+ 0,
+ 0,
+ 0,
+ "expr-cont",
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ "look-ahead",
+ 0,
+ "expr-cont-no-slash",
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ "tag-open",
+ 0,
+ 0,
+ "tag-value",
+ 0,
+ 0,
+ 0,
+ "tag-close",
+ 0,
+ 0,
+ "expr-cont-no-tag",
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ "scan-var-name",
+ 0,
+ "scan-var-start",
+ 0,
+ "scan-var-body",
+ 0,
+ "scan-unicode-set",
+ 0,
+ 0,
+ 0,
+ "assign-or-rule",
+ 0,
+ 0,
+ "assign-end",
+ 0,
+ "errorDeath",
+ 0};
+#endif
+
+U_NAMESPACE_END
+#endif
diff --git a/thirdparty/icu4c/common/rbbiscan.cpp b/thirdparty/icu4c/common/rbbiscan.cpp
new file mode 100644
index 0000000000..9c406af671
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbiscan.cpp
@@ -0,0 +1,1281 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// file: rbbiscan.cpp
+//
+// Copyright (C) 2002-2016, International Business Machines Corporation and others.
+// All Rights Reserved.
+//
+// This file contains the Rule Based Break Iterator Rule Builder functions for
+// scanning the rules and assembling a parse tree. This is the first phase
+// of compiling the rules.
+//
+// The overall compilation of the rules is managed by class RBBIRuleBuilder, which will
+// create and use an instance of this class as part of the process.
+//
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/unistr.h"
+#include "unicode/uniset.h"
+#include "unicode/uchar.h"
+#include "unicode/uchriter.h"
+#include "unicode/parsepos.h"
+#include "unicode/parseerr.h"
+#include "cmemory.h"
+#include "cstring.h"
+
+#include "rbbirpt.h" // Contains state table for the rbbi rules parser.
+ // generated by a Perl script.
+#include "rbbirb.h"
+#include "rbbinode.h"
+#include "rbbiscan.h"
+#include "rbbitblb.h"
+
+#include "uassert.h"
+
+//------------------------------------------------------------------------------
+//
+// Unicode Set init strings for each of the character classes needed for parsing a rule file.
+// (Initialized with hex values for portability to EBCDIC based machines.
+// Really ugly, but there's no good way to avoid it.)
+//
+// The sets are referred to by name in the rbbirpt.txt, which is the
+// source form of the state transition table for the RBBI rule parser.
+//
+//------------------------------------------------------------------------------
+static const UChar gRuleSet_rule_char_pattern[] = { // NUL-terminated UnicodeSet pattern, spelled out as UTF-16 code units
+ // Characters that may appear as literals in patterns without escaping or quoting.
+ // [ ^ [ \ p { Z } \ u 0 0 2 0
+ 0x5b, 0x5e, 0x5b, 0x5c, 0x70, 0x7b, 0x5a, 0x7d, 0x5c, 0x75, 0x30, 0x30, 0x32, 0x30,
+ // - \ u 0 0 7 f ] - [ \ p
+ 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x37, 0x66, 0x5d, 0x2d, 0x5b, 0x5c, 0x70,
+ // { L } ] - [ \ p { N } ] ]
+ 0x7b, 0x4c, 0x7d, 0x5d, 0x2d, 0x5b, 0x5c, 0x70, 0x7b, 0x4e, 0x7d, 0x5d, 0x5d, 0};
+
+static const UChar gRuleSet_name_char_pattern[] = { // decodes to "[_\p{L}\p{N}]": underscore, letters, numbers
+// [ _ \ p { L } \ p { N } ]
+ 0x5b, 0x5f, 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x5c, 0x70, 0x7b, 0x4e, 0x7d, 0x5d, 0};
+
+static const UChar gRuleSet_digit_char_pattern[] = { // decodes to "[0-9]": ASCII decimal digits
+// [ 0 - 9 ]
+ 0x5b, 0x30, 0x2d, 0x39, 0x5d, 0};
+
+static const UChar gRuleSet_name_start_char_pattern[] = { // decodes to "[_\p{L}]": underscore or a letter
+// [ _ \ p { L } ]
+ 0x5b, 0x5f, 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x5d, 0 };
+
+static const UChar kAny[] = {0x61, 0x6e, 0x79, 0x00}; // "any": the key findSetFor() uses for the '.' (match anything) set
+
+
+U_CDECL_BEGIN
+static void U_CALLCONV RBBISetTable_deleter(void *p) {
+    // Value deleter installed on the scanner's set hash table.
+    // Releases one table element together with the key string it owns.
+    icu::RBBISetTableEl *entry = static_cast<icu::RBBISetTableEl *>(p);
+
+    // The uset node in entry->val is deliberately left alone:
+    // it is owned by the linked list "fSetsListHead" in scanner.
+    delete entry->key;
+    uprv_free(entry);
+}
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+//------------------------------------------------------------------------------
+//
+// Constructor.
+//
+//------------------------------------------------------------------------------
+RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb)
+{
+    // Phase 1: give every member a defined value. Nothing in this phase can
+    // fail, so the destructor can run cleanly wherever construction stops below.
+    fRB            = rb;
+
+    // Owned helper objects; created in phase 3.
+    fSymbolTable   = NULL;
+    fSetTable      = NULL;
+
+    // Position bookkeeping for the rule text.
+    fScanIndex     = 0;
+    fNextIndex     = 0;
+    fLineNum       = 1;
+    fCharNum       = 0;
+    fLastChar      = 0;
+    fQuoteMode     = FALSE;
+    fOptionStart   = 0;
+
+    // Parser state stack and parse-tree node stack, both empty.
+    fStateTable    = NULL;
+    fStackPtr      = 0;
+    fStack[0]      = 0;
+    fNodeStackPtr  = 0;
+    fNodeStack[0]  = NULL;
+
+    // Per-rule counters and flags.
+    fRuleNum       = 0;
+    fReverseRule   = FALSE;
+    fLookAheadRule = FALSE;
+    fNoChainInRule = FALSE;
+
+    // Only now look at the incoming status.
+    UErrorCode &status = *rb->fStatus;
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // Phase 2: build the constant Unicode Sets that the state table refers to.
+    //   These could be static, lazily initialized and shared between scanners,
+    //   but building them per instance is simpler, and cheap next to a full
+    //   break iterator build.
+    fRuleSets[kRuleSet_rule_char-128] =
+        UnicodeSet(UnicodeString(gRuleSet_rule_char_pattern), status);
+
+    // Equivalent to [:Pattern_White_Space:], assembled by hand.
+    UnicodeSet &whiteSpace = fRuleSets[kRuleSet_white_space-128];
+    whiteSpace.add(9, 0xd);
+    whiteSpace.add(0x20);
+    whiteSpace.add(0x85);
+    whiteSpace.add(0x200e, 0x200f);
+    whiteSpace.add(0x2028, 0x2029);
+
+    fRuleSets[kRuleSet_name_char-128] =
+        UnicodeSet(UnicodeString(gRuleSet_name_char_pattern), status);
+    fRuleSets[kRuleSet_name_start_char-128] =
+        UnicodeSet(UnicodeString(gRuleSet_name_start_char_pattern), status);
+    fRuleSets[kRuleSet_digit_char-128] =
+        UnicodeSet(UnicodeString(gRuleSet_digit_char_pattern), status);
+
+    if (status == U_ILLEGAL_ARGUMENT_ERROR) {
+        // Seen when ICU's data is missing: UnicodeSet cannot resolve the property
+        // names in the init strings and reports an illegal argument. Report an
+        // init error instead so the real problem is clearer to users.
+        status = U_BRK_INIT_ERROR;
+    }
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // Phase 3: the symbol table for $variables and the hash table of sets.
+    fSymbolTable = new RBBISymbolTable(this, rb->fRules, status);
+    if (fSymbolTable == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    fSetTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+    uhash_setValueDeleter(fSetTable, RBBISetTable_deleter);
+}
+
+
+
+//------------------------------------------------------------------------------
+//
+// Destructor
+//
+//------------------------------------------------------------------------------
+RBBIRuleScanner::~RBBIRuleScanner() {
+    delete fSymbolTable;
+
+    // Closing the hash table runs the value deleter on every remaining element.
+    if (fSetTable != NULL) {
+        uhash_close(fSetTable);
+        fSetTable = NULL;
+    }
+
+    // Node Stack.
+    //   Normally holds a single entry, the entire parse tree for the rules.
+    //   If errors occurred, additional subtrees may still be sitting on the stack.
+    //   Slot 0 is never deleted here.
+    for (; fNodeStackPtr > 0; --fNodeStackPtr) {
+        delete fNodeStack[fNodeStackPtr];
+    }
+}
+
+//------------------------------------------------------------------------------
+//
+// doParseAction Do some action during rule parsing.
+// Called by the parse state machine.
+// Actions build the parse tree and Unicode Sets,
+// and maintain the parse stack for nested expressions.
+//
+// TODO: unify EParseAction and RBBI_RuleParseAction enum types.
+// They represent exactly the same thing. They're separate
+// only to work around enum forward declaration restrictions
+// in some compilers, while at the same time avoiding multiple
+// definitions problems. I'm sure that there's a better way.
+//
+//------------------------------------------------------------------------------
+UBool RBBIRuleScanner::doParseActions(int32_t action)
+{
+    // Performs the single action named by the matched state table row.
+    // Returns FALSE when the state machine should stop: either the action asked
+    // for it (doExit and the error actions) or the builder status is a failure.
+    RBBINode *n       = NULL;
+
+    UBool   returnVal = TRUE;
+
+    switch (action) {
+
+    case doExprStart:
+        pushNewNode(RBBINode::opStart);
+        fRuleNum++;
+        break;
+
+
+    case doNoChain:
+        // Scanned a '^' while on the rule start state.
+        fNoChainInRule = TRUE;
+        break;
+
+
+    case doExprOrOperator:
+        {
+            fixOpStack(RBBINode::precOpCat);
+            RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
+            RBBINode  *orNode      = pushNewNode(RBBINode::opOr);
+            if (U_FAILURE(*fRB->fStatus)) {
+                // The operand was popped and never re-attached; nothing else
+                // refers to it any more, so release it here to avoid a leak.
+                delete operandNode;
+                break;
+            }
+            orNode->fLeftChild     = operandNode;
+            operandNode->fParent   = orNode;
+        }
+        break;
+
+    case doExprCatOperator:
+        // concatenation operator.
+        // For the implicit concatenation of adjacent terms in an expression that are
+        //   not separated by any other operator.  Action is invoked between the
+        //   actions for the two terms.
+        {
+            fixOpStack(RBBINode::precOpCat);
+            RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
+            RBBINode  *catNode     = pushNewNode(RBBINode::opCat);
+            if (U_FAILURE(*fRB->fStatus)) {
+                // Popped operand is unreachable from the stack; free it.
+                delete operandNode;
+                break;
+            }
+            catNode->fLeftChild    = operandNode;
+            operandNode->fParent   = catNode;
+        }
+        break;
+
+    case doLParen:
+        // Open Paren.
+        //   The openParen node is a dummy operation type with a low precedence,
+        //     which has the effect of ensuring that any real binary op that
+        //     follows within the parens binds more tightly to the operands than
+        //     stuff outside of the parens.
+        pushNewNode(RBBINode::opLParen);
+        break;
+
+    case doExprRParen:
+        fixOpStack(RBBINode::precLParen);
+        break;
+
+    case doNOP:
+        break;
+
+    case doStartAssign:
+        // We've just scanned "$variable = "
+        // The top of the node stack has the $variable ref node.
+
+        // Save the start position of the RHS text in the StartExpression node
+        //   that precedes the $variableReference node on the stack.
+        //   This will eventually be used when saving the full $variable replacement
+        //   text as a string.
+        n = fNodeStack[fNodeStackPtr-1];
+        n->fFirstPos = fNextIndex;              // move past the '='
+
+        // Push a new start-of-expression node; needed to keep parse of the
+        //   RHS expression happy.
+        pushNewNode(RBBINode::opStart);
+        break;
+
+
+
+
+    case doEndAssign:
+        {
+            // We have reached the end of an assignment statement.
+            //   Current scan char is the ';' that terminates the assignment.
+
+            // Terminate expression, leaves expression parse tree rooted in TOS node.
+            fixOpStack(RBBINode::precStart);
+
+            RBBINode *startExprNode  = fNodeStack[fNodeStackPtr-2];
+            RBBINode *varRefNode     = fNodeStack[fNodeStackPtr-1];
+            RBBINode *RHSExprNode    = fNodeStack[fNodeStackPtr];
+
+            // Save original text of right side of assignment, excluding the terminating ';'
+            //  in the root of the node for the right-hand-side expression.
+            RHSExprNode->fFirstPos = startExprNode->fFirstPos;
+            RHSExprNode->fLastPos  = fScanIndex;
+            fRB->fRules.extractBetween(RHSExprNode->fFirstPos, RHSExprNode->fLastPos, RHSExprNode->fText);
+
+            // Expression parse tree becomes l. child of the $variable reference node.
+            varRefNode->fLeftChild = RHSExprNode;
+            RHSExprNode->fParent   = varRefNode;
+
+            // Make a symbol table entry for the $variableRef node.
+            fSymbolTable->addEntry(varRefNode->fText, varRefNode, *fRB->fStatus);
+            if (U_FAILURE(*fRB->fStatus)) {
+                // This is a round-about way to get the parse position set
+                //  so that duplicate symbols error messages include a line number.
+                UErrorCode t = *fRB->fStatus;
+                *fRB->fStatus = U_ZERO_ERROR;
+                error(t);
+            }
+
+            // Clean up the stack.
+            delete startExprNode;
+            fNodeStackPtr-=3;
+            break;
+        }
+
+    case doEndOfRule:
+        {
+        fixOpStack(RBBINode::precStart);      // Terminate expression, leaves expression
+        if (U_FAILURE(*fRB->fStatus)) {       //   parse tree rooted in TOS node.
+            break;
+        }
+#ifdef RBBI_DEBUG
+        if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {printNodeStack("end of rule");}
+#endif
+        U_ASSERT(fNodeStackPtr == 1);
+        RBBINode *thisRule = fNodeStack[fNodeStackPtr];
+
+        // If this rule includes a look-ahead '/', add a endMark node to the
+        //   expression tree.
+        if (fLookAheadRule) {
+            RBBINode  *endNode        = pushNewNode(RBBINode::endMark);
+            RBBINode  *catNode        = pushNewNode(RBBINode::opCat);
+            if (U_FAILURE(*fRB->fStatus)) {
+                break;
+            }
+            fNodeStackPtr -= 2;
+            catNode->fLeftChild       = thisRule;
+            catNode->fRightChild      = endNode;
+            fNodeStack[fNodeStackPtr] = catNode;
+            endNode->fVal             = fRuleNum;
+            endNode->fLookAheadEnd    = TRUE;
+            thisRule                  = catNode;
+
+            // TODO: Disable chaining out of look-ahead (hard break) rules.
+            //   The break on rule match is forced, so there is no point in building up
+            //   the state table to chain into another rule for a longer match.
+        }
+
+        // Mark this node as being the root of a rule.
+        thisRule->fRuleRoot = TRUE;
+
+        // Flag if chaining into this rule is wanted.
+        //
+        if (fRB->fChainRules &&         // If rule chaining is enabled globally via !!chain
+                !fNoChainInRule) {      //     and no '^' chain-in inhibit was on this rule
+            thisRule->fChainIn = TRUE;
+        }
+
+
+        // All rule expressions are ORed together.
+        // The ';' that terminates an expression really just functions as a '|' with
+        //   a low operator precedence.
+        //
+        // Each of the four sets of rules are collected separately.
+        //  (forward, reverse, safe_forward, safe_reverse)
+        //  OR this rule into the appropriate group of them.
+        //
+        RBBINode **destRules = (fReverseRule? &fRB->fSafeRevTree : fRB->fDefaultTree);
+
+        if (*destRules != NULL) {
+            // This is not the first rule encountered.
+            // OR previous stuff  (from *destRules)
+            // with the current rule expression (on the Node Stack)
+            //  with the resulting OR expression going to *destRules
+            //
+                       thisRule    = fNodeStack[fNodeStackPtr];
+            RBBINode  *prevRules   = *destRules;
+            RBBINode  *orNode      = pushNewNode(RBBINode::opOr);
+            if (U_FAILURE(*fRB->fStatus)) {
+                break;
+            }
+            orNode->fLeftChild     = prevRules;
+            prevRules->fParent     = orNode;
+            orNode->fRightChild    = thisRule;
+            thisRule->fParent      = orNode;
+            *destRules             = orNode;
+        }
+        else
+        {
+            // This is the first rule encountered (for this direction).
+            // Just move its parse tree from the stack to *destRules.
+            *destRules = fNodeStack[fNodeStackPtr];
+        }
+        fReverseRule   = FALSE;   // in preparation for the next rule.
+        fLookAheadRule = FALSE;
+        fNoChainInRule = FALSE;
+        fNodeStackPtr  = 0;
+        }
+        break;
+
+
+    case doRuleError:
+        error(U_BRK_RULE_SYNTAX);
+        returnVal = FALSE;
+        break;
+
+
+    case doVariableNameExpectedErr:
+        error(U_BRK_RULE_SYNTAX);
+        break;
+
+
+    //
+    //  Unary operands  + ? *
+    //    These all appear after the operand to which they apply.
+    //    When we hit one, the operand (may be a whole sub expression)
+    //    will be on the top of the stack.
+    //    Unary Operator becomes TOS, with the old TOS as its one child.
+    case doUnaryOpPlus:
+        {
+            RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
+            RBBINode  *plusNode    = pushNewNode(RBBINode::opPlus);
+            if (U_FAILURE(*fRB->fStatus)) {
+                // Popped operand is unreachable from the stack; free it.
+                delete operandNode;
+                break;
+            }
+            plusNode->fLeftChild   = operandNode;
+            operandNode->fParent   = plusNode;
+        }
+        break;
+
+    case doUnaryOpQuestion:
+        {
+            RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
+            RBBINode  *qNode       = pushNewNode(RBBINode::opQuestion);
+            if (U_FAILURE(*fRB->fStatus)) {
+                // Popped operand is unreachable from the stack; free it.
+                delete operandNode;
+                break;
+            }
+            qNode->fLeftChild      = operandNode;
+            operandNode->fParent   = qNode;
+        }
+        break;
+
+    case doUnaryOpStar:
+        {
+            RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
+            RBBINode  *starNode    = pushNewNode(RBBINode::opStar);
+            if (U_FAILURE(*fRB->fStatus)) {
+                // Popped operand is unreachable from the stack; free it.
+                delete operandNode;
+                break;
+            }
+            starNode->fLeftChild   = operandNode;
+            operandNode->fParent   = starNode;
+        }
+        break;
+
+    case doRuleChar:
+        // A "Rule Character" is any single character that is a literal part
+        // of the regular expression.  Like a, b and c in the expression "(abc*) | [:L:]"
+        // These are pretty uncommon in break rules; the terms are more commonly
+        //  sets.  To keep things uniform, treat these characters as
+        //  sets that just happen to contain only one character.
+        {
+            n = pushNewNode(RBBINode::setRef);
+            if (U_FAILURE(*fRB->fStatus)) {
+                break;
+            }
+            findSetFor(UnicodeString(fC.fChar), n);
+            n->fFirstPos = fScanIndex;
+            n->fLastPos  = fNextIndex;
+            fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
+            break;
+        }
+
+    case doDotAny:
+        // scanned a ".", meaning match any single character.
+        {
+            n = pushNewNode(RBBINode::setRef);
+            if (U_FAILURE(*fRB->fStatus)) {
+                break;
+            }
+            findSetFor(UnicodeString(TRUE, kAny, 3), n);
+            n->fFirstPos = fScanIndex;
+            n->fLastPos  = fNextIndex;
+            fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
+            break;
+        }
+
+    case doSlash:
+        // Scanned a '/', which identifies a look-ahead break position in a rule.
+        n = pushNewNode(RBBINode::lookAhead);
+        if (U_FAILURE(*fRB->fStatus)) {
+            break;
+        }
+        n->fVal      = fRuleNum;
+        n->fFirstPos = fScanIndex;
+        n->fLastPos  = fNextIndex;
+        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
+        fLookAheadRule = TRUE;
+        break;
+
+
+    case doStartTagValue:
+        // Scanned a '{', the opening delimiter for a tag value within a rule.
+        n = pushNewNode(RBBINode::tag);
+        if (U_FAILURE(*fRB->fStatus)) {
+            break;
+        }
+        n->fVal      = 0;
+        n->fFirstPos = fScanIndex;
+        n->fLastPos  = fNextIndex;
+        break;
+
+    case doTagDigit:
+        // Just scanned a decimal digit that's part of a tag value
+        {
+            n = fNodeStack[fNodeStackPtr];
+            uint32_t v = u_charDigitValue(fC.fChar);
+            U_ASSERT(v < 10);
+            n->fVal = n->fVal*10 + v;
+            break;
+        }
+
+    case doTagValue:
+        n = fNodeStack[fNodeStackPtr];
+        n->fLastPos = fNextIndex;
+        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
+        break;
+
+    case doTagExpectedError:
+        error(U_BRK_MALFORMED_RULE_TAG);
+        returnVal = FALSE;
+        break;
+
+    case doOptionStart:
+        // Scanning a !!option.   At the start of string.
+        fOptionStart = fScanIndex;
+        break;
+
+    case doOptionEnd:
+        {
+            UnicodeString opt(fRB->fRules, fOptionStart, fScanIndex-fOptionStart);
+            if (opt == UNICODE_STRING("chain", 5)) {
+                fRB->fChainRules = TRUE;
+            } else if (opt == UNICODE_STRING("LBCMNoChain", 11)) {
+                fRB->fLBCMNoChain = TRUE;
+            } else if (opt == UNICODE_STRING("forward", 7)) {
+                fRB->fDefaultTree   = &fRB->fForwardTree;
+            } else if (opt == UNICODE_STRING("reverse", 7)) {
+                fRB->fDefaultTree   = &fRB->fReverseTree;
+            } else if (opt == UNICODE_STRING("safe_forward", 12)) {
+                fRB->fDefaultTree   = &fRB->fSafeFwdTree;
+            } else if (opt == UNICODE_STRING("safe_reverse", 12)) {
+                fRB->fDefaultTree   = &fRB->fSafeRevTree;
+            } else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) {
+                fRB->fLookAheadHardBreak = TRUE;
+            } else if (opt == UNICODE_STRING("quoted_literals_only", 20)) {
+                fRuleSets[kRuleSet_rule_char-128].clear();
+            } else if (opt == UNICODE_STRING("unquoted_literals", 17)) {
+                fRuleSets[kRuleSet_rule_char-128].applyPattern(UnicodeString(gRuleSet_rule_char_pattern), *fRB->fStatus);
+            } else {
+                error(U_BRK_UNRECOGNIZED_OPTION);
+            }
+        }
+        break;
+
+    case doReverseDir:
+        fReverseRule = TRUE;
+        break;
+
+    case doStartVariableName:
+        n = pushNewNode(RBBINode::varRef);
+        if (U_FAILURE(*fRB->fStatus)) {
+            break;
+        }
+        n->fFirstPos = fScanIndex;
+        break;
+
+    case doEndVariableName:
+        n = fNodeStack[fNodeStackPtr];
+        if (n==NULL || n->fType != RBBINode::varRef) {
+            error(U_BRK_INTERNAL_ERROR);
+            break;
+        }
+        n->fLastPos = fScanIndex;
+        // fFirstPos is the position of the leading '$'; the stored name excludes it.
+        fRB->fRules.extractBetween(n->fFirstPos+1, n->fLastPos, n->fText);
+        // Look the newly scanned name up in the symbol table
+        //   If there's an entry, set the l. child of the var ref to the replacement expression.
+        //   (We also pass through here when scanning assignments, but no harm is done, other
+        //    than a slight wasted effort that seems hard to avoid.  Lookup will be null)
+        n->fLeftChild = fSymbolTable->lookupNode(n->fText);
+        break;
+
+    case doCheckVarDef:
+        n = fNodeStack[fNodeStackPtr];
+        if (n->fLeftChild == NULL) {
+            error(U_BRK_UNDEFINED_VARIABLE);
+            returnVal = FALSE;
+        }
+        break;
+
+    case doExprFinished:
+        break;
+
+    case doRuleErrorAssignExpr:
+        error(U_BRK_ASSIGN_ERROR);
+        returnVal = FALSE;
+        break;
+
+    case doExit:
+        returnVal = FALSE;
+        break;
+
+    case doScanUnicodeSet:
+        scanSet();
+        break;
+
+    default:
+        error(U_BRK_INTERNAL_ERROR);
+        returnVal = FALSE;
+        break;
+    }
+    return returnVal && U_SUCCESS(*fRB->fStatus);
+}
+
+
+
+
+//------------------------------------------------------------------------------
+//
+// Error Report a rule parse error.
+// Only report it if no previous error has been recorded.
+//
+//------------------------------------------------------------------------------
+void RBBIRuleScanner::error(UErrorCode e) {
+    // First error wins: once the builder status is a failure, later reports
+    // are dropped so the original cause and position are preserved.
+    if (U_FAILURE(*fRB->fStatus)) {
+        return;
+    }
+    *fRB->fStatus = e;
+
+    // The parse error structure is optional.
+    if (!fRB->fParseError) {
+        return;
+    }
+
+    // Record the current scan position; no context text is supplied.
+    fRB->fParseError->line           = fLineNum;
+    fRB->fParseError->offset         = fCharNum;
+    fRB->fParseError->preContext[0]  = 0;
+    fRB->fParseError->postContext[0] = 0;
+}
+
+
+
+
+//------------------------------------------------------------------------------
+//
+// fixOpStack The parse stack holds partially assembled chunks of the parse tree.
+// An entry on the stack may be as small as a single setRef node,
+// or as large as the parse tree
+// for an entire expression (this will be the one item left on the stack
+// when the parsing of an RBBI rule completes).
+//
+// This function is called when a binary operator is encountered.
+// It looks back up the stack for operators that are not yet associated
+// with a right operand, and if the precedence of the stacked operator >=
+// the precedence of the current operator, binds the operand left,
+// to the previously encountered operator.
+//
+//------------------------------------------------------------------------------
+void RBBIRuleScanner::fixOpStack(RBBINode::OpPrecedence p) {
+    RBBINode *opNode;
+
+    // Fold the top-of-stack operand into pending operators for as long as those
+    // operators bind at least as tightly as the incoming precedence p.
+    for (;;) {
+        opNode = fNodeStack[fNodeStackPtr-1];   // the operator just below the TOS operand
+        if (opNode->fPrecedence == 0) {
+            RBBIDebugPuts("RBBIRuleScanner::fixOpStack, bad operator node");
+            error(U_BRK_INTERNAL_ERROR);
+            return;
+        }
+
+        const bool stopHere = opNode->fPrecedence < p ||
+                              opNode->fPrecedence <= RBBINode::precLParen;
+        if (stopHere) {
+            // The most recent operand belongs to the current operator,
+            //   not to the one already on the stack.
+            break;
+        }
+
+        // Stacked operator is a binary op ('|' or concatenation):
+        //   the TOS operand becomes its right child, and the operator, now a
+        //   complete subexpression, becomes the new TOS operand.
+        RBBINode *operand = fNodeStack[fNodeStackPtr];
+        opNode->fRightChild = operand;
+        operand->fParent    = opNode;
+        fNodeStackPtr--;
+    }
+
+    if (p > RBBINode::precLParen) {
+        // An ordinary binary operator; nothing gets discarded from the stack.
+        return;
+    }
+
+    // Scan is at a right paren or at the end of an expression.
+    //   The node we stopped on must be the matching left paren / start node;
+    //   anything else means a right paren met a start node, or the end of the
+    //   expression met a left paren.
+    if (opNode->fPrecedence != p) {
+        error(U_BRK_MISMATCHED_PAREN);
+    }
+
+    // Drop the left paren (or start) node from the stack, leaving the completed
+    //   (sub)expression as TOS, then delete the discarded node.
+    fNodeStack[fNodeStackPtr-1] = fNodeStack[fNodeStackPtr];
+    fNodeStackPtr--;
+    delete opNode;
+}
+
+
+
+
+//------------------------------------------------------------------------------
+//
+// findSetFor given a UnicodeString,
+// - find the corresponding Unicode Set (uset node)
+// (create one if necessary)
+// - Set fLeftChild of the caller's node (should be a setRef node)
+// to the uset node
+// Maintain a hash table of uset nodes, so the same one is always used
+// for the same string.
+// If a "to adopt" set is provided and we haven't seen this key before,
+// add the provided set to the hash table.
+// If the string is one (32 bit) char in length, the set contains
+// just one element which is the char in question.
+// If the string is "any", return a set containing all chars.
+//
+//------------------------------------------------------------------------------
+void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt) {
+    // Ownership: this function always takes over setToAdopt. On every path it is
+    // either handed to a uset node or deleted here.
+
+    RBBISetTableEl   *el;
+
+    // First check whether we've already cached a set for this string.
+    // If so, just use the cached set in the new node.
+    //   delete any set provided by the caller, since we own it.
+    el = (RBBISetTableEl *)uhash_get(fSetTable, &s);
+    if (el != NULL) {
+        delete setToAdopt;
+        node->fLeftChild = el->val;
+        U_ASSERT(node->fLeftChild->fType == RBBINode::uset);
+        return;
+    }
+
+    // Haven't seen this set before.
+    // If the caller didn't provide us with a prebuilt set,
+    //   create a new UnicodeSet now.
+    //   (A failed allocation is detected further down, together with the others.)
+    if (setToAdopt == NULL) {
+        if (s.compare(kAny, -1) == 0) {
+            setToAdopt = new UnicodeSet(0x000000, 0x10ffff);
+        } else {
+            UChar32 c;
+            c = s.char32At(0);
+            setToAdopt = new UnicodeSet(c, c);
+        }
+    }
+
+    //
+    // Make a new uset node to refer to this UnicodeSet
+    // This new uset node becomes the child of the caller's setReference node.
+    //
+    RBBINode *usetNode    = new RBBINode(RBBINode::uset);
+    if (usetNode == NULL) {
+        // Nothing has adopted the set yet; release it rather than leak it.
+        delete setToAdopt;
+        error(U_MEMORY_ALLOCATION_ERROR);
+        return;
+    }
+    usetNode->fInputSet   = setToAdopt;
+    usetNode->fParent     = node;
+    node->fLeftChild      = usetNode;
+    usetNode->fText       = s;
+
+
+    //
+    // Add the new uset node to the list of all uset nodes.
+    //
+    fRB->fUSetNodes->addElement(usetNode, *fRB->fStatus);
+
+
+    //
+    // Add the new set to the set hash table.
+    //
+    el      = (RBBISetTableEl *)uprv_malloc(sizeof(RBBISetTableEl));
+    UnicodeString *tkey = new UnicodeString(s);
+    if (tkey == NULL || el == NULL || setToAdopt == NULL) {
+        // Delete to avoid memory leak
+        delete tkey;
+        tkey = NULL;
+        uprv_free(el);
+        el = NULL;
+        delete setToAdopt;
+        setToAdopt = NULL;
+        // The uset node was already pointed at the set; clear that pointer so
+        // the node is not left referring to (or later freeing) a deleted object.
+        usetNode->fInputSet = NULL;
+
+        error(U_MEMORY_ALLOCATION_ERROR);
+        return;
+    }
+    el->key = tkey;
+    el->val = usetNode;
+    uhash_put(fSetTable, el->key, el, fRB->fStatus);
+
+    return;
+}
+
+
+
+//
+// Assorted Unicode character constants.
+// Numeric because there is no portable way to enter them as literals.
+// (Think EBCDIC).
+//
+static const UChar chCR = 0x0d; // Carriage return. New lines terminate comments.
+static const UChar chLF = 0x0a; // Line feed; directly after a CR it does not start another line (see nextCharLL).
+static const UChar chNEL = 0x85; // NEL newline variant
+static const UChar chLS = 0x2028; // Unicode Line Separator
+static const UChar chApos = 0x27; // single quote, for quoted chars.
+static const UChar chPound = 0x23; // '#', introduces a comment.
+static const UChar chBackSlash = 0x5c; // '\' introduces a char escape
+static const UChar chLParen = 0x28; // '(' - nextChar() returns it for a single quote that opens quoted text
+static const UChar chRParen = 0x29; // ')' - nextChar() returns it for a single quote that closes quoted text
+
+
+//------------------------------------------------------------------------------
+//
+// stripRules Return a rules string without extra spaces.
+// (Comments are removed separately, during rule parsing.)
+//
+//------------------------------------------------------------------------------
+UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) {
+    // Copy the rules one code point at a time, leaving out every code point
+    // that has the Pattern_White_Space property.
+    UnicodeString result;
+    const int32_t length = rules.length();
+
+    int32_t pos = 0;
+    while (pos < length) {
+        const UChar32 cp = rules.char32At(pos);
+        if (!u_hasBinaryProperty(cp, UCHAR_PATTERN_WHITE_SPACE)) {
+            result.append(cp);
+        }
+        // Step by code point so supplementary characters are not split.
+        pos = rules.moveIndex32(pos, 1);
+    }
+    return result;
+}
+
+
+//------------------------------------------------------------------------------
+//
+// nextCharLL Low Level Next Char from rule input source.
+// Get a char from the input character iterator,
+// keep track of input position for error reporting.
+//
+//------------------------------------------------------------------------------
+UChar32 RBBIRuleScanner::nextCharLL() {
+    // End of the rule text is reported as -1; the read position is not moved.
+    if (fNextIndex >= fRB->fRules.length()) {
+        return (UChar32)-1;
+    }
+
+    // Fetch one code point and step the read position past it.
+    const UChar32 cp = fRB->fRules.char32At(fNextIndex);
+    fNextIndex = fRB->fRules.moveIndex32(fNextIndex, 1);
+
+    // A LF directly after a CR is part of the line break already counted for the CR.
+    const bool lfAfterCR     = (cp == chLF && fLastChar == chCR);
+    const bool startsNewLine = !lfAfterCR &&
+        (cp == chCR || cp == chLF || cp == chNEL || cp == chLS);
+
+    if (startsNewLine) {
+        // Bump the line number and reset the column to 0.
+        fLineNum++;
+        fCharNum=0;
+        if (fQuoteMode) {
+            // Quoted text may not span lines; report it and leave quote mode.
+            error(U_BRK_NEW_LINE_IN_QUOTED_STRING);
+            fQuoteMode = FALSE;
+        }
+    } else if (!lfAfterCR) {
+        // Ordinary character: advance the column.
+        fCharNum++;
+    }
+
+    fLastChar = cp;
+    return cp;
+}
+
+
+//------------------------------------------------------------------------------
+//
+// nextChar for rules scanning. At this level, we handle stripping
+// out comments and processing backslash character escapes.
+// The rest of the rules grammar is handled at the next level up.
+//
+//------------------------------------------------------------------------------
+void RBBIRuleScanner::nextChar(RBBIRuleChar &c) {
+    // Produces the next logical rule character in c:
+    //   c.fChar    - the character, or -1 at end of input
+    //   c.fEscaped - TRUE if it came from quoted text, a '' pair or a backslash escape
+    // fScanIndex is left at the start of the text consumed by this call.
+
+    fScanIndex = fNextIndex;
+    c.fChar    = nextCharLL();
+    c.fEscaped = FALSE;
+
+    //
+    //  check for '' sequence.
+    //  These are recognized in all contexts, whether in quoted text or not.
+    //
+    if (c.fChar == chApos) {
+        if (fRB->fRules.char32At(fNextIndex) == chApos) {
+            c.fChar    = nextCharLL();        // get nextChar officially so character counts
+            c.fEscaped = TRUE;                //   stay correct.
+        }
+        else
+        {
+            // Single quote, by itself.
+            //   Toggle quoting mode.
+            //   Return either '('  or ')', because quotes cause a grouping of the quoted text.
+            fQuoteMode = !fQuoteMode;
+            if (fQuoteMode == TRUE) {
+                c.fChar = chLParen;
+            } else {
+                c.fChar = chRParen;
+            }
+            c.fEscaped = FALSE;      // The paren that we return is not escaped.
+            return;
+        }
+    }
+
+    if (fQuoteMode) {
+        c.fEscaped = TRUE;
+    }
+    else
+    {
+        // We are not in a 'quoted region' of the source.
+        //
+        if (c.fChar == chPound) {
+            // Start of a comment.  Consume the rest of it.
+            //  The new-line char that terminates the comment is always returned.
+            //  It will be treated as white-space, and serves to break up anything
+            //    that might otherwise incorrectly clump together with a comment in
+            //    the middle (a variable name, for example.)
+            int32_t commentStart = fScanIndex;
+            for (;;) {
+                c.fChar = nextCharLL();
+                if (c.fChar == (UChar32)-1 ||  // EOF
+                    c.fChar == chCR     ||
+                    c.fChar == chLF     ||
+                    c.fChar == chNEL    ||
+                    c.fChar == chLS)       {break;}
+            }
+            // Blank the comment out of the stripped rules, keeping the terminating
+            //   new-line. When the comment runs to the end of the input there is
+            //   no terminator and nextCharLL() did not advance, so the comment
+            //   extends all the way to fNextIndex.
+            const int32_t commentEnd = (c.fChar == (UChar32)-1) ? fNextIndex : fNextIndex-1;
+            for (int32_t i=commentStart; i<commentEnd; ++i) {
+                fRB->fStrippedRules.setCharAt(i, u' ');
+            }
+        }
+        if (c.fChar == (UChar32)-1) {
+            return;
+        }
+
+        //
+        //  check for backslash escaped characters.
+        //  Use UnicodeString::unescapeAt() to handle them.
+        //
+        if (c.fChar == chBackSlash) {
+            c.fEscaped = TRUE;
+            int32_t startX = fNextIndex;
+            c.fChar = fRB->fRules.unescapeAt(fNextIndex);
+            if (fNextIndex == startX) {
+                // unescapeAt() consumed nothing: the escape is malformed.
+                error(U_BRK_HEX_DIGITS_EXPECTED);
+            }
+            // Keep the column count in step with the text the escape consumed.
+            fCharNum += fNextIndex-startX;
+        }
+    }
+    // putc(c.fChar, stdout);
+}
+
+//------------------------------------------------------------------------------
+//
+// Parse RBBI rules. The state machine for rules parsing is here.
+// The state tables are hand-written in the file rbbirpt.txt,
+// and converted to the form used here by a perl
+// script rbbicst.pl
+//
+//------------------------------------------------------------------------------
+void RBBIRuleScanner::parse() {
+ uint16_t state;
+ const RBBIRuleTableEl *tableEl;
+
+ if (U_FAILURE(*fRB->fStatus)) {
+ return;
+ }
+
+ state = 1;
+ nextChar(fC);
+ //
+ // Main loop for the rule parsing state machine.
+ // Runs once per state transition.
+ // Each time through optionally performs, depending on the state table,
+ // - an advance to the the next input char
+ // - an action to be performed.
+ // - pushing or popping a state to/from the local state return stack.
+ //
+ for (;;) {
+ // Bail out if anything has gone wrong.
+ // RBBI rule file parsing stops on the first error encountered.
+ if (U_FAILURE(*fRB->fStatus)) {
+ break;
+ }
+
+ // Quit if state == 0. This is the normal way to exit the state machine.
+ //
+ if (state == 0) {
+ break;
+ }
+
+ // Find the state table element that matches the input char from the rule, or the
+ // class of the input character. Start with the first table row for this
+ // state, then linearly scan forward until we find a row that matches the
+ // character. The last row for each state always matches all characters, so
+ // the search will stop there, if not before.
+ //
+ tableEl = &gRuleParseStateTable[state];
+ #ifdef RBBI_DEBUG
+ if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) {
+ RBBIDebugPrintf("char, line, col = (\'%c\', %d, %d) state=%s ",
+ fC.fChar, fLineNum, fCharNum, RBBIRuleStateNames[state]);
+ }
+ #endif
+
+ for (;;) {
+ #ifdef RBBI_DEBUG
+ if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPrintf("."); fflush(stdout);}
+ #endif
+ if (tableEl->fCharClass < 127 && fC.fEscaped == FALSE && tableEl->fCharClass == fC.fChar) {
+ // Table row specified an individual character, not a set, and
+ // the input character is not escaped, and
+ // the input character matched it.
+ break;
+ }
+ if (tableEl->fCharClass == 255) {
+ // Table row specified default, match anything character class.
+ break;
+ }
+ if (tableEl->fCharClass == 254 && fC.fEscaped) {
+ // Table row specified "escaped" and the char was escaped.
+ break;
+ }
+ if (tableEl->fCharClass == 253 && fC.fEscaped &&
+ (fC.fChar == 0x50 || fC.fChar == 0x70 )) {
+ // Table row specified "escaped P" and the char is either 'p' or 'P'.
+ break;
+ }
+ if (tableEl->fCharClass == 252 && fC.fChar == (UChar32)-1) {
+ // Table row specified eof and we hit eof on the input.
+ break;
+ }
+
+ if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 && // Table specs a char class &&
+ fC.fEscaped == FALSE && // char is not escaped &&
+ fC.fChar != (UChar32)-1) { // char is not EOF
+ U_ASSERT((tableEl->fCharClass-128) < UPRV_LENGTHOF(fRuleSets));
+ if (fRuleSets[tableEl->fCharClass-128].contains(fC.fChar)) {
+ // Table row specified a character class, or set of characters,
+ // and the current char matches it.
+ break;
+ }
+ }
+
+ // No match on this row, advance to the next row for this state,
+ tableEl++;
+ }
+ if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPuts("");}
+
+ //
+ // We've found the row of the state table that matches the current input
+ // character from the rules string.
+ // Perform any action specified by this row in the state table.
+ if (doParseActions((int32_t)tableEl->fAction) == FALSE) {
+ // Break out of the state machine loop if the
+ // action signalled some kind of error, or
+ // the action was to exit, occurs on normal end-of-rules-input.
+ break;
+ }
+
+ if (tableEl->fPushState != 0) {
+ fStackPtr++;
+ if (fStackPtr >= kStackSize) {
+ error(U_BRK_INTERNAL_ERROR);
+ RBBIDebugPuts("RBBIRuleScanner::parse() - state stack overflow.");
+ fStackPtr--;
+ }
+ fStack[fStackPtr] = tableEl->fPushState;
+ }
+
+ if (tableEl->fNextChar) {
+ nextChar(fC);
+ }
+
+ // Get the next state from the table entry, or from the
+ // state stack if the next state was specified as "pop".
+ if (tableEl->fNextState != 255) {
+ state = tableEl->fNextState;
+ } else {
+ state = fStack[fStackPtr];
+ fStackPtr--;
+ if (fStackPtr < 0) {
+ error(U_BRK_INTERNAL_ERROR);
+ RBBIDebugPuts("RBBIRuleScanner::parse() - state stack underflow.");
+ fStackPtr++;
+ }
+ }
+
+ }
+
+ if (U_FAILURE(*fRB->fStatus)) {
+ return;
+ }
+
+ // If there are no forward rules set an error.
+ //
+ if (fRB->fForwardTree == NULL) {
+ error(U_BRK_RULE_SYNTAX);
+ return;
+ }
+
+ //
+ // Parsing of the input RBBI rules is complete.
+ // We now have a parse tree for the rule expressions
+ // and a list of all UnicodeSets that are referenced.
+ //
+#ifdef RBBI_DEBUG
+ if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "symbols")) {fSymbolTable->rbbiSymtablePrint();}
+ if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ptree")) {
+ RBBIDebugPrintf("Completed Forward Rules Parse Tree...\n");
+ RBBINode::printTree(fRB->fForwardTree, TRUE);
+ RBBIDebugPrintf("\nCompleted Reverse Rules Parse Tree...\n");
+ RBBINode::printTree(fRB->fReverseTree, TRUE);
+ RBBIDebugPrintf("\nCompleted Safe Point Forward Rules Parse Tree...\n");
+ RBBINode::printTree(fRB->fSafeFwdTree, TRUE);
+ RBBIDebugPrintf("\nCompleted Safe Point Reverse Rules Parse Tree...\n");
+ RBBINode::printTree(fRB->fSafeRevTree, TRUE);
+ }
+#endif
+}
+
+
+//------------------------------------------------------------------------------
+//
+// printNodeStack for debugging...
+//
+//------------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+// Debug aid: print the caller-supplied title, then dump every node on the
+// node stack from the top entry down to index 1 (slot 0 is never printed).
+void RBBIRuleScanner::printNodeStack(const char *title) {
+    RBBIDebugPrintf("%s. Dumping node stack...\n", title);
+    int depth = fNodeStackPtr;
+    while (depth > 0) {
+        RBBINode::printTree(fNodeStack[depth], TRUE);
+        --depth;
+    }
+}
+#endif
+
+
+
+
+//------------------------------------------------------------------------------
+//
+// pushNewNode create a new RBBINode of the specified type and push it
+// onto the stack of nodes.
+//
+//------------------------------------------------------------------------------
+RBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) {
+    // Once an error has been recorded nothing more is pushed.
+    if (U_FAILURE(*fRB->fStatus)) {
+        return NULL;
+    }
+
+    // The last usable slot is kStackSize - 1; refuse to move past it.
+    const int32_t nextSlot = fNodeStackPtr + 1;
+    if (nextSlot >= kStackSize) {
+        error(U_BRK_RULE_SYNTAX);
+        RBBIDebugPuts("RBBIRuleScanner::pushNewNode - stack overflow.");
+        return NULL;
+    }
+
+    // The slot is claimed before the allocation result is known, so a failed
+    // allocation leaves a NULL entry on top of the stack and sets the status.
+    fNodeStackPtr = nextSlot;
+    RBBINode *node = new RBBINode(t);
+    fNodeStack[nextSlot] = node;
+    if (node == NULL) {
+        *fRB->fStatus = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return node;
+}
+
+
+
+//------------------------------------------------------------------------------
+//
+// scanSet Construct a UnicodeSet from the text at the current scan
+// position. Advance the scan position to the first character
+// after the set.
+//
+// A new RBBI setref node referring to the set is pushed onto the node
+// stack.
+//
+// The scan position is normally under the control of the state machine
+// that controls rule parsing. UnicodeSets, however, are parsed by
+// the UnicodeSet constructor, not by the RBBI rule parser.
+//
+//------------------------------------------------------------------------------
+void RBBIRuleScanner::scanSet() {
+    if (U_FAILURE(*fRB->fStatus)) {
+        return;
+    }
+
+    // Let UnicodeSet parse the pattern that begins at the current scan index.
+    ParsePosition pos;
+    pos.setIndex(fScanIndex);
+    const int32_t startPos = fScanIndex;
+
+    UErrorCode localStatus = U_ZERO_ERROR;
+    UnicodeSet *uset = new UnicodeSet();
+    if (uset == NULL) {
+        localStatus = U_MEMORY_ALLOCATION_ERROR;
+    } else {
+        uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus);
+    }
+    if (U_FAILURE(localStatus)) {
+        // TODO: Get more accurate position of the error from UnicodeSet's return info.
+        // UnicodeSet appears to not be reporting correctly at this time.
+        #ifdef RBBI_DEBUG
+        RBBIDebugPrintf("UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex());
+        #endif
+        error(localStatus);
+        delete uset;
+        return;
+    }
+
+    // Verify that the set contains at least one code point.
+    U_ASSERT(uset!=NULL);
+    if (uset->isEmpty()) {
+        // An empty set is almost certainly not what the rule author wanted,
+        // and rejecting it spares the later tree manipulation code from
+        // having to deal with that corner case.
+        error(U_BRK_RULE_EMPTY_SET);
+        delete uset;
+        return;
+    }
+
+    // Advance the RBBI parse position over the UnicodeSet pattern one
+    // character at a time. Assigning fScanIndex directly would throw off the
+    // line/char positions maintained for error reporting.
+    const int32_t patternEnd = pos.getIndex();
+    while (fNextIndex < patternEnd) {
+        nextCharLL();
+    }
+
+    // From here on the set is owned locally until findSetFor() adopts it, so
+    // every early exit must release it.
+    if (U_FAILURE(*fRB->fStatus)) {
+        delete uset;
+        return;
+    }
+
+    RBBINode *n = pushNewNode(RBBINode::setRef);
+    if (U_FAILURE(*fRB->fStatus)) {
+        // pushNewNode() reported stack overflow or allocation failure; the
+        // set was never handed over, so free it here instead of leaking it.
+        delete uset;
+        return;
+    }
+    n->fFirstPos = startPos;
+    n->fLastPos = fNextIndex;
+    fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
+    // findSetFor() serves several purposes here:
+    //  - Adopts storage for the UnicodeSet, will be responsible for deleting.
+    //  - Maintains the collection of all sets in use, needed later for
+    //    establishing character categories for the run time engine.
+    //  - Eliminates multiple instances of the same set.
+    //  - Creates a new uset node if necessary (if this isn't a duplicate.)
+    findSetFor(n->fText, n, uset);
+}
+
+int32_t RBBIRuleScanner::numRules() {
+ // Report the scanner's rule counter, fRuleNum.
+ return fRuleNum;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/thirdparty/icu4c/common/rbbiscan.h b/thirdparty/icu4c/common/rbbiscan.h
new file mode 100644
index 0000000000..58022002c5
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbiscan.h
@@ -0,0 +1,167 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// rbbiscan.h
+//
+// Copyright (C) 2002-2016, International Business Machines Corporation and others.
+// All Rights Reserved.
+//
+// This file contains declarations for class RBBIRuleScanner
+//
+
+
+#ifndef RBBISCAN_H
+#define RBBISCAN_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/rbbi.h"
+#include "unicode/uniset.h"
+#include "unicode/parseerr.h"
+#include "uhash.h"
+#include "uvector.h"
+#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
+ // looks up references to $variables within a set.
+#include "rbbinode.h"
+#include "rbbirpt.h"
+
+U_NAMESPACE_BEGIN
+
+class RBBIRuleBuilder;
+class RBBISymbolTable;
+
+
+//--------------------------------------------------------------------------------
+//
+// class RBBIRuleScanner does the lowest level, character-at-a-time
+// scanning of break iterator rules.
+//
+// The output of the scanner is parse trees for
+// the rule expressions and a list of all Unicode Sets
+// encountered.
+//
+//--------------------------------------------------------------------------------
+
+class RBBIRuleScanner : public UMemory {
+public:
+
+ enum {
+ kStackSize = 100 // The size of the state stack for
+ }; // rules parsing. Corresponds roughly
+ // to the depth of parentheses nesting
+ // that is allowed in the rules.
+ // Also used as the size of the node stack.
+
+ // One character of rule input together with a flag recording whether it
+ // was escaped.
+ struct RBBIRuleChar {
+ UChar32 fChar;
+ UBool fEscaped;
+ RBBIRuleChar() : fChar(0), fEscaped(false) {}
+ };
+
+ RBBIRuleScanner(RBBIRuleBuilder *rb);
+
+
+ virtual ~RBBIRuleScanner();
+
+ void nextChar(RBBIRuleChar &c); // Get the next char from the input stream.
+ // The function returns void; the character
+ // is delivered through the reference c.
+
+ UBool push(const RBBIRuleChar &c); // Push (unget) one character.
+ // Only a single character may be pushed.
+
+ void parse(); // Parse the rules, generating two parse
+ // trees, one each for the forward and
+ // reverse rules,
+ // and a list of UnicodeSets encountered.
+
+ int32_t numRules(); // Return the number of rules that have been seen.
+
+ /**
+ * Return a rules string without unnecessary
+ * characters.
+ */
+ static UnicodeString stripRules(const UnicodeString &rules);
+private:
+
+ UBool doParseActions(int32_t a);
+ void error(UErrorCode e); // error reporting convenience function.
+ void fixOpStack(RBBINode::OpPrecedence p);
+ // Associate node with the UnicodeSet for set expression s. scanSet()
+ // passes a freshly built set as setToAdopt and relies on it being adopted.
+ void findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL);
+
+ UChar32 nextCharLL();
+#ifdef RBBI_DEBUG
+ void printNodeStack(const char *title);
+#endif
+ RBBINode *pushNewNode(RBBINode::NodeType t);
+ void scanSet();
+
+
+ RBBIRuleBuilder *fRB; // The rule builder that we are part of.
+
+ int32_t fScanIndex; // Index of current character being processed
+ // in the rule input string.
+ int32_t fNextIndex; // Index of the next character, which
+ // is the first character not yet scanned.
+ UBool fQuoteMode; // Scan is in a 'quoted region'
+ int32_t fLineNum; // Line number in input file.
+ int32_t fCharNum; // Char position within the line.
+ UChar32 fLastChar; // Previous char, needed to count CR-LF
+ // as a single line, not two.
+
+ RBBIRuleChar fC; // Current char for parse state machine
+ // processing.
+ UnicodeString fVarName; // $variableName, valid when we've just
+ // scanned one.
+
+ RBBIRuleTableEl **fStateTable; // State Transition Table for RBBI Rule
+ // parsing. index by p[state][char-class]
+
+ uint16_t fStack[kStackSize]; // State stack, holds state pushes
+ int32_t fStackPtr; // and pops as specified in the state
+ // transition rules.
+
+ RBBINode *fNodeStack[kStackSize]; // Node stack, holds nodes created
+ // during the parse of a rule
+ int32_t fNodeStackPtr;
+
+
+ UBool fReverseRule; // True if the rule currently being scanned
+ // is a reverse direction rule (if it
+ // starts with a '!')
+
+ UBool fLookAheadRule; // True if the rule includes a '/'
+ // somewhere within it.
+
+ UBool fNoChainInRule; // True if the current rule starts with a '^'.
+
+ RBBISymbolTable *fSymbolTable; // symbol table, holds definitions of
+ // $variable symbols.
+
+ UHashtable *fSetTable; // UnicodeSet hash table, holds indexes to
+ // the sets created while parsing rules.
+ // The key is the string used for creating
+ // the set.
+
+ UnicodeSet fRuleSets[10]; // Unicode Sets that are needed during
+ // the scanning of RBBI rules. The
+ // indices for these are assigned by the
+ // perl script that builds the state tables.
+ // See rbbirpt.h.
+
+ int32_t fRuleNum; // Counts each rule as it is scanned.
+
+ int32_t fOptionStart; // Input index of start of a !!option
+ // keyword, while being scanned.
+
+ UnicodeSet *gRuleSet_rule_char;
+ UnicodeSet *gRuleSet_white_space;
+ UnicodeSet *gRuleSet_name_char;
+ UnicodeSet *gRuleSet_name_start_char;
+
+ // Declared private to forbid copying.
+ RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class
+ RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class
+};
+
+U_NAMESPACE_END
+
+#endif
diff --git a/thirdparty/icu4c/common/rbbisetb.cpp b/thirdparty/icu4c/common/rbbisetb.cpp
new file mode 100644
index 0000000000..29faeb8c45
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbisetb.cpp
@@ -0,0 +1,694 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// rbbisetb.cpp
+//
+/*
+***************************************************************************
+* Copyright (C) 2002-2008 International Business Machines Corporation *
+* and others. All rights reserved. *
+***************************************************************************
+*/
+//
+// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules
+// (part of the rule building process.)
+//
+// Starting with the rules parse tree from the scanner,
+//
+// - Enumerate the set of UnicodeSets that are referenced
+// by the RBBI rules.
+// - compute a set of non-overlapping character ranges
+// with all characters within a range belonging to the same
+// set of input unicode sets.
+// - Derive a set of non-overlapping UnicodeSet (like things)
+// that will correspond to columns in the state table for
+// the RBBI execution engine. All characters within one
+// of these sets belong to the same set of the original
+// UnicodeSets from the user's rules.
+// - construct the trie table that maps input characters
+// to the index of the matching non-overlapping set of set from
+// the previous step.
+//
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/uniset.h"
+#include "uvector.h"
+#include "uassert.h"
+#include "cmemory.h"
+#include "cstring.h"
+
+#include "rbbisetb.h"
+#include "rbbinode.h"
+
+U_NAMESPACE_BEGIN
+
+// Largest category count, as reported by getNumCharCategories(), for which
+// getTrieSize() builds the trie with 8-bit values; above it 16-bit values are used.
+const int32_t kMaxCharCategoriesFor8BitsTrie = 255;
+//------------------------------------------------------------------------
+//
+// Constructor
+//
+//------------------------------------------------------------------------
+// Bind the set builder to its owning rule builder and start with no ranges,
+// no trie and no character groups.
+RBBISetBuilder::RBBISetBuilder(RBBIRuleBuilder *rb)
+{
+    fRB = rb;
+    fStatus = rb->fStatus;       // shares the rule builder's error code
+    fRangeList = nullptr;
+    fMutableTrie = nullptr;
+    fTrie = nullptr;
+    fTrieSize = 0;
+    fGroupCount = 0;
+    // buildRanges() assigns the real value. Give the member a defined value
+    // up front so that getDictCategoriesStart() or mergeCategories() never
+    // read an indeterminate int if they run first.
+    // NOTE(review): the member's declaration is not visible from this file;
+    // if it already carries a default initializer this line is merely redundant.
+    fDictCategoriesStart = 0;
+    fSawBOF = false;
+}
+
+
+//------------------------------------------------------------------------
+//
+// Destructor
+//
+//------------------------------------------------------------------------
+RBBISetBuilder::~RBBISetBuilder()
+{
+    // Free the linked list of RangeDescriptors, reading each successor link
+    // before the node that holds it is destroyed.
+    RangeDescriptor *cursor = fRangeList;
+    while (cursor != NULL) {
+        RangeDescriptor *doomed = cursor;
+        cursor = cursor->fNext;
+        delete doomed;
+    }
+
+    ucptrie_close(fTrie);
+    umutablecptrie_close(fMutableTrie);
+}
+
+
+
+
+//------------------------------------------------------------------------
+//
+// buildRanges Build the list of non-overlapping character ranges
+// from the Unicode Sets.
+//
+//------------------------------------------------------------------------
+void RBBISetBuilder::buildRanges() {
+ // Overview of the steps below:
+ // 1. Split the code point space 0..0x10ffff into a linked list of ranges such
+ // that every range lies entirely inside or entirely outside each input set.
+ // 2. Give ranges with identical set membership the same group number (fNum).
+ // 3. Renumber the dictionary groups so they follow the non-dictionary groups.
+ // 4. Add the reserved values 1 and 2 to sets containing the strings "eof" / "bof".
+ // Returns early, leaving the failure in *fStatus, on allocation or vector errors.
+ RBBINode *usetNode;
+ RangeDescriptor *rlRange;
+
+ if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "usets")) {printSets();}
+
+ //
+ // Initialize the process by creating a single range encompassing all characters
+ // that is in no sets.
+ //
+ fRangeList = new RangeDescriptor(*fStatus); // will check for status here
+ if (fRangeList == NULL) {
+ *fStatus = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ fRangeList->fStartChar = 0;
+ fRangeList->fEndChar = 0x10ffff;
+
+ if (U_FAILURE(*fStatus)) {
+ return;
+ }
+
+ //
+ // Find the set of non-overlapping ranges of characters
+ //
+ int ni;
+ for (ni=0; ; ni++) { // Loop over each of the UnicodeSets encountered in the input rules
+ // A NULL result from elementAt() is what terminates this loop.
+ usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
+ if (usetNode==NULL) {
+ break;
+ }
+
+ UnicodeSet *inputSet = usetNode->fInputSet;
+ int32_t inputSetRangeCount = inputSet->getRangeCount();
+ int inputSetRangeIndex = 0;
+ rlRange = fRangeList;
+
+ for (;;) {
+ if (inputSetRangeIndex >= inputSetRangeCount) {
+ break;
+ }
+ UChar32 inputSetRangeBegin = inputSet->getRangeStart(inputSetRangeIndex);
+ UChar32 inputSetRangeEnd = inputSet->getRangeEnd(inputSetRangeIndex);
+
+ // skip over ranges from the range list that are completely
+ // below the current range from the input unicode set.
+ while (rlRange->fEndChar < inputSetRangeBegin) {
+ rlRange = rlRange->fNext;
+ }
+
+ // If the start of the range from the range list is before
+ // the start of the range from the unicode set, split the range list range
+ // in two, with one part being before (wholly outside of) the unicode set
+ // and the other containing the rest.
+ // Then continue the loop; the post-split current range will then be skipped
+ // over
+ if (rlRange->fStartChar < inputSetRangeBegin) {
+ rlRange->split(inputSetRangeBegin, *fStatus);
+ if (U_FAILURE(*fStatus)) {
+ return;
+ }
+ continue;
+ }
+
+ // Same thing at the end of the ranges...
+ // If the end of the range from the range list doesn't coincide with
+ // the end of the range from the unicode set, split the range list
+ // range in two. The first part of the split range will be
+ // wholly inside the Unicode set.
+ if (rlRange->fEndChar > inputSetRangeEnd) {
+ rlRange->split(inputSetRangeEnd+1, *fStatus);
+ if (U_FAILURE(*fStatus)) {
+ return;
+ }
+ }
+
+ // The current rlRange is now entirely within the UnicodeSet range.
+ // Add this unicode set to the list of sets for this rlRange
+ if (rlRange->fIncludesSets->indexOf(usetNode) == -1) {
+ rlRange->fIncludesSets->addElement(usetNode, *fStatus);
+ if (U_FAILURE(*fStatus)) {
+ return;
+ }
+ }
+
+ // Advance over ranges that we are finished with.
+ if (inputSetRangeEnd == rlRange->fEndChar) {
+ inputSetRangeIndex++;
+ }
+ rlRange = rlRange->fNext;
+ }
+ }
+
+ if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();}
+
+ //
+ // Group the above ranges, with each group consisting of one or more
+ // ranges that are in exactly the same set of original UnicodeSets.
+ // The groups are numbered, and these group numbers are the set of
+ // input symbols recognized by the run-time state machine.
+ //
+ // Numbering: # 0 (state table column 0) is unused.
+ // # 1 is reserved - table column 1 is for end-of-input
+ // # 2 is reserved - table column 2 is for beginning-of-input
+ // # 3 is the first range list.
+ //
+ RangeDescriptor *rlSearchRange;
+ int32_t dictGroupCount = 0;
+
+ for (rlRange = fRangeList; rlRange!=nullptr; rlRange=rlRange->fNext) {
+ // Look among the earlier ranges for one with an identical set list and reuse its number.
+ for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) {
+ if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) {
+ rlRange->fNum = rlSearchRange->fNum;
+ rlRange->fIncludesDict = rlSearchRange->fIncludesDict;
+ break;
+ }
+ }
+ // fNum still 0 means the search above found no match: this range starts a new group.
+ if (rlRange->fNum == 0) {
+ rlRange->fFirstInGroup = true;
+ if (rlRange->isDictionaryRange()) {
+ // Dictionary groups get provisional numbers 1..dictGroupCount; they are
+ // shifted up below, and only then added to the sets' expressions.
+ rlRange->fNum = ++dictGroupCount;
+ rlRange->fIncludesDict = true;
+ } else {
+ fGroupCount++;
+ rlRange->fNum = fGroupCount+2;
+ addValToSets(rlRange->fIncludesSets, rlRange->fNum);
+ }
+ }
+ }
+
+ // Move the character category numbers for any dictionary ranges up, so that they
+ // immediately follow the non-dictionary ranges.
+
+ fDictCategoriesStart = fGroupCount + 3;
+ for (rlRange = fRangeList; rlRange!=nullptr; rlRange=rlRange->fNext) {
+ if (rlRange->fIncludesDict) {
+ rlRange->fNum += fDictCategoriesStart - 1;
+ if (rlRange->fFirstInGroup) {
+ addValToSets(rlRange->fIncludesSets, rlRange->fNum);
+ }
+ }
+ }
+ fGroupCount += dictGroupCount;
+
+
+ // Handle input sets that contain the special string {eof}.
+ // Column 1 of the state table is reserved for EOF on input.
+ // Column 2 is reserved for before-the-start-input.
+ // (This column can be optimized away later if there are no rule
+ // references to {bof}.)
+ // Add this column value (1 or 2) to the equivalent expression
+ // subtree for each UnicodeSet that contains the string {eof}
+ // Because {bof} and {eof} are not characters in the normal sense,
+ // they don't affect the computation of the ranges or TRIE.
+
+ UnicodeString eofString(u"eof");
+ UnicodeString bofString(u"bof");
+ for (ni=0; ; ni++) { // Loop over each of the UnicodeSets encountered in the input rules
+ usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
+ if (usetNode==NULL) {
+ break;
+ }
+ UnicodeSet *inputSet = usetNode->fInputSet;
+ if (inputSet->contains(eofString)) {
+ addValToSet(usetNode, 1);
+ }
+ if (inputSet->contains(bofString)) {
+ addValToSet(usetNode, 2);
+ fSawBOF = TRUE;
+ }
+ }
+
+
+ if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
+ if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}
+}
+
+
+//
+// Build the Trie table for mapping UChar32 values to the corresponding
+// range group number.
+//
+void RBBISetBuilder::buildTrie() {
+    // Both the initial value for all code points and the error value for
+    // out-of-range input are 0.
+    fMutableTrie = umutablecptrie_open(0, 0, fStatus);
+
+    // Record each range's group number; stop at the first failure.
+    RangeDescriptor *rd = fRangeList;
+    while (rd != nullptr && U_SUCCESS(*fStatus)) {
+        umutablecptrie_setRange(fMutableTrie, rd->fStartChar, rd->fEndChar, rd->fNum, fStatus);
+        rd = rd->fNext;
+    }
+}
+
+
+// Fold category categories.second into categories.first and close the gap in
+// the numbering by shifting every higher category down by one.
+void RBBISetBuilder::mergeCategories(IntPair categories) {
+    U_ASSERT(categories.first >= 1);
+    U_ASSERT(categories.second > categories.first);
+    // Both categories must lie on the same side of the dictionary boundary.
+    U_ASSERT((categories.first < fDictCategoriesStart && categories.second < fDictCategoriesStart) ||
+            (categories.first >= fDictCategoriesStart && categories.second >= fDictCategoriesStart));
+
+    RangeDescriptor *rd = fRangeList;
+    while (rd != nullptr) {
+        if (rd->fNum == categories.second) {
+            rd->fNum = categories.first;
+        } else if (rd->fNum > categories.second) {
+            rd->fNum -= 1;
+        }
+        rd = rd->fNext;
+    }
+
+    fGroupCount -= 1;
+    if (categories.second <= fDictCategoriesStart) {
+        fDictCategoriesStart -= 1;
+    }
+}
+
+
+//-----------------------------------------------------------------------------------
+//
+// getTrieSize() Return the size that will be required to serialize the Trie.
+//
+//-----------------------------------------------------------------------------------
+int32_t RBBISetBuilder::getTrieSize() {
+    if (U_FAILURE(*fStatus)) {
+        return 0;
+    }
+    if (fTrie != nullptr) {
+        // The immutable trie was built on an earlier call; reuse its size.
+        return fTrieSize;
+    }
+
+    // 8-bit values are used while the category count allows it, else 16-bit.
+    UCPTrieValueWidth valueWidth = UCPTRIE_VALUE_BITS_16;
+    if (getNumCharCategories() <= kMaxCharCategoriesFor8BitsTrie) {
+        valueWidth = UCPTRIE_VALUE_BITS_8;
+    }
+    fTrie = umutablecptrie_buildImmutable(fMutableTrie, UCPTRIE_TYPE_FAST, valueWidth, fStatus);
+
+    // Preflight with a null, zero-capacity buffer to learn the size. A buffer
+    // overflow status from that call is reset to U_ZERO_ERROR.
+    fTrieSize = ucptrie_toBinary(fTrie, nullptr, 0, fStatus);
+    if (*fStatus == U_BUFFER_OVERFLOW_ERROR) {
+        *fStatus = U_ZERO_ERROR;
+    }
+    return fTrieSize;
+}
+
+
+//-----------------------------------------------------------------------------------
+//
+// serializeTrie() Put the serialized trie at the specified address.
+// Trust the caller to have given us enough memory.
+// getTrieSize() MUST be called first.
+//
+//-----------------------------------------------------------------------------------
+void RBBISetBuilder::serializeTrie(uint8_t *where) {
+    // fTrieSize, the value computed by getTrieSize(), is passed as the buffer
+    // capacity; the caller is trusted to have supplied at least that much.
+    ucptrie_toBinary(fTrie, where, fTrieSize, fStatus);
+}
+
+//------------------------------------------------------------------------
+//
+// addValToSets Add a runtime-mapped input value to each uset from a
+// list of uset nodes. (val corresponds to a state table column.)
+// For each of the original Unicode sets - which correspond
+// directly to uset nodes - a logically equivalent expression
+// is constructed in terms of the remapped runtime input
+// symbol set. This function adds one runtime input symbol to
+// a list of sets.
+//
+// The "logically equivalent expression" is the tree for an
+// or-ing together of all of the symbols that go into the set.
+//
+//------------------------------------------------------------------------
+void RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) {
+    // Apply addValToSet() to every uset node held in the vector.
+    int32_t idx = 0;
+    while (idx < sets->size()) {
+        addValToSet(static_cast<RBBINode *>(sets->elementAt(idx)), val);
+        ++idx;
+    }
+}
+
+// Add one runtime input symbol (val) to the expression tree hanging off
+// usetNode. The first symbol becomes the node's left child directly; each
+// later symbol is OR-ed onto whatever is already there. On allocation failure
+// *fStatus is set to U_MEMORY_ALLOCATION_ERROR and the tree is left untouched.
+void RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) {
+    RBBINode *leafNode = new RBBINode(RBBINode::leafChar);
+    if (leafNode == NULL) {
+        *fStatus = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    leafNode->fVal = (unsigned short)val;
+
+    if (usetNode->fLeftChild == NULL) {
+        usetNode->fLeftChild = leafNode;
+        leafNode->fParent = usetNode;
+        return;
+    }
+
+    // There are already input symbols present for this set.
+    // Set up an OR node, with the previous stuff as the left child
+    // and the new value as the right child.
+    RBBINode *orNode = new RBBINode(RBBINode::opOr);
+    if (orNode == NULL) {
+        // The leaf has not been linked into the tree yet, so nothing else
+        // owns it; free it here rather than leaking it.
+        delete leafNode;
+        *fStatus = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    orNode->fLeftChild = usetNode->fLeftChild;
+    orNode->fRightChild = leafNode;
+    orNode->fLeftChild->fParent = orNode;
+    orNode->fRightChild->fParent = orNode;
+    usetNode->fLeftChild = orNode;
+    orNode->fParent = usetNode;
+}
+
+
+//------------------------------------------------------------------------
+//
+// getNumCharCategories
+//
+//------------------------------------------------------------------------
+int32_t RBBISetBuilder::getNumCharCategories() const {
+ // The +3 covers category numbers 0, 1 and 2, which buildRanges() treats as
+ // unused / end-of-input / beginning-of-input; range groups start at 3.
+ return fGroupCount + 3;
+}
+
+
+//------------------------------------------------------------------------
+//
+// getDictCategoriesStart
+//
+//------------------------------------------------------------------------
+int32_t RBBISetBuilder::getDictCategoriesStart() const {
+ // Assigned by buildRanges() and decremented by mergeCategories().
+ return fDictCategoriesStart;
+}
+
+
+//------------------------------------------------------------------------
+//
+// sawBOF
+//
+//------------------------------------------------------------------------
+UBool RBBISetBuilder::sawBOF() const {
+ // Set by buildRanges() when some input set contains the string "bof".
+ return fSawBOF;
+}
+
+
+//------------------------------------------------------------------------
+//
+// getFirstChar Given a runtime RBBI character category, find
+// the first UChar32 that is in the set of chars
+// in the category.
+//------------------------------------------------------------------------
+UChar32 RBBISetBuilder::getFirstChar(int32_t category) const {
+    // The first range in list order carrying this category supplies the answer.
+    for (const RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
+        if (rd->fNum == category) {
+            return rd->fStartChar;
+        }
+    }
+    // No range carries the requested category.
+    return (UChar32)-1;
+}
+
+//------------------------------------------------------------------------
+//
+// printRanges A debugging function.
+// dump out all of the range definitions.
+//
+//------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+void RBBISetBuilder::printRanges() {
+    RBBIDebugPrintf("\n\n Nonoverlapping Ranges ...\n");
+    for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
+        RBBIDebugPrintf("%4x-%4x ", rd->fStartChar, rd->fEndChar);
+
+        // One name per set that includes this range: the $variable name when
+        // the uset node's grandparent is a varRef node, otherwise "anon".
+        for (int idx = 0; idx < rd->fIncludesSets->size(); ++idx) {
+            RBBINode *usetNode = (RBBINode *)rd->fIncludesSets->elementAt(idx);
+            UnicodeString setName {u"anon"};
+            RBBINode *setRef = usetNode->fParent;
+            RBBINode *varRef = (setRef != nullptr) ? setRef->fParent : nullptr;
+            if (varRef != nullptr && varRef->fType == RBBINode::varRef) {
+                setName = varRef->fText;
+            }
+            RBBI_DEBUG_printUnicodeString(setName);
+            RBBIDebugPrintf(" ");
+        }
+        RBBIDebugPrintf("\n");
+    }
+}
+#endif
+
+
+//------------------------------------------------------------------------
+//
+// printRangeGroups A debugging function.
+// dump out all of the range groups.
+//
+//------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+void RBBISetBuilder::printRangeGroups() {
+    RBBIDebugPrintf("\nRanges grouped by Unicode Set Membership...\n");
+    for (RangeDescriptor *lead = fRangeList; lead != nullptr; lead = lead->fNext) {
+        // Only the first range of each group produces output.
+        if (!lead->fFirstInGroup) {
+            continue;
+        }
+        int groupNum = lead->fNum;
+        RBBIDebugPrintf("%2i ", groupNum);
+
+        if (groupNum >= fDictCategoriesStart) {
+            RBBIDebugPrintf(" <DICT> ");
+        }
+
+        // Names of the sets that make up this group.
+        for (int idx = 0; idx < lead->fIncludesSets->size(); ++idx) {
+            RBBINode *usetNode = (RBBINode *)lead->fIncludesSets->elementAt(idx);
+            UnicodeString setName = UNICODE_STRING("anon", 4);
+            RBBINode *setRef = usetNode->fParent;
+            RBBINode *varRef = (setRef != NULL) ? setRef->fParent : NULL;
+            if (varRef != NULL && varRef->fType == RBBINode::varRef) {
+                setName = varRef->fText;
+            }
+            RBBI_DEBUG_printUnicodeString(setName);
+            RBBIDebugPrintf(" ");
+        }
+
+        // Every range from here onward with the same number, five per line.
+        int printed = 0;
+        for (RangeDescriptor *member = lead; member != nullptr; member = member->fNext) {
+            if (member->fNum != lead->fNum) {
+                continue;
+            }
+            if (printed % 5 == 0) {
+                RBBIDebugPrintf("\n ");
+            }
+            ++printed;
+            RBBIDebugPrintf(" %05x-%05x", member->fStartChar, member->fEndChar);
+        }
+        RBBIDebugPrintf("\n");
+    }
+    RBBIDebugPrintf("\n");
+}
+#endif
+
+
+//------------------------------------------------------------------------
+//
+// printSets A debugging function.
+// dump out all of the set definitions.
+//
+//------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+void RBBISetBuilder::printSets() {
+    RBBIDebugPrintf("\n\nUnicode Sets List\n------------------\n");
+
+    // Walk the uset node list until elementAt() returns NULL.
+    RBBINode *usetNode;
+    for (int index = 0;
+         (usetNode = (RBBINode *)fRB->fUSetNodes->elementAt(index)) != NULL;
+         ++index) {
+        RBBIDebugPrintf("%3d ", index);
+
+        // Use the $variable name when the node's grandparent is a varRef.
+        UnicodeString setName = UNICODE_STRING("anonymous", 9);
+        RBBINode *setRef = usetNode->fParent;
+        RBBINode *varRef = (setRef != NULL) ? setRef->fParent : NULL;
+        if (varRef != NULL && varRef->fType == RBBINode::varRef) {
+            setName = varRef->fText;
+        }
+        RBBI_DEBUG_printUnicodeString(setName);
+        RBBIDebugPrintf(" ");
+        RBBI_DEBUG_printUnicodeString(usetNode->fText);
+        RBBIDebugPrintf("\n");
+
+        if (usetNode->fLeftChild != NULL) {
+            RBBINode::printTree(usetNode->fLeftChild, TRUE);
+        }
+    }
+    RBBIDebugPrintf("\n");
+}
+#endif
+
+
+
+//-------------------------------------------------------------------------------------
+//
+// RangeDescriptor copy constructor
+//
+//-------------------------------------------------------------------------------------
+
+RangeDescriptor::RangeDescriptor(const RangeDescriptor &other, UErrorCode &status) :
+        fStartChar{other.fStartChar}, fEndChar{other.fEndChar}, fNum{other.fNum},
+        fIncludesDict{other.fIncludesDict}, fFirstInGroup{other.fFirstInGroup} {
+    // The scalar fields are copied above; the set list gets its own vector
+    // holding the same element pointers. fNext is not copied.
+    if (U_FAILURE(status)) {
+        return;
+    }
+    fIncludesSets = new UVector(status);
+    if (fIncludesSets == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    const UVector *source = other.fIncludesSets;
+    for (int32_t idx = 0; idx < source->size(); ++idx) {
+        fIncludesSets->addElement(source->elementAt(idx), status);
+    }
+}
+
+
+//-------------------------------------------------------------------------------------
+//
+// RangeDescriptor default constructor
+//
+//-------------------------------------------------------------------------------------
+RangeDescriptor::RangeDescriptor(UErrorCode &status) {
+    // The empty set list is only allocated while status is still good.
+    if (U_SUCCESS(status)) {
+        fIncludesSets = new UVector(status);
+        if (fIncludesSets == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+
+//-------------------------------------------------------------------------------------
+//
+// RangeDescriptor Destructor
+//
+//-------------------------------------------------------------------------------------
+RangeDescriptor::~RangeDescriptor() {
+ // Frees only the vector object; fNext is not followed here, the list
+ // is torn down node by node in ~RBBISetBuilder().
+ delete fIncludesSets;
+ fIncludesSets = nullptr;
+}
+
+//-------------------------------------------------------------------------------------
+//
+// RangeDescriptor::split()
+//
+//-------------------------------------------------------------------------------------
+void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
+    U_ASSERT(where>fStartChar && where<=fEndChar);
+
+    // The upper half starts as a copy of this range and is then trimmed.
+    RangeDescriptor *upper = new RangeDescriptor(*this, status);
+    if (upper == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    if (U_FAILURE(status)) {
+        delete upper;
+        return;
+    }
+
+    // This range keeps [fStartChar, where-1]; the new one takes
+    // [where, old fEndChar] and is linked in directly after this range.
+    upper->fStartChar = where;
+    upper->fNext = fNext;
+    fEndChar = where - 1;
+    fNext = upper;
+}
+
+
+//-------------------------------------------------------------------------------------
+//
+// RangeDescriptor::isDictionaryRange
+//
+// Test whether this range includes characters from
+// the original Unicode Set named "dictionary".
+//
+// This function looks through the Unicode Sets that
+// the range includes, checking for one named "dictionary"
+//
+// TODO: a faster way would be to find the set node for
+// "dictionary" just once, rather than looking it
+// up by name every time.
+//
+//-------------------------------------------------------------------------------------
+bool RangeDescriptor::isDictionaryRange() {
+    static const char16_t *dictionary = u"dictionary";
+
+    // A set counts when its uset node's grandparent is a varRef node whose
+    // text equals "dictionary".
+    for (int32_t idx = 0; idx < fIncludesSets->size(); ++idx) {
+        const RBBINode *usetNode = static_cast<RBBINode *>(fIncludesSets->elementAt(idx));
+        const RBBINode *setRef = usetNode->fParent;
+        if (setRef == nullptr) {
+            continue;
+        }
+        const RBBINode *varRef = setRef->fParent;
+        if (varRef == nullptr || varRef->fType != RBBINode::varRef) {
+            continue;
+        }
+        if (varRef->fText.compare(dictionary, -1) == 0) {
+            return true;
+        }
+    }
+    return false;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/thirdparty/icu4c/common/rbbisetb.h b/thirdparty/icu4c/common/rbbisetb.h
new file mode 100644
index 0000000000..6409a4ea57
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbisetb.h
@@ -0,0 +1,147 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// rbbisetb.h
+/*
+**********************************************************************
+* Copyright (c) 2001-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#ifndef RBBISETB_H
+#define RBBISETB_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/ucptrie.h"
+#include "unicode/umutablecptrie.h"
+#include "unicode/uobject.h"
+#include "rbbirb.h"
+#include "uvector.h"
+
+U_NAMESPACE_BEGIN
+
+//
+// RBBISetBuilder Derives the character categories used by the runtime RBBI engine
+// from the Unicode Sets appearing in the source RBBI rules, and
+// creates the TRIE table used to map from Unicode to the
+// character categories.
+//
+
+
+//
+// RangeDescriptor
+//
+// Each of the non-overlapping character ranges gets one of these descriptors.
+// All of them are strung together in a linked list, which is kept in order
+// (by character)
+//
+class RangeDescriptor : public UMemory {
+public:
+ UChar32 fStartChar {}; // Start of range (inclusive), unicode 32 bit value.
+ UChar32 fEndChar {}; // End of range (inclusive), unicode 32 bit value.
+ int32_t fNum {0}; // runtime-mapped input value for this range.
+ bool fIncludesDict {false}; // True if the range includes $dictionary.
+ bool fFirstInGroup {false}; // True if first range in a group with the same fNum.
+ UVector *fIncludesSets {nullptr}; // vector of the original
+ // Unicode sets that include this range.
+ // (Contains ptrs to uset nodes)
+ RangeDescriptor *fNext {nullptr}; // Next RangeDescriptor in the linked list.
+
+ // Construction reports failure through status rather than by throwing.
+ RangeDescriptor(UErrorCode &status);
+ // Status-reporting copy; this is the only supported way to duplicate a descriptor.
+ RangeDescriptor(const RangeDescriptor &other, UErrorCode &status);
+ ~RangeDescriptor();
+ void split(UChar32 where, UErrorCode &status); // Split this range in two at "where", with
+ // where appearing in the second (higher) part.
+ bool isDictionaryRange(); // Check whether this range appears as part of
+ // the Unicode set named "dictionary"
+
+ RangeDescriptor(const RangeDescriptor &other) = delete; // forbid default copying of this class
+ RangeDescriptor &operator=(const RangeDescriptor &other) = delete; // forbid assigning of this class
+};
+
+
+//
+// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules.
+//
+// Starting with the rules parse tree from the scanner,
+//
+// - Enumerate the set of UnicodeSets that are referenced
+// by the RBBI rules.
+// - compute a derived set of non-overlapping UnicodeSets
+// that will correspond to columns in the state table for
+// the RBBI execution engine.
+// - construct the trie table that maps input characters
+// to set numbers in the non-overlapping set of sets.
+//
+
+
+class RBBISetBuilder : public UMemory {
+public:
+ RBBISetBuilder(RBBIRuleBuilder *rb);
+ ~RBBISetBuilder();
+
+ void buildRanges();
+ void buildTrie();
+ void addValToSets(UVector *sets, uint32_t val);
+ void addValToSet (RBBINode *usetNode, uint32_t val);
+ int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the
+ // runtime state machine, which are the same as
+ // columns in the DFA state table
+ int32_t getDictCategoriesStart() const; // First char category that includes $dictionary, or
+ // last category + 1 if there are no dictionary categories.
+ int32_t getTrieSize() /*const*/; // Size in bytes of the serialized Trie.
+ void serializeTrie(uint8_t *where); // write out the serialized Trie.
+ UChar32 getFirstChar(int32_t val) const;
+ UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo
+ // character were encountered.
+ /**
+ * Merge two character categories that have been identified as having equivalent behavior.
+ * The ranges belonging to the second category (table column) will be added to the first.
+ * @param categories the pair of categories to be merged.
+ */
+ void mergeCategories(IntPair categories);
+
+ // Debug printing. When RBBI_DEBUG is not defined the names below become
+ // empty function-like macros, so calls to them expand to nothing.
+#ifdef RBBI_DEBUG
+ void printSets();
+ void printRanges();
+ void printRangeGroups();
+#else
+ #define printSets()
+ #define printRanges()
+ #define printRangeGroups()
+#endif
+
+private:
+ RBBIRuleBuilder *fRB; // The RBBI Rule Compiler that owns us.
+ UErrorCode *fStatus; // Error status pointer; NOTE(review): presumably shared with fRB - confirm in rbbisetb.cpp.
+
+ RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors
+
+ UMutableCPTrie *fMutableTrie; // The mapping TRIE that is the end result of processing
+ UCPTrie *fTrie; // the Unicode Sets.
+ uint32_t fTrieSize;
+
+ // Number of range groups, which are groups of ranges that are in the same original UnicodeSets.
+ int32_t fGroupCount;
+
+ // The number of the first dictionary char category.
+ // If there are no Dictionary categories, set to the last category + 1.
+ int32_t fDictCategoriesStart;
+
+ UBool fSawBOF; // Value reported by sawBOF(), per that accessor's description above.
+
+ // Declared private and left without a body here so that copies cannot be made.
+ RBBISetBuilder(const RBBISetBuilder &other); // forbid copying of this class
+ RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class
+};
+
+
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif
diff --git a/thirdparty/icu4c/common/rbbistbl.cpp b/thirdparty/icu4c/common/rbbistbl.cpp
new file mode 100644
index 0000000000..5303f76096
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbistbl.cpp
@@ -0,0 +1,270 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class
+//
+/*
+***************************************************************************
+* Copyright (C) 2002-2014 International Business Machines Corporation
+* and others. All rights reserved.
+***************************************************************************
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/unistr.h"
+#include "unicode/uniset.h"
+#include "unicode/uchar.h"
+#include "unicode/parsepos.h"
+
+#include "cstr.h"
+#include "rbbinode.h"
+#include "rbbirb.h"
+#include "umutex.h"
+
+
+//
+// RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents
+// when the hash table is deleted.
+//
+U_CDECL_BEGIN
+static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
+    // The hash table hands values back as untyped pointers; restore the
+    // real type so the entry's destructor runs.
+    delete static_cast<icu::RBBISymbolTableEntry *>(p);
+}
+U_CDECL_END
+
+
+
+U_NAMESPACE_BEGIN
+
+RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
+    : fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))
+{
+    // Creates the name -> entry hash table. On failure, status is set and no
+    // value deleter is installed.
+    fCachedSetLookup = NULL;
+
+    // uhash_open checks the incoming status itself.
+    fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
+    if (U_SUCCESS(status)) {
+        // The table owns its entries; they are released through the deleter.
+        uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
+    }
+}
+
+
+
+RBBISymbolTable::~RBBISymbolTable()
+{
+ // Closing the table also disposes of its entries through the value deleter
+ // (RBBISymbolTableEntry_deleter) registered in the constructor.
+ uhash_close(fHashTable);
+}
+
+
+//
+// RBBISymbolTable::lookup This function from the abstract symbol table interface
+// looks up a variable name and returns a UnicodeString
+// containing the substitution text.
+//
+// The variable name does NOT include the leading $.
+//
+const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const
+{
+    // The lookup caches the referenced set as a side effect, so a mutable
+    // alias of this object is needed despite the const interface.
+    RBBISymbolTable *mutableThis = const_cast<RBBISymbolTable *>(this);
+
+    RBBISymbolTableEntry *entry =
+        static_cast<RBBISymbolTableEntry *>(uhash_get(fHashTable, &s));
+    if (entry == NULL) {
+        // Unknown variable name; the cached set is left as it was.
+        return NULL;
+    }
+
+    // The left child of the variable reference node is the root of the
+    // expression assigned to the variable.
+    RBBINode *exprNode = entry->val->fLeftChild;
+
+    if (exprNode->fType != RBBINode::setRef) {
+        // The variable stands for something other than a single set:
+        // hand back the original source text of the expression.
+        mutableThis->fCachedSetLookup = NULL;
+        return &exprNode->fText;
+    }
+
+    // The variable refers to exactly one UnicodeSet. Remember that set and
+    // return the 0xffff stand-in string, which lookupMatcher() will map back
+    // to the remembered set.
+    mutableThis->fCachedSetLookup = exprNode->fLeftChild->fInputSet;
+    return &ffffString;
+}
+
+
+
+//
+// RBBISymbolTable::lookupMatcher This function from the abstract symbol table
+// interface maps a single stand-in character to a
+// pointer to a Unicode Set. The Unicode Set code uses this
+// mechanism to get all references to the same $variable
+// name to refer to a single common Unicode Set instance.
+//
+// This implementation cheats a little, and does not maintain a map of stand-in chars
+// to sets. Instead, it takes advantage of the fact that the UnicodeSet
+// constructor will always call this function right after calling lookup(),
+// and we just need to remember what set to return between these two calls.
+const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
+{
+    // Only the 0xffff stand-in character maps to a set.
+    if (ch != 0xffff) {
+        return NULL;
+    }
+
+    // Hand out the set remembered by the preceding lookup() and clear the
+    // cache so it cannot be returned a second time.
+    UnicodeSet *cachedSet = fCachedSetLookup;
+    const_cast<RBBISymbolTable *>(this)->fCachedSetLookup = NULL;
+    return cachedSet;
+}
+
+//
+// RBBISymbolTable::parseReference This function from the abstract symbol table interface
+// looks for a $variable name in the source text.
+// It does not look it up, only scans for it.
+// It is used by the UnicodeSet parser.
+//
+// This implementation is lifted pretty much verbatim
+// from the rules based transliterator implementation.
+// I didn't see an obvious way of sharing it.
+//
+UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text,
+                                              ParsePosition& pos, int32_t limit) const
+{
+    // Scans an identifier starting at pos: the first code unit must pass both
+    // u_isIDStart and u_isIDPart, each following one u_isIDPart. On success
+    // pos is advanced past the name; on failure pos is untouched and the
+    // returned string is empty.
+    const int32_t start = pos.getIndex();
+    int32_t end = start;
+    for (; end < limit; ++end) {
+        const UChar c = text.charAt(end);
+        if (end == start && !u_isIDStart(c)) {
+            break;
+        }
+        if (!u_isIDPart(c)) {
+            break;
+        }
+    }
+
+    UnicodeString result;
+    if (end > start) {
+        pos.setIndex(end);
+        text.extractBetween(start, end, result);
+    }
+    return result;
+}
+
+
+
+//
+// RBBISymbolTable::lookupNode Given a key (a variable name), return the
+// corresponding RBBI Node. If there is no entry
+// in the table for this name, return NULL.
+//
+RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
+    // Returns the node stored for the variable name, or NULL when the name
+    // has no entry in the table.
+    const RBBISymbolTableEntry *entry =
+        static_cast<RBBISymbolTableEntry *>(uhash_get(fHashTable, &key));
+    if (entry == NULL) {
+        return NULL;
+    }
+    return entry->val;
+}
+
+
+//
+// RBBISymbolTable::addEntry Add a new entry to the symbol table.
+// Indicate an error if the name already exists -
+// this will only occur in the case of duplicate
+// variable assignments.
+//
+void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
+    // Do nothing if an earlier operation has already failed.
+    if (U_FAILURE(err)) {
+        return;
+    }
+
+    // A name that is already present means the variable was assigned twice.
+    if (uhash_get(fHashTable, &key) != NULL) {
+        err = U_BRK_VARIABLE_REDFINITION;
+        return;
+    }
+
+    RBBISymbolTableEntry *entry = new RBBISymbolTableEntry;
+    if (entry == NULL) {
+        err = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    entry->key = key;
+    entry->val = val;
+
+    // The table is keyed by the copy of the name stored inside the entry.
+    uhash_put(fHashTable, &entry->key, entry, &err);
+}
+
+
+// Creates an empty entry: the key is an empty string and there is no node yet.
+RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {}
+
+RBBISymbolTableEntry::~RBBISymbolTableEntry() {
+    // The "val" of a symbol table entry is a variable reference node.
+    // The l. child of the val is the rhs expression from the assignment.
+    // Unlike other node types, children of variable reference nodes are not
+    // automatically recursively deleted. We do it manually here.
+    //
+    // An entry that was default-constructed and never filled in has a NULL
+    // val, so guard against dereferencing it.
+    if (val != NULL) {
+        delete val->fLeftChild;
+        val->fLeftChild = NULL;
+
+        delete val;
+    }
+
+    // Note: the key UnicodeString is destructed by virtue of being in the object by value.
+}
+
+
+//
+// RBBISymbolTable::rbbiSymtablePrint Debugging function, dump out the symbol table contents.
+//
+#ifdef RBBI_DEBUG
+void RBBISymbolTable::rbbiSymtablePrint() const {
+ // First pass: one summary line per variable (name, node address, node
+ // serial number, and the text of the assigned expression).
+ RBBIDebugPrintf("Variable Definitions Symbol Table\n"
+ "Name Node serial String Val\n"
+ "-------------------------------------------------------------------\n");
+
+ int32_t pos = UHASH_FIRST;
+ const UHashElement *e = NULL;
+ for (;;) {
+ e = uhash_nextElement(fHashTable, &pos);
+ if (e == NULL ) {
+ break;
+ }
+ RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
+
+ RBBIDebugPrintf("%-19s %8p %7d ", CStr(s->key)(), (void *)s->val, s->val->fSerialNum);
+ RBBIDebugPrintf(" %s\n", CStr(s->val->fLeftChild->fText)());
+ }
+
+ // Second pass: print the variable reference node and the parse tree of
+ // the expression assigned to each variable.
+ RBBIDebugPrintf("\nParsed Variable Definitions\n");
+ // Restart the iteration. NOTE(review): -1 is presumably the value of UHASH_FIRST - confirm in uhash.h.
+ pos = -1;
+ for (;;) {
+ e = uhash_nextElement(fHashTable, &pos);
+ if (e == NULL ) {
+ break;
+ }
+ RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
+ RBBIDebugPrintf("%s\n", CStr(s->key)());
+ RBBINode::printTree(s->val, TRUE);
+ RBBINode::printTree(s->val->fLeftChild, FALSE);
+ RBBIDebugPrintf("\n");
+ }
+}
+#endif
+
+
+
+
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/thirdparty/icu4c/common/rbbitblb.cpp b/thirdparty/icu4c/common/rbbitblb.cpp
new file mode 100644
index 0000000000..bcbdab9227
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbitblb.cpp
@@ -0,0 +1,1793 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+//
+// rbbitblb.cpp
+//
+
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/unistr.h"
+#include "rbbitblb.h"
+#include "rbbirb.h"
+#include "rbbiscan.h"
+#include "rbbisetb.h"
+#include "rbbidata.h"
+#include "cstring.h"
+#include "uassert.h"
+#include "uvectr32.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+const int32_t kMaxStateFor8BitsTable = 255;
+
+RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status) :
+        fRB(rb),
+        fTree(*rootNode),
+        fStatus(&status),
+        fDStates(nullptr),
+        fSafeTable(nullptr) {
+    // Nothing is allocated when the incoming status already reports a failure.
+    if (U_SUCCESS(status)) {
+        // fDStates is UVector<RBBIStateDescriptor *>
+        fDStates = new UVector(status);
+        if (fDStates == nullptr && U_SUCCESS(status)) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+
+
+RBBITableBuilder::~RBBITableBuilder() {
+    // fDStates stays nullptr when the constructor bailed out early (failing
+    // status on entry, or the vector allocation itself failed), so it must be
+    // checked before its elements are walked.
+    if (fDStates != nullptr) {
+        // The vector holds owning RBBIStateDescriptor pointers.
+        for (int32_t i = 0; i < fDStates->size(); i++) {
+            delete (RBBIStateDescriptor *)fDStates->elementAt(i);
+        }
+    }
+    delete fDStates;
+    delete fSafeTable;
+    delete fLookAheadRuleMap;
+}
+
+
+//-----------------------------------------------------------------------------
+//
+// RBBITableBuilder::buildForwardTable - This is the main function for building
+// the DFA state transition table from the RBBI rules parse tree.
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::buildForwardTable() {
+
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
+
+    // If there were no rules, just return. This situation can easily arise
+    // for the reverse rules.
+    if (fTree==NULL) {
+        return;
+    }
+
+    //
+    // Walk through the tree, replacing any references to $variables with a copy of the
+    // parse tree for the substitution expression.
+    //
+    fTree = fTree->flattenVariables();
+#ifdef RBBI_DEBUG
+    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) {
+        RBBIDebugPuts("\nParse tree after flattening variable references.");
+        RBBINode::printTree(fTree, TRUE);
+    }
+#endif
+
+    //
+    // If the rules contained any references to {bof}
+    // add a {bof} <cat> <former root of tree> to the
+    // tree. Means that all matches must start out with the
+    // {bof} fake character.
+    //
+    if (fRB->fSetBuilder->sawBOF()) {
+        RBBINode *bofTop = new RBBINode(RBBINode::opCat);
+        RBBINode *bofLeaf = new RBBINode(RBBINode::leafChar);
+        // Delete and exit if memory allocation failed.
+        if (bofTop == NULL || bofLeaf == NULL) {
+            *fStatus = U_MEMORY_ALLOCATION_ERROR;
+            delete bofTop;
+            delete bofLeaf;
+            return;
+        }
+        bofTop->fLeftChild = bofLeaf;
+        bofTop->fRightChild = fTree;
+        bofLeaf->fParent = bofTop;
+        bofLeaf->fVal = 2; // Reserved value for {bof}.
+        fTree = bofTop;
+    }
+
+    //
+    // Add a unique right-end marker to the expression.
+    // Appears as a cat-node, left child being the original tree,
+    // right child being the end marker.
+    //
+    // Both new nodes are allocated BEFORE either is linked to the existing
+    // tree. If the second allocation fails, the still-childless cat node can
+    // then be deleted without taking the existing tree down with it (deleting
+    // a node that already had fTree as its child would leave fTree dangling).
+    //
+    RBBINode *cn = new RBBINode(RBBINode::opCat);
+    // Exit if memory allocation failed.
+    if (cn == NULL) {
+        *fStatus = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    RBBINode *endMarkerNode = new RBBINode(RBBINode::endMark);
+    // Delete and exit if memory allocation failed.
+    if (endMarkerNode == NULL) {
+        *fStatus = U_MEMORY_ALLOCATION_ERROR;
+        delete cn;
+        return;
+    }
+    cn->fLeftChild = fTree;
+    fTree->fParent = cn;
+    cn->fRightChild = endMarkerNode;
+    endMarkerNode->fParent = cn;
+    fTree = cn;
+
+    //
+    // Replace all references to UnicodeSets with the tree for the equivalent
+    // expression.
+    //
+    fTree->flattenSets();
+#ifdef RBBI_DEBUG
+    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) {
+        RBBIDebugPuts("\nParse tree after flattening Unicode Set references.");
+        RBBINode::printTree(fTree, TRUE);
+    }
+#endif
+
+
+    //
+    // calculate the functions nullable, firstpos, lastpos and followpos on
+    // nodes in the parse tree.
+    // See the algorithm description in Aho.
+    // Understanding how this works by looking at the code alone will be
+    // nearly impossible.
+    //
+    calcNullable(fTree);
+    calcFirstPos(fTree);
+    calcLastPos(fTree);
+    calcFollowPos(fTree);
+    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) {
+        RBBIDebugPuts("\n");
+        printPosSets(fTree);
+    }
+
+    //
+    // For "chained" rules, modify the followPos sets
+    //
+    if (fRB->fChainRules) {
+        calcChainedFollowPos(fTree, endMarkerNode);
+    }
+
+    //
+    // BOF (start of input) test fixup.
+    //
+    if (fRB->fSetBuilder->sawBOF()) {
+        bofFixup();
+    }
+
+    //
+    // Build the DFA state transition tables.
+    //
+    buildStateTable();
+    mapLookAheadRules();
+    flagAcceptingStates();
+    flagLookAheadStates();
+    flagTaggedStates();
+
+    //
+    // Update the global table of rule status {tag} values
+    // The rule builder has a global vector of status values that are common
+    // for all tables. Merge the ones from this table into the global set.
+    //
+    mergeRuleStatusVals();
+}
+
+
+
+//-----------------------------------------------------------------------------
+//
+// calcNullable. Impossible to explain succinctly. See Aho, section 3.9
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::calcNullable(RBBINode *n) {
+    if (n == NULL) {
+        return;
+    }
+
+    // Node types that are settled without looking at any children.
+    switch (n->fType) {
+    case RBBINode::setRef:
+    case RBBINode::endMark:
+        // Non-empty leaves always consume input.
+        n->fNullable = FALSE;
+        return;
+    case RBBINode::lookAhead:
+    case RBBINode::tag:
+        // Marker leaves: they match no literal text from the input stream,
+        // hence they are nullable.
+        n->fNullable = TRUE;
+        return;
+    default:
+        break;
+    }
+
+    // Everything else: compute the children first, then combine.
+    calcNullable(n->fLeftChild);
+    calcNullable(n->fRightChild);
+
+    // Combination rules from table 3.40 in Aho.
+    switch (n->fType) {
+    case RBBINode::opOr:
+        n->fNullable = n->fLeftChild->fNullable || n->fRightChild->fNullable;
+        break;
+    case RBBINode::opCat:
+        n->fNullable = n->fLeftChild->fNullable && n->fRightChild->fNullable;
+        break;
+    case RBBINode::opStar:
+    case RBBINode::opQuestion:
+        n->fNullable = TRUE;
+        break;
+    default:
+        n->fNullable = FALSE;
+        break;
+    }
+}
+
+
+
+
+//-----------------------------------------------------------------------------
+//
+// calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::calcFirstPos(RBBINode *n) {
+    if (n == NULL) {
+        return;
+    }
+
+    switch (n->fType) {
+    case RBBINode::leafChar:
+    case RBBINode::endMark:
+    case RBBINode::lookAhead:
+    case RBBINode::tag:
+        // A leaf is its own (only) first position.
+        // To keep the set's sort invariant, this must only run on a node
+        // whose set is still empty.
+        n->fFirstPosSet->addElement(n, *fStatus);
+        return;
+    default:
+        break;
+    }
+
+    // Interior node: children first.
+    calcFirstPos(n->fLeftChild);
+    calcFirstPos(n->fRightChild);
+
+    // Combination rules from table 3.40 in Aho.
+    switch (n->fType) {
+    case RBBINode::opOr:
+        setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
+        setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet);
+        break;
+    case RBBINode::opCat:
+        setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
+        if (n->fLeftChild->fNullable) {
+            // An empty left side lets the right side start the match.
+            setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet);
+        }
+        break;
+    case RBBINode::opStar:
+    case RBBINode::opQuestion:
+    case RBBINode::opPlus:
+        setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
+        break;
+    default:
+        break;
+    }
+}
+
+
+
+//-----------------------------------------------------------------------------
+//
+// calcLastPos. Impossible to explain succinctly. See Aho, section 3.9
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::calcLastPos(RBBINode *n) {
+    if (n == NULL) {
+        return;
+    }
+
+    switch (n->fType) {
+    case RBBINode::leafChar:
+    case RBBINode::endMark:
+    case RBBINode::lookAhead:
+    case RBBINode::tag:
+        // A leaf is its own (only) last position.
+        // To keep the set's sort invariant, this must only run on a node
+        // whose set is still empty.
+        n->fLastPosSet->addElement(n, *fStatus);
+        return;
+    default:
+        break;
+    }
+
+    // Interior node: children first.
+    calcLastPos(n->fLeftChild);
+    calcLastPos(n->fRightChild);
+
+    // Combination rules from table 3.40 in Aho.
+    switch (n->fType) {
+    case RBBINode::opOr:
+        setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
+        setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet);
+        break;
+    case RBBINode::opCat:
+        setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet);
+        if (n->fRightChild->fNullable) {
+            // An empty right side lets the left side end the match.
+            setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
+        }
+        break;
+    case RBBINode::opStar:
+    case RBBINode::opQuestion:
+    case RBBINode::opPlus:
+        setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
+        break;
+    default:
+        break;
+    }
+}
+
+
+
+//-----------------------------------------------------------------------------
+//
+// calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::calcFollowPos(RBBINode *n) {
+    if (n == NULL) {
+        return;
+    }
+    if (n->fType == RBBINode::leafChar || n->fType == RBBINode::endMark) {
+        return;
+    }
+
+    calcFollowPos(n->fLeftChild);
+    calcFollowPos(n->fRightChild);
+
+    // Aho rule #1: for a cat node, every last position of the left child is
+    // followed by every first position of the right child.
+    if (n->fType == RBBINode::opCat) {
+        UVector *leftLastPositions = n->fLeftChild->fLastPosSet;
+        for (int32_t ix = 0; ix < leftLastPositions->size(); ix++) {
+            RBBINode *position = (RBBINode *)leftLastPositions->elementAt(ix);
+            setAdd(position->fFollowPos, n->fRightChild->fFirstPosSet);
+        }
+    }
+
+    // Aho rule #2: for a star or plus node, every last position of the node
+    // is followed by every first position of the same node.
+    if (n->fType == RBBINode::opStar || n->fType == RBBINode::opPlus) {
+        for (int32_t ix = 0; ix < n->fLastPosSet->size(); ix++) {
+            RBBINode *position = (RBBINode *)n->fLastPosSet->elementAt(ix);
+            setAdd(position->fFollowPos, n->fFirstPosSet);
+        }
+    }
+}
+
+//-----------------------------------------------------------------------------
+//
+// addRuleRootNodes Recursively walk a parse tree, adding all nodes flagged
+// as roots of a rule to a destination vector.
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::addRuleRootNodes(UVector *dest, RBBINode *node) {
+    if (node == NULL || U_FAILURE(*fStatus)) {
+        return;
+    }
+    if (!node->fRuleRoot) {
+        // Not a rule root itself; keep searching in both subtrees.
+        addRuleRootNodes(dest, node->fLeftChild);
+        addRuleRootNodes(dest, node->fRightChild);
+        return;
+    }
+    // Rules cannot nest, so once a rule root is found there is no need to
+    // descend any further below it.
+    dest->addElement(node, *fStatus);
+}
+
+//-----------------------------------------------------------------------------
+//
+// calcChainedFollowPos. Modify the previously calculated followPos sets
+// to implement rule chaining. NOT described by Aho
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree, RBBINode *endMarkNode) {
+
+    // Gather every leaf node of the tree.
+    UVector leafNodes(*fStatus);
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
+    tree->findNodes(&leafNodes, RBBINode::leafChar, *fStatus);
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
+
+    // Gather the leaf nodes that can begin a match of a rule with inbound
+    // chaining enabled: the union of the firstPos sets of those rule roots.
+    UVector ruleRootNodes(*fStatus);
+    addRuleRootNodes(&ruleRootNodes, tree);
+
+    UVector matchStartNodes(*fStatus);
+    for (int rootIx = 0; rootIx < ruleRootNodes.size(); ++rootIx) {
+        RBBINode *ruleRoot = static_cast<RBBINode *>(ruleRootNodes.elementAt(rootIx));
+        if (ruleRoot->fChainIn) {
+            setAdd(&matchStartNodes, ruleRoot->fFirstPosSet);
+        }
+    }
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
+
+    for (int32_t endNodeIx = 0; endNodeIx < leafNodes.size(); endNodeIx++) {
+        RBBINode *endNode = (RBBINode *)leafNodes.elementAt(endNodeIx);
+
+        // A leaf can end an overall rule match only if the tree's final end
+        // marker is in its followPos set.
+        //
+        // End markers belonging to look-ahead rules are deliberately not
+        // considered: those cannot chain, since a match there stops further
+        // matching. That leaves exactly one end marker, the one that closes
+        // the complete tree.
+        if (!endNode->fFollowPos->contains(endMarkNode)) {
+            continue;
+        }
+
+        // !!LBCMNoChain implementation: do not chain from a node whose value
+        // corresponds to the Line Break $CM character class.
+        // TODO: Remove this. !!LBCMNoChain is deprecated, and is not used
+        //       by any of the standard ICU rules.
+        if (fRB->fLBCMNoChain) {
+            UChar32 firstChar = this->fRB->fSetBuilder->getFirstChar(endNode->fVal);
+            // firstChar == -1 occurs with sets containing only the {eof} marker string.
+            if (firstChar != -1 &&
+                (ULineBreak)u_getIntPropertyValue(firstChar, UCHAR_LINE_BREAK) == U_LB_COMBINING_MARK) {
+                continue;
+            }
+        }
+
+        // Look through the possible match starts for leaves carrying the
+        // same character class as this ending node.
+        for (int32_t startNodeIx = 0; startNodeIx < matchStartNodes.size(); startNodeIx++) {
+            RBBINode *startNode = (RBBINode *)matchStartNodes.elementAt(startNodeIx);
+            if (startNode->fType == RBBINode::leafChar && endNode->fVal == startNode->fVal) {
+                // The class ending one match also starts another. Merging the
+                // start node's followPos into the end node's followPos lets a
+                // match state at endNode continue on to the second character
+                // of a match that begins with startNode.
+                setAdd(endNode->fFollowPos, startNode->fFollowPos);
+            }
+        }
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+//
+// bofFixup. Fixup for state tables that include {bof} beginning of input testing.
+// Do a swizzle similar to chaining, modifying the followPos set of
+// the bofNode to include the followPos nodes from other {bof} nodes
+// scattered through the tree.
+//
+// This function has much in common with calcChainedFollowPos().
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::bofFixup() {
+
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
+
+    // Expected shape of the parse tree:
+    //
+    //      fTree root --->   <cat>
+    //                        /   \ .
+    //                    <cat>   <#end node>
+    //                    /   \ .
+    //            <bofNode>   rest
+    //                        of tree
+    //
+    // The followPos set of <bofNode> is what gets extended here.
+    RBBINode *bofCat  = fTree->fLeftChild;
+    RBBINode *bofNode = bofCat->fLeftChild;
+    U_ASSERT(bofNode->fType == RBBINode::leafChar);
+    U_ASSERT(bofNode->fVal == 2);
+
+    // Candidates are the nodes that can begin a match of the user-written
+    // rules, i.e. the firstPos set of the "rest of tree" part; the fake
+    // bofNode itself is not among them.
+    UVector *matchStartNodes = bofCat->fRightChild->fFirstPosSet;
+
+    for (int startNodeIx = 0; startNodeIx < matchStartNodes->size(); startNodeIx++) {
+        RBBINode *startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx);
+        if (startNode->fType == RBBINode::leafChar && startNode->fVal == bofNode->fVal) {
+            // This leaf is a {bof} that was written explicitly in a rule.
+            // Whatever may follow it may also follow the fake bofNode at the
+            // start of the tree.
+            setAdd(bofNode->fFollowPos, startNode->fFollowPos);
+        }
+    }
+}
+
+//-----------------------------------------------------------------------------
+//
+// buildStateTable() Determine the set of runtime DFA states and the
+// transition tables for these states, by the algorithm
+// of fig. 3.44 in Aho.
+//
+// Most of the comments are quotes of Aho's pseudo-code.
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::buildStateTable() {
+ if (U_FAILURE(*fStatus)) {
+ return;
+ }
+ RBBIStateDescriptor *failState;
+ // Set it to NULL to avoid uninitialized warning
+ RBBIStateDescriptor *initialState = NULL;
+ //
+ // Add a dummy state 0 - the stop state. Not from Aho.
+ int lastInputSymbol = fRB->fSetBuilder->getNumCharCategories() - 1;
+ failState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
+ if (failState == NULL) {
+ *fStatus = U_MEMORY_ALLOCATION_ERROR;
+ goto ExitBuildSTdeleteall;
+ }
+ failState->fPositions = new UVector(*fStatus);
+ if (failState->fPositions == NULL) {
+ *fStatus = U_MEMORY_ALLOCATION_ERROR;
+ }
+ if (failState->fPositions == NULL || U_FAILURE(*fStatus)) {
+ goto ExitBuildSTdeleteall;
+ }
+ fDStates->addElement(failState, *fStatus);
+ if (U_FAILURE(*fStatus)) {
+ goto ExitBuildSTdeleteall;
+ }
+
+ // initially, the only unmarked state in Dstates is firstpos(root),
+ // where toot is the root of the syntax tree for (r)#;
+ initialState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
+ if (initialState == NULL) {
+ *fStatus = U_MEMORY_ALLOCATION_ERROR;
+ }
+ if (U_FAILURE(*fStatus)) {
+ goto ExitBuildSTdeleteall;
+ }
+ initialState->fPositions = new UVector(*fStatus);
+ if (initialState->fPositions == NULL) {
+ *fStatus = U_MEMORY_ALLOCATION_ERROR;
+ }
+ if (U_FAILURE(*fStatus)) {
+ goto ExitBuildSTdeleteall;
+ }
+ setAdd(initialState->fPositions, fTree->fFirstPosSet);
+ fDStates->addElement(initialState, *fStatus);
+ if (U_FAILURE(*fStatus)) {
+ goto ExitBuildSTdeleteall;
+ }
+
+ // while there is an unmarked state T in Dstates do begin
+ for (;;) {
+ RBBIStateDescriptor *T = NULL;
+ int32_t tx;
+ for (tx=1; tx<fDStates->size(); tx++) {
+ RBBIStateDescriptor *temp;
+ temp = (RBBIStateDescriptor *)fDStates->elementAt(tx);
+ if (temp->fMarked == FALSE) {
+ T = temp;
+ break;
+ }
+ }
+ if (T == NULL) {
+ break;
+ }
+
+ // mark T;
+ T->fMarked = TRUE;
+
+ // for each input symbol a do begin
+ int32_t a;
+ for (a = 1; a<=lastInputSymbol; a++) {
+ // let U be the set of positions that are in followpos(p)
+ // for some position p in T
+ // such that the symbol at position p is a;
+ UVector *U = NULL;
+ RBBINode *p;
+ int32_t px;
+ for (px=0; px<T->fPositions->size(); px++) {
+ p = (RBBINode *)T->fPositions->elementAt(px);
+ if ((p->fType == RBBINode::leafChar) && (p->fVal == a)) {
+ if (U == NULL) {
+ U = new UVector(*fStatus);
+ if (U == NULL) {
+ *fStatus = U_MEMORY_ALLOCATION_ERROR;
+ goto ExitBuildSTdeleteall;
+ }
+ }
+ setAdd(U, p->fFollowPos);
+ }
+ }
+
+ // if U is not empty and not in DStates then
+ int32_t ux = 0;
+ UBool UinDstates = FALSE;
+ if (U != NULL) {
+ U_ASSERT(U->size() > 0);
+ int ix;
+ for (ix=0; ix<fDStates->size(); ix++) {
+ RBBIStateDescriptor *temp2;
+ temp2 = (RBBIStateDescriptor *)fDStates->elementAt(ix);
+ if (setEquals(U, temp2->fPositions)) {
+ delete U;
+ U = temp2->fPositions;
+ ux = ix;
+ UinDstates = TRUE;
+ break;
+ }
+ }
+
+ // Add U as an unmarked state to Dstates
+ if (!UinDstates)
+ {
+ RBBIStateDescriptor *newState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
+ if (newState == NULL) {
+ *fStatus = U_MEMORY_ALLOCATION_ERROR;
+ }
+ if (U_FAILURE(*fStatus)) {
+ goto ExitBuildSTdeleteall;
+ }
+ newState->fPositions = U;
+ fDStates->addElement(newState, *fStatus);
+ if (U_FAILURE(*fStatus)) {
+ return;
+ }
+ ux = fDStates->size()-1;
+ }
+
+ // Dtran[T, a] := U;
+ T->fDtran->setElementAt(ux, a);
+ }
+ }
+ }
+ return;
+ // delete local pointers only if an error occurred.
+ExitBuildSTdeleteall:
+ delete initialState;
+ delete failState;
+}
+
+
+/**
+ *  mapLookAheadRules
+ *
+ *  Allocates fLookAheadRuleMap, sized to the scanner's rule count plus one, and
+ *  fills it so that the rule number of every look-ahead node maps to a
+ *  look-ahead slot. All look-ahead nodes covered by one state share a slot;
+ *  a fresh slot is obtained by incrementing fLASlotsInUse when none of them
+ *  has one yet. On allocation failure *fStatus is set and the function returns.
+ */
+void RBBITableBuilder::mapLookAheadRules() {
+    const int32_t mapSize = fRB->fScanner->numRules() + 1;
+    fLookAheadRuleMap = new UVector32(mapSize, *fStatus);
+    if (fLookAheadRuleMap == nullptr) {
+        *fStatus = U_MEMORY_ALLOCATION_ERROR;
+    }
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
+    fLookAheadRuleMap->setSize(mapSize);
+
+    const int32_t stateCount = fDStates->size();
+    for (int32_t stateIdx = 0; stateIdx < stateCount; ++stateIdx) {
+        RBBIStateDescriptor *state = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(stateIdx));
+        UVector *positions = state->fPositions;
+
+        // Pass 1: find out whether this state covers any look-ahead node
+        //         (the '/' of a look-ahead rule) and, if one of those nodes
+        //         already has a slot assigned, adopt that slot for the state.
+        int32_t slotForState = 0;
+        bool coversLookAhead = false;
+        for (int32_t posIdx = 0; posIdx < positions->size(); ++posIdx) {
+            RBBINode *node = static_cast<RBBINode *>(positions->elementAt(posIdx));
+            if (node->fType == RBBINode::NodeType::lookAhead) {
+                coversLookAhead = true;
+                int32_t ruleNum = node->fVal;     // Set when rule was originally parsed.
+                U_ASSERT(ruleNum < fLookAheadRuleMap->size());
+                U_ASSERT(ruleNum > 0);
+                int32_t assignedSlot = fLookAheadRuleMap->elementAti(ruleNum);
+                if (assignedSlot == 0) {
+                    continue;
+                }
+                if (slotForState == 0) {
+                    slotForState = assignedSlot;
+                } else {
+                    // TODO: figure out if this can fail, change to setting an error code if so.
+                    U_ASSERT(assignedSlot == slotForState);
+                }
+            }
+        }
+        if (!coversLookAhead) {
+            continue;
+        }
+
+        // No node brought a slot with it: take the next unused one.
+        if (slotForState == 0) {
+            slotForState = ++fLASlotsInUse;
+        }
+
+        // Pass 2: record the chosen slot for the rule number of every look-ahead
+        //         node in this state. Several rule numbers may share one slot.
+        for (int32_t posIdx = 0; posIdx < positions->size(); ++posIdx) {
+            RBBINode *node = static_cast<RBBINode *>(positions->elementAt(posIdx));
+            if (node->fType == RBBINode::NodeType::lookAhead) {
+                int32_t ruleNum = node->fVal;     // Set when rule was originally parsed.
+                int32_t previousSlot = fLookAheadRuleMap->elementAti(ruleNum);
+                (void)previousSlot;               // only read by the assert below
+                U_ASSERT(previousSlot == 0 || previousSlot == slotForState);
+                fLookAheadRuleMap->setElementAt(slotForState, ruleNum);
+            }
+        }
+    }
+
+}
+
+//-----------------------------------------------------------------------------
+//
+//   flagAcceptingStates    Identify accepting states.
+//
+//                          Collect every end marker node in the tree, then visit
+//                          each (end marker, state) pair. A state whose position
+//                          list contains an end marker is an accepting state and
+//                          gets a non-zero fAccepting value.
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::flagAcceptingStates() {
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
+
+    UVector endMarkerNodes(*fStatus);
+    if (U_FAILURE(*fStatus)) {
+        // Constructing the vector itself failed.
+        return;
+    }
+
+    fTree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus);
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
+
+    for (int32_t markerIdx = 0; markerIdx < endMarkerNodes.size(); ++markerIdx) {
+        RBBINode *endMarker = static_cast<RBBINode *>(endMarkerNodes.elementAt(markerIdx));
+
+        for (int32_t stateIdx = 0; stateIdx < fDStates->size(); ++stateIdx) {
+            RBBIStateDescriptor *sd = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(stateIdx));
+            if (sd->fPositions->indexOf(endMarker) < 0) {
+                // This state does not include the end marker.
+                continue;
+            }
+
+            // Any non-zero value for fAccepting means this is an accepting node.
+            // The value is what will be returned to the user as the break status.
+            // If no other value was specified, force it to ACCEPTING_UNCONDITIONAL (1).
+            if (sd->fAccepting == 0) {
+                // Not marked as accepting yet: start from the look-ahead slot
+                // mapped to the end marker's value, falling back to unconditional.
+                sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);
+                if (sd->fAccepting == 0) {
+                    sd->fAccepting = ACCEPTING_UNCONDITIONAL;
+                }
+            }
+
+            if (sd->fAccepting == ACCEPTING_UNCONDITIONAL && endMarker->fVal != 0) {
+                // Both lookahead and non-lookahead accepting for this state.
+                // Favor the look-ahead, because a look-ahead match needs to
+                // immediately stop the run-time engine. First match, not longest.
+                sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);
+            }
+            // Otherwise fAccepting already held a value other than 0 or 1
+            // and is left untouched.
+        }
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+//
+//    flagLookAheadStates   Very similar to flagAcceptingStates, above.
+//
+//                          For every look-ahead node in the tree, each state whose
+//                          position list contains that node has its fLookAhead set
+//                          to the slot that fLookAheadRuleMap holds for the node's
+//                          value.
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::flagLookAheadStates() {
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
+
+    UVector lookAheadNodes(*fStatus);
+    fTree->findNodes(&lookAheadNodes, RBBINode::lookAhead, *fStatus);
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
+
+    for (int32_t nodeIdx = 0; nodeIdx < lookAheadNodes.size(); ++nodeIdx) {
+        RBBINode *lookAheadNode = static_cast<RBBINode *>(lookAheadNodes.elementAt(nodeIdx));
+        U_ASSERT(lookAheadNode->fType == RBBINode::NodeType::lookAhead);
+
+        for (int32_t stateIdx = 0; stateIdx < fDStates->size(); ++stateIdx) {
+            RBBIStateDescriptor *sd = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(stateIdx));
+            int32_t positionsIdx = sd->fPositions->indexOf(lookAheadNode);
+            if (positionsIdx < 0) {
+                continue;
+            }
+            U_ASSERT(lookAheadNode == sd->fPositions->elementAt(positionsIdx));
+            uint32_t lookaheadSlot = fLookAheadRuleMap->elementAti(lookAheadNode->fVal);
+            // A state is expected to end up with a single look-ahead slot.
+            U_ASSERT(sd->fLookAhead == 0 || sd->fLookAhead == lookaheadSlot);
+            sd->fLookAhead = lookaheadSlot;
+        }
+    }
+}
+
+
+
+
+//-----------------------------------------------------------------------------
+//
+//  flagTaggedStates    For every tag node in the tree, add the node's value to the
+//                      sorted tag-value list (fTagVals) of each state whose
+//                      position list contains that node.
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::flagTaggedStates() {
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
+
+    UVector tagNodes(*fStatus);
+    if (U_FAILURE(*fStatus)) {
+        // Constructing the vector itself failed.
+        return;
+    }
+
+    fTree->findNodes(&tagNodes, RBBINode::tag, *fStatus);
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
+
+    // For each tag node t (all of them) ...
+    for (int32_t tagIdx = 0; tagIdx < tagNodes.size(); ++tagIdx) {
+        RBBINode *tagNode = static_cast<RBBINode *>(tagNodes.elementAt(tagIdx));
+
+        // ... and each state s (row in the state table) ...
+        for (int32_t stateIdx = 0; stateIdx < fDStates->size(); ++stateIdx) {
+            RBBIStateDescriptor *sd = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(stateIdx));
+            const bool stateHasTag = sd->fPositions->indexOf(tagNode) >= 0;
+            if (stateHasTag) {
+                // ... s includes t, so record t's value on s.
+                sortedAdd(&sd->fTagVals, tagNode->fVal);
+            }
+        }
+    }
+}
+
+
+
+
+//-----------------------------------------------------------------------------
+//
+//  mergeRuleStatusVals
+//
+//      Update the global table of rule status {tag} values.
+//      The rule builder has a global vector of status values that are common
+//      for all tables.  Merge the ones from this table into the global set.
+//
+//      The global vector is a sequence of groups, each laid out as
+//          [count, value_0, ..., value_(count-1)]
+//      and each state's fTagsIdx is set to the index of its group's count entry.
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::mergeRuleStatusVals() {
+    //
+    //  Outline:
+    //
+    //    for each state in this state table
+    //       if the state's tag list is already a group in the global list
+    //           record where, and go on to the next state
+    //       else
+    //           append the state's tag list to the global list as a new group.
+    //
+
+    // Pre-set a single tag of {0} into the table.
+    //   We will need this as a default, for rule sets with no explicit tagging.
+    if (fRB->fRuleStatusVals->size() == 0) {
+        fRB->fRuleStatusVals->addElement(1, *fStatus);                        // Num of statuses in group
+        fRB->fRuleStatusVals->addElement(static_cast<int32_t>(0), *fStatus);  // and our single status of zero
+    }
+
+    for (int32_t stateIdx = 0; stateIdx < fDStates->size(); ++stateIdx) {
+        RBBIStateDescriptor *sd = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(stateIdx));
+        UVector *stateTags = sd->fTagVals;
+        if (stateTags == NULL) {
+            // No tag values are explicitly associated with this state.
+            //   Use the default group at index 0.
+            sd->fTagsIdx = 0;
+            continue;
+        }
+
+        // There are tag(s) associated with this state.
+        //   -1 flags that no matching group has been found yet.
+        sd->fTagsIdx = -1;
+
+        // Walk the global list one group at a time.
+        int32_t nextGroupStart = 0;
+        while (nextGroupStart < fRB->fRuleStatusVals->size()) {
+            const int32_t groupStart = nextGroupStart;
+            const int32_t groupLen   = fRB->fRuleStatusVals->elementAti(groupStart);
+            nextGroupStart = groupStart + groupLen + 1;
+
+            if (stateTags->size() != groupLen) {
+                // Different number of tags; this group cannot match.
+                continue;
+            }
+
+            // Same length: compare the values element by element.
+            bool groupMatches = true;
+            for (int32_t tagIdx = 0; tagIdx < stateTags->size(); ++tagIdx) {
+                if (stateTags->elementAti(tagIdx) !=
+                        fRB->fRuleStatusVals->elementAti(groupStart + 1 + tagIdx)) {
+                    groupMatches = false;
+                    break;
+                }
+            }
+
+            if (groupMatches) {
+                // The global list already holds this exact set of tags. Use it.
+                sd->fTagsIdx = groupStart;
+                break;
+            }
+        }
+
+        if (sd->fTagsIdx == -1) {
+            // No suitable entry in the global tag list already.  Append one.
+            sd->fTagsIdx = fRB->fRuleStatusVals->size();
+            fRB->fRuleStatusVals->addElement(stateTags->size(), *fStatus);
+            for (int32_t tagIdx = 0; tagIdx < stateTags->size(); ++tagIdx) {
+                fRB->fRuleStatusVals->addElement(stateTags->elementAti(tagIdx), *fStatus);
+            }
+        }
+    }
+}
+
+
+
+
+
+
+
+//-----------------------------------------------------------------------------
+//
+//  sortedAdd  Add a value to a vector of sorted values (ints).
+//             Do not replicate entries; if the value is already there, do not
+//                add a second one.
+//             Lazily create the vector if it does not already exist.
+//
+//             vector   in/out: address of the vector pointer; *vector is
+//                      allocated here when it is NULL.
+//             val      the value to insert.
+//
+//             If the vector cannot be allocated, *fStatus is set to
+//             U_MEMORY_ALLOCATION_ERROR. Nothing is inserted when *fStatus
+//             indicates a failure.
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::sortedAdd(UVector **vector, int32_t val) {
+    int32_t i;
+
+    if (*vector == NULL) {
+        *vector = new UVector(*fStatus);
+        if (*vector == NULL) {
+            // Report the failed allocation instead of silently dropping the value.
+            *fStatus = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+    if (*vector == NULL || U_FAILURE(*fStatus)) {
+        return;
+    }
+    UVector *vec = *vector;
+    int32_t  vSize = vec->size();
+    for (i=0; i<vSize; i++) {
+        int32_t valAtI = vec->elementAti(i);
+        if (valAtI == val) {
+            // The value is already in the vector.  Don't add it again.
+            return;
+        }
+        if (valAtI > val) {
+            // First element larger than val: this is the insertion point.
+            break;
+        }
+    }
+    // i is either the index of the first larger element or vSize (append).
+    vec->insertElementAt(val, i, *fStatus);
+}
+
+
+
+//-----------------------------------------------------------------------------
+//
+//  setAdd     Set operation on UVector
+//             dest = dest union source
+//             Elements may only appear once and must be sorted.
+//
+//             Both vectors are copied into scratch arrays, then merged back
+//             into dest in a single pass.
+//
+//             If a scratch array cannot be allocated, *fStatus is set to
+//             U_MEMORY_ALLOCATION_ERROR. If dest cannot be resized, the function
+//             returns with *fStatus indicating the failure. In either case the
+//             merge is not started.
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::setAdd(UVector *dest, UVector *source) {
+    int32_t destOriginalSize = dest->size();
+    int32_t sourceSize       = source->size();
+    int32_t di           = 0;
+    MaybeStackArray<void *, 16> destArray, sourceArray;  // Handle small cases without malloc
+    void **destPtr, **sourcePtr;
+    void **destLim, **sourceLim;
+
+    if (destOriginalSize > destArray.getCapacity()) {
+        if (destArray.resize(destOriginalSize) == NULL) {
+            // Report the failure; a silent return would leave the union incomplete.
+            *fStatus = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+    }
+    destPtr = destArray.getAlias();
+    destLim = destPtr + destOriginalSize;  // destArray.getArrayLimit()?
+
+    if (sourceSize > sourceArray.getCapacity()) {
+        if (sourceArray.resize(sourceSize) == NULL) {
+            *fStatus = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+    }
+    sourcePtr = sourceArray.getAlias();
+    sourceLim = sourcePtr + sourceSize;  // sourceArray.getArrayLimit()?
+
+    // Avoid multiple "get element" calls by getting the contents into arrays
+    (void) dest->toArray(destPtr);
+    (void) source->toArray(sourcePtr);
+
+    // Make room for the worst case (no common elements) before merging.
+    dest->setSize(sourceSize+destOriginalSize, *fStatus);
+    if (U_FAILURE(*fStatus)) {
+        // Do not start writing merged elements into a vector that was not resized.
+        return;
+    }
+
+    while (sourcePtr < sourceLim && destPtr < destLim) {
+        if (*destPtr == *sourcePtr) {
+            // Same element in both sets: keep one copy, advance both.
+            dest->setElementAt(*sourcePtr++, di++);
+            destPtr++;
+        }
+        // This check is required for machines with segmented memory, like i5/OS.
+        // Direct pointer comparison is not recommended.
+        else if (uprv_memcmp(destPtr, sourcePtr, sizeof(void *)) < 0) {
+            dest->setElementAt(*destPtr++, di++);
+        }
+        else { /* *sourcePtr < *destPtr */
+            dest->setElementAt(*sourcePtr++, di++);
+        }
+    }
+
+    // At most one of these two cleanup loops will execute
+    while (destPtr < destLim) {
+        dest->setElementAt(*destPtr++, di++);
+    }
+    while (sourcePtr < sourceLim) {
+        dest->setElementAt(*sourcePtr++, di++);
+    }
+
+    // Trim dest down to the number of elements actually written.
+    dest->setSize(di, *fStatus);
+}
+
+
+
+//-----------------------------------------------------------------------------
+//
+//  setEquals  Set operation on UVector.
+//             Compare two sets for equality by delegating to UVector::equals.
+//             Elements must be sorted.
+//
+//-----------------------------------------------------------------------------
+UBool RBBITableBuilder::setEquals(UVector *a, UVector *b) {
+    const UBool sameContents = a->equals(*b);
+    return sameContents;
+}
+
+
+//-----------------------------------------------------------------------------
+//
+//  printPosSets   Debug function.  Dump Nullable, firstpos, lastpos and followpos
+//                 for node n, then recurse into its left and right children.
+//                 A NULL node prints nothing.
+//
+//-----------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+void RBBITableBuilder::printPosSets(RBBINode *n) {
+    if (n != NULL) {
+        printf("\n");
+        RBBINode::printNodeHeader();
+        RBBINode::printNode(n);
+
+        const char *nullableText = "FALSE";
+        if (n->fNullable) {
+            nullableText = "TRUE";
+        }
+        RBBIDebugPrintf(" Nullable: %s\n", nullableText);
+
+        RBBIDebugPrintf(" firstpos: ");
+        printSet(n->fFirstPosSet);
+
+        RBBIDebugPrintf(" lastpos: ");
+        printSet(n->fLastPosSet);
+
+        RBBIDebugPrintf(" followpos: ");
+        printSet(n->fFollowPos);
+
+        // Left subtree first, then right.
+        printPosSets(n->fLeftChild);
+        printPosSets(n->fRightChild);
+    }
+}
+#endif
+
+//
+//    findDuplCharClassFrom()
+//
+//    Search for a pair of character-category columns whose transitions are
+//    identical in every state. The search resumes from categories->first and
+//    advances the pair in place. Returns true with *categories holding the
+//    duplicate pair, or false when no further pair exists.
+//
+bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) {
+    const int32_t stateCount = fDStates->size();
+    const int32_t colCount   = fRB->fSetBuilder->getNumCharCategories();
+
+    for (; categories->first < colCount-1; categories->first++) {
+        // Note: dictionary & non-dictionary columns cannot be merged.
+        //       The upper limit for the second column prevents mixed pairs.
+        //       Dictionary categories are >= DictCategoriesStart.
+        //       Non dict categories are < DictCategoriesStart.
+        int32_t secondLimit = colCount;
+        if (categories->first < fRB->fSetBuilder->getDictCategoriesStart()) {
+            secondLimit = fRB->fSetBuilder->getDictCategoriesStart();
+        }
+
+        for (categories->second = categories->first+1; categories->second < secondLimit; categories->second++) {
+            // With no states at all there is nothing to compare, which counts
+            // as "no duplicate".
+            bool columnsIdentical = (stateCount > 0);
+            for (int32_t stateIdx = 0; stateIdx < stateCount; ++stateIdx) {
+                RBBIStateDescriptor *sd = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(stateIdx));
+                // Entries are compared as 16-bit values.
+                const uint16_t baseEntry = (uint16_t)sd->fDtran->elementAti(categories->first);
+                const uint16_t duplEntry = (uint16_t)sd->fDtran->elementAti(categories->second);
+                if (baseEntry != duplEntry) {
+                    columnsIdentical = false;
+                    break;
+                }
+            }
+            if (columnsIdentical) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+
+//
+//    removeColumn()
+//
+//    Delete the transition entry at index `column` from the fDtran vector of
+//    every state.
+//
+void RBBITableBuilder::removeColumn(int32_t column) {
+    for (int32_t stateIdx = 0, stateCount = fDStates->size(); stateIdx < stateCount; ++stateIdx) {
+        RBBIStateDescriptor *sd = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(stateIdx));
+        UVector32 *transitions = sd->fDtran;
+        U_ASSERT(column < transitions->size());
+        transitions->removeElementAt(column);
+    }
+}
+
+/*
+ * findDuplicateState
+ *
+ * Search for two states that can be merged. The search resumes from
+ * states->first and advances the pair in place.
+ *
+ * Two states qualify when their fAccepting, fLookAhead and fTagsIdx are equal
+ * and, in every column, their transitions are either equal or both refer to
+ * one of the two states of the pair.
+ *
+ * Returns true with *states holding the pair, or false when none remains.
+ */
+bool RBBITableBuilder::findDuplicateState(IntPair *states) {
+    const int32_t stateCount = fDStates->size();
+    const int32_t colCount   = fRB->fSetBuilder->getNumCharCategories();
+
+    for (; states->first < stateCount-1; states->first++) {
+        RBBIStateDescriptor *keepSD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(states->first));
+
+        for (states->second = states->first+1; states->second < stateCount; states->second++) {
+            RBBIStateDescriptor *candSD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(states->second));
+
+            const bool sameAttributes = keepSD->fAccepting == candSD->fAccepting &&
+                                        keepSD->fLookAhead == candSD->fLookAhead &&
+                                        keepSD->fTagsIdx   == candSD->fTagsIdx;
+            if (!sameAttributes) {
+                continue;
+            }
+
+            bool rowsMatch = true;
+            for (int32_t col = 0; col < colCount; ++col) {
+                const int32_t keepVal = keepSD->fDtran->elementAti(col);
+                const int32_t candVal = candSD->fDtran->elementAti(col);
+                if (keepVal == candVal) {
+                    continue;
+                }
+                // Differing targets are still acceptable when both point
+                // into the pair under consideration.
+                const bool keepInPair = (keepVal == states->first || keepVal == states->second);
+                const bool candInPair = (candVal == states->first || candVal == states->second);
+                if (!(keepInPair && candInPair)) {
+                    rowsMatch = false;
+                    break;
+                }
+            }
+            if (rowsMatch) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+
+/*
+ * findDuplicateSafeState
+ *
+ * Safe-table counterpart of findDuplicateState. Rows of fSafeTable are
+ * UnicodeStrings holding the next-state values. Two rows qualify when, in
+ * every column of the first row, their entries are either equal or both refer
+ * to one of the two rows of the pair. The search resumes from states->first
+ * and advances the pair in place.
+ *
+ * Returns true with *states holding the pair, or false when none remains.
+ */
+bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) {
+    const int32_t rowCount = fSafeTable->size();
+
+    for (; states->first < rowCount-1; states->first++) {
+        UnicodeString *keepRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->first));
+        const int32_t colCount = keepRow->length();
+
+        for (states->second = states->first+1; states->second < rowCount; states->second++) {
+            UnicodeString *candRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->second));
+
+            bool rowsMatch = true;
+            for (int32_t col = 0; col < colCount; ++col) {
+                const int32_t keepVal = keepRow->charAt(col);
+                const int32_t candVal = candRow->charAt(col);
+                if (keepVal == candVal) {
+                    continue;
+                }
+                // Differing targets are still acceptable when both point
+                // into the pair under consideration.
+                const bool keepInPair = (keepVal == states->first || keepVal == states->second);
+                const bool candInPair = (candVal == states->first || candVal == states->second);
+                if (!(keepInPair && candInPair)) {
+                    rowsMatch = false;
+                    break;
+                }
+            }
+            if (rowsMatch) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+
+/*
+ * removeState
+ *
+ * Remove and delete the state duplStates.second, then rewrite every transition
+ * in the remaining states: references to the removed state become
+ * duplStates.first, and references to higher-numbered states are decremented
+ * by one to follow the renumbering.
+ */
+void RBBITableBuilder::removeState(IntPair duplStates) {
+    const int32_t keepState = duplStates.first;
+    const int32_t duplState = duplStates.second;
+    U_ASSERT(keepState < duplState);
+    U_ASSERT(duplState < fDStates->size());
+
+    // Detach the descriptor from the vector before freeing it.
+    RBBIStateDescriptor *removedSD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(duplState));
+    fDStates->removeElementAt(duplState);
+    delete removedSD;
+
+    const int32_t stateCount = fDStates->size();
+    const int32_t colCount   = fRB->fSetBuilder->getNumCharCategories();
+    for (int32_t stateIdx = 0; stateIdx < stateCount; ++stateIdx) {
+        RBBIStateDescriptor *sd = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(stateIdx));
+        for (int32_t col = 0; col < colCount; ++col) {
+            const int32_t oldTarget = sd->fDtran->elementAti(col);
+            const int32_t newTarget = (oldTarget == duplState) ? keepState
+                                    : (oldTarget >  duplState) ? oldTarget - 1
+                                    :                            oldTarget;
+            sd->fDtran->setElementAt(newTarget, col);
+        }
+    }
+}
+
+/*
+ * removeSafeState
+ *
+ * Safe-table counterpart of removeState. Remove row duplStates.second from
+ * fSafeTable, then rewrite every entry of the remaining rows: references to
+ * the removed row become duplStates.first, and references to higher-numbered
+ * rows are decremented by one.
+ */
+void RBBITableBuilder::removeSafeState(IntPair duplStates) {
+    const int32_t keepState = duplStates.first;
+    const int32_t duplState = duplStates.second;
+    U_ASSERT(keepState < duplState);
+    U_ASSERT(duplState < fSafeTable->size());
+
+    // Note that fSafeTable has a deleter function
+    // and will auto-delete the removed element.
+    fSafeTable->removeElementAt(duplState);
+
+    const int32_t rowCount = fSafeTable->size();
+    for (int32_t rowIdx = 0; rowIdx < rowCount; ++rowIdx) {
+        UnicodeString *row = static_cast<UnicodeString *>(fSafeTable->elementAt(rowIdx));
+        const int32_t colCount = row->length();
+        for (int32_t col = 0; col < colCount; ++col) {
+            const int32_t oldTarget = row->charAt(col);
+            const int32_t newTarget = (oldTarget == duplState) ? keepState
+                                    : (oldTarget >  duplState) ? oldTarget - 1
+                                    :                            oldTarget;
+            row->setCharAt(col, static_cast<char16_t>(newTarget));
+        }
+    }
+}
+
+
+/*
+ * removeDuplicateStates
+ *
+ * Repeatedly find a pair of duplicate states and remove the second of the
+ * pair until findDuplicateState() reports no more. The search begins with
+ * state 3 as the first member of the pair.
+ *
+ * Returns the number of states removed.
+ */
+int32_t RBBITableBuilder::removeDuplicateStates() {
+    int32_t removedCount = 0;
+    for (IntPair candidate = {3, 0}; findDuplicateState(&candidate); ++removedCount) {
+        // printf("Removing duplicate states (%d, %d)\n", candidate.first, candidate.second);
+        removeState(candidate);
+    }
+    return removedCount;
+}
+
+
+//-----------------------------------------------------------------------------
+//
+//   getTableSize()    Calculate the size, in bytes, of the runtime form of this
+//                     state transition table: the table header plus one row
+//                     per state. Returns 0 when there is no parse tree.
+//
+//-----------------------------------------------------------------------------
+int32_t RBBITableBuilder::getTableSize() const {
+    if (fTree == NULL) {
+        return 0;
+    }
+
+    // The header, with no rows to the table.
+    const int32_t headerSize = static_cast<int32_t>(offsetof(RBBIStateTable, fTableData));
+
+    const int32_t rowCount = fDStates->size();
+    const int32_t colCount = fRB->fSetBuilder->getNumCharCategories();
+
+    // Row layout depends on whether next-state entries are 8 or 16 bits wide.
+    const int32_t bytesPerRow = use8BitsForTable()
+        ? static_cast<int32_t>(offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*colCount)
+        : static_cast<int32_t>(offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*colCount);
+
+    return headerSize + rowCount * bytesPerRow;
+}
+
+// True when the number of states does not exceed kMaxStateFor8BitsTable,
+// in which case the exported table uses the 8-bit row layout.
+bool RBBITableBuilder::use8BitsForTable() const {
+    const int32_t stateCount = fDStates->size();
+    return stateCount <= kMaxStateFor8BitsTable;
+}
+
+//-----------------------------------------------------------------------------
+//
+//   exportTable()    export the state transition table in the format required
+//                    by the runtime engine.  getTableSize() bytes of memory
+//                    must be available at the output address "where".
+//
+//                    Does nothing if *fStatus already indicates a failure or
+//                    there is no parse tree. Sets U_BRK_INTERNAL_ERROR when the
+//                    category count or the state count exceeds 0x7fff.
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::exportTable(void *where) {
+    if (U_FAILURE(*fStatus) || fTree == NULL) {
+        return;
+    }
+
+    const int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
+    if (catCount > 0x7fff || fDStates->size() > 0x7fff) {
+        *fStatus = U_BRK_INTERNAL_ERROR;
+        return;
+    }
+
+    RBBIStateTable *table = static_cast<RBBIStateTable *>(where);
+    const bool narrowRows = use8BitsForTable();
+
+    // ---- Table header ----
+    table->fNumStates = fDStates->size();
+    table->fDictCategoriesStart = fRB->fSetBuilder->getDictCategoriesStart();
+    table->fLookAheadResultsSize = fLASlotsInUse == ACCEPTING_UNCONDITIONAL ? 0 : fLASlotsInUse + 1;
+    table->fFlags = 0;
+    if (narrowRows) {
+        table->fRowLen = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount;
+        table->fFlags |= RBBI_8BITS_ROWS;
+    } else {
+        table->fRowLen = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount;
+    }
+    if (fRB->fLookAheadHardBreak) {
+        table->fFlags |= RBBI_LOOKAHEAD_HARD_BREAK;
+    }
+    if (fRB->fSetBuilder->sawBOF()) {
+        table->fFlags |= RBBI_BOF_REQUIRED;
+    }
+
+    // ---- One row per state ----
+    for (uint32_t state = 0; state < table->fNumStates; state++) {
+        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
+        // Rows are laid out back to back, fRowLen bytes apart.
+        RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen);
+
+        if (narrowRows) {
+            U_ASSERT (sd->fAccepting <= 255);
+            U_ASSERT (sd->fLookAhead <= 255);
+            U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 255);
+            row->r8.fAccepting = sd->fAccepting;
+            row->r8.fLookAhead = sd->fLookAhead;
+            row->r8.fTagsIdx = sd->fTagsIdx;
+            for (int col = 0; col < catCount; col++) {
+                U_ASSERT (sd->fDtran->elementAti(col) <= kMaxStateFor8BitsTable);
+                row->r8.fNextState[col] = sd->fDtran->elementAti(col);
+            }
+        } else {
+            U_ASSERT (sd->fAccepting <= 0xffff);
+            U_ASSERT (sd->fLookAhead <= 0xffff);
+            U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 0xffff);
+            row->r16.fAccepting = sd->fAccepting;
+            row->r16.fLookAhead = sd->fLookAhead;
+            row->r16.fTagsIdx = sd->fTagsIdx;
+            for (int col = 0; col < catCount; col++) {
+                row->r16.fNextState[col] = sd->fDtran->elementAti(col);
+            }
+        }
+    }
+}
+
+
+/**
+ *   Synthesize a safe state table from the main state table.
+ *
+ *   The result is stored in fSafeTable as a vector of UnicodeString rows.
+ *
+ *   @param status  Receives U_MEMORY_ALLOCATION_ERROR if the table or one of its
+ *                  rows cannot be allocated. The function returns as soon as
+ *                  status indicates a failure during table construction;
+ *                  fSafeTable may then be incomplete.
+ */
+void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
+    // The safe table creation has three steps:
+
+    // 1. Identify pairs of character classes that are "safe." Safe means that boundaries
+    // following the pair do not depend on context or state before the pair. To test
+    // whether a pair is safe, run it through the main forward state table, starting
+    // from each state. If the final state is the same, no matter what the starting state,
+    // the pair is safe.
+    //
+    // 2. Build a state table that recognizes the safe pairs. It's similar to their
+    // forward table, with a column for each input character [class], and a row for
+    // each state. Row 1 is the start state, and row 0 is the stop state. Initially
+    // create an additional state for each input character category; being in
+    // one of these states means that the character has been seen, and is potentially
+    // the first of a pair. In each of these rows, the entry for the second character
+    // of a safe pair is set to the stop state (0), indicating that a match was found.
+    // All other table entries are set to the state corresponding the current input
+    // character, allowing that character to be the start of a following pair.
+    //
+    // Because the safe rules are to be run in reverse, moving backwards in the text,
+    // the first and second pair categories are swapped when building the table.
+    //
+    // 3. Compress the table. There are typically many rows (states) that are
+    // equivalent - that have zeroes (match completed) in the same columns -
+    // and can be folded together.
+
+    // Each safe pair is stored as two UChars in the safePair string.
+    UnicodeString safePairs;
+
+    int32_t numCharClasses = fRB->fSetBuilder->getNumCharCategories();
+    int32_t numStates = fDStates->size();
+
+    for (int32_t c1=0; c1<numCharClasses; ++c1) {
+        for (int32_t c2=0; c2 < numCharClasses; ++c2) {
+            int32_t wantedEndState = -1;
+            int32_t endState = 0;
+            for (int32_t startState = 1; startState < numStates; ++startState) {
+                RBBIStateDescriptor *startStateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(startState));
+                int32_t s2 = startStateD->fDtran->elementAti(c1);
+                RBBIStateDescriptor *s2StateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(s2));
+                endState = s2StateD->fDtran->elementAti(c2);
+                if (wantedEndState < 0) {
+                    // First start state examined: remember where the pair led.
+                    wantedEndState = endState;
+                } else {
+                    if (wantedEndState != endState) {
+                        // The end state depends on the start state; not a safe pair.
+                        break;
+                    }
+                }
+            }
+            if (wantedEndState == endState) {
+                safePairs.append((char16_t)c1);
+                safePairs.append((char16_t)c2);
+                // printf("(%d, %d) ", c1, c2);
+            }
+        }
+        // printf("\n");
+    }
+
+    // Populate the initial safe table.
+    // The table as a whole is UVector<UnicodeString>
+    // Each row is represented by a UnicodeString, being used as a Vector<int16>.
+    // Row 0 is the stop state.
+    // Row 1 is the start state.
+    // Row 2 and beyond are other states, initially one per char class, but
+    // after initial construction, many of the states will be combined, compacting the table.
+    // The String holds the nextState data only. The four leading fields of a row, fAccepting,
+    // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building.
+
+    U_ASSERT(fSafeTable == nullptr);
+    fSafeTable = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, numCharClasses + 2, status);
+    if (fSafeTable == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    if (U_FAILURE(status)) {
+        // The rows below are dereferenced unconditionally; do not go on
+        // with a missing or unusable table.
+        return;
+    }
+    for (int32_t row=0; row<numCharClasses + 2; ++row) {
+        UnicodeString *rowString = new UnicodeString(numCharClasses, 0, numCharClasses+4);
+        if (rowString == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        fSafeTable->addElement(rowString, status);
+        if (U_FAILURE(status)) {
+            return;
+        }
+    }
+
+    // From the start state, each input char class transitions to the state for that input.
+    UnicodeString &startState = *static_cast<UnicodeString *>(fSafeTable->elementAt(1));
+    for (int32_t charClass=0; charClass < numCharClasses; ++charClass) {
+        // Note: +2 for the start & stop state.
+        startState.setCharAt(charClass, static_cast<char16_t>(charClass+2));
+    }
+
+    // Initially make every other state table row look like the start state row,
+    for (int32_t row=2; row<numCharClasses+2; ++row) {
+        UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(row));
+        rowState = startState;   // UnicodeString assignment, copies contents.
+    }
+
+    // Run through the safe pairs, set the next state to zero when pair has been seen.
+    // Zero being the stop state, meaning we found a safe point.
+    for (int32_t pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) {
+        int32_t c1 = safePairs.charAt(pairIdx);
+        int32_t c2 = safePairs.charAt(pairIdx + 1);
+
+        // Categories are swapped here because the table is run in reverse.
+        UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(c2 + 2));
+        rowState.setCharAt(c1, 0);
+    }
+
+    // Remove duplicate or redundant rows from the table.
+    IntPair states = {1, 0};
+    while (findDuplicateSafeState(&states)) {
+        // printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
+        removeSafeState(states);
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+//
+//   getSafeTableSize()    Calculate the size, in bytes, of the runtime form of
+//                         this safe state table: the table header plus one row
+//                         per safe-table state. Returns 0 when no safe table
+//                         has been built.
+//
+//-----------------------------------------------------------------------------
+int32_t RBBITableBuilder::getSafeTableSize() const {
+    if (fSafeTable == nullptr) {
+        return 0;
+    }
+
+    // The header, with no rows to the table.
+    const int32_t headerSize = static_cast<int32_t>(offsetof(RBBIStateTable, fTableData));
+
+    const int32_t rowCount = fSafeTable->size();
+    const int32_t colCount = fRB->fSetBuilder->getNumCharCategories();
+
+    // Row layout depends on whether next-state entries are 8 or 16 bits wide.
+    const int32_t bytesPerRow = use8BitsForSafeTable()
+        ? static_cast<int32_t>(offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*colCount)
+        : static_cast<int32_t>(offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*colCount);
+
+    return headerSize + rowCount * bytesPerRow;
+}
+
+// True when the number of safe-table rows does not exceed kMaxStateFor8BitsTable,
+// in which case the exported safe table uses the 8-bit row layout.
+// fSafeTable is dereferenced without a null check.
+bool RBBITableBuilder::use8BitsForSafeTable() const {
+    const int32_t rowCount = fSafeTable->size();
+    return rowCount <= kMaxStateFor8BitsTable;
+}
+
+//-----------------------------------------------------------------------------
+//
+//   exportSafeTable()   export the safe state transition table in the format
+//                       required by the runtime engine.  getSafeTableSize()
+//                       bytes of memory must be available at the output
+//                       address "where".
+//
+//                       Does nothing if *fStatus already indicates a failure or
+//                       no safe table has been built. Sets U_BRK_INTERNAL_ERROR
+//                       when the category count or the row count exceeds 0x7fff.
+//                       Of the header, only fNumStates, fFlags and fRowLen are
+//                       written here.
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::exportSafeTable(void *where) {
+    if (U_FAILURE(*fStatus) || fSafeTable == nullptr) {
+        return;
+    }
+
+    const int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
+    if (catCount > 0x7fff || fSafeTable->size() > 0x7fff) {
+        *fStatus = U_BRK_INTERNAL_ERROR;
+        return;
+    }
+
+    RBBIStateTable *table = static_cast<RBBIStateTable *>(where);
+    const bool narrowRows = use8BitsForSafeTable();
+
+    // ---- Table header ----
+    table->fNumStates = fSafeTable->size();
+    table->fFlags = 0;
+    if (narrowRows) {
+        table->fRowLen = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount;
+        table->fFlags |= RBBI_8BITS_ROWS;
+    } else {
+        table->fRowLen = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount;
+    }
+
+    // ---- One row per safe-table state ----
+    for (uint32_t state = 0; state < table->fNumStates; state++) {
+        UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(state);
+        // Rows are laid out back to back, fRowLen bytes apart.
+        RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen);
+
+        // The safe table carries next-state data only; the leading fields are zeroed.
+        if (narrowRows) {
+            row->r8.fAccepting = 0;
+            row->r8.fLookAhead = 0;
+            row->r8.fTagsIdx = 0;
+            for (int col = 0; col < catCount; col++) {
+                U_ASSERT(rowString->charAt(col) <= kMaxStateFor8BitsTable);
+                row->r8.fNextState[col] = static_cast<uint8_t>(rowString->charAt(col));
+            }
+        } else {
+            row->r16.fAccepting = 0;
+            row->r16.fLookAhead = 0;
+            row->r16.fTagsIdx = 0;
+            for (int col = 0; col < catCount; col++) {
+                row->r16.fNextState[col] = rowString->charAt(col);
+            }
+        }
+    }
+}
+
+
+
+
+//-----------------------------------------------------------------------------
+//
+// printSet Debug function. Print the contents of a UVector
+//
+//-----------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+void RBBITableBuilder::printSet(UVector *s) {
+    // Print each node's serial number in a 5-wide column; null entries show as -1.
+    for (int32_t idx = 0; idx < s->size(); ++idx) {
+        const RBBINode *node = static_cast<const RBBINode *>(s->elementAt(idx));
+        RBBIDebugPrintf("%5d", node != NULL ? node->fSerialNum : -1);
+    }
+    RBBIDebugPrintf("\n");
+}
+#endif
+
+
+//-----------------------------------------------------------------------------
+//
+// printStates Debug Function. Dump the fully constructed state transition table.
+//
+//-----------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+void RBBITableBuilder::printStates() {
+    // Header: the fixed Acc/LA/Tag columns, then one numbered column per
+    // character category, followed by a ruler line of matching width.
+    RBBIDebugPrintf("state | i n p u t s y m b o l s \n");
+    RBBIDebugPrintf(" | Acc LA Tag");
+    for (int cat = 0; cat < fRB->fSetBuilder->getNumCharCategories(); ++cat) {
+        RBBIDebugPrintf(" %3d", cat);
+    }
+    RBBIDebugPrintf("\n");
+    RBBIDebugPrintf(" |---------------");
+    for (int cat = 0; cat < fRB->fSetBuilder->getNumCharCategories(); ++cat) {
+        RBBIDebugPrintf("----");
+    }
+    RBBIDebugPrintf("\n");
+
+    // Body: one line per entry of fDStates, with its flags and its transition for every category.
+    for (int stateNum = 0; stateNum < fDStates->size(); ++stateNum) {
+        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(stateNum);
+        RBBIDebugPrintf(" %3d | " , stateNum);
+        RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTagsIdx);
+        for (int cat = 0; cat < fRB->fSetBuilder->getNumCharCategories(); ++cat) {
+            RBBIDebugPrintf(" %3d", sd->fDtran->elementAti(cat));
+        }
+        RBBIDebugPrintf("\n");
+    }
+    RBBIDebugPrintf("\n\n");
+}
+#endif
+
+
+//-----------------------------------------------------------------------------
+//
+// printReverseTable Debug Function. Dump the fully constructed safe reverse table.
+//
+//-----------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+void RBBITableBuilder::printReverseTable() {
+    RBBIDebugPrintf(" Safe Reverse Table \n");
+    if (fSafeTable == nullptr) {
+        RBBIDebugPrintf(" --- nullptr ---\n");
+        return;
+    }
+
+    // Header: the fixed columns, then one numbered column per character category.
+    RBBIDebugPrintf("state | i n p u t s y m b o l s \n");
+    RBBIDebugPrintf(" | Acc LA Tag");
+    for (int cat = 0; cat < fRB->fSetBuilder->getNumCharCategories(); ++cat) {
+        RBBIDebugPrintf(" %2d", cat);
+    }
+    RBBIDebugPrintf("\n");
+    RBBIDebugPrintf(" |---------------");
+    for (int cat = 0; cat < fRB->fSetBuilder->getNumCharCategories(); ++cat) {
+        RBBIDebugPrintf("---");
+    }
+    RBBIDebugPrintf("\n");
+
+    // Body: each safe-table row is a UnicodeString; its code units are printed as the per-category entries.
+    for (int rowNum = 0; rowNum < fSafeTable->size(); ++rowNum) {
+        UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(rowNum);
+        RBBIDebugPrintf(" %3d | " , rowNum);
+        RBBIDebugPrintf("%3d %3d %5d ", 0, 0, 0); // Accepting, LookAhead, Tags
+        for (int cat = 0; cat < fRB->fSetBuilder->getNumCharCategories(); ++cat) {
+            RBBIDebugPrintf(" %2d", rowString->charAt(cat));
+        }
+        RBBIDebugPrintf("\n");
+    }
+    RBBIDebugPrintf("\n\n");
+}
+#endif
+
+
+
+//-----------------------------------------------------------------------------
+//
+// printRuleStatusTable Debug Function. Dump the common rule status table
+//
+//-----------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+void RBBITableBuilder::printRuleStatusTable() {
+    UVector *statusVals = fRB->fRuleStatusVals;
+
+    RBBIDebugPrintf("index | tags \n");
+    RBBIDebugPrintf("-------------------\n");
+
+    // The vector is walked record by record: the element at a record's start
+    // index is a count, the next "count" elements are printed as its values,
+    // and the following record begins immediately after them.
+    int32_t recordEnd = 0;
+    for (int32_t recordStart = 0; recordStart < statusVals->size(); recordStart = recordEnd) {
+        recordEnd = recordStart + statusVals->elementAti(recordStart) + 1;
+        RBBIDebugPrintf("%4d ", recordStart);
+        for (int slot = recordStart + 1; slot < recordEnd; ++slot) {
+            RBBIDebugPrintf(" %5d", statusVals->elementAti(slot));
+        }
+        RBBIDebugPrintf("\n");
+    }
+    RBBIDebugPrintf("\n\n");
+}
+#endif
+
+
+//-----------------------------------------------------------------------------
+//
+// RBBIStateDescriptor Methods. This is a very struct-like class
+// Most access is directly to the fields.
+//
+//-----------------------------------------------------------------------------
+
+RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatus) {
+    // Clear the flag and pointer fields first so the destructor is safe after any early return.
+    fMarked = FALSE;
+    fAccepting = 0;
+    fLookAhead = 0;
+    fTagsIdx = 0;
+    fTagVals = NULL;
+    fPositions = NULL;
+
+    // One transition slot per input symbol, 0 through lastInputSymbol inclusive.
+    const int slotCount = lastInputSymbol + 1;
+    fDtran = new UVector32(slotCount, *fStatus);
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
+    if (fDtran == NULL) {
+        *fStatus = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    // Pre-size the vector: it is indexed by input symbol and holds the next state number for each.
+    fDtran->setSize(slotCount);
+}
+
+
+RBBIStateDescriptor::~RBBIStateDescriptor() {
+    delete fPositions; // free each owned vector, then clear its pointer
+    fPositions = NULL;
+    delete fDtran;
+    fDtran = NULL;
+    delete fTagVals;
+    fTagVals = NULL;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/thirdparty/icu4c/common/rbbitblb.h b/thirdparty/icu4c/common/rbbitblb.h
new file mode 100644
index 0000000000..fe3db8d7bf
--- /dev/null
+++ b/thirdparty/icu4c/common/rbbitblb.h
@@ -0,0 +1,232 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// rbbitblb.h
+//
+
+/*
+**********************************************************************
+* Copyright (c) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#ifndef RBBITBLB_H
+#define RBBITBLB_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/uobject.h"
+#include "unicode/rbbi.h"
+#include "rbbidata.h"
+#include "rbbirb.h"
+#include "rbbinode.h"
+
+
+U_NAMESPACE_BEGIN
+
+class RBBIRuleScanner;
+class RBBIRuleBuilder;
+class UVector32;
+
+//
+// class RBBITableBuilder is part of the RBBI rule compiler.
+// It builds the state transition table used by the RBBI runtime
+// from the expression syntax tree generated by the rule scanner.
+//
+// This class is part of the RBBI implementation only.
+// There is no user-visible public API here.
+//
+
+class RBBITableBuilder : public UMemory {
+public:
+ RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status);
+ ~RBBITableBuilder();
+
+ void buildForwardTable();
+
+ /** Return the runtime size in bytes of the built state table. */
+ int32_t getTableSize() const;
+
+ /** Fill in the runtime state table. Sufficient memory must exist at the specified location.
+ */
+ void exportTable(void *where);
+
+ /** Use 8 bits to encode the forward table */
+ bool use8BitsForTable() const;
+
+ /**
+ * Find duplicate (redundant) character classes. Begin looking with categories.first.
+ * Duplicates, if found, are returned in the categories parameter.
+ * This is an iterator-like function, used to identify character classes
+ * (state table columns) that can be eliminated.
+ * @param categories in/out parameter, specifies where to start looking for duplicates,
+ * and returns the first pair of duplicates found, if any.
+ * @return true if duplicate char classes were found, false otherwise.
+ */
+ bool findDuplCharClassFrom(IntPair *categories);
+
+ /** Remove a column from the state table. Used when two character categories
+ * have been found equivalent, and merged together, to eliminate the unneeded table column.
+ */
+ void removeColumn(int32_t column);
+
+ /**
+ * Check for, and remove duplicate states (table rows).
+ * @return the number of states removed.
+ */
+ int32_t removeDuplicateStates();
+
+ /** Build the safe reverse table from the already-constructed forward table. */
+ void buildSafeReverseTable(UErrorCode &status);
+
+ /** Return the runtime size in bytes of the built safe reverse state table. */
+ int32_t getSafeTableSize() const;
+
+ /** Fill in the runtime safe state table. Sufficient memory must exist at the specified location.
+ */
+ void exportSafeTable(void *where);
+
+ /** Use 8 bits to encode the safe reverse table */
+ bool use8BitsForSafeTable() const;
+
+private:
+ void calcNullable(RBBINode *n);
+ void calcFirstPos(RBBINode *n);
+ void calcLastPos(RBBINode *n);
+ void calcFollowPos(RBBINode *n);
+ void calcChainedFollowPos(RBBINode *n, RBBINode *endMarkNode);
+ void bofFixup();
+ void buildStateTable();
+ void mapLookAheadRules();
+ void flagAcceptingStates();
+ void flagLookAheadStates();
+ void flagTaggedStates();
+ void mergeRuleStatusVals();
+
+ /**
+ * Merge redundant state table columns, eliminating character classes with identical behavior.
+ * Done after the state tables are generated, just before converting to their run-time format.
+ */
+ int32_t mergeColumns();
+
+ void addRuleRootNodes(UVector *dest, RBBINode *node);
+
+ /**
+ * Find duplicate (redundant) states, beginning at the specified pair,
+ * within this state table. This is an iterator-like function, used to
+ * identify states (state table rows) that can be eliminated.
+ * @param states in/out parameter, specifies where to start looking for duplicates,
+ * and returns the first pair of duplicates found, if any.
+ * @return true if duplicate states were found, false otherwise.
+ */
+ bool findDuplicateState(IntPair *states);
+
+ /** Remove a duplicate state.
+ * @param duplStates The duplicate states. The first is kept, the second is removed.
+ * All references to the second in the state table are retargeted
+ * to the first.
+ */
+ void removeState(IntPair duplStates);
+
+ /** Find the next duplicate state in the safe reverse table. An iterator function.
+ * @param states in/out parameter, specifies where to start looking for duplicates,
+ * and returns the first pair of duplicates found, if any.
+ * @return true if a duplicate pair of states was found.
+ */
+ bool findDuplicateSafeState(IntPair *states);
+
+ /** Remove a duplicate state from the safe table.
+ * @param duplStates The duplicate states. The first is kept, the second is removed.
+ * All references to the second in the state table are retargeted
+ * to the first.
+ */
+ void removeSafeState(IntPair duplStates);
+
+ // Set functions for UVector.
+ // TODO: make a USet subclass of UVector
+
+ void setAdd(UVector *dest, UVector *source);
+ UBool setEquals(UVector *a, UVector *b);
+
+ void sortedAdd(UVector **dest, int32_t val);
+
+public: // Debug printers; without RBBI_DEBUG the names below are defined as empty macros.
+#ifdef RBBI_DEBUG
+ void printSet(UVector *s);
+ void printPosSets(RBBINode *n /* = NULL*/);
+ void printStates();
+ void printRuleStatusTable();
+ void printReverseTable();
+#else
+ #define printSet(s)
+ #define printPosSets(n)
+ #define printStates()
+ #define printRuleStatusTable()
+ #define printReverseTable()
+#endif
+
+private:
+ RBBIRuleBuilder *fRB;
+ RBBINode *&fTree; // The root node of the parse tree to build a
+ // table for.
+ UErrorCode *fStatus; // Read and written through this pointer, e.g. by exportSafeTable().
+
+ /** State Descriptors, UVector<RBBIStateDescriptor> */
+ UVector *fDStates; // D states (Aho's terminology)
+ // Index is state number
+ // Contents are RBBIStateDescriptor pointers.
+
+ /** Synthesized safe table, UVector of UnicodeString, one string per table row. May be nullptr; exportSafeTable() and printReverseTable() check for that. */
+ UVector *fSafeTable;
+
+ /** Map from rule number (fVal in look ahead nodes) to sequential lookahead index. */
+ UVector32 *fLookAheadRuleMap = nullptr;
+
+ /* Counter used when assigning lookahead rule numbers.
+ * Contains the last look-ahead number already in use.
+ * The first look-ahead number is 2; Number 1 (ACCEPTING_UNCONDITIONAL) is reserved
+ * for non-lookahead accepting states. See the declarations of RBBIStateTableRowT. */
+ int32_t fLASlotsInUse = ACCEPTING_UNCONDITIONAL;
+
+
+ RBBITableBuilder(const RBBITableBuilder &other) = delete; // forbid copying of this class
+ RBBITableBuilder &operator=(const RBBITableBuilder &other) = delete; // forbid copying of this class
+};
+
+//
+// RBBIStateDescriptor - The DFA is constructed as a set of these descriptors,
+// one for each state.
+class RBBIStateDescriptor : public UMemory {
+public:
+    UBool fMarked;
+    uint32_t fAccepting;          // printed in the "Acc" column by printStates()
+    uint32_t fLookAhead;          // printed in the "LA" column by printStates()
+    UVector *fTagVals;            // owned; deleted by the destructor
+    int32_t fTagsIdx;             // printed in the "Tag" column by printStates()
+    UVector *fPositions;          // Set of parse tree positions associated
+                                  // with this state. Unordered (it's a set).
+                                  // UVector contents are RBBINode *
+
+    UVector32 *fDtran;            // Transitions out of this state.
+                                  // indexed by input character
+                                  // contents is int index of dest state
+                                  // in RBBITableBuilder.fDStates
+
+    RBBIStateDescriptor(int maxInputSymbol, UErrorCode *fStatus); // allocates fDtran with maxInputSymbol+1 slots; failures are reported through fStatus
+    ~RBBIStateDescriptor();       // deletes fPositions, fDtran and fTagVals
+
+private:
+    RBBIStateDescriptor(const RBBIStateDescriptor &other) = delete; // forbid copying of this class
+    RBBIStateDescriptor &operator=(const RBBIStateDescriptor &other) = delete; // forbid copying of this class
+};
+
+
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif
diff --git a/thirdparty/icu4c/common/resbund.cpp b/thirdparty/icu4c/common/resbund.cpp
new file mode 100644
index 0000000000..7c5063b211
--- /dev/null
+++ b/thirdparty/icu4c/common/resbund.cpp
@@ -0,0 +1,399 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1997-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File resbund.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 02/05/97 aliu Fixed bug in chopLocale. Added scanForLocaleInFile
+* based on code taken from scanForLocale. Added
+* constructor which attempts to read resource bundle
+* from a specific file, without searching other files.
+* 02/11/97 aliu Added UErrorCode return values to constructors. Fixed
+* infinite loops in scanForFile and scanForLocale.
+* Modified getRawResourceData to not delete storage in
+* localeData and resourceData which it doesn't own.
+* Added Mac compatibility #ifdefs for tellp() and
+* ios::nocreate.
+* 03/04/97 aliu Modified to use ExpandingDataSink objects instead of
+* the highly inefficient ostrstream objects.
+* 03/13/97 aliu Rewrote to load in entire resource bundle and store
+* it as a Hashtable of ResourceBundleData objects.
+* Added state table to govern parsing of files.
+* Modified to load locale index out of new file distinct
+* from default.txt.
+* 03/25/97 aliu Modified to support 2-d arrays, needed for timezone data.
+* Added support for custom file suffixes. Again, needed
+* to support timezone data. Improved error handling to
+* detect duplicate tags and subtags.
+* 04/07/97 aliu Fixed bug in getHashtableForLocale(). Fixed handling
+* of failing UErrorCode values on entry to API methods.
+* Fixed bugs in getArrayItem() for negative indices.
+* 04/29/97 aliu Update to use new Hashtable deletion protocol.
+* 05/06/97 aliu Flattened kTransitionTable for HP compiler.
+* Fixed usage of CharString.
+* 06/11/99 stephen Removed parsing of .txt files.
+* Reworked to use new binary format.
+* Cleaned up.
+* 06/14/99 stephen Removed methods taking a filename suffix.
+* 06/22/99 stephen Added missing T_FileStream_close in parse()
+* 11/09/99 weiv Added getLocale(), rewritten constructForLocale()
+* March 2000 weiv complete overhaul.
+******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/resbund.h"
+
+#include "cmemory.h"
+#include "mutex.h"
+#include "uassert.h"
+#include "umutex.h"
+
+#include "uresimp.h"
+
+U_NAMESPACE_BEGIN
+
+/*-----------------------------------------------------------------------------
+ * Implementation Notes
+ *
+ * Resource bundles are read in once, and thereafter cached.
+ * ResourceBundle statically keeps track of which files have been
+ * read, so we are guaranteed that each file is read at most once.
+ * Resource bundles can be loaded from different data directories and
+ * will be treated as distinct, even if they are for the same locale.
+ *
+ * Resource bundles are lightweight objects, which have pointers to
+ * one or more shared Hashtable objects containing all the data.
+ * Copying would be cheap, but there is no copy constructor, since
+ * there wasn't one in the original API.
+ *
+ * The ResourceBundle parsing mechanism is implemented as a transition
+ * network, for easy maintenance and modification. The network is
+ * implemented as a matrix (instead of in code) to make this even
+ * easier. The matrix contains Transition objects. Each Transition
+ * object describes a destination node and an action to take before
+ * moving to the destination node. The source node is encoded by the
+ * index of the object in the array that contains it. The pieces
+ * needed to understand the transition network are the enums for node
+ * IDs and actions, the parse() method, which walks through the
+ * network and implements the actions, and the network itself. The
+ * network guarantees certain conditions, for example, that a new
+ * resource will not be closed until one has been opened first; or
+ * that data will not be stored into a TaggedList until a TaggedList
+ * has been created. Nonetheless, the code in parse() does some
+ * consistency checks as it runs the network, and fails with an
+ * U_INTERNAL_PROGRAM_ERROR if one of these checks fails. If the input
+ * data has a bad format, an U_INVALID_FORMAT_ERROR is returned. If you
+ * see an U_INTERNAL_PROGRAM_ERROR the transition matrix has a bug in
+ * it.
+ *
+ * Old functionality of multiple locales in a single file is still
+ * supported. For this reason, LOCALE names override FILE names. If
+ * data for en_US is located in the en.txt file, once it is loaded,
+ * the code will not care where it came from (other than remembering
+ * which directory it came from). However, if there is an en_US
+ * resource in en_US.txt, that will take precedence. There is no
+ * limit to the number or type of resources that can be stored in a
+ * file, however, files are only searched in a specific way. If
+ * en_US_CA is requested, then first en_US_CA.txt is searched, then
+ * en_US.txt, then en.txt, then default.txt. So it only makes sense
+ * to put certain locales in certain files. In this example, it would
+ * be logical to put en_US_CA, en_US, and en into the en.txt file,
+ * since they would be found there if asked for. The extreme example
+ * is to place all locale resources into default.txt, which should
+ * also work.
+ *
+ * Inheritance is implemented. For example, xx_YY_zz inherits as
+ * follows: xx_YY_zz, xx_YY, xx, default. Inheritance is implemented
+ * as an array of hashtables. There will be from 1 to 4 hashtables in
+ * the array.
+ *
+ * Fallback files are implemented. The fallback pattern is Language
+ * Country Variant (LCV) -> LC -> L. Fallback is first done for the
+ * requested locale. Then it is done for the default locale, as
+ * returned by Locale::getDefault(). Then the special file
+ * default.txt is searched for the default locale. The overall FILE
+ * fallback path is LCV -> LC -> L -> dLCV -> dLC -> dL -> default.
+ *
+ * Note that although file name searching includes the default locale,
+ * once a ResourceBundle object is constructed, the inheritance path
+ * no longer includes the default locale. The path is LCV -> LC -> L
+ * -> default.
+ *
+ * File parsing is lazy. Nothing is parsed unless it is called for by
+ * someone. So when a ResourceBundle for xx_YY_zz is constructed,
+ * only that locale is parsed (along with anything else in the same
+ * file). Later, if the FooBar tag is asked for, and if it isn't
+ * found in xx_YY_zz, then xx_YY.txt will be parsed and checked, and
+ * so forth, until the chain is exhausted or the tag is found.
+ *
+ * Thread-safety is implemented around caches, both the cache that
+ * stores all the resource data, and the cache that stores flags
+ * indicating whether or not a file has been visited. These caches
+ * delete their storage at static cleanup time, when the process
+ * quits.
+ *
+ * ResourceBundle supports TableCollation as a special case. This
+ * involves having special ResourceBundle objects which DO own their
+ * data, since we don't want large collation rule strings in the
+ * ResourceBundle cache (these are already cached in the
+ * TableCollation cache). TableCollation files (.ctx files) have the
+ * same format as normal resource data files, with a different
+ * interpretation, from the standpoint of ResourceBundle. .ctx files
+ * are loaded into otherwise ordinary ResourceBundle objects. They
+ * don't inherit (that's implemented by TableCollation) and they own
+ * their data (as mentioned above). However, they still support
+ * possible multiple locales in a single .ctx file. (This is in
+ * practice a bad idea, since you only want the one locale you're
+ * looking for, and only one tag will be present
+ * ("CollationElements"), so you don't need an inheritance chain of
+ * multiple locales.) Up to 4 locale resources will be loaded from a
+ * .ctx file; everything after the first 4 is ignored (parsed and
+ * deleted). (Normal .txt files have no limit.) Instead of being
+ * loaded into the cache, and then looked up as needed, the locale
+ * resources are read straight into the ResourceBundle object.
+ *
+ * The Index, which used to reside in default.txt, has been moved to a
+ * new file, index.txt. This file contains a slightly modified format
+ * with the addition of the "InstalledLocales" tag; it looks like:
+ *
+ * Index {
+ * InstalledLocales {
+ * ar
+ * ..
+ * zh_TW
+ * }
+ * }
+ */
+//-----------------------------------------------------------------------------
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ResourceBundle)
+
+ResourceBundle::ResourceBundle(UErrorCode &err)
+    : UObject(), fLocale(NULL)
+{
+    fResource = ures_open(NULL, Locale::getDefault().getName(), &err); // null path, default locale's name
+}
+
+ResourceBundle::ResourceBundle(const ResourceBundle &other)
+    : UObject(other), fLocale(NULL)
+{
+    // A source with no resource (a "bad" bundle) yields a copy with none either.
+    // The cached locale is not copied; getLocale() recreates it on demand.
+    // An error from the underlying copy stays in a local status and is not reported.
+    UErrorCode copyStatus = U_ZERO_ERROR;
+    fResource = NULL;
+    if (other.fResource != NULL) {
+        fResource = ures_copyResb(NULL, other.fResource, &copyStatus);
+    }
+}
+
+ResourceBundle::ResourceBundle(UResourceBundle *res, UErrorCode& err)
+    : UObject(), fLocale(NULL)
+{
+    // Hold a copy of res made by ures_copyResb; a null res gives a bundle with no resource.
+    if (res == NULL) {
+        fResource = NULL;
+        return;
+    }
+    fResource = ures_copyResb(NULL, res, &err);
+}
+
+ResourceBundle::ResourceBundle(const char* path, const Locale& locale, UErrorCode& err)
+    : UObject(), fLocale(NULL)
+{
+    fResource = ures_open(path, locale.getName(), &err); // path is passed through unchanged; errors land in err
+}
+
+
+ResourceBundle& ResourceBundle::operator=(const ResourceBundle& other)
+{
+    if (this != &other) {
+        // Release whatever this object currently holds.
+        if (fResource != NULL) {
+            ures_close(fResource);
+            fResource = NULL;
+        }
+        if (fLocale != NULL) {
+            delete fLocale;
+            fLocale = NULL;
+        }
+
+        // Copy the other bundle's resource if it has one; otherwise this bundle
+        // stays without a resource. An error from the copy is not reported.
+        UErrorCode copyStatus = U_ZERO_ERROR;
+        if (other.fResource != NULL) {
+            fResource = ures_copyResb(NULL, other.fResource, &copyStatus);
+        }
+    }
+    return *this;
+}
+
+ResourceBundle::~ResourceBundle()
+{
+    if (fResource != NULL) { // close the wrapped resource when there is one
+        ures_close(fResource);
+    }
+    if (fLocale != NULL) {   // free the locale cached by getLocale()
+        delete fLocale;
+    }
+}
+
+ResourceBundle *ResourceBundle::clone() const {
+    ResourceBundle *duplicate = new ResourceBundle(*this); // copy-constructed on the heap
+    return duplicate;
+}
+
+UnicodeString ResourceBundle::getString(UErrorCode& status) const {
+    int32_t length = 0;
+    const UChar *chars = ures_getString(fResource, &length, &status);
+    return UnicodeString(TRUE, chars, length); // wraps the returned buffer and length
+}
+
+const uint8_t *ResourceBundle::getBinary(int32_t& len, UErrorCode& status) const {
+ return ures_getBinary(fResource, &len, &status); // forwarder: len and status are handed to the C API by address
+}
+
+const int32_t *ResourceBundle::getIntVector(int32_t& len, UErrorCode& status) const {
+ return ures_getIntVector(fResource, &len, &status); // forwarder: len and status are handed to the C API by address
+}
+
+uint32_t ResourceBundle::getUInt(UErrorCode& status) const {
+ return ures_getUInt(fResource, &status); // forwarder: result and status come straight from the C API
+}
+
+int32_t ResourceBundle::getInt(UErrorCode& status) const {
+ return ures_getInt(fResource, &status); // forwarder: result and status come straight from the C API
+}
+
+const char *ResourceBundle::getName(void) const {
+ return ures_getName(fResource); // forwarder: returns the C API's pointer unchanged
+}
+
+const char *ResourceBundle::getKey(void) const {
+ return ures_getKey(fResource); // forwarder: returns the C API's pointer unchanged
+}
+
+UResType ResourceBundle::getType(void) const {
+ return ures_getType(fResource); // forwarder to the C API for the wrapped resource
+}
+
+int32_t ResourceBundle::getSize(void) const {
+ return ures_getSize(fResource); // forwarder to the C API for the wrapped resource
+}
+
+UBool ResourceBundle::hasNext(void) const {
+ return ures_hasNext(fResource); // forwarder to the C API for the wrapped resource
+}
+
+void ResourceBundle::resetIterator(void) {
+ ures_resetIterator(fResource); // forwarder to the C API; nothing is returned
+}
+
+ResourceBundle ResourceBundle::getNext(UErrorCode& status) {
+    // Fetch the next item into a stack-allocated C bundle, then wrap a copy of it.
+    UResourceBundle scratch;
+    ures_initStackObject(&scratch);
+    ures_getNextResource(fResource, &scratch, &status);
+    ResourceBundle wrapped(&scratch, status);
+    if (U_SUCCESS(status)) {
+        ures_close(&scratch); // the scratch bundle is closed only on success
+    }
+    return wrapped;
+}
+
+UnicodeString ResourceBundle::getNextString(UErrorCode& status) {
+    int32_t length = 0;
+    const UChar *chars = ures_getNextString(fResource, &length, 0, &status); // 0: no key requested
+    return UnicodeString(TRUE, chars, length);
+}
+
+UnicodeString ResourceBundle::getNextString(const char ** key, UErrorCode& status) {
+    int32_t length = 0;
+    const UChar *chars = ures_getNextString(fResource, &length, key, &status); // key is passed through to the C API
+    return UnicodeString(TRUE, chars, length);
+}
+
+ResourceBundle ResourceBundle::get(int32_t indexR, UErrorCode& status) const {
+    // Fetch item indexR into a stack-allocated C bundle, then wrap a copy of it.
+    UResourceBundle scratch;
+    ures_initStackObject(&scratch);
+    ures_getByIndex(fResource, indexR, &scratch, &status);
+    ResourceBundle wrapped(&scratch, status);
+    if (U_SUCCESS(status)) {
+        ures_close(&scratch); // the scratch bundle is closed only on success
+    }
+    return wrapped;
+}
+
+UnicodeString ResourceBundle::getStringEx(int32_t indexS, UErrorCode& status) const {
+    int32_t length = 0;
+    const UChar *chars = ures_getStringByIndex(fResource, indexS, &length, &status);
+    return UnicodeString(TRUE, chars, length); // wraps the returned buffer and length
+}
+
+ResourceBundle ResourceBundle::get(const char* key, UErrorCode& status) const {
+    // Look the key up into a stack-allocated C bundle, then wrap a copy of it.
+    UResourceBundle scratch;
+    ures_initStackObject(&scratch);
+    ures_getByKey(fResource, key, &scratch, &status);
+    ResourceBundle wrapped(&scratch, status);
+    if (U_SUCCESS(status)) {
+        ures_close(&scratch); // the scratch bundle is closed only on success
+    }
+    return wrapped;
+}
+
+ResourceBundle ResourceBundle::getWithFallback(const char* key, UErrorCode& status){
+    UResourceBundle scratch; // stack-allocated C bundle that receives the lookup result
+    ures_initStackObject(&scratch);
+    ures_getByKeyWithFallback(fResource, key, &scratch, &status);
+    ResourceBundle wrapped(&scratch, status);
+    if (U_SUCCESS(status)) {
+        ures_close(&scratch); // the scratch bundle is closed only on success
+    }
+    return wrapped;
+}
+UnicodeString ResourceBundle::getStringEx(const char* key, UErrorCode& status) const {
+    int32_t length = 0;
+    const UChar *chars = ures_getStringByKey(fResource, key, &length, &status);
+    return UnicodeString(TRUE, chars, length); // wraps the returned buffer and length
+}
+
+const char*
+ResourceBundle::getVersionNumber() const
+{
+ return ures_getVersionNumberInternal(fResource); // forwarder: returns the C API's pointer unchanged
+}
+
+void ResourceBundle::getVersion(UVersionInfo versionInfo) const {
+ ures_getVersion(fResource, versionInfo); // forwarder: versionInfo is passed on to the C API to receive the result
+}
+
+const Locale &ResourceBundle::getLocale(void) const {
+    // A single function-local static mutex guards the lazy creation of fLocale.
+    static UMutex gLocaleLock;
+    Mutex lock(&gLocaleLock);
+    if (fLocale == NULL) {
+        UErrorCode status = U_ZERO_ERROR;
+        const char *localeName = ures_getLocaleInternal(fResource, &status);
+        const_cast<ResourceBundle *>(this)->fLocale = new Locale(localeName); // cache on a const object
+    }
+    // If the allocation produced no object, fall back to the default locale.
+    return fLocale != NULL ? *fLocale : Locale::getDefault();
+}
+
+const Locale ResourceBundle::getLocale(ULocDataLocaleType type, UErrorCode &status) const
+{
+ return ures_getLocaleByType(fResource, type, &status); // the C API's result is converted to a Locale returned by value
+}
+
+U_NAMESPACE_END
+//eof
diff --git a/thirdparty/icu4c/common/resbund_cnv.cpp b/thirdparty/icu4c/common/resbund_cnv.cpp
new file mode 100644
index 0000000000..45c0b399bf
--- /dev/null
+++ b/thirdparty/icu4c/common/resbund_cnv.cpp
@@ -0,0 +1,57 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1997-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: resbund_cnv.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004aug25
+* created by: Markus W. Scherer
+*
+* Character conversion functions moved here from resbund.cpp
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/resbund.h"
+#include "uinvchar.h"
+
+U_NAMESPACE_BEGIN
+
+ResourceBundle::ResourceBundle( const UnicodeString& path,
+ const Locale& locale,
+ UErrorCode& error)
+ :UObject(), fLocale(NULL)
+{
+ constructForLocale(path, locale, error); // fResource is assigned inside constructForLocale()
+}
+
+ResourceBundle::ResourceBundle( const UnicodeString& path,
+ UErrorCode& error)
+ :UObject(), fLocale(NULL)
+{
+ constructForLocale(path, Locale::getDefault(), error); // same as the three-argument form, using the default locale
+}
+
+void
+ResourceBundle::constructForLocale(const UnicodeString& path,
+                                   const Locale& locale,
+                                   UErrorCode& error)
+{
+    if (path.isEmpty()) {
+        fResource = ures_open(NULL, locale.getName(), &error); // empty path: open with a null path
+        return;
+    }
+    // ures_openU is handed the raw buffer, so work on a copy with an explicit trailing NUL appended.
+    UnicodeString terminatedCopy(path);
+    terminatedCopy.append((UChar)0);
+    fResource = ures_openU(terminatedCopy.getBuffer(), locale.getName(), &error);
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/resource.cpp b/thirdparty/icu4c/common/resource.cpp
new file mode 100644
index 0000000000..3d41a16029
--- /dev/null
+++ b/thirdparty/icu4c/common/resource.cpp
@@ -0,0 +1,22 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2015-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* resource.cpp
+*
+* created on: 2015nov04
+* created by: Markus W. Scherer
+*/
+
+#include "resource.h"
+
+U_NAMESPACE_BEGIN
+
+ResourceValue::~ResourceValue() {} // out-of-line definition of the (empty) destructor
+
+ResourceSink::~ResourceSink() {} // out-of-line definition of the (empty) destructor
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/resource.h b/thirdparty/icu4c/common/resource.h
new file mode 100644
index 0000000000..3795694412
--- /dev/null
+++ b/thirdparty/icu4c/common/resource.h
@@ -0,0 +1,293 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2015-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* resource.h
+*
+* created on: 2015nov04
+* created by: Markus W. Scherer
+*/
+
+#ifndef __URESOURCE_H__
+#define __URESOURCE_H__
+
+/**
+ * \file
+ * \brief ICU resource bundle key and value types.
+ */
+
+// Note: Ported from ICU4J class UResource and its nested classes,
+// but the C++ classes are separate, not nested.
+
+// We use the Resource prefix for C++ classes, as usual.
+// The UResource prefix would be used for C types.
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/ures.h"
+#include "restrace.h"
+
+struct ResourceData;
+
+U_NAMESPACE_BEGIN
+
+class ResourceValue;
+
+// Note: In C++, we use const char * pointers for keys,
+// rather than an abstraction like Java UResource.Key.
+
+/**
+ * Interface for iterating over a resource bundle array resource.
+ * Stores the item pointers, the length and the trace info handed to its
+ * constructor; it declares no destructor of its own.
+ */
+class U_COMMON_API ResourceArray {
+public:
+ /** Constructs an empty array object (NULL item pointers, length 0). */
+ ResourceArray() : items16(NULL), items32(NULL), length(0) {}
+
+ /** Only for implementation use. @internal */
+ ResourceArray(const uint16_t *i16, const uint32_t *i32, int32_t len,
+ const ResourceTracer& traceInfo) :
+ items16(i16), items32(i32), length(len),
+ fTraceInfo(traceInfo) {}
+
+ /**
+ * @return The number of items in the array resource.
+ */
+ int32_t getSize() const { return length; }
+ /**
+ * @param i Array item index.
+ * @param value Output-only, receives the value of the i'th item.
+ * @return true if i is non-negative and less than getSize().
+ */
+ UBool getValue(int32_t i, ResourceValue &value) const;
+
+ /** Only for implementation use. @internal */
+ uint32_t internalGetResource(const ResourceData *pResData, int32_t i) const;
+
+private:
+ const uint16_t *items16; // NOTE(review): presumably the 16-bit item encoding; confirm in the implementation
+ const uint32_t *items32; // NOTE(review): presumably the 32-bit item encoding; confirm in the implementation
+ int32_t length; // item count, returned by getSize()
+ ResourceTracer fTraceInfo; // copy of the tracer passed to the internal constructor
+};
+
+/**
+ * Interface for iterating over a resource bundle table resource.
+ * Stores the key/item pointers, the length and the trace info handed to its
+ * constructor; it declares no destructor of its own.
+ */
+class U_COMMON_API ResourceTable {
+public:
+ /** Constructs an empty table object (NULL key and item pointers, length 0). */
+ ResourceTable() : keys16(NULL), keys32(NULL), items16(NULL), items32(NULL), length(0) {}
+
+ /** Only for implementation use. @internal */
+ ResourceTable(const uint16_t *k16, const int32_t *k32,
+ const uint16_t *i16, const uint32_t *i32, int32_t len,
+ const ResourceTracer& traceInfo) :
+ keys16(k16), keys32(k32), items16(i16), items32(i32), length(len),
+ fTraceInfo(traceInfo) {}
+
+ /**
+ * @return The number of items in the table resource.
+ */
+ int32_t getSize() const { return length; }
+ /**
+ * @param i Table item index.
+ * @param key Output-only, receives the key of the i'th item.
+ * @param value Output-only, receives the value of the i'th item.
+ * @return true if i is non-negative and less than getSize().
+ */
+ UBool getKeyAndValue(int32_t i, const char *&key, ResourceValue &value) const;
+
+ /**
+ * @param key Key string to find in the table.
+ * @param value Output-only, receives the value of the item with that key.
+ * @return true if the table contains the key.
+ */
+ UBool findValue(const char *key, ResourceValue &value) const;
+
+private:
+ const uint16_t *keys16; // NOTE(review): presumably the 16-bit key encoding; confirm in the implementation
+ const int32_t *keys32; // NOTE(review): presumably the 32-bit key encoding; confirm in the implementation
+ const uint16_t *items16; // NOTE(review): presumably the 16-bit item encoding; confirm in the implementation
+ const uint32_t *items32; // NOTE(review): presumably the 32-bit item encoding; confirm in the implementation
+ int32_t length; // item count, returned by getSize()
+ ResourceTracer fTraceInfo; // copy of the tracer passed to the internal constructor
+};
+
+/**
+ * Represents a resource bundle item's value.
+ * Avoids object creations as much as possible.
+ * Mutable, not thread-safe.
+ * Abstract: every getter below except the two inline UnicodeString
+ * convenience wrappers is pure virtual. Not copyable.
+ */
+class U_COMMON_API ResourceValue : public UObject {
+public:
+ virtual ~ResourceValue();
+
+ /**
+ * @return ICU resource type, for example, URES_STRING
+ */
+ virtual UResType getType() const = 0;
+
+ /**
+ * Sets U_RESOURCE_TYPE_MISMATCH if this is not a string resource.
+ *
+ * @see ures_getString()
+ */
+ virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const = 0;
+
+ inline UnicodeString getUnicodeString(UErrorCode &errorCode) const { // convenience wrapper around getString()
+ int32_t len = 0;
+ const UChar *r = getString(len, errorCode);
+ return UnicodeString(true, r, len); // NOTE(review): presumably the read-only-alias constructor (no copy) - confirm
+ }
+
+ /**
+ * Sets U_RESOURCE_TYPE_MISMATCH if this is not an alias resource.
+ */
+ virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const = 0;
+
+ inline UnicodeString getAliasUnicodeString(UErrorCode &errorCode) const { // convenience wrapper around getAliasString()
+ int32_t len = 0;
+ const UChar *r = getAliasString(len, errorCode);
+ return UnicodeString(true, r, len); // NOTE(review): presumably the read-only-alias constructor (no copy) - confirm
+ }
+
+ /**
+ * Sets U_RESOURCE_TYPE_MISMATCH if this is not an integer resource.
+ *
+ * @see ures_getInt()
+ */
+ virtual int32_t getInt(UErrorCode &errorCode) const = 0;
+
+ /**
+ * Sets U_RESOURCE_TYPE_MISMATCH if this is not an integer resource.
+ *
+ * @see ures_getUInt()
+ */
+ virtual uint32_t getUInt(UErrorCode &errorCode) const = 0;
+
+ /**
+ * Sets U_RESOURCE_TYPE_MISMATCH if this is not an intvector resource.
+ *
+ * @see ures_getIntVector()
+ */
+ virtual const int32_t *getIntVector(int32_t &length, UErrorCode &errorCode) const = 0;
+
+ /**
+ * Sets U_RESOURCE_TYPE_MISMATCH if this is not a binary-blob resource.
+ *
+ * @see ures_getBinary()
+ */
+ virtual const uint8_t *getBinary(int32_t &length, UErrorCode &errorCode) const = 0;
+
+ /**
+ * Sets U_RESOURCE_TYPE_MISMATCH if this is not an array resource.
+ */
+ virtual ResourceArray getArray(UErrorCode &errorCode) const = 0;
+
+ /**
+ * Sets U_RESOURCE_TYPE_MISMATCH if this is not a table resource.
+ */
+ virtual ResourceTable getTable(UErrorCode &errorCode) const = 0;
+
+ /**
+ * Is this a no-fallback/no-inheritance marker string?
+ * Such a marker is used for
+ * CLDR no-fallback data values of (three empty-set symbols)=={2205, 2205, 2205}
+ * when enumerating tables with fallback from the specific resource bundle to root.
+ *
+ * @return true if this is a no-inheritance marker string
+ */
+ virtual UBool isNoInheritanceMarker() const = 0;
+
+ /**
+ * Sets the dest strings from the string values in this array resource.
+ *
+ * @return the number of strings in this array resource.
+ * If greater than capacity, then an overflow error is set.
+ *
+ * Sets U_RESOURCE_TYPE_MISMATCH if this is not an array resource
+ * or if any of the array items is not a string.
+ */
+ virtual int32_t getStringArray(UnicodeString *dest, int32_t capacity,
+ UErrorCode &errorCode) const = 0;
+
+ /**
+ * Same as the following (Java-style pseudo-code, inherited from the ICU4J original):
+ * <pre>
+ * if (getType() == URES_STRING) {
+ * return new String[] { getString(); }
+ * } else {
+ * return getStringArray();
+ * }
+ * </pre>
+ *
+ * Sets U_RESOURCE_TYPE_MISMATCH if this is
+ * neither a string resource nor an array resource containing strings.
+ * @see getString()
+ * @see getStringArray()
+ */
+ virtual int32_t getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity,
+ UErrorCode &errorCode) const = 0;
+
+ /**
+ * Same as the following (Java-style pseudo-code, inherited from the ICU4J original):
+ * <pre>
+ * if (getType() == URES_STRING) {
+ * return getString();
+ * } else {
+ * return getStringArray()[0];
+ * }
+ * </pre>
+ *
+ * Sets U_RESOURCE_TYPE_MISMATCH if this is
+ * neither a string resource nor an array resource containing strings.
+ * @see getString()
+ * @see getStringArray()
+ */
+ virtual UnicodeString getStringOrFirstOfArray(UErrorCode &errorCode) const = 0;
+
+protected:
+ ResourceValue() {}
+
+private:
+ ResourceValue(const ResourceValue &); // no copy constructor
+ ResourceValue &operator=(const ResourceValue &); // no assignment operator
+};
+
+/**
+ * Sink for ICU resource bundle contents.
+ * Abstract callback interface: subclasses implement put(). Not copyable.
+ */
+class U_COMMON_API ResourceSink : public UObject {
+public:
+ ResourceSink() {}
+ virtual ~ResourceSink();
+
+ /**
+ * Called once for each bundle (child-parent-...-root).
+ * The value is normally an array or table resource,
+ * and implementations of this method normally iterate over the
+ * tree of resource items stored there.
+ *
+ * @param key The key string of the enumeration-start resource.
+ * Empty if the enumeration starts at the top level of the bundle.
+ * @param value Call getArray() or getTable() as appropriate.
+ * Then reuse for output values from Array and Table getters.
+ * @param noFallback true if the bundle has no parent;
+ * that is, its top-level table has the nofallback attribute,
+ * or it is the root bundle of a locale tree.
+ * @param errorCode In/out error code; presumably follows the usual
+ * ICU UErrorCode convention (TODO confirm against callers).
+ */
+ virtual void put(const char *key, ResourceValue &value, UBool noFallback,
+ UErrorCode &errorCode) = 0;
+
+private:
+ ResourceSink(const ResourceSink &); // no copy constructor
+ ResourceSink &operator=(const ResourceSink &); // no assignment operator
+};
+
+U_NAMESPACE_END
+
+#endif
diff --git a/thirdparty/icu4c/common/restrace.cpp b/thirdparty/icu4c/common/restrace.cpp
new file mode 100644
index 0000000000..5c6498850e
--- /dev/null
+++ b/thirdparty/icu4c/common/restrace.cpp
@@ -0,0 +1,130 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if U_ENABLE_TRACING
+
+#include "restrace.h"
+#include "charstr.h"
+#include "cstring.h"
+#include "utracimp.h"
+#include "uresimp.h"
+#include "uassert.h"
+#include "util.h"
+
+U_NAMESPACE_BEGIN
+
+// The destructor is declared in the header and explicitly defaulted here,
+// out of line.
+ResourceTracer::~ResourceTracer() = default;
+
+// Emits one UTRACE_VERBOSE record for a resource read, formatted as
+// "(<resType>) <file path> @ <resource path>" and left-padded with spaces so
+// that type names of different lengths line up.
+//
+// resType: NUL-terminated resource type name (for example "intvector").
+// Requires that this tracer was constructed with a bundle or a parent tracer.
+void ResourceTracer::trace(const char* resType) const {
+    U_ASSERT(fResB || fParent);
+    UTRACE_ENTRY(UTRACE_UDATA_RESOURCE);
+    UErrorCode status = U_ZERO_ERROR;
+
+    CharString filePath;
+    getFilePath(filePath, status);
+
+    CharString resPath;
+    getResPath(resPath, status);
+
+    // The longest type ("intvector") is 9 chars, so the padding literal must
+    // contain exactly 9 spaces. With a shorter literal the length passed to
+    // append() below becomes negative for almost every type name.
+    const char kSpaces[] = "         ";
+    const int32_t kMaxTypeLength = static_cast<int32_t>(sizeof(kSpaces)) - 1;
+    const int32_t typeLength = static_cast<int32_t>(uprv_strlen(resType));
+    CharString format;
+    // Clamp: a type name longer than the padding width simply gets no padding.
+    if (typeLength < kMaxTypeLength) {
+        format.append(kSpaces, kMaxTypeLength - typeLength, status);
+    }
+    format.append("(%s) %s @ %s", status);
+
+    UTRACE_DATA3(UTRACE_VERBOSE,
+        format.data(),
+        resType,
+        filePath.data(),
+        resPath.data());
+    UTRACE_EXIT_STATUS(status);
+}
+
+// Emits one UTRACE_VERBOSE record containing the path of the bundle file.
+// Requires a tracer that was constructed with a bundle (fResB non-null).
+void ResourceTracer::traceOpen() const {
+    U_ASSERT(fResB);
+    UTRACE_ENTRY(UTRACE_UDATA_BUNDLE);
+    UErrorCode status = U_ZERO_ERROR;
+
+    // The path is built inside the macro argument, exactly as before, so it is
+    // only computed when the macro evaluates its arguments.
+    CharString filePath;
+    UTRACE_DATA1(UTRACE_VERBOSE,
+        "%s",
+        getFilePath(filePath, status).data());
+    UTRACE_EXIT_STATUS(status);
+}
+
+// Appends "<fPath>/<fName>.res" of this tracer's bundle to output. A tracer
+// without a bundle of its own forwards the request to its parent.
+// Returns output.
+CharString& ResourceTracer::getFilePath(CharString& output, UErrorCode& status) const {
+    if (!fResB) {
+        fParent->getFilePath(output, status);
+        return output;
+    }
+
+    output.append(fResB->fData->fPath, status);
+    output.append('/', status);
+    output.append(fResB->fData->fName, status);
+    output.append(".res", status);
+    return output;
+}
+
+// Appends the resource path of this tracer to output and returns output.
+// The base is "/" + the bundle's fResPath minus its trailing '/', or the
+// parent's path when there is no bundle. Then "/<key>" is added if a key is
+// set and "[<index>]" if an index is set.
+CharString& ResourceTracer::getResPath(CharString& output, UErrorCode& status) const {
+    if (!fResB) {
+        fParent->getResPath(output, status);
+    } else {
+        output.append('/', status);
+        output.append(fResB->fResPath, status);
+        // The path is expected to end in '/'; drop that last character.
+        U_ASSERT(output[output.length()-1] == '/');
+        output.truncate(output.length()-1);
+    }
+
+    if (fKey) {
+        output.append('/', status);
+        output.append(fKey, status);
+    }
+
+    if (fIndex != -1) {
+        // Format the index first, then wrap it in brackets.
+        UnicodeString digits;
+        ICU_Utility::appendNumber(digits, fIndex);
+        output.append('[', status);
+        output.appendInvariantChars(digits, status);
+        output.append(']', status);
+    }
+    return output;
+}
+
+// Dispatches on the file type: "res" files go to traceOpenResFile(), every
+// other type to traceOpenDataFile().
+void FileTracer::traceOpen(const char* path, const char* type, const char* name) {
+    if (uprv_strcmp(type, "res") != 0) {
+        traceOpenDataFile(path, type, name);
+        return;
+    }
+    traceOpenResFile(path, name);
+}
+
+// Emits one UTRACE_VERBOSE record with the text "<path>/<name>.<type>".
+void FileTracer::traceOpenDataFile(const char* path, const char* type, const char* name) {
+    UTRACE_ENTRY(UTRACE_UDATA_DATA_FILE);
+    UErrorCode status = U_ZERO_ERROR;
+
+    // Assemble directory, base name and extension.
+    CharString fullPath;
+    fullPath.append(path, status);
+    fullPath.append('/', status);
+    fullPath.append(name, status);
+    fullPath.append('.', status);
+    fullPath.append(type, status);
+
+    UTRACE_DATA1(UTRACE_VERBOSE, "%s", fullPath.data());
+    UTRACE_EXIT_STATUS(status);
+}
+
+// Emits one UTRACE_VERBOSE record with the text "<path>/<name>.res".
+void FileTracer::traceOpenResFile(const char* path, const char* name) {
+    UTRACE_ENTRY(UTRACE_UDATA_RES_FILE);
+    UErrorCode status = U_ZERO_ERROR;
+
+    // Assemble directory, base name and the fixed ".res" extension.
+    CharString fullPath;
+    fullPath.append(path, status);
+    fullPath.append('/', status);
+    fullPath.append(name, status);
+    fullPath.append(".res", status);
+
+    UTRACE_DATA1(UTRACE_VERBOSE, "%s", fullPath.data());
+    UTRACE_EXIT_STATUS(status);
+}
+
+U_NAMESPACE_END
+
+#endif // U_ENABLE_TRACING
diff --git a/thirdparty/icu4c/common/restrace.h b/thirdparty/icu4c/common/restrace.h
new file mode 100644
index 0000000000..ef29eaed57
--- /dev/null
+++ b/thirdparty/icu4c/common/restrace.h
@@ -0,0 +1,147 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef __RESTRACE_H__
+#define __RESTRACE_H__
+
+#include "unicode/utypes.h"
+
+#if U_ENABLE_TRACING
+
+struct UResourceBundle;
+
+U_NAMESPACE_BEGIN
+
+class CharString;
+
+/**
+ * Instances of this class store information used to trace reads from resource
+ * bundles when ICU is built with --enable-tracing.
+ *
+ * All arguments of type const UResourceBundle*, const char*, and
+ * const ResourceTracer& are stored as pointers. The caller must retain
+ * ownership for the lifetime of this ResourceTracer.
+ *
+ * Each constructor sets either the bundle or the parent tracer (or neither,
+ * for the default constructor), plus at most one of key and index.
+ *
+ * Exported as U_COMMON_API for Windows because it is a value field
+ * in other exported types.
+ */
+class U_COMMON_API ResourceTracer {
+public:
+ ResourceTracer() :
+ fResB(nullptr),
+ fParent(nullptr),
+ fKey(nullptr),
+ fIndex(-1) {}
+
+ ResourceTracer(const UResourceBundle* resB) :
+ fResB(resB),
+ fParent(nullptr),
+ fKey(nullptr),
+ fIndex(-1) {}
+
+ ResourceTracer(const UResourceBundle* resB, const char* key) :
+ fResB(resB),
+ fParent(nullptr),
+ fKey(key),
+ fIndex(-1) {}
+
+ ResourceTracer(const UResourceBundle* resB, int32_t index) :
+ fResB(resB),
+ fParent(nullptr),
+ fKey(nullptr),
+ fIndex(index) {}
+
+ ResourceTracer(const ResourceTracer& parent, const char* key) :
+ fResB(nullptr),
+ fParent(&parent),
+ fKey(key),
+ fIndex(-1) {}
+
+ ResourceTracer(const ResourceTracer& parent, int32_t index) :
+ fResB(nullptr),
+ fParent(&parent),
+ fKey(nullptr),
+ fIndex(index) {}
+
+ ~ResourceTracer();
+
+ void trace(const char* type) const;
+ void traceOpen() const;
+
+ /**
+ * Calls trace() if the resB or parent provided to the constructor was
+ * non-null; otherwise, does nothing.
+ */
+ void maybeTrace(const char* type) const {
+ if (fResB || fParent) {
+ trace(type);
+ }
+ }
+
+private:
+ const UResourceBundle* fResB; // bundle given to the constructor, or nullptr
+ const ResourceTracer* fParent; // address of the parent tracer given to the constructor, or nullptr
+ const char* fKey; // key given to the constructor, or nullptr
+ int32_t fIndex; // index given to the constructor, or -1 when none was given
+
+ CharString& getFilePath(CharString& output, UErrorCode& status) const;
+
+ CharString& getResPath(CharString& output, UErrorCode& status) const;
+};
+
+/**
+ * This class provides methods to trace data file reads when ICU is built
+ * with --enable-tracing.
+ * Only static member functions; traceOpen() is the single public entry point.
+ */
+class FileTracer {
+public:
+ static void traceOpen(const char* path, const char* type, const char* name);
+
+private:
+ static void traceOpenDataFile(const char* path, const char* type, const char* name);
+ static void traceOpenResFile(const char* path, const char* name);
+};
+
+U_NAMESPACE_END
+
+#else // U_ENABLE_TRACING
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Default trivial implementation when --enable-tracing is not used.
+ * Every constructor and member function has an empty body and ignores its
+ * arguments; no data members are declared.
+ */
+class U_COMMON_API ResourceTracer {
+public:
+ ResourceTracer() {}
+
+ ResourceTracer(const void*) {}
+
+ ResourceTracer(const void*, const char*) {}
+
+ ResourceTracer(const void*, int32_t) {}
+
+ ResourceTracer(const ResourceTracer&, const char*) {}
+
+ ResourceTracer(const ResourceTracer&, int32_t) {}
+
+ void trace(const char*) const {}
+
+ void traceOpen() const {}
+
+ void maybeTrace(const char*) const {}
+};
+
+/**
+ * Default trivial implementation when --enable-tracing is not used.
+ * traceOpen() has an empty body and ignores its arguments.
+ */
+class FileTracer {
+public:
+ static void traceOpen(const char*, const char*, const char*) {}
+};
+
+U_NAMESPACE_END
+
+#endif // U_ENABLE_TRACING
+
+#endif //__RESTRACE_H__
diff --git a/thirdparty/icu4c/common/ruleiter.cpp b/thirdparty/icu4c/common/ruleiter.cpp
new file mode 100644
index 0000000000..41eea23c0d
--- /dev/null
+++ b/thirdparty/icu4c/common/ruleiter.cpp
@@ -0,0 +1,162 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2003-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: September 24 2003
+* Since: ICU 2.8
+**********************************************************************
+*/
+#include "ruleiter.h"
+#include "unicode/parsepos.h"
+#include "unicode/symtable.h"
+#include "unicode/unistr.h"
+#include "unicode/utf16.h"
+#include "patternprops.h"
+
+/* \U87654321 or \ud800\udc00 */
+#define MAX_U_NOTATION_LEN 12
+
+U_NAMESPACE_BEGIN
+
+// Binds the iterator to theText, thePos and theSym (all held by reference or
+// pointer, nothing is copied) and starts with no variable expansion active.
+RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText,
+                                             const SymbolTable* theSym,
+                                             ParsePosition& thePos)
+    : text(theText),
+      pos(thePos),
+      sym(theSym),
+      buf(0),
+      bufPos(0)
+{
+}
+
+// True only when no variable expansion is active and the parse position is
+// exactly at the end of the text.
+UBool RuleCharacterIterator::atEnd() const {
+    if (buf != 0) {
+        return FALSE;
+    }
+    return pos.getIndex() == text.length();
+}
+
+// Returns the next code point and advances past it, honoring the
+// PARSE_VARIABLES, PARSE_ESCAPES and SKIP_WHITESPACE bits in options.
+// isEscaped is reset to FALSE and set to TRUE when a backslash escape was
+// processed. Returns DONE immediately if ec already indicates failure, and
+// DONE with ec set to U_UNDEFINED_VARIABLE or U_MALFORMED_UNICODE_ESCAPE when
+// a variable lookup or an escape fails.
+UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
+    if (U_FAILURE(ec)) {
+        return DONE;
+    }
+
+    UChar32 c = DONE;
+    isEscaped = FALSE;
+
+    while (true) {
+        c = _current();
+        _advance(U16_LENGTH(c));
+
+        // Variable reference: only recognized outside of an expansion, when
+        // requested and when a symbol table is available.
+        if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
+                (options & PARSE_VARIABLES) != 0 && sym != 0) {
+            UnicodeString varName = sym->parseReference(text, pos, text.length());
+            if (varName.length() == 0) {
+                // Isolated SYMBOL_REF: return it. Caller must be prepared for this.
+                break;
+            }
+            bufPos = 0;
+            buf = sym->lookup(varName);
+            if (buf == 0) {
+                ec = U_UNDEFINED_VARIABLE;
+                return DONE;
+            }
+            if (buf->length() == 0) {
+                // Empty variable value: nothing to iterate over.
+                buf = 0;
+            }
+            continue;
+        }
+
+        if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
+            continue;
+        }
+
+        if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
+            UnicodeString escapeText;
+            int32_t consumed = 0;
+            c = lookahead(escapeText, MAX_U_NOTATION_LEN).unescapeAt(consumed);
+            jumpahead(consumed);
+            isEscaped = TRUE;
+            if (c < 0) {
+                ec = U_MALFORMED_UNICODE_ESCAPE;
+                return DONE;
+            }
+        }
+
+        break;
+    }
+
+    return c;
+}
+
+// Saves the iterator state (text index, expansion buffer and the offset
+// inside it) into p.
+void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
+    p.pos = pos.getIndex();
+    p.buf = buf;
+    p.bufPos = bufPos;
+}
+
+// Restores the iterator state (text index, expansion buffer and the offset
+// inside it) from p.
+void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
+    pos.setIndex(p.pos);
+    buf = p.buf;
+    bufPos = p.bufPos;
+}
+
+// Advances past Pattern_White_Space at the current position when options
+// contains SKIP_WHITESPACE; otherwise does nothing.
+void RuleCharacterIterator::skipIgnored(int32_t options) {
+    if ((options & SKIP_WHITESPACE) == 0) {
+        return;
+    }
+    UChar32 a = _current();
+    while (PatternProps::isWhiteSpace(a)) {
+        _advance(U16_LENGTH(a));
+        a = _current();
+    }
+}
+
+// Extracts up to maxLookAhead code units starting at the current position into
+// result and returns it. The source is the active variable expansion if there
+// is one, the main text otherwise. A negative maxLookAhead means 0x7FFFFFFF.
+UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
+    if (maxLookAhead < 0) {
+        maxLookAhead = 0x7FFFFFFF;
+    }
+    if (buf == 0) {
+        text.extract(pos.getIndex(), maxLookAhead, result);
+        return result;
+    }
+    buf->extract(bufPos, maxLookAhead, result);
+    return result;
+}
+
+// Public wrapper around _advance(): moves forward by count code units.
+void RuleCharacterIterator::jumpahead(int32_t count)
+{
+    _advance(count);
+}
+
+/*
+UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
+ int32_t b = pos.getIndex();
+ text.extract(0, b, result);
+ return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
+}
+*/
+
+// Returns the code point at the current position without consuming it:
+// taken from the variable expansion when one is active, otherwise from the
+// text, or DONE when the text index is at or past the end.
+UChar32 RuleCharacterIterator::_current() const {
+    if (buf != 0) {
+        return buf->char32At(bufPos);
+    }
+    int i = pos.getIndex();
+    if (i < text.length()) {
+        return text.char32At(i);
+    }
+    return (UChar32)DONE;
+}
+
+// Moves forward by count code units. Inside a variable expansion the offset
+// into the expansion is advanced, and the expansion ends when that offset
+// equals its length. Otherwise the text index is advanced and capped at the
+// text length.
+void RuleCharacterIterator::_advance(int32_t count) {
+    if (buf == 0) {
+        pos.setIndex(pos.getIndex() + count);
+        if (pos.getIndex() > text.length()) {
+            pos.setIndex(text.length());
+        }
+        return;
+    }
+
+    bufPos += count;
+    if (bufPos == buf->length()) {
+        buf = 0;
+    }
+}
+
+U_NAMESPACE_END
+
+//eof
diff --git a/thirdparty/icu4c/common/ruleiter.h b/thirdparty/icu4c/common/ruleiter.h
new file mode 100644
index 0000000000..28e2ca5753
--- /dev/null
+++ b/thirdparty/icu4c/common/ruleiter.h
@@ -0,0 +1,233 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2003-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: September 24 2003
+* Since: ICU 2.8
+**********************************************************************
+*/
+#ifndef _RULEITER_H_
+#define _RULEITER_H_
+
+#include "unicode/uobject.h"
+
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+class ParsePosition;
+class SymbolTable;
+
+/**
+ * An iterator that returns 32-bit code points. This class is deliberately
+ * <em>not</em> related to any of the ICU character iterator classes
+ * in order to minimize complexity.
+ * @author Alan Liu
+ * @since ICU 2.8
+ */
+class RuleCharacterIterator : public UMemory {
+
+ // TODO: Ideas for later. (Do not implement if not needed, lest the
+ // code coverage numbers go down due to unused methods.)
+ // 1. Add a copy constructor, operator==() method.
+ // 2. Rather than return DONE, throw an exception if the end
+ // is reached -- this is an alternate usage model, probably not useful.
+
+private:
+ /**
+ * Text being iterated. Held by reference, not copied.
+ */
+ const UnicodeString& text;
+
+ /**
+ * Position of iterator. Held by reference, so the caller's object is updated.
+ */
+ ParsePosition& pos;
+
+ /**
+ * Symbol table used to parse and dereference variables. May be 0.
+ */
+ const SymbolTable* sym;
+
+ /**
+ * Current variable expansion, or 0 if none.
+ */
+ const UnicodeString* buf;
+
+ /**
+ * Position within buf. Meaningless if buf == 0.
+ */
+ int32_t bufPos;
+
+public:
+ /**
+ * Value returned when there are no more characters to iterate.
+ */
+ enum { DONE = -1 };
+
+ /**
+ * Bitmask option to enable parsing of variable names. If (options &
+ * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to
+ * its value. Variables are parsed using the SymbolTable API.
+ */
+ enum { PARSE_VARIABLES = 1 };
+
+ /**
+ * Bitmask option to enable parsing of escape sequences. If (options &
+ * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
+ * to its value. Escapes are parsed using Utility.unescapeAt().
+ */
+ enum { PARSE_ESCAPES = 2 };
+
+ /**
+ * Bitmask option to enable skipping of whitespace. If (options &
+ * SKIP_WHITESPACE) != 0, then Pattern_White_Space characters will be silently
+ * skipped, as if they were not present in the input.
+ */
+ enum { SKIP_WHITESPACE = 4 };
+
+ /**
+ * Constructs an iterator over the given text, starting at the given
+ * position.
+ * @param text the text to be iterated
+ * @param sym the symbol table, or null if there is none. If sym is null,
+ * then variables will not be dereferenced, even if the PARSE_VARIABLES
+ * option is set.
+ * @param pos upon input, the index of the next character to return. If a
+ * variable has been dereferenced, then pos will <em>not</em> increment as
+ * characters of the variable value are iterated.
+ */
+ RuleCharacterIterator(const UnicodeString& text, const SymbolTable* sym,
+ ParsePosition& pos);
+
+ /**
+ * Returns true if this iterator has no more characters to return.
+ */
+ UBool atEnd() const;
+
+ /**
+ * Returns the next character using the given options, or DONE if there
+ * are no more characters, and advances the position to the next
+ * character.
+ * @param options one or more of the following options, bitwise-OR-ed
+ * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
+ * @param isEscaped output parameter set to true if the character
+ * was escaped
+ * @param ec input-output error code. An error will only be set by
+ * this routine if options includes PARSE_VARIABLES and an unknown
+ * variable name is seen, or if options includes PARSE_ESCAPES and
+ * an invalid escape sequence is seen.
+ * @return the current 32-bit code point, or DONE
+ */
+ UChar32 next(int32_t options, UBool& isEscaped, UErrorCode& ec);
+
+ /**
+ * Returns true if this iterator is currently within a variable expansion.
+ */
+ inline UBool inVariable() const;
+
+ /**
+ * An opaque object representing the position of a RuleCharacterIterator.
+ * Its fields are private; only RuleCharacterIterator (a friend) reads or
+ * writes them.
+ */
+ struct Pos : public UMemory {
+ private:
+ const UnicodeString* buf;
+ int32_t pos;
+ int32_t bufPos;
+ friend class RuleCharacterIterator;
+ };
+
+ /**
+ * Sets an object which, when later passed to setPos(), will
+ * restore this iterator's position. Usage idiom:
+ *
+ * RuleCharacterIterator iterator = ...;
+ * RuleCharacterIterator::Pos pos;
+ * iterator.getPos(pos);
+ * for (;;) {
+ * iterator.getPos(pos);
+ * int c = iterator.next(...);
+ * ...
+ * }
+ * iterator.setPos(pos);
+ *
+ * @param p a position object to be set to this iterator's
+ * current position.
+ */
+ void getPos(Pos& p) const;
+
+ /**
+ * Restores this iterator to the position it had when getPos()
+ * set the given object.
+ * @param p a position object previously set by getPos()
+ */
+ void setPos(const Pos& p);
+
+ /**
+ * Skips ahead past any ignored characters, as indicated by the given
+ * options. This is useful in conjunction with the lookahead() method.
+ *
+ * Currently, this only has an effect for SKIP_WHITESPACE.
+ * @param options one or more of the following options, bitwise-OR-ed
+ * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
+ */
+ void skipIgnored(int32_t options);
+
+ /**
+ * Returns a string containing the remainder of the characters to be
+ * returned by this iterator, without any option processing. If the
+ * iterator is currently within a variable expansion, this will only
+ * extend to the end of the variable expansion. This method is provided
+ * so that iterators may interoperate with string-based APIs. The typical
+ * sequence of calls is to call skipIgnored(), then call lookahead(), then
+ * parse the string returned by lookahead(), then call jumpahead() to
+ * resynchronize the iterator.
+ * @param result a string to receive the characters to be returned
+ * by future calls to next()
+ * @param maxLookAhead The maximum to copy into the result.
+ * A negative value (the default) means no limit.
+ * @return a reference to result
+ */
+ UnicodeString& lookahead(UnicodeString& result, int32_t maxLookAhead = -1) const;
+
+ /**
+ * Advances the position by the given number of 16-bit code units.
+ * This is useful in conjunction with the lookahead() method.
+ * @param count the number of 16-bit code units to jump over
+ */
+ void jumpahead(int32_t count);
+
+ /**
+ * (Currently disabled: the declaration below is commented out.)
+ * Returns a string representation of this object, consisting of the
+ * characters being iterated, with a '|' marking the current position.
+ * Position within an expanded variable is <em>not</em> indicated.
+ * @param result output parameter to receive a string
+ * representation of this object
+ */
+// UnicodeString& toString(UnicodeString& result) const;
+
+private:
+ /**
+ * Returns the current 32-bit code point without parsing escapes, parsing
+ * variables, or skipping whitespace.
+ * @return the current 32-bit code point
+ */
+ UChar32 _current() const;
+
+ /**
+ * Advances the position by the given amount.
+ * @param count the number of 16-bit code units to advance past
+ */
+ void _advance(int32_t count);
+};
+
+// A variable expansion is in progress exactly when buf is non-null.
+inline UBool RuleCharacterIterator::inVariable() const
+{
+    return buf != 0;
+}
+
+U_NAMESPACE_END
+
+#endif // _RULEITER_H_
+//eof
diff --git a/thirdparty/icu4c/common/schriter.cpp b/thirdparty/icu4c/common/schriter.cpp
new file mode 100644
index 0000000000..17b68aee9d
--- /dev/null
+++ b/thirdparty/icu4c/common/schriter.cpp
@@ -0,0 +1,119 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1998-2012, International Business Machines Corporation and
+* others. All Rights Reserved.
+******************************************************************************
+*
+* File schriter.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 05/05/99 stephen Cleaned up.
+******************************************************************************
+*/
+
+#include "utypeinfo.h" // for 'typeid' to work
+
+#include "unicode/chariter.h"
+#include "unicode/schriter.h"
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringCharacterIterator)
+
+// Default-constructs both the base iterator and the owned string.
+// The original author's warning below still applies.
+StringCharacterIterator::StringCharacterIterator()
+    : UCharCharacterIterator(), text() {
+    // NEVER DEFAULT CONSTRUCT!
+}
+
+// Iterates over a copy of textStr held in the text member.
+StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr)
+    : UCharCharacterIterator(textStr.getBuffer(), textStr.length()),
+      text(textStr)
+{
+    // The base class was initialized with the argument's array; re-point it
+    // at the array of our own copy.
+    UCharCharacterIterator::text = this->text.getBuffer();
+}
+
+// Iterates over a copy of textStr, with textPos passed to the base class as
+// the starting position.
+StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr, int32_t textPos)
+    : UCharCharacterIterator(textStr.getBuffer(), textStr.length(), textPos),
+      text(textStr)
+{
+    // The base class was initialized with the argument's array; re-point it
+    // at the array of our own copy.
+    UCharCharacterIterator::text = this->text.getBuffer();
+}
+
+// Iterates over a copy of textStr, with textBegin, textEnd and textPos passed
+// to the base class as the range and starting position.
+StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr,
+                                                 int32_t textBegin,
+                                                 int32_t textEnd,
+                                                 int32_t textPos)
+    : UCharCharacterIterator(textStr.getBuffer(), textStr.length(),
+                             textBegin, textEnd, textPos),
+      text(textStr)
+{
+    // The base class was initialized with the argument's array; re-point it
+    // at the array of our own copy.
+    UCharCharacterIterator::text = this->text.getBuffer();
+}
+
+// Copy constructor: copies the base-class state and the string of `that`.
+StringCharacterIterator::StringCharacterIterator(const StringCharacterIterator& that)
+    : UCharCharacterIterator(that),
+      text(that.text)
+{
+    // The copied base state still refers to that's array; re-point it at the
+    // array of our own copy.
+    UCharCharacterIterator::text = this->text.getBuffer();
+}
+
+// Empty destructor body.
+StringCharacterIterator::~StringCharacterIterator()
+{
+}
+
+// Copy assignment: assigns the base-class state and the string of `that`,
+// then makes the base class point at this object's own string array.
+StringCharacterIterator&
+StringCharacterIterator::operator=(const StringCharacterIterator& that)
+{
+    UCharCharacterIterator::operator=(that);
+    text = that.text;
+    // The base assignment left us referring to that's array; use our copy's.
+    UCharCharacterIterator::text = this->text.getBuffer();
+    return *this;
+}
+
+// Two iterators are equal when they have the same dynamic type, equal
+// strings and equal pos, begin and end values.
+UBool
+StringCharacterIterator::operator==(const ForwardCharacterIterator& that) const
+{
+    if (this == &that) {
+        return TRUE;
+    }
+
+    // UCharCharacterIterator::operator==() is deliberately not used: it
+    // checks for array pointer equality, whereas UnicodeString objects are
+    // compared here.
+    if (typeid(*this) != typeid(that)) {
+        return FALSE;
+    }
+
+    StringCharacterIterator& other = (StringCharacterIterator&)that;
+
+    if (!(text == other.text)) {
+        return FALSE;
+    }
+    return pos == other.pos
+        && begin == other.begin
+        && end == other.end;
+}
+
+// Returns a heap-allocated copy of this iterator, made with the copy
+// constructor.
+StringCharacterIterator*
+StringCharacterIterator::clone() const
+{
+    return new StringCharacterIterator(*this);
+}
+
+// Replaces the iterated string with a copy of newText and hands the copy's
+// array and length to the base class.
+void
+StringCharacterIterator::setText(const UnicodeString& newText)
+{
+    text = newText;
+    UCharCharacterIterator::setText(text.getBuffer(), text.length());
+}
+
+// Assigns the iterated string to result.
+void
+StringCharacterIterator::getText(UnicodeString& result)
+{
+    result = text;
+}
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/serv.cpp b/thirdparty/icu4c/common/serv.cpp
new file mode 100644
index 0000000000..ce545b9dbd
--- /dev/null
+++ b/thirdparty/icu4c/common/serv.cpp
@@ -0,0 +1,982 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+*******************************************************************************
+* Copyright (C) 2001-2014, International Business Machines Corporation.
+* All Rights Reserved.
+*******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_SERVICE
+
+#include "serv.h"
+#include "umutex.h"
+
+#undef SERVICE_REFCOUNT
+
+// in case we use the refcount stuff
+
+U_NAMESPACE_BEGIN
+
+/*
+******************************************************************
+*/
+
+const UChar ICUServiceKey::PREFIX_DELIMITER = 0x002F; /* '/' */
+
+// Constructs a key that stores a copy of id.
+ICUServiceKey::ICUServiceKey(const UnicodeString& id)
+: _id(id) {
+}
+
+// Destructor; the body is empty.
+ICUServiceKey::~ICUServiceKey()
+{
+}
+
+// Returns the ID this key was constructed with.
+const UnicodeString&
+ICUServiceKey::getID() const
+{
+ return _id;
+}
+
+// Appends this key's ID to result (result is not cleared first) and returns result.
+UnicodeString&
+ICUServiceKey::canonicalID(UnicodeString& result) const
+{
+ return result.append(_id);
+}
+
+// This implementation forwards to canonicalID(result).
+UnicodeString&
+ICUServiceKey::currentID(UnicodeString& result) const
+{
+ return canonicalID(result);
+}
+
+// Builds the descriptor into result by calling prefix(result), appending
+// PREFIX_DELIMITER ('/'), and then calling currentID(result).
+// Nothing is cleared first, so callers pass an empty string.
+UnicodeString&
+ICUServiceKey::currentDescriptor(UnicodeString& result) const
+{
+ prefix(result);
+ result.append(PREFIX_DELIMITER);
+ return currentID(result);
+}
+
+// This implementation has no fallback: it changes nothing and returns FALSE.
+UBool
+ICUServiceKey::fallback()
+{
+ return FALSE;
+}
+
+// This implementation returns true only when id equals this key's ID.
+UBool
+ICUServiceKey::isFallbackOf(const UnicodeString& id) const
+{
+ return id == _id;
+}
+
+// This implementation has no prefix: result is returned unmodified.
+UnicodeString&
+ICUServiceKey::prefix(UnicodeString& result) const
+{
+ return result;
+}
+
+// Truncates result at the first PREFIX_DELIMITER so that only the text before
+// it remains. If there is no delimiter, result is emptied. Returns result.
+UnicodeString&
+ICUServiceKey::parsePrefix(UnicodeString& result)
+{
+    const int32_t delimiterPos = result.indexOf(PREFIX_DELIMITER);
+    // no delimiter: remove from index 0, i.e. everything
+    result.remove(delimiterPos < 0 ? 0 : delimiterPos);
+    return result;
+}
+
+// Removes everything up to and including the first PREFIX_DELIMITER from
+// result. If there is no delimiter, result is left untouched. Returns result.
+UnicodeString&
+ICUServiceKey::parseSuffix(UnicodeString& result)
+{
+    const int32_t delimiterPos = result.indexOf(PREFIX_DELIMITER);
+    if (delimiterPos < 0) {
+        return result;
+    }
+    result.remove(0, delimiterPos + 1);
+    return result;
+}
+
+#ifdef SERVICE_DEBUG
+// Debug aid (SERVICE_DEBUG builds only): appends debugClass() output,
+// then " id: " and this key's ID, to result.
+UnicodeString&
+ICUServiceKey::debug(UnicodeString& result) const
+{
+ debugClass(result);
+ result.append((UnicodeString)" id: ");
+ result.append(_id);
+ return result;
+}
+
+// Debug aid (SERVICE_DEBUG builds only): appends the class name to result.
+UnicodeString&
+ICUServiceKey::debugClass(UnicodeString& result) const
+{
+ return result.append((UnicodeString)"ICUServiceKey");
+}
+#endif
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ICUServiceKey)
+
+/*
+******************************************************************
+*/
+
+// Out-of-line definition of the destructor; the body is empty.
+ICUServiceFactory::~ICUServiceFactory() {}
+
+// Stores the instance pointer (deleted later by the destructor), a copy of
+// id, and the visibility flag.
+SimpleFactory::SimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible)
+: _instance(instanceToAdopt), _id(id), _visible(visible)
+{
+}
+
+// Deletes the adopted service instance.
+SimpleFactory::~SimpleFactory()
+{
+ delete _instance;
+}
+
+// Returns service->cloneInstance(_instance) when status is a success code
+// and the key's current ID equals this factory's ID; otherwise returns NULL.
+UObject*
+SimpleFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const
+{
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+
+    UnicodeString currentId;
+    if (_id == key.currentID(currentId)) {
+        return service->cloneInstance(_instance);
+    }
+    return NULL;
+}
+
+// A visible factory maps its ID to itself in result; an invisible one
+// removes any mapping for its ID from result.
+void
+SimpleFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const
+{
+    if (!_visible) {
+        result.remove(_id);
+        return;
+    }
+    result.put(_id, (void*)this, status); // cast away const
+}
+
+// Sets result to this factory's ID when the factory is visible and id
+// matches that ID; otherwise sets result to bogus. The locale is ignored.
+UnicodeString&
+SimpleFactory::getDisplayName(const UnicodeString& id, const Locale& /* locale */, UnicodeString& result) const
+{
+    if (!_visible || !(_id == id)) {
+        result.setToBogus();
+        return result;
+    }
+    result = _id;
+    return result;
+}
+
+#ifdef SERVICE_DEBUG
+// Debug aid (SERVICE_DEBUG builds only): appends the class name, the ID and
+// the visibility flag ("T" or "F") to toAppendTo.
+UnicodeString&
+SimpleFactory::debug(UnicodeString& toAppendTo) const
+{
+ debugClass(toAppendTo);
+ toAppendTo.append((UnicodeString)" id: ");
+ toAppendTo.append(_id);
+ toAppendTo.append((UnicodeString)", visible: ");
+ toAppendTo.append(_visible ? (UnicodeString)"T" : (UnicodeString)"F");
+ return toAppendTo;
+}
+
+// Debug aid (SERVICE_DEBUG builds only): appends the class name to toAppendTo.
+UnicodeString&
+SimpleFactory::debugClass(UnicodeString& toAppendTo) const
+{
+ return toAppendTo.append((UnicodeString)"SimpleFactory");
+}
+#endif
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleFactory)
+
+/*
+******************************************************************
+*/
+
+// Out-of-line definition of the destructor; the body is empty.
+ServiceListener::~ServiceListener() {}
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ServiceListener)
+
+/*
+******************************************************************
+*/
+
+// Record the actual id for this service in the cache, so we can return it
+// even if we succeed later with a different id.
+//
+// A CacheEntry is manually reference counted: it starts with one reference,
+// ref() adds one, and unref() removes one and deletes the entry when the
+// count reaches zero. The count is a plain int32_t; this class does no
+// locking of its own.
+class CacheEntry : public UMemory {
+private:
+ int32_t refcount;
+
+public:
+ UnicodeString actualDescriptor;
+ UObject* service;
+
+ /**
+ * Deletes the service object held by this entry.
+ */
+ ~CacheEntry() {
+ delete service;
+ }
+
+ /**
+ * Creates an entry holding a copy of the descriptor and the given
+ * service pointer, with an initial reference count of one.
+ */
+ CacheEntry(const UnicodeString& _actualDescriptor, UObject* _service)
+ : refcount(1), actualDescriptor(_actualDescriptor), service(_service) {
+ }
+
+ /**
+ * Instantiation creates an initial reference, so don't call this
+ * unless you're creating a new pointer to this. Management of
+ * that pointer will have to know how to deal with refcounts.
+ * Increments the reference count and returns this entry.
+ */
+ CacheEntry* ref() {
+ ++refcount;
+ return this;
+ }
+
+ /**
+ * Destruction removes a reference, so don't call this unless
+ * you're removing a pointer to this somewhere. Management of that
+ * pointer will have to know how to deal with refcounts. Once
+ * the refcount drops to zero, the entry deletes itself. Returns
+ * NULL if this call deleted the entry, otherwise returns this.
+ */
+ CacheEntry* unref() {
+ if ((--refcount) == 0) {
+ delete this;
+ return NULL;
+ }
+ return this;
+ }
+
+ /**
+ * Return TRUE if more than one reference to this entry is
+ * currently held.
+ */
+ UBool isShared() const {
+ return refcount > 1;
+ }
+};
+
+// UObjectDeleter for serviceCache
+U_CDECL_BEGIN
+// Drops one reference on the CacheEntry passed as obj; the entry deletes
+// itself when its last reference goes away.
+static void U_CALLCONV
+cacheDeleter(void* obj) {
+ U_NAMESPACE_USE ((CacheEntry*)obj)->unref();
+}
+
+/**
+* Deleter for UObjects: casts obj to UObject* and deletes it.
+*/
+static void U_CALLCONV
+deleteUObject(void *obj) {
+ U_NAMESPACE_USE delete (UObject*) obj;
+}
+U_CDECL_END
+
+/*
+******************************************************************
+*/
+
+// Display-name cache: a hashtable together with the locale it was built for.
+// ICUService::getDisplayNames fills 'cache' with display name -> id entries.
+class DNCache : public UMemory {
+public:
+ Hashtable cache;
+ const Locale locale;
+
+ // Creates an empty cache tagged with a copy of _locale.
+ DNCache(const Locale& _locale)
+ : cache(), locale(_locale)
+ {
+ // cache.setKeyDeleter(uprv_deleteUObject);
+ }
+};
+
+
+/*
+******************************************************************
+*/
+
+// Allocates a StringPair from the two strings. Returns NULL without
+// allocating if status is already a failure. If allocation fails or either
+// copied string is bogus, sets status to U_MEMORY_ALLOCATION_ERROR and
+// returns NULL.
+StringPair*
+StringPair::create(const UnicodeString& displayName,
+                   const UnicodeString& id,
+                   UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+
+    StringPair* pair = new StringPair(displayName, id);
+    if (pair != NULL && !pair->isBogus()) {
+        return pair;
+    }
+
+    status = U_MEMORY_ALLOCATION_ERROR;
+    delete pair;
+    return NULL;
+}
+
+// Returns true if either of the two stored strings is bogus.
+UBool
+StringPair::isBogus() const {
+ return displayName.isBogus() || id.isBogus();
+}
+
+// Stores copies of the two strings.
+StringPair::StringPair(const UnicodeString& _displayName,
+ const UnicodeString& _id)
+: displayName(_displayName)
+, id(_id)
+{
+}
+
+U_CDECL_BEGIN
+// Deleter for StringPair objects; installed on the result vector by
+// ICUService::getDisplayNames.
+static void U_CALLCONV
+userv_deleteStringPair(void *obj) {
+ U_NAMESPACE_USE delete (StringPair*) obj;
+}
+U_CDECL_END
+
+/*
+******************************************************************
+*/
+
+static UMutex lock;
+
+// Creates a service with an empty name, timestamp 0, no factories and no caches.
+ICUService::ICUService()
+: name()
+, timestamp(0)
+, factories(NULL)
+, serviceCache(NULL)
+, idCache(NULL)
+, dnCache(NULL)
+{
+}
+
+// Creates a service named newName, with timestamp 0, no factories and no caches.
+ICUService::ICUService(const UnicodeString& newName)
+: name(newName)
+, timestamp(0)
+, factories(NULL)
+, serviceCache(NULL)
+, idCache(NULL)
+, dnCache(NULL)
+{
+}
+
+// Under the file-level lock, discards all caches and deletes the factory list.
+ICUService::~ICUService()
+{
+ {
+ Mutex mutex(&lock);
+ clearCaches();
+ delete factories;
+ factories = NULL;
+ }
+}
+
+// Convenience overload: same as get(descriptor, NULL, status).
+UObject*
+ICUService::get(const UnicodeString& descriptor, UErrorCode& status) const
+{
+ return get(descriptor, NULL, status);
+}
+
+// Creates a temporary key from descriptor, resolves it through getKey(), and
+// deletes the key. Returns NULL if no key could be created.
+UObject*
+ICUService::get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const
+{
+    ICUServiceKey* key = createKey(&descriptor, status);
+    if (key == NULL) {
+        return NULL;
+    }
+
+    UObject* service = getKey(*key, actualReturn, status);
+    delete key;
+    return service;
+}
+
+// Convenience overload: same as getKey(key, NULL, status).
+UObject*
+ICUService::getKey(ICUServiceKey& key, UErrorCode& status) const
+{
+ return getKey(key, NULL, status);
+}
+
+// This is the hook that subclasses of ICUService can override to further customize
+// the result object before returning it. All other public get functions should call this one.
+// This implementation forwards to the four-argument getKey with a NULL factory.
+
+UObject*
+ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const
+{
+ return getKey(key, actualReturn, NULL, status);
+}
+
+// Make it possible to call reentrantly on systems that don't have reentrant mutexes.
+// We can use this simple approach since we know the situation where we're calling
+// reentrantly even without knowing the thread.
+//
+// Scoped lock guard: locks the mutex in the constructor and unlocks it in the
+// destructor, unless 'reentering' is true, in which case it does neither.
+class XMutex : public UMemory {
+public:
+ inline XMutex(UMutex *mutex, UBool reentering)
+ : fMutex(mutex)
+ , fActive(!reentering)
+ {
+ if (fActive) umtx_lock(fMutex);
+ }
+ inline ~XMutex() {
+ if (fActive) umtx_unlock(fMutex);
+ }
+
+private:
+ UMutex *fMutex; // the mutex to lock/unlock
+ UBool fActive; // true when this guard actually took the lock
+};
+
+// Minimal scope guard for a UVector: deletes whatever _obj points to when the
+// guard goes out of scope. _obj starts out NULL and is assigned by the user.
+struct UVectorDeleter {
+ UVector* _obj;
+ UVectorDeleter() : _obj(NULL) {}
+ ~UVectorDeleter() { delete _obj; }
+};
+
+// called only by factories, treat as private
+//
+// Core lookup. Resolves key to a service object by consulting serviceCache
+// and then the registered factories, advancing the key through
+// key.fallback() until something matches. Returns a clone of the matched
+// service (via cloneInstance), or the result of handleDefault() when the
+// service is in its default state or nothing matches.
+//
+// key          - the key to resolve; modified as fallbacks are tried.
+// actualReturn - if non-NULL, receives the descriptor that actually matched,
+//                with a leading '/' (null prefix) stripped.
+// factory      - if non-NULL, only factories positioned after this one in the
+//                list are consulted, the lock is not taken (the caller is
+//                assumed to be running under it), and the cache is not updated.
+// status       - U_ILLEGAL_ARGUMENT_ERROR if factory is not registered,
+//                U_MEMORY_ALLOCATION_ERROR on allocation failure.
+UObject*
+ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUServiceFactory* factory, UErrorCode& status) const
+{
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+
+    if (isDefault()) {
+        return handleDefault(key, actualReturn, status);
+    }
+
+    ICUService* ncthis = (ICUService*)this; // cast away semantic const
+
+    CacheEntry* result = NULL;
+    // TRUE only while 'result' is an entry created by this call that has not
+    // been handed to serviceCache. Entries obtained from, or stored in, the
+    // cache must never be deleted here.
+    UBool ownsResult = FALSE;
+    {
+        // The factory list can't be modified until we're done,
+        // otherwise we might update the cache with an invalid result.
+        // The cache has to stay in synch with the factory list.
+        // ICU doesn't have monitors so we can't use rw locks, so
+        // we single-thread everything using this service, for now.
+
+        // if factory is not null, we're calling from within the mutex,
+        // and since some unix machines don't have reentrant mutexes we
+        // need to make sure not to try to lock it again.
+        XMutex mutex(&lock, factory != NULL);
+
+        if (serviceCache == NULL) {
+            ncthis->serviceCache = new Hashtable(status);
+            if (ncthis->serviceCache == NULL) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return NULL;
+            }
+            if (U_FAILURE(status)) {
+                delete serviceCache;
+                // do not leave a dangling pointer behind for clearCaches()
+                ncthis->serviceCache = NULL;
+                return NULL;
+            }
+            serviceCache->setValueDeleter(cacheDeleter);
+        }
+
+        UnicodeString currentDescriptor;
+        UVectorDeleter cacheDescriptorList;
+        UBool putInCache = FALSE;
+
+        int32_t startIndex = 0;
+        int32_t limit = factories->size();
+        UBool cacheResult = TRUE;
+
+        if (factory != NULL) {
+            for (int32_t i = 0; i < limit; ++i) {
+                if (factory == (const ICUServiceFactory*)factories->elementAt(i)) {
+                    startIndex = i + 1;
+                    break;
+                }
+            }
+            if (startIndex == 0) {
+                // throw new InternalError("Factory " + factory + "not registered with service: " + this);
+                status = U_ILLEGAL_ARGUMENT_ERROR;
+                return NULL;
+            }
+            cacheResult = FALSE;
+        }
+
+        do {
+            currentDescriptor.remove();
+            key.currentDescriptor(currentDescriptor);
+            result = (CacheEntry*)serviceCache->get(currentDescriptor);
+            if (result != NULL) {
+                break;
+            }
+
+            // first test of cache failed, so we'll have to update
+            // the cache if we eventually succeed-- that is, if we're
+            // going to update the cache at all.
+            putInCache = TRUE;
+
+            int32_t index = startIndex;
+            while (index < limit) {
+                ICUServiceFactory* f = (ICUServiceFactory*)factories->elementAt(index++);
+                UObject* service = f->create(key, this, status);
+                if (U_FAILURE(status)) {
+                    delete service;
+                    return NULL;
+                }
+                if (service != NULL) {
+                    result = new CacheEntry(currentDescriptor, service);
+                    if (result == NULL) {
+                        delete service;
+                        status = U_MEMORY_ALLOCATION_ERROR;
+                        return NULL;
+                    }
+                    ownsResult = TRUE;
+
+                    goto outerEnd;
+                }
+            }
+
+            // prepare to load the cache with all additional ids that
+            // will resolve to result, assuming we'll succeed. We
+            // don't want to keep querying on an id that's going to
+            // fallback to the one that succeeded, we want to hit the
+            // cache the first time next goaround.
+            if (cacheDescriptorList._obj == NULL) {
+                cacheDescriptorList._obj = new UVector(uprv_deleteUObject, NULL, 5, status);
+                if (cacheDescriptorList._obj == NULL && U_SUCCESS(status)) {
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                }
+                if (U_FAILURE(status)) {
+                    return NULL;
+                }
+            }
+            UnicodeString* idToCache = new UnicodeString(currentDescriptor);
+            if (idToCache == NULL || idToCache->isBogus()) {
+                delete idToCache;
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return NULL;
+            }
+
+            cacheDescriptorList._obj->addElement(idToCache, status);
+            if (U_FAILURE(status)) {
+                // the vector did not take the element, so it is still ours
+                // (same handling as in getVisibleIDs)
+                delete idToCache;
+                return NULL;
+            }
+        } while (key.fallback());
+outerEnd:
+
+        if (result != NULL) {
+            if (putInCache && cacheResult) {
+                serviceCache->put(result->actualDescriptor, result, status);
+                // from here on the entry is the cache's responsibility
+                ownsResult = FALSE;
+                if (U_FAILURE(status)) {
+                    return NULL;
+                }
+
+                if (cacheDescriptorList._obj != NULL) {
+                    for (int32_t i = cacheDescriptorList._obj->size(); --i >= 0;) {
+                        UnicodeString* desc = (UnicodeString*)cacheDescriptorList._obj->elementAt(i);
+
+                        serviceCache->put(*desc, result, status);
+                        if (U_FAILURE(status)) {
+                            return NULL;
+                        }
+
+                        // one more cache slot now points at the entry
+                        result->ref();
+                        cacheDescriptorList._obj->removeElementAt(i);
+                    }
+                }
+            }
+
+            if (actualReturn != NULL) {
+                // strip null prefix
+                if (result->actualDescriptor.indexOf((UChar)0x2f) == 0) { // U+002f=slash (/)
+                    actualReturn->remove();
+                    actualReturn->append(result->actualDescriptor,
+                                         1,
+                                         result->actualDescriptor.length() - 1);
+                } else {
+                    *actualReturn = result->actualDescriptor;
+                }
+
+                if (actualReturn->isBogus()) {
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                    if (ownsResult) {
+                        delete result;
+                    }
+                    return NULL;
+                }
+            }
+
+            UObject* service = cloneInstance(result->service);
+            if (ownsResult) {
+                delete result;
+            }
+            return service;
+        }
+    }
+
+    return handleDefault(key, actualReturn, status);
+}
+
+// This implementation ignores all of its arguments and returns NULL.
+UObject*
+ICUService::handleDefault(const ICUServiceKey& /* key */, UnicodeString* /* actualIDReturn */, UErrorCode& /* status */) const
+{
+ return NULL;
+}
+
+// Convenience overload: same as getVisibleIDs(result, NULL, status).
+UVector&
+ICUService::getVisibleIDs(UVector& result, UErrorCode& status) const {
+ return getVisibleIDs(result, NULL, status);
+}
+
+// Empties result, then fills it with newly allocated copies of the visible
+// IDs. When matchID is non-NULL, only IDs accepted by isFallbackOf() on a key
+// created from matchID are included. On any failure result is emptied again.
+UVector&
+ICUService::getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorCode& status) const
+{
+    result.removeAllElements();
+
+    if (U_FAILURE(status)) {
+        return result;
+    }
+
+    {
+        Mutex mutex(&lock);
+        const Hashtable* map = getVisibleIDMap(status);
+        if (map != NULL) {
+            ICUServiceKey* fallbackKey = createKey(matchID, status);
+
+            int32_t pos = UHASH_FIRST;
+            const UHashElement* e = NULL;
+            while ((e = map->nextElement(pos)) != NULL) {
+                const UnicodeString* id = (const UnicodeString*)e->key.pointer;
+                if (fallbackKey != NULL && !fallbackKey->isFallbackOf(*id)) {
+                    continue;
+                }
+
+                UnicodeString* idClone = new UnicodeString(*id);
+                if (idClone == NULL || idClone->isBogus()) {
+                    delete idClone;
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                    break;
+                }
+                result.addElement(idClone, status);
+                if (U_FAILURE(status)) {
+                    // the vector did not take the clone; release it here
+                    delete idClone;
+                    break;
+                }
+            }
+            delete fallbackKey;
+        }
+    }
+
+    if (U_FAILURE(status)) {
+        result.removeAllElements();
+    }
+    return result;
+}
+
+// Returns the map from visible ID to the factory that handles it, building
+// it on first use by letting every registered factory (last to first) update
+// it. Returns NULL, with status set, if the map could not be built; a map
+// whose construction or population failed is never kept or returned.
+const Hashtable*
+ICUService::getVisibleIDMap(UErrorCode& status) const {
+    if (U_FAILURE(status)) return NULL;
+
+    // must only be called when lock is already held
+
+    ICUService* ncthis = (ICUService*)this; // cast away semantic const
+    if (idCache == NULL) {
+        ncthis->idCache = new Hashtable(status);
+        if (idCache == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        } else {
+            if (factories != NULL) {
+                // stop as soon as something fails; the map is discarded below anyway
+                for (int32_t pos = factories->size(); U_SUCCESS(status) && --pos >= 0;) {
+                    ICUServiceFactory* f = (ICUServiceFactory*)factories->elementAt(pos);
+                    f->updateVisibleIDs(*idCache, status);
+                }
+            }
+            // This check also covers a Hashtable whose constructor reported
+            // a failure while there are no factories registered.
+            if (U_FAILURE(status)) {
+                delete idCache;
+                ncthis->idCache = NULL;
+            }
+        }
+    }
+
+    return idCache;
+}
+
+
+// Convenience overload: uses Locale::getDefault() as the locale.
+UnicodeString&
+ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result) const
+{
+ return getDisplayName(id, result, Locale::getDefault());
+}
+
+// Looks up the factory that handles id in the visible-ID map and asks it for
+// the display name in the given locale. If id itself is not in the map, the
+// fallbacks of a key created from id are tried in turn. Sets result to bogus
+// when no factory is found.
+UnicodeString&
+ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result, const Locale& locale) const
+{
+    {
+        UErrorCode status = U_ZERO_ERROR;
+        Mutex mutex(&lock);
+        const Hashtable* map = getVisibleIDMap(status);
+        if (map != NULL) {
+            ICUServiceFactory* f = (ICUServiceFactory*)map->get(id);
+            if (f != NULL) {
+                f->getDisplayName(id, locale, result);
+                return result;
+            }
+
+            // fallback
+            status = U_ZERO_ERROR;
+            ICUServiceKey* fallbackKey = createKey(&id, status);
+            if (fallbackKey != NULL) {
+                while (fallbackKey->fallback()) {
+                    UnicodeString us;
+                    fallbackKey->currentID(us);
+                    f = (ICUServiceFactory*)map->get(us);
+                    if (f != NULL) {
+                        f->getDisplayName(id, locale, result);
+                        delete fallbackKey;
+                        return result;
+                    }
+                }
+            }
+            delete fallbackKey;
+        }
+    }
+    result.setToBogus();
+    return result;
+}
+
+// Convenience overload: default locale, no matchID filter.
+UVector&
+ICUService::getDisplayNames(UVector& result, UErrorCode& status) const
+{
+ return getDisplayNames(result, Locale::getDefault(), NULL, status);
+}
+
+
+// Convenience overload: given locale, no matchID filter.
+UVector&
+ICUService::getDisplayNames(UVector& result, const Locale& locale, UErrorCode& status) const
+{
+ return getDisplayNames(result, locale, NULL, status);
+}
+
+// Empties result, installs the StringPair deleter on it, and fills it with
+// one StringPair per visible ID for the given locale. The display-name cache
+// (dnCache) is rebuilt when it is missing or was built for another locale.
+// When matchID is non-NULL, only IDs accepted by isFallbackOf() on a key
+// created from matchID are included.
+//
+// If status is already a failure on entry, result is returned empty and no
+// cache is touched. On a failure while filling, result is emptied.
+UVector&
+ICUService::getDisplayNames(UVector& result,
+                            const Locale& locale,
+                            const UnicodeString* matchID,
+                            UErrorCode& status) const
+{
+    result.removeAllElements();
+    result.setDeleter(userv_deleteStringPair);
+    if (U_FAILURE(status)) {
+        // Nothing gets built in this case, so dnCache may well be NULL;
+        // bail out before the iteration below dereferences it.
+        return result;
+    }
+
+    {
+        ICUService* ncthis = (ICUService*)this; // cast away semantic const
+        Mutex mutex(&lock);
+
+        if (dnCache != NULL && dnCache->locale != locale) {
+            delete dnCache;
+            ncthis->dnCache = NULL;
+        }
+
+        if (dnCache == NULL) {
+            const Hashtable* m = getVisibleIDMap(status);
+            if (U_FAILURE(status)) {
+                return result;
+            }
+            ncthis->dnCache = new DNCache(locale);
+            if (dnCache == NULL) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return result;
+            }
+
+            int32_t pos = UHASH_FIRST;
+            const UHashElement* entry = NULL;
+            while ((entry = m->nextElement(pos)) != NULL) {
+                const UnicodeString* id = (const UnicodeString*)entry->key.pointer;
+                ICUServiceFactory* f = (ICUServiceFactory*)entry->value.pointer;
+                UnicodeString dname;
+                f->getDisplayName(*id, locale, dname);
+                if (dname.isBogus()) {
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                } else {
+                    dnCache->cache.put(dname, (void*)id, status); // share pointer with visibleIDMap
+                    if (U_SUCCESS(status)) {
+                        continue;
+                    }
+                }
+                // any failure: throw the half-built cache away
+                delete dnCache;
+                ncthis->dnCache = NULL;
+                return result;
+            }
+        }
+    }
+
+    ICUServiceKey* matchKey = createKey(matchID, status);
+    /* To ensure that all elements in the hashtable are iterated, set pos to -1.
+     * nextElement(pos) will skip the position at pos and begin the iteration
+     * at the next position, which in this case will be 0.
+     */
+    int32_t pos = UHASH_FIRST;
+    const UHashElement *entry = NULL;
+    while ((entry = dnCache->cache.nextElement(pos)) != NULL) {
+        // dnCache->cache maps display name (key) -> id (value)
+        const UnicodeString* id = (const UnicodeString*)entry->value.pointer;
+        if (matchKey != NULL && !matchKey->isFallbackOf(*id)) {
+            continue;
+        }
+        const UnicodeString* dn = (const UnicodeString*)entry->key.pointer;
+        // NOTE(review): StringPair::create is declared as (displayName, id),
+        // but the id is passed first here. Kept as is because callers may
+        // rely on the resulting field contents; confirm the intended order.
+        StringPair* sp = StringPair::create(*id, *dn, status);
+        result.addElement(sp, status);
+        if (U_FAILURE(status)) {
+            // the vector did not take the pair (same handling as in
+            // getVisibleIDs); sp is NULL if create() itself failed
+            delete sp;
+            result.removeAllElements();
+            break;
+        }
+    }
+    delete matchKey;
+
+    return result;
+}
+
+// Convenience overload: registers the instance as visible.
+URegistryKey
+ICUService::registerInstance(UObject* objToAdopt, const UnicodeString& id, UErrorCode& status)
+{
+ return registerInstance(objToAdopt, id, TRUE, status);
+}
+
+// Wraps objToAdopt in a factory created by createSimpleFactory() under the
+// canonical form of id, and registers that factory. If no key or no factory
+// can be created, objToAdopt is deleted and NULL is returned.
+URegistryKey
+ICUService::registerInstance(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status)
+{
+    ICUServiceKey* key = createKey(&id, status);
+    if (key == NULL) {
+        delete objToAdopt;
+        return NULL;
+    }
+
+    // the key is only needed to canonicalize the id
+    UnicodeString canonicalID;
+    key->canonicalID(canonicalID);
+    delete key;
+
+    ICUServiceFactory* f = createSimpleFactory(objToAdopt, canonicalID, visible, status);
+    if (f == NULL) {
+        delete objToAdopt;
+        return NULL;
+    }
+    return registerFactory(f, status);
+}
+
+// Creates a SimpleFactory for the given instance, id and visibility.
+// Returns NULL if status is already a failure. If objToAdopt is NULL or id is
+// bogus, sets status to U_ILLEGAL_ARGUMENT_ERROR and returns NULL.
+ICUServiceFactory*
+ICUService::createSimpleFactory(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+    if (objToAdopt == NULL || id.isBogus()) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    return new SimpleFactory(objToAdopt, id, visible);
+}
+
+// Adopts factoryToAdopt and inserts it at the front of the factory list, so
+// the most recently registered factory is consulted first. On success the
+// caches are cleared, listeners are notified, and the factory pointer is
+// returned as the registry key.
+//
+// Returns NULL if factoryToAdopt is NULL. If status is already a failure, or
+// registration fails, the factory is deleted and NULL is returned, so a
+// non-NULL key always refers to a factory that is actually registered.
+URegistryKey
+ICUService::registerFactory(ICUServiceFactory* factoryToAdopt, UErrorCode& status)
+{
+    if (factoryToAdopt == NULL) {
+        return NULL;
+    }
+    if (U_FAILURE(status)) {
+        // adopted, so it is ours to release even though nothing is registered
+        delete factoryToAdopt;
+        return NULL;
+    }
+
+    {
+        Mutex mutex(&lock);
+
+        if (factories == NULL) {
+            // Build the list in a local first so that 'factories' is never
+            // left pointing at a deleted or half-constructed vector.
+            UVector* newFactories = new UVector(deleteUObject, NULL, status);
+            if (newFactories == NULL && U_SUCCESS(status)) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+            }
+            if (U_FAILURE(status)) {
+                delete newFactories;
+                delete factoryToAdopt;
+                return NULL;
+            }
+            factories = newFactories;
+        }
+        factories->insertElementAt(factoryToAdopt, 0, status);
+        if (U_FAILURE(status)) {
+            delete factoryToAdopt;
+            return NULL;
+        }
+        clearCaches();
+    }
+
+    // notify outside the lock
+    notifyChanged();
+
+    return (URegistryKey)factoryToAdopt;
+}
+
+// Removes the factory identified by rkey from the factory list. On success
+// the caches are cleared, listeners are notified, and TRUE is returned. If
+// the factory is not in the list, status is set to U_ILLEGAL_ARGUMENT_ERROR,
+// the factory is deleted, and FALSE is returned. Does nothing and returns
+// FALSE when rkey is NULL or no factory list exists.
+UBool
+ICUService::unregister(URegistryKey rkey, UErrorCode& status)
+{
+    ICUServiceFactory *factory = (ICUServiceFactory*)rkey;
+    if (factory == NULL || factories == NULL) {
+        return FALSE;
+    }
+
+    UBool removed = FALSE;
+    {
+        Mutex mutex(&lock);
+
+        if (factories->removeElement(factory)) {
+            clearCaches();
+            removed = TRUE;
+        } else {
+            status = U_ILLEGAL_ARGUMENT_ERROR;
+            delete factory;
+        }
+    }
+
+    // notify outside the lock
+    if (removed) {
+        notifyChanged();
+    }
+    return removed;
+}
+
+// Under the lock, reinitializes the factory list and clears all caches;
+// then notifies listeners after the lock has been released.
+void
+ICUService::reset()
+{
+ {
+ Mutex mutex(&lock);
+ reInitializeFactories();
+ clearCaches();
+ }
+ notifyChanged();
+}
+
+// This implementation removes every factory from the list, if a list exists.
+void
+ICUService::reInitializeFactories()
+{
+ if (factories != NULL) {
+ factories->removeAllElements();
+ }
+}
+
+// This implementation treats the service as default when it has no factories.
+UBool
+ICUService::isDefault() const
+{
+ return countFactories() == 0;
+}
+
+// Returns a newly allocated ICUServiceKey built from *id, or NULL when status
+// is a failure or id is NULL. The caller owns the returned key.
+ICUServiceKey*
+ICUService::createKey(const UnicodeString* id, UErrorCode& status) const
+{
+    if (U_FAILURE(status) || id == NULL) {
+        return NULL;
+    }
+    return new ICUServiceKey(*id);
+}
+
+// Increments the timestamp and deletes the display-name, visible-ID and
+// service caches, resetting each pointer to NULL.
+void
+ICUService::clearCaches()
+{
+ // callers synchronize before use
+ ++timestamp;
+ delete dnCache;
+ dnCache = NULL;
+ delete idCache;
+ idCache = NULL;
+ delete serviceCache; serviceCache = NULL;
+}
+
+// Deletes only the service cache; unlike clearCaches(), the timestamp and the
+// other two caches are left alone.
+void
+ICUService::clearServiceCache()
+{
+ // callers synchronize before use
+ delete serviceCache; serviceCache = NULL;
+}
+
+// Accepts a listener only if it is a ServiceListener (checked with dynamic_cast).
+UBool
+ICUService::acceptsListener(const EventListener& l) const
+{
+ return dynamic_cast<const ServiceListener*>(&l) != NULL;
+}
+
+// Casts l to ServiceListener without a runtime check and calls
+// serviceChanged(*this) on it.
+void
+ICUService::notifyListener(EventListener& l) const
+{
+ ((ServiceListener&)l).serviceChanged(*this);
+}
+
+// Appends the service name to result (result is not cleared first) and returns result.
+UnicodeString&
+ICUService::getName(UnicodeString& result) const
+{
+ return result.append(name);
+}
+
+// Returns the number of registered factories; 0 when no factory list exists.
+int32_t
+ICUService::countFactories() const
+{
+ return factories == NULL ? 0 : factories->size();
+}
+
+// Returns the current timestamp, a counter that clearCaches() increments.
+int32_t
+ICUService::getTimestamp() const
+{
+ return timestamp;
+}
+
+U_NAMESPACE_END
+
+/* UCONFIG_NO_SERVICE */
+#endif
diff --git a/thirdparty/icu4c/common/serv.h b/thirdparty/icu4c/common/serv.h
new file mode 100644
index 0000000000..ca070b6c6d
--- /dev/null
+++ b/thirdparty/icu4c/common/serv.h
@@ -0,0 +1,996 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2011, International Business Machines Corporation. *
+ * All Rights Reserved. *
+ *******************************************************************************
+ */
+
+#ifndef ICUSERV_H
+#define ICUSERV_H
+
+#include "unicode/utypes.h"
+
+#if UCONFIG_NO_SERVICE
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Allow the declaration of APIs with pointers to ICUService
+ * even when service is removed from the build.
+ */
+class ICUService;
+
+U_NAMESPACE_END
+
+#else
+
+#include "unicode/unistr.h"
+#include "unicode/locid.h"
+#include "unicode/umisc.h"
+
+#include "hash.h"
+#include "uvector.h"
+#include "servnotf.h"
+
+class ICUServiceTest;
+
+U_NAMESPACE_BEGIN
+
+class ICUServiceKey;
+class ICUServiceFactory;
+class SimpleFactory;
+class ServiceListener;
+class ICUService;
+
+class DNCache;
+
+/*******************************************************************
+ * ICUServiceKey
+ */
+
+/**
+ * <p>ICUServiceKeys are used to communicate with factories to
+ * generate an instance of the service. ICUServiceKeys define how
+ * ids are canonicalized, provide both a current id and a current
+ * descriptor to use in querying the cache and factories, and
+ * determine the fallback strategy.</p>
+ *
+ * <p>ICUServiceKeys provide both a currentDescriptor and a currentID.
+ * The descriptor contains an optional prefix, followed by '/'
+ * and the currentID. Factories that handle complex keys,
+ * for example number format factories that generate multiple
+ * kinds of formatters for the same locale, use the descriptor
+ * to provide a fully unique identifier for the service object,
+ * while using the currentID (in this case, the locale string),
+ * as the visible IDs that can be localized.</p>
+ *
+ * <p>The default implementation of ICUServiceKey has no fallbacks and
+ * has no custom descriptors.</p>
+ */
+class U_COMMON_API ICUServiceKey : public UObject {
+ private:
+ const UnicodeString _id;
+
+ protected:
+ static const UChar PREFIX_DELIMITER;
+
+ public:
+
+ /**
+ * <p>Construct a key from an id.</p>
+ *
+ * @param id the ID from which to construct the key.
+ */
+ ICUServiceKey(const UnicodeString& id);
+
+ /**
+ * <p>Virtual destructor.</p>
+ */
+ virtual ~ICUServiceKey();
+
+ /**
+ * <p>Return the original ID used to construct this key.</p>
+ *
+ * @return the ID used to construct this key.
+ */
+ virtual const UnicodeString& getID() const;
+
+ /**
+ * <p>Return the canonical version of the original ID. This implementation
+ * appends the original ID to result. Result is returned as a convenience.</p>
+ *
+ * @param result the output parameter to which the id will be appended.
+ * @return the modified result.
+ */
+ virtual UnicodeString& canonicalID(UnicodeString& result) const;
+
+ /**
+ * <p>Return the (canonical) current ID. This implementation appends
+ * the canonical ID to result. Result is returned as a convenience.</p>
+ *
+ * @param result the output parameter to which the current id will be appended.
+ * @return the modified result.
+ */
+ virtual UnicodeString& currentID(UnicodeString& result) const;
+
+ /**
+ * <p>Return the current descriptor. This implementation appends
+ * the current descriptor to result. Result is returned as a convenience.</p>
+ *
+ * <p>The current descriptor is used to fully
+ * identify an instance of the service in the cache. A
+ * factory may handle all descriptors for an ID, or just a
+ * particular descriptor. The factory can either parse the
+ * descriptor or use custom API on the key in order to
+ * instantiate the service.</p>
+ *
+ * @param result the output parameter to which the current descriptor will be appended.
+ * @return the modified result.
+ */
+ virtual UnicodeString& currentDescriptor(UnicodeString& result) const;
+
+ /**
+ * <p>If the key has a fallback, modify the key and return true,
+ * otherwise return false. The current ID will change if there
+ * is a fallback. No currentIDs should be repeated, and fallback
+ * must eventually return false. This implementation has no fallbacks
+ * and always returns false.</p>
+ *
+ * @return true if the ICUServiceKey changed to a valid fallback value.
+ */
+ virtual UBool fallback();
+
+ /**
+ * <p>Return true if a key created from id matches, or would eventually
+ * fallback to match, the canonical ID of this ICUServiceKey.</p>
+ *
+ * @param id the id to test.
+ * @return true if this ICUServiceKey's canonical ID is a fallback of id.
+ */
+ virtual UBool isFallbackOf(const UnicodeString& id) const;
+
+ /**
+ * <p>Return the prefix. This implementation leaves result unchanged.
+ * Result is returned as a convenience.</p>
+ *
+ * @param result the output parameter to which the prefix will be appended.
+ * @return the modified result.
+ */
+ virtual UnicodeString& prefix(UnicodeString& result) const;
+
+ /**
+ * <p>A utility to parse the prefix out of a descriptor string. Only
+ * the (undelimited) prefix, if any, remains in result. Result is returned as a
+ * convenience.</p>
+ *
+ * @param result an input/output parameter that on entry is a descriptor, and
+ * on exit is the prefix of that descriptor.
+ * @return the modified result.
+ */
+ static UnicodeString& parsePrefix(UnicodeString& result);
+
+ /**
+ * <p>A utility to parse the suffix out of a descriptor string. Only
+ * the (undelimited) suffix, if any, remains in result. Result is returned as a
+ * convenience.</p>
+ *
+ * @param result an input/output parameter that on entry is a descriptor, and
+ * on exit is the suffix of that descriptor.
+ * @return the modified result.
+ */
+ static UnicodeString& parseSuffix(UnicodeString& result);
+
+public:
+ /**
+ * UObject RTTI boilerplate.
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * UObject RTTI boilerplate.
+ */
+ virtual UClassID getDynamicClassID() const;
+
+#ifdef SERVICE_DEBUG
+ public:
+ virtual UnicodeString& debug(UnicodeString& result) const;
+ virtual UnicodeString& debugClass(UnicodeString& result) const;
+#endif
+
+};
+
+ /*******************************************************************
+ * ICUServiceFactory
+ */
+
+ /**
+ * <p>An implementing ICUServiceFactory generates the service objects maintained by the
+ * service. A factory generates a service object from a key,
+ * updates id->factory mappings, and returns the display name for
+ * a supported id.</p>
+ */
+class U_COMMON_API ICUServiceFactory : public UObject {
+ public:
+ virtual ~ICUServiceFactory();
+
+ /**
+ * <p>Create a service object from the key, if this factory
+ * supports the key. Otherwise, return NULL.</p>
+ *
+ * <p>If the factory supports the key, then it can call
+ * the service's getKey(ICUServiceKey&, UnicodeString*, const ICUServiceFactory*, UErrorCode&) method
+ * passing itself as the factory to get the object that
+ * the service would have created prior to the factory's
+ * registration with the service. This can change the
+ * key, so any information required from the key should
+ * be extracted before making such a callback.</p>
+ *
+ * @param key the service key.
+ * @param service the service with which this factory is registered.
+ * @param status the error code status.
+ * @return the service object, or NULL if the factory does not support the key.
+ */
+ virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const = 0;
+
+ /**
+ * <p>Update result to reflect the IDs (not descriptors) that this
+ * factory publicly handles. Result contains mappings from ID to
+ * factory. On entry it will contain all (visible) mappings from
+ * previously-registered factories.</p>
+ *
+ * <p>This function, together with getDisplayName, are used to
+ * support ICUService::getDisplayNames. The factory determines
+ * which IDs (of those it supports) it will make visible, and of
+ * those, which it will provide localized display names for. In
+ * most cases it will register mappings from all IDs it supports
+ * to itself.</p>
+ *
+ * @param result the mapping table to update.
+ * @param status the error code status.
+ */
+ virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const = 0;
+
+ /**
+ * <p>Return, in result, the display name of the id in the provided locale.
+ * This is an id, not a descriptor. If the id is
+ * not visible, sets result to bogus. If the
+ * incoming result is bogus, it remains bogus. Result is returned as a
+ * convenience. Results are not defined if id is not one supported by this
+ * factory.</p>
+ *
+ * @param id a visible id supported by this factory.
+ * @param locale the locale for which to generate the corresponding localized display name.
+ * @param result output parameter to hold the display name.
+ * @return result.
+ */
+ virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const = 0;
+};
+
+/*
+ ******************************************************************
+ */
+
+ /**
+ * <p>A default implementation of ICUServiceFactory. This provides default
+ * implementations for subclasses, and implements a singleton
+ * factory that matches a single ID and returns a single
+ * (possibly deferred-initialized) instance. This implements
+ * updateVisibleIDs to add a mapping from its ID to itself
+ * if visible is true, or to remove any existing mapping
+ * for its ID if visible is false. No localization of display
+ * names is performed.</p>
+ */
+class U_COMMON_API SimpleFactory : public ICUServiceFactory {
+ protected:
+ UObject* _instance; // presumably the adopted service instance; confirm in the constructor
+ const UnicodeString _id; // presumably the single ID this factory matches; confirm in the constructor
+ const UBool _visible; // presumably the constructor's visible flag; confirm in the constructor
+
+ public:
+ /**
+ * <p>Construct a SimpleFactory that maps a single ID to a single
+ * service instance. If visible is true, the ID will be visible.
+ * The instance must not be NULL. The SimpleFactory will adopt
+ * the instance, which must not be changed subsequent to this call.</p>
+ *
+ * @param instanceToAdopt the service instance to adopt; must not be NULL.
+ * @param id the ID to assign to this service instance.
+ * @param visible if true (the default), the ID will be visible.
+ */
+ SimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible = true);
+
+ /**
+ * <p>Destructor.</p>
+ */
+ virtual ~SimpleFactory();
+
+ /**
+ * <p>This implementation returns a clone of the service instance if the factory's ID is equal to
+ * the key's currentID. Service and prefix are ignored.</p>
+ *
+ * @param key the service key.
+ * @param service the service with which this factory is registered.
+ * @param status the error code status.
+ * @return the service object, or NULL if the factory does not support the key.
+ */
+ virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
+
+ /**
+ * <p>This implementation adds a mapping from ID -> this to result if visible is true,
+ * otherwise it removes ID from result.</p>
+ *
+ * @param result the mapping table to update.
+ * @param status the error code status.
+ */
+ virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const;
+
+ /**
+ * <p>This implementation returns the factory ID if it equals id and visible is true,
+ * otherwise it returns the empty string. (This implementation provides
+ * no localized id information.)</p>
+ *
+ * @param id a visible id supported by this factory.
+ * @param locale the locale for which to generate the corresponding localized display name.
+ * @param result output parameter to hold the display name.
+ * @return result.
+ */
+ virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
+
+public:
+ /**
+ * UObject RTTI boilerplate: class-level ID accessor.
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * UObject RTTI boilerplate: per-object (virtual) ID accessor.
+ */
+ virtual UClassID getDynamicClassID() const;
+
+#ifdef SERVICE_DEBUG
+ public: // debugging helpers, declared only when SERVICE_DEBUG is defined
+ virtual UnicodeString& debug(UnicodeString& toAppendTo) const;
+ virtual UnicodeString& debugClass(UnicodeString& toAppendTo) const;
+#endif
+
+};
+
+/*
+ ******************************************************************
+ */
+
+/**
+ * <p>ServiceListener is the listener that ICUService provides by default.
+ * ICUService will notify this listener when factories are added to
+ * or removed from the service. Subclasses can provide
+ * different listener interfaces that extend EventListener, and modify
+ * acceptsListener and notifyListener as appropriate.</p>
+ */
+class U_COMMON_API ServiceListener : public EventListener {
+public:
+ virtual ~ServiceListener(); // virtual destructor for this polymorphic listener interface
+
+ /**
+ * <p>This method is called when the service changes. At the time of the
+ * call this listener is registered with the service. It must
+ * not modify the notifier in the context of this call.</p>
+ *
+ * @param service the service that changed.
+ */
+ virtual void serviceChanged(const ICUService& service) const = 0;
+
+public:
+ /**
+ * UObject RTTI boilerplate: class-level ID accessor.
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * UObject RTTI boilerplate: per-object (virtual) ID accessor.
+ */
+ virtual UClassID getDynamicClassID() const;
+
+};
+
+/*
+ ******************************************************************
+ */
+
+/**
+ * <p>A StringPair holds a displayName/ID pair. ICUService uses it
+ * as the array elements returned by getDisplayNames.</p>
+ */
+class U_COMMON_API StringPair : public UMemory {
+public:
+ /**
+ * <p>The display name of the pair.</p>
+ */
+ const UnicodeString displayName;
+
+ /**
+ * <p>The ID of the pair.</p>
+ */
+ const UnicodeString id;
+
+ /**
+ * <p>Creates a string pair from a displayName and an ID.</p>
+ *
+ * @param displayName the displayName.
+ * @param id the ID.
+ * @param status the error code status.
+ * @return a StringPair if the creation was successful, otherwise NULL.
+ */
+ static StringPair* create(const UnicodeString& displayName,
+ const UnicodeString& id,
+ UErrorCode& status);
+
+ /**
+ * <p>Return true if either string of the pair is bogus.</p>
+ * @return true if either string of the pair is bogus.
+ */
+ UBool isBogus() const;
+
+private:
+ StringPair(const UnicodeString& displayName, const UnicodeString& id); // private: use the static create() above
+};
+
+/*******************************************************************
+ * ICUService
+ */
+
+ /**
+ * <p>A Service provides access to service objects that implement a
+ * particular service, e.g. transliterators. Users provide a String
+ * id (for example, a locale string) to the service, and get back an
+ * object for that id. Service objects can be any kind of object. A
+ * new service object is returned for each query. The caller is
+ * responsible for deleting it.</p>
+ *
+ * <p>Services 'canonicalize' the query ID and use the canonical ID to
+ * query for the service. The service also defines a mechanism to
+ * 'fallback' the ID multiple times. Clients can optionally request
+ * the actual ID that was matched by a query when they use an ID to
+ * retrieve a service object.</p>
+ *
+ * <p>Service objects are instantiated by ICUServiceFactory objects
+ * registered with the service. The service queries each
+ * ICUServiceFactory in turn, from most recently registered to
+ * earliest registered, until one returns a service object. If none
+ * responds with a service object, a fallback ID is generated, and the
+ * process repeats until a service object is returned or until the ID
+ * has no further fallbacks.</p>
+ *
+ * <p>In ICU 2.4, UObject (the base class of service instances) does
+ * not define a polymorphic clone function. ICUService uses clones to
+ * manage ownership. Thus, for now, ICUService defines an abstract
+ * method, cloneInstance, that clients must implement to create clones
+ * of the service instances. This may change in future releases of
+ * ICU.</p>
+ *
+ * <p>ICUServiceFactories can be dynamically registered and
+ * unregistered with the service. When registered, an
+ * ICUServiceFactory is installed at the head of the factory list, and
+ * so gets 'first crack' at any keys or fallback keys. When
+ * unregistered, it is removed from the service and can no longer be
+ * located through it. Service objects generated by this factory and
+ * held by the client are unaffected.</p>
+ *
+ * <p>If a service has variants (e.g., the different variants of
+ * BreakIterator) an ICUServiceFactory can use the prefix of the
+ * ICUServiceKey to determine the variant of a service to generate.
+ * If it does not support all variants, it can request
+ * previously-registered factories to handle the ones it does not
+ * support.</p>
+ *
+ * <p>ICUService uses ICUServiceKeys to query factories and perform
+ * fallback. The ICUServiceKey defines the canonical form of the ID,
+ * and implements the fallback strategy. Custom ICUServiceKeys can be
+ * defined that parse complex IDs into components that
+ * ICUServiceFactories can more easily use. The ICUServiceKey can
+ * cache the results of this parsing to save repeated effort.
+ * ICUService provides convenience APIs that take UnicodeStrings and
+ * generate default ICUServiceKeys for use in querying.</p>
+ *
+ * <p>ICUService provides API to get the list of IDs publicly
+ * supported by the service (although queries aren't restricted to
+ * this list). This list contains only 'simple' IDs, and not fully
+ * unique IDs. ICUServiceFactories are associated with each simple ID
+ * and the responsible factory can also return a human-readable
+ * localized version of the simple ID, for use in user interfaces.
+ * ICUService can also provide an array of all the localized
+ * visible IDs and their corresponding internal IDs.</p>
+ *
+ * <p>ICUService implements ICUNotifier, so that clients can register
+ * to receive notification when factories are added or removed from
+ * the service. ICUService provides a default EventListener
+ * subinterface, ServiceListener, which can be registered with the
+ * service. When the service changes, the ServiceListener's
+ * serviceChanged method is called with the service as the
+ * argument.</p>
+ *
+ * <p>The ICUService API is both rich and generic, and it is expected
+ * that most implementations will statically 'wrap' ICUService to
+ * present a more appropriate API-- for example, to declare the type
+ * of the objects returned from get, to limit the factories that can
+ * be registered with the service, or to define their own listener
+ * interface with a custom callback method. They might also customize
+ * ICUService by overriding it, for example, to customize the
+ * ICUServiceKey and fallback strategy. ICULocaleService is a
+ * subclass of ICUService that uses Locale names as IDs and uses
+ * ICUServiceKeys that implement the standard resource bundle fallback
+ * strategy. Most clients will wish to subclass it instead of
+ * ICUService.</p>
+ */
+class U_COMMON_API ICUService : public ICUNotifier {
+ protected:
+ /**
+ * Name useful for debugging.
+ */
+ const UnicodeString name;
+
+ private:
+
+ /**
+ * Timestamp so iterators can be fail-fast.
+ */
+ uint32_t timestamp;
+
+ /**
+ * All the factories registered with this service.
+ */
+ UVector* factories;
+
+ /**
+ * The service cache.
+ */
+ Hashtable* serviceCache;
+
+ /**
+ * The ID cache.
+ */
+ Hashtable* idCache;
+
+ /**
+ * The name cache.
+ */
+ DNCache* dnCache;
+
+ /**
+ * Constructors and the public query/registration API follow.
+ */
+ public:
+ /**
+ * <p>Construct a new ICUService.</p>
+ */
+ ICUService();
+
+ /**
+ * <p>Construct with a name (useful for debugging).</p>
+ *
+ * @param name a name to use in debugging.
+ */
+ ICUService(const UnicodeString& name);
+
+ /**
+ * <p>Destructor.</p>
+ */
+ virtual ~ICUService();
+
+ /**
+ * <p>Return the name of this service. This will be the empty string if none was assigned.
+ * Returns result as a convenience.</p>
+ *
+ * @param result an output parameter to contain the name of this service.
+ * @return the name of this service.
+ */
+ UnicodeString& getName(UnicodeString& result) const;
+
+ /**
+ * <p>Convenience override for getKey(ICUServiceKey&, UnicodeString*, UErrorCode&). This uses
+ * createKey to create a key for the provided descriptor.</p>
+ *
+ * @param descriptor the descriptor.
+ * @param status the error code status.
+ * @return the service instance, or NULL.
+ */
+ UObject* get(const UnicodeString& descriptor, UErrorCode& status) const;
+
+ /**
+ * <p>Convenience override for getKey(ICUServiceKey&, UnicodeString*, UErrorCode&). This uses
+ * createKey to create a key from the provided descriptor.</p>
+ *
+ * @param descriptor the descriptor.
+ * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
+ * @param status the error code status.
+ * @return the service instance, or NULL.
+ */
+ UObject* get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const;
+
+ /**
+ * <p>Convenience override for getKey(ICUServiceKey&, UnicodeString*, UErrorCode&).</p>
+ *
+ * @param key the key.
+ * @param status the error code status.
+ * @return the service instance, or NULL.
+ */
+ UObject* getKey(ICUServiceKey& key, UErrorCode& status) const;
+
+ /**
+ * <p>Given a key, return a service object, and, if actualReturn
+ * is not NULL, the descriptor with which it was found in the
+ * first element of actualReturn. If no service object matches
+ * this key, returns NULL and leaves actualReturn unchanged.</p>
+ *
+ * <p>This queries the cache using the key's descriptor, and if no
+ * object in the cache matches, tries the key on each
+ * registered factory, in order. If none generates a service
+ * object for the key, repeats the process with each fallback of
+ * the key, until either a factory returns a service object, or the key
+ * has no fallback. If no object is found, the result of handleDefault
+ * is returned.</p>
+ *
+ * <p>Subclasses can override this method to further customize the
+ * result before returning it.</p>
+ *
+ * @param key the key.
+ * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
+ * @param status the error code status.
+ * @return the service instance, or NULL.
+ */
+ virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const;
+
+ /**
+ * <p>This version of getKey is only called by ICUServiceFactories within the scope
+ * of a previous getKey call, to determine what previously-registered factories would
+ * have returned. For details, see getKey(ICUServiceKey&, UErrorCode&). Subclasses
+ * should not call it directly, but call through one of the other get functions.</p>
+ *
+ * @param key the key.
+ * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
+ * @param factory the factory making the recursive call.
+ * @param status the error code status.
+ * @return the service instance, or NULL.
+ */
+ UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUServiceFactory* factory, UErrorCode& status) const;
+
+ /**
+ * <p>Convenience override for getVisibleIDs(UVector&, const UnicodeString*, UErrorCode&)
+ * that passes NULL as the matchID, thus returning all visible IDs.</p>
+ *
+ * @param result a vector to hold the returned IDs.
+ * @param status the error code status.
+ * @return the result vector.
+ */
+ UVector& getVisibleIDs(UVector& result, UErrorCode& status) const;
+
+ /**
+ * <p>Return a snapshot of the visible IDs for this service. This
+ * list will not change as ICUServiceFactories are added or removed, but the
+ * supported IDs will, so there is no guarantee that all and only
+ * the IDs in the returned list will be visible and supported by the
+ * service in subsequent calls.</p>
+ *
+ * <p>The IDs are returned as pointers to UnicodeStrings. The
+ * caller owns the IDs. Previous contents of result are discarded before
+ * new elements, if any, are added.</p>
+ *
+ * <p>matchID is passed to createKey to create a key. If the key
+ * is not NULL, its isFallbackOf method is used to filter out IDs
+ * that don't match the key or have it as a fallback.</p>
+ *
+ * @param result a vector to hold the returned IDs.
+ * @param matchID an ID used to filter the result, or NULL if all IDs are desired.
+ * @param status the error code status.
+ * @return the result vector.
+ */
+ UVector& getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorCode& status) const;
+
+ /**
+ * <p>Convenience override for getDisplayName(const UnicodeString&, UnicodeString&, const Locale&) that
+ * uses the current default locale.</p>
+ *
+ * @param id the ID for which to retrieve the localized displayName.
+ * @param result an output parameter to hold the display name.
+ * @return the modified result.
+ */
+ UnicodeString& getDisplayName(const UnicodeString& id, UnicodeString& result) const;
+
+ /**
+ * <p>Given a visible ID, return the display name in the requested locale.
+ * If there is no directly supported ID corresponding to this ID, result is
+ * set to bogus.</p>
+ *
+ * @param id the ID for which to retrieve the localized displayName.
+ * @param result an output parameter to hold the display name.
+ * @param locale the locale in which to localize the ID.
+ * @return the modified result.
+ */
+ UnicodeString& getDisplayName(const UnicodeString& id, UnicodeString& result, const Locale& locale) const;
+
+ /**
+ * <p>Convenience override of getDisplayNames(UVector&, const Locale&, const UnicodeString*, UErrorCode&)
+ * that uses the current default Locale as the locale and NULL for
+ * the matchID.</p>
+ *
+ * @param result a vector to hold the returned displayName/id StringPairs.
+ * @param status the error code status.
+ * @return the modified result vector.
+ */
+ UVector& getDisplayNames(UVector& result, UErrorCode& status) const;
+
+ /**
+ * <p>Convenience override of getDisplayNames(UVector&, const Locale&, const UnicodeString*, UErrorCode&)
+ * that uses NULL for the matchID.</p>
+ *
+ * @param result a vector to hold the returned displayName/id StringPairs.
+ * @param locale the locale in which to localize the ID.
+ * @param status the error code status.
+ * @return the modified result vector.
+ */
+ UVector& getDisplayNames(UVector& result, const Locale& locale, UErrorCode& status) const;
+
+ /**
+ * <p>Return a snapshot of the mapping from display names to visible
+ * IDs for this service. This set will not change as factories
+ * are added or removed, but the supported IDs will, so there is
+ * no guarantee that all and only the IDs in the returned map will
+ * be visible and supported by the service in subsequent calls,
+ * nor is there any guarantee that the current display names match
+ * those in the result.</p>
+ *
+ * <p>The names are returned as pointers to StringPairs, which
+ * contain both the displayName and the corresponding ID. The
+ * caller owns the StringPairs. Previous contents of result are
+ * discarded before new elements, if any, are added.</p>
+ *
+ * <p>matchID is passed to createKey to create a key. If the key
+ * is not NULL, its isFallbackOf method is used to filter out IDs
+ * that don't match the key or have it as a fallback.</p>
+ *
+ * @param result a vector to hold the returned displayName/id StringPairs.
+ * @param locale the locale in which to localize the ID.
+ * @param matchID an ID used to filter the result, or NULL if all IDs are desired.
+ * @param status the error code status.
+ * @return the result vector. */
+ UVector& getDisplayNames(UVector& result,
+ const Locale& locale,
+ const UnicodeString* matchID,
+ UErrorCode& status) const;
+
+ /**
+ * <p>A convenience override of registerInstance(UObject*, const UnicodeString&, UBool, UErrorCode&)
+ * that defaults visible to true.</p>
+ *
+ * @param objToAdopt the object to register and adopt.
+ * @param id the ID to assign to this object.
+ * @param status the error code status.
+ * @return a registry key that can be passed to unregister to unregister
+ * (and discard) this instance.
+ */
+ URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& id, UErrorCode& status);
+
+ /**
+ * <p>Register a service instance with the provided ID. The ID will be
+ * canonicalized. The canonicalized ID will be returned by
+ * getVisibleIDs if visible is true. The service instance will be adopted and
+ * must not be modified subsequent to this call.</p>
+ *
+ * <p>This issues a serviceChanged notification to registered listeners.</p>
+ *
+ * <p>This implementation wraps the object using
+ * createSimpleFactory, and calls registerFactory.</p>
+ *
+ * @param objToAdopt the object to register and adopt.
+ * @param id the ID to assign to this object.
+ * @param visible true if getVisibleIDs is to return this ID.
+ * @param status the error code status.
+ * @return a registry key that can be passed to unregister() to unregister
+ * (and discard) this instance.
+ */
+ virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status);
+
+ /**
+ * <p>Register an ICUServiceFactory. Returns a registry key that
+ * can be used to unregister the factory. The factory
+ * must not be modified subsequent to this call. The service owns
+ * all registered factories. In case of an error, the factory is
+ * deleted.</p>
+ *
+ * <p>This issues a serviceChanged notification to registered listeners.</p>
+ *
+ * <p>The default implementation accepts all factories.</p>
+ *
+ * @param factoryToAdopt the factory to register and adopt.
+ * @param status the error code status.
+ * @return a registry key that can be passed to unregister to unregister
+ * (and discard) this factory.
+ */
+ virtual URegistryKey registerFactory(ICUServiceFactory* factoryToAdopt, UErrorCode& status);
+
+ /**
+ * <p>Unregister a factory using a registry key returned by
+ * registerInstance or registerFactory. After a successful call,
+ * the factory will be removed from the service factory list and
+ * deleted, and the key becomes invalid.</p>
+ *
+ * <p>This issues a serviceChanged notification to registered
+ * listeners.</p>
+ *
+ * @param rkey the registry key.
+ * @param status the error code status.
+ * @return true if the call successfully unregistered the factory.
+ */
+ virtual UBool unregister(URegistryKey rkey, UErrorCode& status);
+
+ /**
+ * <p>Reset the service to the default factories. The factory
+ * lock is acquired and then reInitializeFactories is called.</p>
+ *
+ * <p>This issues a serviceChanged notification to registered listeners.</p>
+ */
+ virtual void reset(void);
+
+ /**
+ * <p>Return true if the service is in its default state.</p>
+ *
+ * <p>The default implementation returns true if there are no
+ * factories registered.</p>
+ */
+ virtual UBool isDefault(void) const;
+
+ /**
+ * <p>Create a key from an ID. If ID is NULL, returns NULL.</p>
+ *
+ * <p>The default implementation creates an ICUServiceKey instance.
+ * Subclasses can override to define more useful keys appropriate
+ * to the factories they accept.</p>
+ *
+ * @param id a pointer to the ID for which to create a default ICUServiceKey.
+ * @param status the error code status.
+ * @return the ICUServiceKey corresponding to ID, or NULL.
+ */
+ virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const;
+
+ /**
+ * <p>Clone object so that caller can own the copy. In ICU2.4, UObject doesn't define
+ * clone, so we need an instance-aware method that knows how to do this.
+ * This is public so factories can call it, but should really be protected.</p>
+ *
+ * @param instance the service instance to clone.
+ * @return a clone of the passed-in instance, or NULL if cloning was unsuccessful.
+ */
+ virtual UObject* cloneInstance(UObject* instance) const = 0;
+
+
+ /************************************************************************
+ * Subclassing API
+ */
+
+ protected:
+
+ /**
+ * <p>Create a factory that wraps a single service object. Called by registerInstance.</p>
+ *
+ * <p>The default implementation returns an instance of SimpleFactory.</p>
+ *
+ * @param instanceToAdopt the service instance to adopt.
+ * @param id the ID to assign to this service instance.
+ * @param visible if true, the ID will be visible.
+ * @param status the error code status.
+ * @return an instance of ICUServiceFactory that maps this instance to the provided ID.
+ */
+ virtual ICUServiceFactory* createSimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status);
+
+ /**
+ * <p>Reinitialize the factory list to its default state. After this call, isDefault()
+ * must return true.</p>
+ *
+ * <p>This issues a serviceChanged notification to registered listeners.</p>
+ *
+ * <p>The default implementation clears the factory list.
+ * Subclasses can override to provide other default initialization
+ * of the factory list. Subclasses must not call this method
+ * directly, since it must only be called while holding write
+ * access to the factory list.</p>
+ */
+ virtual void reInitializeFactories(void);
+
+ /**
+ * <p>Default handler for this service if no factory in the factory list
+ * handled the key passed to getKey.</p>
+ *
+ * <p>The default implementation returns NULL.</p>
+ *
+ * @param key the key.
+ * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
+ * @param status the error code status.
+ * @return the service instance, or NULL.
+ */
+ virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const;
+
+ /**
+ * <p>Clear caches maintained by this service.</p>
+ *
+ * <p>Subclasses can override if they implement additional caches
+ * that need to be cleared when the service changes. Subclasses
+ * should generally not call this method directly, as it must only
+ * be called while synchronized on the factory lock.</p>
+ */
+ virtual void clearCaches(void);
+
+ /**
+ * <p>Return true if the listener is accepted.</p>
+ *
+ * <p>The default implementation accepts the listener if it is
+ * a ServiceListener. Subclasses can override this to accept
+ * different listeners.</p>
+ *
+ * @param l the listener to test.
+ * @return true if the service accepts the listener.
+ */
+ virtual UBool acceptsListener(const EventListener& l) const;
+
+ /**
+ * <p>Notify the listener of a service change.</p>
+ *
+ * <p>The default implementation assumes a ServiceListener.
+ * If acceptsListener has been overridden to accept different
+ * listeners, this should be overridden as well.</p>
+ *
+ * @param l the listener to notify.
+ */
+ virtual void notifyListener(EventListener& l) const;
+
+ /************************************************************************
+ * Utilities for subclasses.
+ */
+
+ /**
+ * <p>Clear only the service cache.</p>
+ *
+ * <p>This can be called by subclasses when a change affects the service
+ * cache but not the ID caches, e.g., when the default locale changes
+ * the resolution of IDs also changes, requiring the cache to be
+ * flushed, but not the visible IDs themselves.</p>
+ */
+ void clearServiceCache(void);
+
+ /**
+ * <p>Return a map from visible IDs to factories.
+ * This must only be called when the mutex is held.</p>
+ *
+ * @param status the error code status.
+ * @return a Hashtable containing mappings from visible
+ * IDs to factories.
+ */
+ const Hashtable* getVisibleIDMap(UErrorCode& status) const;
+
+ /**
+ * <p>Allow subclasses to read the time stamp.</p>
+ *
+ * @return the timestamp (returned as int32_t although the member is declared uint32_t).
+ */
+ int32_t getTimestamp(void) const;
+
+ /**
+ * <p>Return the number of registered factories.</p>
+ *
+ * @return the number of factories registered at the time of the call.
+ */
+ int32_t countFactories(void) const;
+
+private:
+
+ friend class ::ICUServiceTest; // give tests access to countFactories.
+};
+
+U_NAMESPACE_END
+
+ /* UCONFIG_NO_SERVICE */
+#endif
+
+ /* ICUSERV_H */
+#endif
+
diff --git a/thirdparty/icu4c/common/servlk.cpp b/thirdparty/icu4c/common/servlk.cpp
new file mode 100644
index 0000000000..538982ca36
--- /dev/null
+++ b/thirdparty/icu4c/common/servlk.cpp
@@ -0,0 +1,188 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2014, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ *******************************************************************************
+ */
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_SERVICE
+
+#include "unicode/resbund.h"
+#include "uresimp.h"
+#include "cmemory.h"
+#include "servloc.h"
+#include "ustrfmt.h"
+#include "uhash.h"
+#include "charstr.h"
+#include "uassert.h"
+
+#define UNDERSCORE_CHAR ((UChar)0x005f)
+#define AT_SIGN_CHAR ((UChar)64)
+#define PERIOD_CHAR ((UChar)46)
+
+U_NAMESPACE_BEGIN
+
+// Convenience overload: forwards to the four-argument form with kind KIND_ANY.
+LocaleKey* LocaleKey::createWithCanonicalFallback(const UnicodeString* primaryID,
+ const UnicodeString* canonicalFallbackID,
+ UErrorCode& status)
+{
+    return createWithCanonicalFallback(primaryID, canonicalFallbackID, KIND_ANY, status);
+}
+
+// Creates a LocaleKey for primaryID; returns NULL for a NULL id, a failed status, or a failed allocation.
+LocaleKey* LocaleKey::createWithCanonicalFallback(const UnicodeString* primaryID,
+ const UnicodeString* canonicalFallbackID, int32_t kind, UErrorCode& status)
+{
+    if (primaryID == NULL || U_FAILURE(status)) {
+        return NULL;
+    }
+    UnicodeString canonicalPrimaryID;
+    LocaleUtility::canonicalLocaleString(primaryID, canonicalPrimaryID);
+    LocaleKey* key = new LocaleKey(*primaryID, canonicalPrimaryID, canonicalFallbackID, kind);
+    if (key == NULL) { status = U_MEMORY_ALLOCATION_ERROR; } // report the failed allocation via status
+    return key;
+}
+
+// Stores kind and the canonical primary ID; the current ID starts out equal to the primary ID.
+LocaleKey::LocaleKey(const UnicodeString& primaryID,
+ const UnicodeString& canonicalPrimaryID,
+ const UnicodeString* canonicalFallbackID,
+ int32_t kind)
+    : ICUServiceKey(primaryID)
+    , _kind(kind)
+    , _primaryID(canonicalPrimaryID)
+    , _fallbackID()
+    , _currentID()
+{
+    // The fallback stays bogus unless the primary ID is non-empty and the supplied fallback differs from it.
+    _fallbackID.setToBogus();
+    const UBool hasPrimary = _primaryID.length() != 0;
+    if (hasPrimary && canonicalFallbackID != NULL && _primaryID != *canonicalFallbackID) {
+        _fallbackID = *canonicalFallbackID;
+    }
+    _currentID = _primaryID;
+}
+
+LocaleKey::~LocaleKey() {} // empty body: no explicit cleanup is performed here
+
+// Appends _kind (converted by uprv_itou, called with 10 as its radix argument) to result; appends nothing for KIND_ANY.
+UnicodeString& LocaleKey::prefix(UnicodeString& result) const {
+    if (_kind == KIND_ANY) {
+        return result;
+    }
+    UChar digits[64];
+    uprv_itou(digits, 64, _kind, 10, 0);
+    result.append(UnicodeString(digits));
+    return result;
+}
+
+// Accessor for the kind value stored at construction.
+int32_t LocaleKey::kind() const {
+    return _kind;
+}
+
+// Appends the canonical primary ID to result and returns the value of that append call.
+UnicodeString& LocaleKey::canonicalID(UnicodeString& result) const {
+    return result.append(_primaryID);
+}
+
+// Appends the current ID to result unless the current ID is bogus; result is returned in both cases.
+UnicodeString& LocaleKey::currentID(UnicodeString& result) const {
+    if (_currentID.isBogus()) {
+        return result;
+    }
+    return result.append(_currentID);
+}
+
+// Appends prefix, PREFIX_DELIMITER and the current ID to result; sets result to bogus when the current ID is bogus.
+UnicodeString& LocaleKey::currentDescriptor(UnicodeString& result) const {
+    if (_currentID.isBogus()) {
+        result.setToBogus();
+        return result;
+    }
+    prefix(result).append(PREFIX_DELIMITER).append(_currentID);
+    return result;
+}
+
+// Delegates to LocaleUtility::initLocaleFromName with the canonical primary ID and returns its result.
+Locale& LocaleKey::canonicalLocale(Locale& result) const {
+    return LocaleUtility::initLocaleFromName(_primaryID, result);
+}
+
+// Delegates to LocaleUtility::initLocaleFromName with the current ID and returns its result.
+Locale& LocaleKey::currentLocale(Locale& result) const {
+    return LocaleUtility::initLocaleFromName(_currentID, result);
+}
+
+// Moves _currentID to the next fallback; returns TRUE if it changed, FALSE when nothing is left to try.
+UBool LocaleKey::fallback() {
+    if (_currentID.isBogus()) {
+        return FALSE; // already exhausted
+    }
+    // 1. Cut the current ID at its last underscore, if it has one.
+    const int lastUnderscore = _currentID.lastIndexOf(UNDERSCORE_CHAR);
+    if (lastUnderscore != -1) {
+        _currentID.remove(lastUnderscore);
+        return TRUE;
+    }
+    // 2. Otherwise switch to the fallback ID and mark it bogus so it is used only once.
+    if (!_fallbackID.isBogus()) {
+        _currentID = _fallbackID;
+        _fallbackID.setToBogus();
+        return TRUE;
+    }
+    // 3. Otherwise completely truncate a non-empty current ID.
+    if (_currentID.length() > 0) {
+        _currentID.remove(0);
+        return TRUE;
+    }
+    _currentID.setToBogus(); // 4. No further fallback: mark the current ID bogus.
+    return FALSE;
+}
+
+// After parseSuffix on a copy of id: true if the primary ID is found at index 0 and is followed by end-of-string or '_'.
+UBool LocaleKey::isFallbackOf(const UnicodeString& id) const {
+    UnicodeString candidate(id);
+    parseSuffix(candidate);
+    if (candidate.indexOf(_primaryID) != 0) { return FALSE; }
+    const int32_t primaryLen = _primaryID.length();
+    return candidate.length() == primaryLen || candidate.charAt(primaryLen) == UNDERSCORE_CHAR;
+}
+
+#ifdef SERVICE_DEBUG
+// Debug-only: calls ICUServiceKey::debug on result, then appends this key's
+// kind, primary ID, fallback ID and current ID, each preceded by a text label.
+UnicodeString& LocaleKey::debug(UnicodeString& result) const
+{
+    ICUServiceKey::debug(result);
+
+    // NOTE(review): _kind is an int32_t; confirm which append overload this selects.
+    result.append((UnicodeString)" kind: ").append(_kind);
+    result.append((UnicodeString)" primaryID: ").append(_primaryID);
+    result.append((UnicodeString)" fallbackID: ").append(_fallbackID);
+    result.append((UnicodeString)" currentID: ").append(_currentID);
+
+    return result;
+}
+
+// Debug-only: appends the class label "LocaleKey " to result.
+UnicodeString& LocaleKey::debugClass(UnicodeString& result) const
+{
+    return result.append((UnicodeString)"LocaleKey ");
+}
+#endif
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(LocaleKey)
+
+U_NAMESPACE_END
+
+/* !UCONFIG_NO_SERVICE */
+#endif
+
+
diff --git a/thirdparty/icu4c/common/servlkf.cpp b/thirdparty/icu4c/common/servlkf.cpp
new file mode 100644
index 0000000000..84f2347cdd
--- /dev/null
+++ b/thirdparty/icu4c/common/servlkf.cpp
@@ -0,0 +1,152 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2014, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ *******************************************************************************
+ */
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_SERVICE
+
+#include "unicode/resbund.h"
+#include "uresimp.h"
+#include "cmemory.h"
+#include "servloc.h"
+#include "ustrfmt.h"
+#include "uhash.h"
+#include "charstr.h"
+#include "ucln_cmn.h"
+#include "uassert.h"
+
+#define UNDERSCORE_CHAR ((UChar)0x005f)
+#define AT_SIGN_CHAR ((UChar)64)
+#define PERIOD_CHAR ((UChar)46)
+
+
+U_NAMESPACE_BEGIN
+
+LocaleKeyFactory::LocaleKeyFactory(int32_t coverage)
+ : _name() // no name supplied: left empty
+ , _coverage(coverage) // bit 0 is tested by updateVisibleIDs() and getDisplayName()
+{
+}
+
+LocaleKeyFactory::LocaleKeyFactory(int32_t coverage, const UnicodeString& name)
+ : _name(name) // copied; in this file only the SERVICE_DEBUG debug() output reads it
+ , _coverage(coverage) // bit 0 is tested by updateVisibleIDs() and getDisplayName()
+{
+}
+
+LocaleKeyFactory::~LocaleKeyFactory() { // nothing to release beyond the members' own destructors
+}
+
+UObject*
+LocaleKeyFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const {
+ // Decline keys whose current ID is not among the supported IDs.
+ if (!handlesKey(key, status)) {
+ return NULL;
+ }
+ // The key is treated as a LocaleKey; no runtime type check is made.
+ const LocaleKey& localeKey = static_cast<const LocaleKey&>(key);
+ Locale current;
+ localeKey.currentLocale(current);
+ return handleCreate(current, localeKey.kind(), service, status);
+}
+
+UBool
+LocaleKeyFactory::handlesKey(const ICUServiceKey& key, UErrorCode& status) const {
+ const Hashtable* const ids = getSupportedIDs(status);
+ if (ids == NULL) {
+ return FALSE; // no table of supported IDs, so no key can be handled
+ }
+ UnicodeString currentId;
+ key.currentID(currentId);
+ return ids->get(currentId) != NULL; // a non-NULL entry marks the ID as supported
+}
+
+void
+LocaleKeyFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const {
+ const Hashtable* const ids = getSupportedIDs(status);
+ if (ids == NULL) {
+ return;
+ }
+ const UBool publish = (_coverage & 0x1) == 0; // bit 0 clear: add the supported IDs; set: remove them
+ int32_t cursor = UHASH_FIRST;
+ for (const UHashElement* entry = ids->nextElement(cursor); entry != NULL; entry = ids->nextElement(cursor)) {
+ const UnicodeString& id = *static_cast<const UnicodeString*>(entry->key.pointer);
+ if (publish) {
+ result.put(id, (void*)this, status); // 'this' is only a non-NULL marker; result is used as a set
+ if (U_FAILURE(status)) {
+ return;
+ }
+ } else {
+ result.remove(id);
+ }
+ }
+}
+
+UnicodeString&
+LocaleKeyFactory::getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const {
+ if ((_coverage & 0x1) != 0) {
+ // Coverage bit 0 set: this factory reports no display name.
+ result.setToBogus();
+ return result;
+ }
+
+ // Assume that if this is called on us, we support some fallback of this id,
+ // so no isSupportedID(id, status) check is made here.
+ Locale named;
+ LocaleUtility::initLocaleFromName(id, named);
+ return named.getDisplayName(locale, result);
+}
+
+UObject*
+LocaleKeyFactory::handleCreate(const Locale& /* loc */,
+ int32_t /* kind */,
+ const ICUService* /* service */,
+ UErrorCode& /* status */) const {
+ return NULL; // base implementation creates nothing and leaves status untouched
+}
+
+//UBool
+//LocaleKeyFactory::isSupportedID(const UnicodeString& id, UErrorCode& status) const {
+// const Hashtable* ids = getSupportedIDs(status);
+// return ids && ids->get(id);
+//}
+
+const Hashtable*
+LocaleKeyFactory::getSupportedIDs(UErrorCode& /* status */) const {
+ return NULL; // base implementation supports no IDs; handlesKey() and updateVisibleIDs() treat NULL as "none"
+}
+
+#ifdef SERVICE_DEBUG
+UnicodeString&
+LocaleKeyFactory::debug(UnicodeString& result) const
+{
+ debugClass(result); // class label first (virtual, so a subclass label may appear)
+ result.append((UnicodeString)", name: ");
+ result.append(_name);
+ result.append((UnicodeString)", coverage: ");
+ result.append(_coverage);
+ return result; // the same string that was passed in
+}
+
+UnicodeString&
+LocaleKeyFactory::debugClass(UnicodeString& result) const
+{
+ return result.append((UnicodeString)"LocaleKeyFactory"); // appends the class label only (SERVICE_DEBUG builds)
+}
+#endif
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(LocaleKeyFactory)
+
+U_NAMESPACE_END
+
+/* !UCONFIG_NO_SERVICE */
+#endif
+
+
diff --git a/thirdparty/icu4c/common/servloc.h b/thirdparty/icu4c/common/servloc.h
new file mode 100644
index 0000000000..ccf6433379
--- /dev/null
+++ b/thirdparty/icu4c/common/servloc.h
@@ -0,0 +1,551 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ *******************************************************************************
+ */
+#ifndef ICULSERV_H
+#define ICULSERV_H
+
+#include "unicode/utypes.h"
+
+#if UCONFIG_NO_SERVICE
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Allow the declaration of APIs with pointers to ICULocaleService
+ * even when service is removed from the build.
+ */
+class ICULocaleService;
+
+U_NAMESPACE_END
+
+#else
+
+#include "unicode/unistr.h"
+#include "unicode/locid.h"
+#include "unicode/strenum.h"
+
+#include "hash.h"
+#include "uvector.h"
+
+#include "serv.h"
+#include "locutil.h"
+
+U_NAMESPACE_BEGIN
+
+class ICULocaleService;
+
+class LocaleKey;
+class LocaleKeyFactory;
+class SimpleLocaleKeyFactory;
+class ServiceListener;
+
+/*
+ ******************************************************************
+ */
+
+/**
+ * <p>A subclass of ICUServiceKey that implements a locale fallback mechanism.
+ * The first locale to search for is the locale provided by the
+ * client, and the fallback locale to search for is the current
+ * default locale. If a prefix is present, the currentDescriptor
+ * includes it before the locale proper, separated by "/". This
+ * is the default key instantiated by ICULocaleService.</p>
+ *
+ * <p>Canonicalization adjusts the locale string so that the
+ * section before the first underscore is in lower case, and the rest
+ * is in upper case, with no trailing underscores.</p>
+ */
+
+class U_COMMON_API LocaleKey : public ICUServiceKey {
+ private:
+ int32_t _kind;
+ UnicodeString _primaryID; // canonical primary ID; canonicalLocale() is built from it
+ UnicodeString _fallbackID; // taken over by fallback() once, then set to bogus
+ UnicodeString _currentID; // ID currently being tried; shortened step by step by fallback()
+
+ public:
+ enum {
+ KIND_ANY = -1
+ };
+
+ /**
+ * Create a LocaleKey with canonical primary and fallback IDs.
+ */
+ static LocaleKey* createWithCanonicalFallback(const UnicodeString* primaryID,
+ const UnicodeString* canonicalFallbackID,
+ UErrorCode& status);
+
+ /**
+ * Create a LocaleKey with canonical primary and fallback IDs and the given kind.
+ */
+ static LocaleKey* createWithCanonicalFallback(const UnicodeString* primaryID,
+ const UnicodeString* canonicalFallbackID,
+ int32_t kind,
+ UErrorCode& status);
+
+ protected:
+ /**
+ * PrimaryID is the user's requested locale string,
+ * canonicalPrimaryID is this string in canonical form,
+ * fallbackID is the current default locale's string in
+ * canonical form.
+ */
+ LocaleKey(const UnicodeString& primaryID,
+ const UnicodeString& canonicalPrimaryID,
+ const UnicodeString* canonicalFallbackID,
+ int32_t kind);
+
+ public:
+ /**
+ * Append the prefix associated with the kind, or nothing if the kind is KIND_ANY.
+ */
+ virtual UnicodeString& prefix(UnicodeString& result) const;
+
+ /**
+ * Return the kind code associated with this key.
+ */
+ virtual int32_t kind() const;
+
+ /**
+ * Return the canonicalID.
+ */
+ virtual UnicodeString& canonicalID(UnicodeString& result) const;
+
+ /**
+ * Return the currentID.
+ */
+ virtual UnicodeString& currentID(UnicodeString& result) const;
+
+ /**
+ * Return the (canonical) current descriptor, or null if no current id.
+ */
+ virtual UnicodeString& currentDescriptor(UnicodeString& result) const;
+
+ /**
+ * Convenience method to return the locale corresponding to the (canonical) original ID.
+ */
+ virtual Locale& canonicalLocale(Locale& result) const;
+
+ /**
+ * Convenience method to return the locale corresponding to the (canonical) current ID.
+ */
+ virtual Locale& currentLocale(Locale& result) const;
+
+ /**
+ * If the key has a fallback, modify the key and return true,
+ * otherwise return false.
+ *
+ * <p>First falls back through the primary ID, then through
+ * the fallbackID. The final fallback is the empty string,
+ * unless the primary id was the empty string, in which case
+ * there is no fallback.</p>
+ */
+ virtual UBool fallback();
+
+ /**
+ * Return true if a key created from id matches, or would eventually
+ * fallback to match, the canonical ID of this key.
+ */
+ virtual UBool isFallbackOf(const UnicodeString& id) const;
+
+ public:
+ /**
+ * UObject boilerplate.
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ virtual UClassID getDynamicClassID() const;
+
+ /**
+ * Destructor.
+ */
+ virtual ~LocaleKey();
+
+#ifdef SERVICE_DEBUG
+ public:
+ virtual UnicodeString& debug(UnicodeString& result) const;
+ virtual UnicodeString& debugClass(UnicodeString& result) const;
+#endif
+
+};
+
+/*
+ ******************************************************************
+ */
+
+/**
+ * A subclass of ICUServiceFactory that uses LocaleKeys, and is able to
+ * 'cover' more specific locales with more general locales that it
+ * supports.
+ *
+ * <p>Coverage may be either of the values VISIBLE or INVISIBLE.
+ *
+ * <p>'Visible' indicates that the specific locale(s) supported by
+ * the factory are registered in getSupportedIDs, 'Invisible'
+ * indicates that they are not.
+ *
+ * <p>Localization of visible ids is handled
+ * by the handling factory, regardless of kind.
+ */
+class U_COMMON_API LocaleKeyFactory : public ICUServiceFactory {
+protected:
+ const UnicodeString _name;
+ const int32_t _coverage; // VISIBLE or INVISIBLE; the implementation tests bit 0 only
+
+public:
+ enum {
+ /**
+ * Coverage value indicating that the factory makes
+ * its locales visible, and does not cover more specific
+ * locales.
+ */
+ VISIBLE = 0,
+
+ /**
+ * Coverage value indicating that the factory does not make
+ * its locales visible, and does not cover more specific
+ * locales.
+ */
+ INVISIBLE = 1
+ };
+
+ /**
+ * Destructor.
+ */
+ virtual ~LocaleKeyFactory();
+
+protected:
+ /**
+ * Constructor used by subclasses.
+ */
+ LocaleKeyFactory(int32_t coverage);
+
+ /**
+ * Constructor used by subclasses.
+ */
+ LocaleKeyFactory(int32_t coverage, const UnicodeString& name);
+
+ /**
+ * Implement superclass abstract method. This checks the currentID of
+ * the key against the supported IDs, and passes the key's current locale and
+ * kind off to handleCreate (which subclasses override; the default returns NULL).
+ */
+public:
+ virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
+
+protected:
+ virtual UBool handlesKey(const ICUServiceKey& key, UErrorCode& status) const;
+
+public:
+ /**
+ * Override of superclass method. This adjusts the result based
+ * on the coverage rule for this factory.
+ */
+ virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const;
+
+ /**
+ * Return a localized name for the locale represented by id.
+ */
+ virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
+
+protected:
+ /**
+ * Utility method used by create(ICUServiceKey, ICUService). Subclasses can implement
+ * this instead of create. The default returns NULL.
+ */
+ virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* service, UErrorCode& status) const;
+
+ /**
+ * Return true if this id is one the factory supports (visible or
+ * otherwise).
+ */
+ // virtual UBool isSupportedID(const UnicodeString& id, UErrorCode& status) const;
+
+ /**
+ * Return the set of ids that this factory supports (visible or
+ * otherwise). This can be called often and might need to be
+ * cached if it is expensive to create.
+ */
+ virtual const Hashtable* getSupportedIDs(UErrorCode& status) const;
+
+public:
+ /**
+ * UObject boilerplate.
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ virtual UClassID getDynamicClassID() const;
+
+#ifdef SERVICE_DEBUG
+ public:
+ virtual UnicodeString& debug(UnicodeString& result) const;
+ virtual UnicodeString& debugClass(UnicodeString& result) const;
+#endif
+
+};
+
+/*
+ ******************************************************************
+ */
+
+/**
+ * A LocaleKeyFactory that just returns a single object for a kind/locale.
+ */
+
+class U_COMMON_API SimpleLocaleKeyFactory : public LocaleKeyFactory {
+ private:
+ UObject* _obj; // presumably owned by this factory (constructor parameter is named objToAdopt) - confirm in the implementation
+ UnicodeString _id;
+ const int32_t _kind;
+
+ public:
+ SimpleLocaleKeyFactory(UObject* objToAdopt,
+ const UnicodeString& locale,
+ int32_t kind,
+ int32_t coverage);
+
+ SimpleLocaleKeyFactory(UObject* objToAdopt,
+ const Locale& locale,
+ int32_t kind,
+ int32_t coverage);
+
+ /**
+ * Destructor.
+ */
+ virtual ~SimpleLocaleKeyFactory();
+
+ /**
+ * Override of superclass method. Returns the service object if kind/locale match. Service is not used.
+ */
+ virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
+
+ /**
+ * Override of superclass method. This adjusts the result based
+ * on the coverage rule for this factory.
+ */
+ virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const;
+
+ protected:
+ /**
+ * Return true if this id is equal to the locale name.
+ * (The declaration below is commented out, so this method does not exist.)
+ */
+ //virtual UBool isSupportedID(const UnicodeString& id, UErrorCode& status) const;
+
+public:
+ /**
+ * UObject boilerplate.
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ virtual UClassID getDynamicClassID() const;
+
+#ifdef SERVICE_DEBUG
+ public:
+ virtual UnicodeString& debug(UnicodeString& result) const;
+ virtual UnicodeString& debugClass(UnicodeString& result) const;
+#endif
+
+};
+
+/*
+ ******************************************************************
+ */
+
+/**
+ * A LocaleKeyFactory that creates a service based on the ICU locale data.
+ * This is a base class for most ICU factories. Subclasses instantiate it
+ * with a constructor that takes a bundle name, which determines the supported
+ * IDs. Subclasses then override handleCreate to create the actual service
+ * object. The default implementation returns a resource bundle.
+ */
+class U_COMMON_API ICUResourceBundleFactory : public LocaleKeyFactory
+{
+ protected:
+ UnicodeString _bundleName; // passed to LocaleUtility::getAvailableLocaleNames() by getSupportedIDs()
+
+ public:
+ /**
+ * Convenience constructor that uses the main ICU bundle name.
+ */
+ ICUResourceBundleFactory();
+
+ /**
+ * A service factory based on ICU resource data in resources with
+ * the given name. This should be a 'path' that can be passed to
+ * ures_openAvailableLocales, such as U_ICUDATA or U_ICUDATA_COLL.
+ * The empty string is equivalent to U_ICUDATA.
+ */
+ ICUResourceBundleFactory(const UnicodeString& bundleName);
+
+ /**
+ * Destructor
+ */
+ virtual ~ICUResourceBundleFactory();
+
+protected:
+ /**
+ * Return the supported IDs. This is the set of all locale names in ICULocaleData.
+ */
+ virtual const Hashtable* getSupportedIDs(UErrorCode& status) const;
+
+ /**
+ * Create the service. The default implementation returns the resource bundle
+ * for the locale, ignoring kind, and service.
+ */
+ virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* service, UErrorCode& status) const;
+
+public:
+ /**
+ * UObject boilerplate.
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+ virtual UClassID getDynamicClassID() const;
+
+
+#ifdef SERVICE_DEBUG
+ public:
+ virtual UnicodeString& debug(UnicodeString& result) const;
+ virtual UnicodeString& debugClass(UnicodeString& result) const;
+#endif
+
+};
+
+/*
+ ******************************************************************
+ */
+
+class U_COMMON_API ICULocaleService : public ICUService
+{
+ private:
+ Locale fallbackLocale; // cached copy of the default locale, refreshed by validateFallbackLocale()
+ UnicodeString fallbackLocaleName; // name derived from fallbackLocale whenever the cached copy is refreshed
+
+ public:
+ /**
+ * Construct an ICULocaleService.
+ */
+ ICULocaleService();
+
+ /**
+ * Construct an ICULocaleService with a name (useful for debugging).
+ */
+ ICULocaleService(const UnicodeString& name);
+
+ /**
+ * Destructor.
+ */
+ virtual ~ICULocaleService();
+
+#if 0
+ // redeclare because of overload resolution rules?
+ // no, causes ambiguities since both UnicodeString and Locale have constructors that take a const char*
+ // need some compiler flag to remove warnings
+ UObject* get(const UnicodeString& descriptor, UErrorCode& status) const {
+ return ICUService::get(descriptor, status);
+ }
+
+ UObject* get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const {
+ return ICUService::get(descriptor, actualReturn, status);
+ }
+#endif
+
+ /**
+ * Convenience override for callers using locales. This calls
+ * get(locale, kind, actualReturn, status) with KIND_ANY for kind and
+ * NULL for actualReturn.
+ */
+ UObject* get(const Locale& locale, UErrorCode& status) const;
+
+ /**
+ * Convenience override for callers using locales. This calls
+ * get(locale, kind, actualReturn, status) with a NULL actualReturn.
+ */
+ UObject* get(const Locale& locale, int32_t kind, UErrorCode& status) const;
+
+ /**
+ * Convenience override for callers using locales. This calls
+ * get(locale, kind, actualReturn, status) with KIND_ANY for kind.
+ */
+ UObject* get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const;
+
+ /**
+ * Convenience override for callers using locales. This uses
+ * createKey(locale name, kind) to create a key, calls getKey, and then
+ * if actualReturn is not NULL and an object was found, converts the actual
+ * ID reported by getKey (after parseSuffix) into a Locale in *actualReturn.
+ */
+ UObject* get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const;
+
+ /**
+ * Convenience override for callers using locales. This calls
+ * registerInstance(objToAdopt, locale, kind, coverage, status)
+ * passing KIND_ANY for the kind, and VISIBLE for the coverage.
+ */
+ virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status);
+
+ /**
+ * Convenience function for callers using locales. This calls
+ * registerInstance(objToAdopt, locale, kind, coverage, status)
+ * passing VISIBLE for the coverage.
+ */
+ virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status);
+
+ /**
+ * Convenience function for callers using locales. This instantiates
+ * a SimpleLocaleKeyFactory, and registers the factory.
+ */
+ virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status);
+
+
+ /**
+ * (Stop compiler from complaining about hidden overrides.)
+ * Since both UnicodeString and Locale have constructors that take const char*, adding a public
+ * method that takes UnicodeString causes ambiguity at call sites that use const char*.
+ * We really need a flag that is understood by all compilers that will suppress the warning about
+ * hidden overrides.
+ */
+ virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status);
+
+ /**
+ * Convenience method for callers using locales. This returns the standard
+ * service ID enumeration.
+ */
+ virtual StringEnumeration* getAvailableLocales(void) const;
+
+ protected:
+
+ /**
+ * Return the name of the current fallback locale. If it has changed since this was
+ * last accessed, the service cache is cleared.
+ */
+ const UnicodeString& validateFallbackLocale() const;
+
+ /**
+ * Override superclass createKey method.
+ */
+ virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const;
+
+ /**
+ * Additional createKey that takes a kind.
+ */
+ virtual ICUServiceKey* createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const;
+
+ friend class ServiceEnumeration;
+};
+
+U_NAMESPACE_END
+
+ /* UCONFIG_NO_SERVICE */
+#endif
+
+ /* ICULSERV_H */
+#endif
+
diff --git a/thirdparty/icu4c/common/servls.cpp b/thirdparty/icu4c/common/servls.cpp
new file mode 100644
index 0000000000..81dc4f750e
--- /dev/null
+++ b/thirdparty/icu4c/common/servls.cpp
@@ -0,0 +1,295 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2014, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ *******************************************************************************
+ */
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_SERVICE
+
+#include "unicode/resbund.h"
+#include "uresimp.h"
+#include "cmemory.h"
+#include "servloc.h"
+#include "ustrfmt.h"
+#include "charstr.h"
+#include "uassert.h"
+
+#define UNDERSCORE_CHAR ((UChar)0x005f)
+#define AT_SIGN_CHAR ((UChar)64)
+#define PERIOD_CHAR ((UChar)46)
+
+U_NAMESPACE_BEGIN
+
+ICULocaleService::ICULocaleService()
+ : fallbackLocale(Locale::getDefault()) // snapshot of the default locale; fallbackLocaleName is left default-constructed
+{
+}
+
+ICULocaleService::ICULocaleService(const UnicodeString& dname)
+ : ICUService(dname) // the name is handed to the base class
+ , fallbackLocale(Locale::getDefault()) // snapshot of the default locale; fallbackLocaleName is left default-constructed
+{
+}
+
+ICULocaleService::~ICULocaleService()
+{ // nothing to release beyond the members' own destructors
+}
+
+UObject*
+ICULocaleService::get(const Locale& locale, UErrorCode& status) const
+{
+ return get(locale, LocaleKey::KIND_ANY, NULL, status); // any kind, actual locale not reported
+}
+
+UObject*
+ICULocaleService::get(const Locale& locale, int32_t kind, UErrorCode& status) const
+{
+ return get(locale, kind, NULL, status); // caller's kind, actual locale not reported
+}
+
+UObject*
+ICULocaleService::get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const
+{
+ return get(locale, LocaleKey::KIND_ANY, actualReturn, status); // any kind, actual locale reported through actualReturn
+}
+
+UObject*
+ICULocaleService::get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const
+{
+ if (U_FAILURE(status)) {
+ return NULL;
+ }
+
+ UnicodeString name(locale.getName(), -1, US_INV);
+ if (name.isBogus()) {
+ status = U_MEMORY_ALLOCATION_ERROR; // the ID string could not be built
+ return NULL;
+ }
+ ICUServiceKey* const lookupKey = createKey(&name, kind, status);
+ if (lookupKey == NULL) {
+ return NULL;
+ }
+ UObject* found = NULL;
+ if (actualReturn != NULL) {
+ // The caller wants to know which locale actually matched.
+ UnicodeString actualId;
+ found = getKey(*lookupKey, &actualId, status);
+ if (found != NULL) {
+ lookupKey->parseSuffix(actualId);
+ LocaleUtility::initLocaleFromName(actualId, *actualReturn);
+ }
+ } else {
+ found = getKey(*lookupKey, status);
+ }
+ delete lookupKey; // the key was created for this lookup only
+ return found;
+}
+
+
+URegistryKey
+ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale,
+ UBool visible, UErrorCode& status)
+{
+ Locale loc;
+ LocaleUtility::initLocaleFromName(locale, loc); // convert the string ID to a Locale, then use the Locale overload
+ return registerInstance(objToAdopt, loc, LocaleKey::KIND_ANY,
+ visible ? LocaleKeyFactory::VISIBLE : LocaleKeyFactory::INVISIBLE, status);
+}
+
+URegistryKey
+ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status)
+{
+ return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY, LocaleKeyFactory::VISIBLE, status); // defaults: any kind, visible
+}
+
+URegistryKey
+ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status)
+{
+ return registerInstance(objToAdopt, locale, kind, LocaleKeyFactory::VISIBLE, status); // default coverage: visible
+}
+
+URegistryKey
+ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status)
+{
+ ICUServiceFactory* const wrapper = new SimpleLocaleKeyFactory(objToAdopt, locale, kind, coverage);
+ if (wrapper == NULL) {
+ delete objToAdopt; // no factory exists to take the object over, so it is destroyed here
+ return NULL;
+ }
+ return registerFactory(wrapper, status);
+}
+
+#if 0
+URegistryKey
+ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, UErrorCode& status)
+{
+ return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY, LocaleKeyFactory::VISIBLE, status);
+}
+
+URegistryKey
+ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status)
+{
+ return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY,
+ visible ? LocaleKeyFactory::VISIBLE : LocaleKeyFactory::INVISIBLE,
+ status);
+}
+
+URegistryKey
+ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, int32_t kind, int32_t coverage, UErrorCode& status)
+{
+ ICUServiceFactory * factory = new SimpleLocaleKeyFactory(objToAdopt, locale, kind, coverage);
+ if (factory != NULL) {
+ return registerFactory(factory, status);
+ }
+ delete objToAdopt;
+ return NULL;
+}
+#endif
+
+class ServiceEnumeration : public StringEnumeration { // enumerates a snapshot of a service's visible IDs
+private:
+ const ICULocaleService* _service;
+ int32_t _timestamp; // service timestamp captured when _ids was filled
+ UVector _ids; // ID strings; constructed with uprv_deleteUObject as element deleter
+ int32_t _pos; // index of the next ID handed out by snext()
+
+private:
+ ServiceEnumeration(const ICULocaleService* service, UErrorCode &status)
+ : _service(service)
+ , _timestamp(service->getTimestamp())
+ , _ids(uprv_deleteUObject, NULL, status)
+ , _pos(0)
+ {
+ _service->getVisibleIDs(_ids, status);
+ }
+
+ ServiceEnumeration(const ServiceEnumeration &other, UErrorCode &status)
+ : _service(other._service)
+ , _timestamp(other._timestamp)
+ , _ids(uprv_deleteUObject, NULL, status)
+ , _pos(0)
+ {
+ if(U_SUCCESS(status)) {
+ const int32_t length = other._ids.size();
+ for(int32_t i = 0; i < length && U_SUCCESS(status); ++i) { // stop at the first failure instead of cloning the rest
+ UObject *copy = ((UnicodeString *)other._ids.elementAt(i))->clone();
+ if(copy == NULL) { status = U_MEMORY_ALLOCATION_ERROR; break; } // never store a NULL ID: snext() would report it as end-of-list
+ _ids.addElement(copy, status);
+ }
+
+ if(U_SUCCESS(status)) {
+ _pos = other._pos;
+ }
+ }
+ }
+
+public:
+ static ServiceEnumeration* create(const ICULocaleService* service) { // returns NULL if the ID list could not be built
+ UErrorCode status = U_ZERO_ERROR;
+ ServiceEnumeration* result = new ServiceEnumeration(service, status);
+ if (U_SUCCESS(status)) {
+ return result;
+ }
+ delete result;
+ return NULL;
+ }
+
+ virtual ~ServiceEnumeration();
+
+ virtual StringEnumeration *clone() const { // returns NULL if any ID could not be copied
+ UErrorCode status = U_ZERO_ERROR;
+ ServiceEnumeration *cl = new ServiceEnumeration(*this, status);
+ if(U_FAILURE(status)) {
+ delete cl;
+ cl = NULL;
+ }
+ return cl;
+ }
+
+ UBool upToDate(UErrorCode& status) const { // TRUE only while the service timestamp still matches the snapshot
+ if (U_SUCCESS(status)) {
+ if (_timestamp == _service->getTimestamp()) {
+ return TRUE;
+ }
+ status = U_ENUM_OUT_OF_SYNC_ERROR;
+ }
+ return FALSE;
+ }
+
+ virtual int32_t count(UErrorCode& status) const {
+ return upToDate(status) ? _ids.size() : 0;
+ }
+
+ virtual const UnicodeString* snext(UErrorCode& status) {
+ if (upToDate(status) && (_pos < _ids.size())) {
+ return (const UnicodeString*)_ids[_pos++];
+ }
+ return NULL; // out of sync (status set by upToDate) or no IDs left
+ }
+
+ virtual void reset(UErrorCode& status) {
+ if (status == U_ENUM_OUT_OF_SYNC_ERROR) {
+ status = U_ZERO_ERROR; // an out-of-sync error is exactly what reset() repairs
+ }
+ if (U_SUCCESS(status)) {
+ _timestamp = _service->getTimestamp();
+ _pos = 0;
+ _service->getVisibleIDs(_ids, status);
+ }
+ }
+
+public:
+ static UClassID U_EXPORT2 getStaticClassID(void);
+ virtual UClassID getDynamicClassID(void) const;
+};
+
+ServiceEnumeration::~ServiceEnumeration() {} // out-of-line definition; no body needed beyond the members' own destructors
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ServiceEnumeration)
+
+StringEnumeration*
+ICULocaleService::getAvailableLocales(void) const
+{
+ return ServiceEnumeration::create(this); // may be NULL: create() returns NULL on failure
+}
+
+const UnicodeString&
+ICULocaleService::validateFallbackLocale() const
+{
+ const Locale& loc = Locale::getDefault();
+ ICULocaleService* ncThis = (ICULocaleService*)this; // const is cast away so the cached fallback can be refreshed
+ static UMutex llock; // function-local static: one lock shared by every ICULocaleService instance
+ {
+ Mutex mutex(&llock);
+ if (loc != fallbackLocale) { // the default locale differs from the cached copy
+ ncThis->fallbackLocale = loc;
+ LocaleUtility::initNameFromLocale(loc, ncThis->fallbackLocaleName);
+ ncThis->clearServiceCache();
+ }
+ }
+ return fallbackLocaleName; // NOTE(review): the constructors seed fallbackLocale from Locale::getDefault() without filling fallbackLocaleName, so the name looks like it stays empty until the default locale changes - confirm this is intended
+}
+
+ICUServiceKey*
+ICULocaleService::createKey(const UnicodeString* id, UErrorCode& status) const
+{
+ return LocaleKey::createWithCanonicalFallback(id, &validateFallbackLocale(), status); // fallback ID is the (revalidated) cached default-locale name
+}
+
+ICUServiceKey*
+ICULocaleService::createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const
+{
+ return LocaleKey::createWithCanonicalFallback(id, &validateFallbackLocale(), kind, status); // as the two-argument overload, plus the caller's kind
+}
+
+U_NAMESPACE_END
+
+/* !UCONFIG_NO_SERVICE */
+#endif
+
+
diff --git a/thirdparty/icu4c/common/servnotf.cpp b/thirdparty/icu4c/common/servnotf.cpp
new file mode 100644
index 0000000000..f577795cae
--- /dev/null
+++ b/thirdparty/icu4c/common/servnotf.cpp
@@ -0,0 +1,120 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2012, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_SERVICE
+
+#include "servnotf.h"
+#ifdef NOTIFIER_DEBUG
+#include <stdio.h>
+#endif
+
+U_NAMESPACE_BEGIN
+
+EventListener::~EventListener() {} // out-of-line definition of the virtual destructor
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EventListener)
+
+static UMutex notifyLock;
+
+ICUNotifier::ICUNotifier(void)
+: listeners(NULL) // the vector is created lazily by the first addListener()
+{
+}
+
+ICUNotifier::~ICUNotifier(void) {
+ {
+ Mutex lmx(&notifyLock); // same file-level lock used by add/remove/notify
+ delete listeners; // the vector is created without an element deleter in addListener(), so the listeners themselves are presumably not freed - confirm
+ listeners = NULL;
+ }
+}
+
+
+void
+ICUNotifier::addListener(const EventListener* l, UErrorCode& status) // registers l unless it is NULL, rejected, or already present
+{
+ if (U_SUCCESS(status)) {
+ if (l == NULL) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ if (acceptsListener(*l)) {
+ Mutex lmx(&notifyLock);
+ if (listeners == NULL) {
+ // Build the vector in a local first so a failed allocation or constructor never reaches the member.
+ UVector* created = new UVector(5, status);
+ if (created == NULL) { status = U_MEMORY_ALLOCATION_ERROR; }
+ if (U_FAILURE(status)) { delete created; return; }
+ listeners = created;
+ } else {
+ for (int i = 0, e = listeners->size(); i < e; ++i) {
+ if (l == (const EventListener*)(listeners->elementAt(i))) { return; } // identical listener already registered
+ }
+ }
+ listeners->addElement((void*)l, status); // cast away const
+ }
+#ifdef NOTIFIER_DEBUG
+ else {
+ fprintf(stderr, "Listener invalid for this notifier.");
+ exit(1);
+ }
+#endif
+ }
+}
+
+void
+ICUNotifier::removeListener(const EventListener *l, UErrorCode& status)
+{
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if (l == NULL) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ Mutex lmx(&notifyLock);
+ if (listeners == NULL) {
+ return; // nothing registered, nothing to remove
+ }
+ // Listeners are matched by pointer identity, not by value.
+ const int32_t count = listeners->size();
+ for (int32_t idx = 0; idx < count; ++idx) {
+ const EventListener* registered = (const EventListener*)listeners->elementAt(idx);
+ if (registered != l) {
+ continue;
+ }
+ listeners->removeElementAt(idx);
+ if (listeners->size() == 0) { // drop the vector itself once the last listener is gone
+ delete listeners;
+ listeners = NULL;
+ }
+ return;
+ }
+}
+
+void
+ICUNotifier::notifyChanged(void)
+{
+ // Take the lock before looking at 'listeners': addListener/removeListener replace and
+ // delete the vector under notifyLock, so an unlocked read would race with them.
+ Mutex lmx(&notifyLock);
+ if (listeners != NULL) {
+ for (int i = 0, e = listeners->size(); i < e; ++i) {
+ EventListener* el = (EventListener*)listeners->elementAt(i);
+ notifyListener(*el); // called on the caller's thread while notifyLock is held
+ }
+ }
+}
+
+U_NAMESPACE_END
+
+/* UCONFIG_NO_SERVICE */
+#endif
+
diff --git a/thirdparty/icu4c/common/servnotf.h b/thirdparty/icu4c/common/servnotf.h
new file mode 100644
index 0000000000..305570c1e6
--- /dev/null
+++ b/thirdparty/icu4c/common/servnotf.h
@@ -0,0 +1,125 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2014, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+#ifndef ICUNOTIF_H
+#define ICUNOTIF_H
+
+#include "unicode/utypes.h"
+
+#if UCONFIG_NO_SERVICE
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Allow the declaration of APIs with pointers to ICUNotifier
+ * even when service is removed from the build.
+ */
+class ICUNotifier;
+
+U_NAMESPACE_END
+
+#else
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+#include "mutex.h"
+#include "uvector.h"
+
+U_NAMESPACE_BEGIN
+
+class U_COMMON_API EventListener : public UObject { // base type for listeners registered with an ICUNotifier
+public:
+ virtual ~EventListener();
+
+public:
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ virtual UClassID getDynamicClassID() const;
+
+public:
+#ifdef SERVICE_DEBUG
+ virtual UnicodeString& debug(UnicodeString& result) const {
+ return debugClass(result);
+ }
+
+ virtual UnicodeString& debugClass(UnicodeString& result) const {
+ return result.append((UnicodeString)"Key"); // NOTE(review): label says "Key" although this class is EventListener - looks copy-pasted, confirm
+ }
+#endif
+};
+
+/**
+ * <p>Abstract implementation of a notification facility. Clients add
+ * EventListeners with addListener and remove them with removeListener.
+ * Notifiers call notifyChanged when they wish to notify listeners.
+ * notifyChanged then calls notifyListener on each registered
+ * listener, directly on the calling thread; nothing is queued
+ * and no separate notification thread is involved.</p>
+ *
+ * <p>Subclasses override acceptsListener and notifyListener
+ * to add type-safe notification. AcceptsListener should return
+ * true if the listener is of the appropriate type; ICUNotifier
+ * itself will ensure the listener is non-null and that the
+ * identical listener is not already registered with the Notifier.
+ * NotifyListener should cast the listener to the appropriate
+ * type and call the appropriate method on the listener.</p>
+ */
+
+class U_COMMON_API ICUNotifier : public UMemory {
+private: UVector* listeners; // NULL while no listener is registered
+
+public:
+ ICUNotifier(void);
+
+ virtual ~ICUNotifier(void);
+
+ /**
+ * Add a listener to be notified when notifyChanged is called.
+ * The listener must not be null. AcceptsListener must return
+ * true for the listener. Attempts to
+ * register the identical listener more than once will be
+ * silently ignored.
+ */
+ virtual void addListener(const EventListener* l, UErrorCode& status);
+
+ /**
+ * Stop notifying this listener. The listener must
+ * not be null. Attempts to remove a listener that is
+ * not registered will be silently ignored.
+ */
+ virtual void removeListener(const EventListener* l, UErrorCode& status);
+
+ /**
+ * ICU doesn't spawn its own threads. All listeners are notified in
+ * the thread of the caller. Misbehaved listeners can therefore
+ * indefinitely block the calling thread. Callers should beware of
+ * deadlock situations.
+ */
+ virtual void notifyChanged(void);
+
+protected:
+ /**
+ * Subclasses implement this to return true if the listener is
+ * of the appropriate type.
+ */
+ virtual UBool acceptsListener(const EventListener& l) const = 0;
+
+ /**
+ * Subclasses implement this to notify the listener.
+ */
+ virtual void notifyListener(EventListener& l) const = 0;
+};
+
+U_NAMESPACE_END
+
+/* UCONFIG_NO_SERVICE */
+#endif
+
+/* ICUNOTIF_H */
+#endif
diff --git a/thirdparty/icu4c/common/servrbf.cpp b/thirdparty/icu4c/common/servrbf.cpp
new file mode 100644
index 0000000000..94279ab3a1
--- /dev/null
+++ b/thirdparty/icu4c/common/servrbf.cpp
@@ -0,0 +1,96 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2014, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ *******************************************************************************
+ */
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_SERVICE
+
+#include "unicode/resbund.h"
+#include "uresimp.h"
+#include "cmemory.h"
+#include "servloc.h"
+#include "ustrfmt.h"
+#include "uhash.h"
+#include "charstr.h"
+#include "ucln_cmn.h"
+#include "uassert.h"
+
+#define UNDERSCORE_CHAR ((UChar)0x005f)
+#define AT_SIGN_CHAR ((UChar)64)
+#define PERIOD_CHAR ((UChar)46)
+
+U_NAMESPACE_BEGIN
+
+ICUResourceBundleFactory::ICUResourceBundleFactory()
+ : LocaleKeyFactory(VISIBLE)
+ , _bundleName()
+{
+}
+
+ICUResourceBundleFactory::ICUResourceBundleFactory(const UnicodeString& bundleName)
+ : LocaleKeyFactory(VISIBLE)
+ , _bundleName(bundleName)
+{
+}
+
+ICUResourceBundleFactory::~ICUResourceBundleFactory() {}
+
+/**
+ * Looks up the available locale names for _bundleName through
+ * LocaleUtility::getAvailableLocaleNames().
+ * Returns NULL without doing the lookup when status is already a failure.
+ */
+const Hashtable*
+ICUResourceBundleFactory::getSupportedIDs(UErrorCode& status) const
+{
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+    return LocaleUtility::getAvailableLocaleNames(_bundleName);
+}
+
+/**
+ * Creates a ResourceBundle for loc, using _bundleName as the package name.
+ * Returns NULL when status is already a failure, or when the extracted
+ * package name length is not smaller than the local buffer capacity.
+ */
+UObject*
+ICUResourceBundleFactory::handleCreate(const Locale& loc, int32_t /* kind */, const ICUService* /* service */, UErrorCode& status) const
+{
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+    // _bundleName is a package name
+    // and should only contain invariant characters
+    // ??? is it always true that the max length of the bundle name is 19?
+    // who made this change? -- dlf
+    char pkg[20];
+    const int32_t pkgCapacity = (int32_t)sizeof(pkg);
+    const int32_t pkgLength = _bundleName.extract(0, INT32_MAX, pkg, pkgCapacity, US_INV);
+    if (pkgLength < pkgCapacity) {
+        return new ResourceBundle(pkg, loc, status);
+    }
+    return NULL;
+}
+
+#ifdef SERVICE_DEBUG
+UnicodeString&
+ICUResourceBundleFactory::debug(UnicodeString& result) const
+{
+ LocaleKeyFactory::debug(result);
+ result.append((UnicodeString)", bundle: ");
+ return result.append(_bundleName);
+}
+
+UnicodeString&
+ICUResourceBundleFactory::debugClass(UnicodeString& result) const
+{
+ return result.append((UnicodeString)"ICUResourceBundleFactory");
+}
+#endif
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ICUResourceBundleFactory)
+
+U_NAMESPACE_END
+
+/* !UCONFIG_NO_SERVICE */
+#endif
+
+
diff --git a/thirdparty/icu4c/common/servslkf.cpp b/thirdparty/icu4c/common/servslkf.cpp
new file mode 100644
index 0000000000..09154d1b91
--- /dev/null
+++ b/thirdparty/icu4c/common/servslkf.cpp
@@ -0,0 +1,123 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2014, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ *******************************************************************************
+ */
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_SERVICE
+
+#include "unicode/resbund.h"
+#include "uresimp.h"
+#include "cmemory.h"
+#include "servloc.h"
+#include "ustrfmt.h"
+#include "uhash.h"
+#include "charstr.h"
+#include "uassert.h"
+
+#define UNDERSCORE_CHAR ((UChar)0x005f)
+#define AT_SIGN_CHAR ((UChar)64)
+#define PERIOD_CHAR ((UChar)46)
+
+U_NAMESPACE_BEGIN
+
+/*
+ ******************************************************************
+ */
+
+SimpleLocaleKeyFactory::SimpleLocaleKeyFactory(UObject* objToAdopt,
+ const UnicodeString& locale,
+ int32_t kind,
+ int32_t coverage)
+ : LocaleKeyFactory(coverage)
+ , _obj(objToAdopt)
+ , _id(locale)
+ , _kind(kind)
+{
+}
+
+SimpleLocaleKeyFactory::SimpleLocaleKeyFactory(UObject* objToAdopt,
+ const Locale& locale,
+ int32_t kind,
+ int32_t coverage)
+ : LocaleKeyFactory(coverage)
+ , _obj(objToAdopt)
+ , _id()
+ , _kind(kind)
+{
+ LocaleUtility::initNameFromLocale(locale, _id);
+}
+
+SimpleLocaleKeyFactory::~SimpleLocaleKeyFactory()
+{
+ delete _obj;
+ _obj = NULL;
+}
+
+/**
+ * Returns a clone of the adopted object (made via service->cloneInstance)
+ * when the key's kind is accepted and the key's current ID equals _id.
+ * Returns NULL otherwise, or when status is already a failure.
+ * The key is treated as a LocaleKey.
+ */
+UObject*
+SimpleLocaleKeyFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const
+{
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+    const LocaleKey& localeKey = (const LocaleKey&)key;
+    // A factory of kind KIND_ANY accepts every key kind.
+    if (_kind != LocaleKey::KIND_ANY && _kind != localeKey.kind()) {
+        return NULL;
+    }
+    UnicodeString currentKeyID;
+    localeKey.currentID(currentKeyID);
+    if (_id == currentKeyID) {
+        return service->cloneInstance(_obj);
+    }
+    return NULL;
+}
+
+//UBool
+//SimpleLocaleKeyFactory::isSupportedID(const UnicodeString& id, UErrorCode& /* status */) const
+//{
+// return id == _id;
+//}
+
+/**
+ * Updates the visible-ID table for this factory's single ID: when bit 0 of
+ * _coverage is set the ID is removed from result, otherwise the ID is
+ * mapped to this factory. Does nothing if status is already a failure.
+ */
+void
+SimpleLocaleKeyFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    const UBool lowCoverageBitSet = (_coverage & 0x1) != 0;
+    if (lowCoverageBitSet) {
+        result.remove(_id);
+        return;
+    }
+    result.put(_id, (void*)this, status);
+}
+
+#ifdef SERVICE_DEBUG
+UnicodeString&
+SimpleLocaleKeyFactory::debug(UnicodeString& result) const
+{
+ LocaleKeyFactory::debug(result);
+ result.append((UnicodeString)", id: ");
+ result.append(_id);
+ result.append((UnicodeString)", kind: ");
+ result.append(_kind);
+ return result;
+}
+
+UnicodeString&
+SimpleLocaleKeyFactory::debugClass(UnicodeString& result) const
+{
+ return result.append((UnicodeString)"SimpleLocaleKeyFactory");
+}
+#endif
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleLocaleKeyFactory)
+
+U_NAMESPACE_END
+
+/* !UCONFIG_NO_SERVICE */
+#endif
+
+
diff --git a/thirdparty/icu4c/common/sharedobject.cpp b/thirdparty/icu4c/common/sharedobject.cpp
new file mode 100644
index 0000000000..6eeca8605f
--- /dev/null
+++ b/thirdparty/icu4c/common/sharedobject.cpp
@@ -0,0 +1,62 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* sharedobject.cpp
+*/
+#include "sharedobject.h"
+#include "mutex.h"
+#include "uassert.h"
+#include "umutex.h"
+#include "unifiedcache.h"
+
+U_NAMESPACE_BEGIN
+
+SharedObject::~SharedObject() {}
+
+UnifiedCacheBase::~UnifiedCacheBase() {}
+
+void
+SharedObject::addRef() const {
+ umtx_atomic_inc(&hardRefCount);
+}
+
+// removeRef Decrement the reference count and delete if it is zero.
+// Note that SharedObjects with a non-null cachePtr are owned by the
+// unified cache, and the cache will be responsible for the actual deletion.
+// The deletion could be as soon as immediately following the
+// update to the reference count, if another thread is running
+// a cache eviction cycle concurrently.
+// NO ACCESS TO *this PERMITTED AFTER REFERENCE COUNT == 0 for cached objects.
+// THE OBJECT MAY ALREADY BE GONE.
+void
+SharedObject::removeRef() const {
+    // Capture cachePtr before touching the count: no member of *this is
+    // read after the decrement.
+    const UnifiedCacheBase *owningCache = this->cachePtr;
+    const int32_t remainingRefs = umtx_atomic_dec(&hardRefCount);
+    U_ASSERT(remainingRefs >= 0);
+    if (remainingRefs != 0) {
+        return;
+    }
+    if (owningCache == nullptr) {
+        // Not cached: this object deletes itself.
+        delete this;
+        return;
+    }
+    // Cached: only notify the cache.
+    owningCache->handleUnreferencedObject();
+}
+
+
+int32_t
+SharedObject::getRefCount() const {
+ return umtx_loadAcquire(hardRefCount);
+}
+
+/** Deletes this object when it has no cachePtr and its hard reference count is zero. */
+void
+SharedObject::deleteIfZeroRefCount() const {
+    if (this->cachePtr != nullptr) {
+        return;
+    }
+    if (getRefCount() != 0) {
+        return;
+    }
+    delete this;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/sharedobject.h b/thirdparty/icu4c/common/sharedobject.h
new file mode 100644
index 0000000000..6ccfb27b01
--- /dev/null
+++ b/thirdparty/icu4c/common/sharedobject.h
@@ -0,0 +1,184 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2015-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* sharedobject.h
+*/
+
+#ifndef __SHAREDOBJECT_H__
+#define __SHAREDOBJECT_H__
+
+
+#include "unicode/uobject.h"
+#include "umutex.h"
+
+U_NAMESPACE_BEGIN
+
+class SharedObject;
+
+/**
+ * Base class for unified cache exposing enough methods to SharedObject
+ * instances to allow their addRef() and removeRef() methods to
+ * update cache metrics. No other part of ICU, except for SharedObject,
+ * should directly call the methods of this base class.
+ */
+class U_COMMON_API UnifiedCacheBase : public UObject {
+public:
+ UnifiedCacheBase() { }
+
+ /**
+ * Notify the cache implementation that an object was seen transitioning to
+ * zero hard references. The cache may use this to keep track the number of
+ * unreferenced SharedObjects, and to trigger evictions.
+ */
+ virtual void handleUnreferencedObject() const = 0;
+
+ virtual ~UnifiedCacheBase();
+private:
+ UnifiedCacheBase(const UnifiedCacheBase &);
+ UnifiedCacheBase &operator=(const UnifiedCacheBase &);
+};
+
+/**
+ * Base class for shared, reference-counted, auto-deleted objects.
+ * Subclasses can be immutable.
+ * If they are mutable, then they must implement their copy constructor
+ * so that copyOnWrite() works.
+ *
+ * Either stack-allocate, use LocalPointer, or use addRef()/removeRef().
+ * Sharing requires reference-counting.
+ */
+class U_COMMON_API SharedObject : public UObject {
+public:
+ /** Initializes softRefCount and hardRefCount to 0 and cachePtr to NULL. */
+ SharedObject() :
+ softRefCount(0),
+ hardRefCount(0),
+ cachePtr(NULL) {}
+
+ /** Initializes softRefCount and hardRefCount to 0 and cachePtr to NULL; they are not copied from other. */
+ SharedObject(const SharedObject &other) :
+ UObject(other),
+ softRefCount(0),
+ hardRefCount(0),
+ cachePtr(NULL) {}
+
+ virtual ~SharedObject();
+
+ /**
+ * Increments the number of hard references to this object. Thread-safe.
+ * Not for use from within the Unified Cache implementation.
+ */
+ void addRef() const;
+
+ /**
+ * Decrements the number of hard references to this object, and
+ * arrange for possible cache-eviction and/or deletion if ref
+ * count goes to zero. Thread-safe.
+ *
+ * Not for use from within the UnifiedCache implementation.
+ */
+ void removeRef() const;
+
+ /**
+ * Returns the number of hard references for this object.
+ * Uses a memory barrier.
+ */
+ int32_t getRefCount() const;
+
+ /**
+ * If noHardReferences() == true then this object has no hard references.
+ * Must be called only from within the internals of UnifiedCache.
+ */
+ inline UBool noHardReferences() const { return getRefCount() == 0; }
+
+ /**
+ * If hasHardReferences() == true then this object has hard references.
+ * Must be called only from within the internals of UnifiedCache.
+ */
+ inline UBool hasHardReferences() const { return getRefCount() != 0; }
+
+ /**
+ * Deletes this object if it has no references.
+ * Available for non-cached SharedObjects only. Ownership of cached objects
+ * is with the UnifiedCache, which is solely responsible for eviction and deletion.
+ */
+ void deleteIfZeroRefCount() const;
+
+
+ /**
+ * Returns a writable version of ptr.
+ * If there is exactly one owner, then ptr itself is returned as a
+ * non-const pointer.
+ * If there are multiple owners, then ptr is replaced with a
+ * copy-constructed clone,
+ * and that is returned.
+ * Returns NULL if cloning failed.
+ *
+ * T must be a subclass of SharedObject.
+ */
+ template<typename T>
+ static T *copyOnWrite(const T *&ptr) {
+ const T *p = ptr;
+ if(p->getRefCount() <= 1) { return const_cast<T *>(p); }
+ T *p2 = new T(*p);
+ if(p2 == NULL) { return NULL; }
+ p->removeRef();
+ ptr = p2;
+ p2->addRef();
+ return p2;
+ }
+
+ /**
+ * Makes dest an owner of the object pointed to by src while adjusting
+ * reference counts and deleting the previous object dest pointed to
+ * if necessary. Before this call is made, dest must either be NULL or
+ * be included in the reference count of the object it points to.
+ *
+ * T must be a subclass of SharedObject.
+ */
+ template<typename T>
+ static void copyPtr(const T *src, const T *&dest) {
+ if(src != dest) {
+ if(dest != NULL) { dest->removeRef(); }
+ dest = src;
+ if(src != NULL) { src->addRef(); }
+ }
+ }
+
+ /**
+ * Equivalent to copyPtr(NULL, dest).
+ */
+ template<typename T>
+ static void clearPtr(const T *&ptr) {
+ if (ptr != NULL) {
+ ptr->removeRef();
+ ptr = NULL;
+ }
+ }
+
+private:
+ /**
+ * The number of references from the UnifiedCache, which is
+ * the number of times that the sharedObject is stored as a hash table value.
+ * For use by UnifiedCache implementation code only.
+ * All access is synchronized by UnifiedCache's gCacheMutex
+ */
+ mutable int32_t softRefCount;
+ friend class UnifiedCache;
+
+ /**
+ * Reference count, excluding references from within the UnifiedCache implementation.
+ */
+ mutable u_atomic_int32_t hardRefCount;
+
+ mutable const UnifiedCacheBase *cachePtr;
+
+};
+
+U_NAMESPACE_END
+
+#endif
diff --git a/thirdparty/icu4c/common/simpleformatter.cpp b/thirdparty/icu4c/common/simpleformatter.cpp
new file mode 100644
index 0000000000..f7f7aead61
--- /dev/null
+++ b/thirdparty/icu4c/common/simpleformatter.cpp
@@ -0,0 +1,325 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2014-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* simpleformatter.cpp
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/simpleformatter.h"
+#include "unicode/unistr.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+/**
+ * Argument numbers must be smaller than this limit.
+ * Text segment lengths are offset by this much.
+ * This is currently the only unused char value in compiled patterns,
+ * except it is the maximum value of the first unit (max arg +1).
+ */
+const int32_t ARG_NUM_LIMIT = 0x100;
+/**
+ * Initial and maximum char/UChar value set for a text segment.
+ * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
+ * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
+ */
+const UChar SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff;
+/**
+ * Maximum length of a text segment. Longer segments are split into shorter ones.
+ */
+const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT;
+
+enum {
+ APOS = 0x27,
+ DIGIT_ZERO = 0x30,
+ DIGIT_ONE = 0x31,
+ DIGIT_NINE = 0x39,
+ OPEN_BRACE = 0x7b,
+ CLOSE_BRACE = 0x7d
+};
+
+/** Returns true for a negative length, or for a NULL array with a non-zero length. */
+inline UBool isInvalidArray(const void *array, int32_t length) {
+    if (length < 0) {
+        return TRUE;
+    }
+    return array == NULL && length != 0;
+}
+
+} // namespace
+
+/** Copies the compiled pattern from other; self-assignment is a no-op. */
+SimpleFormatter &SimpleFormatter::operator=(const SimpleFormatter& other) {
+    if (this != &other) {
+        compiledPattern = other.compiledPattern;
+    }
+    return *this;
+}
+
+SimpleFormatter::~SimpleFormatter() {}
+
+UBool SimpleFormatter::applyPatternMinMaxArguments(
+ const UnicodeString &pattern,
+ int32_t min, int32_t max,
+ UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode)) {
+ return FALSE;
+ }
+ // Parse consistent with MessagePattern, but
+ // - support only simple numbered arguments
+ // - build a simple binary structure into the result string
+ const UChar *patternBuffer = pattern.getBuffer();
+ int32_t patternLength = pattern.length();
+ // Reserve the first char for the number of arguments.
+ compiledPattern.setTo((UChar)0);
+ int32_t textLength = 0;
+ int32_t maxArg = -1;
+ UBool inQuote = FALSE;
+ for (int32_t i = 0; i < patternLength;) {
+ UChar c = patternBuffer[i++];
+ if (c == APOS) {
+ if (i < patternLength && (c = patternBuffer[i]) == APOS) {
+ // double apostrophe, skip the second one
+ ++i;
+ } else if (inQuote) {
+ // skip the quote-ending apostrophe
+ inQuote = FALSE;
+ continue;
+ } else if (c == OPEN_BRACE || c == CLOSE_BRACE) {
+ // Skip the quote-starting apostrophe, find the end of the quoted literal text.
+ ++i;
+ inQuote = TRUE;
+ } else {
+ // The apostrophe is part of literal text.
+ c = APOS;
+ }
+ } else if (!inQuote && c == OPEN_BRACE) {
+ if (textLength > 0) {
+ compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
+ (UChar)(ARG_NUM_LIMIT + textLength));
+ textLength = 0;
+ }
+ int32_t argNumber;
+ if ((i + 1) < patternLength &&
+ 0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 &&
+ patternBuffer[i + 1] == CLOSE_BRACE) {
+ i += 2;
+ } else {
+ // Multi-digit argument number (no leading zero) or syntax error.
+ // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
+ // around the number, but this class does not.
+ argNumber = -1;
+ if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
+ argNumber = c - DIGIT_ZERO;
+ while (i < patternLength &&
+ DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
+ argNumber = argNumber * 10 + (c - DIGIT_ZERO);
+ if (argNumber >= ARG_NUM_LIMIT) {
+ break;
+ }
+ }
+ }
+ if (argNumber < 0 || c != CLOSE_BRACE) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return FALSE;
+ }
+ }
+ if (argNumber > maxArg) {
+ maxArg = argNumber;
+ }
+ compiledPattern.append((UChar)argNumber);
+ continue;
+ } // else: c is part of literal text
+ // Append c and track the literal-text segment length.
+ if (textLength == 0) {
+ // Reserve a char for the length of a new text segment, preset the maximum length.
+ compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR);
+ }
+ compiledPattern.append(c);
+ if (++textLength == MAX_SEGMENT_LENGTH) {
+ textLength = 0;
+ }
+ }
+ if (textLength > 0) {
+ compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
+ (UChar)(ARG_NUM_LIMIT + textLength));
+ }
+ int32_t argCount = maxArg + 1;
+ if (argCount < min || max < argCount) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return FALSE;
+ }
+ compiledPattern.setCharAt(0, (UChar)argCount);
+ return TRUE;
+}
+
+UnicodeString& SimpleFormatter::format(
+ const UnicodeString &value0,
+ UnicodeString &appendTo, UErrorCode &errorCode) const {
+ const UnicodeString *values[] = { &value0 };
+ return formatAndAppend(values, 1, appendTo, NULL, 0, errorCode);
+}
+
+UnicodeString& SimpleFormatter::format(
+ const UnicodeString &value0,
+ const UnicodeString &value1,
+ UnicodeString &appendTo, UErrorCode &errorCode) const {
+ const UnicodeString *values[] = { &value0, &value1 };
+ return formatAndAppend(values, 2, appendTo, NULL, 0, errorCode);
+}
+
+UnicodeString& SimpleFormatter::format(
+ const UnicodeString &value0,
+ const UnicodeString &value1,
+ const UnicodeString &value2,
+ UnicodeString &appendTo, UErrorCode &errorCode) const {
+ const UnicodeString *values[] = { &value0, &value1, &value2 };
+ return formatAndAppend(values, 3, appendTo, NULL, 0, errorCode);
+}
+
+/**
+ * Formats values with the compiled pattern and appends the text to appendTo.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR when values or offsets is an invalid array
+ * or when valuesLength is below the pattern's argument limit. appendTo is
+ * returned untouched in that case and when errorCode is already a failure.
+ */
+UnicodeString& SimpleFormatter::formatAndAppend(
+        const UnicodeString *const *values, int32_t valuesLength,
+        UnicodeString &appendTo,
+        int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) {
+        return appendTo;
+    }
+    const UBool argumentsRejected =
+            isInvalidArray(values, valuesLength) ||
+            isInvalidArray(offsets, offsetsLength) ||
+            valuesLength < getArgumentLimit();
+    if (argumentsRejected) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return appendTo;
+    }
+    // No result copy is supplied, and appendTo may not be one of the values.
+    const UChar *patternUnits = compiledPattern.getBuffer();
+    const int32_t patternUnitCount = compiledPattern.length();
+    return format(patternUnits, patternUnitCount, values,
+                  appendTo, NULL, TRUE,
+                  offsets, offsetsLength, errorCode);
+}
+
+/**
+ * Formats values with the compiled pattern into result. The previous
+ * contents of result are removed unless the pattern starts with an argument
+ * whose value is result itself; in that case they are kept and appended to.
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR and returns result when values or offsets is
+ * an invalid array, or when valuesLength is smaller than the pattern's
+ * argument limit. Returns result untouched if errorCode is already a failure.
+ */
+UnicodeString &SimpleFormatter::formatAndReplace(
+ const UnicodeString *const *values, int32_t valuesLength,
+ UnicodeString &result,
+ int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
+ if (U_FAILURE(errorCode)) {
+ return result;
+ }
+ if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return result;
+ }
+ const UChar *cp = compiledPattern.getBuffer();
+ int32_t cpLength = compiledPattern.length();
+ if (valuesLength < getArgumentLimit(cp, cpLength)) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return result;
+ }
+
+ // If the pattern starts with an argument whose value is the same object
+ // as the result, then we keep the result contents and append to it.
+ // Otherwise we replace its contents.
+ int32_t firstArg = -1;
+ // If any non-initial argument value is the same object as the result,
+ // then we first copy its contents and use that instead while formatting.
+ UnicodeString resultCopy;
+ if (getArgumentLimit(cp, cpLength) > 0) {
+ // Unit 0 is skipped; scanning starts at index 1.
+ for (int32_t i = 1; i < cpLength;) {
+ int32_t n = cp[i++];
+ if (n < ARG_NUM_LIMIT) {
+ // n is an argument number.
+ if (values[n] == &result) {
+ // i == 2 here means the unit just read was at index 1,
+ // i.e. the argument is the very first pattern element.
+ if (i == 2) {
+ firstArg = n;
+ } else if (resultCopy.isEmpty() && !result.isEmpty()) {
+ resultCopy = result;
+ }
+ }
+ } else {
+ // n encodes a text segment length; skip over the segment.
+ i += n - ARG_NUM_LIMIT;
+ }
+ }
+ }
+ if (firstArg < 0) {
+ result.remove();
+ }
+ return format(cp, cpLength, values,
+ result, &resultCopy, FALSE,
+ offsets, offsetsLength, errorCode);
+}
+
+/**
+ * Concatenates the literal text segments of a compiled pattern, leaving out
+ * the arguments. Every entry of offsets is first set to -1; for each
+ * argument number below offsetsLength, offsets[number] receives the length
+ * of the text collected so far when that argument is encountered.
+ */
+UnicodeString SimpleFormatter::getTextWithNoArguments(
+        const UChar *compiledPattern,
+        int32_t compiledPatternLength,
+        int32_t* offsets,
+        int32_t offsetsLength) {
+    for (int32_t slot = 0; slot < offsetsLength; ++slot) {
+        offsets[slot] = -1;
+    }
+    const int32_t argumentLimit = getArgumentLimit(compiledPattern, compiledPatternLength);
+    const int32_t capacity = compiledPatternLength - 1 - argumentLimit;
+    UnicodeString text(capacity, 0, 0);  // Java: StringBuilder
+    int32_t pos = 1;
+    while (pos < compiledPatternLength) {
+        const int32_t unit = compiledPattern[pos++];
+        if (unit > ARG_NUM_LIMIT) {
+            // Text segment: the unit carries the segment length offset by ARG_NUM_LIMIT.
+            const int32_t segmentLength = unit - ARG_NUM_LIMIT;
+            text.append(compiledPattern + pos, segmentLength);
+            pos += segmentLength;
+        } else if (unit < offsetsLength) {
+            // TODO(ICU-20406): This does not distinguish between "{0}{1}" and "{1}{0}".
+            // Consider removing this function and replacing it with an iterator interface.
+            offsets[unit] = text.length();
+        }
+    }
+    return text;
+}
+
+/**
+ * Core formatting loop shared by formatAndAppend() and formatAndReplace().
+ * Walks the compiled pattern from index 1, appending text segments and
+ * argument values to result, and records in offsets[n] where argument n
+ * starts in result (entries are preset to -1).
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR when a referenced value is NULL, or when a
+ * value is the result object itself while forbidResultAsValue is set.
+ * When result is also a value and is not the first pattern element,
+ * *resultCopy is appended in its place.
+ */
+UnicodeString &SimpleFormatter::format(
+        const UChar *compiledPattern, int32_t compiledPatternLength,
+        const UnicodeString *const *values,
+        UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
+        int32_t *offsets, int32_t offsetsLength,
+        UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return result;
+    }
+    for (int32_t slot = 0; slot < offsetsLength; ++slot) {
+        offsets[slot] = -1;
+    }
+    int32_t pos = 1;
+    while (pos < compiledPatternLength) {
+        const int32_t unit = compiledPattern[pos++];
+        if (unit >= ARG_NUM_LIMIT) {
+            // Literal text segment of (unit - ARG_NUM_LIMIT) code units.
+            const int32_t segmentLength = unit - ARG_NUM_LIMIT;
+            result.append(compiledPattern + pos, segmentLength);
+            pos += segmentLength;
+            continue;
+        }
+        // Otherwise unit is an argument number.
+        const UnicodeString *value = values[unit];
+        if (value == NULL) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return result;
+        }
+        const UBool valueIsResult = (value == &result);
+        if (valueIsResult && forbidResultAsValue) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return result;
+        }
+        // pos == 2 means this argument is the first element of the pattern:
+        // we are appending to result which is also the first value object.
+        const UBool keepsLeadingResult = valueIsResult && pos == 2;
+        if (unit < offsetsLength) {
+            offsets[unit] = keepsLeadingResult ? 0 : result.length();
+        }
+        if (keepsLeadingResult) {
+            continue;
+        }
+        result.append(valueIsResult ? *resultCopy : *value);
+    }
+    return result;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/sprpimpl.h b/thirdparty/icu4c/common/sprpimpl.h
new file mode 100644
index 0000000000..ca0bcdb516
--- /dev/null
+++ b/thirdparty/icu4c/common/sprpimpl.h
@@ -0,0 +1,130 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *******************************************************************************
+ *
+ * Copyright (C) 2003-2006, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ *******************************************************************************
+ * file name: sprpimpl.h
+ * encoding: UTF-8
+ * tab size: 8 (not used)
+ * indentation:4
+ *
+ * created on: 2003feb1
+ * created by: Ram Viswanadha
+ */
+
+#ifndef SPRPIMPL_H
+#define SPRPIMPL_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_IDNA
+
+#include "unicode/ustring.h"
+#include "unicode/parseerr.h"
+#include "unicode/usprep.h"
+#include "unicode/udata.h"
+#include "utrie.h"
+#include "udataswp.h"
+#include "ubidi_props.h"
+
+#define _SPREP_DATA_TYPE "spp"
+
+enum UStringPrepType{
+ USPREP_UNASSIGNED = 0x0000 ,
+ USPREP_MAP = 0x0001 ,
+ USPREP_PROHIBITED = 0x0002 ,
+ USPREP_DELETE = 0x0003 ,
+ USPREP_TYPE_LIMIT = 0x0004
+};
+
+typedef enum UStringPrepType UStringPrepType;
+
+#ifdef USPREP_TYPE_NAMES_ARRAY
+static const char* usprepTypeNames[] ={
+ "UNASSIGNED" ,
+ "MAP" ,
+ "PROHIBITED" ,
+ "DELETE",
+ "TYPE_LIMIT"
+};
+#endif
+
+enum{
+ _SPREP_NORMALIZATION_ON = 0x0001,
+ _SPREP_CHECK_BIDI_ON = 0x0002
+};
+
+enum{
+ _SPREP_TYPE_THRESHOLD = 0xFFF0,
+ _SPREP_MAX_INDEX_VALUE = 0x3FBF, /*16139*/
+ _SPREP_MAX_INDEX_TOP_LENGTH = 0x0003
+};
+
+/* indexes[] value names */
+enum {
+ _SPREP_INDEX_TRIE_SIZE = 0, /* number of bytes in StringPrep trie */
+ _SPREP_INDEX_MAPPING_DATA_SIZE = 1, /* The array that contains the mapping */
+ _SPREP_NORM_CORRECTNS_LAST_UNI_VERSION = 2, /* The index of Unicode version of last entry in NormalizationCorrections.txt */
+ _SPREP_ONE_UCHAR_MAPPING_INDEX_START = 3, /* The starting index of 1 UChar mapping index in the mapping data array */
+ _SPREP_TWO_UCHARS_MAPPING_INDEX_START = 4, /* The starting index of 2 UChars mapping index in the mapping data array */
+ _SPREP_THREE_UCHARS_MAPPING_INDEX_START = 5, /* The starting index of 3 UChars mapping index in the mapping data array */
+ _SPREP_FOUR_UCHARS_MAPPING_INDEX_START = 6, /* The starting index of 4 UChars mapping index in the mapping data array */
+ _SPREP_OPTIONS = 7, /* Bit set of options to turn on in the profile */
+ _SPREP_INDEX_TOP=16 /* changing this requires a new formatVersion */
+};
+
+typedef struct UStringPrepKey UStringPrepKey;
+
+
+struct UStringPrepKey{
+ char* name;
+ char* path;
+};
+
+struct UStringPrepProfile{
+ int32_t indexes[_SPREP_INDEX_TOP];
+ UTrie sprepTrie;
+ const uint16_t* mappingData;
+ UDataMemory* sprepData;
+ int32_t refCount;
+ UBool isDataLoaded;
+ UBool doNFKC;
+ UBool checkBiDi;
+};
+
+/**
+ * Helper function for populating the UParseError struct
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+uprv_syntaxError(const UChar* rules,
+ int32_t pos,
+ int32_t rulesLen,
+ UParseError* parseError);
+
+
+/**
+ * Swap StringPrep .spp profile data. See udataswp.h.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+usprep_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+#endif
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
diff --git a/thirdparty/icu4c/common/static_unicode_sets.cpp b/thirdparty/icu4c/common/static_unicode_sets.cpp
new file mode 100644
index 0000000000..5dab3931a7
--- /dev/null
+++ b/thirdparty/icu4c/common/static_unicode_sets.cpp
@@ -0,0 +1,245 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
+#include "static_unicode_sets.h"
+#include "umutex.h"
+#include "ucln_cmn.h"
+#include "unicode/uniset.h"
+#include "uresimp.h"
+#include "cstring.h"
+#include "uassert.h"
+
+using namespace icu;
+using namespace icu::unisets;
+
+
+namespace {
+
+UnicodeSet* gUnicodeSets[UNISETS_KEY_COUNT] = {};
+
+// Save the empty instance in static memory to have well-defined behavior if a
+// regular UnicodeSet cannot be allocated.
+alignas(UnicodeSet)
+char gEmptyUnicodeSet[sizeof(UnicodeSet)];
+
+// Whether the gEmptyUnicodeSet is initialized and ready to use.
+UBool gEmptyUnicodeSetInitialized = FALSE;
+
+/** Returns the set stored for key, or the static empty set when that slot is null. */
+inline UnicodeSet* getImpl(Key key) {
+    UnicodeSet* const stored = gUnicodeSets[key];
+    return stored != nullptr ? stored : reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
+}
+
+/**
+ * Allocates a new frozen UnicodeSet holding the union of the sets for k1
+ * and k2 (missing sets count as empty). Returns nullptr if allocation fails.
+ */
+UnicodeSet* computeUnion(Key k1, Key k2) {
+    UnicodeSet* merged = new UnicodeSet();
+    if (merged != nullptr) {
+        const Key parts[] = { k1, k2 };
+        for (Key part : parts) {
+            merged->addAll(*getImpl(part));
+        }
+        merged->freeze();
+    }
+    return merged;
+}
+
+/**
+ * Allocates a new frozen UnicodeSet holding the union of the sets for k1,
+ * k2 and k3 (missing sets count as empty). Returns nullptr if allocation fails.
+ */
+UnicodeSet* computeUnion(Key k1, Key k2, Key k3) {
+    UnicodeSet* merged = new UnicodeSet();
+    if (merged != nullptr) {
+        const Key parts[] = { k1, k2, k3 };
+        for (Key part : parts) {
+            merged->addAll(*getImpl(part));
+        }
+        merged->freeze();
+    }
+    return merged;
+}
+
+
+void saveSet(Key key, const UnicodeString& unicodeSetPattern, UErrorCode& status) {
+ // assert unicodeSets.get(key) == null;
+ gUnicodeSets[key] = new UnicodeSet(unicodeSetPattern, status);
+}
+
+/**
+ * Resource sink that walks a table of contexts, each holding a table of
+ * strictness levels, each holding an array of UnicodeSet patterns. Every
+ * pattern is stored under the Key selected by a marker character found in
+ * the pattern text. The "date" context is ignored. Only comma and period
+ * distinguish "lenient" from other strictness levels.
+ */
+class ParseDataSink : public ResourceSink {
+  public:
+    void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
+        ResourceTable contextsTable = value.getTable(status);
+        if (U_FAILURE(status)) { return; }
+        for (int i = 0; contextsTable.getKeyAndValue(i, key, value); i++) {
+            if (uprv_strcmp(key, "date") == 0) {
+                // ignore
+            } else {
+                ResourceTable strictnessTable = value.getTable(status);
+                if (U_FAILURE(status)) { return; }
+                for (int j = 0; strictnessTable.getKeyAndValue(j, key, value); j++) {
+                    bool isLenient = (uprv_strcmp(key, "lenient") == 0);
+                    ResourceArray array = value.getArray(status);
+                    if (U_FAILURE(status)) { return; }
+                    for (int k = 0; k < array.getSize(); k++) {
+                        array.getValue(k, value);
+                        UnicodeString str = value.getUnicodeString(status);
+                        if (U_FAILURE(status)) { return; }
+                        // There is both lenient and strict data for comma/period,
+                        // but not for any of the other symbols.
+                        // The marker characters below must be real single code
+                        // units (this file is UTF-8), not mis-decoded byte runs.
+                        if (str.indexOf(u'.') != -1) {
+                            saveSet(isLenient ? PERIOD : STRICT_PERIOD, str, status);
+                        } else if (str.indexOf(u',') != -1) {
+                            saveSet(isLenient ? COMMA : STRICT_COMMA, str, status);
+                        } else if (str.indexOf(u'+') != -1) {
+                            saveSet(PLUS_SIGN, str, status);
+                        } else if (str.indexOf(u'-') != -1) {
+                            saveSet(MINUS_SIGN, str, status);
+                        } else if (str.indexOf(u'$') != -1) {
+                            saveSet(DOLLAR_SIGN, str, status);
+                        } else if (str.indexOf(u'£') != -1) {
+                            saveSet(POUND_SIGN, str, status);
+                        } else if (str.indexOf(u'₹') != -1) {
+                            saveSet(RUPEE_SIGN, str, status);
+                        } else if (str.indexOf(u'¥') != -1) {
+                            saveSet(YEN_SIGN, str, status);
+                        } else if (str.indexOf(u'₩') != -1) {
+                            saveSet(WON_SIGN, str, status);
+                        } else if (str.indexOf(u'%') != -1) {
+                            saveSet(PERCENT_SIGN, str, status);
+                        } else if (str.indexOf(u'‰') != -1) {
+                            saveSet(PERMILLE_SIGN, str, status);
+                        } else if (str.indexOf(u'’') != -1) {
+                            saveSet(APOSTROPHE_SIGN, str, status);
+                        } else {
+                            // Unknown class of parse lenients
+                            // TODO(ICU-20428): Make ICU automatically accept new classes?
+                            U_ASSERT(FALSE);
+                        }
+                        if (U_FAILURE(status)) { return; }
+                    }
+                }
+            }
+        }
+    }
+};
+
+
+icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER;
+
+/**
+ * Cleanup callback: destroys the placement-constructed empty set if it was
+ * initialized, deletes and nulls every entry of gUnicodeSets, and resets
+ * the init-once guard. Always returns TRUE.
+ */
+UBool U_CALLCONV cleanupNumberParseUniSets() {
+    if (gEmptyUnicodeSetInitialized) {
+        UnicodeSet* emptySet = reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
+        emptySet->~UnicodeSet();
+        gEmptyUnicodeSetInitialized = FALSE;
+    }
+    for (UnicodeSet*& slot : gUnicodeSets) {
+        delete slot;
+        slot = nullptr;
+    }
+    gNumberParseUniSetsInitOnce.reset();
+    return TRUE;
+}
+
+/**
+ * One-time initializer for gUnicodeSets. Registers the cleanup callback,
+ * constructs the static empty set, builds the hard-coded sets, loads the
+ * "parse" table of the root bundle through ParseDataSink, derives the
+ * combined separator/digit sets, and finally freezes every non-null set.
+ * Returns early, leaving later sets null, as soon as status is a failure.
+ */
+void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
+    ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS, cleanupNumberParseUniSets);
+
+    // Initialize the empty instance for well-defined fallback behavior
+    new(gEmptyUnicodeSet) UnicodeSet();
+    reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->freeze();
+    gEmptyUnicodeSetInitialized = TRUE;
+
+    // These sets were decided after discussion with icu-design@. See tickets #13084 and #13309.
+    // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
+    gUnicodeSets[DEFAULT_IGNORABLES] = new UnicodeSet(
+            u"[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]", status);
+    gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(u"[[:Bidi_Control:]]", status);
+
+    LocalUResourceBundlePointer rb(ures_open(nullptr, "root", &status));
+    if (U_FAILURE(status)) { return; }
+    ParseDataSink sink;
+    ures_getAllItemsWithFallback(rb.getAlias(), "parse", sink, status);
+    if (U_FAILURE(status)) { return; }
+
+    // NOTE: It is OK for these assertions to fail if there was a no-data build.
+    U_ASSERT(gUnicodeSets[COMMA] != nullptr);
+    U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr);
+    U_ASSERT(gUnicodeSets[PERIOD] != nullptr);
+    U_ASSERT(gUnicodeSets[STRICT_PERIOD] != nullptr);
+    U_ASSERT(gUnicodeSets[APOSTROPHE_SIGN] != nullptr);
+
+    // The two leading literal characters are U+066C and U+2018.
+    LocalPointer<UnicodeSet> otherGrouping(new UnicodeSet(
+        u"[٬‘\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]",
+        status
+    ), status);
+    if (U_FAILURE(status)) { return; }
+    // getImpl() substitutes the static empty set when no apostrophe set was
+    // loaded, so a missing entry cannot cause a null dereference in builds
+    // where U_ASSERT is compiled out.
+    otherGrouping->addAll(*getImpl(APOSTROPHE_SIGN));
+    gUnicodeSets[OTHER_GROUPING_SEPARATORS] = otherGrouping.orphan();
+    gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
+    gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
+            STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);
+
+    U_ASSERT(gUnicodeSets[MINUS_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[PLUS_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr);
+
+    gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status);
+    if (U_FAILURE(status)) { return; }
+
+    U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[POUND_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[RUPEE_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[YEN_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[WON_SIGN] != nullptr);
+
+    gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status);
+    if (U_FAILURE(status)) { return; }
+    gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
+    gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);
+
+    for (auto* uniset : gUnicodeSets) {
+        if (uniset != nullptr) {
+            uniset->freeze();
+        }
+    }
+}
+
+}
+
+/**
+ * Returns the set for key, running the one-time initialization first.
+ * Falls back to the static empty set when initialization reports a failure
+ * or when no set is stored for key.
+ */
+const UnicodeSet* unisets::get(Key key) {
+    UErrorCode initStatus = U_ZERO_ERROR;
+    umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, initStatus);
+    return U_SUCCESS(initStatus)
+            ? getImpl(key)
+            : reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
+}
+
+Key unisets::chooseFrom(UnicodeString str, Key key1) {
+ return get(key1)->contains(str) ? key1 : NONE;
+}
+
+Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
+ return get(key1)->contains(str) ? key1 : chooseFrom(str, key2);
+}
+
+//Key unisets::chooseCurrency(UnicodeString str) {
+// if (get(DOLLAR_SIGN)->contains(str)) {
+// return DOLLAR_SIGN;
+// } else if (get(POUND_SIGN)->contains(str)) {
+// return POUND_SIGN;
+// } else if (get(RUPEE_SIGN)->contains(str)) {
+// return RUPEE_SIGN;
+// } else if (get(YEN_SIGN)->contains(str)) {
+// return YEN_SIGN;
+// } else {
+// return NONE;
+// }
+//}
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
diff --git a/thirdparty/icu4c/common/static_unicode_sets.h b/thirdparty/icu4c/common/static_unicode_sets.h
new file mode 100644
index 0000000000..5d90ce5908
--- /dev/null
+++ b/thirdparty/icu4c/common/static_unicode_sets.h
@@ -0,0 +1,140 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// This file contains utilities to deal with static-allocated UnicodeSets.
+//
+// Common use case: you write a "private static final" UnicodeSet in Java, and
+// want something similarly easy in C++. Originally written for number
+// parsing, but this header can be used for other applications.
+//
+// Main entrypoint: `unisets::get(unisets::MY_SET_ID_HERE)`
+//
+// This file is in common instead of i18n because it is needed by ucurr.cpp.
+//
+// Author: sffc
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+#ifndef __STATIC_UNICODE_SETS_H__
+#define __STATIC_UNICODE_SETS_H__
+
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+
+U_NAMESPACE_BEGIN
+namespace unisets {
+
+// Identifies one of the static UnicodeSets that can be requested via get().
+// Apart from NONE (-1), the enumerators start at EMPTY = 0 and are numbered
+// consecutively, so UNISETS_KEY_COUNT equals the number of real keys.
+enum Key {
+ // NONE is used to indicate null in chooseFrom().
+ // EMPTY is used to get an empty UnicodeSet.
+ NONE = -1,
+ EMPTY = 0,
+
+ // Ignorables
+ DEFAULT_IGNORABLES,
+ STRICT_IGNORABLES,
+
+ // Separators
+ // Notes:
+ // - COMMA is a superset of STRICT_COMMA
+ // - PERIOD is a superset of STRICT_PERIOD
+ // - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS
+ // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and OTHER_GROUPING_SEPARATORS
+ COMMA,
+ PERIOD,
+ STRICT_COMMA,
+ STRICT_PERIOD,
+ APOSTROPHE_SIGN,
+ OTHER_GROUPING_SEPARATORS,
+ ALL_SEPARATORS,
+ STRICT_ALL_SEPARATORS,
+
+ // Symbols
+ MINUS_SIGN,
+ PLUS_SIGN,
+ PERCENT_SIGN,
+ PERMILLE_SIGN,
+ INFINITY_SIGN,
+
+ // Currency Symbols
+ DOLLAR_SIGN,
+ POUND_SIGN,
+ RUPEE_SIGN,
+ YEN_SIGN,
+ WON_SIGN,
+
+ // Other
+ DIGITS,
+
+ // Combined Separators with Digits (for lead code points)
+ DIGITS_OR_ALL_SEPARATORS,
+ DIGITS_OR_STRICT_ALL_SEPARATORS,
+
+ // The number of elements in the enum.
+ UNISETS_KEY_COUNT
+};
+
+/**
+ * Gets the static-allocated UnicodeSet according to the provided key. The
+ * pointer will be deleted during u_cleanup(); the caller should NOT delete it.
+ *
+ * Exported as U_COMMON_API for ucurr.cpp
+ *
+ * This method is always safe and OK to chain: in the case of a memory or other
+ * error, it returns an empty set from static memory.
+ *
+ * Example:
+ *
+ * UBool hasIgnorables = unisets::get(unisets::DEFAULT_IGNORABLES)->contains(...);
+ *
+ * @param key The desired UnicodeSet according to the enum in this file.
+ * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but
+ * may be empty if an error occurred during data loading.
+ */
+U_COMMON_API const UnicodeSet* get(Key key);
+
+/**
+ * Checks if the UnicodeSet given by key1 contains the given string.
+ *
+ * Exported as U_COMMON_API for numparse_decimal.cpp
+ *
+ * @param str The string to check.
+ * @param key1 The set to check.
+ * @return key1 if the set contains str, or NONE if not.
+ */
+U_COMMON_API Key chooseFrom(UnicodeString str, Key key1);
+
+/**
+ * Checks if the UnicodeSet given by either key1 or key2 contains the string.
+ *
+ * Exported as U_COMMON_API for numparse_decimal.cpp
+ *
+ * @param str The string to check.
+ * @param key1 The first set to check.
+ * @param key2 The second set to check.
+ * @return key1 if that set contains str; key2 if that set contains str; or
+ * NONE if neither set contains str.
+ */
+U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2);
+
+// TODO: Load these from data: ICU-20108
+// Unused in C++:
+// Key chooseCurrency(UnicodeString str);
+// Used instead:
+// Pairs each currency-symbol key with a single exemplar code point.
+// The non-ASCII exemplars are spelled as universal-character-names so the
+// table cannot be corrupted by a re-encoding of this file (the yen and won
+// entries had been turned into multi-character mojibake literals).
+static const struct {
+    Key key;
+    UChar32 exemplar;
+} kCurrencyEntries[] = {
+    {DOLLAR_SIGN, u'$'},
+    {POUND_SIGN, u'\u00A3'},  // U+00A3 POUND SIGN
+    {RUPEE_SIGN, u'\u20B9'},  // U+20B9 INDIAN RUPEE SIGN
+    {YEN_SIGN, u'\u00A5'},    // U+00A5 YEN SIGN
+    {WON_SIGN, u'\u20A9'},    // U+20A9 WON SIGN
+};
+
+} // namespace unisets
+U_NAMESPACE_END
+
+#endif //__STATIC_UNICODE_SETS_H__
+#endif /* #if !UCONFIG_NO_FORMATTING */
diff --git a/thirdparty/icu4c/common/stringpiece.cpp b/thirdparty/icu4c/common/stringpiece.cpp
new file mode 100644
index 0000000000..99089e08ef
--- /dev/null
+++ b/thirdparty/icu4c/common/stringpiece.cpp
@@ -0,0 +1,116 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+// Copyright (C) 2009-2013, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// Copyright 2004 and onwards Google Inc.
+//
+// Author: wilsonh@google.com (Wilson Hsieh)
+//
+
+#include "unicode/utypes.h"
+#include "unicode/stringpiece.h"
+#include "cstring.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+// Wraps a NUL-terminated C string without copying it; a NULL pointer gives
+// a piece of length 0.
+StringPiece::StringPiece(const char* str)
+    : ptr_(str),
+      length_((str != NULL) ? static_cast<int32_t>(uprv_strlen(str)) : 0) { }
+
+// Suffix view of x starting at pos. pos is pinned to 0 when negative and to
+// x.length_ when it lies beyond the end.
+StringPiece::StringPiece(const StringPiece& x, int32_t pos) {
+  const int32_t begin = (pos < 0) ? 0 : ((pos > x.length_) ? x.length_ : pos);
+  ptr_ = x.ptr_ + begin;
+  length_ = x.length_ - begin;
+}
+
+// Sub-view of x: starts at pos (pinned as in the two-argument form) and
+// spans len bytes, where len is pinned to 0 when negative and to the bytes
+// remaining after pos when too large.
+StringPiece::StringPiece(const StringPiece& x, int32_t pos, int32_t len) {
+  const int32_t begin = (pos < 0) ? 0 : ((pos > x.length_) ? x.length_ : pos);
+  const int32_t remaining = x.length_ - begin;
+  int32_t count = len;
+  if (count < 0) {
+    count = 0;
+  } else if (count > remaining) {
+    count = remaining;
+  }
+  ptr_ = x.ptr_ + begin;
+  length_ = count;
+}
+
+// Re-points this piece at a NUL-terminated C string; NULL yields length 0.
+void StringPiece::set(const char* str) {
+  ptr_ = str;
+  length_ = (str == NULL) ? 0 : static_cast<int32_t>(uprv_strlen(str));
+}
+
+// Searches this piece for the first occurrence of needle at or after offset.
+//
+// @param needle the byte sequence to look for
+// @param offset index at which the search starts; negative values are
+//               treated as 0
+// @return the index of the first match, or -1 if there is none. An empty
+//         needle matches at offset when offset is inside this piece, and at
+//         0 when both pieces are empty.
+//
+// Candidate positions are limited to those where the whole needle still fits
+// inside this piece, so no byte at or beyond length() is ever read. (The
+// earlier loop kept comparing past the end when the needle overhung it.)
+int32_t StringPiece::find(StringPiece needle, int32_t offset) {
+  if (length() == 0 && needle.length() == 0) {
+    return 0;
+  }
+  if (offset < 0) {
+    offset = 0;
+  }
+  // Last start index at which the complete needle fits.
+  const int32_t lastStart = length() - needle.length();
+  // TODO: Improve to be better than O(N^2)?
+  for (int32_t i = offset; i < length() && i <= lastStart; i++) {
+    int32_t j = 0;
+    while (j < needle.length() && data()[i + j] == needle.data()[j]) {
+      j++;
+    }
+    if (j == needle.length()) {
+      return i;
+    }
+  }
+  return -1;
+}
+
+// Three-way comparison against other, byte by byte using `char` ordering.
+// Returns -1, 0 or 1; when one piece is a prefix of the other, the shorter
+// one orders first.
+int32_t StringPiece::compare(StringPiece other) {
+  const int32_t otherLength = other.length();
+  int32_t i = 0;
+  while (i < length()) {
+    if (i == otherLength) {
+      return 1;  // other ran out first: this is longer
+    }
+    const char mine = data()[i];
+    const char theirs = other.data()[i];
+    if (mine != theirs) {
+      return (mine < theirs) ? -1 : 1;
+    }
+    ++i;
+  }
+  // Every byte of this piece matched; other may still have bytes left.
+  return (i < otherLength) ? -1 : 0;
+}
+
+// Byte-wise equality of two pieces. Sizes are compared first; for non-empty
+// pieces the last byte is checked before the rest, which rejects quickly
+// when the strings share a large common prefix.
+U_EXPORT UBool U_EXPORT2
+operator==(const StringPiece& x, const StringPiece& y) {
+  const int32_t size = x.size();
+  if (size != y.size()) {
+    return false;
+  }
+  if (size == 0) {
+    return true;
+  }
+  const char* lhs = x.data();
+  const char* rhs = y.data();
+  const int32_t last = size - 1;
+  if (lhs[last] != rhs[last]) {
+    return false;
+  }
+  // The final byte is already known to match, so only the prefix is compared.
+  return uprv_memcmp(lhs, rhs, last) == 0;
+}
+
+
+const int32_t StringPiece::npos = 0x7fffffff;
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/stringtriebuilder.cpp b/thirdparty/icu4c/common/stringtriebuilder.cpp
new file mode 100644
index 0000000000..6f9cc2e5c2
--- /dev/null
+++ b/thirdparty/icu4c/common/stringtriebuilder.cpp
@@ -0,0 +1,618 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: stringtriebuilder.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010dec24
+* created by: Markus W. Scherer
+*/
+
+#include "utypeinfo.h" // for 'typeid' to work
+#include "unicode/utypes.h"
+#include "unicode/stringtriebuilder.h"
+#include "uassert.h"
+#include "uhash.h"
+
+U_CDECL_BEGIN
+
+// Hash callback handed to uhash_openSize() in createCompactBuilder():
+// forwards the key pointer to StringTrieBuilder::hashNode().
+static int32_t U_CALLCONV
+hashStringTrieNode(const UHashTok key) {
+ return icu::StringTrieBuilder::hashNode(key.pointer);
+}
+
+// Key-equality callback handed to uhash_openSize() in createCompactBuilder():
+// forwards both key pointers to StringTrieBuilder::equalNodes().
+static UBool U_CALLCONV
+equalStringTrieNodes(const UHashTok key1, const UHashTok key2) {
+ return icu::StringTrieBuilder::equalNodes(key1.pointer, key2.pointer);
+}
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+// Starts without a node hash table; one is created by createCompactBuilder().
+StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {}
+
+// Closes the node hash table, if any is still open.
+StringTrieBuilder::~StringTrieBuilder() {
+ deleteCompactBuilder();
+}
+
+// Opens the hash table `nodes` that registerNode()/registerFinalValue() use
+// to find equal nodes. Does nothing if errorCode already indicates failure.
+// On success the table is given uprv_deleteUObject as key deleter; a NULL
+// table without an error is reported as U_MEMORY_ALLOCATION_ERROR.
+void
+StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL,
+                         sizeGuess, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    if(nodes!=NULL) {
+        uhash_setKeyDeleter(nodes, uprv_deleteUObject);
+    } else {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+// Closes the node hash table and resets the pointer so that a repeated call
+// (for example from the destructor) passes NULL to uhash_close().
+void
+StringTrieBuilder::deleteCompactBuilder() {
+ uhash_close(nodes);
+ nodes=NULL;
+}
+
+// Serializes the elements [0..elementsLength[.
+// USTRINGTRIE_BUILD_FAST writes the nodes directly via writeNode().
+// Any other option (USTRINGTRIE_BUILD_SMALL) first builds a graph of
+// registered nodes with makeNode(), then marks and writes it; the temporary
+// node table is released afterwards regardless of the outcome.
+void
+StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength,
+                         UErrorCode &errorCode) {
+    if(buildOption==USTRINGTRIE_BUILD_FAST) {
+        writeNode(0, elementsLength, 0);
+        return;
+    }
+    /* USTRINGTRIE_BUILD_SMALL */
+    createCompactBuilder(2*elementsLength, errorCode);
+    Node *rootNode=makeNode(0, elementsLength, 0, errorCode);
+    if(!U_FAILURE(errorCode)) {
+        rootNode->markRightEdgesFirst(-1);
+        rootNode->write(*this);
+    }
+    deleteCompactBuilder();
+}
+
+// Writes the node for the elements [start..limit[ directly (fast build).
+// Requires start<limit,
+// and all strings of the [start..limit[ elements must be sorted and
+// have a common prefix of length unitIndex.
+// Returns what writeValueAndFinal()/writeValueAndType() return for the
+// node head, which is written last.
+int32_t
+StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
+ UBool hasValue=FALSE;
+ int32_t value=0;
+ int32_t type;
+ if(unitIndex==getElementStringLength(start)) {
+ // An intermediate or final value.
+ value=getElementValue(start++);
+ if(start==limit) {
+ return writeValueAndFinal(value, TRUE); // final-value node
+ }
+ hasValue=TRUE;
+ }
+ // Now all [start..limit[ strings are longer than unitIndex.
+ int32_t minUnit=getElementUnit(start, unitIndex);
+ int32_t maxUnit=getElementUnit(limit-1, unitIndex);
+ if(minUnit==maxUnit) {
+ // Linear-match node: All strings have the same character at unitIndex.
+ int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
+ // The node following the shared run is written before the run itself.
+ writeNode(start, limit, lastUnitIndex);
+ // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
+ int32_t length=lastUnitIndex-unitIndex;
+ int32_t maxLinearMatchLength=getMaxLinearMatchLength();
+ while(length>maxLinearMatchLength) {
+ // Peel full-size chunks off the end of the run, each with its own head unit.
+ lastUnitIndex-=maxLinearMatchLength;
+ length-=maxLinearMatchLength;
+ writeElementUnits(start, lastUnitIndex, maxLinearMatchLength);
+ write(getMinLinearMatch()+maxLinearMatchLength-1);
+ }
+ writeElementUnits(start, unitIndex, length);
+ type=getMinLinearMatch()+length-1;
+ } else {
+ // Branch node.
+ int32_t length=countElementUnits(start, limit, unitIndex);
+ // length>=2 because minUnit!=maxUnit.
+ writeBranchSubNode(start, limit, unitIndex, length);
+ // length-1 goes into the node type when it is below getMinLinearMatch();
+ // otherwise it is written as a separate unit and the type is 0.
+ if(--length<getMinLinearMatch()) {
+ type=length;
+ } else {
+ write(length);
+ type=0;
+ }
+ }
+ return writeValueAndType(hasValue, value, type);
+}
+
+// Writes the branch structure for [start..limit[ at unitIndex (fast build).
+// start<limit && all strings longer than unitIndex &&
+// length different units at unitIndex
+// Returns the result of the last write() call made for this sub-node.
+int32_t
+StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) {
+ UChar middleUnits[kMaxSplitBranchLevels];
+ int32_t lessThan[kMaxSplitBranchLevels];
+ int32_t ltLength=0;
+ // While there are too many units for one list, split at the middle unit:
+ // recurse for the lower half and keep looping on the upper half.
+ while(length>getMaxBranchLinearSubNodeLength()) {
+ // Branch on the middle unit.
+ // First, find the middle unit.
+ int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
+ // Encode the less-than branch first.
+ middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
+ lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2);
+ ++ltLength;
+ // Continue for the greater-or-equal branch.
+ start=i;
+ length=length-length/2;
+ }
+ // For each unit, find its elements array start and whether it has a final value.
+ int32_t starts[kMaxBranchLinearSubNodeLength];
+ UBool isFinal[kMaxBranchLinearSubNodeLength-1];
+ int32_t unitNumber=0;
+ do {
+ int32_t i=starts[unitNumber]=start;
+ UChar unit=getElementUnit(i++, unitIndex);
+ i=indexOfElementWithNextUnit(i, unitIndex, unit);
+ // Final: exactly one element has this unit and its string ends right after it.
+ isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start);
+ start=i;
+ } while(++unitNumber<length-1);
+ // unitNumber==length-1, and the maxUnit elements range is [start..limit[
+ starts[unitNumber]=start;
+
+ // Write the sub-nodes in reverse order: The jump lengths are deltas from
+ // after their own positions, so if we wrote the minUnit sub-node first,
+ // then its jump delta would be larger.
+ // Instead we write the minUnit sub-node last, for a shorter delta.
+ int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1];
+ do {
+ --unitNumber;
+ if(!isFinal[unitNumber]) {
+ jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1);
+ }
+ } while(unitNumber>0);
+ // The maxUnit sub-node is written as the very last one because we do
+ // not jump for it at all.
+ unitNumber=length-1;
+ writeNode(start, limit, unitIndex+1);
+ int32_t offset=write(getElementUnit(start, unitIndex));
+ // Write the rest of this node's unit-value pairs.
+ while(--unitNumber>=0) {
+ start=starts[unitNumber];
+ int32_t value;
+ if(isFinal[unitNumber]) {
+ // Write the final value for the one string ending with this unit.
+ value=getElementValue(start);
+ } else {
+ // Write the delta to the start position of the sub-node.
+ value=offset-jumpTargets[unitNumber];
+ }
+ writeValueAndFinal(value, isFinal[unitNumber]);
+ offset=write(getElementUnit(start, unitIndex));
+ }
+ // Write the split-branch nodes.
+ // They are emitted in the reverse of the order in which they were recorded above.
+ while(ltLength>0) {
+ --ltLength;
+ writeDeltaTo(lessThan[ltLength]);
+ offset=write(middleUnits[ltLength]);
+ }
+ return offset;
+}
+
+// Builds (rather than writes) the node for [start..limit[ and returns the
+// registered node; mirrors writeNode() for the small build.
+// Requires start<limit,
+// and all strings of the [start..limit[ elements must be sorted and
+// have a common prefix of length unitIndex.
+// Returns NULL if errorCode indicates a failure on entry; otherwise returns
+// whatever registerNode()/registerFinalValue() return.
+StringTrieBuilder::Node *
+StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return NULL;
+ }
+ UBool hasValue=FALSE;
+ int32_t value=0;
+ if(unitIndex==getElementStringLength(start)) {
+ // An intermediate or final value.
+ value=getElementValue(start++);
+ if(start==limit) {
+ return registerFinalValue(value, errorCode);
+ }
+ hasValue=TRUE;
+ }
+ Node *node;
+ // Now all [start..limit[ strings are longer than unitIndex.
+ int32_t minUnit=getElementUnit(start, unitIndex);
+ int32_t maxUnit=getElementUnit(limit-1, unitIndex);
+ if(minUnit==maxUnit) {
+ // Linear-match node: All strings have the same character at unitIndex.
+ int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
+ Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode);
+ // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
+ int32_t length=lastUnitIndex-unitIndex;
+ int32_t maxLinearMatchLength=getMaxLinearMatchLength();
+ while(length>maxLinearMatchLength) {
+ // Chain full-size chunks from the end of the run towards its start.
+ lastUnitIndex-=maxLinearMatchLength;
+ length-=maxLinearMatchLength;
+ node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode);
+ nextNode=registerNode(node, errorCode);
+ }
+ node=createLinearMatchNode(start, unitIndex, length, nextNode);
+ } else {
+ // Branch node.
+ int32_t length=countElementUnits(start, limit, unitIndex);
+ // length>=2 because minUnit!=maxUnit.
+ Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode);
+ node=new BranchHeadNode(length, subNode);
+ }
+ if(hasValue && node!=NULL) {
+ // Attach the intermediate value either to the node itself or, if match
+ // nodes cannot carry values, to a separate node placed in front of it.
+ if(matchNodesCanHaveValues()) {
+ ((ValueNode *)node)->setValue(value);
+ } else {
+ node=new IntermediateValueNode(value, registerNode(node, errorCode));
+ }
+ }
+ // registerNode() also handles node==NULL and a failed errorCode.
+ return registerNode(node, errorCode);
+}
+
+// Builds the branch structure for [start..limit[ at unitIndex as registered
+// nodes; mirrors writeBranchSubNode() for the small build.
+// start<limit && all strings longer than unitIndex &&
+// length different units at unitIndex
+// Returns NULL when errorCode indicates a failure on entry or after the
+// split loop, or when the list node cannot be allocated.
+StringTrieBuilder::Node *
+StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
+ int32_t length, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return NULL;
+ }
+ UChar middleUnits[kMaxSplitBranchLevels];
+ Node *lessThan[kMaxSplitBranchLevels];
+ int32_t ltLength=0;
+ // While there are too many units for one list, split at the middle unit:
+ // recurse for the lower half and keep looping on the upper half.
+ while(length>getMaxBranchLinearSubNodeLength()) {
+ // Branch on the middle unit.
+ // First, find the middle unit.
+ int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
+ // Create the less-than branch.
+ middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
+ lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode);
+ ++ltLength;
+ // Continue for the greater-or-equal branch.
+ start=i;
+ length=length-length/2;
+ }
+ if(U_FAILURE(errorCode)) {
+ return NULL;
+ }
+ ListBranchNode *listNode=new ListBranchNode();
+ if(listNode==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ // For each unit, find its elements array start and whether it has a final value.
+ int32_t unitNumber=0;
+ do {
+ int32_t i=start;
+ UChar unit=getElementUnit(i++, unitIndex);
+ i=indexOfElementWithNextUnit(i, unitIndex, unit);
+ // A single element whose string ends after this unit is stored as a
+ // final value; anything else becomes a sub-node.
+ if(start==i-1 && unitIndex+1==getElementStringLength(start)) {
+ listNode->add(unit, getElementValue(start));
+ } else {
+ listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode));
+ }
+ start=i;
+ } while(++unitNumber<length-1);
+ // unitNumber==length-1, and the maxUnit elements range is [start..limit[
+ UChar unit=getElementUnit(start, unitIndex);
+ if(start==limit-1 && unitIndex+1==getElementStringLength(start)) {
+ listNode->add(unit, getElementValue(start));
+ } else {
+ listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode));
+ }
+ Node *node=registerNode(listNode, errorCode);
+ // Create the split-branch nodes.
+ // They wrap the list node in the reverse of the order recorded above.
+ while(ltLength>0) {
+ --ltLength;
+ node=registerNode(
+ new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode);
+ }
+ return node;
+}
+
+// Returns the node stored in `nodes` that equals newNode, adding newNode to
+// the table if no equal node is present yet.
+// newNode is deleted here when errorCode already indicates a failure, when an
+// equal node already exists, or when inserting it fails. A NULL newNode is
+// reported as U_MEMORY_ALLOCATION_ERROR. Returns NULL on any failure.
+StringTrieBuilder::Node *
+StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ delete newNode;
+ return NULL;
+ }
+ if(newNode==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ const UHashElement *old=uhash_find(nodes, newNode);
+ if(old!=NULL) {
+ delete newNode;
+ return (Node *)old->key.pointer;
+ }
+ // If uhash_puti() returns a non-zero value from an equivalent, previously
+ // registered node, then uhash_find() failed to find that and we will leak newNode.
+ // Note: the #if below only prefixes the uhash_puti() call with an
+ // assignment; the call itself is made in every build.
+#if U_DEBUG
+ int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue.
+#endif
+ uhash_puti(nodes, newNode, 1, &errorCode);
+ U_ASSERT(oldValue==0);
+ if(U_FAILURE(errorCode)) {
+ delete newNode;
+ return NULL;
+ }
+ return newNode;
+}
+
+// Returns the registered FinalValueNode for `value`, creating and inserting
+// one if the table has none yet. A stack-allocated key is used for the
+// lookup so that a heap node is only allocated when it is actually needed.
+// Returns NULL if errorCode indicates a failure on entry, if allocation
+// fails (U_MEMORY_ALLOCATION_ERROR), or if the insertion fails.
+StringTrieBuilder::Node *
+StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return NULL;
+ }
+ FinalValueNode key(value);
+ const UHashElement *old=uhash_find(nodes, &key);
+ if(old!=NULL) {
+ return (Node *)old->key.pointer;
+ }
+ Node *newNode=new FinalValueNode(value);
+ if(newNode==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ // If uhash_puti() returns a non-zero value from an equivalent, previously
+ // registered node, then uhash_find() failed to find that and we will leak newNode.
+ // Note: the #if below only prefixes the uhash_puti() call with an
+ // assignment; the call itself is made in every build.
+#if U_DEBUG
+ int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue.
+#endif
+ uhash_puti(nodes, newNode, 1, &errorCode);
+ U_ASSERT(oldValue==0);
+ if(U_FAILURE(errorCode)) {
+ delete newNode;
+ return NULL;
+ }
+ return newNode;
+}
+
+// Type-erased hash entry point: treats the pointer as a Node and returns
+// its hashCode().
+int32_t
+StringTrieBuilder::hashNode(const void *node) {
+    const Node *typedNode=(const Node *)node;
+    return typedNode->hashCode();
+}
+
+// Type-erased equality entry point: treats both pointers as Nodes and
+// compares them with Node::operator==.
+UBool
+StringTrieBuilder::equalNodes(const void *left, const void *right) {
+    const Node &lhs=*(const Node *)left;
+    const Node &rhs=*(const Node *)right;
+    return lhs==rhs;
+}
+
+// Base equality: the same object, or the same dynamic type with equal
+// stored hash values.
+UBool
+StringTrieBuilder::Node::operator==(const Node &other) const {
+    if(this==&other) {
+        return TRUE;
+    }
+    return typeid(*this)==typeid(other) && hash==other.hash;
+}
+
+// Default marking: records edgeNumber in offset unless offset is already
+// non-zero, and passes edgeNumber through unchanged.
+int32_t
+StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) {
+    if(offset!=0) {
+        return edgeNumber;
+    }
+    offset=edgeNumber;
+    return edgeNumber;
+}
+
+// Equal when the base comparison succeeds and both nodes carry the same value.
+UBool
+StringTrieBuilder::FinalValueNode::operator==(const Node &other) const {
+    if(this!=&other) {
+        if(!Node::operator==(other)) {
+            return FALSE;
+        }
+        const FinalValueNode &that=(const FinalValueNode &)other;
+        return value==that.value;
+    }
+    return TRUE;
+}
+
+// Writes the value as a final value and stores the builder's return value
+// in offset.
+void
+StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) {
+ offset=builder.writeValueAndFinal(value, TRUE);
+}
+
+// Equal when the base comparison succeeds, both nodes agree on whether they
+// have a value, and - if they do - the values match.
+UBool
+StringTrieBuilder::ValueNode::operator==(const Node &other) const {
+    if(this!=&other) {
+        if(!Node::operator==(other)) {
+            return FALSE;
+        }
+        const ValueNode &that=(const ValueNode &)other;
+        if(hasValue!=that.hasValue) {
+            return FALSE;
+        }
+        if(hasValue && value!=that.value) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+// Equal when the ValueNode comparison succeeds and both nodes point to the
+// same next node (pointer identity).
+UBool
+StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
+    if(this!=&other) {
+        if(!ValueNode::operator==(other)) {
+            return FALSE;
+        }
+        const IntermediateValueNode &that=(const IntermediateValueNode &)other;
+        return next==that.next;
+    }
+    return TRUE;
+}
+
+// If not yet marked (offset==0), marks the next node and adopts the edge
+// number it returns as both this node's offset and the result; otherwise
+// returns edgeNumber unchanged.
+int32_t
+StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) {
+    if(offset!=0) {
+        return edgeNumber;
+    }
+    edgeNumber=next->markRightEdgesFirst(edgeNumber);
+    offset=edgeNumber;
+    return edgeNumber;
+}
+
+// Writes the next node first, then this node's value as a non-final value;
+// offset receives the builder's return value for the latter.
+void
+StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) {
+ next->write(builder);
+ offset=builder.writeValueAndFinal(value, FALSE);
+}
+
+// Equal when the ValueNode comparison succeeds, the lengths match and both
+// nodes point to the same next node (pointer identity).
+UBool
+StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
+    if(this!=&other) {
+        if(!ValueNode::operator==(other)) {
+            return FALSE;
+        }
+        const LinearMatchNode &that=(const LinearMatchNode &)other;
+        return length==that.length && next==that.next;
+    }
+    return TRUE;
+}
+
+// If not yet marked (offset==0), marks the next node and adopts the edge
+// number it returns as both this node's offset and the result; otherwise
+// returns edgeNumber unchanged.
+int32_t
+StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
+    if(offset!=0) {
+        return edgeNumber;
+    }
+    edgeNumber=next->markRightEdgesFirst(edgeNumber);
+    offset=edgeNumber;
+    return edgeNumber;
+}
+
+// Equal when the base comparison succeeds and, for every index below this
+// node's length, the unit, the value and the sub-node pointer all match.
+UBool
+StringTrieBuilder::ListBranchNode::operator==(const Node &other) const {
+    if(this==&other) {
+        return TRUE;
+    }
+    if(!Node::operator==(other)) {
+        return FALSE;
+    }
+    const ListBranchNode &that=(const ListBranchNode &)other;
+    int32_t i=0;
+    // Advance past every matching entry; stop at the first difference.
+    while(i<length &&
+            units[i]==that.units[i] && values[i]==that.values[i] && equal[i]==that.equal[i]) {
+        ++i;
+    }
+    return i>=length;
+}
+
+// Marks this node's sub-nodes from the last entry to the first, unless this
+// node has been marked before (offset!=0). Remembers the incoming edge
+// number in firstEdgeNumber and stores the final edge number in offset.
+int32_t
+StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
+ if(offset==0) {
+ firstEdgeNumber=edgeNumber;
+ int32_t step=0;
+ int32_t i=length;
+ do {
+ Node *edge=equal[--i];
+ // Entries without a sub-node (NULL) are skipped.
+ if(edge!=NULL) {
+ edgeNumber=edge->markRightEdgesFirst(edgeNumber-step);
+ }
+ // For all but the rightmost edge, decrement the edge number.
+ step=1;
+ } while(i>0);
+ offset=edgeNumber;
+ }
+ return edgeNumber;
+}
+
+// Writes this list-branch node: first the sub-nodes, then the unit/value
+// pairs from the last unit to the first. offset ends up holding the
+// builder's return value for the last unit written.
+void
+StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) {
+ // Write the sub-nodes in reverse order: The jump lengths are deltas from
+ // after their own positions, so if we wrote the minUnit sub-node first,
+ // then its jump delta would be larger.
+ // Instead we write the minUnit sub-node last, for a shorter delta.
+ int32_t unitNumber=length-1;
+ Node *rightEdge=equal[unitNumber];
+ int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset();
+ do {
+ --unitNumber;
+ // Entries with a NULL sub-node hold a final value and have nothing to write here.
+ if(equal[unitNumber]!=NULL) {
+ equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder);
+ }
+ } while(unitNumber>0);
+ // The maxUnit sub-node is written as the very last one because we do
+ // not jump for it at all.
+ unitNumber=length-1;
+ if(rightEdge==NULL) {
+ builder.writeValueAndFinal(values[unitNumber], TRUE);
+ } else {
+ rightEdge->write(builder);
+ }
+ offset=builder.write(units[unitNumber]);
+ // Write the rest of this node's unit-value pairs.
+ while(--unitNumber>=0) {
+ int32_t value;
+ UBool isFinal;
+ if(equal[unitNumber]==NULL) {
+ // Write the final value for the one string ending with this unit.
+ value=values[unitNumber];
+ isFinal=TRUE;
+ } else {
+ // Write the delta to the start position of the sub-node.
+ U_ASSERT(equal[unitNumber]->getOffset()>0);
+ value=offset-equal[unitNumber]->getOffset();
+ isFinal=FALSE;
+ }
+ builder.writeValueAndFinal(value, isFinal);
+ offset=builder.write(units[unitNumber]);
+ }
+}
+
+// Equal when the base comparison succeeds and the split unit as well as
+// both child pointers are identical.
+UBool
+StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
+    if(this!=&other) {
+        if(!Node::operator==(other)) {
+            return FALSE;
+        }
+        const SplitBranchNode &that=(const SplitBranchNode &)other;
+        return unit==that.unit && lessThan==that.lessThan && greaterOrEqual==that.greaterOrEqual;
+    }
+    return TRUE;
+}
+
+// If not yet marked (offset==0): remembers the incoming edge number, marks
+// the greater-or-equal child first, then the less-than child with the
+// resulting number minus one; that last result becomes offset and the
+// return value. An already marked node returns edgeNumber unchanged.
+int32_t
+StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
+    if(offset!=0) {
+        return edgeNumber;
+    }
+    firstEdgeNumber=edgeNumber;
+    const int32_t afterRight=greaterOrEqual->markRightEdgesFirst(edgeNumber);
+    const int32_t afterLeft=lessThan->markRightEdgesFirst(afterRight-1);
+    offset=afterLeft;
+    return afterLeft;
+}
+
+// Writes both children and then this node: the less-than child (unless
+// writeUnlessInsideRightEdge() decides to skip it), the greater-or-equal
+// child, the delta to the less-than child, and finally the split unit.
+void
+StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) {
+    // Less-than branch goes first.
+    const int32_t rightEdgeOffset=greaterOrEqual->getOffset();
+    lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeOffset, builder);
+    // Greater-or-equal branch goes last because no jump is needed to reach it.
+    greaterOrEqual->write(builder);
+    // Now this node itself.
+    const int32_t lessThanOffset=lessThan->getOffset();
+    U_ASSERT(lessThanOffset>0);
+    builder.writeDeltaTo(lessThanOffset);  // less-than
+    offset=builder.write(unit);
+}
+
+// Equal when the ValueNode comparison succeeds, the lengths match and both
+// nodes point to the same next node (pointer identity).
+UBool
+StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
+    if(this!=&other) {
+        if(!ValueNode::operator==(other)) {
+            return FALSE;
+        }
+        const BranchHeadNode &that=(const BranchHeadNode &)other;
+        return length==that.length && next==that.next;
+    }
+    return TRUE;
+}
+
+// If not yet marked (offset==0), marks the next node and adopts the edge
+// number it returns as both this node's offset and the result; otherwise
+// returns edgeNumber unchanged.
+int32_t
+StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
+    if(offset!=0) {
+        return edgeNumber;
+    }
+    edgeNumber=next->markRightEdgesFirst(edgeNumber);
+    offset=edgeNumber;
+    return edgeNumber;
+}
+
+// Writes the branch body (next) and then the head. length-1 is carried in
+// the node type when length does not exceed getMinLinearMatch(); otherwise
+// it is written as its own unit and the type is 0.
+void
+StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) {
+    next->write(builder);
+    int32_t type=length-1;
+    if(length>builder.getMinLinearMatch()) {
+        builder.write(type);
+        type=0;
+    }
+    offset=builder.writeValueAndType(hasValue, value, type);
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/uarrsort.cpp b/thirdparty/icu4c/common/uarrsort.cpp
new file mode 100644
index 0000000000..c17dbb2e2b
--- /dev/null
+++ b/thirdparty/icu4c/common/uarrsort.cpp
@@ -0,0 +1,274 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uarrsort.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003aug04
+* created by: Markus W. Scherer
+*
+* Internal function for sorting arrays.
+*/
+
+#include <cstddef>
+
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "uarrsort.h"
+
+enum {
+ /**
+ * "from Knuth"
+ *
+ * A binary search over 8 items performs 4 comparisons:
+ * log2(8)=3 to subdivide, +1 to check for equality.
+ * A linear search over 8 items on average also performs 4 comparisons.
+ */
+ MIN_QSORT=9,
+ STACK_ITEM_SIZE=200
+};
+
+// Number of std::max_align_t elements needed to hold sizeInBytes bytes
+// (the byte count divided by sizeof(std::max_align_t), rounded up).
+// Used below to size the temporary item buffers of the sort functions.
+static constexpr int32_t sizeInMaxAlignTs(int32_t sizeInBytes) {
+ return (sizeInBytes + sizeof(std::max_align_t) - 1) / sizeof(std::max_align_t);
+}
+
+/* UComparator convenience implementations ---------------------------------- */
+
+/* UComparator for uint16_t items: the difference of the two values, computed in int32_t. */
+U_CAPI int32_t U_EXPORT2
+uprv_uint16Comparator(const void *context, const void *left, const void *right) {
+    (void)context;
+    const int32_t l=(int32_t)*(const uint16_t *)left;
+    const int32_t r=(int32_t)*(const uint16_t *)right;
+    return l-r;
+}
+
+/*
+ * UComparator for int32_t items.
+ *
+ * @param context unused
+ * @param left    pointer to the left int32_t
+ * @param right   pointer to the right int32_t
+ * @return -1, 0 or 1 for left<right, left==right, left>right
+ *
+ * The values are compared directly: subtracting them (l-r) overflows
+ * int32_t when the operands are far apart (e.g. INT32_MIN and 1), which
+ * is undefined behavior and can yield a result with the wrong sign.
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_int32Comparator(const void *context, const void *left, const void *right) {
+    (void)context;
+    const int32_t l=*(const int32_t *)left;
+    const int32_t r=*(const int32_t *)right;
+
+    if(l<r) {
+        return -1;
+    } else if(l==r) {
+        return 0;
+    } else /* l>r */ {
+        return 1;
+    }
+}
+
+/*
+ * UComparator for uint32_t items; returns -1, 0 or 1.
+ * No subtraction is used because (l-r) would overflow the int32_t result.
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_uint32Comparator(const void *context, const void *left, const void *right) {
+    (void)context;
+    const uint32_t l=*(const uint32_t *)left;
+    const uint32_t r=*(const uint32_t *)right;
+    return l<r ? -1 : (l==r ? 0 : 1);
+}
+
+/* Insertion sort using binary search --------------------------------------- */
+
+/*
+ * Searches the sorted items array[0..limit[ (each itemSize bytes) for item,
+ * using cmp(context, item, element).
+ * Returns the index of the last element that compares equal to item if one
+ * was found; otherwise returns ~i (bitwise complement, hence negative),
+ * where i is the index at which the search stopped.
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_stableBinarySearch(char *array, int32_t limit, void *item, int32_t itemSize,
+ UComparator *cmp, const void *context) {
+ int32_t start=0;
+ UBool found=FALSE;
+
+ /* Binary search until we get down to a tiny sub-array. */
+ while((limit-start)>=MIN_QSORT) {
+ int32_t i=(start+limit)/2;
+ int32_t diff=cmp(context, item, array+i*itemSize);
+ if(diff==0) {
+ /*
+ * Found the item. We look for the *last* occurrence of such
+ * an item, for stable sorting.
+ * If we knew that there will be only few equal items,
+ * we could break now and enter the linear search.
+ * However, if there are many equal items, then it should be
+ * faster to continue with the binary search.
+ * It seems likely that we either have all unique items
+ * (where found will never become TRUE in the insertion sort)
+ * or potentially many duplicates.
+ */
+ found=TRUE;
+ start=i+1;
+ } else if(diff<0) {
+ limit=i;
+ } else {
+ start=i;
+ }
+ }
+
+ /* Linear search over the remaining tiny sub-array. */
+ while(start<limit) {
+ int32_t diff=cmp(context, item, array+start*itemSize);
+ if(diff==0) {
+ found=TRUE;
+ } else if(diff<0) {
+ /* First element greater than item: everything equal lies before it. */
+ break;
+ }
+ ++start;
+ }
+ /* start is now one past the last element that is <= item. */
+ return found ? (start-1) : ~start;
+}
+
+/*
+ * Insertion sort of array[0..length[ (items of itemSize bytes).
+ * For each item the insertion point in the already sorted prefix is found
+ * with uprv_stableBinarySearch(); an item equal to existing ones is placed
+ * after the last of them. pv must point to scratch space for one item.
+ */
+static void
+doInsertionSort(char *array, int32_t length, int32_t itemSize,
+                UComparator *cmp, const void *context, void *pv) {
+    for(int32_t j=1; j<length; ++j) {
+        char *current=array+j*itemSize;
+        int32_t pos=uprv_stableBinarySearch(array, j, current, itemSize, cmp, context);
+        /* not found: decode the complement; found: go one past the last equal item */
+        pos= pos<0 ? ~pos : pos+1;
+        if(pos>=j) {
+            continue;  /* already in place */
+        }
+        char *slot=array+pos*itemSize;
+        uprv_memcpy(pv, current, itemSize);  /* v=array[j] */
+        uprv_memmove(slot+itemSize, slot, (j-pos)*(size_t)itemSize);
+        uprv_memcpy(slot, pv, itemSize);  /* array[pos]=v */
+    }
+}
+
+/*
+ * Provides the one-item scratch buffer for doInsertionSort(): the
+ * MaybeStackArray is grown only when an item does not fit its initial
+ * capacity. Sets U_MEMORY_ALLOCATION_ERROR and returns without sorting if
+ * growing fails.
+ */
+static void
+insertionSort(char *array, int32_t length, int32_t itemSize,
+              UComparator *cmp, const void *context, UErrorCode *pErrorCode) {
+
+    icu::MaybeStackArray<std::max_align_t, sizeInMaxAlignTs(STACK_ITEM_SIZE)> v;
+    if (sizeInMaxAlignTs(itemSize) > v.getCapacity()) {
+        if (v.resize(sizeInMaxAlignTs(itemSize)) == nullptr) {
+            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+    }
+
+    doInsertionSort(array, length, itemSize, cmp, context, v.getAlias());
+}
+
+/* QuickSort ---------------------------------------------------------------- */
+
+/*
+ * Sorts array[start..limit[ (items of itemSize bytes) in place.
+ * px and pw each point to scratch space for one item: px holds the pivot
+ * (and serves as the scratch item for doInsertionSort()), pw is used for
+ * swapping.
+ *
+ * This implementation is semi-recursive:
+ * It recurses for the smaller sub-array to shorten the recursion depth,
+ * and loops for the larger sub-array.
+ *
+ * Loosely after QuickSort algorithms in
+ * Niklaus Wirth
+ * Algorithmen und Datenstrukturen mit Modula-2
+ * B.G. Teubner Stuttgart
+ * 4. Auflage 1986
+ * ISBN 3-519-02260-5
+ */
+static void
+subQuickSort(char *array, int32_t start, int32_t limit, int32_t itemSize,
+ UComparator *cmp, const void *context,
+ void *px, void *pw) {
+ int32_t left, right;
+
+ /* start and left are inclusive, limit and right are exclusive */
+ do {
+ /* Small ranges are finished off with the insertion sort. */
+ if((start+MIN_QSORT)>=limit) {
+ doInsertionSort(array+start*itemSize, limit-start, itemSize, cmp, context, px);
+ break;
+ }
+
+ left=start;
+ right=limit;
+
+ /* x=array[middle] */
+ uprv_memcpy(px, array+(size_t)((start+limit)/2)*itemSize, itemSize);
+
+ /* Partition: move left and right towards each other, swapping misplaced items. */
+ do {
+ while(/* array[left]<x */
+ cmp(context, array+left*itemSize, px)<0
+ ) {
+ ++left;
+ }
+ while(/* x<array[right-1] */
+ cmp(context, px, array+(right-1)*itemSize)<0
+ ) {
+ --right;
+ }
+
+ /* swap array[left] and array[right-1] via w; ++left; --right */
+ if(left<right) {
+ --right;
+
+ if(left<right) {
+ uprv_memcpy(pw, array+(size_t)left*itemSize, itemSize);
+ uprv_memcpy(array+(size_t)left*itemSize, array+(size_t)right*itemSize, itemSize);
+ uprv_memcpy(array+(size_t)right*itemSize, pw, itemSize);
+ }
+
+ ++left;
+ }
+ } while(left<right);
+
+ /* sort sub-arrays */
+ if((right-start)<(limit-left)) {
+ /* sort [start..right[ */
+ if(start<(right-1)) {
+ subQuickSort(array, start, right, itemSize, cmp, context, px, pw);
+ }
+
+ /* sort [left..limit[ */
+ start=left;
+ } else {
+ /* sort [left..limit[ */
+ if(left<(limit-1)) {
+ subQuickSort(array, left, limit, itemSize, cmp, context, px, pw);
+ }
+
+ /* sort [start..right[ */
+ limit=right;
+ }
+ } while(start<(limit-1));
+}
+
+/*
+ * Provides the two scratch items (x and w) that subQuickSort() needs, laid
+ * out back to back in one MaybeStackArray, and sorts the whole array.
+ * Sets U_MEMORY_ALLOCATION_ERROR and returns without sorting if the buffer
+ * has to grow and growing fails.
+ */
+static void
+quickSort(char *array, int32_t length, int32_t itemSize,
+          UComparator *cmp, const void *context, UErrorCode *pErrorCode) {
+    icu::MaybeStackArray<std::max_align_t, sizeInMaxAlignTs(STACK_ITEM_SIZE) * 2> xw;
+    if(sizeInMaxAlignTs(itemSize)*2 > xw.getCapacity()) {
+        if(xw.resize(sizeInMaxAlignTs(itemSize) * 2) == nullptr) {
+            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+    }
+
+    /* x occupies the first half of the buffer, w the second half */
+    subQuickSort(array, 0, length, itemSize, cmp, context,
+                 xw.getAlias(), xw.getAlias() + sizeInMaxAlignTs(itemSize));
+}
+
+/* uprv_sortArray() API ----------------------------------------------------- */
+
+/*
+ * Public entry point: validates the arguments, then dispatches to the
+ * insertion sort (short arrays, or when a stable sort is requested) or to
+ * the quick sort. The array is handled as char * so that
+ * array+i*itemSize addresses item i.
+ */
+U_CAPI void U_EXPORT2
+uprv_sortArray(void *array, int32_t length, int32_t itemSize,
+               UComparator *cmp, const void *context,
+               UBool sortStable, UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return;
+    }
+    if((length>0 && array==NULL) || length<0 || itemSize<=0 || cmp==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    if(length<=1) {
+        return;  /* nothing to sort */
+    }
+
+    char *bytes=(char *)array;
+    if(length<MIN_QSORT || sortStable) {
+        insertionSort(bytes, length, itemSize, cmp, context, pErrorCode);
+    } else {
+        quickSort(bytes, length, itemSize, cmp, context, pErrorCode);
+    }
+}
diff --git a/thirdparty/icu4c/common/uarrsort.h b/thirdparty/icu4c/common/uarrsort.h
new file mode 100644
index 0000000000..a55dca5b9e
--- /dev/null
+++ b/thirdparty/icu4c/common/uarrsort.h
@@ -0,0 +1,103 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uarrsort.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003aug04
+* created by: Markus W. Scherer
+*
+* Internal function for sorting arrays.
+*/
+
+#ifndef __UARRSORT_H__
+#define __UARRSORT_H__
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+/**
+ * Function type for comparing two items as part of sorting an array or similar.
+ * Callback function for uprv_sortArray().
+ *
+ * @param context Application-specific pointer, passed through by uprv_sortArray().
+ * @param left Pointer to the "left" item.
+ * @param right Pointer to the "right" item.
+ * @return 32-bit signed integer comparison result:
+ * <0 if left<right
+ * ==0 if left==right
+ * >0 if left>right
+ *
+ * @internal
+ */
+typedef int32_t U_CALLCONV
+UComparator(const void *context, const void *left, const void *right);
+U_CDECL_END
+
+/**
+ * Array sorting function.
+ * Uses a UComparator for comparing array items to each other, and simple
+ * memory copying to move items.
+ *
+ * @param array The array to be sorted.
+ * @param length The number of items in the array.
+ * @param itemSize The size in bytes of each array item.
+ * @param cmp UComparator function used to compare two items each.
+ * @param context Application-specific pointer, passed through to the UComparator.
+ * @param sortStable If true, a stable sorting algorithm must be used.
+ * @param pErrorCode ICU in/out UErrorCode parameter.
+ *
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+uprv_sortArray(void *array, int32_t length, int32_t itemSize,
+ UComparator *cmp, const void *context,
+ UBool sortStable, UErrorCode *pErrorCode);
+
+/**
+ * Convenience UComparator implementation for uint16_t arrays.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_uint16Comparator(const void *context, const void *left, const void *right);
+
+/**
+ * Convenience UComparator implementation for int32_t arrays.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_int32Comparator(const void *context, const void *left, const void *right);
+
+/**
+ * Convenience UComparator implementation for uint32_t arrays.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_uint32Comparator(const void *context, const void *left, const void *right);
+
+/**
+ * Much like Java Collections.binarySearch(list, key, comparator).
+ *
+ * Except: Java documents "If the list contains multiple elements equal to
+ * the specified object, there is no guarantee which one will be found."
+ *
+ * This version here will return the largest index of any equal item,
+ * for use in stable sorting.
+ *
+ * @param array The array to be searched; the return value refers to its sorted order.
+ * @param length The number of items in the array.
+ * @param item Pointer to the item to look for (the "key" in the Java analogy).
+ * @param itemSize The size in bytes of each array item.
+ * @param cmp UComparator function used to compare two items each.
+ * @param context Application-specific pointer, passed through to the UComparator.
+ * @return the index>=0 where the item was found:
+ * the largest such index, if multiple, for stable sorting;
+ * or the index<0 for inserting the item at ~index in sorted order
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_stableBinarySearch(char *array, int32_t length, void *item, int32_t itemSize,
+ UComparator *cmp, const void *context);
+
+#endif
diff --git a/thirdparty/icu4c/common/uassert.h b/thirdparty/icu4c/common/uassert.h
new file mode 100644
index 0000000000..afd31eeffd
--- /dev/null
+++ b/thirdparty/icu4c/common/uassert.h
@@ -0,0 +1,51 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2002-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File uassert.h
+*
+* Contains the U_ASSERT and UPRV_UNREACHABLE macros
+*
+******************************************************************************
+*/
+#ifndef U_ASSERT_H
+#define U_ASSERT_H
+
+/* utypes.h is included to get the proper define for uint8_t */
+#include "unicode/utypes.h"
+/* for abort */
+#include <stdlib.h>
+
+/**
+ * \def U_ASSERT
+ * When U_DEBUG is set, U_ASSERT just wraps the C library assert macro;
+ * otherwise it expands to a no-op.
+ * By changing the definition here, the assert behavior for ICU can be changed
+ * without affecting other non-ICU uses of the C library assert().
+*/
+#if U_DEBUG
+# include <assert.h>
+# define U_ASSERT(exp) assert(exp)
+#elif U_CPLUSPLUS_VERSION
+# define U_ASSERT(exp) (void)0
+#else
+# define U_ASSERT(exp)
+#endif
+
+/**
+ * \def UPRV_UNREACHABLE
+ * This macro is used to unconditionally abort if unreachable code is ever executed.
+ * @internal
+*/
+#if defined(UPRV_UNREACHABLE)
+ // Use the predefined value.
+#else
+# define UPRV_UNREACHABLE abort()
+#endif
+
+#endif
diff --git a/thirdparty/icu4c/common/ubidi.cpp b/thirdparty/icu4c/common/ubidi.cpp
new file mode 100644
index 0000000000..3ddb45721e
--- /dev/null
+++ b/thirdparty/icu4c/common/ubidi.cpp
@@ -0,0 +1,3036 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ubidi.c
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999jul27
+* created by: Markus W. Scherer, updated by Matitiahu Allouche
+*
+*/
+
+#include "cmemory.h"
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "unicode/uchar.h"
+#include "unicode/ubidi.h"
+#include "unicode/utf16.h"
+#include "ubidi_props.h"
+#include "ubidiimp.h"
+#include "uassert.h"
+
+/*
+ * General implementation notes:
+ *
+ * Throughout the implementation, there are comments like (W2) that refer to
+ * rules of the BiDi algorithm, in this example to the second rule of the
+ * resolution of weak types.
+ *
+ * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
+ * character according to UTF-16, the second UChar gets the directional property of
+ * the entire character assigned, while the first one gets a BN, a boundary
+ * neutral, type, which is ignored by most of the algorithm according to
+ * rule (X9) and the implementation suggestions of the BiDi algorithm.
+ *
+ * Later, adjustWSLevels() will set the level for each BN to that of the
+ * following character (UChar), which results in surrogate pairs getting the
+ * same level on each of their surrogates.
+ *
+ * In a UTF-8 implementation, the same thing could be done: the last byte of
+ * a multi-byte sequence would get the "real" property, while all previous
+ * bytes of that sequence would get BN.
+ *
+ * It is not possible to assign all those parts of a character the same real
+ * property because this would fail in the resolution of weak types with rules
+ * that look at immediately surrounding types.
+ *
+ * As a related topic, this implementation does not remove Boundary Neutral
+ * types from the input, but ignores them wherever this is relevant.
+ * For example, the loop for the resolution of the weak types reads
+ * types until it finds a non-BN.
+ * Also, explicit embedding codes are neither changed into BN nor removed.
+ * They are only treated the same way real BNs are.
+ * As stated before, adjustWSLevels() takes care of them at the end.
+ * For the purpose of conformance, the levels of all these codes
+ * do not matter.
+ *
+ * Note that this implementation modifies the dirProps
+ * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
+ * X6, N0 (replace paired brackets by L or R).
+ *
+ * In this implementation, the resolution of weak types (W1 to W6),
+ * neutrals (N1 and N2), and the assignment of the resolved level (In)
+ * are all done in one single loop, in resolveImplicitLevels().
+ * Changes of dirProp values are done on the fly, without writing
+ * them back to the dirProps array.
+ *
+ *
+ * This implementation contains code that allows bypassing steps of the
+ * algorithm that are not needed for the specific paragraph,
+ * in order to speed up the most common cases considerably,
+ * like text that is entirely LTR, or RTL text without numbers.
+ *
+ * Most of this is done by setting a bit for each directional property
+ * in a flags variable and later checking for whether there are
+ * any LTR characters or any RTL characters, or both, whether
+ * there are any explicit embedding codes, etc.
+ *
+ * If the (Xn) steps are performed, then the flags are re-evaluated,
+ * because they will then not contain the embedding codes any more
+ * and will be adjusted for override codes, so that subsequently
+ * more bypassing may be possible than what the initial flags suggested.
+ *
+ * If the text is not mixed-directional, then the
+ * algorithm steps for the weak type resolution are not performed,
+ * and all levels are set to the paragraph level.
+ *
+ * If there are no explicit embedding codes, then the (Xn) steps
+ * are not performed.
+ *
+ * If embedding levels are supplied as a parameter, then all
+ * explicit embedding codes are ignored, and the (Xn) steps
+ * are not performed.
+ *
+ * White Space types could get the level of the run they belong to,
+ * and are checked with a test of (flags&MASK_EMBEDDING) to
+ * consider if the paragraph direction should be considered in
+ * the flags variable.
+ *
+ * If there are no White Space types in the paragraph, then
+ * (L1) is not necessary in adjustWSLevels().
+ */
+
+/* to avoid some conditional statements, use tiny constant arrays */
+/* each table is indexed by bit 0 of a level:
+   entry [0] holds the L/LRE/LRO flag, entry [1] the R/RLE/RLO flag */
+static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
+static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
+static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
+
+/* look up the flag that corresponds to the lowest bit of level */
+#define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
+#define DIRPROP_FLAG_E(level) flagE[(level)&1]
+#define DIRPROP_FLAG_O(level) flagO[(level)&1]
+
+/* yields L if strong is L, and R for any other value */
+#define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
+
+/* the level value with the UBIDI_LEVEL_OVERRIDE bit(s) cleared */
+#define NO_OVERRIDE(level) ((level)&~UBIDI_LEVEL_OVERRIDE)
+
+/* UBiDi object management -------------------------------------------------- */
+
+/* Convenience wrapper: same as ubidi_openSized() with both size hints set to 0. */
+U_CAPI UBiDi * U_EXPORT2
+ubidi_open(void)
+{
+    /* the status stays local; a failure is visible to the caller only as a NULL result */
+    UErrorCode status=U_ZERO_ERROR;
+    UBiDi *bidi=ubidi_openSized(0, 0, &status);
+    return bidi;
+}
+
+/*
+ * Allocate and zero a UBiDi object, optionally pre-allocating its arrays.
+ *
+ * maxLength>0 pre-allocates the dirProps and levels arrays; otherwise the
+ * object is marked so that text-related memory may be allocated later.
+ * maxRunCount>1 pre-allocates the runs array, maxRunCount==1 selects
+ * simpleRuns[]; otherwise the object is marked so that run memory may be
+ * allocated later.
+ *
+ * Returns NULL if pErrorCode is NULL or already indicates a failure, with
+ * U_ILLEGAL_ARGUMENT_ERROR for a negative argument, and with
+ * U_MEMORY_ALLOCATION_ERROR if any allocation fails.
+ */
+U_CAPI UBiDi * U_EXPORT2
+ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
+    /* check the argument values */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    if(maxLength<0 || maxRunCount<0) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL; /* invalid arguments */
+    }
+
+    /* allocate memory for the object */
+    UBiDi *pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
+    if(pBiDi==NULL) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
+    uprv_memset(pBiDi, 0, sizeof(UBiDi));
+
+    /* text-related arrays: pre-allocate on request, else permit later allocation */
+    if(maxLength<=0) {
+        pBiDi->mayAllocateText=TRUE;
+    } else if(!(getInitialDirPropsMemory(pBiDi, maxLength) &&
+                getInitialLevelsMemory(pBiDi, maxLength))) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+    }
+
+    /* runs array: same idea, except that a single run fits into simpleRuns[] */
+    if(maxRunCount<=0) {
+        pBiDi->mayAllocateRuns=TRUE;
+    } else if(maxRunCount==1) {
+        /* use simpleRuns[] */
+        pBiDi->runsSize=sizeof(Run);
+    } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+    }
+
+    if(U_FAILURE(*pErrorCode)) {
+        /* release whatever was allocated before the failure */
+        ubidi_close(pBiDi);
+        return NULL;
+    }
+    return pBiDi;
+}
+
+/*
+ * Make sure that the buffer referenced through bidiMem holds at least
+ * sizeNeeded bytes, allocating or growing it when mayAllocate permits.
+ *
+ * Returns TRUE if the buffer is large enough on return (and *pSize is
+ * updated whenever it was allocated or grown), FALSE if it is too small and
+ * either mayAllocate is FALSE or the allocation failed. On failure the
+ * existing buffer and *pSize are left as they were.
+ *
+ * Assume sizeNeeded>0.
+ * If *pMemory!=NULL, then assume *pSize>0.
+ *
+ * ### growing uses realloc(), which may unnecessarily copy the old data
+ *     that is usually not needed any more;
+ *     is this the best way to do this??
+ */
+U_CFUNC UBool
+ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
+    void **pMemory = (void **)bidiMem;
+
+    if(*pMemory==NULL) {
+        /* nothing allocated yet: we need fresh memory */
+        if(!mayAllocate) {
+            return FALSE;
+        }
+        void *fresh=uprv_malloc(sizeNeeded);
+        if(fresh==NULL) {
+            return FALSE;
+        }
+        *pMemory=fresh;
+        *pSize=sizeNeeded;
+        return TRUE;
+    }
+
+    if(sizeNeeded<=*pSize) {
+        /* there is already enough memory */
+        return TRUE;
+    }
+    if(!mayAllocate) {
+        /* not enough memory, and we must not allocate */
+        return FALSE;
+    }
+
+    /* we try to grow;
+     * in most cases, we do not need the copy-old-data part of
+     * realloc, but it is needed when adding runs using getRunsMemory()
+     * in setParaRunsOnly()
+     */
+    void *grown=uprv_realloc(*pMemory, sizeNeeded);
+    if(grown==NULL) {
+        /* we failed to grow */
+        return FALSE;
+    }
+    *pMemory=grown;
+    *pSize=sizeNeeded;
+    return TRUE;
+}
+
+/* Release a UBiDi object together with every buffer it owns; NULL is ignored. */
+U_CAPI void U_EXPORT2
+ubidi_close(UBiDi *pBiDi) {
+    if(pBiDi==NULL) {
+        return;
+    }
+
+    pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */
+
+    /* the separately allocated buffers, released in this order */
+    void *const ownedBuffers[]={
+        pBiDi->dirPropsMemory,
+        pBiDi->levelsMemory,
+        pBiDi->openingsMemory,
+        pBiDi->parasMemory,
+        pBiDi->runsMemory,
+        pBiDi->isolatesMemory,
+        pBiDi->insertPoints.points
+    };
+    for(void *buffer : ownedBuffers) {
+        if(buffer!=NULL) {
+            uprv_free(buffer);
+        }
+    }
+
+    uprv_free(pBiDi);
+}
+
+/* set to approximate "inverse BiDi" ---------------------------------------- */
+
+/*
+ * Store the inverse flag and keep the reordering mode in step with it:
+ * UBIDI_REORDER_INVERSE_NUMBERS_AS_L when inverse, UBIDI_REORDER_DEFAULT
+ * otherwise. A NULL pBiDi is ignored.
+ */
+U_CAPI void U_EXPORT2
+ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
+    if(pBiDi==NULL) {
+        return;
+    }
+    pBiDi->isInverse=isInverse;
+    if(isInverse) {
+        pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_NUMBERS_AS_L;
+    } else {
+        pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
+    }
+}
+
+/* Report the stored inverse flag; FALSE for a NULL pBiDi. */
+U_CAPI UBool U_EXPORT2
+ubidi_isInverse(UBiDi *pBiDi) {
+    if(pBiDi==NULL) {
+        return FALSE;
+    }
+    return pBiDi->isInverse;
+}
+
+/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
+ * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
+ * concept of RUNS_ONLY which is a double operation.
+ * It could be advantageous to divide this into 3 concepts:
+ * a) Operation: direct / inverse / RUNS_ONLY
+ * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
+ * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
+ * This would allow combinations not possible today like RUNS_ONLY with
+ * NUMBERS_SPECIAL.
+ * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
+ * REMOVE_CONTROLS for the inverse step.
+ * Not all combinations would be supported, and probably not all do make sense.
+ * This would need to document which ones are supported and what are the
+ * fallbacks for unsupported combinations.
+ */
+/*
+ * Store the reordering mode and derive the inverse flag from it.
+ * The call is ignored if pBiDi is NULL or if reorderingMode lies outside
+ * [UBIDI_REORDER_DEFAULT, UBIDI_REORDER_COUNT[.
+ */
+U_CAPI void U_EXPORT2
+ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
+    if(pBiDi==NULL) {
+        return;
+    }
+    if(reorderingMode < UBIDI_REORDER_DEFAULT || reorderingMode >= UBIDI_REORDER_COUNT) {
+        return;
+    }
+    pBiDi->reorderingMode = reorderingMode;
+    /* only this one mode counts as "inverse" for ubidi_isInverse() */
+    pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
+}
+
+/* Report the stored reordering mode; UBIDI_REORDER_DEFAULT for a NULL pBiDi. */
+U_CAPI UBiDiReorderingMode U_EXPORT2
+ubidi_getReorderingMode(UBiDi *pBiDi) {
+    if (pBiDi==NULL) {
+        return UBIDI_REORDER_DEFAULT;
+    }
+    return pBiDi->reorderingMode;
+}
+
+/*
+ * Store the reordering options. If UBIDI_OPTION_REMOVE_CONTROLS is requested,
+ * UBIDI_OPTION_INSERT_MARKS is dropped from the stored value.
+ * A NULL pBiDi is ignored.
+ */
+U_CAPI void U_EXPORT2
+ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
+    if (pBiDi==NULL) {
+        return;
+    }
+    uint32_t effectiveOptions=reorderingOptions;
+    if (effectiveOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
+        effectiveOptions&=~UBIDI_OPTION_INSERT_MARKS;
+    }
+    pBiDi->reorderingOptions=effectiveOptions;
+}
+
+/* Report the stored reordering options; 0 for a NULL pBiDi. */
+U_CAPI uint32_t U_EXPORT2
+ubidi_getReorderingOptions(UBiDi *pBiDi) {
+    if (pBiDi==NULL) {
+        return 0;
+    }
+    return pBiDi->reorderingOptions;
+}
+
+/*
+ * Determine the direction of the first character in text whose class is
+ * U_LEFT_TO_RIGHT (-> UBIDI_LTR) or U_RIGHT_TO_LEFT / U_RIGHT_TO_LEFT_ARABIC
+ * (-> UBIDI_RTL). length==-1 means that the length is taken from u_strlen().
+ * Returns UBIDI_NEUTRAL if there is no such character, if text is NULL,
+ * or if length is less than -1.
+ */
+U_CAPI UBiDiDirection U_EXPORT2
+ubidi_getBaseDirection(const UChar *text,
+                       int32_t length){
+    if( length<-1 || text==NULL ){
+        return UBIDI_NEUTRAL;
+    }
+
+    if(length==-1) {
+        length=u_strlen(text);
+    }
+
+    int32_t pos=0;
+    while( pos < length ) {
+        UChar32 c;
+        /* pos is advanced past the code point by U16_NEXT */
+        U16_NEXT(text, pos, length, c);
+        switch( u_charDirection(c) ) {
+        case U_LEFT_TO_RIGHT:
+            return UBIDI_LTR;
+        case U_RIGHT_TO_LEFT:
+        case U_RIGHT_TO_LEFT_ARABIC:
+            return UBIDI_RTL;
+        default:
+            break; /* not decisive, keep looking */
+        }
+    }
+    return UBIDI_NEUTRAL;
+}
+
+/* perform (P2)..(P3) ------------------------------------------------------- */
+
+/**
+ * Scans the whole prologue and returns the first strong type (L, R or AL)
+ * that follows its last B, or ON if there is none.
+ * Requires prologue!=null.
+ */
+static DirProp
+firstL_R_AL(UBiDi *pBiDi) {
+    const UChar *text=pBiDi->prologue;
+    const int32_t length=pBiDi->proLength;
+    DirProp strong=ON; /* ON means: no strong character since the last B */
+    int32_t pos=0;
+    while(pos<length) {
+        UChar32 c;
+        /* pos is advanced past the code point by U16_NEXT */
+        U16_NEXT(text, pos, length, c);
+        const DirProp prop=(DirProp)ubidi_getCustomizedClass(pBiDi, c);
+        if(strong!=ON) {
+            /* a strong type is already recorded; only a B can discard it */
+            if(prop==B) {
+                strong=ON;
+            }
+        } else if(prop==L || prop==R || prop==AL) {
+            strong=prop;
+        }
+    }
+    return strong;
+}
+
+/*
+ * Make sure that the array pointed to by pBiDi->paras has room for
+ * pBiDi->paraCount entries, moving from simpleParas to heap memory
+ * (and copying the simple entries over) when simpleParas is too small.
+ * Returns FALSE if the required memory could not be obtained.
+ */
+static UBool
+checkParaCount(UBiDi *pBiDi) {
+    const int32_t count=pBiDi->paraCount;
+    const UBool usesSimpleParas=(UBool)(pBiDi->paras==pBiDi->simpleParas);
+
+    if(usesSimpleParas && count<=SIMPLE_PARAS_COUNT) {
+        /* the built-in array is still big enough */
+        return TRUE;
+    }
+
+    /* ask for twice the number of entries currently in play */
+    const int32_t entriesWanted=usesSimpleParas ? SIMPLE_PARAS_COUNT * 2 : count * 2;
+    if(!getInitialParasMemory(pBiDi, entriesWanted)) {
+        return FALSE;
+    }
+    pBiDi->paras=pBiDi->parasMemory;
+    if(usesSimpleParas) {
+        /* first switch to heap memory: carry over the entries collected so far */
+        uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para));
+    }
+    return TRUE;
+}
+
+/*
+ * Get the directional properties for the text, calculate the flags bit-set, and
+ * determine the paragraph level if necessary (in pBiDi->paras[i].level).
+ * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
+ * When encountering an FSI, it is initially replaced with an LRI, which is the
+ * default. Only if a strong R or AL is found within its scope will the LRI be
+ * replaced by an RLI.
+ *
+ * Besides the dirProps array and the paras entries, this sets pBiDi->flags,
+ * pBiDi->lastArabicPos and pBiDi->controlCount, adjusts pBiDi->paraCount,
+ * and (for a default paragraph level) pBiDi->paraLevel; in streaming mode
+ * it also sets pBiDi->length.
+ *
+ * Returns FALSE only if checkParaCount() cannot provide room for another
+ * paragraph entry; returns TRUE otherwise.
+ */
+static UBool
+getDirProps(UBiDi *pBiDi) {
+ const UChar *text=pBiDi->text;
+ DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
+
+ int32_t i=0, originalLength=pBiDi->originalLength;
+ Flags flags=0; /* collect all directionalities in the text */
+ UChar32 uchar;
+ DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */
+ UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
+ /* for inverse BiDi, the default para level is set to RTL if there is a
+ strong R or AL character at either end of the text */
+ UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
+ (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
+ pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
+ int32_t lastArabicPos=-1;
+ int32_t controlCount=0;
+ UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
+ UBIDI_OPTION_REMOVE_CONTROLS);
+
+ enum State {
+ NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */
+ SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */
+ SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */
+ LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */
+ };
+ State state;
+ DirProp lastStrong=ON; /* for default level & inverse BiDi */
+ /* The following stacks are used to manage isolate sequences. Those
+ sequences may be nested, but obviously never more deeply than the
+ maximum explicit embedding level.
+ lastStack is the index of the last used entry in the stack. A value of -1
+ means that there is no open isolate sequence.
+ lastStack is reset to -1 on paragraph boundaries. */
+ /* NOTE(review): the comment above says "lastStack"; the variable that
+ plays this role in the code below is stackLast. */
+ /* The following stack contains the position of the initiator of
+ each open isolate sequence */
+ int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
+ /* The following stack contains the last known state before
+ encountering the initiator of an isolate sequence */
+ State previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
+ int32_t stackLast=-1;
+
+ if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
+ pBiDi->length=0;
+ defaultParaLevel=pBiDi->paraLevel&1;
+ if(isDefaultLevel) {
+ pBiDi->paras[0].level=defaultParaLevel;
+ lastStrong=defaultParaLevel;
+ if(pBiDi->proLength>0 && /* there is a prologue */
+ (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */
+ if(dirProp==L)
+ pBiDi->paras[0].level=0; /* set the default para level */
+ else
+ pBiDi->paras[0].level=1; /* set the default para level */
+ state=NOT_SEEKING_STRONG;
+ } else {
+ state=SEEKING_STRONG_FOR_PARA;
+ }
+ } else {
+ pBiDi->paras[0].level=pBiDi->paraLevel;
+ state=NOT_SEEKING_STRONG;
+ }
+ /* count paragraphs and determine the paragraph level (P2..P3) */
+ /*
+ * see comment in ubidi.h:
+ * the UBIDI_DEFAULT_XXX values are designed so that
+ * their bit 0 alone yields the intended default
+ */
+ for( /* i=0 above */ ; i<originalLength; ) {
+ /* i is incremented by U16_NEXT */
+ U16_NEXT(text, i, originalLength, uchar);
+ flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
+ /* i is already past the code point, so i-1 is its last code unit */
+ dirProps[i-1]=dirProp;
+ if(uchar>0xffff) { /* set the lead surrogate's property to BN */
+ flags|=DIRPROP_FLAG(BN);
+ dirProps[i-2]=BN;
+ }
+ if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
+ controlCount++;
+ if(dirProp==L) {
+ if(state==SEEKING_STRONG_FOR_PARA) {
+ pBiDi->paras[pBiDi->paraCount-1].level=0;
+ state=NOT_SEEKING_STRONG;
+ }
+ else if(state==SEEKING_STRONG_FOR_FSI) {
+ if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
+ /* no need for next statement, already set by default */
+ /* dirProps[isolateStartStack[stackLast]]=LRI; */
+ flags|=DIRPROP_FLAG(LRI);
+ }
+ state=LOOKING_FOR_PDI;
+ }
+ lastStrong=L;
+ continue;
+ }
+ if(dirProp==R || dirProp==AL) {
+ if(state==SEEKING_STRONG_FOR_PARA) {
+ pBiDi->paras[pBiDi->paraCount-1].level=1;
+ state=NOT_SEEKING_STRONG;
+ }
+ else if(state==SEEKING_STRONG_FOR_FSI) {
+ if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
+ dirProps[isolateStartStack[stackLast]]=RLI;
+ flags|=DIRPROP_FLAG(RLI);
+ }
+ state=LOOKING_FOR_PDI;
+ }
+ lastStrong=R;
+ if(dirProp==AL)
+ lastArabicPos=i-1;
+ continue;
+ }
+ if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */
+ /* stackLast keeps counting even beyond the capacity of the stacks;
+ entries are stored and read back only while it is within bounds */
+ stackLast++;
+ if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
+ isolateStartStack[stackLast]=i-1;
+ previousStateStack[stackLast]=state;
+ }
+ if(dirProp==FSI) {
+ dirProps[i-1]=LRI; /* default if no strong char */
+ state=SEEKING_STRONG_FOR_FSI;
+ }
+ else
+ state=LOOKING_FOR_PDI;
+ continue;
+ }
+ if(dirProp==PDI) {
+ if(state==SEEKING_STRONG_FOR_FSI) {
+ if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
+ /* no need for next statement, already set by default */
+ /* dirProps[isolateStartStack[stackLast]]=LRI; */
+ flags|=DIRPROP_FLAG(LRI);
+ }
+ }
+ /* a PDI without an open isolate sequence (stackLast<0) changes nothing */
+ if(stackLast>=0) {
+ if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
+ state=previousStateStack[stackLast];
+ stackLast--;
+ }
+ continue;
+ }
+ if(dirProp==B) {
+ if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
+ continue;
+ pBiDi->paras[pBiDi->paraCount-1].limit=i;
+ if(isDefaultLevelInverse && lastStrong==R)
+ pBiDi->paras[pBiDi->paraCount-1].level=1;
+ if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
+ /* When streaming, we only process whole paragraphs
+ thus some updates are only done on paragraph boundaries */
+ pBiDi->length=i; /* i is index to next character */
+ pBiDi->controlCount=controlCount;
+ }
+ if(i<originalLength) { /* B not last char in text */
+ pBiDi->paraCount++;
+ if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */
+ return FALSE;
+ if(isDefaultLevel) {
+ pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
+ state=SEEKING_STRONG_FOR_PARA;
+ lastStrong=defaultParaLevel;
+ } else {
+ pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
+ state=NOT_SEEKING_STRONG;
+ }
+ stackLast=-1;
+ }
+ continue;
+ }
+ }
+ /* Ignore still open isolate sequences with overflow */
+ if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
+ stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
+ state=SEEKING_STRONG_FOR_FSI; /* to be on the safe side */
+ }
+ /* Resolve direction of still unresolved open FSI sequences */
+ while(stackLast>=0) {
+ if(state==SEEKING_STRONG_FOR_FSI) {
+ /* no need for next statement, already set by default */
+ /* dirProps[isolateStartStack[stackLast]]=LRI; */
+ flags|=DIRPROP_FLAG(LRI);
+ break;
+ }
+ state=previousStateStack[stackLast];
+ stackLast--;
+ }
+ /* When streaming, ignore text after the last paragraph separator */
+ if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
+ /* pBiDi->length was last set at a paragraph separator (or is still 0) */
+ if(pBiDi->length<originalLength)
+ pBiDi->paraCount--;
+ } else {
+ pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
+ pBiDi->controlCount=controlCount;
+ }
+ /* For inverse bidi, default para direction is RTL if there is
+ a strong R or AL at either end of the paragraph */
+ if(isDefaultLevelInverse && lastStrong==R) {
+ pBiDi->paras[pBiDi->paraCount-1].level=1;
+ }
+ if(isDefaultLevel) {
+ pBiDi->paraLevel=static_cast<UBiDiLevel>(pBiDi->paras[0].level);
+ }
+ /* The following is needed to resolve the text direction for default level
+ paragraphs containing no strong character */
+ for(i=0; i<pBiDi->paraCount; i++)
+ flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
+
+ if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
+ flags|=DIRPROP_FLAG(L);
+ }
+ pBiDi->flags=flags;
+ pBiDi->lastArabicPos=lastArabicPos;
+ return TRUE;
+}
+
+/*
+ * Return the level of the first paragraph whose limit lies beyond pindex;
+ * if there is no such paragraph, the level of the last one is returned.
+ */
+U_CFUNC UBiDiLevel
+ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
+    const int32_t count=pBiDi->paraCount;
+    int32_t para=0;
+    /* skip the paragraphs that end at or before pindex */
+    while(para<count && pindex>=pBiDi->paras[para].limit) {
+        ++para;
+    }
+    if(para>=count) {
+        para=count-1;
+    }
+    return (UBiDiLevel)(pBiDi->paras[para].level);
+}
+
+/* Functions for handling paired brackets ----------------------------------- */
+
+/* In the isoRuns array, the first entry is used for text outside of any
+ isolate sequence. Higher entries are used for each more deeply nested
+ isolate sequence. isoRunLast is the index of the last used entry. The
+ openings array is used to note the data of opening brackets not yet
+ matched by a closing bracket, or matched but still susceptible to change
+ level.
+ Each isoRun entry contains the index of the first and
+ one-after-last openings entries for pending opening brackets it
+ contains. The next openings entry to use is the one-after-last of the
+ most deeply nested isoRun entry.
+ isoRun entries also contain their current embedding level and the last
+ encountered strong character, since these will be needed to resolve
+ the level of paired brackets. */
+
+/*
+ * Prepare bd for processing pBiDi: isoRuns[0] is reset to the level of the
+ * paragraph at index 0, the openings array is taken from
+ * pBiDi->openingsMemory if there is one (else from bd->simpleOpenings),
+ * and isNumbersSpecial is derived from the reordering mode.
+ */
+static void
+bracketInit(UBiDi *pBiDi, BracketData *bd) {
+    const UBiDiLevel paraLevel=GET_PARALEVEL(pBiDi, 0);
+    const UBiDiLevel paraDir=paraLevel & 1; /* bit 0 of the level gives the direction */
+    IsoRun *rootRun=&bd->isoRuns[0];
+
+    bd->pBiDi=pBiDi;
+    bd->isoRunLast=0;
+
+    rootRun->start=0;
+    rootRun->limit=0;
+    rootRun->level=paraLevel;
+    rootRun->lastBase=paraDir;
+    rootRun->lastStrong=rootRun->lastBase;
+    rootRun->contextDir=(UBiDiDirection)paraDir;
+    rootRun->contextPos=0;
+
+    if(!pBiDi->openingsMemory) {
+        /* nothing allocated so far: start with the built-in array */
+        bd->openings=bd->simpleOpenings;
+        bd->openingsCount=SIMPLE_OPENINGS_COUNT;
+    } else {
+        bd->openings=pBiDi->openingsMemory;
+        bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
+    }
+
+    const UBiDiReorderingMode mode=pBiDi->reorderingMode;
+    bd->isNumbersSpecial=mode==UBIDI_REORDER_NUMBERS_SPECIAL ||
+                         mode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
+}
+
+/* paragraph boundary: go back to isoRuns[0] and restart it at the given level */
+static void
+bracketProcessB(BracketData *bd, UBiDiLevel level) {
+    IsoRun *rootRun=&bd->isoRuns[0];
+    const UBiDiLevel levelDir=level&1; /* bit 0 of the level gives the direction */
+
+    bd->isoRunLast=0;
+    rootRun->limit=0;
+    rootRun->level=level;
+    rootRun->lastBase=levelDir;
+    rootRun->lastStrong=rootRun->lastBase;
+    rootRun->contextDir=(UBiDiDirection)levelDir;
+    rootRun->contextPos=0;
+}
+
+/*
+ * LRE, LRO, RLE, RLO, PDF:
+ * restart the current isolate run at an embedding boundary.
+ *
+ * bd             bracket-processing state; its last isoRun entry is updated
+ * lastCcPos      text position whose dirProp is checked; nothing is done if
+ *                it is an isolate initiator (MASK_ISO); it becomes the new
+ *                contextPos of the run
+ * contextLevel   level in effect before the boundary
+ * embeddingLevel level in effect after the boundary
+ *
+ * All pending openings of the run are dropped (limit is reset to start), and
+ * lastStrong/lastBase/contextDir are set from bit 0 of the higher of the two
+ * levels, compared without their override bits.
+ */
+static void
+bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
+                       UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
+    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
+    DirProp *dirProps=bd->pBiDi->dirProps;
+    if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */
+        return;
+    if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel)) /* not a PDF */
+        contextLevel=embeddingLevel;
+    pLastIsoRun->limit=pLastIsoRun->start;
+    pLastIsoRun->level=embeddingLevel;
+    pLastIsoRun->lastStrong=pLastIsoRun->lastBase=contextLevel&1;
+    pLastIsoRun->contextDir=(UBiDiDirection)(contextLevel&1);
+    /* contextPos is a text position, not a direction: store it as is.
+       Casting it to UBiDiDirection first is undefined behavior for
+       positions outside the value range of that enum. */
+    pLastIsoRun->contextPos=lastCcPos;
+}
+
+/*
+ * LRI or RLI: open the next isoRuns entry for the new isolate sequence.
+ * The new entry starts with no pending openings, at the position where the
+ * openings of the enclosing run currently end, and takes its direction
+ * from bit 0 of level.
+ */
+static void
+bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
+    IsoRun *enclosingRun=&bd->isoRuns[bd->isoRunLast];
+    enclosingRun->lastBase=ON;
+    const int16_t lastLimit=enclosingRun->limit;
+
+    bd->isoRunLast++;
+    IsoRun *newRun=enclosingRun+1;
+    newRun->limit=lastLimit;
+    newRun->start=newRun->limit;
+    newRun->level=level;
+    newRun->lastBase=level&1;
+    newRun->lastStrong=newRun->lastBase;
+    newRun->contextDir=(UBiDiDirection)(level&1);
+    newRun->contextPos=0;
+}
+
+/* PDI: step back to the enclosing isoRuns entry and reset its lastBase to ON */
+static void
+bracketProcessPDI(BracketData *bd) {
+    bd->isoRunLast--;
+    IsoRun *enclosingRun=&bd->isoRuns[bd->isoRunLast];
+    enclosingRun->lastBase=ON;
+}
+
+/*
+ * newly found opening bracket: append an openings entry to the current
+ * isolate run, growing the openings array first when it is full.
+ * match and position are stored in the new entry; contextDir and contextPos
+ * are copied from the run.
+ */
+static UBool /* return TRUE if success */
+bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
+    IsoRun *run=&bd->isoRuns[bd->isoRunLast];
+
+    if(run->limit>=bd->openingsCount) { /* no available new entry */
+        UBiDi *pBiDi=bd->pBiDi;
+        if(!getInitialOpeningsMemory(pBiDi, run->limit * 2)) {
+            return FALSE;
+        }
+        if(bd->openings==bd->simpleOpenings) {
+            /* leaving the built-in array: carry its entries over */
+            uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
+                        SIMPLE_OPENINGS_COUNT * sizeof(Opening));
+        }
+        bd->openings=pBiDi->openingsMemory; /* may have changed */
+        bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
+    }
+
+    Opening *entry=&bd->openings[run->limit];
+    entry->position=position;
+    entry->match=match;
+    entry->contextDir=run->contextDir;
+    entry->contextPos=run->contextPos;
+    entry->flags=0;
+    run->limit++;
+    return TRUE;
+}
+
+/* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
+/*
+ * bd              bracket-processing state; the current (last) isoRun is used
+ * openingIndex    index of the openings entry whose dirProp was just set;
+ *                 only entries after it, up to the limit of the run, are examined
+ * newPropPosition text position that received newProp
+ * newProp         the dirProp that was assigned at newPropPosition
+ *
+ * Only entries with a negative match are considered; for those, -match is
+ * used as the position of the closing bracket. A matching entry gets newProp
+ * written to both of its bracket positions in dirProps, and is then closed
+ * to further changes by setting its match to 0.
+ */
+static void
+fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
+ /* This function calls itself recursively */
+ IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
+ Opening *qOpening;
+ DirProp *dirProps=bd->pBiDi->dirProps;
+ int32_t k, openingPosition, closingPosition;
+ for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
+ if(qOpening->match>=0) /* not an N0c match */
+ continue;
+ /* the changed position lies before this entry's context position: stop scanning */
+ if(newPropPosition<qOpening->contextPos)
+ break;
+ /* the changed position does not precede this opening bracket: skip the entry */
+ if(newPropPosition>=qOpening->position)
+ continue;
+ /* the new type equals the context direction recorded for this entry: stop scanning */
+ if(newProp==qOpening->contextDir)
+ break;
+ openingPosition=qOpening->position;
+ dirProps[openingPosition]=newProp;
+ closingPosition=-(qOpening->match);
+ dirProps[closingPosition]=newProp;
+ qOpening->match=0; /* prevent further changes */
+ /* both brackets of this pair changed: propagate from each of their positions */
+ fixN0c(bd, k, openingPosition, newProp);
+ fixN0c(bd, k, closingPosition, newProp);
+ }
+}
+
+/* process closing bracket
+ *
+ * Resolves the pair made of the opening stored at bd->openings[openIdx] and
+ * the closing bracket at text index position, following the N0b / N0c / N0d
+ * cases marked below.
+ * For N0b and N0c both brackets receive newProp in dirProps[] and pending
+ * nested N0c pairs are revisited through fixN0c().
+ * For N0d nothing is written to dirProps[]; the opening and every entry
+ * above it are dropped from the current isolate run.
+ */
+static DirProp /* return L or R if N0b or N0c, ON if N0d */
+bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
+ IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
+ Opening *pOpening, *qOpening;
+ UBiDiDirection direction;
+ UBool stable;
+ DirProp newProp;
+ pOpening=&bd->openings[openIdx];
+ direction=(UBiDiDirection)(pLastIsoRun->level&1); /* low bit of the run level: 0 or 1 */
+ stable=TRUE; /* assume stable until proved otherwise */
+
+ /* The stable flag is set when brackets are paired and their
+ level is resolved and cannot be changed by what will be
+ found later in the source string.
+ An unstable match can occur only when applying N0c, where
+ the resolved level depends on the preceding context, and
+ this context may be affected by text occurring later.
+ Example: RTL paragraph containing: abc[(latin) HEBREW]
+ When the closing parenthesis is encountered, it appears
+ that N0c1 must be applied since 'abc' sets an opposite
+ direction context and both parentheses receive level 2.
+ However, when the closing square bracket is processed,
+ N0b applies because of 'HEBREW' being included within the
+ brackets, thus the square brackets are treated like R and
+ receive level 1. However, this changes the preceding
+ context of the opening parenthesis, and it now appears
+ that N0c2 must be applied to the parentheses rather than
+ N0c1. */
+
+ if((direction==0 && pOpening->flags&FOUND_L) ||
+ (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
+ newProp=static_cast<DirProp>(direction);
+ }
+ else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
+ /* it is stable if there is no containing pair or in
+ conditions too complicated and not worth checking */
+ stable=(openIdx==pLastIsoRun->start);
+ if(direction!=pOpening->contextDir)
+ newProp= static_cast<DirProp>(pOpening->contextDir); /* N0c1 */
+ else
+ newProp= static_cast<DirProp>(direction); /* N0c2 */
+ } else {
+ /* forget this and any brackets nested within this pair */
+ pLastIsoRun->limit= static_cast<uint16_t>(openIdx);
+ return ON; /* N0d */
+ }
+ bd->pBiDi->dirProps[pOpening->position]=newProp;
+ bd->pBiDi->dirProps[position]=newProp;
+ /* Update nested N0c pairs that may be affected */
+ fixN0c(bd, openIdx, pOpening->position, newProp);
+ if(stable) {
+ pLastIsoRun->limit= static_cast<uint16_t>(openIdx); /* forget any brackets nested within this pair */
+ /* remove lower located synonyms if any */
+ while(pLastIsoRun->limit>pLastIsoRun->start &&
+ bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
+ pLastIsoRun->limit--;
+ } else {
+ int32_t k;
+ pOpening->match=-position; /* negative match marks a pending N0c pair; fixN0c() reads the closing index back from it */
+ /* neutralize lower located synonyms if any */
+ k=openIdx-1;
+ while(k>=pLastIsoRun->start &&
+ bd->openings[k].position==pOpening->position)
+ bd->openings[k--].match=0;
+ /* neutralize any unmatched opening between the current pair;
+ this will also neutralize higher located synonyms if any */
+ for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
+ qOpening=&bd->openings[k];
+ if(qOpening->position>=position)
+ break;
+ if(qOpening->match>0)
+ qOpening->match=0;
+ }
+ }
+ return newProp;
+}
+
+/* handle strong characters, digits and candidates for closing brackets
+ *
+ * Called for the character at text index position.
+ * For an ON character it first searches the openings of the last isolate
+ * run, from newest to oldest, for one whose match equals the character; if
+ * found, the pair is resolved with bracketProcessClosing(). Otherwise the
+ * character may be registered as a new opening through bracketAddOpening().
+ * For the remaining cases it updates the run's lastBase / lastStrong /
+ * contextDir / contextPos fields and, when the outcome is strong, ORs the
+ * direction flag into every opening of the run located before position.
+ * FALSE is returned only when bracketAddOpening() fails.
+ */
+static UBool /* return TRUE if success */
+bracketProcessChar(BracketData *bd, int32_t position) {
+ IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
+ DirProp *dirProps, dirProp, newProp;
+ UBiDiLevel level;
+ dirProps=bd->pBiDi->dirProps;
+ dirProp=dirProps[position];
+ if(dirProp==ON) {
+ UChar c, match;
+ int32_t idx;
+ /* First see if it is a matching closing bracket. Hopefully, this is
+ more efficient than checking if it is a closing bracket at all */
+ c=bd->pBiDi->text[position];
+ for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
+ if(bd->openings[idx].match!=c)
+ continue;
+ /* We have a match */
+ newProp=bracketProcessClosing(bd, idx, position);
+ if(newProp==ON) { /* N0d */
+ c=0; /* prevent handling as an opening */
+ break;
+ }
+ pLastIsoRun->lastBase=ON;
+ pLastIsoRun->contextDir=(UBiDiDirection)newProp;
+ pLastIsoRun->contextPos=position;
+ level=bd->pBiDi->levels[position];
+ if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
+ uint16_t flag;
+ int32_t i;
+ newProp=level&1;
+ pLastIsoRun->lastStrong=newProp;
+ flag=DIRPROP_FLAG(newProp);
+ for(i=pLastIsoRun->start; i<idx; i++)
+ bd->openings[i].flags|=flag;
+ /* matching brackets are not overridden by LRO/RLO */
+ bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
+ }
+ /* matching brackets are not overridden by LRO/RLO */
+ bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
+ return TRUE;
+ }
+ /* We get here only if the ON character is not a matching closing
+ bracket or it is a case of N0d */
+ /* Now see if it is an opening bracket */
+ if(c)
+ match= static_cast<UChar>(u_getBidiPairedBracket(c)); /* get the matching char */
+ else
+ match=0;
+ if(match!=c && /* has a matching char */
+ ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */
+ /* special case: process synonyms
+ create an opening entry for each synonym */
+ if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
+ if(!bracketAddOpening(bd, 0x3009, position))
+ return FALSE;
+ }
+ else if(match==0x3009) { /* RIGHT ANGLE BRACKET */
+ if(!bracketAddOpening(bd, 0x232A, position))
+ return FALSE;
+ }
+ if(!bracketAddOpening(bd, match, position))
+ return FALSE;
+ }
+ }
+ level=bd->pBiDi->levels[position];
+ if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
+ newProp=level&1;
+ if(dirProp!=S && dirProp!=WS && dirProp!=ON)
+ dirProps[position]=newProp;
+ pLastIsoRun->lastBase=newProp;
+ pLastIsoRun->lastStrong=newProp;
+ pLastIsoRun->contextDir=(UBiDiDirection)newProp;
+ pLastIsoRun->contextPos=position;
+ }
+ else if(dirProp<=R || dirProp==AL) {
+ newProp= static_cast<DirProp>(DIR_FROM_STRONG(dirProp));
+ pLastIsoRun->lastBase=dirProp;
+ pLastIsoRun->lastStrong=dirProp;
+ pLastIsoRun->contextDir=(UBiDiDirection)newProp;
+ pLastIsoRun->contextPos=position;
+ }
+ else if(dirProp==EN) {
+ pLastIsoRun->lastBase=EN;
+ if(pLastIsoRun->lastStrong==L) {
+ newProp=L; /* W7 */
+ if(!bd->isNumbersSpecial)
+ dirProps[position]=ENL;
+ pLastIsoRun->contextDir=(UBiDiDirection)L;
+ pLastIsoRun->contextPos=position;
+ }
+ else {
+ newProp=R; /* N0 */
+ if(pLastIsoRun->lastStrong==AL)
+ dirProps[position]=AN; /* W2 */
+ else
+ dirProps[position]=ENR;
+ pLastIsoRun->contextDir=(UBiDiDirection)R;
+ pLastIsoRun->contextPos=position;
+ }
+ }
+ else if(dirProp==AN) {
+ newProp=R; /* N0 */
+ pLastIsoRun->lastBase=AN;
+ pLastIsoRun->contextDir=(UBiDiDirection)R;
+ pLastIsoRun->contextPos=position;
+ }
+ else if(dirProp==NSM) {
+ /* if the last real char was ON, change NSM to ON so that it
+ will stay ON even if the last real char is a bracket which
+ may be changed to L or R */
+ newProp=pLastIsoRun->lastBase;
+ if(newProp==ON)
+ dirProps[position]=newProp;
+ }
+ else { /* every other type, including an ON that was not resolved as a closing bracket */
+ newProp=dirProp;
+ pLastIsoRun->lastBase=dirProp;
+ }
+ if(newProp<=R || newProp==AL) { /* strong result: record its direction on the openings that precede it */
+ int32_t i;
+ uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
+ for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
+ if(position>bd->openings[i].position)
+ bd->openings[i].flags|=flag;
+ }
+ return TRUE;
+}
+
+/* perform (X1)..(X9) ------------------------------------------------------- */
+
+/* classify the collected flags of pBiDi: pure LTR, pure RTL, or mixed-directional text */
+static UBiDiDirection
+directionFromFlags(UBiDi *pBiDi) {
+    const Flags seen=pBiDi->flags;
+    /* AN together with neutrals counts as RTL content because some of
+       those neutrals may become RTL */
+    const bool hasRtlContent=(seen&MASK_RTL)!=0 ||
+        ((seen&DIRPROP_FLAG(AN))!=0 && (seen&MASK_POSSIBLE_N)!=0);
+    if(!hasRtlContent) {
+        return UBIDI_LTR;
+    }
+    return (seen&MASK_LTR)==0 ? UBIDI_RTL : UBIDI_MIXED;
+}
+
+/*
+ * Resolve the explicit levels as specified by explicit embedding codes.
+ * Recalculate the flags to have them reflect the real properties
+ * after taking the explicit embeddings into account.
+ *
+ * The BiDi algorithm is designed to result in the same behavior whether embedding
+ * levels are externally specified (from "styled text", supposedly the preferred
+ * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
+ * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
+ * However, in a real implementation, the removal of these codes and their index
+ * positions in the plain text is undesirable since it would result in
+ * reallocated, reindexed text.
+ * Instead, this implementation leaves the codes in there and just ignores them
+ * in the subsequent processing.
+ * In order to get the same reordering behavior, positions with a BN or a not-isolate
+ * explicit embedding code just get the same level assigned as the last "real"
+ * character.
+ *
+ * Some implementations, not this one, then overwrite some of these
+ * directionality properties at "real" same-level-run boundaries by
+ * L or R codes so that the resolution of weak types can be performed on the
+ * entire paragraph at once instead of having to parse it once more and
+ * perform that resolution on same-level-runs.
+ * This limits the scope of the implicit rules in effectively
+ * the same way as the run limits.
+ *
+ * Instead, this implementation does not modify these codes, except for
+ * paired brackets whose properties (ON) may be replaced by L or R.
+ * On one hand, the paragraph has to be scanned for same-level-runs, but
+ * on the other hand, this saves another loop to reset these codes,
+ * or saves making and modifying a copy of dirProps[].
+ *
+ *
+ * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
+ *
+ *
+ * Handling the stack of explicit levels (Xn):
+ *
+ * With the BiDi stack of explicit levels, as pushed with each
+ * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
+ * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
+ *
+ * In order to have a correct push-pop semantics even in the case of overflows,
+ * overflow counters and a valid isolate counter are used as described in UAX#9
+ * section 3.3.2 "Explicit Levels and Directions".
+ *
+ * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
+ *
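+ * Returns normally the direction. On a memory shortage it returns -1 from the
+ * path that handles explicit codes, or UBIDI_LTR with *pErrorCode set to
+ * U_MEMORY_ALLOCATION_ERROR from the path without explicit codes.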
+ *
+ */
+static UBiDiDirection
+resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
+ /* Resolves levels[] for the whole text and returns the overall direction.
+ If bracket bookkeeping runs out of memory, *pErrorCode is set to
+ U_MEMORY_ALLOCATION_ERROR on every path; the explicit-codes path
+ additionally keeps its historical return value of -1. */
+ DirProp *dirProps=pBiDi->dirProps;
+ UBiDiLevel *levels=pBiDi->levels;
+ const UChar *text=pBiDi->text;
+
+ int32_t i=0, length=pBiDi->length;
+ Flags flags=pBiDi->flags; /* collect all directionalities in the text */
+ DirProp dirProp;
+ UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
+ UBiDiDirection direction;
+ pBiDi->isolateCount=0;
+
+ if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
+
+ /* determine if the text is mixed-directional or single-directional */
+ direction=directionFromFlags(pBiDi);
+
+ /* we may not need to resolve any explicit levels */
+ if((direction!=UBIDI_MIXED)) {
+ /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
+ return direction;
+ }
+ if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
+ /* inverse BiDi: mixed, but all characters are at the same embedding level */
+ /* set all levels to the paragraph level */
+ int32_t paraIndex, start, limit;
+ for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
+ if(paraIndex==0)
+ start=0;
+ else
+ start=pBiDi->paras[paraIndex-1].limit;
+ limit=pBiDi->paras[paraIndex].limit;
+ level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
+ for(i=start; i<limit; i++)
+ levels[i]=level;
+ }
+ return direction; /* no bracket matching for inverse BiDi */
+ }
+ if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
+ /* no embeddings, set all levels to the paragraph level */
+ /* we still have to perform bracket matching */
+ int32_t paraIndex, start, limit;
+ BracketData bracketData;
+ bracketInit(pBiDi, &bracketData);
+ for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
+ if(paraIndex==0)
+ start=0;
+ else
+ start=pBiDi->paras[paraIndex-1].limit;
+ limit=pBiDi->paras[paraIndex].limit;
+ level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
+ for(i=start; i<limit; i++) {
+ levels[i]=level;
+ dirProp=dirProps[i];
+ if(dirProp==BN)
+ continue;
+ if(dirProp==B) {
+ if((i+1)<length) {
+ if(text[i]==CR && text[i+1]==LF)
+ continue; /* skip CR when followed by LF */
+ bracketProcessB(&bracketData, level);
+ }
+ continue;
+ }
+ if(!bracketProcessChar(&bracketData, i)) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return UBIDI_LTR;
+ }
+ }
+ }
+ return direction;
+ }
+ {
+ /* continue to perform (Xn) */
+
+ /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
+ /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
+ UBiDiLevel embeddingLevel=level, newLevel;
+ UBiDiLevel previousLevel=level; /* previous level for regular (not CC) characters */
+ int32_t lastCcPos=0; /* index of last effective LRx,RLx, PDx */
+
+ /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
+ stackLast points to its current entry. */
+ uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
+ but we need one more entry as base */
+ uint32_t stackLast=0;
+ int32_t overflowIsolateCount=0;
+ int32_t overflowEmbeddingCount=0;
+ int32_t validIsolateCount=0;
+ BracketData bracketData;
+ bracketInit(pBiDi, &bracketData);
+ stack[0]=level; /* initialize base entry to para level, no override, no isolate */
+
+ /* recalculate the flags */
+ flags=0;
+
+ for(i=0; i<length; ++i) {
+ dirProp=dirProps[i];
+ switch(dirProp) {
+ case LRE:
+ case RLE:
+ case LRO:
+ case RLO:
+ /* (X2, X3, X4, X5) */
+ flags|=DIRPROP_FLAG(BN);
+ levels[i]=previousLevel;
+ if (dirProp==LRE || dirProp==LRO)
+ /* least greater even level */
+ newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
+ else
+ /* least greater odd level */
+ newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
+ if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
+ overflowEmbeddingCount==0) {
+ lastCcPos=i;
+ embeddingLevel=newLevel;
+ if(dirProp==LRO || dirProp==RLO)
+ embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
+ stackLast++;
+ stack[stackLast]=embeddingLevel;
+ /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
+ since this has already been done for newLevel which is
+ the source for embeddingLevel.
+ */
+ } else {
+ if(overflowIsolateCount==0)
+ overflowEmbeddingCount++;
+ }
+ break;
+ case PDF:
+ /* (X7) */
+ flags|=DIRPROP_FLAG(BN);
+ levels[i]=previousLevel;
+ /* handle all the overflow cases first */
+ if(overflowIsolateCount) {
+ break;
+ }
+ if(overflowEmbeddingCount) {
+ overflowEmbeddingCount--;
+ break;
+ }
+ if(stackLast>0 && stack[stackLast]<ISOLATE) { /* not an isolate entry */
+ lastCcPos=i;
+ stackLast--;
+ embeddingLevel=(UBiDiLevel)stack[stackLast];
+ }
+ break;
+ case LRI:
+ case RLI:
+ flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
+ levels[i]=NO_OVERRIDE(embeddingLevel);
+ if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
+ bracketProcessBoundary(&bracketData, lastCcPos,
+ previousLevel, embeddingLevel);
+ flags|=DIRPROP_FLAG_MULTI_RUNS;
+ }
+ previousLevel=embeddingLevel;
+ /* (X5a, X5b) */
+ if(dirProp==LRI)
+ /* least greater even level */
+ newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
+ else
+ /* least greater odd level */
+ newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
+ if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
+ overflowEmbeddingCount==0) {
+ flags|=DIRPROP_FLAG(dirProp);
+ lastCcPos=i;
+ validIsolateCount++;
+ if(validIsolateCount>pBiDi->isolateCount)
+ pBiDi->isolateCount=validIsolateCount;
+ embeddingLevel=newLevel;
+ /* we can increment stackLast without checking because newLevel
+ will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
+ stackLast++;
+ stack[stackLast]=embeddingLevel+ISOLATE;
+ bracketProcessLRI_RLI(&bracketData, embeddingLevel);
+ } else {
+ /* make it WS so that it is handled by adjustWSLevels() */
+ dirProps[i]=WS;
+ overflowIsolateCount++;
+ }
+ break;
+ case PDI:
+ if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
+ bracketProcessBoundary(&bracketData, lastCcPos,
+ previousLevel, embeddingLevel);
+ flags|=DIRPROP_FLAG_MULTI_RUNS;
+ }
+ /* (X6a) */
+ if(overflowIsolateCount) {
+ overflowIsolateCount--;
+ /* make it WS so that it is handled by adjustWSLevels() */
+ dirProps[i]=WS;
+ }
+ else if(validIsolateCount) {
+ flags|=DIRPROP_FLAG(PDI);
+ lastCcPos=i;
+ overflowEmbeddingCount=0;
+ while(stack[stackLast]<ISOLATE) /* pop embedding entries */
+ stackLast--; /* until the last isolate entry */
+ stackLast--; /* pop also the last isolate entry */
+ validIsolateCount--;
+ bracketProcessPDI(&bracketData);
+ } else
+ /* make it WS so that it is handled by adjustWSLevels() */
+ dirProps[i]=WS;
+ embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
+ flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
+ previousLevel=embeddingLevel;
+ levels[i]=NO_OVERRIDE(embeddingLevel);
+ break;
+ case B:
+ flags|=DIRPROP_FLAG(B);
+ levels[i]=GET_PARALEVEL(pBiDi, i);
+ if((i+1)<length) {
+ if(text[i]==CR && text[i+1]==LF)
+ break; /* skip CR when followed by LF */
+ overflowEmbeddingCount=overflowIsolateCount=0;
+ validIsolateCount=0;
+ stackLast=0;
+ previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
+ stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
+ bracketProcessB(&bracketData, embeddingLevel);
+ }
+ break;
+ case BN:
+ /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
+ /* they will get their levels set correctly in adjustWSLevels() */
+ levels[i]=previousLevel;
+ flags|=DIRPROP_FLAG(BN);
+ break;
+ default:
+ /* all other types are normal characters and get the "real" level */
+ if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
+ bracketProcessBoundary(&bracketData, lastCcPos,
+ previousLevel, embeddingLevel);
+ flags|=DIRPROP_FLAG_MULTI_RUNS;
+ if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
+ flags|=DIRPROP_FLAG_O(embeddingLevel);
+ else
+ flags|=DIRPROP_FLAG_E(embeddingLevel);
+ }
+ previousLevel=embeddingLevel;
+ levels[i]=embeddingLevel;
+ if(!bracketProcessChar(&bracketData, i)) {
+ /* report the allocation failure through the error code as well,
+ exactly like the path without explicit codes does, so that a
+ caller testing only U_FAILURE() cannot miss it */
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return (UBiDiDirection)-1;
+ }
+ /* the dirProp may have been changed in bracketProcessChar() */
+ flags|=DIRPROP_FLAG(dirProps[i]);
+ break;
+ }
+ }
+ if(flags&MASK_EMBEDDING)
+ flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
+ if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
+ flags|=DIRPROP_FLAG(L);
+ /* again, determine if the text is mixed-directional or single-directional */
+ pBiDi->flags=flags;
+ direction=directionFromFlags(pBiDi);
+ }
+ return direction;
+}
+
+/*
+ * Use a pre-specified embedding levels array:
+ *
+ * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
+ * ignore all explicit codes (X9),
+ * and check all the preset levels.
+ *
+ * Recalculate the flags to have them reflect the real properties
+ * after taking the explicit embeddings into account.
+ *
+ * A level of 0 (override bit ignored) that is below the current paragraph
+ * level is, for every character except B, replaced in levels[] by the
+ * current paragraph level, keeping the override bit.
+ * Any other level below the current paragraph level, or any level above
+ * UBIDI_MAX_EXPLICIT_LEVEL, sets *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR and
+ * makes the function return UBIDI_LTR.
+ *
+ * pBiDi->isolateCount receives the maximum of a running count that is
+ * incremented at LRI/RLI, decremented at PDI and reset at B.
+ * On success the recomputed flags are stored in pBiDi->flags and the
+ * result of directionFromFlags() is returned.
+ */
+static UBiDiDirection
+checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
+ DirProp *dirProps=pBiDi->dirProps;
+ UBiDiLevel *levels=pBiDi->levels;
+ int32_t isolateCount=0;
+
+ int32_t length=pBiDi->length;
+ Flags flags=0; /* collect all directionalities in the text */
+ pBiDi->isolateCount=0;
+
+ int32_t currentParaIndex = 0;
+ int32_t currentParaLimit = pBiDi->paras[0].limit;
+ int32_t currentParaLevel = pBiDi->paraLevel;
+
+ for(int32_t i=0; i<length; ++i) {
+ UBiDiLevel level=levels[i];
+ DirProp dirProp=dirProps[i];
+ if(dirProp==LRI || dirProp==RLI) {
+ isolateCount++;
+ if(isolateCount>pBiDi->isolateCount)
+ pBiDi->isolateCount=isolateCount;
+ }
+ else if(dirProp==PDI)
+ isolateCount--;
+ else if(dirProp==B)
+ isolateCount=0;
+
+ // optimized version of int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i);
+ if (pBiDi->defaultParaLevel != 0 &&
+ i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) {
+ currentParaLevel = pBiDi->paras[++currentParaIndex].level;
+ currentParaLimit = pBiDi->paras[currentParaIndex].limit;
+ }
+
+ UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE; // remember the override bit, then validate the bare level
+ level &= ~UBIDI_LEVEL_OVERRIDE;
+ if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) {
+ if (level == 0) {
+ if (dirProp == B) {
+ // Paragraph separators are ok with explicit level 0.
+ // Prevents reordering of paragraphs.
+ } else {
+ // Treat explicit level 0 as a wildcard for the paragraph level.
+ // Avoid making the caller guess what the paragraph level would be.
+ level = (UBiDiLevel)currentParaLevel;
+ levels[i] = level | overrideFlag;
+ }
+ } else {
+ // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level
+ /* level out of bounds */
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return UBIDI_LTR;
+ }
+ }
+ if (overrideFlag != 0) {
+ /* keep the override flag in levels[i] but adjust the flags */
+ flags|=DIRPROP_FLAG_O(level);
+ } else {
+ /* set the flags */
+ flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
+ }
+ }
+ if(flags&MASK_EMBEDDING)
+ flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
+ /* determine if the text is mixed-directional or single-directional */
+ pBiDi->flags=flags;
+ return directionFromFlags(pBiDi);
+}
+
+/******************************************************************
+ The Properties state machine table
+*******************************************************************
+
+ All table cells are 8 bits:
+ bits 0..4: next state
+ bits 5..7: action to perform (if > 0)
+
+ Cells may be of format "n" where n represents the next state
+ (except for the rightmost column).
+ Cells may also be of format "s(x,y)" where x represents an action
+ to perform and y represents the next state.
+
+*******************************************************************
+ Definitions and type for properties state table
+*******************************************************************
+*/
+#define IMPTABPROPS_COLUMNS 16 /* 15 property columns plus the Res column of impTabProps */
+#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1) /* index of the rightmost (Res) column */
+#define GET_STATEPROPS(cell) ((cell)&0x1f) /* bits 0..4 of a cell: next state */
+#define GET_ACTIONPROPS(cell) ((cell)>>5) /* bits 5..7 of a cell: action */
+#define s(action, newState) ((uint8_t)(newState+(action<<5))) /* builds a properties-table cell from an action and a next state */
+
+static const uint8_t groupProp[] = /* dirProp regrouped */
+{
+/* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */
+ 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14
+};
+enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
+
+/******************************************************************
+
+ PROPERTIES STATE TABLE
+
+ In table impTabProps,
+ - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
+ - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
+ - the Res column is the reduced property assigned to a run
+
+ Action 1: process current run1, init new run1
+ 2: init new run2
+ 3: process run1, process run2, init new run1
+ 4: process run1, set run1=run2, init new run2
+
+ Notes:
+ 1) This table is used in resolveImplicitLevels().
+ 2) This table triggers actions when there is a change in the Bidi
+ property of incoming characters (action 1).
+ 3) Most such property sequences are processed immediately (in
+ fact, passed to processPropertySeq()).
+ 4) However, numbers are assembled as one sequence. This means
+ that undefined situations (like CS following digits, until
+ it is known if the next char will be a digit) are held until
+ following chars define them.
+ Example: digits followed by CS, then comes another CS or ON;
+ the digits will be processed, then the CS assigned
+ as the start of an ON sequence (action 3).
+ 5) There are cases where more than one sequence must be
+ processed, for instance digits followed by CS followed by L:
+ the digits must be processed as one sequence, and the CS
+ must be processed as an ON sequence, all this before starting
+ assembling chars for the opening L sequence.
+
+
+*/
+static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
+{
+/* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */
+/* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON },
+/* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L },
+/* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R },
+/* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R },
+/* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN },
+/* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
+/* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN },
+/* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
+/* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
+/* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON },
+/*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN },
+/*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN },
+/*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
+/*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN },
+/*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
+/*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S },
+/*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S },
+/*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B },
+/*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L },
+/*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L },
+/*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L },
+/*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN },
+/*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN },
+/*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN }
+};
+
+/* we must undef macro s because the levels tables have a different
+ * structure (4 bits for action and 4 bits for next state).
+ */
+#undef s
+
+/******************************************************************
+ The levels state machine tables
+*******************************************************************
+
+ All table cells are 8 bits:
+ bits 0..3: next state
+ bits 4..7: action to perform (if > 0)
+
+ Cells may be of format "n" where n represents the next state
+ (except for the rightmost column).
+ Cells may also be of format "s(x,y)" where x represents an action
+ to perform and y represents the next state.
+
+ This format limits each table to 16 states each and to 15 actions.
+
+*******************************************************************
+ Definitions and type for levels state tables
+*******************************************************************
+*/
+#define IMPTABLEVELS_COLUMNS (DirProp_B + 2) /* one column per reduced dirProp (L..B) plus the Res column */
+#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1) /* index of the rightmost (Res) column */
+#define GET_STATE(cell) ((cell)&0x0f) /* bits 0..3 of a cell: next state */
+#define GET_ACTION(cell) ((cell)>>4) /* bits 4..7 of a cell: action */
+#define s(action, newState) ((uint8_t)(newState+(action<<4))) /* builds a levels-table cell from an action and a next state */
+
+typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS]; /* a levels state table: one row per state */
+typedef uint8_t ImpAct[]; /* maps a table's local action numbers to the total list of actions */
+
+/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
+ * instead of having a pair of ImpTab and a pair of ImpAct.
+ */
+typedef struct ImpTabPair {
+ const void * pImpTab[2]; /* the initializers in this file put the impTabL_* table at [0] and the impTabR_* table at [1] */
+ const void * pImpAct[2]; /* action maps in the same order as pImpTab */
+} ImpTabPair;
+
+/******************************************************************
+
+ LEVELS STATE TABLES
+
+ In all levels state tables,
+ - state 0 is the initial state
+ - the Res column is the increment to add to the text level
+ for this property sequence.
+
+ The impAct arrays for each table of a pair map the local action
+ numbers of the table to the total list of actions. For instance,
+ action 2 in a given table corresponds to the action number which
+ appears in entry [2] of the impAct array for that table.
+ The first entry of all impAct arrays must be 0.
+
+ Action 1: init conditional sequence
+ 2: prepend conditional sequence to current sequence
+ 3: set ON sequence to new level - 1
+ 4: init EN/AN/ON sequence
+ 5: fix EN/AN/ON sequence followed by R
+ 6: set previous level sequence to level 2
+
+ Notes:
+ 1) These tables are used in processPropertySeq(). The input
+ is property sequences as determined by resolveImplicitLevels.
+ 2) Most such property sequences are processed immediately
+ (levels are assigned).
+ 3) However, some sequences cannot be assigned a final level till
+ one or more following sequences are received. For instance,
+ ON following an R sequence within an even-level paragraph.
+ If the following sequence is R, the ON sequence will be
+ assigned basic run level+1, and so will the R sequence.
+ 4) S is generally handled like ON, since its level will be fixed
+ to paragraph level in adjustWSLevels().
+
+*/
+
+static const ImpTab impTabL_DEFAULT = /* Even paragraph level */
+/* In this table, conditional sequences receive the lower possible level
+ until proven otherwise.
+*/
+{
+/* L , R , EN , AN , ON , S , B , Res */
+/* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
+/* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
+/* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
+/* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
+/* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 },
+/* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 }
+};
+static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */
+/* In this table, conditional sequences receive the lower possible level
+ until proven otherwise.
+*/
+{
+/* L , R , EN , AN , ON , S , B , Res */
+/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
+/* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
+/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
+/* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
+/* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
+/* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
+};
+static const ImpAct impAct0 = {0,1,2,3,4};
+static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
+ &impTabR_DEFAULT},
+ {&impAct0, &impAct0}};
+
+static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */
+/* In this table, conditional sequences receive the lower possible level
+ until proven otherwise.
+*/
+{
+/* L , R , EN , AN , ON , S , B , Res */
+/* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
+/* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 },
+/* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 },
+/* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 },
+/* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
+};
+static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
+ &impTabR_DEFAULT},
+ {&impAct0, &impAct0}};
+
+static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
+/* In this table, EN/AN+ON sequences receive levels as if associated with R
+ until proven that there is L or sor/eor on both sides. AN is handled like EN.
+ This table sits in the first slot of impTab_GROUP_NUMBERS_WITH_R, i.e. the
+ slot selected when runLevel&1 is 0.
+*/
+{
+/* L , R , EN , AN , ON , S , B , Res */
+/* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
+/* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 },
+/* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 },
+/* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 },
+/* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 },
+/* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 }
+};
+static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
+/* In this table, EN/AN+ON sequences receive levels as if associated with R
+ until proven that there is L on both sides. AN is handled like EN.
+ This table sits in the second slot of impTab_GROUP_NUMBERS_WITH_R, i.e. the
+ slot selected when runLevel&1 is 1.
+*/
+{
+/* L , R , EN , AN , ON , S , B , Res */
+/* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
+/* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
+/* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 },
+/* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
+/* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
+};
+static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
+ {&impTabL_GROUP_NUMBERS_WITH_R,
+ &impTabR_GROUP_NUMBERS_WITH_R},
+ {&impAct0, &impAct0}}; /* both directions use the identity action map */
+
+
+static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
+/* This table is identical to the Default LTR table except that EN and AN are
+ handled like L.
+ Consequently the EN and AN columns below carry the same cells as the L
+ column in every row.
+*/
+{
+/* L , R , EN , AN , ON , S , B , Res */
+/* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
+/* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 },
+/* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 },
+/* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 },
+/* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 },
+/* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 }
+};
+static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
+/* This table is identical to the Default RTL table except that EN and AN are
+ handled like L.
+ Consequently the EN and AN columns below carry the same cells as the L
+ column in every row.
+*/
+{
+/* L , R , EN , AN , ON , S , B , Res */
+/* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
+/* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 },
+/* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
+/* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
+/* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 },
+/* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
+};
+static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
+ {&impTabL_INVERSE_NUMBERS_AS_L,
+ &impTabR_INVERSE_NUMBERS_AS_L},
+ {&impAct0, &impAct0}}; /* both directions use the identity action map */
+
+static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */
+/* In this table, conditional sequences receive the lower possible level
+ until proven otherwise.
+ The action indices in the s() cells of this table are translated through
+ impAct1, not through the identity map.
+*/
+{
+/* L , R , EN , AN , ON , S , B , Res */
+/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
+/* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 },
+/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
+/* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 },
+/* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 },
+/* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
+/* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
+};
+static const ImpAct impAct1 = {0,1,13,14}; /* cell action indices 2 and 3 select actions 13 and 14 in processPropertySeq() */
+/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
+ */
+static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
+ {&impTabL_DEFAULT,
+ &impTabR_INVERSE_LIKE_DIRECT},
+ {&impAct0, &impAct1}}; /* even run levels: default table with identity map; odd run levels: table above with impAct1 */
+
+static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
+/* The case handled in this table is (visually): R EN L
+ The action indices in the s() cells of this table are translated through
+ impAct2.
+*/
+{
+/* L , R , EN , AN , ON , S , B , Res */
+/* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
+/* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 },
+/* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 },
+/* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 },
+/* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 },
+/* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 },
+/* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 }
+};
+static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
+/* The cases handled in this table are (visually): R EN L
+ R L AN L
+ The action indices in the s() cells of this table are translated through
+ impAct3.
+*/
+{
+/* L , R , EN , AN , ON , S , B , Res */
+/* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
+/* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 },
+/* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 },
+/* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 },
+/* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 },
+/* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
+/* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
+};
+static const ImpAct impAct2 = {0,1,2,5,6,7,8}; /* cell action indices 3..6 select actions 5..8 in processPropertySeq() */
+static const ImpAct impAct3 = {0,1,9,10,11,12}; /* cell action indices 2..5 select actions 9..12 in processPropertySeq() */
+static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
+ {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
+ &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
+ {&impAct2, &impAct3}}; /* impAct2 pairs with the first table, impAct3 with the second */
+
+static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
+ {&impTabL_NUMBERS_SPECIAL,
+ &impTabR_INVERSE_LIKE_DIRECT},
+ {&impAct0, &impAct1}}; /* built entirely from tables defined for other modes */
+
+static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
+/* The case handled in this table is (visually): R EN L
+ The action indices in the s() cells of this table are translated through
+ impAct2.
+*/
+{
+/* L , R , EN , AN , ON , S , B , Res */
+/* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
+/* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 },
+/* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 },
+/* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 },
+/* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 }
+};
+static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
+ {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
+ &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, /* second slot shared with the INVERSE_LIKE_DIRECT_WITH_MARKS pair */
+ {&impAct2, &impAct3}};
+
+#undef s
+
+typedef struct { /* per-run working state shared by resolveImplicitLevels() and processPropertySeq() */
+ const ImpTab * pImpTab; /* level table pointer; picked by resolveImplicitLevels() using runLevel&1 */
+ const ImpAct * pImpAct; /* action map array; translates the action index of a table cell into an action number */
+ int32_t startON; /* start of ON sequence; reset to -1 when there is none */
+ int32_t startL2EN; /* start of level 2 sequence; -1 when none, -2 once an AN has followed the EN */
+ int32_t lastStrongRTL; /* index of last found R or AL; -1 when none */
+ int32_t state; /* current state, i.e. the row index into *pImpTab */
+ int32_t runStart; /* start position of the run */
+ UBiDiLevel runLevel; /* run level before implicit solving; table level increments are added to it */
+} LevState;
+
+/*------------------------------------------------------------------------*/
+
+/*
+ * Append one pending mark-insertion record to pBiDi->insertPoints.
+ *   pos:  text position the mark refers to
+ *   flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
+ * The backing array is created on first use and doubled whenever it is full.
+ * If an allocation fails, insertPoints.errorCode is set to
+ * U_MEMORY_ALLOCATION_ERROR, the existing array is kept and nothing is added.
+ */
+static void
+addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
+{
+#define FIRSTALLOC 10
+    InsertPoints *list=&(pBiDi->insertPoints);
+
+    /* lazily create the array the first time a point is recorded */
+    if(list->capacity==0) {
+        list->points=static_cast<Point *>(uprv_malloc(sizeof(Point)*FIRSTALLOC));
+        if(list->points==NULL) {
+            list->errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        list->capacity=FIRSTALLOC;
+    }
+
+    /* array full: double it, keeping the old block if the growth fails */
+    if(list->size>=list->capacity) {
+        Point *grown=static_cast<Point *>(uprv_realloc(list->points,
+                                          list->capacity*2*sizeof(Point)));
+        if(grown==NULL) {
+            list->errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        list->points=grown;
+        list->capacity*=2;
+    }
+
+    Point entry;
+    entry.pos=pos;
+    entry.flag=flag;
+    list->points[list->size]=entry;
+    list->size++;
+#undef FIRSTALLOC
+}
+
+/*
+ * Store `level` into levels[k] for every k in [start, limit) that is not
+ * nested inside an isolate opened within that same range.
+ * Nesting is tracked with a counter: LRI/RLI raise it after their own position
+ * has been handled, PDI lowers it before its own position is handled, so the
+ * initiator and its matching PDI themselves receive the level.
+ */
+static void
+setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
+{
+    const DirProp *props=pBiDi->dirProps;
+    UBiDiLevel *out=pBiDi->levels;
+    int32_t depth=0;
+    int32_t pos;
+
+    for(pos=start; pos<limit; ++pos) {
+        const DirProp prop=props[pos];
+        if(prop==PDI) {
+            --depth;
+        }
+        if(depth==0) {
+            out[pos]=level;
+        }
+        if(prop==LRI || prop==RLI) {
+            ++depth;
+        }
+    }
+}
+
+/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
+
+/*
+ * This implementation of the (Wn) rules applies all rules in one pass.
+ * In order to do so, it needs a look-ahead of typically 1 character
+ * (except for W5: sequences of ET) and keeps track of changes
+ * in a rule Wp that affect a later Wq (p<q).
+ *
+ * The (Nn) and (In) rules are also performed in that same single loop,
+ * but effectively one iteration behind for white space.
+ *
+ * Since all implicit rules are performed in one step, it is not necessary
+ * to actually store the intermediate directional properties in dirProps[].
+ */
+/*
+ * Feed one property sequence through the levels state machine of a run.
+ *
+ * pLevState: state of the run being resolved (tables, current state, marks)
+ * _prop:     column of the levels table to use for this sequence
+ * start, limit: the sequence covers text positions [start, limit)
+ *
+ * The cell for (current state, _prop) yields the next state and an action
+ * index; the index is translated through *pImpAct into one of the numbered
+ * actions below. Afterwards, if the new state carries a non-zero level
+ * increment, or an action moved `start` back before its original value,
+ * levels[start..limit) is set to runLevel plus that increment.
+ */
+static void
+processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
+ int32_t start, int32_t limit) {
+ uint8_t cell, oldStateSeq, actionSeq;
+ const ImpTab * pImpTab=pLevState->pImpTab;
+ const ImpAct * pImpAct=pLevState->pImpAct;
+ UBiDiLevel * levels=pBiDi->levels;
+ UBiDiLevel level, addLevel;
+ InsertPoints * pInsertPoints;
+ int32_t start0, k;
+
+ start0=start; /* save original start position */
+ oldStateSeq=(uint8_t)pLevState->state;
+ cell=(*pImpTab)[oldStateSeq][_prop]; /* table cell for (current state, incoming property) */
+ pLevState->state=GET_STATE(cell); /* isolate the new state */
+ actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
+ addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES]; /* level increment attached to the new state */
+
+ if(actionSeq) { /* action 0 means there is nothing special to do */
+ switch(actionSeq) {
+ case 1: /* init ON seq */
+ pLevState->startON=start0;
+ break;
+
+ case 2: /* prepend ON seq to current seq */
+ start=pLevState->startON;
+ break;
+
+ case 3: /* EN/AN after R+ON */
+ level=pLevState->runLevel+1;
+ setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
+ break;
+
+ case 4: /* EN/AN before R for NUMBERS_SPECIAL */
+ level=pLevState->runLevel+2;
+ setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
+ break;
+
+ case 5: /* L or S after possible relevant EN/AN */
+ /* check if we had EN after R/AL */
+ if (pLevState->startL2EN >= 0) {
+ addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
+ }
+ pLevState->startL2EN=-1; /* not within previous if since could also be -2 */
+ /* check if we had any relevant EN/AN after R/AL */
+ pInsertPoints=&(pBiDi->insertPoints);
+ if ((pInsertPoints->capacity == 0) ||
+ (pInsertPoints->size <= pInsertPoints->confirmed))
+ {
+ /* nothing, just clean up */
+ pLevState->lastStrongRTL=-1;
+ /* check if we have a pending conditional segment */
+ level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
+ if ((level & 1) && (pLevState->startON > 0)) { /* after ON */
+ start=pLevState->startON; /* reset to basic run level */
+ }
+ if (_prop == DirProp_S) /* add LRM before S */
+ {
+ addPoint(pBiDi, start0, LRM_BEFORE);
+ pInsertPoints->confirmed=pInsertPoints->size;
+ }
+ break;
+ }
+ /* reset previous RTL cont to level for LTR text */
+ for (k=pLevState->lastStrongRTL+1; k<start0; k++)
+ {
+ /* reset odd level, leave runLevel+2 as is */
+ levels[k]=(levels[k] - 2) & ~1;
+ }
+ /* mark insert points as confirmed */
+ pInsertPoints->confirmed=pInsertPoints->size;
+ pLevState->lastStrongRTL=-1;
+ if (_prop == DirProp_S) /* add LRM before S */
+ {
+ addPoint(pBiDi, start0, LRM_BEFORE);
+ pInsertPoints->confirmed=pInsertPoints->size;
+ }
+ break;
+
+ case 6: /* R/AL after possible relevant EN/AN */
+ /* just clean up */
+ pInsertPoints=&(pBiDi->insertPoints);
+ if (pInsertPoints->capacity > 0)
+ /* remove all non confirmed insert points */
+ pInsertPoints->size=pInsertPoints->confirmed;
+ pLevState->startON=-1;
+ pLevState->startL2EN=-1;
+ pLevState->lastStrongRTL=limit - 1;
+ break;
+
+ case 7: /* EN/AN after R/AL + possible cont */
+ /* check for real AN */
+ if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
+ (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
+ {
+ /* real AN */
+ if (pLevState->startL2EN == -1) /* if no relevant EN already found */
+ {
+ /* just note the rightmost digit as a strong RTL */
+ pLevState->lastStrongRTL=limit - 1;
+ break;
+ }
+ if (pLevState->startL2EN >= 0) /* after EN, no AN */
+ {
+ addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
+ pLevState->startL2EN=-2;
+ }
+ /* note AN */
+ addPoint(pBiDi, start0, LRM_BEFORE);
+ break;
+ }
+ /* if first EN/AN after R/AL */
+ if (pLevState->startL2EN == -1) {
+ pLevState->startL2EN=start0;
+ }
+ break;
+
+ case 8: /* note location of latest R/AL */
+ pLevState->lastStrongRTL=limit - 1;
+ pLevState->startON=-1;
+ break;
+
+ case 9: /* L after R+ON/EN/AN */
+ /* include possible adjacent number on the left */
+ for (k=start0-1; k>=0 && !(levels[k]&1); k--); /* empty body: scan left for the nearest odd level */
+ if(k>=0) {
+ addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */
+ pInsertPoints=&(pBiDi->insertPoints);
+ pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */
+ }
+ pLevState->startON=start0;
+ break;
+
+ case 10: /* AN after L */
+ /* AN numbers between L text on both sides may be trouble. */
+ /* tentatively bracket with LRMs; will be confirmed if followed by L */
+ addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */
+ addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */
+ break;
+
+ case 11: /* R after L+ON/EN/AN */
+ /* false alert, infirm LRMs around previous AN */
+ pInsertPoints=&(pBiDi->insertPoints);
+ pInsertPoints->size=pInsertPoints->confirmed;
+ if (_prop == DirProp_S) /* add RLM before S */
+ {
+ addPoint(pBiDi, start0, RLM_BEFORE);
+ pInsertPoints->confirmed=pInsertPoints->size;
+ }
+ break;
+
+ case 12: /* L after L+ON/AN */
+ level=pLevState->runLevel + addLevel;
+ for(k=pLevState->startON; k<start0; k++) {
+ if (levels[k]<level)
+ levels[k]=level;
+ }
+ pInsertPoints=&(pBiDi->insertPoints);
+ pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */
+ pLevState->startON=start0;
+ break;
+
+ case 13: /* L after L+ON+EN/AN/ON */
+ level=pLevState->runLevel;
+ for(k=start0-1; k>=pLevState->startON; k--) {
+ if(levels[k]==level+3) {
+ while(levels[k]==level+3) { /* NOTE(review): k is decremented here without re-checking k>=startON - confirm it cannot run past the sequence */
+ levels[k--]-=2;
+ }
+ while(levels[k]==level) { /* NOTE(review): same unchecked decrement as above */
+ k--;
+ }
+ }
+ if(levels[k]==level+2) {
+ levels[k]=level;
+ continue;
+ }
+ levels[k]=level+1;
+ }
+ break;
+
+ case 14: /* R after L+ON+EN/AN/ON */
+ level=pLevState->runLevel+1;
+ for(k=start0-1; k>=pLevState->startON; k--) {
+ if(levels[k]>level) {
+ levels[k]-=2;
+ }
+ }
+ break;
+
+ default: /* we should never get here */
+ UPRV_UNREACHABLE;
+ }
+ }
+ if((addLevel) || (start < start0)) { /* a level must be written: the state raises it, or an action pulled start back */
+ level=pLevState->runLevel + addLevel;
+ if(start>=pLevState->runStart) { /* range lies inside the current run: plain fill */
+ for(k=start; k<limit; k++) {
+ levels[k]=level;
+ }
+ } else { /* range begins before this run: leave text nested in isolates untouched */
+ setLevelsOutsideIsolates(pBiDi, start, limit, level);
+ }
+ }
+}
+
+/**
+ * Scans the prologue backwards and reports the class of the last strong
+ * character: DirProp_L for L, DirProp_R for R or AL.
+ * The scan stops with DirProp_ON as soon as a paragraph separator (B) is met,
+ * and DirProp_ON is also the answer when no strong character is found.
+ * Requires prologue!=null.
+ */
+static DirProp
+lastL_R_AL(UBiDi *pBiDi) {
+    const UChar *src=pBiDi->prologue;
+    int32_t pos=pBiDi->proLength;
+
+    while(pos>0) {
+        UChar32 c;
+        /* U16_PREV moves pos back over one code point and reads it into c */
+        U16_PREV(src, 0, pos, c);
+        const DirProp cls=static_cast<DirProp>(ubidi_getCustomizedClass(pBiDi, c));
+        if(cls==L) {
+            return DirProp_L;
+        }
+        if(cls==R || cls==AL) {
+            return DirProp_R;
+        }
+        if(cls==B) {
+            break;      /* nothing before a paragraph separator counts */
+        }
+    }
+    return DirProp_ON;
+}
+
+/**
+ * Scans the epilogue forwards and reports the class of the first strong
+ * character or digit: DirProp_L for L, DirProp_R for R or AL, DirProp_EN for
+ * EN, DirProp_AN for AN. Returns DirProp_ON when none of these occurs.
+ * Requires epilogue!=null.
+ */
+static DirProp
+firstL_R_AL_EN_AN(UBiDi *pBiDi) {
+    const UChar *src=pBiDi->epilogue;
+    const int32_t end=pBiDi->epiLength;
+    int32_t pos=0;
+
+    while(pos<end) {
+        UChar32 c;
+        /* U16_NEXT reads one code point into c and advances pos past it */
+        U16_NEXT(src, pos, end, c);
+        const DirProp cls=static_cast<DirProp>(ubidi_getCustomizedClass(pBiDi, c));
+        if(cls==L) {
+            return DirProp_L;
+        }
+        if(cls==R || cls==AL) {
+            return DirProp_R;
+        }
+        if(cls==EN) {
+            return DirProp_EN;
+        }
+        if(cls==AN) {
+            return DirProp_AN;
+        }
+    }
+    return DirProp_ON;
+}
+/*
+ * Resolve the levels of one run, text positions [start, limit).
+ *
+ * sor, eor: properties fed to the state machines before the first and after
+ *           the last character of the run. When the run starts at 0 and a
+ *           prologue is set, sor is replaced by the last strong class of the
+ *           prologue; when the run ends at the text end and an epilogue is
+ *           set, the final flush uses the first strong/digit class of the
+ *           epilogue instead of eor.
+ *
+ * Each character's property is mapped through groupProp[] and fed to the
+ * impTabProps state machine, whose actions cut the run into sequences that
+ * are handed to processPropertySeq().
+ * A run starting with PDI resumes from the state saved in pBiDi->isolates[];
+ * a run ending with LRI/RLI before the end of the text saves its state there
+ * instead of being closed with eor.
+ */
+static void
+resolveImplicitLevels(UBiDi *pBiDi,
+ int32_t start, int32_t limit,
+ DirProp sor, DirProp eor) {
+ const DirProp *dirProps=pBiDi->dirProps;
+ DirProp dirProp;
+ LevState levState;
+ int32_t i, start1, start2;
+ uint16_t oldStateImp, stateImp, actionImp;
+ uint8_t gprop, resProp, cell;
+ UBool inverseRTL;
+ DirProp nextStrongProp=R;
+ int32_t nextStrongPos=-1;
+
+ /* check for RTL inverse BiDi mode */
+ /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
+ * loop on the text characters from end to start.
+ * This would need a different properties state table (at least different
+ * actions) and different levels state tables (maybe very similar to the
+ * LTR corresponding ones.
+ */
+ inverseRTL=(UBool)
+ ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
+ (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
+ pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
+
+ /* initialize for property and levels state tables */
+ levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
+ levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
+ levState.runStart=start;
+ levState.runLevel=pBiDi->levels[start];
+ levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1]; /* slot 0 for even run levels, slot 1 for odd */
+ levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
+ if(start==0 && pBiDi->proLength>0) {
+ DirProp lastStrong=lastL_R_AL(pBiDi);
+ if(lastStrong!=DirProp_ON) { /* the prologue has a strong character: it overrides sor */
+ sor=lastStrong;
+ }
+ }
+ /* The isolates[] entries contain enough information to
+ resume the bidi algorithm in the same state as it was
+ when it was interrupted by an isolate sequence. */
+ if(dirProps[start]==PDI && pBiDi->isolateCount >= 0) {
+ levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
+ start1=pBiDi->isolates[pBiDi->isolateCount].start1;
+ stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
+ levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
+ pBiDi->isolateCount--; /* pop the entry that was just consumed */
+ } else {
+ levState.startON=-1;
+ start1=start;
+ if(dirProps[start]==NSM)
+ stateImp = 1 + sor;
+ else
+ stateImp=0;
+ levState.state=0;
+ processPropertySeq(pBiDi, &levState, sor, start, start); /* empty sequence: primes the levels state with sor */
+ }
+ start2=start; /* to make Java compiler happy */
+
+ for(i=start; i<=limit; i++) { /* the extra pass at i==limit feeds eor */
+ if(i>=limit) {
+ int32_t k;
+ for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--); /* empty body: back up over trailing BN/explicit codes */
+ dirProp=dirProps[k];
+ if(dirProp==LRI || dirProp==RLI)
+ break; /* no forced closing for sequence ending with LRI/RLI */
+ gprop=eor;
+ } else {
+ DirProp prop, prop1;
+ prop=dirProps[i];
+ if(prop==B) {
+ pBiDi->isolateCount=-1; /* current isolates stack entry == none */
+ }
+ if(inverseRTL) {
+ if(prop==AL) {
+ /* AL before EN does not make it AN */
+ prop=R;
+ } else if(prop==EN) {
+ if(nextStrongPos<=i) { /* cached look-ahead is stale: search again */
+ /* look for next strong char (L/R/AL) */
+ int32_t j;
+ nextStrongProp=R; /* set default */
+ nextStrongPos=limit;
+ for(j=i+1; j<limit; j++) {
+ prop1=dirProps[j];
+ if(prop1==L || prop1==R || prop1==AL) {
+ nextStrongProp=prop1;
+ nextStrongPos=j;
+ break;
+ }
+ }
+ }
+ if(nextStrongProp==AL) {
+ prop=AN;
+ }
+ }
+ }
+ gprop=groupProp[prop];
+ }
+ oldStateImp=stateImp;
+ cell=impTabProps[oldStateImp][gprop];
+ stateImp=GET_STATEPROPS(cell); /* isolate the new state */
+ actionImp=GET_ACTIONPROPS(cell); /* isolate the action */
+ if((i==limit) && (actionImp==0)) {
+ /* there is an unprocessed sequence if its property == eor */
+ actionImp=1; /* process the last sequence */
+ }
+ if(actionImp) {
+ resProp=impTabProps[oldStateImp][IMPTABPROPS_RES]; /* property the finished sequence is handed over with */
+ switch(actionImp) {
+ case 1: /* process current seq1, init new seq1 */
+ processPropertySeq(pBiDi, &levState, resProp, start1, i);
+ start1=i;
+ break;
+ case 2: /* init new seq2 */
+ start2=i;
+ break;
+ case 3: /* process seq1, process seq2, init new seq1 */
+ processPropertySeq(pBiDi, &levState, resProp, start1, start2);
+ processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
+ start1=i;
+ break;
+ case 4: /* process seq1, set seq1=seq2, init new seq2 */
+ processPropertySeq(pBiDi, &levState, resProp, start1, start2);
+ start1=start2;
+ start2=i;
+ break;
+ default: /* we should never get here */
+ UPRV_UNREACHABLE;
+ }
+ }
+ }
+
+ /* flush possible pending sequence, e.g. ON */
+ if(limit==pBiDi->length && pBiDi->epiLength>0) {
+ DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
+ if(firstStrong!=DirProp_ON) { /* the epilogue has a strong character or digit: it overrides eor */
+ eor=firstStrong;
+ }
+ }
+
+ /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
+ for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--);
+ dirProp=dirProps[i];
+ if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) { /* run is interrupted by an isolate: save state for the run that starts with the matching PDI */
+ pBiDi->isolateCount++;
+ pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
+ pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
+ pBiDi->isolates[pBiDi->isolateCount].start1=start1;
+ pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
+ }
+ else
+ processPropertySeq(pBiDi, &levState, eor, limit, limit); /* empty sequence: closes the run with eor */
+}
+
+/* perform (L1) and (X9) ---------------------------------------------------- */
+
+/*
+ * Reset the embedding levels for some non-graphic characters (L1).
+ * This function also sets appropriate levels for BN, and
+ * explicit embedding types that are supposed to have been removed
+ * from the paragraph in (X9).
+ *
+ * Nothing is done unless pBiDi->flags has a bit of MASK_WS set.
+ * The text is walked backwards starting at pBiDi->trailingWSStart; the two
+ * inner loops alternate until position 0 is reached.
+ * When pBiDi->orderParagraphsLTR is set, B positions receive level 0
+ * instead of the paragraph level.
+ */
+static void
+adjustWSLevels(UBiDi *pBiDi) {
+ const DirProp *dirProps=pBiDi->dirProps;
+ UBiDiLevel *levels=pBiDi->levels;
+ int32_t i;
+
+ if(pBiDi->flags&MASK_WS) {
+ UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
+ Flags flag;
+
+ i=pBiDi->trailingWSStart;
+ while(i>0) {
+ /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
+ while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) { /* note: i is pre-decremented inside the condition */
+ if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
+ levels[i]=0;
+ } else {
+ levels[i]=GET_PARALEVEL(pBiDi, i);
+ }
+ }
+
+ /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
+ /* here, i+1 is guaranteed to be <length */
+ while(i>0) {
+ flag=DIRPROP_FLAG(dirProps[--i]);
+ if(flag&MASK_BN_EXPLICIT) {
+ levels[i]=levels[i+1]; /* inherit the level of the following character */
+ } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
+ levels[i]=0;
+ break;
+ } else if(flag&MASK_B_S) {
+ levels[i]=GET_PARALEVEL(pBiDi, i);
+ break;
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Record the text surrounding the paragraph that will be analysed next.
+ *
+ * prologue/proLength: text before the paragraph; a length of -1 means the
+ *                     string is NUL-terminated and is measured with u_strlen
+ * epilogue/epiLength: text after the paragraph, same length convention
+ * pErrorCode:         set to U_ILLEGAL_ARGUMENT_ERROR when pBiDi is NULL, a
+ *                     length is below -1, or a NULL string comes with a
+ *                     non-zero length
+ *
+ * Only the pointers and lengths are stored; the strings are not copied.
+ */
+U_CAPI void U_EXPORT2
+ubidi_setContext(UBiDi *pBiDi,
+                 const UChar *prologue, int32_t proLength,
+                 const UChar *epilogue, int32_t epiLength,
+                 UErrorCode *pErrorCode) {
+    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
+
+    /* reject a missing object or out-of-range lengths first ... */
+    if(pBiDi==NULL || proLength<-1 || epiLength<-1) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    /* ... then a NULL string that claims to have content */
+    if((prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    pBiDi->proLength= proLength==-1 ? u_strlen(prologue) : proLength;
+    pBiDi->epiLength= epiLength==-1 ? u_strlen(epilogue) : epiLength;
+    pBiDi->prologue=prologue;
+    pBiDi->epilogue=epilogue;
+}
+
+/*
+ * Final bookkeeping of a successful setPara: the context lengths are zeroed
+ * so the prologue/epilogue are not reused, and pParaBiDi is pointed at the
+ * object itself as the "setPara succeeded" marker.
+ */
+static void
+setParaSuccess(UBiDi *pBiDi) {
+    pBiDi->proLength=pBiDi->epiLength=0;    /* forget the last context */
+    pBiDi->pParaBiDi=pBiDi;                 /* mark successful setPara */
+}
+
+#define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y)) /* smaller of the two; each argument may be evaluated twice, so pass side-effect-free expressions */
+#define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x))) /* absolute value; the argument is evaluated twice */
+
+/*
+ * ubidi_setPara() back end for UBIDI_REORDER_RUNS_ONLY.
+ *
+ * Steps:
+ *  1. Analyse the text with UBIDI_REORDER_DEFAULT (mark insertion, if
+ *     requested, is temporarily swapped for control removal) and save the
+ *     resulting levels, length, direction and trailingWSStart.
+ *  2. Write the reordered (visual) text and fetch the visual map.
+ *  3. Analyse the visual text with UBIDI_REORDER_INVERSE_LIKE_DIRECT and the
+ *     opposite paragraph level, and compute its runs.
+ *  4. Split every run whose neighbouring characters are not neighbours in the
+ *     original text or had different saved levels, and rewrite each run's
+ *     logicalStart in terms of the original text.
+ *  5. Restore text, length, direction, levels and trailingWSStart from step 1.
+ *
+ * On every exit the caller's reorderingOptions are back in place and
+ * reorderingMode is UBIDI_REORDER_RUNS_ONLY again. Allocation failures,
+ * including a failure to grow the runs array, are reported through
+ * *pErrorCode as U_MEMORY_ALLOCATION_ERROR.
+ */
+static void
+setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
+                UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
+    int32_t *runsOnlyMemory = NULL;
+    int32_t *visualMap;
+    UChar *visualText;
+    int32_t saveLength, saveTrailingWSStart;
+    const UBiDiLevel *levels;
+    UBiDiLevel *saveLevels;
+    UBiDiDirection saveDirection;
+    UBool saveMayAllocateText;
+    Run *runs;
+    int32_t visualLength, i, j, visualStart, logicalStart,
+            runCount, runLength, addedRuns, insertRemove,
+            start, limit, step, indexOddBit, logicalPos,
+            index0, index1;
+    uint32_t saveOptions;
+
+    pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
+    if(length==0) {
+        ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
+        goto cleanup3;
+    }
+    /* obtain memory for mapping table and visual text */
+    runsOnlyMemory=static_cast<int32_t *>(uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))));
+    if(runsOnlyMemory==NULL) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+        goto cleanup3;
+    }
+    /* one allocation, carved into three consecutive arrays */
+    visualMap=runsOnlyMemory;
+    visualText=(UChar *)&visualMap[length];
+    saveLevels=(UBiDiLevel *)&visualText[length];
+    saveOptions=pBiDi->reorderingOptions;
+    if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
+        pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
+        pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
+    }
+    paraLevel&=1;                       /* accept only 0 or 1 */
+    ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        /* do not leak the temporary option change to the caller */
+        pBiDi->reorderingOptions=saveOptions;
+        goto cleanup3;
+    }
+    /* we cannot access directly pBiDi->levels since it is not yet set if
+     * direction is not MIXED
+     */
+    levels=ubidi_getLevels(pBiDi, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        /* no levels to copy; nothing has been saved yet, so there is nothing
+         * for cleanup2 to restore
+         */
+        pBiDi->reorderingOptions=saveOptions;
+        goto cleanup3;
+    }
+    uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel));
+    saveTrailingWSStart=pBiDi->trailingWSStart;
+    saveLength=pBiDi->length;
+    saveDirection=pBiDi->direction;
+
+    /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
+     * the visual map and the dirProps array to drive the second call
+     * to ubidi_setPara (but must make provision for possible removal of
+     * BiDi controls.  Alternatively, only use the dirProps array via
+     * customized classifier callback.
+     */
+    visualLength=ubidi_writeReordered(pBiDi, visualText, length,
+                                      UBIDI_DO_MIRRORING, pErrorCode);
+    ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
+    /* restore the options before the error check so the failure exit
+     * restores them as well
+     */
+    pBiDi->reorderingOptions=saveOptions;
+    if(U_FAILURE(*pErrorCode)) {
+        goto cleanup2;
+    }
+
+    pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
+    paraLevel^=1;
+    /* Because what we did with reorderingOptions, visualText may be shorter
+     * than the original text. But we don't want the levels memory to be
+     * reallocated shorter than the original length, since we need to restore
+     * the levels as after the first call to ubidi_setpara() before returning.
+     * We will force mayAllocateText to FALSE before the second call to
+     * ubidi_setpara(), and will restore it afterwards.
+     */
+    saveMayAllocateText=pBiDi->mayAllocateText;
+    pBiDi->mayAllocateText=FALSE;
+    ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
+    pBiDi->mayAllocateText=saveMayAllocateText;
+    ubidi_getRuns(pBiDi, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        goto cleanup1;
+    }
+    /* check if some runs must be split, count how many splits */
+    addedRuns=0;
+    runCount=pBiDi->runCount;
+    runs=pBiDi->runs;
+    visualStart=0;
+    for(i=0; i<runCount; i++, visualStart+=runLength) {
+        runLength=runs[i].visualLimit-visualStart;
+        if(runLength<2) {
+            continue;
+        }
+        logicalStart=GET_INDEX(runs[i].logicalStart);
+        for(j=logicalStart+1; j<logicalStart+runLength; j++) {
+            index0=visualMap[j];
+            index1=visualMap[j-1];
+            if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
+                addedRuns++;
+            }
+        }
+    }
+    if(addedRuns) {
+        if(getRunsMemory(pBiDi, runCount+addedRuns)) {
+            if(runCount==1) {
+                /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
+                pBiDi->runsMemory[0]=runs[0];
+            }
+            runs=pBiDi->runs=pBiDi->runsMemory;
+            pBiDi->runCount+=addedRuns;
+        } else {
+            /* report the failure: the runs could not be split, so the
+             * result would otherwise be silently wrong
+             */
+            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+            goto cleanup1;
+        }
+    }
+    /* split runs which are not consecutive in source text */
+    for(i=runCount-1; i>=0; i--) {
+        runLength= i==0 ? runs[0].visualLimit :
+                          runs[i].visualLimit-runs[i-1].visualLimit;
+        logicalStart=runs[i].logicalStart;
+        indexOddBit=GET_ODD_BIT(logicalStart);
+        logicalStart=GET_INDEX(logicalStart);
+        if(runLength<2) {
+            if(addedRuns) {
+                runs[i+addedRuns]=runs[i];
+            }
+            logicalPos=visualMap[logicalStart];
+            runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
+                                            saveLevels[logicalPos]^indexOddBit);
+            continue;
+        }
+        /* walk the run from its visual end towards its visual start */
+        if(indexOddBit) {
+            start=logicalStart;
+            limit=logicalStart+runLength-1;
+            step=1;
+        } else {
+            start=logicalStart+runLength-1;
+            limit=logicalStart;
+            step=-1;
+        }
+        for(j=start; j!=limit; j+=step) {
+            index0=visualMap[j];
+            index1=visualMap[j+step];
+            if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
+                logicalPos=BIDI_MIN(visualMap[start], index0);
+                runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
+                                            saveLevels[logicalPos]^indexOddBit);
+                runs[i+addedRuns].visualLimit=runs[i].visualLimit;
+                runs[i].visualLimit-=BIDI_ABS(j-start)+1;
+                /* marks to insert after the run move to the split-off tail */
+                insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
+                runs[i+addedRuns].insertRemove=insertRemove;
+                runs[i].insertRemove&=~insertRemove;
+                start=j+step;
+                addedRuns--;
+            }
+        }
+        if(addedRuns) {
+            runs[i+addedRuns]=runs[i];
+        }
+        logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
+        runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
+                                            saveLevels[logicalPos]^indexOddBit);
+    }
+
+  cleanup1:
+    /* restore initial paraLevel */
+    pBiDi->paraLevel^=1;
+  cleanup2:
+    /* restore real text */
+    pBiDi->text=text;
+    pBiDi->length=saveLength;
+    pBiDi->originalLength=length;
+    pBiDi->direction=saveDirection;
+    /* the saved levels should never excess levelsSize, but we check anyway */
+    if(saveLength>pBiDi->levelsSize) {
+        saveLength=pBiDi->levelsSize;
+    }
+    uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel));
+    pBiDi->trailingWSStart=saveTrailingWSStart;
+    if(pBiDi->runCount>1) {
+        pBiDi->direction=UBIDI_MIXED;
+    }
+  cleanup3:
+    /* free memory for mapping table and visual text */
+    uprv_free(runsOnlyMemory);
+
+    pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
+}
+
+/* ubidi_setPara ------------------------------------------------------------ */
+
+U_CAPI void U_EXPORT2
+ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
+ UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
+ UErrorCode *pErrorCode) {
+ UBiDiDirection direction;
+ DirProp *dirProps;
+
+ /* check the argument values */
+ RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
+ if(pBiDi==NULL || text==NULL || length<-1 ||
+ (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ if(length==-1) {
+ length=u_strlen(text);
+ }
+
+ /* special treatment for RUNS_ONLY mode */
+ if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
+ setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
+ return;
+ }
+
+ /* initialize the UBiDi structure */
+ pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */
+ pBiDi->text=text;
+ pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
+ pBiDi->paraLevel=paraLevel;
+ pBiDi->direction=(UBiDiDirection)(paraLevel&1);
+ pBiDi->paraCount=1;
+
+ pBiDi->dirProps=NULL;
+ pBiDi->levels=NULL;
+ pBiDi->runs=NULL;
+ pBiDi->insertPoints.size=0; /* clean up from last call */
+ pBiDi->insertPoints.confirmed=0; /* clean up from last call */
+
+ /*
+ * Save the original paraLevel if contextual; otherwise, set to 0.
+ */
+ pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
+
+ if(length==0) {
+ /*
+ * For an empty paragraph, create a UBiDi object with the paraLevel and
+ * the flags and the direction set but without allocating zero-length arrays.
+ * There is nothing more to do.
+ */
+ if(IS_DEFAULT_LEVEL(paraLevel)) {
+ pBiDi->paraLevel&=1;
+ pBiDi->defaultParaLevel=0;
+ }
+ pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
+ pBiDi->runCount=0;
+ pBiDi->paraCount=0;
+ setParaSuccess(pBiDi); /* mark successful setPara */
+ return;
+ }
+
+ pBiDi->runCount=-1;
+
+ /* allocate paras memory */
+ if(pBiDi->parasMemory)
+ pBiDi->paras=pBiDi->parasMemory;
+ else
+ pBiDi->paras=pBiDi->simpleParas;
+
+ /*
+ * Get the directional properties,
+ * the flags bit-set, and
+ * determine the paragraph level if necessary.
+ */
+ if(getDirPropsMemory(pBiDi, length)) {
+ pBiDi->dirProps=pBiDi->dirPropsMemory;
+ if(!getDirProps(pBiDi)) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ } else {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ dirProps=pBiDi->dirProps;
+ /* the processed length may have changed if UBIDI_OPTION_STREAMING */
+ length= pBiDi->length;
+ pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
+
+ /* are explicit levels specified? */
+ if(embeddingLevels==NULL) {
+ /* no: determine explicit levels according to the (Xn) rules */\
+ if(getLevelsMemory(pBiDi, length)) {
+ pBiDi->levels=pBiDi->levelsMemory;
+ direction=resolveExplicitLevels(pBiDi, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+ } else {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ } else {
+ /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
+ pBiDi->levels=embeddingLevels;
+ direction=checkExplicitLevels(pBiDi, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+ }
+
+ /* allocate isolate memory */
+ if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT)
+ pBiDi->isolates=pBiDi->simpleIsolates;
+ else
+ if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
+ pBiDi->isolates=pBiDi->isolatesMemory;
+ else {
+ if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
+ pBiDi->isolates=pBiDi->isolatesMemory;
+ } else {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ }
+ pBiDi->isolateCount=-1; /* current isolates stack entry == none */
+
+ /*
+ * The steps after (X9) in the UBiDi algorithm are performed only if
+ * the paragraph text has mixed directionality!
+ */
+ pBiDi->direction=direction;
+ switch(direction) {
+ case UBIDI_LTR:
+ /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
+ pBiDi->trailingWSStart=0;
+ break;
+ case UBIDI_RTL:
+ /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
+ pBiDi->trailingWSStart=0;
+ break;
+ default:
+ /*
+ * Choose the right implicit state table
+ */
+ switch(pBiDi->reorderingMode) {
+ case UBIDI_REORDER_DEFAULT:
+ pBiDi->pImpTabPair=&impTab_DEFAULT;
+ break;
+ case UBIDI_REORDER_NUMBERS_SPECIAL:
+ pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
+ break;
+ case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
+ pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
+ break;
+ case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
+ pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
+ break;
+ case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
+ if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
+ pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
+ } else {
+ pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
+ }
+ break;
+ case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
+ if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
+ pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
+ } else {
+ pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
+ }
+ break;
+ default:
+ /* we should never get here */
+ UPRV_UNREACHABLE;
+ }
+ /*
+ * If there are no external levels specified and there
+ * are no significant explicit level codes in the text,
+ * then we can treat the entire paragraph as one run.
+ * Otherwise, we need to perform the following rules on runs of
+ * the text with the same embedding levels. (X10)
+ * "Significant" explicit level codes are ones that actually
+ * affect non-BN characters.
+ * Examples for "insignificant" ones are empty embeddings
+ * LRE-PDF, LRE-RLE-PDF-PDF, etc.
+ */
+ if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
+ !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
+ resolveImplicitLevels(pBiDi, 0, length,
+ GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
+ GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
+ } else {
+ /* sor, eor: start and end types of same-level-run */
+ UBiDiLevel *levels=pBiDi->levels;
+ int32_t start, limit=0;
+ UBiDiLevel level, nextLevel;
+ DirProp sor, eor;
+
+ /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
+ level=GET_PARALEVEL(pBiDi, 0);
+ nextLevel=levels[0];
+ if(level<nextLevel) {
+ eor=GET_LR_FROM_LEVEL(nextLevel);
+ } else {
+ eor=GET_LR_FROM_LEVEL(level);
+ }
+
+ do {
+ /* determine start and limit of the run (end points just behind the run) */
+
+ /* the values for this run's start are the same as for the previous run's end */
+ start=limit;
+ level=nextLevel;
+ if((start>0) && (dirProps[start-1]==B)) {
+ /* except if this is a new paragraph, then set sor = para level */
+ sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
+ } else {
+ sor=eor;
+ }
+
+ /* search for the limit of this run */
+ while((++limit<length) &&
+ ((levels[limit]==level) ||
+ (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
+
+ /* get the correct level of the next run */
+ if(limit<length) {
+ nextLevel=levels[limit];
+ } else {
+ nextLevel=GET_PARALEVEL(pBiDi, length-1);
+ }
+
+ /* determine eor from max(level, nextLevel); sor is last run's eor */
+ if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
+ eor=GET_LR_FROM_LEVEL(nextLevel);
+ } else {
+ eor=GET_LR_FROM_LEVEL(level);
+ }
+
+ /* if the run consists of overridden directional types, then there
+ are no implicit types to be resolved */
+ if(!(level&UBIDI_LEVEL_OVERRIDE)) {
+ resolveImplicitLevels(pBiDi, start, limit, sor, eor);
+ } else {
+ /* remove the UBIDI_LEVEL_OVERRIDE flags */
+ do {
+ levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
+ } while(start<limit);
+ }
+ } while(limit<length);
+ }
+ /* check if we got any memory shortage while adding insert points */
+ if (U_FAILURE(pBiDi->insertPoints.errorCode))
+ {
+ *pErrorCode=pBiDi->insertPoints.errorCode;
+ return;
+ }
+ /* reset the embedding levels for some non-graphic characters (L1), (X9) */
+ adjustWSLevels(pBiDi);
+ break;
+ }
+ /* add RLM for inverse Bidi with contextual orientation resolving
+ * to RTL which would not round-trip otherwise
+ */
+ if((pBiDi->defaultParaLevel>0) &&
+ (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
+ ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
+ (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
+ int32_t i, j, start, last;
+ UBiDiLevel level;
+ DirProp dirProp;
+ for(i=0; i<pBiDi->paraCount; i++) {
+ last=(pBiDi->paras[i].limit)-1;
+ level= static_cast<UBiDiLevel>(pBiDi->paras[i].level);
+ if(level==0)
+ continue; /* LTR paragraph */
+ start= i==0 ? 0 : pBiDi->paras[i-1].limit;
+ for(j=last; j>=start; j--) {
+ dirProp=dirProps[j];
+ if(dirProp==L) {
+ if(j<last) {
+ while(dirProps[last]==B) {
+ last--;
+ }
+ }
+ addPoint(pBiDi, last, RLM_BEFORE);
+ break;
+ }
+ if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
+ break;
+ }
+ }
+ }
+ }
+
+ if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
+ pBiDi->resultLength -= pBiDi->controlCount;
+ } else {
+ pBiDi->resultLength += pBiDi->insertPoints.size;
+ }
+ setParaSuccess(pBiDi); /* mark successful setPara */
+}
+
+/* Store the caller's orderParagraphsLTR flag on the object; a NULL pBiDi is ignored. */
+U_CAPI void U_EXPORT2
+ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
+    if(pBiDi==NULL) {
+        return;
+    }
+    pBiDi->orderParagraphsLTR=orderParagraphsLTR;
+}
+
+/* Read back the orderParagraphsLTR flag stored on the object; FALSE when pBiDi is NULL. */
+U_CAPI UBool U_EXPORT2
+ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
+    if(pBiDi==NULL) {
+        return FALSE;
+    }
+    return pBiDi->orderParagraphsLTR;
+}
+
+/*
+ * Return the direction field of pBiDi, or UBIDI_LTR when the
+ * IS_VALID_PARA_OR_LINE() check on the object fails.
+ */
+U_CAPI UBiDiDirection U_EXPORT2
+ubidi_getDirection(const UBiDi *pBiDi) {
+    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
+        return UBIDI_LTR;
+    }
+    return pBiDi->direction;
+}
+
+/*
+ * Return the text pointer stored in pBiDi, or NULL when the
+ * IS_VALID_PARA_OR_LINE() check on the object fails.
+ */
+U_CAPI const UChar * U_EXPORT2
+ubidi_getText(const UBiDi *pBiDi) {
+    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
+        return NULL;
+    }
+    return pBiDi->text;
+}
+
+/*
+ * Return pBiDi->originalLength, or 0 when the IS_VALID_PARA_OR_LINE()
+ * check on the object fails.
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getLength(const UBiDi *pBiDi) {
+    return IS_VALID_PARA_OR_LINE(pBiDi) ? pBiDi->originalLength : 0;
+}
+
+/*
+ * Return pBiDi->length (the processed length), or 0 when the
+ * IS_VALID_PARA_OR_LINE() check on the object fails.
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getProcessedLength(const UBiDi *pBiDi) {
+    return IS_VALID_PARA_OR_LINE(pBiDi) ? pBiDi->length : 0;
+}
+
+/*
+ * Return pBiDi->resultLength, or 0 when the IS_VALID_PARA_OR_LINE()
+ * check on the object fails.
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getResultLength(const UBiDi *pBiDi) {
+    return IS_VALID_PARA_OR_LINE(pBiDi) ? pBiDi->resultLength : 0;
+}
+
+/* paragraphs API functions ------------------------------------------------- */
+
+/*
+ * Return the paraLevel field of pBiDi, or 0 when the
+ * IS_VALID_PARA_OR_LINE() check on the object fails.
+ */
+U_CAPI UBiDiLevel U_EXPORT2
+ubidi_getParaLevel(const UBiDi *pBiDi) {
+    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
+        return 0;
+    }
+    return pBiDi->paraLevel;
+}
+
+/*
+ * Return pBiDi->paraCount, or 0 when the IS_VALID_PARA_OR_LINE()
+ * check on the object fails.
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_countParagraphs(UBiDi *pBiDi) {
+    return IS_VALID_PARA_OR_LINE(pBiDi) ? pBiDi->paraCount : 0;
+}
+
+/*
+ * Look up one paragraph by index and report, through whichever output
+ * pointers are non-NULL, its start offset, its limit, and the value of
+ * GET_PARALEVEL() at its start.
+ * The RETURN_VOID_IF_* macros check pErrorCode, the object and paraIndex
+ * (against 0 and pBiDi->paraCount) first; presumably each returns early
+ * on failure - their definitions are outside this part of the file.
+ */
+U_CAPI void U_EXPORT2
+ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
+                          int32_t *pParaStart, int32_t *pParaLimit,
+                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
+    /* validate the arguments */
+    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
+    RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
+    RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
+
+    /* use the Para object from here on, even if a Line object was passed */
+    const UBiDi *pPara=pBiDi->pParaBiDi;
+
+    /* a paragraph begins at its predecessor's limit; paragraph 0 begins at 0 */
+    const int32_t firstIndex= paraIndex!=0 ? pPara->paras[paraIndex-1].limit : 0;
+
+    if(pParaStart!=NULL) {
+        *pParaStart=firstIndex;
+    }
+    if(pParaLimit!=NULL) {
+        *pParaLimit=pPara->paras[paraIndex].limit;
+    }
+    if(pParaLevel!=NULL) {
+        *pParaLevel=GET_PARALEVEL(pPara, firstIndex);
+    }
+}
+
+/*
+ * Find the paragraph that contains charIndex, report its bounds and level
+ * through ubidi_getParagraphByIndex(), and return that paragraph's index.
+ * -1 is the value handed to the RETURN_IF_* argument checks, presumably
+ * as the result to return when a check fails.
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
+                   int32_t *pParaStart, int32_t *pParaLimit,
+                   UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
+    /* validate the arguments */
+    RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
+    RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
+    pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
+    RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
+
+    /* advance to the first paragraph whose limit lies beyond charIndex */
+    int32_t paraIndex=0;
+    while(charIndex>=pBiDi->paras[paraIndex].limit) {
+        ++paraIndex;
+    }
+
+    ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
+    return paraIndex;
+}
+
+/*
+ * Install a new class callback and its context pointer on pBiDi.
+ * The pair being replaced is copied out first through oldFn / oldContext
+ * when those pointers are non-NULL.
+ * A NULL pBiDi sets *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR and changes nothing.
+ */
+U_CAPI void U_EXPORT2
+ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
+                       const void *newContext, UBiDiClassCallback **oldFn,
+                       const void **oldContext, UErrorCode *pErrorCode)
+{
+    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
+    if(pBiDi==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    /* hand out the current settings before overwriting them */
+    if(oldFn!=NULL) {
+        *oldFn=pBiDi->fnClassCallback;
+    }
+    if(oldContext!=NULL) {
+        *oldContext=pBiDi->coClassCallback;
+    }
+
+    pBiDi->fnClassCallback=newFn;
+    pBiDi->coClassCallback=newContext;
+}
+
+/*
+ * Copy the currently installed class callback and its context into *fn and
+ * *context. Either output pointer may be NULL to skip that value; nothing
+ * is written when pBiDi is NULL.
+ */
+U_CAPI void U_EXPORT2
+ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
+{
+    if(pBiDi!=NULL) {
+        if(fn!=NULL) {
+            *fn=pBiDi->fnClassCallback;
+        }
+        if(context!=NULL) {
+            *context=pBiDi->coClassCallback;
+        }
+    }
+}
+
+/*
+ * Determine the bidi class of c, giving the class callback installed on
+ * pBiDi (if any) the first say. When no callback is set, or the callback
+ * answers U_BIDI_CLASS_DEFAULT, the class comes from ubidi_getClass().
+ * Any result that is >= U_CHAR_DIRECTION_COUNT is replaced by ON.
+ * pBiDi is dereferenced without a NULL check.
+ */
+U_CAPI UCharDirection U_EXPORT2
+ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
+{
+    UCharDirection cls=U_BIDI_CLASS_DEFAULT;
+
+    if(pBiDi->fnClassCallback!=NULL) {
+        cls=(*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c);
+    }
+    if(cls==U_BIDI_CLASS_DEFAULT) {
+        cls=ubidi_getClass(c);
+    }
+    if(cls>=U_CHAR_DIRECTION_COUNT) {
+        cls=(UCharDirection)ON;
+    }
+    return cls;
+}
diff --git a/thirdparty/icu4c/common/ubidi_props.cpp b/thirdparty/icu4c/common/ubidi_props.cpp
new file mode 100644
index 0000000000..afcc4aaf4f
--- /dev/null
+++ b/thirdparty/icu4c/common/ubidi_props.cpp
@@ -0,0 +1,254 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2004-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ubidi_props.c
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004dec30
+* created by: Markus W. Scherer
+*
+* Low-level Unicode bidi/shaping properties access.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uset.h"
+#include "unicode/udata.h" /* UDataInfo */
+#include "ucmndata.h" /* DataHeader */
+#include "udatamem.h"
+#include "uassert.h"
+#include "cmemory.h"
+#include "utrie2.h"
+#include "ubidi_props.h"
+#include "ucln_cmn.h"
+
+struct UBiDiProps { /* bundle of the bidi/shaping data tables; presumably the type of ubidi_props_singleton - confirm in ubidi_props_data.h */
+ UDataMemory *mem; /* not referenced in the visible part of this file */
+ const int32_t *indexes; /* header values, addressed with the UBIDI_IX_* / UBIDI_MAX_VALUES_INDEX constants */
+ const uint32_t *mirrors; /* mirror table entries, decoded with UBIDI_GET_MIRROR_CODE_POINT / UBIDI_GET_MIRROR_INDEX */
+ const uint8_t *jgArray; /* Joining_Group values for code points in [UBIDI_IX_JG_START, UBIDI_IX_JG_LIMIT) */
+ const uint8_t *jgArray2; /* Joining_Group values for code points in [UBIDI_IX_JG_START2, UBIDI_IX_JG_LIMIT2) */
+
+ UTrie2 trie; /* per-code-point 16-bit properties word, read with UTRIE2_GET16 */
+ uint8_t formatVersion[4]; /* not referenced in the visible part of this file */
+};
+
+/* ubidi_props_data.h is machine-generated by icu/tools/unicode/c/genprops/bidipropsbuilder.cpp */
+#define INCLUDED_FROM_UBIDI_PROPS_C
+#include "ubidi_props_data.h"
+
+/* set of property starts for UnicodeSet ------------------------------------ */
+
+/*
+ * Range callback passed to utrie2_enum(): records only the first code point
+ * of the range in the set reached through the USetAdder in context.
+ * Always returns TRUE.
+ */
+static UBool U_CALLCONV
+_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
+    /* neither the range end nor the trie value is needed here */
+    (void)value;
+    (void)end;
+
+    const USetAdder *adder=static_cast<const USetAdder *>(context);
+    adder->add(adder->set, start);
+    return TRUE;
+}
+
+/*
+ * Report to the USetAdder the code points collected from this file's data:
+ *  - the start of every same-value range of the trie,
+ *  - for every mirror table entry, its code point c via addRange(c, c+1),
+ *  - every code point at which the Joining_Group value changes, for both
+ *    Joining_Group ranges.
+ * Does nothing when *pErrorCode already indicates a failure.
+ */
+U_CFUNC void
+ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return;
+    }
+
+    /* add the start code point of each same-value range of the trie */
+    utrie2_enum(&ubidi_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
+
+    /* add the code points from the bidi mirroring table */
+    const int32_t mirrorCount=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH];
+    for(int32_t i=0; i<mirrorCount; ++i) {
+        const UChar32 mirrored=UBIDI_GET_MIRROR_CODE_POINT(ubidi_props_singleton.mirrors[i]);
+        sa->addRange(sa->set, mirrored, mirrored+1);
+    }
+
+    /* add the code points from the Joining_Group arrays where the value changes */
+    UChar32 pos=ubidi_props_singleton.indexes[UBIDI_IX_JG_START];
+    UChar32 rangeLimit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT];
+    const uint8_t *groups=ubidi_props_singleton.jgArray;
+    for(;;) {
+        uint8_t lastGroup=0;
+        for(; pos<rangeLimit; ++pos) {
+            const uint8_t group=*groups++;
+            if(group!=lastGroup) {
+                sa->add(sa->set, pos);
+                lastGroup=group;
+            }
+        }
+        if(lastGroup!=0) {
+            /* add the limit code point if the last value was not 0 */
+            sa->add(sa->set, rangeLimit);
+        }
+        if(rangeLimit!=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]) {
+            /* that was the second range */
+            break;
+        }
+        /* that was the first range: switch to the second Joining_Group range */
+        pos=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2];
+        rangeLimit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2];
+        groups=ubidi_props_singleton.jgArray2;
+    }
+
+    /* add code points with hardcoded properties, plus the ones following them */
+
+    /* (none right now) */
+}
+
+/* property access functions ------------------------------------------------ */
+
+/*
+ * Decode one field of the packed word stored at
+ * indexes[UBIDI_MAX_VALUES_INDEX], selected by the property `which`.
+ * Handles UCHAR_BIDI_CLASS, UCHAR_JOINING_GROUP, UCHAR_JOINING_TYPE and
+ * UCHAR_BIDI_PAIRED_BRACKET_TYPE; any other property yields -1.
+ */
+U_CFUNC int32_t
+ubidi_getMaxValue(UProperty which) {
+    const int32_t packed=ubidi_props_singleton.indexes[UBIDI_MAX_VALUES_INDEX];
+
+    if(which==UCHAR_BIDI_CLASS) {
+        return (packed&UBIDI_CLASS_MASK);
+    }
+    if(which==UCHAR_JOINING_GROUP) {
+        return (packed&UBIDI_MAX_JG_MASK)>>UBIDI_MAX_JG_SHIFT;
+    }
+    if(which==UCHAR_JOINING_TYPE) {
+        return (packed&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT;
+    }
+    if(which==UCHAR_BIDI_PAIRED_BRACKET_TYPE) {
+        return (packed&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT;
+    }
+    return -1; /* undefined */
+}
+
+/* Bidi class of c: the UBIDI_CLASS_MASK bits of its trie properties word. */
+U_CAPI UCharDirection
+ubidi_getClass(UChar32 c) {
+    const uint16_t word=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
+    return (UCharDirection)UBIDI_GET_CLASS(word);
+}
+
+/* Value of the 'is mirrored' bit (UBIDI_IS_MIRRORED_SHIFT) in the properties word of c. */
+U_CFUNC UBool
+ubidi_isMirrored(UChar32 c) {
+    const uint16_t word=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
+    return (UBool)UBIDI_GET_FLAG(word, UBIDI_IS_MIRRORED_SHIFT);
+}
+
+/*
+ * Resolve the mirror code point of c from its properties word.
+ * A mirror delta other than UBIDI_ESC_MIRROR_DELTA is simply added to c.
+ * The escape value means the mirror must be searched in mirrors[]; if c has
+ * no entry there, c itself is returned.
+ */
+static UChar32
+getMirror(UChar32 c, uint16_t props) {
+    const int32_t delta=UBIDI_GET_MIRROR_DELTA(props);
+    if(delta!=UBIDI_ESC_MIRROR_DELTA) {
+        return c+delta;
+    }
+
+    /* look for mirror code point in the mirrors[] table */
+    const uint32_t *table=ubidi_props_singleton.mirrors;
+    const int32_t count=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH];
+
+    /* linear search */
+    for(int32_t i=0; i<count; ++i) {
+        const uint32_t entry=table[i];
+        const UChar32 candidate=UBIDI_GET_MIRROR_CODE_POINT(entry);
+        if(candidate==c) {
+            /* found c, return its mirror code point using the index in the entry */
+            return UBIDI_GET_MIRROR_CODE_POINT(table[UBIDI_GET_MIRROR_INDEX(entry)]);
+        }
+        if(candidate>c) {
+            /* give up early; presumably the table is sorted by code point */
+            break;
+        }
+    }
+
+    /* c not found, return it itself */
+    return c;
+}
+
+/* Mirror code point of c: fetches the properties word and defers to getMirror(). */
+U_CFUNC UChar32
+ubidi_getMirror(UChar32 c) {
+    const uint16_t word=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
+    return getMirror(c, word);
+}
+
+/* Value of the bidi-control bit (UBIDI_BIDI_CONTROL_SHIFT) in the properties word of c. */
+U_CFUNC UBool
+ubidi_isBidiControl(UChar32 c) {
+    const uint16_t word=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
+    return (UBool)UBIDI_GET_FLAG(word, UBIDI_BIDI_CONTROL_SHIFT);
+}
+
+/* Value of the join-control bit (UBIDI_JOIN_CONTROL_SHIFT) in the properties word of c. */
+U_CFUNC UBool
+ubidi_isJoinControl(UChar32 c) {
+    const uint16_t word=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
+    return (UBool)UBIDI_GET_FLAG(word, UBIDI_JOIN_CONTROL_SHIFT);
+}
+
+/* Joining type of c: the UBIDI_JT_MASK field of its properties word. */
+U_CFUNC UJoiningType
+ubidi_getJoiningType(UChar32 c) {
+    const uint16_t word=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
+    return (UJoiningType)((word&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT);
+}
+
+/*
+ * Joining group of c. The value is read from jgArray when c lies in
+ * [UBIDI_IX_JG_START, UBIDI_IX_JG_LIMIT), from jgArray2 when it lies in
+ * [UBIDI_IX_JG_START2, UBIDI_IX_JG_LIMIT2); otherwise the result is
+ * U_JG_NO_JOINING_GROUP.
+ */
+U_CFUNC UJoiningGroup
+ubidi_getJoiningGroup(UChar32 c) {
+    const int32_t *idx=ubidi_props_singleton.indexes;
+
+    /* first Joining_Group range */
+    const UChar32 first=idx[UBIDI_IX_JG_START];
+    if(first<=c && c<idx[UBIDI_IX_JG_LIMIT]) {
+        return (UJoiningGroup)ubidi_props_singleton.jgArray[c-first];
+    }
+
+    /* second Joining_Group range */
+    const UChar32 first2=idx[UBIDI_IX_JG_START2];
+    if(first2<=c && c<idx[UBIDI_IX_JG_LIMIT2]) {
+        return (UJoiningGroup)ubidi_props_singleton.jgArray2[c-first2];
+    }
+
+    return U_JG_NO_JOINING_GROUP;
+}
+
+/* Bidi_Paired_Bracket_Type of c: the UBIDI_BPT_MASK field of its properties word. */
+U_CFUNC UBidiPairedBracketType
+ubidi_getPairedBracketType(UChar32 c) {
+    const uint16_t word=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
+    return (UBidiPairedBracketType)((word&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT);
+}
+
+/*
+ * Paired bracket of c. A code point whose Bidi_Paired_Bracket_Type field is
+ * zero maps to itself; otherwise the result is the mirror found by getMirror().
+ */
+U_CFUNC UChar32
+ubidi_getPairedBracket(UChar32 c) {
+    const uint16_t word=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
+    return (word&UBIDI_BPT_MASK)!=0 ? getMirror(c, word) : c;
+}
+
+/* public API (see uchar.h) ------------------------------------------------- */
+
+U_CFUNC UCharDirection
+u_charDirection(UChar32 c) {
+ return ubidi_getClass(c); /* plain forwarder: the bidi class lookup lives in ubidi_getClass() */
+}
+
+U_CFUNC UBool
+u_isMirrored(UChar32 c) {
+ return ubidi_isMirrored(c); /* plain forwarder: the flag lookup lives in ubidi_isMirrored() */
+}
+
+U_CFUNC UChar32
+u_charMirror(UChar32 c) {
+ return ubidi_getMirror(c); /* plain forwarder: returns c itself when ubidi_getMirror() finds no mirror */
+}
+
+U_CAPI UChar32 U_EXPORT2
+u_getBidiPairedBracket(UChar32 c) {
+ return ubidi_getPairedBracket(c); /* plain forwarder: returns c itself when c has no paired-bracket type */
+}
diff --git a/thirdparty/icu4c/common/ubidi_props.h b/thirdparty/icu4c/common/ubidi_props.h
new file mode 100644
index 0000000000..698ee9c52b
--- /dev/null
+++ b/thirdparty/icu4c/common/ubidi_props.h
@@ -0,0 +1,148 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2004-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ubidi_props.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004dec30
+* created by: Markus W. Scherer
+*
+* Low-level Unicode bidi/shaping properties access.
+*/
+
+#ifndef __UBIDI_PROPS_H__
+#define __UBIDI_PROPS_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uset.h"
+#include "putilimp.h"
+#include "uset_imp.h"
+#include "udataswp.h"
+
+U_CDECL_BEGIN
+
+/* library API -------------------------------------------------------------- */
+
+U_CFUNC void
+ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode); /* adds trie range starts, mirror table code points and Joining_Group change points to sa; no-op if *pErrorCode is a failure */
+
+/* property access functions */
+
+U_CFUNC int32_t
+ubidi_getMaxValue(UProperty which); /* field of indexes[UBIDI_MAX_VALUES_INDEX] for bidi class, joining group, joining type or paired bracket type; -1 for other properties */
+
+U_CAPI UCharDirection
+ubidi_getClass(UChar32 c); /* bidi class bits of the properties word of c */
+
+U_CFUNC UBool
+ubidi_isMirrored(UChar32 c); /* 'is mirrored' flag of c */
+
+U_CFUNC UChar32
+ubidi_getMirror(UChar32 c); /* mirror code point of c, or c itself if none is found */
+
+U_CFUNC UBool
+ubidi_isBidiControl(UChar32 c); /* bidi-control flag of c */
+
+U_CFUNC UBool
+ubidi_isJoinControl(UChar32 c); /* join-control flag of c */
+
+U_CFUNC UJoiningType
+ubidi_getJoiningType(UChar32 c); /* joining type field of the properties word of c */
+
+U_CFUNC UJoiningGroup
+ubidi_getJoiningGroup(UChar32 c); /* joining group from the two Joining_Group arrays; U_JG_NO_JOINING_GROUP outside both ranges */
+
+U_CFUNC UBidiPairedBracketType
+ubidi_getPairedBracketType(UChar32 c); /* paired bracket type field of the properties word of c */
+
+U_CFUNC UChar32
+ubidi_getPairedBracket(UChar32 c); /* c itself if its paired bracket type field is 0, else its mirror */
+
+/* file definitions --------------------------------------------------------- */
+
+#define UBIDI_DATA_NAME "ubidi"
+#define UBIDI_DATA_TYPE "icu"
+
+/* format "BiDi" */
+#define UBIDI_FMT_0 0x42
+#define UBIDI_FMT_1 0x69
+#define UBIDI_FMT_2 0x44
+#define UBIDI_FMT_3 0x69
+
+/* indexes into indexes[] */
+enum {
+ UBIDI_IX_INDEX_TOP,
+ UBIDI_IX_LENGTH,
+ UBIDI_IX_TRIE_SIZE,
+ UBIDI_IX_MIRROR_LENGTH, /* number of mirrors[] entries (loop bound in ubidi_props.cpp) */
+
+ UBIDI_IX_JG_START, /* first code point covered by jgArray */
+ UBIDI_IX_JG_LIMIT, /* exclusive end of the jgArray range */
+ UBIDI_IX_JG_START2, /* new in format version 2.2, ICU 54 */
+ UBIDI_IX_JG_LIMIT2, /* exclusive end of the jgArray2 range */
+
+ UBIDI_MAX_VALUES_INDEX=15, /* packed maximum values, decoded by ubidi_getMaxValue() */
+ UBIDI_IX_TOP=16 /* used as the size of the indexes array in ubidi_props_data.h */
+};
+
+/* definitions for 16-bit bidi/shaping properties word ---------------------- */
+
+enum {
+ /* UBIDI_CLASS_SHIFT=0, */ /* bidi class: 5 bits (4..0) */
+ UBIDI_JT_SHIFT=5, /* joining type: 3 bits (7..5) */
+
+ UBIDI_BPT_SHIFT=8, /* Bidi_Paired_Bracket_Type(bpt): 2 bits (9..8) */
+
+ UBIDI_JOIN_CONTROL_SHIFT=10, /* single-bit flag, read with UBIDI_GET_FLAG by ubidi_isJoinControl() */
+ UBIDI_BIDI_CONTROL_SHIFT=11, /* single-bit flag, read with UBIDI_GET_FLAG by ubidi_isBidiControl() */
+
+ UBIDI_IS_MIRRORED_SHIFT=12, /* 'is mirrored' */
+ UBIDI_MIRROR_DELTA_SHIFT=13, /* bidi mirroring delta: 3 bits (15..13) */
+
+ UBIDI_MAX_JG_SHIFT=16 /* max JG value in indexes[UBIDI_MAX_VALUES_INDEX] bits 23..16 */
+};
+
+#define UBIDI_CLASS_MASK 0x0000001f /* bits 4..0: bidi class */
+#define UBIDI_JT_MASK 0x000000e0 /* bits 7..5: joining type, shift by UBIDI_JT_SHIFT */
+#define UBIDI_BPT_MASK 0x00000300 /* bits 9..8: paired bracket type, shift by UBIDI_BPT_SHIFT */
+
+#define UBIDI_MAX_JG_MASK 0x00ff0000 /* bits 23..16 of indexes[UBIDI_MAX_VALUES_INDEX], shift by UBIDI_MAX_JG_SHIFT */
+
+#define UBIDI_GET_CLASS(props) ((props)&UBIDI_CLASS_MASK) /* extract the bidi class field */
+#define UBIDI_GET_FLAG(props, shift) (((props)>>(shift))&1) /* extract the single bit at position shift */
+
+#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC /* both variants yield the signed 3-bit delta stored in bits 15..13 */
+# define UBIDI_GET_MIRROR_DELTA(props) ((int16_t)(props)>>UBIDI_MIRROR_DELTA_SHIFT)
+#else /* sign-extend by hand: OR in 0xe000 when bit 15 is set */
+# define UBIDI_GET_MIRROR_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UBIDI_MIRROR_DELTA_SHIFT)|0xe000) : ((props)>>UBIDI_MIRROR_DELTA_SHIFT))
+#endif
+
+enum {
+ UBIDI_ESC_MIRROR_DELTA=-4, /* escape value: getMirror() searches mirrors[] instead of adding a delta */
+ UBIDI_MIN_MIRROR_DELTA=-3, /* smallest delta other than the escape value */
+ UBIDI_MAX_MIRROR_DELTA=3 /* largest delta representable in the signed 3-bit field */
+};
+
+/* definitions for 32-bit mirror table entry -------------------------------- */
+
+enum {
+ /* the source Unicode code point takes 21 bits (20..0) */
+ UBIDI_MIRROR_INDEX_SHIFT=21, /* the bits above the code point hold an index into mirrors[] */
+ UBIDI_MAX_MIRROR_INDEX=0x7ff /* 11 bits remain for the index in a 32-bit entry */
+};
+
+#define UBIDI_GET_MIRROR_CODE_POINT(m) (UChar32)((m)&0x1fffff) /* low 21 bits of a mirrors[] entry */
+
+#define UBIDI_GET_MIRROR_INDEX(m) ((m)>>UBIDI_MIRROR_INDEX_SHIFT) /* index of the mirrors[] entry holding the mirror code point (see getMirror) */
+
+U_CDECL_END
+
+#endif
diff --git a/thirdparty/icu4c/common/ubidi_props_data.h b/thirdparty/icu4c/common/ubidi_props_data.h
new file mode 100644
index 0000000000..7a34870bd8
--- /dev/null
+++ b/thirdparty/icu4c/common/ubidi_props_data.h
@@ -0,0 +1,922 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// Copyright (C) 1999-2016, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// file name: ubidi_props_data.h
+//
+// machine-generated by: icu/tools/unicode/c/genprops/bidipropsbuilder.cpp
+
+
+#ifdef INCLUDED_FROM_UBIDI_PROPS_C
+
+static const UVersionInfo ubidi_props_dataVersion={0xd,0,0,0}; /* 13.0.0.0; presumably the Unicode version of the generated data -- confirm */
+
+static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x67ec,0x6200,0x28,0x620,0x8c8,0x10ac0,0x10d24,0,0,0,0,0,0,0,0x6502b6}; /* UBIDI_IX_TOP slots; the last one is indexes[UBIDI_MAX_VALUES_INDEX], whose bits 23..16 (0x65 here) hold the max JG value */
+
+static const uint16_t ubidi_props_trieIndex[12536]={
+0x37c,0x384,0x38c,0x394,0x3ac,0x3b4,0x3bc,0x3c4,0x39c,0x3a4,0x39c,0x3a4,0x39c,0x3a4,0x39c,0x3a4,
+0x39c,0x3a4,0x39c,0x3a4,0x3ca,0x3d2,0x3da,0x3e2,0x3ea,0x3f2,0x3ee,0x3f6,0x3fe,0x406,0x401,0x409,
+0x39c,0x3a4,0x39c,0x3a4,0x411,0x419,0x39c,0x3a4,0x39c,0x3a4,0x39c,0x3a4,0x41f,0x427,0x42f,0x437,
+0x43f,0x447,0x44f,0x457,0x45d,0x465,0x46d,0x475,0x47d,0x485,0x48b,0x493,0x49b,0x4a3,0x4ab,0x4b3,
+0x4bf,0x4bb,0x4c7,0x4cf,0x431,0x4df,0x4e6,0x4d7,0x4ee,0x4f0,0x4f8,0x500,0x508,0x509,0x511,0x519,
+0x521,0x509,0x529,0x52e,0x521,0x509,0x536,0x53e,0x508,0x546,0x54e,0x500,0x556,0x39c,0x55e,0x562,
+0x56a,0x56c,0x574,0x57c,0x508,0x584,0x58c,0x500,0x413,0x590,0x511,0x500,0x508,0x39c,0x598,0x39c,
+0x39c,0x59e,0x5a6,0x39c,0x39c,0x5aa,0x5b2,0x39c,0x5b6,0x5bd,0x39c,0x5c5,0x5cd,0x5d4,0x555,0x39c,
+0x39c,0x5dc,0x5e4,0x5ec,0x5f4,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x5fc,0x39c,0x604,0x39c,0x39c,0x39c,
+0x60c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x614,0x39c,0x39c,0x39c,0x61c,0x61c,0x515,0x515,0x39c,0x622,0x62a,0x604,
+0x640,0x632,0x632,0x648,0x64f,0x638,0x39c,0x39c,0x39c,0x657,0x65f,0x39c,0x39c,0x39c,0x661,0x669,
+0x671,0x39c,0x678,0x680,0x39c,0x688,0x56b,0x39c,0x545,0x690,0x556,0x698,0x413,0x6a0,0x39c,0x6a7,
+0x39c,0x6ac,0x39c,0x39c,0x39c,0x39c,0x6b2,0x6ba,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x3ea,0x6c2,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x6ca,0x6d2,0x6d6,
+0x6ee,0x6f4,0x6de,0x6e6,0x6fc,0x704,0x708,0x5d7,0x710,0x718,0x720,0x39c,0x728,0x669,0x669,0x669,
+0x738,0x740,0x748,0x750,0x755,0x75d,0x765,0x730,0x76d,0x775,0x39c,0x77b,0x782,0x669,0x669,0x669,
+0x669,0x582,0x788,0x669,0x790,0x39c,0x39c,0x666,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,
+0x669,0x669,0x669,0x669,0x669,0x798,0x669,0x669,0x669,0x669,0x669,0x79e,0x669,0x669,0x7a6,0x7ae,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x669,0x669,0x669,0x669,0x7be,0x7c6,0x7ce,0x7b6,
+0x7de,0x7e6,0x7ee,0x7f5,0x7fc,0x804,0x808,0x7d6,0x669,0x669,0x669,0x810,0x816,0x669,0x669,0x81c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x824,0x39c,0x39c,0x39c,0x82c,0x39c,0x39c,0x39c,0x3ea,
+0x834,0x83c,0x840,0x39c,0x848,0x669,0x669,0x66c,0x669,0x669,0x669,0x669,0x669,0x669,0x84f,0x855,
+0x865,0x85d,0x39c,0x39c,0x86d,0x60c,0x39c,0x3c3,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x669,0x82b,
+0x3d1,0x39c,0x875,0x87d,0x39c,0x885,0x88d,0x39c,0x39c,0x39c,0x39c,0x891,0x39c,0x39c,0x661,0x3c2,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x669,0x669,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x875,0x669,0x582,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x898,0x39c,0x39c,0x89d,0x56c,0x39c,0x39c,0x5b8,0x669,0x660,0x39c,0x39c,0x8a5,0x39c,0x39c,0x39c,
+0x8ad,0x8b4,0x632,0x8bc,0x39c,0x39c,0x58e,0x8c4,0x39c,0x8cc,0x8d3,0x39c,0x4ee,0x8d8,0x39c,0x507,
+0x39c,0x8e0,0x8e8,0x509,0x39c,0x8ec,0x508,0x8f4,0x39c,0x39c,0x39c,0x8fa,0x39c,0x39c,0x39c,0x901,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x915,0x909,0x90d,0x49b,0x49b,0x49b,0x49b,0x49b,
+0x49b,0x49b,0x49b,0x49b,0x49b,0x49b,0x49b,0x49b,0x49b,0x91d,0x49b,0x49b,0x49b,0x49b,0x925,0x929,
+0x931,0x939,0x93d,0x945,0x49b,0x49b,0x49b,0x949,0x951,0x38c,0x959,0x961,0x39c,0x39c,0x39c,0x969,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0xe70,0xe70,0xeb0,0xef0,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xf28,0xf68,0xfa8,0xfb8,0xff8,0x1004,
+0xe70,0xe70,0x1044,0xe70,0xe70,0xe70,0x107c,0x10bc,0x10fc,0x113c,0x1174,0x11b4,0x11f4,0x122c,0x126c,0x12ac,
+0xa40,0xa80,0xac0,0xaff,0x1a0,0x1a0,0xb3f,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xb68,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xba8,0x1a0,0x1a0,0xbdd,0xc1d,0xc5d,0xc9d,0xcdd,0xd1d,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
+0xd9d,0xdad,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
+0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x971,0x39c,0x669,0x669,0x979,0x60c,0x39c,0x501,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x981,0x39c,0x39c,0x39c,0x988,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x990,0x431,0x431,0x431,0x431,0x431,0x431,0x431,
+0x998,0x99c,0x431,0x431,0x431,0x431,0x9ac,0x9a4,0x431,0x9b4,0x431,0x431,0x9bc,0x9c2,0x431,0x431,
+0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x9d2,0x9ca,0x431,0x431,0x431,0x431,0x431,0x431,
+0x431,0x431,0x431,0x9da,0x431,0x9e2,0x431,0x431,0x431,0x9e6,0x9ed,0x9f3,0x431,0x9f7,0x9ff,0x431,
+0x508,0xa07,0xa0e,0xa15,0x413,0xa18,0x39c,0x39c,0x4ee,0xa1f,0x39c,0xa25,0x413,0xa2a,0xa32,0x39c,
+0x39c,0xa37,0x39c,0x39c,0x39c,0x39c,0x82c,0xa3f,0x413,0x590,0x56b,0xa46,0x39c,0x39c,0x39c,0x39c,
+0x39c,0xa07,0xa4e,0x39c,0x39c,0xa56,0xa5e,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xa62,0xa6a,0x39c,
+0x39c,0xa72,0x56b,0xa7a,0x39c,0xa80,0x39c,0x39c,0x5fc,0xa88,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0xa8d,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xa94,0xa9c,0x39c,0x39c,0x39c,0xa9f,0x56b,0xaa7,
+0xaab,0xab3,0x39c,0xaba,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0xac1,0x39c,0x39c,0xacf,0xac9,0x39c,0x39c,0x39c,0xad7,0xadf,0x39c,0xae3,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x592,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xaf0,0xaeb,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0xaf8,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xaff,
+0x39c,0xb05,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0xa26,0x39c,0xb0b,0x39c,0x39c,0xb13,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x522,0xb1b,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xb22,0xb2a,0xb30,0x39c,0x39c,0x669,0x669,0xb38,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x669,0x669,0x83f,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0xb3a,0x39c,0xb41,0x39c,0xb3d,0x39c,0xb44,0x39c,0xb4c,0xb50,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x3ea,0xb58,0x3ea,
+0xb5f,0xb66,0xb6e,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xb76,0xb7e,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xb05,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0xb83,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x431,0x431,0x431,
+0x431,0x431,0x431,0xb8b,0x431,0xb93,0xb93,0xb9a,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,
+0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,
+0x90d,0x49b,0x49b,0x431,0x431,0x49b,0x49b,0x9f3,0x431,0x431,0x431,0x431,0x431,0x49b,0x49b,0x49b,
+0x49b,0x49b,0x49b,0x49b,0xba2,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x669,0xbaa,0x669,
+0x669,0x66c,0xbaf,0xbb3,0x84f,0xbbb,0x3be,0x39c,0xbc1,0x39c,0xbc6,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x779,0x39c,0x39c,0x39c,0x39c,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,
+0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,
+0x669,0x669,0x669,0x66b,0x979,0x669,0x669,0x669,0x66c,0x669,0x669,0xbce,0x66e,0xbaa,0x669,0xbd6,
+0x669,0xbde,0xbe3,0x39c,0x39c,0x669,0x669,0x669,0xbeb,0x669,0x669,0x798,0x669,0x669,0x669,0x66c,
+0xbf2,0xbfa,0xc00,0xc05,0x39c,0x669,0x669,0x669,0x669,0xc0d,0x669,0x788,0xc15,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xc1c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,
+0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xc1c,0xc2c,0xc24,0xc24,
+0xc24,0xc2d,0xc2d,0xc2d,0xc2d,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0xc35,0xc2d,0xc2d,0xc2d,
+0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,
+0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,
+0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,
+0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0x37b,0x37b,0x37b,
+0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,8,7,8,9,7,0x12,0x12,
+0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,7,7,7,8,
+9,0xa,0xa,4,4,4,0xa,0xa,0x310a,0xf20a,0xa,3,6,3,6,6,
+2,2,2,2,2,2,2,2,2,2,6,0xa,0x500a,0xa,0xd00a,0xa,
+0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0x510a,0xa,0xd20a,0xa,0xa,
+0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0x510a,0xa,0xd20a,0xa,0x12,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x12,0x12,0x12,0x12,0x12,7,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+6,0xa,4,4,4,4,0xa,0xa,0xa,0xa,0,0x900a,0xa,0xb2,0xa,0xa,
+4,4,2,2,0xa,0,0xa,0xa,0xa,2,0,0x900a,0xa,0xa,0xa,0xa,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0xa,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0xa,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0xa,0xa,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xa,0xa,0,0,
+0,0,0,0,0,0,0xa,0,0,0,0,0,0xa,0xa,0,0xa,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0xa,0,0,0,0,0,
+0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0xa,0,0,0xa,0xa,4,1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,1,0xb1,1,0xb1,0xb1,1,
+0xb1,0xb1,1,0xb1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,5,5,5,5,
+5,5,0xa,0xa,0xd,4,4,0xd,6,0xd,0xa,0xa,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xd,0x8ad,0xd,0xd,0xd,0x4d,0xd,0x8d,0x8d,
+0x8d,0x8d,0x4d,0x8d,0x4d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d,0x4d,
+0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x2d,0x4d,0x4d,0x4d,
+0x4d,0x4d,0x4d,0x4d,0x8d,0x4d,0x4d,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,5,5,5,5,
+5,5,5,5,5,5,4,5,5,0xd,0x4d,0x4d,0xb1,0x8d,0x8d,0x8d,
+0xd,0x8d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d,
+0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x4d,0x4d,
+0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,
+0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,
+0x4d,0x4d,0x4d,0x4d,0x8d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,
+0x4d,0x8d,0x4d,0x8d,0x4d,0x4d,0x8d,0x8d,0xd,0x8d,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,5,0xa,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xd,0xd,0xb1,0xb1,0xa,0xb1,0xb1,
+0xb1,0xb1,0x8d,0x8d,2,2,2,2,2,2,2,2,2,2,0x4d,0x4d,
+0x4d,0xd,0xd,0x4d,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
+0xd,0xd,0xd,0xad,0x8d,0xb1,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d,0x8d,0x4d,0x4d,
+0x4d,0x4d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x4d,0x8d,0x4d,
+0x8d,0x4d,0x4d,0x8d,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xd,0xd,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,
+0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,
+0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x8d,0x4d,0x8d,
+0x8d,0x4d,0x4d,0x4d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0xd,0xd,0xd,0xd,
+0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
+0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
+0xd,0xd,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xd,0xd,0xd,
+0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,1,1,1,1,
+1,1,1,1,1,1,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
+0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
+0x41,0x41,0x41,0x41,0x41,0x41,0x41,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+1,1,0xa,0xa,0xa,0xa,0x21,1,1,0xb1,1,1,0xb1,0xb1,0xb1,0xb1,
+1,0xb1,0xb1,0xb1,1,0xb1,0xb1,0xb1,0xb1,0xb1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0xb1,0xb1,0xb1,0xb1,1,0xb1,0xb1,0xb1,0xb1,0xb1,0x81,0x41,0x41,0x41,
+0x41,0x41,0x81,0x81,0x41,0x81,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
+0x81,0x41,0x81,0x81,0x81,0xb1,0xb1,0xb1,1,1,1,1,0x4d,0xd,0x4d,0x4d,
+0x4d,0x4d,0xd,0x8d,0x4d,0x8d,0x8d,0xd,0xd,0xd,0xd,0xd,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,5,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x4d,0x4d,0x4d,0x4d,
+0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0xd,0x8d,0x4d,0x4d,0x8d,0x8d,0x4d,
+0x4d,0xd,0x4d,0x4d,0x4d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,
+0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,0,0,0,
+0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xb1,0,0,
+0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
+0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0xb1,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
+0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,4,4,0,0,0,0,0,0,0,4,
+0,0,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0,0,0xb1,
+0xb1,0xb1,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0xb1,0,0,
+0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,
+0xb1,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,
+0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0xb1,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0,
+0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0xb1,0xb1,0,
+0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,
+0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,
+0xa,4,0xa,0,0,0,0,0,0xb1,0,0,0,0xb1,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
+0xb1,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,
+0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
+0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0xa0,
+0,0,0,0,0,0,0xa0,0,0,0,0,0,0xb1,0xb1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,
+0,0,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0,0,0,0,4,0,0,0,0,0,0,0,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,0,0xb1,0x310a,0xf20a,
+0x310a,0xf20a,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,
+0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,
+0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0xb1,0xb1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0xb1,0xb1,
+0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0xb1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,
+0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0x310a,0xf20a,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0xb1,0,
+0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,
+0,0,0,4,0,0xb1,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
+0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
+0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xb1,0x40,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x4a,0xa,0xa,0x2a,0xb1,0xb1,0xb1,0x12,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
+0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0,0,0,0,0,0,0,
+0,0xb1,0xb1,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
+0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xb1,0xb1,0xb1,0,
+0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0xb1,0,
+0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0xa,0,0,0,
+0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0xb1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
+0xb1,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
+0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0xb1,0,0xb1,0xb1,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0,0,0,0,0xb1,0,0,0,0,0,0,0xb1,0,0,0,
+0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0,
+0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0xa,0xa,0,0xa,0xa,0xa,0xa,6,0x310a,0xf20a,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,0xb2,0xb2,0xb2,0xb2,0xb2,0x12,0x814,0x815,
+0x813,0x816,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,2,0,0,0,2,2,2,2,
+2,2,3,3,0xa,0x310a,0xf20a,0,9,9,9,9,9,9,9,9,
+9,9,9,0xb2,0x412,0x432,0x8a0,0x8a1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,7,0x8ab,0x8ae,0x8b0,0x8ac,0x8af,6,
+4,4,4,4,4,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,
+2,2,2,2,2,2,2,2,2,2,3,3,0xa,0x310a,0xf20a,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xa,0xa,0,0xa,0xa,0xa,0xa,0,0xa,0xa,0,0,0,0,0,0,
+0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,
+0xa,0xa,0xa,0xa,0,0xa,0,0xa,0,0xa,0,0,0,0,4,0,
+0,0,0,0,0,0,0,0,0,0,0xa,0xa,0,0,0,0,
+0x100a,0xa,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0,0,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,
+0x300a,0xf00a,0x900a,0x900a,0x900a,0x100a,0x900a,0x900a,0x100a,0x100a,0x900a,0x900a,0x900a,0x900a,0x900a,0x100a,
+0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0x700a,0x700a,0x700a,0xb00a,0xb00a,0xb00a,0xa,0xa,
+0xa,0x100a,3,4,0xa,0x900a,0x100a,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x900a,
+0x900a,0x900a,0x900a,0xa,0x900a,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,
+0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x300a,0xf00a,0x100a,0x100a,
+0x100a,0x100a,0x100a,0x900a,0x100a,0x900a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x900a,0xa,0xa,0xa,
+0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,
+0x100a,0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,
+0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
+0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x900a,0x100a,0x900a,0x900a,0x100a,0x900a,
+0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x900a,0xa,0xa,0xa,
+0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,
+0xf00a,0x900a,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
+0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,0xa,
+0x300a,0xf00a,0xa,0x500a,0x100a,0xd00a,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0x300a,0xf00a,0xa,
+0xa,0xa,0xa,0xa,0x900a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x310a,0xf20a,
+0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x100a,0x100a,0xa,0xa,
+0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x100a,0x100a,
+0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,
+0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x710a,0x320a,0xf10a,0xb20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,
+0xf20a,0xa,0xa,0x900a,0x100a,0x100a,0x100a,0x100a,0x900a,0xa,0x100a,0x900a,0x300a,0xf00a,0x100a,0x100a,
+0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,
+0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,
+0x310a,0xf20a,0x310a,0xf20a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x900a,0x900a,0x100a,0xa,
+0xa,0xa,0xa,0xa,0x300a,0xf00a,0x900a,0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0xa,0x300a,
+0xf00a,0x300a,0xf00a,0x100a,0xa,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
+0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0x100a,0xa,0x100a,0xa,
+0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,
+0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,
+0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x100a,0x100a,
+0x100a,0x100a,0xa,0x100a,0x100a,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,
+0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,
+0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,
+0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,
+0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,
+0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x900a,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,
+0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xb1,
+0xb1,0xb1,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
+0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,
+0x300a,0xf00a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
+0xb1,0xb1,0,0,0xa,0,0,0,0,0,0xa,0xa,0,0,0,0,
+0,0xa,0xa,0xa,9,0xa,0xa,0xa,0xa,0,0,0,0x310a,0xf20a,0x310a,0xf20a,
+0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,
+0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xa,
+0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
+0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0xb1,0xb1,0xb1,0xb1,0xa,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0,0,0xa,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0xb1,0,0,0,0xb1,0,0,0,0,0xb1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0xb1,0xb1,0,0xa,0xa,0xa,0xa,0xb1,0,0,0,
+0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,
+0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
+0x40,0x40,0x60,0,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
+0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,
+0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
+0,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
+0,0,0,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0xb1,0,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0xb1,0xb1,
+0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,
+0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,
+0,0,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0xb1,0,0,0xb1,0,0,0,
+0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1,1,1,1,1,1,1,1,1,3,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
+0xd,0xd,0xd,0xd,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,1,0xb1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
+0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
+0xd,0xd,0xa,0xa,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
+0xd,0xd,0xd,0xd,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+0x12,0x12,0x12,0x12,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
+0xd,0xa,0xd,0xd,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,
+0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,6,0xa,6,0,0xa,6,0xa,0xa,0xa,0x310a,0xf20a,0x310a,
+0xf20a,0x310a,0xf20a,4,0xa,0xa,3,3,0x300a,0xf00a,0xa,0,0xa,4,4,0xa,
+0,0,0,0,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
+0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
+0xd,0xd,0xd,0xb2,0,0xa,0xa,4,4,4,0xa,0xa,0x310a,0xf20a,0xa,3,
+6,3,6,6,2,2,2,2,2,2,2,2,2,2,6,0xa,
+0x500a,0xa,0xd00a,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x510a,
+0xa,0xd20a,0xa,0x310a,0xf20a,0xa,0x310a,0xf20a,0xa,0xa,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,4,4,0xa,0xa,0xa,4,4,0,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0xaa,0xaa,0xaa,
+0xa,0xa,0x12,0x12,0,0xa,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0,0,0,0xb1,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xa,
+1,0xb1,0xb1,0xb1,1,0xb1,0xb1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,1,1,1,1,0xb1,
+0x41,0x81,1,1,0x81,0xb1,0xb1,1,1,1,1,0x41,0x41,0x41,0x41,0x81,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+0x41,0x41,0x41,0x41,0x41,0x81,1,0x81,1,0x81,0x81,1,1,0x61,0x81,0x81,
+0x81,0x81,0x81,0x41,0x41,0x41,0x41,0x61,0x41,0x41,0x41,0x41,0x41,0x81,0x41,0x41,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0x41,0x81,0x41,0x81,0x81,0x81,0x41,0x41,0x41,0x81,0x41,0x41,0x81,0x41,0x81,0x81,
+0x41,0x81,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,0x81,0x81,0x81,0x81,0x41,0x41,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,0x4d,0x4d,0x8d,0x4d,0xb1,0xb1,0xb1,0xb1,
+0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,5,5,5,5,5,5,5,5,
+5,5,0xd,0xd,0xd,0xd,0xd,0xd,0x6d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,
+0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,
+0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,1,1,1,1,1,1,1,1,1,
+1,1,1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,0x4d,0x4d,0x4d,0x8d,0x4d,0x4d,0x4d,0x4d,
+0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0xd,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0x4d,0x4d,0x4d,0x8d,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
+0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,0x41,1,0x41,0x41,
+0x81,0x81,0x81,1,0x41,0x81,0x81,0x41,0x41,0x81,0x41,0x41,1,0x41,0x81,0x81,
+0x41,1,1,1,1,0x81,0x41,0x61,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
+0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,
+0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
+0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,
+0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0xb1,0,0,0,0,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0,0xb1,0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,
+0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,0,0,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,0,
+0,0,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0xb1,0,
+0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xa0,0xa0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
+0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
+0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
+0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0,0xa0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0,0,0,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,
+0,0xb1,0,0xb1,0,0,0,0,0,0,0,0,4,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,4,4,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
+0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0,
+0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0xb2,0xb2,0xb2,0xb2,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
+0xb1,0xb1,0,0,0,0,0,0,0,0,0,0xb2,0xb2,0xb2,0xb2,0xb2,
+0xb2,0xb2,0xb2,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0xa,0xa,0xb1,0xb1,0xb1,0xa,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0x100a,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0x100a,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0x100a,0,0,0,0,0,0,0,0,0,0,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
+0,0xb1,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,
+0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,4,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,0x41,0x41,0x41,0x41,
+0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
+0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xa1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
+0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xd,0xd,0xd,0xd,0xd,0xd,
+0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,2,2,2,2,
+2,2,2,2,2,2,2,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,
+0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0,0,0,
+0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,
+0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,
+0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
+0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,2,2,2,2,2,2,2,2,2,2,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x12,0x12,
+0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,
+0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,
+0x12,0xb2,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+0x12,0x12,0x12,0x12,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+0x12,0x12,0x12,0x12,0,0,0,0
+};
+
+static const uint32_t ubidi_props_mirrors[40]={
+0x2000ab,0xbb,0x4202215,0x4e0221f,0x3e02220,0x3a02221,0x3c02222,0x4c02224,0x2202243,0x1402245,0x120224c,0x4002298,0x44022a6,0x48022a8,0x46022a9,0x4a022ab,
+0x38022b8,0x10022cd,0x2e022f2,0x30022f3,0x32022f4,0x34022f6,0x36022f7,0x24022fa,0x26022fb,0x28022fc,0x2a022fd,0x2c022fe,0x20027dc,0xa0299b,0xc029a0,0x8029a3,
+0x16029b8,0x4029f5,0x1802ade,0x1c02ae3,0x1a02ae4,0x1e02ae5,0xe02aee,0x602bfe
+};
+
+static const uint8_t ubidi_props_jgArray[680]={
+0x2d,0,3,3,0x2c,3,0x2d,3,4,0x2a,4,4,0xd,0xd,0xd,6,
+6,0x1f,0x1f,0x23,0x23,0x21,0x21,0x28,0x28,1,1,0xb,0xb,0x37,0x37,0x37,
+0,9,0x1d,0x13,0x16,0x18,0x1a,0x10,0x2c,0x2d,0x2d,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0x1d,
+0,3,3,3,0,3,0x2c,0x2c,0x2d,4,4,4,4,4,4,4,
+4,0xd,0xd,0xd,0xd,0xd,0xd,0xd,6,6,6,6,6,6,6,6,
+6,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x23,0x23,0x23,0x21,0x21,0x28,
+1,9,9,9,9,9,9,0x1d,0x1d,0xb,0x26,0xb,0x13,0x13,0x13,0xb,
+0xb,0xb,0xb,0xb,0xb,0x16,0x16,0x16,0x16,0x1a,0x1a,0x1a,0x1a,0x38,0x15,0xd,
+0x2a,0x11,0x11,0xe,0x2c,0x2c,0x2c,0x2c,0x2c,0x2c,0x2c,0x2c,0x37,0x2f,0x37,0x2c,
+0x2d,0x2d,0x2e,0x2e,0,0x2a,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0x1f,
+0,0,0,0,0,0,0,0,0,0,0x23,0x21,1,0,0,0x15,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,0,5,0xc,0xc,7,7,0xf,0x27,0x32,0x12,0x2b,0x2b,0x30,0x31,0x14,
+0x17,0x19,0x1b,0x24,0xa,8,0x1c,0x20,0x22,0x1e,7,0x25,0x29,5,0xc,7,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0x35,0x34,0x33,
+4,4,4,4,4,4,4,0xd,0xd,6,6,0x1f,0x23,1,1,1,
+9,9,0xb,0xb,0xb,0x18,0x18,0x1a,0x1a,0x1a,0x16,0x1f,0x1f,0x23,0xd,0xd,
+0x23,0x1f,0xd,3,3,0x37,0x37,0x2d,0x2c,0x2c,0x36,0x36,0xd,0x23,0x23,0x13,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x5d,0x5a,0x60,0x63,0x5e,0x5f,0x59,0x61,0x5b,0x5c,0x62,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+4,4,0xd,0x28,9,0x1d,0x16,0x18,0x2d,0x2d,0x1f,0x2c,0x39,0,6,0x21,
+0xb,0x55,0x1f,1,0x13,0,4,4,4,0x1f,0x2d,0x56,0x58,0x57,4,4,
+4,0xd,0xb,1,0x58,0xd,0xd,0x16
+};
+
+static const uint8_t ubidi_props_jgArray2[612]={
+0x3a,0x3c,0x3c,0x40,0x40,0x3d,0,0x52,0,0x54,0x54,0,0,0x41,0x4f,0x53,
+0x43,0x43,0x43,0x44,0x3e,0x50,0x45,0x46,0x4c,0x3b,0x3b,0x48,0x48,0x4b,0x49,0x49,
+0x49,0x4a,0,0,0x4d,0,0,0,0,0,0,0x47,0x3f,0x4e,0x51,0x42,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0x65,0,0,0,0,0,0,0x65,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0x64,0,0,0x65,0,0x64,0,
+0x64,0,0,0x64
+};
+
+static const UBiDiProps ubidi_props_singleton={
+ NULL,
+ ubidi_props_indexes,
+ ubidi_props_mirrors,
+ ubidi_props_jgArray,
+ ubidi_props_jgArray2,
+ {
+ ubidi_props_trieIndex,
+ ubidi_props_trieIndex+3568,
+ NULL,
+ 3568,
+ 8968,
+ 0x1a0,
+ 0xe70,
+ 0x0,
+ 0x0,
+ 0x110000,
+ 0x30f4,
+ NULL, 0, FALSE, FALSE, 0, NULL
+ },
+ { 2,2,0,0 }
+};
+
+#endif // INCLUDED_FROM_UBIDI_PROPS_C
diff --git a/thirdparty/icu4c/common/ubidiimp.h b/thirdparty/icu4c/common/ubidiimp.h
new file mode 100644
index 0000000000..e48fc6f941
--- /dev/null
+++ b/thirdparty/icu4c/common/ubidiimp.h
@@ -0,0 +1,484 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ubidiimp.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999aug06
+* created by: Markus W. Scherer, updated by Matitiahu Allouche
+*/
+
+#ifndef UBIDIIMP_H
+#define UBIDIIMP_H
+
+#include "unicode/utypes.h"
+#include "unicode/ubidi.h"
+#include "unicode/uchar.h"
+#include "ubidi_props.h"
+
+/* miscellaneous definitions ---------------------------------------------- */
+
+// ICU-20853=ICU-20935 Solaris #defines CS and ES in sys/regset.h
+#ifdef CS
+# undef CS
+#endif
+#ifdef ES
+# undef ES
+#endif
+
+typedef uint8_t DirProp;
+typedef uint32_t Flags;
+
+/* Comparing the description of the BiDi algorithm with this implementation
+ is easier with the same names for the BiDi types in the code as there.
+ See UCharDirection in uchar.h .
+*/
+enum { /* short aliases for the UCharDirection values, followed by two classes (ENL, ENR) that have no U_ constant */
+ L= U_LEFT_TO_RIGHT, /* 0 */
+ R= U_RIGHT_TO_LEFT, /* 1 */
+ EN= U_EUROPEAN_NUMBER, /* 2 */
+ ES= U_EUROPEAN_NUMBER_SEPARATOR, /* 3 */
+ ET= U_EUROPEAN_NUMBER_TERMINATOR, /* 4 */
+ AN= U_ARABIC_NUMBER, /* 5 */
+ CS= U_COMMON_NUMBER_SEPARATOR, /* 6 */
+ B= U_BLOCK_SEPARATOR, /* 7 */
+ S= U_SEGMENT_SEPARATOR, /* 8 */
+ WS= U_WHITE_SPACE_NEUTRAL, /* 9 */
+ ON= U_OTHER_NEUTRAL, /* 10 */
+ LRE=U_LEFT_TO_RIGHT_EMBEDDING, /* 11 */
+ LRO=U_LEFT_TO_RIGHT_OVERRIDE, /* 12 */
+ AL= U_RIGHT_TO_LEFT_ARABIC, /* 13 */
+ RLE=U_RIGHT_TO_LEFT_EMBEDDING, /* 14 */
+ RLO=U_RIGHT_TO_LEFT_OVERRIDE, /* 15 */
+ PDF=U_POP_DIRECTIONAL_FORMAT, /* 16 */
+ NSM=U_DIR_NON_SPACING_MARK, /* 17 */
+ BN= U_BOUNDARY_NEUTRAL, /* 18 */
+ FSI=U_FIRST_STRONG_ISOLATE, /* 19 */
+ LRI=U_LEFT_TO_RIGHT_ISOLATE, /* 20 */
+ RLI=U_RIGHT_TO_LEFT_ISOLATE, /* 21 */
+ PDI=U_POP_DIRECTIONAL_ISOLATE, /* 22 */
+ ENL, /* EN after W7 */ /* 23 */
+ ENR, /* EN not subject to W7 */ /* 24 */
+ dirPropCount /* one past ENR, i.e. the number of values listed above */
+};
+
+/* Sometimes, bit values are more appropriate
+ to deal with directionality properties.
+ Abbreviations in these macro names refer to names
+ used in the BiDi algorithm.
+*/
+#define DIRPROP_FLAG(dir) (1UL<<(dir))
+#define PURE_DIRPROP(prop) ((prop)&~0xE0) /* masks out bits 5..7 (0xE0) of a dirProp value */
+
+/* special flag for multiple runs from explicit embedding codes */
+#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)
+
+/* are there any characters that are LTR or RTL? */
+#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(ENL)|DIRPROP_FLAG(ENR)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(LRI))
+#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(RLI))
+#define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))
+#define MASK_STRONG_EN_AN (DIRPROP_FLAG(L)|DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN))
+
+/* explicit embedding codes */
+#define MASK_EXPLICIT (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(PDF))
+
+/* explicit isolate codes */
+#define MASK_ISO (DIRPROP_FLAG(LRI)|DIRPROP_FLAG(RLI)|DIRPROP_FLAG(FSI)|DIRPROP_FLAG(PDI))
+
+#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)
+
+/* paragraph and segment separators */
+#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))
+
+/* all types that are counted as White Space or Neutral in some steps */
+#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT|MASK_ISO)
+
+/* types that are neutrals or could become neutrals in (Wn) */
+#define MASK_POSSIBLE_N (DIRPROP_FLAG(ON)|DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_WS)
+
+/*
+ * These types may be changed to "e",
+ * the embedding type (L or R) of the run,
+ * in the BiDi algorithm (N2)
+ */
+#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
+
+/* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */
+#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))
+
+#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)
+
+/*
+ * The following bit is used for the directional isolate status.
+ * Stack entries corresponding to isolate sequences are greater than ISOLATE.
+ */
+#define ISOLATE 0x0100
+
+U_CFUNC UBiDiLevel
+ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t index);
+
+#define GET_PARALEVEL(ubidi, index) \
+ ((UBiDiLevel)(!(ubidi)->defaultParaLevel || (index)<(ubidi)->paras[0].limit ? \
+ (ubidi)->paraLevel : ubidi_getParaLevelAtIndex((ubidi), (index))))
+
+/* number of paras entries allocated initially without malloc */
+#define SIMPLE_PARAS_COUNT 10
+/* number of isolate entries allocated initially without malloc */
+#define SIMPLE_ISOLATES_COUNT 5
+/* number of isolate run entries for paired brackets allocated initially without malloc */
+#define SIMPLE_OPENINGS_COUNT 20
+
+#define CR 0x000D
+#define LF 0x000A
+
+/* Run structure for reordering --------------------------------------------- */
+enum {
+ LRM_BEFORE=1,
+ LRM_AFTER=2,
+ RLM_BEFORE=4,
+ RLM_AFTER=8
+};
+
+typedef struct Para { /* element type of UBiDi.paras / UBiDi.simpleParas */
+ int32_t limit; /* NOTE(review): GET_PARALEVEL compares text indexes against paras[0].limit, so presumably the index just past the paragraph's last character - confirm */
+ int32_t level; /* NOTE(review): presumably the paragraph level of this entry - confirm */
+} Para;
+
+enum { /* flags for Opening.flags */
+ FOUND_L=DIRPROP_FLAG(L),
+ FOUND_R=DIRPROP_FLAG(R)
+};
+
+typedef struct Opening {
+ int32_t position; /* position of opening bracket */
+ int32_t match; /* matching char or -position of closing bracket */
+ int32_t contextPos; /* position of last strong char found before opening */
+ uint16_t flags; /* bits for L or R/AL found within the pair */
+ UBiDiDirection contextDir; /* L or R according to last strong char before opening */
+ uint8_t filler; /* to complete a nice multiple of 4 chars */
+} Opening;
+
+typedef struct IsoRun {
+ int32_t contextPos; /* position of char determining context */
+ uint16_t start; /* index of first opening entry for this run */
+ uint16_t limit; /* index after last opening entry for this run */
+ UBiDiLevel level; /* level of this run */
+ DirProp lastStrong; /* bidi class of last strong char found in this run */
+ DirProp lastBase; /* bidi class of last base char found in this run */
+ UBiDiDirection contextDir; /* L or R to use as context for following openings */
+} IsoRun;
+
+typedef struct BracketData {
+ UBiDi *pBiDi;
+ /* array of opening entries which should be enough in most cases; no malloc() */
+ Opening simpleOpenings[SIMPLE_OPENINGS_COUNT];
+ Opening *openings; /* pointer to current array of entries */
+ int32_t openingsCount; /* number of allocated entries */
+ int32_t isoRunLast; /* index of last used entry */
+ /* array of nested isolated sequence entries; can never exceed UBIDI_MAX_EXPLICIT_LEVEL
+ + 1 for index 0, + 1 for before the first isolated sequence */
+ IsoRun isoRuns[UBIDI_MAX_EXPLICIT_LEVEL+2];
+ UBool isNumbersSpecial; /* reordering mode for NUMBERS_SPECIAL */
+} BracketData;
+
+typedef struct Isolate { /* element type of UBiDi.isolates / UBiDi.simpleIsolates */
+ int32_t startON; /* NOTE(review): the field meanings are not visible in this header; */
+ int32_t start1; /* presumably a snapshot of the implicit-level resolution state that is */
+ int32_t state; /* saved per isolate stack entry - confirm in resolveImplicitLevels() */
+ int16_t stateImp;
+} Isolate;
+
+typedef struct Run {
+ int32_t logicalStart, /* first character of the run; b31 indicates even/odd level */
+ visualLimit, /* last visual position of the run +1 */
+ insertRemove; /* if >0, flags for inserting LRM/RLM before/after run,
+ if <0, count of bidi controls within run */
+} Run;
+
+/* in a Run, logicalStart will get this bit set if the run level is odd */
+#define INDEX_ODD_BIT (1UL<<31)
+
+#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)((level)&1)<<31))
+#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((int32_t)((level)&1)<<31))
+#define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT)
+
+#define GET_INDEX(x) ((x)&~INDEX_ODD_BIT)
+#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
+#define IS_ODD_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)!=0))
+#define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0))
+
+U_CFUNC UBool
+ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
+
+/** BiDi control code points */
+enum {
+ ZWNJ_CHAR=0x200c,
+ ZWJ_CHAR,
+ LRM_CHAR,
+ RLM_CHAR,
+ LRE_CHAR=0x202a,
+ RLE_CHAR,
+ PDF_CHAR,
+ LRO_CHAR,
+ RLO_CHAR,
+ LRI_CHAR=0x2066,
+ RLI_CHAR,
+ FSI_CHAR,
+ PDI_CHAR
+};
+
+#define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5 || (uint32_t)((c)-LRI_CHAR)<4)
+
+/* InsertPoints structure for noting where to put BiDi marks ---------------- */
+
+typedef struct Point {
+ int32_t pos; /* position in text */
+ int32_t flag; /* flag for LRM/RLM, before/after */
+} Point;
+
+typedef struct InsertPoints {
+ int32_t capacity; /* number of points allocated */
+ int32_t size; /* number of points used */
+ int32_t confirmed; /* number of points confirmed */
+ UErrorCode errorCode; /* for eventual memory shortage */
+ Point *points; /* pointer to array of points */
+} InsertPoints;
+
+
+/* UBiDi structure ----------------------------------------------------------- */
+
+struct UBiDi {
+ /* pointer to parent paragraph object (pointer to self if this object is
+ * a paragraph object); set to NULL in a newly opened object; set to a
+ * real value after a successful execution of ubidi_setPara or ubidi_setLine
+ */
+ const UBiDi * pParaBiDi;
+
+ /* alias pointer to the current text */
+ const UChar *text;
+
+ /* length of the current text */
+ int32_t originalLength;
+
+ /* if the UBIDI_OPTION_STREAMING option is set, this is the length
+ * of text actually processed by ubidi_setPara, which may be shorter than
+ * the original length.
+ * Otherwise, it is identical to the original length.
+ */
+ int32_t length;
+
+ /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or
+ * marks are allowed to be inserted in one of the reordering modes, the
+ * length of the result string may be different from the processed length.
+ */
+ int32_t resultLength;
+
+ /* memory sizes in bytes */
+ int32_t dirPropsSize, levelsSize, openingsSize, parasSize, runsSize, isolatesSize;
+
+ /* allocated memory */
+ DirProp *dirPropsMemory;
+ UBiDiLevel *levelsMemory;
+ Opening *openingsMemory;
+ Para *parasMemory;
+ Run *runsMemory;
+ Isolate *isolatesMemory;
+
+ /* indicators for whether memory may be allocated after ubidi_open() */
+ UBool mayAllocateText, mayAllocateRuns;
+
+ /* arrays with one value per text-character */
+ DirProp *dirProps;
+ UBiDiLevel *levels;
+
+ /* are we performing an approximation of the "inverse BiDi" algorithm? */
+ UBool isInverse;
+
+ /* are we using the basic algorithm or its variation? */
+ UBiDiReorderingMode reorderingMode;
+
+ /* UBIDI_REORDER_xxx values must be ordered so that all the regular
+ * logical to visual modes come first, and all inverse BiDi modes
+ * come last.
+ */
+ #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL UBIDI_REORDER_NUMBERS_SPECIAL
+
+ /* bitmask for reordering options */
+ uint32_t reorderingOptions;
+
+ /* must block separators receive level 0? */
+ UBool orderParagraphsLTR;
+
+ /* the paragraph level */
+ UBiDiLevel paraLevel;
+ /* original paraLevel when contextual */
+ /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
+ UBiDiLevel defaultParaLevel;
+
+ /* context data */
+ const UChar *prologue;
+ int32_t proLength;
+ const UChar *epilogue;
+ int32_t epiLength;
+
+ /* the following is set in ubidi_setPara, used in processPropertySeq */
+ const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pair */
+
+ /* the overall paragraph or line directionality - see UBiDiDirection */
+ UBiDiDirection direction;
+
+ /* flags is a bit set for which directional properties are in the text */
+ Flags flags;
+
+ /* lastArabicPos is index to the last AL in the text, -1 if none */
+ int32_t lastArabicPos;
+
+ /* characters after trailingWSStart are WS and are */
+ /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
+ int32_t trailingWSStart;
+
+ /* fields for paragraph handling */
+ int32_t paraCount; /* set in getDirProps() */
+ /* filled in getDirProps() */
+ Para *paras;
+
+ /* for relatively short text, we only need a tiny array of paras (no malloc()) */
+ Para simpleParas[SIMPLE_PARAS_COUNT];
+
+ /* fields for line reordering */
+ int32_t runCount; /* ==-1: runs not set up yet */
+ Run *runs;
+
+ /* for non-mixed text, we only need a tiny array of runs (no malloc()) */
+ Run simpleRuns[1];
+
+ /* maximum or current nesting depth of isolate sequences */
+ /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal
+ nesting encountered.
+ Within resolveImplicitLevels(), this is the index of the current isolates
+ stack entry. */
+ int32_t isolateCount;
+ Isolate *isolates;
+
+ /* for simple text, have a small stack (no malloc()) */
+ Isolate simpleIsolates[SIMPLE_ISOLATES_COUNT];
+
+ /* for inverse Bidi with insertion of directional marks */
+ InsertPoints insertPoints;
+
+ /* for option UBIDI_OPTION_REMOVE_CONTROLS */
+ int32_t controlCount;
+
+ /* for Bidi class callback */
+ UBiDiClassCallback *fnClassCallback; /* action pointer */
+ const void *coClassCallback; /* context pointer */
+};
+
+#define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x)))
+#define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))
+
+typedef union {
+ DirProp *dirPropsMemory;
+ UBiDiLevel *levelsMemory;
+ Opening *openingsMemory;
+ Para *parasMemory;
+ Run *runsMemory;
+ Isolate *isolatesMemory;
+} BidiMemoryForAllocation;
+
+/* Macros for initial checks at function entry */
+#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \
+ if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return retvalue; \
+} UPRV_BLOCK_MACRO_END
+#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \
+ if(!IS_VALID_PARA(bidi)) { \
+ errcode=U_INVALID_STATE_ERROR; \
+ return retvalue; \
+ } \
+} UPRV_BLOCK_MACRO_END
+#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \
+ if(!IS_VALID_PARA_OR_LINE(bidi)) { \
+ errcode=U_INVALID_STATE_ERROR; \
+ return retvalue; \
+ } \
+} UPRV_BLOCK_MACRO_END
+#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \
+ if((arg)<(start) || (arg)>=(limit)) { \
+ (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \
+ return retvalue; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode) UPRV_BLOCK_MACRO_BEGIN { \
+ if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return; \
+} UPRV_BLOCK_MACRO_END
+#define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode) UPRV_BLOCK_MACRO_BEGIN { \
+ if(!IS_VALID_PARA(bidi)) { \
+ errcode=U_INVALID_STATE_ERROR; \
+ return; \
+ } \
+} UPRV_BLOCK_MACRO_END
+#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode) UPRV_BLOCK_MACRO_BEGIN { \
+ if(!IS_VALID_PARA_OR_LINE(bidi)) { \
+ errcode=U_INVALID_STATE_ERROR; \
+ return; \
+ } \
+} UPRV_BLOCK_MACRO_END
+#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode) UPRV_BLOCK_MACRO_BEGIN { \
+ if((arg)<(start) || (arg)>=(limit)) { \
+ (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \
+ return; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/* helper function to (re)allocate memory if allowed */
+U_CFUNC UBool
+ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);
+
+/* helper macros for each allocated array in UBiDi */
+#define getDirPropsMemory(pBiDi, length) \
+ ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
+ (pBiDi)->mayAllocateText, (length))
+
+#define getLevelsMemory(pBiDi, length) \
+ ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
+ (pBiDi)->mayAllocateText, (length))
+
+#define getRunsMemory(pBiDi, length) \
+ ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
+ (pBiDi)->mayAllocateRuns, (length)*sizeof(Run))
+
+/* additional macros used by ubidi_open() - always allow allocation */
+#define getInitialDirPropsMemory(pBiDi, length) \
+ ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
+ true, (length))
+
+#define getInitialLevelsMemory(pBiDi, length) \
+ ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
+ true, (length))
+
+#define getInitialOpeningsMemory(pBiDi, length) \
+ ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->openingsMemory, &(pBiDi)->openingsSize, \
+ true, (length)*sizeof(Opening))
+
+#define getInitialParasMemory(pBiDi, length) \
+ ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
+ true, (length)*sizeof(Para))
+
+#define getInitialRunsMemory(pBiDi, length) \
+ ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
+ true, (length)*sizeof(Run))
+
+#define getInitialIsolatesMemory(pBiDi, length) \
+ ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->isolatesMemory, &(pBiDi)->isolatesSize, \
+ true, (length)*sizeof(Isolate))
+
+#endif
diff --git a/thirdparty/icu4c/common/ubidiln.cpp b/thirdparty/icu4c/common/ubidiln.cpp
new file mode 100644
index 0000000000..3545f4e111
--- /dev/null
+++ b/thirdparty/icu4c/common/ubidiln.cpp
@@ -0,0 +1,1347 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ubidiln.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999aug06
+* created by: Markus W. Scherer, updated by Matitiahu Allouche
+*/
+
+#include "cmemory.h"
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "unicode/uchar.h"
+#include "unicode/ubidi.h"
+#include "ubidiimp.h"
+#include "uassert.h"
+
+/*
+ * General remarks about the functions in this file:
+ *
+ * These functions deal with the aspects of potentially mixed-directional
+ * text in a single paragraph or in a line of a single paragraph
+ * which has already been processed according to
+ * the Unicode 6.3 BiDi algorithm as defined in
+ * http://www.unicode.org/unicode/reports/tr9/ , version 28,
+ * also described in The Unicode Standard, Version 6.3.0 .
+ *
+ * This means that there is a UBiDi object with a levels
+ * and a dirProps array.
+ * paraLevel and direction are also set.
+ * Only if the length of the text is zero, then levels==dirProps==NULL.
+ *
+ * The overall directionality of the paragraph
+ * or line is used to bypass the reordering steps if possible.
+ * Even purely RTL text does not need reordering there because
+ * the ubidi_getLogical/VisualIndex() functions can compute the
+ * index on the fly in such a case.
+ *
+ * The implementation of the access to same-level-runs and of the reordering
+ * attempts to provide better performance and less memory usage compared to
+ * a direct implementation of especially rule (L2) with an array of
+ * one (32-bit) integer per text character.
+ *
+ * Here, the levels array is scanned as soon as necessary, and a vector of
+ * same-level-runs is created. Reordering then is done on this vector.
+ * For each run of text positions that were resolved to the same level,
+ * only one Run entry (three 32-bit integers) is stored: the first text position
+ * of the run, the visual position behind the run after reordering, and insertRemove.
+ * One sign bit is used to hold the directionality of the run.
+ * This is inefficient if there are many very short runs. If the average run
+ * length is <3, then this uses more memory than one 32-bit integer per character.
+ *
+ * In a further attempt to save memory, the levels array is never changed
+ * after all the resolution rules (Xn, Wn, Nn, In).
+ * Many functions have to consider the field trailingWSStart:
+ * if it is less than length, then there is an implicit trailing run
+ * at the paraLevel,
+ * which is not reflected in the levels array.
+ * This allows a line UBiDi object to use the same levels array as
+ * its paragraph parent object.
+ *
+ * When a UBiDi object is created for a line of a paragraph, then the
+ * paragraph's levels and dirProps arrays are reused by way of setting
+ * a pointer into them, not by copying. This again saves memory and forbids
+ * changing the now shared levels for (L1).
+ */
+
+/* handle trailing WS (L1) -------------------------------------------------- */
+
+/*
+ * setTrailingWSStart() computes pBiDi->trailingWSStart, the index at which
+ * the trailing run of WS-like characters of the line begins.
+ * The levels array belongs to the paragraph and is only pointed into, so it
+ * is not modified here; recording trailingWSStart is how (L1) is applied.
+ *
+ * A run at the paraLevel that directly precedes the trailing WS is merged
+ * into it, which simplifies the subsequent operations.
+ *
+ * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is
+ * set correctly for the line even with contextual multiple paragraphs.
+ * Precondition: pBiDi->length>0, because dirProps[length-1] is read.
+ */
+static void
+setTrailingWSStart(UBiDi *pBiDi) {
+    const DirProp *const props=pBiDi->dirProps;
+    const UBiDiLevel *const lineLevels=pBiDi->levels;
+    const UBiDiLevel baseLevel=pBiDi->paraLevel;
+    int32_t wsStart=pBiDi->length; /* start scanning from the end of the line */
+
+    /*
+     * If the line is terminated by a block separator, all preceding WS etc.
+     * are already set to the paragraph level.
+     * Leaving trailingWSStart at pBiDi->length avoids changing the
+     * level of B chars from 0 to paraLevel in ubidi_getLevels() when
+     * orderParagraphsLTR==TRUE.
+     */
+    if(props[wsStart-1]!=B) {
+        /* step back over every character whose class is in MASK_WS */
+        while(wsStart>0) {
+            if((DIRPROP_FLAG(props[wsStart-1])&MASK_WS)==0) {
+                break;
+            }
+            --wsStart;
+        }
+
+        /* then absorb the preceding run if it already sits at the paraLevel */
+        for(; wsStart>0 && lineLevels[wsStart-1]==baseLevel; --wsStart) {
+        }
+    }
+
+    pBiDi->trailingWSStart=wsStart;
+}
+
+/* ubidi_setLine ------------------------------------------------------------ */
+/* Sets up pLineBiDi as a line object for [start, limit) of the paragraph object pParaBiDi; on an argument error *pErrorCode is set and pLineBiDi is left untouched. */
+U_CAPI void U_EXPORT2
+ubidi_setLine(const UBiDi *pParaBiDi,
+ int32_t start, int32_t limit,
+ UBiDi *pLineBiDi,
+ UErrorCode *pErrorCode) {
+ int32_t length;
+
+ /* check the argument values */
+ RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
+ RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi, *pErrorCode);
+ RETURN_VOID_IF_BAD_RANGE(start, 0, limit, *pErrorCode); /* needs 0<=start<limit, so an empty line is rejected */
+ RETURN_VOID_IF_BAD_RANGE(limit, 0, pParaBiDi->length+1, *pErrorCode); /* needs 0<=limit<=pParaBiDi->length */
+ if(pLineBiDi==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ if(ubidi_getParagraph(pParaBiDi, start, NULL, NULL, NULL, pErrorCode) !=
+ ubidi_getParagraph(pParaBiDi, limit-1, NULL, NULL, NULL, pErrorCode)) {
+ /* the line crosses a paragraph boundary */
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ /* set the values in pLineBiDi from its pParaBiDi parent */
+ pLineBiDi->pParaBiDi=NULL; /* mark unfinished setLine */
+ pLineBiDi->text=pParaBiDi->text+start;
+ length=pLineBiDi->length=limit-start;
+ pLineBiDi->resultLength=pLineBiDi->originalLength=length;
+ pLineBiDi->paraLevel=GET_PARALEVEL(pParaBiDi, start); /* paragraph level that applies at index start */
+ pLineBiDi->paraCount=pParaBiDi->paraCount;
+ pLineBiDi->runs=NULL;
+ pLineBiDi->flags=0;
+ pLineBiDi->reorderingMode=pParaBiDi->reorderingMode;
+ pLineBiDi->reorderingOptions=pParaBiDi->reorderingOptions;
+ pLineBiDi->controlCount=0;
+ if(pParaBiDi->controlCount>0) { /* count the line's own controls only if the parent counted any */
+ int32_t j;
+ for(j=start; j<limit; j++) {
+ if(IS_BIDI_CONTROL_CHAR(pParaBiDi->text[j])) {
+ pLineBiDi->controlCount++;
+ }
+ }
+ pLineBiDi->resultLength-=pLineBiDi->controlCount; /* NOTE(review): presumably because these controls are removed from the result (UBIDI_OPTION_REMOVE_CONTROLS) - confirm */
+ }
+
+ pLineBiDi->dirProps=pParaBiDi->dirProps+start; /* point into the parent's arrays instead of copying them */
+ pLineBiDi->levels=pParaBiDi->levels+start;
+ pLineBiDi->runCount=-1; /* runs not set up yet */
+
+ if(pParaBiDi->direction!=UBIDI_MIXED) {
+ /* the parent is already trivial */
+ pLineBiDi->direction=pParaBiDi->direction;
+
+ /*
+ * The parent's levels are all either
+ * implicitly or explicitly ==paraLevel;
+ * do the same here.
+ */
+ if(pParaBiDi->trailingWSStart<=start) {
+ pLineBiDi->trailingWSStart=0;
+ } else if(pParaBiDi->trailingWSStart<limit) {
+ pLineBiDi->trailingWSStart=pParaBiDi->trailingWSStart-start; /* rebase the parent's index to the line */
+ } else {
+ pLineBiDi->trailingWSStart=length;
+ }
+ } else {
+ const UBiDiLevel *levels=pLineBiDi->levels;
+ int32_t i, trailingWSStart;
+ UBiDiLevel level;
+
+ setTrailingWSStart(pLineBiDi);
+ trailingWSStart=pLineBiDi->trailingWSStart;
+
+ /* recalculate pLineBiDi->direction */
+ if(trailingWSStart==0) {
+ /* all levels are at paraLevel */
+ pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1);
+ } else {
+ /* get the level of the first character */
+ level=(UBiDiLevel)(levels[0]&1); /* only the parity (direction) of the level is kept */
+
+ /* if there is anything of a different level, then the line is mixed */
+ if(trailingWSStart<length && (pLineBiDi->paraLevel&1)!=level) {
+ /* the trailing WS is at paraLevel, which differs from levels[0] */
+ pLineBiDi->direction=UBIDI_MIXED;
+ } else {
+ /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
+ i=1;
+ for(;;) {
+ if(i==trailingWSStart) {
+ /* the direction values match those in level */
+ pLineBiDi->direction=(UBiDiDirection)level;
+ break;
+ } else if((levels[i]&1)!=level) {
+ pLineBiDi->direction=UBIDI_MIXED;
+ break;
+ }
+ ++i;
+ }
+ }
+ }
+
+ switch(pLineBiDi->direction) {
+ case UBIDI_LTR:
+ /* make sure paraLevel is even */
+ pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1); /* an odd level is rounded up to the next even one */
+
+ /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
+ pLineBiDi->trailingWSStart=0;
+ break;
+ case UBIDI_RTL:
+ /* make sure paraLevel is odd */
+ pLineBiDi->paraLevel|=1;
+
+ /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
+ pLineBiDi->trailingWSStart=0;
+ break;
+ default:
+ break;
+ }
+ }
+ pLineBiDi->pParaBiDi=pParaBiDi; /* mark successful setLine */
+ return;
+}
+
+U_CAPI UBiDiLevel U_EXPORT2
+ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) {
+    /* an invalid object or an out-of-range index yields level 0 */
+    if(!IS_VALID_PARA_OR_LINE(pBiDi) || charIndex<0 || charIndex>=pBiDi->length) {
+        return 0;
+    }
+    if(pBiDi->direction==UBIDI_MIXED && charIndex<pBiDi->trailingWSStart) { /* mixed text before the trailing WS run */
+        return pBiDi->levels[charIndex];
+    }
+    return GET_PARALEVEL(pBiDi, charIndex); /* everywhere else the paragraph level applies */
+}
+
+U_CAPI const UBiDiLevel * U_EXPORT2
+ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
+    RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, NULL);
+    RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, NULL);
+
+    const int32_t length=pBiDi->length;
+    if(length<=0) { /* there are no levels to return for empty text */
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    const int32_t start=pBiDi->trailingWSStart;
+    if(start==length) {
+        /* no implicit trailing run: the levels array is already complete */
+        return pBiDi->levels;
+    }
+
+    /*
+     * Characters from trailingWSStart on are implicitly at the paraLevel,
+     * which the current levels array does not show. That array may be
+     * shared with a parent paragraph object, so a complete copy is
+     * built in this object's own levelsMemory instead.
+     */
+    if(!getLevelsMemory(pBiDi, length)) {
+        /* out of memory */
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    UBiDiLevel *const ownLevels=pBiDi->levelsMemory;
+    if(start>0 && ownLevels!=pBiDi->levels) {
+        uprv_memcpy(ownLevels, pBiDi->levels, start);
+    }
+    /* pBiDi->paraLevel is ok even with contextual multiple paragraphs,
+       since pBiDi is a line object */
+    uprv_memset(ownLevels+start, pBiDi->paraLevel, length-start);
+    /* from now on the levels array of this object reflects the WS run */
+    pBiDi->trailingWSStart=length;
+    pBiDi->levels=ownLevels;
+    return ownLevels;
+}
+
+U_CAPI void U_EXPORT2
+ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
+                    int32_t *pLogicalLimit, UBiDiLevel *pLevel) {
+    UErrorCode errorCode=U_ZERO_ERROR; /* internal only: this function has no error output */
+    int32_t runCount, visualStart, logicalLimit, logicalFirst, i;
+    Run iRun;
+    /* Reports the limit and the level of the run that contains logicalPosition; returns silently on bad input. */
+    /* validate the object first: the range check below reads pBiDi->length */
+    RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, errorCode);
+    RETURN_VOID_IF_BAD_RANGE(logicalPosition, 0, pBiDi->length, errorCode);
+    runCount=ubidi_countRuns((UBiDi *)pBiDi, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    /* this is done based on runs rather than on levels since levels have
+       a special interpretation when UBIDI_REORDER_RUNS_ONLY
+     */
+    visualStart=logicalLimit=0;
+    iRun=pBiDi->runs[0];
+
+    for(i=0; i<runCount; i++) { /* find the run whose logical range contains logicalPosition */
+        iRun = pBiDi->runs[i];
+        logicalFirst=GET_INDEX(iRun.logicalStart);
+        logicalLimit=logicalFirst+iRun.visualLimit-visualStart; /* run length = visualLimit minus the previous run's visualLimit */
+        if((logicalPosition>=logicalFirst) &&
+           (logicalPosition<logicalLimit)) {
+            break;
+        }
+        visualStart = iRun.visualLimit;
+    }
+    if(pLogicalLimit) { /* both output pointers are optional */
+        *pLogicalLimit=logicalLimit;
+    }
+    if(pLevel) {
+        if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
+            *pLevel=(UBiDiLevel)GET_ODD_BIT(iRun.logicalStart);
+        }
+        else if(pBiDi->direction!=UBIDI_MIXED || logicalPosition>=pBiDi->trailingWSStart) {
+            *pLevel=GET_PARALEVEL(pBiDi, logicalPosition);
+        } else {
+            *pLevel=pBiDi->levels[logicalPosition];
+        }
+    }
+}
+
+/* runs API functions ------------------------------------------------------- */
+
+U_CAPI int32_t U_EXPORT2
+ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) {
+    /* -1 is returned for a NULL or failing error code and for an invalid object */
+    RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
+    RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
+
+    /* set up the runs; the UBool result is ignored, the error code is checked instead */
+    (void)ubidi_getRuns(pBiDi, pErrorCode);
+    return U_FAILURE(*pErrorCode) ? -1 : pBiDi->runCount;
+}
+
+U_CAPI UBiDiDirection U_EXPORT2
+ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
+                   int32_t *pLogicalStart, int32_t *pLength)
+{
+    /* every failure below is reported to the caller simply as UBIDI_LTR */
+    UErrorCode status = U_ZERO_ERROR;
+    RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, status, UBIDI_LTR);
+    ubidi_getRuns(pBiDi, &status);
+    if(U_FAILURE(status)) {
+        return UBIDI_LTR;
+    }
+    RETURN_IF_BAD_RANGE(runIndex, 0, pBiDi->runCount, status, UBIDI_LTR);
+
+    const Run *const run=pBiDi->runs+runIndex;
+    /* logicalStart carries the odd/even flag of the run in its top bit */
+    const int32_t startWithOddBit=run->logicalStart;
+
+    if(pLogicalStart!=NULL) {
+        *pLogicalStart=GET_INDEX(startWithOddBit);
+    }
+    if(pLength!=NULL) {
+        /* the length is this run's visualLimit minus the previous run's (0 for the first run) */
+        const int32_t previousLimit= runIndex>0 ? run[-1].visualLimit : 0;
+        *pLength=run->visualLimit-previousLimit;
+    }
+    return (UBiDiDirection)GET_ODD_BIT(startWithOddBit);
+}
+
+/* trivial case with exactly one run for the whole text; called by ubidi_getRuns() */
+static void
+getSingleRun(UBiDi *pBiDi, UBiDiLevel level) {
+    /* the one-element simpleRuns array inside the object holds the run */
+    Run *const onlyRun=pBiDi->simpleRuns;
+    /* the run starts at index 0, ends at the text length, and carries the level's odd bit */
+    onlyRun->logicalStart=MAKE_INDEX_ODD_PAIR(0, level);
+    onlyRun->visualLimit=pBiDi->length;
+    onlyRun->insertRemove=0;
+    pBiDi->runs=onlyRun;
+    pBiDi->runCount=1;
+}
+
+/* reorder the runs array (L2) ---------------------------------------------- */
+
+/*
+ * Reorder the same-level runs in the runs array.
+ * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
+ * All the visualStart fields=logical start before reordering.
+ * The "odd" bits are not set yet.
+ *
+ * Reordering with this data structure lends itself to some handy shortcuts:
+ *
+ * Since each run is moved but not modified, and since at the initial maxLevel
+ * each sequence of same-level runs consists of only one run each, we
+ * don't need to do anything there and can predecrement maxLevel.
+ * In many simple cases, the reordering is thus done entirely in the
+ * index mapping.
+ * Also, reordering occurs only down to the lowest odd level that occurs,
+ * which is minLevel|1. However, if the lowest level itself is odd, then
+ * in the last reordering the sequence of the runs at this level or higher
+ * will be all runs, and we don't need the elaborate loop to search for them.
+ * This is covered by ++minLevel instead of minLevel|=1 followed
+ * by an extra reorder-all after the reorder-some loop.
+ * About a trailing WS run:
+ * Such a run would need special treatment because its level is not
+ * reflected in levels[] if this is not a paragraph object.
+ * Instead, all characters from trailingWSStart on are implicitly at
+ * paraLevel.
+ * However, for all maxLevel>paraLevel, this run will never be reordered
+ * and does not need to be taken into account. maxLevel==paraLevel is only reordered
+ * if minLevel==paraLevel is odd, which is done in the extra segment.
+ * This means that for the main reordering loop we don't need to consider
+ * this run and can --runCount. If it is later part of the all-runs
+ * reordering, then runCount is adjusted accordingly.
+ */
+/*
+ * Reorders pBiDi->runs in place by reversing sequences of runs, one level at a time.
+ * minLevel/maxLevel are the lowest and highest levels found by the caller.
+ * The logicalStart fields must still be plain logical indexes here, because
+ * they are used to index levels[].
+ */
+static void
+reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) {
+ Run *runs, tempRun;
+ UBiDiLevel *levels;
+ int32_t firstRun, endRun, limitRun, runCount;
+
+ /* nothing to do? */
+ if(maxLevel<=(minLevel|1)) {
+ return;
+ }
+
+ /*
+ * Reorder only down to the lowest odd level
+ * and reorder at an odd minLevel in a separate, simpler loop.
+ * See comments above for why minLevel is always incremented.
+ */
+ ++minLevel;
+
+ runs=pBiDi->runs;
+ levels=pBiDi->levels;
+ runCount=pBiDi->runCount;
+
+ /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */
+ if(pBiDi->trailingWSStart<pBiDi->length) {
+ --runCount;
+ }
+
+ /* maxLevel is pre-decremented: the topmost level is never processed by this loop */
+ while(--maxLevel>=minLevel) {
+ firstRun=0;
+
+ /* loop for all sequences of runs */
+ for(;;) {
+ /* look for a sequence of runs that are all at >=maxLevel */
+ /* look for the first run of such a sequence */
+ while(firstRun<runCount && levels[runs[firstRun].logicalStart]<maxLevel) {
+ ++firstRun;
+ }
+ if(firstRun>=runCount) {
+ break; /* no more such runs */
+ }
+
+ /* look for the limit run of such a sequence (the run behind it) */
+ for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=maxLevel;) {}
+
+ /* Swap the entire sequence of runs from firstRun to limitRun-1. */
+ endRun=limitRun-1;
+ while(firstRun<endRun) {
+ tempRun = runs[firstRun];
+ runs[firstRun]=runs[endRun];
+ runs[endRun]=tempRun;
+ ++firstRun;
+ --endRun;
+ }
+
+ if(limitRun==runCount) {
+ break; /* no more such runs */
+ } else {
+ /* runs[limitRun] is known to be below maxLevel, so the search resumes behind it */
+ firstRun=limitRun+1;
+ }
+ }
+ }
+
+ /* now do maxLevel==old minLevel (==odd!), see above */
+ /* minLevel was incremented above, so an even value here means the old minLevel was odd */
+ if(!(minLevel&1)) {
+ firstRun=0;
+
+ /* include the trailing WS run in this complete reordering */
+ /*
+ * If there is a WS run, runCount was already decremented above;
+ * otherwise it is decremented now. Either way runCount becomes the
+ * index of the last run in the array and serves as endRun below.
+ */
+ if(pBiDi->trailingWSStart==pBiDi->length) {
+ --runCount;
+ }
+
+ /* Swap the entire sequence of all runs. (endRun==runCount) */
+ while(firstRun<runCount) {
+ tempRun=runs[firstRun];
+ runs[firstRun]=runs[runCount];
+ runs[runCount]=tempRun;
+ ++firstRun;
+ --runCount;
+ }
+ }
+}
+
+/* compute the runs array --------------------------------------------------- */
+
+/*
+ * Returns the index (in visual order) of the run whose logical range
+ * contains logicalIndex. The runs array must already be computed.
+ */
+static int32_t getRunFromLogicalIndex(UBiDi *pBiDi, int32_t logicalIndex) {
+    Run *run=pBiDi->runs;
+    const int32_t count=pBiDi->runCount;
+    int32_t previousLimit=0, runIndex;
+
+    for(runIndex=0; runIndex<count; ++runIndex, ++run) {
+        /* a run's length is the distance between consecutive visual limits */
+        const int32_t runLength=run->visualLimit-previousLimit;
+        const int32_t firstLogical=GET_INDEX(run->logicalStart);
+
+        if(!(logicalIndex<firstLogical || logicalIndex>=(firstLogical+runLength))) {
+            return runIndex;
+        }
+        previousLimit+=runLength;
+    }
+    /* we should never get here */
+    UPRV_UNREACHABLE;
+}
+
+/*
+ * Compute the runs array from the levels array.
+ * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0
+ * and the runs are reordered.
+ * Odd-level runs have visualStart on their visual right edge and
+ * they progress visually to the left.
+ * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the
+ * sum of appropriate LRM/RLM_BEFORE/AFTER flags.
+ * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the
+ * negative number of BiDi control characters within this run.
+ *
+ * Returns FALSE only if the memory for the runs array cannot be obtained
+ * (getRunsMemory() fails). The UErrorCode argument is unused here.
+ */
+U_CFUNC UBool
+ubidi_getRuns(UBiDi *pBiDi, UErrorCode*) {
+    /*
+     * This method returns immediately if the runs are already set. This
+     * includes the case of length==0 (handled in setPara).
+     */
+    if (pBiDi->runCount>=0) {
+        return TRUE;
+    }
+
+    if(pBiDi->direction!=UBIDI_MIXED) {
+        /* simple, single-run case - this covers length==0 */
+        /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */
+        getSingleRun(pBiDi, pBiDi->paraLevel);
+    } else /* UBIDI_MIXED, length>0 */ {
+        /* mixed directionality */
+        int32_t length=pBiDi->length, limit;
+        UBiDiLevel *levels=pBiDi->levels;
+        int32_t i, runCount;
+        UBiDiLevel level=UBIDI_DEFAULT_LTR;   /* initialize with no valid level */
+        /*
+         * If there are WS characters at the end of the line
+         * and the run preceding them has a level different from
+         * paraLevel, then they will form their own run at paraLevel (L1).
+         * Count them separately.
+         * We need some special treatment for this in order to not
+         * modify the levels array which a line UBiDi object shares
+         * with its paragraph parent and its other line siblings.
+         * In other words, for the trailing WS, it may be
+         * levels[]!=paraLevel but we have to treat it like it were so.
+         */
+        limit=pBiDi->trailingWSStart;
+        /* count the runs, there is at least one non-WS run, and limit>0 */
+        runCount=0;
+        for(i=0; i<limit; ++i) {
+            /* increment runCount at the start of each run */
+            if(levels[i]!=level) {
+                ++runCount;
+                level=levels[i];
+            }
+        }
+
+        /*
+         * We don't need to see if the last run can be merged with a trailing
+         * WS run because setTrailingWSStart() would have done that.
+         */
+        if(runCount==1 && limit==length) {
+            /* There is only one non-WS run and no trailing WS-run. */
+            getSingleRun(pBiDi, levels[0]);
+        } else /* runCount>1 || limit<length */ {
+            /* allocate and set the runs */
+            Run *runs;
+            int32_t runIndex, start;
+            UBiDiLevel minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0;
+
+            /* now, count a (non-mergeable) WS run */
+            if(limit<length) {
+                ++runCount;
+            }
+
+            /* runCount>1 */
+            if(getRunsMemory(pBiDi, runCount)) {
+                runs=pBiDi->runsMemory;
+            } else {
+                return FALSE;
+            }
+
+            /* set the runs */
+            /* FOOD FOR THOUGHT: this could be optimized, e.g.:
+             * 464->444, 484->444, 575->555, 595->555
+             * However, that would take longer. Check also how it would
+             * interact with BiDi control removal and inserting Marks.
+             */
+            runIndex=0;
+
+            /* search for the run limits and initialize visualLimit values with the run lengths */
+            i=0;
+            do {
+                /* prepare this run */
+                start=i;
+                level=levels[i];
+                if(level<minLevel) {
+                    minLevel=level;
+                }
+                if(level>maxLevel) {
+                    maxLevel=level;
+                }
+
+                /* look for the run limit */
+                while(++i<limit && levels[i]==level) {}
+
+                /* i is another run limit */
+                runs[runIndex].logicalStart=start;
+                runs[runIndex].visualLimit=i-start;
+                runs[runIndex].insertRemove=0;
+                ++runIndex;
+            } while(i<limit);
+
+            if(limit<length) {
+                /* there is a separate WS run */
+                runs[runIndex].logicalStart=limit;
+                runs[runIndex].visualLimit=length-limit;
+                /*
+                 * Initialize insertRemove like for every other run: it is
+                 * modified with |= and -- further below and read by the
+                 * index mapping functions, so it must not be left with
+                 * whatever the runs memory happened to contain.
+                 */
+                runs[runIndex].insertRemove=0;
+                /* For the trailing WS run, pBiDi->paraLevel is ok even
+                   if contextual multiple paragraphs. */
+                if(pBiDi->paraLevel<minLevel) {
+                    minLevel=pBiDi->paraLevel;
+                }
+            }
+
+            /* set the object fields */
+            pBiDi->runs=runs;
+            pBiDi->runCount=runCount;
+
+            reorderLine(pBiDi, minLevel, maxLevel);
+
+            /* now add the direction flags and adjust the visualLimit's to be just that */
+            /* this loop will also handle the trailing WS run */
+            limit=0;
+            for(i=0; i<runCount; ++i) {
+                ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]);
+                limit+=runs[i].visualLimit;
+                runs[i].visualLimit=limit;
+            }
+
+            /* Set the "odd" bit for the trailing WS run. */
+            /* For a RTL paragraph, it will be the *first* run in visual order. */
+            /* For the trailing WS run, pBiDi->paraLevel is ok even if
+               contextual multiple paragraphs. */
+            if(runIndex<runCount) {
+                int32_t trailingRun = ((pBiDi->paraLevel & 1) != 0)? 0 : runIndex;
+
+                ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel);
+            }
+        }
+    }
+
+    /* handle insert LRM/RLM BEFORE/AFTER run */
+    if(pBiDi->insertPoints.size>0) {
+        Point *point, *start=pBiDi->insertPoints.points,
+                      *limit=start+pBiDi->insertPoints.size;
+        int32_t runIndex;
+        for(point=start; point<limit; point++) {
+            /* accumulate each point's flag on the run that contains its position */
+            runIndex=getRunFromLogicalIndex(pBiDi, point->pos);
+            pBiDi->runs[runIndex].insertRemove|=point->flag;
+        }
+    }
+
+    /* handle remove BiDi control characters */
+    if(pBiDi->controlCount>0) {
+        int32_t runIndex;
+        const UChar *start=pBiDi->text, *limit=start+pBiDi->length, *pu;
+        for(pu=start; pu<limit; pu++) {
+            if(IS_BIDI_CONTROL_CHAR(*pu)) {
+                /* each control character decrements the counter of its run */
+                runIndex=getRunFromLogicalIndex(pBiDi, (int32_t)(pu-start));
+                pBiDi->runs[runIndex].insertRemove--;
+            }
+        }
+    }
+
+    return TRUE;
+}
+
+/*
+ * Shared setup for ubidi_reorderLogical() and ubidi_reorderVisual():
+ * validates the levels, reports the lowest and highest level through
+ * pMinLevel/pMaxLevel and fills indexMap with the identity mapping.
+ * Returns FALSE (leaving all outputs untouched) if levels is NULL,
+ * length<=0, or any level exceeds UBIDI_MAX_EXPLICIT_LEVEL+1.
+ */
+static UBool
+prepareReorder(const UBiDiLevel *levels, int32_t length,
+               int32_t *indexMap,
+               UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) {
+    int32_t i;
+    UBiDiLevel lowest, highest, current;
+
+    if(levels==NULL || length<=0) {
+        return FALSE;
+    }
+
+    /* scan all levels once: validate them and track the extremes */
+    lowest=UBIDI_MAX_EXPLICIT_LEVEL+1;
+    highest=0;
+    for(i=0; i<length; ++i) {
+        current=levels[i];
+        if(current>UBIDI_MAX_EXPLICIT_LEVEL+1) {
+            return FALSE;
+        }
+        if(current<lowest) {
+            lowest=current;
+        }
+        if(current>highest) {
+            highest=current;
+        }
+    }
+    *pMinLevel=lowest;
+    *pMaxLevel=highest;
+
+    /* start from the identity mapping */
+    for(i=0; i<length; ++i) {
+        indexMap[i]=i;
+    }
+
+    return TRUE;
+}
+
+/* reorder a line based on a levels array (L2) ------------------------------ */
+
+/*
+ * Fills indexMap with a logical-to-visual mapping computed directly from
+ * a levels array. Does nothing if indexMap is NULL or prepareReorder()
+ * rejects the input.
+ */
+U_CAPI void U_EXPORT2
+ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) {
+    int32_t pos, seqLimit, mirrorSum;
+    UBiDiLevel minLevel = 0, maxLevel = 0;
+
+    if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) {
+        return;
+    }
+
+    /* a single even level leaves the identity mapping unchanged */
+    if(minLevel==maxLevel && (minLevel&1)==0) {
+        return;
+    }
+
+    /* reorder only down to the lowest odd level */
+    minLevel|=1;
+
+    /* process the levels from maxLevel down to minLevel */
+    for(;;) {
+        pos=0;
+        while(pos<length) {
+            /* skip indexes that are below the current level */
+            if(levels[pos]<maxLevel) {
+                ++pos;
+                continue;
+            }
+
+            /* pos starts a sequence at >=maxLevel; find the index behind it */
+            seqLimit=pos+1;
+            while(seqLimit<length && levels[seqLimit]>=maxLevel) {
+                ++seqLimit;
+            }
+
+            /*
+             * Mirror every entry within the closed interval [pos, seqLimit-1]:
+             * new visual index = (first+last) - old visual index.
+             */
+            mirrorSum=pos+seqLimit-1;
+            for(; pos<seqLimit; ++pos) {
+                indexMap[pos]=mirrorSum-indexMap[pos];
+            }
+
+            /* levels[seqLimit], if it exists, is below maxLevel: continue behind it */
+            pos=seqLimit+1;
+        }
+
+        if(--maxLevel<minLevel) {
+            break;
+        }
+    }
+}
+
+/*
+ * Fills indexMap with a visual-to-logical mapping computed directly from
+ * a levels array. Does nothing if indexMap is NULL or prepareReorder()
+ * rejects the input.
+ */
+U_CAPI void U_EXPORT2
+ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) {
+    int32_t pos, seqLimit, low, high, saved;
+    UBiDiLevel minLevel = 0, maxLevel = 0;
+
+    if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) {
+        return;
+    }
+
+    /* a single even level leaves the identity mapping unchanged */
+    if(minLevel==maxLevel && (minLevel&1)==0) {
+        return;
+    }
+
+    /* reorder only down to the lowest odd level */
+    minLevel|=1;
+
+    /* process the levels from maxLevel down to minLevel */
+    for(;;) {
+        pos=0;
+        while(pos<length) {
+            /* skip indexes that are below the current level */
+            if(levels[pos]<maxLevel) {
+                ++pos;
+                continue;
+            }
+
+            /* pos starts a sequence at >=maxLevel; find the index behind it */
+            seqLimit=pos+1;
+            while(seqLimit<length && levels[seqLimit]>=maxLevel) {
+                ++seqLimit;
+            }
+
+            /*
+             * Reverse the map entries in [pos, seqLimit-1].
+             * The levels themselves need not be swapped: the sequence of
+             * levels that is examined does not move.
+             */
+            for(low=pos, high=seqLimit-1; low<high; ++low, --high) {
+                saved=indexMap[low];
+                indexMap[low]=indexMap[high];
+                indexMap[high]=saved;
+            }
+
+            /* levels[seqLimit], if it exists, is below maxLevel: continue behind it */
+            pos=seqLimit+1;
+        }
+
+        if(--maxLevel<minLevel) {
+            break;
+        }
+    }
+}
+
+/* API functions for logical<->visual mapping ------------------------------- */
+
+/*
+ * Maps one logical index to its visual index.
+ * Returns -1 with *pErrorCode set on failure, and UBIDI_MAP_NOWHERE if the
+ * logical index is not found in any run or (when controls are counted)
+ * refers to a BiDi control character.
+ * The base mapping is computed first; it is then shifted right by the
+ * inserted marks or left by the removed controls that visually precede it.
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) {
+ int32_t visualIndex=UBIDI_MAP_NOWHERE;
+ /* argument validation; the RETURN_IF_* macros are defined elsewhere */
+ RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
+ RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
+ RETURN_IF_BAD_RANGE(logicalIndex, 0, pBiDi->length, *pErrorCode, -1);
+
+ /* we can do the trivial cases without the runs array */
+ switch(pBiDi->direction) {
+ case UBIDI_LTR:
+ visualIndex=logicalIndex;
+ break;
+ case UBIDI_RTL:
+ visualIndex=pBiDi->length-logicalIndex-1;
+ break;
+ default:
+ if(!ubidi_getRuns(pBiDi, pErrorCode)) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return -1;
+ } else {
+ Run *runs=pBiDi->runs;
+ int32_t i, visualStart=0, offset, length;
+
+ /* linear search for the run, search on the visual runs */
+ for(i=0; i<pBiDi->runCount; ++i) {
+ length=runs[i].visualLimit-visualStart;
+ offset=logicalIndex-GET_INDEX(runs[i].logicalStart);
+ if(offset>=0 && offset<length) {
+ if(IS_EVEN_RUN(runs[i].logicalStart)) {
+ /* LTR */
+ visualIndex=visualStart+offset;
+ } else {
+ /* RTL */
+ visualIndex=visualStart+length-offset-1;
+ }
+ break; /* exit for loop */
+ }
+ visualStart+=length;
+ }
+ if(i>=pBiDi->runCount) {
+ return UBIDI_MAP_NOWHERE;
+ }
+ }
+ }
+
+ /*
+ * NOTE(review): the two branches below read pBiDi->runs, but the
+ * UBIDI_LTR/UBIDI_RTL cases above do not call ubidi_getRuns() in this
+ * function - presumably the runs are already valid whenever
+ * insertPoints.size or controlCount is non-zero; confirm against setPara.
+ */
+ if(pBiDi->insertPoints.size>0) {
+ /* add the number of added marks until the calculated visual index */
+ Run *runs=pBiDi->runs;
+ int32_t i, length, insertRemove;
+ int32_t visualStart=0, markFound=0;
+ /* no loop limit: the loop exits by returning from the run that contains visualIndex */
+ for(i=0; ; i++, visualStart+=length) {
+ length=runs[i].visualLimit-visualStart;
+ insertRemove=runs[i].insertRemove;
+ if(insertRemove & (LRM_BEFORE|RLM_BEFORE)) {
+ markFound++;
+ }
+ /* is it the run containing the visual index? */
+ if(visualIndex<runs[i].visualLimit) {
+ return visualIndex+markFound;
+ }
+ if(insertRemove & (LRM_AFTER|RLM_AFTER)) {
+ markFound++;
+ }
+ }
+ }
+ else if(pBiDi->controlCount>0) {
+ /* subtract the number of controls until the calculated visual index */
+ Run *runs=pBiDi->runs;
+ int32_t i, j, start, limit, length, insertRemove;
+ int32_t visualStart=0, controlFound=0;
+ UChar uchar=pBiDi->text[logicalIndex];
+ /* is the logical index pointing to a control ? */
+ if(IS_BIDI_CONTROL_CHAR(uchar)) {
+ return UBIDI_MAP_NOWHERE;
+ }
+ /* loop on runs */
+ for(i=0; ; i++, visualStart+=length) {
+ length=runs[i].visualLimit-visualStart;
+ insertRemove=runs[i].insertRemove;
+ /* calculated visual index is beyond this run? */
+ if(visualIndex>=runs[i].visualLimit) {
+ /* subtracting insertRemove adds to controlFound when insertRemove is negative */
+ controlFound-=insertRemove;
+ continue;
+ }
+ /* calculated visual index must be within current run */
+ if(insertRemove==0) {
+ return visualIndex-controlFound;
+ }
+ /* count only the controls of this run that visually precede the index */
+ if(IS_EVEN_RUN(runs[i].logicalStart)) {
+ /* LTR: check from run start to logical index */
+ start=runs[i].logicalStart;
+ limit=logicalIndex;
+ } else {
+ /* RTL: check from logical index to run end */
+ start=logicalIndex+1;
+ limit=GET_INDEX(runs[i].logicalStart)+length;
+ }
+ for(j=start; j<limit; j++) {
+ uchar=pBiDi->text[j];
+ if(IS_BIDI_CONTROL_CHAR(uchar)) {
+ controlFound++;
+ }
+ }
+ return visualIndex-controlFound;
+ }
+ }
+
+ return visualIndex;
+}
+
+/*
+ * Maps one visual index to its logical index.
+ * Returns -1 with *pErrorCode set on failure, and UBIDI_MAP_NOWHERE if the
+ * visual index falls on an inserted mark.
+ * The visual index is first corrected for inserted marks or removed
+ * controls; the containing run is then located and the index converted.
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) {
+ Run *runs;
+ int32_t i, runCount, start;
+ /* argument validation; the RETURN_IF_* macros are defined elsewhere */
+ RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
+ RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
+ RETURN_IF_BAD_RANGE(visualIndex, 0, pBiDi->resultLength, *pErrorCode, -1);
+ /* we can do the trivial cases without the runs array */
+ if(pBiDi->insertPoints.size==0 && pBiDi->controlCount==0) {
+ if(pBiDi->direction==UBIDI_LTR) {
+ return visualIndex;
+ }
+ else if(pBiDi->direction==UBIDI_RTL) {
+ return pBiDi->length-visualIndex-1;
+ }
+ }
+ if(!ubidi_getRuns(pBiDi, pErrorCode)) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return -1;
+ }
+
+ runs=pBiDi->runs;
+ runCount=pBiDi->runCount;
+ if(pBiDi->insertPoints.size>0) {
+ /* handle inserted LRM/RLM */
+ int32_t markFound=0, insertRemove;
+ int32_t visualStart=0, length;
+ runs=pBiDi->runs;
+ /* subtract number of marks until visual index */
+ /* no loop limit: the loop exits via break or return */
+ for(i=0; ; i++, visualStart+=length) {
+ length=runs[i].visualLimit-visualStart;
+ insertRemove=runs[i].insertRemove;
+ if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
+ /* the index falls on the mark inserted before this run */
+ if(visualIndex<=(visualStart+markFound)) {
+ return UBIDI_MAP_NOWHERE;
+ }
+ markFound++;
+ }
+ /* is adjusted visual index within this run? */
+ if(visualIndex<(runs[i].visualLimit+markFound)) {
+ visualIndex-=markFound;
+ break;
+ }
+ if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
+ /* the index falls on the mark inserted after this run */
+ if(visualIndex==(visualStart+length+markFound)) {
+ return UBIDI_MAP_NOWHERE;
+ }
+ markFound++;
+ }
+ }
+ }
+ else if(pBiDi->controlCount>0) {
+ /* handle removed BiDi control characters */
+ int32_t controlFound=0, insertRemove, length;
+ int32_t logicalStart, logicalEnd, visualStart=0, j, k;
+ UChar uchar;
+ UBool evenRun;
+ /* add number of controls until visual index */
+ for(i=0; ; i++, visualStart+=length) {
+ length=runs[i].visualLimit-visualStart;
+ insertRemove=runs[i].insertRemove;
+ /* is adjusted visual index beyond current run? */
+ if(visualIndex>=(runs[i].visualLimit-controlFound+insertRemove)) {
+ /* subtracting insertRemove adds to controlFound when insertRemove is negative */
+ controlFound-=insertRemove;
+ continue;
+ }
+ /* adjusted visual index is within current run */
+ if(insertRemove==0) {
+ visualIndex+=controlFound;
+ break;
+ }
+ /* count non-control chars until visualIndex */
+ logicalStart=runs[i].logicalStart;
+ evenRun=IS_EVEN_RUN(logicalStart);
+ REMOVE_ODD_BIT(logicalStart);
+ logicalEnd=logicalStart+length-1;
+ /* walk the run in visual order: forward for even runs, backward for odd runs */
+ for(j=0; j<length; j++) {
+ k= evenRun ? logicalStart+j : logicalEnd-j;
+ uchar=pBiDi->text[k];
+ if(IS_BIDI_CONTROL_CHAR(uchar)) {
+ controlFound++;
+ }
+ if((visualIndex+controlFound)==(visualStart+j)) {
+ break;
+ }
+ }
+ visualIndex+=controlFound;
+ break;
+ }
+ }
+ /* handle all cases */
+ /* visualIndex is now an index into the visual order without marks and including controls */
+ if(runCount<=10) {
+ /* linear search for the run */
+ for(i=0; visualIndex>=runs[i].visualLimit; ++i) {}
+ } else {
+ /* binary search for the run */
+ int32_t begin=0, limit=runCount;
+
+ /* the middle if() is guaranteed to find the run, we don't need a loop limit */
+ for(;;) {
+ i=(begin+limit)/2;
+ if(visualIndex>=runs[i].visualLimit) {
+ begin=i+1;
+ } else if(i==0 || visualIndex>=runs[i-1].visualLimit) {
+ break;
+ } else {
+ limit=i;
+ }
+ }
+ }
+
+ start=runs[i].logicalStart;
+ if(IS_EVEN_RUN(start)) {
+ /* LTR */
+ /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */
+ if(i>0) {
+ visualIndex-=runs[i-1].visualLimit;
+ }
+ return start+visualIndex;
+ } else {
+ /* RTL */
+ /* the run is traversed backwards: the last visual position maps to the run's first logical index */
+ return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1;
+ }
+}
+
+/*
+ * Fills indexMap so that indexMap[logicalIndex] is the visual index.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR if indexMap is NULL (after
+ * ubidi_countRuns() has succeeded). indexMap is written at logical
+ * indexes, i.e. it needs room for pBiDi->length entries.
+ * Entries for removed BiDi controls are set to UBIDI_MAP_NOWHERE.
+ */
+U_CAPI void U_EXPORT2
+ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) {
+ RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
+ /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
+ ubidi_countRuns(pBiDi, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ /* no op */
+ } else if(indexMap==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ } else {
+ /* fill a logical-to-visual index map using the runs[] */
+ int32_t visualStart, visualLimit, i, j, k;
+ int32_t logicalStart, logicalLimit;
+ Run *runs=pBiDi->runs;
+ if (pBiDi->length<=0) {
+ return;
+ }
+ /* 0xFF in every byte makes each int32_t entry -1 */
+ if (pBiDi->length>pBiDi->resultLength) {
+ uprv_memset(indexMap, 0xFF, pBiDi->length*sizeof(int32_t));
+ }
+
+ /* first pass: plain mapping, ignoring inserted marks and removed controls */
+ visualStart=0;
+ for(j=0; j<pBiDi->runCount; ++j) {
+ logicalStart=GET_INDEX(runs[j].logicalStart);
+ visualLimit=runs[j].visualLimit;
+ if(IS_EVEN_RUN(runs[j].logicalStart)) {
+ do { /* LTR */
+ indexMap[logicalStart++]=visualStart++;
+ } while(visualStart<visualLimit);
+ } else {
+ logicalStart+=visualLimit-visualStart; /* logicalLimit */
+ do { /* RTL */
+ indexMap[--logicalStart]=visualStart++;
+ } while(visualStart<visualLimit);
+ }
+ /* visualStart==visualLimit; */
+ }
+
+ if(pBiDi->insertPoints.size>0) {
+ int32_t markFound=0, runCount=pBiDi->runCount;
+ int32_t length, insertRemove;
+ visualStart=0;
+ /* add number of marks found until each index */
+ for(i=0; i<runCount; i++, visualStart+=length) {
+ length=runs[i].visualLimit-visualStart;
+ insertRemove=runs[i].insertRemove;
+ if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
+ markFound++;
+ }
+ if(markFound>0) {
+ logicalStart=GET_INDEX(runs[i].logicalStart);
+ logicalLimit=logicalStart+length;
+ for(j=logicalStart; j<logicalLimit; j++) {
+ indexMap[j]+=markFound;
+ }
+ }
+ /* a mark after this run only shifts the runs that follow */
+ if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
+ markFound++;
+ }
+ }
+ }
+ else if(pBiDi->controlCount>0) {
+ int32_t controlFound=0, runCount=pBiDi->runCount;
+ int32_t length, insertRemove;
+ UBool evenRun;
+ UChar uchar;
+ visualStart=0;
+ /* subtract number of controls found until each index */
+ for(i=0; i<runCount; i++, visualStart+=length) {
+ length=runs[i].visualLimit-visualStart;
+ insertRemove=runs[i].insertRemove;
+ /* no control found within previous runs nor within this run */
+ if((controlFound-insertRemove)==0) {
+ continue;
+ }
+ logicalStart=runs[i].logicalStart;
+ evenRun=IS_EVEN_RUN(logicalStart);
+ REMOVE_ODD_BIT(logicalStart);
+ logicalLimit=logicalStart+length;
+ /* if no control within this run */
+ if(insertRemove==0) {
+ for(j=logicalStart; j<logicalLimit; j++) {
+ indexMap[j]-=controlFound;
+ }
+ continue;
+ }
+ /* walk the run in visual order so that controlFound counts the controls visually before k */
+ for(j=0; j<length; j++) {
+ k= evenRun ? logicalStart+j : logicalLimit-j-1;
+ uchar=pBiDi->text[k];
+ if(IS_BIDI_CONTROL_CHAR(uchar)) {
+ controlFound++;
+ indexMap[k]=UBIDI_MAP_NOWHERE;
+ continue;
+ }
+ indexMap[k]-=controlFound;
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Fills indexMap so that indexMap[visualIndex] is the logical index.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR if indexMap is NULL.
+ * Positions of inserted marks are set to UBIDI_MAP_NOWHERE; removed BiDi
+ * controls get no entry. The first pass writes one entry per text position
+ * and the mark pass writes down from index resultLength-1, so indexMap
+ * must have room for the larger of the two counts.
+ */
+U_CAPI void U_EXPORT2
+ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) {
+ RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
+ if(indexMap==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
+ ubidi_countRuns(pBiDi, pErrorCode);
+ if(U_SUCCESS(*pErrorCode)) {
+ /* fill a visual-to-logical index map using the runs[] */
+ Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount;
+ int32_t logicalStart, visualStart, visualLimit, *pi=indexMap;
+
+ if (pBiDi->resultLength<=0) {
+ return;
+ }
+ /* first pass: plain mapping, ignoring inserted marks and removed controls */
+ visualStart=0;
+ for(; runs<runsLimit; ++runs) {
+ logicalStart=runs->logicalStart;
+ visualLimit=runs->visualLimit;
+ if(IS_EVEN_RUN(logicalStart)) {
+ do { /* LTR */
+ *pi++ = logicalStart++;
+ } while(++visualStart<visualLimit);
+ } else {
+ REMOVE_ODD_BIT(logicalStart);
+ logicalStart+=visualLimit-visualStart; /* logicalLimit */
+ do { /* RTL */
+ *pi++ = --logicalStart;
+ } while(++visualStart<visualLimit);
+ }
+ /* visualStart==visualLimit; */
+ }
+
+ if(pBiDi->insertPoints.size>0) {
+ int32_t markFound=0, runCount=pBiDi->runCount;
+ int32_t insertRemove, i, j, k;
+ runs=pBiDi->runs;
+ /* count all inserted marks */
+ for(i=0; i<runCount; i++) {
+ insertRemove=runs[i].insertRemove;
+ if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
+ markFound++;
+ }
+ if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
+ markFound++;
+ }
+ }
+ /* move back indexes by number of preceding marks */
+ /*
+ * Work from the end of the map towards the start so that entries
+ * are copied to higher positions before they can be overwritten;
+ * once markFound reaches 0 the remaining entries are already in place.
+ */
+ k=pBiDi->resultLength;
+ for(i=runCount-1; i>=0 && markFound>0; i--) {
+ insertRemove=runs[i].insertRemove;
+ if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
+ indexMap[--k]= UBIDI_MAP_NOWHERE;
+ markFound--;
+ }
+ visualStart= i>0 ? runs[i-1].visualLimit : 0;
+ for(j=runs[i].visualLimit-1; j>=visualStart && markFound>0; j--) {
+ indexMap[--k]=indexMap[j];
+ }
+ if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
+ indexMap[--k]= UBIDI_MAP_NOWHERE;
+ markFound--;
+ }
+ }
+ }
+ else if(pBiDi->controlCount>0) {
+ int32_t runCount=pBiDi->runCount, logicalEnd;
+ int32_t insertRemove, length, i, j, k, m;
+ UChar uchar;
+ UBool evenRun;
+ runs=pBiDi->runs;
+ visualStart=0;
+ /* move forward indexes by number of preceding controls */
+ /* k is the next output position; it never exceeds the input position being read */
+ k=0;
+ for(i=0; i<runCount; i++, visualStart+=length) {
+ length=runs[i].visualLimit-visualStart;
+ insertRemove=runs[i].insertRemove;
+ /* if no control found yet, nothing to do in this run */
+ if((insertRemove==0)&&(k==visualStart)) {
+ k+=length;
+ continue;
+ }
+ /* if no control in this run */
+ if(insertRemove==0) {
+ visualLimit=runs[i].visualLimit;
+ for(j=visualStart; j<visualLimit; j++) {
+ indexMap[k++]=indexMap[j];
+ }
+ continue;
+ }
+ logicalStart=runs[i].logicalStart;
+ evenRun=IS_EVEN_RUN(logicalStart);
+ REMOVE_ODD_BIT(logicalStart);
+ logicalEnd=logicalStart+length-1;
+ /* regenerate this run's entries in visual order, skipping the control characters */
+ for(j=0; j<length; j++) {
+ m= evenRun ? logicalStart+j : logicalEnd-j;
+ uchar=pBiDi->text[m];
+ if(!IS_BIDI_CONTROL_CHAR(uchar)) {
+ indexMap[k++]=m;
+ }
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Inverts an index map: for every non-negative srcMap[i],
+ * destMap[srcMap[i]] is set to i. Negative source entries are skipped.
+ * If srcMap does not cover every destination index up to its highest value,
+ * the first (highest value+1) entries of destMap are preset to -1.
+ * Does nothing if either pointer is NULL or length<=0.
+ */
+U_CAPI void U_EXPORT2
+ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) {
+    int32_t highest=-1, mapped=0, destLength, i, value;
+
+    if(srcMap==NULL || destMap==NULL || length<=0) {
+        return;
+    }
+
+    /* find highest value and count non-negative indexes in srcMap */
+    for(i=length-1; i>=0; --i) {
+        value=srcMap[i];
+        if(value>highest) {
+            highest=value;
+        }
+        if(value>=0) {
+            mapped++;
+        }
+    }
+    destLength=highest+1; /* add 1 for origin 0 */
+
+    if(mapped<destLength) {
+        /* we must fill unmatched destMap entries with -1 */
+        uprv_memset(destMap, 0xFF, destLength*sizeof(int32_t));
+    }
+
+    /* walk from the last source index down to the first, as before */
+    for(i=length-1; i>=0; --i) {
+        value=srcMap[i];
+        if(value>=0) {
+            destMap[value]=i;
+        }
+    }
+}
diff --git a/thirdparty/icu4c/common/ubiditransform.cpp b/thirdparty/icu4c/common/ubiditransform.cpp
new file mode 100644
index 0000000000..d56bf1518b
--- /dev/null
+++ b/thirdparty/icu4c/common/ubiditransform.cpp
@@ -0,0 +1,530 @@
+/*
+******************************************************************************
+*
+* © 2016 and later: Unicode, Inc. and others.
+* License & terms of use: http://www.unicode.org/copyright.html
+*
+******************************************************************************
+* file name: ubiditransform.c
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2016jul24
+* created by: Lina Kemmel
+*
+*/
+
+#include "cmemory.h"
+#include "unicode/ubidi.h"
+#include "unicode/ustring.h"
+#include "unicode/ushape.h"
+#include "unicode/utf16.h"
+#include "ustr_imp.h"
+#include "unicode/ubiditransform.h"
+
+/* Some convenience defines */
+#define LTR UBIDI_LTR
+#define RTL UBIDI_RTL
+#define LOGICAL UBIDI_LOGICAL
+#define VISUAL UBIDI_VISUAL
+#define SHAPE_LOGICAL U_SHAPE_TEXT_DIRECTION_LOGICAL
+#define SHAPE_VISUAL U_SHAPE_TEXT_DIRECTION_VISUAL_LTR
+
+/*
+ * Validates and normalizes a length argument. Must be expanded inside a
+ * function that returns an integer, because it contains return statements:
+ *  - LEN == 0  : returns 0 from the enclosing function (no error is set);
+ *  - LEN <  -1 : stores U_ILLEGAL_ARGUMENT_ERROR through ERROR and returns 0;
+ *  - LEN == -1 : replaces LEN with u_strlen(STR).
+ * LEN is evaluated several times and assigned to, so it must be a plain
+ * modifiable variable.
+ */
+#define CHECK_LEN(STR, LEN, ERROR) UPRV_BLOCK_MACRO_BEGIN { \
+ if (LEN == 0) return 0; \
+ if (LEN < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \
+ if (LEN == -1) LEN = u_strlen(STR); \
+} UPRV_BLOCK_MACRO_END
+
+/*
+ * Capacity of ReorderingScheme::actions, counting the terminating NULL entry.
+ */
+#define MAX_ACTIONS 7
+
+/**
+ * Pointer to one step of a bidi layout transformation (reordering, switching
+ * to "inverse" mode, character mirroring, etc.). The step receives the
+ * transform state and the error code; its return value tells whether the
+ * step changed the text.
+ */
+using UBiDiAction = UBool (*)(UBiDiTransform *, UErrorCode *);
+
+/**
+ * Structure that holds a predefined reordering scheme, including the following
+ * information:
+ * <ul>
+ * <li>an input base direction,</li>
+ * <li>an input order,</li>
+ * <li>an output base direction,</li>
+ * <li>an output order,</li>
+ * <li>a digit shaping direction,</li>
+ * <li>a letter shaping direction,</li>
+ * <li>a base direction that should be applied when the reordering engine is
+ * invoked (which can not always be derived from the caller-defined
+ * options),</li>
+ * <li>an array of pointers to functions that accomplish the bidi layout
+ * transformation.</li>
+ * </ul>
+ * The <code>actions</code> array is executed in order and is terminated by a
+ * NULL entry, so a scheme can hold at most <code>MAX_ACTIONS - 1</code> steps.
+ */
+typedef struct {
+ UBiDiLevel inLevel; /* input level */
+ UBiDiOrder inOrder; /* input order */
+ UBiDiLevel outLevel; /* output level */
+ UBiDiOrder outOrder; /* output order */
+ uint32_t digitsDir; /* digit shaping direction */
+ uint32_t lettersDir; /* letter shaping direction */
+ UBiDiLevel baseLevel; /* paragraph level to be used with setPara */
+ const UBiDiAction actions[MAX_ACTIONS]; /* array of pointers to functions carrying out the transformation */
+} ReorderingScheme;
+
+/*
+ * Working state of a bidi transformation. The structure owns pBidi and src
+ * (both are released by ubiditransform_close()); dest and pDestLength point
+ * at caller-provided storage and are only valid during one
+ * ubiditransform_transform() call.
+ */
+struct UBiDiTransform {
+ UBiDi *pBidi; /* pointer to a UBiDi object */
+ const ReorderingScheme *pActiveScheme; /* effective reordering scheme */
+ UChar *src; /* input text */
+ UChar *dest; /* output text */
+ uint32_t srcLength; /* input text length - not really needed as we are zero-terminated and can u_strlen */
+ uint32_t srcSize; /* input text capacity excluding the trailing zero */
+ uint32_t destSize; /* output text capacity */
+ uint32_t *pDestLength; /* number of UChars written to dest */
+ uint32_t reorderingOptions; /* reordering options - currently only support DO_MIRRORING */
+ uint32_t digits; /* digit option for ArabicShaping */
+ uint32_t letters; /* letter option for ArabicShaping */
+};
+
+/*
+ * Allocates a zero-initialized UBiDiTransform. Returns NULL without
+ * allocating when *pErrorCode already indicates a failure; sets
+ * U_MEMORY_ALLOCATION_ERROR and returns NULL when the allocation fails.
+ */
+U_CAPI UBiDiTransform* U_EXPORT2
+ubiditransform_open(UErrorCode *pErrorCode)
+{
+    if (U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    void *storage = uprv_calloc(1, sizeof(UBiDiTransform));
+    if (storage == NULL) {
+        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return static_cast<UBiDiTransform *>(storage);
+}
+
+/*
+ * Releases a UBiDiTransform together with the UBiDi object and the source
+ * buffer it owns. A NULL argument is ignored.
+ */
+U_CAPI void U_EXPORT2
+ubiditransform_close(UBiDiTransform *pBiDiTransform)
+{
+    if (pBiDiTransform == NULL) {
+        return;
+    }
+    UBiDi *ownedBidi = pBiDiTransform->pBidi;
+    if (ownedBidi != NULL) {
+        ubidi_close(ownedBidi);
+    }
+    UChar *ownedSrc = pBiDiTransform->src;
+    if (ownedSrc != NULL) {
+        uprv_free(ownedSrc);
+    }
+    uprv_free(pBiDiTransform);
+}
+
+/**
+ * Runs the bidi algorithm on the current source text, using the base level
+ * prescribed by the active scheme.
+ *
+ * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
+ * @param pErrorCode Pointer to the error code value.
+ *
+ * @return Always <code>FALSE</code>: this step does not write any text. The
+ * caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
+ */
+static UBool
+action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
+{
+    const UBiDiLevel paraLevel = pTransform->pActiveScheme->baseLevel;
+    ubidi_setPara(pTransform->pBidi, pTransform->src, pTransform->srcLength,
+            paraLevel, NULL, pErrorCode);
+    return FALSE;
+}
+
+/**
+ * Writes the resolved text to the destination in reordered form
+ * (Logical -> Visual LTR), applying the pending reordering options.
+ *
+ * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
+ * @param pErrorCode Pointer to the error code value.
+ *
+ * @return Always <code>TRUE</code>: the destination has been written. The
+ * caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
+ */
+static UBool
+action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
+{
+    const uint16_t writeOptions = static_cast<uint16_t>(pTransform->reorderingOptions);
+    ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize,
+            writeOptions, pErrorCode);
+
+    /* the options have been consumed by this step; do not apply them again */
+    pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT;
+    /* the reported length is the source length, not the writer's return value */
+    *pTransform->pDestLength = pTransform->srcLength;
+    return TRUE;
+}
+
+/**
+ * Switches the <code>UBiDi</code> object to "inverse" operation and selects
+ * the <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> reordering mode.
+ *
+ * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
+ * @param pErrorCode Pointer to the error code value (not used by this step).
+ *
+ * @return Always <code>FALSE</code>: this step does not write any text.
+ */
+static UBool
+action_setInverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
+{
+    UBiDi *bidi = pTransform->pBidi;
+    (void)pErrorCode;
+    ubidi_setInverse(bidi, TRUE);
+    ubidi_setReorderingMode(bidi, UBIDI_REORDER_INVERSE_LIKE_DIRECT);
+    return FALSE;
+}
+
+/**
+ * Selects the "runs only" reordering mode, which is used for a
+ * Logical LTR <-> Logical RTL transformation.
+ *
+ * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
+ * @param pErrorCode Pointer to the error code value (not used by this step).
+ *
+ * @return Always <code>FALSE</code>: this step does not write any text.
+ */
+static UBool
+action_setRunsOnly(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
+{
+    UBiDi *bidi = pTransform->pBidi;
+    (void)pErrorCode;
+    ubidi_setReorderingMode(bidi, UBIDI_REORDER_RUNS_ONLY);
+    return FALSE;
+}
+
+/**
+ * Writes the current source text to the destination in reverse order, with
+ * default options.
+ *
+ * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
+ * @param pErrorCode Pointer to the error code value.
+ *
+ * @return Always <code>TRUE</code>: the destination has been written. The
+ * caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
+ */
+static UBool
+action_reverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
+{
+    const uint32_t inputLength = pTransform->srcLength;
+    ubidi_writeReverse(pTransform->src, inputLength,
+            pTransform->dest, pTransform->destSize,
+            UBIDI_REORDER_DEFAULT, pErrorCode);
+    /* the reported length is the source length, not the writer's return value */
+    *pTransform->pDestLength = inputLength;
+    return TRUE;
+}
+
+/**
+ * Applies a new value to the text that serves as input at the current
+ * processing step. This value is identical to the original one when we begin
+ * the processing, but usually changes as the transformation progresses.
+ *
+ * The internal buffer is reallocated (with some slack) only when
+ * <code>newSize</code> exceeds the current capacity. If that reallocation
+ * fails, the transform is left with no source buffer and a capacity of zero,
+ * so that a later call on the same object allocates again instead of writing
+ * through a NULL pointer.
+ *
+ * @param pTransform A pointer to the <code>UBiDiTransform</code> structure.
+ * @param newSrc A pointer whose value is to be used as input text.
+ * @param newLength A length of the new text in <code>UChar</code>s.
+ * @param newSize A new source capacity in <code>UChar</code>s.
+ * @param pErrorCode Pointer to the error code value. Set to
+ *        <code>U_BUFFER_OVERFLOW_ERROR</code> when <code>newSize</code> is
+ *        smaller than <code>newLength</code>, and to
+ *        <code>U_MEMORY_ALLOCATION_ERROR</code> when the buffer cannot be
+ *        allocated.
+ */
+static void
+updateSrc(UBiDiTransform *pTransform, const UChar *newSrc, uint32_t newLength,
+        uint32_t newSize, UErrorCode *pErrorCode)
+{
+    if (newSize < newLength) {
+        *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
+        return;
+    }
+    if (newSize > pTransform->srcSize) {
+        newSize += 50; // allocate slightly more than needed right now
+        if (pTransform->src != NULL) {
+            uprv_free(pTransform->src);
+            pTransform->src = NULL;
+        }
+        /* The old buffer is gone: forget its capacity and length so that a
+           failed allocation below cannot leave a stale non-zero srcSize
+           paired with a NULL src. */
+        pTransform->srcLength = pTransform->srcSize = 0;
+        pTransform->src = (UChar *)uprv_malloc(newSize * sizeof(UChar));
+        if (pTransform->src == NULL) {
+            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        pTransform->srcSize = newSize;
+    }
+    u_strncpy(pTransform->src, newSrc, newLength);
+    pTransform->srcLength = u_terminateUChars(pTransform->src,
+            pTransform->srcSize, newLength, pErrorCode);
+}
+
+/**
+ * Thin wrapper around <code>u_shapeArabic()</code>: shapes the current source
+ * text into the destination buffer and records the resulting length.
+ *
+ * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
+ * @param options Shaping options passed through to the shaping function.
+ * @param pErrorCode Pointer to the error code value.
+ */
+static void
+doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode)
+{
+    const int32_t shapedLength = u_shapeArabic(pTransform->src,
+            pTransform->srcLength, pTransform->dest, pTransform->destSize,
+            options, pErrorCode);
+    *pTransform->pDestLength = shapedLength;
+}
+
+/**
+ * Applies the requested digit and letter shaping. When the active scheme
+ * uses the same text direction for digits and letters, both are shaped in a
+ * single pass; otherwise digits are shaped first, the result becomes the new
+ * source, and letters are shaped in a second pass.
+ *
+ * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
+ * @param pErrorCode Pointer to the error code value.
+ *
+ * @return <code>FALSE</code> when no shaping option is set (nothing is
+ * written), <code>TRUE</code> otherwise. The caller should also check
+ * <code>U_SUCCESS(*pErrorCode)</code>.
+ */
+static UBool
+action_shapeArabic(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
+{
+    const uint32_t letterOptions = pTransform->letters;
+    const uint32_t digitOptions = pTransform->digits;
+    if ((letterOptions | digitOptions) == 0) {
+        return FALSE;
+    }
+
+    const ReorderingScheme *scheme = pTransform->pActiveScheme;
+    if (scheme->lettersDir != scheme->digitsDir) {
+        /* two passes: digits first, then letters on the digit-shaped text */
+        doShape(pTransform, digitOptions | scheme->digitsDir, pErrorCode);
+        if (U_SUCCESS(*pErrorCode)) {
+            updateSrc(pTransform, pTransform->dest, *pTransform->pDestLength,
+                    *pTransform->pDestLength, pErrorCode);
+            doShape(pTransform, letterOptions | scheme->lettersDir,
+                    pErrorCode);
+        }
+    } else {
+        /* same direction for both: one combined pass */
+        doShape(pTransform, letterOptions | digitOptions | scheme->lettersDir,
+                pErrorCode);
+    }
+    return TRUE;
+}
+
+/**
+ * Copies the source text to the destination, replacing every code point that
+ * sits on an odd (RTL) level with its mirror image.
+ *
+ * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
+ * @param pErrorCode Pointer to the error code value. Set to
+ *        <code>U_BUFFER_OVERFLOW_ERROR</code> when the destination is smaller
+ *        than the source text.
+ *
+ * @return <code>FALSE</code> when mirroring was not requested or the
+ * destination is too small, <code>TRUE</code> when the text was written. The
+ * caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
+ */
+static UBool
+action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
+{
+    if ((pTransform->reorderingOptions & UBIDI_DO_MIRRORING) == 0) {
+        return FALSE;
+    }
+    if (pTransform->srcLength > pTransform->destSize) {
+        *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
+        return FALSE;
+    }
+
+    uint32_t readPos = 0;
+    uint32_t writePos = 0;
+    do {
+        /* the level must be sampled before U16_NEXT advances readPos */
+        const UBool onOddLevel = ubidi_getLevelAt(pTransform->pBidi, readPos) & 1;
+        UChar32 codePoint;
+        U16_NEXT(pTransform->src, readPos, pTransform->srcLength, codePoint);
+        const UChar32 outPoint = onOddLevel ? u_charMirror(codePoint) : codePoint;
+        U16_APPEND_UNSAFE(pTransform->dest, writePos, outPoint);
+    } while (readPos < pTransform->srcLength);
+
+    *pTransform->pDestLength = pTransform->srcLength;
+    /* mirroring is done; later steps must not mirror again */
+    pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT;
+    return TRUE;
+}
+
+/**
+ * All possible reordering schemes.
+ *
+ * Each entry lists, in ReorderingScheme field order: input level, input
+ * order, output level, output order, digit shaping direction, letter shaping
+ * direction, the base level used when resolving, and the NULL-terminated
+ * sequence of actions to run. findMatchingScheme() selects an entry by its
+ * first four fields.
+ */
+static const ReorderingScheme Schemes[] =
+{
+ /* 0: Logical LTR => Visual LTR */
+ {LTR, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
+ {action_shapeArabic, action_resolve, action_reorder, NULL}},
+ /* 1: Logical RTL => Visual LTR */
+ {RTL, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
+ {action_resolve, action_reorder, action_shapeArabic, NULL}},
+ /* 2: Logical LTR => Visual RTL */
+ {LTR, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
+ {action_shapeArabic, action_resolve, action_reorder, action_reverse, NULL}},
+ /* 3: Logical RTL => Visual RTL */
+ {RTL, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
+ {action_resolve, action_reorder, action_shapeArabic, action_reverse, NULL}},
+ /* 4: Visual LTR => Logical RTL */
+ {LTR, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
+ {action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}},
+ /* 5: Visual RTL => Logical RTL */
+ {RTL, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
+ {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}},
+ /* 6: Visual LTR => Logical LTR */
+ {LTR, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
+ {action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}},
+ /* 7: Visual RTL => Logical LTR */
+ {RTL, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
+ {action_reverse, action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}},
+ /* 8: Logical LTR => Logical RTL */
+ {LTR, LOGICAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
+ {action_shapeArabic, action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, NULL}},
+ /* 9: Logical RTL => Logical LTR */
+ {RTL, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, RTL,
+ {action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, action_shapeArabic, NULL}},
+ /* 10: Visual LTR => Visual RTL */
+ {LTR, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
+ {action_shapeArabic, action_setInverse, action_resolve, action_mirror, action_reverse, NULL}},
+ /* 11: Visual RTL => Visual LTR */
+ {RTL, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
+ {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_mirror, NULL}},
+ /* 12: Logical LTR => Logical LTR */
+ {LTR, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
+ {action_resolve, action_mirror, action_shapeArabic, NULL}},
+ /* 13: Logical RTL => Logical RTL */
+ {RTL, LOGICAL, RTL, LOGICAL, SHAPE_VISUAL, SHAPE_LOGICAL, RTL,
+ {action_resolve, action_mirror, action_shapeArabic, NULL}},
+ /* 14: Visual LTR => Visual LTR */
+ {LTR, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
+ {action_resolve, action_mirror, action_shapeArabic, NULL}},
+ /* 15: Visual RTL => Visual RTL */
+ {RTL, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
+ {action_reverse, action_resolve, action_mirror, action_shapeArabic, action_reverse, NULL}}
+};
+
+/* Number of entries in Schemes. */
+static const uint32_t nSchemes = sizeof(Schemes) / sizeof(*Schemes);
+
+/**
+ * Reduces the caller-supplied paragraph levels to plain LTR/RTL values.
+ *
+ * For the input level, <code>UBIDI_DEFAULT_LTR</code> and
+ * <code>UBIDI_DEFAULT_RTL</code> are resolved from the base direction of the
+ * text; when the text has no strong direction, the default named by the
+ * option is used. Any other input level is reduced to its lowest bit.
+ * For the output level, either default option means "same as the resolved
+ * input level"; any other value is reduced to its lowest bit.
+ */
+static void
+resolveBaseDirection(const UChar *text, uint32_t length,
+        UBiDiLevel *pInLevel, UBiDiLevel *pOutLevel)
+{
+    const UBiDiLevel requestedIn = *pInLevel;
+    if (requestedIn == UBIDI_DEFAULT_LTR || requestedIn == UBIDI_DEFAULT_RTL) {
+        const UBiDiLevel detected =
+                static_cast<UBiDiLevel>(ubidi_getBaseDirection(text, length));
+        if (detected != UBIDI_NEUTRAL) {
+            *pInLevel = detected;
+        } else if (requestedIn == UBIDI_DEFAULT_RTL) {
+            *pInLevel = static_cast<UBiDiLevel>(RTL);
+        } else {
+            *pInLevel = static_cast<UBiDiLevel>(LTR);
+        }
+    } else {
+        *pInLevel = static_cast<UBiDiLevel>(requestedIn & 1);
+    }
+
+    /* the output level is examined only after the input level is final */
+    const UBiDiLevel requestedOut = *pOutLevel;
+    if (requestedOut == UBIDI_DEFAULT_LTR || requestedOut == UBIDI_DEFAULT_RTL) {
+        *pOutLevel = *pInLevel;
+    } else {
+        *pOutLevel = static_cast<UBiDiLevel>(requestedOut & 1);
+    }
+}
+
+/**
+ * Looks up the predefined <code>ReorderingScheme</code> whose input/output
+ * levels and orders all equal the requested ones.
+ *
+ * @return The first matching entry of <code>Schemes</code>, or NULL when no
+ * entry matches.
+ */
+static const ReorderingScheme*
+findMatchingScheme(UBiDiLevel inLevel, UBiDiLevel outLevel,
+        UBiDiOrder inOrder, UBiDiOrder outOrder)
+{
+    const ReorderingScheme *const tableEnd = Schemes + nSchemes;
+    for (const ReorderingScheme *candidate = Schemes; candidate != tableEnd; ++candidate) {
+        if (candidate->inLevel != inLevel || candidate->outLevel != outLevel) {
+            continue;
+        }
+        if (candidate->inOrder != inOrder || candidate->outOrder != outOrder) {
+            continue;
+        }
+        return candidate;
+    }
+    return NULL;
+}
+
+/*
+ * Transforms src into dest according to the requested input/output paragraph
+ * levels and orders, optionally mirroring characters and applying Arabic
+ * shaping.
+ *
+ * pBiDiTransform may be NULL, in which case a temporary transform object is
+ * opened for this call and closed before returning. src and dest must not be
+ * NULL. srcLength and destSize may be -1 (the length is then taken with
+ * u_strlen), and the function returns 0 immediately when either is 0.
+ *
+ * Returns the number of UChars written to dest, or 0 when *pErrorCode
+ * indicates a failure or when no predefined scheme matches the request.
+ */
+U_CAPI uint32_t U_EXPORT2
+ubiditransform_transform(UBiDiTransform *pBiDiTransform,
+            const UChar *src, int32_t srcLength,
+            UChar *dest, int32_t destSize,
+            UBiDiLevel inParaLevel, UBiDiOrder inOrder,
+            UBiDiLevel outParaLevel, UBiDiOrder outOrder,
+            UBiDiMirroring doMirroring, uint32_t shapingOptions,
+            UErrorCode *pErrorCode)
+{
+    uint32_t destLength = 0;
+    UBool textChanged = FALSE;
+    const UBiDiTransform *pOrigTransform = pBiDiTransform;
+    const UBiDiAction *action = NULL;
+
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if (src == NULL || dest == NULL) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    CHECK_LEN(src, srcLength, pErrorCode);
+    CHECK_LEN(dest, destSize, pErrorCode);
+
+    if (pBiDiTransform == NULL) {
+        /* no transform object supplied: use a temporary one, closed at cleanup */
+        pBiDiTransform = ubiditransform_open(pErrorCode);
+        if (U_FAILURE(*pErrorCode)) {
+            return 0;
+        }
+    }
+    /* Current limitation: text with multiple paragraphs is resolved
+       according to the 1st paragraph */
+    resolveBaseDirection(src, srcLength, &inParaLevel, &outParaLevel);
+
+    pBiDiTransform->pActiveScheme = findMatchingScheme(inParaLevel, outParaLevel,
+            inOrder, outOrder);
+    if (pBiDiTransform->pActiveScheme == NULL) {
+        goto cleanup;
+    }
+    pBiDiTransform->reorderingOptions = doMirroring ? UBIDI_DO_MIRRORING
+            : UBIDI_REORDER_DEFAULT;
+
+    /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text
+       scheme at the time shaping is invoked. */
+    shapingOptions &= ~U_SHAPE_TEXT_DIRECTION_MASK;
+    pBiDiTransform->digits = shapingOptions & ~U_SHAPE_LETTERS_MASK;
+    pBiDiTransform->letters = shapingOptions & ~U_SHAPE_DIGITS_MASK;
+
+    updateSrc(pBiDiTransform, src, srcLength, destSize > srcLength ? destSize : srcLength, pErrorCode);
+    if (U_FAILURE(*pErrorCode)) {
+        goto cleanup;
+    }
+    if (pBiDiTransform->pBidi == NULL) {
+        pBiDiTransform->pBidi = ubidi_openSized(0, 0, pErrorCode);
+        if (U_FAILURE(*pErrorCode)) {
+            goto cleanup;
+        }
+    }
+    pBiDiTransform->dest = dest;
+    pBiDiTransform->destSize = destSize;
+    pBiDiTransform->pDestLength = &destLength;
+
+    /* Checking for U_SUCCESS() within the loop to bail out on first failure. */
+    for (action = pBiDiTransform->pActiveScheme->actions; *action && U_SUCCESS(*pErrorCode); action++) {
+        if ((*action)(pBiDiTransform, pErrorCode)) {
+            /* Feed the output back as the next step's input, but only when
+               there is a next step. The array is NULL-terminated, so
+               action[1] is always readable here. (The former test,
+               "action + 1", compared a non-null pointer and was always
+               true.) */
+            if (action[1] != NULL) {
+                updateSrc(pBiDiTransform, pBiDiTransform->dest, *pBiDiTransform->pDestLength,
+                        *pBiDiTransform->pDestLength, pErrorCode);
+            }
+            textChanged = TRUE;
+        }
+    }
+    /* undo any "inverse" setting made by action_setInverse */
+    ubidi_setInverse(pBiDiTransform->pBidi, FALSE);
+
+    if (!textChanged && U_SUCCESS(*pErrorCode)) {
+        /* Text was not changed - just copy src to dest */
+        if (destSize < srcLength) {
+            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
+        } else {
+            u_strncpy(dest, src, srcLength);
+            destLength = srcLength;
+        }
+    }
+cleanup:
+    if (pOrigTransform != pBiDiTransform) {
+        /* the transform object was created by this call */
+        ubiditransform_close(pBiDiTransform);
+    } else {
+        /* do not keep pointers to the caller's buffer and to our local */
+        pBiDiTransform->dest = NULL;
+        pBiDiTransform->pDestLength = NULL;
+        pBiDiTransform->srcLength = 0;
+        pBiDiTransform->destSize = 0;
+    }
+    return U_FAILURE(*pErrorCode) ? 0 : destLength;
+}
diff --git a/thirdparty/icu4c/common/ubidiwrt.cpp b/thirdparty/icu4c/common/ubidiwrt.cpp
new file mode 100644
index 0000000000..a69c0a4b8b
--- /dev/null
+++ b/thirdparty/icu4c/common/ubidiwrt.cpp
@@ -0,0 +1,650 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2000-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ubidiwrt.c
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999aug06
+* created by: Markus W. Scherer, updated by Matitiahu Allouche
+*
+* This file contains implementations for BiDi functions that use
+* the core algorithm and core API to write reordered text.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "unicode/uchar.h"
+#include "unicode/ubidi.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "ustr_imp.h"
+#include "ubidiimp.h"
+
+/*
+ * The function implementations in this file are designed
+ * for UTF-16 and UTF-32, not for UTF-8.
+ *
+ * Assumptions that are not true for UTF-8:
+ * - Any code point always needs the same number of code units
+ * ("minimum-length-problem" of UTF-8)
+ * - The BiDi control characters need only one code unit each
+ *
+ * Further assumptions for all UTFs:
+ * - u_charMirror(c) needs the same number of code units as c
+ */
+#if defined(UTF_SIZE) && UTF_SIZE==8
+# error reimplement ubidi_writeReordered() for UTF-8, see comment above
+#endif
+
+/*
+ * Non-zero when the general category value `type` (as returned by
+ * u_charType()) is U_NON_SPACING_MARK, U_COMBINING_SPACING_MARK or
+ * U_ENCLOSING_MARK. Implemented as a bit-set membership test.
+ */
+#define IS_COMBINING(type) ((1UL<<(type))&(1UL<<U_NON_SPACING_MARK|1UL<<U_COMBINING_SPACING_MARK|1UL<<U_ENCLOSING_MARK))
+
+/*
+ * When we have UBIDI_OUTPUT_REVERSE set on ubidi_writeReordered(), then we
+ * semantically write RTL runs in reverse and later reverse them again.
+ * Instead, we actually write them in forward order to begin with.
+ * However, if the RTL run was to be mirrored, we need to mirror here now
+ * since the implicit second reversal must not do it.
+ * It looks strange to do mirroring in LTR output, but it is only because
+ * we are writing RTL output in reverse.
+ *
+ * doWriteForward() copies one run from src to dest in source order.
+ * Only the UBIDI_REMOVE_BIDI_CONTROLS and UBIDI_DO_MIRRORING bits of
+ * options are examined.
+ *
+ * Return value: the number of code units the run needs in the destination.
+ * If destSize is too small, *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR
+ * and the needed length is still returned (preflighting); in the two cases
+ * that do not remove controls nothing is written on overflow.
+ *
+ * All loops are do/while loops, i.e. at least one code unit is read;
+ * callers are expected to pass srcLength>0.
+ */
+static int32_t
+doWriteForward(const UChar *src, int32_t srcLength,
+ UChar *dest, int32_t destSize,
+ uint16_t options,
+ UErrorCode *pErrorCode) {
+ /* optimize for several combinations of options */
+ switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING)) {
+ case 0: {
+ /* simply copy the LTR run to the destination */
+ int32_t length=srcLength;
+ if(destSize<length) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ return srcLength;
+ }
+ do {
+ *dest++=*src++;
+ } while(--length>0);
+ return srcLength;
+ }
+ case UBIDI_DO_MIRRORING: {
+ /* do mirroring */
+ int32_t i=0, j=0;
+ UChar32 c;
+
+ if(destSize<srcLength) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ return srcLength;
+ }
+ do {
+ U16_NEXT(src, i, srcLength, c);
+ c=u_charMirror(c);
+ U16_APPEND_UNSAFE(dest, j, c);
+ } while(i<srcLength);
+ return srcLength;
+ }
+ case UBIDI_REMOVE_BIDI_CONTROLS: {
+ /* copy the LTR run and remove any BiDi control characters */
+ int32_t remaining=destSize;
+ UChar c;
+ do {
+ c=*src++;
+ if(!IS_BIDI_CONTROL_CHAR(c)) {
+ if(--remaining<0) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+
+ /* preflight the length */
+ while(--srcLength>0) {
+ c=*src++;
+ if(!IS_BIDI_CONTROL_CHAR(c)) {
+ --remaining;
+ }
+ }
+ /* remaining is negative here, so this is larger than destSize */
+ return destSize-remaining;
+ }
+ *dest++=c;
+ }
+ } while(--srcLength>0);
+ return destSize-remaining;
+ }
+ default: {
+ /* remove BiDi control characters and do mirroring */
+ int32_t remaining=destSize;
+ int32_t i, j=0;
+ UChar32 c;
+ do {
+ /* read one code point from the front of src, then advance src past it */
+ i=0;
+ U16_NEXT(src, i, srcLength, c);
+ src+=i;
+ srcLength-=i;
+ if(!IS_BIDI_CONTROL_CHAR(c)) {
+ remaining-=i;
+ if(remaining<0) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+
+ /* preflight the length */
+ while(srcLength>0) {
+ c=*src++;
+ if(!IS_BIDI_CONTROL_CHAR(c)) {
+ --remaining;
+ }
+ --srcLength;
+ }
+ return destSize-remaining;
+ }
+ c=u_charMirror(c);
+ U16_APPEND_UNSAFE(dest, j, c);
+ }
+ } while(srcLength>0);
+ return j;
+ }
+ } /* end of switch */
+}
+
+static int32_t
+doWriteReverse(const UChar *src, int32_t srcLength,
+ UChar *dest, int32_t destSize,
+ uint16_t options,
+ UErrorCode *pErrorCode) {
+ /*
+ * RTL run -
+ *
+ * RTL runs need to be copied to the destination in reverse order
+ * of code points, not code units, to keep Unicode characters intact.
+ *
+ * The general strategy for this is to read the source text
+ * in backward order, collect all code units for a code point
+ * (and optionally following combining characters, see below),
+ * and copy all these code units in ascending order
+ * to the destination for this run.
+ *
+ * Several options request whether combining characters
+ * should be kept after their base characters,
+ * whether BiDi control characters should be removed, and
+ * whether characters should be replaced by their mirror-image
+ * equivalent Unicode characters.
+ *
+ * Return value: the number of code units the run needs in the
+ * destination. If destSize is smaller than that, *pErrorCode is set to
+ * U_BUFFER_OVERFLOW_ERROR, nothing is written, and the needed length
+ * is returned.
+ *
+ * All loops are do/while loops, i.e. at least one code unit is read;
+ * callers are expected to pass srcLength>0.
+ */
+ int32_t i, j;
+ UChar32 c;
+
+ /* optimize for several combinations of options */
+ switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) {
+ case 0:
+ /*
+ * With none of the "complicated" options set, the destination
+ * run will have the same length as the source run,
+ * and there is no mirroring and no keeping combining characters
+ * with their base characters.
+ */
+ if(destSize<srcLength) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ return srcLength;
+ }
+ /* from here on destSize holds the return value (the output length) */
+ destSize=srcLength;
+
+ /* preserve character integrity */
+ do {
+ /* i is always after the last code unit known to need to be kept in this segment */
+ i=srcLength;
+
+ /* collect code units for one base character */
+ U16_BACK_1(src, 0, srcLength);
+
+ /* copy this base character */
+ j=srcLength;
+ do {
+ *dest++=src[j++];
+ } while(j<i);
+ } while(srcLength>0);
+ break;
+ case UBIDI_KEEP_BASE_COMBINING:
+ /*
+ * Here, too, the destination
+ * run will have the same length as the source run,
+ * and there is no mirroring.
+ * We do need to keep combining characters with their base characters.
+ */
+ if(destSize<srcLength) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ return srcLength;
+ }
+ destSize=srcLength;
+
+ /* preserve character integrity */
+ do {
+ /* i is always after the last code unit known to need to be kept in this segment */
+ i=srcLength;
+
+ /* collect code units and modifier letters for one base character */
+ do {
+ U16_PREV(src, 0, srcLength, c);
+ } while(srcLength>0 && IS_COMBINING(u_charType(c)));
+
+ /* copy this "user character" */
+ j=srcLength;
+ do {
+ *dest++=src[j++];
+ } while(j<i);
+ } while(srcLength>0);
+ break;
+ default:
+ /*
+ * With several "complicated" options set, this is the most
+ * general and the slowest copying of an RTL run.
+ * We will do mirroring, remove BiDi controls, and
+ * keep combining characters with their base characters
+ * as requested.
+ */
+ if(!(options&UBIDI_REMOVE_BIDI_CONTROLS)) {
+ i=srcLength;
+ } else {
+ /* we need to find out the destination length of the run,
+ which will not include the BiDi control characters */
+ int32_t length=srcLength;
+ UChar ch;
+
+ i=0;
+ do {
+ ch=*src++;
+ if(!IS_BIDI_CONTROL_CHAR(ch)) {
+ ++i;
+ }
+ } while(--length>0);
+ /* rewind src to the start of the run after the counting pass */
+ src-=srcLength;
+ }
+
+ if(destSize<i) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ return i;
+ }
+ destSize=i;
+
+ /* preserve character integrity */
+ do {
+ /* i is always after the last code unit known to need to be kept in this segment */
+ i=srcLength;
+
+ /* collect code units for one base character */
+ U16_PREV(src, 0, srcLength, c);
+ if(options&UBIDI_KEEP_BASE_COMBINING) {
+ /* collect modifier letters for this base character */
+ while(srcLength>0 && IS_COMBINING(u_charType(c))) {
+ U16_PREV(src, 0, srcLength, c);
+ }
+ }
+
+ if(options&UBIDI_REMOVE_BIDI_CONTROLS && IS_BIDI_CONTROL_CHAR(c)) {
+ /* do not copy this BiDi control character */
+ continue;
+ }
+
+ /* copy this "user character" */
+ j=srcLength;
+ if(options&UBIDI_DO_MIRRORING) {
+ /* mirror only the base character */
+ int32_t k=0;
+ c=u_charMirror(c);
+ U16_APPEND_UNSAFE(dest, k, c);
+ dest+=k;
+ /* skip the source units of the base character that was just written */
+ j+=k;
+ }
+ while(j<i) {
+ *dest++=src[j++];
+ }
+ } while(srcLength>0);
+ break;
+ } /* end of switch */
+
+ return destSize;
+}
+
+/*
+ * Writes the text in src to dest in reverse order of code points, honoring
+ * the options understood by doWriteReverse(), and NUL-terminates the result
+ * if there is room.
+ *
+ * srcLength may be -1 for NUL-terminated input. src and dest must not
+ * overlap; overlapping buffers, a NULL src, a negative destSize, or a NULL
+ * dest with destSize>0 yield U_ILLEGAL_ARGUMENT_ERROR and a return value
+ * of 0. Returns 0 as well when pErrorCode is NULL or already indicates a
+ * failure. Otherwise returns the result of u_terminateUChars() for the
+ * output length.
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_writeReverse(const UChar *src, int32_t srcLength,
+                   UChar *dest, int32_t destSize,
+                   uint16_t options,
+                   UErrorCode *pErrorCode) {
+    int32_t destLength;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* more error checking */
+    if( src==NULL || srcLength<-1 ||
+        destSize<0 || (destSize>0 && dest==NULL))
+    {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /*
+     * Resolve a NUL-terminated length before the overlap test: with
+     * srcLength==-1 the expression src+srcLength would point before src,
+     * and a dest that starts inside the source text would go undetected.
+     */
+    if(srcLength==-1) {
+        srcLength=u_strlen(src);
+    }
+
+    /* do input and output overlap? */
+    if( dest!=NULL &&
+        ((src>=dest && src<dest+destSize) ||
+         (dest>=src && dest<src+srcLength)))
+    {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if(srcLength>0) {
+        destLength=doWriteReverse(src, srcLength, dest, destSize, options, pErrorCode);
+    } else {
+        /* nothing to do */
+        destLength=0;
+    }
+
+    return u_terminateUChars(dest, destSize, destLength, pErrorCode);
+}
+
+// Ticket 20907 - The optimizer in MSVC/Visual Studio versions below 16.4 has trouble with this
+// function on Windows ARM64. As a work-around, we disable optimizations for this function.
+// This work-around could/should be removed once the following versions of Visual Studio are no
+// longer supported: All versions of VS2017, and versions of VS2019 below 16.4.
+#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
+#pragma optimize( "", off )
+#endif
+/*
+ * Writes the text held by pBiDi to dest, one visual run at a time.
+ *
+ * Argument checks: returns 0 when pErrorCode is NULL or already a failure.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 when pBiDi is NULL, has no
+ * text or a negative length, when destSize is negative, when dest is NULL
+ * with destSize>0, or when dest overlaps the text.
+ *
+ * The options argument is adjusted from the state of pBiDi (insert-marks /
+ * remove-controls reordering options and the reordering mode) before the
+ * runs are written. Runs are visited first-to-last, or last-to-first when
+ * UBIDI_OUTPUT_REVERSE is set; LTR runs are always written with
+ * UBIDI_DO_MIRRORING cleared.
+ *
+ * destSize is decremented for every code unit that is (or would be)
+ * written and may become negative; writing simply stops while counting
+ * continues, so that the needed length can be reported (preflighting).
+ *
+ * Returns the result of u_terminateUChars() for the total output length
+ * (destCapacity-destSize).
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_writeReordered(UBiDi *pBiDi,
+ UChar *dest, int32_t destSize,
+ uint16_t options,
+ UErrorCode *pErrorCode) {
+ const UChar *text;
+ UChar *saveDest;
+ int32_t length, destCapacity;
+ int32_t run, runCount, logicalStart, runLength;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
+ /* more error checking */
+ if( pBiDi==NULL ||
+ (text=pBiDi->text)==NULL || (length=pBiDi->length)<0 ||
+ destSize<0 || (destSize>0 && dest==NULL))
+ {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* do input and output overlap? */
+ if( dest!=NULL &&
+ ((text>=dest && text<dest+destSize) ||
+ (dest>=text && dest<text+pBiDi->originalLength)))
+ {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ if(length==0) {
+ /* nothing to do */
+ return u_terminateUChars(dest, destSize, 0, pErrorCode);
+ }
+
+ runCount=ubidi_countRuns(pBiDi, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
+ /* destSize shrinks, later destination length=destCapacity-destSize */
+ saveDest=dest;
+ destCapacity=destSize;
+
+ /*
+ * Option "insert marks" implies UBIDI_INSERT_LRM_FOR_NUMERIC if the
+ * reordering mode (checked below) is appropriate.
+ */
+ if(pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
+ options|=UBIDI_INSERT_LRM_FOR_NUMERIC;
+ options&=~UBIDI_REMOVE_BIDI_CONTROLS;
+ }
+ /*
+ * Option "remove controls" implies UBIDI_REMOVE_BIDI_CONTROLS
+ * and cancels UBIDI_INSERT_LRM_FOR_NUMERIC.
+ */
+ if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
+ options|=UBIDI_REMOVE_BIDI_CONTROLS;
+ options&=~UBIDI_INSERT_LRM_FOR_NUMERIC;
+ }
+ /*
+ * If we do not perform the "inverse BiDi" algorithm, then we
+ * don't need to insert any LRMs, and don't need to test for it.
+ */
+ if((pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_NUMBERS_AS_L) &&
+ (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_LIKE_DIRECT) &&
+ (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
+ (pBiDi->reorderingMode != UBIDI_REORDER_RUNS_ONLY)) {
+ options&=~UBIDI_INSERT_LRM_FOR_NUMERIC;
+ }
+ /*
+ * Iterate through all visual runs and copy the run text segments to
+ * the destination, according to the options.
+ *
+ * The tests for where to insert LRMs ignore the fact that there may be
+ * BN codes or non-BMP code points at the beginning and end of a run;
+ * they may insert LRMs unnecessarily but the tests are faster this way
+ * (this would have to be improved for UTF-8).
+ *
+ * Note that the only errors that are set by doWriteXY() are buffer overflow
+ * errors. Ignore them until the end, and continue for preflighting.
+ */
+ if(!(options&UBIDI_OUTPUT_REVERSE)) {
+ /* forward output */
+ if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) {
+ /* do not insert BiDi controls */
+ for(run=0; run<runCount; ++run) {
+ if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) {
+ runLength=doWriteForward(text+logicalStart, runLength,
+ dest, destSize,
+ (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
+ } else {
+ runLength=doWriteReverse(text+logicalStart, runLength,
+ dest, destSize,
+ options, pErrorCode);
+ }
+ if(dest!=NULL) {
+ dest+=runLength;
+ }
+ destSize-=runLength;
+ }
+ } else {
+ /* insert BiDi controls for "inverse BiDi" */
+ const DirProp *dirProps=pBiDi->dirProps;
+ const UChar *src;
+ UChar uc;
+ UBiDiDirection dir;
+ int32_t markFlag;
+
+ for(run=0; run<runCount; ++run) {
+ dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength);
+ src=text+logicalStart;
+ /* check if something relevant in insertPoints */
+ markFlag=pBiDi->runs[run].insertRemove;
+ if(markFlag<0) { /* BiDi controls count */
+ markFlag=0;
+ }
+
+ if(UBIDI_LTR==dir) {
+ if((pBiDi->isInverse) &&
+ (/*run>0 &&*/ dirProps[logicalStart]!=L)) {
+ markFlag |= LRM_BEFORE;
+ }
+ if (markFlag & LRM_BEFORE) {
+ uc=LRM_CHAR;
+ }
+ else if (markFlag & RLM_BEFORE) {
+ uc=RLM_CHAR;
+ }
+ else uc=0;
+ if(uc) {
+ if(destSize>0) {
+ *dest++=uc;
+ }
+ --destSize;
+ }
+
+ runLength=doWriteForward(src, runLength,
+ dest, destSize,
+ (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
+ if(dest!=NULL) {
+ dest+=runLength;
+ }
+ destSize-=runLength;
+
+ if((pBiDi->isInverse) &&
+ (/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L)) {
+ markFlag |= LRM_AFTER;
+ }
+ if (markFlag & LRM_AFTER) {
+ uc=LRM_CHAR;
+ }
+ else if (markFlag & RLM_AFTER) {
+ uc=RLM_CHAR;
+ }
+ else uc=0;
+ if(uc) {
+ if(destSize>0) {
+ *dest++=uc;
+ }
+ --destSize;
+ }
+ } else { /* RTL run */
+ if((pBiDi->isInverse) &&
+ (/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1])))) {
+ markFlag |= RLM_BEFORE;
+ }
+ if (markFlag & LRM_BEFORE) {
+ uc=LRM_CHAR;
+ }
+ else if (markFlag & RLM_BEFORE) {
+ uc=RLM_CHAR;
+ }
+ else uc=0;
+ if(uc) {
+ if(destSize>0) {
+ *dest++=uc;
+ }
+ --destSize;
+ }
+
+ runLength=doWriteReverse(src, runLength,
+ dest, destSize,
+ options, pErrorCode);
+ if(dest!=NULL) {
+ dest+=runLength;
+ }
+ destSize-=runLength;
+
+ if((pBiDi->isInverse) &&
+ (/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart])))) {
+ markFlag |= RLM_AFTER;
+ }
+ if (markFlag & LRM_AFTER) {
+ uc=LRM_CHAR;
+ }
+ else if (markFlag & RLM_AFTER) {
+ uc=RLM_CHAR;
+ }
+ else uc=0;
+ if(uc) {
+ if(destSize>0) {
+ *dest++=uc;
+ }
+ --destSize;
+ }
+ }
+ }
+ }
+ } else {
+ /* reverse output */
+ if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) {
+ /* do not insert BiDi controls */
+ for(run=runCount; --run>=0;) {
+ if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) {
+ runLength=doWriteReverse(text+logicalStart, runLength,
+ dest, destSize,
+ (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
+ } else {
+ runLength=doWriteForward(text+logicalStart, runLength,
+ dest, destSize,
+ options, pErrorCode);
+ }
+ if(dest!=NULL) {
+ dest+=runLength;
+ }
+ destSize-=runLength;
+ }
+ } else {
+ /* insert BiDi controls for "inverse BiDi" */
+ const DirProp *dirProps=pBiDi->dirProps;
+ const UChar *src;
+ UBiDiDirection dir;
+
+ for(run=runCount; --run>=0;) {
+ /* reverse output */
+ dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength);
+ src=text+logicalStart;
+
+ if(UBIDI_LTR==dir) {
+ if(/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L) {
+ if(destSize>0) {
+ *dest++=LRM_CHAR;
+ }
+ --destSize;
+ }
+
+ runLength=doWriteReverse(src, runLength,
+ dest, destSize,
+ (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
+ if(dest!=NULL) {
+ dest+=runLength;
+ }
+ destSize-=runLength;
+
+ if(/*run>0 &&*/ dirProps[logicalStart]!=L) {
+ if(destSize>0) {
+ *dest++=LRM_CHAR;
+ }
+ --destSize;
+ }
+ } else {
+ if(/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart]))) {
+ if(destSize>0) {
+ *dest++=RLM_CHAR;
+ }
+ --destSize;
+ }
+
+ runLength=doWriteForward(src, runLength,
+ dest, destSize,
+ options, pErrorCode);
+ if(dest!=NULL) {
+ dest+=runLength;
+ }
+ destSize-=runLength;
+
+ if(/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1]))) {
+ if(destSize>0) {
+ *dest++=RLM_CHAR;
+ }
+ --destSize;
+ }
+ }
+ }
+ }
+ }
+
+ return u_terminateUChars(saveDest, destCapacity, destCapacity-destSize, pErrorCode);
+}
+#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
+#pragma optimize( "", on )
+#endif
diff --git a/thirdparty/icu4c/common/ubrk.cpp b/thirdparty/icu4c/common/ubrk.cpp
new file mode 100644
index 0000000000..f8bdf5a6b6
--- /dev/null
+++ b/thirdparty/icu4c/common/ubrk.cpp
@@ -0,0 +1,357 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+********************************************************************************
+* Copyright (C) 1996-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+********************************************************************************
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/ubrk.h"
+
+#include "unicode/brkiter.h"
+#include "unicode/uloc.h"
+#include "unicode/ustring.h"
+#include "unicode/uchriter.h"
+#include "unicode/rbbi.h"
+#include "rbbirb.h"
+#include "uassert.h"
+#include "cmemory.h"
+
+U_NAMESPACE_USE
+
+//------------------------------------------------------------------------------
+//
+//    ubrk_open      Create one of the predefined break iterators, selected by
+//                   type (character, word, line, sentence, title) and locale,
+//                   and attach the given text to it when text is non-NULL.
+//
+//------------------------------------------------------------------------------
+U_CAPI UBreakIterator* U_EXPORT2
+ubrk_open(UBreakIteratorType type,
+          const char *locale,
+          const UChar *text,
+          int32_t textLength,
+          UErrorCode *status)
+{
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    // Pick the BreakIterator factory that matches the requested type.
+    BreakIterator *iter = NULL;
+    if (type == UBRK_CHARACTER) {
+        iter = BreakIterator::createCharacterInstance(Locale(locale), *status);
+    } else if (type == UBRK_WORD) {
+        iter = BreakIterator::createWordInstance(Locale(locale), *status);
+    } else if (type == UBRK_LINE) {
+        iter = BreakIterator::createLineInstance(Locale(locale), *status);
+    } else if (type == UBRK_SENTENCE) {
+        iter = BreakIterator::createSentenceInstance(Locale(locale), *status);
+    } else if (type == UBRK_TITLE) {
+        iter = BreakIterator::createTitleInstance(Locale(locale), *status);
+    } else {
+        // Unknown iterator type.
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+
+    // Covers both the invalid-type case and an error reported by a factory.
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+    // A factory that reports success without an object is treated as an allocation failure.
+    if (iter == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    UBreakIterator *handle = reinterpret_cast<UBreakIterator *>(iter);
+    if (text != NULL) {
+        ubrk_setText(handle, text, textLength, status);
+    }
+    return handle;
+}
+
+
+
+//------------------------------------------------------------------------------
+//
+//    ubrk_openRules      Build a break iterator from rule source text by running
+//                        the rule builder, then attach the given text to it
+//                        when text is non-NULL.
+//
+//------------------------------------------------------------------------------
+U_CAPI UBreakIterator* U_EXPORT2
+ubrk_openRules(const UChar *rules,
+               int32_t rulesLength,
+               const UChar *text,
+               int32_t textLength,
+               UParseError *parseErr,
+               UErrorCode *status) {
+
+    if (status == NULL || U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    UnicodeString ruleSource(rules, rulesLength);
+    BreakIterator *iter =
+        RBBIRuleBuilder::createRuleBasedBreakIterator(ruleSource, parseErr, *status);
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    UBreakIterator *handle = reinterpret_cast<UBreakIterator *>(iter);
+    if (text != NULL) {
+        ubrk_setText(handle, text, textLength, status);
+    }
+    return handle;
+}
+
+
+/**
+ * Create a rule-based break iterator from precompiled binary rules and attach
+ * the given text to it when text is non-NULL.
+ * A negative rulesLength sets U_ILLEGAL_ARGUMENT_ERROR. Returns NULL on any failure.
+ */
+U_CAPI UBreakIterator* U_EXPORT2
+ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
+                     const UChar * text, int32_t textLength,
+                     UErrorCode * status)
+{
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (rulesLength < 0) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    // The LocalPointer holds the new iterator until it is handed out via orphan().
+    LocalPointer<RuleBasedBreakIterator> owner(
+        new RuleBasedBreakIterator(binaryRules, rulesLength, *status), *status);
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    UBreakIterator *handle = reinterpret_cast<UBreakIterator *>(owner.orphan());
+    if (text != NULL) {
+        ubrk_setText(handle, text, textLength, status);
+    }
+    return handle;
+}
+
+
+/**
+ * Clone a break iterator. The stack buffer argument is ignored.
+ * When pBufferSize is non-NULL it is overwritten with 1, and an incoming value
+ * of 0 is treated as a preflight request that returns NULL without cloning.
+ * On success *status is set to U_SAFECLONE_ALLOCATED_WARNING; if clone()
+ * yields NULL it is set to U_MEMORY_ALLOCATION_ERROR.
+ */
+U_CAPI UBreakIterator * U_EXPORT2
+ubrk_safeClone(
+          const UBreakIterator *bi,
+          void * /*stackBuffer*/,
+          int32_t *pBufferSize,
+          UErrorCode *status)
+{
+    if (status == NULL || U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (bi == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    if (pBufferSize != NULL) {
+        const int32_t requestedSize = *pBufferSize;
+        *pBufferSize = 1;
+        if (requestedSize == 0) {
+            return NULL;  // preflighting for deprecated functionality
+        }
+    }
+
+    BreakIterator *source = reinterpret_cast<BreakIterator *>(const_cast<UBreakIterator *>(bi));
+    BreakIterator *copy = source->clone();
+    *status = (copy == NULL) ? U_MEMORY_ALLOCATION_ERROR : U_SAFECLONE_ALLOCATED_WARNING;
+    return reinterpret_cast<UBreakIterator *>(copy);
+}
+
+
+
+/** Delete the BreakIterator object behind the handle bi. */
+U_CAPI void U_EXPORT2
+ubrk_close(UBreakIterator *bi)
+{
+    BreakIterator *iter = reinterpret_cast<BreakIterator *>(bi);
+    delete iter;
+}
+
+/**
+ * Point the break iterator at a UChar string by wrapping it in a
+ * stack-allocated UText and passing that to BreakIterator::setText().
+ */
+U_CAPI void U_EXPORT2
+ubrk_setText(UBreakIterator* bi,
+             const UChar*    text,
+             int32_t         textLength,
+             UErrorCode*     status)
+{
+    UText wrapper = UTEXT_INITIALIZER;
+    utext_openUChars(&wrapper, text, textLength, status);
+
+    BreakIterator *iter = reinterpret_cast<BreakIterator *>(bi);
+    iter->setText(&wrapper, *status);
+    // No utext_close() here: a stack allocated UText wrapping a UChar * string
+    // can be dumped without explicitly closing it.
+}
+
+
+
+/** Pass the caller's UText to BreakIterator::setText() on the iterator behind bi. */
+U_CAPI void U_EXPORT2
+ubrk_setUText(UBreakIterator *bi,
+              UText          *text,
+              UErrorCode     *status)
+{
+    BreakIterator *iter = reinterpret_cast<BreakIterator *>(bi);
+    iter->setText(text, *status);
+}
+
+
+
+
+
+/** Return the result of BreakIterator::current() for the iterator behind bi. */
+U_CAPI int32_t U_EXPORT2
+ubrk_current(const UBreakIterator *bi)
+{
+    BreakIterator *iter = reinterpret_cast<BreakIterator *>(const_cast<UBreakIterator *>(bi));
+    return iter->current();
+}
+
+/** Return the result of BreakIterator::next() for the iterator behind bi. */
+U_CAPI int32_t U_EXPORT2
+ubrk_next(UBreakIterator *bi)
+{
+    BreakIterator *iter = reinterpret_cast<BreakIterator *>(bi);
+    return iter->next();
+}
+
+/** Return the result of BreakIterator::previous() for the iterator behind bi. */
+U_CAPI int32_t U_EXPORT2
+ubrk_previous(UBreakIterator *bi)
+{
+    BreakIterator *iter = reinterpret_cast<BreakIterator *>(bi);
+    return iter->previous();
+}
+
+/** Return the result of BreakIterator::first() for the iterator behind bi. */
+U_CAPI int32_t U_EXPORT2
+ubrk_first(UBreakIterator *bi)
+{
+    BreakIterator *iter = reinterpret_cast<BreakIterator *>(bi);
+    return iter->first();
+}
+
+/** Return the result of BreakIterator::last() for the iterator behind bi. */
+U_CAPI int32_t U_EXPORT2
+ubrk_last(UBreakIterator *bi)
+{
+    BreakIterator *iter = reinterpret_cast<BreakIterator *>(bi);
+    return iter->last();
+}
+
+/** Return the result of BreakIterator::preceding(offset) for the iterator behind bi. */
+U_CAPI int32_t U_EXPORT2
+ubrk_preceding(UBreakIterator *bi,
+               int32_t offset)
+{
+    BreakIterator *iter = reinterpret_cast<BreakIterator *>(bi);
+    return iter->preceding(offset);
+}
+
+/** Return the result of BreakIterator::following(offset) for the iterator behind bi. */
+U_CAPI int32_t U_EXPORT2
+ubrk_following(UBreakIterator *bi,
+               int32_t offset)
+{
+    BreakIterator *iter = reinterpret_cast<BreakIterator *>(bi);
+    return iter->following(offset);
+}
+
+/** Return uloc_getAvailable(index); the locale list is the one exposed by uloc. */
+U_CAPI const char* U_EXPORT2
+ubrk_getAvailable(int32_t index)
+{
+    const char *localeID = uloc_getAvailable(index);
+    return localeID;
+}
+
+/** Return uloc_countAvailable(); the count matches the list used by ubrk_getAvailable(). */
+U_CAPI int32_t U_EXPORT2
+ubrk_countAvailable()
+{
+    const int32_t localeCount = uloc_countAvailable();
+    return localeCount;
+}
+
+
+/** Return the result of BreakIterator::isBoundary(offset) for the iterator behind bi. */
+U_CAPI UBool U_EXPORT2
+ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
+{
+    BreakIterator *iter = reinterpret_cast<BreakIterator *>(bi);
+    return iter->isBoundary(offset);
+}
+
+
+/** Return the result of BreakIterator::getRuleStatus() for the iterator behind bi. */
+U_CAPI int32_t U_EXPORT2
+ubrk_getRuleStatus(UBreakIterator *bi)
+{
+    BreakIterator *iter = reinterpret_cast<BreakIterator *>(bi);
+    return iter->getRuleStatus();
+}
+
+/**
+ * Return the result of BreakIterator::getRuleStatusVec(), passing through the
+ * caller's output array, its capacity, and the error code.
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
+{
+    BreakIterator *iter = reinterpret_cast<BreakIterator *>(bi);
+    return iter->getRuleStatusVec(fillInVec, capacity, *status);
+}
+
+
+/**
+ * Return BreakIterator::getLocaleID(type, *status) for the iterator behind bi.
+ * A NULL bi returns NULL and sets U_ILLEGAL_ARGUMENT_ERROR, unless *status
+ * already holds a failure, in which case it is left untouched.
+ */
+U_CAPI const char* U_EXPORT2
+ubrk_getLocaleByType(const UBreakIterator *bi,
+                     ULocDataLocaleType type,
+                     UErrorCode* status)
+{
+    if (bi != NULL) {
+        BreakIterator *iter = reinterpret_cast<BreakIterator *>(const_cast<UBreakIterator *>(bi));
+        return iter->getLocaleID(type, *status);
+    }
+
+    if (U_SUCCESS(*status)) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+    return NULL;
+}
+
+
+/** Pass text and the error code to BreakIterator::refreshInputText() on the iterator behind bi. */
+U_CAPI void U_EXPORT2
+ubrk_refreshUText(UBreakIterator *bi,
+                  UText          *text,
+                  UErrorCode     *status)
+{
+    reinterpret_cast<BreakIterator *>(bi)->refreshInputText(text, *status);
+}
+
+/**
+ * Copy the compiled binary rules of a rule-based break iterator into
+ * binaryRules and return their length in bytes.
+ *
+ * Passing binaryRules==NULL (with rulesCapacity==0) preflights: only the
+ * length is returned. Errors reported through *status:
+ *  - U_ILLEGAL_ARGUMENT_ERROR: negative capacity, NULL buffer with a positive
+ *    capacity, or bi is not a RuleBasedBreakIterator;
+ *  - U_INDEX_OUTOFBOUNDS_ERROR: the rules length does not fit in int32_t;
+ *  - U_BUFFER_OVERFLOW_ERROR: the buffer is too small (the required length is
+ *    still returned).
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_getBinaryRules(UBreakIterator *bi,
+                    uint8_t *       binaryRules, int32_t rulesCapacity,
+                    UErrorCode *    status)
+{
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    if (rulesCapacity < 0 || (rulesCapacity > 0 && binaryRules == NULL)) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    RuleBasedBreakIterator *rbbi =
+        dynamic_cast<RuleBasedBreakIterator *>(reinterpret_cast<BreakIterator *>(bi));
+    if (rbbi == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    uint32_t availableLength;
+    const uint8_t *source = rbbi->getBinaryRules(availableLength);
+    if (availableLength > INT32_MAX) {
+        *status = U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    // availableLength <= INT32_MAX was just established, so the narrowing is safe.
+    const int32_t neededLength = (int32_t)availableLength;
+    if (binaryRules != NULL) { // if not preflighting
+        if (neededLength > rulesCapacity) {
+            *status = U_BUFFER_OVERFLOW_ERROR;
+        } else {
+            uprv_memcpy(binaryRules, source, availableLength);
+        }
+    }
+    return neededLength;
+}
+
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/thirdparty/icu4c/common/ubrkimpl.h b/thirdparty/icu4c/common/ubrkimpl.h
new file mode 100644
index 0000000000..8197f66339
--- /dev/null
+++ b/thirdparty/icu4c/common/ubrkimpl.h
@@ -0,0 +1,15 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#ifndef UBRKIMPL_H
+#define UBRKIMPL_H
+
+#define U_ICUDATA_BRKITR U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "brkitr"
+
+#endif /*UBRKIMPL_H*/
diff --git a/thirdparty/icu4c/common/ucase.cpp b/thirdparty/icu4c/common/ucase.cpp
new file mode 100644
index 0000000000..2b142f5bc2
--- /dev/null
+++ b/thirdparty/icu4c/common/ucase.cpp
@@ -0,0 +1,1608 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2004-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ucase.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004aug30
+* created by: Markus W. Scherer
+*
+* Low-level Unicode character/string case mapping code.
+* Much code moved here (and modified) from uchar.c.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/uset.h"
+#include "unicode/udata.h" /* UDataInfo */
+#include "unicode/utf16.h"
+#include "ucmndata.h" /* DataHeader */
+#include "udatamem.h"
+#include "umutex.h"
+#include "uassert.h"
+#include "cmemory.h"
+#include "utrie2.h"
+#include "ucase.h"
+
+/*
+ * Container for the case-properties data used throughout this file.
+ * The single instance, ucase_props_singleton, comes from the machine-generated
+ * ucase_props_data.h included below, so the member order here presumably has
+ * to match that generated initializer -- confirm before reordering.
+ */
+struct UCaseProps {
+ UDataMemory *mem;
+ const int32_t *indexes;
+ /* base of the exception words addressed by GET_EXCEPTIONS() */
+ const uint16_t *exceptions;
+ /* reverse case folding table; ucase_addStringCaseClosure() checks it for NULL before use */
+ const uint16_t *unfold;
+
+ /* per-code-point 16-bit properties word, read via UTRIE2_GET16() */
+ UTrie2 trie;
+ uint8_t formatVersion[4];
+};
+
+/* ucase_props_data.h is machine-generated by gencase --csource */
+#define INCLUDED_FROM_UCASE_CPP
+#include "ucase_props_data.h"
+
+/* set of property starts for UnicodeSet ------------------------------------ */
+
+/*
+ * utrie2_enum() range callback: records the first code point of the range in
+ * the set behind the USetAdder passed as context. The range end and value are
+ * ignored. Always returns TRUE.
+ */
+static UBool U_CALLCONV
+_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) {
+    const USetAdder *adder=static_cast<const USetAdder *>(context);
+    adder->add(adder->set, start);
+    return TRUE;
+}
+
+/*
+ * Add to sa the start code point of every same-value range of the case
+ * properties trie. Does nothing when *pErrorCode already holds a failure.
+ */
+U_CFUNC void U_EXPORT2
+ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
+    if(!U_FAILURE(*pErrorCode)) {
+        /* one callback per same-value range; the callback adds the range start */
+        utrie2_enum(&ucase_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
+
+        /*
+         * Code points with hardcoded properties (and the ones following them)
+         * would be added here; there are none right now.
+         *
+         * Code points with hardcoded specialcasing properties are omitted
+         * because we do not build property UnicodeSets for them right now.
+         */
+    }
+}
+
+/* data access primitives --------------------------------------------------- */
+
+/* Return the address of the properties trie embedded in ucase_props_singleton. */
+U_CFUNC const UTrie2 * U_EXPORT2
+ucase_getTrie() {
+    const UTrie2 *trie=&ucase_props_singleton.trie;
+    return trie;
+}
+
+/*
+ * Pointer to the first exception word for a properties word that has an
+ * exception: the bits of props above UCASE_EXC_SHIFT are used as an index
+ * into csp->exceptions.
+ */
+#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))
+
+/* flagsOffset[b] is the number of 1-bits in the 8-bit value b; SLOT_OFFSET() uses it to count the slots present below a given slot index */
+static const uint8_t flagsOffset[256]={
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+};
+
+/* nonzero if bit idx is set in flags, i.e. the optional slot with that index is present */
+#define HAS_SLOT(flags, idx) ((flags)&(1<<(idx)))
+/* number of set bits in flags below bit idx, i.e. how many present slots precede slot idx */
+#define SLOT_OFFSET(flags, idx) flagsOffset[(flags)&((1<<(idx))-1)]
+
+/*
+ * Get the value of an optional-value slot where HAS_SLOT(excWord, idx).
+ *
+ * @param excWord (in) initial exceptions word
+ * @param idx (in) desired slot index
+ * @param pExc16 (in/out) const uint16_t * after excWord=*pExc16++;
+ * moved to the last uint16_t of the value, use +1 for beginning of next slot
+ * @param value (out) int32_t or uint32_t output if hasSlot, otherwise not modified
+ */
+#define GET_SLOT_VALUE(excWord, idx, pExc16, value) UPRV_BLOCK_MACRO_BEGIN { \
+ if(((excWord)&UCASE_EXC_DOUBLE_SLOTS)==0) { \
+ (pExc16)+=SLOT_OFFSET(excWord, idx); \
+ (value)=*pExc16; \
+ } else { \
+ (pExc16)+=2*SLOT_OFFSET(excWord, idx); \
+ (value)=*pExc16++; \
+ (value)=((value)<<16)|*pExc16; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/* simple case mappings ----------------------------------------------------- */
+
+/*
+ * Simple (single code point) lowercase mapping.
+ * Returns c unchanged when the data provides no lowercase mapping for it.
+ */
+U_CAPI UChar32 U_EXPORT2
+ucase_tolower(UChar32 c) {
+ uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+ if(!UCASE_HAS_EXCEPTION(props)) {
+ /* no exception data: the mapping, if any, is a delta stored in props */
+ if(UCASE_IS_UPPER_OR_TITLE(props)) {
+ c+=UCASE_GET_DELTA(props);
+ }
+ } else {
+ const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
+ uint16_t excWord=*pe++;
+ /* a delta slot takes precedence for uppercase/titlecase characters */
+ if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
+ int32_t delta;
+ GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
+ return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
+ }
+ /* otherwise use the explicit lowercase slot when present; pe is still at the first slot here */
+ if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
+ GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c);
+ }
+ }
+ return c;
+}
+
+/*
+ * Simple (single code point) uppercase mapping.
+ * Returns c unchanged when the data provides no uppercase mapping for it.
+ */
+U_CAPI UChar32 U_EXPORT2
+ucase_toupper(UChar32 c) {
+ uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+ if(!UCASE_HAS_EXCEPTION(props)) {
+ /* no exception data: only lowercase characters carry a delta to apply */
+ if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
+ c+=UCASE_GET_DELTA(props);
+ }
+ } else {
+ const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
+ uint16_t excWord=*pe++;
+ /* a delta slot takes precedence for lowercase characters */
+ if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
+ int32_t delta;
+ GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
+ return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
+ }
+ /* otherwise use the explicit uppercase slot when present; pe is still at the first slot here */
+ if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
+ GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c);
+ }
+ }
+ return c;
+}
+
+/*
+ * Simple (single code point) titlecase mapping.
+ * For characters with exception data, the titlecase slot is preferred and the
+ * uppercase slot is the fallback; c is returned unchanged when neither exists.
+ */
+U_CAPI UChar32 U_EXPORT2
+ucase_totitle(UChar32 c) {
+ uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+ if(!UCASE_HAS_EXCEPTION(props)) {
+ /* no exception data: only lowercase characters carry a delta to apply */
+ if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
+ c+=UCASE_GET_DELTA(props);
+ }
+ } else {
+ const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
+ uint16_t excWord=*pe++;
+ /* a delta slot takes precedence for lowercase characters */
+ if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
+ int32_t delta;
+ GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
+ return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
+ }
+ /* choose which slot to read: titlecase first, then uppercase */
+ int32_t idx;
+ if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
+ idx=UCASE_EXC_TITLE;
+ } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
+ idx=UCASE_EXC_UPPER;
+ } else {
+ return c;
+ }
+ GET_SLOT_VALUE(excWord, idx, pe, c);
+ }
+ return c;
+}
+
+/*
+ * Constant UTF-16 sequences: a base letter followed by U+0307 COMBINING DOT
+ * ABOVE, and for some an additional combining accent.
+ * NOTE(review): iOgonekDot is declared with 3 elements but has only 2
+ * initializers, so its last element is zero-initialized -- confirm that the
+ * size is intentional.
+ */
+static const UChar iDot[2] = { 0x69, 0x307 };
+static const UChar jDot[2] = { 0x6a, 0x307 };
+static const UChar iOgonekDot[3] = { 0x12f, 0x307 };
+static const UChar iDotGrave[3] = { 0x69, 0x307, 0x300 };
+static const UChar iDotAcute[3] = { 0x69, 0x307, 0x301 };
+static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 };
+
+
+/*
+ * Add to the set behind sa every code point and string that c is related to
+ * through its case mappings: the simple mappings, the delta mapping, the full
+ * case folding string, and the code points of the closure string stored in
+ * the exception data. c itself is not added.
+ *
+ * @param c  the code point whose case closure is wanted
+ * @param sa adder used to insert code points (add) and strings (addString)
+ */
+U_CFUNC void U_EXPORT2
+ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
+    uint16_t props;
+
+    /*
+     * Hardcode the case closure of i and its relatives and ignore the
+     * data file data for these characters.
+     * The Turkic dotless i and dotted I with their case mapping conditions
+     * and case folding option make the related characters behave specially.
+     * This code matches their closure behavior to their case folding behavior.
+     */
+
+    switch(c) {
+    case 0x49:
+        /* regular i and I are in one equivalence class */
+        sa->add(sa->set, 0x69);
+        return;
+    case 0x69:
+        sa->add(sa->set, 0x49);
+        return;
+    case 0x130:
+        /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
+        sa->addString(sa->set, iDot, 2);
+        return;
+    case 0x131:
+        /* dotless i is in a class by itself */
+        return;
+    default:
+        /* otherwise use the data file data */
+        break;
+    }
+
+    props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+    if(!UCASE_HAS_EXCEPTION(props)) {
+        if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
+            /* add the one simple case mapping, no matter what type it is */
+            int32_t delta=UCASE_GET_DELTA(props);
+            if(delta!=0) {
+                sa->add(sa->set, c+delta);
+            }
+        }
+    } else {
+        /*
+         * c has exceptions, so there may be multiple simple and/or
+         * full case mappings. Add them all.
+         */
+        const uint16_t *pe0, *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
+        const UChar *closure;
+        uint16_t excWord=*pe++;
+        int32_t idx, closureLength, fullLength, length;
+
+        /* pe0 remembers the first slot; GET_SLOT_VALUE advances pe, so it is reset before each lookup */
+        pe0=pe;
+
+        /* add all simple case mappings */
+        for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
+            if(HAS_SLOT(excWord, idx)) {
+                pe=pe0;
+                /*
+                 * Read into a separate variable: c must keep the input code
+                 * point because the delta mapping below is relative to it.
+                 */
+                UChar32 mapping;
+                GET_SLOT_VALUE(excWord, idx, pe, mapping);
+                sa->add(sa->set, mapping);
+            }
+        }
+        if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) {
+            pe=pe0;
+            int32_t delta;
+            GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
+            sa->add(sa->set, (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta);
+        }
+
+        /* get the closure string pointer & length */
+        if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
+            pe=pe0;
+            GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
+            closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */
+            closure=(const UChar *)pe+1; /* behind this slot, unless there are full case mappings */
+        } else {
+            closureLength=0;
+            closure=NULL;
+        }
+
+        /* add the full case folding */
+        if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
+            pe=pe0;
+            GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
+
+            /* start of full case mapping strings */
+            ++pe;
+
+            fullLength&=0xffff; /* bits 16 and higher are reserved */
+
+            /* skip the lowercase result string */
+            pe+=fullLength&UCASE_FULL_LOWER;
+            fullLength>>=4;
+
+            /* add the full case folding string */
+            length=fullLength&0xf;
+            if(length!=0) {
+                sa->addString(sa->set, (const UChar *)pe, length);
+                pe+=length;
+            }
+
+            /* skip the uppercase and titlecase strings */
+            fullLength>>=4;
+            pe+=fullLength&0xf;
+            fullLength>>=4;
+            pe+=fullLength;
+
+            closure=(const UChar *)pe; /* behind full case mappings */
+        }
+
+        /* add each code point in the closure string */
+        for(idx=0; idx<closureLength;) {
+            UChar32 member;
+            U16_NEXT_UNSAFE(closure, idx, member);
+            sa->add(sa->set, member);
+        }
+    }
+}
+
+/*
+ * compare s, which has a length, with t, which has a maximum length or is NUL-terminated
+ * must be length>0 and max>0 and length<=max
+ *
+ * Returns 0 if the strings are equal, 1 if t ends before s does, the
+ * difference of the first pair of differing code units (s minus t) otherwise,
+ * or a negative value (minus the remaining capacity of t) if s is a proper
+ * prefix of t.
+ */
+static inline int32_t
+strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) {
+ int32_t c1, c2;
+
+ max-=length; /* we require length<=max, so no need to decrement max in the loop */
+ do {
+ c1=*s++;
+ c2=*t++;
+ if(c2==0) {
+ return 1; /* reached the end of t but not of s */
+ }
+ c1-=c2;
+ if(c1!=0) {
+ return c1; /* return difference result */
+ }
+ } while(--length>0);
+ /* ends with length==0 */
+
+ /* s is exhausted; t is also at its end if it used up max units or its next unit is NUL */
+ if(max==0 || *t==0) {
+ return 0; /* equal to length of both strings */
+ } else {
+ return -max; /* return length difference */
+ }
+}
+
+/*
+ * Look up the string s in the reverse case folding ("unfold") table and, if
+ * it is found, add each code point stored in that row together with its own
+ * case closure to the set behind sa.
+ *
+ * @return TRUE if s was found in the table, FALSE otherwise (including when
+ *         there is no table, s is NULL, or the length rules out a match)
+ */
+U_CFUNC UBool U_EXPORT2
+ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) {
+    const uint16_t *table=ucase_props_singleton.unfold;
+    if(table==NULL || s==NULL) {
+        return FALSE; /* no reverse case folding data, or no string */
+    }
+    if(length<=1) {
+        /*
+         * The string is too short to find any match.
+         * More precise would be:
+         *     if(!u_strHasMoreChar32Than(s, length, 1))
+         * but this does not make much practical difference because
+         * a single supplementary code point would just not be found.
+         */
+        return FALSE;
+    }
+
+    /* the first row holds the table dimensions; the data rows follow it */
+    const int32_t rowCount=table[UCASE_UNFOLD_ROWS];
+    const int32_t rowWidth=table[UCASE_UNFOLD_ROW_WIDTH];
+    const int32_t stringWidth=table[UCASE_UNFOLD_STRING_WIDTH];
+    const uint16_t *rows=table+rowWidth;
+
+    if(length>stringWidth) {
+        /* the string is too long to find any match */
+        return FALSE;
+    }
+
+    /* binary search over the rows, comparing s with each row's string column */
+    int32_t low=0;
+    int32_t high=rowCount;
+    while(low<high) {
+        const int32_t mid=(low+high)/2;
+        const UChar *row=reinterpret_cast<const UChar *>(rows+(mid*rowWidth));
+        const int32_t order=strcmpMax(s, length, row, stringWidth);
+
+        if(order<0) {
+            high=mid;
+        } else if(order>0) {
+            low=mid+1;
+        } else {
+            /* found the string: add each code point, and its case closure */
+            for(int32_t pos=stringWidth; pos<rowWidth && row[pos]!=0;) {
+                UChar32 c;
+                U16_NEXT_UNSAFE(row, pos, c);
+                sa->add(sa->set, c);
+                ucase_addCaseClosure(c, sa);
+            }
+            return TRUE;
+        }
+    }
+
+    return FALSE; /* string not found */
+}
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Set up iteration over the unfold table of ucase_props_singleton.
+ * The row count, row width and string-column width are read from the start of
+ * the table; iteration begins at row 0 with the code point index placed just
+ * past the string column.
+ * NOTE(review): unlike ucase_addStringCaseClosure(), this does not check the
+ * unfold pointer for NULL -- presumably the built-in data always provides it.
+ */
+FullCaseFoldingIterator::FullCaseFoldingIterator()
+ : unfold(reinterpret_cast<const UChar *>(ucase_props_singleton.unfold)),
+ unfoldRows(unfold[UCASE_UNFOLD_ROWS]),
+ unfoldRowWidth(unfold[UCASE_UNFOLD_ROW_WIDTH]),
+ unfoldStringWidth(unfold[UCASE_UNFOLD_STRING_WIDTH]),
+ currentRow(0),
+ rowCpIndex(unfoldStringWidth) {
+ /* skip the first row, which holds the values read above, so that row 0 is the first data row */
+ unfold+=unfoldRowWidth;
+}
+
+/*
+ * Deliver the next code point of the unfold table and set "full" to the
+ * string stored in the first column of the row that code point belongs to.
+ * Returns U_SENTINEL once all rows have been consumed.
+ */
+UChar32
+FullCaseFoldingIterator::next(UnicodeString &full) {
+    const UChar *row=unfold+(currentRow*unfoldRowWidth);
+
+    // Move on to the next row once the current one has no more code points.
+    if(rowCpIndex>=unfoldRowWidth || row[rowCpIndex]==0) {
+        row+=unfoldRowWidth;
+        ++currentRow;
+        rowCpIndex=unfoldStringWidth;
+    }
+    if(currentRow>=unfoldRows) {
+        return U_SENTINEL;
+    }
+
+    // The string column is NUL-padded: trim trailing NULs to get its length.
+    int32_t foldedLength=unfoldStringWidth;
+    for(; foldedLength>0 && row[foldedLength-1]==0; --foldedLength) {}
+    full.setTo(FALSE, row, foldedLength);
+
+    // Read the code point at rowCpIndex and advance past it.
+    UChar32 cp;
+    U16_NEXT_UNSAFE(row, rowCpIndex, cp);
+    return cp;
+}
+
+namespace LatinCase {
+
+const int8_t TO_LOWER_NORMAL[LIMIT] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
+
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
+};
+
+const int8_t TO_LOWER_TR_LT[LIMIT] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0,
+ EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
+
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
+};
+
+const int8_t TO_UPPER_NORMAL[LIMIT] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,
+
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,
+
+ -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
+};
+
+const int8_t TO_UPPER_TR[LIMIT] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,
+
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,
+
+ -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
+};
+
+} // namespace LatinCase
+
+U_NAMESPACE_END
+
+/**
+ * Look up the case type of c in the properties trie.
+ * @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE
+ */
+U_CAPI int32_t U_EXPORT2
+ucase_getType(UChar32 c) {
+    const uint16_t trieWord=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+    const int32_t caseType=UCASE_GET_TYPE(trieWord);
+    return caseType;
+}
+
+/**
+ * Look up the case type of c together with its case-ignorable flag.
+ * @return same as ucase_getType() and set bit 2 if c is case-ignorable
+ */
+U_CAPI int32_t U_EXPORT2
+ucase_getTypeOrIgnorable(UChar32 c) {
+    const uint16_t trieWord=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+    const int32_t typeAndFlag=UCASE_GET_TYPE_AND_IGNORABLE(trieWord);
+    return typeAndFlag;
+}
+
+/**
+ * Read the dot type of c: from the first exception word when c has exception
+ * data, otherwise directly from its properties word.
+ * @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT
+ */
+static inline int32_t
+getDotType(UChar32 c) {
+    const uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+    if(UCASE_HAS_EXCEPTION(props)) {
+        const uint16_t *excWord=GET_EXCEPTIONS(&ucase_props_singleton, props);
+        return (*excWord>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK;
+    }
+    return props&UCASE_DOT_MASK;
+}
+
+/** @return TRUE if the dot type of c is UCASE_SOFT_DOTTED */
+U_CAPI UBool U_EXPORT2
+ucase_isSoftDotted(UChar32 c) {
+    const int32_t dotType=getDotType(c);
+    return (UBool)(dotType==UCASE_SOFT_DOTTED);
+}
+
+/**
+ * Test the case-sensitive flag of c: UCASE_EXC_SENSITIVE in the first
+ * exception word when c has exception data, otherwise UCASE_SENSITIVE in its
+ * properties word.
+ */
+U_CAPI UBool U_EXPORT2
+ucase_isCaseSensitive(UChar32 c) {
+    const uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+    if(UCASE_HAS_EXCEPTION(props)) {
+        const uint16_t *excWord=GET_EXCEPTIONS(&ucase_props_singleton, props);
+        return (UBool)((*excWord&UCASE_EXC_SENSITIVE)!=0);
+    }
+    return (UBool)((props&UCASE_SENSITIVE)!=0);
+}
+
+/* string casing ------------------------------------------------------------ */
+
+/*
+ * These internal functions form the core of string case mappings.
+ * They map single code points to result code points or strings and take
+ * all necessary conditions (context, locale ID, options) into account.
+ *
+ * They do not iterate over the source or write to the destination
+ * so that the same functions are useful for non-standard string storage,
+ * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc.
+ * For the same reason, the "surrounding text" context is passed in as a
+ * UCaseContextIterator which does not make any assumptions about
+ * the underlying storage.
+ *
+ * This section contains helper functions that check for conditions
+ * in the input text surrounding the current code point
+ * according to SpecialCasing.txt.
+ *
+ * Each helper function gets the index
+ * - after the current code point if it looks at following text
+ * - before the current code point if it looks at preceding text
+ *
+ * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
+ *
+ * Final_Sigma
+ * C is preceded by a sequence consisting of
+ * a cased letter and a case-ignorable sequence,
+ * and C is not followed by a sequence consisting of
+ * an ignorable sequence and then a cased letter.
+ *
+ * More_Above
+ * C is followed by one or more characters of combining class 230 (ABOVE)
+ * in the combining character sequence.
+ *
+ * After_Soft_Dotted
+ * The last preceding character with combining class of zero before C
+ * was Soft_Dotted,
+ * and there is no intervening combining character class 230 (ABOVE).
+ *
+ * Before_Dot
+ * C is followed by combining dot above (U+0307).
+ * Any sequence of characters with a combining class that is neither 0 nor 230
+ * may intervene between the current character and the combining dot above.
+ *
+ * The erratum from 2002-10-31 adds the condition
+ *
+ * After_I
+ * The last preceding base character was an uppercase I, and there is no
+ * intervening combining character class 230 (ABOVE).
+ *
+ * (See Jitterbug 2344 and the comments on After_I below.)
+ *
+ * Helper definitions in Unicode 3.2 UAX 21:
+ *
+ * D1. A character C is defined to be cased
+ * if it meets any of the following criteria:
+ *
+ * - The general category of C is Titlecase Letter (Lt)
+ * - In [CoreProps], C has one of the properties Uppercase, or Lowercase
+ * - Given D = NFD(C), then it is not the case that:
+ * D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
+ * (This third criterion does not add any characters to the list
+ * for Unicode 3.2. Ignored.)
+ *
+ * D2. A character C is defined to be case-ignorable
+ * if it meets either of the following criteria:
+ *
+ * - The general category of C is
+ * Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
+ * Letter Modifier (Lm), or Symbol Modifier (Sk)
+ * - C is one of the following characters
+ * U+0027 APOSTROPHE
+ * U+00AD SOFT HYPHEN (SHY)
+ * U+2019 RIGHT SINGLE QUOTATION MARK
+ * (the preferred character for apostrophe)
+ *
+ * D3. A case-ignorable sequence is a sequence of
+ * zero or more case-ignorable characters.
+ */
+
+/*
+ * Case-insensitive tests of one char against a single ASCII letter.
+ * ucase_getCaseLocale() uses them to match the second and third letters
+ * of a language code. Each macro evaluates its argument twice, so the
+ * argument must not have side effects.
+ */
+#define is_d(c) ((c)=='d' || (c)=='D')
+#define is_e(c) ((c)=='e' || (c)=='E')
+#define is_i(c) ((c)=='i' || (c)=='I')
+#define is_l(c) ((c)=='l' || (c)=='L')
+#define is_r(c) ((c)=='r' || (c)=='R')
+#define is_t(c) ((c)=='t' || (c)=='T')
+#define is_u(c) ((c)=='u' || (c)=='U')
+#define is_y(c) ((c)=='y' || (c)=='Y')
+#define is_z(c) ((c)=='z' || (c)=='Z')
+
+/* separator? */
+/* True for '_', '-' and the terminating NUL, i.e. where the language code ends. */
+#define is_sep(c) ((c)=='_' || (c)=='-' || (c)==0)
+
+/**
+ * Requires non-NULL locale ID but otherwise does the equivalent of
+ * checking for language codes as if uloc_getLanguage() were called:
+ * Accepts both 2- and 3-letter codes and accepts case variants.
+ *
+ * @param locale NUL-terminated locale ID; it is dereferenced unconditionally,
+ * so it must not be NULL.
+ * @return UCASE_LOC_GREEK for el/ell, UCASE_LOC_TURKISH for tr/tur and az/aze,
+ * UCASE_LOC_LITHUANIAN for lt/lit, UCASE_LOC_DUTCH for nl/nld,
+ * UCASE_LOC_ARMENIAN for hy/hye, and UCASE_LOC_ROOT for everything else.
+ * A language code only matches when it is followed by '_', '-' or
+ * the end of the string (see is_sep()).
+ */
+U_CFUNC int32_t
+ucase_getCaseLocale(const char *locale) {
+ /*
+ * This function used to use uloc_getLanguage(), but the current code
+ * removes the dependency of this low-level code on uloc implementation code
+ * and is faster because not the whole locale ID has to be
+ * examined and copied/transformed.
+ *
+ * Because this code does not want to depend on uloc, the caller must
+ * pass in a non-NULL locale, i.e., may need to call uloc_getDefault().
+ */
+ char c=*locale++;
+ // Fastpath for English "en" which is often used for default (=root locale) case mappings,
+ // and for Chinese "zh": Very common but no special case mapping behavior.
+ // Then check lowercase vs. uppercase to reduce the number of comparisons
+ // for other locales without special behavior.
+ if(c=='e') {
+ /* el or ell? */
+ c=*locale++;
+ if(is_l(c)) {
+ c=*locale++;
+ /* optional third letter: if present, test the char after it for a separator */
+ if(is_l(c)) {
+ c=*locale;
+ }
+ if(is_sep(c)) {
+ return UCASE_LOC_GREEK;
+ }
+ }
+ // en, es, ... -> root
+ } else if(c=='z') {
+ return UCASE_LOC_ROOT;
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+ } else if(c>='a') { // ASCII a-z = 0x61..0x7a, after A-Z
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+ } else if(c<='z') { // EBCDIC a-z = 0x81..0xa9 with two gaps, before A-Z
+#else
+# error Unknown charset family!
+#endif
+ // lowercase c
+ if(c=='t') {
+ /* tr or tur? */
+ c=*locale++;
+ /* skip the optional middle letter of the 3-letter code so that both forms reach the 'r' test */
+ if(is_u(c)) {
+ c=*locale++;
+ }
+ if(is_r(c)) {
+ c=*locale;
+ if(is_sep(c)) {
+ return UCASE_LOC_TURKISH;
+ }
+ }
+ } else if(c=='a') {
+ /* az or aze? */
+ c=*locale++;
+ if(is_z(c)) {
+ c=*locale++;
+ if(is_e(c)) {
+ c=*locale;
+ }
+ if(is_sep(c)) {
+ /* Azerbaijani shares the Turkish case locale */
+ return UCASE_LOC_TURKISH;
+ }
+ }
+ } else if(c=='l') {
+ /* lt or lit? */
+ c=*locale++;
+ if(is_i(c)) {
+ c=*locale++;
+ }
+ if(is_t(c)) {
+ c=*locale;
+ if(is_sep(c)) {
+ return UCASE_LOC_LITHUANIAN;
+ }
+ }
+ } else if(c=='n') {
+ /* nl or nld? */
+ c=*locale++;
+ if(is_l(c)) {
+ c=*locale++;
+ if(is_d(c)) {
+ c=*locale;
+ }
+ if(is_sep(c)) {
+ return UCASE_LOC_DUTCH;
+ }
+ }
+ } else if(c=='h') {
+ /* hy or hye? *not* hyw */
+ c=*locale++;
+ if(is_y(c)) {
+ c=*locale++;
+ if(is_e(c)) {
+ c=*locale;
+ }
+ if(is_sep(c)) {
+ return UCASE_LOC_ARMENIAN;
+ }
+ }
+ }
+ } else {
+ // uppercase c
+ // Same code as for lowercase c but also check for 'E'.
+ // (A lowercase 'e' never gets here because it is handled by the fast path above.)
+ if(c=='T') {
+ /* tr or tur? */
+ c=*locale++;
+ if(is_u(c)) {
+ c=*locale++;
+ }
+ if(is_r(c)) {
+ c=*locale;
+ if(is_sep(c)) {
+ return UCASE_LOC_TURKISH;
+ }
+ }
+ } else if(c=='A') {
+ /* az or aze? */
+ c=*locale++;
+ if(is_z(c)) {
+ c=*locale++;
+ if(is_e(c)) {
+ c=*locale;
+ }
+ if(is_sep(c)) {
+ return UCASE_LOC_TURKISH;
+ }
+ }
+ } else if(c=='L') {
+ /* lt or lit? */
+ c=*locale++;
+ if(is_i(c)) {
+ c=*locale++;
+ }
+ if(is_t(c)) {
+ c=*locale;
+ if(is_sep(c)) {
+ return UCASE_LOC_LITHUANIAN;
+ }
+ }
+ } else if(c=='E') {
+ /* el or ell? */
+ c=*locale++;
+ if(is_l(c)) {
+ c=*locale++;
+ if(is_l(c)) {
+ c=*locale;
+ }
+ if(is_sep(c)) {
+ return UCASE_LOC_GREEK;
+ }
+ }
+ } else if(c=='N') {
+ /* nl or nld? */
+ c=*locale++;
+ if(is_l(c)) {
+ c=*locale++;
+ if(is_d(c)) {
+ c=*locale;
+ }
+ if(is_sep(c)) {
+ return UCASE_LOC_DUTCH;
+ }
+ }
+ } else if(c=='H') {
+ /* hy or hye? *not* hyw */
+ c=*locale++;
+ if(is_y(c)) {
+ c=*locale++;
+ if(is_e(c)) {
+ c=*locale;
+ }
+ if(is_sep(c)) {
+ return UCASE_LOC_ARMENIAN;
+ }
+ }
+ }
+ }
+ /* no language with special case mapping behavior was recognized */
+ return UCASE_LOC_ROOT;
+}
+
+/*
+ * Tests the context pattern
+ *     {case-ignorable}* cased
+ * next to the current code point.
+ * dir is passed to the first iterator call and selects the direction
+ * (callers in this file pass 1 for following text, -1 for preceding text);
+ * all further calls pass 0 to continue in that direction.
+ * A case-ignorable character is always skipped, even if it is also cased.
+ * Returns FALSE when no iterator is available.
+ */
+static UBool
+isFollowedByCasedLetter(UCaseContextIterator *iter, void *context, int8_t dir) {
+    if(iter==nullptr) {
+        return FALSE;
+    }
+
+    for(UChar32 next=iter(context, dir); next>=0; next=iter(context, 0)) {
+        const int32_t typeBits=ucase_getTypeOrIgnorable(next);
+        if((typeBits&4)!=0) {
+            continue; /* case-ignorable: keep looking */
+        }
+        /* first character that is not case-ignorable decides: cased or not */
+        return typeBits!=UCASE_NONE ? TRUE : FALSE;
+    }
+
+    return FALSE; /* ran out of text without finding a cased letter */
+}
+
+/*
+ * After_Soft_Dotted: walks backward from the current code point and reports
+ * whether a Soft_Dotted character is reached before any other base character
+ * or any accent with cc==230. Returns FALSE when no iterator is available.
+ */
+static UBool
+isPrecededBySoftDotted(UCaseContextIterator *iter, void *context) {
+    if(iter==nullptr) {
+        return FALSE;
+    }
+
+    /* -1 starts the backward iteration, 0 continues it */
+    for(UChar32 prev=iter(context, -1); prev>=0; prev=iter(context, 0)) {
+        const int32_t dots=getDotType(prev);
+        if(dots!=UCASE_OTHER_ACCENT) {
+            /* either the soft-dotted character itself, or a different base character / cc==230 */
+            return dots==UCASE_SOFT_DOTTED ? TRUE : FALSE;
+        }
+    }
+
+    return FALSE; /* start of text reached without a soft-dotted character */
+}
+
+/*
+ * See Jitterbug 2344:
+ * The condition After_I for Turkic-lowercasing of U+0307 combining dot above
+ * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because
+ * we made those releases compatible with Unicode 3.2 which had not fixed
+ * a related bug in SpecialCasing.txt.
+ *
+ * From the Jitterbug 2344 text:
+ * ... this bug is listed as a Unicode erratum
+ * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html
+ * <quote>
+ * There are two errors in SpecialCasing.txt.
+ * 1. Missing semicolons on two lines. ... [irrelevant for ICU]
+ * 2. An incorrect context definition. Correct as follows:
+ * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
+ * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE
+ * ---
+ * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
+ * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
+ * where the context After_I is defined as:
+ * The last preceding base character was an uppercase I, and there is no
+ * intervening combining character class 230 (ABOVE).
+ * </quote>
+ *
+ * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as:
+ *
+ * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
+ * # This matches the behavior of the canonically equivalent I-dot_above
+ *
+ * See also the description in this place in older versions of uchar.c (revision 1.100).
+ *
+ * Markus W. Scherer 2003-feb-15
+ */
+
+/*
+ * After_I: walks backward from the current code point and reports whether
+ * U+0049 'I' is reached before any other base character or any accent with cc==230.
+ * Returns FALSE when no iterator is available.
+ */
+static UBool
+isPrecededBy_I(UCaseContextIterator *iter, void *context) {
+    if(iter==nullptr) {
+        return FALSE;
+    }
+
+    /* -1 starts the backward iteration, 0 continues it */
+    for(UChar32 prev=iter(context, -1); prev>=0; prev=iter(context, 0)) {
+        if(prev==0x49) {
+            return TRUE; /* found the capital I */
+        }
+        if(getDotType(prev)!=UCASE_OTHER_ACCENT) {
+            return FALSE; /* different base character, or an intervening cc==230 */
+        }
+    }
+
+    return FALSE; /* start of text reached without an I */
+}
+
+/*
+ * More_Above: walks forward from the current code point and reports whether
+ * an accent with cc==230 follows before the next base character.
+ * Returns FALSE when no iterator is available.
+ */
+static UBool
+isFollowedByMoreAbove(UCaseContextIterator *iter, void *context) {
+    if(iter==nullptr) {
+        return FALSE;
+    }
+
+    /* 1 starts the forward iteration, 0 continues it */
+    for(UChar32 next=iter(context, 1); next>=0; next=iter(context, 0)) {
+        const int32_t dots=getDotType(next);
+        if(dots!=UCASE_OTHER_ACCENT) {
+            /* either the cc==230 accent itself, or the next base character */
+            return dots==UCASE_ABOVE ? TRUE : FALSE;
+        }
+    }
+
+    return FALSE; /* end of text reached without a cc==230 accent */
+}
+
+/*
+ * Before_Dot: walks forward from the current code point and reports whether
+ * U+0307 COMBINING DOT ABOVE follows with no base character and no other
+ * cc==230 accent in between. Returns FALSE when no iterator is available.
+ */
+static UBool
+isFollowedByDotAbove(UCaseContextIterator *iter, void *context) {
+    if(iter==nullptr) {
+        return FALSE;
+    }
+
+    /* 1 starts the forward iteration, 0 continues it */
+    for(UChar32 next=iter(context, 1); next>=0; next=iter(context, 0)) {
+        if(next==0x307) {
+            return TRUE; /* found the dot above */
+        }
+        if(getDotType(next)!=UCASE_OTHER_ACCENT) {
+            return FALSE; /* next base character, or a cc==230 accent in between */
+        }
+    }
+
+    return FALSE; /* end of text reached without a dot above */
+}
+
+/**
+ * Full lowercase mapping of one code point, including the hardcoded
+ * locale- and context-sensitive mappings from SpecialCasing.txt.
+ *
+ * @param c code point to map; must be non-negative (asserted).
+ * @param iter callback for looking at the text around c. May be NULL; the
+ * context helpers in this file then all report FALSE.
+ * @param context opaque pointer that is handed to iter.
+ * @param pString receives a pointer to the result string whenever a string
+ * length is returned. It is set to nullptr when the Turkic rule
+ * removes U+0307 (return value 0).
+ * @param loc one of the UCASE_LOC_ values.
+ * @return ~c if c maps to itself; otherwise either the single result code
+ * point or the length of the string stored in *pString.
+ */
+U_CAPI int32_t U_EXPORT2
+ucase_toFullLower(UChar32 c,
+ UCaseContextIterator *iter, void *context,
+ const UChar **pString,
+ int32_t loc) {
+ // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
+ U_ASSERT(c >= 0);
+ UChar32 result=c;
+ uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+ if(!UCASE_HAS_EXCEPTION(props)) {
+ /* common case: the mapping is a simple delta stored in the properties word */
+ if(UCASE_IS_UPPER_OR_TITLE(props)) {
+ result=c+UCASE_GET_DELTA(props);
+ }
+ } else {
+ const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
+ uint16_t excWord=*pe++;
+ int32_t full;
+
+ /* remember the start of the slots: pe is passed to GET_SLOT_VALUE below, pe2 is used for the simple mappings */
+ pe2=pe;
+
+ if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
+ /* use hardcoded conditions and mappings */
+
+ /*
+ * Test for conditional mappings first
+ * (otherwise the unconditional default mappings are always taken),
+ * then test for characters that have unconditional mappings in SpecialCasing.txt,
+ * then get the UnicodeData.txt mappings.
+ */
+ if( loc==UCASE_LOC_LITHUANIAN &&
+ /* base characters, find accents above */
+ (((c==0x49 || c==0x4a || c==0x12e) &&
+ isFollowedByMoreAbove(iter, context)) ||
+ /* precomposed with accent above, no need to find one */
+ (c==0xcc || c==0xcd || c==0x128))
+ ) {
+ /*
+ # Lithuanian
+
+ # Lithuanian retains the dot in a lowercase i when followed by accents.
+
+ # Introduce an explicit dot above when lowercasing capital I's and J's
+ # whenever there are more accents above.
+ # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
+
+ 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
+ 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
+ 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
+ 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
+ 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
+ 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
+ */
+ switch(c) {
+ case 0x49: /* LATIN CAPITAL LETTER I */
+ *pString=iDot;
+ return 2;
+ case 0x4a: /* LATIN CAPITAL LETTER J */
+ *pString=jDot;
+ return 2;
+ case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
+ *pString=iOgonekDot;
+ return 2;
+ case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
+ *pString=iDotGrave;
+ return 3;
+ case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
+ *pString=iDotAcute;
+ return 3;
+ case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
+ *pString=iDotTilde;
+ return 3;
+ default:
+ return 0; /* will not occur */
+ }
+ /* # Turkish and Azeri */
+ } else if(loc==UCASE_LOC_TURKISH && c==0x130) {
+ /*
+ # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
+ # The following rules handle those cases.
+
+ 0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
+ 0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
+ */
+ return 0x69;
+ } else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(iter, context)) {
+ /*
+ # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
+ # This matches the behavior of the canonically equivalent I-dot_above
+
+ 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
+ 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
+ */
+ *pString=nullptr;
+ return 0; /* remove the dot (continue without output) */
+ } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) {
+ /*
+ # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
+
+ 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
+ 0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
+ */
+ return 0x131;
+ } else if(c==0x130) {
+ /*
+ # Preserve canonical equivalence for I with dot. Turkic is handled below.
+
+ 0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+ */
+ *pString=iDot;
+ return 2;
+ } else if( c==0x3a3 &&
+ !isFollowedByCasedLetter(iter, context, 1) &&
+ isFollowedByCasedLetter(iter, context, -1) /* -1=preceded */
+ ) {
+ /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
+ /*
+ # Special case for final form of sigma
+
+ 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
+ */
+ return 0x3c2; /* greek small final sigma */
+ } else {
+ /* no known conditional special case mapping, use a normal mapping */
+ }
+ } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
+ GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
+ /* keep only the length of the lowercase string */
+ full&=UCASE_FULL_LOWER;
+ if(full!=0) {
+ /* set the output pointer to the lowercase mapping */
+ *pString=reinterpret_cast<const UChar *>(pe+1);
+
+ /* return the string length */
+ return full;
+ }
+ }
+
+ /* no string result: fall back to the simple mappings stored in the exception slots */
+ if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
+ int32_t delta;
+ GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
+ return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
+ }
+ if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
+ GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe2, result);
+ }
+ }
+
+ /* an unchanged code point is reported as its bitwise complement, which is negative */
+ return (result==c) ? ~result : result;
+}
+
+/* internal */
+/**
+ * Shared implementation of ucase_toFullUpper() and ucase_toFullTitle().
+ *
+ * @param c             code point to map; must be non-negative (asserted).
+ * @param iter          callback for looking at the text before c. May be NULL;
+ *                      isPrecededBySoftDotted() then reports FALSE.
+ * @param context       opaque pointer that is handed to iter.
+ * @param pString       receives a pointer to the result string whenever a string
+ *                      length is returned. It is set to nullptr when the Lithuanian
+ *                      rule removes U+0307 (return value 0).
+ * @param loc           one of the UCASE_LOC_ values.
+ * @param upperNotTitle TRUE for the uppercase mapping, FALSE for the titlecase mapping.
+ * @return ~c if c maps to itself; otherwise either the single result code point
+ *         or the length of the string stored in *pString.
+ */
+static int32_t
+toUpperOrTitle(UChar32 c,
+               UCaseContextIterator *iter, void *context,
+               const UChar **pString,
+               int32_t loc,
+               UBool upperNotTitle) {
+    // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
+    U_ASSERT(c >= 0);
+    UChar32 result=c;
+    uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+    if(!UCASE_HAS_EXCEPTION(props)) {
+        /* common case: the mapping is a simple delta stored in the properties word */
+        if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
+            result=c+UCASE_GET_DELTA(props);
+        }
+    } else {
+        const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
+        uint16_t excWord=*pe++;
+        int32_t full, idx;
+
+        /* remember the start of the slots for the simple mappings at the end */
+        pe2=pe;
+
+        if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
+            /* use hardcoded conditions and mappings */
+            if(loc==UCASE_LOC_TURKISH && c==0x69) {
+                /*
+                    # Turkish and Azeri
+
+                    # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
+                    # The following rules handle those cases.
+
+                    # When uppercasing, i turns into a dotted capital I
+
+                    0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
+                    0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
+                */
+                return 0x130;
+            } else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter, context)) {
+                /*
+                    # Lithuanian
+
+                    # Lithuanian retains the dot in a lowercase i when followed by accents.
+
+                    # Remove DOT ABOVE after "i" with upper or titlecase
+
+                    0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
+                */
+                *pString=nullptr;
+                return 0; /* remove the dot (continue without output) */
+            } else if(c==0x0587) {
+                // See ICU-13416:
+                // U+0587 ARMENIAN SMALL LIGATURE ECH YIWN
+                // uppercases to U+0535 U+0552 (ech+yiwn) by default and in Western Armenian,
+                // but to U+0535 U+054E (ech+vew) in Eastern Armenian.
+                // The titlecase results keep the second letter small (U+0582 yiwn, U+057E vew).
+                //
+                // The strings are written with \u escapes so that each one is exactly
+                // two UTF-16 code units, matching the returned length, no matter how
+                // the source file is re-encoded.
+                if(loc==UCASE_LOC_ARMENIAN) {
+                    *pString=upperNotTitle ? u"\u0535\u054E" : u"\u0535\u057E";
+                } else {
+                    *pString=upperNotTitle ? u"\u0535\u0552" : u"\u0535\u0582";
+                }
+                return 2;
+            } else {
+                /* no known conditional special case mapping, use a normal mapping */
+            }
+        } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
+            GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
+
+            /* start of full case mapping strings */
+            ++pe;
+
+            /* skip the lowercase and case-folding result strings */
+            pe+=full&UCASE_FULL_LOWER;
+            full>>=4;
+            pe+=full&0xf;
+            full>>=4;
+
+            if(upperNotTitle) {
+                full&=0xf;
+            } else {
+                /* skip the uppercase result string */
+                pe+=full&0xf;
+                full=(full>>4)&0xf;
+            }
+
+            if(full!=0) {
+                /* set the output pointer to the result string */
+                *pString=reinterpret_cast<const UChar *>(pe);
+
+                /* return the string length */
+                return full;
+            }
+        }
+
+        /* no string result: fall back to the simple mappings stored in the exception slots */
+        if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
+            int32_t delta;
+            GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
+            return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
+        }
+        if(!upperNotTitle && HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
+            idx=UCASE_EXC_TITLE;
+        } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
+            /* here, titlecase is same as uppercase */
+            idx=UCASE_EXC_UPPER;
+        } else {
+            return ~c;
+        }
+        GET_SLOT_VALUE(excWord, idx, pe2, result);
+    }
+
+    /* an unchanged code point is reported as its bitwise complement, which is negative */
+    return (result==c) ? ~result : result;
+}
+
+/**
+ * Full uppercase mapping of c.
+ * Forwards all arguments to toUpperOrTitle() and selects the uppercase variant.
+ */
+U_CAPI int32_t U_EXPORT2
+ucase_toFullUpper(UChar32 c,
+                  UCaseContextIterator *iter, void *context,
+                  const UChar **pString,
+                  int32_t caseLocale) {
+    const UBool upperNotTitle=TRUE;
+    return toUpperOrTitle(c, iter, context, pString, caseLocale, upperNotTitle);
+}
+
+/**
+ * Full titlecase mapping of c.
+ * Forwards all arguments to toUpperOrTitle() and selects the titlecase variant.
+ */
+U_CAPI int32_t U_EXPORT2
+ucase_toFullTitle(UChar32 c,
+                  UCaseContextIterator *iter, void *context,
+                  const UChar **pString,
+                  int32_t caseLocale) {
+    const UBool upperNotTitle=FALSE;
+    return toUpperOrTitle(c, iter, context, pString, caseLocale, upperNotTitle);
+}
+
+/* case folding ------------------------------------------------------------- */
+
+/*
+ * Case folding is similar to lowercasing.
+ * The result may be a simple mapping, i.e., a single code point, or
+ * a full mapping, i.e., a string.
+ * If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
+ * then only the lowercase mapping is stored.
+ *
+ * Some special cases are hardcoded because their conditions cannot be
+ * parsed and processed from CaseFolding.txt.
+ *
+ * Unicode 3.2 CaseFolding.txt specifies for its status field:
+
+# C: common case folding, common mappings shared by both simple and full mappings.
+# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
+# S: simple case folding, mappings to single characters where different from F.
+# T: special case for uppercase I and dotted uppercase I
+# - For non-Turkic languages, this mapping is normally not used.
+# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
+#
+# Usage:
+# A. To do a simple case folding, use the mappings with status C + S.
+# B. To do a full case folding, use the mappings with status C + F.
+#
+# The mappings with status T can be used or omitted depending on the desired case-folding
+# behavior. (The default option is to exclude them.)
+
+ * Unicode 3.2 has 'T' mappings as follows:
+
+0049; T; 0131; # LATIN CAPITAL LETTER I
+0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+
+ * while the default mappings for these code points are:
+
+0049; C; 0069; # LATIN CAPITAL LETTER I
+0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+
+ * U+0130 has no simple case folding (simple-case-folds to itself).
+ */
+
+/* return the simple case folding mapping for c */
+/*
+ * c: code point to fold.
+ * options: case folding options. After masking with _FOLD_CASE_OPTIONS_MASK,
+ * U_FOLD_CASE_DEFAULT selects the default mappings for U+0049 and U+0130;
+ * any other value selects the Turkic ("T") mappings for these two.
+ * Returns the folded code point, or c itself if it has no simple case folding.
+ */
+U_CAPI UChar32 U_EXPORT2
+ucase_fold(UChar32 c, uint32_t options) {
+ uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+ if(!UCASE_HAS_EXCEPTION(props)) {
+ /* common case: fold like simple lowercasing, using the delta from the properties word */
+ if(UCASE_IS_UPPER_OR_TITLE(props)) {
+ c+=UCASE_GET_DELTA(props);
+ }
+ } else {
+ const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
+ uint16_t excWord=*pe++;
+ int32_t idx;
+ if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
+ /* special case folding mappings, hardcoded */
+ if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
+ /* default mappings */
+ if(c==0x49) {
+ /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
+ return 0x69;
+ } else if(c==0x130) {
+ /* no simple case folding for U+0130 */
+ return c;
+ }
+ } else {
+ /* Turkic mappings */
+ if(c==0x49) {
+ /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
+ return 0x131;
+ } else if(c==0x130) {
+ /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
+ return 0x69;
+ }
+ }
+ }
+ if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
+ return c;
+ }
+ if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
+ int32_t delta;
+ GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
+ return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
+ }
+ /* prefer an explicit folding slot; otherwise the folding is the same as the lowercase mapping */
+ if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
+ idx=UCASE_EXC_FOLD;
+ } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
+ idx=UCASE_EXC_LOWER;
+ } else {
+ return c;
+ }
+ GET_SLOT_VALUE(excWord, idx, pe, c);
+ }
+ return c;
+}
+
+/*
+ * Issue for canonical caseless match (UAX #21):
+ * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
+ * canonical equivalence, unlike default-option casefolding.
+ * For example, I-grave and I + grave fold to strings that are not canonically
+ * equivalent.
+ * For more details, see the comment in unorm_compare() in unorm.cpp
+ * and the intermediate prototype changes for Jitterbug 2021.
+ * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
+ *
+ * This did not get fixed because it appears that it is not possible to fix
+ * it for uppercase and lowercase characters (I-grave vs. i-grave)
+ * together in a way that they still fold to common result strings.
+ */
+
+/**
+ * Full case folding of one code point.
+ *
+ * @param c code point to fold; must be non-negative (asserted).
+ * @param pString receives a pointer to the result string whenever a string
+ * length is returned.
+ * @param options case folding options. After masking with _FOLD_CASE_OPTIONS_MASK,
+ * U_FOLD_CASE_DEFAULT selects the default mappings for U+0049 and
+ * U+0130; any other value selects the Turkic ("T") mappings.
+ * @return ~c if c folds to itself; otherwise either the single result code
+ * point or the length of the string stored in *pString.
+ */
+U_CAPI int32_t U_EXPORT2
+ucase_toFullFolding(UChar32 c,
+ const UChar **pString,
+ uint32_t options) {
+ // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
+ U_ASSERT(c >= 0);
+ UChar32 result=c;
+ uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+ if(!UCASE_HAS_EXCEPTION(props)) {
+ /* common case: fold like simple lowercasing, using the delta from the properties word */
+ if(UCASE_IS_UPPER_OR_TITLE(props)) {
+ result=c+UCASE_GET_DELTA(props);
+ }
+ } else {
+ const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
+ uint16_t excWord=*pe++;
+ int32_t full, idx;
+
+ /* remember the start of the slots for the simple mappings at the end */
+ pe2=pe;
+
+ if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
+ /* use hardcoded conditions and mappings */
+ if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
+ /* default mappings */
+ if(c==0x49) {
+ /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
+ return 0x69;
+ } else if(c==0x130) {
+ /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
+ *pString=iDot;
+ return 2;
+ }
+ } else {
+ /* Turkic mappings */
+ if(c==0x49) {
+ /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
+ return 0x131;
+ } else if(c==0x130) {
+ /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
+ return 0x69;
+ }
+ }
+ } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
+ GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
+
+ /* start of full case mapping strings */
+ ++pe;
+
+ /* skip the lowercase result string */
+ pe+=full&UCASE_FULL_LOWER;
+ /* the next 4 bits hold the length of the case folding string */
+ full=(full>>4)&0xf;
+
+ if(full!=0) {
+ /* set the output pointer to the result string */
+ *pString=reinterpret_cast<const UChar *>(pe);
+
+ /* return the string length */
+ return full;
+ }
+ }
+
+ /* no string result: fall back to the simple mappings stored in the exception slots */
+ if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
+ return ~c;
+ }
+ if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
+ int32_t delta;
+ GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
+ return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
+ }
+ /* prefer an explicit folding slot; otherwise the folding is the same as the lowercase mapping */
+ if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
+ idx=UCASE_EXC_FOLD;
+ } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
+ idx=UCASE_EXC_LOWER;
+ } else {
+ return ~c;
+ }
+ GET_SLOT_VALUE(excWord, idx, pe2, result);
+ }
+
+ /* an unchanged code point is reported as its bitwise complement, which is negative */
+ return (result==c) ? ~result : result;
+}
+
+/* case mapping properties API ---------------------------------------------- */
+
+/* public API (see uchar.h) */
+
+/* TRUE if ucase_getType() classifies c as UCASE_LOWER. */
+U_CAPI UBool U_EXPORT2
+u_isULowercase(UChar32 c) {
+    const auto caseType=ucase_getType(c);
+    return (UBool)(caseType==UCASE_LOWER);
+}
+
+/* TRUE if ucase_getType() classifies c as UCASE_UPPER. */
+U_CAPI UBool U_EXPORT2
+u_isUUppercase(UChar32 c) {
+    const auto caseType=ucase_getType(c);
+    return (UBool)(caseType==UCASE_UPPER);
+}
+
+/* Public wrapper: lowercase equivalent of c, as computed by ucase_tolower(). */
+U_CAPI UChar32 U_EXPORT2
+u_tolower(UChar32 c) {
+    const UChar32 lowered=ucase_tolower(c);
+    return lowered;
+}
+
+/* Public wrapper: uppercase equivalent of c, as computed by ucase_toupper(). */
+U_CAPI UChar32 U_EXPORT2
+u_toupper(UChar32 c) {
+    const UChar32 uppered=ucase_toupper(c);
+    return uppered;
+}
+
+/* Public wrapper: titlecase equivalent of c, as computed by ucase_totitle(). */
+U_CAPI UChar32 U_EXPORT2
+u_totitle(UChar32 c) {
+    const UChar32 titled=ucase_totitle(c);
+    return titled;
+}
+
+/* Public wrapper: simple case folding of c; c and options are passed on to ucase_fold(). */
+U_CAPI UChar32 U_EXPORT2
+u_foldCase(UChar32 c, uint32_t options) {
+    const UChar32 folded=ucase_fold(c, options);
+    return folded;
+}
+
+/*
+ * Evaluates the case-related binary properties of c.
+ * Returns the property value, or FALSE for a property that is not handled here.
+ */
+U_CFUNC int32_t U_EXPORT2
+ucase_hasBinaryProperty(UChar32 c, UProperty which) {
+    /* Output argument required by the full-mapping functions; its value is never read here. */
+    const UChar *ignoredMapping;
+    int32_t value=FALSE;
+
+    switch(which) {
+    case UCHAR_LOWERCASE:
+        value=(UBool)(ucase_getType(c)==UCASE_LOWER);
+        break;
+    case UCHAR_UPPERCASE:
+        value=(UBool)(ucase_getType(c)==UCASE_UPPER);
+        break;
+    case UCHAR_SOFT_DOTTED:
+        value=ucase_isSoftDotted(c);
+        break;
+    case UCHAR_CASE_SENSITIVE:
+        value=ucase_isCaseSensitive(c);
+        break;
+    case UCHAR_CASED:
+        value=(UBool)(ucase_getType(c)!=UCASE_NONE);
+        break;
+    case UCHAR_CASE_IGNORABLE:
+        value=(UBool)(ucase_getTypeOrIgnorable(c)>>2);
+        break;
+    /*
+     * Note on the Changes_When_Xyz properties below:
+     * By definition they test whether the NFD form of the input changes
+     * when it is Xyz-case-mapped. This simpler implementation ignores NFD
+     * and still passes the tests.
+     * If the tests start failing, the implementation needs to change:
+     * keep using the per-code point ucase_toFullXyz() functions unless
+     * the NFD form has more than one code point, and make the property
+     * starts set the union of the start sets for normalization and
+     * case mappings.
+     */
+    case UCHAR_CHANGES_WHEN_LOWERCASED:
+        value=(UBool)(ucase_toFullLower(c, NULL, NULL, &ignoredMapping, UCASE_LOC_ROOT)>=0);
+        break;
+    case UCHAR_CHANGES_WHEN_UPPERCASED:
+        value=(UBool)(ucase_toFullUpper(c, NULL, NULL, &ignoredMapping, UCASE_LOC_ROOT)>=0);
+        break;
+    case UCHAR_CHANGES_WHEN_TITLECASED:
+        value=(UBool)(ucase_toFullTitle(c, NULL, NULL, &ignoredMapping, UCASE_LOC_ROOT)>=0);
+        break;
+    /* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */
+    case UCHAR_CHANGES_WHEN_CASEMAPPED:
+        /* changed by any one of the three mappings; later mappings are only tried if needed */
+        value=(UBool)(
+            ucase_toFullLower(c, NULL, NULL, &ignoredMapping, UCASE_LOC_ROOT)>=0 ||
+            ucase_toFullUpper(c, NULL, NULL, &ignoredMapping, UCASE_LOC_ROOT)>=0 ||
+            ucase_toFullTitle(c, NULL, NULL, &ignoredMapping, UCASE_LOC_ROOT)>=0);
+        break;
+    default:
+        break;
+    }
+
+    return value;
+}
diff --git a/thirdparty/icu4c/common/ucase.h b/thirdparty/icu4c/common/ucase.h
new file mode 100644
index 0000000000..a018f82b81
--- /dev/null
+++ b/thirdparty/icu4c/common/ucase.h
@@ -0,0 +1,445 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2004-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ucase.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004aug30
+* created by: Markus W. Scherer
+*
+* Low-level Unicode character/string case mapping code.
+*/
+
+#ifndef __UCASE_H__
+#define __UCASE_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uset.h"
+#include "putilimp.h"
+#include "uset_imp.h"
+#include "udataswp.h"
+#include "utrie2.h"
+
+#ifdef __cplusplus
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+U_NAMESPACE_END
+#endif
+
+/* library API -------------------------------------------------------------- */
+
+U_CFUNC void U_EXPORT2
+ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
+
+/**
+ * Requires a non-NULL locale ID but otherwise does the equivalent of
+ * checking for language codes as if uloc_getLanguage() were called:
+ * accepts 2- and 3-letter codes and case variants. Returns a UCASE_LOC_* value.
+ */
+U_CFUNC int32_t
+ucase_getCaseLocale(const char *locale);
+
+/* Casing locale types for ucase_getCaseLocale */
+enum {
+ UCASE_LOC_UNKNOWN,
+ UCASE_LOC_ROOT,
+ UCASE_LOC_TURKISH,
+ UCASE_LOC_LITHUANIAN,
+ UCASE_LOC_GREEK,
+ UCASE_LOC_DUTCH,
+ UCASE_LOC_ARMENIAN
+};
+
+/**
+ * Bit mask for getting just the options from a string compare options word
+ * that are relevant for case-insensitive string comparison.
+ * See stringoptions.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
+ * @internal
+ */
+#define _STRCASECMP_OPTIONS_MASK 0xffff
+
+/**
+ * Bit mask for getting just the options from a string compare options word
+ * that are relevant for case folding (of a single string or code point).
+ *
+ * Currently only bit 0 for U_FOLD_CASE_EXCLUDE_SPECIAL_I; the mask value 7 also keeps bits 1 and 2.
+ * It is conceivable that at some point we might use one more bit for using uppercase sharp s.
+ * It is conceivable that at some point we might want the option to use only simple case foldings
+ * when operating on strings.
+ *
+ * See stringoptions.h.
+ * @internal
+ */
+#define _FOLD_CASE_OPTIONS_MASK 7
+
+/* single-code point functions */
+
+U_CAPI UChar32 U_EXPORT2
+ucase_tolower(UChar32 c);
+
+U_CAPI UChar32 U_EXPORT2
+ucase_toupper(UChar32 c);
+
+U_CAPI UChar32 U_EXPORT2
+ucase_totitle(UChar32 c);
+
+U_CAPI UChar32 U_EXPORT2
+ucase_fold(UChar32 c, uint32_t options);
+
+/**
+ * Adds all simple case mappings and the full case folding for c to sa,
+ * and also adds special case closure mappings.
+ * c itself is not added.
+ * For example, the mappings
+ * - for s include long s
+ * - for sharp s include ss
+ * - for k include the Kelvin sign
+ */
+U_CFUNC void U_EXPORT2
+ucase_addCaseClosure(UChar32 c, const USetAdder *sa);
+
+/**
+ * Maps the string to single code points and adds the associated case closure
+ * mappings.
+ * The string is mapped to code points if it is their full case folding string.
+ * In other words, this performs a reverse full case folding and then
+ * adds the case closure items of the resulting code points.
+ * If the string is found and its closure applied, then
+ * the string itself is added as well as part of its code points' closure.
+ * The length argument must be >=0.
+ *
+ * @return true if the string was found
+ */
+U_CFUNC UBool U_EXPORT2
+ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa);
+
+#ifdef __cplusplus
+U_NAMESPACE_BEGIN
+
+/**
+ * Iterator over characters with more than one code point in the full default Case_Folding.
+ */
+class U_COMMON_API FullCaseFoldingIterator {
+public:
+ /** Constructor. */
+ FullCaseFoldingIterator();
+ /**
+ * Returns the next code point cp and stores cp's full default Case_Folding in "full".
+ * Returns a negative cp value at the end of the iteration.
+ */
+ UChar32 next(UnicodeString &full);
+private:
+ FullCaseFoldingIterator(const FullCaseFoldingIterator &); // private: copy construction is not available to callers
+ FullCaseFoldingIterator &operator=(const FullCaseFoldingIterator &); // private: assignment is not available to callers
+
+ const UChar *unfold; // NOTE(review): presumably the reverse case folding ("unfold") data - confirm
+ int32_t unfoldRows; // NOTE(review): cf. UCASE_UNFOLD_ROWS - confirm
+ int32_t unfoldRowWidth; // NOTE(review): cf. UCASE_UNFOLD_ROW_WIDTH - confirm
+ int32_t unfoldStringWidth; // NOTE(review): cf. UCASE_UNFOLD_STRING_WIDTH - confirm
+ int32_t currentRow; // NOTE(review): presumably the iteration position (row) - confirm
+ int32_t rowCpIndex; // NOTE(review): presumably the code point position within the row - confirm
+};
+
+/**
+ * Fast case mapping data for ASCII/Latin.
+ * Linear arrays of delta bytes: 0=no mapping; EXC=exception.
+ * Deltas must not cross the ASCII boundary, or else they cannot be easily used
+ * in simple UTF-8 code.
+ */
+namespace LatinCase {
+
+/** Case mapping/folding data for code points up to U+017F. */
+constexpr UChar LIMIT = 0x180;
+/** U+017F case-folds and uppercases crossing the ASCII boundary. */
+constexpr UChar LONG_S = 0x17f;
+/** Exception: Complex mapping, or too-large delta. */
+constexpr int8_t EXC = -0x80;
+
+/** Deltas for lowercasing for most locales, and default case folding. */
+extern const int8_t TO_LOWER_NORMAL[LIMIT];
+/** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */
+extern const int8_t TO_LOWER_TR_LT[LIMIT];
+
+/** Deltas for uppercasing for most locales. */
+extern const int8_t TO_UPPER_NORMAL[LIMIT];
+/** Deltas for uppercasing for tr/az. */
+extern const int8_t TO_UPPER_TR[LIMIT];
+
+} // namespace LatinCase
+
+U_NAMESPACE_END
+#endif
+
+/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
+U_CAPI int32_t U_EXPORT2
+ucase_getType(UChar32 c);
+
+/** @return like ucase_getType() but also sets UCASE_IGNORABLE if c is case-ignorable */
+U_CAPI int32_t U_EXPORT2
+ucase_getTypeOrIgnorable(UChar32 c);
+
+U_CAPI UBool U_EXPORT2
+ucase_isSoftDotted(UChar32 c);
+
+U_CAPI UBool U_EXPORT2
+ucase_isCaseSensitive(UChar32 c);
+
+/* string case mapping functions */
+
+U_CDECL_BEGIN
+
+/**
+ * Iterator function for string case mappings, which need to look at the
+ * context (surrounding text) of a given character for conditional mappings.
+ *
+ * The iterator only needs to go backward or forward away from the
+ * character in question. It does not use any indexes on this interface.
+ * It does not support random access or an arbitrary change of
+ * iteration direction.
+ *
+ * The code point being case-mapped itself is never returned by
+ * this iterator.
+ *
+ * @param context A pointer to the iterator's working data.
+ * @param dir If <0 then start iterating backward from the character;
+ * if >0 then start iterating forward from the character;
+ * if 0 then continue iterating in the current direction.
+ * @return Next code point, or <0 when the iteration is done.
+ */
+typedef UChar32 U_CALLCONV
+UCaseContextIterator(void *context, int8_t dir);
+
+/**
+ * Sample struct which may be used by some implementations of
+ * UCaseContextIterator. UCASECONTEXT_INITIALIZER sets all ten members to NULL/0.
+ */
+struct UCaseContext {
+ void *p;
+ int32_t start, index, limit;
+ int32_t cpStart, cpLimit;
+ int8_t dir;
+ int8_t b1, b2, b3;
+};
+typedef struct UCaseContext UCaseContext;
+
+U_CDECL_END
+
+#define UCASECONTEXT_INITIALIZER { NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+enum {
+ /**
+ * For string case mappings, a single character (a code point) is mapped
+ * either to itself (in which case in-place mapping functions do nothing),
+ * or to another single code point, or to a string.
+ * Aside from the string contents, these are indicated with a single int32_t
+ * value as follows:
+ *
+ * Mapping to self: Negative values (~self instead of -self to support U+0000)
+ *
+ * Mapping to another code point: Positive values >UCASE_MAX_STRING_LENGTH
+ *
+ * Mapping to a string: The string length (0..UCASE_MAX_STRING_LENGTH) is
+ * returned. Note that the string result may indeed have zero length.
+ */
+ UCASE_MAX_STRING_LENGTH=0x1f
+};
+
+/**
+ * Get the full lowercase mapping for c.
+ * ucase_toFullUpper() and ucase_toFullTitle() are declared with the same parameter list.
+ *
+ * @param c Character to be mapped.
+ * @param iter Character iterator, used for context-sensitive mappings.
+ * See UCaseContextIterator for details.
+ * If iter==NULL then a context-independent result is returned.
+ * @param context Pointer to be passed into iter.
+ * @param pString If the mapping result is a string, then the pointer is
+ * written to *pString.
+ * @param caseLocale Case locale value from ucase_getCaseLocale().
+ * @return Output code point, string length, or a negative value (~c) if c maps to itself; see UCASE_MAX_STRING_LENGTH.
+ *
+ * @see UCaseContextIterator
+ * @see UCASE_MAX_STRING_LENGTH
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+ucase_toFullLower(UChar32 c,
+ UCaseContextIterator *iter, void *context,
+ const UChar **pString,
+ int32_t caseLocale);
+
+U_CAPI int32_t U_EXPORT2
+ucase_toFullUpper(UChar32 c,
+ UCaseContextIterator *iter, void *context,
+ const UChar **pString,
+ int32_t caseLocale);
+
+U_CAPI int32_t U_EXPORT2
+ucase_toFullTitle(UChar32 c,
+ UCaseContextIterator *iter, void *context,
+ const UChar **pString,
+ int32_t caseLocale);
+
+U_CAPI int32_t U_EXPORT2
+ucase_toFullFolding(UChar32 c,
+ const UChar **pString,
+ uint32_t options);
+
+U_CFUNC int32_t U_EXPORT2
+ucase_hasBinaryProperty(UChar32 c, UProperty which);
+
+
+U_CDECL_BEGIN
+
+/**
+ * @internal
+ */
+typedef int32_t U_CALLCONV
+UCaseMapFull(UChar32 c,
+ UCaseContextIterator *iter, void *context,
+ const UChar **pString,
+ int32_t caseLocale);
+
+U_CDECL_END
+
+/* file definitions --------------------------------------------------------- */
+
+#define UCASE_DATA_NAME "ucase"
+#define UCASE_DATA_TYPE "icu"
+
+/* format "cAsE" */
+#define UCASE_FMT_0 0x63
+#define UCASE_FMT_1 0x41
+#define UCASE_FMT_2 0x53
+#define UCASE_FMT_3 0x45
+
+/* indexes into indexes[] */
+enum {
+ UCASE_IX_INDEX_TOP,
+ UCASE_IX_LENGTH,
+ UCASE_IX_TRIE_SIZE,
+ UCASE_IX_EXC_LENGTH,
+ UCASE_IX_UNFOLD_LENGTH,
+
+ UCASE_IX_MAX_FULL_LENGTH=15,
+ UCASE_IX_TOP=16
+};
+
+/* definitions for 16-bit case properties word ------------------------------ */
+
+U_CFUNC const UTrie2 * U_EXPORT2
+ucase_getTrie();
+
+/* 2-bit constants for types of cased characters */
+#define UCASE_TYPE_MASK 3
+enum {
+ UCASE_NONE,
+ UCASE_LOWER,
+ UCASE_UPPER,
+ UCASE_TITLE
+};
+
+#define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK)
+#define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7)
+
+#define UCASE_IS_UPPER_OR_TITLE(props) ((props)&2)
+
+#define UCASE_IGNORABLE 4
+#define UCASE_EXCEPTION 8
+#define UCASE_SENSITIVE 0x10
+
+#define UCASE_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
+
+#define UCASE_DOT_MASK 0x60
+enum {
+ UCASE_NO_DOT=0, /* normal characters with cc=0 */
+ UCASE_SOFT_DOTTED=0x20, /* soft-dotted characters with cc=0 */
+ UCASE_ABOVE=0x40, /* "above" accents with cc=230 */
+ UCASE_OTHER_ACCENT=0x60 /* other accent character (0<cc!=230) */
+};
+
+/* no exception: bits 15..7 are a 9-bit signed case mapping delta (UCASE_MIN_DELTA..UCASE_MAX_DELTA, i.e. -0x100..0xff) */
+#define UCASE_DELTA_SHIFT 7
+#define UCASE_DELTA_MASK 0xff80
+#define UCASE_MAX_DELTA 0xff
+#define UCASE_MIN_DELTA (-UCASE_MAX_DELTA-1)
+
+#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC /* the int16_t cast lets >> sign-extend the delta */
+# define UCASE_GET_DELTA(props) ((int16_t)(props)>>UCASE_DELTA_SHIFT)
+#else /* fallback: when bit 15 is set, OR in 0xfe00 to sign-extend the shifted 9-bit value by hand */
+# define UCASE_GET_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UCASE_DELTA_SHIFT)|0xfe00) : ((uint16_t)(props)>>UCASE_DELTA_SHIFT))
+#endif /* U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC */
+
+/* exception: bits 15..4 are an unsigned 12-bit index into the exceptions array */
+#define UCASE_EXC_SHIFT 4
+#define UCASE_EXC_MASK 0xfff0
+#define UCASE_MAX_EXCEPTIONS ((UCASE_EXC_MASK>>UCASE_EXC_SHIFT)+1)
+
+/* definitions for 16-bit main exceptions word ------------------------------ */
+
+/* first 8 bits indicate values in optional slots */
+enum {
+ UCASE_EXC_LOWER,
+ UCASE_EXC_FOLD,
+ UCASE_EXC_UPPER,
+ UCASE_EXC_TITLE,
+ UCASE_EXC_DELTA,
+ UCASE_EXC_5, /* reserved */
+ UCASE_EXC_CLOSURE,
+ UCASE_EXC_FULL_MAPPINGS,
+ UCASE_EXC_ALL_SLOTS /* one past the last slot */
+};
+
+/* each slot is 2 uint16_t instead of 1 */
+#define UCASE_EXC_DOUBLE_SLOTS 0x100
+
+enum {
+ UCASE_EXC_NO_SIMPLE_CASE_FOLDING=0x200,
+ UCASE_EXC_DELTA_IS_NEGATIVE=0x400,
+ UCASE_EXC_SENSITIVE=0x800
+};
+
+/* UCASE_EXC_DOT_MASK=UCASE_DOT_MASK<<UCASE_EXC_DOT_SHIFT */
+#define UCASE_EXC_DOT_SHIFT 7
+
+/* dot/accent type: normally stored in the main word, but pushed out for larger exception indexes */
+#define UCASE_EXC_DOT_MASK 0x3000
+enum {
+ UCASE_EXC_NO_DOT=0, /* UCASE_NO_DOT<<UCASE_EXC_DOT_SHIFT */
+ UCASE_EXC_SOFT_DOTTED=0x1000, /* UCASE_SOFT_DOTTED<<UCASE_EXC_DOT_SHIFT */
+ UCASE_EXC_ABOVE=0x2000, /* "above" accents with cc=230 */
+ UCASE_EXC_OTHER_ACCENT=0x3000 /* other accent character (0<cc!=230) */
+};
+
+/* complex/conditional mappings */
+#define UCASE_EXC_CONDITIONAL_SPECIAL 0x4000
+#define UCASE_EXC_CONDITIONAL_FOLD 0x8000
+
+/* definitions for lengths word for full case mappings */
+#define UCASE_FULL_LOWER 0xf
+#define UCASE_FULL_FOLDING 0xf0
+#define UCASE_FULL_UPPER 0xf00
+#define UCASE_FULL_TITLE 0xf000
+
+/* maximum lengths */
+#define UCASE_FULL_MAPPINGS_MAX_LENGTH (4*0xf)
+#define UCASE_CLOSURE_MAX_LENGTH 0xf
+
+/* constants for reverse case folding ("unfold") data */
+enum {
+ UCASE_UNFOLD_ROWS,
+ UCASE_UNFOLD_ROW_WIDTH,
+ UCASE_UNFOLD_STRING_WIDTH
+};
+
+#endif
diff --git a/thirdparty/icu4c/common/ucase_props_data.h b/thirdparty/icu4c/common/ucase_props_data.h
new file mode 100644
index 0000000000..aead6d58d1
--- /dev/null
+++ b/thirdparty/icu4c/common/ucase_props_data.h
@@ -0,0 +1,951 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// Copyright (C) 1999-2016, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// file name: ucase_props_data.h
+//
+// machine-generated by: icu/tools/unicode/c/genprops/casepropsbuilder.cpp
+
+
+#ifdef INCLUDED_FROM_UCASE_CPP
+
+static const UVersionInfo ucase_props_dataVersion={0xd,0,0,0};
+
+static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x70c2,0x6098,0x683,0x172,0,0,0,0,0,0,0,0,0,0,3};
+
+static const uint16_t ucase_props_trieIndex[12356]={
+0x336,0x33e,0x346,0x34e,0x35c,0x364,0x36c,0x374,0x37c,0x384,0x38b,0x393,0x39b,0x3a3,0x3ab,0x3b3,
+0x3b9,0x3c1,0x3c9,0x3d1,0x3d9,0x3e1,0x3e9,0x3f1,0x3f9,0x401,0x409,0x411,0x419,0x421,0x429,0x431,
+0x439,0x441,0x449,0x451,0x459,0x461,0x469,0x471,0x46d,0x475,0x47a,0x482,0x489,0x491,0x499,0x4a1,
+0x4a9,0x4b1,0x4b9,0x4c1,0x355,0x35d,0x4c6,0x4ce,0x4d3,0x4db,0x4e3,0x4eb,0x4ea,0x4f2,0x4f7,0x4ff,
+0x507,0x50e,0x512,0x355,0x355,0x355,0x519,0x521,0x529,0x52b,0x533,0x53b,0x53f,0x540,0x548,0x550,
+0x558,0x540,0x560,0x565,0x558,0x540,0x56d,0x575,0x53f,0x57d,0x585,0x58d,0x595,0x355,0x59d,0x355,
+0x5a5,0x4ec,0x5ad,0x58d,0x53f,0x57d,0x5b4,0x58d,0x5bc,0x5be,0x548,0x58d,0x53f,0x355,0x5c6,0x355,
+0x355,0x5cc,0x5d3,0x355,0x355,0x5d7,0x5df,0x355,0x5e3,0x5ea,0x355,0x5f1,0x5f9,0x600,0x608,0x355,
+0x355,0x60d,0x615,0x61d,0x625,0x62d,0x634,0x63c,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x644,0x355,0x355,0x654,0x654,0x64c,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x65c,0x65c,0x54c,0x54c,0x355,0x662,0x66a,0x355,
+0x672,0x355,0x67a,0x355,0x681,0x687,0x355,0x355,0x355,0x68f,0x355,0x355,0x355,0x355,0x355,0x355,
+0x696,0x355,0x69d,0x6a5,0x355,0x6ad,0x6b5,0x355,0x57c,0x6b8,0x6c0,0x6c6,0x5bc,0x6ce,0x355,0x6d5,
+0x355,0x6da,0x355,0x6e0,0x6e8,0x6ec,0x6f4,0x6fc,0x704,0x709,0x70c,0x714,0x724,0x71c,0x734,0x72c,
+0x37c,0x73c,0x37c,0x744,0x747,0x37c,0x74f,0x37c,0x757,0x75f,0x767,0x76f,0x777,0x77f,0x787,0x78f,
+0x797,0x79e,0x355,0x7a6,0x7ae,0x355,0x7b6,0x7be,0x7c6,0x7ce,0x7d6,0x7de,0x7e6,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x7e9,0x7ef,0x7f5,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x7fd,0x802,0x806,0x80e,0x37c,0x37c,0x37c,0x816,0x81e,0x825,0x355,0x82a,0x355,0x355,0x355,0x832,
+0x355,0x677,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x53e,0x83a,0x355,0x355,0x841,0x355,0x355,0x849,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x851,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x6e0,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x857,0x355,0x85f,0x864,0x86c,0x355,0x355,0x874,0x87c,0x884,0x37c,0x889,0x891,0x897,0x89f,0x8a2,
+0x8aa,0x8b1,0x355,0x355,0x355,0x355,0x8b8,0x8c0,0x355,0x8c8,0x8cf,0x355,0x529,0x8d4,0x8dc,0x681,
+0x355,0x8e2,0x8ea,0x8ee,0x355,0x8f6,0x8fe,0x906,0x355,0x90c,0x910,0x918,0x928,0x920,0x355,0x930,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x938,0x355,0x355,0x355,0x355,0x940,0x5bc,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x945,0x94d,0x951,0x355,0x355,0x355,0x355,0x338,0x33e,0x959,0x961,0x968,0x4ec,0x355,0x355,0x970,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0xd58,0xd58,0xd70,0xdb0,0xdf0,0xe2c,0xe6c,0xeac,0xee4,0xf24,0xf64,0xfa4,0xfe4,0x1024,0x1064,0x10a4,
+0x10e4,0x1124,0x1164,0x11a4,0x11b4,0x11e8,0x1224,0x1264,0x12a4,0x12e4,0xd54,0x1318,0x134c,0x138c,0x13a8,0x13dc,
+0x9e1,0xa11,0xa51,0xa90,0x188,0x188,0xac8,0x188,0x188,0x188,0x188,0x188,0x188,0xaf1,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xb31,0x188,0x188,0xb66,0xba5,0xbe5,0xc1f,0xc56,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
+0xc96,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x977,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x6b5,0x355,0x355,0x355,0x97f,0x355,0x355,0x355,
+0x355,0x987,0x98d,0x991,0x355,0x355,0x995,0x999,0x99f,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x9a7,0x9ab,0x355,0x355,0x355,0x355,0x355,0x9b3,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x9bb,0x9bf,0x9c7,0x9cb,0x355,0x9d2,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x9d8,0x355,0x355,0x355,0x355,0x9df,0x355,0x355,0x355,0x355,
+0x355,0x53f,0x9e4,0x9eb,0x5bd,0x5bc,0x9ef,0x53c,0x355,0x9f7,0x9fe,0x355,0xa04,0x5bc,0xa09,0xa11,
+0x355,0x355,0xa16,0x355,0x355,0x355,0x355,0x338,0xa1e,0x5bc,0x5be,0xa26,0xa2d,0x355,0x355,0x355,
+0x355,0x355,0x9e4,0xa35,0x355,0x355,0xa3d,0xa45,0x355,0x355,0x355,0x355,0x355,0x355,0xa49,0xa51,
+0x355,0x355,0xa59,0x4b0,0x355,0x355,0xa61,0x355,0x355,0xa67,0xa6f,0x355,0x355,0x355,0x355,0x355,
+0x355,0xa74,0x355,0x355,0x355,0xa7c,0xa84,0x355,0x355,0xa8c,0xa94,0x355,0x355,0x355,0xa97,0x6b5,
+0xa9f,0xaa3,0xaab,0x355,0xab2,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0xab9,0x355,0x355,0x940,0xac1,0x355,0x355,0x355,0xac7,0xacf,0x355,0xad3,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0xad9,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0xadf,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0xae6,0x355,0xaec,0x57c,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0xa7c,0xa84,0x355,0x355,0x355,0x355,0x355,0x355,0x677,0x355,0xaf2,0x355,0x355,
+0xafa,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0xaff,0x57c,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0xb07,0xb0f,0xb15,0x355,0x355,0x355,0x355,0xb1d,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0xb25,0xb2d,0xb32,0xb38,0xb40,0xb48,0xb50,0xb29,0xb58,0xb60,
+0xb68,0xb6f,0xb2a,0xb25,0xb2d,0xb28,0xb38,0xb2b,0xb26,0xb77,0xb29,0xb7f,0xb87,0xb8f,0xb96,0xb82,
+0xb8a,0xb92,0xb99,0xb85,0xba1,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x87c,0xba9,0x87c,0xbb0,0xbb7,0xbbf,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0xbc7,0xbcf,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0xbd3,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x9d0,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0xbdb,0x355,0xbe3,0xbeb,0xbf2,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0xb21,
+0xbfa,0xbfa,0xc00,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x9f9,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x53f,0x87c,0x87c,0x87c,0x355,0x355,0x355,0x355,0x87c,0x87c,
+0x87c,0x87c,0x87c,0x87c,0x87c,0xc08,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,
+0x355,0x355,0x355,0x355,0x355,0x355,0x335,0x335,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
+0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,
+0,0,4,0,0,0,0,0,0,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,
+0x1012,0xa,0x5a,0x7a,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0xba,0x1012,0x1012,0x1012,0x1012,
+0x1012,0x1012,0x1012,0,0,0,4,0,4,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,
+0xf011,0xf9,0xf031,0x149,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0x189,0xf011,0xf011,0xf011,0xf011,
+0xf011,0xf011,0xf011,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,4,0,1,0,0,4,0,4,
+0,0,0,0,4,0x1c9,0,4,4,0,1,0,0,0,0,0,
+0x1012,0x1012,0x1012,0x1012,0x1012,0x1fa,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x5a,0x5a,0x1012,0x1012,
+0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x239,
+0xf011,0xf011,0xf011,0xf011,0xf011,0x2d9,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,
+0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0x3c91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x31a,0xff91,0x92,0xff91,0x92,0xff91,0x31a,0xffb1,
+0x33a,0x389,0x92,0xff91,0x92,0xff91,0x92,0xff91,1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,
+0xff91,0x92,0xff91,0x92,0xff91,0x3d9,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0xc392,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x459,0x6191,0x6912,0x92,0xff91,
+0x92,0xff91,0x6712,0x92,0xff91,0x6692,0x6692,0x92,0xff91,1,0x2792,0x6512,0x6592,0x92,0xff91,0x6692,
+0x6792,0x3091,0x6992,0x6892,0x92,0xff91,0x5191,1,0x6992,0x6a92,0x4111,0x6b12,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x6d12,0x92,0xff91,0x6d12,1,1,0x92,0xff91,0x6d12,0x92,0xff91,0x6c92,0x6c92,0x92,
+0xff91,0x92,0xff91,0x6d92,0x92,0xff91,1,0,0x92,0xff91,1,0x1c11,0,0,0,0,
+0x48a,0x4bb,0x4f9,0x52a,0x55b,0x599,0x5ca,0x5fb,0x639,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,
+0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0xd891,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x669,0x6ea,0x71b,0x759,
+0x92,0xff91,0xcf92,0xe412,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0xbf12,1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,1,1,1,1,1,1,0x78a,0x92,
+0xff91,0xae92,0x7aa,0x7c9,0x7c9,0x92,0xff91,0x9e92,0x2292,0x2392,0x92,0xff91,0x92,0xffb1,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x7e9,0x809,0x829,0x9711,0x9911,1,0x9991,0x9991,1,0x9b11,1,0x9a91,
+0x849,1,1,1,0x9991,0x869,1,0x9891,1,0x889,0x8a9,1,0x97b1,0x9691,0x8a9,0x8c9,
+0x8e9,1,1,0x9691,1,0x909,0x9591,1,1,0x9511,1,1,1,1,1,1,
+1,0x929,1,1,0x9311,1,0x949,0x9311,1,1,1,0x969,0x9311,0xdd91,0x9391,0x9391,
+0xdc91,1,1,1,1,1,0x9291,1,0,1,1,1,1,1,1,1,
+1,0x989,0x9a9,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,5,5,0x25,5,5,5,5,5,5,4,4,4,
+0x14,4,0x14,4,5,5,4,4,4,4,4,4,4,4,4,4,
+4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+4,4,4,4,5,5,5,5,5,4,4,4,4,4,4,4,
+4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+4,4,4,4,0x54,0x54,0x44,0x44,0x44,0x44,0x44,0x9cc,0x54,0x44,0x54,0x44,
+0x54,0x44,0x44,0x44,0x44,0x44,0x44,0x54,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x64,
+0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,
+0x64,0x64,0x64,0x64,0x64,0x74,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,
+0x64,0x44,0x44,0x44,0x44,0x44,0x54,0x44,0x44,0x9dd,0x44,0x64,0x64,0x64,0x44,0x44,
+0x44,0x64,0x64,4,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x44,0x64,0x64,0x64,0x44,
+0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
+0x44,0x44,0x44,0x44,0x92,0xff91,0x92,0xff91,4,4,0x92,0xff91,0,0,5,0x4111,
+0x4111,0x4111,0,0x3a12,0,0,0,0,4,4,0x1312,4,0x1292,0x1292,0x1292,0,
+0x2012,0,0x1f92,0x1f92,0xa29,0x1012,0xafa,0x1012,0x1012,0xb3a,0x1012,0x1012,0xb7a,0xbca,0xc1a,0x1012,
+0xc5a,0x1012,0x1012,0x1012,0xc9a,0xcda,0,0xd1a,0x1012,0x1012,0xd5a,0x1012,0x1012,0xd9a,0x1012,0x1012,
+0xed11,0xed91,0xed91,0xed91,0xdd9,0xf011,0xea9,0xf011,0xf011,0xee9,0xf011,0xf011,0xf29,0xf79,0xfc9,0xf011,
+0x1009,0xf011,0xf011,0xf011,0x1049,0x1089,0x10c9,0x10f9,0xf011,0xf011,0x1139,0xf011,0xf011,0x1179,0xf011,0xf011,
+0xe011,0xe091,0xe091,0x412,0x11b9,0x11e9,2,2,2,0x1239,0x1269,0xfc11,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x1299,0x12c9,0x391,0xc631,0x12fa,0x1349,0,0x92,0xff91,0xfc92,0x92,0xff91,
+1,0xbf12,0xbf12,0xbf12,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,
+0x2812,0x2812,0x2812,0x2812,0x1012,0x1012,0x137a,0x1012,0x13ba,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,
+0x1012,0x1012,0x13fa,0x1012,0x1012,0x143a,0x147a,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x14ca,0x1012,
+0x1012,0x1012,0x1012,0x1012,0xf011,0xf011,0x1509,0xf011,0x1549,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,
+0xf011,0xf011,0x1589,0xf011,0xf011,0x15c9,0x1609,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0x1659,0xf011,
+0xf011,0xf011,0xf011,0xf011,0xd811,0xd811,0xd811,0xd811,0xd811,0xd811,0xd831,0xd811,0xd831,0xd811,0xd811,0xd811,
+0xd811,0xd811,0xd811,0xd811,0x92,0xff91,0x169a,0x16d9,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0,0x44,0x44,0x44,0x44,0x44,4,4,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x792,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,
+0xff91,0x92,0xff91,0xf891,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,
+0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0,
+0,4,0,0,0,0,0,4,1,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
+0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
+0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0x1719,1,0,0,0,
+0,0,0,0,0,0x64,0x44,0x44,0x44,0x44,0x64,0x44,0x44,0x44,0x64,0x64,
+0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x64,0x44,
+0x44,0x64,0x64,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,
+0x64,0x64,0,0x64,0,0x64,0x64,0,0x44,0x64,0,0x64,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
+0,0,0,0,4,4,4,4,4,4,0,0,0,0,0,0,
+0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0,
+4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0x64,
+0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x64,0x64,0x44,0x44,0x44,0x44,0x44,
+0x64,0x44,0x44,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,
+0x44,0x44,0x44,0x44,0x44,4,0,0x44,0x44,0x44,0x44,0x64,0x44,4,4,0x44,
+0x44,0,0x64,0x44,0x44,0x64,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,4,0,0x64,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x64,0x44,0x44,
+0x64,0x44,0x44,0x64,0x64,0x64,0x44,0x64,0x64,0x44,0x64,0x44,0x44,0x44,0x64,0x44,
+0x64,0x44,0x64,0x44,0x64,0x44,0x44,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,
+4,4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x44,
+4,4,0,0,0,0,4,0,0,0x64,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0x44,0x44,0x44,0x44,4,0x44,0x44,0x44,0x44,0x44,4,0x44,0x44,0x44,
+4,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0x64,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
+0x44,0x44,0x44,0x44,0x44,0x44,4,0x64,0x44,0x44,0x64,0x44,0x44,0x64,0x44,0x44,
+0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x44,0x64,0x44,0x44,0x64,0x64,0x44,
+0x44,0x44,0x44,0x44,4,4,4,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,4,0,0x64,0,0,0,0,4,4,4,
+4,4,4,4,4,0,0,0,0,0x64,0,0,0,0x44,0x64,0x44,
+0x44,4,4,4,0,0,0,0,0,0,0,0,0,0,4,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x64,0,0,0,
+0,4,4,4,4,0,0,0,0,0,0,0,0,0x64,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0,
+0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,4,4,0,0,0,0,4,4,0,0,4,4,0x64,0,0,
+0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,4,4,0,0,0,4,0,0,0,0,0,0,
+0,0,0,0,0,4,4,4,4,4,0,4,4,0,0,0,
+0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,4,4,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,
+4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x64,0,0,4,0,4,4,4,4,0,0,0,0,0,0,0,
+0,0x64,0,0,0,0,0,0,0,4,4,0,0,0,0,0,
+0,0,0,0,0,0,4,4,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
+0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,4,0,0,0,4,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,4,0,0,0,0,0,4,4,4,0,4,4,
+4,0x64,0,0,0,0,0,0,0,0x64,0x64,0,0,0,0,0,
+0,0,0,0,0,0,4,0,0,0,0,0,4,0x64,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0x64,0,0,0,0,0,0,0,4,4,4,0,4,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,4,0,0,4,4,4,4,0x64,0x64,0x64,0,0,0,0,0,
+0,0,4,4,0x64,0x64,0x64,0x64,4,4,4,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,
+4,4,4,4,0x64,0x64,0x64,4,4,0,0,0,0,0,0,0,
+0,0,4,0,0x64,0x64,0x64,0x64,4,4,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0x64,0x64,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0x64,0,0x64,
+0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0x64,0x64,4,0x64,4,4,4,4,4,0x64,0x64,
+0x64,0x64,4,0,0x64,4,0x44,0x44,0x64,0,0x44,0x44,0,0,0,0,
+0,4,4,4,4,4,4,4,4,4,4,4,0,4,4,4,
+4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+4,4,4,4,4,4,4,4,4,4,4,4,4,0,0,0,
+0,0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,4,4,4,4,0,4,4,4,4,4,0x64,0,0x64,0x64,0,
+0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,0,
+0,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,4,4,4,4,0,0,0,0,0,0,0,
+0,0,0,0,0,0,4,0,0,4,4,0,0,0,0,0,
+0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,4,0,0,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,
+0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,
+0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0,0x175a,0,0,0,0,0,0x175a,0,0,
+0x1779,0x17a9,0x17d9,0x1809,0x1839,0x1869,0x1899,0x18c9,0x18f9,0x1929,0x1959,0x1989,0x19b9,0x19e9,0x1a19,0x1a49,
+0x1a79,0x1aa9,0x1ad9,0x1b09,0x1b39,0x1b69,0x1b99,0x1bc9,0x1bf9,0x1c29,0x1c59,0x1c89,0x1cb9,0x1ce9,0x1d19,0x1d49,
+0x1d79,0x1da9,0x1dd9,0x1e09,0x1e39,0x1e69,0x1e99,0x1ec9,0x1ef9,0x1f29,0x1f59,0,4,0x1f89,0x1fb9,0x1fe9,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,
+0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,
+0x203a,0x203a,0x203a,0x203a,0x203a,0x203a,0,0,0x2059,0x2089,0x20b9,0x20e9,0x2119,0x2149,0,0,
+0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,
+0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,4,4,0x64,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,4,
+4,4,4,4,4,4,0,0,0,0,0,0,0,0,4,0,
+0,4,4,4,4,4,4,4,4,4,0x64,4,0,0,0,4,
+0,0,0,0,0,0x44,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,4,4,4,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,0,
+0,0,0,4,4,0,0,0,0,0,0,0,0,0,4,0,
+0,0,0,0,0,0x64,0x44,0x64,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,
+0x64,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,4,0,4,4,4,4,
+4,4,4,0,0x64,0,4,0,0,4,4,4,4,4,4,4,
+4,0,0,0,0,0,0,4,4,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
+0x44,0,0,0x64,0,0,0,0,0,0,0,4,0,0,0,0,
+0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,
+0x44,0x64,4,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0x64,0,4,4,4,4,4,0,4,0,0,0,
+0,0,4,0,0x60,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,4,4,4,4,0,0,
+4,4,0x60,0x64,4,4,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0x64,0,4,4,0,0,
+0,4,0,4,4,4,0x60,0x60,0,0,0,0,0,0,0,0,
+0,0,0,0,4,4,4,4,4,4,4,4,0,0,4,0x64,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,4,4,4,4,4,4,0,0,
+0x2179,0x21a9,0x21d9,0x2209,0x2239,0x2289,0x22d9,0x2309,0x2339,0,0,0,0,0,0,0,
+0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,
+0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0,0,0x236a,0x236a,0x236a,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x44,0x44,0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x64,0x64,0x64,0x64,
+0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,0,0x64,0,0,
+0,0,0,0,0x44,0,0,0,0x44,0x44,0,0,0,0,0,0,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,0x25,5,5,5,5,5,5,5,5,1,1,1,1,1,
+1,1,1,1,1,1,1,1,5,0x2389,1,1,1,0x23a9,1,1,
+5,5,5,5,0x25,5,5,5,0x25,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x23c9,1,
+1,1,1,1,1,1,0x21,1,1,1,1,5,5,5,5,5,
+0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
+0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0,0x44,0x64,0x64,0x44,0x64,
+0x44,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x44,0x44,0x64,0x64,0x64,
+0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xffb1,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x23ea,0x2429,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x2469,0x24e9,0x2569,0x25e9,0x2669,0x26e9,1,1,0x271a,1,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xffb1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x411,0x411,0x411,0x411,
+0x411,0x411,0x411,0x411,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0x411,0x411,0x411,0x411,
+0x411,0x411,0,0,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0,0,0x411,0x411,0x411,0x411,
+0x411,0x411,0x411,0x411,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0x411,0x411,0x411,0x411,
+0x411,0x411,0x411,0x411,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0x411,0x411,0x411,0x411,
+0x411,0x411,0,0,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0,0,0x2769,0x411,0x27e9,0x411,
+0x2899,0x411,0x2949,0x411,0,0xfc12,0,0xfc12,0,0xfc12,0,0xfc12,0x411,0x411,0x411,0x411,
+0x411,0x411,0x411,0x411,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0x2511,0x2511,0x2b11,0x2b11,
+0x2b11,0x2b11,0x3211,0x3211,0x4011,0x4011,0x3811,0x3811,0x3f11,0x3f11,0,0,0x29f9,0x2a69,0x2ad9,0x2b49,
+0x2bb9,0x2c29,0x2c99,0x2d09,0x2d7b,0x2deb,0x2e5b,0x2ecb,0x2f3b,0x2fab,0x301b,0x308b,0x30f9,0x3169,0x31d9,0x3249,
+0x32b9,0x3329,0x3399,0x3409,0x347b,0x34eb,0x355b,0x35cb,0x363b,0x36ab,0x371b,0x378b,0x37f9,0x3869,0x38d9,0x3949,
+0x39b9,0x3a29,0x3a99,0x3b09,0x3b7b,0x3beb,0x3c5b,0x3ccb,0x3d3b,0x3dab,0x3e1b,0x3e8b,0x411,0x411,0x3ef9,0x3f79,
+0x3fe9,0,0x4069,0x40e9,0xfc12,0xfc12,0xdb12,0xdb12,0x419b,4,0x4209,4,4,4,0x4259,0x42d9,
+0x4349,0,0x43c9,0x4449,0xd512,0xd512,0xd512,0xd512,0x44fb,4,4,4,0x411,0x411,0x4569,0x4619,
+0,0,0x46e9,0x4769,0xfc12,0xfc12,0xce12,0xce12,0,4,4,4,0x411,0x411,0x4819,0x48c9,
+0x4999,0x391,0x4a19,0x4a99,0xfc12,0xfc12,0xc812,0xc812,0xfc92,4,4,4,0,0,0x4b49,0x4bc9,
+0x4c39,0,0x4cb9,0x4d39,0xc012,0xc012,0xc112,0xc112,0x4deb,4,4,0,0,0,0,0,
+0,0,0,0,0,0,0,4,4,4,4,4,0,0,0,0,
+0,0,0,0,4,4,0,0,0,0,0,0,4,0,0,4,
+0,0,4,4,4,4,4,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,4,4,4,4,4,0,4,4,
+4,4,4,4,4,4,4,4,0,0x25,0,0,0,0,0,0,
+0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0x44,0x44,0x64,0x64,0x44,0x44,0x44,0x44,
+0x64,0x64,0x64,0x44,0x44,4,4,4,4,0x44,4,4,4,0x64,0x64,0x44,
+0x64,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,
+0,0,1,2,2,2,1,1,2,2,2,1,0,2,0,0,
+0,2,2,2,2,2,0,0,0,0,0,0,2,0,0x4e5a,0,
+2,0,0x4e9a,0x4eda,2,2,0,1,2,2,0xe12,2,1,0,0,0,
+0,1,0,0,1,1,2,2,0,0,0,0,0,2,1,1,
+0x21,0x21,0,0,0,0,0xf211,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0x812,0x812,0x812,0x812,0x812,0x812,0x812,0x812,
+0x812,0x812,0x812,0x812,0x812,0x812,0x812,0x812,0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,
+0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,0,0,0,0x92,0xff91,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,
+0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xf311,0xf311,0xf311,0xf311,
+0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,
+0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,
+0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
+0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
+0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0,0x92,0xff91,0x4f1a,0x4f3a,0x4f5a,0x4f79,0x4f99,0x92,
+0xff91,0x92,0xff91,0x92,0xff91,0x4fba,0x4fda,0x4ffa,0x501a,1,0x92,0xff91,1,0x92,0xff91,1,
+1,1,1,1,0x25,5,0x503a,0x503a,0x92,0xff91,0x92,0xff91,1,0,0,0,
+0,0,0,0x92,0xff91,0x92,0xff91,0x44,0x44,0x44,0x92,0xff91,0,0,0,0,
+0,0,0,0,0,0,0,0,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,
+0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,
+0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0,0x5059,0,0,0,0,
+0,0x5059,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
+0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
+0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,
+0,0,0x64,0x64,0x64,0x64,0x60,0x60,0,4,4,4,4,4,0,0,
+0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0x64,0x64,4,
+4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x507a,0x50b9,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0,0x44,
+4,4,4,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,4,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,5,5,0x44,0x44,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+4,4,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+1,1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,5,1,1,1,1,1,1,1,1,0x92,0xff91,0x92,
+0xff91,0x50fa,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,4,4,4,0x92,
+0xff91,0x511a,1,0,0x92,0xff91,0x92,0xff91,0x1811,1,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x513a,0x515a,0x517a,0x519a,0x513a,1,0x51ba,0x51da,0x51fa,0x521a,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0,0,0x92,0xff91,
+0xe812,0x523a,0x525a,0x92,0xff91,0x92,0xff91,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0x92,0xff91,0,
+5,5,1,0,0,0,0,0,0,0,4,0,0,0,0x64,0,
+0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,
+0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0x64,4,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
+0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
+0,0,0,0,0,0,4,4,4,4,4,0x64,0x64,0x64,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,4,4,4,4,4,4,4,4,4,4,0,0x60,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0x64,0,0,4,4,4,4,0,0,4,4,0,0,
+0x60,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,4,4,4,4,4,4,0,0,4,4,0,0,4,4,0,
+0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
+0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
+0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0x44,0,0x44,0x44,0x64,0,0,0x44,
+0x44,0,0,0,0,0,0x44,0x44,0,0x44,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,4,4,0,0,0,0,0,4,4,0,0x64,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0x5279,1,1,1,1,1,1,1,4,5,5,5,5,
+1,1,1,1,1,1,1,1,1,4,4,4,0,0,0,0,
+0x5299,0x52c9,0x52f9,0x5329,0x5359,0x5389,0x53b9,0x53e9,0x5419,0x5449,0x5479,0x54a9,0x54d9,0x5509,0x5539,0x5569,
+0x5b99,0x5bc9,0x5bf9,0x5c29,0x5c59,0x5c89,0x5cb9,0x5ce9,0x5d19,0x5d49,0x5d79,0x5da9,0x5dd9,0x5e09,0x5e39,0x5e69,
+0x5e99,0x5ec9,0x5ef9,0x5f29,0x5f59,0x5f89,0x5fb9,0x5fe9,0x6019,0x6049,0x6079,0x60a9,0x60d9,0x6109,0x6139,0x6169,
+0x5599,0x55c9,0x55f9,0x5629,0x5659,0x5689,0x56b9,0x56e9,0x5719,0x5749,0x5779,0x57a9,0x57d9,0x5809,0x5839,0x5869,
+0x5899,0x58c9,0x58f9,0x5929,0x5959,0x5989,0x59b9,0x59e9,0x5a19,0x5a49,0x5a79,0x5aa9,0x5ad9,0x5b09,0x5b39,0x5b69,
+0,0,0,0,0,4,0,0,4,0,0,0,0,0x64,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x6199,0x6219,0x6299,0x6319,0x63c9,0x6479,0x6519,0,0,0,0,0,0,0,0,0,
+0,0,0,0x65b9,0x6639,0x66b9,0x6739,0x67b9,0,0,0,0,0,0,0x64,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+4,4,4,4,0,0,0,4,0,0,0,0,0,0,0,0,
+0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,
+0x64,0x64,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,4,0,0,4,0,0,0,0,0,0,
+0,0,0,0,0,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,
+0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0,
+0,0,4,0,4,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,
+0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0x64,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0x1412,0x1412,0x1412,0x1412,
+0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,
+0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0xec11,0xec11,0xec11,0xec11,
+0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,
+0xec11,0xec11,0xec11,0xec11,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,
+0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0,0,0,0,0xec11,0xec11,0xec11,0xec11,
+0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,
+0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0,0,0,0,0,4,4,4,
+0,4,4,0,0,0,0,0,4,0x64,4,0x44,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0x44,0x64,0x64,0,0,0,0,0x64,0,0,0,0,
+0,0x44,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x2012,0x2012,0x2012,0x2012,
+0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,
+0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0xe011,0xe011,0xe011,0xe011,
+0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,
+0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0x64,0x64,0x44,0x44,0x44,0x64,0x44,0x64,0x64,0x64,0x64,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
+4,4,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
+4,4,4,0,0,0x64,0x64,0,0,4,0,0,0x44,0x44,0x44,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
+4,4,4,4,0,4,4,4,4,4,4,0x64,0x64,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,4,
+4,4,4,0,0x60,0,0,0,0,0,0,0,0,4,0x64,4,
+4,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,4,4,4,0,0,4,0x60,0x64,4,
+0,0,0,0,0,0,4,0,0,0,0,4,4,4,4,4,
+4,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
+0,0,0,0,0,0x60,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,
+0x44,0,0,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0x64,4,4,0,0x64,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,4,4,4,4,4,4,0,4,0,
+0,0,0,4,4,0,0x64,0x64,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,4,4,4,4,0,0,0,0,0,0,
+4,4,0,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,0,
+0,4,0,0x64,0,0,0,0,0,0,0,0,0,0,0,4,
+0,4,0,0,4,4,4,4,4,4,0x60,0x64,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,4,4,4,0,0,4,4,
+4,4,0,4,4,4,4,0x64,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
+4,4,4,4,4,4,4,4,0,0x64,0x64,0,0,0,0,0,
+0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,
+0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,
+0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,
+0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,4,4,0x60,0x64,0,
+0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+4,4,4,4,0,0,4,4,0,0,0,0,0,4,4,4,
+4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
+0x64,4,4,4,4,0,0,4,4,4,4,0,0,0,0,0,
+0,0,0,0x64,0,0,0,0,0,0,0,0,0,4,4,4,
+4,4,4,0,0,4,4,4,0,0,0,0,0,0,0,0,
+0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,0,
+4,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,4,4,4,4,4,4,4,0,4,4,4,4,
+4,4,0,0x64,4,4,4,4,4,4,4,4,0,0,4,4,
+4,4,4,4,4,0,4,4,0,4,4,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,
+4,4,4,0,0,0,4,0,4,4,0,4,4,4,0x64,4,
+0x64,0x64,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,0,
+0,4,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
+4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x64,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,4,
+4,4,4,4,4,4,4,4,4,4,0,4,4,0,0,0,
+0,0,0,0,0,0,0,0,0x60,0x60,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,4,0x64,0,0,0,0,0,
+0,0x60,0x60,0x64,0x64,0x64,0,0,0,0x60,0x60,0x60,0x60,0x60,0x60,4,
+4,4,4,4,4,4,4,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,
+0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,
+0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0x44,0x44,0x44,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,
+1,1,1,1,1,1,0x21,0x21,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,1,1,1,1,1,1,1,0,0x21,0x21,
+1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,2,0,2,2,
+0,0,2,0,0,2,2,0,0,2,2,2,2,0,2,2,
+2,2,2,2,2,2,1,1,1,1,0,1,0,1,0x21,0x21,
+1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+1,1,1,1,2,2,0,2,2,2,2,0,0,2,2,2,
+2,2,2,2,2,0,2,2,2,2,2,2,2,0,1,1,
+1,1,1,1,1,1,0x21,0x21,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,2,2,0,2,2,2,2,0,
+2,2,2,2,2,0,2,0,0,0,2,2,2,2,2,2,
+2,0,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1,
+1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
+1,1,0,0,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,0,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,0,1,1,1,1,1,1,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,0,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,
+1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,1,1,1,0,1,1,1,1,1,1,2,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,
+4,4,4,4,4,4,4,4,4,4,4,0,0,0,0,4,
+4,4,4,4,4,4,4,4,4,4,4,4,4,0,0,0,
+0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,
+4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,4,4,4,4,4,0,4,4,4,
+4,4,4,4,4,4,4,4,4,4,4,4,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,
+0x44,0x44,0x44,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
+0x44,0x44,0x44,0x44,0x44,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0x44,
+0x44,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,
+0x44,0x44,0x44,4,4,4,4,4,4,4,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,
+0x64,0x64,0x64,0,0,0,0,0,0,0,0,0,0x1112,0x1112,0x1112,0x1112,
+0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,
+0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0xef11,0xef11,
+0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,
+0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0x44,0x44,0x44,0x44,
+0x44,0x44,0x64,4,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,
+2,2,0,0,0,0,0,0,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0
+};
+
+static const uint16_t ucase_props_exceptions[1667]={
+0xc850,0x20,2,0x130,0x131,0x4810,0x20,0x841,0x6b,1,0x212a,0x841,0x73,1,0x17f,0x5c50,
+0x20,2,0x130,0x131,0x844,0x4b,1,0x212a,0x844,0x53,1,0x17f,0x806,0x3bc,0x39c,0x841,
+0xe5,1,0x212b,0x8c0,1,0x2220,0x73,0x73,0x53,0x53,0x53,0x73,0x1e9e,0x844,0xc5,1,
+0x212b,0x4810,1,0xce50,0xc7,2,0x49,0x131,0x844,0x49,2,0x69,0x130,0x880,0x2220,0x2bc,
+0x6e,0x2bc,0x4e,0x2bc,0x4e,0x806,0x73,0x53,0x809,0x1c6,0x1c5,0x80d,0x1c6,0x1c4,0x1c5,0x80c,
+0x1c4,0x1c5,0x809,0x1c9,0x1c8,0x80d,0x1c9,0x1c7,0x1c8,0x80c,0x1c7,0x1c8,0x809,0x1cc,0x1cb,0x80d,
+0x1cc,0x1ca,0x1cb,0x80c,0x1ca,0x1cb,0x880,0x2220,0x6a,0x30c,0x4a,0x30c,0x4a,0x30c,0x809,0x1f3,
+0x1f2,0x80d,0x1f3,0x1f1,0x1f2,0x80c,0x1f1,0x1f2,0x810,0x2a2b,0x810,0x2a28,0x810,0x2a3f,0x810,0x2a1f,
+0x810,0x2a1c,0x810,0x2a1e,0x810,0xa54f,0x810,0xa54b,0x810,0xa528,0x810,0xa544,0x810,0x29f7,0x810,0xa541,
+0x810,0x29fd,0x810,0x29e7,0x810,0xa543,0x810,0xa52a,0x1810,0xa515,0x810,0xa512,0x6800,0x3846,0x3b9,0x399,
+1,0x1fbe,0x8c0,1,0x3330,0x3b9,0x308,0x301,0x399,0x308,0x301,0x399,0x308,0x301,0x1fd3,0x841,
+0x3b2,1,0x3d0,0x841,0x3b5,1,0x3f5,0x841,0x3b8,2,0x3d1,0x3f4,0x841,0x3b9,2,0x345,
+0x1fbe,0x841,0x3ba,1,0x3f0,0x841,0x3bc,1,0xb5,0x841,0x3c0,1,0x3d6,0x841,0x3c1,1,
+0x3f1,0x4850,0x20,1,0x3c2,0x841,0x3c6,1,0x3d5,0x841,0x3c9,1,0x2126,0x8c0,1,0x3330,
+0x3c5,0x308,0x301,0x3a5,0x308,0x301,0x3a5,0x308,0x301,0x1fe3,0x844,0x392,1,0x3d0,0x844,0x395,
+1,0x3f5,0x844,0x398,2,0x3d1,0x3f4,0x844,0x399,2,0x345,0x1fbe,0x844,0x39a,1,0x3f0,
+0x844,0x39c,1,0xb5,0x844,0x3a0,1,0x3d6,0x844,0x3a1,1,0x3f1,0x806,0x3c3,0x3a3,0x844,
+0x3a3,1,0x3c2,0x844,0x3a6,1,0x3d5,0x844,0x3a9,1,0x2126,0x806,0x3b2,0x392,0x846,0x3b8,
+0x398,1,0x3f4,0x806,0x3c6,0x3a6,0x806,0x3c0,0x3a0,0x806,0x3ba,0x39a,0x806,0x3c1,0x3a1,0x841,
+0x3b8,2,0x398,0x3d1,0x806,0x3b5,0x395,0x841,0x432,1,0x1c80,0x841,0x434,1,0x1c81,0x841,
+0x43e,1,0x1c82,0x841,0x441,1,0x1c83,0x841,0x442,2,0x1c84,0x1c85,0x841,0x44a,1,0x1c86,
+0x844,0x412,1,0x1c80,0x844,0x414,1,0x1c81,0x844,0x41e,1,0x1c82,0x844,0x421,1,0x1c83,
+0x844,0x422,2,0x1c84,0x1c85,0x844,0x42a,1,0x1c86,0x841,0x463,1,0x1c87,0x844,0x462,1,
+0x1c87,0x4880,0x20,0x565,0x582,0x810,0x1c60,0x80c,0x1c90,0x10d0,0x80c,0x1c91,0x10d1,0x80c,0x1c92,0x10d2,
+0x80c,0x1c93,0x10d3,0x80c,0x1c94,0x10d4,0x80c,0x1c95,0x10d5,0x80c,0x1c96,0x10d6,0x80c,0x1c97,0x10d7,0x80c,
+0x1c98,0x10d8,0x80c,0x1c99,0x10d9,0x80c,0x1c9a,0x10da,0x80c,0x1c9b,0x10db,0x80c,0x1c9c,0x10dc,0x80c,0x1c9d,
+0x10dd,0x80c,0x1c9e,0x10de,0x80c,0x1c9f,0x10df,0x80c,0x1ca0,0x10e0,0x80c,0x1ca1,0x10e1,0x80c,0x1ca2,0x10e2,
+0x80c,0x1ca3,0x10e3,0x80c,0x1ca4,0x10e4,0x80c,0x1ca5,0x10e5,0x80c,0x1ca6,0x10e6,0x80c,0x1ca7,0x10e7,0x80c,
+0x1ca8,0x10e8,0x80c,0x1ca9,0x10e9,0x80c,0x1caa,0x10ea,0x80c,0x1cab,0x10eb,0x80c,0x1cac,0x10ec,0x80c,0x1cad,
+0x10ed,0x80c,0x1cae,0x10ee,0x80c,0x1caf,0x10ef,0x80c,0x1cb0,0x10f0,0x80c,0x1cb1,0x10f1,0x80c,0x1cb2,0x10f2,
+0x80c,0x1cb3,0x10f3,0x80c,0x1cb4,0x10f4,0x80c,0x1cb5,0x10f5,0x80c,0x1cb6,0x10f6,0x80c,0x1cb7,0x10f7,0x80c,
+0x1cb8,0x10f8,0x80c,0x1cb9,0x10f9,0x80c,0x1cba,0x10fa,0x80c,0x1cbd,0x10fd,0x80c,0x1cbe,0x10fe,0x80c,0x1cbf,
+0x10ff,0xa10,0x97d0,0xa10,8,0x806,0x13f0,0x13f0,0x806,0x13f1,0x13f1,0x806,0x13f2,0x13f2,0x806,0x13f3,
+0x13f3,0x806,0x13f4,0x13f4,0x806,0x13f5,0x13f5,0x806,0x432,0x412,0x806,0x434,0x414,0x806,0x43e,0x41e,
+0x806,0x441,0x421,0x846,0x442,0x422,1,0x1c85,0x846,0x442,0x422,1,0x1c84,0x806,0x44a,0x42a,
+0x806,0x463,0x462,0x806,0xa64b,0xa64a,0xc10,0xbc0,0x810,0x8a04,0x810,0xee6,0x810,0x8a38,0x841,0x1e61,
+1,0x1e9b,0x844,0x1e60,1,0x1e9b,0x880,0x2220,0x68,0x331,0x48,0x331,0x48,0x331,0x880,0x2220,
+0x74,0x308,0x54,0x308,0x54,0x308,0x880,0x2220,0x77,0x30a,0x57,0x30a,0x57,0x30a,0x880,0x2220,
+0x79,0x30a,0x59,0x30a,0x59,0x30a,0x880,0x2220,0x61,0x2be,0x41,0x2be,0x41,0x2be,0x806,0x1e61,
+0x1e60,0xc90,0x1dbf,0x20,0x73,0x73,0x880,0x2220,0x3c5,0x313,0x3a5,0x313,0x3a5,0x313,0x880,0x3330,
+0x3c5,0x313,0x300,0x3a5,0x313,0x300,0x3a5,0x313,0x300,0x880,0x3330,0x3c5,0x313,0x301,0x3a5,0x313,
+0x301,0x3a5,0x313,0x301,0x880,0x3330,0x3c5,0x313,0x342,0x3a5,0x313,0x342,0x3a5,0x313,0x342,0x890,
+8,0x220,0x1f00,0x3b9,0x1f08,0x399,0x890,8,0x220,0x1f01,0x3b9,0x1f09,0x399,0x890,8,0x220,
+0x1f02,0x3b9,0x1f0a,0x399,0x890,8,0x220,0x1f03,0x3b9,0x1f0b,0x399,0x890,8,0x220,0x1f04,0x3b9,
+0x1f0c,0x399,0x890,8,0x220,0x1f05,0x3b9,0x1f0d,0x399,0x890,8,0x220,0x1f06,0x3b9,0x1f0e,0x399,
+0x890,8,0x220,0x1f07,0x3b9,0x1f0f,0x399,0xc90,8,0x220,0x1f00,0x3b9,0x1f08,0x399,0xc90,8,
+0x220,0x1f01,0x3b9,0x1f09,0x399,0xc90,8,0x220,0x1f02,0x3b9,0x1f0a,0x399,0xc90,8,0x220,0x1f03,
+0x3b9,0x1f0b,0x399,0xc90,8,0x220,0x1f04,0x3b9,0x1f0c,0x399,0xc90,8,0x220,0x1f05,0x3b9,0x1f0d,
+0x399,0xc90,8,0x220,0x1f06,0x3b9,0x1f0e,0x399,0xc90,8,0x220,0x1f07,0x3b9,0x1f0f,0x399,0x890,
+8,0x220,0x1f20,0x3b9,0x1f28,0x399,0x890,8,0x220,0x1f21,0x3b9,0x1f29,0x399,0x890,8,0x220,
+0x1f22,0x3b9,0x1f2a,0x399,0x890,8,0x220,0x1f23,0x3b9,0x1f2b,0x399,0x890,8,0x220,0x1f24,0x3b9,
+0x1f2c,0x399,0x890,8,0x220,0x1f25,0x3b9,0x1f2d,0x399,0x890,8,0x220,0x1f26,0x3b9,0x1f2e,0x399,
+0x890,8,0x220,0x1f27,0x3b9,0x1f2f,0x399,0xc90,8,0x220,0x1f20,0x3b9,0x1f28,0x399,0xc90,8,
+0x220,0x1f21,0x3b9,0x1f29,0x399,0xc90,8,0x220,0x1f22,0x3b9,0x1f2a,0x399,0xc90,8,0x220,0x1f23,
+0x3b9,0x1f2b,0x399,0xc90,8,0x220,0x1f24,0x3b9,0x1f2c,0x399,0xc90,8,0x220,0x1f25,0x3b9,0x1f2d,
+0x399,0xc90,8,0x220,0x1f26,0x3b9,0x1f2e,0x399,0xc90,8,0x220,0x1f27,0x3b9,0x1f2f,0x399,0x890,
+8,0x220,0x1f60,0x3b9,0x1f68,0x399,0x890,8,0x220,0x1f61,0x3b9,0x1f69,0x399,0x890,8,0x220,
+0x1f62,0x3b9,0x1f6a,0x399,0x890,8,0x220,0x1f63,0x3b9,0x1f6b,0x399,0x890,8,0x220,0x1f64,0x3b9,
+0x1f6c,0x399,0x890,8,0x220,0x1f65,0x3b9,0x1f6d,0x399,0x890,8,0x220,0x1f66,0x3b9,0x1f6e,0x399,
+0x890,8,0x220,0x1f67,0x3b9,0x1f6f,0x399,0xc90,8,0x220,0x1f60,0x3b9,0x1f68,0x399,0xc90,8,
+0x220,0x1f61,0x3b9,0x1f69,0x399,0xc90,8,0x220,0x1f62,0x3b9,0x1f6a,0x399,0xc90,8,0x220,0x1f63,
+0x3b9,0x1f6b,0x399,0xc90,8,0x220,0x1f64,0x3b9,0x1f6c,0x399,0xc90,8,0x220,0x1f65,0x3b9,0x1f6d,
+0x399,0xc90,8,0x220,0x1f66,0x3b9,0x1f6e,0x399,0xc90,8,0x220,0x1f67,0x3b9,0x1f6f,0x399,0x880,
+0x2220,0x1f70,0x3b9,0x1fba,0x399,0x1fba,0x345,0x890,9,0x220,0x3b1,0x3b9,0x391,0x399,0x880,0x2220,
+0x3ac,0x3b9,0x386,0x399,0x386,0x345,0x880,0x2220,0x3b1,0x342,0x391,0x342,0x391,0x342,0x880,0x3330,
+0x3b1,0x342,0x3b9,0x391,0x342,0x399,0x391,0x342,0x345,0xc90,9,0x220,0x3b1,0x3b9,0x391,0x399,
+0x846,0x3b9,0x399,1,0x345,0x880,0x2220,0x1f74,0x3b9,0x1fca,0x399,0x1fca,0x345,0x890,9,0x220,
+0x3b7,0x3b9,0x397,0x399,0x880,0x2220,0x3ae,0x3b9,0x389,0x399,0x389,0x345,0x880,0x2220,0x3b7,0x342,
+0x397,0x342,0x397,0x342,0x880,0x3330,0x3b7,0x342,0x3b9,0x397,0x342,0x399,0x397,0x342,0x345,0xc90,
+9,0x220,0x3b7,0x3b9,0x397,0x399,0x880,0x3330,0x3b9,0x308,0x300,0x399,0x308,0x300,0x399,0x308,
+0x300,0x8c0,1,0x3330,0x3b9,0x308,0x301,0x399,0x308,0x301,0x399,0x308,0x301,0x390,0x880,0x2220,
+0x3b9,0x342,0x399,0x342,0x399,0x342,0x880,0x3330,0x3b9,0x308,0x342,0x399,0x308,0x342,0x399,0x308,
+0x342,0x880,0x3330,0x3c5,0x308,0x300,0x3a5,0x308,0x300,0x3a5,0x308,0x300,0x8c0,1,0x3330,0x3c5,
+0x308,0x301,0x3a5,0x308,0x301,0x3a5,0x308,0x301,0x3b0,0x880,0x2220,0x3c1,0x313,0x3a1,0x313,0x3a1,
+0x313,0x880,0x2220,0x3c5,0x342,0x3a5,0x342,0x3a5,0x342,0x880,0x3330,0x3c5,0x308,0x342,0x3a5,0x308,
+0x342,0x3a5,0x308,0x342,0x880,0x2220,0x1f7c,0x3b9,0x1ffa,0x399,0x1ffa,0x345,0x890,9,0x220,0x3c9,
+0x3b9,0x3a9,0x399,0x880,0x2220,0x3ce,0x3b9,0x38f,0x399,0x38f,0x345,0x880,0x2220,0x3c9,0x342,0x3a9,
+0x342,0x3a9,0x342,0x880,0x3330,0x3c9,0x342,0x3b9,0x3a9,0x342,0x399,0x3a9,0x342,0x345,0xc90,9,
+0x220,0x3c9,0x3b9,0x3a9,0x399,0xc50,0x1d5d,1,0x3a9,0xc50,0x20bf,1,0x4b,0xc50,0x2046,1,
+0xc5,0xc10,0x29f7,0xc10,0xee6,0xc10,0x29e7,0xc10,0x2a2b,0xc10,0x2a28,0xc10,0x2a1c,0xc10,0x29fd,0xc10,
+0x2a1f,0xc10,0x2a1e,0xc10,0x2a3f,0xc10,0x1c60,0x841,0xa64b,1,0x1c88,0x844,0xa64a,1,0x1c88,0xc10,
+0x8a04,0xc10,0xa528,0xc10,0xa544,0xc10,0xa54f,0xc10,0xa54b,0xc10,0xa541,0xc10,0xa512,0xc10,0xa52a,0xc10,
+0xa515,0x810,0x3a0,0xc10,0xa543,0xc10,0x8a38,0xc10,0x3a0,0x806,0x13a0,0x13a0,0x806,0x13a1,0x13a1,0x806,
+0x13a2,0x13a2,0x806,0x13a3,0x13a3,0x806,0x13a4,0x13a4,0x806,0x13a5,0x13a5,0x806,0x13a6,0x13a6,0x806,0x13a7,
+0x13a7,0x806,0x13a8,0x13a8,0x806,0x13a9,0x13a9,0x806,0x13aa,0x13aa,0x806,0x13ab,0x13ab,0x806,0x13ac,0x13ac,
+0x806,0x13ad,0x13ad,0x806,0x13ae,0x13ae,0x806,0x13af,0x13af,0x806,0x13b0,0x13b0,0x806,0x13b1,0x13b1,0x806,
+0x13b2,0x13b2,0x806,0x13b3,0x13b3,0x806,0x13b4,0x13b4,0x806,0x13b5,0x13b5,0x806,0x13b6,0x13b6,0x806,0x13b7,
+0x13b7,0x806,0x13b8,0x13b8,0x806,0x13b9,0x13b9,0x806,0x13ba,0x13ba,0x806,0x13bb,0x13bb,0x806,0x13bc,0x13bc,
+0x806,0x13bd,0x13bd,0x806,0x13be,0x13be,0x806,0x13bf,0x13bf,0x806,0x13c0,0x13c0,0x806,0x13c1,0x13c1,0x806,
+0x13c2,0x13c2,0x806,0x13c3,0x13c3,0x806,0x13c4,0x13c4,0x806,0x13c5,0x13c5,0x806,0x13c6,0x13c6,0x806,0x13c7,
+0x13c7,0x806,0x13c8,0x13c8,0x806,0x13c9,0x13c9,0x806,0x13ca,0x13ca,0x806,0x13cb,0x13cb,0x806,0x13cc,0x13cc,
+0x806,0x13cd,0x13cd,0x806,0x13ce,0x13ce,0x806,0x13cf,0x13cf,0x806,0x13d0,0x13d0,0x806,0x13d1,0x13d1,0x806,
+0x13d2,0x13d2,0x806,0x13d3,0x13d3,0x806,0x13d4,0x13d4,0x806,0x13d5,0x13d5,0x806,0x13d6,0x13d6,0x806,0x13d7,
+0x13d7,0x806,0x13d8,0x13d8,0x806,0x13d9,0x13d9,0x806,0x13da,0x13da,0x806,0x13db,0x13db,0x806,0x13dc,0x13dc,
+0x806,0x13dd,0x13dd,0x806,0x13de,0x13de,0x806,0x13df,0x13df,0x806,0x13e0,0x13e0,0x806,0x13e1,0x13e1,0x806,
+0x13e2,0x13e2,0x806,0x13e3,0x13e3,0x806,0x13e4,0x13e4,0x806,0x13e5,0x13e5,0x806,0x13e6,0x13e6,0x806,0x13e7,
+0x13e7,0x806,0x13e8,0x13e8,0x806,0x13e9,0x13e9,0x806,0x13ea,0x13ea,0x806,0x13eb,0x13eb,0x806,0x13ec,0x13ec,
+0x806,0x13ed,0x13ed,0x806,0x13ee,0x13ee,0x806,0x13ef,0x13ef,0x880,0x2220,0x66,0x66,0x46,0x46,0x46,
+0x66,0x880,0x2220,0x66,0x69,0x46,0x49,0x46,0x69,0x880,0x2220,0x66,0x6c,0x46,0x4c,0x46,
+0x6c,0x880,0x3330,0x66,0x66,0x69,0x46,0x46,0x49,0x46,0x66,0x69,0x880,0x3330,0x66,0x66,
+0x6c,0x46,0x46,0x4c,0x46,0x66,0x6c,0x8c0,1,0x2220,0x73,0x74,0x53,0x54,0x53,0x74,
+0xfb06,0x8c0,1,0x2220,0x73,0x74,0x53,0x54,0x53,0x74,0xfb05,0x880,0x2220,0x574,0x576,0x544,
+0x546,0x544,0x576,0x880,0x2220,0x574,0x565,0x544,0x535,0x544,0x565,0x880,0x2220,0x574,0x56b,0x544,
+0x53b,0x544,0x56b,0x880,0x2220,0x57e,0x576,0x54e,0x546,0x54e,0x576,0x880,0x2220,0x574,0x56d,0x544,
+0x53d,0x544,0x56d
+};
+
+static const uint16_t ucase_props_unfold[370]={
+0x49,5,3,0,0,0x61,0x2be,0,0x1e9a,0,0x66,0x66,0,0xfb00,0,0x66,
+0x66,0x69,0xfb03,0,0x66,0x66,0x6c,0xfb04,0,0x66,0x69,0,0xfb01,0,0x66,0x6c,
+0,0xfb02,0,0x68,0x331,0,0x1e96,0,0x69,0x307,0,0x130,0,0x6a,0x30c,0,
+0x1f0,0,0x73,0x73,0,0xdf,0x1e9e,0x73,0x74,0,0xfb05,0xfb06,0x74,0x308,0,0x1e97,
+0,0x77,0x30a,0,0x1e98,0,0x79,0x30a,0,0x1e99,0,0x2bc,0x6e,0,0x149,0,
+0x3ac,0x3b9,0,0x1fb4,0,0x3ae,0x3b9,0,0x1fc4,0,0x3b1,0x342,0,0x1fb6,0,0x3b1,
+0x342,0x3b9,0x1fb7,0,0x3b1,0x3b9,0,0x1fb3,0x1fbc,0x3b7,0x342,0,0x1fc6,0,0x3b7,0x342,
+0x3b9,0x1fc7,0,0x3b7,0x3b9,0,0x1fc3,0x1fcc,0x3b9,0x308,0x300,0x1fd2,0,0x3b9,0x308,0x301,
+0x390,0x1fd3,0x3b9,0x308,0x342,0x1fd7,0,0x3b9,0x342,0,0x1fd6,0,0x3c1,0x313,0,0x1fe4,
+0,0x3c5,0x308,0x300,0x1fe2,0,0x3c5,0x308,0x301,0x3b0,0x1fe3,0x3c5,0x308,0x342,0x1fe7,0,
+0x3c5,0x313,0,0x1f50,0,0x3c5,0x313,0x300,0x1f52,0,0x3c5,0x313,0x301,0x1f54,0,0x3c5,
+0x313,0x342,0x1f56,0,0x3c5,0x342,0,0x1fe6,0,0x3c9,0x342,0,0x1ff6,0,0x3c9,0x342,
+0x3b9,0x1ff7,0,0x3c9,0x3b9,0,0x1ff3,0x1ffc,0x3ce,0x3b9,0,0x1ff4,0,0x565,0x582,0,
+0x587,0,0x574,0x565,0,0xfb14,0,0x574,0x56b,0,0xfb15,0,0x574,0x56d,0,0xfb17,
+0,0x574,0x576,0,0xfb13,0,0x57e,0x576,0,0xfb16,0,0x1f00,0x3b9,0,0x1f80,0x1f88,
+0x1f01,0x3b9,0,0x1f81,0x1f89,0x1f02,0x3b9,0,0x1f82,0x1f8a,0x1f03,0x3b9,0,0x1f83,0x1f8b,0x1f04,
+0x3b9,0,0x1f84,0x1f8c,0x1f05,0x3b9,0,0x1f85,0x1f8d,0x1f06,0x3b9,0,0x1f86,0x1f8e,0x1f07,0x3b9,
+0,0x1f87,0x1f8f,0x1f20,0x3b9,0,0x1f90,0x1f98,0x1f21,0x3b9,0,0x1f91,0x1f99,0x1f22,0x3b9,0,
+0x1f92,0x1f9a,0x1f23,0x3b9,0,0x1f93,0x1f9b,0x1f24,0x3b9,0,0x1f94,0x1f9c,0x1f25,0x3b9,0,0x1f95,
+0x1f9d,0x1f26,0x3b9,0,0x1f96,0x1f9e,0x1f27,0x3b9,0,0x1f97,0x1f9f,0x1f60,0x3b9,0,0x1fa0,0x1fa8,
+0x1f61,0x3b9,0,0x1fa1,0x1fa9,0x1f62,0x3b9,0,0x1fa2,0x1faa,0x1f63,0x3b9,0,0x1fa3,0x1fab,0x1f64,
+0x3b9,0,0x1fa4,0x1fac,0x1f65,0x3b9,0,0x1fa5,0x1fad,0x1f66,0x3b9,0,0x1fa6,0x1fae,0x1f67,0x3b9,
+0,0x1fa7,0x1faf,0x1f70,0x3b9,0,0x1fb2,0,0x1f74,0x3b9,0,0x1fc2,0,0x1f7c,0x3b9,0,
+0x1ff2,0
+};
+
+// Aggregate that bundles the case-property tables of this file into one
+// UCaseProps object: the indexes, exceptions and unfold arrays plus a nested
+// descriptor built on top of ucase_props_trieIndex.
+// NOTE(review): the field names of UCaseProps and of the nested struct are not
+// visible in this file; the per-field remarks below are assumptions that
+// should be confirmed against ucase.h / utrie2.h.
+static const UCaseProps ucase_props_singleton={
+ NULL, // presumably a data/memory handle that built-in tables do not need - TODO confirm
+ ucase_props_indexes,
+ ucase_props_exceptions, // the 1667-entry uint16_t table defined above
+ ucase_props_unfold, // the 370-entry uint16_t table defined above
+ {
+ ucase_props_trieIndex,
+ ucase_props_trieIndex+3288, // second pointer starts 3288 entries into the same array
+ NULL,
+ 3288, // same value as the offset applied to the pointer above
+ 9068,
+ 0x188,
+ 0xd54,
+ 0x0,
+ 0x0,
+ 0xe0800,
+ 0x3040,
+ NULL, 0, FALSE, FALSE, 0, NULL
+ },
+ { 4,0,0,0 } // presumably the data format version - TODO confirm
+};
+
+#endif // INCLUDED_FROM_UCASE_CPP
diff --git a/thirdparty/icu4c/common/ucasemap.cpp b/thirdparty/icu4c/common/ucasemap.cpp
new file mode 100644
index 0000000000..ed72bda828
--- /dev/null
+++ b/thirdparty/icu4c/common/ucasemap.cpp
@@ -0,0 +1,953 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2005-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ucasemap.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2005may06
+* created by: Markus W. Scherer
+*
+* Case mapping service object and functions using it.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/brkiter.h"
+#include "unicode/bytestream.h"
+#include "unicode/casemap.h"
+#include "unicode/edits.h"
+#include "unicode/stringoptions.h"
+#include "unicode/stringpiece.h"
+#include "unicode/ubrk.h"
+#include "unicode/uloc.h"
+#include "unicode/ustring.h"
+#include "unicode/ucasemap.h"
+#if !UCONFIG_NO_BREAK_ITERATION
+#include "unicode/utext.h"
+#endif
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include "bytesinkutil.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uassert.h"
+#include "ucase.h"
+#include "ucasemap_imp.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_USE
+
+/* UCaseMap service object -------------------------------------------------- */
+
+// Constructs a case map with the given option bits and lets
+// ucasemap_setLocale() fill in the locale fields; any failure is reported
+// through *pErrorCode.
+UCaseMap::UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode)
+    :
+#if !UCONFIG_NO_BREAK_ITERATION
+      iter(nullptr),
+#endif
+      caseLocale(UCASE_LOC_UNKNOWN),
+      options(opts) {
+    ucasemap_setLocale(this, localeID, pErrorCode);
+}
+
+// Releases the break iterator held by this object; the member only exists
+// when break iteration is compiled in.
+UCaseMap::~UCaseMap() {
+#if !UCONFIG_NO_BREAK_ITERATION
+ delete iter;
+#endif
+}
+
+/**
+ * Allocates a UCaseMap for the given locale and option bits.
+ *
+ * Does nothing and returns a null pointer if *pErrorCode already indicates a
+ * failure. Sets U_MEMORY_ALLOCATION_ERROR if the allocation yields a null
+ * pointer, and destroys the new object again if its construction reported a
+ * failure.
+ *
+ * @return the new object, or a null pointer on any failure
+ */
+U_CAPI UCaseMap * U_EXPORT2
+ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return nullptr;
+    }
+    UCaseMap *result = new UCaseMap(locale, options, pErrorCode);
+    if (result == nullptr) {
+        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+    } else if (U_FAILURE(*pErrorCode)) {
+        // The constructor failed to set the locale: do not hand out the object.
+        delete result;
+        result = nullptr;
+    }
+    return result;
+}
+
+// Destroys a case map that was allocated by ucasemap_open().
+// Passing a null pointer is harmless because deleting null is a no-op.
+U_CAPI void U_EXPORT2
+ucasemap_close(UCaseMap *csm) {
+ delete csm;
+}
+
+// Returns a pointer to the locale string stored inside csm.
+// csm is dereferenced without a null check, and the returned pointer refers
+// to storage owned by csm.
+U_CAPI const char * U_EXPORT2
+ucasemap_getLocale(const UCaseMap *csm) {
+ return csm->locale;
+}
+
+// Returns the option bits currently stored in csm.
+// csm is dereferenced without a null check.
+U_CAPI uint32_t U_EXPORT2
+ucasemap_getOptions(const UCaseMap *csm) {
+ return csm->options;
+}
+
+/**
+ * Stores the locale ID in csm and derives the matching case-mapping locale.
+ *
+ * - Does nothing if *pErrorCode already indicates a failure.
+ * - An empty (but non-null) locale string selects the root behavior directly.
+ * - Otherwise the full locale name is written into csm->locale; if it does not
+ *   fit, only the language code is stored instead.
+ * - On failure csm->locale is emptied, the case locale falls back to
+ *   UCASE_LOC_ROOT, and the failure stays in *pErrorCode.
+ *
+ * @param csm        case map to modify; dereferenced without a null check
+ * @param locale     locale ID; a null pointer is passed through to uloc_getName()
+ * @param pErrorCode in/out ICU error code
+ */
+U_CAPI void U_EXPORT2
+ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return;
+    }
+    if (locale != NULL && *locale == 0) {
+        csm->locale[0] = 0;
+        csm->caseLocale = UCASE_LOC_ROOT;
+        return;
+    }
+
+    // Compare lengths as int32_t throughout instead of mixing with size_t.
+    const int32_t capacity=(int32_t)sizeof(csm->locale);
+
+    int32_t length=uloc_getName(locale, csm->locale, capacity, pErrorCode);
+    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==capacity) {
+        *pErrorCode=U_ZERO_ERROR;
+        /* we only really need the language code for case mappings */
+        length=uloc_getLanguage(locale, csm->locale, capacity, pErrorCode);
+    }
+    if(length==capacity) {
+        // The buffer is completely filled; presumably that leaves no room for
+        // a terminator, so treat it like an overflow.
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+    }
+    if(U_SUCCESS(*pErrorCode)) {
+        csm->caseLocale = ucase_getCaseLocale(csm->locale);
+    } else {
+        csm->locale[0]=0;
+        csm->caseLocale = UCASE_LOC_ROOT;
+    }
+}
+
+// Replaces the option bits stored in csm. The object is left untouched when
+// *pErrorCode already indicates a failure.
+U_CAPI void U_EXPORT2
+ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) {
+    if (U_SUCCESS(*pErrorCode)) {
+        csm->options = options;
+    }
+}
+
+/* UTF-8 string case mappings ----------------------------------------------- */
+
+/* TODO(markus): Move to a new, separate utf8case.cpp file. */
+
+namespace {
+
+/*
+ * Appends the outcome of a full case mapping to the sink,
+ * see UCASE_MAX_STRING_LENGTH.
+ *
+ * result < 0:                        ~result is appended as an unchanged code
+ *                                    point (unless unchanged text is omitted)
+ * 0..UCASE_MAX_STRING_LENGTH:        result is the UTF-16 length of string s
+ * above UCASE_MAX_STRING_LENGTH:     result is a single replacement code point
+ *
+ * Returns the value of ByteSinkUtil::appendChange() for the string case and
+ * TRUE otherwise.
+ */
+inline UBool
+appendResult(int32_t cpLength, int32_t result, const UChar *s,
+             ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) {
+    U_ASSERT(U_SUCCESS(errorCode));
+
+    if (result >= 0) {
+        if (result > UCASE_MAX_STRING_LENGTH) {
+            // A single replacement code point.
+            ByteSinkUtil::appendCodePoint(cpLength, result, sink, edits);
+            return TRUE;
+        }
+        // A replacement string of "result" UTF-16 code units.
+        return ByteSinkUtil::appendChange(cpLength, s, result, sink, edits, errorCode);
+    }
+
+    // Negative: the bitwise complement is the code point to copy as unchanged.
+    if (edits != nullptr) {
+        edits->addUnchanged(cpLength);
+    }
+    if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
+        ByteSinkUtil::appendCodePoint(cpLength, ~result, sink);
+    }
+    return TRUE;
+}
+
+// Lead and trail byte of the two-byte UTF-8 form of c.
+// See unicode/utf8.h U8_APPEND_UNSAFE().
+inline uint8_t getTwoByteLead(UChar32 c) {
+    return static_cast<uint8_t>(0xc0 | (c >> 6));
+}
+inline uint8_t getTwoByteTrail(UChar32 c) {
+    return static_cast<uint8_t>(0x80 | (c & 0x3f));
+}
+
+/*
+ * Context iterator callback over UTF-8 text described by a UCaseContext.
+ *
+ * dir < 0:  restart at the beginning of the current code point and iterate backward
+ * dir > 0:  restart at the end of the current code point and iterate forward
+ * dir == 0: continue in the direction stored by the previous call
+ *
+ * Returns the next code point in that direction, or U_SENTINEL when the
+ * start/limit of the context has been reached.
+ */
+UChar32 U_CALLCONV
+utf8_caseContextIterator(void *context, int8_t dir) {
+    UCaseContext *ctx = static_cast<UCaseContext *>(context);
+
+    if (dir == 0) {
+        // Keep going the way we were going.
+        dir = ctx->dir;
+    } else {
+        // Reset the position for a new backward or forward iteration.
+        ctx->index = (dir < 0) ? ctx->cpStart : ctx->cpLimit;
+        ctx->dir = dir;
+    }
+
+    UChar32 cp = U_SENTINEL;
+    if (dir < 0) {
+        if (ctx->start < ctx->index) {
+            U8_PREV((const uint8_t *)ctx->p, ctx->start, ctx->index, cp);
+        }
+    } else if (ctx->index < ctx->limit) {
+        U8_NEXT((const uint8_t *)ctx->p, ctx->index, ctx->limit, cp);
+    }
+    return cp;
+}
+
+/**
+ * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
+ * caseLocale < 0: Case-folds [srcStart..srcLimit[.
+ *
+ * The context is supplied through csc, which is handed to
+ * utf8_caseContextIterator for the context-sensitive lowercase mappings.
+ * Output bytes go to sink; if edits is not null, every replacement is
+ * recorded there as well. "prev" always marks the start of the input bytes
+ * that have been scanned but not yet written; that pending run is flushed
+ * with ByteSinkUtil::appendUnchanged() before each change and once at the end.
+ * The scan stops early once errorCode indicates a failure.
+ */
+void toLower(int32_t caseLocale, uint32_t options,
+ const uint8_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
+ icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
+ // Pick the delta table for the code points handled by the fast path below:
+ // the Turkish/Lithuanian variant is used for those two locales, and for
+ // case folding with non-default folding options.
+ const int8_t *latinToLower;
+ if (caseLocale == UCASE_LOC_ROOT ||
+ (caseLocale >= 0 ?
+ !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
+ (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
+ latinToLower = LatinCase::TO_LOWER_NORMAL;
+ } else {
+ latinToLower = LatinCase::TO_LOWER_TR_LT;
+ }
+ const UTrie2 *trie = ucase_getTrie();
+ int32_t prev = srcStart;
+ int32_t srcIndex = srcStart;
+ for (;;) {
+ // fast path for simple cases
+ // The inner loop leaves with c < 0 at the end of the input (or on error),
+ // or with c >= 0 and cpStart set when the code point needs the slow path.
+ int32_t cpStart;
+ UChar32 c;
+ for (;;) {
+ if (U_FAILURE(errorCode) || srcIndex >= srcLimit) {
+ c = U_SENTINEL;
+ break;
+ }
+ uint8_t lead = src[srcIndex++];
+ if (lead <= 0x7f) {
+ // ASCII: the table holds a signed delta, 0 for "no change",
+ // or EXC for "needs the slow path".
+ int8_t d = latinToLower[lead];
+ if (d == LatinCase::EXC) {
+ cpStart = srcIndex - 1;
+ c = lead;
+ break;
+ }
+ if (d == 0) { continue; }
+ ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
+ sink, options, edits, errorCode);
+ char ascii = (char)(lead + d);
+ sink.Append(&ascii, 1);
+ if (edits != nullptr) {
+ edits->addReplace(1, 1);
+ }
+ prev = srcIndex;
+ continue;
+ } else if (lead < 0xe3) {
+ uint8_t t;
+ if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLimit &&
+ (t = src[srcIndex] - 0x80) <= 0x3f) {
+ // U+0080..U+017F
+ // Two-byte sequence decoded by hand; same table lookup as for ASCII.
+ ++srcIndex;
+ c = ((lead - 0xc0) << 6) | t;
+ int8_t d = latinToLower[c];
+ if (d == LatinCase::EXC) {
+ cpStart = srcIndex - 2;
+ break;
+ }
+ if (d == 0) { continue; }
+ ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
+ sink, options, edits, errorCode);
+ ByteSinkUtil::appendTwoBytes(c + d, sink);
+ if (edits != nullptr) {
+ edits->addReplace(2, 2);
+ }
+ prev = srcIndex;
+ continue;
+ }
+ } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
+ (srcIndex + 2) <= srcLimit &&
+ U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
+ // most of CJK: no case mappings
+ srcIndex += 2;
+ continue;
+ }
+ // General case: step back to the lead byte and decode the code point.
+ cpStart = --srcIndex;
+ U8_NEXT(src, srcIndex, srcLimit, c);
+ if (c < 0) {
+ // ill-formed UTF-8
+ // The bytes stay part of the pending unchanged run.
+ continue;
+ }
+ uint16_t props = UTRIE2_GET16(trie, c);
+ if (UCASE_HAS_EXCEPTION(props)) { break; }
+ int32_t delta;
+ if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
+ continue;
+ }
+ // Simple mapping: uppercase/titlecase code point with a non-zero delta.
+ ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
+ sink, options, edits, errorCode);
+ ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
+ prev = srcIndex;
+ }
+ if (c < 0) {
+ break;
+ }
+ // slow path
+ const UChar *s;
+ if (caseLocale >= 0) {
+ // Tell the context iterator where the current code point sits.
+ csc->cpStart = cpStart;
+ csc->cpLimit = srcIndex;
+ c = ucase_toFullLower(c, utf8_caseContextIterator, csc, &s, caseLocale);
+ } else {
+ c = ucase_toFullFolding(c, &s, options);
+ }
+ // A negative result is not appended here: the code point simply stays
+ // in the pending unchanged run.
+ if (c >= 0) {
+ ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
+ sink, options, edits, errorCode);
+ appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
+ prev = srcIndex;
+ }
+ }
+ // Flush whatever is still pending after the last change.
+ ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
+ sink, options, edits, errorCode);
+}
+
+/**
+ * Uppercases the UTF-8 text src[0..srcLength[.
+ *
+ * Output bytes go to sink; if edits is not null, every replacement is
+ * recorded there as well. csc is handed to utf8_caseContextIterator for the
+ * context-sensitive mappings of the slow path. "prev" always marks the start
+ * of the input bytes that have been scanned but not yet written; that pending
+ * run is flushed with ByteSinkUtil::appendUnchanged() before each change and
+ * once at the end. The scan stops early once errorCode indicates a failure.
+ */
+void toUpper(int32_t caseLocale, uint32_t options,
+ const uint8_t *src, UCaseContext *csc, int32_t srcLength,
+ icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
+ // Pick the delta table for the code points handled by the fast path below;
+ // only Turkish gets its own variant here.
+ const int8_t *latinToUpper;
+ if (caseLocale == UCASE_LOC_TURKISH) {
+ latinToUpper = LatinCase::TO_UPPER_TR;
+ } else {
+ latinToUpper = LatinCase::TO_UPPER_NORMAL;
+ }
+ const UTrie2 *trie = ucase_getTrie();
+ int32_t prev = 0;
+ int32_t srcIndex = 0;
+ for (;;) {
+ // fast path for simple cases
+ // The inner loop leaves with c < 0 at the end of the input (or on error),
+ // or with c >= 0 and cpStart set when the code point needs the slow path.
+ int32_t cpStart;
+ UChar32 c;
+ for (;;) {
+ if (U_FAILURE(errorCode) || srcIndex >= srcLength) {
+ c = U_SENTINEL;
+ break;
+ }
+ uint8_t lead = src[srcIndex++];
+ if (lead <= 0x7f) {
+ // ASCII: the table holds a signed delta, 0 for "no change",
+ // or EXC for "needs the slow path".
+ int8_t d = latinToUpper[lead];
+ if (d == LatinCase::EXC) {
+ cpStart = srcIndex - 1;
+ c = lead;
+ break;
+ }
+ if (d == 0) { continue; }
+ ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
+ sink, options, edits, errorCode);
+ char ascii = (char)(lead + d);
+ sink.Append(&ascii, 1);
+ if (edits != nullptr) {
+ edits->addReplace(1, 1);
+ }
+ prev = srcIndex;
+ continue;
+ } else if (lead < 0xe3) {
+ uint8_t t;
+ if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLength &&
+ (t = src[srcIndex] - 0x80) <= 0x3f) {
+ // U+0080..U+017F
+ // Two-byte sequence decoded by hand; same table lookup as for ASCII.
+ ++srcIndex;
+ c = ((lead - 0xc0) << 6) | t;
+ int8_t d = latinToUpper[c];
+ if (d == LatinCase::EXC) {
+ cpStart = srcIndex - 2;
+ break;
+ }
+ if (d == 0) { continue; }
+ ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
+ sink, options, edits, errorCode);
+ ByteSinkUtil::appendTwoBytes(c + d, sink);
+ if (edits != nullptr) {
+ edits->addReplace(2, 2);
+ }
+ prev = srcIndex;
+ continue;
+ }
+ } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
+ (srcIndex + 2) <= srcLength &&
+ U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
+ // most of CJK: no case mappings
+ srcIndex += 2;
+ continue;
+ }
+ // General case: step back to the lead byte and decode the code point.
+ cpStart = --srcIndex;
+ U8_NEXT(src, srcIndex, srcLength, c);
+ if (c < 0) {
+ // ill-formed UTF-8
+ // The bytes stay part of the pending unchanged run.
+ continue;
+ }
+ uint16_t props = UTRIE2_GET16(trie, c);
+ if (UCASE_HAS_EXCEPTION(props)) { break; }
+ int32_t delta;
+ if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
+ continue;
+ }
+ // Simple mapping: lowercase code point with a non-zero delta.
+ ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
+ sink, options, edits, errorCode);
+ ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
+ prev = srcIndex;
+ }
+ if (c < 0) {
+ break;
+ }
+ // slow path
+ // Tell the context iterator where the current code point sits.
+ csc->cpStart = cpStart;
+ csc->cpLimit = srcIndex;
+ const UChar *s;
+ c = ucase_toFullUpper(c, utf8_caseContextIterator, csc, &s, caseLocale);
+ // A negative result is not appended here: the code point simply stays
+ // in the pending unchanged run.
+ if (c >= 0) {
+ ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
+ sink, options, edits, errorCode);
+ appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
+ prev = srcIndex;
+ }
+ }
+ // Flush whatever is still pending after the last change.
+ ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
+ sink, options, edits, errorCode);
+}
+
+} // namespace
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecases the UTF-8 text src[0..srcLength[ into sink.
+ *
+ * The break iterator supplies the segment boundaries (first() for the first
+ * segment, next() afterwards). In each segment one code point is titlecased
+ * and, unless U_TITLECASE_NO_LOWERCASE is set, the rest is lowercased with
+ * toLower(). If edits is not null, the changes are recorded there as well.
+ *
+ * Returns early, leaving the failure in errorCode, when the title adjustment
+ * options are rejected by ustrcase_checkTitleAdjustmentOptions() or when one
+ * of the append helpers reports failure.
+ *
+ * NOTE(review): iter is used without a null check and is assumed to have
+ * been set up over the same text by the caller - confirm against callers.
+ */
+U_CFUNC void U_CALLCONV
+ucasemap_internalUTF8ToTitle(
+ int32_t caseLocale, uint32_t options, BreakIterator *iter,
+ const uint8_t *src, int32_t srcLength,
+ ByteSink &sink, icu::Edits *edits,
+ UErrorCode &errorCode) {
+ if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) {
+ return;
+ }
+
+ /* set up local variables */
+ UCaseContext csc=UCASECONTEXT_INITIALIZER;
+ csc.p=(void *)src;
+ csc.limit=srcLength;
+ int32_t prev=0;
+ UBool isFirstIndex=TRUE;
+
+ /* titlecasing loop */
+ while(prev<srcLength) {
+ /* find next index where to titlecase */
+ int32_t index;
+ if(isFirstIndex) {
+ isFirstIndex=FALSE;
+ index=iter->first();
+ } else {
+ index=iter->next();
+ }
+ // Treat "no more boundaries" and out-of-range boundaries as end of text.
+ if(index==UBRK_DONE || index>srcLength) {
+ index=srcLength;
+ }
+
+ /*
+ * Segment [prev..index[ into 3 parts:
+ * a) skipped characters (copy as-is) [prev..titleStart[
+ * b) first letter (titlecase) [titleStart..titleLimit[
+ * c) subsequent characters (lowercase) [titleLimit..index[
+ */
+ if(prev<index) {
+ /* find and copy skipped characters [prev..titleStart[ */
+ int32_t titleStart=prev;
+ int32_t titleLimit=prev;
+ UChar32 c;
+ U8_NEXT(src, titleLimit, index, c);
+ if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
+ // Adjust the titlecasing index to the next cased character,
+ // or to the next letter/number/symbol/private use.
+ // Stop with titleStart<titleLimit<=index
+ // if there is a character to be titlecased,
+ // or else stop with titleStart==titleLimit==index.
+ UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
+ while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
+ titleStart=titleLimit;
+ if(titleLimit==index) {
+ break;
+ }
+ U8_NEXT(src, titleLimit, index, c);
+ }
+ if (prev < titleStart) {
+ if (!ByteSinkUtil::appendUnchanged(src+prev, titleStart-prev,
+ sink, options, edits, errorCode)) {
+ return;
+ }
+ }
+ }
+
+ if(titleStart<titleLimit) {
+ /* titlecase c which is from [titleStart..titleLimit[ */
+ if(c>=0) {
+ csc.cpStart=titleStart;
+ csc.cpLimit=titleLimit;
+ const UChar *s;
+ c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale);
+ if (!appendResult(titleLimit-titleStart, c, s, sink, options, edits, errorCode)) {
+ return;
+ }
+ } else {
+ // Malformed UTF-8.
+ if (!ByteSinkUtil::appendUnchanged(src+titleStart, titleLimit-titleStart,
+ sink, options, edits, errorCode)) {
+ return;
+ }
+ }
+
+ /* Special case Dutch IJ titlecasing */
+ // If the titlecased letter is an ASCII I/i and it is followed by j or J,
+ // the J is emitted as a capital too and excluded from the lowercasing below.
+ if (titleStart+1 < index &&
+ caseLocale == UCASE_LOC_DUTCH &&
+ (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
+ if (src[titleStart+1] == 0x006A) {
+ ByteSinkUtil::appendCodePoint(1, 0x004A, sink, edits);
+ titleLimit++;
+ } else if (src[titleStart+1] == 0x004A) {
+ // Keep the capital J from getting lowercased.
+ if (!ByteSinkUtil::appendUnchanged(src+titleStart+1, 1,
+ sink, options, edits, errorCode)) {
+ return;
+ }
+ titleLimit++;
+ }
+ }
+
+ /* lowercase [titleLimit..index[ */
+ if(titleLimit<index) {
+ if((options&U_TITLECASE_NO_LOWERCASE)==0) {
+ /* Normal operation: Lowercase the rest of the word. */
+ toLower(caseLocale, options,
+ src, &csc, titleLimit, index,
+ sink, edits, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+ } else {
+ /* Optionally just copy the rest of the word unchanged. */
+ if (!ByteSinkUtil::appendUnchanged(src+titleLimit, index-titleLimit,
+ sink, options, edits, errorCode)) {
+ return;
+ }
+ }
+ }
+ }
+ }
+
+ // The next segment starts where this one ended.
+ prev=index;
+ }
+}
+
+#endif
+
+U_NAMESPACE_BEGIN
+namespace GreekUpper {
+
+// Scans s[i..length[ and reports whether the first code point that is not
+// case-ignorable is a cased letter. Returns FALSE when the text ends before
+// such a code point is found.
+UBool isFollowedByCasedLetter(const uint8_t *s, int32_t i, int32_t length) {
+    while (i < length) {
+        UChar32 cp;
+        U8_NEXT(s, i, length, cp);
+        const int32_t typeBits = ucase_getTypeOrIgnorable(cp);
+        if ((typeBits & UCASE_IGNORABLE) == 0) {
+            // The first non-ignorable code point decides the answer:
+            // cased means TRUE, uncased means FALSE.
+            return (typeBits != UCASE_NONE) ? TRUE : FALSE;
+        }
+        // Case-ignorable: keep looking.
+    }
+    return FALSE;  // Ran out of text.
+}
+
+// Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
+/**
+ * Uppercases UTF-8 text using the Greek-specific rules implemented below.
+ *
+ * The source is walked one code point at a time. For code points that have Greek
+ * letter data, trailing Greek combining diacritics are consumed together with the
+ * base letter and the whole sequence is replaced by: the uppercase letter,
+ * an optional combining dialytika (U+0308), an optional combining tonos (U+0301),
+ * and one capital iota (U+0399) per ypogegrammeni.
+ * Other well-formed code points go through ucase_toFullUpper() with UCASE_LOC_GREEK;
+ * malformed UTF-8 is passed through ByteSinkUtil::appendUnchanged().
+ *
+ * @param options   Option bits; U_OMIT_UNCHANGED_TEXT is the only one tested directly here.
+ * @param src       UTF-8 source bytes.
+ * @param srcLength Number of bytes in src.
+ * @param sink      Receives the output bytes.
+ * @param edits     Optional edit log (may be nullptr); receives addReplace()/addUnchanged() calls.
+ * @param errorCode Passed to the helper functions; the function returns early when a helper reports failure.
+ */
+void toUpper(uint32_t options,
+ const uint8_t *src, int32_t srcLength,
+ ByteSink &sink, Edits *edits,
+ UErrorCode &errorCode) {
+ // State bits (AFTER_CASED, AFTER_VOWEL_WITH_ACCENT) describing the previous code point/sequence.
+ uint32_t state = 0;
+ for (int32_t i = 0; i < srcLength;) {
+ int32_t nextIndex = i;
+ UChar32 c;
+ U8_NEXT(src, nextIndex, srcLength, c);
+ // State to carry into the next iteration; built up while processing c.
+ uint32_t nextState = 0;
+ int32_t type = ucase_getTypeOrIgnorable(c);
+ if ((type & UCASE_IGNORABLE) != 0) {
+ // c is case-ignorable
+ nextState |= (state & AFTER_CASED);
+ } else if (type != UCASE_NONE) {
+ // c is cased
+ nextState |= AFTER_CASED;
+ }
+ uint32_t data = getLetterData(c);
+ if (data > 0) {
+ // c has Greek letter data: handle the letter plus any following Greek diacritics as one unit.
+ uint32_t upper = data & UPPER_MASK;
+ // Add a dialytika to this iota or ypsilon vowel
+ // if we removed a tonos from the previous vowel,
+ // and that previous vowel did not also have (or gain) a dialytika.
+ // Adding one only to the final vowel in a longer sequence
+ // (which does not occur in normal writing) would require lookahead.
+ // Set the same flag as for preserving an existing dialytika.
+ if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
+ (upper == 0x399 || upper == 0x3A5)) {
+ data |= HAS_DIALYTIKA;
+ }
+ int32_t numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota.
+ if ((data & HAS_YPOGEGRAMMENI) != 0) {
+ numYpogegrammeni = 1;
+ }
+ // Skip combining diacritics after this Greek letter.
+ // nextNextIndex is the look-ahead position; nextIndex only advances
+ // once the code point just read is confirmed to be a Greek diacritic.
+ int32_t nextNextIndex = nextIndex;
+ while (nextIndex < srcLength) {
+ UChar32 c2;
+ U8_NEXT(src, nextNextIndex, srcLength, c2);
+ uint32_t diacriticData = getDiacriticData(c2);
+ if (diacriticData != 0) {
+ data |= diacriticData;
+ if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
+ ++numYpogegrammeni;
+ }
+ nextIndex = nextNextIndex;
+ } else {
+ break; // not a Greek diacritic
+ }
+ }
+ // Remember an accented vowel without dialytika so the next iota/ypsilon can gain one (see above).
+ if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
+ nextState |= AFTER_VOWEL_WITH_ACCENT;
+ }
+ // Map according to Greek rules.
+ UBool addTonos = FALSE;
+ if (upper == 0x397 &&
+ (data & HAS_ACCENT) != 0 &&
+ numYpogegrammeni == 0 &&
+ (state & AFTER_CASED) == 0 &&
+ !isFollowedByCasedLetter(src, nextIndex, srcLength)) {
+ // Keep disjunctive "or" with (only) a tonos.
+ // We use the same "word boundary" conditions as for the Final_Sigma test.
+ // NOTE(review): nextIndex was advanced past c by U8_NEXT above, so i == nextIndex
+ // looks like it can never hold here and the tonos would always be emitted as a
+ // combining mark — confirm against upstream before relying on the precomposed branch.
+ if (i == nextIndex) {
+ upper = 0x389; // Preserve the precomposed form.
+ } else {
+ addTonos = TRUE;
+ }
+ } else if ((data & HAS_DIALYTIKA) != 0) {
+ // Preserve a vowel with dialytika in precomposed form if it exists.
+ if (upper == 0x399) {
+ upper = 0x3AA;
+ data &= ~HAS_EITHER_DIALYTIKA;
+ } else if (upper == 0x3A5) {
+ upper = 0x3AB;
+ data &= ~HAS_EITHER_DIALYTIKA;
+ }
+ }
+
+ // "change" first means "the output differs from the source"; after the edits
+ // bookkeeping below it means "write the output bytes to the sink".
+ UBool change;
+ if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
+ change = TRUE; // common, simple usage
+ } else {
+ // Find out first whether we are changing the text.
+ U_ASSERT(0x370 <= upper && upper <= 0x3ff); // 2-byte UTF-8, main Greek block
+ // Compare the source bytes against what would be written: letter first ...
+ change = (i + 2) > nextIndex ||
+ src[i] != getTwoByteLead(upper) || src[i + 1] != getTwoByteTrail(upper) ||
+ numYpogegrammeni > 0;
+ // i2 tracks the source position where the next expected output piece would start.
+ int32_t i2 = i + 2;
+ if ((data & HAS_EITHER_DIALYTIKA) != 0) {
+ // ... then the combining dialytika ...
+ change |= (i2 + 2) > nextIndex ||
+ src[i2] != (uint8_t)u8"\u0308"[0] ||
+ src[i2 + 1] != (uint8_t)u8"\u0308"[1];
+ i2 += 2;
+ }
+ if (addTonos) {
+ // ... then the combining tonos.
+ change |= (i2 + 2) > nextIndex ||
+ src[i2] != (uint8_t)u8"\u0301"[0] ||
+ src[i2 + 1] != (uint8_t)u8"\u0301"[1];
+ i2 += 2;
+ }
+ int32_t oldLength = nextIndex - i;
+ int32_t newLength = (i2 - i) + numYpogegrammeni * 2; // 2 bytes per U+0399
+ change |= oldLength != newLength;
+ if (change) {
+ if (edits != NULL) {
+ edits->addReplace(oldLength, newLength);
+ }
+ } else {
+ if (edits != NULL) {
+ edits->addUnchanged(oldLength);
+ }
+ // Write unchanged text?
+ change = (options & U_OMIT_UNCHANGED_TEXT) == 0;
+ }
+ }
+
+ if (change) {
+ // Emit in the same order that the change detection above assumed.
+ ByteSinkUtil::appendTwoBytes(upper, sink);
+ if ((data & HAS_EITHER_DIALYTIKA) != 0) {
+ sink.AppendU8(u8"\u0308", 2); // restore or add a dialytika
+ }
+ if (addTonos) {
+ sink.AppendU8(u8"\u0301", 2);
+ }
+ while (numYpogegrammeni > 0) {
+ sink.AppendU8(u8"\u0399", 2);
+ --numYpogegrammeni;
+ }
+ }
+ } else if(c>=0) {
+ // Well-formed code point without Greek letter data: regular full uppercase mapping.
+ const UChar *s;
+ c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);
+ if (!appendResult(nextIndex - i, c, s, sink, options, edits, errorCode)) {
+ return;
+ }
+ } else {
+ // Malformed UTF-8.
+ if (!ByteSinkUtil::appendUnchanged(src+i, nextIndex-i,
+ sink, options, edits, errorCode)) {
+ return;
+ }
+ }
+ i = nextIndex;
+ state = nextState;
+ }
+}
+
+} // namespace GreekUpper
+U_NAMESPACE_END
+
+/**
+ * UTF8CaseMapper-shaped entry point for lowercasing.
+ * Sets up a UCaseContext spanning the whole source and delegates to toLower()
+ * for the range [0, srcLength).
+ */
+static void U_CALLCONV
+ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
+                             const uint8_t *src, int32_t srcLength,
+                             icu::ByteSink &sink, icu::Edits *edits,
+                             UErrorCode &errorCode) {
+    // The context points at the source buffer and is limited to its length.
+    UCaseContext context = UCASECONTEXT_INITIALIZER;
+    context.limit = srcLength;
+    context.p = (void *)src;
+    toLower(caseLocale, options, src, &context, 0, srcLength, sink, edits, errorCode);
+}
+
+/**
+ * UTF8CaseMapper-shaped entry point for uppercasing.
+ * The Greek case locale is routed to GreekUpper::toUpper(); every other
+ * case locale goes through the generic toUpper() with a fresh UCaseContext.
+ */
+static void U_CALLCONV
+ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
+                             const uint8_t *src, int32_t srcLength,
+                             icu::ByteSink &sink, icu::Edits *edits,
+                             UErrorCode &errorCode) {
+    if (caseLocale != UCASE_LOC_GREEK) {
+        // Generic path: context spans the whole source buffer.
+        UCaseContext context = UCASECONTEXT_INITIALIZER;
+        context.limit = srcLength;
+        context.p = (void *)src;
+        toUpper(caseLocale, options, src, &context, srcLength, sink, edits, errorCode);
+        return;
+    }
+    GreekUpper::toUpper(options, src, srcLength, sink, edits, errorCode);
+}
+
+/**
+ * UTF8CaseMapper-shaped entry point for case folding.
+ * Ignores the incoming case locale and calls toLower() with a case locale of -1
+ * and no UCaseContext.
+ */
+static void U_CALLCONV
+ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
+                          const uint8_t *src, int32_t srcLength,
+                          icu::ByteSink &sink, icu::Edits *edits,
+                          UErrorCode &errorCode) {
+    const int32_t foldCaseLocale = -1;  // value the original passes to toLower() for folding
+    toLower(foldCaseLocale, options, src, nullptr, 0, srcLength, sink, edits, errorCode);
+}
+
+/**
+ * ByteSink flavor of the common UTF-8 case mapping driver.
+ * Validates the arguments, resolves a source length of -1 with uprv_strlen(),
+ * resets edits unless U_EDITS_NO_RESET is set, runs stringCaseMapper into sink,
+ * flushes the sink, and finally copies any Edits error into errorCode.
+ */
+void
+ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
+                 const char *src, int32_t srcLength,
+                 UTF8CaseMapper *stringCaseMapper,
+                 icu::ByteSink &sink, icu::Edits *edits,
+                 UErrorCode &errorCode) {
+    // A previous failure makes this call a no-op.
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    // A null source is only acceptable together with a zero length; lengths below -1 are invalid.
+    const bool missingSource = (src == nullptr && srcLength != 0);
+    if (missingSource || srcLength < -1) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // -1 means "measure the string".
+    if (srcLength == -1) {
+        srcLength = (int32_t)uprv_strlen((const char *)src);
+    }
+
+    const bool resetEdits = (edits != nullptr && (options & U_EDITS_NO_RESET) == 0);
+    if (resetEdits) {
+        edits->reset();
+    }
+    stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
+                     (const uint8_t *)src, srcLength, sink, edits, errorCode);
+    sink.Flush();
+    if (edits != nullptr && U_SUCCESS(errorCode)) {
+        edits->copyErrorTo(errorCode);
+    }
+}
+
+/**
+ * Buffer flavor of the common UTF-8 case mapping driver.
+ * Validates the arguments, resolves a source length of -1 with uprv_strlen(),
+ * rejects overlapping source/destination ranges, maps into a CheckedArrayByteSink
+ * over dest, reports U_BUFFER_OVERFLOW_ERROR when the sink overflowed, and returns
+ * the value of u_terminateChars() for the number of bytes appended.
+ */
+int32_t
+ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
+                 char *dest, int32_t destCapacity,
+                 const char *src, int32_t srcLength,
+                 UTF8CaseMapper *stringCaseMapper,
+                 icu::Edits *edits,
+                 UErrorCode &errorCode) {
+    /* a previous failure makes this call a no-op */
+    if (U_FAILURE(errorCode)) {
+        return 0;
+    }
+
+    /* argument validation */
+    const bool badDest = destCapacity < 0 || (dest == NULL && destCapacity > 0);
+    const bool badSrc = (src == NULL && srcLength != 0) || srcLength < -1;
+    if (badDest || badSrc) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* -1 means "measure the string" */
+    if (srcLength == -1) {
+        srcLength = (int32_t)uprv_strlen((const char *)src);
+    }
+
+    /* source and destination must not overlap */
+    if (dest != NULL) {
+        const bool srcStartsInDest = (src >= dest && src < (dest + destCapacity));
+        const bool destStartsInSrc = (dest >= src && dest < (src + srcLength));
+        if (srcStartsInDest || destStartsInSrc) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+    }
+
+    CheckedArrayByteSink sink(dest, destCapacity);
+    const bool resetEdits = (edits != nullptr && (options & U_EDITS_NO_RESET) == 0);
+    if (resetEdits) {
+        edits->reset();
+    }
+    stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
+                     (const uint8_t *)src, srcLength, sink, edits, errorCode);
+    sink.Flush();
+    if (U_SUCCESS(errorCode)) {
+        /* an overflow takes precedence over any error recorded in edits */
+        if (sink.Overflowed()) {
+            errorCode = U_BUFFER_OVERFLOW_ERROR;
+        } else if (edits != nullptr) {
+            edits->copyErrorTo(errorCode);
+        }
+    }
+    return u_terminateChars(dest, destCapacity, sink.NumberOfBytesAppended(), &errorCode);
+}
+
+/* public API functions */
+
+/**
+ * C API: maps src to dest via ucasemap_internalUTF8ToLower, using the case locale
+ * and options stored in csm. No break iterator and no Edits are passed.
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_utf8ToLower(const UCaseMap *csm,
+                     char *dest, int32_t destCapacity,
+                     const char *src, int32_t srcLength,
+                     UErrorCode *pErrorCode) {
+    const int32_t destLength = ucasemap_mapUTF8(
+        csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
+        dest, destCapacity,
+        src, srcLength,
+        ucasemap_internalUTF8ToLower, NULL, *pErrorCode);
+    return destLength;
+}
+
+/**
+ * C API: maps src to dest via ucasemap_internalUTF8ToUpper, using the case locale
+ * and options stored in csm. No break iterator and no Edits are passed.
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_utf8ToUpper(const UCaseMap *csm,
+                     char *dest, int32_t destCapacity,
+                     const char *src, int32_t srcLength,
+                     UErrorCode *pErrorCode) {
+    const int32_t destLength = ucasemap_mapUTF8(
+        csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
+        dest, destCapacity,
+        src, srcLength,
+        ucasemap_internalUTF8ToUpper, NULL, *pErrorCode);
+    return destLength;
+}
+
+/**
+ * C API: maps src to dest via ucasemap_internalUTF8Fold. Only the options are taken
+ * from csm; the case locale passed down is always UCASE_LOC_ROOT.
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_utf8FoldCase(const UCaseMap *csm,
+                      char *dest, int32_t destCapacity,
+                      const char *src, int32_t srcLength,
+                      UErrorCode *pErrorCode) {
+    const int32_t destLength = ucasemap_mapUTF8(
+        UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
+        dest, destCapacity,
+        src, srcLength,
+        ucasemap_internalUTF8Fold, NULL, *pErrorCode);
+    return destLength;
+}
+
+U_NAMESPACE_BEGIN
+
+/**
+ * ByteSink overload: resolves the case locale from the locale ID and forwards to
+ * ucasemap_mapUTF8() with ucasemap_internalUTF8ToLower as the mapper.
+ */
+void CaseMap::utf8ToLower(
+        const char *locale, uint32_t options,
+        StringPiece src, ByteSink &sink, Edits *edits,
+        UErrorCode &errorCode) {
+    const int32_t caseLocale = ustrcase_getCaseLocale(locale);
+    ucasemap_mapUTF8(
+        caseLocale, options, UCASEMAP_BREAK_ITERATOR_NULL
+        src.data(), src.length(),
+        ucasemap_internalUTF8ToLower, sink, edits, errorCode);
+}
+
+/**
+ * ByteSink overload: resolves the case locale from the locale ID and forwards to
+ * ucasemap_mapUTF8() with ucasemap_internalUTF8ToUpper as the mapper.
+ */
+void CaseMap::utf8ToUpper(
+        const char *locale, uint32_t options,
+        StringPiece src, ByteSink &sink, Edits *edits,
+        UErrorCode &errorCode) {
+    const int32_t caseLocale = ustrcase_getCaseLocale(locale);
+    ucasemap_mapUTF8(
+        caseLocale, options, UCASEMAP_BREAK_ITERATOR_NULL
+        src.data(), src.length(),
+        ucasemap_internalUTF8ToUpper, sink, edits, errorCode);
+}
+
+/**
+ * ByteSink overload: forwards to ucasemap_mapUTF8() with UCASE_LOC_ROOT and
+ * ucasemap_internalUTF8Fold as the mapper.
+ */
+void CaseMap::utf8Fold(
+        uint32_t options,
+        StringPiece src, ByteSink &sink, Edits *edits,
+        UErrorCode &errorCode) {
+    const char *const srcBytes = src.data();
+    const int32_t srcLength = src.length();
+    ucasemap_mapUTF8(
+        UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
+        srcBytes, srcLength,
+        ucasemap_internalUTF8Fold, sink, edits, errorCode);
+}
+
+/**
+ * Buffer overload: resolves the case locale from the locale ID and returns the
+ * result of ucasemap_mapUTF8() with ucasemap_internalUTF8ToLower as the mapper.
+ */
+int32_t CaseMap::utf8ToLower(
+        const char *locale, uint32_t options,
+        const char *src, int32_t srcLength,
+        char *dest, int32_t destCapacity, Edits *edits,
+        UErrorCode &errorCode) {
+    const int32_t caseLocale = ustrcase_getCaseLocale(locale);
+    return ucasemap_mapUTF8(
+        caseLocale, options, UCASEMAP_BREAK_ITERATOR_NULL
+        dest, destCapacity,
+        src, srcLength,
+        ucasemap_internalUTF8ToLower, edits, errorCode);
+}
+
+/**
+ * Buffer overload: resolves the case locale from the locale ID and returns the
+ * result of ucasemap_mapUTF8() with ucasemap_internalUTF8ToUpper as the mapper.
+ */
+int32_t CaseMap::utf8ToUpper(
+        const char *locale, uint32_t options,
+        const char *src, int32_t srcLength,
+        char *dest, int32_t destCapacity, Edits *edits,
+        UErrorCode &errorCode) {
+    const int32_t caseLocale = ustrcase_getCaseLocale(locale);
+    return ucasemap_mapUTF8(
+        caseLocale, options, UCASEMAP_BREAK_ITERATOR_NULL
+        dest, destCapacity,
+        src, srcLength,
+        ucasemap_internalUTF8ToUpper, edits, errorCode);
+}
+
+/**
+ * Buffer overload: returns the result of ucasemap_mapUTF8() with UCASE_LOC_ROOT
+ * and ucasemap_internalUTF8Fold as the mapper.
+ */
+int32_t CaseMap::utf8Fold(
+        uint32_t options,
+        const char *src, int32_t srcLength,
+        char *dest, int32_t destCapacity, Edits *edits,
+        UErrorCode &errorCode) {
+    const int32_t destLength = ucasemap_mapUTF8(
+        UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
+        dest, destCapacity,
+        src, srcLength,
+        ucasemap_internalUTF8Fold, edits, errorCode);
+    return destLength;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/ucasemap_imp.h b/thirdparty/icu4c/common/ucasemap_imp.h
new file mode 100644
index 0000000000..e17a0ae5a3
--- /dev/null
+++ b/thirdparty/icu4c/common/ucasemap_imp.h
@@ -0,0 +1,282 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// ucasemap_imp.h
+// created: 2017feb08 Markus W. Scherer
+
+#ifndef __UCASEMAP_IMP_H__
+#define __UCASEMAP_IMP_H__
+
+#include "unicode/utypes.h"
+#include "unicode/ucasemap.h"
+#include "unicode/uchar.h"
+#include "ucase.h"
+
+/**
+ * Bit mask for the titlecasing iterator options bit field.
+ * The mask 0xe0 covers three bits (bits 5..7), hence 8 possible values.
+ * Currently only 3 out of 8 values are used:
+ * 0 (words), U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
+ * See stringoptions.h.
+ * @internal
+ */
+#define U_TITLECASE_ITERATOR_MASK 0xe0
+
+/**
+ * Bit mask for the titlecasing index adjustment options bit set.
+ * The mask 0x600 covers two bits (bits 9 and 10).
+ * Currently two bits are defined:
+ * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED.
+ * ustrcase_checkTitleAdjustmentOptions() rejects options that have both bits set.
+ * See stringoptions.h.
+ * @internal
+ */
+#define U_TITLECASE_ADJUSTMENT_MASK 0x600
+
+/**
+ * Internal API, used by u_strcasecmp() etc.
+ * Compare strings case-insensitively,
+ * in code point order or code unit order.
+ * NOTE(review): the length and options conventions are presumably the same as for
+ * u_caseInsensitivePrefixMatch() below — confirm against the implementation.
+ */
+U_CFUNC int32_t
+u_strcmpFold(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Internal API, used for detecting length of
+ * shared prefix case-insensitively.
+ * @param s1 input string 1
+ * @param length1 length of string 1, or -1 (NUL-terminated)
+ * @param s2 input string 2
+ * @param length2 length of string 2, or -1 (NUL-terminated)
+ * @param options compare options
+ * @param matchLen1 (output) length of partial prefix match in s1
+ * @param matchLen2 (output) length of partial prefix match in s2
+ * @param pErrorCode receives error status
+ */
+U_CAPI void
+u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ int32_t *matchLen1, int32_t *matchLen2,
+ UErrorCode *pErrorCode);
+
+#ifdef __cplusplus
+
+U_NAMESPACE_BEGIN
+
+class BreakIterator; // unicode/brkiter.h
+class ByteSink;
+class Locale; // unicode/locid.h
+
+/**
+ * Validates the titlecase index adjustment bits of the options.
+ * Returns false if errorCode already indicates a failure.
+ * Returns false and sets U_ILLEGAL_ARGUMENT_ERROR if every bit of
+ * U_TITLECASE_ADJUSTMENT_MASK is set in options; returns true otherwise.
+ */
+inline UBool ustrcase_checkTitleAdjustmentOptions(uint32_t options, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return false; }
+    const uint32_t adjustmentBits = options & U_TITLECASE_ADJUSTMENT_MASK;
+    if (adjustmentBits != U_TITLECASE_ADJUSTMENT_MASK) {
+        return true;
+    }
+    // Both adjustment options were requested together.
+    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+    return false;
+}
+
+/**
+ * Returns whether c is a letter, number, symbol or private use code point
+ * (private use because those are typically used as letters or numbers).
+ * Modifier letters are excluded from the category mask and count only if they are cased.
+ */
+inline UBool ustrcase_isLNS(UChar32 c) {
+    // L, N, S and Co categories, with Lm removed.
+    const uint32_t lnsMask = (U_GC_L_MASK|U_GC_N_MASK|U_GC_S_MASK|U_GC_CO_MASK) & ~U_GC_LM_MASK;
+    int category = u_charType(c);
+    const bool inMask = (U_MASK(category) & lnsMask) != 0;
+    return inMask || (category == U_MODIFIER_LETTER && ucase_getType(c) != UCASE_NONE);
+}
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Returns nullptr if error. Pass in either locale or locID, not both.
+ * Callers in this code base pass their own iterator (possibly nullptr) as iter
+ * and use the returned pointer for titlecasing; ownedIter is an output that the
+ * caller keeps alive (or orphans) for the duration of that use.
+ * NOTE(review): presumably ownedIter is filled only when a new iterator had to be
+ * created — confirm against the implementation.
+ */
+U_CFUNC
+BreakIterator *ustrcase_getTitleBreakIterator(
+ const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter,
+ LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode);
+
+#endif
+
+U_NAMESPACE_END
+
+#include "unicode/unistr.h" // for UStringCaseMapper
+
+/*
+ * Internal string casing functions implementing
+ * ustring.h/ustrcase.cpp and UnicodeString case mapping functions.
+ */
+
+/**
+ * Backing object for the ucasemap_*() C API.
+ * The C API wrappers read caseLocale and options (and, for titlecasing, iter and
+ * locale) from this struct and pass them to ucasemap_mapUTF8().
+ */
+struct UCaseMap : public icu::UMemory {
+ /** Implements most of ucasemap_open(). */
+ UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode);
+ ~UCaseMap();
+
+#if !UCONFIG_NO_BREAK_ITERATION
+ icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */
+#endif
+ // Locale ID; passed as locID to ustrcase_getTitleBreakIterator() when iter is not set.
+ char locale[32];
+ // Case locale value handed to the case mapping functions.
+ int32_t caseLocale;
+ // Option bits handed to the case mapping functions.
+ uint32_t options;
+};
+
+// Helper macros for threading an optional BreakIterator through parameter lists.
+// Each expands to nothing when break iteration is configured out; otherwise it
+// expands to one parameter/argument INCLUDING its trailing comma, so uses of these
+// macros are not followed by a comma of their own.
+//   _PARAM:  named parameter "iter" in a function definition/declaration.
+//   _UNUSED: unnamed parameter for functions that ignore the iterator.
+//   (plain): forwards the parameter named "iter" as an argument.
+//   _NULL:   passes NULL as the iterator argument.
+#if UCONFIG_NO_BREAK_ITERATION
+# define UCASEMAP_BREAK_ITERATOR_PARAM
+# define UCASEMAP_BREAK_ITERATOR_UNUSED
+# define UCASEMAP_BREAK_ITERATOR
+# define UCASEMAP_BREAK_ITERATOR_NULL
+#else
+# define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
+# define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
+# define UCASEMAP_BREAK_ITERATOR iter,
+# define UCASEMAP_BREAK_ITERATOR_NULL NULL,
+#endif
+
+/**
+ * Maps a locale ID string to the case locale value that the case mapping
+ * functions take as their caseLocale argument.
+ * NOTE(review): presumably returns one of the UCASE_LOC_* constants — confirm in ustrcase.cpp.
+ */
+U_CFUNC int32_t
+ustrcase_getCaseLocale(const char *locale);
+
+// TODO: swap src / dest if approved for new public api
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ icu::Edits *edits,
+ UErrorCode &errorCode);
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ icu::Edits *edits,
+ UErrorCode &errorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Implements UStringCaseMapper.
+ * Declared only when break iteration is available; takes the iterator as an
+ * explicit parameter instead of through UCASEMAP_BREAK_ITERATOR_PARAM.
+ */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToTitle(int32_t caseLocale, uint32_t options,
+ icu::BreakIterator *iter,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ icu::Edits *edits,
+ UErrorCode &errorCode);
+
+#endif
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ icu::Edits *edits,
+ UErrorCode &errorCode);
+
+/**
+ * Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz().
+ * Implements argument checking.
+ * The actual mapping is performed by the stringCaseMapper callback; edits may
+ * presumably be nullptr, as in the UTF-8 counterpart ucasemap_mapUTF8() — confirm in ustrcase.cpp.
+ */
+U_CFUNC int32_t
+ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UStringCaseMapper *stringCaseMapper,
+ icu::Edits *edits,
+ UErrorCode &errorCode);
+
+/**
+ * Common string case mapping implementation for old-fashioned u_strToXyz() functions
+ * that allow the source string to overlap the destination buffer.
+ * Implements argument checking and internally works with an intermediate buffer if necessary.
+ * Unlike ustrcase_map(), this variant takes no Edits parameter.
+ */
+U_CFUNC int32_t
+ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UStringCaseMapper *stringCaseMapper,
+ UErrorCode &errorCode);
+
+/**
+ * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
+ * UTF-8 version of UStringCaseMapper.
+ * All error checking must be done.
+ * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
+ * NOTE(review): the signature takes caseLocale/options/iter directly rather than a
+ * UCaseMap, so the sentence above looks like it predates the current parameters.
+ * The iter parameter exists only when break iteration is configured in,
+ * matching the UCASEMAP_BREAK_ITERATOR_* macros.
+ */
+typedef void U_CALLCONV
+UTF8CaseMapper(int32_t caseLocale, uint32_t options,
+#if !UCONFIG_NO_BREAK_ITERATION
+ icu::BreakIterator *iter,
+#endif
+ const uint8_t *src, int32_t srcLength,
+ icu::ByteSink &sink, icu::Edits *edits,
+ UErrorCode &errorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/** Implements UTF8CaseMapper. */
+U_CFUNC void U_CALLCONV
+ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
+ icu::BreakIterator *iter,
+ const uint8_t *src, int32_t srcLength,
+ icu::ByteSink &sink, icu::Edits *edits,
+ UErrorCode &errorCode);
+
+#endif
+
+/**
+ * Implements argument checking for UTF-8 string case mapping into a ByteSink.
+ * A srcLength of -1 means that src is NUL-terminated.
+ * Runs stringCaseMapper over the source and flushes the sink.
+ */
+void
+ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
+ const char *src, int32_t srcLength,
+ UTF8CaseMapper *stringCaseMapper,
+ icu::ByteSink &sink, icu::Edits *edits,
+ UErrorCode &errorCode);
+
+/**
+ * Implements argument checking and buffer handling
+ * for UTF-8 string case mapping as a common function.
+ * A srcLength of -1 means that src is NUL-terminated.
+ * Source and destination must not overlap (U_ILLEGAL_ARGUMENT_ERROR otherwise);
+ * U_BUFFER_OVERFLOW_ERROR is set when the output does not fit into dest.
+ */
+int32_t
+ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UTF8CaseMapper *stringCaseMapper,
+ icu::Edits *edits,
+ UErrorCode &errorCode);
+
+U_NAMESPACE_BEGIN
+// Shared constants and data accessors for Greek-specific uppercasing.
+namespace GreekUpper {
+
+// Data bits.
+// Low 10 bits of a data word: the uppercase code point of the letter
+// (the UTF-8 toUpper() asserts that it lies in 0x370..0x3ff).
+static const uint32_t UPPER_MASK = 0x3ff;
+static const uint32_t HAS_VOWEL = 0x1000;
+static const uint32_t HAS_YPOGEGRAMMENI = 0x2000;
+static const uint32_t HAS_ACCENT = 0x4000;
+static const uint32_t HAS_DIALYTIKA = 0x8000;
+// Further bits during data building and processing, not stored in the data map.
+static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000;
+static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000;
+
+// Combined masks used by toUpper().
+static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
+static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
+ HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
+static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
+
+// State bits.
+// Carried from one processed code point/sequence to the next by toUpper().
+static const uint32_t AFTER_CASED = 1;
+static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;
+
+/**
+ * Returns the data word (data bits above) for c;
+ * toUpper() treats a result of 0 as "no Greek letter data".
+ */
+uint32_t getLetterData(UChar32 c);
+
+/**
+ * Returns a non-zero value for each of the Greek combining diacritics
+ * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
+ * plus some perispomeni look-alikes.
+ */
+uint32_t getDiacriticData(UChar32 c);
+
+} // namespace GreekUpper
+U_NAMESPACE_END
+
+#endif // __cplusplus
+
+#endif // __UCASEMAP_IMP_H__
diff --git a/thirdparty/icu4c/common/ucasemap_titlecase_brkiter.cpp b/thirdparty/icu4c/common/ucasemap_titlecase_brkiter.cpp
new file mode 100644
index 0000000000..c21dfb7698
--- /dev/null
+++ b/thirdparty/icu4c/common/ucasemap_titlecase_brkiter.cpp
@@ -0,0 +1,134 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ucasemap_titlecase_brkiter.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011jun02
+* created by: Markus W. Scherer
+*
+* Titlecasing functions that are based on BreakIterator
+* were moved here to break dependency cycles among parts of the common library.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/ubrk.h"
+#include "unicode/casemap.h"
+#include "unicode/ucasemap.h"
+#include "cmemory.h"
+#include "ucase.h"
+#include "ucasemap_imp.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * ByteSink overload of UTF-8 titlecasing.
+ * Opens a UText over the source, obtains the break iterator to use via
+ * ustrcase_getTitleBreakIterator(), points it at the text and runs
+ * ucasemap_internalUTF8ToTitle through ucasemap_mapUTF8().
+ * The UText is closed on every path after it was opened.
+ */
+void CaseMap::utf8ToTitle(
+        const char *locale, uint32_t options, BreakIterator *iter,
+        StringPiece src, ByteSink &sink, Edits *edits,
+        UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    UText text = UTEXT_INITIALIZER;
+    utext_openUTF8(&text, src.data(), src.length(), &errorCode);
+    LocalPointer<BreakIterator> ownedIter;
+    iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode);
+    if (iter != nullptr) {
+        iter->setText(&text, errorCode);
+        ucasemap_mapUTF8(
+            ustrcase_getCaseLocale(locale), options, iter,
+            src.data(), src.length(),
+            ucasemap_internalUTF8ToTitle, sink, edits, errorCode);
+    }
+    utext_close(&text);
+}
+
+/**
+ * Buffer overload of UTF-8 titlecasing.
+ * Opens a UText over the source, obtains the break iterator to use via
+ * ustrcase_getTitleBreakIterator(), points it at the text and returns the result
+ * of ucasemap_mapUTF8() with ucasemap_internalUTF8ToTitle.
+ * Returns 0 if errorCode is already a failure or no iterator is available.
+ */
+int32_t CaseMap::utf8ToTitle(
+        const char *locale, uint32_t options, BreakIterator *iter,
+        const char *src, int32_t srcLength,
+        char *dest, int32_t destCapacity, Edits *edits,
+        UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return 0;
+    }
+    UText text = UTEXT_INITIALIZER;
+    utext_openUTF8(&text, src, srcLength, &errorCode);
+    LocalPointer<BreakIterator> ownedIter;
+    iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode);
+    int32_t destLength = 0;
+    if (iter != NULL) {
+        iter->setText(&text, errorCode);
+        destLength = ucasemap_mapUTF8(
+            ustrcase_getCaseLocale(locale), options, iter,
+            dest, destCapacity,
+            src, srcLength,
+            ucasemap_internalUTF8ToTitle, edits, errorCode);
+    }
+    utext_close(&text);
+    return destLength;
+}
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
+/** Returns the break iterator currently stored in csm, viewed through the C API type. */
+U_CAPI const UBreakIterator * U_EXPORT2
+ucasemap_getBreakIterator(const UCaseMap *csm) {
+    BreakIterator *const current = csm->iter;
+    return reinterpret_cast<UBreakIterator *>(current);
+}
+
+/**
+ * Replaces the break iterator stored in csm with iterToAdopt.
+ * The previously stored iterator is deleted. Does nothing if *pErrorCode
+ * already indicates a failure.
+ */
+U_CAPI void U_EXPORT2
+ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode) {
+    if(!U_FAILURE(*pErrorCode)) {
+        delete csm->iter;
+        csm->iter=reinterpret_cast<BreakIterator *>(iterToAdopt);
+    }
+}
+
+/**
+ * C API for UTF-8 titlecasing.
+ * Opens a UText over the source; if csm has no break iterator yet, one is obtained
+ * from ustrcase_getTitleBreakIterator() and stored in csm. The iterator is then
+ * pointed at the text and ucasemap_internalUTF8ToTitle is run through ucasemap_mapUTF8().
+ * Returns 0 on a pre-existing failure, if the UText cannot be opened, or if no
+ * iterator could be obtained.
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_utf8ToTitle(UCaseMap *csm,
+                     char *dest, int32_t destCapacity,
+                     const char *src, int32_t srcLength,
+                     UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    UText text = UTEXT_INITIALIZER;
+    utext_openUTF8(&text, (const char *)src, srcLength, pErrorCode);
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if (csm->iter == NULL) {
+        // Create an iterator on first use and let csm adopt it.
+        LocalPointer<BreakIterator> ownedIter;
+        BreakIterator *titleIter = ustrcase_getTitleBreakIterator(
+            nullptr, csm->locale, csm->options, nullptr, ownedIter, *pErrorCode);
+        if (titleIter == nullptr) {
+            utext_close(&text);
+            return 0;
+        }
+        csm->iter = ownedIter.orphan();
+    }
+    csm->iter->setText(&text, *pErrorCode);
+    const int32_t destLength = ucasemap_mapUTF8(
+        csm->caseLocale, csm->options, csm->iter,
+        dest, destCapacity,
+        src, srcLength,
+        ucasemap_internalUTF8ToTitle, NULL, *pErrorCode);
+    utext_close(&text);
+    return destLength;
+}
+
+#endif // !UCONFIG_NO_BREAK_ITERATION
diff --git a/thirdparty/icu4c/common/ucat.cpp b/thirdparty/icu4c/common/ucat.cpp
new file mode 100644
index 0000000000..dac56eeb5c
--- /dev/null
+++ b/thirdparty/icu4c/common/ucat.cpp
@@ -0,0 +1,78 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2003, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: March 19 2003
+* Since: ICU 2.6
+**********************************************************************
+*/
+#include "unicode/ucat.h"
+#include "unicode/ustring.h"
+#include "cstring.h"
+#include "uassert.h"
+
+/* Separator between set_num and msg_num */
+static const char SEPARATOR = '%';
+
+/* Maximum length of a set_num/msg_num key, incl. terminating zero.
+ * Longest possible key is "-2147483648%-2147483648":
+ * 11 + 1 + 11 = 23 characters, plus the terminating zero = 24. */
+#define MAX_KEY_LEN (24)
+
+/**
+ * Writes the key "<set_num><SEPARATOR><msg_num>" (both numbers in base 10)
+ * into buffer and returns buffer. Numeric values must be >= 0. Buffer must
+ * be of length MAX_KEY_LEN or more.
+ */
+static char*
+_catkey(char* buffer, int32_t set_num, int32_t msg_num) {
+    /* T_CString_integerToString() returns the number of characters written for set_num */
+    const int32_t separatorIndex = T_CString_integerToString(buffer, set_num, 10);
+    buffer[separatorIndex] = SEPARATOR;
+    T_CString_integerToString(buffer + separatorIndex + 1, msg_num, 10);
+    return buffer;
+}
+
+/** Opens the resource bundle for name/locale with ures_open() and returns it as a catalog descriptor. */
+U_CAPI u_nl_catd U_EXPORT2
+u_catopen(const char* name, const char* locale, UErrorCode* ec) {
+    UResourceBundle* bundle = ures_open(name, locale, ec);
+    return (u_nl_catd) bundle;
+}
+
+/** Closes the resource bundle behind the catalog descriptor with ures_close(). */
+U_CAPI void U_EXPORT2
+u_catclose(u_nl_catd catd) {
+    UResourceBundle* bundle = (UResourceBundle*) catd; /* may be NULL */
+    ures_close(bundle);
+}
+
+/**
+ * Looks up the message stored under the key "<set_num>%<msg_num>" in the
+ * resource bundle behind catd.
+ *
+ * @param catd    Catalog descriptor, as returned by u_catopen().
+ * @param set_num Set number; first half of the lookup key.
+ * @param msg_num Message number; second half of the lookup key.
+ * @param s       Default string, returned on any failure. May be NULL.
+ * @param len     If not NULL, receives the length of the returned string
+ *                (0 when the NULL default string is returned).
+ * @param ec      Error code; if NULL or already a failure, no lookup is done.
+ * @return The string from the bundle, or s on failure.
+ */
+U_CAPI const UChar* U_EXPORT2
+u_catgets(u_nl_catd catd, int32_t set_num, int32_t msg_num,
+          const UChar* s,
+          int32_t* len, UErrorCode* ec) {
+
+    if (ec != NULL && !U_FAILURE(*ec)) {
+        char key[MAX_KEY_LEN];
+        const UChar* result = ures_getStringByKey((const UResourceBundle*) catd,
+                                                  _catkey(key, set_num, msg_num),
+                                                  len, ec);
+        if (!U_FAILURE(*ec)) {
+            return result;
+        }
+    }
+
+    /* In case of any failure, return s */
+    if (len != NULL) {
+        /* Guard against a NULL default string: u_strlen() would dereference it. */
+        *len = (s != NULL) ? u_strlen(s) : 0;
+    }
+    return s;
+}
+
+/*eof*/
diff --git a/thirdparty/icu4c/common/uchar.cpp b/thirdparty/icu4c/common/uchar.cpp
new file mode 100644
index 0000000000..eb14e4c75d
--- /dev/null
+++ b/thirdparty/icu4c/common/uchar.cpp
@@ -0,0 +1,730 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+********************************************************************************
+* Copyright (C) 1996-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+********************************************************************************
+*
+* File UCHAR.C
+*
+* Modification History:
+*
+* Date Name Description
+* 04/02/97 aliu Creation.
+* 4/15/99 Madhu Updated all the function definitions for C Implementation
+* 5/20/99 Madhu Added the function u_getVersion()
+* 8/19/1999 srl Upgraded scripts to Unicode3.0
+* 11/11/1999 weiv added u_isalnum(), cleaned comments
+* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion.
+* 06/20/2000 helena OS/400 port changes; mostly typecast.
+******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#include "unicode/uscript.h"
+#include "unicode/udata.h"
+#include "uassert.h"
+#include "cmemory.h"
+#include "ucln_cmn.h"
+#include "utrie2.h"
+#include "udataswp.h"
+#include "uprops.h"
+#include "ustr_imp.h"
+
+/* uchar_props_data.h is machine-generated by genprops --csource */
+#define INCLUDED_FROM_UCHAR_C
+#include "uchar_props_data.h"
+
+/* constants and macros for access to the data ------------------------------ */
+
+/* getting a uint32_t properties word from the data */
+/* Looks up c in propsTrie with UTRIE2_GET16 and stores the value in result;
+ * the macro expression also evaluates to that value. */
+#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c))
+
+/* API functions ------------------------------------------------------------ */
+
+/* Returns the general category stored in the properties word of c. */
+U_CAPI int8_t U_EXPORT2
+u_charType(UChar32 c) {
+    uint32_t propsWord;
+    GET_PROPS(c, propsWord);
+    const int8_t category = (int8_t)GET_CATEGORY(propsWord);
+    return category;
+}
+
+/* Enumerate all code points with their general categories. */
+/* Bundles the caller's range callback with its context pointer so that both can be
+ * passed through the single context argument of utrie2_enum(). */
+struct _EnumTypeCallback {
+ UCharEnumTypeRange *enumRange; /* caller's callback, invoked once per range */
+ const void *context; /* caller's context, handed back to enumRange */
+};
+
+/* Value callback for utrie2_enum(): reduces a properties word to its general category. */
+static uint32_t U_CALLCONV
+_enumTypeValue(const void *context, uint32_t value) {
+    (void)context;  /* unused */
+    const uint32_t category = GET_CATEGORY(value);
+    return category;
+}
+
+/* Range callback for utrie2_enum(): forwards the range to the caller's callback,
+ * passing end+1 as the range limit and the value cast to UCharCategory. */
+static UBool U_CALLCONV
+_enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
+    const struct _EnumTypeCallback *callback = (const struct _EnumTypeCallback *)context;
+    const UChar32 limit = end+1;
+    return callback->enumRange(callback->context, start, limit, (UCharCategory)value);
+}
+
+/* Enumerates propsTrie, reporting each range together with its general category
+ * to enumRange. Does nothing when enumRange is NULL. */
+U_CAPI void U_EXPORT2
+u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) {
+    if(enumRange!=NULL) {
+        struct _EnumTypeCallback callback;
+        callback.context=context;
+        callback.enumRange=enumRange;
+        utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback);
+    }
+}
+
+/* Returns whether the general category of c is U_LOWERCASE_LETTER. */
+U_CAPI UBool U_EXPORT2
+u_islower(UChar32 c) {
+    uint32_t propsWord;
+    GET_PROPS(c, propsWord);
+    const UBool isLower = (UBool)(GET_CATEGORY(propsWord)==U_LOWERCASE_LETTER);
+    return isLower;
+}
+
+/* Returns whether the general category of c is U_UPPERCASE_LETTER. */
+U_CAPI UBool U_EXPORT2
+u_isupper(UChar32 c) {
+    uint32_t propsWord;
+    GET_PROPS(c, propsWord);
+    const UBool isUpper = (UBool)(GET_CATEGORY(propsWord)==U_UPPERCASE_LETTER);
+    return isUpper;
+}
+
+/* Returns whether the general category of c is U_TITLECASE_LETTER. */
+U_CAPI UBool U_EXPORT2
+u_istitle(UChar32 c) {
+    uint32_t propsWord;
+    GET_PROPS(c, propsWord);
+    const UBool isTitle = (UBool)(GET_CATEGORY(propsWord)==U_TITLECASE_LETTER);
+    return isTitle;
+}
+
+/* Returns whether the general category of c is U_DECIMAL_DIGIT_NUMBER. */
+U_CAPI UBool U_EXPORT2
+u_isdigit(UChar32 c) {
+    uint32_t propsWord;
+    GET_PROPS(c, propsWord);
+    const UBool isDecimalDigit = (UBool)(GET_CATEGORY(propsWord)==U_DECIMAL_DIGIT_NUMBER);
+    return isDecimalDigit;
+}
+
+/* Returns whether c is a hexadecimal digit: an ASCII or Fullwidth ASCII letter
+ * a-f/A-F, or any code point whose general category is U_DECIMAL_DIGIT_NUMBER. */
+U_CAPI UBool U_EXPORT2
+u_isxdigit(UChar32 c) {
+    /* ASCII A-F (0x41..0x46) and a-f (0x61..0x66) */
+    const bool asciiHexLetter = (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61));
+    /* Fullwidth A-F (0xff21..0xff26) and a-f (0xff41..0xff46) */
+    const bool fullwidthHexLetter = (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41));
+    if(asciiHexLetter || fullwidthHexLetter) {
+        return TRUE;
+    }
+
+    uint32_t propsWord;
+    GET_PROPS(c, propsWord);
+    return (UBool)(GET_CATEGORY(propsWord)==U_DECIMAL_DIGIT_NUMBER);
+}
+
+/* Returns whether the general category of c is one of the letter categories (U_GC_L_MASK). */
+U_CAPI UBool U_EXPORT2
+u_isalpha(UChar32 c) {
+    uint32_t propsWord;
+    GET_PROPS(c, propsWord);
+    const UBool isLetter = (UBool)((CAT_MASK(propsWord)&U_GC_L_MASK)!=0);
+    return isLetter;
+}
+
+/* Returns whether the UPROPS_ALPHABETIC bit is set in properties column 1 of c. */
+U_CAPI UBool U_EXPORT2
+u_isUAlphabetic(UChar32 c) {
+    const uint32_t alphabeticBit = u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC);
+    return alphabeticBit!=0;
+}
+
+/* Returns whether the general category of c is a letter category or decimal digit number. */
+U_CAPI UBool U_EXPORT2
+u_isalnum(UChar32 c) {
+    uint32_t propsWord;
+    GET_PROPS(c, propsWord);
+    const UBool isLetterOrDigit = (UBool)((CAT_MASK(propsWord)&(U_GC_L_MASK|U_GC_ND_MASK))!=0);
+    return isLetterOrDigit;
+}
+
+/**
+ * Implements UCHAR_POSIX_ALNUM: true if c is alphabetic (u_isUAlphabetic)
+ * or a decimal digit (u_isdigit).
+ * @internal
+ */
+U_CFUNC UBool
+u_isalnumPOSIX(UChar32 c) {
+    /* u_isdigit() is only consulted when c is not alphabetic */
+    const bool alphabeticOrDigit = u_isUAlphabetic(c) || u_isdigit(c);
+    return (UBool)alphabeticOrDigit;
+}
+
+/* Returns whether c has a non-zero general category value
+ * (category 0 is presumably "unassigned" - confirm in uchar.h). */
+U_CAPI UBool U_EXPORT2
+u_isdefined(UChar32 c) {
+    uint32_t propsWord;
+    GET_PROPS(c, propsWord);
+    const UBool hasCategory = (UBool)(GET_CATEGORY(propsWord)!=0);
+    return hasCategory;
+}
+
+/* Returns whether c is a base form character that can take a diacritic:
+ * general category in L, N, Mc or Me. */
+U_CAPI UBool U_EXPORT2
+u_isbase(UChar32 c) {
+    uint32_t propsWord;
+    GET_PROPS(c, propsWord);
+    const uint32_t baseCategories = U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK;
+    return (UBool)((CAT_MASK(propsWord)&baseCategories)!=0);
+}
+
+/* Returns whether the general category of c is Cc, Cf, Zl or Zp. */
+U_CAPI UBool U_EXPORT2
+u_iscntrl(UChar32 c) {
+    uint32_t propsWord;
+    GET_PROPS(c, propsWord);
+    const uint32_t controlCategories = U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK;
+    return (UBool)((CAT_MASK(propsWord)&controlCategories)!=0);
+}
+
+/* Returns whether c is in 0..0x1f or 0x7f..0x9f.
+ * The unsigned comparison also rejects negative code points. */
+U_CAPI UBool U_EXPORT2
+u_isISOControl(UChar32 c) {
+    const bool atMost9f = (uint32_t)c<=0x9f;
+    return atMost9f && (c<=0x1f || c>=0x7f);
+}
+
+/* Some control characters that are used as space. */
+/* True for TAB..CR, 0x1c..0x1f and NL. The argument is pasted unparenthesized and
+ * evaluated several times, so pass a plain variable. */
+#define IS_THAT_CONTROL_SPACE(c) \
+ (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL))
+
+/* Java has decided that U+0085 New Line is not whitespace any more. */
+/* Same as above but without NL: true for TAB..CR and 0x1c..0x1f. */
+#define IS_THAT_ASCII_CONTROL_SPACE(c) \
+ (c<=0x1f && c>=TAB && (c<=CR || c>=0x1c))
+
+/* Returns whether c has a separator general category (U_GC_Z_MASK)
+ * or is one of the control characters accepted by IS_THAT_CONTROL_SPACE. */
+U_CAPI UBool U_EXPORT2
+u_isspace(UChar32 c) {
+    uint32_t propsWord;
+    GET_PROPS(c, propsWord);
+    const bool isSeparator = (CAT_MASK(propsWord)&U_GC_Z_MASK)!=0;
+    return (UBool)(isSeparator || IS_THAT_CONTROL_SPACE(c));
+}
+
+/* Returns whether c has a separator general category (U_GC_Z_MASK). */
+U_CAPI UBool U_EXPORT2
+u_isJavaSpaceChar(UChar32 c) {
+    uint32_t propsWord;
+    GET_PROPS(c, propsWord);
+    const UBool isSeparator = (UBool)((CAT_MASK(propsWord)&U_GC_Z_MASK)!=0);
+    return isSeparator;
+}
+
+/* Returns whether c is whitespace: a separator (U_GC_Z_MASK) other than the
+ * no-break spaces NBSP, FIGURESP and NNBSP, or one of the control characters
+ * accepted by IS_THAT_ASCII_CONTROL_SPACE. */
+U_CAPI UBool U_EXPORT2
+u_isWhitespace(UChar32 c) {
+    uint32_t propsWord;
+    GET_PROPS(c, propsWord);
+    const bool isSeparator = (CAT_MASK(propsWord)&U_GC_Z_MASK)!=0;
+    const bool isNoBreakSpace = (c==NBSP || c==FIGURESP || c==NNBSP);
+    return (UBool)(
+        (isSeparator && !isNoBreakSpace) || /* exclude no-break spaces */
+        IS_THAT_ASCII_CONTROL_SPACE(c)
+    );
+}
+
+/* Returns whether c is blank: TAB or SPACE for code points up to 0x9f,
+ * otherwise any code point with general category U_SPACE_SEPARATOR (Zs). */
+U_CAPI UBool U_EXPORT2
+u_isblank(UChar32 c) {
+    if((uint32_t)c>0x9f) {
+        /* Zs */
+        uint32_t propsWord;
+        GET_PROPS(c, propsWord);
+        return (UBool)(GET_CATEGORY(propsWord)==U_SPACE_SEPARATOR);
+    }
+    return c==9 || c==0x20; /* TAB or SPACE */
+}
+
+/* Tests the UPROPS_WHITE_SPACE bit in column 1 of the properties vector for c. */
+U_CAPI UBool U_EXPORT2
+u_isUWhiteSpace(UChar32 c) {
+    const uint32_t vectorWord=u_getUnicodeProperties(c, 1);
+    return (vectorWord&U_MASK(UPROPS_WHITE_SPACE))!=0;
+}
+
+/*
+ * Reports whether c is printable, i.e. whether its general category is
+ * outside the C group.
+ */
+U_CAPI UBool U_EXPORT2
+u_isprint(UChar32 c) {
+    uint32_t mainProps;
+    GET_PROPS(c, mainProps);
+    /* a zero intersection means c is in none of the C categories */
+    const uint32_t otherBits=CAT_MASK(mainProps)&U_GC_C_MASK;
+    return (UBool)(otherBits==0);
+}
+
+/**
+ * Checks if c is in \p{graph}\p{blank} - \p{cntrl}.
+ * Implements UCHAR_POSIX_PRINT.
+ *
+ * The only cntrl character in graph+blank is TAB (in blank), so instead of
+ * calling u_isblank() this accepts (blank-TAB)=Zs directly and otherwise
+ * defers to u_isgraphPOSIX().
+ * @internal
+ */
+U_CFUNC UBool
+u_isprintPOSIX(UChar32 c) {
+    uint32_t mainProps;
+    GET_PROPS(c, mainProps);
+    if(GET_CATEGORY(mainProps)==U_SPACE_SEPARATOR) {
+        return TRUE;
+    }
+    return (UBool)(u_isgraphPOSIX(c)!=0);
+}
+
+/*
+ * Reports whether c is a graphic character: its general category must not
+ * be Cc, Cf, Cs, Cn or anything in the Z group.
+ */
+U_CAPI UBool U_EXPORT2
+u_isgraph(UChar32 c) {
+    const uint32_t nonGraphic=
+        U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK;
+    uint32_t mainProps;
+    GET_PROPS(c, mainProps);
+    /* a zero intersection means c is in none of the excluded categories */
+    return (UBool)((CAT_MASK(mainProps)&nonGraphic)==0);
+}
+
+/**
+ * Checks if c is in
+ * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
+ * with space=\p{Whitespace} and Control=Cc.
+ * Implements UCHAR_POSIX_GRAPH.
+ *
+ * Uses the identity \p{space}\p{gc=Control} == \p{gc=Z}\p{Control}, so the
+ * test reduces to "general category is none of Cc, Cs, Cn, Z*".
+ * @internal
+ */
+U_CFUNC UBool
+u_isgraphPOSIX(UChar32 c) {
+    const uint32_t nonGraphic=U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK;
+    uint32_t mainProps;
+    GET_PROPS(c, mainProps);
+    /* a zero intersection means c is in none of the excluded categories */
+    return (UBool)((CAT_MASK(mainProps)&nonGraphic)==0);
+}
+
+/* Reports whether the general category of c is in the P group. */
+U_CAPI UBool U_EXPORT2
+u_ispunct(UChar32 c) {
+    uint32_t mainProps;
+    GET_PROPS(c, mainProps);
+    const uint32_t punctuationBits=CAT_MASK(mainProps)&U_GC_P_MASK;
+    return (UBool)(punctuationBits!=0);
+}
+
+/*
+ * Reports whether c can start a Unicode identifier: its general category
+ * must be in the L group or be Nl.
+ */
+U_CAPI UBool U_EXPORT2
+u_isIDStart(UChar32 c) {
+    const uint32_t idStartCategories=U_GC_L_MASK|U_GC_NL_MASK;
+    uint32_t mainProps;
+    GET_PROPS(c, mainProps);
+    return (UBool)((CAT_MASK(mainProps)&idStartCategories)!=0);
+}
+
+/*
+ * Reports whether c can be part of a Unicode identifier other than its
+ * start: general category Nd, Nl, L*, Pc, Mc or Mn, or any character
+ * accepted by u_isIDIgnorable().
+ */
+U_CAPI UBool U_EXPORT2
+u_isIDPart(UChar32 c) {
+    const uint32_t idPartCategories=
+        U_GC_ND_MASK|U_GC_NL_MASK|
+        U_GC_L_MASK|
+        U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK;
+    uint32_t mainProps;
+    GET_PROPS(c, mainProps);
+    if((CAT_MASK(mainProps)&idPartCategories)!=0) {
+        return TRUE;
+    }
+    return (UBool)(u_isIDIgnorable(c)!=0);
+}
+
+/*
+ * Reports whether c is ignorable in a Java or Unicode identifier.
+ * Up to 0x9f: ISO controls that are not ASCII control spaces.
+ * Above 0x9f: characters whose general category is U_FORMAT_CHAR.
+ */
+U_CAPI UBool U_EXPORT2
+u_isIDIgnorable(UChar32 c) {
+    if(c>0x9f) {
+        uint32_t mainProps;
+        GET_PROPS(c, mainProps);
+        return (UBool)(GET_CATEGORY(mainProps)==U_FORMAT_CHAR);
+    }
+    return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c);
+}
+
+/*
+ * Reports whether c can start a Java identifier: its general category must
+ * be in the L group, or be Sc or Pc.
+ */
+U_CAPI UBool U_EXPORT2
+u_isJavaIDStart(UChar32 c) {
+    const uint32_t javaStartCategories=U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK;
+    uint32_t mainProps;
+    GET_PROPS(c, mainProps);
+    return (UBool)((CAT_MASK(mainProps)&javaStartCategories)!=0);
+}
+
+/*
+ * Reports whether c can be part of a Java identifier other than its start:
+ * general category Nd, Nl, L*, Sc, Pc, Mc or Mn, or any character accepted
+ * by u_isIDIgnorable().
+ */
+U_CAPI UBool U_EXPORT2
+u_isJavaIDPart(UChar32 c) {
+    const uint32_t javaPartCategories=
+        U_GC_ND_MASK|U_GC_NL_MASK|
+        U_GC_L_MASK|
+        U_GC_SC_MASK|U_GC_PC_MASK|
+        U_GC_MC_MASK|U_GC_MN_MASK;
+    uint32_t mainProps;
+    GET_PROPS(c, mainProps);
+    if((CAT_MASK(mainProps)&javaPartCategories)!=0) {
+        return TRUE;
+    }
+    return (UBool)(u_isIDIgnorable(c)!=0);
+}
+
+/*
+ * Returns the numeric type value of c rebased to UPROPS_NTV_DECIMAL_START
+ * when that result is at most 9, and -1 otherwise.
+ */
+U_CAPI int32_t U_EXPORT2
+u_charDigitValue(UChar32 c) {
+    uint32_t mainProps;
+    GET_PROPS(c, mainProps);
+    const int32_t digit=(int32_t)GET_NUMERIC_TYPE_VALUE(mainProps)-UPROPS_NTV_DECIMAL_START;
+    return (digit<=9) ? digit : -1;
+}
+
+/*
+ * Decodes the numeric type value (ntv) stored in the main properties of c
+ * into a double.
+ *
+ * The ntv space is partitioned by the UPROPS_NTV_*_START thresholds, tested
+ * in ascending order below; each range uses its own encoding (plain offset,
+ * packed numerator/denominator, packed mantissa/exponent, ...).
+ * Returns U_NO_NUMERIC_VALUE when ntv is UPROPS_NTV_NONE or falls in the
+ * reserved range.
+ */
+U_CAPI double U_EXPORT2
+u_getNumericValue(UChar32 c) {
+ uint32_t props;
+ int32_t ntv;
+ GET_PROPS(c, props);
+ ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props);
+
+ if(ntv==UPROPS_NTV_NONE) {
+ return U_NO_NUMERIC_VALUE;
+ } else if(ntv<UPROPS_NTV_DIGIT_START) {
+ /* decimal digit */
+ return ntv-UPROPS_NTV_DECIMAL_START;
+ } else if(ntv<UPROPS_NTV_NUMERIC_START) {
+ /* other digit */
+ return ntv-UPROPS_NTV_DIGIT_START;
+ } else if(ntv<UPROPS_NTV_FRACTION_START) {
+ /* small integer */
+ return ntv-UPROPS_NTV_NUMERIC_START;
+ } else if(ntv<UPROPS_NTV_LARGE_START) {
+ /* fraction */
+ /* numerator is in the bits above the low 4 (biased by 12), denominator-1 in the low 4 bits */
+ int32_t numerator=(ntv>>4)-12;
+ int32_t denominator=(ntv&0xf)+1;
+ return (double)numerator/denominator;
+ } else if(ntv<UPROPS_NTV_BASE60_START) {
+ /* large, single-significant-digit integer */
+ /* mantissa is in the bits above the low 5 (biased by 14), exponent-2 in the low 5 bits */
+ double numValue;
+ int32_t mant=(ntv>>5)-14;
+ int32_t exp=(ntv&0x1f)+2;
+ numValue=mant;
+
+ /* multiply by 10^exp without math.h */
+ while(exp>=4) {
+ numValue*=10000.;
+ exp-=4;
+ }
+ /* apply the remaining 0..3 powers of ten */
+ switch(exp) {
+ case 3:
+ numValue*=1000.;
+ break;
+ case 2:
+ numValue*=100.;
+ break;
+ case 1:
+ numValue*=10.;
+ break;
+ case 0:
+ default:
+ break;
+ }
+
+ return numValue;
+ } else if(ntv<UPROPS_NTV_FRACTION20_START) {
+ /* sexagesimal (base 60) integer */
+ /* mantissa is in the bits above the low 2 (biased by 0xbf), exponent-1 in the low 2 bits */
+ int32_t numValue=(ntv>>2)-0xbf;
+ int32_t exp=(ntv&3)+1;
+
+ /* multiply by 60^exp; exp is 1..4 here, so case 0 is never taken */
+ switch(exp) {
+ case 4:
+ numValue*=60*60*60*60;
+ break;
+ case 3:
+ numValue*=60*60*60;
+ break;
+ case 2:
+ numValue*=60*60;
+ break;
+ case 1:
+ numValue*=60;
+ break;
+ case 0:
+ default:
+ break;
+ }
+
+ return numValue;
+ } else if(ntv<UPROPS_NTV_FRACTION32_START) {
+ // fraction-20 e.g. 3/80
+ // low 2 bits select the odd numerator 1,3,5,7; the higher bits shift the denominator 20 left
+ int32_t frac20=ntv-UPROPS_NTV_FRACTION20_START; // 0..0x17
+ int32_t numerator=2*(frac20&3)+1;
+ int32_t denominator=20<<(frac20>>2);
+ return (double)numerator/denominator;
+ } else if(ntv<UPROPS_NTV_RESERVED_START) {
+ // fraction-32 e.g. 3/64
+ // same layout as fraction-20, with a base denominator of 32
+ int32_t frac32=ntv-UPROPS_NTV_FRACTION32_START; // 0..15
+ int32_t numerator=2*(frac32&3)+1;
+ int32_t denominator=32<<(frac32>>2);
+ return (double)numerator/denominator;
+ } else {
+ /* reserved */
+ return U_NO_NUMERIC_VALUE;
+ }
+}
+
+/*
+ * Returns the value of ch as a digit in the given radix, or -1.
+ *
+ * The radix must be in 2..36. The decimal digit value from
+ * u_charDigitValue() is tried first; failing that, ASCII and fullwidth
+ * Latin letters a-z / A-Z map to 10..35. A value that is not below the
+ * radix yields -1.
+ */
+U_CAPI int32_t U_EXPORT2
+u_digit(UChar32 ch, int8_t radix) {
+    if((uint8_t)(radix-2)>(36-2)) {
+        return -1; /* invalid radix */
+    }
+
+    int8_t digitValue=(int8_t)u_charDigitValue(ch);
+    if(digitValue<0) {
+        /* ch is not a decimal digit, try latin letters */
+        if(0x61<=ch && ch<=0x7A) {
+            digitValue=(int8_t)(ch-0x57); /* ch - 'a' + 10 */
+        } else if(0x41<=ch && ch<=0x5A) {
+            digitValue=(int8_t)(ch-0x37); /* ch - 'A' + 10 */
+        } else if(0xFF41<=ch && ch<=0xFF5A) {
+            digitValue=(int8_t)(ch-0xFF37); /* fullwidth ASCII a-z */
+        } else if(0xFF21<=ch && ch<=0xFF3A) {
+            digitValue=(int8_t)(ch-0xFF17); /* fullwidth ASCII A-Z */
+        }
+    }
+    return (int8_t)((digitValue<radix) ? digitValue : -1);
+}
+
+/*
+ * Returns the character for a digit value in the given radix:
+ * 0x30+digit for values below 10, otherwise lowercase letters from 0x61.
+ * Returns 0 when the radix is outside 2..36 or the digit is not in
+ * 0..radix-1.
+ */
+U_CAPI UChar32 U_EXPORT2
+u_forDigit(int32_t digit, int8_t radix) {
+    if((uint8_t)(radix-2)>(36-2)) {
+        return 0;
+    }
+    if((uint32_t)digit>=(uint32_t)radix) {
+        return 0;
+    }
+    return (digit<10) ? (UChar32)(0x30+digit) : (UChar32)((0x61-10)+digit);
+}
+
+/* miscellaneous, and support for uprops.cpp -------------------------------- */
+
+/*
+ * Copies U_MAX_VERSION_LENGTH bytes of dataVersion into versionArray.
+ * A NULL versionArray is ignored.
+ */
+U_CAPI void U_EXPORT2
+u_getUnicodeVersion(UVersionInfo versionArray) {
+    if(versionArray==NULL) {
+        return;
+    }
+    uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH);
+}
+
+/* Returns the raw main properties word that GET_PROPS() yields for c. */
+U_CFUNC uint32_t
+u_getMainProperties(UChar32 c) {
+    uint32_t mainProps;
+    GET_PROPS(c, mainProps);
+    return mainProps;
+}
+
+/*
+ * Returns the word at the given column of the properties vector for c,
+ * or 0 when column is not below propsVectorsColumns.
+ * column must be non-negative (asserted).
+ */
+U_CFUNC uint32_t
+u_getUnicodeProperties(UChar32 c, int32_t column) {
+    U_ASSERT(column>=0);
+    if(column<propsVectorsColumns) {
+        /* the trie maps c to the start index of its vector */
+        const uint16_t vectorStart=UTRIE2_GET16(&propsVectorsTrie, c);
+        return propsVectors[vectorStart+column];
+    }
+    return 0;
+}
+
+/*
+ * Returns the max-values word from indexes[] for properties vector
+ * column 0 or 2; any other column yields 0.
+ */
+U_CFUNC int32_t
+uprv_getMaxValues(int32_t column) {
+    if(column==0) {
+        return indexes[UPROPS_MAX_VALUES_INDEX];
+    }
+    if(column==2) {
+        return indexes[UPROPS_MAX_VALUES_2_INDEX];
+    }
+    return 0;
+}
+
+/*
+ * Writes the age of c into versionArray: the age field of properties vector
+ * column 0 holds the major version in its high nibble and the minor version
+ * in its low nibble; the remaining two version fields are set to 0.
+ * A NULL versionArray is ignored.
+ */
+U_CAPI void U_EXPORT2
+u_charAge(UChar32 c, UVersionInfo versionArray) {
+    if(versionArray==NULL) {
+        return;
+    }
+    const uint32_t age=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT;
+    versionArray[0]=(uint8_t)(age>>4);
+    versionArray[1]=(uint8_t)(age&0xf);
+    versionArray[3]=0;
+    versionArray[2]=0;
+}
+
+/*
+ * Returns the script code of c.
+ *
+ * Returns USCRIPT_INVALID_CODE when pErrorCode is NULL or already indicates
+ * a failure, or when c is above 0x10ffff (which also sets
+ * U_ILLEGAL_ARGUMENT_ERROR). Otherwise the script field of properties vector
+ * column 0 either holds the code directly, stands for Common or Inherited,
+ * or indexes scriptExtensions[] where the code is stored.
+ */
+U_CAPI UScriptCode U_EXPORT2
+uscript_getScript(UChar32 c, UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return USCRIPT_INVALID_CODE;
+    }
+    if((uint32_t)c>0x10ffff) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return USCRIPT_INVALID_CODE;
+    }
+
+    const uint32_t scriptField=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
+    const uint32_t merged=uprops_mergeScriptCodeOrIndex(scriptField);
+    if(scriptField<UPROPS_SCRIPT_X_WITH_COMMON) {
+        return (UScriptCode)merged;
+    }
+    if(scriptField<UPROPS_SCRIPT_X_WITH_INHERITED) {
+        return USCRIPT_COMMON;
+    }
+    if(scriptField<UPROPS_SCRIPT_X_WITH_OTHER) {
+        return USCRIPT_INHERITED;
+    }
+    return (UScriptCode)scriptExtensions[merged];
+}
+
+/*
+ * Reports whether sc is the script of c or is listed in the
+ * Script_Extensions of c.
+ *
+ * When the script field holds a plain script code, sc is compared with it
+ * directly. Otherwise the field indexes scriptExtensions[], and the list
+ * found there is scanned for sc.
+ */
+U_CAPI UBool U_EXPORT2
+uscript_hasScript(UChar32 c, UScriptCode sc) {
+ uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
+ uint32_t codeOrIndex=uprops_mergeScriptCodeOrIndex(scriptX);
+ if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
+ return sc==(UScriptCode)codeOrIndex;
+ }
+
+ const uint16_t *scx=scriptExtensions+codeOrIndex;
+ if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) {
+ /* in this form the second entry is itself an offset into scriptExtensions[] */
+ scx=scriptExtensions+scx[1];
+ }
+ uint32_t sc32=sc;
+ if(sc32>0x7fff) {
+ /* Guard against bogus input that would make us go past the Script_Extensions terminator. */
+ return FALSE;
+ }
+ /* advance while sc32 is greater than the current entry; sc32<=0x7fff, so an entry with bit 0x8000 set always stops the scan */
+ while(sc32>*scx) {
+ ++scx;
+ }
+ /* mask off the 0x8000 bit before comparing, in case the scan stopped on such an entry */
+ return sc32==(*scx&0x7fff);
+}
+
+/*
+ * Writes the Script_Extensions of c into scripts[] and returns their number.
+ *
+ * Returns 0 without doing anything when pErrorCode is NULL or already
+ * indicates a failure. Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 when
+ * capacity is negative, or positive with a NULL scripts pointer.
+ * When the result does not fit into capacity, as many entries as fit are
+ * written, U_BUFFER_OVERFLOW_ERROR is set, and the full length is still
+ * returned.
+ */
+U_CAPI int32_t U_EXPORT2
+uscript_getScriptExtensions(UChar32 c,
+ UScriptCode *scripts, int32_t capacity,
+ UErrorCode *pErrorCode) {
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(capacity<0 || (capacity>0 && scripts==NULL)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
+ uint32_t codeOrIndex=uprops_mergeScriptCodeOrIndex(scriptX);
+ if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
+ /* plain script code: the result is that single script */
+ if(capacity==0) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ } else {
+ scripts[0]=(UScriptCode)codeOrIndex;
+ }
+ return 1;
+ }
+
+ const uint16_t *scx=scriptExtensions+codeOrIndex;
+ if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) {
+ /* in this form the second entry is itself an offset into scriptExtensions[] */
+ scx=scriptExtensions+scx[1];
+ }
+ int32_t length=0;
+ uint16_t sx;
+ /* copy entries up to and including the one with bit 0x8000 set; keep counting even when the buffer is full */
+ do {
+ sx=*scx++;
+ if(length<capacity) {
+ scripts[length]=(UScriptCode)(sx&0x7fff);
+ }
+ ++length;
+ } while(sx<0x8000);
+ if(length>capacity) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ return length;
+}
+
+/* Extracts the block field from properties vector column 0 for c. */
+U_CAPI UBlockCode U_EXPORT2
+ublock_getCode(UChar32 c) {
+    const uint32_t blockBits=u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK;
+    return (UBlockCode)(blockBits>>UPROPS_BLOCK_SHIFT);
+}
+
+/* property starts for UnicodeSet ------------------------------------------- */
+
+/*
+ * Range callback for utrie2_enum(): adds the start code point of the range
+ * to the set behind the USetAdder passed as context. The end of the range
+ * and its value are not used. Always returns TRUE.
+ */
+static UBool U_CALLCONV
+_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
+    (void)end;
+    (void)value;
+    const USetAdder *adder=(const USetAdder *)context;
+    adder->add(adder->set, start);
+    return TRUE;
+}
+
+/*
+ * Adds cp and cp+1 to the set behind the USetAdder sa.
+ * Wrapped in do { } while(0) so that the two calls stay together when the
+ * macro is used as the body of an unbraced if/else or loop; the parameters
+ * are parenthesized so that compound arguments expand correctly.
+ * Both parameters are evaluated more than once.
+ */
+#define USET_ADD_CP_AND_NEXT(sa, cp) \
+    do { \
+        (sa)->add((sa)->set, (cp)); \
+        (sa)->add((sa)->set, (cp)+1); \
+    } while(0)
+
+/*
+ * Adds to the set behind sa every code point at which a property value
+ * implemented in this file may change.
+ *
+ * First the start of each same-value range of the main properties trie is
+ * added; then the boundaries of the ranges that the functions in this file
+ * hardcode rather than read from the trie. For a range, the first code point
+ * and the one after the last are added.
+ * Does nothing when *pErrorCode already indicates a failure.
+ */
+U_CFUNC void U_EXPORT2
+uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+
+ /* add the start code point of each same-value range of the main trie */
+ utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa);
+
+ /* add code points with hardcoded properties, plus the ones following them */
+
+ /* add for u_isblank() */
+ USET_ADD_CP_AND_NEXT(sa, TAB);
+
+ /* add for IS_THAT_CONTROL_SPACE() */
+ sa->add(sa->set, CR+1); /* range TAB..CR */
+ sa->add(sa->set, 0x1c);
+ sa->add(sa->set, 0x1f+1);
+ USET_ADD_CP_AND_NEXT(sa, NL);
+
+ /* add for u_isIDIgnorable() what was not added above */
+ sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */
+ sa->add(sa->set, HAIRSP);
+ sa->add(sa->set, RLM+1);
+ sa->add(sa->set, INHSWAP);
+ sa->add(sa->set, NOMDIG+1);
+ USET_ADD_CP_AND_NEXT(sa, ZWNBSP);
+
+ /* add no-break spaces for u_isWhitespace() what was not added above */
+ USET_ADD_CP_AND_NEXT(sa, NBSP);
+ USET_ADD_CP_AND_NEXT(sa, FIGURESP);
+ USET_ADD_CP_AND_NEXT(sa, NNBSP);
+
+ /* add for u_digit() */
+ /* start and one-past-end of a-z, A-Z and their fullwidth forms */
+ sa->add(sa->set, U_a);
+ sa->add(sa->set, U_z+1);
+ sa->add(sa->set, U_A);
+ sa->add(sa->set, U_Z+1);
+ sa->add(sa->set, U_FW_a);
+ sa->add(sa->set, U_FW_z+1);
+ sa->add(sa->set, U_FW_A);
+ sa->add(sa->set, U_FW_Z+1);
+
+ /* add for u_isxdigit() */
+ /* only the one-past-end points are added here; the range starts were added for u_digit() above */
+ sa->add(sa->set, U_f+1);
+ sa->add(sa->set, U_F+1);
+ sa->add(sa->set, U_FW_f+1);
+ sa->add(sa->set, U_FW_F+1);
+
+ /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
+ sa->add(sa->set, WJ); /* range WJ..NOMDIG */
+ sa->add(sa->set, 0xfff0);
+ sa->add(sa->set, 0xfffb+1);
+ sa->add(sa->set, 0xe0000);
+ sa->add(sa->set, 0xe0fff+1);
+
+ /* add for UCHAR_GRAPHEME_BASE and others */
+ USET_ADD_CP_AND_NEXT(sa, CGJ);
+}
+
+/*
+ * Adds to the set behind sa the start code point of each same-value range
+ * of the properties vectors trie.
+ * Does nothing when *pErrorCode already indicates a failure.
+ */
+U_CFUNC void U_EXPORT2
+upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
+    if(!U_FAILURE(*pErrorCode)) {
+        utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa);
+    }
+}
diff --git a/thirdparty/icu4c/common/uchar_props_data.h b/thirdparty/icu4c/common/uchar_props_data.h
new file mode 100644
index 0000000000..9a78918204
--- /dev/null
+++ b/thirdparty/icu4c/common/uchar_props_data.h
@@ -0,0 +1,3860 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// Copyright (C) 1999-2016, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// file name: uchar_props_data.h
+//
+// machine-generated by: icu/tools/unicode/c/genprops/corepropsbuilder.cpp
+
+
+#ifdef INCLUDED_FROM_UCHAR_C
+
+static const UVersionInfo dataVersion={0xd,0,0,0};
+
+static const uint16_t propsTrie_index[22276]={
+0x46d,0x475,0x47d,0x485,0x49d,0x4a5,0x4ad,0x4b5,0x4bd,0x4c5,0x4cb,0x4d3,0x4db,0x4e3,0x4eb,0x4f3,
+0x4f9,0x501,0x509,0x511,0x514,0x51c,0x524,0x52c,0x534,0x53c,0x538,0x540,0x548,0x550,0x555,0x55d,
+0x565,0x56d,0x571,0x579,0x581,0x589,0x591,0x599,0x595,0x59d,0x5a2,0x5aa,0x5b0,0x5b8,0x5c0,0x5c8,
+0x5d0,0x5d8,0x5e0,0x5e8,0x5ed,0x5f5,0x5f8,0x600,0x608,0x610,0x616,0x61e,0x61d,0x625,0x62d,0x635,
+0x645,0x63d,0x64d,0x655,0x48d,0x665,0x66b,0x65d,0x67b,0x67d,0x685,0x673,0x695,0x69b,0x6a3,0x68d,
+0x6b3,0x6b9,0x6c1,0x6ab,0x6d1,0x6d7,0x6df,0x6c9,0x6ef,0x6f5,0x6fd,0x6e7,0x70d,0x715,0x71d,0x705,
+0x72d,0x733,0x73b,0x725,0x74b,0x751,0x759,0x743,0x769,0x76e,0x776,0x761,0x786,0x78d,0x795,0x77e,
+0x619,0x79d,0x7a5,0x48d,0x7ad,0x7b4,0x7bc,0x48d,0x7c4,0x7cc,0x7d4,0x7d9,0x7e1,0x7e8,0x7f0,0x48d,
+0x5d8,0x7f8,0x800,0x808,0x810,0x565,0x820,0x818,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x828,0x5d8,0x830,0x834,0x83c,0x5d8,0x842,0x5d8,0x848,0x850,0x858,0x565,0x565,0x860,
+0x868,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x86d,0x875,0x5d8,0x5d8,0x87d,0x885,0x88d,0x895,0x89d,0x5d8,0x8a5,0x8ad,0x8b5,
+0x8c5,0x5d8,0x8cd,0x8cf,0x8d7,0x8bd,0x5d8,0x8da,0x8ee,0x8e2,0x8ea,0x8f6,0x5d8,0x8fe,0x904,0x90c,
+0x914,0x5d8,0x924,0x92c,0x934,0x91c,0x944,0x48d,0x94c,0x94f,0x957,0x93c,0x967,0x95f,0x5d8,0x96e,
+0x5d8,0x97d,0x976,0x985,0x98d,0x991,0x999,0x9a1,0x50d,0x9a9,0x9ac,0x9b2,0x9b9,0x9ac,0x534,0x9c1,
+0x4bd,0x4bd,0x4bd,0x4bd,0x9c9,0x4bd,0x4bd,0x4bd,0x9d9,0x9e1,0x9e9,0x9f1,0x9f9,0x9fd,0xa05,0x9d1,
+0xa1d,0xa25,0xa0d,0xa15,0xa2d,0xa35,0xa3d,0xa45,0xa5d,0xa4d,0xa55,0xa65,0xa6d,0xa7c,0xa81,0xa74,
+0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa91,0xa99,0x90c,0xa9c,0xaa4,0xaab,0xab0,0xab8,
+0x90c,0xabf,0xabe,0xacf,0xad2,0x90c,0x90c,0xac7,0x90c,0x90c,0x90c,0x90c,0x90c,0xae1,0xae9,0xad9,
+0x90c,0x90c,0x90c,0xaee,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0xaf4,0xafc,0x90c,0xb04,0xb0b,
+0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0xa89,0xa89,0xa89,0xa89,0xb13,0xa89,0xb1a,0xb21,
+0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0x90c,0xb29,0xb30,0xb34,0xb3a,0x90c,0x90c,0x90c,
+0x565,0xb4a,0xb42,0xb52,0x4bd,0x4bd,0x4bd,0xb5a,0x50d,0xb62,0x5d8,0xb68,0xb78,0xb70,0xb70,0x534,
+0xb80,0xb88,0xb90,0x48d,0xb98,0x90c,0x90c,0xb9f,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0xba7,0xbad,
+0xbbd,0xbb5,0x619,0x5d8,0xbc5,0x868,0x5d8,0xbcd,0xbd5,0xbd9,0x5d8,0x5d8,0xbde,0x5d8,0x90c,0xbe5,
+0xab9,0xbed,0xbf3,0x90c,0xbed,0xbfb,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,
+0xc03,0x5d8,0x5d8,0x5d8,0xc0b,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0xc11,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc16,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x90c,0x90c,
+0xc1e,0x5d8,0xc21,0x5d8,0xc29,0xc2f,0xc37,0xc3f,0xc44,0x5d8,0x5d8,0xc48,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc4f,0x5d8,0xc56,0xc5c,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc64,0x5d8,0x5d8,0x5d8,0xc6c,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc6e,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc75,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0xc7c,0x5d8,0x5d8,0x5d8,0xc83,0xc8b,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc90,0x5d8,0x5d8,0xc98,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc9c,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc9f,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xca2,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0xca8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0xcb0,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0xcb5,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xcba,0x5d8,0x5d8,0x5d8,0xcbf,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0xcc7,0xcce,0xcd2,0x5d8,0x5d8,0x5d8,0xcd9,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x8ce,
+0xce7,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0xcdf,0x90c,0xcef,0x985,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0xcf4,0xcfc,0x4bd,0xd0c,0xd04,0x5d8,0x5d8,0xd14,0xd1c,0xd2c,0x4bd,0xd31,0xd39,0xd3f,0xd47,0xd24,
+0xd4f,0xd57,0x5d8,0xd5f,0xd6f,0xd72,0xd67,0xd7a,0x62d,0xd82,0xd89,0x8ce,0x67b,0xd99,0xd91,0xda1,
+0x5d8,0xda9,0xdb1,0xdb9,0x5d8,0xdc1,0xdc9,0xdd1,0xdd9,0xde1,0xde5,0xded,0x50d,0x50d,0x5d8,0xdf5,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xdfd,0xe09,0xe01,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,
+0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0x5d8,0x5d8,0x5d8,0xe21,0x5d8,0xcda,0xe28,0xe2d,
+0x5d8,0x5d8,0x5d8,0xe35,0x5d8,0x5d8,0x8d9,0x48d,0xe4b,0xe3b,0xe43,0x5d8,0x5d8,0xe53,0xe5b,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xe60,0xe68,0x5d8,0xe6c,0x5d8,0xe72,0xe76,
+0xe7e,0xe86,0xe8d,0xe95,0x5d8,0x5d8,0x5d8,0xe9b,0xeb3,0x47d,0xebb,0xec3,0xec8,0x8ee,0xea3,0xeab,
+0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,
+0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,
+0x1234,0x1234,0x1274,0x12b4,0x12f4,0x132c,0x136c,0x13ac,0x13e4,0x1424,0x1450,0x1490,0x14d0,0x14e0,0x1520,0x1554,
+0x1594,0x15c4,0x1604,0x1644,0x1654,0x1688,0x16c0,0x1700,0x1740,0x1780,0x17b4,0x17e0,0x1820,0x1858,0x1874,0x18b4,
+0xa80,0xac0,0xb00,0xb40,0xb80,0xa40,0xbc0,0xa40,0xbe2,0xa40,0xa40,0xa40,0xa40,0xc22,0x1db,0x1db,
+0xc62,0xca2,0xa40,0xa40,0xa40,0xa40,0xce2,0xd02,0xa40,0xa40,0xd42,0xd82,0xdc2,0xe02,0xe42,0xe82,
+0xec2,0xef9,0x1db,0x1db,0xf1d,0xf51,0x1db,0xf79,0x1db,0x1db,0x1db,0x1db,0xfa6,0x1db,0x1db,0x1db,
+0x1db,0x1db,0x1db,0x1db,0xfba,0x1db,0xff2,0x1032,0x1db,0x103d,0x1db,0x1db,0x1db,0x1073,0xa40,0x10b3,
+0x1db,0x1db,0x10f3,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0x1133,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,
+0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x1173,
+0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,
+0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x1173,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0xed0,0xed7,0xedf,0x48d,0x5d8,0x5d8,0x5d8,0xee7,0xef7,0xeef,0xf0e,0xeff,0xf06,0xf16,0xf1a,0xf1e,
+0x48d,0x48d,0x48d,0x48d,0x8ce,0x5d8,0xf26,0xf2e,0x5d8,0xf36,0xf3e,0xf42,0xf4a,0x5d8,0xf52,0x48d,
+0x565,0x56f,0xf5a,0x5d8,0xf5e,0xf66,0xf76,0xf6e,0x5d8,0xf7e,0x5d8,0xf85,0x48d,0x48d,0x48d,0x48d,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xb78,0x8da,0xe72,0x48d,0x48d,0x48d,0x48d,
+0xf95,0xf8d,0xf98,0xfa0,0x8ee,0xfa8,0x48d,0xfb0,0xfb8,0xfc0,0x48d,0x48d,0x5d8,0xfd0,0xfd8,0xfc8,
+0xfe8,0xfef,0xfe0,0xff7,0xfff,0x48d,0x100f,0x1007,0x5d8,0x1012,0x101a,0x1022,0x102a,0x1032,0x48d,0x48d,
+0x5d8,0x5d8,0x103a,0x48d,0x565,0x1042,0x50d,0x104a,0x5d8,0x1052,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x105a,0x5d8,0x1062,0x48d,0x48d,0x106a,0x1072,0x1079,0x48d,0x48d,0xe68,0x1081,0xb78,
+0x1091,0x60e,0x1099,0x1089,0x967,0x10a1,0x10a9,0x10af,0x10c7,0x10b7,0x10bf,0x10cb,0x967,0x10db,0x10d3,0x10e3,
+0x10f3,0x10eb,0x48d,0x48d,0x10fa,0x1102,0x630,0x110a,0x111a,0x1120,0x1128,0x1112,0x48d,0x48d,0x48d,0x48d,
+0x5d8,0x1130,0x1138,0x1140,0x5d8,0x1148,0x1150,0x48d,0x48d,0x48d,0x48d,0x48d,0x5d8,0x1158,0x1160,0x48d,
+0x5d8,0x1168,0x1170,0x1178,0x5d8,0x1188,0x1180,0x48d,0x848,0x1190,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x5d8,0x1198,0x48d,0x48d,0x48d,0x565,0x50d,0x11a0,0x11b0,0x11b6,0x11a8,0x48d,0x48d,0x11c6,0x11ca,0x11be,
+0x11e2,0x11d2,0x11da,0x5d8,0x11f2,0x11ea,0x5d8,0x8cf,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x1208,0x120d,0x11fa,0x1202,0x121d,0x1215,0x48d,0x48d,0x122c,0x1230,0x1224,0x1240,0x1238,0x1180,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x1244,0x48d,0x48d,0x48d,0x48d,0x48d,0x124b,0x125b,0x1253,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x8d9,0x48d,0x48d,0x48d,
+0x126b,0x1273,0x127b,0x1263,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x1283,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x128b,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x1293,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x8cf,0x8ee,0x129b,0x48d,0x48d,0xe68,0x12a3,0x5d8,0x12b3,0x12bb,0x12c3,0x12ab,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x565,0x50d,0x12cb,0x48d,0x48d,0x48d,0x5d8,0x5d8,0x12d3,0x12d8,0x12de,0x48d,
+0x48d,0x12e6,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x12ee,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x8da,0x48d,0x103a,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x8ee,0x48d,0x12f4,0x12fb,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xe01,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x1301,0x1306,0x130e,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0xba7,0x90c,0x1316,0x90c,0x131d,0x1325,0x132b,
+0x90c,0x1331,0x90c,0x90c,0x1339,0x48d,0x48d,0x48d,0x48d,0x1341,0x90c,0x90c,0xabb,0x1349,0x48d,0x48d,
+0x48d,0x48d,0x1359,0x1360,0x1365,0x136b,0x1373,0x137b,0x1383,0x135d,0x138b,0x1393,0x139b,0x13a0,0x1372,0x1359,
+0x1360,0x135c,0x136b,0x13a8,0x135a,0x13ab,0x135d,0x13b3,0x13bb,0x13c3,0x13ca,0x13b6,0x13be,0x13c6,0x13cd,0x13b9,
+0x13d5,0x1351,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,
+0x90c,0x90c,0x534,0x13e5,0x534,0x13ec,0x13f3,0x13dd,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x13fa,0x1402,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x5d8,0x1412,0x140a,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x5d8,0x141a,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x1422,0x48d,0x565,0x1432,0x142a,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x143a,0x144a,0x1442,0x48d,0x48d,0x145a,0x1452,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x146a,0x1472,0x147a,0x1482,0x148a,0x1492,0x48d,0x1462,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x90c,0x149a,0x90c,0x90c,0xb9f,0x149f,0x14a3,0xba7,0x14ab,0x90c,0x90c,0x90c,0x90c,0xba9,
+0x48d,0x14b3,0x14bb,0x14bf,0x14c7,0x14cf,0x48d,0x48d,0x48d,0x48d,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,
+0x90c,0x14d7,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,
+0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x14df,0x14e7,0x90c,0x90c,0x90c,0xb9f,0x90c,0x90c,
+0x14ef,0x14f7,0x149a,0x90c,0x14ff,0x90c,0x1507,0x150c,0x48d,0x48d,0x90c,0x90c,0x90c,0x1514,0x90c,0x90c,
+0x151b,0x90c,0x90c,0x90c,0xb9f,0x1520,0x1528,0x152e,0x1533,0x48d,0x90c,0x90c,0x90c,0x90c,0x153b,0x90c,
+0xabe,0x117c,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x1543,0x5d8,0x5d8,0x154a,0x5d8,0x5d8,0x5d8,0x1552,0x5d8,0x155a,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0xc80,0x5d8,0x5d8,0x1562,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x156a,0x1572,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xcbf,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x1579,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x1580,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x1587,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0xf5e,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x158b,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0xf5e,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x1066,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x1590,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x1598,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0xf5e,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,
+0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x655,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x15a8,0x15a0,0x15a0,0x15a0,0x48d,0x48d,0x48d,0x48d,0x534,0x534,0x534,0x534,0x534,
+0x534,0x534,0x15b0,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,
+0x48d,0x48d,0x48d,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,
+0xe19,0xe19,0x15b8,0x46c,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
+0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
+0xf,0xf,0xf,0xf,0xc,0x17,0x17,0x17,0x19,0x17,0x17,0x17,0x14,0x15,0x17,0x18,
+0x17,0x13,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,
+0x18,0x18,0x18,0x17,0x17,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x14,
+0x17,0x15,0x1a,0x16,0x1a,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0x14,
+0x18,0x15,0x18,0xf,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
+0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
+0xf,0xf,0xf,0xf,0xc,0x17,0x19,0x19,0x19,0x19,0x1b,0x17,0x1a,0x1b,5,0x1c,
+0x18,0x10,0x1b,0x1a,0x1b,0x18,0x34b,0x38b,0x1a,2,0x17,0x17,0x1a,0x30b,5,0x1d,
+0x34cb,0x344b,0x3ccb,0x17,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,0x18,1,1,1,1,
+1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,0x18,2,2,2,2,
+2,2,2,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,2,1,2,1,
+2,1,2,1,2,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,1,2,1,2,1,2,2,2,1,1,2,
+1,2,1,1,2,1,1,1,2,2,1,1,1,1,2,1,
+1,2,1,1,1,2,2,2,1,1,2,1,1,2,1,2,
+1,2,1,1,2,1,2,2,1,2,1,1,2,1,1,1,
+2,1,2,1,1,2,2,5,1,2,2,2,5,5,5,5,
+1,3,2,1,3,2,1,3,2,1,2,1,2,1,2,1,
+2,1,2,1,2,1,2,1,2,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,2,1,3,2,
+1,2,1,1,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,2,2,2,2,2,2,1,1,
+2,1,1,2,2,1,2,1,1,1,1,2,1,2,1,2,
+1,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,5,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+4,4,0x1a,0x1a,0x1a,0x1a,4,4,4,4,4,4,4,4,4,4,
+4,4,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,
+4,4,4,4,4,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,4,0x1a,4,0x1a,
+0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+1,2,1,2,4,0x1a,1,2,0,0,4,2,2,2,0x17,1,
+0,0,0,0,0x1a,0x1a,1,0x17,1,1,1,0,1,0,1,1,
+2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0,1,1,1,1,1,1,1,1,1,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,1,2,2,1,1,1,2,2,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,2,2,2,2,1,2,0x18,1,2,1,1,2,
+2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,0x1b,6,6,6,6,6,7,7,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,1,2,1,2,1,2,1,2,1,2,1,
+2,1,2,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,0,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
+0,4,0x17,0x17,0x17,0x17,0x17,0x17,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,0x17,0x13,0,0,0x1b,0x1b,0x19,
+0,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,0x13,6,
+0x17,6,6,0x17,6,6,0x17,6,0,0,0,0,0,0,0,0,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,0,0,0,0,5,
+5,5,5,0x17,0x17,0,0,0,0,0,0,0,0,0,0,0,
+0x10,0x10,0x10,0x10,0x10,0x10,0x18,0x18,0x18,0x17,0x17,0x19,0x17,0x17,0x1b,0x1b,
+6,6,6,6,6,6,6,6,6,6,6,0x17,0x10,0,0x17,0x17,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17,5,5,
+6,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,0x17,5,6,6,6,6,6,6,6,0x10,0x1b,6,
+6,6,6,6,6,4,4,6,6,0x1b,6,6,6,6,5,5,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,5,0x1b,0x1b,5,
+0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0x10,
+5,6,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,0,0,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,
+6,6,6,6,6,5,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,
+6,6,6,6,6,6,6,6,4,4,0x1b,0x17,0x17,0x17,4,0,
+0,6,0x19,0x19,6,6,6,6,4,6,6,6,4,6,6,6,
+6,6,0,0,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,
+0x17,0x17,0x17,0,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,6,6,6,6,4,6,
+6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,
+0,0,0x17,0,5,5,5,5,5,5,5,5,5,5,5,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,6,6,0x10,6,6,6,6,6,6,6,6,6,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,
+5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,6,
+6,6,6,6,6,6,6,6,6,6,6,6,5,5,6,6,
+0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,4,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,8,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,8,
+6,5,8,8,8,6,6,6,6,6,6,6,6,8,8,8,
+8,6,8,8,5,6,6,6,6,6,6,6,5,5,5,5,
+5,5,5,5,5,5,6,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,
+0x1c9,0x209,0x249,0x289,5,5,0x19,0x19,0x37cb,0x35cb,0x3fcb,0x34cb,0x3ccb,0x94b,0x1b,0x19,
+5,0x17,6,0,5,6,8,8,0,5,5,5,5,5,5,5,
+5,0,0,5,5,0,0,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,0,5,5,5,5,5,5,5,0,5,0,
+0,0,5,5,5,5,0,0,6,5,8,8,8,6,6,6,
+6,0,0,8,8,0,0,8,8,6,5,0,0,0,0,0,
+0,0,0,8,0,0,0,0,5,5,0,5,0,0,0,0,
+0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,6,6,5,5,
+5,6,0x17,0,0,0,0,0,0,0,0,0,0,6,6,8,
+0,5,5,5,5,5,5,0,0,0,0,5,5,0,0,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5,
+5,5,5,5,5,0,5,5,0,5,5,0,5,5,0,0,
+6,0,8,8,8,6,6,0,0,0,0,6,6,0,0,6,
+6,6,0,0,0,6,0,0,0,0,0,0,0,5,5,5,
+5,0,5,0,5,5,6,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,
+0x1c9,0x209,0x249,0x289,0x17,0x19,0,0,0,0,0,0,0,5,6,6,
+6,6,6,6,0,6,6,8,0,5,5,5,5,5,5,5,
+5,5,0,5,5,5,0,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,0,5,5,5,5,5,5,5,0,5,5,
+0,5,5,5,5,5,0,0,6,5,8,8,8,6,6,6,
+6,6,0,6,6,8,0,8,8,6,0,0,5,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,5,5,6,6,
+0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x1b,5,0x34cb,0x344b,
+0x3ccb,0x37cb,0x35cb,0x3fcb,0,0,0,0,0,0,0,0,0,6,8,8,
+0,5,5,5,5,5,5,5,5,0,0,5,5,0,0,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5,
+5,5,5,5,5,0,5,5,0,5,5,5,5,5,0,0,
+6,5,8,6,8,6,6,6,6,0,0,8,8,0,0,8,
+8,6,0,0,0,0,0,0,0,6,6,8,0,0,0,0,
+5,5,0,5,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,
+0x1c9,0x209,0x249,0x289,0x7cb,0x1e4b,0x784b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x19,0x1b,0,
+0,0,0,0,0,0,6,5,0,5,5,5,5,5,5,0,
+0,0,5,5,5,0,5,5,5,5,0,0,0,5,5,0,
+5,0,5,5,0,0,0,5,5,0,0,0,5,5,5,0,
+0,0,5,5,5,5,5,5,5,5,5,5,5,5,0,0,
+0,0,8,8,6,8,8,0,0,0,8,8,8,0,8,8,
+8,6,0,0,5,0,0,0,0,0,0,8,0,0,0,0,
+0,0,0,0,5,5,6,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,
+0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,0,0x17,0x54b,0x58b,0x5cb,0x60b,
+0x58b,0x5cb,0x60b,0x1b,6,8,8,8,6,5,5,5,5,5,5,5,
+5,0,5,5,5,0,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,0,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,0,0,0,5,6,6,6,8,8,8,
+8,0,6,6,6,0,6,6,6,6,0,0,0,0,0,0,
+0,6,6,0,5,5,5,0,0,0,0,0,5,5,6,6,
+0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,5,5,0,
+0,0,0,0,0,0,0,0,0,0,0,0,5,6,8,8,
+0x17,5,5,5,5,5,5,5,5,0,5,5,5,0,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5,
+5,5,5,5,5,5,5,5,0,5,5,5,5,5,0,0,
+6,5,8,6,8,8,8,8,8,0,6,8,8,0,8,8,
+6,6,0,0,0,0,0,0,0,8,8,0,0,0,0,0,
+0,0,5,0,5,5,6,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,
+0x1c9,0x209,0x249,0x289,0x7cb,0x1e4b,0x784b,0x34cb,0x344b,0x3ccb,0x37cb,0x35cb,0x3fcb,0x1b,5,5,
+5,5,5,5,6,6,8,8,5,5,5,5,5,5,5,5,
+5,0,5,5,5,0,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,6,6,5,8,8,8,6,6,6,6,0,8,8,
+8,0,8,8,8,6,5,0x1b,0,0,0,0,5,5,5,8,
+0xcc0b,0xca0b,0xcb4b,0xc90b,0x364b,0xc94b,0x350b,5,0,0,0,0,0,0,0x49,0x89,
+0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,8,8,0x17,0,0,0,
+0,0,0,0,0,0,0,0,0,6,8,8,0,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,
+0,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,0,5,5,5,5,5,5,5,5,5,
+0,5,0,0,5,5,5,5,5,5,5,0,0,0,6,0,
+0,0,0,8,8,8,6,6,6,0,6,0,8,8,8,8,
+8,8,8,8,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,6,5,5,6,6,6,6,6,6,6,0,
+0,0,0,0x19,5,5,5,5,5,5,4,6,6,6,6,6,
+6,6,6,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,
+0,0,0,0,0,5,5,0,5,0,5,5,5,5,5,0,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,0,5,0,5,5,5,5,5,5,5,5,5,
+5,6,5,5,6,6,6,6,6,6,6,6,6,5,0,0,
+5,5,5,5,5,0,4,0,6,6,6,6,6,6,0,0,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,5,5,5,5,
+5,0x1b,0x1b,0x1b,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,
+0x17,0x17,0x17,0x1b,0x17,0x1b,0x1b,0x1b,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x344b,0x3c4b,0x444b,0x4c4b,0x544b,0x5c4b,
+0x644b,0x6c4b,0x744b,0x2c4b,0x1b,6,0x1b,6,0x1b,6,0x14,0x15,0x14,0x15,8,8,
+5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,0,0,0,0,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,8,6,6,6,6,6,0x17,6,6,5,5,5,5,
+5,6,6,6,6,6,6,6,6,6,6,6,0,6,6,6,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,6,6,6,6,6,6,6,6,6,6,0,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,
+0x17,0x17,0x17,0x17,0x17,0x1b,0x1b,0x1b,0x1b,0x17,0x17,0,0,0,0,0,
+5,5,5,5,5,5,5,5,5,5,5,8,8,6,6,6,
+6,8,6,6,6,6,6,6,8,6,6,8,8,6,6,5,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17,0x17,0x17,
+5,5,5,5,5,5,8,8,6,6,5,5,5,5,6,6,
+6,5,8,8,8,5,5,8,8,8,8,8,8,8,5,5,
+5,6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,
+5,5,6,8,8,6,6,8,8,8,8,8,8,6,5,8,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,8,8,8,6,0x1b,0x1b,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,0x17,4,2,2,2,
+1,1,1,1,1,1,0,1,0,0,0,0,0,1,0,0,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+5,5,5,5,5,5,5,5,5,0,5,5,5,5,0,0,
+5,5,5,5,5,5,5,0,5,0,5,5,5,5,0,0,
+5,5,5,5,5,5,5,5,5,0,5,5,5,5,0,0,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,0,5,5,5,5,0,0,5,5,5,5,5,5,5,0,
+5,0,5,5,5,5,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,0,5,5,5,5,0,0,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,0,0,6,6,6,
+0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,
+0x4cb,0x50b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x788b,0,0,0,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,0,0,2,2,2,2,2,2,0,0,
+0x13,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,0x1b,0x17,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,0xc,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x14,
+0x15,0,0,0,5,5,5,5,5,5,5,5,5,5,5,0x17,
+0x17,0x17,0x98a,0x9ca,0xa0a,5,5,5,5,5,5,5,5,0,0,0,
+0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
+5,0,5,5,5,5,6,6,6,0,0,0,0,0,0,0,
+0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,6,6,6,0x17,0x17,0,0,0,0,0,
+0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,6,6,0,0,0,0,0,0,0,0,
+0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
+5,0,5,5,5,0,6,6,0,0,0,0,0,0,0,0,
+0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,6,6,8,6,6,6,6,6,
+6,6,8,8,8,8,8,8,8,8,6,8,8,6,6,6,
+6,6,6,6,6,6,6,6,0x17,0x17,0x17,4,0x17,0x17,0x17,0x19,
+5,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,
+0,0,0,0,0x54b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0,0,
+0,0,0,0,5,5,5,5,5,5,5,5,5,6,5,0,
+0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,0x17,0x17,0x17,0x17,0x17,0x17,0x13,0x17,0x17,0x17,0x17,6,
+6,6,0x10,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,
+0,0,0,0,5,5,5,4,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,0,0,0,0,0,0,0,5,5,5,5,
+5,6,6,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0,
+0,0,0,0,0,0,0,0,6,6,6,8,8,8,8,6,
+6,8,8,8,0,0,0,0,8,8,6,8,8,8,8,8,
+8,6,6,6,0,0,0,0,0x1b,0,0,0,0x17,0x17,0x49,0x89,
+0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,0,0,5,5,5,5,5,0,0,0,
+0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,0,0,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,0,0,0,0,0,0,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x30b,0,0,0,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,6,6,8,8,6,0,0,0x17,0x17,
+0x17,0x17,0x17,0x17,0x17,0x17,0x17,4,0x17,0x17,0x17,0x17,0x17,0x17,0,0,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,6,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,8,6,8,6,6,6,6,6,6,6,0,
+6,8,6,8,8,6,6,6,6,6,6,6,6,8,8,8,
+8,8,8,6,6,6,6,6,6,6,6,6,6,0,0,6,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,
+0x17,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,6,6,6,6,
+6,6,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,
+6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+6,6,6,6,8,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+6,8,6,6,6,6,6,8,6,8,8,8,8,8,6,8,
+8,5,5,5,5,5,5,5,0,0,0,0,0x49,0x89,0xc9,0x109,
+0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17,0x17,0x17,5,8,6,6,
+6,6,8,8,6,6,8,6,6,6,5,5,0x49,0x89,0xc9,0x109,
+0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,5,5,5,5,6,6,8,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,8,
+6,6,8,8,8,6,8,6,6,6,8,8,0,0,0,0,
+0,0,0,0,0x17,0x17,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
+0x249,0x289,0,0,0,5,5,5,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
+0x249,0x289,5,5,5,5,5,5,8,8,8,8,8,8,8,8,
+6,6,6,6,6,6,6,6,8,8,6,6,0,0,0,0x17,
+0x17,0x17,0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,4,4,4,4,
+4,4,0x17,0x17,2,2,2,2,2,2,2,2,2,0,0,0,
+0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
+0,1,1,1,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,
+0,0,0,0,6,6,6,0x17,6,6,6,6,6,6,6,6,
+6,6,6,6,6,8,6,6,6,6,6,6,6,5,5,5,
+5,6,5,5,5,5,5,5,6,5,5,8,6,6,5,0,
+0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,
+4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+4,4,4,2,2,2,2,2,2,2,2,2,2,2,2,2,
+4,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,4,
+4,4,4,4,6,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,6,
+6,6,6,6,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,2,2,2,2,2,2,
+2,2,1,2,2,2,2,2,2,2,2,2,1,1,1,1,
+1,0x1a,0x1a,0x1a,0,0,2,2,2,0,2,2,1,1,1,1,
+3,0x1a,0x1a,0,2,2,2,2,2,2,2,2,1,1,1,1,
+1,1,1,1,2,2,2,2,2,2,0,0,1,1,1,1,
+1,1,0,0,2,2,2,2,2,2,2,2,1,1,1,1,
+1,1,1,1,2,2,2,2,2,2,2,2,1,1,1,1,
+1,1,1,1,2,2,2,2,2,2,0,0,1,1,1,1,
+1,1,0,0,2,2,2,2,2,2,2,2,0,1,0,1,
+0,1,0,1,2,2,2,2,2,2,2,2,1,1,1,1,
+1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,0,0,2,2,2,2,2,2,2,2,3,3,3,3,
+3,3,3,3,2,2,2,2,2,2,2,2,3,3,3,3,
+3,3,3,3,2,2,2,2,2,0,2,2,1,1,1,1,
+3,0x1a,2,0x1a,0x1a,0x1a,2,2,2,0,2,2,1,1,1,1,
+3,0x1a,0x1a,0x1a,2,2,2,2,0,0,2,2,1,1,1,1,
+0,0x1a,0x1a,0x1a,0x16,0x17,0x17,0x17,0x18,0x14,0x15,0x17,0x17,0x17,0x17,0x17,
+0x17,0x17,0x17,0x17,0x17,0x17,0x18,0x17,0x16,0x17,0x17,0x17,0x17,0x17,0x17,0x17,
+0x17,0x17,0x17,0xc,0x10,0x10,0x10,0x10,0x10,0,0x10,0x10,0x10,0x10,0x10,0x10,
+0x10,0x10,0x10,0x10,0x2cb,4,0,0,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x18,0x18,
+0x18,0x14,0x15,4,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0x10,
+0x10,0x10,0x10,0x10,0x13,0x13,0x13,0x13,0x13,0x13,0x17,0x17,0x1c,0x1d,0x14,0x1c,
+0x1c,0x1d,0x14,0x1c,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0xd,0xe,0x10,0x10,
+0x10,0x10,0x10,0xc,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x1c,0x1d,0x17,
+0x17,0x17,0x17,0x16,0x2cb,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x18,0x18,
+0x18,0x14,0x15,0,4,4,4,4,4,4,4,4,4,4,4,4,
+4,0,0,0,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,
+0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,
+0x19,0x19,0x19,0x19,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,6,6,6,6,6,6,6,6,6,6,6,6,
+6,7,7,7,7,6,7,7,7,6,6,6,6,6,6,6,
+6,6,6,6,6,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0x1b,0x1b,0x1b,0x1b,1,0x1b,1,0x1b,1,0x1b,1,1,
+1,1,0x1b,2,1,1,1,1,2,5,5,5,5,2,0x1b,0x1b,
+2,2,1,1,0x18,0x18,0x18,0x18,0x18,1,2,2,2,2,0x1b,0x18,
+0x1b,0x1b,2,0x1b,0x358b,0x360b,0x364b,0x348b,0x388b,0x350b,0x390b,0x3d0b,0x410b,0x354b,0x454b,0x35cb,
+0x3dcb,0x45cb,0x4dcb,0x58b,0x1b,0x1b,1,0x1b,0x1b,0x1b,0x1b,1,0x1b,0x1b,2,1,
+1,1,2,2,1,1,1,2,0x1b,1,0x1b,0x1b,0x18,1,1,1,
+1,1,0x1b,0x1b,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x7ca,0x80a,0x84a,
+0x11ca,0x1e4a,0x980a,0x784a,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x7ca,0x80a,0x84a,
+0x11ca,0x1e4a,0x980a,0x784a,0x784a,0x984a,0x788a,1,2,0x6ca,0x11ca,0x988a,0x78ca,0x54b,0x1b,0x1b,
+0,0,0,0,0x18,0x18,0x18,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+0x18,0x1b,0x1b,0x18,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x18,0x18,0x1b,0x1b,0x18,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+0x18,0x18,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x14,0x15,0x14,0x15,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x14,0x15,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,
+0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,
+0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x2cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b,
+0xa4b,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x2cb,0x30b,0x34b,0x38b,0x3cb,
+0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b,0xa4b,
+0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,
+0x98b,0x9cb,0xa0b,0xa4b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,
+0x18,0x18,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,
+0x14,0x15,0x14,0x15,0x14,0x15,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,
+0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,
+0x48b,0x4cb,0x50b,0x7cb,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x18,0x18,0x18,0x18,0x18,0x14,0x15,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+0x18,0x18,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x18,0x18,0x18,0x18,
+0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x14,
+0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,
+0x15,0x14,0x15,0x14,0x15,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+0x14,0x15,0x14,0x15,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+0x14,0x15,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+0x18,0x18,0x18,0x18,0x18,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,0,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,0,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,1,2,1,1,1,2,2,1,
+2,1,2,1,2,1,1,1,1,2,1,2,2,1,2,2,
+2,2,2,2,4,4,1,1,1,2,1,2,2,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,1,2,1,2,6,6,6,1,2,0,0,0,0,
+0,0x17,0x17,0x17,0x17,0x344b,0x17,0x17,2,2,2,2,2,2,0,2,
+0,0,0,0,0,2,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,4,
+0x17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,
+5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,0,
+5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,0,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,0,0,0,0,0,0,0,0,0,
+0x17,0x17,0x1c,0x1d,0x1c,0x1d,0x17,0x17,0x17,0x1c,0x1d,0x17,0x1c,0x1d,0x17,0x17,
+0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x13,0x17,0x17,0x13,0x17,0x1c,0x1d,0x17,0x17,
+0x1c,0x1d,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x17,0x17,0x17,0x17,0x17,4,
+0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x13,0x13,0x17,0x17,0x17,0x17,
+0x13,0x17,0x14,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,
+0x1b,0x1b,0x17,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0,0,0,0,0x1b,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,6,6,
+6,6,8,8,0x13,4,4,4,4,4,0x1b,0x1b,0x7ca,0xa4a,0xcca,4,
+5,0x17,0x1b,0x1b,0xc,0x17,0x17,0x17,0x1b,4,5,0x54a,0x14,0x15,0x14,0x15,
+0x14,0x15,0x14,0x15,0x14,0x15,0x1b,0x1b,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,
+0x13,0x14,0x15,0x15,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,0,0,6,6,0x1a,
+0x1a,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x17,
+4,4,4,5,0,0,0,0,0,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,0,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,0,0x1b,0x1b,0x58b,0x5cb,0x60b,0x64b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,
+0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1b,0xa8b,0xacb,0xb0b,
+0xb4b,0xb8b,0xbcb,0xc0b,0xc4b,0xc8b,0xccb,0xd0b,0xd4b,0xd8b,0xdcb,0xe0b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0xe4b,0xe8b,0xecb,
+0xf0b,0xf4b,0xf8b,0xfcb,0x100b,0x104b,0x108b,0x10cb,0x110b,0x114b,0x118b,0x11cb,5,5,5,5,
+5,0x685,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x5c5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x685,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,0x705,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,0x585,5,5,0x705,5,5,5,0x7885,
+5,0x605,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,0x785,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+0x5c5,5,5,5,5,5,5,5,0x685,5,0x645,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,0x7985,0x7c5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,0x7845,5,5,5,5,
+5,5,5,5,0x605,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,0x685,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+0x1e45,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+0x7985,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x7a85,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,0x5c5,5,0x745,5,0x6c5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,0x7c5,5,0x7845,0xa45,0xcc5,5,5,5,5,5,5,0xf45,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,0x605,0x605,0x605,0x605,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,0x645,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,0x585,5,5,5,5,5,5,5,0x585,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,0x585,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,0x785,0xa45,5,5,5,5,
+5,5,5,5,5,5,5,5,0x585,0x5c5,0x605,5,0x5c5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x7c5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,0x745,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,0x705,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x785,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x1e45,5,
+5,5,5,5,5,5,0x645,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+0x7885,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,0x5c5,5,5,5,5,0x5c5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,0x5c5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,0x7845,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x6c5,5,
+5,5,5,5,0x1e45,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+0x6c5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,0x545,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,4,5,5,5,5,5,5,5,5,5,5,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,4,0x17,0x17,0x17,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,2,1,2,1,2,4,4,6,6,
+1,2,1,2,1,2,1,2,1,2,1,2,1,2,5,6,
+7,7,7,0x17,6,6,6,6,6,6,6,6,6,6,0x17,4,
+5,5,5,5,5,5,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x54a,
+6,6,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,
+0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,
+0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,4,4,4,4,4,4,4,4,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,1,2,5,4,4,2,5,5,5,5,5,
+0x1a,0x1a,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+2,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+1,2,1,2,4,2,2,2,2,2,2,2,2,1,2,1,
+2,1,1,2,1,2,1,2,1,2,1,2,4,0x1a,0x1a,1,
+2,1,2,5,1,2,1,2,2,2,1,2,1,2,1,2,
+1,2,1,2,1,2,1,1,1,1,1,2,1,1,1,1,
+1,2,1,2,1,2,1,2,1,2,1,2,0,0,1,2,
+1,1,1,1,2,1,2,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,5,5,6,5,
+5,5,6,5,5,5,5,6,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,8,
+8,6,6,8,0x1b,0x1b,0x1b,0x1b,6,0,0,0,0x34cb,0x344b,0x3ccb,0x37cb,
+0x35cb,0x3fcb,0x1b,0x1b,0x19,0x1b,0,0,0,0,0,0,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,8,8,8,8,
+6,6,0,0,0,0,0,0,0,0,0x17,0x17,0x49,0x89,0xc9,0x109,
+0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,8,8,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,8,8,8,8,
+8,8,8,8,8,8,8,8,6,6,6,6,6,6,6,6,
+6,6,6,6,6,6,6,6,6,6,5,5,5,5,5,5,
+0x17,0x17,0x17,5,0x17,5,5,6,5,5,5,5,5,5,6,6,
+6,6,6,6,6,6,0x17,0x17,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,
+6,6,6,6,6,6,8,8,0,0,0,0,0,0,0,0,
+0,0,0,0x17,8,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,
+0x17,0x17,0,4,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,
+0,0,0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,6,8,8,6,6,6,6,8,8,
+6,6,8,8,5,5,5,5,5,6,4,5,5,5,5,5,
+5,5,5,5,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,
+5,5,5,0,5,5,5,5,5,5,5,5,5,6,6,6,
+6,6,6,8,8,6,6,8,8,6,6,0,0,0,0,0,
+0,0,0,0,5,5,5,6,5,5,5,5,5,5,5,5,
+6,8,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,
+0x17,0x17,0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,4,5,5,5,5,5,5,0x1b,0x1b,0x1b,5,8,
+6,8,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,6,5,6,6,6,5,5,6,6,5,5,5,
+5,5,6,6,5,6,5,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,
+5,4,0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,8,
+6,6,8,8,0x17,0x17,5,4,4,8,6,0,0,0,0,0,
+0,0,0,0,0,5,5,5,5,5,5,0,0,5,5,5,
+5,5,5,0,0,5,5,5,5,5,5,0,0,0,0,0,
+0,0,0,0,5,5,5,5,5,5,5,0,5,5,5,5,
+5,5,5,0,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0x1a,
+4,4,4,4,2,2,2,2,2,2,2,2,2,4,0x1a,0x1a,
+0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,5,5,5,8,8,6,8,8,6,8,8,0x17,
+8,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,
+0,0,0,0,5,5,5,5,0,0,0,0,0,0,0,0,
+0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+0,0,0,0,5,5,5,5,5,5,5,0,0,0,0,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+0x12,0x12,0x12,0x12,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,
+0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,
+0x11,0x11,0x11,0x11,5,5,5,5,5,5,5,5,5,5,5,0x605,
+5,5,5,5,5,5,5,0x7c5,5,5,5,5,0x5c5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,0x6c5,5,0x6c5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,0x7c5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,0x18,5,5,5,5,5,5,5,5,5,5,
+5,5,5,0,5,5,5,5,5,0,5,0,5,5,0,5,
+5,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,2,2,2,2,
+2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,
+2,2,2,2,0,0,0,0,0,5,6,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x1a,0x1a,
+0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x15,0x14,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+0,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,0x19,0x1b,0,0,6,6,6,6,6,6,6,6,
+6,6,6,6,6,6,6,6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x14,
+0x15,0x17,0,0,0,0,0,0,6,6,6,6,6,6,6,6,
+6,6,6,6,6,6,6,6,0x17,0x13,0x13,0x16,0x16,0x14,0x15,0x14,
+0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x17,0x17,0x14,0x15,0x17,0x17,0x17,
+0x17,0x16,0x16,0x16,0x17,0x17,0x17,0,0x17,0x17,0x17,0x17,0x13,0x14,0x15,0x14,
+0x15,0x14,0x15,0x17,0x17,0x17,0x18,0x13,0x18,0x18,0x18,0,0x17,0x19,0x17,0x17,
+0,0,0,0,5,5,5,5,5,0,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,0,0,0x10,0,0,5,5,
+5,5,5,5,0,0,5,5,5,5,5,5,0,0,5,5,
+5,5,5,5,0,0,5,5,5,0,0,0,0x19,0x19,0x18,0x1a,
+0x1b,0x19,0x19,0,0x1b,0x18,0x18,0x18,0x18,0x1b,0x1b,0,0,0,0,0,
+0,0,0,0,0,0x10,0x10,0x10,0x1b,0x1b,0,0,0,0x17,0x17,0x17,
+0x19,0x17,0x17,0x17,0x14,0x15,0x17,0x18,0x17,0x13,0x17,0x17,0x49,0x89,0xc9,0x109,
+0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x18,0x18,0x18,0x17,0x1a,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,0x14,0x18,0x15,0x18,0x14,0x15,0x17,0x14,0x15,
+0x17,0x17,5,5,5,5,5,5,5,5,5,5,4,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,4,4,
+5,5,5,5,5,5,5,5,5,5,5,5,0,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,0,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,0,5,5,0,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,0,0,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,0,0,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,0,0,0,0,0,0xb00b,0xb80b,0x784b,0x804b,
+0x884b,0x904b,0x984b,0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,
+0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x17,0x17,0x17,0,
+0,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,
+0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0x7ca,0x7ca,0x7ca,0x7ca,
+0x7ca,0xcca,0x11ca,0x11ca,0x11ca,0x11ca,0x1e4a,0x880a,0x980a,0x980a,0x980a,0x980a,0x980a,0x784a,0x984a,0x68a,
+0x11ca,0x344b,0x344b,0x388b,0x3ccb,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x54b,0x34cb,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0x34ca,0x344a,0x58a,0x68a,0x11ca,0x980a,0x984a,0x988a,
+0x68a,0x7ca,0x11ca,0x1e4a,0x980a,0x784a,0x984a,0x68a,0x7ca,0x11ca,0x1e4a,0x980a,0x784a,0x788a,0x988a,0x7ca,
+0x58a,0x58a,0x58a,0x5ca,0x5ca,0x5ca,0x5ca,0x68a,0x1b,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,6,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,6,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,
+0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,
+0xa00b,0xa80b,0xb00b,0xb80b,0,0,0,0,0x58b,0x68b,0x7cb,0x11cb,0,0,0,0,
+0,0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,0x1bca,5,5,5,5,5,5,
+5,5,0xb80a,0,0,0,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,
+6,6,6,0,0,0,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,0,0x17,5,5,5,5,0,0,0,0,
+5,5,5,5,5,5,5,5,0x17,0x58a,0x5ca,0x7ca,0xa4a,0x1e4a,0,0,
+0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
+0x249,0x289,0,0,0,0,0,0,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,0,0,0,0,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
+2,2,2,2,2,2,2,2,5,5,5,5,5,5,5,5,
+0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,0,
+0,0,0,0x17,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,0,5,5,0,0,0,
+5,0,0,5,5,5,5,5,5,5,0,0,5,0,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,0,0x17,0x58b,0x5cb,0x60b,0x7cb,0xa4b,0x1e4b,0x784b,0x788b,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,0x1b,0x1b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x7cb,0xa4b,
+0,0,0,0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x64b,0x68b,0x7cb,0xa4b,0x1e4b,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,0,5,5,0,0,0,0,0,0x58b,0x68b,0x7cb,0xa4b,0x1e4b,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,0x58b,0x7cb,0xa4b,0x1e4b,0x5cb,0x60b,0,0,0,0x17,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,0,0,0,0,0,0x17,
+0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x78cb,0x80cb,0x88cb,
+0x90cb,0x98cb,0xa0cb,0xa8cb,0xb0cb,0xb8cb,0x36cb,0x354b,0x34cb,0x348b,0x46cb,0x344b,0x4ecb,0x388b,0x3ccb,0x454b,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,0,0,0,0,0x5ecb,0x344b,5,5,
+0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,
+0,0,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0x984b,
+0x30b,0x34b,0x38b,0x3cb,0x7cb,0xa4b,0x1e4b,0x784b,0x344b,0,0,0,0,0,0,0,
+0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,
+5,6,6,6,0,6,6,0,0,0,0,0,6,6,6,6,
+5,5,5,5,0,5,5,5,0,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,0,0,6,6,6,0,0,0,0,6,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,0x58b,0x11cb,0x17,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,0x58b,0x7cb,0xa4b,5,5,5,5,
+5,6,6,0,0,0,0,0x58b,0x68b,0x7cb,0xa4b,0x1e4b,0x17,0x17,0x17,0x17,
+0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,5,5,5,5,
+5,5,5,5,0x1b,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0,
+0,0x17,0x17,0x17,0x17,0x17,0x17,0x17,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0,
+0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,0x784b,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,0,0,0,0,0,
+0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,0x784b,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,0,0,0,0,0,0,
+0,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,0,0,
+0,0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
+5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
+0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,
+0,0,0x58b,0x68b,0x7cb,0x11cb,0x1e4b,0x784b,5,5,5,5,6,6,6,6,
+0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
+0x249,0x289,0,0,0,0,0,0,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,
+0x50b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,
+0xa80b,0xb00b,0xb80b,0x344b,0x34cb,0x348b,0x388b,0,5,5,5,5,5,5,5,5,
+5,5,0,6,6,0x13,0,0,5,5,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,0x58b,0x5cb,0x60b,0x64b,0x68b,0x7cb,0xa4b,0xccb,0x1e4b,0x344b,5,
+0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,
+6,6,6,6,6,0x58b,0x7cb,0xa4b,0x1e4b,0x17,0x17,0x17,0x17,0x17,0,0,
+0,0,0,0,5,5,5,5,5,0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x784b,0x49,0x89,0xc9,0x109,0x149,0x189,
+0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,6,8,6,8,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,6,6,6,6,6,6,6,0x17,0x17,0x17,0x17,0x17,
+0x17,0x17,0,0,0,0,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,
+0xa4b,0xccb,0xf4b,0x11cb,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,8,8,8,6,6,6,6,8,8,6,6,0x17,
+0x17,0x10,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,0,
+0,0x10,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,
+0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,5,5,5,5,
+5,5,5,6,6,6,6,6,8,6,6,6,6,6,6,6,
+6,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17,
+5,8,8,5,0,0,0,0,0,0,0,0,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,
+0x17,0x17,5,0,0,0,0,0,0,0,0,0,8,5,5,5,
+5,0x17,0x17,0x17,0x17,6,6,6,6,0x17,8,6,0x49,0x89,0xc9,0x109,
+0x149,0x189,0x1c9,0x209,0x249,0x289,5,0x17,5,0x17,0x17,0x17,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,8,
+8,8,6,6,6,6,6,6,6,6,6,8,0,0x58b,0x5cb,0x60b,
+0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,
+0x784b,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
+5,5,5,5,5,5,5,5,8,8,8,6,6,6,8,8,
+6,8,6,6,0x17,0x17,0x17,0x17,0x17,0x17,6,0,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,
+5,0,5,5,5,5,0,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,0,5,5,5,5,5,5,5,5,5,
+5,0x17,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,8,8,8,6,6,6,6,6,
+6,6,6,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
+0x249,0x289,0,0,0,0,0,0,5,5,8,8,0,0,6,6,
+6,6,6,6,6,0,0,0,6,6,6,6,6,0,0,0,
+0,0,0,0,0,0,0,0,6,6,8,8,0,5,5,5,
+5,5,5,5,5,0,0,5,5,0,0,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,
+5,0,5,5,0,5,5,5,5,5,0,6,6,5,8,8,
+6,8,8,8,8,0,0,8,8,0,0,8,8,8,0,0,
+5,0,0,0,0,0,0,8,0,0,0,0,0,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,8,8,8,6,6,6,6,6,6,6,6,
+8,8,6,6,6,8,6,5,5,5,5,0x17,0x17,0x17,0x17,0x17,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0,0x17,6,5,
+5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+8,8,8,6,6,6,6,6,6,8,6,8,8,8,8,6,
+6,8,6,6,5,5,0x17,5,0,0,0,0,0,0,0,0,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,8,
+8,8,6,6,6,6,0,0,8,8,8,8,6,6,8,6,
+6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,
+0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,5,5,5,5,6,6,0,0,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+8,8,8,6,6,6,6,6,6,6,6,8,8,6,8,6,
+6,0x17,0x17,0x17,5,0,0,0,0,0,0,0,0,0,0,0,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,
+0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+5,5,5,5,5,5,5,5,5,5,5,6,8,6,8,8,
+6,6,6,6,6,6,8,6,5,0,0,0,0,0,0,0,
+8,8,6,6,6,6,8,6,6,6,6,6,0,0,0,0,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x7cb,0xa4b,0x17,0x17,0x17,0x1b,
+5,5,5,5,5,5,5,5,5,5,5,5,8,8,8,6,
+6,6,6,6,6,6,6,6,8,6,6,0x17,0,0,0,0,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,
+0x16cb,0x194b,0x1bcb,0,0,0,0,0,0,0,0,0,0,0,0,5,
+8,5,8,6,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,
+5,5,5,5,5,5,5,0,0,5,0,0,5,5,5,5,
+5,5,5,5,0,5,5,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,8,8,8,8,8,8,0,8,
+8,0,0,6,6,8,6,5,6,5,0x17,5,8,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
+0,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,8,8,8,6,6,6,6,
+0,0,6,6,8,8,8,8,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,
+6,8,5,6,6,6,6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,6,
+0,0,0,0,0,0,0,0,5,6,6,6,6,6,6,8,
+8,6,6,6,5,5,5,5,5,6,6,6,6,6,6,6,
+6,6,6,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,0x17,0x17,0x17,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
+5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,8,
+6,6,0x17,0x17,0x17,5,0x17,0x17,5,0x17,0x17,0x17,0x17,0x17,0,0,
+0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
+0x249,0x289,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,
+0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0,0,0,0x17,0x17,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,8,6,6,6,6,6,6,6,0,6,6,6,6,
+6,6,8,6,6,6,6,6,6,6,6,6,0,8,6,6,
+6,6,6,6,6,8,6,6,8,6,6,0,0,0,0,0,
+0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,0,0,6,6,6,6,6,6,6,6,6,6,
+6,6,6,6,6,6,5,6,0,0,0,0,0,0,0,0,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,
+5,5,5,5,5,5,5,0,5,5,0,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,6,6,6,6,6,6,0,0,0,6,0,6,6,0,6,
+5,5,5,5,5,5,5,5,5,5,8,8,8,8,8,0,
+6,6,0,8,8,6,8,6,5,0,0,0,0,0,0,0,
+5,5,5,5,5,5,0,5,5,0,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,6,6,8,8,0x17,0x17,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x19,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0x17,0xcd0b,0xcc0b,0xcb0b,0xd00b,
+0xca0b,0xcf0b,0xcb4b,0xd04b,0xc90b,0x37cb,0x37cb,0x364b,0x35cb,0xc94b,0x3fcb,0x350b,0x34cb,0x344b,0x344b,0x3ccb,
+0xcd0b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x19,0x19,0x19,0x34ca,0x354a,0x34ca,0x34ca,
+0x344a,0x348a,0x388a,0xf4a,0x11ca,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0,0x17,0x17,0x17,0x17,
+0x17,0,0,0,0,0,0,0,0,0,0,0,0x5ca,0x60a,0x64a,0x68a,
+0x6ca,0x70a,0x74a,0x78a,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x64a,0x68a,0x6ca,0x70a,0x74a,
+0x78a,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x58a,0x5ca,0x60a,0x64a,0x68a,0x5ca,
+0x60a,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x58a,0x5ca,0x60a,0x60a,0x64a,0x68a,0xc08a,0xc18a,
+0x58a,0x5ca,0x60a,0x60a,0x64a,0x68a,0x60a,0x60a,0x64a,0x64a,0x64a,0x64a,0x6ca,0x70a,0x70a,0x70a,
+0x74a,0x74a,0x78a,0x78a,0x78a,0x78a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x58a,0x5ca,0x60a,0x64a,0x64a,
+0x68a,0x68a,0x5ca,0x60a,0x58a,0x5ca,0x348a,0x388a,0x454a,0x348a,0x388a,0x35ca,5,5,5,5,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,0,0x10,0x10,0x10,0x10,
+0x10,0x10,0x10,0x10,0x10,0,0,0,0,0,0,0,5,5,5,5,
+5,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,
+0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0x17,0x17,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,0,0,6,6,6,6,
+6,0x17,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,
+6,6,6,0x17,0x17,0x17,0x17,0x17,0x1b,0x1b,0x1b,0x1b,4,4,4,4,
+0x17,0x1b,0,0,0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,
+0x149,0x189,0x1c9,0x209,0x249,0x289,0,0x7cb,0x1e4b,0x788b,0x790b,0x798b,0x7a0b,0x7a8b,0,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,0,0,0,0,0,5,5,5,0x54b,0x58b,0x5cb,0x60b,
+0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b,
+0x58b,0x5cb,0x60b,0x17,0x17,0x17,0x17,0,0,0,0,0,5,5,5,5,
+5,5,5,5,5,5,5,0,0,0,0,6,5,8,8,8,
+8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+0,0,0,0,0,0,0,6,6,6,6,4,4,4,4,4,
+4,4,4,4,4,4,4,4,4,4,0x17,4,6,0,0,0,
+0,0,0,0,0,0,0,0,8,8,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+5,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,
+5,5,5,5,0,0,0,0,0,0,0,0,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,
+0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
+5,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
+5,5,0,0,0x1b,6,6,0x17,0x10,0x10,0x10,0x10,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,
+0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,8,8,6,6,6,0x1b,0x1b,
+0x1b,8,8,8,8,8,8,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,6,
+6,6,6,6,6,6,6,0x1b,0x1b,6,6,6,6,6,6,6,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,6,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0x1b,0x1b,6,6,6,0x1b,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0x54b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x80b,
+0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b,0,0,0,0,0,0,0,0,
+0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,
+0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x58b,0x5cb,0x60b,0x64b,0x68b,0x58b,0x68b,0,0,0,
+0,0,0,0,0x249,0x289,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,
+0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x49,0x89,0xc9,0x109,0x149,0x189,
+0x1c9,0x209,0x249,0x289,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,2,2,2,2,2,2,2,0,2,2,2,2,2,2,
+2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,1,0,1,1,0,0,1,0,
+0,1,1,0,0,1,1,1,1,0,1,1,1,1,1,1,
+1,1,2,2,2,2,0,2,0,2,2,2,2,2,2,2,
+0,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
+1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1,
+1,0,1,1,1,1,1,1,1,0,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,1,1,0,1,1,1,1,0,1,1,1,1,
+1,0,1,0,0,0,1,1,1,1,1,1,1,0,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
+2,2,0,0,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,0x18,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,0x18,2,2,2,2,2,2,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,0x18,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,0x18,2,2,
+2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,2,2,2,0x18,2,2,2,2,2,2,1,2,
+0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x49,0x89,0xc9,0x109,
+0x149,0x189,0x1c9,0x209,0,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,6,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,6,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,6,6,6,6,6,6,6,6,0x1b,0x1b,0x1b,0x1b,6,
+6,6,6,6,6,6,6,6,6,6,6,6,6,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+6,0x1b,0x1b,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,6,6,6,6,6,6,6,6,0,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+6,0,0,6,6,6,6,6,6,6,0,6,6,0,6,6,
+6,6,6,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
+0x249,0x289,0,0,0,0,5,0x1b,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,5,0,0,0,6,6,6,6,6,6,6,4,
+4,4,4,4,4,4,0,0,5,5,5,5,5,5,5,5,
+5,5,5,5,6,6,6,6,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
+0x249,0x289,0,0,0,0,0,0x19,5,5,5,5,5,0,0,0x58b,
+0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,6,6,6,6,6,6,6,0,
+0,0,0,0,0,0,0,0,2,2,2,2,6,6,6,6,
+6,6,6,4,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,
+0x249,0x289,0,0,0,0,0x17,0x17,1,1,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,
+0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x78cb,0x794b,0x814b,0x58b,0x5cb,0x60b,0x64b,0x68b,
+0x6cb,0x70b,0x74b,0x78b,0x1b,0x34cb,0x344b,0x3ccb,0x19,0x58b,0x5cb,0x788b,0x78cb,0,0,0,
+0,0,0,0,0,0,0,0,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,
+0xa00b,0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0x984b,0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,
+0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x78cb,0x80cb,0x984b,0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,
+0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x1b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,
+0x900b,0xa00b,0x804b,0x788b,0x344b,0x354b,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,
+0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,
+0xa00b,0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0x18,0x18,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,5,5,5,5,0,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,0,5,5,0,5,0,0,5,
+0,5,5,5,5,5,5,5,5,5,5,0,5,5,5,5,
+0,5,0,5,0,0,0,0,0,0,5,0,0,0,0,5,
+0,5,0,5,0,5,5,5,0,5,5,0,5,0,0,5,
+0,5,0,5,0,5,0,5,0,5,5,0,5,0,0,5,
+5,5,5,0,5,5,5,5,5,5,5,0,5,5,5,5,
+0,5,5,5,5,0,5,0,5,5,5,5,5,5,5,5,
+5,5,0,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,0,0,0,0,0,5,5,5,0,5,5,5,
+5,5,0,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x2cb,0x2cb,0x30b,0x34b,
+0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x54b,0x54b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,
+0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0x1b,0x1b,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x1a,0x1a,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,
+0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,
+0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0x1b,0x1b,0x1b,0,0,0,0,0,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,
+0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,5,0x705,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,0x645,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,0x645,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,0x685,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,0xcc5,5,5,5,5,
+5,5,5,5,0xf45,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,0xf45,5,5,5,5,5,5,5,5,5,5,5,
+5,5,0x6c5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,0x605,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,0x605,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,0x605,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,0x605,5,5,5,5,
+5,5,5,5,5,5,5,5,5,0x645,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+0x785,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
+0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
+0,0x10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,
+0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0,0,
+0,0,0,0
+};
+
+static const UTrie2 propsTrie={
+ propsTrie_index,
+ propsTrie_index+4532,
+ NULL,
+ 4532,
+ 17744,
+ 0xa40,
+ 0x1234,
+ 0x0,
+ 0x0,
+ 0x110000,
+ 0x5700,
+ NULL, 0, FALSE, FALSE, 0, NULL
+};
+
+static const uint16_t propsVectorsTrie_index[31228]={
+0x4e8,0x4f0,0x4f8,0x500,0x518,0x520,0x528,0x530,0x538,0x540,0x548,0x550,0x558,0x560,0x568,0x570,
+0x577,0x57f,0x587,0x58f,0x592,0x59a,0x5a2,0x5aa,0x5b2,0x5ba,0x5c2,0x5ca,0x5d2,0x5da,0x5e2,0x5ea,
+0x5f2,0x5fa,0x601,0x609,0x611,0x619,0x621,0x629,0x631,0x639,0x63e,0x646,0x64d,0x655,0x65d,0x665,
+0x66d,0x675,0x67d,0x685,0x68c,0x694,0x69c,0x6a4,0x6ac,0x6b4,0x6bc,0x6c4,0x6cc,0x6d4,0x6dc,0x6e4,
+0x1a38,0xd5e,0xe35,0x6ec,0x508,0xe9c,0xea4,0x1bf2,0x1300,0x1310,0x12f8,0x1308,0x7c5,0x7cb,0x7d3,0x7db,
+0x7e3,0x7e9,0x7f1,0x7f9,0x801,0x807,0x80f,0x817,0x81f,0x825,0x82d,0x835,0x83d,0x845,0x84d,0x854,
+0x85c,0x862,0x86a,0x872,0x87a,0x880,0x888,0x890,0x898,0x1318,0x8a0,0x8a8,0x8b0,0x8b7,0x8bf,0x8c7,
+0x8cf,0x8d3,0x8db,0x8e2,0x8ea,0x8f2,0x8fa,0x902,0x162c,0x1634,0x90a,0x912,0x91a,0x922,0x92a,0x931,
+0x1692,0x1682,0x168a,0x1973,0x197b,0x1328,0x939,0x1320,0x1572,0x1572,0x1574,0x133c,0x133d,0x1330,0x1332,0x1334,
+0x169a,0x169c,0x941,0x169c,0x949,0x94e,0x956,0x16a1,0x95c,0x169c,0x962,0x96a,0xc39,0x16a9,0x16a9,0x972,
+0x16b9,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,
+0x16ba,0x16ba,0x16ba,0x16b1,0x97a,0x16c2,0x16c2,0x982,0xb59,0xb61,0xb69,0xb71,0x16d2,0x16ca,0x98a,0x992,
+0x99a,0x16dc,0x16e4,0x9a2,0x16da,0x9aa,0x1a40,0xd66,0xb79,0xb81,0xb89,0xb8e,0x18e1,0xc6c,0xc73,0x1849,
+0xc09,0x1a48,0xd6e,0xd76,0xd7e,0xd86,0xf47,0xf48,0x1939,0x193e,0xca8,0xcb0,0x19af,0x19b7,0x1b11,0xe3d,
+0x19bf,0xcf2,0xcfa,0x19c7,0x10f6,0x1196,0xf27,0xd8e,0x1869,0x1851,0x1861,0x1859,0x18f9,0x18f1,0x18b9,0xc19,
+0x1345,0x1345,0x1345,0x1345,0x1348,0x1345,0x1345,0x1350,0x9b2,0x1358,0x9b6,0x9be,0x1358,0x9c6,0x9ce,0x9d6,
+0x1368,0x1360,0x1370,0x9de,0x9e6,0x1378,0x9ee,0x9f6,0x1380,0x1388,0x1390,0x1398,0x9fe,0x13a0,0x13a7,0x13af,
+0x13b7,0x13bf,0x13c7,0x13cf,0x13d7,0x13de,0x13e6,0x13ee,0x13f6,0x13fe,0x1401,0x1403,0x16ec,0x17dc,0x17e2,0x1929,
+0x140b,0xa06,0xa0e,0x1525,0x152a,0x152d,0x1535,0x1413,0x153d,0x153d,0x1423,0x141b,0x142b,0x1433,0x143b,0x1443,
+0x144b,0x1453,0x145b,0x1463,0x17ea,0x1841,0x1983,0x1ad9,0x1473,0x147a,0x1482,0x148a,0x146b,0x1492,0x17f2,0x17f9,
+0x16f4,0x16f4,0x16f4,0x16f4,0x16f4,0x16f4,0x16f4,0x16f4,0x1801,0x1804,0x1801,0x1801,0x180c,0x1813,0x1815,0x181c,
+0x1824,0x1828,0x1828,0x182b,0x1828,0x1828,0x1831,0x1828,0x1871,0x1931,0x198b,0xb96,0xb9c,0x1c36,0x1c3e,0x1d15,
+0x18d1,0xc49,0xc4d,0x1946,0x18c1,0x18c1,0x18c1,0xc21,0x18c9,0xc41,0x1911,0xc98,0xc29,0xc31,0xc31,0x19cf,
+0x1901,0x1993,0xc83,0xc88,0xa16,0x16fc,0x16fc,0xa1e,0x1704,0x1704,0x1704,0x1704,0x1704,0x1704,0xa26,0x6f0,
+0x155a,0x157c,0xa2e,0x1584,0xa36,0x158c,0x1594,0x159c,0xa3e,0xa43,0x15a4,0x15ab,0xa48,0x170c,0x1921,0xc11,
+0xa50,0x1606,0x160d,0x15b3,0x1615,0x161c,0x15bb,0x15bf,0x15d8,0x15d8,0x15da,0x15c7,0x15cf,0x15cf,0x15d0,0x1624,
+0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
+0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
+0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
+0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
+0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
+0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
+0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
+0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
+0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
+0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
+0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
+0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,
+0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1717,0x1879,0x1879,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,
+0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e9,0x1a30,0x12b5,
+0x171f,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,
+0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,
+0x1725,0x1725,0x1725,0x1725,0xa58,0x172d,0xa60,0x1a50,0x19db,0x19db,0x19db,0x19db,0x19db,0x19db,0x19db,0x19db,
+0x19d7,0xd02,0x19eb,0x19e3,0x19ed,0x1a58,0x1a58,0xd96,0x18d9,0x194e,0x19a3,0x19a7,0x199b,0x1b09,0xcb8,0xcbb,
+0x1909,0xc90,0x1956,0xcc3,0x19f5,0x19f8,0xd0a,0x1a60,0x1a08,0x1a00,0xd12,0xd9e,0x1a68,0x1a6c,0xda6,0xff0,
+0x1a10,0xd1a,0xd22,0x1a74,0x1a84,0x1a7c,0xdae,0xef7,0xe45,0xe4d,0x1c85,0xfa8,0x1d32,0x1d32,0x1a8c,0xdb6,
+0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,
+0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,
+0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,
+0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,
+0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,
+0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,
+0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,
+0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,
+0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,
+0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,
+0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,
+0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,
+0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,
+0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,
+0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,
+0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,
+0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,
+0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,
+0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,
+0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,
+0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,
+0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0xa68,0xdbe,0xdc1,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,
+0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,
+0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
+0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
+0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
+0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
+0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
+0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
+0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
+0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
+0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
+0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
+0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
+0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,
+0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x15f1,0x15f1,0x15f1,0x15f1,0x15f1,0x15f1,0x15f1,0x15f1,
+0x15f6,0x15fe,0x1839,0x12bd,0x1919,0x1919,0x12c1,0x12c8,0xa70,0xa78,0xa80,0x14b2,0x14b9,0x14c1,0xa88,0x14c9,
+0x14fa,0x14fa,0x14a2,0x14aa,0x14d1,0x14f1,0x14f2,0x1502,0x14d9,0x149a,0xa90,0x14e1,0xa98,0x14e9,0xaa0,0xaa4,
+0xca0,0x150a,0xaac,0xab4,0x1512,0x1518,0x151d,0xabc,0xacc,0x1562,0x156a,0x154d,0x1552,0xad4,0xadc,0xac4,
+0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,
+0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x1644,0x1644,0x1644,0x1644,
+0x1420,0x1420,0x1460,0x14a0,0x14e0,0x1520,0x1560,0x15a0,0x15dc,0x161c,0x1648,0x1688,0x16c8,0x1708,0x1748,0x1788,
+0x17c8,0x1804,0x1844,0x1884,0x18c4,0x18f8,0x1934,0x1974,0x19b4,0x19f4,0x1a30,0x1a70,0x1ab0,0x1af0,0x1b30,0x1b70,
+0xa80,0xac0,0xb00,0xb40,0xb80,0xa40,0xe75,0xa40,0xe97,0xa40,0xa40,0xa40,0xa40,0xbc0,0x12dd,0x12dd,
+0xed7,0xc00,0xa40,0xa40,0xa40,0xa40,0xf17,0xc2d,0xa40,0xa40,0xc6d,0xcad,0xced,0xd2d,0xe35,0xda5,
+0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,
+0x121d,0x121d,0x121d,0x121d,0xf57,0x125d,0x1092,0x10d2,0x129d,0x10dd,0x131d,0x131d,0x131d,0xf97,0xfb7,0xff7,
+0x135d,0x135d,0x1037,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,
+0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0x1052,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
+0xde5,0xdf5,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65,
+0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,
+0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x111d,
+0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,
+0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x115d,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0xba4,0xbab,0xbb3,0xbbb,0x1881,0x1881,0x1881,0xbc3,0xbcb,0xbce,0x18b1,0x18a9,0xc01,0xd2a,0xd2e,0xd32,
+0x508,0x508,0x508,0x508,0xd3a,0x1a18,0xd42,0xf3f,0x1735,0xae4,0xaea,0x1000,0xbd6,0x18e9,0xc7b,0x508,
+0x174a,0x173d,0x1742,0x1889,0xbde,0xbe6,0x1134,0x113a,0x1c6d,0xf5d,0x1c5d,0x6f8,0x508,0x508,0x508,0x508,
+0x1c8d,0x1c8d,0x1c8d,0x1c8d,0x1c8d,0x1c8d,0x1c8d,0x1c8d,0x1c8d,0xfb0,0xfb8,0xfc0,0x508,0x508,0x508,0x508,
+0xbee,0xbf1,0xdc9,0x1cd5,0xff8,0x700,0x508,0x1092,0xccb,0xd4a,0x508,0x508,0x1c02,0xeff,0xf07,0x1d1d,
+0xc55,0xc5c,0xc64,0x1a94,0x1cb5,0x508,0x1c95,0xfd0,0x1a9c,0xdd1,0xdd9,0xde1,0x1020,0x708,0x508,0x508,
+0x1aa4,0x1aa4,0x710,0x508,0x1d4a,0x10aa,0x1d42,0x10b2,0x1e0e,0x11ac,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0xde9,0x1e66,0x1291,0x508,0x508,0x1e2e,0x11d4,0x11db,0x718,0x508,0x71c,0x1248,0x11e3,
+0x1b19,0x1b1b,0xe55,0xe5c,0x1aac,0x1ab4,0xdf1,0xf1f,0x1bfa,0xee7,0xeef,0xfc8,0x1c1a,0x1c1e,0x1c26,0x1040,
+0xf93,0xf98,0x724,0x508,0x109a,0x10a2,0x1c7d,0xfa0,0xf75,0xf7b,0xf83,0xf8b,0x508,0x508,0x508,0x508,
+0x1daa,0x1da2,0x1124,0x112c,0x1cfd,0x1cf5,0x1068,0x508,0x508,0x508,0x508,0x508,0x1ce5,0x1028,0x1030,0x1038,
+0x1cad,0x1ca5,0xfe0,0x111c,0x1c2e,0xf2f,0x72c,0x508,0x1078,0x1080,0x508,0x508,0x508,0x508,0x508,0x508,
+0x1e06,0x118e,0x734,0x508,0x508,0x1d0d,0x1d05,0x1070,0x1250,0x1256,0x125e,0x508,0x508,0x11eb,0x11ef,0x11f7,
+0x1dde,0x1dd6,0x1176,0x1dce,0x1dc6,0x73c,0x1cdd,0x1018,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x10da,0x10df,0x10e7,0x10ee,0x110e,0x1114,0x508,0x508,0x115a,0x115e,0x1166,0x119e,0x11a4,0x744,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x11bc,0x508,0x508,0x508,0x508,0x508,0x748,0x1e4e,0x1238,
+0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,
+0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x1963,0xcd3,0xcda,0xcda,0xcda,
+0x196b,0x196b,0x196b,0xce2,0x1d3a,0x1d3a,0x1d3a,0x1d3a,0x1d3a,0x1d3a,0x750,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,
+0x1b23,0xe64,0xfe8,0x758,0x508,0x508,0x75c,0xf37,0x1ccd,0x1cc5,0x1008,0x1010,0x764,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x1e26,0x1e1e,0x11cc,0x508,0x508,0x508,0x1c12,0x1c12,0xf0f,0x1c0a,0xf17,0x508,0x508,0x1106,
+0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,
+0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dbe,0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,
+0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,0x1266,0x126c,0x1286,0x1289,0x1289,0x1289,0x76c,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x1c65,0x1c65,0x1c65,
+0xf50,0xf55,0x774,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x1752,0x1752,0x1752,
+0x1752,0x1752,0x1752,0x1752,0xaf2,0x1762,0xafa,0x1763,0x175a,0x176b,0x1771,0x1779,0xb02,0x18a1,0x18a1,0x77c,
+0x508,0x508,0x508,0x508,0x11c4,0x1891,0x1891,0xbf9,0xcea,0x508,0x508,0x508,0x508,0x17aa,0x17b1,0xb0a,
+0x17b4,0xb12,0xb1a,0xb22,0x17ae,0xb2a,0xb32,0xb3a,0x17b3,0x17bb,0x17aa,0x17b1,0x17ad,0x17b4,0x17bc,0x17ab,
+0x17b2,0x17ae,0xb41,0x1781,0x1789,0x1790,0x1797,0x1784,0x178c,0x1793,0x179a,0xb49,0x17a2,0x1d62,0x1d62,0x1d62,
+0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d52,0x1d55,0x1d52,
+0x1d5c,0x10ca,0x784,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x10fe,0x78c,0x508,
+0x508,0x508,0x508,0x508,0x508,0x1e46,0x11ff,0x794,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x1e56,0x1240,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x1c9d,0x1c9d,0x1c9d,
+0x1c9d,0x1c9d,0x1c9d,0xfd8,0x508,0x1d9a,0x1d92,0x10d2,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x798,0x1e16,0x11b4,0x508,0x508,0x1207,0x1208,0x7a0,0x508,0x508,0x508,0x508,0x508,0xeac,0xeb4,0xebc,
+0xec4,0xecc,0xed4,0xedb,0xedf,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x7a4,0x1048,0x1ced,0x104e,0x1ced,0x1056,0x105b,0x1060,0x1060,0x1d72,0x1d82,0x1d8a,
+0x10ba,0x1d7a,0x1e36,0x10c2,0x1dee,0x1e3e,0x1e3e,0x117e,0x1186,0x121f,0x1225,0x122a,0x1230,0x1e5e,0x1e5e,0x1e5e,
+0x1e5e,0x1274,0x1e5e,0x127a,0x127e,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,
+0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,
+0x7ac,0x7ac,0x7ac,0x7ac,0x7ad,0xb51,0x17c4,0x17c4,0x17c4,0x7b5,0x7b5,0x7b5,0x7b5,0x1899,0x1899,0x1899,
+0x1899,0x1899,0x1899,0x1899,0x7bd,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,
+0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,
+0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,
+0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,
+0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x1a20,0xd52,0x1a28,0x1a28,0xd56,0xe6c,0xe74,0xe7c,0x1ae9,0x1ad1,0x1af1,
+0x1af9,0x1ae1,0xe01,0xe05,0xe0c,0xe14,0xe1b,0xe23,0xe2b,0xe2d,0xe2d,0xe2d,0xe2d,0x1b5a,0x1b62,0x1b5a,
+0x1b68,0x1b70,0x1b3b,0x1b78,0x1b80,0x1b5a,0x1b88,0x1b90,0x1b97,0x1b9f,0x1b43,0x1b5a,0x1ba4,0x1b4b,0x1b52,0x1bac,
+0x1bb2,0x1c4e,0x1c55,0x1c46,0x1bba,0x1bc2,0x1bca,0x1bd2,0x1cbd,0x1bda,0x1be2,0xe84,0xe8c,0x1b2b,0x1b2b,0x1b2b,
+0xe94,0x1c75,0x1c75,0xf65,0xf6d,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,
+0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abe,0x1abc,0x1ac6,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,
+0x1ac9,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0xdf9,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,
+0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,0x1d2a,0x1d25,0x1d25,0x1d25,0x1088,0x108a,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,
+0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,
+0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,
+0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,
+0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1142,0x1b33,0x1de6,0x1de6,0x1de6,0x1de6,0x1de6,0x1de6,0x1de6,0x114a,
+0x1152,0x1210,0x1217,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x116e,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,
+0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,
+0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,
+0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,
+0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x12d0,0x1299,0x1b01,
+0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,
+0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,
+0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x12a1,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
+0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
+0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
+0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
+0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,
+0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x12d8,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
+0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
+0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
+0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x12a5,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,
+0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,
+0x1e6e,0x12ad,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
+0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
+0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
+0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,
+0x1299,0x12a5,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,
+0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,
+0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,
+0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x12e0,0x1bea,0x1bea,0x1bea,0x1bea,
+0x1bea,0x1bea,0x12e8,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,
+0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,
+0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,
+0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,
+0x1d6a,0x1d6a,0x12f0,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1664,0x1664,0x1664,
+0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,
+0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,
+0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,
+0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1654,0x166c,0x166c,0x166c,
+0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,
+0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,
+0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,
+0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x165c,0x1664,0x1664,0x1664,
+0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,
+0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,
+0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,
+0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x166c,0x166c,0x166c,
+0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,
+0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,
+0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,
+0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x17cc,0x17cc,0x17cc,
+0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,
+0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,
+0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,
+0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x1b01,0x1b01,0x1b01,
+0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,
+0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,
+0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,
+0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1d6a,0x1d6a,0x1d6a,
+0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,
+0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,
+0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,
+0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1db2,0x1db2,0x1db2,
+0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,
+0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,
+0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,
+0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1df6,0x1df6,0x1df6,
+0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,
+0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,
+0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,
+0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1e6e,0x1e6e,0x1e6e,
+0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,
+0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,
+0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,
+0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x4e7,0x4e7,0x4e7,
+0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2ca,0x2d3,0x2cd,0x2cd,0x2d0,0x2c7,0x2c7,
+0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,
+0x7fb,0x7f5,0x7da,0x7d1,0x7c8,0x7c5,0x7bc,0x7d7,0x7c2,0x7ce,0x7d1,0x7ec,0x7e3,0x7d4,0x7f8,0x7cb,
+0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7e0,0x7dd,0x7e6,0x7e6,0x7e6,0x7f5,
+0x7bc,0x807,0x807,0x807,0x807,0x807,0x807,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,
+0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x7c2,0x7c8,0x7ce,0x7f2,0x7b6,
+0x7ef,0x804,0x804,0x804,0x804,0x804,0x804,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,
+0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7c2,0x7e9,0x7bf,0x7e6,0x2c7,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2e5,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,
+0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,
+0x2d9,0x651,0x810,0x813,0x657,0x813,0x80d,0x64e,0x645,0x2df,0x663,0x2e2,0x816,0x63c,0x65a,0x80a,
+0x654,0x660,0x642,0x642,0x648,0x2dc,0x64e,0x64b,0x645,0x642,0x663,0x2e2,0x63f,0x63f,0x63f,0x651,
+0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x66c,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,
+0x66c,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x65d,0x66c,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x66c,0x666,
+0x669,0x669,0x2e8,0x2e8,0x2e8,0x2e8,0x666,0x2e8,0x669,0x669,0x669,0x2e8,0x669,0x669,0x2e8,0x2e8,
+0x666,0x2e8,0x669,0x669,0x2e8,0x2e8,0x2e8,0x65d,0x666,0x669,0x669,0x2e8,0x669,0x2e8,0x666,0x2e8,
+0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,
+0x2f4,0x66f,0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x2ee,
+0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x678,0x66f,0x2f7,0x2ee,0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x2ee,
+0x2f7,0x66f,0x67b,0x675,0x2f7,0x2ee,0x2f7,0x2ee,0x66f,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x67b,
+0x675,0x678,0x66f,0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x672,0x67e,0x678,0x66f,0x2f7,0x672,0x2f7,0x2ee,
+0x2f7,0x2ee,0x678,0x66f,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,
+0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x678,0x66f,0x2f7,0x2ee,0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x2ee,
+0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f1,
+0x2fa,0x306,0x306,0x2fa,0x306,0x2fa,0x306,0x306,0x2fa,0x306,0x306,0x306,0x2fa,0x2fa,0x306,0x306,
+0x306,0x306,0x2fa,0x306,0x306,0x2fa,0x306,0x306,0x306,0x2fa,0x2fa,0x2fa,0x306,0x306,0x2fa,0x306,
+0x309,0x2fd,0x306,0x2fa,0x306,0x2fa,0x306,0x306,0x2fa,0x306,0x2fa,0x2fa,0x306,0x2fa,0x306,0x309,
+0x2fd,0x306,0x306,0x306,0x2fa,0x306,0x2fa,0x306,0x306,0x2fa,0x2fa,0x303,0x306,0x2fa,0x2fa,0x2fa,
+0x303,0x303,0x303,0x303,0x30c,0x30c,0x300,0x30c,0x30c,0x300,0x30c,0x30c,0x300,0x309,0x681,0x309,
+0x681,0x309,0x681,0x309,0x681,0x309,0x681,0x309,0x681,0x309,0x681,0x309,0x681,0x2fa,0x309,0x2fd,
+0x309,0x2fd,0x309,0x2fd,0x306,0x2fa,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,
+0x2fd,0x30c,0x30c,0x300,0x309,0x2fd,0x9ea,0x9ea,0x9ed,0x9e7,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,
+0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,
+0x309,0x2fd,0x309,0x2fd,0x9ed,0x9e7,0x9ed,0x9e7,0x9ea,0x9e4,0x9ed,0x9e7,0xbaf,0xcb7,0x9ea,0x9e4,
+0x9ea,0x9e4,0x9ed,0x9e7,0x9ed,0x9e7,0x9ed,0x9e7,0x9ed,0x9e7,0x9ed,0x9e7,0x9ed,0x9e7,0x9ed,0x9e7,
+0xcb7,0xcb7,0xcb7,0xdb6,0xdb6,0xdb6,0xdb9,0xdb9,0xdb6,0xdb9,0xdb9,0xdb6,0xdb6,0xdb9,0xefa,0xefd,
+0xefd,0xefd,0xefd,0xefa,0xefd,0xefa,0xefd,0xefa,0xefd,0xefa,0xefd,0xefa,0x30f,0x684,0x30f,0x30f,
+0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x684,0x30f,0x30f,
+0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,
+0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x312,0x30f,0x30f,0x30f,
+0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,
+0x30f,0x9f0,0x9f0,0x9f0,0x9f0,0x9f0,0xcba,0xcba,0x327,0x327,0x327,0x327,0x327,0x327,0x327,0x327,
+0x327,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31b,0x31b,0x318,0x318,0x68a,0x318,0x31e,0x68d,
+0x321,0x68d,0x68d,0x68d,0x321,0x68d,0x31e,0x31e,0x690,0x324,0x318,0x318,0x318,0x318,0x318,0x318,
+0x687,0x687,0x687,0x687,0x315,0x687,0x318,0xb25,0x327,0x327,0x327,0x327,0x327,0x318,0x318,0x318,
+0x318,0x318,0x9f9,0x9f9,0x9f6,0x9f3,0x9f6,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,
+0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,
+0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,
+0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,
+0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,
+0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x696,0x696,0x94b,0x696,0x696,0x94e,0xb28,0xb28,
+0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xc6c,0xd83,0xd83,0xd83,0xd83,0xd83,0xd83,0xd83,0xd83,
+0xebe,0xebe,0xebe,0xebe,0xec1,0xd86,0xd86,0xd86,0x699,0x699,0xb2b,0xcb4,0xcb4,0xcb4,0xcb4,0xcb4,
+0xcb4,0xcb4,0xcb4,0xcb4,0xcb4,0xcb4,0xcb4,0xcb4,0xfa8,0xfa5,0xfa8,0xfa5,0x333,0x33c,0xfa8,0xfa5,
+9,9,0x342,0xf00,0xf00,0xf00,0x32a,0x14fd,9,9,9,9,0x33f,0x32d,0x351,0x330,
+0x351,0x351,0x351,9,0x351,9,0x351,0x351,0x348,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,
+0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,9,0x69f,0x69f,0x69f,0x69f,0x69f,
+0x69f,0x69f,0x351,0x351,0x348,0x348,0x348,0x348,0x348,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,
+0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x345,0x69c,0x69c,0x69c,0x69c,0x69c,
+0x69c,0x69c,0x348,0x348,0x348,0x348,0x348,0xfa8,0x354,0x354,0x357,0x351,0x351,0x354,0x34b,0x9fc,
+0xbb8,0xbb5,0x34e,0x9fc,0x34e,0x9fc,0x34e,0x9fc,0x34e,0x9fc,0x339,0x336,0x339,0x336,0x339,0x336,
+0x339,0x336,0x339,0x336,0x339,0x336,0x339,0x336,0x354,0x354,0x34b,0x345,0xb67,0xb64,0xbb2,0xcc3,
+0xcc0,0xcc6,0xcc3,0xcc0,0xdbc,0xdbf,0xdbf,0xdbf,0xa0b,0x6ab,0x363,0x366,0x363,0x363,0x363,0x366,
+0x363,0x363,0x363,0x363,0x366,0xa0b,0x366,0x363,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,
+0x6a8,0x6ab,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,
+0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,
+0x6a2,0x6a5,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,
+0x6a2,0x6a2,0x6a2,0x6a2,0xa05,0x6a5,0x35d,0x360,0x35d,0x35d,0x35d,0x360,0x35d,0x35d,0x35d,0x35d,
+0x360,0xa05,0x360,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,
+0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x366,0x360,0x363,0x35d,0x363,0x35d,
+0x363,0x35d,0x363,0x35d,0x363,0x35d,0x35a,0x957,0x95a,0x93c,0x93c,0x114f,0x9ff,0x9ff,0xbbe,0xbbb,
+0xa08,0xa02,0xa08,0xa02,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,
+0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,
+0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,
+0x363,0x35d,0x363,0x35d,0x363,0x366,0x360,0x363,0x35d,0xbbe,0xbbb,0x363,0x35d,0xbbe,0xbbb,0x363,
+0x35d,0xbbe,0xbbb,0xf03,0x366,0x360,0x366,0x360,0x363,0x35d,0x366,0x360,0x363,0x35d,0x366,0x360,
+0x366,0x360,0x366,0x360,0x363,0x35d,0x366,0x360,0x366,0x360,0x366,0x360,0x363,0x35d,0x366,0x360,
+0xa0b,0xa05,0x366,0x360,0x366,0x360,0x366,0x360,0x366,0x360,0xdc5,0xdc2,0x366,0x360,0xf06,0xf03,
+0xf06,0xf03,0xf06,0xf03,0xc2d,0xc2a,0xc2d,0xc2a,0xc2d,0xc2a,0xc2d,0xc2a,0xc2d,0xc2a,0xc2d,0xc2a,
+0xc2d,0xc2a,0xc2d,0xc2a,0xf33,0xf30,0xf33,0xf30,0x1023,0x1020,0x1023,0x1020,0x1023,0x1020,0x1023,0x1020,
+0x1023,0x1020,0x1023,0x1020,0x1023,0x1020,0x1023,0x1020,0x1188,0x1185,0x1371,0x136e,0x1536,0x1533,0x1536,0x1533,
+0x1536,0x1533,0x1536,0x1533,0xc,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,
+0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0xc,
+0xc,0x37b,0x369,0x369,0x369,0x36f,0x369,0x36c,0x1941,0x372,0x372,0x372,0x372,0x372,0x372,0x372,
+0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,
+0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x375,0x1941,0x37e,0xa0e,0xc,
+0xc,0x1500,0x1500,0x141c,0xf,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,
+0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0xdc8,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,
+0x97e,0x97e,0x97e,0x97e,0x381,0x381,0x381,0x381,0x381,0x381,0x381,0x381,0x381,0x381,0xf09,0x381,
+0x381,0x381,0x38d,0x381,0x384,0x381,0x381,0x390,0x981,0xdcb,0xdce,0xdcb,0xf,0xf,0xf,0xf,
+0xf,0xf,0xf,0xf,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,
+0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0xf,
+0xf,0xf,0xf,0x1944,0x393,0x393,0x393,0x38a,0x387,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
+0xf,0xf,0xf,0xf,0xcdb,0xcdb,0xcdb,0xcdb,0x141f,0x1503,0xfb1,0xfb1,0xfb1,0xfae,0xfae,0xdd4,
+0x8c7,0xcd5,0xcd2,0xcd2,0xcc9,0xcc9,0xcc9,0xcc9,0xcc9,0xcc9,0xfab,0xfab,0xfab,0xfab,0xfab,0x8c4,
+0x14fa,0x12,0xdd7,0x8ca,0x1338,0x3ae,0x3b1,0x3b1,0x3b1,0x3b1,0x3b1,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,
+0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0xfb4,
+0xfb4,0xfb4,0xfb4,0xfb4,0x8cd,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x942,
+0x942,0x942,0x942,0x942,0x942,0x942,0x942,0xb5e,0xb5e,0xb5e,0xcc9,0xccf,0xccc,0xdd1,0xdd1,0xdd1,
+0xdd1,0xdd1,0xdd1,0x1335,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x3a8,0x3a5,
+0x3a2,0x39f,0xbc1,0xbc1,0x93f,0x3ae,0x3ae,0x3ba,0x3ae,0x3b4,0x3b4,0x3b4,0x3b4,0x3ae,0x3ae,0x3ae,
+0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,
+0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,
+0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,
+0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0xa14,0xa14,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0xa14,
+0x3b1,0x3ae,0x3b1,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0xa14,
+0x3ae,0x3ae,0x3ae,0x3b1,0x95d,0x3ae,0x399,0x399,0x399,0x399,0x399,0x399,0x399,0x396,0x39f,0x39c,
+0x39c,0x399,0x399,0x399,0x399,0x3b7,0x3b7,0x399,0x399,0x39f,0x39c,0x39c,0x39c,0x399,0xcd8,0xcd8,
+0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0xa14,0xa14,0xa14,0xa11,0xa11,0xcd8,
+0xa29,0xa29,0xa29,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa20,0xa23,0xa20,0x15,0xa2c,
+0xa26,0xa17,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,
+0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xcde,0xcde,0xcde,
+0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,
+0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0x15,0x15,0xcde,0xcde,0xcde,
+0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,
+0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0x1035,0x1035,
+0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,
+0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,
+0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,
+0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,
+0xa2f,0xbc4,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,
+0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,
+0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf42,0xf42,0xf42,0xf42,0xf42,
+0xf42,0xf42,0xf42,0xf42,0xf51,0xf51,0xf45,0xf45,0xf48,0xf57,0xf54,0x10b,0x10b,0x1968,0x196b,0x196b,
+0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x252,0x252,0x252,0x252,0x252,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0xb37,0xb37,0xb3a,0xb3a,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0x72,0x72,0x72,0x72,
+0x15ba,0x15ba,0x15ba,0x15ba,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x15b7,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x174,0x174,0x174,0x174,0x174,0x174,0x174,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,
+0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x18e7,0x18ea,0x18ea,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x261,0x261,0x261,0x261,0x261,0x261,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x1b1b,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,
+0x17a9,0x17a9,0x17a9,0x17a9,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x1e9,0x1e9,0x1e9,0x1e9,0x1665,0x1665,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,
+0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x1713,0x1713,0x1713,0x1713,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0xe2e,0xe2e,0xe2b,0xe2b,0xe2b,0xe2e,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x225,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x1845,0x1845,0x231,0x1845,0x1845,0x231,0x1845,0x1845,0x1845,0x1845,0x1845,0x231,0x231,0x231,0x231,0x231,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x282,0x282,0x282,0x282,0x1a70,0x1a6a,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x267,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,
+0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x97b,0x97b,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,0x97b,0x97b,6,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,6,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,
+0xd8c,0xd8c,0xd8c,0xd8c,6,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,6,0x1509,0x3d5,0x3e4,0x3e4,0x1b,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,
+0x3ea,0x1b,0x1b,0x3ea,0x3ea,0x1b,0x1b,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,
+0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x1b,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x1b,0x3ea,0x1b,
+0x1b,0x1b,0x3ea,0x3ea,0x3ea,0x3ea,0x1b,0x1b,0x3d8,0xce4,0x3d5,0x3e4,0x3e4,0x3d5,0x3d5,0x3d5,
+0x3d5,0x1b,0x1b,0x3e4,0x3e4,0x1b,0x1b,0x3e7,0x3e7,0x3db,0xddd,0x1b,0x1b,0x1b,0x1b,0x1b,
+0x1b,0x1b,0x1b,0x3d5,0x1b,0x1b,0x1b,0x1b,0x3ed,0x3ed,0x1b,0x3ed,0x3ea,0x3ea,0x3d5,0x3d5,
+0x1b,0x1b,0x966,0x966,0x966,0x966,0x966,0x966,0x966,0x966,0x966,0x966,0x3ea,0x3ea,0x3e1,0x3e1,
+0x3de,0x3de,0x3de,0x3de,0x3de,0x3e1,0x3de,0x115e,0x18a2,0x189f,0x1947,0x1b,0x1e,0xce7,0x3f0,0xcea,
+0x1e,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x1e,0x1e,0x1e,0x1e,0x3fc,0x3fc,0x1e,0x1e,0x3fc,
+0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x1e,0x3fc,0x3fc,
+0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x1e,0x3fc,0x3ff,0x1e,0x3fc,0x3ff,0x1e,0x3fc,0x3fc,0x1e,0x1e,
+0x3f3,0x1e,0x3f9,0x3f9,0x3f9,0x3f0,0x3f0,0x1e,0x1e,0x1e,0x1e,0x3f0,0x3f0,0x1e,0x1e,0x3f0,
+0x3f0,0x3f6,0x1e,0x1e,0x1e,0xfbd,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x3ff,0x3ff,0x3ff,
+0x3fc,0x1e,0x3ff,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x969,0x969,0x969,0x969,0x969,0x969,
+0x969,0x969,0x969,0x969,0x3f0,0x3f0,0x3fc,0x3fc,0x3fc,0xfbd,0x194a,0x1e,0x1e,0x1e,0x1e,0x1e,
+0x1e,0x1e,0x1e,0x1e,0x21,0x402,0x402,0x40b,0x21,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,
+0xcf3,0x40e,0x21,0x40e,0x40e,0x40e,0x21,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,
+0x40e,0x40e,0x40e,0x40e,0x40e,0x21,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x21,0x40e,0x40e,
+0x21,0x40e,0x40e,0x40e,0x40e,0x40e,0x21,0x21,0x405,0x40e,0x40b,0x40b,0x40b,0x402,0x402,0x402,
+0x402,0x402,0x21,0x402,0x402,0x40b,0x21,0x40b,0x40b,0x408,0x21,0x21,0x40e,0x21,0x21,0x21,
+0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x40e,0xcf3,0xced,0xced,
+0x21,0x21,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x1422,0xcf0,0x21,0x21,
+0x21,0x21,0x21,0x21,0x21,0x1725,0x18a5,0x18a5,0x18a5,0x18a8,0x18a8,0x18a8,0x24,0x411,0x420,0x420,
+0x24,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x24,0x24,0x426,0x426,0x24,0x24,0x426,
+0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x24,0x426,0x426,
+0x426,0x426,0x426,0x426,0x426,0x24,0x426,0x426,0x24,0xcf6,0x426,0x426,0x426,0x426,0x24,0x24,
+0x414,0x426,0x411,0x411,0x420,0x411,0x411,0x411,0xfc0,0x24,0x24,0x420,0x423,0x24,0x24,0x423,
+0x423,0x417,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x1ab5,0x411,0x411,0x24,0x24,0x24,0x24,
+0x429,0x429,0x24,0x426,0x426,0x426,0xfc0,0xfc0,0x24,0x24,0x41d,0x41d,0x41d,0x41d,0x41d,0x41d,
+0x41d,0x41d,0x41d,0x41d,0x41a,0xcf6,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x24,0x24,0x24,0x24,
+0x24,0x24,0x24,0x24,0x27,0x27,0x42c,0x438,0x27,0x438,0x438,0x438,0x438,0x438,0x438,0x27,
+0x27,0x27,0x438,0x438,0x438,0x27,0x438,0x438,0x43b,0x438,0x27,0x27,0x27,0x438,0x438,0x27,
+0x438,0x27,0x438,0x438,0x27,0x27,0x27,0x438,0x438,0x27,0x27,0x27,0x438,0x438,0x438,0x27,
+0x27,0x27,0x438,0x438,0x438,0x438,0x438,0x438,0x438,0x438,0xde0,0x438,0x438,0x438,0x27,0x27,
+0x27,0x27,0x42c,0x432,0x42c,0x432,0x432,0x27,0x27,0x27,0x432,0x432,0x432,0x27,0x435,0x435,
+0x435,0x42f,0x27,0x27,0xfc3,0x27,0x27,0x27,0x27,0x27,0x27,0x42c,0x27,0x27,0x27,0x27,
+0x27,0x27,0x27,0x27,0x27,0x27,0xef7,0x972,0x972,0x972,0x972,0x972,0x972,0x972,0x972,0x972,
+0x96f,0x96f,0x96f,0xdb0,0xcf9,0xcf9,0xcf9,0xcf9,0xcf9,0xcfc,0xcf9,0x27,0x27,0x27,0x27,0x27,
+0x150c,0x44a,0x44a,0x44a,0x194d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x2a,0x44d,0x44d,
+0x44d,0x2a,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,
+0x44d,0x2a,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x150f,0x44d,0x44d,0x44d,
+0x44d,0x44d,0x2a,0x2a,0x2a,0xfcc,0x43e,0x43e,0x43e,0x44a,0x44a,0x44a,0x44a,0x2a,0x43e,0x43e,
+0x441,0x2a,0x43e,0x43e,0x43e,0x444,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x43e,0x43e,0x2a,
+0xfcc,0xfcc,0x1728,0x2a,0x2a,0x2a,0x2a,0x2a,0x44d,0x44d,0xfc6,0xfc6,0x2a,0x2a,0x447,0x447,
+0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x1a19,
+0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0x17e5,0x1512,0x456,0x456,0x1950,0x45c,0x45c,0x45c,
+0x45c,0x45c,0x45c,0x45c,0x45c,0x2d,0x45c,0x45c,0x45c,0x2d,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,
+0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x2d,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,
+0x45c,0x45c,0x45c,0x45c,0x2d,0x45c,0x45c,0x45c,0x45c,0x45c,0x2d,0x2d,0xcff,0xd02,0x456,0x450,
+0x459,0x456,0x450,0x456,0x456,0x2d,0x450,0x459,0x459,0x2d,0x459,0x459,0x450,0x453,0x2d,0x2d,
+0x2d,0x2d,0x2d,0x2d,0x2d,0x450,0x450,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x45c,0x2d,
+0x45c,0x45c,0xf0f,0xf0f,0x2d,0x2d,0x975,0x975,0x975,0x975,0x975,0x975,0x975,0x975,0x975,0x975,
+0x2d,0xf12,0xf12,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,
+0x18ab,0x1515,0x468,0x468,0x1ab8,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x30,0x46e,0x46e,
+0x46e,0x30,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,
+0x468,0x45f,0x45f,0x45f,0xfcf,0x30,0x468,0x468,0x468,0x30,0x46b,0x46b,0x46b,0x462,0x134a,0x17e8,
+0x30,0x30,0x30,0x30,0x17eb,0x17eb,0x17eb,0x45f,0x17e8,0x17e8,0x17e8,0x17e8,0x17e8,0x17e8,0x17e8,0x172b,
+0x46e,0x46e,0xfcf,0xfcf,0x30,0x30,0x465,0x465,0x465,0x465,0x465,0x465,0x465,0x465,0x465,0x465,
+0xfd2,0xfd2,0xfd2,0xfd2,0xfd2,0xfd2,0x17e8,0x17e8,0x17e8,0xfd5,0xfd8,0xfd8,0xfd8,0xfd8,0xfd8,0xfd8,
+0x33,0x1abb,0xa3e,0xa3e,0x33,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,
+0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0x33,0x33,0x33,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,
+0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0x33,0xa44,
+0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0x33,0xa44,0x33,0x33,0xa44,0xa44,0xa44,0xa44,
+0xa44,0xa44,0xa44,0x33,0x33,0x33,0xa38,0x33,0x33,0x33,0x33,0xa35,0xa3e,0xa3e,0xa35,0xa35,
+0xa35,0x33,0xa35,0x33,0xa3e,0xa3e,0xa41,0xa3e,0xa41,0xa41,0xa41,0xa35,0x33,0x33,0x33,0x33,
+0x33,0x33,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x33,0x33,0xa3e,0xa3e,
+0xa3b,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x36,0x489,0x489,0x489,
+0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,
+0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x474,0x489,0x486,
+0x474,0x474,0x474,0x474,0x474,0x474,0x47a,0x36,0x36,0x36,0x36,0x471,0x48f,0x48f,0x48f,0x48f,
+0x48f,0x489,0x48c,0x477,0x477,0x477,0x477,0x477,0x477,0x474,0x477,0x47d,0x483,0x483,0x483,0x483,
+0x483,0x483,0x483,0x483,0x483,0x483,0x480,0x480,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,
+0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,
+0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x39,0x49e,0x49e,0x39,0x49e,0x39,0x1a1f,0x49e,
+0x49e,0x1a1f,0x49e,0x39,0x1a1f,0x49e,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x49e,0x49e,0x49e,0x49e,
+0x1a1f,0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x1a1f,0x49e,0x49e,0x49e,0x39,0x49e,0x39,0x49e,
+0x1a1f,0x1a1f,0x49e,0x49e,0x1a1f,0x49e,0x49e,0x49e,0x49e,0x492,0x49e,0x49b,0x492,0x492,0x492,0x492,
+0x492,0x492,0x1a1c,0x492,0x492,0x49e,0x39,0x39,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x39,0x4a4,0x39,
+0x495,0x495,0x495,0x495,0x495,0x492,0x39,0x39,0x498,0x498,0x498,0x498,0x498,0x498,0x498,0x498,
+0x498,0x498,0x39,0x39,0x4a1,0x4a1,0x1425,0x1425,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,
+0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,
+0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x9b7,0x9b7,0x9b7,0x9ba,0x9b7,0x9b7,0x9b7,0x9b7,
+0x3c,0x9b7,0x9b7,0x9b7,0x9b7,0x9ba,0x9b7,0x9b7,0x9b7,0x9b7,0x9ba,0x9b7,0x9b7,0x9b7,0x9b7,0x9ba,
+0x9b7,0x9b7,0x9b7,0x9b7,0x9ba,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,
+0x9b7,0x9ba,0xa53,0xfe4,0xfe4,0x3c,0x3c,0x3c,0x3c,0x984,0x984,0x987,0x984,0x987,0x987,0x990,
+0x987,0x990,0x984,0x984,0x984,0x984,0x984,0x9b1,0x984,0x987,0x98a,0x98a,0x98d,0x996,0x98a,0x98a,
+0x9b7,0x9b7,0x9b7,0x9b7,0x1353,0x134d,0x134d,0x134d,0x984,0x984,0x984,0x987,0x984,0x984,0xa47,0x984,
+0x3c,0x984,0x984,0x984,0x984,0x987,0x984,0x984,0x984,0x984,0x987,0x984,0x984,0x984,0x984,0x987,
+0x984,0x984,0x984,0x984,0x987,0x984,0xa47,0xa47,0xa47,0x984,0x984,0x984,0x984,0x984,0x984,0x984,
+0xa47,0x987,0xa47,0xa47,0xa47,0x3c,0xa50,0xa50,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4a,0xa4d,
+0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0x3c,0xfdb,0xa4d,0xde3,0xde3,0xfde,0xfe1,0xfdb,0x1161,0x1161,0x1161,
+0x1161,0x1350,0x1350,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,
+0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,
+0x3c,0x3c,0x3c,0x3c,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x3f,0x142b,0x3f,0x3f,0x3f,0x3f,
+0x3f,0x142b,0x3f,0x3f,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,
+0x4aa,0x4aa,0x4aa,0x4aa,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,0xa7d,0x42,0xa7d,0xa7d,
+0xa7d,0xa7d,0x42,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0x42,0xa7d,0x42,0xa7d,0xa7d,
+0xa7d,0xa7d,0x42,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,0xa7d,0x42,0xa7d,0xa7d,
+0xa7d,0xa7d,0x42,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,
+0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,0xa7d,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0x42,0x42,
+0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0x42,0xa7d,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0x42,0x42,
+0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0x42,
+0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,
+0xa7d,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0x42,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,
+0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,
+0xa7d,0xa7d,0xa7d,0x42,0x42,0x1356,0x1356,0xdec,0xdef,0xa77,0xa80,0xa74,0xa74,0xa74,0xa74,0xa80,
+0xa80,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa71,0xa71,0xa71,0xa71,0xa71,0xa71,
+0xa71,0xa71,0xa71,0xa71,0xa71,0x42,0x42,0x42,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,
+0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0x1731,0x45,0x45,
+0x172e,0x172e,0x172e,0x172e,0x172e,0x172e,0x45,0x45,0xa95,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,
+0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,
+0xa98,0xa98,0xa98,0xa92,0xa8f,0x48,0x48,0x48,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,
+0xa9e,0xa9e,0xa9e,0xa9b,0xa9b,0xa9b,0xa9e,0xa9e,0xa9e,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,
+0x151b,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xaa1,0xabf,
+0xabf,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa7,0xaa4,0xab6,0xab6,0xab9,0xac2,
+0xab0,0xaad,0xab6,0xab3,0xac2,0xd05,0x4e,0x4e,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,
+0xabc,0xabc,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,
+0xd08,0xd08,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0xad1,0xad1,0xb52,0xb55,0xad7,0xb4f,0xad4,0xad1,
+0xada,0xae9,0xadd,0xaec,0xaec,0xaec,0xac8,0x51,0xae0,0xae0,0xae0,0xae0,0xae0,0xae0,0xae0,0xae0,
+0xae0,0xae0,0x51,0x51,0x51,0x51,0x51,0x51,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,
+0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,
+0x1953,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,
+0xae3,0xacb,0x1002,0x51,0x51,0x51,0x51,0x51,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,
+0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,
+0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x54,0x54,
+0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x54,0x54,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,
+0x54,0x4ce,0x54,0x4ce,0x54,0x4ce,0x54,0x4ce,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,
+0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,
+0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x54,0x54,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,
+0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x54,0x4cb,0x4cb,
+0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4c5,0x4cb,0x4c5,0x4c5,0x4c2,0x4cb,0x4cb,0x4cb,0x54,0x4cb,0x4cb,
+0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4c2,0x4c2,0x4c2,0x4cb,0x4cb,0x4cb,0x4cb,0x54,0x54,0x4cb,0x4cb,
+0x4ce,0x4ce,0x4ce,0x4ce,0x54,0x4c2,0x4c2,0x4c2,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,
+0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4c2,0x4c2,0x4c2,0x54,0x54,0x4cb,0x4cb,0x4cb,0x54,0x4cb,0x4cb,
+0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4c8,0x4c5,0x54,0xbca,0xbcd,0xbcd,0xbcd,0x100b,0x57,0x14f7,0x14f7,
+0x14f7,0x14f7,0x4d7,0x4d7,0x4d7,0x4d7,0x4d7,0x4d7,0x522,0xbdf,0x5a,0x5a,0x6e1,0x522,0x522,0x522,
+0x522,0x522,0x528,0x53a,0x528,0x534,0x52e,0x6e4,0x51f,0x6de,0x6de,0x6de,0x6de,0x51f,0x51f,0x51f,
+0x51f,0x51f,0x525,0x537,0x525,0x531,0x52b,0x5a,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0x1359,0x1359,0x1359,
+0x1359,0x1359,0x1359,0x1359,0x1359,0x5a,0x5a,0x5a,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,
+0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549,
+0x549,0x549,0x549,0x549,0x549,0x546,0x546,0x546,0x546,0x549,0xafb,0xafe,0xbe5,0xbeb,0xbeb,0xbe8,
+0xbe8,0xbe8,0xbe8,0xe01,0xf15,0xf15,0xf15,0xf15,0x114c,0x60,0x60,0x60,0x60,0x60,0x60,0x60,
+0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x579,0x579,0x579,0xb07,0xf1e,0x1011,0x1011,0x1011,
+0x1011,0x12ab,0x1737,0x1737,0x63,0x63,0x63,0x63,0x70b,0x70b,0x70b,0x70b,0x70e,0x70e,0x70e,0x70e,
+0x70e,0x70e,0x585,0x585,0x582,0x582,0x582,0x582,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0xb13,0xb13,0x66,
+0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,
+0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x5af,0x5af,0x5af,0x5af,0x5af,0x5af,0x5af,0x5af,
+0x5af,0x5af,0x5af,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,
+0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,
+0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,
+0xb2e,0xb2e,0x6c,0xb2e,0xb2e,0xb2e,0xb2e,0xb31,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,
+0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb31,0x6c,0x6c,0x6c,0x6c,
+0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,
+0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0x6f,0x6f,
+0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x75,0x843,0x83d,0x843,0x83d,0x843,0x83d,0x843,
+0x83d,0x843,0x83d,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,
+0x840,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,0x83d,0x83d,0x83d,0x843,0x83d,0x843,0x83d,0x843,
+0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x843,0x83d,0x83d,0x83d,0x83d,0x83d,0x840,0xc93,0xc93,0x75,
+0x75,0x954,0x954,0x91e,0x91e,0x846,0x849,0xc90,0x78,0x78,0x78,0x78,0x78,0x85b,0x85b,0x85b,
+0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,
+0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x113a,0x191a,0x1a01,0x7b,0x85e,0x85e,0x85e,
+0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x7b,
+0x927,0x927,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,
+0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,
+0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0xd98,0xd98,0x7e,
+0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0x81,0x81,0x81,
+0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,
+0xb4c,0xc9c,0xb4c,0xb4c,0xb4c,0xc9c,0xb4c,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,
+0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,
+0x9db,0x9db,0x9db,0x9db,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,
+0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,
+0x60c,0x60c,0x60c,0x60c,0x60c,0x60c,0x60c,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,
+0x8a,0x8a,0x8a,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x8a,0x8a,0x8a,0x8a,0x8a,0xb1f,0x5fd,0x603,
+0x609,0x609,0x609,0x609,0x609,0x609,0x609,0x609,0x609,0x600,0x603,0x603,0x603,0x603,0x603,0x603,
+0x603,0x603,0x603,0x603,0x603,0x603,0x603,0x8a,0x603,0x603,0x603,0x603,0x603,0x8a,0x603,0x8a,
+0x603,0x603,0x8a,0x603,0x603,0x8a,0x603,0x603,0x603,0x603,0x603,0x603,0x603,0x603,0x603,0x606,
+0x61e,0x618,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,
+0x136b,0x136b,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,
+0x8d,0x8d,0x8d,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x61e,0x618,
+0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,
+0x61b,0x618,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x618,0x61b,0x618,0x618,0x61b,0x61b,0x618,0x618,
+0x618,0x618,0x618,0x61b,0x618,0x618,0x61b,0x618,0x61b,0x61b,0x61b,0x618,0x61b,0x61b,0x61b,0x61b,
+0x8d,0x8d,0x61b,0x61b,0x61b,0x61b,0x618,0x618,0x61b,0x618,0x618,0x618,0x618,0x61b,0x618,0x618,
+0x618,0x618,0x618,0x61b,0x61b,0x61b,0x618,0x618,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,
+0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,
+0x61e,0x61e,0x978,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x615,0x615,0xc24,0xdb3,0x8d,0x8d,
+0x87f,0x891,0x88e,0x891,0x88e,0xcb1,0xcb1,0xda4,0xda1,0x882,0x882,0x882,0x882,0x894,0x894,0x894,
+0x8ac,0x8af,0x8be,0x90,0x8b2,0x8b5,0x8c1,0x8c1,0x8a9,0x8a0,0x89a,0x8a0,0x89a,0x8a0,0x89a,0x89d,
+0x89d,0x8b8,0x8b8,0x8bb,0x8b8,0x8b8,0x8b8,0x90,0x8b8,0x8a6,0x8a3,0x89d,0x90,0x90,0x90,0x90,
+0x62a,0x636,0x62a,0xc27,0x62a,0x93,0x62a,0x636,0x62a,0x636,0x62a,0x636,0x62a,0x636,0x62a,0x636,
+0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x633,
+0x62d,0x633,0x62d,0x630,0x636,0x633,0x62d,0x633,0x62d,0x633,0x62d,0x633,0x62d,0x93,0x93,0x627,
+0x77d,0x780,0x795,0x798,0x777,0x780,0x780,0x99,0x75f,0x762,0x762,0x762,0x762,0x75f,0x75f,0x99,
+0x96,0x96,0x96,0x96,0x96,0x96,0x96,0x96,0x96,0xb22,0xb22,0xb22,0x9de,0x759,0x639,0x639,
+0x99,0x7a7,0x786,0x777,0x780,0x77d,0x777,0x789,0x77a,0x774,0x777,0x795,0x78c,0x783,0x7a4,0x777,
+0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x792,0x78f,0x795,0x795,0x795,0x7a7,
+0x768,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,
+0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x99,
+0x99,0x99,0x765,0x765,0x765,0x765,0x765,0x765,0x99,0x99,0x765,0x765,0x765,0x765,0x765,0x765,
+0x99,0x99,0x765,0x765,0x765,0x765,0x765,0x765,0x99,0x99,0x765,0x765,0x765,0x99,0x99,0x99,
+0xb6d,0xb6d,0xb6d,0xb6d,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x18b7,0x18b7,0x18b7,
+0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,
+0xb73,0xb73,0xb73,0x9f,0x9f,0x9f,0x9f,0x9f,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,
+0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,
+0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xa2,0xa2,
+0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xa5,
+0xa5,0x101d,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,
+0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,
+0x173d,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,
+0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
+0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xa8,0xb9d,0xb9d,
+0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
+0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
+0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xa8,0xba0,0xba0,0xa8,0xa8,0xba0,0xa8,0xa8,0xba0,0xba0,0xa8,
+0xa8,0xba0,0xba0,0xba0,0xba0,0xa8,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xb9d,0xb9d,
+0xb9d,0xb9d,0xa8,0xb9d,0xa8,0xb9d,0xb9d,0xb9d,0xb9d,0xd29,0xb9d,0xb9d,0xa8,0xb9d,0xb9d,0xb9d,
+0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
+0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xa8,0xba0,
+0xba0,0xba0,0xba0,0xa8,0xa8,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xa8,0xba0,0xba0,
+0xba0,0xba0,0xba0,0xba0,0xba0,0xa8,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
+0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
+0xba0,0xba0,0xa8,0xba0,0xba0,0xba0,0xba0,0xa8,0xba0,0xba0,0xba0,0xba0,0xba0,0xa8,0xba0,0xa8,
+0xa8,0xa8,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xa8,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
+0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xe16,0xe16,0xa8,0xa8,0xba0,0xba0,0xba0,0xba0,
+0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
+0xba0,0xba0,0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb97,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xf2d,0xf2a,
+0xa8,0xa8,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,
+0xb9a,0xb9a,0xb9a,0xb9a,0xab,0xba6,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,
+0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,
+0xab,0xab,0xab,0xab,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,
+0xc36,0xae,0xc36,0xc36,0xc36,0xc36,0xc30,0xc30,0xc33,0xae,0xae,0xae,0xae,0xae,0xae,0xae,
+0xae,0xae,0xae,0xae,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,
+0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc39,0xc39,0xc3c,0xca5,0xca5,0xb1,0xb1,0xb1,0xb1,0xb1,
+0xb1,0xb1,0xb1,0xb1,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,
+0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc42,0xc42,0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,
+0xb4,0xb4,0xb4,0xb4,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,
+0xc4b,0xb7,0xc4b,0xc4b,0xc4b,0xb7,0xc48,0xc48,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,
+0xb7,0xb7,0xb7,0xb7,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,
+0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,
+0xd3b,0x1539,0x1539,0xba,0xd2c,0xd2c,0xd2c,0xd38,0xd38,0xd38,0xd38,0xd2c,0xd2c,0xd38,0xd38,0xd38,
+0xba,0xba,0xba,0xba,0xd38,0xd38,0xd2c,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd2f,0xd2f,0xd2f,
+0xba,0xba,0xba,0xba,0xd32,0xba,0xba,0xba,0xd3e,0xd3e,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,
+0xd35,0xd35,0xd35,0xd35,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,
+0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xbd,0xbd,0xd41,0xd41,0xd41,0xd41,0xd41,0xbd,0xbd,0xbd,
+0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,
+0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0xc0,0xc0,0x153c,0x153c,
+0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,
+0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0xc0,0x1abe,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,
+0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xc3,0xd68,0xd68,0xd68,
+0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,
+0xd68,0xd68,0xd68,0xc3,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,
+0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xc3,0xd68,0xd68,0xc3,0xd68,0xd68,0xd68,0xd68,0xd68,
+0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xc3,0xc3,0xd68,0xd68,0xd68,0xd68,
+0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,
+0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,
+0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xd6b,0xd6b,0xd6b,0xd6b,
+0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,
+0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xc6,0xc6,0xc6,0xc6,0xc6,0xdad,0xdad,0xdad,0xc9,
+0xc9,0xc9,0xc9,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,
+0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xc9,0xc9,0xc9,0xdaa,
+0xdaa,0xdaa,0xdaa,0xdaa,0xdaa,0xdaa,0xdaa,0xdaa,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,
+0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,
+0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xcc,0xd6e,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,
+0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,
+0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xcf,0xcf,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,
+0xd77,0xd77,0xcf,0xcf,0xcf,0xcf,0xcf,0xcf,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,
+0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd2,0xd2,
+0xd7d,0xd2,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,
+0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd2,0xd7d,0xd7d,0xd2,0xd2,0xd2,
+0xd7d,0xd2,0xd2,0xd7d,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,
+0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd5,0xd5,0xd5,0xd5,0xd5,
+0xd5,0xd5,0xd5,0xd5,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0x153f,
+0x153f,0x17f1,0x17f1,0xdb,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,
+0x1acd,0x132,0x132,0x132,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,
+0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe3a,0xe3a,0xe40,0xe40,0xe3a,
+0xde,0xde,0xe3d,0xe3d,0x1149,0x1149,0x1149,0x1149,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,
+0xe1,0xe1,0xe1,0xe1,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,
+0xca2,0xca2,0xca2,0xca2,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1542,0x1542,0x1542,0x1542,0x1542,
+0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1545,0x18bd,0x18bd,0x193e,0x18bd,0xe4,0x17f4,
+0x1377,0x118b,0xf3c,0xf3c,0xe55,0xe52,0xe55,0xe52,0xe52,0xe49,0xe49,0xe49,0xe49,0xe49,0xe49,0x1194,
+0x1191,0x1194,0x1191,0x118e,0x118e,0x118e,0x1434,0x1431,0xe7,0xe7,0xe7,0xe7,0xe7,0xe4f,0xe4c,0xe4c,
+0xe4c,0xe49,0xe4f,0xe4c,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,
+0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xea,0xea,0xea,0xea,0xea,
+0xea,0xea,0xea,0xea,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xea,0xe58,0xe58,0xe58,0xe58,
+0xe58,0xe58,0xe58,0xea,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xea,0xe58,0xe58,0xe58,0xe58,
+0xe58,0xe58,0xe58,0xea,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,
+0xe5e,0xe5e,0xe5e,0xe5e,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xed,0xed,
+0xed,0xed,0xed,0xed,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xf0,0x1437,0xf0,0xf0,0xf0,0xf0,
+0xf0,0x1437,0xf0,0xf0,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,
+0xeb8,0xeb8,0xeb8,0xeb8,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,
+0xe67,0xe67,0xe67,0xf3,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,
+0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,
+0xe64,0xe64,0xe64,0xf3,0xe79,0xe6d,0xe6d,0xe6d,0xf6,0xe6d,0xe6d,0xf6,0xf6,0xf6,0xf6,0xf6,
+0xe6d,0xe6d,0xe6d,0xe6d,0xe79,0xe79,0xe79,0xe79,0xf6,0xe79,0xe79,0xe79,0xf6,0xe79,0xe79,0xe79,
+0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,
+0xe79,0xe79,0xe79,0xe79,0x195c,0x195c,0xf6,0xf6,0xe6a,0xe6a,0xe6a,0xf6,0xf6,0xf6,0xf6,0xe70,
+0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0x1959,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,
+0xe76,0xe76,0xe76,0xe76,0xe76,0xe76,0xe7c,0xe7c,0xe73,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,
+0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0x119a,0x119a,0xf9,0xf9,0xf9,0xf9,
+0xe88,0xe88,0xe88,0xe88,0xe88,0xe8b,0xe8b,0xe8b,0xe88,0xe88,0xe8b,0xe88,0xe88,0xe88,0xe88,0xe88,
+0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xf9,0xf9,0xf9,0xf9,0xf9,0xf9,0xe85,0xe85,0xe85,0xe85,
+0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0x1197,0xf9,0xf9,0xf9,0xe82,0xe82,0xe91,0xe91,0xe91,0xe91,
+0xfc,0xfc,0xfc,0xfc,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe8e,0xe91,0xe91,0xe91,
+0xe91,0xe91,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0x154e,0x1554,0x1551,0x189c,
+0x17f7,0x18c0,0x18c0,0x18c0,0x18c0,0x18c0,0x1962,0x195f,0x1965,0x195f,0x1965,0x1a25,0x1ac1,0x1ac1,0x1ac1,0xff,
+0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+0xeb5,0xeb5,0xeb5,0xeb2,0xeb2,0xea9,0xea9,0xeb2,0xeaf,0xeaf,0xeaf,0xeaf,0x1ac4,0x102,0x102,0x102,
+0x1314,0x1314,0x1314,0x1317,0x1317,0x1317,0x130e,0x130e,0x1311,0x130e,0x156,0x156,0x156,0x156,0x156,0x156,
+0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0x1443,0x1443,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0xebb,
+0x137d,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x137a,
+0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc75,
+0xee8,0xed9,0xed3,0xee5,0xee2,0xedc,0xedc,0xeeb,0xed6,0xedf,0x108,0x108,0x108,0x108,0x108,0x108,
+0xf6f,0xf6f,0xf5a,0xf6f,0xf72,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0x10e,0x10e,0x10e,0x10e,
+0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf7b,0xf7b,0xf60,0xf66,0xf7b,0xf7b,
+0xf63,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d,
+0xf5d,0xf5d,0xf5d,0xf5d,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0x10e,0x10e,0x10e,
+0x111,0x111,0x1a2b,0x1a28,0x1a2b,0x1a2b,0x1a2b,0x1aca,0x1ac7,0x1aca,0x1ac7,0x111,0x111,0x111,0x111,0x111,
+0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,
+0x111,0x1aca,0x1ac7,0x155d,0x144c,0x144c,0x1380,0x1074,0x1074,0x1074,0x1074,0x1074,0xf8a,0xf8a,0xf8a,0xf8a,
+0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,
+0xf87,0xf87,0xf8d,0xf8d,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0xf96,0xf96,0xf96,0xf96,
+0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,
+0xf96,0xf96,0xf90,0xf90,0xf90,0xf90,0x11a3,0x11a3,0x117,0x117,0x117,0xf93,0x1563,0x1563,0x1563,0x1563,
+0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,
+0x1563,0x1563,0x1563,0x1563,0x1563,0x174c,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,
+0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,
+0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0xf9f,0xf9f,0xf9f,0x1569,0x1569,0x1569,0x1569,0x1569,
+0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x11d,0xf9c,0xf9c,0xf9c,0xf9c,0x1566,0x11d,0x11d,0x11d,
+0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,
+0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974,
+0x1974,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x109b,0x109b,0x109b,0x109b,0x1098,0x1098,0x1098,0x1098,
+0x1098,0x1098,0x1098,0x1098,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1098,0x1098,0x108f,0x108c,
+0x123,0x123,0x123,0x109e,0x109e,0x1092,0x1092,0x1092,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,
+0x1095,0x1095,0x123,0x123,0x123,0x109b,0x109b,0x109b,0x10a1,0x10a1,0x10a1,0x10a1,0x10a1,0x10a1,0x10a1,0x10a1,
+0x10a1,0x10a1,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,
+0x10b6,0x10b6,0x10b9,0x10b9,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,
+0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x10e0,0x10e0,0x10e0,0x10e0,0x10da,0x17fd,0x129,0x129,
+0x129,0x129,0x129,0x129,0x129,0x129,0x10e6,0x10e6,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,
+0x10dd,0x10dd,0x129,0x129,0x129,0x129,0x129,0x129,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x10f8,
+0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10fe,0x1101,0x12c,0x12c,0x12c,0x12c,
+0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x10fb,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,
+0x1113,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1110,0x1110,0x1107,0x1107,0x1110,0x1110,0x1107,0x1107,0x12f,
+0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x1113,0x1113,0x1113,0x1107,0x1113,0x1113,0x1113,0x1113,
+0x1113,0x1113,0x1113,0x1113,0x1107,0x1110,0x12f,0x12f,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,
+0x110d,0x110d,0x12f,0x12f,0x110a,0x1116,0x1116,0x1116,0x1575,0x132,0x132,0x132,0x132,0x132,0x132,0x132,
+0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,
+0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,
+0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,
+0x111c,0x111c,0x111c,0x111c,0x111c,0x111f,0x135,0x135,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,
+0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,
+0x1122,0x1122,0x1122,0x1122,0x1122,0x138,0x138,0x138,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,
+0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,
+0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,
+0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,
+0x112b,0x112b,0x13e,0x13e,0x13e,0x13e,0x13e,0x1128,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,
+0x112e,0x112e,0x112e,0x112e,0x141,0x141,0x141,0x141,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,
+0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x144,0x144,0x144,0x144,
+0x144,0x144,0x144,0x144,0x144,0x144,0x144,0x144,0x11a9,0x11a9,0x11a9,0x11a9,0x11b2,0x11a9,0x11a9,0x11a9,
+0x11b2,0x11a9,0x11a9,0x11a9,0x11a9,0x11a6,0x147,0x147,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11b5,
+0x11af,0x11b5,0x11af,0x11af,0x11af,0x11b5,0x11b5,0x147,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,
+0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x14a,0x14a,
+0x14a,0x14a,0x14a,0x14a,0x14a,0x14a,0x14a,0x14a,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,
+0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d0,0x11bb,0x11d0,
+0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x14d,0x11c4,0x11cd,0x11bb,0x11cd,0x11cd,0x11bb,0x11bb,0x11bb,
+0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11bb,0x11bb,0x11c1,0x11c1,0x11c1,
+0x11c1,0x11c1,0x11c1,0x11c1,0x11c1,0x14d,0x14d,0x11be,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,
+0x11ca,0x11ca,0x14d,0x14d,0x14d,0x14d,0x14d,0x14d,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,
+0x11ca,0x11ca,0x14d,0x14d,0x14d,0x14d,0x14d,0x14d,0x11c7,0x11c7,0x11c7,0x11c7,0x11c7,0x11c7,0x11c7,0x11d6,
+0x11d9,0x11d9,0x11d9,0x11d9,0x11c7,0x11c7,0x14d,0x14d,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,
+0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15bd,0x1adf,0x1329,0x1302,0x1320,0x1320,0x1320,0x1320,0x1320,0x1320,
+0x1320,0x1308,0x1305,0x12fc,0x12fc,0x1326,0x12fc,0x12fc,0x12fc,0x12fc,0x130b,0x14eb,0x14f1,0x14ee,0x14ee,0x193b,
+0x1716,0x1716,0x1aac,0x150,0x150,0x150,0x150,0x150,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,
+0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11e5,0x11e5,0x11e8,0x11f1,0x11eb,0x11eb,0x11eb,0x11f1,
+0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,
+0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,
+0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x159,0x159,0x159,0x120f,0x1203,0x1203,0x1203,0x1203,0x1203,0x1203,0x1206,
+0x1215,0x1215,0x1203,0x1203,0x1203,0x1203,0x15c,0x131a,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,
+0x1209,0x1209,0x15c,0x15c,0x15c,0x15c,0x1203,0x1203,0x1233,0x1227,0x1233,0x15f,0x15f,0x15f,0x15f,0x15f,
+0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,
+0x15f,0x15f,0x15f,0x1230,0x1230,0x1236,0x122a,0x122d,0x124b,0x124b,0x124b,0x1245,0x1245,0x123c,0x1245,0x1245,
+0x123c,0x1245,0x1245,0x124e,0x1248,0x123f,0x162,0x162,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,
+0x1242,0x1242,0x162,0x162,0x162,0x162,0x162,0x162,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x165,
+0x165,0x165,0x165,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,
+0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,
+0x165,0x165,0x165,0x165,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,
+0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x168,0x125a,0x1257,0x1257,0x1257,0x1257,
+0x1257,0x1257,0x1257,0x1257,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,
+0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x16b,0x16b,0x16b,0x1266,0x1269,0x1269,
+0x1269,0x1269,0x1269,0x1269,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,
+0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x16e,0x16e,0x126f,0x126f,0x126f,0x126f,
+0x126f,0x126f,0x126f,0x126f,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,
+0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x171,0x171,0x171,0x171,0x171,0x1275,0x1275,0x1275,0x1275,
+0x1275,0x1275,0x1275,0x1275,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,
+0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,
+0x127e,0x127e,0x127e,0x177,0x1299,0x1299,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,
+0x17a,0x197d,0x17a,0x17a,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,
+0x14ca,0x14ca,0x14ca,0x14ca,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,
+0x129f,0x129f,0x129f,0x17d,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1a52,0x1a4c,0x279,0x279,0x279,
+0x279,0x279,0x279,0x279,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,
+0x187e,0x1ad0,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,
+0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,
+0x180,0x180,0x180,0x180,0x180,0x180,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,
+0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,
+0x12f6,0x13f5,0x13f2,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,
+0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f3,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,
+0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f3,0x12f0,0x12f0,0x13f5,0x13f5,
+0x13f5,0x13f5,0x13f5,0x13f2,0x13f5,0x13f5,0x13f5,0x1881,0x183,0x183,0x183,0x183,0x12ed,0x12ed,0x12ed,0x12ed,
+0x12ed,0x12ed,0x12ed,0x12ed,0x12ed,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x1419,0x1419,0x183,0x183,
+0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x1920,0x1920,0x1920,0x1920,
+0x1920,0x1920,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,
+0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,
+0x183,0x183,0x183,0x183,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,
+0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x138f,0x138f,0x138f,
+0x186,0x186,0x1392,0x186,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x1398,0x13a1,0x139b,0x139b,0x13a1,0x13a1,
+0x13a1,0x139b,0x13a1,0x139b,0x139b,0x139b,0x13a4,0x13a4,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,
+0x139e,0x139e,0x139e,0x139e,0x18c,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x18c,0x18c,0x13aa,0x13aa,0x13aa,
+0x13aa,0x13aa,0x13aa,0x18c,0x18c,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x18c,0x18c,0x18c,0x18c,0x18c,
+0x18c,0x18c,0x18c,0x18c,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x18c,0x13aa,0x13aa,0x13aa,0x13aa,
+0x13aa,0x13aa,0x13aa,0x18c,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,
+0x161d,0x161d,0x161d,0x161d,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13b0,0x13c2,0x13c2,0x13b6,0x13b6,0x13b6,
+0x13b6,0x13b6,0x18f,0x18f,0x18f,0x18f,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,
+0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,
+0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x1584,
+0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,
+0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x192,0x192,0x192,0x192,0x192,0x192,0x192,
+0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x195,
+0x195,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x1587,
+0x195,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13fe,
+0x195,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,
+0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,
+0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x195,0x195,0x195,0x195,0x195,0x195,0x195,0x195,0x195,0x195,
+0x1413,0x1410,0x1410,0x1410,0x1410,0x1410,0x159c,0x159c,0x159c,0x159c,0x159c,0x159f,0x170d,0x159f,0x159f,0x159f,
+0x17d9,0x188a,0x188a,0x18c3,0x18c3,0x1a8e,0x1b39,0x1b39,0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x198,
+0x159f,0x159f,0x159f,0x159f,0x159f,0x159f,0x159c,0x159c,0x159c,0x159f,0x159c,0x170a,0x170a,0x198,0x198,0x198,
+0x159f,0x159c,0x159c,0x159f,0x188a,0x188a,0x188a,0x1926,0x1926,0x1a07,0x1a8e,0x1b39,0x1b39,0x198,0x198,0x198,
+0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,
+0x13cb,0x13cb,0x13cb,0x13cb,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,
+0x1467,0x15a5,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x15a5,0x15a5,0x15a5,
+0x15a5,0x15a5,0x15a5,0x175e,0x175e,0x19e,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1ad3,0x1ad3,
+0x1ad3,0x1ad3,0x1ad3,0x1ad3,0x1ad3,0x1ad3,0x1ad3,0x1ad3,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,
+0x19e,0x19e,0x19e,0x1983,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,
+0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,
+0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,
+0x1a1,0x146d,0x146d,0x1a1,0x146d,0x1a1,0x1a1,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,
+0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x1a1,0x1a1,0x1a1,
+0x1a1,0x1a1,0x146d,0x1a1,0x1a1,0x1a1,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x146d,0x146d,0x146d,
+0x1a1,0x146d,0x146d,0x1a1,0x146d,0x1a1,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x146d,
+0x1a1,0x146d,0x146d,0x1a1,0x146d,0x1a1,0x1a1,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,
+0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x1a1,
+0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x146d,
+0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x1a1,0x1a1,0x1a1,0x1a1,
+0x1a1,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x146d,
+0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x1a1,0x1a1,0x1a1,0x1a1,
+0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,
+0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x146a,0x146a,0x1a1,0x1a1,
+0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1482,0x1482,0x1482,0x1482,
+0x1482,0x1482,0x1482,0x1470,0x1470,0x1470,0x1470,0x1470,0x147f,0x1470,0x1473,0x1473,0x1470,0x1470,0x1470,0x1476,
+0x1476,0x1a4,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x1479,0x1485,0x1485,0x1485,
+0x1989,0x1986,0x1986,0x1ad6,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x162f,0x162f,0x162f,0x162f,
+0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x1491,0x1491,0x1491,0x1491,
+0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x148e,0x1488,0x1488,0x148e,0x148e,0x1497,0x1497,0x1491,0x1494,
+0x1494,0x148e,0x148b,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x149a,0x149a,0x149a,0x149a,
+0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,
+0x149a,0x149a,0x149a,0x149a,0x1aa,0x1aa,0x1aa,0x1aa,0x1761,0x1761,0x149a,0x149a,0x1761,0x1761,0x1761,0x1761,
+0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1aa,0x1aa,0x1761,0x1761,
+0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x14a6,0x14a6,0x14a6,0x14a6,
+0x14a6,0x1a37,0x1a37,0x1a37,0x1a37,0x1a37,0x1a37,0x1ad,0x1ad,0x1ad,0x1ad,0x1a31,0x14a6,0x14a3,0x14a3,0x14a3,
+0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x1a34,0x1a34,0x1a34,0x1a34,
+0x1a34,0x1a34,0x1a34,0x1a34,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x14a0,0x14a0,0x14a0,0x14a0,0x14a9,
+0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14ca,0x14ca,0x14ca,0x14ca,
+0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x14c7,0x14c7,0x14c7,0x14c7,
+0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x14cd,0x14cd,0x14cd,0x14cd,
+0x14cd,0x14cd,0x14cd,0x14cd,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1323,0x1320,0x1323,0x12ff,
+0x1320,0x1326,0x1326,0x1329,0x1326,0x1329,0x132c,0x1320,0x1329,0x1329,0x1320,0x1320,0x14df,0x14df,0x14df,0x14df,
+0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14d0,0x14d9,0x14d0,0x14d9,0x14d9,0x14d0,0x14d0,0x14d0,0x14d0,
+0x14d0,0x14d0,0x14dc,0x14d3,0x1a3a,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x15b1,0x15b1,0x15b1,0x15b1,
+0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x1b9,0x1b9,0x15ae,0x15ae,0x15ae,0x15ae,
+0x15ae,0x15b4,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1719,0x1710,0x1710,0x1710,
+0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,
+0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1bf,0x1bf,0x1bf,0x1bf,0x1adf,0x1c2,0x1c2,0x1c2,
+0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,
+0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,
+0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,
+0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x1c5,0x1c5,0x1c5,
+0x1c5,0x1c5,0x1c5,0x1c5,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x1c5,0x1c5,
+0x15c9,0x15c3,0x15c6,0x15cf,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x1c8,0x1c8,0x1c8,0x1c8,
+0x1c8,0x1c8,0x1c8,0x1c8,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,
+0x15ba,0x15ba,0x15ba,0x15ba,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,
+0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x198c,0x198c,0x198c,0x198c,0x1cb,0x1cb,0x1cb,
+0x1cb,0x1cb,0x1cb,0x1cb,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,
+0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,
+0x1cb,0x1cb,0x1cb,0x1cb,0x1776,0x171c,0x15de,0x1722,0x1ce,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,
+0x15e7,0x1ce,0x1ce,0x15e7,0x15e7,0x1ce,0x1ce,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,
+0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x1ce,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x1ce,0x15e7,0x15e7,
+0x1ce,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x1ce,0x1a16,0x171f,0x15e7,0x15d8,0x15de,0x15d8,0x15de,0x15de,0x15de,
+0x15de,0x1ce,0x1ce,0x15de,0x15de,0x1ce,0x1ce,0x15e1,0x15e1,0x15e4,0x1ce,0x1ce,0x1779,0x1ce,0x1ce,0x1ce,
+0x1ce,0x1ce,0x1ce,0x15d8,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x15ea,0x15e7,0x15e7,0x15e7,0x15e7,0x15de,0x15de,
+0x1ce,0x1ce,0x15db,0x15db,0x15db,0x15db,0x15db,0x15db,0x15db,0x1ce,0x1ce,0x1ce,0x15db,0x15db,0x15db,0x15db,
+0x15db,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x15ff,0x15ff,0x15ff,0x15ff,
+0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x1d1,0x15ff,
+0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15f9,0x15f9,0x15f9,0x15ed,
+0x15ed,0x15ed,0x15f9,0x15f9,0x15ed,0x15fc,0x15f0,0x15ed,0x1602,0x1602,0x15f6,0x1602,0x1602,0x15f3,0x180c,0x1d1,
+0x1611,0x1611,0x1611,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1608,0x160b,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,
+0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,
+0x177c,0x177c,0x177c,0x177c,0x161d,0x161a,0x1a3d,0x1a3d,0x1ae5,0x1ae8,0x1ae2,0x1ae2,0x1d7,0x1d7,0x1d7,0x1d7,
+0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,
+0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,
+0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,
+0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,
+0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,
+0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,
+0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,
+0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,
+0x162f,0x162f,0x162f,0x1626,0x1629,0x162c,0x162f,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,
+0x163e,0x163e,0x163e,0x163e,0x163e,0x1632,0x1632,0x1e0,0x1e0,0x1e0,0x1e0,0x1635,0x1635,0x1635,0x1635,0x1635,
+0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x1638,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,
+0x1647,0x1647,0x1647,0x1647,0x1647,0x1e3,0x1e3,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,
+0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,
+0x164a,0x165c,0x165c,0x1650,0x1659,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,
+0x1653,0x1653,0x1653,0x1653,0x1653,0x1653,0x1653,0x1653,0x1653,0x1653,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,
+0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,
+0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1e9,
+0x166e,0x166e,0x166e,0x166e,0x166e,0x1668,0x1671,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,
+0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166e,0x166e,0x166e,0x166e,0x166e,0x1ec,
+0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,
+0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1ef,
+0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,
+0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1680,0x1680,0x1680,0x1680,0x1680,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,
+0x169b,0x169b,0x169e,0x169e,0x16a1,0x1692,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,
+0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1f5,0x1692,0x1692,0x1692,0x1692,0x1692,
+0x1692,0x1692,0x1f5,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,
+0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x169b,0x169b,0x169b,
+0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,
+0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,
+0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,
+0x16b3,0x16b3,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x16b0,0x16b0,0x16b0,0x16b0,0x1fb,0x1fb,0x1fb,
+0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16b6,
+0x16c8,0x16c8,0x16b6,0x16b6,0x16b6,0x16b6,0x201,0x201,0x16c8,0x16c8,0x16cb,0x16cb,0x16b6,0x16b6,0x16c8,0x16bc,
+0x16b9,0x16bf,0x16d1,0x16d1,0x16c2,0x16c2,0x16c5,0x16c5,0x16c5,0x16d1,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,
+0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1782,0x1782,0x1782,0x1782,0x177f,0x177f,0x201,0x201,
+0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,
+0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,
+0x204,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,
+0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,
+0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x207,0x207,0x207,0x207,
+0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,
+0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,
+0x16d7,0x16d7,0x207,0x207,0x207,0x207,0x207,0x207,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,
+0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,
+0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x207,0x207,0x1aeb,0x1aeb,0x207,0x207,
+0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,
+0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,
+0x16da,0x16e9,0x16e0,0x16dd,0x16ef,0x16ef,0x16e3,0x16ef,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,
+0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,
+0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f2,0x16f2,0x16f2,0x16f2,0x16f2,0x16f2,
+0x16f2,0x16f2,0x16f2,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x16fb,
+0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,
+0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x198f,0x210,0x210,0x1788,0x1788,0x1788,
+0x1794,0x1794,0x1788,0x1788,0x1788,0x1788,0x1794,0x1788,0x1788,0x1788,0x1788,0x178b,0x210,0x210,0x210,0x210,
+0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x178e,0x178e,0x179a,0x179a,0x179a,0x178e,
+0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,
+0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,
+0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,
+0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x219,0x17af,0x17af,0x219,0x219,
+0x219,0x219,0x219,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x21c,
+0x17b2,0x21c,0x17b2,0x17b2,0x17b2,0x17b2,0x21c,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,
+0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x21c,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,
+0x17b2,0x17b5,0x21c,0x21c,0x21c,0x21c,0x21c,0x21c,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,
+0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,
+0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x21f,0x21f,0x21f,0x21f,0x21f,
+0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,
+0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x21f,0x21f,0x21f,0x21f,0x21f,
+0x21f,0x21f,0x17b8,0x17b8,0x17b8,0x17b8,0x17b8,0x17b8,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,
+0x1929,0x1929,0x1929,0x1929,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a94,0x1b3c,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1b3f,
+0x1b3c,0x222,0x1a0a,0x1a94,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x17df,0x1a0a,0x1a0a,0x1a94,0x1a94,0x1a94,0x1a94,0x1a94,
+0x1a94,0x1a94,0x1a94,0x1b3c,0x222,0x1a97,0x1a97,0x1a97,0x1929,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,
+0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x1929,0x1929,0x17c4,0x17c4,0x17c4,0x17c4,0x17c1,0x17c4,0x17c4,0x17c7,
+0x17ca,0x17c7,0x17c7,0x17c4,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,
+0x225,0x225,0x225,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x1821,0x1821,0x1821,0x1821,0x1818,0x1818,0x1818,0x1812,
+0x1815,0x1815,0x1815,0x1a40,0x228,0x228,0x228,0x228,0x181e,0x181e,0x181e,0x181e,0x181e,0x181e,0x181e,0x181e,
+0x181e,0x181e,0x228,0x228,0x228,0x228,0x181b,0x181b,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,
+0x183c,0x22b,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,
+0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x1839,0x1827,0x1827,0x1827,0x1827,
+0x1827,0x1827,0x1827,0x22b,0x1827,0x1827,0x1827,0x1827,0x1827,0x1827,0x1839,0x182a,0x183c,0x183f,0x183f,0x1833,
+0x1830,0x1830,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x1836,0x1836,0x1836,0x1836,
+0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,
+0x182d,0x182d,0x182d,0x182d,0x182d,0x22b,0x22b,0x22b,0x184b,0x184e,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,
+0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,
+0x1842,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,
+0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x231,
+0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,
+0x1845,0x231,0x231,0x1845,0x1845,0x1845,0x1845,0x1845,0x1893,0x192f,0x1a9a,0x1a9d,0x1b45,0x234,0x234,0x234,
+0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x1b42,0x1b42,0x234,0x234,0x234,0x234,0x234,0x234,
+0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,
+0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x237,0x237,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,
+0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x237,0x1851,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,
+0x1848,0x1851,0x1848,0x1848,0x1851,0x1848,0x1848,0x237,0x237,0x237,0x237,0x237,0x237,0x237,0x237,0x237,
+0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x23a,0x23a,0x23a,
+0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,
+0x186f,0x186f,0x1860,0x185a,0x185a,0x186f,0x185d,0x1872,0x1872,0x1872,0x1872,0x1875,0x1875,0x1869,0x1866,0x1863,
+0x186c,0x186c,0x186c,0x186c,0x186c,0x186c,0x186c,0x186c,0x186c,0x186c,0x1aee,0x1869,0x23d,0x1863,0x1992,0x1a43,
+0x1af1,0x1af1,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,
+0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,
+0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,
+0x187b,0x187b,0x187b,0x187b,0x240,0x240,0x240,0x240,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,
+0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,
+0x1878,0x1878,0x1878,0x1878,0x240,0x240,0x240,0x240,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,
+0x1896,0x1896,0x1896,0x1896,0x1896,0x1a13,0x1a13,0x1a13,0x1a13,0x1a13,0x1aa0,0x1aa0,0x1aa0,0x1aa0,0x1aa0,0x1aa0,
+0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,
+0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,
+0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x246,
+0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,
+0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x249,
+0x18d5,0x18d5,0x249,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,
+0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x249,
+0x249,0x249,0x18c9,0x249,0x18c9,0x18c9,0x249,0x18c9,0x18c9,0x18c9,0x18cc,0x18c9,0x18cf,0x18cf,0x18d8,0x18c9,
+0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,
+0x18d2,0x18d2,0x249,0x249,0x249,0x249,0x249,0x249,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,
+0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,
+0x1938,0x1938,0x1938,0x1938,0x24c,0x24c,0x24c,0x24c,0x1905,0x1908,0x1917,0x1917,0x1908,0x190b,0x1905,0x1902,
+0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x18f0,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18ed,
+0x18ed,0x18db,0x18db,0x18db,0x18f0,0x18f0,0x18f0,0x18f0,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,
+0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x258,0x258,0x258,0x258,
+0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,
+0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x258,0x258,0x1aa9,0x1aa9,0x1aa9,0x1aa9,0x1b4b,0x28b,0x28b,0x28b,
+0x1aa9,0x1aa9,0x1aa9,0x28b,0x28b,0x28b,0x28b,0x28b,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,
+0x19aa,0x19aa,0x19aa,0x19aa,0x19a7,0x19a7,0x19a7,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,
+0x19a7,0x19a1,0x199e,0x19a4,0x25b,0x25b,0x25b,0x25b,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,
+0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,
+0x19ad,0x19ad,0x19ad,0x25e,0x25e,0x19ad,0x19ad,0x19ad,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x261,0x19bc,
+0x19bc,0x261,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,
+0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19b9,0x19b9,0x19b9,0x19b9,0x19b9,0x261,
+0x19b0,0x19b0,0x261,0x19b9,0x19b9,0x19b0,0x19b9,0x19b3,0x19bc,0x261,0x261,0x261,0x261,0x261,0x261,0x261,
+0x19c5,0x19c5,0x19c8,0x19c8,0x19bf,0x19bf,0x19bf,0x19bf,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264,
+0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x264,0x264,0x264,0x264,0x264,0x264,
+0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19ce,0x19cb,0x19cb,0x19cb,
+0x19ce,0x19cb,0x19cb,0x19cb,0x19cb,0x267,0x267,0x267,0x267,0x267,0x267,0x267,0x267,0x267,0x267,0x267,
+0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,
+0x19d7,0x19d7,0x19d7,0x19d1,0x19d1,0x19d4,0x19d4,0x19da,0x19da,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,
+0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,
+0x19dd,0x19dd,0x19dd,0x19dd,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,
+0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,
+0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e3,0x19ec,0x19e0,0x19e0,0x270,0x270,0x270,0x270,0x270,
+0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19f2,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,
+0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,
+0x19fb,0x19fb,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f8,0x19f8,0x19f8,
+0x19f8,0x19fe,0x19fe,0x19fe,0x19fe,0x19fe,0x276,0x276,0x276,0x276,0x276,0x276,0x1a55,0x1a55,0x1a55,0x1a55,
+0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,
+0x1a55,0x1a55,0x1a55,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x1a64,0x1a64,0x1a64,0x1a64,
+0x1a64,0x1a64,0x1a64,0x1a64,0x27f,0x27f,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,
+0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a61,0x1a61,0x1a61,
+0x1a58,0x1a58,0x1a58,0x1a58,0x27f,0x27f,0x1a58,0x1a58,0x1a61,0x1a61,0x1a61,0x1a61,0x1a5b,0x1a64,0x1a5e,0x1a64,
+0x1a61,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,
+0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x1a70,0x1a70,0x1a70,0x1a70,
+0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x282,0x282,0x282,0x1a67,0x1a67,0x1a67,0x1a67,
+0x1a67,0x1a67,0x1a67,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a73,0x1a73,0x282,0x282,0x285,0x1a76,0x1a76,0x1a76,
+0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,
+0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x285,0x285,
+0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,
+0x1aa3,0x1aa3,0x1aa3,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,
+0x1aa6,0x1aa6,0x1aa6,0x1aa6,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x1938,0x1938,0x1938,0x1938,
+0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1aa9,0x1aa9,0x1aa9,0x1b4b,
+0x1b4b,0x1b4b,0x1b4b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x1aa9,0x1aa9,0x1aa9,0x1aa9,
+0x1aa9,0x1aa9,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x28b,0x28b,0x28b,
+0x28b,0x28b,0x28b,0x28b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x28b,0x28b,0x28b,0x28b,0x28b,
+0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x28b,
+0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,
+0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,
+0x1a7f,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,
+0x1a79,0x1a79,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x1a7c,
+0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a82,0x1a82,0x1a82,0x1a82,
+0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x291,0x291,0x291,0x291,0x291,0x1a85,
+0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x294,0x294,0x294,0x294,
+0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,
+0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x297,0x297,0x1b12,0x297,0x297,0x1b12,0x1b12,0x1b12,0x1b12,
+0x1b12,0x1b12,0x1b12,0x1b12,0x297,0x1b12,0x1b12,0x297,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,
+0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1afa,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x297,0x1b09,
+0x1b0c,0x297,0x297,0x1afa,0x1afa,0x1b0f,0x1b00,0x1b15,0x1b09,0x1b15,0x1b09,0x1afd,0x1b18,0x1b03,0x1b18,0x297,
+0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,
+0x1b06,0x1b06,0x297,0x297,0x297,0x297,0x297,0x297,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,
+0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x29a,0x29a,
+0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,
+0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,
+0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,
+0x1b1e,0x1b1e,0x1b1e,0x2a0,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,
+0x1b1e,0x1b1e,0x1b1e,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,
+0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,
+0x1b21,0x1b21,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x1b54,0x1b54,0x1b54,0x1b54,0x1b54,0x1b54,0x1b54,0x1b54,
+0x1b54,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,
+0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,
+0x2a3,0x2a3,0x2a3,0x2a3,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x2a6,0x1b24,
+0x1b24,0x1b27,0x2a6,0x2a6,0x1b2a,0x1b2a,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,
+0x2a6,0x2a6,0x2a6,0x2a6,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,
+0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,
+0x2a9,0x2a9,0x2a9,0x2a9,0x1932,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,
+0x2c1,0x2c1,0x2c1,0x2c1,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,
+0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,
+0x2a9,0x2a9,0x97b,0x97b,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x2c4,
+0x2c4,0x2c4,0x2c4,0x2c4,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,
+0x2a9,0x2a9,0x2a9,0x2a9,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x1a04,
+0x1a04,0x1a04,0x1a04,0x1a04,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,
+0x1b33,0x2ac,0x2ac,0x2ac,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0x12e4,
+0x12e4,0x12e4,0x2af,0x2af,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,
+0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0x2af,0x2af,
+0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,
+0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,
+0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,
+0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0x1b36,0x1b36,0x1b36,0x1b36,0x1b36,0x1b36,0x1b36,0x2b2,0x2b2,
+0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,
+0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0x2b5,0x2b5,
+0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,
+0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,
+0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,
+0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x2bb,0x2bb,
+0x17dc,0x17dc,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,
+0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,
+0x3cc,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3cc,0x3cc,0x3cc,0x3cc,0x3c6,0x1158,0x133e,
+0x3cf,0x945,0x948,0x3bd,0x3bd,0x1155,0x133b,0x133b,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,
+0x1155,0x3c0,0x3c0,0x3cc,0xce1,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,
+0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,
+0x3cf,0x3cf,0x3c0,0x3c0,0x8d0,0x8d3,0x963,0x963,0x963,0x963,0x963,0x963,0x963,0x963,0x963,0x963,
+0x3c9,0xfba,0xfb7,0x1341,0x1341,0x1341,0x1341,0x1341,0x1506,0x115b,0x115b,0xf0c,0xf0c,0xdda,0xf0c,0xf0c,
+0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3d2,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,
+0x3cf,0x3d2,0x3cf,0x3cf,0x3d2,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x133b,0x133e,0x3c3,0x3cf,0x3cc,0x3cc,
+0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x1347,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,
+0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x1347,0x18ae,0x18ae,0xfd8,0x45f,0x468,
+0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,
+0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0xbc7,0xbc7,0xde6,0xde6,0x8d6,0xde9,0x1428,0x1428,0x1428,
+0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,
+0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,
+0x4b3,0x4b3,0x4b3,0x1170,0x1170,0x1170,0x1170,0x1170,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,
+0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,
+0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x116d,0x116d,0x116d,0x116d,0x116d,0x116d,
+0x4b6,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,
+0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,
+0x4b3,0x4b3,0x4b3,0x4b3,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,
+0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,
+0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4b9,0x4b9,0x4b9,0x4b9,0x4bc,0x9bd,0x1005,0x1005,0x1008,0x1005,
+0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,
+0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x1008,0x1005,0x1008,0x1005,0x1008,0x1005,
+0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,
+0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,
+0x6ae,0x6ae,0x6b1,0x4e9,0x6bd,0x6ba,0x6ba,0x6b7,0x513,0x513,0x4d1,0x4d1,0x4d1,0x4d1,0x4d1,0xb58,
+0x6c0,0x4f5,0x6d8,0x6db,0x50a,0x6c0,0x4f8,0x4f8,0x4e9,0x504,0x504,0x6ae,0x510,0x50d,0x6b4,0x4e3,
+0x4da,0x4da,0x4dd,0x4dd,0x4dd,0x4dd,0x4dd,0x4e0,0x4dd,0x4dd,0x4dd,0x4d4,0x51c,0x519,0x516,0x516,
+0x6cc,0x4fe,0x4fb,0x6c9,0x6c6,0x6c3,0x6d5,0x4ec,0x6d2,0x6d2,0x501,0x504,0x6cf,0x6cf,0x501,0x504,
+0x4e6,0x4e9,0x4e9,0x4e9,0x507,0x4f2,0x4ef,0xbdc,0xaf2,0xaf5,0xaef,0xaef,0xaef,0xaef,0xbd3,0xbd3,
+0xbd3,0xbd3,0xbd9,0xd0e,0xd0b,0xdf5,0xdf8,0xbd6,0xdf8,0xdf8,0xdf8,0xdf8,0xdf5,0xdf8,0xdf8,0xbd0,
+0x540,0x540,0x540,0x540,0x540,0x540,0x540,0x53d,0x543,0x75c,0x540,0x9c0,0x9e1,0xaf8,0xaf8,0xaf8,
+0xbe2,0xbe2,0xdfe,0xdfe,0xdfe,0xdfe,0x1179,0x117c,0x117c,0x135c,0x14f4,0x151e,0x1521,0x1521,0x1734,0x18b1,
+0x54f,0x54f,0x567,0x6ea,0x54c,0x6e7,0x54f,0x564,0x54c,0x6ea,0x55e,0x567,0x567,0x567,0x55e,0x55e,
+0x567,0x567,0x567,0x6f3,0x54c,0x567,0x6ed,0x54c,0x55b,0x567,0x567,0x567,0x567,0x567,0x54c,0x54c,
+0x552,0x6e7,0x6f0,0x54c,0x567,0x54c,0x6f6,0x54c,0x567,0x555,0x56d,0x6f9,0x567,0x567,0x558,0x55e,
+0x567,0x567,0x56a,0x567,0x55e,0x561,0x561,0x561,0x561,0xb04,0xb01,0xd11,0xe07,0xbf7,0xbfa,0xbfa,
+0xbf4,0xbf1,0xbf1,0xbf1,0xbf1,0xbfa,0xbf7,0xbf7,0xbf7,0xbf7,0xbee,0xbf1,0xe04,0xf18,0xf1b,0x100e,
+0x117f,0x117f,0x117f,0x6ff,0x6fc,0x570,0x573,0x573,0x573,0x573,0x573,0x6fc,0x6ff,0x6ff,0x6fc,0x573,
+0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x57c,0x57c,0x57c,0x57c,
+0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x576,0x576,0x576,0x576,0x576,0x576,
+0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x57f,0x588,0x588,0x582,0x582,0x582,0x585,0x57f,
+0x582,0x582,0x57f,0x57f,0x57f,0x57f,0x582,0x582,0x708,0x708,0x57f,0x57f,0x582,0x582,0x582,0x582,
+0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x585,0x585,0x585,0x582,0x582,0x70b,0x582,
+0x70b,0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x57f,0x582,0x57f,0x57f,0x57f,0x57f,0x57f,0x57f,
+0x582,0x582,0x57f,0x708,0x57f,0x57f,0x57f,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,
+0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0x711,0x58b,0x711,0x711,
+0x58e,0x58b,0x58b,0x711,0x711,0x58e,0x58b,0x711,0x58e,0x58b,0x58b,0x711,0x58b,0x711,0x59a,0x597,
+0x58b,0x711,0x58b,0x58b,0x58b,0x58b,0x711,0x58b,0x58b,0x711,0x711,0x711,0x711,0x58b,0x58b,0x711,
+0x58e,0x711,0x58e,0x711,0x711,0x711,0x711,0x711,0x717,0x591,0x711,0x591,0x591,0x58b,0x58b,0x58b,
+0x711,0x711,0x711,0x711,0x58b,0x58b,0x58b,0x58b,0x711,0x711,0x58b,0x58b,0x58b,0x58e,0x58b,0x58b,
+0x58e,0x58b,0x58b,0x58e,0x711,0x58e,0x58b,0x58b,0x711,0x58b,0x58b,0x58b,0x58b,0x58b,0x711,0x58b,
+0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x714,0x711,0x58e,0x58b,
+0x711,0x711,0x711,0x711,0x58b,0x58b,0x711,0x711,0x58b,0x58e,0x714,0x714,0x58e,0x58e,0x58b,0x58b,
+0x58e,0x58e,0x58b,0x58b,0x58e,0x58e,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58e,0x58e,0x711,0x711,
+0x58e,0x58e,0x711,0x711,0x58e,0x58e,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,
+0x58b,0x711,0x58b,0x58b,0x58b,0x711,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x711,0x58b,0x58b,
+0x58b,0x58b,0x58b,0x58b,0x58e,0x58e,0x58e,0x58e,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,
+0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x711,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,
+0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,
+0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58e,0x58e,0x58e,0x58e,0x58b,0x58b,0x58b,0x58b,
+0x58b,0x58b,0x58e,0x58e,0x58e,0x58e,0x58b,0x594,0x58b,0x58b,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,
+0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0x59d,0xb0d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,
+0x5a9,0x5a6,0x5a9,0x5a6,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x71a,0x59d,0x59d,0x59d,0x59d,0x59d,
+0x59d,0x59d,0x81f,0x81f,0x59d,0x59d,0x59d,0x59d,0x5a3,0x5a3,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,
+0x5a0,0x825,0x822,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,
+0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,
+0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0xb0d,0xc06,0xb0d,0xb0d,0xb0d,0x5ac,0x5ac,0x5ac,0x5ac,
+0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,
+0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x723,0x723,0x723,0x723,
+0x723,0x723,0x723,0x723,0x723,0x723,0x5b2,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,
+0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xd89,0x72c,0x72c,0x72c,0x72c,
+0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,
+0x5b5,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x72c,0x72c,0x72c,0x72c,
+0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x5b8,0x5b8,0x5b8,0x5b8,0x72c,0x72c,0x72c,0x72c,
+0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72f,0x72f,0x72f,0x72f,
+0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x5bb,0x5bb,0x72f,0x72f,
+0x72f,0x72f,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0x735,0x735,0x5be,0x732,
+0x732,0x732,0x732,0x732,0x732,0x732,0x5c1,0x5c1,0x5be,0x5be,0x5c4,0x5c4,0x5c4,0x5c4,0x735,0x735,
+0x5c4,0x5c4,0x738,0x735,0x5be,0x5be,0x5be,0x5be,0x735,0x735,0x5c4,0x5c4,0x738,0x735,0x5be,0x5be,
+0x5be,0x5be,0x735,0x735,0x732,0x5be,0x5c4,0x735,0x5be,0x5be,0x732,0x735,0x735,0x735,0x5c4,0x5c4,
+0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x735,0x732,
+0x735,0x732,0x5be,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5be,0x5be,0x732,0xb16,0xb16,0xb16,0xb16,
+0xb16,0xb16,0xb16,0xb16,0xc0c,0xc0c,0xc0c,0xc0f,0xc0f,0xc8a,0xc8a,0xc0c,0x5d3,0x5d3,0x5d3,0x5d3,
+0x5d0,0x74a,0x747,0x5ca,0x5ca,0x73b,0x5ca,0x5ca,0x5ca,0x5ca,0x741,0x73b,0x5ca,0x5d0,0x5ca,0x5c7,
+0xd92,0xd92,0xc15,0xc15,0xe13,0xb19,0x5cd,0x5cd,0x73e,0x5d6,0x73e,0x5cd,0x5d0,0x5ca,0x5d0,0x5d0,
+0x5ca,0x5ca,0x5d0,0x5ca,0x5ca,0x5ca,0x5d0,0x5ca,0x5ca,0x5ca,0x5d0,0x5d0,0x5ca,0x5ca,0x5ca,0x5ca,
+0x5ca,0x5ca,0x5ca,0x5ca,0x5d0,0x5d3,0x5d3,0x5cd,0x5ca,0x5ca,0x5ca,0x5ca,0x74d,0x5ca,0x74d,0x5ca,
+0x5ca,0x5ca,0x5ca,0x5ca,0x828,0x828,0x828,0x828,0x828,0x828,0x828,0x828,0x828,0x828,0x828,0x828,
+0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5d0,0x74d,0x74a,0x5d9,0x74d,
+0x73b,0x741,0x5d0,0x73b,0x744,0x73b,0x73b,0x5ca,0x73b,0x74a,0x5d9,0x74a,0xb19,0xb19,0xc18,0xc18,
+0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc1b,0xc18,0xc18,0xe10,0xec7,0x5dc,0x5dc,0x5dc,0x5dc,
+0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,
+0x5df,0x13e6,0x13e6,0x13e6,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x152a,0x5eb,0x5f4,0x5eb,
+0x5eb,0x13e6,0x5df,0x5df,0x5f4,0x5f4,0x13e9,0x13e9,0x5f7,0x5f7,0x5e8,0x5ee,0x5e8,0x5e8,0x5ee,0x5df,
+0x5ee,0x5df,0x5ee,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5ee,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,
+0x13e6,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5ee,0x5ee,0x5df,0x5df,0x5df,
+0x5df,0x5df,0x5df,0x5df,0x5df,0x753,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5ee,0x5df,0x5df,0x5ee,
+0x5df,0x5df,0x5df,0x5df,0x13e6,0x5df,0x13e6,0x5df,0x5df,0x5df,0x5df,0x13e6,0x13e6,0x13e6,0x5df,0x12de,
+0x5df,0x5df,0x5df,0x5e5,0x5e5,0x5e5,0x5e5,0x1368,0x1368,0x5df,0x5e2,0x5f1,0x5f4,0x5e8,0x5e8,0x5e8,
+0xc21,0xc1e,0xc21,0xc1e,0xc21,0xc1e,0xc21,0xc1e,0xc21,0xc1e,0xc21,0xc1e,0xc21,0xc1e,0x750,0x750,
+0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x5df,0x5ee,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,
+0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x13e6,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,
+0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x13e6,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,
+0x618,0x618,0x618,0x618,0x618,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x621,0x621,0x621,0x621,
+0x621,0x621,0x621,0x621,0x618,0x61e,0x60f,0x612,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,
+0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,
+0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x615,0x615,0x615,0x615,0x615,0x615,0x618,0x618,0x618,0x618,
+0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,
+0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,
+0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,
+0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x618,
+0x61e,0x618,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,
+0x61b,0x621,0x61e,0x618,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61e,0x618,0x61b,0x621,0x61e,0x618,
+0x61b,0x621,0x61e,0x618,0x61e,0x618,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,
+0x136b,0x136b,0x136b,0x136b,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61b,0x621,0x61b,0x621,0x61e,0x618,
+0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61b,0x61e,0x618,0x61b,
+0x61e,0x618,0x61b,0x621,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,
+0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x61b,0x61b,0x61b,0x61b,0x61b,
+0x61b,0x61b,0x61b,0x61b,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,
+0x61e,0x61e,0x61e,0x61e,0x61e,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,
+0x618,0x618,0x618,0x618,0x61b,0x61b,0x618,0x61b,0x618,0x61b,0x618,0x618,0x61b,0x618,0x618,0x61b,
+0x618,0x61b,0x618,0x618,0x61b,0x618,0x61b,0x61b,0x618,0x618,0x618,0x61b,0x618,0x618,0x618,0x618,
+0x618,0x61b,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,
+0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x61b,0x61b,0x618,0x618,0x61b,0x618,0x61b,0x618,
+0x618,0x618,0x618,0x618,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,
+0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,
+0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x621,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,
+0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,
+0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,
+0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x61e,0x61e,0x61e,
+0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x624,0x624,0x624,0x624,0x101a,0x101a,0x101a,0x152d,
+0x152d,0x152d,0x152d,0x152d,0x152d,0x152d,0x173a,0x173a,0x885,0x88b,0x88b,0x897,0x897,0x888,0x87f,0x888,
+0x87f,0x888,0x87f,0x888,0x87f,0x888,0x87f,0x888,0x633,0x633,0x62d,0x633,0x62d,0x633,0x62d,0x633,
+0x62d,0x633,0x62d,0x630,0x636,0x633,0x62d,0x633,0x62d,0x630,0x636,0x633,0x62d,0x633,0x62d,0x630,
+0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x633,0x62d,0x633,0x62d,0x633,
+0x62d,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,
+0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,
+0x636,0x633,0x62d,0x630,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,
+0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,
+0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,
+0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x726,0x726,0x726,0x726,0x726,0x726,
+0x726,0x726,0x726,0x726,0x726,0x726,0x729,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,
+0x726,0x726,0x726,0x726,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x723,
+0x723,0x723,0x723,0x723,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,
+0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,
+0x72c,0x72c,0x72c,0x72c,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,
+0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,
+0x756,0x756,0x756,0x756,0xc78,0x8e8,0x8e2,0x8df,0x8e5,0x8dc,0x76b,0x76e,0x76e,0x76e,0x76e,0x76e,
+0x76e,0x76e,0x76e,0x76e,0x8ee,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,
+0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,
+0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x8eb,0x8eb,0x771,0x8fd,0x900,0x906,0x82b,0x837,0x91b,0x834,
+0x8f4,0x8f1,0x8f4,0x8f1,0x8fa,0x8f7,0x8fa,0x8f7,0x8f4,0x8f1,0x831,0x906,0x8f4,0x8f1,0x8f4,0x8f1,
+0x8f4,0x8f1,0x8f4,0x8f1,0x909,0x912,0x90f,0x90f,0x777,0x7b3,0x7b3,0x7b3,0x7b3,0x7b3,0x7b3,0x7ad,
+0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,
+0x7ad,0x7ad,0x7ad,0x77a,0x795,0x774,0x79b,0x79e,0x798,0x7b0,0x7b0,0x7b0,0x7b0,0x7b0,0x7b0,0x7aa,
+0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,
+0x7aa,0x7aa,0x7aa,0x77a,0x795,0x774,0x795,0xc7b,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,
+0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,
+0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x12d8,0x12d8,0x12d8,0x12d8,0x12d8,0x81c,
+0x831,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x951,0x951,0x951,0x951,0x83a,0x83a,
+0x90c,0x918,0x918,0x918,0x918,0x915,0x82e,0x903,0xb3d,0xb3d,0xb3d,0xc8d,0xcab,0xca8,0xb5b,0x8d9,
+0x840,0x83d,0x840,0x843,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,
+0x840,0x840,0x83d,0x840,0x840,0x83d,0x840,0x840,0x83d,0x840,0x840,0x83d,0x840,0x840,0x83d,0x83d,
+0xcae,0x852,0x84c,0x852,0x84c,0x852,0x84c,0x852,0x84c,0x852,0x84c,0x84c,0x84f,0x84c,0x84f,0x84c,
+0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,
+0x84f,0x84c,0x84f,0x852,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,
+0x84f,0x84f,0x84c,0x84f,0x84f,0x84c,0x84f,0x84f,0x84c,0x84f,0x84f,0x84c,0x84f,0x84f,0x84c,0x84c,
+0x84c,0x84c,0x84c,0x852,0x84c,0x852,0x84c,0x852,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x852,0x84c,
+0x84c,0x84c,0x84c,0x84c,0x84f,0x852,0x852,0x84f,0x84f,0x84f,0x84f,0x921,0x924,0x855,0x858,0xc96,
+0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,
+0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,
+0x861,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,
+0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x86a,0x86a,0x86a,0x86a,
+0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,
+0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0xd9b,0xd9b,0xeca,0x864,0x92d,0x92d,0x92d,0x92d,
+0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0xd95,0xd95,0xd95,0xd95,0x86d,0x86d,0x86d,0x86d,
+0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,
+0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x1ab2,0x936,0x936,0x936,0x936,
+0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x870,0x870,0x870,
+0x870,0x870,0x870,0xd9e,0xd9e,0xd9e,0xd9e,0x939,0x939,0x939,0x939,0x939,0x870,0x870,0x870,0x870,
+0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,
+0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0xd9e,0xd9e,
+0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,
+0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,
+0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,
+0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,
+0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,
+0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,
+0x113d,0x113d,0x113d,0x113d,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,
+0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,
+0x879,0x879,0x879,0x879,0x879,0x879,0x87c,0x87c,0x879,0x87c,0x879,0x87c,0x87c,0x879,0x879,0x879,
+0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x87c,0x879,0x87c,0x879,0x87c,0x87c,0x879,0x879,0x87c,
+0x87c,0x87c,0x879,0x879,0x879,0x879,0x14e5,0x14e5,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,
+0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,
+0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,
+0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x131d,0x131d,0x131d,0x131d,0x12bd,0x12bd,0x12bd,0x12bd,
+0x12bd,0x12bd,0x12bd,0x12bd,0xd95,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,
+0xc99,0xc99,0xc99,0xc99,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,
+0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x933,0x930,0x933,0x930,0x930,
+0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,
+0x930,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,
+0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,
+0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0xd9e,
+0x9b7,0x999,0x999,0x999,0x999,0x993,0x999,0x999,0x9ab,0x999,0x999,0x996,0x9a2,0x9a8,0x9a8,0x9a8,
+0x9a8,0x9a8,0x9ab,0x993,0x99f,0x993,0x993,0x993,0x98a,0x98a,0x993,0x993,0x993,0x993,0x993,0x993,
+0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x993,0x993,0x993,0x993,0x993,0x993,
+0x993,0x993,0x993,0x993,0x996,0x98a,0x993,0x98a,0x993,0x98a,0x9a5,0x99c,0x9a5,0x99c,0x9b4,0x9b4,
+0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,
+0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,
+0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,
+0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,
+0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,
+0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,
+0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,
+0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9cc,0x9cc,
+0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,
+0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9cf,0x9cf,
+0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,
+0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,
+0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,
+0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,
+0x9d8,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,
+0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9d8,0x9db,0x9db,0x9db,
+0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,
+0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0xa68,0xa68,0xfff,0xa68,0xa68,0xa68,0xa6b,0xa68,
+0xfff,0xa68,0xa68,0xff6,0xa62,0xa56,0xa56,0xa56,0xa56,0xa65,0xa56,0xfe7,0xfe7,0xfe7,0xa56,0xa59,
+0xa62,0xa5c,0xfed,0xff9,0xff9,0xfe7,0xfe7,0xfff,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,
+0xb61,0xb61,0xa6e,0xa6e,0xa5f,0xa5f,0xa5f,0xa5f,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa65,0xa65,
+0xa56,0xa56,0xfff,0xfff,0xfff,0xfff,0xfe7,0xfe7,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,
+0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,
+0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,
+0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,
+0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,
+0xa7d,0xa7d,0xa7d,0xdf2,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,
+0xa7d,0xa7d,0xa7d,0xa7d,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,
+0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,
+0xa83,0xa83,0xa83,0xa83,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,
+0xa89,0xa86,0xa8c,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0x1176,0x1176,0x1176,0x1176,0x1176,
+0x1176,0x1176,0x1176,0x1176,0x1173,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,
+0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,
+0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,
+0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,
+0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xac2,0xac2,0xac2,0xac5,0xac5,0xac2,0xac2,0xac2,
+0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xaaa,0xaaa,0xabf,0xaa1,
+0xaa1,0xaa1,0xaa1,0xaa1,0xaa1,0xaa1,0xabf,0xabf,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,
+0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,
+0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xae3,0xae3,0xae3,0xae3,0xae3,0xace,0xace,0xae3,
+0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,
+0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,
+0xae3,0xae3,0xae3,0xae6,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,
+0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,
+0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb10,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,
+0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xc06,0xc06,0xc06,0xc06,0xc06,
+0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,
+0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,
+0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,
+0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,
+0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,
+0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,
+0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,
+0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0x13ec,0x13ec,0x13ec,0x1b2d,0x1b2d,0x1b2d,0x1b2d,0x1b2d,
+0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,
+0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,
+0xb43,0xb43,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0xb46,0xb46,0xb46,0xb46,
+0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,
+0xb46,0xb49,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,
+0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,
+0xb46,0xb46,0xb46,0xb46,0xb4c,0xb4c,0xc9c,0xc9c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,
+0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xc9c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,
+0xb4c,0xb4c,0xb4c,0xb4c,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,
+0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,
+0xb70,0xb70,0xb70,0x1530,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xd26,0xd26,0xb76,0xb76,0xb76,0xb76,
+0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,
+0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xd23,0xd23,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,
+0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,
+0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,
+0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,
+0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,
+0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb82,0xb8e,0xb94,
+0xb94,0xb94,0xb88,0xb88,0xb88,0xb91,0xb85,0xb85,0xb85,0xb85,0xb85,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,
+0xb7f,0xb7f,0xb7f,0xb94,0xb94,0xb94,0xb94,0xb94,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,
+0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,
+0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb8b,0xb8b,0xb94,0xb94,0xb94,0xb88,
+0xb88,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,
+0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb94,0xb94,
+0xb94,0xb94,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb8b,
+0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,
+0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,
+0xb88,0xb88,0x173d,0x173d,0xba0,0xb97,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
+0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb97,
+0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
+0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xb97,
+0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
+0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb97,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0,
+0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
+0xba0,0xb97,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
+0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,
+0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,
+0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
+0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
+0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
+0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
+0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,
+0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,
+0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0,
+0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,
+0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,
+0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,
+0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,
+0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,
+0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,
+0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,
+0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc03,0xc06,0xc03,0xc03,0xc03,0xc03,0xc03,0xc03,0xc03,0xc03,
+0xc03,0xc03,0xc03,0xc03,0xc03,0xc03,0xc03,0xd14,0xd17,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,
+0xe0a,0xe0a,0xe0a,0xe0a,0xf24,0xf24,0xf24,0xf24,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc12,0xc12,
+0xc12,0xc12,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1d,0xd1d,0xe10,0xec4,0xe10,0xe10,0xe10,0xe10,
+0xe0d,0xe10,0xe0d,0xe10,0xe10,0x1014,0x12ae,0x12ae,0xe19,0xe19,0xe19,0xe19,0xe19,0xe1f,0xe1c,0xf36,
+0xf36,0xf36,0xf36,0x142e,0x1026,0x142e,0x1374,0x1374,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,
+0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc81,0xc7e,0xc81,0xc7e,0xc81,0xc7e,
+0x1137,0x1134,0x102c,0x1029,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,
+0xc51,0xc51,0xc51,0xc51,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,
+0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,
+0xc54,0xc54,0xc54,0xc54,0xc57,0xc57,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,
+0xc5a,0xc5a,0xc5a,0xc60,0xc5d,0xc87,0xc84,0xc60,0xc5d,0xc60,0xc5d,0xc60,0xc5d,0xc60,0xc5d,0xc60,
+0xc5d,0xc60,0xc5d,0xc60,0xc5d,0xc60,0xc5d,0xc60,0xc5d,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,
+0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,
+0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc60,0xc5d,0xc60,0xc5d,
+0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,
+0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc60,0xc5d,0xc5a,0xc5a,
+0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc69,0xc63,0xc63,0xc63,
+0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,
+0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,
+0xc69,0xc69,0xc69,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,
+0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,
+0xc66,0xc63,0xc63,0xc63,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,
+0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,
+0xc9f,0xc9f,0xc9f,0xc9f,0xd20,0xd8f,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe10,0xe0d,0xe0d,0xec4,0xec4,
+0xe0d,0xe0d,0xe0d,0xe0d,0xe10,0xe10,0xf27,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,
+0x1014,0x12db,0x12db,0x12b1,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,
+0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,
+0xd44,0xd44,0xd44,0xd44,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd47,
+0xd5c,0xd5c,0xd5c,0xd56,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd56,
+0xd5c,0xd5c,0xd5c,0xd5c,0xd50,0xd50,0xd59,0xd59,0xd59,0xd59,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd53,
+0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe22,0xe25,0xe25,0xe25,
+0xe25,0xe25,0xe25,0xe25,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,
+0xd5c,0xd5c,0xd56,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,
+0xd5c,0xd50,0xd50,0xd50,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,
+0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,
+0xd53,0xd53,0xd53,0xd53,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd62,0xd62,0xd62,0xd5f,0xd5f,0xd5f,0xd5f,
+0xd5f,0xd5f,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0x1140,
+0x1140,0x102f,0x102f,0x102f,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,
+0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,
+0xd65,0xd65,0xd65,0xd65,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,
+0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,
+0xd6b,0xd6b,0xd6b,0xd6b,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,
+0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,
+0xd74,0xd74,0xd74,0xd74,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,
+0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,
+0xd80,0xd80,0xd80,0xd80,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,
+0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,
+0xd8c,0xd8c,0xd8c,0xd8c,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,
+0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,
+0xe2e,0xe2e,0xe2e,0xe2e,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,
+0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,
+0xe31,0xe31,0xe31,0xe31,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,
+0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,
+0xe34,0xe34,0xe34,0xe34,0xef4,0xef4,0xe46,0xe46,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0x103b,
+0x103b,0x103b,0x103b,0x103b,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,
+0x1038,0x1038,0x1038,0x1038,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,
+0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,
+0xe55,0xe52,0xe55,0xe52,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,
+0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,
+0xe61,0xe61,0xe61,0xe61,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,
+0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,
+0xe67,0xe67,0xe67,0xe67,0xeee,0xeee,0xeee,0xeee,0xeee,0xeee,0xeee,0xeee,0xe7f,0xe7f,0xe7f,0xe7f,
+0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xf3f,0xf3f,0xf3f,0xf3f,0x103e,
+0x103e,0x103e,0x103e,0x103e,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,
+0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,
+0xe88,0xe88,0xe88,0xe88,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,
+0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,
+0xe91,0xe91,0xe91,0xe91,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,
+0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,
+0xe9a,0xe9a,0xe9a,0xe94,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,
+0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe9a,
+0xe9a,0xe9a,0xe9a,0xe9a,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,
+0xea3,0xea3,0xea0,0xea0,0xea0,0xea0,0xea0,0xea0,0xea0,0xea0,0xe9d,0xea6,0x104a,0x1044,0x1053,0x1041,
+0xea3,0xea3,0x1041,0x1041,0xeb5,0xeb5,0xea9,0xeb5,0xeb5,0xeb5,0xeac,0xeb5,0xeb5,0xeb5,0xeb5,0xea9,
+0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,
+0xeb5,0xeb5,0xeb5,0xeb5,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,
+0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,
+0xeb8,0xeb8,0xeb8,0xeb8,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,
+0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,
+0xed0,0xed0,0xed0,0xed0,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,
+0xef1,0xef1,0xef1,0xef1,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,
+0x1149,0x1149,0x1149,0x1149,0xf24,0xf24,0xf24,0xf21,0xf21,0xf21,0xf21,0xf21,0x1182,0x13dd,0x13dd,0x13dd,
+0x13dd,0x135f,0x135f,0x135f,0x13e0,0x1362,0x1362,0x13e0,0x1524,0x1524,0x1524,0x1524,0x1527,0x1527,0x1527,0x17ee,
+0x17ee,0x17ee,0x17ee,0x18b4,0xf39,0xf39,0xf39,0xf39,0x102f,0x102f,0x102f,0x102f,0x102f,0x102f,0x102f,0x102f,
+0x102f,0x102f,0x102f,0x102f,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,
+0x1032,0x1032,0x1032,0x1032,0xf5a,0xf5a,0xf5a,0xf5a,0xf6c,0xf75,0xf78,0xf75,0xf78,0xf75,0xf78,0xf75,
+0xf78,0xf75,0xf78,0xf75,0xf75,0xf75,0xf78,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,
+0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf5d,0xf5a,0xf5a,0xf5a,
+0xf5a,0xf5a,0xf5a,0xf6f,0xf5a,0xf6f,0xf6c,0xf6c,0xf81,0xf7e,0xf81,0xf81,0xf81,0xf7e,0xf7e,0xf81,
+0xf7e,0xf81,0xf7e,0xf81,0xf7e,0x1065,0x1065,0x1065,0x11a0,0x105c,0x1065,0x105c,0xf7e,0xf81,0xf7e,0xf7e,
+0x105c,0x105c,0x105c,0x105c,0x105f,0x1062,0x11a0,0x11a0,0xf84,0xf84,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,
+0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x106e,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,
+0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,
+0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,
+0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,
+0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,
+0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0x1563,0x1563,0x1563,0x1563,0x1563,
+0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0xf9f,0xf9f,0xf9f,0xf9f,
+0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,
+0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xfe7,0xfff,0xff6,0xffc,
+0xffc,0xfff,0xfff,0xff6,0xff6,0xffc,0xffc,0xffc,0xffc,0xffc,0xfff,0xfff,0xfff,0xfe7,0xfe7,0xfe7,
+0xfe7,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfe7,0xff6,
+0xff9,0xfe7,0xfe7,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xfea,0xfff,0xffc,0xff3,0xff3,0xff3,0xff3,
+0xff3,0xff3,0xff3,0xff3,0xff3,0xff3,0x116a,0x116a,0x1167,0x1164,0xff0,0xff0,0x1017,0x1017,0x1017,0x1017,
+0x12db,0x12db,0x12b1,0x12b1,0x12b7,0x12ae,0x12ae,0x12ae,0x12ae,0x12b1,0x13e3,0x12b7,0x12b1,0x12b7,0x12ae,0x12b7,
+0x12db,0x12ae,0x12ae,0x12ae,0x12b1,0x12b1,0x12ae,0x12ae,0x12b1,0x12ae,0x12ae,0x12b1,0x1032,0x1032,0x1032,0x1032,
+0x1032,0x102f,0x102f,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x153c,0x153c,0x153c,0x1140,0x102f,0x102f,0x102f,
+0x102f,0x12e7,0x12c0,0x12c0,0x12c0,0x12c0,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x1050,0x1050,0x104d,0x1047,
+0x104d,0x1047,0x104d,0x1047,0x104d,0x1047,0x1044,0x1044,0x1044,0x1044,0x1059,0x1056,0x1044,0x119d,0x143a,0x143d,
+0x143d,0x143a,0x143a,0x143a,0x143a,0x143a,0x1440,0x1440,0x1557,0x154b,0x154b,0x1548,0x1077,0x106e,0x1077,0x106e,
+0x1077,0x106e,0x1077,0x106e,0x106b,0x1068,0x1068,0x1077,0x106e,0x1383,0x1380,0x1746,0x1383,0x1380,0x1449,0x1446,
+0x155a,0x155a,0x1560,0x155a,0x1560,0x155a,0x1560,0x155a,0x1560,0x155a,0x1560,0x155a,0x1077,0x106e,0x1077,0x106e,
+0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,
+0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1071,0x106e,0x106e,0x106e,
+0x106e,0x106e,0x106e,0x106e,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x1077,0x106e,0x107a,0x107a,0x1080,0x1086,
+0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,
+0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1080,0x107a,0x107a,
+0x107a,0x107a,0x1080,0x1080,0x107a,0x107a,0x1083,0x1452,0x144f,0x144f,0x1086,0x1086,0x107d,0x107d,0x107d,0x107d,
+0x107d,0x107d,0x107d,0x107d,0x107d,0x107d,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x109b,0x109b,0x109b,0x109b,
+0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,
+0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x10a4,0x10a4,0x10a4,0x10a4,
+0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,
+0x10a4,0x10a4,0x10a4,0x10a4,0x10a7,0x10a7,0x10a7,0x10aa,0x10a7,0x10a7,0x10ad,0x10ad,0x10b0,0x10b0,0x10b0,0x10b0,
+0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,
+0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b9,0x10b9,0x10b9,0x10b9,
+0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10bc,0x10b3,0x10c2,0x10bf,0x10b9,0x10b9,0x10b9,0x10b9,
+0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,
+0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x1389,0x1386,0x10d4,0x10ce,
+0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d1,0x1152,0x10c5,0x10c5,0x10c5,0x10cb,
+0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x10c8,0x10c8,0x10cb,0x10d7,0x10d4,0x10ce,0x10d4,0x10ce,
+0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,
+0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x156f,0x156c,0x156f,0x156c,
+0x1572,0x1572,0x174f,0x1458,0x10e0,0x10e0,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,
+0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,
+0x10e3,0x10e3,0x10e3,0x10e3,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,
+0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10ec,0x10ec,0x10ec,0x1146,0x10f5,
+0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,
+0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,
+0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,
+0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,
+0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,
+0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,
+0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,
+0x112e,0x112e,0x112e,0x112e,0x1143,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,
+0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,
+0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,
+0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,
+0x113d,0x113d,0x113d,0x113d,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x14e2,0x17cd,0x17cd,0x17cd,
+0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,
+0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,
+0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11a9,0x11a9,0x11ac,0x11ac,0x11b2,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,
+0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,
+0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,
+0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,
+0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,
+0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,
+0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11dc,0x11e2,
+0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,
+0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,
+0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,
+0x11f4,0x132f,0x11fa,0x1332,0x11fa,0x11fa,0x11fa,0x11fa,0x11f7,0x11f7,0x11f7,0x11fa,0x1752,0x1755,0x197a,0x1977,
+0x11fd,0x11fd,0x11fd,0x120c,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,
+0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,
+0x1212,0x1212,0x1212,0x1200,0x120c,0x120c,0x11fd,0x11fd,0x11fd,0x11fd,0x120c,0x120c,0x11fd,0x11fd,0x120c,0x120c,
+0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,
+0x1221,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x1218,0x1218,0x1218,0x121e,0x121b,0x1578,0x157b,0x157e,0x157e,
+0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,
+0x1224,0x1230,0x1224,0x1224,0x1224,0x1239,0x1239,0x1224,0x1224,0x1239,0x1230,0x1239,0x1239,0x1230,0x1224,0x1227,
+0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,
+0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,
+0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,
+0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,
+0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,
+0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1260,0x1260,0x1260,
+0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,
+0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,
+0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,
+0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,
+0x1281,0x1281,0x1290,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,
+0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1296,0x1293,0x1296,0x1293,0x1293,0x1293,
+0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1296,0x1293,0x1293,0x1293,0x1293,
+0x1290,0x1290,0x1290,0x1284,0x1284,0x1284,0x1284,0x1290,0x1290,0x128a,0x1287,0x128d,0x128d,0x129c,0x1299,0x1299,
+0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,
+0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,
+0x12a5,0x12a5,0x12a5,0x12a2,0x12a2,0x12a2,0x129f,0x129f,0x129f,0x129f,0x12a2,0x129f,0x129f,0x129f,0x12a5,0x12a2,
+0x12a5,0x12a2,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,
+0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x12a5,0x12a2,0x12a2,
+0x129f,0x129f,0x129f,0x129f,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c9,
+0x12c9,0x12c9,0x12a8,0x1980,0x13d7,0x12d2,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,
+0x13d7,0x12d2,0x13d7,0x12d2,0x12b1,0x12b1,0x1365,0x12ae,0x1365,0x1365,0x1365,0x1365,0x12ae,0x12b4,0x12db,0x12ae,
+0x12ae,0x12ae,0x12ae,0x12ae,0x12b4,0x12b7,0x12db,0x12db,0x12b7,0x12db,0x12ae,0x12b7,0x12b7,0x12ba,0x12db,0x12ae,
+0x12ae,0x12db,0x12b1,0x12b1,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x12c3,0x12c3,
+0x12c3,0x12c3,0x13ef,0x13ce,0x12cc,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x187e,
+0x187e,0x187e,0x187e,0x187e,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x1581,
+0x1581,0x1ad0,0x1ad0,0x1ad0,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,
+0x12c6,0x12c6,0x12c6,0x12c6,0x13d7,0x13d7,0x12d2,0x13d7,0x13d7,0x13d7,0x12d2,0x13d7,0x13d7,0x13d7,0x12cc,0x12cc,
+0x12cc,0x12cc,0x12cc,0x13d1,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x12cf,0x13d4,0x13d4,0x13d4,0x13d4,
+0x13d4,0x13d4,0x13d4,0x12cf,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x145b,0x145b,
+0x1a2e,0x1ad0,0x1ad0,0x1ad0,0x13da,0x13da,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x12cf,0x13d4,0x12cf,
+0x12cf,0x13d4,0x13da,0x12d5,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,
+0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,
+0x12f9,0x12f9,0x12f9,0x12f9,0x1383,0x1380,0x1383,0x1380,0x1383,0x1380,0x1383,0x1380,0x1383,0x1380,0x1449,0x1560,
+0x1560,0x1560,0x17fa,0x196e,0x1560,0x1560,0x1749,0x1749,0x1749,0x1743,0x1749,0x1743,0x1971,0x196e,0x1a2b,0x1a28,
+0x1a2b,0x1a28,0x1a2b,0x1a28,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,
+0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,
+0x13a7,0x13a7,0x13a7,0x13a7,0x13bc,0x13ad,0x13bc,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,
+0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,
+0x13bf,0x13bf,0x13bf,0x13bf,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13c5,0x13c5,0x13c5,0x13c5,
+0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,
+0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13cb,0x13cb,0x13cb,0x13cb,
+0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,
+0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13fb,0x13f8,0x1923,0x1923,
+0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,
+0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1404,0x1404,0x1404,0x1404,
+0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
+0x1404,0x1401,0x1401,0x1404,0x1404,0x1404,0x1404,0x1404,0x1401,0x1404,0x1404,0x1404,0x1401,0x1404,0x1401,0x1404,
+0x1401,0x1404,0x1404,0x1404,0x1404,0x1404,0x1407,0x1404,0x1404,0x1404,0x1404,0x1401,0x1404,0x1401,0x1401,0x1404,
+0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1401,0x1401,0x1401,0x1401,
+0x1401,0x1401,0x1401,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
+0x1404,0x1404,0x1404,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x1404,0x1404,0x1404,
+0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,
+0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x158a,0x158a,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
+0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
+0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1596,0x1590,0x1590,0x1596,0x1596,0x1596,0x1596,
+0x1596,0x1596,0x1596,0x1596,0x1596,0x17d0,0x17d0,0x17d0,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1596,0x1404,
+0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
+0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1596,0x17d0,0x17d0,
+0x1404,0x1404,0x1404,0x1404,0x1404,0x1407,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
+0x1404,0x1404,0x1404,0x1404,0x1590,0x1590,0x1596,0x1596,0x1590,0x1596,0x1596,0x1596,0x158d,0x158d,0x1596,0x1596,
+0x1404,0x1404,0x1407,0x1407,0x1407,0x1701,0x1404,0x1407,0x1404,0x1404,0x1407,0x1599,0x1599,0x1596,0x1596,0x17d0,
+0x17d0,0x17d0,0x17d0,0x17d0,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,
+0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
+0x1404,0x1590,0x1590,0x1596,0x1701,0x1596,0x1590,0x1596,0x17d0,0x17d0,0x17d0,0x17d3,0x17d3,0x17d3,0x17d3,0x17d3,
+0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
+0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1596,
+0x1404,0x1596,0x1407,0x1407,0x1404,0x1404,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,
+0x1407,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
+0x1404,0x1404,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,
+0x1407,0x1407,0x1407,0x1407,0x1407,0x1404,0x1404,0x1404,0x1407,0x1404,0x1404,0x1404,0x1404,0x1407,0x1407,0x1407,
+0x1404,0x1407,0x1407,0x1407,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1407,0x1404,0x1407,0x1404,0x1404,
+0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
+0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1701,0x1404,0x1404,0x1404,0x1404,0x1596,0x1590,0x17d0,
+0x145e,0x145e,0x145e,0x145e,0x158a,0x158a,0x158d,0x158d,0x158d,0x1593,0x1596,0x17d0,0x17d0,0x17d0,0x17d0,0x1758,
+0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,
+0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,0x1596,0x1590,0x1590,0x1596,0x1599,0x1599,0x1596,0x1596,
+0x1596,0x1596,0x1887,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,0x1590,0x1596,0x1590,0x1590,0x1590,0x1590,
+0x1596,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,0x1590,0x1590,0x1590,0x1596,0x158d,0x158d,0x158d,0x158d,
+0x158d,0x158d,0x1596,0x1404,0x1404,0x1404,0x1404,0x1404,0x14e8,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
+0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x14e8,0x140a,0x140a,0x140a,0x14e8,0x140a,0x14e8,
+0x140a,0x14e8,0x140a,0x14e8,0x140a,0x140a,0x140a,0x14e8,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x14e8,0x14e8,
+0x140a,0x140a,0x140a,0x140a,0x14e8,0x140a,0x14e8,0x14e8,0x140a,0x140a,0x140a,0x140a,0x14e8,0x140a,0x140a,0x140a,
+0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x1707,0x1707,0x17d6,0x17d6,0x140d,0x140d,0x140d,
+0x140a,0x140a,0x140a,0x140d,0x140d,0x140d,0x140d,0x140d,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,
+0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,
+0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,
+0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1413,0x1410,0x1410,0x1410,0x1410,
+0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1413,0x1413,0x1413,0x1410,
+0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,
+0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,
+0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1803,0x1803,0x1800,0x175b,0x1464,0x1464,0x1464,0x1464,
+0x1464,0x1464,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,
+0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x15a2,0x1470,0x1470,0x1470,0x1482,0x1482,0x1482,0x1482,0x1482,
+0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,
+0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,
+0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,
+0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,
+0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,
+0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x1a34,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,
+0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,
+0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14ac,0x14ac,0x14b8,0x14be,0x14be,0x14be,0x14be,0x14be,
+0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,
+0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14b8,0x14b8,0x14b8,0x14ac,0x14ac,
+0x14ac,0x14ac,0x14ac,0x14ac,0x14ac,0x14ac,0x14ac,0x14b8,0x14bb,0x14be,0x14c1,0x14c1,0x14be,0x14c4,0x14c4,0x14af,
+0x14b2,0x1764,0x1767,0x1767,0x1767,0x15ab,0x1adc,0x1ad9,0x14b5,0x14b5,0x14b5,0x14b5,0x14b5,0x14b5,0x14b5,0x14b5,
+0x14b5,0x14b5,0x15a8,0x176d,0x1770,0x176a,0x1773,0x1773,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,
+0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,
+0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,
+0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,
+0x153c,0x153c,0x1956,0x1956,0x1956,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,
+0x153c,0x1a22,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x18ba,0x1956,0x1956,0x1956,0x1956,0x1956,
+0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1590,0x1590,0x1596,0x1596,0x1596,0x1590,0x1590,0x1590,
+0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,0x1596,0x1596,0x158d,0x158d,0x158d,0x158d,
+0x158d,0x158d,0x158d,0x158d,0x1596,0x1596,0x1596,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,
+0x1590,0x1590,0x1596,0x1596,0x1596,0x1596,0x1590,0x1590,0x1599,0x1590,0x1590,0x1590,0x1590,0x1704,0x1704,0x1590,
+0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1884,0x1596,0x1590,0x1590,0x1596,0x1590,0x1590,0x1590,
+0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,0x1596,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,
+0x1596,0x1590,0x1590,0x1590,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,
+0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,
+0x15ba,0x15ba,0x15ba,0x15ba,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,
+0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,
+0x15cc,0x15cc,0x15cc,0x15cc,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,
+0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,
+0x15d2,0x15d2,0x15d2,0x15d2,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,
+0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,
+0x15d5,0x15d5,0x15d5,0x15d5,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,
+0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,
+0x1614,0x1614,0x1614,0x1605,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,
+0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x1617,
+0x1620,0x1620,0x1620,0x1620,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,
+0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,
+0x1623,0x1623,0x1623,0x1623,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x1635,0x163e,0x163e,0x163e,
+0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,
+0x163e,0x163e,0x163e,0x163e,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,
+0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,
+0x1647,0x1647,0x1647,0x1647,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,
+0x1659,0x1659,0x1659,0x1659,0x1656,0x1656,0x1656,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x1656,
+0x1656,0x164a,0x1656,0x164d,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,
+0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,
+0x1659,0x1659,0x1659,0x1659,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,
+0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,
+0x167d,0x167a,0x167a,0x167a,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,
+0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x168c,0x168c,0x168c,0x1689,0x1689,0x1689,
+0x1686,0x1686,0x1686,0x1686,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,
+0x169b,0x169b,0x169b,0x169b,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x16a1,0x16a1,0x1695,0x1692,0x1692,
+0x1692,0x1692,0x1692,0x1692,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,
+0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,
+0x169b,0x169b,0x169b,0x169b,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,
+0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a4,0x16a4,0x16a4,0x16a4,0x16a4,
+0x16a4,0x16a4,0x16a4,0x16a4,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,
+0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,
+0x16aa,0x16aa,0x16aa,0x16aa,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,
+0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,
+0x16ce,0x16ce,0x16ce,0x16ce,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,
+0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,
+0x16d7,0x16d7,0x16d7,0x16d7,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,
+0x16ef,0x16ef,0x16ef,0x16ef,0x16da,0x16e9,0x16e9,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16e9,0x16da,0x16ec,
+0x16ec,0x16da,0x16ec,0x16da,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,
+0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,
+0x16ef,0x16ef,0x16ef,0x16ef,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,
+0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,
+0x16f8,0x16f8,0x16f8,0x16f8,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,
+0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,
+0x16fe,0x16fe,0x16fe,0x16fe,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,
+0x1740,0x1740,0x1740,0x1740,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,
+0x1956,0x1956,0x1956,0x1a22,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,
+0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,
+0x1761,0x1761,0x1761,0x1761,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,
+0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,
+0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x17a3,0x17a0,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,
+0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,
+0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,
+0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,
+0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,
+0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,
+0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,
+0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,
+0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,
+0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,
+0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,
+0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c4,0x17c4,0x17c4,0x17c4,0x17c1,
+0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c4,0x17c4,0x17c4,
+0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c1,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,
+0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,
+0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,
+0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,
+0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,
+0x18c6,0x18c6,0x18c6,0x18c6,0x1b3f,0x1a94,0x1a94,0x1a97,0x17df,0x17df,0x17df,0x17df,0x17df,0x17df,0x17df,0x17df,
+0x17e2,0x1890,0x1890,0x1890,0x1890,0x1890,0x1890,0x192c,0x17df,0x17df,0x17df,0x17df,0x17df,0x188d,0x188d,0x188d,
+0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,
+0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x1890,0x188d,
+0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1890,0x192c,0x192c,0x1890,0x1890,0x1890,0x1890,0x1890,
+0x1890,0x1890,0x188d,0x180f,0x1890,0x1890,0x1890,0x1a94,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x180f,0x188d,
+0x188d,0x188d,0x188d,0x188d,0x1929,0x1a0a,0x1a0a,0x1a0a,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,
+0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x1929,0x1824,0x1824,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,
+0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,
+0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,
+0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,
+0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,
+0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x186f,0x186f,0x186f,
+0x185a,0x185a,0x185a,0x185a,0x185a,0x185a,0x185a,0x185a,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,
+0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,
+0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,
+0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,
+0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,
+0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,
+0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1b48,0x1b48,0x1b48,0x1b48,0x1b48,
+0x1b48,0x1b48,0x1b48,0x1b48,0x1b48,0x1b48,0x1b48,0x1b48,0x18f0,0x18f0,0x18f0,0x18f0,0x1a46,0x1a46,0x18f3,0x18f3,
+0x18f3,0x18f3,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18ed,
+0x18de,0x18e1,0x18e4,0x18f6,0x18f6,0x1995,0x18e7,0x18e7,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,
+0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,
+0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,
+0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x18fc,0x1902,0x18ff,0x18ff,0x18ff,
+0x18ff,0x190e,0x1914,0x18ff,0x18ff,0x18ff,0x18ff,0x190b,0x1911,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,
+0x18ff,0x18ff,0x18ff,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,
+0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,
+0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,
+0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1a0a,
+0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,
+0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,
+0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,
+0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,
+0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,
+0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,
+0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,
+0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,
+0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,
+0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,
+0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,
+0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,
+0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,
+0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,
+0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,
+0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,
+0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,
+0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19ef,0x19ef,0x19ef,0x1a0a,0x1a0a,0x1a0a,0x1b3c,0x1b3c,0x1a94,0x1a94,0x1a94,
+0x1a94,0x1a94,0x1a94,0x1b3c,0x1b3c,0x1b3c,0x1a94,0x1a94,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0a,0x1a10,0x1a10,0x1a0a,
+0x1a10,0x1a10,0x1a94,0x1a97,0x1a94,0x1a94,0x1a94,0x1a94,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,
+0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,
+0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,
+0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,
+0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,
+0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1aaf,0x1aaf,0x1a79,0x1aaf,0x1a79,0x1a79,0x1a79,0x1a79,
+0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a7f,0x1a7f,0x1a7f,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,
+0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,
+0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,
+0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,
+0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,
+0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,
+0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,
+0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,
+0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,
+0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,
+0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0,0,0,0
+};
+
+static const UTrie2 propsVectorsTrie={
+ propsVectorsTrie_index,
+ propsVectorsTrie_index+5024,
+ NULL,
+ 5024,
+ 26204,
+ 0xa40,
+ 0x1420,
+ 0x0,
+ 0x0,
+ 0x110000,
+ 0x79f8,
+ NULL, 0, FALSE, FALSE, 0, NULL
+};
+
+static const uint32_t propsVectors[6999]={
+0x67,0,0,0x67,0,0x4e00000,0x67,0x80000,0x20,0x867,0,0,0xa67,0,0,0xb67,
+0,0,0xc67,0,0,0xd67,0,0,0xe67,0,0,0x1067,0,0,0x1167,0,
+0,0x1267,0,0,0x1367,0,0,0x1467,0,0,0x1567,0,0,0x1667,0,0,
+0x1767,0,0,0x1867,0,0,0x1967,0,0,0x1a67,0,0,0x1b67,0,0,0x1d67,
+0,0,0x1f67,0,0,0x2067,0,0,0x2267,0,0,0x2367,0,0,0x2467,0,
+0,0x2567,0,0,0x2767,0,0,0x2867,0x80000,0x20,0x2967,0,0,0x2a67,0,0x1600000,
+0x2b67,0,0,0x2d67,0,0,0x3167,0x20000000,0,0x3267,0x20000000,0,0x3a67,0,0,0x3b67,
+0,0,0x3c67,0,0,0x3e67,0,0,0x4067,0,0,0x4167,0,0,0x4467,0,
+0,0x4867,0,0,0x4967,0,0,0x4a67,0,0,0x5067,0,0,0x5167,0,0,
+0x5467,0,0,0x5567,0,0,0x5667,0x80000,0x20,0x5767,0,0,0x5867,0,0,0x5967,
+0,0,0x5b67,0,0,0x5c67,0,0,0x5d67,0,0,0x6067,0x80000,0x20,0x6267,0,
+0,0x6367,0,0,0x6467,0,0,0x6567,0,0,0x6f67,0,0,0x7067,0,0,
+0x7367,0x20000000,0,0x7567,0,0,0x7667,0,0,0x7767,0,0,0x7867,0,0,0x7a67,
+0,0,0x7b67,0,0,0x7c67,0,0,0x7e67,0,0,0x7f67,0,0,0x8167,0,
+0,0x8267,0,0,0x8367,0,0,0x8467,0,0,0x8567,0,0,0x8667,0,0,
+0x8767,0,0,0x8867,0,0,0x8967,0,0,0x8b67,0,0,0x8c67,0,0,0x8e67,
+0x20000000,0,0x8f67,0,0,0x9067,0,0,0x9167,0,0,0x9267,0,0,0x9367,0,
+0,0x9567,0,0,0x9667,0,0,0x9767,0,0,0x9867,0,0,0x9967,0,0,
+0x9a67,0,0,0x9c67,0,0,0x9f67,0,0,0xa167,0,0,0xa367,0,0,0xa467,
+0,0,0xa567,0,0,0xa667,0,0,0xa767,0,0,0xa867,0,0,0xa967,0,
+0,0xaa67,0,0x4e00000,0xab67,0,0x4e00000,0xac67,0,0,0xad67,0,0,0xae67,0,0,
+0xaf67,0,0,0xb167,0,0,0xb267,0,0,0xb467,0,0,0xb567,0,0,0xb767,
+0,0,0xb867,0,0,0xb967,0,0,0xba67,0,0,0xbc67,0,0,0xbd67,0,
+0,0xbe67,0,0,0xbf67,0,0,0xc067,0,0,0xc167,0,0,0xc267,0,0,
+0xc367,0,0x4e00000,0xc467,0,0x4e00000,0xc667,0,0,0xc767,0,0,0xc867,0,0,0xc967,
+0,0,0xca67,0,0,0xcc67,0,0x4e00000,0xcf67,0,0x4e00000,0xd067,0,0x4e00000,0xd267,0,
+0,0xd367,0,0,0xd467,0,0,0xd567,0,0,0xd667,0,0,0xd867,0,0,
+0xda67,0,0,0xdb67,0,0,0xdc67,0,0,0xdd67,0,0,0xde67,0,0,0xdf67,
+0,0,0xe067,0,0,0xe167,0,0,0xe267,0,0,0xe367,0,0x4e00000,0xe467,0,
+0,0xe567,0,0,0xe667,0,0,0xe767,0,0,0xe867,0,0,0xe967,0,0,
+0xea67,0,0,0xeb67,0,0,0xec67,0,0,0xed67,0,0,0xee67,0,0,0xef67,
+0,0,0xf167,0,0,0xf367,0,0,0xf567,0,0,0xf667,0,0,0xf767,0,
+0,0xf867,0,0,0xf967,0,0,0xfa67,0,0x4e00000,0xfb67,0,0,0xfc67,0,0,
+0xfd67,0,0,0xfe67,0,0,0x10167,0,0,0x10267,0,0,0x10367,0,0,0x10467,
+0,0,0x10567,0,0x4e00000,0x10667,0,0,0x10767,0,0,0x10867,0,0,0x10967,0,
+0,0x10a67,0,0,0x10b67,0,0,0x10c67,0,0,0x10d67,0,0,0x10e67,0,0,
+0x10f67,0,0,0x11067,0,0,0x11367,0,0,0x11467,0,0,0x11567,0,0,0x11667,
+0,0,0x11767,0,0,0x11867,0,0,0x11967,0,0x4e00000,0x11a67,0,0,0x11b67,0,
+0,0x11c67,0,0,0x11d67,0,0,0x11e67,0,0,0x11f67,0,0,0x12067,0,0,
+0x12167,0,0,0x12267,0,0,0x12367,0,0,0x12467,0,0,0x12567,0,0,0x12667,
+0,0,0x12767,0,0,0x12867,0,0,0x12967,0,0,0x12a67,0,0x4e00000,0x12b67,0,
+0,0x12c67,0,0,0x12d67,0,0,0x12f67,0,0,0x13067,0,0,0x13167,0,0,
+0x13267,0,0,0x13367,0,0,0x13467,0,0,0xa0067,0,0xe00000,0xa4767,0,0xe00000,0xa4f67,
+0,0xe00000,0xa5e67,0,0xe00000,0xa5f67,0,0xe00000,0xac567,0,0xe00000,0xad167,0,0xe00000,0xb0067,0,
+0xe00000,0xb1267,0,0xe00000,0xb2e67,0,0xe00000,0x11000100,0,0x900020,0x11000100,0x40000001,0x440020,0x11000100,0x40000001,0x643020,
+0x11000100,0x40000001,0xa5a040,0x11000100,0x40000001,0x116a8a0,0x11000200,0,0x900020,0x11000200,0x4000001,0xc4000b,0x11000200,0x7c00100,0x220402,0x11000200,
+0x24000000,0x14200000,0x11000200,0x24000008,0x1710000,0x11000200,0x40000001,0x1d3b020,0x11000219,0x7c00100,0x220401,0x11000219,0x7c00100,0x250401,0x11000319,0x7c00100,
+0x220401,0x11000319,0x7c00100,0x220402,0x11000319,0x7c00100,0x250400,0x11000319,0x7c00100,0x250401,0x11000419,0x7c00100,0x220400,0x11000419,0x7c00100,0x220401,
+0x11000419,0x7c00100,0x220402,0x11000419,0x7c00100,0x230400,0x11000419,0x7c00100,0x250400,0x11000419,0x7c00100,0x250401,0x11000419,0x7c00100,0x250402,0x11000519,
+0x7c00100,0x220400,0x11000519,0x7c00100,0x230400,0x11000600,0x4000400,0x200002,0x11000600,0x4000400,0x200400,0x11000600,0x7c00500,0x220400,0x11000600,0x7c00500,
+0x230400,0x11000600,0x7c00500,0x530400,0x11000600,0x7c00d00,0x230400,0x11000619,0x7c00500,0x22040f,0x11000800,0x4000010,0x1001401,0x11000800,0x4000400,0x200001,
+0x11000800,0x6800010,0x201001,0x11000800,0x7c00500,0x230401,0x11000807,0x7c00100,0x220400,0x11000807,0x7c00100,0x250400,0x1100080e,0x4000400,0x200000,0x1100080e,
+0x4000400,0x200002,0x1100080e,0x7000500,0x220402,0x1100080e,0x7c00100,0x220400,0x1100080e,0x7c00100,0x220401,0x1100080e,0x7c00100,0x220402,0x1100080e,0x7c00100,
+0x250400,0x1100080e,0x7c00100,0x250401,0x1100080e,0x7c00120,0x220402,0x1100080e,0x7c00120,0x250402,0x11000908,0x4000000,0x200000,0x11000908,0x7c00100,0x220400,
+0x11000908,0x7c00100,0x220401,0x11000908,0x7c00100,0x250400,0x11000908,0x7c00100,0x250401,0x11000a03,0x4000000,0x200400,0x11000a03,0x4000000,0x201000,0x11000a03,
+0x4000000,0x270000,0x11000a03,0x7c00100,0x220400,0x11000a03,0x7c00100,0x220402,0x11000a03,0x7c00100,0x250400,0x11000a03,0x7c00500,0x230400,0x11000a03,0xc000010,
+0x1049400,0x11000b13,0x2802500,0x962460,0x11000b13,0x4000000,0x200000,0x11000b13,0x4000000,0x201000,0x11000b13,0x4000000,0x230400,0x11000b13,0x4000002,0x400000,
+0x11000b13,0x4000010,0x200000,0x11000b13,0x7c00100,0x2633800,0x11000c00,0x80000000,0x218960,0x11000c02,0x2802100,0x962460,0x11000c02,0x2802400,0x962460,0x11000c02,
+0x4000000,0x200000,0x11000c02,0x4000000,0x1329400,0x11000c02,0x4000000,0x1329800,0x11000c02,0x4000000,0x1500000,0x11000c02,0x6800000,0x1329800,0x11000c02,0x7c00100,
+0x230400,0x11000c02,0x7c00100,0x230401,0x11000c02,0x7c00100,0x230402,0x11000c02,0x7c00500,0x230400,0x11000c02,0x7d00100,0x230400,0x11000f01,0x2802400,0x962460,
+0x11000f0a,0x2802100,0x962460,0x11000f0a,0x2802400,0x962460,0x11000f0a,0x2806400,0x962460,0x11000f0a,0x4000000,0x200000,0x11000f0a,0x6800100,0x962540,0x11000f0a,
+0x7c00100,0x230400,0x11000f0a,0x7c00100,0x230401,0x11001004,0x2802100,0x962460,0x11001004,0x2802400,0x962460,0x11001004,0x2806400,0x962460,0x11001004,0x4000000,
+0x200000,0x11001004,0x4000000,0x1500000,0x11001004,0x6800100,0x962540,0x11001004,0x6800100,0x962541,0x11001004,0x7c00100,0x230400,0x11001004,0x7c00100,0x230401,
+0x11001110,0x2802100,0x962460,0x11001110,0x2802400,0x962460,0x11001110,0x2806400,0x962460,0x11001110,0x6800100,0x962540,0x11001110,0x7c00100,0x230400,0x11001110,
+0x7c00100,0x230401,0x1100120f,0x2802100,0x962460,0x1100120f,0x2802400,0x962460,0x1100120f,0x2806400,0x962460,0x1100120f,0x6800100,0x962540,0x1100120f,0x7c00100,
+0x230400,0x1100131f,0x2802100,0x962460,0x1100131f,0x2802400,0x962460,0x1100131f,0x2806400,0x962460,0x1100131f,0x4000000,0x200000,0x1100131f,0x6800000,0x1329800,
+0x1100131f,0x6800100,0x962540,0x1100131f,0x6800100,0x962541,0x1100131f,0x7c00100,0x230400,0x1100131f,0x7c00100,0x230401,0x11001423,0x2802100,0x962460,0x11001423,
+0x2806400,0x962460,0x11001423,0x6800100,0x962540,0x11001423,0x6800100,0x962541,0x11001423,0x7c00100,0x230400,0x11001423,0x7c00100,0x230401,0x11001524,0x2802100,
+0x962460,0x11001524,0x2802100,0x962461,0x11001524,0x2806400,0x962460,0x11001524,0x6800000,0x1329800,0x11001524,0x6800100,0x962540,0x11001524,0x7c00100,0x230400,
+0x11001615,0x2802100,0x962460,0x11001615,0x2806400,0x962460,0x11001615,0x6800100,0x962540,0x11001615,0x6800100,0x962541,0x11001615,0x7c00100,0x230400,0x1100171a,
+0x2802100,0x962460,0x1100171a,0x2806400,0x962460,0x1100171a,0x6800000,0x1329800,0x1100171a,0x6800100,0x962540,0x1100171a,0x6800100,0x962541,0x1100171a,0x7c00100,
+0x230400,0x11001900,0x4000000,0x1600000,0x11001926,0x2802100,0x1862460,0x11001926,0x2802400,0x1862460,0x11001926,0x2806100,0x1862460,0x11001926,0x4000000,0x200000,
+0x11001926,0x4000010,0x400000,0x11001926,0x6800000,0x1329800,0x11001926,0x7800100,0x1830142,0x11001926,0x7c00100,0x1830000,0x11001926,0x7c00900,0x1830000,0x11001926,
+0x7e00100,0x1830000,0x11001a18,0x2802100,0x1862460,0x11001a18,0x2802400,0x1862460,0x11001a18,0x6800000,0x1329800,0x11001a18,0x7800100,0x1830142,0x11001a18,0x7c00100,
+0x1830000,0x11001a18,0x7c00100,0x1830002,0x11001a18,0x7c00900,0x1830000,0x11001a18,0x7e00100,0x1830000,0x11001d0c,0x7c00100,0x230400,0x11001d0c,0x7c00100,0x250400,
+0x11001e12,0x7c00100,0x2230500,0x11001e12,0x7c00100,0x2330520,0x11001e12,0x7c80100,0x2330520,0x11002619,0x7c00100,0x220401,0x11002619,0x7c00100,0x220402,0x11002619,
+0x7c00100,0x250401,0x1100270e,0x4000400,0x200001,0x1100270e,0x4000400,0x200002,0x1100270e,0x4000400,0x500001,0x1100270e,0x7c00100,0x220401,0x1100270e,0x7c00100,
+0x250401,0x11002800,0x80000,0x918820,0x11002800,0x80000,0x1c18020,0x11002800,0x180000,0x918820,0x11002800,0x4000001,0x445801,0x11002800,0x4000001,0x445802,
+0x11002800,0x4000001,0xc4000b,0x11002800,0x6800000,0x201c00,0x11002800,0x6800020,0x201c00,0x11002800,0x24000000,0x200000,0x11002800,0x24000000,0x200002,0x11002800,
+0x24000000,0x810000,0x11002800,0x24000000,0x1410000,0x11002800,0x24000000,0x1500000,0x11002800,0x24000000,0x1500002,0x11002800,0x24000002,0x400000,0x11002800,0x24000006,
+0xc0000b,0x11002800,0x24000008,0x1410000,0x11002800,0x24000008,0x1710000,0x11002800,0x24000020,0x1001400,0x11002800,0x24000020,0x1500002,0x11002800,0x2c000010,0x1248000,
+0x11002800,0x2c000010,0x15248002,0x11002800,0x40000001,0x63b020,0x11002800,0x40080000,0x918820,0x11002801,0x80000,0xaa65620,0x11002801,0x82000,0x962460,0x11002900,
+0x4000000,0x20000e,0x11002900,0x4000000,0x20000f,0x11002900,0x4000020,0x20000e,0x11002900,0x4000020,0x20000f,0x11002900,0x4000020,0x81000e,0x11002900,0x4000020,
+0x81000f,0x11002900,0x4000020,0x141000e,0x11002900,0x4000020,0x141000f,0x11002900,0x4000022,0x20000e,0x11002900,0x4000022,0x20000f,0x11002a00,0x4000000,0x1500000,
+0x11002a00,0x4000000,0x1600000,0x11002a00,0x4000000,0x1600002,0x11002b01,0x2000,0x962460,0x11002b01,0x2802020,0x962460,0x11002c00,0x4000000,0x200000,0x11002c00,
+0x4000000,0x200002,0x11002c00,0x4000000,0x20000f,0x11002c00,0x4000020,0x200000,0x11002c00,0x7c00000,0x200000,0x11002c00,0x7c00020,0x200000,0x11002c00,0x7c00120,
+0x220405,0x11002c00,0x7c00120,0x230402,0x11002c00,0x7c00120,0x250402,0x11002c00,0x7c00120,0x250405,0x11002c19,0x7c00100,0x250400,0x11002c19,0x7c00100,0x250401,
+0x11002d00,0x4000000,0x100006,0x11002d00,0x4000000,0x200006,0x11002d19,0x7c00100,0x220402,0x11002d19,0x7c00100,0x230400,0x11002d19,0x7c00100,0x250402,0x11002e00,
+0x24000000,0x200000,0x11002e00,0x24000020,0x200000,0x11002e00,0x24000020,0x200001,0x11002e00,0x24000020,0x14200000,0x11002f00,0x24000020,0x200000,0x11002f00,0x24000020,
+0x200001,0x11002f00,0x24000020,0x200002,0x11002f00,0x24000020,0xf00000,0x11002f00,0x24000020,0x1600000,0x11002f00,0x24000022,0x1600000,0x11003000,0x24000000,0x200000,
+0x11003000,0x24000000,0x14200000,0x11003000,0x24000020,0x200000,0x11003000,0x24000020,0x810000,0x11003000,0x24000020,0x1410000,0x11003100,0x24000000,0x200000,0x11003200,
+0x24000000,0x200000,0x11003300,0x4000000,0x100003,0x11003400,0x24000000,0x100000,0x11003400,0x24000000,0x200000,0x11003500,0x24000000,0x200000,0x11003600,0x24000000,
+0x200000,0x11003600,0x24000000,0x14200000,0x11003600,0x24000020,0x200000,0x11003700,0x24000000,0x200000,0x11003700,0x24000000,0x4200000,0x11003700,0x24000000,0x4e00000,
+0x11003700,0x24000000,0x14200000,0x11003700,0x24000000,0x14e00000,0x11003700,0x24000000,0x96800000,0x11003700,0x24000020,0x4200000,0x11003800,0x4000000,0x100000,0x11003800,
+0x24000000,0x200000,0x11003800,0x24000000,0xb00000,0x11003800,0x24000000,0x1710000,0x11003800,0x24000000,0x4200000,0x11003800,0x24000000,0x4e00000,0x11003800,0x24000000,
+0x14200000,0x11003800,0x24000000,0x14b00000,0x11003800,0x24000000,0x14e00000,0x11003800,0x24000000,0x96800000,0x11005003,0x7c00100,0x220402,0x11005013,0x2802500,0x962460,
+0x11005013,0x4000020,0x200005,0x11005013,0x7c00100,0x2633801,0x11005013,0x7c00100,0x2633802,0x11005013,0x7c00100,0x2633805,0x11005019,0x7c00100,0x220402,0x11005100,
+0x24000000,0x810000,0x11005100,0x24000000,0x1410000,0x11005102,0x7000100,0x230408,0x11005102,0x7c00100,0x230404,0x11005102,0x7c00100,0x230407,0x11005102,0x7c00100,
+0x230408,0x11005102,0x7c00100,0x230409,0x11005201,0x2802400,0x962460,0x11005500,0x80000,0x1e18820,0x11005502,0x7000100,0x230408,0x11005502,0x7c00100,0x230404,
+0x11005502,0x7c00100,0x230407,0x11005502,0x7c00100,0x230408,0x11005502,0x7c00100,0x230409,0x11005667,0x1000,0,0x11020200,0x80004,0x418820,0x11020200,
+0x4000000,0x100006,0x11020200,0x4000000,0x10000f,0x11020200,0x4000400,0x100002,0x11020200,0x4000400,0x500002,0x11020200,0x6800c00,0x101000,0x11020200,0x24000000,
+0x100000,0x11020200,0x24000000,0x1400000,0x11020200,0x24000000,0x1500000,0x11020200,0x24000000,0x1600000,0x11020200,0x24000000,0x14200000,0x11020200,0x24000020,0x100000,
+0x11020200,0x24000020,0x1600000,0x11020219,0x7c00100,0x12040f,0x11020219,0x7c00100,0x220400,0x11020219,0x7c00100,0x220401,0x11020219,0x7c00100,0x250400,0x11020319,
+0x7c00100,0x220400,0x11020319,0x7c00100,0x220401,0x11020319,0x7c00100,0x220402,0x11020319,0x7c00100,0x250400,0x11020319,0x7c00100,0x250402,0x11020319,0x7d00100,
+0x220402,0x11020419,0x7c00100,0x220401,0x11020519,0x7c00100,0x220400,0x11020600,0x4000400,0x100002,0x11020600,0x4000400,0x200400,0x11020600,0x7c00500,0x130400,
+0x11020600,0x7c00d00,0x130400,0x11020701,0x2802400,0x962460,0x11020701,0x2802400,0x962461,0x11020701,0x2802400,0xc62460,0x1102080e,0x7c00100,0x220400,0x1102080e,
+0x7c00100,0x250400,0x11020908,0x7c00100,0x220400,0x11020908,0x7c00100,0x220401,0x11020908,0x7c00100,0x250400,0x11020908,0x7c00100,0x250401,0x11022800,0x24000000,
+0x100000,0x11022800,0x24000000,0x200000,0x11022800,0x24000000,0x200002,0x11022800,0x24000000,0x401000,0x11022800,0x24000000,0xf00002,0x11022800,0x24000000,0xf0ac02,
+0x11022800,0x24000000,0x1500000,0x11022800,0x24000002,0x100000,0x11022800,0x24000002,0x370000,0x11022800,0x24000002,0x470000,0x11022800,0x24000006,0x400000,0x11022800,
+0x24000008,0x1710000,0x11022800,0x24000008,0x1712c00,0x11022800,0x24000020,0x100000,0x11022800,0x24000020,0x1500000,0x11022800,0x24000020,0x1500002,0x11022900,0x4000000,
+0x10000e,0x11022900,0x4000000,0x10000f,0x11022919,0x7c00100,0x12040f,0x11022c00,0x4000000,0x100002,0x11022c00,0x4000000,0x1500002,0x11022c00,0x4000000,0x1600002,
+0x11022c00,0x4000000,0x1410000f,0x11022c00,0x7c00120,0x120405,0x11022c0e,0x7c00100,0x250401,0x11022c19,0x7c00100,0x150401,0x11022d00,0x4000000,0x100006,0x11022d00,
+0x4000000,0x200006,0x11022d19,0x7c00100,0x120402,0x11022d19,0x7c00100,0x150402,0x11022e00,0x24000000,0x200000,0x11022e00,0x24000020,0x100000,0x11022e00,0x24000020,
+0x14100000,0x11022f00,0x24000020,0x100000,0x11022f00,0x24000020,0x100001,0x11022f00,0x24000020,0x100002,0x11023000,0x24000000,0x100000,0x11023300,0x4000000,0x100002,
+0x11023300,0x4000000,0x100003,0x11023300,0x4000100,0x120403,0x11023300,0x4000100,0x150403,0x11023300,0x4000100,0x14150403,0x11023400,0x24000000,0x100000,0x11023500,
+0x24000000,0x100000,0x11023600,0x24000000,0x100000,0x11023600,0x24000020,0x100000,0x11023600,0x24000020,0x14100000,0x11023700,0x24000000,0x4100000,0x11023700,0x24000000,
+0x4e00000,0x11023700,0x24000000,0x14100000,0x11023700,0x24000000,0x14e00000,0x11023700,0x24000020,0x100000,0x11023700,0x24000020,0x4100000,0x11023700,0x24000020,0x14100000,
+0x11023800,0x4000000,0x100000,0x11023800,0x24000000,0x200000,0x11024e67,0,0,0x11025600,0x4000000,0x100000,0x11042a00,0x4000000,0x1600000,0x11045700,
+0x4000000,0x20000a,0x11045700,0x4000020,0x20000a,0x11045712,0x7c00100,0xe3040a,0x11045712,0x7c80100,0xe3040a,0x11045716,0x7c00100,0xe30c0a,0x11045716,0x7c00100,
+0x2530c0a,0x11063d00,0x4000001,0x445811,0x11065700,0x4000000,0x810011,0x11065700,0x4000000,0xe00011,0x11065700,0x4000000,0x1410011,0x11065700,0x4000000,0x1500011,
+0x11065700,0x4000000,0x1600011,0x11065700,0x4000006,0xe70011,0x11065700,0x4000008,0xe00011,0x11065700,0x4000008,0xe02c11,0x11065700,0x4000010,0x871411,0x11065700,
+0x4000010,0x1201411,0x11065700,0x4000010,0x1271011,0x11065700,0x4000020,0xe00011,0x11065700,0x4000400,0xe00011,0x11065700,0x4000420,0xe00011,0x11065700,0x6800000,
+0xe01c11,0x11065700,0x6800040,0xe29811,0x11065700,0xc000010,0x80ac11,0x11065700,0xc000010,0xb48011,0x11065719,0x7c00100,0xe20411,0x11065719,0x7c00100,0xe50411,
+0x11065719,0x7c00140,0xe20411,0x11065719,0x7c00140,0xe50411,0x11080100,0x6800000,0x201c00,0x11080100,0x68000c0,0x19329800,0x11080100,0x24000000,0x200000,0x11080100,
+0x24000000,0x810000,0x11080100,0x24000000,0x1410000,0x11080100,0x24000000,0x1500000,0x11080100,0x24000000,0x1600000,0x11080100,0x24000000,0x1b00000,0x11080100,0x24000000,
+0x2410000,0x11080100,0x24000000,0x18200000,0x11080100,0x24000006,0xd70000,0x11080100,0x24000008,0x1713c00,0x11080100,0x24000008,0x1714000,0x11080100,0x24000010,0x1001400,
+0x11080100,0x24000010,0x1071000,0x11080100,0x24000010,0x1071400,0x11080100,0x24000020,0x200000,0x11080100,0x24000020,0x400000,0x11080100,0x24000020,0x1600000,0x11080100,
+0x24000400,0x200000,0x11080100,0x24000420,0x200000,0x11080100,0x2c000010,0xb48000,0x11080100,0x2c000010,0x100ac00,0x11080100,0x44000001,0x1a45800,0x11080119,0x7c00100,
+0x220400,0x11080119,0x7c00100,0x250400,0x11080119,0x7c001c0,0x220400,0x11080119,0x7c001c0,0x250400,0x11080200,0x4000400,0x200002,0x11080200,0x24000000,0x200000,
+0x11080200,0x24000000,0x1500000,0x11080200,0x24000000,0x1600000,0x11080200,0x24000020,0x200000,0x110a1e12,0x7c00100,0x2130480,0x110a1e12,0x7c80100,0x2130480,0x110a3000,
+0x24000000,0x34e00000,0x110a3000,0x24100000,0x810001,0x110a3000,0x24100000,0x1410001,0x110a3700,0x24000000,0x34200000,0x110a3d00,0x4000000,0xe00000,0x110a3d00,0x4000000,
+0xe00002,0x110a3d00,0x24000000,0xe00000,0x110a3d11,0x7c00300,0xe30000,0x110a3d11,0x7c00900,0x1230400,0x110a3d12,0x2802400,0x962460,0x110a3e14,0x7c00100,0xe30000,
+0x110a3e14,0x7c00100,0xe30001,0x110a3e14,0x7c00100,0x2530000,0x110a3e14,0x7c00900,0x1230000,0x110a3e14,0x7c00900,0x1230001,0x110a3f16,0x7c00100,0xe30c00,0x110a3f16,
+0x7c00100,0xe30c01,0x110a3f16,0x7c00100,0x2530c00,0x110a3f16,0x7c00900,0x1230c00,0x110a3f16,0x7c00900,0x1230c01,0x110a4005,0x7c00100,0xe30400,0x110a4112,0x7c00100,
+0xe30402,0x110a4112,0x7c80100,0xe30402,0x110a4400,0x4000000,0xe00000,0x110a4412,0x4000000,0xe00002,0x110a4412,0x4000000,0xe00003,0x110a4416,0x4000000,0xe00c03,
+0x110a4500,0x4000000,0xe0000d,0x110a4516,0x4000000,0xe00c0d,0x110a4711,0x7c40300,0xe30000,0x110a4f11,0x7c00300,0xe30001,0x110a4f11,0x7c40300,0xe30000,0x110a5300,
+0x4000000,0x810010,0x110a5300,0x4000000,0xe00002,0x110a5300,0x4000000,0xe00010,0x110a5300,0x4000000,0x1410010,0x110a5300,0x4000002,0xe70010,0x110a5300,0x4000008,
+0x810010,0x110a5300,0x4000008,0x1410010,0x110a5300,0x6800000,0xe01c02,0x110a5300,0x6800000,0xe01c10,0x110a5400,0x4000000,0x81000c,0x110a5400,0x4000000,0xe0000c,
+0x110a5400,0x4000000,0x141000c,0x110a5400,0x4000000,0x150000c,0x110a5400,0x4000000,0x160000c,0x110a5400,0x4000002,0xe7000c,0x110a5400,0x4000010,0x87140c,0x110a5400,
+0x4000010,0xe7000c,0x110a5400,0x4000010,0x120140c,0x110a5400,0x4000010,0x127100c,0x110a5400,0x4000020,0xe0000c,0x110a5400,0x4000026,0xe7000c,0x110a5400,0xc000010,
+0x80ac0c,0x110a5400,0xc000010,0xb4800c,0x11400c0c,0x4000010,0xb00000,0x11400c0c,0x4000010,0x1071400,0x11400c0c,0xc000010,0xb48000,0x11400c16,0x7c00900,0x230400,
+0x11400f40,0xc000010,0x448000,0x11400f54,0xc000010,0x448000,0x11401d89,0x4000000,0x200000,0x11403dbf,0x4000000,0xe00000,0x114457b4,0x4000004,0x120000a,0x114457b4,
+0x4000008,0x81000a,0x114457b4,0x4000008,0x141000a,0x114457b4,0x4000010,0x87000a,0x114457b4,0xc000010,0x84800a,0x114457bd,0x3802500,0x126246a,0x114457bd,0x7c00d00,
+0x2530c0a,0x114a3db4,0x24000000,0x810000,0x114a3db4,0x24000000,0x1410000,0x114a3db4,0x24000008,0x810000,0x114a3db4,0x24000008,0x1410000,0x114a3db4,0x24000010,0x870000,
+0x114a3db4,0x2c000010,0x848000,0x114a3dba,0x4000000,0xe00000,0x114a3dba,0x24000000,0xe00000,0x114a3dba,0x24000002,0x1200000,0x114a3dba,0x24000002,0x14e00000,0x114a3dba,
+0x24000008,0x810000,0x114a3dba,0x24000008,0x1410000,0x114a3dbd,0x7c00900,0x930c00,0x114a3dbd,0x7c00900,0xe30c00,0x114a3dbf,0x7c00300,0xe30000,0x114a3ebd,0x7000400,
+0x1200c02,0x114a3fb4,0x4000004,0x1200000,0x114a3fbd,0x7c00d00,0x2530c00,0x114a42bf,0x4000000,0xe00000,0x114a42bf,0x4000000,0xe0000f,0x114a44bf,0x4000000,0xe00002,
+0x114a44bf,0x4000000,0xe00003,0x114a44bf,0x4000000,0x14e00003,0x114a45bf,0x4000000,0xe00002,0x114a45bf,0x4000000,0xe0000d,0x1180090a,0x2802400,0x962460,0x11800c1e,
+0x2802100,0x962460,0x11800c1e,0x2802500,0x962460,0x11800f27,0x2802400,0x962460,0x11800f34,0x2802400,0x962460,0x11820700,0x2802400,0x962460,0x11820700,0x2802500,
+0x962460,0x118a3dc0,0x2802400,0x962460,0x118a3ebd,0x2802400,0x962460,0x11c00904,0x2802400,0x962460,0x11c00908,0x2802400,0x962460,0x11c00c20,0xc000010,0xb48000,
+0x11c00c23,0x6800000,0x1329800,0x11c00f6d,0x6800000,0x1329800,0x11c01072,0x6800000,0x1329800,0x11c01176,0x6800000,0x1329800,0x11c0127a,0x6800000,0x1329800,0x11c0147e,
+0x4000000,0x200000,0x11c0147e,0x6800000,0x1329800,0x11c01682,0x6800000,0x1329800,0x11c051fa,0x7c00100,0x230408,0x20000067,0x1000,0,0x20000b13,0x2802400,
+0x962460,0x20000b13,0x2802500,0x962460,0x20001b27,0x2802100,0x962460,0x20001b27,0x2802100,0x962461,0x20001b27,0x2802400,0x962460,0x20001b27,0x2806400,0x962460,
+0x20001b27,0x2902100,0x962462,0x20001b27,0x4000000,0x200000,0x20001b27,0x4000000,0x400000,0x20001b27,0x4000000,0x500000,0x20001b27,0x4000000,0x810000,0x20001b27,
+0x4000000,0xb00000,0x20001b27,0x4000000,0xc0000b,0x20001b27,0x4000000,0x1410000,0x20001b27,0x4000010,0xb00000,0x20001b27,0x4000010,0xc00000,0x20001b27,0x6800000,
+0x1329800,0x20001b27,0x6800100,0x462540,0x20001b27,0x6800400,0x962540,0x20001b27,0x7c00100,0x230400,0x20001b27,0x7c00100,0x230401,0x20002619,0x7c00100,0x220401,
+0x20002a00,0x4000000,0x1600000,0x20004b67,0,0x1900000,0x20004c67,0,0x1900000,0x20004d67,0,0x1900000,0x20006d67,0x1000,0,0x20006e67,
+0x1000,0,0x20026d67,0,0,0x20026e67,0,0,0x200a4a12,0x7c00100,0x1f304c1,0x200a4a12,0x7c00100,0x20304e1,0x21005600,0x4000000,
+0x700000,0x21022a00,0x4000000,0x1600000,0x30000419,0x7c00100,0x220400,0x30000419,0x7c00100,0x220401,0x30000419,0x7c00100,0x250400,0x30000419,0x7c00100,0x250401,
+0x30000519,0x7c00100,0x220400,0x30000600,0x4000400,0x200400,0x30000600,0x7c00500,0x230400,0x30000605,0x4000400,0x200400,0x3000080e,0x7c00100,0x220400,0x30000908,
+0x2000,0x962460,0x30000908,0x7c00100,0x220400,0x30000908,0x7c00100,0x220401,0x30000908,0x7c00100,0x250400,0x30000908,0x7c00100,0x250401,0x30000a03,0x4000006,
+0x400400,0x30000c02,0x4000000,0x200000,0x30000c02,0x7c00100,0x230400,0x30000d22,0x2802100,0x962460,0x30000d22,0x2802400,0x962460,0x30000d22,0x2802500,0x962460,
+0x30000d22,0x4000000,0x200000,0x30000d22,0x4000010,0x200000,0x30000d22,0x7c00100,0x230400,0x30000d22,0xc000010,0x248000,0x30000d22,0x80000000,0x218960,0x30000e25,
+0x2802500,0x962460,0x30000e25,0x7c00100,0x230400,0x30001821,0x2802100,0x962460,0x30001821,0x2806400,0x962460,0x30001821,0x4000000,0x200000,0x30001821,0x6800100,
+0x962540,0x30001821,0x6800100,0x962541,0x30001821,0x7c00100,0x230400,0x30001b27,0x2802100,0x962460,0x30001b27,0x2802400,0x962460,0x30001b27,0x4000000,0x200000,
+0x30001b27,0x4000000,0x400000,0x30001b27,0x7c00100,0x230400,0x30001c1c,0x2802100,0x1862460,0x30001c1c,0x2802400,0x1862460,0x30001c1c,0x2806400,0x1862460,0x30001c1c,
+0x4000000,0x200000,0x30001c1c,0x6800100,0x1862400,0x30001c1c,0x6800100,0x1862540,0x30001c1c,0x7c00100,0x1830000,0x30001c1c,0x7c00100,0x1830001,0x30001c1c,0xc000010,
+0x448000,0x30001f0b,0x4000000,0x200000,0x30001f0b,0x4000010,0x200000,0x30001f0b,0x4000010,0x400000,0x30001f0b,0x6800000,0x200000,0x30001f0b,0x7c00100,0x230400,
+0x30001f0b,0xc000010,0x248000,0x30002006,0x7c00100,0x250400,0x30002128,0x4000000,0x200000,0x30002128,0x7c00100,0x230400,0x30002128,0xc000010,0x248000,0x3000221d,
+0x4000000,0x810000,0x3000221d,0x4000000,0x1410000,0x3000221d,0x4000001,0x445800,0x3000221d,0x7c00100,0x230400,0x30002300,0x4000010,0x400000,0x30002320,0x7c00100,
+0x230400,0x30002417,0x2802100,0x1862460,0x30002417,0x2802400,0x1862460,0x30002417,0x2806400,0x1862460,0x30002417,0x2882000,0x1862460,0x30002417,0x4000000,0x200000,
+0x30002417,0x4000000,0x400000,0x30002417,0x4000000,0x1600000,0x30002417,0x4000010,0x400000,0x30002417,0x4000010,0x1200000,0x30002417,0x6800000,0x1329800,0x30002417,
+0x6800100,0x1862540,0x30002417,0x7c00100,0x1830000,0x30002417,0x7d00100,0x1830000,0x3000251b,0x80000,0xc18820,0x3000251b,0x2802100,0x962460,0x3000251b,0x3c02100,
+0x962460,0x3000251b,0x4000000,0x200000,0x3000251b,0x4000006,0x500000,0x3000251b,0x4000010,0x400000,0x3000251b,0x4000010,0xb70000,0x3000251b,0x4000800,0x200000,
+0x3000251b,0x6800000,0x1329800,0x3000251b,0x7c00100,0x230400,0x3000251b,0x7c00900,0x230400,0x3000251b,0xc000010,0xb48000,0x3000251b,0x12882000,0x962460,0x30002800,
+0x24000000,0x200000,0x30002800,0x2c000010,0x1248002,0x30002800,0x2c000010,0x15248002,0x30002a00,0x4000000,0x1600000,0x30002b01,0x2000,0x962460,0x30002b01,0x2000,
+0x8962460,0x30002c00,0x4000000,0x200000,0x30002c00,0x7c00100,0x14220405,0x30002d19,0x7c00100,0x250400,0x30002e00,0x24000000,0x200000,0x30003000,0x24000000,0x200000,
+0x30003000,0x24000000,0x4200000,0x30003100,0x24000000,0x200000,0x30003600,0x24000000,0x200000,0x30003700,0x24000000,0x4200000,0x3000392e,0x24000000,0x200000,0x30005013,
+0x7c00100,0x2633801,0x30005600,0,0x918820,0x30020600,0x4000400,0x500400,0x30020701,0x2802400,0x962460,0x30020701,0x2802400,0xc62460,0x300a3a11,0x4020000,
+0xe00000,0x300a3a11,0x4020000,0xe00002,0x300a3b11,0x4020000,0xe00002,0x300a3c00,0x4008000,0xe00000,0x300a3c00,0x4010000,0xe00000,0x300a3d11,0x7c00300,0xe30002,
+0x300a4305,0x7c00100,0xe30400,0x300a4611,0x7c40300,0xe30000,0x300a4829,0x7c00100,0xe30400,0x300a4829,0x7c00900,0x1230400,0x300a4929,0x4000000,0xe00000,0x3040258f,
+0x4000010,0x400000,0x3040258f,0x4000010,0xb70000,0x3040258f,0xc000010,0xb48000,0x304028af,0x4000001,0xc41c0b,0x304a3dbf,0x4000000,0xe00000,0x30800c1e,0x2802100,
+0x962460,0x30c01c87,0x6800000,0x1329800,0x3100080e,0x7c00120,0x220402,0x3100080e,0x7c00120,0x250402,0x31005167,0x1000,0,0x3100581e,0x4000000,0x200000,
+0x3100581e,0x7c00100,0x230400,0x3100590d,0x7c00100,0x230400,0x31005a09,0x7c00100,0x220400,0x31005a09,0x7c00100,0x250400,0x31005b00,0x4000000,0x200000,0x31005c00,
+0x80000,0x918820,0x31005c00,0x2802000,0x962460,0x31005c00,0x2802400,0x962460,0x31005c00,0x4000000,0x200000,0x31005c00,0x4000000,0x200001,0x31005c00,0x6800000,
+0x962540,0x31005c00,0x6800400,0x962540,0x31005c01,0x2802400,0x962460,0x31005d00,0x4000020,0x200005,0x31005d00,0x6800020,0x1329805,0x31005d00,0x7c00120,0x220405,
+0x31005d00,0x7c00120,0x250405,0x31006000,0x82000,0x8962460,0x31006000,0x180000,0x918820,0x310a5e11,0x7c40300,0xe30000,0x310a5f11,0x7c00300,0xe30001,0x32000419,
+0x7c00100,0x250400,0x3200080e,0x4000020,0x200000,0x3200080e,0x7c00100,0x220400,0x3200080e,0x7c00100,0x250400,0x32000908,0x7c00100,0x220400,0x32000908,0x7c00100,
+0x250400,0x32000c02,0x7c00100,0x230400,0x32000e25,0x7c00100,0x230400,0x32001d0c,0x7c00100,0x230400,0x32002800,0x80000,0x1e18820,0x32002800,0x80020,0x218820,
+0x32002800,0x4000001,0x445802,0x32002800,0x24000000,0x200000,0x32002800,0x24000000,0x200002,0x32002800,0x24000020,0x200000,0x32002800,0x2c000010,0x1248002,0x32002919,
+0x7c00100,0x22040f,0x32002a00,0x4000000,0x1600000,0x32002b01,0x2000,0x962460,0x32002b01,0x2802000,0x962460,0x32002b01,0x2802020,0x962460,0x32002c00,0x4000000,
+0x200000,0x32002c00,0x4000020,0x200000,0x32002c00,0x4000020,0x200005,0x32002c00,0x7c00120,0x220405,0x32002c00,0x7c00120,0x250405,0x32002e00,0x24000020,0x200000,
+0x32002f00,0x24000020,0x200000,0x32003000,0x24000000,0x200000,0x32003000,0x24000020,0x200000,0x32003500,0x24000000,0x200000,0x32003600,0x24000020,0x200000,0x32003600,
+0x24000020,0x14200000,0x32003700,0x24000000,0x200000,0x32003700,0x24000000,0x4100000,0x32003700,0x24000000,0x4200000,0x32003700,0x24000000,0x14200000,0x32003800,0x24000000,
+0x810000,0x32003800,0x24000000,0x1410000,0x32005102,0x4000000,0x1500008,0x32005502,0x7c00100,0x230400,0x32006108,0x7c00100,0x220400,0x32006108,0x7c00100,0x250400,
+0x3200622a,0x2802100,0x962460,0x3200622a,0x2806000,0x962460,0x3200622a,0x7c00100,0x230400,0x3200632b,0x2802100,0x962460,0x3200632b,0x2806000,0x962460,0x3200632b,
+0x7c00100,0x230400,0x3200642c,0x2802100,0x962460,0x3200642c,0x7c00100,0x230400,0x3200652d,0x2802100,0x962460,0x3200652d,0x7c00100,0x230400,0x32006600,0x24000020,
+0x200000,0x32006700,0x24000020,0x200000,0x32006800,0x24000020,0x200000,0x32006800,0x24000020,0x14200000,0x32006900,0x24000020,0x200000,0x32006900,0x24000020,0x810000,
+0x32006900,0x24000020,0x1410000,0x32006a00,0x24000020,0x200000,0x32006a00,0x24000020,0x200001,0x32006a00,0x24000020,0x200002,0x32020701,0x2882000,0xc62460,0x32023300,
+0x4000000,0x100000,0x32026c01,0x12882000,0x962460,0x32026c01,0x12882000,0x8962460,0x32065700,0x4000000,0x810011,0x32065700,0x4000000,0x1410011,0x32086600,0x24000020,
+0x810000,0x32086600,0x24000020,0x1410000,0x32086900,0x24000020,0x810000,0x32086900,0x24000020,0x1410000,0x320a3600,0x24000020,0x34200000,0x320a3d11,0x7c00100,0x1230400,
+0x320a3e14,0x7c00100,0xe30010,0x320a3e14,0x7c00100,0x2530000,0x320a3f16,0x7c00100,0xe30c10,0x320a4400,0x4000000,0xe00003,0x320a4929,0x4000000,0xe00000,0x320a4f11,
+0x7c00300,0xe30001,0x320a6b16,0x7c00100,0x2530c00,0x3240638b,0xc000010,0x448000,0x324a3dc2,0x4000000,0x14e00000,0x324a3dc2,0x7c00100,0x1230400,0x324a3fbd,0x4000002,
+0x1200c00,0x324a53ba,0x24000000,0xe00000,0x32820701,0x2802000,0x962460,0x40000419,0x7c00100,0x220400,0x40000519,0x7c00100,0x220400,0x40000600,0x4000400,0x200400,
+0x4000080e,0x7c00100,0x220400,0x4000080e,0x7c00100,0x250400,0x4000080e,0x7c00100,0x250402,0x40000c02,0x2802100,0x962460,0x40000c02,0x2802400,0x962460,0x40000c02,
+0x2802500,0x962460,0x40000c02,0x4000000,0x200000,0x40000c02,0x4000000,0x1071400,0x40000c02,0x7c00100,0x230400,0x40000c02,0x80000000,0x218960,0x40000d22,0x7c00100,
+0x230400,0x40000f0a,0x7c00100,0x230400,0x40001004,0x7c00100,0x230400,0x40001110,0x2802100,0x962460,0x40001110,0x6800100,0x962540,0x4000120f,0x2802100,0x962460,
+0x4000120f,0x4000000,0x1600000,0x4000120f,0x7c00100,0x230400,0x4000131f,0x7c00100,0x230400,0x40001423,0x4000000,0x200000,0x40001423,0x4000000,0x1600000,0x40001615,
+0x2802400,0x962460,0x40001615,0x7c00100,0x230400,0x40002417,0x2802400,0x1862460,0x40002417,0x4000000,0x200000,0x40002800,0x6800000,0x201c00,0x40002800,0x24000002,
+0x200000,0x40002c00,0x4000000,0x200002,0x40003000,0x24000000,0x14200000,0x40003000,0x24000020,0x200000,0x40003700,0x24000000,0x200000,0x40003700,0x24000000,0x4200000,
+0x40003700,0x24000000,0x14200000,0x40005a09,0x7c00100,0x220400,0x40005a09,0x7c00100,0x250400,0x40005d00,0x7c00120,0x220405,0x40006f30,0x2802100,0x962460,0x40006f30,
+0x2802400,0x962460,0x40006f30,0x4000000,0x200000,0x40006f30,0x6800000,0x1329800,0x40006f30,0x6800100,0x962540,0x40006f30,0x7c00100,0x230400,0x40006f30,0xc000010,
+0xb48000,0x40007034,0x7c00100,0x1830000,0x40007117,0x4000000,0x200000,0x40007208,0x7c00100,0x220400,0x4000720e,0x7c00100,0x220400,0x4000720e,0x7c00500,0x22040e,
+0x4000720e,0x7c00500,0x22040f,0x40007219,0x7c00100,0x220400,0x40007219,0x7c00500,0x220400,0x40007219,0x7c00500,0x22040e,0x40007219,0x7c00500,0x22040f,0x40007300,
+0x24000000,0x200000,0x40007300,0x24000000,0x14200000,0x40007400,0x4000000,0x200000,0x40007531,0x7c00100,0x230400,0x40007631,0x7c00100,0x230400,0x40007835,0x4000010,
+0x400000,0x40007835,0x7c00100,0x230400,0x40007933,0x7c00100,0x230400,0x40007a32,0x6800000,0x1329800,0x40007a32,0x7c00100,0x230400,0x40007b2f,0x7c00100,0x230400,
+0x40007c00,0x4000000,0x200000,0x40020701,0x2802400,0x962460,0x40020701,0x2802400,0xc62460,0x40023300,0x4000000,0x200000,0x40027d01,0x12882000,0x962460,0x400a3700,
+0x24000000,0x34200000,0x400a3700,0x24000000,0x34e00000,0x400a4400,0x4000000,0xe0000d,0x400a4412,0x4000000,0xe00002,0x400a4412,0x4000000,0xe00003,0x400a4500,0x4000000,
+0xe0000d,0x400a5300,0x4000000,0x810010,0x400a5300,0x4000000,0x1410010,0x404077fc,0x4000000,0x200000,0x404077ff,0x4000000,0x200000,0x404077ff,0x4000000,0x400000,
+0x40c0147e,0x4000000,0x200000,0x40c051fa,0x4000000,0x200000,0x41000419,0x7c00100,0x220400,0x41000419,0x7c00100,0x250400,0x4100080e,0x7c00100,0x220400,0x4100080e,
+0x7c00100,0x250400,0x41000908,0x7c00100,0x220400,0x41000908,0x7c00100,0x250400,0x41000b13,0x2802000,0x962460,0x41000b13,0x2802100,0x962460,0x41000b13,0x4000000,
+0xb00000,0x41000c02,0x2802100,0x962460,0x41000c02,0x4000000,0x1500000,0x41000c02,0xc000010,0xb48000,0x41000f0a,0x7c00100,0x230400,0x41001004,0x7c00100,0x230400,
+0x41001423,0x7c00100,0x230400,0x41001b27,0x4000000,0x500000,0x41001d0c,0x7c00100,0x230400,0x41001d0c,0x7c00100,0x23040f,0x41001f0b,0x2802400,0x962460,0x41001f0b,
+0x4000000,0x200000,0x41001f0b,0x7c00100,0x230400,0x41002800,0x24000000,0x200000,0x41002800,0x24000000,0x400000,0x41002919,0x7c00100,0x22040e,0x41002a00,0x4000000,
+0x1600000,0x41002b01,0x2802020,0x962460,0x41002c00,0x4000000,0x200000,0x41002c00,0x7c00120,0x220405,0x41003000,0x24000000,0x200000,0x41003700,0x24000000,0x4200000,
+0x41003700,0x24000000,0x14200000,0x41003700,0x24000000,0x14e00000,0x41005d00,0x7c00120,0x220405,0x41006600,0x24000020,0x200000,0x41006600,0x24000020,0x810000,0x41006600,
+0x24000020,0x1410000,0x41007208,0x7c00100,0x22040f,0x41007219,0x7c00100,0x220400,0x41007300,0x24000000,0x200000,0x41007e0e,0x2802000,0x962460,0x41007e0e,0x4000000,
+0x200000,0x41007f0e,0x4000000,0x200000,0x41007f0e,0x7c00100,0x230400,0x41008002,0x7c00100,0x230400,0x41008137,0x2802100,0x962460,0x41008137,0x4000000,0x200000,
+0x41008137,0x6800100,0x962540,0x41008137,0x7c00100,0x230400,0x41008301,0x2802000,0x962460,0x41008407,0x4000000,0x200000,0x41008407,0x4000000,0x400000,0x41008407,
+0x4000000,0xb00000,0x41008407,0x7c00100,0x220400,0x41008407,0x7c00100,0x250400,0x4100850b,0x7c00100,0x230400,0x4100860b,0x4000000,0x200000,0x4100860b,0x7c00100,
+0x230400,0x4100870c,0x7c00100,0x220400,0x41008838,0x7c00100,0x220400,0x41008838,0x7c00100,0x250400,0x41008939,0x2802000,0x962460,0x41008939,0x2802100,0x962460,
+0x41008939,0x2806000,0x962460,0x41008939,0x4000000,0x200000,0x41008939,0x4000000,0x400000,0x41008939,0x7c00100,0x230400,0x41008939,0xc000010,0x448000,0x41008a00,
+0x4000400,0x200400,0x41008b3b,0x4000000,0x1800000,0x41008b3b,0x6800000,0x1329800,0x41008b3b,0x7c00100,0x1830000,0x41008b3b,0x7e00100,0x1830000,0x41008c3d,0x4000010,
+0x400000,0x41008c3d,0x7c00100,0x230400,0x41008d0e,0x7c00100,0x22040f,0x41008d19,0x7c00100,0x220400,0x41008d19,0x7c00100,0x22040f,0x41008e00,0x24000000,0x200000,
+0x41008e00,0x24000000,0x400000,0x41008e00,0x24000000,0x1710000,0x41008e00,0x24000006,0x400000,0x41008f3a,0x2802100,0x962460,0x41008f3a,0x2806000,0x962460,0x41008f3a,
+0x4000000,0x200000,0x41008f3a,0x6800100,0x962540,0x41008f3a,0x7c00100,0x230400,0x4100903c,0x7c00100,0x230400,0x4100903c,0x7c00100,0x23040f,0x41020701,0x2802000,
+0x962460,0x41020701,0x2802000,0xc62460,0x410a3700,0x24000000,0x34200000,0x410a3700,0x24000000,0x34e00000,0x410a4412,0x4000000,0xe00003,0x410a4711,0x7c40300,0xe30000,
+0x410a4f11,0x7c00300,0xe30001,0x410a9100,0x4000000,0x800010,0x410a9100,0x4000000,0x810010,0x410a9100,0x4000000,0x870010,0x410a9100,0x4000000,0xb00010,0x410a9100,
+0x4000000,0xf00010,0x410a9100,0x4000000,0x1001410,0x410a9100,0x4000000,0x1071010,0x410a9100,0x4000000,0x1071410,0x410a9100,0x4000000,0x1410010,0x41408ac5,0x4000400,
+0x200000,0x414a82bf,0x4000000,0xe00000,0x41808300,0x2802000,0x962460,0x41c0147e,0x6800000,0x1329800,0x50000419,0x7c00100,0x220400,0x50000419,0x7c00100,0x250400,
+0x5000080e,0x7c00100,0x220400,0x50000908,0x7c00100,0x220400,0x50000908,0x7c00100,0x250400,0x50000b13,0x2802500,0x962460,0x50000f0a,0x7c00100,0x230400,0x50001615,
+0x2802100,0x962460,0x50001615,0x7c00100,0x230400,0x50002b01,0x2802020,0x962460,0x50002c00,0x4000000,0x200000,0x50002c19,0x7c00100,0x220400,0x50002d19,0x7c00100,
+0x220400,0x50003000,0x24000000,0x200000,0x50003000,0x24000020,0x200000,0x50003700,0x24000000,0x4200000,0x50005d00,0x7c00120,0x220405,0x50005d00,0x7c00120,0x250405,
+0x50006108,0x7c00100,0x220400,0x50006108,0x7c00100,0x250400,0x50006600,0x24000020,0x200000,0x50007300,0x24000000,0x200000,0x50008301,0x2802400,0x962460,0x50008a00,
+0x7c00500,0x230400,0x50009257,0x2802400,0x962460,0x50009257,0x4000000,0x200000,0x50009257,0x4000010,0x1071400,0x50009257,0x6800000,0x1329800,0x50009257,0x7c00100,
+0x230400,0x50009257,0x7c00500,0x230400,0x50009257,0x7c00900,0x230400,0x50009257,0xc000010,0xb48000,0x5000933e,0x2802100,0x962460,0x5000933e,0x2802400,0x962460,
+0x5000933e,0x4000000,0x200000,0x5000933e,0x4000000,0x400000,0x5000933e,0x4000010,0x400000,0x5000933e,0x6800000,0x1329800,0x5000933e,0x6800100,0x962540,0x5000933e,
+0x6800100,0x962541,0x5000933e,0x6804400,0x962540,0x5000933e,0x7c00100,0x230400,0x5000933e,0x7c00100,0x230401,0x5000933e,0xc000010,0x448000,0x50009419,0x7c00100,
+0x220400,0x50009419,0x7c00100,0x250400,0x50009500,0x4000400,0x200400,0x5000965a,0x4000000,0x500000,0x5000965a,0x7c00100,0x230400,0x5000965a,0xc000010,0xb48000,
+0x5000975b,0x4000000,0x200000,0x5000975b,0x4000010,0x400000,0x5000975b,0x7c00100,0x230400,0x50009865,0x7c00100,0x230400,0x50009965,0x4000010,0x400000,0x50009965,
+0x7c00100,0x230400,0x50409abf,0x4000000,0x200000,0x5100080e,0x7c00100,0x220400,0x5100080e,0x7c00100,0x250400,0x51000c02,0x2802100,0x962460,0x51000c02,0x4000000,
+0x1500000,0x51000c02,0x4000020,0x200000,0x51000c02,0x7c00100,0x230400,0x51000f0a,0x7c00100,0x230400,0x51000f0a,0x7c00500,0x230400,0x51001110,0x2802100,0x962460,
+0x5100131f,0x2802100,0x962460,0x51001423,0x7c00100,0x230400,0x51001524,0x2802100,0x962460,0x51001524,0x4000000,0x200000,0x51001524,0x7c00100,0x230400,0x5100171a,
+0x2802100,0x962460,0x5100171a,0x4000000,0x200000,0x5100171a,0x4000000,0x1500000,0x5100171a,0x7c00100,0x230400,0x51001b27,0x4000000,0x200000,0x51001b27,0x4000000,
+0x400000,0x51001b27,0x4000000,0x500000,0x51001b27,0x7c00100,0x230400,0x51001c1c,0x2802100,0x1862460,0x51001c1c,0x2802500,0x1862460,0x51001c1c,0x2806400,0x1862460,
+0x51001c1c,0x4000000,0x1800000,0x51001c1c,0x6800000,0x1329800,0x51001c1c,0x6800100,0x1862400,0x51001c1c,0x6800100,0x1862540,0x51001c1c,0x6800500,0x1862400,0x51001c1c,
+0x7c00100,0x1830000,0x5100251b,0x7c00100,0x230400,0x51002619,0x7c00100,0x220400,0x51002619,0x7c00100,0x250400,0x51002800,0x80020,0x218820,0x51002c00,0x4000000,
+0x200000,0x51002d19,0x7c00100,0x230400,0x51003700,0x24000000,0x4200000,0x51003700,0x24000000,0x4e00000,0x51005201,0x2802400,0x962460,0x51005c00,0x4000000,0x200000,
+0x51006108,0x7c00100,0x220400,0x51006108,0x7c00100,0x250400,0x51006600,0x24000020,0x200000,0x51006600,0x24000020,0x810000,0x51006600,0x24000020,0x1410000,0x51007300,
+0x24000000,0x200000,0x51007300,0x24000020,0x200000,0x51008002,0x7c00100,0x230400,0x51008301,0x2802000,0x962460,0x51008301,0x2802400,0x962460,0x51008a00,0x7c00500,
+0x230400,0x51008e00,0x24000000,0x200000,0x51008e00,0x24000000,0x400000,0x51008e00,0x24000000,0x810000,0x51008e00,0x24000000,0x1400000,0x51008e00,0x24000000,0x1410000,
+0x51008e00,0x24000000,0x1710000,0x51008e00,0x24000002,0x200000,0x51008e00,0x24000500,0x230400,0x51008e00,0x2c000010,0xb48000,0x51009419,0x7c00100,0x220400,0x51009419,
+0x7c00100,0x22040e,0x51009419,0x7c00100,0x22040f,0x51009419,0x7c00100,0x250400,0x51009500,0x4000400,0x200400,0x51009500,0x7c00500,0x230400,0x51009519,0x7c00100,
+0x220400,0x51009519,0x7c00100,0x22040f,0x51009519,0x7c00100,0x230400,0x51009519,0x7c00100,0x250400,0x51009b71,0x2802100,0x962460,0x51009b71,0x6800000,0x1329800,
+0x51009b71,0x6800100,0x962540,0x51009b71,0x6804400,0x962540,0x51009b71,0x7c00100,0x230400,0x51009c52,0x2802100,0x962460,0x51009c52,0x2802400,0x962460,0x51009c52,
+0x2802d00,0x962460,0x51009c52,0x4000010,0x400000,0x51009c52,0x6800000,0x1329800,0x51009c52,0x6800100,0x962540,0x51009c52,0x7c00100,0x230400,0x51009c52,0xc000010,
+0x448000,0x51009d6d,0x6800000,0x1329800,0x51009d6d,0x7c00100,0x230400,0x51009d6d,0x7c00500,0x230400,0x51009d6d,0x7c00d00,0x230400,0x51009d6d,0xc000010,0x448000,
+0x51009e08,0x2802100,0x962460,0x51009f63,0x4000010,0x400000,0x51009f63,0x6800000,0x1329800,0x51009f63,0x7c00100,0x230400,0x51009f63,0x7c00900,0x230400,0x51009f63,
+0xc000010,0x448000,0x51009f63,0xc000010,0xb48000,0x5100a008,0x2000,0x962460,0x5100a008,0x2802400,0x962460,0x5100a008,0x4000000,0x200000,0x5100a008,0x7c00100,
+0x220400,0x5100a008,0x7c00100,0x230400,0x5100a008,0x7c00100,0x250400,0x5100a008,0x7c00500,0x230400,0x5100a16f,0x2806400,0x962460,0x5100a16f,0x6800000,0x1329800,
+0x5100a16f,0x6800100,0x962540,0x5100a16f,0x7c00100,0x230400,0x5100a16f,0xc000010,0x448000,0x5100a24f,0x2802100,0x962460,0x5100a24f,0x2802400,0x962460,0x5100a24f,
+0x6800000,0x1329800,0x5100a24f,0x7c00100,0x230400,0x5100a24f,0xc000010,0x448000,0x5100a36e,0x2802100,0x962460,0x5100a36e,0x4000000,0x200000,0x5100a36e,0x6800100,
+0x962540,0x5100a36e,0x6804400,0x962540,0x5100a36e,0x7c00100,0x230400,0x5100a442,0x2802100,0x962460,0x5100a442,0x4000000,0x200000,0x5100a442,0x6800000,0x1329800,
+0x5100a442,0x6800100,0x962540,0x5100a442,0x7c00100,0x230400,0x5100a442,0xc000010,0x448000,0x5100a500,0x4000000,0x200000,0x5100a600,0x4000000,0x200000,0x5100a601,
+0x2802000,0x962460,0x5100a76b,0x7c00100,0x230400,0x5100a868,0x7c00100,0x230400,0x5100a96c,0x4000000,0x200000,0x5100a96c,0x7c00100,0x230400,0x5100aa00,0x4000000,
+0x4e00000,0x5100ab00,0x4000000,0x4e00000,0x51086600,0x24000020,0x810000,0x51086600,0x24000020,0x1410000,0x510a4005,0x7c00100,0xe30400,0x510a4711,0x7c40300,0xe30000,
+0x510a7300,0x24000000,0x34200000,0x510aaa00,0x4000000,0x34e00000,0x5140a2f3,0x4000400,0x400000,0x514a82bf,0x4000000,0xe00000,0x51802bb1,0x2802000,0x962460,0x51c00908,
+0x2802400,0x962460,0x51c0a008,0x2802400,0x962460,0x52000f0a,0x2802100,0x962460,0x52000f0a,0x6800100,0x962540,0x52000f0a,0x7c00100,0x230400,0x52001004,0x4000000,
+0x1600000,0x52001b00,0x4000000,0x200000,0x52001c1c,0x2802100,0x1862460,0x52001c1c,0x6800100,0x1862400,0x52001c1c,0x6800500,0x1862400,0x52001e12,0x7c00100,0x2230500,
+0x52001e12,0x7c00100,0x2330520,0x52002128,0x4000002,0x400000,0x52002128,0x7c00100,0x230400,0x52002a00,0x4000000,0x1500000,0x52002a00,0x4000000,0x1600000,0x52002d00,
+0x4000000,0x200006,0x52003000,0x24000000,0x200000,0x52006108,0x7c00100,0x220400,0x52006108,0x7c00100,0x250400,0x52008301,0x2802400,0x962460,0x52008407,0x2802400,
+0x962460,0x52008407,0x7c00100,0x220400,0x52008407,0x7c00100,0x250400,0x52008b3b,0x6800000,0x1800000,0x52008b3b,0x7c00100,0x1830000,0x52008e00,0x24000000,0x400000,
+0x52009419,0x7c00100,0x250400,0x5200975b,0x4000000,0x200000,0x5200ac7e,0x2802000,0x962460,0x5200ac7e,0x2802100,0x962460,0x5200ac7e,0x2802400,0x962460,0x5200ac7e,
+0x4000010,0x200000,0x5200ac7e,0x7c00100,0x230400,0x5200ac7e,0xc000010,0x248000,0x5200ad28,0x7c00100,0x230400,0x5200ae6a,0x2802100,0x1862460,0x5200ae6a,0x2802400,
+0x962460,0x5200ae6a,0x2802400,0x1862460,0x5200ae6a,0x2806000,0x1862460,0x5200ae6a,0x4000000,0x1800000,0x5200ae6a,0x6800000,0x1329800,0x5200ae6a,0x6800100,0x1862400,
+0x5200ae6a,0x6800100,0x1862540,0x5200ae6a,0x7c00100,0x1830000,0x5200ae6a,0x7c00900,0x1830000,0x5200ae6a,0xc000010,0x1848000,0x5200b083,0x4000010,0x400000,0x5200b083,
+0x7c00100,0x230400,0x5200b083,0xc000010,0x448000,0x5200b182,0x2802400,0x962460,0x5200b182,0x4000000,0x200000,0x5200b182,0x4000010,0x400000,0x5200b182,0x7c00100,
+0x230400,0x5200b182,0xc000010,0x448000,0x5200b30a,0x2802400,0x962460,0x5200b30a,0x4000000,0x200000,0x5200b30a,0x7c00100,0x230400,0x5200b54e,0x2802100,0x962460,
+0x5200b54e,0x2802400,0x962460,0x5200b54e,0x4000000,0x200000,0x5200b54e,0x4000010,0x400000,0x5200b54e,0x6800000,0x1329800,0x5200b54e,0x6800100,0x962540,0x5200b54e,
+0x6804400,0x962540,0x5200b54e,0x7c00100,0x230400,0x5200b54e,0xc000010,0x448000,0x5200b61c,0x4000000,0x1800000,0x5200b61c,0x6800500,0x1862400,0x5200b61c,0x7c00100,
+0x1830000,0x5200b61c,0x7c00900,0x1830000,0x5200b77f,0x2802100,0x1862460,0x5200b77f,0x2802400,0x1862460,0x5200b77f,0x4000000,0x1800000,0x5200b77f,0x4000010,0x1800000,
+0x5200b77f,0x7c00100,0x1830000,0x5200b77f,0x7c00500,0x1830000,0x5200b77f,0x7c00900,0x1830000,0x5200b77f,0x7e00100,0x1830000,0x5200b873,0x2802100,0x962460,0x5200b873,
+0x2806400,0x962460,0x5200b873,0x6800000,0x1329800,0x5200b873,0x6800100,0x962540,0x5200b873,0x6800400,0x962540,0x5200b873,0x7c00100,0x230400,0x5200b873,0xc000010,
+0x448000,0x5200b912,0x7c00100,0x2230500,0x5200b912,0x7c00100,0x2330520,0x5200ba74,0x4000000,0x200000,0x5200ba74,0x4000010,0x400000,0x5200ba74,0x7c00100,0x230400,
+0x5200bb85,0x4000000,0x200000,0x5200bb85,0x7c00100,0x230400,0x5200bc75,0x4000000,0x400000,0x5200bc75,0x4000010,0x400000,0x5200bc75,0x7c00100,0x230400,0x5200bd7d,
+0x4000000,0x200000,0x5200bd7d,0x7c00100,0x230400,0x5200be7a,0x4000000,0x200000,0x5200be7a,0x7c00100,0x230400,0x5200bf58,0x7c00100,0x230400,0x5200c002,0x4000000,
+0x200000,0x5200c178,0x2802000,0x962460,0x5200c178,0x2802100,0x962460,0x5200c178,0x2802400,0x962460,0x5200c178,0x2806400,0x962460,0x5200c178,0x4000000,0x200000,
+0x5200c178,0x6800100,0x962540,0x5200c178,0x7c00100,0x230400,0x5200c178,0x7c00100,0x230401,0x5200c178,0xc000010,0x448000,0x5200c178,0x80000000,0x218960,0x5200c247,
+0x7c00100,0x230400,0x5200c247,0x7c00100,0x830400,0x5200c247,0x7c00100,0x1430400,0x5200c300,0x4000000,0x200003,0x52022d00,0x4000000,0x100006,0x52023700,0x24000000,
+0x4100000,0x52023700,0x24000000,0x4e00000,0x52023700,0x24000000,0x14100000,0x52023700,0x24000000,0x14e00000,0x52023700,0x24000000,0x96800000,0x52024400,0x4000000,0x100000,
+0x52027300,0x24000000,0x100000,0x5202c300,0x4000000,0x100000,0x5202c300,0x4000000,0x100002,0x5202c300,0x4000000,0x100003,0x5202c300,0x4000000,0x10000d,0x5202c300,
+0x4000100,0x150400,0x5202c300,0x4000100,0x15040d,0x5202c300,0x4000100,0x14150400,0x520a1e12,0x7c00100,0x2130480,0x520a3700,0x24000000,0x34e00000,0x520a3800,0x24000000,
+0x34100000,0x520a4711,0x7c40300,0xe30000,0x520a4f11,0x7c00300,0xe30001,0x520a7300,0x24000000,0x34100000,0x520ab412,0x7c00100,0x2130480,0x520ac400,0x4000000,0xe00002,
+0x520ac400,0x4000000,0xe0000d,0x520ac400,0x4000000,0x34e0000d,0x520ac414,0x4000000,0xe0000d,0x520ac511,0x7c40300,0xe30000,0x5240af91,0x7c00100,0x230400,0x5240af96,
+0x4000400,0x200000,0x5240af98,0x6800400,0x962540,0x5240af98,0x7c00100,0x230400,0x5240afa2,0x7c00100,0x230400,0x5240afa4,0x7c00100,0x230400,0x5240b2c7,0x4000000,
+0x200000,0x5240b2c7,0x4000000,0x1500000,0x5240b2d2,0x4000000,0x200000,0x5240b2e0,0x4000000,0x200000,0x5240b5f6,0x7c00900,0x230400,0x524a44bf,0x4000000,0xe00003,
+0x5280af91,0x2802400,0x962460,0x5280af92,0x2802400,0x962460,0x5280af98,0x2802400,0x962460,0x5280af9a,0x2802400,0x962460,0x5280af9c,0x2802400,0x962460,0x52c0b3ed,
+0x2802400,0x962460,0x52c0b3f1,0x7c00100,0x230400,0x60000c02,0x2802100,0x962460,0x60000c02,0x7c00100,0x230400,0x60000f0a,0x2802100,0x962460,0x60000f0a,0x6800100,
+0x962540,0x60000f0a,0x7c00100,0x230400,0x6000131f,0x4000000,0x200000,0x6000171a,0x7c00100,0x230400,0x6000171a,0x7c00100,0x230560,0x60001b27,0x2802100,0x962460,
+0x60001b27,0x4000000,0xc00000,0x60001b27,0x7c00100,0x230400,0x60001f0b,0x2802400,0x962460,0x60002919,0x7c00100,0x22040e,0x60002a00,0x4000000,0x1600000,0x60003000,
+0x24000000,0x14200000,0x60003000,0x24000000,0x14e00000,0x60003700,0x24000000,0x4200000,0x60003800,0x24000000,0x1710000,0x60005102,0x4000000,0x200000,0x60006108,0x7c00100,
+0x220400,0x60006108,0x7c00100,0x250400,0x60006600,0x24000020,0x200000,0x60008301,0x2802000,0x962460,0x6000903c,0x2806000,0x962460,0x6000903c,0x4000000,0x400000,
+0x60009519,0x7c00100,0x220400,0x60009519,0x7c00100,0x250400,0x6000a008,0x7c00100,0x220400,0x6000a008,0x7c00100,0x250400,0x6000c300,0x4000000,0x3a703580,0x6000c654,
+0x2802000,0x962460,0x6000c654,0x4000010,0x200000,0x6000c654,0x7c00100,0x230400,0x6000c73f,0x2802000,0x962460,0x6000c73f,0x2802100,0x962460,0x6000c73f,0x4000000,
+0x200000,0x6000c73f,0x6800100,0x962540,0x6000c73f,0x6804000,0x962540,0x6000c73f,0x7c00100,0x230400,0x6000c80b,0x7c00100,0x230400,0x6000c941,0x2802100,0x962460,
+0x6000c941,0x2806000,0x962460,0x6000c941,0x4000000,0x200000,0x6000c941,0x4000010,0x200000,0x6000c941,0x6800000,0x1329800,0x6000c941,0x6800100,0x962540,0x6000c941,
+0x7c00100,0x230400,0x6000c941,0xc000010,0x448000,0x6000ca82,0x7c00100,0x230400,0x6000cc00,0x4000000,0x4e00000,0x6000d000,0x4000000,0x200000,0x6002c300,0x4000000,
+0x100000,0x6002c300,0x4000000,0x10000d,0x6002c300,0x4000100,0x150400,0x6002c300,0x4000100,0x15040d,0x6002c300,0x4000100,0x14150400,0x600a3000,0x24000000,0x34200000,
+0x600a3000,0x24000000,0x34e00000,0x600a3700,0x24000000,0x34200000,0x600a3800,0x24000000,0x34200000,0x600a3800,0x24000000,0xb6800000,0x600a4305,0x7c00100,0xe30400,0x600ac300,
+0x4000000,0x34100000,0x600ac400,0x4000000,0x14e0000d,0x600ac400,0x4000000,0x34e0000d,0x600acb14,0x7c00100,0xe30000,0x600acb16,0x7c00100,0xe30c00,0x600acc00,0x4000000,
+0x34e00000,0x600acd00,0x4000000,0x34200000,0x600acd00,0x4000000,0x34e00000,0x600acd00,0x4000000,0xb6800000,0x600ace00,0x4000000,0x34e00000,0x600ace00,0x4000000,0xb6800000,
+0x600acf00,0x4000000,0x34e00000,0x600acf00,0x4000000,0xb6800000,0x600ad111,0x7c40300,0xe30000,0x604ac4bf,0x4000000,0x34e00003,0x61000a03,0x4000000,0x1600000,0x61000c02,
+0x80000000,0x218960,0x6100120f,0x4000000,0x200000,0x61001a18,0x7c00100,0x1830000,0x61001d0c,0x7c00100,0x230400,0x61001d0c,0x7c00100,0x250400,0x61006600,0x24000020,
+0x200000,0x61008407,0x7c00100,0x220400,0x61008407,0x7c00100,0x250400,0x6100870c,0x7c00100,0x220400,0x61008e00,0x24000000,0x200000,0x61008e00,0x24000000,0x400000,
+0x61008e00,0x24000002,0x300000,0x6100903c,0x7c00100,0x230400,0x61009519,0x7c00100,0x220400,0x61009519,0x7c00100,0x250400,0x61009519,0x7c00500,0x22040f,0x61009b71,
+0x2802100,0x962460,0x61009b71,0x2806400,0x962460,0x61009b71,0x7c00100,0x230400,0x6100a008,0x2802100,0x962460,0x6100c300,0x4000000,0x20000f,0x6100cd00,0x4000000,
+0x200000,0x6100d202,0x2802400,0x962460,0x6100d202,0x2802500,0x962460,0x6100d202,0x7c00100,0x230400,0x6100d302,0x4000020,0x200000,0x6100d302,0x7c00120,0x230405,
+0x6100d476,0x2802100,0x962460,0x6100d476,0x2802100,0x962461,0x6100d476,0x2806400,0x962460,0x6100d476,0x4000000,0x400000,0x6100d476,0x6800000,0x1329800,0x6100d476,
+0x6800100,0x962540,0x6100d476,0x7c00100,0x230400,0x6100d476,0xc000010,0x448000,0x6100d573,0x2802100,0x962460,0x6100d573,0x2806400,0x962460,0x6100d573,0x6800100,
+0x962540,0x6100d573,0x7c00100,0x230400,0x6100d573,0x7c00900,0x230400,0x6100d573,0xc000010,0x448000,0x6100d68d,0x7c00100,0x230400,0x6100d756,0x7c00100,0x230400,
+0x6100d85c,0x2802500,0x962460,0x6100d85c,0x6800100,0x962540,0x6100d85c,0x7c00100,0x230400,0x6100d85c,0x7c00500,0x230400,0x6100d997,0x2802100,0x962460,0x6100d997,
+0x4000000,0x200000,0x6100d997,0x4000000,0x400000,0x6100d997,0x6800000,0x1329800,0x6100d997,0x6800100,0x962540,0x6100d997,0x6804400,0x962540,0x6100d997,0x7c00100,
+0x230400,0x6100d997,0x7c00100,0x230560,0x6100d997,0xc000010,0x448000,0x6100da98,0x6800000,0x1329800,0x6100da98,0x7c00100,0x230400,0x6100db71,0x4000000,0x200000,
+0x6100dc99,0x2802100,0x962460,0x6100dc99,0x2802400,0x962460,0x6100dc99,0x6800000,0x1329800,0x6100dc99,0x6800100,0x962540,0x6100dc99,0x6804400,0x962540,0x6100dc99,
+0x7c00100,0x230400,0x610a4711,0x7c40300,0xe30000,0x610a4f11,0x7c00300,0xe30001,0x610ace00,0x4000000,0x34e00000,0x6140af96,0x7c00100,0x230400,0x6140af98,0x7c00100,
+0x230400,0x6180af93,0x2802400,0x962460,0x62002a00,0x4000000,0x1600000,0x63002800,0x80000,0x918820,0x63c00c14,0x80000,0x918820,0x7000080e,0x7c00100,0x250400,
+0x70000a03,0x4000000,0x200000,0x70000c00,0x80000000,0x218960,0x70000f0a,0x7c00100,0x230400,0x70001004,0x7c00100,0x230400,0x70001524,0x2802100,0x962460,0x70001524,
+0x7c00100,0x230400,0x70001615,0x2802100,0x962460,0x7000171a,0x2802100,0x962460,0x70001821,0x6800000,0x1329800,0x70002320,0x7c00100,0x230400,0x70002a00,0x4000000,
+0x1500000,0x70002a00,0x4000000,0x1600000,0x70003000,0x24000000,0x200000,0x70003000,0x24000000,0x14200000,0x70003800,0x24000000,0x4e00000,0x70005201,0x2802400,0x962460,
+0x7000581e,0x7c00100,0x230400,0x70006108,0x7c00100,0x220400,0x70006108,0x7c00100,0x250400,0x70006f30,0x7c00100,0x230400,0x70007300,0x24000000,0x200000,0x70007f0e,
+0x4000000,0x200000,0x70008301,0x2802100,0x962460,0x70008301,0x2802400,0x962460,0x70008e00,0x24000000,0x200000,0x70008e00,0x24000000,0x400000,0x70008e00,0x24000002,
+0x400000,0x70008e00,0x24000008,0x1410000,0x70008e00,0x24000010,0x400000,0x70008e00,0x2c000010,0x448000,0x70009519,0x7c00100,0x220400,0x70009519,0x7c00100,0x230400,
+0x70009519,0x7c00100,0x250400,0x70009865,0x7c00100,0x230400,0x70009965,0x4000010,0x400000,0x70009965,0x7c00100,0x230400,0x7000a008,0x7c00100,0x220400,0x7000a008,
+0x7c00100,0x250400,0x7000a008,0x7c00500,0x22040f,0x7000a50e,0x4000000,0x200000,0x7000b61c,0x2802500,0x1862460,0x7000b61c,0x6800500,0x1862400,0x7000b61c,0x7c00100,
+0x1830000,0x7000c300,0x4000000,0x100000,0x7000c941,0x2806000,0x962460,0x7000cc00,0x4000000,0x4e00000,0x7000cd00,0x4000000,0x200000,0x7000cd00,0x4000000,0x4200000,
+0x7000cd00,0x4000000,0x4e00000,0x7000cd00,0x4000000,0x14200000,0x7000cd00,0x4000000,0x14e00000,0x7000cd00,0x4000000,0x96800000,0x7000cf00,0x4000000,0x4e00000,0x7000cf00,
+0x4000000,0x14e00000,0x7000d202,0x2802100,0x962460,0x7000d202,0x7c00100,0x230400,0x7000d997,0x7c00100,0x230400,0x7000d997,0xc000010,0x248000,0x7000dd86,0x2802400,
+0x962460,0x7000dd86,0x7c00100,0x230400,0x7000dd86,0xc000010,0x448000,0x7000de9f,0x4000000,0x200000,0x7000de9f,0x7c00100,0x230400,0x7000e001,0x2000,0x962460,
+0x7000e001,0x2802400,0x962460,0x7000e187,0x2802000,0x962460,0x7000e187,0x2802100,0x962460,0x7000e187,0x4000000,0x200000,0x7000e187,0x7c00100,0x230400,0x7000e187,
+0xc000010,0x448000,0x7000e288,0x7c00100,0x230400,0x7000e300,0x4000000,0x200000,0x7000e489,0x2802100,0x962460,0x7000e489,0x2802400,0x962460,0x7000e489,0x6800100,
+0x962540,0x7000e489,0x6800100,0x962541,0x7000e489,0x6804400,0x962540,0x7000e489,0x7c00100,0x230400,0x7000e489,0x7c00900,0x230400,0x7000e59d,0x2802100,0x962460,
+0x7000e59d,0x2802400,0x962460,0x7000e59d,0x4000000,0x200000,0x7000e59d,0x4000010,0x200000,0x7000e59d,0x6800100,0x962540,0x7000e59d,0x6804400,0x962540,0x7000e59d,
+0x7c00100,0x230400,0x7000e59d,0xc000010,0x448000,0x7000e691,0x2802100,0x962460,0x7000e691,0x2802400,0x962460,0x7000e691,0x2806400,0x962460,0x7000e691,0x6800000,
+0x1329800,0x7000e691,0x6800100,0x962540,0x7000e691,0x7c00100,0x230400,0x7000e700,0x4000400,0x200400,0x7000e70e,0x7c00100,0x220400,0x7000e719,0x7c00100,0x220400,
+0x7000e719,0x7c00500,0x22040f,0x7000e853,0x7c00100,0x230400,0x7000e9a0,0x2802400,0x962460,0x7000e9a0,0x4000000,0x200000,0x7000e9a0,0x4000000,0x500000,0x7000e9a0,
+0x7c00100,0x230400,0x7000ea79,0x2802400,0x962460,0x7000ea79,0x4000000,0x200000,0x7000ea79,0x4000000,0xf00000,0x7000ea79,0x4000010,0x400000,0x7000ea79,0x7c00100,
+0x230400,0x7000eb8c,0x2802400,0x962460,0x7000eb8c,0x4000000,0x200000,0x7000eb8c,0x7c00100,0x230400,0x7000eca3,0x2802100,0x962460,0x7000eca3,0x2806400,0x962460,
+0x7000eca3,0x4000000,0x200000,0x7000eca3,0x6800000,0x1329800,0x7000eca3,0x6800100,0x962540,0x7000eca3,0x7c00100,0x230400,0x7000eca3,0xc000010,0x448000,0x7000ed95,
+0x6800000,0x1329800,0x7000ed95,0x7c00100,0x230400,0x7000ed95,0xc000010,0x448000,0x7000ee1c,0x2802500,0x1862460,0x7000ee1c,0x6800000,0x1329800,0x7000ee1c,0x7c00100,
+0x1830000,0x7000ee1c,0x7c00900,0x1830000,0x7000ef8f,0x4000000,0x200000,0x7000ef8f,0x7c00100,0x230400,0x7000f08e,0x4000000,0x200000,0x7000f08e,0x7c00100,0x230400,
+0x7000f159,0x2802100,0x962460,0x7000f159,0x7c00100,0x230400,0x7000f200,0x4000000,0x200000,0x7000f200,0x4000000,0x1200000,0x7000f200,0x4000000,0x1710000,0x7000f34b,
+0x2802400,0x962460,0x7000f34b,0x4000000,0x200000,0x7000f34b,0x4000010,0x400000,0x7000f34b,0x6800000,0x1329800,0x7000f34b,0x7c00100,0x230400,0x7000f34b,0x7c00900,
+0x230400,0x7000f34b,0xc000010,0x448000,0x7000f490,0x4000000,0x200000,0x7000f490,0x7c00100,0x230400,0x7000f5a5,0x7c00100,0x230400,0x7000f67b,0x4000000,0x200000,
+0x7000f67b,0x4000010,0x200000,0x7000f67b,0x7c00100,0x230400,0x7000f8a6,0x2802100,0x962460,0x7000f8a6,0x2802400,0x962460,0x7000f8a6,0x2806400,0x962460,0x7000f8a6,
+0x4000000,0x500000,0x7000f8a6,0x4000010,0xb00000,0x7000f8a6,0x4000800,0x200000,0x7000f8a6,0x6800100,0x962540,0x7000f8a6,0x6800100,0x962541,0x7000f8a6,0x7c00100,
+0x230400,0x7000f8a6,0xc000010,0x448000,0x7000f921,0x4000000,0x200000,0x7000fa00,0x4000000,0x200000,0x7000fb9e,0x2802100,0x962460,0x7000fb9e,0x2802400,0x962460,
+0x7000fb9e,0x2806400,0x962460,0x7000fb9e,0x4000000,0x200000,0x7000fb9e,0x6800000,0x1329800,0x7000fb9e,0x6800100,0x962540,0x7000fb9e,0x6800100,0x962541,0x7000fb9e,
+0x7c00100,0x230400,0x7000fc92,0x4000000,0x200000,0x7000fc92,0x6800000,0x1329800,0x7000fc92,0x7c00100,0x220400,0x7000fc92,0x7c00100,0x230400,0x7000fc92,0x7c00100,
+0x250400,0x700acd00,0x4000000,0x34e00000,0x700acd00,0x4000000,0xb6800000,0x700ace00,0x4000000,0x34e00000,0x700acf00,0x4000000,0x34e00000,0x700acf00,0x4000000,0xb6800000,
+0x7050df01,0x4000000,0x200000,0x7050f705,0x80000,0x918820,0x7080af96,0x2802400,0x962460,0x7090df01,0x2802400,0x962460,0x70d0e403,0x2802100,0x962460,0x70d0e403,
+0x2802400,0x962460,0x70d0e403,0x6800100,0x962540,0x8000120f,0x7c00100,0x230400,0x80001524,0x7c00100,0x230400,0x8000171a,0x7c00100,0x230400,0x80002006,0x7c00100,
+0x220400,0x80002006,0x7c00100,0x250400,0x80002a00,0x4000000,0x1500000,0x80002d00,0x4000000,0x200000,0x80005208,0x2802400,0x962460,0x80005c00,0x4000000,0x200000,
+0x80007300,0x24000000,0x200000,0x80009519,0x7c00100,0x220400,0x80009519,0x7c00100,0x230400,0x80009519,0x7c00100,0x250400,0x80009865,0x7c00100,0x230400,0x8000a008,
+0x2802100,0x962460,0x8000b30a,0x4000000,0x500000,0x8000b30a,0x7c00100,0x230400,0x8000cd00,0x4000000,0x4e00000,0x8000d202,0x2802500,0x962460,0x8000d202,0x7c00100,
+0x230400,0x8000d68d,0x4000000,0x200000,0x8000d997,0x2802000,0x962460,0x8000d997,0x2802400,0x962460,0x8000d997,0x4000000,0x400000,0x8000d997,0x4000000,0x500000,
+0x8000d997,0x7c00100,0x230400,0x8000d997,0xc000010,0x448000,0x8000e489,0x2802100,0x962460,0x8000e489,0x7c00100,0x230400,0x8000e719,0x7c00100,0x220400,0x8000f8a6,
+0x2802100,0x962460,0x8000f8a6,0x7c00100,0x230400,0x8000f8a6,0xc000010,0x448000,0x8000fda1,0x2802100,0x1862460,0x8000fda1,0x2806400,0x1862460,0x8000fda1,0x4000000,
+0x1800000,0x8000fda1,0x6800000,0x1329800,0x8000fda1,0x6800100,0x1862540,0x8000fda1,0x7c00100,0x1830000,0x8000fda1,0xc000010,0x448000,0x8000fe9c,0x7c00100,0x230400,
+0x8000fe9c,0x7c00100,0x830400,0x8000fe9c,0x7c00100,0x1430400,0x8000ff06,0x7c00100,0x220400,0x80010165,0x7c00100,0x230400,0x800102a2,0x4000000,0x200000,0x800102a2,
+0x7c00100,0x230400,0x800103a4,0x7c00100,0x230400,0x800103a4,0xc000010,0x448000,0x8001044c,0x4000000,0x200000,0x8001044c,0x7c00100,0x220400,0x8001044c,0x7c00100,
+0x250400,0x80010670,0x2802000,0x962460,0x80010670,0x4000000,0x200000,0x80010670,0x4000010,0x400000,0x80010670,0xc000010,0x448000,0x800a4711,0x7c40300,0xe30000,
+0x800acd00,0x4000000,0x34e00000,0x800acd00,0x4000000,0x7a902460,0x800ace00,0x4000000,0x34e00000,0x800acf00,0x4000000,0x34e00000,0x800b0011,0x7c40300,0xe30000,0x800b0500,
+0x4000000,0x34e00000,0x800b0500,0x4000000,0xb6800000,0x90001615,0x7c00100,0x230400,0x9000171a,0x4000000,0x200000,0x9000171a,0x7c00100,0x230400,0x90003000,0x24000000,
+0x200000,0x90007f0e,0x4000000,0x200000,0x90008301,0x2802000,0x962460,0x90008e00,0x24000000,0x400000,0x90009519,0x7c00100,0x250400,0x9000a16f,0x2802100,0x962460,
+0x9000d200,0x80000000,0x218960,0x9000d202,0x2802000,0x962460,0x9000d202,0x2802100,0x962460,0x9000d202,0x7c00100,0x230400,0x9000e59d,0x2802100,0x962460,0x90010500,
+0x4000000,0xe00000,0x900107a7,0x2802100,0x962460,0x900107a7,0x2802400,0x962460,0x900107a7,0x2802c00,0x962460,0x900107a7,0x4000000,0x1400000,0x900107a7,0x6800000,
+0x1329800,0x900107a7,0x7c00100,0x220400,0x900107a7,0x7c00100,0x250400,0x900108a8,0x2802100,0x962460,0x900108a8,0x2806400,0x962460,0x900108a8,0x4000000,0x200000,
+0x900108a8,0x4000000,0x400000,0x900108a8,0x4000010,0x400000,0x900108a8,0x6800000,0x1329800,0x900108a8,0x6800100,0x962540,0x900108a8,0x7c00100,0x230400,0x900108a8,
+0xc000010,0x448000,0x90010908,0x7c00100,0x220400,0x90010a38,0x2802100,0x962460,0x90010ca9,0x2802100,0x962460,0x90010ca9,0x4000000,0x500000,0x90010ca9,0x4000010,
+0xb00000,0x90010ca9,0x6800100,0x962540,0x90010ca9,0x7c00100,0x230400,0x90010d1b,0x4000000,0x500000,0x90010eaa,0x2802100,0x962460,0x90010eaa,0x2802400,0x962460,
+0x90010eaa,0x2806400,0x962460,0x90010eaa,0x4000000,0x200000,0x90010eaa,0x4000000,0x400000,0x90010eaa,0x4000010,0x400000,0x90010eaa,0x6800000,0x1329800,0x90010eaa,
+0x6800100,0x962540,0x90010eaa,0x7c00100,0x230400,0x90010eaa,0xc000010,0x448000,0x90010fab,0x7c00100,0x220400,0x90010fab,0x7c00100,0x250400,0x9002c300,0x4000000,
+0x100000,0x900ac400,0x4000000,0xe0000d,0x900acd00,0x4000000,0x34e00000,0x900acd00,0x4000000,0xb6800000,0x900acf00,0x4000000,0x34e00000,0x900b0500,0x4000000,0x34e00000,
+0x900b0500,0x4000000,0xb6800000,0x900b0b9a,0x7c00900,0x1230400,0x900b109a,0x7c00300,0xe30000,0x900b119a,0x7c00300,0xe30000,0x90408e06,0x24000000,0x400000,0xa0001004,
+0x4000000,0x200000,0xa0001004,0x7c00100,0x230400,0xa000120f,0x2802100,0x962460,0xa000120f,0x2802400,0x962460,0xa000171a,0x2802100,0x962460,0xa000171a,0x2806400,
+0x962460,0xa0002a00,0x4000000,0x1600000,0xa0003000,0x24000000,0x200000,0xa000581e,0x7c00100,0x230400,0xa0007300,0x24000000,0x200000,0xa0008301,0x2802400,0x962460,
+0xa0008e00,0x24000000,0x400000,0xa000cf00,0x4000000,0x4e00000,0xa0010500,0x4000000,0x200000,0xa00114af,0x2802100,0x962460,0xa00114af,0x2802400,0x962460,0xa00114af,
+0x2806400,0x962460,0xa00114af,0x6800000,0x1329800,0xa00114af,0x7c00100,0x230400,0xa00114af,0x7c00100,0x230560,0xa00116b0,0x2802100,0x962460,0xa00116b0,0x2802800,
+0x962460,0xa00116b0,0x2806400,0x962460,0xa00116b0,0x4000000,0x400000,0xa00116b0,0x4000000,0x500000,0xa00116b0,0x4000010,0x400000,0xa00116b0,0x6800100,0x962540,
+0xa00116b0,0x7c00100,0x230400,0xa00116b0,0x7c00100,0x230560,0xa00116b0,0xc000010,0x448000,0xa0011722,0x7c00100,0x230400,0xa00118b1,0x2802000,0x962460,0xa00118b1,
+0x2802100,0x962460,0xa00118b1,0x2806400,0x962460,0xa00118b1,0x4000000,0x200000,0xa00118b1,0x4000000,0x400000,0xa00118b1,0x4000000,0x500000,0xa00118b1,0x6800100,
+0x962540,0xa00118b1,0x7c00100,0x230400,0xa00118b1,0x7c00100,0x230560,0xa00118b1,0xc000010,0x448000,0xa00a4005,0x7c00100,0xe30400,0xa00a4711,0x7c40300,0xe30000,
+0xa00ac400,0x4000000,0x4e00000,0xa00acb14,0x7c00100,0xe30000,0xa00acf00,0x4000000,0x34e00000,0xa00b0500,0x4000000,0x34e00000,0xa00b0500,0x4000000,0xb6800000,0xa00b0b96,
+0x7c00900,0x1230400,0xa00b1211,0x7c40300,0xe30000,0xa00b1314,0x7c00100,0xe30000,0xa00b1596,0x7c00300,0xe30000,0xa040afac,0x6800400,0x962540,0xa08083ad,0x2802400,
+0x962460,0xb0000a03,0x7c00100,0x220400,0xb0000b13,0x7c00100,0x2633800,0xb0001004,0x2802000,0x962460,0xb0001110,0x4000000,0x200000,0xb0001524,0x2802000,0x962460,
+0xb0001615,0x4000000,0x500000,0xb000251b,0x7c00100,0x230400,0xb0007300,0x24000000,0x200000,0xb0008939,0x4000000,0x200000,0xb0008939,0x7c00100,0x230400,0xb0008e00,
+0x24000000,0x200000,0xb0008e00,0x24000000,0x400000,0xb0008e00,0x24000010,0x400000,0xb0009257,0x2802000,0x962460,0xb0009257,0x4000000,0x1600000,0xb0009519,0x7c00100,
+0x220400,0xb0009519,0x7c00100,0x250400,0xb0009a00,0x4000000,0x200000,0xb000b30a,0x2802100,0x962460,0xb000b30a,0x7c00100,0x230400,0xb000c178,0x80000000,0x218960,
+0xb000c300,0x4000000,0x4200000,0xb000d202,0x2802000,0x962460,0xb000d476,0x6800100,0x962540,0xb000d476,0x7c00100,0x230400,0xb000e300,0x4000000,0x4e00000,0xb000fda1,
+0x7c00100,0x1830000,0xb0010eaa,0x2802000,0x962460,0xb00116b0,0x7c00100,0x230400,0xb0011900,0x4000000,0x4e00000,0xb0011ab2,0x2802100,0x962460,0xb0011ab2,0x2802400,
+0x962460,0xb0011ab2,0x2806400,0x962460,0xb0011ab2,0x4000000,0x200000,0xb0011ab2,0x6800100,0x962540,0xb0011ab2,0x7c00100,0x230400,0xb0011b0c,0x7c00100,0x230400,
+0xb0011cb3,0x2802100,0x962460,0xb0011cb3,0x2806400,0x962460,0xb0011cb3,0x6800000,0x1329800,0xb0011cb3,0x6800100,0x962540,0xb0011cb3,0x7c00100,0x230400,0xb0011db6,
+0x2802500,0x962460,0xb0011db6,0x6800000,0x1329800,0xb0011db6,0x7c00100,0x230400,0xb0011db6,0x7c00500,0x230400,0xb0011e00,0x4000000,0x200000,0xb0011e00,0x4000000,
+0x1500000,0xb0011fb4,0x2802100,0x962460,0xb0011fb4,0x6800100,0x962540,0xb0011fb4,0x7c00100,0x230400,0xb0011fb4,0xc000010,0x248000,0xb0012000,0x4000000,0x200000,
+0xb00121b5,0x4000000,0x200000,0xb00121b5,0x4000010,0x400000,0xb00121b5,0x7c00100,0x220400,0xb00121b5,0x7c00100,0x250400,0xb00121b5,0xc000010,0x448000,0xb00122b8,
+0x4000000,0x200000,0xb00122b8,0x7c00100,0x230400,0xb00123b7,0x2802400,0x962460,0xb00123b7,0x4000000,0x200000,0xb00123b7,0x7c00100,0x230400,0xb00123b7,0xc000010,
+0x248000,0xb00a4005,0x7c00100,0xe30400,0xb00a4711,0x7c40300,0xe30000,0xb00acf00,0x4000000,0x34e00000,0xb00b0500,0x4000000,0x34e00000,0xb00b0500,0x4000000,0x3ce00000,
+0xb00b0500,0x4000000,0xb6800000,0xb00b109a,0x7c00300,0xe30000,0xb080e47c,0x2802000,0x962460,0xc0001524,0x4000000,0x500000,0xc0001a18,0x2806400,0x1862460,0xc0001a18,
+0x7c00100,0x1830000,0xc0007300,0x24000000,0x200000,0xc0008e00,0x24000010,0x400000,0xc0009519,0x7c00100,0x220400,0xc0009519,0x7c00100,0x250400,0xc000c300,0x4000000,
+0x420000f,0xc000d85c,0x2802100,0x962460,0xc000d85c,0x6800100,0x962540,0xc000d85c,0x7c00100,0x230400,0xc000dc99,0x7c00100,0x230400,0xc000e719,0x7c00100,0x220400,
+0xc00107a7,0x7c00100,0x230400,0xc0010eaa,0x7c00100,0x230400,0xc00116b0,0x7c00100,0x230560,0xc0011900,0x4000000,0x4200000,0xc0012447,0,0x818820,0xc0012447,
+0,0xc18820,0xc0012447,0,0x1418820,0xc00125b9,0x7c00100,0x230400,0xc00126bb,0x2802100,0x962460,0xc00126bb,0x2806400,0x962460,0xc00126bb,0x4000000,
+0x500000,0xc00126bb,0x6800100,0x962540,0xc00126bb,0x7c00100,0x230400,0xc00127ba,0x2802400,0x962460,0xc00127ba,0x4000000,0x200000,0xc00127ba,0x6800000,0x1329800,
+0xc00127ba,0x7c00100,0x230400,0xc00127ba,0x7c00900,0x230400,0xc0012800,0x4000000,0x200000,0xc0012b23,0x4000000,0x200000,0xc0012b23,0x4000000,0x400000,0xc0012b23,
+0x4000000,0x1500000,0xc0012cbc,0x2802400,0x962460,0xc0012cbc,0x4000000,0x1600000,0xc0012cbc,0x6800000,0x1329800,0xc0012cbc,0x7c00100,0x230400,0xc00acf00,0x4000000,
+0x34e00000,0xc00ae300,0x4000000,0x34e00000,0xc00b0500,0x4000000,0x34e00000,0xc00b0500,0x4000000,0xb6800000,0xc00b0b00,0x4000000,0x1200000,0xc00b0b00,0x7c00900,0x1230400,
+0xc00b109a,0x7c00300,0xe30000,0xc00b2914,0x7c00100,0x2530000,0xc00b2916,0x7c00100,0x2530c00,0xc00b2a00,0x4000000,0x34e00000,0xc040af53,0x7c00100,0x230400,0xc0c12b7e,
+0x4000000,0x200000,0xc14a44bf,0x4000000,0xe0000d,0xd000131f,0x2802c00,0x962460,0xd000171a,0x7c00100,0x230400,0xd0001821,0x2802100,0x962460,0xd0007300,0x24000000,
+0x200000,0xd0008e00,0x24000000,0x200000,0xd0008f3a,0x2806000,0x962460,0xd0009519,0x7c00100,0x220400,0xd0009519,0x7c00100,0x250400,0xd000a500,0x4000000,0x200000,
+0xd000c300,0x4000000,0x4e00000,0xd000d202,0x7c00100,0x230400,0xd000d476,0x7c00100,0x230400,0xd000d997,0x2802100,0x962460,0xd000d997,0x6800100,0x962540,0xd000e001,
+0x2802100,0x962460,0xd000e700,0x4000400,0x200000,0xd000e719,0x7c00100,0x220400,0xd000e719,0x7c00500,0x23040f,0xd000fa00,0x4000000,0x4e00000,0xd0010eaa,0x4000010,
+0x400000,0xd0010eaa,0x7c00100,0x230400,0xd0012dbd,0x4000000,0x200000,0xd0012dbd,0x7c00100,0x230400,0xd0012fbe,0x2802100,0x962460,0xd0012fbe,0x2802400,0x962460,
+0xd0012fbe,0x2806400,0x962460,0xd0012fbe,0x4000000,0x400000,0xd0012fbe,0x6800000,0x1329800,0xd0012fbe,0x6800100,0x962540,0xd0012fbe,0x6800100,0x962541,0xd0012fbe,
+0x6804400,0x962540,0xd0012fbe,0x7c00100,0x230400,0xd0012fbe,0x7c00100,0x230560,0xd0012fbe,0xc000010,0x448000,0xd0013183,0x7c00100,0x230400,0xd0013200,0x4000000,
+0x200000,0xd0013200,0x6800000,0x1329805,0xd00134c0,0x2802100,0x962460,0xd00134c0,0x4000002,0x400000,0xd00134c0,0x7c00100,0x230400,0xd00a4305,0x7c00100,0xe30400,
+0xd00a4611,0x7c40300,0xe30000,0xd00a4711,0x7c40300,0xe30000,0xd00a5e11,0x7c40300,0xe30000,0xd00acf00,0x4000000,0x34e00000,0xd00b0500,0x4000000,0x34e00000,0xd00b0500,
+0x4000000,0xb6800000,0xd00b0b11,0x6800500,0x962540,0xd00b0bbf,0x2802200,0xc62460,0xd00b119a,0x7c00300,0xe30000,0xd00b2a00,0x4000000,0x34e00000,0xd00b2e11,0x7c40300,
+0xe30000,0xd00b30bf,0x7c00300,0x230000,0xd00b339a,0x7c00300,0xe30000};
+
+static const int32_t countPropsVectors=6999;
+static const int32_t propsVectorsColumns=3;
+static const uint16_t scriptExtensions[262]={
+0x800e,0x8019,8,0x8059,8,2,8,0x8038,8,6,8,0x8019,2,0x22,0x25,0xb6,
+0x80c0,2,0x22,0x8025,2,0x11,2,0x22,0x54,0x79,0x7b,0xa7,0xb6,0x80b7,2,0x8022,
+2,0x25,0x80c0,2,0x20,2,0x80b6,4,0xa,0xf,0x10,0x15,0x19,0x1a,0x1f,0x23,
+0x24,0x89,0x97,0x809e,4,0xa,0xf,0x10,0x15,0x19,0x1a,0x1f,0x23,0x24,0x89,0x809e,
+4,0xa,0xf,0x10,0x15,0x1a,0x1f,0x21,0x23,0x24,0x3a,0x89,0x91,0x99,0x9e,0xa0,
+0xaf,0xb2,0xb3,0x80bb,4,0xa,0xf,0x10,0x15,0x1a,0x1f,0x21,0x23,0x24,0x30,0x3a,
+0x89,0x91,0x99,0x9e,0xa0,0xaf,0xb2,0xb3,0x80bb,0xa,0x78,0xa0,0x80b2,0xa,0x69,4,
+0x3a,0x8076,4,0x6f,0x10,0x80a4,0x10,0x74,0xf,0x809d,0xf,0x78,0x23,0x8089,0x23,0x7c,
+0x15,0x80bb,0x15,0x80,0x1c,0x34,0x8076,0x1c,0x84,0xc,0x8019,0x2a,0x2b,0x2c,0x802d,0x1b,
+0x805a,0x800a,4,0xa,0x15,0x8089,0xa,0x8089,4,0x800a,0xa,0x8097,0xa,0x15,0x1a,0x1f,
+0x23,0x8024,0xa,0x80bb,4,0xa,0x15,0x1f,0x24,0x89,0x9e,0x80bb,0x8004,8,0x8022,0x19,
+0x801b,0xa,0x19,0x8089,5,0x11,0x12,0x14,0x16,0x8029,5,0x11,0x12,0x14,0x8016,0x8011,
+5,0x8011,0x11,0x14,0x8016,0x11,0x8019,0xa,0xf,0x10,0x78,0x91,0x99,0x9d,0x9e,0xa0,
+0xa3,0x80b2,0xa,0xf,0x10,0x15,0x1a,0x78,0x91,0x99,0x9d,0x9e,0xa0,0xa3,0xb2,0x80bb,
+0xa,0xf,0x10,0x15,0x78,0x91,0x99,0x9d,0x9e,0xa0,0xa3,0xb2,0x80bb,0xa,0x98,0xa,
+0x8023,0xa,0xef,0x19,0x1c,0x804f,0x37,0x804e,2,0x8025,2,0xf8,0x2f,0x31,0x8053,0x2f,
+0x8031,2,0x8007,0x89,0x7c,0x8087};
+
+static const int32_t indexes[UPROPS_INDEX_COUNT]={0x2b96,0x2b96,0x2b96,0x2b96,0x6898,3,0x83ef,0x8472,0x8472,0x8472,0xb34c0,0x2a75a31,0,0,0,0};
+
+#endif // INCLUDED_FROM_UCHAR_C
diff --git a/thirdparty/icu4c/common/ucharstrie.cpp b/thirdparty/icu4c/common/ucharstrie.cpp
new file mode 100644
index 0000000000..e0b33af519
--- /dev/null
+++ b/thirdparty/icu4c/common/ucharstrie.cpp
@@ -0,0 +1,414 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ucharstrie.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010nov14
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/appendable.h"
+#include "unicode/ucharstrie.h"
+#include "unicode/uobject.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+// Destructor: hands ownedArray_ back to uprv_free().
+UCharsTrie::~UCharsTrie() {
+ uprv_free(ownedArray_);
+}
+
+// Reports the match state at the current position without consuming input.
+// Returns USTRINGTRIE_NO_MATCH when the trie is stopped (pos_ is NULL),
+// USTRINGTRIE_NO_VALUE while a linear-match node is still pending or when
+// the unit at pos_ is below kMinValueLead, and valueResult(node) otherwise.
+UStringTrieResult
+UCharsTrie::current() const {
+    if(pos_==NULL) {
+        return USTRINGTRIE_NO_MATCH;
+    }
+    if(remainingMatchLength_>=0) {
+        // Units of a linear-match node remain to be matched.
+        return USTRINGTRIE_NO_VALUE;
+    }
+    const int32_t node=*pos_;
+    if(node<kMinValueLead) {
+        return USTRINGTRIE_NO_VALUE;
+    }
+    return valueResult(node);
+}
+
+// Starts matching with one code point.
+// A code point up to 0xffff is passed to first() as a single unit; anything
+// larger is split into U16_LEAD/U16_TRAIL, and the trail unit is only fed to
+// next() if first() on the lead unit reports USTRINGTRIE_HAS_NEXT.
+UStringTrieResult
+UCharsTrie::firstForCodePoint(UChar32 cp) {
+    if(cp<=0xffff) {
+        return first(cp);
+    }
+    const UStringTrieResult leadResult=first(U16_LEAD(cp));
+    if(!USTRINGTRIE_HAS_NEXT(leadResult)) {
+        return USTRINGTRIE_NO_MATCH;
+    }
+    return next(U16_TRAIL(cp));
+}
+
+// Continues matching with one code point.
+// A code point up to 0xffff is passed to next() as a single unit; anything
+// larger is split into U16_LEAD/U16_TRAIL, and the trail unit is only fed to
+// next() if next() on the lead unit reports USTRINGTRIE_HAS_NEXT.
+UStringTrieResult
+UCharsTrie::nextForCodePoint(UChar32 cp) {
+    if(cp<=0xffff) {
+        return next(cp);
+    }
+    const UStringTrieResult leadResult=next(U16_LEAD(cp));
+    if(!USTRINGTRIE_HAS_NEXT(leadResult)) {
+        return USTRINGTRIE_NO_MATCH;
+    }
+    return next(U16_TRAIL(cp));
+}
+
+// Selects the branch edge that matches uchar.
+//   pos    - first unit after the branch node's lead unit
+//   length - value taken from the lead unit; 0 means the real value is stored
+//            in the next unit. One is added to get the number of units to
+//            choose from.
+//   uchar  - input unit to match
+// On a match, pos_ is updated and the result for the reached position is
+// returned; if no edge matches, the trie is stopped and USTRINGTRIE_NO_MATCH
+// is returned.
+UStringTrieResult
+UCharsTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
+ // Branch according to the current unit.
+ if(length==0) {
+ length=*pos++;
+ }
+ ++length;
+ // The length of the branch is the number of units to select from.
+ // The data structure encodes a binary search.
+ while(length>kMaxBranchLinearSubNodeLength) {
+ if(uchar<*pos++) {
+ // Lower half: follow the jump delta stored after the comparison unit.
+ length>>=1;
+ pos=jumpByDelta(pos);
+ } else {
+ // Upper half: it follows directly after the delta, so just skip the delta.
+ length=length-(length>>1);
+ pos=skipDelta(pos);
+ }
+ }
+ // Drop down to linear search for the last few units.
+ // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
+ // and divides length by 2.
+ do {
+ if(uchar==*pos++) {
+ UStringTrieResult result;
+ int32_t node=*pos;
+ if(node&kValueIsFinal) {
+ // Leave the final value for getValue() to read.
+ result=USTRINGTRIE_FINAL_VALUE;
+ } else {
+ // Use the non-final value as the jump delta.
+ ++pos;
+ // int32_t delta=readValue(pos, node);
+ int32_t delta;
+ if(node<kMinTwoUnitValueLead) {
+ // The lead unit itself is the delta.
+ delta=node;
+ } else if(node<kThreeUnitValueLead) {
+ // Two-unit form: high bits from the lead unit, low 16 bits from the next unit.
+ delta=((node-kMinTwoUnitValueLead)<<16)|*pos++;
+ } else {
+ // Three-unit form: the delta is held entirely in the two following units.
+ delta=(pos[0]<<16)|pos[1];
+ pos+=2;
+ }
+ // end readValue()
+ pos+=delta;
+ node=*pos;
+ result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
+ }
+ pos_=pos;
+ return result;
+ }
+ --length;
+ pos=skipValue(pos);
+ } while(length>1);
+ // Last candidate: it is not followed by a value/delta; the next node starts right after it.
+ if(uchar==*pos++) {
+ pos_=pos;
+ int32_t node=*pos;
+ return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
+ } else {
+ stop();
+ return USTRINGTRIE_NO_MATCH;
+ }
+}
+
+// Matches uchar against the node that starts at pos.
+// Branch nodes are delegated to branchNext(). For a linear-match node the
+// first unit is compared and the remaining length is recorded in
+// remainingMatchLength_. Intermediate (non-final) values are skipped; a final
+// value, or a mismatch, stops the trie and yields USTRINGTRIE_NO_MATCH.
+UStringTrieResult
+UCharsTrie::nextImpl(const UChar *pos, int32_t uchar) {
+    int32_t node=*pos++;
+    for(;;) {
+        if(node<kMinLinearMatch) {
+            return branchNext(pos, node, uchar);
+        }
+        if(node<kMinValueLead) {
+            // Linear-match node: compare the first of its units.
+            if(uchar!=*pos) {
+                stop();
+                return USTRINGTRIE_NO_MATCH;
+            }
+            ++pos;
+            // Units still to be matched in this node, minus 1.
+            const int32_t remaining=node-kMinLinearMatch-1;
+            remainingMatchLength_=remaining;
+            pos_=pos;
+            if(remaining>=0) {
+                return USTRINGTRIE_NO_VALUE;
+            }
+            node=*pos;
+            return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
+        }
+        if(node&kValueIsFinal) {
+            // A final value has no further matching units.
+            stop();
+            return USTRINGTRIE_NO_MATCH;
+        }
+        // Intermediate value: step over it and look at the node type that follows.
+        pos=skipNodeValue(pos, node);
+        node&=kNodeTypeMask;
+    }
+}
+
+// Advances the trie by one input unit.
+// Returns USTRINGTRIE_NO_MATCH if the trie is already stopped or the unit does
+// not match. While inside a linear-match node the unit is compared directly;
+// otherwise the work is handed to nextImpl().
+UStringTrieResult
+UCharsTrie::next(int32_t uchar) {
+    const UChar *pos=pos_;
+    if(pos==NULL) {
+        return USTRINGTRIE_NO_MATCH;
+    }
+    if(remainingMatchLength_<0) {
+        // Not inside a linear-match node: interpret the node at pos.
+        return nextImpl(pos, uchar);
+    }
+    // Remaining part of a linear-match node.
+    if(uchar!=*pos) {
+        stop();
+        return USTRINGTRIE_NO_MATCH;
+    }
+    ++pos;
+    pos_=pos;
+    if(--remainingMatchLength_>=0) {
+        return USTRINGTRIE_NO_VALUE;
+    }
+    const int32_t node=*pos;
+    return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
+}
+
+// Advances the trie by a whole sequence of units.
+//   ptr     - the input units
+//   sLength - number of units, or negative if the input is NUL-terminated
+// Empty input returns current(). Otherwise the units are consumed one by one;
+// the first mismatch stops the trie and returns USTRINGTRIE_NO_MATCH. When the
+// input is exhausted, pos_ and remainingMatchLength_ are stored and the result
+// for the reached position is returned.
+UStringTrieResult
+UCharsTrie::next(ConstChar16Ptr ptr, int32_t sLength) {
+ const UChar *s=ptr;
+ if(sLength<0 ? *s==0 : sLength==0) {
+ // Empty input.
+ return current();
+ }
+ const UChar *pos=pos_;
+ if(pos==NULL) {
+ return USTRINGTRIE_NO_MATCH;
+ }
+ int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
+ for(;;) {
+ // Fetch the next input unit, if there is one.
+ // Continue a linear-match node without rechecking sLength<0.
+ int32_t uchar;
+ if(sLength<0) {
+ // NUL-terminated input.
+ for(;;) {
+ if((uchar=*s++)==0) {
+ // End of input: save the state and report what is at this position.
+ remainingMatchLength_=length;
+ pos_=pos;
+ int32_t node;
+ return (length<0 && (node=*pos)>=kMinValueLead) ?
+ valueResult(node) : USTRINGTRIE_NO_VALUE;
+ }
+ if(length<0) {
+ // The linear-match node is used up; uchar is handled by the node loop below.
+ remainingMatchLength_=length;
+ break;
+ }
+ if(uchar!=*pos) {
+ stop();
+ return USTRINGTRIE_NO_MATCH;
+ }
+ ++pos;
+ --length;
+ }
+ } else {
+ // Input with an explicit length; same logic as above but counting down sLength.
+ for(;;) {
+ if(sLength==0) {
+ remainingMatchLength_=length;
+ pos_=pos;
+ int32_t node;
+ return (length<0 && (node=*pos)>=kMinValueLead) ?
+ valueResult(node) : USTRINGTRIE_NO_VALUE;
+ }
+ uchar=*s++;
+ --sLength;
+ if(length<0) {
+ remainingMatchLength_=length;
+ break;
+ }
+ if(uchar!=*pos) {
+ stop();
+ return USTRINGTRIE_NO_MATCH;
+ }
+ ++pos;
+ --length;
+ }
+ }
+ // Interpret nodes until uchar has been matched by a linear-match node
+ // (then continue with the outer loop) or the function returns.
+ int32_t node=*pos++;
+ for(;;) {
+ if(node<kMinLinearMatch) {
+ UStringTrieResult result=branchNext(pos, node, uchar);
+ if(result==USTRINGTRIE_NO_MATCH) {
+ return USTRINGTRIE_NO_MATCH;
+ }
+ // Fetch the next input unit, if there is one.
+ if(sLength<0) {
+ if((uchar=*s++)==0) {
+ return result;
+ }
+ } else {
+ if(sLength==0) {
+ return result;
+ }
+ uchar=*s++;
+ --sLength;
+ }
+ if(result==USTRINGTRIE_FINAL_VALUE) {
+ // No further matching units.
+ stop();
+ return USTRINGTRIE_NO_MATCH;
+ }
+ pos=pos_; // branchNext() advanced pos and wrote it to pos_ .
+ node=*pos++;
+ } else if(node<kMinValueLead) {
+ // Match length+1 units.
+ length=node-kMinLinearMatch; // Actual match length minus 1.
+ if(uchar!=*pos) {
+ stop();
+ return USTRINGTRIE_NO_MATCH;
+ }
+ ++pos;
+ --length;
+ // The rest of this linear-match node is compared at the top of the outer loop.
+ break;
+ } else if(node&kValueIsFinal) {
+ // No further matching units.
+ stop();
+ return USTRINGTRIE_NO_MATCH;
+ } else {
+ // Skip intermediate value.
+ pos=skipNodeValue(pos, node);
+ node&=kNodeTypeMask;
+ }
+ }
+ }
+}
+
+// Checks whether all values reachable through a branch node are the same.
+//   pos             - first unit of the branch data
+//   length          - number of units to choose from in this (sub-)branch
+//   haveUniqueValue - whether uniqueValue already holds a value to compare with
+//   uniqueValue     - in/out: the single value found so far
+// Returns NULL as soon as a second, different value is found; otherwise
+// returns the position just past the last comparison unit of the branch.
+const UChar *
+UCharsTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length,
+ UBool haveUniqueValue, int32_t &uniqueValue) {
+ // Binary-search part: recurse into the half reached via the jump delta,
+ // then continue in this loop with the half that follows the delta.
+ while(length>kMaxBranchLinearSubNodeLength) {
+ ++pos; // ignore the comparison unit
+ if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
+ return NULL;
+ }
+ length=length-(length>>1);
+ pos=skipDelta(pos);
+ }
+ // Linear part: every entry except the last is a comparison unit followed by a value.
+ do {
+ ++pos; // ignore a comparison unit
+ // handle its value
+ int32_t node=*pos++;
+ // Bit 15 of the lead unit marks a final value; the low 15 bits are decoded by readValue().
+ UBool isFinal=(UBool)(node>>15);
+ node&=0x7fff;
+ int32_t value=readValue(pos, node);
+ pos=skipValue(pos, node);
+ if(isFinal) {
+ if(haveUniqueValue) {
+ if(value!=uniqueValue) {
+ return NULL;
+ }
+ } else {
+ uniqueValue=value;
+ haveUniqueValue=TRUE;
+ }
+ } else {
+ // A non-final value is a jump delta: examine the sub-trie it leads to.
+ if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
+ return NULL;
+ }
+ haveUniqueValue=TRUE;
+ }
+ } while(--length>1);
+ return pos+1; // ignore the last comparison unit
+}
+
+// Walks the trie from pos and checks whether every value found is the same.
+//   pos             - start of the node to examine
+//   haveUniqueValue - whether uniqueValue already holds a value to compare with
+//   uniqueValue     - in/out: the single value found so far
+// Returns TRUE when a final value is reached without meeting a different
+// value, FALSE as soon as two different values are seen.
+UBool
+UCharsTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
+ int32_t node=*pos++;
+ for(;;) {
+ if(node<kMinLinearMatch) {
+ // Branch node; a lead unit of 0 means the length is stored in the next unit.
+ if(node==0) {
+ node=*pos++;
+ }
+ pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
+ if(pos==NULL) {
+ return FALSE;
+ }
+ haveUniqueValue=TRUE;
+ node=*pos++;
+ } else if(node<kMinValueLead) {
+ // linear-match node
+ pos+=node-kMinLinearMatch+1; // Ignore the match units.
+ node=*pos++;
+ } else {
+ // Value node: bit 15 of the lead unit marks a final value.
+ UBool isFinal=(UBool)(node>>15);
+ int32_t value;
+ if(isFinal) {
+ value=readValue(pos, node&0x7fff);
+ } else {
+ value=readNodeValue(pos, node);
+ }
+ if(haveUniqueValue) {
+ if(value!=uniqueValue) {
+ return FALSE;
+ }
+ } else {
+ uniqueValue=value;
+ haveUniqueValue=TRUE;
+ }
+ if(isFinal) {
+ return TRUE;
+ }
+ // Intermediate value: continue with the node type that follows it.
+ pos=skipNodeValue(pos, node);
+ node&=kNodeTypeMask;
+ }
+ }
+}
+
+// Appends to out every unit that can continue a match from the current state
+// and returns how many units that is.
+// Returns 0 if the trie is stopped or the current node holds a final value.
+// Inside (or at the start of) a linear-match node exactly one unit is
+// possible; at a branch node all branch units are appended.
+int32_t
+UCharsTrie::getNextUChars(Appendable &out) const {
+    const UChar *pos=pos_;
+    if(pos==NULL) {
+        return 0;
+    }
+    if(remainingMatchLength_>=0) {
+        // Next unit of a pending linear-match node.
+        out.appendCodeUnit(*pos);
+        return 1;
+    }
+    int32_t node=*pos++;
+    if(node>=kMinValueLead) {
+        if(node&kValueIsFinal) {
+            return 0;
+        }
+        // Step over the intermediate value and keep only the node type.
+        pos=skipNodeValue(pos, node);
+        node&=kNodeTypeMask;
+    }
+    if(node>=kMinLinearMatch) {
+        // First unit of the linear-match node.
+        out.appendCodeUnit(*pos);
+        return 1;
+    }
+    // Branch node; a lead unit of 0 means the length is stored in the next unit.
+    if(node==0) {
+        node=*pos++;
+    }
+    const int32_t branchLength=node+1;
+    out.reserveAppendCapacity(branchLength);
+    getNextBranchUChars(pos, branchLength, out);
+    return branchLength;
+}
+
+// Appends all comparison units of a branch (and its binary-search
+// sub-branches) to out.
+//   pos    - first unit of the branch data
+//   length - number of units to choose from in this (sub-)branch
+void
+UCharsTrie::getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out) {
+    while(length>kMaxBranchLinearSubNodeLength) {
+        // Skip the comparison unit, collect the half reached via the jump
+        // delta, then continue with the half that follows the delta.
+        ++pos;
+        const int32_t lowerHalf=length>>1;
+        getNextBranchUChars(jumpByDelta(pos), lowerHalf, out);
+        length-=lowerHalf;
+        pos=skipDelta(pos);
+    }
+    do {
+        const UChar unit=*pos++;
+        out.appendCodeUnit(unit);
+        pos=skipValue(pos);
+        --length;
+    } while(length>1);
+    // The last unit is not followed by a value.
+    out.appendCodeUnit(*pos);
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/ucharstriebuilder.cpp b/thirdparty/icu4c/common/ucharstriebuilder.cpp
new file mode 100644
index 0000000000..049997a275
--- /dev/null
+++ b/thirdparty/icu4c/common/ucharstriebuilder.cpp
@@ -0,0 +1,443 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ucharstriebuilder.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010nov14
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/ucharstrie.h"
+#include "unicode/ucharstriebuilder.h"
+#include "unicode/unistr.h"
+#include "unicode/ustring.h"
+#include "cmemory.h"
+#include "uarrsort.h"
+#include "uassert.h"
+#include "uhash.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Note: This builder implementation stores (string, value) pairs with full copies
+ * of the 16-bit-unit sequences, until the UCharsTrie is built.
+ * It might(!) take less memory if we collected the data in a temporary, dynamic trie.
+ */
+
+// One (string, value) pair collected by the builder.
+// The string itself lives in a shared UnicodeString ("strings") owned by the
+// builder: the unit at stringOffset holds the string length and the string's
+// units follow directly after it.
+class UCharsTrieElement : public UMemory {
+public:
+    // No constructor is declared; the compiler-generated one initializes nothing.
+
+    // Appends s (length-prefixed) to strings and remembers its offset and val.
+    void setTo(const UnicodeString &s, int32_t val, UnicodeString &strings, UErrorCode &errorCode);
+
+    // Length of this element's string, read from the length unit.
+    int32_t getStringLength(const UnicodeString &strings) const {
+        return strings[stringOffset];
+    }
+
+    // This element's string as a temporary substring of strings.
+    UnicodeString getString(const UnicodeString &strings) const {
+        const int32_t length=strings[stringOffset];
+        const int32_t start=stringOffset+1;
+        return strings.tempSubString(start, length);
+    }
+
+    // The unit at index within this element's string.
+    UChar charAt(int32_t index, const UnicodeString &strings) const {
+        const int32_t start=stringOffset+1;  // skip the length unit
+        return strings[start+index];
+    }
+
+    int32_t getValue() const {
+        return value;
+    }
+
+    // Orders this element's string relative to o's string.
+    int32_t compareStringTo(const UCharsTrieElement &o, const UnicodeString &strings) const;
+
+private:
+    // Offset of the length unit inside strings; the string follows it.
+    // (Keeping the length there instead of in a field here saves 2 bytes per string.)
+    int32_t stringOffset;
+    int32_t value;
+};
+
+// Stores s and val in this element.
+// The string is appended to strings as one length unit followed by its units,
+// and stringOffset records where the length unit was written.
+// Does nothing if errorCode already indicates a failure. Sets
+// U_INDEX_OUTOFBOUNDS_ERROR (and stores nothing) if s is longer than 0xffff
+// units, because the length must fit into a single unit.
+void
+UCharsTrieElement::setTo(const UnicodeString &s, int32_t val,
+                         UnicodeString &strings, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    const int32_t sLength=s.length();
+    if(sLength>0xffff) {
+        // Too long: We store the length in 1 unit.
+        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return;
+    }
+    value=val;
+    stringOffset=strings.length();
+    strings.append(static_cast<UChar>(sLength)).append(s);
+}
+
+// Compares this element's string with other's string using
+// UnicodeString::compare(). Both strings are looked up in the same shared
+// strings storage; the result is whatever compare() returns.
+int32_t
+UCharsTrieElement::compareStringTo(const UCharsTrieElement &other, const UnicodeString &strings) const {
+ return getString(strings).compare(other.getString(strings));
+}
+
+// Creates an empty builder: no element array and no serialized uchars buffer
+// yet, with all capacities and lengths at 0. The errorCode parameter is not
+// used by this constructor.
+UCharsTrieBuilder::UCharsTrieBuilder(UErrorCode & /*errorCode*/)
+ : elements(NULL), elementsCapacity(0), elementsLength(0),
+ uchars(NULL), ucharsCapacity(0), ucharsLength(0) {}
+
+// Destructor: frees the element array (allocated with new[]) and the
+// serialized uchars buffer (allocated with uprv_malloc).
+UCharsTrieBuilder::~UCharsTrieBuilder() {
+ delete[] elements;
+ uprv_free(uchars);
+}
+
+// Adds a (string, value) pair to the set of elements the trie will be built from.
+//   s         - the string (at most 0xffff units, see UCharsTrieElement::setTo())
+//   value     - the value associated with s
+//   errorCode - in/out; nothing happens if it already indicates a failure.
+//               Set to U_NO_WRITE_PERMISSION if the trie has already been
+//               built, U_MEMORY_ALLOCATION_ERROR if growing the element array
+//               or the shared string storage fails, or whatever setTo() reports.
+// Returns *this so that calls can be chained.
+UCharsTrieBuilder &
+UCharsTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return *this;
+    }
+    if(ucharsLength>0) {
+        // Cannot add elements after building.
+        errorCode=U_NO_WRITE_PERMISSION;
+        return *this;
+    }
+    if(elementsLength==elementsCapacity) {
+        // Grow the element array: start at 1024 entries, then quadruple.
+        int32_t newCapacity;
+        if(elementsCapacity==0) {
+            newCapacity=1024;
+        } else {
+            newCapacity=4*elementsCapacity;
+        }
+        UCharsTrieElement *newElements=new UCharsTrieElement[newCapacity];
+        if(newElements==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        if(elementsLength>0) {
+            uprv_memcpy(newElements, elements, (size_t)elementsLength*sizeof(UCharsTrieElement));
+        }
+        delete[] elements;
+        elements=newElements;
+        elementsCapacity=newCapacity;
+    }
+    // Fill the next free slot, but count it only if setTo() succeeded.
+    // setTo() leaves the element uninitialized when it fails (for example when
+    // s is too long); counting such an element would let a later build sort
+    // and serialize garbage if the caller resets errorCode.
+    elements[elementsLength].setTo(s, value, strings, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return *this;
+    }
+    ++elementsLength;
+    if(strings.isBogus()) {
+        // Appending to the shared string storage ran out of memory.
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+    }
+    return *this;
+}
+
+U_CDECL_BEGIN
+
+// Comparison callback handed to uprv_sortArray().
+//   context - the shared UnicodeString that holds all element strings
+//   left, right - the two UCharsTrieElement objects to compare
+// Returns the result of UCharsTrieElement::compareStringTo().
+static int32_t U_CALLCONV
+compareElementStrings(const void *context, const void *left, const void *right) {
+    const UnicodeString &allStrings=*static_cast<const UnicodeString *>(context);
+    const UCharsTrieElement &lhs=*static_cast<const UCharsTrieElement *>(left);
+    const UCharsTrieElement &rhs=*static_cast<const UCharsTrieElement *>(right);
+    return lhs.compareStringTo(rhs, allStrings);
+}
+
+U_CDECL_END
+
+// Serializes the added elements and returns a new UCharsTrie for them.
+// On success the returned trie takes over the serialized array, so the
+// builder drops its pointer to it. Returns NULL and leaves errorCode set if
+// serialization fails, or sets U_MEMORY_ALLOCATION_ERROR if the trie object
+// itself cannot be allocated.
+UCharsTrie *
+UCharsTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
+    buildUChars(buildOption, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    // The serialized units occupy the tail of the uchars buffer.
+    UCharsTrie *trie=new UCharsTrie(uchars, uchars+(ucharsCapacity-ucharsLength));
+    if(trie==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    // The new trie now owns the array.
+    uchars=NULL;
+    ucharsCapacity=0;
+    return trie;
+}
+
+// Serializes the added elements and points result at the serialized units.
+// result is set with setTo(FALSE, buffer, length) on the builder's own
+// buffer; on failure result is left untouched. Returns result either way.
+UnicodeString &
+UCharsTrieBuilder::buildUnicodeString(UStringTrieBuildOption buildOption, UnicodeString &result,
+                                      UErrorCode &errorCode) {
+    buildUChars(buildOption, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return result;
+    }
+    // The serialized units occupy the tail of the uchars buffer.
+    const UChar *serialized=uchars+(ucharsCapacity-ucharsLength);
+    result.setTo(FALSE, serialized, ucharsLength);
+    return result;
+}
+
+// Produces the serialized trie in the uchars buffer.
+// Does nothing if errorCode already indicates a failure or if a serialized
+// trie is already present. On the first build the elements are sorted and
+// checked for duplicates. Error codes set here:
+//   U_INDEX_OUTOFBOUNDS_ERROR  - no elements were added
+//   U_MEMORY_ALLOCATION_ERROR  - the string storage is bogus or a buffer
+//                                allocation failed
+//   U_ILLEGAL_ARGUMENT_ERROR   - the same string was added twice
+void
+UCharsTrieBuilder::buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    if(uchars!=NULL && ucharsLength>0) {
+        // Already built.
+        return;
+    }
+    if(ucharsLength==0) {
+        // Never serialized before: validate and sort the input.
+        if(elementsLength==0) {
+            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return;
+        }
+        if(strings.isBogus()) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharsTrieElement),
+                       compareElementStrings, &strings,
+                       FALSE,  // need not be a stable sort
+                       &errorCode);
+        if(U_FAILURE(errorCode)) {
+            return;
+        }
+        // Duplicate strings are not allowed; after sorting they are adjacent.
+        UnicodeString previous=elements[0].getString(strings);
+        int32_t i=1;
+        while(i<elementsLength) {
+            UnicodeString candidate=elements[i].getString(strings);
+            if(previous==candidate) {
+                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+                return;
+            }
+            previous.fastCopyFrom(candidate);
+            ++i;
+        }
+    }
+    // Create and UChar-serialize the trie for the elements.
+    ucharsLength=0;
+    // Initial buffer size: the total string storage, but at least 1024 units.
+    int32_t wantedCapacity=strings.length();
+    if(wantedCapacity<1024) {
+        wantedCapacity=1024;
+    }
+    if(ucharsCapacity<wantedCapacity) {
+        uprv_free(uchars);
+        uchars=static_cast<UChar *>(uprv_malloc(wantedCapacity*2));
+        if(uchars==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            ucharsCapacity=0;
+            return;
+        }
+        ucharsCapacity=wantedCapacity;
+    }
+    StringTrieBuilder::build(buildOption, elementsLength, errorCode);
+    // ensureCapacity() sets uchars to NULL when growing the buffer fails.
+    if(uchars==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+// Returns the string length of element i.
+int32_t
+UCharsTrieBuilder::getElementStringLength(int32_t i) const {
+    const UCharsTrieElement &element=elements[i];
+    return element.getStringLength(strings);
+}
+
+// Returns the unit at unitIndex in the string of element i.
+UChar
+UCharsTrieBuilder::getElementUnit(int32_t i, int32_t unitIndex) const {
+    const UCharsTrieElement &element=elements[i];
+    return element.charAt(unitIndex, strings);
+}
+
+// Returns the value stored with element i.
+int32_t
+UCharsTrieBuilder::getElementValue(int32_t i) const {
+    const UCharsTrieElement &element=elements[i];
+    return element.getValue();
+}
+
+// Starting after unitIndex, finds the first index at which the strings of
+// elements first and last differ, limited by the length of the first
+// element's string. Returns that index.
+int32_t
+UCharsTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const {
+    const UCharsTrieElement &head=elements[first];
+    const UCharsTrieElement &tail=elements[last];
+    const int32_t limit=head.getStringLength(strings);
+    for(++unitIndex; unitIndex<limit; ++unitIndex) {
+        if(head.charAt(unitIndex, strings)!=tail.charAt(unitIndex, strings)) {
+            break;
+        }
+    }
+    return unitIndex;
+}
+
+// Counts the runs of equal units at unitIndex among the elements in
+// [start, limit); every change of unit between neighbouring elements starts
+// a new run.
+int32_t
+UCharsTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const {
+    int32_t runs=0;  // Number of different units at unitIndex.
+    int32_t i=start;
+    do {
+        const UChar unit=elements[i].charAt(unitIndex, strings);
+        // Skip the rest of the elements that share this unit.
+        for(++i; i<limit && elements[i].charAt(unitIndex, strings)==unit; ++i) {}
+        ++runs;
+    } while(i<limit);
+    return runs;
+}
+
+// Starting at element i, skips count runs of elements that share the same
+// unit at unitIndex and returns the index of the first element after them.
+// The inner scan has no upper bound of its own.
+int32_t
+UCharsTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const {
+    do {
+        const UChar unit=elements[i].charAt(unitIndex, strings);
+        for(++i; elements[i].charAt(unitIndex, strings)==unit; ++i) {}
+        --count;
+    } while(count>0);
+    return i;
+}
+
+// Returns the index of the first element at or after i whose unit at
+// unitIndex differs from unit.
+int32_t
+UCharsTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const {
+    for(;;) {
+        if(elements[i].charAt(unitIndex, strings)!=unit) {
+            return i;
+        }
+        ++i;
+    }
+}
+
+UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode)
+ : LinearMatchNode(len, nextNode), s(units) { // Stores the units pointer itself; the units are not copied here.
+ hash=hash*37u+ustr_hashUCharsN(units, len); // Fold a hash of the len units into the existing hash value.
+}
+
+// Equal when the base-class comparison succeeds and both nodes hold the
+// same sequence of units.
+UBool
+UCharsTrieBuilder::UCTLinearMatchNode::operator==(const Node &other) const {
+    if(this!=&other) {
+        if(!LinearMatchNode::operator==(other)) {
+            return FALSE;
+        }
+        const UCTLinearMatchNode &that=(const UCTLinearMatchNode &)other;
+        return u_memcmp(s, that.s, length)==0;
+    }
+    return TRUE;
+}
+
+void
+UCharsTrieBuilder::UCTLinearMatchNode::write(StringTrieBuilder &builder) {
+ UCharsTrieBuilder &b=(UCharsTrieBuilder &)builder; // Downcast; assumes builder really is a UCharsTrieBuilder.
+ next->write(builder); // The following node is written before this node's own units.
+ b.write(s, length);
+ offset=b.writeValueAndType(hasValue, value, b.getMinLinearMatch()+length-1); // The lead unit carries the match length; the value returned by the builder becomes this node's offset.
+}
+
+StringTrieBuilder::Node *
+UCharsTrieBuilder::createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
+ Node *nextNode) const {
+ return new UCTLinearMatchNode( // Allocated with new and returned as a raw pointer.
+ elements[i].getString(strings).getBuffer()+unitIndex, // Points at unit unitIndex of element i's string.
+ length,
+ nextNode);
+}
+
+UBool
+UCharsTrieBuilder::ensureCapacity(int32_t length) { // Makes sure the buffer can hold length units; returns FALSE on allocation failure.
+ if(uchars==NULL) {
+ return FALSE; // previous memory allocation had failed
+ }
+ if(length>ucharsCapacity) {
+ int32_t newCapacity=ucharsCapacity;
+ do {
+ newCapacity*=2; // Keep doubling until the capacity is strictly greater than length.
+ } while(newCapacity<=length);
+ UChar *newUChars=static_cast<UChar *>(uprv_malloc(newCapacity*2)); // Byte count is capacity times 2 (assumes a 2-byte UChar).
+ if(newUChars==NULL) {
+ // unable to allocate memory
+ uprv_free(uchars);
+ uchars=NULL; // Later calls see NULL and fail immediately.
+ ucharsCapacity=0;
+ return FALSE;
+ }
+ u_memcpy(newUChars+(newCapacity-ucharsLength), // The written units sit at the end of the buffer, so they go to the end of the new one.
+ uchars+(ucharsCapacity-ucharsLength), ucharsLength);
+ uprv_free(uchars);
+ uchars=newUChars;
+ ucharsCapacity=newCapacity;
+ }
+ return TRUE;
+}
+
+// Stores one unit directly in front of the units written so far (the data
+// grows from the end of the buffer toward its start) and returns the total
+// number of units written. If the buffer cannot grow, nothing is stored.
+int32_t
+UCharsTrieBuilder::write(int32_t unit) {
+    const int32_t grown=ucharsLength+1;
+    if(!ensureCapacity(grown)) {
+        return ucharsLength;
+    }
+    ucharsLength=grown;
+    uchars[ucharsCapacity-ucharsLength]=(UChar)unit;
+    return ucharsLength;
+}
+
+// Stores length units from s directly in front of the units written so far
+// and returns the total number of units written. If the buffer cannot grow,
+// nothing is stored.
+int32_t
+UCharsTrieBuilder::write(const UChar *s, int32_t length) {
+    const int32_t grown=ucharsLength+length;
+    if(!ensureCapacity(grown)) {
+        return ucharsLength;
+    }
+    ucharsLength=grown;
+    UChar *dest=uchars+(ucharsCapacity-ucharsLength);
+    u_memcpy(dest, s, length);
+    return ucharsLength;
+}
+
+int32_t
+UCharsTrieBuilder::writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) {
+ return write(elements[i].getString(strings).getBuffer()+unitIndex, length); // Writes length units of element i's string, starting at unitIndex.
+}
+
+int32_t
+UCharsTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) {
+ if(0<=i && i<=UCharsTrie::kMaxOneUnitValue) { // Small non-negative value: a single unit with the final flag in bit 15.
+ return write(i|(isFinal<<15));
+ }
+ UChar intUnits[3];
+ int32_t length;
+ if(i<0 || i>UCharsTrie::kMaxTwoUnitValue) { // Negative or large value: fixed lead unit followed by both 16-bit halves of i.
+ intUnits[0]=(UChar)(UCharsTrie::kThreeUnitValueLead);
+ intUnits[1]=(UChar)((uint32_t)i>>16);
+ intUnits[2]=(UChar)i;
+ length=3;
+ // } else if(i<=UCharsTrie::kMaxOneUnitValue) {
+ // intUnits[0]=(UChar)(i);
+ // length=1;
+ } else { // Two units: the bits above the low 16 are added to the lead unit, the low 16 bits follow.
+ intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitValueLead+(i>>16));
+ intUnits[1]=(UChar)i;
+ length=2;
+ }
+ intUnits[0]=(UChar)(intUnits[0]|(isFinal<<15)); // Bit 15 of the lead unit is set when isFinal is nonzero.
+ return write(intUnits, length);
+}
+
+int32_t
+UCharsTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
+ if(!hasValue) {
+ return write(node); // No value: the node unit is written on its own.
+ }
+ UChar intUnits[3];
+ int32_t length;
+ if(value<0 || value>UCharsTrie::kMaxTwoUnitNodeValue) { // Negative or large value: fixed lead unit followed by both 16-bit halves.
+ intUnits[0]=(UChar)(UCharsTrie::kThreeUnitNodeValueLead);
+ intUnits[1]=(UChar)((uint32_t)value>>16);
+ intUnits[2]=(UChar)value;
+ length=3;
+ } else if(value<=UCharsTrie::kMaxOneUnitNodeValue) { // Small value: value+1 is stored shifted left by 6 bits.
+ intUnits[0]=(UChar)((value+1)<<6);
+ length=1;
+ } else { // Two units: upper value bits go into the lead unit (masked with 0x7fc0), the low 16 bits follow.
+ intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitNodeValueLead+((value>>10)&0x7fc0));
+ intUnits[1]=(UChar)value;
+ length=2;
+ }
+ intUnits[0]|=(UChar)node; // The node bits are OR-ed into the lead unit alongside the value bits.
+ return write(intUnits, length);
+}
+
+int32_t
+UCharsTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
+ int32_t i=ucharsLength-jumpTarget; // Distance from the jump target to the current write position.
+ U_ASSERT(i>=0);
+ if(i<=UCharsTrie::kMaxOneUnitDelta) {
+ return write(i); // Small delta: written as a single unit.
+ }
+ UChar intUnits[3];
+ int32_t length;
+ if(i<=UCharsTrie::kMaxTwoUnitDelta) { // Two units: the bits above the low 16 are added to the lead unit.
+ intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitDeltaLead+(i>>16));
+ length=1;
+ } else { // Three units: fixed lead unit, then the upper 16 bits.
+ intUnits[0]=(UChar)(UCharsTrie::kThreeUnitDeltaLead);
+ intUnits[1]=(UChar)(i>>16);
+ length=2;
+ }
+ intUnits[length++]=(UChar)i; // In both cases the low 16 bits come last.
+ return write(intUnits, length);
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/ucharstrieiterator.cpp b/thirdparty/icu4c/common/ucharstrieiterator.cpp
new file mode 100644
index 0000000000..b3132241fe
--- /dev/null
+++ b/thirdparty/icu4c/common/ucharstrieiterator.cpp
@@ -0,0 +1,215 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ucharstrieiterator.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010nov15
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/ucharstrie.h"
+#include "unicode/unistr.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength,
+ UErrorCode &errorCode)
+ : uchars_(trieUChars),
+ pos_(uchars_), initialPos_(uchars_), // Iteration starts at the first unit of the trie data.
+ remainingMatchLength_(-1), initialRemainingMatchLength_(-1), // -1: no linear-match node is pending.
+ skipValue_(FALSE),
+ maxLength_(maxStringLength), value_(0), stack_(NULL) {
+ if(U_FAILURE(errorCode)) {
+ return; // stack_ stays NULL when the caller passes in a failure code.
+ }
+ // stack_ is a pointer so that it's easy to turn ucharstrie.h into
+ // a public API header for which we would want it to depend only on
+ // other public headers.
+ // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
+ // via the UnicodeString and UVector32 implementations, so this additional
+ // cost is minimal.
+ stack_=new UVector32(errorCode);
+ if(stack_==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ }
+}
+
+UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
+ UErrorCode &errorCode)
+ : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_), // Start from the trie's current position rather than its root.
+ remainingMatchLength_(trie.remainingMatchLength_),
+ initialRemainingMatchLength_(trie.remainingMatchLength_),
+ skipValue_(FALSE),
+ maxLength_(maxStringLength), value_(0), stack_(NULL) {
+ if(U_FAILURE(errorCode)) {
+ return; // stack_ stays NULL when the caller passes in a failure code.
+ }
+ stack_=new UVector32(errorCode);
+ if(U_FAILURE(errorCode)) {
+ return; // The UVector32 constructor reported a failure.
+ }
+ if(stack_==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
+ if(length>=0) {
+ // Pending linear-match node, append remaining UChars to str_.
+ ++length;
+ if(maxLength_>0 && length>maxLength_) {
+ length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
+ }
+ str_.append(pos_, length);
+ pos_+=length; // Move past the units just appended.
+ remainingMatchLength_-=length; // Becomes -1 when the whole pending match was consumed.
+ }
+}
+
+UCharsTrie::Iterator::~Iterator() {
+ delete stack_; // stack_ is either NULL or was allocated with new in a constructor.
+}
+
+// Puts the iterator back into the state it had right after construction:
+// the stack is emptied, the string is cut back to the units of any pending
+// linear match (limited by maxLength_), and the position follows those units.
+UCharsTrie::Iterator &
+UCharsTrie::Iterator::reset() {
+    skipValue_=FALSE;
+    stack_->setSize(0);
+    int32_t pending=initialRemainingMatchLength_+1;  // Remaining match length.
+    if(maxLength_>0 && pending>maxLength_) {
+        pending=maxLength_;
+    }
+    str_.truncate(pending);
+    pos_=initialPos_+pending;
+    remainingMatchLength_=initialRemainingMatchLength_-pending;
+    return *this;
+}
+
+// There is more to iterate while a position is set or saved states remain on the stack.
+UBool
+UCharsTrie::Iterator::hasNext() const {
+    if(pos_!=NULL) {
+        return TRUE;
+    }
+    return !stack_->isEmpty();
+}
+
+UBool
+UCharsTrie::Iterator::next(UErrorCode &errorCode) { // Advances to the next string/value; returns FALSE on failure or when iteration is finished.
+ if(U_FAILURE(errorCode)) {
+ return FALSE;
+ }
+ const UChar *pos=pos_;
+ if(pos==NULL) {
+ if(stack_->isEmpty()) {
+ return FALSE; // Nothing saved to resume from.
+ }
+ // Pop the state off the stack and continue with the next outbound edge of
+ // the branch node.
+ int32_t stackSize=stack_->size();
+ int32_t length=stack_->elementAti(stackSize-1); // Top entry: remaining branch length in the upper 16 bits, string length in the lower 16.
+ pos=uchars_+stack_->elementAti(stackSize-2); // Entry below it: saved position as an offset from uchars_.
+ stack_->setSize(stackSize-2);
+ str_.truncate(length&0xffff); // Cut the string back to its length at the time of the push.
+ length=(int32_t)((uint32_t)length>>16);
+ if(length>1) {
+ pos=branchNext(pos, length, errorCode);
+ if(pos==NULL) {
+ return TRUE; // Reached a final value.
+ }
+ } else {
+ str_.append(*pos++); // Last edge of the branch: append its unit and continue at the unit after it.
+ }
+ }
+ if(remainingMatchLength_>=0) {
+ // We only get here if we started in a pending linear-match node
+ // with more than maxLength remaining units.
+ return truncateAndStop();
+ }
+ for(;;) {
+ int32_t node=*pos++;
+ if(node>=kMinValueLead) {
+ if(skipValue_) {
+ pos=skipNodeValue(pos, node); // This value was already delivered by the previous call.
+ node&=kNodeTypeMask;
+ skipValue_=FALSE;
+ } else {
+ // Deliver value for the string so far.
+ UBool isFinal=(UBool)(node>>15);
+ if(isFinal) {
+ value_=readValue(pos, node&0x7fff);
+ } else {
+ value_=readNodeValue(pos, node);
+ }
+ if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
+ pos_=NULL; // The next call resumes from the stack, if anything is left on it.
+ } else {
+ // We cannot skip the value right here because it shares its
+ // lead unit with a match node which we have to evaluate
+ // next time.
+ // Instead, keep pos_ on the node lead unit itself.
+ pos_=pos-1;
+ skipValue_=TRUE;
+ }
+ return TRUE;
+ }
+ }
+ if(maxLength_>0 && str_.length()==maxLength_) {
+ return truncateAndStop();
+ }
+ if(node<kMinLinearMatch) { // Branch node.
+ if(node==0) {
+ node=*pos++; // A zero lead unit means the branch length is read from the next unit.
+ }
+ pos=branchNext(pos, node+1, errorCode);
+ if(pos==NULL) {
+ return TRUE; // Reached a final value.
+ }
+ } else {
+ // Linear-match node, append length units to str_.
+ int32_t length=node-kMinLinearMatch+1;
+ if(maxLength_>0 && str_.length()+length>maxLength_) {
+ str_.append(pos, maxLength_-str_.length()); // Append only as many units as still fit into maxLength_.
+ return truncateAndStop();
+ }
+ str_.append(pos, length);
+ pos+=length;
+ }
+ }
+}
+
+// Branch node, needs to take the first outbound edge and push state for the rest.
+const UChar *
+UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
+ while(length>kMaxBranchLinearSubNodeLength) {
+ ++pos; // ignore the comparison unit
+ // Push state for the greater-or-equal edge.
+ stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode); // Saved position, as an offset from uchars_.
+ stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode); // Upper 16 bits: length of that half; lower 16: current string length.
+ // Follow the less-than edge.
+ length>>=1;
+ pos=jumpByDelta(pos);
+ }
+ // List of key-value pairs where values are either final values or jump deltas.
+ // Read the first (key, value) pair.
+ UChar trieUnit=*pos++;
+ int32_t node=*pos++;
+ UBool isFinal=(UBool)(node>>15);
+ int32_t value=readValue(pos, node&=0x7fff); // Also clears bit 15 of node for the skipValue() call below.
+ pos=skipValue(pos, node);
+ stack_->addElement((int32_t)(pos-uchars_), errorCode); // Save the position after this pair ...
+ stack_->addElement(((length-1)<<16)|str_.length(), errorCode); // ... with the count of remaining pairs and the current string length.
+ str_.append(trieUnit);
+ if(isFinal) {
+ pos_=NULL;
+ value_=value;
+ return NULL; // NULL tells the caller that a final value was reached.
+ } else {
+ return pos+value; // The value is a jump delta relative to the position after it.
+ }
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/uchriter.cpp b/thirdparty/icu4c/common/uchriter.cpp
new file mode 100644
index 0000000000..bedbabc74c
--- /dev/null
+++ b/thirdparty/icu4c/common/uchriter.cpp
@@ -0,0 +1,367 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1998-2012, International Business Machines Corporation and
+* others. All Rights Reserved.
+******************************************************************************
+*/
+
+#include "utypeinfo.h" // for 'typeid' to work
+
+#include "unicode/uchriter.h"
+#include "unicode/ustring.h"
+#include "unicode/utf16.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCharCharacterIterator)
+
+UCharCharacterIterator::UCharCharacterIterator()
+ : CharacterIterator(),
+ text(0) // No text is attached to a default-constructed iterator.
+{
+ // never default construct!
+}
+
+UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr,
+ int32_t length)
+ : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0), // Null text gives length 0; a negative length is replaced by u_strlen(textPtr).
+ text(textPtr) // The pointer is stored as-is; the characters are not copied.
+{
+}
+
+UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr,
+ int32_t length,
+ int32_t position)
+ : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0, position), // Null text gives length 0; a negative length is replaced by u_strlen(textPtr).
+ text(textPtr) // The pointer is stored as-is; the characters are not copied.
+{
+}
+
+UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr,
+ int32_t length,
+ int32_t textBegin,
+ int32_t textEnd,
+ int32_t position)
+ : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0, textBegin, textEnd, position), // Null text gives length 0; a negative length is replaced by u_strlen(textPtr).
+ text(textPtr) // The pointer is stored as-is; the characters are not copied.
+{
+}
+
+UCharCharacterIterator::UCharCharacterIterator(const UCharCharacterIterator& that)
+: CharacterIterator(that),
+ text(that.text) // Both iterators refer to the same text; only the pointer is copied.
+{
+}
+
+UCharCharacterIterator&
+UCharCharacterIterator::operator=(const UCharCharacterIterator& that) {
+ CharacterIterator::operator=(that); // Base-class state is copied by the base-class assignment.
+ text = that.text; // Only the pointer is copied, not the characters.
+ return *this;
+}
+
+UCharCharacterIterator::~UCharCharacterIterator() { // Empty body: the destructor does not free the text.
+}
+
+// Two iterators are equal when they have the same dynamic type, refer to
+// the same text pointer and length, and agree on position and range.
+UBool
+UCharCharacterIterator::operator==(const ForwardCharacterIterator& that) const {
+    if (this != &that) {
+        if (typeid(*this) != typeid(that)) {
+            return FALSE;
+        }
+
+        UCharCharacterIterator& other = (UCharCharacterIterator&)that;
+
+        if (!(text == other.text) || textLength != other.textLength) {
+            return FALSE;
+        }
+        return pos == other.pos
+            && begin == other.begin
+            && end == other.end;
+    }
+    return TRUE;
+}
+
+int32_t
+UCharCharacterIterator::hashCode() const {
+ return ustr_hashUCharsN(text, textLength) ^ pos ^ begin ^ end; // Hash of the whole text, XOR-ed with the position and the range limits.
+}
+
+UCharCharacterIterator*
+UCharCharacterIterator::clone() const {
+ return new UCharCharacterIterator(*this); // Heap-allocated copy made with the copy constructor.
+}
+
+// Moves to the start of the range and returns the unit there, or DONE if the range is empty.
+UChar
+UCharCharacterIterator::first() {
+    pos = begin;
+    if(pos >= end) {
+        return DONE;
+    }
+    return text[pos];
+}
+
+// Moves to the start of the range, returns the unit there and steps past it;
+// returns DONE if the range is empty.
+UChar
+UCharCharacterIterator::firstPostInc() {
+    pos = begin;
+    if(pos >= end) {
+        return DONE;
+    }
+    return text[pos++];
+}
+
+// Moves to the last unit of the range and returns it, or DONE if the range is empty.
+UChar
+UCharCharacterIterator::last() {
+    pos = end;
+    if(pos <= begin) {
+        return DONE;
+    }
+    --pos;
+    return text[pos];
+}
+
+// Sets the position, pinned to [begin, end], and returns the unit there,
+// or DONE when the position ends up at end.
+UChar
+UCharCharacterIterator::setIndex(int32_t position) {
+    pos = position;
+    if(pos < begin) {
+        pos = begin;
+    } else if(pos > end) {
+        pos = end;
+    }
+    if(pos >= end) {
+        return DONE;
+    }
+    return text[pos];
+}
+
+// Returns the unit at the current position, or DONE when the position is outside [begin, end).
+UChar
+UCharCharacterIterator::current() const {
+    if (pos < begin || pos >= end) {
+        return DONE;
+    }
+    return text[pos];
+}
+
+// Steps forward by one unit and returns the unit at the new position.
+// When no further unit exists, the position is set to end and DONE is returned.
+UChar
+UCharCharacterIterator::next() {
+    if (pos + 1 >= end) {
+        /* make current() return DONE */
+        pos = end;
+        return DONE;
+    }
+    ++pos;
+    return text[pos];
+}
+
+// Returns the unit at the current position and then steps past it; DONE at the end.
+UChar
+UCharCharacterIterator::nextPostInc() {
+    if (pos >= end) {
+        return DONE;
+    }
+    return text[pos++];
+}
+
+// TRUE while the position is before the end of the range.
+UBool
+UCharCharacterIterator::hasNext() {
+    return (UBool)(pos < end);
+}
+
+// Steps back by one unit and returns it, or DONE when already at the start of the range.
+UChar
+UCharCharacterIterator::previous() {
+    if (pos <= begin) {
+        return DONE;
+    }
+    --pos;
+    return text[pos];
+}
+
+// TRUE while the position is after the start of the range.
+UBool
+UCharCharacterIterator::hasPrevious() {
+    return (UBool)(pos > begin);
+}
+
+// Moves to the start of the range and returns the code point there without
+// advancing the position; DONE if the range is empty.
+UChar32
+UCharCharacterIterator::first32() {
+    pos = begin;
+    if(pos >= end) {
+        return DONE;
+    }
+    int32_t index = pos;  // U16_NEXT advances this copy, not pos.
+    UChar32 c;
+    U16_NEXT(text, index, end, c);
+    return c;
+}
+
+// Moves to the start of the range, returns the code point there and leaves
+// the position after it; DONE if the range is empty.
+UChar32
+UCharCharacterIterator::first32PostInc() {
+    pos = begin;
+    if(pos >= end) {
+        return DONE;
+    }
+    UChar32 c;
+    U16_NEXT(text, pos, end, c);
+    return c;
+}
+
+// Moves to the last code point of the range and returns it; DONE if the range is empty.
+UChar32
+UCharCharacterIterator::last32() {
+    pos = end;
+    if(pos <= begin) {
+        return DONE;
+    }
+    UChar32 c;
+    U16_PREV(text, begin, pos, c);
+    return c;
+}
+
+// Sets the position, pinned to [begin, end] and adjusted with
+// U16_SET_CP_START, and returns the code point there; DONE when the
+// position ends up at end.
+UChar32
+UCharCharacterIterator::setIndex32(int32_t position) {
+    if(position < begin) {
+        position = begin;
+    } else if(position > end) {
+        position = end;
+    }
+    if(position >= end) {
+        this->pos = position;
+        return DONE;
+    }
+    U16_SET_CP_START(text, begin, position);
+    this->pos = position;
+    int32_t index = position;  // U16_NEXT advances this copy, not pos.
+    UChar32 c;
+    U16_NEXT(text, index, end, c);
+    return c;
+}
+
+// Returns the code point at the current position, or DONE when the position
+// is outside [begin, end).
+UChar32
+UCharCharacterIterator::current32() const {
+    if (pos < begin || pos >= end) {
+        return DONE;
+    }
+    UChar32 c;
+    U16_GET(text, begin, pos, end, c);
+    return c;
+}
+
+UChar32
+UCharCharacterIterator::next32() {
+ if (pos < end) {
+ U16_FWD_1(text, pos, end); // Advance pos with the forward-by-one macro.
+ if(pos < end) {
+ int32_t i = pos; // Read through a copy so that pos stays where it is.
+ UChar32 c;
+ U16_NEXT(text, i, end, c);
+ return c;
+ }
+ }
+ /* make current() return DONE */
+ pos = end;
+ return DONE;
+}
+
+// Returns the code point at the current position and leaves the position after it; DONE at the end.
+UChar32
+UCharCharacterIterator::next32PostInc() {
+    if (pos >= end) {
+        return DONE;
+    }
+    UChar32 c;
+    U16_NEXT(text, pos, end, c);
+    return c;
+}
+
+// Steps back to the previous code point and returns it; DONE when already at the start of the range.
+UChar32
+UCharCharacterIterator::previous32() {
+    if (pos <= begin) {
+        return DONE;
+    }
+    UChar32 c;
+    U16_PREV(text, begin, pos, c);
+    return c;
+}
+
+// Moves the position by delta code units relative to the given origin,
+// pins it to [begin, end] and returns the new position. An unrecognized
+// origin leaves the position unchanged apart from the pinning.
+int32_t
+UCharCharacterIterator::move(int32_t delta, CharacterIterator::EOrigin origin) {
+    if(origin == kStart) {
+        pos = begin + delta;
+    } else if(origin == kCurrent) {
+        pos += delta;
+    } else if(origin == kEnd) {
+        pos = end + delta;
+    }
+
+    if(pos < begin) {
+        pos = begin;
+    } else if(pos > end) {
+        pos = end;
+    }
+
+    return pos;
+}
+
+int32_t
+UCharCharacterIterator::move32(int32_t delta, CharacterIterator::EOrigin origin) {
+ // this implementation relies on the "safe" version of the UTF macros
+ // (or the trustworthiness of the caller)
+ switch(origin) {
+ case kStart:
+ pos = begin;
+ if(delta > 0) { // From the start only forward movement is possible; other deltas leave pos at begin.
+ U16_FWD_N(text, pos, end, delta);
+ }
+ break;
+ case kCurrent:
+ if(delta > 0) {
+ U16_FWD_N(text, pos, end, delta);
+ } else {
+ U16_BACK_N(text, begin, pos, -delta); // Also reached with delta == 0, with a count of 0.
+ }
+ break;
+ case kEnd:
+ pos = end;
+ if(delta < 0) { // From the end only backward movement is possible; other deltas leave pos at end.
+ U16_BACK_N(text, begin, pos, -delta);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return pos; // The new position as a code unit index.
+}
+
+// Attaches new text and resets the iterator to cover all of it, positioned
+// at index 0. A null pointer or a negative length gives an empty range.
+void UCharCharacterIterator::setText(ConstChar16Ptr newText,
+                                     int32_t newTextLength) {
+    text = newText;
+    const int32_t length = (newText == 0 || newTextLength < 0) ? 0 : newTextLength;
+    textLength = length;
+    end = length;
+    begin = 0;
+    pos = 0;
+}
+
+void
+UCharCharacterIterator::getText(UnicodeString& result) {
+ result = UnicodeString(text, textLength); // Uses the full textLength, not just the [begin, end) range.
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/ucln.h b/thirdparty/icu4c/common/ucln.h
new file mode 100644
index 0000000000..fe6666efed
--- /dev/null
+++ b/thirdparty/icu4c/common/ucln.h
@@ -0,0 +1,91 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2001-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ucln.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001July05
+* created by: George Rhoten
+*/
+
+#ifndef __UCLN_H__
+#define __UCLN_H__
+
+#include "unicode/utypes.h"
+
+/** These are the functions used to register a library's memory cleanup
+ * functions. Each library should define a single library register function
+ * to call this API. In the i18n library, it is ucln_i18n_registerCleanup().
+ *
+ * None of the cleanup functions should use a mutex to clean up an API's
+ * allocated memory because a cleanup function is not meant to be thread safe,
+ * and plenty of data cannot be reference counted in order to make sure that
+ * no one else needs the allocated data.
+ *
+ * In order to make a cleanup function get called when u_cleanup is called,
+ * You should add your function to the library specific cleanup function.
+ * If the cleanup function is not in the common library, the code that
+ * allocates the memory should call the library specific cleanup function.
+ * For instance, in the i18n library, any memory allocated statically must
+ * call ucln_i18n_registerCleanup() from the ucln_in.h header. These library
+ * cleanup functions are needed in order to prevent a circular dependency
+ * between the common library and any other library.
+ *
+ * The order of the cleanup is very important. In general, an API that
+ * depends on a second API should be cleaned up before the second API.
+ * For instance, the default converter in ustring depends upon the converter
+ * API. So the default converter should be closed before the converter API
+ * has its cache flushed. This will prevent any memory leaks due to
+ * reference counting.
+ *
+ * Please see common/ucln_cmn.{h,c} and i18n/ucln_in.{h,c} for examples.
+ */
+
+/**
+ * Data Type for cleanup function selector. These roughly correspond to libraries.
+ */
+typedef enum ECleanupLibraryType {
+ UCLN_START = -1, /* Marker below the first selector; the selectors after it count up from 0. */
+ UCLN_UPLUG, /* ICU plugins */
+ UCLN_CUSTOM, /* Custom is for anyone else. */
+ UCLN_CTESTFW,
+ UCLN_TOOLUTIL,
+ UCLN_LAYOUTEX,
+ UCLN_LAYOUT,
+ UCLN_IO,
+ UCLN_I18N,
+ UCLN_COMMON /* This must be the last one to cleanup. */
+} ECleanupLibraryType;
+
+/**
+ * Function types for the cleanup machinery.
+ * cleanupFunc takes no arguments and returns a UBool;
+ * initFunc receives a pointer to a UErrorCode and returns nothing.
+ */
+U_CDECL_BEGIN
+typedef UBool U_CALLCONV cleanupFunc(void);
+typedef void U_CALLCONV initFunc(UErrorCode *);
+U_CDECL_END
+
+/**
+ * Register a cleanup function
+ * @param type which library to register for.
+ * @param func the function pointer
+ */
+U_CAPI void U_EXPORT2 ucln_registerCleanup(ECleanupLibraryType type,
+ cleanupFunc *func);
+
+/**
+ * Request cleanup for one specific library.
+ * Not thread safe.
+ * @param type which library to cleanup
+ */
+U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType type);
+
+#endif
diff --git a/thirdparty/icu4c/common/ucln_cmn.cpp b/thirdparty/icu4c/common/ucln_cmn.cpp
new file mode 100644
index 0000000000..f3e07c6b89
--- /dev/null
+++ b/thirdparty/icu4c/common/ucln_cmn.cpp
@@ -0,0 +1,124 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2001-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* file name: ucln_cmn.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001July05
+* created by: George Rhoten
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uclean.h"
+#include "cmemory.h"
+#include "mutex.h"
+#include "uassert.h"
+#include "ucln.h"
+#include "ucln_cmn.h"
+#include "utracimp.h"
+#include "umutex.h"
+
+/** Auto-client for UCLN_COMMON **/
+#define UCLN_TYPE_IS_COMMON
+#include "ucln_imp.h"
+
+static cleanupFunc *gCommonCleanupFunctions[UCLN_COMMON_COUNT]; /* Indexed by ECleanupCommonType; a NULL slot means nothing is registered. */
+static cleanupFunc *gLibCleanupFunctions[UCLN_COMMON]; /* Indexed by ECleanupLibraryType values below UCLN_COMMON. */
+
+
+/************************************************
+ The cleanup order is important in this function.
+ Please be sure that you have read ucln.h
+ ************************************************/
+U_CAPI void U_EXPORT2
+u_cleanup(void)
+{
+ UTRACE_ENTRY_OC(UTRACE_U_CLEANUP);
+ icu::umtx_lock(NULL); /* Force a memory barrier, so that we are sure to see */
+ icu::umtx_unlock(NULL); /* all state left around by any other threads. */
+
+ ucln_lib_cleanup(); /* Runs the registered per-library cleanups, then the common-library ones. */
+
+ cmemory_cleanup(); /* undo any heap functions set by u_setMemoryFunctions(). */
+ UTRACE_EXIT(); /* Must be before utrace_cleanup(), which turns off tracing. */
+/*#if U_ENABLE_TRACING*/
+ utrace_cleanup();
+/*#endif*/
+}
+
+/**
+ * Run, and then clear, the cleanup function registered for one library.
+ * Does nothing when no function is registered for libType, or when libType
+ * is outside the range accepted by ucln_registerCleanup()
+ * (UCLN_START < libType < UCLN_COMMON).
+ * Not thread safe.
+ * @param libType which library to clean up
+ */
+U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType libType)
+{
+    /* gLibCleanupFunctions has exactly UCLN_COMMON slots. Reject any other
+       selector instead of reading (and calling through) memory outside
+       the array. */
+    if (libType <= UCLN_START || libType >= UCLN_COMMON)
+    {
+        return;
+    }
+    if (gLibCleanupFunctions[libType])
+    {
+        gLibCleanupFunctions[libType]();
+        /* Clear the slot so that the function is not run a second time. */
+        gLibCleanupFunctions[libType] = NULL;
+    }
+}
+
+U_CFUNC void
+ucln_common_registerCleanup(ECleanupCommonType type,
+ cleanupFunc *func)
+{
+ // Thread safety messiness: From ticket 10295, calls to registerCleanup() may occur
+ // concurrently. Although such cases should be storing the same value, they raise errors
+ // from the thread sanity checker. Doing the store within a mutex avoids those.
+ // BUT that can trigger a recursive entry into std::call_once() in umutex.cpp when this code,
+ // running from the call_once function, tries to grab the ICU global mutex, which
+ // re-enters the mutex init path. So, work-around by special casing UCLN_COMMON_MUTEX, not
+ // using the ICU global mutex for it.
+ //
+ // No other point in ICU uses std::call_once().
+
+ U_ASSERT(UCLN_COMMON_START < type && type < UCLN_COMMON_COUNT);
+ if (type == UCLN_COMMON_MUTEX) {
+ gCommonCleanupFunctions[type] = func; // Stored without taking the ICU global mutex, as explained above.
+ } else if (UCLN_COMMON_START < type && type < UCLN_COMMON_COUNT) { // Out-of-range types are not stored.
+ icu::Mutex m; // See ticket 10295 for discussion.
+ gCommonCleanupFunctions[type] = func;
+ }
+#if !UCLN_NO_AUTO_CLEANUP && (defined(UCLN_AUTO_ATEXIT) || defined(UCLN_AUTO_LOCAL))
+ ucln_registerAutomaticCleanup(); // Reached for every call, whatever the type.
+#endif
+}
+
+// Note: ucln_registerCleanup() is called with the ICU global mutex locked.
+// Be aware if adding anything to the function.
+// See ticket 10295 for discussion.
+
+/* Stores func as the cleanup function for the given library type.
+   Types outside (UCLN_START, UCLN_COMMON) are asserted against and otherwise ignored. */
+U_CAPI void U_EXPORT2
+ucln_registerCleanup(ECleanupLibraryType type,
+                     cleanupFunc *func)
+{
+    U_ASSERT(UCLN_START < type && type < UCLN_COMMON);
+    if (type <= UCLN_START || type >= UCLN_COMMON)
+    {
+        return;
+    }
+    gLibCleanupFunctions[type] = func;
+}
+
+/* Runs every registered library cleanup function in selector order, then
+   every registered common-library cleanup function in enum order. Each
+   slot is cleared after its function has run. Always returns TRUE. */
+U_CFUNC UBool ucln_lib_cleanup(void) {
+    for (int32_t libType = UCLN_START + 1; libType < UCLN_COMMON; ++libType) {
+        ucln_cleanupOne(static_cast<ECleanupLibraryType>(libType));
+    }
+
+    for (int32_t commonFunc = UCLN_COMMON_START + 1; commonFunc < UCLN_COMMON_COUNT; ++commonFunc) {
+        cleanupFunc *func = gCommonCleanupFunctions[commonFunc];
+        if (func)
+        {
+            func();
+            gCommonCleanupFunctions[commonFunc] = NULL;
+        }
+    }
+#if !UCLN_NO_AUTO_CLEANUP && (defined(UCLN_AUTO_ATEXIT) || defined(UCLN_AUTO_LOCAL))
+    ucln_unRegisterAutomaticCleanup();
+#endif
+    return TRUE;
+}
diff --git a/thirdparty/icu4c/common/ucln_cmn.h b/thirdparty/icu4c/common/ucln_cmn.h
new file mode 100644
index 0000000000..44b73e94da
--- /dev/null
+++ b/thirdparty/icu4c/common/ucln_cmn.h
@@ -0,0 +1,77 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2001-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* file name: ucln_cmn.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001July05
+* created by: George Rhoten
+*/
+
+#ifndef __UCLN_CMN_H__
+#define __UCLN_CMN_H__
+
+#include "unicode/utypes.h"
+#include "ucln.h"
+
+/* These are the cleanup functions for various APIs. */
+/* @return true if cleanup complete successfully.*/
+U_CFUNC UBool utrace_cleanup(void);
+
+U_CFUNC UBool ucln_lib_cleanup(void);
+
+/*
+Please keep the enum constants declared in the same order
+in which the cleanup functions are supposed to be called. */
+typedef enum ECleanupCommonType {
+ UCLN_COMMON_START = -1,
+ UCLN_COMMON_NUMPARSE_UNISETS,
+ UCLN_COMMON_USPREP,
+ UCLN_COMMON_BREAKITERATOR,
+ UCLN_COMMON_RBBI,
+ UCLN_COMMON_SERVICE,
+ UCLN_COMMON_LOCALE_KEY_TYPE,
+ UCLN_COMMON_LOCALE,
+ UCLN_COMMON_LOCALE_ALIAS,
+ UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
+ UCLN_COMMON_LOCALE_AVAILABLE,
+ UCLN_COMMON_LIKELY_SUBTAGS,
+ UCLN_COMMON_LOCALE_DISTANCE,
+ UCLN_COMMON_ULOC,
+ UCLN_COMMON_CURRENCY,
+ UCLN_COMMON_LOADED_NORMALIZER2,
+ UCLN_COMMON_NORMALIZER2,
+ UCLN_COMMON_CHARACTERPROPERTIES,
+ UCLN_COMMON_USET,
+ UCLN_COMMON_UNAMES,
+ UCLN_COMMON_UPROPS,
+ UCLN_COMMON_UCNV,
+ UCLN_COMMON_UCNV_IO,
+ UCLN_COMMON_UDATA,
+ UCLN_COMMON_PUTIL,
+ UCLN_COMMON_UINIT,
+
+ /*
+ The unified cache caches collation data. Collation data structures
+ contain resource bundles, which means that the unified cache cleanup
+ must happen before the resource bundle cleanup.
+ */
+ UCLN_COMMON_UNIFIED_CACHE,
+ UCLN_COMMON_URES,
+ UCLN_COMMON_MUTEX, // Mutexes should be the last to be cleaned up.
+ UCLN_COMMON_COUNT /* This must be last */
+} ECleanupCommonType;
+
+/* Main library cleanup registration function. */
+/* See common/ucln.h for details on adding a cleanup function. */
+/* Note: the global mutex must not be held when calling this function. */
+U_CFUNC void U_EXPORT2 ucln_common_registerCleanup(ECleanupCommonType type,
+ cleanupFunc *func);
+
+#endif
diff --git a/thirdparty/icu4c/common/ucln_imp.h b/thirdparty/icu4c/common/ucln_imp.h
new file mode 100644
index 0000000000..63a54c86f6
--- /dev/null
+++ b/thirdparty/icu4c/common/ucln_imp.h
@@ -0,0 +1,182 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2009-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ucln_imp.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* This file contains the platform specific implementation of per-library cleanup.
+*
+*/
+
+
+#ifndef __UCLN_IMP_H__
+#define __UCLN_IMP_H__
+
+#include "ucln.h"
+#include <stdlib.h>
+
+/**
+ * Auto cleanup of ICU libraries
+ * There are several methods in per library cleanup of icu libraries:
+ * 1) Compiler/Platform based cleanup:
+ * a) Windows MSVC uses DllMain()
+ * b) GCC uses destructor function attribute
+ * c) Sun Studio, AIX VA, and HP-UX aCC uses a linker option to set the exit function
+ * 2) Using atexit()
+ * 3) Implementing own automatic cleanup functions
+ *
+ * For option 1, ensure that UCLN_NO_AUTO_CLEANUP is set to 0 by using --enable-auto-cleanup
+ * configure option or by otherwise setting UCLN_NO_AUTO_CLEANUP to 0
+ * For option 2, follow option 1 and also define UCLN_AUTO_ATEXIT
+ * For option 3, follow option 1 and also define UCLN_AUTO_LOCAL (see below for more information)
+ */
+
+#if !UCLN_NO_AUTO_CLEANUP
+
+/*
+ * The following declarations are for when UCLN_AUTO_LOCAL or UCLN_AUTO_ATEXIT
+ * are defined. They are commented out because they are static and will be defined
+ * later. The information is still here to provide some guidance for the developer
+ * who chooses to use UCLN_AUTO_LOCAL.
+ */
+/**
+ * Give the library an opportunity to register an automatic cleanup.
+ * This may be called more than once.
+ */
+/*static void ucln_registerAutomaticCleanup();*/
+/**
+ * Unregister an automatic cleanup, if possible. Called from cleanup.
+ */
+/*static void ucln_unRegisterAutomaticCleanup();*/
+
+#ifdef UCLN_TYPE_IS_COMMON
+# define UCLN_CLEAN_ME_UP u_cleanup()
+#else
+# define UCLN_CLEAN_ME_UP ucln_cleanupOne(UCLN_TYPE)
+#endif
+
+/* ------------ automatic cleanup: registration. Choose ONE ------- */
+#if defined(UCLN_AUTO_LOCAL)
+/* To use:
+ * 1. define UCLN_AUTO_LOCAL,
+ * 2. create ucln_local_hook.c containing implementations of
+ * static void ucln_registerAutomaticCleanup()
+ * static void ucln_unRegisterAutomaticCleanup()
+ */
+#include "ucln_local_hook.c"
+
+#elif defined(UCLN_AUTO_ATEXIT)
+/*
+ * Use the ANSI C 'atexit' function. Note that this mechanism does not
+ * guarantee the order of cleanup relative to other users of ICU!
+ */
+static UBool gAutoCleanRegistered = false; /* Set once the atexit handler has been installed. */
+
+static void ucln_atexit_handler() /* Installed with atexit() by ucln_registerAutomaticCleanup(). */
+{
+ UCLN_CLEAN_ME_UP; /* u_cleanup() for the common library, ucln_cleanupOne(UCLN_TYPE) otherwise. */
+}
+
+/* Installs ucln_atexit_handler with atexit(). May be called repeatedly;
+   only the first call installs the handler. */
+static void ucln_registerAutomaticCleanup()
+{
+    if(gAutoCleanRegistered) {
+        return;
+    }
+    gAutoCleanRegistered = true;
+    atexit(&ucln_atexit_handler);
+}
+
+static void ucln_unRegisterAutomaticCleanup () { /* Intentionally empty in the atexit variant. */
+}
+/* ------------end of automatic cleanup: registration. ------- */
+
+#elif defined (UCLN_FINI)
+/**
+ * If UCLN_FINI is defined, it is the (versioned, etc) name of a cleanup
+ * entrypoint. Add a stub to call ucln_cleanupOne
+ * Used on AIX, Solaris, and HP-UX
+ */
+U_CAPI void U_EXPORT2 UCLN_FINI (void);
+
+U_CAPI void U_EXPORT2 UCLN_FINI ()
+{
+ /* This function must be defined, if UCLN_FINI is defined, else link error. */
+ UCLN_CLEAN_ME_UP; /* u_cleanup() for the common library, ucln_cleanupOne(UCLN_TYPE) otherwise. */
+}
+
+/* Windows: DllMain */
+#elif U_PLATFORM_HAS_WIN32_API
+/*
+ * ICU's own DllMain.
+ */
+
+/* these are from putil.c */
+/* READ READ READ READ! Are you getting compilation errors from windows.h?
+ Any source file which includes this (ucln_imp.h) header MUST
+ be defined with language extensions ON. */
+#ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+#endif
+# define VC_EXTRALEAN
+# define NOUSER
+# define NOSERVICE
+# define NOIME
+# define NOMCX
+# include <windows.h>
+/*
+ * This is a stub DllMain function with icu specific process handling code.
+ */
+BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
+{
+ BOOL status = true; /* Never changed below, so the function always returns true. */
+
+ switch(fdwReason) {
+ case DLL_PROCESS_ATTACH:
+ /* ICU does not trap process attach, but must pass these through properly. */
+ /* ICU specific process attach could go here */
+ break;
+
+ case DLL_PROCESS_DETACH:
+ /* Here is the one we actually care about. */
+
+ UCLN_CLEAN_ME_UP; /* u_cleanup() for the common library, ucln_cleanupOne(UCLN_TYPE) otherwise. */
+
+ break;
+
+ case DLL_THREAD_ATTACH:
+ /* ICU does not trap thread attach, but must pass these through properly. */
+ /* ICU specific thread attach could go here */
+ break;
+
+ case DLL_THREAD_DETACH:
+ /* ICU does not trap thread detach, but must pass these through properly. */
+ /* ICU specific thread detach could go here */
+ break;
+
+ }
+ return status;
+}
+
+#elif defined(__GNUC__)
+/* GCC - use __attribute((destructor)) */
+static void ucln_destructor() __attribute__((destructor)) ;
+
+static void ucln_destructor()
+{
+ UCLN_CLEAN_ME_UP; /* u_cleanup() for the common library, ucln_cleanupOne(UCLN_TYPE) otherwise. */
+}
+
+#endif
+
+#endif /* UCLN_NO_AUTO_CLEANUP */
+
+#else
+#error This file can only be included once.
+#endif
diff --git a/thirdparty/icu4c/common/ucmndata.cpp b/thirdparty/icu4c/common/ucmndata.cpp
new file mode 100644
index 0000000000..ba2310bb7a
--- /dev/null
+++ b/thirdparty/icu4c/common/ucmndata.cpp
@@ -0,0 +1,393 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************/
+
+
+/*------------------------------------------------------------------------------
+ *
+ * UCommonData An abstract interface for dealing with ICU Common Data Files.
+ * ICU Common Data Files are a grouping of a number of individual
+ * data items (resources, converters, tables, anything) into a
+ * single file or dll. The combined format includes a table of
+ * contents for locating the individual items by name.
+ *
+ * Two formats for the table of contents are supported, which is
+ * why there is an abstract interface involved.
+ *
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/udata.h"
+#include "cstring.h"
+#include "ucmndata.h"
+#include "udatamem.h"
+
+#if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP)
+# include <stdio.h>
+#endif
+
+/*
+ * Returns the headerSize field of a data header in platform byte order.
+ * A NULL header yields 0. When the data's endianness flag differs from the
+ * platform's, the two bytes of the stored value are swapped.
+ */
+U_CFUNC uint16_t
+udata_getHeaderSize(const DataHeader *udh) {
+    if(udh==NULL) {
+        return 0;
+    }
+
+    const uint16_t rawSize=udh->dataHeader.headerSize;
+    if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) {
+        /* same endianness: the stored value is directly usable */
+        return rawSize;
+    }
+
+    /* opposite endianness: exchange the low and high bytes */
+    return (uint16_t)(((rawSize&0xff)<<8)|(rawSize>>8));
+}
+
+/*
+ * Returns the size field of a UDataInfo in platform byte order.
+ * A NULL pointer yields 0. When the data's endianness flag differs from the
+ * platform's, the two bytes of the stored value are swapped.
+ */
+U_CFUNC uint16_t
+udata_getInfoSize(const UDataInfo *info) {
+    if(info==NULL) {
+        return 0;
+    }
+
+    const uint16_t rawSize=info->size;
+    if(info->isBigEndian==U_IS_BIG_ENDIAN) {
+        /* same endianness: the stored value is directly usable */
+        return rawSize;
+    }
+
+    /* opposite endianness: exchange the low and high bytes */
+    return (uint16_t)(((rawSize&0xff)<<8)|(rawSize>>8));
+}
+
+/*-----------------------------------------------------------------------------*
+ * *
+ * Pointer TOCs. TODO: This form of table-of-contents should be removed *
+ * because DLLs must be relocated on loading to correct the *
+ * pointer values and this operation makes shared memory *
+ * mapping of the data much less likely to work. *
+ * *
+ *-----------------------------------------------------------------------------*/
+/* One entry of a pointer-based table of contents. */
+typedef struct {
+ const char *entryName; /* item name; the key compared by pointerTOCPrefixBinarySearch() */
+ const DataHeader *pHeader; /* item data; pointerTOCLookupFn() returns it after UDataMemory_normalizeDataPointer() */
+} PointerTOCEntry;
+
+
+/* Pointer-based table of contents: an entry count followed by the entries themselves. */
+typedef struct {
+ uint32_t count; /* number of elements in entry[] */
+ uint32_t reserved; /* not read by the code in this file */
+ /**
+ * Variable-length array declared with length 1 to disable bounds checkers.
+ * The actual array length is in the count field.
+ */
+ PointerTOCEntry entry[1];
+} PointerTOC;
+
+
+/* definition of OffsetTOC struct types moved to ucmndata.h */
+
+/*-----------------------------------------------------------------------------*
+ * *
+ * entry point lookup implementations *
+ * *
+ *-----------------------------------------------------------------------------*/
+
+/* Smaller of two values. Function-like macro: the selected argument is evaluated twice, so avoid side effects. */
+#ifndef MIN
+#define MIN(a,b) (((a)<(b)) ? (a) : (b))
+#endif
+
+/**
+ * Compare strings where we know the shared prefix length,
+ * and advance the prefix length as we find that the strings share even more characters.
+ */
+static int32_t
+strcmpAfterPrefix(const char *s1, const char *s2, int32_t *pPrefixLength) {
+    /* Compare as unsigned bytes so that the result does not depend on char signedness. */
+    const uint8_t *bytes1=(const uint8_t *)s1;
+    const uint8_t *bytes2=(const uint8_t *)s2;
+    int32_t shared=*pPrefixLength;
+
+    /* Skip the known-equal prefix, then keep extending it while both strings agree. */
+    while(bytes1[shared]==bytes2[shared] && bytes1[shared]!=0) {
+        ++shared;
+    }
+
+    /* Report the (possibly longer) shared prefix back to the caller. */
+    *pPrefixLength=shared;
+
+    /* Difference of the first mismatching bytes, or 0 when both strings ended together. */
+    return (int32_t)bytes1[shared]-(int32_t)bytes2[shared];
+}
+
+/*
+ * Binary search for the name s in an offset-based table of contents.
+ * Entry i's name is the NUL-terminated string at names+toc[i].nameOffset.
+ * Returns the index of the matching entry, or -1 if there is none.
+ */
+static int32_t
+offsetTOCPrefixBinarySearch(const char *s, const char *names,
+                            const UDataOffsetTOCEntry *toc, int32_t count) {
+    /*
+     * lowPrefix/highPrefix remember how many leading characters s shares with
+     * the entries bounding the current [low, high[ range, so that those
+     * characters are not compared again. The shared prefix should grow as the
+     * range narrows.
+     */
+    int32_t lowPrefix=0;
+    int32_t highPrefix=0;
+    int32_t low, high;
+
+    if(count==0) {
+        return -1;
+    }
+
+    /*
+     * Check the first and last entries up front. This primes both prefix
+     * lengths (otherwise the usable prefix would stay 0 until both bounds had
+     * moved) and removes the two endpoints from the search proper.
+     */
+    if(strcmpAfterPrefix(s, names+toc[0].nameOffset, &lowPrefix)==0) {
+        return 0;
+    }
+    low=1;
+    high=count-1;
+    if(strcmpAfterPrefix(s, names+toc[high].nameOffset, &highPrefix)==0) {
+        return high;
+    }
+
+    while(low<high) {
+        const int32_t mid=(low+high)/2;
+        /* s shares at least the shorter of the two bounding prefixes with every entry in range */
+        int32_t sharedPrefix=MIN(lowPrefix, highPrefix);
+        const int32_t cmp=strcmpAfterPrefix(s, names+toc[mid].nameOffset, &sharedPrefix);
+        if(cmp==0) {
+            return mid;
+        }
+        if(cmp<0) {
+            high=mid;
+            highPrefix=sharedPrefix;
+        } else {
+            low=mid+1;
+            lowPrefix=sharedPrefix;
+        }
+    }
+    return -1;
+}
+
+/*
+ * Binary search for the name s in a pointer-based table of contents,
+ * comparing against each entry's entryName.
+ * Returns the index of the matching entry, or -1 if there is none.
+ */
+static int32_t
+pointerTOCPrefixBinarySearch(const char *s, const PointerTOCEntry *toc, int32_t count) {
+    /*
+     * lowPrefix/highPrefix remember how many leading characters s shares with
+     * the entries bounding the current [low, high[ range, so that those
+     * characters are not compared again. The shared prefix should grow as the
+     * range narrows.
+     */
+    int32_t lowPrefix=0;
+    int32_t highPrefix=0;
+    int32_t low, high;
+
+    if(count==0) {
+        return -1;
+    }
+
+    /*
+     * Check the first and last entries up front. This primes both prefix
+     * lengths (otherwise the usable prefix would stay 0 until both bounds had
+     * moved) and removes the two endpoints from the search proper.
+     */
+    if(strcmpAfterPrefix(s, toc[0].entryName, &lowPrefix)==0) {
+        return 0;
+    }
+    low=1;
+    high=count-1;
+    if(strcmpAfterPrefix(s, toc[high].entryName, &highPrefix)==0) {
+        return high;
+    }
+
+    while(low<high) {
+        const int32_t mid=(low+high)/2;
+        /* s shares at least the shorter of the two bounding prefixes with every entry in range */
+        int32_t sharedPrefix=MIN(lowPrefix, highPrefix);
+        const int32_t cmp=strcmpAfterPrefix(s, toc[mid].entryName, &sharedPrefix);
+        if(cmp==0) {
+            return mid;
+        }
+        if(cmp<0) {
+            high=mid;
+            highPrefix=sharedPrefix;
+        } else {
+            low=mid+1;
+            lowPrefix=sharedPrefix;
+        }
+    }
+    return -1;
+}
+
+U_CDECL_BEGIN
+/*
+ * Number of entries in an offset-based table of contents.
+ * Returns 0 when pData has no TOC. The count is kept unsigned throughout
+ * instead of passing through a signed temporary, matching pointerTOCEntryCount().
+ */
+static uint32_t U_CALLCONV
+offsetTOCEntryCount(const UDataMemory *pData) {
+    const UDataOffsetTOC *toc = (const UDataOffsetTOC *)pData->toc;
+    return (toc != NULL) ? toc->count : 0;
+}
+
+/*
+ * Lookup function for offset-based ("CmnD") tables of contents.
+ *
+ * pData        data whose toc field points at a UDataOffsetTOC, or is NULL
+ * tocEntryName name of the item to find
+ * pLength      on a hit, receives the distance to the next entry's data,
+ *              or -1 for the last entry; left untouched otherwise
+ * pErrorCode   unused
+ *
+ * Returns the item's header, NULL if the name is not in the TOC, or
+ * pData->pHeader when there is no TOC at all.
+ */
+static const DataHeader * U_CALLCONV
+offsetTOCLookupFn(const UDataMemory *pData,
+                  const char *tocEntryName,
+                  int32_t *pLength,
+                  UErrorCode *pErrorCode) {
+    (void)pErrorCode;
+    const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc;
+    if(toc==NULL) {
+#ifdef UDATA_DEBUG
+        fprintf(stderr, "returning header\n");
+#endif
+
+        return pData->pHeader;
+    }
+
+    /* name and data offsets in the entries are relative to the start of the TOC */
+    const char *base=(const char *)toc;
+    const int32_t count=(int32_t)toc->count;
+
+#if defined (UDATA_DEBUG_DUMP)
+    /* list the contents of the TOC each time .. not recommended */
+    for(int32_t i=0; i<count; ++i) {
+        fprintf(stderr, "\tx%d: %s\n", i, &base[toc->entry[i].nameOffset]);
+    }
+#endif
+
+    /* perform a binary search for the data in the common data's table of contents */
+    const int32_t index=offsetTOCPrefixBinarySearch(tocEntryName, base, toc->entry, count);
+    if(index<0) {
+#ifdef UDATA_DEBUG
+        fprintf(stderr, "%s: Not found.\n", tocEntryName);
+#endif
+        return NULL;
+    }
+
+    /* found it */
+    const UDataOffsetTOCEntry *entry=toc->entry+index;
+#ifdef UDATA_DEBUG
+    fprintf(stderr, "%s: Found.\n", tocEntryName);
+#endif
+    if((index+1) < count) {
+        *pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset);
+    } else {
+        /* last entry: there is no following offset to measure against */
+        *pLength = -1;
+    }
+    return (const DataHeader *)(base+entry->dataOffset);
+}
+
+
+/* Number of entries in a pointer-based table of contents; 0 when pData has no TOC. */
+static uint32_t U_CALLCONV pointerTOCEntryCount(const UDataMemory *pData) {
+    const PointerTOC *toc = (PointerTOC *)pData->toc;
+    if (toc == NULL) {
+        return 0;
+    }
+    return (uint32_t)toc->count;
+}
+
+/*
+ * Lookup function for pointer-based ("ToCP") tables of contents.
+ *
+ * pData      data whose toc field points at a PointerTOC, or is NULL
+ * name       name of the item to find
+ * pLength    set to -1 on a hit (the length is not known); untouched otherwise
+ * pErrorCode unused
+ *
+ * Returns the item's normalized header pointer, NULL if the name is not in
+ * the TOC, or pData->pHeader when there is no TOC at all.
+ */
+static const DataHeader * U_CALLCONV pointerTOCLookupFn(const UDataMemory *pData,
+                   const char *name,
+                   int32_t *pLength,
+                   UErrorCode *pErrorCode) {
+    (void)pErrorCode;
+    if(pData->toc==NULL) {
+        return pData->pHeader;
+    }
+
+    const PointerTOC *toc = (PointerTOC *)pData->toc;
+    const int32_t count=(int32_t)toc->count;
+
+#if defined (UDATA_DEBUG_DUMP)
+    /* list the contents of the TOC each time .. not recommended */
+    for(int32_t i=0; i<count; ++i) {
+        fprintf(stderr, "\tx%d: %s\n", i, toc->entry[i].entryName);
+    }
+#endif
+
+    const int32_t index=pointerTOCPrefixBinarySearch(name, toc->entry, count);
+    if(index<0) {
+#ifdef UDATA_DEBUG
+        fprintf(stderr, "%s: Not found.\n", name);
+#endif
+        return NULL;
+    }
+
+    /* found it */
+#ifdef UDATA_DEBUG
+    fprintf(stderr, "%s: Found.\n", toc->entry[index].entryName);
+#endif
+    *pLength=-1;
+    return UDataMemory_normalizeDataPointer(toc->entry[index].pHeader);
+}
+U_CDECL_END
+
+
+/* Dispatch tables {Lookup, NumEntries}; udata_checkCommonData() installs one of them in UDataMemory.vFuncs according to the data format ("CmnD" or "ToCP"). */
+static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn, offsetTOCEntryCount};
+static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount};
+
+
+
+/*----------------------------------------------------------------------*
+ * *
+ * checkCommonData Validate the format of a common data file. *
+ * Fill in the virtual function ptr based on TOC type *
+ * If the data is invalid, close the UDataMemory *
+ * and set the appropriate error code. *
+ * *
+ *----------------------------------------------------------------------*/
+U_CFUNC void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) {
+    if (U_FAILURE(*err)) {
+        return;
+    }
+
+    /* A missing UDataMemory and a missing header are treated the same way. */
+    const DataHeader *header = (udm!=NULL) ? udm->pHeader : NULL;
+
+    if(header==NULL) {
+        *err=U_INVALID_FORMAT_ERROR;
+    } else {
+        const MappedData *prefix=&header->dataHeader;
+        const UDataInfo *info=&header->info;
+
+        if(prefix->magic1!=0xda ||
+           prefix->magic2!=0x27 ||
+           info->isBigEndian!=U_IS_BIG_ENDIAN ||
+           info->charsetFamily!=U_CHARSET_FAMILY
+        ) {
+            /* header not valid */
+            *err=U_INVALID_FORMAT_ERROR;
+        } else if(info->dataFormat[0]==0x43 &&
+                  info->dataFormat[1]==0x6d &&
+                  info->dataFormat[2]==0x6e &&
+                  info->dataFormat[3]==0x44 &&
+                  info->formatVersion[0]==1
+        ) {
+            /* dataFormat="CmnD": offset-based table of contents */
+            udm->vFuncs = &CmnDFuncs;
+            udm->toc=(const char *)header+udata_getHeaderSize(header);
+        } else if(info->dataFormat[0]==0x54 &&
+                  info->dataFormat[1]==0x6f &&
+                  info->dataFormat[2]==0x43 &&
+                  info->dataFormat[3]==0x50 &&
+                  info->formatVersion[0]==1
+        ) {
+            /* dataFormat="ToCP": pointer-based table of contents */
+            udm->vFuncs = &ToCPFuncs;
+            udm->toc=(const char *)header+udata_getHeaderSize(header);
+        } else {
+            /* dataFormat not recognized */
+            *err=U_INVALID_FORMAT_ERROR;
+        }
+    }
+
+    if (U_FAILURE(*err)) {
+        /* If the data is no good and we memory-mapped it ourselves,
+         * close the memory mapping so it doesn't leak. Note that this has
+         * no effect on non-memory mapped data, other than clearing fields in udm.
+         */
+        udata_close(udm);
+    }
+}
+
+/*
+ * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package
+ * header but not its sub-items.
+ * This function will be needed for automatic runtime swapping.
+ * Sub-items should not be swapped to limit the swapping to the parts of the
+ * package that are actually used.
+ *
+ * Since lengths of items are implicit in the order and offsets of their
+ * ToC entries, and since offsets are relative to the start of the ToC,
+ * a swapped version may need to generate a different data structure
+ * with pointers to the original data items and with their lengths
+ * (-1 for the last one if it is not known), and maybe even pointers to the
+ * swapped versions of the items.
+ * These pointers to swapped versions would establish a cache;
+ * instead, each open data item could simply own the storage for its swapped
+ * data. This fits better with the current design.
+ *
+ * markus 2003sep18 Jitterbug 2235
+ */
diff --git a/thirdparty/icu4c/common/ucmndata.h b/thirdparty/icu4c/common/ucmndata.h
new file mode 100644
index 0000000000..c3eba9f4d0
--- /dev/null
+++ b/thirdparty/icu4c/common/ucmndata.h
@@ -0,0 +1,117 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************/
+
+
+/*----------------------------------------------------------------------------------
+ *
+ * UCommonData An abstract interface for dealing with ICU Common Data Files.
+ * ICU Common Data Files are a grouping of a number of individual
+ * data items (resources, converters, tables, anything) into a
+ * single file or dll. The combined format includes a table of
+ * contents for locating the individual items by name.
+ *
+ * Two formats for the table of contents are supported, which is
+ * why there is an abstract interface involved.
+ *
+ * These functions are part of the ICU internal implementation, and
+ * are not intended to be used directly by applications.
+ */
+
+#ifndef __UCMNDATA_H__
+#define __UCMNDATA_H__
+
+#include "unicode/udata.h"
+#include "umapfile.h"
+
+
+#define COMMON_DATA_NAME U_ICUDATA_NAME
+
+/* Leading part of a DataHeader: the header size plus two magic bytes. */
+typedef struct {
+ uint16_t headerSize; /* read via udata_getHeaderSize(), which byte-swaps it for opposite-endian data */
+ uint8_t magic1; /* udata_checkCommonData() requires 0xda */
+ uint8_t magic2; /* udata_checkCommonData() requires 0x27 */
+} MappedData;
+
+
+/* Header found at the start of a data item: the MappedData prefix followed by the public UDataInfo. */
+typedef struct {
+ MappedData dataHeader;
+ UDataInfo info;
+} DataHeader;
+
+/* One entry of an offset-based table of contents; the lookup code in ucmndata.cpp adds both offsets to the TOC's own start address. */
+typedef struct {
+ uint32_t nameOffset; /* offset of the item's NUL-terminated name */
+ uint32_t dataOffset; /* offset of the item's DataHeader */
+} UDataOffsetTOCEntry;
+
+/* Offset-based table of contents: an entry count followed by the entries themselves. */
+typedef struct {
+ uint32_t count; /* number of elements in entry[] */
+ /**
+ * Variable-length array declared with length 1 to disable bounds checkers.
+ * The actual array length is in the count field.
+ */
+ UDataOffsetTOCEntry entry[1];
+} UDataOffsetTOC;
+
+/**
+ * Get the header size from a const DataHeader *udh.
+ * Handles opposite-endian data.
+ *
+ * @internal
+ */
+U_CFUNC uint16_t
+udata_getHeaderSize(const DataHeader *udh);
+
+/**
+ * Get the UDataInfo.size from a const UDataInfo *info.
+ * Handles opposite-endian data.
+ *
+ * @internal
+ */
+U_CFUNC uint16_t
+udata_getInfoSize(const UDataInfo *info);
+
+U_CDECL_BEGIN
+/*
+ * "Virtual" functions for data lookup.
+ * To call one, given a UDataMemory *p, the code looks like this:
+ * p->vFuncs->Lookup(p, tocEntryName, pLength, pErrorCode);
+ * (I sure do wish this was written in C++, not C)
+ */
+
+/* Finds the item called tocEntryName inside pData; returns its header (or NULL) and may report the item length through pLength. */
+typedef const DataHeader *
+(U_CALLCONV * LookupFn)(const UDataMemory *pData,
+ const char *tocEntryName,
+ int32_t *pLength,
+ UErrorCode *pErrorCode);
+
+/* Returns the number of entries in pData's table of contents. */
+typedef uint32_t
+(U_CALLCONV * NumEntriesFn)(const UDataMemory *pData);
+
+U_CDECL_END
+
+/* Function dispatch vector for one table-of-contents format; udata_checkCommonData() stores a pointer to one in the UDataMemory. */
+typedef struct {
+ LookupFn Lookup; /* find an item by name */
+ NumEntriesFn NumEntries; /* count the TOC entries */
+} commonDataFuncs;
+
+
+/*
+ * Functions to check whether a UDataMemory refers to memory containing
+ * a recognizable header and table of contents of a Common Data Format
+ *
+ * If a valid header and TOC are found,
+ * set the CommonDataFuncs function dispatch vector in the UDataMemory
+ * to point to the right functions for the TOC type.
+ * otherwise
+ * set an errorcode.
+ */
+U_CFUNC void udata_checkCommonData(UDataMemory *pData, UErrorCode *pErrorCode);
+
+#endif
diff --git a/thirdparty/icu4c/common/ucnv.cpp b/thirdparty/icu4c/common/ucnv.cpp
new file mode 100644
index 0000000000..5dcf35e043
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv.cpp
@@ -0,0 +1,2910 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1998-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* ucnv.c:
+* Implements APIs for the ICU's codeset conversion library;
+* mostly calls through internal functions;
+* created by Bertrand A. Damiba
+*
+* Modification History:
+*
+* Date Name Description
+* 04/04/99 helena Fixed internal header inclusion.
+* 05/09/00 helena Added implementation to handle fallback mappings.
+* 06/20/2000 helena OS/400 port changes; mostly typecast.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include <memory>
+
+#include "unicode/ustring.h"
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_err.h"
+#include "unicode/uset.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
+#include "putilimp.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uassert.h"
+#include "utracimp.h"
+#include "ustr_imp.h"
+#include "ucnv_imp.h"
+#include "ucnv_cnv.h"
+#include "ucnv_bld.h"
+
+/* size of intermediate and preflighting buffers in ucnv_convert() */
+#define CHUNK_SIZE 1024
+
+/* Row type of the ambiguousConverters table: a converter name paired with one UChar. */
+typedef struct UAmbiguousConverter {
+ const char *name; /* converter name string */
+ const UChar variant5c; /* NOTE(review): presumably the code unit this charset assigns to byte 0x5c - confirm against the table's users */
+} UAmbiguousConverter;
+
+/*
+ * Converter names paired with either 0xa5 (U+00A5 YEN SIGN) or
+ * 0x20a9 (U+20A9 WON SIGN). Rows inside comments are disabled.
+ */
+static const UAmbiguousConverter ambiguousConverters[]={
+ { "ibm-897_P100-1995", 0xa5 },
+ { "ibm-942_P120-1999", 0xa5 },
+ { "ibm-943_P130-1999", 0xa5 },
+ { "ibm-946_P100-1995", 0xa5 },
+ { "ibm-33722_P120-1999", 0xa5 },
+ { "ibm-1041_P100-1995", 0xa5 },
+ /*{ "ibm-54191_P100-2006", 0xa5 },*/
+ /*{ "ibm-62383_P100-2007", 0xa5 },*/
+ /*{ "ibm-891_P100-1995", 0x20a9 },*/
+ { "ibm-944_P100-1995", 0x20a9 },
+ { "ibm-949_P110-1999", 0x20a9 },
+ { "ibm-1363_P110-1997", 0x20a9 },
+ { "ISO_2022,locale=ko,version=0", 0x20a9 },
+ { "ibm-1088_P100-1995", 0x20a9 }
+};
+
+/*Calls through createConverter */
+U_CAPI UConverter* U_EXPORT2
+ucnv_open (const char *name,
+           UErrorCode * err)
+{
+    /* Nothing is created without a usable, non-failing error code. */
+    if (err == NULL || U_FAILURE (*err)) {
+        return NULL;
+    }
+
+    /* No preallocated converter object is supplied (first argument NULL). */
+    return ucnv_createConverter(NULL, name, err);
+}
+
+/* Thin wrapper: forwards all three arguments unchanged to ucnv_createConverterFromPackage(). */
+U_CAPI UConverter* U_EXPORT2
+ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err)
+{
+ return ucnv_createConverterFromPackage(packageName, converterName, err);
+}
+
+/*Extracts the UChar* to a char* and calls through createConverter */
+U_CAPI UConverter* U_EXPORT2
+ucnv_openU (const UChar * name,
+            UErrorCode * err)
+{
+    if (err == NULL || U_FAILURE(*err)) {
+        return NULL;
+    }
+
+    /* A NULL name is handed straight to ucnv_open(). */
+    if (name == NULL) {
+        return ucnv_open (NULL, err);
+    }
+
+    /* The name plus its terminator must fit into the local char buffer. */
+    if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
+        *err = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    /* Copy the UChar name into a char buffer and open by that name. */
+    char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
+    return ucnv_open(u_austrcpy(asciiName, name), err);
+}
+
+/* Write the name prefix that corresponds to a UConverterPlatform value.
+ * @param platformString An output buffer; always left NUL-terminated
+ * @param pltfrm An enum representing a platform
+ * @return the length of the copied string: 4 for "ibm-", otherwise 0.
+ */
+static int32_t
+ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
+{
+    if (pltfrm == UCNV_IBM) {
+        uprv_strcpy(platformString, "ibm-");
+        return 4;
+    }
+
+    /* UCNV_UNKNOWN and any other value: default to empty string */
+    *platformString = 0;
+    return 0;
+}
+
+/* Opens a converter by CCSID number.
+ * The converter name is built as the platform prefix (see
+ * ucnv_copyPlatformString(): "ibm-" or nothing) followed by the codepage
+ * number in decimal, and is then passed to ucnv_createConverter().
+ */
+U_CAPI UConverter* U_EXPORT2
+ucnv_openCCSID (int32_t codepage,
+                UConverterPlatform platform,
+                UErrorCode * err)
+{
+    char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
+
+    if (err == NULL || U_FAILURE (*err)) {
+        return NULL;
+    }
+
+    /* Prefix first, then the base-10 digits appended right behind it. */
+    const int32_t prefixLength = ucnv_copyPlatformString(myName, platform);
+    T_CString_integerToString(&myName[prefixLength], codepage, 10);
+
+    return ucnv_createConverter(NULL, myName, err);
+}
+
+/* Creates a copy of a converter, so that a converter shared across threads
+ * can be cloned into an object that is used in one thread.
+ *
+ * cnv          converter to clone; NULL sets U_ILLEGAL_ARGUMENT_ERROR
+ * stackBuffer  optional caller-provided storage for the clone; it is
+ *              re-aligned for UConverter before use
+ * pBufferSize  in: size of stackBuffer. A value <= 0 is a preflighting
+ *              request: the needed size is written back and NULL is returned.
+ *              When the clone had to be heap-allocated, the needed size is
+ *              written back as well. May be NULL, which forces heap allocation.
+ * status       must be non-NULL and not a failure on entry. Set to
+ *              U_SAFECLONE_ALLOCATED_WARNING when the clone was heap-allocated,
+ *              and to U_MEMORY_ALLOCATION_ERROR when any allocation fails.
+ *
+ * Returns the clone, or NULL on preflighting or failure.
+ */
+
+U_CAPI UConverter* U_EXPORT2
+ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
+{
+    UConverter *localConverter, *allocatedConverter;
+    int32_t stackBufferSize;
+    int32_t bufferSizeNeeded;
+    UErrorCode cbErr;
+    UConverterToUnicodeArgs toUArgs = {
+        sizeof(UConverterToUnicodeArgs),
+            TRUE,
+            NULL,
+            NULL,
+            NULL,
+            NULL,
+            NULL,
+            NULL
+    };
+    UConverterFromUnicodeArgs fromUArgs = {
+        sizeof(UConverterFromUnicodeArgs),
+            TRUE,
+            NULL,
+            NULL,
+            NULL,
+            NULL,
+            NULL,
+            NULL
+    };
+
+    UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
+
+    if (status == NULL || U_FAILURE(*status)){
+        UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
+        return NULL;
+    }
+
+    if (cnv == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        UTRACE_EXIT_STATUS(*status);
+        return NULL;
+    }
+
+    UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
+                                    ucnv_getName(cnv, status), cnv, stackBuffer);
+
+    if (cnv->sharedData->impl->safeClone != NULL) {
+        /* call the custom safeClone function for sizing */
+        bufferSizeNeeded = 0;
+        cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
+        if (U_FAILURE(*status)) {
+            UTRACE_EXIT_STATUS(*status);
+            return NULL;
+        }
+    }
+    else
+    {
+        /* inherent sizing */
+        bufferSizeNeeded = sizeof(UConverter);
+    }
+
+    if (pBufferSize == NULL) {
+        /* No size given: pretend the buffer is too small so that the clone is heap-allocated. */
+        stackBufferSize = 1;
+        pBufferSize = &stackBufferSize;
+    } else {
+        stackBufferSize = *pBufferSize;
+        if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
+            *pBufferSize = bufferSizeNeeded;
+            UTRACE_EXIT_VALUE(bufferSizeNeeded);
+            return NULL;
+        }
+    }
+
+    /* Adjust (if necessary) the stackBuffer pointer to be aligned correctly for a UConverter.
+     * TODO(Jira ICU-20736) Redo this using std::align() once g++4.9 compatibility is no longer needed.
+     */
+    if (stackBuffer) {
+        uintptr_t p = reinterpret_cast<uintptr_t>(stackBuffer);
+        uintptr_t aligned_p = (p + alignof(UConverter) - 1) & ~(alignof(UConverter) - 1);
+        ptrdiff_t pointerAdjustment = aligned_p - p;
+        if (bufferSizeNeeded + pointerAdjustment <= stackBufferSize) {
+            stackBuffer = reinterpret_cast<void *>(aligned_p);
+            stackBufferSize -= static_cast<int32_t>(pointerAdjustment);
+        } else {
+            /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
+            stackBufferSize = 1;
+        }
+    }
+
+    /* Now, see if we must allocate any memory */
+    if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL)
+    {
+        /* allocate one here...*/
+        localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
+
+        if(localConverter == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            UTRACE_EXIT_STATUS(*status);
+            return NULL;
+        }
+        *status = U_SAFECLONE_ALLOCATED_WARNING;
+
+        /* record the fact that memory was allocated */
+        *pBufferSize = bufferSizeNeeded;
+    } else {
+        /* just use the stack buffer */
+        localConverter = (UConverter*) stackBuffer;
+        allocatedConverter = NULL;
+    }
+
+    uprv_memset(localConverter, 0, bufferSizeNeeded);
+
+    /* Copy initial state */
+    uprv_memcpy(localConverter, cnv, sizeof(UConverter));
+    localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
+
+    /* copy the substitution string */
+    if (cnv->subChars == (uint8_t *)cnv->subUChars) {
+        /* The source uses its embedded buffer; point the clone at its own embedded copy. */
+        localConverter->subChars = (uint8_t *)localConverter->subUChars;
+    } else {
+        localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
+        if (localConverter->subChars == NULL) {
+            uprv_free(allocatedConverter);
+            /* Report the failure; otherwise the caller would get NULL together
+             * with a success or warning status. */
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            UTRACE_EXIT_STATUS(*status);
+            return NULL;
+        }
+        uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
+    }
+
+    /* now either call the safeclone fcn or not */
+    if (cnv->sharedData->impl->safeClone != NULL) {
+        /* call the custom safeClone function */
+        localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
+    }
+
+    if(localConverter==NULL || U_FAILURE(*status)) {
+        if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
+            uprv_free(allocatedConverter->subChars);
+        }
+        uprv_free(allocatedConverter);
+        UTRACE_EXIT_STATUS(*status);
+        return NULL;
+    }
+
+    /* increment refcount of shared data if needed */
+    if (cnv->sharedData->isReferenceCounted) {
+        ucnv_incrementRefCount(cnv->sharedData);
+    }
+
+    if(localConverter == (UConverter*)stackBuffer) {
+        /* we're using user provided data - set to not destroy */
+        localConverter->isCopyLocal = TRUE;
+    }
+
+    /* allow callback functions to handle any memory allocation */
+    toUArgs.converter = fromUArgs.converter = localConverter;
+    cbErr = U_ZERO_ERROR;
+    cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
+    cbErr = U_ZERO_ERROR;
+    cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
+
+    UTRACE_EXIT_PTR_STATUS(localConverter, *status);
+    return localConverter;
+}
+
+
+
+/*Decreases the reference counter in the shared immutable section of the object
+ *and frees the mutable part*/
+
+/* Closes a converter; NULL is accepted and ignored.
+ * Order of teardown: notify non-default callbacks with UCNV_CLOSE, call the
+ * implementation's close hook, free a separately allocated subChars buffer,
+ * release reference-counted shared data, and finally free the converter
+ * object itself unless isCopyLocal is set.
+ */
+U_CAPI void U_EXPORT2
+ucnv_close (UConverter * converter)
+{
+ UErrorCode errorCode = U_ZERO_ERROR;
+
+ UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
+
+ if (converter == NULL)
+ {
+ UTRACE_EXIT();
+ return;
+ }
+
+ UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
+ ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
+
+ /* In order to speed up the close, only call the callbacks when they have been changed.
+ This performance check will only work when the callbacks are set within a shared library
+ or from user code that statically links this code. */
+ /* first, notify the callback functions that the converter is closed */
+ if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
+ UConverterToUnicodeArgs toUArgs = {
+ sizeof(UConverterToUnicodeArgs),
+ TRUE,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+ };
+
+ toUArgs.converter = converter;
+ /* errorCode is reset before the call; whatever the callback stores in it is not examined */
+ errorCode = U_ZERO_ERROR;
+ converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
+ }
+ if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
+ UConverterFromUnicodeArgs fromUArgs = {
+ sizeof(UConverterFromUnicodeArgs),
+ TRUE,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+ };
+ fromUArgs.converter = converter;
+ errorCode = U_ZERO_ERROR;
+ converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
+ }
+
+ /* implementation-specific close hook, if the converter type has one */
+ if (converter->sharedData->impl->close != NULL) {
+ converter->sharedData->impl->close(converter);
+ }
+
+ /* subChars either aliases the embedded subUChars array or is a separate heap buffer; only the latter is freed */
+ if (converter->subChars != (uint8_t *)converter->subUChars) {
+ uprv_free(converter->subChars);
+ }
+
+ if (converter->sharedData->isReferenceCounted) {
+ ucnv_unloadSharedDataIfReady(converter->sharedData);
+ }
+
+ /* isCopyLocal marks a converter living in caller-provided storage (set by ucnv_safeClone); it must not be freed here */
+ if(!converter->isCopyLocal){
+ uprv_free(converter);
+ }
+
+ UTRACE_EXIT();
+}
+
+/*returns a single Name from the list, will return NULL if out of bounds
+ */
+U_CAPI const char* U_EXPORT2
+ucnv_getAvailableName (int32_t n)
+{
+    /* The lookup helper takes a uint16_t index; anything outside that range cannot be valid. */
+    if (n < 0 || n > 0xffff) {
+        return NULL;
+    }
+
+    UErrorCode err = U_ZERO_ERROR;
+    const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
+
+    /* Any failure reported by the helper is turned into a NULL result. */
+    return U_SUCCESS(err) ? name : NULL;
+}
+
+/* Returns the result of ucnv_bld_countAvailableConverters(); the error code that call may set is local and discarded. */
+U_CAPI int32_t U_EXPORT2
+ucnv_countAvailable ()
+{
+ UErrorCode err = U_ZERO_ERROR;
+ return ucnv_bld_countAvailableConverters(&err);
+}
+
+/* Copies the converter's substitution bytes into mySubChar.
+ * *len is the buffer capacity on input and the number of bytes copied on
+ * output. It is set to 0 when the substitution is stored as a Unicode string
+ * or is empty. A buffer that is too small yields U_INDEX_OUTOFBOUNDS_ERROR.
+ */
+U_CAPI void U_EXPORT2
+ucnv_getSubstChars (const UConverter * converter,
+                    char *mySubChar,
+                    int8_t * len,
+                    UErrorCode * err)
+{
+    if (U_FAILURE (*err)) {
+        return;
+    }
+
+    if (converter->subCharLen <= 0) {
+        /* Unicode string or empty string from ucnv_setSubstString(). */
+        *len = 0;
+    } else if (*len < converter->subCharLen) {
+        /* not enough space in the caller's buffer */
+        *err = U_INDEX_OUTOFBOUNDS_ERROR;
+    } else {
+        /* fill in the subchars and report how many bytes were copied */
+        uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen);
+        *len = converter->subCharLen;
+    }
+}
+
+/* Sets the converter's substitution bytes from mySubChar[0..len-1].
+ * len must lie between the codepage's minBytesPerChar and maxBytesPerChar,
+ * otherwise U_ILLEGAL_ARGUMENT_ERROR is set and nothing changes.
+ */
+U_CAPI void U_EXPORT2
+ucnv_setSubstChars (UConverter * converter,
+                    const char *mySubChar,
+                    int8_t len,
+                    UErrorCode * err)
+{
+    if (U_FAILURE (*err)) {
+        return;
+    }
+
+    /* Makes sure that the subChar is within the codepage's char length boundaries */
+    if ((len < converter->sharedData->staticData->minBytesPerChar)
+        || (len > converter->sharedData->staticData->maxBytesPerChar)) {
+        *err = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    /* store the bytes and their length */
+    uprv_memcpy (converter->subChars, mySubChar, len);
+    converter->subCharLen = len;
+
+    /*
+     * There is currently (2001Feb) no separate API to set/get subChar1.
+     * In order to always have subChar written after it is explicitly set,
+     * we set subChar1 to 0.
+     */
+    converter->subChar1 = 0;
+}
+
+/* Sets the substitution string of cnv from the Unicode string s.
+ * The string is first converted with a clone of cnv; conversion errors are
+ * returned in *err. It is then stored either as charset bytes (positive
+ * subCharLen) or as the original UChars (negative subCharLen), depending on
+ * the checks on writeSub and converter type below.
+ */
+U_CAPI void U_EXPORT2
+ucnv_setSubstString(UConverter *cnv,
+ const UChar *s,
+ int32_t length,
+ UErrorCode *err) {
+ alignas(UConverter) char cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE];
+ char chars[UCNV_ERROR_BUFFER_LENGTH];
+
+ UConverter *clone;
+ uint8_t *subChars;
+ int32_t cloneSize, length8;
+
+ /* Let the following functions check all arguments. */
+ cloneSize = sizeof(cloneBuffer);
+ clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
+ /* Use the STOP callback on the clone so that an unconvertible string results in an error */
+ ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
+ length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
+ ucnv_close(clone);
+ if (U_FAILURE(*err)) {
+ return;
+ }
+
+ if (cnv->sharedData->impl->writeSub == NULL
+#if !UCONFIG_NO_LEGACY_CONVERSION
+ || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
+ ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
+#endif
+ ) {
+ /* The converter is not stateful. Store the charset bytes as a fixed string. */
+ subChars = (uint8_t *)chars;
+ } else {
+ /*
+ * The converter has a non-default writeSub() function, indicating
+ * that it is stateful.
+ * Store the Unicode string for on-the-fly conversion for correct
+ * state handling.
+ */
+ if (length > UCNV_ERROR_BUFFER_LENGTH) {
+ /*
+ * Should not occur. The converter should output at least one byte
+ * per UChar, which means that ucnv_fromUChars() should catch all
+ * overflows.
+ */
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ return;
+ }
+ subChars = (uint8_t *)s;
+ /* a negative length means that s is NUL-terminated */
+ if (length < 0) {
+ length = u_strlen(s);
+ }
+ /* from here on length8 is the byte size of the UChar string */
+ length8 = length * U_SIZEOF_UCHAR;
+ }
+
+ /*
+ * For storing the substitution string, select either the small buffer inside
+ * UConverter or allocate a subChars buffer.
+ */
+ if (length8 > UCNV_MAX_SUBCHAR_LEN) {
+ /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
+ if (cnv->subChars == (uint8_t *)cnv->subUChars) {
+ /* Allocate a new buffer for the string. */
+ cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
+ if (cnv->subChars == NULL) {
+ /* allocation failed: point subChars back at the embedded buffer */
+ cnv->subChars = (uint8_t *)cnv->subUChars;
+ *err = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
+ }
+ }
+
+ /* Copy the substitution string into the UConverter or its subChars buffer. */
+ if (length8 == 0) {
+ cnv->subCharLen = 0;
+ } else {
+ uprv_memcpy(cnv->subChars, subChars, length8);
+ /* the sign of subCharLen records the storage form: positive = byte count, negative = UChar count */
+ if (subChars == (uint8_t *)chars) {
+ cnv->subCharLen = (int8_t)length8;
+ } else /* subChars == s */ {
+ cnv->subCharLen = (int8_t)-length;
+ }
+ }
+
+ /* See comment in ucnv_setSubstChars(). */
+ cnv->subChar1 = 0;
+}
+
+/*resets the internal states of a converter
+ *goal : have the same behaviour than a freshly created converter
+ */
+/* Shared implementation of the ucnv_reset*() functions.
+ * choice selects which direction(s) to reset; callCallback controls whether
+ * non-default error callbacks are notified with UCNV_RESET first.
+ * A NULL converter is ignored.
+ */
+static void _reset(UConverter *converter, UConverterResetChoice choice,
+ UBool callCallback) {
+ if(converter == NULL) {
+ return;
+ }
+
+ if(callCallback) {
+ /* first, notify the callback functions that the converter is reset */
+ UErrorCode errorCode;
+
+ /* NOTE(review): "choice<=UCNV_RESET_TO_UNICODE" presumably relies on UCNV_RESET_BOTH sorting before UCNV_RESET_TO_UNICODE in the enum - confirm */
+ if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
+ UConverterToUnicodeArgs toUArgs = {
+ sizeof(UConverterToUnicodeArgs),
+ TRUE,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+ };
+ toUArgs.converter = converter;
+ errorCode = U_ZERO_ERROR;
+ converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
+ }
+ if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
+ UConverterFromUnicodeArgs fromUArgs = {
+ sizeof(UConverterFromUnicodeArgs),
+ TRUE,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+ };
+ fromUArgs.converter = converter;
+ errorCode = U_ZERO_ERROR;
+ converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
+ }
+ }
+
+ /* now reset the converter itself */
+ /* to-Unicode direction: restore the initial status from the shared data and clear pending input/output */
+ if(choice<=UCNV_RESET_TO_UNICODE) {
+ converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
+ converter->mode = 0;
+ converter->toULength = 0;
+ converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
+ converter->preToULength = 0;
+ }
+ /* from-Unicode direction */
+ if(choice!=UCNV_RESET_TO_UNICODE) {
+ converter->fromUnicodeStatus = 0;
+ converter->fromUChar32 = 0;
+ converter->invalidUCharLength = converter->charErrorBufferLength = 0;
+ converter->preFromUFirstCP = U_SENTINEL;
+ converter->preFromULength = 0;
+ }
+
+ if (converter->sharedData->impl->reset != NULL) {
+ /* call the custom reset function */
+ converter->sharedData->impl->reset(converter, choice);
+ }
+}
+
+/* Resets both conversion directions; _reset() is asked to notify the callbacks. */
+U_CAPI void U_EXPORT2
+ucnv_reset(UConverter *converter)
+{
+    const UBool notifyCallbacks = TRUE;
+
+    _reset(converter, UCNV_RESET_BOTH, notifyCallbacks);
+}
+
+/* Resets only the toUnicode direction; _reset() is asked to notify the callbacks. */
+U_CAPI void U_EXPORT2
+ucnv_resetToUnicode(UConverter *converter)
+{
+    const UBool notifyCallbacks = TRUE;
+
+    _reset(converter, UCNV_RESET_TO_UNICODE, notifyCallbacks);
+}
+
+/* Resets only the fromUnicode direction; _reset() is asked to notify the callbacks. */
+U_CAPI void U_EXPORT2
+ucnv_resetFromUnicode(UConverter *converter)
+{
+    const UBool notifyCallbacks = TRUE;
+
+    _reset(converter, UCNV_RESET_FROM_UNICODE, notifyCallbacks);
+}
+
+/* Returns the converter's own maxBytesPerUChar value. */
+U_CAPI int8_t U_EXPORT2
+ucnv_getMaxCharSize (const UConverter * converter)
+{
+    const int8_t maxBytes = converter->maxBytesPerUChar;
+
+    return maxBytes;
+}
+
+
+/* Returns minBytesPerChar from the converter's shared static data. */
+U_CAPI int8_t U_EXPORT2
+ucnv_getMinCharSize (const UConverter * converter)
+{
+    const int8_t minBytes = converter->sharedData->staticData->minBytesPerChar;
+
+    return minBytes;
+}
+
+/*
+ * Returns the converter name: the implementation's getName() result when that
+ * hook exists and returns non-NULL, otherwise the name from the static data.
+ * Returns NULL if *err already indicates a failure.
+ */
+U_CAPI const char* U_EXPORT2
+ucnv_getName (const UConverter * converter, UErrorCode * err)
+{
+    const char *name = NULL;
+
+    if (U_FAILURE (*err)) {
+        return NULL;
+    }
+
+    if (converter->sharedData->impl->getName != NULL) {
+        name = converter->sharedData->impl->getName(converter);
+    }
+    if (name == NULL) {
+        /* no hook, or the hook had nothing to report */
+        name = converter->sharedData->staticData->name;
+    }
+    return name;
+}
+
+/*
+ * Returns the codepage number from the converter's static data.
+ * If that is 0, tries to parse the number after the '-' of the converter's
+ * "IBM" standard name instead.
+ * Returns -1 if *err already indicates a failure.
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_getCCSID(const UConverter * converter,
+              UErrorCode * err)
+{
+    int32_t ccsid;
+    const char *ibmName;
+    const char *dash;
+
+    if (U_FAILURE (*err)) {
+        return -1;
+    }
+
+    ccsid = converter->sharedData->staticData->codepage;
+    if (ccsid != 0) {
+        return ccsid;
+    }
+
+    /* Rare case. This is for cases like gb18030,
+       which doesn't have an IBM canonical name, but does have an IBM alias. */
+    ibmName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
+    if (U_FAILURE(*err) || ibmName == NULL) {
+        return ccsid;
+    }
+
+    dash = uprv_strchr(ibmName, '-');
+    if (dash != NULL) {
+        ccsid = (int32_t)atol(dash+1); /* +1 to skip '-' */
+    }
+    return ccsid;
+}
+
+
+/*
+ * Returns the platform value from the converter's static data,
+ * or UCNV_UNKNOWN if *err already indicates a failure.
+ */
+U_CAPI UConverterPlatform U_EXPORT2
+ucnv_getPlatform (const UConverter * converter,
+                  UErrorCode * err)
+{
+    return U_FAILURE (*err)
+        ? UCNV_UNKNOWN
+        : (UConverterPlatform)converter->sharedData->staticData->platform;
+}
+
+U_CAPI void U_EXPORT2
+ ucnv_getToUCallBack (const UConverter * converter,
+ UConverterToUCallback *action,
+ const void **context)
+{
+ *action = converter->fromCharErrorBehaviour;
+ *context = converter->toUContext;
+}
+
+U_CAPI void U_EXPORT2
+ ucnv_getFromUCallBack (const UConverter * converter,
+ UConverterFromUCallback *action,
+ const void **context)
+{
+ *action = converter->fromUCharErrorBehaviour;
+ *context = converter->fromUContext;
+}
+
+/*
+ * Installs a new toUnicode error callback and context.
+ * The previous callback and context are stored through oldAction and
+ * oldContext when those pointers are non-NULL.
+ * Does nothing if *err already indicates a failure.
+ */
+U_CAPI void U_EXPORT2
+ucnv_setToUCallBack (UConverter * converter,
+                     UConverterToUCallback newAction,
+                     const void* newContext,
+                     UConverterToUCallback *oldAction,
+                     const void** oldContext,
+                     UErrorCode * err)
+{
+    if (U_SUCCESS (*err)) {
+        if (oldAction != NULL) {
+            *oldAction = converter->fromCharErrorBehaviour;
+        }
+        converter->fromCharErrorBehaviour = newAction;
+
+        if (oldContext != NULL) {
+            *oldContext = converter->toUContext;
+        }
+        converter->toUContext = newContext;
+    }
+}
+
+/*
+ * Installs a new fromUnicode error callback and context.
+ * The previous callback and context are stored through oldAction and
+ * oldContext when those pointers are non-NULL.
+ * Does nothing if *err already indicates a failure.
+ */
+U_CAPI void U_EXPORT2
+ucnv_setFromUCallBack (UConverter * converter,
+                       UConverterFromUCallback newAction,
+                       const void* newContext,
+                       UConverterFromUCallback *oldAction,
+                       const void** oldContext,
+                       UErrorCode * err)
+{
+    if (U_SUCCESS (*err)) {
+        if (oldAction != NULL) {
+            *oldAction = converter->fromUCharErrorBehaviour;
+        }
+        converter->fromUCharErrorBehaviour = newAction;
+
+        if (oldContext != NULL) {
+            *oldContext = converter->fromUContext;
+        }
+        converter->fromUContext = newContext;
+    }
+}
+
+/*
+ * Adjusts the first `length` entries of offsets[] after a conversion step.
+ *
+ * With sourceIndex>=0 the shift is sourceIndex-errorInputLength:
+ *   shift==0  nothing changes
+ *   shift>0   the shift is added to every offset that is >=0
+ *   shift<0   every offset becomes -1 (the error input sequence started
+ *             in a previous buffer)
+ * With sourceIndex<0 the conversion function does not handle offsets and
+ * every offset becomes -1.
+ */
+static void
+_updateOffsets(int32_t *offsets, int32_t length,
+               int32_t sourceIndex, int32_t errorInputLength) {
+    int32_t i;
+    int32_t shift;
+
+    if(sourceIndex<0) {
+        shift=-1;
+    } else {
+        shift=sourceIndex-errorInputLength;
+    }
+
+    if(shift==0) {
+        /* most common case, nothing to do */
+        return;
+    }
+
+    if(shift>0) {
+        /* move the non-negative offsets forward, leave the negative ones alone */
+        for(i=0; i<length; ++i) {
+            if(offsets[i]>=0) {
+                offsets[i]+=shift;
+            }
+        }
+    } else {
+        /* no usable source index: mark every offset as unknown */
+        for(i=0; i<length; ++i) {
+            offsets[i]=-1;
+        }
+    }
+}
+
+/* ucnv_fromUnicode --------------------------------------------------------- */
+
+/*
+ * Implementation note for m:n conversions
+ *
+ * While collecting source units to find the longest match for m:n conversion,
+ * some source units may need to be stored for a partial match.
+ * When a second buffer does not yield a match on all of the previously stored
+ * source units, then they must be "replayed", i.e., fed back into the converter.
+ *
+ * The code relies on the fact that replaying will not nest -
+ * converting a replay buffer will not result in a replay.
+ * This is because a replay is necessary only after the _continuation_ of a
+ * partial match failed, but a replay buffer is converted as a whole.
+ * It may result in some of its units being stored again for a partial match,
+ * but there will not be a continuation _during_ the replay which could fail.
+ *
+ * It is conceivable that a callback function could call the converter
+ * recursively in a way that causes another replay to be stored, but that
+ * would be an error in the callback function.
+ * Such violations will cause assertion failures in a debug build,
+ * and wrong output, but they will not cause a crash.
+ */
+
+/*
+ * Core fromUnicode conversion driver: calls the converter implementation and
+ * handles everything around that call.
+ *
+ * The implementation function is taken from cnv->sharedData->impl
+ * (fromUnicodeWithOffsets if offsets are requested and it exists, otherwise
+ * fromUnicode). The function then loops: convert, fix up the offsets,
+ * switch to and from the m:n replay buffer (cnv->preFromU), handle the end of
+ * the input when flushing, and for U_INVALID_CHAR_FOUND, U_ILLEGAL_CHAR_FOUND
+ * and U_TRUNCATED_CHAR_FOUND call cnv->fromUCharErrorBehaviour.
+ * If the error is still set after the callback, or it is any other error,
+ * the function returns with *err set.
+ *
+ * @param pArgs conversion arguments; source, target and offsets are advanced
+ *              in place; source, sourceLimit and flush are temporarily
+ *              redirected while replaying and restored before returning
+ * @param err   in/out error code; a failure value on entry skips the first
+ *              conversion call and goes directly to the error handling
+ */
+static void
+_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
+ UConverterFromUnicode fromUnicode;
+ UConverter *cnv;
+ const UChar *s;
+ char *t;
+ int32_t *offsets;
+ int32_t sourceIndex;
+ int32_t errorInputLength;
+ UBool converterSawEndOfInput, calledCallback;
+
+ /* variables for m:n conversion */
+ UChar replay[UCNV_EXT_MAX_UCHARS];
+ const UChar *realSource, *realSourceLimit;
+ int32_t realSourceIndex;
+ UBool realFlush;
+
+ cnv=pArgs->converter;
+ s=pArgs->source;
+ t=pArgs->target;
+ offsets=pArgs->offsets;
+
+ /* get the converter implementation function */
+ sourceIndex=0;
+ if(offsets==NULL) {
+ fromUnicode=cnv->sharedData->impl->fromUnicode;
+ } else {
+ fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
+ if(fromUnicode==NULL) {
+ /* there is no WithOffsets implementation */
+ fromUnicode=cnv->sharedData->impl->fromUnicode;
+ /* we will write -1 for each offset */
+ sourceIndex=-1;
+ }
+ }
+
+ /* a negative preFromULength is the negated number of stored replay units */
+ if(cnv->preFromULength>=0) {
+ /* normal mode */
+ realSource=NULL;
+
+ /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
+ realSourceLimit=NULL;
+ realFlush=FALSE;
+ realSourceIndex=0;
+ } else {
+ /*
+ * Previous m:n conversion stored source units from a partial match
+ * and failed to consume all of them.
+ * We need to "replay" them from a temporary buffer and convert them first.
+ */
+ realSource=pArgs->source;
+ realSourceLimit=pArgs->sourceLimit;
+ realFlush=pArgs->flush;
+ realSourceIndex=sourceIndex;
+
+ uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
+ pArgs->source=replay;
+ pArgs->sourceLimit=replay-cnv->preFromULength;
+ pArgs->flush=FALSE;
+ sourceIndex=-1;
+
+ cnv->preFromULength=0;
+ }
+
+ /*
+ * loop for conversion and error handling
+ *
+ * loop {
+ * convert
+ * loop {
+ * update offsets
+ * handle end of input
+ * handle errors/call callback
+ * }
+ * }
+ */
+ for(;;) {
+ if(U_SUCCESS(*err)) {
+ /* convert */
+ fromUnicode(pArgs, err);
+
+ /*
+ * set a flag for whether the converter
+ * successfully processed the end of the input
+ *
+ * need not check cnv->preFromULength==0 because a replay (<0) will cause
+ * s<sourceLimit before converterSawEndOfInput is checked
+ */
+ converterSawEndOfInput=
+ (UBool)(U_SUCCESS(*err) &&
+ pArgs->flush && pArgs->source==pArgs->sourceLimit &&
+ cnv->fromUChar32==0);
+ } else {
+ /* handle error from ucnv_convertEx() */
+ converterSawEndOfInput=FALSE;
+ }
+
+ /* no callback called yet for this iteration */
+ calledCallback=FALSE;
+
+ /* no sourceIndex adjustment for conversion, only for callback output */
+ errorInputLength=0;
+
+ /*
+ * loop for offsets and error handling
+ *
+ * iterates at most 3 times:
+ * 1. to clean up after the conversion function
+ * 2. after the callback
+ * 3. after the callback again if there was truncated input
+ */
+ for(;;) {
+ /* update offsets if we write any */
+ if(offsets!=NULL) {
+ int32_t length=(int32_t)(pArgs->target-t);
+ if(length>0) {
+ _updateOffsets(offsets, length, sourceIndex, errorInputLength);
+
+ /*
+ * if a converter handles offsets and updates the offsets
+ * pointer at the end, then pArgs->offsets should not change
+ * here;
+ * however, some converters do not handle offsets at all
+ * (sourceIndex<0) or may not update the offsets pointer
+ */
+ pArgs->offsets=offsets+=length;
+ }
+
+ if(sourceIndex>=0) {
+ sourceIndex+=(int32_t)(pArgs->source-s);
+ }
+ }
+
+ if(cnv->preFromULength<0) {
+ /*
+ * switch the source to new replay units (cannot occur while replaying)
+ * after offset handling and before end-of-input and callback handling
+ */
+ if(realSource==NULL) {
+ realSource=pArgs->source;
+ realSourceLimit=pArgs->sourceLimit;
+ realFlush=pArgs->flush;
+ realSourceIndex=sourceIndex;
+
+ uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
+ pArgs->source=replay;
+ pArgs->sourceLimit=replay-cnv->preFromULength;
+ pArgs->flush=FALSE;
+ if((sourceIndex+=cnv->preFromULength)<0) {
+ sourceIndex=-1;
+ }
+
+ cnv->preFromULength=0;
+ } else {
+ /*
+ * see implementation note before _fromUnicodeWithCallback()
+ *
+ * realSource is not NULL in this branch, so the assertion
+ * fails on purpose where U_ASSERT is active
+ */
+ U_ASSERT(realSource==NULL);
+ *err=U_INTERNAL_PROGRAM_ERROR;
+ }
+ }
+
+ /* update pointers */
+ s=pArgs->source;
+ t=pArgs->target;
+
+ if(U_SUCCESS(*err)) {
+ if(s<pArgs->sourceLimit) {
+ /*
+ * continue with the conversion loop while there is still input left
+ * (continue converting by breaking out of only the inner loop)
+ */
+ break;
+ } else if(realSource!=NULL) {
+ /* switch back from replaying to the real source and continue */
+ pArgs->source=realSource;
+ pArgs->sourceLimit=realSourceLimit;
+ pArgs->flush=realFlush;
+ sourceIndex=realSourceIndex;
+
+ realSource=NULL;
+ break;
+ } else if(pArgs->flush && cnv->fromUChar32!=0) {
+ /*
+ * the entire input stream is consumed
+ * and there is a partial, truncated input sequence left
+ */
+
+ /* inject an error and continue with callback handling */
+ *err=U_TRUNCATED_CHAR_FOUND;
+ calledCallback=FALSE; /* new error condition */
+ } else {
+ /* input consumed */
+ if(pArgs->flush) {
+ /*
+ * return to the conversion loop once more if the flush
+ * flag is set and the conversion function has not
+ * successfully processed the end of the input yet
+ *
+ * (continue converting by breaking out of only the inner loop)
+ */
+ if(!converterSawEndOfInput) {
+ break;
+ }
+
+ /* reset the converter without calling the callback function */
+ _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
+ }
+
+ /* done successfully */
+ return;
+ }
+ }
+
+ /* U_FAILURE(*err) */
+ {
+ UErrorCode e;
+
+ if( calledCallback ||
+ (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
+ (e!=U_INVALID_CHAR_FOUND &&
+ e!=U_ILLEGAL_CHAR_FOUND &&
+ e!=U_TRUNCATED_CHAR_FOUND)
+ ) {
+ /*
+ * the callback did not or cannot resolve the error:
+ * set output pointers and return
+ *
+ * the check for buffer overflow is redundant but it is
+ * a high-runner case and hopefully documents the intent
+ * well
+ *
+ * if we were replaying, then the replay buffer must be
+ * copied back into the UConverter
+ * and the real arguments must be restored
+ */
+ if(realSource!=NULL) {
+ int32_t length;
+
+ U_ASSERT(cnv->preFromULength==0);
+
+ length=(int32_t)(pArgs->sourceLimit-pArgs->source);
+ if(length>0) {
+ u_memcpy(cnv->preFromU, pArgs->source, length);
+ cnv->preFromULength=(int8_t)-length;
+ }
+
+ pArgs->source=realSource;
+ pArgs->sourceLimit=realSourceLimit;
+ pArgs->flush=realFlush;
+ }
+
+ return;
+ }
+ }
+
+ /* callback handling */
+ {
+ UChar32 codePoint;
+
+ /* get and write the code point */
+ codePoint=cnv->fromUChar32;
+ errorInputLength=0;
+ U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
+ cnv->invalidUCharLength=(int8_t)errorInputLength;
+
+ /* set the converter state to deal with the next character */
+ cnv->fromUChar32=0;
+
+ /* call the callback function */
+ cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
+ cnv->invalidUCharBuffer, errorInputLength, codePoint,
+ *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
+ err);
+ }
+
+ /*
+ * loop back to the offset handling
+ *
+ * this flag will indicate after offset handling
+ * that a callback was called;
+ * if the callback did not resolve the error, then we return
+ */
+ calledCallback=TRUE;
+ }
+ }
+}
+
+/*
+ * Writes the pending fromUnicode overflow bytes (cnv->charErrorBuffer) to the target.
+ * Call this function if(cnv->charErrorBufferLength>0).
+ *
+ * Each byte written gets the offset -1 if offsets are requested.
+ * If the target fills up first, the unwritten bytes are moved to the start
+ * of the overflow buffer and *err is set to U_BUFFER_OVERFLOW_ERROR.
+ * *target and, if used, *pOffsets are advanced in either case.
+ * @return TRUE if overflow
+ */
+static UBool
+ucnv_outputOverflowFromUnicode(UConverter *cnv,
+                               char **target, const char *targetLimit,
+                               int32_t **pOffsets,
+                               UErrorCode *err) {
+    char *pending=(char *)cnv->charErrorBuffer;
+    char *out=*target;
+    int32_t *offs=(pOffsets!=NULL) ? *pOffsets : NULL;
+    int32_t count=cnv->charErrorBufferLength;
+    int32_t taken=0, kept=0;
+
+    /* copy the overflow contents to the target while there is room */
+    while(taken<count && out!=targetLimit) {
+        *out++=pending[taken++];
+        if(offs!=NULL) {
+            *offs++=-1; /* no source index available for old output */
+        }
+    }
+
+    /* the overflow buffer contains too much, keep the rest at its beginning */
+    while(taken<count) {
+        pending[kept++]=pending[taken++];
+    }
+
+    cnv->charErrorBufferLength=(int8_t)kept;
+    *target=out;
+    if(offs!=NULL) {
+        *pOffsets=offs;
+    }
+
+    if(kept>0) {
+        *err=U_BUFFER_OVERFLOW_ERROR;
+        return TRUE;
+    }
+    return FALSE;
+}
+
+/*
+ * Streaming conversion from UTF-16 to the converter's charset.
+ *
+ * Checks the arguments, first writes out bytes still pending in the
+ * converter's overflow buffer, then runs _fromUnicodeWithCallback() and
+ * stores the advanced pointers back into *source and *target.
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR if cnv, target or source is NULL, if a limit
+ * is before its start pointer, if a buffer is too large, or if the source
+ * range has an odd number of bytes.
+ * Does nothing if err is NULL or *err already indicates a failure.
+ */
+U_CAPI void U_EXPORT2
+ucnv_fromUnicode(UConverter *cnv,
+ char **target, const char *targetLimit,
+ const UChar **source, const UChar *sourceLimit,
+ int32_t *offsets,
+ UBool flush,
+ UErrorCode *err) {
+ UConverterFromUnicodeArgs args;
+ const UChar *s;
+ char *t;
+
+ /* check parameters */
+ if(err==NULL || U_FAILURE(*err)) {
+ return;
+ }
+
+ if(cnv==NULL || target==NULL || source==NULL) {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ s=*source;
+ t=*target;
+
+ if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
+ /*
+ Prevent code from going into an infinite loop in case we do hit this
+ limit. The limit pointer is expected to be on a UChar * boundary.
+ This also prevents the next argument check from failing.
+ */
+ sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
+ }
+
+ /*
+ * All these conditions should never happen.
+ *
+ * 1) Make sure that the limits are >= to the address source or target
+ *
+ * 2) Make sure that the buffer sizes do not exceed the number range for
+ * int32_t because some functions use the size (in units or bytes)
+ * rather than comparing pointers, and because offsets are int32_t values.
+ *
+ * size_t is guaranteed to be unsigned and large enough for the job.
+ *
+ * Return with an error instead of adjusting the limits because we would
+ * not be able to maintain the semantics that either the source must be
+ * consumed or the target filled (unless an error occurs).
+ * An adjustment would be targetLimit=t+0x7fffffff; for example.
+ *
+ * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
+ * to a char * pointer and provide an incomplete UChar code unit.
+ */
+ if (sourceLimit<s || targetLimit<t ||
+ ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
+ ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) ||
+ (((const char *)sourceLimit-(const char *)s) & 1) != 0)
+ {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ /* output the target overflow buffer */
+ if( cnv->charErrorBufferLength>0 &&
+ ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
+ ) {
+ /* U_BUFFER_OVERFLOW_ERROR */
+ return;
+ }
+ /* *target may have moved, therefore stop using t */
+
+ /* preFromULength<0 means there are stored replay units that still need converting */
+ if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
+ /* the overflow buffer is emptied and there is no new input: we are done */
+ return;
+ }
+
+ /*
+ * Do not simply return with a buffer overflow error if
+ * !flush && t==targetLimit
+ * because it is possible that the source will not generate any output.
+ * For example, the skip callback may be called;
+ * it does not output anything.
+ */
+
+ /* prepare the converter arguments */
+ args.converter=cnv;
+ args.flush=flush;
+ args.offsets=offsets;
+ args.source=s;
+ args.sourceLimit=sourceLimit;
+ args.target=*target;
+ args.targetLimit=targetLimit;
+ args.size=sizeof(args);
+
+ _fromUnicodeWithCallback(&args, err);
+
+ /* report how far the conversion got */
+ *source=args.source;
+ *target=args.target;
+}
+
+/* ucnv_toUnicode() --------------------------------------------------------- */
+
+/*
+ * Core toUnicode conversion driver: calls the converter implementation and
+ * handles everything around that call.
+ *
+ * The implementation function is taken from cnv->sharedData->impl
+ * (toUnicodeWithOffsets if offsets are requested and it exists, otherwise
+ * toUnicode). The function then loops: convert, fix up the offsets,
+ * switch to and from the m:n replay buffer (cnv->preToU), handle the end of
+ * the input when flushing, and for U_INVALID_CHAR_FOUND, U_ILLEGAL_CHAR_FOUND,
+ * U_TRUNCATED_CHAR_FOUND, U_ILLEGAL_ESCAPE_SEQUENCE and
+ * U_UNSUPPORTED_ESCAPE_SEQUENCE call cnv->fromCharErrorBehaviour.
+ * If the error is still set after the callback, or it is any other error,
+ * the function returns with *err set.
+ *
+ * @param pArgs conversion arguments; source, target and offsets are advanced
+ *              in place; source, sourceLimit and flush are temporarily
+ *              redirected while replaying and restored before returning
+ * @param err   in/out error code; a failure value on entry skips the first
+ *              conversion call and goes directly to the error handling
+ */
+static void
+_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
+ UConverterToUnicode toUnicode;
+ UConverter *cnv;
+ const char *s;
+ UChar *t;
+ int32_t *offsets;
+ int32_t sourceIndex;
+ int32_t errorInputLength;
+ UBool converterSawEndOfInput, calledCallback;
+
+ /* variables for m:n conversion */
+ char replay[UCNV_EXT_MAX_BYTES];
+ const char *realSource, *realSourceLimit;
+ int32_t realSourceIndex;
+ UBool realFlush;
+
+ cnv=pArgs->converter;
+ s=pArgs->source;
+ t=pArgs->target;
+ offsets=pArgs->offsets;
+
+ /* get the converter implementation function */
+ sourceIndex=0;
+ if(offsets==NULL) {
+ toUnicode=cnv->sharedData->impl->toUnicode;
+ } else {
+ toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
+ if(toUnicode==NULL) {
+ /* there is no WithOffsets implementation */
+ toUnicode=cnv->sharedData->impl->toUnicode;
+ /* we will write -1 for each offset */
+ sourceIndex=-1;
+ }
+ }
+
+ /* a negative preToULength is the negated number of stored replay bytes */
+ if(cnv->preToULength>=0) {
+ /* normal mode */
+ realSource=NULL;
+
+ /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
+ realSourceLimit=NULL;
+ realFlush=FALSE;
+ realSourceIndex=0;
+ } else {
+ /*
+ * Previous m:n conversion stored source units from a partial match
+ * and failed to consume all of them.
+ * We need to "replay" them from a temporary buffer and convert them first.
+ */
+ realSource=pArgs->source;
+ realSourceLimit=pArgs->sourceLimit;
+ realFlush=pArgs->flush;
+ realSourceIndex=sourceIndex;
+
+ uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
+ pArgs->source=replay;
+ pArgs->sourceLimit=replay-cnv->preToULength;
+ pArgs->flush=FALSE;
+ sourceIndex=-1;
+
+ cnv->preToULength=0;
+ }
+
+ /*
+ * loop for conversion and error handling
+ *
+ * loop {
+ * convert
+ * loop {
+ * update offsets
+ * handle end of input
+ * handle errors/call callback
+ * }
+ * }
+ */
+ for(;;) {
+ if(U_SUCCESS(*err)) {
+ /* convert */
+ toUnicode(pArgs, err);
+
+ /*
+ * set a flag for whether the converter
+ * successfully processed the end of the input
+ *
+ * need not check cnv->preToULength==0 because a replay (<0) will cause
+ * s<sourceLimit before converterSawEndOfInput is checked
+ */
+ converterSawEndOfInput=
+ (UBool)(U_SUCCESS(*err) &&
+ pArgs->flush && pArgs->source==pArgs->sourceLimit &&
+ cnv->toULength==0);
+ } else {
+ /* handle error from getNextUChar() or ucnv_convertEx() */
+ converterSawEndOfInput=FALSE;
+ }
+
+ /* no callback called yet for this iteration */
+ calledCallback=FALSE;
+
+ /* no sourceIndex adjustment for conversion, only for callback output */
+ errorInputLength=0;
+
+ /*
+ * loop for offsets and error handling
+ *
+ * iterates at most 3 times:
+ * 1. to clean up after the conversion function
+ * 2. after the callback
+ * 3. after the callback again if there was truncated input
+ */
+ for(;;) {
+ /* update offsets if we write any */
+ if(offsets!=NULL) {
+ int32_t length=(int32_t)(pArgs->target-t);
+ if(length>0) {
+ _updateOffsets(offsets, length, sourceIndex, errorInputLength);
+
+ /*
+ * if a converter handles offsets and updates the offsets
+ * pointer at the end, then pArgs->offsets should not change
+ * here;
+ * however, some converters do not handle offsets at all
+ * (sourceIndex<0) or may not update the offsets pointer
+ */
+ pArgs->offsets=offsets+=length;
+ }
+
+ if(sourceIndex>=0) {
+ sourceIndex+=(int32_t)(pArgs->source-s);
+ }
+ }
+
+ if(cnv->preToULength<0) {
+ /*
+ * switch the source to new replay units (cannot occur while replaying)
+ * after offset handling and before end-of-input and callback handling
+ */
+ if(realSource==NULL) {
+ realSource=pArgs->source;
+ realSourceLimit=pArgs->sourceLimit;
+ realFlush=pArgs->flush;
+ realSourceIndex=sourceIndex;
+
+ uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
+ pArgs->source=replay;
+ pArgs->sourceLimit=replay-cnv->preToULength;
+ pArgs->flush=FALSE;
+ if((sourceIndex+=cnv->preToULength)<0) {
+ sourceIndex=-1;
+ }
+
+ cnv->preToULength=0;
+ } else {
+ /*
+ * see implementation note before _fromUnicodeWithCallback()
+ *
+ * realSource is not NULL in this branch, so the assertion
+ * fails on purpose where U_ASSERT is active
+ */
+ U_ASSERT(realSource==NULL);
+ *err=U_INTERNAL_PROGRAM_ERROR;
+ }
+ }
+
+ /* update pointers */
+ s=pArgs->source;
+ t=pArgs->target;
+
+ if(U_SUCCESS(*err)) {
+ if(s<pArgs->sourceLimit) {
+ /*
+ * continue with the conversion loop while there is still input left
+ * (continue converting by breaking out of only the inner loop)
+ */
+ break;
+ } else if(realSource!=NULL) {
+ /* switch back from replaying to the real source and continue */
+ pArgs->source=realSource;
+ pArgs->sourceLimit=realSourceLimit;
+ pArgs->flush=realFlush;
+ sourceIndex=realSourceIndex;
+
+ realSource=NULL;
+ break;
+ } else if(pArgs->flush && cnv->toULength>0) {
+ /*
+ * the entire input stream is consumed
+ * and there is a partial, truncated input sequence left
+ */
+
+ /* inject an error and continue with callback handling */
+ *err=U_TRUNCATED_CHAR_FOUND;
+ calledCallback=FALSE; /* new error condition */
+ } else {
+ /* input consumed */
+ if(pArgs->flush) {
+ /*
+ * return to the conversion loop once more if the flush
+ * flag is set and the conversion function has not
+ * successfully processed the end of the input yet
+ *
+ * (continue converting by breaking out of only the inner loop)
+ */
+ if(!converterSawEndOfInput) {
+ break;
+ }
+
+ /* reset the converter without calling the callback function */
+ _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
+ }
+
+ /* done successfully */
+ return;
+ }
+ }
+
+ /* U_FAILURE(*err) */
+ {
+ UErrorCode e;
+
+ if( calledCallback ||
+ (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
+ (e!=U_INVALID_CHAR_FOUND &&
+ e!=U_ILLEGAL_CHAR_FOUND &&
+ e!=U_TRUNCATED_CHAR_FOUND &&
+ e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
+ e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
+ ) {
+ /*
+ * the callback did not or cannot resolve the error:
+ * set output pointers and return
+ *
+ * the check for buffer overflow is redundant but it is
+ * a high-runner case and hopefully documents the intent
+ * well
+ *
+ * if we were replaying, then the replay buffer must be
+ * copied back into the UConverter
+ * and the real arguments must be restored
+ */
+ if(realSource!=NULL) {
+ int32_t length;
+
+ U_ASSERT(cnv->preToULength==0);
+
+ length=(int32_t)(pArgs->sourceLimit-pArgs->source);
+ if(length>0) {
+ uprv_memcpy(cnv->preToU, pArgs->source, length);
+ cnv->preToULength=(int8_t)-length;
+ }
+
+ pArgs->source=realSource;
+ pArgs->sourceLimit=realSourceLimit;
+ pArgs->flush=realFlush;
+ }
+
+ return;
+ }
+ }
+
+ /* copy toUBytes[] to invalidCharBuffer[] */
+ errorInputLength=cnv->invalidCharLength=cnv->toULength;
+ if(errorInputLength>0) {
+ uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
+ }
+
+ /* set the converter state to deal with the next character */
+ cnv->toULength=0;
+
+ /* call the callback function */
+ /* a reason still at the default UCNV_ILLEGAL is reported as UCNV_UNASSIGNED for U_INVALID_CHAR_FOUND */
+ if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
+ cnv->toUCallbackReason = UCNV_UNASSIGNED;
+ }
+ cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
+ cnv->invalidCharBuffer, errorInputLength,
+ cnv->toUCallbackReason,
+ err);
+ cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
+
+ /*
+ * loop back to the offset handling
+ *
+ * this flag will indicate after offset handling
+ * that a callback was called;
+ * if the callback did not resolve the error, then we return
+ */
+ calledCallback=TRUE;
+ }
+ }
+}
+
+/*
+ * Writes the pending toUnicode overflow units (cnv->UCharErrorBuffer) to the target.
+ * Call this function if(cnv->UCharErrorBufferLength>0).
+ *
+ * Each unit written gets the offset -1 if offsets are requested.
+ * If the target fills up first, the unwritten units are moved to the start
+ * of the overflow buffer and *err is set to U_BUFFER_OVERFLOW_ERROR.
+ * *target and, if used, *pOffsets are advanced in either case.
+ * @return TRUE if overflow
+ */
+static UBool
+ucnv_outputOverflowToUnicode(UConverter *cnv,
+                             UChar **target, const UChar *targetLimit,
+                             int32_t **pOffsets,
+                             UErrorCode *err) {
+    UChar *pending=cnv->UCharErrorBuffer;
+    UChar *out=*target;
+    int32_t *offs=(pOffsets!=NULL) ? *pOffsets : NULL;
+    int32_t count=cnv->UCharErrorBufferLength;
+    int32_t taken=0, kept=0;
+
+    /* copy the overflow contents to the target while there is room */
+    while(taken<count && out!=targetLimit) {
+        *out++=pending[taken++];
+        if(offs!=NULL) {
+            *offs++=-1; /* no source index available for old output */
+        }
+    }
+
+    /* the overflow buffer contains too much, keep the rest at its beginning */
+    while(taken<count) {
+        pending[kept++]=pending[taken++];
+    }
+
+    cnv->UCharErrorBufferLength=(int8_t)kept;
+    *target=out;
+    if(offs!=NULL) {
+        *pOffsets=offs;
+    }
+
+    if(kept>0) {
+        *err=U_BUFFER_OVERFLOW_ERROR;
+        return TRUE;
+    }
+    return FALSE;
+}
+
+/*
+ * Streaming conversion from the converter's charset to UTF-16.
+ *
+ * Checks the arguments, first writes out units still pending in the
+ * converter's overflow buffer, then runs _toUnicodeWithCallback() and
+ * stores the advanced pointers back into *source and *target.
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR if cnv, target or source is NULL, if a limit
+ * is before its start pointer, if a buffer is too large, or if the target
+ * range has an odd number of bytes.
+ * Does nothing if err is NULL or *err already indicates a failure.
+ */
+U_CAPI void U_EXPORT2
+ucnv_toUnicode(UConverter *cnv,
+ UChar **target, const UChar *targetLimit,
+ const char **source, const char *sourceLimit,
+ int32_t *offsets,
+ UBool flush,
+ UErrorCode *err) {
+ UConverterToUnicodeArgs args;
+ const char *s;
+ UChar *t;
+
+ /* check parameters */
+ if(err==NULL || U_FAILURE(*err)) {
+ return;
+ }
+
+ if(cnv==NULL || target==NULL || source==NULL) {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ s=*source;
+ t=*target;
+
+ if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) {
+ /*
+ Prevent code from going into an infinite loop in case we do hit this
+ limit. The limit pointer is expected to be on a UChar * boundary.
+ This also prevents the next argument check from failing.
+ */
+ targetLimit = (const UChar *)(((const char *)targetLimit) - 1);
+ }
+
+ /*
+ * All these conditions should never happen.
+ *
+ * 1) Make sure that the limits are >= to the address source or target
+ *
+ * 2) Make sure that the buffer sizes do not exceed the number range for
+ * int32_t because some functions use the size (in units or bytes)
+ * rather than comparing pointers, and because offsets are int32_t values.
+ *
+ * size_t is guaranteed to be unsigned and large enough for the job.
+ *
+ * Return with an error instead of adjusting the limits because we would
+ * not be able to maintain the semantics that either the source must be
+ * consumed or the target filled (unless an error occurs).
+ * An adjustment would be sourceLimit=s+0x7fffffff; for example.
+ *
+ * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
+ * to a char * pointer and provide an incomplete UChar code unit.
+ */
+ if (sourceLimit<s || targetLimit<t ||
+ ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
+ ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) ||
+ (((const char *)targetLimit-(const char *)t) & 1) != 0
+ ) {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ /* output the target overflow buffer */
+ if( cnv->UCharErrorBufferLength>0 &&
+ ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)
+ ) {
+ /* U_BUFFER_OVERFLOW_ERROR */
+ return;
+ }
+ /* *target may have moved, therefore stop using t */
+
+ /* preToULength<0 means there are stored replay bytes that still need converting */
+ if(!flush && s==sourceLimit && cnv->preToULength>=0) {
+ /* the overflow buffer is emptied and there is no new input: we are done */
+ return;
+ }
+
+ /*
+ * Do not simply return with a buffer overflow error if
+ * !flush && t==targetLimit
+ * because it is possible that the source will not generate any output.
+ * For example, the skip callback may be called;
+ * it does not output anything.
+ */
+
+ /* prepare the converter arguments */
+ args.converter=cnv;
+ args.flush=flush;
+ args.offsets=offsets;
+ args.source=s;
+ args.sourceLimit=sourceLimit;
+ args.target=*target;
+ args.targetLimit=targetLimit;
+ args.size=sizeof(args);
+
+ _toUnicodeWithCallback(&args, err);
+
+ /* report how far the conversion got */
+ *source=args.source;
+ *target=args.target;
+}
+
+/* ucnv_to/fromUChars() ----------------------------------------------------- */
+
+/*
+ * Converts a whole UTF-16 string to the converter's charset in one call.
+ *
+ * The converter's fromUnicode state is reset first. srcLength==-1 means that
+ * src is NUL-terminated. If dest is too small, the conversion continues into
+ * a scratch buffer so that the full output length can still be counted.
+ * The result of u_terminateChars() for that length is returned.
+ *
+ * Returns 0 if pErrorCode is NULL or already indicates a failure, and 0 with
+ * U_ILLEGAL_ARGUMENT_ERROR for a NULL converter or inconsistent buffer arguments.
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_fromUChars(UConverter *cnv,
+                char *dest, int32_t destCapacity,
+                const UChar *src, int32_t srcLength,
+                UErrorCode *pErrorCode) {
+    char *const start=dest;
+    char *limit;
+    const UChar *srcEnd;
+    int32_t total;
+
+    /* check arguments */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    if( cnv==NULL ||
+        destCapacity<0 || (dest==NULL && destCapacity>0) ||
+        srcLength<-1 || (src==NULL && srcLength!=0)
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* initialize */
+    ucnv_resetFromUnicode(cnv);
+    if(srcLength==-1) {
+        srcLength=u_strlen(src);
+    }
+    if(srcLength<=0) {
+        /* nothing to convert */
+        return u_terminateChars(start, destCapacity, 0, pErrorCode);
+    }
+
+    srcEnd=src+srcLength;
+    destCapacity=pinCapacity(dest, destCapacity);
+    limit=dest+destCapacity;
+
+    /* perform the conversion */
+    ucnv_fromUnicode(cnv, &dest, limit, &src, srcEnd, NULL, TRUE, pErrorCode);
+    total=(int32_t)(dest-start);
+
+    /* if an overflow occurs, then get the preflighting length */
+    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
+        char scratch[1024];
+        char *const scratchLimit=scratch+sizeof(scratch);
+
+        do {
+            dest=scratch;
+            *pErrorCode=U_ZERO_ERROR;
+            ucnv_fromUnicode(cnv, &dest, scratchLimit, &src, srcEnd, NULL, TRUE, pErrorCode);
+            total+=(int32_t)(dest-scratch);
+        } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
+    }
+
+    return u_terminateChars(start, destCapacity, total, pErrorCode);
+}
+
+/*
+ * Converts a whole charset string to UTF-16 in one call.
+ *
+ * The converter's toUnicode state is reset first. srcLength==-1 means that
+ * src is NUL-terminated. If dest is too small, the conversion continues into
+ * a scratch buffer so that the full output length can still be counted.
+ * The result of u_terminateUChars() for that length is returned.
+ *
+ * Returns 0 if pErrorCode is NULL or already indicates a failure, and 0 with
+ * U_ILLEGAL_ARGUMENT_ERROR for a NULL converter or inconsistent buffer arguments.
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_toUChars(UConverter *cnv,
+              UChar *dest, int32_t destCapacity,
+              const char *src, int32_t srcLength,
+              UErrorCode *pErrorCode) {
+    UChar *const start=dest;
+    UChar *limit;
+    const char *srcEnd;
+    int32_t total;
+
+    /* check arguments */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    if( cnv==NULL ||
+        destCapacity<0 || (dest==NULL && destCapacity>0) ||
+        srcLength<-1 || (src==NULL && srcLength!=0)
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* initialize */
+    ucnv_resetToUnicode(cnv);
+    if(srcLength==-1) {
+        srcLength=(int32_t)uprv_strlen(src);
+    }
+    if(srcLength<=0) {
+        /* nothing to convert */
+        return u_terminateUChars(start, destCapacity, 0, pErrorCode);
+    }
+
+    srcEnd=src+srcLength;
+    destCapacity=pinCapacity(dest, destCapacity);
+    limit=dest+destCapacity;
+
+    /* perform the conversion */
+    ucnv_toUnicode(cnv, &dest, limit, &src, srcEnd, NULL, TRUE, pErrorCode);
+    total=(int32_t)(dest-start);
+
+    /* if an overflow occurs, then get the preflighting length */
+    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
+        UChar scratch[1024];
+        UChar *const scratchLimit=scratch+UPRV_LENGTHOF(scratch);
+
+        do {
+            dest=scratch;
+            *pErrorCode=U_ZERO_ERROR;
+            ucnv_toUnicode(cnv, &dest, scratchLimit, &src, srcEnd, NULL, TRUE, pErrorCode);
+            total+=(int32_t)(dest-scratch);
+        } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
+    }
+
+    return u_terminateUChars(start, destCapacity, total, pErrorCode);
+}
+
+/* ucnv_getNextUChar() ------------------------------------------------------ */
+
+/*
+ * Convert the next code point from the input at *source and return it.
+ *
+ * cnv          converter to use; must not be NULL
+ * source       in/out pointer to the current input position; must not be NULL;
+ *              updated to the position reached by the converter
+ * sourceLimit  end of the input; must not be before *source
+ * err          in/out error code; nothing is done if it is NULL or already
+ *              indicates a failure
+ *
+ * Returns the code point; a lead surrogate directly followed by a trail
+ * surrogate is combined into one supplementary code point.
+ * Returns 0xffff when there is no output: on bad arguments
+ * (U_ILLEGAL_ARGUMENT_ERROR), on conversion failures, and when no output was
+ * produced at all (U_INDEX_OUTOFBOUNDS_ERROR).
+ * Output beyond the returned code point is kept in cnv->UCharErrorBuffer
+ * and is picked up at the start of a later call.
+ */
+U_CAPI UChar32 U_EXPORT2
+ucnv_getNextUChar(UConverter *cnv,
+ const char **source, const char *sourceLimit,
+ UErrorCode *err) {
+ UConverterToUnicodeArgs args;
+ UChar buffer[U16_MAX_LENGTH];
+ const char *s;
+ UChar32 c;
+ int32_t i, length;
+
+ /* check parameters */
+ if(err==NULL || U_FAILURE(*err)) {
+ return 0xffff;
+ }
+
+ if(cnv==NULL || source==NULL) {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0xffff;
+ }
+
+ s=*source;
+ if(sourceLimit<s) {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0xffff;
+ }
+
+ /*
+ * Make sure that the buffer sizes do not exceed the number range for
+ * int32_t because some functions use the size (in units or bytes)
+ * rather than comparing pointers, and because offsets are int32_t values.
+ *
+ * size_t is guaranteed to be unsigned and large enough for the job.
+ *
+ * Return with an error instead of adjusting the limits because we would
+ * not be able to maintain the semantics that either the source must be
+ * consumed or the target filled (unless an error occurs).
+ * An adjustment would be sourceLimit=s+0x7fffffff; for example.
+ */
+ if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0xffff;
+ }
+
+ /* c<0 below means that no code unit was taken from the overflow buffer */
+ c=U_SENTINEL;
+
+ /* flush the target overflow buffer */
+ if(cnv->UCharErrorBufferLength>0) {
+ UChar *overflow;
+
+ overflow=cnv->UCharErrorBuffer;
+ i=0;
+ length=cnv->UCharErrorBufferLength;
+ U16_NEXT(overflow, i, length, c);
+
+ /* move the remaining overflow contents up to the beginning */
+ if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
+ uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
+ cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
+ }
+
+ if(!U16_IS_LEAD(c) || i<length) {
+ return c;
+ }
+ /*
+ * Continue if the overflow buffer contained only a lead surrogate,
+ * in case the converter outputs single surrogates from complete
+ * input sequences.
+ */
+ }
+
+ /*
+ * flush==TRUE is implied for ucnv_getNextUChar()
+ *
+ * do not simply return even if s==sourceLimit because the converter may
+ * not have seen flush==TRUE before
+ */
+
+ /* prepare the converter arguments */
+ args.converter=cnv;
+ args.flush=TRUE;
+ args.offsets=NULL;
+ args.source=s;
+ args.sourceLimit=sourceLimit;
+ args.target=buffer;
+ /* room for one UChar at first; widened to two below after a lead surrogate */
+ args.targetLimit=buffer+1;
+ args.size=sizeof(args);
+
+ if(c<0) {
+ /*
+ * call the native getNextUChar() implementation if we are
+ * at a character boundary (toULength==0)
+ *
+ * unlike with _toUnicode(), getNextUChar() implementations must set
+ * U_TRUNCATED_CHAR_FOUND for truncated input,
+ * in addition to setting toULength/toUBytes[]
+ */
+ if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
+ c=cnv->sharedData->impl->getNextUChar(&args, err);
+ *source=s=args.source;
+ if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
+ /* reset the converter without calling the callback function */
+ _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
+ return 0xffff; /* no output */
+ } else if(U_SUCCESS(*err) && c>=0) {
+ return c;
+ /*
+ * else fall through to use _toUnicode() because
+ * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
+ * U_FAILURE: call _toUnicode() for callback handling (do not output c)
+ */
+ }
+ }
+
+ /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
+ _toUnicodeWithCallback(&args, err);
+
+ /* an overflow only means that more than one UChar was produced; not an error here */
+ if(*err==U_BUFFER_OVERFLOW_ERROR) {
+ *err=U_ZERO_ERROR;
+ }
+
+ i=0;
+ length=(int32_t)(args.target-buffer);
+ } else {
+ /* write the lead surrogate from the overflow buffer */
+ buffer[0]=(UChar)c;
+ args.target=buffer+1;
+ i=0;
+ length=1;
+ }
+
+ /* buffer contents starts at i and ends before length */
+
+ if(U_FAILURE(*err)) {
+ c=0xffff; /* no output */
+ } else if(length==0) {
+ /* no input or only state changes */
+ *err=U_INDEX_OUTOFBOUNDS_ERROR;
+ /* no need to reset explicitly because _toUnicodeWithCallback() did it */
+ c=0xffff; /* no output */
+ } else {
+ c=buffer[0];
+ i=1;
+ if(!U16_IS_LEAD(c)) {
+ /* consume c=buffer[0], done */
+ } else {
+ /* got a lead surrogate, see if a trail surrogate follows */
+ UChar c2;
+
+ if(cnv->UCharErrorBufferLength>0) {
+ /* got overflow output from the conversion */
+ if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
+ /* got a trail surrogate, too */
+ c=U16_GET_SUPPLEMENTARY(c, c2);
+
+ /* move the remaining overflow contents up to the beginning */
+ if((--cnv->UCharErrorBufferLength)>0) {
+ uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
+ cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
+ }
+ } else {
+ /* c is an unpaired lead surrogate, just return it */
+ }
+ } else if(args.source<sourceLimit) {
+ /* convert once more, to buffer[1] */
+ args.targetLimit=buffer+2;
+ _toUnicodeWithCallback(&args, err);
+ if(*err==U_BUFFER_OVERFLOW_ERROR) {
+ *err=U_ZERO_ERROR;
+ }
+
+ length=(int32_t)(args.target-buffer);
+ if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
+ /* got a trail surrogate, too */
+ c=U16_GET_SUPPLEMENTARY(c, c2);
+ i=2;
+ }
+ }
+ }
+ }
+
+ /*
+ * move leftover output from buffer[i..length[
+ * into the beginning of the overflow buffer
+ */
+ if(i<length) {
+ /* move further overflow back */
+ int32_t delta=length-i;
+ if((length=cnv->UCharErrorBufferLength)>0) {
+ uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
+ length*U_SIZEOF_UCHAR);
+ }
+ cnv->UCharErrorBufferLength=(int8_t)(length+delta);
+
+ /* buffer[] holds at most two UChars, so delta is 1 or 2 */
+ cnv->UCharErrorBuffer[0]=buffer[i++];
+ if(delta>1) {
+ cnv->UCharErrorBuffer[1]=buffer[i];
+ }
+ }
+
+ *source=args.source;
+ return c;
+}
+
+/* ucnv_convert() and siblings ---------------------------------------------- */
+
+/*
+ * Convert from one codepage to another: sourceCnv turns the input bytes into
+ * UChars in a pivot buffer, targetCnv turns the pivot UChars into output bytes.
+ * When one side is UTF-8 and the other converter provides a direct
+ * fromUTF8/toUTF8 function, that function is used and the pivot buffer only
+ * serves error handling and partial matches.
+ *
+ * target/targetLimit   in/out output position and end of the output buffer
+ * source/sourceLimit   in/out input position and end of the input;
+ *                      sourceLimit==NULL means a NUL-terminated source string
+ * pivotStart..pivotLimit, pivotSource, pivotTarget
+ *                      caller-owned pivot buffer and its read/write positions;
+ *                      pivotStart==NULL selects a stack pivot buffer, which is
+ *                      only accepted together with flush==TRUE
+ * reset                if TRUE, both converters and the pivot positions are reset first
+ * flush                TRUE if this is the last chunk of input
+ * pErrorCode           in/out error code; U_ILLEGAL_ARGUMENT_ERROR for bad arguments
+ *
+ * On return *source and *target are updated. With flush and success the output
+ * is NUL-terminated if there is room, otherwise U_STRING_NOT_TERMINATED_WARNING is set.
+ */
+U_CAPI void U_EXPORT2
+ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
+ char **target, const char *targetLimit,
+ const char **source, const char *sourceLimit,
+ UChar *pivotStart, UChar **pivotSource,
+ UChar **pivotTarget, const UChar *pivotLimit,
+ UBool reset, UBool flush,
+ UErrorCode *pErrorCode) {
+ UChar pivotBuffer[CHUNK_SIZE];
+ const UChar *myPivotSource;
+ UChar *myPivotTarget;
+ const char *s;
+ char *t;
+
+ UConverterToUnicodeArgs toUArgs;
+ UConverterFromUnicodeArgs fromUArgs;
+ UConverterConvert convert;
+
+ /* error checking */
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return;
+ }
+
+ if( targetCnv==NULL || sourceCnv==NULL ||
+ source==NULL || *source==NULL ||
+ target==NULL || *target==NULL || targetLimit==NULL
+ ) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ s=*source;
+ t=*target;
+ if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ /*
+ * Make sure that the buffer sizes do not exceed the number range for
+ * int32_t. See ucnv_toUnicode() for a more detailed comment.
+ */
+ if(
+ (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
+ ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
+ ) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ if(pivotStart==NULL) {
+ if(!flush) {
+ /* streaming conversion requires an explicit pivot buffer */
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ /* use the stack pivot buffer */
+ myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
+ pivotSource=(UChar **)&myPivotSource;
+ pivotTarget=&myPivotTarget;
+ pivotLimit=pivotBuffer+CHUNK_SIZE;
+ } else if( pivotStart>=pivotLimit ||
+ pivotSource==NULL || *pivotSource==NULL ||
+ pivotTarget==NULL || *pivotTarget==NULL ||
+ pivotLimit==NULL
+ ) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ if(sourceLimit==NULL) {
+ /* get limit of single-byte-NUL-terminated source string */
+ sourceLimit=uprv_strchr(*source, 0);
+ }
+
+ if(reset) {
+ ucnv_resetToUnicode(sourceCnv);
+ ucnv_resetFromUnicode(targetCnv);
+ *pivotSource=*pivotTarget=pivotStart;
+ } else if(targetCnv->charErrorBufferLength>0) {
+ /* output the targetCnv overflow buffer */
+ if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
+ /* U_BUFFER_OVERFLOW_ERROR */
+ return;
+ }
+ /* *target has moved, therefore stop using t */
+
+ if( !flush &&
+ targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
+ sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
+ ) {
+ /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
+ return;
+ }
+ }
+
+ /* Is direct-UTF-8 conversion available? */
+ if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
+ targetCnv->sharedData->impl->fromUTF8!=NULL
+ ) {
+ convert=targetCnv->sharedData->impl->fromUTF8;
+ } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
+ sourceCnv->sharedData->impl->toUTF8!=NULL
+ ) {
+ convert=sourceCnv->sharedData->impl->toUTF8;
+ } else {
+ convert=NULL;
+ }
+
+ /*
+ * If direct-UTF-8 conversion is available, then we use a smaller
+ * pivot buffer for error handling and partial matches
+ * so that we quickly return to direct conversion.
+ *
+ * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
+ *
+ * We could reduce the pivot buffer size further, at the cost of
+ * buffer overflows from callbacks.
+ * The pivot buffer should not be smaller than the maximum number of
+ * fromUnicode extension table input UChars
+ * (for m:n conversion, see
+ * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
+ * or 2 for surrogate pairs.
+ *
+ * Too small a buffer can cause thrashing between pivoting and direct
+ * conversion, with function call overhead outweighing the benefits
+ * of direct conversion.
+ */
+ if(convert!=NULL && (pivotLimit-pivotStart)>32) {
+ pivotLimit=pivotStart+32;
+ }
+
+ /* prepare the converter arguments */
+ fromUArgs.converter=targetCnv;
+ /* the fromUnicode side is flushed only after toUnicode has seen the end of the input */
+ fromUArgs.flush=FALSE;
+ fromUArgs.offsets=NULL;
+ fromUArgs.target=*target;
+ fromUArgs.targetLimit=targetLimit;
+ fromUArgs.size=sizeof(fromUArgs);
+
+ toUArgs.converter=sourceCnv;
+ toUArgs.flush=flush;
+ toUArgs.offsets=NULL;
+ toUArgs.source=s;
+ toUArgs.sourceLimit=sourceLimit;
+ toUArgs.targetLimit=pivotLimit;
+ toUArgs.size=sizeof(toUArgs);
+
+ /*
+ * TODO: Consider separating this function into two functions,
+ * extracting exactly the conversion loop,
+ * for readability and to reduce the set of visible variables.
+ *
+ * Otherwise stop using s and t from here on.
+ */
+ s=t=NULL;
+
+ /*
+ * conversion loop
+ *
+ * The sequence of steps in the loop may appear backward,
+ * but the principle is simple:
+ * In the chain of
+ * source - sourceCnv overflow - pivot - targetCnv overflow - target
+ * empty out later buffers before refilling them from earlier ones.
+ *
+ * The targetCnv overflow buffer is flushed out only once before the loop.
+ */
+ for(;;) {
+ /*
+ * if(pivot not empty or error or replay or flush fromUnicode) {
+ * fromUnicode(pivot -> target);
+ * }
+ *
+ * For pivoting conversion; and for direct conversion for
+ * error callback handling and flushing the replay buffer.
+ */
+ if( *pivotSource<*pivotTarget ||
+ U_FAILURE(*pErrorCode) ||
+ targetCnv->preFromULength<0 ||
+ fromUArgs.flush
+ ) {
+ fromUArgs.source=*pivotSource;
+ fromUArgs.sourceLimit=*pivotTarget;
+ _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ /* target overflow, or conversion error */
+ *pivotSource=(UChar *)fromUArgs.source;
+ break;
+ }
+
+ /*
+ * _fromUnicodeWithCallback() must have consumed the pivot contents
+ * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
+ */
+ }
+
+ /* The pivot buffer is empty; reset it so we start at pivotStart. */
+ *pivotSource=*pivotTarget=pivotStart;
+
+ /*
+ * if(sourceCnv overflow buffer not empty) {
+ * move(sourceCnv overflow buffer -> pivot);
+ * continue;
+ * }
+ */
+ /* output the sourceCnv overflow buffer */
+ if(sourceCnv->UCharErrorBufferLength>0) {
+ if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
+ /* U_BUFFER_OVERFLOW_ERROR */
+ *pErrorCode=U_ZERO_ERROR;
+ }
+ continue;
+ }
+
+ /*
+ * check for end of input and break if done
+ *
+ * Checking both flush and fromUArgs.flush ensures that the converters
+ * have been called with the flush flag set if the ucnv_convertEx()
+ * caller set it.
+ */
+ if( toUArgs.source==sourceLimit &&
+ sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
+ (!flush || fromUArgs.flush)
+ ) {
+ /* done successfully */
+ break;
+ }
+
+ /*
+ * use direct conversion if available
+ * but not if continuing a partial match
+ * or flushing the toUnicode replay buffer
+ */
+ if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
+ if(*pErrorCode==U_USING_DEFAULT_WARNING) {
+ /* remove a warning that may be set by this function */
+ *pErrorCode=U_ZERO_ERROR;
+ }
+ convert(&fromUArgs, &toUArgs, pErrorCode);
+ if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
+ break;
+ } else if(U_FAILURE(*pErrorCode)) {
+ if(sourceCnv->toULength>0) {
+ /*
+ * Fall through to calling _toUnicodeWithCallback()
+ * for callback handling.
+ *
+ * The pivot buffer will be reset with
+ * *pivotSource=*pivotTarget=pivotStart;
+ * which indicates a toUnicode error to the caller
+ * (*pivotSource==pivotStart shows no pivot UChars consumed).
+ */
+ } else {
+ /*
+ * Indicate a fromUnicode error to the caller
+ * (*pivotSource>pivotStart shows some pivot UChars consumed).
+ */
+ *pivotSource=*pivotTarget=pivotStart+1;
+ /*
+ * Loop around to calling _fromUnicodeWithCallback()
+ * for callback handling.
+ */
+ continue;
+ }
+ } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
+ /*
+ * No error, but the implementation requested to temporarily
+ * fall back to pivoting.
+ */
+ *pErrorCode=U_ZERO_ERROR;
+ /*
+ * The following else branches are almost identical to the end-of-input
+ * handling in _toUnicodeWithCallback().
+ * Avoid calling it just for the end of input.
+ */
+ } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
+ /*
+ * the entire input stream is consumed
+ * and there is a partial, truncated input sequence left
+ */
+
+ /* inject an error and continue with callback handling */
+ *pErrorCode=U_TRUNCATED_CHAR_FOUND;
+ } else {
+ /* input consumed */
+ if(flush) {
+ /* reset the converters without calling the callback functions */
+ _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
+ _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
+ }
+
+ /* done successfully */
+ break;
+ }
+ }
+
+ /*
+ * toUnicode(source -> pivot);
+ *
+ * For pivoting conversion; and for direct conversion for
+ * error callback handling, continuing partial matches
+ * and flushing the replay buffer.
+ *
+ * The pivot buffer is empty and reset.
+ */
+ toUArgs.target=pivotStart; /* ==*pivotTarget */
+ /* toUArgs.targetLimit=pivotLimit; already set before the loop */
+ _toUnicodeWithCallback(&toUArgs, pErrorCode);
+ *pivotTarget=toUArgs.target;
+ if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
+ /* pivot overflow: continue with the conversion loop */
+ *pErrorCode=U_ZERO_ERROR;
+ } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
+ /* conversion error, or there was nothing left to convert */
+ break;
+ }
+ /*
+ * else:
+ * _toUnicodeWithCallback() wrote into the pivot buffer,
+ * continue with fromUnicode conversion.
+ *
+ * Set the fromUnicode flush flag if we flush and if toUnicode has
+ * processed the end of the input.
+ */
+ if( flush && toUArgs.source==sourceLimit &&
+ sourceCnv->preToULength>=0 &&
+ sourceCnv->UCharErrorBufferLength==0
+ ) {
+ fromUArgs.flush=TRUE;
+ }
+ }
+
+ /*
+ * The conversion loop is exited when one of the following is true:
+ * - the entire source text has been converted successfully to the target buffer
+ * - a target buffer overflow occurred
+ * - a conversion error occurred
+ */
+
+ /* report the positions reached back to the caller */
+ *source=toUArgs.source;
+ *target=fromUArgs.target;
+
+ /* terminate the target buffer if possible */
+ if(flush && U_SUCCESS(*pErrorCode)) {
+ if(*target!=targetLimit) {
+ **target=0;
+ if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
+ *pErrorCode=U_ZERO_ERROR;
+ }
+ } else {
+ *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
+ }
+ }
+}
+
+/*
+ * Shared worker behind ucnv_convert() and ucnv_convertAlgorithmic().
+ *
+ * Runs source through inConverter and outConverter into target and returns
+ * the length of the complete output in bytes. When target is too small, or
+ * targetCapacity==0, the conversion continues into a scratch buffer so that
+ * the required length can still be reported (preflighting).
+ * A negative sourceLength means that source is NUL-terminated.
+ */
+static int32_t
+ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
+                     char *target, int32_t targetCapacity,
+                     const char *source, int32_t sourceLength,
+                     UErrorCode *pErrorCode) {
+    UChar pivotBuffer[CHUNK_SIZE];
+    UChar *const pivotEnd=pivotBuffer+CHUNK_SIZE;
+    UChar *pivotRead=pivotBuffer;
+    UChar *pivotWrite=pivotBuffer;
+
+    char *out=target;
+    int32_t written=0;
+
+    /* locate the end of the input */
+    const char *const sourceLimit=
+        sourceLength<0 ? uprv_strchr(source, 0) : source+sourceLength;
+
+    /* empty input: only terminate the output */
+    if(sourceLimit==source) {
+        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
+    }
+
+    if(targetCapacity>0) {
+        /* real conversion, straight into the caller's buffer */
+        ucnv_convertEx(outConverter, inConverter,
+                       &out, target+targetCapacity,
+                       &source, sourceLimit,
+                       pivotBuffer, &pivotRead, &pivotWrite, pivotEnd,
+                       FALSE,
+                       TRUE,
+                       pErrorCode);
+        written=(int32_t)(out-target);
+    }
+
+    if(targetCapacity!=0 && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
+        /* no preflighting needed; ucnv_convertEx() took care of termination */
+        return written;
+    }
+
+    /*
+     * The output buffer is exhausted (or absent): stop writing to it but keep
+     * converting into a scratch buffer to count the bytes that are required.
+     */
+    char scratch[CHUNK_SIZE];
+    const char *const scratchEnd=scratch+CHUNK_SIZE;
+
+    do {
+        *pErrorCode=U_ZERO_ERROR;
+        out=scratch;
+        ucnv_convertEx(outConverter, inConverter,
+                       &out, scratchEnd,
+                       &source, sourceLimit,
+                       pivotBuffer, &pivotRead, &pivotWrite, pivotEnd,
+                       FALSE,
+                       TRUE,
+                       pErrorCode);
+        written+=(int32_t)(out-scratch);
+    } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
+
+    /* preflighting finished: set warnings and errors as appropriate */
+    return u_terminateChars(target, targetCapacity, written, pErrorCode);
+}
+
+/*
+ * Convert source from the codepage fromConverterName to the codepage
+ * toConverterName, using two converters that live on the stack for the
+ * duration of the call. Returns the length of the complete output;
+ * sourceLength==-1 means that source is NUL-terminated.
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_convert(const char *toConverterName, const char *fromConverterName,
+             char *target, int32_t targetCapacity,
+             const char *source, int32_t sourceLength,
+             UErrorCode *pErrorCode) {
+    UConverter inStorage, outStorage; /* stack-allocated */
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* validate the source, then the target description */
+    if(source==NULL || sourceLength<-1) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    if(targetCapacity<0 || (target==NULL && targetCapacity>0)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* empty input: terminate the output without opening any converter */
+    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
+        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
+    }
+
+    /* open the source converter first, then the target converter */
+    UConverter *const sourceCnv=ucnv_createConverter(&inStorage, fromConverterName, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    UConverter *const targetCnv=ucnv_createConverter(&outStorage, toConverterName, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        /* do not leak the converter that was opened already */
+        ucnv_close(sourceCnv);
+        return 0;
+    }
+
+    const int32_t outputLength=ucnv_internalConvert(targetCnv, sourceCnv,
+                                                    target, targetCapacity,
+                                                    source, sourceLength,
+                                                    pErrorCode);
+
+    ucnv_close(sourceCnv);
+    ucnv_close(targetCnv);
+
+    return outputLength;
+}
+
+/*
+ * @internal
+ * Common implementation of ucnv_toAlgorithmic() and ucnv_fromAlgorithmic():
+ * converts between cnv and a temporary algorithmic converter of the given type.
+ * convertToAlgorithmic selects the direction (TRUE: cnv->Unicode->algo).
+ */
+static int32_t
+ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
+                        UConverterType algorithmicType,
+                        UConverter *cnv,
+                        char *target, int32_t targetCapacity,
+                        const char *source, int32_t sourceLength,
+                        UErrorCode *pErrorCode) {
+    UConverter algoStorage; /* stack-allocated */
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* validate the converter and source, then the target description */
+    if(cnv==NULL || source==NULL || sourceLength<-1) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    if(targetCapacity<0 || (target==NULL && targetCapacity>0)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* empty input: only terminate the output */
+    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
+        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
+    }
+
+    /* open the temporary algorithmic converter */
+    UConverter *const algo=ucnv_createAlgorithmicConverter(&algoStorage, algorithmicType,
+                                                           "", 0, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* reset the side of cnv that is about to be used */
+    if(convertToAlgorithmic) {
+        ucnv_resetToUnicode(cnv);    /* cnv->Unicode->algo */
+    } else {
+        ucnv_resetFromUnicode(cnv);  /* algo->Unicode->cnv */
+    }
+    UConverter *const toCnv  =convertToAlgorithmic ? algo : cnv;
+    UConverter *const fromCnv=convertToAlgorithmic ? cnv : algo;
+
+    const int32_t outputLength=ucnv_internalConvert(toCnv, fromCnv,
+                                                    target, targetCapacity,
+                                                    source, sourceLength,
+                                                    pErrorCode);
+
+    ucnv_close(algo);
+
+    return outputLength;
+}
+
+/*
+ * Convert from the codepage of cnv to the algorithmic codepage algorithmicType.
+ * All argument checking happens in ucnv_convertAlgorithmic().
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_toAlgorithmic(UConverterType algorithmicType,
+                   UConverter *cnv,
+                   char *target, int32_t targetCapacity,
+                   const char *source, int32_t sourceLength,
+                   UErrorCode *pErrorCode) {
+    /* TRUE selects the direction cnv->Unicode->algorithmic */
+    const int32_t outputLength=
+        ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
+                                target, targetCapacity,
+                                source, sourceLength,
+                                pErrorCode);
+    return outputLength;
+}
+
+/*
+ * Convert from the algorithmic codepage algorithmicType to the codepage of cnv.
+ * All argument checking happens in ucnv_convertAlgorithmic().
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_fromAlgorithmic(UConverter *cnv,
+                     UConverterType algorithmicType,
+                     char *target, int32_t targetCapacity,
+                     const char *source, int32_t sourceLength,
+                     UErrorCode *pErrorCode) {
+    /* FALSE selects the direction algorithmic->Unicode->cnv */
+    const int32_t outputLength=
+        ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
+                                target, targetCapacity,
+                                source, sourceLength,
+                                pErrorCode);
+    return outputLength;
+}
+
+/*
+ * Return the conversion type of the converter. When legacy conversion is
+ * compiled in, the type of an MBCS converter comes from ucnv_MBCSGetType().
+ */
+U_CAPI UConverterType U_EXPORT2
+ucnv_getType(const UConverter* converter)
+{
+    const int8_t staticType = converter->sharedData->staticData->conversionType;
+
+#if !UCONFIG_NO_LEGACY_CONVERSION
+    return staticType == UCNV_MBCS ? ucnv_MBCSGetType(converter)
+                                   : (UConverterType)staticType;
+#else
+    return (UConverterType)staticType;
+#endif
+}
+
+/*
+ * Fill starters[] by delegating to the converter implementation's
+ * getStarters function. Sets U_ILLEGAL_ARGUMENT_ERROR if the implementation
+ * does not provide one.
+ */
+U_CAPI void U_EXPORT2
+ucnv_getStarters(const UConverter* converter,
+                 UBool starters[256],
+                 UErrorCode* err)
+{
+    if (err == NULL || U_FAILURE(*err)) {
+        return;
+    }
+
+    /* this converter type has no notion of starter bytes */
+    if (converter->sharedData->impl->getStarters == NULL) {
+        *err = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    converter->sharedData->impl->getStarters(converter, starters, err);
+}
+
+/*
+ * Look up cnv by name in the ambiguousConverters table.
+ * Returns the matching entry, or NULL if cnv is NULL, its name cannot be
+ * retrieved, or the name is not in the table.
+ */
+static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
+{
+    if(cnv==NULL) {
+        return NULL;
+    }
+
+    UErrorCode nameStatus=U_ZERO_ERROR;
+    const char *const cnvName=ucnv_getName(cnv, &nameStatus);
+    if(U_FAILURE(nameStatus)) {
+        return NULL;
+    }
+
+    /* linear search over the table entries */
+    const UAmbiguousConverter *const tableEnd=
+        ambiguousConverters+UPRV_LENGTHOF(ambiguousConverters);
+    for(const UAmbiguousConverter *entry=ambiguousConverters; entry<tableEnd; ++entry) {
+        if(uprv_strcmp(cnvName, entry->name)==0) {
+            return entry;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * For converters listed in the ambiguousConverters table, replace every
+ * occurrence of the table entry's variant5c character in source[] with
+ * U+005C. Does nothing for other converters or for empty/NULL input.
+ */
+U_CAPI void U_EXPORT2
+ucnv_fixFileSeparator(const UConverter *cnv,
+                      UChar* source,
+                      int32_t sourceLength) {
+    if(cnv==NULL || source==NULL || sourceLength<=0) {
+        return;
+    }
+
+    const UAmbiguousConverter *const entry=ucnv_getAmbiguous(cnv);
+    if(entry==NULL) {
+        return;
+    }
+
+    const UChar variant=entry->variant5c;
+    UChar *const end=source+sourceLength;
+    for(UChar *p=source; p<end; ++p) {
+        if(*p==variant) {
+            *p=0x5c;
+        }
+    }
+}
+
+/* Return TRUE if cnv has an entry in the ambiguousConverters table. */
+U_CAPI UBool U_EXPORT2
+ucnv_isAmbiguous(const UConverter *cnv) {
+    const UAmbiguousConverter *const entry=ucnv_getAmbiguous(cnv);
+    return (UBool)(entry!=NULL);
+}
+
+/* Store the fallback setting in the converter; cnv is not checked for NULL. */
+U_CAPI void U_EXPORT2
+ucnv_setFallback(UConverter *cnv, UBool usesFallback) {
+    UConverter *const converter=cnv;
+    converter->useFallback=usesFallback;
+}
+
+/* Return the fallback setting stored in the converter; cnv is not checked for NULL. */
+U_CAPI UBool U_EXPORT2
+ucnv_usesFallback(const UConverter *cnv) {
+    const UBool fallbackEnabled=cnv->useFallback;
+    return fallbackEnabled;
+}
+
+/*
+ * Copy the converter's invalid byte sequence into errBytes.
+ *
+ * len is in/out: on input the capacity of errBytes, on output the number of
+ * bytes copied. Sets U_INDEX_OUTOFBOUNDS_ERROR (and leaves *len alone) if the
+ * capacity is too small, U_ILLEGAL_ARGUMENT_ERROR for NULL arguments.
+ */
+U_CAPI void U_EXPORT2
+ucnv_getInvalidChars (const UConverter * converter,
+                      char *errBytes,
+                      int8_t * len,
+                      UErrorCode * err)
+{
+    if (err == NULL || U_FAILURE(*err)) {
+        return;
+    }
+    if (converter == NULL || errBytes == NULL || len == NULL) {
+        *err = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    const int8_t pending = converter->invalidCharLength;
+    if (*len < pending) {
+        /* the caller's buffer cannot hold the whole sequence */
+        *err = U_INDEX_OUTOFBOUNDS_ERROR;
+        return;
+    }
+
+    *len = pending;
+    if (pending > 0) {
+        uprv_memcpy (errBytes, converter->invalidCharBuffer, pending);
+    }
+}
+
+/*
+ * Copy the converter's invalid UChar sequence into errChars.
+ *
+ * len is in/out: on input the capacity of errChars, on output the number of
+ * UChars copied. Sets U_INDEX_OUTOFBOUNDS_ERROR (and leaves *len alone) if the
+ * capacity is too small, U_ILLEGAL_ARGUMENT_ERROR for NULL arguments.
+ */
+U_CAPI void U_EXPORT2
+ucnv_getInvalidUChars (const UConverter * converter,
+                       UChar *errChars,
+                       int8_t * len,
+                       UErrorCode * err)
+{
+    if (err == NULL || U_FAILURE(*err)) {
+        return;
+    }
+    if (converter == NULL || errChars == NULL || len == NULL) {
+        *err = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    const int8_t pending = converter->invalidUCharLength;
+    if (*len < pending) {
+        /* the caller's buffer cannot hold the whole sequence */
+        *err = U_INDEX_OUTOFBOUNDS_ERROR;
+        return;
+    }
+
+    *len = pending;
+    if (pending > 0) {
+        u_memcpy (errChars, converter->invalidUCharBuffer, pending);
+    }
+}
+
+#define SIG_MAX_LEN 5
+
+/*
+ * Detect a Unicode signature (byte order mark) at the start of source.
+ *
+ * source           input bytes; must not be NULL
+ * sourceLength     number of bytes, or -1 for a NUL-terminated string
+ * signatureLength  optional output: length of the signature, 0 if none was found
+ * pErrorCode       in/out error code
+ *
+ * Returns the name of the detected charset, or NULL if no signature was
+ * recognized or an error occurred.
+ */
+U_CAPI const char* U_EXPORT2
+ucnv_detectUnicodeSignature( const char* source,
+                             int32_t sourceLength,
+                             int32_t* signatureLength,
+                             UErrorCode* pErrorCode) {
+    int32_t unusedLength;
+
+    /*
+     * Pre-filled with 0xa5 so that inputs shorter than SIG_MAX_LEN
+     * cannot be mistaken for a signature.
+     */
+    char sig[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
+    int k;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    if(source==NULL || sourceLength<-1) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    /* let the code below write the length unconditionally */
+    if(signatureLength==NULL) {
+        signatureLength=&unusedLength;
+    }
+    if(sourceLength==-1) {
+        sourceLength=(int32_t)uprv_strlen(source);
+    }
+
+    /* copy at most SIG_MAX_LEN leading bytes */
+    for(k=0; k<sourceLength && k<SIG_MAX_LEN; ++k) {
+        sig[k]=source[k];
+    }
+
+    if(sig[0]=='\xFE' && sig[1]=='\xFF') {
+        *signatureLength=2;
+        return "UTF-16BE";
+    }
+    if(sig[0]=='\xFF' && sig[1]=='\xFE') {
+        /* FF FE 00 00 is UTF-32LE, otherwise FF FE is UTF-16LE */
+        if(sig[2]=='\x00' && sig[3]=='\x00') {
+            *signatureLength=4;
+            return "UTF-32LE";
+        }
+        *signatureLength=2;
+        return "UTF-16LE";
+    }
+    if(sig[0]=='\xEF' && sig[1]=='\xBB' && sig[2]=='\xBF') {
+        *signatureLength=3;
+        return "UTF-8";
+    }
+    if(sig[0]=='\x00' && sig[1]=='\x00' && sig[2]=='\xFE' && sig[3]=='\xFF') {
+        *signatureLength=4;
+        return "UTF-32BE";
+    }
+    if(sig[0]=='\x0E' && sig[1]=='\xFE' && sig[2]=='\xFF') {
+        *signatureLength=3;
+        return "SCSU";
+    }
+    if(sig[0]=='\xFB' && sig[1]=='\xEE' && sig[2]=='\x28') {
+        *signatureLength=3;
+        return "BOCU-1";
+    }
+    if(sig[0]=='\x2B' && sig[1]=='\x2F' && sig[2]=='\x76') {
+        /*
+         * UTF-7: an initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/
+         * depending on the second UTF-16 code unit.
+         * Prefer the entire, closed Unicode mode sequence +/v8- when only
+         * U+FEFF was encoded.
+         */
+        if(sig[3]=='\x38' && sig[4]=='\x2D') {
+            /* 5 bytes +/v8- */
+            *signatureLength=5;
+            return "UTF-7";
+        }
+        switch(sig[3]) {
+        case '\x38':
+        case '\x39':
+        case '\x2B':
+        case '\x2F':
+            /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */
+            *signatureLength=4;
+            return "UTF-7";
+        default:
+            /* "+/v" followed by something else is not a signature */
+            break;
+        }
+    }
+    if(sig[0]=='\xDD' && sig[1]=='\x73' && sig[2]=='\x66' && sig[3]=='\x73') {
+        *signatureLength=4;
+        return "UTF-EBCDIC";
+    }
+
+    /* no known Unicode signature byte sequence recognized */
+    *signatureLength=0;
+    return NULL;
+}
+
+/*
+ * Return the number of UChars held in the converter's fromUnicode state,
+ * derived from preFromUFirstCP/preFromULength or fromUChar32.
+ * Returns -1 on a NULL/failed status or a NULL converter
+ * (U_ILLEGAL_ARGUMENT_ERROR).
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
+{
+    if(status == NULL || U_FAILURE(*status)) {
+        return -1;
+    }
+    if(cnv == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return -1;
+    }
+
+    if(cnv->preFromUFirstCP >= 0) {
+        /* a first code point plus the UChars stored after it */
+        return U16_LENGTH(cnv->preFromUFirstCP) + cnv->preFromULength;
+    }
+    if(cnv->preFromULength < 0) {
+        /* a negative length is reported as its magnitude */
+        return -cnv->preFromULength;
+    }
+    if(cnv->fromUChar32 > 0) {
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Return the number of bytes held in the converter's toUnicode state,
+ * derived from preToULength or toULength.
+ * Returns -1 on a NULL/failed status or a NULL converter
+ * (U_ILLEGAL_ARGUMENT_ERROR).
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status) {
+    if(status == NULL || U_FAILURE(*status)) {
+        return -1;
+    }
+    if(cnv == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return -1;
+    }
+
+    if(cnv->preToULength > 0) {
+        return cnv->preToULength;
+    }
+    if(cnv->preToULength < 0) {
+        /* a negative length is reported as its magnitude */
+        return -cnv->preToULength;
+    }
+    if(cnv->toULength > 0) {
+        return cnv->toULength;
+    }
+    return 0;
+}
+
+/*
+ * Report whether the type of cnv, as returned by ucnv_getType(), is one of
+ * the types listed in the switch below (SBCS, DBCS, the UTF-32 variants
+ * and US-ASCII).
+ *
+ * cnv     converter to examine; NULL sets U_ILLEGAL_ARGUMENT_ERROR
+ * status  in/out error code; nothing is done if it is NULL or already
+ *         indicates a failure
+ *
+ * Returns TRUE for the listed types, FALSE otherwise and on error.
+ */
+U_CAPI UBool U_EXPORT2
+ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){
+    /*
+     * Check for a NULL status before dereferencing it, like
+     * ucnv_fromUCountPending() and ucnv_toUCountPending() do.
+     */
+    if (status == NULL || U_FAILURE(*status)) {
+        return FALSE;
+    }
+
+    if (cnv == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return FALSE;
+    }
+
+    switch (ucnv_getType(cnv)) {
+        case UCNV_SBCS:
+        case UCNV_DBCS:
+        case UCNV_UTF32_BigEndian:
+        case UCNV_UTF32_LittleEndian:
+        case UCNV_UTF32:
+        case UCNV_US_ASCII:
+            return TRUE;
+        default:
+            return FALSE;
+    }
+}
+#endif
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
diff --git a/thirdparty/icu4c/common/ucnv2022.cpp b/thirdparty/icu4c/common/ucnv2022.cpp
new file mode 100644
index 0000000000..169ad4c526
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv2022.cpp
@@ -0,0 +1,3973 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2000-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: ucnv2022.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2000feb03
+* created by: Markus W. Scherer
+*
+* Change history:
+*
+* 06/29/2000 helena Major rewrite of the callback APIs.
+* 08/08/2000 Ram Included support for ISO-2022-JP-2
+* Changed implementation of toUnicode
+* function
+* 08/21/2000 Ram Added support for ISO-2022-KR
+* 08/29/2000 Ram Separated implementation of EBCDIC to
+* ucnvebdc.c
+* 09/20/2000 Ram Added support for ISO-2022-CN
+* Added implementations for getNextUChar()
+* for specific 2022 country variants.
+* 10/31/2000 Ram Implemented offsets logic functions
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/uset.h"
+#include "unicode/ucnv_err.h"
+#include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
+#include "ucnv_imp.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "ucnvmbcs.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "uassert.h"
+
+#ifdef U_ENABLE_GENERIC_ISO_2022
+/*
+ * I am disabling the generic ISO-2022 converter after proposing to do so on
+ * the icu mailing list two days ago.
+ *
+ * Reasons:
+ * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
+ * its designation sequences, single shifts with return to the previous state,
+ * switch-with-no-return to UTF-16BE or similar, etc.
+ * This is unlike the language-specific variants like ISO-2022-JP which
+ * require a much smaller repertoire of ISO-2022 features.
+ * These variants continue to be supported.
+ * 2. I believe that no one is really using the generic ISO-2022 converter
+ * but rather always one of the language-specific variants.
+ * Note that ICU's generic ISO-2022 converter has always output one escape
+ * sequence followed by UTF-8 for the whole stream.
+ * 3. Switching between subcharsets is extremely slow, because each time
+ * the previous converter is closed and a new one opened,
+ * without any kind of caching, least-recently-used list, etc.
+ * 4. The code is currently buggy, and given the above it does not seem
+ * reasonable to spend the time on maintenance.
+ * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
+ * This means, for example, that when ISO-8859-7 is designated, the following
+ * ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
+ * The ICU ISO-2022 converter does not handle this - and has no information
+ * about which subconverter would have to be shifted vs. which is designed
+ * for 7-bit ISO-2022.
+ *
+ * Markus Scherer 2003-dec-03
+ */
+#endif
+
+#if !UCONFIG_ONLY_HTML_CONVERSION
+/* One-byte string containing 0x0F; only compiled when non-HTML conversions are enabled. */
+static const char SHIFT_IN_STR[] = "\x0F";
+// static const char SHIFT_OUT_STR[] = "\x0E";
+#endif
+
+/* Byte values of the ASCII carriage return, line feed, horizontal tab, vertical tab and space. */
+#define CR 0x0D
+#define LF 0x0A
+#define H_TAB 0x09
+#define V_TAB 0x0B
+#define SPACE 0x20
+
+/* Inclusive code point range U+FF61..U+FF9F; presumably the halfwidth Katakana range (users are outside this excerpt). */
+enum {
+ HWKANA_START=0xff61,
+ HWKANA_END=0xff9f
+};
+
+/*
+ * 94-character sets with native byte values A1..FE are encoded in ISO 2022
+ * as bytes 21..7E. (Subtract 0x80.)
+ * 96-character sets with native byte values A0..FF are encoded in ISO 2022
+ * as bytes 20..7F. (Subtract 0x80.)
+ * Do not encode C1 control codes with native bytes 80..9F
+ * as bytes 00..1F (C0 control codes).
+ */
+enum {
+ GR94_START=0xa1,
+ GR94_END=0xfe,
+ GR96_START=0xa0,
+ GR96_END=0xff
+};
+
+/*
+ * ISO 2022 control codes must not be converted from Unicode
+ * because they would mess up the byte stream.
+ * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
+ * corresponding to SO, SI, and ESC.
+ *
+ * The (c)<0x20 test runs first, so the shift count is below 32 whenever the
+ * shift is evaluated. The macro expands its argument twice; avoid passing
+ * expressions with side effects.
+ */
+#define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)
+
+/* for ISO-2022-JP and -CN implementations */
+/*
+ * Charset/state identifiers. The JP and CN groups reuse the same small
+ * integers (for example ISO8859_1 and GB2312_1 are both 1, JISX201 and
+ * CNS_11643 are both 3), so a value is only meaningful together with the
+ * variant it is used for.
+ */
+typedef enum {
+ /* shared values */
+ INVALID_STATE=-1,
+ ASCII = 0,
+
+ /* single-shift markers; SS3_STATE takes the next value, 0x11 */
+ SS2_STATE=0x10,
+ SS3_STATE,
+
+ /* JP */
+ ISO8859_1 = 1 ,
+ ISO8859_7 = 2 ,
+ JISX201 = 3,
+ JISX208 = 4,
+ JISX212 = 5,
+ GB2312 =6,
+ KSC5601 =7,
+ HWKANA_7BIT=8, /* Halfwidth Katakana 7 bit */
+
+ /* CN */
+ /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
+ GB2312_1=1,
+ ISO_IR_165=2,
+ CNS_11643=3,
+
+ /*
+ * these are used in StateEnum and ISO2022State variables,
+ * but CNS_11643 must be used to index into myConverterArray[]
+ */
+ /* CNS_11643_0 is 0x20; the following constants count up to CNS_11643_7 = 0x27 */
+ CNS_11643_0=0x20,
+ CNS_11643_1,
+ CNS_11643_2,
+ CNS_11643_3,
+ CNS_11643_4,
+ CNS_11643_5,
+ CNS_11643_6,
+ CNS_11643_7
+} StateEnum;
+
+/* is the StateEnum charset value for a DBCS charset? */
+/* In the full build this is the contiguous range JISX208..KSC5601 (values 4..7); the HTML-only build accepts JISX208 alone. */
+#if UCONFIG_ONLY_HTML_CONVERSION
+#define IS_JP_DBCS(cs) (JISX208==(cs))
+#else
+#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
+#endif
+
+/* Charset mask bit for a StateEnum value: bit number cs. The JP charset values used with it below are 0..8. */
+#define CSM(cs) ((uint16_t)1<<(cs))
+
+/*
+ * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
+ * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
+ *
+ * Note: The converter uses some leniency:
+ * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
+ * all versions, not just JIS7 and JIS8.
+ * - ICU does not distinguish between different versions of JIS X 0208.
+ */
+/* Highest supported version number for locale=ja; _ISO2022Open rejects larger values. */
+#if UCONFIG_ONLY_HTML_CONVERSION
+enum { MAX_JA_VERSION=0 };
+#else
+enum { MAX_JA_VERSION=4 };
+#endif
+/* Indexed by version (0..MAX_JA_VERSION); entries for versions 2, 3 and 4 are identical. */
+static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
+ CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
+#if !UCONFIG_ONLY_HTML_CONVERSION
+ CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
+ CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
+ CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
+ CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
+#endif
+};
+
+/* Type tag stored in UConverterDataISO2022::currentType; _ISO2022Open initializes that field to ASCII1. */
+typedef enum {
+ ASCII1=0,
+ LATIN1,
+ SBCS,
+ DBCS,
+ MBCS,
+ HWKANA
+}Cnv2022Type;
+
+/*
+ * Designation/shift state for one conversion direction. _ISO2022Reset clears
+ * it to all zeros, which leaves every cs[] slot at 0 (ASCII) and g at 0 (G0).
+ */
+typedef struct ISO2022State {
+ int8_t cs[4]; /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
+ int8_t g; /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
+ int8_t prevG; /* g before single shift (SS2 or SS3) */
+} ISO2022State;
+
+/* Low four bits of the open options carry the requested variant version. */
+#define UCNV_OPTIONS_VERSION_MASK 0xf
+/* Size of myConverterArray[]; _ISO2022Close walks all of these slots. */
+#define UCNV_2022_MAX_CONVERTERS 10
+
+/* Per-converter state allocated by _ISO2022Open and stored in UConverter::extraInfo. */
+typedef struct{
+ /* shared data of the sub-charset converters, indexed by StateEnum values; unused slots stay NULL */
+ UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];
+ /* separately opened converter (the KR variant opens one); released with ucnv_close() */
+ UConverter *currentConverter;
+ Cnv2022Type currentType;
+ /* independent shift/designation state for each direction */
+ ISO2022State toU2022State, fromU2022State;
+ /* escape-sequence key accumulated so far by changeState_2022; 0 when no sequence is in progress */
+ uint32_t key;
+ /* options & UCNV_OPTIONS_VERSION_MASK, validated per locale in _ISO2022Open */
+ uint32_t version;
+#ifdef U_ENABLE_GENERIC_ISO_2022
+ UBool isFirstBuffer;
+#endif
+ UBool isEmptySegment;
+ /* converter name returned by _ISO2022getName, e.g. "ISO_2022,locale=ja,version=0" */
+ char name[30];
+ /* two-letter tag set by _ISO2022Open: "ja", "ko" or "cn" */
+ char locale[3];
+}UConverterDataISO2022;
+
+/* Protos */
+/* ISO-2022 ----------------------------------------------------------------- */
+
+/*Forward declaration */
+U_CFUNC void U_CALLCONV
+ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
+ UErrorCode * err);
+U_CFUNC void U_CALLCONV
+ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
+ UErrorCode * err);
+
+#define ESC_2022 0x1B /*ESC*/
+
+/* Result of matching the bytes consumed so far against the escape sequence tables (see getKey_2022). */
+typedef enum
+{
+ INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/
+ VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/
+ VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/
+ VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/
+} UCNV_TableStates_2022;
+
+/*
+* The way these state transition arrays work is:
+* ex : ESC$B is the sequence for JISX208
+* a) First Iteration: char is ESC
+* i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
+* int x = normalize_esq_chars_2022[27] which is equal to 1
+* ii) Search for this value in escSeqStateTable_Key_2022[]
+* value of x is stored at escSeqStateTable_Key_2022[0]
+* iii) Save this index as offset
+* iv) Get state of this sequence from escSeqStateTable_Value_2022[]
+* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
+* b) Switch on this state and continue to next char
+* i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
+* which is normalize_esq_chars_2022[36] == 4
+* ii) x is currently 1(from above)
+* x<<=5 -- x is now 32
+* x+=normalize_esq_chars_2022[36]
+* now x is 36
+* iii) Search for this value in escSeqStateTable_Key_2022[]
+* value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
+* iv) Get state of this sequence from escSeqStateTable_Value_2022[]
+* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
+* c) Switch on this state and continue to next char
+* i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index
+* ii) x is currently 36 (from above)
+* x<<=5 -- x is now 1152
+* x+=normalize_esq_chars_2022[66]
+* now x is 1161
+* iii) Search for this value in escSeqStateTable_Key_2022[]
+* value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
+* iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
+* escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
+* v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX208
+*/
+
+
+/*Below are the 3 arrays depicting a state transition table*/
+/*
+ * Indexed by byte value (0..255). A non-zero entry is the small code that
+ * getKey_2022 folds into the running key (key<<5 plus code); every code here
+ * is below 32, so it fits in those 5 bits. A zero entry means the byte cannot
+ * occur in any recognized escape sequence.
+ */
+static const int8_t normalize_esq_chars_2022[256] = {
+/* 0 1 2 3 4 5 6 7 8 9 */
+
+ 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,4 ,7 ,29 ,0
+ ,2 ,24 ,26 ,27 ,0 ,3 ,23 ,6 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,5 ,8 ,9 ,10 ,11 ,12
+ ,13 ,14 ,15 ,16 ,17 ,18 ,19 ,20 ,25 ,28
+ ,0 ,0 ,21 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,22 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
+ ,0 ,0 ,0 ,0 ,0 ,0
+};
+
+#ifdef U_ENABLE_GENERIC_ISO_2022
+/*
+ * When the generic ISO-2022 converter is completely removed, not just disabled
+ * per #ifdef, then the following state table and the associated tables that are
+ * dimensioned with MAX_STATES_2022 should be trimmed.
+ *
+ * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
+ * the associated escape sequences starting with ESC ( B should be removed.
+ * This includes the ones with key values 1097 and all of the ones above 1000000.
+ *
+ * For the latter, the tables can simply be truncated.
+ * For the former, since the tables must be kept parallel, it is probably best
+ * to simply duplicate an adjacent table cell, parallel in all tables.
+ *
+ * It may make sense to restructure the tables, especially by using small search
+ * tables for the variants instead of indexing them parallel to the table here.
+ */
+#endif
+
+/* Number of entries in each of the parallel escape sequence tables. */
+#define MAX_STATES_2022 74
+/*
+ * Keys of all recognized escape sequence prefixes, in ascending order so that
+ * getKey_2022 can binary-search them. The index of a key is the "offset" used
+ * to look up escSeqStateTable_Value_2022[] and the nextStateToUnicode tables.
+ */
+static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
+/* 0 1 2 3 4 5 6 7 8 9 */
+
+ 1 ,34 ,36 ,39 ,55 ,57 ,60 ,61 ,1093 ,1096
+ ,1097 ,1098 ,1099 ,1100 ,1101 ,1102 ,1103 ,1104 ,1105 ,1106
+ ,1109 ,1154 ,1157 ,1160 ,1161 ,1176 ,1178 ,1179 ,1254 ,1257
+ ,1768 ,1773 ,1957 ,35105 ,36933 ,36936 ,36937 ,36938 ,36939 ,36940
+ ,36942 ,36943 ,36944 ,36945 ,36946 ,36947 ,36948 ,37640 ,37642 ,37644
+ ,37646 ,37711 ,37744 ,37745 ,37746 ,37747 ,37748 ,40133 ,40136 ,40138
+ ,40139 ,40140 ,40141 ,1123363 ,35947624 ,35947625 ,35947626 ,35947627 ,35947629 ,35947630
+ ,35947631 ,35947635 ,35947636 ,35947638
+};
+
+#ifdef U_ENABLE_GENERIC_ISO_2022
+
+/*
+ * Converter name for each escape sequence, parallel to escSeqStateTable_Key_2022[].
+ * Only compiled for the generic ISO-2022 converter; changeState_2022 treats a
+ * NULL entry for a terminal sequence as unsupported.
+ */
+static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {
+ /* 0 1 2 3 4 5 6 7 8 9 */
+
+ NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,"latin1" ,"latin1"
+ ,"latin1" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"JISX0201" ,"JISX0201" ,"latin1"
+ ,"latin1" ,NULL ,"JISX-208" ,"ibm-5478" ,"JISX-208" ,NULL ,NULL ,NULL ,NULL ,"UTF8"
+ ,"ISO-8859-1" ,"ISO-8859-7" ,"JIS-X-208" ,NULL ,"ibm-955" ,"ibm-367" ,"ibm-952" ,"ibm-949" ,"JISX-212" ,"ibm-1383"
+ ,"ibm-952" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-5478" ,"ibm-949" ,"ISO-IR-165"
+ ,"CNS-11643-1992,1" ,"CNS-11643-1992,2" ,"CNS-11643-1992,3" ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6" ,"CNS-11643-1992,7" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
+ ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL ,"latin1" ,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089"
+ ,"ibm-920" ,"ibm-915" ,"ibm-915" ,"latin1"
+};
+
+#endif
+
+/*
+ * UCNV_TableStates_2022 value for each key, parallel to escSeqStateTable_Key_2022[]:
+ * whether the bytes matched so far form a complete sequence or only a prefix.
+ * getKey_2022 returns the entry at the index where it found the key.
+ */
+static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
+/* 0 1 2 3 4 5 6 7 8 9 */
+ VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
+ ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
+ ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
+ ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
+ ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
+ ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
+ ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
+ ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
+};
+
+/* Type def for refactoring changeState_2022 code*/
+/*
+ * Selects which variant's escape sequence handling changeState_2022 applies.
+ * Enumerators are compiled in only for the variants that the build enables.
+ */
+typedef enum{
+#ifdef U_ENABLE_GENERIC_ISO_2022
+ ISO_2022=0,
+#endif
+ ISO_2022_JP=1,
+#if !UCONFIG_ONLY_HTML_CONVERSION
+ ISO_2022_KR=2,
+ ISO_2022_CN=3
+#endif
+} Variant2022;
+
+/*********** ISO 2022 Converter Protos ***********/
+static void U_CALLCONV
+_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
+
+static void U_CALLCONV
+ _ISO2022Close(UConverter *converter);
+
+static void U_CALLCONV
+_ISO2022Reset(UConverter *converter, UConverterResetChoice choice);
+
+U_CDECL_BEGIN
+static const char * U_CALLCONV
+_ISO2022getName(const UConverter* cnv);
+U_CDECL_END
+
+static void U_CALLCONV
+_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);
+
+U_CDECL_BEGIN
+static UConverter * U_CALLCONV
+_ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);
+
+U_CDECL_END
+
+#ifdef U_ENABLE_GENERIC_ISO_2022
+static void U_CALLCONV
+T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
+#endif
+
+namespace {
+
+/*const UConverterSharedData _ISO2022Data;*/
+extern const UConverterSharedData _ISO2022JPData;
+
+#if !UCONFIG_ONLY_HTML_CONVERSION
+extern const UConverterSharedData _ISO2022KRData;
+extern const UConverterSharedData _ISO2022CNData;
+#endif
+
+} // namespace
+
+/*************** Converter implementations ******************/
+
+/*
+ * Thin adapter around ucnv_fromUWriteBytes() for callers that track the
+ * output position as uint8_t* instead of char*. It exists to avoid gcc
+ * compiler warnings at the call sites. On return *target reflects the
+ * position that ucnv_fromUWriteBytes() advanced to.
+ */
+static inline void
+fromUWriteUInt8(UConverter *cnv,
+                const char *bytes, int32_t length,
+                uint8_t **target, const char *targetLimit,
+                int32_t **offsets,
+                int32_t sourceIndex,
+                UErrorCode *pErrorCode)
+{
+    /* work on a char* copy of the cursor, then store the advanced cursor back */
+    char *cursor = reinterpret_cast<char *>(*target);
+
+    ucnv_fromUWriteBytes(cnv, bytes, length,
+                         &cursor, targetLimit,
+                         offsets, sourceIndex, pErrorCode);
+
+    *target = reinterpret_cast<uint8_t *>(cursor);
+}
+
+/*
+ * Resets the to-Unicode state of the wrapped KR sub-converter.
+ * Only version 1 keeps such state; other versions are left untouched.
+ */
+static inline void
+setInitialStateToUnicodeKR(UConverter* /*converter*/, UConverterDataISO2022 *myConverterData){
+    if(myConverterData->version != 1) {
+        return;
+    }
+
+    UConverter *sub = myConverterData->currentConverter;
+    sub->toULength=0; /* byteIndex */
+    sub->mode=0; /* state */
+    sub->toUnicodeStatus=0; /* offset */
+}
+
+/*
+ * Prepares the from-Unicode side of an ISO-2022-KR converter.
+ *
+ * In ISO-2022-KR the designator sequence appears only once in a file, so it
+ * is queued in charErrorBuffer only when that buffer is currently empty.
+ * For version 1 the wrapped sub-converter's from-Unicode state is reset too.
+ */
+static inline void
+setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
+    if(converter->charErrorBufferLength == 0) {
+        /* the four designator bytes 1B 24 29 43 */
+        converter->charErrorBuffer[0] = 0x1b;
+        converter->charErrorBuffer[1] = 0x24;
+        converter->charErrorBuffer[2] = 0x29;
+        converter->charErrorBuffer[3] = 0x43;
+        converter->charErrorBufferLength = 4;
+    }
+
+    if(myConverterData->version != 1) {
+        return;
+    }
+
+    UConverter *sub = myConverterData->currentConverter;
+    sub->fromUnicodeStatus=1; /* prevLength */
+    sub->fromUChar32=0;
+}
+
+/*
+ * Open callback for the ISO-2022 converter family.
+ *
+ * Allocates a zeroed UConverterDataISO2022 into cnv->extraInfo, then selects
+ * the variant from the first characters of pArgs->locale ("ja"/"jp",
+ * "ko"/"kr", "zh"/"cn", each followed by '_' or end of string) and the
+ * version from the low bits of pArgs->options. For the chosen variant it
+ * loads or opens the sub-converters, sets cnv->sharedData and fills in the
+ * converter name and locale tag.
+ *
+ * Errors reported through *errorCode:
+ *   U_MEMORY_ALLOCATION_ERROR - the extraInfo allocation failed
+ *   U_MISSING_RESOURCE_ERROR  - unsupported version, or unrecognized locale
+ *                               when the generic converter is compiled out
+ *   anything set by ucnv_loadSharedData()/ucnv_open() for a sub-converter
+ *
+ * NOTE(review): the early returns for an unsupported version leave
+ * cnv->extraInfo allocated; presumably the caller releases it through the
+ * converter's close function - confirm against the caller.
+ */
+static void U_CALLCONV
+_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
+
+ /* pre-filled with spaces so that a NULL pArgs->locale matches none of the variants below */
+ char myLocale[7]={' ',' ',' ',' ',' ',' ', '\0'};
+
+ cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
+ if(cnv->extraInfo != NULL) {
+ UConverterNamePieces stackPieces;
+ UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
+ UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
+ uint32_t version;
+
+ stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
+
+ uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));
+ myConverterData->currentType = ASCII1;
+ cnv->fromUnicodeStatus =FALSE;
+ if(pArgs->locale){
+ /* copies at most 6 characters; the initializer's final '\0' keeps the buffer terminated */
+ uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale)-1);
+ }
+ version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
+ myConverterData->version = version;
+ if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
+ (myLocale[2]=='_' || myLocale[2]=='\0'))
+ {
+ /* open the required converters and cache them */
+ if(version>MAX_JA_VERSION) {
+ // ICU 55 fails to open a converter for an unsupported version.
+ // Previously, it fell back to version 0, but that would yield
+ // unexpected behavior.
+ *errorCode = U_MISSING_RESOURCE_ERROR;
+ return;
+ }
+ /* optional charsets are loaded only when this version's mask includes them */
+ if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
+ myConverterData->myConverterArray[ISO8859_7] =
+ ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
+ }
+ myConverterData->myConverterArray[JISX208] =
+ ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
+ if(jpCharsetMasks[version]&CSM(JISX212)) {
+ myConverterData->myConverterArray[JISX212] =
+ ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
+ }
+ if(jpCharsetMasks[version]&CSM(GB2312)) {
+ myConverterData->myConverterArray[GB2312] =
+ ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */
+ }
+ if(jpCharsetMasks[version]&CSM(KSC5601)) {
+ myConverterData->myConverterArray[KSC5601] =
+ ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
+ }
+
+ /* set the function pointers to appropriate functions */
+ cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
+ uprv_strcpy(myConverterData->locale,"ja");
+
+ /* append the single version digit to the fixed name prefix */
+ (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
+ size_t len = uprv_strlen(myConverterData->name);
+ myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
+ myConverterData->name[len+1]='\0';
+ }
+#if !UCONFIG_ONLY_HTML_CONVERSION
+ else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
+ (myLocale[2]=='_' || myLocale[2]=='\0'))
+ {
+ if(version>1) {
+ // ICU 55 fails to open a converter for an unsupported version.
+ // Previously, it fell back to version 0, but that would yield
+ // unexpected behavior.
+ *errorCode = U_MISSING_RESOURCE_ERROR;
+ return;
+ }
+ const char *cnvName;
+ if(version==1) {
+ cnvName="icu-internal-25546";
+ } else {
+ cnvName="ibm-949";
+ myConverterData->version=version=0;
+ }
+ if(pArgs->onlyTestIsLoadable) {
+ /* loadability probe: release extraInfo here and return before any converter is opened */
+ ucnv_canCreateConverter(cnvName, errorCode); /* errorCode carries result */
+ uprv_free(cnv->extraInfo);
+ cnv->extraInfo=NULL;
+ return;
+ } else {
+ myConverterData->currentConverter=ucnv_open(cnvName, errorCode);
+ if (U_FAILURE(*errorCode)) {
+ _ISO2022Close(cnv);
+ return;
+ }
+
+ if(version==1) {
+ (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
+ /* version 1 takes over the sub-converter's substitution bytes */
+ uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);
+ cnv->subCharLen = myConverterData->currentConverter->subCharLen;
+ }else{
+ (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");
+ }
+
+ /* initialize the state variables */
+ setInitialStateToUnicodeKR(cnv, myConverterData);
+ setInitialStateFromUnicodeKR(cnv, myConverterData);
+
+ /* set the function pointers to appropriate functions */
+ cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
+ uprv_strcpy(myConverterData->locale,"ko");
+ }
+ }
+ else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
+ (myLocale[2]=='_' || myLocale[2]=='\0'))
+ {
+ if(version>2) {
+ // ICU 55 fails to open a converter for an unsupported version.
+ // Previously, it fell back to version 0, but that would yield
+ // unexpected behavior.
+ *errorCode = U_MISSING_RESOURCE_ERROR;
+ return;
+ }
+
+ /* open the required converters and cache them */
+ myConverterData->myConverterArray[GB2312_1] =
+ ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode);
+ /* ISO-IR-165 is loaded for version 1 only */
+ if(version==1) {
+ myConverterData->myConverterArray[ISO_IR_165] =
+ ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode);
+ }
+ myConverterData->myConverterArray[CNS_11643] =
+ ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode);
+
+
+ /* set the function pointers to appropriate functions */
+ cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
+ /* the stored tag is "cn" even when the caller passed "zh"; the converter name below uses "zh" */
+ uprv_strcpy(myConverterData->locale,"cn");
+
+ if (version==0){
+ myConverterData->version = 0;
+ (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");
+ }else if (version==1){
+ myConverterData->version = 1;
+ (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");
+ }else {
+ myConverterData->version = 2;
+ (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
+ }
+ }
+#endif // !UCONFIG_ONLY_HTML_CONVERSION
+ else{
+#ifdef U_ENABLE_GENERIC_ISO_2022
+ myConverterData->isFirstBuffer = TRUE;
+
+ /* append the UTF-8 escape sequence */
+ cnv->charErrorBufferLength = 3;
+ cnv->charErrorBuffer[0] = 0x1b;
+ cnv->charErrorBuffer[1] = 0x25;
+ cnv->charErrorBuffer[2] = 0x42;
+
+ cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
+ /* initialize the state variables */
+ uprv_strcpy(myConverterData->name,"ISO_2022");
+#else
+ *errorCode = U_MISSING_RESOURCE_ERROR;
+ // Was U_UNSUPPORTED_ERROR but changed in ICU 55 to a more standard
+ // data loading error code.
+ return;
+#endif
+ }
+
+ cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
+
+ /* release everything again if a sub-converter failed to load or this was only a loadability probe */
+ if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) {
+ _ISO2022Close(cnv);
+ }
+ } else {
+ *errorCode = U_MEMORY_ALLOCATION_ERROR;
+ }
+}
+
+
+/*
+ * Close callback: releases what _ISO2022Open attached to the converter.
+ *
+ * Unloads every non-NULL shared-data entry in myConverterArray[], closes
+ * currentConverter, and frees extraInfo unless it lives in caller-provided
+ * storage (isExtraLocal). Does nothing when extraInfo is NULL.
+ *
+ * The member access on the extraInfo pointer happens only after the NULL
+ * check, so no pointer is derived from a NULL object.
+ */
+static void U_CALLCONV
+_ISO2022Close(UConverter *converter) {
+    UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
+
+    if (myData == NULL) {
+        return;
+    }
+
+    /*close the array of converter pointers and free the memory*/
+    UConverterSharedData **array = myData->myConverterArray;
+    for (int32_t i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
+        if(array[i]!=NULL) {
+            ucnv_unloadSharedDataIfReady(array[i]);
+        }
+    }
+
+    ucnv_close(myData->currentConverter);
+
+    /* extraInfo in caller-owned storage must not be freed here */
+    if(!converter->isExtraLocal){
+        uprv_free (converter->extraInfo);
+        converter->extraInfo = NULL;
+    }
+}
+
+/*
+ * Reset callback: restores the initial state for the direction(s) selected
+ * by choice.
+ *
+ * The to-Unicode part runs when choice<=UCNV_RESET_TO_UNICODE and the
+ * from-Unicode part when choice!=UCNV_RESET_TO_UNICODE (presumably "both"
+ * orders before "to Unicode" in UConverterResetChoice - confirm in its
+ * declaration). The KR variant, identified by locale[0]=='k', additionally
+ * resets its wrapped sub-converter.
+ */
+static void U_CALLCONV
+_ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
+ UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);
+ if(choice<=UCNV_RESET_TO_UNICODE) {
+ /* all-zero state: every cs[] slot is 0 (ASCII) and g is 0 */
+ uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
+ myConverterData->key = 0;
+ myConverterData->isEmptySegment = FALSE;
+ }
+ if(choice!=UCNV_RESET_TO_UNICODE) {
+ uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
+ }
+#ifdef U_ENABLE_GENERIC_ISO_2022
+ /* an empty locale tag marks the generic converter (the JP/KR/CN branches of _ISO2022Open all set one) */
+ if(myConverterData->locale[0] == 0){
+ if(choice<=UCNV_RESET_TO_UNICODE) {
+ myConverterData->isFirstBuffer = TRUE;
+ myConverterData->key = 0;
+ if (converter->mode == UCNV_SO){
+ ucnv_close (myConverterData->currentConverter);
+ myConverterData->currentConverter=NULL;
+ }
+ converter->mode = UCNV_SI;
+ }
+ if(choice!=UCNV_RESET_TO_UNICODE) {
+ /* re-append UTF-8 escape sequence */
+ /* NOTE(review): these bytes are 1B 28 42, while _ISO2022Open queues 1B 25 42 - confirm which is intended */
+ converter->charErrorBufferLength = 3;
+ converter->charErrorBuffer[0] = 0x1b;
+ converter->charErrorBuffer[1] = 0x28;
+ converter->charErrorBuffer[2] = 0x42;
+ }
+ }
+ else
+#endif
+ {
+ /* reset the state variables */
+ if(myConverterData->locale[0] == 'k'){
+ if(choice<=UCNV_RESET_TO_UNICODE) {
+ setInitialStateToUnicodeKR(converter, myConverterData);
+ }
+ if(choice!=UCNV_RESET_TO_UNICODE) {
+ setInitialStateFromUnicodeKR(converter, myConverterData);
+ }
+ }
+ }
+}
+
+U_CDECL_BEGIN
+
+/*
+ * Name callback: returns the name string stored in the converter's
+ * UConverterDataISO2022, or NULL when the converter has no extraInfo.
+ * The returned pointer refers to storage inside extraInfo.
+ */
+static const char * U_CALLCONV
+_ISO2022getName(const UConverter* cnv){
+    const UConverterDataISO2022* data = (const UConverterDataISO2022*)cnv->extraInfo;
+    if(data == NULL) {
+        return NULL;
+    }
+    return data->name;
+}
+
+U_CDECL_END
+
+
+/*************** to unicode *******************/
+/****************************************************************************
+ * Recognized escape sequences are
+ * <ESC>(B ASCII
+ * <ESC>.A ISO-8859-1
+ * <ESC>.F ISO-8859-7
+ * <ESC>(J JISX-201
+ * <ESC>(I JISX-201
+ * <ESC>$B JISX-208
+ * <ESC>$@ JISX-208
+ * <ESC>$(D JISX-212
+ * <ESC>$A GB2312
+ * <ESC>$(C KSC5601
+ */
+/*
+ * Parallel to escSeqStateTable_Key_2022[]: for the offset found by getKey_2022,
+ * gives the StateEnum that changeState_2022 applies for ISO-2022-JP, or
+ * INVALID_STATE (reported as U_UNSUPPORTED_ESCAPE_SEQUENCE) when the sequence
+ * is not used by this variant.
+ */
+static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
+/* 0 1 2 3 4 5 6 7 8 9 */
+ INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
+ ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STATE
+ ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
+ ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STATE
+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
+};
+
+#if !UCONFIG_ONLY_HTML_CONVERSION
+/*************** to unicode *******************/
+/*
+ * Parallel to escSeqStateTable_Key_2022[]: for the offset found by getKey_2022,
+ * gives the StateEnum that changeState_2022 applies for ISO-2022-CN, or
+ * INVALID_STATE when the sequence is not used by this variant.
+ */
+static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
+/* 0 1 2 3 4 5 6 7 8 9 */
+ INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165
+ ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
+};
+#endif
+
+
+/*
+ * Advances the escape sequence matcher by one byte.
+ *
+ * c      - next byte of the candidate escape sequence
+ * key    - in: key accumulated from the previous bytes (0 at the start);
+ *          out: the extended key on a match, 0 otherwise
+ * offset - out: index of the matched key in escSeqStateTable_Key_2022[],
+ *          0 when nothing matched
+ *
+ * Returns the escSeqStateTable_Value_2022[] entry for the matched key, or
+ * INVALID_2022 when c cannot occur in an escape sequence or the extended
+ * key is not in the table.
+ */
+static UCNV_TableStates_2022
+getKey_2022(char c,int32_t* key,int32_t* offset){
+    const int32_t code = normalize_esq_chars_2022[(uint8_t)c];
+
+    /* a code of 0 marks a byte that is not valid anywhere in an escape sequence */
+    if(code != 0) {
+        const int32_t wanted = (*key << 5) + code;
+        int32_t lower = 0;
+        int32_t upper = MAX_STATES_2022;
+        int32_t lastProbe = 0;
+
+        /* binary search over the sorted key table */
+        while(upper != lower) {
+            const int32_t probe = (upper + lower) >> 1;
+
+            /* the bounds never cross, so a repeated probe means the key is absent */
+            if(probe == lastProbe) {
+                break;
+            }
+
+            const int32_t candidate = escSeqStateTable_Key_2022[probe];
+            if(candidate == wanted) {
+                *key = wanted;
+                *offset = probe;
+                return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[probe];
+            }
+            if(candidate > wanted) {
+                upper = probe;
+            } else {
+                lower = probe;
+            }
+            lastProbe = probe;
+        }
+    }
+
+    *key = 0;
+    *offset = 0;
+    return INVALID_2022;
+}
+
+/*runs through a state machine to determine the escape sequence - codepage correspondance
+ */
+static void
+changeState_2022(UConverter* _this,
+ const char** source,
+ const char* sourceLimit,
+ Variant2022 var,
+ UErrorCode* err){
+ UCNV_TableStates_2022 value;
+ UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
+ uint32_t key = myData2022->key;
+ int32_t offset = 0;
+ int8_t initialToULength = _this->toULength;
+ char c;
+
+ value = VALID_NON_TERMINAL_2022;
+ while (*source < sourceLimit) {
+ c = *(*source)++;
+ _this->toUBytes[_this->toULength++]=(uint8_t)c;
+ value = getKey_2022(c,(int32_t *) &key, &offset);
+
+ switch (value){
+
+ case VALID_NON_TERMINAL_2022 :
+ /* continue with the loop */
+ break;
+
+ case VALID_TERMINAL_2022:
+ key = 0;
+ goto DONE;
+
+ case INVALID_2022:
+ goto DONE;
+
+ case VALID_MAYBE_TERMINAL_2022:
+#ifdef U_ENABLE_GENERIC_ISO_2022
+ /* ESC ( B is ambiguous only for ISO_2022 itself */
+ if(var == ISO_2022) {
+ /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */
+ _this->toULength = 0;
+
+ /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */
+
+ /* continue with the loop */
+ value = VALID_NON_TERMINAL_2022;
+ break;
+ } else
+#endif
+ {
+ /* not ISO_2022 itself, finish here */
+ value = VALID_TERMINAL_2022;
+ key = 0;
+ goto DONE;
+ }
+ }
+ }
+
+DONE:
+ myData2022->key = key;
+
+ if (value == VALID_NON_TERMINAL_2022) {
+ /* indicate that the escape sequence is incomplete: key!=0 */
+ return;
+ } else if (value == INVALID_2022 ) {
+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;
+ } else /* value == VALID_TERMINAL_2022 */ {
+ switch(var){
+#ifdef U_ENABLE_GENERIC_ISO_2022
+ case ISO_2022:
+ {
+ const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
+ if(chosenConverterName == NULL) {
+ /* SS2 or SS3 */
+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
+ _this->toUCallbackReason = UCNV_UNASSIGNED;
+ return;
+ }
+
+ _this->mode = UCNV_SI;
+ ucnv_close(myData2022->currentConverter);
+ myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
+ if(U_SUCCESS(*err)) {
+ myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
+ _this->mode = UCNV_SO;
+ }
+ break;
+ }
+#endif
+ case ISO_2022_JP:
+ {
+ StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset];
+ switch(tempState) {
+ case INVALID_STATE:
+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
+ break;
+ case SS2_STATE:
+ if(myData2022->toU2022State.cs[2]!=0) {
+ if(myData2022->toU2022State.g<2) {
+ myData2022->toU2022State.prevG=myData2022->toU2022State.g;
+ }
+ myData2022->toU2022State.g=2;
+ } else {
+ /* illegal to have SS2 before a matching designator */
+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;
+ }
+ break;
+ /* case SS3_STATE: not used in ISO-2022-JP-x */
+ case ISO8859_1:
+ case ISO8859_7:
+ if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
+ } else {
+ /* G2 charset for SS2 */
+ myData2022->toU2022State.cs[2]=(int8_t)tempState;
+ }
+ break;
+ default:
+ if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
+ } else {
+ /* G0 charset */
+ myData2022->toU2022State.cs[0]=(int8_t)tempState;
+ }
+ break;
+ }
+ }
+ break;
+#if !UCONFIG_ONLY_HTML_CONVERSION
+ case ISO_2022_CN:
+ {
+ StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
+ switch(tempState) {
+ case INVALID_STATE:
+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
+ break;
+ case SS2_STATE:
+ if(myData2022->toU2022State.cs[2]!=0) {
+ if(myData2022->toU2022State.g<2) {
+ myData2022->toU2022State.prevG=myData2022->toU2022State.g;
+ }
+ myData2022->toU2022State.g=2;
+ } else {
+ /* illegal to have SS2 before a matching designator */
+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;
+ }
+ break;
+ case SS3_STATE:
+ if(myData2022->toU2022State.cs[3]!=0) {
+ if(myData2022->toU2022State.g<2) {
+ myData2022->toU2022State.prevG=myData2022->toU2022State.g;
+ }
+ myData2022->toU2022State.g=3;
+ } else {
+ /* illegal to have SS3 before a matching designator */
+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;
+ }
+ break;
+ case ISO_IR_165:
+ if(myData2022->version==0) {
+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
+ break;
+ }
+ U_FALLTHROUGH;
+ case GB2312_1:
+ U_FALLTHROUGH;
+ case CNS_11643_1:
+ myData2022->toU2022State.cs[1]=(int8_t)tempState;
+ break;
+ case CNS_11643_2:
+ myData2022->toU2022State.cs[2]=(int8_t)tempState;
+ break;
+ default:
+ /* other CNS 11643 planes */
+ if(myData2022->version==0) {
+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
+ } else {
+ myData2022->toU2022State.cs[3]=(int8_t)tempState;
+ }
+ break;
+ }
+ }
+ break;
+ case ISO_2022_KR:
+ if(offset==0x30){
+ /* nothing to be done, just accept this one escape sequence */
+ } else {
+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
+ }
+ break;
+#endif // !UCONFIG_ONLY_HTML_CONVERSION
+
+ default:
+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;
+ break;
+ }
+ }
+ if(U_SUCCESS(*err)) {
+ _this->toULength = 0;
+ } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
+ if(_this->toULength>1) {
+ /*
+ * Ticket 5691: consistent illegal sequences:
+ * - We include at least the first byte (ESC) in the illegal sequence.
+ * - If any of the non-initial bytes could be the start of a character,
+ * we stop the illegal sequence before the first one of those.
+ * In escape sequences, all following bytes are "printable", that is,
+ * unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
+ * they are valid single/lead bytes.
+ * For simplicity, we always only report the initial ESC byte as the
+ * illegal sequence and back out all other bytes we looked at.
+ */
+ /* Back out some bytes. */
+ int8_t backOutDistance=_this->toULength-1;
+ int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
+ if(backOutDistance<=bytesFromThisBuffer) {
+ /* same as initialToULength<=1 */
+ *source-=backOutDistance;
+ } else {
+ /* Back out bytes from the previous buffer: Need to replay them. */
+ _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
+ /* same as -(initialToULength-1) */
+ /* preToULength is negative! */
+ uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength);
+ *source-=bytesFromThisBuffer;
+ }
+ _this->toULength=1;
+ }
+ } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
+ _this->toUCallbackReason = UCNV_UNASSIGNED;
+ }
+}
+
+#if !UCONFIG_ONLY_HTML_CONVERSION
+/*
+ * Looks ahead in the source stream for the next ISO 2022 escape sequence so
+ * that the caller can convert the longest possible run of bytes with the
+ * current sub-converter.
+ *
+ * @param source      pointer to the current read position; *source is only read
+ * @param sourceLimit one past the last available source byte
+ * @param flush       only consulted in the U_ENABLE_GENERIC_ISO_2022 build
+ * @return pointer to the first ESC_2022 byte at or after *source,
+ *         or sourceLimit if there is none
+ */
+static inline const char*
+getEndOfBuffer_2022(const char** source,
+                    const char* sourceLimit,
+                    UBool flush){
+
+    const char* mySource = *source;
+
+#ifdef U_ENABLE_GENERIC_ISO_2022
+    if (*source >= sourceLimit)
+        return sourceLimit;
+
+    do{
+
+        if (*mySource == ESC_2022){
+            int8_t i;
+            int32_t key = 0;
+            int32_t offset;
+            UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
+
+            /* Kludge: I could not
+             * figure out the reason for validating an escape sequence
+             * twice - once here and once in changeState_2022().
+             * is it possible to have an ESC character in a ISO2022
+             * byte stream which is valid in a code page? Is it legal?
+             */
+            for (i=0;
+                 (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
+                 i++) {
+                value = getKey_2022(*(mySource+i), &key, &offset);
+            }
+            /*
+             * mySource still points at the ESC byte that was tested above,
+             * so the second operand is always true and we always stop here.
+             */
+            if (value > 0 || *mySource==ESC_2022)
+                return mySource;
+
+            if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
+                return sourceLimit;
+        }
+    }while (++mySource < sourceLimit);
+
+    return sourceLimit;
+#else
+    (void)flush; /* only the generic ISO 2022 code path above consults flush */
+    while(mySource < sourceLimit && *mySource != ESC_2022) {
+        ++mySource;
+    }
+    return mySource;
+#endif
+}
+#endif
+
+/* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
+ * any future change in _MBCSFromUChar32() function should be reflected here.
+ *
+ * Looks up the codepage bytes for one code point in the from-Unicode tables of
+ * sharedData->mbcs and, if nothing usable is found there, in the extension
+ * table (sharedData->mbcs.extIndexes) when one is present.
+ *
+ * @param sharedData  converter data whose mbcs tables are read
+ * @param c           code point to convert
+ * @param value       receives the output bytes packed into one integer
+ *                    (most significant byte first); written when a mapping is found
+ *                    in the main table, otherwise left to ucnv_extSimpleMatchFromU()
+ * @param useFallback passed to FROM_U_USE_FALLBACK() and to the extension lookup
+ * @param outputType  MBCS_OUTPUT_2 selects the 2-byte table layout; any other value
+ *                    is treated as MBCS_OUTPUT_3
+ * @return number of bytes in *value; negative number if fallback; 0 if no mapping
+ */
+static inline int32_t
+MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
+ UChar32 c,
+ uint32_t* value,
+ UBool useFallback,
+ int outputType)
+{
+ const int32_t *cx;
+ const uint16_t *table;
+ uint32_t stage2Entry;
+ uint32_t myValue;
+ int32_t length;
+ const uint8_t *p;
+ /*
+ * TODO(markus): Use and require new, faster MBCS conversion table structures.
+ * Use internal version of ucnv_open() that verifies that the new structures are available,
+ * else U_INTERNAL_PROGRAM_ERROR.
+ */
+ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+ if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
+ table=sharedData->mbcs.fromUnicodeTable;
+ stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
+ /* get the bytes and the length for the output */
+ if(outputType==MBCS_OUTPUT_2){
+ myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+ if(myValue<=0xff) {
+ length=1;
+ } else {
+ length=2;
+ }
+ } else /* outputType==MBCS_OUTPUT_3 */ {
+ p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+ myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; /* pack the 3 stored bytes, first byte most significant */
+ if(myValue<=0xff) {
+ length=1;
+ } else if(myValue<=0xffff) {
+ length=2;
+ } else {
+ length=3;
+ }
+ }
+ /* is this code point assigned, or do we use fallbacks? */
+ if((stage2Entry&(1<<(16+(c&0xf))))!=0) { /* one flag bit per code point in bits 16..31 of the stage 2 entry */
+ /* assigned */
+ *value=myValue;
+ return length;
+ } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {
+ /*
+ * We allow a 0 byte output if the "assigned" bit is set for this entry.
+ * There is no way with this data structure for fallback output
+ * to be a zero byte.
+ */
+ *value=myValue;
+ return -length; /* negative length marks a fallback mapping */
+ }
+ }
+
+ cx=sharedData->mbcs.extIndexes;
+ if(cx!=NULL) {
+ return ucnv_extSimpleMatchFromU(cx, c, value, useFallback); /* NOTE(review): assumed to follow the same sign convention as above - confirm in ucnv_ext */
+ }
+
+ /* unassigned */
+ return 0;
+}
+
+/*
+ * Single-byte lookup for one code point.
+ * This mirrors _MBCSSingleFromUChar32() in ucnvmbcs.c; any future change to
+ * that function should be reflected here.
+ *
+ * @param sharedData  converter data whose mbcs from-Unicode tables are read
+ * @param c           code point to convert
+ * @param retval      pointer to output byte; receives the low 8 bits of the table
+ *                    result whenever the table is consulted
+ * @param useFallback selects the lower acceptance threshold for fallback results
+ * @return 1 roundtrip byte 0 no mapping -1 fallback byte
+ */
+static inline int32_t
+MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
+                         UChar32 c,
+                         uint32_t* retval,
+                         UBool useFallback)
+{
+    /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+    if(c>=0x10000 && (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)==0) {
+        return 0;
+    }
+
+    /* same table walk as in _MBCSFromUnicodeWithOffsets */
+    const uint16_t *stageTable=sharedData->mbcs.fromUnicodeTable;
+    const int32_t result=MBCS_SINGLE_RESULT_FROM_U(stageTable, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
+
+    /* the output byte is the low byte; the bits above it classify the mapping */
+    *retval=(uint32_t)(result&0xff);
+
+    if(result>=0xf00) {
+        return 1; /* roundtrip */
+    }
+    const int32_t fallbackFloor=useFallback ? 0x800 : 0xc00;
+    if(result>=fallbackFloor) {
+        return -1; /* fallback taken */
+    }
+    return 0; /* no mapping */
+}
+
+/*
+ * Convert a strict EUC DBCS value to its ISO 2022 form.
+ * The low 16 bits must lie in A1A1..FEFE and the low (trail) byte in A1..FE;
+ * if so, 0x80 is subtracted from each byte to move it to the range 21..7E.
+ * Return 0 if out of range.
+ */
+static inline uint32_t
+_2022FromGR94DBCS(uint32_t value) {
+    const uint32_t pair = value & 0xffff;
+    const uint32_t trail = value & 0xff;
+
+    if(pair < 0xa1a1 || pair > 0xfefe) {
+        return 0; /* not valid for ISO 2022 */
+    }
+    if(trail < 0xa1 || trail > 0xfe) {
+        return 0; /* not valid for ISO 2022 */
+    }
+    return value - 0x8080; /* shift down to 21..7e byte range */
+}
+
+#if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
+/*
+ * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
+ * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
+ * unchanged.
+ *
+ * Not compiled: the enclosing "#if 0" keeps this function only for reference.
+ * It adds 0x8080 and accepts the sum only if its low 16 bits are in A1A1..FEFE
+ * and its low byte is in A1..FE.
+ */
+static inline uint32_t
+_2022ToGR94DBCS(uint32_t value) {
+ uint32_t returnValue = value + 0x8080;
+ if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) &&
+ (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) {
+ return returnValue;
+ } else {
+ return value;
+ }
+}
+#endif
+
+#ifdef U_ENABLE_GENERIC_ISO_2022
+
+/**********************************************************************************
+* ISO-2022 Converter
+*
+* T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC() is the to-Unicode function of the
+* generic ISO 2022 converter (only compiled when U_ENABLE_GENERIC_ISO_2022 is defined).
+*
+* It alternates between two steps until the source is exhausted or it has to return:
+*  1. While no escape sequence is pending (myData->key == 0), find the next ESC with
+*     getEndOfBuffer_2022() and convert the bytes before it with
+*     myData->currentConverter (opened as "ASCII" on first use).
+*     Overflow output and partial/invalid input bytes of the sub-converter are copied
+*     into this converter before returning.
+*  2. Let changeState_2022() consume the escape sequence.
+*
+* @param args conversion arguments; args->source and args->target are advanced
+* @param err  in/out error code
+*/
+
+static void U_CALLCONV
+T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
+ UErrorCode* err){
+ const char* mySourceLimit, *realSourceLimit;
+ const char* sourceStart;
+ const UChar* myTargetStart;
+ UConverter* saveThis;
+ UConverterDataISO2022* myData;
+ int8_t length;
+
+ saveThis = args->converter; /* args->converter is temporarily replaced by the sub-converter below */
+ myData=((UConverterDataISO2022*)(saveThis->extraInfo));
+
+ realSourceLimit = args->sourceLimit;
+ while (args->source < realSourceLimit) {
+ if(myData->key == 0) { /* are we in the middle of an escape sequence? */
+ /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
+ mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);
+
+ if(args->source < mySourceLimit) {
+ if(myData->currentConverter==NULL) {
+ myData->currentConverter = ucnv_open("ASCII",err);
+ if(U_FAILURE(*err)){
+ return;
+ }
+
+ myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
+ saveThis->mode = UCNV_SO;
+ }
+
+ /* convert to before the ESC or until the end of the buffer */
+ myData->isFirstBuffer=FALSE;
+ sourceStart = args->source;
+ myTargetStart = args->target;
+ args->converter = myData->currentConverter;
+ ucnv_toUnicode(args->converter,
+ &args->target,
+ args->targetLimit,
+ &args->source,
+ mySourceLimit,
+ args->offsets,
+ (UBool)(args->flush && mySourceLimit == realSourceLimit), /* flush the sub-converter only if this run reaches the real end of input */
+ err);
+ args->converter = saveThis;
+
+ if (*err == U_BUFFER_OVERFLOW_ERROR) {
+ /* move the overflow buffer */
+ length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
+ myData->currentConverter->UCharErrorBufferLength = 0;
+ if(length > 0) {
+ uprv_memcpy(saveThis->UCharErrorBuffer,
+ myData->currentConverter->UCharErrorBuffer,
+ length*U_SIZEOF_UCHAR);
+ }
+ return;
+ }
+
+ /*
+ * At least one of:
+ * -Error while converting
+ * -Done with entire buffer
+ * -Need to write offsets or update the current offset
+ * (leave that up to the code in ucnv.c)
+ *
+ * or else we just stopped at an ESC byte and continue with changeState_2022()
+ */
+ if (U_FAILURE(*err) ||
+ (args->source == realSourceLimit) ||
+ (args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) ||
+ (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))
+ ) {
+ /* copy partial or error input for truncated detection and error handling */
+ if(U_FAILURE(*err)) {
+ length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
+ if(length > 0) {
+ uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);
+ }
+ } else {
+ length = saveThis->toULength = myData->currentConverter->toULength;
+ if(length > 0) {
+ uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);
+ if(args->source < mySourceLimit) {
+ *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
+ }
+ }
+ }
+ return;
+ }
+ }
+ }
+
+ sourceStart = args->source;
+ changeState_2022(args->converter,
+ &(args->source),
+ realSourceLimit,
+ ISO_2022,
+ err);
+ if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) {
+ /* let the ucnv.c code update its current offset */
+ return;
+ }
+ }
+}
+
+#endif
+
+/*
+ * To Unicode Callback helper function.
+ * Records the offending source byte(s) in cnv->toUBytes / cnv->toULength
+ * (two bytes if sourceChar does not fit into one byte, else one byte)
+ * and sets *err: U_INVALID_CHAR_FOUND if targetUniChar is 0xfffe,
+ * otherwise U_ILLEGAL_CHAR_FOUND.
+ */
+static void
+toUnicodeCallback(UConverter *cnv,
+                  const uint32_t sourceChar, const uint32_t targetUniChar,
+                  UErrorCode* err){
+    int8_t byteCount = 0;
+
+    /* lead byte first, if there is one */
+    if(sourceChar > 0xff) {
+        cnv->toUBytes[byteCount++] = (uint8_t)(sourceChar >> 8);
+    }
+    cnv->toUBytes[byteCount++] = (uint8_t)sourceChar;
+    cnv->toULength = byteCount;
+
+    *err = (targetUniChar == (missingCharMarker-1/*0xfffe*/)) ?
+               U_INVALID_CHAR_FOUND :
+               U_ILLEGAL_CHAR_FOUND;
+}
+
+/**************************************ISO-2022-JP*************************************************/
+
+/************************************** IMPORTANT **************************************************
+* The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
+* MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
+* The converter iterates over each Unicode codepoint
+* to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
+* processed one char at a time it would make sense to reduce the extra processing a canned converter
+* would do as far as possible.
+*
+* If the implementation of these macros or structure of sharedData struct change in the future, make
+* sure that ISO-2022 is also changed.
+***************************************************************************************************
+*/
+
+/***************************************************************************************************
+* Rules for ISO-2022-jp encoding
+* (i) Escape sequences must be fully contained within a line they should not
+* span new lines or CRs
+* (ii) If the last character on a line is represented by two bytes then an ASCII or
+* JIS-Roman character escape sequence should follow before the line terminates
+* (iii) If the first character on the line is represented by two bytes then a two
+* byte character escape sequence should precede it
+* (iv) If no escape sequence is encountered then the characters are ASCII
+* (v) Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
+* and invoked with SS2 (ESC N).
+* (vi) If there is any G0 designation in text, there must be a switch to
+* ASCII or to JIS X 0201-Roman before a space character (but not
+* necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
+* characters such as tab or CRLF.
+* (vii) Supported encodings:
+* ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
+*
+* source : RFC-1554
+*
+* JISX201, JISX208,JISX212 : new .cnv data files created
+* KSC5601 : alias to ibm-949 mapping table
+* GB2312 : alias to ibm-1386 mapping table
+* ISO-8859-1 : Algorithmic implemented as LATIN1 case
+* ISO-8859-7 : alias to ibm-9409 mapping table
+*/
+
+/*
+ * preference order of JP charsets
+ *
+ * The from-Unicode converter walks this list to append the remaining allowed
+ * charsets to its choices[] after the charsets currently designated to G0 and G2.
+ * HWKANA_7BIT is masked out before that walk, so the last entry is never added by it.
+ */
+static const StateEnum jpCharsetPref[]={
+ ASCII,
+ JISX201,
+ ISO8859_1,
+ JISX208,
+ ISO8859_7,
+ JISX212,
+ GB2312,
+ KSC5601,
+ HWKANA_7BIT
+};
+
+/*
+ * The escape sequences must be in order of the enum constants like JISX201 = 3,
+ * not in order of jpCharsetPref[]!
+ * The array is indexed directly with the charset value: escSeqChars[cs].
+ */
+static const char escSeqChars[][6] ={
+ "\x1B\x28\x42", /* <ESC>(B ASCII */
+ "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */
+ "\x1B\x2E\x46", /* <ESC>.F ISO-8859-7 */
+ "\x1B\x28\x4A", /* <ESC>(J JISX-201 */
+ "\x1B\x24\x42", /* <ESC>$B JISX-208 */
+ "\x1B\x24\x28\x44", /* <ESC>$(D JISX-212 */
+ "\x1B\x24\x41", /* <ESC>$A GB2312 */
+ "\x1B\x24\x28\x43", /* <ESC>$(C KSC5601 */
+ "\x1B\x28\x49" /* <ESC>(I HWKANA_7BIT */
+
+};
+static const int8_t escSeqCharsLen[] ={ /* byte counts of the sequences in escSeqChars[], same index */
+ 3, /* length of <ESC>(B ASCII */
+ 3, /* length of <ESC>.A ISO-8859-1 */
+ 3, /* length of <ESC>.F ISO-8859-7 */
+ 3, /* length of <ESC>(J JISX-201 */
+ 3, /* length of <ESC>$B JISX-208 */
+ 4, /* length of <ESC>$(D JISX-212 */
+ 3, /* length of <ESC>$A GB2312 */
+ 4, /* length of <ESC>$(C KSC5601 */
+ 3 /* length of <ESC>(I HWKANA_7BIT */
+};
+
+/*
+* The iteration over various code pages works this way:
+* i) Get the currentState from myConverterData->currentState
+* ii) Check if the character is mapped to a valid character in the currentState
+* Yes -> a) set the initIterState to currentState
+* b) remain in this state until an invalid character is found
+* No -> a) go to the next code page and find the character
+* iii) Before changing the state increment the current state check if the current state
+* is equal to the initIterState
+* Yes -> A character that cannot be represented in any of the supported encodings
+* break and return a U_INVALID_CHARACTER error
+* No -> Continue and find the character in next code page
+*
+*
+* TODO: Implement a priority technique where the users are allowed to set the priority of code pages
+*/
+
+/*
+ * Map 00..7F to Unicode according to JIS X 0201.
+ * Only two bytes differ from the identity mapping:
+ * 5C maps to U+00A5 and 7E maps to U+203E.
+ */
+static inline uint32_t
+jisx201ToU(uint32_t value) {
+    switch(value) {
+    case 0x5c:
+        return 0xa5;
+    case 0x7e:
+        return 0x203e;
+    default:
+        return value;
+    }
+}
+
+/*
+ * Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable.
+ * U+005C and U+007E are unmappable because their byte values are taken by
+ * U+00A5 and U+203E.
+ */
+static inline uint32_t
+jisx201FromU(uint32_t value) {
+    switch(value) {
+    case 0xa5:
+        return 0x5c;
+    case 0x203e:
+        return 0x7e;
+    case 0x5c:
+    case 0x7e:
+        return 0xfffe;
+    default:
+        return value<=0x7f ? value : 0xfffe;
+    }
+}
+
+/*
+ * Take a valid Shift-JIS byte pair, check that it is in the range corresponding
+ * to JIS X 0208, and convert it to a pair of 21..7E bytes.
+ * Return 0 if the byte pair is out of range.
+ */
+static inline uint32_t
+_2022FromSJIS(uint32_t value) {
+    if(value > 0xEFFC) {
+        return 0; /* beyond JIS X 0208 */
+    }
+
+    const uint8_t trailByte = (uint8_t)value;
+    const uint32_t leadBits = value & 0xff00;
+
+    /*
+     * Rebase the lead byte (81..9F or E0..EF) and double it:
+     * each Shift-JIS lead byte covers two JIS X 0208 rows.
+     */
+    uint32_t jis = (leadBits - (leadBits <= 0x9f00 ? 0x7000 : 0xb000)) << 1;
+
+    if(trailByte > 0x9e) {
+        /* upper half of the trail range (up to 0xfc): second row of the pair */
+        return jis | (trailByte - 0x7e);
+    }
+
+    /* lower half: first row of the pair; trail bytes skip 0x7f */
+    jis -= 0x100;
+    return jis | (trailByte - (trailByte <= 0x7e ? 0x1f : 0x20));
+}
+
+/*
+ * Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS.
+ * If either byte is outside 21..7E make sure that the result is not valid
+ * for Shift-JIS so that the converter catches it.
+ * Some invalid byte values already turn into equally invalid Shift-JIS
+ * byte values and need not be tested explicitly.
+ *
+ * @param c1    first (row) byte
+ * @param c2    second (cell) byte
+ * @param bytes receives the Shift-JIS lead and trail byte; a byte that could
+ *              not be converted is written as 0
+ */
+static inline void
+_2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
+    uint8_t lead;
+    uint8_t trail;
+
+    if((c1 & 1) != 0) {
+        /* odd row: first half of the Shift-JIS trail byte range */
+        lead = (uint8_t)(c1 + 1);
+        if(c2 <= 0x5f) {
+            trail = (uint8_t)(c2 + 0x1f);
+        } else if(c2 <= 0x7e) {
+            trail = (uint8_t)(c2 + 0x20);
+        } else {
+            trail = 0; /* invalid */
+        }
+    } else {
+        /* even row: second half of the Shift-JIS trail byte range */
+        lead = c1;
+        if(0x21 <= c2 && c2 <= 0x7e) {
+            trail = (uint8_t)(c2 + 0x7e);
+        } else {
+            trail = 0; /* invalid */
+        }
+    }
+
+    /* two rows share one Shift-JIS lead byte */
+    lead >>= 1;
+    if(lead <= 0x2f) {
+        lead = (uint8_t)(lead + 0x70);
+    } else if(lead <= 0x3f) {
+        lead = (uint8_t)(lead + 0xb0);
+    } else {
+        lead = 0; /* invalid */
+    }
+
+    bytes[0] = (char)lead;
+    bytes[1] = (char)trail;
+}
+
+/*
+ * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
+ * Katakana.
+ * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
+ * because Shift-JIS roundtrips half-width Katakana to single bytes.
+ * These were the only fallbacks in ICU's jisx-208.ucm file.
+ *
+ * The table is indexed with (code point - HWKANA_START); each entry is used as a
+ * two-byte JISX208 output value and is reported as a fallback, not a roundtrip.
+ */
+static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
+ 0x2123, /* U+FF61 */
+ 0x2156,
+ 0x2157,
+ 0x2122,
+ 0x2126,
+ 0x2572,
+ 0x2521,
+ 0x2523,
+ 0x2525,
+ 0x2527,
+ 0x2529,
+ 0x2563,
+ 0x2565,
+ 0x2567,
+ 0x2543,
+ 0x213C, /* U+FF70 */
+ 0x2522,
+ 0x2524,
+ 0x2526,
+ 0x2528,
+ 0x252A,
+ 0x252B,
+ 0x252D,
+ 0x252F,
+ 0x2531,
+ 0x2533,
+ 0x2535,
+ 0x2537,
+ 0x2539,
+ 0x253B,
+ 0x253D,
+ 0x253F, /* U+FF80 */
+ 0x2541,
+ 0x2544,
+ 0x2546,
+ 0x2548,
+ 0x254A,
+ 0x254B,
+ 0x254C,
+ 0x254D,
+ 0x254E,
+ 0x254F,
+ 0x2552,
+ 0x2555,
+ 0x2558,
+ 0x255B,
+ 0x255E,
+ 0x255F, /* U+FF90 */
+ 0x2560,
+ 0x2561,
+ 0x2562,
+ 0x2564,
+ 0x2566,
+ 0x2568,
+ 0x2569,
+ 0x256A,
+ 0x256B,
+ 0x256C,
+ 0x256D,
+ 0x256F,
+ 0x2573,
+ 0x212B,
+ 0x212C /* U+FF9F */
+};
+
+static void U_CALLCONV
+UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) { /*
+ * From-Unicode conversion for the ISO-2022-JP family of converters.
+ *
+ * For each code point read from args->source:
+ *  - pair up surrogates (a pending lead surrogate is kept in cnv->fromUChar32),
+ *  - reject SO/SI/ESC in the input,
+ *  - try the charsets in choices[] until one yields a roundtrip mapping,
+ *    remembering at most one fallback mapping on the way,
+ *  - write SI, a designation escape sequence and/or a shift (SO or ESC N) as
+ *    needed, followed by the one or two character bytes.
+ * When flushing at the very end of the input, the output is switched back to ASCII.
+ *
+ * @param args conversion arguments; args->source and args->target are updated on
+ *             return, args->offsets (if not NULL) receives one source index per
+ *             output byte
+ * @param err  set to U_ILLEGAL_CHAR_FOUND (unmatched surrogate, SO/SI/ESC in input),
+ *             U_INVALID_CHAR_FOUND (no charset maps the code point) or
+ *             U_BUFFER_OVERFLOW_ERROR (no room left in the target)
+ */
+ UConverter *cnv = args->converter;
+ UConverterDataISO2022 *converterData;
+ ISO2022State *pFromU2022State;
+ uint8_t *target = (uint8_t *) args->target;
+ const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
+ const UChar* source = args->source;
+ const UChar* sourceLimit = args->sourceLimit;
+ int32_t* offsets = args->offsets;
+ UChar32 sourceChar;
+ char buffer[8]; /* staging area: shift/escape bytes plus the bytes of one character */
+ int32_t len, outLen;
+ int8_t choices[10]; /* charsets to try for each code point, most preferred first */
+ int32_t choiceCount; /* 0 means that choices[] must be (re)built */
+ uint32_t targetValue = 0;
+ UBool useFallback;
+
+ int32_t i;
+ int8_t cs, g; /* charset and G-set index selected for the current code point */
+
+ /* set up the state */
+ converterData = (UConverterDataISO2022*)cnv->extraInfo;
+ pFromU2022State = &converterData->fromU2022State;
+
+ choiceCount = 0;
+
+ /* check if the last codepoint of previous buffer was a lead surrogate*/
+ if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
+ goto getTrail; /* jumps into the loop body below */
+ }
+
+ while(source < sourceLimit) {
+ if(target < targetLimit) {
+
+ sourceChar = *(source++);
+ /*check if the char is a First surrogate*/
+ if(U16_IS_SURROGATE(sourceChar)) {
+ if(U16_IS_SURROGATE_LEAD(sourceChar)) {
+getTrail:
+ /*look ahead to find the trail surrogate*/
+ if(source < sourceLimit) {
+ /* test the following code unit */
+ UChar trail=(UChar) *source;
+ if(U16_IS_TRAIL(trail)) {
+ source++;
+ sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
+ cnv->fromUChar32=0x00;
+ /* convert this supplementary code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+ cnv->fromUChar32=sourceChar;
+ break;
+ }
+ } else {
+ /* no more input */
+ cnv->fromUChar32=sourceChar; /* keep the lead surrogate for the next call */
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+ cnv->fromUChar32=sourceChar;
+ break;
+ }
+ }
+
+ /* do not convert SO/SI/ESC */
+ if(IS_2022_CONTROL(sourceChar)) {
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+ cnv->fromUChar32=sourceChar;
+ break;
+ }
+
+ /* do the conversion */
+
+ if(choiceCount == 0) {
+ uint16_t csm;
+
+ /*
+ * The csm variable keeps track of which charsets are allowed
+ * and not used yet while building the choices[].
+ */
+ csm = jpCharsetMasks[converterData->version];
+ choiceCount = 0;
+
+ /* JIS7/8: try single-byte half-width Katakana before JISX208 */
+ if(converterData->version == 3 || converterData->version == 4) {
+ choices[choiceCount++] = (int8_t)HWKANA_7BIT;
+ }
+ /* Do not try single-byte half-width Katakana for other versions. */
+ csm &= ~CSM(HWKANA_7BIT);
+
+ /* try the current G0 charset */
+ choices[choiceCount++] = cs = pFromU2022State->cs[0];
+ csm &= ~CSM(cs);
+
+ /* try the current G2 charset */
+ if((cs = pFromU2022State->cs[2]) != 0) {
+ choices[choiceCount++] = cs;
+ csm &= ~CSM(cs);
+ }
+
+ /* try all the other possible charsets */
+ for(i = 0; i < UPRV_LENGTHOF(jpCharsetPref); ++i) {
+ cs = (int8_t)jpCharsetPref[i];
+ if(CSM(cs) & csm) {
+ choices[choiceCount++] = cs;
+ csm &= ~CSM(cs);
+ }
+ }
+ }
+
+ cs = g = 0;
+ /*
+ * len==0: no mapping found yet
+ * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
+ * len>0: found a roundtrip result, done
+ */
+ len = 0;
+ /*
+ * We will turn off useFallback after finding a fallback,
+ * but we still get fallbacks from PUA code points as usual.
+ * Therefore, we will also need to check that we don't overwrite
+ * an early fallback with a later one.
+ */
+ useFallback = cnv->useFallback;
+
+ for(i = 0; i < choiceCount && len <= 0; ++i) {
+ uint32_t value;
+ int32_t len2;
+ int8_t cs0 = choices[i];
+ switch(cs0) {
+ case ASCII:
+ if(sourceChar <= 0x7f) {
+ targetValue = (uint32_t)sourceChar;
+ len = 1;
+ cs = cs0;
+ g = 0;
+ }
+ break;
+ case ISO8859_1:
+ if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
+ targetValue = (uint32_t)sourceChar - 0x80;
+ len = 1;
+ cs = cs0;
+ g = 2;
+ }
+ break;
+ case HWKANA_7BIT:
+ if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
+ if(converterData->version==3) {
+ /* JIS7: use G1 (SO) */
+ /* Shift U+FF61..U+FF9F to bytes 21..5F. */
+ targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
+ len = 1;
+ pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
+ g = 1;
+ } else if(converterData->version==4) {
+ /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
+ /* Shift U+FF61..U+FF9F to bytes A1..DF. */
+ targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));
+ len = 1;
+
+ cs = pFromU2022State->cs[0];
+ if(IS_JP_DBCS(cs)) {
+ /* switch from a DBCS charset to JISX201 */
+ cs = (int8_t)JISX201;
+ }
+ /* else stay in the current G0 charset */
+ g = 0;
+ }
+ /* else do not use HWKANA_7BIT with other versions */
+ }
+ break;
+ case JISX201:
+ /* G0 SBCS */
+ value = jisx201FromU(sourceChar);
+ if(value <= 0x7f) {
+ targetValue = value;
+ len = 1;
+ cs = cs0;
+ g = 0;
+ useFallback = FALSE;
+ }
+ break;
+ case JISX208:
+ /* G0 DBCS from Shift-JIS table */
+ len2 = MBCS_FROM_UCHAR32_ISO2022(
+ converterData->myConverterArray[cs0],
+ sourceChar, &value,
+ useFallback, MBCS_OUTPUT_2);
+ if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
+ value = _2022FromSJIS(value);
+ if(value != 0) {
+ targetValue = value;
+ len = len2;
+ cs = cs0;
+ g = 0;
+ useFallback = FALSE;
+ }
+ } else if(len == 0 && useFallback &&
+ (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
+ targetValue = hwkana_fb[sourceChar - HWKANA_START]; /* hardcoded half-width Katakana fallback */
+ len = -2;
+ cs = cs0;
+ g = 0;
+ useFallback = FALSE;
+ }
+ break;
+ case ISO8859_7:
+ /* G0 SBCS forced to 7-bit output */
+ len2 = MBCS_SINGLE_FROM_UCHAR32(
+ converterData->myConverterArray[cs0],
+ sourceChar, &value,
+ useFallback);
+ if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
+ targetValue = value - 0x80;
+ len = len2;
+ cs = cs0;
+ g = 2;
+ useFallback = FALSE;
+ }
+ break;
+ default:
+ /* G0 DBCS */
+ len2 = MBCS_FROM_UCHAR32_ISO2022(
+ converterData->myConverterArray[cs0],
+ sourceChar, &value,
+ useFallback, MBCS_OUTPUT_2);
+ if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
+ if(cs0 == KSC5601) {
+ /*
+ * Check for valid bytes for the encoding scheme.
+ * This is necessary because the sub-converter (windows-949)
+ * has a broader encoding scheme than is valid for 2022.
+ */
+ value = _2022FromGR94DBCS(value);
+ if(value == 0) {
+ break;
+ }
+ }
+ targetValue = value;
+ len = len2;
+ cs = cs0;
+ g = 0;
+ useFallback = FALSE;
+ }
+ break;
+ }
+ }
+
+ if(len != 0) {
+ if(len < 0) {
+ len = -len; /* fallback */
+ }
+ outLen = 0; /* count output bytes */
+
+ /* write SI if necessary (only for JIS7) */
+ if(pFromU2022State->g == 1 && g == 0) {
+ buffer[outLen++] = UCNV_SI;
+ pFromU2022State->g = 0;
+ }
+
+ /* write the designation sequence if necessary */
+ if(cs != pFromU2022State->cs[g]) {
+ int32_t escLen = escSeqCharsLen[cs];
+ uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);
+ outLen += escLen;
+ pFromU2022State->cs[g] = cs;
+
+ /* invalidate the choices[] */
+ choiceCount = 0;
+ }
+
+ /* write the shift sequence if necessary */
+ if(g != pFromU2022State->g) {
+ switch(g) {
+ /* case 0 handled before writing escapes */
+ case 1:
+ buffer[outLen++] = UCNV_SO;
+ pFromU2022State->g = 1;
+ break;
+ default: /* case 2 */
+ buffer[outLen++] = 0x1b; /* ESC */
+ buffer[outLen++] = 0x4e; /* 'N'; pFromU2022State->g is not changed here */
+ break;
+ /* no case 3: no SS3 in ISO-2022-JP-x */
+ }
+ }
+
+ /* write the output bytes */
+ if(len == 1) {
+ buffer[outLen++] = (char)targetValue;
+ } else /* len == 2 */ {
+ buffer[outLen++] = (char)(targetValue >> 8);
+ buffer[outLen++] = (char)targetValue;
+ }
+ } else {
+ /*
+ * if we cannot find the character after checking all codepages
+ * then this is an error
+ */
+ *err = U_INVALID_CHAR_FOUND;
+ cnv->fromUChar32=sourceChar;
+ break;
+ }
+
+ if(sourceChar == CR || sourceChar == LF) {
+ /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
+ pFromU2022State->cs[2] = 0;
+ choiceCount = 0;
+ }
+
+ /* output outLen>0 bytes in buffer[] */
+ if(outLen == 1) {
+ *target++ = buffer[0];
+ if(offsets) {
+ *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
+ }
+ } else if(outLen == 2 && (target + 2) <= targetLimit) { /* both bytes fit: write them directly */
+ *target++ = buffer[0];
+ *target++ = buffer[1];
+ if(offsets) {
+ int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
+ *offsets++ = sourceIndex;
+ *offsets++ = sourceIndex;
+ }
+ } else {
+ fromUWriteUInt8( /* NOTE(review): presumably stores what does not fit and sets *err - confirm in fromUWriteUInt8() */
+ cnv,
+ buffer, outLen,
+ &target, (const char *)targetLimit,
+ &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
+ err);
+ if(U_FAILURE(*err)) {
+ break;
+ }
+ }
+ } /* end if(target < targetLimit) */
+ else{
+ *err =U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+
+ }/* end while(source < sourceLimit) */
+
+ /*
+ * the end of the input stream and detection of truncated input
+ * are handled by the framework, but for ISO-2022-JP conversion
+ * we need to be in ASCII mode at the very end
+ *
+ * conditions:
+ * successful
+ * in SO mode or not in ASCII mode
+ * end of input and no truncated input
+ */
+ if( U_SUCCESS(*err) &&
+ (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
+ args->flush && source>=sourceLimit && cnv->fromUChar32==0
+ ) {
+ int32_t sourceIndex;
+
+ outLen = 0;
+
+ if(pFromU2022State->g != 0) {
+ buffer[outLen++] = UCNV_SI;
+ pFromU2022State->g = 0;
+ }
+
+ if(pFromU2022State->cs[0] != ASCII) {
+ int32_t escLen = escSeqCharsLen[ASCII];
+ uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
+ outLen += escLen;
+ pFromU2022State->cs[0] = (int8_t)ASCII;
+ }
+
+ /* get the source index of the last input character */
+ /*
+ * TODO this would be simpler and more reliable if we used a pair
+ * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
+ * so that we could simply use the prevSourceIndex here;
+ * this code gives an incorrect result for the rare case of an unmatched
+ * trail surrogate that is alone in the last buffer of the text stream
+ */
+ sourceIndex=(int32_t)(source-args->source);
+ if(sourceIndex>0) {
+ --sourceIndex;
+ if( U16_IS_TRAIL(args->source[sourceIndex]) &&
+ (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
+ ) {
+ --sourceIndex; /* step back to the lead surrogate of the pair */
+ }
+ } else {
+ sourceIndex=-1; /* no input was consumed from this buffer */
+ }
+
+ fromUWriteUInt8(
+ cnv,
+ buffer, outLen,
+ &target, (const char *)targetLimit,
+ &offsets, sourceIndex,
+ err);
+ }
+
+ /*save the state and return */
+ args->source = source;
+ args->target = (char*)target;
+}
+
+/*************** to unicode *******************
+ *
+ * UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC() converts the bytes in
+ * [args->source, args->sourceLimit) to UChars at args->target and, when
+ * args->offsets is non-NULL, records for each UChar the source index of
+ * the byte sequence it came from.
+ *
+ * State is kept in the UConverterDataISO2022 at args->converter->extraInfo:
+ *   - toU2022State.cs[] / g / prevG: the designated charsets, the active
+ *     one, and the one to return to after a single shift
+ *   - key != 0: the previous call ended inside an escape sequence
+ *   - converter->toULength == 1: a double-byte lead byte from the previous
+ *     call is waiting in converter->toUBytes[0]
+ *   - isEmptySegment: set after a completed escape sequence, cleared once
+ *     a character is converted
+ *   - version: 3 enables SI/SO handling, 4 enables 8-bit halfwidth
+ *     katakana, 0 additionally rejects an escape sequence that follows an
+ *     empty segment
+ *
+ * The loop stops when the input is used up, when the target is full
+ * (U_BUFFER_OVERFLOW_ERROR), when a lead byte is the last input byte (it is
+ * saved in toUBytes[]), when changeState_2022() fails, or after
+ * toUnicodeCallback() has been called for an unconvertible sequence.
+ * args->source and args->target are written back on every exit path.
+ */
+
+static void U_CALLCONV
+UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
+ UErrorCode* err){
+ char tempBuf[2]; /* two-byte scratch handed to ucnv_MBCSSimpleGetNextUChar() */
+ const char *mySource = (char *) args->source;
+ UChar *myTarget = args->target;
+ const char *mySourceLimit = args->sourceLimit;
+ uint32_t targetUniChar = 0x0000;
+ uint32_t mySourceChar = 0x0000;
+ uint32_t tmpSourceChar = 0x0000;
+ UConverterDataISO2022* myData;
+ ISO2022State *pToU2022State;
+ StateEnum cs;
+
+ myData=(UConverterDataISO2022*)(args->converter->extraInfo);
+ pToU2022State = &myData->toU2022State;
+
+ if(myData->key != 0) {
+ /* continue with a partial escape sequence */
+ goto escape;
+ } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
+ /* continue with a partial double-byte character */
+ mySourceChar = args->converter->toUBytes[0];
+ args->converter->toULength = 0;
+ cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
+ targetUniChar = missingCharMarker;
+ goto getTrailByte;
+ }
+
+ while(mySource < mySourceLimit){
+
+ targetUniChar =missingCharMarker;
+
+ if(myTarget < args->targetLimit){
+
+ mySourceChar= (unsigned char) *mySource++;
+
+ switch(mySourceChar) {
+ case UCNV_SI:
+ if(myData->version==3) {
+ pToU2022State->g=0;
+ continue;
+ } else {
+ /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
+ myData->isEmptySegment = FALSE; /* reset this, we have a different error */
+ break;
+ }
+
+ case UCNV_SO:
+ if(myData->version==3) {
+ /* JIS7: switch to G1 half-width Katakana */
+ pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;
+ pToU2022State->g=1;
+ continue;
+ } else {
+ /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
+ myData->isEmptySegment = FALSE; /* reset this, we have a different error */
+ break;
+ }
+
+ case ESC_2022:
+ mySource--; /* step back onto the ESC byte so changeState_2022() starts there */
+escape:
+ {
+ const char * mySourceBefore = mySource; /* used below to measure how many bytes changeState_2022() consumed */
+ int8_t toULengthBefore = args->converter->toULength;
+
+ changeState_2022(args->converter,&(mySource),
+ mySourceLimit, ISO_2022_JP,err);
+
+ /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
+ if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;
+ args->converter->toUCallbackReason = UCNV_IRREGULAR;
+ args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
+ }
+ }
+
+ /* invalid or illegal escape sequence */
+ if(U_FAILURE(*err)){
+ args->target = myTarget;
+ args->source = mySource;
+ myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */
+ return;
+ }
+ /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
+ if(myData->key==0) {
+ myData->isEmptySegment = TRUE;
+ }
+ continue;
+
+ /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
+
+ case CR:
+ case LF:
+ /* automatically reset to single-byte mode */
+ if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {
+ pToU2022State->cs[0] = (int8_t)ASCII;
+ }
+ pToU2022State->cs[2] = 0;
+ pToU2022State->g = 0;
+ U_FALLTHROUGH;
+ default:
+ /* convert one or two bytes */
+ myData->isEmptySegment = FALSE;
+ cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
+ if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
+ !IS_JP_DBCS(cs)
+ ) {
+ /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
+ targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
+
+ /* return from a single-shift state to the previous one */
+ if(pToU2022State->g >= 2) {
+ pToU2022State->g=pToU2022State->prevG;
+ }
+ } else switch(cs) {
+ case ASCII:
+ if(mySourceChar <= 0x7f) {
+ targetUniChar = mySourceChar;
+ }
+ break;
+ case ISO8859_1:
+ if(mySourceChar <= 0x7f) {
+ targetUniChar = mySourceChar + 0x80;
+ }
+ /* return from a single-shift state to the previous one */
+ pToU2022State->g=pToU2022State->prevG;
+ break;
+ case ISO8859_7:
+ if(mySourceChar <= 0x7f) {
+ /* convert mySourceChar+0x80 to use a normal 8-bit table */
+ targetUniChar =
+ _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
+ myData->myConverterArray[cs],
+ mySourceChar + 0x80);
+ }
+ /* return from a single-shift state to the previous one */
+ pToU2022State->g=pToU2022State->prevG;
+ break;
+ case JISX201:
+ if(mySourceChar <= 0x7f) {
+ targetUniChar = jisx201ToU(mySourceChar);
+ }
+ break;
+ case HWKANA_7BIT:
+ if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
+ /* 7-bit halfwidth Katakana */
+ targetUniChar = mySourceChar + (HWKANA_START - 0x21);
+ }
+ break;
+ default:
+ /* G0 DBCS */
+ if(mySource < mySourceLimit) {
+ int leadIsOk, trailIsOk;
+ uint8_t trailByte;
+getTrailByte:
+ trailByte = (uint8_t)*mySource;
+ /*
+ * Ticket 5691: consistent illegal sequences:
+ * - We include at least the first byte in the illegal sequence.
+ * - If any of the non-initial bytes could be the start of a character,
+ * we stop the illegal sequence before the first one of those.
+ *
+ * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
+ * an ESC/SO/SI, we report only the first byte as the illegal sequence.
+ * Otherwise we convert or report the pair of bytes.
+ */
+ leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
+ trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
+ if (leadIsOk && trailIsOk) {
+ ++mySource;
+ tmpSourceChar = (mySourceChar << 8) | trailByte;
+ if(cs == JISX208) {
+ _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf);
+ mySourceChar = tmpSourceChar;
+ } else {
+ /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
+ mySourceChar = tmpSourceChar;
+ if (cs == KSC5601) {
+ tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */
+ }
+ tempBuf[0] = (char)(tmpSourceChar >> 8);
+ tempBuf[1] = (char)(tmpSourceChar);
+ }
+ targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
+ } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
+ /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+ ++mySource;
+ /* add another bit so that the code below writes 2 bytes in case of error */
+ mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
+ }
+ } else {
+ args->converter->toUBytes[0] = (uint8_t)mySourceChar;
+ args->converter->toULength = 1;
+ goto endloop; /* lead byte was the last input byte: keep it for the next call */
+ }
+ } /* End of inner switch */
+ break;
+ } /* End of outer switch */
+ if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
+ if(args->offsets){ /* offset = source index where the 1- or 2-byte sequence just consumed began */
+ args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
+ }
+ *(myTarget++)=(UChar)targetUniChar;
+ }
+ else if(targetUniChar > missingCharMarker){
+ /* disassemble the surrogate pair and write to output*/
+ targetUniChar-=0x0010000;
+ *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
+ if(args->offsets){
+ args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
+ }
+ ++myTarget;
+ if(myTarget< args->targetLimit){
+ *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
+ if(args->offsets){
+ args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
+ }
+ ++myTarget;
+ }else{ /* no room for the trail surrogate: store it in the converter's UCharErrorBuffer */
+ args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
+ (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
+ }
+
+ }
+ else{
+ /* Call the callback function*/
+ toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
+ break;
+ }
+ }
+ else{ /* goes with "if(myTarget < args->targetLimit)" way up near top of function */
+ *err =U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+endloop:
+ args->target = myTarget;
+ args->source = mySource;
+}
+
+
+#if !UCONFIG_ONLY_HTML_CONVERSION
+/***************************************************************
+* Rules for ISO-2022-KR encoding
+* i) The KSC5601 designator sequence should appear only once in a file,
+* at the beginning of a line before any KSC5601 characters. This usually
+* means that it appears by itself on the first line of the file
+* ii) There are only 2 shifting sequences SO to shift into double byte mode
+* and SI to shift into single byte mode
+*/
+/*
+ * From-Unicode conversion for the ISO-2022-KR variant that is handled
+ * entirely by the MBCS subconverter (myConverterData->currentConverter).
+ *
+ * The public converter's pending code point (fromUChar32) is handed to the
+ * subconverter before the call and read back afterwards. If the call ends
+ * with U_BUFFER_OVERFLOW_ERROR, the subconverter's charErrorBuffer contents
+ * and length are moved to the public converter and the subconverter's
+ * length is reset to 0. args->converter is the public converter again on
+ * return.
+ */
+static void U_CALLCONV
+UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
+    UConverter *const publicCnv = args->converter;
+    UConverterDataISO2022 *const isoData = (UConverterDataISO2022 *)publicCnv->extraInfo;
+    UConverter *const subCnv = isoData->currentConverter;
+
+    /* run the subconverter in place of the public one, carrying fromUChar32 across */
+    subCnv->fromUChar32 = publicCnv->fromUChar32;
+    args->converter = subCnv;
+    ucnv_MBCSFromUnicodeWithOffsets(args, err);
+    args->converter = publicCnv;
+    publicCnv->fromUChar32 = subCnv->fromUChar32;
+
+    if(*err != U_BUFFER_OVERFLOW_ERROR) {
+        return;
+    }
+
+    /* overflow: the bytes that did not fit must be visible on the public converter */
+    if(subCnv->charErrorBufferLength > 0) {
+        uprv_memcpy(publicCnv->charErrorBuffer,
+                    subCnv->charErrorBuffer,
+                    subCnv->charErrorBufferLength);
+    }
+    publicCnv->charErrorBufferLength = subCnv->charErrorBufferLength;
+    subCnv->charErrorBufferLength = 0;
+}
+
+static void U_CALLCONV
+UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
+ /*
+  * Converts UTF-16 from [args->source, args->sourceLimit) to ISO-2022-KR
+  * bytes at args->target, filling args->offsets when it is non-NULL.
+  *
+  * - version 1 is delegated to the _IBM variant.
+  * - converter->fromUnicodeStatus carries, across calls, whether the output
+  *   is currently in double-byte mode; UCNV_SO is written when entering
+  *   that mode and UCNV_SI when leaving it.
+  * - converter->fromUChar32 receives the code point of a conversion that
+  *   stopped early; a non-zero value on entry resumes at getTrail.
+  * - Bytes that do not fit in the target are appended to
+  *   converter->charErrorBuffer and *err becomes U_BUFFER_OVERFLOW_ERROR.
+  * - SO/SI/ESC in the input and unmatched surrogates give
+  *   U_ILLEGAL_CHAR_FOUND; unmappable code points give U_INVALID_CHAR_FOUND.
+  * - With args->flush at the end of the input, SHIFT_IN_STR is written if
+  *   the output is still in double-byte mode.
+  */
+ const UChar *source = args->source;
+ const UChar *sourceLimit = args->sourceLimit;
+ unsigned char *target = (unsigned char *) args->target;
+ unsigned char *targetLimit = (unsigned char *) args->targetLimit;
+ int32_t* offsets = args->offsets;
+ uint32_t targetByteUnit = 0x0000;
+ UChar32 sourceChar = 0x0000;
+ UBool isTargetByteDBCS;
+ UBool oldIsTargetByteDBCS;
+ UConverterDataISO2022 *converterData;
+ UConverterSharedData* sharedData;
+ UBool useFallback;
+ int32_t length =0;
+
+ converterData=(UConverterDataISO2022*)args->converter->extraInfo;
+ /* if the version is 1 then the user is requesting
+ * conversion with ibm-25546 pass the arguments to
+ * MBCS converter and return
+ */
+ if(converterData->version==1){
+ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
+ return;
+ }
+
+ /* initialize data */
+ sharedData = converterData->currentConverter->sharedData;
+ useFallback = args->converter->useFallback;
+ isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
+ oldIsTargetByteDBCS = isTargetByteDBCS;
+
+ isTargetByteDBCS = (UBool) args->converter->fromUnicodeStatus; /* NOTE(review): repeats the assignment made three lines above */
+ if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
+ goto getTrail; /* resume with the code unit stored in fromUChar32 by an earlier call */
+ }
+ while(source < sourceLimit){
+
+ targetByteUnit = missingCharMarker;
+
+ if(target < (unsigned char*) args->targetLimit){
+ sourceChar = *source++;
+
+ /* do not convert SO/SI/ESC */
+ if(IS_2022_CONTROL(sourceChar)) {
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+ args->converter->fromUChar32=sourceChar;
+ break;
+ }
+
+ length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
+ if(length < 0) {
+ length = -length; /* fallback */
+ }
+ /* only DBCS or SBCS characters are expected*/
+ /* DB characters with high bit set to 1 are expected */
+ if( length > 2 || length==0 ||
+ (length == 1 && targetByteUnit > 0x7f) ||
+ (length == 2 &&
+ ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) ||
+ (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
+ ) {
+ targetByteUnit=missingCharMarker;
+ }
+ if (targetByteUnit != missingCharMarker){
+
+ oldIsTargetByteDBCS = isTargetByteDBCS;
+ isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);
+ /* append the shift sequence */
+ if (oldIsTargetByteDBCS != isTargetByteDBCS ){
+
+ if (isTargetByteDBCS)
+ *target++ = UCNV_SO;
+ else
+ *target++ = UCNV_SI;
+ if(offsets)
+ *(offsets++) = (int32_t)(source - args->source-1);
+ }
+ /* write the targetUniChar to target */
+ if(targetByteUnit <= 0x00FF){
+ if( target < targetLimit){
+ *(target++) = (unsigned char) targetByteUnit;
+ if(offsets){
+ *(offsets++) = (int32_t)(source - args->source-1);
+ }
+
+ }else{
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }else{
+ if(target < targetLimit){
+ *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80); /* lead byte 0xa1..0xfe becomes 0x21..0x7e */
+ if(offsets){
+ *(offsets++) = (int32_t)(source - args->source-1);
+ }
+ if(target < targetLimit){
+ *(target++) =(unsigned char) (targetByteUnit -0x80); /* trail byte, same 0x80 shift */
+ if(offsets){
+ *(offsets++) = (int32_t)(source - args->source-1);
+ }
+ }else{
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }else{
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+
+ }
+ else{
+ /* oops.. the code point is unassigned
+ * set the error and reason
+ */
+
+ /*check if the char is a First surrogate*/
+ if(U16_IS_SURROGATE(sourceChar)) {
+ if(U16_IS_SURROGATE_LEAD(sourceChar)) {
+getTrail:
+ /*look ahead to find the trail surrogate*/
+ if(source < sourceLimit) {
+ /* test the following code unit */
+ UChar trail=(UChar) *source;
+ if(U16_IS_TRAIL(trail)) {
+ source++;
+ sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
+ *err = U_INVALID_CHAR_FOUND;
+ /* convert this surrogate code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else {
+ /* no more input */
+ *err = U_ZERO_ERROR;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else {
+ /* callback(unassigned) for a BMP code point */
+ *err = U_INVALID_CHAR_FOUND;
+ }
+
+ args->converter->fromUChar32=sourceChar;
+ break;
+ }
+ } /* end if(myTargetIndex<myTargetLength) */
+ else{
+ *err =U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+
+ }/* end while(mySourceIndex<mySourceLength) */
+
+ /*
+ * the end of the input stream and detection of truncated input
+ * are handled by the framework, but for ISO-2022-KR conversion
+ * we need to be in ASCII mode at the very end
+ *
+ * conditions:
+ * successful
+ * not in ASCII mode
+ * end of input and no truncated input
+ */
+ if( U_SUCCESS(*err) &&
+ isTargetByteDBCS &&
+ args->flush && source>=sourceLimit && args->converter->fromUChar32==0
+ ) {
+ int32_t sourceIndex;
+
+ /* we are switching to ASCII */
+ isTargetByteDBCS=FALSE;
+
+ /* get the source index of the last input character */
+ /*
+ * TODO this would be simpler and more reliable if we used a pair
+ * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
+ * so that we could simply use the prevSourceIndex here;
+ * this code gives an incorrect result for the rare case of an unmatched
+ * trail surrogate that is alone in the last buffer of the text stream
+ */
+ sourceIndex=(int32_t)(source-args->source);
+ if(sourceIndex>0) {
+ --sourceIndex;
+ if( U16_IS_TRAIL(args->source[sourceIndex]) &&
+ (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
+ ) {
+ --sourceIndex;
+ }
+ } else {
+ sourceIndex=-1;
+ }
+
+ fromUWriteUInt8(
+ args->converter,
+ SHIFT_IN_STR, 1,
+ &target, (const char *)targetLimit,
+ &offsets, sourceIndex,
+ err);
+ }
+
+ /*save the state and return */
+ args->source = source;
+ args->target = (char*)target;
+ args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;
+}
+
+/************************ To Unicode ***************************************
+ *
+ * UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM() converts the input
+ * in segments: getEndOfBuffer_2022() determines where the current segment
+ * ends, ucnv_MBCSToUnicodeWithOffsets() converts that segment with
+ * myData->currentConverter, and changeState_2022() then processes the
+ * escape sequence that follows.
+ *
+ * A copy of the caller's arguments (subArgs) is used for the subconverter.
+ * Around each subconverter call the partial input bytes (toUBytes /
+ * toULength) are copied in and back out, and on U_BUFFER_OVERFLOW_ERROR the
+ * UCharErrorBuffer contents are moved to the public converter. Offsets
+ * written during a call are shifted by the number of source bytes consumed
+ * since the function was entered, so that they stay relative to the
+ * original args->source.
+ *
+ * When myData->key != 0 on entry, processing resumes directly at the
+ * changeState_2022() call.
+ */
+
+static void U_CALLCONV
+UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
+ UErrorCode* err){
+ char const* sourceStart;
+ UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
+
+ UConverterToUnicodeArgs subArgs;
+ int32_t minArgsSize; /* number of bytes of *args copied into subArgs */
+
+ /* set up the subconverter arguments */
+ if(args->size<sizeof(UConverterToUnicodeArgs)) {
+ minArgsSize = args->size;
+ } else {
+ minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
+ }
+
+ uprv_memcpy(&subArgs, args, minArgsSize);
+ subArgs.size = (uint16_t)minArgsSize;
+ subArgs.converter = myData->currentConverter;
+
+ /* remember the original start of the input for offsets */
+ sourceStart = args->source;
+
+ if(myData->key != 0) {
+ /* continue with a partial escape sequence */
+ goto escape;
+ }
+
+ while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
+ /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
+ subArgs.source = args->source;
+ subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
+ if(subArgs.source != subArgs.sourceLimit) {
+ /*
+ * get the current partial byte sequence
+ *
+ * it needs to be moved between the public and the subconverter
+ * so that the conversion framework, which only sees the public
+ * converter, can handle truncated and illegal input etc.
+ */
+ if(args->converter->toULength > 0) {
+ uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);
+ }
+ subArgs.converter->toULength = args->converter->toULength;
+
+ /*
+ * Convert up to the end of the input, or to before the next escape character.
+ * Does not handle conversion extensions because the preToU[] state etc.
+ * is not copied.
+ */
+ ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
+
+ if(args->offsets != NULL && sourceStart != args->source) {
+ /* update offsets to base them on the actual start of the input */
+ int32_t *offsets = args->offsets;
+ UChar *target = args->target;
+ int32_t delta = (int32_t)(args->source - sourceStart);
+ while(target < subArgs.target) {
+ if(*offsets >= 0) { /* negative offsets are left unchanged */
+ *offsets += delta;
+ }
+ ++offsets;
+ ++target;
+ }
+ }
+ args->source = subArgs.source;
+ args->target = subArgs.target;
+ args->offsets = subArgs.offsets;
+
+ /* copy input/error/overflow buffers */
+ if(subArgs.converter->toULength > 0) {
+ uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);
+ }
+ args->converter->toULength = subArgs.converter->toULength;
+
+ if(*err == U_BUFFER_OVERFLOW_ERROR) {
+ if(subArgs.converter->UCharErrorBufferLength > 0) {
+ uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
+ subArgs.converter->UCharErrorBufferLength);
+ }
+ args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
+ subArgs.converter->UCharErrorBufferLength = 0;
+ }
+ }
+
+ if (U_FAILURE(*err) || (args->source == args->sourceLimit)) { /* stop on error or when all input has been consumed */
+ return;
+ }
+
+escape: /* also the entry point when a previous call ended inside an escape sequence */
+ changeState_2022(args->converter,
+ &(args->source),
+ args->sourceLimit,
+ ISO_2022_KR,
+ err);
+ }
+}
+
+/*
+ * Converts ISO-2022-KR bytes from [args->source, args->sourceLimit) to
+ * UChars at args->target and, when args->offsets is non-NULL, records the
+ * source index of each converted character.
+ *
+ * - version 1 is delegated to the _IBM variant.
+ * - UCNV_SO selects double-byte mode (toU2022State.g = 1) and marks the new
+ *   segment as empty; UCNV_SI selects single-byte mode (g = 0) and reports
+ *   U_ILLEGAL_ESCAPE_SEQUENCE if the segment it closes is still empty.
+ * - ESC is handed to changeState_2022().
+ * - In double-byte mode a lead/trail pair in 0x21..0x7e is looked up with
+ *   0x80 added to both bytes; in single-byte mode only bytes <= 0x7f are
+ *   looked up.
+ * - Anything that yields 0xfffe or above is passed to toUnicodeCallback()
+ *   and ends the loop.
+ * - A lead byte that is the last input byte is saved in
+ *   converter->toUBytes[0] (toULength = 1) and picked up by the next call.
+ * - A full target gives U_BUFFER_OVERFLOW_ERROR.
+ *
+ * args->source and args->target are written back on every exit path.
+ */
+static void U_CALLCONV
+UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
+ UErrorCode* err){
+ char tempBuf[2];
+ const char *mySource = ( char *) args->source;
+ UChar *myTarget = args->target;
+ const char *mySourceLimit = args->sourceLimit;
+ UChar32 targetUniChar = 0x0000;
+ /*
+  * Must be wider than 16 bits: the illegal-pair branch below sets bit 16
+  * (0x10000) as a "two bytes consumed" marker. With a 16-bit type that
+  * marker was truncated away, so a pair whose lead byte is 0x00 looked
+  * like a single byte (value <= 0xff) although two bytes had been consumed.
+  * The ISO-2022-JP counterpart uses uint32_t for the same variable.
+  */
+ uint32_t mySourceChar = 0x0000;
+ UConverterDataISO2022* myData;
+ UConverterSharedData* sharedData ;
+ UBool useFallback;
+
+ myData=(UConverterDataISO2022*)(args->converter->extraInfo);
+ if(myData->version==1){
+ UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
+ return;
+ }
+
+ /* initialize state */
+ sharedData = myData->currentConverter->sharedData;
+ useFallback = args->converter->useFallback;
+
+ if(myData->key != 0) {
+ /* continue with a partial escape sequence */
+ goto escape;
+ } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
+ /* continue with a partial double-byte character */
+ mySourceChar = args->converter->toUBytes[0];
+ args->converter->toULength = 0;
+ goto getTrailByte;
+ }
+
+ while(mySource< mySourceLimit){
+
+ if(myTarget < args->targetLimit){
+
+ mySourceChar= (unsigned char) *mySource++;
+
+ if(mySourceChar==UCNV_SI){
+ myData->toU2022State.g = 0;
+ if (myData->isEmptySegment) {
+ myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;
+ args->converter->toUCallbackReason = UCNV_IRREGULAR;
+ args->converter->toUBytes[0] = (uint8_t)mySourceChar;
+ args->converter->toULength = 1;
+ args->target = myTarget;
+ args->source = mySource;
+ return;
+ }
+ /*consume the source */
+ continue;
+ }else if(mySourceChar==UCNV_SO){
+ myData->toU2022State.g = 1;
+ myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */
+ /*consume the source */
+ continue;
+ }else if(mySourceChar==ESC_2022){
+ mySource--; /* step back onto the ESC byte so changeState_2022() starts there */
+escape:
+ myData->isEmptySegment = FALSE; /* Any invalid ESC sequences will be detected separately, so just reset this */
+ changeState_2022(args->converter,&(mySource),
+ mySourceLimit, ISO_2022_KR, err);
+ if(U_FAILURE(*err)){
+ args->target = myTarget;
+ args->source = mySource;
+ return;
+ }
+ continue;
+ }
+
+ myData->isEmptySegment = FALSE; /* Any invalid char errors will be detected separately, so just reset this */
+ if(myData->toU2022State.g == 1) {
+ if(mySource < mySourceLimit) {
+ int leadIsOk, trailIsOk;
+ uint8_t trailByte;
+getTrailByte:
+ targetUniChar = missingCharMarker;
+ trailByte = (uint8_t)*mySource;
+ /*
+ * Ticket 5691: consistent illegal sequences:
+ * - We include at least the first byte in the illegal sequence.
+ * - If any of the non-initial bytes could be the start of a character,
+ * we stop the illegal sequence before the first one of those.
+ *
+ * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
+ * an ESC/SO/SI, we report only the first byte as the illegal sequence.
+ * Otherwise we convert or report the pair of bytes.
+ */
+ leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
+ trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
+ if (leadIsOk && trailIsOk) {
+ ++mySource;
+ tempBuf[0] = (char)(mySourceChar + 0x80);
+ tempBuf[1] = (char)(trailByte + 0x80);
+ targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
+ mySourceChar = (mySourceChar << 8) | trailByte;
+ } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
+ /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+ ++mySource;
+ /* add another bit so that the code below writes 2 bytes in case of error */
+ mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
+ }
+ } else {
+ /* lead byte is the last input byte: keep it for the next call */
+ args->converter->toUBytes[0] = (uint8_t)mySourceChar;
+ args->converter->toULength = 1;
+ break;
+ }
+ }
+ else if(mySourceChar <= 0x7f) {
+ targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
+ } else {
+ targetUniChar = 0xffff;
+ }
+ if(targetUniChar < 0xfffe){
+ if(args->offsets) {
+ /* offset = source index where the 1- or 2-byte sequence just consumed began */
+ args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
+ }
+ *(myTarget++)=(UChar)targetUniChar;
+ }
+ else {
+ /* Call the callback function*/
+ toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
+ break;
+ }
+ }
+ else{
+ *err =U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ args->target = myTarget;
+ args->source = mySource;
+}
+
+/*************************** END ISO2022-KR *********************************/
+
+/*************************** ISO-2022-CN *********************************
+*
+* Rules for ISO-2022-CN Encoding:
+* i) The designator sequence must appear once on a line before any instance
+* of character set it designates.
+* ii) If two lines contain characters from the same character set, both lines
+* must include the designator sequence.
+* iii) Once the designator sequence is known, a shifting sequence has to be found
+* to invoke the shifting
+* iv) All lines start in ASCII and end in ASCII.
+* v) Four shifting sequences are employed for this purpose:
+*
+* Sequence ASCII Eq Charsets
+* ---------- ------- ---------
+* SI <SI> US-ASCII
+* SO <SO> CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
+* SS2 <ESC>N CNS-11643-1992 Plane 2
+* SS3 <ESC>O CNS-11643-1992 Planes 3-7
+*
+* vi)
+* SOdesignator : ESC "$" ")" finalchar_for_SO
+* SS2designator : ESC "$" "*" finalchar_for_SS2
+* SS3designator : ESC "$" "+" finalchar_for_SS3
+*
+* ESC $ ) A Indicates the bytes following SO are Chinese
+* characters as defined in GB 2312-80, until
+* another SOdesignation appears
+*
+*
+* ESC $ ) E Indicates the bytes following SO are as defined
+* in ISO-IR-165 (for details, see section 2.1),
+* until another SOdesignation appears
+*
+* ESC $ ) G Indicates the bytes following SO are as defined
+* in CNS 11643-plane-1, until another
+* SOdesignation appears
+*
+* ESC $ * H Indicates the two bytes immediately following
+* SS2 is a Chinese character as defined in CNS
+* 11643-plane-2, until another SS2designation
+* appears
+* (Meaning <ESC>N must precede every 2 byte
+* sequence.)
+*
+* ESC $ + I Indicates the immediate two bytes following SS3
+* is a Chinese character as defined in CNS
+* 11643-plane-3, until another SS3designation
+* appears
+* (Meaning <ESC>O must precede every 2 byte
+* sequence.)
+*
+* ESC $ + J Indicates the immediate two bytes following SS3
+* is a Chinese character as defined in CNS
+* 11643-plane-4, until another SS3designation
+* appears
+* (In English: <ESC>O must precede every 2 byte
+* sequence.)
+*
+* ESC $ + K Indicates the immediate two bytes following SS3
+* is a Chinese character as defined in CNS
+* 11643-plane-5, until another SS3designation
+* appears
+*
+* ESC $ + L Indicates the immediate two bytes following SS3
+* is a Chinese character as defined in CNS
+* 11643-plane-6, until another SS3designation
+* appears
+*
+* ESC $ + M Indicates the immediate two bytes following SS3
+* is a Chinese character as defined in CNS
+* 11643-plane-7, until another SS3designation
+* appears
+*
+* As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
+* has its own designation information before any Chinese characters
+* appear
+*
+*/
+
+/* The following are defined this way to make the strings truly readonly */
+static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41"; /* ESC $ ) A */
+static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45"; /* ESC $ ) E */
+static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; /* ESC $ ) G */
+static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; /* ESC $ * H */
+static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; /* ESC $ + I */
+static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; /* ESC $ + J */
+static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; /* ESC $ + K */
+static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; /* ESC $ + L */
+static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; /* ESC $ + M */
+
+/********************** ISO2022-CN Data **************************
+ *
+ * Designation sequence for each charset. The ISO-2022-CN from-Unicode
+ * code indexes this table with the charset state value for values below
+ * CNS_11643, and with CNS_11643 + (cs - CNS_11643_1) for the CNS planes;
+ * it copies 4 bytes from the selected entry.
+ */
+static const char* const escSeqCharsCN[10] ={
+ SHIFT_IN_STR, /* 0 ASCII */
+ GB_2312_80_STR, /* 1 GB2312_1 */
+ ISO_IR_165_STR, /* 2 ISO_IR_165 */
+ CNS_11643_1992_Plane_1_STR, /* 3 CNS 11643-1992 plane 1 */
+ CNS_11643_1992_Plane_2_STR, /* 4 CNS 11643-1992 plane 2 */
+ CNS_11643_1992_Plane_3_STR, /* 5 CNS 11643-1992 plane 3 */
+ CNS_11643_1992_Plane_4_STR, /* 6 CNS 11643-1992 plane 4 */
+ CNS_11643_1992_Plane_5_STR, /* 7 CNS 11643-1992 plane 5 */
+ CNS_11643_1992_Plane_6_STR, /* 8 CNS 11643-1992 plane 6 */
+ CNS_11643_1992_Plane_7_STR /* 9 CNS 11643-1992 plane 7 */
+};
+
+static void U_CALLCONV
+UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
+ UConverter *cnv = args->converter;
+ UConverterDataISO2022 *converterData;
+ ISO2022State *pFromU2022State;
+ uint8_t *target = (uint8_t *) args->target;
+ const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
+ const UChar* source = args->source;
+ const UChar* sourceLimit = args->sourceLimit;
+ int32_t* offsets = args->offsets;
+ UChar32 sourceChar;
+ char buffer[8];
+ int32_t len;
+ int8_t choices[3];
+ int32_t choiceCount;
+ uint32_t targetValue = 0;
+ UBool useFallback;
+
+ /* set up the state */
+ converterData = (UConverterDataISO2022*)cnv->extraInfo;
+ pFromU2022State = &converterData->fromU2022State;
+
+ choiceCount = 0;
+
+ /* check if the last codepoint of previous buffer was a lead surrogate*/
+ if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
+ goto getTrail;
+ }
+
+ while( source < sourceLimit){
+ if(target < targetLimit){
+
+ sourceChar = *(source++);
+ /*check if the char is a First surrogate*/
+ if(U16_IS_SURROGATE(sourceChar)) {
+ if(U16_IS_SURROGATE_LEAD(sourceChar)) {
+getTrail:
+ /*look ahead to find the trail surrogate*/
+ if(source < sourceLimit) {
+ /* test the following code unit */
+ UChar trail=(UChar) *source;
+ if(U16_IS_TRAIL(trail)) {
+ source++;
+ sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
+ cnv->fromUChar32=0x00;
+ /* convert this supplementary code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+ cnv->fromUChar32=sourceChar;
+ break;
+ }
+ } else {
+ /* no more input */
+ cnv->fromUChar32=sourceChar;
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+ cnv->fromUChar32=sourceChar;
+ break;
+ }
+ }
+
+ /* do the conversion */
+ if(sourceChar <= 0x007f ){
+ /* do not convert SO/SI/ESC */
+ if(IS_2022_CONTROL(sourceChar)) {
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+ cnv->fromUChar32=sourceChar;
+ break;
+ }
+
+ /* US-ASCII */
+ if(pFromU2022State->g == 0) {
+ buffer[0] = (char)sourceChar;
+ len = 1;
+ } else {
+ buffer[0] = UCNV_SI;
+ buffer[1] = (char)sourceChar;
+ len = 2;
+ pFromU2022State->g = 0;
+ choiceCount = 0;
+ }
+ if(sourceChar == CR || sourceChar == LF) {
+ /* reset the state at the end of a line */
+ uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));
+ choiceCount = 0;
+ }
+ }
+ else{
+ /* convert U+0080..U+10ffff */
+ int32_t i;
+ int8_t cs, g;
+
+ if(choiceCount == 0) {
+ /* try the current SO/G1 converter first */
+ choices[0] = pFromU2022State->cs[1];
+
+ /* default to GB2312_1 if none is designated yet */
+ if(choices[0] == 0) {
+ choices[0] = GB2312_1;
+ }
+
+ if(converterData->version == 0) {
+ /* ISO-2022-CN */
+
+ /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
+ if(choices[0] == GB2312_1) {
+ choices[1] = (int8_t)CNS_11643_1;
+ } else {
+ choices[1] = (int8_t)GB2312_1;
+ }
+
+ choiceCount = 2;
+ } else if (converterData->version == 1) {
+ /* ISO-2022-CN-EXT */
+
+ /* try one of the other converters */
+ switch(choices[0]) {
+ case GB2312_1:
+ choices[1] = (int8_t)CNS_11643_1;
+ choices[2] = (int8_t)ISO_IR_165;
+ break;
+ case ISO_IR_165:
+ choices[1] = (int8_t)GB2312_1;
+ choices[2] = (int8_t)CNS_11643_1;
+ break;
+ default: /* CNS_11643_x */
+ choices[1] = (int8_t)GB2312_1;
+ choices[2] = (int8_t)ISO_IR_165;
+ break;
+ }
+
+ choiceCount = 3;
+ } else {
+ choices[0] = (int8_t)CNS_11643_1;
+ choices[1] = (int8_t)GB2312_1;
+ }
+ }
+
+ cs = g = 0;
+ /*
+ * len==0: no mapping found yet
+ * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
+ * len>0: found a roundtrip result, done
+ */
+ len = 0;
+ /*
+ * We will turn off useFallback after finding a fallback,
+ * but we still get fallbacks from PUA code points as usual.
+ * Therefore, we will also need to check that we don't overwrite
+ * an early fallback with a later one.
+ */
+ useFallback = cnv->useFallback;
+
+ for(i = 0; i < choiceCount && len <= 0; ++i) {
+ int8_t cs0 = choices[i];
+ if(cs0 > 0) {
+ uint32_t value;
+ int32_t len2;
+ if(cs0 >= CNS_11643_0) {
+ len2 = MBCS_FROM_UCHAR32_ISO2022(
+ converterData->myConverterArray[CNS_11643],
+ sourceChar,
+ &value,
+ useFallback,
+ MBCS_OUTPUT_3);
+ if(len2 == 3 || (len2 == -3 && len == 0)) {
+ targetValue = value;
+ cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);
+ if(len2 >= 0) {
+ len = 2;
+ } else {
+ len = -2;
+ useFallback = FALSE;
+ }
+ if(cs == CNS_11643_1) {
+ g = 1;
+ } else if(cs == CNS_11643_2) {
+ g = 2;
+ } else /* plane 3..7 */ if(converterData->version == 1) {
+ g = 3;
+ } else {
+ /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
+ len = 0;
+ }
+ }
+ } else {
+ /* GB2312_1 or ISO-IR-165 */
+ U_ASSERT(cs0<UCNV_2022_MAX_CONVERTERS);
+ len2 = MBCS_FROM_UCHAR32_ISO2022(
+ converterData->myConverterArray[cs0],
+ sourceChar,
+ &value,
+ useFallback,
+ MBCS_OUTPUT_2);
+ if(len2 == 2 || (len2 == -2 && len == 0)) {
+ targetValue = value;
+ len = len2;
+ cs = cs0;
+ g = 1;
+ useFallback = FALSE;
+ }
+ }
+ }
+ }
+
+ if(len != 0) {
+ len = 0; /* count output bytes; it must have been abs(len) == 2 */
+
+ /* write the designation sequence if necessary */
+ if(cs != pFromU2022State->cs[g]) {
+ if(cs < CNS_11643) {
+ uprv_memcpy(buffer, escSeqCharsCN[cs], 4);
+ } else {
+ U_ASSERT(cs >= CNS_11643_1);
+ uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);
+ }
+ len = 4;
+ pFromU2022State->cs[g] = cs;
+ if(g == 1) {
+ /* changing the SO/G1 charset invalidates the choices[] */
+ choiceCount = 0;
+ }
+ }
+
+ /* write the shift sequence if necessary */
+ if(g != pFromU2022State->g) {
+ switch(g) {
+ case 1:
+ buffer[len++] = UCNV_SO;
+
+ /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
+ pFromU2022State->g = 1;
+ break;
+ case 2:
+ buffer[len++] = 0x1b;
+ buffer[len++] = 0x4e;
+ break;
+ default: /* case 3 */
+ buffer[len++] = 0x1b;
+ buffer[len++] = 0x4f;
+ break;
+ }
+ }
+
+ /* write the two output bytes */
+ buffer[len++] = (char)(targetValue >> 8);
+ buffer[len++] = (char)targetValue;
+ } else {
+ /* if we cannot find the character after checking all codepages
+ * then this is an error
+ */
+ *err = U_INVALID_CHAR_FOUND;
+ cnv->fromUChar32=sourceChar;
+ break;
+ }
+ }
+
+ /* output len>0 bytes in buffer[] */
+ if(len == 1) {
+ *target++ = buffer[0];
+ if(offsets) {
+ *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
+ }
+ } else if(len == 2 && (target + 2) <= targetLimit) {
+ *target++ = buffer[0];
+ *target++ = buffer[1];
+ if(offsets) {
+ int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
+ *offsets++ = sourceIndex;
+ *offsets++ = sourceIndex;
+ }
+ } else {
+ fromUWriteUInt8(
+ cnv,
+ buffer, len,
+ &target, (const char *)targetLimit,
+ &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
+ err);
+ if(U_FAILURE(*err)) {
+ break;
+ }
+ }
+ } /* end if(myTargetIndex<myTargetLength) */
+ else{
+ *err =U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+
+ }/* end while(mySourceIndex<mySourceLength) */
+
+ /*
+ * the end of the input stream and detection of truncated input
+ * are handled by the framework, but for ISO-2022-CN conversion
+ * we need to be in ASCII mode at the very end
+ *
+ * conditions:
+ * successful
+ * not in ASCII mode
+ * end of input and no truncated input
+ */
+ if( U_SUCCESS(*err) &&
+ pFromU2022State->g!=0 &&
+ args->flush && source>=sourceLimit && cnv->fromUChar32==0
+ ) {
+ int32_t sourceIndex;
+
+ /* we are switching to ASCII */
+ pFromU2022State->g=0;
+
+ /* get the source index of the last input character */
+ /*
+ * TODO this would be simpler and more reliable if we used a pair
+ * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
+ * so that we could simply use the prevSourceIndex here;
+ * this code gives an incorrect result for the rare case of an unmatched
+ * trail surrogate that is alone in the last buffer of the text stream
+ */
+ sourceIndex=(int32_t)(source-args->source);
+ if(sourceIndex>0) {
+ --sourceIndex;
+ if( U16_IS_TRAIL(args->source[sourceIndex]) &&
+ (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
+ ) {
+ --sourceIndex;
+ }
+ } else {
+ sourceIndex=-1;
+ }
+
+ fromUWriteUInt8(
+ cnv,
+ SHIFT_IN_STR, 1,
+ &target, (const char *)targetLimit,
+ &offsets, sourceIndex,
+ err);
+ }
+
+ /*save the state and return */
+ args->source = source;
+ args->target = (char*)target;
+}
+
+/*
+ * toUnicode implementation for ISO-2022-CN: consumes bytes from args->source
+ * (up to args->sourceLimit) and writes UChars to args->target (up to
+ * args->targetLimit); when args->offsets is set, a source byte offset is
+ * recorded for each UChar written to the target.
+ *
+ * The shift/designation state lives in the UConverterDataISO2022 referenced by
+ * args->converter->extraInfo (its toU2022State member):
+ * - SI selects g=0; SO selects g=1, but only if cs[1] has been designated
+ * - ESC sequences are parsed by changeState_2022()
+ * - CR and LF clear the whole state and are then converted like any other byte
+ * - any other byte is converted as a single byte (g==0, values <=0x7f only)
+ *   or as the lead byte of a double-byte character that is looked up in
+ *   myConverterArray[]
+ *
+ * A call can resume a partial escape sequence (myData->key!=0) or a lead byte
+ * that the previous call saved in toUBytes[] (toULength==1).
+ *
+ * *err is set to U_BUFFER_OVERFLOW_ERROR when the target is full and to
+ * U_ILLEGAL_ESCAPE_SEQUENCE for an SO segment that contains no character;
+ * other errors come from changeState_2022() and toUnicodeCallback().
+ * args->source and args->target are updated on every exit path.
+ */
+static void U_CALLCONV
+UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
+ UErrorCode* err){
+ char tempBuf[3];
+ const char *mySource = (char *) args->source;
+ UChar *myTarget = args->target;
+ const char *mySourceLimit = args->sourceLimit;
+ uint32_t targetUniChar = 0x0000;
+ uint32_t mySourceChar = 0x0000;
+ UConverterDataISO2022* myData;
+ ISO2022State *pToU2022State;
+
+ myData=(UConverterDataISO2022*)(args->converter->extraInfo);
+ pToU2022State = &myData->toU2022State;
+
+ if(myData->key != 0) {
+ /* continue with a partial escape sequence */
+ goto escape;
+ } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
+ /* continue with a partial double-byte character */
+ mySourceChar = args->converter->toUBytes[0];
+ args->converter->toULength = 0;
+ targetUniChar = missingCharMarker;
+ goto getTrailByte;
+ }
+
+ while(mySource < mySourceLimit){
+
+ targetUniChar =missingCharMarker;
+
+ if(myTarget < args->targetLimit){
+
+ mySourceChar= (unsigned char) *mySource++;
+
+ switch(mySourceChar){
+ case UCNV_SI:
+ pToU2022State->g=0;
+ if (myData->isEmptySegment) {
+ myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;
+ args->converter->toUCallbackReason = UCNV_IRREGULAR;
+ args->converter->toUBytes[0] = static_cast<uint8_t>(mySourceChar);
+ args->converter->toULength = 1;
+ args->target = myTarget;
+ args->source = mySource;
+ return;
+ }
+ continue;
+
+ case UCNV_SO:
+ if(pToU2022State->cs[1] != 0) {
+ pToU2022State->g=1;
+ myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */
+ continue;
+ } else {
+ /* illegal to have SO before a matching designator */
+ myData->isEmptySegment = FALSE; /* Handling a different error, reset this to avoid future spurious errs */
+ break; /* leaves the switch with targetUniChar==missingCharMarker, so the callback branch below reports the byte */
+ }
+
+ case ESC_2022:
+ mySource--; /* back up so that changeState_2022() starts at the ESC byte itself */
+escape:
+ {
+ const char * mySourceBefore = mySource;
+ int8_t toULengthBefore = args->converter->toULength;
+
+ changeState_2022(args->converter,&(mySource),
+ mySourceLimit, ISO_2022_CN,err);
+
+ /* After SO there must be at least one character before a designator (designator error handled separately) */
+ if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;
+ args->converter->toUCallbackReason = UCNV_IRREGULAR;
+ args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
+ }
+ }
+
+ /* invalid or illegal escape sequence */
+ if(U_FAILURE(*err)){
+ args->target = myTarget;
+ args->source = mySource;
+ myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */
+ return;
+ }
+ continue;
+
+ /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
+
+ case CR:
+ case LF:
+ uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
+ U_FALLTHROUGH;
+ default:
+ /* convert one or two bytes */
+ myData->isEmptySegment = FALSE;
+ if(pToU2022State->g != 0) {
+ if(mySource < mySourceLimit) {
+ UConverterSharedData *cnv;
+ StateEnum tempState;
+ int32_t tempBufLen;
+ int leadIsOk, trailIsOk;
+ uint8_t trailByte;
+getTrailByte:
+ trailByte = (uint8_t)*mySource;
+ /*
+ * Ticket 5691: consistent illegal sequences:
+ * - We include at least the first byte in the illegal sequence.
+ * - If any of the non-initial bytes could be the start of a character,
+ * we stop the illegal sequence before the first one of those.
+ *
+ * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
+ * an ESC/SO/SI, we report only the first byte as the illegal sequence.
+ * Otherwise we convert or report the pair of bytes.
+ */
+ leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
+ trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
+ if (leadIsOk && trailIsOk) {
+ ++mySource;
+ tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
+ if(tempState >= CNS_11643_0) {
+ cnv = myData->myConverterArray[CNS_11643];
+ tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
+ tempBuf[1] = (char) (mySourceChar);
+ tempBuf[2] = (char) trailByte;
+ tempBufLen = 3;
+
+ }else{
+ U_ASSERT(tempState<UCNV_2022_MAX_CONVERTERS);
+ cnv = myData->myConverterArray[tempState];
+ tempBuf[0] = (char) (mySourceChar);
+ tempBuf[1] = (char) trailByte;
+ tempBufLen = 2;
+ }
+ targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
+ mySourceChar = (mySourceChar << 8) | trailByte;
+ } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
+ /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+ ++mySource;
+ /* add another bit so that the code below writes 2 bytes in case of error */
+ mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
+ }
+ if(pToU2022State->g>=2) {
+ /* return from a single-shift state to the previous one */
+ pToU2022State->g=pToU2022State->prevG;
+ }
+ } else {
+ args->converter->toUBytes[0] = (uint8_t)mySourceChar;
+ args->converter->toULength = 1;
+ goto endloop; /* lead byte is the last byte of this buffer: the next call resumes at getTrailByte */
+ }
+ }
+ else{
+ if(mySourceChar <= 0x7f) {
+ targetUniChar = (UChar) mySourceChar;
+ }
+ }
+ break;
+ }
+ if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
+ if(args->offsets){ /* offset of the first source byte: back up 1 for a single byte, 2 for a byte pair */
+ args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
+ }
+ *(myTarget++)=(UChar)targetUniChar;
+ }
+ else if(targetUniChar > missingCharMarker){
+ /* disassemble the surrogate pair and write to output*/
+ targetUniChar-=0x0010000;
+ *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
+ if(args->offsets){
+ args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
+ }
+ ++myTarget;
+ if(myTarget< args->targetLimit){
+ *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
+ if(args->offsets){
+ args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
+ }
+ ++myTarget;
+ }else{ /* no room left for the trail surrogate: append it to the converter's UCharErrorBuffer instead */
+ args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
+ (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
+ }
+
+ }
+ else{
+ /* Call the callback function*/
+ toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
+ break;
+ }
+ }
+ else{
+ *err =U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+endloop:
+ args->target = myTarget;
+ args->source = mySource;
+}
+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
+/*
+ * Writes the substitution character for an ISO-2022 converter, preceded by
+ * whatever shift or escape bytes this function decides are needed, and updates
+ * the fromUnicode state to match. The behavior is selected by
+ * myConverterData->locale[0]:
+ * - 'j': SI if currently in G1, ESC ( B unless cs[0] is ASCII or JISX201,
+ *   then subchar[0]
+ * - 'c': SI if g!=0, then subchar[0]
+ * - 'k': version 0 emits SI before a 1-byte subchar or SO before a 2-byte
+ *   subchar when fromUnicodeStatus requires it; other versions hand the
+ *   job to currentConverter and return without using buffer[]
+ * - anything else: an empty byte sequence is passed on
+ *
+ * The assembled bytes go to ucnv_cbFromUWriteBytes() together with
+ * offsetIndex; errors are reported through *err.
+ */
+static void U_CALLCONV
+_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
+ UConverter *cnv = args->converter;
+ UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
+ ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
+ char *p, *subchar;
+ char buffer[8];
+ int32_t length;
+
+ subchar=(char *)cnv->subChars;
+ length=cnv->subCharLen; /* assume length==1 for most variants */
+
+ p = buffer;
+ switch(myConverterData->locale[0]){
+ case 'j':
+ {
+ int8_t cs;
+
+ if(pFromU2022State->g == 1) {
+ /* JIS7: switch from G1 to G0 */
+ pFromU2022State->g = 0;
+ *p++ = UCNV_SI;
+ }
+
+ cs = pFromU2022State->cs[0];
+ if(cs != ASCII && cs != JISX201) {
+ /* not in ASCII or JIS X 0201: switch to ASCII */
+ pFromU2022State->cs[0] = (int8_t)ASCII;
+ *p++ = '\x1b';
+ *p++ = '\x28';
+ *p++ = '\x42';
+ }
+
+ *p++ = subchar[0];
+ break;
+ }
+ case 'c':
+ if(pFromU2022State->g != 0) {
+ /* not in ASCII mode: switch to ASCII */
+ pFromU2022State->g = 0;
+ *p++ = UCNV_SI;
+ }
+ *p++ = subchar[0];
+ break;
+ case 'k':
+ if(myConverterData->version == 0) {
+ if(length == 1) {
+ if(args->converter->fromUnicodeStatus) {
+ /* in DBCS mode: switch to SBCS */
+ args->converter->fromUnicodeStatus = 0;
+ *p++ = UCNV_SI;
+ }
+ *p++ = subchar[0];
+ } else /* length == 2*/ {
+ if(!args->converter->fromUnicodeStatus) {
+ /* in SBCS mode: switch to DBCS */
+ args->converter->fromUnicodeStatus = 1;
+ *p++ = UCNV_SO;
+ }
+ *p++ = subchar[0];
+ *p++ = subchar[1];
+ }
+ break;
+ } else {
+ /* save the subconverter's substitution string */
+ uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
+ int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
+
+ /* set our substitution string into the subconverter */
+ myConverterData->currentConverter->subChars = (uint8_t *)subchar;
+ myConverterData->currentConverter->subCharLen = (int8_t)length;
+
+ /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
+ args->converter = myConverterData->currentConverter;
+ myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
+ ucnv_cbFromUWriteSub(args, 0, err); /* NOTE(review): passes 0 rather than offsetIndex -- confirm that this is intended */
+ cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
+ args->converter = cnv;
+
+ /* restore the subconverter's substitution string */
+ myConverterData->currentConverter->subChars = currentSubChars;
+ myConverterData->currentConverter->subCharLen = currentSubCharLen;
+
+ if(*err == U_BUFFER_OVERFLOW_ERROR) { /* move the subconverter's pending overflow bytes into the outer converter */
+ if(myConverterData->currentConverter->charErrorBufferLength > 0) {
+ uprv_memcpy(
+ cnv->charErrorBuffer,
+ myConverterData->currentConverter->charErrorBuffer,
+ myConverterData->currentConverter->charErrorBufferLength);
+ }
+ cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
+ myConverterData->currentConverter->charErrorBufferLength = 0;
+ }
+ return; /* the subconverter produced the output; the ucnv_cbFromUWriteBytes() call below is skipped */
+ }
+ default:
+ /* not expected */
+ break;
+ }
+ ucnv_cbFromUWriteBytes(args,
+ buffer, (int32_t)(p - buffer),
+ offsetIndex, err);
+}
+
+/*
+ * Structure for cloning an ISO 2022 converter into a single memory block.
+ * _ISO_2022_SafeClone() casts its stackBuffer argument to this layout:
+ * - cnv: the clone itself; its address is what _ISO_2022_SafeClone() returns
+ * - currentConverter: storage handed to ucnv_safeClone() for the clone of the
+ * original's currentConverter
+ * - mydata: copy of the original's UConverterDataISO2022; cnv.extraInfo is
+ * pointed at it
+ */
+struct cloneStruct
+{
+ UConverter cnv;
+ UConverter currentConverter;
+ UConverterDataISO2022 mydata;
+};
+
+
+U_CDECL_BEGIN
+
+/*
+ * Clones an ISO 2022 converter into caller-supplied storage that is laid out
+ * as a struct cloneStruct.
+ *
+ * If *pBufferSize is 0 this is a preflighting request: the required size is
+ * stored in *pBufferSize and NULL is returned.
+ * Otherwise the extra data is copied into the clone, currentConverter (if any)
+ * is cloned into the embedded UConverter, and the reference count of every
+ * entry in myConverterArray[] is incremented because those are shared.
+ *
+ * Returns the address of the clone's UConverter, or a null pointer if *status
+ * indicates a failure on entry or after cloning currentConverter.
+ */
+static UConverter * U_CALLCONV
+_ISO_2022_SafeClone(
+            const UConverter *cnv,
+            void *stackBuffer,
+            int32_t *pBufferSize,
+            UErrorCode *status)
+{
+    if (U_FAILURE(*status)) {
+        return nullptr;
+    }
+
+    /* 'preflighting' request - only report the size that is needed */
+    if (*pBufferSize == 0) {
+        *pBufferSize = (int32_t)sizeof(struct cloneStruct);
+        return NULL;
+    }
+
+    UConverterDataISO2022 *sourceData = (UConverterDataISO2022 *)cnv->extraInfo;
+    struct cloneStruct *clone = (struct cloneStruct *)stackBuffer;
+
+    /* the main UConverter was already copied by ucnv.c/ucnv_safeClone() */
+
+    /* give the clone its own copy of the extra data */
+    uprv_memcpy(&clone->mydata, sourceData, sizeof(UConverterDataISO2022));
+    clone->cnv.isExtraLocal = TRUE;
+    clone->cnv.extraInfo = &clone->mydata;
+
+    /* clone the current subconverter into the embedded storage */
+    if (sourceData->currentConverter != NULL) {
+        int32_t subSize = (int32_t)sizeof(UConverter);
+        clone->mydata.currentConverter =
+            ucnv_safeClone(sourceData->currentConverter,
+                           &clone->currentConverter,
+                           &subSize, status);
+        if (U_FAILURE(*status)) {
+            return NULL;
+        }
+    }
+
+    /* share the subconverters in myConverterArray[] */
+    int32_t idx = 0;
+    while (idx < UCNV_2022_MAX_CONVERTERS) {
+        if (sourceData->myConverterArray[idx] != NULL) {
+            ucnv_incrementRefCount(sourceData->myConverterArray[idx]);
+        }
+        ++idx;
+    }
+
+    return &clone->cnv;
+}
+
+U_CDECL_END
+/*
+ * Adds to the set behind sa the Unicode code points that this ISO-2022
+ * converter handles, for the set type selected by which.
+ * Steps:
+ * 1. add the ranges that are handled without a lookup in myConverterArray[],
+ * chosen by cnvData->locale[0] ('j', 'c'/'z', or 'k', where 'k' asks
+ * currentConverter for its set)
+ * 2. add the set of every non-NULL entry of myConverterArray[], using a
+ * filter for JISX208 (JP), CNS_11643 (CN version 0) and KSC5601
+ * 3. remove SO, SI, ESC and the C1 range 0x80..0x9f
+ * Returns immediately if *pErrorCode already indicates a failure.
+ */
+static void U_CALLCONV
+_ISO_2022_GetUnicodeSet(const UConverter *cnv,
+ const USetAdder *sa,
+ UConverterUnicodeSet which,
+ UErrorCode *pErrorCode)
+{
+ int32_t i;
+ UConverterDataISO2022* cnvData;
+
+ if (U_FAILURE(*pErrorCode)) {
+ return;
+ }
+#ifdef U_ENABLE_GENERIC_ISO_2022
+ if (cnv->sharedData == &_ISO2022Data) {
+ /* We use UTF-8 in this case */
+ sa->addRange(sa->set, 0, 0xd7FF);
+ sa->addRange(sa->set, 0xE000, 0x10FFFF);
+ return;
+ }
+#endif
+
+ cnvData = (UConverterDataISO2022*)cnv->extraInfo;
+
+ /* open a set and initialize it with code points that are algorithmically round-tripped */
+ switch(cnvData->locale[0]){
+ case 'j':
+ /* include JIS X 0201 which is hardcoded */
+ sa->add(sa->set, 0xa5);
+ sa->add(sa->set, 0x203e);
+ if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
+ /* include Latin-1 for some variants of JP */
+ sa->addRange(sa->set, 0, 0xff);
+ } else {
+ /* include ASCII for JP */
+ sa->addRange(sa->set, 0, 0x7f);
+ }
+ if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
+ /*
+ * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
+ * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
+ * use half-width Katakana.
+ * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
+ * half-width Katakana via the ESC ( I sequence.
+ * However, we only emit (fromUnicode) half-width Katakana according to the
+ * definition of each variant.
+ *
+ * When including fallbacks,
+ * we need to include half-width Katakana Unicode code points for all JP variants because
+ * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
+ */
+ /* include half-width Katakana for JP */
+ sa->addRange(sa->set, HWKANA_START, HWKANA_END);
+ }
+ break;
+#if !UCONFIG_ONLY_HTML_CONVERSION
+ case 'c':
+ case 'z':
+ /* include ASCII for CN */
+ sa->addRange(sa->set, 0, 0x7f);
+ break;
+ case 'k':
+ /* there is only one converter for KR, and it is not in the myConverterArray[] */
+ cnvData->currentConverter->sharedData->impl->getUnicodeSet(
+ cnvData->currentConverter, sa, which, pErrorCode);
+ /* the loop over myConverterArray[] will simply not find another converter */
+ break;
+#endif
+ default:
+ break;
+ }
+
+#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
+ if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
+ cnvData->version==0 && i==CNS_11643
+ ) {
+ /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
+ ucnv_MBCSGetUnicodeSetForBytes(
+ cnvData->myConverterArray[i],
+ sa, UCNV_ROUNDTRIP_SET,
+ 0, 0x81, 0x82,
+ pErrorCode);
+ }
+#endif
+
+ for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { /* step 2: one filtered set per loaded subconverter */
+ UConverterSetFilter filter;
+ if(cnvData->myConverterArray[i]!=NULL) {
+ if(cnvData->locale[0]=='j' && i==JISX208) {
+ /*
+ * Only add code points that map to Shift-JIS codes
+ * corresponding to JIS X 0208.
+ */
+ filter=UCNV_SET_FILTER_SJIS;
+#if !UCONFIG_ONLY_HTML_CONVERSION
+ } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
+ cnvData->version==0 && i==CNS_11643) {
+ /*
+ * Version-specific for CN:
+ * CN version 0 does not map CNS planes 3..7 although
+ * they are all available in the CNS conversion table;
+ * CN version 1 (-EXT) does map them all.
+ * The two versions create different Unicode sets.
+ */
+ filter=UCNV_SET_FILTER_2022_CN;
+ } else if(i==KSC5601) {
+ /*
+ * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
+ * are broader than GR94.
+ */
+ filter=UCNV_SET_FILTER_GR94DBCS;
+#endif
+ } else {
+ filter=UCNV_SET_FILTER_NONE;
+ }
+ ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
+ }
+ }
+
+ /*
+ * ISO 2022 converters must not convert SO/SI/ESC despite what
+ * sub-converters do by themselves.
+ * Remove these characters from the set.
+ */
+ sa->remove(sa->set, 0x0e);
+ sa->remove(sa->set, 0x0f);
+ sa->remove(sa->set, 0x1b);
+
+ /* ISO 2022 converters do not convert C1 controls either */
+ sa->removeRange(sa->set, 0x80, 0x9f);
+}
+
+static const UConverterImpl _ISO2022Impl={ /* implementation table of the generic ISO 2022 converter; the initializers are positional, so their order has to match UConverterImpl (declared outside this chunk) */
+ UCNV_ISO_2022,
+
+ NULL,
+ NULL,
+
+ _ISO2022Open,
+ _ISO2022Close,
+ _ISO2022Reset,
+
+#ifdef U_ENABLE_GENERIC_ISO_2022
+ T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
+ T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
+ ucnv_fromUnicode_UTF8,
+ ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
+#else
+ NULL, /* no conversion functions are installed in these four slots unless U_ENABLE_GENERIC_ISO_2022 is defined */
+ NULL,
+ NULL,
+ NULL,
+#endif
+ NULL,
+
+ NULL,
+ _ISO2022getName,
+ _ISO_2022_WriteSub,
+ _ISO_2022_SafeClone,
+ _ISO_2022_GetUnicodeSet,
+
+ NULL,
+ NULL
+};
+static const UConverterStaticData _ISO2022StaticData={ /* static description of the generic ISO 2022 converter; positional initializers for UConverterStaticData */
+ sizeof(UConverterStaticData),
+ "ISO_2022",
+ 2022,
+ UCNV_IBM,
+ UCNV_ISO_2022,
+ 1,
+ 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
+ { 0x1a, 0, 0, 0 }, /* presumably the default substitution bytes (cf. cnv->subChars in _ISO_2022_WriteSub) -- confirm against UConverterStaticData */
+ 1,
+ FALSE,
+ FALSE,
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+const UConverterSharedData _ISO2022Data= /* not file-local, unlike the JP/KR/CN data below: converterData[] in ucnv_bld.cpp takes its address */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022StaticData, &_ISO2022Impl);
+
+/*************JP****************/
+static const UConverterImpl _ISO2022JPImpl={ /* implementation table for ISO-2022-JP; same positional layout as _ISO2022Impl */
+ UCNV_ISO_2022,
+
+ NULL,
+ NULL,
+
+ _ISO2022Open,
+ _ISO2022Close,
+ _ISO2022Reset,
+
+ UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC, /* the same *_OFFSETS_LOGIC function fills both slots of each direction (presumably plain and with-offsets -- confirm against UConverterImpl) */
+ UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
+ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
+ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
+ NULL,
+
+ NULL,
+ _ISO2022getName,
+ _ISO_2022_WriteSub,
+ _ISO_2022_SafeClone,
+ _ISO_2022_GetUnicodeSet,
+
+ NULL,
+ NULL
+};
+static const UConverterStaticData _ISO2022JPStaticData={ /* static description of the ISO-2022-JP converter; positional initializers */
+ sizeof(UConverterStaticData),
+ "ISO_2022_JP",
+ 0,
+ UCNV_IBM,
+ UCNV_ISO_2022,
+ 1,
+ 6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
+ { 0x1a, 0, 0, 0 },
+ 1,
+ FALSE,
+ FALSE,
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+namespace {
+
+const UConverterSharedData _ISO2022JPData= /* file-local: defined inside an anonymous namespace */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022JPStaticData, &_ISO2022JPImpl);
+
+} // namespace
+
+#if !UCONFIG_ONLY_HTML_CONVERSION
+/************* KR ***************/
+static const UConverterImpl _ISO2022KRImpl={ /* implementation table for ISO-2022-KR; same positional layout as _ISO2022Impl */
+ UCNV_ISO_2022,
+
+ NULL,
+ NULL,
+
+ _ISO2022Open,
+ _ISO2022Close,
+ _ISO2022Reset,
+
+ UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC, /* the same *_OFFSETS_LOGIC function fills both slots of each direction (presumably plain and with-offsets -- confirm against UConverterImpl) */
+ UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
+ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
+ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
+ NULL,
+
+ NULL,
+ _ISO2022getName,
+ _ISO_2022_WriteSub,
+ _ISO_2022_SafeClone,
+ _ISO_2022_GetUnicodeSet,
+
+ NULL,
+ NULL
+};
+static const UConverterStaticData _ISO2022KRStaticData={ /* static description of the ISO-2022-KR converter; positional initializers */
+ sizeof(UConverterStaticData),
+ "ISO_2022_KR",
+ 0,
+ UCNV_IBM,
+ UCNV_ISO_2022,
+ 1,
+ 8, /* max 8 bytes per UChar */
+ { 0x1a, 0, 0, 0 },
+ 1,
+ FALSE,
+ FALSE,
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+namespace {
+
+const UConverterSharedData _ISO2022KRData= /* file-local: defined inside an anonymous namespace */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022KRStaticData, &_ISO2022KRImpl);
+
+} // namespace
+
+/*************** CN ***************/
+static const UConverterImpl _ISO2022CNImpl={ /* implementation table for ISO-2022-CN; same positional layout as _ISO2022Impl */
+
+ UCNV_ISO_2022,
+
+ NULL,
+ NULL,
+
+ _ISO2022Open,
+ _ISO2022Close,
+ _ISO2022Reset,
+
+ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC, /* the same *_OFFSETS_LOGIC function fills both slots of each direction (presumably plain and with-offsets -- confirm against UConverterImpl) */
+ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
+ UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
+ UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
+ NULL,
+
+ NULL,
+ _ISO2022getName,
+ _ISO_2022_WriteSub,
+ _ISO_2022_SafeClone,
+ _ISO_2022_GetUnicodeSet,
+
+ NULL,
+ NULL
+};
+static const UConverterStaticData _ISO2022CNStaticData={ /* static description of the ISO-2022-CN converter; positional initializers */
+ sizeof(UConverterStaticData),
+ "ISO_2022_CN",
+ 0,
+ UCNV_IBM,
+ UCNV_ISO_2022,
+ 1,
+ 8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
+ { 0x1a, 0, 0, 0 },
+ 1,
+ FALSE,
+ FALSE,
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+namespace {
+
+const UConverterSharedData _ISO2022CNData= /* file-local: defined inside an anonymous namespace */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022CNImpl);
+
+} // namespace
+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
+
+#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
diff --git a/thirdparty/icu4c/common/ucnv_bld.cpp b/thirdparty/icu4c/common/ucnv_bld.cpp
new file mode 100644
index 0000000000..0e198892f1
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_bld.cpp
@@ -0,0 +1,1689 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ ********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 1996-2016, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************
+ *
+ * ucnv_bld.cpp:
+ *
+ * Defines functions that are used in the creation/initialization/deletion
+ * of converters and related structures.
+ * uses uconv_io.h routines to access disk information
+ * is used by ucnv.h to implement public API create/delete/flushCache routines
+ * Modification History:
+ *
+ * Date Name Description
+ *
+ * 06/20/2000 helena OS/400 port changes; mostly typecast.
+ * 06/29/2000 helena Major rewrite of the callback interface.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/putil.h"
+#include "unicode/udata.h"
+#include "unicode/ucnv.h"
+#include "unicode/uloc.h"
+#include "mutex.h"
+#include "putilimp.h"
+#include "uassert.h"
+#include "utracimp.h"
+#include "ucnv_io.h"
+#include "ucnv_bld.h"
+#include "ucnvmbcs.h"
+#include "ucnv_ext.h"
+#include "ucnv_cnv.h"
+#include "ucnv_imp.h"
+#include "uhash.h"
+#include "umutex.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "ucln_cmn.h"
+#include "ustr_cnv.h"
+
+
+#if 0
+#include <stdio.h>
+extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l);
+#define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__)
+#else
+# define UCNV_DEBUG_LOG(x,y,z)
+#endif
+/*
+ * Built-in shared data for each converter type. The array is indexed with a
+ * UConverterType value (ucnv_data_unFlattenClone() reads converterData[type]),
+ * so the initializers below are positional.
+ * A NULL entry means that there is no built-in data for that type in this
+ * build configuration; the UCONFIG_* conditionals substitute NULL for
+ * converters that are compiled out, which keeps the later entries at the
+ * same index.
+ */
+static const UConverterSharedData * const
+converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
+ NULL, NULL,
+
+#if UCONFIG_NO_LEGACY_CONVERSION
+ NULL,
+#else
+ &_MBCSData,
+#endif
+
+ &_Latin1Data,
+ &_UTF8Data, &_UTF16BEData, &_UTF16LEData,
+#if UCONFIG_ONLY_HTML_CONVERSION
+ NULL, NULL,
+#else
+ &_UTF32BEData, &_UTF32LEData,
+#endif
+ NULL,
+
+#if UCONFIG_NO_LEGACY_CONVERSION
+ NULL,
+#else
+ &_ISO2022Data,
+#endif
+
+#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
+ NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL,
+#else
+ &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
+ &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
+ &_HZData,
+#endif
+
+#if UCONFIG_ONLY_HTML_CONVERSION
+ NULL,
+#else
+ &_SCSUData,
+#endif
+
+
+#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
+ NULL,
+#else
+ &_ISCIIData,
+#endif
+
+ &_ASCIIData,
+#if UCONFIG_ONLY_HTML_CONVERSION
+ NULL, NULL, &_UTF16Data, NULL, NULL, NULL,
+#else
+ &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,
+#endif
+
+#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
+ NULL,
+#else
+ &_CompoundTextData
+#endif
+};
+
+/* Please keep this in binary sorted order for getAlgorithmicTypeFromName.
+ Also the name should be in lower case and all spaces, dashes and underscores
+ removed.
+ Each entry pairs such a normalized name with its UConverterType.
+ Entries are compiled in or out by the UCONFIG_* and U_IS_BIG_ENDIAN
+ conditionals below, so the sorted order has to hold in every configuration.
+*/
+static struct {
+ const char *name;
+ const UConverterType type;
+} const cnvNameType[] = {
+#if !UCONFIG_ONLY_HTML_CONVERSION
+ { "bocu1", UCNV_BOCU1 },
+ { "cesu8", UCNV_CESU8 },
+#endif
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+ { "hz",UCNV_HZ },
+#endif
+#if !UCONFIG_ONLY_HTML_CONVERSION
+ { "imapmailboxname", UCNV_IMAP_MAILBOX },
+#endif
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+ { "iscii", UCNV_ISCII },
+#endif
+#if !UCONFIG_NO_LEGACY_CONVERSION
+ { "iso2022", UCNV_ISO_2022 },
+#endif
+ { "iso88591", UCNV_LATIN_1 },
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+ { "lmbcs1", UCNV_LMBCS_1 },
+ { "lmbcs11",UCNV_LMBCS_11 },
+ { "lmbcs16",UCNV_LMBCS_16 },
+ { "lmbcs17",UCNV_LMBCS_17 },
+ { "lmbcs18",UCNV_LMBCS_18 },
+ { "lmbcs19",UCNV_LMBCS_19 },
+ { "lmbcs2", UCNV_LMBCS_2 },
+ { "lmbcs3", UCNV_LMBCS_3 },
+ { "lmbcs4", UCNV_LMBCS_4 },
+ { "lmbcs5", UCNV_LMBCS_5 },
+ { "lmbcs6", UCNV_LMBCS_6 },
+ { "lmbcs8", UCNV_LMBCS_8 },
+#endif
+#if !UCONFIG_ONLY_HTML_CONVERSION
+ { "scsu", UCNV_SCSU },
+#endif
+ { "usascii", UCNV_US_ASCII },
+ { "utf16", UCNV_UTF16 },
+ { "utf16be", UCNV_UTF16_BigEndian },
+ { "utf16le", UCNV_UTF16_LittleEndian },
+#if U_IS_BIG_ENDIAN
+ { "utf16oppositeendian", UCNV_UTF16_LittleEndian },
+ { "utf16platformendian", UCNV_UTF16_BigEndian },
+#else
+ { "utf16oppositeendian", UCNV_UTF16_BigEndian},
+ { "utf16platformendian", UCNV_UTF16_LittleEndian },
+#endif
+#if !UCONFIG_ONLY_HTML_CONVERSION
+ { "utf32", UCNV_UTF32 },
+ { "utf32be", UCNV_UTF32_BigEndian },
+ { "utf32le", UCNV_UTF32_LittleEndian },
+#if U_IS_BIG_ENDIAN
+ { "utf32oppositeendian", UCNV_UTF32_LittleEndian },
+ { "utf32platformendian", UCNV_UTF32_BigEndian },
+#else
+ { "utf32oppositeendian", UCNV_UTF32_BigEndian },
+ { "utf32platformendian", UCNV_UTF32_LittleEndian },
+#endif
+#endif
+#if !UCONFIG_ONLY_HTML_CONVERSION
+ { "utf7", UCNV_UTF7 },
+#endif
+ { "utf8", UCNV_UTF8 },
+#if !UCONFIG_ONLY_HTML_CONVERSION
+ { "x11compoundtext", UCNV_COMPOUND_TEXT}
+#endif
+};
+
+
+/* File-level state of the converter cache; initializes some global variables */
+static UHashtable *SHARED_DATA_HASHTABLE = NULL; /* closed and reset to NULL by ucnv_cleanup() once it holds no entries */
+static icu::UMutex cnvCacheMutex;
+/* Note: the global mutex is used for */
+/* reference count updates. */
+
+static const char **gAvailableConverters = NULL; /* freed and reset by ucnv_flushAvailableConverterCache() */
+static uint16_t gAvailableConverterCount = 0;
+static icu::UInitOnce gAvailableConvertersInitOnce = U_INITONCE_INITIALIZER;
+
+#if !U_CHARSET_IS_UTF8
+
+/* This contains the resolved converter name. So no further alias lookup is needed again. */
+static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for the terminating NUL */
+static const char *gDefaultConverterName = NULL;
+
+/*
+If the default converter is an algorithmic converter, this is the cached value.
+We don't cache a full UConverter and clone it because ucnv_clone doesn't have
+less overhead than an algorithmic open. We don't cache non-algorithmic converters
+because ucnv_flushCache must be able to unload the default converter and its table.
+*/
+static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL;
+
+/* Does gDefaultConverterName have a converter option and require extra parsing? */
+static UBool gDefaultConverterContainsOption;
+
+#endif /* !U_CHARSET_IS_UTF8 */
+
+static const char DATA_TYPE[] = "cnv"; /* data item type passed to udata_openChoice() in createConverterFromFile() */
+
+/*
+ * ucnv_flushAvailableConverterCache: releases the gAvailableConverters array,
+ * zeroes its count and resets the associated init-once object.
+ * The only caller is ucnv_cleanup(). Nothing here is synchronized, so
+ * synchronization has to be considered before this is called from anywhere else.
+ */
+static void
+ucnv_flushAvailableConverterCache() {
+    if (gAvailableConverters != NULL) {
+        uprv_free((char **)gAvailableConverters);
+        gAvailableConverters = NULL;
+    }
+    gAvailableConverterCount = 0;
+    gAvailableConvertersInitOnce.reset();
+}
+
+/*
+ * ucnv_cleanup - releases the storage held by the converter cache, except for
+ * anything that is still in use by open converters.
+ * Returns TRUE if the shared data hash table is gone afterwards (it is only
+ * closed when ucnv_flushCache() left it empty), FALSE otherwise.
+ * Not thread safe.
+ * Not supported API.
+ */
+static UBool U_CALLCONV ucnv_cleanup(void) {
+    ucnv_flushCache();
+
+    /* the table can only be closed once nothing is left in it */
+    if (SHARED_DATA_HASHTABLE != NULL) {
+        if (uhash_count(SHARED_DATA_HASHTABLE) == 0) {
+            uhash_close(SHARED_DATA_HASHTABLE);
+            SHARED_DATA_HASHTABLE = NULL;
+        }
+    }
+
+    /*
+     * This is done here rather than in ucnv_flushCache() because other
+     * threads may have preexisting references to the table.
+     */
+    ucnv_flushAvailableConverterCache();
+
+#if !U_CHARSET_IS_UTF8
+    /* forget the cached default converter */
+    gDefaultAlgorithmicSharedData = NULL;
+    gDefaultConverterContainsOption = FALSE;
+    gDefaultConverterNameBuffer[0] = 0;
+    gDefaultConverterName = NULL;
+#endif
+
+    return (UBool)(SHARED_DATA_HASHTABLE == NULL);
+}
+/*
+ * Registers ucnv_cleanup() through ucln_common_registerCleanup() under the
+ * UCLN_COMMON_UCNV key.
+ */
+U_CAPI void U_EXPORT2
+ucnv_enableCleanup(void) {
+ ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);
+}
+
+/*
+ * Acceptance callback that createConverterFromFile() hands to
+ * udata_openChoice(): decides from the UDataInfo header alone whether a data
+ * item can be used as converter data. The context, type and name arguments
+ * are ignored.
+ * Returns TRUE only if every check below passes.
+ */
+static UBool U_CALLCONV
+isCnvAcceptable(void * /*context*/,
+                const char * /*type*/, const char * /*name*/,
+                const UDataInfo *pInfo) {
+    /* reject headers that declare fewer than 20 bytes */
+    if (pInfo->size < 20) {
+        return FALSE;
+    }
+
+    /* byte order, charset family and UChar size must be those of this build */
+    if (pInfo->isBigEndian != U_IS_BIG_ENDIAN) {
+        return FALSE;
+    }
+    if (pInfo->charsetFamily != U_CHARSET_FAMILY) {
+        return FALSE;
+    }
+    if (pInfo->sizeofUChar != U_SIZEOF_UCHAR) {
+        return FALSE;
+    }
+
+    /* dataFormat="cnvt" */
+    if (pInfo->dataFormat[0] != 0x63 ||
+        pInfo->dataFormat[1] != 0x6e ||
+        pInfo->dataFormat[2] != 0x76 ||
+        pInfo->dataFormat[3] != 0x74) {
+        return FALSE;
+    }
+
+    /* Everything will be version 6 */
+    return (UBool)(pInfo->formatVersion[0] == 6);
+}
+
+/**
+ * Builds a heap-allocated UConverterSharedData for converter data that was
+ * loaded into a UDataMemory ("un-flattening").
+ *
+ * The loaded memory starts with a UConverterStaticData. Its conversionType
+ * selects the built-in entry of converterData[] that serves as the template
+ * for the new object; the type-specific load() function, if there is one,
+ * then processes the bytes that follow the static data.
+ *
+ * Returns NULL if *status is already a failure, and otherwise sets *status to
+ * U_INVALID_TABLE_FORMAT (no usable template or wrong structSize) or
+ * U_MEMORY_ALLOCATION_ERROR before returning NULL. A failure reported by
+ * load() also yields NULL, after the new object has been freed.
+ */
+static UConverterSharedData*
+ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status)
+{
+    /* UDataInfo info; -- necessary only if some converters have different formatVersion */
+    const uint8_t *bytes = (const uint8_t *)udata_getMemory(pData);
+    const UConverterStaticData *staticData = (const UConverterStaticData *)bytes;
+    const UConverterType cnvType = (UConverterType)staticData->conversionType;
+
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    /* find the built-in template for this conversion type, if any */
+    const UConverterSharedData *prototype = NULL;
+    if ((uint16_t)cnvType < UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES) {
+        prototype = converterData[cnvType];
+    }
+
+    if (prototype == NULL ||
+        !prototype->isReferenceCounted ||
+        prototype->referenceCounter != 1 ||
+        staticData->structSize != sizeof(UConverterStaticData))
+    {
+        *status = U_INVALID_TABLE_FORMAT;
+        return NULL;
+    }
+
+    UConverterSharedData *shared =
+        (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData));
+    if (shared == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    /* start from the static structure for this type */
+    uprv_memcpy(shared, prototype, sizeof(UConverterSharedData));
+
+    /* then fill in what comes from the loaded data */
+    shared->staticData = staticData;
+    shared->sharedDataCached = FALSE;
+    shared->dataMemory = (void*)pData; /* for future use */
+
+    if (shared->impl->load == NULL) {
+        return shared;
+    }
+
+    /* let the implementation process the data that follows the static part */
+    shared->impl->load(shared, pArgs, bytes + staticData->structSize, status);
+    if (U_FAILURE(*status)) {
+        uprv_free(shared);
+        return NULL;
+    }
+    return shared;
+}
+
+/*Takes the converter name (and optional package) from pArgs, opens the matching
+ *data item with udata_openChoice() and unflattens it with ucnv_data_unFlattenClone().
+ *Returns the new UConverterSharedData, or NULL with *err set to a failure code.
+ *If unflattening fails, the data item that was opened is closed again.
+ */
+static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err)
+{
+ UDataMemory *data;
+ UConverterSharedData *sharedData;
+
+ UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD);
+
+ if (U_FAILURE (*err)) {
+ UTRACE_EXIT_STATUS(*err);
+ return NULL;
+ }
+
+ UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg);
+
+ data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err); /* isCnvAcceptable rejects anything that is not "cnvt" data */
+ if(U_FAILURE(*err))
+ {
+ UTRACE_EXIT_STATUS(*err);
+ return NULL;
+ }
+
+ sharedData = ucnv_data_unFlattenClone(pArgs, data, err);
+ if(U_FAILURE(*err))
+ {
+ udata_close(data); /* no shared data was produced to take over the data item */
+ UTRACE_EXIT_STATUS(*err);
+ return NULL;
+ }
+
+ /*
+ * TODO Store pkg in a field in the shared data so that delta-only converters
+ * can load base converters from the same package.
+ * If the pkg name is longer than the field, then either do not load the converter
+ * in the first place, or just set the pkg field to "".
+ */
+
+ UTRACE_EXIT_PTR_STATUS(sharedData, *err);
+ return sharedData;
+}
+
+/*
+ * Maps a converter name to the shared data of an algorithmic converter.
+ * The name is first normalized with ucnv_io_stripForCompare() and then looked
+ * up in cnvNameType with a binary search.
+ * Returns converterData[] for the matching type, or NULL if the name is not listed.
+ */
+static const UConverterSharedData *
+getAlgorithmicTypeFromName(const char *realName)
+{
+    char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
+
+    /* Lower case and remove ignoreable characters. */
+    ucnv_io_stripForCompare(strippedName, realName);
+
+    /* do a binary search for the alias */
+    uint32_t low = 0;
+    uint32_t high = UPRV_LENGTHOF(cnvNameType);
+    uint32_t previousProbe = UINT32_MAX;
+    uint32_t probe = (uint32_t)((low + high) / 2);
+
+    /* Stop as soon as the probe position no longer moves: the name is not in the table. */
+    while (probe != previousProbe) {
+        const int comparison = uprv_strcmp(strippedName, cnvNameType[probe].name);
+        if (comparison == 0) {
+            return converterData[cnvNameType[probe].type];
+        }
+        if (comparison < 0) {
+            high = probe;
+        } else {
+            low = probe;
+        }
+        previousProbe = probe;
+        probe = (uint32_t)((low + high) / 2);
+    }
+
+    return NULL;
+}
+
+/*
+* Based on the number of known converters, this determines how many times larger
+* the shared data hash table should be. When on small platforms, or when just a couple
+* of converters are used, this number should be 2. When memory is plentiful, or
+* when ucnv_countAvailable is ever used with a lot of available converters,
+* this should be 4.
+* Larger numbers reduce the number of hash collisions, but use more memory.
+* ucnv_shareConverterData() multiplies the known-converter count by this factor
+* to get the initial size it passes to uhash_openSize().
+*/
+#define UCNV_CACHE_LOAD_FACTOR 2
+
+/* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */
+/* Will always be called with the cnvCacheMutex already being held */
+/* by the calling function. */
+/* Stores the shared data in the SHARED_DATA_HASHTABLE, keyed by data->staticData->name.
+ * The table is created on first use. If the table cannot be created or the entry
+ * cannot be inserted, the data is left unmarked (sharedDataCached stays FALSE)
+ * so that ucnv_unload() can still free it.
+ * @param data The shared data
+ */
+static void
+ucnv_shareConverterData(UConverterSharedData * data)
+{
+    UErrorCode err = U_ZERO_ERROR;
+    /*Lazy evaluates the Hashtable itself */
+    /*void *sanity = NULL;*/
+
+    if (SHARED_DATA_HASHTABLE == NULL)
+    {
+        SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL,
+                            ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR,
+                            &err);
+        ucnv_enableCleanup();
+
+        if (U_FAILURE(err))
+            return;
+    }
+
+    /* ### check to see if the element is not already there! */
+
+    /*
+    sanity = ucnv_getSharedConverterData (data->staticData->name);
+    if(sanity != NULL)
+    {
+    UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity);
+    }
+    UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity);
+    */
+
+    uhash_put(SHARED_DATA_HASHTABLE,
+            (void*) data->staticData->name, /* Okay to cast away const as long as
+            keyDeleter == NULL */
+            data,
+            &err);
+
+    /*
+     * Mark it shared only if it really went into the table. ucnv_unload() does not
+     * free data that is flagged as cached, so flagging an entry that the cache
+     * does not hold would leak it.
+     */
+    if (U_SUCCESS(err)) {
+        data->sharedDataCached = TRUE;
+    }
+    UCNV_DEBUG_LOG("put", data->staticData->name,data);
+
+}
+
+/* Look up a converter name in the shared data cache. */
+/* cnvCacheMutex must be held by the caller to protect the hash table. */
+/* Fetches the entry registered under a name in SHARED_DATA_HASHTABLE.
+ * @param name The name of the shared data
+ * @return the cached shared data, or NULL if no table has been created yet
+ *         or the table holds no entry for that name
+ */
+static UConverterSharedData *
+ucnv_getSharedConverterData(const char *name)
+{
+    /* special case: without a table nothing can be cached */
+    if (SHARED_DATA_HASHTABLE == NULL) {
+        return NULL;
+    }
+
+    UConverterSharedData *cached =
+        (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name);
+    UCNV_DEBUG_LOG("get",name,cached);
+    return cached;
+}
+
+/*frees the string of memory blocks associated with a sharedConverter
+ *if and only if the referenceCounter == 0
+ */
+/* Deletes (frees) the shared data it is passed. First it checks the referenceCounter to
+ * see if anyone is using it. If not, it calls the implementation's unload hook (if set),
+ * closes the attached data memory (if set), frees the structure and returns TRUE;
+ * otherwise it frees nothing and returns FALSE.
+ * @param deadSharedData The shared data
+ * @return TRUE if the shared data was freed,
+ * FALSE if it is still referenced
+ */
+static UBool
+ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData)
+{
+ UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD);
+ UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData);
+
+ if (deadSharedData->referenceCounter > 0) { /* still in use: leave it alone */
+ UTRACE_EXIT_VALUE((int32_t)FALSE);
+ return FALSE;
+ }
+
+ if (deadSharedData->impl->unload != NULL) {
+ deadSharedData->impl->unload(deadSharedData); /* type-specific teardown first */
+ }
+
+ if(deadSharedData->dataMemory != NULL)
+ {
+ UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory;
+ udata_close(data);
+ }
+
+ uprv_free(deadSharedData);
+
+ UTRACE_EXIT_VALUE((int32_t)TRUE);
+ return TRUE;
+}
+
+/**
+ * Load a non-algorithmic converter.
+ * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex).
+ *
+ * Converters from an application-provided package are always loaded from file
+ * and never cached. Otherwise the cache is consulted first: a hit gets one more
+ * reference, a miss is loaded from file and (unless pArgs->onlyTestIsLoadable)
+ * added to the cache.
+ *
+ * @return the shared data, or NULL if err is NULL, *err is a failure, or loading fails
+ */
+UConverterSharedData *
+ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) {
+    if(err == NULL || U_FAILURE(*err)) {
+        return NULL;
+    }
+
+    /* application-provided converters are not currently cached */
+    if(pArgs->pkg != NULL && *pArgs->pkg != 0) {
+        return createConverterFromFile(pArgs, err);
+    }
+
+    UConverterSharedData *cached = ucnv_getSharedConverterData(pArgs->name);
+    if (cached != NULL) {
+        /* The data for this converter was already in the cache. */
+        /* Update the reference counter on the shared data: one more client */
+        cached->referenceCounter++;
+        return cached;
+    }
+
+    /* Not cached, we need to stream it in from file */
+    UConverterSharedData *loaded = createConverterFromFile(pArgs, err);
+    if (U_FAILURE (*err) || (loaded == NULL)) {
+        return NULL;
+    }
+    if (!pArgs->onlyTestIsLoadable) {
+        /* share it with other library clients */
+        ucnv_shareConverterData(loaded);
+    }
+    return loaded;
+}
+
+/**
+ * Unload a non-algorithmic converter.
+ * It must be sharedData->isReferenceCounted
+ * and this function must be called inside umtx_lock(&cnvCacheMutex).
+ *
+ * Drops one reference (never below zero). The data is deleted only when no
+ * references remain and it is not held by the cache.
+ * A NULL sharedData is ignored.
+ */
+U_CAPI void
+ucnv_unload(UConverterSharedData *sharedData) {
+    if(sharedData == NULL) {
+        return;
+    }
+
+    if (sharedData->referenceCounter > 0) {
+        sharedData->referenceCounter--;
+    }
+
+    /* still referenced, or owned by the cache: keep it */
+    if((sharedData->referenceCounter > 0)||(sharedData->sharedDataCached != FALSE)) {
+        return;
+    }
+    ucnv_deleteSharedConverterData(sharedData);
+}
+
+/*
+ * Releases one reference to sharedData via ucnv_unload(), taking cnvCacheMutex
+ * around the call. Does nothing for NULL or for data that is not reference counted.
+ */
+U_CFUNC void
+ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData)
+{
+    if(sharedData == NULL || !sharedData->isReferenceCounted) {
+        return;
+    }
+
+    umtx_lock(&cnvCacheMutex);
+    ucnv_unload(sharedData);
+    umtx_unlock(&cnvCacheMutex);
+}
+
+/*
+ * Adds one reference to sharedData while holding cnvCacheMutex.
+ * Does nothing for NULL or for data that is not reference counted.
+ */
+U_CFUNC void
+ucnv_incrementRefCount(UConverterSharedData *sharedData)
+{
+    if(sharedData == NULL || !sharedData->isReferenceCounted) {
+        return;
+    }
+
+    umtx_lock(&cnvCacheMutex);
+    sharedData->referenceCounter++;
+    umtx_unlock(&cnvCacheMutex);
+}
+
+/*
+ * Splits a converter name of the form "name<SEP>option<SEP>option..." into its pieces,
+ * where <SEP> is UCNV_OPTION_SEP_CHAR.
+ * *pPieces must be initialized.
+ * The name without options will be copied to pPieces->cnvName.
+ * The locale and options will be copied to pPieces only if present in inName,
+ * otherwise the existing values in pPieces remain.
+ * *pArgs will be set to the pPieces values.
+ * Recognized options are "locale=", "version=" and "swaplfnl"; anything else is skipped.
+ * *err is set to U_ILLEGAL_ARGUMENT_ERROR if the name or the locale value is too long.
+ */
+static void
+parseConverterOptions(const char *inName,
+ UConverterNamePieces *pPieces,
+ UConverterLoadArgs *pArgs,
+ UErrorCode *err)
+{
+ char *cnvName = pPieces->cnvName;
+ char c;
+ int32_t len = 0;
+
+ pArgs->name=inName;
+ pArgs->locale=pPieces->locale;
+ pArgs->options=pPieces->options;
+
+ /* copy the converter name itself to cnvName */
+ while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) {
+ if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) { /* counts the character about to be stored, leaving room for the NUL */
+ *err = U_ILLEGAL_ARGUMENT_ERROR; /* bad name */
+ pPieces->cnvName[0]=0;
+ return;
+ }
+ *cnvName++=c;
+ inName++;
+ }
+ *cnvName=0;
+ pArgs->name=pPieces->cnvName; /* from here on the name refers to the option-free copy */
+
+ /* parse options. No more name copying should occur. */
+ while((c=*inName)!=0) {
+ if(c==UCNV_OPTION_SEP_CHAR) {
+ ++inName;
+ }
+
+ /* inName is behind an option separator */
+ if(uprv_strncmp(inName, "locale=", 7)==0) {
+ /* do not modify locale itself in case we have multiple locale options */
+ char *dest=pPieces->locale;
+
+ /* copy the locale option value */
+ inName+=7;
+ len=0;
+ while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) {
+ ++inName;
+
+ if(++len>=ULOC_FULLNAME_CAPACITY) {
+ *err=U_ILLEGAL_ARGUMENT_ERROR; /* bad name */
+ pPieces->locale[0]=0;
+ return;
+ }
+
+ *dest++=c;
+ }
+ *dest=0;
+ } else if(uprv_strncmp(inName, "version=", 8)==0) {
+ /* copy the version option value into bits 3..0 of pPieces->options */
+ inName+=8;
+ c=*inName;
+ if(c==0) { /* "version=" at the very end: clear the version bits and stop */
+ pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION);
+ return;
+ } else if((uint8_t)(c-'0')<10) { /* a single decimal digit; anything else is left for the next loop pass */
+ pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0');
+ ++inName;
+ }
+ } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) {
+ inName+=8;
+ pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL);
+ /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */
+ } else {
+ /* ignore any other options until we define some */
+ while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) {
+ }
+ if(c==0) { /* inName was advanced past the terminator, so it must not be read again */
+ return;
+ }
+ }
+ }
+}
+
+/*Logic determines if the converter is Algorithmic AND/OR cached
+ *depending on that:
+ * -we either go to get data from disk and cache it (Data=TRUE, Cached=False)
+ * -Get it from a Hashtable (Data=X, Cached=TRUE)
+ * -Call dataConverter initializer (Data=TRUE, Cached=TRUE)
+ * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE)
+ *
+ * converterName may be NULL, which selects the default converter.
+ * pPieces and pArgs may both be NULL, in which case stack-local scratch
+ * structures are used; passing pArgs without pPieces is rejected with
+ * U_INTERNAL_PROGRAM_ERROR. On return pArgs->name, locale and options
+ * describe the converter that was resolved.
+ * Returns the shared data, or NULL with *err set to a failure code.
+ */
+U_CFUNC UConverterSharedData *
+ucnv_loadSharedData(const char *converterName,
+ UConverterNamePieces *pPieces,
+ UConverterLoadArgs *pArgs,
+ UErrorCode * err) {
+ UConverterNamePieces stackPieces;
+ UConverterLoadArgs stackArgs;
+ UConverterSharedData *mySharedConverterData = NULL;
+ UErrorCode internalErrorCode = U_ZERO_ERROR;
+ UBool mayContainOption = TRUE;
+ UBool checkForAlgorithmic = TRUE;
+
+ if (U_FAILURE (*err)) {
+ return NULL;
+ }
+
+ if(pPieces == NULL) {
+ if(pArgs != NULL) {
+ /*
+ * Bad: We may set pArgs pointers to stackPieces fields
+ * which will be invalid after this function returns.
+ */
+ *err = U_INTERNAL_PROGRAM_ERROR;
+ return NULL;
+ }
+ pPieces = &stackPieces;
+ }
+ if(pArgs == NULL) {
+ uprv_memset(&stackArgs, 0, sizeof(stackArgs));
+ stackArgs.size = (int32_t)sizeof(stackArgs);
+ pArgs = &stackArgs;
+ }
+
+ pPieces->cnvName[0] = 0; /* start from empty pieces; parseConverterOptions() fills in what the name contains */
+ pPieces->locale[0] = 0;
+ pPieces->options = 0;
+
+ pArgs->name = converterName;
+ pArgs->locale = pPieces->locale;
+ pArgs->options = pPieces->options;
+
+ /* In case "name" is NULL we want to open the default converter. */
+ if (converterName == NULL) {
+#if U_CHARSET_IS_UTF8
+ pArgs->name = "UTF-8";
+ return (UConverterSharedData *)converterData[UCNV_UTF8];
+#else
+ /* Call ucnv_getDefaultName first to query the name from the OS. */
+ pArgs->name = ucnv_getDefaultName();
+ if (pArgs->name == NULL) {
+ *err = U_MISSING_RESOURCE_ERROR;
+ return NULL;
+ }
+ mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData; /* stays NULL-valued if the default is data-based, which triggers the load below */
+ checkForAlgorithmic = FALSE;
+ mayContainOption = gDefaultConverterContainsOption;
+ /* the default converter name is already canonical */
+#endif
+ }
+ else if(UCNV_FAST_IS_UTF8(converterName)) {
+ /* fastpath for UTF-8 */
+ pArgs->name = "UTF-8";
+ return (UConverterSharedData *)converterData[UCNV_UTF8];
+ }
+ else {
+ /* separate the converter name from the options */
+ parseConverterOptions(converterName, pPieces, pArgs, err);
+ if (U_FAILURE(*err)) {
+ /* Very bad name used. */
+ return NULL;
+ }
+
+ /* get the canonical converter name */
+ pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode);
+ if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) {
+ /*
+ * set the input name in case the converter was added
+ * without updating the alias table, or when there is no alias table
+ */
+ pArgs->name = pPieces->cnvName;
+ } else if (internalErrorCode == U_AMBIGUOUS_ALIAS_WARNING) {
+ *err = U_AMBIGUOUS_ALIAS_WARNING; /* pass the warning on to the caller */
+ }
+ }
+
+ /* separate the converter name from the options */
+ if(mayContainOption && pArgs->name != pPieces->cnvName) { /* the name now in pArgs has not been through parseConverterOptions() yet */
+ parseConverterOptions(pArgs->name, pPieces, pArgs, err);
+ }
+
+ /* get the shared data for an algorithmic converter, if it is one */
+ if (checkForAlgorithmic) {
+ mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name);
+ }
+ if (mySharedConverterData == NULL)
+ {
+ /* it is a data-based converter, get its shared data. */
+ /* Hold the cnvCacheMutex through the whole process of checking the */
+ /* converter data cache, and adding new entries to the cache */
+ /* to prevent other threads from modifying the cache during the */
+ /* process. */
+ pArgs->nestedLoads=1;
+ pArgs->pkg=NULL; /* no package: ucnv_load() goes through the shared cache */
+
+ umtx_lock(&cnvCacheMutex);
+ mySharedConverterData = ucnv_load(pArgs, err);
+ umtx_unlock(&cnvCacheMutex);
+ if (U_FAILURE (*err) || (mySharedConverterData == NULL))
+ {
+ return NULL;
+ }
+ }
+
+ return mySharedConverterData;
+}
+
+U_CAPI UConverter * /*
+ * Opens a converter by name: resolves the shared data with ucnv_loadSharedData()
+ * and builds the converter with ucnv_createConverterFromSharedData().
+ * myUConverter is handed through to ucnv_createConverterFromSharedData(), which
+ * allocates a converter itself when it is NULL.
+ * Returns the converter if *err is a success afterwards, otherwise NULL.
+ */
+ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err)
+{
+ UConverterNamePieces stackPieces;
+ UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
+ UConverterSharedData *mySharedConverterData;
+
+ UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN);
+
+ if(U_SUCCESS(*err)) {
+ UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName);
+
+ mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err);
+
+ myUConverter = ucnv_createConverterFromSharedData( /* checks *err itself, so a failed load is handled there */
+ myUConverter, mySharedConverterData,
+ &stackArgs,
+ err);
+
+ if(U_SUCCESS(*err)) {
+ UTRACE_EXIT_PTR_STATUS(myUConverter, *err);
+ return myUConverter;
+ }
+ }
+
+ /* exit with error */
+ UTRACE_EXIT_STATUS(*err);
+ return NULL;
+}
+
+U_CFUNC UBool /*
+ * Tests whether a converter with the given name can be opened.
+ * Runs the same load and create steps as ucnv_createConverter(), but with
+ * stackArgs.onlyTestIsLoadable set and a stack-allocated UConverter, and
+ * releases the shared data again afterwards.
+ * Returns U_SUCCESS(*err).
+ */
+ucnv_canCreateConverter(const char *converterName, UErrorCode *err) {
+ UConverter myUConverter;
+ UConverterNamePieces stackPieces;
+ UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
+ UConverterSharedData *mySharedConverterData;
+
+ UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN);
+
+ if(U_SUCCESS(*err)) {
+ UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName);
+
+ stackArgs.onlyTestIsLoadable=TRUE;
+ mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err);
+ ucnv_createConverterFromSharedData( /* return value unused: only the resulting *err matters here */
+ &myUConverter, mySharedConverterData,
+ &stackArgs,
+ err);
+ ucnv_unloadSharedDataIfReady(mySharedConverterData);
+ }
+
+ UTRACE_EXIT_STATUS(*err);
+ return U_SUCCESS(*err);
+}
+
+UConverter * /*
+ * Opens an algorithmic converter directly by type, without a name lookup.
+ * type must index an entry of converterData[] that exists and is not reference
+ * counted; otherwise *err is set to U_ILLEGAL_ARGUMENT_ERROR and NULL is returned.
+ * locale and options are passed to the converter through the load arguments.
+ * Returns whatever ucnv_createConverterFromSharedData() returns.
+ */
+ucnv_createAlgorithmicConverter(UConverter *myUConverter,
+ UConverterType type,
+ const char *locale, uint32_t options,
+ UErrorCode *err) {
+ UConverter *cnv;
+ const UConverterSharedData *sharedData;
+ UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
+
+ UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC);
+ UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type);
+
+ if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) {
+ *err = U_ILLEGAL_ARGUMENT_ERROR;
+ UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR);
+ return NULL;
+ }
+
+ sharedData = converterData[type];
+ if(sharedData == NULL || sharedData->isReferenceCounted) {
+ /* not a valid type, or not an algorithmic converter */
+ *err = U_ILLEGAL_ARGUMENT_ERROR;
+ UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR);
+ return NULL;
+ }
+
+ stackArgs.name = ""; /* there is no converter name on this path */
+ stackArgs.options = options;
+ stackArgs.locale=locale;
+ cnv = ucnv_createConverterFromSharedData(
+ myUConverter, (UConverterSharedData *)sharedData,
+ &stackArgs, err);
+
+ UTRACE_EXIT_PTR_STATUS(cnv, *err);
+ return cnv;
+}
+
+U_CFUNC UConverter* /*
+ * Opens a converter whose data comes from the given package.
+ * The options are parsed out of converterName, the data is loaded directly
+ * with createConverterFromFile() and a newly allocated converter is created
+ * from it. Returns the converter, or NULL with *err set to a failure code.
+ */
+ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err)
+{
+ UConverter *myUConverter;
+ UConverterSharedData *mySharedConverterData;
+ UConverterNamePieces stackPieces;
+ UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
+
+ UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE);
+
+ if(U_FAILURE(*err)) {
+ UTRACE_EXIT_STATUS(*err);
+ return NULL;
+ }
+
+ UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName);
+
+ /* first, get the options out of the converterName string */
+ stackPieces.cnvName[0] = 0;
+ stackPieces.locale[0] = 0;
+ stackPieces.options = 0;
+ parseConverterOptions(converterName, &stackPieces, &stackArgs, err);
+ if (U_FAILURE(*err)) {
+ /* Very bad name used. */
+ UTRACE_EXIT_STATUS(*err);
+ return NULL;
+ }
+ stackArgs.nestedLoads=1;
+ stackArgs.pkg=packageName;
+
+ /* open the data, unflatten the shared structure */
+ mySharedConverterData = createConverterFromFile(&stackArgs, err);
+
+ if (U_FAILURE(*err)) {
+ UTRACE_EXIT_STATUS(*err);
+ return NULL;
+ }
+
+ /* create the actual converter */
+ myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err); /* NULL: let it allocate the converter */
+
+ if (U_FAILURE(*err)) {
+ ucnv_close(myUConverter);
+ UTRACE_EXIT_STATUS(*err);
+ return NULL;
+ }
+
+ UTRACE_EXIT_PTR_STATUS(myUConverter, *err);
+ return myUConverter;
+}
+
+
+U_CFUNC UConverter* /*
+ * Initializes a converter object on top of already-loaded shared data.
+ * If myUConverter is NULL a converter is allocated with uprv_malloc();
+ * otherwise the caller's object is used and flagged as isCopyLocal.
+ * If *err is already a failure, or the allocation fails, the shared data is
+ * released with ucnv_unloadSharedDataIfReady().
+ * When pArgs->onlyTestIsLoadable is set, the per-converter defaults are not
+ * filled in and a failing open hook does not close the converter.
+ * Returns the converter; NULL if allocation fails or if the open hook fails
+ * outside of onlyTestIsLoadable mode; myUConverter unchanged if *err was
+ * already a failure on entry.
+ */
+ucnv_createConverterFromSharedData(UConverter *myUConverter,
+ UConverterSharedData *mySharedConverterData,
+ UConverterLoadArgs *pArgs,
+ UErrorCode *err)
+{
+ UBool isCopyLocal;
+
+ if(U_FAILURE(*err)) {
+ ucnv_unloadSharedDataIfReady(mySharedConverterData);
+ return myUConverter;
+ }
+ if(myUConverter == NULL)
+ {
+ myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
+ if(myUConverter == NULL)
+ {
+ *err = U_MEMORY_ALLOCATION_ERROR;
+ ucnv_unloadSharedDataIfReady(mySharedConverterData);
+ return NULL;
+ }
+ isCopyLocal = FALSE;
+ } else {
+ isCopyLocal = TRUE;
+ }
+
+ /* initialize the converter */
+ uprv_memset(myUConverter, 0, sizeof(UConverter));
+ myUConverter->isCopyLocal = isCopyLocal;
+ /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */
+ myUConverter->sharedData = mySharedConverterData;
+ myUConverter->options = pArgs->options;
+ if(!pArgs->onlyTestIsLoadable) {
+ myUConverter->preFromUFirstCP = U_SENTINEL;
+ myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK;
+ myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK;
+ myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus;
+ myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar;
+ myUConverter->subChar1 = mySharedConverterData->staticData->subChar1;
+ myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen;
+ myUConverter->subChars = (uint8_t *)myUConverter->subUChars; /* the substitution bytes live in the converter's own subUChars storage */
+ uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen);
+ myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */
+ }
+
+ if(mySharedConverterData->impl->open != NULL) {
+ mySharedConverterData->impl->open(myUConverter, pArgs, err);
+ if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) {
+ /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */
+ ucnv_close(myUConverter);
+ return NULL;
+ }
+ }
+
+ return myUConverter;
+}
+
+/*Frees all shared immutable objects that aren't referred to (reference count = 0)
+ *
+ * Flushes the default converter first, then walks SHARED_DATA_HASHTABLE under
+ * cnvCacheMutex and removes and deletes every entry whose reference counter is 0.
+ * Returns the number of entries deleted, or 0 if no table has been created yet.
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_flushCache ()
+{
+ UConverterSharedData *mySharedData = NULL;
+ int32_t pos;
+ int32_t tableDeletedNum = 0;
+ const UHashElement *e;
+ /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/
+ int32_t i, remaining;
+
+ UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE);
+
+ /* Close the default converter without creating a new one so that everything will be flushed. */
+ u_flushDefaultConverter();
+
+ /*if shared data hasn't even been lazy evaluated yet
+ * return 0
+ */
+ if (SHARED_DATA_HASHTABLE == NULL) {
+ UTRACE_EXIT_VALUE((int32_t)0);
+ return 0;
+ }
+
+ /*creates an enumeration to iterate through every element in the
+ * table
+ *
+ * Synchronization: holding cnvCacheMutex will prevent any other thread from
+ * accessing or modifying the hash table during the iteration.
+ * The reference count of an entry may be decremented by
+ * ucnv_close while the iteration is in process, but this is
+ * benign. It can't be incremented (in ucnv_createConverter())
+ * because the sequence of looking up in the cache + incrementing
+ * is protected by cnvCacheMutex.
+ */
+ umtx_lock(&cnvCacheMutex);
+ /*
+ * double loop: A delta/extension-only converter has a pointer to its base table's
+ * shared data; the first iteration of the outer loop may see the delta converter
+ * before the base converter, and unloading the delta converter may get the base
+ * converter's reference counter down to 0.
+ */
+ i = 0;
+ do {
+ remaining = 0;
+ pos = UHASH_FIRST;
+ while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL)
+ {
+ mySharedData = (UConverterSharedData *) e->value.pointer;
+ /*deletes only if reference counter == 0 */
+ if (mySharedData->referenceCounter == 0)
+ {
+ tableDeletedNum++;
+
+ UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData);
+
+ uhash_removeElement(SHARED_DATA_HASHTABLE, e);
+ mySharedData->sharedDataCached = FALSE; /* the cache no longer holds this entry */
+ ucnv_deleteSharedConverterData (mySharedData);
+ } else {
+ ++remaining;
+ }
+ }
+ } while(++i == 1 && remaining > 0); /* at most two passes, the second only if entries were left over */
+ umtx_unlock(&cnvCacheMutex);
+
+ UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining);
+
+ UTRACE_EXIT_VALUE(tableDeletedNum);
+ return tableDeletedNum;
+}
+
+/* available converters list --------------------------------------------------- */
+
+/*
+ * One-time initializer for gAvailableConverters / gAvailableConverterCount
+ * (run through umtx_initOnce() by haveAvailableConverterList()).
+ * Enumerates all known converter names and keeps those that
+ * ucnv_canCreateConverter() reports as openable.
+ * On failure errCode is set and the list stays empty; the name enumeration is
+ * closed on every exit path.
+ */
+static void U_CALLCONV initAvailableConvertersList(UErrorCode &errCode) {
+    U_ASSERT(gAvailableConverterCount == 0);
+    U_ASSERT(gAvailableConverters == NULL);
+
+    ucnv_enableCleanup();
+    UEnumeration *allConvEnum = ucnv_openAllNames(&errCode);
+    int32_t allConverterCount = uenum_count(allConvEnum, &errCode);
+    if (U_FAILURE(errCode)) {
+        /* uenum_close() accepts NULL, so this is safe even if the open itself failed */
+        uenum_close(allConvEnum);
+        return;
+    }
+
+    /* We can't have more than "*converterTable" converters to open */
+    gAvailableConverters = (const char **) uprv_malloc(allConverterCount * sizeof(char*));
+    if (!gAvailableConverters) {
+        errCode = U_MEMORY_ALLOCATION_ERROR;
+        uenum_close(allConvEnum);
+        return;
+    }
+
+    /* Open the default converter to make sure that it has first dibs in the hash table. */
+    UErrorCode localStatus = U_ZERO_ERROR;
+    UConverter tempConverter;
+    ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus));
+
+    gAvailableConverterCount = 0;
+
+    for (int32_t idx = 0; idx < allConverterCount; idx++) {
+        /* each name is tested with a fresh status so that one bad converter does not hide the rest */
+        localStatus = U_ZERO_ERROR;
+        const char *converterName = uenum_next(allConvEnum, NULL, &localStatus);
+        if (ucnv_canCreateConverter(converterName, &localStatus)) {
+            gAvailableConverters[gAvailableConverterCount++] = converterName;
+        }
+    }
+
+    uenum_close(allConvEnum);
+}
+
+
+static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { /* runs initAvailableConvertersList() through umtx_initOnce(); TRUE if *pErrorCode is a success afterwards */
+ umtx_initOnce(gAvailableConvertersInitOnce, &initAvailableConvertersList, *pErrorCode);
+ return U_SUCCESS(*pErrorCode);
+}
+
+/*
+ * Returns the number of entries in the available-converter list, building the
+ * list on first use. Returns 0 if the list could not be built; *pErrorCode
+ * then holds the failure.
+ */
+U_CFUNC uint16_t
+ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) {
+    return haveAvailableConverterList(pErrorCode) ? gAvailableConverterCount : 0;
+}
+
+/*
+ * Returns the n-th name in the available-converter list, building the list on
+ * first use. Returns NULL if the list could not be built, or if n is out of
+ * range (in which case *pErrorCode is set to U_INDEX_OUTOFBOUNDS_ERROR).
+ */
+U_CFUNC const char *
+ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) {
+    if (!haveAvailableConverterList(pErrorCode)) {
+        return NULL;
+    }
+    if (n >= gAvailableConverterCount) {
+        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return NULL;
+    }
+    return gAvailableConverters[n];
+}
+
+/* default converter name --------------------------------------------------- */
+
+#if !U_CHARSET_IS_UTF8
+/*
+Copy the canonical converter name into gDefaultConverterNameBuffer and publish
+it, together with its algorithmic shared data (if any) and its "contains an
+option" flag, as the default converter.
+ucnv_getDefaultName must be thread safe, which can call this function.
+
+ucnv_setDefaultName calls this function and it doesn't have to be
+thread safe because there is no reliable/safe way to reset the
+converter in use in all threads. If you did reset the converter, you
+would not be sure that retrieving a default converter for one string
+would be the same type of default converter for a successive string.
+Since the name is returned via ucnv_getDefaultName without copying,
+you shouldn't be modifying or deleting the string from a separate thread.
+
+On error *status is set and the current default is left untouched:
+U_BUFFER_OVERFLOW_ERROR if the name does not fit into the buffer, or the
+error reported by parseConverterOptions().
+*/
+static inline void
+internalSetName(const char *name, UErrorCode *status) {
+    UConverterNamePieces stackPieces;
+    UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
+    int32_t length=(int32_t)(uprv_strlen(name));
+    UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL);
+    const UConverterSharedData *algorithmicSharedData;
+
+    /* Never write past the static name buffer; one byte is needed for the terminating NUL. */
+    if(length >= (int32_t)sizeof(gDefaultConverterNameBuffer)) {
+        *status = U_BUFFER_OVERFLOW_ERROR;
+        return;
+    }
+
+    stackArgs.name = name;
+    if(containsOption) {
+        /* look up the algorithmic type by the bare name, without the options */
+        stackPieces.cnvName[0] = 0;
+        stackPieces.locale[0] = 0;
+        stackPieces.options = 0;
+        parseConverterOptions(name, &stackPieces, &stackArgs, status);
+        if(U_FAILURE(*status)) {
+            return;
+        }
+    }
+    algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name);
+
+    umtx_lock(&cnvCacheMutex);
+
+    gDefaultAlgorithmicSharedData = algorithmicSharedData;
+    gDefaultConverterContainsOption = containsOption;
+    uprv_memcpy(gDefaultConverterNameBuffer, name, length);
+    gDefaultConverterNameBuffer[length]=0;
+
+    /* gDefaultConverterName MUST be the last global var set by this function. */
+    /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */
+    // But there is nothing here preventing that from being reordered, either by the compiler
+    // or hardware. I'm adding the mutex to ucnv_getDefaultName for now. UMTX_CHECK is not enough.
+    // -- Andy
+    gDefaultConverterName = gDefaultConverterNameBuffer;
+
+    ucnv_enableCleanup();
+
+    umtx_unlock(&cnvCacheMutex);
+}
+#endif
+
+/*
+ * Returns the name of the default converter.
+ * When U_CHARSET_IS_UTF8 is set this is always "UTF-8". Otherwise the cached
+ * name is returned if there is one; if not, the name is taken from
+ * uprv_getDefaultCodepage(), canonicalized by opening a converter for it,
+ * replaced by a platform fallback if that does not work, and then cached
+ * with internalSetName().
+ *
+ * In order to be really thread-safe, the get function would have to take
+ * a buffer parameter and copy the current string inside a mutex block.
+ * This implementation only tries to be really thread-safe while
+ * setting the name.
+ * It assumes that setting a pointer is atomic.
+ */
+
+U_CAPI const char* U_EXPORT2
+ucnv_getDefaultName() {
+#if U_CHARSET_IS_UTF8
+ return "UTF-8";
+#else
+ /* local variable to be thread-safe */
+ const char *name;
+
+ /*
+ Concurrent calls to ucnv_getDefaultName must be thread safe,
+ but ucnv_setDefaultName is not thread safe.
+ */
+ {
+ icu::Mutex lock(&cnvCacheMutex);
+ name = gDefaultConverterName;
+ }
+ if(name==NULL) { /* nothing cached yet: work the name out now */
+ UErrorCode errorCode = U_ZERO_ERROR;
+ UConverter *cnv = NULL;
+
+ name = uprv_getDefaultCodepage();
+
+ /* if the name is there, test it out and get the canonical name with options */
+ if(name != NULL) {
+ cnv = ucnv_open(name, &errorCode);
+ if(U_SUCCESS(errorCode) && cnv != NULL) {
+ name = ucnv_getName(cnv, &errorCode);
+ }
+ }
+
+ if(name == NULL || name[0] == 0
+ || U_FAILURE(errorCode) || cnv == NULL
+ || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer)) /* the name must also fit into the cache buffer */
+ {
+ /* Panic time, let's use a fallback. */
+#if (U_CHARSET_FAMILY == U_ASCII_FAMILY)
+ name = "US-ASCII";
+ /* there is no 'algorithmic' converter for EBCDIC */
+#elif U_PLATFORM == U_PF_OS390
+ name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING;
+#else
+ name = "ibm-37_P100-1995";
+#endif
+ }
+
+ internalSetName(name, &errorCode);
+
+ /* The close may make the current name go away. */
+ ucnv_close(cnv);
+ }
+
+ return name;
+#endif
+}
+
+#if U_CHARSET_IS_UTF8
+U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) {} /* no-op in this configuration */
+#else
+/*
+Sets the default converter name.
+A NULL converterName clears the cached name. Otherwise the name is checked by
+opening a converter for it, and the name reported by that converter is stored
+with internalSetName(); a name that cannot be opened leaves the current
+default unchanged.
+
+This function is not thread safe, and it can't be thread safe.
+See internalSetName or the API reference for details.
+*/
+U_CAPI void U_EXPORT2
+ucnv_setDefaultName(const char *converterName) {
+ if(converterName==NULL) {
+ /* reset to the default codepage */
+ gDefaultConverterName=NULL;
+ } else {
+ UErrorCode errorCode = U_ZERO_ERROR;
+ UConverter *cnv = NULL;
+ const char *name = NULL;
+
+ /* if the name is there, test it out and get the canonical name with options */
+ cnv = ucnv_open(converterName, &errorCode);
+ if(U_SUCCESS(errorCode) && cnv != NULL) {
+ name = ucnv_getName(cnv, &errorCode);
+ }
+
+ if(U_SUCCESS(errorCode) && name!=NULL) {
+ internalSetName(name, &errorCode);
+ }
+ /* else this converter is bad to use. Don't change it to a bad value. */
+
+ /* The close may make the current name go away. */
+ ucnv_close(cnv);
+
+ /* reset the converter cache */
+ u_flushDefaultConverter();
+ }
+}
+#endif
+
+/* data swapping ------------------------------------------------------------ */
+
+/* most of this might belong more properly into ucnvmbcs.c, but that is so large */
+
+#if !UCONFIG_NO_LEGACY_CONVERSION
+
+/*
+ * Swap an ICU .cnv conversion table as described by the swapper ds.
+ * Only tables whose static data has conversionType==UCNV_MBCS are handled.
+ *
+ * length<0 requests a preflight: the headers are read and validated and the
+ * total size is computed, but the code in this function writes to outData
+ * only when length>=0. With length>=0 the input is first checked to be long
+ * enough, then copied to the output (unless swapping in place) and swapped
+ * section by section.
+ *
+ * Returns headerSize + static data size + MBCS data size. Every error path
+ * checked here sets *pErrorCode and returns 0.
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const UDataInfo *pInfo;
+ int32_t headerSize;
+
+ const uint8_t *inBytes;
+ uint8_t *outBytes;
+
+ uint32_t offset, count, staticDataSize;
+ int32_t size;
+
+ const UConverterStaticData *inStaticData;
+ UConverterStaticData *outStaticData;
+
+ const _MBCSHeader *inMBCSHeader;
+ _MBCSHeader *outMBCSHeader;
+ _MBCSHeader mbcsHeader;
+ uint32_t mbcsHeaderLength;
+ UBool noFromU=FALSE;
+
+ uint8_t outputType;
+
+ int32_t maxFastUChar, mbcsIndexLength;
+
+ const int32_t *inExtIndexes;
+ int32_t extOffset;
+
+ /* udata_swapDataHeader checks the arguments */
+ headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
+ /* check data format and format version */
+ pInfo=(const UDataInfo *)((const char *)inData+4);
+ if(!(
+ pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */
+ pInfo->dataFormat[1]==0x6e &&
+ pInfo->dataFormat[2]==0x76 &&
+ pInfo->dataFormat[3]==0x74 &&
+ pInfo->formatVersion[0]==6 &&
+ pInfo->formatVersion[1]>=2
+ )) {
+ udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n",
+ pInfo->dataFormat[0], pInfo->dataFormat[1],
+ pInfo->dataFormat[2], pInfo->dataFormat[3],
+ pInfo->formatVersion[0], pInfo->formatVersion[1]);
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ inBytes=(const uint8_t *)inData+headerSize;
+ outBytes=(uint8_t *)outData+headerSize;
+
+ /* read the initial UConverterStaticData structure after the UDataInfo header */
+ inStaticData=(const UConverterStaticData *)inBytes;
+ outStaticData=(UConverterStaticData *)outBytes;
+
+ /*
+ * preflight (length<0): only read the struct size;
+ * otherwise make sure that the whole static data structure is present
+ */
+ if(length<0) {
+ staticDataSize=ds->readUInt32(inStaticData->structSize);
+ } else {
+ length-=headerSize;
+ if( length<(int32_t)sizeof(UConverterStaticData) ||
+ (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize))
+ ) {
+ udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
+ length);
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ }
+
+ if(length>=0) {
+ /* swap the static data */
+ if(inStaticData!=outStaticData) {
+ uprv_memcpy(outStaticData, inStaticData, staticDataSize);
+ }
+
+ ds->swapArray32(ds, &inStaticData->structSize, 4,
+ &outStaticData->structSize, pErrorCode);
+ ds->swapArray32(ds, &inStaticData->codepage, 4,
+ &outStaticData->codepage, pErrorCode);
+
+ ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name),
+ outStaticData->name, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ udata_printError(ds, "ucnv_swap(): error swapping converter name\n");
+ return 0;
+ }
+ }
+
+ /* advance past the static data; length keeps counting the remaining bytes */
+ inBytes+=staticDataSize;
+ outBytes+=staticDataSize;
+ if(length>=0) {
+ length-=(int32_t)staticDataSize;
+ }
+
+ /* check for supported conversionType values */
+ if(inStaticData->conversionType==UCNV_MBCS) {
+ /* swap MBCS data */
+ inMBCSHeader=(const _MBCSHeader *)inBytes;
+ outMBCSHeader=(_MBCSHeader *)outBytes;
+
+ if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) {
+ udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
+ length);
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ /*
+ * version 4.x headers have a fixed length;
+ * version 5.x headers store their length (and further option bits) in the options field
+ */
+ if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) {
+ mbcsHeaderLength=MBCS_HEADER_V4_LENGTH;
+ } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 &&
+ ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))&
+ MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0
+ ) {
+ mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK;
+ noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0);
+ } else {
+ udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n",
+ inMBCSHeader->version[0], inMBCSHeader->version[1]);
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ /* make a local copy of the header fields in the byte order of this machine */
+ uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4);
+ mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates);
+ mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks);
+ mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits);
+ mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable);
+ mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes);
+ mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags);
+ mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength);
+ /* mbcsHeader.options have been read above */
+
+ /* flags: the low 8 bits are the output type, the bits above them the offset of the extension data (0 if none) */
+ extOffset=(int32_t)(mbcsHeader.flags>>8);
+ outputType=(uint8_t)mbcsHeader.flags;
+ if(noFromU && outputType==MBCS_OUTPUT_1) {
+ udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n");
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ /* make sure that the output type is known */
+ switch(outputType) {
+ case MBCS_OUTPUT_1:
+ case MBCS_OUTPUT_2:
+ case MBCS_OUTPUT_3:
+ case MBCS_OUTPUT_4:
+ case MBCS_OUTPUT_3_EUC:
+ case MBCS_OUTPUT_4_EUC:
+ case MBCS_OUTPUT_2_SISO:
+ case MBCS_OUTPUT_EXT_ONLY:
+ /* OK */
+ break;
+ default:
+ udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n",
+ outputType);
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ /* calculate the length of the MBCS data */
+
+ /*
+ * utf8Friendly MBCS files (mbcsHeader.version 4.3)
+ * contain an additional mbcsIndex table:
+ * uint16_t[(maxFastUChar+1)>>6];
+ * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff).
+ */
+ maxFastUChar=0;
+ mbcsIndexLength=0;
+ if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 &&
+ mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0
+ ) {
+ maxFastUChar=(maxFastUChar<<8)|0xff;
+ mbcsIndexLength=((maxFastUChar+1)>>6)*2; /* number of bytes */
+ }
+
+ if(extOffset==0) {
+ size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength);
+ if(!noFromU) {
+ size+=(int32_t)mbcsHeader.fromUBytesLength;
+ }
+
+ /* avoid compiler warnings - not otherwise necessary, and the value does not matter */
+ inExtIndexes=NULL;
+ } else {
+ /* there is extension data after the base data, see ucnv_ext.h */
+ if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) {
+ udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
+ length);
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ inExtIndexes=(const int32_t *)(inBytes+extOffset);
+ size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]);
+ }
+
+ if(length>=0) {
+ if(length<size) {
+ udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
+ length);
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ /* copy the data for inaccessible bytes */
+ if(inBytes!=outBytes) {
+ uprv_memcpy(outBytes, inBytes, size);
+ }
+
+ /* swap the MBCSHeader, except for the version field */
+ count=mbcsHeaderLength*4;
+ ds->swapArray32(ds, &inMBCSHeader->countStates, count-4,
+ &outMBCSHeader->countStates, pErrorCode);
+
+ if(outputType==MBCS_OUTPUT_EXT_ONLY) {
+ /*
+ * extension-only file,
+ * contains a base name instead of normal base table data
+ */
+
+ /* swap the base name, between the header and the extension data */
+ const char *inBaseName=(const char *)inBytes+count;
+ char *outBaseName=(char *)outBytes+count;
+ ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName),
+ outBaseName, pErrorCode);
+ } else {
+ /* normal file with base table data */
+
+ /* swap the state table, 1kB per state */
+ offset=count;
+ count=mbcsHeader.countStates*1024;
+ ds->swapArray32(ds, inBytes+offset, (int32_t)count,
+ outBytes+offset, pErrorCode);
+
+ /* swap the toUFallbacks[] */
+ offset+=count;
+ count=mbcsHeader.countToUFallbacks*8;
+ ds->swapArray32(ds, inBytes+offset, (int32_t)count,
+ outBytes+offset, pErrorCode);
+
+ /* swap the unicodeCodeUnits[] */
+ offset=mbcsHeader.offsetToUCodeUnits;
+ count=mbcsHeader.offsetFromUTable-offset;
+ ds->swapArray16(ds, inBytes+offset, (int32_t)count,
+ outBytes+offset, pErrorCode);
+
+ /* offset to the stage 1 table, independent of the outputType */
+ offset=mbcsHeader.offsetFromUTable;
+
+ if(outputType==MBCS_OUTPUT_1) {
+ /* SBCS: swap the fromU tables, all 16 bits wide */
+ count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength;
+ ds->swapArray16(ds, inBytes+offset, (int32_t)count,
+ outBytes+offset, pErrorCode);
+ } else {
+ /* otherwise: swap the stage tables separately */
+
+ /* stage 1 table: uint16_t[0x440 or 0x40] */
+ if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
+ count=0x440*2; /* for all of Unicode */
+ } else {
+ count=0x40*2; /* only BMP */
+ }
+ ds->swapArray16(ds, inBytes+offset, (int32_t)count,
+ outBytes+offset, pErrorCode);
+
+ /* stage 2 table: uint32_t[] */
+ offset+=count;
+ count=mbcsHeader.offsetFromUBytes-offset;
+ ds->swapArray32(ds, inBytes+offset, (int32_t)count,
+ outBytes+offset, pErrorCode);
+
+ /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */
+ offset=mbcsHeader.offsetFromUBytes;
+ count= noFromU ? 0 : mbcsHeader.fromUBytesLength;
+ switch(outputType) {
+ case MBCS_OUTPUT_2:
+ case MBCS_OUTPUT_3_EUC:
+ case MBCS_OUTPUT_2_SISO:
+ ds->swapArray16(ds, inBytes+offset, (int32_t)count,
+ outBytes+offset, pErrorCode);
+ break;
+ case MBCS_OUTPUT_4:
+ ds->swapArray32(ds, inBytes+offset, (int32_t)count,
+ outBytes+offset, pErrorCode);
+ break;
+ default:
+ /* just uint8_t[], nothing to swap */
+ break;
+ }
+
+ /* the mbcsIndex table, if present, directly follows the result bytes */
+ if(mbcsIndexLength!=0) {
+ offset+=count;
+ count=mbcsIndexLength;
+ ds->swapArray16(ds, inBytes+offset, (int32_t)count,
+ outBytes+offset, pErrorCode);
+ }
+ }
+ }
+
+ if(extOffset!=0) {
+ /* swap the extension data */
+ inBytes+=extOffset;
+ outBytes+=extOffset;
+
+ /*
+ * From here on, length no longer holds the number of remaining bytes:
+ * it is reused as a scratch variable for the array lengths from indexes[].
+ * The indexes[] themselves are swapped last because inExtIndexes is still
+ * read for every table (this matters when swapping in place).
+ */
+
+ /* swap toUTable[] */
+ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]);
+ length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]);
+ ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);
+
+ /* swap toUUChars[] */
+ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]);
+ length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]);
+ ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
+
+ /* swap fromUTableUChars[] */
+ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]);
+ length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]);
+ ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
+
+ /* swap fromUTableValues[] */
+ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]);
+ /* same length as for fromUTableUChars[] */
+ ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);
+
+ /* no need to swap fromUBytes[] */
+
+ /* swap fromUStage12[] */
+ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]);
+ length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]);
+ ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
+
+ /* swap fromUStage3[] */
+ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]);
+ length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]);
+ ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
+
+ /* swap fromUStage3b[] */
+ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]);
+ length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]);
+ ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);
+
+ /* swap indexes[] */
+ length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]);
+ ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode);
+ }
+ }
+ } else {
+ udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n",
+ inStaticData->conversionType);
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ return headerSize+(int32_t)staticDataSize+size;
+}
+
+#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
+
+#endif
diff --git a/thirdparty/icu4c/common/ucnv_bld.h b/thirdparty/icu4c/common/ucnv_bld.h
new file mode 100644
index 0000000000..43e6c09ac0
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_bld.h
@@ -0,0 +1,296 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2015 International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+*
+* ucnv_bld.h:
+* Contains internal data structure definitions
+* Created by Bertrand A. Damiba
+*
+* Change history:
+*
+* 06/29/2000 helena Major rewrite of the callback APIs.
+*/
+
+#ifndef UCNV_BLD_H
+#define UCNV_BLD_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_err.h"
+#include "unicode/utf16.h"
+#include "ucnv_cnv.h"
+#include "ucnvmbcs.h"
+#include "ucnv_ext.h"
+#include "udataswp.h"
+
+/* size of the overflow buffers in UConverter, enough for escaping callbacks */
+#define UCNV_ERROR_BUFFER_LENGTH 32
+
+/* at most 4 bytes per substitution character (part of .cnv file format! see UConverterStaticData) */
+#define UCNV_MAX_SUBCHAR_LEN 4
+
+/* at most 8 bytes per character in toUBytes[] (UTF-8 uses up to 6) */
+#define UCNV_MAX_CHAR_LEN 8
+
+/* converter options bits */
+#define UCNV_OPTION_VERSION 0xf
+#define UCNV_OPTION_SWAP_LFNL 0x10
+
+#define UCNV_GET_VERSION(cnv) ((cnv)->options&UCNV_OPTION_VERSION)
+
+U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv
+ itself is compiled under C++, the linkage of the funcptrs will
+ work.
+ */
+
+/* Converter table data; the MBCS table is the only member of the union. */
+typedef union UConverterTable {
+    UConverterMBCSTable mbcs;
+} UConverterTable;
+
+struct UConverterImpl;
+typedef struct UConverterImpl UConverterImpl;
+
+/** values for the unicodeMask */
+#define UCNV_HAS_SUPPLEMENTARY 1
+#define UCNV_HAS_SURROGATES 2
+
+/*
+ * Fixed-layout description of a converter.
+ * ucnv_swap() overlays this structure directly onto .cnv data, so the member
+ * offsets and sizes noted on the right are part of the .cnv file format.
+ */
+typedef struct UConverterStaticData { /* +offset: size */
+ uint32_t structSize; /* +0: 4 Size of this structure */
+
+ char name
+ [UCNV_MAX_CONVERTER_NAME_LENGTH]; /* +4: 60 internal name of the converter- invariant chars */
+
+ int32_t codepage; /* +64: 4 codepage # (now IBM-$codepage) */
+
+ int8_t platform; /* +68: 1 platform of the converter (only IBM now) */
+ int8_t conversionType; /* +69: 1 conversion type */
+
+ int8_t minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */
+ int8_t maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */
+
+ uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* +72: 4 [note: 4 and 8 byte boundary] */
+ int8_t subCharLen; /* +76: 1 */
+
+ uint8_t hasToUnicodeFallback; /* +77: 1 boolean flag; declared uint8_t rather than UBool, presumably so that its size is the same on every platform - TODO confirm */
+ uint8_t hasFromUnicodeFallback; /* +78: 1 */
+ uint8_t unicodeMask; /* +79: 1 bit 0: has supplementary bit 1: has single surrogates */
+ uint8_t subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */
+ uint8_t reserved[19]; /* +81: 19 to round out the structure */
+ /* total size: 100 */
+} UConverterStaticData;
+
+/*
+ * Defines the UConverterSharedData struct,
+ * the immutable, shared part of UConverter.
+ *
+ * Note: UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER initializes the members
+ * positionally, so the member order here and the value order in that macro
+ * must be kept in sync.
+ */
+struct UConverterSharedData {
+ uint32_t structSize; /* Size of this structure */
+ uint32_t referenceCounter; /* used to count number of clients, unused for static/immutable SharedData */
+
+ const void *dataMemory; /* from udata_openChoice() - for cleanup */
+
+ const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */
+
+ UBool sharedDataCached; /* true: shared data is in cache, don't destroy on ucnv_close() if 0 ref. false: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */
+ /** If false, then referenceCounter is not used. Must not change after initialization. */
+ UBool isReferenceCounted;
+
+ const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */
+
+ /*initial values of some members of the mutable part of object */
+ uint32_t toUnicodeStatus;
+
+ /*
+ * Shared data structures currently come in two flavors:
+ * - readonly for built-in algorithmic converters
+ * - allocated for MBCS, with a pointer to an allocated UConverterTable
+ * which always has a UConverterMBCSTable
+ *
+ * To eliminate one allocation, I am making the UConverterMBCSTable
+ * a member of the shared data.
+ *
+ * markus 2003-nov-07
+ */
+ UConverterMBCSTable mbcs;
+};
+
+/**
+ * UConverterSharedData initializer for static, non-reference-counted converters.
+ *
+ * The values are positional and follow the member order of UConverterSharedData:
+ * structSize, referenceCounter (all bits set), dataMemory (NULL),
+ * staticData (pStaticData), sharedDataCached (false), isReferenceCounted (false),
+ * impl (pImpl), toUnicodeStatus (0), mbcs (UCNV_MBCS_TABLE_INITIALIZER).
+ */
+#define UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(pStaticData, pImpl) \
+ { \
+ sizeof(UConverterSharedData), ~((uint32_t)0), \
+ NULL, pStaticData, false, false, pImpl, \
+ 0, UCNV_MBCS_TABLE_INITIALIZER \
+ }
+
+/* Defines a UConverter, the lightweight mutable part the user sees */
+
+struct UConverter {
+ /*
+ * Error function pointer called when conversion issues
+ * occur during a ucnv_fromUnicode call
+ */
+ void (U_EXPORT2 *fromUCharErrorBehaviour) (const void *context,
+ UConverterFromUnicodeArgs *args,
+ const UChar *codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode *);
+ /*
+ * Error function pointer called when conversion issues
+ * occur during a ucnv_toUnicode call
+ */
+ void (U_EXPORT2 *fromCharErrorBehaviour) (const void *context,
+ UConverterToUnicodeArgs *args,
+ const char *codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode *);
+
+ /*
+ * Pointer to additional data that depends on the converter type.
+ * Used by ISO 2022, SCSU, GB 18030 converters, possibly more.
+ */
+ void *extraInfo;
+
+ const void *fromUContext;
+ const void *toUContext;
+
+ /*
+ * Pointer to charset bytes for substitution string if subCharLen>0,
+ * or pointer to Unicode string (UChar *) if subCharLen<0.
+ * subCharLen==0 is equivalent to using a skip callback.
+ * If the pointer is !=subUChars then it is allocated with
+ * UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR bytes.
+ * The subUChars field is declared as UChar[] not uint8_t[] to
+ * guarantee alignment for UChars.
+ */
+ uint8_t *subChars;
+
+ UConverterSharedData *sharedData; /* Pointer to the shared immutable part of the converter object */
+
+ uint32_t options; /* options flags from UConverterOpen, may contain additional bits */
+
+ UBool sharedDataIsCached; /* true: shared data is in cache, don't destroy on ucnv_close() if 0 ref. false: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */
+ UBool isCopyLocal; /* true if UConverter is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */
+ UBool isExtraLocal; /* true if extraInfo is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */
+
+ UBool useFallback;
+ int8_t toULength; /* number of bytes in toUBytes */
+ uint8_t toUBytes[UCNV_MAX_CHAR_LEN-1];/* more "toU status"; keeps the bytes of the current character */
+ uint32_t toUnicodeStatus; /* Used to internalize stream status information */
+ int32_t mode;
+ uint32_t fromUnicodeStatus;
+
+ /*
+ * More fromUnicode() status. Serves 3 purposes:
+ * - keeps a lead surrogate between buffers (similar to toUBytes[])
+ * - keeps a lead surrogate at the end of the stream,
+ * which the framework handles as truncated input
+ * - if the fromUnicode() implementation returns to the framework
+ * (ucnv.c ucnv_fromUnicode()), then the framework calls the callback
+ * for this code point
+ */
+ UChar32 fromUChar32;
+
+ /*
+ * value for ucnv_getMaxCharSize()
+ *
+ * usually simply copied from the static data, but ucnvmbcs.c modifies
+ * the value depending on the converter type and options
+ */
+ int8_t maxBytesPerUChar;
+
+ int8_t subCharLen; /* length of the codepage specific character sequence */
+ int8_t invalidCharLength;
+ int8_t charErrorBufferLength; /* number of valid bytes in charErrorBuffer */
+
+ int8_t invalidUCharLength;
+ int8_t UCharErrorBufferLength; /* number of valid UChars in UCharErrorBuffer */
+
+ uint8_t subChar1; /* single-byte substitution character if different from subChar */
+ UBool useSubChar1;
+ char invalidCharBuffer[UCNV_MAX_CHAR_LEN]; /* bytes from last error/callback situation */
+ uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* codepage output from Error functions; ucnv_fromUWriteBytes() also stores bytes here that did not fit into the target */
+ UChar subUChars[UCNV_MAX_SUBCHAR_LEN/U_SIZEOF_UCHAR]; /* see subChars documentation */
+
+ UChar invalidUCharBuffer[U16_MAX_LENGTH]; /* UChars from last error/callback situation */
+ UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* unicode output from Error functions; ucnv_toUWriteUChars() also stores UChars here that did not fit into the target */
+
+ /* fields for conversion extension */
+
+ /* store previous UChars/chars to continue partial matches */
+ UChar32 preFromUFirstCP; /* >=0: partial match */
+ UChar preFromU[UCNV_EXT_MAX_UCHARS];
+ char preToU[UCNV_EXT_MAX_BYTES];
+ int8_t preFromULength, preToULength; /* negative: replay */
+ int8_t preToUFirstLength; /* length of first character */
+
+ /* new fields for ICU 4.0 */
+ UConverterCallbackReason toUCallbackReason; /* (*fromCharErrorBehaviour) reason, set when error is detected */
+};
+
+U_CDECL_END /* end of UConverter */
+
+#define CONVERTER_FILE_EXTENSION ".cnv"
+
+
+/**
+ * Return the number of all converter names.
+ * @param pErrorCode The error code
+ * @return the number of all converter names
+ */
+U_CFUNC uint16_t
+ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode);
+
+/**
+ * Return the (n)th converter name in mixed case, or NULL
+ * if there is none (typically, if the data cannot be loaded).
+ * 0<=n<ucnv_bld_countAvailableConverters().
+ * @param n The number specifies which converter name to get
+ * @param pErrorCode The error code
+ * @return the (n)th converter name in mixed case, or NULL if there is none.
+ */
+U_CFUNC const char *
+ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Load a non-algorithmic converter.
+ * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex).
+ */
+U_CAPI UConverterSharedData *
+ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err);
+
+/**
+ * Unload a non-algorithmic converter.
+ * It must be sharedData->isReferenceCounted
+ * and this function must be called inside umtx_lock(&cnvCacheMutex).
+ */
+U_CAPI void
+ucnv_unload(UConverterSharedData *sharedData);
+
+/**
+ * Swap ICU .cnv conversion tables. See udataswp.h.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+U_CAPI void U_EXPORT2
+ucnv_enableCleanup(void);
+
+#endif
+
+#endif /* _UCNV_BLD */
diff --git a/thirdparty/icu4c/common/ucnv_cb.cpp b/thirdparty/icu4c/common/ucnv_cb.cpp
new file mode 100644
index 0000000000..1bb0012014
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_cb.cpp
@@ -0,0 +1,261 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2000-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+ * ucnv_cb.c:
+ * External APIs for the ICU's codeset conversion library
+ * Helena Shih
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ * 7/28/2000 srl Implementation
+ */
+
+/**
+ * @name Character Conversion C API
+ *
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv_cb.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "cmemory.h"
+
+/* need to update the offsets when the target moves. */
+/* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly
+if you don't use ucnv_cbXXX functions. Make sure you don't use the same callback within
+the same call stack if the complexity arises. */
+/*
+ * Callback helper: append length bytes from source to the target of args.
+ * The work is delegated to ucnv_fromUWriteBytes(), which also advances
+ * args->target and args->offsets. Nothing happens if *err is already a failure.
+ */
+U_CAPI void U_EXPORT2
+ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
+                        const char* source,
+                        int32_t length,
+                        int32_t offsetIndex,
+                        UErrorCode * err)
+{
+    if(!U_FAILURE(*err)) {
+        ucnv_fromUWriteBytes(args->converter,
+                             source, length,
+                             &args->target, args->targetLimit,
+                             &args->offsets, offsetIndex,
+                             err);
+    }
+}
+
+/*
+ * Callback helper: convert the UChars from *source up to sourceLimit with
+ * args->converter and append the result to args->target.
+ * If args->offsets is set, offsetIndex is recorded for every byte that was
+ * written to the target.
+ * If the target overflows, the conversion is continued into the free part of
+ * the converter's charErrorBuffer and *err stays U_BUFFER_OVERFLOW_ERROR;
+ * if that buffer is already full or overflows as well, *err is set to
+ * U_INTERNAL_PROGRAM_ERROR.
+ * Nothing happens if *err is already a failure on entry.
+ */
+U_CAPI void U_EXPORT2
+ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
+ const UChar** source,
+ const UChar* sourceLimit,
+ int32_t offsetIndex,
+ UErrorCode * err)
+{
+ /*
+ This is a fun one. Recursion can occur - we're basically going to
+ just retry shoving data through the same converter. Note, if you got
+ here through some kind of invalid sequence, you maybe should emit a
+ reset sequence of some kind and/or call ucnv_reset(). Since this
+ IS an actual conversion, take care that you've changed the callback
+ or the data, or you'll get an infinite loop.
+
+ Please set the err value to something reasonable before calling
+ into this.
+ */
+
+ char *oldTarget;
+
+ if(U_FAILURE(*err))
+ {
+ return;
+ }
+
+ /* remember where the target started so that the offsets can be filled in afterwards */
+ oldTarget = args->target;
+
+ ucnv_fromUnicode(args->converter,
+ &args->target,
+ args->targetLimit,
+ source,
+ sourceLimit,
+ NULL, /* no offsets */
+ FALSE, /* no flush */
+ err);
+
+ if(args->offsets)
+ {
+ while (args->target != oldTarget) /* if it moved at all.. */
+ {
+ *(args->offsets)++ = offsetIndex;
+ oldTarget++;
+ }
+ }
+
+ /*
+ Note, if you did something like used a Stop subcallback, things would get interesting.
+ In fact, here's where we want to return the partially consumed in-source!
+ */
+ if(*err == U_BUFFER_OVERFLOW_ERROR)
+ /* && (*source < sourceLimit && args->target >= args->targetLimit)
+ -- S. Hrcek */
+ {
+ /* Overflowed the target. Now, we'll write into the charErrorBuffer.
+ It's a fixed size. If we overflow it... Hmm */
+ char *newTarget;
+ const char *newTargetLimit;
+ UErrorCode err2 = U_ZERO_ERROR;
+
+ int8_t errBuffLen;
+
+ errBuffLen = args->converter->charErrorBufferLength;
+
+ /* start the new target at the first free slot in the errbuff.. */
+ newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
+
+ newTargetLimit = (char *)(args->converter->charErrorBuffer +
+ sizeof(args->converter->charErrorBuffer));
+
+ if(newTarget >= newTargetLimit)
+ {
+ *err = U_INTERNAL_PROGRAM_ERROR;
+ return;
+ }
+
+ /* We're going to tell the converter that the errbuff len is empty.
+ This prevents the existing errbuff from being 'flushed' out onto
+ itself. If the errbuff is needed by the converter this time,
+ we're hosed - we're out of space! */
+
+ args->converter->charErrorBufferLength = 0;
+
+ ucnv_fromUnicode(args->converter,
+ &newTarget,
+ newTargetLimit,
+ source,
+ sourceLimit,
+ NULL,
+ FALSE,
+ &err2);
+
+ /* We can go ahead and overwrite the length here. We know just how
+ to recalculate it. */
+
+ args->converter->charErrorBufferLength = (int8_t)(
+ newTarget - (char*)args->converter->charErrorBuffer);
+
+ if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
+ {
+ /* now we're REALLY in trouble.
+ Internal program error - callback shouldn't have written this much
+ data!
+ */
+ *err = U_INTERNAL_PROGRAM_ERROR;
+ return;
+ }
+ /*else {*/
+ /* sub errs could be invalid/truncated/illegal chars or w/e.
+ These might want to be passed on up.. But the problem is, we already
+ need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
+ other errs.. */
+
+ /*
+ if(U_FAILURE(err2))
+ ??
+ */
+ /*}*/
+ }
+}
+
+/*
+ * Callback helper: write the converter's substitution sequence to the target.
+ * subCharLen==0 writes nothing; subCharLen<0 means that subChars holds a
+ * UChar string of -subCharLen units which is converted; otherwise the
+ * converter's writeSub function, subChar1 or the subChars bytes are used.
+ */
+U_CAPI void U_EXPORT2
+ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
+                      int32_t offsetIndex,
+                      UErrorCode * err)
+{
+    if(U_FAILURE(*err)) {
+        return;
+    }
+
+    UConverter *converter = args->converter;
+    int32_t length = converter->subCharLen;
+
+    if(length == 0) {
+        /* no substitution configured */
+        return;
+    }
+
+    if(length < 0) {
+        /*
+         * The substitution is a Unicode string with -length UChars.
+         * ucnv_setSubstString() verified that it can be converted, so the
+         * callback function need not be changed: no conversion error and
+         * therefore no recursion is expected here, at worst a
+         * U_BUFFER_OVERFLOW_ERROR.
+         */
+        const UChar *source = (const UChar *)converter->subChars;
+        ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
+        return;
+    }
+
+    if(converter->sharedData->impl->writeSub!=NULL) {
+        /* the converter implementation writes its own substitution */
+        converter->sharedData->impl->writeSub(args, offsetIndex, err);
+        return;
+    }
+
+    /* default: the multi-byte substitution sequence */
+    const char *subBytes = (const char *)converter->subChars;
+    int32_t subLength = length;
+
+    if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
+        /*
+         * TODO: Is this untestable because the MBCS converter has a writeSub
+         * function to call and the other converters don't use subChar1?
+         */
+        subBytes = (const char *)&converter->subChar1;
+        subLength = 1;
+    }
+
+    ucnv_cbFromUWriteBytes(args, subBytes, subLength, offsetIndex, err);
+}
+
+/*
+ * Callback helper: append length UChars from source to the target of args.
+ * The work is delegated to ucnv_toUWriteUChars(), which also advances
+ * args->target and args->offsets. Nothing happens if *err is already a failure.
+ */
+U_CAPI void U_EXPORT2
+ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
+                       const UChar* source,
+                       int32_t length,
+                       int32_t offsetIndex,
+                       UErrorCode * err)
+{
+    if(!U_FAILURE(*err)) {
+        ucnv_toUWriteUChars(args->converter,
+                            source, length,
+                            &args->target, args->targetLimit,
+                            &args->offsets, offsetIndex,
+                            err);
+    }
+}
+
+/*
+ * Callback helper: write one substitution UChar to the target.
+ * U+001A is used when the invalid input was a single byte and the converter
+ * has a subChar1; U+FFFD is used in every other case.
+ */
+U_CAPI void U_EXPORT2
+ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
+                    int32_t offsetIndex,
+                    UErrorCode * err)
+{
+    static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
+
+    const UConverter *cnv = args->converter;
+    const UChar *substitute =
+        (cnv->invalidCharLength == 1 && cnv->subChar1 != 0) ?
+            &kSubstituteChar1 : &kSubstituteChar;
+
+    /* could optimize this case, just one uchar */
+    ucnv_cbToUWriteUChars(args, substitute, 1, offsetIndex, err);
+}
+
+#endif
diff --git a/thirdparty/icu4c/common/ucnv_cnv.cpp b/thirdparty/icu4c/common/ucnv_cnv.cpp
new file mode 100644
index 0000000000..ea71acf92c
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_cnv.cpp
@@ -0,0 +1,182 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* uconv_cnv.c:
+* Implements all the low level conversion functions
+* T_UnicodeConverter_{to,from}Unicode_$ConversionType
+*
+* Change history:
+*
+* 06/29/2000 helena Major rewrite of the callback APIs.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv_err.h"
+#include "unicode/ucnv.h"
+#include "unicode/uset.h"
+#include "ucnv_cnv.h"
+#include "ucnv_bld.h"
+#include "cmemory.h"
+
+/*
+ * Add the whole code point range U+0000..U+10FFFF to the set behind sa.
+ * cnv, which and pErrorCode are not used.
+ */
+U_CFUNC void
+ucnv_getCompleteUnicodeSet(const UConverter *cnv,
+                           const USetAdder *sa,
+                           UConverterUnicodeSet which,
+                           UErrorCode *pErrorCode) {
+    const UChar32 firstCodePoint=0;
+    const UChar32 lastCodePoint=0x10ffff;
+
+    (void)pErrorCode;
+    (void)which;
+    (void)cnv;
+
+    sa->addRange(sa->set, firstCodePoint, lastCodePoint);
+}
+
+/*
+ * Add all code points except U+D800..U+DFFF to the set behind sa,
+ * as the two ranges U+0000..U+D7FF and U+E000..U+10FFFF.
+ * cnv, which and pErrorCode are not used.
+ */
+U_CFUNC void
+ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
+                               const USetAdder *sa,
+                               UConverterUnicodeSet which,
+                               UErrorCode *pErrorCode) {
+    const UChar32 belowSurrogatesEnd=0xd7ff;
+    const UChar32 aboveSurrogatesStart=0xe000;
+
+    (void)pErrorCode;
+    (void)which;
+    (void)cnv;
+
+    sa->addRange(sa->set, 0, belowSurrogatesEnd);
+    sa->addRange(sa->set, aboveSurrogatesStart, 0x10ffff);
+}
+
+/*
+ * Copy length bytes to *target without passing targetLimit.
+ * If offsets and *offsets are both non-NULL, sourceIndex is stored once per
+ * byte written and *offsets is advanced. *target is advanced past the bytes
+ * written. Bytes that do not fit are stored in cnv->charErrorBuffer (when
+ * cnv is not NULL) and *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR.
+ */
+U_CFUNC void
+ucnv_fromUWriteBytes(UConverter *cnv,
+                     const char *bytes, int32_t length,
+                     char **target, const char *targetLimit,
+                     int32_t **offsets,
+                     int32_t sourceIndex,
+                     UErrorCode *pErrorCode) {
+    char *out=*target;
+    int32_t *offsetOut=(offsets!=NULL) ? *offsets : NULL;
+
+    /* write as many bytes as fit, with offsets if requested */
+    for(; length>0 && out<targetLimit; --length) {
+        *out++=*bytes++;
+        if(offsetOut!=NULL) {
+            *offsetOut++=sourceIndex;
+        }
+    }
+    if(offsetOut!=NULL) {
+        *offsets=offsetOut;
+    }
+    *target=out;
+
+    if(length<=0) {
+        return;
+    }
+
+    /* the rest goes into the overflow buffer */
+    if(cnv!=NULL) {
+        cnv->charErrorBufferLength=(int8_t)length;
+        for(out=(char *)cnv->charErrorBuffer; length>0; --length) {
+            *out++=(uint8_t)*bytes++;
+        }
+    }
+    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+}
+
+/*
+ * Copy length UChars to *target without passing targetLimit.
+ * If offsets and *offsets are both non-NULL, sourceIndex is stored once per
+ * UChar written and *offsets is advanced. *target is advanced past the UChars
+ * written. UChars that do not fit are stored in cnv->UCharErrorBuffer (when
+ * cnv is not NULL) and *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR.
+ */
+U_CFUNC void
+ucnv_toUWriteUChars(UConverter *cnv,
+                    const UChar *uchars, int32_t length,
+                    UChar **target, const UChar *targetLimit,
+                    int32_t **offsets,
+                    int32_t sourceIndex,
+                    UErrorCode *pErrorCode) {
+    UChar *out=*target;
+    int32_t *offsetOut=(offsets!=NULL) ? *offsets : NULL;
+
+    /* write as many UChars as fit, with offsets if requested */
+    for(; length>0 && out<targetLimit; --length) {
+        *out++=*uchars++;
+        if(offsetOut!=NULL) {
+            *offsetOut++=sourceIndex;
+        }
+    }
+    if(offsetOut!=NULL) {
+        *offsets=offsetOut;
+    }
+    *target=out;
+
+    if(length<=0) {
+        return;
+    }
+
+    /* the rest goes into the overflow buffer */
+    if(cnv!=NULL) {
+        cnv->UCharErrorBufferLength=(int8_t)length;
+        for(out=cnv->UCharErrorBuffer; length>0; --length) {
+            *out++=*uchars++;
+        }
+    }
+    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+}
+
+/*
+ * Write the code point c to *target as one UChar (c<=0xffff) or as a
+ * surrogate pair, without passing targetLimit.
+ * If offsets and *offsets are both non-NULL, sourceIndex is stored once per
+ * UChar written. Whatever does not fit (the whole code point, or only the
+ * trail surrogate) is stored in cnv->UCharErrorBuffer (when cnv is not NULL)
+ * and *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR.
+ */
+U_CFUNC void
+ucnv_toUWriteCodePoint(UConverter *cnv,
+                       UChar32 c,
+                       UChar **target, const UChar *targetLimit,
+                       int32_t **offsets,
+                       int32_t sourceIndex,
+                       UErrorCode *pErrorCode) {
+    UChar *const start=*target;
+    UChar *t=start;
+
+    if(t<targetLimit) {
+        if(c>0xffff) {
+            /* supplementary code point: lead surrogate, then the trail if it fits */
+            *t++=U16_LEAD(c);
+            c=U16_TRAIL(c);
+            if(t<targetLimit) {
+                *t++=(UChar)c;
+                c=U_SENTINEL;
+            }
+        } else {
+            *t++=(UChar)c;
+            c=U_SENTINEL;
+        }
+
+        /* one offset per UChar that was written (1 or 2) */
+        if(offsets!=NULL && *offsets!=NULL) {
+            int32_t *o=*offsets;
+            int32_t written=(int32_t)(t-start);
+            while(written-->0) {
+                *o++=sourceIndex;
+            }
+            *offsets=o;
+        }
+    }
+
+    *target=t;
+
+    /* c>=0 here means that something is left over: write overflow from c */
+    if(c>=0) {
+        if(cnv!=NULL) {
+            int8_t i=0;
+            U16_APPEND_UNSAFE(cnv->UCharErrorBuffer, i, c);
+            cnv->UCharErrorBufferLength=i;
+        }
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+    }
+}
+
+#endif
diff --git a/thirdparty/icu4c/common/ucnv_cnv.h b/thirdparty/icu4c/common/ucnv_cnv.h
new file mode 100644
index 0000000000..59be8bdb37
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_cnv.h
@@ -0,0 +1,323 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* ucnv_cnv.h:
+* Definitions for converter implementations.
+*
+* Modification History:
+*
+* Date Name Description
+* 05/09/00 helena Added implementation to handle fallback mappings.
+* 06/29/2000 helena Major rewrite of the callback APIs.
+*/
+
+#ifndef UCNV_CNV_H
+#define UCNV_CNV_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_err.h"
+#include "unicode/uset.h"
+#include "uset_imp.h"
+
+U_CDECL_BEGIN
+
+/* this is used in fromUnicode DBCS tables as an "unassigned" marker */
+#define missingCharMarker 0xFFFF
+
+/*
+ * #define missingUCharMarker 0xfffe
+ *
+ * commented out because there are actually two values used in toUnicode tables:
+ * U+fffe "unassigned"
+ * U+ffff "illegal"
+ */
+
+/** Forward declaration, see ucnv_bld.h */
+struct UConverterSharedData;
+typedef struct UConverterSharedData UConverterSharedData;
+
+/* function types for UConverterImpl ---------------------------------------- */
+
+/* struct with arguments for UConverterLoad and ucnv_load() */
+typedef struct {
+ int32_t size; /* sizeof(UConverterLoadArgs) */
+ int32_t nestedLoads; /* count nested ucnv_load() calls */
+ UBool onlyTestIsLoadable; /* input: don't actually load */
+ UBool reserved0; /* reserved - for good alignment of the pointers */
+ int16_t reserved; /* reserved - for good alignment of the pointers */
+ uint32_t options; /* NOTE(review): the option bits are not defined in this header - see the callers */
+ const char *pkg, *name, *locale; /* presumably data package, converter name and locale strings - confirm against ucnv_bld */
+} UConverterLoadArgs;
+/* Initializer: size is sizeof(UConverterLoadArgs); every other field is 0, false or NULL. */
+#define UCNV_LOAD_ARGS_INITIALIZER \
+ { (int32_t)sizeof(UConverterLoadArgs), 0, false, false, 0, 0, NULL, NULL, NULL }
+
+typedef void (*UConverterLoad) (UConverterSharedData *sharedData,
+ UConverterLoadArgs *pArgs,
+ const uint8_t *raw, UErrorCode *pErrorCode);
+typedef void (*UConverterUnload) (UConverterSharedData *sharedData);
+
+typedef void (*UConverterOpen) (UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *pErrorCode);
+typedef void (*UConverterClose) (UConverter *cnv);
+
+typedef enum UConverterResetChoice {
+ UCNV_RESET_BOTH, /* presumably resets both conversion directions - confirm in ucnv.c */
+ UCNV_RESET_TO_UNICODE, /* presumably only the toUnicode state - confirm in ucnv.c */
+ UCNV_RESET_FROM_UNICODE /* presumably only the fromUnicode state - confirm in ucnv.c */
+} UConverterResetChoice;
+
+typedef void (*UConverterReset) (UConverter *cnv, UConverterResetChoice choice);
+
+/*
+ * Converter implementation function(s) for ucnv_toUnicode().
+ * If the toUnicodeWithOffsets function pointer is NULL,
+ * then the toUnicode function will be used and the offsets will be set to -1.
+ *
+ * Must maintain state across buffers. Use toUBytes[toULength] for partial input
+ * sequences; it will be checked in ucnv.c at the end of the input stream
+ * to detect truncated input.
+ * Some converters may need additional detection and may then set U_TRUNCATED_CHAR_FOUND.
+ *
+ * The toUnicodeWithOffsets must write exactly as many offset values as target
+ * units. Write offset values of -1 for when the source index corresponding to
+ * the output unit is not known (e.g., the character started in an earlier buffer).
+ * The pArgs->offsets pointer need not be moved forward.
+ *
+ * At function return, either one of the following conditions must be true:
+ * - U_BUFFER_OVERFLOW_ERROR and the target is full: target==targetLimit
+ * - another error code with toUBytes[toULength] set to the offending input
+ * - no error, and the source is consumed: source==sourceLimit
+ *
+ * The ucnv.c code will handle the end of the input
+ * (reset, and truncation detection) and callbacks.
+ */
+typedef void (*UConverterToUnicode) (UConverterToUnicodeArgs *, UErrorCode *);
+
+/*
+ * Same rules as for UConverterToUnicode.
+ * A lead surrogate is kept in fromUChar32 across buffers, and if an error
+ * occurs, then the offending input code point must be put into fromUChar32
+ * as well.
+ */
+typedef void (*UConverterFromUnicode) (UConverterFromUnicodeArgs *, UErrorCode *);
+
+/*
+ * Converter implementation function for ucnv_convertEx(), for direct conversion
+ * between two charsets without pivoting through UTF-16.
+ * The rules are the same as for UConverterToUnicode and UConverterFromUnicode.
+ * In addition,
+ * - The toUnicode side must behave and keep state exactly like the
+ * UConverterToUnicode implementation for the same source charset.
+ * - A U_USING_DEFAULT_WARNING can be set to request to temporarily fall back
+ * to pivoting. When this function is called, the conversion framework makes
+ * sure that this warning is not set on input.
+ * - Continuing a partial match and flushing the toUnicode replay buffer
+ * are handled by pivoting, using the toUnicode and fromUnicode functions.
+ */
+typedef void (*UConverterConvert) (UConverterFromUnicodeArgs *pFromUArgs,
+ UConverterToUnicodeArgs *pToUArgs,
+ UErrorCode *pErrorCode);
+
+/*
+ * Converter implementation function for ucnv_getNextUChar().
+ * If the function pointer is NULL, then the toUnicode function will be used.
+ *
+ * Will be called at a character boundary (toULength==0).
+ * May return with
+ * - U_INDEX_OUTOFBOUNDS_ERROR if there was no output for the input
+ * (the return value will be ignored)
+ * - U_TRUNCATED_CHAR_FOUND or another error code (never U_BUFFER_OVERFLOW_ERROR!)
+ * with toUBytes[toULength] set to the offending input
+ * (the return value will be ignored)
+ * - return UCNV_GET_NEXT_UCHAR_USE_TO_U, without moving the source pointer,
+ * to indicate that the ucnv.c code shall call the toUnicode function instead
+ * - return a real code point result
+ *
+ * Unless UCNV_GET_NEXT_UCHAR_USE_TO_U is returned, the source bytes must be consumed.
+ *
+ * The ucnv.c code will handle the end of the input (reset)
+ * (except for truncation detection!) and callbacks.
+ */
+typedef UChar32 (*UConverterGetNextUChar) (UConverterToUnicodeArgs *, UErrorCode *);
+
+typedef void (*UConverterGetStarters)(const UConverter* converter,
+ UBool starters[256],
+ UErrorCode *pErrorCode);
+
+/* If this function pointer is null or if the function returns null
+ * the name field in static data struct should be returned by
+ * ucnv_getName() API function
+ */
+typedef const char * (*UConverterGetName) (const UConverter *cnv);
+
+/**
+ * Write the codepage substitution character.
+ * If this function is not set, then ucnv_cbFromUWriteSub() writes
+ * the substitution character from UConverter.
+ * For stateful converters, it is typically necessary to handle this
+ * specifically for the converter in order to properly maintain the state.
+ */
+typedef void (*UConverterWriteSub) (UConverterFromUnicodeArgs *pArgs, int32_t offsetIndex, UErrorCode *pErrorCode);
+
+/**
+ * For converter-specific safeClone processing
+ * If this function is not set, then ucnv_safeClone assumes that the converter has no private data that changes
+ * after the converter is done opening.
+ * If this function is set, then it is called just after a memcpy() of
+ * converter data to the new, empty converter, and is expected to set up
+ * the initial state of the converter. It is not expected to increment the
+ * reference counts of the standard data types such as the shared data.
+ */
+typedef UConverter * (*UConverterSafeClone) (const UConverter *cnv,
+ void *stackBuffer,
+ int32_t *pBufferSize,
+ UErrorCode *status);
+
+/**
+ * Filters for some ucnv_getUnicodeSet() implementation code.
+ */
+typedef enum UConverterSetFilter {
+ UCNV_SET_FILTER_NONE, /* first enumerator, value 0 */
+ UCNV_SET_FILTER_DBCS_ONLY,
+ UCNV_SET_FILTER_2022_CN,
+ UCNV_SET_FILTER_SJIS,
+ UCNV_SET_FILTER_GR94DBCS,
+ UCNV_SET_FILTER_HZ,
+ UCNV_SET_FILTER_COUNT /* number of filter values above; must stay the last enumerator */
+} UConverterSetFilter;
+
+/**
+ * Fills the set of Unicode code points that can be converted by an ICU converter.
+ * The API function ucnv_getUnicodeSet() clears the USet before calling
+ * the converter's getUnicodeSet() implementation; the converter should only
+ * add the appropriate code points to allow recursive use.
+ * For example, the ISO-2022-JP converter will call each subconverter's
+ * getUnicodeSet() implementation to consecutively add code points to
+ * the same USet, which will result in a union of the sets of all subconverters.
+ *
+ * For more documentation, see ucnv_getUnicodeSet() in ucnv.h.
+ */
+typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv,
+ const USetAdder *sa,
+ UConverterUnicodeSet which,
+ UErrorCode *pErrorCode);
+
+UBool CONVERSION_U_SUCCESS (UErrorCode err);
+
+/**
+ * UConverterImpl contains all the data and functions for a converter type.
+ * Its function pointers work much like a C++ vtable.
+ * Many converter types need to define only a subset of the functions;
+ * when a function pointer is NULL, then a default action will be performed.
+ *
+ * Every converter type must implement toUnicode, fromUnicode, and getNextUChar,
+ * otherwise the converter may crash.
+ * Every converter type that has variable-length codepage sequences should
+ * also implement toUnicodeWithOffsets and fromUnicodeWithOffsets for
+ * correct offset handling.
+ * All other functions may or may not be implemented - it depends only on
+ * whether the converter type needs them.
+ *
+ * When open() fails, then close() will be called, if present.
+ */
+struct UConverterImpl {
+ UConverterType type;
+
+ UConverterLoad load;
+ UConverterUnload unload;
+
+ UConverterOpen open;
+ UConverterClose close; /* also called when open() fails, if present */
+ UConverterReset reset;
+
+ UConverterToUnicode toUnicode;
+ UConverterToUnicode toUnicodeWithOffsets; /* if NULL, toUnicode is used and the offsets are set to -1 */
+ UConverterFromUnicode fromUnicode;
+ UConverterFromUnicode fromUnicodeWithOffsets;
+ UConverterGetNextUChar getNextUChar; /* per the UConverterGetNextUChar comment: if NULL, toUnicode is used */
+
+ UConverterGetStarters getStarters;
+ UConverterGetName getName; /* if NULL (or it returns NULL), ucnv_getName() returns the static data name */
+ UConverterWriteSub writeSub; /* if not set, ucnv_cbFromUWriteSub() writes the substitution character from UConverter */
+ UConverterSafeClone safeClone; /* if not set, ucnv_safeClone assumes no private data changes after opening */
+ UConverterGetUnicodeSet getUnicodeSet;
+
+ UConverterConvert toUTF8; /* direct conversion without pivoting through UTF-16, see UConverterConvert */
+ UConverterConvert fromUTF8; /* direct conversion without pivoting through UTF-16, see UConverterConvert */
+};
+
+extern const UConverterSharedData
+ _MBCSData, _Latin1Data,
+ _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData,
+ _ISO2022Data,
+ _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
+ _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,
+ _HZData,_ISCIIData, _SCSUData, _ASCIIData,
+ _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData;
+
+U_CDECL_END
+
+/** Always use fallbacks from codepage to Unicode (both macros ignore their argument and expand to true) */
+#define TO_U_USE_FALLBACK(useFallback) true
+#define UCNV_TO_U_USE_FALLBACK(cnv) true
+
+/** Use fallbacks from Unicode to codepage when cnv->useFallback or for private-use code points (IS_PRIVATE_USE accepts U+E000..U+F8FF and U+F0000..U+10FFFF) */
+#define IS_PRIVATE_USE(c) ((uint32_t)((c)-0xe000)<0x1900 || (uint32_t)((c)-0xf0000)<0x20000)
+#define FROM_U_USE_FALLBACK(useFallback, c) ((useFallback) || IS_PRIVATE_USE(c))
+#define UCNV_FROM_U_USE_FALLBACK(cnv, c) FROM_U_USE_FALLBACK((cnv)->useFallback, c)
+
+/**
+ * Magic number for ucnv_getNextUChar(), returned by a
+ * getNextUChar() implementation to indicate to use the converter's toUnicode()
+ * instead of the native function.
+ * @internal
+ */
+#define UCNV_GET_NEXT_UCHAR_USE_TO_U -9
+
+U_CFUNC void
+ucnv_getCompleteUnicodeSet(const UConverter *cnv,
+ const USetAdder *sa,
+ UConverterUnicodeSet which,
+ UErrorCode *pErrorCode);
+
+U_CFUNC void
+ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
+ const USetAdder *sa,
+ UConverterUnicodeSet which,
+ UErrorCode *pErrorCode);
+
+U_CFUNC void
+ucnv_fromUWriteBytes(UConverter *cnv,
+ const char *bytes, int32_t length,
+ char **target, const char *targetLimit,
+ int32_t **offsets,
+ int32_t sourceIndex,
+ UErrorCode *pErrorCode);
+U_CFUNC void
+ucnv_toUWriteUChars(UConverter *cnv,
+ const UChar *uchars, int32_t length,
+ UChar **target, const UChar *targetLimit,
+ int32_t **offsets,
+ int32_t sourceIndex,
+ UErrorCode *pErrorCode);
+
+U_CFUNC void
+ucnv_toUWriteCodePoint(UConverter *cnv,
+ UChar32 c,
+ UChar **target, const UChar *targetLimit,
+ int32_t **offsets,
+ int32_t sourceIndex,
+ UErrorCode *pErrorCode);
+
+#endif
+
+#endif /* UCNV_CNV_H */
diff --git a/thirdparty/icu4c/common/ucnv_ct.cpp b/thirdparty/icu4c/common/ucnv_ct.cpp
new file mode 100644
index 0000000000..b40e1b2c97
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_ct.cpp
@@ -0,0 +1,646 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2010-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: ucnv_ct.c
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010Dec09
+* created by: Michael Ow
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/uset.h"
+#include "unicode/ucnv_err.h"
+#include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
+#include "ucnv_imp.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "ucnvmbcs.h"
+#include "cstring.h"
+#include "cmemory.h"
+
+typedef enum {
+ INVALID = -2, /* returned by findStateFromEscSeq() when no escape sequence matches */
+ DO_SEARCH = -1, /* returned by getState() when no fixed range test accepts the code point */
+ /* Values >= 0 are used as indices into myConverterArray and escSeqCompoundText. */
+ COMPOUND_TEXT_SINGLE_0 = 0,
+ COMPOUND_TEXT_SINGLE_1 = 1,
+ COMPOUND_TEXT_SINGLE_2 = 2,
+ COMPOUND_TEXT_SINGLE_3 = 3,
+
+ COMPOUND_TEXT_DOUBLE_1 = 4,
+ COMPOUND_TEXT_DOUBLE_2 = 5,
+ COMPOUND_TEXT_DOUBLE_3 = 6,
+ COMPOUND_TEXT_DOUBLE_4 = 7,
+ COMPOUND_TEXT_DOUBLE_5 = 8,
+ COMPOUND_TEXT_DOUBLE_6 = 9,
+ COMPOUND_TEXT_DOUBLE_7 = 10,
+
+ COMPOUND_TEXT_TRIPLE_DOUBLE = 11,
+
+ IBM_915 = 12,
+ IBM_916 = 13,
+ IBM_914 = 14,
+ IBM_874 = 15,
+ IBM_912 = 16,
+ IBM_913 = 17,
+ ISO_8859_14 = 18,
+ IBM_923 = 19,
+ NUM_OF_CONVERTERS = 20 /* array dimension: one more than the highest slot index */
+} COMPOUND_TEXT_CONVERTERS;
+/* The fromUnicode search probes slots 1..SEARCH_LENGTH-1, i.e. COMPOUND_TEXT_SINGLE_1 through COMPOUND_TEXT_TRIPLE_DOUBLE. */
+#define SEARCH_LENGTH 12
+
+static const uint8_t escSeqCompoundText[NUM_OF_CONVERTERS][5] = { /* one escape sequence per converter slot; each row is read up to its first 0 byte */
+ /* Single */
+ { 0x1B, 0x2D, 0x41, 0, 0 },
+ { 0x1B, 0x2D, 0x4D, 0, 0 },
+ { 0x1B, 0x2D, 0x46, 0, 0 },
+ { 0x1B, 0x2D, 0x47, 0, 0 },
+
+ /* Double */
+ { 0x1B, 0x24, 0x29, 0x41, 0 },
+ { 0x1B, 0x24, 0x29, 0x42, 0 },
+ { 0x1B, 0x24, 0x29, 0x43, 0 },
+ { 0x1B, 0x24, 0x29, 0x44, 0 },
+ { 0x1B, 0x24, 0x29, 0x47, 0 },
+ { 0x1B, 0x24, 0x29, 0x48, 0 },
+ { 0x1B, 0x24, 0x29, 0x49, 0 },
+
+ /* Triple/Double */
+ { 0x1B, 0x25, 0x47, 0, 0 },
+
+ /*IBM-915*/
+ { 0x1B, 0x2D, 0x4C, 0, 0 },
+ /*IBM-916*/
+ { 0x1B, 0x2D, 0x48, 0, 0 },
+ /*IBM-914*/
+ { 0x1B, 0x2D, 0x44, 0, 0 },
+ /*IBM-874*/
+ { 0x1B, 0x2D, 0x54, 0, 0 },
+ /*IBM-912*/
+ { 0x1B, 0x2D, 0x42, 0, 0 },
+ /* IBM-913 */
+ { 0x1B, 0x2D, 0x43, 0, 0 },
+ /* ISO-8859_14 */
+ { 0x1B, 0x2D, 0x5F, 0, 0 },
+ /* IBM-923 */
+ { 0x1B, 0x2D, 0x62, 0, 0 },
+};
+
+#define ESC_START 0x1B /* first byte of every row in escSeqCompoundText */
+/* Range tests used by getState() to choose a converter slot. The macro argument is not parenthesized and is evaluated several times: pass a plain variable. */
+#define isASCIIRange(codepoint) \
+ ((codepoint == 0x0000) || (codepoint == 0x0009) || (codepoint == 0x000A) || \
+ (codepoint >= 0x0020 && codepoint <= 0x007f) || (codepoint >= 0x00A0 && codepoint <= 0x00FF))
+
+#define isIBM915(codepoint) \
+ ((codepoint >= 0x0401 && codepoint <= 0x045F) || (codepoint == 0x2116))
+
+#define isIBM916(codepoint) \
+ ((codepoint >= 0x05D0 && codepoint <= 0x05EA) || (codepoint == 0x2017) || (codepoint == 0x203E))
+
+#define isCompoundS3(codepoint) \
+ ((codepoint == 0x060C) || (codepoint == 0x061B) || (codepoint == 0x061F) || (codepoint >= 0x0621 && codepoint <= 0x063A) || \
+ (codepoint >= 0x0640 && codepoint <= 0x0652) || (codepoint >= 0x0660 && codepoint <= 0x066D) || (codepoint == 0x200B) || \
+ (codepoint >= 0x0FE70 && codepoint <= 0x0FE72) || (codepoint == 0x0FE74) || (codepoint >= 0x0FE76 && codepoint <= 0x0FEBE))
+
+#define isCompoundS2(codepoint) \
+ ((codepoint == 0x02BC) || (codepoint == 0x02BD) || (codepoint >= 0x0384 && codepoint <= 0x03CE) || (codepoint == 0x2015))
+
+#define isIBM914(codepoint) \
+ ((codepoint == 0x0100) || (codepoint == 0x0101) || (codepoint == 0x0112) || (codepoint == 0x0113) || (codepoint == 0x0116) || (codepoint == 0x0117) || \
+ (codepoint == 0x0122) || (codepoint == 0x0123) || (codepoint >= 0x0128 && codepoint <= 0x012B) || (codepoint == 0x012E) || (codepoint == 0x012F) || \
+ (codepoint >= 0x0136 && codepoint <= 0x0138) || (codepoint == 0x013B) || (codepoint == 0x013C) || (codepoint == 0x0145) || (codepoint == 0x0146) || \
+ (codepoint >= 0x014A && codepoint <= 0x014D) || (codepoint == 0x0156) || (codepoint == 0x0157) || (codepoint >= 0x0166 && codepoint <= 0x016B) || \
+ (codepoint == 0x0172) || (codepoint == 0x0173))
+
+#define isIBM874(codepoint) \
+ ((codepoint >= 0x0E01 && codepoint <= 0x0E3A) || (codepoint >= 0x0E3F && codepoint <= 0x0E5B))
+
+#define isIBM912(codepoint) /* code points getState() routes to IBM_912; the argument is not parenthesized, pass a plain variable */ \
+ ((codepoint >= 0x0102 && codepoint <= 0x0107) || (codepoint >= 0x010C && codepoint <= 0x0111) || (codepoint >= 0x0118 && codepoint <= 0x011B) || \
+ (codepoint == 0x0139) || (codepoint == 0x013A) || (codepoint == 0x013D) || (codepoint == 0x013E) || (codepoint >= 0x0141 && codepoint <= 0x0144) || \
+ (codepoint == 0x0147) || (codepoint == 0x0148) || (codepoint == 0x0150) || (codepoint == 0x0151) || (codepoint == 0x0154) || (codepoint == 0x0155) || \
+ (codepoint >= 0x0158 && codepoint <= 0x015B) || (codepoint == 0x015E) || (codepoint == 0x015F) || (codepoint >= 0x0160 && codepoint <= 0x0165) || \
+ (codepoint == 0x016E) || (codepoint == 0x016F) || (codepoint == 0x0170) || (codepoint == 0x0171) || (codepoint >= 0x0179 && codepoint <= 0x017E) || \
+ (codepoint == 0x02C7) || (codepoint == 0x02D8) || (codepoint == 0x02D9) || (codepoint == 0x02DB) || (codepoint == 0x02DD))
+
+#define isIBM913(codepoint) /* code points getState() routes to IBM_913 */ \
+ ((codepoint >= 0x0108 && codepoint <= 0x010B) || (codepoint == 0x011C) || \
+ (codepoint == 0x011D) || (codepoint == 0x0120) || (codepoint == 0x0121) || \
+ (codepoint >= 0x0124 && codepoint <= 0x0127) || (codepoint == 0x0134) || (codepoint == 0x0135) || \
+ (codepoint == 0x015C) || (codepoint == 0x015D) || (codepoint == 0x016C) || (codepoint == 0x016D))
+/* isCompoundS1: code points getState() routes to COMPOUND_TEXT_SINGLE_1 (tested last in getState()). */
+#define isCompoundS1(codepoint) \
+ ((codepoint == 0x011E) || (codepoint == 0x011F) || (codepoint == 0x0130) || \
+ (codepoint == 0x0131) || (codepoint >= 0x0218 && codepoint <= 0x021B))
+/* isISO8859_14: code points getState() routes to ISO_8859_14. */
+#define isISO8859_14(codepoint) \
+ ((codepoint >= 0x0174 && codepoint <= 0x0177) || (codepoint == 0x1E0A) || \
+ (codepoint == 0x1E0B) || (codepoint == 0x1E1E) || (codepoint == 0x1E1F) || \
+ (codepoint == 0x1E40) || (codepoint == 0x1E41) || (codepoint == 0x1E56) || \
+ (codepoint == 0x1E57) || (codepoint == 0x1E60) || (codepoint == 0x1E61) || \
+ (codepoint == 0x1E6A) || (codepoint == 0x1E6B) || (codepoint == 0x1EF2) || \
+ (codepoint == 0x1EF3) || (codepoint >= 0x1E80 && codepoint <= 0x1E85))
+/* isIBM923: code points getState() routes to IBM_923. */
+#define isIBM923(codepoint) \
+ ((codepoint == 0x0152) || (codepoint == 0x0153) || (codepoint == 0x0178) || (codepoint == 0x20AC))
+
+
+typedef struct{
+ UConverterSharedData *myConverterArray[NUM_OF_CONVERTERS]; /* shared data per converter slot; slot COMPOUND_TEXT_SINGLE_0 is set to NULL by _CompoundTextOpen */
+ COMPOUND_TEXT_CONVERTERS state; /* currently selected slot; read at the start and saved at the end of both conversion functions */
+} UConverterDataCompoundText;
+
+/*********** Compound Text Converter Protos ***********/
+U_CDECL_BEGIN
+static void U_CALLCONV
+_CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
+
+static void U_CALLCONV
+ _CompoundTextClose(UConverter *converter);
+
+static void U_CALLCONV
+_CompoundTextReset(UConverter *converter, UConverterResetChoice choice);
+
+static const char* U_CALLCONV
+_CompoundTextgetName(const UConverter* cnv);
+
+
+static int32_t findNextEsc(const char *source, const char *sourceLimit) { /* offset of the first ESC after source[0], else the span length */
+    const int32_t spanLength = static_cast<int32_t>(sourceLimit - source);
+    int32_t pos = 1; /* source[0] itself is never tested */
+    while (pos < spanLength) {
+        if (source[pos] == 0x1B) {
+            return pos;
+        }
+        ++pos;
+    }
+    return spanLength;
+}
+
+static COMPOUND_TEXT_CONVERTERS getState(int codepoint) {
+    /*
+     * Pick the converter slot whose hard-coded range test accepts the code point.
+     * The tests are tried in a fixed priority order and the first hit wins.
+     * DO_SEARCH is returned when no range test accepts the code point.
+     * The range macros do not parenthesize their argument, so the plain
+     * parameter is passed to each of them.
+     */
+
+    if (isASCIIRange(codepoint)) {
+        return COMPOUND_TEXT_SINGLE_0;
+    }
+
+    /* code page range tests */
+    if (isIBM912(codepoint)) { return IBM_912; }
+    if (isIBM913(codepoint)) { return IBM_913; }
+    if (isISO8859_14(codepoint)) { return ISO_8859_14; }
+    if (isIBM923(codepoint)) { return IBM_923; }
+    if (isIBM874(codepoint)) { return IBM_874; }
+    if (isIBM914(codepoint)) { return IBM_914; }
+
+    /* remaining range tests */
+    if (isCompoundS2(codepoint)) { return COMPOUND_TEXT_SINGLE_2; }
+    if (isCompoundS3(codepoint)) { return COMPOUND_TEXT_SINGLE_3; }
+    if (isIBM916(codepoint)) { return IBM_916; }
+    if (isIBM915(codepoint)) { return IBM_915; }
+    if (isCompoundS1(codepoint)) { return COMPOUND_TEXT_SINGLE_1; }
+
+    /* no fixed range matched */
+    return DO_SEARCH;
+}
+
+static COMPOUND_TEXT_CONVERTERS findStateFromEscSeq(const char* source, const char* sourceLimit, const uint8_t* toUBytesBuffer, int32_t toUBytesBufferLength, UErrorCode *err) {
+ COMPOUND_TEXT_CONVERTERS state = INVALID;
+ UBool matchFound = FALSE;
+ int32_t i, n, offset = toUBytesBufferLength;
+ /* Compares the bytes in toUBytesBuffer followed by the bytes at source against each escSeqCompoundText row; returns the first matching slot, or INVALID. */
+ for (i = 0; i < NUM_OF_CONVERTERS; i++) {
+ matchFound = TRUE;
+ for (n = 0; escSeqCompoundText[i][n] != 0; n++) {
+ if (n < toUBytesBufferLength) { /* byte n comes from the caller's toUBytes buffer */
+ if (toUBytesBuffer[n] != escSeqCompoundText[i][n]) {
+ matchFound = FALSE;
+ break;
+ }
+ } else if ((source + (n - offset)) >= sourceLimit) { /* input ends before row i is fully compared */
+ *err = U_TRUNCATED_CHAR_FOUND; /* only the inner loop is left; later rows are still tried and *err stays set */
+ matchFound = FALSE;
+ break;
+ } else if (*(source + (n - offset)) != escSeqCompoundText[i][n]) {
+ matchFound = FALSE;
+ break;
+ }
+ }
+
+ if (matchFound) {
+ break;
+ }
+ }
+
+ if (matchFound) {
+ state = (COMPOUND_TEXT_CONVERTERS)i; /* the row index is the converter slot */
+ }
+
+ return state;
+}
+
+static void U_CALLCONV
+_CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ /* allocate the state and load every sub-converter; release everything on failure or test-only open */
+    static const struct { COMPOUND_TEXT_CONVERTERS slot; const char *name; } subConverters[] = {
+        { COMPOUND_TEXT_SINGLE_1, "icu-internal-compound-s1" },
+        { COMPOUND_TEXT_SINGLE_2, "icu-internal-compound-s2" },
+        { COMPOUND_TEXT_SINGLE_3, "icu-internal-compound-s3" },
+        { COMPOUND_TEXT_DOUBLE_1, "icu-internal-compound-d1" },
+        { COMPOUND_TEXT_DOUBLE_2, "icu-internal-compound-d2" },
+        { COMPOUND_TEXT_DOUBLE_3, "icu-internal-compound-d3" },
+        { COMPOUND_TEXT_DOUBLE_4, "icu-internal-compound-d4" },
+        { COMPOUND_TEXT_DOUBLE_5, "icu-internal-compound-d5" },
+        { COMPOUND_TEXT_DOUBLE_6, "icu-internal-compound-d6" },
+        { COMPOUND_TEXT_DOUBLE_7, "icu-internal-compound-d7" },
+        { COMPOUND_TEXT_TRIPLE_DOUBLE, "icu-internal-compound-t" },
+        { IBM_915, "ibm-915_P100-1995" },
+        { IBM_916, "ibm-916_P100-1995" },
+        { IBM_914, "ibm-914_P100-1995" },
+        { IBM_874, "ibm-874_P100-1995" },
+        { IBM_912, "ibm-912_P100-1995" },
+        { IBM_913, "ibm-913_P100-2000" },
+        { ISO_8859_14, "iso-8859_14-1998" },
+        { IBM_923, "ibm-923_P100-1998" }
+    };
+    UConverterNamePieces stackPieces;
+    UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
+    UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) uprv_malloc (sizeof (UConverterDataCompoundText));
+    cnv->extraInfo = myConverterData;
+    if (myConverterData == NULL) {
+        *errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_0] = NULL; /* this slot has no shared data */
+    for (size_t k = 0; k < sizeof(subConverters) / sizeof(subConverters[0]); ++k) {
+        myConverterData->myConverterArray[subConverters[k].slot] = ucnv_loadSharedData(subConverters[k].name, &stackPieces, &stackArgs, errorCode);
+    }
+    if (U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) {
+        _CompoundTextClose(cnv);
+    } else {
+        myConverterData->state = (COMPOUND_TEXT_CONVERTERS)0;
+    }
+}
+
+
+static void U_CALLCONV
+_CompoundTextClose(UConverter *converter) {
+    /* Unload every sub-converter's shared data, then free and clear extraInfo. */
+    UConverterDataCompoundText *data = (UConverterDataCompoundText *)(converter->extraInfo);
+
+    if (data == NULL) {
+        return; /* extraInfo is not set, nothing to release */
+    }
+    for (int32_t slot = 0; slot < NUM_OF_CONVERTERS; ++slot) {
+        UConverterSharedData *shared = data->myConverterArray[slot];
+        if (shared != NULL) {
+            ucnv_unloadSharedDataIfReady(shared);
+        }
+    }
+    uprv_free(data);
+    converter->extraInfo = NULL;
+}
+
+static void U_CALLCONV
+_CompoundTextReset(UConverter *converter, UConverterResetChoice choice) { /* no-op reset callback */
+ (void)converter; /* unused */
+ (void)choice; /* unused; NOTE(review): UConverterDataCompoundText.state is not reset here, so the selected slot survives a reset - confirm this is intended */
+}
+
+static const char* U_CALLCONV
+_CompoundTextgetName(const UConverter* cnv){ /* getName callback: returns a fixed name */
+ (void)cnv; /* unused: the name does not depend on the converter instance */
+ return "x11-compound-text";
+}
+
+static void U_CALLCONV
+UConverter_fromUnicode_CompoundText_OFFSETS(UConverterFromUnicodeArgs* args, UErrorCode* err){
+ UConverter *cnv = args->converter;
+ uint8_t *target = (uint8_t *) args->target;
+ const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
+ const UChar* source = args->source;
+ const UChar* sourceLimit = args->sourceLimit;
+ /* int32_t* offsets = args->offsets; */ /* args->offsets is never read or written by this function */
+ UChar32 sourceChar;
+ UBool useFallback = cnv->useFallback;
+ uint8_t tmpTargetBuffer[7]; /* escape sequence plus converted bytes of one character; NOTE(review): the capacity is not checked against escape length + pValueLength */
+ int32_t tmpTargetBufferLength = 0;
+ COMPOUND_TEXT_CONVERTERS currentState, tmpState;
+ uint32_t pValue;
+ int32_t pValueLength = 0;
+ int32_t i, n, j;
+ /* Converts UTF-16 input to compound text: per character, pick a converter slot, emit its escape sequence when the slot changes, then emit the converted bytes. */
+ UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo;
+
+ currentState = myConverterData->state;
+
+ /* check if the last codepoint of previous buffer was a lead surrogate*/
+ if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
+ goto getTrail; /* jumps into the loop body below to look for the matching trail unit */
+ }
+
+ while( source < sourceLimit){
+ if(target < targetLimit){
+
+ sourceChar = *(source++);
+ /*check if the char is a First surrogate*/
+ if(U16_IS_SURROGATE(sourceChar)) {
+ if(U16_IS_SURROGATE_LEAD(sourceChar)) {
+getTrail:
+ /*look ahead to find the trail surrogate*/
+ if(source < sourceLimit) {
+ /* test the following code unit */
+ UChar trail=(UChar) *source;
+ if(U16_IS_TRAIL(trail)) {
+ source++;
+ sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
+ cnv->fromUChar32=0x00;
+ /* convert this supplementary code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+ cnv->fromUChar32=sourceChar;
+ break;
+ }
+ } else {
+ /* no more input */
+ cnv->fromUChar32=sourceChar; /* keep the lead surrogate for the next call */
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+ cnv->fromUChar32=sourceChar;
+ break;
+ }
+ }
+
+ tmpTargetBufferLength = 0;
+ tmpState = getState(sourceChar);
+
+ if (tmpState != DO_SEARCH && currentState != tmpState) {
+ /* Get escape sequence if necessary */
+ currentState = tmpState;
+ for (i = 0; escSeqCompoundText[currentState][i] != 0; i++) {
+ tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][i];
+ }
+ }
+
+ if (tmpState == DO_SEARCH) {
+ /* Test all available converters */
+ for (i = 1; i < SEARCH_LENGTH; i++) {
+ pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[i], sourceChar, &pValue, useFallback);
+ if (pValueLength > 0) {
+ tmpState = (COMPOUND_TEXT_CONVERTERS)i;
+ if (currentState != tmpState) {
+ currentState = tmpState;
+ for (j = 0; escSeqCompoundText[currentState][j] != 0; j++) {
+ tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][j];
+ }
+ }
+ for (n = (pValueLength - 1); n >= 0; n--) { /* most significant byte of pValue first */
+ tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8));
+ }
+ break;
+ }
+ } /* NOTE(review): when no slot converts the character nothing is buffered and no error is set, so the character is dropped - confirm */
+ } else if (tmpState == COMPOUND_TEXT_SINGLE_0) {
+ tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)sourceChar; /* isASCIIRange code points are written as a single byte */
+ } else {
+ pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[currentState], sourceChar, &pValue, useFallback);
+ if (pValueLength > 0) {
+ for (n = (pValueLength - 1); n >= 0; n--) {
+ tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8));
+ }
+ }
+ }
+ /* copy the buffered bytes to the target; i stops at the first byte that did not fit */
+ for (i = 0; i < tmpTargetBufferLength; i++) {
+ if (target < targetLimit) {
+ *target++ = tmpTargetBuffer[i];
+ } else {
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ /* bytes that did not fit are appended to the converter's charErrorBuffer */
+ if (*err == U_BUFFER_OVERFLOW_ERROR) {
+ for (; i < tmpTargetBufferLength; i++) {
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = tmpTargetBuffer[i];
+ }
+ }
+ } else {
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+
+ /*save the state and return */
+ myConverterData->state = currentState;
+ args->source = source;
+ args->target = (char*)target;
+}
+
+
+static void U_CALLCONV
+UConverter_toUnicode_CompoundText_OFFSETS(UConverterToUnicodeArgs *args,
+ UErrorCode* err){
+ const char *mySource = (char *) args->source;
+ UChar *myTarget = args->target;
+ const char *mySourceLimit = args->sourceLimit;
+ const char *tmpSourceLimit = mySourceLimit;
+ uint32_t mySourceChar = 0x0000;
+ COMPOUND_TEXT_CONVERTERS currentState, tmpState;
+ int32_t sourceOffset = 0;
+ UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) args->converter->extraInfo;
+ UConverterSharedData* savedSharedData = NULL;
+ /* Converts compound text to UTF-16: escape sequences select a converter slot, the bytes up to the next ESC are converted with that slot. */
+ UConverterToUnicodeArgs subArgs;
+ int32_t minArgsSize;
+
+ /* set up the subconverter arguments */
+ if(args->size<sizeof(UConverterToUnicodeArgs)) {
+ minArgsSize = args->size;
+ } else {
+ minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
+ }
+
+ uprv_memcpy(&subArgs, args, minArgsSize); /* subArgs shares args->converter, so the sub-conversion works on the same UConverter */
+ subArgs.size = (uint16_t)minArgsSize;
+
+ currentState = tmpState = myConverterData->state;
+
+ while(mySource < mySourceLimit){
+ if(myTarget < args->targetLimit){
+ if (args->converter->toULength > 0) {
+ mySourceChar = args->converter->toUBytes[0]; /* bytes were saved by an earlier call: resume from them */
+ } else {
+ mySourceChar = (uint8_t)*mySource;
+ }
+
+ if (mySourceChar == ESC_START) {
+ tmpState = findStateFromEscSeq(mySource, mySourceLimit, args->converter->toUBytes, args->converter->toULength, err);
+
+ if (*err == U_TRUNCATED_CHAR_FOUND) {
+ for (; mySource < mySourceLimit;) { /* save the rest of the input in toUBytes for the next call */
+ args->converter->toUBytes[args->converter->toULength++] = *mySource++;
+ }
+ *err = U_ZERO_ERROR; /* the truncation is not reported to the caller here */
+ break;
+ } else if (tmpState == INVALID) {
+ if (args->converter->toULength == 0) {
+ mySource++; /* skip over the 0x1b byte */
+ }
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+
+ if (tmpState != currentState) {
+ currentState = tmpState;
+ }
+ /* number of escape sequence bytes still in the source, i.e. not already held in toUBytes */
+ sourceOffset = static_cast<int32_t>(uprv_strlen((char*)escSeqCompoundText[currentState]) - args->converter->toULength);
+
+ mySource += sourceOffset;
+
+ args->converter->toULength = 0;
+ }
+
+ if (currentState == COMPOUND_TEXT_SINGLE_0) {
+ while (mySource < mySourceLimit) {
+ if (*mySource == ESC_START) {
+ break;
+ }
+ if (myTarget < args->targetLimit) {
+ *myTarget++ = 0x00ff&(*mySource++); /* the byte value is used directly as the UTF-16 unit */
+ } else {
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ } else if (mySource < mySourceLimit){
+ sourceOffset = findNextEsc(mySource, mySourceLimit);
+ /* convert only the bytes up to the next ESC (or the end of the input) with the selected slot */
+ tmpSourceLimit = mySource + sourceOffset;
+
+ subArgs.source = mySource;
+ subArgs.sourceLimit = tmpSourceLimit;
+ subArgs.target = myTarget;
+ savedSharedData = subArgs.converter->sharedData;
+ subArgs.converter->sharedData = myConverterData->myConverterArray[currentState];
+ /* sharedData is swapped only for the duration of this call and restored right after */
+ ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
+
+ subArgs.converter->sharedData = savedSharedData;
+
+ mySource = subArgs.source;
+ myTarget = subArgs.target;
+
+ if (U_FAILURE(*err)) {
+ if(*err == U_BUFFER_OVERFLOW_ERROR) {
+ if(subArgs.converter->UCharErrorBufferLength > 0) {
+ uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
+ subArgs.converter->UCharErrorBufferLength);
+ }
+ args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
+ subArgs.converter->UCharErrorBufferLength = 0;
+ }
+ break;
+ }
+ }
+ } else {
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ myConverterData->state = currentState; /* persist the selected slot for the next call */
+ args->target = myTarget;
+ args->source = mySource;
+}
+
+static void U_CALLCONV
+_CompoundText_GetUnicodeSet(const UConverter *cnv,
+                            const USetAdder *sa,
+                            UConverterUnicodeSet which,
+                            UErrorCode *pErrorCode) {
+    /* Adds the set of every sub-converter slot except slot 0, then the code points accepted by isASCIIRange(). */
+    UConverterDataCompoundText *data = (UConverterDataCompoundText *)cnv->extraInfo;
+    for (int32_t slot = 1; slot < NUM_OF_CONVERTERS; ++slot) {
+        ucnv_MBCSGetUnicodeSetForUnicode(data->myConverterArray[slot], sa, which, pErrorCode);
+    }
+    /* single code points and ranges written without a sub-converter */
+    sa->add(sa->set, 0x0000);
+    sa->add(sa->set, 0x0009);
+    sa->add(sa->set, 0x000A);
+    sa->addRange(sa->set, 0x0020, 0x007F);
+    sa->addRange(sa->set, 0x00A0, 0x00FF);
+}
+U_CDECL_END
+
+static const UConverterImpl _CompoundTextImpl = { /* entries follow the member order of struct UConverterImpl */
+
+ UCNV_COMPOUND_TEXT, /* type */
+
+ NULL, /* load */
+ NULL, /* unload */
+
+ _CompoundTextOpen, /* open */
+ _CompoundTextClose, /* close */
+ _CompoundTextReset, /* reset */
+
+ UConverter_toUnicode_CompoundText_OFFSETS, /* toUnicode */
+ UConverter_toUnicode_CompoundText_OFFSETS, /* toUnicodeWithOffsets */
+ UConverter_fromUnicode_CompoundText_OFFSETS, /* fromUnicode */
+ UConverter_fromUnicode_CompoundText_OFFSETS, /* fromUnicodeWithOffsets */
+ NULL, /* getNextUChar */
+
+ NULL, /* getStarters */
+ _CompoundTextgetName, /* getName */
+ NULL, /* writeSub */
+ NULL, /* safeClone */
+ _CompoundText_GetUnicodeSet, /* getUnicodeSet */
+ NULL, /* toUTF8 */
+ NULL /* fromUTF8 */
+};
+
+static const UConverterStaticData _CompoundTextStaticData = { /* NOTE(review): field order is defined by UConverterStaticData, declared outside this file - confirm before editing */
+ sizeof(UConverterStaticData),
+ "COMPOUND_TEXT",
+ 0,
+ UCNV_IBM,
+ UCNV_COMPOUND_TEXT,
+ 1,
+ 6,
+ { 0xef, 0, 0, 0 },
+ 1,
+ FALSE,
+ FALSE,
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+const UConverterSharedData _CompoundTextData = /* ties the static data above to the _CompoundTextImpl function table */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CompoundTextStaticData, &_CompoundTextImpl);
+
+#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
diff --git a/thirdparty/icu4c/common/ucnv_err.cpp b/thirdparty/icu4c/common/ucnv_err.cpp
new file mode 100644
index 0000000000..6b738face5
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_err.cpp
@@ -0,0 +1,486 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *****************************************************************************
+ *
+ * Copyright (C) 1998-2016, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ *****************************************************************************
+ *
+ * ucnv_err.cpp
+ * Implements error behaviour functions called by T_UConverter_{from,to}Unicode
+ *
+ *
+* Change history:
+*
+* 06/29/2000 helena Major rewrite of the callback APIs.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv_err.h"
+#include "unicode/ucnv_cb.h"
+#include "ucnv_cnv.h"
+#include "cmemory.h"
+#include "unicode/ucnv.h"
+#include "ustrfmt.h"
+
+/* Capacity, in UChars, of the scratch buffers in which the ESCAPE callbacks build their escape sequences. */
+#define VALUE_STRING_LENGTH 48
+/*Magic # 48 = 6(max number of UChars in the escape for one byte, e.g. "&#255;" or "&#xFF;") * 8(max number of bytes per char for any converter) */
+/* Code points of the ASCII characters from which the escape sequences are assembled. */
+#define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025
+#define UNICODE_U_CODEPOINT 0x0055
+#define UNICODE_X_CODEPOINT 0x0058
+#define UNICODE_RS_CODEPOINT 0x005C
+#define UNICODE_U_LOW_CODEPOINT 0x0075
+#define UNICODE_X_LOW_CODEPOINT 0x0078
+#define UNICODE_AMP_CODEPOINT 0x0026
+#define UNICODE_HASH_CODEPOINT 0x0023
+#define UNICODE_SEMICOLON_CODEPOINT 0x003B
+#define UNICODE_PLUS_CODEPOINT 0x002B
+#define UNICODE_LEFT_CURLY_CODEPOINT 0x007B
+#define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D
+#define UNICODE_SPACE_CODEPOINT 0x0020
+/*
+ * Escape styles: the ESCAPE callbacks compare the first char of a non-NULL
+ * callback context against these values to choose the output format.
+ * A value that matches none of the cases gets the same format as a NULL context.
+ */
+#define UCNV_PRV_ESCAPE_ICU 0
+#define UCNV_PRV_ESCAPE_C 'C'
+#define UCNV_PRV_ESCAPE_XML_DEC 'D'
+#define UCNV_PRV_ESCAPE_XML_HEX 'X'
+#define UCNV_PRV_ESCAPE_JAVA 'J'
+#define UCNV_PRV_ESCAPE_UNICODE 'U'
+#define UCNV_PRV_ESCAPE_CSS2 'S'
+/* Context value checked by the SKIP and SUBSTITUTE callbacks: with it, only UCNV_UNASSIGNED is handled. */
+#define UCNV_PRV_STOP_ON_ILLEGAL 'i'
+/*
+ * IS_DEFAULT_IGNORABLE_CODE_POINT
+ * This is to check if a code point has the default ignorable unicode property.
+ * As such, this list needs to be updated if the ignorable code point list ever
+ * changes.
+ * To avoid dependency on other code, this list is hard coded here.
+ * When an ignorable code point is found and is unmappable, the default callbacks
+ * will ignore it.
+ * For a list of the default ignorable code points, use this link:
+ * https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i=
+ *
+ * This list should be kept in sync with the one in CharsetCallback.java
+ *
+ * The argument is parenthesized at every use so that an expression argument
+ * (for example a | b) is compared as a whole. It is still evaluated more than
+ * once, so it must not have side effects.
+ *
+ * @param c the code point to test
+ * @return non-zero if c is in the list below, else 0
+ */
+#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) ( \
+    ((c) == 0x00AD) || \
+    ((c) == 0x034F) || \
+    ((c) == 0x061C) || \
+    ((c) == 0x115F) || \
+    ((c) == 0x1160) || \
+    (0x17B4 <= (c) && (c) <= 0x17B5) || \
+    (0x180B <= (c) && (c) <= 0x180E) || \
+    (0x200B <= (c) && (c) <= 0x200F) || \
+    (0x202A <= (c) && (c) <= 0x202E) || \
+    (0x2060 <= (c) && (c) <= 0x206F) || \
+    ((c) == 0x3164) || \
+    (0xFE00 <= (c) && (c) <= 0xFE0F) || \
+    ((c) == 0xFEFF) || \
+    ((c) == 0xFFA0) || \
+    (0xFFF0 <= (c) && (c) <= 0xFFF8) || \
+    (0x1BCA0 <= (c) && (c) <= 0x1BCA3) || \
+    (0x1D173 <= (c) && (c) <= 0x1D17A) || \
+    (0xE0000 <= (c) && (c) <= 0xE0FFF))
+
+
+/*
+ * From-Unicode STOP callback.
+ * Leaves the error code exactly as the caller set it, with one exception:
+ * an unassigned code point that is default ignorable has its error cleared.
+ */
+U_CAPI void U_EXPORT2
+UCNV_FROM_U_CALLBACK_STOP (
+                  const void *context,
+                  UConverterFromUnicodeArgs *fromUArgs,
+                  const UChar* codeUnits,
+                  int32_t length,
+                  UChar32 codePoint,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err)
+{
+    /* part of the callback signature, but not needed here */
+    (void)context; (void)fromUArgs; (void)codeUnits; (void)length;
+
+    if (reason != UCNV_UNASSIGNED || !IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
+    {
+        /* the caller must have set the error code accordingly */
+        return;
+    }
+
+    /* Skip: the code point has the unicode property of default ignorable. */
+    *err = U_ZERO_ERROR;
+}
+
+
+/*
+ * To-Unicode STOP callback.
+ * Does nothing at all: every argument is ignored and the error code stays
+ * as the caller set it.
+ */
+U_CAPI void U_EXPORT2
+UCNV_TO_U_CALLBACK_STOP (
+                   const void *context,
+                   UConverterToUnicodeArgs *toUArgs,
+                   const char* codePoints,
+                   int32_t length,
+                   UConverterCallbackReason reason,
+                   UErrorCode * err)
+{
+    (void)context;
+    (void)toUArgs;
+    (void)codePoints;
+    (void)length;
+    (void)reason;
+    (void)err;
+    /* the caller must have set the error code accordingly */
+}
+
+/*
+ * From-Unicode SKIP callback.
+ * Clears the error, so that nothing is written for the offending input, when
+ *  - the code point is unassigned and default ignorable, or
+ *  - context is NULL, or
+ *  - context starts with UCNV_PRV_STOP_ON_ILLEGAL and the reason is UCNV_UNASSIGNED.
+ * In every other case the error code set by the caller is left untouched.
+ */
+U_CAPI void U_EXPORT2
+UCNV_FROM_U_CALLBACK_SKIP (
+                  const void *context,
+                  UConverterFromUnicodeArgs *fromUArgs,
+                  const UChar* codeUnits,
+                  int32_t length,
+                  UChar32 codePoint,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err)
+{
+    (void)fromUArgs; (void)codeUnits; (void)length;
+
+    if (reason > UCNV_IRREGULAR)
+    {
+        /* ignore the reset, close and clone calls. */
+        return;
+    }
+
+    /* context is only dereferenced after the NULL test, as before */
+    if ((reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) ||
+        context == NULL ||
+        (*((const char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
+    {
+        *err = U_ZERO_ERROR;
+    }
+    /* else the caller must have set the error code accordingly. */
+}
+
+/*
+ * From-Unicode SUBSTITUTE callback.
+ * An unassigned, default ignorable code point only has its error cleared.
+ * Otherwise, if context is NULL, or context starts with
+ * UCNV_PRV_STOP_ON_ILLEGAL and the reason is UCNV_UNASSIGNED, the error is
+ * cleared and ucnv_cbFromUWriteSub() is called.
+ * In every other case the error code set by the caller is left untouched.
+ */
+U_CAPI void U_EXPORT2
+UCNV_FROM_U_CALLBACK_SUBSTITUTE (
+                  const void *context,
+                  UConverterFromUnicodeArgs *fromArgs,
+                  const UChar* codeUnits,
+                  int32_t length,
+                  UChar32 codePoint,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err)
+{
+    (void)codeUnits; (void)length;
+
+    if (reason > UCNV_IRREGULAR)
+    {
+        /* ignore the reset, close and clone calls. */
+        return;
+    }
+
+    if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
+    {
+        /* Skip: the code point has the unicode property of default ignorable. */
+        *err = U_ZERO_ERROR;
+        return;
+    }
+
+    if (context == NULL ||
+        (*((const char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
+    {
+        *err = U_ZERO_ERROR;
+        ucnv_cbFromUWriteSub(fromArgs, 0, err);
+    }
+    /* else the caller must have set the error code accordingly. */
+}
+
+/*
+ * From-Unicode ESCAPE callback.
+ * Uses uprv_itou to build a Unicode escape sequence for the offending
+ * sequence and writes that escape sequence through the converter itself.
+ * While the escape sequence is written, the converter's from-Unicode callback
+ * is temporarily replaced by UCNV_FROM_U_CALLBACK_SUBSTITUTE (if a conversion
+ * failure happens then we revert to substituting with subchar); the original
+ * callback is put back before returning.
+ *
+ * @param context   NULL, or a pointer to a char that selects the escape style
+ *                  (one of the UCNV_PRV_ESCAPE_* values; anything else gets
+ *                  the same "%UXXXX" style as NULL)
+ * @param fromArgs  conversion arguments; fromArgs->converter is the converter
+ *                  whose callback is swapped
+ * @param codeUnits the offending UTF-16 code units
+ * @param length    number of code units in codeUnits
+ * @param codePoint the offending code point; used instead of codeUnits[0] by
+ *                  several styles when length==2, and always by the CSS2 style
+ * @param reason    why the callback was called; values above UCNV_IRREGULAR
+ *                  are ignored
+ * @param err       in/out error code; reset to U_ZERO_ERROR before the escape
+ *                  sequence is written
+ */
+U_CAPI void U_EXPORT2
+UCNV_FROM_U_CALLBACK_ESCAPE (
+ const void *context,
+ UConverterFromUnicodeArgs *fromArgs,
+ const UChar *codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err)
+{
+
+ UChar valueString[VALUE_STRING_LENGTH];
+ int32_t valueStringLength = 0;
+ int32_t i = 0;
+
+ const UChar *myValueSource = NULL;
+ UErrorCode err2 = U_ZERO_ERROR;
+ UConverterFromUCallback original = NULL;
+ const void *originalContext;
+
+ /* only receive the outputs of the second ucnv_setFromUCallBack() call; never read */
+ UConverterFromUCallback ignoredCallback = NULL;
+ const void *ignoredContext;
+
+ if (reason > UCNV_IRREGULAR)
+ {
+ return;
+ }
+ else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
+ {
+ /*
+ * Skip if the codepoint has unicode property of default ignorable.
+ */
+ *err = U_ZERO_ERROR;
+ return;
+ }
+
+ /* install SUBSTITUTE for the duration of this call and remember the previous callback */
+ ucnv_setFromUCallBack (fromArgs->converter,
+ (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
+ NULL,
+ &original,
+ &originalContext,
+ &err2);
+
+ if (U_FAILURE (err2))
+ {
+ *err = err2;
+ return;
+ }
+ if(context==NULL)
+ {
+ /* default style: one "%U" escape per code unit */
+ while (i < length)
+ {
+ valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
+ valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
+ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
+ }
+ }
+ else
+ {
+ /* the first char of the context selects the escape style */
+ switch(*((char*)context))
+ {
+ case UCNV_PRV_ESCAPE_JAVA:
+ /* one "\u" escape per code unit */
+ while (i < length)
+ {
+ valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
+ valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
+ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
+ }
+ break;
+
+ case UCNV_PRV_ESCAPE_C:
+ valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
+
+ /* two code units: escape the whole code point with "U"; otherwise the first code unit with "u" */
+ if(length==2){
+ valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
+ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
+
+ }
+ else{
+ valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
+ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
+ }
+ break;
+
+ case UCNV_PRV_ESCAPE_XML_DEC:
+
+ valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
+ valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
+ if(length==2){
+ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
+ }
+ else{
+ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
+ }
+ valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
+ break;
+
+ case UCNV_PRV_ESCAPE_XML_HEX:
+
+ valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
+ valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
+ valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
+ if(length==2){
+ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
+ }
+ else{
+ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
+ }
+ valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
+ break;
+
+ case UCNV_PRV_ESCAPE_UNICODE:
+ valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */
+ valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
+ valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
+ if (length == 2) {
+ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
+ } else {
+ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
+ }
+ valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
+ break;
+
+ case UCNV_PRV_ESCAPE_CSS2:
+ valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
+ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
+ /* Always add space character, because the next character might be whitespace,
+ which would erroneously be considered the termination of the escape sequence. */
+ valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
+ break;
+
+ default:
+ /* unrecognized style: same output as for a NULL context */
+ while (i < length)
+ {
+ valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
+ valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
+ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
+ }
+ }
+ }
+ myValueSource = valueString;
+
+ /* reset the error */
+ *err = U_ZERO_ERROR;
+
+ /* convert the escape sequence; the SUBSTITUTE callback installed earlier is active here */
+ ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
+
+ /* put the caller's callback and context back */
+ ucnv_setFromUCallBack (fromArgs->converter,
+ original,
+ originalContext,
+ &ignoredCallback,
+ &ignoredContext,
+ &err2);
+ if (U_FAILURE (err2))
+ {
+ /* a failure to restore the callback overrides the result of the write */
+ *err = err2;
+ return;
+ }
+
+ return;
+}
+
+
+
+/*
+ * To-Unicode SKIP callback.
+ * Clears the error, so that nothing is written for the offending bytes, when
+ * context is NULL, or when context starts with UCNV_PRV_STOP_ON_ILLEGAL and
+ * the reason is UCNV_UNASSIGNED.
+ * In every other case the error code set by the caller is left untouched.
+ */
+U_CAPI void U_EXPORT2
+UCNV_TO_U_CALLBACK_SKIP (
+                 const void *context,
+                 UConverterToUnicodeArgs *toArgs,
+                 const char* codeUnits,
+                 int32_t length,
+                 UConverterCallbackReason reason,
+                 UErrorCode * err)
+{
+    (void)toArgs; (void)codeUnits; (void)length;
+
+    if (reason > UCNV_IRREGULAR)
+    {
+        /* ignore the reset, close and clone calls. */
+        return;
+    }
+
+    if (context != NULL &&
+        (*((const char*)context) != UCNV_PRV_STOP_ON_ILLEGAL || reason != UCNV_UNASSIGNED))
+    {
+        /* the caller must have set the error code accordingly. */
+        return;
+    }
+
+    *err = U_ZERO_ERROR;
+}
+
+/*
+ * To-Unicode SUBSTITUTE callback.
+ * When context is NULL, or context starts with UCNV_PRV_STOP_ON_ILLEGAL and
+ * the reason is UCNV_UNASSIGNED, the error is cleared and
+ * ucnv_cbToUWriteSub() is called.
+ * In every other case the error code set by the caller is left untouched.
+ */
+U_CAPI void U_EXPORT2
+UCNV_TO_U_CALLBACK_SUBSTITUTE (
+                 const void *context,
+                 UConverterToUnicodeArgs *toArgs,
+                 const char* codeUnits,
+                 int32_t length,
+                 UConverterCallbackReason reason,
+                 UErrorCode * err)
+{
+    (void)codeUnits; (void)length;
+
+    if (reason > UCNV_IRREGULAR)
+    {
+        /* ignore the reset, close and clone calls. */
+        return;
+    }
+
+    if (context != NULL &&
+        (*((const char*)context) != UCNV_PRV_STOP_ON_ILLEGAL || reason != UCNV_UNASSIGNED))
+    {
+        /* the caller must have set the error code accordingly. */
+        return;
+    }
+
+    *err = U_ZERO_ERROR;
+    ucnv_cbToUWriteSub(toArgs,0,err);
+}
+
+/*
+ * To-Unicode ESCAPE callback.
+ * Uses uprv_itou to get an escape sequence for every byte of the offending
+ * sequence, and uses that as the substitution sequence.
+ *
+ * Escape styles, selected by the first char of context
+ * (the digits come from uprv_itou with the radix and minimum width passed below):
+ *   UCNV_PRV_ESCAPE_XML_DEC   "&#" + decimal digits + ";"   per byte
+ *   UCNV_PRV_ESCAPE_XML_HEX   "&#x" + hex digits + ";"      per byte
+ *   UCNV_PRV_ESCAPE_C         "\x" + 2 hex digits           per byte
+ *   NULL context or any other value: "%X" + 2 hex digits    per byte
+ *
+ * @param context   NULL, or a pointer to a char that selects the escape style
+ * @param toArgs    conversion arguments, passed on to ucnv_cbToUWriteUChars()
+ * @param codeUnits the offending bytes
+ * @param length    number of bytes in codeUnits; the escape sequence is built
+ *                  in a buffer of VALUE_STRING_LENGTH UChars, so the caller
+ *                  must not pass more bytes than fit (not checked here)
+ * @param reason    why the callback was called; values above UCNV_IRREGULAR
+ *                  are ignored
+ * @param err       in/out error code; reset to U_ZERO_ERROR before the escape
+ *                  sequence is written
+ */
+U_CAPI void U_EXPORT2
+UCNV_TO_U_CALLBACK_ESCAPE (
+                 const void *context,
+                 UConverterToUnicodeArgs *toArgs,
+                 const char* codeUnits,
+                 int32_t length,
+                 UConverterCallbackReason reason,
+                 UErrorCode * err)
+{
+    UChar uniValueString[VALUE_STRING_LENGTH];
+    int32_t valueStringLength = 0;
+    int32_t i = 0;
+
+    if (reason > UCNV_IRREGULAR)
+    {
+        return;
+    }
+
+    /*
+     * A NULL context and an unrecognized style produce the same output,
+     * so map NULL to UCNV_PRV_ESCAPE_ICU and let both take the default branch.
+     */
+    const char escapeStyle =
+        (context == NULL) ? (char)UCNV_PRV_ESCAPE_ICU : *((const char*)context);
+
+    switch(escapeStyle)
+    {
+    case UCNV_PRV_ESCAPE_XML_DEC:
+        while (i < length)
+        {
+            uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
+            uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
+            valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
+            uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
+        }
+        break;
+
+    case UCNV_PRV_ESCAPE_XML_HEX:
+        while (i < length)
+        {
+            uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
+            uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
+            uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
+            valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
+            uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
+        }
+        break;
+
+    case UCNV_PRV_ESCAPE_C:
+        while (i < length)
+        {
+            uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
+            uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
+            valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
+        }
+        break;
+
+    default:
+        while (i < length)
+        {
+            uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
+            uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;            /* adding X */
+            /* advance by what uprv_itou reports, like every other branch, instead of a hard-coded 2 */
+            valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
+        }
+        break;
+    }
+
+    /* reset the error */
+    *err = U_ZERO_ERROR;
+
+    ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
+}
+
+#endif
diff --git a/thirdparty/icu4c/common/ucnv_ext.cpp b/thirdparty/icu4c/common/ucnv_ext.cpp
new file mode 100644
index 0000000000..7dea4eef41
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_ext.cpp
@@ -0,0 +1,1143 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2003-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ucnv_ext.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003jun13
+* created by: Markus W. Scherer
+*
+* Conversion extensions
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
+
+#include "unicode/uset.h"
+#include "unicode/ustring.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "ucnv_ext.h"
+#include "cmemory.h"
+#include "uassert.h"
+
+/* to Unicode --------------------------------------------------------------- */
+
+/*
+ * Looks up one input byte in a to-Unicode section.
+ * Each section word carries a byte (read with UCNV_EXT_TO_U_GET_BYTE) and a
+ * value (read with UCNV_EXT_TO_U_GET_VALUE). The searches below rely on the
+ * words being sorted by their byte.
+ *
+ * @param toUSection the section words to search
+ * @param length number of words in toUSection; the code reads toUSection[0]
+ * and toUSection[length-1] unconditionally, so it must be >0
+ * @param byte the input byte to look up
+ * @return lookup value for the byte, if found; else 0
+ */
+static inline uint32_t
+ucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) {
+ uint32_t word0, word;
+ int32_t i, start, limit;
+
+ /* check the input byte against the lowest and highest section bytes */
+ start=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[0]);
+ limit=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[length-1]);
+ if(byte<start || limit<byte) {
+ return 0; /* the byte is out of range */
+ }
+
+ /* the section has exactly one word per byte value in start..limit */
+ if(length==((limit-start)+1)) {
+ /* direct access on a linear array */
+ return UCNV_EXT_TO_U_GET_VALUE(toUSection[byte-start]); /* could be 0 */
+ }
+
+ /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */
+ word0=UCNV_EXT_TO_U_MAKE_WORD(byte, 0);
+
+ /*
+ * Shift byte once instead of each section word and add 0xffffff.
+ * We will compare the shifted/added byte (bbffffff) against
+ * section words which have byte values in the same bit position.
+ * If and only if byte bb < section byte ss then bbffffff<ssvvvvvv
+ * for all v=0..f
+ * so we need not mask off the lower 24 bits of each section word.
+ */
+ word=word0|UCNV_EXT_TO_U_VALUE_MASK;
+
+ /* binary search */
+ /* from here on, start and limit are indexes into toUSection[], not byte values */
+ start=0;
+ limit=length;
+ for(;;) {
+ i=limit-start;
+ if(i<=1) {
+ break; /* done */
+ }
+ /* start<limit-1 */
+
+ if(i<=4) {
+ /* linear search for the last part */
+ if(word0<=toUSection[start]) {
+ break;
+ }
+ if(++start<limit && word0<=toUSection[start]) {
+ break;
+ }
+ if(++start<limit && word0<=toUSection[start]) {
+ break;
+ }
+ /* always break at start==limit-1 */
+ ++start;
+ break;
+ }
+
+ /* halve the range: keep the half whose first word can still hold the byte */
+ i=(start+limit)/2;
+ if(word<toUSection[i]) {
+ limit=i;
+ } else {
+ start=i;
+ }
+ }
+
+ /* did we really find it? */
+ /* note: word is reused here to hold the candidate section word */
+ if(start<limit && byte==UCNV_EXT_TO_U_GET_BYTE(word=toUSection[start])) {
+ return UCNV_EXT_TO_U_GET_VALUE(word); /* never 0 */
+ } else {
+ return 0; /* not found */
+ }
+}
+
+/*
+ * TRUE if not an SI/SO stateful converter (sisoState<0),
+ * or if the match length fits with the current converter state:
+ * state 0 requires a match length of exactly 1, any other state requires
+ * a match length other than 1.
+ *
+ * Both arguments are parenthesized at every use so that expression arguments
+ * are evaluated as a whole; sisoState is evaluated twice.
+ */
+#define UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, match) \
+    ((sisoState)<0 || ((sisoState)==0) == ((match)==1))
+
+/*
+ * this works like ucnv_extMatchFromU() except
+ * - the first character is in pre
+ * - no trie is used
+ * - the returned matchLength is not offset by 2
+ *
+ * Walks the to-Unicode sections byte by byte, taking input first from pre[]
+ * and then from src[], and remembers the longest match seen so far.
+ *
+ * @param cx pointer to extension data; if NULL, or if its
+ * UCNV_EXT_TO_U_LENGTH entry is <=0, returns 0
+ * @param sisoState value checked by UCNV_EXT_TO_U_VERIFY_SISO_MATCH;
+ * 0 additionally limits the input to exactly one byte
+ * @param pre bytes that are matched first
+ * @param preLength length of pre
+ * @param src bytes that are matched after pre
+ * @param srcLength length of src
+ * @param pMatchValue [out] value of the match; only written when the return value is >0
+ * @param useFallback unnamed here; NOTE(review): the TO_U_USE_FALLBACK(useFallback)
+ * uses below only compile if that macro does not expand its argument - confirm
+ * @param flush if TRUE, a partial match is never returned
+ * @return >0: matched, return value=match length (number of input bytes matched)
+ * 0: no match
+ * <0: partial match, return value=negative number of input bytes consumed
+ */
+static int32_t
+ucnv_extMatchToU(const int32_t *cx, int8_t sisoState,
+ const char *pre, int32_t preLength,
+ const char *src, int32_t srcLength,
+ uint32_t *pMatchValue,
+ UBool /*useFallback*/, UBool flush) {
+ const uint32_t *toUTable, *toUSection;
+
+ uint32_t value, matchValue;
+ int32_t i, j, idx, length, matchLength;
+ uint8_t b;
+
+ if(cx==NULL || cx[UCNV_EXT_TO_U_LENGTH]<=0) {
+ return 0; /* no extension data, no match */
+ }
+
+ /* initialize */
+ toUTable=UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_INDEX, uint32_t);
+ idx=0;
+
+ matchValue=0;
+ i=j=matchLength=0;
+
+ if(sisoState==0) {
+ /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */
+ if(preLength>1) {
+ return 0; /* no match of a DBCS sequence in SBCS mode */
+ } else if(preLength==1) {
+ srcLength=0;
+ } else /* preLength==0 */ {
+ if(srcLength>1) {
+ srcLength=1;
+ }
+ }
+ /* with at most one byte to look at, never report a partial match */
+ flush=TRUE;
+ }
+
+ /* we must not remember fallback matches when not using fallbacks */
+
+ /* match input units until there is a full match or the input is consumed */
+ for(;;) {
+ /* go to the next section */
+ toUSection=toUTable+idx;
+
+ /* read first pair of the section */
+ /* the byte field of the first word is used as the section length */
+ value=*toUSection++;
+ length=UCNV_EXT_TO_U_GET_BYTE(value);
+ value=UCNV_EXT_TO_U_GET_VALUE(value);
+ if( value!=0 &&
+ (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) ||
+ TO_U_USE_FALLBACK(useFallback)) &&
+ UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j)
+ ) {
+ /* remember longest match so far */
+ matchValue=value;
+ matchLength=i+j;
+ }
+
+ /* match pre[] then src[] */
+ if(i<preLength) {
+ b=(uint8_t)pre[i++];
+ } else if(j<srcLength) {
+ b=(uint8_t)src[j++];
+ } else {
+ /* all input consumed, partial match */
+ /* note: length is reused here for the number of bytes consumed so far */
+ if(flush || (length=(i+j))>UCNV_EXT_MAX_BYTES) {
+ /*
+ * end of the entire input stream, stop with the longest match so far
+ * or: partial match must not be longer than UCNV_EXT_MAX_BYTES
+ * because it must fit into state buffers
+ */
+ break;
+ } else {
+ /* continue with more input next time */
+ return -length;
+ }
+ }
+
+ /* search for the current byte */
+ value=ucnv_extFindToU(toUSection, length, b);
+ if(value==0) {
+ /* no match here, stop with the longest match so far */
+ break;
+ } else {
+ if(UCNV_EXT_TO_U_IS_PARTIAL(value)) {
+ /* partial match, continue */
+ idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value);
+ } else {
+ if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) ||
+ TO_U_USE_FALLBACK(useFallback)) &&
+ UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j)
+ ) {
+ /* full match, stop with result */
+ matchValue=value;
+ matchLength=i+j;
+ } else {
+ /* full match on fallback not taken, stop with the longest match so far */
+ }
+ break;
+ }
+ }
+ }
+
+ if(matchLength==0) {
+ /* no match at all */
+ return 0;
+ }
+
+ /* return result */
+ *pMatchValue=UCNV_EXT_TO_U_MASK_ROUNDTRIP(matchValue);
+ return matchLength;
+}
+
+/*
+ * Writes the result of a to-Unicode extension match to the target:
+ * either the single code point encoded in value, or the UChar string that
+ * value refers to inside the extension data.
+ */
+static inline void
+ucnv_extWriteToU(UConverter *cnv, const int32_t *cx,
+                 uint32_t value,
+                 UChar **target, const UChar *targetLimit,
+                 int32_t **offsets, int32_t srcIndex,
+                 UErrorCode *pErrorCode) {
+    if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) {
+        /* the value holds one code point */
+        ucnv_toUWriteCodePoint(
+            cnv, UCNV_EXT_TO_U_GET_CODE_POINT(value),
+            target, targetLimit,
+            offsets, srcIndex,
+            pErrorCode);
+        return;
+    }
+
+    /* the value holds index and length of a string - with correct data we have resultLength>0 */
+    ucnv_toUWriteUChars(
+        cnv,
+        UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_UCHARS_INDEX, UChar)+
+            UCNV_EXT_TO_U_GET_INDEX(value),
+        UCNV_EXT_TO_U_GET_LENGTH(value),
+        target, targetLimit,
+        offsets, srcIndex,
+        pErrorCode);
+}
+
+/*
+ * get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS),
+ * or 1 for DBCS-only,
+ * or -1 if the converter is not SI/SO stateful
+ *
+ * Note: For SI/SO stateful converters getting here,
+ * cnv->mode==0 is equivalent to firstLength==1.
+ *
+ * The result is chosen from (cnv)->sharedData->mbcs.outputType:
+ * MBCS_OUTPUT_2_SISO yields (cnv)->mode cast to int8_t.
+ * The argument is evaluated more than once, so it must not have side effects.
+ */
+#define UCNV_SISO_STATE(cnv) \
+ ((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO ? (int8_t)(cnv)->mode : \
+ (cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1 : -1)
+
+/*
+ * Tries to match the bytes in cnv->toUBytes, followed by new input from *src,
+ * against the to-Unicode extension data.
+ *
+ * target<targetLimit; set error code for overflow
+ *
+ * @param cnv converter; its toUBytes hold the first firstLength input bytes,
+ * and its preToU fields receive the state of a partial match
+ * @param cx pointer to extension data
+ * @param firstLength number of bytes taken from cnv->toUBytes
+ * @param src [in/out] further input; advanced past the input that was consumed
+ * @param srcLimit end of the further input
+ * @param target [in/out] output position, passed on to ucnv_extWriteToU()
+ * @param targetLimit end of the output buffer
+ * @param offsets [in/out] passed on to ucnv_extWriteToU()
+ * @param srcIndex passed on to ucnv_extWriteToU()
+ * @param flush passed on to ucnv_extMatchToU()
+ * @param pErrorCode passed on to ucnv_extWriteToU()
+ * @return TRUE for a full match (result written) or a partial match
+ * (state saved in cnv), FALSE if there is no match
+ */
+U_CFUNC UBool
+ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx,
+ int32_t firstLength,
+ const char **src, const char *srcLimit,
+ UChar **target, const UChar *targetLimit,
+ int32_t **offsets, int32_t srcIndex,
+ UBool flush,
+ UErrorCode *pErrorCode) {
+ uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */
+ int32_t match;
+
+ /* try to match */
+ match=ucnv_extMatchToU(cx, (int8_t)UCNV_SISO_STATE(cnv),
+ (const char *)cnv->toUBytes, firstLength,
+ *src, (int32_t)(srcLimit-*src),
+ &value,
+ cnv->useFallback, flush);
+ if(match>0) {
+ /* advance src pointer for the consumed input */
+ /* the first firstLength bytes of the match came from toUBytes, not from *src */
+ *src+=match-firstLength;
+
+ /* write result to target */
+ ucnv_extWriteToU(cnv, cx,
+ value,
+ target, targetLimit,
+ offsets, srcIndex,
+ pErrorCode);
+ return TRUE;
+ } else if(match<0) {
+ /* save state for partial match */
+ const char *s;
+ int32_t j;
+
+ /* copy the first code point */
+ s=(const char *)cnv->toUBytes;
+ cnv->preToUFirstLength=(int8_t)firstLength;
+ for(j=0; j<firstLength; ++j) {
+ cnv->preToU[j]=*s++;
+ }
+
+ /* now copy the newly consumed input */
+ s=*src;
+ match=-match;
+ for(; j<match; ++j) {
+ cnv->preToU[j]=*s++;
+ }
+ *src=s; /* same as *src=srcLimit; because we reached the end of input */
+ cnv->preToULength=(int8_t)match;
+ return TRUE;
+ } else /* match==0 no match */ {
+ return FALSE;
+ }
+}
+
+/*
+ * Simple, single-character to-Unicode lookup in the extension data.
+ *
+ * @return the code point if all of source[0..length[ maps to exactly one code point;
+ *         0xffff if length<=0;
+ *         0xfffe in every other case
+ */
+U_CFUNC UChar32
+ucnv_extSimpleMatchToU(const int32_t *cx,
+                       const char *source, int32_t length,
+                       UBool useFallback) {
+    uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */
+    int32_t matched;
+
+    if(length<=0) {
+        return 0xffff;
+    }
+
+    /* all input is passed as "pre" bytes, with flush==TRUE and no SI/SO state */
+    matched=ucnv_extMatchToU(cx, -1,
+                             source, length,
+                             NULL, 0,
+                             &value,
+                             useFallback, TRUE);
+
+    /* write result for simple, single-character conversion */
+    if(matched==length && UCNV_EXT_TO_U_IS_CODE_POINT(value)) {
+        return UCNV_EXT_TO_U_GET_CODE_POINT(value);
+    }
+
+    /*
+     * return no match because
+     * - matched>0 && value points to string: simple conversion cannot handle multiple code points
+     * - matched>0 && matched!=length: not all input consumed, forbidden for this function
+     * - matched==0: no match found in the first place
+     * - matched<0: partial match, not supported for simple conversion (and flush==TRUE)
+     */
+    return 0xfffe;
+}
+
+/*
+ * continue partial match with new input
+ * never called for simple, single-character conversion
+ *
+ * Matches the bytes saved in cnv->preToU, followed by the new input in pArgs,
+ * and then either writes the result, saves the longer partial match, or
+ * reports the first saved character as unmappable.
+ *
+ * @param cnv converter holding the saved partial match in preToU,
+ * preToULength and preToUFirstLength
+ * @param pArgs conversion arguments; source is advanced past consumed input,
+ * target and offsets are passed on to ucnv_extWriteToU()
+ * @param srcIndex passed on to ucnv_extWriteToU()
+ * @param pErrorCode set to U_INVALID_CHAR_FOUND if there is no match;
+ * otherwise only passed on to ucnv_extWriteToU()
+ */
+U_CFUNC void
+ucnv_extContinueMatchToU(UConverter *cnv,
+ UConverterToUnicodeArgs *pArgs, int32_t srcIndex,
+ UErrorCode *pErrorCode) {
+ uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */
+ int32_t match, length;
+
+ match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv),
+ cnv->preToU, cnv->preToULength,
+ pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source),
+ &value,
+ cnv->useFallback, pArgs->flush);
+ if(match>0) {
+ if(match>=cnv->preToULength) {
+ /* advance src pointer for the consumed input */
+ pArgs->source+=match-cnv->preToULength;
+ cnv->preToULength=0;
+ } else {
+ /* the match did not use all of preToU[] - keep the rest for replay */
+ length=cnv->preToULength-match;
+ uprv_memmove(cnv->preToU, cnv->preToU+match, length);
+ /* a negative preToULength marks the remaining bytes for replay */
+ cnv->preToULength=(int8_t)-length;
+ }
+
+ /* write result */
+ ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes,
+ value,
+ &pArgs->target, pArgs->targetLimit,
+ &pArgs->offsets, srcIndex,
+ pErrorCode);
+ } else if(match<0) {
+ /* save state for partial match */
+ const char *s;
+ int32_t j;
+
+ /* just _append_ the newly consumed input to preToU[] */
+ s=pArgs->source;
+ match=-match;
+ for(j=cnv->preToULength; j<match; ++j) {
+ cnv->preToU[j]=*s++;
+ }
+ pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */
+ cnv->preToULength=(int8_t)match;
+ } else /* match==0 */ {
+ /*
+ * no match
+ *
+ * We need to split the previous input into two parts:
+ *
+ * 1. The first codepage character is unmappable - that's how we got into
+ * trying the extension data in the first place.
+ * We need to move it from the preToU buffer
+ * to the error buffer, set an error code,
+ * and prepare the rest of the previous input for 2.
+ *
+ * 2. The rest of the previous input must be converted once we
+ * come back from the callback for the first character.
+ * At that time, we have to try again from scratch to convert
+ * these input characters.
+ * The replay will be handled by the ucnv.c conversion code.
+ */
+
+ /* move the first codepage character to the error field */
+ uprv_memcpy(cnv->toUBytes, cnv->preToU, cnv->preToUFirstLength);
+ cnv->toULength=cnv->preToUFirstLength;
+
+ /* move the rest up inside the buffer */
+ length=cnv->preToULength-cnv->preToUFirstLength;
+ if(length>0) {
+ uprv_memmove(cnv->preToU, cnv->preToU+cnv->preToUFirstLength, length);
+ }
+
+ /* mark preToU for replay */
+ cnv->preToULength=(int8_t)-length;
+
+ /* set the error code for unassigned */
+ *pErrorCode=U_INVALID_CHAR_FOUND;
+ }
+}
+
+/* from Unicode ------------------------------------------------------------- */
+
+// Decides whether a from-Unicode extension value may be used:
+// roundtrips, "good one-way" mappings, and some normal fallbacks are used,
+// but never a value that has reserved bits set.
+static inline UBool
+extFromUUseMapping(UBool useFallback, uint32_t value, UChar32 firstCP) {
+    if((value&UCNV_EXT_FROM_U_STATUS_MASK)!=0 ||
+            FROM_U_USE_FALLBACK(useFallback, firstCP)) {
+        // usable as far as the status goes; the reserved bits decide
+        return (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0;
+    }
+    return FALSE;
+}
+
+/*
+ * Searches a from-Unicode section for one UChar.
+ * The binary search below relies on fromUSection[] being sorted in
+ * ascending order.
+ *
+ * @param fromUSection the UChars of the section
+ * @param length number of UChars in fromUSection
+ * @param u the UChar to look for
+ * @return index of the UChar, if found; else <0
+ */
+static inline int32_t
+ucnv_extFindFromU(const UChar *fromUSection, int32_t length, UChar u) {
+ int32_t i, start, limit;
+
+ /* binary search */
+ start=0;
+ limit=length;
+ for(;;) {
+ i=limit-start;
+ if(i<=1) {
+ break; /* done */
+ }
+ /* start<limit-1 */
+
+ if(i<=4) {
+ /* linear search for the last part */
+ if(u<=fromUSection[start]) {
+ break;
+ }
+ if(++start<limit && u<=fromUSection[start]) {
+ break;
+ }
+ if(++start<limit && u<=fromUSection[start]) {
+ break;
+ }
+ /* always break at start==limit-1 */
+ ++start;
+ break;
+ }
+
+ /* halve the range: keep the half whose first entry can still be u */
+ i=(start+limit)/2;
+ if(u<fromUSection[i]) {
+ limit=i;
+ } else {
+ start=i;
+ }
+ }
+
+ /* did we really find it? */
+ /* start<limit is false only for an empty section (length<=0) */
+ if(start<limit && u==fromUSection[start]) {
+ return start;
+ } else {
+ return -1; /* not found */
+ }
+}
+
+/*
+ * @param cx pointer to extension data; if NULL, returns 0
+ * @param firstCP the first code point before all the other UChars
+ * @param pre UChars that must match; !initialMatch: partial match with them
+ * @param preLength length of pre, >=0
+ * @param src UChars that can be used to complete a match
+ * @param srcLength length of src, >=0
+ * @param pMatchValue [out] output result value for the match from the data structure
+ * @param useFallback "use fallback" flag, usually from cnv->useFallback
+ * @param flush TRUE if the end of the input stream is reached
+ * @return >1: matched, return value=total match length (number of input units matched)
+ * 1: matched, no mapping but request for <subchar1>
+ * (only for the first code point)
+ * 0: no match
+ * <0: partial match, return value=negative total match length
+ * (partial matches are never returned for flush==TRUE)
+ * (partial matches are never returned as being longer than UCNV_EXT_MAX_UCHARS)
+ * the matchLength is 2 if only firstCP matched, and >2 if firstCP and
+ * further code units matched
+ */
+static int32_t
+ucnv_extMatchFromU(const int32_t *cx,
+ UChar32 firstCP,
+ const UChar *pre, int32_t preLength,
+ const UChar *src, int32_t srcLength,
+ uint32_t *pMatchValue,
+ UBool useFallback, UBool flush) {
+ const uint16_t *stage12, *stage3;
+ const uint32_t *stage3b;
+
+ const UChar *fromUTableUChars, *fromUSectionUChars;
+ const uint32_t *fromUTableValues, *fromUSectionValues;
+
+ uint32_t value, matchValue;
+ int32_t i, j, idx, length, matchLength; /* i/j: units consumed from pre[]/src[] */
+ UChar c;
+
+ if(cx==NULL) {
+ return 0; /* no extension data, no match */
+ }
+
+ /* trie lookup of firstCP */
+ idx=firstCP>>10; /* stage 1 index */
+ if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) {
+ return 0; /* the first code point is outside the trie */
+ }
+
+ stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t);
+ stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t);
+ idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP);
+
+ stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t);
+ value=stage3b[idx];
+ if(value==0) {
+ return 0; /* 0 in the trie result means no mapping for firstCP */
+ }
+
+ /*
+ * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0:
+ * Do not interpret values with reserved bits used, for forward compatibility,
+ * and do not even remember intermediate results with reserved bits used.
+ */
+
+ if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
+ /* partial match (bits 31..24 of a fromU value are 0), enter the loop below */
+ idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
+
+ /* initialize */
+ fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar);
+ fromUTableValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t);
+
+ matchValue=0;
+ i=j=matchLength=0;
+
+ /* we must not remember fallback matches when not using fallbacks */
+
+ /* match input units until there is a full match or the input is consumed */
+ for(;;) {
+ /* go to the next section */
+ fromUSectionUChars=fromUTableUChars+idx;
+ fromUSectionValues=fromUTableValues+idx;
+
+ /* read first pair of the section */
+ length=*fromUSectionUChars++; /* number of pairs that follow in this section */
+ value=*fromUSectionValues++; /* default result if the next unit is not found */
+ if(value!=0 && extFromUUseMapping(useFallback, value, firstCP)) {
+ /* remember longest match so far */
+ matchValue=value;
+ matchLength=2+i+j; /* the 2 stands for the initial code point */
+ }
+
+ /* match pre[] then src[] */
+ if(i<preLength) {
+ c=pre[i++];
+ } else if(j<srcLength) {
+ c=src[j++];
+ } else {
+ /* all input consumed, partial match */
+ if(flush || (length=(i+j))>UCNV_EXT_MAX_UCHARS) {
+ /*
+ * end of the entire input stream, stop with the longest match so far
+ * or: partial match must not be longer than UCNV_EXT_MAX_UCHARS
+ * because it must fit into state buffers
+ */
+ break;
+ } else {
+ /* continue with more input next time */
+ return -(2+length); /* negative total length signals a partial match */
+ }
+ }
+
+ /* search for the current UChar */
+ idx=ucnv_extFindFromU(fromUSectionUChars, length, c);
+ if(idx<0) {
+ /* no match here, stop with the longest match so far */
+ break;
+ } else {
+ value=fromUSectionValues[idx];
+ if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
+ /* partial match, continue */
+ idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
+ } else {
+ if(extFromUUseMapping(useFallback, value, firstCP)) {
+ /* full match, stop with result */
+ matchValue=value;
+ matchLength=2+i+j;
+ } else {
+ /* full match on fallback not taken, stop with the longest match so far */
+ }
+ break;
+ }
+ }
+ }
+
+ if(matchLength==0) {
+ /* no match at all */
+ return 0;
+ }
+ } else /* result from firstCP trie lookup */ {
+ if(extFromUUseMapping(useFallback, value, firstCP)) {
+ /* full match, stop with result */
+ matchValue=value;
+ matchLength=2;
+ } else {
+ /* fallback not taken */
+ return 0;
+ }
+ }
+
+ /* return result */
+ if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) {
+ return 1; /* assert matchLength==2 */
+ }
+
+ *pMatchValue=matchValue; /* only written for a real mapping (return value >=2) */
+ return matchLength;
+}
+
+/* Writes the result bytes of one fromUnicode extension mapping via ucnv_fromUWriteBytes().
+ * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits
+ */
+static inline void
+ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx,
+ uint32_t value,
+ char **target, const char *targetLimit,
+ int32_t **offsets, int32_t srcIndex,
+ UErrorCode *pErrorCode) {
+ uint8_t buffer[1+UCNV_EXT_MAX_BYTES]; /* 1 extra byte in front for an optional shift byte */
+ const uint8_t *result;
+ int32_t length, prevLength;
+
+ length=UCNV_EXT_FROM_U_GET_LENGTH(value); /* number of result bytes */
+ value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); /* the bytes themselves, or an index to them */
+
+ /* output the result */
+ if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
+ /*
+ * Generate a byte array and then write it below.
+ * This is not the fastest possible way, but it should be ok for
+ * extension mappings, and it is much simpler.
+ * Offset and overflow handling are only done once this way.
+ */
+ uint8_t *p=buffer+1; /* reserve buffer[0] for shiftByte below */
+ switch(length) {
+ case 3:
+ *p++=(uint8_t)(value>>16);
+ U_FALLTHROUGH;
+ case 2:
+ *p++=(uint8_t)(value>>8);
+ U_FALLTHROUGH;
+ case 1:
+ *p++=(uint8_t)value;
+ U_FALLTHROUGH;
+ default:
+ break; /* will never occur */
+ }
+ result=buffer+1;
+ } else {
+ result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; /* value indexes the bytes array */
+ }
+
+ /* with correct data we have length>0 */
+
+ if((prevLength=cnv->fromUnicodeStatus)!=0) {
+ /* handle SI/SO stateful output */
+ uint8_t shiftByte;
+
+ if(prevLength>1 && length==1) {
+ /* change from double-byte mode to single-byte */
+ shiftByte=(uint8_t)UCNV_SI;
+ cnv->fromUnicodeStatus=1;
+ } else if(prevLength==1 && length>1) {
+ /* change from single-byte mode to double-byte */
+ shiftByte=(uint8_t)UCNV_SO;
+ cnv->fromUnicodeStatus=2;
+ } else {
+ shiftByte=0; /* no mode change, no shift byte */
+ }
+
+ if(shiftByte!=0) {
+ /* prepend the shift byte to the result bytes */
+ buffer[0]=shiftByte;
+ if(result!=buffer+1) { /* indexed result: copy it into buffer[] behind the shift byte */
+ uprv_memcpy(buffer+1, result, length);
+ }
+ result=buffer;
+ ++length; /* count the shift byte */
+ }
+ }
+
+ ucnv_fromUWriteBytes(cnv, (const char *)result, length,
+ target, targetLimit,
+ offsets, srcIndex,
+ pErrorCode);
+}
+
+/*
+ * Requires target<targetLimit; an overflow is reported through the error code.
+ */
+U_CFUNC UBool
+ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx,
+ UChar32 cp,
+ const UChar **src, const UChar *srcLimit,
+ char **target, const char *targetLimit,
+ int32_t **offsets, int32_t srcIndex,
+ UBool flush,
+ UErrorCode *pErrorCode) {
+ uint32_t mapping = 0; /* output-only argument, zeroed to keep gcc quiet */
+ const UChar *in;
+ int32_t result, extra, k;
+
+ /* look for the longest mapping that starts with cp */
+ result=ucnv_extMatchFromU(cx, cp,
+ NULL, 0,
+ *src, (int32_t)(srcLimit-*src),
+ &mapping,
+ cnv->useFallback, flush);
+
+ if(result<0) {
+ /* partial match: stash cp and the consumed input in the converter */
+ cnv->preFromUFirstCP=cp;
+ in=*src;
+ extra=-result-2; /* units after the initial code point */
+ for(k=0; k<extra; ++k) {
+ cnv->preFromU[k]=in[k];
+ }
+ *src=in+extra; /* same as *src=srcLimit; because we reached the end of input */
+ cnv->preFromULength=(int8_t)extra;
+ return TRUE;
+ }
+
+ if(result==1) {
+ /* matched, no mapping but request for <subchar1> */
+ cnv->useSubChar1=TRUE;
+ return FALSE;
+ }
+ if(result<2) {
+ return FALSE; /* result==0: no match */
+ }
+
+ /* a single-byte result is not acceptable for a DBCS-only converter */
+ if(UCNV_EXT_FROM_U_GET_LENGTH(mapping)==1 &&
+ cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) {
+ return FALSE;
+ }
+
+ /* skip the input consumed beyond the initial code point */
+ *src+=result-2;
+
+ /* emit the mapped bytes */
+ ucnv_extWriteFromU(cnv, cx,
+ mapping,
+ target, targetLimit,
+ offsets, srcIndex,
+ pErrorCode);
+ return TRUE;
+}
+
+/*
+ * Single-code point lookup, used by the ISO 2022 implementation.
+ * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping
+ */
+U_CFUNC int32_t
+ucnv_extSimpleMatchFromU(const int32_t *cx,
+ UChar32 cp, uint32_t *pValue,
+ UBool useFallback) {
+ uint32_t mapping;
+ int32_t matched, numBytes;
+ int roundtrip;
+
+ /* no pre[] or src[] input, and flush==TRUE so that no partial match comes back */
+ matched=ucnv_extMatchFromU(cx,
+ cp,
+ NULL, 0,
+ NULL, 0,
+ &mapping,
+ useFallback, TRUE);
+ if(matched<2) {
+ /*
+ * return no match because
+ * - matched==1: no match found, <subchar1> preferred
+ * - matched==0: no match found in the first place
+ * - matched<0: partial match, not supported for simple conversion (and flush==TRUE)
+ */
+ return 0;
+ }
+
+ /* take the mapping word apart for simple, single-character conversion */
+ roundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(mapping);
+ numBytes=UCNV_EXT_FROM_U_GET_LENGTH(mapping);
+ mapping=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(mapping);
+
+ if(numBytes<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
+ *pValue=mapping;
+ return roundtrip ? numBytes : -numBytes;
+ }
+#if 0 /* not currently used */
+ if(numBytes==4) {
+ /* de-serialize a 4-byte result */
+ const uint8_t *bytes=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+mapping;
+ *pValue=
+ ((uint32_t)bytes[0]<<24)|
+ ((uint32_t)bytes[1]<<16)|
+ ((uint32_t)bytes[2]<<8)|
+ bytes[3];
+ return roundtrip ? 4 : -4;
+ }
+#endif
+ /* the result is too long for simple conversion */
+ return 0;
+}
+
+/*
+ * continue partial match with new input, requires cnv->preFromUFirstCP>=0
+ * never called for simple, single-character conversion
+ */
+U_CFUNC void
+ucnv_extContinueMatchFromU(UConverter *cnv,
+ UConverterFromUnicodeArgs *pArgs, int32_t srcIndex,
+ UErrorCode *pErrorCode) {
+ uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */
+ int32_t match;
+
+ match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes,
+ cnv->preFromUFirstCP,
+ cnv->preFromU, cnv->preFromULength,
+ pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source),
+ &value,
+ cnv->useFallback, pArgs->flush);
+ if(match>=2) {
+ match-=2; /* remove 2 for the initial code point */
+
+ if(match>=cnv->preFromULength) { /* the match covers all of preFromU[] */
+ /* advance src pointer for the consumed input */
+ pArgs->source+=match-cnv->preFromULength;
+ cnv->preFromULength=0;
+ } else {
+ /* the match did not use all of preFromU[] - keep the rest for replay */
+ int32_t length=cnv->preFromULength-match;
+ u_memmove(cnv->preFromU, cnv->preFromU+match, length);
+ cnv->preFromULength=(int8_t)-length; /* a negative length marks preFromU[] for replay */
+ }
+
+ /* finish the partial match */
+ cnv->preFromUFirstCP=U_SENTINEL;
+
+ /* write result */
+ ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes,
+ value,
+ &pArgs->target, pArgs->targetLimit,
+ &pArgs->offsets, srcIndex,
+ pErrorCode);
+ } else if(match<0) {
+ /* save state for partial match */
+ const UChar *s;
+ int32_t j;
+
+ /* just _append_ the newly consumed input to preFromU[] */
+ s=pArgs->source;
+ match=-match-2; /* remove 2 for the initial code point */
+ for(j=cnv->preFromULength; j<match; ++j) { /* start behind the units that are already stored */
+ U_ASSERT(j>=0);
+ cnv->preFromU[j]=*s++;
+ }
+ pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */
+ cnv->preFromULength=(int8_t)match;
+ } else /* match==0 or 1 */ {
+ /*
+ * no match
+ *
+ * We need to split the previous input into two parts:
+ *
+ * 1. The first code point is unmappable - that's how we got into
+ * trying the extension data in the first place.
+ * We need to move it from the preFromU buffer
+ * to the error buffer, set an error code,
+ * and prepare the rest of the previous input for 2.
+ *
+ * 2. The rest of the previous input must be converted once we
+ * come back from the callback for the first code point.
+ * At that time, we have to try again from scratch to convert
+ * these input characters.
+ * The replay will be handled by the ucnv.c conversion code.
+ */
+
+ if(match==1) {
+ /* matched, no mapping but request for <subchar1> */
+ cnv->useSubChar1=TRUE;
+ }
+
+ /* move the first code point to the error field */
+ cnv->fromUChar32=cnv->preFromUFirstCP;
+ cnv->preFromUFirstCP=U_SENTINEL;
+
+ /* mark preFromU for replay */
+ cnv->preFromULength=-cnv->preFromULength; /* negated length; the buffer contents stay in place */
+
+ /* set the error code for unassigned */
+ *pErrorCode=U_INVALID_CHAR_FOUND;
+ }
+}
+
+static UBool
+extSetUseMapping(UConverterUnicodeSet which, int32_t minLength, uint32_t value) {
+ // Bits of the mapping value to inspect, and what they must equal.
+ uint32_t checkedBits, requiredBits;
+
+ if(which==UCNV_ROUNDTRIP_SET) {
+ // Only roundtrip mappings qualify: no fallbacks, even if
+ // ucnv_fromUnicode() would use them (fallbacks from PUA), and by
+ // analogy no "good one-way" mappings either.
+ // See the API docs for ucnv_getUnicodeSet().
+ checkedBits=UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK;
+ requiredBits=UCNV_EXT_FROM_U_ROUNDTRIP_FLAG;
+ } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ {
+ checkedBits=UCNV_EXT_FROM_U_RESERVED_MASK;
+ requiredBits=0;
+ }
+ // Entries with reserved bits set are rejected in both cases.
+ if((value&checkedBits)!=requiredBits) {
+ return FALSE;
+ }
+ // Do not add <subchar1> entries or other (future?) pseudo-entries
+ // with an output length of 0, nor results shorter than minLength.
+ return UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength;
+}
+/* Adds the strings of the fromU section at sectionIndex to sa, recursing into subsections; s[0..length-1] is the input so far. */
+static void
+ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
+ const int32_t *cx,
+ const USetAdder *sa,
+ UConverterUnicodeSet which,
+ int32_t minLength,
+ UChar32 firstCP,
+ UChar s[UCNV_EXT_MAX_UCHARS], int32_t length,
+ int32_t sectionIndex,
+ UErrorCode *pErrorCode) {
+ const UChar *fromUSectionUChars;
+ const uint32_t *fromUSectionValues;
+
+ uint32_t value;
+ int32_t i, count;
+
+ fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+sectionIndex;
+ fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t)+sectionIndex;
+
+ /* read first pair of the section */
+ count=*fromUSectionUChars++; /* number of pairs that follow in this section */
+ value=*fromUSectionValues++; /* result for the input so far, without a further unit */
+
+ if(extSetUseMapping(which, minLength, value)) {
+ if(length==U16_LENGTH(firstCP)) {
+ /* add the initial code point */
+ sa->add(sa->set, firstCP);
+ } else {
+ /* add the string so far */
+ sa->addString(sa->set, s, length);
+ }
+ }
+ if(length>=UCNV_EXT_MAX_UCHARS) { return; } /* s[] is full: never write past its end, whatever the table data says */
+ for(i=0; i<count; ++i) {
+ /* append this code unit and recurse or add the string */
+ s[length]=fromUSectionUChars[i];
+ value=fromUSectionValues[i];
+
+ if(value==0) {
+ /* no mapping, do nothing */
+ } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
+ ucnv_extGetUnicodeSetString(
+ sharedData, cx, sa, which, minLength,
+ firstCP, s, length+1,
+ (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
+ pErrorCode);
+ } else if(extSetUseMapping(which, minLength, value)) {
+ sa->addString(sa->set, s, length+1);
+ }
+ }
+}
+
+U_CFUNC void
+ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
+ const USetAdder *sa,
+ UConverterUnicodeSet which,
+ UConverterSetFilter filter,
+ UErrorCode *pErrorCode) {
+ const int32_t *cx;
+ const uint16_t *stage12, *stage3, *ps2, *ps3;
+ const uint32_t *stage3b;
+
+ uint32_t value;
+ int32_t st1, stage1Length, st2, st3, minLength;
+
+ UChar s[UCNV_EXT_MAX_UCHARS]; /* string buffer shared with ucnv_extGetUnicodeSetString() */
+ UChar32 c;
+ int32_t length;
+
+ cx=sharedData->mbcs.extIndexes;
+ if(cx==NULL) {
+ return; /* no extension data, nothing to add */
+ }
+
+ stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t);
+ stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t);
+ stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t);
+
+ stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH];
+
+ /* enumerate the from-Unicode trie table */
+ c=0; /* keep track of the current code point while enumerating */
+
+ if(filter==UCNV_SET_FILTER_2022_CN) {
+ minLength=3; /* this filter only passes 3-byte results, see the switch below */
+ } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ||
+ filter!=UCNV_SET_FILTER_NONE
+ ) {
+ /* DBCS-only, ignore single-byte results */
+ minLength=2;
+ } else {
+ minLength=1;
+ }
+
+ /*
+ * the trie enumeration is almost the same as
+ * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1
+ */
+ for(st1=0; st1<stage1Length; ++st1) {
+ st2=stage12[st1];
+ if(st2>stage1Length) { /* non-empty: stage 1 values are offset by the length of stage 1 */
+ ps2=stage12+st2;
+ for(st2=0; st2<64; ++st2) { /* 64 stage 2 entries, each for 16 code points */
+ if((st3=(int32_t)ps2[st2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)!=0) {
+ /* read the stage 3 block */
+ ps3=stage3+st3;
+
+ do {
+ value=stage3b[*ps3++];
+ if(value==0) {
+ /* no mapping, do nothing */
+ } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
+ // Recurse for partial results.
+ length=0;
+ U16_APPEND_UNSAFE(s, length, c);
+ ucnv_extGetUnicodeSetString(
+ sharedData, cx, sa, which, minLength,
+ c, s, length,
+ (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
+ pErrorCode);
+ } else if(extSetUseMapping(which, minLength, value)) {
+ switch(filter) {
+ case UCNV_SET_FILTER_2022_CN:
+ if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) {
+ continue; /* skip c; continue still runs the do-while condition, which increments c */
+ }
+ break;
+ case UCNV_SET_FILTER_SJIS:
+ if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) {
+ continue;
+ }
+ break;
+ case UCNV_SET_FILTER_GR94DBCS:
+ if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
+ (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) &&
+ (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
+ continue;
+ }
+ break;
+ case UCNV_SET_FILTER_HZ:
+ if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
+ (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
+ (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
+ continue;
+ }
+ break;
+ default:
+ /*
+ * UCNV_SET_FILTER_NONE,
+ * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength
+ */
+ break;
+ }
+ sa->add(sa->set, c);
+ }
+ } while((++c&0xf)!=0); /* 16 code points per stage 3 block */
+ } else {
+ c+=16; /* empty stage 3 block */
+ }
+ }
+ } else {
+ c+=1024; /* empty stage 2 block */
+ }
+ }
+}
+
+#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
diff --git a/thirdparty/icu4c/common/ucnv_ext.h b/thirdparty/icu4c/common/ucnv_ext.h
new file mode 100644
index 0000000000..dceea7ef12
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_ext.h
@@ -0,0 +1,481 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2003-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ucnv_ext.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003jun13
+* created by: Markus W. Scherer
+*
+* Conversion extensions
+*/
+
+#ifndef __UCNV_EXT_H__
+#define __UCNV_EXT_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "ucnv_cnv.h"
+
+/*
+ * See icuhtml/design/conversion/conversion_extensions.html
+ *
+ * Conversion extensions serve three purposes:
+ * 1. They support m:n mappings.
+ * 2. They support extension-only conversion files that are used together
+ * with the regular conversion data in base files.
+ * 3. They support mappings with more complicated meta data,
+ * for example "good one-way" mappings (|4).
+ *
+ * A base file may contain an extension table (explicitly requested or
+ * implicitly generated for m:n mappings), but its extension table is not
+ * used when an extension-only file is used.
+ *
+ * It is an error if a base file contains any regular (not extension) mapping
+ * from the same sequence as a mapping in the extension file
+ * because the base mapping would hide the extension mapping.
+ *
+ *
+ * Data for conversion extensions:
+ *
+ * One set of data structures per conversion direction (to/from Unicode).
+ * The data structures are sorted by input units to allow for binary search.
+ * Input sequences of more than one unit are handled like contraction tables
+ * in collation:
+ * The lookup value of a unit points to another table that is to be searched
+ * for the next unit, recursively.
+ *
+ * For conversion from Unicode, the initial code point is looked up in
+ * a 3-stage trie for speed,
+ * with an additional table of unique results to save space.
+ *
+ * Long output strings are stored in separate arrays, with length and index
+ * in the lookup tables.
+ * Output results also include a flag distinguishing roundtrip from
+ * (reverse) fallback mappings.
+ *
+ * Input Unicode strings must not begin or end with unpaired surrogates
+ * to avoid problems with matches on parts of surrogate pairs.
+ *
+ * Mappings from multiple characters (code points or codepage state
+ * table sequences) must be searched preferring the longest match.
+ * For this to work and be efficient, the variable-width table must contain
+ * all mappings that contain prefixes of the multiple characters.
+ * If an extension table is built on top of a base table in another file
+ * and a base table entry is a prefix of a multi-character mapping, then
+ * this is an error.
+ *
+ *
+ * Implementation note:
+ *
+ * Currently, the parser and several checks in the code limit the number
+ * of UChars or bytes in a mapping to
+ * UCNV_EXT_MAX_UCHARS and UCNV_EXT_MAX_BYTES, respectively,
+ * which are output value limits in the data structure.
+ *
+ * For input, this is not strictly necessary - it is a hard limit only for the
+ * buffers in UConverter that are used to store partial matches.
+ *
+ * Input sequences could otherwise be arbitrarily long if partial matches
+ * need not be stored (i.e., if a sequence does not span several buffers with too
+ * many units before the last buffer), although then results would differ
+ * depending on whether partial matches exceed the limits or not,
+ * which depends on the pattern of buffer sizes.
+ *
+ *
+ * Data structure:
+ *
+ * int32_t indexes[>=32];
+ *
+ * Array of indexes and lengths etc. The length of the array is at least 32.
+ * The actual length is stored in indexes[0] to be forward compatible.
+ *
+ * Each index to another array is the number of bytes from indexes[].
+ * Each length of an array is the number of array base units in that array.
+ *
+ * Some of the structures may not be present, in which case their indexes
+ * and lengths are 0.
+ *
+ * Usage of indexes[i]:
+ * [0] length of indexes[]
+ *
+ * // to Unicode table
+ * [1] index of toUTable[] (array of uint32_t)
+ * [2] length of toUTable[]
+ * [3] index of toUUChars[] (array of UChar)
+ * [4] length of toUUChars[]
+ *
+ * // from Unicode table, not for the initial code point
+ * [5] index of fromUTableUChars[] (array of UChar)
+ * [6] index of fromUTableValues[] (array of uint32_t)
+ * [7] length of fromUTableUChars[] and fromUTableValues[]
+ * [8] index of fromUBytes[] (array of char)
+ * [9] length of fromUBytes[]
+ *
+ * // from Unicode trie for initial-code point lookup
+ * [10] index of fromUStage12[] (combined array of uint16_t for stages 1 & 2)
+ * [11] length of stage 1 portion of fromUStage12[]
+ * [12] length of fromUStage12[]
+ * [13] index of fromUStage3[] (array of uint16_t indexes into fromUStage3b[])
+ * [14] length of fromUStage3[]
+ * [15] index of fromUStage3b[] (array of uint32_t like fromUTableValues[])
+ * [16] length of fromUStage3b[]
+ *
+ * [17] Bit field containing numbers of bytes:
+ * 31..24 reserved, 0
+ * 23..16 maximum input bytes
+ * 15.. 8 maximum output bytes
+ * 7.. 0 maximum bytes per UChar
+ *
+ * [18] Bit field containing numbers of UChars:
+ * 31..24 reserved, 0
+ * 23..16 maximum input UChars
+ * 15.. 8 maximum output UChars
+ * 7.. 0 maximum UChars per byte
+ *
+ * [19] Bit field containing flags:
+ * (extension table unicodeMask)
+ * 1 UCNV_HAS_SURROGATES flag for the extension table
+ * 0 UCNV_HAS_SUPPLEMENTARY flag for the extension table
+ *
+ * [20]..[30] reserved, 0
+ * [31] number of bytes for the entire extension structure
+ * [>31] reserved; there are indexes[0] indexes
+ *
+ *
+ * uint32_t toUTable[];
+ *
+ * Array of byte/value pairs for lookups for toUnicode conversion.
+ * The array is partitioned into sections like collation contraction tables.
+ * Each section contains one word with the number of following words and
+ * a default value for when the lookup in this section yields no match.
+ *
+ * A section is sorted in ascending order of input bytes,
+ * allowing for fast linear or binary searches.
+ * The builder may store entries for a contiguous range of byte values
+ * (compare difference between the first and last one with count),
+ * which then allows for direct array access.
+ * The builder should always do this for the initial table section.
+ *
+ * Entries may have 0 values, see below.
+ * No two entries in a section have the same byte values.
+ *
+ * Each uint32_t contains an input byte value in bits 31..24 and the
+ * corresponding lookup value in bits 23..0.
+ * Interpret the value as follows:
+ * if(value==0) {
+ * no match, see below
+ * } else if(value<0x1f0000) {
+ * partial match - use value as index to the next toUTable section
+ * and match the next unit; (value indexes toUTable[value])
+ * } else {
+ * if(bit 23 set) {
+ * roundtrip;
+ * } else {
+ * fallback;
+ * }
+ * unset value bit 23;
+ * if(value<=0x2fffff) {
+ * (value-0x1f0000) is a code point; (BMP: value<=0x1fffff)
+ * } else {
+ * bits 17..0 (value&0x3ffff) is an index to
+ * the result UChars in toUUChars[]; (0 indexes toUUChars[0])
+ * length of the result=((value>>18)-12); (length=0..19)
+ * }
+ * }
+ *
+ * The first word in a section contains the number of following words in the
+ * input byte position (bits 31..24, number=1..0xff).
+ * The value of the initial word is used when the current byte is not found
+ * in this section.
+ * If the value is not 0, then it represents a result as above.
+ * If the value is 0, then the search has to return a shorter match with an
+ * earlier default value as the result, or result in "unmappable" even for the
+ * initial bytes.
+ * If the value is 0 for the initial toUTable entry, then the initial byte
+ * does not start any mapping input.
+ *
+ *
+ * UChar toUUChars[];
+ *
+ * Contains toUnicode mapping results, stored as sequences of UChars.
+ * Indexes and lengths stored in the toUTable[].
+ *
+ *
+ * UChar fromUTableUChars[];
+ * uint32_t fromUTableValues[];
+ *
+ * The fromUTable is split into two arrays, but works otherwise much like
+ * the toUTable. The array is partitioned into sections like collation
+ * contraction tables and toUTable.
+ * A row in the table consists of same-index entries in fromUTableUChars[]
+ * and fromUTableValues[].
+ *
+ * Interpret a value as follows:
+ * if(value==0) {
+ * no match, see below
+ * } else if(value<=0xffffff) { (bits 31..24 are 0)
+ * partial match - use value as index to the next fromUTable section
+ * and match the next unit; (value indexes fromUTable[value])
+ * } else {
+ * if(value==0x80000001) {
+ * return no mapping, but request for <subchar1>;
+ * }
+ * if(bit 31 set) {
+ * roundtrip (|0);
+ * } else if(bit 30 set) {
+ * "good one-way" mapping (|4); -- new in ICU4C 51, _MBCSHeader.version 5.4/4.4
+ * } else {
+ * normal fallback (|1);
+ * }
+ * // bit 29 reserved, 0
+ * length=(value>>24)&0x1f; (bits 28..24)
+ * if(length==1..3) {
+ * bits 23..0 contain 1..3 bytes, padded with 00s on the left;
+ * } else {
+ * bits 23..0 (value&0xffffff) is an index to
+ * the result bytes in fromUBytes[]; (0 indexes fromUBytes[0])
+ * }
+ * }
+ *
+ * The first pair in a section contains the number of following pairs in the
+ * UChar position (16 bits, number=1..0xffff).
+ * The value of the initial pair is used when the current UChar is not found
+ * in this section.
+ * If the value is not 0, then it represents a result as above.
+ * If the value is 0, then the search has to return a shorter match with an
+ * earlier default value as the result, or result in "unmappable" even for the
+ * initial UChars.
+ *
+ * If the from Unicode trie is present, then the from Unicode search tables
+ * are not used for initial code points.
+ * In this case, the first entries (index 0) in the tables are not used
+ * (reserved, set to 0) because a value of 0 is used in trie results
+ * to indicate no mapping.
+ *
+ *
+ * uint16_t fromUStage12[];
+ *
+ * Stages 1 & 2 of a trie that maps an initial code point.
+ * Indexes in stage 1 are all offset by the length of stage 1 so that the
+ * same array pointer can be used for both stages.
+ * If (c>>10)>=(length of stage 1) then c does not start any mapping.
+ * Same bit distribution as for regular conversion tries.
+ *
+ *
+ * uint16_t fromUStage3[];
+ * uint32_t fromUStage3b[];
+ *
+ * Stage 3 of the trie. The first array simply contains indexes to the second,
+ * which contains words in the same format as fromUTableValues[].
+ * Use a stage 3 granularity of 4, which allows for 256k stage 3 entries,
+ * and 16-bit entries in stage 3 allow for 64k stage 3b entries.
+ * The stage 3 granularity means that the stage 2 entry needs to be left-shifted.
+ *
+ * Two arrays are used because it is expected that more than half of the stage 3
+ * entries will be zero. The 16-bit index stage 3 array saves space even
+ * considering storing a total of 6 bytes per non-zero entry in both arrays
+ * together.
+ * Using a stage 3 granularity of >1 diminishes the compactability in that stage
+ * but provides a larger effective addressing space in stage 2.
+ * All but the final result stage use 16-bit entries to save space.
+ *
+ * fromUStage3b[] contains a zero for "no mapping" at its index 0,
+ * and may contain UCNV_EXT_FROM_U_SUBCHAR1 at index 1 for "<subchar1> SUB mapping"
+ * (i.e., "no mapping" with preference for <subchar1> rather than <subchar>),
+ * and all other items are unique non-zero results.
+ *
+ * The default value of a fromUTableValues[] section that is referenced
+ * _directly_ from a fromUStage3b[] item may also be UCNV_EXT_FROM_U_SUBCHAR1,
+ * but this value must not occur anywhere else in fromUTableValues[]
+ * because "no mapping" is always a property of a single code point,
+ * never of multiple.
+ *
+ *
+ * char fromUBytes[];
+ *
+ * Contains fromUnicode mapping results, stored as sequences of chars.
+ * Indexes and lengths stored in the fromUTableValues[].
+ */
+enum {
+ UCNV_EXT_INDEXES_LENGTH, /* 0: length of indexes[] */
+
+ UCNV_EXT_TO_U_INDEX, /* 1: index of toUTable[] */
+ UCNV_EXT_TO_U_LENGTH, /* 2: length of toUTable[] */
+ UCNV_EXT_TO_U_UCHARS_INDEX, /* 3: index of toUUChars[] */
+ UCNV_EXT_TO_U_UCHARS_LENGTH, /* 4: length of toUUChars[] */
+
+ UCNV_EXT_FROM_U_UCHARS_INDEX, /* 5: index of fromUTableUChars[] */
+ UCNV_EXT_FROM_U_VALUES_INDEX, /* 6: index of fromUTableValues[] */
+ UCNV_EXT_FROM_U_LENGTH, /* 7: length of fromUTableUChars[] and fromUTableValues[] */
+ UCNV_EXT_FROM_U_BYTES_INDEX, /* 8: index of fromUBytes[] */
+ UCNV_EXT_FROM_U_BYTES_LENGTH, /* 9: length of fromUBytes[] */
+
+ UCNV_EXT_FROM_U_STAGE_12_INDEX, /* 10: index of fromUStage12[] */
+ UCNV_EXT_FROM_U_STAGE_1_LENGTH, /* 11: length of the stage 1 portion of fromUStage12[] */
+ UCNV_EXT_FROM_U_STAGE_12_LENGTH, /* 12: length of fromUStage12[] */
+ UCNV_EXT_FROM_U_STAGE_3_INDEX, /* 13: index of fromUStage3[] */
+ UCNV_EXT_FROM_U_STAGE_3_LENGTH, /* 14: length of fromUStage3[] */
+ UCNV_EXT_FROM_U_STAGE_3B_INDEX, /* 15: index of fromUStage3b[] */
+ UCNV_EXT_FROM_U_STAGE_3B_LENGTH, /* 16: length of fromUStage3b[] */
+
+ UCNV_EXT_COUNT_BYTES, /* 17: bit field with numbers of bytes */
+ UCNV_EXT_COUNT_UCHARS, /* 18: bit field with numbers of UChars */
+ UCNV_EXT_FLAGS, /* 19: bit field with flags (extension table unicodeMask) */
+
+ UCNV_EXT_RESERVED_INDEX, /* 20, moves with additional indexes */
+
+ UCNV_EXT_SIZE=31, /* number of bytes for the entire extension structure */
+ UCNV_EXT_INDEXES_MIN_LENGTH=32 /* indexes[] has at least this many entries */
+};
+
+/* get the pointer to an extension array from indexes[index], which is a byte offset from indexes[] itself */
+#define UCNV_EXT_ARRAY(indexes, index, itemType) \
+ ((const itemType *)((const char *)(indexes)+(indexes)[index]))
+/* bits 7..0 of indexes[UCNV_EXT_COUNT_BYTES]: maximum bytes per UChar */
+#define UCNV_GET_MAX_BYTES_PER_UCHAR(indexes) \
+ ((indexes)[UCNV_EXT_COUNT_BYTES]&0xff)
+
+/* internal API ------------------------------------------------------------- */
+
+U_CFUNC UBool
+ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx,
+ int32_t firstLength,
+ const char **src, const char *srcLimit,
+ UChar **target, const UChar *targetLimit,
+ int32_t **offsets, int32_t srcIndex,
+ UBool flush,
+ UErrorCode *pErrorCode);
+
+U_CFUNC UChar32
+ucnv_extSimpleMatchToU(const int32_t *cx,
+ const char *source, int32_t length,
+ UBool useFallback);
+
+U_CFUNC void
+ucnv_extContinueMatchToU(UConverter *cnv,
+ UConverterToUnicodeArgs *pArgs, int32_t srcIndex,
+ UErrorCode *pErrorCode);
+
+
+U_CFUNC UBool
+ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx,
+ UChar32 cp,
+ const UChar **src, const UChar *srcLimit,
+ char **target, const char *targetLimit,
+ int32_t **offsets, int32_t srcIndex,
+ UBool flush,
+ UErrorCode *pErrorCode);
+
+U_CFUNC int32_t
+ucnv_extSimpleMatchFromU(const int32_t *cx,
+ UChar32 cp, uint32_t *pValue,
+ UBool useFallback);
+
+U_CFUNC void
+ucnv_extContinueMatchFromU(UConverter *cnv,
+ UConverterFromUnicodeArgs *pArgs, int32_t srcIndex,
+ UErrorCode *pErrorCode);
+
+/*
+ * Add code points and strings to the set according to the extension mappings.
+ * Limitation on the UConverterSetFilter:
+ * The filters currently assume that they are used with 1:1 mappings.
+ * They only apply to single input code points, and then they pass through
+ * only mappings with single-charset-code results.
+ * For example, the Shift-JIS filter only works for 2-byte results and tests
+ * that those 2 bytes are in the JIS X 0208 range of Shift-JIS.
+ */
+U_CFUNC void
+ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
+ const USetAdder *sa,
+ UConverterUnicodeSet which,
+ UConverterSetFilter filter,
+ UErrorCode *pErrorCode);
+
+/* toUnicode helpers -------------------------------------------------------- */
+
+#define UCNV_EXT_TO_U_BYTE_SHIFT 24 /* the input byte is in bits 31..24 of a toUTable word */
+#define UCNV_EXT_TO_U_VALUE_MASK 0xffffff /* the lookup value is in bits 23..0 */
+#define UCNV_EXT_TO_U_MIN_CODE_POINT 0x1f0000 /* smaller non-zero values are partial-match indexes */
+#define UCNV_EXT_TO_U_MAX_CODE_POINT 0x2fffff /* larger values carry an index and a length */
+#define UCNV_EXT_TO_U_ROUNDTRIP_FLAG ((uint32_t)1<<23) /* set: roundtrip, clear: fallback */
+#define UCNV_EXT_TO_U_INDEX_MASK 0x3ffff /* bits 17..0: index into toUUChars[] */
+#define UCNV_EXT_TO_U_LENGTH_SHIFT 18
+#define UCNV_EXT_TO_U_LENGTH_OFFSET 12 /* result length=(value>>18)-12 */
+
+/* maximum number of indexed UChars */
+#define UCNV_EXT_MAX_UCHARS 19
+
+#define UCNV_EXT_TO_U_MAKE_WORD(byte, value) (((uint32_t)(byte)<<UCNV_EXT_TO_U_BYTE_SHIFT)|(value))
+
+#define UCNV_EXT_TO_U_GET_BYTE(word) ((word)>>UCNV_EXT_TO_U_BYTE_SHIFT)
+#define UCNV_EXT_TO_U_GET_VALUE(word) ((word)&UCNV_EXT_TO_U_VALUE_MASK)
+
+#define UCNV_EXT_TO_U_IS_PARTIAL(value) ((value)<UCNV_EXT_TO_U_MIN_CODE_POINT) /* for toUnicode values only */
+#define UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value) (value)
+
+#define UCNV_EXT_TO_U_IS_ROUNDTRIP(value) (((value)&UCNV_EXT_TO_U_ROUNDTRIP_FLAG)!=0)
+#define UCNV_EXT_TO_U_MASK_ROUNDTRIP(value) ((value)&~UCNV_EXT_TO_U_ROUNDTRIP_FLAG)
+
+/* use after masking off the roundtrip flag */
+#define UCNV_EXT_TO_U_IS_CODE_POINT(value) ((value)<=UCNV_EXT_TO_U_MAX_CODE_POINT)
+#define UCNV_EXT_TO_U_GET_CODE_POINT(value) ((value)-UCNV_EXT_TO_U_MIN_CODE_POINT)
+
+#define UCNV_EXT_TO_U_GET_INDEX(value) ((value)&UCNV_EXT_TO_U_INDEX_MASK)
+#define UCNV_EXT_TO_U_GET_LENGTH(value) (((value)>>UCNV_EXT_TO_U_LENGTH_SHIFT)-UCNV_EXT_TO_U_LENGTH_OFFSET)
+
+/* fromUnicode helpers ------------------------------------------------------ */
+
+/* most trie constants are shared with ucnvmbcs.h */
+
+/* see similar utrie.h UTRIE_INDEX_SHIFT and UTRIE_DATA_GRANULARITY */
+#define UCNV_EXT_STAGE_2_LEFT_SHIFT 2
+#define UCNV_EXT_STAGE_3_GRANULARITY 4
+
+/* trie access, returns the stage 3 value=index to stage 3b; s1Index=c>>10 */
+#define UCNV_EXT_FROM_U(stage12, stage3, s1Index, c) \
+ (stage3)[ ((int32_t)(stage12)[ (stage12)[s1Index] +(((c)>>4)&0x3f) ]<<UCNV_EXT_STAGE_2_LEFT_SHIFT) +((c)&0xf) ]
+
+#define UCNV_EXT_FROM_U_LENGTH_SHIFT 24 /* the result length is in bits 28..24 */
+#define UCNV_EXT_FROM_U_ROUNDTRIP_FLAG ((uint32_t)1<<31) /* bit 31: roundtrip (|0) */
+#define UCNV_EXT_FROM_U_GOOD_ONE_WAY_FLAG 0x40000000 /* bit 30: "good one-way" mapping (|4) */
+#define UCNV_EXT_FROM_U_STATUS_MASK 0xc0000000 /* bits 31..30 */
+#define UCNV_EXT_FROM_U_RESERVED_MASK 0x20000000 /* bit 29: reserved, 0 */
+#define UCNV_EXT_FROM_U_DATA_MASK 0xffffff /* bits 23..0: result bytes or index to them */
+
+/* special value for "no mapping" to <subchar1> (impossible roundtrip to 0 bytes, value 01) */
+#define UCNV_EXT_FROM_U_SUBCHAR1 0x80000001
+
+/* at most 3 bytes in the lower part of the value */
+#define UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH 3
+
+/* maximum number of indexed bytes */
+#define UCNV_EXT_MAX_BYTES 0x1f
+
+#define UCNV_EXT_FROM_U_IS_PARTIAL(value) (((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)==0) /* bits 31..24 are all 0 */
+#define UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value) (value)
+
+#define UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) (((value)&UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)!=0)
+#define UCNV_EXT_FROM_U_MASK_ROUNDTRIP(value) ((value)&~UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)
+
+/* get length; masks away all other bits */
+#define UCNV_EXT_FROM_U_GET_LENGTH(value) (int32_t)(((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)&UCNV_EXT_MAX_BYTES)
+
+/* get bytes or bytes index */
+#define UCNV_EXT_FROM_U_GET_DATA(value) ((value)&UCNV_EXT_FROM_U_DATA_MASK)
+
+#endif
+
+#endif
diff --git a/thirdparty/icu4c/common/ucnv_imp.h b/thirdparty/icu4c/common/ucnv_imp.h
new file mode 100644
index 0000000000..c5e6aeb47e
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_imp.h
@@ -0,0 +1,139 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+*
+* ucnv_imp.h:
+* Contains all internal and external data structure definitions
+* Created & Maintained by Bertrand A. Damiba
+*
+*
+*
+* ATTENTION:
+* ---------
+* Although the data structures in this file are open and stack allocatable
+* we reserve the right to hide them in further releases.
+*/
+
+#ifndef UCNV_IMP_H
+#define UCNV_IMP_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/uloc.h"
+#include "ucnv_bld.h"
+
+/*
+ * Fast check for whether a charset name is "UTF-8".
+ * This does not recognize all of the variations that ucnv_open()
+ * and other functions recognize, but it covers most cases.
+ * @param name const char * charset name; any pointer expression, but it is evaluated several times, so it must have no side effects
+ * @return nonzero only if name is exactly "UTF-8", "UTF8", "utf-8" or "utf8"
+ */
+#define UCNV_FAST_IS_UTF8(name) \
+    ((((name)[0]=='U' ? \
+        ( (name)[1]=='T' && (name)[2]=='F') : \
+        ((name)[0]=='u' && (name)[1]=='t' && (name)[2]=='f'))) \
+    && ((name)[3]=='-' ? \
+        ((name)[4]=='8' && (name)[5]==0) : \
+        ((name)[3]=='8' && (name)[4]==0)))
+
+typedef struct { /* NOTE(review): presumably the parts a converter name string is split into by ucnv_loadSharedData() - confirm in ucnv_bld.cpp */
+ char cnvName[UCNV_MAX_CONVERTER_NAME_LENGTH]; /* converter name part */
+ char locale[ULOC_FULLNAME_CAPACITY]; /* locale part */
+ uint32_t options; /* converter options bit set */
+} UConverterNamePieces;
+
+U_CFUNC UBool
+ucnv_canCreateConverter(const char *converterName, UErrorCode *err);
+
+/* Figures out if we need to go to file to read in the data tables.
+ * @param converterName The name of the converter (myUConverter is undocumented here; presumably NULL or pre-allocated storage as for ucnv_createAlgorithmicConverter - TODO confirm)
+ * @param err The error code
+ * @return the newly created converter
+ */
+U_CAPI UConverter *
+ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err);
+
+/*
+ * Open a purely algorithmic converter, specified by a type constant.
+ * @param myUConverter NULL, or pre-allocated UConverter structure to avoid
+ * a memory allocation
+ * @param type requested converter type
+ * @param locale locale parameter, or ""
+ * @param options converter options bit set (default 0)
+ * @param err ICU error code, not tested for U_FAILURE on input
+ * because this is an internal function
+ * @internal
+ */
+U_CFUNC UConverter *
+ucnv_createAlgorithmicConverter(UConverter *myUConverter,
+ UConverterType type,
+ const char *locale, uint32_t options,
+ UErrorCode *err);
+
+/*
+ * Creates a converter from shared data.
+ * Adopts mySharedConverterData: No matter what happens, the caller must not
+ * unload mySharedConverterData, except via ucnv_close(return value)
+ * if this function is successful.
+ */
+U_CFUNC UConverter *
+ucnv_createConverterFromSharedData(UConverter *myUConverter,
+ UConverterSharedData *mySharedConverterData,
+ UConverterLoadArgs *pArgs,
+ UErrorCode *err);
+
+U_CFUNC UConverter *
+ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode *err);
+
+/**
+ * Load a converter but do not create a UConverter object.
+ * Simply return the UConverterSharedData.
+ * Performs alias lookup etc.
+ * The UConverterNamePieces need not be initialized
+ * before calling this function.
+ * The UConverterLoadArgs must be initialized
+ * before calling this function.
+ * If the args are passed in, then the pieces must be passed in too.
+ * In other words, the following combinations are allowed:
+ * - pieces==NULL && args==NULL
+ * - pieces!=NULL && args==NULL
+ * - pieces!=NULL && args!=NULL
+ * @internal
+ */
+U_CFUNC UConverterSharedData *
+ucnv_loadSharedData(const char *converterName,
+ UConverterNamePieces *pieces,
+ UConverterLoadArgs *pArgs,
+ UErrorCode * err);
+
+/**
+ * This may unload the shared data in a thread safe manner.
+ * This will only unload the data if no other converters are sharing it.
+ */
+U_CFUNC void
+ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData);
+
+/**
+ * This is a thread safe way to increment the reference count.
+ */
+U_CFUNC void
+ucnv_incrementRefCount(UConverterSharedData *sharedData);
+
+/**
+ * These are the default error handling callbacks for the charset conversion framework.
+ * For performance reasons, they are only called to handle an error (not normally called for a reset or close).
+ */
+#define UCNV_TO_U_DEFAULT_CALLBACK ((UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE)
+#define UCNV_FROM_U_DEFAULT_CALLBACK ((UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE)
+
+#endif
+
+#endif /* UCNV_IMP_H */
diff --git a/thirdparty/icu4c/common/ucnv_io.cpp b/thirdparty/icu4c/common/ucnv_io.cpp
new file mode 100644
index 0000000000..7a95a3f1e6
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_io.cpp
@@ -0,0 +1,1360 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+*
+* ucnv_io.cpp:
+* initializes global variables and defines functions pertaining to converter
+* name resolution aspect of the conversion code.
+*
+* new implementation:
+*
+* created on: 1999nov22
+* created by: Markus W. Scherer
+*
+* Use the binary cnvalias.icu (created from convrtrs.txt) to work
+* with aliases for converter names.
+*
+* Date Name Description
+* 11/22/1999 markus Created
+* 06/28/2002 grhoten Major overhaul of the converter alias design.
+* Now an alias can map to different converters
+* depending on the specified standard.
+*******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/udata.h"
+
+#include "umutex.h"
+#include "uarrsort.h"
+#include "uassert.h"
+#include "udataswp.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "ucnv_io.h"
+#include "uenumimp.h"
+#include "ucln_cmn.h"
+
+/* Format of cnvalias.icu -----------------------------------------------------
+ *
+ * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
+ * This binary form contains several tables. All indexes are to uint16_t
+ * units, and not to the bytes (uint8_t units). Addressing everything on
+ * 16-bit boundaries allows us to store more information with small index
+ * numbers, which are also 16-bit in size. The majority of the table (except
+ * the string table) are 16-bit numbers.
+ *
+ * First there is the size of the Table of Contents (TOC). The TOC
+ * entries contain the size of each section. In order to find the offset
+ * of a section you just need to sum up the sizes of the previous sections.
+ * The TOC length and entries are an array of uint32_t values.
+ * The first section after the TOC starts immediately after the TOC.
+ *
+ * 1) This section contains a list of converters. This list contains indexes
+ * into the string table for the converter name. The index of this list is
+ * also used by other sections, which are mentioned later on.
+ * This list is not sorted.
+ *
+ * 2) This section contains a list of tags. This list contains indexes
+ * into the string table for the tag name. The index of this list is
+ * also used by other sections, which are mentioned later on.
+ * This list is in priority order of standards.
+ *
+ * 3) This section contains a list of sorted unique aliases. This
+ * list contains indexes into the string table for the alias name. The
+ * index of this list is also used by other sections, like the 4th section.
+ * The index for the 3rd and 4th section is used to get the
+ * alias -> converter name mapping. Section 3 and 4 form a two column table.
+ * Some of the most significant bits of each index may contain other
+ * information (see findConverter for details).
+ *
+ * 4) This section contains a list of mapped converter names. Consider this
+ * as a table that maps the 3rd section to the 1st section. This list contains
+ * indexes into the 1st section. The index of this list is the same index in
+ * the 3rd section. There is also some extra information in the high bits of
+ * each converter index in this table. Currently it's only used to say that
+ * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
+ * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
+ * the predigested form of the 5th section so that an alias lookup can be fast.
+ *
+ * 5) This section contains a 2D array with indexes to the 6th section. This
+ * section is the full form of all alias mappings. The column index is the
+ * index into the converter list (column header). The row index is the index
+ * to the tag list (row header). This 2D array is the top part of a 3D array. The
+ * third dimension is in the 6th section.
+ *
+ * 6) This is a blob of variable-length arrays. Each array starts with a size,
+ * and is followed by indexes to alias names in the string table. This is
+ * the third dimension of section 5. No other section should be referencing
+ * this section.
+ *
+ * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
+ * presence indicates that a section 9 exists. UConverterAliasOptions specifies
+ * what type of string normalization is used among other potential things in the
+ * future.
+ *
+ * 8) This is the string table. All strings are indexed on an even address.
+ * There are two reasons for this. First many chip architectures locate strings
+ * faster on even address boundaries. Second, since all indexes are 16-bit
+ * numbers, this string table can be 128KB in size instead of 64KB when we
+ * only have strings starting on an even address.
+ *
+ * 9) When present this is a set of prenormalized strings from section 8. This
+ * table contains normalized strings with the dashes and spaces stripped out,
+ * and all strings lowercased. In the future, the options in section 7 may state
+ * other types of normalization.
+ *
+ * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
+ * has a unique alias among all converters. That same alias can
+ * be mentioned in other standards on different converters,
+ * but only one alias per tag can be unique.
+ *
+ *
+ * Converter Names (Usually in TR22 form)
+ * -------------------------------------------.
+ * T / /|
+ * a / / |
+ * g / / |
+ * s / / |
+ * / / |
+ * ------------------------------------------/ |
+ * A | | |
+ * l | | |
+ * i | | /
+ * a | | /
+ * s | | /
+ * e | | /
+ * s | |/
+ * -------------------------------------------
+ *
+ *
+ *
+ * Here is what it really looks like. It's like swiss cheese.
+ * There are holes. Some converters aren't recognized by
+ * a standard, or they are really old converters that the
+ * standard doesn't recognize anymore.
+ *
+ * Converter Names (Usually in TR22 form)
+ * -------------------------------------------.
+ * T /##########################################/|
+ * a / # # /#
+ * g / # ## ## ### # ### ### ### #/
+ * s / # ##### #### ## ## #/#
+ * / ### # # ## # # # ### # # #/##
+ * ------------------------------------------/# #
+ * A |### # # ## # # # ### # # #|# #
+ * l |# # # # # ## # #|# #
+ * i |# # # # # # #|#
+ * a |# #|#
+ * s | #|#
+ * e
+ * s
+ *
+ */
+
+/**
+ * Used by the UEnumeration API: per-enumeration state for ucnv_openStandardNames().
+ */
+typedef struct UAliasContext {
+ uint32_t listOffset; /* offset of the alias list within gMainTable.taggedAliasLists; 0 means there is nothing to enumerate */
+ uint32_t listIdx; /* index of the next alias to return; set back to 0 by reset */
+} UAliasContext;
+
+static const char DATA_NAME[] = "cnvalias"; /* item name passed to udata_openChoice() */
+static const char DATA_TYPE[] = "icu"; /* item type passed to udata_openChoice() */
+
+static UDataMemory *gAliasData=NULL; /* set by initAliasData(), closed and cleared by ucnv_io_cleanup() */
+static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER; /* guards initAliasData(); reset by ucnv_io_cleanup() */
+
+enum { /* positions in the uint32_t TOC; the numbers match the section numbers in the format description above */
+ tocLengthIndex=0, /* number of TOC entries that follow */
+ converterListIndex=1, /* section 1: converter names */
+ tagListIndex=2, /* section 2: tag (standard) names */
+ aliasListIndex=3, /* section 3: sorted unique aliases */
+ untaggedConvArrayIndex=4, /* section 4: alias -> converter index */
+ taggedAliasArrayIndex=5, /* section 5: tag x converter array of list offsets */
+ taggedAliasListsIndex=6, /* section 6: the variable-length alias lists */
+ tableOptionsIndex=7, /* section 7: UConverterAliasOptions */
+ stringTableIndex=8, /* section 8: string table */
+ normalizedStringTableIndex=9, /* section 9: prenormalized strings (optional) */
+ offsetsCount, /* length of the swapper's temporary offsets[] */
+ minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! */
+};
+
+static const UConverterAliasOptions defaultTableOptions = { /* used by initAliasData() when the file has no usable options section */
+ UCNV_IO_UNNORMALIZED, /* stringNormalizationType: lookups must use ucnv_compareNames() */
+ 0 /* containsCnvOptionInfo */
+};
+static UConverterAlias gMainTable; /* section pointers and sizes filled in by initAliasData(); zeroed by ucnv_io_cleanup() */
+
+#define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx)) /* idx is a string-table index as stored in the lists (16-bit units per the format description) */
+#define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx)) /* same index, but into the prenormalized strings */
+
+static UBool U_CALLCONV
+isAcceptable(void * /*context*/,
+             const char * /*type*/, const char * /*name*/,
+             const UDataInfo *pInfo) {
+    /* udata_openChoice() filter; the size is tested first, before any other field is read. */
+    if (pInfo->size < 20) { return FALSE; }
+    /* The data must match this platform's byte order and charset family. */
+    if (pInfo->isBigEndian != U_IS_BIG_ENDIAN || pInfo->charsetFamily != U_CHARSET_FAMILY) { return FALSE; }
+    /* dataFormat="CvAl" */
+    if (pInfo->dataFormat[0] != 0x43 || pInfo->dataFormat[1] != 0x76 ||
+        pInfo->dataFormat[2] != 0x41 || pInfo->dataFormat[3] != 0x6c) { return FALSE; }
+    /* Only format version 3 is accepted. */
+    return (UBool)(pInfo->formatVersion[0] == 3);
+}
+
+static UBool U_CALLCONV ucnv_io_cleanup(void)
+{
+    if (gAliasData != NULL) {
+        udata_close(gAliasData);
+        gAliasData = NULL;
+    }
+    /* Re-arm the one-time initializer, then forget every pointer and size in the main table. */
+    gAliasDataInitOnce.reset();
+    uprv_memset(&gMainTable, 0, sizeof(gMainTable));
+
+    return TRUE; /* Everything was cleaned up */
+}
+
+static void U_CALLCONV initAliasData(UErrorCode &errCode) { /* opens the cnvalias data and fills gMainTable; run through umtx_initOnce() in haveAliasData() */
+ UDataMemory *data;
+ const uint16_t *table; /* the data viewed in 16-bit units; every section offset below counts these */
+ const uint32_t *sectionSizes; /* the same memory viewed as the 32-bit TOC: [0] is the TOC length, [1..] are section sizes */
+ uint32_t tableStart; /* TOC length read from the file */
+ uint32_t currOffset; /* running offset, in uint16_t units, of the section being located */
+
+ ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
+
+ U_ASSERT(gAliasData == NULL);
+ data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode);
+ if(U_FAILURE(errCode)) {
+ return;
+ }
+
+ sectionSizes = (const uint32_t *)udata_getMemory(data);
+ table = (const uint16_t *)sectionSizes;
+
+ tableStart = sectionSizes[0];
+ if (tableStart < minTocLength) { /* fewer TOC entries than the 8 section sizes read unconditionally below */
+ errCode = U_INVALID_FORMAT_ERROR;
+ udata_close(data);
+ return;
+ }
+ gAliasData = data; /* kept open; closed by ucnv_io_cleanup() */
+
+ gMainTable.converterListSize = sectionSizes[1];
+ gMainTable.tagListSize = sectionSizes[2];
+ gMainTable.aliasListSize = sectionSizes[3];
+ gMainTable.untaggedConvArraySize = sectionSizes[4];
+ gMainTable.taggedAliasArraySize = sectionSizes[5];
+ gMainTable.taggedAliasListsSize = sectionSizes[6];
+ gMainTable.optionTableSize = sectionSizes[7];
+ gMainTable.stringTableSize = sectionSizes[8];
+
+ if (tableStart > 8) { /* a 9th size (normalized string table) exists only in longer TOCs */
+ gMainTable.normalizedStringTableSize = sectionSizes[9];
+ }
+
+ currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t)); /* skip the TOC entries plus the TOC length word, converted to 16-bit units */
+ gMainTable.converterList = table + currOffset;
+
+ currOffset += gMainTable.converterListSize; /* each section starts where the previous one ends */
+ gMainTable.tagList = table + currOffset;
+
+ currOffset += gMainTable.tagListSize;
+ gMainTable.aliasList = table + currOffset;
+
+ currOffset += gMainTable.aliasListSize;
+ gMainTable.untaggedConvArray = table + currOffset;
+
+ currOffset += gMainTable.untaggedConvArraySize;
+ gMainTable.taggedAliasArray = table + currOffset;
+
+ /* aliasLists is a 1's based array, but it has a padding character */
+ currOffset += gMainTable.taggedAliasArraySize;
+ gMainTable.taggedAliasLists = table + currOffset;
+
+ currOffset += gMainTable.taggedAliasListsSize;
+ if (gMainTable.optionTableSize > 0
+ && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
+ {
+ /* Faster table */
+ gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
+ }
+ else {
+ /* Smaller table, or I can't handle this normalization mode!
+ Use the original slower table lookup. */
+ gMainTable.optionTable = &defaultTableOptions;
+ }
+
+ currOffset += gMainTable.optionTableSize;
+ gMainTable.stringTable = table + currOffset;
+
+ currOffset += gMainTable.stringTableSize;
+ gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED) /* no usable normalized table: alias it to the plain string table */
+ ? gMainTable.stringTable : (table + currOffset));
+}
+
+
+static UBool /* result of U_SUCCESS(*pErrorCode) after the alias data load has been attempted */
+haveAliasData(UErrorCode *pErrorCode) {
+ umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode); /* initAliasData() reports load failures through *pErrorCode */
+ return U_SUCCESS(*pErrorCode);
+}
+
+static inline UBool
+isAlias(const char *alias, UErrorCode *pErrorCode) {
+    if(alias!=NULL) {
+        return (UBool)(alias[0]!=0); /* an empty string is not an alias, but is not an error either */
+    }
+    *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; /* only a NULL pointer is reported as an error */
+    return FALSE;
+}
+
+static uint32_t getTagNumber(const char *tagname) {
+    uint32_t tagIdx;
+    if (gMainTable.tagList == NULL) { /* no tag list set: nothing can match */
+        return UINT32_MAX;
+    }
+    for (tagIdx = 0; tagIdx < gMainTable.tagListSize; tagIdx++) {
+        if (uprv_stricmp(GET_STRING(gMainTable.tagList[tagIdx]), tagname) == 0) {
+            return tagIdx; /* the position in the tag list is the tag number */
+        }
+    }
+    return UINT32_MAX; /* sentinel: tagname is not in the list */
+}
+
+/* character types relevant for ucnv_compareNames() */
+enum {
+ UIGNORE,
+ ZERO,
+ NONZERO,
+ MINLETTER /* any values from here on are lowercase letter mappings */
+};
+
+/* character types for ASCII 00..7F: digits map to ZERO/NONZERO, letters of either case to the lowercase letter, everything else to 0 (UIGNORE) */
+static const uint8_t asciiTypes[128] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
+ 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
+ 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
+};
+/* Type of byte c: bytes with the high bit set are UIGNORE; c is parenthesized at both uses so expression arguments index the table correctly (c is evaluated twice). */
+#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)UIGNORE)
+
+/* character types for EBCDIC 80..FF; entry i describes byte 0x80+i (the macro below indexes with c&0x7f) */
+static const uint8_t ebcdicTypes[128] = {
+ 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
+ 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
+ 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
+ ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
+};
+
+/* Type of byte c: only bytes with the high bit set are looked up; all bytes below 0x80 are UIGNORE. */
+#define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE)
+
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
+#else
+# error U_CHARSET_FAMILY is not valid
+#endif
+
+
+/* Copies name to dst keeping only letters (lowercased) and digits, dropping a '0' that is directly followed by a digit unless a nonzero digit precedes it in the same digit run; at most one char is written per char read, so dst needs strlen(name)+1 chars. Returns dst. @see ucnv_compareNames */
+U_CAPI char * U_CALLCONV
+ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
+ char *dstItr = dst;
+ uint8_t type, nextType;
+ char c1;
+ UBool afterDigit = FALSE; /* TRUE while inside a digit run that already contained a nonzero digit */
+
+ while ((c1 = *name++) != 0) {
+ type = GET_ASCII_TYPE(c1);
+ switch (type) {
+ case UIGNORE:
+ afterDigit = FALSE;
+ continue; /* ignore all but letters and digits */
+ case ZERO:
+ if (!afterDigit) {
+ nextType = GET_ASCII_TYPE(*name); /* peek at the next char; name was already advanced */
+ if (nextType == ZERO || nextType == NONZERO) {
+ continue; /* ignore leading zero before another digit */
+ }
+ }
+ break; /* keep this zero; afterDigit is left unchanged */
+ case NONZERO:
+ afterDigit = TRUE;
+ break;
+ default:
+ c1 = (char)type; /* lowercased letter */
+ afterDigit = FALSE;
+ break;
+ }
+ *dstItr++ = c1; /* reached only via break; the continue statements above skip the write */
+ }
+ *dstItr = 0;
+ return dst;
+}
+
+U_CAPI char * U_CALLCONV /* returns dst; same stripping rules as ucnv_io_stripASCIIForCompare() but classifying bytes with GET_EBCDIC_TYPE */
+ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
+ char *dstItr = dst;
+ uint8_t type, nextType;
+ char c1;
+ UBool afterDigit = FALSE; /* TRUE while inside a digit run that already contained a nonzero digit */
+
+ while ((c1 = *name++) != 0) {
+ type = GET_EBCDIC_TYPE(c1);
+ switch (type) {
+ case UIGNORE:
+ afterDigit = FALSE;
+ continue; /* ignore all but letters and digits */
+ case ZERO:
+ if (!afterDigit) {
+ nextType = GET_EBCDIC_TYPE(*name); /* peek at the next char; name was already advanced */
+ if (nextType == ZERO || nextType == NONZERO) {
+ continue; /* ignore leading zero before another digit */
+ }
+ }
+ break; /* keep this zero; afterDigit is left unchanged */
+ case NONZERO:
+ afterDigit = TRUE;
+ break;
+ default:
+ c1 = (char)type; /* lowercased letter */
+ afterDigit = FALSE;
+ break;
+ }
+ *dstItr++ = c1; /* reached only via break; the continue statements above skip the write */
+ }
+ *dstItr = 0; /* at most one char is written per char read, so dst needs strlen(name)+1 chars */
+ return dst;
+}
+
+/**
+ * Do a fuzzy compare of two converter/alias names.
+ * The comparison is case-insensitive, ignores leading zeroes that are
+ * followed by further digits, and ignores all but letters and digits.
+ * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
+ * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
+ * at http://www.unicode.org/reports/tr22/
+ *
+ * This is a symmetrical (commutative) operation; order of arguments
+ * is insignificant. This is an important property for sorting the
+ * list (when the list is preprocessed into binary form) and for
+ * performing binary searches on it at run time.
+ *
+ * @param name1 a converter name or alias, zero-terminated
+ * @param name2 a converter name or alias, zero-terminated
+ * @return 0 if the names match, or a negative value if the name1
+ * lexically precedes name2, or a positive value if the name1
+ * lexically follows name2.
+ *
+ * @see ucnv_io_stripForCompare
+ */
+U_CAPI int U_EXPORT2
+ucnv_compareNames(const char *name1, const char *name2) {
+ int rc;
+ uint8_t type, nextType;
+ char c1, c2;
+ UBool afterDigit1 = FALSE, afterDigit2 = FALSE; /* per name: inside a digit run that already contained a nonzero digit */
+
+ for (;;) { /* each pass fetches the next significant char of each name and compares the pair */
+ while ((c1 = *name1++) != 0) {
+ type = GET_CHAR_TYPE(c1);
+ switch (type) {
+ case UIGNORE:
+ afterDigit1 = FALSE;
+ continue; /* ignore all but letters and digits */
+ case ZERO:
+ if (!afterDigit1) {
+ nextType = GET_CHAR_TYPE(*name1);
+ if (nextType == ZERO || nextType == NONZERO) {
+ continue; /* ignore leading zero before another digit */
+ }
+ }
+ break;
+ case NONZERO:
+ afterDigit1 = TRUE;
+ break;
+ default:
+ c1 = (char)type; /* lowercased letter */
+ afterDigit1 = FALSE;
+ break;
+ }
+ break; /* deliver c1 */
+ }
+ while ((c2 = *name2++) != 0) {
+ type = GET_CHAR_TYPE(c2);
+ switch (type) {
+ case UIGNORE:
+ afterDigit2 = FALSE;
+ continue; /* ignore all but letters and digits */
+ case ZERO:
+ if (!afterDigit2) {
+ nextType = GET_CHAR_TYPE(*name2);
+ if (nextType == ZERO || nextType == NONZERO) {
+ continue; /* ignore leading zero before another digit */
+ }
+ }
+ break;
+ case NONZERO:
+ afterDigit2 = TRUE;
+ break;
+ default:
+ c2 = (char)type; /* lowercased letter */
+ afterDigit2 = FALSE;
+ break;
+ }
+ break; /* deliver c2 */
+ }
+
+ /* If we reach the ends of both strings then they match */
+ if ((c1|c2)==0) {
+ return 0;
+ }
+
+ /* Case-insensitive comparison */
+ rc = (int)(unsigned char)c1 - (int)(unsigned char)c2; /* a name that ended (char 0) sorts before any remaining char */
+ if (rc != 0) {
+ return rc;
+ }
+ }
+}
+
+/*
+ * search for an alias (binary search over gMainTable.aliasList)
+ * return the converter number index for gMainTable.converterList, or UINT32_MAX if not found
+ */
+static inline uint32_t
+findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
+ uint32_t mid, start, limit;
+ uint32_t lastMid;
+ int result;
+ int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
+ char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
+
+ if (!isUnnormalized) {
+ if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) { /* would not fit strippedName including its terminator */
+ *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
+ return UINT32_MAX;
+ }
+
+ /* Lower case and remove ignorable characters. */
+ ucnv_io_stripForCompare(strippedName, alias);
+ alias = strippedName;
+ }
+
+ /* do a binary search for the alias */
+ start = 0;
+ limit = gMainTable.untaggedConvArraySize;
+ mid = limit; /* overwritten at the top of the loop before it is read */
+ lastMid = UINT32_MAX;
+
+ for (;;) {
+ mid = (uint32_t)((start + limit) / 2);
+ if (lastMid == mid) { /* Have we moved? */
+ break; /* We haven't moved, and it wasn't found. */
+ }
+ lastMid = mid;
+ if (isUnnormalized) {
+ result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
+ }
+ else {
+ result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid])); /* both sides are already normalized, so an exact compare is enough */
+ }
+
+ if (result < 0) {
+ limit = mid;
+ } else if (result > 0) {
+ start = mid; /* not mid+1: the loop ends through the lastMid check once mid stops changing */
+ } else {
+ /* Since the gencnval tool folds duplicates into one entry,
+ * this alias in gAliasList is unique, but different standards
+ * may map an alias to different converters.
+ */
+ if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
+ *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
+ }
+ /* State whether the canonical converter name contains an option.
+ This information is contained in this list in order to maintain backward & forward compatibility. */
+ if (containsOption) {
+ UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
+ *containsOption = (UBool)((containsCnvOptionInfo
+ && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
+ || !containsCnvOptionInfo); /* without option info in the table the answer defaults to TRUE */
+ }
+ return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK; /* strip the flag bits, leaving the converter index */
+ }
+ }
+
+ return UINT32_MAX;
+}
+
+/*
+ * Report whether alias names an entry of one tagged alias list.
+ * alias must be non-NULL; a listOffset of 0 never matches.
+ */
+static inline UBool
+isAliasInList(const char *alias, uint32_t listOffset) {
+    const uint16_t *entry, *entriesEnd;
+    if (listOffset == 0) {
+        return FALSE;
+    }
+    /* The slot at listOffset holds the entry count; the entries follow it. */
+    entry = gMainTable.taggedAliasLists + listOffset + 1;
+    entriesEnd = entry + gMainTable.taggedAliasLists[listOffset];
+    for (; entry != entriesEnd; ++entry) {
+        /* Entries equal to 0 are skipped without a string compare. */
+        if (*entry != 0 && ucnv_compareNames(alias, GET_STRING(*entry)) == 0) {
+            return TRUE;
+        }
+    }
+    return FALSE;
+}
+
+/*
+ * Search for a standard name of an alias (what is the default name
+ * that this standard uses?)
+ * return the listOffset for gMainTable.taggedAliasLists. If it's 0,
+ * then it couldn't be found, but the parameters are valid. UINT32_MAX: tag or converter not found.
+ */
+static uint32_t
+findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
+ uint32_t idx;
+ uint32_t listOffset;
+ uint32_t convNum;
+ UErrorCode myErr = U_ZERO_ERROR;
+ uint32_t tagNum = getTagNumber(standard);
+
+ /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
+ convNum = findConverter(alias, NULL, &myErr);
+ if (myErr != U_ZERO_ERROR) {
+ *pErrorCode = myErr; /* pass on both errors and the ambiguous-alias warning */
+ }
+
+ if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) { /* also rejects the UINT32_MAX "not found" sentinels */
+ listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum]; /* row = tag, column = converter */
+ if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) { /* the list's first entry must be nonzero */
+ return listOffset;
+ }
+ if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
+ /* Uh Oh! They used an ambiguous alias.
+ We have to search the whole swiss cheese starting
+ at the highest standard affinity.
+ This may take a while.
+ */
+ for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
+ listOffset = gMainTable.taggedAliasArray[idx];
+ if (listOffset && isAliasInList(alias, listOffset)) {
+ uint32_t currTagNum = idx/gMainTable.converterListSize;
+ uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
+ uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum]; /* same converter column, but in the requested tag's row */
+ if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
+ return tempListOffset;
+ }
+ /* else keep on looking */
+ /* We could speed this up by starting on the next row
+ because an alias is unique per row, right now.
+ This would change if alias versioning appears. */
+ }
+ }
+ /* The standard doesn't know about the alias */
+ }
+ /* else no default name */
+ return 0;
+ }
+ /* else converter or tag not found */
+
+ return UINT32_MAX;
+}
+
+/* Return the converter number (index into the converter list) under which the given standard lists alias, or UINT32_MAX if the tag or converter is unknown or the standard does not list the alias */
+static uint32_t
+findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
+ uint32_t idx;
+ uint32_t listOffset;
+ uint32_t convNum;
+ UErrorCode myErr = U_ZERO_ERROR;
+ uint32_t tagNum = getTagNumber(standard);
+
+ /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
+ convNum = findConverter(alias, NULL, &myErr);
+ if (myErr != U_ZERO_ERROR) {
+ *pErrorCode = myErr; /* pass on both errors and the ambiguous-alias warning */
+ }
+
+ if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) { /* also rejects the UINT32_MAX "not found" sentinels */
+ listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum]; /* row = tag, column = converter */
+ if (listOffset && isAliasInList(alias, listOffset)) {
+ return convNum;
+ }
+ if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
+ /* Uh Oh! They used an ambiguous alias.
+ We have to search one slice of the swiss cheese.
+ We search only in the requested tag, not the whole thing.
+ This may take a while.
+ */
+ uint32_t convStart = (tagNum)*gMainTable.converterListSize;
+ uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
+ for (idx = convStart; idx < convLimit; idx++) {
+ listOffset = gMainTable.taggedAliasArray[idx];
+ if (listOffset && isAliasInList(alias, listOffset)) {
+ return idx-convStart; /* column within the tag's row = converter number */
+ }
+ }
+ /* The standard doesn't know about the alias */
+ }
+ /* else no canonical name */
+ }
+ /* else converter or tag not found */
+
+ return UINT32_MAX;
+}
+
+U_CAPI const char * /* converter name from the string table for alias, or NULL if the data is unavailable, alias is NULL/empty, or no converter matches */
+ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
+ const char *aliasTmp = alias;
+ int32_t i = 0;
+ for (i = 0; i < 2; i++) { /* pass 0: the name as given; pass 1: the name without a leading "x-" */
+ if (i == 1) {
+ /*
+ * After the first unsuccessful converter lookup, check to see if
+ * the name begins with 'x-'. If it does, strip it off and try
+ * again. This behaviour is similar to how ICU4J does it.
+ */
+ if (aliasTmp[0] == 'x' && aliasTmp[1] == '-') { /* aliasTmp is non-NULL and non-empty here: pass 0 would have left the loop otherwise */
+ aliasTmp = aliasTmp+2;
+ } else {
+ break;
+ }
+ }
+ if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
+ uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
+ if (convNum < gMainTable.converterListSize) {
+ return GET_STRING(gMainTable.converterList[convNum]);
+ }
+ /* else converter not found */
+ } else {
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+U_CDECL_BEGIN
+
+
+static int32_t U_CALLCONV
+ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
+    const UAliasContext *ctx = (const UAliasContext *)(enumerator->context);
+
+    /* Offset 0 denotes "nothing to enumerate". */
+    if (ctx->listOffset == 0) {
+        return 0;
+    }
+    /* Otherwise the slot at listOffset holds the number of aliases in the list. */
+    return gMainTable.taggedAliasLists[ctx->listOffset];
+}
+
+static const char * U_CALLCONV
+ucnv_io_nextStandardAliases(UEnumeration *enumerator,
+                            int32_t* resultLength,
+                            UErrorCode * /*pErrorCode*/)
+{
+    UAliasContext *ctx = (UAliasContext *)(enumerator->context);
+    const uint32_t offset = ctx->listOffset;
+    const char *nextAlias = NULL;
+    int32_t nextLength = 0;
+
+    /* offset 0 is the empty list; otherwise the slot at offset holds the
+     * entry count and the string-table indexes follow it.
+     */
+    if (offset != 0 && ctx->listIdx < gMainTable.taggedAliasLists[offset]) {
+        const uint16_t *entries = gMainTable.taggedAliasLists + offset + 1;
+        nextAlias = GET_STRING(entries[ctx->listIdx++]);
+        if (resultLength != NULL) {
+            nextLength = (int32_t)uprv_strlen(nextAlias);
+        }
+    }
+    /* Either we accessed a zero length list, or we enumerated too far: NULL, length 0. */
+    if (resultLength != NULL) {
+        *resultLength = nextLength;
+    }
+    return nextAlias;
+}
+
+static void U_CALLCONV ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
+    UAliasContext *ctx = (UAliasContext *)(enumerator->context);
+    ctx->listIdx = 0; /* the next call to next() starts again at the first alias */
+}
+
+static void U_CALLCONV ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
+    void *ownedContext = enumerator->context; /* read before the enumeration itself is freed */
+    uprv_free(ownedContext);
+    uprv_free(enumerator);
+}
+
+U_CDECL_END
+
+/* Enumerate the aliases for the specified converter and standard tag; template copied into each enumeration by ucnv_openStandardNames() */
+static const UEnumeration gEnumAliases = { /* NOTE(review): slot meanings below assume the UEnumeration field order in uenumimp.h - confirm */
+ NULL,
+ NULL, /* presumably the context slot; ucnv_openStandardNames() sets myEnum->context per instance */
+ ucnv_io_closeUEnumeration,
+ ucnv_io_countStandardAliases,
+ uenum_unextDefault,
+ ucnv_io_nextStandardAliases,
+ ucnv_io_resetStandardAliases
+};
+
+U_CAPI UEnumeration * U_EXPORT2 /* new enumeration over the aliases of convName under standard; NULL if the data is unavailable, convName is NULL/empty, the tag or converter is unknown, or allocation fails */
+ucnv_openStandardNames(const char *convName,
+ const char *standard,
+ UErrorCode *pErrorCode)
+{
+ UEnumeration *myEnum = NULL;
+ if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
+ uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
+
+ /* When listOffset == 0, we want to acknowledge that the
+ converter name and standard are okay, but there
+ is nothing to enumerate. */
+ if (listOffset < gMainTable.taggedAliasListsSize) { /* also filters out the UINT32_MAX "not found" result */
+ UAliasContext *myContext;
+
+ myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
+ if (myEnum == NULL) {
+ *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration)); /* start from the shared function table */
+ myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
+ if (myContext == NULL) {
+ *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+ uprv_free(myEnum); /* do not leak the half-built enumeration */
+ return NULL;
+ }
+ myContext->listOffset = listOffset;
+ myContext->listIdx = 0;
+ myEnum->context = myContext; /* freed by ucnv_io_closeUEnumeration() */
+ }
+ /* else converter or tag not found */
+ }
+ return myEnum;
+}
+
+/*
+ * Count the aliases of the converter that `alias` resolves to, using the list
+ * stored under the ALL tag. Returns 0 if the alias data is unavailable or the
+ * converter is not found.
+ */
+static uint16_t
+ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
+    if (!haveAliasData(pErrorCode) || !isAlias(alias, pErrorCode)) {
+        return 0;
+    }
+
+    const uint32_t convNum = findConverter(alias, NULL, pErrorCode);
+    if (convNum >= gMainTable.converterListSize) {
+        /* converter not found */
+        return 0;
+    }
+
+    /* tagListSize - 1 is the ALL tag */
+    const uint32_t allTagRow = gMainTable.tagListSize - 1;
+    const int32_t listOffset = gMainTable.taggedAliasArray[allTagRow*gMainTable.converterListSize + convNum];
+    if (listOffset == 0) {
+        /* this shouldn't happen. internal program error */
+        return 0;
+    }
+
+    /* the first unit of a tagged alias list is its entry count */
+    return gMainTable.taggedAliasLists[listOffset];
+}
+
+/*
+ * Fill aliases[start..count-1] with the alias strings of the converter that
+ * `alias` resolves to (ALL-tag list). Slots are written at the alias's own
+ * index, so entries below `start` are left untouched. The caller must supply
+ * an array large enough for every alias. Always returns 0.
+ */
+static uint16_t
+ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
+    if (!haveAliasData(pErrorCode) || !isAlias(alias, pErrorCode)) {
+        return 0;
+    }
+
+    const uint32_t convNum = findConverter(alias, NULL, pErrorCode);
+    if (convNum >= gMainTable.converterListSize) {
+        /* converter not found */
+        return 0;
+    }
+
+    /* tagListSize - 1 is the ALL tag */
+    const int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
+    if (listOffset == 0) {
+        /* this shouldn't happen. internal program error */
+        return 0;
+    }
+
+    /* the list starts with its count; the entries follow directly after it */
+    const uint16_t *listHead = gMainTable.taggedAliasLists + listOffset;
+    const uint32_t listCount = listHead[0];
+    const uint16_t *entries = listHead + 1;
+
+    uint32_t idx = start;
+    while (idx < listCount) {
+        aliases[idx] = GET_STRING(entries[idx]);
+        ++idx;
+    }
+    return 0;
+}
+
+/*
+ * Return the n-th alias of the converter that `alias` resolves to, taken from
+ * the ALL-tag list. Returns NULL if the alias data is unavailable or the
+ * converter is not found. If n is past the end of the list, NULL is returned
+ * and *pErrorCode is set to U_INDEX_OUTOFBOUNDS_ERROR.
+ */
+static const char *
+ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
+    if (!haveAliasData(pErrorCode) || !isAlias(alias, pErrorCode)) {
+        return NULL;
+    }
+
+    const uint32_t convNum = findConverter(alias, NULL, pErrorCode);
+    if (convNum >= gMainTable.converterListSize) {
+        /* converter not found */
+        return NULL;
+    }
+
+    /* tagListSize - 1 is the ALL tag */
+    const int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
+    if (listOffset == 0) {
+        /* this shouldn't happen. internal program error */
+        return NULL;
+    }
+
+    /* the list starts with its count; entry n lives one unit further on */
+    const uint16_t *listHead = gMainTable.taggedAliasLists + listOffset;
+    const uint32_t listCount = listHead[0];
+    if (n >= listCount) {
+        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return NULL;
+    }
+    return GET_STRING(listHead[1 + n]);
+}
+
+/* Number of standards (tags) visible to callers; 0 if the alias data is unavailable. */
+static uint16_t
+ucnv_io_countStandards(UErrorCode *pErrorCode) {
+    if (!haveAliasData(pErrorCode)) {
+        return 0;
+    }
+    /* the hidden tag(s) at the end of the tag list are not reported */
+    const uint32_t visibleTags = gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS;
+    return (uint16_t)visibleTags;
+}
+
+/*
+ * Return the name of the n-th visible standard, or NULL if the alias data is
+ * unavailable. An index beyond the visible tags yields NULL and sets
+ * *pErrorCode to U_INDEX_OUTOFBOUNDS_ERROR.
+ */
+U_CAPI const char * U_EXPORT2
+ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
+    if (!haveAliasData(pErrorCode)) {
+        return NULL;
+    }
+    if (n >= gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
+        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return NULL;
+    }
+    return GET_STRING(gMainTable.tagList[n]);
+}
+
+/*
+ * Return the preferred name of `alias` under the given standard, i.e. the
+ * first entry of the matching tagged alias list. Returns NULL if the alias
+ * data is unavailable, if no non-empty list exists for the pair, or if the
+ * first entry of the list is 0.
+ */
+U_CAPI const char * U_EXPORT2
+ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
+    if (!haveAliasData(pErrorCode) || !isAlias(alias, pErrorCode)) {
+        return NULL;
+    }
+
+    const uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
+    if (listOffset == 0 || listOffset >= gMainTable.taggedAliasListsSize) {
+        return NULL;
+    }
+
+    /* skip the count unit; the preferred name is the first entry of the list */
+    const uint16_t preferred = gMainTable.taggedAliasLists[listOffset + 1];
+    if (preferred == 0) {
+        /* someone screwed up the alias table. */
+        /* *pErrorCode = U_INVALID_FORMAT_ERROR */
+        return NULL;
+    }
+    return GET_STRING(preferred);
+}
+
+/* Public entry point; forwards to ucnv_io_countAliases(). */
+U_CAPI uint16_t U_EXPORT2
+ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
+{
+    const uint16_t aliasCount = ucnv_io_countAliases(alias, pErrorCode);
+    return aliasCount;
+}
+
+
+/* Public entry point; forwards to ucnv_io_getAlias(). */
+U_CAPI const char* U_EXPORT2
+ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
+{
+    const char *nthAlias = ucnv_io_getAlias(alias, n, pErrorCode);
+    return nthAlias;
+}
+
+/* Public entry point; fills `aliases` from index 0 via ucnv_io_getAliases()
+   and discards its return value. */
+U_CAPI void U_EXPORT2
+ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
+{
+    const uint16_t firstIndex = 0;
+    ucnv_io_getAliases(alias, firstIndex, aliases, pErrorCode);
+}
+
+U_CAPI uint16_t U_EXPORT2
+ucnv_countStandards(void)
+{
+ UErrorCode err = U_ZERO_ERROR;
+ return ucnv_io_countStandards(&err);
+}
+
+/*
+ * Return the converter-list name of the converter found for `alias` under the
+ * given standard, or NULL if the alias data is unavailable or no converter is
+ * found.
+ */
+U_CAPI const char * U_EXPORT2
+ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
+    if (!haveAliasData(pErrorCode) || !isAlias(alias, pErrorCode)) {
+        return NULL;
+    }
+
+    const uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
+    if (convNum >= gMainTable.converterListSize) {
+        return NULL;
+    }
+    return GET_STRING(gMainTable.converterList[convNum]);
+}
+
+U_CDECL_BEGIN
+
+
+/* UEnumeration count callback: the size of the converter list. */
+static int32_t U_CALLCONV
+ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
+    int32_t converterCount = gMainTable.converterListSize;
+    return converterCount;
+}
+
+/*
+ * UEnumeration next callback: returns the converter name at the cursor and
+ * advances the cursor. The context is a single uint16_t cursor. When the
+ * cursor is past the end, returns NULL and reports a length of 0.
+ */
+static const char * U_CALLCONV
+ucnv_io_nextAllConverters(UEnumeration *enumerator,
+                          int32_t* resultLength,
+                          UErrorCode * /*pErrorCode*/)
+{
+    uint16_t *cursor = (uint16_t *)(enumerator->context);
+
+    if (*cursor >= gMainTable.converterListSize) {
+        /* Either we accessed a zero length list, or we enumerated too far. */
+        if (resultLength) {
+            *resultLength = 0;
+        }
+        return NULL;
+    }
+
+    const uint16_t current = *cursor;
+    *cursor = (uint16_t)(current + 1);
+
+    const char *name = GET_STRING(gMainTable.converterList[current]);
+    if (resultLength) {
+        *resultLength = (int32_t)uprv_strlen(name);
+    }
+    return name;
+}
+
+/* UEnumeration reset callback: puts the uint16_t cursor back at the first converter. */
+static void U_CALLCONV
+ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
+    uint16_t *cursor = (uint16_t *)(enumerator->context);
+    *cursor = 0;
+}
+U_CDECL_END
+/*
+ * Template UEnumeration that ucnv_openAllNames() copies into each new
+ * enumeration. The two leading NULL slots are pointer members; the context
+ * pointer (a uint16_t cursor) is filled in per instance after the copy.
+ */
+static const UEnumeration gEnumAllConverters = {
+ NULL,
+ NULL,
+ ucnv_io_closeUEnumeration, /* frees the context and the enumeration */
+ ucnv_io_countAllConverters,
+ uenum_unextDefault,
+ ucnv_io_nextAllConverters,
+ ucnv_io_resetAllConverters /* sets the cursor back to 0 */
+};
+
+/*
+ * Open an enumeration over the names in the converter list.
+ *
+ * Returns NULL if the alias data is unavailable or if an allocation fails (in
+ * which case *pErrorCode is set to U_MEMORY_ALLOCATION_ERROR). The returned
+ * enumeration is a copy of gEnumAllConverters whose context is a freshly
+ * allocated uint16_t cursor starting at 0.
+ */
+U_CAPI UEnumeration * U_EXPORT2
+ucnv_openAllNames(UErrorCode *pErrorCode) {
+    if (!haveAliasData(pErrorCode)) {
+        return NULL;
+    }
+
+    UEnumeration *result = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
+    if (result == NULL) {
+        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    uprv_memcpy(result, &gEnumAllConverters, sizeof(UEnumeration));
+
+    uint16_t *cursor = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
+    if (cursor == NULL) {
+        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+        /* do not leak the half-built enumeration */
+        uprv_free(result);
+        return NULL;
+    }
+    *cursor = 0;
+    result->context = cursor;
+    return result;
+}
+
+/* Size of the converter list as a uint16_t, or 0 if the alias data is unavailable. */
+U_CAPI uint16_t
+ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
+    if (!haveAliasData(pErrorCode)) {
+        return 0;
+    }
+    return (uint16_t)gMainTable.converterListSize;
+}
+
+/* alias table swapping ----------------------------------------------------- */
+
+U_CDECL_BEGIN
+
+/* Signature of the name-stripping functions (ucnv_io_stripASCIIForCompare /
+   ucnv_io_stripEBCDICForCompare) that ucnv_swapAliases() selects at run time. */
+typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
+U_CDECL_END
+
+
+/*
+ * row of a temporary array
+ *
+ * gets platform-endian charset string indexes and sorting indexes;
+ * after sorting this array by strings, the actual arrays are permuted
+ * according to the sorting indexes
+ *
+ * strIndex:  string index read from the input alias list entry
+ * sortIndex: position of that entry before sorting
+ */
+typedef struct TempRow {
+ uint16_t strIndex, sortIndex;
+} TempRow;
+
+/* Sorting context handed to io_compareRows() by ucnv_swapAliases(). */
+typedef struct TempAliasTable {
+ const char *chars; /* start of the already-swapped (outCharset) string table */
+ TempRow *rows; /* rows being sorted; stack or heap storage */
+ uint16_t *resort; /* scratch array used when permuting in place */
+ StripForCompareFn *stripForCompare; /* stripping function matching outCharset */
+} TempAliasTable;
+
+/* Number of rows ucnv_swapAliases() keeps on the stack; larger alias lists
+   use a heap allocation instead. */
+enum {
+ STACK_ROW_CAPACITY=500
+};
+
+/*
+ * uprv_sortArray() comparator for TempRow entries: strips both names with the
+ * table's stripForCompare function and compares the results with uprv_strcmp().
+ * `context` is the TempAliasTable describing the string table being sorted.
+ */
+static int32_t U_CALLCONV
+io_compareRows(const void *context, const void *left, const void *right) {
+    char leftKey[UCNV_MAX_CONVERTER_NAME_LENGTH];
+    char rightKey[UCNV_MAX_CONVERTER_NAME_LENGTH];
+
+    const TempAliasTable *table = (const TempAliasTable *)context;
+    const TempRow *leftRow = (const TempRow *)left;
+    const TempRow *rightRow = (const TempRow *)right;
+
+    /* chars is byte-addressed while strIndex is doubled here; presumably
+       strIndex counts 16-bit units of the string table */
+    const char *leftName = table->chars + 2*leftRow->strIndex;
+    const char *rightName = table->chars + 2*rightRow->strIndex;
+
+    const char *leftStripped = table->stripForCompare(leftKey, leftName);
+    const char *rightStripped = table->stripForCompare(rightKey, rightName);
+
+    return (int32_t)uprv_strcmp(leftStripped, rightStripped);
+}
+
+/*
+ * Swap a converter alias table (dataFormat "CvAl", formatVersion 3) from the
+ * input platform properties of `ds` to its output properties.
+ *
+ * @param ds         swapper providing the read/swap functions and charsets
+ * @param inData     start of the input data, including its data header
+ * @param length     number of input bytes; if negative, nothing is written
+ *                   and only the resulting size is computed
+ * @param outData    destination buffer; may be the same as inData, in which
+ *                   case the permutation goes through a scratch array
+ * @param pErrorCode in/out error code
+ * @return header size plus the size of the table in bytes, or 0 on failure
+ *
+ * When the input and output charsets differ, the alias list is re-sorted by
+ * the output-charset strings and the untagged converter array is permuted in
+ * the same way.
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_swapAliases(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const UDataInfo *pInfo;
+ int32_t headerSize;
+
+ const uint16_t *inTable;
+ const uint32_t *inSectionSizes;
+ uint32_t toc[offsetsCount];
+ uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
+ uint32_t i, count, tocLength, topOffset;
+
+ /* stack storage used when the alias list has at most STACK_ROW_CAPACITY entries */
+ TempRow rows[STACK_ROW_CAPACITY];
+ uint16_t resort[STACK_ROW_CAPACITY];
+ TempAliasTable tempTable;
+
+ /* udata_swapDataHeader checks the arguments */
+ headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
+ /* check data format and format version */
+ pInfo=(const UDataInfo *)((const char *)inData+4);
+ if(!(
+ pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */
+ pInfo->dataFormat[1]==0x76 &&
+ pInfo->dataFormat[2]==0x41 &&
+ pInfo->dataFormat[3]==0x6c &&
+ pInfo->formatVersion[0]==3
+ )) {
+ udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
+ pInfo->dataFormat[0], pInfo->dataFormat[1],
+ pInfo->dataFormat[2], pInfo->dataFormat[3],
+ pInfo->formatVersion[0]);
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ /* an alias table must contain at least the table of contents array */
+ if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
+ udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
+ length-headerSize);
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
+ inTable=(const uint16_t *)inSectionSizes;
+ uprv_memset(toc, 0, sizeof(toc));
+ toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
+ /* reject a section count that would not fit the toc[]/offsets[] arrays */
+ if(tocLength<minTocLength || offsetsCount<=tocLength) {
+ udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
+ *pErrorCode=U_INVALID_FORMAT_ERROR;
+ return 0;
+ }
+
+ /* read the known part of the table of contents */
+ for(i=converterListIndex; i<=tocLength; ++i) {
+ toc[i]=ds->readUInt32(inSectionSizes[i]);
+ }
+
+ /* compute offsets */
+ uprv_memset(offsets, 0, sizeof(offsets));
+ offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
+ for(i=tagListIndex; i<=tocLength; ++i) {
+ offsets[i]=offsets[i-1]+toc[i-1];
+ }
+
+ /* compute the overall size of the after-header data, in numbers of 16-bit units */
+ /* i is tocLength+1 after the loop above, so i-1 addresses the last section */
+ topOffset=offsets[i-1]+toc[i-1];
+
+ if(length>=0) {
+ uint16_t *outTable;
+ const uint16_t *p, *p2;
+ uint16_t *q, *q2;
+ uint16_t oldIndex;
+
+ if((length-headerSize)<(2*(int32_t)topOffset)) {
+ udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
+ length-headerSize);
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ outTable=(uint16_t *)((char *)outData+headerSize);
+
+ /* swap the entire table of contents */
+ ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
+
+ /* swap unnormalized strings & normalized strings */
+ ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
+ outTable+offsets[stringTableIndex], pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
+ return 0;
+ }
+
+ if(ds->inCharset==ds->outCharset) {
+ /* no need to sort, just swap all 16-bit values together */
+ ds->swapArray16(ds,
+ inTable+offsets[converterListIndex],
+ 2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
+ outTable+offsets[converterListIndex],
+ pErrorCode);
+ } else {
+ /* allocate the temporary table for sorting */
+ count=toc[aliasListIndex];
+
+ tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
+
+ if(count<=STACK_ROW_CAPACITY) {
+ tempTable.rows=rows;
+ tempTable.resort=resort;
+ } else {
+ /* one allocation holds both the rows and, right behind them, the resort array */
+ tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
+ if(tempTable.rows==NULL) {
+ udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
+ count);
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ tempTable.resort=(uint16_t *)(tempTable.rows+count);
+ }
+
+ if(ds->outCharset==U_ASCII_FAMILY) {
+ tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
+ } else /* U_EBCDIC_FAMILY */ {
+ tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
+ }
+
+ /*
+ * Sort unique aliases+mapped names.
+ *
+ * We need to sort the list again by outCharset strings because they
+ * sort differently for different charset families.
+ * First we set up a temporary table with the string indexes and
+ * sorting indexes and sort that.
+ * Then we permutate and copy/swap the actual values.
+ */
+ p=inTable+offsets[aliasListIndex];
+ q=outTable+offsets[aliasListIndex];
+
+ p2=inTable+offsets[untaggedConvArrayIndex];
+ q2=outTable+offsets[untaggedConvArrayIndex];
+
+ for(i=0; i<count; ++i) {
+ tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
+ tempTable.rows[i].sortIndex=(uint16_t)i;
+ }
+
+ uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
+ io_compareRows, &tempTable,
+ FALSE, pErrorCode);
+
+ if(U_SUCCESS(*pErrorCode)) {
+ /* copy/swap/permutate items */
+ if(p!=q) {
+ for(i=0; i<count; ++i) {
+ oldIndex=tempTable.rows[i].sortIndex;
+ ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
+ ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
+ }
+ } else {
+ /*
+ * If we swap in-place, then the permutation must use another
+ * temporary array (tempTable.resort)
+ * before the results are copied to the outBundle.
+ */
+ uint16_t *r=tempTable.resort;
+
+ for(i=0; i<count; ++i) {
+ oldIndex=tempTable.rows[i].sortIndex;
+ ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
+ }
+ uprv_memcpy(q, r, 2*(size_t)count);
+
+ for(i=0; i<count; ++i) {
+ oldIndex=tempTable.rows[i].sortIndex;
+ ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
+ }
+ uprv_memcpy(q2, r, 2*(size_t)count);
+ }
+ }
+
+ /* release the heap buffer (if any) before the error check below can return */
+ if(tempTable.rows!=rows) {
+ uprv_free(tempTable.rows);
+ }
+
+ if(U_FAILURE(*pErrorCode)) {
+ udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
+ count);
+ return 0;
+ }
+
+ /* swap remaining 16-bit values */
+ ds->swapArray16(ds,
+ inTable+offsets[converterListIndex],
+ 2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
+ outTable+offsets[converterListIndex],
+ pErrorCode);
+ ds->swapArray16(ds,
+ inTable+offsets[taggedAliasArrayIndex],
+ 2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
+ outTable+offsets[taggedAliasArrayIndex],
+ pErrorCode);
+ }
+ }
+
+ return headerSize+2*(int32_t)topOffset;
+}
+
+#endif
+
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
diff --git a/thirdparty/icu4c/common/ucnv_io.h b/thirdparty/icu4c/common/ucnv_io.h
new file mode 100644
index 0000000000..8f2d7b5a02
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_io.h
@@ -0,0 +1,127 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ **********************************************************************
+ * Copyright (C) 1999-2006, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ *
+ *
+ * ucnv_io.h:
+ * defines variables and functions pertaining to converter name resolution
+ * aspect of the conversion code
+ */
+
+#ifndef UCNV_IO_H
+#define UCNV_IO_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "udataswp.h"
+
+/* NOTE(review): the three values below do not overlap and all fit in 16 bits;
+   presumably they are flag bits and an index mask for alias-table entries.
+   Their use is not visible in this header - confirm in ucnv_io.cpp. */
+#define UCNV_AMBIGUOUS_ALIAS_MAP_BIT 0x8000
+#define UCNV_CONTAINS_OPTION_BIT 0x4000
+#define UCNV_CONVERTER_INDEX_MASK 0xFFF
+#define UCNV_NUM_RESERVED_TAGS 2
+/* Subtracted from the tag list size when standards are counted or indexed
+   for callers, so the last tag(s) are never reported. */
+#define UCNV_NUM_HIDDEN_TAGS 1
+
+/* String normalization types; the last enumerator is the number of types.
+   NOTE(review): presumably the values stored in
+   UConverterAliasOptions.stringNormalizationType - confirm. */
+enum {
+ UCNV_IO_UNNORMALIZED,
+ UCNV_IO_STD_NORMALIZED,
+ UCNV_IO_NORM_TYPE_COUNT
+};
+
+/* Options record of an alias table (referenced by UConverterAlias.optionTable). */
+typedef struct {
+ uint16_t stringNormalizationType; /* presumably one of the UCNV_IO_* normalization types - confirm */
+ uint16_t containsCnvOptionInfo;
+} UConverterAliasOptions;
+
+/*
+ * In-memory view of a converter alias table: one pointer per table section,
+ * followed by the size of each section in the same order.
+ */
+typedef struct UConverterAlias {
+ /* section pointers */
+ const uint16_t *converterList;
+ const uint16_t *tagList;
+ const uint16_t *aliasList;
+ const uint16_t *untaggedConvArray;
+ const uint16_t *taggedAliasArray;
+ const uint16_t *taggedAliasLists;
+ const UConverterAliasOptions *optionTable;
+ const uint16_t *stringTable;
+ const uint16_t *normalizedStringTable;
+
+ /* section sizes, matching the pointers above one for one */
+ uint32_t converterListSize;
+ uint32_t tagListSize;
+ uint32_t aliasListSize;
+ uint32_t untaggedConvArraySize;
+ uint32_t taggedAliasArraySize;
+ uint32_t taggedAliasListsSize;
+ uint32_t optionTableSize;
+ uint32_t stringTableSize;
+ uint32_t normalizedStringTableSize;
+} UConverterAlias;
+
+/**
+ * \var ucnv_io_stripForCompare
+ * Remove the underscores, dashes and spaces from the name, and convert
+ * the name to lower case.
+ * @param dst The destination buffer, which is <= the buffer of name.
+ * @param name The name to be stripped.
+ * @see ucnv_compareNames
+ * @return the destination buffer.
+ */
+/* Resolves to the ASCII or EBCDIC implementation according to the charset
+   family this build is compiled for. */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define ucnv_io_stripForCompare ucnv_io_stripASCIIForCompare
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define ucnv_io_stripForCompare ucnv_io_stripEBCDICForCompare
+#else
+# error U_CHARSET_FAMILY is not valid
+#endif
+
+/* ASCII-family implementation behind the ucnv_io_stripForCompare macro. */
+U_CAPI char * U_CALLCONV
+ucnv_io_stripASCIIForCompare(char *dst, const char *name);
+
+/* EBCDIC-family implementation behind the ucnv_io_stripForCompare macro. */
+U_CAPI char * U_CALLCONV
+ucnv_io_stripEBCDICForCompare(char *dst, const char *name);
+
+/**
+ * Map a converter alias name to a canonical converter name.
+ * The alias is searched for case-insensitively, the converter name
+ * is returned in mixed-case.
+ * Returns NULL if the alias is not found.
+ * @param alias The alias name to be searched.
+ * @param containsOption A return value stating whether the returned converter name contains an option (a comma)
+ * @param pErrorCode The error code
+ * @return the converter name in mixed-case, return NULL if the alias is not found.
+ */
+U_CAPI const char *
+ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode);
+
+/**
+ * Return the number of all known converter names (no aliases).
+ * @param pErrorCode The error code
+ * @return the number of all known converter names
+ */
+U_CAPI uint16_t
+ucnv_io_countKnownConverters(UErrorCode *pErrorCode);
+
+/**
+ * Swap an ICU converter alias table. See implementation for details.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_swapAliases(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+#endif
+
+#endif /* UCNV_IO_H */
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
diff --git a/thirdparty/icu4c/common/ucnv_lmb.cpp b/thirdparty/icu4c/common/ucnv_lmb.cpp
new file mode 100644
index 0000000000..168392837b
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_lmb.cpp
@@ -0,0 +1,1388 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2000-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: ucnv_lmb.cpp
+* encoding: UTF-8
+* tab size: 4 (not used)
+* indentation:4
+*
+* created on: 2000feb09
+* created by: Brendan Murray
+* extensively hacked up by: Jim Snyder-Grant
+*
+* Modification History:
+*
+* Date Name Description
+*
+* 06/20/2000 helena OS/400 port changes; mostly typecast.
+* 06/27/2000 Jim Snyder-Grant Deal with partial characters and small buffers.
+* Add comments to document LMBCS format and implementation
+* restructured order & breakdown of functions
+* 06/28/2000 helena Major rewrite for the callback API changes.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+
+#include "unicode/ucnv_err.h"
+#include "unicode/ucnv.h"
+#include "unicode/uset.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uassert.h"
+#include "ucnv_imp.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+
+#ifdef EBCDIC_RTL
+ #include "ascii_a.h"
+#endif
+
+/*
+ LMBCS
+
+ (Lotus Multi-Byte Character Set)
+
+ LMBCS was invented in the late 1980's and is primarily used in Lotus Notes
+ databases and in Lotus 1-2-3 files. Programmers who work with the APIs
+ into these products will sometimes need to deal with strings in this format.
+
+ The code in this file provides an implementation for an ICU converter of
+ LMBCS to and from Unicode.
+
+ Since the LMBCS character set is only sparsely documented in existing
+ printed or online material, we have added extensive annotation to this
+ file to serve as a guide to understanding LMBCS.
+
+ LMBCS was originally designed with these four sometimes-competing design goals:
+
+ -Provide encodings for the characters in 12 existing national standards
+ (plus a few other characters)
+ -Minimal memory footprint
+ -Maximal speed of conversion into the existing national character sets
+ -No need to track a changing state as you interpret a string.
+
+
+ All of the national character sets LMBCS was trying to encode are 'ANSI'
+ based, in that the bytes from 0x20 - 0x7F are almost exactly the
+ same common Latin unaccented characters and symbols in all character sets.
+
+ So, in order to help meet the speed & memory design goals, the common ANSI
+ bytes from 0x20-0x7F are represented by the same single-byte values in LMBCS.
+
+ The general LMBCS code unit is from 1-3 bytes. We can describe the 3 bytes as
+ follows:
+
+ [G] D1 [D2]
+
+ That is, a sometimes-optional 'group' byte, followed by 1 and sometimes 2
+ data bytes. The maximum size of a LMBCS character is 3 bytes:
+*/
+#define ULMBCS_CHARSIZE_MAX 3
+/*
+ The single-byte values from 0x20 to 0x7F are examples of single D1 bytes.
+ We often have to figure out if byte values are below or above this, so we
+ use the ANSI nomenclature 'C0' and 'C1' to refer to the range of control
+ characters just above & below the common lower-ANSI range */
+#define ULMBCS_C0END 0x1F
+#define ULMBCS_C1START 0x80
+/*
+ Since LMBCS always deals in byte units, we create a local type here for
+ dealing with these LMBCS code units:
+
+*/
+typedef uint8_t ulmbcs_byte_t;
+
+/*
+ Most of the values less than 0x20 are reserved in LMBCS to announce
+ which national character standard is being used for the 'D' bytes.
+ In the comments we show the common name and the IBM character-set ID
+ for these character-set announcers:
+*/
+
+#define ULMBCS_GRP_L1 0x01 /* Latin-1 :ibm-850 */
+#define ULMBCS_GRP_GR 0x02 /* Greek :ibm-851 */
+#define ULMBCS_GRP_HE 0x03 /* Hebrew :ibm-1255 */
+#define ULMBCS_GRP_AR 0x04 /* Arabic :ibm-1256 */
+#define ULMBCS_GRP_RU 0x05 /* Cyrillic :ibm-1251 */
+#define ULMBCS_GRP_L2 0x06 /* Latin-2 :ibm-852 */
+#define ULMBCS_GRP_TR 0x08 /* Turkish :ibm-1254 */
+#define ULMBCS_GRP_TH 0x0B /* Thai :ibm-874 */
+#define ULMBCS_GRP_JA 0x10 /* Japanese :ibm-943 */
+#define ULMBCS_GRP_KO 0x11 /* Korean :ibm-1261 */
+#define ULMBCS_GRP_TW 0x12 /* Chinese TC :ibm-950 */
+#define ULMBCS_GRP_CN 0x13 /* Chinese SC :ibm-1386 */
+
+/*
+ So, the beginning of understanding LMBCS is that IF the first byte of a LMBCS
+ character is one of those 12 values, you can interpret the remaining bytes of
+ that character as coming from one of those character sets. Since the lower
+ ANSI bytes already are represented in single bytes, one of the character
+ set announcers is used to announce a character that starts with a byte of
+ 0x80 or greater.
+
+ The character sets are arranged so that the single byte sets all appear
+ before the multi-byte character sets. When we need to tell whether a
+ group byte is for a single byte char set or not we use this define: */
+
+#define ULMBCS_DOUBLEOPTGROUP_START 0x10
+
+/*
+However, to fully understand LMBCS, you must also understand a series of
+exceptions & optimizations made in service of the design goals.
+
+First, those of you who are character set mavens may have noticed that
+the 'double-byte' character sets are actually multi-byte character sets
+that can have 1 or two bytes, even in the upper-ascii range. To force
+each group byte to introduce a fixed-width encoding (to make it faster to
+count characters), we use a convention of doubling up on the group byte
+to introduce any single-byte character > 0x80 in an otherwise double-byte
+character set. So, for example, the LMBCS sequence x10 x10 xAE is the
+same as '0xAE' in the Japanese code page 943.
+
+Next, you will notice that the list of group bytes has some gaps.
+These are used in various ways.
+
+We reserve a few special single byte values for common control
+characters. These are in the same place as their ANSI equivalents for speed.
+*/
+
+#define ULMBCS_HT 0x09 /* Fixed control char - Horizontal Tab */
+#define ULMBCS_LF 0x0A /* Fixed control char - Line Feed */
+#define ULMBCS_CR 0x0D /* Fixed control char - Carriage Return */
+
+/* Then, 1-2-3 reserved a special single-byte character to put at the
+beginning of internal 'system' range names: */
+
+#define ULMBCS_123SYSTEMRANGE 0x19
+
+/* Then we needed a place to put all the other ansi control characters
+that must be moved to different values because LMBCS reserves those
+values for other purposes. To represent the control characters, we start
+with a first byte of 0xF & add the control character value as the
+second byte */
+#define ULMBCS_GRP_CTRL 0x0F
+
+/* For the C0 controls (less than 0x20), we add 0x20 to preserve the
+useful doctrine that any byte less than 0x20 in a LMBCS char must be
+the first byte of a character:*/
+#define ULMBCS_CTRLOFFSET 0x20
+
+/*
+Where to put the characters that aren't part of any of the 12 national
+character sets? The first thing that was done, in the earlier years of
+LMBCS, was to use up the spaces of the form
+
+ [G] D1,
+
+ where 'G' was one of the single-byte character groups, and
+ D1 was less than 0x80. These sequences are gathered together
+ into a Lotus-invented doublebyte character set to represent a
+ lot of stray values. Internally, in this implementation, we track this
+ as group '0', as a place to tuck this exceptions list.*/
+
+#define ULMBCS_GRP_EXCEPT 0x00
+/*
+ Finally, as the durability and usefulness of UNICODE became clear,
+ LOTUS added a new group 0x14 to hold Unicode values not otherwise
+ represented in LMBCS: */
+#define ULMBCS_GRP_UNICODE 0x14
+/* The two bytes appearing after a 0x14 are interpreted as UTF-16 BE
+(Big-Endian) characters. The exception comes when the UTF16
+representation would have a zero as the second byte. In that case,
+'F6' is used in its place, and the bytes are swapped. (This prevents
+LMBCS from encoding any Unicode values of the form U+F6xx, but that's OK:
+0xF6xx is in the middle of the Private Use Area.)*/
+#define ULMBCS_UNICOMPATZERO 0xF6
+
+/* It is also useful in our code to have a constant for the size of
+a LMBCS char that holds a literal Unicode value */
+#define ULMBCS_UNICODE_SIZE 3
+
+/*
+To squish the LMBCS representations down even further, and to make
+translations even faster, sometimes the optimization group byte can be dropped
+from a LMBCS character. This is decided on a process-by-process basis. The
+group byte that is dropped is called the 'optimization group'.
+
+For Notes, the optimization group is always 0x1.*/
+#define ULMBCS_DEFAULTOPTGROUP 0x1
+/* For 1-2-3 files, the optimization group is stored in the header of the 1-2-3
+file.
+
+ In any case, when using ICU, you pass in the
+optimization group as part of the name of the converter (LMBCS-1, LMBCS-2,
+etc.). Using plain 'LMBCS' as the name of the converter will give you
+LMBCS-1.
+
+
+*** Implementation strategy ***
+
+
+Because of the extensive use of other character sets, the LMBCS converter
+keeps a mapping between optimization groups and IBM character sets, so that
+ICU converters can be created and used as needed. */
+
+/* As you can see, even though any byte below 0x20 could be an optimization
+byte, only those at 0x13 or below can map to an actual converter. To limit
+some loops and searches, we define a value for that last group converter:*/
+
+#define ULMBCS_GRP_LAST 0x13 /* last LMBCS group that has a converter */
+
+/* Converter name for each LMBCS group byte, indexed by the group byte value
+   (0x00 through ULMBCS_GRP_LAST). NULL marks a group byte that has no
+   converter of its own. */
+static const char * const OptGroupByteToCPName[ULMBCS_GRP_LAST + 1] = {
+ /* 0x0000 */ "lmb-excp", /* internal home for the LOTUS exceptions list */
+ /* 0x0001 */ "ibm-850",
+ /* 0x0002 */ "ibm-851",
+ /* 0x0003 */ "windows-1255",
+ /* 0x0004 */ "windows-1256",
+ /* 0x0005 */ "windows-1251",
+ /* 0x0006 */ "ibm-852",
+ /* 0x0007 */ NULL, /* Unused */
+ /* 0x0008 */ "windows-1254",
+ /* 0x0009 */ NULL, /* Control char HT */
+ /* 0x000A */ NULL, /* Control char LF */
+ /* 0x000B */ "windows-874",
+ /* 0x000C */ NULL, /* Unused */
+ /* 0x000D */ NULL, /* Control char CR */
+ /* 0x000E */ NULL, /* Unused */
+ /* 0x000F */ NULL, /* Control chars: 0x0F20 + C0/C1 character: algorithmic */
+ /* 0x0010 */ "windows-932",
+ /* 0x0011 */ "windows-949",
+ /* 0x0012 */ "windows-950",
+ /* 0x0013 */ "windows-936"
+
+ /* Group bytes above ULMBCS_GRP_LAST have no entry in this table, including
+ the 0x0014 Unicode compatibility region and 0x0019, the 1-2-3 system
+ range control char */
+};
+
+
+/* That's approximately all the data that's needed for translating
+ LMBCS to Unicode.
+
+
+However, to translate Unicode to LMBCS, we need some more support.
+
+That's because there is often more than one possible mapping from a Unicode
+code point back into LMBCS. The first thing we do is look up into a table
+to figure out if there is more than one possible mapping. This table,
+arranged by Unicode values (including ranges), either lists which group
+to use, or says that it could go into one or more of the SBCS sets, or
+into one or more of the DBCS sets. (If the character exists in both DBCS &
+SBCS, the table will place it in the SBCS sets, to make the LMBCS code point
+length as small as possible.) Here are the special markers we use to indicate
+ambiguous mappings: */
+
+#define ULMBCS_AMBIGUOUS_SBCS 0x80 /* could fit in more than one
+ LMBCS sbcs native encoding
+ (example: most accented latin) */
+#define ULMBCS_AMBIGUOUS_MBCS 0x81 /* could fit in more than one
+ LMBCS mbcs native encoding
+ (example: Unihan) */
+#define ULMBCS_AMBIGUOUS_ALL 0x82 /* matches any group, sbcs or mbcs
+ (see ULMBCS_AMBIGUOUS_MATCH) */
+/* And here's a simple way to see if a group falls in an appropriate range:
+   agroup is one of the ULMBCS_AMBIGUOUS_* markers, xgroup is a group byte.
+   SBCS matches group bytes below ULMBCS_DOUBLEOPTGROUP_START, MBCS matches
+   group bytes at or above it, and ALL matches every group byte.
+   The whole expansion is parenthesized so that the macro can be combined
+   safely with operators such as ! or && at the point of use. */
+#define ULMBCS_AMBIGUOUS_MATCH(agroup, xgroup) \
+    ((((agroup) == ULMBCS_AMBIGUOUS_SBCS) && \
+      ((xgroup) < ULMBCS_DOUBLEOPTGROUP_START)) || \
+     (((agroup) == ULMBCS_AMBIGUOUS_MBCS) && \
+      ((xgroup) >= ULMBCS_DOUBLEOPTGROUP_START)) || \
+     ((agroup) == ULMBCS_AMBIGUOUS_ALL))
+
+
+/* The table & some code to use it: */
+
+
+/*
+ * Range table used by FindLMBCSUniRange(): each entry gives an inclusive
+ * range of UChar values and either a specific LMBCS group or one of the
+ * ULMBCS_AMBIGUOUS_* markers.
+ *
+ * FindLMBCSUniRange() walks the entries in order and stops at the first one
+ * whose uniEndRange is not below the character, so the entries must stay
+ * sorted by ascending uniEndRange and the first matching entry wins.
+ * Characters that fall into a gap between two entries are reported by that
+ * function as ULMBCS_GRP_UNICODE.
+ *
+ * The final {0xFFFF, 0xFFFF} entry is what stops the walk
+ * (assumes UChar is a 16-bit type - TODO confirm).
+ */
+static const struct _UniLMBCSGrpMap
+{
+ const UChar uniStartRange; /* first UChar of the range (inclusive) */
+ const UChar uniEndRange; /* last UChar of the range (inclusive) */
+ const ulmbcs_byte_t GrpType; /* group byte or ULMBCS_AMBIGUOUS_* marker */
+} UniLMBCSGrpMap[]
+=
+{
+
+ {0x0001, 0x001F, ULMBCS_GRP_CTRL},
+ {0x0080, 0x009F, ULMBCS_GRP_CTRL},
+ {0x00A0, 0x00A6, ULMBCS_AMBIGUOUS_SBCS},
+ {0x00A7, 0x00A8, ULMBCS_AMBIGUOUS_ALL},
+ {0x00A9, 0x00AF, ULMBCS_AMBIGUOUS_SBCS},
+ {0x00B0, 0x00B1, ULMBCS_AMBIGUOUS_ALL},
+ {0x00B2, 0x00B3, ULMBCS_AMBIGUOUS_SBCS},
+ {0x00B4, 0x00B4, ULMBCS_AMBIGUOUS_ALL},
+ {0x00B5, 0x00B5, ULMBCS_AMBIGUOUS_SBCS},
+ {0x00B6, 0x00B6, ULMBCS_AMBIGUOUS_ALL},
+ {0x00B7, 0x00D6, ULMBCS_AMBIGUOUS_SBCS},
+ {0x00D7, 0x00D7, ULMBCS_AMBIGUOUS_ALL},
+ {0x00D8, 0x00F6, ULMBCS_AMBIGUOUS_SBCS},
+ {0x00F7, 0x00F7, ULMBCS_AMBIGUOUS_ALL},
+ {0x00F8, 0x01CD, ULMBCS_AMBIGUOUS_SBCS},
+ {0x01CE, 0x01CE, ULMBCS_GRP_TW },
+ {0x01CF, 0x02B9, ULMBCS_AMBIGUOUS_SBCS},
+ {0x02BA, 0x02BA, ULMBCS_GRP_CN},
+ {0x02BC, 0x02C8, ULMBCS_AMBIGUOUS_SBCS},
+ {0x02C9, 0x02D0, ULMBCS_AMBIGUOUS_MBCS},
+ {0x02D8, 0x02DD, ULMBCS_AMBIGUOUS_SBCS},
+ {0x0384, 0x0390, ULMBCS_AMBIGUOUS_SBCS},
+ {0x0391, 0x03A9, ULMBCS_AMBIGUOUS_ALL},
+ {0x03AA, 0x03B0, ULMBCS_AMBIGUOUS_SBCS},
+ {0x03B1, 0x03C9, ULMBCS_AMBIGUOUS_ALL},
+ {0x03CA, 0x03CE, ULMBCS_AMBIGUOUS_SBCS},
+ {0x0400, 0x0400, ULMBCS_GRP_RU},
+ {0x0401, 0x0401, ULMBCS_AMBIGUOUS_ALL},
+ {0x0402, 0x040F, ULMBCS_GRP_RU},
+ {0x0410, 0x0431, ULMBCS_AMBIGUOUS_ALL},
+ {0x0432, 0x044E, ULMBCS_GRP_RU},
+ {0x044F, 0x044F, ULMBCS_AMBIGUOUS_ALL},
+ {0x0450, 0x0491, ULMBCS_GRP_RU},
+ {0x05B0, 0x05F2, ULMBCS_GRP_HE},
+ {0x060C, 0x06AF, ULMBCS_GRP_AR},
+ {0x0E01, 0x0E5B, ULMBCS_GRP_TH},
+ {0x200C, 0x200F, ULMBCS_AMBIGUOUS_SBCS},
+ {0x2010, 0x2010, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2013, 0x2014, ULMBCS_AMBIGUOUS_SBCS},
+ {0x2015, 0x2015, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2016, 0x2016, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2017, 0x2017, ULMBCS_AMBIGUOUS_SBCS},
+ {0x2018, 0x2019, ULMBCS_AMBIGUOUS_ALL},
+ {0x201A, 0x201B, ULMBCS_AMBIGUOUS_SBCS},
+ {0x201C, 0x201D, ULMBCS_AMBIGUOUS_ALL},
+ {0x201E, 0x201F, ULMBCS_AMBIGUOUS_SBCS},
+ {0x2020, 0x2021, ULMBCS_AMBIGUOUS_ALL},
+ {0x2022, 0x2024, ULMBCS_AMBIGUOUS_SBCS},
+ {0x2025, 0x2025, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2026, 0x2026, ULMBCS_AMBIGUOUS_ALL},
+ {0x2027, 0x2027, ULMBCS_GRP_TW},
+ {0x2030, 0x2030, ULMBCS_AMBIGUOUS_ALL},
+ {0x2031, 0x2031, ULMBCS_AMBIGUOUS_SBCS},
+ {0x2032, 0x2033, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2035, 0x2035, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2039, 0x203A, ULMBCS_AMBIGUOUS_SBCS},
+ {0x203B, 0x203B, ULMBCS_AMBIGUOUS_MBCS},
+ {0x203C, 0x203C, ULMBCS_GRP_EXCEPT},
+ {0x2074, 0x2074, ULMBCS_GRP_KO},
+ {0x207F, 0x207F, ULMBCS_GRP_EXCEPT},
+ {0x2081, 0x2084, ULMBCS_GRP_KO},
+ {0x20A4, 0x20AC, ULMBCS_AMBIGUOUS_SBCS},
+ {0x2103, 0x2109, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2111, 0x2120, ULMBCS_AMBIGUOUS_SBCS},
+ /*zhujin: upgrade, for regression test, spr HKIA4YHTSU*/
+ {0x2121, 0x2121, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2122, 0x2126, ULMBCS_AMBIGUOUS_SBCS},
+ {0x212B, 0x212B, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2135, 0x2135, ULMBCS_AMBIGUOUS_SBCS},
+ {0x2153, 0x2154, ULMBCS_GRP_KO},
+ {0x215B, 0x215E, ULMBCS_GRP_EXCEPT},
+ {0x2160, 0x2179, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2190, 0x2193, ULMBCS_AMBIGUOUS_ALL},
+ {0x2194, 0x2195, ULMBCS_GRP_EXCEPT},
+ {0x2196, 0x2199, ULMBCS_AMBIGUOUS_MBCS},
+ {0x21A8, 0x21A8, ULMBCS_GRP_EXCEPT},
+ {0x21B8, 0x21B9, ULMBCS_GRP_CN},
+ {0x21D0, 0x21D1, ULMBCS_GRP_EXCEPT},
+ {0x21D2, 0x21D2, ULMBCS_AMBIGUOUS_MBCS},
+ {0x21D3, 0x21D3, ULMBCS_GRP_EXCEPT},
+ {0x21D4, 0x21D4, ULMBCS_AMBIGUOUS_MBCS},
+ {0x21D5, 0x21D5, ULMBCS_GRP_EXCEPT},
+ {0x21E7, 0x21E7, ULMBCS_GRP_CN},
+ {0x2200, 0x2200, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2201, 0x2201, ULMBCS_GRP_EXCEPT},
+ {0x2202, 0x2202, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2203, 0x2203, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2204, 0x2206, ULMBCS_GRP_EXCEPT},
+ {0x2207, 0x2208, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2209, 0x220A, ULMBCS_GRP_EXCEPT},
+ {0x220B, 0x220B, ULMBCS_AMBIGUOUS_MBCS},
+ {0x220F, 0x2215, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2219, 0x2219, ULMBCS_GRP_EXCEPT},
+ {0x221A, 0x221A, ULMBCS_AMBIGUOUS_MBCS},
+ {0x221B, 0x221C, ULMBCS_GRP_EXCEPT},
+ {0x221D, 0x221E, ULMBCS_AMBIGUOUS_MBCS},
+ {0x221F, 0x221F, ULMBCS_GRP_EXCEPT},
+ {0x2220, 0x2220, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2223, 0x222A, ULMBCS_AMBIGUOUS_MBCS},
+ {0x222B, 0x223D, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2245, 0x2248, ULMBCS_GRP_EXCEPT},
+ {0x224C, 0x224C, ULMBCS_GRP_TW},
+ {0x2252, 0x2252, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2260, 0x2261, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2262, 0x2265, ULMBCS_GRP_EXCEPT},
+ {0x2266, 0x226F, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2282, 0x2283, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2284, 0x2285, ULMBCS_GRP_EXCEPT},
+ {0x2286, 0x2287, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2288, 0x2297, ULMBCS_GRP_EXCEPT},
+ {0x2299, 0x22BF, ULMBCS_AMBIGUOUS_MBCS},
+ {0x22C0, 0x22C0, ULMBCS_GRP_EXCEPT},
+ {0x2310, 0x2310, ULMBCS_GRP_EXCEPT},
+ {0x2312, 0x2312, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2318, 0x2321, ULMBCS_GRP_EXCEPT},
+ /* NOTE(review): same range as the entry above, so the first-match walk in
+    FindLMBCSUniRange() never selects this entry - confirm which group is
+    intended for 0x2318..0x2321. */
+ {0x2318, 0x2321, ULMBCS_GRP_CN},
+ {0x2460, 0x24E9, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2500, 0x2500, ULMBCS_AMBIGUOUS_SBCS},
+ {0x2501, 0x2501, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2502, 0x2502, ULMBCS_AMBIGUOUS_ALL},
+ {0x2503, 0x2503, ULMBCS_AMBIGUOUS_MBCS},
+ {0x2504, 0x2505, ULMBCS_GRP_TW},
+ {0x2506, 0x2665, ULMBCS_AMBIGUOUS_ALL},
+ {0x2666, 0x2666, ULMBCS_GRP_EXCEPT},
+ {0x2667, 0x2669, ULMBCS_AMBIGUOUS_SBCS},
+ {0x266A, 0x266A, ULMBCS_AMBIGUOUS_ALL},
+ {0x266B, 0x266C, ULMBCS_AMBIGUOUS_SBCS},
+ {0x266D, 0x266D, ULMBCS_AMBIGUOUS_MBCS},
+ {0x266E, 0x266E, ULMBCS_AMBIGUOUS_SBCS},
+ {0x266F, 0x266F, ULMBCS_GRP_JA},
+ {0x2670, 0x2E7F, ULMBCS_AMBIGUOUS_SBCS},
+ {0x2E80, 0xF861, ULMBCS_AMBIGUOUS_MBCS},
+ {0xF862, 0xF8FF, ULMBCS_GRP_EXCEPT},
+ {0xF900, 0xFA2D, ULMBCS_AMBIGUOUS_MBCS},
+ {0xFB00, 0xFEFF, ULMBCS_AMBIGUOUS_SBCS},
+ {0xFF01, 0xFFEE, ULMBCS_AMBIGUOUS_MBCS},
+ {0xFFFF, 0xFFFF, ULMBCS_GRP_UNICODE}
+};
+
+/*
+ * Look up the group (or ULMBCS_AMBIGUOUS_* marker) for a UChar in
+ * UniLMBCSGrpMap. Returns ULMBCS_GRP_UNICODE when the character lies in a
+ * gap between two table ranges.
+ */
+static ulmbcs_byte_t
+FindLMBCSUniRange(UChar uniChar)
+{
+    const struct _UniLMBCSGrpMap *entry;
+
+    /* skip every range that ends before the character */
+    for (entry = UniLMBCSGrpMap; entry->uniEndRange < uniChar; entry++)
+    {
+    }
+
+    /* the character is at or below this range's end; is it also past its start? */
+    if (uniChar < entry->uniStartRange)
+    {
+        return ULMBCS_GRP_UNICODE;
+    }
+    return entry->GrpType;
+}
+
+/*
+We also ask the creator of a converter to send in a preferred locale
+that we can use in resolving ambiguous mappings. They send the locale
+in as a string, and we map it, if possible, to one of the
+LMBCS groups. We use this table, and the associated code, to
+do the lookup: */
+
+/**************************************************
+ This table maps locale ID's to LMBCS opt groups.
+ The default return is group 0x01. Note that for
+ performance reasons, the table is sorted in
+ increasing alphabetic order, with the notable
+ exception of zhTW. This is to force the check
+ for Traditional Chinese before dropping back to
+ Simplified.
+
+ Note too that the Latin-1 groups have been
+ commented out because it's the default, and
+ this shortens the table, allowing a serial
+ search to go quickly.
+ *************************************************/
+
+/*
+ * Locale-prefix -> optimization group table searched by FindLMBCSLocale().
+ * A locale matches an entry when it begins with the entry's LocaleID, and the
+ * search gives up as soon as an entry's first character sorts after the
+ * locale's first character, so entries must stay ordered by first character.
+ * The {NULL, ...} entry terminates the search.
+ */
+static const struct _LocaleLMBCSGrpMap
+{
+ const char *LocaleID; /* locale prefix to match; NULL marks the end of the table */
+ const ulmbcs_byte_t OptGroup; /* group returned for a matching locale */
+} LocaleLMBCSGrpMap[] =
+{
+ {"ar", ULMBCS_GRP_AR},
+ {"be", ULMBCS_GRP_RU},
+ {"bg", ULMBCS_GRP_L2},
+ /* {"ca", ULMBCS_GRP_L1}, */
+ {"cs", ULMBCS_GRP_L2},
+ /* {"da", ULMBCS_GRP_L1}, */
+ /* {"de", ULMBCS_GRP_L1}, */
+ {"el", ULMBCS_GRP_GR},
+ /* {"en", ULMBCS_GRP_L1}, */
+ /* {"es", ULMBCS_GRP_L1}, */
+ /* {"et", ULMBCS_GRP_L1}, */
+ /* {"fi", ULMBCS_GRP_L1}, */
+ /* {"fr", ULMBCS_GRP_L1}, */
+ {"he", ULMBCS_GRP_HE},
+ {"hu", ULMBCS_GRP_L2},
+ /* {"is", ULMBCS_GRP_L1}, */
+ /* {"it", ULMBCS_GRP_L1}, */
+ {"iw", ULMBCS_GRP_HE},
+ {"ja", ULMBCS_GRP_JA},
+ {"ko", ULMBCS_GRP_KO},
+ /* {"lt", ULMBCS_GRP_L1}, */
+ /* {"lv", ULMBCS_GRP_L1}, */
+ {"mk", ULMBCS_GRP_RU},
+ /* {"nl", ULMBCS_GRP_L1}, */
+ /* {"no", ULMBCS_GRP_L1}, */
+ {"pl", ULMBCS_GRP_L2},
+ /* {"pt", ULMBCS_GRP_L1}, */
+ {"ro", ULMBCS_GRP_L2},
+ {"ru", ULMBCS_GRP_RU},
+ {"sh", ULMBCS_GRP_L2},
+ {"sk", ULMBCS_GRP_L2},
+ {"sl", ULMBCS_GRP_L2},
+ {"sq", ULMBCS_GRP_L2},
+ {"sr", ULMBCS_GRP_RU},
+ /* {"sv", ULMBCS_GRP_L1}, */
+ {"th", ULMBCS_GRP_TH},
+ {"tr", ULMBCS_GRP_TR},
+ {"uk", ULMBCS_GRP_RU},
+ /* {"vi", ULMBCS_GRP_L1}, */
+ /* "zhTW" must precede "zh": matching is by prefix, so "zh" would otherwise win */
+ {"zhTW", ULMBCS_GRP_TW},
+ {"zh", ULMBCS_GRP_CN},
+ {NULL, ULMBCS_GRP_L1}
+};
+
+
+/*
+ * Map a locale ID string to an LMBCS optimization group.
+ * Returns 0 for a NULL or empty locale, the table's group when the locale
+ * starts with one of the LocaleLMBCSGrpMap prefixes, and ULMBCS_GRP_L1
+ * otherwise.
+ */
+static ulmbcs_byte_t
+FindLMBCSLocale(const char *LocaleID)
+{
+    const struct _LocaleLMBCSGrpMap *entry;
+
+    if (LocaleID == NULL || *LocaleID == 0)
+    {
+        return 0;
+    }
+
+    for (entry = LocaleLMBCSGrpMap; entry->LocaleID != NULL; entry++)
+    {
+        const char tableFirst = *entry->LocaleID;
+
+        /* table is ordered by first character: nothing further can match */
+        if (tableFirst > *LocaleID)
+        {
+            break;
+        }
+        /* cheap first-character test, then compare over the entry's length */
+        if (tableFirst == *LocaleID &&
+            uprv_strncmp(entry->LocaleID, LocaleID, strlen(entry->LocaleID)) == 0)
+        {
+            return entry->OptGroup;
+        }
+    }
+    return ULMBCS_GRP_L1;
+}
+
+
+/*
+ Before we get to the main body of code, here's how we hook up to the rest
+ of ICU. ICU converters are required to define a structure that includes
+ some function pointers, and some common data, in the style of a C++
+ vtable. There is also room in there for converter-specific data. LMBCS
+ uses that converter-specific data to keep track of the 12 subconverters
+ we use, the optimization group, and the group (if any) that matches the
+ locale. We have one structure instantiated for each of the 12 possible
+ optimization groups. To avoid typos & to avoid boring the reader, we
+ put the declarations of these structures and functions into macros. To see
+ the definitions of these structures, see unicode\ucnv_bld.h
+*/
+
+/* Per-converter LMBCS state. _LMBCSOpenWorker() allocates one of these and
+   stores it in UConverter.extraInfo; _LMBCSClose() releases it. */
+typedef struct
+ {
+ UConverterSharedData *OptGrpConverter[ULMBCS_GRP_LAST+1]; /* Converter per Opt. grp., indexed by group byte; NULL where no code page name exists for that index */
+ uint8_t OptGroup; /* default Opt. grp. for this LMBCS session */
+ uint8_t localeConverterIndex; /* group returned by FindLMBCSLocale() for the locale given at open time */
+ }
+UConverterDataLMBCS;
+
+/* Forward declaration: _LMBCSClose is used by _LMBCSOpenWorker() and by the
+   DECLARE_LMBCS_DATA initializers before its definition appears. */
+U_CDECL_BEGIN
+static void U_CALLCONV _LMBCSClose(UConverter * _this);
+U_CDECL_END
+
+/* DECLARE_LMBCS_DATA(n) stamps out the three objects that make up converter
+   "LMBCS-n":
+     _LMBCSImpl##n        function table (open is _LMBCSOpen##n, all others shared)
+     _LMBCSStaticData##n  static data, including the converter name "LMBCS-" #n
+     _LMBCSData##n        shared data tying the two together (the only non-static one)
+   The initializers are positional; their order has to follow the field order
+   of UConverterImpl / UConverterStaticData, which are declared outside this
+   file - check those declarations before adding or moving an entry. */
+#define DECLARE_LMBCS_DATA(n) \
+static const UConverterImpl _LMBCSImpl##n={\
+ UCNV_LMBCS_##n,\
+ NULL,NULL,\
+ _LMBCSOpen##n,\
+ _LMBCSClose,\
+ NULL,\
+ _LMBCSToUnicodeWithOffsets,\
+ _LMBCSToUnicodeWithOffsets,\
+ _LMBCSFromUnicode,\
+ _LMBCSFromUnicode,\
+ NULL,\
+ NULL,\
+ NULL,\
+ NULL,\
+ _LMBCSSafeClone,\
+ ucnv_getCompleteUnicodeSet,\
+ NULL,\
+ NULL\
+};\
+static const UConverterStaticData _LMBCSStaticData##n={\
+ sizeof(UConverterStaticData),\
+ "LMBCS-" #n,\
+ 0, UCNV_IBM, UCNV_LMBCS_##n, 1, 3,\
+ { 0x3f, 0, 0, 0 },1,FALSE,FALSE,0,0,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} \
+};\
+const UConverterSharedData _LMBCSData##n= \
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_LMBCSStaticData##n, &_LMBCSImpl##n);
+
+ /* The only function we needed to duplicate 12 times was the 'open'
+function, which will do basically the same thing except set a different
+optimization group. So, we put the common stuff into a worker function,
+and set up another macro to stamp out the 12 open functions.
+DEFINE_LMBCS_OPEN(n) defines _LMBCSOpen##n, which simply forwards its
+arguments to _LMBCSOpenWorker() with n as the optimization group. */
+#define DEFINE_LMBCS_OPEN(n) \
+static void U_CALLCONV \
+ _LMBCSOpen##n(UConverter* _this, UConverterLoadArgs* pArgs, UErrorCode* err) \
+{ _LMBCSOpenWorker(_this, pArgs, err, n); }
+
+
+
+/* Here's the open worker & the common close function.
+
+   _LMBCSOpenWorker allocates the per-converter UConverterDataLMBCS, loads the
+   shared data of every sub-converter that has a code page name, and records
+   the optimization group and the locale-derived group. On a load failure, or
+   when the caller only asked whether the converter is loadable, everything
+   is released again through _LMBCSClose(). An allocation failure is reported
+   as U_MEMORY_ALLOCATION_ERROR. */
+static void
+_LMBCSOpenWorker(UConverter* _this,
+                 UConverterLoadArgs *pArgs,
+                 UErrorCode* err,
+                 ulmbcs_byte_t OptGroup)
+{
+    UConverterNamePieces namePieces;
+    UConverterLoadArgs subArgs = UCNV_LOAD_ARGS_INITIALIZER;
+    ulmbcs_byte_t grp;
+    UConverterDataLMBCS *data = (UConverterDataLMBCS *)uprv_malloc(sizeof(UConverterDataLMBCS));
+
+    _this->extraInfo = data;
+    if (data == NULL)
+    {
+        *err = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    /* start with no sub-converters so that _LMBCSClose() can run at any point */
+    uprv_memset(data, 0, sizeof(UConverterDataLMBCS));
+
+    /* sub-converters inherit the "only test loadability" request */
+    subArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
+
+    for (grp = 0; grp <= ULMBCS_GRP_LAST && U_SUCCESS(*err); grp++)
+    {
+        if (OptGroupByteToCPName[grp] != NULL)
+        {
+            data->OptGrpConverter[grp] =
+                ucnv_loadSharedData(OptGroupByteToCPName[grp], &namePieces, &subArgs, err);
+        }
+    }
+
+    if (U_FAILURE(*err) || pArgs->onlyTestIsLoadable)
+    {
+        _LMBCSClose(_this);
+        return;
+    }
+
+    data->OptGroup = OptGroup;
+    data->localeConverterIndex = FindLMBCSLocale(pArgs->locale);
+}
+
+U_CDECL_BEGIN
+/* Release every loaded sub-converter and, unless the extra data lives inside
+   a caller-supplied clone buffer (isExtraLocal), free the extra data too. */
+static void U_CALLCONV
+_LMBCSClose(UConverter * _this)
+{
+    UConverterDataLMBCS *data = (UConverterDataLMBCS *)_this->extraInfo;
+    ulmbcs_byte_t grp;
+
+    if (data == NULL)
+    {
+        return;
+    }
+
+    for (grp = 0; grp <= ULMBCS_GRP_LAST; grp++)
+    {
+        UConverterSharedData *sub = data->OptGrpConverter[grp];
+        if (sub != NULL)
+        {
+            ucnv_unloadSharedDataIfReady(sub);
+        }
+    }
+
+    if (!_this->isExtraLocal)
+    {
+        uprv_free(_this->extraInfo);
+        _this->extraInfo = NULL;
+    }
+}
+
+/* Layout that _LMBCSSafeClone() imposes on the caller-supplied clone buffer:
+   the UConverter itself followed by its LMBCS-specific data, so that
+   cnv.extraInfo can point at lmbcs inside the same buffer. */
+typedef struct LMBCSClone {
+ UConverter cnv;
+ UConverterDataLMBCS lmbcs;
+} LMBCSClone;
+
+/* Clone the LMBCS-specific part of a converter into stackBuffer.
+   With *pBufferSize <= 0 this is a pure size query: the required size is
+   written to *pBufferSize and NULL is returned. Otherwise the extra data is
+   copied next to the already-copied UConverter and every shared
+   sub-converter gets an additional reference. status is not used. */
+static UConverter * U_CALLCONV
+_LMBCSSafeClone(const UConverter *cnv,
+                void *stackBuffer,
+                int32_t *pBufferSize,
+                UErrorCode *status) {
+    (void)status;
+    LMBCSClone *clone = (LMBCSClone *)stackBuffer;
+    UConverterDataLMBCS *srcData = (UConverterDataLMBCS *)cnv->extraInfo;
+    int32_t grp;
+
+    if(*pBufferSize<=0) {
+        /* preflighting: only report how much room a clone needs */
+        *pBufferSize=(int32_t)sizeof(LMBCSClone);
+        return NULL;
+    }
+
+    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
+    uprv_memcpy(&clone->lmbcs, srcData, sizeof(UConverterDataLMBCS));
+
+    /* the clone shares the subconverters, so each one gains a reference */
+    for(grp = 0; grp <= ULMBCS_GRP_LAST; grp++) {
+        UConverterSharedData *sub = srcData->OptGrpConverter[grp];
+        if(sub != NULL) {
+            ucnv_incrementRefCount(sub);
+        }
+    }
+
+    /* extra data lives in the clone buffer: _LMBCSClose() must not free it */
+    clone->cnv.extraInfo = &clone->lmbcs;
+    clone->cnv.isExtraLocal = TRUE;
+    return &clone->cnv;
+}
+
+/*
+ * There used to be a _LMBCSGetUnicodeSet() function here (up to svn revision 20117)
+ * which added all code points except for U+F6xx
+ * because those cannot be represented in the Unicode group.
+ * However, it turns out that windows-950 has roundtrips for all of U+F6xx
+ * which means that LMBCS can convert all Unicode code points after all.
+ * We now simply use ucnv_getCompleteUnicodeSet().
+ *
+ * This may need to be looked at again as Lotus uses _LMBCSGetUnicodeSet(). (091216)
+ */
+
+/*
+ Here's the basic helper function that we use when converting from
+ Unicode to LMBCS, and we suspect that a Unicode character will fit into
+ one of the 12 groups. The return value is the number of bytes written
+ starting at pStartLMBCS (if any).
+*/
+
+static size_t
+LMBCSConversionWorker (
+    UConverterDataLMBCS * extraInfo,    /* sub-converters plus opt & locale groups */
+    ulmbcs_byte_t group,                /* group whose sub-converter is tried */
+    ulmbcs_byte_t * pStartLMBCS,        /* start of the output bytes */
+    UChar * pUniChar,                   /* the Unicode character to convert */
+    ulmbcs_byte_t * lastConverterIndex, /* output: set to group once its sub-converter maps the character */
+    UBool * groups_tried                /* output: group is flagged here when it cannot map the character */
+)
+{
+    ulmbcs_byte_t * out = pStartLMBCS;
+    UConverterSharedData * subCnv = extraInfo->OptGrpConverter[group];
+
+    int bytesConverted;
+    uint32_t value;
+    ulmbcs_byte_t firstByte;
+
+    U_ASSERT(subCnv);
+    U_ASSERT(group<ULMBCS_GRP_UNICODE);
+
+    bytesConverted = ucnv_MBCSFromUChar32(subCnv, *pUniChar, &value, FALSE);
+
+    if (bytesConverted <= 0)
+    {
+        /* most common failure mode is an unassigned character */
+        groups_tried[group] = TRUE;
+        return 0;
+    }
+
+    /* the result bytes are packed into value; pick out the leading one */
+    firstByte = (ulmbcs_byte_t)(value >> ((bytesConverted - 1) * 8));
+
+    *lastConverterIndex = group;
+
+    /* All initial byte values in lower ascii range should have been caught by now,
+       except with the exception group.
+     */
+    U_ASSERT((firstByte <= ULMBCS_C0END) || (firstByte >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT));
+
+    /* group prefix: none for the exception group or the session's own
+       optimization group, one byte otherwise, and a doubled group byte when a
+       double-byte group produced a single byte */
+    if (group != ULMBCS_GRP_EXCEPT && extraInfo->OptGroup != group)
+    {
+        *out++ = group;
+        if (bytesConverted == 1 && group >= ULMBCS_DOUBLEOPTGROUP_START)
+        {
+            *out++ = group;
+        }
+    }
+
+    /* don't emit control chars */
+    if (bytesConverted == 1 && firstByte < 0x20)
+    {
+        return 0;
+    }
+
+    /* then move over the converted data, most significant byte first;
+       lengths above 4 will never occur and write nothing */
+    if (bytesConverted <= 4)
+    {
+        if (bytesConverted == 4)
+        {
+            *out++ = (ulmbcs_byte_t)(value >> 24);
+        }
+        if (bytesConverted >= 3)
+        {
+            *out++ = (ulmbcs_byte_t)(value >> 16);
+        }
+        if (bytesConverted >= 2)
+        {
+            *out++ = (ulmbcs_byte_t)(value >> 8);
+        }
+        *out++ = (ulmbcs_byte_t)value;
+    }
+
+    return (out - pStartLMBCS);
+}
+
+
+/* This is a much simpler version of above, when we
+know we are writing LMBCS using the Unicode group.
+Writes the Unicode group byte followed by two data bytes and
+returns ULMBCS_UNICODE_SIZE.
+*/
+static size_t
+LMBCSConvertUni(ulmbcs_byte_t * pLMBCS, UChar uniChar)
+{
+    const uint8_t hi = (uint8_t)(uniChar >> 8);
+    const uint8_t lo = (uint8_t)(uniChar & 0x00FF);
+
+    pLMBCS[0] = ULMBCS_GRP_UNICODE;
+
+    if (lo != 0)
+    {
+        /* ordinary case: high byte, then low byte */
+        pLMBCS[1] = hi;
+        pLMBCS[2] = lo;
+    }
+    else
+    {
+        /* a zero low byte is not written: marker byte first, then the high byte */
+        pLMBCS[1] = ULMBCS_UNICOMPATZERO;
+        pLMBCS[2] = hi;
+    }
+    return ULMBCS_UNICODE_SIZE;
+}
+
+
+
+/* The main Unicode to LMBCS conversion function.
+
+ Converts UChars one at a time from args->source (up to args->sourceLimit)
+ into LMBCS bytes at args->target. Each input UChar is first encoded into the
+ local LMBCS[] buffer and then copied out; bytes that do not fit before
+ args->targetLimit are parked in the converter's charErrorBuffer and *err is
+ set to U_BUFFER_OVERFLOW_ERROR. When args->offsets is non-NULL, the index of
+ the originating source UChar is recorded for every byte written to target.
+*/
+static void U_CALLCONV
+_LMBCSFromUnicode(UConverterFromUnicodeArgs* args,
+ UErrorCode* err)
+{
+ ulmbcs_byte_t lastConverterIndex = 0;
+ UChar uniChar;
+ ulmbcs_byte_t LMBCS[ULMBCS_CHARSIZE_MAX];
+ ulmbcs_byte_t * pLMBCS;
+ int32_t bytes_written;
+ UBool groups_tried[ULMBCS_GRP_LAST+1];
+ UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
+ int sourceIndex = 0;
+
+ /* Basic strategy: attempt to fill in local LMBCS 1-char buffer.(LMBCS)
+ If that succeeds, see if it will all fit into the target & copy it over
+ if it does.
+
+ We try conversions in the following order:
+
+ 1. Single-byte ascii & special fixed control chars (&null)
+ 2. Look up group in table & try that (could be
+ A) Unicode group
+ B) control group,
+ C) national encoding,
+ or ambiguous SBCS or MBCS group (on to step 3...)
+
+ 3. If it's ambiguous, try this order:
+ A) The optimization group
+ B) The locale group
+ C) The last group that succeeded with this string.
+ D) every other group that's relevant (single or double)
+ E) If it's single-byte ambiguous, try the exceptions group
+
+ 4. And as a grand fallback: Unicode
+ */
+
+ /*Fix for SPR#DJOE66JFN3 (Lotus)*/
+ ulmbcs_byte_t OldConverterIndex = 0;
+
+ while (args->source < args->sourceLimit && !U_FAILURE(*err))
+ {
+ /*Fix for SPR#DJOE66JFN3 (Lotus)*/
+ /* remember the locale group: it may be overridden for this character
+ below and is restored at the end of the iteration */
+ OldConverterIndex = extraInfo->localeConverterIndex;
+
+ if (args->target >= args->targetLimit)
+ {
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ uniChar = *(args->source);
+ bytes_written = 0;
+ pLMBCS = LMBCS;
+
+ /* check cases in rough order of how common they are, for speed */
+
+ /* single byte matches: strategy 1 */
+ /*Fix for SPR#DJOE66JFN3 (Lotus)*/
+ /* U+0080..U+00FF, except the code points listed here, temporarily use
+ ULMBCS_GRP_L1 as the locale group */
+ if((uniChar>=0x80) && (uniChar<=0xff)
+ /*Fix for SPR#JUYA6XAERU and TSAO7GL5NK (Lotus)*/ &&(uniChar!=0xB1) &&(uniChar!=0xD7) &&(uniChar!=0xF7)
+ &&(uniChar!=0xB0) &&(uniChar!=0xB4) &&(uniChar!=0xB6) &&(uniChar!=0xA7) &&(uniChar!=0xA8))
+ {
+ extraInfo->localeConverterIndex = ULMBCS_GRP_L1;
+ }
+ if (((uniChar > ULMBCS_C0END) && (uniChar < ULMBCS_C1START)) ||
+ uniChar == 0 || uniChar == ULMBCS_HT || uniChar == ULMBCS_CR ||
+ uniChar == ULMBCS_LF || uniChar == ULMBCS_123SYSTEMRANGE
+ )
+ {
+ *pLMBCS++ = (ulmbcs_byte_t ) uniChar;
+ bytes_written = 1;
+ }
+
+
+ if (!bytes_written)
+ {
+ /* Check by UNICODE range (Strategy 2) */
+ ulmbcs_byte_t group = FindLMBCSUniRange(uniChar);
+
+ if (group == ULMBCS_GRP_UNICODE) /* (Strategy 2A) */
+ {
+ pLMBCS += LMBCSConvertUni(pLMBCS,uniChar);
+
+ bytes_written = (int32_t)(pLMBCS - LMBCS);
+ }
+ else if (group == ULMBCS_GRP_CTRL) /* (Strategy 2B) */
+ {
+ /* Handle control characters here */
+ if (uniChar <= ULMBCS_C0END)
+ {
+ *pLMBCS++ = ULMBCS_GRP_CTRL;
+ *pLMBCS++ = (ulmbcs_byte_t)(ULMBCS_CTRLOFFSET + uniChar);
+ }
+ else if (uniChar >= ULMBCS_C1START && uniChar <= ULMBCS_C1START + ULMBCS_CTRLOFFSET)
+ {
+ *pLMBCS++ = ULMBCS_GRP_CTRL;
+ *pLMBCS++ = (ulmbcs_byte_t ) (uniChar & 0x00FF);
+ }
+ bytes_written = (int32_t)(pLMBCS - LMBCS);
+ }
+ else if (group < ULMBCS_GRP_UNICODE) /* (Strategy 2C) */
+ {
+ /* a specific converter has been identified - use it */
+ /* groups_tried is only written by this call; it is cleared below
+ before any of the Strategy 3 steps read it */
+ bytes_written = (int32_t)LMBCSConversionWorker (
+ extraInfo, group, pLMBCS, &uniChar,
+ &lastConverterIndex, groups_tried);
+ }
+ if (!bytes_written) /* the ambiguous group cases (Strategy 3) */
+ {
+ uprv_memset(groups_tried, 0, sizeof(groups_tried));
+
+ /* check for non-default optimization group (Strategy 3A )*/
+ /* NOTE(review): the condition tests OptGroup, but every conversion
+ attempt in this branch uses ULMBCS_GRP_L1, ULMBCS_GRP_EXCEPT or
+ localeConverterIndex rather than OptGroup - confirm that is intended */
+ if ((extraInfo->OptGroup != 1) && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->OptGroup)))
+ {
+ /*zhujin: upgrade, merge #39299 here (Lotus) */
+ /*To make R5 compatible translation, look for exceptional group first for non-DBCS*/
+
+ if(extraInfo->localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START)
+ {
+ bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
+ ULMBCS_GRP_L1, pLMBCS, &uniChar,
+ &lastConverterIndex, groups_tried);
+
+ if(!bytes_written)
+ {
+ bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
+ ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar,
+ &lastConverterIndex, groups_tried);
+ }
+ if(!bytes_written)
+ {
+ bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
+ extraInfo->localeConverterIndex, pLMBCS, &uniChar,
+ &lastConverterIndex, groups_tried);
+ }
+ }
+ else
+ {
+ bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
+ extraInfo->localeConverterIndex, pLMBCS, &uniChar,
+ &lastConverterIndex, groups_tried);
+ }
+ }
+ /* check for locale optimization group (Strategy 3B) */
+ if (!bytes_written && (extraInfo->localeConverterIndex) && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->localeConverterIndex)))
+ {
+ bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
+ extraInfo->localeConverterIndex, pLMBCS, &uniChar, &lastConverterIndex, groups_tried);
+ }
+ /* check for last optimization group used for this string (Strategy 3C) */
+ if (!bytes_written && (lastConverterIndex) && (ULMBCS_AMBIGUOUS_MATCH(group, lastConverterIndex)))
+ {
+ bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
+ lastConverterIndex, pLMBCS, &uniChar, &lastConverterIndex, groups_tried);
+ }
+ if (!bytes_written)
+ {
+ /* just check every possible matching converter (Strategy 3D) */
+ ulmbcs_byte_t grp_start;
+ ulmbcs_byte_t grp_end;
+ ulmbcs_byte_t grp_ix;
+ /* MBCS-ambiguous: scan the double-byte groups; otherwise scan
+ ULMBCS_GRP_L1..ULMBCS_GRP_TH; ALL-ambiguous widens to every group */
+ grp_start = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS)
+ ? ULMBCS_DOUBLEOPTGROUP_START
+ : ULMBCS_GRP_L1);
+ grp_end = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS)
+ ? ULMBCS_GRP_LAST
+ : ULMBCS_GRP_TH);
+ if(group == ULMBCS_AMBIGUOUS_ALL)
+ {
+ grp_start = ULMBCS_GRP_L1;
+ grp_end = ULMBCS_GRP_LAST;
+ }
+ for (grp_ix = grp_start;
+ grp_ix <= grp_end && !bytes_written;
+ grp_ix++)
+ {
+ /* skip groups without a converter and groups that already failed */
+ if (extraInfo->OptGrpConverter [grp_ix] && !groups_tried [grp_ix])
+ {
+ bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
+ grp_ix, pLMBCS, &uniChar,
+ &lastConverterIndex, groups_tried);
+ }
+ }
+ /* a final conversion fallback to the exceptions group if it's likely
+ to be single byte (Strategy 3E) */
+ if (!bytes_written && grp_start == ULMBCS_GRP_L1)
+ {
+ bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
+ ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar,
+ &lastConverterIndex, groups_tried);
+ }
+ }
+ /* all of our other strategies failed. Fallback to Unicode. (Strategy 4)*/
+ if (!bytes_written)
+ {
+
+ pLMBCS += LMBCSConvertUni(pLMBCS, uniChar);
+ bytes_written = (int32_t)(pLMBCS - LMBCS);
+ }
+ }
+ }
+
+ /* we have a translation. increment source and write as much as possible to target */
+ args->source++;
+ pLMBCS = LMBCS;
+ /* bytes_written counts down; it is still positive after this loop only
+ when the target filled up before every byte was copied */
+ while (args->target < args->targetLimit && bytes_written--)
+ {
+ *(args->target)++ = *pLMBCS++;
+ if (args->offsets)
+ {
+ *(args->offsets)++ = sourceIndex;
+ }
+ }
+ sourceIndex++;
+ if (bytes_written > 0)
+ {
+ /* write any bytes that didn't fit in target to the error buffer,
+ common code will move this to target if we get called back with
+ enough target room
+ */
+ uint8_t * pErrorBuffer = args->converter->charErrorBuffer;
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ args->converter->charErrorBufferLength = (int8_t)bytes_written;
+ while (bytes_written--)
+ {
+ *pErrorBuffer++ = *pLMBCS++;
+ }
+ }
+ /*Fix for SPR#DJOE66JFN3 (Lotus)*/
+ /* undo the per-character locale group override */
+ extraInfo->localeConverterIndex = OldConverterIndex;
+ }
+}
+
+
+/* Now, the Unicode from LMBCS section */
+
+
+/* A function to call when we are looking at the Unicode group byte in LMBCS.
+   Reads the two bytes that follow the group byte, advances *ppLMBCSin past
+   them and returns the UChar they encode. */
+static UChar
+GetUniFromLMBCSUni(char const ** ppLMBCSin)  /* Called with LMBCS-style Unicode byte stream */
+{
+    const char *cursor = *ppLMBCSin;
+    const uint8_t first = *cursor++;   /* Big-endian Unicode in LMBCS compatibility group */
+    const uint8_t second = *cursor++;
+
+    *ppLMBCSin = cursor;
+
+    if (first == ULMBCS_UNICOMPATZERO)
+    {
+        /* marker byte: the second byte is the high byte and the low byte is zero */
+        return (UChar)(second << 8);
+    }
+    return (UChar)((first << 8) | second);
+}
+
+
+
+/* CHECK_SOURCE_LIMIT: Helper macro to verify that there are at least 'index'
+   bytes left in source up to sourceLimit. If there are not, it sets *err to
+   U_TRUNCATED_CHAR_FOUND, moves the source pointer to sourceLimit (to consume
+   all input as required by ICU converter semantics) and returns 0xffff from
+   the enclosing function.
+   It can therefore only be used inside a function that has 'args' and 'err'
+   in scope and returns a value. The argument is parenthesized so that an
+   expression can be passed safely.
+*/
+
+#define CHECK_SOURCE_LIMIT(index) UPRV_BLOCK_MACRO_BEGIN { \
+    if (args->source+(index) > args->sourceLimit) { \
+        *err = U_TRUNCATED_CHAR_FOUND; \
+        args->source = args->sourceLimit; \
+        return 0xffff; \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/* Return the Unicode representation for the current LMBCS character.
+
+ Reads one LMBCS character starting at args->source and advances args->source
+ past it. Returns 0xffff with *err set to U_ILLEGAL_ARGUMENT_ERROR when no
+ input is left on entry, and 0xffff with U_TRUNCATED_CHAR_FOUND (through
+ CHECK_SOURCE_LIMIT) when the character is cut off by args->sourceLimit.
+ An unusable group byte sets *err to U_INVALID_CHAR_FOUND.
+*/
+
+static UChar32 U_CALLCONV
+_LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
+ UErrorCode* err)
+{
+ UChar32 uniChar = 0; /* an output UNICODE char */
+ ulmbcs_byte_t CurByte; /* A byte from the input stream */
+
+ /* error check */
+ if (args->source >= args->sourceLimit)
+ {
+ *err = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0xffff;
+ }
+ /* Grab first byte & save address for error recovery */
+ CurByte = *((ulmbcs_byte_t *) (args->source++));
+
+ /*
+ * at entry of each if clause:
+ * 1. 'CurByte' holds the first byte of a LMBCS character
+ * 2. '*source' points to the next byte of the source stream after 'CurByte'
+ *
+ * the job of each if clause is:
+ * 1. set '*source' to point at the beginning of next char (nop if LMBCS char is only 1 byte)
+ * 2. set 'uniChar' up with the right Unicode value, or set 'err' appropriately
+ */
+
+ /* First lets check the simple fixed values. */
+
+ if(((CurByte > ULMBCS_C0END) && (CurByte < ULMBCS_C1START)) /* ascii range */
+ || (CurByte == 0)
+ || CurByte == ULMBCS_HT || CurByte == ULMBCS_CR
+ || CurByte == ULMBCS_LF || CurByte == ULMBCS_123SYSTEMRANGE)
+ {
+ uniChar = CurByte;
+ }
+ else
+ {
+ UConverterDataLMBCS * extraInfo;
+ ulmbcs_byte_t group;
+ UConverterSharedData *cnv;
+
+ if (CurByte == ULMBCS_GRP_CTRL) /* Control character group - no opt group update */
+ {
+ ulmbcs_byte_t C0C1byte;
+ CHECK_SOURCE_LIMIT(1);
+ C0C1byte = *(args->source)++;
+ /* bytes below ULMBCS_C1START were stored with ULMBCS_CTRLOFFSET added; others are stored as-is */
+ uniChar = (C0C1byte < ULMBCS_C1START) ? C0C1byte - ULMBCS_CTRLOFFSET : C0C1byte;
+ }
+ else
+ if (CurByte == ULMBCS_GRP_UNICODE) /* Unicode compatibility group: BigEndian UTF16 */
+ {
+ CHECK_SOURCE_LIMIT(2);
+
+ /* don't check for error indicators fffe/ffff below */
+ return GetUniFromLMBCSUni(&(args->source));
+ }
+ else if (CurByte <= ULMBCS_CTRLOFFSET)
+ {
+ group = CurByte; /* group byte is in the source */
+ extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
+ if (group > ULMBCS_GRP_LAST || (cnv = extraInfo->OptGrpConverter[group]) == NULL)
+ {
+ /* this is not a valid group byte - no converter*/
+ *err = U_INVALID_CHAR_FOUND;
+ }
+ else if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */
+ {
+
+ CHECK_SOURCE_LIMIT(2);
+
+ /* check for LMBCS doubled-group-byte case */
+ if (*args->source == group) {
+ /* single byte */
+ ++args->source;
+ uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 1, FALSE);
+ ++args->source;
+ } else {
+ /* double byte */
+ uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 2, FALSE);
+ args->source += 2;
+ }
+ }
+ else { /* single byte conversion */
+ CHECK_SOURCE_LIMIT(1);
+ CurByte = *(args->source)++;
+
+ if (CurByte >= ULMBCS_C1START)
+ {
+ uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte);
+ }
+ else
+ {
+ /* The non-optimizable oddballs where there is an explicit byte
+ * AND the second byte is not in the upper ascii range
+ */
+ char bytes[2];
+
+ extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
+ cnv = extraInfo->OptGrpConverter [ULMBCS_GRP_EXCEPT];
+
+ /* Lookup value must include opt group */
+ bytes[0] = group;
+ bytes[1] = CurByte;
+ uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, bytes, 2, FALSE);
+ }
+ }
+ }
+ else if (CurByte >= ULMBCS_C1START) /* group byte is implicit */
+ {
+ /* no group byte in the stream: use the session's optimization group */
+ extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
+ group = extraInfo->OptGroup;
+ cnv = extraInfo->OptGrpConverter[group];
+ if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */
+ {
+ if (!ucnv_MBCSIsLeadByte(cnv, CurByte))
+ {
+ CHECK_SOURCE_LIMIT(0);
+
+ /* let the MBCS conversion consume CurByte again */
+ uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 1, FALSE);
+ }
+ else
+ {
+ CHECK_SOURCE_LIMIT(1);
+ /* let the MBCS conversion consume CurByte again */
+ uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 2, FALSE);
+ ++args->source;
+ }
+ }
+ else /* single byte conversion */
+ {
+ uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte);
+ }
+ }
+ }
+ return uniChar;
+}
+
+
+/* The exported function that converts lmbcs to one or more
+ UChars - currently UTF-16.
+
+ Calls _LMBCSGetNextUCharWorker() once per LMBCS character until the source
+ or the target is exhausted or an error occurs. A character cut off by the
+ end of the source is kept in the converter's toUBytes/toULength and is
+ reassembled, together with new input, on the next call. When args->offsets
+ is non-NULL, the source offset of each character is recorded.
+*/
+static void U_CALLCONV
+_LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
+ UErrorCode* err)
+{
+ char LMBCS [ULMBCS_CHARSIZE_MAX];
+ UChar uniChar; /* one output UNICODE char */
+ const char * saveSource; /* beginning of current code point */
+ const char * pStartLMBCS = args->source; /* beginning of whole string */
+ const char * errSource = NULL; /* pointer to actual input in case an error occurs */
+ int8_t savebytes = 0;
+
+ /* Process from source to limit, or until error */
+ while (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit > args->target)
+ {
+ saveSource = args->source; /* beginning of current code point */
+
+ if (args->converter->toULength) /* reassemble char from previous call */
+ {
+ const char *saveSourceLimit;
+ size_t size_old = args->converter->toULength;
+
+ /* limit from source is either remainder of temp buffer, or user limit on source */
+ size_t size_new_maybe_1 = sizeof(LMBCS) - size_old;
+ size_t size_new_maybe_2 = args->sourceLimit - args->source;
+ size_t size_new = (size_new_maybe_1 < size_new_maybe_2) ? size_new_maybe_1 : size_new_maybe_2;
+
+
+ /* build saved bytes + new bytes in LMBCS[] and point args at that buffer for the worker call */
+ uprv_memcpy(LMBCS, args->converter->toUBytes, size_old);
+ uprv_memcpy(LMBCS + size_old, args->source, size_new);
+ saveSourceLimit = args->sourceLimit;
+ args->source = errSource = LMBCS;
+ args->sourceLimit = LMBCS+size_old+size_new;
+ savebytes = (int8_t)(size_old+size_new);
+ uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err);
+ /* map the worker's progress in LMBCS[] back onto the real source, not counting the saved bytes */
+ args->source = saveSource + ((args->source - LMBCS) - size_old);
+ args->sourceLimit = saveSourceLimit;
+
+ if (*err == U_TRUNCATED_CHAR_FOUND)
+ {
+ /* evil special case: source buffers so small a char spans more than 2 buffers */
+ args->converter->toULength = savebytes;
+ uprv_memcpy(args->converter->toUBytes, LMBCS, savebytes);
+ args->source = args->sourceLimit;
+ *err = U_ZERO_ERROR;
+ return;
+ }
+ else
+ {
+ /* clear the partial-char marker */
+ args->converter->toULength = 0;
+ }
+ }
+ else
+ {
+ errSource = saveSource;
+ uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err);
+ savebytes = (int8_t)(args->source - saveSource);
+ }
+ if (U_SUCCESS(*err))
+ {
+ /* 0xfffe and 0xffff are treated as error indicators, not as output */
+ if (uniChar < 0xfffe)
+ {
+ *(args->target)++ = uniChar;
+ if(args->offsets)
+ {
+ *(args->offsets)++ = (int32_t)(saveSource - pStartLMBCS);
+ }
+ }
+ else if (uniChar == 0xfffe)
+ {
+ *err = U_INVALID_CHAR_FOUND;
+ }
+ else /* if (uniChar == 0xffff) */
+ {
+ *err = U_ILLEGAL_CHAR_FOUND;
+ }
+ }
+ }
+ /* if target ran out before source, return U_BUFFER_OVERFLOW_ERROR */
+ if (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit <= args->target)
+ {
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ else if (U_FAILURE(*err))
+ {
+ /* If character incomplete or unmappable/illegal, store it in toUBytes[] */
+ args->converter->toULength = savebytes;
+ if (savebytes > 0) {
+ uprv_memcpy(args->converter->toUBytes, errSource, savebytes);
+ }
+ /* a truncated character is not reported as an error: its bytes were saved above */
+ if (*err == U_TRUNCATED_CHAR_FOUND) {
+ *err = U_ZERO_ERROR;
+ }
+ }
+}
+
+/* And now, the macroized declarations of data & functions.
+   Each number is passed by DEFINE_LMBCS_OPEN to _LMBCSOpenWorker() as the
+   optimization group and becomes the suffix of the "LMBCS-n" converter name
+   in DECLARE_LMBCS_DATA. Every DEFINE_LMBCS_OPEN(n) needs a matching
+   DECLARE_LMBCS_DATA(n), because the data refers to _LMBCSOpen##n. */
+DEFINE_LMBCS_OPEN(1)
+DEFINE_LMBCS_OPEN(2)
+DEFINE_LMBCS_OPEN(3)
+DEFINE_LMBCS_OPEN(4)
+DEFINE_LMBCS_OPEN(5)
+DEFINE_LMBCS_OPEN(6)
+DEFINE_LMBCS_OPEN(8)
+DEFINE_LMBCS_OPEN(11)
+DEFINE_LMBCS_OPEN(16)
+DEFINE_LMBCS_OPEN(17)
+DEFINE_LMBCS_OPEN(18)
+DEFINE_LMBCS_OPEN(19)
+
+
+DECLARE_LMBCS_DATA(1)
+DECLARE_LMBCS_DATA(2)
+DECLARE_LMBCS_DATA(3)
+DECLARE_LMBCS_DATA(4)
+DECLARE_LMBCS_DATA(5)
+DECLARE_LMBCS_DATA(6)
+DECLARE_LMBCS_DATA(8)
+DECLARE_LMBCS_DATA(11)
+DECLARE_LMBCS_DATA(16)
+DECLARE_LMBCS_DATA(17)
+DECLARE_LMBCS_DATA(18)
+DECLARE_LMBCS_DATA(19)
+
+/* closes the U_CDECL_BEGIN that precedes the definition of _LMBCSClose */
+U_CDECL_END
+
+#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
diff --git a/thirdparty/icu4c/common/ucnv_set.cpp b/thirdparty/icu4c/common/ucnv_set.cpp
new file mode 100644
index 0000000000..926cee0de8
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_set.cpp
@@ -0,0 +1,70 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ucnv_set.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004sep07
+* created by: Markus W. Scherer
+*
+* Conversion API functions using USet (ucnv_getUnicodeSet())
+* moved here from ucnv.c for removing the dependency of other ucnv_
+* implementation functions on the USet implementation.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uset.h"
+#include "unicode/ucnv.h"
+#include "ucnv_bld.h"
+#include "uset_imp.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+ U_CAPI void U_EXPORT2
+ ucnv_getUnicodeSet(const UConverter *cnv,
+ USet *setFillIn,
+ UConverterUnicodeSet whichSet,
+ UErrorCode *pErrorCode) {
+ /* nothing can be done without an error code that still signals success */
+ if(pErrorCode==NULL) {
+ return;
+ }
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+
+ /* reject missing objects and set selectors outside [UCNV_ROUNDTRIP_SET, UCNV_SET_COUNT) */
+ if(cnv==NULL || setFillIn==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ if(!(UCNV_ROUNDTRIP_SET<=whichSet && whichSet<UCNV_SET_COUNT)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ /* a converter without a getUnicodeSet implementation cannot answer this query */
+ if(cnv->sharedData->impl->getUnicodeSet==NULL) {
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return;
+ }
+
+ /* bundle the uset_* functions and the caller's set for the converter to call back into */
+ USetAdder adder={
+ NULL, uset_add, uset_addRange, uset_addString, uset_remove, uset_removeRange
+ };
+ adder.set=setFillIn;
+ /* start from an empty set, then let the converter add the code points it supports */
+ uset_clear(setFillIn);
+ cnv->sharedData->impl->getUnicodeSet(cnv, &adder, whichSet, pErrorCode);
+}
+
+#endif
diff --git a/thirdparty/icu4c/common/ucnv_u16.cpp b/thirdparty/icu4c/common/ucnv_u16.cpp
new file mode 100644
index 0000000000..a5e8367400
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_u16.cpp
@@ -0,0 +1,1579 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2002-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: ucnv_u16.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jul01
+* created by: Markus W. Scherer
+*
+* UTF-16 converter implementation. Used to be in ucnv_utf.c.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/uversion.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "cmemory.h"
+
+ enum {
+ UCNV_NEED_TO_WRITE_BOM=1 /* value kept in cnv->fromUnicodeStatus while the BOM still has to be written; the fromUnicode functions clear it to 0 after writing */
+ };
+
+U_CDECL_BEGIN
+/*
+ * The UTF-16 toUnicode implementation is also used for the Java-specific
+ * "with BOM" variants of UTF-16BE and UTF-16LE.
+ */
+static void U_CALLCONV
+_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode);
+
+/* UTF-16BE ----------------------------------------------------------------- */
+
+#if U_IS_BIG_ENDIAN /* "PE" alias: names the BE or LE fromUnicode function according to U_IS_BIG_ENDIAN */
+# define _UTF16PEFromUnicodeWithOffsets _UTF16BEFromUnicodeWithOffsets
+#else
+# define _UTF16PEFromUnicodeWithOffsets _UTF16LEFromUnicodeWithOffsets
+#endif
+
+
+ static void U_CALLCONV /* fromUnicode for UTF-16BE: writes each UChar from pArgs->source as two bytes, high byte first, to pArgs->target; fills pArgs->offsets when non-NULL; reports overflow and unpaired surrogates through pErrorCode */
+ _UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const UChar *source;
+ char *target;
+ int32_t *offsets;
+
+ uint32_t targetCapacity, length, sourceIndex;
+ UChar c, trail;
+ char overflow[4];
+
+ source=pArgs->source;
+ length=(int32_t)(pArgs->sourceLimit-source);
+ if(length<=0) {
+ /* no input, nothing to do (length is declared uint32_t, so this test is effectively length==0) */
+ return;
+ }
+
+ cnv=pArgs->converter;
+
+ /* write the big-endian BOM bytes (FE FF) once if fromUnicodeStatus asks for it, then clear the request */
+ if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ (char)0xfeu, (char)0xffu };
+ ucnv_fromUWriteBytes(cnv,
+ bom, 2,
+ &pArgs->target, pArgs->targetLimit,
+ &pArgs->offsets, -1,
+ pErrorCode);
+ cnv->fromUnicodeStatus=0;
+ }
+
+ target=pArgs->target;
+ if(target >= pArgs->targetLimit) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ return;
+ }
+
+ targetCapacity=(uint32_t)(pArgs->targetLimit-target);
+ offsets=pArgs->offsets;
+ sourceIndex=0;
+
+ /* c!=0 indicates in several places outside the main loops that a surrogate was found */
+
+ if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) {
+ /* the last buffer ended with a lead surrogate, output the surrogate pair */
+ ++source;
+ --length;
+ target[0]=(uint8_t)(c>>8);
+ target[1]=(uint8_t)c;
+ target[2]=(uint8_t)(trail>>8);
+ target[3]=(uint8_t)trail;
+ target+=4;
+ targetCapacity-=4;
+ if(offsets!=NULL) {
+ *offsets++=-1;
+ *offsets++=-1;
+ *offsets++=-1;
+ *offsets++=-1;
+ }
+ sourceIndex=1;
+ cnv->fromUChar32=c=0;
+ }
+
+ if(c==0) {
+ /* copy an even number of bytes for complete UChars */
+ uint32_t count=2*length;
+ if(count>targetCapacity) {
+ count=targetCapacity&~1;
+ }
+ /* count is even */
+ targetCapacity-=count;
+ count>>=1;
+ length-=count;
+
+ if(offsets==NULL) {
+ while(count>0) {
+ c=*source++;
+ if(U16_IS_SINGLE(c)) {
+ target[0]=(uint8_t)(c>>8);
+ target[1]=(uint8_t)c;
+ target+=2;
+ } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
+ ++source;
+ --count;
+ target[0]=(uint8_t)(c>>8);
+ target[1]=(uint8_t)c;
+ target[2]=(uint8_t)(trail>>8);
+ target[3]=(uint8_t)trail;
+ target+=4;
+ } else {
+ break;
+ }
+ --count;
+ }
+ } else {
+ while(count>0) {
+ c=*source++;
+ if(U16_IS_SINGLE(c)) {
+ target[0]=(uint8_t)(c>>8);
+ target[1]=(uint8_t)c;
+ target+=2;
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex++;
+ } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
+ ++source;
+ --count;
+ target[0]=(uint8_t)(c>>8);
+ target[1]=(uint8_t)c;
+ target[2]=(uint8_t)(trail>>8);
+ target[3]=(uint8_t)trail;
+ target+=4;
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ sourceIndex+=2;
+ } else {
+ break;
+ }
+ --count;
+ }
+ }
+
+ if(count==0) {
+ /* done with the loop for complete UChars */
+ if(length>0 && targetCapacity>0) {
+ /*
+ * there is more input and some target capacity -
+ * it must be targetCapacity==1 because otherwise
+ * the above would have copied more;
+ * prepare for overflow output
+ */
+ if(U16_IS_SINGLE(c=*source++)) {
+ overflow[0]=(char)(c>>8);
+ overflow[1]=(char)c;
+ length=2; /* 2 bytes to output */
+ c=0;
+ /* } else { keep c for surrogate handling, length will be set there */
+ }
+ } else {
+ length=0;
+ c=0;
+ }
+ } else {
+ /* keep c for surrogate handling, length will be set there */
+ targetCapacity+=2*count; /* give back the 2 bytes per UChar that were reserved above but not written */
+ }
+ } else {
+ length=0; /* from here on, length counts the bytes in overflow[] */
+ }
+
+ if(c!=0) {
+ /*
+ * c is a surrogate, and
+ * - source or target too short
+ * - or the surrogate is unmatched
+ */
+ length=0;
+ if(U16_IS_SURROGATE_LEAD(c)) {
+ if(source<pArgs->sourceLimit) {
+ if(U16_IS_TRAIL(trail=*source)) {
+ /* output the surrogate pair, will overflow (see conditions comment above) */
+ ++source;
+ overflow[0]=(char)(c>>8);
+ overflow[1]=(char)c;
+ overflow[2]=(char)(trail>>8);
+ overflow[3]=(char)trail;
+ length=4; /* 4 bytes to output */
+ c=0;
+ } else {
+ /* unmatched lead surrogate */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else {
+ /* see if the trail surrogate is in the next buffer */
+ }
+ } else {
+ /* unmatched trail surrogate */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ }
+ cnv->fromUChar32=c; /* c is the pending lead surrogate, the unmatched surrogate, or 0 once the pair went into overflow[] */
+ }
+
+ if(length>0) {
+ /* output length bytes with overflow (length>targetCapacity>0) */
+ ucnv_fromUWriteBytes(cnv,
+ overflow, length,
+ (char **)&target, pArgs->targetLimit,
+ &offsets, sourceIndex,
+ pErrorCode);
+ targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
+ }
+
+ if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ /* write back the updated pointers */
+ pArgs->source=source;
+ pArgs->target=(char *)target;
+ pArgs->offsets=offsets;
+}
+
+ static void U_CALLCONV /* toUnicode for UTF-16BE: assembles big-endian byte pairs from pArgs->source into UChars at pArgs->target; incomplete units are carried between calls in cnv->toUBytes/toULength */
+ _UTF16BEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const uint8_t *source;
+ UChar *target;
+ int32_t *offsets;
+
+ uint32_t targetCapacity, length, count, sourceIndex;
+ UChar c, trail;
+
+ if(pArgs->converter->mode<8) { /* mode below 8: hand the call to the BOM-detecting UTF-16 implementation */
+ _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode);
+ return;
+ }
+
+ cnv=pArgs->converter;
+ source=(const uint8_t *)pArgs->source;
+ length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
+ if(length<=0 && cnv->toUnicodeStatus==0) {
+ /* no input, nothing to do */
+ return;
+ }
+
+ target=pArgs->target;
+ if(target >= pArgs->targetLimit) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ return;
+ }
+
+ targetCapacity=(uint32_t)(pArgs->targetLimit-target);
+ offsets=pArgs->offsets;
+ sourceIndex=0;
+ c=0;
+
+ /* complete a partial UChar or pair from the last call */
+ if(cnv->toUnicodeStatus!=0) {
+ /*
+ * special case: single byte from a previous buffer,
+ * where the byte turned out not to belong to a trail surrogate
+ * and the preceding, unmatched lead surrogate was put into toUBytes[]
+ * for error handling
+ */
+ cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus;
+ cnv->toULength=1;
+ cnv->toUnicodeStatus=0;
+ }
+ if((count=cnv->toULength)!=0) {
+ uint8_t *p=cnv->toUBytes;
+ do {
+ p[count++]=*source++;
+ ++sourceIndex;
+ --length;
+ if(count==2) {
+ c=((UChar)p[0]<<8)|p[1];
+ if(U16_IS_SINGLE(c)) {
+ /* output the BMP code point */
+ *target++=c;
+ if(offsets!=NULL) {
+ *offsets++=-1;
+ }
+ --targetCapacity;
+ count=0;
+ c=0;
+ break;
+ } else if(U16_IS_SURROGATE_LEAD(c)) {
+ /* continue collecting bytes for the trail surrogate */
+ c=0; /* avoid unnecessary surrogate handling below */
+ } else {
+ /* fall through to error handling for an unmatched trail surrogate */
+ break;
+ }
+ } else if(count==4) {
+ c=((UChar)p[0]<<8)|p[1];
+ trail=((UChar)p[2]<<8)|p[3];
+ if(U16_IS_TRAIL(trail)) {
+ /* output the surrogate pair */
+ *target++=c;
+ if(targetCapacity>=2) {
+ *target++=trail;
+ if(offsets!=NULL) {
+ *offsets++=-1;
+ *offsets++=-1;
+ }
+ targetCapacity-=2;
+ } else /* targetCapacity==1 */ {
+ targetCapacity=0;
+ cnv->UCharErrorBuffer[0]=trail;
+ cnv->UCharErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ count=0;
+ c=0;
+ break;
+ } else {
+ /* unmatched lead surrogate, handle here for consistent toUBytes[] */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+
+ /* back out reading the code unit after it. NOTE(review): source only moves forward from pArgs->source in this function, so the difference tested below looks never >=2 and the else branch appears to always run - confirm against upstream */
+ if(((const uint8_t *)pArgs->source-source)>=2) {
+ source-=2;
+ } else {
+ /*
+ * if the trail unit's first byte was in a previous buffer, then
+ * we need to put it into a special place because toUBytes[] will be
+ * used for the lead unit's bytes
+ */
+ cnv->toUnicodeStatus=0x100|p[2];
+ --source;
+ }
+ cnv->toULength=2;
+
+ /* write back the updated pointers */
+ pArgs->source=(const char *)source;
+ pArgs->target=target;
+ pArgs->offsets=offsets;
+ return;
+ }
+ }
+ } while(length>0);
+ cnv->toULength=(int8_t)count;
+ }
+
+ /* copy an even number of bytes for complete UChars */
+ count=2*targetCapacity;
+ if(count>length) {
+ count=length&~1;
+ }
+ if(c==0 && count>0) {
+ length-=count;
+ count>>=1;
+ targetCapacity-=count;
+ if(offsets==NULL) {
+ do {
+ c=((UChar)source[0]<<8)|source[1];
+ source+=2;
+ if(U16_IS_SINGLE(c)) {
+ *target++=c;
+ } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
+ U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])
+ ) {
+ source+=2;
+ --count;
+ *target++=c;
+ *target++=trail;
+ } else {
+ break;
+ }
+ } while(--count>0);
+ } else {
+ do {
+ c=((UChar)source[0]<<8)|source[1];
+ source+=2;
+ if(U16_IS_SINGLE(c)) {
+ *target++=c;
+ *offsets++=sourceIndex;
+ sourceIndex+=2;
+ } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
+ U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])
+ ) {
+ source+=2;
+ --count;
+ *target++=c;
+ *target++=trail;
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ sourceIndex+=4;
+ } else {
+ break;
+ }
+ } while(--count>0);
+ }
+
+ if(count==0) {
+ /* done with the loop for complete UChars */
+ c=0;
+ } else {
+ /* keep c for surrogate handling, trail will be set there */
+ length+=2*(count-1); /* one more byte pair was consumed than count decremented */
+ targetCapacity+=count; /* give back the UChar slots that were reserved above but not filled */
+ }
+ }
+
+ if(c!=0) {
+ /*
+ * c is a surrogate, and
+ * - source or target too short
+ * - or the surrogate is unmatched
+ */
+ cnv->toUBytes[0]=(uint8_t)(c>>8);
+ cnv->toUBytes[1]=(uint8_t)c;
+ cnv->toULength=2;
+
+ if(U16_IS_SURROGATE_LEAD(c)) {
+ if(length>=2) {
+ if(U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])) {
+ /* output the surrogate pair, will overflow (see conditions comment above) */
+ source+=2;
+ length-=2;
+ *target++=c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ cnv->UCharErrorBuffer[0]=trail;
+ cnv->UCharErrorBufferLength=1;
+ cnv->toULength=0;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ } else {
+ /* unmatched lead surrogate */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else {
+ /* see if the trail surrogate is in the next buffer */
+ }
+ } else {
+ /* unmatched trail surrogate */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ }
+ }
+
+ if(U_SUCCESS(*pErrorCode)) {
+ /* check for a remaining source byte */
+ if(length>0) {
+ if(targetCapacity==0) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ } else {
+ /* it must be length==1 because otherwise the above would have copied more */
+ cnv->toUBytes[cnv->toULength++]=*source++;
+ }
+ }
+ }
+
+ /* write back the updated pointers */
+ pArgs->source=(const char *)source;
+ pArgs->target=target;
+ pArgs->offsets=offsets;
+}
+
+ static UChar32 U_CALLCONV /* returns the next code point read from big-endian UTF-16 bytes at pArgs->source, or 0xffff with *err set when input is missing, truncated or an unpaired surrogate */
+ _UTF16BEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
+ const uint8_t *s, *sourceLimit;
+ UChar32 c;
+
+ if(pArgs->converter->mode<8) {
+ return UCNV_GET_NEXT_UCHAR_USE_TO_U; /* presumably tells the caller to go through the toUnicode function instead - confirm in ucnv_cnv.h */
+ }
+
+ s=(const uint8_t *)pArgs->source;
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+
+ if(s>=sourceLimit) {
+ /* no input */
+ *err=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0xffff;
+ }
+
+ if(s+2>sourceLimit) {
+ /* only one byte: truncated UChar */
+ pArgs->converter->toUBytes[0]=*s++;
+ pArgs->converter->toULength=1;
+ pArgs->source=(const char *)s;
+ *err = U_TRUNCATED_CHAR_FOUND;
+ return 0xffff;
+ }
+
+ /* get one UChar */
+ c=((UChar32)*s<<8)|s[1];
+ s+=2;
+
+ /* check for a surrogate pair */
+ if(U_IS_SURROGATE(c)) {
+ if(U16_IS_SURROGATE_LEAD(c)) {
+ if(s+2<=sourceLimit) {
+ UChar trail;
+
+ /* get a second UChar and see if it is a trail surrogate */
+ trail=((UChar)*s<<8)|s[1];
+ if(U16_IS_TRAIL(trail)) {
+ c=U16_GET_SUPPLEMENTARY(c, trail);
+ s+=2;
+ } else {
+ /* unmatched lead surrogate */
+ c=-2; /* negative marker, turned into an error by the c<0 block below */
+ }
+ } else {
+ /* too few (2 or 3) bytes for a surrogate pair: truncated code point */
+ uint8_t *bytes=pArgs->converter->toUBytes;
+ s-=2;
+ pArgs->converter->toULength=(int8_t)(sourceLimit-s);
+ do {
+ *bytes++=*s++;
+ } while(s<sourceLimit);
+
+ c=0xffff;
+ *err=U_TRUNCATED_CHAR_FOUND;
+ }
+ } else {
+ /* unmatched trail surrogate */
+ c=-2; /* negative marker, turned into an error by the c<0 block below */
+ }
+
+ if(c<0) {
+ /* write the unmatched surrogate */
+ uint8_t *bytes=pArgs->converter->toUBytes;
+ pArgs->converter->toULength=2;
+ *bytes=*(s-2);
+ bytes[1]=*(s-1);
+
+ c=0xffff;
+ *err=U_ILLEGAL_CHAR_FOUND;
+ }
+ }
+
+ pArgs->source=(const char *)s;
+ return c;
+}
+
+ static void U_CALLCONV
+ _UTF16BEReset(UConverter *cnv, UConverterResetChoice choice) {
+ /* toUnicode side: reset for every choice up to UCNV_RESET_TO_UNICODE */
+ if(choice<=UCNV_RESET_TO_UNICODE) {
+ if(UCNV_GET_VERSION(cnv)!=0) {
+ cnv->mode=0; /* Java-specific "UnicodeBig" requires BE BOM or no BOM */
+ } else {
+ cnv->mode=8; /* plain UTF-16BE: no BOM handling */
+ }
+ }
+ /* fromUnicode side: only "UnicodeBig" (version 1) prepares to output the UTF-16BE BOM */
+ if(choice!=UCNV_RESET_TO_UNICODE) {
+ if(UCNV_GET_VERSION(cnv)==1) { cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; }
+ }
+}
+
+ static void U_CALLCONV
+ _UTF16BEOpen(UConverter *cnv,
+ UConverterLoadArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ (void)pArgs; /* the load arguments are not used by this converter */
+ if(UCNV_GET_VERSION(cnv)>1) { /* only versions 0 and 1 are accepted for UTF-16BE */
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ _UTF16BEReset(cnv, UCNV_RESET_BOTH);
+}
+
+ static const char * U_CALLCONV
+ _UTF16BEGetName(const UConverter *cnv) {
+ /*
+ * Version 0 is the plain converter; any other version reports
+ * the "version=1" name.
+ */
+ return UCNV_GET_VERSION(cnv)==0 ? "UTF-16BE" : "UTF-16BE,version=1";
+}
+U_CDECL_END
+
+ static const UConverterImpl _UTF16BEImpl={ /* function table for the UTF-16BE converter; slot order is fixed by the UConverterImpl declaration (not visible here), NULL leaves a slot unimplemented */
+ UCNV_UTF16_BigEndian,
+
+ NULL,
+ NULL,
+
+ _UTF16BEOpen,
+ NULL,
+ _UTF16BEReset,
+
+ _UTF16BEToUnicodeWithOffsets, /* the same toUnicode function fills two consecutive slots (presumably plain and with-offsets - confirm) */
+ _UTF16BEToUnicodeWithOffsets,
+ _UTF16BEFromUnicodeWithOffsets, /* likewise for fromUnicode */
+ _UTF16BEFromUnicodeWithOffsets,
+ _UTF16BEGetNextUChar,
+
+ NULL,
+ _UTF16BEGetName,
+ NULL,
+ NULL,
+ ucnv_getNonSurrogateUnicodeSet,
+
+ NULL,
+ NULL
+};
+
+ static const UConverterStaticData _UTF16BEStaticData={ /* constant description of the UTF-16BE converter; field order follows UConverterStaticData (declared elsewhere) */
+ sizeof(UConverterStaticData),
+ "UTF-16BE",
+ 1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2,
+ { 0xff, 0xfd, 0, 0 },2,FALSE,FALSE, /* presumably the substitution character U+FFFD as big-endian bytes plus its length 2 - confirm */
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+
+ const UConverterSharedData _UTF16BEData= /* shared-data object pairing the static data above with the UTF-16BE function table */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16BEStaticData, &_UTF16BEImpl);
+
+/* UTF-16LE ----------------------------------------------------------------- */
+U_CDECL_BEGIN
+ static void U_CALLCONV /* fromUnicode for UTF-16LE: writes each UChar from pArgs->source as two bytes, low byte first, to pArgs->target; fills pArgs->offsets when non-NULL; reports overflow and unpaired surrogates through pErrorCode */
+ _UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const UChar *source;
+ char *target;
+ int32_t *offsets;
+
+ uint32_t targetCapacity, length, sourceIndex;
+ UChar c, trail;
+ char overflow[4];
+
+ source=pArgs->source;
+ length=(int32_t)(pArgs->sourceLimit-source);
+ if(length<=0) {
+ /* no input, nothing to do (length is declared uint32_t, so this test is effectively length==0) */
+ return;
+ }
+
+ cnv=pArgs->converter;
+
+ /* write the little-endian BOM bytes (FF FE) once if fromUnicodeStatus asks for it, then clear the request */
+ if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ (char)0xffu, (char)0xfeu };
+ ucnv_fromUWriteBytes(cnv,
+ bom, 2,
+ &pArgs->target, pArgs->targetLimit,
+ &pArgs->offsets, -1,
+ pErrorCode);
+ cnv->fromUnicodeStatus=0;
+ }
+
+ target=pArgs->target;
+ if(target >= pArgs->targetLimit) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ return;
+ }
+
+ targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
+ offsets=pArgs->offsets;
+ sourceIndex=0;
+
+ /* c!=0 indicates in several places outside the main loops that a surrogate was found */
+
+ if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) {
+ /* the last buffer ended with a lead surrogate, output the surrogate pair */
+ ++source;
+ --length;
+ target[0]=(uint8_t)c;
+ target[1]=(uint8_t)(c>>8);
+ target[2]=(uint8_t)trail;
+ target[3]=(uint8_t)(trail>>8);
+ target+=4;
+ targetCapacity-=4;
+ if(offsets!=NULL) {
+ *offsets++=-1;
+ *offsets++=-1;
+ *offsets++=-1;
+ *offsets++=-1;
+ }
+ sourceIndex=1;
+ cnv->fromUChar32=c=0;
+ }
+
+ if(c==0) {
+ /* copy an even number of bytes for complete UChars */
+ uint32_t count=2*length;
+ if(count>targetCapacity) {
+ count=targetCapacity&~1;
+ }
+ /* count is even */
+ targetCapacity-=count;
+ count>>=1;
+ length-=count;
+
+ if(offsets==NULL) {
+ while(count>0) {
+ c=*source++;
+ if(U16_IS_SINGLE(c)) {
+ target[0]=(uint8_t)c;
+ target[1]=(uint8_t)(c>>8);
+ target+=2;
+ } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
+ ++source;
+ --count;
+ target[0]=(uint8_t)c;
+ target[1]=(uint8_t)(c>>8);
+ target[2]=(uint8_t)trail;
+ target[3]=(uint8_t)(trail>>8);
+ target+=4;
+ } else {
+ break;
+ }
+ --count;
+ }
+ } else {
+ while(count>0) {
+ c=*source++;
+ if(U16_IS_SINGLE(c)) {
+ target[0]=(uint8_t)c;
+ target[1]=(uint8_t)(c>>8);
+ target+=2;
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex++;
+ } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
+ ++source;
+ --count;
+ target[0]=(uint8_t)c;
+ target[1]=(uint8_t)(c>>8);
+ target[2]=(uint8_t)trail;
+ target[3]=(uint8_t)(trail>>8);
+ target+=4;
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ sourceIndex+=2;
+ } else {
+ break;
+ }
+ --count;
+ }
+ }
+
+ if(count==0) {
+ /* done with the loop for complete UChars */
+ if(length>0 && targetCapacity>0) {
+ /*
+ * there is more input and some target capacity -
+ * it must be targetCapacity==1 because otherwise
+ * the above would have copied more;
+ * prepare for overflow output
+ */
+ if(U16_IS_SINGLE(c=*source++)) {
+ overflow[0]=(char)c;
+ overflow[1]=(char)(c>>8);
+ length=2; /* 2 bytes to output */
+ c=0;
+ /* } else { keep c for surrogate handling, length will be set there */
+ }
+ } else {
+ length=0;
+ c=0;
+ }
+ } else {
+ /* keep c for surrogate handling, length will be set there */
+ targetCapacity+=2*count; /* give back the 2 bytes per UChar that were reserved above but not written */
+ }
+ } else {
+ length=0; /* from here on, length counts the bytes in overflow[] */
+ }
+
+ if(c!=0) {
+ /*
+ * c is a surrogate, and
+ * - source or target too short
+ * - or the surrogate is unmatched
+ */
+ length=0;
+ if(U16_IS_SURROGATE_LEAD(c)) {
+ if(source<pArgs->sourceLimit) {
+ if(U16_IS_TRAIL(trail=*source)) {
+ /* output the surrogate pair, will overflow (see conditions comment above) */
+ ++source;
+ overflow[0]=(char)c;
+ overflow[1]=(char)(c>>8);
+ overflow[2]=(char)trail;
+ overflow[3]=(char)(trail>>8);
+ length=4; /* 4 bytes to output */
+ c=0;
+ } else {
+ /* unmatched lead surrogate */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else {
+ /* see if the trail surrogate is in the next buffer */
+ }
+ } else {
+ /* unmatched trail surrogate */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ }
+ cnv->fromUChar32=c; /* c is the pending lead surrogate, the unmatched surrogate, or 0 once the pair went into overflow[] */
+ }
+
+ if(length>0) {
+ /* output length bytes with overflow (length>targetCapacity>0) */
+ ucnv_fromUWriteBytes(cnv,
+ overflow, length,
+ &target, pArgs->targetLimit,
+ &offsets, sourceIndex,
+ pErrorCode);
+ targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
+ }
+
+ if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ /* write back the updated pointers */
+ pArgs->source=source;
+ pArgs->target=target;
+ pArgs->offsets=offsets;
+}
+
+ static void U_CALLCONV /* toUnicode for UTF-16LE: assembles little-endian byte pairs from pArgs->source into UChars at pArgs->target; incomplete units are carried between calls in cnv->toUBytes/toULength */
+ _UTF16LEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const uint8_t *source;
+ UChar *target;
+ int32_t *offsets;
+
+ uint32_t targetCapacity, length, count, sourceIndex;
+ UChar c, trail;
+
+ if(pArgs->converter->mode<8) { /* mode below 8: hand the call to the BOM-detecting UTF-16 implementation */
+ _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode);
+ return;
+ }
+
+ cnv=pArgs->converter;
+ source=(const uint8_t *)pArgs->source;
+ length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
+ if(length<=0 && cnv->toUnicodeStatus==0) {
+ /* no input, nothing to do */
+ return;
+ }
+
+ target=pArgs->target;
+ if(target >= pArgs->targetLimit) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ return;
+ }
+
+ targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
+ offsets=pArgs->offsets;
+ sourceIndex=0;
+ c=0;
+
+ /* complete a partial UChar or pair from the last call */
+ if(cnv->toUnicodeStatus!=0) {
+ /*
+ * special case: single byte from a previous buffer,
+ * where the byte turned out not to belong to a trail surrogate
+ * and the preceding, unmatched lead surrogate was put into toUBytes[]
+ * for error handling
+ */
+ cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus;
+ cnv->toULength=1;
+ cnv->toUnicodeStatus=0;
+ }
+ if((count=cnv->toULength)!=0) {
+ uint8_t *p=cnv->toUBytes;
+ do {
+ p[count++]=*source++;
+ ++sourceIndex;
+ --length;
+ if(count==2) {
+ c=((UChar)p[1]<<8)|p[0];
+ if(U16_IS_SINGLE(c)) {
+ /* output the BMP code point */
+ *target++=c;
+ if(offsets!=NULL) {
+ *offsets++=-1;
+ }
+ --targetCapacity;
+ count=0;
+ c=0;
+ break;
+ } else if(U16_IS_SURROGATE_LEAD(c)) {
+ /* continue collecting bytes for the trail surrogate */
+ c=0; /* avoid unnecessary surrogate handling below */
+ } else {
+ /* fall through to error handling for an unmatched trail surrogate */
+ break;
+ }
+ } else if(count==4) {
+ c=((UChar)p[1]<<8)|p[0];
+ trail=((UChar)p[3]<<8)|p[2];
+ if(U16_IS_TRAIL(trail)) {
+ /* output the surrogate pair */
+ *target++=c;
+ if(targetCapacity>=2) {
+ *target++=trail;
+ if(offsets!=NULL) {
+ *offsets++=-1;
+ *offsets++=-1;
+ }
+ targetCapacity-=2;
+ } else /* targetCapacity==1 */ {
+ targetCapacity=0;
+ cnv->UCharErrorBuffer[0]=trail;
+ cnv->UCharErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ count=0;
+ c=0;
+ break;
+ } else {
+ /* unmatched lead surrogate, handle here for consistent toUBytes[] */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+
+ /* back out reading the code unit after it. NOTE(review): source only moves forward from pArgs->source in this function, so the difference tested below looks never >=2 and the else branch appears to always run - confirm against upstream */
+ if(((const uint8_t *)pArgs->source-source)>=2) {
+ source-=2;
+ } else {
+ /*
+ * if the trail unit's first byte was in a previous buffer, then
+ * we need to put it into a special place because toUBytes[] will be
+ * used for the lead unit's bytes
+ */
+ cnv->toUnicodeStatus=0x100|p[2];
+ --source;
+ }
+ cnv->toULength=2;
+
+ /* write back the updated pointers */
+ pArgs->source=(const char *)source;
+ pArgs->target=target;
+ pArgs->offsets=offsets;
+ return;
+ }
+ }
+ } while(length>0);
+ cnv->toULength=(int8_t)count;
+ }
+
+ /* copy an even number of bytes for complete UChars */
+ count=2*targetCapacity;
+ if(count>length) {
+ count=length&~1;
+ }
+ if(c==0 && count>0) {
+ length-=count;
+ count>>=1;
+ targetCapacity-=count;
+ if(offsets==NULL) {
+ do {
+ c=((UChar)source[1]<<8)|source[0];
+ source+=2;
+ if(U16_IS_SINGLE(c)) {
+ *target++=c;
+ } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
+ U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])
+ ) {
+ source+=2;
+ --count;
+ *target++=c;
+ *target++=trail;
+ } else {
+ break;
+ }
+ } while(--count>0);
+ } else {
+ do {
+ c=((UChar)source[1]<<8)|source[0];
+ source+=2;
+ if(U16_IS_SINGLE(c)) {
+ *target++=c;
+ *offsets++=sourceIndex;
+ sourceIndex+=2;
+ } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
+ U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])
+ ) {
+ source+=2;
+ --count;
+ *target++=c;
+ *target++=trail;
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ sourceIndex+=4;
+ } else {
+ break;
+ }
+ } while(--count>0);
+ }
+
+ if(count==0) {
+ /* done with the loop for complete UChars */
+ c=0;
+ } else {
+ /* keep c for surrogate handling, trail will be set there */
+ length+=2*(count-1); /* one more byte pair was consumed than count decremented */
+ targetCapacity+=count; /* give back the UChar slots that were reserved above but not filled */
+ }
+ }
+
+ if(c!=0) {
+ /*
+ * c is a surrogate, and
+ * - source or target too short
+ * - or the surrogate is unmatched
+ */
+ cnv->toUBytes[0]=(uint8_t)c;
+ cnv->toUBytes[1]=(uint8_t)(c>>8);
+ cnv->toULength=2;
+
+ if(U16_IS_SURROGATE_LEAD(c)) {
+ if(length>=2) {
+ if(U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])) {
+ /* output the surrogate pair, will overflow (see conditions comment above) */
+ source+=2;
+ length-=2;
+ *target++=c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ cnv->UCharErrorBuffer[0]=trail;
+ cnv->UCharErrorBufferLength=1;
+ cnv->toULength=0;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ } else {
+ /* unmatched lead surrogate */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else {
+ /* see if the trail surrogate is in the next buffer */
+ }
+ } else {
+ /* unmatched trail surrogate */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ }
+ }
+
+ if(U_SUCCESS(*pErrorCode)) {
+ /* check for a remaining source byte */
+ if(length>0) {
+ if(targetCapacity==0) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ } else {
+ /* it must be length==1 because otherwise the above would have copied more */
+ cnv->toUBytes[cnv->toULength++]=*source++;
+ }
+ }
+ }
+
+ /* write back the updated pointers */
+ pArgs->source=(const char *)source;
+ pArgs->target=target;
+ pArgs->offsets=offsets;
+}
+
+ static UChar32 U_CALLCONV /* returns the next code point read from little-endian UTF-16 bytes at pArgs->source, or 0xffff with *err set when input is missing, truncated or an unpaired surrogate */
+ _UTF16LEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
+ const uint8_t *s, *sourceLimit;
+ UChar32 c;
+
+ if(pArgs->converter->mode<8) {
+ return UCNV_GET_NEXT_UCHAR_USE_TO_U; /* presumably tells the caller to go through the toUnicode function instead - confirm in ucnv_cnv.h */
+ }
+
+ s=(const uint8_t *)pArgs->source;
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+
+ if(s>=sourceLimit) {
+ /* no input */
+ *err=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0xffff;
+ }
+
+ if(s+2>sourceLimit) {
+ /* only one byte: truncated UChar */
+ pArgs->converter->toUBytes[0]=*s++;
+ pArgs->converter->toULength=1;
+ pArgs->source=(const char *)s;
+ *err = U_TRUNCATED_CHAR_FOUND;
+ return 0xffff;
+ }
+
+ /* get one UChar */
+ c=((UChar32)s[1]<<8)|*s;
+ s+=2;
+
+ /* check for a surrogate pair */
+ if(U_IS_SURROGATE(c)) {
+ if(U16_IS_SURROGATE_LEAD(c)) {
+ if(s+2<=sourceLimit) {
+ UChar trail;
+
+ /* get a second UChar and see if it is a trail surrogate */
+ trail=((UChar)s[1]<<8)|*s;
+ if(U16_IS_TRAIL(trail)) {
+ c=U16_GET_SUPPLEMENTARY(c, trail);
+ s+=2;
+ } else {
+ /* unmatched lead surrogate */
+ c=-2; /* negative marker, turned into an error by the c<0 block below */
+ }
+ } else {
+ /* too few (2 or 3) bytes for a surrogate pair: truncated code point */
+ uint8_t *bytes=pArgs->converter->toUBytes;
+ s-=2;
+ pArgs->converter->toULength=(int8_t)(sourceLimit-s);
+ do {
+ *bytes++=*s++;
+ } while(s<sourceLimit);
+
+ c=0xffff;
+ *err=U_TRUNCATED_CHAR_FOUND;
+ }
+ } else {
+ /* unmatched trail surrogate */
+ c=-2; /* negative marker, turned into an error by the c<0 block below */
+ }
+
+ if(c<0) {
+ /* write the unmatched surrogate */
+ uint8_t *bytes=pArgs->converter->toUBytes;
+ pArgs->converter->toULength=2;
+ *bytes=*(s-2);
+ bytes[1]=*(s-1);
+
+ c=0xffff;
+ *err=U_ILLEGAL_CHAR_FOUND;
+ }
+ }
+
+ pArgs->source=(const char *)s;
+ return c;
+}
+
+ static void U_CALLCONV
+ _UTF16LEReset(UConverter *cnv, UConverterResetChoice choice) {
+ /* toUnicode side: reset for every choice up to UCNV_RESET_TO_UNICODE */
+ if(choice<=UCNV_RESET_TO_UNICODE) {
+ if(UCNV_GET_VERSION(cnv)!=0) {
+ cnv->mode=0; /* Java-specific "UnicodeLittle" requires LE BOM or no BOM */
+ } else {
+ cnv->mode=8; /* plain UTF-16LE: no BOM handling */
+ }
+ }
+ /* fromUnicode side: only "UnicodeLittle" (version 1) prepares to output the UTF-16LE BOM */
+ if(choice!=UCNV_RESET_TO_UNICODE) {
+ if(UCNV_GET_VERSION(cnv)==1) { cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; }
+ }
+}
+
+ static void U_CALLCONV
+ _UTF16LEOpen(UConverter *cnv,
+ UConverterLoadArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ (void)pArgs; /* the load arguments are not used by this converter */
+ if(UCNV_GET_VERSION(cnv)>1) { /* only versions 0 and 1 are accepted for UTF-16LE */
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ _UTF16LEReset(cnv, UCNV_RESET_BOTH);
+}
+
+ static const char * U_CALLCONV
+ _UTF16LEGetName(const UConverter *cnv) {
+ /*
+ * Version 0 is the plain converter; any other version reports
+ * the "version=1" name.
+ */
+ return UCNV_GET_VERSION(cnv)==0 ? "UTF-16LE" : "UTF-16LE,version=1";
+}
+U_CDECL_END
+
+ static const UConverterImpl _UTF16LEImpl={ /* function table for the UTF-16LE converter; slot order is fixed by the UConverterImpl declaration (not visible here), NULL leaves a slot unimplemented */
+ UCNV_UTF16_LittleEndian,
+
+ NULL,
+ NULL,
+
+ _UTF16LEOpen,
+ NULL,
+ _UTF16LEReset,
+
+ _UTF16LEToUnicodeWithOffsets, /* the same toUnicode function fills two consecutive slots (presumably plain and with-offsets - confirm) */
+ _UTF16LEToUnicodeWithOffsets,
+ _UTF16LEFromUnicodeWithOffsets, /* likewise for fromUnicode */
+ _UTF16LEFromUnicodeWithOffsets,
+ _UTF16LEGetNextUChar,
+
+ NULL,
+ _UTF16LEGetName,
+ NULL,
+ NULL,
+ ucnv_getNonSurrogateUnicodeSet,
+
+ NULL,
+ NULL
+};
+
+
+ static const UConverterStaticData _UTF16LEStaticData={ /* constant description of the UTF-16LE converter; field order follows UConverterStaticData (declared elsewhere) */
+ sizeof(UConverterStaticData),
+ "UTF-16LE",
+ 1202, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2,
+ { 0xfd, 0xff, 0, 0 },2,FALSE,FALSE, /* presumably the substitution character U+FFFD as little-endian bytes plus its length 2 - confirm */
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+
+ const UConverterSharedData _UTF16LEData= /* shared-data object pairing the static data above with the UTF-16LE function table */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16LEStaticData, &_UTF16LEImpl);
+
+/* UTF-16 (Detect BOM) ------------------------------------------------------ */
+
+/*
+ * Detect a BOM at the beginning of the stream and select UTF-16BE or UTF-16LE
+ * accordingly.
+ * This is a simpler version of the UTF-32 converter, with
+ * fewer states for shorter BOMs.
+ *
+ * State values:
+ * 0 initial state
+ * 1 saw first byte
+ * 2..5 -
+ * 6..7 see _UTF16ToUnicodeWithOffsets() comments in state 1
+ * 8 UTF-16BE mode
+ * 9 UTF-16LE mode
+ *
+ * During detection: state==number of initial bytes seen so far.
+ *
+ * On output, emit U+FEFF as the first code point.
+ *
+ * Variants:
+ * - UTF-16,version=1 (Java "Unicode" encoding) treats a missing BOM as an error.
+ * - UTF-16BE,version=1 (Java "UnicodeBig" encoding) and
+ * UTF-16LE,version=1 (Java "UnicodeLittle" encoding) treat a reverse BOM as an error.
+ */
+U_CDECL_BEGIN
+ static void U_CALLCONV
+ _UTF16Reset(UConverter *cnv, UConverterResetChoice choice) {
+ if(choice!=UCNV_RESET_TO_UNICODE) {
+ /* fromUnicode side: prepare to output the UTF-16PE BOM */
+ cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
+ }
+ if(choice<=UCNV_RESET_TO_UNICODE) {
+ /* toUnicode side: back to state 0, the initial state */
+ cnv->mode=0;
+ }
+}
+U_CDECL_END
+extern const UConverterSharedData _UTF16v2Data;
+U_CDECL_BEGIN
+static void U_CALLCONV
+_UTF16Open(UConverter *cnv,
+           UConverterLoadArgs *pArgs,
+           UErrorCode *pErrorCode) {
+    /* Open hook for the BOM-detecting UTF-16 converter; versions above 2 are rejected. */
+    if(UCNV_GET_VERSION(cnv)>2) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    if(UCNV_GET_VERSION(cnv)==2 && !pArgs->onlyTestIsLoadable) {
+        /*
+         * UTF-16,version=2 fromUnicode() always writes a big-endian byte stream,
+         * so switch to the v2 implementation and re-copy the one piece of
+         * staticData that differs and was already copied into the UConverter.
+         * (See ucnv_createConverterFromSharedData() in ucnv_bld.c.)
+         */
+        cnv->sharedData=(UConverterSharedData*)&_UTF16v2Data;
+        uprv_memcpy(cnv->subChars, _UTF16v2Data.staticData->subChar, UCNV_MAX_SUBCHAR_LEN);
+    }
+
+    /* start both directions from a clean state */
+    _UTF16Reset(cnv, UCNV_RESET_BOTH);
+}
+
+static const char * U_CALLCONV
+_UTF16GetName(const UConverter *cnv) {
+    /* Returns the converter name, with the version option spelled out when it is not 0. */
+    switch(UCNV_GET_VERSION(cnv)) {
+    case 0:
+        return "UTF-16";
+    case 1:
+        return "UTF-16,version=1";
+    default:
+        return "UTF-16,version=2";
+    }
+}
+U_CDECL_END
+extern const UConverterSharedData _UTF16Data;
+
+/* True when the converter is currently bound to the UTF-16BE shared data. */
+static inline bool IS_UTF16BE(const UConverter *cnv) {
+    return &_UTF16BEData == cnv->sharedData;
+}
+
+/* True when the converter is currently bound to the UTF-16LE shared data. */
+static inline bool IS_UTF16LE(const UConverter *cnv) {
+    return &_UTF16LEData == cnv->sharedData;
+}
+
+/* True when the converter is bound to either BOM-detecting UTF-16 data object (default or version=2). */
+static inline bool IS_UTF16(const UConverter *cnv) {
+    if(cnv->sharedData == &_UTF16Data) {
+        return true;
+    }
+    return cnv->sharedData == &_UTF16v2Data;
+}
+
+U_CDECL_BEGIN
+static void U_CALLCONV
+_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) { /* BOM-sniffing front end: settles on BE or LE, then forwards to the fixed-endian converter */
+ UConverter *cnv=pArgs->converter;
+ const char *source=pArgs->source;
+ const char *sourceLimit=pArgs->sourceLimit;
+ int32_t *offsets=pArgs->offsets; /* where this call's offsets begin; may be NULL (checked before the fix-up below) */
+
+ int32_t state, offsetDelta;
+ uint8_t b;
+
+ state=cnv->mode; /* resume from the state saved at the end of the previous call */
+
+ /*
+ * If we detect a BOM in this buffer, then we must add the BOM size to the
+ * offsets because the actual converter function will not see and count the BOM.
+ * offsetDelta will have the number of the BOM bytes that are in the current buffer.
+ */
+ offsetDelta=0;
+
+ while(source<sourceLimit && U_SUCCESS(*pErrorCode)) {
+ switch(state) {
+ case 0:
+ cnv->toUBytes[0]=(uint8_t)*source++; /* remember the first byte in the converter; the second one may arrive in a later buffer */
+ cnv->toULength=1;
+ state=1;
+ break;
+ case 1:
+ /*
+ * Only inside this switch case can the state variable
+ * temporarily take two additional values:
+ * 6: BOM error, continue with BE
+ * 7: BOM error, continue with LE
+ */
+ b=*source; /* peek at the second byte without consuming it yet */
+ if(cnv->toUBytes[0]==0xfe && b==0xff) {
+ if(IS_UTF16LE(cnv)) {
+ state=7; /* illegal reverse BOM for Java "UnicodeLittle" */
+ } else {
+ state=8; /* detect UTF-16BE */
+ }
+ } else if(cnv->toUBytes[0]==0xff && b==0xfe) {
+ if(IS_UTF16BE(cnv)) {
+ state=6; /* illegal reverse BOM for Java "UnicodeBig" */
+ } else {
+ state=9; /* detect UTF-16LE */
+ }
+ } else if((IS_UTF16(cnv) && UCNV_GET_VERSION(cnv)==1)) {
+ state=6; /* illegal missing BOM for Java "Unicode" */
+ }
+ if(state>=8) {
+ /* BOM detected, consume it */
+ ++source;
+ cnv->toULength=0;
+ offsetDelta=(int32_t)(source-pArgs->source);
+ } else if(state<6) {
+ /* ok: no BOM, and not a reverse BOM */
+ if(source!=pArgs->source) {
+ /* reset the source for a correct first offset */
+ source=pArgs->source;
+ cnv->toULength=0;
+ }
+ if(IS_UTF16LE(cnv)) {
+ /* Make Java "UnicodeLittle" default to LE. */
+ state=9;
+ } else {
+ /* Make standard UTF-16 and Java "UnicodeBig" default to BE. */
+ state=8;
+ }
+ } else {
+ /*
+ * error: missing BOM, or reverse BOM
+ * UTF-16,version=1: Java-specific "Unicode" requires a BOM.
+ * UTF-16BE,version=1: Java-specific "UnicodeBig" requires a BE BOM or no BOM.
+ * UTF-16LE,version=1: Java-specific "UnicodeLittle" requires an LE BOM or no BOM.
+ */
+ /* report the non-BOM or reverse BOM as an illegal sequence */
+ cnv->toUBytes[1]=b;
+ cnv->toULength=2;
+ pArgs->source=source+1;
+ /* continue with conversion if the callback resets the error */
+ /*
+ * Make Java "Unicode" default to BE like standard UTF-16.
+ * Make Java "UnicodeBig" and "UnicodeLittle" default
+ * to their normal endiannesses.
+ */
+ cnv->mode=state+2; /* 6 -> 8 (BE), 7 -> 9 (LE) */
+ *pErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;
+ return;
+ }
+ /* convert the rest of the stream */
+ cnv->mode=state;
+ continue; /* go around the loop again so that case 8 or 9 handles what is left */
+ case 8:
+ /* call UTF-16BE */
+ pArgs->source=source;
+ _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
+ source=pArgs->source;
+ break;
+ case 9:
+ /* call UTF-16LE */
+ pArgs->source=source;
+ _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode);
+ source=pArgs->source;
+ break;
+ default:
+ break; /* does not occur */
+ }
+ }
+
+ /* add BOM size to offsets - see comment at offsetDelta declaration */
+ if(offsets!=NULL && offsetDelta!=0) {
+ int32_t *offsetsLimit=pArgs->offsets; /* presumably advanced by the delegated BE/LE converter - confirm */
+ while(offsets<offsetsLimit) {
+ *offsets++ += offsetDelta;
+ }
+ }
+
+ pArgs->source=source;
+
+ if(source==sourceLimit && pArgs->flush) {
+ /* handle truncated input */
+ switch(state) {
+ case 0:
+ break; /* no input at all, nothing to do */
+ case 8:
+ _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); /* called with no remaining input and flush set */
+ break;
+ case 9:
+ _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); /* called with no remaining input and flush set */
+ break;
+ default:
+ /* 0<state<8: framework will report truncation, nothing to do here */
+ break;
+ }
+ }
+
+ cnv->mode=state; /* persist the detection state for the next call */
+}
+
+static UChar32 U_CALLCONV
+_UTF16GetNextUChar(UConverterToUnicodeArgs *pArgs,
+                   UErrorCode *pErrorCode) {
+    /*
+     * Once the byte order is settled (mode 8 = UTF-16BE, mode 9 = UTF-16LE)
+     * the fixed-endian reader does the work; in any other mode the
+     * UCNV_GET_NEXT_UCHAR_USE_TO_U marker is returned instead.
+     */
+    const int32_t mode=pArgs->converter->mode;
+    if(mode==8) {
+        return _UTF16BEGetNextUChar(pArgs, pErrorCode);
+    }
+    if(mode==9) {
+        return _UTF16LEGetNextUChar(pArgs, pErrorCode);
+    }
+    return UCNV_GET_NEXT_UCHAR_USE_TO_U;
+}
+U_CDECL_END
+
+static const UConverterImpl _UTF16Impl = { /* function table for BOM-detecting UTF-16 (the versions handled by _UTF16Open without switching to v2) */
+ UCNV_UTF16, /* converter type tag */
+
+ NULL,
+ NULL,
+
+ _UTF16Open,
+ NULL,
+ _UTF16Reset,
+
+ _UTF16ToUnicodeWithOffsets, /* the same routine is registered in two consecutive slots */
+ _UTF16ToUnicodeWithOffsets,
+ _UTF16PEFromUnicodeWithOffsets, /* output side uses the "PE" writer, unlike the version=2 table */
+ _UTF16PEFromUnicodeWithOffsets,
+ _UTF16GetNextUChar,
+
+ NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
+ _UTF16GetName,
+ NULL,
+ NULL,
+ ucnv_getNonSurrogateUnicodeSet,
+
+ NULL,
+ NULL
+};
+
+static const UConverterStaticData _UTF16StaticData = { /* constant descriptor for BOM-detecting UTF-16 */
+ sizeof(UConverterStaticData),
+ "UTF-16", /* converter name */
+ 1204, /* CCSID for BOM sensitive UTF-16 */
+ UCNV_IBM, UCNV_UTF16, 2, 2,
+#if U_IS_BIG_ENDIAN
+ { 0xff, 0xfd, 0, 0 }, 2, /* U+FFFD in big-endian byte order, length 2 */
+#else
+ { 0xfd, 0xff, 0, 0 }, 2, /* U+FFFD in little-endian byte order, length 2 */
+#endif
+ FALSE, FALSE,
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+const UConverterSharedData _UTF16Data = /* shared-data object pairing the UTF-16 static data with its function table */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16StaticData, &_UTF16Impl);
+
+static const UConverterImpl _UTF16v2Impl = { /* function table for "UTF-16,version=2"; differs from _UTF16Impl only in the fromUnicode entries */
+ UCNV_UTF16, /* converter type tag */
+
+ NULL,
+ NULL,
+
+ _UTF16Open,
+ NULL,
+ _UTF16Reset,
+
+ _UTF16ToUnicodeWithOffsets, /* the same routine is registered in two consecutive slots */
+ _UTF16ToUnicodeWithOffsets,
+ _UTF16BEFromUnicodeWithOffsets, /* version=2 output is always big-endian (see the comment in _UTF16Open) */
+ _UTF16BEFromUnicodeWithOffsets,
+ _UTF16GetNextUChar,
+
+ NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
+ _UTF16GetName,
+ NULL,
+ NULL,
+ ucnv_getNonSurrogateUnicodeSet,
+
+ NULL,
+ NULL
+};
+
+static const UConverterStaticData _UTF16v2StaticData = { /* constant descriptor for "UTF-16,version=2" */
+ sizeof(UConverterStaticData),
+ "UTF-16,version=2", /* converter name */
+ 1204, /* CCSID for BOM sensitive UTF-16 */
+ UCNV_IBM, UCNV_UTF16, 2, 2,
+ { 0xff, 0xfd, 0, 0 }, 2, /* U+FFFD in big-endian byte order on every platform; _UTF16Open copies these bytes into cnv->subChars */
+ FALSE, FALSE,
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+const UConverterSharedData _UTF16v2Data = /* shared data that _UTF16Open swaps in for version=2 converters */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16v2StaticData, &_UTF16v2Impl);
+
+#endif
diff --git a/thirdparty/icu4c/common/ucnv_u32.cpp b/thirdparty/icu4c/common/ucnv_u32.cpp
new file mode 100644
index 0000000000..9f98914b9d
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_u32.cpp
@@ -0,0 +1,1253 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2002-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: ucnv_u32.c
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jul01
+* created by: Markus W. Scherer
+*
+* UTF-32 converter implementation. Used to be in ucnv_utf.c.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/utf.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "cmemory.h"
+
+#define MAXIMUM_UCS2 0x0000FFFF /* largest code point written as a single UChar */
+#define MAXIMUM_UTF 0x0010FFFF /* largest code point accepted by the toUnicode checks */
+#define HALF_SHIFT 10 /* shift applied to the lead surrogate when combining a pair */
+#define HALF_BASE 0x0010000
+#define HALF_MASK 0x3FF
+#define SURROGATE_HIGH_START 0xD800
+#define SURROGATE_LOW_START 0xDC00
+
+/* HALF_BASE - SURROGATE_LOW_START = 0x10000 - 0xDC00 = 0x2400 */
+#define SURROGATE_LOW_BASE 9216
+
+enum {
+ UCNV_NEED_TO_WRITE_BOM=1 /* fromUnicodeStatus value that makes the fromUnicode functions emit a BOM first */
+};
+
+/* UTF-32BE ----------------------------------------------------------------- */
+U_CDECL_BEGIN
+static void U_CALLCONV
+T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args,
+ UErrorCode * err)
+{ /* UTF-32BE bytes -> UTF-16 code units, no offsets; resumable across calls */
+ const unsigned char *mySource = (unsigned char *) args->source;
+ UChar *myTarget = args->target;
+ const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
+ const UChar *targetLimit = args->targetLimit;
+ unsigned char *toUBytes = args->converter->toUBytes;
+ uint32_t ch, i;
+
+ /* Restore state of current sequence */
+ if (args->converter->toULength > 0 && myTarget < targetLimit) {
+ i = args->converter->toULength; /* restore # of bytes consumed */
+ args->converter->toULength = 0;
+
+ ch = args->converter->toUnicodeStatus - 1;/* partial value saved by a previous call (it was stored as ch + 1) */
+ args->converter->toUnicodeStatus = 0;
+ goto morebytes;
+ }
+
+ while (mySource < sourceLimit && myTarget < targetLimit) {
+ i = 0;
+ ch = 0;
+morebytes:
+ while (i < sizeof(uint32_t)) {
+ if (mySource < sourceLimit) {
+ ch = (ch << 8) | (uint8_t)(*mySource); /* big-endian: earlier bytes are more significant */
+ toUBytes[i++] = (char) *(mySource++);
+ }
+ else {
+ /* stores a partially calculated target*/
+ /* + 1 to make 0 a valid character */
+ args->converter->toUnicodeStatus = ch + 1;
+ args->converter->toULength = (int8_t) i;
+ goto donefornow;
+ }
+ }
+
+ if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
+ /* All four bytes were read and the value is in range and not a surrogate */
+ if (ch <= MAXIMUM_UCS2)
+ {
+ /* fits in 16 bits */
+ *(myTarget++) = (UChar) ch;
+ }
+ else {
+ /* write out the surrogates */
+ *(myTarget++) = U16_LEAD(ch);
+ ch = U16_TRAIL(ch);
+ if (myTarget < targetLimit) {
+ *(myTarget++) = (UChar)ch;
+ }
+ else {
+ /* Put in overflow buffer (not handled here) */
+ args->converter->UCharErrorBuffer[0] = (UChar) ch;
+ args->converter->UCharErrorBufferLength = 1;
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ }
+ else {
+ args->converter->toULength = (int8_t)i; /* the offending bytes are already in toUBytes */
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+
+donefornow:
+ if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
+ /* End of target buffer */
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ args->target = myTarget; /* report progress back to the caller */
+ args->source = (const char *) mySource;
+}
+
+static void U_CALLCONV
+T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
+ UErrorCode * err)
+{ /* UTF-32BE bytes -> UTF-16 code units, also writing one source offset per output unit */
+ const unsigned char *mySource = (unsigned char *) args->source;
+ UChar *myTarget = args->target;
+ int32_t *myOffsets = args->offsets;
+ const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
+ const UChar *targetLimit = args->targetLimit;
+ unsigned char *toUBytes = args->converter->toUBytes;
+ uint32_t ch, i;
+ int32_t offsetNum = 0; /* offset recorded for the code point currently being converted */
+
+ /* Restore state of current sequence */
+ if (args->converter->toULength > 0 && myTarget < targetLimit) {
+ i = args->converter->toULength; /* restore # of bytes consumed */
+ args->converter->toULength = 0;
+
+ ch = args->converter->toUnicodeStatus - 1;/* partial value saved by a previous call (it was stored as ch + 1) */
+ args->converter->toUnicodeStatus = 0;
+ goto morebytes;
+ }
+
+ while (mySource < sourceLimit && myTarget < targetLimit) {
+ i = 0;
+ ch = 0;
+morebytes:
+ while (i < sizeof(uint32_t)) {
+ if (mySource < sourceLimit) {
+ ch = (ch << 8) | (uint8_t)(*mySource); /* big-endian: earlier bytes are more significant */
+ toUBytes[i++] = (char) *(mySource++);
+ }
+ else {
+ /* stores a partially calculated target*/
+ /* + 1 to make 0 a valid character */
+ args->converter->toUnicodeStatus = ch + 1;
+ args->converter->toULength = (int8_t) i;
+ goto donefornow;
+ }
+ }
+
+ if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
+ /* All four bytes were read and the value is in range and not a surrogate */
+ if (ch <= MAXIMUM_UCS2) {
+ /* fits in 16 bits */
+ *(myTarget++) = (UChar) ch;
+ *(myOffsets++) = offsetNum;
+ }
+ else {
+ /* write out the surrogates */
+ *(myTarget++) = U16_LEAD(ch);
+ *myOffsets++ = offsetNum;
+ ch = U16_TRAIL(ch);
+ if (myTarget < targetLimit)
+ {
+ *(myTarget++) = (UChar)ch;
+ *(myOffsets++) = offsetNum; /* both halves of the pair share one offset */
+ }
+ else {
+ /* Put in overflow buffer (not handled here) */
+ args->converter->UCharErrorBuffer[0] = (UChar) ch;
+ args->converter->UCharErrorBufferLength = 1;
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ }
+ else {
+ args->converter->toULength = (int8_t)i; /* the offending bytes are already in toUBytes */
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ offsetNum += i; /* i is 4 here: the inner loop only exits normally after a full sequence */
+ }
+
+donefornow:
+ if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
+ {
+ /* End of target buffer */
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ args->target = myTarget; /* report progress back to the caller */
+ args->source = (const char *) mySource;
+ args->offsets = myOffsets;
+}
+
+static void U_CALLCONV
+T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,
+ UErrorCode * err)
+{ /* UTF-16 code units -> UTF-32BE bytes, no offsets; writes a BOM first when fromUnicodeStatus asks for one */
+ const UChar *mySource = args->source;
+ unsigned char *myTarget;
+ const UChar *sourceLimit = args->sourceLimit;
+ const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
+ UChar32 ch, ch2;
+ unsigned int indexToWrite;
+ unsigned char temp[sizeof(uint32_t)]; /* one encoded code point, in output byte order */
+
+ if(mySource >= sourceLimit) {
+ /* no input, nothing to do */
+ return;
+ }
+
+ /* write the BOM if necessary */
+ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu }; /* U+FEFF as UTF-32BE */
+ ucnv_fromUWriteBytes(args->converter,
+ bom, 4,
+ &args->target, args->targetLimit,
+ &args->offsets, -1,
+ err);
+ args->converter->fromUnicodeStatus=0;
+ }
+
+ myTarget = (unsigned char *) args->target; /* read after the BOM write, which receives &args->target */
+ temp[0] = 0; /* top byte is never changed below: temp[1] is masked to 5 bits */
+
+ if (args->converter->fromUChar32) {
+ ch = args->converter->fromUChar32; /* lead surrogate left pending by an earlier call */
+ args->converter->fromUChar32 = 0;
+ goto lowsurogate;
+ }
+
+ while (mySource < sourceLimit && myTarget < targetLimit) {
+ ch = *(mySource++);
+
+ if (U_IS_SURROGATE(ch)) {
+ if (U_IS_LEAD(ch)) {
+lowsurogate:
+ if (mySource < sourceLimit) {
+ ch2 = *mySource;
+ if (U_IS_TRAIL(ch2)) {
+ ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
+ mySource++;
+ }
+ else {
+ /* this is an unmatched lead code unit (1st surrogate): no trail follows */
+ /* callback(illegal) */
+ args->converter->fromUChar32 = ch;
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+ else {
+ /* ran out of source */
+ args->converter->fromUChar32 = ch;
+ if (args->flush) {
+ /* this is an unmatched lead code unit (1st surrogate) at the end of the input */
+ /* callback(illegal) */
+ *err = U_ILLEGAL_CHAR_FOUND;
+ }
+ break;
+ }
+ }
+ else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ args->converter->fromUChar32 = ch;
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+
+ /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
+ temp[1] = (uint8_t) (ch >> 16 & 0x1F);
+ temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
+ temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
+
+ for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { /* always emits all 4 bytes */
+ if (myTarget < targetLimit) {
+ *(myTarget++) = temp[indexToWrite];
+ }
+ else {
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; /* bytes that do not fit go to the converter's overflow buffer */
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+ }
+
+ if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ args->target = (char *) myTarget; /* report progress back to the caller */
+ args->source = mySource;
+}
+
+static void U_CALLCONV
+T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
+ UErrorCode * err)
+{ /* UTF-16 code units -> UTF-32BE bytes, also writing one source offset per output byte */
+ const UChar *mySource = args->source;
+ unsigned char *myTarget;
+ int32_t *myOffsets;
+ const UChar *sourceLimit = args->sourceLimit;
+ const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
+ UChar32 ch, ch2;
+ int32_t offsetNum = 0; /* offset recorded for the code point currently being written */
+ unsigned int indexToWrite;
+ unsigned char temp[sizeof(uint32_t)]; /* one encoded code point, in output byte order */
+
+ if(mySource >= sourceLimit) {
+ /* no input, nothing to do */
+ return;
+ }
+
+ /* write the BOM if necessary */
+ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu }; /* U+FEFF as UTF-32BE */
+ ucnv_fromUWriteBytes(args->converter,
+ bom, 4,
+ &args->target, args->targetLimit,
+ &args->offsets, -1,
+ err);
+ args->converter->fromUnicodeStatus=0;
+ }
+
+ myTarget = (unsigned char *) args->target; /* read after the BOM write, which receives &args->target and &args->offsets */
+ myOffsets = args->offsets;
+ temp[0] = 0; /* top byte is never changed below: temp[1] is masked to 5 bits */
+
+ if (args->converter->fromUChar32) {
+ ch = args->converter->fromUChar32; /* lead surrogate left pending by an earlier call */
+ args->converter->fromUChar32 = 0;
+ goto lowsurogate;
+ }
+
+ while (mySource < sourceLimit && myTarget < targetLimit) {
+ ch = *(mySource++);
+
+ if (U_IS_SURROGATE(ch)) {
+ if (U_IS_LEAD(ch)) {
+lowsurogate:
+ if (mySource < sourceLimit) {
+ ch2 = *mySource;
+ if (U_IS_TRAIL(ch2)) {
+ ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
+ mySource++;
+ }
+ else {
+ /* this is an unmatched lead code unit (1st surrogate): no trail follows */
+ /* callback(illegal) */
+ args->converter->fromUChar32 = ch;
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+ else {
+ /* ran out of source */
+ args->converter->fromUChar32 = ch;
+ if (args->flush) {
+ /* this is an unmatched lead code unit (1st surrogate) at the end of the input */
+ /* callback(illegal) */
+ *err = U_ILLEGAL_CHAR_FOUND;
+ }
+ break;
+ }
+ }
+ else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ args->converter->fromUChar32 = ch;
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+
+ /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
+ temp[1] = (uint8_t) (ch >> 16 & 0x1F);
+ temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
+ temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
+
+ for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { /* always emits all 4 bytes */
+ if (myTarget < targetLimit) {
+ *(myTarget++) = temp[indexToWrite];
+ *(myOffsets++) = offsetNum;
+ }
+ else {
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; /* bytes that do not fit go to the converter's overflow buffer */
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+ offsetNum = offsetNum + 1 + (temp[1] != 0); /* temp[1] != 0 means ch >= 0x10000, i.e. two UChars were consumed */
+ }
+
+ if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ args->target = (char *) myTarget; /* report progress back to the caller */
+ args->source = mySource;
+ args->offsets = myOffsets;
+}
+
+/*
+ * Reads one UTF-32BE code point directly from args->source.
+ *
+ * Returns the code point on success. On failure returns 0xffff and sets *err:
+ *  - U_INDEX_OUTOFBOUNDS_ERROR when there is no input at all;
+ *  - U_TRUNCATED_CHAR_FOUND when fewer than 4 bytes remain (they are copied
+ *    to the converter's toUBytes and consumed);
+ *  - U_ILLEGAL_CHAR_FOUND when the value is above 0x10FFFF or is a surrogate
+ *    (the 4 bytes are copied to toUBytes and consumed).
+ */
+static UChar32 U_CALLCONV
+T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args,
+                                   UErrorCode* err)
+{
+    const uint8_t *mySource;
+    UChar32 myUChar;
+    int32_t length;
+
+    mySource = (const uint8_t *)args->source;
+    if (mySource >= (const uint8_t *)args->sourceLimit)
+    {
+        /* no input */
+        *err = U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0xffff;
+    }
+
+    length = (int32_t)((const uint8_t *)args->sourceLimit - mySource);
+    if (length < 4)
+    {
+        /* got a partial character */
+        uprv_memcpy(args->converter->toUBytes, mySource, length);
+        args->converter->toULength = (int8_t)length;
+        args->source = (const char *)(mySource + length);
+        *err = U_TRUNCATED_CHAR_FOUND;
+        return 0xffff;
+    }
+
+    /*
+     * Don't even try to do a direct cast because the value may be on an odd address.
+     * Assemble in uint32_t: shifting a signed value so that a byte >= 0x80
+     * lands in bits 24..31 would shift into the sign bit.
+     */
+    myUChar = (UChar32)(((uint32_t)mySource[0] << 24)
+                      | ((uint32_t)mySource[1] << 16)
+                      | ((uint32_t)mySource[2] << 8)
+                      | ((uint32_t)mySource[3]));
+
+    args->source = (const char *)(mySource + 4);
+    if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) {
+        return myUChar;
+    }
+
+    /* out of range or a surrogate: hand the offending bytes to the error machinery */
+    uprv_memcpy(args->converter->toUBytes, mySource, 4);
+    args->converter->toULength = 4;
+
+    *err = U_ILLEGAL_CHAR_FOUND;
+    return 0xffff;
+}
+U_CDECL_END
+static const UConverterImpl _UTF32BEImpl = { /* function table for the UTF-32BE converter; slot order is fixed by UConverterImpl (declared elsewhere) */
+ UCNV_UTF32_BigEndian, /* converter type tag */
+
+ NULL,
+ NULL,
+
+ NULL,
+ NULL,
+ NULL,
+
+ T_UConverter_toUnicode_UTF32_BE,
+ T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC, /* offsets-producing variant of the entry above */
+ T_UConverter_fromUnicode_UTF32_BE,
+ T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, /* offsets-producing variant of the entry above */
+ T_UConverter_getNextUChar_UTF32_BE,
+
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ ucnv_getNonSurrogateUnicodeSet,
+
+ NULL,
+ NULL
+};
+
+/* The 1232 CCSID refers to any version of Unicode with any endianness of UTF-32 */
+static const UConverterStaticData _UTF32BEStaticData = { /* constant descriptor for UTF-32BE */
+ sizeof(UConverterStaticData),
+ "UTF-32BE", /* converter name */
+ 1232, /* CCSID */
+ UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4,
+ { 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE, /* 00 00 FF FD is U+FFFD in big-endian byte order, followed by its length (4) */
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+const UConverterSharedData _UTF32BEData = /* shared-data object pairing the UTF-32BE static data with its function table */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32BEStaticData, &_UTF32BEImpl);
+
+/* UTF-32LE ---------------------------------------------------------- */
+U_CDECL_BEGIN
+/*
+ * Converts UTF-32LE bytes from args->source into UTF-16 code units at
+ * args->target, without producing offsets.
+ *
+ * - Each code point is assembled from 4 bytes, least significant byte first.
+ * - If the input ends inside a 4-byte sequence, the bytes seen so far stay in
+ *   the converter's toUBytes/toULength and the partial value is kept in
+ *   toUnicodeStatus (stored +1) so that a later call can resume.
+ * - Values above 0x10FFFF and surrogates set *err to U_ILLEGAL_CHAR_FOUND.
+ * - A supplementary code point is written as a surrogate pair; if only the
+ *   lead fits, the trail goes to UCharErrorBuffer and *err becomes
+ *   U_BUFFER_OVERFLOW_ERROR.
+ *
+ * args->source and args->target are advanced to reflect what was consumed
+ * and produced.
+ */
+static void U_CALLCONV
+T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args,
+                                UErrorCode * err)
+{
+    const unsigned char *mySource = (unsigned char *) args->source;
+    UChar *myTarget = args->target;
+    const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
+    const UChar *targetLimit = args->targetLimit;
+    unsigned char *toUBytes = args->converter->toUBytes;
+    uint32_t ch, i;
+
+    /* Restore state of current sequence */
+    if (args->converter->toULength > 0 && myTarget < targetLimit)
+    {
+        i = args->converter->toULength;       /* restore # of bytes consumed */
+        args->converter->toULength = 0;
+
+        /* Partial value saved by a previous call (it was stored as ch + 1) */
+        ch = args->converter->toUnicodeStatus - 1;
+        args->converter->toUnicodeStatus = 0;
+        goto morebytes;
+    }
+
+    while (mySource < sourceLimit && myTarget < targetLimit)
+    {
+        i = 0;
+        ch = 0;
+morebytes:
+        while (i < sizeof(uint32_t))
+        {
+            if (mySource < sourceLimit)
+            {
+                /*
+                 * Widen to uint32_t before shifting: a uint8_t operand is
+                 * promoted to int, and shifting a byte >= 0x80 left by 24
+                 * would shift into the sign bit.
+                 */
+                ch |= ((uint32_t)(uint8_t)(*mySource)) << (i * 8);
+                toUBytes[i++] = (char) *(mySource++);
+            }
+            else
+            {
+                /* stores a partially calculated target*/
+                /* + 1 to make 0 a valid character */
+                args->converter->toUnicodeStatus = ch + 1;
+                args->converter->toULength = (int8_t) i;
+                goto donefornow;
+            }
+        }
+
+        if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
+            /* All four bytes were read and the value is in range and not a surrogate */
+            if (ch <= MAXIMUM_UCS2) {
+                /* fits in 16 bits */
+                *(myTarget++) = (UChar) ch;
+            }
+            else {
+                /* write out the surrogates */
+                *(myTarget++) = U16_LEAD(ch);
+                ch = U16_TRAIL(ch);
+                if (myTarget < targetLimit) {
+                    *(myTarget++) = (UChar)ch;
+                }
+                else {
+                    /* Put in overflow buffer (not handled here) */
+                    args->converter->UCharErrorBuffer[0] = (UChar) ch;
+                    args->converter->UCharErrorBufferLength = 1;
+                    *err = U_BUFFER_OVERFLOW_ERROR;
+                    break;
+                }
+            }
+        }
+        else {
+            /* the offending bytes are already in toUBytes */
+            args->converter->toULength = (int8_t)i;
+            *err = U_ILLEGAL_CHAR_FOUND;
+            break;
+        }
+    }
+
+donefornow:
+    if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
+    {
+        /* End of target buffer */
+        *err = U_BUFFER_OVERFLOW_ERROR;
+    }
+
+    args->target = myTarget;
+    args->source = (const char *) mySource;
+}
+
+/*
+ * Converts UTF-32LE bytes from args->source into UTF-16 code units at
+ * args->target and records, for every UTF-16 unit written, an offset into
+ * args->offsets.
+ *
+ * - Each code point is assembled from 4 bytes, least significant byte first.
+ * - If the input ends inside a 4-byte sequence, the bytes seen so far stay in
+ *   the converter's toUBytes/toULength and the partial value is kept in
+ *   toUnicodeStatus (stored +1) so that a later call can resume.
+ * - Values above 0x10FFFF and surrogates set *err to U_ILLEGAL_CHAR_FOUND.
+ * - A supplementary code point is written as a surrogate pair whose halves
+ *   share one offset; if only the lead fits, the trail goes to
+ *   UCharErrorBuffer and *err becomes U_BUFFER_OVERFLOW_ERROR.
+ *
+ * args->source, args->target and args->offsets are advanced to reflect what
+ * was consumed and produced.
+ */
+static void U_CALLCONV
+T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
+                                             UErrorCode * err)
+{
+    const unsigned char *mySource = (unsigned char *) args->source;
+    UChar *myTarget = args->target;
+    int32_t *myOffsets = args->offsets;
+    const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
+    const UChar *targetLimit = args->targetLimit;
+    unsigned char *toUBytes = args->converter->toUBytes;
+    uint32_t ch, i;
+    int32_t offsetNum = 0;   /* offset recorded for the code point being converted */
+
+    /* Restore state of current sequence */
+    if (args->converter->toULength > 0 && myTarget < targetLimit)
+    {
+        i = args->converter->toULength;       /* restore # of bytes consumed */
+        args->converter->toULength = 0;
+
+        /* Partial value saved by a previous call (it was stored as ch + 1) */
+        ch = args->converter->toUnicodeStatus - 1;
+        args->converter->toUnicodeStatus = 0;
+        goto morebytes;
+    }
+
+    while (mySource < sourceLimit && myTarget < targetLimit)
+    {
+        i = 0;
+        ch = 0;
+morebytes:
+        while (i < sizeof(uint32_t))
+        {
+            if (mySource < sourceLimit)
+            {
+                /*
+                 * Widen to uint32_t before shifting: a uint8_t operand is
+                 * promoted to int, and shifting a byte >= 0x80 left by 24
+                 * would shift into the sign bit.
+                 */
+                ch |= ((uint32_t)(uint8_t)(*mySource)) << (i * 8);
+                toUBytes[i++] = (char) *(mySource++);
+            }
+            else
+            {
+                /* stores a partially calculated target*/
+                /* + 1 to make 0 a valid character */
+                args->converter->toUnicodeStatus = ch + 1;
+                args->converter->toULength = (int8_t) i;
+                goto donefornow;
+            }
+        }
+
+        if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch))
+        {
+            /* All four bytes were read and the value is in range and not a surrogate */
+            if (ch <= MAXIMUM_UCS2)
+            {
+                /* fits in 16 bits */
+                *(myTarget++) = (UChar) ch;
+                *(myOffsets++) = offsetNum;
+            }
+            else {
+                /* write out the surrogates */
+                *(myTarget++) = U16_LEAD(ch);
+                *(myOffsets++) = offsetNum;
+                ch = U16_TRAIL(ch);
+                if (myTarget < targetLimit)
+                {
+                    *(myTarget++) = (UChar)ch;
+                    *(myOffsets++) = offsetNum;
+                }
+                else
+                {
+                    /* Put in overflow buffer (not handled here) */
+                    args->converter->UCharErrorBuffer[0] = (UChar) ch;
+                    args->converter->UCharErrorBufferLength = 1;
+                    *err = U_BUFFER_OVERFLOW_ERROR;
+                    break;
+                }
+            }
+        }
+        else
+        {
+            /* the offending bytes are already in toUBytes */
+            args->converter->toULength = (int8_t)i;
+            *err = U_ILLEGAL_CHAR_FOUND;
+            break;
+        }
+        /* i is 4 here: the inner loop only exits normally after a full sequence */
+        offsetNum += i;
+    }
+
+donefornow:
+    if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
+    {
+        /* End of target buffer */
+        *err = U_BUFFER_OVERFLOW_ERROR;
+    }
+
+    args->target = myTarget;
+    args->source = (const char *) mySource;
+    args->offsets = myOffsets;
+}
+
+static void U_CALLCONV
+T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,
+ UErrorCode * err)
+{ /* UTF-16 code units -> UTF-32LE bytes, no offsets; writes a BOM first when fromUnicodeStatus asks for one */
+ const UChar *mySource = args->source;
+ unsigned char *myTarget;
+ const UChar *sourceLimit = args->sourceLimit;
+ const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
+ UChar32 ch, ch2;
+ unsigned int indexToWrite;
+ unsigned char temp[sizeof(uint32_t)]; /* one encoded code point, in output byte order */
+
+ if(mySource >= sourceLimit) {
+ /* no input, nothing to do */
+ return;
+ }
+
+ /* write the BOM if necessary */
+ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 }; /* U+FEFF as UTF-32LE */
+ ucnv_fromUWriteBytes(args->converter,
+ bom, 4,
+ &args->target, args->targetLimit,
+ &args->offsets, -1,
+ err);
+ args->converter->fromUnicodeStatus=0;
+ }
+
+ myTarget = (unsigned char *) args->target; /* read after the BOM write, which receives &args->target */
+ temp[3] = 0; /* top byte is never changed below: temp[2] is masked to 5 bits */
+
+ if (args->converter->fromUChar32)
+ {
+ ch = args->converter->fromUChar32; /* lead surrogate left pending by an earlier call */
+ args->converter->fromUChar32 = 0;
+ goto lowsurogate;
+ }
+
+ while (mySource < sourceLimit && myTarget < targetLimit)
+ {
+ ch = *(mySource++);
+
+ if (U16_IS_SURROGATE(ch)) {
+ if (U16_IS_LEAD(ch))
+ {
+lowsurogate:
+ if (mySource < sourceLimit)
+ {
+ ch2 = *mySource;
+ if (U16_IS_TRAIL(ch2)) {
+ ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
+ mySource++;
+ }
+ else {
+ /* this is an unmatched lead code unit (1st surrogate): no trail follows */
+ /* callback(illegal) */
+ args->converter->fromUChar32 = ch;
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+ else {
+ /* ran out of source */
+ args->converter->fromUChar32 = ch;
+ if (args->flush) {
+ /* this is an unmatched lead code unit (1st surrogate) at the end of the input */
+ /* callback(illegal) */
+ *err = U_ILLEGAL_CHAR_FOUND;
+ }
+ break;
+ }
+ }
+ else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ args->converter->fromUChar32 = ch;
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+
+ /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
+ temp[2] = (uint8_t) (ch >> 16 & 0x1F);
+ temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
+ temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
+
+ for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) /* always emits all 4 bytes */
+ {
+ if (myTarget < targetLimit)
+ {
+ *(myTarget++) = temp[indexToWrite];
+ }
+ else
+ {
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; /* bytes that do not fit go to the converter's overflow buffer */
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+ }
+
+ if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
+ {
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ args->target = (char *) myTarget; /* report progress back to the caller */
+ args->source = mySource;
+}
+
+static void U_CALLCONV
+T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
+ UErrorCode * err)
+{ /* UTF-16 code units -> UTF-32LE bytes, also writing one source offset per output byte */
+ const UChar *mySource = args->source;
+ unsigned char *myTarget;
+ int32_t *myOffsets;
+ const UChar *sourceLimit = args->sourceLimit;
+ const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
+ UChar32 ch, ch2;
+ unsigned int indexToWrite;
+ unsigned char temp[sizeof(uint32_t)]; /* one encoded code point, in output byte order */
+ int32_t offsetNum = 0; /* offset recorded for the code point currently being written */
+
+ if(mySource >= sourceLimit) {
+ /* no input, nothing to do */
+ return;
+ }
+
+ /* write the BOM if necessary */
+ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 }; /* U+FEFF as UTF-32LE */
+ ucnv_fromUWriteBytes(args->converter,
+ bom, 4,
+ &args->target, args->targetLimit,
+ &args->offsets, -1,
+ err);
+ args->converter->fromUnicodeStatus=0;
+ }
+
+ myTarget = (unsigned char *) args->target; /* read after the BOM write, which receives &args->target and &args->offsets */
+ myOffsets = args->offsets;
+ temp[3] = 0; /* top byte is never changed below: temp[2] is masked to 5 bits */
+
+ if (args->converter->fromUChar32)
+ {
+ ch = args->converter->fromUChar32; /* lead surrogate left pending by an earlier call */
+ args->converter->fromUChar32 = 0;
+ goto lowsurogate;
+ }
+
+ while (mySource < sourceLimit && myTarget < targetLimit)
+ {
+ ch = *(mySource++);
+
+ if (U16_IS_SURROGATE(ch)) {
+ if (U16_IS_LEAD(ch))
+ {
+lowsurogate:
+ if (mySource < sourceLimit)
+ {
+ ch2 = *mySource;
+ if (U16_IS_TRAIL(ch2))
+ {
+ ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
+ mySource++;
+ }
+ else {
+ /* this is an unmatched lead code unit (1st surrogate): no trail follows */
+ /* callback(illegal) */
+ args->converter->fromUChar32 = ch;
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+ else {
+ /* ran out of source */
+ args->converter->fromUChar32 = ch;
+ if (args->flush) {
+ /* this is an unmatched lead code unit (1st surrogate) at the end of the input */
+ /* callback(illegal) */
+ *err = U_ILLEGAL_CHAR_FOUND;
+ }
+ break;
+ }
+ }
+ else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ args->converter->fromUChar32 = ch;
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+
+ /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
+ temp[2] = (uint8_t) (ch >> 16 & 0x1F);
+ temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
+ temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
+
+ for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) /* always emits all 4 bytes */
+ {
+ if (myTarget < targetLimit)
+ {
+ *(myTarget++) = temp[indexToWrite];
+ *(myOffsets++) = offsetNum;
+ }
+ else
+ {
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; /* bytes that do not fit go to the converter's overflow buffer */
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+ offsetNum = offsetNum + 1 + (temp[2] != 0); /* temp[2] != 0 means ch >= 0x10000, i.e. two UChars were consumed */
+ }
+
+ if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
+ {
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ args->target = (char *) myTarget; /* report progress back to the caller */
+ args->source = mySource;
+ args->offsets = myOffsets;
+}
+
+/*
+ * Reads one UTF-32LE code point directly from args->source.
+ *
+ * Returns the code point on success. On failure returns 0xffff and sets *err:
+ *  - U_INDEX_OUTOFBOUNDS_ERROR when there is no input at all;
+ *  - U_TRUNCATED_CHAR_FOUND when fewer than 4 bytes remain (they are copied
+ *    to the converter's toUBytes and consumed);
+ *  - U_ILLEGAL_CHAR_FOUND when the value is above 0x10FFFF or is a surrogate
+ *    (the 4 bytes are copied to toUBytes and consumed).
+ */
+static UChar32 U_CALLCONV
+T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args,
+                                   UErrorCode* err)
+{
+    const uint8_t *mySource;
+    UChar32 myUChar;
+    int32_t length;
+
+    mySource = (const uint8_t *)args->source;
+    if (mySource >= (const uint8_t *)args->sourceLimit)
+    {
+        /* no input */
+        *err = U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0xffff;
+    }
+
+    length = (int32_t)((const uint8_t *)args->sourceLimit - mySource);
+    if (length < 4)
+    {
+        /* got a partial character */
+        uprv_memcpy(args->converter->toUBytes, mySource, length);
+        args->converter->toULength = (int8_t)length;
+        args->source = (const char *)(mySource + length);
+        *err = U_TRUNCATED_CHAR_FOUND;
+        return 0xffff;
+    }
+
+    /*
+     * Don't even try to do a direct cast because the value may be on an odd address.
+     * Assemble in uint32_t: shifting a signed value so that a byte >= 0x80
+     * lands in bits 24..31 would shift into the sign bit.
+     */
+    myUChar = (UChar32)(((uint32_t)mySource[3] << 24)
+                      | ((uint32_t)mySource[2] << 16)
+                      | ((uint32_t)mySource[1] << 8)
+                      | ((uint32_t)mySource[0]));
+
+    args->source = (const char *)(mySource + 4);
+    if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) {
+        return myUChar;
+    }
+
+    /* out of range or a surrogate: hand the offending bytes to the error machinery */
+    uprv_memcpy(args->converter->toUBytes, mySource, 4);
+    args->converter->toULength = 4;
+
+    *err = U_ILLEGAL_CHAR_FOUND;
+    return 0xffff;
+}
+U_CDECL_END
+static const UConverterImpl _UTF32LEImpl = {
+ UCNV_UTF32_LittleEndian,
+
+ NULL,
+ NULL,
+
+ NULL,
+ NULL,
+ NULL,
+
+ T_UConverter_toUnicode_UTF32_LE,
+ T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC,
+ T_UConverter_fromUnicode_UTF32_LE,
+ T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC,
+ T_UConverter_getNextUChar_UTF32_LE,
+
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ ucnv_getNonSurrogateUnicodeSet,
+
+ NULL,
+ NULL
+};
+
+/* The 1234 CCSID refers to any version of Unicode with a little endian encoding of UTF-32 */
+static const UConverterStaticData _UTF32LEStaticData = {
+ sizeof(UConverterStaticData),
+ "UTF-32LE",
+ 1234,
+ UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4,
+ { 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE,
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+
+const UConverterSharedData _UTF32LEData =
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32LEStaticData, &_UTF32LEImpl);
+
+/* UTF-32 (Detect BOM) ------------------------------------------------------ */
+
+/*
+ * Detect a BOM at the beginning of the stream and select UTF-32BE or UTF-32LE
+ * accordingly.
+ *
+ * State values:
+ * 0 initial state
+ * 1 saw 00
+ * 2 saw 00 00
+ * 3 saw 00 00 FE
+ * 4 -
+ * 5 saw FF
+ * 6 saw FF FE
+ * 7 saw FF FE 00
+ * 8 UTF-32BE mode
+ * 9 UTF-32LE mode
+ *
+ * During detection: state&3==number of matching bytes so far.
+ *
+ * On output, emit U+FEFF as the first code point.
+ */
+U_CDECL_BEGIN
+static void U_CALLCONV
+_UTF32Reset(UConverter *cnv, UConverterResetChoice choice) {
+    const UBool resetToUnicode=(UBool)(choice<=UCNV_RESET_TO_UNICODE);
+    const UBool resetFromUnicode=(UBool)(choice!=UCNV_RESET_TO_UNICODE);
+    if(resetToUnicode) {
+        cnv->mode=0; /* toUnicode: back to the initial BOM-detection state */
+    }
+    if(resetFromUnicode) {
+        cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; /* fromUnicode: output the UTF-32PE BOM first */
+    }
+}
+
+static void U_CALLCONV
+_UTF32Open(UConverter *cnv,
+ UConverterLoadArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ (void)pArgs; /* unused: no load argument is consulted here */
+ (void)pErrorCode; /* unused: nothing in this function reports an error */
+ _UTF32Reset(cnv, UCNV_RESET_BOTH); /* initialize both directions: state 0 for input, BOM pending for output */
+}
+
+static const char utf32BOM[8]={ 0, 0, (char)0xfeu, (char)0xffu, (char)0xffu, (char)0xfeu, 0, 0 }; /* bytes 0..3: UTF-32BE BOM (00 00 FE FF), bytes 4..7: UTF-32LE BOM (FF FE 00 00) */
+
+static void U_CALLCONV
+_UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv=pArgs->converter;
+ const char *source=pArgs->source;
+ const char *sourceLimit=pArgs->sourceLimit;
+ int32_t *offsets=pArgs->offsets;
+
+ int32_t state, offsetDelta;
+ char b;
+
+ state=cnv->mode;
+
+ /*
+ * If we detect a BOM in this buffer, then we must add the BOM size to the
+ * offsets because the actual converter function will not see and count the BOM.
+ * offsetDelta will have the number of the BOM bytes that are in the current buffer.
+ */
+ offsetDelta=0;
+
+ while(source<sourceLimit && U_SUCCESS(*pErrorCode)) {
+ switch(state) {
+ case 0:
+ b=*source;
+ if(b==0) {
+ state=1; /* could be 00 00 FE FF */
+ } else if(b==(char)0xffu) {
+ state=5; /* could be FF FE 00 00 */
+ } else {
+ state=8; /* default to UTF-32BE */
+ continue;
+ }
+ ++source;
+ break;
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ if(*source==utf32BOM[state]) {
+ ++state;
+ ++source;
+ if(state==4) {
+ state=8; /* detect UTF-32BE */
+ offsetDelta=(int32_t)(source-pArgs->source);
+ } else if(state==8) {
+ state=9; /* detect UTF-32LE */
+ offsetDelta=(int32_t)(source-pArgs->source);
+ }
+ } else {
+ /* switch to UTF-32BE and pass the previous bytes */
+ int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */
+
+ /* reset the source */
+ source=pArgs->source;
+
+ if(count==(state&3)) {
+ /* simple: all in the same buffer, just reset source */
+ } else {
+ UBool oldFlush=pArgs->flush;
+
+ /* some of the bytes are from a previous buffer, replay those first */
+ pArgs->source=utf32BOM+(state&4); /* select the correct BOM */
+ pArgs->sourceLimit=pArgs->source+((state&3)-count); /* replay previous bytes */
+ pArgs->flush=FALSE; /* this sourceLimit is not the real source stream limit */
+
+ /* no offsets: bytes from previous buffer, and not enough for output */
+ T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
+
+ /* restore real pointers; pArgs->source will be set in case 8/9 */
+ pArgs->sourceLimit=sourceLimit;
+ pArgs->flush=oldFlush;
+ }
+ state=8;
+ continue;
+ }
+ break;
+ case 8:
+ /* call UTF-32BE */
+ pArgs->source=source;
+ if(offsets==NULL) {
+ T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
+ } else {
+ T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(pArgs, pErrorCode);
+ }
+ source=pArgs->source;
+ break;
+ case 9:
+ /* call UTF-32LE */
+ pArgs->source=source;
+ if(offsets==NULL) {
+ T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode);
+ } else {
+ T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(pArgs, pErrorCode);
+ }
+ source=pArgs->source;
+ break;
+ default:
+ break; /* does not occur */
+ }
+ }
+
+ /* add BOM size to offsets - see comment at offsetDelta declaration */
+ if(offsets!=NULL && offsetDelta!=0) {
+ int32_t *offsetsLimit=pArgs->offsets;
+ while(offsets<offsetsLimit) {
+ *offsets++ += offsetDelta;
+ }
+ }
+
+ pArgs->source=source;
+
+ if(source==sourceLimit && pArgs->flush) {
+ /* handle truncated input */
+ switch(state) {
+ case 0:
+ break; /* no input at all, nothing to do */
+ case 8:
+ T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
+ break;
+ case 9:
+ T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode);
+ break;
+ default:
+ /* handle 0<state<8: call UTF-32BE with too-short input */
+ pArgs->source=utf32BOM+(state&4); /* select the correct BOM */
+ pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */
+
+ /* no offsets: not enough for output */
+ T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
+ pArgs->source=source;
+ pArgs->sourceLimit=sourceLimit;
+ state=8;
+ break;
+ }
+ }
+
+ cnv->mode=state;
+}
+
+static UChar32 U_CALLCONV
+_UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs,
+                   UErrorCode *pErrorCode) {
+    const int32_t state=pArgs->converter->mode; /* BOM-detection state: 8=UTF-32BE mode, 9=UTF-32LE mode */
+    if(state==8) {
+        return T_UConverter_getNextUChar_UTF32_BE(pArgs, pErrorCode);
+    }
+    if(state==9) {
+        return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode);
+    }
+    return UCNV_GET_NEXT_UCHAR_USE_TO_U; /* byte order not determined yet */
+}
+U_CDECL_END
+static const UConverterImpl _UTF32Impl = {
+ UCNV_UTF32,
+
+ NULL,
+ NULL,
+
+ _UTF32Open,
+ NULL,
+ _UTF32Reset,
+
+ _UTF32ToUnicodeWithOffsets,
+ _UTF32ToUnicodeWithOffsets,
+#if U_IS_BIG_ENDIAN
+ T_UConverter_fromUnicode_UTF32_BE,
+ T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC,
+#else
+ T_UConverter_fromUnicode_UTF32_LE,
+ T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC,
+#endif
+ _UTF32GetNextUChar,
+
+ NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
+ NULL,
+ NULL,
+ NULL,
+ ucnv_getNonSurrogateUnicodeSet,
+
+ NULL,
+ NULL
+};
+
+/* The 1236 CCSID refers to any version of Unicode with a BOM-sensitive endianness of UTF-32 */
+static const UConverterStaticData _UTF32StaticData = {
+ sizeof(UConverterStaticData),
+ "UTF-32",
+ 1236,
+ UCNV_IBM, UCNV_UTF32, 4, 4,
+#if U_IS_BIG_ENDIAN
+ { 0, 0, 0xff, 0xfd }, 4,
+#else
+ { 0xfd, 0xff, 0, 0 }, 4,
+#endif
+ FALSE, FALSE,
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+const UConverterSharedData _UTF32Data =
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32StaticData, &_UTF32Impl);
+
+#endif
diff --git a/thirdparty/icu4c/common/ucnv_u7.cpp b/thirdparty/icu4c/common/ucnv_u7.cpp
new file mode 100644
index 0000000000..87ba8cf37e
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_u7.cpp
@@ -0,0 +1,1491 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: ucnv_u7.c
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jul01
+* created by: Markus W. Scherer
+*
+* UTF-7 converter implementation. Used to be in ucnv_utf.c.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+
+#include "cmemory.h"
+#include "unicode/ucnv.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "uassert.h"
+
+/* UTF-7 -------------------------------------------------------------------- */
+
+/*
+ * UTF-7 is a stateful encoding of Unicode.
+ * It is defined in RFC 2152. (http://www.ietf.org/rfc/rfc2152.txt)
+ * It was intended for use in Internet email systems, using in its bytewise
+ * encoding only a subset of 7-bit US-ASCII.
+ * UTF-7 is deprecated in favor of UTF-8/16/32 and SCSU, but still
+ * occasionally used.
+ *
+ * For converting Unicode to UTF-7, the RFC allows to encode some US-ASCII
+ * characters directly or in base64. Especially, the characters in set O
+ * as defined in the RFC (see below) may be encoded directly but are not
+ * allowed in, e.g., email headers.
+ * By default, the ICU UTF-7 converter encodes set O directly.
+ * By choosing the option "version=1", set O will be escaped instead.
+ * For example:
+ * utf7Converter=ucnv_open("UTF-7,version=1");
+ *
+ * For details about email headers see RFC 2047.
+ */
+
+/*
+ * Tests for US-ASCII characters belonging to character classes
+ * defined in UTF-7.
+ *
+ * Set D (directly encoded characters) consists of the following
+ * characters: the upper and lower case letters A through Z
+ * and a through z, the 10 digits 0-9, and the following nine special
+ * characters (note that "+" and "=" are omitted):
+ * '(),-./:?
+ *
+ * Set O (optional direct characters) consists of the following
+ * characters (note that "\" and "~" are omitted):
+ * !"#$%&*;<=>@[]^_`{|}
+ *
+ * According to the rules in RFC 2152, the byte values for the following
+ * US-ASCII characters are not used in UTF-7 and are therefore illegal:
+ * - all C0 control codes except for CR LF TAB
+ * - BACKSLASH
+ * - TILDE
+ * - DEL
+ * - all codes beyond US-ASCII, i.e. all >127
+ */
+#define inSetD(c) \
+ ((uint8_t)((c)-97)<26 || (uint8_t)((c)-65)<26 || /* letters */ \
+ (uint8_t)((c)-48)<10 || /* digits */ \
+ (uint8_t)((c)-39)<3 || /* '() */ \
+ (uint8_t)((c)-44)<4 || /* ,-./ */ \
+ (c)==58 || (c)==63 /* :? */ \
+ )
+
+#define inSetO(c) \
+ ((uint8_t)((c)-33)<6 || /* !"#$%& */ \
+ (uint8_t)((c)-59)<4 || /* ;<=> */ \
+ (uint8_t)((c)-93)<4 || /* ]^_` */ \
+ (uint8_t)((c)-123)<3 || /* {|} */ \
+ (c)==42 || (c)==64 || (c)==91 /* *@[ */ \
+ )
+
+#define isCRLFTAB(c) ((c)==13 || (c)==10 || (c)==9)
+#define isCRLFSPTAB(c) ((c)==32 || (c)==13 || (c)==10 || (c)==9)
+
+#define PLUS 43
+#define MINUS 45
+#define BACKSLASH 92
+#define TILDE 126
+
+/* legal byte values: all US-ASCII graphic characters from space to before tilde, and CR LF TAB */
+#define isLegalUTF7(c) (((uint8_t)((c)-32)<94 && (c)!=BACKSLASH) || isCRLFTAB(c))
+
+/* encode directly sets D and O and CR LF SP TAB */
+static const UBool encodeDirectlyMaximum[128]={
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0
+};
+
+/* encode directly set D and CR LF SP TAB but not set O */
+static const UBool encodeDirectlyRestricted[128]={
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
+};
+
+static const uint8_t
+toBase64[64]={
+ /* A-Z */
+ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
+ 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
+ /* a-z */
+ 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
+ 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
+ /* 0-9 */
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
+ /* +/ */
+ 43, 47
+};
+
+static const int8_t
+fromBase64[128]={
+ /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */
+ -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3,
+ -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
+
+ /* general punctuation with + and / and a special value (-2) for - */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63,
+ /* digits */
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
+
+ /* A-Z */
+ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1,
+
+ /* a-z */
+ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3
+};
+
+/*
+ * converter status values:
+ *
+ * toUnicodeStatus:
+ * 24 inDirectMode (boolean)
+ * 23..16 base64Counter (-1..7)
+ * 15..0 bits (up to 14 bits incoming base64)
+ *
+ * fromUnicodeStatus:
+ * 31..28 version (0: set O direct 1: set O escaped)
+ * 24 inDirectMode (boolean)
+ * 23..16 base64Counter (0..2)
+ * 7..0 bits (6 bits outgoing base64)
+ *
+ */
+
+U_CDECL_BEGIN
+static void U_CALLCONV
+_UTF7Reset(UConverter *cnv, UConverterResetChoice choice) {
+    const uint32_t directModeFlag=0x1000000; /* status bit 24: inDirectMode=TRUE */
+    if(choice<=UCNV_RESET_TO_UNICODE) {
+        cnv->toUnicodeStatus=directModeFlag; /* toUnicode: direct mode, counter and bits cleared */
+        cnv->toULength=0;
+    }
+    if(choice!=UCNV_RESET_TO_UNICODE) {
+        /* fromUnicode: direct mode, keeping only the version in bits 31..28 */
+        cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|directModeFlag;
+    }
+}
+
+static void U_CALLCONV
+_UTF7Open(UConverter *cnv,
+          UConverterLoadArgs *pArgs,
+          UErrorCode *pErrorCode) {
+    (void)pArgs;
+    if(UCNV_GET_VERSION(cnv)>1) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; /* only versions 0 and 1 are accepted */
+        return;
+    }
+    /* TODO(markus): Should just use cnv->options rather than copying the version number. */
+    cnv->fromUnicodeStatus=UCNV_GET_VERSION(cnv)<<28;
+    _UTF7Reset(cnv, UCNV_RESET_BOTH);
+}
+
+static void U_CALLCONV
+_UTF7ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const uint8_t *source, *sourceLimit;
+ UChar *target;
+ const UChar *targetLimit;
+ int32_t *offsets;
+
+ uint8_t *bytes;
+ uint8_t byteIndex;
+
+ int32_t length, targetCapacity;
+
+ /* UTF-7 state */
+ uint16_t bits;
+ int8_t base64Counter;
+ UBool inDirectMode;
+
+ int8_t base64Value;
+
+ int32_t sourceIndex, nextSourceIndex;
+
+ uint8_t b;
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+
+ source=(const uint8_t *)pArgs->source;
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+ target=pArgs->target;
+ targetLimit=pArgs->targetLimit;
+ offsets=pArgs->offsets;
+ /* get the state machine state */
+ {
+ uint32_t status=cnv->toUnicodeStatus;
+ inDirectMode=(UBool)((status>>24)&1);
+ base64Counter=(int8_t)(status>>16);
+ bits=(uint16_t)status;
+ }
+ bytes=cnv->toUBytes;
+ byteIndex=cnv->toULength;
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex=byteIndex==0 ? 0 : -1;
+ nextSourceIndex=0;
+
+ if(inDirectMode) {
+directMode:
+ /*
+ * In Direct Mode, most US-ASCII characters are encoded directly, i.e.,
+ * with their US-ASCII byte values.
+ * Backslash and Tilde and most control characters are not allowed in UTF-7.
+ * A plus sign starts Unicode (or "escape") Mode.
+ *
+ * In Direct Mode, only the sourceIndex is used.
+ */
+ byteIndex=0;
+ length=(int32_t)(sourceLimit-source);
+ targetCapacity=(int32_t)(targetLimit-target);
+ if(length>targetCapacity) {
+ length=targetCapacity;
+ }
+ while(length>0) {
+ b=*source++;
+ if(!isLegalUTF7(b)) {
+ /* illegal */
+ bytes[0]=b;
+ byteIndex=1;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ } else if(b!=PLUS) {
+ /* write directly encoded character */
+ *target++=b;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex++;
+ }
+ } else /* PLUS */ {
+ /* switch to Unicode mode */
+ nextSourceIndex=++sourceIndex;
+ inDirectMode=FALSE;
+ byteIndex=0;
+ bits=0;
+ base64Counter=-1;
+ goto unicodeMode;
+ }
+ --length;
+ }
+ if(source<sourceLimit && target>=targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ } else {
+unicodeMode:
+ /*
+ * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
+ * The base64 sequence ends with any character that is not in the base64 alphabet.
+ * A terminating minus sign is consumed.
+ *
+ * In Unicode Mode, the sourceIndex has the index to the start of the current
+ * base64 bytes, while nextSourceIndex is precisely parallel to source,
+ * keeping the index to the following byte.
+ * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
+ */
+ while(source<sourceLimit) {
+ if(target<targetLimit) {
+ bytes[byteIndex++]=b=*source++;
+ ++nextSourceIndex;
+ base64Value = -3; /* initialize as illegal */
+ if(b>=126 || (base64Value=fromBase64[b])==-3 || base64Value==-1) {
+ /* either
+ * base64Value==-1 for any legal character except base64 and minus sign, or
+ * base64Value==-3 for illegal characters:
+ * 1. In either case, leave Unicode mode.
+ * 2.1. If we ended with an incomplete UChar or none after the +, then
+ * generate an error for the preceding erroneous sequence and deal with
+ * the current (possibly illegal) character next time through.
+ * 2.2. Else the current char comes after a complete UChar, which was already
+ * pushed to the output buf, so:
+ * 2.2.1. If the current char is legal, just save it for processing next time.
+ * It may be for example, a plus which we need to deal with in direct mode.
+ * 2.2.2. Else if the current char is illegal, we might as well deal with it here.
+ */
+ inDirectMode=TRUE;
+ if(base64Counter==-1) {
+ /* illegal: + immediately followed by something other than base64 or minus sign */
+ /* include the plus sign in the reported sequence, but not the subsequent char */
+ --source;
+ bytes[0]=PLUS;
+ byteIndex=1;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ } else if(bits!=0) {
+ /* bits are illegally left over, a UChar is incomplete */
+ /* don't include current char (legal or illegal) in error seq */
+ --source;
+ --byteIndex;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ } else {
+ /* previous UChar was complete */
+ if(base64Value==-3) {
+ /* current character is illegal, deal with it here */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ } else {
+ /* un-read the current character in case it is a plus sign */
+ --source;
+ sourceIndex=nextSourceIndex-1;
+ goto directMode;
+ }
+ }
+ } else if(base64Value>=0) {
+ /* collect base64 bytes into UChars */
+ switch(base64Counter) {
+ case -1: /* -1 is immediately after the + */
+ case 0:
+ bits=base64Value;
+ base64Counter=1;
+ break;
+ case 1:
+ case 3:
+ case 4:
+ case 6:
+ bits=(uint16_t)((bits<<6)|base64Value);
+ ++base64Counter;
+ break;
+ case 2:
+ *target++=(UChar)((bits<<4)|(base64Value>>2));
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ sourceIndex=nextSourceIndex-1;
+ }
+ bytes[0]=b; /* keep this byte in case an error occurs */
+ byteIndex=1;
+ bits=(uint16_t)(base64Value&3);
+ base64Counter=3;
+ break;
+ case 5:
+ *target++=(UChar)((bits<<2)|(base64Value>>4));
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ sourceIndex=nextSourceIndex-1;
+ }
+ bytes[0]=b; /* keep this byte in case an error occurs */
+ byteIndex=1;
+ bits=(uint16_t)(base64Value&15);
+ base64Counter=6;
+ break;
+ case 7:
+ *target++=(UChar)((bits<<6)|base64Value);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ sourceIndex=nextSourceIndex;
+ }
+ byteIndex=0;
+ bits=0;
+ base64Counter=0;
+ break;
+ default:
+ /* will never occur */
+ break;
+ }
+ } else /*base64Value==-2*/ {
+ /* minus sign terminates the base64 sequence */
+ inDirectMode=TRUE;
+ if(base64Counter==-1) {
+ /* +- i.e. a minus immediately following a plus */
+ *target++=PLUS;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex-1;
+ }
+ } else {
+ /* absorb the minus and leave the Unicode Mode */
+ if(bits!=0) {
+ /* bits are illegally left over, a UChar is incomplete */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+ sourceIndex=nextSourceIndex;
+ goto directMode;
+ }
+ } else {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ }
+
+ if(U_SUCCESS(*pErrorCode) && pArgs->flush && source==sourceLimit && bits==0) {
+ /*
+ * if we are in Unicode mode, then the byteIndex might not be 0,
+ * but that is ok if bits==0
+ * -> we set byteIndex=0 at the end of the stream to avoid a truncated error
+ * (not true for IMAP-mailbox-name where we must end in direct mode)
+ */
+ byteIndex=0;
+ }
+
+ /* set the converter state back into UConverter */
+ cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
+ cnv->toULength=byteIndex;
+
+ /* write back the updated pointers */
+ pArgs->source=(const char *)source;
+ pArgs->target=target;
+ pArgs->offsets=offsets;
+ return;
+}
+
+static void U_CALLCONV
+_UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const UChar *source, *sourceLimit;
+ uint8_t *target, *targetLimit;
+ int32_t *offsets;
+
+ int32_t length, targetCapacity, sourceIndex;
+ UChar c;
+
+ /* UTF-7 state */
+ const UBool *encodeDirectly;
+ uint8_t bits;
+ int8_t base64Counter;
+ UBool inDirectMode;
+
+ /* Encode UTF-16 from pArgs->source as UTF-7 into pArgs->target; the state is kept in cnv->fromUnicodeStatus. */
+ cnv=pArgs->converter;
+
+ /* set up the local pointers */
+ source=pArgs->source;
+ sourceLimit=pArgs->sourceLimit;
+ target=(uint8_t *)pArgs->target;
+ targetLimit=(uint8_t *)pArgs->targetLimit;
+ offsets=pArgs->offsets;
+
+ /* get the state machine state */
+ {
+ uint32_t status=cnv->fromUnicodeStatus;
+ encodeDirectly= status<0x10000000 ? encodeDirectlyMaximum : encodeDirectlyRestricted;
+ inDirectMode=(UBool)((status>>24)&1);
+ base64Counter=(int8_t)(status>>16);
+ bits=(uint8_t)status;
+ U_ASSERT(bits<UPRV_LENGTHOF(toBase64)); /* bits is used as an index into toBase64[], so it must stay below its length */
+ }
+
+ /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
+ sourceIndex=0;
+
+ if(inDirectMode) {
+directMode:
+ length=(int32_t)(sourceLimit-source);
+ targetCapacity=(int32_t)(targetLimit-target);
+ if(length>targetCapacity) {
+ length=targetCapacity;
+ }
+ while(length>0) {
+ c=*source++;
+ /* currently always encode CR LF SP TAB directly */
+ if(c<=127 && encodeDirectly[c]) {
+ /* encode directly */
+ *target++=(uint8_t)c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex++;
+ }
+ } else if(c==PLUS) {
+ /* output +- for + */
+ *target++=PLUS;
+ if(target<targetLimit) {
+ *target++=MINUS;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex++;
+ }
+ /* realign length and targetCapacity */
+ goto directMode;
+ } else {
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex++;
+ }
+ cnv->charErrorBuffer[0]=MINUS;
+ cnv->charErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ } else {
+ /* un-read this character and switch to Unicode Mode */
+ --source;
+ *target++=PLUS;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ inDirectMode=FALSE;
+ base64Counter=0;
+ goto unicodeMode;
+ }
+ --length;
+ }
+ if(source<sourceLimit && target>=targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ } else {
+unicodeMode:
+ while(source<sourceLimit) {
+ if(target<targetLimit) {
+ c=*source++;
+ if(c<=127 && encodeDirectly[c]) {
+ /* encode directly */
+ inDirectMode=TRUE;
+
+ /* trick: back out this character to make this easier */
+ --source;
+
+ /* terminate the base64 sequence */
+ if(base64Counter!=0) {
+ /* write remaining bits for the previous character */
+ *target++=toBase64[bits];
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex-1;
+ }
+ }
+ if(fromBase64[c]!=-1) {
+ /* need to terminate with a minus */
+ if(target<targetLimit) {
+ *target++=MINUS;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex-1;
+ }
+ } else {
+ cnv->charErrorBuffer[0]=MINUS;
+ cnv->charErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ goto directMode;
+ } else {
+ /*
+ * base64 this character:
+ * Output 2 or 3 base64 bytes for the remaining bits of the previous character
+ * and the bits of this character, each implicitly in UTF-16BE.
+ *
+ * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
+ * character to the next. The actual 2 or 4 bits are shifted to the left edge
+ * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
+ */
+ switch(base64Counter) {
+ case 0:
+ *target++=toBase64[c>>10];
+ if(target<targetLimit) {
+ *target++=toBase64[(c>>4)&0x3f];
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex++;
+ }
+ } else {
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex++;
+ }
+ cnv->charErrorBuffer[0]=toBase64[(c>>4)&0x3f];
+ cnv->charErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ bits=(uint8_t)((c&15)<<2);
+ base64Counter=1;
+ break;
+ case 1:
+ *target++=toBase64[bits|(c>>14)];
+ if(target<targetLimit) {
+ *target++=toBase64[(c>>8)&0x3f];
+ if(target<targetLimit) {
+ *target++=toBase64[(c>>2)&0x3f];
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex++;
+ }
+ } else {
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex++;
+ }
+ cnv->charErrorBuffer[0]=toBase64[(c>>2)&0x3f];
+ cnv->charErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ } else {
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex++;
+ }
+ cnv->charErrorBuffer[0]=toBase64[(c>>8)&0x3f];
+ cnv->charErrorBuffer[1]=toBase64[(c>>2)&0x3f];
+ cnv->charErrorBufferLength=2;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ bits=(uint8_t)((c&3)<<4);
+ base64Counter=2;
+ break;
+ case 2:
+ *target++=toBase64[bits|(c>>12)];
+ if(target<targetLimit) {
+ *target++=toBase64[(c>>6)&0x3f];
+ if(target<targetLimit) {
+ *target++=toBase64[c&0x3f];
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex++;
+ }
+ } else {
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex++;
+ }
+ cnv->charErrorBuffer[0]=toBase64[c&0x3f];
+ cnv->charErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ } else {
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex++;
+ }
+ cnv->charErrorBuffer[0]=toBase64[(c>>6)&0x3f];
+ cnv->charErrorBuffer[1]=toBase64[c&0x3f];
+ cnv->charErrorBufferLength=2;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ bits=0;
+ base64Counter=0;
+ break;
+ default:
+ /* will never occur */
+ break;
+ }
+ }
+ } else {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ }
+
+ if(pArgs->flush && source>=sourceLimit) {
+ /* flush remaining bits to the target */
+ if(!inDirectMode) {
+ if (base64Counter!=0) {
+ if(target<targetLimit) {
+ *target++=toBase64[bits];
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex-1;
+ }
+ } else {
+ cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits];
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+ /* Add final MINUS to terminate unicodeMode */
+ if(target<targetLimit) {
+ *target++=MINUS;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex-1;
+ }
+ } else {
+ cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+ /* reset the state for the next conversion */
+ cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
+ } else {
+ /* set the converter state back into UConverter */
+ cnv->fromUnicodeStatus=
+ (cnv->fromUnicodeStatus&0xf0000000)| /* keep version*/
+ ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
+ }
+
+ /* write back the updated pointers */
+ pArgs->source=source;
+ pArgs->target=(char *)target;
+ pArgs->offsets=offsets;
+ return;
+}
+
+static const char * U_CALLCONV
+_UTF7GetName(const UConverter *cnv) {
+    /* bits 31..28 of fromUnicodeStatus hold the version (1: set O escaped) */
+    const uint32_t version=cnv->fromUnicodeStatus>>28;
+    if(version==1) {
+        return "UTF-7,version=1";
+    }
+    return "UTF-7";
+}
+U_CDECL_END
+
+static const UConverterImpl _UTF7Impl={
+ UCNV_UTF7,
+
+ NULL,
+ NULL,
+
+ _UTF7Open,
+ NULL,
+ _UTF7Reset,
+
+ _UTF7ToUnicodeWithOffsets,
+ _UTF7ToUnicodeWithOffsets,
+ _UTF7FromUnicodeWithOffsets,
+ _UTF7FromUnicodeWithOffsets,
+ NULL,
+
+ NULL,
+ _UTF7GetName,
+ NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
+ NULL,
+ ucnv_getCompleteUnicodeSet,
+
+ NULL,
+ NULL
+};
+
+static const UConverterStaticData _UTF7StaticData={
+ sizeof(UConverterStaticData),
+ "UTF-7",
+ 0, /* TODO CCSID for UTF-7 */
+ UCNV_IBM, UCNV_UTF7,
+ 1, 4,
+ { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
+ FALSE, FALSE,
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+const UConverterSharedData _UTF7Data=
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF7StaticData, &_UTF7Impl);
+
+/* IMAP mailbox name encoding ----------------------------------------------- */
+
+/*
+ * RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1
+ * http://www.ietf.org/rfc/rfc2060.txt
+ *
+ * 5.1.3. Mailbox International Naming Convention
+ *
+ * By convention, international mailbox names are specified using a
+ * modified version of the UTF-7 encoding described in [UTF-7]. The
+ * purpose of these modifications is to correct the following problems
+ * with UTF-7:
+ *
+ * 1) UTF-7 uses the "+" character for shifting; this conflicts with
+ * the common use of "+" in mailbox names, in particular USENET
+ * newsgroup names.
+ *
+ * 2) UTF-7's encoding is BASE64 which uses the "/" character; this
+ * conflicts with the use of "/" as a popular hierarchy delimiter.
+ *
+ * 3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with
+ * the use of "\" as a popular hierarchy delimiter.
+ *
+ * 4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with
+ * the use of "~" in some servers as a home directory indicator.
+ *
+ * 5) UTF-7 permits multiple alternate forms to represent the same
+ * string; in particular, printable US-ASCII chararacters can be
+ * represented in encoded form.
+ *
+ * In modified UTF-7, printable US-ASCII characters except for "&"
+ * represent themselves; that is, characters with octet values 0x20-0x25
+ * and 0x27-0x7e. The character "&" (0x26) is represented by the two-
+ * octet sequence "&-".
+ *
+ * All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all
+ * Unicode 16-bit octets) are represented in modified BASE64, with a
+ * further modification from [UTF-7] that "," is used instead of "/".
+ * Modified BASE64 MUST NOT be used to represent any printing US-ASCII
+ * character which can represent itself.
+ *
+ * "&" is used to shift to modified BASE64 and "-" to shift back to US-
+ * ASCII. All names start in US-ASCII, and MUST end in US-ASCII (that
+ * is, a name that ends with a Unicode 16-bit octet MUST end with a "-
+ * ").
+ *
+ * For example, here is a mailbox name which mixes English, Japanese,
+ * and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw-
+ */
+
+/*
+ * Reference copy of the US-ASCII character classes defined for plain UTF-7
+ * in RFC 2152; the IMAP variant is tested with the isLegalIMAP() macros below.
+ *
+ * Set D (directly encoded characters) consists of the following
+ * characters: the upper and lower case letters A through Z
+ * and a through z, the 10 digits 0-9, and the following nine special
+ * characters (note that "+" and "=" are omitted):
+ * '(),-./:?
+ *
+ * Set O (optional direct characters) consists of the following
+ * characters (note that "\" and "~" are omitted):
+ * !"#$%&*;<=>@[]^_`{|}
+ *
+ * According to the rules in RFC 2152, the byte values for the following
+ * US-ASCII characters are not used in UTF-7 and are therefore illegal:
+ * - all C0 control codes except for CR LF TAB
+ * - BACKSLASH
+ * - TILDE
+ * - DEL
+ * - all codes beyond US-ASCII, i.e. all >127
+ */
+
+/* IMAP mailbox names use '&' not '+' to start a base64 sequence */
+#define AMPERSAND 0x26
+#define COMMA 0x2c /* ',' stands for base64 value 63 in TO_BASE64_IMAP/FROM_BASE64_IMAP */
+#define SLASH 0x2f /* '/' is rejected: FROM_BASE64_IMAP maps it to -1 */
+
+/* legal byte values: all US-ASCII graphic characters 0x20..0x7e (c is evaluated twice) */
+#define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e)
+
+/* direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26; NOTE(review): the last use of c is unparenthesized, so pass simple expressions only */
+#define inSetDIMAP(c) (isLegalIMAP(c) && c!=AMPERSAND)
+
+#define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[n] : COMMA) /* values below 63 via toBase64[] (defined outside this view), anything else -> ',' */
+#define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? -1 : fromBase64[c]) /* ',' -> 63, '/' -> -1, otherwise fromBase64[] (defined outside this view) */
+
+/*
+ * converter status values:
+ *
+ * toUnicodeStatus:
+ * 24 inDirectMode (boolean)
+ * 23..16 base64Counter (-1..7)
+ * 15..0 bits (up to 14 bits incoming base64)
+ *
+ * fromUnicodeStatus:
+ * 24 inDirectMode (boolean)
+ * 23..16 base64Counter (0..2)
+ * 7..0 bits (6 bits outgoing base64)
+ *
+ * ignore bits 31..25
+ */
+
+U_CDECL_BEGIN
+static void U_CALLCONV /* toUnicode: IMAP mailbox name bytes (modified UTF-7) -> UTF-16; fills pArgs->offsets when it is not NULL */
+_IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const uint8_t *source, *sourceLimit;
+ UChar *target;
+ const UChar *targetLimit;
+ int32_t *offsets; /* may be NULL; every offsets write below is guarded */
+
+ uint8_t *bytes; /* points at cnv->toUBytes: bytes of the sequence being collected or reported */
+ uint8_t byteIndex; /* number of bytes currently held in bytes[]; written back to cnv->toULength */
+
+ int32_t length, targetCapacity;
+
+ /* UTF-7 state, unpacked from and repacked into cnv->toUnicodeStatus */
+ uint16_t bits; /* partial UTF-16 code unit collected from base64 digits */
+ int8_t base64Counter; /* position in the base64 digit cycle (0..7); -1 immediately after '&' */
+ UBool inDirectMode;
+
+ int8_t base64Value; /* >=0: base64 digit value; -2: minus sign; other negative values: illegal here */
+
+ int32_t sourceIndex, nextSourceIndex;
+
+ UChar c;
+ uint8_t b;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+
+ source=(const uint8_t *)pArgs->source;
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+ target=pArgs->target;
+ targetLimit=pArgs->targetLimit;
+ offsets=pArgs->offsets;
+ /* get the state machine state: bit 24 inDirectMode, bits 23..16 base64Counter, bits 15..0 bits */
+ {
+ uint32_t status=cnv->toUnicodeStatus;
+ inDirectMode=(UBool)((status>>24)&1);
+ base64Counter=(int8_t)(status>>16);
+ bits=(uint16_t)status;
+ }
+ bytes=cnv->toUBytes;
+ byteIndex=cnv->toULength;
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex=byteIndex==0 ? 0 : -1;
+ nextSourceIndex=0;
+
+ if(inDirectMode) {
+directMode:
+ /*
+ * In Direct Mode, US-ASCII characters are encoded directly, i.e.,
+ * with their US-ASCII byte values.
+ * An ampersand starts Unicode (or "escape") Mode.
+ *
+ * In Direct Mode, only the sourceIndex is used.
+ */
+ byteIndex=0;
+ length=(int32_t)(sourceLimit-source);
+ targetCapacity=(int32_t)(targetLimit-target);
+ if(length>targetCapacity) {
+ length=targetCapacity; /* each direct byte yields one UChar, so cap the loop at the target capacity */
+ }
+ while(length>0) {
+ b=*source++;
+ if(!isLegalIMAP(b)) {
+ /* illegal: byte outside 0x20..0x7e; report it via bytes[] */
+ bytes[0]=b;
+ byteIndex=1;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ } else if(b!=AMPERSAND) {
+ /* write directly encoded character */
+ *target++=b;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex++;
+ }
+ } else /* AMPERSAND */ {
+ /* switch to Unicode mode; base64Counter=-1 marks "immediately after the &" */
+ nextSourceIndex=++sourceIndex;
+ inDirectMode=FALSE;
+ byteIndex=0;
+ bits=0;
+ base64Counter=-1;
+ goto unicodeMode;
+ }
+ --length;
+ }
+ if(source<sourceLimit && target>=targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ } else {
+unicodeMode:
+ /*
+ * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
+ * The base64 sequence ends with any character that is not in the base64 alphabet.
+ * A terminating minus sign is consumed.
+ * US-ASCII must not be base64-ed.
+ *
+ * In Unicode Mode, the sourceIndex has the index to the start of the current
+ * base64 bytes, while nextSourceIndex is precisely parallel to source,
+ * keeping the index to the following byte.
+ * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
+ */
+ while(source<sourceLimit) {
+ if(target<targetLimit) {
+ bytes[byteIndex++]=b=*source++;
+ ++nextSourceIndex;
+ if(b>0x7e) {
+ /* illegal - test other illegal US-ASCII values by base64Value==-3 */
+ inDirectMode=TRUE;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ } else if((base64Value=FROM_BASE64_IMAP(b))>=0) {
+ /* collect base64 bytes into UChars; a UChar is completed at counters 2, 5 and 7 */
+ switch(base64Counter) {
+ case -1: /* -1 is immediately after the & */
+ case 0:
+ bits=base64Value;
+ base64Counter=1;
+ break;
+ case 1:
+ case 3:
+ case 4:
+ case 6:
+ bits=(uint16_t)((bits<<6)|base64Value);
+ ++base64Counter;
+ break;
+ case 2:
+ c=(UChar)((bits<<4)|(base64Value>>2)); /* 12 collected bits + top 4 bits of this digit */
+ if(isLegalIMAP(c)) {
+ /* illegal: printable US-ASCII must not be base64-encoded */
+ inDirectMode=TRUE;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+ *target++=c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ sourceIndex=nextSourceIndex-1;
+ }
+ bytes[0]=b; /* keep this byte in case an error occurs */
+ byteIndex=1;
+ bits=(uint16_t)(base64Value&3); /* low 2 bits start the next UChar */
+ base64Counter=3;
+ break;
+ case 5:
+ c=(UChar)((bits<<2)|(base64Value>>4)); /* 14 collected bits + top 2 bits of this digit */
+ if(isLegalIMAP(c)) {
+ /* illegal: printable US-ASCII must not be base64-encoded */
+ inDirectMode=TRUE;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+ *target++=c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ sourceIndex=nextSourceIndex-1;
+ }
+ bytes[0]=b; /* keep this byte in case an error occurs */
+ byteIndex=1;
+ bits=(uint16_t)(base64Value&15); /* low 4 bits start the next UChar */
+ base64Counter=6;
+ break;
+ case 7:
+ c=(UChar)((bits<<6)|base64Value); /* 10 collected bits + all 6 bits of this digit */
+ if(isLegalIMAP(c)) {
+ /* illegal: printable US-ASCII must not be base64-encoded */
+ inDirectMode=TRUE;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+ *target++=c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ sourceIndex=nextSourceIndex;
+ }
+ byteIndex=0; /* no bits are left over, so no byte needs to be remembered */
+ bits=0;
+ base64Counter=0;
+ break;
+ default:
+ /* will never occur */
+ break;
+ }
+ } else if(base64Value==-2) {
+ /* minus sign terminates the base64 sequence */
+ inDirectMode=TRUE;
+ if(base64Counter==-1) {
+ /* &- i.e. a minus immediately following an ampersand */
+ *target++=AMPERSAND;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex-1;
+ }
+ } else {
+ /* absorb the minus and leave the Unicode Mode */
+ if(bits!=0 || (base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) {
+ /* bits are illegally left over, a UChar is incomplete */
+ /* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+ sourceIndex=nextSourceIndex;
+ goto directMode; /* directMode resets byteIndex to 0 */
+ } else {
+ if(base64Counter==-1) {
+ /* illegal: & immediately followed by something other than base64 or minus sign */
+ /* include the ampersand in the reported sequence */
+ --sourceIndex;
+ bytes[0]=AMPERSAND;
+ bytes[1]=b;
+ byteIndex=2;
+ }
+ /* base64Value==-1 for characters that are illegal only in Unicode mode */
+ /* base64Value==-3 for illegal characters */
+ /* illegal */
+ inDirectMode=TRUE;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ } else {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ }
+endloop:
+
+ /*
+ * the end of the input stream and detection of truncated input
+ * are handled by the framework, but here we must check if we are in Unicode
+ * mode and byteIndex==0 because we must end in direct mode
+ *
+ * conditions:
+ * successful
+ * in Unicode mode and byteIndex==0
+ * end of input and no truncated input
+ */
+ if( U_SUCCESS(*pErrorCode) &&
+ !inDirectMode && byteIndex==0 &&
+ pArgs->flush && source>=sourceLimit
+ ) {
+ if(base64Counter==-1) {
+ /* & at the very end of the input */
+ /* make the ampersand the reported sequence */
+ bytes[0]=AMPERSAND;
+ byteIndex=1;
+ }
+ /* else if(base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */
+
+ inDirectMode=TRUE; /* avoid looping */
+ *pErrorCode=U_TRUNCATED_CHAR_FOUND;
+ }
+
+ /* set the converter state back into UConverter (same bit layout as read at the top) */
+ cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
+ cnv->toULength=byteIndex;
+
+ /* write back the updated pointers */
+ pArgs->source=(const char *)source;
+ pArgs->target=target;
+ pArgs->offsets=offsets;
+ return;
+}
+
+static void U_CALLCONV /* fromUnicode: UTF-16 -> IMAP mailbox name bytes (modified UTF-7); fills pArgs->offsets when it is not NULL */
+_IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const UChar *source, *sourceLimit;
+ uint8_t *target, *targetLimit;
+ int32_t *offsets; /* may be NULL; every offsets write below is guarded */
+
+ int32_t length, targetCapacity, sourceIndex;
+ UChar c;
+ uint8_t b;
+
+ /* UTF-7 state, unpacked from and repacked into cnv->fromUnicodeStatus */
+ uint8_t bits; /* leftover bits of the previous UChar, already shifted into output position */
+ int8_t base64Counter; /* 0..2: which of the three UChar alignments within the base64 stream comes next */
+ UBool inDirectMode;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+
+ /* copy the buffer pointers out of pArgs */
+ source=pArgs->source;
+ sourceLimit=pArgs->sourceLimit;
+ target=(uint8_t *)pArgs->target;
+ targetLimit=(uint8_t *)pArgs->targetLimit;
+ offsets=pArgs->offsets;
+
+ /* get the state machine state: bit 24 inDirectMode, bits 23..16 base64Counter, bits 7..0 bits */
+ {
+ uint32_t status=cnv->fromUnicodeStatus;
+ inDirectMode=(UBool)((status>>24)&1);
+ base64Counter=(int8_t)(status>>16);
+ bits=(uint8_t)status;
+ }
+
+ /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
+ sourceIndex=0;
+
+ if(inDirectMode) {
+directMode:
+ length=(int32_t)(sourceLimit-source);
+ targetCapacity=(int32_t)(targetLimit-target);
+ if(length>targetCapacity) {
+ length=targetCapacity; /* every iteration writes at least one byte, so cap the loop at the target capacity */
+ }
+ while(length>0) {
+ c=*source++;
+ /* encode 0x20..0x7e except '&' directly */
+ if(inSetDIMAP(c)) {
+ /* encode directly */
+ *target++=(uint8_t)c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex++;
+ }
+ } else if(c==AMPERSAND) {
+ /* output &- for & (MINUS is defined outside this view) */
+ *target++=AMPERSAND;
+ if(target<targetLimit) {
+ *target++=MINUS;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex++;
+ }
+ /* realign length and targetCapacity */
+ goto directMode;
+ } else {
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex++;
+ }
+ cnv->charErrorBuffer[0]=MINUS; /* no room for the '-': park it in the overflow buffer */
+ cnv->charErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ } else {
+ /* un-read this character and switch to Unicode Mode */
+ --source;
+ *target++=AMPERSAND;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ inDirectMode=FALSE;
+ base64Counter=0;
+ goto unicodeMode;
+ }
+ --length;
+ }
+ if(source<sourceLimit && target>=targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ } else {
+unicodeMode:
+ while(source<sourceLimit) {
+ if(target<targetLimit) {
+ c=*source++;
+ if(isLegalIMAP(c)) {
+ /* encode directly: 0x20..0x7e (including '&') ends the base64 run */
+ inDirectMode=TRUE;
+
+ /* trick: back out this character to make this easier */
+ --source;
+
+ /* terminate the base64 sequence */
+ if(base64Counter!=0) {
+ /* write remaining bits for the previous character */
+ *target++=TO_BASE64_IMAP(bits);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex-1;
+ }
+ }
+ /* need to terminate with a minus */
+ if(target<targetLimit) {
+ *target++=MINUS;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex-1;
+ }
+ } else {
+ cnv->charErrorBuffer[0]=MINUS; /* no room for the '-': park it in the overflow buffer */
+ cnv->charErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ goto directMode;
+ } else {
+ /*
+ * base64 this character:
+ * Output 2 or 3 base64 bytes for the remaining bits of the previous character
+ * and the bits of this character, each implicitly in UTF-16BE.
+ *
+ * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
+ * character to the next. The actual 2 or 4 bits are shifted to the left edge
+ * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
+ */
+ switch(base64Counter) {
+ case 0: /* no leftover bits: emit 6+6 bits, keep the low 4 */
+ b=(uint8_t)(c>>10);
+ *target++=TO_BASE64_IMAP(b);
+ if(target<targetLimit) {
+ b=(uint8_t)((c>>4)&0x3f);
+ *target++=TO_BASE64_IMAP(b);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex++;
+ }
+ } else {
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex++;
+ }
+ b=(uint8_t)((c>>4)&0x3f);
+ cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
+ cnv->charErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ bits=(uint8_t)((c&15)<<2);
+ base64Counter=1;
+ break;
+ case 1: /* 4 leftover bits: emit (4+2)+6+6 bits, keep the low 2 */
+ b=(uint8_t)(bits|(c>>14));
+ *target++=TO_BASE64_IMAP(b);
+ if(target<targetLimit) {
+ b=(uint8_t)((c>>8)&0x3f);
+ *target++=TO_BASE64_IMAP(b);
+ if(target<targetLimit) {
+ b=(uint8_t)((c>>2)&0x3f);
+ *target++=TO_BASE64_IMAP(b);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex++;
+ }
+ } else {
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex++;
+ }
+ b=(uint8_t)((c>>2)&0x3f);
+ cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
+ cnv->charErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ } else {
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex++;
+ }
+ b=(uint8_t)((c>>8)&0x3f);
+ cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
+ b=(uint8_t)((c>>2)&0x3f);
+ cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
+ cnv->charErrorBufferLength=2;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ bits=(uint8_t)((c&3)<<4);
+ base64Counter=2;
+ break;
+ case 2: /* 2 leftover bits: emit (2+4)+6+6 bits, nothing is kept */
+ b=(uint8_t)(bits|(c>>12));
+ *target++=TO_BASE64_IMAP(b);
+ if(target<targetLimit) {
+ b=(uint8_t)((c>>6)&0x3f);
+ *target++=TO_BASE64_IMAP(b);
+ if(target<targetLimit) {
+ b=(uint8_t)(c&0x3f);
+ *target++=TO_BASE64_IMAP(b);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex++;
+ }
+ } else {
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex++;
+ }
+ b=(uint8_t)(c&0x3f);
+ cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
+ cnv->charErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ } else {
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex++;
+ }
+ b=(uint8_t)((c>>6)&0x3f);
+ cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
+ b=(uint8_t)(c&0x3f);
+ cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
+ cnv->charErrorBufferLength=2;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ bits=0;
+ base64Counter=0;
+ break;
+ default:
+ /* will never occur */
+ break;
+ }
+ }
+ } else {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ }
+
+ if(pArgs->flush && source>=sourceLimit) {
+ /* flush remaining bits to the target */
+ if(!inDirectMode) {
+ if(base64Counter!=0) {
+ if(target<targetLimit) {
+ *target++=TO_BASE64_IMAP(bits);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex-1;
+ }
+ } else {
+ cnv->charErrorBuffer[cnv->charErrorBufferLength++]=TO_BASE64_IMAP(bits); /* appended after any bytes already parked in the overflow buffer */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+ /* need to terminate with a minus */
+ if(target<targetLimit) {
+ *target++=MINUS;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex-1;
+ }
+ } else {
+ cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS; /* appended after any bytes already parked in the overflow buffer */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+ /* reset the state for the next conversion */
+ cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
+ } else {
+ /* set the converter state back into UConverter (same bit layout as read at the top) */
+ cnv->fromUnicodeStatus=
+ (cnv->fromUnicodeStatus&0xf0000000)| /* keep version*/
+ ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
+ }
+
+ /* write back the updated pointers */
+ pArgs->source=source;
+ pArgs->target=(char *)target;
+ pArgs->offsets=offsets;
+ return;
+}
+U_CDECL_END
+
+static const UConverterImpl _IMAPImpl={ /* function table for the IMAP-mailbox-name converter; slot names come from UConverterImpl, declared outside this view */
+ UCNV_IMAP_MAILBOX,
+
+ NULL,
+ NULL,
+
+ _UTF7Open, /* _UTF7Open and _UTF7Reset are defined outside this view; presumably shared with the UTF-7 converter */
+ NULL,
+ _UTF7Reset,
+
+ _IMAPToUnicodeWithOffsets, /* the same function fills two consecutive slots; it handles a NULL offsets pointer itself */
+ _IMAPToUnicodeWithOffsets,
+ _IMAPFromUnicodeWithOffsets, /* likewise for the fromUnicode direction */
+ _IMAPFromUnicodeWithOffsets,
+ NULL,
+
+ NULL,
+ NULL,
+ NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
+ NULL,
+ ucnv_getCompleteUnicodeSet,
+ NULL,
+ NULL
+};
+
+static const UConverterStaticData _IMAPStaticData={ /* static description of the converter; field names come from UConverterStaticData, declared outside this view */
+ sizeof(UConverterStaticData),
+ "IMAP-mailbox-name",
+ 0, /* TODO CCSID for IMAP-mailbox-name */
+ UCNV_IBM, UCNV_IMAP_MAILBOX,
+ 1, 4, /* presumably min/max bytes per character - TODO confirm against UConverterStaticData */
+ { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
+ FALSE, FALSE,
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+const UConverterSharedData _IMAPData= /* non-static: ties the static data and function table above into one shared-data object */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_IMAPStaticData, &_IMAPImpl);
+
+#endif
diff --git a/thirdparty/icu4c/common/ucnv_u8.cpp b/thirdparty/icu4c/common/ucnv_u8.cpp
new file mode 100644
index 0000000000..1ef7fa2f02
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnv_u8.cpp
@@ -0,0 +1,944 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: ucnv_u8.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jul01
+* created by: Markus W. Scherer
+*
+* UTF-8 converter implementation. Used to be in ucnv_utf.c.
+*
+* Also, CESU-8 implementation, see UTR 26.
+* The CESU-8 converter uses all the same functions as the
+* UTF-8 converter, with a branch for converting supplementary code points.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include "uassert.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "cmemory.h"
+#include "ustr_imp.h"
+
+/* Prototypes --------------------------------------------------------------- */
+
+/* Keep these here to make finicky compilers happy */
+
+U_CFUNC void ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs *args,
+ UErrorCode *err); /* defined further down in this file; UTF-16 -> UTF-8/CESU-8 without offsets */
+U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs *args,
+ UErrorCode *err); /* defined further down in this file; same conversion, also writes args->offsets */
+
+
+/* UTF-8 -------------------------------------------------------------------- */
+
+#define MAXIMUM_UCS2 0x0000FFFF /* largest value that fits one UTF-16 code unit; larger values are written as surrogate pairs or 4-byte UTF-8 */
+
+static const uint32_t offsetsFromUTF8[5] = {0, /* indexed by sequence length in bytes; subtracted from the accumulated ((ch<<6)+trail) value to remove the lead/trail marker bits */
+ (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080,
+ (uint32_t) 0x03C82080
+};
+
+static UBool hasCESU8Data(const UConverter *cnv) /* TRUE iff cnv uses the CESU-8 shared data object rather than UTF-8 */
+{
+#if UCONFIG_ONLY_HTML_CONVERSION
+ return FALSE; /* _CESU8Data is not referenced in this configuration */
+#else
+ return (UBool)(cnv->sharedData == &_CESU8Data); /* identity comparison against _CESU8Data (defined outside this view) */
+#endif
+}
+U_CDECL_BEGIN
+static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, /* UTF-8/CESU-8 bytes -> UTF-16; does not touch args->offsets */
+ UErrorCode * err)
+{
+ UConverter *cnv = args->converter;
+ const unsigned char *mySource = (unsigned char *) args->source;
+ UChar *myTarget = args->target;
+ const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
+ const UChar *targetLimit = args->targetLimit;
+ unsigned char *toUBytes = cnv->toUBytes; /* receives the bytes of the sequence being decoded */
+ UBool isCESU8 = hasCESU8Data(cnv);
+ uint32_t ch, ch2 = 0; /* ch accumulates lead and trail bytes by (ch << 6) + trail */
+ int32_t i, inBytes; /* i: bytes consumed so far; inBytes: total length announced by the lead byte */
+
+ /* Restore the state of a sequence left incomplete by a previous call */
+ if (cnv->toULength > 0 && myTarget < targetLimit)
+ {
+ inBytes = cnv->mode; /* restore # of bytes to consume */
+ i = cnv->toULength; /* restore # of bytes consumed */
+ cnv->toULength = 0;
+
+ ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/
+ cnv->toUnicodeStatus = 0;
+ goto morebytes; /* jumps into the loop body below */
+ }
+
+
+ while (mySource < sourceLimit && myTarget < targetLimit)
+ {
+ ch = *(mySource++);
+ if (U8_IS_SINGLE(ch)) /* Simple case */
+ {
+ *(myTarget++) = (UChar) ch;
+ }
+ else
+ {
+ /* store the first char */
+ toUBytes[0] = (char)ch;
+ inBytes = U8_COUNT_BYTES_NON_ASCII(ch); /* lookup current sequence length */
+ i = 1;
+
+morebytes:
+ while (i < inBytes)
+ {
+ if (mySource < sourceLimit)
+ {
+ toUBytes[i] = (char) (ch2 = *mySource); /* peek: the byte is consumed only if it is accepted below */
+ if (!icu::UTF8::isValidTrail(ch, static_cast<uint8_t>(ch2), i, inBytes) &&
+ !(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2))) /* CESU-8 additionally accepts any trail byte after 0xed */
+ {
+ break; /* i < inBytes */
+ }
+ ch = (ch << 6) + ch2;
+ ++mySource;
+ i++;
+ }
+ else
+ {
+ /* stores a partially calculated target*/
+ cnv->toUnicodeStatus = ch;
+ cnv->mode = inBytes;
+ cnv->toULength = (int8_t) i;
+ goto donefornow;
+ }
+ }
+
+ // In CESU-8, only surrogates, not supplementary code points, are encoded directly.
+ if (i == inBytes && (!isCESU8 || i <= 3))
+ {
+ /* Remove the accumulated high bits */
+ ch -= offsetsFromUTF8[inBytes];
+
+ /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
+ if (ch <= MAXIMUM_UCS2)
+ {
+ /* fits in 16 bits */
+ *(myTarget++) = (UChar) ch;
+ }
+ else
+ {
+ /* write out the surrogates */
+ *(myTarget++) = U16_LEAD(ch);
+ ch = U16_TRAIL(ch);
+ if (myTarget < targetLimit)
+ {
+ *(myTarget++) = (UChar)ch;
+ }
+ else
+ {
+ /* Put in overflow buffer (not handled here) */
+ cnv->UCharErrorBuffer[0] = (UChar) ch;
+ cnv->UCharErrorBufferLength = 1;
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ }
+ else
+ {
+ cnv->toULength = (int8_t)i; /* number of bytes in toUBytes that make up the illegal sequence */
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+ }
+
+donefornow:
+ if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
+ {
+ /* End of target buffer */
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ args->target = myTarget;
+ args->source = (const char *) mySource;
+}
+
+static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args, /* UTF-8/CESU-8 bytes -> UTF-16, writing one source offset per output UChar */
+ UErrorCode * err)
+{
+ UConverter *cnv = args->converter;
+ const unsigned char *mySource = (unsigned char *) args->source;
+ UChar *myTarget = args->target;
+ int32_t *myOffsets = args->offsets; /* written without NULL checks below, so the caller must supply an offsets array */
+ int32_t offsetNum = 0; /* source index of the sequence currently being decoded */
+ const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
+ const UChar *targetLimit = args->targetLimit;
+ unsigned char *toUBytes = cnv->toUBytes; /* receives the bytes of the sequence being decoded */
+ UBool isCESU8 = hasCESU8Data(cnv);
+ uint32_t ch, ch2 = 0; /* ch accumulates lead and trail bytes by (ch << 6) + trail */
+ int32_t i, inBytes; /* i: bytes consumed so far; inBytes: total length announced by the lead byte */
+
+ /* Restore the state of a sequence left incomplete by a previous call */
+ if (cnv->toULength > 0 && myTarget < targetLimit)
+ {
+ inBytes = cnv->mode; /* restore # of bytes to consume */
+ i = cnv->toULength; /* restore # of bytes consumed */
+ cnv->toULength = 0;
+
+ ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/
+ cnv->toUnicodeStatus = 0;
+ goto morebytes; /* jumps into the loop body below */
+ }
+
+ while (mySource < sourceLimit && myTarget < targetLimit)
+ {
+ ch = *(mySource++);
+ if (U8_IS_SINGLE(ch)) /* Simple case */
+ {
+ *(myTarget++) = (UChar) ch;
+ *(myOffsets++) = offsetNum++;
+ }
+ else
+ {
+ toUBytes[0] = (char)ch; /* store the lead byte */
+ inBytes = U8_COUNT_BYTES_NON_ASCII(ch); /* lookup current sequence length */
+ i = 1;
+
+morebytes:
+ while (i < inBytes)
+ {
+ if (mySource < sourceLimit)
+ {
+ toUBytes[i] = (char) (ch2 = *mySource); /* peek: the byte is consumed only if it is accepted below */
+ if (!icu::UTF8::isValidTrail(ch, static_cast<uint8_t>(ch2), i, inBytes) &&
+ !(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2))) /* CESU-8 additionally accepts any trail byte after 0xed */
+ {
+ break; /* i < inBytes */
+ }
+ ch = (ch << 6) + ch2;
+ ++mySource;
+ i++;
+ }
+ else
+ {
+ cnv->toUnicodeStatus = ch; /* out of input: save the partial state for the next call */
+ cnv->mode = inBytes;
+ cnv->toULength = (int8_t)i;
+ goto donefornow;
+ }
+ }
+
+ // In CESU-8, only surrogates, not supplementary code points, are encoded directly.
+ if (i == inBytes && (!isCESU8 || i <= 3))
+ {
+ /* Remove the accumulated high bits */
+ ch -= offsetsFromUTF8[inBytes];
+
+ /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
+ if (ch <= MAXIMUM_UCS2)
+ {
+ /* fits in 16 bits */
+ *(myTarget++) = (UChar) ch;
+ *(myOffsets++) = offsetNum;
+ }
+ else
+ {
+ /* write out the surrogates */
+ *(myTarget++) = U16_LEAD(ch);
+ *(myOffsets++) = offsetNum;
+ ch = U16_TRAIL(ch);
+ if (myTarget < targetLimit)
+ {
+ *(myTarget++) = (UChar)ch;
+ *(myOffsets++) = offsetNum;
+ }
+ else
+ {
+ cnv->UCharErrorBuffer[0] = (UChar) ch; /* no room for the trail surrogate: park it in the overflow buffer */
+ cnv->UCharErrorBufferLength = 1;
+ *err = U_BUFFER_OVERFLOW_ERROR; /* no break here; the loop condition ends the loop because the target is full */
+ }
+ }
+ offsetNum += i;
+ }
+ else
+ {
+ cnv->toULength = (int8_t)i; /* number of bytes in toUBytes that make up the illegal sequence */
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+ }
+
+donefornow:
+ if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
+ { /* End of target buffer */
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ args->target = myTarget;
+ args->source = (const char *) mySource;
+ args->offsets = myOffsets;
+}
+U_CDECL_END
+
+U_CFUNC void U_CALLCONV ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args, /* UTF-16 -> UTF-8 (or CESU-8); does not touch args->offsets */
+ UErrorCode * err)
+{
+ UConverter *cnv = args->converter;
+ const UChar *mySource = args->source;
+ const UChar *sourceLimit = args->sourceLimit;
+ uint8_t *myTarget = (uint8_t *) args->target;
+ const uint8_t *targetLimit = (uint8_t *) args->targetLimit;
+ uint8_t *tempPtr; /* points at myTarget when 4 bytes fit, else at tempBuf */
+ UChar32 ch;
+ uint8_t tempBuf[4];
+ int32_t indexToWrite; /* index of the last byte of the 3- or 4-byte sequence */
+ UBool isNotCESU8 = !hasCESU8Data(cnv);
+
+ if (cnv->fromUChar32 && myTarget < targetLimit) /* a code unit was saved in fromUChar32 by an earlier call; continue at the pairing logic */
+ {
+ ch = cnv->fromUChar32;
+ cnv->fromUChar32 = 0;
+ goto lowsurrogate; /* jumps into the loop body below */
+ }
+
+ while (mySource < sourceLimit && myTarget < targetLimit)
+ {
+ ch = *(mySource++);
+
+ if (ch < 0x80) /* Single byte */
+ {
+ *(myTarget++) = (uint8_t) ch;
+ }
+ else if (ch < 0x800) /* Double byte */
+ {
+ *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0);
+ if (myTarget < targetLimit)
+ {
+ *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80);
+ }
+ else
+ {
+ cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80); /* no room for the second byte: park it in the overflow buffer */
+ cnv->charErrorBufferLength = 1;
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+ else {
+ /* Check for surrogates; for CESU-8 they are not combined and fall through to the 3-byte form */
+ if(U16_IS_SURROGATE(ch) && isNotCESU8) {
+lowsurrogate:
+ if (mySource < sourceLimit) {
+ /* test both code units */
+ if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) {
+ /* convert and consume this supplementary code point */
+ ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
+ ++mySource;
+ /* exit this condition tree */
+ }
+ else {
+ /* this is an unpaired trail or lead code unit */
+ /* callback(illegal) */
+ cnv->fromUChar32 = ch;
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+ else {
+ /* no more input: remember the surrogate for the next call */
+ cnv->fromUChar32 = ch;
+ break;
+ }
+ }
+
+ /* Do we write the buffer directly for speed,
+ or do we have to be careful about target buffer space? */
+ tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf);
+
+ if (ch <= MAXIMUM_UCS2) {
+ indexToWrite = 2; /* 3-byte sequence */
+ tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0);
+ }
+ else {
+ indexToWrite = 3; /* 4-byte sequence */
+ tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0);
+ tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80);
+ }
+ tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80);
+ tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80);
+
+ if (tempPtr == myTarget) {
+ /* There was enough space to write the codepoint directly. */
+ myTarget += (indexToWrite + 1);
+ }
+ else {
+ /* We might run out of room soon. Write it slowly. */
+ for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) {
+ if (myTarget < targetLimit) {
+ *(myTarget++) = *tempPtr;
+ }
+ else {
+ cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr; /* bytes that do not fit are appended to the overflow buffer */
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+ }
+ }
+ }
+
+ if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
+ {
+ *err = U_BUFFER_OVERFLOW_ERROR; /* input remains but the target is full */
+ }
+
+ args->target = (char *) myTarget;
+ args->source = mySource;
+}
+
+U_CFUNC void U_CALLCONV ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, /* UTF-16 -> UTF-8 (or CESU-8), writing one source offset per output byte */
+ UErrorCode * err)
+{
+ UConverter *cnv = args->converter;
+ const UChar *mySource = args->source;
+ int32_t *myOffsets = args->offsets; /* written without NULL checks below, so the caller must supply an offsets array */
+ const UChar *sourceLimit = args->sourceLimit;
+ uint8_t *myTarget = (uint8_t *) args->target;
+ const uint8_t *targetLimit = (uint8_t *) args->targetLimit;
+ uint8_t *tempPtr; /* points at myTarget when 4 bytes fit, else at tempBuf */
+ UChar32 ch;
+ int32_t offsetNum, nextSourceIndex; /* offsetNum: source index of the current code point; nextSourceIndex: index of the one after it */
+ int32_t indexToWrite; /* index of the last byte of the 3- or 4-byte sequence */
+ uint8_t tempBuf[4];
+ UBool isNotCESU8 = !hasCESU8Data(cnv);
+
+ if (cnv->fromUChar32 && myTarget < targetLimit) /* a code unit was saved in fromUChar32 by an earlier call; continue at the pairing logic */
+ {
+ ch = cnv->fromUChar32;
+ cnv->fromUChar32 = 0;
+ offsetNum = -1; /* the saved code unit lies before the start of this buffer */
+ nextSourceIndex = 0;
+ goto lowsurrogate; /* jumps into the loop body below */
+ } else {
+ offsetNum = 0;
+ }
+
+ while (mySource < sourceLimit && myTarget < targetLimit)
+ {
+ ch = *(mySource++);
+
+ if (ch < 0x80) /* Single byte */
+ {
+ *(myOffsets++) = offsetNum++;
+ *(myTarget++) = (char) ch;
+ }
+ else if (ch < 0x800) /* Double byte */
+ {
+ *(myOffsets++) = offsetNum;
+ *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0);
+ if (myTarget < targetLimit)
+ {
+ *(myOffsets++) = offsetNum++;
+ *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80);
+ }
+ else
+ {
+ cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80); /* no room for the second byte: park it in the overflow buffer */
+ cnv->charErrorBufferLength = 1;
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+ else
+ /* Check for surrogates; for CESU-8 they are not combined and fall through to the 3-byte form */
+ {
+ nextSourceIndex = offsetNum + 1;
+
+ if(U16_IS_SURROGATE(ch) && isNotCESU8) {
+lowsurrogate:
+ if (mySource < sourceLimit) {
+ /* test both code units */
+ if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) {
+ /* convert and consume this supplementary code point */
+ ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
+ ++mySource;
+ ++nextSourceIndex;
+ /* exit this condition tree */
+ }
+ else {
+ /* this is an unpaired trail or lead code unit */
+ /* callback(illegal) */
+ cnv->fromUChar32 = ch;
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+ else {
+ /* no more input: remember the surrogate for the next call */
+ cnv->fromUChar32 = ch;
+ break;
+ }
+ }
+
+ /* Do we write the buffer directly for speed,
+ or do we have to be careful about target buffer space? */
+ tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf);
+
+ if (ch <= MAXIMUM_UCS2) {
+ indexToWrite = 2; /* 3-byte sequence */
+ tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0);
+ }
+ else {
+ indexToWrite = 3; /* 4-byte sequence */
+ tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0);
+ tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80);
+ }
+ tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80);
+ tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80);
+
+ if (tempPtr == myTarget) {
+ /* There was enough space to write the codepoint directly. */
+ myTarget += (indexToWrite + 1);
+ myOffsets[0] = offsetNum;
+ myOffsets[1] = offsetNum;
+ myOffsets[2] = offsetNum;
+ if (indexToWrite >= 3) {
+ myOffsets[3] = offsetNum;
+ }
+ myOffsets += (indexToWrite + 1);
+ }
+ else {
+ /* We might run out of room soon. Write it slowly. */
+ for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) {
+ if (myTarget < targetLimit)
+ {
+ *(myOffsets++) = offsetNum;
+ *(myTarget++) = *tempPtr;
+ }
+ else
+ {
+ cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr; /* bytes that do not fit are appended to the overflow buffer; no offset is written for them */
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+ }
+ offsetNum = nextSourceIndex;
+ }
+ }
+
+ if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
+ {
+ *err = U_BUFFER_OVERFLOW_ERROR; /* input remains but the target is full */
+ }
+
+ args->target = (char *) myTarget;
+ args->source = mySource;
+ args->offsets = myOffsets;
+}
+
+U_CDECL_BEGIN
+static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args, /* decodes one code point from args->source and advances it; returns 0xffff with *err set on any failure */
+ UErrorCode *err) {
+ UConverter *cnv;
+ const uint8_t *sourceInitial; /* start of the sequence, used to copy the bad bytes into toUBytes */
+ const uint8_t *source;
+ uint8_t myByte; /* the lead byte */
+ UChar32 ch;
+ int8_t i;
+
+ /* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */
+
+ cnv = args->converter;
+ sourceInitial = source = (const uint8_t *)args->source;
+ if (source >= (const uint8_t *)args->sourceLimit)
+ {
+ /* no input */
+ *err = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0xffff;
+ }
+
+ myByte = (uint8_t)*(source++);
+ if (U8_IS_SINGLE(myByte))
+ {
+ args->source = (const char *)source;
+ return (UChar32)myByte;
+ }
+
+ uint16_t countTrailBytes = U8_COUNT_TRAIL_BYTES(myByte);
+ if (countTrailBytes == 0) { /* a non-single byte with no trail bytes is reported as illegal */
+ cnv->toUBytes[0] = myByte;
+ cnv->toULength = 1;
+ *err = U_ILLEGAL_CHAR_FOUND;
+ args->source = (const char *)source;
+ return 0xffff;
+ }
+
+ /*The byte sequence is longer than the buffer area passed*/
+ if (((const char *)source + countTrailBytes) > args->sourceLimit)
+ {
+ /* check if all of the remaining bytes are trail bytes */
+ uint16_t extraBytesToWrite = countTrailBytes + 1; /* total sequence length including the lead byte */
+ cnv->toUBytes[0] = myByte;
+ i = 1;
+ *err = U_TRUNCATED_CHAR_FOUND; /* downgraded to U_ILLEGAL_CHAR_FOUND below if a bad trail byte shows up first */
+ while(source < (const uint8_t *)args->sourceLimit) {
+ uint8_t b = *source;
+ if(icu::UTF8::isValidTrail(myByte, b, i, extraBytesToWrite)) {
+ cnv->toUBytes[i++] = b;
+ ++source;
+ } else {
+ /* error even before we run out of input */
+ *err = U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+ cnv->toULength = i;
+ args->source = (const char *)source;
+ return 0xffff;
+ }
+
+ ch = myByte << 6; /* each success path below adds the trail bytes and subtracts offsetsFromUTF8[length] */
+ if(countTrailBytes == 2) {
+ uint8_t t1 = *source, t2;
+ if(U8_IS_VALID_LEAD3_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source)) {
+ args->source = (const char *)(source + 1);
+ return (((ch + t1) << 6) + t2) - offsetsFromUTF8[3];
+ }
+ } else if(countTrailBytes == 1) {
+ uint8_t t1 = *source;
+ if(U8_IS_TRAIL(t1)) {
+ args->source = (const char *)(source + 1);
+ return (ch + t1) - offsetsFromUTF8[2];
+ }
+ } else { // countTrailBytes == 3
+ uint8_t t1 = *source, t2, t3;
+ if(U8_IS_VALID_LEAD4_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source) &&
+ U8_IS_TRAIL(t3 = *++source)) {
+ args->source = (const char *)(source + 1);
+ return (((((ch + t1) << 6) + t2) << 6) + t3) - offsetsFromUTF8[4];
+ }
+ }
+ args->source = (const char *)source; /* failure: source stops at the first byte that was not accepted */
+
+ for(i = 0; sourceInitial < source; ++i) { /* copy the accepted bytes of the bad sequence into toUBytes */
+ cnv->toUBytes[i] = *sourceInitial++;
+ }
+ cnv->toULength = i;
+ *err = U_ILLEGAL_CHAR_FOUND;
+ return 0xffff;
+}
+U_CDECL_END
+
+/* UTF-8-from-UTF-8 conversion functions ------------------------------------ */
+
+U_CDECL_BEGIN
+/* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). Sets *pErrorCode to U_USING_DEFAULT_WARNING where it leaves the work to the standard (pivoting) conversion. */
+static void U_CALLCONV
+ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
+ UConverterToUnicodeArgs *pToUArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *utf8;
+ const uint8_t *source, *sourceLimit;
+ uint8_t *target;
+ int32_t targetCapacity;
+ int32_t count; /* number of bytes the conversion loop may still process; clamped and truncated below */
+
+ int8_t oldToULength, toULength, toULimit; /* bytes of the sequence held in toUBytes / bytes of it seen so far / its expected total length */
+
+ UChar32 c;
+ uint8_t b, t1, t2;
+
+ /* set up the local pointers */
+ utf8=pToUArgs->converter;
+ source=(uint8_t *)pToUArgs->source;
+ sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
+ target=(uint8_t *)pFromUArgs->target;
+ targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
+
+ /* get the converter state from the UTF-8 UConverter */
+ if(utf8->toULength > 0) {
+ toULength=oldToULength=utf8->toULength;
+ toULimit=(int8_t)utf8->mode;
+ c=(UChar32)utf8->toUnicodeStatus;
+ } else {
+ toULength=oldToULength=toULimit=0;
+ c = 0;
+ }
+
+ count=(int32_t)(sourceLimit-source)+oldToULength;
+ if(count<toULimit) {
+ /*
+ * Not enough input to complete the partial character.
+ * Jump to moreBytes below - it will not output to target.
+ */
+ } else if(targetCapacity<toULimit) {
+ /*
+ * Not enough target capacity to output the partial character.
+ * Let the standard converter handle this.
+ */
+ *pErrorCode=U_USING_DEFAULT_WARNING;
+ return;
+ } else {
+ // Use a single counter for source and target, counting the minimum of
+ // the source length and the target capacity.
+ // Let the standard converter handle edge cases.
+ if(count>targetCapacity) {
+ count=targetCapacity;
+ }
+
+ // The conversion loop checks count>0 only once per character.
+ // If the buffer ends with a truncated sequence,
+ // then we reduce the count to stop before that,
+ // and collect the remaining bytes after the conversion loop.
+
+ // Do not go back into the bytes that will be read for finishing a partial
+ // sequence from the previous buffer.
+ int32_t length=count-toULength;
+ U8_TRUNCATE_IF_INCOMPLETE(source, 0, length);
+ count=toULength+length;
+ }
+
+ if(c!=0) {
+ utf8->toUnicodeStatus=0;
+ utf8->toULength=0;
+ goto moreBytes;
+ /* See note in ucnv_SBCSFromUTF8() about this goto. */
+ }
+
+ /* conversion loop */
+ while(count>0) {
+ b=*source++;
+ if(U8_IS_SINGLE(b)) {
+ /* convert ASCII */
+ *target++=b;
+ --count;
+ continue;
+ } else {
+ if(b>=0xe0) {
+ if( /* handle U+0800..U+FFFF inline */
+ b<0xf0 &&
+ U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) &&
+ U8_IS_TRAIL(t2=source[1])
+ ) {
+ source+=2;
+ *target++=b;
+ *target++=t1;
+ *target++=t2;
+ count-=3;
+ continue;
+ }
+ } else {
+ if( /* handle U+0080..U+07FF inline */
+ b>=0xc2 &&
+ U8_IS_TRAIL(t1=*source)
+ ) {
+ ++source;
+ *target++=b;
+ *target++=t1;
+ count-=2;
+ continue;
+ }
+ }
+
+ /* handle "complicated" and error cases, and continuing partial characters */
+ oldToULength=0;
+ toULength=1;
+ toULimit=U8_COUNT_BYTES_NON_ASCII(b);
+ c=b;
+moreBytes: /* also entered by the goto above to resume a sequence left incomplete by a previous call */
+ while(toULength<toULimit) {
+ if(source<sourceLimit) {
+ b=*source;
+ if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
+ ++source;
+ ++toULength;
+ c=(c<<6)+b;
+ } else {
+ break; /* sequence too short, stop with toULength<toULimit */
+ }
+ } else {
+ /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
+ source-=(toULength-oldToULength);
+ while(oldToULength<toULength) {
+ utf8->toUBytes[oldToULength++]=*source++;
+ }
+ utf8->toUnicodeStatus=c;
+ utf8->toULength=toULength;
+ utf8->mode=toULimit;
+ pToUArgs->source=(char *)source;
+ pFromUArgs->target=(char *)target;
+ return;
+ }
+ }
+
+ if(toULength!=toULimit) {
+ /* error handling: illegal UTF-8 byte sequence */
+ source-=(toULength-oldToULength);
+ while(oldToULength<toULength) {
+ utf8->toUBytes[oldToULength++]=*source++;
+ }
+ utf8->toULength=toULength;
+ pToUArgs->source=(char *)source;
+ pFromUArgs->target=(char *)target;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ return;
+ }
+
+ /* copy the legal byte sequence to the target */
+ {
+ int8_t i;
+
+ for(i=0; i<oldToULength; ++i) {
+ *target++=utf8->toUBytes[i];
+ }
+ source-=(toULength-oldToULength); /* rewind over the bytes of this sequence that were read from the current buffer */
+ for(; i<toULength; ++i) {
+ *target++=*source++;
+ }
+ count-=toULength;
+ }
+ }
+ }
+ U_ASSERT(count>=0);
+
+ if(U_SUCCESS(*pErrorCode) && source<sourceLimit) {
+ if(target==(const uint8_t *)pFromUArgs->targetLimit) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ } else {
+ b=*source;
+ toULimit=U8_COUNT_BYTES(b);
+ if(toULimit>(sourceLimit-source)) {
+ /* collect a truncated byte sequence */
+ toULength=0;
+ c=b;
+ for(;;) {
+ utf8->toUBytes[toULength++]=b;
+ if(++source==sourceLimit) {
+ /* partial byte sequence at end of source */
+ utf8->toUnicodeStatus=c;
+ utf8->toULength=toULength;
+ utf8->mode=toULimit;
+ break;
+ } else if(!icu::UTF8::isValidTrail(c, b=*source, toULength, toULimit)) {
+ utf8->toULength=toULength;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ c=(c<<6)+b;
+ }
+ } else {
+ /* partial-sequence target overflow: fall back to the pivoting implementation */
+ *pErrorCode=U_USING_DEFAULT_WARNING;
+ }
+ }
+ }
+
+ /* write back the updated pointers */
+ pToUArgs->source=(char *)source;
+ pFromUArgs->target=(char *)target;
+}
+
+U_CDECL_END
+
+/* UTF-8 converter data ----------------------------------------------------- */
+
+static const UConverterImpl _UTF8Impl={ /* function table for the UTF-8 converter; unused slots are NULL (slot meanings follow the UConverterImpl declaration, not visible here) */
+ UCNV_UTF8,
+
+ NULL,
+ NULL,
+
+ NULL,
+ NULL,
+ NULL,
+
+ ucnv_toUnicode_UTF8,
+ ucnv_toUnicode_UTF8_OFFSETS_LOGIC,
+ ucnv_fromUnicode_UTF8,
+ ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
+ ucnv_getNextUChar_UTF8,
+
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ ucnv_getNonSurrogateUnicodeSet,
+
+ ucnv_UTF8FromUTF8, /* the same UTF-8-to-UTF-8 routine is registered in both of the last two slots */
+ ucnv_UTF8FromUTF8
+};
+
+/* The 1208 CCSID refers to any version of Unicode encoded as UTF-8 */
+static const UConverterStaticData _UTF8StaticData={
+ sizeof(UConverterStaticData),
+ "UTF-8",
+ 1208, UCNV_IBM, UCNV_UTF8,
+ 1, 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
+ { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, /* EF BF BD is U+FFFD in UTF-8; presumably the substitution bytes and their length - confirm against UConverterStaticData */
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+
+const UConverterSharedData _UTF8Data= /* shared-data object built from the UTF-8 static data and function table above */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF8StaticData, &_UTF8Impl);
+
+/* CESU-8 converter data ---------------------------------------------------- */
+
+static const UConverterImpl _CESU8Impl={ /* function table for CESU-8; reuses the UTF-8 to/from-Unicode functions */
+ UCNV_CESU8,
+
+ NULL,
+ NULL,
+
+ NULL,
+ NULL,
+ NULL,
+
+ ucnv_toUnicode_UTF8,
+ ucnv_toUnicode_UTF8_OFFSETS_LOGIC,
+ ucnv_fromUnicode_UTF8,
+ ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
+ NULL, /* nothing registered here for CESU-8; _UTF8Impl has ucnv_getNextUChar_UTF8 in this position */
+
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ ucnv_getCompleteUnicodeSet,
+
+ NULL, /* no UTF-8-to-UTF-8 routine registered for CESU-8 (both of the last two slots are NULL) */
+ NULL
+};
+
+static const UConverterStaticData _CESU8StaticData={
+ sizeof(UConverterStaticData),
+ "CESU-8",
+ 9400, /* CCSID for CESU-8 */
+ UCNV_UNKNOWN, UCNV_CESU8, 1, 3,
+ { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, /* same EF BF BD bytes and length 3 as in _UTF8StaticData */
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+
+const UConverterSharedData _CESU8Data= /* shared-data object built from the CESU-8 static data and function table above */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CESU8StaticData, &_CESU8Impl);
+
+#endif
diff --git a/thirdparty/icu4c/common/ucnvbocu.cpp b/thirdparty/icu4c/common/ucnvbocu.cpp
new file mode 100644
index 0000000000..7c2aab5655
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnvbocu.cpp
@@ -0,0 +1,1413 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ucnvbocu.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002mar27
+* created by: Markus W. Scherer
+*
+* This is an implementation of the Binary Ordered Compression for Unicode,
+* in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
+#include "putilimp.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "uassert.h"
+
+/* BOCU-1 constants and macros ---------------------------------------------- */
+
+/*
+ * BOCU-1 encodes the code points of a Unicode string as
+ * a sequence of byte-encoded differences (slope detection),
+ * preserving lexical order.
+ *
+ * Optimize the difference-taking for runs of Unicode text within
+ * small scripts:
+ *
+ * Most small scripts are allocated within aligned 128-blocks of Unicode
+ * code points. Lexical order is preserved if the "previous code point" state
+ * is always moved into the middle of such a block.
+ *
+ * Additionally, "prev" is moved from anywhere in the Unihan and Hangul
+ * areas into the middle of those areas.
+ *
+ * C0 control codes and space are encoded with their US-ASCII bytes.
+ * "prev" is reset for C0 controls but not for space.
+ */
+
+/* initial value for "prev": middle of the ASCII range */
+#define BOCU1_ASCII_PREV 0x40
+
+/* bounding byte values for differences */
+#define BOCU1_MIN 0x21
+#define BOCU1_MIDDLE 0x90
+#define BOCU1_MAX_LEAD 0xfe
+#define BOCU1_MAX_TRAIL 0xff
+#define BOCU1_RESET 0xff
+
+/* number of lead bytes: 0xfe-0x21+1 = 222 */
+#define BOCU1_COUNT (BOCU1_MAX_LEAD-BOCU1_MIN+1)
+
+/* adjust trail byte counts for the use of some C0 control byte values */
+#define BOCU1_TRAIL_CONTROLS_COUNT 20
+#define BOCU1_TRAIL_BYTE_OFFSET (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT)
+
+/* number of trail bytes: (0xff-0x21+1)+20 = 243 */
+#define BOCU1_TRAIL_COUNT ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT)
+
+/*
+ * number of positive and negative single-byte codes
+ * (counting 0==BOCU1_MIDDLE among the positive ones)
+ */
+#define BOCU1_SINGLE 64
+
+/* number of lead bytes for positive and negative 2/3/4-byte sequences */
+#define BOCU1_LEAD_2 43
+#define BOCU1_LEAD_3 3
+#define BOCU1_LEAD_4 1
+
+/* The difference value range for single-byters: -64..63 */
+#define BOCU1_REACH_POS_1 (BOCU1_SINGLE-1)
+#define BOCU1_REACH_NEG_1 (-BOCU1_SINGLE)
+
+/* The difference value range for double-byters: -10513..10512 */
+#define BOCU1_REACH_POS_2 (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
+#define BOCU1_REACH_NEG_2 (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
+
+/* The difference value range for 3-byters: -187660..187659 */
+#define BOCU1_REACH_POS_3 \
+ (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)
+
+#define BOCU1_REACH_NEG_3 (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)
+
+/* The lead byte start values (positive: 0xd0, 0xfb, 0xfe; negative: 0x50, 0x25, 0x22). */
+#define BOCU1_START_POS_2 (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1)
+#define BOCU1_START_POS_3 (BOCU1_START_POS_2+BOCU1_LEAD_2)
+#define BOCU1_START_POS_4 (BOCU1_START_POS_3+BOCU1_LEAD_3)
+ /* ==BOCU1_MAX_LEAD */
+
+#define BOCU1_START_NEG_2 (BOCU1_MIDDLE+BOCU1_REACH_NEG_1)
+#define BOCU1_START_NEG_3 (BOCU1_START_NEG_2-BOCU1_LEAD_2)
+#define BOCU1_START_NEG_4 (BOCU1_START_NEG_3-BOCU1_LEAD_3)
+ /* ==BOCU1_MIN+1 */
+
+/* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */
+#define BOCU1_LENGTH_FROM_LEAD(lead) \
+ ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 : \
+ (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 : \
+ (BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4)
+
+/* The length of a byte sequence, according to its packed form (see the return values of packDiff()). */
+#define BOCU1_LENGTH_FROM_PACKED(packed) \
+ ((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4)
+
+/*
+ * 12 commonly used C0 control codes (and space) are only used to encode
+ * themselves directly,
+ * which makes BOCU-1 MIME-usable and reasonably safe for
+ * ASCII-oriented software.
+ *
+ * These controls are
+ * 0 NUL
+ *
+ * 7 BEL
+ * 8 BS
+ *
+ * 9 TAB
+ * a LF
+ * b VT
+ * c FF
+ * d CR
+ *
+ * e SO
+ * f SI
+ *
+ * 1a SUB
+ * 1b ESC
+ *
+ * The other 20 C0 controls are also encoded directly (to preserve order)
+ * but are also used as trail bytes in difference encoding
+ * (for better compression): trail values 0..19 map to them via bocu1TrailToByte[], larger ones are shifted by BOCU1_TRAIL_BYTE_OFFSET.
+ */
+#define BOCU1_TRAIL_TO_BYTE(t) ((t)>=BOCU1_TRAIL_CONTROLS_COUNT ? (t)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[t])
+
+/*
+ * Byte value map for control codes,
+ * from external byte values 0x00..0x20
+ * to trail byte values 0..19 (0..0x13) as used in the difference calculation.
+ * External byte values that are illegal as trail bytes (the 12 directly encoded controls and space) are mapped to -1.
+ */
+static const int8_t
+bocu1ByteToTrail[BOCU1_MIN]={
+/* 0 1 2 3 4 5 6 7 */
+ -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
+
+/* 8 9 a b c d e f */
+ -1, -1, -1, -1, -1, -1, -1, -1,
+
+/* 10 11 12 13 14 15 16 17 */
+ 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
+
+/* 18 19 1a 1b 1c 1d 1e 1f */
+ 0x0e, 0x0f, -1, -1, 0x10, 0x11, 0x12, 0x13,
+
+/* 20 */
+ -1
+};
+
+/*
+ * Byte value map for control codes,
+ * from trail byte values 0..19 (0..0x13) as used in the difference calculation
+ * to external byte values within 0x01..0x1f (the inverse of bocu1ByteToTrail[]).
+ */
+static const int8_t
+bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={
+/* 0 1 2 3 4 5 6 7 */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
+
+/* 8 9 a b c d e f */
+ 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+
+/* 10 11 12 13 */
+ 0x1c, 0x1d, 0x1e, 0x1f
+};
+
+/**
+ * Integer division and modulo with negative numerators
+ * yields negative modulo results and quotients that are one more than
+ * what we need here.
+ * This macro adjusts the results so that the modulo-value m is always >=0.
+ *
+ * For positive n, the if() condition is always FALSE.
+ *
+ * @param n Number to be split into quotient and rest.
+ * Will be modified to contain the quotient.
+ * @param d Divisor.
+ * @param m Output variable for the rest (modulo result).
+ */
+#define NEGDIVMOD(n, d, m) UPRV_BLOCK_MACRO_BEGIN { \
+ (m)=(n)%(d); \
+ (n)/=(d); \
+ if((m)<0) { \
+ --(n); \
+ (m)+=(d); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/* Faster versions of packDiff() for single-byte-encoded diff values. */
+
+/** Is a diff value encodable in a single byte? */
+#define DIFF_IS_SINGLE(diff) (BOCU1_REACH_NEG_1<=(diff) && (diff)<=BOCU1_REACH_POS_1)
+
+/** Encode a diff value in a single byte; the result fits a byte only when DIFF_IS_SINGLE(diff). */
+#define PACK_SINGLE_DIFF(diff) (BOCU1_MIDDLE+(diff))
+
+/** Is a diff value encodable in two bytes? (Also true for single-byte diffs: the range includes them.) */
+#define DIFF_IS_DOUBLE(diff) (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2)
+
+/* BOCU-1 implementation functions ------------------------------------------ */
+
+#define BOCU1_SIMPLE_PREV(c) (((c)&~0x7f)+BOCU1_ASCII_PREV) /* offset 0x40 into the aligned 128-block that contains c */
+
+/**
+ * Compute the next "previous" value for differencing
+ * from the current code point.
+ *
+ * @param c current code point, 0x3040..0xd7a3 (rest handled by macro below)
+ * @return "previous code point" state value
+ */
+static inline int32_t
+bocu1Prev(int32_t c) {
+ /* compute new prev */
+ if(/* 0x3040<=c && */ c<=0x309f) {
+ /* Hiragana is not 128-aligned */
+ return 0x3070;
+ } else if(0x4e00<=c && c<=0x9fa5) {
+ /* CJK Unihan */
+ return 0x4e00-BOCU1_REACH_NEG_2; /* puts 0x4e00 exactly BOCU1_REACH_NEG_2 below prev, the most negative two-byte diff */
+ } else if(0xac00<=c /* && c<=0xd7a3 */) {
+ /* Korean Hangul */
+ return (0xd7a3+0xac00)/2; /* midpoint of 0xac00..0xd7a3 */
+ } else {
+ /* mostly small scripts */
+ return BOCU1_SIMPLE_PREV(c);
+ }
+}
+
+/** Fast version of bocu1Prev() for most scripts: only code points in 0x3040..0xd7a3 call the function. */
+#define BOCU1_PREV(c) ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c))
+
+/*
+ * The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c.
+ * The UConverter fields are used as follows:
+ *
+ * fromUnicodeStatus encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
+ *
+ * toUnicodeStatus decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
+ * mode decoder's incomplete (diff<<2)|count (ignored when toULength==0)
+ */
+
+/* BOCU-1-from-Unicode conversion functions --------------------------------- */
+
+/**
+ * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
+ * and return a packed integer with them.
+ *
+ * The encoding favors small absolute differences with short encodings
+ * to compress runs of same-script characters.
+ *
+ * Optimized version with unrolled loops and fewer division/modulo operations
+ * than the standard packDiff().
+ *
+ * @param diff difference value -0x10ffff..0x10ffff
+ * @return
+ * 0x010000zz for 1-byte sequence zz
+ * 0x0200yyzz for 2-byte sequence yy zz
+ * 0x03xxyyzz for 3-byte sequence xx yy zz
+ * 0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
+ */
+static int32_t
+packDiff(int32_t diff) {
+ int32_t result, m;
+
+ U_ASSERT(!DIFF_IS_SINGLE(diff)); /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */
+ if(diff>=BOCU1_REACH_NEG_1) {
+ /* mostly positive differences, and single-byte negative ones */
+#if 0 /* single-byte case handled in macros, see below */
+ if(diff<=BOCU1_REACH_POS_1) {
+ /* single byte */
+ return 0x01000000|(BOCU1_MIDDLE+diff);
+ } else
+#endif
+ if(diff<=BOCU1_REACH_POS_2) {
+ /* two bytes */
+ diff-=BOCU1_REACH_POS_1+1;
+ result=0x02000000;
+
+ m=diff%BOCU1_TRAIL_COUNT;
+ diff/=BOCU1_TRAIL_COUNT;
+ result|=BOCU1_TRAIL_TO_BYTE(m);
+
+ result|=(BOCU1_START_POS_2+diff)<<8;
+ } else if(diff<=BOCU1_REACH_POS_3) {
+ /* three bytes */
+ diff-=BOCU1_REACH_POS_2+1;
+ result=0x03000000;
+
+ m=diff%BOCU1_TRAIL_COUNT;
+ diff/=BOCU1_TRAIL_COUNT;
+ result|=BOCU1_TRAIL_TO_BYTE(m);
+
+ m=diff%BOCU1_TRAIL_COUNT;
+ diff/=BOCU1_TRAIL_COUNT;
+ result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+ result|=(BOCU1_START_POS_3+diff)<<16;
+ } else {
+ /* four bytes */
+ diff-=BOCU1_REACH_POS_3+1;
+
+ m=diff%BOCU1_TRAIL_COUNT;
+ diff/=BOCU1_TRAIL_COUNT;
+ result=BOCU1_TRAIL_TO_BYTE(m);
+
+ m=diff%BOCU1_TRAIL_COUNT;
+ diff/=BOCU1_TRAIL_COUNT;
+ result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+ /*
+ * We know that / and % would deliver quotient 0 and rest=diff.
+ * Avoid division and modulo for performance.
+ */
+ result|=BOCU1_TRAIL_TO_BYTE(diff)<<16;
+
+ result|=((uint32_t)BOCU1_START_POS_4)<<24; /* the lead byte itself marks the 4-byte form; no separate length is stored */
+ }
+ } else {
+ /* two- to four-byte negative differences */
+ if(diff>=BOCU1_REACH_NEG_2) {
+ /* two bytes */
+ diff-=BOCU1_REACH_NEG_1;
+ result=0x02000000;
+
+ NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+ result|=BOCU1_TRAIL_TO_BYTE(m);
+
+ result|=(BOCU1_START_NEG_2+diff)<<8;
+ } else if(diff>=BOCU1_REACH_NEG_3) {
+ /* three bytes */
+ diff-=BOCU1_REACH_NEG_2;
+ result=0x03000000;
+
+ NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+ result|=BOCU1_TRAIL_TO_BYTE(m);
+
+ NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+ result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+ result|=(BOCU1_START_NEG_3+diff)<<16;
+ } else {
+ /* four bytes */
+ diff-=BOCU1_REACH_NEG_3;
+
+ NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+ result=BOCU1_TRAIL_TO_BYTE(m);
+
+ NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+ result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+ /*
+ * We know that NEGDIVMOD would deliver
+ * quotient -1 and rest=diff+BOCU1_TRAIL_COUNT.
+ * Avoid division and modulo for performance.
+ */
+ m=diff+BOCU1_TRAIL_COUNT;
+ result|=BOCU1_TRAIL_TO_BYTE(m)<<16;
+
+ result|=BOCU1_MIN<<24; /* BOCU1_MIN is the lead byte of the negative 4-byte form */
+ }
+ }
+ return result;
+}
+
+
+static void U_CALLCONV
+_Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) { /* UTF-16 -> BOCU-1; writes one source offset per output byte, so pArgs->offsets is dereferenced unconditionally */
+ UConverter *cnv;
+ const UChar *source, *sourceLimit;
+ uint8_t *target;
+ int32_t targetCapacity;
+ int32_t *offsets;
+
+ int32_t prev, c, diff;
+
+ int32_t sourceIndex, nextSourceIndex;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ source=pArgs->source;
+ sourceLimit=pArgs->sourceLimit;
+ target=(uint8_t *)pArgs->target;
+ targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+ offsets=pArgs->offsets;
+
+ /* get the converter state from UConverter */
+ c=cnv->fromUChar32;
+ prev=(int32_t)cnv->fromUnicodeStatus;
+ if(prev==0) {
+ prev=BOCU1_ASCII_PREV;
+ }
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex= c==0 ? 0 : -1;
+ nextSourceIndex=0;
+
+ /* conversion loop */
+ if(c!=0 && targetCapacity>0) { /* a lead surrogate was saved by the previous call: resume at its trail-unit check */
+ goto getTrail;
+ }
+
+fastSingle:
+ /* fast loop for single-byte differences */
+ /* use only one loop counter variable, targetCapacity, not also source */
+ diff=(int32_t)(sourceLimit-source);
+ if(targetCapacity>diff) {
+ targetCapacity=diff;
+ }
+ while(targetCapacity>0 && (c=*source)<0x3000) {
+ if(c<=0x20) {
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(uint8_t)c;
+ *offsets++=nextSourceIndex++;
+ ++source;
+ --targetCapacity;
+ } else {
+ diff=c-prev;
+ if(DIFF_IS_SINGLE(diff)) {
+ prev=BOCU1_SIMPLE_PREV(c);
+ *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
+ *offsets++=nextSourceIndex++;
+ ++source;
+ --targetCapacity;
+ } else {
+ break;
+ }
+ }
+ }
+ /* restore real values */
+ targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
+ sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
+
+ /* regular loop for all cases */
+ while(source<sourceLimit) {
+ if(targetCapacity>0) {
+ c=*source++;
+ ++nextSourceIndex;
+
+ if(c<=0x20) {
+ /*
+ * ISO C0 control & space:
+ * Encode directly for MIME compatibility,
+ * and reset state except for space, to not disrupt compression.
+ */
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(uint8_t)c;
+ *offsets++=sourceIndex;
+ --targetCapacity;
+
+ sourceIndex=nextSourceIndex;
+ continue;
+ }
+
+ if(U16_IS_LEAD(c)) {
+getTrail:
+ if(source<sourceLimit) {
+ /* test the following code unit */
+ UChar trail=*source;
+ if(U16_IS_TRAIL(trail)) {
+ ++source;
+ ++nextSourceIndex;
+ c=U16_GET_SUPPLEMENTARY(c, trail);
+ }
+ } else {
+ /* no more input */
+ c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
+ break;
+ }
+ }
+
+ /*
+ * all other Unicode code points c==U+0021..U+10ffff
+ * are encoded with the difference c-prev
+ *
+ * a new prev is computed from c,
+ * placed in the middle of a 0x80-block (for most small scripts) or
+ * in the middle of the Unihan and Hangul blocks
+ * to statistically minimize the following difference
+ */
+ diff=c-prev;
+ prev=BOCU1_PREV(c);
+ if(DIFF_IS_SINGLE(diff)) {
+ *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
+ *offsets++=sourceIndex;
+ --targetCapacity;
+ sourceIndex=nextSourceIndex;
+ if(c<0x3000) {
+ goto fastSingle;
+ }
+ } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
+ /* optimize 2-byte case */
+ int32_t m;
+
+ if(diff>=0) {
+ diff-=BOCU1_REACH_POS_1+1;
+ m=diff%BOCU1_TRAIL_COUNT;
+ diff/=BOCU1_TRAIL_COUNT;
+ diff+=BOCU1_START_POS_2;
+ } else {
+ diff-=BOCU1_REACH_NEG_1;
+ NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+ diff+=BOCU1_START_NEG_2;
+ }
+ *target++=(uint8_t)diff;
+ *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ targetCapacity-=2;
+ sourceIndex=nextSourceIndex;
+ } else {
+ int32_t length; /* will be 2..4 */
+
+ diff=packDiff(diff);
+ length=BOCU1_LENGTH_FROM_PACKED(diff);
+
+ /* write the output character bytes from diff and length */
+ /* from the first if in the loop we know that targetCapacity>0 */
+ if(length<=targetCapacity) {
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 4:
+ *target++=(uint8_t)(diff>>24);
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ case 3:
+ *target++=(uint8_t)(diff>>16);
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ case 2:
+ *target++=(uint8_t)(diff>>8);
+ *offsets++=sourceIndex;
+ /* case 1: handled above */
+ *target++=(uint8_t)diff;
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ targetCapacity-=length;
+ sourceIndex=nextSourceIndex;
+ } else {
+ uint8_t *charErrorBuffer;
+
+ /*
+ * We actually do this backwards here:
+ * In order to save an intermediate variable, we output
+ * first to the overflow buffer what does not fit into the
+ * regular target.
+ */
+ /* we know that 1<=targetCapacity<length<=4 */
+ length-=targetCapacity;
+ charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 3:
+ *charErrorBuffer++=(uint8_t)(diff>>16);
+ U_FALLTHROUGH;
+ case 2:
+ *charErrorBuffer++=(uint8_t)(diff>>8);
+ U_FALLTHROUGH;
+ case 1:
+ *charErrorBuffer=(uint8_t)diff;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ cnv->charErrorBufferLength=(int8_t)length;
+
+ /* now output what fits into the regular target */
+ diff>>=8*length; /* length was reduced by targetCapacity */
+ switch(targetCapacity) {
+ /* each branch falls through to the next one */
+ case 3:
+ *target++=(uint8_t)(diff>>16);
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ case 2:
+ *target++=(uint8_t)(diff>>8);
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ case 1:
+ *target++=(uint8_t)diff;
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+
+ /* target overflow */
+ targetCapacity=0;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ } else {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+
+ /* set the converter state back into UConverter */
+ cnv->fromUChar32= c<0 ? -c : 0; /* c<0 only when input ended right after a lead surrogate (see c=-c above) */
+ cnv->fromUnicodeStatus=(uint32_t)prev;
+
+ /* write back the updated pointers */
+ pArgs->source=source;
+ pArgs->target=(char *)target;
+ pArgs->offsets=offsets;
+}
+
+/*
+ * Identical to _Bocu1FromUnicodeWithOffsets but without offset handling.
+ * If a change is made in the original function, then either
+ * change this function the same way or
+ * re-copy the original function and remove the variables
+ * offsets, sourceIndex, and nextSourceIndex.
+ */
+static void U_CALLCONV
+_Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const UChar *source, *sourceLimit;
+ uint8_t *target;
+ int32_t targetCapacity;
+
+ int32_t prev, c, diff;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ source=pArgs->source;
+ sourceLimit=pArgs->sourceLimit;
+ target=(uint8_t *)pArgs->target;
+ targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+
+ /* get the converter state from UConverter */
+ c=cnv->fromUChar32;
+ prev=(int32_t)cnv->fromUnicodeStatus;
+ if(prev==0) {
+ prev=BOCU1_ASCII_PREV;
+ }
+
+ /* conversion loop */
+ if(c!=0 && targetCapacity>0) { /* a lead surrogate was saved by the previous call: resume at its trail-unit check */
+ goto getTrail;
+ }
+
+fastSingle:
+ /* fast loop for single-byte differences */
+ /* use only one loop counter variable, targetCapacity, not also source */
+ diff=(int32_t)(sourceLimit-source);
+ if(targetCapacity>diff) {
+ targetCapacity=diff;
+ }
+ while(targetCapacity>0 && (c=*source)<0x3000) {
+ if(c<=0x20) {
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(uint8_t)c;
+ } else {
+ diff=c-prev;
+ if(DIFF_IS_SINGLE(diff)) {
+ prev=BOCU1_SIMPLE_PREV(c);
+ *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
+ } else {
+ break;
+ }
+ }
+ ++source;
+ --targetCapacity;
+ }
+ /* restore real values */
+ targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
+
+ /* regular loop for all cases */
+ while(source<sourceLimit) {
+ if(targetCapacity>0) {
+ c=*source++;
+
+ if(c<=0x20) {
+ /*
+ * ISO C0 control & space:
+ * Encode directly for MIME compatibility,
+ * and reset state except for space, to not disrupt compression.
+ */
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(uint8_t)c;
+ --targetCapacity;
+ continue;
+ }
+
+ if(U16_IS_LEAD(c)) {
+getTrail:
+ if(source<sourceLimit) {
+ /* test the following code unit */
+ UChar trail=*source;
+ if(U16_IS_TRAIL(trail)) {
+ ++source;
+ c=U16_GET_SUPPLEMENTARY(c, trail);
+ }
+ } else {
+ /* no more input */
+ c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
+ break;
+ }
+ }
+
+ /*
+ * all other Unicode code points c==U+0021..U+10ffff
+ * are encoded with the difference c-prev
+ *
+ * a new prev is computed from c,
+ * placed in the middle of a 0x80-block (for most small scripts) or
+ * in the middle of the Unihan and Hangul blocks
+ * to statistically minimize the following difference
+ */
+ diff=c-prev;
+ prev=BOCU1_PREV(c);
+ if(DIFF_IS_SINGLE(diff)) {
+ *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
+ --targetCapacity;
+ if(c<0x3000) {
+ goto fastSingle;
+ }
+ } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
+ /* optimize 2-byte case */
+ int32_t m;
+
+ if(diff>=0) {
+ diff-=BOCU1_REACH_POS_1+1;
+ m=diff%BOCU1_TRAIL_COUNT;
+ diff/=BOCU1_TRAIL_COUNT;
+ diff+=BOCU1_START_POS_2;
+ } else {
+ diff-=BOCU1_REACH_NEG_1;
+ NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+ diff+=BOCU1_START_NEG_2;
+ }
+ *target++=(uint8_t)diff;
+ *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
+ targetCapacity-=2;
+ } else {
+ int32_t length; /* will be 2..4 */
+
+ diff=packDiff(diff);
+ length=BOCU1_LENGTH_FROM_PACKED(diff);
+
+ /* write the output character bytes from diff and length */
+ /* from the first if in the loop we know that targetCapacity>0 */
+ if(length<=targetCapacity) {
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 4:
+ *target++=(uint8_t)(diff>>24);
+ U_FALLTHROUGH;
+ case 3:
+ *target++=(uint8_t)(diff>>16);
+ /* case 2: handled above */
+ *target++=(uint8_t)(diff>>8);
+ /* case 1: handled above */
+ *target++=(uint8_t)diff;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ targetCapacity-=length;
+ } else {
+ uint8_t *charErrorBuffer;
+
+ /*
+ * We actually do this backwards here:
+ * In order to save an intermediate variable, we output
+ * first to the overflow buffer what does not fit into the
+ * regular target.
+ */
+ /* we know that 1<=targetCapacity<length<=4 */
+ length-=targetCapacity;
+ charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 3:
+ *charErrorBuffer++=(uint8_t)(diff>>16);
+ U_FALLTHROUGH;
+ case 2:
+ *charErrorBuffer++=(uint8_t)(diff>>8);
+ U_FALLTHROUGH;
+ case 1:
+ *charErrorBuffer=(uint8_t)diff;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ cnv->charErrorBufferLength=(int8_t)length;
+
+ /* now output what fits into the regular target */
+ diff>>=8*length; /* length was reduced by targetCapacity */
+ switch(targetCapacity) {
+ /* each branch falls through to the next one */
+ case 3:
+ *target++=(uint8_t)(diff>>16);
+ U_FALLTHROUGH;
+ case 2:
+ *target++=(uint8_t)(diff>>8);
+ U_FALLTHROUGH;
+ case 1:
+ *target++=(uint8_t)diff;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+
+ /* target overflow */
+ targetCapacity=0;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ } else {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+
+ /* set the converter state back into UConverter */
+ cnv->fromUChar32= c<0 ? -c : 0; /* c<0 only when input ended right after a lead surrogate (see c=-c above) */
+ cnv->fromUnicodeStatus=(uint32_t)prev;
+
+ /* write back the updated pointers */
+ pArgs->source=source;
+ pArgs->target=(char *)target;
+}
+
+/* BOCU-1-to-Unicode conversion functions ----------------------------------- */
+
+/**
+ * Function for BOCU-1 decoder; handles multi-byte lead bytes.
+ *
+ * @param b lead byte;
+ * BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<=BOCU1_MAX_LEAD
+ * @return (diff<<2)|count
+ */
+static inline int32_t
+decodeBocu1LeadByte(int32_t b) {
+ int32_t diff, count;
+
+ if(b>=BOCU1_START_NEG_2) {
+ /* positive difference */
+ if(b<BOCU1_START_POS_3) {
+ /* two bytes */
+ diff=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
+ count=1;
+ } else if(b<BOCU1_START_POS_4) {
+ /* three bytes */
+ diff=((int32_t)b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1;
+ count=2;
+ } else {
+ /* four bytes */
+ diff=BOCU1_REACH_POS_3+1;
+ count=3;
+ }
+ } else {
+ /* negative difference */
+ if(b>=BOCU1_START_NEG_3) {
+ /* two bytes */
+ diff=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
+ count=1;
+ } else if(b>BOCU1_MIN) {
+ /* three bytes */
+ diff=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2;
+ count=2;
+ } else {
+ /* four bytes */
+ diff=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3;
+ count=3;
+ }
+ }
+
+ /* return the state for decoding the trail byte(s) */
+ return (diff<<2)|count; /* count (1..3 trail bytes still expected) occupies the low two bits */
+}
+
+/**
+ * BOCU-1 decoder helper for one trail byte of a multi-byte sequence.
+ *
+ * Converts the raw byte to its trail value and scales that value by the
+ * weight of its position in the sequence.
+ *
+ * @param count number of remaining trail bytes including this one
+ * @param b trail byte
+ * @return the amount to add to the accumulated difference;
+ *         a value <0 indicates an illegal trail byte
+ *
+ * @see decodeBocu1
+ */
+static inline int32_t
+decodeBocu1TrailByte(int32_t count, int32_t b) {
+    int32_t trail;
+
+    if(b<=0x20) {
+        /*
+         * Bytes up to 0x20 go through the lookup table, which skips some C0
+         * controls and makes the trail byte range contiguous. A negative
+         * table entry (illegal byte) makes the result below negative too.
+         */
+        trail=bocu1ByteToTrail[b];
+    } else {
+#if BOCU1_MAX_TRAIL<0xff
+        if(b>BOCU1_MAX_TRAIL) {
+            return -99;
+        }
+#endif
+        trail=b-BOCU1_TRAIL_BYTE_OFFSET;
+    }
+
+    /* weight the trail value according to its position */
+    switch(count) {
+    case 1:
+        return trail;
+    case 2:
+        return trail*BOCU1_TRAIL_COUNT;
+    default: /* count==3 */
+        return trail*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT);
+    }
+}
+
+static void U_CALLCONV /*
+ * BOCU-1 to UTF-16 conversion that also records, for every UChar written,
+ * the index of the source byte at which the corresponding character began.
+ * The decoder state (prev, the packed partial diff/count, and the bytes of an
+ * unfinished character) is loaded from the UConverter on entry and stored
+ * back at endloop, so a multi-byte character may span two calls.
+ * Stops with U_BUFFER_OVERFLOW_ERROR when the target is full and with
+ * U_ILLEGAL_CHAR_FOUND on a bad trail byte or an out-of-range code point.
+ */
+_Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const uint8_t *source, *sourceLimit;
+ UChar *target;
+ const UChar *targetLimit;
+ int32_t *offsets;
+
+ int32_t prev, count, diff, c;
+
+ int8_t byteIndex;
+ uint8_t *bytes;
+
+ int32_t sourceIndex, nextSourceIndex;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ source=(const uint8_t *)pArgs->source;
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+ target=pArgs->target;
+ targetLimit=pArgs->targetLimit;
+ offsets=pArgs->offsets;
+
+ /* get the converter state from UConverter */
+ prev=(int32_t)cnv->toUnicodeStatus;
+ if(prev==0) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
+ count=diff&3; /* low two bits: number of trail bytes still expected */
+ diff>>=2; /* remaining bits: partial difference accumulated so far */
+
+ byteIndex=cnv->toULength;
+ bytes=cnv->toUBytes;
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex=byteIndex==0 ? 0 : -1;
+ nextSourceIndex=0;
+
+ /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
+ if(count>0 && byteIndex>0 && target<targetLimit) {
+ goto getTrail;
+ }
+
+fastSingle:
+ /* fast loop for single-byte differences */
+ /* use count as the only loop counter variable */
+ diff=(int32_t)(sourceLimit-source);
+ count=(int32_t)(pArgs->targetLimit-target);
+ if(count>diff) {
+ count=diff;
+ }
+ while(count>0) {
+ if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
+ c=prev+(c-BOCU1_MIDDLE);
+ if(c<0x3000) {
+ *target++=(UChar)c;
+ *offsets++=nextSourceIndex++;
+ prev=BOCU1_SIMPLE_PREV(c);
+ } else {
+ break;
+ }
+ } else if(c<=0x20) {
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(UChar)c;
+ *offsets++=nextSourceIndex++;
+ } else {
+ break;
+ }
+ ++source;
+ --count;
+ }
+ sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
+
+ /* decode a sequence of single and lead bytes */
+ while(source<sourceLimit) {
+ if(target>=targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+
+ ++nextSourceIndex;
+ c=*source++;
+ if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
+ /* Write a code point directly from a single-byte difference. */
+ c=prev+(c-BOCU1_MIDDLE);
+ if(c<0x3000) {
+ *target++=(UChar)c;
+ *offsets++=sourceIndex;
+ prev=BOCU1_SIMPLE_PREV(c);
+ sourceIndex=nextSourceIndex;
+ goto fastSingle;
+ }
+ } else if(c<=0x20) {
+ /*
+ * Direct-encoded C0 control code or space.
+ * Reset prev for C0 control codes but not for space.
+ */
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(UChar)c;
+ *offsets++=sourceIndex;
+ sourceIndex=nextSourceIndex;
+ continue;
+ } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
+ /* Optimize two-byte case. */
+ if(c>=BOCU1_MIDDLE) {
+ diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
+ } else {
+ diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
+ }
+
+ /* trail byte */
+ ++nextSourceIndex;
+ c=decodeBocu1TrailByte(1, *source++);
+ if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
+ bytes[0]=source[-2]; /* keep both bytes of the rejected sequence in cnv->toUBytes */
+ bytes[1]=source[-1];
+ byteIndex=2;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ } else if(c==BOCU1_RESET) {
+ /* only reset the state, no code point */
+ prev=BOCU1_ASCII_PREV;
+ sourceIndex=nextSourceIndex;
+ continue;
+ } else {
+ /*
+ * For multi-byte difference lead bytes, set the decoder state
+ * with the partial difference value from the lead byte and
+ * with the number of trail bytes.
+ */
+ bytes[0]=(uint8_t)c;
+ byteIndex=1;
+
+ diff=decodeBocu1LeadByte(c);
+ count=diff&3;
+ diff>>=2;
+getTrail:
+ for(;;) {
+ if(source>=sourceLimit) {
+ goto endloop; /* input exhausted mid-character: diff/count/bytes are saved at endloop */
+ }
+ ++nextSourceIndex;
+ c=bytes[byteIndex++]=*source++;
+
+ /* trail byte in any position */
+ c=decodeBocu1TrailByte(count, c);
+ if(c<0) {
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+
+ diff+=c;
+ if(--count==0) {
+ /* final trail byte, deliver a code point */
+ byteIndex=0;
+ c=prev+diff;
+ if((uint32_t)c>0x10ffff) {
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+ break;
+ }
+ }
+ }
+
+ /* calculate the next prev and output c */
+ prev=BOCU1_PREV(c);
+ if(c<=0xffff) {
+ *target++=(UChar)c;
+ *offsets++=sourceIndex;
+ } else {
+ /* output surrogate pair */
+ *target++=U16_LEAD(c);
+ if(target<targetLimit) {
+ *target++=U16_TRAIL(c);
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ } else {
+ /* target overflow */
+ *offsets++=sourceIndex;
+ cnv->UCharErrorBuffer[0]=U16_TRAIL(c); /* the trail surrogate that did not fit is parked in the converter */
+ cnv->UCharErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ sourceIndex=nextSourceIndex;
+ }
+endloop:
+
+ if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
+ /* set the converter state in UConverter to deal with the next character */
+ cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
+ cnv->mode=0;
+ } else {
+ /* set the converter state back into UConverter */
+ cnv->toUnicodeStatus=(uint32_t)prev;
+ cnv->mode=(diff<<2)|count; /* same (diff<<2)|count packing that decodeBocu1LeadByte() returns */
+ }
+ cnv->toULength=byteIndex;
+
+ /* write back the updated pointers */
+ pArgs->source=(const char *)source;
+ pArgs->target=target;
+ pArgs->offsets=offsets;
+ return;
+}
+
+/*
+ * Identical to _Bocu1ToUnicodeWithOffsets but without offset handling.
+ * If a change is made in the original function, then either
+ * change this function the same way or
+ * re-copy the original function and remove the variables
+ * offsets, sourceIndex, and nextSourceIndex.
+ *
+ * Converts BOCU-1 bytes from pArgs->source to UTF-16 in pArgs->target.
+ * The decoder state (prev, the packed partial diff/count, and the bytes of an
+ * unfinished character) is loaded from the UConverter on entry and stored
+ * back at endloop, so a multi-byte character may span two calls.
+ * Stops with U_BUFFER_OVERFLOW_ERROR when the target is full and with
+ * U_ILLEGAL_CHAR_FOUND on a bad trail byte or an out-of-range code point.
+ */
+static void U_CALLCONV
+_Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const uint8_t *source, *sourceLimit;
+ UChar *target;
+ const UChar *targetLimit;
+
+ int32_t prev, count, diff, c;
+
+ int8_t byteIndex;
+ uint8_t *bytes;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ source=(const uint8_t *)pArgs->source;
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+ target=pArgs->target;
+ targetLimit=pArgs->targetLimit;
+
+ /* get the converter state from UConverter */
+ prev=(int32_t)cnv->toUnicodeStatus;
+ if(prev==0) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
+ count=diff&3; /* low two bits: number of trail bytes still expected */
+ diff>>=2; /* remaining bits: partial difference accumulated so far */
+
+ byteIndex=cnv->toULength;
+ bytes=cnv->toUBytes;
+
+ /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
+ if(count>0 && byteIndex>0 && target<targetLimit) {
+ goto getTrail;
+ }
+
+fastSingle:
+ /* fast loop for single-byte differences */
+ /* use count as the only loop counter variable */
+ diff=(int32_t)(sourceLimit-source);
+ count=(int32_t)(pArgs->targetLimit-target);
+ if(count>diff) {
+ count=diff;
+ }
+ while(count>0) {
+ if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
+ c=prev+(c-BOCU1_MIDDLE);
+ if(c<0x3000) {
+ *target++=(UChar)c;
+ prev=BOCU1_SIMPLE_PREV(c);
+ } else {
+ break;
+ }
+ } else if(c<=0x20) {
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(UChar)c;
+ } else {
+ break;
+ }
+ ++source;
+ --count;
+ }
+
+ /* decode a sequence of single and lead bytes */
+ while(source<sourceLimit) {
+ if(target>=targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+
+ c=*source++;
+ if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
+ /* Write a code point directly from a single-byte difference. */
+ c=prev+(c-BOCU1_MIDDLE);
+ if(c<0x3000) {
+ *target++=(UChar)c;
+ prev=BOCU1_SIMPLE_PREV(c);
+ goto fastSingle;
+ }
+ } else if(c<=0x20) {
+ /*
+ * Direct-encoded C0 control code or space.
+ * Reset prev for C0 control codes but not for space.
+ */
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(UChar)c;
+ continue;
+ } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
+ /* Optimize two-byte case. */
+ if(c>=BOCU1_MIDDLE) {
+ diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
+ } else {
+ diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
+ }
+
+ /* trail byte */
+ c=decodeBocu1TrailByte(1, *source++);
+ if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
+ bytes[0]=source[-2]; /* keep both bytes of the rejected sequence in cnv->toUBytes */
+ bytes[1]=source[-1];
+ byteIndex=2;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ } else if(c==BOCU1_RESET) {
+ /* only reset the state, no code point */
+ prev=BOCU1_ASCII_PREV;
+ continue;
+ } else {
+ /*
+ * For multi-byte difference lead bytes, set the decoder state
+ * with the partial difference value from the lead byte and
+ * with the number of trail bytes.
+ */
+ bytes[0]=(uint8_t)c;
+ byteIndex=1;
+
+ diff=decodeBocu1LeadByte(c);
+ count=diff&3;
+ diff>>=2;
+getTrail:
+ for(;;) {
+ if(source>=sourceLimit) {
+ goto endloop; /* input exhausted mid-character: diff/count/bytes are saved at endloop */
+ }
+ c=bytes[byteIndex++]=*source++;
+
+ /* trail byte in any position */
+ c=decodeBocu1TrailByte(count, c);
+ if(c<0) {
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+
+ diff+=c;
+ if(--count==0) {
+ /* final trail byte, deliver a code point */
+ byteIndex=0;
+ c=prev+diff;
+ if((uint32_t)c>0x10ffff) {
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+ break;
+ }
+ }
+ }
+
+ /* calculate the next prev and output c */
+ prev=BOCU1_PREV(c);
+ if(c<=0xffff) {
+ *target++=(UChar)c;
+ } else {
+ /* output surrogate pair */
+ *target++=U16_LEAD(c);
+ if(target<targetLimit) {
+ *target++=U16_TRAIL(c);
+ } else {
+ /* target overflow */
+ cnv->UCharErrorBuffer[0]=U16_TRAIL(c); /* the trail surrogate that did not fit is parked in the converter */
+ cnv->UCharErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ }
+endloop:
+
+ if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
+ /* set the converter state in UConverter to deal with the next character */
+ cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
+ cnv->mode=0;
+ } else {
+ /* set the converter state back into UConverter */
+ cnv->toUnicodeStatus=(uint32_t)prev;
+ cnv->mode=(diff<<2)|count; /* same (diff<<2)|count packing that decodeBocu1LeadByte() returns */
+ }
+ cnv->toULength=byteIndex;
+
+ /* write back the updated pointers */
+ pArgs->source=(const char *)source;
+ pArgs->target=target;
+ return;
+}
+
+/* miscellaneous ------------------------------------------------------------ */
+
+static const UConverterImpl _Bocu1Impl={ /* function table for the BOCU-1 converter; slots are positional */
+ UCNV_BOCU1,
+
+ NULL,
+ NULL,
+
+ NULL, /* these three slots hold open/close/reset in _HZImpl; BOCU-1 installs none */
+ NULL,
+ NULL,
+
+ _Bocu1ToUnicode, /* bytes to UTF-16, no offsets */
+ _Bocu1ToUnicodeWithOffsets, /* bytes to UTF-16, recording source offsets */
+ _Bocu1FromUnicode, /* UTF-16 to bytes, no offsets */
+ _Bocu1FromUnicodeWithOffsets, /* UTF-16 to bytes, recording source offsets */
+ NULL,
+
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ ucnv_getCompleteUnicodeSet, /* presumably reports every code point as convertible; defined elsewhere, verify */
+
+ NULL,
+ NULL
+};
+
+static const UConverterStaticData _Bocu1StaticData={ /* constant description of the BOCU-1 converter; fields are positional */
+ sizeof(UConverterStaticData),
+ "BOCU-1", /* internal converter name */
+ 1214, /* CCSID for BOCU-1 */
+ UCNV_IBM, UCNV_BOCU1,
+ 1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
+ { 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */
+ FALSE, FALSE,
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+const UConverterSharedData _Bocu1Data= /* externally visible BOCU-1 entry: pairs the static data with the function table */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Bocu1StaticData, &_Bocu1Impl);
+
+#endif
diff --git a/thirdparty/icu4c/common/ucnvdisp.cpp b/thirdparty/icu4c/common/ucnvdisp.cpp
new file mode 100644
index 0000000000..ac86b98597
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnvdisp.cpp
@@ -0,0 +1,88 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1998-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* ucnvdisp.c:
+* Implements APIs for the ICU's codeset conversion library display names.
+*
+* Modification History:
+*
+* Date Name Description
+* 04/04/99 helena Fixed internal header inclusion.
+* 05/09/00 helena Added implementation to handle fallback mappings.
+* 06/20/2000 helena OS/400 port changes; mostly typecast.
+* 09/08/2004 grhoten split from ucnv.c
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ustring.h"
+#include "unicode/ures.h"
+#include "unicode/ucnv.h"
+#include "cstring.h"
+#include "ustr_imp.h"
+#include "ucnv_imp.h"
+#include "putilimp.h"
+
+/**
+ * Copies a display name for the converter into displayName.
+ *
+ * The converter's internal name is used as the key into the resource bundle
+ * opened for displayLocale. If the bundle has no string for that key, the
+ * internal name itself is converted to UChars and used instead.
+ *
+ * @param cnv                 converter to name; must not be NULL
+ * @param displayLocale       locale ID passed to ures_open()
+ * @param displayName         destination buffer; may be NULL only when
+ *                            displayNameCapacity is 0
+ * @param displayNameCapacity capacity of displayName in UChars; must be >=0
+ * @param pErrorCode          in/out error code; nothing is done if it is
+ *                            NULL or already indicates a failure
+ * @return the result of u_terminateUChars() for the full name length,
+ *         or 0 on an argument error or if the bundle cannot be opened
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_getDisplayName(const UConverter *cnv,
+                    const char *displayLocale,
+                    UChar *displayName, int32_t displayNameCapacity,
+                    UErrorCode *pErrorCode) {
+    UResourceBundle *rb;
+    const UChar *name;
+    int32_t length;
+    UErrorCode localStatus = U_ZERO_ERROR;
+
+    /* check arguments */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    if(cnv==NULL || displayNameCapacity<0 || (displayNameCapacity>0 && displayName==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* open the resource bundle and get the display name string */
+    rb=ures_open(NULL, displayLocale, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* use the internal name as the key */
+    name=ures_getStringByKey(rb, cnv->sharedData->staticData->name, &length, &localStatus);
+    ures_close(rb);
+
+    if(U_SUCCESS(localStatus)) {
+        /* copy the string */
+        if (*pErrorCode == U_ZERO_ERROR) {
+            /* propagate a warning from the lookup without masking an earlier one */
+            *pErrorCode = localStatus;
+        }
+        /*
+         * u_memcpy() takes a count of UChars, not bytes. Scaling the count
+         * by U_SIZEOF_UCHAR would copy twice as many code units as fit and
+         * overrun both the source string and the caller's buffer.
+         */
+        u_memcpy(displayName, name, uprv_min(length, displayNameCapacity));
+    } else {
+        /* convert the internal name into a Unicode string */
+        length=(int32_t)uprv_strlen(cnv->sharedData->staticData->name);
+        u_charsToUChars(cnv->sharedData->staticData->name, displayName, uprv_min(length, displayNameCapacity));
+    }
+    /* NUL-terminate if there is room and set the overflow/warning status */
+    return u_terminateUChars(displayName, displayNameCapacity, length, pErrorCode);
+}
+
+#endif
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
diff --git a/thirdparty/icu4c/common/ucnvhz.cpp b/thirdparty/icu4c/common/ucnvhz.cpp
new file mode 100644
index 0000000000..6b2f5faaf0
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnvhz.cpp
@@ -0,0 +1,625 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2000-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: ucnvhz.c
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2000oct16
+* created by: Ram Viswanadha
+* 10/31/2000 Ram Implemented offsets logic function
+*
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+
+#include "cmemory.h"
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_cb.h"
+#include "unicode/uset.h"
+#include "unicode/utf16.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "ucnv_imp.h"
+
+#define UCNV_TILDE 0x7E /* ~ */
+#define UCNV_OPEN_BRACE 0x7B /* { */
+#define UCNV_CLOSE_BRACE 0x7D /* } */
+#define SB_ESCAPE "\x7E\x7D" /* "~}": switch from GB (double-byte) back to ASCII */
+#define DB_ESCAPE "\x7E\x7B" /* "~{": switch from ASCII to GB (double-byte) */
+#define TILDE_ESCAPE "\x7E\x7E" /* "~~": a literal tilde */
+#define ESC_LEN 2 /* byte length of each escape sequence above */
+
+
+#define CONCAT_ESCAPE_MACRO(args, targetIndex,targetLength,strToAppend, err, len,sourceIndex) UPRV_BLOCK_MACRO_BEGIN { \
+ while(len-->0){ /* append len bytes of strToAppend; modifies len, strToAppend and targetIndex */ \
+ if(targetIndex < targetLength){ \
+ args->target[targetIndex] = (unsigned char) *strToAppend; \
+ if(args->offsets!=NULL){ \
+ *(offsets++) = sourceIndex-1; /* 'offsets' is a local of the expanding function, not a macro parameter */ \
+ } \
+ targetIndex++; \
+ } \
+ else{ /* target full: spill the byte into the converter's charErrorBuffer and flag overflow */ \
+ args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; \
+ *err =U_BUFFER_OVERFLOW_ERROR; \
+ } \
+ strToAppend++; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+
+typedef struct{ /* per-converter HZ state, stored in UConverter.extraInfo */
+ UConverter* gbConverter; /* sub-converter opened as "GBK" in _HZOpen; does the double-byte mapping */
+ int32_t targetIndex; /* cleared by _HZReset; not otherwise used in the code shown here */
+ int32_t sourceIndex; /* cleared by _HZReset; not otherwise used in the code shown here */
+ UBool isEscapeAppended; /* from-Unicode: TRUE once an escape sequence has been written */
+ UBool isStateDBCS; /* to-Unicode: TRUE after "~{", FALSE after "~}" */
+ UBool isTargetUCharDBCS; /* from-Unicode: TRUE if the last character written was double-byte */
+ UBool isEmptySegment; /* to-Unicode: TRUE right after "~{" or "~}" until any character is seen */
+}UConverterDataHZ;
+
+
+U_CDECL_BEGIN
+/*
+ * Open hook for the HZ converter: opens the "GBK" sub-converter and attaches
+ * a zero-initialized UConverterDataHZ that owns it. When only loadability is
+ * being tested, just checks that "GBK" can be created.
+ */
+static void U_CALLCONV
+_HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
+    UConverter *gbk;
+    UConverterDataHZ *hzData;
+
+    if(pArgs->onlyTestIsLoadable) {
+        /* errorCode carries result */
+        ucnv_canCreateConverter("GBK", errorCode);
+        return;
+    }
+
+    gbk = ucnv_open("GBK", errorCode);
+    if(U_FAILURE(*errorCode)) {
+        return;
+    }
+
+    /* start both conversion directions from a clean state */
+    cnv->toUnicodeStatus = 0;
+    cnv->fromUnicodeStatus= 0;
+    cnv->mode=0;
+    cnv->fromUChar32=0x0000;
+
+    cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ));
+    if(cnv->extraInfo == NULL) {
+        /* no state block: release the sub-converter we just opened */
+        ucnv_close(gbk);
+        *errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    hzData = (UConverterDataHZ*)cnv->extraInfo;
+    hzData->gbConverter = gbk;
+}
+
+/*
+ * Close hook for the HZ converter: closes the GBK sub-converter and frees
+ * the HZ state block unless it lives inside a clone's own memory.
+ */
+static void U_CALLCONV
+_HZClose(UConverter *cnv){
+    UConverterDataHZ *hzData = (UConverterDataHZ *)cnv->extraInfo;
+
+    if(hzData == NULL) {
+        return;
+    }
+
+    ucnv_close(hzData->gbConverter);
+    if(!cnv->isExtraLocal) {
+        /* heap-allocated by _HZOpen; clones embed the block and must not free it */
+        uprv_free(hzData);
+    }
+    cnv->extraInfo = NULL;
+}
+
+/*
+ * Reset hook for the HZ converter. Depending on choice, clears the
+ * to-Unicode state, the from-Unicode state, or both.
+ */
+static void U_CALLCONV
+_HZReset(UConverter *cnv, UConverterResetChoice choice){
+    UConverterDataHZ *hzData = (UConverterDataHZ*)cnv->extraInfo;
+
+    if(choice<=UCNV_RESET_TO_UNICODE) {
+        /* to-Unicode direction */
+        cnv->toUnicodeStatus = 0;
+        cnv->mode=0;
+        if(hzData != NULL){
+            hzData->isStateDBCS = FALSE;
+            hzData->isEmptySegment = FALSE;
+        }
+    }
+
+    if(choice!=UCNV_RESET_TO_UNICODE) {
+        /* from-Unicode direction */
+        cnv->fromUnicodeStatus= 0;
+        cnv->fromUChar32=0x0000;
+        if(hzData != NULL){
+            hzData->isEscapeAppended = FALSE;
+            hzData->targetIndex = 0;
+            hzData->sourceIndex = 0;
+            hzData->isTargetUCharDBCS = FALSE;
+        }
+    }
+}
+
+/**************************************HZ Encoding*************************************************
+* Rules for HZ encoding
+*
+* In ASCII mode, a byte is interpreted as an ASCII character, unless a
+* '~' is encountered. The character '~' is an escape character. By
+* convention, it must be immediately followed ONLY by '~', '{' or '\n'
+* (<LF>), with the following special meaning.
+
+* 1. The escape sequence '~~' is interpreted as a '~'.
+* 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
+* 3. The escape sequence '~\n' is a line-continuation marker to be
+* consumed with no output produced.
+* In GB mode, characters are interpreted two bytes at a time as (pure)
+* GB codes until the escape-from-GB code '~}' is read. This code
+* switches the mode from GB back to ASCII. (Note that the escape-
+* from-GB code '~}' ($7E7D) is outside the defined GB range.)
+*
+* Source: RFC 1842
+*
+* Note that the formal syntax in RFC 1842 is invalid. I assume that the
+* intended definition of single-byte-segment is as follows (pedberg):
+* single-byte-segment = single-byte-seq 1*single-byte-char
+*/
+
+
+static void U_CALLCONV /*
+ * Converts HZ bytes from args->source to UTF-16 in args->target and, when
+ * args->offsets is non-NULL, records the source offset of each UChar.
+ * State carried between calls: converter->mode (UCNV_TILDE after a '~'),
+ * converter->toUnicodeStatus (pending DBCS lead byte, with bit 0x100 set),
+ * and isStateDBCS / isEmptySegment in UConverterDataHZ.
+ * On an error the offending bytes are stored in converter->toUBytes with
+ * their count in converter->toULength.
+ */
+UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
+ UErrorCode* err){
+ char tempBuf[2]; /* GB byte pair (each byte +0x80) handed to the GBK sub-converter */
+ const char *mySource = ( char *) args->source;
+ UChar *myTarget = args->target;
+ const char *mySourceLimit = args->sourceLimit;
+ UChar32 targetUniChar = 0x0000;
+ int32_t mySourceChar = 0x0000;
+ UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo);
+ tempBuf[0]=0;
+ tempBuf[1]=0;
+
+ /* Calling code already handles this situation. */
+ /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){
+ *err = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }*/
+
+ while(mySource< mySourceLimit){
+
+ if(myTarget < args->targetLimit){
+
+ mySourceChar= (unsigned char) *mySource++;
+
+ if(args->converter->mode == UCNV_TILDE) {
+ /* second byte after ~ */
+ args->converter->mode=0;
+ switch(mySourceChar) {
+ case 0x0A:
+ /* no output for ~\n (line-continuation marker) */
+ continue;
+ case UCNV_TILDE:
+ if(args->offsets) {
+ args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2);
+ }
+ *(myTarget++)=(UChar)mySourceChar;
+ myData->isEmptySegment = FALSE;
+ continue;
+ case UCNV_OPEN_BRACE:
+ case UCNV_CLOSE_BRACE:
+ myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE);
+ if (myData->isEmptySegment) {
+ myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;
+ args->converter->toUCallbackReason = UCNV_IRREGULAR;
+ args->converter->toUBytes[0] = UCNV_TILDE;
+ args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar);
+ args->converter->toULength = 2;
+ args->target = myTarget;
+ args->source = mySource;
+ return;
+ }
+ myData->isEmptySegment = TRUE; /* a second mode switch before any character will be reported above */
+ continue;
+ default:
+ /* if the first byte is equal to TILDE and the trail byte
+ * is not a valid byte then it is an error condition
+ */
+ /*
+ * Ticket 5691: consistent illegal sequences:
+ * - We include at least the first byte in the illegal sequence.
+ * - If any of the non-initial bytes could be the start of a character,
+ * we stop the illegal sequence before the first one of those.
+ */
+ myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;
+ args->converter->toUBytes[0] = UCNV_TILDE;
+ if( myData->isStateDBCS ?
+ (0x21 <= mySourceChar && mySourceChar <= 0x7e) :
+ mySourceChar <= 0x7f
+ ) {
+ /* The current byte could be the start of a character: Back it out. */
+ args->converter->toULength = 1;
+ --mySource;
+ } else {
+ /* Include the current byte in the illegal sequence. */
+ args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar);
+ args->converter->toULength = 2;
+ }
+ args->target = myTarget;
+ args->source = mySource;
+ return;
+ }
+ } else if(myData->isStateDBCS) {
+ if(args->converter->toUnicodeStatus == 0x00){
+ /* lead byte */
+ if(mySourceChar == UCNV_TILDE) {
+ args->converter->mode = UCNV_TILDE;
+ } else {
+ /* add another bit to distinguish a 0 byte from not having seen a lead byte */
+ args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100);
+ myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */
+ }
+ continue;
+ }
+ else{
+ /* trail byte */
+ int leadIsOk, trailIsOk;
+ uint32_t leadByte = args->converter->toUnicodeStatus & 0xff;
+ targetUniChar = 0xffff;
+ /*
+ * Ticket 5691: consistent illegal sequences:
+ * - We include at least the first byte in the illegal sequence.
+ * - If any of the non-initial bytes could be the start of a character,
+ * we stop the illegal sequence before the first one of those.
+ *
+ * In HZ DBCS, if the second byte is in the 21..7e range,
+ * we report only the first byte as the illegal sequence.
+ * Otherwise we convert or report the pair of bytes.
+ */
+ leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21);
+ trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
+ if (leadIsOk && trailIsOk) {
+ tempBuf[0] = (char) (leadByte+0x80) ;
+ tempBuf[1] = (char) (mySourceChar+0x80);
+ targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
+ tempBuf, 2, args->converter->useFallback);
+ mySourceChar= (leadByte << 8) | mySourceChar;
+ } else if (trailIsOk) {
+ /* report a single illegal byte and continue with the following DBCS starter byte */
+ --mySource;
+ mySourceChar = (int32_t)leadByte;
+ } else {
+ /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+ /* add another bit so that the code below writes 2 bytes in case of error */
+ mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar;
+ }
+ args->converter->toUnicodeStatus =0x00;
+ }
+ }
+ else{
+ if(mySourceChar == UCNV_TILDE) {
+ args->converter->mode = UCNV_TILDE;
+ continue;
+ } else if(mySourceChar <= 0x7f) {
+ targetUniChar = (UChar)mySourceChar; /* ASCII */
+ myData->isEmptySegment = FALSE; /* the segment has something valid */
+ } else {
+ targetUniChar = 0xffff;
+ myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
+ }
+ }
+ if(targetUniChar < 0xfffe){
+ if(args->offsets) {
+ args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS)); /* one byte further back in DBCS state, where two bytes were consumed */
+ }
+
+ *(myTarget++)=(UChar)targetUniChar;
+ }
+ else /* targetUniChar>=0xfffe */ {
+ if(targetUniChar == 0xfffe){
+ *err = U_INVALID_CHAR_FOUND;
+ }
+ else{
+ *err = U_ILLEGAL_CHAR_FOUND;
+ }
+ if(mySourceChar > 0xff){
+ args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8);
+ args->converter->toUBytes[1] = (uint8_t)mySourceChar;
+ args->converter->toULength=2;
+ }
+ else{
+ args->converter->toUBytes[0] = (uint8_t)mySourceChar;
+ args->converter->toULength=1;
+ }
+ break;
+ }
+ }
+ else{
+ *err =U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+
+ args->target = myTarget;
+ args->source = mySource;
+}
+
+
+static void U_CALLCONV /*
+ * Converts UTF-16 from args->source to HZ bytes in args->target and, when
+ * args->offsets is non-NULL, records the source index for each byte written.
+ * ASCII is written directly, '~' is doubled, and other characters go through
+ * the GBK sub-converter; "~{" / "~}" are emitted whenever the byte width of
+ * the output changes. Bytes that do not fit in the target are stored in
+ * converter->charErrorBuffer and U_BUFFER_OVERFLOW_ERROR is set.
+ * A pending lead surrogate is kept in converter->fromUChar32.
+ */
+UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
+ UErrorCode * err){
+ const UChar *mySource = args->source;
+ char *myTarget = args->target;
+ int32_t* offsets = args->offsets;
+ int32_t mySourceIndex = 0;
+ int32_t myTargetIndex = 0;
+ int32_t targetLength = (int32_t)(args->targetLimit - myTarget);
+ int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source);
+ uint32_t targetUniChar = 0x0000;
+ UChar32 mySourceChar = 0x0000;
+ UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo;
+ UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;
+ UBool oldIsTargetUCharDBCS;
+ int len =0;
+ const char* escSeq=NULL;
+
+ /* Calling code already handles this situation. */
+ /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){
+ *err = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }*/
+ if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) {
+ goto getTrail;
+ }
+ /*writing the char to the output stream */
+ while (mySourceIndex < mySourceLength){
+ targetUniChar = missingCharMarker;
+ if (myTargetIndex < targetLength){
+
+ mySourceChar = (UChar) mySource[mySourceIndex++];
+
+
+ oldIsTargetUCharDBCS = isTargetUCharDBCS;
+ if(mySourceChar ==UCNV_TILDE){
+ /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/
+ len = ESC_LEN;
+ escSeq = TILDE_ESCAPE;
+ CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
+ continue;
+ } else if(mySourceChar <= 0x7f) {
+ targetUniChar = mySourceChar;
+ } else {
+ int32_t length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData,
+ mySourceChar,&targetUniChar,args->converter->useFallback);
+ /* we can only use lead bytes 21..7D and trail bytes 21..7E */
+ if( length == 2 &&
+ (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) &&
+ (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1)
+ ) {
+ targetUniChar -= 0x8080; /* clear the high bit of both bytes to get the 7-bit HZ form */
+ } else {
+ targetUniChar = missingCharMarker;
+ }
+ }
+ if (targetUniChar != missingCharMarker){
+ myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
+ if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){
+ /*Shifting from a double byte to single byte mode*/
+ if(!isTargetUCharDBCS){
+ len =ESC_LEN;
+ escSeq = SB_ESCAPE;
+ CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
+ myConverterData->isEscapeAppended = TRUE;
+ }
+ else{ /* Shifting from a single byte to double byte mode*/
+ len =ESC_LEN;
+ escSeq = DB_ESCAPE;
+ CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
+ myConverterData->isEscapeAppended = TRUE;
+
+ }
+ }
+
+ if(isTargetUCharDBCS){
+ if( myTargetIndex <targetLength){
+ myTarget[myTargetIndex++] =(char) (targetUniChar >> 8);
+ if(offsets){
+ *(offsets++) = mySourceIndex-1;
+ }
+ if(myTargetIndex < targetLength){
+ myTarget[myTargetIndex++] =(char) targetUniChar;
+ if(offsets){
+ *(offsets++) = mySourceIndex-1;
+ }
+ }else{
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }else{
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8);
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ }else{
+ if( myTargetIndex <targetLength){
+ myTarget[myTargetIndex++] = (char) (targetUniChar );
+ if(offsets){
+ *(offsets++) = mySourceIndex-1;
+ }
+
+ }else{
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+
+ }
+ else{
+ /* oops.. the code point is unassigned */
+ /*Handle surrogates */
+ /*check if the char is a First surrogate*/
+ if(U16_IS_SURROGATE(mySourceChar)) {
+ if(U16_IS_SURROGATE_LEAD(mySourceChar)) {
+ args->converter->fromUChar32=mySourceChar;
+getTrail:
+ /*look ahead to find the trail surrogate*/
+ if(mySourceIndex < mySourceLength) {
+ /* test the following code unit */
+ UChar trail=(UChar) args->source[mySourceIndex];
+ if(U16_IS_TRAIL(trail)) {
+ ++mySourceIndex;
+ mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail);
+ args->converter->fromUChar32=0x00;
+ /* there are no surrogates in GB2312*/
+ *err = U_INVALID_CHAR_FOUND;
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else {
+ /* no more input */
+ *err = U_ZERO_ERROR;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else {
+ /* callback(unassigned) for a BMP code point */
+ *err = U_INVALID_CHAR_FOUND;
+ }
+
+ args->converter->fromUChar32=mySourceChar; /* NOTE(review): when entered via the initial goto getTrail, mySourceChar is still its 0x0000 initializer unless a trail surrogate was combined, so the pending lead is overwritten with 0 here; confirm intended */
+ break;
+ }
+ }
+ else{
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ targetUniChar=missingCharMarker;
+ }
+
+ args->target += myTargetIndex;
+ args->source += mySourceIndex;
+ myConverterData->isTargetUCharDBCS = isTargetUCharDBCS;
+}
+
+/*
+ * Writes the substitution character for the HZ converter. If the output is
+ * currently in double-byte mode, "~}" is written first to return to
+ * single-byte mode, and the state is updated to match.
+ */
+static void U_CALLCONV
+_HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
+    char subBytes[4];
+    int32_t subLength = 0;
+    UConverterDataHZ *hzState = (UConverterDataHZ *)args->converter->extraInfo;
+
+    if(hzState->isTargetUCharDBCS) {
+        /* leave double-byte mode before emitting the single-byte subchar */
+        subBytes[subLength++] = UCNV_TILDE;
+        subBytes[subLength++] = UCNV_CLOSE_BRACE;
+        hzState->isTargetUCharDBCS = FALSE;
+    }
+    subBytes[subLength++] = (char)args->converter->subChars[0];
+
+    ucnv_cbFromUWriteBytes(args, subBytes, subLength, offsetIndex, err);
+}
+
+/*
+ * Structure for cloning an HZ converter into a single memory block.
+ * _HZ_SafeClone() overlays this on the caller-provided buffer and reports
+ * sizeof(struct cloneHZStruct) as the required size when preflighting.
+ */
+struct cloneHZStruct
+{
+ UConverter cnv; /* the cloned HZ converter itself */
+ UConverter subCnv; /* storage for the cloned GBK sub-converter */
+ UConverterDataHZ mydata; /* copy of the HZ state; cnv.extraInfo points here */
+};
+
+
+/*
+ * Safe-clone hook for the HZ converter.
+ *
+ * With *pBufferSize==0 this is a preflight request: the required size is
+ * stored in *pBufferSize and nullptr is returned. Otherwise stackBuffer is
+ * treated as a cloneHZStruct: the HZ state is copied into it and the GBK
+ * sub-converter is cloned into the embedded subCnv slot.
+ */
+static UConverter * U_CALLCONV
+_HZ_SafeClone(const UConverter *cnv,
+              void *stackBuffer,
+              int32_t *pBufferSize,
+              UErrorCode *status)
+{
+    if (U_FAILURE(*status)){
+        return nullptr;
+    }
+
+    if (*pBufferSize == 0){
+        /* 'preflighting' request - set needed size into *pBufferSize */
+        *pBufferSize = (int32_t)sizeof(struct cloneHZStruct);
+        return nullptr;
+    }
+
+    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
+    struct cloneHZStruct *clone = (struct cloneHZStruct *)stackBuffer;
+    const UConverterDataHZ *original = (const UConverterDataHZ *)cnv->extraInfo;
+
+    uprv_memcpy(&clone->mydata, original, sizeof(UConverterDataHZ));
+    clone->cnv.extraInfo = &clone->mydata;
+    /* the state block lives inside the clone, so _HZClose must not free it */
+    clone->cnv.isExtraLocal = TRUE;
+
+    /* deep-clone the sub-converter */
+    int32_t subCnvSize = (int32_t)sizeof(UConverter);
+    clone->mydata.gbConverter =
+        ucnv_safeClone(original->gbConverter, &clone->subCnv, &subCnvSize, status);
+
+    return &clone->cnv;
+}
+
+/*
+ * Adds to sa the code points the HZ converter handles: all of ASCII plus
+ * the GBK sub-converter's set filtered with UCNV_SET_FILTER_HZ.
+ */
+static void U_CALLCONV
+_HZ_GetUnicodeSet(const UConverter *cnv,
+                  const USetAdder *sa,
+                  UConverterUnicodeSet which,
+                  UErrorCode *pErrorCode) {
+    /* HZ converts all of ASCII */
+    sa->addRange(sa->set, 0, 0x7f);
+
+    /* add all of the code points that the sub-converter handles */
+    const UConverterDataHZ *hzData = (const UConverterDataHZ*)cnv->extraInfo;
+    ucnv_MBCSGetFilteredUnicodeSetForUnicode(
+        hzData->gbConverter->sharedData,
+        sa, which, UCNV_SET_FILTER_HZ,
+        pErrorCode);
+}
+U_CDECL_END
+static const UConverterImpl _HZImpl={ /* function table for the HZ converter; slots are positional */
+
+ UCNV_HZ,
+
+ NULL,
+ NULL,
+
+ _HZOpen,
+ _HZClose,
+ _HZReset,
+
+ UConverter_toUnicode_HZ_OFFSETS_LOGIC, /* the same function fills both to-Unicode slots; it checks args->offsets itself */
+ UConverter_toUnicode_HZ_OFFSETS_LOGIC,
+ UConverter_fromUnicode_HZ_OFFSETS_LOGIC, /* likewise for both from-Unicode slots */
+ UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
+ NULL,
+
+ NULL,
+ NULL,
+ _HZ_WriteSub,
+ _HZ_SafeClone,
+ _HZ_GetUnicodeSet,
+ NULL,
+ NULL
+};
+
+static const UConverterStaticData _HZStaticData={ /* constant description of the HZ converter; fields are positional */
+ sizeof(UConverterStaticData),
+ "HZ", /* internal converter name */
+ 0, /* by position this is the CCSID slot (cf. _Bocu1StaticData); verify */
+ UCNV_IBM,
+ UCNV_HZ,
+ 1, /* by position: minimum bytes per UChar (cf. _Bocu1StaticData); verify */
+ 4, /* by position: maximum bytes per UChar (cf. _Bocu1StaticData); verify */
+ { 0x1a, 0, 0, 0 }, /* by position: substitution bytes (cf. _Bocu1StaticData); verify */
+ 1, /* by position: length of the substitution sequence; verify */
+ FALSE,
+ FALSE,
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
+
+};
+
+const UConverterSharedData _HZData= /* externally visible HZ entry: pairs the static data with the function table */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_HZStaticData, &_HZImpl);
+
+#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */
diff --git a/thirdparty/icu4c/common/ucnvisci.cpp b/thirdparty/icu4c/common/ucnvisci.cpp
new file mode 100644
index 0000000000..44a7c05a3c
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnvisci.cpp
@@ -0,0 +1,1635 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2000-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: ucnvisci.c
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001JUN26
+* created by: Ram Viswanadha
+*
+* Date Name Description
+* 24/7/2001 Ram Added support for EXT character handling
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "cstring.h"
+#include "uassert.h"
+
+/* Low four bits of the converter options select the requested ISCII version. */
+#define UCNV_OPTIONS_VERSION_MASK 0xf
+
+/* Unicode code points referenced by the conversion logic. */
+#define NUKTA 0x093c
+#define HALANT 0x094d
+#define ZWNJ 0x200c /* Zero Width Non Joiner */
+#define ZWJ 0x200d /* Zero width Joiner */
+#define INVALID_CHAR 0xffff
+
+/* ISCII byte values with a special role. */
+#define ATR 0xEF /* Attribute code */
+#define EXT 0xF0 /* Extension code */
+#define DANDA 0x0964
+#define DOUBLE_DANDA 0x0965
+#define ISCII_NUKTA 0xE9
+#define ISCII_HALANT 0xE8
+#define ISCII_DANDA 0xEA
+#define ISCII_INV 0xD9
+#define ISCII_VOWEL_SIGN_E 0xE0
+
+/* Bounds of the Unicode range covered by the supported Indic scripts. */
+#define INDIC_BLOCK_BEGIN 0x0900
+#define INDIC_BLOCK_END 0x0D7F
+#define INDIC_RANGE (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
+#define VOCALLIC_RR 0x0931
+#define LF 0x0A
+/* Code units up to and including this value are written to the output unchanged. */
+#define ASCII_END 0xA0
+#define NO_CHAR_MARKER 0xFFFE
+/*
+ * Offset of the Telugu block from the first Indic block. Parenthesized so the
+ * expansion stays a single operand whatever operators surround the macro
+ * (the bare form DELTA * TELUGU would mis-bind next to /, % or unary operators).
+ */
+#define TELUGU_DELTA (DELTA * TELUGU)
+#define DEV_ABBR_SIGN 0x0970
+#define DEV_ANUDATTA 0x0952
+#define EXT_RANGE_BEGIN 0xA1
+#define EXT_RANGE_END 0xEE
+
+/* Gurmukhi (PNJ) specific code points and the Gurmukhi block offset. */
+#define PNJ_DELTA 0x0100
+#define PNJ_BINDI 0x0A02
+#define PNJ_TIPPI 0x0A70
+#define PNJ_SIGN_VIRAMA 0x0A4D
+#define PNJ_ADHAK 0x0A71
+#define PNJ_HA 0x0A39
+#define PNJ_RRA 0x0A5C
+
+/*
+ * Index of each supported script block, in Unicode order starting at the
+ * Devanagari block. The index doubles as the subscript into lookupInitialData
+ * and, multiplied by DELTA, gives the script's offset from the first block.
+ * DELTA itself is the size of one script block (0x80 code points).
+ */
+typedef enum {
+    DEVANAGARI = 0,
+    BENGALI    = 1,
+    GURMUKHI   = 2,
+    GUJARATI   = 3,
+    ORIYA      = 4,
+    TAMIL      = 5,
+    TELUGU     = 6,
+    KANNADA    = 7,
+    MALAYALAM  = 8,
+    DELTA      = 0x80
+} UniLang;
+
+/**
+ * Script codes that select a code page: the code page is switched when
+ * <ATR> followed by one of the values below is encountered.
+ *
+ * lookupInitialData pairs DEV, BNG, PNJ, GJR, ORI, TML, TLG, KND and MLM with
+ * the Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada
+ * and Malayalam blocks respectively. DEF selects the default state.
+ * NOTE(review): the remaining codes (RMN, ASM, ARB, PES, URD, SND, KSM, PST)
+ * are not referenced in the code visible here - confirm how they are handled.
+ */
+typedef enum {
+ DEF = 0x40,
+ RMN = 0x41,
+ DEV = 0x42,
+ BNG = 0x43,
+ TML = 0x44,
+ TLG = 0x45,
+ ASM = 0x46,
+ ORI = 0x47,
+ KND = 0x48,
+ MLM = 0x49,
+ GJR = 0x4A,
+ PNJ = 0x4B,
+ ARB = 0x71,
+ PES = 0x72,
+ URD = 0x73,
+ SND = 0x74,
+ KSM = 0x75,
+ PST = 0x76
+}ISCIILang;
+
+/*
+ * One bit per script (or pair of scripts sharing a bit), used to build the
+ * entries of validityTable: a set bit means the code point is valid in that
+ * script. KND_MASK is shared by Telugu and Kannada (see lookupInitialData),
+ * and ZERO is the "no bit" filler that keeps the table columns aligned.
+ */
+typedef enum {
+ DEV_MASK =0x80,
+ PNJ_MASK =0x40,
+ GJR_MASK =0x20,
+ ORI_MASK =0x10,
+ BNG_MASK =0x08,
+ KND_MASK =0x04,
+ MLM_MASK =0x02,
+ TML_MASK =0x01,
+ ZERO =0x00
+}MaskEnum;
+
+/* Prefix of the converter name; _ISCIIOpen appends the single version digit. */
+#define ISCII_CNV_PREFIX "ISCII,version="
+
+/*
+ * Per-converter state for ISCII, stored in UConverter.extraInfo.
+ * Fields ending in "ToUnicode" belong to the ISCII->Unicode direction and
+ * fields ending in "FromUnicode" to the Unicode->ISCII direction.
+ */
+typedef struct {
+ UChar contextCharToUnicode; /* previous code point kept for contextual analysis (toUnicode direction) */
+ UChar contextCharFromUnicode; /* previous Unicode code point kept for contextual analysis (fromUnicode direction) */
+ uint16_t defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */
+ uint16_t currentDeltaFromUnicode; /* current delta in Indic block (fromUnicode direction) */
+ uint16_t currentDeltaToUnicode; /* current delta in Indic block (toUnicode direction) */
+ MaskEnum currentMaskFromUnicode; /* mask for current state in fromUnicode */
+ MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode */
+ MaskEnum defMaskToUnicode; /* mask for default state in toUnicode */
+ UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */
+ UBool resetToDefaultToUnicode; /* boolean for resetting to default delta and mask when a newline is encountered */
+ char name[sizeof(ISCII_CNV_PREFIX) + 1]; /* prefix + one version digit + terminating NUL (sizeof already counts the NUL) */
+ UChar32 prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
+} UConverterDataISCII;
+
+/*
+ * One row of lookupInitialData: ties a script block index to the validity
+ * mask bit used for it and to the ISCII script code that announces it.
+ */
+typedef struct LookupDataStruct {
+ UniLang uniLang; /* script block index; multiplied by DELTA to obtain the block offset */
+ MaskEnum maskEnum; /* bit tested against validityTable entries for this script */
+ ISCIILang isciiLang; /* script code written after ATR for this script */
+} LookupDataStruct;
+
+/*
+ * Initial-state table, indexed both by the requested ISCII version
+ * (options & UCNV_OPTIONS_VERSION_MASK, accepted values 0..8) and by the
+ * current script block index (delta / DELTA). Rows are in UniLang order.
+ * Telugu deliberately carries KND_MASK: Telugu and Kannada share one
+ * validity bit.
+ */
+static const LookupDataStruct lookupInitialData[]={
+ { DEVANAGARI, DEV_MASK, DEV },
+ { BENGALI, BNG_MASK, BNG },
+ { GURMUKHI, PNJ_MASK, PNJ },
+ { GUJARATI, GJR_MASK, GJR },
+ { ORIYA, ORI_MASK, ORI },
+ { TAMIL, TML_MASK, TML },
+ { TELUGU, KND_MASK, TLG },
+ { KANNADA, KND_MASK, KND },
+ { MALAYALAM, MLM_MASK, MLM }
+};
+
+/*
+ * For special handling of certain Gurmukhi characters.
+ * Indexed by (code point - 0x0A00); the 80 entries cover U+0A00..U+0A4F.
+ * Bit 0 (value 1): PNJ consonant
+ * Bit 1 (value 2): PNJ Bindi Tippi
+ * Entries with value 3 have both bits set. Read through isPNJConsonant()
+ * and isPNJBindiTippi(), which also perform the range check.
+ */
+static const uint8_t pnjMap[80] = {
+ /* 0A00..0A0F */
+ 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0A10..0A1F */
+ 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ /* 0A20..0A2F */
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3,
+ /* 0A30..0A3F */
+ 3, 0, 0, 0, 0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 0, 2,
+ /* 0A40..0A4F */
+ 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/*
+ * Returns nonzero when c lies in U+0A00..U+0A4F and its pnjMap entry has
+ * bit 0 (the "PNJ consonant" flag) set; FALSE for anything outside the range.
+ */
+static UBool
+isPNJConsonant(UChar32 c) {
+    if (0xa00 <= c && c < 0xa50) {
+        return (UBool)(pnjMap[c - 0xa00] & 1);
+    }
+    return FALSE;
+}
+
+/*
+ * Returns nonzero when c lies in U+0A00..U+0A4F and its pnjMap entry has
+ * bit 1 (the "PNJ Bindi Tippi" flag) set; FALSE for anything outside the range.
+ */
+static UBool
+isPNJBindiTippi(UChar32 c) {
+    if (0xa00 <= c && c < 0xa50) {
+        /* entries are 0..3, so shifting out bit 0 leaves exactly bit 1 */
+        return (UBool)(pnjMap[c - 0xa00] >> 1);
+    }
+    return FALSE;
+}
+U_CDECL_BEGIN
+/*
+ * Open handler for the ISCII converter.
+ *
+ * Does nothing when the caller only tests whether the converter is loadable.
+ * Otherwise allocates the UConverterDataISCII state, stores it in
+ * cnv->extraInfo and initializes it from the version requested in the low
+ * bits of pArgs->options (0..8 are accepted).
+ *
+ * Sets *errorCode to U_MEMORY_ALLOCATION_ERROR when the allocation fails and
+ * to U_ILLEGAL_ARGUMENT_ERROR (after releasing the state again) when the
+ * requested version is not supported.
+ */
+static void U_CALLCONV
+_ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
+    UConverterDataISCII *state;
+    const LookupDataStruct *initial;
+    uint16_t initialDelta;
+    int32_t prefixLength;
+
+    if (pArgs->onlyTestIsLoadable) {
+        return;
+    }
+
+    cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
+    if (cnv->extraInfo == NULL) {
+        *errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    state = (UConverterDataISCII *)cnv->extraInfo;
+
+    /* state that does not depend on the requested version */
+    state->contextCharToUnicode = NO_CHAR_MARKER;
+    cnv->toUnicodeStatus = missingCharMarker;
+    state->contextCharFromUnicode = 0x0000;
+    state->resetToDefaultToUnicode = FALSE;
+
+    /* reject versions for which lookupInitialData has no row */
+    if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) >= 9) {
+        uprv_free(cnv->extraInfo);
+        cnv->extraInfo = NULL;
+        *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    /* the version number picks the initial script, delta and mask */
+    initial = &lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK];
+    initialDelta = (uint16_t)(initial->uniLang * DELTA);
+
+    state->defDeltaToUnicode = initialDelta;
+    state->currentDeltaToUnicode = initialDelta;
+    state->currentDeltaFromUnicode = initialDelta;
+
+    state->defMaskToUnicode = initial->maskEnum;
+    state->currentMaskToUnicode = initial->maskEnum;
+    state->currentMaskFromUnicode = initial->maskEnum;
+
+    state->isFirstBuffer = TRUE;
+
+    /* converter name: the fixed prefix followed by the version digit */
+    (void)uprv_strcpy(state->name, ISCII_CNV_PREFIX);
+    prefixLength = (int32_t)uprv_strlen(state->name);
+    state->name[prefixLength] = (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0');
+    state->name[prefixLength + 1] = 0;
+
+    state->prevToUnicodeStatus = 0x0000;
+}
+
+/*
+ * Close handler: detaches the ISCII state from the converter. The state is
+ * passed to uprv_free() only when it is not flagged as local
+ * (cnv->isExtraLocal); in every case cnv->extraInfo ends up NULL.
+ */
+static void U_CALLCONV
+_ISCIIClose(UConverter *cnv) {
+    if (cnv->extraInfo == NULL) {
+        return;
+    }
+    if (!cnv->isExtraLocal) {
+        uprv_free(cnv->extraInfo);
+    }
+    cnv->extraInfo = NULL;
+}
+
+/*
+ * Returns the converter name stored in the ISCII state (the prefix plus the
+ * version digit written by _ISCIIOpen), or NULL when the converter has no
+ * state attached.
+ */
+static const char* U_CALLCONV
+_ISCIIgetName(const UConverter* cnv) {
+    UConverterDataISCII *state;
+
+    if (!cnv->extraInfo) {
+        return NULL;
+    }
+    state = (UConverterDataISCII *)cnv->extraInfo;
+    return state->name;
+}
+
+/*
+ * Reset handler. Depending on choice it restores the toUnicode state, the
+ * fromUnicode state, or both, to the defaults recorded when the converter
+ * was opened:
+ *  - choice <= UCNV_RESET_TO_UNICODE resets the toUnicode side;
+ *  - choice != UCNV_RESET_TO_UNICODE resets the fromUnicode side.
+ * Both directions fall back to the same stored defaults
+ * (defDeltaToUnicode / defMaskToUnicode).
+ */
+static void U_CALLCONV
+_ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {
+    UConverterDataISCII *state = (UConverterDataISCII *)(cnv->extraInfo);
+
+    if (choice <= UCNV_RESET_TO_UNICODE) {
+        /* converter-level toUnicode status */
+        cnv->toUnicodeStatus = missingCharMarker;
+        cnv->mode = 0;
+        /* ISCII-specific toUnicode status */
+        state->currentDeltaToUnicode = state->defDeltaToUnicode;
+        state->currentMaskToUnicode = state->defMaskToUnicode;
+        state->contextCharToUnicode = NO_CHAR_MARKER;
+        state->prevToUnicodeStatus = 0x0000;
+    }
+
+    if (choice != UCNV_RESET_TO_UNICODE) {
+        /* converter-level fromUnicode status */
+        cnv->fromUChar32 = 0x0000;
+        /* ISCII-specific fromUnicode status */
+        state->contextCharFromUnicode = 0x00;
+        state->currentMaskFromUnicode = state->defMaskToUnicode;
+        state->currentDeltaFromUnicode = state->defDeltaToUnicode;
+        state->isFirstBuffer = TRUE;
+        state->resetToDefaultToUnicode = FALSE;
+    }
+}
+
+/**
+ * The values in the validity table are indexed by the lower bits of the
+ * Unicode code point, i.e. by the offset within a 128-code-point script block
+ * (shown below for 0x0900 - 0x097f). The values have a structure like:
+ * ---------------------------------------------------------------
+ * | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML |
+ * | | | | | ASM | KND | | |
+ * ---------------------------------------------------------------
+ * If a code point is valid in a particular script
+ * then that bit is turned on.
+ *
+ * Unicode does not distinguish between Bengali and Assamese, so we use 1 bit
+ * to represent both languages.
+ *
+ * Telugu and Kannada have the same code points except for Vocallic_RR, which
+ * we special-case; we combine them and use 1 bit to represent both languages.
+ *
+ * Each row comment reads: ISCII byte : documented validity value : Unicode
+ * code point. The documented value does not match the mask sum on every row;
+ * the mask expression is what is compiled.
+ *
+ * TODO: It is probably easier to understand and maintain to change this
+ * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
+ */
+
+static const uint8_t validityTable[128] = {
+/* This state table is tool generated please do not edit unless you know exactly what you are doing */
+/* Note: This table was edited to mirror the Windows XP implementation */
+/*ISCII:Valid:Unicode */
+/*0xa0 : 0x00: 0x900 */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0xa1 : 0xb8: 0x901 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
+/*0xa2 : 0xfe: 0x902 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xa3 : 0xbf: 0x903 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0x00 : 0x00: 0x904 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0xa4 : 0xff: 0x905 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xa5 : 0xff: 0x906 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xa6 : 0xff: 0x907 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xa7 : 0xff: 0x908 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xa8 : 0xff: 0x909 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xa9 : 0xff: 0x90a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xaa : 0xfe: 0x90b */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0x00 : 0x00: 0x90c */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xae : 0x80: 0x90d */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0xab : 0x87: 0x90e */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xac : 0xff: 0x90f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xad : 0xff: 0x910 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xb2 : 0x80: 0x911 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0xaf : 0x87: 0x912 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xb0 : 0xff: 0x913 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xb1 : 0xff: 0x914 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xb3 : 0xff: 0x915 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xb4 : 0xfe: 0x916 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xb5 : 0xfe: 0x917 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xb6 : 0xfe: 0x918 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xb7 : 0xff: 0x919 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xb8 : 0xff: 0x91a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xb9 : 0xfe: 0x91b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xba : 0xff: 0x91c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xbb : 0xfe: 0x91d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xbc : 0xff: 0x91e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xbd : 0xff: 0x91f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xbe : 0xfe: 0x920 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xbf : 0xfe: 0x921 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xc0 : 0xfe: 0x922 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xc1 : 0xff: 0x923 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xc2 : 0xff: 0x924 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xc3 : 0xfe: 0x925 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xc4 : 0xfe: 0x926 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xc5 : 0xfe: 0x927 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xc6 : 0xff: 0x928 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xc7 : 0x81: 0x929 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK ,
+/*0xc8 : 0xff: 0x92a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xc9 : 0xfe: 0x92b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xca : 0xfe: 0x92c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xcb : 0xfe: 0x92d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xcc : 0xfe: 0x92e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xcd : 0xff: 0x92f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xcf : 0xff: 0x930 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xd0 : 0x87: 0x931 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK ,
+/*0xd1 : 0xff: 0x932 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xd2 : 0xb7: 0x933 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xd3 : 0x83: 0x934 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK ,
+/*0xd4 : 0xff: 0x935 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xd5 : 0xfe: 0x936 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0xd6 : 0xbf: 0x937 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xd7 : 0xff: 0x938 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xd8 : 0xff: 0x939 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0x00 : 0x00: 0x93A */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x93B */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0xe9 : 0xda: 0x93c */ DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x93d */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0xda : 0xff: 0x93e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xdb : 0xff: 0x93f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xdc : 0xff: 0x940 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xdd : 0xff: 0x941 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xde : 0xff: 0x942 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xdf : 0xbe: 0x943 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0x00 : 0x00: 0x944 */ DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO ,
+/*0xe3 : 0x80: 0x945 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0xe0 : 0x87: 0x946 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xe1 : 0xff: 0x947 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xe2 : 0xff: 0x948 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xe7 : 0x80: 0x949 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0xe4 : 0x87: 0x94a */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xe5 : 0xff: 0x94b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xe6 : 0xff: 0x94c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xe8 : 0xff: 0x94d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xec : 0x00: 0x94e */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0xed : 0x00: 0x94f */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x950 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x951 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x952 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x953 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x954 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x955 */ ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO ,
+/*0x00 : 0x00: 0x956 */ ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO ,
+/*0x00 : 0x00: 0x957 */ ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO ,
+/*0x00 : 0x00: 0x958 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x959 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x95a */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x95b */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x95c */ DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x95d */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x95e */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0xce : 0x98: 0x95f */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x960 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0x00 : 0x00: 0x961 */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
+/*0x00 : 0x00: 0x962 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,
+/*0x00 : 0x00: 0x963 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,
+/*0xea : 0xf8: 0x964 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*0xf1 : 0xff: 0x966 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf2 : 0xff: 0x967 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf3 : 0xff: 0x968 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf4 : 0xff: 0x969 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf5 : 0xff: 0x96a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf6 : 0xff: 0x96b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf7 : 0xff: 0x96c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf8 : 0xff: 0x96d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf9 : 0xff: 0x96e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xfa : 0xff: 0x96f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0x00 : 0x80: 0x970 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
+/*
+ * The length of the array is 128 to provide values for 0x900..0x97f.
+ * The last 15 entries for 0x971..0x97f of the validity table are all zero
+ * because no Indic script uses such Unicode code points.
+ * Only the entry for 0x971 is written out below; the remaining elements are
+ * zero-initialized by the language because the initializer list is shorter
+ * than the declared array length.
+ */
+/*0x00 : 0x00: 0x9yz */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO
+};
+
+/*
+ * Unicode -> ISCII mapping for one 128-code-point script block, indexed by
+ * the offset within the block (the row comments show the Devanagari code
+ * points 0x0900..0x097f).
+ *
+ * Values up to 0x00ff are a single ISCII byte. Larger values pack two ISCII
+ * bytes into one uint16_t: for example 0xA6E9 for 0x090c is the byte 0xA6
+ * followed by ISCII_NUKTA (0xE9), the same pairing listed in
+ * nuktaSpecialCases, and 0xa4e0 for 0x0904 is 0xA4 followed by
+ * ISCII_VOWEL_SIGN_E (0xE0), as in vowelSignESpecialCases.
+ * 0xFFFF (the value of INVALID_CHAR) fills the positions without a mapping.
+ * NOTE(review): the consumer of this table is outside the visible code -
+ * confirm there that packed values are emitted high byte first.
+ */
+static const uint16_t fromUnicodeTable[128]={
+ 0x00a0 ,/* 0x0900 */
+ 0x00a1 ,/* 0x0901 */
+ 0x00a2 ,/* 0x0902 */
+ 0x00a3 ,/* 0x0903 */
+ 0xa4e0 ,/* 0x0904 */
+ 0x00a4 ,/* 0x0905 */
+ 0x00a5 ,/* 0x0906 */
+ 0x00a6 ,/* 0x0907 */
+ 0x00a7 ,/* 0x0908 */
+ 0x00a8 ,/* 0x0909 */
+ 0x00a9 ,/* 0x090a */
+ 0x00aa ,/* 0x090b */
+ 0xA6E9 ,/* 0x090c */
+ 0x00ae ,/* 0x090d */
+ 0x00ab ,/* 0x090e */
+ 0x00ac ,/* 0x090f */
+ 0x00ad ,/* 0x0910 */
+ 0x00b2 ,/* 0x0911 */
+ 0x00af ,/* 0x0912 */
+ 0x00b0 ,/* 0x0913 */
+ 0x00b1 ,/* 0x0914 */
+ 0x00b3 ,/* 0x0915 */
+ 0x00b4 ,/* 0x0916 */
+ 0x00b5 ,/* 0x0917 */
+ 0x00b6 ,/* 0x0918 */
+ 0x00b7 ,/* 0x0919 */
+ 0x00b8 ,/* 0x091a */
+ 0x00b9 ,/* 0x091b */
+ 0x00ba ,/* 0x091c */
+ 0x00bb ,/* 0x091d */
+ 0x00bc ,/* 0x091e */
+ 0x00bd ,/* 0x091f */
+ 0x00be ,/* 0x0920 */
+ 0x00bf ,/* 0x0921 */
+ 0x00c0 ,/* 0x0922 */
+ 0x00c1 ,/* 0x0923 */
+ 0x00c2 ,/* 0x0924 */
+ 0x00c3 ,/* 0x0925 */
+ 0x00c4 ,/* 0x0926 */
+ 0x00c5 ,/* 0x0927 */
+ 0x00c6 ,/* 0x0928 */
+ 0x00c7 ,/* 0x0929 */
+ 0x00c8 ,/* 0x092a */
+ 0x00c9 ,/* 0x092b */
+ 0x00ca ,/* 0x092c */
+ 0x00cb ,/* 0x092d */
+ 0x00cc ,/* 0x092e */
+ 0x00cd ,/* 0x092f */
+ 0x00cf ,/* 0x0930 */
+ 0x00d0 ,/* 0x0931 */
+ 0x00d1 ,/* 0x0932 */
+ 0x00d2 ,/* 0x0933 */
+ 0x00d3 ,/* 0x0934 */
+ 0x00d4 ,/* 0x0935 */
+ 0x00d5 ,/* 0x0936 */
+ 0x00d6 ,/* 0x0937 */
+ 0x00d7 ,/* 0x0938 */
+ 0x00d8 ,/* 0x0939 */
+ 0xFFFF ,/* 0x093A */
+ 0xFFFF ,/* 0x093B */
+ 0x00e9 ,/* 0x093c */
+ 0xEAE9 ,/* 0x093d */
+ 0x00da ,/* 0x093e */
+ 0x00db ,/* 0x093f */
+ 0x00dc ,/* 0x0940 */
+ 0x00dd ,/* 0x0941 */
+ 0x00de ,/* 0x0942 */
+ 0x00df ,/* 0x0943 */
+ 0xDFE9 ,/* 0x0944 */
+ 0x00e3 ,/* 0x0945 */
+ 0x00e0 ,/* 0x0946 */
+ 0x00e1 ,/* 0x0947 */
+ 0x00e2 ,/* 0x0948 */
+ 0x00e7 ,/* 0x0949 */
+ 0x00e4 ,/* 0x094a */
+ 0x00e5 ,/* 0x094b */
+ 0x00e6 ,/* 0x094c */
+ 0x00e8 ,/* 0x094d */
+ 0x00ec ,/* 0x094e */
+ 0x00ed ,/* 0x094f */
+ 0xA1E9 ,/* 0x0950 */ /* OM Symbol */
+ 0xFFFF ,/* 0x0951 */
+ 0xF0B8 ,/* 0x0952 */
+ 0xFFFF ,/* 0x0953 */
+ 0xFFFF ,/* 0x0954 */
+ 0xFFFF ,/* 0x0955 */
+ 0xFFFF ,/* 0x0956 */
+ 0xFFFF ,/* 0x0957 */
+ 0xb3e9 ,/* 0x0958 */
+ 0xb4e9 ,/* 0x0959 */
+ 0xb5e9 ,/* 0x095a */
+ 0xbae9 ,/* 0x095b */
+ 0xbfe9 ,/* 0x095c */
+ 0xC0E9 ,/* 0x095d */
+ 0xc9e9 ,/* 0x095e */
+ 0x00ce ,/* 0x095f */
+ 0xAAe9 ,/* 0x0960 */
+ 0xA7E9 ,/* 0x0961 */
+ 0xDBE9 ,/* 0x0962 */
+ 0xDCE9 ,/* 0x0963 */
+ 0x00ea ,/* 0x0964 */
+ 0xeaea ,/* 0x0965 */
+ 0x00f1 ,/* 0x0966 */
+ 0x00f2 ,/* 0x0967 */
+ 0x00f3 ,/* 0x0968 */
+ 0x00f4 ,/* 0x0969 */
+ 0x00f5 ,/* 0x096a */
+ 0x00f6 ,/* 0x096b */
+ 0x00f7 ,/* 0x096c */
+ 0x00f8 ,/* 0x096d */
+ 0x00f9 ,/* 0x096e */
+ 0x00fa ,/* 0x096f */
+ 0xF0BF ,/* 0x0970 */
+ 0xFFFF ,/* 0x0971 */
+ 0xFFFF ,/* 0x0972 */
+ 0xFFFF ,/* 0x0973 */
+ 0xFFFF ,/* 0x0974 */
+ 0xFFFF ,/* 0x0975 */
+ 0xFFFF ,/* 0x0976 */
+ 0xFFFF ,/* 0x0977 */
+ 0xFFFF ,/* 0x0978 */
+ 0xFFFF ,/* 0x0979 */
+ 0xFFFF ,/* 0x097a */
+ 0xFFFF ,/* 0x097b */
+ 0xFFFF ,/* 0x097c */
+ 0xFFFF ,/* 0x097d */
+ 0xFFFF ,/* 0x097e */
+ 0xFFFF ,/* 0x097f */
+};
+/*
+ * ISCII -> Unicode mapping, indexed by the ISCII byte value (0x00..0xff).
+ *
+ * Bytes 0x00..0xa0 map to the code point with the same value. Bytes
+ * 0xa1..0xfa map into the 0x09xx (Devanagari) block, except 0xd9 (ISCII_INV),
+ * which maps to 0x200D (ZWJ). 0xFFFF (the value of INVALID_CHAR) fills the
+ * bytes without a direct mapping, which includes ATR (0xef) and EXT (0xf0).
+ */
+static const uint16_t toUnicodeTable[256]={
+ 0x0000,/* 0x00 */
+ 0x0001,/* 0x01 */
+ 0x0002,/* 0x02 */
+ 0x0003,/* 0x03 */
+ 0x0004,/* 0x04 */
+ 0x0005,/* 0x05 */
+ 0x0006,/* 0x06 */
+ 0x0007,/* 0x07 */
+ 0x0008,/* 0x08 */
+ 0x0009,/* 0x09 */
+ 0x000a,/* 0x0a */
+ 0x000b,/* 0x0b */
+ 0x000c,/* 0x0c */
+ 0x000d,/* 0x0d */
+ 0x000e,/* 0x0e */
+ 0x000f,/* 0x0f */
+ 0x0010,/* 0x10 */
+ 0x0011,/* 0x11 */
+ 0x0012,/* 0x12 */
+ 0x0013,/* 0x13 */
+ 0x0014,/* 0x14 */
+ 0x0015,/* 0x15 */
+ 0x0016,/* 0x16 */
+ 0x0017,/* 0x17 */
+ 0x0018,/* 0x18 */
+ 0x0019,/* 0x19 */
+ 0x001a,/* 0x1a */
+ 0x001b,/* 0x1b */
+ 0x001c,/* 0x1c */
+ 0x001d,/* 0x1d */
+ 0x001e,/* 0x1e */
+ 0x001f,/* 0x1f */
+ 0x0020,/* 0x20 */
+ 0x0021,/* 0x21 */
+ 0x0022,/* 0x22 */
+ 0x0023,/* 0x23 */
+ 0x0024,/* 0x24 */
+ 0x0025,/* 0x25 */
+ 0x0026,/* 0x26 */
+ 0x0027,/* 0x27 */
+ 0x0028,/* 0x28 */
+ 0x0029,/* 0x29 */
+ 0x002a,/* 0x2a */
+ 0x002b,/* 0x2b */
+ 0x002c,/* 0x2c */
+ 0x002d,/* 0x2d */
+ 0x002e,/* 0x2e */
+ 0x002f,/* 0x2f */
+ 0x0030,/* 0x30 */
+ 0x0031,/* 0x31 */
+ 0x0032,/* 0x32 */
+ 0x0033,/* 0x33 */
+ 0x0034,/* 0x34 */
+ 0x0035,/* 0x35 */
+ 0x0036,/* 0x36 */
+ 0x0037,/* 0x37 */
+ 0x0038,/* 0x38 */
+ 0x0039,/* 0x39 */
+ 0x003A,/* 0x3A */
+ 0x003B,/* 0x3B */
+ 0x003c,/* 0x3c */
+ 0x003d,/* 0x3d */
+ 0x003e,/* 0x3e */
+ 0x003f,/* 0x3f */
+ 0x0040,/* 0x40 */
+ 0x0041,/* 0x41 */
+ 0x0042,/* 0x42 */
+ 0x0043,/* 0x43 */
+ 0x0044,/* 0x44 */
+ 0x0045,/* 0x45 */
+ 0x0046,/* 0x46 */
+ 0x0047,/* 0x47 */
+ 0x0048,/* 0x48 */
+ 0x0049,/* 0x49 */
+ 0x004a,/* 0x4a */
+ 0x004b,/* 0x4b */
+ 0x004c,/* 0x4c */
+ 0x004d,/* 0x4d */
+ 0x004e,/* 0x4e */
+ 0x004f,/* 0x4f */
+ 0x0050,/* 0x50 */
+ 0x0051,/* 0x51 */
+ 0x0052,/* 0x52 */
+ 0x0053,/* 0x53 */
+ 0x0054,/* 0x54 */
+ 0x0055,/* 0x55 */
+ 0x0056,/* 0x56 */
+ 0x0057,/* 0x57 */
+ 0x0058,/* 0x58 */
+ 0x0059,/* 0x59 */
+ 0x005a,/* 0x5a */
+ 0x005b,/* 0x5b */
+ 0x005c,/* 0x5c */
+ 0x005d,/* 0x5d */
+ 0x005e,/* 0x5e */
+ 0x005f,/* 0x5f */
+ 0x0060,/* 0x60 */
+ 0x0061,/* 0x61 */
+ 0x0062,/* 0x62 */
+ 0x0063,/* 0x63 */
+ 0x0064,/* 0x64 */
+ 0x0065,/* 0x65 */
+ 0x0066,/* 0x66 */
+ 0x0067,/* 0x67 */
+ 0x0068,/* 0x68 */
+ 0x0069,/* 0x69 */
+ 0x006a,/* 0x6a */
+ 0x006b,/* 0x6b */
+ 0x006c,/* 0x6c */
+ 0x006d,/* 0x6d */
+ 0x006e,/* 0x6e */
+ 0x006f,/* 0x6f */
+ 0x0070,/* 0x70 */
+ 0x0071,/* 0x71 */
+ 0x0072,/* 0x72 */
+ 0x0073,/* 0x73 */
+ 0x0074,/* 0x74 */
+ 0x0075,/* 0x75 */
+ 0x0076,/* 0x76 */
+ 0x0077,/* 0x77 */
+ 0x0078,/* 0x78 */
+ 0x0079,/* 0x79 */
+ 0x007a,/* 0x7a */
+ 0x007b,/* 0x7b */
+ 0x007c,/* 0x7c */
+ 0x007d,/* 0x7d */
+ 0x007e,/* 0x7e */
+ 0x007f,/* 0x7f */
+ 0x0080,/* 0x80 */
+ 0x0081,/* 0x81 */
+ 0x0082,/* 0x82 */
+ 0x0083,/* 0x83 */
+ 0x0084,/* 0x84 */
+ 0x0085,/* 0x85 */
+ 0x0086,/* 0x86 */
+ 0x0087,/* 0x87 */
+ 0x0088,/* 0x88 */
+ 0x0089,/* 0x89 */
+ 0x008a,/* 0x8a */
+ 0x008b,/* 0x8b */
+ 0x008c,/* 0x8c */
+ 0x008d,/* 0x8d */
+ 0x008e,/* 0x8e */
+ 0x008f,/* 0x8f */
+ 0x0090,/* 0x90 */
+ 0x0091,/* 0x91 */
+ 0x0092,/* 0x92 */
+ 0x0093,/* 0x93 */
+ 0x0094,/* 0x94 */
+ 0x0095,/* 0x95 */
+ 0x0096,/* 0x96 */
+ 0x0097,/* 0x97 */
+ 0x0098,/* 0x98 */
+ 0x0099,/* 0x99 */
+ 0x009a,/* 0x9a */
+ 0x009b,/* 0x9b */
+ 0x009c,/* 0x9c */
+ 0x009d,/* 0x9d */
+ 0x009e,/* 0x9e */
+ 0x009f,/* 0x9f */
+ 0x00A0,/* 0xa0 */
+ 0x0901,/* 0xa1 */
+ 0x0902,/* 0xa2 */
+ 0x0903,/* 0xa3 */
+ 0x0905,/* 0xa4 */
+ 0x0906,/* 0xa5 */
+ 0x0907,/* 0xa6 */
+ 0x0908,/* 0xa7 */
+ 0x0909,/* 0xa8 */
+ 0x090a,/* 0xa9 */
+ 0x090b,/* 0xaa */
+ 0x090e,/* 0xab */
+ 0x090f,/* 0xac */
+ 0x0910,/* 0xad */
+ 0x090d,/* 0xae */
+ 0x0912,/* 0xaf */
+ 0x0913,/* 0xb0 */
+ 0x0914,/* 0xb1 */
+ 0x0911,/* 0xb2 */
+ 0x0915,/* 0xb3 */
+ 0x0916,/* 0xb4 */
+ 0x0917,/* 0xb5 */
+ 0x0918,/* 0xb6 */
+ 0x0919,/* 0xb7 */
+ 0x091a,/* 0xb8 */
+ 0x091b,/* 0xb9 */
+ 0x091c,/* 0xba */
+ 0x091d,/* 0xbb */
+ 0x091e,/* 0xbc */
+ 0x091f,/* 0xbd */
+ 0x0920,/* 0xbe */
+ 0x0921,/* 0xbf */
+ 0x0922,/* 0xc0 */
+ 0x0923,/* 0xc1 */
+ 0x0924,/* 0xc2 */
+ 0x0925,/* 0xc3 */
+ 0x0926,/* 0xc4 */
+ 0x0927,/* 0xc5 */
+ 0x0928,/* 0xc6 */
+ 0x0929,/* 0xc7 */
+ 0x092a,/* 0xc8 */
+ 0x092b,/* 0xc9 */
+ 0x092c,/* 0xca */
+ 0x092d,/* 0xcb */
+ 0x092e,/* 0xcc */
+ 0x092f,/* 0xcd */
+ 0x095f,/* 0xce */
+ 0x0930,/* 0xcf */
+ 0x0931,/* 0xd0 */
+ 0x0932,/* 0xd1 */
+ 0x0933,/* 0xd2 */
+ 0x0934,/* 0xd3 */
+ 0x0935,/* 0xd4 */
+ 0x0936,/* 0xd5 */
+ 0x0937,/* 0xd6 */
+ 0x0938,/* 0xd7 */
+ 0x0939,/* 0xd8 */
+ 0x200D,/* 0xd9 */
+ 0x093e,/* 0xda */
+ 0x093f,/* 0xdb */
+ 0x0940,/* 0xdc */
+ 0x0941,/* 0xdd */
+ 0x0942,/* 0xde */
+ 0x0943,/* 0xdf */
+ 0x0946,/* 0xe0 */
+ 0x0947,/* 0xe1 */
+ 0x0948,/* 0xe2 */
+ 0x0945,/* 0xe3 */
+ 0x094a,/* 0xe4 */
+ 0x094b,/* 0xe5 */
+ 0x094c,/* 0xe6 */
+ 0x0949,/* 0xe7 */
+ 0x094d,/* 0xe8 */
+ 0x093c,/* 0xe9 */
+ 0x0964,/* 0xea */
+ 0xFFFF,/* 0xeb */
+ 0xFFFF,/* 0xec */
+ 0xFFFF,/* 0xed */
+ 0xFFFF,/* 0xee */
+ 0xFFFF,/* 0xef */
+ 0xFFFF,/* 0xf0 */
+ 0x0966,/* 0xf1 */
+ 0x0967,/* 0xf2 */
+ 0x0968,/* 0xf3 */
+ 0x0969,/* 0xf4 */
+ 0x096a,/* 0xf5 */
+ 0x096b,/* 0xf6 */
+ 0x096c,/* 0xf7 */
+ 0x096d,/* 0xf8 */
+ 0x096e,/* 0xf9 */
+ 0x096f,/* 0xfa */
+ 0xFFFF,/* 0xfb */
+ 0xFFFF,/* 0xfc */
+ 0xFFFF,/* 0xfd */
+ 0xFFFF,/* 0xfe */
+ 0xFFFF /* 0xff */
+};
+
+/*
+ * Special cases involving ISCII_VOWEL_SIGN_E. Row 0 is a header whose first
+ * element is the total number of rows (header included); every following row
+ * is { ISCII byte, Unicode code point }. The single entry pairs 0xA4 with
+ * 0x0904, the inverse of the 0xa4e0 entry in fromUnicodeTable.
+ * NOTE(review): the lookup code is outside the visible range - presumably the
+ * ISCII byte is the one preceding the vowel sign; confirm at the use site.
+ */
+static const uint16_t vowelSignESpecialCases[][2]={
+ { 2 /*length of array*/ , 0 },
+ { 0xA4 , 0x0904 },
+};
+
+/*
+ * Special cases involving the nukta. Row 0 is a header whose first element is
+ * the total number of rows (header included: 1 + 15 = 16); every following
+ * row is { ISCII byte, Unicode code point }. Each pair mirrors a two-byte
+ * "<byte> E9" entry of fromUnicodeTable (E9 being ISCII_NUKTA), e.g.
+ * { 0xA6, 0x090c } corresponds to 0xA6E9 at index 0x090c.
+ * NOTE(review): the lookup code is outside the visible range - presumably the
+ * ISCII byte is the one preceding the nukta; confirm at the use site.
+ */
+static const uint16_t nuktaSpecialCases[][2]={
+ { 16 /*length of array*/ , 0 },
+ { 0xA6 , 0x090c },
+ { 0xEA , 0x093D },
+ { 0xDF , 0x0944 },
+ { 0xA1 , 0x0950 },
+ { 0xb3 , 0x0958 },
+ { 0xb4 , 0x0959 },
+ { 0xb5 , 0x095a },
+ { 0xba , 0x095b },
+ { 0xbf , 0x095c },
+ { 0xC0 , 0x095d },
+ { 0xc9 , 0x095e },
+ { 0xAA , 0x0960 },
+ { 0xA7 , 0x0961 },
+ { 0xDB , 0x0962 },
+ { 0xDC , 0x0963 },
+};
+
+
+/*
+ * Writes the one to three bytes packed in targetByteUnit to the output.
+ *
+ *  - A value <= 0xFF is written as one byte; a larger value is written most
+ *    significant byte first (an optional third byte for values > 0xFFFF,
+ *    then the high byte, then the low byte).
+ *  - When offsets is non-NULL, one offset is recorded per byte written. The
+ *    offset is (source - args->source - 1), i.e. the index of the code unit
+ *    just consumed; it is decremented once more before being recorded for
+ *    the third byte of a value > 0xFFFF.
+ *  - Bytes that do not fit before targetLimit are appended to
+ *    args->converter->charErrorBuffer and *err is set to
+ *    U_BUFFER_OVERFLOW_ERROR. When there is no room at all, the upper bytes
+ *    are only stored if their bits are nonzero.
+ *
+ * target and offsets are advanced in place, so the caller must pass lvalues.
+ * targetByteUnit is expanded several times without parentheses: pass a plain
+ * variable, not an expression with side effects.
+ */
+#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t offset = (int32_t)(source - args->source-1); \
+ /* write the targetUniChar to target */ \
+ if(target < targetLimit){ \
+ if(targetByteUnit <= 0xFF){ \
+ *(target)++ = (uint8_t)(targetByteUnit); \
+ if(offsets){ \
+ *(offsets++) = offset; \
+ } \
+ }else{ \
+ if (targetByteUnit > 0xFFFF) { \
+ *(target)++ = (uint8_t)(targetByteUnit>>16); \
+ if (offsets) { \
+ --offset; \
+ *(offsets++) = offset; \
+ } \
+ } \
+ if (!(target < targetLimit)) { \
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
+ (uint8_t)(targetByteUnit >> 8); \
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
+ (uint8_t)targetByteUnit; \
+ *err = U_BUFFER_OVERFLOW_ERROR; \
+ } else { \
+ *(target)++ = (uint8_t)(targetByteUnit>>8); \
+ if(offsets){ \
+ *(offsets++) = offset; \
+ } \
+ if(target < targetLimit){ \
+ *(target)++ = (uint8_t) targetByteUnit; \
+ if(offsets){ \
+ *(offsets++) = offset ; \
+ } \
+ }else{ \
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\
+ (uint8_t) (targetByteUnit); \
+ *err = U_BUFFER_OVERFLOW_ERROR; \
+ } \
+ } \
+ } \
+ }else{ \
+ if (targetByteUnit & 0xFF0000) { \
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
+ (uint8_t) (targetByteUnit >>16); \
+ } \
+ if(targetByteUnit & 0xFF00){ \
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
+ (uint8_t) (targetByteUnit >>8); \
+ } \
+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
+ (uint8_t) (targetByteUnit); \
+ *err = U_BUFFER_OVERFLOW_ERROR; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/*
+ * Converts UTF-16 code units from args->source into ISCII bytes at
+ * args->target, optionally recording source offsets in args->offsets.
+ *
+ * - ASCII_END and below is written through unchanged.
+ * - Indic code points are normalized to the Devanagari block by subtracting
+ *   the delta of their script block and then mapped through fromUnicodeTable;
+ *   whenever the script block changes, an ATR byte plus the ISCII language
+ *   code is written first.
+ * - Gurmukhi gets extra handling: Tippi is mapped like Bindi, and Adhak
+ *   followed by a consonant is written as C + HALANT + C.
+ * - On an unmappable or malformed code unit *err is set, the code point is
+ *   stored in converter->fromUChar32 and the loop stops.
+ *
+ * On return args->source and args->target are updated; args->offsets is not
+ * written back by this function.
+ *
+ * Rules:
+ * Explicit Halant :
+ * <HALANT> + <ZWNJ>
+ * Soft Halant :
+ * <HALANT> + <ZWJ>
+ */
+static void U_CALLCONV
+UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
+ UConverterFromUnicodeArgs * args, UErrorCode * err) {
+ const UChar *source = args->source;
+ const UChar *sourceLimit = args->sourceLimit;
+ unsigned char *target = (unsigned char *) args->target;
+ unsigned char *targetLimit = (unsigned char *) args->targetLimit;
+ int32_t* offsets = args->offsets;
+ uint32_t targetByteUnit = 0x0000;
+ UChar32 sourceChar = 0x0000;
+ UChar32 tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */
+ UConverterDataISCII *converterData;
+ uint16_t newDelta=0;
+ uint16_t range = 0;
+ UBool deltaChanged = FALSE;
+
+ /* reject a missing converter and buffers whose limit precedes their start */
+ if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {
+ *err = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ /* initialize data */
+ converterData=(UConverterDataISCII*)args->converter->extraInfo;
+ newDelta=converterData->currentDeltaFromUnicode;
+ range = (uint16_t)(newDelta/DELTA);
+
+ /* A code unit was left pending in fromUChar32 (it is stored at the bottom of
+  * the loop); presumably only a lead surrogate waiting for its trail survives
+  * until the next call - TODO confirm. Resume at the trail lookup.
+  */
+ if ((sourceChar = args->converter->fromUChar32)!=0) {
+ goto getTrail;
+ }
+
+ /*writing the char to the output stream */
+ while (source < sourceLimit) {
+ /* Write the language code following LF only if LF is not the last character. */
+ if (args->converter->fromUnicodeStatus == LF) {
+ targetByteUnit = ATR<<8;
+ targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
+ args->converter->fromUnicodeStatus = 0x0000;
+ /* now append ATR and language code */
+ WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
+ if (U_FAILURE(*err)) {
+ break;
+ }
+ }
+
+ sourceChar = *source++;
+ /* remember the context before this character may change it (needed for Adhak below) */
+ tempContextFromUnicode = converterData->contextCharFromUnicode;
+
+ targetByteUnit = missingCharMarker;
+
+ /*check if input is in ASCII and C0 control codes range*/
+ if (sourceChar <= ASCII_END) {
+ args->converter->fromUnicodeStatus = sourceChar;
+ WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
+ if (U_FAILURE(*err)) {
+ break;
+ }
+ continue;
+ }
+ switch (sourceChar) {
+ case ZWNJ:
+ /* contextChar has HALANT */
+ if (converterData->contextCharFromUnicode) {
+ converterData->contextCharFromUnicode = 0x00;
+ targetByteUnit = ISCII_HALANT;
+ } else {
+ /* consume ZWNJ and continue */
+ converterData->contextCharFromUnicode = 0x00;
+ continue;
+ }
+ break;
+ case ZWJ:
+ /* contextChar has HALANT */
+ if (converterData->contextCharFromUnicode) {
+ targetByteUnit = ISCII_NUKTA;
+ } else {
+ targetByteUnit =ISCII_INV;
+ }
+ converterData->contextCharFromUnicode = 0x00;
+ break;
+ default:
+ /* is the sourceChar in the INDIC_RANGE? */
+ if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {
+ /* Danda and Double Danda are valid in Northern scripts.. since Unicode
+ * does not include these codepoints in all Northern scripts we need to
+ * filter them out
+ */
+ if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {
+ /* find out to which block the sourceChar belongs*/
+ range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
+ newDelta =(uint16_t)(range*DELTA);
+
+ /* Now are we in the same block as the previous? */
+ if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) {
+ converterData->currentDeltaFromUnicode = newDelta;
+ converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
+ deltaChanged =TRUE;
+ converterData->isFirstBuffer=FALSE;
+ }
+
+ if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {
+ if (sourceChar == PNJ_TIPPI) {
+ /* Make sure Tippi is converted to Bindi. */
+ sourceChar = PNJ_BINDI;
+ } else if (sourceChar == PNJ_ADHAK) {
+ /* This is for consonant cluster handling. */
+ converterData->contextCharFromUnicode = PNJ_ADHAK;
+ }
+
+ }
+ /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
+ /* now subtract the new delta from sourceChar*/
+ sourceChar -= converterData->currentDeltaFromUnicode;
+ }
+
+ /* get the target byte unit */
+ targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];
+
+ /* is the code point valid in current script? */
+ if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) {
+ /* Vocallic RR is assigned in ISCII Telugu and Unicode */
+ if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) {
+ targetByteUnit=missingCharMarker;
+ }
+ }
+
+ if (deltaChanged) {
+ /* we are in a script block which is different than
+ * previous sourceChar's script block write ATR and language codes
+ */
+ uint32_t temp=0;
+ temp =(uint16_t)(ATR<<8);
+ temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
+ /* reset */
+ deltaChanged=FALSE;
+ /* now append ATR and language code */
+ WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
+ if (U_FAILURE(*err)) {
+ break;
+ }
+ }
+
+ /* Adhak itself produces no output: it was recorded in contextCharFromUnicode
+  * above and is applied to the following consonant, so skip the context reset.
+  */
+ if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
+ continue;
+ }
+ }
+ /* reset context char */
+ converterData->contextCharFromUnicode = 0x00;
+ break;
+ }
+ if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
+ /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
+ /* reset context char */
+ converterData->contextCharFromUnicode = 0x0000;
+ targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;
+ /* write targetByteUnit to target */
+ WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err);
+ if (U_FAILURE(*err)) {
+ break;
+ }
+ } else if (targetByteUnit != missingCharMarker) {
+ /* remember a HALANT so that a following ZWNJ/ZWJ can be interpreted */
+ if (targetByteUnit==ISCII_HALANT) {
+ converterData->contextCharFromUnicode = (UChar)targetByteUnit;
+ }
+ /* write targetByteUnit to target*/
+ WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
+ if (U_FAILURE(*err)) {
+ break;
+ }
+ } else {
+ /* oops.. the code point is unassigned */
+ /*check if the char is a First surrogate*/
+ if (U16_IS_SURROGATE(sourceChar)) {
+ if (U16_IS_SURROGATE_LEAD(sourceChar)) {
+getTrail:
+ /*look ahead to find the trail surrogate*/
+ if (source < sourceLimit) {
+ /* test the following code unit */
+ UChar trail= (*source);
+ if (U16_IS_TRAIL(trail)) {
+ source++;
+ sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
+ /* the combined supplementary code point is reported as unassigned */
+ *err =U_INVALID_CHAR_FOUND;
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else {
+ /* no more input: not an error, the lead surrogate is kept in fromUChar32 below */
+ *err = U_ZERO_ERROR;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else {
+ /* callback(unassigned) for a BMP code point */
+ *err = U_INVALID_CHAR_FOUND;
+ }
+
+ args->converter->fromUChar32=sourceChar;
+ break;
+ }
+ }/* end while (source < sourceLimit) */
+
+ /*save the state and return */
+ args->source = source;
+ args->target = (char*)target;
+}
+
+/*
+ * Script table used by the ISCII-to-Unicode converter after an ATR byte.
+ * It is indexed with the low nibble of the script code byte
+ * (lookupTable[sourceChar & 0x0F]). Column 0 is the script block number that
+ * is multiplied by DELTA to obtain the Unicode delta; column 1 is the
+ * validity mask (cast to MaskEnum by the reader).
+ * NOTE(review): row 6 repeats the BENGALI entry and the TELUGU row uses
+ * KND_MASK - presumably intentional script sharing; confirm against the
+ * ISCII script codes.
+ */
+static const uint16_t lookupTable[][2]={
+ { ZERO, ZERO }, /*DEFAULT*/
+ { ZERO, ZERO }, /*ROMAN*/
+ { DEVANAGARI, DEV_MASK },
+ { BENGALI, BNG_MASK },
+ { TAMIL, TML_MASK },
+ { TELUGU, KND_MASK },
+ { BENGALI, BNG_MASK },
+ { ORIYA, ORI_MASK },
+ { KANNADA, KND_MASK },
+ { MALAYALAM, MLM_MASK },
+ { GUJARATI, GJR_MASK },
+ { GURMUKHI, PNJ_MASK }
+};
+
+/*
+ * Writes one UTF-16 code unit for the ISCII-to-Unicode converter.
+ * delta is first added to targetUniChar (in place, so it must be an lvalue)
+ * unless the value is at or below ASCII_END or is ZWJ, ZWNJ, DANDA or
+ * DOUBLE_DANDA. The unit then goes to *target (which is advanced) or, when
+ * target has reached args->targetLimit, into the converter's
+ * UCharErrorBuffer with *err set to U_BUFFER_OVERFLOW_ERROR.
+ * If offsets is non-NULL, offset is stored there and offsets is advanced.
+ * The source parameter is not referenced by the macro body.
+ */
+#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err) UPRV_BLOCK_MACRO_BEGIN { \
+ /* add offset to current Indic Block */ \
+ if(targetUniChar>ASCII_END && \
+ targetUniChar != ZWJ && \
+ targetUniChar != ZWNJ && \
+ targetUniChar != DANDA && \
+ targetUniChar != DOUBLE_DANDA){ \
+ \
+ targetUniChar+=(uint16_t)(delta); \
+ } \
+ /* now write the targetUniChar */ \
+ if(target<args->targetLimit){ \
+ *(target)++ = (UChar)targetUniChar; \
+ if(offsets){ \
+ *(offsets)++ = (int32_t)(offset); \
+ } \
+ }else{ \
+ args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \
+ (UChar)targetUniChar; \
+ *err = U_BUFFER_OVERFLOW_ERROR; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/*
+ * Looks up the Unicode mapping of an ISCII byte in toUnicodeTable and stores
+ * it in targetUniChar. For bytes above ASCII_END the result is replaced by
+ * missingCharMarker when validityTable says the code point is not valid for
+ * data->currentMaskToUnicode, except for VOCALLIC_RR while the current delta
+ * is TELUGU_DELTA.
+ */
+#define GET_MAPPING(sourceChar,targetUniChar,data) UPRV_BLOCK_MACRO_BEGIN { \
+ targetUniChar = toUnicodeTable[(sourceChar)] ; \
+ /* is the code point valid in current script? */ \
+ if(sourceChar> ASCII_END && \
+ (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \
+ /* Vocallic RR is assigned in ISCII Telugu and Unicode */ \
+ if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \
+ targetUniChar!=VOCALLIC_RR){ \
+ targetUniChar=missingCharMarker; \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/***********
+ * Rules for ISCII to Unicode converter
+ * ISCII is a stateful encoding. To convert ISCII bytes to Unicode,
+ * which has both precomposed and decomposed forms of characters,
+ * pre-context and post-context need to be considered.
+ *
+ * Post context
+ * i) ATR : Attribute code is used to declare the font and script switching.
+ * Currently we only switch scripts; font codes are consumed without generating an error
+ * ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
+ * obsolete characters
+ * Pre context
+ * i) Halant: if preceded by a halant then it is an explicit halant
+ * ii) Nukta :
+ * a) if preceded by a halant then it is a soft halant
+ * b) if preceded by specific consonants and the ligatures have pre-composed
+ * characters in Unicode then convert to pre-composed characters
+ * iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
+ *
+ */
+
+/*
+ * Converts ISCII bytes from args->source into UTF-16 at args->target,
+ * optionally recording source offsets through args->offsets.
+ *
+ * State kept between bytes (and between calls):
+ * - data->contextCharToUnicode: the previous ISCII byte, used for ATR, EXT,
+ *   INV, HALANT, DANDA and NUKTA context rules.
+ * - converter->toUnicodeStatus: the code point mapped from the previous
+ *   byte. Output is delayed by one character so that the next byte can
+ *   still change it; missingCharMarker means nothing is pending.
+ * - data->prevToUnicodeStatus: a Gurmukhi consonant held back one step
+ *   further for consonant-cluster (Adhak) handling.
+ *
+ * On an illegal or unassigned byte *err is set, the byte is stored in
+ * converter->toUBytes[0] with toULength 1 and the loop stops. When the target
+ * is full *err becomes U_BUFFER_OVERFLOW_ERROR. With args->flush set and all
+ * input consumed, a pending character is written out and a dangling
+ * ATR/EXT/INV byte is placed in toUBytes.
+ *
+ * args->source and args->target are updated on return.
+ */
+static void U_CALLCONV
+UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) {
+ const char *source = ( char *) args->source;
+ UChar *target = args->target;
+ const char *sourceLimit = args->sourceLimit;
+ const UChar* targetLimit = args->targetLimit;
+ uint32_t targetUniChar = 0x0000;
+ uint8_t sourceChar = 0x0000;
+ UConverterDataISCII* data;
+ UChar32* toUnicodeStatus=NULL;
+ UChar32 tempTargetUniChar = 0x0000;
+ UChar* contextCharToUnicode= NULL;
+ UBool found;
+ int i;
+ int offset = 0;
+
+ /* Reject a missing converter and buffers whose limit precedes their start.
+  * The limits must be compared here: target and source were just copied from
+  * args, so comparing them with args->target / args->source can never fail.
+  */
+ if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {
+ *err = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ data = (UConverterDataISCII*)(args->converter->extraInfo);
+ contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
+ toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/
+
+ while (U_SUCCESS(*err) && source<sourceLimit) {
+
+ targetUniChar = missingCharMarker;
+
+ if (target < targetLimit) {
+ sourceChar = (unsigned char)*(source)++;
+
+ /* look at the post-context and perform special processing */
+ if (*contextCharToUnicode==ATR) {
+
+ /* If we have ATR in *contextCharToUnicode then we need to change our
+ * state to the Indic Script specified by sourceChar
+ */
+
+ /* check if the sourceChar is supported script range*/
+ if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {
+ data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
+ data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1];
+ } else if (sourceChar==DEF) {
+ /* switch back to default */
+ data->currentDeltaToUnicode = data->defDeltaToUnicode;
+ data->currentMaskToUnicode = data->defMaskToUnicode;
+ } else {
+ if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
+ /* these are display codes consume and continue */
+ } else {
+ *err =U_ILLEGAL_CHAR_FOUND;
+ /* reset */
+ *contextCharToUnicode=NO_CHAR_MARKER;
+ goto CALLBACK;
+ }
+ }
+
+ /* reset */
+ *contextCharToUnicode=NO_CHAR_MARKER;
+
+ continue;
+
+ } else if (*contextCharToUnicode==EXT) {
+ /* check if sourceChar is in 0xA1-0xEE range */
+ if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
+ /* We currently support only Anudatta and Devanagari abbreviation sign */
+ if (sourceChar==0xBF || sourceChar == 0xB8) {
+ targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
+
+ /* find out if the mapping is valid in this state */
+ if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
+ *contextCharToUnicode= NO_CHAR_MARKER;
+
+ /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+ if (data->prevToUnicodeStatus) {
+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
+ data->prevToUnicodeStatus = 0x0000;
+ }
+ /* write to target */
+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
+
+ continue;
+ }
+ }
+ /* byte unit is unassigned */
+ targetUniChar = missingCharMarker;
+ *err= U_INVALID_CHAR_FOUND;
+ } else {
+ /* only 0xA1 - 0xEE are legal after EXT char */
+ *contextCharToUnicode= NO_CHAR_MARKER;
+ *err = U_ILLEGAL_CHAR_FOUND;
+ }
+ goto CALLBACK;
+ } else if (*contextCharToUnicode==ISCII_INV) {
+ if (sourceChar==ISCII_HALANT) {
+ targetUniChar = 0x0020; /* replace with space according to Indic FAQ */
+ } else {
+ targetUniChar = ZWJ;
+ }
+
+ /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+ if (data->prevToUnicodeStatus) {
+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
+ data->prevToUnicodeStatus = 0x0000;
+ }
+ /* write to target */
+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
+ /* reset */
+ *contextCharToUnicode=NO_CHAR_MARKER;
+ }
+
+ /* look at the pre-context and perform special processing */
+ switch (sourceChar) {
+ case ISCII_INV:
+ case EXT:
+ case ATR:
+ /* these bytes only set up context for the next byte; flush what is pending */
+ *contextCharToUnicode = (UChar)sourceChar;
+
+ if (*toUnicodeStatus != missingCharMarker) {
+ /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+ if (data->prevToUnicodeStatus) {
+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
+ data->prevToUnicodeStatus = 0x0000;
+ }
+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
+ *toUnicodeStatus = missingCharMarker;
+ }
+ continue;
+ case ISCII_DANDA:
+ /* handle double danda*/
+ if (*contextCharToUnicode== ISCII_DANDA) {
+ targetUniChar = DOUBLE_DANDA;
+ /* clear the context */
+ *contextCharToUnicode = NO_CHAR_MARKER;
+ *toUnicodeStatus = missingCharMarker;
+ } else {
+ GET_MAPPING(sourceChar,targetUniChar,data);
+ *contextCharToUnicode = sourceChar;
+ }
+ break;
+ case ISCII_HALANT:
+ /* handle explicit halant */
+ if (*contextCharToUnicode == ISCII_HALANT) {
+ targetUniChar = ZWNJ;
+ /* clear the context */
+ *contextCharToUnicode = NO_CHAR_MARKER;
+ } else {
+ GET_MAPPING(sourceChar,targetUniChar,data);
+ *contextCharToUnicode = sourceChar;
+ }
+ break;
+ case 0x0A:
+ case 0x0D:
+ /* a line break schedules a return to the default script (applied further below) */
+ data->resetToDefaultToUnicode = TRUE;
+ GET_MAPPING(sourceChar,targetUniChar,data)
+ ;
+ *contextCharToUnicode = sourceChar;
+ break;
+
+ case ISCII_VOWEL_SIGN_E:
+ /* entry [0][0] of the special-case table holds the loop bound */
+ i=1;
+ found=FALSE;
+ for (; i<vowelSignESpecialCases[0][0]; i++) {
+ U_ASSERT(i<UPRV_LENGTHOF(vowelSignESpecialCases));
+ if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {
+ targetUniChar=vowelSignESpecialCases[i][1];
+ found=TRUE;
+ break;
+ }
+ }
+ if (found) {
+ /* find out if the mapping is valid in this state */
+ if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
+ /*targetUniChar += data->currentDeltaToUnicode ;*/
+ *contextCharToUnicode= NO_CHAR_MARKER;
+ *toUnicodeStatus = missingCharMarker;
+ break;
+ }
+ }
+ GET_MAPPING(sourceChar,targetUniChar,data);
+ *contextCharToUnicode = sourceChar;
+ break;
+
+ case ISCII_NUKTA:
+ /* handle soft halant */
+ if (*contextCharToUnicode == ISCII_HALANT) {
+ targetUniChar = ZWJ;
+ /* clear the context */
+ *contextCharToUnicode = NO_CHAR_MARKER;
+ break;
+ } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) {
+ /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+ if (data->prevToUnicodeStatus) {
+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
+ data->prevToUnicodeStatus = 0x0000;
+ }
+ /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.
+ * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
+ */
+ targetUniChar = PNJ_RRA;
+ WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
+ if (U_SUCCESS(*err)) {
+ targetUniChar = PNJ_SIGN_VIRAMA;
+ WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
+ if (U_SUCCESS(*err)) {
+ targetUniChar = PNJ_HA;
+ WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
+ } else {
+ /* target overflowed: queue the rest of the sequence in the error buffer */
+ args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
+ }
+ } else {
+ /* target overflowed: queue the rest of the sequence in the error buffer */
+ args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA;
+ args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
+ }
+ *toUnicodeStatus = missingCharMarker;
+ data->contextCharToUnicode = NO_CHAR_MARKER;
+ continue;
+ } else {
+ /* try to handle <CHAR> + ISCII_NUKTA special mappings */
+ i=1;
+ found =FALSE;
+ for (; i<nuktaSpecialCases[0][0]; i++) {
+ if (nuktaSpecialCases[i][0]==(uint8_t)
+ *contextCharToUnicode) {
+ targetUniChar=nuktaSpecialCases[i][1];
+ found =TRUE;
+ break;
+ }
+ }
+ if (found) {
+ /* find out if the mapping is valid in this state */
+ if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
+ /*targetUniChar += data->currentDeltaToUnicode ;*/
+ *contextCharToUnicode= NO_CHAR_MARKER;
+ *toUnicodeStatus = missingCharMarker;
+ if (data->currentDeltaToUnicode == PNJ_DELTA) {
+ /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+ if (data->prevToUnicodeStatus) {
+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
+ data->prevToUnicodeStatus = 0x0000;
+ }
+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
+ continue;
+ }
+ break;
+ }
+ /* else fall through to default */
+ }
+ /* else fall through to default */
+ U_FALLTHROUGH;
+ }
+ default:GET_MAPPING(sourceChar,targetUniChar,data)
+ ;
+ *contextCharToUnicode = sourceChar;
+ break;
+ }
+
+ if (*toUnicodeStatus != missingCharMarker) {
+ /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
+ if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&
+ (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && ((UChar32)(targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus)) {
+ /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
+ offset = (int)(source-args->source - 3);
+ tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */
+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err);
+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err);
+ data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */
+ *toUnicodeStatus = missingCharMarker;
+ continue;
+ } else {
+ /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+ if (data->prevToUnicodeStatus) {
+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
+ data->prevToUnicodeStatus = 0x0000;
+ }
+ /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.
+ * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
+ */
+ if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
+ targetUniChar = PNJ_TIPPI - PNJ_DELTA;
+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
+ } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
+ /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
+ data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
+ } else {
+ /* write the previously mapped codepoint */
+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
+ }
+ }
+ *toUnicodeStatus = missingCharMarker;
+ }
+
+ if (targetUniChar != missingCharMarker) {
+ /* now save the targetUniChar for delayed write */
+ *toUnicodeStatus = (UChar) targetUniChar;
+ if (data->resetToDefaultToUnicode==TRUE) {
+ data->currentDeltaToUnicode = data->defDeltaToUnicode;
+ data->currentMaskToUnicode = data->defMaskToUnicode;
+ data->resetToDefaultToUnicode=FALSE;
+ }
+ } else {
+
+ /* we reach here only if targetUniChar == missingCharMarker
+ * so assign codes to reason and err
+ */
+ *err = U_INVALID_CHAR_FOUND;
+CALLBACK:
+ /* record the offending byte for the caller and stop converting */
+ args->converter->toUBytes[0] = (uint8_t) sourceChar;
+ args->converter->toULength = 1;
+ break;
+ }
+
+ } else {
+ *err =U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+
+ if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {
+ /* end of the input stream */
+ UConverter *cnv = args->converter;
+
+ if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) {
+ /* set toUBytes[] */
+ cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
+ cnv->toULength = 1;
+
+ /* avoid looping on truncated sequences */
+ *contextCharToUnicode = NO_CHAR_MARKER;
+ } else {
+ cnv->toULength = 0;
+ }
+
+ if (*toUnicodeStatus != missingCharMarker) {
+ /* output a remaining target character */
+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err);
+ *toUnicodeStatus = missingCharMarker;
+ }
+ }
+
+ args->target = target;
+ args->source = source;
+}
+
+/*
+ * Layout of a safe-cloned ISCII converter: the UConverter itself followed by
+ * its ISCII state. _ISCII_SafeClone reports sizeof this struct as the needed
+ * buffer size and points cnv.extraInfo at mydata.
+ */
+struct cloneISCIIStruct {
+ UConverter cnv; /* the converter object proper */
+ UConverterDataISCII mydata; /* private copy of the ISCII state */
+};
+
+/*
+ * Safe-clone hook for ISCII converters.
+ * Returns NULL when *status already holds a failure, or when *pBufferSize is
+ * 0 (a preflight request, answered by storing the required size).
+ * Otherwise copies the ISCII state of cnv into the cloneISCIIStruct located
+ * at stackBuffer and returns the converter embedded in it.
+ */
+static UConverter * U_CALLCONV
+_ISCII_SafeClone(const UConverter *cnv,
+ void *stackBuffer,
+ int32_t *pBufferSize,
+ UErrorCode *status)
+{
+ if (U_FAILURE(*status)) {
+ return NULL;
+ }
+
+ /* 'preflighting' request - only report the space a clone needs */
+ if (*pBufferSize == 0) {
+ *pBufferSize = (int32_t)sizeof(struct cloneISCIIStruct);
+ return NULL;
+ }
+
+ /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
+ struct cloneISCIIStruct *clone = (struct cloneISCIIStruct *)stackBuffer;
+ UConverterDataISCII *clonedState = &clone->mydata;
+
+ uprv_memcpy(clonedState, cnv->extraInfo, sizeof(UConverterDataISCII));
+
+ /* the clone owns its state inside the same buffer */
+ clone->cnv.extraInfo = clonedState;
+ clone->cnv.isExtraLocal = TRUE;
+
+ return &clone->cnv;
+}
+
+/*
+ * Adds to sa the set of Unicode code points this converter can convert:
+ * the range 0..ASCII_END, every position that validityTable marks valid for
+ * each script from DEVANAGARI through MALAYALAM (plus position 0x31 of the
+ * Telugu block), and DANDA, DOUBLE_DANDA, ZWNJ and ZWJ.
+ * cnv, which and pErrorCode are not used.
+ */
+static void U_CALLCONV
+_ISCIIGetUnicodeSet(const UConverter *cnv,
+ const USetAdder *sa,
+ UConverterUnicodeSet which,
+ UErrorCode *pErrorCode)
+{
+ (void)cnv;
+ (void)which;
+ (void)pErrorCode;
+
+ /* Since all ISCII versions allow switching to other ISCII
+ scripts, we add all roundtrippable characters to this set. */
+ sa->addRange(sa->set, 0, ASCII_END);
+
+ for (int32_t script = DEVANAGARI; script <= MALAYALAM; ++script) {
+ const uint8_t scriptMask = (uint8_t)(lookupInitialData[script].maskEnum);
+ /* first code point of this script's Unicode block */
+ const int32_t blockStart = (script * DELTA) + INDIC_BLOCK_BEGIN;
+
+ for (int32_t pos = 0; pos < DELTA; ++pos) {
+ /* position 0x31 is additionally accepted for TELUGU */
+ if (!(validityTable[pos] & scriptMask) && !(script==TELUGU && pos==0x31)) {
+ continue;
+ }
+ sa->add(sa->set, pos + blockStart);
+ }
+ }
+
+ /* code points that are shared by all scripts */
+ sa->add(sa->set, DANDA);
+ sa->add(sa->set, DOUBLE_DANDA);
+ sa->add(sa->set, ZWNJ);
+ sa->add(sa->set, ZWJ);
+}
+U_CDECL_END
+/*
+ * Implementation table for the ISCII converter (positional initializer).
+ * The same routine fills both consecutive to-Unicode entries, and likewise
+ * for the two from-Unicode entries - presumably the plain and with-offsets
+ * slots, as the function names suggest; confirm against the UConverterImpl
+ * declaration. NULL entries leave a slot without an ISCII-specific function.
+ */
+static const UConverterImpl _ISCIIImpl={
+
+ UCNV_ISCII,
+
+ NULL,
+ NULL,
+
+ _ISCIIOpen,
+ _ISCIIClose,
+ _ISCIIReset,
+
+ UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
+ UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
+ UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
+ UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
+ NULL,
+
+ NULL,
+ _ISCIIgetName,
+ NULL,
+ _ISCII_SafeClone,
+ _ISCIIGetUnicodeSet,
+ NULL,
+ NULL
+};
+
+/*
+ * Static description of the ISCII converter (positional initializer):
+ * structure size, the name "ISCII", and the UCNV_IBM / UCNV_ISCII
+ * identifiers.
+ * NOTE(review): the bare numbers presumably are min/max bytes per character
+ * (1 and 4), the substitution byte 0x1a with length 1, and fallback/mask
+ * fields - confirm against the UConverterStaticData declaration.
+ */
+static const UConverterStaticData _ISCIIStaticData={
+ sizeof(UConverterStaticData),
+ "ISCII",
+ 0,
+ UCNV_IBM,
+ UCNV_ISCII,
+ 1,
+ 4,
+ { 0x1a, 0, 0, 0 },
+ 0x1,
+ FALSE,
+ FALSE,
+ 0x0,
+ 0x0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
+
+};
+
+/* Shared-data record for ISCII: binds _ISCIIStaticData to the _ISCIIImpl function table. */
+const UConverterSharedData _ISCIIData=
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISCIIStaticData, &_ISCIIImpl);
+
+#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
diff --git a/thirdparty/icu4c/common/ucnvlat1.cpp b/thirdparty/icu4c/common/ucnvlat1.cpp
new file mode 100644
index 0000000000..358bc0caa2
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnvlat1.cpp
@@ -0,0 +1,756 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2000-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: ucnvlat1.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2000feb07
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/uset.h"
+#include "unicode/utf8.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "ustr_imp.h"
+
+/* control optimizations according to the platform */
+#define LATIN1_UNROLL_FROM_UNICODE 1
+
+/* ISO 8859-1 --------------------------------------------------------------- */
+
+/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
+U_CDECL_BEGIN
+/*
+ * Copies Latin-1 bytes from pArgs->source to UChars at pArgs->target.
+ * As many units are converted as fit in both the input and the output;
+ * *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR when the input is longer
+ * than the target capacity. If pArgs->offsets is non-NULL, consecutive
+ * source indexes starting at 0 are written there and pArgs->offsets is
+ * advanced. pArgs->source and pArgs->target are always written back.
+ */
+static void U_CALLCONV
+_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ const uint8_t *src=(const uint8_t *)pArgs->source;
+ UChar *dst=pArgs->target;
+ int32_t *offs=pArgs->offsets;
+ int32_t srcIndex=0;
+
+ /*
+ * The mapping is 1:1 (one UChar per byte), so a single count - the smaller
+ * of input length and output capacity - drives the whole conversion.
+ */
+ const int32_t capacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+ const int32_t available=(int32_t)((const uint8_t *)pArgs->sourceLimit-src);
+ int32_t tail;
+
+ if(available>capacity) {
+ /* target will be full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ tail=capacity;
+ } else {
+ tail=available;
+ }
+
+ if(tail>=8) {
+ /* Process whole groups of eight, unrolled for speed and improved pipelining. */
+ const int32_t groups=tail>>3;
+ int32_t g;
+
+ tail&=0x7;
+
+ for(g=0; g<groups; ++g) {
+ dst[0]=src[0];
+ dst[1]=src[1];
+ dst[2]=src[2];
+ dst[3]=src[3];
+ dst[4]=src[4];
+ dst[5]=src[5];
+ dst[6]=src[6];
+ dst[7]=src[7];
+ dst+=8;
+ src+=8;
+ }
+
+ if(offs!=NULL) {
+ for(g=0; g<groups; ++g) {
+ offs[0]=srcIndex;
+ offs[1]=srcIndex+1;
+ offs[2]=srcIndex+2;
+ offs[3]=srcIndex+3;
+ offs[4]=srcIndex+4;
+ offs[5]=srcIndex+5;
+ offs[6]=srcIndex+6;
+ offs[7]=srcIndex+7;
+ srcIndex+=8;
+ offs+=8;
+ }
+ }
+ }
+
+ /* convert the remaining (fewer than eight) units one by one */
+ for(int32_t i=0; i<tail; ++i) {
+ *dst++=*src++;
+ }
+
+ /* write back the updated pointers */
+ pArgs->source=(const char *)src;
+ pArgs->target=dst;
+
+ /* offsets for the remaining units */
+ if(offs!=NULL) {
+ for(int32_t i=0; i<tail; ++i) {
+ *offs++=srcIndex++;
+ }
+ pArgs->offsets=offs;
+ }
+}
+
+/*
+ * Table-less and callback-less counterpart of ucnv_MBCSSingleGetNextUChar().
+ * Returns the next source byte as a code point and advances pArgs->source by
+ * one. With no input left, sets *pErrorCode to U_INDEX_OUTOFBOUNDS_ERROR and
+ * returns 0xffff.
+ */
+static UChar32 U_CALLCONV
+_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ const uint8_t *next=(const uint8_t *)pArgs->source;
+
+ if(next>=(const uint8_t *)pArgs->sourceLimit) {
+ /* no output because of empty input */
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0xffff;
+ }
+
+ /* consume exactly one byte; its value is the code point */
+ pArgs->source=(const char *)(next+1);
+ return *next;
+}
+
+/*
+ * This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets().
+ *
+ * Converts UTF-16 code units from pArgs->source into single bytes at
+ * pArgs->target. The highest convertible code unit is 0xff when the
+ * converter's shared data is _Latin1Data and 0x7f otherwise.
+ * A code unit above that limit stops the conversion: *pErrorCode is set to
+ * U_ILLEGAL_CHAR_FOUND for an unpaired surrogate or U_INVALID_CHAR_FOUND
+ * otherwise, and the code point is stored in cnv->fromUChar32. A lead
+ * surrogate at the very end of the input is stored in cnv->fromUChar32
+ * without an error so that the next call can resume at getTrail.
+ * U_BUFFER_OVERFLOW_ERROR is reported when input remains and the target is
+ * full. pArgs->source, pArgs->target and pArgs->offsets are written back.
+ */
+static void U_CALLCONV
+_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const UChar *source, *sourceLimit;
+ uint8_t *target, *oldTarget;
+ int32_t targetCapacity, length;
+ int32_t *offsets;
+
+ UChar32 cp;
+ UChar c, max;
+
+ int32_t sourceIndex;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ source=pArgs->source;
+ sourceLimit=pArgs->sourceLimit;
+ target=oldTarget=(uint8_t *)pArgs->target;
+ targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+ offsets=pArgs->offsets;
+
+ if(cnv->sharedData==&_Latin1Data) {
+ max=0xff; /* Latin-1 */
+ } else {
+ max=0x7f; /* US-ASCII */
+ }
+
+ /* get the converter state from UConverter */
+ cp=cnv->fromUChar32;
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex= cp==0 ? 0 : -1;
+
+ /*
+ * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
+ * for the minimum of the sourceLength and targetCapacity
+ */
+ length=(int32_t)(sourceLimit-source);
+ if(length<targetCapacity) {
+ targetCapacity=length;
+ }
+
+ /* a code point is pending from the previous call: continue with its trail surrogate */
+ if(cp!=0 && targetCapacity>0) {
+ goto getTrail;
+ }
+
+#if LATIN1_UNROLL_FROM_UNICODE
+ /* unroll the loop with the most common case */
+ if(targetCapacity>=16) {
+ int32_t count, loops;
+ UChar u, oredChars;
+
+ loops=count=targetCapacity>>4;
+ do {
+ /*
+ * Convert 16 units without checking each one; oredChars accumulates the OR
+ * of all of them. Since max is 0xff or 0x7f (all low bits set), the OR
+ * exceeds max exactly when at least one unit does.
+ */
+ oredChars=u=*source++;
+ *target++=(uint8_t)u;
+ oredChars|=u=*source++;
+ *target++=(uint8_t)u;
+ oredChars|=u=*source++;
+ *target++=(uint8_t)u;
+ oredChars|=u=*source++;
+ *target++=(uint8_t)u;
+ oredChars|=u=*source++;
+ *target++=(uint8_t)u;
+ oredChars|=u=*source++;
+ *target++=(uint8_t)u;
+ oredChars|=u=*source++;
+ *target++=(uint8_t)u;
+ oredChars|=u=*source++;
+ *target++=(uint8_t)u;
+ oredChars|=u=*source++;
+ *target++=(uint8_t)u;
+ oredChars|=u=*source++;
+ *target++=(uint8_t)u;
+ oredChars|=u=*source++;
+ *target++=(uint8_t)u;
+ oredChars|=u=*source++;
+ *target++=(uint8_t)u;
+ oredChars|=u=*source++;
+ *target++=(uint8_t)u;
+ oredChars|=u=*source++;
+ *target++=(uint8_t)u;
+ oredChars|=u=*source++;
+ *target++=(uint8_t)u;
+ oredChars|=u=*source++;
+ *target++=(uint8_t)u;
+
+ /* were all 16 entries really valid? */
+ if(oredChars>max) {
+ /* no, return to the first of these 16 */
+ source-=16;
+ target-=16;
+ break;
+ }
+ } while(--count>0);
+ /* count becomes the number of groups of 16 that were fully converted */
+ count=loops-count;
+ targetCapacity-=16*count;
+
+ if(offsets!=NULL) {
+ /* these offsets are written here, so exclude them from the final offsets loop */
+ oldTarget+=16*count;
+ while(count>0) {
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ --count;
+ }
+ }
+ }
+#endif
+
+ /* conversion loop */
+ c=0;
+ /* note: when the loop stops on c>max, source already points past c */
+ while(targetCapacity>0 && (c=*source++)<=max) {
+ /* convert the Unicode code point */
+ *target++=(uint8_t)c;
+ --targetCapacity;
+ }
+
+ if(c>max) {
+ cp=c;
+ if(!U_IS_SURROGATE(cp)) {
+ /* callback(unassigned) */
+ } else if(U_IS_SURROGATE_LEAD(cp)) {
+getTrail:
+ if(source<sourceLimit) {
+ /* test the following code unit */
+ UChar trail=*source;
+ if(U16_IS_TRAIL(trail)) {
+ ++source;
+ cp=U16_GET_SUPPLEMENTARY(cp, trail);
+ /* this codepage does not map supplementary code points */
+ /* callback(unassigned) */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ }
+ } else {
+ /* no more input */
+ cnv->fromUChar32=cp;
+ goto noMoreInput;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ }
+
+ /* cp is still a surrogate only if it could not be paired */
+ *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
+ cnv->fromUChar32=cp;
+ }
+noMoreInput:
+
+ /* set offsets since the start */
+ if(offsets!=NULL) {
+ size_t count=target-oldTarget;
+ while(count>0) {
+ *offsets++=sourceIndex++;
+ --count;
+ }
+ }
+
+ if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ /* write back the updated pointers */
+ pArgs->source=source;
+ pArgs->target=(char *)target;
+ pArgs->offsets=offsets;
+}
+
+/*
+ * Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8().
+ *
+ * Reads UTF-8 bytes from pToUArgs->source and writes Latin-1 bytes to
+ * pFromUArgs->target. Only ASCII bytes and two-byte sequences with a lead
+ * byte of 0xc2 or 0xc3 (U+0080..U+00FF) are converted here.
+ *
+ * pErrorCode is set to
+ * - U_BUFFER_OVERFLOW_ERROR when the target is full while input remains;
+ * - U_USING_DEFAULT_WARNING when any other input is found. The function then
+ * returns with the source pointer at the unconverted byte so that the
+ * caller can fall back to the pivoting implementation.
+ *
+ * A lead byte that ends the input is stored in the UTF-8 converter
+ * (toUBytes[0], toUnicodeStatus, toULength, mode) and is read back from
+ * toUnicodeStatus at the start of the next call.
+ */
+static void U_CALLCONV
+ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
+ UConverterToUnicodeArgs *pToUArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *utf8;
+ const uint8_t *source, *sourceLimit;
+ uint8_t *target;
+ int32_t targetCapacity;
+
+ UChar32 c;
+ uint8_t b, t1;
+
+ /* set up the local pointers */
+ utf8=pToUArgs->converter;
+ source=(uint8_t *)pToUArgs->source;
+ sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
+ target=(uint8_t *)pFromUArgs->target;
+ targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
+
+ /* get the converter state from the UTF-8 UConverter: c is the pending lead byte, or 0 if there is none */
+ if (utf8->toULength > 0) {
+ c=(UChar32)utf8->toUnicodeStatus;
+ } else {
+ c = 0;
+ }
+ if(c!=0 && source<sourceLimit) {
+ if(targetCapacity==0) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ return; /* nothing was consumed or written, so no pointers need to be written back */
+ } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
+ ++source;
+ *target++=(uint8_t)(((c&3)<<6)|t1); /* low 2 bits of the lead byte + 6 bits of the trail byte */
+ --targetCapacity;
+
+ utf8->toUnicodeStatus=0; /* the pending lead byte has been consumed */
+ utf8->toULength=0;
+ } else {
+ /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
+ *pErrorCode=U_USING_DEFAULT_WARNING;
+ return;
+ }
+ }
+
+ /*
+ * Make sure that the last byte sequence before sourceLimit is complete
+ * or runs into a lead byte.
+ * In the conversion loop compare source with sourceLimit only once
+ * per multi-byte character.
+ * For Latin-1, adjust sourceLimit only for 1 trail byte because
+ * the conversion loop handles at most 2-byte sequences.
+ */
+ if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
+ --sourceLimit;
+ }
+
+ /* conversion loop */
+ while(source<sourceLimit) {
+ if(targetCapacity>0) {
+ b=*source++;
+ if(U8_IS_SINGLE(b)) {
+ /* convert ASCII */
+ *target++=(uint8_t)b;
+ --targetCapacity;
+ } else if( /* handle U+0080..U+00FF inline */
+ b>=0xc2 && b<=0xc3 &&
+ (t1=(uint8_t)(*source-0x80)) <= 0x3f
+ ) {
+ ++source;
+ *target++=(uint8_t)(((b&3)<<6)|t1); /* same bit layout as for the pending lead byte above */
+ --targetCapacity;
+ } else {
+ /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
+ pToUArgs->source=(char *)(source-1); /* point back at the byte b that was just read */
+ pFromUArgs->target=(char *)target;
+ *pErrorCode=U_USING_DEFAULT_WARNING;
+ return;
+ }
+ } else {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+
+ /*
+ * The sourceLimit may have been adjusted before the conversion loop
+ * to stop before a truncated sequence.
+ * If so, then collect the truncated sequence now.
+ * For Latin-1, there is at most one lead byte to collect because
+ * sourceLimit was moved back by at most one byte above.
+ */
+ if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
+ utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
+ utf8->toULength=1;
+ utf8->mode=U8_COUNT_BYTES(b);
+ }
+
+ /* write back the updated pointers */
+ pToUArgs->source=(char *)source;
+ pFromUArgs->target=(char *)target;
+}
+
+static void U_CALLCONV /* reports the code points handled by the Latin-1 converter */
+_Latin1GetUnicodeSet(const UConverter *cnv,
+ const USetAdder *sa,
+ UConverterUnicodeSet which,
+ UErrorCode *pErrorCode) {
+ (void)cnv; /* unused */
+ (void)which; /* unused: the same range is added whatever the selector is */
+ (void)pErrorCode; /* unused: no error is ever set here */
+ sa->addRange(sa->set, 0, 0xff); /* add U+0000..U+00FF through the adder callback */
+}
+U_CDECL_END
+
+
+static const UConverterImpl _Latin1Impl={ /* function table for the Latin-1 converter */
+ UCNV_LATIN_1, /* same type constant as used in _Latin1StaticData */
+
+ NULL, /* NOTE(review): slot meanings are defined by UConverterImpl, which is not visible here; NULL presumably means "not provided" */
+ NULL,
+
+ NULL,
+ NULL,
+ NULL,
+
+ _Latin1ToUnicodeWithOffsets, /* one function fills both to-Unicode slots */
+ _Latin1ToUnicodeWithOffsets,
+ _Latin1FromUnicodeWithOffsets, /* one function fills both from-Unicode slots */
+ _Latin1FromUnicodeWithOffsets,
+ _Latin1GetNextUChar,
+
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ _Latin1GetUnicodeSet,
+
+ NULL,
+ ucnv_Latin1FromUTF8 /* direct UTF-8 to Latin-1 conversion */
+};
+
+static const UConverterStaticData _Latin1StaticData={ /* constant descriptor for the Latin-1 converter */
+ sizeof(UConverterStaticData),
+ "ISO-8859-1", /* converter name */
+ 819, UCNV_IBM, UCNV_LATIN_1, 1, 1, /* presumably codepage number, platform, converter type, min/max bytes per char -- TODO confirm against UConverterStaticData */
+ { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE, /* presumably the substitution byte 0x1a and its length 1 -- TODO confirm */
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+const UConverterSharedData _Latin1Data= /* non-static shared data that ties _Latin1StaticData to _Latin1Impl */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl);
+
+/* US-ASCII ----------------------------------------------------------------- */
+
+U_CDECL_BEGIN
+/*
+ * This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets().
+ *
+ * Converts US-ASCII bytes from pArgs->source to UChars in pArgs->target,
+ * one output unit per input byte, and optionally records one source offset
+ * per output unit in pArgs->offsets.
+ *
+ * pErrorCode is set to
+ * - U_ILLEGAL_CHAR_FOUND when a byte above 0x7f is read; that byte is stored
+ * in the converter's toUBytes[0] (toULength=1) and source has already been
+ * advanced past it;
+ * - U_BUFFER_OVERFLOW_ERROR when the target is full while input remains.
+ */
+static void U_CALLCONV
+_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ const uint8_t *source, *sourceLimit;
+ UChar *target, *oldTarget;
+ int32_t targetCapacity, length;
+ int32_t *offsets;
+
+ int32_t sourceIndex;
+
+ uint8_t c;
+
+ /* set up the local pointers */
+ source=(const uint8_t *)pArgs->source;
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+ target=oldTarget=pArgs->target;
+ targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+ offsets=pArgs->offsets;
+
+ /* sourceIndex is the source offset written for the next output unit; it always starts at 0 here */
+ sourceIndex=0;
+
+ /*
+ * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
+ * for the minimum of the sourceLength and targetCapacity
+ */
+ length=(int32_t)(sourceLimit-source);
+ if(length<targetCapacity) {
+ targetCapacity=length;
+ }
+
+ if(targetCapacity>=8) {
+ /* This loop is unrolled for speed and improved pipelining. */
+ int32_t count, loops;
+ UChar oredChars;
+
+ loops=count=targetCapacity>>3; /* number of complete groups of 8 */
+ do {
+ oredChars=target[0]=source[0];
+ oredChars|=target[1]=source[1];
+ oredChars|=target[2]=source[2];
+ oredChars|=target[3]=source[3];
+ oredChars|=target[4]=source[4];
+ oredChars|=target[5]=source[5];
+ oredChars|=target[6]=source[6];
+ oredChars|=target[7]=source[7];
+
+ /* were all 8 entries really valid? */
+ if(oredChars>0x7f) {
+ /* no, stay at the first of these 8: source and target have not been advanced yet */
+ break;
+ }
+ source+=8;
+ target+=8;
+ } while(--count>0);
+ count=loops-count; /* number of groups that were fully converted */
+ targetCapacity-=count*8;
+
+ if(offsets!=NULL) {
+ oldTarget+=count*8; /* offsets for these units are written right here, not in the final offsets loop */
+ while(count>0) {
+ offsets[0]=sourceIndex++;
+ offsets[1]=sourceIndex++;
+ offsets[2]=sourceIndex++;
+ offsets[3]=sourceIndex++;
+ offsets[4]=sourceIndex++;
+ offsets[5]=sourceIndex++;
+ offsets[6]=sourceIndex++;
+ offsets[7]=sourceIndex++;
+ offsets+=8;
+ --count;
+ }
+ }
+ }
+
+ /* conversion loop: handles the remainder one byte at a time and stops after reading a byte above 0x7f */
+ c=0;
+ while(targetCapacity>0 && (c=*source++)<=0x7f) {
+ *target++=c;
+ --targetCapacity;
+ }
+
+ if(c>0x7f) {
+ /* callback(illegal); copy the current bytes to toUBytes[] */
+ UConverter *cnv=pArgs->converter;
+ cnv->toUBytes[0]=c;
+ cnv->toULength=1;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ } else if(source<sourceLimit && target>=pArgs->targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ /* set offsets since the start */
+ if(offsets!=NULL) {
+ size_t count=target-oldTarget; /* units written since oldTarget, i.e. by the conversion loop above */
+ while(count>0) {
+ *offsets++=sourceIndex++;
+ --count;
+ }
+ }
+
+ /* write back the updated pointers */
+ pArgs->source=(const char *)source;
+ pArgs->target=target;
+ pArgs->offsets=offsets;
+}
+
+/*
+ * This is a table-less version of ucnv_MBCSSingleGetNextUChar().
+ *
+ * Consumes one byte from pArgs->source and returns it if it is at most 0x7f.
+ * Otherwise returns 0xffff and sets pErrorCode:
+ * - U_ILLEGAL_CHAR_FOUND for a byte above 0x7f (the byte is stored in the
+ * converter's toUBytes[0] with toULength=1, and it has been consumed);
+ * - U_INDEX_OUTOFBOUNDS_ERROR when there is no input left.
+ */
+static UChar32 U_CALLCONV
+_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ const uint8_t *source;
+ uint8_t b;
+
+ source=(const uint8_t *)pArgs->source;
+ if(source<(const uint8_t *)pArgs->sourceLimit) {
+ b=*source++;
+ pArgs->source=(const char *)source; /* the byte counts as consumed whether or not it is valid */
+ if(b<=0x7f) {
+ return b;
+ } else {
+ UConverter *cnv=pArgs->converter;
+ cnv->toUBytes[0]=b;
+ cnv->toULength=1;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ return 0xffff;
+ }
+ }
+
+ /* no output because of empty input */
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0xffff;
+}
+
+/*
+ * "Convert" UTF-8 to US-ASCII: Validate and copy.
+ *
+ * Copies bytes up to 0x7f from pToUArgs->source to pFromUArgs->target.
+ *
+ * pErrorCode is set to
+ * - U_USING_DEFAULT_WARNING when the UTF-8 converter holds a partial
+ * character (nothing is consumed), or when a byte above 0x7f is found
+ * (source is left pointing at that byte);
+ * - U_BUFFER_OVERFLOW_ERROR when the target is full while input remains.
+ */
+static void U_CALLCONV
+ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
+ UConverterToUnicodeArgs *pToUArgs,
+ UErrorCode *pErrorCode) {
+ const uint8_t *source, *sourceLimit;
+ uint8_t *target;
+ int32_t targetCapacity, length;
+
+ uint8_t c;
+
+ if(pToUArgs->converter->toULength > 0) {
+ /* no handling of partial UTF-8 characters here, fall back to pivoting */
+ *pErrorCode=U_USING_DEFAULT_WARNING;
+ return;
+ }
+
+ /* set up the local pointers */
+ source=(const uint8_t *)pToUArgs->source;
+ sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
+ target=(uint8_t *)pFromUArgs->target;
+ targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
+
+ /*
+ * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
+ * for the minimum of the sourceLength and targetCapacity
+ */
+ length=(int32_t)(sourceLimit-source);
+ if(length<targetCapacity) {
+ targetCapacity=length;
+ }
+
+ /* unroll the loop with the most common case */
+ if(targetCapacity>=16) {
+ int32_t count, loops;
+ uint8_t oredChars;
+
+ loops=count=targetCapacity>>4; /* number of complete groups of 16 */
+ do {
+ oredChars=*target++=*source++;
+ oredChars|=*target++=*source++;
+ oredChars|=*target++=*source++;
+ oredChars|=*target++=*source++;
+ oredChars|=*target++=*source++;
+ oredChars|=*target++=*source++;
+ oredChars|=*target++=*source++;
+ oredChars|=*target++=*source++;
+ oredChars|=*target++=*source++;
+ oredChars|=*target++=*source++;
+ oredChars|=*target++=*source++;
+ oredChars|=*target++=*source++;
+ oredChars|=*target++=*source++;
+ oredChars|=*target++=*source++;
+ oredChars|=*target++=*source++;
+ oredChars|=*target++=*source++;
+
+ /* were all 16 entries really valid? */
+ if(oredChars>0x7f) {
+ /* no, return to the first of these 16 */
+ source-=16;
+ target-=16;
+ break;
+ }
+ } while(--count>0);
+ count=loops-count; /* number of groups that were fully copied */
+ targetCapacity-=16*count;
+ }
+
+ /* conversion loop: source is advanced only after a byte has been accepted */
+ c=0;
+ while(targetCapacity>0 && (c=*source)<=0x7f) {
+ ++source;
+ *target++=c;
+ --targetCapacity;
+ }
+
+ if(c>0x7f) {
+ /* non-ASCII character, handle in standard converter */
+ *pErrorCode=U_USING_DEFAULT_WARNING;
+ } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ /* write back the updated pointers */
+ pToUArgs->source=(const char *)source;
+ pFromUArgs->target=(char *)target;
+}
+
+static void U_CALLCONV /* reports the code points handled by the US-ASCII converter */
+_ASCIIGetUnicodeSet(const UConverter *cnv,
+ const USetAdder *sa,
+ UConverterUnicodeSet which,
+ UErrorCode *pErrorCode) {
+ (void)cnv; /* unused */
+ (void)which; /* unused: the same range is added whatever the selector is */
+ (void)pErrorCode; /* unused: no error is ever set here */
+ sa->addRange(sa->set, 0, 0x7f); /* add U+0000..U+007F through the adder callback */
+}
+U_CDECL_END
+
+static const UConverterImpl _ASCIIImpl={ /* function table for the US-ASCII converter */
+ UCNV_US_ASCII, /* same type constant as used in _ASCIIStaticData */
+
+ NULL, /* NOTE(review): slot meanings are defined by UConverterImpl, which is not visible here; NULL presumably means "not provided" */
+ NULL,
+
+ NULL,
+ NULL,
+ NULL,
+
+ _ASCIIToUnicodeWithOffsets, /* one function fills both to-Unicode slots */
+ _ASCIIToUnicodeWithOffsets,
+ _Latin1FromUnicodeWithOffsets, /* the Latin-1 from-Unicode function is shared; presumably it picks the 0x7f limit from the converter type -- TODO confirm */
+ _Latin1FromUnicodeWithOffsets,
+ _ASCIIGetNextUChar,
+
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ _ASCIIGetUnicodeSet,
+
+ NULL,
+ ucnv_ASCIIFromUTF8 /* direct UTF-8 to US-ASCII validate-and-copy */
+};
+
+static const UConverterStaticData _ASCIIStaticData={ /* constant descriptor for the US-ASCII converter */
+ sizeof(UConverterStaticData),
+ "US-ASCII", /* converter name */
+ 367, UCNV_IBM, UCNV_US_ASCII, 1, 1, /* presumably codepage number, platform, converter type, min/max bytes per char -- TODO confirm against UConverterStaticData */
+ { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE, /* presumably the substitution byte 0x1a and its length 1 -- TODO confirm */
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+const UConverterSharedData _ASCIIData= /* non-static shared data that ties _ASCIIStaticData to _ASCIIImpl */
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl);
+
+#endif
diff --git a/thirdparty/icu4c/common/ucnvmbcs.cpp b/thirdparty/icu4c/common/ucnvmbcs.cpp
new file mode 100644
index 0000000000..ca9b0a335a
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnvmbcs.cpp
@@ -0,0 +1,5723 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2000-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ucnvmbcs.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2000jul03
+* created by: Markus W. Scherer
+*
+* The current code in this file replaces the previous implementation
+* of conversion code from multi-byte codepages to Unicode and back.
+* This implementation supports the following:
+* - legacy variable-length codepages with up to 4 bytes per character
+* - all Unicode code points (up to 0x10ffff)
+* - efficient distinction of unassigned vs. illegal byte sequences
+* - it is possible in fromUnicode() to directly deal with simple
+* stateful encodings (used for EBCDIC_STATEFUL)
+* - it is possible to convert Unicode code points
+* to a single zero byte (but not as a fallback except for SBCS)
+*
+* Remaining limitations in fromUnicode:
+* - byte sequences must not have leading zero bytes
+* - except for SBCS codepages: no fallback mapping from Unicode to a zero byte
+* - limitation to up to 4 bytes per character
+*
+* ICU 2.8 (late 2003) adds a secondary data structure which lifts some of these
+* limitations and adds m:n character mappings and other features.
+* See ucnv_ext.h for details.
+*
+* Change history:
+*
+* 5/6/2001 Ram Moved MBCS_SINGLE_RESULT_FROM_U,MBCS_STAGE_2_FROM_U,
+* MBCS_VALUE_2_FROM_STAGE_2, MBCS_VALUE_4_FROM_STAGE_2
+* macros to ucnvmbcs.h file
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_cb.h"
+#include "unicode/udata.h"
+#include "unicode/uset.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include "ucnv_bld.h"
+#include "ucnvmbcs.h"
+#include "ucnv_ext.h"
+#include "ucnv_cnv.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "umutex.h"
+#include "ustr_imp.h"
+
+/* control optimizations according to the platform */
+#define MBCS_UNROLL_SINGLE_TO_BMP 1
+#define MBCS_UNROLL_SINGLE_FROM_BMP 0
+
+/*
+ * _MBCSHeader versions 5.3 & 4.3
+ * (Note that the _MBCSHeader version is in addition to the converter formatVersion.)
+ *
+ * This version is optional. Version 5 is used for incompatible data format changes.
+ * makeconv will continue to generate version 4 files if possible.
+ *
+ * Changes from version 4:
+ *
+ * The main difference is an additional _MBCSHeader field with
+ * - the length (number of uint32_t) of the _MBCSHeader
+ * - flags for further incompatible data format changes
+ * - flags for further, backward compatible data format changes
+ *
+ * The MBCS_OPT_FROM_U flag indicates that most of the fromUnicode data is omitted from
+ * the file and needs to be reconstituted at load time.
+ * This requires a utf8Friendly format with an additional mbcsIndex table for fast
+ * (and UTF-8-friendly) fromUnicode conversion for Unicode code points up to maxFastUChar.
+ * (For details about these structures see below, and see ucnvmbcs.h.)
+ *
+ * utf8Friendly also implies that the fromUnicode mappings are stored in ascending order
+ * of the Unicode code points. (This requires that the .ucm file has the |0 etc.
+ * precision markers for all mappings.)
+ *
+ * All fallbacks have been moved to the extension table, leaving only roundtrips in the
+ * omitted data that can be reconstituted from the toUnicode data.
+ *
+ * Of the stage 2 table, the part corresponding to maxFastUChar and below is omitted.
+ * With only roundtrip mappings in the base fromUnicode data, this part is fully
+ * redundant with the mbcsIndex and will be reconstituted from that (also using the
+ * stage 1 table which contains the information about how stage 2 was compacted).
+ *
+ * The rest of the stage 2 table, the part for code points above maxFastUChar,
+ * is stored in the file and will be appended to the reconstituted part.
+ *
+ * The entire fromUBytes array is omitted from the file and will be reconstituted.
+ * This is done by enumerating all toUnicode roundtrip mappings, performing
+ * each mapping (using the stage 1 and reconstituted stage 2 tables) and
+ * writing instead of reading the byte values.
+ *
+ * _MBCSHeader version 4.3
+ *
+ * Change from version 4.2:
+ * - Optional utf8Friendly data structures, with 64-entry stage 3 block
+ * allocation for parts of the BMP, and an additional mbcsIndex in non-SBCS
+ * files which can be used instead of stages 1 & 2.
+ * Faster lookups for roundtrips from most commonly used characters,
+ * and lookups from UTF-8 byte sequences with a natural bit distribution.
+ * See ucnvmbcs.h for more details.
+ *
+ * Change from version 4.1:
+ * - Added an optional extension table structure at the end of the .cnv file.
+ * It is present if the upper bits of the header flags field contains a non-zero
+ * byte offset to it.
+ * Files that contain only a conversion table and no base table
+ * use the special outputType MBCS_OUTPUT_EXT_ONLY.
+ * These contain the base table name between the MBCS header and the extension
+ * data.
+ *
+ * Change from version 4.0:
+ * - Replace header.reserved with header.fromUBytesLength so that all
+ * fields in the data have length.
+ *
+ * Changes from version 3 (for performance improvements):
+ * - new bit distribution for state table entries
+ * - reordered action codes
+ * - new data structure for single-byte fromUnicode
+ * + stage 2 only contains indexes
+ * + stage 3 stores 16 bits per character with classification bits 15..8
+ * - no multiplier for stage 1 entries
+ * - stage 2 for non-single-byte codepages contains the index and the flags in
+ * one 32-bit value
+ * - 2-byte and 4-byte fromUnicode results are stored directly as 16/32-bit integers
+ *
+ * For more details about old versions of the MBCS data structure, see
+ * the corresponding versions of this file.
+ *
+ * Converting stateless codepage data ---------------------------------------***
+ * (or codepage data with simple states) to Unicode.
+ *
+ * Data structure and algorithm for converting from complex legacy codepages
+ * to Unicode. (Designed before 2000-may-22.)
+ *
+ * The basic idea is that the structure of legacy codepages can be described
+ * with state tables.
+ * When reading a byte stream, each input byte causes a state transition.
+ * Some transitions result in the output of a code point, some result in
+ * "unassigned" or "illegal" output.
+ * This is used here for character conversion.
+ *
+ * The data structure begins with a state table consisting of a row
+ * per state, with 256 entries (columns) per row for each possible input
+ * byte value.
+ * Each entry is 32 bits wide, with two formats distinguished by
+ * the sign bit (bit 31):
+ *
+ * One format for transitional entries (bit 31 not set) for non-final bytes, and
+ * one format for final entries (bit 31 set).
+ * Both formats contain the number of the next state in the same bit
+ * positions.
+ * State 0 is the initial state.
+ *
+ * Most of the time, the offset values of subsequent states are added
+ * up to a scalar value. This value will eventually be the index of
+ * the Unicode code point in a table that follows the state table.
+ * The effect is that the code points for final state table rows
+ * are contiguous. The code points of final state rows follow each other
+ * in the order of the references to those final states by previous
+ * states, etc.
+ *
+ * For some terminal states, the offset is itself the output Unicode
+ * code point (16 bits for a BMP code point or 20 bits for a supplementary
+ * code point, stored as code point minus 0x10000 so that 20 bits are enough).
+ * For others, the code point in the Unicode table is stored with either
+ * one or two code units: one for BMP code points, two for a pair of
+ * surrogates.
+ * All code points for a final state entry take up the same number of code
+ * units, regardless of whether they all actually _use_ the same number
+ * of code units. This is necessary for simple array access.
+ *
+ * An additional feature comes in with what in ICU is called "fallback"
+ * mappings:
+ *
+ * In addition to round-trippable, precise, 1:1 mappings, there are often
+ * mappings defined between similar, though not the same, characters.
+ * Typically, such mappings occur only in fromUnicode mapping tables because
+ * Unicode has a superset repertoire of most other codepages. However, it
+ * is possible to provide such mappings in the toUnicode tables, too.
+ * In this case, the fallback mappings are partly integrated into the
+ * general state tables because the structure of the encoding includes their
+ * byte sequences.
+ * For final entries in an initial state, fallback mappings are stored in
+ * the entry itself like with roundtrip mappings.
+ * For other final entries, they are stored in the code units table if
+ * the entry is for a pair of code units.
+ * For single-unit results in the code units table, there is no space to
+ * alternatively hold a fallback mapping; in this case, the code unit
+ * is stored as U+fffe (unassigned), and the fallback mapping needs to
+ * be looked up by the scalar offset value in a separate table.
+ *
+ * "Unassigned" state entries really mean "structurally unassigned",
+ * i.e., such a byte sequence will never have a mapping result.
+ *
+ * The interpretation of the bits in each entry is as follows:
+ *
+ * Bit 31 not set, not a terminal entry ("transitional"):
+ * 30..24 next state
+ * 23..0 offset delta, to be added up
+ *
+ * Bit 31 set, terminal ("final") entry:
+ * 30..24 next state (regardless of action code)
+ * 23..20 action code:
+ * action codes 0 and 1 result in precise-mapping Unicode code points
+ * 0 valid byte sequence
+ * 19..16 not used, 0
+ * 15..0 16-bit Unicode BMP code point
+ * never U+fffe or U+ffff
+ * 1 valid byte sequence
+ * 19..0 20-bit Unicode supplementary code point
+ * never U+fffe or U+ffff
+ *
+ * action codes 2 and 3 result in fallback (unidirectional-mapping) Unicode code points
+ * 2 valid byte sequence (fallback)
+ * 19..16 not used, 0
+ * 15..0 16-bit Unicode BMP code point as fallback result
+ * 3 valid byte sequence (fallback)
+ * 19..0 20-bit Unicode supplementary code point as fallback result
+ *
+ * action codes 4 and 5 may result in roundtrip/fallback/unassigned/illegal results
+ * depending on the code units they result in
+ * 4 valid byte sequence
+ * 19..9 not used, 0
+ * 8..0 final offset delta
+ * pointing to one 16-bit code unit which may be
+ * fffe unassigned -- look for a fallback for this offset
+ * ffff illegal
+ * 5 valid byte sequence
+ * 19..9 not used, 0
+ * 8..0 final offset delta
+ * pointing to two 16-bit code units
+ * (typically UTF-16 surrogates)
+ * the result depends on the first code unit as follows:
+ * 0000..d7ff roundtrip BMP code point (1st alone)
+ * d800..dbff roundtrip surrogate pair (1st, 2nd)
+ * dc00..dfff fallback surrogate pair (1st-400, 2nd)
+ * e000 roundtrip BMP code point (2nd alone)
+ * e001 fallback BMP code point (2nd alone)
+ * fffe unassigned
+ * ffff illegal
+ * (the final offset deltas are at most 255 * 2,
+ * times 2 because of storing code unit pairs)
+ *
+ * 6 unassigned byte sequence
+ * 19..16 not used, 0
+ * 15..0 16-bit Unicode BMP code point U+fffe (new with version 2)
+ * this does not contain a final offset delta because the main
+ * purpose of this action code is to save scalar offset values;
+ * therefore, fallback values cannot be assigned to byte
+ * sequences that result in this action code
+ * 7 illegal byte sequence
+ * 19..16 not used, 0
+ * 15..0 16-bit Unicode BMP code point U+ffff (new with version 2)
+ * 8 state change only
+ * 19..0 not used, 0
+ * useful for state changes in simple stateful encodings,
+ * at Shift-In/Shift-Out codes
+ *
+ *
+ * 9..15 reserved for future use
+ * current implementations will only perform a state change
+ * and ignore bits 19..0
+ *
+ * An encoding with contiguous ranges of unassigned byte sequences, like
+ * Shift-JIS and especially EUC-TW, can be stored efficiently by having
+ * at least two states for the trail bytes:
+ * One trail byte state that results in code points, and one that only
+ * has "unassigned" and "illegal" terminal states.
+ *
+ * Note: partly by accident, this data structure supports simple stateful
+ * encodings without any additional logic.
+ * Currently, only simple Shift-In/Shift-Out schemes are handled with
+ * appropriate state tables (especially EBCDIC_STATEFUL!).
+ *
+ * MBCS version 2 added:
+ * unassigned and illegal action codes have U+fffe and U+ffff
+ * instead of unused bits; this is useful for _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP()
+ *
+ * Converting from Unicode to codepage bytes --------------------------------***
+ *
+ * The conversion data structure for fromUnicode is designed for the known
+ * structure of Unicode. It maps from 21-bit code points (0..0x10ffff) to
+ * a sequence of 1..4 bytes, in addition to a flag that indicates if there is
+ * a roundtrip mapping.
+ *
+ * The lookup is done with a 3-stage trie, using 11/6/4 bits for stage 1/2/3
+ * like in the character properties table.
+ * The beginning of the trie is at offsetFromUTable, the beginning of stage 3
+ * with the resulting bytes is at offsetFromUBytes.
+ *
+ * Beginning with version 4, single-byte codepages have a significantly different
+ * trie compared to other codepages.
+ * In all cases, the entry in stage 1 is directly the index of the block of
+ * 64 entries in stage 2.
+ *
+ * Single-byte lookup:
+ *
+ * Stage 2 only contains 16-bit indexes directly to the 16-blocks in stage 3.
+ * Stage 3 contains one 16-bit word per result:
+ * Bits 15..8 indicate the kind of result:
+ * f roundtrip result
+ * c fallback result from private-use code point
+ * 8 fallback result from other code points
+ * 0 unassigned
+ * Bits 7..0 contain the codepage byte. A zero byte is always possible.
+ *
+ * In version 4.3, the runtime code can build an sbcsIndex for a utf8Friendly
+ * file. For 2-byte UTF-8 byte sequences and some 3-byte sequences the lookup
+ * becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
+ * ASCII code points can be looked up with a linear array access into stage 3.
+ * See maxFastUChar and other details in ucnvmbcs.h.
+ *
+ * Multi-byte lookup:
+ *
+ * Stage 2 contains a 32-bit word for each 16-block in stage 3:
+ * Bits 31..16 contain flags for which stage 3 entries contain roundtrip results
+ * test: MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)
+ * If this test is false, then a non-zero result will be interpreted as
+ * a fallback mapping.
+ * Bits 15..0 contain the index to stage 3, which must be multiplied by 16*(bytes per char)
+ *
+ * Stage 3 contains 2, 3, or 4 bytes per result.
+ * 2 or 4 bytes are stored as uint16_t/uint32_t in platform endianness,
+ * while 3 bytes are stored as bytes in big-endian order.
+ * Leading zero bytes are ignored, and the number of bytes is counted.
+ * A zero byte mapping result is possible as a roundtrip result.
+ * For some output types, the actual result is processed from this;
+ * see ucnv_MBCSFromUnicodeWithOffsets().
+ *
+ * Note that stage 1 always contains 0x440=1088 entries (0x440==0x110000>>10),
+ * or (version 3 and up) for BMP-only codepages, it contains 64 entries.
+ *
+ * In version 4.3, a utf8Friendly file contains an mbcsIndex table.
+ * For 2-byte UTF-8 byte sequences and most 3-byte sequences the lookup
+ * becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
+ * ASCII code points can be looked up with a linear array access into stage 3.
+ * See maxFastUChar, mbcsIndex and other details in ucnvmbcs.h.
+ *
+ * In version 3, stage 2 blocks may overlap by multiples of the multiplier
+ * for compaction.
+ * In version 4, stage 2 blocks (and for single-byte codepages, stage 3 blocks)
+ * may overlap by any number of entries.
+ *
+ * MBCS version 2 added:
+ * the converter checks for known output types, which allows
+ * adding new ones without crashing an unaware converter
+ */
+
+/**
+ * Callback from ucnv_MBCSEnumToUnicode(), takes 32 mappings from
+ * consecutive sequences of bytes, starting from the one encoded in value,
+ * to Unicode code points. (Multiple mappings per call reduce the per-function-call overhead.)
+ * Does not currently support m:n mappings or reverse fallbacks.
+ * This function will not be called for sequences of bytes with leading zeros.
+ *
+ * @param context an opaque pointer, as passed into ucnv_MBCSEnumToUnicode()
+ * @param value contains 1..4 bytes of the first byte sequence, right-aligned
+ * @param codePoints the 32 resulting Unicode code points, one per consecutive
+ * byte sequence; an entry is negative if its byte sequence does
+ * not map to anything
+ * @return TRUE to continue enumeration, FALSE to stop
+ */
+typedef UBool U_CALLCONV
+UConverterEnumToUCallback(const void *context, uint32_t value, UChar32 codePoints[32]);
+
+static void U_CALLCONV /* forward declarations of file-local functions that the UConverterImpl tables below refer to */
+ucnv_MBCSLoad(UConverterSharedData *sharedData,
+ UConverterLoadArgs *pArgs,
+ const uint8_t *raw,
+ UErrorCode *pErrorCode);
+
+static void U_CALLCONV
+ucnv_MBCSUnload(UConverterSharedData *sharedData);
+
+static void U_CALLCONV
+ucnv_MBCSOpen(UConverter *cnv,
+ UConverterLoadArgs *pArgs,
+ UErrorCode *pErrorCode);
+
+static UChar32 U_CALLCONV
+ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode);
+
+static void U_CALLCONV
+ucnv_MBCSGetStarters(const UConverter* cnv,
+ UBool starters[256],
+ UErrorCode *pErrorCode);
+
+U_CDECL_BEGIN
+static const char* U_CALLCONV
+ucnv_MBCSGetName(const UConverter *cnv);
+U_CDECL_END
+
+static void U_CALLCONV
+ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
+ int32_t offsetIndex,
+ UErrorCode *pErrorCode);
+
+static UChar32 U_CALLCONV /* NOTE(review): repeats the ucnv_MBCSGetNextUChar declaration a few lines up; redundant but harmless */
+ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode);
+
+static void U_CALLCONV
+ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
+ UConverterToUnicodeArgs *pToUArgs,
+ UErrorCode *pErrorCode);
+
+static void U_CALLCONV
+ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
+ const USetAdder *sa,
+ UConverterUnicodeSet which,
+ UErrorCode *pErrorCode);
+
+static void U_CALLCONV
+ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
+ UConverterToUnicodeArgs *pToUArgs,
+ UErrorCode *pErrorCode);
+
+static const UConverterImpl _SBCSUTF8Impl={ /* same entries as _MBCSImpl except for the last slot */
+ UCNV_MBCS,
+
+ ucnv_MBCSLoad,
+ ucnv_MBCSUnload,
+
+ ucnv_MBCSOpen,
+ NULL,
+ NULL,
+
+ ucnv_MBCSToUnicodeWithOffsets, /* one function fills both to-Unicode slots */
+ ucnv_MBCSToUnicodeWithOffsets,
+ ucnv_MBCSFromUnicodeWithOffsets, /* one function fills both from-Unicode slots */
+ ucnv_MBCSFromUnicodeWithOffsets,
+ ucnv_MBCSGetNextUChar,
+
+ ucnv_MBCSGetStarters,
+ ucnv_MBCSGetName,
+ ucnv_MBCSWriteSub,
+ NULL,
+ ucnv_MBCSGetUnicodeSet,
+
+ NULL,
+ ucnv_SBCSFromUTF8 /* the only entry that differs from _MBCSImpl, which has NULL here */
+};
+
+static const UConverterImpl _DBCSUTF8Impl={ /* same entries as _MBCSImpl except for the last slot */
+ UCNV_MBCS,
+
+ ucnv_MBCSLoad,
+ ucnv_MBCSUnload,
+
+ ucnv_MBCSOpen,
+ NULL,
+ NULL,
+
+ ucnv_MBCSToUnicodeWithOffsets, /* one function fills both to-Unicode slots */
+ ucnv_MBCSToUnicodeWithOffsets,
+ ucnv_MBCSFromUnicodeWithOffsets, /* one function fills both from-Unicode slots */
+ ucnv_MBCSFromUnicodeWithOffsets,
+ ucnv_MBCSGetNextUChar,
+
+ ucnv_MBCSGetStarters,
+ ucnv_MBCSGetName,
+ ucnv_MBCSWriteSub,
+ NULL,
+ ucnv_MBCSGetUnicodeSet,
+
+ NULL,
+ ucnv_DBCSFromUTF8 /* the only entry that differs from _MBCSImpl, which has NULL here */
+};
+
+static const UConverterImpl _MBCSImpl={ /* function table referenced by _MBCSData */
+ UCNV_MBCS,
+
+ ucnv_MBCSLoad,
+ ucnv_MBCSUnload,
+
+ ucnv_MBCSOpen,
+ NULL,
+ NULL,
+
+ ucnv_MBCSToUnicodeWithOffsets, /* one function fills both to-Unicode slots */
+ ucnv_MBCSToUnicodeWithOffsets,
+ ucnv_MBCSFromUnicodeWithOffsets, /* one function fills both from-Unicode slots */
+ ucnv_MBCSFromUnicodeWithOffsets,
+ ucnv_MBCSGetNextUChar,
+
+ ucnv_MBCSGetStarters,
+ ucnv_MBCSGetName,
+ ucnv_MBCSWriteSub,
+ NULL,
+ ucnv_MBCSGetUnicodeSet,
+ NULL,
+ NULL /* no function in the slot where _SBCSUTF8Impl and _DBCSUTF8Impl install their FromUTF8 functions */
+};
+
+/* Static data is in tools/makeconv/ucnvstat.c for data-based
+ * converters. Be sure to update it as well.
+ *
+ * _MBCSData is the non-static shared data object for UCNV_MBCS; it points
+ * at the _MBCSImpl function table.
+ * NOTE(review): the meaning of the other initializer values depends on the
+ * UConverterSharedData field order, which is not visible here.
+ */
+
+const UConverterSharedData _MBCSData={
+ sizeof(UConverterSharedData), 1,
+ NULL, NULL, FALSE, TRUE, &_MBCSImpl,
+ 0, UCNV_MBCS_TABLE_INITIALIZER
+};
+
+
+/* GB 18030 data ------------------------------------------------------------ */
+
+/*
+ * helper macros for linear values for GB 18030 four-byte sequences
+ *
+ * LINEAR_18030(a, b, c, d) computes ((a*10+b)*126+c)*10+d from the four bytes.
+ * LINEAR_18030_BASE is that value for the byte sequence 81 30 81 30.
+ * LINEAR(x) splits a packed 32-bit value into its four bytes (most
+ * significant byte first) and passes them to LINEAR_18030().
+ * Note that LINEAR() expands x without parentheses, so it should only be
+ * given simple operands such as the literal constants used in gb18030Ranges.
+ */
+#define LINEAR_18030(a, b, c, d) ((((a)*10+(b))*126L+(c))*10L+(d))
+
+#define LINEAR_18030_BASE LINEAR_18030(0x81, 0x30, 0x81, 0x30)
+
+#define LINEAR(x) LINEAR_18030(x>>24, (x>>16)&0xff, (x>>8)&0xff, x&0xff)
+
+/*
+ * Some ranges of GB 18030 where both the Unicode code points and the
+ * GB four-byte sequences are contiguous and are handled algorithmically by
+ * the special callback functions below.
+ * The values are start & end of Unicode & GB codes.
+ *
+ * Each row is
+ *   { first code point, last code point, LINEAR(first GB code), LINEAR(last GB code) }
+ * with both ends inclusive.
+ * _extFromU() matches a code point against columns 0..1 and _extToU() matches
+ * a linear four-byte value against columns 2..3; both scan the rows in
+ * array order and stop at the first match.
+ *
+ * Note that single surrogates are not mapped by GB 18030
+ * as of the re-released mapping tables from 2000-nov-30.
+ */
+static const uint32_t
+gb18030Ranges[14][4]={
+ {0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35)},
+ {0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738)},
+ {0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436)},
+ {0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531)},
+ {0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534)},
+ {0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38)},
+ {0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537)},
+ {0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32)},
+ {0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237)},
+ {0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733)},
+ {0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837)},
+ {0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638)},
+ {0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931)},
+ {0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439)}
+};
+
+/* bit flag for UConverter.options indicating GB 18030 special handling */
+#define _MBCS_OPTION_GB18030 0x8000
+
+/* bit flags for UConverter.options indicating KEIS, JEF, JIPS special handling */
+#define _MBCS_OPTION_KEIS 0x01000
+#define _MBCS_OPTION_JEF 0x02000
+#define _MBCS_OPTION_JIPS 0x04000
+
+/*
+ * Shift-out (SO) and shift-in (SI) byte sequences that getSISOBytes() emits
+ * instead of UCNV_SO/UCNV_SI when one of the options above is set.
+ */
+
+/* KEIS: two-byte sequences */
+#define KEIS_SO_CHAR_1 0x0A
+#define KEIS_SO_CHAR_2 0x42
+#define KEIS_SI_CHAR_1 0x0A
+#define KEIS_SI_CHAR_2 0x41
+
+/* JEF: single bytes */
+#define JEF_SO_CHAR 0x28
+#define JEF_SI_CHAR 0x29
+
+/* JIPS: two-byte sequences */
+#define JIPS_SO_CHAR_1 0x1A
+#define JIPS_SO_CHAR_2 0x70
+#define JIPS_SI_CHAR_1 0x1A
+#define JIPS_SI_CHAR_2 0x71
+
+enum SISO_Option {
+ SI,
+ SO
+};
+typedef enum SISO_Option SISO_Option;
+
+static int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) {
+ int32_t SISOLength = 0;
+
+ switch (option) {
+ case SI:
+ if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
+ value[0] = KEIS_SI_CHAR_1;
+ value[1] = KEIS_SI_CHAR_2;
+ SISOLength = 2;
+ } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
+ value[0] = JEF_SI_CHAR;
+ SISOLength = 1;
+ } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
+ value[0] = JIPS_SI_CHAR_1;
+ value[1] = JIPS_SI_CHAR_2;
+ SISOLength = 2;
+ } else {
+ value[0] = UCNV_SI;
+ SISOLength = 1;
+ }
+ break;
+ case SO:
+ if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
+ value[0] = KEIS_SO_CHAR_1;
+ value[1] = KEIS_SO_CHAR_2;
+ SISOLength = 2;
+ } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
+ value[0] = JEF_SO_CHAR;
+ SISOLength = 1;
+ } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
+ value[0] = JIPS_SO_CHAR_1;
+ value[1] = JIPS_SO_CHAR_2;
+ SISOLength = 2;
+ } else {
+ value[0] = UCNV_SO;
+ SISOLength = 1;
+ }
+ break;
+ default:
+ /* Should never happen. */
+ break;
+ }
+
+ return SISOLength;
+}
+
+/* Miscellaneous ------------------------------------------------------------ */
+
+/*
+ * similar to ucnv_MBCSGetNextUChar() but recursive
+ *
+ * Enumerates the byte sequences that continue from the given state and reports
+ * their code points to the callback, 32 consecutive byte values at a time.
+ *
+ * @param mbcsTable  table whose stateTable and unicodeCodeUnits are read
+ * @param stateProps per-state properties as computed by getStateProp()
+ * @param state      state whose row is enumerated
+ * @param offset     sum of the transition offsets so far; added to the 16-bit
+ *                   value of a final entry to index unicodeCodeUnits
+ * @param value      bytes consumed so far, 8 bits per byte; shifted left by 8
+ *                   here so that the byte for this state can be OR-ed in
+ * @param callback   called as callback(context, first byte sequence of the
+ *                   block, codePoints) for each block of 32 byte values that
+ *                   yielded at least one code point; entries without a
+ *                   mapping are U_SENTINEL
+ * @param context    passed through to the callback
+ * @param pErrorCode passed through to recursive calls; not read or set here
+ * @return FALSE as soon as the callback or a recursive call returns FALSE,
+ *         otherwise TRUE
+ */
+static UBool
+enumToU(UConverterMBCSTable *mbcsTable, int8_t stateProps[],
+ int32_t state, uint32_t offset,
+ uint32_t value,
+ UConverterEnumToUCallback *callback, const void *context,
+ UErrorCode *pErrorCode) {
+ UChar32 codePoints[32];
+ const int32_t *row;
+ const uint16_t *unicodeCodeUnits;
+ UChar32 anyCodePoints;
+ int32_t b, limit;
+
+ row=mbcsTable->stateTable[state];
+ unicodeCodeUnits=mbcsTable->unicodeCodeUnits;
+
+ value<<=8;
+ anyCodePoints=-1; /* becomes non-negative if there is a mapping */
+
+ /* bits 5..3 of the state properties hold (lowest byte)>>5: start at that 32-block */
+ b=(stateProps[state]&0x38)<<2;
+ if(b==0 && stateProps[state]>=0x40) {
+ /* skip byte sequences with leading zeros because they are not stored in the fromUnicode table */
+ codePoints[0]=U_SENTINEL;
+ b=1;
+ }
+ /* bits 2..0 hold (highest byte)>>5: stop after the end of that 32-block */
+ limit=((stateProps[state]&7)+1)<<5;
+ while(b<limit) {
+ int32_t entry=row[b];
+ if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+ int32_t nextState=MBCS_ENTRY_TRANSITION_STATE(entry);
+ if(stateProps[nextState]>=0) {
+ /* recurse to a state with non-ignorable actions */
+ if(!enumToU(
+ mbcsTable, stateProps, nextState,
+ offset+MBCS_ENTRY_TRANSITION_OFFSET(entry),
+ value|(uint32_t)b,
+ callback, context,
+ pErrorCode)) {
+ return FALSE;
+ }
+ }
+ /* a transition byte does not complete a sequence in this state */
+ codePoints[b&0x1f]=U_SENTINEL;
+ } else {
+ UChar32 c;
+ int32_t action;
+
+ /*
+ * An if-else-if chain provides more reliable performance for
+ * the most common cases compared to a switch.
+ */
+ action=MBCS_ENTRY_FINAL_ACTION(entry);
+ if(action==MBCS_STATE_VALID_DIRECT_16) {
+ /* output BMP code point */
+ c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ } else if(action==MBCS_STATE_VALID_16) {
+ int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
+ c=unicodeCodeUnits[finalOffset];
+ if(c<0xfffe) {
+ /* output BMP code point */
+ } else {
+ c=U_SENTINEL;
+ }
+ } else if(action==MBCS_STATE_VALID_16_PAIR) {
+ int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
+ c=unicodeCodeUnits[finalOffset++];
+ if(c<0xd800) {
+ /* output BMP code point below 0xd800 */
+ } else if(c<=0xdbff) {
+ /* output roundtrip or fallback supplementary code point */
+ c=((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00);
+ } else if(c==0xe000) {
+ /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
+ c=unicodeCodeUnits[finalOffset];
+ } else {
+ c=U_SENTINEL;
+ }
+ } else if(action==MBCS_STATE_VALID_DIRECT_20) {
+ /* output supplementary code point */
+ c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
+ } else {
+ c=U_SENTINEL;
+ }
+
+ codePoints[b&0x1f]=c;
+ /* -1&c==c, so the first non-negative c clears the sign bit; U_SENTINEL leaves it as it is */
+ anyCodePoints&=c;
+ }
+ /* end of a block of 32 byte values: report it if it contains any mapping */
+ if(((++b)&0x1f)==0) {
+ if(anyCodePoints>=0) {
+ if(!callback(context, value|(uint32_t)(b-0x20), codePoints)) {
+ return FALSE;
+ }
+ anyCodePoints=-1;
+ }
+ }
+ }
+ return TRUE;
+}
+
+/*
+ * Only called if stateProps[state]==-1.
+ * A recursive call may do stateProps[state]|=0x40 if this state is the target of an
+ * MBCS_STATE_CHANGE_ONLY.
+ *
+ * Computes the properties byte for one state and, recursively, for every
+ * state that its row refers to and that still has the value -1.
+ * An entry counts as non-ignorable if it is a transition to a state with
+ * non-negative properties, or a final entry with an action below
+ * MBCS_STATE_UNASSIGNED.
+ *
+ * Result layout (see also the comment in ucnv_MBCSEnumToUnicode()):
+ *   -0x40      the row has only ignorable entries
+ *   bit 6      set for a direct state
+ *   bits 5..3  (lowest non-ignorable byte)>>5
+ *   bits 2..0  (highest non-ignorable byte)>>5
+ *
+ * @param stateTable the converter's state table, 256 entries per state
+ * @param stateProps in/out array of properties, indexed by state
+ * @param state      the state to analyze
+ * @return the new stateProps[state]
+ */
+static int8_t
+getStateProp(const int32_t (*stateTable)[256], int8_t stateProps[], int state) {
+ const int32_t *row;
+ int32_t min, max, entry, nextState;
+
+ row=stateTable[state];
+ /* no longer -1: recursive calls that come back to this state will not analyze it again */
+ stateProps[state]=0;
+
+ /* find first non-ignorable state */
+ for(min=0;; ++min) {
+ entry=row[min];
+ nextState=MBCS_ENTRY_STATE(entry);
+ if(stateProps[nextState]==-1) {
+ getStateProp(stateTable, stateProps, nextState);
+ }
+ if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+ if(stateProps[nextState]>=0) {
+ break;
+ }
+ } else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) {
+ break;
+ }
+ if(min==0xff) {
+ /* all 256 entries are ignorable */
+ stateProps[state]=-0x40; /* (int8_t)0xc0 */
+ return stateProps[state];
+ }
+ }
+ stateProps[state]|=(int8_t)((min>>5)<<3);
+
+ /* find last non-ignorable state */
+ for(max=0xff; min<max; --max) {
+ entry=row[max];
+ nextState=MBCS_ENTRY_STATE(entry);
+ if(stateProps[nextState]==-1) {
+ getStateProp(stateTable, stateProps, nextState);
+ }
+ if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+ if(stateProps[nextState]>=0) {
+ break;
+ }
+ } else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) {
+ break;
+ }
+ }
+ stateProps[state]|=(int8_t)(max>>5);
+
+ /* recurse further and collect direct-state information */
+ while(min<=max) {
+ entry=row[min];
+ nextState=MBCS_ENTRY_STATE(entry);
+ if(stateProps[nextState]==-1) {
+ getStateProp(stateTable, stateProps, nextState);
+ }
+ if(MBCS_ENTRY_IS_FINAL(entry)) {
+ /* the state that follows a final entry starts a new byte sequence */
+ stateProps[nextState]|=0x40;
+ if(MBCS_ENTRY_FINAL_ACTION(entry)<=MBCS_STATE_FALLBACK_DIRECT_20) {
+ /* this state itself completes a sequence with a direct result */
+ stateProps[state]|=0x40;
+ }
+ }
+ ++min;
+ }
+ return stateProps[state];
+}
+
+/*
+ * Internal enumeration of the toUnicode data of an MBCS converter.
+ *
+ * Present use: rebuilding the fromUnicode data of a MBCS_OPT_NO_FROM_U table.
+ * It could also serve a future ucnv_getUnicodeSet() option that includes
+ * reverse fallbacks, once the implementation is extended for that.
+ * Limitations: only roundtrip mappings are handled, and extensions are not
+ * handled.
+ *
+ * @param mbcsTable  the table to enumerate
+ * @param callback   receives each block of 32 code points, see enumToU()
+ * @param context    passed through to the callback
+ * @param pErrorCode passed through to enumToU()
+ */
+static void
+ucnv_MBCSEnumToUnicode(UConverterMBCSTable *mbcsTable,
+                       UConverterEnumToUCallback *callback, const void *context,
+                       UErrorCode *pErrorCode) {
+    /*
+     * One properties byte per state makes the enumeration faster.
+     * Unassigned, illegal and state-change-only actions are "ignorable":
+     * no mapping results from them.
+     *
+     * Bits 7..6:
+     *    1  direct/initial state (a stateful converter has several)
+     *    0  non-initial state with transitions or with non-ignorable result actions
+     *   -1  final state with only ignorable actions
+     *
+     * Bits 5..3:
+     *   The lowest byte value with non-ignorable actions, rounded down
+     *   to a multiple of 32, is value<<5.
+     *
+     * Bits 2..0:
+     *   The highest byte value with non-ignorable actions, rounded up
+     *   to the end of its block of 32, is (value<<5)|0x1f.
+     *
+     * A whole byte of -1 marks a state that has not been visited.
+     */
+    int8_t stateProps[MBCS_MAX_STATE_COUNT];
+    int32_t current=0;
+
+    uprv_memset(stateProps, -1, sizeof(stateProps));
+
+    /* state 0 is the root; the recursion fills in every state reachable from it */
+    getStateProp(mbcsTable->stateTable, stateProps, 0);
+
+    /*
+     * A state that still has the value -1 at this point is an
+     * unused/unreachable <icu:state>.
+     */
+    while(current<mbcsTable->countStates) {
+        if(stateProps[current]>=0x40) {
+            /* every direct state is a starting point for byte sequences */
+            enumToU(mbcsTable, stateProps, current, 0, 0,
+                    callback, context,
+                    pErrorCode);
+        }
+        ++current;
+    }
+}
+
+/*
+ * Walks the fromUnicode trie of sharedData->mbcs, calls sa->add() for every
+ * code point whose mapping is selected by which and filter, and finally lets
+ * ucnv_extGetUnicodeSet() process the extension data.
+ *
+ * @param sharedData converter data; the mbcs member is read here
+ * @param sa         set adder; sa->add(sa->set, c) is called per selected code point
+ * @param which      UCNV_ROUNDTRIP_SET selects roundtrip mappings only,
+ *                   UCNV_ROUNDTRIP_AND_FALLBACK_SET also selects fallbacks
+ * @param filter     restricts the accepted result bytes of multi-byte tables;
+ *                   not consulted for MBCS_OUTPUT_1 (single-byte) tables
+ * @param pErrorCode set to U_INTERNAL_PROGRAM_ERROR if an unknown filter value
+ *                   is met while reading a multi-byte table; the function then
+ *                   returns without processing the extension data
+ */
+U_CFUNC void
+ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
+ const USetAdder *sa,
+ UConverterUnicodeSet which,
+ UConverterSetFilter filter,
+ UErrorCode *pErrorCode) {
+ const UConverterMBCSTable *mbcsTable;
+ const uint16_t *table;
+
+ uint32_t st3;
+ uint16_t st1, maxStage1, st2;
+
+ UChar32 c;
+
+ /* enumerate the from-Unicode trie table */
+ mbcsTable=&sharedData->mbcs;
+ table=mbcsTable->fromUnicodeTable;
+ /* one stage 1 entry covers 1024 code points: 0x440 entries for U+0000..U+10FFFF, 0x40 for the BMP */
+ if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
+ maxStage1=0x440;
+ } else {
+ maxStage1=0x40;
+ }
+
+ c=0; /* keep track of the current code point while enumerating */
+
+ if(mbcsTable->outputType==MBCS_OUTPUT_1) {
+ const uint16_t *stage2, *stage3, *results;
+ uint16_t minValue;
+
+ results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
+
+ /*
+ * Set a threshold variable for selecting which mappings to use.
+ * See ucnv_MBCSSingleFromBMPWithOffsets() and
+ * MBCS_SINGLE_RESULT_FROM_U() for details.
+ */
+ if(which==UCNV_ROUNDTRIP_SET) {
+ /* use only roundtrips */
+ minValue=0xf00;
+ } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ {
+ /* use all roundtrip and fallback results */
+ minValue=0x800;
+ }
+
+ for(st1=0; st1<maxStage1; ++st1) {
+ st2=table[st1];
+ if(st2>maxStage1) {
+ stage2=table+st2;
+ /* one stage 2 block has 64 entries of 16 code points each */
+ for(st2=0; st2<64; ++st2) {
+ if((st3=stage2[st2])!=0) {
+ /* read the stage 3 block */
+ stage3=results+st3;
+
+ do {
+ if(*stage3++>=minValue) {
+ sa->add(sa->set, c);
+ }
+ } while((++c&0xf)!=0);
+ } else {
+ c+=16; /* empty stage 3 block */
+ }
+ }
+ } else {
+ c+=1024; /* empty stage 2 block */
+ }
+ }
+ } else {
+ const uint32_t *stage2;
+ const uint8_t *stage3, *bytes;
+ uint32_t st3Multiplier;
+ uint32_t value;
+ UBool useFallback;
+
+ bytes=mbcsTable->fromUnicodeBytes;
+
+ useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
+
+ /* number of stage 3 bytes stored per code point */
+ switch(mbcsTable->outputType) {
+ case MBCS_OUTPUT_3:
+ case MBCS_OUTPUT_4_EUC:
+ st3Multiplier=3;
+ break;
+ case MBCS_OUTPUT_4:
+ st3Multiplier=4;
+ break;
+ default:
+ st3Multiplier=2;
+ break;
+ }
+
+ for(st1=0; st1<maxStage1; ++st1) {
+ st2=table[st1];
+ if(st2>(maxStage1>>1)) {
+ /* stage 2 entries are 32 bits wide here and st2 counts in those units */
+ stage2=(const uint32_t *)table+st2;
+ for(st2=0; st2<64; ++st2) {
+ if((st3=stage2[st2])!=0) {
+ /* read the stage 3 block */
+ stage3=bytes+st3Multiplier*16*(uint32_t)(uint16_t)st3;
+
+ /* get the roundtrip flags for the stage 3 block */
+ st3>>=16;
+
+ /*
+ * Add code points for which the roundtrip flag is set,
+ * or which map to non-zero bytes if we use fallbacks.
+ * See ucnv_MBCSFromUnicodeWithOffsets() for details.
+ */
+ switch(filter) {
+ case UCNV_SET_FILTER_NONE:
+ do {
+ if(st3&1) {
+ sa->add(sa->set, c);
+ stage3+=st3Multiplier;
+ } else if(useFallback) {
+ /* OR all result bytes together: a non-zero result is a fallback mapping */
+ uint8_t b=0;
+ switch(st3Multiplier) {
+ case 4:
+ b|=*stage3++;
+ U_FALLTHROUGH;
+ case 3:
+ b|=*stage3++;
+ U_FALLTHROUGH;
+ case 2:
+ b|=stage3[0]|stage3[1];
+ stage3+=2;
+ U_FALLTHROUGH;
+ default:
+ break;
+ }
+ if(b!=0) {
+ sa->add(sa->set, c);
+ }
+ }
+ /* without fallbacks stage3 is never read, so it need not advance in that case */
+ st3>>=1;
+ } while((++c&0xf)!=0);
+ break;
+ case UCNV_SET_FILTER_DBCS_ONLY:
+ /* Ignore single-byte results (<0x100). */
+ do {
+ if(((st3&1)!=0 || useFallback) && *((const uint16_t *)stage3)>=0x100) {
+ sa->add(sa->set, c);
+ }
+ st3>>=1;
+ stage3+=2; /* +=st3Multiplier */
+ } while((++c&0xf)!=0);
+ break;
+ case UCNV_SET_FILTER_2022_CN:
+ /* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */
+ do {
+ if(((st3&1)!=0 || useFallback) && ((value=*stage3)==0x81 || value==0x82)) {
+ sa->add(sa->set, c);
+ }
+ st3>>=1;
+ stage3+=3; /* +=st3Multiplier */
+ } while((++c&0xf)!=0);
+ break;
+ case UCNV_SET_FILTER_SJIS:
+ /* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
+ do {
+ if(((st3&1)!=0 || useFallback) && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) {
+ sa->add(sa->set, c);
+ }
+ st3>>=1;
+ stage3+=2; /* +=st3Multiplier */
+ } while((++c&0xf)!=0);
+ break;
+ case UCNV_SET_FILTER_GR94DBCS:
+ /* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). */
+ do {
+ /* unsigned subtract-and-compare: one range check for the 16-bit value, one for its low byte */
+ if( ((st3&1)!=0 || useFallback) &&
+ (uint16_t)((value=*((const uint16_t *)stage3)) - 0xa1a1)<=(0xfefe - 0xa1a1) &&
+ (uint8_t)(value-0xa1)<=(0xfe - 0xa1)
+ ) {
+ sa->add(sa->set, c);
+ }
+ st3>>=1;
+ stage3+=2; /* +=st3Multiplier */
+ } while((++c&0xf)!=0);
+ break;
+ case UCNV_SET_FILTER_HZ:
+ /* Only add code points that are suitable for HZ DBCS (lead byte A1..FD). */
+ do {
+ if( ((st3&1)!=0 || useFallback) &&
+ (uint16_t)((value=*((const uint16_t *)stage3))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
+ (uint8_t)(value-0xa1)<=(0xfe - 0xa1)
+ ) {
+ sa->add(sa->set, c);
+ }
+ st3>>=1;
+ stage3+=2; /* +=st3Multiplier */
+ } while((++c&0xf)!=0);
+ break;
+ default:
+ /* unknown filter: stop without looking at the extension data */
+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
+ return;
+ }
+ } else {
+ c+=16; /* empty stage 3 block */
+ }
+ }
+ } else {
+ c+=1024; /* empty stage 2 block */
+ }
+ }
+ }
+
+ ucnv_extGetUnicodeSet(sharedData, sa, which, filter, pErrorCode);
+}
+
+/*
+ * Convenience form of ucnv_MBCSGetFilteredUnicodeSetForUnicode() that derives
+ * the filter from the table itself: UCNV_SET_FILTER_DBCS_ONLY when the
+ * outputType is MBCS_OUTPUT_DBCS_ONLY, UCNV_SET_FILTER_NONE otherwise.
+ */
+U_CFUNC void
+ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
+                                 const USetAdder *sa,
+                                 UConverterUnicodeSet which,
+                                 UErrorCode *pErrorCode) {
+    UConverterSetFilter filter=UCNV_SET_FILTER_NONE;
+
+    if(sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) {
+        filter=UCNV_SET_FILTER_DBCS_ONLY;
+    }
+    ucnv_MBCSGetFilteredUnicodeSetForUnicode(sharedData, sa, which, filter, pErrorCode);
+}
+
+/*
+ * Unicode-set entry of the implementation tables in this file.
+ * A converter with the GB 18030 option reports all code points except the
+ * surrogates U+D800..U+DFFF, regardless of which; any other converter gets
+ * its set from the table via ucnv_MBCSGetUnicodeSetForUnicode().
+ */
+static void U_CALLCONV
+ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
+                       const USetAdder *sa,
+                       UConverterUnicodeSet which,
+                       UErrorCode *pErrorCode) {
+    if((cnv->options&_MBCS_OPTION_GB18030)==0) {
+        ucnv_MBCSGetUnicodeSetForUnicode(cnv->sharedData, sa, which, pErrorCode);
+        return;
+    }
+
+    sa->addRange(sa->set, 0, 0xd7ff);
+    sa->addRange(sa->set, 0xe000, 0x10ffff);
+}
+
+/* conversion extensions for input not in the main table -------------------- */
+
+/*
+ * Hardcoded extension handling for GB 18030.
+ * For the definitions of the LINEAR macros and gb18030Ranges, see near the beginning of the file.
+ *
+ * In the future, conversion extensions may handle m:n mappings and delta tables,
+ * see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/conversion_extensions.html
+ *
+ * If an input character cannot be mapped, then these functions set an error
+ * code. The framework will then call the callback function.
+ */
+
+/*
+ * Converts one code point that has no mapping in the main table.
+ * The extension data is tried first if the table has any; a converter with
+ * the GB 18030 option then tries the algorithmic four-byte ranges.
+ *
+ * @return 0 if the extension data or a GB 18030 range handled the input
+ *         (output has been written to the target);
+ *         otherwise *pErrorCode is set to U_INVALID_CHAR_FOUND and the
+ *         code point is returned for cnv->fromUChar32
+ */
+static UChar32
+_extFromU(UConverter *cnv, const UConverterSharedData *sharedData,
+          UChar32 cp,
+          const UChar **source, const UChar *sourceLimit,
+          uint8_t **target, const uint8_t *targetLimit,
+          int32_t **offsets, int32_t sourceIndex,
+          UBool flush,
+          UErrorCode *pErrorCode) {
+    const int32_t *extIndexes=sharedData->mbcs.extIndexes;
+
+    cnv->useSubChar1=FALSE;
+
+    if(extIndexes!=NULL) {
+        if(ucnv_extInitialMatchFromU(
+                cnv, extIndexes,
+                cp, source, sourceLimit,
+                (char **)target, (char *)targetLimit,
+                offsets, sourceIndex,
+                flush,
+                pErrorCode)) {
+            /* an extension mapping handled the input */
+            return 0;
+        }
+    }
+
+    /* GB 18030 */
+    if((cnv->options&_MBCS_OPTION_GB18030)!=0) {
+        const uint32_t codePoint=(uint32_t)cp;
+        int32_t rangeIndex;
+
+        for(rangeIndex=0; rangeIndex<UPRV_LENGTHOF(gb18030Ranges); ++rangeIndex) {
+            const uint32_t *entry=gb18030Ranges[rangeIndex];
+            uint32_t linear;
+            char gbBytes[4];
+
+            if(codePoint<entry[0] || entry[1]<codePoint) {
+                continue; /* not in this range */
+            }
+
+            /*
+             * Position of the result in the four-byte code space:
+             * start of the range relative to 81 30 81 30, plus the
+             * distance of the code point from the start of the range.
+             */
+            linear=entry[2]-LINEAR_18030_BASE;
+            linear+=codePoint-entry[0];
+
+            /* split into digits of radix 10, 126, 10 and the lead byte, last byte first */
+            gbBytes[3]=(char)(0x30+linear%10);
+            linear/=10;
+            gbBytes[2]=(char)(0x81+linear%126);
+            linear/=126;
+            gbBytes[1]=(char)(0x30+linear%10);
+            linear/=10;
+            gbBytes[0]=(char)(0x81+linear);
+
+            ucnv_fromUWriteBytes(cnv,
+                                 gbBytes, 4, (char **)target, (char *)targetLimit,
+                                 offsets, sourceIndex, pErrorCode);
+            return 0;
+        }
+    }
+
+    /* no mapping */
+    *pErrorCode=U_INVALID_CHAR_FOUND;
+    return cp;
+}
+
+/*
+ * Converts a byte sequence that has no mapping in the main table.
+ * Input sequence: the first length bytes of cnv->toUBytes.
+ * The extension data is tried first if the table has any; a converter with
+ * the GB 18030 option then tries the algorithmic ranges for four-byte input.
+ *
+ * @return 0 if the extension data or a GB 18030 range handled the input
+ *         (output has been written to the target);
+ *         otherwise *pErrorCode is set to U_INVALID_CHAR_FOUND and the
+ *         length (toULength, byteIndex) of the input is returned
+ */
+static int8_t
+_extToU(UConverter *cnv, const UConverterSharedData *sharedData,
+        int8_t length,
+        const uint8_t **source, const uint8_t *sourceLimit,
+        UChar **target, const UChar *targetLimit,
+        int32_t **offsets, int32_t sourceIndex,
+        UBool flush,
+        UErrorCode *pErrorCode) {
+    const int32_t *extIndexes=sharedData->mbcs.extIndexes;
+
+    if(extIndexes!=NULL) {
+        if(ucnv_extInitialMatchToU(
+                cnv, extIndexes,
+                length, (const char **)source, (const char *)sourceLimit,
+                target, targetLimit,
+                offsets, sourceIndex,
+                flush,
+                pErrorCode)) {
+            /* an extension mapping handled the input */
+            return 0;
+        }
+    }
+
+    /* GB 18030 */
+    if(length==4 && (cnv->options&_MBCS_OPTION_GB18030)!=0) {
+        uint32_t linear=LINEAR_18030(cnv->toUBytes[0], cnv->toUBytes[1], cnv->toUBytes[2], cnv->toUBytes[3]);
+        int32_t rangeIndex;
+
+        for(rangeIndex=0; rangeIndex<UPRV_LENGTHOF(gb18030Ranges); ++rangeIndex) {
+            const uint32_t *entry=gb18030Ranges[rangeIndex];
+
+            if(linear<entry[2] || entry[3]<linear) {
+                continue; /* not in this range */
+            }
+
+            /* found the sequence: clear the error that led here */
+            *pErrorCode=U_ZERO_ERROR;
+
+            /* start code point plus the distance of the input from the start sequence */
+            linear=entry[0]+(linear-entry[2]);
+
+            ucnv_toUWriteCodePoint(cnv, linear, target, targetLimit, offsets, sourceIndex, pErrorCode);
+            return 0;
+        }
+    }
+
+    /* no mapping */
+    *pErrorCode=U_INVALID_CHAR_FOUND;
+    return length;
+}
+
+/* EBCDIC swap LF<->NL ------------------------------------------------------ */
+
+/*
+ * This code modifies a standard EBCDIC<->Unicode mapping table for
+ * OS/390 (z/OS) Unix System Services (Open Edition).
+ * The difference is in the mapping of Line Feed and New Line control codes:
+ * Standard EBCDIC maps
+ *
+ * <U000A> \x25 |0
+ * <U0085> \x15 |0
+ *
+ * but OS/390 USS EBCDIC swaps the control codes for LF and NL,
+ * mapping
+ *
+ * <U000A> \x15 |0
+ * <U0085> \x25 |0
+ *
+ * This code modifies a loaded standard EBCDIC<->Unicode mapping table
+ * by copying it into allocated memory and swapping the LF and NL values.
+ * This makes it possible to support the same EBCDIC charset in both versions
+ * without duplicating the entire installed table.
+ */
+
+/* standard EBCDIC codes */
+#define EBCDIC_LF 0x25
+#define EBCDIC_NL 0x15
+
+/*
+ * standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables
+ * (the byte value from above in the low 8 bits, with 0xf00 added)
+ */
+#define EBCDIC_RT_LF 0xf25
+#define EBCDIC_RT_NL 0xf15
+
+/* Unicode code points */
+#define U_LF 0x0a
+#define U_NL 0x85
+
+/*
+ * Builds the LF<->NL-swapped variant of an EBCDIC table and stores it in
+ * sharedData->mbcs (swapLFNLStateTable, swapLFNLFromUnicodeBytes, swapLFNLName).
+ *
+ * The three pieces live in one allocation, in this order:
+ *   copy of the state table (1024 bytes per state: 256 int32_t entries),
+ *   copy of the fromUnicode bytes,
+ *   converter name with UCNV_SWAP_LFNL_OPTION_STRING appended.
+ * The allocation is owned through the swapLFNLStateTable pointer.
+ *
+ * @param sharedData the loaded converter data to extend
+ * @param pErrorCode set to U_INVALID_FORMAT_ERROR if the size of the
+ *                   fromUnicode bytes is unknown (fromUBytesLength is 0),
+ *                   or to U_MEMORY_ALLOCATION_ERROR
+ * @return TRUE if the swapped data is available afterwards (built here, or
+ *         already installed by another thread);
+ *         FALSE if the table does not qualify (no error is set: the option
+ *         is ignored) or if an error occurred
+ */
+static UBool
+_EBCDICSwapLFNL(UConverterSharedData *sharedData, UErrorCode *pErrorCode) {
+ UConverterMBCSTable *mbcsTable;
+
+ const uint16_t *table, *results;
+ const uint8_t *bytes;
+
+ int32_t (*newStateTable)[256];
+ uint16_t *newResults;
+ uint8_t *p;
+ char *name;
+
+ uint32_t stage2Entry;
+ uint32_t size, sizeofFromUBytes;
+
+ mbcsTable=&sharedData->mbcs;
+
+ table=mbcsTable->fromUnicodeTable;
+ bytes=mbcsTable->fromUnicodeBytes;
+ results=(const uint16_t *)bytes;
+
+ /*
+ * Check that this is an EBCDIC table with SBCS portion -
+ * SBCS or EBCDIC_STATEFUL with standard EBCDIC LF and NL mappings.
+ *
+ * If not, ignore the option. Options are always ignored if they do not apply.
+ */
+ if(!(
+ (mbcsTable->outputType==MBCS_OUTPUT_1 || mbcsTable->outputType==MBCS_OUTPUT_2_SISO) &&
+ mbcsTable->stateTable[0][EBCDIC_LF]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) &&
+ mbcsTable->stateTable[0][EBCDIC_NL]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL)
+ )) {
+ return FALSE;
+ }
+
+ /* the fromUnicode direction must have the standard roundtrip mappings as well */
+ if(mbcsTable->outputType==MBCS_OUTPUT_1) {
+ if(!(
+ EBCDIC_RT_LF==MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) &&
+ EBCDIC_RT_NL==MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL)
+ )) {
+ return FALSE;
+ }
+ } else /* MBCS_OUTPUT_2_SISO */ {
+ stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF);
+ if(!(
+ MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF)!=0 &&
+ EBCDIC_LF==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF)
+ )) {
+ return FALSE;
+ }
+
+ stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL);
+ if(!(
+ MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL)!=0 &&
+ EBCDIC_NL==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL)
+ )) {
+ return FALSE;
+ }
+ }
+
+ if(mbcsTable->fromUBytesLength>0) {
+ /*
+ * We _know_ the number of bytes in the fromUnicodeBytes array
+ * starting with header.version 4.1.
+ */
+ sizeofFromUBytes=mbcsTable->fromUBytesLength;
+ } else {
+ /*
+ * Otherwise:
+ * There used to be code to enumerate the fromUnicode
+ * trie and find the highest entry, but it was removed in ICU 3.2
+ * because it was not tested and caused a low code coverage number.
+ * See Jitterbug 3674.
+ * This affects only some .cnv file formats with a header.version
+ * below 4.1, and only when swaplfnl is requested.
+ *
+ * ucnvmbcs.c revision 1.99 is the last one with the
+ * ucnv_MBCSSizeofFromUBytes() function.
+ */
+ *pErrorCode=U_INVALID_FORMAT_ERROR;
+ return FALSE;
+ }
+
+ /*
+ * The table has an appropriate format.
+ * Allocate and build
+ * - a modified to-Unicode state table
+ * - a modified from-Unicode output array
+ * - a converter name string with the swap option appended
+ */
+ size=
+ mbcsTable->countStates*1024+
+ sizeofFromUBytes+
+ UCNV_MAX_CONVERTER_NAME_LENGTH+20;
+ p=(uint8_t *)uprv_malloc(size);
+ if(p==NULL) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return FALSE;
+ }
+
+ /* copy and modify the to-Unicode state table */
+ newStateTable=(int32_t (*)[256])p;
+ uprv_memcpy(newStateTable, mbcsTable->stateTable, mbcsTable->countStates*1024);
+
+ newStateTable[0][EBCDIC_LF]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL);
+ newStateTable[0][EBCDIC_NL]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF);
+
+ /* copy and modify the from-Unicode result table */
+ /* newStateTable[countStates] is the first byte after the copied state rows */
+ newResults=(uint16_t *)newStateTable[mbcsTable->countStates];
+ uprv_memcpy(newResults, bytes, sizeofFromUBytes);
+
+ /* conveniently, the table access macros work on the left side of expressions */
+ if(mbcsTable->outputType==MBCS_OUTPUT_1) {
+ MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_LF)=EBCDIC_RT_NL;
+ MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_NL)=EBCDIC_RT_LF;
+ } else /* MBCS_OUTPUT_2_SISO */ {
+ stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF);
+ MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_LF)=EBCDIC_NL;
+
+ stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL);
+ MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_NL)=EBCDIC_LF;
+ }
+
+ /* set the canonical converter name */
+ name=(char *)newResults+sizeofFromUBytes;
+ uprv_strcpy(name, sharedData->staticData->name);
+ uprv_strcat(name, UCNV_SWAP_LFNL_OPTION_STRING);
+
+ /* set the pointers */
+ /* NOTE(review): a NULL argument presumably selects ICU's global mutex -- confirm in umutex.h */
+ icu::umtx_lock(NULL);
+ if(mbcsTable->swapLFNLStateTable==NULL) {
+ mbcsTable->swapLFNLStateTable=newStateTable;
+ mbcsTable->swapLFNLFromUnicodeBytes=(uint8_t *)newResults;
+ mbcsTable->swapLFNLName=name;
+
+ /* ownership has moved to mbcsTable: do not free below */
+ newStateTable=NULL;
+ }
+ icu::umtx_unlock(NULL);
+
+ /* release the allocated memory if another thread beat us to it */
+ if(newStateTable!=NULL) {
+ uprv_free(newStateTable);
+ }
+ return TRUE;
+}
+
+/* reconstitute omitted fromUnicode data ------------------------------------ */
+
+/* for details, compare with genmbcs.c MBCSAddFromUnicode() and transformEUC() */
+/*
+ * Enumeration callback (see enumToU()) that writes roundtrip mappings into
+ * the fromUnicode stage 3 data and sets their roundtrip flags in stage 2.
+ *
+ * @param context    the UConverterMBCSTable to write to; its fromUnicodeTable
+ *                   and fromUnicodeBytes must point to writable memory
+ * @param value      codepage bytes that map to codePoints[0]; codePoints[i]
+ *                   belongs to value+i
+ * @param codePoints 32 code points; negative entries are skipped
+ * @return always TRUE, so the enumeration continues
+ */
+static UBool U_CALLCONV
+writeStage3Roundtrip(const void *context, uint32_t value, UChar32 codePoints[32]) {
+ UConverterMBCSTable *mbcsTable=(UConverterMBCSTable *)context;
+ const uint16_t *table;
+ uint32_t *stage2;
+ uint8_t *bytes, *p;
+ UChar32 c;
+ int32_t i, st3;
+
+ table=mbcsTable->fromUnicodeTable;
+ bytes=(uint8_t *)mbcsTable->fromUnicodeBytes;
+
+ /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */
+ switch(mbcsTable->outputType) {
+ case MBCS_OUTPUT_3_EUC:
+ if(value<=0xffff) {
+ /* short sequences are stored directly */
+ /* code set 0 or 1 */
+ } else if(value<=0x8effff) {
+ /* code set 2 */
+ value&=0x7fff;
+ } else /* first byte is 0x8f */ {
+ /* code set 3 */
+ value&=0xff7f;
+ }
+ break;
+ case MBCS_OUTPUT_4_EUC:
+ if(value<=0xffffff) {
+ /* short sequences are stored directly */
+ /* code set 0 or 1 */
+ } else if(value<=0x8effffff) {
+ /* code set 2 */
+ value&=0x7fffff;
+ } else /* first byte is 0x8f */ {
+ /* code set 3 */
+ value&=0xff7fff;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* value advances together with i, also for entries that are skipped */
+ for(i=0; i<=0x1f; ++value, ++i) {
+ c=codePoints[i];
+ if(c<0) {
+ continue;
+ }
+
+ /* locate the stage 2 & 3 data */
+ stage2=((uint32_t *)table)+table[c>>10]+((c>>4)&0x3f);
+ p=bytes;
+ /* low 16 bits of the stage 2 entry: index of the stage 3 block of 16 entries */
+ st3=(int32_t)(uint16_t)*stage2*16+(c&0xf);
+
+ /* write the codepage bytes into stage 3 */
+ switch(mbcsTable->outputType) {
+ case MBCS_OUTPUT_3:
+ case MBCS_OUTPUT_4_EUC:
+ /* 3 bytes per character, most significant byte first */
+ p+=st3*3;
+ p[0]=(uint8_t)(value>>16);
+ p[1]=(uint8_t)(value>>8);
+ p[2]=(uint8_t)value;
+ break;
+ case MBCS_OUTPUT_4:
+ /* 4 bytes per character, stored as one uint32_t */
+ ((uint32_t *)p)[st3]=value;
+ break;
+ default:
+ /* 2 bytes per character */
+ ((uint16_t *)p)[st3]=(uint16_t)value;
+ break;
+ }
+
+ /* set the roundtrip flag */
+ *stage2|=(1UL<<(16+(c&0xf)));
+ }
+ return TRUE;
+ }
+
+/*
+ * Rebuilds the fromUnicode data that was omitted from a table: allocates one
+ * zero-filled buffer for stage 1, the full stage 2 and the stage 3 bytes,
+ * copies the stored parts into it, reroutes mbcsTable->fromUnicodeTable and
+ * mbcsTable->fromUnicodeBytes into the buffer, restores the initial part of
+ * stage 2 from the mbcsIndex, and finally fills in stage 3 and the roundtrip
+ * flags by enumerating the toUnicode data.
+ *
+ * The buffer is stored in mbcsTable->reconstitutedData.
+ *
+ * @param mbcsTable        table to complete; modified in place
+ * @param stage1Length     number of 16-bit stage 1 units
+ * @param stage2Length     number of 32-bit stage 2 units stored in the table
+ * @param fullStage2Length number of 32-bit stage 2 units after reconstitution
+ * @param pErrorCode       set to U_MEMORY_ALLOCATION_ERROR if the buffer
+ *                         cannot be allocated; the table is then left with
+ *                         reconstitutedData==NULL and its pointers unchanged
+ */
+static void
+reconstituteData(UConverterMBCSTable *mbcsTable,
+ uint32_t stage1Length, uint32_t stage2Length,
+ uint32_t fullStage2Length, /* lengths are numbers of units, not bytes */
+ UErrorCode *pErrorCode) {
+ uint16_t *stage1;
+ uint32_t *stage2;
+ uint32_t dataLength=stage1Length*2+fullStage2Length*4+mbcsTable->fromUBytesLength;
+ mbcsTable->reconstitutedData=(uint8_t *)uprv_malloc(dataLength);
+ if(mbcsTable->reconstitutedData==NULL) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ uprv_memset(mbcsTable->reconstitutedData, 0, dataLength);
+
+ /* copy existing data and reroute the pointers */
+ stage1=(uint16_t *)mbcsTable->reconstitutedData;
+ uprv_memcpy(stage1, mbcsTable->fromUnicodeTable, stage1Length*2);
+
+ /* the stored stage 2 units are the last stage2Length units of the full stage 2 */
+ stage2=(uint32_t *)(stage1+stage1Length);
+ uprv_memcpy(stage2+(fullStage2Length-stage2Length),
+ mbcsTable->fromUnicodeTable+stage1Length,
+ stage2Length*4);
+
+ mbcsTable->fromUnicodeTable=stage1;
+ mbcsTable->fromUnicodeBytes=(uint8_t *)(stage2+fullStage2Length);
+
+ /* indexes into stage 2 count from the bottom of the fromUnicodeTable */
+ stage2=(uint32_t *)stage1;
+
+ /* reconstitute the initial part of stage 2 from the mbcsIndex */
+ {
+ /* one mbcsIndex entry covers 64 code points */
+ int32_t stageUTF8Length=((int32_t)mbcsTable->maxFastUChar+1)>>6;
+ int32_t stageUTF8Index=0;
+ int32_t st1, st2, st3, i;
+
+ for(st1=0; stageUTF8Index<stageUTF8Length; ++st1) {
+ st2=stage1[st1];
+ /*
+ * stage1Length/2 is the index, in 32-bit units, of the first stage 2 block.
+ * NOTE(review): that block presumably is the shared all-unassigned one -- confirm in genmbcs.
+ */
+ if(st2!=(int32_t)stage1Length/2) {
+ /* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */
+ for(i=0; i<16; ++i) {
+ st3=mbcsTable->mbcsIndex[stageUTF8Index++];
+ if(st3!=0) {
+ /* a stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */
+ st3>>=4;
+ /*
+ * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are
+ * allocated together as a single 64-block for access from the mbcsIndex
+ */
+ stage2[st2++]=st3++;
+ stage2[st2++]=st3++;
+ stage2[st2++]=st3++;
+ stage2[st2++]=st3;
+ } else {
+ /* no stage 3 block, skip */
+ st2+=4;
+ }
+ }
+ } else {
+ /* no stage 2 block, skip */
+ stageUTF8Index+=16;
+ }
+ }
+ }
+
+ /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */
+ ucnv_MBCSEnumToUnicode(mbcsTable, writeStage3Roundtrip, mbcsTable, pErrorCode);
+}
+
+/* MBCS setup functions ----------------------------------------------------- */
+
+static void U_CALLCONV
+ucnv_MBCSLoad(UConverterSharedData *sharedData,
+ UConverterLoadArgs *pArgs,
+ const uint8_t *raw,
+ UErrorCode *pErrorCode) {
+ UDataInfo info;
+ UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
+ _MBCSHeader *header=(_MBCSHeader *)raw;
+ uint32_t offset;
+ uint32_t headerLength;
+ UBool noFromU=FALSE;
+
+ if(header->version[0]==4) {
+ headerLength=MBCS_HEADER_V4_LENGTH;
+ } else if(header->version[0]==5 && header->version[1]>=3 &&
+ (header->options&MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0) {
+ headerLength=header->options&MBCS_OPT_LENGTH_MASK;
+ noFromU=(UBool)((header->options&MBCS_OPT_NO_FROM_U)!=0);
+ } else {
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
+ return;
+ }
+
+ mbcsTable->outputType=(uint8_t)header->flags;
+ if(noFromU && mbcsTable->outputType==MBCS_OUTPUT_1) {
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
+ return;
+ }
+
+ /* extension data, header version 4.2 and higher */
+ offset=header->flags>>8;
+ if(offset!=0) {
+ mbcsTable->extIndexes=(const int32_t *)(raw+offset);
+ }
+
+ if(mbcsTable->outputType==MBCS_OUTPUT_EXT_ONLY) {
+ UConverterLoadArgs args=UCNV_LOAD_ARGS_INITIALIZER;
+ UConverterSharedData *baseSharedData;
+ const int32_t *extIndexes;
+ const char *baseName;
+
+ /* extension-only file, load the base table and set values appropriately */
+ if((extIndexes=mbcsTable->extIndexes)==NULL) {
+ /* extension-only file without extension */
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
+ return;
+ }
+
+ if(pArgs->nestedLoads!=1) {
+ /* an extension table must not be loaded as a base table */
+ *pErrorCode=U_INVALID_TABLE_FILE;
+ return;
+ }
+
+ /* load the base table */
+ baseName=(const char *)header+headerLength*4;
+ if(0==uprv_strcmp(baseName, sharedData->staticData->name)) {
+ /* forbid loading this same extension-only file */
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
+ return;
+ }
+
+ /* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
+ args.size=sizeof(UConverterLoadArgs);
+ args.nestedLoads=2;
+ args.onlyTestIsLoadable=pArgs->onlyTestIsLoadable;
+ args.reserved=pArgs->reserved;
+ args.options=pArgs->options;
+ args.pkg=pArgs->pkg;
+ args.name=baseName;
+ baseSharedData=ucnv_load(&args, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+ if( baseSharedData->staticData->conversionType!=UCNV_MBCS ||
+ baseSharedData->mbcs.baseSharedData!=NULL
+ ) {
+ ucnv_unload(baseSharedData);
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
+ return;
+ }
+ if(pArgs->onlyTestIsLoadable) {
+ /*
+ * Exit as soon as we know that we can load the converter
+ * and the format is valid and supported.
+ * The worst that can happen in the following code is a memory
+ * allocation error.
+ */
+ ucnv_unload(baseSharedData);
+ return;
+ }
+
+ /* copy the base table data */
+ uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable));
+
+ /* overwrite values with relevant ones for the extension converter */
+ mbcsTable->baseSharedData=baseSharedData;
+ mbcsTable->extIndexes=extIndexes;
+
+ /*
+ * It would be possible to share the swapLFNL data with a base converter,
+ * but the generated name would have to be different, and the memory
+ * would have to be free'd only once.
+ * It is easier to just create the data for the extension converter
+ * separately when it is requested.
+ */
+ mbcsTable->swapLFNLStateTable=NULL;
+ mbcsTable->swapLFNLFromUnicodeBytes=NULL;
+ mbcsTable->swapLFNLName=NULL;
+
+ /*
+ * The reconstitutedData must be deleted only when the base converter
+ * is unloaded.
+ */
+ mbcsTable->reconstitutedData=NULL;
+
+ /*
+ * Set a special, runtime-only outputType if the extension converter
+ * is a DBCS version of a base converter that also maps single bytes.
+ */
+ if( sharedData->staticData->conversionType==UCNV_DBCS ||
+ (sharedData->staticData->conversionType==UCNV_MBCS &&
+ sharedData->staticData->minBytesPerChar>=2)
+ ) {
+ if(baseSharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO) {
+ /* the base converter is SI/SO-stateful */
+ int32_t entry;
+
+ /* get the dbcs state from the state table entry for SO=0x0e */
+ entry=mbcsTable->stateTable[0][0xe];
+ if( MBCS_ENTRY_IS_FINAL(entry) &&
+ MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_CHANGE_ONLY &&
+ MBCS_ENTRY_FINAL_STATE(entry)!=0
+ ) {
+ mbcsTable->dbcsOnlyState=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry);
+
+ mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
+ }
+ } else if(
+ baseSharedData->staticData->conversionType==UCNV_MBCS &&
+ baseSharedData->staticData->minBytesPerChar==1 &&
+ baseSharedData->staticData->maxBytesPerChar==2 &&
+ mbcsTable->countStates<=127
+ ) {
+ /* non-stateful base converter, need to modify the state table */
+ int32_t (*newStateTable)[256];
+ int32_t *state;
+ int32_t i, count;
+
+ /* allocate a new state table and copy the base state table contents */
+ count=mbcsTable->countStates;
+ newStateTable=(int32_t (*)[256])uprv_malloc((count+1)*1024);
+ if(newStateTable==NULL) {
+ ucnv_unload(baseSharedData);
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+
+ uprv_memcpy(newStateTable, mbcsTable->stateTable, count*1024);
+
+ /* change all final single-byte entries to go to a new all-illegal state */
+ state=newStateTable[0];
+ for(i=0; i<256; ++i) {
+ if(MBCS_ENTRY_IS_FINAL(state[i])) {
+ state[i]=MBCS_ENTRY_TRANSITION(count, 0);
+ }
+ }
+
+ /* build the new all-illegal state */
+ state=newStateTable[count];
+ for(i=0; i<256; ++i) {
+ state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0);
+ }
+ mbcsTable->stateTable=(const int32_t (*)[256])newStateTable;
+ mbcsTable->countStates=(uint8_t)(count+1);
+ mbcsTable->stateTableOwned=TRUE;
+
+ mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
+ }
+ }
+
+ /*
+ * unlike below for files with base tables, do not get the unicodeMask
+ * from the sharedData; instead, use the base table's unicodeMask,
+ * which we copied in the memcpy above;
+ * this is necessary because the static data unicodeMask, especially
+ * the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data
+ */
+ } else {
+ /* conversion file with a base table; an additional extension table is optional */
+ /* make sure that the output type is known */
+ switch(mbcsTable->outputType) {
+ case MBCS_OUTPUT_1:
+ case MBCS_OUTPUT_2:
+ case MBCS_OUTPUT_3:
+ case MBCS_OUTPUT_4:
+ case MBCS_OUTPUT_3_EUC:
+ case MBCS_OUTPUT_4_EUC:
+ case MBCS_OUTPUT_2_SISO:
+ /* OK */
+ break;
+ default:
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
+ return;
+ }
+ if(pArgs->onlyTestIsLoadable) {
+ /*
+ * Exit as soon as we know that we can load the converter
+ * and the format is valid and supported.
+ * The worst that can happen in the following code is a memory
+ * allocation error.
+ */
+ return;
+ }
+
+ mbcsTable->countStates=(uint8_t)header->countStates;
+ mbcsTable->countToUFallbacks=header->countToUFallbacks;
+ mbcsTable->stateTable=(const int32_t (*)[256])(raw+headerLength*4);
+ mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates);
+ mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits);
+
+ mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable);
+ mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes);
+ mbcsTable->fromUBytesLength=header->fromUBytesLength;
+
+ /*
+ * converter versions 6.1 and up contain a unicodeMask that is
+ * used here to select the most efficient function implementations
+ */
+ info.size=sizeof(UDataInfo);
+ udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);
+ if(info.formatVersion[0]>6 || (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) {
+ /* mask off possible future extensions to be safe */
+ mbcsTable->unicodeMask=(uint8_t)(sharedData->staticData->unicodeMask&3);
+ } else {
+ /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
+ mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES;
+ }
+
+ /*
+ * _MBCSHeader.version 4.3 adds utf8Friendly data structures.
+ * Check for the header version, SBCS vs. MBCS, and for whether the
+ * data structures are optimized for code points as high as what the
+ * runtime code is designed for.
+ * The implementation does not handle mapping tables with entries for
+ * unpaired surrogates.
+ */
+ if( header->version[1]>=3 &&
+ (mbcsTable->unicodeMask&UCNV_HAS_SURROGATES)==0 &&
+ (mbcsTable->countStates==1 ?
+ (header->version[2]>=(SBCS_FAST_MAX>>8)) :
+ (header->version[2]>=(MBCS_FAST_MAX>>8))
+ )
+ ) {
+ mbcsTable->utf8Friendly=TRUE;
+
+ if(mbcsTable->countStates==1) {
+ /*
+ * SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher.
+ * Build a table with indexes to each block, to be used instead of
+ * the regular stage 1/2 table.
+ */
+ int32_t i;
+ for(i=0; i<(SBCS_FAST_LIMIT>>6); ++i) {
+ mbcsTable->sbcsIndex[i]=mbcsTable->fromUnicodeTable[mbcsTable->fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
+ }
+ /* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header->version[2]>(SBCS_FAST_MAX>>8) */
+ mbcsTable->maxFastUChar=SBCS_FAST_MAX;
+ } else {
+ /*
+ * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.
+ * The .cnv file is prebuilt with an additional stage table with indexes
+ * to each block.
+ */
+ mbcsTable->mbcsIndex=(const uint16_t *)
+ (mbcsTable->fromUnicodeBytes+
+ (noFromU ? 0 : mbcsTable->fromUBytesLength));
+ mbcsTable->maxFastUChar=(((UChar)header->version[2])<<8)|0xff;
+ }
+ }
+
+ /* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */
+ {
+ uint32_t asciiRoundtrips=0xffffffff;
+ int32_t i;
+
+ for(i=0; i<0x80; ++i) {
+ if(mbcsTable->stateTable[0][i]!=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) {
+ asciiRoundtrips&=~((uint32_t)1<<(i>>2));
+ }
+ }
+ mbcsTable->asciiRoundtrips=asciiRoundtrips;
+ }
+
+ if(noFromU) {
+ uint32_t stage1Length=
+ mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY ?
+ 0x440 : 0x40;
+ uint32_t stage2Length=
+ (header->offsetFromUBytes-header->offsetFromUTable)/4-
+ stage1Length/2;
+ reconstituteData(mbcsTable, stage1Length, stage2Length, header->fullStage2Length, pErrorCode);
+ }
+ }
+
+ /* Set the impl pointer here so that it is set for both extension-only and base tables. */
+ if(mbcsTable->utf8Friendly) {
+ if(mbcsTable->countStates==1) {
+ sharedData->impl=&_SBCSUTF8Impl;
+ } else {
+ if(mbcsTable->outputType==MBCS_OUTPUT_2) {
+ sharedData->impl=&_DBCSUTF8Impl;
+ }
+ }
+ }
+
+ if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY || mbcsTable->outputType==MBCS_OUTPUT_2_SISO) {
+ /*
+ * MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip.
+ * MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly.
+ */
+ mbcsTable->asciiRoundtrips=0;
+ }
+}
+
+/*
+ * Release the resources that hang off the MBCS table of a shared converter
+ * data object: the swap-LF/NL state table, a privately owned state table,
+ * the reference to a base converter (for extension-only converters), and
+ * any reconstituted from-Unicode data.
+ */
+static void U_CALLCONV
+ucnv_MBCSUnload(UConverterSharedData *sharedData) {
+    UConverterMBCSTable &table=sharedData->mbcs;
+
+    /* state table built on demand for the swap-LF/NL option */
+    if(NULL!=table.swapLFNLStateTable) {
+        uprv_free(table.swapLFNLStateTable);
+    }
+
+    /* the state table is only freed when this converter allocated its own copy */
+    if(table.stateTableOwned) {
+        uprv_free((void *)table.stateTable);
+    }
+
+    /* an extension-only converter holds a reference to its base converter */
+    if(NULL!=table.baseSharedData) {
+        ucnv_unload(table.baseSharedData);
+    }
+
+    /* from-Unicode data rebuilt at load time */
+    if(NULL!=table.reconstitutedData) {
+        uprv_free(table.reconstitutedData);
+    }
+}
+
+/*
+ * Per-converter setup for an MBCS converter.
+ *
+ * - Does nothing when the caller only tests whether the converter is loadable.
+ * - Removes the swap-LF/NL option where it does not apply (DBCS-only output,
+ *   or _EBCDICSwapLFNL() reports that it is not applicable), updating both
+ *   pArgs->options and cnv->options.
+ * - Sets vendor/GB 18030 flags in cnv->options based on substrings of pArgs->name.
+ * - Adjusts cnv->maxBytesPerUChar for SI/SO-stateful output and for extension tables.
+ *
+ * Returns early with *pErrorCode set if _EBCDICSwapLFNL() fails.
+ */
+static void U_CALLCONV
+ucnv_MBCSOpen(UConverter *cnv,
+              UConverterLoadArgs *pArgs,
+              UErrorCode *pErrorCode) {
+    /* name substrings (either case variant) that select a vendor-specific SI/SO character sequence */
+    static const struct {
+        const char *upper;
+        const char *lower;
+        uint32_t flag;
+    } siSoVariants[]={
+        { "KEIS", "keis", _MBCS_OPTION_KEIS },
+        { "JEF", "jef", _MBCS_OPTION_JEF },
+        { "JIPS", "jips", _MBCS_OPTION_JIPS }
+    };
+
+    if(pArgs->onlyTestIsLoadable) {
+        return;
+    }
+
+    UConverterMBCSTable *table=&cnv->sharedData->mbcs;
+    const uint8_t type=table->outputType;
+    const UBool isSISO=(UBool)(type==MBCS_OUTPUT_2_SISO);
+
+    if(type==MBCS_OUTPUT_DBCS_ONLY) {
+        /* the swaplfnl option does not apply, remove it */
+        pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
+        cnv->options=pArgs->options;
+    }
+
+    if((pArgs->options&UCNV_OPTION_SWAP_LFNL)!=0) {
+        /* do this because double-checked locking is broken */
+        icu::umtx_lock(NULL);
+        const UBool haveSwapTable=(UBool)(table->swapLFNLStateTable!=NULL);
+        icu::umtx_unlock(NULL);
+
+        /* only try to build the swap-LF/NL data when it is not there yet */
+        if(!haveSwapTable && !_EBCDICSwapLFNL(cnv->sharedData, pErrorCode)) {
+            if(U_FAILURE(*pErrorCode)) {
+                return; /* something went wrong */
+            }
+
+            /* the option does not apply, remove it */
+            pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
+            cnv->options=pArgs->options;
+        }
+    }
+
+    const char *name=pArgs->name;
+    if(uprv_strstr(name, "18030")!=NULL) {
+        /* a name containing "18030" is never checked against the vendor names below */
+        if(uprv_strstr(name, "gb18030")!=NULL || uprv_strstr(name, "GB18030")!=NULL) {
+            /* set a flag for GB 18030 mode, which changes the callback behavior */
+            cnv->options|=_MBCS_OPTION_GB18030;
+        }
+    } else {
+        /* the first matching vendor name wins: KEIS, then JEF, then JIPS */
+        for(int32_t v=0; v<3; ++v) {
+            if(uprv_strstr(name, siSoVariants[v].upper)!=NULL ||
+               uprv_strstr(name, siSoVariants[v].lower)!=NULL
+            ) {
+                cnv->options|=siSoVariants[v].flag;
+                break;
+            }
+        }
+    }
+
+    /* fix maxBytesPerUChar depending on outputType and options etc. */
+    if(isSISO) {
+        cnv->maxBytesPerUChar=3; /* SO+DBCS */
+    }
+
+    const int32_t *ext=table->extIndexes;
+    if(ext!=NULL) {
+        int8_t extMaxBytes=(int8_t)UCNV_GET_MAX_BYTES_PER_UCHAR(ext);
+        if(isSISO) {
+            ++extMaxBytes; /* SO + multiple DBCS */
+        }
+
+        /* the extension table can only raise the limit, never lower it */
+        if(extMaxBytes>cnv->maxBytesPerUChar) {
+            cnv->maxBytesPerUChar=extMaxBytes;
+        }
+    }
+
+#if 0
+    /*
+     * documentation of UConverter fields used for status
+     * all of these fields are (re)set to 0 by ucnv_bld.c and ucnv_reset()
+     */
+
+    /* toUnicode */
+    cnv->toUnicodeStatus=0;     /* offset */
+    cnv->mode=0;                /* state */
+    cnv->toULength=0;           /* byteIndex */
+
+    /* fromUnicode */
+    cnv->fromUChar32=0;
+    cnv->fromUnicodeStatus=1;   /* prevLength */
+#endif
+}
+
+U_CDECL_BEGIN
+
+/*
+ * Return the converter name: the swap-LF/NL name when that option is set
+ * and such a name exists, otherwise the name from the static data.
+ */
+static const char* U_CALLCONV
+ucnv_MBCSGetName(const UConverter *cnv) {
+    const UConverterSharedData *shared=cnv->sharedData;
+
+    /* without the option, or without a swap-LF/NL name, fall back to the regular name */
+    if((cnv->options&UCNV_OPTION_SWAP_LFNL)==0 || shared->mbcs.swapLFNLName==NULL) {
+        return shared->staticData->name;
+    }
+    return shared->mbcs.swapLFNLName;
+}
+U_CDECL_END
+
+
+/* MBCS-to-Unicode conversion functions ------------------------------------- */
+
+/*
+ * Look up a to-Unicode fallback mapping by its offset.
+ *
+ * Performs a binary search over mbcsTable->toUFallbacks (countToUFallbacks
+ * entries, searched by their offset field).
+ *
+ * @param mbcsTable the MBCS table holding the fallback list
+ * @param offset    the offset to search for
+ * @return the codePoint of the matching entry, or 0xfffe if there is none
+ */
+static UChar32 U_CALLCONV
+ucnv_MBCSGetFallback(UConverterMBCSTable *mbcsTable, uint32_t offset) {
+    const uint32_t count=mbcsTable->countToUFallbacks;
+    if(count==0) {
+        /* no fallbacks at all */
+        return 0xfffe;
+    }
+
+    const _MBCSToUFallback *fallbacks=mbcsTable->toUFallbacks;
+    uint32_t lo=0;
+    uint32_t hi=count;
+
+    /* narrow [lo, hi) down to a single candidate */
+    while(hi-lo>1) {
+        const uint32_t mid=(lo+hi)/2;
+        if(fallbacks[mid].offset<=offset) {
+            lo=mid;
+        } else {
+            hi=mid;
+        }
+    }
+
+    /* the candidate only counts if its offset matches exactly */
+    if(fallbacks[lo].offset==offset) {
+        return fallbacks[lo].codePoint;
+    }
+    return 0xfffe;
+}
+
+/*
+ * This version of ucnv_MBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages.
+ *
+ * Each source byte is looked up in row 0 of the state table and the resulting
+ * entry is handled directly as a final entry:
+ * - direct 16-bit values are written as one UChar,
+ * - direct 20-bit values are written as a surrogate pair,
+ * - fallback values are written only if UCNV_TO_U_USE_FALLBACK(cnv),
+ * - unassigned bytes (and fallbacks that are not used) are passed to _extToU(),
+ * - illegal bytes set U_ILLEGAL_CHAR_FOUND and end the loop.
+ *
+ * A full target buffer sets U_BUFFER_OVERFLOW_ERROR; a trail surrogate that no
+ * longer fits is stored in cnv->UCharErrorBuffer.
+ * On return, pArgs->source, pArgs->target and pArgs->offsets are updated.
+ * If pArgs->offsets is not NULL, one source index is written per output UChar.
+ */
+static void
+ucnv_MBCSSingleToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const uint8_t *source, *sourceLimit;
+ UChar *target;
+ const UChar *targetLimit;
+ int32_t *offsets;
+
+ const int32_t (*stateTable)[256];
+
+ int32_t sourceIndex;
+
+ int32_t entry;
+ UChar c;
+ uint8_t action;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ source=(const uint8_t *)pArgs->source;
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+ target=pArgs->target;
+ targetLimit=pArgs->targetLimit;
+ offsets=pArgs->offsets;
+
+ /* with the swap-LF/NL option, use the alternate state table stored for it */
+ if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
+ stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
+ } else {
+ stateTable=cnv->sharedData->mbcs.stateTable;
+ }
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex=0;
+
+ /* conversion loop */
+ while(source<sourceLimit) {
+ /*
+ * This following test is to see if available input would overflow the output.
+ * It does not catch output of more than one code unit that
+ * overflows as a result of a surrogate pair or callback output
+ * from the last source byte.
+ * Therefore, those situations also test for overflows and will
+ * then break the loop, too.
+ */
+ if(target>=targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+
+ /* only row 0 of the state table is ever consulted here */
+ entry=stateTable[0][*source++];
+ /* MBCS_ENTRY_IS_FINAL(entry) */
+
+ /* test the most common case first */
+ if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
+ /* output BMP code point */
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+
+ /* normal end of action codes: prepare for a new character */
+ ++sourceIndex;
+ continue;
+ }
+
+ /*
+ * An if-else-if chain provides more reliable performance for
+ * the most common cases compared to a switch.
+ */
+ action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
+ if(action==MBCS_STATE_VALID_DIRECT_20 ||
+ (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
+ ) {
+ entry=MBCS_ENTRY_FINAL_VALUE(entry);
+ /* output surrogate pair */
+ *target++=(UChar)(0xd800|(UChar)(entry>>10));
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ c=(UChar)(0xdc00|(UChar)(entry&0x3ff));
+ if(target<targetLimit) {
+ *target++=c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ } else {
+ /* target overflow */
+ /* keep the trail surrogate in the converter's overflow buffer */
+ cnv->UCharErrorBuffer[0]=c;
+ cnv->UCharErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+
+ ++sourceIndex;
+ continue;
+ } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
+ if(UCNV_TO_U_USE_FALLBACK(cnv)) {
+ /* output BMP code point */
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+
+ ++sourceIndex;
+ continue;
+ }
+ /* fallbacks not in use: handled below like an unassigned byte */
+ } else if(action==MBCS_STATE_UNASSIGNED) {
+ /* just fall through */
+ } else if(action==MBCS_STATE_ILLEGAL) {
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ } else {
+ /* reserved, must never occur */
+ ++sourceIndex;
+ continue;
+ }
+
+ if(U_FAILURE(*pErrorCode)) {
+ /* callback(illegal) */
+ break;
+ } else /* unassigned sequences indicated with byteIndex>0 */ {
+ /* try an extension mapping */
+ /* remember where source stood so that bytes consumed by _extToU() can be counted below */
+ pArgs->source=(const char *)source;
+ cnv->toUBytes[0]=*(source-1);
+ cnv->toULength=_extToU(cnv, cnv->sharedData,
+ 1, &source, sourceLimit,
+ &target, targetLimit,
+ &offsets, sourceIndex,
+ pArgs->flush,
+ pErrorCode);
+ /* 1 for the byte stored in toUBytes[0], plus however far _extToU() moved source */
+ sourceIndex+=1+(int32_t)(source-(const uint8_t *)pArgs->source);
+
+ if(U_FAILURE(*pErrorCode)) {
+ /* not mappable or buffer overflow */
+ break;
+ }
+ }
+ }
+
+ /* write back the updated pointers */
+ pArgs->source=(const char *)source;
+ pArgs->target=target;
+ pArgs->offsets=offsets;
+}
+
+/*
+ * This version of ucnv_MBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages
+ * that only map to and from the BMP.
+ * In addition to single-byte optimizations, the offset calculations
+ * become much easier.
+ *
+ * Because every source byte yields at most one UChar here, the loops are
+ * bounded by a single counter (targetCapacity) that holds the minimum of the
+ * remaining source length and the remaining target capacity.
+ * Offsets are not written per character in the fast paths; instead they are
+ * filled in afterwards for all bytes since lastSource.
+ *
+ * Unassigned bytes (and fallbacks that are not used) are passed to _extToU();
+ * illegal bytes set U_ILLEGAL_CHAR_FOUND; a full target with remaining source
+ * sets U_BUFFER_OVERFLOW_ERROR.
+ * On return, pArgs->source, pArgs->target and pArgs->offsets are updated.
+ */
+static void
+ucnv_MBCSSingleToBMPWithOffsets(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const uint8_t *source, *sourceLimit, *lastSource;
+ UChar *target;
+ int32_t targetCapacity, length;
+ int32_t *offsets;
+
+ const int32_t (*stateTable)[256];
+
+ int32_t sourceIndex;
+
+ int32_t entry;
+ uint8_t action;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ source=(const uint8_t *)pArgs->source;
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+ target=pArgs->target;
+ targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+ offsets=pArgs->offsets;
+
+ /* with the swap-LF/NL option, use the alternate state table stored for it */
+ if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
+ stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
+ } else {
+ stateTable=cnv->sharedData->mbcs.stateTable;
+ }
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex=0;
+ /* lastSource marks the first source byte whose offset has not been written yet */
+ lastSource=source;
+
+ /*
+ * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
+ * for the minimum of the sourceLength and targetCapacity
+ */
+ length=(int32_t)(sourceLimit-source);
+ if(length<targetCapacity) {
+ targetCapacity=length;
+ }
+
+#if MBCS_UNROLL_SINGLE_TO_BMP
+ /* unrolling makes it faster on Pentium III/Windows 2000 */
+ /* unroll the loop with the most common case */
+unrolled:
+ if(targetCapacity>=16) {
+ int32_t count, loops, oredEntries;
+
+ /*
+ * Convert 16 bytes at a time without per-byte checks:
+ * the 16 state table entries are ORed together and tested once
+ * after the group has been written.
+ */
+ loops=count=targetCapacity>>4;
+ do {
+ oredEntries=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ oredEntries|=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ oredEntries|=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ oredEntries|=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ oredEntries|=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ oredEntries|=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ oredEntries|=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ oredEntries|=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ oredEntries|=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ oredEntries|=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ oredEntries|=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ oredEntries|=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ oredEntries|=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ oredEntries|=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ oredEntries|=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ oredEntries|=entry=stateTable[0][*source++];
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+
+ /* were all 16 entries really valid? */
+ if(!MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(oredEntries)) {
+ /* no, return to the first of these 16 */
+ source-=16;
+ target-=16;
+ break;
+ }
+ } while(--count>0);
+ /* count becomes the number of groups of 16 that were fully converted */
+ count=loops-count;
+ targetCapacity-=16*count;
+
+ if(offsets!=NULL) {
+ /* write the offsets for the converted groups and move lastSource past them */
+ lastSource+=16*count;
+ while(count>0) {
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ --count;
+ }
+ }
+ }
+#endif
+
+ /* conversion loop */
+ /*
+ * source<sourceLimit is tested in addition to targetCapacity because the
+ * "reserved" branch below consumes a byte without decrementing targetCapacity.
+ */
+ while(targetCapacity > 0 && source < sourceLimit) {
+ entry=stateTable[0][*source++];
+ /* MBCS_ENTRY_IS_FINAL(entry) */
+
+ /* test the most common case first */
+ if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
+ /* output BMP code point */
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ --targetCapacity;
+ continue;
+ }
+
+ /*
+ * An if-else-if chain provides more reliable performance for
+ * the most common cases compared to a switch.
+ */
+ action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
+ if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
+ if(UCNV_TO_U_USE_FALLBACK(cnv)) {
+ /* output BMP code point */
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ --targetCapacity;
+ continue;
+ }
+ /* fallbacks not in use: handled below like an unassigned byte */
+ } else if(action==MBCS_STATE_UNASSIGNED) {
+ /* just fall through */
+ } else if(action==MBCS_STATE_ILLEGAL) {
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ } else {
+ /* reserved, must never occur */
+ continue;
+ }
+
+ /* set offsets since the start or the last extension */
+ if(offsets!=NULL) {
+ int32_t count=(int32_t)(source-lastSource);
+
+ /* predecrement: do not set the offset for the callback-causing character */
+ while(--count>0) {
+ *offsets++=sourceIndex++;
+ }
+ /* offset and sourceIndex are now set for the current character */
+ }
+
+ if(U_FAILURE(*pErrorCode)) {
+ /* callback(illegal) */
+ break;
+ } else /* unassigned sequences indicated with byteIndex>0 */ {
+ /* try an extension mapping */
+ lastSource=source;
+ cnv->toUBytes[0]=*(source-1);
+ cnv->toULength=_extToU(cnv, cnv->sharedData,
+ 1, &source, sourceLimit,
+ &target, pArgs->targetLimit,
+ &offsets, sourceIndex,
+ pArgs->flush,
+ pErrorCode);
+ /* 1 for the byte stored in toUBytes[0], plus however far _extToU() moved source */
+ sourceIndex+=1+(int32_t)(source-lastSource);
+
+ if(U_FAILURE(*pErrorCode)) {
+ /* not mappable or buffer overflow */
+ break;
+ }
+
+ /* recalculate the targetCapacity after an extension mapping */
+ targetCapacity=(int32_t)(pArgs->targetLimit-target);
+ length=(int32_t)(sourceLimit-source);
+ if(length<targetCapacity) {
+ targetCapacity=length;
+ }
+ }
+
+#if MBCS_UNROLL_SINGLE_TO_BMP
+ /* unrolling makes it faster on Pentium III/Windows 2000 */
+ /* after handling an exceptional byte, try the 16-at-a-time path again */
+ goto unrolled;
+#endif
+ }
+
+ /* the loop counter hides which limit was hit, so detect a full target explicitly */
+ if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=pArgs->targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ /* set offsets since the start or the last callback */
+ if(offsets!=NULL) {
+ size_t count=source-lastSource;
+ while(count>0) {
+ *offsets++=sourceIndex++;
+ --count;
+ }
+ }
+
+ /* write back the updated pointers */
+ pArgs->source=(const char *)source;
+ pArgs->target=target;
+ pArgs->offsets=offsets;
+}
+
+/*
+ * Does the given state lead to at least one final entry that is not illegal?
+ *
+ * Checks the state's own final entries first (two commonly valid byte values,
+ * then all 256), and only then follows transition entries recursively.
+ *
+ * @param stateTable the state table to inspect
+ * @param state      index of the state (row) to start from
+ * @return TRUE if a non-illegal final entry is reachable, otherwise FALSE
+ */
+static UBool
+hasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) {
+    /* byte values that are tested before the full scan */
+    static const uint8_t likelyBytes[2]={ 0xa1, 0x41 };
+
+    const int32_t *entries=stateTable[state];
+    int32_t value;
+    int32_t idx;
+
+    /* First test for final entries in this state for some commonly valid byte values. */
+    for(idx=0; idx<2; ++idx) {
+        value=entries[likelyBytes[idx]];
+        if(MBCS_ENTRY_IS_TRANSITION(value)) {
+            continue;
+        }
+        if(MBCS_ENTRY_FINAL_ACTION(value)!=MBCS_STATE_ILLEGAL) {
+            return TRUE;
+        }
+    }
+
+    /* Then test all final entries in this state. */
+    for(idx=0; idx<256; ++idx) {
+        value=entries[idx];
+        if(MBCS_ENTRY_IS_TRANSITION(value)) {
+            continue;
+        }
+        if(MBCS_ENTRY_FINAL_ACTION(value)!=MBCS_STATE_ILLEGAL) {
+            return TRUE;
+        }
+    }
+
+    /* Only when this state has no usable final entry, follow its transitions. */
+    for(idx=0; idx<256; ++idx) {
+        value=entries[idx];
+        if(!MBCS_ENTRY_IS_TRANSITION(value)) {
+            continue;
+        }
+        if(hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(value))) {
+            return TRUE;
+        }
+    }
+
+    return FALSE;
+}
+
+/*
+ * Can byte b start a character (as a single byte or as a lead byte) in the
+ * given state?
+ *
+ * For a transition entry the answer comes from hasValidTrailBytes(), so that
+ * b does not count as a lead byte when every sequence starting with it is
+ * illegal. For a final entry, b counts unless the action is illegal, or is a
+ * pure state change while converting DBCS-only.
+ *
+ * @param stateTable the state table to inspect
+ * @param state      index of the state (row) in which b is looked up
+ * @param isDBCSOnly TRUE if the converter is DBCS-only (SI/SO not allowed)
+ * @param b          the byte to test
+ */
+static UBool
+isSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) {
+    const int32_t value=stateTable[state][b];
+
+    if(MBCS_ENTRY_IS_TRANSITION(value)) {
+        /* lead byte: only meaningful if something valid can follow it */
+        return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(value));
+    }
+
+    const uint8_t finalAction=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(value));
+    if(isDBCSOnly && finalAction==MBCS_STATE_CHANGE_ONLY) {
+        /* SI/SO are illegal for DBCS-only conversion */
+        return FALSE;
+    }
+    return finalAction!=MBCS_STATE_ILLEGAL;
+}
+
+U_CFUNC void
+ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const uint8_t *source, *sourceLimit;
+ UChar *target;
+ const UChar *targetLimit;
+ int32_t *offsets;
+
+ const int32_t (*stateTable)[256];
+ const uint16_t *unicodeCodeUnits;
+
+ uint32_t offset;
+ uint8_t state;
+ int8_t byteIndex;
+ uint8_t *bytes;
+
+ int32_t sourceIndex, nextSourceIndex;
+
+ int32_t entry;
+ UChar c;
+ uint8_t action;
+
+ /* use optimized function if possible */
+ cnv=pArgs->converter;
+
+ if(cnv->preToULength>0) {
+ /*
+ * pass sourceIndex=-1 because we continue from an earlier buffer
+ * in the future, this may change with continuous offsets
+ */
+ ucnv_extContinueMatchToU(cnv, pArgs, -1, pErrorCode);
+
+ if(U_FAILURE(*pErrorCode) || cnv->preToULength<0) {
+ return;
+ }
+ }
+
+ if(cnv->sharedData->mbcs.countStates==1) {
+ if(!(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
+ ucnv_MBCSSingleToBMPWithOffsets(pArgs, pErrorCode);
+ } else {
+ ucnv_MBCSSingleToUnicodeWithOffsets(pArgs, pErrorCode);
+ }
+ return;
+ }
+
+ /* set up the local pointers */
+ source=(const uint8_t *)pArgs->source;
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+ target=pArgs->target;
+ targetLimit=pArgs->targetLimit;
+ offsets=pArgs->offsets;
+
+ if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
+ stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
+ } else {
+ stateTable=cnv->sharedData->mbcs.stateTable;
+ }
+ unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
+
+ /* get the converter state from UConverter */
+ offset=cnv->toUnicodeStatus;
+ byteIndex=cnv->toULength;
+ bytes=cnv->toUBytes;
+
+ /*
+ * if we are in the SBCS state for a DBCS-only converter,
+ * then load the DBCS state from the MBCS data
+ * (dbcsOnlyState==0 if it is not a DBCS-only converter)
+ */
+ if((state=(uint8_t)(cnv->mode))==0) {
+ state=cnv->sharedData->mbcs.dbcsOnlyState;
+ }
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex=byteIndex==0 ? 0 : -1;
+ nextSourceIndex=0;
+
+ /* conversion loop */
+ while(source<sourceLimit) {
+ /*
+ * This following test is to see if available input would overflow the output.
+ * It does not catch output of more than one code unit that
+ * overflows as a result of a surrogate pair or callback output
+ * from the last source byte.
+ * Therefore, those situations also test for overflows and will
+ * then break the loop, too.
+ */
+ if(target>=targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+
+ if(byteIndex==0) {
+ /* optimized loop for 1/2-byte input and BMP output */
+ if(offsets==NULL) {
+ do {
+ entry=stateTable[state][*source];
+ if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+ state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
+ offset=MBCS_ENTRY_TRANSITION_OFFSET(entry);
+
+ ++source;
+ if( source<sourceLimit &&
+ MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
+ MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
+ (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
+ ) {
+ ++source;
+ *target++=c;
+ state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+ offset=0;
+ } else {
+ /* set the state and leave the optimized loop */
+ bytes[0]=*(source-1);
+ byteIndex=1;
+ break;
+ }
+ } else {
+ if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
+ /* output BMP code point */
+ ++source;
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+ } else {
+ /* leave the optimized loop */
+ break;
+ }
+ }
+ } while(source<sourceLimit && target<targetLimit);
+ } else /* offsets!=NULL */ {
+ do {
+ entry=stateTable[state][*source];
+ if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+ state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
+ offset=MBCS_ENTRY_TRANSITION_OFFSET(entry);
+
+ ++source;
+ if( source<sourceLimit &&
+ MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
+ MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
+ (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
+ ) {
+ ++source;
+ *target++=c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ sourceIndex=(nextSourceIndex+=2);
+ }
+ state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+ offset=0;
+ } else {
+ /* set the state and leave the optimized loop */
+ ++nextSourceIndex;
+ bytes[0]=*(source-1);
+ byteIndex=1;
+ break;
+ }
+ } else {
+ if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
+ /* output BMP code point */
+ ++source;
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ sourceIndex=++nextSourceIndex;
+ }
+ state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+ } else {
+ /* leave the optimized loop */
+ break;
+ }
+ }
+ } while(source<sourceLimit && target<targetLimit);
+ }
+
+ /*
+ * these tests and break statements could be put inside the loop
+ * if C had "break outerLoop" like Java
+ */
+ if(source>=sourceLimit) {
+ break;
+ }
+ if(target>=targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+
+ ++nextSourceIndex;
+ bytes[byteIndex++]=*source++;
+ } else /* byteIndex>0 */ {
+ ++nextSourceIndex;
+ entry=stateTable[state][bytes[byteIndex++]=*source++];
+ }
+
+ if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+ state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
+ offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
+ continue;
+ }
+
+ /* save the previous state for proper extension mapping with SI/SO-stateful converters */
+ cnv->mode=state;
+
+ /* set the next state early so that we can reuse the entry variable */
+ state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+
+ /*
+ * An if-else-if chain provides more reliable performance for
+ * the most common cases compared to a switch.
+ */
+ action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
+ if(action==MBCS_STATE_VALID_16) {
+ offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
+ c=unicodeCodeUnits[offset];
+ if(c<0xfffe) {
+ /* output BMP code point */
+ *target++=c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ byteIndex=0;
+ } else if(c==0xfffe) {
+ if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
+ /* output fallback BMP code point */
+ *target++=(UChar)entry;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ byteIndex=0;
+ }
+ } else {
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else if(action==MBCS_STATE_VALID_DIRECT_16) {
+ /* output BMP code point */
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ byteIndex=0;
+ } else if(action==MBCS_STATE_VALID_16_PAIR) {
+ offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
+ c=unicodeCodeUnits[offset++];
+ if(c<0xd800) {
+ /* output BMP code point below 0xd800 */
+ *target++=c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ byteIndex=0;
+ } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
+ /* output roundtrip or fallback surrogate pair */
+ *target++=(UChar)(c&0xdbff);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ byteIndex=0;
+ if(target<targetLimit) {
+ *target++=unicodeCodeUnits[offset];
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ } else {
+ /* target overflow */
+ cnv->UCharErrorBuffer[0]=unicodeCodeUnits[offset];
+ cnv->UCharErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+
+ offset=0;
+ break;
+ }
+ } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
+ /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
+ *target++=unicodeCodeUnits[offset];
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ byteIndex=0;
+ } else if(c==0xffff) {
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else if(action==MBCS_STATE_VALID_DIRECT_20 ||
+ (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
+ ) {
+ entry=MBCS_ENTRY_FINAL_VALUE(entry);
+ /* output surrogate pair */
+ *target++=(UChar)(0xd800|(UChar)(entry>>10));
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ byteIndex=0;
+ c=(UChar)(0xdc00|(UChar)(entry&0x3ff));
+ if(target<targetLimit) {
+ *target++=c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ } else {
+ /* target overflow */
+ cnv->UCharErrorBuffer[0]=c;
+ cnv->UCharErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+
+ offset=0;
+ break;
+ }
+ } else if(action==MBCS_STATE_CHANGE_ONLY) {
+ /*
+ * This serves as a state change without any output.
+ * It is useful for reading simple stateful encodings,
+ * for example using just Shift-In/Shift-Out codes.
+ * The 21 unused bits may later be used for more sophisticated
+ * state transitions.
+ */
+ if(cnv->sharedData->mbcs.dbcsOnlyState==0) {
+ byteIndex=0;
+ } else {
+ /* SI/SO are illegal for DBCS-only conversion */
+ state=(uint8_t)(cnv->mode); /* restore the previous state */
+
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
+ if(UCNV_TO_U_USE_FALLBACK(cnv)) {
+ /* output BMP code point */
+ *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ byteIndex=0;
+ }
+ } else if(action==MBCS_STATE_UNASSIGNED) {
+ /* just fall through */
+ } else if(action==MBCS_STATE_ILLEGAL) {
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ } else {
+ /* reserved, must never occur */
+ byteIndex=0;
+ }
+
+ /* end of action codes: prepare for a new character */
+ offset=0;
+
+ if(byteIndex==0) {
+ sourceIndex=nextSourceIndex;
+ } else if(U_FAILURE(*pErrorCode)) {
+ /* callback(illegal) */
+ if(byteIndex>1) {
+ /*
+ * Ticket 5691: consistent illegal sequences:
+ * - We include at least the first byte in the illegal sequence.
+ * - If any of the non-initial bytes could be the start of a character,
+ * we stop the illegal sequence before the first one of those.
+ */
+ UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
+ int8_t i;
+ for(i=1;
+ i<byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, bytes[i]);
+ ++i) {}
+ if(i<byteIndex) {
+ /* Back out some bytes. */
+ int8_t backOutDistance=byteIndex-i;
+ int32_t bytesFromThisBuffer=(int32_t)(source-(const uint8_t *)pArgs->source);
+ byteIndex=i; /* length of reported illegal byte sequence */
+ if(backOutDistance<=bytesFromThisBuffer) {
+ source-=backOutDistance;
+ } else {
+ /* Back out bytes from the previous buffer: Need to replay them. */
+ cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
+ /* preToULength is negative! */
+ uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength);
+ source=(const uint8_t *)pArgs->source;
+ }
+ }
+ }
+ break;
+ } else /* unassigned sequences indicated with byteIndex>0 */ {
+ /* try an extension mapping */
+ pArgs->source=(const char *)source;
+ byteIndex=_extToU(cnv, cnv->sharedData,
+ byteIndex, &source, sourceLimit,
+ &target, targetLimit,
+ &offsets, sourceIndex,
+ pArgs->flush,
+ pErrorCode);
+ sourceIndex=nextSourceIndex+=(int32_t)(source-(const uint8_t *)pArgs->source);
+
+ if(U_FAILURE(*pErrorCode)) {
+ /* not mappable or buffer overflow */
+ break;
+ }
+ }
+ }
+
+ /* set the converter state back into UConverter */
+ cnv->toUnicodeStatus=offset;
+ cnv->mode=state;
+ cnv->toULength=byteIndex;
+
+ /* write back the updated pointers */
+ pArgs->source=(const char *)source;
+ pArgs->target=target;
+ pArgs->offsets=offsets;
+}
+
+/*
+ * This version of ucnv_MBCSGetNextUChar() is optimized for single-byte, single-state codepages.
+ * We still need a conversion loop in case we find reserved action codes, which are to be ignored.
+ *
+ * Every input byte is looked up in state 0 of the state table
+ * (the swapLFNL state table if UCNV_OPTION_SWAP_LFNL is set in cnv->options).
+ *
+ * pArgs: conversion arguments; pArgs->source is advanced past the consumed bytes.
+ * pErrorCode: receives
+ * - U_ILLEGAL_CHAR_FOUND for an illegal byte; the byte is stored in
+ * cnv->toUBytes[0] and cnv->toULength is set to 1, the same way
+ * ucnv_MBCSGetNextUChar() reports an illegal single byte
+ * - U_INDEX_OUTOFBOUNDS_ERROR if the input ran out without any output
+ *
+ * Return value:
+ * - the code point for a mapped byte
+ * - UCNV_GET_NEXT_UCHAR_USE_TO_U for an unassigned byte (or an unused fallback);
+ * the byte is un-consumed so that the generic code can handle it
+ * - 0xffff together with one of the error codes above
+ */
+static UChar32
+ucnv_MBCSSingleGetNextUChar(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const int32_t (*stateTable)[256];
+ const uint8_t *source, *sourceLimit;
+
+ int32_t entry;
+ uint8_t action;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ source=(const uint8_t *)pArgs->source;
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+ if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
+ stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
+ } else {
+ stateTable=cnv->sharedData->mbcs.stateTable;
+ }
+
+ /* conversion loop */
+ while(source<sourceLimit) {
+ entry=stateTable[0][*source++];
+ /* MBCS_ENTRY_IS_FINAL(entry) */
+
+ /* write back the updated pointer early so that we can return directly */
+ pArgs->source=(const char *)source;
+
+ if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
+ /* output BMP code point */
+ return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ }
+
+ /*
+ * An if-else-if chain provides more reliable performance for
+ * the most common cases compared to a switch.
+ */
+ action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
+ if( action==MBCS_STATE_VALID_DIRECT_20 ||
+ (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
+ ) {
+ /* output supplementary code point */
+ return (UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
+ } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
+ if(UCNV_TO_U_USE_FALLBACK(cnv)) {
+ /* output BMP code point */
+ return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ }
+ } else if(action==MBCS_STATE_UNASSIGNED) {
+ /* just fall through */
+ } else if(action==MBCS_STATE_ILLEGAL) {
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ } else {
+ /* reserved, must never occur */
+ continue;
+ }
+
+ if(U_FAILURE(*pErrorCode)) {
+ /*
+ * callback(illegal)
+ * Return right here instead of leaving the loop: the code after the loop
+ * would replace the error code with U_INDEX_OUTOFBOUNDS_ERROR.
+ * Record the illegal byte so that the caller can pass it to the callback;
+ * pArgs->source already points behind it.
+ */
+ cnv->toUBytes[0]=*(source-1);
+ cnv->toULength=1;
+ return 0xffff;
+ } else /* unassigned sequence */ {
+ /* defer to the generic implementation */
+ pArgs->source=(const char *)source-1;
+ return UCNV_GET_NEXT_UCHAR_USE_TO_U;
+ }
+ }
+
+ /* no output because of empty input or only state changes */
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0xffff;
+}
+
+/*
+ * Version of _MBCSToUnicodeWithOffsets() optimized for single-character
+ * conversion without offset handling.
+ *
+ * When a character does not have a mapping to Unicode, then we return to the
+ * generic ucnv_getNextUChar() code for extension/GB 18030 and error/callback
+ * handling.
+ * We also defer to the generic code in other complicated cases and have them
+ * ultimately handled by _MBCSToUnicodeWithOffsets() itself.
+ *
+ * All normal mappings and errors are handled here.
+ *
+ * Return value:
+ * - the converted code point, with pArgs->source advanced behind its bytes
+ * - UCNV_GET_NEXT_UCHAR_USE_TO_U to defer to the generic code
+ * (partial match pending in preToU, codepage with single surrogates,
+ * or an unassigned sequence; for the latter pArgs->source is rewound
+ * to the start of the sequence)
+ * - 0xffff with *pErrorCode set to
+ * U_TRUNCATED_CHAR_FOUND (input ended inside a sequence; bytes copied to cnv->toUBytes),
+ * U_ILLEGAL_CHAR_FOUND (illegal sequence), or
+ * U_INDEX_OUTOFBOUNDS_ERROR (empty input or only state changes)
+ *
+ * Single-state codepages are handed to ucnv_MBCSSingleGetNextUChar().
+ * On the regular exit path cnv->toUnicodeStatus is reset to 0 and
+ * cnv->mode receives the current state.
+ */
+static UChar32 U_CALLCONV
+ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const uint8_t *source, *sourceLimit, *lastSource;
+
+ const int32_t (*stateTable)[256];
+ const uint16_t *unicodeCodeUnits;
+
+ uint32_t offset;
+ uint8_t state;
+
+ int32_t entry;
+ UChar32 c;
+ uint8_t action;
+
+ /* use optimized function if possible */
+ cnv=pArgs->converter;
+
+ if(cnv->preToULength>0) {
+ /* use the generic code in ucnv_getNextUChar() to continue with a partial match */
+ return UCNV_GET_NEXT_UCHAR_USE_TO_U;
+ }
+
+ if(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SURROGATES) {
+ /*
+ * Using the generic ucnv_getNextUChar() code lets us deal correctly
+ * with the rare case of a codepage that maps single surrogates
+ * without adding the complexity to this already complicated function here.
+ */
+ return UCNV_GET_NEXT_UCHAR_USE_TO_U;
+ } else if(cnv->sharedData->mbcs.countStates==1) {
+ return ucnv_MBCSSingleGetNextUChar(pArgs, pErrorCode);
+ }
+
+ /* set up the local pointers */
+ source=lastSource=(const uint8_t *)pArgs->source; /* lastSource marks the start of the current byte sequence */
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+
+ if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
+ stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
+ } else {
+ stateTable=cnv->sharedData->mbcs.stateTable;
+ }
+ unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
+
+ /* get the converter state from UConverter */
+ offset=cnv->toUnicodeStatus;
+
+ /*
+ * if we are in the SBCS state for a DBCS-only converter,
+ * then load the DBCS state from the MBCS data
+ * (dbcsOnlyState==0 if it is not a DBCS-only converter)
+ */
+ if((state=(uint8_t)(cnv->mode))==0) {
+ state=cnv->sharedData->mbcs.dbcsOnlyState;
+ }
+
+ /* conversion loop */
+ c=U_SENTINEL; /* initial value; the code after the loop tests c<0 */
+ while(source<sourceLimit) {
+ entry=stateTable[state][*source++];
+ if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+ state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
+ offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
+
+ /* optimization for 1/2-byte input and BMP output
+ * The next byte is only peeked at here; it is consumed below
+ * if and only if it completes a VALID_16 mapping to a code point <0xfffe.
+ * Otherwise the next loop iteration processes it normally.
+ */
+ if( source<sourceLimit &&
+ MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
+ MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
+ (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
+ ) {
+ ++source; /* consume the peeked byte */
+ state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+ /* output BMP code point */
+ break;
+ }
+ } else {
+ /* save the previous state for proper extension mapping with SI/SO-stateful converters */
+ cnv->mode=state;
+
+ /* set the next state early so that we can reuse the entry variable */
+ state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+
+ /*
+ * An if-else-if chain provides more reliable performance for
+ * the most common cases compared to a switch.
+ */
+ action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
+ if(action==MBCS_STATE_VALID_DIRECT_16) {
+ /* output BMP code point */
+ c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ break;
+ } else if(action==MBCS_STATE_VALID_16) {
+ offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
+ c=unicodeCodeUnits[offset];
+ if(c<0xfffe) {
+ /* output BMP code point */
+ break;
+ } else if(c==0xfffe) {
+ /* 0xfffe: no roundtrip mapping; a fallback may exist */
+ if(UCNV_TO_U_USE_FALLBACK(cnv) && (c=ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
+ break;
+ }
+ } else {
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else if(action==MBCS_STATE_VALID_16_PAIR) {
+ offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
+ c=unicodeCodeUnits[offset++]; /* first unit selects how to read the pair; offset now indexes the second unit */
+ if(c<0xd800) {
+ /* output BMP code point below 0xd800 */
+ break;
+ } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
+ /* output roundtrip or fallback supplementary code point */
+ c=((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00);
+ break;
+ } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
+ /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
+ c=unicodeCodeUnits[offset];
+ break;
+ } else if(c==0xffff) {
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else if(action==MBCS_STATE_VALID_DIRECT_20 ||
+ (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
+ ) {
+ /* output supplementary code point */
+ c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
+ break;
+ } else if(action==MBCS_STATE_CHANGE_ONLY) {
+ /*
+ * This serves as a state change without any output.
+ * It is useful for reading simple stateful encodings,
+ * for example using just Shift-In/Shift-Out codes.
+ * The 21 unused bits may later be used for more sophisticated
+ * state transitions.
+ *
+ * Without an error, control continues with the unassigned-sequence
+ * path below (deferring to the generic implementation).
+ */
+ if(cnv->sharedData->mbcs.dbcsOnlyState!=0) {
+ /* SI/SO are illegal for DBCS-only conversion */
+ state=(uint8_t)(cnv->mode); /* restore the previous state */
+
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ }
+ } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
+ if(UCNV_TO_U_USE_FALLBACK(cnv)) {
+ /* output BMP code point */
+ c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ break;
+ }
+ } else if(action==MBCS_STATE_UNASSIGNED) {
+ /* just fall through */
+ } else if(action==MBCS_STATE_ILLEGAL) {
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ } else {
+ /* reserved (must never occur), or only state change */
+ offset=0;
+ lastSource=source; /* the next sequence starts behind the skipped bytes */
+ continue;
+ }
+
+ /* end of action codes: prepare for a new character */
+ offset=0;
+
+ if(U_FAILURE(*pErrorCode)) {
+ /* callback(illegal) */
+ break;
+ } else /* unassigned sequence */ {
+ /* defer to the generic implementation
+ * after rewinding the input to the start of this sequence
+ */
+ cnv->toUnicodeStatus=0;
+ cnv->mode=state;
+ pArgs->source=(const char *)lastSource;
+ return UCNV_GET_NEXT_UCHAR_USE_TO_U;
+ }
+ }
+ }
+
+ if(c<0) { /* NOTE(review): an illegal sequence flagged while c holds a table value (e.g. 0xffff) skips this block, so toUBytes/toULength are not set - confirm intended */
+ if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) {
+ /* incomplete character byte sequence */
+ uint8_t *bytes=cnv->toUBytes;
+ cnv->toULength=(int8_t)(source-lastSource);
+ do {
+ *bytes++=*lastSource++;
+ } while(lastSource<source);
+ *pErrorCode=U_TRUNCATED_CHAR_FOUND;
+ } else if(U_FAILURE(*pErrorCode)) {
+ /* callback(illegal) */
+ /*
+ * Ticket 5691: consistent illegal sequences:
+ * - We include at least the first byte in the illegal sequence.
+ * - If any of the non-initial bytes could be the start of a character,
+ * we stop the illegal sequence before the first one of those.
+ */
+ UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
+ uint8_t *bytes=cnv->toUBytes;
+ *bytes++=*lastSource++; /* first byte */
+ if(lastSource==source) {
+ cnv->toULength=1;
+ } else /* lastSource<source: multi-byte character */ {
+ int8_t i;
+ for(i=1;
+ lastSource<source && !isSingleOrLead(stateTable, state, isDBCSOnly, *lastSource);
+ ++i
+ ) {
+ *bytes++=*lastSource++;
+ }
+ cnv->toULength=i;
+ source=lastSource; /* back out the bytes that are not part of the reported sequence */
+ }
+ } else {
+ /* no output because of empty input or only state changes */
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ }
+ c=0xffff;
+ }
+
+ /* set the converter state back into UConverter, ready for a new character */
+ cnv->toUnicodeStatus=0;
+ cnv->mode=state;
+
+ /* write back the updated pointer */
+ pArgs->source=(const char *)source;
+ return c;
+}
+
+#if 0
+/*
+ * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
+ * Removal improves code coverage.
+ */
+/**
+ * This version of ucnv_MBCSSimpleGetNextUChar() is optimized for single-byte, single-state codepages.
+ * It does not handle the EBCDIC swaplfnl option (set in UConverter).
+ * It does not handle conversion extensions (_extToU()).
+ *
+ * The byte b is looked up in state 0 of sharedData->mbcs.stateTable.
+ *
+ * Return value:
+ * the mapped code point,
+ * 0xfffe for an unassigned byte or for a fallback that is not used,
+ * 0xffff for an illegal or reserved entry
+ *
+ * Note: this function sits inside an #if 0 block and is not compiled.
+ */
+U_CFUNC UChar32
+ucnv_MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
+ uint8_t b, UBool useFallback) {
+ int32_t entry;
+ uint8_t action;
+
+ entry=sharedData->mbcs.stateTable[0][b];
+ /* MBCS_ENTRY_IS_FINAL(entry) */
+
+ if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
+ /* output BMP code point */
+ return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ }
+
+ /*
+ * An if-else-if chain provides more reliable performance for
+ * the most common cases compared to a switch.
+ */
+ action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
+ if(action==MBCS_STATE_VALID_DIRECT_20) {
+ /* output supplementary code point */
+ return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
+ } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
+ if(!TO_U_USE_FALLBACK(useFallback)) {
+ return 0xfffe; /* fallback not wanted: report as unassigned */
+ }
+ /* output BMP code point */
+ return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ } else if(action==MBCS_STATE_FALLBACK_DIRECT_20) {
+ if(!TO_U_USE_FALLBACK(useFallback)) {
+ return 0xfffe; /* fallback not wanted: report as unassigned */
+ }
+ /* output supplementary code point */
+ return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
+ } else if(action==MBCS_STATE_UNASSIGNED) {
+ return 0xfffe;
+ } else if(action==MBCS_STATE_ILLEGAL) {
+ return 0xffff;
+ } else {
+ /* reserved, must never occur */
+ return 0xffff;
+ }
+}
+#endif
+
+/*
+ * This is a simple version of _MBCSGetNextUChar() that is used
+ * by other converter implementations.
+ * It only returns an "assigned" result if it consumes the entire input.
+ * It does not use state from the converter, nor error codes.
+ * It does not handle the EBCDIC swaplfnl option (set in UConverter).
+ * It handles conversion extensions but not GB 18030.
+ *
+ * Parameters:
+ * sharedData shared converter data with the MBCS state table and mapping tables
+ * source, length the bytes of exactly one character; length<=0 is "illegal"
+ * useFallback whether fallback mappings may be used; this is the only
+ * fallback control here because there is no UConverter object
+ *
+ * Return value:
+ * U+fffe unassigned
+ * U+ffff illegal
+ * otherwise the Unicode code point
+ */
+U_CFUNC UChar32
+ucnv_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
+ const char *source, int32_t length,
+ UBool useFallback) {
+ const int32_t (*stateTable)[256];
+ const uint16_t *unicodeCodeUnits;
+
+ uint32_t offset;
+ uint8_t state, action;
+
+ UChar32 c;
+ int32_t i, entry;
+
+ if(length<=0) {
+ /* no input at all: "illegal" */
+ return 0xffff;
+ }
+
+#if 0
+/*
+ * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
+ * TODO In future releases, verify that this function is never called for SBCS
+ * conversions, i.e., that sharedData->mbcs.countStates==1 is still true.
+ * Removal improves code coverage.
+ */
+ /* use optimized function if possible */
+ if(sharedData->mbcs.countStates==1) {
+ if(length==1) {
+ return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback);
+ } else {
+ return 0xffff; /* illegal: more than a single byte for an SBCS converter */
+ }
+ }
+#endif
+
+ /* set up the local pointers */
+ stateTable=sharedData->mbcs.stateTable;
+ unicodeCodeUnits=sharedData->mbcs.unicodeCodeUnits;
+
+ /* converter state */
+ offset=0;
+ state=sharedData->mbcs.dbcsOnlyState;
+
+ /* conversion loop */
+ for(i=0;;) {
+ entry=stateTable[state][(uint8_t)source[i++]];
+ if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+ state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
+ offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
+
+ if(i==length) {
+ return 0xffff; /* truncated character */
+ }
+ } else {
+ /*
+ * An if-else-if chain provides more reliable performance for
+ * the most common cases compared to a switch.
+ *
+ * All fallback decisions in this function go through
+ * TO_U_USE_FALLBACK(useFallback): there is no UConverter ("cnv")
+ * in scope, so the UCNV_TO_U_USE_FALLBACK(cnv) form must not be used here.
+ */
+ action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
+ if(action==MBCS_STATE_VALID_16) {
+ offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
+ c=unicodeCodeUnits[offset];
+ if(c!=0xfffe) {
+ /* done */
+ } else if(TO_U_USE_FALLBACK(useFallback)) {
+ c=ucnv_MBCSGetFallback(&sharedData->mbcs, offset);
+ /* else done with 0xfffe */
+ }
+ break;
+ } else if(action==MBCS_STATE_VALID_DIRECT_16) {
+ /* output BMP code point */
+ c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ break;
+ } else if(action==MBCS_STATE_VALID_16_PAIR) {
+ offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
+ /* the first unit selects how to read the pair; offset then indexes the second unit */
+ c=unicodeCodeUnits[offset++];
+ if(c<0xd800) {
+ /* output BMP code point below 0xd800 */
+ } else if(TO_U_USE_FALLBACK(useFallback) ? c<=0xdfff : c<=0xdbff) {
+ /* output roundtrip or fallback supplementary code point */
+ c=(UChar32)(((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00));
+ } else if(TO_U_USE_FALLBACK(useFallback) ? (c&0xfffe)==0xe000 : c==0xe000) {
+ /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
+ c=unicodeCodeUnits[offset];
+ } else if(c==0xffff) {
+ return 0xffff;
+ } else {
+ c=0xfffe;
+ }
+ break;
+ } else if(action==MBCS_STATE_VALID_DIRECT_20) {
+ /* output supplementary code point */
+ c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
+ break;
+ } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
+ if(!TO_U_USE_FALLBACK(useFallback)) {
+ c=0xfffe;
+ break;
+ }
+ /* output BMP code point */
+ c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
+ break;
+ } else if(action==MBCS_STATE_FALLBACK_DIRECT_20) {
+ if(!TO_U_USE_FALLBACK(useFallback)) {
+ c=0xfffe;
+ break;
+ }
+ /* output supplementary code point */
+ c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
+ break;
+ } else if(action==MBCS_STATE_UNASSIGNED) {
+ c=0xfffe;
+ break;
+ }
+
+ /*
+ * forbid MBCS_STATE_CHANGE_ONLY for this function,
+ * and MBCS_STATE_ILLEGAL and reserved action codes
+ */
+ return 0xffff;
+ }
+ }
+
+ if(i!=length) {
+ /* illegal for this function: not all input consumed */
+ return 0xffff;
+ }
+
+ if(c==0xfffe) {
+ /* try an extension mapping */
+ const int32_t *cx=sharedData->mbcs.extIndexes;
+ if(cx!=NULL) {
+ return ucnv_extSimpleMatchToU(cx, source, length, useFallback);
+ }
+ }
+
+ return c;
+}
+
+/* MBCS-from-Unicode conversion functions ----------------------------------- */
+
+/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages.
+ *
+ * Converts UTF-16 code units from pArgs->source into one or two codepage bytes
+ * per code point in pArgs->target and, if pArgs->offsets is not NULL,
+ * records the source index for every output byte.
+ *
+ * - ASCII roundtrip characters are copied directly.
+ * - Code points up to 0xd7ff are looked up with DBCS_RESULT_FROM_MOST_BMP().
+ * - Other code points go through the stage tables; unmappable ones are
+ * passed to _extFromU().
+ *
+ * *pErrorCode is set to U_ILLEGAL_CHAR_FOUND for an unmatched surrogate and to
+ * U_BUFFER_OVERFLOW_ERROR when the target is full; a second output byte that
+ * does not fit is stored in cnv->charErrorBuffer.
+ * The current code point (for example a lead surrogate at the end of the input)
+ * is carried in cnv->fromUChar32; on exit the source/target/offsets pointers
+ * are written back into pArgs.
+ */
+static void
+ucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const UChar *source, *sourceLimit;
+ uint8_t *target;
+ int32_t targetCapacity;
+ int32_t *offsets;
+
+ const uint16_t *table;
+ const uint16_t *mbcsIndex;
+ const uint8_t *bytes;
+
+ UChar32 c;
+
+ int32_t sourceIndex, nextSourceIndex;
+
+ uint32_t stage2Entry;
+ uint32_t asciiRoundtrips;
+ uint32_t value;
+ uint8_t unicodeMask;
+
+ /* use optimized function if possible */
+ cnv=pArgs->converter;
+ unicodeMask=cnv->sharedData->mbcs.unicodeMask;
+
+ /* set up the local pointers */
+ source=pArgs->source;
+ sourceLimit=pArgs->sourceLimit;
+ target=(uint8_t *)pArgs->target;
+ targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+ offsets=pArgs->offsets;
+
+ table=cnv->sharedData->mbcs.fromUnicodeTable;
+ mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
+ if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
+ bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
+ } else {
+ bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
+ }
+ asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
+
+ /* get the converter state from UConverter */
+ c=cnv->fromUChar32;
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex= c==0 ? 0 : -1;
+ nextSourceIndex=0;
+
+ /* conversion loop
+ * If a code point is pending from the previous call (c!=0),
+ * jump straight into the loop body to look for its trail surrogate.
+ */
+ if(c!=0 && targetCapacity>0) {
+ goto getTrail;
+ }
+
+ while(source<sourceLimit) {
+ /*
+ * This following test is to see if available input would overflow the output.
+ * It does not catch output of more than one byte that
+ * overflows as a result of a multi-byte character or callback output
+ * from the last source character.
+ * Therefore, those situations also test for overflows and will
+ * then break the loop, too.
+ */
+ if(targetCapacity>0) {
+ /*
+ * Get a correct Unicode code point:
+ * a single UChar for a BMP code point or
+ * a matched surrogate pair for a "supplementary code point".
+ */
+ c=*source++;
+ ++nextSourceIndex;
+ if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
+ *target++=(uint8_t)c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ sourceIndex=nextSourceIndex; /* sourceIndex is only needed for offsets, so it is only updated here */
+ }
+ --targetCapacity;
+ c=0;
+ continue;
+ }
+ /*
+ * utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX
+ * to avoid dealing with surrogates.
+ * MBCS_FAST_MAX must be >=0xd7ff.
+ */
+ if(c<=0xd7ff) {
+ value=DBCS_RESULT_FROM_MOST_BMP(mbcsIndex, (const uint16_t *)bytes, c);
+ /* There are only roundtrips (!=0) and no-mapping (==0) entries. */
+ if(value==0) {
+ goto unassigned;
+ }
+ /* output the value */
+ } else {
+ /*
+ * This also tests if the codepage maps single surrogates.
+ * If it does, then surrogates are not paired but mapped separately.
+ * Note that in this case unmatched surrogates are not detected.
+ */
+ if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
+ if(U16_IS_SURROGATE_LEAD(c)) {
+getTrail:
+ if(source<sourceLimit) {
+ /* test the following code unit */
+ UChar trail=*source;
+ if(U16_IS_TRAIL(trail)) {
+ ++source;
+ ++nextSourceIndex;
+ c=U16_GET_SUPPLEMENTARY(c, trail);
+ if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
+ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+ /* callback(unassigned) */
+ goto unassigned;
+ }
+ /* convert this supplementary code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ } else {
+ /* no more input
+ * c keeps the lead surrogate and is saved in cnv->fromUChar32 after the loop
+ */
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+
+ /* convert the Unicode code point in c into codepage bytes */
+ stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
+
+ /* get the bytes and the length for the output */
+ /* MBCS_OUTPUT_2 */
+ value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+
+ /* is this code point assigned, or do we use fallbacks? */
+ if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ||
+ (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
+ ) {
+ /*
+ * We allow a 0 byte output if the "assigned" bit is set for this entry.
+ * There is no way with this data structure for fallback output
+ * to be a zero byte.
+ */
+
+unassigned:
+ /* try an extension mapping */
+ pArgs->source=source; /* remember the position so that the input consumed by _extFromU() can be measured */
+ c=_extFromU(cnv, cnv->sharedData,
+ c, &source, sourceLimit,
+ &target, target+targetCapacity,
+ &offsets, sourceIndex,
+ pArgs->flush,
+ pErrorCode);
+ nextSourceIndex+=(int32_t)(source-pArgs->source);
+
+ if(U_FAILURE(*pErrorCode)) {
+ /* not mappable or buffer overflow */
+ break;
+ } else {
+ /* a mapping was written to the target, continue */
+
+ /* recalculate the targetCapacity after an extension mapping */
+ targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
+
+ /* normal end of conversion: prepare for a new character */
+ sourceIndex=nextSourceIndex;
+ continue;
+ }
+ }
+ }
+
+ /* write the output character bytes from value and length */
+ /* from the first if in the loop we know that targetCapacity>0 */
+ if(value<=0xff) {
+ /* this is easy because we know that there is enough space */
+ *target++=(uint8_t)value;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ --targetCapacity;
+ } else /* length==2 */ {
+ *target++=(uint8_t)(value>>8);
+ if(2<=targetCapacity) {
+ *target++=(uint8_t)value;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ }
+ targetCapacity-=2;
+ } else {
+ /* only the lead byte fits: keep the second byte in the overflow buffer */
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ cnv->charErrorBuffer[0]=(char)value;
+ cnv->charErrorBufferLength=1;
+
+ /* target overflow */
+ targetCapacity=0;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ c=0;
+ break;
+ }
+ }
+
+ /* normal end of conversion: prepare for a new character */
+ c=0;
+ sourceIndex=nextSourceIndex;
+ continue;
+ } else {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+
+ /* set the converter state back into UConverter */
+ cnv->fromUChar32=c;
+
+ /* write back the updated pointers */
+ pArgs->source=source;
+ pArgs->target=(char *)target;
+ pArgs->offsets=offsets;
+}
+
+/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages.
+ *
+ * Converts UTF-16 code units from pArgs->source into one codepage byte per
+ * code point in pArgs->target and, if pArgs->offsets is not NULL, records the
+ * source index for every output byte.
+ * Surrogate pairs are combined first; code points without a usable result
+ * (value<minValue) are passed to _extFromU().
+ *
+ * *pErrorCode is set to U_ILLEGAL_CHAR_FOUND for an unmatched surrogate and to
+ * U_BUFFER_OVERFLOW_ERROR when the target is full.
+ * The current code point (for example a lead surrogate at the end of the input)
+ * is carried in cnv->fromUChar32; on exit the source/target/offsets pointers
+ * are written back into pArgs.
+ */
+static void
+ucnv_MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const UChar *source, *sourceLimit;
+ uint8_t *target;
+ int32_t targetCapacity;
+ int32_t *offsets;
+
+ const uint16_t *table;
+ const uint16_t *results;
+
+ UChar32 c;
+
+ int32_t sourceIndex, nextSourceIndex;
+
+ uint16_t value, minValue;
+ UBool hasSupplementary;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ source=pArgs->source;
+ sourceLimit=pArgs->sourceLimit;
+ target=(uint8_t *)pArgs->target;
+ targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+ offsets=pArgs->offsets;
+
+ table=cnv->sharedData->mbcs.fromUnicodeTable;
+ if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
+ results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
+ } else {
+ results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
+ }
+
+ /* minValue is the smallest 16-bit result that counts as "assigned" in the test below */
+ if(cnv->useFallback) {
+ /* use all roundtrip and fallback results */
+ minValue=0x800;
+ } else {
+ /* use only roundtrips and fallbacks from private-use characters */
+ minValue=0xc00;
+ }
+ hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
+
+ /* get the converter state from UConverter */
+ c=cnv->fromUChar32;
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex= c==0 ? 0 : -1;
+ nextSourceIndex=0;
+
+ /* conversion loop
+ * If a code point is pending from the previous call (c!=0),
+ * jump straight into the loop body to look for its trail surrogate.
+ */
+ if(c!=0 && targetCapacity>0) {
+ goto getTrail;
+ }
+
+ while(source<sourceLimit) {
+ /*
+ * This following test is to see if available input would overflow the output.
+ * It does not catch output of more than one byte that
+ * overflows as a result of a multi-byte character or callback output
+ * from the last source character.
+ * Therefore, those situations also test for overflows and will
+ * then break the loop, too.
+ */
+ if(targetCapacity>0) {
+ /*
+ * Get a correct Unicode code point:
+ * a single UChar for a BMP code point or
+ * a matched surrogate pair for a "supplementary code point".
+ */
+ c=*source++;
+ ++nextSourceIndex;
+ if(U16_IS_SURROGATE(c)) {
+ if(U16_IS_SURROGATE_LEAD(c)) {
+getTrail:
+ if(source<sourceLimit) {
+ /* test the following code unit */
+ UChar trail=*source;
+ if(U16_IS_TRAIL(trail)) {
+ ++source;
+ ++nextSourceIndex;
+ c=U16_GET_SUPPLEMENTARY(c, trail);
+ if(!hasSupplementary) {
+ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+ /* callback(unassigned) */
+ goto unassigned;
+ }
+ /* convert this supplementary code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ } else {
+ /* no more input
+ * c keeps the lead surrogate and is saved in cnv->fromUChar32 after the loop
+ */
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+
+ /* convert the Unicode code point in c into codepage bytes */
+ value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
+
+ /* is this code point assigned, or do we use fallbacks? */
+ if(value>=minValue) {
+ /* assigned, write the output character bytes from value and length */
+ /* length==1 */
+ /* this is easy because we know that there is enough space */
+ *target++=(uint8_t)value; /* only the low 8 bits of the result are the output byte */
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ --targetCapacity;
+
+ /* normal end of conversion: prepare for a new character */
+ c=0;
+ sourceIndex=nextSourceIndex;
+ } else { /* unassigned */
+unassigned:
+ /* try an extension mapping */
+ pArgs->source=source; /* remember the position so that the input consumed by _extFromU() can be measured */
+ c=_extFromU(cnv, cnv->sharedData,
+ c, &source, sourceLimit,
+ &target, target+targetCapacity,
+ &offsets, sourceIndex,
+ pArgs->flush,
+ pErrorCode);
+ nextSourceIndex+=(int32_t)(source-pArgs->source);
+
+ if(U_FAILURE(*pErrorCode)) {
+ /* not mappable or buffer overflow */
+ break;
+ } else {
+ /* a mapping was written to the target, continue */
+
+ /* recalculate the targetCapacity after an extension mapping */
+ targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
+
+ /* normal end of conversion: prepare for a new character */
+ sourceIndex=nextSourceIndex;
+ }
+ }
+ } else {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+
+ /* set the converter state back into UConverter */
+ cnv->fromUChar32=c;
+
+ /* write back the updated pointers */
+ pArgs->source=source;
+ pArgs->target=(char *)target;
+ pArgs->offsets=offsets;
+}
+
+/*
+ * This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages
+ * that map only to and from the BMP.
+ * In addition to single-byte/state optimizations, the offset calculations
+ * become much easier.
+ * It would be possible to use the sbcsIndex for UTF-8-friendly tables,
+ * but measurements have shown that this diminishes performance
+ * in more cases than it improves it.
+ * See SVN revision 21013 (2007-feb-06) for the last version with #if switches
+ * for various MBCS and SBCS optimizations.
+ *
+ * pArgs supplies the converter and the source/target/offsets pointers;
+ * on return, pArgs->source, pArgs->target and pArgs->offsets are advanced
+ * past what was consumed and written.
+ * *pErrorCode is set to
+ * - U_ILLEGAL_CHAR_FOUND for an unmatched lead or trail surrogate,
+ * - U_TRUNCATED_CHAR_FOUND for a lead surrogate at the end of the input
+ *   when pArgs->flush is set,
+ * - U_BUFFER_OVERFLOW_ERROR when input remains but the target is full,
+ * - or whatever _extFromU() reports for a code point without a base mapping.
+ * The current/pending code point is carried between calls in cnv->fromUChar32.
+ */
+static void
+ucnv_MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const UChar *source, *sourceLimit, *lastSource;
+ uint8_t *target;
+ int32_t targetCapacity, length;
+ int32_t *offsets;
+
+ const uint16_t *table;
+ const uint16_t *results;
+
+ UChar32 c;
+
+ int32_t sourceIndex;
+
+ uint32_t asciiRoundtrips;
+ uint16_t value, minValue;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ source=pArgs->source;
+ sourceLimit=pArgs->sourceLimit;
+ target=(uint8_t *)pArgs->target;
+ targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+ offsets=pArgs->offsets;
+
+ table=cnv->sharedData->mbcs.fromUnicodeTable;
+ if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
+ results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
+ } else {
+ results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
+ }
+ asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
+
+ if(cnv->useFallback) {
+ /* use all roundtrip and fallback results */
+ minValue=0x800;
+ } else {
+ /* use only roundtrips and fallbacks from private-use characters */
+ minValue=0xc00;
+ }
+
+ /* get the converter state from UConverter */
+ c=cnv->fromUChar32;
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex= c==0 ? 0 : -1;
+ lastSource=source;
+
+ /*
+ * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
+ * for the minimum of the sourceLength and targetCapacity
+ */
+ length=(int32_t)(sourceLimit-source);
+ if(length<targetCapacity) {
+ targetCapacity=length;
+ }
+
+ /* conversion loop; if a code unit is pending from the previous call (c!=0), resume at the trail-surrogate check */
+ if(c!=0 && targetCapacity>0) {
+ goto getTrail;
+ }
+
+#if MBCS_UNROLL_SINGLE_FROM_BMP
+ /* unrolling makes it slower on Pentium III/Windows 2000?! */
+ /* unroll the loop with the most common case */
+unrolled:
+ if(targetCapacity>=4) {
+ int32_t count, loops;
+ uint16_t andedValues;
+
+ loops=count=targetCapacity>>2;
+ do {
+ c=*source++;
+ andedValues=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
+ *target++=(uint8_t)value;
+ c=*source++;
+ andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
+ *target++=(uint8_t)value;
+ c=*source++;
+ andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
+ *target++=(uint8_t)value;
+ c=*source++;
+ andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
+ *target++=(uint8_t)value;
+
+ /* were all 4 entries really valid? */
+ if(andedValues<minValue) {
+ /* no, return to the first of these 4 */
+ source-=4;
+ target-=4;
+ break;
+ }
+ } while(--count>0);
+ count=loops-count;
+ targetCapacity-=4*count;
+
+ if(offsets!=NULL) {
+ lastSource+=4*count;
+ while(count>0) {
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ *offsets++=sourceIndex++;
+ --count;
+ }
+ }
+
+ c=0;
+ }
+#endif
+
+ while(targetCapacity>0) {
+ /*
+ * Get a correct Unicode code point:
+ * a single UChar for a BMP code point or
+ * a matched surrogate pair for a "supplementary code point".
+ */
+ c=*source++;
+ /*
+ * Do not immediately check for single surrogates:
+ * Assume that they are unassigned and check for them in that case.
+ * This speeds up the conversion of assigned characters.
+ */
+ /* convert the Unicode code point in c into codepage bytes */
+ if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
+ *target++=(uint8_t)c;
+ --targetCapacity;
+ c=0;
+ continue;
+ }
+ value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
+ /* is this code point assigned, or do we use fallbacks? */
+ if(value>=minValue) {
+ /* assigned, write the output character bytes from value and length */
+ /* length==1 */
+ /* this is easy because we know that there is enough space */
+ *target++=(uint8_t)value;
+ --targetCapacity;
+
+ /* normal end of conversion: prepare for a new character */
+ c=0;
+ continue;
+ } else if(!U16_IS_SURROGATE(c)) {
+ /* normal, unassigned BMP character */
+ } else if(U16_IS_SURROGATE_LEAD(c)) {
+getTrail:
+ if(source<sourceLimit) {
+ /* test the following code unit */
+ UChar trail=*source;
+ if(U16_IS_TRAIL(trail)) {
+ ++source;
+ c=U16_GET_SUPPLEMENTARY(c, trail);
+ /* this codepage does not map supplementary code points */
+ /* callback(unassigned) */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ } else {
+ /* no more input */
+ if (pArgs->flush) {
+ *pErrorCode=U_TRUNCATED_CHAR_FOUND;
+ }
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+
+ /* c does not have a mapping */
+
+ /* get the number of code units for c to correctly advance sourceIndex */
+ length=U16_LENGTH(c);
+
+ /* set offsets since the start or the last extension */
+ if(offsets!=NULL) {
+ int32_t count=(int32_t)(source-lastSource);
+
+ /* do not set the offset for this character */
+ count-=length;
+
+ while(count>0) {
+ *offsets++=sourceIndex++;
+ --count;
+ }
+ /* offsets and sourceIndex are now set for the current character */
+ }
+
+ /* try an extension mapping */
+ lastSource=source;
+ c=_extFromU(cnv, cnv->sharedData,
+ c, &source, sourceLimit,
+ &target, (const uint8_t *)(pArgs->targetLimit),
+ &offsets, sourceIndex,
+ pArgs->flush,
+ pErrorCode);
+ sourceIndex+=length+(int32_t)(source-lastSource);
+ lastSource=source;
+
+ if(U_FAILURE(*pErrorCode)) {
+ /* not mappable or buffer overflow */
+ break;
+ } else {
+ /* a mapping was written to the target, continue */
+
+ /* recalculate the targetCapacity after an extension mapping */
+ targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
+ length=(int32_t)(sourceLimit-source);
+ if(length<targetCapacity) {
+ targetCapacity=length;
+ }
+ }
+
+#if MBCS_UNROLL_SINGLE_FROM_BMP
+ /* unrolling makes it slower on Pentium III/Windows 2000?! */
+ goto unrolled;
+#endif
+ }
+
+ if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ /* set offsets since the start or the last callback */
+ if(offsets!=NULL) {
+ size_t count=source-lastSource;
+ if (count > 0 && *pErrorCode == U_TRUNCATED_CHAR_FOUND) {
+ /*
+ Caller gave us a partial supplementary character,
+ which this function couldn't convert in any case.
+ The callback will handle the offset.
+ */
+ count--;
+ }
+ while(count>0) {
+ *offsets++=sourceIndex++;
+ --count;
+ }
+ }
+
+ /* set the converter state back into UConverter */
+ cnv->fromUChar32=c;
+
+ /* write back the updated pointers */
+ pArgs->source=source;
+ pArgs->target=(char *)target;
+ pArgs->offsets=offsets;
+}
+/*
+ * General MBCS from-Unicode conversion with optional offsets.
+ *
+ * First continues a pending extension match (cnv->preFromUFirstCP>=0), then
+ * hands off to the single-byte or the utf8Friendly double-byte function when
+ * outputType/unicodeMask allow it. Otherwise the generic loop below converts
+ * one code point at a time, via the mbcsIndex fast path (utf8Friendly tables,
+ * c<=0xd7ff) or the stage 1/2/3 trie lookup, and falls back to _extFromU()
+ * for code points without a usable base mapping.
+ *
+ * State carried between calls: cnv->fromUChar32 (pending code point) and,
+ * for MBCS_OUTPUT_2_SISO, cnv->fromUnicodeStatus (prevLength: 1=SBCS, 2=DBCS).
+ * Output bytes that do not fit into the target are stored in
+ * cnv->charErrorBuffer and *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR.
+ * pArgs->source, pArgs->target and pArgs->offsets are written back on return.
+ */
+U_CFUNC void
+ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const UChar *source, *sourceLimit;
+ uint8_t *target;
+ int32_t targetCapacity;
+ int32_t *offsets;
+
+ const uint16_t *table;
+ const uint16_t *mbcsIndex;
+ const uint8_t *p, *bytes;
+ uint8_t outputType;
+
+ UChar32 c;
+
+ int32_t prevSourceIndex, sourceIndex, nextSourceIndex;
+
+ uint32_t stage2Entry;
+ uint32_t asciiRoundtrips;
+ uint32_t value;
+ /* Shift-In and Shift-Out byte sequences differ by encoding scheme. */
+ uint8_t siBytes[2] = {0, 0};
+ uint8_t soBytes[2] = {0, 0};
+ uint8_t siLength, soLength;
+ int32_t length = 0, prevLength;
+ uint8_t unicodeMask;
+
+ cnv=pArgs->converter;
+
+ if(cnv->preFromUFirstCP>=0) {
+ /*
+ * pass sourceIndex=-1 because we continue from an earlier buffer
+ * in the future, this may change with continuous offsets
+ */
+ ucnv_extContinueMatchFromU(cnv, pArgs, -1, pErrorCode);
+
+ if(U_FAILURE(*pErrorCode) || cnv->preFromULength<0) {
+ return;
+ }
+ }
+
+ /* use optimized function if possible */
+ outputType=cnv->sharedData->mbcs.outputType;
+ unicodeMask=cnv->sharedData->mbcs.unicodeMask;
+ if(outputType==MBCS_OUTPUT_1 && !(unicodeMask&UCNV_HAS_SURROGATES)) {
+ if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
+ ucnv_MBCSSingleFromBMPWithOffsets(pArgs, pErrorCode);
+ } else {
+ ucnv_MBCSSingleFromUnicodeWithOffsets(pArgs, pErrorCode);
+ }
+ return;
+ } else if(outputType==MBCS_OUTPUT_2 && cnv->sharedData->mbcs.utf8Friendly) {
+ ucnv_MBCSDoubleFromUnicodeWithOffsets(pArgs, pErrorCode);
+ return;
+ }
+
+ /* set up the local pointers */
+ source=pArgs->source;
+ sourceLimit=pArgs->sourceLimit;
+ target=(uint8_t *)pArgs->target;
+ targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+ offsets=pArgs->offsets;
+
+ table=cnv->sharedData->mbcs.fromUnicodeTable;
+ if(cnv->sharedData->mbcs.utf8Friendly) {
+ mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
+ } else {
+ mbcsIndex=NULL;
+ }
+ if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
+ bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
+ } else {
+ bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
+ }
+ asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
+
+ /* get the converter state from UConverter */
+ c=cnv->fromUChar32;
+
+ if(outputType==MBCS_OUTPUT_2_SISO) {
+ prevLength=cnv->fromUnicodeStatus;
+ if(prevLength==0) {
+ /* set the real value */
+ prevLength=1;
+ }
+ } else {
+ /* prevent fromUnicodeStatus from being set to something non-0 */
+ prevLength=0;
+ }
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ prevSourceIndex=-1;
+ sourceIndex= c==0 ? 0 : -1;
+ nextSourceIndex=0;
+
+ /* Get the SI/SO byte sequences (handled below as 1 or 2 bytes each) for the converter */
+ siLength = static_cast<uint8_t>(getSISOBytes(SI, cnv->options, siBytes));
+ soLength = static_cast<uint8_t>(getSISOBytes(SO, cnv->options, soBytes));
+
+ /* conversion loop */
+ /*
+ * This is another piece of ugly code:
+ * A goto into the loop if the converter state contains a first surrogate
+ * from the previous function call.
+ * It saves me from checking if(c==0) in each loop iteration
+ * and from duplicating the trail-surrogate-handling code in the else
+ * branch of that check.
+ * I could not find any other way to get around this other than
+ * using a function call for the conversion and callback, which would
+ * be even more inefficient.
+ *
+ * Markus Scherer 2000-jul-19
+ */
+ if(c!=0 && targetCapacity>0) {
+ goto getTrail;
+ }
+
+ while(source<sourceLimit) {
+ /*
+ * This following test is to see if available input would overflow the output.
+ * It does not catch output of more than one byte that
+ * overflows as a result of a multi-byte character or callback output
+ * from the last source character.
+ * Therefore, those situations also test for overflows and will
+ * then break the loop, too.
+ */
+ if(targetCapacity>0) {
+ /*
+ * Get a correct Unicode code point:
+ * a single UChar for a BMP code point or
+ * a matched surrogate pair for a "supplementary code point".
+ */
+ c=*source++;
+ ++nextSourceIndex;
+ if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
+ *target++=(uint8_t)c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ prevSourceIndex=sourceIndex;
+ sourceIndex=nextSourceIndex;
+ }
+ --targetCapacity;
+ c=0;
+ continue;
+ }
+ /*
+ * utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX
+ * to avoid dealing with surrogates.
+ * MBCS_FAST_MAX must be >=0xd7ff.
+ */
+ if(c<=0xd7ff && mbcsIndex!=NULL) {
+ value=mbcsIndex[c>>6];
+
+ /* get the bytes and the length for the output (copied from below and adapted for utf8Friendly data) */
+ /* There are only roundtrips (!=0) and no-mapping (==0) entries. */
+ switch(outputType) {
+ case MBCS_OUTPUT_2:
+ value=((const uint16_t *)bytes)[value +(c&0x3f)];
+ if(value<=0xff) {
+ if(value==0) {
+ goto unassigned;
+ } else {
+ length=1;
+ }
+ } else {
+ length=2;
+ }
+ break;
+ case MBCS_OUTPUT_2_SISO:
+ /* 1/2-byte stateful with Shift-In/Shift-Out */
+ /*
+ * Save the old state in the converter object
+ * right here, then change the local prevLength state variable if necessary.
+ * Then, if this character turns out to be unassigned or a fallback that
+ * is not taken, the callback code must not save the new state in the converter
+ * because the new state is for a character that is not output.
+ * However, the callback must still restore the state from the converter
+ * in case the callback function changed it for its output.
+ */
+ cnv->fromUnicodeStatus=prevLength; /* save the old state */
+ value=((const uint16_t *)bytes)[value +(c&0x3f)];
+ if(value<=0xff) {
+ if(value==0) {
+ goto unassigned;
+ } else if(prevLength<=1) {
+ length=1;
+ } else {
+ /* change from double-byte mode to single-byte */
+ if (siLength == 1) {
+ value|=(uint32_t)siBytes[0]<<8;
+ length = 2;
+ } else if (siLength == 2) {
+ value|=(uint32_t)siBytes[1]<<8;
+ value|=(uint32_t)siBytes[0]<<16;
+ length = 3;
+ }
+ prevLength=1;
+ }
+ } else {
+ if(prevLength==2) {
+ length=2;
+ } else {
+ /* change from single-byte mode to double-byte */
+ if (soLength == 1) {
+ value|=(uint32_t)soBytes[0]<<16;
+ length = 3;
+ } else if (soLength == 2) {
+ value|=(uint32_t)soBytes[1]<<16;
+ value|=(uint32_t)soBytes[0]<<24;
+ length = 4;
+ }
+ prevLength=2;
+ }
+ }
+ break;
+ case MBCS_OUTPUT_DBCS_ONLY:
+ /* table with single-byte results, but only DBCS mappings used */
+ value=((const uint16_t *)bytes)[value +(c&0x3f)];
+ if(value<=0xff) {
+ /* no mapping or SBCS result, not taken for DBCS-only */
+ goto unassigned;
+ } else {
+ length=2;
+ }
+ break;
+ case MBCS_OUTPUT_3:
+ p=bytes+(value+(c&0x3f))*3;
+ value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
+ if(value<=0xff) {
+ if(value==0) {
+ goto unassigned;
+ } else {
+ length=1;
+ }
+ } else if(value<=0xffff) {
+ length=2;
+ } else {
+ length=3;
+ }
+ break;
+ case MBCS_OUTPUT_4:
+ value=((const uint32_t *)bytes)[value +(c&0x3f)];
+ if(value<=0xff) {
+ if(value==0) {
+ goto unassigned;
+ } else {
+ length=1;
+ }
+ } else if(value<=0xffff) {
+ length=2;
+ } else if(value<=0xffffff) {
+ length=3;
+ } else {
+ length=4;
+ }
+ break;
+ case MBCS_OUTPUT_3_EUC:
+ value=((const uint16_t *)bytes)[value +(c&0x3f)];
+ /* EUC 16-bit fixed-length representation */
+ if(value<=0xff) {
+ if(value==0) {
+ goto unassigned;
+ } else {
+ length=1;
+ }
+ } else if((value&0x8000)==0) {
+ value|=0x8e8000;
+ length=3;
+ } else if((value&0x80)==0) {
+ value|=0x8f0080;
+ length=3;
+ } else {
+ length=2;
+ }
+ break;
+ case MBCS_OUTPUT_4_EUC:
+ p=bytes+(value+(c&0x3f))*3;
+ value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
+ /* EUC 16-bit fixed-length representation applied to the first two bytes */
+ if(value<=0xff) {
+ if(value==0) {
+ goto unassigned;
+ } else {
+ length=1;
+ }
+ } else if(value<=0xffff) {
+ length=2;
+ } else if((value&0x800000)==0) {
+ value|=0x8e800000;
+ length=4;
+ } else if((value&0x8000)==0) {
+ value|=0x8f008000;
+ length=4;
+ } else {
+ length=3;
+ }
+ break;
+ default:
+ /* must not occur */
+ /*
+ * To avoid compiler warnings that value & length may be
+ * used without having been initialized, we set them here.
+ * In reality, this is unreachable code.
+ * Not having a default branch also causes warnings with
+ * some compilers.
+ */
+ value=0;
+ length=0;
+ break;
+ }
+ /* output the value */
+ } else {
+ /*
+ * This also tests if the codepage maps single surrogates.
+ * If it does, then surrogates are not paired but mapped separately.
+ * Note that in this case unmatched surrogates are not detected.
+ */
+ if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
+ if(U16_IS_SURROGATE_LEAD(c)) {
+getTrail:
+ if(source<sourceLimit) {
+ /* test the following code unit */
+ UChar trail=*source;
+ if(U16_IS_TRAIL(trail)) {
+ ++source;
+ ++nextSourceIndex;
+ c=U16_GET_SUPPLEMENTARY(c, trail);
+ if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
+ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+ cnv->fromUnicodeStatus=prevLength; /* save the old state */
+ /* callback(unassigned) */
+ goto unassigned;
+ }
+ /* convert this supplementary code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ } else {
+ /* no more input */
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ }
+
+ /* convert the Unicode code point in c into codepage bytes */
+
+ /*
+ * The basic lookup is a triple-stage compact array (trie) lookup.
+ * For details see the beginning of this file.
+ *
+ * Single-byte codepages are handled with a different data structure
+ * by _MBCSSingle... functions.
+ *
+ * The result consists of a 32-bit value from stage 2 and
+ * a pointer to as many bytes as are stored per character.
+ * The pointer points to the character's bytes in stage 3.
+ * Bits 15..0 of the stage 2 entry contain the stage 3 index
+ * for that pointer, while bits 31..16 are flags for which of
+ * the 16 characters in the block are roundtrip-assigned.
+ *
+ * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t
+ * respectively as uint32_t, in the platform encoding.
+ * For 3-byte codepages, the bytes are always stored in big-endian order.
+ *
+ * For EUC encodings that use only either 0x8e or 0x8f as the first
+ * byte of their longest byte sequences, the first two bytes in
+ * this third stage indicate with their 7th bits whether these bytes
+ * are to be written directly or actually need to be preceded by
+ * one of the two Single-Shift codes. With this, the third stage
+ * stores one byte fewer per character than the actual maximum length of
+ * EUC byte sequences.
+ *
+ * Other than that, leading zero bytes are removed and the other
+ * bytes output. A single zero byte may be output if the "assigned"
+ * bit in stage 2 was on.
+ * The data structure does not support zero byte output as a fallback,
+ * and also does not allow output of leading zeros.
+ */
+ stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
+
+ /* get the bytes and the length for the output */
+ switch(outputType) {
+ case MBCS_OUTPUT_2:
+ value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+ if(value<=0xff) {
+ length=1;
+ } else {
+ length=2;
+ }
+ break;
+ case MBCS_OUTPUT_2_SISO:
+ /* 1/2-byte stateful with Shift-In/Shift-Out */
+ /*
+ * Save the old state in the converter object
+ * right here, then change the local prevLength state variable if necessary.
+ * Then, if this character turns out to be unassigned or a fallback that
+ * is not taken, the callback code must not save the new state in the converter
+ * because the new state is for a character that is not output.
+ * However, the callback must still restore the state from the converter
+ * in case the callback function changed it for its output.
+ */
+ cnv->fromUnicodeStatus=prevLength; /* save the old state */
+ value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+ if(value<=0xff) {
+ if(value==0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)==0) {
+ /* no mapping, leave value==0 */
+ length=0;
+ } else if(prevLength<=1) {
+ length=1;
+ } else {
+ /* change from double-byte mode to single-byte */
+ if (siLength == 1) {
+ value|=(uint32_t)siBytes[0]<<8;
+ length = 2;
+ } else if (siLength == 2) {
+ value|=(uint32_t)siBytes[1]<<8;
+ value|=(uint32_t)siBytes[0]<<16;
+ length = 3;
+ }
+ prevLength=1;
+ }
+ } else {
+ if(prevLength==2) {
+ length=2;
+ } else {
+ /* change from single-byte mode to double-byte */
+ if (soLength == 1) {
+ value|=(uint32_t)soBytes[0]<<16;
+ length = 3;
+ } else if (soLength == 2) {
+ value|=(uint32_t)soBytes[1]<<16;
+ value|=(uint32_t)soBytes[0]<<24;
+ length = 4;
+ }
+ prevLength=2;
+ }
+ }
+ break;
+ case MBCS_OUTPUT_DBCS_ONLY:
+ /* table with single-byte results, but only DBCS mappings used */
+ value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+ if(value<=0xff) {
+ /* no mapping or SBCS result, not taken for DBCS-only */
+ value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
+ length=0;
+ } else {
+ length=2;
+ }
+ break;
+ case MBCS_OUTPUT_3:
+ p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
+ value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
+ if(value<=0xff) {
+ length=1;
+ } else if(value<=0xffff) {
+ length=2;
+ } else {
+ length=3;
+ }
+ break;
+ case MBCS_OUTPUT_4:
+ value=MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c);
+ if(value<=0xff) {
+ length=1;
+ } else if(value<=0xffff) {
+ length=2;
+ } else if(value<=0xffffff) {
+ length=3;
+ } else {
+ length=4;
+ }
+ break;
+ case MBCS_OUTPUT_3_EUC:
+ value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+ /* EUC 16-bit fixed-length representation */
+ if(value<=0xff) {
+ length=1;
+ } else if((value&0x8000)==0) {
+ value|=0x8e8000;
+ length=3;
+ } else if((value&0x80)==0) {
+ value|=0x8f0080;
+ length=3;
+ } else {
+ length=2;
+ }
+ break;
+ case MBCS_OUTPUT_4_EUC:
+ p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
+ value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
+ /* EUC 16-bit fixed-length representation applied to the first two bytes */
+ if(value<=0xff) {
+ length=1;
+ } else if(value<=0xffff) {
+ length=2;
+ } else if((value&0x800000)==0) {
+ value|=0x8e800000;
+ length=4;
+ } else if((value&0x8000)==0) {
+ value|=0x8f008000;
+ length=4;
+ } else {
+ length=3;
+ }
+ break;
+ default:
+ /* must not occur */
+ /*
+ * To avoid compiler warnings that value & length may be
+ * used without having been initialized, we set them here.
+ * In reality, this is unreachable code.
+ * Not having a default branch also causes warnings with
+ * some compilers.
+ */
+ value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
+ length=0;
+ break;
+ }
+
+ /* is this code point assigned, or do we use fallbacks? */
+ if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)!=0 ||
+ (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
+ ) {
+ /*
+ * We allow a 0 byte output if the "assigned" bit is set for this entry.
+ * There is no way with this data structure for fallback output
+ * to be a zero byte.
+ */
+
+unassigned:
+ /* try an extension mapping */
+ pArgs->source=source;
+ c=_extFromU(cnv, cnv->sharedData,
+ c, &source, sourceLimit,
+ &target, target+targetCapacity,
+ &offsets, sourceIndex,
+ pArgs->flush,
+ pErrorCode);
+ nextSourceIndex+=(int32_t)(source-pArgs->source);
+ prevLength=cnv->fromUnicodeStatus; /* restore SISO state */
+
+ if(U_FAILURE(*pErrorCode)) {
+ /* not mappable or buffer overflow */
+ break;
+ } else {
+ /* a mapping was written to the target, continue */
+
+ /* recalculate the targetCapacity after an extension mapping */
+ targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
+
+ /* normal end of conversion: prepare for a new character */
+ if(offsets!=NULL) {
+ prevSourceIndex=sourceIndex;
+ sourceIndex=nextSourceIndex;
+ }
+ continue;
+ }
+ }
+ }
+
+ /* write the output character bytes from value and length */
+ /* from the first if in the loop we know that targetCapacity>0 */
+ if(length<=targetCapacity) {
+ if(offsets==NULL) {
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 4:
+ *target++=(uint8_t)(value>>24);
+ U_FALLTHROUGH;
+ case 3:
+ *target++=(uint8_t)(value>>16);
+ U_FALLTHROUGH;
+ case 2:
+ *target++=(uint8_t)(value>>8);
+ U_FALLTHROUGH;
+ case 1:
+ *target++=(uint8_t)value;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ } else {
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 4:
+ *target++=(uint8_t)(value>>24);
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ case 3:
+ *target++=(uint8_t)(value>>16);
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ case 2:
+ *target++=(uint8_t)(value>>8);
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ case 1:
+ *target++=(uint8_t)value;
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ }
+ targetCapacity-=length;
+ } else {
+ uint8_t *charErrorBuffer;
+
+ /*
+ * We actually do this backwards here:
+ * In order to save an intermediate variable, we output
+ * first to the overflow buffer what does not fit into the
+ * regular target.
+ */
+ /* we know that 1<=targetCapacity<length<=4 */
+ length-=targetCapacity;
+ charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 3:
+ *charErrorBuffer++=(uint8_t)(value>>16);
+ U_FALLTHROUGH;
+ case 2:
+ *charErrorBuffer++=(uint8_t)(value>>8);
+ U_FALLTHROUGH;
+ case 1:
+ *charErrorBuffer=(uint8_t)value;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ cnv->charErrorBufferLength=(int8_t)length;
+
+ /* now output what fits into the regular target */
+ value>>=8*length; /* length was reduced by targetCapacity */
+ switch(targetCapacity) {
+ /* each branch falls through to the next one */
+ case 3:
+ *target++=(uint8_t)(value>>16);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ U_FALLTHROUGH;
+ case 2:
+ *target++=(uint8_t)(value>>8);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ U_FALLTHROUGH;
+ case 1:
+ *target++=(uint8_t)value;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+
+ /* target overflow */
+ targetCapacity=0;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ c=0;
+ break;
+ }
+
+ /* normal end of conversion: prepare for a new character */
+ c=0;
+ if(offsets!=NULL) {
+ prevSourceIndex=sourceIndex;
+ sourceIndex=nextSourceIndex;
+ }
+ continue;
+ } else {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+
+ /*
+ * the end of the input stream and detection of truncated input
+ * are handled by the framework, but for EBCDIC_STATEFUL conversion
+ * we need to emit an SI at the very end
+ *
+ * conditions:
+ * successful
+ * EBCDIC_STATEFUL in DBCS mode
+ * end of input and no truncated input
+ */
+ if( U_SUCCESS(*pErrorCode) &&
+ outputType==MBCS_OUTPUT_2_SISO && prevLength==2 &&
+ pArgs->flush && source>=sourceLimit && c==0
+ ) {
+ /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
+ if(targetCapacity>0) {
+ *target++=(uint8_t)siBytes[0];
+ if (siLength == 2) {
+ if (targetCapacity<2) {
+ cnv->charErrorBuffer[0]=(uint8_t)siBytes[1];
+ cnv->charErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ } else {
+ *target++=(uint8_t)siBytes[1];
+ }
+ }
+ if(offsets!=NULL) {
+ /*
+ * set the last source character's index (sourceIndex points at sourceLimit now)
+ * NOTE(review): exactly one offset is written here, even when both bytes
+ * of a 2-byte SI were written to the target above - confirm this is intended.
+ */
+ *offsets++=prevSourceIndex;
+ }
+ } else {
+ /* target is full */
+ cnv->charErrorBuffer[0]=(uint8_t)siBytes[0];
+ if (siLength == 2) {
+ cnv->charErrorBuffer[1]=(uint8_t)siBytes[1];
+ }
+ cnv->charErrorBufferLength=siLength;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ prevLength=1; /* we switched into SBCS */
+ }
+
+ /* set the converter state back into UConverter */
+ cnv->fromUChar32=c;
+ cnv->fromUnicodeStatus=prevLength;
+
+ /* write back the updated pointers */
+ pArgs->source=source;
+ pArgs->target=(char *)target;
+ pArgs->offsets=offsets;
+}
+
+/*
+ * This is another simple conversion function for internal use by other
+ * conversion implementations.
+ * It does not use the converter state nor call callbacks.
+ * It does not handle the EBCDIC swaplfnl option (set in UConverter).
+ * It handles conversion extensions but not GB 18030.
+ *
+ * It converts one single Unicode code point into codepage bytes, encoded
+ * as one 32-bit value. The function returns the number of bytes in *pValue:
+ * 1..4 the number of bytes in *pValue
+ * 0 unassigned (*pValue undefined)
+ * -1 illegal (currently not used, *pValue undefined)
+ *
+ * *pValue will contain the resulting bytes with the last byte in bits 7..0,
+ * the second to last byte in bits 15..8, etc.
+ * Currently, the function assumes but does not check that 0<=c<=0x10ffff.
+ *
+ * Parameters:
+ * sharedData  supplies the mbcs tables (fromUnicodeTable, fromUnicodeBytes,
+ *             outputType, unicodeMask, extIndexes)
+ * c           the code point to convert
+ * pValue      receives the output bytes
+ * useFallback whether fallback mappings may be used in addition to roundtrips
+ *
+ * Only the MBCS_OUTPUT_1 and MBCS_OUTPUT_2 cases are compiled in; the other
+ * output types are under #if 0, so they reach the default branch, which
+ * returns -1.
+ * If the base table yields no usable mapping and sharedData->mbcs.extIndexes
+ * is not NULL, the absolute value of the ucnv_extSimpleMatchFromU() result
+ * is returned instead.
+ */
+U_CFUNC int32_t
+ucnv_MBCSFromUChar32(UConverterSharedData *sharedData,
+ UChar32 c, uint32_t *pValue,
+ UBool useFallback) {
+ const int32_t *cx;
+ const uint16_t *table;
+#if 0
+/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
+ const uint8_t *p;
+#endif
+ uint32_t stage2Entry;
+ uint32_t value;
+ int32_t length;
+
+ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+ if(c<=0xffff || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
+ table=sharedData->mbcs.fromUnicodeTable;
+
+ /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
+ if(sharedData->mbcs.outputType==MBCS_OUTPUT_1) {
+ value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
+ /* is this code point assigned, or do we use fallbacks? */
+ if(useFallback ? value>=0x800 : value>=0xc00) {
+ *pValue=value&0xff;
+ return 1;
+ }
+ } else /* outputType!=MBCS_OUTPUT_1 */ {
+ stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
+
+ /* get the bytes and the length for the output */
+ switch(sharedData->mbcs.outputType) {
+ case MBCS_OUTPUT_2:
+ value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+ if(value<=0xff) {
+ length=1;
+ } else {
+ length=2;
+ }
+ break;
+#if 0
+/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
+ case MBCS_OUTPUT_DBCS_ONLY:
+ /* table with single-byte results, but only DBCS mappings used */
+ value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+ if(value<=0xff) {
+ /* no mapping or SBCS result, not taken for DBCS-only */
+ value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
+ length=0;
+ } else {
+ length=2;
+ }
+ break;
+ case MBCS_OUTPUT_3:
+ p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+ value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
+ if(value<=0xff) {
+ length=1;
+ } else if(value<=0xffff) {
+ length=2;
+ } else {
+ length=3;
+ }
+ break;
+ case MBCS_OUTPUT_4:
+ value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+ if(value<=0xff) {
+ length=1;
+ } else if(value<=0xffff) {
+ length=2;
+ } else if(value<=0xffffff) {
+ length=3;
+ } else {
+ length=4;
+ }
+ break;
+ case MBCS_OUTPUT_3_EUC:
+ value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+ /* EUC 16-bit fixed-length representation */
+ if(value<=0xff) {
+ length=1;
+ } else if((value&0x8000)==0) {
+ value|=0x8e8000;
+ length=3;
+ } else if((value&0x80)==0) {
+ value|=0x8f0080;
+ length=3;
+ } else {
+ length=2;
+ }
+ break;
+ case MBCS_OUTPUT_4_EUC:
+ p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+ value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
+ /* EUC 16-bit fixed-length representation applied to the first two bytes */
+ if(value<=0xff) {
+ length=1;
+ } else if(value<=0xffff) {
+ length=2;
+ } else if((value&0x800000)==0) {
+ value|=0x8e800000;
+ length=4;
+ } else if((value&0x8000)==0) {
+ value|=0x8f008000;
+ length=4;
+ } else {
+ length=3;
+ }
+ break;
+#endif
+ default:
+ /* must not occur */
+ return -1;
+ }
+
+ /* is this code point assigned, or do we use fallbacks? */
+ if( MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ||
+ (FROM_U_USE_FALLBACK(useFallback, c) && value!=0)
+ ) {
+ /*
+ * We allow a 0 byte output if the "assigned" bit is set for this entry.
+ * There is no way with this data structure for fallback output
+ * to be a zero byte.
+ */
+ /* assigned */
+ *pValue=value;
+ return length;
+ }
+ }
+ }
+
+ cx=sharedData->mbcs.extIndexes;
+ if(cx!=NULL) {
+ length=ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback);
+ return length>=0 ? length : -length; /* return abs(length); */
+ }
+
+ /* unassigned */
+ return 0;
+}
+
+
+#if 0
+/*
+ * This function has been moved to ucnv2022.c for inlining.
+ * This implementation is here only for documentation purposes
+ */
+
+/**
+ * This version of ucnv_MBCSFromUChar32() is optimized for single-byte codepages.
+ * It does not handle the EBCDIC swaplfnl option (set in UConverter).
+ * It does not handle conversion extensions (_extFromU()).
+ *
+ * It returns the codepage byte for the code point, or -1 if it is unassigned.
+ *
+ * sharedData supplies mbcs.fromUnicodeTable, mbcs.fromUnicodeBytes and
+ * mbcs.unicodeMask; c is the code point to convert.
+ * useFallback lowers the acceptance threshold for the 16-bit table result
+ * from 0xc00 to 0x800; the returned byte is the low 8 bits of that result.
+ */
+U_CFUNC int32_t
+ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData,
+ UChar32 c,
+ UBool useFallback) {
+ const uint16_t *table;
+ int32_t value;
+
+ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+ if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
+ return -1;
+ }
+
+ /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
+ table=sharedData->mbcs.fromUnicodeTable;
+
+ /* get the byte for the output */
+ value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
+ /* is this code point assigned, or do we use fallbacks? */
+ if(useFallback ? value>=0x800 : value>=0xc00) {
+ return value&0xff;
+ } else {
+ return -1;
+ }
+}
+#endif
+
+/* MBCS-from-UTF-8 conversion functions ------------------------------------- */
+
+/*
+ * Offsets for n-byte UTF-8 sequences, indexed by the sequence length n (0..4).
+ * Each value is what the lead-byte and trail-byte marker bits add up to when
+ * a sequence is accumulated with (((lead<<6)+trail)<<6)+trail...,
+ * e.g. for n=2: (0xc0<<6)+0x80 = 0x3080.
+ * Presumably subtracted from the accumulated value to obtain the code point -
+ * the use is not visible here, confirm at the call sites.
+ */
+static const UChar32
+utf8_offsets[5]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
+
+/*
+ * Converts UTF-8 input directly into a single-byte charset, without going
+ * through UTF-16.
+ *
+ * pToUArgs supplies the UTF-8 converter and the source bytes;
+ * pFromUArgs supplies the target charset converter and the output buffer.
+ * On every return path pToUArgs->source and pFromUArgs->target are updated.
+ *
+ * ASCII and the sequences for U+0080..U+0FFF are looked up inline via the
+ * sbcsIndex; everything else takes the slower moreBytes path.
+ * An incomplete sequence at the end of the input is saved in the UTF-8
+ * converter (toUBytes, toUnicodeStatus, toULength, mode) and is picked up
+ * again at the start of the next call.
+ *
+ * Codes set in *pErrorCode by this function:
+ * - U_ILLEGAL_CHAR_FOUND: ill-formed UTF-8 sequence (its bytes are stored in utf8->toUBytes)
+ * - U_BUFFER_OVERFLOW_ERROR: the target buffer is full
+ * - U_USING_DEFAULT_WARNING: partial extension match, "return and revert to pivoting"
+ * A failure reported by _extFromU() ends the loop with the code point
+ * stored in cnv->fromUChar32.
+ */
+static void U_CALLCONV
+ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
+ UConverterToUnicodeArgs *pToUArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *utf8, *cnv;
+ const uint8_t *source, *sourceLimit;
+ uint8_t *target;
+ int32_t targetCapacity;
+
+ const uint16_t *table, *sbcsIndex;
+ const uint16_t *results;
+
+ int8_t oldToULength, toULength, toULimit;
+
+ UChar32 c;
+ uint8_t b, t1, t2;
+
+ uint32_t asciiRoundtrips;
+ uint16_t value, minValue = 0;
+ UBool hasSupplementary;
+
+ /* set up the local pointers */
+ utf8=pToUArgs->converter;
+ cnv=pFromUArgs->converter;
+ source=(uint8_t *)pToUArgs->source;
+ sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
+ target=(uint8_t *)pFromUArgs->target;
+ targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
+
+ table=cnv->sharedData->mbcs.fromUnicodeTable;
+ sbcsIndex=cnv->sharedData->mbcs.sbcsIndex;
+ if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
+ results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
+ } else {
+ results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
+ }
+ asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
+
+ if(cnv->useFallback) {
+ /* use all roundtrip and fallback results */
+ minValue=0x800;
+ } else {
+ /* use only roundtrips and fallbacks from private-use characters */
+ minValue=0xc00;
+ }
+ hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
+
+ /* get the converter state from the UTF-8 UConverter */
+ if(utf8->toULength > 0) {
+ toULength=oldToULength=utf8->toULength;
+ toULimit=(int8_t)utf8->mode;
+ c=(UChar32)utf8->toUnicodeStatus;
+ } else {
+ toULength=oldToULength=toULimit=0;
+ c = 0;
+ }
+
+ // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
+ // If the buffer ends with a truncated 2- or 3-byte sequence,
+ // then we reduce the sourceLimit to before that,
+ // and collect the remaining bytes after the conversion loop.
+ {
+ // Do not go back into the bytes that will be read for finishing a partial
+ // sequence from the previous buffer.
+ int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
+ if(length>0) {
+ uint8_t b1=*(sourceLimit-1);
+ if(U8_IS_SINGLE(b1)) {
+ // common ASCII character
+ } else if(U8_IS_TRAIL(b1) && length>=2) {
+ uint8_t b2=*(sourceLimit-2);
+ if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+ // truncated 3-byte sequence
+ sourceLimit-=2;
+ }
+ } else if(0xc2<=b1 && b1<0xf0) {
+ // truncated 2- or 3-byte sequence
+ --sourceLimit;
+ }
+ }
+ }
+
+ if(c!=0 && targetCapacity>0) {
+ utf8->toUnicodeStatus=0;
+ utf8->toULength=0;
+ goto moreBytes;
+ /*
+ * Note: We could avoid the goto by duplicating some of the moreBytes
+ * code, but only up to the point of collecting a complete UTF-8
+ * sequence; then recurse for the toUBytes[toULength]
+ * and then continue with normal conversion.
+ *
+ * If so, move this code to just after initializing the minimum
+ * set of local variables for reading the UTF-8 input
+ * (utf8, source, target, limits but not cnv, table, minValue, etc.).
+ *
+ * Potential advantages:
+ * - avoid the goto
+ * - oldToULength could become a local variable in just those code blocks
+ * that deal with buffer boundaries
+ * - possibly faster if the goto prevents some compiler optimizations
+ * (this would need measuring to confirm)
+ * Disadvantage:
+ * - code duplication
+ */
+ }
+
+ /* conversion loop */
+ while(source<sourceLimit) {
+ if(targetCapacity>0) {
+ b=*source++;
+ if(U8_IS_SINGLE(b)) {
+ /* convert ASCII */
+ if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
+ *target++=(uint8_t)b;
+ --targetCapacity;
+ continue;
+ } else {
+ c=b;
+ value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, 0, c);
+ }
+ } else {
+ if(b<0xe0) {
+ if( /* handle U+0080..U+07FF inline */
+ b>=0xc2 &&
+ (t1=(uint8_t)(*source-0x80)) <= 0x3f
+ ) {
+ c=b&0x1f;
+ ++source;
+ value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t1);
+ if(value>=minValue) {
+ *target++=(uint8_t)value;
+ --targetCapacity;
+ continue;
+ } else {
+ c=(c<<6)|t1;
+ }
+ } else {
+ c=-1;
+ }
+ } else if(b==0xe0) {
+ if( /* handle U+0800..U+0FFF inline */
+ (t1=(uint8_t)(source[0]-0x80)) <= 0x3f && t1 >= 0x20 &&
+ (t2=(uint8_t)(source[1]-0x80)) <= 0x3f
+ ) {
+ c=t1;
+ source+=2;
+ value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t2);
+ if(value>=minValue) {
+ *target++=(uint8_t)value;
+ --targetCapacity;
+ continue;
+ } else {
+ c=(c<<6)|t2;
+ }
+ } else {
+ c=-1;
+ }
+ } else {
+ c=-1;
+ }
+
+ if(c<0) {
+ /* handle "complicated" and error cases, and continuing partial characters */
+ oldToULength=0;
+ toULength=1;
+ toULimit=U8_COUNT_BYTES_NON_ASCII(b);
+ c=b;
+moreBytes:
+ while(toULength<toULimit) {
+ /*
+ * The sourceLimit may have been adjusted before the conversion loop
+ * to stop before a truncated sequence.
+ * Here we need to use the real limit in case we have two truncated
+ * sequences at the end.
+ * See ticket #7492.
+ */
+ if(source<(uint8_t *)pToUArgs->sourceLimit) {
+ b=*source;
+ if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
+ ++source;
+ ++toULength;
+ c=(c<<6)+b;
+ } else {
+ break; /* sequence too short, stop with toULength<toULimit */
+ }
+ } else {
+ /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
+ source-=(toULength-oldToULength);
+ while(oldToULength<toULength) {
+ utf8->toUBytes[oldToULength++]=*source++;
+ }
+ utf8->toUnicodeStatus=c;
+ utf8->toULength=toULength;
+ utf8->mode=toULimit;
+ pToUArgs->source=(char *)source;
+ pFromUArgs->target=(char *)target;
+ return;
+ }
+ }
+
+ if(toULength==toULimit) {
+ c-=utf8_offsets[toULength];
+ if(toULength<=3) { /* BMP */
+ value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
+ } else {
+ /* supplementary code point */
+ if(!hasSupplementary) {
+ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+ value=0;
+ } else {
+ value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
+ }
+ }
+ } else {
+ /* error handling: illegal UTF-8 byte sequence */
+ source-=(toULength-oldToULength);
+ while(oldToULength<toULength) {
+ utf8->toUBytes[oldToULength++]=*source++;
+ }
+ utf8->toULength=toULength;
+ pToUArgs->source=(char *)source;
+ pFromUArgs->target=(char *)target;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ return;
+ }
+ }
+ }
+
+ if(value>=minValue) {
+ /* output the mapping for c */
+ *target++=(uint8_t)value;
+ --targetCapacity;
+ } else {
+ /* value<minValue means c is unassigned (unmappable) */
+ /*
+ * Try an extension mapping.
+ * Pass in no source because we don't have UTF-16 input.
+ * If we have a partial match on c, we will return and revert
+ * to UTF-8->UTF-16->charset conversion.
+ */
+ static const UChar nul=0;
+ const UChar *noSource=&nul;
+ c=_extFromU(cnv, cnv->sharedData,
+ c, &noSource, noSource,
+ &target, target+targetCapacity,
+ NULL, -1,
+ pFromUArgs->flush,
+ pErrorCode);
+
+ if(U_FAILURE(*pErrorCode)) {
+ /* not mappable or buffer overflow */
+ cnv->fromUChar32=c;
+ break;
+ } else if(cnv->preFromUFirstCP>=0) {
+ /*
+ * Partial match, return and revert to pivoting.
+ * In normal from-UTF-16 conversion, we would just continue
+ * but then exit the loop because the extension match would
+ * have consumed the source.
+ */
+ *pErrorCode=U_USING_DEFAULT_WARNING;
+ break;
+ } else {
+ /* a mapping was written to the target, continue */
+
+ /* recalculate the targetCapacity after an extension mapping */
+ targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target);
+ }
+ }
+ } else {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+
+ /*
+ * The sourceLimit may have been adjusted before the conversion loop
+ * to stop before a truncated sequence.
+ * If so, then collect the truncated sequence now.
+ */
+ if(U_SUCCESS(*pErrorCode) &&
+ cnv->preFromUFirstCP<0 &&
+ source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
+ c=utf8->toUBytes[0]=b=*source++;
+ toULength=1;
+ toULimit=U8_COUNT_BYTES(b);
+ while(source<sourceLimit) {
+ utf8->toUBytes[toULength++]=b=*source++;
+ c=(c<<6)+b;
+ }
+ utf8->toUnicodeStatus=c;
+ utf8->toULength=toULength;
+ utf8->mode=toULimit;
+ }
+
+ /* write back the updated pointers */
+ pToUArgs->source=(char *)source;
+ pFromUArgs->target=(char *)target;
+}
+
+/*
+ * Converts UTF-8 input directly into a charset whose results are written
+ * as one or two bytes, without going through UTF-16.
+ *
+ * pToUArgs supplies the UTF-8 converter and the source bytes;
+ * pFromUArgs supplies the target charset converter and the output buffer.
+ * On every return path pToUArgs->source and pFromUArgs->target are updated.
+ *
+ * ASCII and the sequences for U+0080..U+D7FF are looked up inline via the
+ * mbcsIndex; everything else takes the slower moreBytes path, which uses the
+ * stage 2 table and its roundtrip flags.
+ * A result value <=0xff is written as one byte, any other value as two bytes.
+ * If only the first of two bytes fits, the second one is stored in
+ * cnv->charErrorBuffer and U_BUFFER_OVERFLOW_ERROR is set.
+ * An incomplete sequence at the end of the input is saved in the UTF-8
+ * converter (toUBytes, toUnicodeStatus, toULength, mode) and is picked up
+ * again at the start of the next call.
+ *
+ * Codes set in *pErrorCode by this function:
+ * - U_ILLEGAL_CHAR_FOUND: ill-formed UTF-8 sequence (its bytes are stored in utf8->toUBytes)
+ * - U_BUFFER_OVERFLOW_ERROR: the target buffer is full
+ * - U_USING_DEFAULT_WARNING: partial extension match, "return and revert to pivoting"
+ * A failure reported by _extFromU() ends the loop with the code point
+ * stored in cnv->fromUChar32.
+ */
+static void U_CALLCONV
+ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
+ UConverterToUnicodeArgs *pToUArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *utf8, *cnv;
+ const uint8_t *source, *sourceLimit;
+ uint8_t *target;
+ int32_t targetCapacity;
+
+ const uint16_t *table, *mbcsIndex;
+ const uint16_t *results;
+
+ int8_t oldToULength, toULength, toULimit;
+
+ UChar32 c;
+ uint8_t b, t1, t2;
+
+ uint32_t stage2Entry;
+ uint32_t asciiRoundtrips;
+ uint16_t value = 0;
+ UBool hasSupplementary;
+
+ /* set up the local pointers */
+ utf8=pToUArgs->converter;
+ cnv=pFromUArgs->converter;
+ source=(uint8_t *)pToUArgs->source;
+ sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
+ target=(uint8_t *)pFromUArgs->target;
+ targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
+
+ table=cnv->sharedData->mbcs.fromUnicodeTable;
+ mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
+ if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
+ results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
+ } else {
+ results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
+ }
+ asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
+
+ hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
+
+ /* get the converter state from the UTF-8 UConverter */
+ if(utf8->toULength > 0) {
+ toULength=oldToULength=utf8->toULength;
+ toULimit=(int8_t)utf8->mode;
+ c=(UChar32)utf8->toUnicodeStatus;
+ } else {
+ toULength=oldToULength=toULimit=0;
+ c = 0;
+ }
+
+ // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
+ // If the buffer ends with a truncated 2- or 3-byte sequence,
+ // then we reduce the sourceLimit to before that,
+ // and collect the remaining bytes after the conversion loop.
+ {
+ // Do not go back into the bytes that will be read for finishing a partial
+ // sequence from the previous buffer.
+ int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
+ if(length>0) {
+ uint8_t b1=*(sourceLimit-1);
+ if(U8_IS_SINGLE(b1)) {
+ // common ASCII character
+ } else if(U8_IS_TRAIL(b1) && length>=2) {
+ uint8_t b2=*(sourceLimit-2);
+ if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+ // truncated 3-byte sequence
+ sourceLimit-=2;
+ }
+ } else if(0xc2<=b1 && b1<0xf0) {
+ // truncated 2- or 3-byte sequence
+ --sourceLimit;
+ }
+ }
+ }
+
+ if(c!=0 && targetCapacity>0) {
+ utf8->toUnicodeStatus=0;
+ utf8->toULength=0;
+ goto moreBytes;
+ /* See note in ucnv_SBCSFromUTF8() about this goto. */
+ }
+
+ /* conversion loop */
+ while(source<sourceLimit) {
+ if(targetCapacity>0) {
+ b=*source++;
+ if(U8_IS_SINGLE(b)) {
+ /* convert ASCII */
+ if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
+ *target++=b;
+ --targetCapacity;
+ continue;
+ } else {
+ value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, 0, b);
+ if(value==0) {
+ c=b;
+ goto unassigned;
+ }
+ }
+ } else {
+ if(b>=0xe0) {
+ if( /* handle U+0800..U+D7FF inline */
+ b<=0xed && // do not assume maxFastUChar>0xd7ff
+ U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) &&
+ (t2=(uint8_t)(source[1]-0x80)) <= 0x3f
+ ) {
+ c=((b&0xf)<<6)|(t1&0x3f);
+ source+=2;
+ value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2);
+ if(value==0) {
+ c=(c<<6)|t2;
+ goto unassigned;
+ }
+ } else {
+ c=-1;
+ }
+ } else {
+ if( /* handle U+0080..U+07FF inline */
+ b>=0xc2 &&
+ (t1=(uint8_t)(*source-0x80)) <= 0x3f
+ ) {
+ c=b&0x1f;
+ ++source;
+ value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t1);
+ if(value==0) {
+ c=(c<<6)|t1;
+ goto unassigned;
+ }
+ } else {
+ c=-1;
+ }
+ }
+
+ if(c<0) {
+ /* handle "complicated" and error cases, and continuing partial characters */
+ oldToULength=0;
+ toULength=1;
+ toULimit=U8_COUNT_BYTES_NON_ASCII(b);
+ c=b;
+moreBytes:
+ while(toULength<toULimit) {
+ /*
+ * The sourceLimit may have been adjusted before the conversion loop
+ * to stop before a truncated sequence.
+ * Here we need to use the real limit in case we have two truncated
+ * sequences at the end.
+ * See ticket #7492.
+ */
+ if(source<(uint8_t *)pToUArgs->sourceLimit) {
+ b=*source;
+ if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
+ ++source;
+ ++toULength;
+ c=(c<<6)+b;
+ } else {
+ break; /* sequence too short, stop with toULength<toULimit */
+ }
+ } else {
+ /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
+ source-=(toULength-oldToULength);
+ while(oldToULength<toULength) {
+ utf8->toUBytes[oldToULength++]=*source++;
+ }
+ utf8->toUnicodeStatus=c;
+ utf8->toULength=toULength;
+ utf8->mode=toULimit;
+ pToUArgs->source=(char *)source;
+ pFromUArgs->target=(char *)target;
+ return;
+ }
+ }
+
+ if(toULength==toULimit) {
+ c-=utf8_offsets[toULength];
+ if(toULength<=3) { /* BMP */
+ stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
+ } else {
+ /* supplementary code point */
+ if(!hasSupplementary) {
+ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+ stage2Entry=0;
+ } else {
+ stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
+ }
+ }
+ } else {
+ /* error handling: illegal UTF-8 byte sequence */
+ source-=(toULength-oldToULength);
+ while(oldToULength<toULength) {
+ utf8->toUBytes[oldToULength++]=*source++;
+ }
+ utf8->toULength=toULength;
+ pToUArgs->source=(char *)source;
+ pFromUArgs->target=(char *)target;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ return;
+ }
+
+ /* get the bytes and the length for the output */
+ /* MBCS_OUTPUT_2 */
+ value=MBCS_VALUE_2_FROM_STAGE_2(results, stage2Entry, c);
+
+ /* is this code point assigned, or do we use fallbacks? */
+ if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ||
+ (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
+ ) {
+ goto unassigned;
+ }
+ }
+ }
+
+ /* write the output character bytes from value and length */
+ /* from the first if in the loop we know that targetCapacity>0 */
+ if(value<=0xff) {
+ /* this is easy because we know that there is enough space */
+ *target++=(uint8_t)value;
+ --targetCapacity;
+ } else /* length==2 */ {
+ *target++=(uint8_t)(value>>8);
+ if(2<=targetCapacity) {
+ *target++=(uint8_t)value;
+ targetCapacity-=2;
+ } else {
+ cnv->charErrorBuffer[0]=(char)value;
+ cnv->charErrorBufferLength=1;
+
+ /* target overflow */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ continue;
+
+unassigned:
+ {
+ /*
+ * Try an extension mapping.
+ * Pass in no source because we don't have UTF-16 input.
+ * If we have a partial match on c, we will return and revert
+ * to UTF-8->UTF-16->charset conversion.
+ */
+ static const UChar nul=0;
+ const UChar *noSource=&nul;
+ c=_extFromU(cnv, cnv->sharedData,
+ c, &noSource, noSource,
+ &target, target+targetCapacity,
+ NULL, -1,
+ pFromUArgs->flush,
+ pErrorCode);
+
+ if(U_FAILURE(*pErrorCode)) {
+ /* not mappable or buffer overflow */
+ cnv->fromUChar32=c;
+ break;
+ } else if(cnv->preFromUFirstCP>=0) {
+ /*
+ * Partial match, return and revert to pivoting.
+ * In normal from-UTF-16 conversion, we would just continue
+ * but then exit the loop because the extension match would
+ * have consumed the source.
+ */
+ *pErrorCode=U_USING_DEFAULT_WARNING;
+ break;
+ } else {
+ /* a mapping was written to the target, continue */
+
+ /* recalculate the targetCapacity after an extension mapping */
+ targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target);
+ continue;
+ }
+ }
+ } else {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+
+ /*
+ * The sourceLimit may have been adjusted before the conversion loop
+ * to stop before a truncated sequence.
+ * If so, then collect the truncated sequence now.
+ */
+ if(U_SUCCESS(*pErrorCode) &&
+ cnv->preFromUFirstCP<0 &&
+ source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
+ c=utf8->toUBytes[0]=b=*source++;
+ toULength=1;
+ toULimit=U8_COUNT_BYTES(b);
+ while(source<sourceLimit) {
+ utf8->toUBytes[toULength++]=b=*source++;
+ c=(c<<6)+b;
+ }
+ utf8->toUnicodeStatus=c;
+ utf8->toULength=toULength;
+ utf8->mode=toULimit;
+ }
+
+ /* write back the updated pointers */
+ pToUArgs->source=(char *)source;
+ pFromUArgs->target=(char *)target;
+}
+
+/* miscellaneous ------------------------------------------------------------ */
+
+/*
+ * Fills starters[] with one flag per byte value 0..255: the flag is set if
+ * the byte's entry in the state table row selected by mbcs.dbcsOnlyState is
+ * a transition entry, i.e. if the byte is a lead byte.
+ * The UErrorCode parameter is not used.
+ */
+static void U_CALLCONV
+ucnv_MBCSGetStarters(const UConverter* cnv,
+                     UBool starters[256],
+                     UErrorCode *) {
+    const int32_t *startRow=
+        cnv->sharedData->mbcs.stateTable[cnv->sharedData->mbcs.dbcsOnlyState];
+    int byteValue=0;
+
+    while(byteValue<256) {
+        /* a byte that causes a state transition from the start row is a lead byte */
+        starters[byteValue]=(UBool)MBCS_ENTRY_IS_TRANSITION(startRow[byteValue]);
+        ++byteValue;
+    }
+}
+
+/*
+ * Internal helper for other converter implementations:
+ * tells whether a byte is a lead byte, which is the case when its entry
+ * in row 0 of the state table is a transition entry.
+ */
+U_CFUNC UBool
+ucnv_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte) {
+    const uint8_t column=(uint8_t)byte;
+    const int32_t entry=sharedData->mbcs.stateTable[0][column];
+    return (UBool)MBCS_ENTRY_IS_TRANSITION(entry);
+}
+
+/*
+ * Writes the substitution character for an unmappable code point.
+ *
+ * Chooses between subChar1 and subChars, resets cnv->useSubChar1, and for
+ * MBCS_OUTPUT_2_SISO converters prepends UCNV_SI/UCNV_SO when the length of
+ * the substitution character does not match the current mode kept in
+ * cnv->fromUnicodeStatus. The bytes are emitted via ucnv_cbFromUWriteBytes().
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR (and writes nothing) if a SISO converter has
+ * a substitution character that is neither 1 nor 2 bytes long.
+ */
+static void U_CALLCONV
+ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
+                  int32_t offsetIndex,
+                  UErrorCode *pErrorCode) {
+    UConverter *cnv=pArgs->converter;
+    char siso[4];
+    char *bytes;
+    int32_t byteCount;
+
+    /*
+     * subChar1 is used if it is set (not 0) and either the extension code
+     * asked for it (useSubChar1) or, without extensions, the unmappable
+     * Unicode code point is up to U+00ff (IBM MBCS behavior)
+     */
+    UBool wantSubChar1=FALSE;
+    if(cnv->subChar1!=0) {
+        if(cnv->sharedData->mbcs.extIndexes!=NULL) {
+            wantSubChar1=cnv->useSubChar1;
+        } else {
+            wantSubChar1=(UBool)(cnv->invalidUCharBuffer[0]<=0xff);
+        }
+    }
+
+    if(wantSubChar1) {
+        bytes=(char *)&cnv->subChar1;
+        byteCount=1;
+    } else {
+        /* subChar in all other cases */
+        bytes=(char *)cnv->subChars;
+        byteCount=cnv->subCharLen;
+    }
+
+    /* the selector is valid for one code point only */
+    cnv->useSubChar1=FALSE;
+
+    if (cnv->sharedData->mbcs.outputType == MBCS_OUTPUT_2_SISO) {
+        int32_t used=0;
+
+        /* fromUnicodeStatus contains prevLength */
+        if(byteCount==1) {
+            if(cnv->fromUnicodeStatus==2) {
+                /* DBCS mode and SBCS sub char: change to SBCS */
+                cnv->fromUnicodeStatus=1;
+                siso[used++]=UCNV_SI;
+            }
+            siso[used++]=bytes[0];
+        } else if(byteCount==2) {
+            if(cnv->fromUnicodeStatus<=1) {
+                /* SBCS mode and DBCS sub char: change to DBCS */
+                cnv->fromUnicodeStatus=2;
+                siso[used++]=UCNV_SO;
+            }
+            siso[used++]=bytes[0];
+            siso[used++]=bytes[1];
+        } else {
+            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        bytes=siso;
+        byteCount=used;
+    }
+
+    ucnv_cbFromUWriteBytes(pArgs, bytes, byteCount, offsetIndex, pErrorCode);
+}
+
+/*
+ * Returns the more specific converter type for an MBCS converter.
+ * SBCS, DBCS, and EBCDIC_STATEFUL are replaced by MBCS, but here we cheat a little:
+ * - a single state means UCNV_SBCS
+ * - output type MBCS_OUTPUT_2_SISO means UCNV_EBCDIC_STATEFUL
+ * - exactly 2 bytes per character (min and max) means UCNV_DBCS
+ * - anything else is UCNV_MBCS
+ */
+U_CFUNC UConverterType
+ucnv_MBCSGetType(const UConverter* converter) {
+    const UConverterSharedData *shared=converter->sharedData;
+    UConverterType type=(UConverterType)UCNV_MBCS;
+
+    if(shared->mbcs.countStates==1) {
+        type=(UConverterType)UCNV_SBCS;
+    } else if((shared->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) {
+        type=(UConverterType)UCNV_EBCDIC_STATEFUL;
+    } else if(shared->staticData->minBytesPerChar==2 &&
+              shared->staticData->maxBytesPerChar==2) {
+        type=(UConverterType)UCNV_DBCS;
+    }
+    return type;
+}
+
+#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
diff --git a/thirdparty/icu4c/common/ucnvmbcs.h b/thirdparty/icu4c/common/ucnvmbcs.h
new file mode 100644
index 0000000000..c8f3b89a5e
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnvmbcs.h
@@ -0,0 +1,605 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2000-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ucnvmbcs.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2000jul07
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UCNVMBCS_H__
+#define __UCNVMBCS_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "ucnv_cnv.h"
+#include "ucnv_ext.h"
+
+/**
+ * ICU conversion (.cnv) data file structure, following the usual UDataInfo
+ * header.
+ *
+ * Format version: 6.2
+ *
+ * struct UConverterStaticData -- struct containing the converter name, IBM CCSID,
+ * min/max bytes per character, etc.
+ * see ucnv_bld.h
+ *
+ * --------------------
+ *
+ * The static data is followed by conversionType-specific data structures.
+ * At the moment, there are only variations of MBCS converters. They all have
+ * the same toUnicode structures, while the fromUnicode structures for SBCS
+ * differ from those for other MBCS-style converters.
+ *
+ * _MBCSHeader.version 5 is optional and not backward-compatible
+ * (as usual for changes in the major version field).
+ *
+ * Versions 5.m work like versions 4.m except:
+ * - The _MBCSHeader has variable length (and is always longer than in version 4).
+ * See the further description of struct _MBCSHeader below.
+ * - There is a set of flags which indicate further incompatible changes.
+ * (Reader code must reject the file if it does not recognize them all.)
+ * - In particular, one of these flags indicates that most of the fromUnicode
+ * data is missing and must be reconstituted from the toUnicode data
+ * and from the utf8Friendly mbcsIndex at load time.
+ * (This only works with a utf8Friendly table.)
+ * In this case, makeconv may increase maxFastUChar automatically to U+FFFF.
+ *
+ * The first of these versions is 5.3, which is like 4.3 except for the differences above.
+ *
+ * When possible, makeconv continues to generate version 4.m files.
+ *
+ * _MBCSHeader.version 5.4/4.4 supports "good one-way" mappings (|4)
+ * in the extension tables (fromUTableValues bit 30). See ucnv_ext.h for details.
+ *
+ * _MBCSHeader.version 4.3 optionally modifies the fromUnicode data structures
+ * slightly and optionally adds a table for conversion to MBCS (non-SBCS)
+ * charsets.
+ *
+ * The modifications are to make the data utf8Friendly. Not every 4.3 file
+ * contains utf8Friendly data.
+ * It is utf8Friendly if _MBCSHeader.version[2]!=0.
+ * In this case, the data structures are utf8Friendly up to the code point
+ * maxFastUChar=((_MBCSHeader.version[2]<<8)|0xff)
+ *
+ * A utf8Friendly file has fromUnicode stage 3 entries for code points up to
+ * maxFastUChar allocated in blocks of 64 for indexing with the 6 bits from
+ * a UTF-8 trail byte. ASCII is allocated linearly with 128 contiguous entries.
+ *
+ * In addition, a utf8Friendly MBCS file contains an additional
+ * uint16_t mbcsIndex[(maxFastUChar+1)>>6];
+ * which replaces the stage 1 and 2 tables for indexing with bits from the
+ * UTF-8 lead byte and middle trail byte. Unlike the older MBCS stage 2 table,
+ * the mbcsIndex does not contain roundtrip flags. Therefore, all fallbacks
+ * from code points up to maxFastUChar (and roundtrips to 0x00) are moved to
+ * the extension data structure. This also allows for faster roundtrip
+ * conversion from UTF-16.
+ *
+ * SBCS files do not contain an additional sbcsIndex[] array because the
+ * proportional size increase would be noticeable, but the runtime
+ * code builds one for the code point range for which the runtime conversion
+ * code is optimized.
+ *
+ * For SBCS, maxFastUChar should be at least U+0FFF. The initial makeconv
+ * implementation sets it to U+1FFF. Because the sbcsIndex is not stored in
+ * the file, a larger maxFastUChar only affects stage 3 block allocation size
+ * and is free in empty blocks. (Larger blocks with sparse contents cause larger
+ * files.) U+1FFF includes almost all of the small scripts.
+ * U+0FFF covers UTF-8 two-byte sequences and three-byte sequences starting with
+ * 0xe0. This includes most scripts with legacy SBCS charsets.
+ * The initial runtime implementation using 4.3 files only builds an sbcsIndex
+ * for code points up to U+0FFF.
+ *
+ * For MBCS, maxFastUChar should be at least U+D7FF (=initial value).
+ * This boundary is convenient because practically all of the commonly used
+ * characters are below it, and because it is the boundary to surrogate
+ * code points, above which special handling is necessary anyway.
+ * (Surrogate pair assembly for UTF-16, validity checking for UTF-8.)
+ *
+ * maxFastUChar could be up to U+FFFF to cover the whole BMP, which could be
+ * useful especially for conversion from UTF-8 when the input can be assumed
+ * to be valid, because the surrogate range would then not have to be
+ * checked.
+ * (With maxFastUChar=0xffff, makeconv would have to check for mbcsIndex value
+ * overflow because with the all-unassigned block 0 and nearly full mappings
+ * from the BMP it is theoretically possible that an index into stage 3
+ * exceeds 16 bits.)
+ *
+ * _MBCSHeader.version 4.2 adds an optional conversion extension data structure.
+ * If it is present, then an ICU version reading header versions 4.0 or 4.1
+ * will be able to use the base table and ignore the extension.
+ *
+ * The unicodeMask in the static data is part of the base table data structure.
+ * Especially, the UCNV_HAS_SUPPLEMENTARY flag determines the length of the
+ * fromUnicode stage 1 array.
+ * The static data unicodeMask refers only to the base table's properties if
+ * a base table is included.
+ * In an extension-only file, the static data unicodeMask is 0.
+ * The extension data indexes have a separate field with the unicodeMask flags.
+ *
+ * MBCS-style data structure following the static data.
+ * Offsets are counted in bytes from the beginning of the MBCS header structure.
+ * Details about usage in comments in ucnvmbcs.c.
+ *
+ * struct _MBCSHeader (see the definition in this header file below)
+ * contains 32-bit fields as follows:
+ * 8 values:
+ * 0 uint8_t[4] MBCS version in UVersionInfo format (currently 4.3.x.0)
+ * 1 uint32_t countStates
+ * 2 uint32_t countToUFallbacks
+ * 3 uint32_t offsetToUCodeUnits
+ * 4 uint32_t offsetFromUTable
+ * 5 uint32_t offsetFromUBytes
+ * 6 uint32_t flags, bits:
+ * 31.. 8 offsetExtension -- _MBCSHeader.version 4.2 (ICU 2.8) and higher
+ * 0 for older versions and if
+ * there is no extension structure
+ * 7.. 0 outputType
+ * 7 uint32_t fromUBytesLength -- _MBCSHeader.version 4.1 (ICU 2.4) and higher
+ * counts bytes in fromUBytes[]
+ *
+ * New and required in version 5:
+ * 8 uint32_t options, bits:
+ * 31..16 reserved for flags that can be added without breaking
+ * backward compatibility
+ * 15.. 6 reserved for flags whose addition will break
+ * backward compatibility
+ * 6 MBCS_OPT_NO_FROM_U -- if set,
+ * then most of the fromUnicode data is omitted;
+ * fullStage2Length is present and the missing
+ * bottom part of stage 2 must be reconstituted from
+ * the toUnicode data;
+ * stage 3 is missing completely as well;
+ * not used for SBCS tables
+ * 5.. 0 length of the _MBCSHeader (number of uint32_t)
+ *
+ * New and optional in version 5:
+ * 9 uint32_t fullStage2Length: used if MBCS_OPT_NO_FROM_U is set
+ * specifies the full length of stage 2
+ * including the omitted part
+ *
+ * if(outputType==MBCS_OUTPUT_EXT_ONLY) {
+ * -- base table name for extension-only table
+ * char baseTableName[variable]; -- with NUL plus padding for 4-alignment
+ *
+ * -- all _MBCSHeader fields except for version and flags are 0
+ * } else {
+ * -- normal base table with optional extension
+ *
+ * int32_t stateTable[countStates][256];
+ *
+ * struct _MBCSToUFallback { (fallbacks are sorted by offset)
+ * uint32_t offset;
+ * UChar32 codePoint;
+ * } toUFallbacks[countToUFallbacks];
+ *
+ * uint16_t unicodeCodeUnits[(offsetFromUTable-offsetToUCodeUnits)/2];
+ * (padded to an even number of units)
+ *
+ * -- stage 1 tables
+ * if(staticData.unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
+ * -- stage 1 table for all of Unicode
+ * uint16_t fromUTable[0x440]; (32-bit-aligned)
+ * } else {
+ * -- BMP-only tables have a smaller stage 1 table
+ * uint16_t fromUTable[0x40]; (32-bit-aligned)
+ * }
+ *
+ * -- stage 2 tables
+ * length determined by top of stage 1 and bottom of stage 3 tables
+ * if(outputType==MBCS_OUTPUT_1) {
+ * -- SBCS: pure indexes
+ * uint16_t stage 2 indexes[?];
+ * } else {
+ * -- DBCS, MBCS, EBCDIC_STATEFUL, ...: roundtrip flags and indexes
+ * uint32_t stage 2 flags and indexes[?];
+ * if(options&MBCS_OPT_NO_FROM_U) {
+ * stage 2 really has length fullStage2Length
+ * and the omitted lower part must be reconstituted from
+ * the toUnicode data
+ * }
+ * }
+ *
+ * -- stage 3 tables with byte results
+ * if(outputType==MBCS_OUTPUT_1) {
+ * -- SBCS: each 16-bit result contains flags and the result byte, see ucnvmbcs.c
+ * uint16_t fromUBytes[fromUBytesLength/2];
+ * } else if(!(options&MBCS_OPT_NO_FROM_U)) {
+ * -- DBCS, MBCS, EBCDIC_STATEFUL, ... 2/3/4 bytes result, see ucnvmbcs.c
+ * uint8_t fromUBytes[fromUBytesLength]; or
+ * uint16_t fromUBytes[fromUBytesLength/2]; or
+ * uint32_t fromUBytes[fromUBytesLength/4];
+ * } else {
+ * fromUBytes[] must be reconstituted from the toUnicode data
+ * }
+ *
+ * -- optional utf8Friendly mbcsIndex -- _MBCSHeader.version 4.3 (ICU 3.8) and higher
+ * if(outputType!=MBCS_OUTPUT_1 &&
+ * _MBCSHeader.version[1]>=3 &&
+ * (maxFastUChar=_MBCSHeader.version[2])!=0
+ * ) {
+ * maxFastUChar=(maxFastUChar<<8)|0xff;
+ * uint16_t mbcsIndex[(maxFastUChar+1)>>6];
+ * }
+ * }
+ *
+ * -- extension table, details see ucnv_ext.h
+ * int32_t indexes[>=32]; ...
+ */
+
+/* MBCS converter data and state -------------------------------------------- */
+
+enum {
+ MBCS_MAX_STATE_COUNT=128 /* presumably the maximum number of states in a state table -- TODO confirm; 128 equals the number of values of the 7-bit state field (mask 0x7f) read by MBCS_ENTRY_STATE() */
+};
+
+/**
+ * MBCS action codes for conversions to Unicode.
+ * These values are in bits 23..20 of the state table entries,
+ * so the numeric values are spelled out here.
+ */
+enum {
+    MBCS_STATE_VALID_DIRECT_16=0,
+    MBCS_STATE_VALID_DIRECT_20=1,
+
+    MBCS_STATE_FALLBACK_DIRECT_16=2,
+    MBCS_STATE_FALLBACK_DIRECT_20=3,
+
+    MBCS_STATE_VALID_16=4,
+    MBCS_STATE_VALID_16_PAIR=5,
+
+    MBCS_STATE_UNASSIGNED=6,
+    MBCS_STATE_ILLEGAL=7,
+
+    MBCS_STATE_CHANGE_ONLY=8
+};
+
+/*
+ * Macros for state table entries.
+ *
+ * Layout of a 32-bit entry as used by these macros:
+ * - bit 31: set for final entries (which are therefore negative as int32_t),
+ *   clear for transition entries
+ * - bits 30..24 (final) or 31..24 (transition): state
+ * - transition entries: bits 23..0 offset
+ * - final entries: bits 23..20 action, bits 19..0 value
+ *
+ * Every macro parameter is parenthesized before it is cast or combined,
+ * so that expressions can be passed as arguments safely.
+ */
+#define MBCS_ENTRY_TRANSITION(state, offset) (int32_t)(((int32_t)(state)<<24L)|(offset))
+#define MBCS_ENTRY_TRANSITION_SET_OFFSET(entry, offset) (int32_t)(((entry)&0xff000000)|(offset))
+#define MBCS_ENTRY_TRANSITION_ADD_OFFSET(entry, offset) (int32_t)((entry)+(offset))
+
+#define MBCS_ENTRY_FINAL(state, action, value) (int32_t)(0x80000000|((int32_t)(state)<<24L)|((action)<<20L)|(value))
+#define MBCS_ENTRY_SET_FINAL(entry) (int32_t)((entry)|0x80000000)
+#define MBCS_ENTRY_FINAL_SET_ACTION(entry, action) (int32_t)(((entry)&0xff0fffff)|((int32_t)(action)<<20L))
+#define MBCS_ENTRY_FINAL_SET_VALUE(entry, value) (int32_t)(((entry)&0xfff00000)|(value))
+#define MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, action, value) (int32_t)(((entry)&0xff000000)|((int32_t)(action)<<20L)|(value))
+
+#define MBCS_ENTRY_SET_STATE(entry, state) (int32_t)(((entry)&0x80ffffff)|((int32_t)(state)<<24L))
+
+#define MBCS_ENTRY_STATE(entry) ((((uint32_t)(entry))>>24)&0x7f)
+
+#define MBCS_ENTRY_IS_TRANSITION(entry) ((entry)>=0)
+#define MBCS_ENTRY_IS_FINAL(entry) ((entry)<0)
+
+#define MBCS_ENTRY_TRANSITION_STATE(entry) (((uint32_t)(entry))>>24)
+#define MBCS_ENTRY_TRANSITION_OFFSET(entry) ((entry)&0xffffff)
+
+#define MBCS_ENTRY_FINAL_STATE(entry) ((((uint32_t)(entry))>>24)&0x7f)
+#define MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry) ((entry)<(int32_t)0x80100000)
+#define MBCS_ENTRY_FINAL_ACTION(entry) ((((uint32_t)(entry))>>20)&0xf)
+#define MBCS_ENTRY_FINAL_VALUE(entry) ((entry)&0xfffff)
+#define MBCS_ENTRY_FINAL_VALUE_16(entry) (uint16_t)(entry)
+
+/*
+ * Tests the asciiRoundtrips bit for an ASCII byte b (0..0x7f):
+ * bit (b>>2) covers the four code points b&~3 .. b|3.
+ * The shift is done on an unsigned 1 because the bit index can be 31.
+ */
+#define IS_ASCII_ROUNDTRIP(b, asciiRoundtrips) (((asciiRoundtrips) & ((uint32_t)1<<((b)>>2)))!=0)
+
+/* single-byte fromUnicode: get the 16-bit result word */
+#define MBCS_SINGLE_RESULT_FROM_U(table, results, c) (results)[ (table)[ (table)[(c)>>10] +(((c)>>4)&0x3f) ] +((c)&0xf) ]
+
+/* single-byte fromUnicode using the sbcsIndex */
+#define SBCS_RESULT_FROM_LOW_BMP(table, results, c) (results)[ (table)[(c)>>6] +((c)&0x3f) ]
+
+/* single-byte fromUTF8 using the sbcsIndex; l and t must be masked externally; can be l=0 and t<=0x7f */
+#define SBCS_RESULT_FROM_UTF8(table, results, l, t) (results)[ (table)[l] +(t) ]
+
+/* multi-byte fromUnicode: get the 32-bit stage 2 entry */
+#define MBCS_STAGE_2_FROM_U(table, c) ((const uint32_t *)(table))[ (table)[(c)>>10] +(((c)>>4)&0x3f) ]
+#define MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ( ((stage2Entry) & ((uint32_t)1<< (16+((c)&0xf)) )) !=0)
+
+#define MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c) ((uint16_t *)(bytes))[16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf)]
+#define MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c) ((uint32_t *)(bytes))[16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf)]
+
+#define MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c) ((bytes)+(16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf))*3)
+
+/* double-byte fromUnicode using the mbcsIndex */
+#define DBCS_RESULT_FROM_MOST_BMP(table, results, c) (results)[ (table)[(c)>>6] +((c)&0x3f) ]
+
+/* double-byte fromUTF8 using the mbcsIndex; l and t1 combined into lt1; lt1 and t2 must be masked externally */
+#define DBCS_RESULT_FROM_UTF8(table, results, lt1, t2) (results)[ (table)[lt1] +(t2) ]
+
+
+/**
+ * MBCS output types for conversions from Unicode.
+ * These per-converter types determine the storage method in stage 3 of the lookup table,
+ * mostly how many bytes are stored per entry.
+ * The trailing comment on each enumerator is its numeric value in hexadecimal.
+ */
+enum {
+ MBCS_OUTPUT_1, /* 0 */
+ MBCS_OUTPUT_2, /* 1 */
+ MBCS_OUTPUT_3, /* 2 */
+ MBCS_OUTPUT_4, /* 3 */
+
+ MBCS_OUTPUT_3_EUC=8, /* 8 */
+ MBCS_OUTPUT_4_EUC, /* 9 */
+
+ MBCS_OUTPUT_2_SISO=12, /* c */
+ MBCS_OUTPUT_2_HZ, /* d */
+
+ MBCS_OUTPUT_EXT_ONLY, /* e */
+
+ MBCS_OUTPUT_COUNT, /* f: one more than the last enumerator above */
+
+ MBCS_OUTPUT_DBCS_ONLY=0xdb /* runtime-only type for DBCS-only handling of SISO tables */
+};
+
+/**
+ * Fallbacks to Unicode are stored outside the normal state table and code point structures
+ * in a vector of items of this type. They are sorted by offset.
+ */
+typedef struct {
+ uint32_t offset; /* sort key of the vector; NOTE(review): presumably the state-table offset of the byte sequence -- confirm in ucnvmbcs.cpp */
+ UChar32 codePoint; /* the Unicode code point that the fallback maps to */
+} _MBCSToUFallback;
+
+/** Constants for fast and UTF-8-friendly conversion; each *_LIMIT is the exclusive bound *_MAX+1. */
+enum {
+ SBCS_FAST_MAX=0x0fff, /* maximum code point with UTF-8-friendly SBCS runtime code, see makeconv SBCS_UTF8_MAX */
+ SBCS_FAST_LIMIT=SBCS_FAST_MAX+1, /* =0x1000 */
+ MBCS_FAST_MAX=0xd7ff, /* maximum code point with UTF-8-friendly MBCS runtime code, see makeconv MBCS_UTF8_MAX */
+ MBCS_FAST_LIMIT=MBCS_FAST_MAX+1 /* =0xd800 */
+};
+
+/**
+ * This is the MBCS part of the UConverterTable union (a runtime data structure).
+ * It keeps all the per-converter data and points into the loaded mapping tables.
+ *
+ * utf8Friendly data structures added with _MBCSHeader.version 4.3
+ *
+ * UCNV_MBCS_TABLE_INITIALIZER lists one initializer per member in declaration
+ * order; keep the two in sync when adding or reordering members.
+ */
+typedef struct UConverterMBCSTable {
+ /* toUnicode */
+ uint8_t countStates, dbcsOnlyState, stateTableOwned; /* countStates: number of 256-entry rows in stateTable (see the bound noted on its declaration) */
+ uint32_t countToUFallbacks; /* NOTE(review): presumably the number of items in toUFallbacks -- confirm */
+
+ const int32_t (*stateTable)/*[countStates]*/[256];
+ int32_t (*swapLFNLStateTable)/*[countStates]*/[256]; /* for swaplfnl */
+ const uint16_t *unicodeCodeUnits/*[countUnicodeResults]*/;
+ const _MBCSToUFallback *toUFallbacks;
+
+ /* fromUnicode */
+ const uint16_t *fromUnicodeTable;
+ const uint16_t *mbcsIndex; /* for fast conversion from most of BMP to MBCS (utf8Friendly data) */
+ uint16_t sbcsIndex[SBCS_FAST_LIMIT>>6]; /* for fast conversion from low BMP to SBCS (utf8Friendly data) */
+ const uint8_t *fromUnicodeBytes;
+ uint8_t *swapLFNLFromUnicodeBytes; /* for swaplfnl */
+ uint32_t fromUBytesLength;
+ uint8_t outputType, unicodeMask; /* outputType: presumably one of the MBCS_OUTPUT_* values -- confirm */
+ UBool utf8Friendly; /* for utf8Friendly data */
+ UChar maxFastUChar; /* for utf8Friendly data */
+
+ /* roundtrips */
+ uint32_t asciiRoundtrips; /* bit set, 32 bits; tested by IS_ASCII_ROUNDTRIP() */
+
+ /* reconstituted data that was omitted from the .cnv file */
+ uint8_t *reconstitutedData;
+
+ /* converter name for swaplfnl */
+ char *swapLFNLName;
+
+ /* extension data */
+ struct UConverterSharedData *baseSharedData;
+ const int32_t *extIndexes;
+} UConverterMBCSTable;
+
+#define UCNV_MBCS_TABLE_INITIALIZER { \
+ /* toUnicode: countStates, dbcsOnlyState, stateTableOwned; countToUFallbacks; then the four table pointers */ \
+ 0, 0, 0, \
+ 0, \
+ \
+ NULL, \
+ NULL, \
+ NULL, \
+ NULL, \
+ \
+ /* fromUnicode: one initializer per member in declaration order; { 0 } zero-fills sbcsIndex[] */ \
+ NULL, \
+ NULL, \
+ { 0 }, \
+ NULL, \
+ NULL, \
+ 0, \
+ 0, 0, \
+ false, \
+ 0, \
+ \
+ /* roundtrips */ \
+ 0, \
+ \
+ /* reconstituted data that was omitted from the .cnv file */ \
+ NULL, \
+ \
+ /* converter name for swaplfnl */ \
+ NULL, \
+ \
+ /* extension data */ \
+ NULL, \
+ NULL \
+}
+
+enum {
+ MBCS_OPT_LENGTH_MASK=0x3f, /* low 6 bits of the options word; NOTE(review): presumably the header length -- confirm against the data format description */
+ MBCS_OPT_NO_FROM_U=0x40, /* bit 6; _MBCSHeader.fullStage2Length is used when this is set */
+ /*
+ * If any of the following options bits are set,
+ * then the file must be rejected.
+ * (0xffc0 = every bit above the length mask, including MBCS_OPT_NO_FROM_U.)
+ */
+ MBCS_OPT_INCOMPATIBLE_MASK=0xffc0,
+ /*
+ * Remove bits from this mask as more options are recognized
+ * by all implementations that use this constant.
+ * (0xff80 = MBCS_OPT_INCOMPATIBLE_MASK without MBCS_OPT_NO_FROM_U.)
+ */
+ MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK=0xff80
+};
+
+enum {
+ MBCS_HEADER_V4_LENGTH=8, /* NOTE(review): presumably counted in 32-bit units (version plus the 7 original uint32_t fields of _MBCSHeader) -- confirm */
+ MBCS_HEADER_V5_MIN_LENGTH=9 /* one more than the version 4 length; version 5 adds the required options field */
+};
+
+/**
+ * MBCS data header. See data format description above.
+ * UCNV_MBCS_HEADER_INITIALIZER below supplies one zero per member in
+ * declaration order; keep the two in sync.
+ */
+typedef struct {
+ UVersionInfo version;
+ uint32_t countStates,
+ countToUFallbacks,
+ offsetToUCodeUnits,
+ offsetFromUTable,
+ offsetFromUBytes,
+ flags,
+ fromUBytesLength;
+
+ /* new and required in version 5 */
+ uint32_t options; /* see the MBCS_OPT_* constants */
+
+ /* new and optional in version 5; used if options&MBCS_OPT_NO_FROM_U */
+ uint32_t fullStage2Length; /* number of 32-bit units */
+} _MBCSHeader;
+
+#define UCNV_MBCS_HEADER_INITIALIZER { { 0 }, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* version, the 7 original uint32_t fields, options, fullStage2Length */
+
+/*
+ * This is a simple version of _MBCSGetNextUChar() that is used
+ * by other converter implementations.
+ * It only returns an "assigned" result if it consumes the entire input.
+ * It does not use state from the converter, nor error codes.
+ * It does not handle the EBCDIC swaplfnl option (set in UConverter).
+ * It handles conversion extensions but not GB 18030.
+ *
+ * Parameters:
+ * sharedData the shared data of the converter whose mapping is used
+ * source the input bytes
+ * length the number of input bytes at source
+ * useFallback NOTE(review): presumably allows fallback mappings in addition
+ * to roundtrip mappings -- confirm in the implementation
+ *
+ * Return value:
+ * U+fffe unassigned
+ * U+ffff illegal
+ * otherwise the Unicode code point
+ */
+U_CFUNC UChar32
+ucnv_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
+ const char *source, int32_t length,
+ UBool useFallback);
+
+/**
+ * This version of ucnv_MBCSSimpleGetNextUChar() is optimized for single-byte, single-state codepages.
+ * It takes exactly one input byte (b) instead of a source pointer and length.
+ * It does not handle the EBCDIC swaplfnl option (set in UConverter).
+ * It does not handle conversion extensions (_extToU()).
+ */
+U_CFUNC UChar32
+ucnv_MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
+ uint8_t b, UBool useFallback);
+
+/**
+ * This macro version of ucnv_MBCSSingleSimpleGetNextUChar() gets a code point from a byte.
+ * It works for single-byte, single-state codepages that only map
+ * to and from BMP code points, and it always
+ * returns fallback values.
+ * It returns the low 16 bits of the state 0 table entry for the byte,
+ * without checking the entry's type or action.
+ */
+#define _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(sharedData, b) \
+ (UChar)MBCS_ENTRY_FINAL_VALUE_16((sharedData)->mbcs.stateTable[0][(uint8_t)(b)])
+
+/**
+ * This is an internal function that allows other converter implementations
+ * to check whether a byte is a lead byte.
+ * Parameters: sharedData is the shared data of the converter to query,
+ * byte is the byte to test.
+ */
+U_CFUNC UBool
+ucnv_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte);
+
+/** This is a macro version of ucnv_MBCSIsLeadByte(): true if the state 0 table entry for the byte is a transition entry (non-negative). */
+#define _MBCS_IS_LEAD_BYTE(sharedData, byte) \
+ (UBool)MBCS_ENTRY_IS_TRANSITION((sharedData)->mbcs.stateTable[0][(uint8_t)(byte)])
+
+/*
+ * This is another simple conversion function for internal use by other
+ * conversion implementations.
+ * It does not use the converter state nor call callbacks.
+ * It does not handle the EBCDIC swaplfnl option (set in UConverter).
+ * It handles conversion extensions but not GB 18030.
+ *
+ * It converts one single Unicode code point into codepage bytes, encoded
+ * as one 32-bit value. The function returns the number of bytes in *pValue:
+ * 1..4 the number of bytes in *pValue
+ * 0 unassigned (*pValue undefined)
+ * -1 illegal (currently not used, *pValue undefined)
+ *
+ * *pValue will contain the resulting bytes with the last byte in bits 7..0,
+ * the second to last byte in bits 15..8, etc.
+ * For example, a two-byte result b1 b2 is returned as *pValue==((b1<<8)|b2)
+ * with a return value of 2.
+ * Currently, the function assumes but does not check that 0<=c<=0x10ffff.
+ */
+U_CFUNC int32_t
+ucnv_MBCSFromUChar32(UConverterSharedData *sharedData,
+ UChar32 c, uint32_t *pValue,
+ UBool useFallback);
+
+/**
+ * This version of ucnv_MBCSFromUChar32() is optimized for single-byte codepages.
+ * It does not handle the EBCDIC swaplfnl option (set in UConverter).
+ *
+ * It returns the codepage byte for the code point, or -1 if it is unassigned.
+ * Unlike ucnv_MBCSFromUChar32(), the result is the return value itself;
+ * there is no pValue output parameter.
+ */
+U_CFUNC int32_t
+ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData,
+ UChar32 c,
+ UBool useFallback);
+
+/**
+ * SBCS, DBCS, and EBCDIC_STATEFUL are replaced by MBCS, but
+ * we cheat a little about the type, returning the old types if appropriate.
+ * Returns the UConverterType to report for the given converter.
+ */
+U_CFUNC UConverterType
+ucnv_MBCSGetType(const UConverter* converter);
+
+U_CFUNC void
+ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode); /* NOTE(review): presumably the MBCS fromUnicode conversion function, with offsets support -- confirm in ucnvmbcs.cpp */
+U_CFUNC void
+ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode); /* NOTE(review): presumably the MBCS toUnicode conversion function, with offsets support -- confirm in ucnvmbcs.cpp */
+
+/*
+ * Internal function returning a UnicodeSet for toUnicode() conversion.
+ * Currently only used for ISO-2022-CN, and only handles roundtrip mappings.
+ * In the future, if we add support for fallback sets, this function
+ * needs to be updated.
+ * Handles extensions.
+ * Does not empty the set first.
+ * The function returns void; NOTE(review): presumably the set is filled
+ * through the USetAdder sa, and failures are reported in *pErrorCode -- confirm.
+ */
+U_CFUNC void
+ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
+ const USetAdder *sa,
+ UConverterUnicodeSet which,
+ UErrorCode *pErrorCode);
+
+/*
+ * Same as ucnv_MBCSGetUnicodeSetForUnicode() but
+ * the set can be filtered by encoding scheme.
+ * Used by stateful converters which share regular conversion tables
+ * but only use a subset of their mappings.
+ * The additional filter parameter selects the subset.
+ */
+U_CFUNC void
+ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
+ const USetAdder *sa,
+ UConverterUnicodeSet which,
+ UConverterSetFilter filter,
+ UErrorCode *pErrorCode);
+
+#endif
+
+#endif
diff --git a/thirdparty/icu4c/common/ucnvscsu.cpp b/thirdparty/icu4c/common/ucnvscsu.cpp
new file mode 100644
index 0000000000..74b5722b97
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnvscsu.cpp
@@ -0,0 +1,2045 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2000-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ucnvscsu.c
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2000nov18
+* created by: Markus W. Scherer
+*
+* This is an implementation of the Standard Compression Scheme for Unicode
+* as defined in http://www.unicode.org/unicode/reports/tr6/ .
+* Reserved commands and window settings are treated as illegal sequences and
+* will result in callback calls.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "cmemory.h"
+
+/* SCSU definitions --------------------------------------------------------- */
+
+/* SCSU command byte values: the Sxx values are handled in single-byte mode, the Uxx values in Unicode mode */
+enum {
+ SQ0=0x01, /* Quote from window pair 0 */
+ SQ7=0x08, /* Quote from window pair 7 */
+ SDX=0x0B, /* Define a window as extended */
+ Srs=0x0C, /* reserved */
+ SQU=0x0E, /* Quote a single Unicode character */
+ SCU=0x0F, /* Change to Unicode mode */
+ SC0=0x10, /* Select window 0 */
+ SC7=0x17, /* Select window 7 */
+ SD0=0x18, /* Define and select window 0 */
+ SD7=0x1F, /* Define and select window 7 */
+
+ UC0=0xE0, /* Select window 0 (and return to single-byte mode) */
+ UC7=0xE7, /* Select window 7 (and return to single-byte mode) */
+ UD0=0xE8, /* Define and select window 0 (and return to single-byte mode) */
+ UD7=0xEF, /* Define and select window 7 (and return to single-byte mode) */
+ UQU=0xF0, /* Quote a single Unicode character */
+ UDX=0xF1, /* Define a Window as extended (and return to single-byte mode) */
+ Urs=0xF2 /* reserved */
+};
+
+enum {
+ /*
+ * Unicode code points from 3400 to E000 are not addressable by
+ * dynamic window, since in these areas no short run alphabets are
+ * found. Therefore add gapOffset to all values from gapThreshold.
+ * (Offset bytes below gapThreshold map to the offset byte<<7.)
+ */
+ gapThreshold=0x68,
+ gapOffset=0xAC00,
+
+ /* values between reservedStart and fixedThreshold are reserved */
+ reservedStart=0xA8,
+
+ /* use table of predefined fixed offsets for values from fixedThreshold */
+ fixedThreshold=0xF9
+};
+
+/* constant offsets for the 8 static windows; used for quoted bytes below 0x80 after an SQn command */
+static const uint32_t staticOffsets[8]={
+ 0x0000, /* ASCII for quoted tags */
+ 0x0080, /* Latin - 1 Supplement (for access to punctuation) */
+ 0x0100, /* Latin Extended-A */
+ 0x0300, /* Combining Diacritical Marks */
+ 0x2000, /* General Punctuation */
+ 0x2080, /* Currency Symbols */
+ 0x2100, /* Letterlike Symbols and Number Forms */
+ 0x3000 /* CJK Symbols and punctuation */
+};
+
+/* initial offsets for the 8 dynamic (sliding) windows; copied into SCSUData on reset */
+static const uint32_t initialDynamicOffsets[8]={
+ 0x0080, /* Latin-1 */
+ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */
+ 0x0400, /* Cyrillic */
+ 0x0600, /* Arabic */
+ 0x0900, /* Devanagari */
+ 0x3040, /* Hiragana */
+ 0x30A0, /* Katakana */
+ 0xFF00 /* Fullwidth ASCII */
+};
+
+/* Table of fixed predefined offsets, indexed by (window offset byte - fixedThreshold); the leading comment of each entry is that offset byte */
+static const uint32_t fixedOffsets[]={
+ /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */
+ /* 0xFA */ 0x0250, /* IPA extensions */
+ /* 0xFB */ 0x0370, /* Greek */
+ /* 0xFC */ 0x0530, /* Armenian */
+ /* 0xFD */ 0x3040, /* Hiragana */
+ /* 0xFE */ 0x30A0, /* Katakana */
+ /* 0xFF */ 0xFF60 /* Halfwidth Katakana */
+};
+
+/* state values of the toUnicode state machine: which byte of a sequence is expected next */
+enum {
+ readCommand, /* at a boundary: next byte is a command or a directly encoded character */
+ quotePairOne, /* after SQU/UQU: next byte is the first (high) byte of a 16-bit unit */
+ quotePairTwo, /* next byte is the second (low) byte of a 16-bit unit */
+ quoteOne, /* after SQn: next byte is quoted from static/dynamic window n */
+ definePairOne, /* after SDX/UDX: next byte holds the window number and the high offset bits */
+ definePairTwo, /* next byte holds the low offset bits of an extended window */
+ defineOne /* after SDn/UDn: next byte is the window offset byte */
+};
+
+typedef struct SCSUData {
+ /* dynamic window offsets, initialize to default values from initialDynamicOffsets */
+ uint32_t toUDynamicOffsets[8];
+ uint32_t fromUDynamicOffsets[8];
+
+ /* state machine state - toUnicode */
+ UBool toUIsSingleByteMode;
+ uint8_t toUState; /* one of the state values (readCommand etc.) */
+ int8_t toUQuoteWindow, toUDynamicWindow; /* window numbers 0..7 */
+ uint8_t toUByteOne; /* first data byte of a pending two-byte sequence */
+ uint8_t toUPadding[3];
+
+ /* state machine state - fromUnicode */
+ UBool fromUIsSingleByteMode;
+ int8_t fromUDynamicWindow;
+
+ /*
+ * windowUse[] keeps track of the use of the dynamic windows:
+ * At nextWindowUseIndex there is the least recently used window,
+ * and the following windows (in a wrapping manner) are more and more
+ * recently used.
+ * At nextWindowUseIndex-1 there is the most recently used window.
+ */
+ uint8_t locale; /* lGeneric or l_ja; selects the initial windowUse[] order on reset */
+ int8_t nextWindowUseIndex;
+ int8_t windowUse[8];
+} SCSUData;
+
+static const int8_t initialWindowUse[8]={ 7, 0, 3, 2, 4, 5, 6, 1 }; /* initial SCSUData.windowUse[] order for the generic locale */
+static const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 }; /* initial SCSUData.windowUse[] order for the "ja" locale */
+
+enum {
+ lGeneric, l_ja /* values of SCSUData.locale */
+};
+
+/* SCSU setup functions ----------------------------------------------------- */
+U_CDECL_BEGIN
+/*
+ * Resets the SCSU state stored in cnv->extraInfo to its initial values.
+ *
+ * cnv    the converter; cnv->extraInfo must point to a valid SCSUData
+ * choice which direction(s) to reset. The toUnicode state is reset for
+ *        choice<=UCNV_RESET_TO_UNICODE and the fromUnicode state for
+ *        choice!=UCNV_RESET_TO_UNICODE (_SCSUOpen passes UCNV_RESET_BOTH
+ *        and relies on both directions being reset).
+ *
+ * scsu->locale is not modified; it selects the initial window-use order.
+ */
+static void U_CALLCONV
+_SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
+    SCSUData *scsu=(SCSUData *)cnv->extraInfo;
+
+    if(choice<=UCNV_RESET_TO_UNICODE) {
+        /* reset toUnicode; copy sizes are taken from the destination arrays instead of magic byte counts */
+        uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, sizeof(scsu->toUDynamicOffsets));
+
+        scsu->toUIsSingleByteMode=TRUE;
+        scsu->toUState=readCommand;
+        scsu->toUQuoteWindow=scsu->toUDynamicWindow=0;
+        scsu->toUByteOne=0;
+
+        cnv->toULength=0;
+    }
+    if(choice!=UCNV_RESET_TO_UNICODE) {
+        /* reset fromUnicode */
+        uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, sizeof(scsu->fromUDynamicOffsets));
+
+        scsu->fromUIsSingleByteMode=TRUE;
+        scsu->fromUDynamicWindow=0;
+
+        scsu->nextWindowUseIndex=0;
+        switch(scsu->locale) {
+        case l_ja:
+            uprv_memcpy(scsu->windowUse, initialWindowUse_ja, sizeof(scsu->windowUse));
+            break;
+        default:
+            uprv_memcpy(scsu->windowUse, initialWindowUse, sizeof(scsu->windowUse));
+            break;
+        }
+
+        cnv->fromUChar32=0;
+    }
+}
+
+/*
+ * Allocates and initializes the SCSUData for a newly opened converter.
+ *
+ * Does nothing when pArgs->onlyTestIsLoadable is set. Otherwise stores a
+ * heap-allocated SCSUData in cnv->extraInfo, picks the locale variant
+ * (l_ja for "ja" or "ja_...", lGeneric otherwise), resets both directions,
+ * and sets U+fffd as the substitution string.
+ * Sets *pErrorCode to U_MEMORY_ALLOCATION_ERROR if the allocation fails;
+ * the substitution string is still set in that case.
+ */
+static void U_CALLCONV
+_SCSUOpen(UConverter *cnv,
+          UConverterLoadArgs *pArgs,
+          UErrorCode *pErrorCode) {
+    const char *requestedLocale=pArgs->locale;
+    SCSUData *state;
+
+    if(pArgs->onlyTestIsLoadable) {
+        return;
+    }
+
+    state=(SCSUData *)uprv_malloc(sizeof(SCSUData));
+    cnv->extraInfo=state;
+    if(state==NULL) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+    } else {
+        /* "ja" must be the whole locale ID or be followed by '_' */
+        UBool isJapanese=(UBool)(
+            requestedLocale!=NULL &&
+            requestedLocale[0]=='j' && requestedLocale[1]=='a' &&
+            (requestedLocale[2]==0 || requestedLocale[2]=='_'));
+
+        if(isJapanese) {
+            state->locale=l_ja;
+        } else {
+            state->locale=lGeneric;
+        }
+        _SCSUReset(cnv, UCNV_RESET_BOTH);
+    }
+
+    /* Set the substitution character U+fffd as a Unicode string. */
+    cnv->subUChars[0]=0xfffd;
+    cnv->subCharLen=-1;
+}
+
+/*
+ * Releases the SCSU state of a converter.
+ * The SCSUData is freed only when cnv->isExtraLocal is not set;
+ * cnv->extraInfo is cleared in either case.
+ */
+static void U_CALLCONV
+_SCSUClose(UConverter *cnv) {
+    if(cnv->extraInfo==NULL) {
+        /* nothing was allocated, or it was already released */
+        return;
+    }
+    if(!cnv->isExtraLocal) {
+        uprv_free(cnv->extraInfo);
+    }
+    cnv->extraInfo=NULL;
+}
+
+/* SCSU-to-Unicode conversion functions ------------------------------------- */
+
+/*
+ * Converts SCSU bytes from pArgs->source to UTF-16 in pArgs->target and,
+ * if pArgs->offsets is not NULL, writes for each output UChar the index of
+ * the source byte at which its character started (-1 if the character
+ * began in a previous buffer).
+ *
+ * The decoder state (mode, state, windows, pending byte) is loaded from and
+ * stored back into the SCSUData in cnv->extraInfo, so a multi-byte sequence
+ * may be split across calls. The bytes of an unfinished or illegal sequence
+ * are kept in cnv->toUBytes[]/cnv->toULength.
+ *
+ * *pErrorCode is set to
+ *   U_BUFFER_OVERFLOW_ERROR  when the target is full (a pending trail
+ *                            surrogate is stored in cnv->UCharErrorBuffer)
+ *   U_ILLEGAL_CHAR_FOUND     for the reserved commands Srs/Urs and for
+ *                            reserved window offset bytes (0, 0xa8..0xf8)
+ *
+ * pArgs->source, pArgs->target and pArgs->offsets are advanced on return.
+ */
+static void U_CALLCONV
+_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+                          UErrorCode *pErrorCode) {
+    UConverter *cnv;
+    SCSUData *scsu;
+    const uint8_t *source, *sourceLimit;
+    UChar *target;
+    const UChar *targetLimit;
+    int32_t *offsets;
+    UBool isSingleByteMode;
+    uint8_t state, byteOne;
+    int8_t quoteWindow, dynamicWindow;
+
+    int32_t sourceIndex, nextSourceIndex;
+
+    uint8_t b;
+
+    /* set up the local pointers */
+    cnv=pArgs->converter;
+    scsu=(SCSUData *)cnv->extraInfo;
+
+    source=(const uint8_t *)pArgs->source;
+    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+    target=pArgs->target;
+    targetLimit=pArgs->targetLimit;
+    offsets=pArgs->offsets;
+
+    /* get the state machine state */
+    isSingleByteMode=scsu->toUIsSingleByteMode;
+    state=scsu->toUState;
+    quoteWindow=scsu->toUQuoteWindow;
+    dynamicWindow=scsu->toUDynamicWindow;
+    byteOne=scsu->toUByteOne;
+
+    /* sourceIndex=-1 if the current character began in the previous buffer */
+    sourceIndex=state==readCommand ? 0 : -1;
+    nextSourceIndex=0;
+
+    /*
+     * conversion "loop"
+     *
+     * For performance, this is not a normal C loop.
+     * Instead, there are two code blocks for the two SCSU modes.
+     * The function branches to either one, and a change of the mode is done with a goto to
+     * the other branch.
+     *
+     * Each branch has two conventional loops:
+     * - a fast-path loop for the most common codes in the mode
+     * - a loop for all other codes in the mode
+     * When the fast-path runs into a code that it cannot handle, its loop ends and it
+     * runs into the following loop to handle the other codes.
+     * The end of the input or output buffer is also handled by the slower loop.
+     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
+     *
+     * The callback handling is done by returning with an error code.
+     * The conversion framework actually calls the callback function.
+     */
+    if(isSingleByteMode) {
+        /* fast path for single-byte mode */
+        if(state==readCommand) {
+fastSingle:
+            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
+                ++source;
+                ++nextSourceIndex;
+                if(b<=0x7f) {
+                    /* write US-ASCII graphic character or DEL */
+                    *target++=(UChar)b;
+                    if(offsets!=NULL) {
+                        *offsets++=sourceIndex;
+                    }
+                } else {
+                    /* write from dynamic window */
+                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
+                    if(c<=0xffff) {
+                        *target++=(UChar)c;
+                        if(offsets!=NULL) {
+                            *offsets++=sourceIndex;
+                        }
+                    } else {
+                        /* output surrogate pair */
+                        *target++=(UChar)(0xd7c0+(c>>10));
+                        if(target<targetLimit) {
+                            *target++=(UChar)(0xdc00|(c&0x3ff));
+                            if(offsets!=NULL) {
+                                *offsets++=sourceIndex;
+                                *offsets++=sourceIndex;
+                            }
+                        } else {
+                            /* target overflow */
+                            if(offsets!=NULL) {
+                                *offsets++=sourceIndex;
+                            }
+                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
+                            cnv->UCharErrorBufferLength=1;
+                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                            goto endloop;
+                        }
+                    }
+                }
+                sourceIndex=nextSourceIndex;
+            }
+        }
+
+        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
+singleByteMode:
+        while(source<sourceLimit) {
+            if(target>=targetLimit) {
+                /* target is full */
+                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                break;
+            }
+            b=*source++;
+            ++nextSourceIndex;
+            switch(state) {
+            case readCommand:
+                /* redundant conditions are commented out */
+                /* here: b<0x20 because otherwise we would be in fastSingle */
+                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
+                    /* CR/LF/TAB/NUL */
+                    *target++=(UChar)b;
+                    if(offsets!=NULL) {
+                        *offsets++=sourceIndex;
+                    }
+                    sourceIndex=nextSourceIndex;
+                    goto fastSingle;
+                } else if(SC0<=b) {
+                    if(b<=SC7) {
+                        dynamicWindow=(int8_t)(b-SC0);
+                        sourceIndex=nextSourceIndex;
+                        goto fastSingle;
+                    } else /* if(SD0<=b && b<=SD7) */ {
+                        dynamicWindow=(int8_t)(b-SD0);
+                        state=defineOne;
+                    }
+                } else if(/* SQ0<=b && */ b<=SQ7) {
+                    quoteWindow=(int8_t)(b-SQ0);
+                    state=quoteOne;
+                } else if(b==SDX) {
+                    state=definePairOne;
+                } else if(b==SQU) {
+                    state=quotePairOne;
+                } else if(b==SCU) {
+                    sourceIndex=nextSourceIndex;
+                    isSingleByteMode=FALSE;
+                    goto fastUnicode;
+                } else /* Srs */ {
+                    /* callback(illegal) */
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    goto endloop;
+                }
+
+                /* store the first byte of a multibyte sequence in toUBytes[] */
+                cnv->toUBytes[0]=b;
+                cnv->toULength=1;
+                break;
+            case quotePairOne:
+                byteOne=b;
+                cnv->toUBytes[1]=b;
+                cnv->toULength=2;
+                state=quotePairTwo;
+                break;
+            case quotePairTwo:
+                *target++=(UChar)((byteOne<<8)|b);
+                if(offsets!=NULL) {
+                    *offsets++=sourceIndex;
+                }
+                sourceIndex=nextSourceIndex;
+                state=readCommand;
+                goto fastSingle;
+            case quoteOne:
+                if(b<0x80) {
+                    /* all static offsets are in the BMP */
+                    *target++=(UChar)(staticOffsets[quoteWindow]+b);
+                    if(offsets!=NULL) {
+                        *offsets++=sourceIndex;
+                    }
+                } else {
+                    /* write from dynamic window */
+                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
+                    if(c<=0xffff) {
+                        *target++=(UChar)c;
+                        if(offsets!=NULL) {
+                            *offsets++=sourceIndex;
+                        }
+                    } else {
+                        /* output surrogate pair */
+                        *target++=(UChar)(0xd7c0+(c>>10));
+                        if(target<targetLimit) {
+                            *target++=(UChar)(0xdc00|(c&0x3ff));
+                            if(offsets!=NULL) {
+                                *offsets++=sourceIndex;
+                                *offsets++=sourceIndex;
+                            }
+                        } else {
+                            /* target overflow */
+                            if(offsets!=NULL) {
+                                *offsets++=sourceIndex;
+                            }
+                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
+                            cnv->UCharErrorBufferLength=1;
+                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                            goto endloop;
+                        }
+                    }
+                }
+                sourceIndex=nextSourceIndex;
+                state=readCommand;
+                goto fastSingle;
+            case definePairOne:
+                dynamicWindow=(int8_t)((b>>5)&7);
+                byteOne=(uint8_t)(b&0x1f);
+                cnv->toUBytes[1]=b;
+                cnv->toULength=2;
+                state=definePairTwo;
+                break;
+            case definePairTwo:
+                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
+                sourceIndex=nextSourceIndex;
+                state=readCommand;
+                goto fastSingle;
+            case defineOne:
+                if(b==0) {
+                    /*
+                     * callback(illegal): Reserved window offset value 0
+                     * The error code must be set here; without it the function
+                     * returns "success" in state defineOne and the next byte
+                     * is silently taken as the window offset.
+                     */
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    cnv->toUBytes[1]=b;
+                    cnv->toULength=2;
+                    goto endloop;
+                } else if(b<gapThreshold) {
+                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
+                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
+                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
+                } else if(b>=fixedThreshold) {
+                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
+                } else {
+                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    cnv->toUBytes[1]=b;
+                    cnv->toULength=2;
+                    goto endloop;
+                }
+                sourceIndex=nextSourceIndex;
+                state=readCommand;
+                goto fastSingle;
+            }
+        }
+    } else {
+        /* fast path for Unicode mode */
+        if(state==readCommand) {
+fastUnicode:
+            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
+                *target++=(UChar)((b<<8)|source[1]);
+                if(offsets!=NULL) {
+                    *offsets++=sourceIndex;
+                }
+                /*
+                 * Advance past the two bytes first, then mark the start of the
+                 * next character; the opposite order leaves sourceIndex one
+                 * character behind for every following UChar of the run.
+                 */
+                nextSourceIndex+=2;
+                sourceIndex=nextSourceIndex;
+                source+=2;
+            }
+        }
+
+        /* normal state machine for Unicode mode */
+/* unicodeByteMode: */
+        while(source<sourceLimit) {
+            if(target>=targetLimit) {
+                /* target is full */
+                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                break;
+            }
+            b=*source++;
+            ++nextSourceIndex;
+            switch(state) {
+            case readCommand:
+                if((uint8_t)(b-UC0)>(Urs-UC0)) {
+                    byteOne=b;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    state=quotePairTwo;
+                } else if(/* UC0<=b && */ b<=UC7) {
+                    dynamicWindow=(int8_t)(b-UC0);
+                    sourceIndex=nextSourceIndex;
+                    isSingleByteMode=TRUE;
+                    goto fastSingle;
+                } else if(/* UD0<=b && */ b<=UD7) {
+                    dynamicWindow=(int8_t)(b-UD0);
+                    isSingleByteMode=TRUE;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    state=defineOne;
+                    goto singleByteMode;
+                } else if(b==UDX) {
+                    isSingleByteMode=TRUE;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    state=definePairOne;
+                    goto singleByteMode;
+                } else if(b==UQU) {
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    state=quotePairOne;
+                } else /* Urs */ {
+                    /* callback(illegal) */
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    goto endloop;
+                }
+                break;
+            case quotePairOne:
+                byteOne=b;
+                cnv->toUBytes[1]=b;
+                cnv->toULength=2;
+                state=quotePairTwo;
+                break;
+            case quotePairTwo:
+                *target++=(UChar)((byteOne<<8)|b);
+                if(offsets!=NULL) {
+                    *offsets++=sourceIndex;
+                }
+                sourceIndex=nextSourceIndex;
+                state=readCommand;
+                goto fastUnicode;
+            }
+        }
+    }
+endloop:
+
+    /* set the converter state back into UConverter */
+    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
+        /* reset to deal with the next character */
+        state=readCommand;
+    } else if(state==readCommand) {
+        /* not in a multi-byte sequence, reset toULength */
+        cnv->toULength=0;
+    }
+    scsu->toUIsSingleByteMode=isSingleByteMode;
+    scsu->toUState=state;
+    scsu->toUQuoteWindow=quoteWindow;
+    scsu->toUDynamicWindow=dynamicWindow;
+    scsu->toUByteOne=byteOne;
+
+    /* write back the updated pointers */
+    pArgs->source=(const char *)source;
+    pArgs->target=target;
+    pArgs->offsets=offsets;
+    return;
+}
+
+/*
+ * Counterpart of _SCSUToUnicodeWithOffsets without offset handling.
+ * If a change is made in that function, then either
+ * change this function the same way or
+ * re-copy that function and remove the variables
+ * offsets, sourceIndex, and nextSourceIndex.
+ *
+ * Converts SCSU bytes from pArgs->source to UTF-16 in pArgs->target.
+ * The decoder state is loaded from and stored back into the SCSUData in
+ * cnv->extraInfo, so a multi-byte sequence may be split across calls.
+ *
+ * *pErrorCode is set to
+ *   U_BUFFER_OVERFLOW_ERROR  when the target is full (a pending trail
+ *                            surrogate is stored in cnv->UCharErrorBuffer)
+ *   U_ILLEGAL_CHAR_FOUND     for the reserved commands Srs/Urs and for
+ *                            reserved window offset bytes (0, 0xa8..0xf8)
+ */
+static void U_CALLCONV
+_SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
+               UErrorCode *pErrorCode) {
+    UConverter *cnv;
+    SCSUData *scsu;
+    const uint8_t *source, *sourceLimit;
+    UChar *target;
+    const UChar *targetLimit;
+    UBool isSingleByteMode;
+    uint8_t state, byteOne;
+    int8_t quoteWindow, dynamicWindow;
+
+    uint8_t b;
+
+    /* set up the local pointers */
+    cnv=pArgs->converter;
+    scsu=(SCSUData *)cnv->extraInfo;
+
+    source=(const uint8_t *)pArgs->source;
+    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+    target=pArgs->target;
+    targetLimit=pArgs->targetLimit;
+
+    /* get the state machine state */
+    isSingleByteMode=scsu->toUIsSingleByteMode;
+    state=scsu->toUState;
+    quoteWindow=scsu->toUQuoteWindow;
+    dynamicWindow=scsu->toUDynamicWindow;
+    byteOne=scsu->toUByteOne;
+
+    /*
+     * conversion "loop"
+     *
+     * For performance, this is not a normal C loop.
+     * Instead, there are two code blocks for the two SCSU modes.
+     * The function branches to either one, and a change of the mode is done with a goto to
+     * the other branch.
+     *
+     * Each branch has two conventional loops:
+     * - a fast-path loop for the most common codes in the mode
+     * - a loop for all other codes in the mode
+     * When the fast-path runs into a code that it cannot handle, its loop ends and it
+     * runs into the following loop to handle the other codes.
+     * The end of the input or output buffer is also handled by the slower loop.
+     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
+     *
+     * The callback handling is done by returning with an error code.
+     * The conversion framework actually calls the callback function.
+     */
+    if(isSingleByteMode) {
+        /* fast path for single-byte mode */
+        if(state==readCommand) {
+fastSingle:
+            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
+                ++source;
+                if(b<=0x7f) {
+                    /* write US-ASCII graphic character or DEL */
+                    *target++=(UChar)b;
+                } else {
+                    /* write from dynamic window */
+                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
+                    if(c<=0xffff) {
+                        *target++=(UChar)c;
+                    } else {
+                        /* output surrogate pair */
+                        *target++=(UChar)(0xd7c0+(c>>10));
+                        if(target<targetLimit) {
+                            *target++=(UChar)(0xdc00|(c&0x3ff));
+                        } else {
+                            /* target overflow */
+                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
+                            cnv->UCharErrorBufferLength=1;
+                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                            goto endloop;
+                        }
+                    }
+                }
+            }
+        }
+
+        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
+singleByteMode:
+        while(source<sourceLimit) {
+            if(target>=targetLimit) {
+                /* target is full */
+                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                break;
+            }
+            b=*source++;
+            switch(state) {
+            case readCommand:
+                /* redundant conditions are commented out */
+                /* here: b<0x20 because otherwise we would be in fastSingle */
+                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
+                    /* CR/LF/TAB/NUL */
+                    *target++=(UChar)b;
+                    goto fastSingle;
+                } else if(SC0<=b) {
+                    if(b<=SC7) {
+                        dynamicWindow=(int8_t)(b-SC0);
+                        goto fastSingle;
+                    } else /* if(SD0<=b && b<=SD7) */ {
+                        dynamicWindow=(int8_t)(b-SD0);
+                        state=defineOne;
+                    }
+                } else if(/* SQ0<=b && */ b<=SQ7) {
+                    quoteWindow=(int8_t)(b-SQ0);
+                    state=quoteOne;
+                } else if(b==SDX) {
+                    state=definePairOne;
+                } else if(b==SQU) {
+                    state=quotePairOne;
+                } else if(b==SCU) {
+                    isSingleByteMode=FALSE;
+                    goto fastUnicode;
+                } else /* Srs */ {
+                    /* callback(illegal) */
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    goto endloop;
+                }
+
+                /* store the first byte of a multibyte sequence in toUBytes[] */
+                cnv->toUBytes[0]=b;
+                cnv->toULength=1;
+                break;
+            case quotePairOne:
+                byteOne=b;
+                cnv->toUBytes[1]=b;
+                cnv->toULength=2;
+                state=quotePairTwo;
+                break;
+            case quotePairTwo:
+                *target++=(UChar)((byteOne<<8)|b);
+                state=readCommand;
+                goto fastSingle;
+            case quoteOne:
+                if(b<0x80) {
+                    /* all static offsets are in the BMP */
+                    *target++=(UChar)(staticOffsets[quoteWindow]+b);
+                } else {
+                    /* write from dynamic window */
+                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
+                    if(c<=0xffff) {
+                        *target++=(UChar)c;
+                    } else {
+                        /* output surrogate pair */
+                        *target++=(UChar)(0xd7c0+(c>>10));
+                        if(target<targetLimit) {
+                            *target++=(UChar)(0xdc00|(c&0x3ff));
+                        } else {
+                            /* target overflow */
+                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
+                            cnv->UCharErrorBufferLength=1;
+                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                            goto endloop;
+                        }
+                    }
+                }
+                state=readCommand;
+                goto fastSingle;
+            case definePairOne:
+                dynamicWindow=(int8_t)((b>>5)&7);
+                byteOne=(uint8_t)(b&0x1f);
+                cnv->toUBytes[1]=b;
+                cnv->toULength=2;
+                state=definePairTwo;
+                break;
+            case definePairTwo:
+                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
+                state=readCommand;
+                goto fastSingle;
+            case defineOne:
+                if(b==0) {
+                    /*
+                     * callback(illegal): Reserved window offset value 0
+                     * The error code must be set here; without it the function
+                     * returns "success" in state defineOne and the next byte
+                     * is silently taken as the window offset.
+                     */
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    cnv->toUBytes[1]=b;
+                    cnv->toULength=2;
+                    goto endloop;
+                } else if(b<gapThreshold) {
+                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
+                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
+                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
+                } else if(b>=fixedThreshold) {
+                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
+                } else {
+                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    cnv->toUBytes[1]=b;
+                    cnv->toULength=2;
+                    goto endloop;
+                }
+                state=readCommand;
+                goto fastSingle;
+            }
+        }
+    } else {
+        /* fast path for Unicode mode */
+        if(state==readCommand) {
+fastUnicode:
+            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
+                *target++=(UChar)((b<<8)|source[1]);
+                source+=2;
+            }
+        }
+
+        /* normal state machine for Unicode mode */
+/* unicodeByteMode: */
+        while(source<sourceLimit) {
+            if(target>=targetLimit) {
+                /* target is full */
+                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                break;
+            }
+            b=*source++;
+            switch(state) {
+            case readCommand:
+                if((uint8_t)(b-UC0)>(Urs-UC0)) {
+                    byteOne=b;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    state=quotePairTwo;
+                } else if(/* UC0<=b && */ b<=UC7) {
+                    dynamicWindow=(int8_t)(b-UC0);
+                    isSingleByteMode=TRUE;
+                    goto fastSingle;
+                } else if(/* UD0<=b && */ b<=UD7) {
+                    dynamicWindow=(int8_t)(b-UD0);
+                    isSingleByteMode=TRUE;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    state=defineOne;
+                    goto singleByteMode;
+                } else if(b==UDX) {
+                    isSingleByteMode=TRUE;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    state=definePairOne;
+                    goto singleByteMode;
+                } else if(b==UQU) {
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    state=quotePairOne;
+                } else /* Urs */ {
+                    /* callback(illegal) */
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    goto endloop;
+                }
+                break;
+            case quotePairOne:
+                byteOne=b;
+                cnv->toUBytes[1]=b;
+                cnv->toULength=2;
+                state=quotePairTwo;
+                break;
+            case quotePairTwo:
+                *target++=(UChar)((byteOne<<8)|b);
+                state=readCommand;
+                goto fastUnicode;
+            }
+        }
+    }
+endloop:
+
+    /* set the converter state back into UConverter */
+    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
+        /* reset to deal with the next character */
+        state=readCommand;
+    } else if(state==readCommand) {
+        /* not in a multi-byte sequence, reset toULength */
+        cnv->toULength=0;
+    }
+    scsu->toUIsSingleByteMode=isSingleByteMode;
+    scsu->toUState=state;
+    scsu->toUQuoteWindow=quoteWindow;
+    scsu->toUDynamicWindow=dynamicWindow;
+    scsu->toUByteOne=byteOne;
+
+    /* write back the updated pointers */
+    pArgs->source=(const char *)source;
+    pArgs->target=target;
+    return;
+}
+U_CDECL_END
+/* SCSU-from-Unicode conversion functions ----------------------------------- */
+
+/*
+ * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
+ * reasonable results. The lookahead is minimal.
+ * Many cases are simple:
+ * A character fits directly into the current mode, a dynamic or static window,
+ * or is not compressible. These cases are tested first.
+ * Real compression heuristics are applied to the rest, in code branches for
+ * single/Unicode mode and BMP/supplementary code points.
+ * The heuristics used here are extremely simple.
+ */
+
+/*
+ * Look up which of the eight windows covers c.
+ * A window covers the 128 code points starting at its offset.
+ * Returns the index (0..7) of the first matching window, or -1 if there is none.
+ */
+static int8_t
+getWindow(const uint32_t offsets[8], uint32_t c) {
+    int8_t found=-1;
+    int slot=0;
+
+    while(slot<8 && found<0) {
+        /* unsigned subtraction wraps for c<offsets[slot], so one compare checks both bounds */
+        const uint32_t distance=(uint32_t)(c-offsets[slot]);
+        if(distance<=0x7f) {
+            found=(int8_t)slot;
+        }
+        ++slot;
+    }
+    return found;
+}
+
+/*
+ * Can c be written without a window change while the dynamic window
+ * starting at offset is active?
+ * True if c lies inside that window, or if c is directly encodable:
+ * US-ASCII 0x20..0x7f, or one of NUL/TAB/LF/CR.
+ */
+static UBool
+isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) {
+    if(c>offset+0x7f) {
+        /* past the end of the window */
+        return FALSE;
+    }
+    if(c>=offset) {
+        /* inside the window */
+        return TRUE;
+    }
+    if(c>0x7f) {
+        /* below the window but not US-ASCII */
+        return FALSE;
+    }
+    if(c>=0x20) {
+        /* US-ASCII graphic character (or DEL) */
+        return TRUE;
+    }
+    /* binary 0010 0110 0000 0001: bits for 0xd, 0xa, 9 and 0 */
+    return (UBool)(((1UL<<c)&0x2601)!=0);
+}
+
+/*
+ * Pick the dynamic window that is to be redefined next:
+ * the entry of windowUse[] at nextWindowUseIndex.
+ * The index is then advanced, wrapping from 7 back to 0.
+ */
+static int8_t
+getNextDynamicWindow(SCSUData *scsu) {
+    const int8_t chosen=scsu->windowUse[scsu->nextWindowUseIndex];
+
+    ++scsu->nextWindowUseIndex;
+    if(scsu->nextWindowUseIndex==8) {
+        /* wrap around the 8-entry use list */
+        scsu->nextWindowUseIndex=0;
+    }
+    return chosen;
+}
+
+/*
+ * Mark a dynamic window as the most recently used one.
+ *
+ * windowUse[] is treated as a circular list of the 8 window numbers.
+ * The given window is located, the entries between it and
+ * nextWindowUseIndex are shifted down by one, and the window is stored
+ * in the slot just before nextWindowUseIndex.
+ * getNextDynamicWindow() reads the list at nextWindowUseIndex to choose
+ * the window to redefine.
+ */
+static void
+useDynamicWindow(SCSUData *scsu, int8_t window) {
+    int dest, src;
+
+    /* search backwards from nextWindowUseIndex so recently used windows are found first */
+    dest=scsu->nextWindowUseIndex;
+    do {
+        dest= dest<=0 ? 7 : dest-1;
+    } while(scsu->windowUse[dest]!=window);
+
+    /* close the gap: move every following entry one slot towards dest */
+    src= dest+1==8 ? 0 : dest+1;
+    while(src!=scsu->nextWindowUseIndex) {
+        scsu->windowUse[dest]=scsu->windowUse[src];
+        dest=src;
+        ++src;
+        if(src==8) {
+            src=0;
+        }
+    }
+
+    /* dest is now the slot just before nextWindowUseIndex */
+    scsu->windowUse[dest]=window;
+}
+
+/*
+ * Work out how a dynamic window that contains c can be defined.
+ * The seven fixed offsets are tried first.
+ * On success the start offset of the window is stored in *pOffset.
+ *
+ * Returns the offset code:
+ *   -1      no window can be defined for c (*pOffset is not written)
+ *   <=0xff  code for SDn/UDn
+ *   else    code for SDX/UDX; subtract 0x200 to get the true code
+ */
+static int
+getDynamicOffset(uint32_t c, uint32_t *pOffset) {
+    int fixedIndex=0;
+
+    /* a matching fixed offset wins over a computed one */
+    while(fixedIndex<7) {
+        if((uint32_t)(c-fixedOffsets[fixedIndex])<=0x7f) {
+            *pOffset=fixedOffsets[fixedIndex];
+            return 0xf9+fixedIndex;
+        }
+        ++fixedIndex;
+    }
+
+    if(c<0x80) {
+        /* No dynamic window for US-ASCII. */
+        return -1;
+    }
+
+    if(c<0x3400 ||
+        (uint32_t)(c-0x10000)<(0x14000-0x10000) ||
+        (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000)
+    ) {
+        /* code range of a "small", i.e., reasonably windowable, script */
+        *pOffset=c&0x7fffff80;
+        return (int)(c>>7);
+    }
+
+    if(c>=0xe000 && c<0xfff0 && c!=0xfeff) {
+        /* the code for these characters is relative to gapOffset */
+        *pOffset=c&0x7fffff80;
+        return (int)((c-gapOffset)>>7);
+    }
+
+    return -1;
+}
+U_CDECL_BEGIN
+/*
+ * Idea for compression:
+ * - save SCSUData and other state before really starting work
+ * - at endloop, see if compression could be better with just unicode mode
+ * - don't do this if a callback has been called
+ * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning
+ * - different buffer handling!
+ *
+ * Drawback or need for corrective handling:
+ * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and
+ * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible
+ * not only for compression but also for HTML/XML documents with following charset/encoding announcers.
+ *
+ * How to achieve both?
+ * - Only replace the result after an SDX or SCU?
+ */
+
+/*
+ * Convert UTF-16 input to SCSU bytes and, when pArgs->offsets is not NULL,
+ * record for every output byte the index of the source code unit that
+ * produced it.
+ *
+ * pArgs      in/out: converter, source/sourceLimit, target/targetLimit and
+ *            offsets; source, target and offsets are advanced on return.
+ * pErrorCode out: set to U_BUFFER_OVERFLOW_ERROR when the target is full and
+ *            to U_ILLEGAL_CHAR_FOUND for an unpaired surrogate.
+ *
+ * The encoder mode and the active dynamic window are read from and written
+ * back to the SCSUData in cnv->extraInfo. A lead surrogate at the end of the
+ * input is carried to the next call in cnv->fromUChar32. Output bytes that do
+ * not fit into the target are stored in cnv->charErrorBuffer.
+ */
+static void U_CALLCONV
+_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ SCSUData *scsu;
+ const UChar *source, *sourceLimit;
+ uint8_t *target;
+ int32_t targetCapacity;
+ int32_t *offsets;
+
+ UBool isSingleByteMode;
+ uint8_t dynamicWindow;
+ uint32_t currentOffset;
+
+ uint32_t c, delta;
+
+ int32_t sourceIndex, nextSourceIndex;
+
+ int32_t length;
+
+ /* variables for compression heuristics */
+ uint32_t offset;
+ UChar lead, trail;
+ int code;
+ int8_t window;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ scsu=(SCSUData *)cnv->extraInfo;
+
+ /* set up the local pointers */
+ source=pArgs->source;
+ sourceLimit=pArgs->sourceLimit;
+ target=(uint8_t *)pArgs->target;
+ targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+ offsets=pArgs->offsets;
+
+ /* get the state machine state */
+ isSingleByteMode=scsu->fromUIsSingleByteMode;
+ dynamicWindow=scsu->fromUDynamicWindow;
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
+
+ c=cnv->fromUChar32;
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex= c==0 ? 0 : -1;
+ nextSourceIndex=0;
+
+ /* similar conversion "loop" as in toUnicode */
+loop:
+ if(isSingleByteMode) {
+ if(c!=0 && targetCapacity>0) {
+ goto getTrailSingle;
+ }
+
+ /* state machine for single-byte mode */
+/* singleByteMode: */
+ while(source<sourceLimit) {
+ if(targetCapacity<=0) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ c=*source++;
+ ++nextSourceIndex;
+
+ if((c-0x20)<=0x5f) {
+ /* pass US-ASCII graphic character through */
+ *target++=(uint8_t)c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ --targetCapacity;
+ } else if(c<0x20) {
+ if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
+ /* CR/LF/TAB/NUL */
+ *target++=(uint8_t)c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ --targetCapacity;
+ } else {
+ /* quote C0 control character */
+ c|=SQ0<<8;
+ length=2;
+ goto outputBytes;
+ }
+ } else if((delta=c-currentOffset)<=0x7f) {
+ /* use the current dynamic window */
+ *target++=(uint8_t)(delta|0x80);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ --targetCapacity;
+ } else if(U16_IS_SURROGATE(c)) {
+ if(U16_IS_SURROGATE_LEAD(c)) {
+getTrailSingle:
+ lead=(UChar)c;
+ if(source<sourceLimit) {
+ /* test the following code unit */
+ trail=*source;
+ if(U16_IS_TRAIL(trail)) {
+ ++source;
+ ++nextSourceIndex;
+ c=U16_GET_SUPPLEMENTARY(c, trail);
+ /* convert this surrogate code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+ } else {
+ /* no more input */
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+
+ /* compress supplementary character U+10000..U+10ffff */
+ if((delta=c-currentOffset)<=0x7f) {
+ /* use the current dynamic window */
+ *target++=(uint8_t)(delta|0x80);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ --targetCapacity;
+ } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
+ /* there is a dynamic window that contains this character, change to it */
+ dynamicWindow=window;
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
+ length=2;
+ goto outputBytes;
+ } else if((code=getDynamicOffset(c, &offset))>=0) {
+ /* might check if there are more characters in this window to come */
+ /* define an extended window with this character */
+ code-=0x200;
+ dynamicWindow=getNextDynamicWindow(scsu);
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
+ length=4;
+ goto outputBytes;
+ } else {
+ /* change to Unicode mode and output this (lead, trail) pair */
+ isSingleByteMode=FALSE;
+ *target++=(uint8_t)SCU;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ --targetCapacity;
+ c=((uint32_t)lead<<16)|trail;
+ length=4;
+ goto outputBytes;
+ }
+ } else if(c<0xa0) {
+ /* quote C1 control character */
+ c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
+ length=2;
+ goto outputBytes;
+ } else if(c==0xfeff || c>=0xfff0) {
+ /* quote signature character=byte order mark and specials */
+ c|=SQU<<16;
+ length=3;
+ goto outputBytes;
+ } else {
+ /* compress all other BMP characters */
+ if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
+ /* there is a window defined that contains this character - switch to it or quote from it? */
+ if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
+ /* change to dynamic window */
+ dynamicWindow=window;
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
+ length=2;
+ goto outputBytes;
+ } else {
+ /* quote from dynamic window */
+ c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
+ length=2;
+ goto outputBytes;
+ }
+ } else if((window=getWindow(staticOffsets, c))>=0) {
+ /* quote from static window */
+ c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
+ length=2;
+ goto outputBytes;
+ } else if((code=getDynamicOffset(c, &offset))>=0) {
+ /* define a dynamic window with this character */
+ dynamicWindow=getNextDynamicWindow(scsu);
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
+ length=3;
+ goto outputBytes;
+ } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
+ (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
+ ) {
+ /*
+ * this character is not compressible (a BMP ideograph or similar);
+ * switch to Unicode mode if this is the last character in the block
+ * or there is at least one more ideograph following immediately
+ */
+ isSingleByteMode=FALSE;
+ c|=SCU<<16;
+ length=3;
+ goto outputBytes;
+ } else {
+ /* quote Unicode */
+ c|=SQU<<16;
+ length=3;
+ goto outputBytes;
+ }
+ }
+
+ /* normal end of conversion: prepare for a new character */
+ c=0;
+ sourceIndex=nextSourceIndex;
+ }
+ } else {
+ if(c!=0 && targetCapacity>0) {
+ goto getTrailUnicode;
+ }
+
+ /* state machine for Unicode mode */
+/* unicodeByteMode: */
+ while(source<sourceLimit) {
+ if(targetCapacity<=0) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ c=*source++;
+ ++nextSourceIndex;
+
+ if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
+ /* not compressible, write character directly */
+ if(targetCapacity>=2) {
+ *target++=(uint8_t)(c>>8);
+ *target++=(uint8_t)c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ }
+ targetCapacity-=2;
+ } else {
+ length=2;
+ goto outputBytes;
+ }
+ } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
+ /* compress BMP character if the following one is not an uncompressible ideograph */
+ if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
+ if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
+ /* ASCII digit or letter */
+ isSingleByteMode=TRUE;
+ c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
+ length=2;
+ goto outputBytes;
+ } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
+ /* there is a dynamic window that contains this character, change to it */
+ isSingleByteMode=TRUE;
+ dynamicWindow=window;
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
+ length=2;
+ goto outputBytes;
+ } else if((code=getDynamicOffset(c, &offset))>=0) {
+ /* define a dynamic window with this character */
+ isSingleByteMode=TRUE;
+ dynamicWindow=getNextDynamicWindow(scsu);
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
+ length=3;
+ goto outputBytes;
+ }
+ }
+
+ /* don't know how to compress this character, just write it directly */
+ length=2;
+ goto outputBytes;
+ } else if(c<0xe000) {
+ /* c is a surrogate */
+ if(U16_IS_SURROGATE_LEAD(c)) {
+getTrailUnicode:
+ lead=(UChar)c;
+ if(source<sourceLimit) {
+ /* test the following code unit */
+ trail=*source;
+ if(U16_IS_TRAIL(trail)) {
+ ++source;
+ ++nextSourceIndex;
+ c=U16_GET_SUPPLEMENTARY(c, trail);
+ /* convert this surrogate code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+ } else {
+ /* no more input */
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+
+ /* compress supplementary character */
+ if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
+ !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
+ ) {
+ /*
+ * there is a dynamic window that contains this character and
+ * the following character is not uncompressible,
+ * change to the window
+ */
+ isSingleByteMode=TRUE;
+ dynamicWindow=window;
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
+ length=2;
+ goto outputBytes;
+ } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
+ (code=getDynamicOffset(c, &offset))>=0
+ ) {
+ /* two supplementary characters in (probably) the same window - define an extended one */
+ isSingleByteMode=TRUE;
+ code-=0x200;
+ dynamicWindow=getNextDynamicWindow(scsu);
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
+ length=4;
+ goto outputBytes;
+ } else {
+ /* don't know how to compress this character, just write it directly */
+ c=((uint32_t)lead<<16)|trail;
+ length=4;
+ goto outputBytes;
+ }
+ } else /* 0xe000<=c<0xf300 */ {
+ /* quote to avoid SCSU tags */
+ c|=UQU<<16;
+ length=3;
+ goto outputBytes;
+ }
+
+ /* normal end of conversion: prepare for a new character */
+ c=0;
+ sourceIndex=nextSourceIndex;
+ }
+ }
+endloop:
+
+ /* set the converter state back into UConverter */
+ scsu->fromUIsSingleByteMode=isSingleByteMode;
+ scsu->fromUDynamicWindow=dynamicWindow;
+
+ cnv->fromUChar32=c;
+
+ /* write back the updated pointers */
+ pArgs->source=source;
+ pArgs->target=(char *)target;
+ pArgs->offsets=offsets;
+ return;
+
+outputBytes:
+ /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
+ /* from the first if in the loop we know that targetCapacity>0 */
+ if(length<=targetCapacity) {
+ if(offsets==NULL) {
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 4:
+ *target++=(uint8_t)(c>>24);
+ U_FALLTHROUGH;
+ case 3:
+ *target++=(uint8_t)(c>>16);
+ U_FALLTHROUGH;
+ case 2:
+ *target++=(uint8_t)(c>>8);
+ U_FALLTHROUGH;
+ case 1:
+ *target++=(uint8_t)c;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ } else {
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 4:
+ *target++=(uint8_t)(c>>24);
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ case 3:
+ *target++=(uint8_t)(c>>16);
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ case 2:
+ *target++=(uint8_t)(c>>8);
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ case 1:
+ *target++=(uint8_t)c;
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ }
+ targetCapacity-=length;
+
+ /* normal end of conversion: prepare for a new character */
+ c=0;
+ sourceIndex=nextSourceIndex;
+ goto loop;
+ } else {
+ uint8_t *p;
+
+ /*
+ * We actually do this backwards here:
+ * In order to save an intermediate variable, we output
+ * first to the overflow buffer what does not fit into the
+ * regular target.
+ */
+ /* we know that 0<=targetCapacity<length<=4 */
+ /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
+ length-=targetCapacity;
+ p=(uint8_t *)cnv->charErrorBuffer;
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 4:
+ *p++=(uint8_t)(c>>24);
+ U_FALLTHROUGH;
+ case 3:
+ *p++=(uint8_t)(c>>16);
+ U_FALLTHROUGH;
+ case 2:
+ *p++=(uint8_t)(c>>8);
+ U_FALLTHROUGH;
+ case 1:
+ *p=(uint8_t)c;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ cnv->charErrorBufferLength=(int8_t)length;
+
+ /* now output what fits into the regular target */
+ /*
+ * length was reduced by targetCapacity.
+ * With targetCapacity==0 all 4 bytes went to the overflow buffer and
+ * length is still 4; shifting the 32-bit c by 32 bits would be undefined
+ * behavior, and there is nothing left to write, so do not shift then.
+ */
+ c= length<4 ? c>>(8*length) : 0;
+ switch(targetCapacity) {
+ /* each branch falls through to the next one */
+ case 3:
+ *target++=(uint8_t)(c>>16);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ U_FALLTHROUGH;
+ case 2:
+ *target++=(uint8_t)(c>>8);
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ U_FALLTHROUGH;
+ case 1:
+ *target++=(uint8_t)c;
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex;
+ }
+ U_FALLTHROUGH;
+ default:
+ break;
+ }
+
+ /* target overflow */
+ targetCapacity=0;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ c=0;
+ goto endloop;
+ }
+}
+
+/*
+ * Convert UTF-16 input to SCSU bytes without offset tracking.
+ *
+ * Identical to _SCSUFromUnicodeWithOffsets but without offset handling.
+ * If a change is made in the original function, then either
+ * change this function the same way or
+ * re-copy the original function and remove the variables
+ * offsets, sourceIndex, and nextSourceIndex.
+ *
+ * pArgs      in/out: converter, source/sourceLimit and target/targetLimit;
+ *            source and target are advanced on return.
+ * pErrorCode out: set to U_BUFFER_OVERFLOW_ERROR when the target is full and
+ *            to U_ILLEGAL_CHAR_FOUND for an unpaired surrogate.
+ *
+ * The encoder mode and the active dynamic window are read from and written
+ * back to the SCSUData in cnv->extraInfo. A lead surrogate at the end of the
+ * input is carried to the next call in cnv->fromUChar32. Output bytes that do
+ * not fit into the target are stored in cnv->charErrorBuffer.
+ */
+static void U_CALLCONV
+_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ SCSUData *scsu;
+ const UChar *source, *sourceLimit;
+ uint8_t *target;
+ int32_t targetCapacity;
+
+ UBool isSingleByteMode;
+ uint8_t dynamicWindow;
+ uint32_t currentOffset;
+
+ uint32_t c, delta;
+
+ int32_t length;
+
+ /* variables for compression heuristics */
+ uint32_t offset;
+ UChar lead, trail;
+ int code;
+ int8_t window;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ scsu=(SCSUData *)cnv->extraInfo;
+
+ /* set up the local pointers */
+ source=pArgs->source;
+ sourceLimit=pArgs->sourceLimit;
+ target=(uint8_t *)pArgs->target;
+ targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+
+ /* get the state machine state */
+ isSingleByteMode=scsu->fromUIsSingleByteMode;
+ dynamicWindow=scsu->fromUDynamicWindow;
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
+
+ c=cnv->fromUChar32;
+
+ /* similar conversion "loop" as in toUnicode */
+loop:
+ if(isSingleByteMode) {
+ if(c!=0 && targetCapacity>0) {
+ goto getTrailSingle;
+ }
+
+ /* state machine for single-byte mode */
+/* singleByteMode: */
+ while(source<sourceLimit) {
+ if(targetCapacity<=0) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ c=*source++;
+
+ if((c-0x20)<=0x5f) {
+ /* pass US-ASCII graphic character through */
+ *target++=(uint8_t)c;
+ --targetCapacity;
+ } else if(c<0x20) {
+ if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
+ /* CR/LF/TAB/NUL */
+ *target++=(uint8_t)c;
+ --targetCapacity;
+ } else {
+ /* quote C0 control character */
+ c|=SQ0<<8;
+ length=2;
+ goto outputBytes;
+ }
+ } else if((delta=c-currentOffset)<=0x7f) {
+ /* use the current dynamic window */
+ *target++=(uint8_t)(delta|0x80);
+ --targetCapacity;
+ } else if(U16_IS_SURROGATE(c)) {
+ if(U16_IS_SURROGATE_LEAD(c)) {
+getTrailSingle:
+ lead=(UChar)c;
+ if(source<sourceLimit) {
+ /* test the following code unit */
+ trail=*source;
+ if(U16_IS_TRAIL(trail)) {
+ ++source;
+ c=U16_GET_SUPPLEMENTARY(c, trail);
+ /* convert this surrogate code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+ } else {
+ /* no more input */
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+
+ /* compress supplementary character U+10000..U+10ffff */
+ if((delta=c-currentOffset)<=0x7f) {
+ /* use the current dynamic window */
+ *target++=(uint8_t)(delta|0x80);
+ --targetCapacity;
+ } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
+ /* there is a dynamic window that contains this character, change to it */
+ dynamicWindow=window;
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
+ length=2;
+ goto outputBytes;
+ } else if((code=getDynamicOffset(c, &offset))>=0) {
+ /* might check if there are more characters in this window to come */
+ /* define an extended window with this character */
+ code-=0x200;
+ dynamicWindow=getNextDynamicWindow(scsu);
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
+ length=4;
+ goto outputBytes;
+ } else {
+ /* change to Unicode mode and output this (lead, trail) pair */
+ isSingleByteMode=FALSE;
+ *target++=(uint8_t)SCU;
+ --targetCapacity;
+ c=((uint32_t)lead<<16)|trail;
+ length=4;
+ goto outputBytes;
+ }
+ } else if(c<0xa0) {
+ /* quote C1 control character */
+ c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
+ length=2;
+ goto outputBytes;
+ } else if(c==0xfeff || c>=0xfff0) {
+ /* quote signature character=byte order mark and specials */
+ c|=SQU<<16;
+ length=3;
+ goto outputBytes;
+ } else {
+ /* compress all other BMP characters */
+ if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
+ /* there is a window defined that contains this character - switch to it or quote from it? */
+ if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
+ /* change to dynamic window */
+ dynamicWindow=window;
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
+ length=2;
+ goto outputBytes;
+ } else {
+ /* quote from dynamic window */
+ c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
+ length=2;
+ goto outputBytes;
+ }
+ } else if((window=getWindow(staticOffsets, c))>=0) {
+ /* quote from static window */
+ c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
+ length=2;
+ goto outputBytes;
+ } else if((code=getDynamicOffset(c, &offset))>=0) {
+ /* define a dynamic window with this character */
+ dynamicWindow=getNextDynamicWindow(scsu);
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
+ length=3;
+ goto outputBytes;
+ } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
+ (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
+ ) {
+ /*
+ * this character is not compressible (a BMP ideograph or similar);
+ * switch to Unicode mode if this is the last character in the block
+ * or there is at least one more ideograph following immediately
+ */
+ isSingleByteMode=FALSE;
+ c|=SCU<<16;
+ length=3;
+ goto outputBytes;
+ } else {
+ /* quote Unicode */
+ c|=SQU<<16;
+ length=3;
+ goto outputBytes;
+ }
+ }
+
+ /* normal end of conversion: prepare for a new character */
+ c=0;
+ }
+ } else {
+ if(c!=0 && targetCapacity>0) {
+ goto getTrailUnicode;
+ }
+
+ /* state machine for Unicode mode */
+/* unicodeByteMode: */
+ while(source<sourceLimit) {
+ if(targetCapacity<=0) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ c=*source++;
+
+ if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
+ /* not compressible, write character directly */
+ if(targetCapacity>=2) {
+ *target++=(uint8_t)(c>>8);
+ *target++=(uint8_t)c;
+ targetCapacity-=2;
+ } else {
+ length=2;
+ goto outputBytes;
+ }
+ } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
+ /* compress BMP character if the following one is not an uncompressible ideograph */
+ if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
+ if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
+ /* ASCII digit or letter */
+ isSingleByteMode=TRUE;
+ c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
+ length=2;
+ goto outputBytes;
+ } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
+ /* there is a dynamic window that contains this character, change to it */
+ isSingleByteMode=TRUE;
+ dynamicWindow=window;
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
+ length=2;
+ goto outputBytes;
+ } else if((code=getDynamicOffset(c, &offset))>=0) {
+ /* define a dynamic window with this character */
+ isSingleByteMode=TRUE;
+ dynamicWindow=getNextDynamicWindow(scsu);
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
+ length=3;
+ goto outputBytes;
+ }
+ }
+
+ /* don't know how to compress this character, just write it directly */
+ length=2;
+ goto outputBytes;
+ } else if(c<0xe000) {
+ /* c is a surrogate */
+ if(U16_IS_SURROGATE_LEAD(c)) {
+getTrailUnicode:
+ lead=(UChar)c;
+ if(source<sourceLimit) {
+ /* test the following code unit */
+ trail=*source;
+ if(U16_IS_TRAIL(trail)) {
+ ++source;
+ c=U16_GET_SUPPLEMENTARY(c, trail);
+ /* convert this surrogate code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+ } else {
+ /* no more input */
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+
+ /* compress supplementary character */
+ if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
+ !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
+ ) {
+ /*
+ * there is a dynamic window that contains this character and
+ * the following character is not uncompressible,
+ * change to the window
+ */
+ isSingleByteMode=TRUE;
+ dynamicWindow=window;
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
+ length=2;
+ goto outputBytes;
+ } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
+ (code=getDynamicOffset(c, &offset))>=0
+ ) {
+ /* two supplementary characters in (probably) the same window - define an extended one */
+ isSingleByteMode=TRUE;
+ code-=0x200;
+ dynamicWindow=getNextDynamicWindow(scsu);
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
+ useDynamicWindow(scsu, dynamicWindow);
+ c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
+ length=4;
+ goto outputBytes;
+ } else {
+ /* don't know how to compress this character, just write it directly */
+ c=((uint32_t)lead<<16)|trail;
+ length=4;
+ goto outputBytes;
+ }
+ } else /* 0xe000<=c<0xf300 */ {
+ /* quote to avoid SCSU tags */
+ c|=UQU<<16;
+ length=3;
+ goto outputBytes;
+ }
+
+ /* normal end of conversion: prepare for a new character */
+ c=0;
+ }
+ }
+endloop:
+
+ /* set the converter state back into UConverter */
+ scsu->fromUIsSingleByteMode=isSingleByteMode;
+ scsu->fromUDynamicWindow=dynamicWindow;
+
+ cnv->fromUChar32=c;
+
+ /* write back the updated pointers */
+ pArgs->source=source;
+ pArgs->target=(char *)target;
+ return;
+
+outputBytes:
+ /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
+ /* from the first if in the loop we know that targetCapacity>0 */
+ if(length<=targetCapacity) {
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 4:
+ *target++=(uint8_t)(c>>24);
+ U_FALLTHROUGH;
+ case 3:
+ *target++=(uint8_t)(c>>16);
+ U_FALLTHROUGH;
+ case 2:
+ *target++=(uint8_t)(c>>8);
+ U_FALLTHROUGH;
+ case 1:
+ *target++=(uint8_t)c;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ targetCapacity-=length;
+
+ /* normal end of conversion: prepare for a new character */
+ c=0;
+ goto loop;
+ } else {
+ uint8_t *p;
+
+ /*
+ * We actually do this backwards here:
+ * In order to save an intermediate variable, we output
+ * first to the overflow buffer what does not fit into the
+ * regular target.
+ */
+ /* we know that 0<=targetCapacity<length<=4 */
+ /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
+ length-=targetCapacity;
+ p=(uint8_t *)cnv->charErrorBuffer;
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 4:
+ *p++=(uint8_t)(c>>24);
+ U_FALLTHROUGH;
+ case 3:
+ *p++=(uint8_t)(c>>16);
+ U_FALLTHROUGH;
+ case 2:
+ *p++=(uint8_t)(c>>8);
+ U_FALLTHROUGH;
+ case 1:
+ *p=(uint8_t)c;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ cnv->charErrorBufferLength=(int8_t)length;
+
+ /* now output what fits into the regular target */
+ /*
+ * length was reduced by targetCapacity.
+ * With targetCapacity==0 all 4 bytes went to the overflow buffer and
+ * length is still 4; shifting the 32-bit c by 32 bits would be undefined
+ * behavior, and there is nothing left to write, so do not shift then.
+ */
+ c= length<4 ? c>>(8*length) : 0;
+ switch(targetCapacity) {
+ /* each branch falls through to the next one */
+ case 3:
+ *target++=(uint8_t)(c>>16);
+ U_FALLTHROUGH;
+ case 2:
+ *target++=(uint8_t)(c>>8);
+ U_FALLTHROUGH;
+ case 1:
+ *target++=(uint8_t)c;
+ U_FALLTHROUGH;
+ default:
+ break;
+ }
+
+ /* target overflow */
+ targetCapacity=0;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ c=0;
+ goto endloop;
+ }
+}
+
+/* miscellaneous ------------------------------------------------------------ */
+
+/*
+ * Report the converter name for this SCSU instance.
+ * The name carries the "locale=ja" option when the instance's
+ * locale field is l_ja; every other locale value yields plain "SCSU".
+ */
+static const char * U_CALLCONV
+_SCSUGetName(const UConverter *cnv) {
+    const SCSUData *scsu=(const SCSUData *)cnv->extraInfo;
+
+    return scsu->locale==l_ja ? "SCSU,locale=ja" : "SCSU";
+}
+
+/*
+ * structure for SafeClone calculations:
+ * a converter followed by its SCSU state, so that _SCSUSafeClone() can
+ * place both in one caller-provided buffer and report the combined size
+ */
+struct cloneSCSUStruct
+{
+ UConverter cnv;
+ SCSUData mydata;
+};
+
+/*
+ * Finish cloning an SCSU converter into a caller-provided buffer.
+ *
+ * cnv          the converter being cloned; its extraInfo is copied
+ * stackBuffer  buffer that is used as a struct cloneSCSUStruct
+ * pBufferSize  in: 0 requests preflighting; out (preflighting only):
+ *              the buffer size needed
+ * status       nothing is done if this already indicates a failure
+ *
+ * Returns the converter inside stackBuffer, or NULL on failure or when
+ * preflighting.
+ */
+static UConverter * U_CALLCONV
+_SCSUSafeClone(const UConverter *cnv,
+               void *stackBuffer,
+               int32_t *pBufferSize,
+               UErrorCode *status)
+{
+    struct cloneSCSUStruct *clone;
+
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    if (*pBufferSize == 0) {
+        /* 'preflighting' request - only report the size that is needed */
+        *pBufferSize = (int32_t)sizeof(struct cloneSCSUStruct);
+        return NULL;
+    }
+
+    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
+    clone = (struct cloneSCSUStruct *)stackBuffer;
+
+    /* give the clone its own copy of the SCSU state, stored right behind it */
+    uprv_memcpy(&clone->mydata, cnv->extraInfo, sizeof(SCSUData));
+    clone->cnv.isExtraLocal = TRUE;
+    clone->cnv.extraInfo = &clone->mydata;
+
+    return &clone->cnv;
+}
+U_CDECL_END
+
+/*
+ * Table of the SCSU converter's implementation functions.
+ * The initializer is positional, so the order of the entries must not change.
+ * NOTE(review): the slot names come from the UConverterImpl declaration, which
+ * is not visible here; NULL presumably means "not provided" - confirm there.
+ */
+static const UConverterImpl _SCSUImpl={
+ UCNV_SCSU,
+
+ NULL,
+ NULL,
+
+ _SCSUOpen,
+ _SCSUClose,
+ _SCSUReset,
+
+ _SCSUToUnicode,
+ _SCSUToUnicodeWithOffsets,
+ _SCSUFromUnicode,
+ _SCSUFromUnicodeWithOffsets,
+ NULL,
+
+ NULL,
+ _SCSUGetName,
+ NULL,
+ _SCSUSafeClone,
+ ucnv_getCompleteUnicodeSet,
+ NULL,
+ NULL
+};
+
+/*
+ * Constant description of the SCSU converter: name, CCSID, platform and
+ * converter type, bytes-per-UChar limits and the substitution bytes.
+ * The initializer is positional, so the order of the entries must not change.
+ */
+static const UConverterStaticData _SCSUStaticData={
+ sizeof(UConverterStaticData),
+ "SCSU",
+ 1212, /* CCSID for SCSU */
+ UCNV_IBM, UCNV_SCSU,
+ 1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */
+ /*
+ * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
+ * substitution string.
+ */
+ { 0x0e, 0xff, 0xfd, 0 }, 3,
+ FALSE, FALSE,
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+/* the externally visible shared data for SCSU: ties the static data to the function table */
+const UConverterSharedData _SCSUData=
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_SCSUStaticData, &_SCSUImpl);
+
+#endif
diff --git a/thirdparty/icu4c/common/ucnvsel.cpp b/thirdparty/icu4c/common/ucnvsel.cpp
new file mode 100644
index 0000000000..2dff5ac1bc
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnvsel.cpp
@@ -0,0 +1,823 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2008-2011, International Business Machines
+* Corporation, Google and others. All Rights Reserved.
+*
+*******************************************************************************
+*/
+// Author : eldawy@google.com (Mohamed Eldawy)
+// ucnvsel.cpp
+//
+// Purpose: To generate a list of encodings capable of handling
+// a given Unicode text
+//
+// Started 09-April-2008
+
+/**
+ * \file
+ *
+ * This is an implementation of an encoding selector.
+ * The goal is, given a unicode string, find the encodings
+ * this string can be mapped to. To make processing faster
+ * a trie is built when you call ucnvsel_open() that
+ * stores all encodings a codepoint can map to
+ */
+
+#include "unicode/ucnvsel.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include <string.h>
+
+#include "unicode/uchar.h"
+#include "unicode/uniset.h"
+#include "unicode/ucnv.h"
+#include "unicode/ustring.h"
+#include "unicode/uchriter.h"
+#include "utrie2.h"
+#include "propsvec.h"
+#include "uassert.h"
+#include "ucmndata.h"
+#include "udataswp.h"
+#include "uenumimp.h"
+#include "cmemory.h"
+#include "cstring.h"
+
+U_NAMESPACE_USE
+
+// State behind a UConverterSelector handle.
+// ucnvsel_open() builds pv and the name strings itself and marks them as owned;
+// ucnvsel_openFromSerialized() points pv and the names into the serialized
+// (or swapped) buffer instead and leaves both ownership flags cleared.
+struct UConverterSelector {
+ UTrie2 *trie; // 16 bit trie containing offsets into pv
+ uint32_t* pv; // table of bits!
+ int32_t pvCount; // number of uint32_t entries in pv
+ char** encodings; // which encodings did user ask to use?
+ int32_t encodingsCount; // number of entries in encodings
+ int32_t encodingStrLength; // bytes of name storage, including NULs and padding
+ uint8_t* swapped; // swapped copy of serialized input (or NULL); freed by ucnvsel_close()
+ UBool ownPv, ownEncodingStrings; // whether ucnvsel_close() frees pv / encodings[0]
+};
+
+/**
+ * Builds the selector's lookup data from the converters named in
+ * result->encodings.
+ *
+ * Encoding number i is represented by bit (i % 32) of column (i / 32) of the
+ * property vectors. For every converter, the bit is set on all code point
+ * ranges reported by ucnv_getUnicodeSet(). Code points in excludedCodePoints
+ * get all bits set, i.e. they never rule out an encoding.
+ *
+ * @param result  selector with encodings/encodingsCount already filled in;
+ *                receives trie, pv, pvCount and ownPv
+ * @param upvec   property vectors opened with (encodingsCount+31)/32 columns
+ * @param excludedCodePoints  code points to ignore when selecting; may be NULL
+ * @param whichSet  passed through to ucnv_getUnicodeSet()
+ * @param status  in/out ICU error code; the function does nothing if it
+ *                already indicates a failure
+ */
+static void generateSelectorData(UConverterSelector* result,
+                                 UPropsVectors *upvec,
+                                 const USet* excludedCodePoints,
+                                 const UConverterUnicodeSet whichSet,
+                                 UErrorCode* status) {
+  if (U_FAILURE(*status)) {
+    return;
+  }
+
+  int32_t columns = (result->encodingsCount+31)/32;
+
+  // set errorValue to all-ones
+  for (int32_t col = 0; col < columns; col++) {
+    upvec_setValue(upvec, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP,
+                   col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), status);
+  }
+
+  for (int32_t i = 0; i < result->encodingsCount; ++i) {
+    UConverter* test_converter = ucnv_open(result->encodings[i], status);
+    if (U_FAILURE(*status)) {
+      return;
+    }
+    USet* unicode_point_set = uset_open(1, 0);  // empty set
+
+    ucnv_getUnicodeSet(test_converter, unicode_point_set,
+                       whichSet, status);
+    if (U_FAILURE(*status)) {
+      // close the set as well as the converter so that neither one leaks
+      ucnv_close(test_converter);
+      uset_close(unicode_point_set);
+      return;
+    }
+
+    const uint32_t column = i / 32;
+    // shift an unsigned 1 so that bit 31 is reached without signed overflow
+    const uint32_t mask = static_cast<uint32_t>(1) << (i%32);
+    // now iterate over intervals on set i!
+    const int32_t item_count = uset_getItemCount(unicode_point_set);
+
+    for (int32_t j = 0; j < item_count; ++j) {
+      UChar32 start_char;
+      UChar32 end_char;
+      UErrorCode smallStatus = U_ZERO_ERROR;
+      uset_getItem(unicode_point_set, j, &start_char, &end_char, NULL, 0,
+                   &smallStatus);
+      if (U_FAILURE(smallStatus)) {
+        // this will be reached for the converters that fill the set with
+        // strings. Those should be ignored by our system
+      } else {
+        upvec_setValue(upvec, start_char, end_char, column, static_cast<uint32_t>(~0), mask,
+                       status);
+      }
+    }
+    ucnv_close(test_converter);
+    uset_close(unicode_point_set);
+    if (U_FAILURE(*status)) {
+      return;
+    }
+  }
+
+  // handle excluded code points! Simply set their values to all 1's in the upvec
+  if (excludedCodePoints) {
+    const int32_t item_count = uset_getItemCount(excludedCodePoints);
+    for (int32_t j = 0; j < item_count; ++j) {
+      UChar32 start_char;
+      UChar32 end_char;
+
+      uset_getItem(excludedCodePoints, j, &start_char, &end_char, NULL, 0,
+                   status);
+      for (int32_t col = 0; col < columns; col++) {
+        upvec_setValue(upvec, start_char, end_char, col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0),
+                       status);
+      }
+    }
+  }
+
+  // alright. Now, let's put things in the same exact form you'd get when you
+  // unserialize things.
+  result->trie = upvec_compactToUTrie2WithRowIndexes(upvec, status);
+  result->pv = upvec_cloneArray(upvec, &result->pvCount, NULL, status);
+  result->pvCount *= columns; // number of uint32_t = rows * columns
+  result->ownPv = TRUE;
+}
+
+/**
+ * Opens a selector for a list of converters.
+ *
+ * If converterListSize is 0, the selector is built for all available
+ * converters. If excludedCodePoints is NULL, no code points are excluded.
+ *
+ * The converter names are copied into one block owned by the selector, padded
+ * with NUL bytes to a multiple of 4 so that the serialized form stays
+ * 4-aligned.
+ *
+ * @param converterList      array of converterListSize converter names, or
+ *                           NULL when converterListSize is 0
+ * @param converterListSize  number of names; must not be negative
+ * @param excludedCodePoints code points to ignore when selecting; may be NULL
+ * @param whichSet           which Unicode set to query from each converter
+ * @param status             in/out ICU error code
+ * @return the new selector, to be released with ucnvsel_close(), or NULL
+ *         with *status set on failure
+ */
+U_CAPI UConverterSelector* U_EXPORT2
+ucnvsel_open(const char* const* converterList, int32_t converterListSize,
+             const USet* excludedCodePoints,
+             const UConverterUnicodeSet whichSet, UErrorCode* status) {
+  // check if already failed
+  if (U_FAILURE(*status)) {
+    return NULL;
+  }
+  // ensure args make sense!
+  if (converterListSize < 0 || (converterList == NULL && converterListSize != 0)) {
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    return NULL;
+  }
+
+  // allocate a new converter
+  LocalUConverterSelectorPointer newSelector(
+    (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector)));
+  if (newSelector.isNull()) {
+    *status = U_MEMORY_ALLOCATION_ERROR;
+    return NULL;
+  }
+  uprv_memset(newSelector.getAlias(), 0, sizeof(UConverterSelector));
+
+  if (converterListSize == 0) {
+    converterList = NULL;
+    converterListSize = ucnv_countAvailable();
+  }
+  // Reserve at least one slot: encodings[0] is written below and read by
+  // ucnvsel_close(), and the number of available converters may be 0.
+  const int32_t slotCount = converterListSize > 0 ? converterListSize : 1;
+  newSelector->encodings =
+    (char**)uprv_malloc(slotCount * sizeof(char*));
+  if (!newSelector->encodings) {
+    *status = U_MEMORY_ALLOCATION_ERROR;
+    return NULL;
+  }
+  newSelector->encodings[0] = NULL;  // now we can call ucnvsel_close()
+
+  // make a backup copy of the list of converters
+  int32_t totalSize = 0;
+  int32_t i;
+  for (i = 0; i < converterListSize; i++) {
+    totalSize +=
+      (int32_t)uprv_strlen(converterList != NULL ? converterList[i] : ucnv_getAvailableName(i)) + 1;
+  }
+  // 4-align the totalSize to 4-align the size of the serialized form
+  int32_t encodingStrPadding = totalSize & 3;
+  if (encodingStrPadding != 0) {
+    encodingStrPadding = 4 - encodingStrPadding;
+  }
+  newSelector->encodingStrLength = totalSize += encodingStrPadding;
+  char* allStrings = (char*) uprv_malloc(totalSize);
+  if (!allStrings) {
+    *status = U_MEMORY_ALLOCATION_ERROR;
+    return NULL;
+  }
+  // Hand the block to the selector immediately so that it is released by
+  // ucnvsel_close() on every exit path, including an empty converter list.
+  newSelector->encodings[0] = allStrings;
+  newSelector->ownEncodingStrings = TRUE;
+
+  for (i = 0; i < converterListSize; i++) {
+    newSelector->encodings[i] = allStrings;
+    uprv_strcpy(newSelector->encodings[i],
+                converterList != NULL ? converterList[i] : ucnv_getAvailableName(i));
+    allStrings += uprv_strlen(newSelector->encodings[i]) + 1;
+  }
+  while (encodingStrPadding > 0) {
+    *allStrings++ = 0;
+    --encodingStrPadding;
+  }
+
+  newSelector->encodingsCount = converterListSize;
+  UPropsVectors *upvec = upvec_open((converterListSize+31)/32, status);
+  generateSelectorData(newSelector.getAlias(), upvec, excludedCodePoints, whichSet, status);
+  upvec_close(upvec);
+
+  if (U_FAILURE(*status)) {
+    return NULL;
+  }
+
+  return newSelector.orphan();
+}
+
+/**
+ * Closes a selector and releases everything it owns.
+ * The name strings and the bit vectors are only freed when the selector owns
+ * them; a NULL selector is ignored.
+ */
+U_CAPI void U_EXPORT2
+ucnvsel_close(UConverterSelector *sel) {
+  if (sel == NULL) {
+    return;
+  }
+
+  // encoding names: one shared string block (if owned) plus the pointer array
+  char **names = sel->encodings;
+  if (sel->ownEncodingStrings) {
+    uprv_free(names[0]);
+  }
+  uprv_free(names);
+
+  // bit vectors and trie
+  if (sel->ownPv) {
+    uprv_free(sel->pv);
+  }
+  utrie2_close(sel->trie);
+
+  // swapped copy of serialized data, then the selector itself
+  uprv_free(sel->swapped);
+  uprv_free(sel);
+}
+
+/*
+ * UDataInfo copied into the header that ucnvsel_serialize() writes:
+ * endianness and charset family of the running platform, data format "CSel",
+ * format version 1.
+ */
+static const UDataInfo dataInfo = {
+ sizeof(UDataInfo),
+ 0,
+
+ U_IS_BIG_ENDIAN,
+ U_CHARSET_FAMILY,
+ U_SIZEOF_UCHAR,
+ 0,
+
+ { 0x43, 0x53, 0x65, 0x6c }, /* dataFormat="CSel" */
+ { 1, 0, 0, 0 }, /* formatVersion */
+ { 0, 0, 0, 0 } /* dataVersion */
+};
+
+/*
+ * Positions within the int32_t indexes[] array of the serialized form.
+ * ucnvsel_serialize() fills in the first four entries and
+ * UCNVSEL_INDEX_SIZE; the entries in between are left as zero.
+ */
+enum {
+ UCNVSEL_INDEX_TRIE_SIZE, // trie size in bytes
+ UCNVSEL_INDEX_PV_COUNT, // number of uint32_t in the bit vectors
+ UCNVSEL_INDEX_NAMES_COUNT, // number of encoding names
+ UCNVSEL_INDEX_NAMES_LENGTH, // number of encoding name bytes including padding
+ UCNVSEL_INDEX_SIZE = 15, // bytes following the DataHeader
+ UCNVSEL_INDEX_COUNT = 16 // number of entries in indexes[]
+};
+
+/*
+ * Serialized form of a UConverterSelector, formatVersion 1:
+ *
+ * The serialized form begins with a standard ICU DataHeader with a UDataInfo
+ * as the template above.
+ * This is followed by:
+ * int32_t indexes[UCNVSEL_INDEX_COUNT]; // see index entry constants above
+ * serialized UTrie2; // indexes[UCNVSEL_INDEX_TRIE_SIZE] bytes
+ * uint32_t pv[indexes[UCNVSEL_INDEX_PV_COUNT]]; // bit vectors
+ * char encodingNames[indexes[UCNVSEL_INDEX_NAMES_LENGTH]]; // NUL-terminated strings + padding
+ */
+
+/**
+ * Serializes a selector into a caller-provided buffer.
+ *
+ * The output consists of a data header (padded to a multiple of 16 bytes),
+ * the indexes[] array, the serialized trie, the bit vectors and the block of
+ * encoding names, in that order.
+ *
+ * @param sel            selector to serialize
+ * @param buffer         output buffer; must be 4-aligned when bufferCapacity > 0
+ * @param bufferCapacity capacity of buffer in bytes; 0 may be used to preflight
+ * @param status         in/out ICU error code; set to U_BUFFER_OVERFLOW_ERROR
+ *                       when the buffer is too small
+ * @return the number of bytes needed for the serialized form, or 0 on other errors
+ */
+U_CAPI int32_t U_EXPORT2
+ucnvsel_serialize(const UConverterSelector* sel,
+                  void* buffer, int32_t bufferCapacity, UErrorCode* status) {
+  if (U_FAILURE(*status)) {
+    return 0;
+  }
+
+  // argument checks: no negative capacity, and a real, 4-aligned buffer
+  // whenever there is any capacity at all
+  uint8_t *out = (uint8_t *)buffer;
+  if (bufferCapacity < 0) {
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    return 0;
+  }
+  if (bufferCapacity > 0 && (out == NULL || (U_POINTER_MASK_LSB(out, 3) != 0))) {
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    return 0;
+  }
+
+  // preflight the trie; a buffer overflow is the expected outcome here
+  const int32_t trieBytes = utrie2_serialize(sel->trie, NULL, 0, status);
+  if (U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) {
+    return 0;
+  }
+  *status = U_ZERO_ERROR;
+
+  // data header, rounded up to a multiple of 16 bytes
+  DataHeader header;
+  uprv_memset(&header, 0, sizeof(header));
+  header.dataHeader.headerSize = (uint16_t)((sizeof(header) + 15) & ~15);
+  header.dataHeader.magic1 = 0xda;
+  header.dataHeader.magic2 = 0x27;
+  uprv_memcpy(&header.info, &dataInfo, sizeof(dataInfo));
+
+  const int32_t headerBytes = header.dataHeader.headerSize;
+  const int32_t pvBytes = sel->pvCount * 4;
+  const int32_t nameBytes = sel->encodingStrLength;
+
+  int32_t indexes[UCNVSEL_INDEX_COUNT] = { 0 };
+  indexes[UCNVSEL_INDEX_TRIE_SIZE] = trieBytes;
+  indexes[UCNVSEL_INDEX_PV_COUNT] = sel->pvCount;
+  indexes[UCNVSEL_INDEX_NAMES_COUNT] = sel->encodingsCount;
+  indexes[UCNVSEL_INDEX_NAMES_LENGTH] = nameBytes;
+
+  const int32_t totalSize =
+    headerBytes + (int32_t)sizeof(indexes) + trieBytes + pvBytes + nameBytes;
+  indexes[UCNVSEL_INDEX_SIZE] = totalSize - headerBytes;
+  if (totalSize > bufferCapacity) {
+    *status = U_BUFFER_OVERFLOW_ERROR;
+    return totalSize;
+  }
+
+  // ok, save! header first, with its padding zeroed
+  uprv_memcpy(out, &header, sizeof(header));
+  uprv_memset(out + sizeof(header), 0, headerBytes - sizeof(header));
+  out += headerBytes;
+
+  uprv_memcpy(out, indexes, (int32_t)sizeof(indexes));
+  out += (int32_t)sizeof(indexes);
+
+  utrie2_serialize(sel->trie, out, trieBytes, status);
+  out += trieBytes;
+
+  uprv_memcpy(out, sel->pv, pvBytes);
+  out += pvBytes;
+
+  uprv_memcpy(out, sel->encodings[0], nameBytes);
+
+  return totalSize;
+}
+
+/**
+ * swap a selector into the desired Endianness and Asciiness of
+ * the system. Just as FYI, selectors are always saved in the format
+ * of the system that created them. They are only converted if used
+ * on another system. In other words, selectors created on different
+ * system can be different even if the params are identical (endianness
+ * and Asciiness differences only)
+ *
+ * A negative length requests preflighting: the header and indexes are read
+ * to compute the total size, but nothing after the header is swapped
+ * (ucnvsel_openFromSerialized() calls it this way with a NULL outData).
+ *
+ * @param ds pointer to data swapper containing swapping info
+ * @param inData pointer to incoming data
+ * @param length length of inData in bytes, or negative for preflighting
+ * @param outData pointer to output data. Capacity should
+ * be at least equal to capacity of inData
+ * @param status an in/out ICU UErrorCode
+ * @return 0 on failure, number of bytes swapped on success
+ * number of bytes swapped can be smaller than length
+ */
+static int32_t
+ucnvsel_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length,
+ void *outData, UErrorCode *status) {
+ /* udata_swapDataHeader checks the arguments */
+ int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, status);
+ if(U_FAILURE(*status)) {
+ return 0;
+ }
+
+ /* check data format and format version */
+ /* the UDataInfo is read from 4 bytes into the data */
+ const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4);
+ if(!(
+ pInfo->dataFormat[0] == 0x43 && /* dataFormat="CSel" */
+ pInfo->dataFormat[1] == 0x53 &&
+ pInfo->dataFormat[2] == 0x65 &&
+ pInfo->dataFormat[3] == 0x6c
+ )) {
+ udata_printError(ds, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n",
+ pInfo->dataFormat[0], pInfo->dataFormat[1],
+ pInfo->dataFormat[2], pInfo->dataFormat[3]);
+ *status = U_INVALID_FORMAT_ERROR;
+ return 0;
+ }
+ if(pInfo->formatVersion[0] != 1) {
+ udata_printError(ds, "ucnvsel_swap(): format version %02x is not supported\n",
+ pInfo->formatVersion[0]);
+ *status = U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ /* when not preflighting, there must be room for the 16 int32_t indexes */
+ if(length >= 0) {
+ length -= headerSize;
+ if(length < 16*4) {
+ udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n",
+ length);
+ *status = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ }
+
+ const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
+ uint8_t *outBytes = (uint8_t *)outData + headerSize;
+
+ /* read the indexes */
+ const int32_t *inIndexes = (const int32_t *)inBytes;
+ int32_t indexes[16];
+ int32_t i;
+ for(i = 0; i < 16; ++i) {
+ indexes[i] = udata_readInt32(ds, inIndexes[i]);
+ }
+
+ /* get the total length of the data */
+ int32_t size = indexes[UCNVSEL_INDEX_SIZE];
+ if(length >= 0) {
+ if(length < size) {
+ udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n",
+ length);
+ *status = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ /* copy the data for inaccessible bytes */
+ if(inBytes != outBytes) {
+ uprv_memcpy(outBytes, inBytes, size);
+ }
+
+ /* the sections below are swapped in the order in which they are stored */
+ int32_t offset = 0, count;
+
+ /* swap the int32_t indexes[] */
+ count = UCNVSEL_INDEX_COUNT*4;
+ ds->swapArray32(ds, inBytes, count, outBytes, status);
+ offset += count;
+
+ /* swap the UTrie2 */
+ count = indexes[UCNVSEL_INDEX_TRIE_SIZE];
+ utrie2_swap(ds, inBytes + offset, count, outBytes + offset, status);
+ offset += count;
+
+ /* swap the uint32_t pv[] */
+ count = indexes[UCNVSEL_INDEX_PV_COUNT]*4;
+ ds->swapArray32(ds, inBytes + offset, count, outBytes + offset, status);
+ offset += count;
+
+ /* swap the encoding names */
+ count = indexes[UCNVSEL_INDEX_NAMES_LENGTH];
+ ds->swapInvChars(ds, inBytes + offset, count, outBytes + offset, status);
+ offset += count;
+
+ U_ASSERT(offset == size);
+ }
+
+ /* total size of the serialized form: header plus everything after it */
+ return headerSize + size;
+}
+
+/**
+ * Unserializes a selector from data written by ucnvsel_serialize().
+ *
+ * The returned selector does not copy the bit vectors or the encoding names:
+ * they point into buffer, or into an internal swapped copy when the data was
+ * written with a different endianness or charset family. In the former case
+ * the buffer therefore has to stay valid while the selector is in use.
+ *
+ * The section sizes stored in the indexes are checked against the declared
+ * data size before they are used, and the encoding names are walked with an
+ * explicit limit, so that truncated or corrupt input is rejected with
+ * U_INVALID_FORMAT_ERROR instead of being read out of bounds.
+ *
+ * @param buffer  4-aligned pointer to the serialized data
+ * @param length  number of bytes available at buffer; must be positive
+ * @param status  in/out ICU error code
+ * @return the selector, to be released with ucnvsel_close(), or NULL with
+ *         *status set on failure
+ */
+U_CAPI UConverterSelector* U_EXPORT2
+ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status) {
+  // check if already failed
+  if (U_FAILURE(*status)) {
+    return NULL;
+  }
+  // ensure args make sense!
+  const uint8_t *p = (const uint8_t *)buffer;
+  if (length <= 0 || p == NULL || (U_POINTER_MASK_LSB(p, 3) != 0)) {
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    return NULL;
+  }
+  // header
+  if (length < 32) {
+    // not even enough space for a minimal header
+    *status = U_INDEX_OUTOFBOUNDS_ERROR;
+    return NULL;
+  }
+  const DataHeader *pHeader = (const DataHeader *)p;
+  if (!(
+    pHeader->dataHeader.magic1==0xda &&
+    pHeader->dataHeader.magic2==0x27 &&
+    pHeader->info.dataFormat[0] == 0x43 &&
+    pHeader->info.dataFormat[1] == 0x53 &&
+    pHeader->info.dataFormat[2] == 0x65 &&
+    pHeader->info.dataFormat[3] == 0x6c
+  )) {
+    /* header not valid or dataFormat not recognized */
+    *status = U_INVALID_FORMAT_ERROR;
+    return NULL;
+  }
+  if (pHeader->info.formatVersion[0] != 1) {
+    *status = U_UNSUPPORTED_ERROR;
+    return NULL;
+  }
+  uint8_t* swapped = NULL;
+  if (pHeader->info.isBigEndian != U_IS_BIG_ENDIAN ||
+      pHeader->info.charsetFamily != U_CHARSET_FAMILY
+  ) {
+    // swap the data
+    UDataSwapper *ds =
+      udata_openSwapperForInputData(p, length, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, status);
+    // first pass: preflight to learn the total size
+    int32_t totalSize = ucnvsel_swap(ds, p, -1, NULL, status);
+    if (U_FAILURE(*status)) {
+      udata_closeSwapper(ds);
+      return NULL;
+    }
+    if (length < totalSize) {
+      udata_closeSwapper(ds);
+      *status = U_INDEX_OUTOFBOUNDS_ERROR;
+      return NULL;
+    }
+    swapped = (uint8_t*)uprv_malloc(totalSize);
+    if (swapped == NULL) {
+      udata_closeSwapper(ds);
+      *status = U_MEMORY_ALLOCATION_ERROR;
+      return NULL;
+    }
+    // second pass: swap into the new buffer
+    ucnvsel_swap(ds, p, length, swapped, status);
+    udata_closeSwapper(ds);
+    if (U_FAILURE(*status)) {
+      uprv_free(swapped);
+      return NULL;
+    }
+    p = swapped;
+    pHeader = (const DataHeader *)p;
+  }
+  if (length < (pHeader->dataHeader.headerSize + 16 * 4)) {
+    // not even enough space for the header and the indexes
+    uprv_free(swapped);
+    *status = U_INDEX_OUTOFBOUNDS_ERROR;
+    return NULL;
+  }
+  p += pHeader->dataHeader.headerSize;
+  length -= pHeader->dataHeader.headerSize;
+  // indexes
+  const int32_t *indexes = (const int32_t *)p;
+  if (length < indexes[UCNVSEL_INDEX_SIZE]) {
+    uprv_free(swapped);
+    *status = U_INDEX_OUTOFBOUNDS_ERROR;
+    return NULL;
+  }
+  // The sections must fit into the declared data size. This is checked
+  // against indexes[UCNVSEL_INDEX_SIZE] rather than length because a swapped
+  // copy is only as large as the declared size. 64-bit arithmetic keeps the
+  // sum from overflowing.
+  const int32_t trieSize = indexes[UCNVSEL_INDEX_TRIE_SIZE];
+  const int32_t pvCount = indexes[UCNVSEL_INDEX_PV_COUNT];
+  const int32_t namesCount = indexes[UCNVSEL_INDEX_NAMES_COUNT];
+  const int32_t namesLength = indexes[UCNVSEL_INDEX_NAMES_LENGTH];
+  const int64_t sectionsSize =
+    (int64_t)UCNVSEL_INDEX_COUNT * 4 + trieSize + (int64_t)pvCount * 4 + namesLength;
+  if (trieSize < 0 || pvCount < 0 || namesCount < 0 || namesLength < 0 ||
+      namesCount > namesLength ||  // every name needs at least its NUL byte
+      sectionsSize > indexes[UCNVSEL_INDEX_SIZE]) {
+    uprv_free(swapped);
+    *status = U_INVALID_FORMAT_ERROR;
+    return NULL;
+  }
+  p += UCNVSEL_INDEX_COUNT * 4;
+  // create and populate the selector object
+  UConverterSelector* sel = (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector));
+  char **encodings =
+    (char **)uprv_malloc(namesCount * sizeof(char *));
+  if (sel == NULL || encodings == NULL) {
+    uprv_free(swapped);
+    uprv_free(sel);
+    uprv_free(encodings);
+    *status = U_MEMORY_ALLOCATION_ERROR;
+    return NULL;
+  }
+  uprv_memset(sel, 0, sizeof(UConverterSelector));
+  sel->pvCount = pvCount;
+  sel->encodings = encodings;
+  sel->encodingsCount = namesCount;
+  sel->encodingStrLength = namesLength;
+  sel->swapped = swapped;  // from here on released by ucnvsel_close()
+  // trie
+  sel->trie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
+                                        p, trieSize, NULL,
+                                        status);
+  p += trieSize;
+  if (U_FAILURE(*status)) {
+    ucnvsel_close(sel);
+    return NULL;
+  }
+  // bit vectors (not copied; ownPv stays FALSE)
+  sel->pv = (uint32_t *)p;
+  p += sel->pvCount * 4;
+  // encoding names (not copied; ownEncodingStrings stays FALSE)
+  char* s = (char*)p;
+  const char* namesLimit = s + sel->encodingStrLength;
+  for (int32_t i = 0; i < sel->encodingsCount; ++i) {
+    sel->encodings[i] = s;
+    while (s < namesLimit && *s != 0) {
+      ++s;
+    }
+    if (s == namesLimit) {
+      // ran out of name bytes before finding the terminating NUL
+      ucnvsel_close(sel);
+      *status = U_INVALID_FORMAT_ERROR;
+      return NULL;
+    }
+    ++s;  // skip the NUL
+  }
+
+  return sel;
+}
+
+// a bunch of functions for the enumeration thingie! Nothing fancy here. Just
+// iterate over the selected encodings
+//
+// Enumerator is the context object stored in UEnumeration::context by
+// selectForMask().
+struct Enumerator {
+ int16_t* index; // indexes into sel->encodings of the selected encodings (may be NULL when length is 0)
+ int16_t length; // number of entries in index
+ int16_t cur; // position of the next entry to return
+ const UConverterSelector* sel; // selector that provides the encoding names
+};
+
+U_CDECL_BEGIN
+
+/* Releases an enumeration created by selectForMask(): its index table, its context, and the enumeration itself. */
+static void U_CALLCONV
+ucnvsel_close_selector_iterator(UEnumeration *enumerator) {
+  Enumerator *ctx = (Enumerator*)(enumerator->context);
+  uprv_free(ctx->index);
+  uprv_free(ctx);
+  uprv_free(enumerator);
+}
+
+
+/* Returns the number of selected encodings, or 0 if *status already indicates a failure. */
+static int32_t U_CALLCONV
+ucnvsel_count_encodings(UEnumeration *enumerator, UErrorCode *status) {
+  if (U_FAILURE(*status)) {
+    return 0;
+  }
+  const Enumerator *ctx = (Enumerator*)(enumerator->context);
+  return ctx->length;
+}
+
+
+/*
+ * Returns the name of the next selected encoding and advances the iterator.
+ * Returns NULL when the iteration is finished or *status already indicates a
+ * failure. If resultLength is not NULL, it receives the length of the name.
+ */
+static const char* U_CALLCONV ucnvsel_next_encoding(UEnumeration* enumerator,
+                                                    int32_t* resultLength,
+                                                    UErrorCode* status) {
+  if (U_FAILURE(*status)) {
+    return NULL;
+  }
+
+  Enumerator *ctx = (Enumerator*)(enumerator->context);
+  const int16_t position = ctx->cur;
+  if (position >= ctx->length) {
+    return NULL;
+  }
+
+  const char *name = ctx->sel->encodings[ctx->index[position]];
+  ctx->cur++;
+  if (resultLength != NULL) {
+    *resultLength = (int32_t)uprv_strlen(name);
+  }
+  return name;
+}
+
+/* Rewinds the iterator to the first selected encoding, unless *status already indicates a failure. */
+static void U_CALLCONV ucnvsel_reset_iterator(UEnumeration* enumerator,
+                                              UErrorCode* status) {
+  if (U_SUCCESS(*status)) {
+    Enumerator *ctx = (Enumerator*)(enumerator->context);
+    ctx->cur = 0;
+  }
+}
+
+U_CDECL_END
+
+
+// Template for the enumerations returned by the select functions:
+// selectForMask() copies it and then sets the copy's context.
+// The initializer is positional; the member order comes from UEnumeration
+// (declared elsewhere), so the two leading NULLs are presumably its context
+// pointers - confirm against the declaration.
+static const UEnumeration defaultEncodings = {
+ NULL,
+ NULL,
+ ucnvsel_close_selector_iterator,
+ ucnvsel_count_encodings,
+ uenum_unextDefault,
+ ucnvsel_next_encoding,
+ ucnvsel_reset_iterator
+};
+
+
+// internal fn: ANDs len words of source1 into dest, in place
+// returns whether dest has been reduced to all zeros
+static UBool intersectMasks(uint32_t* dest, const uint32_t* source1, int32_t len) {
+  uint32_t remainingBits = 0;
+  for (int32_t left = len; left > 0; --left, ++dest, ++source1) {
+    *dest &= *source1;
+    remainingBits |= *dest;
+  }
+  return remainingBits == 0;
+}
+
+// internal fn: returns the total number of 1 bits in the len words of mask
+// algorithm taken from http://graphics.stanford.edu/~seander/bithacks.html
+static int16_t countOnes(uint32_t* mask, int32_t len) {
+  int32_t ones = 0;
+  for (int32_t word = 0; word < len; ++word) {
+    uint32_t bits = mask[word];
+    while (bits != 0) {
+      bits &= bits - 1;  // clear the least significant bit set
+      ++ones;
+    }
+  }
+  return static_cast<int16_t>(ones);
+}
+
+
+/*
+ * internal function!
+ *
+ * Turns a bit mask of encodings into an enumeration over their names.
+ * Takes ownership of theMask, which is freed before returning in all cases.
+ * Returns nullptr with *status set to U_MEMORY_ALLOCATION_ERROR if an
+ * allocation fails.
+ */
+static UEnumeration *selectForMask(const UConverterSelector* sel,
+                                   uint32_t *theMask, UErrorCode *status) {
+  LocalMemory<uint32_t> mask(theMask);
+
+  // The context records which entries of sel->encodings were selected.
+  LocalMemory<Enumerator> result(static_cast<Enumerator *>(uprv_malloc(sizeof(Enumerator))));
+  if (result.isNull()) {
+    *status = U_MEMORY_ALLOCATION_ERROR;
+    return nullptr;
+  }
+  result->index = nullptr;  // allocated below, once the count is known
+  result->length = 0;
+  result->cur = 0;
+  result->sel = sel;
+
+  LocalMemory<UEnumeration> en(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
+  if (en.isNull()) {
+    // TODO(markus): Combine Enumerator and UEnumeration into one struct.
+    *status = U_MEMORY_ALLOCATION_ERROR;
+    return nullptr;
+  }
+  memcpy(en.getAlias(), &defaultEncodings, sizeof(UEnumeration));
+
+  const int32_t columns = (sel->encodingsCount+31)/32;
+  const int16_t numOnes = countOnes(mask.getAlias(), columns);
+  if (numOnes > 0) {
+    // numOnes is enough room for every selected encoding
+    result->index = static_cast<int16_t*>(uprv_malloc(numOnes * sizeof(int16_t)));
+    if (result->index == nullptr) {
+      *status = U_MEMORY_ALLOCATION_ERROR;
+      return nullptr;
+    }
+    int16_t encodingNumber = 0;
+    for (int32_t col = 0; col < columns; ++col) {
+      uint32_t bits = mask[col];
+      for (int32_t bit = 0;
+           bit < 32 && encodingNumber < sel->encodingsCount;
+           ++bit, ++encodingNumber, bits >>= 1) {
+        if ((bits & 1) != 0) {
+          result->index[result->length++] = encodingNumber;
+        }
+      }
+    }
+  }
+  // with no bits set, index stays null; the enumerator code never touches
+  // it because length is 0
+  en->context = result.orphan();
+  return en.orphan();
+}
+
+/**
+ * check a string against the selector - UTF16 version
+ *
+ * Starts with all encodings selected and intersects with the bit vector of
+ * each code point of s, stopping early once no encoding is left.
+ *
+ * @param sel    the selector; must not be NULL
+ * @param s      the string; may be NULL only if length is 0
+ * @param length number of UChars in s, or negative if s is NUL-terminated
+ * @param status in/out ICU error code
+ * @return an enumeration over the names of the remaining encodings, or NULL
+ *         on failure
+ */
+U_CAPI UEnumeration * U_EXPORT2
+ucnvsel_selectForString(const UConverterSelector* sel,
+                        const UChar *s, int32_t length, UErrorCode *status) {
+  if (U_FAILURE(*status)) {
+    return NULL;
+  }
+  if (sel == NULL || (s == NULL && length != 0)) {
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    return NULL;
+  }
+
+  const int32_t columns = (sel->encodingsCount+31)/32;
+  uint32_t* mask = (uint32_t*) uprv_malloc(columns * 4);
+  if (mask == NULL) {
+    *status = U_MEMORY_ALLOCATION_ERROR;
+    return NULL;
+  }
+  // start out with every encoding selected
+  uprv_memset(mask, ~0, columns *4);
+
+  if (s != NULL) {
+    // a NULL limit means "NUL-terminated"
+    const UChar *limit = (length >= 0) ? s + length : NULL;
+
+    for (;;) {
+      const UBool atEnd = (limit == NULL) ? (*s == 0) : (s == limit);
+      if (atEnd) {
+        break;
+      }
+      UChar32 c;
+      uint16_t pvIndex;
+      UTRIE2_U16_NEXT16(sel->trie, s, limit, c, pvIndex);
+      if (intersectMasks(mask, sel->pv+pvIndex, columns)) {
+        break;  // nothing left to select
+      }
+    }
+  }
+  return selectForMask(sel, mask, status);
+}
+
+/**
+ * check a string against the selector - UTF8 version
+ *
+ * Starts with all encodings selected and intersects with the bit vector of
+ * each code point of s, stopping early once no encoding is left.
+ *
+ * @param sel    the selector; must not be NULL
+ * @param s      the UTF-8 string; may be NULL only if length is 0
+ * @param length number of bytes in s, or negative if s is NUL-terminated
+ * @param status in/out ICU error code
+ * @return an enumeration over the names of the remaining encodings, or NULL
+ *         on failure
+ */
+U_CAPI UEnumeration * U_EXPORT2
+ucnvsel_selectForUTF8(const UConverterSelector* sel,
+                      const char *s, int32_t length, UErrorCode *status) {
+  if (U_FAILURE(*status)) {
+    return NULL;
+  }
+  if (sel == NULL || (s == NULL && length != 0)) {
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    return NULL;
+  }
+
+  const int32_t columns = (sel->encodingsCount+31)/32;
+  uint32_t* mask = (uint32_t*) uprv_malloc(columns * 4);
+  if (mask == NULL) {
+    *status = U_MEMORY_ALLOCATION_ERROR;
+    return NULL;
+  }
+  // start out with every encoding selected
+  uprv_memset(mask, ~0, columns *4);
+
+  // s cannot be NULL here when length is negative (checked above)
+  if (length < 0) {
+    length = (int32_t)uprv_strlen(s);
+  }
+
+  if (s != NULL) {
+    for (const char *limit = s + length; s != limit; ) {
+      uint16_t pvIndex;
+      UTRIE2_U8_NEXT16(sel->trie, s, limit, pvIndex);
+      if (intersectMasks(mask, sel->pv+pvIndex, columns)) {
+        break;  // nothing left to select
+      }
+    }
+  }
+  return selectForMask(sel, mask, status);
+}
+
+#endif // !UCONFIG_NO_CONVERSION
diff --git a/thirdparty/icu4c/common/ucol_data.h b/thirdparty/icu4c/common/ucol_data.h
new file mode 100644
index 0000000000..83f54abba1
--- /dev/null
+++ b/thirdparty/icu4c/common/ucol_data.h
@@ -0,0 +1,89 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2000-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ucol_data.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011jul02
+* created by: Markus Scherer
+*
+* Private implementation header for C/C++ collation.
+* Some file data structure definitions were moved here from i18n/ucol_imp.h
+* so that the common library (via ucol_swp.cpp) need not depend on the i18n library at all.
+*
+* We do not want to move the collation swapper to the i18n library because
+* a) the resource bundle swapper depends on it and would have to move too, and
+* b) we might want to eventually implement runtime data swapping,
+* which might (or might not) be easier if all swappers are in the common library.
+*/
+
+#ifndef __UCOL_DATA_H__
+#define __UCOL_DATA_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+/* let us know whether reserved fields are reset to zero or junked */
+#define UCOL_HEADER_MAGIC 0x20030618
+
+/*
+ * Header at the start of a collation binary. Its first field is the size
+ * field, and the uint32_t members documented as offsets are byte offsets
+ * from the start of this header (see the note after the size field).
+ */
+typedef struct {
+ int32_t size;
+ /* all the offsets are in bytes */
+ /* to get the address add to the header address and cast properly */
+ uint32_t options; /* these are the default options for the collator */
+ uint32_t UCAConsts; /* structure which holds values for indirect positioning and implicit ranges */
+ uint32_t contractionUCACombos; /* this one is needed only for UCA, to copy the appropriate contractions */
+ uint32_t magic; /* magic number - lets us know whether reserved data is reset or junked */
+ uint32_t mappingPosition; /* const uint8_t *mappingPosition; */
+ uint32_t expansion; /* uint32_t *expansion; */
+ uint32_t contractionIndex; /* UChar *contractionIndex; */
+ uint32_t contractionCEs; /* uint32_t *contractionCEs; */
+ uint32_t contractionSize; /* needed for various closures */
+ /*int32_t latinOneMapping;*/ /* this is now handled in the trie itself *//* fast track to latin1 chars */
+
+ uint32_t endExpansionCE; /* array of last collation element in
+ expansion */
+ uint32_t expansionCESize; /* array of maximum expansion size
+ corresponding to the expansion
+ collation elements with last element
+ in endExpansionCE*/
+ int32_t endExpansionCECount; /* size of endExpansionCE */
+ uint32_t unsafeCP; /* hash table of unsafe code points */
+ uint32_t contrEndCP; /* hash table of final code points */
+ /* in contractions. */
+
+ int32_t contractionUCACombosSize; /* number of UCA contraction items. */
+ /*Length is contractionUCACombosSize*contractionUCACombosWidth*sizeof(UChar) */
+ UBool jamoSpecial; /* is jamoSpecial */
+ UBool isBigEndian; /* is this data big endian? from the UDataInfo header*/
+ uint8_t charSetFamily; /* what is the charset family of this data from the UDataInfo header*/
+ uint8_t contractionUCACombosWidth; /* width of UCA combos field */
+ UVersionInfo version;
+ UVersionInfo UCAVersion; /* version of the UCA, read from file */
+ UVersionInfo UCDVersion; /* UCD version, obtained by u_getUnicodeVersion */
+ UVersionInfo formatVersion; /* format version from the UDataInfo header */
+ uint32_t scriptToLeadByte; /* offset to script to lead collation byte mapping data */
+ uint32_t leadByteToScript; /* offset to lead collation byte to script mapping data */
+ uint8_t reserved[76]; /* for future use */
+} UCATableHeader;
+
+/*
+ * Header structure for the inverse UCA table (going by its name).
+ * NOTE(review): the units of the size fields and whether table/conts are
+ * byte offsets are not documented in this header - confirm against the code
+ * that reads or swaps this structure.
+ */
+typedef struct {
+ uint32_t byteSize;
+ uint32_t tableSize;
+ uint32_t contsSize;
+ uint32_t table;
+ uint32_t conts;
+ UVersionInfo UCAVersion; /* version of the UCA, read from file */
+ uint8_t padding[8];
+} InverseUCATableHeader;
+
+#endif /* !UCONFIG_NO_COLLATION */
+
+#endif /* __UCOL_DATA_H__ */
diff --git a/thirdparty/icu4c/common/ucol_swp.cpp b/thirdparty/icu4c/common/ucol_swp.cpp
new file mode 100644
index 0000000000..1af19863fa
--- /dev/null
+++ b/thirdparty/icu4c/common/ucol_swp.cpp
@@ -0,0 +1,615 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ucol_swp.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003sep10
+* created by: Markus W. Scherer
+*
+* Swap collation binaries.
+*/
+
+#include "unicode/udata.h" /* UDataInfo */
+#include "utrie.h"
+#include "utrie2.h"
+#include "udataswp.h"
+#include "cmemory.h"
+#include "ucol_data.h"
+#include "ucol_swp.h"
+
+/* swapping ----------------------------------------------------------------- */
+
+#if !UCONFIG_NO_COLLATION
+
+/*
+ * Heuristic test for an ICU collation binary.
+ *
+ * Two layouts are recognized:
+ * - format version 4+: a standard ICU data header whose dataFormat is "UCol"
+ * - format version 3: header-less data that starts with a UCATableHeader
+ *
+ * ds and inData must not be NULL; length is the number of available bytes,
+ * or -1 if that is unknown. Returns TRUE if one of the layouts matches.
+ */
+U_CAPI UBool U_EXPORT2
+ucol_looksLikeCollationBinary(const UDataSwapper *ds,
+                              const void *inData, int32_t length) {
+    if(ds==NULL || inData==NULL || length<-1) {
+        return FALSE;
+    }
+
+    // Format version 4+: preflight the standard data header and look at its UDataInfo.
+    UErrorCode status=U_ZERO_ERROR;
+    (void)udata_swapDataHeader(ds, inData, -1, NULL, &status);
+    if(U_SUCCESS(status)) {
+        const UDataInfo *pInfo=(const UDataInfo *)((const char *)inData+4);
+        // dataFormat="UCol"
+        if(pInfo->dataFormat[0]==0x55 &&
+                pInfo->dataFormat[1]==0x43 &&
+                pInfo->dataFormat[2]==0x6f &&
+                pInfo->dataFormat[3]==0x6c) {
+            return TRUE;
+        }
+    }
+
+    // Format version 3: the data itself starts with the UCATableHeader.
+    const UCATableHeader *inHeader=(const UCATableHeader *)inData;
+
+    /*
+     * The collation binary must contain at least the UCATableHeader,
+     * starting with its size field.
+     * sizeof(UCATableHeader)==42*4 in ICU 2.8
+     * Compare a known length with the minimum header size before reading
+     * the size field, then with the size that the header declares.
+     */
+    if(length>=0 && length<(42*4)) {
+        return FALSE;
+    }
+    const int32_t declaredSize=udata_readInt32(ds, inHeader->size);
+    if(length>=0 && length<declaredSize) {
+        return FALSE;
+    }
+
+    const uint32_t magic=ds->readUInt32(inHeader->magic);
+    if(magic!=UCOL_HEADER_MAGIC || inHeader->formatVersion[0]!=3) {
+        return FALSE;
+    }
+
+    // The platform properties stored in the header must agree with the swapper.
+    if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+namespace {
+
+/*
+ * Swap a header-less collation formatVersion=3 binary, inside a resource bundle or ucadata.icu.
+ *
+ * With length<0 (preflighting) nothing is written: only the size field, the magic
+ * number, the format version and the platform properties of the header are read
+ * and checked. Otherwise the whole binary is first copied to outData (unless
+ * inData and outData are the same buffer) and then its multi-byte pieces are
+ * swapped one by one.
+ *
+ * Returns 0 if *pErrorCode already indicates a failure or if an argument or
+ * header check fails (*pErrorCode is set in the latter case); otherwise returns
+ * the value of the header's size field.
+ */
+int32_t
+swapFormatVersion3(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const uint8_t *inBytes;
+ uint8_t *outBytes;
+
+ const UCATableHeader *inHeader;
+ UCATableHeader *outHeader;
+ UCATableHeader header;
+
+ uint32_t count;
+
+ /* argument checking in case we were not called from ucol_swap() */
+ if(U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ inBytes=(const uint8_t *)inData;
+ outBytes=(uint8_t *)outData;
+
+ inHeader=(const UCATableHeader *)inData;
+ outHeader=(UCATableHeader *)outData;
+
+ /*
+ * The collation binary must contain at least the UCATableHeader,
+ * starting with its size field.
+ * sizeof(UCATableHeader)==42*4 in ICU 2.8
+ * check the length against the header size before reading the size field
+ */
+ uprv_memset(&header, 0, sizeof(header));
+ if(length<0) {
+ header.size=udata_readInt32(ds, inHeader->size);
+ } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
+ udata_printError(ds, "ucol_swap(formatVersion=3): too few bytes (%d after header) for collation data\n",
+ length);
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ /* the magic number is a 32-bit value and must be read through the swapper; formatVersion is bytes */
+ header.magic=ds->readUInt32(inHeader->magic);
+ if(!(
+ header.magic==UCOL_HEADER_MAGIC &&
+ inHeader->formatVersion[0]==3 /*&&
+ inHeader->formatVersion[1]>=0*/
+ )) {
+ udata_printError(ds, "ucol_swap(formatVersion=3): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
+ header.magic,
+ inHeader->formatVersion[0], inHeader->formatVersion[1]);
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ /* the platform properties stored in the data must match the swapper's input properties */
+ if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
+ udata_printError(ds, "ucol_swap(formatVersion=3): endianness %d or charset %d does not match the swapper\n",
+ inHeader->isBigEndian, inHeader->charSetFamily);
+ *pErrorCode=U_INVALID_FORMAT_ERROR;
+ return 0;
+ }
+
+ if(length>=0) {
+ /* copy everything, takes care of data that needs no swapping */
+ if(inBytes!=outBytes) {
+ uprv_memcpy(outBytes, inBytes, header.size);
+ }
+
+ /* swap the necessary pieces in the order of their occurrence in the data */
+
+ /*
+ * read more of the UCATableHeader (the size field was read above);
+ * this happens before the header itself is swapped below, which matters
+ * when inData and outData are the same buffer
+ */
+ header.options= ds->readUInt32(inHeader->options);
+ header.UCAConsts= ds->readUInt32(inHeader->UCAConsts);
+ header.contractionUCACombos= ds->readUInt32(inHeader->contractionUCACombos);
+ header.mappingPosition= ds->readUInt32(inHeader->mappingPosition);
+ header.expansion= ds->readUInt32(inHeader->expansion);
+ header.contractionIndex= ds->readUInt32(inHeader->contractionIndex);
+ header.contractionCEs= ds->readUInt32(inHeader->contractionCEs);
+ header.contractionSize= ds->readUInt32(inHeader->contractionSize);
+ header.endExpansionCE= ds->readUInt32(inHeader->endExpansionCE);
+ header.expansionCESize= ds->readUInt32(inHeader->expansionCESize);
+ header.endExpansionCECount= udata_readInt32(ds, inHeader->endExpansionCECount);
+ header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize);
+ header.scriptToLeadByte= ds->readUInt32(inHeader->scriptToLeadByte);
+ header.leadByteToScript= ds->readUInt32(inHeader->leadByteToScript);
+
+ /* swap the 32-bit integers in the header: everything before jamoSpecial ... */
+ ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader),
+ outHeader, pErrorCode);
+ /* ... and the two adjacent fields scriptToLeadByte and leadByteToScript */
+ ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript),
+ &(outHeader->scriptToLeadByte), pErrorCode);
+ /* set the output platform properties */
+ outHeader->isBigEndian=ds->outIsBigEndian;
+ outHeader->charSetFamily=ds->outCharset;
+
+ /* swap the options; they extend from the options offset up to the expansion offset */
+ if(header.options!=0) {
+ ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options,
+ outBytes+header.options, pErrorCode);
+ }
+
+ /* swap the expansions */
+ if(header.mappingPosition!=0 && header.expansion!=0) {
+ if(header.contractionIndex!=0) {
+ /* expansions bounded by contractions */
+ count=header.contractionIndex-header.expansion;
+ } else {
+ /* no contractions: expansions bounded by the main trie */
+ count=header.mappingPosition-header.expansion;
+ }
+ ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count,
+ outBytes+header.expansion, pErrorCode);
+ }
+
+ /* swap the contractions */
+ if(header.contractionSize!=0) {
+ /* contractionIndex: UChar[] */
+ ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2,
+ outBytes+header.contractionIndex, pErrorCode);
+
+ /* contractionCEs: CEs[] */
+ ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4,
+ outBytes+header.contractionCEs, pErrorCode);
+ }
+
+ /* swap the main trie; it extends from mappingPosition up to endExpansionCE */
+ if(header.mappingPosition!=0) {
+ count=header.endExpansionCE-header.mappingPosition;
+ utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count,
+ outBytes+header.mappingPosition, pErrorCode);
+ }
+
+ /* swap the max expansion table */
+ if(header.endExpansionCECount!=0) {
+ ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4,
+ outBytes+header.endExpansionCE, pErrorCode);
+ }
+
+ /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */
+
+ /* swap UCA constants */
+ if(header.UCAConsts!=0) {
+ /*
+ * if UCAConsts!=0 then contractionUCACombos!=0 as well, because we are
+ * swapping the UCA data file, and we know that the UCA contains contractions
+ */
+ ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,
+ outBytes+header.UCAConsts, pErrorCode);
+ }
+
+ /* swap UCA contractions */
+ if(header.contractionUCACombosSize!=0) {
+ /* contractionUCACombosWidth is a single byte and is read directly from the input header */
+ count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR;
+ ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count,
+ outBytes+header.contractionUCACombos, pErrorCode);
+ }
+
+ /* swap the script to lead bytes */
+ if(header.scriptToLeadByte!=0) {
+ /* the table starts with two uint16_t counts, which are read before the table is swapped */
+ int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte))); // each entry = 2 * uint16
+ int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte + 2))); // each entry = uint16
+ ds->swapArray16(ds, inBytes+header.scriptToLeadByte,
+ 4 + (4 * indexCount) + (2 * dataCount),
+ outBytes+header.scriptToLeadByte, pErrorCode);
+ }
+
+ /* swap the lead byte to scripts */
+ if(header.leadByteToScript!=0) {
+ /* same layout as above, except that each index entry is a single uint16_t */
+ int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript))); // each entry = uint16
+ int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript + 2))); // each entry = uint16
+ ds->swapArray16(ds, inBytes+header.leadByteToScript,
+ 4 + (2 * indexCount) + (2 * dataCount),
+ outBytes+header.leadByteToScript, pErrorCode);
+ }
+ }
+
+ return header.size;
+}
+
+// swap formatVersion 4 or 5 ----------------------------------------------- ***
+
+// The following are copied from CollationDataReader, trading an awkward copy of constants
+// for an awkward relocation of the i18n collationdatareader.h file into the common library.
+// Keep them in sync!
+
+/*
+ * Slots of the int32_t indexes[] array at the start of a formatVersion 4/5
+ * collation binary. swapFormatVersion4() treats each ..._OFFSET slot as the
+ * byte offset of one part of the data; the length of a part is the value of
+ * the following slot minus its own offset.
+ */
+enum {
+ IX_INDEXES_LENGTH, // 0
+ IX_OPTIONS,
+ IX_RESERVED2,
+ IX_RESERVED3,
+
+ IX_JAMO_CE32S_START, // 4
+ IX_REORDER_CODES_OFFSET,
+ IX_REORDER_TABLE_OFFSET,
+ IX_TRIE_OFFSET,
+
+ IX_RESERVED8_OFFSET, // 8
+ IX_CES_OFFSET,
+ IX_RESERVED10_OFFSET,
+ IX_CE32S_OFFSET,
+
+ IX_ROOT_ELEMENTS_OFFSET, // 12
+ IX_CONTEXTS_OFFSET,
+ IX_UNSAFE_BWD_OFFSET,
+ IX_FAST_LATIN_TABLE_OFFSET,
+
+ IX_SCRIPTS_OFFSET, // 16
+ IX_COMPRESSIBLE_BYTES_OFFSET,
+ IX_RESERVED18_OFFSET,
+ // Slot holding the total size of the data; also the end of the last part.
+ IX_TOTAL_SIZE
+};
+
+/*
+ * Swap the body of a collation formatVersion 4 or 5 binary, that is, the data
+ * that follows the standard ICU data header.
+ *
+ * ds        swapper for the input/output platform properties
+ * inData    start of the indexes[] array
+ * length    number of bytes at inData, or negative for preflighting
+ * outData   output buffer; may be the same as inData
+ * errorCode in/out ICU error code
+ *
+ * With a negative length only the indexes are read and the total size of the
+ * data is returned. Otherwise the data is copied to outData (unless swapping
+ * in place) and every part with multi-byte units is swapped.
+ *
+ * Returns the total size of the data in bytes, or 0 if errorCode already
+ * indicates a failure or the data is rejected.
+ */
+int32_t
+swapFormatVersion4(const UDataSwapper *ds,
+                   const void *inData, int32_t length, void *outData,
+                   UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) { return 0; }
+
+    const uint8_t *inBytes=(const uint8_t *)inData;
+    uint8_t *outBytes=(uint8_t *)outData;
+
+    const int32_t *inIndexes=(const int32_t *)inBytes;
+    int32_t indexes[IX_TOTAL_SIZE+1];
+
+    // Need at least IX_INDEXES_LENGTH and IX_OPTIONS.
+    if(0<=length && length<8) {
+        udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
+                         "(%d after header) for collation data\n",
+                         length);
+        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    int32_t indexesLength=indexes[0]=udata_readInt32(ds, inIndexes[0]);
+    if(indexesLength<0) {
+        // The value comes straight from the data. A negative count would make
+        // the fill loop below write in front of indexes[].
+        udata_printError(ds, "ucol_swap(formatVersion=4): invalid indexes length %d\n",
+                         indexesLength);
+        errorCode=U_INVALID_FORMAT_ERROR;
+        return 0;
+    }
+    // Same test as length<(indexesLength*4) for non-negative values,
+    // written as a division so that a huge indexesLength cannot overflow.
+    if(0<=length && (length/4)<indexesLength) {
+        udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
+                         "(%d after header) for collation data\n",
+                         length);
+        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    // Read the indexes that are present, and mark the missing ones with -1.
+    for(int32_t i=1; i<=IX_TOTAL_SIZE && i<indexesLength; ++i) {
+        indexes[i]=udata_readInt32(ds, inIndexes[i]);
+    }
+    for(int32_t i=indexesLength; i<=IX_TOTAL_SIZE; ++i) {
+        indexes[i]=-1;
+    }
+    inIndexes=NULL;  // Make sure we do not accidentally use these instead of indexes[].
+
+    // Get the total length of the data.
+    int32_t size;
+    if(indexesLength>IX_TOTAL_SIZE) {
+        size=indexes[IX_TOTAL_SIZE];
+    } else if(indexesLength>IX_REORDER_CODES_OFFSET) {
+        size=indexes[indexesLength-1];
+    } else {
+        size=indexesLength*4;
+    }
+    if(length<0) { return size; }
+
+    if(length<size) {
+        udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
+                         "(%d after header) for collation data\n",
+                         length);
+        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    // Copy the data for inaccessible bytes and arrays of bytes.
+    if(inBytes!=outBytes) {
+        uprv_memcpy(outBytes, inBytes, size);
+    }
+
+    // Swap the int32_t indexes[].
+    ds->swapArray32(ds, inBytes, indexesLength * 4, outBytes, &errorCode);
+
+    // The following is a modified version of CollationDataReader::read().
+    // Here we use indexes[] not inIndexes[] because
+    // the inIndexes[] may not be in this machine's endianness.
+    int32_t index;       // one of the indexes[] slots
+    int32_t offset;      // byte offset for the index part
+    int32_t partLength;  // number of bytes in the index part
+
+    index = IX_REORDER_CODES_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray32(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    // Skip the IX_REORDER_TABLE_OFFSET byte array.
+
+    index = IX_TRIE_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        utrie2_swap(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    // Reserved parts must be empty: their layout, and therefore the way to
+    // swap them, is unknown.
+    index = IX_RESERVED8_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED8_OFFSET\n");
+        errorCode = U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    index = IX_CES_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray64(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    index = IX_RESERVED10_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED10_OFFSET\n");
+        errorCode = U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    index = IX_CE32S_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray32(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    index = IX_ROOT_ELEMENTS_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray32(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    index = IX_CONTEXTS_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray16(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    index = IX_UNSAFE_BWD_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray16(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    index = IX_FAST_LATIN_TABLE_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray16(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    index = IX_SCRIPTS_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray16(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    // Skip the IX_COMPRESSIBLE_BYTES_OFFSET byte array.
+
+    index = IX_RESERVED18_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED18_OFFSET\n");
+        errorCode = U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    return size;
+}
+
+} // namespace
+
+/*
+ * Swap ICU collation data like ucadata.icu.
+ *
+ * Data with a standard ICU data header must have dataFormat "UCol" and a
+ * format version from 3 to 5; the part after the header is handed to
+ * swapFormatVersion3() or swapFormatVersion4(). Data for which the data
+ * header cannot be swapped is retried as a header-less formatVersion=3 binary.
+ *
+ * On the path with a data header the result is the header size plus the
+ * collation data size, or 0 if *pErrorCode indicates a failure.
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_swap(const UDataSwapper *ds,
+          const void *inData, int32_t length, void *outData,
+          UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* udata_swapDataHeader checks the arguments */
+    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        // Not a standard data header: fall back to the old format version
+        // which did not have one.
+        *pErrorCode=U_ZERO_ERROR;
+        return swapFormatVersion3(ds, inData, length, outData, pErrorCode);
+    }
+
+    /* check data format and format version */
+    const UDataInfo *pInfo=(const UDataInfo *)((const char *)inData+4);
+    if(pInfo->dataFormat[0]!=0x55 ||   // dataFormat="UCol"
+            pInfo->dataFormat[1]!=0x43 ||
+            pInfo->dataFormat[2]!=0x6f ||
+            pInfo->dataFormat[3]!=0x6c ||
+            pInfo->formatVersion[0]<3 || 5<pInfo->formatVersion[0]) {
+        udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x "
+                         "(format version %02x.%02x) is not recognized as collation data\n",
+                         pInfo->dataFormat[0], pInfo->dataFormat[1],
+                         pInfo->dataFormat[2], pInfo->dataFormat[3],
+                         pInfo->formatVersion[0], pInfo->formatVersion[1]);
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    // Step over the data header on both sides.
+    const void *inBody=(const char *)inData+headerSize;
+    void *outBody=(char *)outData+headerSize;
+    const int32_t bodyLength= length>=0 ? length-headerSize : length;
+
+    const int32_t collationSize= pInfo->formatVersion[0]>=4 ?
+        swapFormatVersion4(ds, inBody, bodyLength, outBody, *pErrorCode) :
+        swapFormatVersion3(ds, inBody, bodyLength, outBody, pErrorCode);
+
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    return headerSize+collationSize;
+}
+
+/*
+ * Swap inverse UCA collation data (invuca.icu).
+ *
+ * The data must start with a standard ICU data header with dataFormat "InvC"
+ * and format version 2.1 or higher within major version 2, followed by an
+ * InverseUCATableHeader.
+ *
+ * With length<0 (preflighting) only the headers are read. Otherwise the data
+ * is copied to outData (unless swapping in place) and the header integers,
+ * the inverse table and the continuation table are swapped.
+ *
+ * Returns the data header size plus the byteSize from the table header,
+ * or 0 if the data is rejected (*pErrorCode is set then).
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_swapInverseUCA(const UDataSwapper *ds,
+                    const void *inData, int32_t length, void *outData,
+                    UErrorCode *pErrorCode) {
+    const UDataInfo *pInfo;
+    int32_t headerSize;
+
+    const uint8_t *inBytes;
+    uint8_t *outBytes;
+
+    const InverseUCATableHeader *inHeader;
+    InverseUCATableHeader *outHeader;
+    InverseUCATableHeader header={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} };
+
+    /* udata_swapDataHeader checks the arguments */
+    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* check data format and format version */
+    pInfo=(const UDataInfo *)((const char *)inData+4);
+    if(!(
+        pInfo->dataFormat[0]==0x49 &&   /* dataFormat="InvC" */
+        pInfo->dataFormat[1]==0x6e &&
+        pInfo->dataFormat[2]==0x76 &&
+        pInfo->dataFormat[3]==0x43 &&
+        pInfo->formatVersion[0]==2 &&
+        pInfo->formatVersion[1]>=1
+    )) {
+        udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
+                         pInfo->dataFormat[0], pInfo->dataFormat[1],
+                         pInfo->dataFormat[2], pInfo->dataFormat[3],
+                         pInfo->formatVersion[0], pInfo->formatVersion[1]);
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    /* everything below works on the data that follows the data header */
+    inBytes=(const uint8_t *)inData+headerSize;
+    outBytes=(uint8_t *)outData+headerSize;
+
+    inHeader=(const InverseUCATableHeader *)inBytes;
+    outHeader=(InverseUCATableHeader *)outBytes;
+
+    /*
+     * The inverse UCA collation binary must contain at least the InverseUCATableHeader,
+     * starting with its size field.
+     * sizeof(InverseUCATableHeader)==8*4 in ICU 2.8
+     * check the length against the header size before reading the size field
+     */
+    if(length<0) {
+        header.byteSize=udata_readInt32(ds, inHeader->byteSize);
+    } else if(
+        ((length-headerSize)<(8*4) ||
+         (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize)))
+    ) {
+        /* report the number of bytes after the data header, as the message says */
+        udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
+                         length-headerSize);
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    if(length>=0) {
+        /* copy everything, takes care of data that needs no swapping */
+        if(inBytes!=outBytes) {
+            uprv_memcpy(outBytes, inBytes, header.byteSize);
+        }
+
+        /* swap the necessary pieces in the order of their occurrence in the data */
+
+        /*
+         * read more of the InverseUCATableHeader (the byteSize field was read above);
+         * this happens before the header is swapped, which matters when swapping in place
+         */
+        header.tableSize=   ds->readUInt32(inHeader->tableSize);
+        header.contsSize=   ds->readUInt32(inHeader->contsSize);
+        header.table=       ds->readUInt32(inHeader->table);
+        header.conts=       ds->readUInt32(inHeader->conts);
+
+        /* swap the 32-bit integers in the header: byteSize through conts */
+        ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode);
+
+        /* swap the inverse table; tableSize counts uint32_t[3] rows */
+        ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4,
+                        outBytes+header.table, pErrorCode);
+
+        /* swap the continuation table; contsSize counts UChars */
+        ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR,
+                        outBytes+header.conts, pErrorCode);
+    }
+
+    return headerSize+header.byteSize;
+}
+
+#endif /* #if !UCONFIG_NO_COLLATION */
diff --git a/thirdparty/icu4c/common/ucol_swp.h b/thirdparty/icu4c/common/ucol_swp.h
new file mode 100644
index 0000000000..0c2990a85e
--- /dev/null
+++ b/thirdparty/icu4c/common/ucol_swp.h
@@ -0,0 +1,58 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ucol_swp.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003sep10
+* created by: Markus W. Scherer
+*
+* Swap collation binaries.
+*/
+
+#ifndef __UCOL_SWP_H__
+#define __UCOL_SWP_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "udataswp.h"
+
+/**
+ * Does the data look like a collation binary?
+ *
+ * @param ds     swapper describing the endianness and charset family of the
+ *               input data; must not be NULL
+ * @param inData start of the data to examine; must not be NULL
+ * @param length number of bytes at inData, or -1 if that is not known;
+ *               values below -1 are rejected
+ * @return TRUE if the data looks like a collation binary, otherwise FALSE
+ * @internal
+ */
+U_CAPI UBool U_EXPORT2
+ucol_looksLikeCollationBinary(const UDataSwapper *ds,
+ const void *inData, int32_t length);
+
+/**
+ * Swap ICU collation data like ucadata.icu. See udataswp.h.
+ *
+ * @param ds         swapper for the input and output platform properties
+ * @param inData     start of the input data
+ * @param length     number of bytes at inData, or -1 for preflighting
+ * @param outData    output buffer; not written to when preflighting
+ * @param pErrorCode in/out ICU error code
+ * @return the size of the data in bytes; 0 if the data is rejected
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Swap inverse UCA collation data (invuca.icu). See udataswp.h.
+ *
+ * @param ds         swapper for the input and output platform properties
+ * @param inData     start of the input data, beginning with its ICU data header
+ * @param length     number of bytes at inData, or -1 for preflighting
+ * @param outData    output buffer; not written to when preflighting
+ * @param pErrorCode in/out ICU error code
+ * @return the size of the data in bytes, including the data header;
+ *         0 if the data is rejected
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_swapInverseUCA(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif
diff --git a/thirdparty/icu4c/common/ucptrie.cpp b/thirdparty/icu4c/common/ucptrie.cpp
new file mode 100644
index 0000000000..0004160a23
--- /dev/null
+++ b/thirdparty/icu4c/common/ucptrie.cpp
@@ -0,0 +1,601 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// ucptrie.cpp (modified from utrie2.cpp)
+// created: 2017dec29 Markus W. Scherer
+
+// #define UCPTRIE_DEBUG
+#ifdef UCPTRIE_DEBUG
+# include <stdio.h>
+#endif
+
+#include "unicode/utypes.h"
+#include "unicode/ucptrie.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "uassert.h"
+#include "ucptrie_impl.h"
+
+// Creates a UCPTrie that reads its index and data arrays directly from the
+// serialized form at data.
+//
+// type, valueWidth: expected trie type and value width. A negative value
+//                   accepts whatever the serialized header says; any other
+//                   value must match the header.
+// data, length:     serialized trie; length must be positive and the two
+//                   lowest bits of the data address must be zero.
+// pActualLength:    if not nullptr, receives the number of bytes that the
+//                   serialized trie occupies.
+// pErrorCode:       in/out ICU error code.
+//
+// Returns nullptr and sets *pErrorCode on failure. On success the returned
+// struct is allocated with uprv_malloc(), while its index and data pointers
+// point into the caller's data, which is not copied.
+U_CAPI UCPTrie * U_EXPORT2
+ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth,
+                       const void *data, int32_t length, int32_t *pActualLength,
+                       UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return nullptr;
+    }
+
+    if (length <= 0 || (U_POINTER_MASK_LSB(data, 3) != 0) ||
+            type < UCPTRIE_TYPE_ANY || UCPTRIE_TYPE_SMALL < type ||
+            valueWidth < UCPTRIE_VALUE_BITS_ANY || UCPTRIE_VALUE_BITS_8 < valueWidth) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+
+    // Enough data for a trie header?
+    if (length < (int32_t)sizeof(UCPTrieHeader)) {
+        *pErrorCode = U_INVALID_FORMAT_ERROR;
+        return nullptr;
+    }
+
+    // Check the signature.
+    const UCPTrieHeader *header = (const UCPTrieHeader *)data;
+    if (header->signature != UCPTRIE_SIG) {
+        *pErrorCode = U_INVALID_FORMAT_ERROR;
+        return nullptr;
+    }
+
+    // The options field carries the type, the value width and the high bits
+    // of some lengths/offsets; reserved bits must be zero.
+    int32_t options = header->options;
+    int32_t typeInt = (options >> 6) & 3;
+    int32_t valueWidthInt = options & UCPTRIE_OPTIONS_VALUE_BITS_MASK;
+    if (typeInt > UCPTRIE_TYPE_SMALL || valueWidthInt > UCPTRIE_VALUE_BITS_8 ||
+            (options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0) {
+        *pErrorCode = U_INVALID_FORMAT_ERROR;
+        return nullptr;
+    }
+    UCPTrieType actualType = (UCPTrieType)typeInt;
+    UCPTrieValueWidth actualValueWidth = (UCPTrieValueWidth)valueWidthInt;
+    // A negative request means "any": adopt what the data says.
+    if (type < 0) {
+        type = actualType;
+    }
+    if (valueWidth < 0) {
+        valueWidth = actualValueWidth;
+    }
+    if (type != actualType || valueWidth != actualValueWidth) {
+        *pErrorCode = U_INVALID_FORMAT_ERROR;
+        return nullptr;
+    }
+
+    // Get the length values and offsets.
+    UCPTrie tempTrie;
+    uprv_memset(&tempTrie, 0, sizeof(tempTrie));
+    tempTrie.indexLength = header->indexLength;
+    tempTrie.dataLength =
+        ((options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | header->dataLength;
+    tempTrie.index3NullOffset = header->index3NullOffset;
+    tempTrie.dataNullOffset =
+        ((options & UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK) << 8) | header->dataNullOffset;
+
+    tempTrie.highStart = header->shiftedHighStart << UCPTRIE_SHIFT_2;
+    tempTrie.shifted12HighStart = (tempTrie.highStart + 0xfff) >> 12;
+    tempTrie.type = type;
+    tempTrie.valueWidth = valueWidth;
+
+    // Calculate the actual length: header, 16-bit index, then the data array
+    // with 2, 4 or 1 bytes per value.
+    int32_t actualLength = (int32_t)sizeof(UCPTrieHeader) + tempTrie.indexLength * 2;
+    if (valueWidth == UCPTRIE_VALUE_BITS_16) {
+        actualLength += tempTrie.dataLength * 2;
+    } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
+        actualLength += tempTrie.dataLength * 4;
+    } else {
+        actualLength += tempTrie.dataLength;
+    }
+    if (length < actualLength) {
+        *pErrorCode = U_INVALID_FORMAT_ERROR;  // Not enough bytes.
+        return nullptr;
+    }
+
+    // Allocate the trie.
+    UCPTrie *trie = (UCPTrie *)uprv_malloc(sizeof(UCPTrie));
+    if (trie == nullptr) {
+        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
+#ifdef UCPTRIE_DEBUG
+    trie->name = "fromSerialized";
+#endif
+
+    // Set the pointers to its index and data arrays.
+    const uint16_t *p16 = (const uint16_t *)(header + 1);
+    trie->index = p16;
+    p16 += trie->indexLength;
+
+    // Get the data.
+    // If the null offset is not a real data offset, take the null value from
+    // the high-value slot near the end of the data array instead.
+    int32_t nullValueOffset = trie->dataNullOffset;
+    if (nullValueOffset >= trie->dataLength) {
+        nullValueOffset = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
+    }
+    switch (valueWidth) {
+    case UCPTRIE_VALUE_BITS_16:
+        trie->data.ptr16 = p16;
+        trie->nullValue = trie->data.ptr16[nullValueOffset];
+        break;
+    case UCPTRIE_VALUE_BITS_32:
+        trie->data.ptr32 = (const uint32_t *)p16;
+        trie->nullValue = trie->data.ptr32[nullValueOffset];
+        break;
+    case UCPTRIE_VALUE_BITS_8:
+        trie->data.ptr8 = (const uint8_t *)p16;
+        trie->nullValue = trie->data.ptr8[nullValueOffset];
+        break;
+    default:
+        // Unreachable because valueWidth was checked above.
+        // Still release the struct so that this error path cannot leak it.
+        uprv_free(trie);
+        *pErrorCode = U_INVALID_FORMAT_ERROR;
+        return nullptr;
+    }
+
+    if (pActualLength != nullptr) {
+        *pActualLength = actualLength;
+    }
+    return trie;
+}
+
+// Releases the UCPTrie struct itself with uprv_free(); nothing else is freed here.
+U_CAPI void U_EXPORT2
+ucptrie_close(UCPTrie *trie) {
+ uprv_free(trie);
+}
+
+// Returns the type field of the trie as a UCPTrieType.
+U_CAPI UCPTrieType U_EXPORT2
+ucptrie_getType(const UCPTrie *trie) {
+    return static_cast<UCPTrieType>(trie->type);
+}
+
+// Returns the valueWidth field of the trie as a UCPTrieValueWidth.
+U_CAPI UCPTrieValueWidth U_EXPORT2
+ucptrie_getValueWidth(const UCPTrie *trie) {
+    return static_cast<UCPTrieValueWidth>(trie->valueWidth);
+}
+
+// Returns the data array index for code point c, looked up through the
+// multi-stage (index-1 -> index-2 -> index-3 -> data block) part of the index.
+// The assertions document the expected input: c below trie->highStart, and
+// above U+FFFF for a fast-type trie.
+U_CAPI int32_t U_EXPORT2
+ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c) {
+ // Index-1 slot; where the index-1 table starts depends on the trie type.
+ int32_t i1 = c >> UCPTRIE_SHIFT_1;
+ if (trie->type == UCPTRIE_TYPE_FAST) {
+ U_ASSERT(0xffff < c && c < trie->highStart);
+ i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH;
+ } else {
+ U_ASSERT((uint32_t)c < (uint32_t)trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT);
+ i1 += UCPTRIE_SMALL_INDEX_LENGTH;
+ }
+ // The index-1 entry locates an index-2 block, whose entry locates an index-3 block.
+ int32_t i3Block = trie->index[
+ (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)];
+ int32_t i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK;
+ int32_t dataBlock;
+ // Bit 15 of the index-3 block offset selects between the two index-3 encodings.
+ if ((i3Block & 0x8000) == 0) {
+ // 16-bit indexes
+ dataBlock = trie->index[i3Block + i3];
+ } else {
+ // 18-bit indexes stored in groups of 9 entries per 8 indexes.
+ i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
+ i3 &= 7;
+ // The first entry of a group holds the upper 2 bits for each of its 8 indexes.
+ dataBlock = ((int32_t)trie->index[i3Block++] << (2 + (2 * i3))) & 0x30000;
+ dataBlock |= trie->index[i3Block + i3];
+ }
+ // Add the offset of c within its data block.
+ return dataBlock + (c & UCPTRIE_SMALL_DATA_MASK);
+}
+
+// Returns the data index for the code point assembled from the lead/trail
+// bits lt1 and the trail byte values t2 and t3.
+U_CAPI int32_t U_EXPORT2
+ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3) {
+    const UChar32 cp = (lt1 << 12) | (t2 << 6) | t3;
+    if (cp < trie->highStart) {
+        return ucptrie_internalSmallIndex(trie, cp);
+    }
+    // At or above highStart: use the high-value slot.
+    // Possible because the UTF-8 macro compares with shifted12HighStart which may be higher.
+    return trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
+}
+
+// Decodes backward from src (not reading before start, and looking at no more
+// than 7 bytes) with utf8_prevCharSafeBody(), then looks up the data index of
+// the resulting code point.
+// Returns (data index << 3) | number of bytes read backward from src.
+U_CAPI int32_t U_EXPORT2
+ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c,
+                            const uint8_t *start, const uint8_t *src) {
+    // Support 64-bit pointers by avoiding cast of arbitrary difference.
+    int32_t window;
+    if ((src - start) > 7) {
+        window = 7;
+        start = src - 7;
+    } else {
+        window = (int32_t)(src - start);
+    }
+    int32_t pos = window;
+    c = utf8_prevCharSafeBody(start, 0, &pos, c, -1);
+    const int32_t bytesRead = window - pos;  // Number of bytes read backward from src.
+    int32_t dataIndex = _UCPTRIE_CP_INDEX(trie, 0xffff, c);
+    return (dataIndex << 3) | bytesRead;
+}
+
+namespace {
+
+// Reads the value at dataIndex from the data array that matches valueWidth.
+inline uint32_t getValue(UCPTrieData data, UCPTrieValueWidth valueWidth, int32_t dataIndex) {
+    if (valueWidth == UCPTRIE_VALUE_BITS_16) {
+        return data.ptr16[dataIndex];
+    }
+    if (valueWidth == UCPTRIE_VALUE_BITS_32) {
+        return data.ptr32[dataIndex];
+    }
+    if (valueWidth == UCPTRIE_VALUE_BITS_8) {
+        return data.ptr8[dataIndex];
+    }
+    // Unreachable if the trie is properly initialized.
+    return 0xffffffff;
+}
+
+} // namespace
+
+// Returns the trie value for code point c.
+U_CAPI uint32_t U_EXPORT2
+ucptrie_get(const UCPTrie *trie, UChar32 c) {
+    // linear ASCII: the code point is its own data index
+    int32_t dataIndex = c;
+    if ((uint32_t)c > 0x7f) {
+        // Everything else goes through the index; the limit of the
+        // directly indexed range depends on the trie type.
+        const UChar32 fastMax =
+            (trie->type == UCPTRIE_TYPE_FAST) ? 0xffff : UCPTRIE_SMALL_MAX;
+        dataIndex = _UCPTRIE_CP_INDEX(trie, fastMax, c);
+    }
+    const UCPTrieValueWidth width = (UCPTrieValueWidth)trie->valueWidth;
+    return getValue(trie->data, width, dataIndex);
+}
+
+namespace {
+
+// Highest Unicode code point; getRange() rejects start values above it.
+constexpr int32_t MAX_UNICODE = 0x10ffff;
+
+// Maps a raw trie value to the value reported to the caller:
+// the trie's null value becomes nullValue without calling the filter;
+// any other value is passed through the filter if there is one.
+inline uint32_t maybeFilterValue(uint32_t value, uint32_t trieNullValue, uint32_t nullValue,
+                                 UCPMapValueFilter *filter, const void *context) {
+    if (value == trieNullValue) {
+        return nullValue;
+    }
+    if (filter == nullptr) {
+        return value;
+    }
+    return filter(context, value);
+}
+
+UChar32 getRange(const void *t, UChar32 start,
+ UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
+ if ((uint32_t)start > MAX_UNICODE) {
+ return U_SENTINEL;
+ }
+ const UCPTrie *trie = reinterpret_cast<const UCPTrie *>(t);
+ UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth;
+ if (start >= trie->highStart) {
+ if (pValue != nullptr) {
+ int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
+ uint32_t value = getValue(trie->data, valueWidth, di);
+ if (filter != nullptr) { value = filter(context, value); }
+ *pValue = value;
+ }
+ return MAX_UNICODE;
+ }
+
+ uint32_t nullValue = trie->nullValue;
+ if (filter != nullptr) { nullValue = filter(context, nullValue); }
+ const uint16_t *index = trie->index;
+
+ int32_t prevI3Block = -1;
+ int32_t prevBlock = -1;
+ UChar32 c = start;
+ uint32_t trieValue, value = nullValue;
+ bool haveValue = false;
+ do {
+ int32_t i3Block;
+ int32_t i3;
+ int32_t i3BlockLength;
+ int32_t dataBlockLength;
+ if (c <= 0xffff && (trie->type == UCPTRIE_TYPE_FAST || c <= UCPTRIE_SMALL_MAX)) {
+ i3Block = 0;
+ i3 = c >> UCPTRIE_FAST_SHIFT;
+ i3BlockLength = trie->type == UCPTRIE_TYPE_FAST ?
+ UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
+ dataBlockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
+ } else {
+ // Use the multi-stage index.
+ int32_t i1 = c >> UCPTRIE_SHIFT_1;
+ if (trie->type == UCPTRIE_TYPE_FAST) {
+ U_ASSERT(0xffff < c && c < trie->highStart);
+ i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH;
+ } else {
+ U_ASSERT(c < trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT);
+ i1 += UCPTRIE_SMALL_INDEX_LENGTH;
+ }
+ i3Block = trie->index[
+ (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)];
+ if (i3Block == prevI3Block && (c - start) >= UCPTRIE_CP_PER_INDEX_2_ENTRY) {
+ // The index-3 block is the same as the previous one, and filled with value.
+ U_ASSERT((c & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);
+ c += UCPTRIE_CP_PER_INDEX_2_ENTRY;
+ continue;
+ }
+ prevI3Block = i3Block;
+ if (i3Block == trie->index3NullOffset) {
+ // This is the index-3 null block.
+ if (haveValue) {
+ if (nullValue != value) {
+ return c - 1;
+ }
+ } else {
+ trieValue = trie->nullValue;
+ value = nullValue;
+ if (pValue != nullptr) { *pValue = nullValue; }
+ haveValue = true;
+ }
+ prevBlock = trie->dataNullOffset;
+ c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
+ continue;
+ }
+ i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK;
+ i3BlockLength = UCPTRIE_INDEX_3_BLOCK_LENGTH;
+ dataBlockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
+ }
+ // Enumerate data blocks for one index-3 block.
+ do {
+ int32_t block;
+ if ((i3Block & 0x8000) == 0) {
+ block = index[i3Block + i3];
+ } else {
+ // 18-bit indexes stored in groups of 9 entries per 8 indexes.
+ int32_t group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
+ int32_t gi = i3 & 7;
+ block = ((int32_t)index[group++] << (2 + (2 * gi))) & 0x30000;
+ block |= index[group + gi];
+ }
+ if (block == prevBlock && (c - start) >= dataBlockLength) {
+ // The block is the same as the previous one, and filled with value.
+ U_ASSERT((c & (dataBlockLength - 1)) == 0);
+ c += dataBlockLength;
+ } else {
+ int32_t dataMask = dataBlockLength - 1;
+ prevBlock = block;
+ if (block == trie->dataNullOffset) {
+ // This is the data null block.
+ if (haveValue) {
+ if (nullValue != value) {
+ return c - 1;
+ }
+ } else {
+ trieValue = trie->nullValue;
+ value = nullValue;
+ if (pValue != nullptr) { *pValue = nullValue; }
+ haveValue = true;
+ }
+ c = (c + dataBlockLength) & ~dataMask;
+ } else {
+ int32_t di = block + (c & dataMask);
+ uint32_t trieValue2 = getValue(trie->data, valueWidth, di);
+ if (haveValue) {
+ if (trieValue2 != trieValue) {
+ if (filter == nullptr ||
+ maybeFilterValue(trieValue2, trie->nullValue, nullValue,
+ filter, context) != value) {
+ return c - 1;
+ }
+ trieValue = trieValue2; // may or may not help
+ }
+ } else {
+ trieValue = trieValue2;
+ value = maybeFilterValue(trieValue2, trie->nullValue, nullValue,
+ filter, context);
+ if (pValue != nullptr) { *pValue = value; }
+ haveValue = true;
+ }
+ while ((++c & dataMask) != 0) {
+ trieValue2 = getValue(trie->data, valueWidth, ++di);
+ if (trieValue2 != trieValue) {
+ if (filter == nullptr ||
+ maybeFilterValue(trieValue2, trie->nullValue, nullValue,
+ filter, context) != value) {
+ return c - 1;
+ }
+ trieValue = trieValue2; // may or may not help
+ }
+ }
+ }
+ }
+ } while (++i3 < i3BlockLength);
+ } while (c < trie->highStart);
+ U_ASSERT(haveValue);
+ int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
+ uint32_t highValue = getValue(trie->data, valueWidth, di);
+ if (maybeFilterValue(highValue, trie->nullValue, nullValue,
+ filter, context) != value) {
+ return c - 1;
+ } else {
+ return MAX_UNICODE;
+ }
+}
+
+} // namespace
+
+U_CFUNC UChar32
+ucptrie_internalGetRange(UCPTrieGetRange *getRange,
+ const void *trie, UChar32 start,
+ UCPMapRangeOption option, uint32_t surrogateValue,
+ UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
+ if (option == UCPMAP_RANGE_NORMAL) {
+ return getRange(trie, start, filter, context, pValue); // No surrogate special-casing requested.
+ }
+ uint32_t value;
+ if (pValue == nullptr) {
+ // We need to examine the range value even if the caller does not want it.
+ pValue = &value;
+ }
+ UChar32 surrEnd = option == UCPMAP_RANGE_FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff; // Last code point reported with surrogateValue.
+ UChar32 end = getRange(trie, start, filter, context, pValue);
+ if (end < 0xd7ff || start > surrEnd) {
+ return end; // The range neither reaches U+D7FF nor starts at or before surrEnd: nothing to adjust.
+ }
+ // The range overlaps with surrogates, or ends just before the first one.
+ if (*pValue == surrogateValue) {
+ if (end >= surrEnd) {
+ // Surrogates followed by a non-surrogateValue range,
+ // or surrogates are part of a larger surrogateValue range.
+ return end;
+ }
+ } else {
+ if (start <= 0xd7ff) {
+ return 0xd7ff; // Non-surrogateValue range ends before surrogateValue surrogates.
+ }
+ // Start is a surrogate with a non-surrogateValue code *unit* value.
+ // Return a surrogateValue code *point* range.
+ *pValue = surrogateValue;
+ if (end > surrEnd) {
+ return surrEnd; // Surrogate range ends before non-surrogateValue rest of range.
+ }
+ }
+ // See if the surrogateValue surrogate range can be merged with
+ // an immediately following range.
+ uint32_t value2;
+ UChar32 end2 = getRange(trie, surrEnd + 1, filter, context, &value2); // Look at the range that starts right after surrEnd.
+ if (value2 == surrogateValue) {
+ return end2;
+ }
+ return surrEnd;
+}
+
+U_CAPI UChar32 U_EXPORT2
+ucptrie_getRange(const UCPTrie *trie, UChar32 start,
+ UCPMapRangeOption option, uint32_t surrogateValue,
+ UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
+ return ucptrie_internalGetRange(getRange, trie, start, // getRange is passed as the range callback.
+ option, surrogateValue,
+ filter, context, pValue);
+}
+
+U_CAPI int32_t U_EXPORT2
+ucptrie_toBinary(const UCPTrie *trie,
+ void *data, int32_t capacity,
+ UErrorCode *pErrorCode) {
+ if (U_FAILURE(*pErrorCode)) {
+ return 0; // Honor a failure that was already set on entry.
+ }
+
+ UCPTrieType type = (UCPTrieType)trie->type;
+ UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth;
+ if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type ||
+ valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth ||
+ capacity < 0 ||
+ (capacity > 0 && (data == nullptr || (U_POINTER_MASK_LSB(data, 3) != 0)))) { // Mask 3 = low two pointer bits; presumably a 32-bit alignment check.
+ *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ int32_t length = (int32_t)sizeof(UCPTrieHeader) + trie->indexLength * 2; // Header plus 2 bytes per index entry.
+ switch (valueWidth) { // Add the data array size: 2, 4 or 1 byte(s) per value.
+ case UCPTRIE_VALUE_BITS_16:
+ length += trie->dataLength * 2;
+ break;
+ case UCPTRIE_VALUE_BITS_32:
+ length += trie->dataLength * 4;
+ break;
+ case UCPTRIE_VALUE_BITS_8:
+ length += trie->dataLength;
+ break;
+ default:
+ // unreachable
+ break;
+ }
+ if (capacity < length) {
+ *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
+ return length; // Nothing was written; the required size is still reported.
+ }
+
+ char *bytes = (char *)data;
+ UCPTrieHeader *header = (UCPTrieHeader *)bytes;
+ header->signature = UCPTRIE_SIG; // "Tri3"
+ header->options = (uint16_t)(
+ ((trie->dataLength & 0xf0000) >> 4) | // dataLength bits 19..16 -> options bits 15..12
+ ((trie->dataNullOffset & 0xf0000) >> 8) | // dataNullOffset bits 19..16 -> options bits 11..8
+ (trie->type << 6) | // type -> options bits 7..6
+ valueWidth); // value width -> options bits 2..0
+ header->indexLength = (uint16_t)trie->indexLength;
+ header->dataLength = (uint16_t)trie->dataLength; // Low 16 bits only; the high bits went into options.
+ header->index3NullOffset = trie->index3NullOffset;
+ header->dataNullOffset = (uint16_t)trie->dataNullOffset; // Low 16 bits only; the high bits went into options.
+ header->shiftedHighStart = trie->highStart >> UCPTRIE_SHIFT_2;
+ bytes += sizeof(UCPTrieHeader);
+
+ uprv_memcpy(bytes, trie->index, trie->indexLength * 2); // The index array directly follows the header.
+ bytes += trie->indexLength * 2;
+
+ switch (valueWidth) { // The data array directly follows the index array.
+ case UCPTRIE_VALUE_BITS_16:
+ uprv_memcpy(bytes, trie->data.ptr16, trie->dataLength * 2);
+ break;
+ case UCPTRIE_VALUE_BITS_32:
+ uprv_memcpy(bytes, trie->data.ptr32, trie->dataLength * 4);
+ break;
+ case UCPTRIE_VALUE_BITS_8:
+ uprv_memcpy(bytes, trie->data.ptr8, trie->dataLength);
+ break;
+ default:
+ // unreachable
+ break;
+ }
+ return length;
+}
+
+namespace {
+
+#ifdef UCPTRIE_DEBUG
+long countNull(const UCPTrie *trie) { // UCPTRIE_DEBUG only: counts data array entries equal to trie->nullValue.
+ uint32_t nullValue=trie->nullValue;
+ int32_t length=trie->dataLength;
+ long count=0;
+ switch (trie->valueWidth) { // Scan the whole data array through the pointer matching the value width.
+ case UCPTRIE_VALUE_BITS_16:
+ for(int32_t i=0; i<length; ++i) {
+ if(trie->data.ptr16[i]==nullValue) { ++count; }
+ }
+ break;
+ case UCPTRIE_VALUE_BITS_32:
+ for(int32_t i=0; i<length; ++i) {
+ if(trie->data.ptr32[i]==nullValue) { ++count; }
+ }
+ break;
+ case UCPTRIE_VALUE_BITS_8:
+ for(int32_t i=0; i<length; ++i) {
+ if(trie->data.ptr8[i]==nullValue) { ++count; }
+ }
+ break;
+ default:
+ // unreachable
+ break;
+ }
+ return count;
+}
+
+U_CFUNC void
+ucptrie_printLengths(const UCPTrie *trie, const char *which) { // UCPTRIE_DEBUG only: prints index/data lengths, null count and serialized size.
+ long indexLength=trie->indexLength;
+ long dataLength=(long)trie->dataLength;
+ long totalLength=(long)sizeof(UCPTrieHeader)+indexLength*2+
+ dataLength*(trie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 2 :
+ trie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 4 : 1); // Bytes per data value: 2, 4 or 1.
+ printf("**UCPTrieLengths(%s %s)** index:%6ld data:%6ld countNull:%6ld serialized:%6ld\n",
+ which, trie->name, indexLength, dataLength, countNull(trie), totalLength);
+}
+#endif
+
+} // namespace
+
+// UCPMap ----
+// Initially, this is the same as UCPTrie. This may well change.
+
+U_CAPI uint32_t U_EXPORT2
+ucpmap_get(const UCPMap *map, UChar32 c) {
+ return ucptrie_get(reinterpret_cast<const UCPTrie *>(map), c); // The map pointer is reinterpreted as a UCPTrie and the lookup forwarded.
+}
+
+U_CAPI UChar32 U_EXPORT2
+ucpmap_getRange(const UCPMap *map, UChar32 start,
+ UCPMapRangeOption option, uint32_t surrogateValue,
+ UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
+ return ucptrie_getRange(reinterpret_cast<const UCPTrie *>(map), start, // The map pointer is reinterpreted as a UCPTrie; all other arguments pass through.
+ option, surrogateValue,
+ filter, context, pValue);
+}
diff --git a/thirdparty/icu4c/common/ucptrie_impl.h b/thirdparty/icu4c/common/ucptrie_impl.h
new file mode 100644
index 0000000000..1fe6a18ac5
--- /dev/null
+++ b/thirdparty/icu4c/common/ucptrie_impl.h
@@ -0,0 +1,289 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// ucptrie_impl.h (modified from utrie2_impl.h)
+// created: 2017dec29 Markus W. Scherer
+
+#ifndef __UCPTRIE_IMPL_H__
+#define __UCPTRIE_IMPL_H__
+
+#include "unicode/ucptrie.h"
+#ifdef UCPTRIE_DEBUG
+#include "unicode/umutablecptrie.h"
+#endif
+
+// UCPTrie signature values, in platform endianness and opposite endianness.
+// The UCPTrie signature ASCII byte values spell "Tri3".
+#define UCPTRIE_SIG 0x54726933
+#define UCPTRIE_OE_SIG 0x33697254
+
+/**
+ * Header data for the binary, memory-mappable representation of a UCPTrie/CodePointTrie (16 bytes).
+ * @internal
+ */
+struct UCPTrieHeader {
+ /** "Tri3" in big-endian US-ASCII (0x54726933) */
+ uint32_t signature;
+
+ /**
+ * Options bit field:
+ * Bits 15..12: Data length bits 19..16.
+ * Bits 11..8: Data null block offset bits 19..16.
+ * Bits 7..6: UCPTrieType
+ * Bits 5..3: Reserved (0).
+ * Bits 2..0: UCPTrieValueWidth
+ */
+ uint16_t options;
+
+ /** Total length of the index tables. */
+ uint16_t indexLength;
+
+ /** Data length bits 15..0; bits 19..16 are stored in options. */
+ uint16_t dataLength;
+
+ /** Index-3 null block offset, 0x7fff or 0xffff if none. */
+ uint16_t index3NullOffset;
+
+ /** Data null block offset bits 15..0 (bits 19..16 are in options); the full offset is 0xfffff if none. */
+ uint16_t dataNullOffset;
+
+ /**
+ * First code point of the single-value range ending with U+10ffff,
+ * rounded up and then shifted right by UCPTRIE_SHIFT_2.
+ */
+ uint16_t shiftedHighStart;
+};
+
+/**
+ * Constants for use with UCPTrieHeader.options and with the header's null-offset fields.
+ * @internal
+ */
+enum {
+ UCPTRIE_OPTIONS_DATA_LENGTH_MASK = 0xf000, // options bits 15..12: data length bits 19..16
+ UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00, // options bits 11..8: data null block offset bits 19..16
+ UCPTRIE_OPTIONS_RESERVED_MASK = 0x38, // options bits 5..3: reserved (0)
+ UCPTRIE_OPTIONS_VALUE_BITS_MASK = 7, // options bits 2..0: UCPTrieValueWidth
+ /**
+ * Value for index3NullOffset which indicates that there is no index-3 null block.
+ * Bit 15 is unused for this value because this bit is used if the index-3 contains
+ * 18-bit indexes.
+ */
+ UCPTRIE_NO_INDEX3_NULL_OFFSET = 0x7fff,
+ UCPTRIE_NO_DATA_NULL_OFFSET = 0xfffff // Value for the (20-bit) dataNullOffset when there is no data null block.
+};
+
+// Internal constants describing the index structure (lengths, shifts and masks).
+enum {
+ /** The length of the BMP index table. 1024=0x400 */
+ UCPTRIE_BMP_INDEX_LENGTH = 0x10000 >> UCPTRIE_FAST_SHIFT,
+
+ UCPTRIE_SMALL_LIMIT = 0x1000, // Fast-indexing limit of a "small" trie: code points below U+1000 use the fast index.
+ UCPTRIE_SMALL_INDEX_LENGTH = UCPTRIE_SMALL_LIMIT >> UCPTRIE_FAST_SHIFT, // Length of the fast index of a "small" trie.
+
+ /** Shift size for getting the index-3 table offset. */
+ UCPTRIE_SHIFT_3 = 4,
+
+ /** Shift size for getting the index-2 table offset. 9=5+4 */
+ UCPTRIE_SHIFT_2 = 5 + UCPTRIE_SHIFT_3,
+
+ /** Shift size for getting the index-1 table offset. 14=5+9 */
+ UCPTRIE_SHIFT_1 = 5 + UCPTRIE_SHIFT_2,
+
+ /**
+ * Difference between two shift sizes,
+ * for getting an index-2 offset from an index-3 offset. 5=9-4
+ */
+ UCPTRIE_SHIFT_2_3 = UCPTRIE_SHIFT_2 - UCPTRIE_SHIFT_3,
+
+ /**
+ * Difference between two shift sizes,
+ * for getting an index-1 offset from an index-2 offset. 5=14-9
+ */
+ UCPTRIE_SHIFT_1_2 = UCPTRIE_SHIFT_1 - UCPTRIE_SHIFT_2,
+
+ /**
+ * Number of index-1 entries for the BMP. (4)
+ * This part of the index-1 table is omitted from the serialized form.
+ */
+ UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UCPTRIE_SHIFT_1,
+
+ /** Number of entries in an index-2 block. 32=0x20 */
+ UCPTRIE_INDEX_2_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_1_2,
+
+ /** Mask for getting the lower bits for the in-index-2-block offset. */
+ UCPTRIE_INDEX_2_MASK = UCPTRIE_INDEX_2_BLOCK_LENGTH - 1,
+
+ /** Number of code points per index-2 table entry. 512=0x200 */
+ UCPTRIE_CP_PER_INDEX_2_ENTRY = 1 << UCPTRIE_SHIFT_2,
+
+ /** Number of entries in an index-3 block. 32=0x20 */
+ UCPTRIE_INDEX_3_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_2_3,
+
+ /** Mask for getting the lower bits for the in-index-3-block offset. */
+ UCPTRIE_INDEX_3_MASK = UCPTRIE_INDEX_3_BLOCK_LENGTH - 1,
+
+ /** Number of entries in a small data block. 16=0x10 */
+ UCPTRIE_SMALL_DATA_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_3,
+
+ /** Mask for getting the lower bits for the in-small-data-block offset. */
+ UCPTRIE_SMALL_DATA_MASK = UCPTRIE_SMALL_DATA_BLOCK_LENGTH - 1
+};
+
+typedef UChar32
+UCPTrieGetRange(const void *trie, UChar32 start,
+ UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
+
+U_CFUNC UChar32
+ucptrie_internalGetRange(UCPTrieGetRange *getRange,
+ const void *trie, UChar32 start,
+ UCPMapRangeOption option, uint32_t surrogateValue,
+ UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
+
+#ifdef UCPTRIE_DEBUG
+U_CFUNC void
+ucptrie_printLengths(const UCPTrie *trie, const char *which);
+
+U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name);
+#endif
+
+/*
+ * Format of the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
+ * For overview information see http://site.icu-project.org/design/struct/utrie
+ *
+ * The binary trie data should be 32-bit-aligned.
+ * The overall layout is:
+ *
+ * UCPTrieHeader header; -- 16 bytes, see struct definition above
+ * uint16_t index[header.indexLength];
+ * uintXY_t data[header.dataLength];
+ *
+ * The trie data array is an array of uint16_t, uint32_t, or uint8_t,
+ * specified via the UCPTrieValueWidth when building the trie.
+ * The data array is 32-bit-aligned for uint32_t, otherwise 16-bit-aligned.
+ * The overall length of the trie data is a multiple of 4 bytes.
+ * (Padding is added at the end of the index array and/or near the end of the data array as needed.)
+ *
+ * The length of the data array (dataLength) is stored as an integer split across two fields
+ * of the header struct (high bits in header.options).
+ *
+ * The trie type can be "fast" or "small" which determines the index structure,
+ * specified via the UCPTrieType when building the trie.
+ *
+ * The type and valueWidth are stored in the header.options.
+ * There are reserved type and valueWidth values, and reserved header.options bits.
+ * They could be used in future format extensions.
+ * Code reading the trie structure must fail with an error when unknown values or options are set.
+ *
+ * Values for ASCII characters (U+0000..U+007F) can always be found at the start of the data array.
+ *
+ * Values for code points below a type-specific fast-indexing limit are found via two-stage lookup.
+ * For a "fast" trie, the limit is the BMP/supplementary boundary at U+10000.
+ * For a "small" trie, the limit is UCPTRIE_SMALL_MAX+1=U+1000.
+ *
+ * All code points in the range highStart..U+10FFFF map to a single highValue
+ * which is stored at the second-to-last position of the data array.
+ * (See UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET.)
+ * The highStart value is header.shiftedHighStart<<UCPTRIE_SHIFT_2.
+ * (UCPTRIE_SHIFT_2=9)
+ *
+ * Values for code points fast_limit..highStart-1 are found via four-stage lookup.
+ * The data block size is smaller for this range than for the fast range.
+ * This together with more index stages with small blocks makes this range
+ * more easily compactable.
+ *
+ * There is also a trie error value stored at the last position of the data array.
+ * (See UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET.)
+ * It is intended to be returned for inputs that are not Unicode code points
+ * (outside U+0000..U+10FFFF), or in string processing for ill-formed input
+ * (unpaired surrogate in UTF-16, ill-formed UTF-8 subsequence).
+ *
+ * For a "fast" trie:
+ *
+ * The index array starts with the BMP index table for BMP code point lookup.
+ * Its length is 1024=0x400.
+ *
+ * The supplementary index-1 table follows the BMP index table.
+ * Variable length, for code points up to highStart-1.
+ * Maximum length 64=0x40=0x100000>>UCPTRIE_SHIFT_1.
+ * (For 0x100000 supplementary code points U+10000..U+10ffff.)
+ *
+ * After this index-1 table follow the variable-length index-3 and index-2 tables.
+ *
+ * The supplementary index tables are omitted completely
+ * if there is only BMP data (highStart<=U+10000).
+ *
+ * For a "small" trie:
+ *
+ * The index array starts with a fast-index table for lookup of code points U+0000..U+0FFF.
+ *
+ * The "supplementary" index tables are always stored.
+ * The index-1 table starts from U+0000, its maximum length is 68=0x44=0x110000>>UCPTRIE_SHIFT_1.
+ *
+ * For both trie types:
+ *
+ * The last index-2 block may be a partial block, storing indexes only for code points
+ * below highStart.
+ *
+ * Lookup for ASCII code point c:
+ *
+ * Linear access from the start of the data array.
+ *
+ * value = data[c];
+ *
+ * Lookup for fast-range code point c:
+ *
+ * Shift the code point right by UCPTRIE_FAST_SHIFT=6 bits,
+ * fetch the index array value at that offset,
+ * add the lower code point bits, index into the data array.
+ *
+ * value = data[index[c>>6] + (c&0x3f)];
+ *
+ * (This works for ASCII as well.)
+ *
+ * Lookup for small-range code point c below highStart:
+ *
+ * Split the code point into four bit fields using several sets of shifts & masks
+ * to read consecutive values from the index-1, index-2, index-3 and data tables.
+ *
+ * If all of the data block offsets in an index-3 block fit within 16 bits (up to 0xffff),
+ * then the data block offsets are stored directly as uint16_t.
+ *
+ * Otherwise (this is very unusual but possible), the index-2 entry for the index-3 block
+ * has bit 15 (0x8000) set, and each set of 8 index-3 entries is preceded by
+ * an additional uint16_t word. Data block offsets are 18 bits wide, with the top 2 bits stored
+ * in the additional word.
+ *
+ * See ucptrie_internalSmallIndex() for details.
+ *
+ * (In a "small" trie, this works for ASCII and below-fast_limit code points as well.)
+ *
+ * Compaction:
+ *
+ * Multiple code point ranges ("blocks") that are aligned on certain boundaries
+ * (determined by the shifting/bit fields of code points) and
+ * map to the same data values normally share a single subsequence of the data array.
+ * Data blocks can also overlap partially.
+ * (Depending on the builder code finding duplicate and overlapping blocks.)
+ *
+ * Iteration over same-value ranges:
+ *
+ * Range iteration (ucptrie_getRange()) walks the structure from a start code point
+ * until some code point is found that maps to a different value;
+ * the end of the returned range is just before that.
+ *
+ * The header.dataNullOffset (split across two header fields, high bits in header.options)
+ * is the offset of a widely shared data block filled with one single value.
+ * It helps quickly skip over large ranges of data with that value.
+ * The builder must ensure that if the start of any data block (fast or small)
+ * matches the dataNullOffset, then the whole block must be filled with the null value.
+ * Special care must be taken if there is no fast null data block
+ * but a small one, which is shorter, and it matches the *start* of some fast data block.
+ *
+ * Similarly, the header.index3NullOffset is the index-array offset of an index-3 block
+ * where all index entries point to the dataNullOffset.
+ * If there is no such data or index-3 block, then these offsets are set to
+ * values that cannot be reached (data offset out of range/reserved index offset),
+ * normally UCPTRIE_NO_DATA_NULL_OFFSET or UCPTRIE_NO_INDEX3_NULL_OFFSET respectively.
+ */
+
+#endif
diff --git a/thirdparty/icu4c/common/ucurr.cpp b/thirdparty/icu4c/common/ucurr.cpp
new file mode 100644
index 0000000000..0e14cddcff
--- /dev/null
+++ b/thirdparty/icu4c/common/ucurr.cpp
@@ -0,0 +1,2701 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/ucurr.h"
+#include "unicode/locid.h"
+#include "unicode/ures.h"
+#include "unicode/ustring.h"
+#include "unicode/parsepos.h"
+#include "unicode/uniset.h"
+#include "unicode/usetiter.h"
+#include "unicode/utf16.h"
+#include "ustr_imp.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "static_unicode_sets.h"
+#include "uassert.h"
+#include "umutex.h"
+#include "ucln_cmn.h"
+#include "uenumimp.h"
+#include "uhash.h"
+#include "hash.h"
+#include "uinvchar.h"
+#include "uresimp.h"
+#include "ulist.h"
+#include "uresimp.h"
+#include "ureslocs.h"
+#include "ulocimp.h"
+
+using namespace icu;
+
+//#define UCURR_DEBUG_EQUIV 1
+#ifdef UCURR_DEBUG_EQUIV
+#include "stdio.h"
+#endif
+//#define UCURR_DEBUG 1
+#ifdef UCURR_DEBUG
+#include "stdio.h"
+#endif
+
+typedef struct IsoCodeEntry {
+ const UChar *isoCode; /* const because it's a reference to a resource bundle string. */
+ UDate from; /* NOTE(review): presumably the start of the code's validity period -- confirm where entries are filled in. */
+ UDate to; /* NOTE(review): presumably the end of the code's validity period -- confirm where entries are filled in. */
+} IsoCodeEntry;
+
+//------------------------------------------------------------
+// Constants
+
+// Default currency meta data of last resort. We try to use the
+// defaults encoded in the meta data resource bundle. If there is a
+// configuration/build error and these are not available, we use these
+// hard-coded defaults (which should be identical).
+static const int32_t LAST_RESORT_DATA[] = { 2, 0, 2, 0 };
+
+// POW10[i] = 10^i, i=0..MAX_POW10
+static const int32_t POW10[] = { 1, 10, 100, 1000, 10000, 100000,
+ 1000000, 10000000, 100000000, 1000000000 };
+
+static const int32_t MAX_POW10 = UPRV_LENGTHOF(POW10) - 1;
+
+#define ISO_CURRENCY_CODE_LENGTH 3
+
+//------------------------------------------------------------
+// Resource tags
+//
+
+static const char CURRENCY_DATA[] = "supplementalData";
+// Tag for meta-data, in root.
+static const char CURRENCY_META[] = "CurrencyMeta";
+
+// Tag for map from countries to currencies, in root.
+static const char CURRENCY_MAP[] = "CurrencyMap";
+
+// Tag for default meta-data, in CURRENCY_META
+static const char DEFAULT_META[] = "DEFAULT";
+
+// Variant delimiter
+static const char VAR_DELIM = '_';
+
+// Tag for localized display names (symbols) of currencies
+static const char CURRENCIES[] = "Currencies";
+static const char CURRENCIES_NARROW[] = "Currencies%narrow";
+static const char CURRENCIES_FORMAL[] = "Currencies%formal";
+static const char CURRENCIES_VARIANT[] = "Currencies%variant";
+static const char CURRENCYPLURALS[] = "CurrencyPlurals";
+
+// ISO codes mapping table
+static const UHashtable* gIsoCodes = NULL;
+static icu::UInitOnce gIsoCodesInitOnce = U_INITONCE_INITIALIZER;
+
+// Currency symbol equivalences
+static const icu::Hashtable* gCurrSymbolsEquiv = NULL;
+static icu::UInitOnce gCurrSymbolsEquivInitOnce = U_INITONCE_INITIALIZER;
+
+U_NAMESPACE_BEGIN
+
+// EquivIterator iterates over all strings that are equivalent to a given
+// string, s. Note that EquivIterator will never yield s itself.
+class EquivIterator : public icu::UMemory {
+public:
+ // Constructor. hash stores the equivalence relationships; s is the string
+ // for which we find equivalent strings.
+ inline EquivIterator(const icu::Hashtable& hash, const icu::UnicodeString& s)
+ : _hash(hash) {
+ _start = _current = &s; // Stores the address of s (no copy), so s must outlive this iterator.
+ }
+ inline ~EquivIterator() { }
+
+ // next returns the next equivalent string or NULL if there are no more.
+ // If s has no equivalent strings, next returns NULL on the first call.
+ const icu::UnicodeString *next();
+private:
+ const icu::Hashtable& _hash; // Referenced, not copied.
+ const icu::UnicodeString* _start; // The string the iteration started from.
+ const icu::UnicodeString* _current; // Most recently returned string; equals _start before the first next().
+};
+
+const icu::UnicodeString *
+EquivIterator::next() {
+ const icu::UnicodeString* _next = (const icu::UnicodeString*) _hash.get(*_current); // The hash maps each string to its successor.
+ if (_next == NULL) {
+ U_ASSERT(_current == _start); // Only the start string is expected to lack a successor.
+ return NULL;
+ }
+ if (*_next == *_start) {
+ return NULL; // Back at the start string: every equivalent has been returned.
+ }
+ _current = _next;
+ return _next;
+}
+
+U_NAMESPACE_END
+
+// makeEquivalent makes lhs and rhs equivalent by updating the equivalence
+// relations in hash accordingly.
+static void makeEquivalent(
+ const icu::UnicodeString &lhs,
+ const icu::UnicodeString &rhs,
+ icu::Hashtable* hash, UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if (lhs == rhs) {
+ // already equivalent
+ return;
+ }
+ icu::EquivIterator leftIter(*hash, lhs);
+ icu::EquivIterator rightIter(*hash, rhs);
+ const icu::UnicodeString *firstLeft = leftIter.next();
+ const icu::UnicodeString *firstRight = rightIter.next();
+ const icu::UnicodeString *nextLeft = firstLeft;
+ const icu::UnicodeString *nextRight = firstRight;
+ while (nextLeft != NULL && nextRight != NULL) { // Walk both circles in lock step until either one is exhausted.
+ if (*nextLeft == rhs || *nextRight == lhs) {
+ // Already equivalent
+ return;
+ }
+ nextLeft = leftIter.next();
+ nextRight = rightIter.next();
+ }
+ // Not equivalent. Must join.
+ icu::UnicodeString *newFirstLeft;
+ icu::UnicodeString *newFirstRight;
+ if (firstRight == NULL && firstLeft == NULL) {
+ // Neither lhs nor rhs belongs to an equivalence circle, so we form
+ // a new equivalence circle of just lhs and rhs.
+ newFirstLeft = new icu::UnicodeString(rhs);
+ newFirstRight = new icu::UnicodeString(lhs);
+ } else if (firstRight == NULL) {
+ // lhs belongs to an equivalence circle, but rhs does not, so we link
+ // rhs into lhs' circle.
+ newFirstLeft = new icu::UnicodeString(rhs);
+ newFirstRight = new icu::UnicodeString(*firstLeft);
+ } else if (firstLeft == NULL) {
+ // rhs belongs to an equivalence circle, but lhs does not, so we link
+ // lhs into rhs' circle.
+ newFirstLeft = new icu::UnicodeString(*firstRight);
+ newFirstRight = new icu::UnicodeString(lhs);
+ } else {
+ // Both lhs and rhs belong to different equivalence circles. We link
+ // them together to form one single, larger equivalence circle.
+ newFirstLeft = new icu::UnicodeString(*firstRight);
+ newFirstRight = new icu::UnicodeString(*firstLeft);
+ }
+ if (newFirstLeft == NULL || newFirstRight == NULL) {
+ delete newFirstLeft;
+ delete newFirstRight;
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ hash->put(lhs, (void *) newFirstLeft, status); // lhs now points at newFirstLeft as its successor.
+ hash->put(rhs, (void *) newFirstRight, status); // NOTE(review): presumably the hash takes ownership of both values -- confirm where it is created.
+}
+
+// countEquivalent counts how many strings are equivalent to s.
+// hash stores all the equivalence relations.
+// countEquivalent does not include s itself in the count.
+static int32_t countEquivalent(const icu::Hashtable &hash, const icu::UnicodeString &s) {
+ int32_t result = 0;
+ icu::EquivIterator iter(hash, s);
+ while (iter.next() != NULL) { // One iteration per string equivalent to s.
+ ++result;
+ }
+#ifdef UCURR_DEBUG_EQUIV
+ {
+ char tmp[200];
+ s.extract(0,s.length(),tmp, "UTF-8");
+ printf("CountEquivalent('%s') = %d\n", tmp, result);
+ }
+#endif
+ return result;
+}
+
+static const icu::Hashtable* getCurrSymbolsEquiv();
+
+//------------------------------------------------------------
+// Code
+
+/**
+ * Cleanup callback func: closes the gIsoCodes hashtable (if any) and resets gIsoCodesInitOnce.
+ */
+static UBool U_CALLCONV
+isoCodes_cleanup(void)
+{
+ if (gIsoCodes != NULL) {
+ uhash_close(const_cast<UHashtable *>(gIsoCodes)); // gIsoCodes is declared const; cast it away to close the table.
+ gIsoCodes = NULL;
+ }
+ gIsoCodesInitOnce.reset();
+ return TRUE;
+}
+
+/**
+ * Cleanup callback func: deletes the gCurrSymbolsEquiv table and resets gCurrSymbolsEquivInitOnce.
+ */
+static UBool U_CALLCONV
+currSymbolsEquiv_cleanup(void)
+{
+ delete const_cast<icu::Hashtable *>(gCurrSymbolsEquiv); // Deleting NULL is a no-op, so no check is needed.
+ gCurrSymbolsEquiv = NULL;
+ gCurrSymbolsEquivInitOnce.reset();
+ return TRUE;
+}
+
+/**
+ * Deleter for IsoCodeEntry: releases the entry with uprv_free().
+ */
+static void U_CALLCONV
+deleteIsoCodeEntry(void *obj) {
+ IsoCodeEntry *entry = (IsoCodeEntry*)obj;
+ uprv_free(entry);
+}
+
+/**
+ * Deleter for gCurrSymbolsEquiv: deletes obj as an icu::UnicodeString.
+ */
+static void U_CALLCONV
+deleteUnicode(void *obj) {
+ icu::UnicodeString *entry = (icu::UnicodeString*)obj;
+ delete entry;
+}
+
+/**
+ * Unfortunately, we have to convert the UChar* currency code to char*
+ * to use it as a resource key. Returns resultOfLen4 so the call can be used inline.
+ */
+static inline char*
+myUCharsToChars(char* resultOfLen4, const UChar* currency) {
+ u_UCharsToChars(currency, resultOfLen4, ISO_CURRENCY_CODE_LENGTH); // Converts exactly ISO_CURRENCY_CODE_LENGTH code units.
+ resultOfLen4[ISO_CURRENCY_CODE_LENGTH] = 0; // NUL-terminate: the buffer must hold ISO_CURRENCY_CODE_LENGTH+1 chars.
+ return resultOfLen4;
+}
+
+/**
+ * Internal function to look up currency data. Result is an array of
+ * four integers. The first is the fraction digits. The second is the
+ * rounding increment, or 0 if none. The rounding increment is in
+ * units of 10^(-fraction_digits). The third and fourth are the same
+ * except that they are those used in cash transactions ( cashDigits
+ * and cashRounding ). On any failure, LAST_RESORT_DATA is returned.
+ */
+static const int32_t*
+_findMetaData(const UChar* currency, UErrorCode& ec) {
+
+ if (currency == 0 || *currency == 0) { // Null or empty currency code.
+ if (U_SUCCESS(ec)) { // Do not overwrite a failure that is already set.
+ ec = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+ return LAST_RESORT_DATA;
+ }
+
+ // Get CurrencyMeta resource out of root locale file. [This may
+ // move out of the root locale file later; if it does, update this
+ // code.]
+ UResourceBundle* currencyData = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &ec);
+ UResourceBundle* currencyMeta = ures_getByKey(currencyData, CURRENCY_META, currencyData, &ec); // currencyData is also passed as the third argument; only currencyMeta is closed below.
+
+ if (U_FAILURE(ec)) {
+ ures_close(currencyMeta);
+ // Config/build error; return hard-coded defaults
+ return LAST_RESORT_DATA;
+ }
+
+ // Look up our currency, or if that's not available, then DEFAULT
+ char buf[ISO_CURRENCY_CODE_LENGTH+1];
+ UErrorCode ec2 = U_ZERO_ERROR; // local error code: soft failure
+ UResourceBundle* rb = ures_getByKey(currencyMeta, myUCharsToChars(buf, currency), NULL, &ec2);
+ if (U_FAILURE(ec2)) {
+ ures_close(rb);
+ rb = ures_getByKey(currencyMeta,DEFAULT_META, NULL, &ec); // Unlike the lookup above, a failure here is reported through ec.
+ if (U_FAILURE(ec)) {
+ ures_close(currencyMeta);
+ ures_close(rb);
+ // Config/build error; return hard-coded defaults
+ return LAST_RESORT_DATA;
+ }
+ }
+
+ int32_t len;
+ const int32_t *data = ures_getIntVector(rb, &len, &ec);
+ if (U_FAILURE(ec) || len != 4) { // The vector must contain exactly the four values described above.
+ // Config/build error; return hard-coded defaults
+ if (U_SUCCESS(ec)) {
+ ec = U_INVALID_FORMAT_ERROR;
+ }
+ ures_close(currencyMeta);
+ ures_close(rb);
+ return LAST_RESORT_DATA;
+ }
+
+ ures_close(currencyMeta);
+ ures_close(rb);
+ return data; // NOTE(review): data is returned after rb is closed; presumably it points into the loaded resource data -- confirm against ures_getIntVector.
+}
+
+// -------------------------------------
+
+static void
+idForLocale(const char* locale, char* countryAndVariant, int capacity, UErrorCode* ec)
+{
+ ulocimp_getRegionForSupplementalData(locale, FALSE, countryAndVariant, capacity, ec); // Thin wrapper; the result is written to countryAndVariant. The meaning of FALSE is defined by the callee (not visible here).
+}
+
+// ------------------------------------------
+//
+// Registration
+//
+//-------------------------------------------
+
+// don't use ICUService since we don't need fallback
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV currency_cleanup(void);
+U_CDECL_END
+
+#if !UCONFIG_NO_SERVICE
+struct CReg;
+
+static UMutex gCRegLock;
+static CReg* gCRegHead = 0;
+
+struct CReg : public icu::UMemory { // One registered (id -> currency code) entry; entries form a singly linked list headed by gCRegHead.
+ CReg *next; // Next entry in the list, or 0 at the end.
+ UChar iso[ISO_CURRENCY_CODE_LENGTH+1]; // NUL-terminated currency code.
+ char id[ULOC_FULLNAME_CAPACITY]; // NUL-terminated id that get() compares against.
+
+ CReg(const UChar* _iso, const char* _id)
+ : next(0)
+ {
+ int32_t len = (int32_t)uprv_strlen(_id);
+ if (len > (int32_t)(sizeof(id)-1)) { // Truncate an id that does not fit, leaving room for the NUL.
+ len = (sizeof(id)-1);
+ }
+ uprv_strncpy(id, _id, len);
+ id[len] = 0;
+ u_memcpy(iso, _iso, ISO_CURRENCY_CODE_LENGTH); // Copies exactly ISO_CURRENCY_CODE_LENGTH code units from _iso.
+ iso[ISO_CURRENCY_CODE_LENGTH] = 0;
+ }
+
+ static UCurrRegistryKey reg(const UChar* _iso, const char* _id, UErrorCode* status)
+ {
+ if (status && U_SUCCESS(*status) && _iso && _id) {
+ CReg* n = new CReg(_iso, _id);
+ if (n) {
+ umtx_lock(&gCRegLock);
+ if (!gCRegHead) {
+ /* register for the first time */
+ ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup);
+ }
+ n->next = gCRegHead; // Insert at the front of the list.
+ gCRegHead = n;
+ umtx_unlock(&gCRegLock);
+ return n; // The entry pointer itself serves as the registry key.
+ }
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ return 0;
+ }
+
+ static UBool unreg(UCurrRegistryKey key) {
+ UBool found = FALSE;
+ umtx_lock(&gCRegLock);
+
+ CReg** p = &gCRegHead; // p addresses the link that must be rewritten when the entry is removed.
+ while (*p) {
+ if (*p == key) {
+ *p = ((CReg*)key)->next;
+ delete (CReg*)key; // key is only dereferenced after it was found in the list.
+ found = TRUE;
+ break;
+ }
+ p = &((*p)->next);
+ }
+
+ umtx_unlock(&gCRegLock);
+ return found;
+ }
+
+ static const UChar* get(const char* id) {
+ const UChar* result = NULL;
+ umtx_lock(&gCRegLock);
+ CReg* p = gCRegHead;
+
+ /* register cleanup of the mutex */
+ ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup);
+ while (p) {
+ if (uprv_strcmp(id, p->id) == 0) {
+ result = p->iso; // Points into the entry itself, which unreg() deletes.
+ break;
+ }
+ p = p->next;
+ }
+ umtx_unlock(&gCRegLock);
+ return result;
+ }
+
+ /* This doesn't need to be thread safe. It's for u_cleanup only. */
+ static void cleanup(void) {
+ while (gCRegHead) { // Pop and delete entries until the list is empty.
+ CReg* n = gCRegHead;
+ gCRegHead = gCRegHead->next;
+ delete n;
+ }
+ }
+};
+
+// -------------------------------------
+
+ // Registers `isoCode` as the currency for the region id derived from `locale`.
+ // Returns the registry key produced by CReg::reg(), or NULL when `status` is
+ // null or already indicates a failure.
+ U_CAPI UCurrRegistryKey U_EXPORT2
+ ucurr_register(const UChar* isoCode, const char* locale, UErrorCode *status)
+ {
+     if (!status || U_FAILURE(*status)) {
+         return NULL;
+     }
+     char regionId[ULOC_FULLNAME_CAPACITY];
+     idForLocale(locale, regionId, sizeof(regionId), status);
+     // CReg::reg() re-checks *status, so a failed id lookup yields a 0 key.
+     return CReg::reg(isoCode, regionId, status);
+ }
+
+// -------------------------------------
+
+ // Removes the registration identified by `key`.
+ // Returns the result of CReg::unreg(), or FALSE when `status` is null or
+ // already indicates a failure.
+ U_CAPI UBool U_EXPORT2
+ ucurr_unregister(UCurrRegistryKey key, UErrorCode* status)
+ {
+     if (!status || U_FAILURE(*status)) {
+         return FALSE;
+     }
+     return CReg::unreg(key);
+ }
+#endif /* UCONFIG_NO_SERVICE */
+
+// -------------------------------------
+
+/**
+ * Release all static memory held by currency.
+ */
+/*The declaration here is needed so currency_cleanup(void)
+ * can call this function.
+ */
+static UBool U_CALLCONV
+currency_cache_cleanup(void);
+
+U_CDECL_BEGIN
+ // Cleanup callback registered with ucln_common_registerCleanup() under
+ // UCLN_COMMON_CURRENCY. Releases the registration list (when services are
+ // compiled in) and then the currency-related caches. Always returns TRUE.
+ static UBool U_CALLCONV currency_cleanup(void) {
+ #if !UCONFIG_NO_SERVICE
+ CReg::cleanup();
+ #endif
+ /*
+ * There might be some cached currency data or isoCodes data.
+ */
+ currency_cache_cleanup();
+ isoCodes_cleanup();
+ currSymbolsEquiv_cleanup();
+
+ return TRUE;
+ }
+U_CDECL_END
+
+// -------------------------------------
+
+ // Writes the ISO currency code for `locale` into `buff` and returns its length.
+ //
+ // Resolution order, as implemented below:
+ //   1. a three-character invariant "currency" keyword on the locale, uppercased;
+ //   2. a currency registered through CReg for the locale's region id
+ //      (only when services are compiled in);
+ //   3. the "id" of the first entry for the region in the CurrencyMap data;
+ //   4. if the lookup failed and the id still contains '_', the same procedure
+ //      applied recursively to the parent locale.
+ //
+ // @param locale        locale id to resolve.
+ // @param buff          destination buffer; may be null only if buffCapacity is 0.
+ // @param buffCapacity  capacity of `buff` in UChars; must not be negative.
+ // @param ec            in/out error code; must not be null (it is dereferenced
+ //                      unconditionally). Set to U_ILLEGAL_ARGUMENT_ERROR for bad
+ //                      buffer arguments.
+ // @return the value returned by u_terminateUChars() for the resolved code, or 0
+ //         on an early error return.
+ U_CAPI int32_t U_EXPORT2
+ ucurr_forLocale(const char* locale,
+ UChar* buff,
+ int32_t buffCapacity,
+ UErrorCode* ec) {
+ if (U_FAILURE(*ec)) { return 0; }
+ if (buffCapacity < 0 || (buff == nullptr && buffCapacity > 0)) {
+ *ec = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ char currency[4]; // ISO currency codes are alpha3 codes.
+ UErrorCode localStatus = U_ZERO_ERROR;
+ int32_t resLen = uloc_getKeywordValue(locale, "currency",
+ currency, UPRV_LENGTHOF(currency), &localStatus);
+ if (U_SUCCESS(localStatus) && resLen == 3 && uprv_isInvariantString(currency, resLen)) {
+ // Only copy when the code plus terminator fits; u_terminateUChars() below
+ // still receives the full length either way.
+ if (resLen < buffCapacity) {
+ T_CString_toUpperCase(currency);
+ u_charsToUChars(currency, buff, resLen);
+ }
+ return u_terminateUChars(buff, buffCapacity, resLen, ec);
+ }
+
+ // get country or country_variant in `id'
+ char id[ULOC_FULLNAME_CAPACITY];
+ idForLocale(locale, id, UPRV_LENGTHOF(id), ec);
+ if (U_FAILURE(*ec)) {
+ return 0;
+ }
+
+ #if !UCONFIG_NO_SERVICE
+ // A registered currency takes precedence over the data file.
+ const UChar* result = CReg::get(id);
+ if (result) {
+ if(buffCapacity > u_strlen(result)) {
+ u_strcpy(buff, result);
+ }
+ resLen = u_strlen(result);
+ return u_terminateUChars(buff, buffCapacity, resLen, ec);
+ }
+ #endif
+ // Remove variants, which is only needed for registration.
+ char *idDelim = uprv_strchr(id, VAR_DELIM);
+ if (idDelim) {
+ idDelim[0] = 0;
+ }
+
+ const UChar* s = NULL; // Currency code from data file.
+ if (id[0] == 0) {
+ // No point looking in the data for an empty string.
+ // This is what we would get.
+ localStatus = U_MISSING_RESOURCE_ERROR;
+ } else {
+ // Look up the CurrencyMap element in the root bundle.
+ localStatus = U_ZERO_ERROR;
+ UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus);
+ // NOTE(review): `rb` and then `cm` are passed as the fill-in argument, so
+ // rb, cm and countryArray presumably all refer to the same bundle object,
+ // which would be why only countryArray is closed below. Confirm against
+ // the ures_getByKey() fill-in contract.
+ UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
+ UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus);
+ UResourceBundle *currencyReq = ures_getByIndex(countryArray, 0, NULL, &localStatus);
+ s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus);
+ ures_close(currencyReq);
+ ures_close(countryArray);
+ }
+
+ if ((U_FAILURE(localStatus)) && strchr(id, '_') != 0) {
+ // We don't know about it. Check to see if we support the variant.
+ uloc_getParent(locale, id, UPRV_LENGTHOF(id), ec);
+ // The status written by uloc_getParent() is overwritten unconditionally.
+ *ec = U_USING_FALLBACK_WARNING;
+ // TODO: Loop over the shortened id rather than recursing and
+ // looking again for a currency keyword.
+ return ucurr_forLocale(id, buff, buffCapacity, ec);
+ }
+ if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) {
+ // There is nothing to fallback to. Report the failure/warning if possible.
+ *ec = localStatus;
+ }
+ if (U_SUCCESS(*ec)) {
+ if(buffCapacity > resLen) {
+ u_strcpy(buff, s);
+ }
+ }
+ return u_terminateUChars(buff, buffCapacity, resLen, ec);
+ }
+
+// end registration
+
+/**
+ * Modify the given locale name by removing the rightmost _-delimited
+ * element. If there is none, empty the string ("" == root).
+ * NOTE: The string "root" is not recognized; do not use it.
+ * @return TRUE if the fallback happened; FALSE if locale is already
+ * root ("").
+ */
+static UBool fallback(char *loc) {
+ if (!*loc) {
+ return FALSE;
+ }
+ UErrorCode status = U_ZERO_ERROR;
+ if (uprv_strcmp(loc, "en_GB") == 0) {
+ // HACK: See #13368. We need "en_GB" to fall back to "en_001" instead of "en"
+ // in order to consume the correct data strings. This hack will be removed
+ // when proper data sink loading is implemented here.
+ // NOTE: "001" adds 1 char over "GB". However, both call sites allocate
+ // arrays with length ULOC_FULLNAME_CAPACITY (plenty of room for en_001).
+ uprv_strcpy(loc + 3, "001");
+ } else {
+ uloc_getParent(loc, loc, (int32_t)uprv_strlen(loc), &status);
+ }
+ /*
+ char *i = uprv_strrchr(loc, '_');
+ if (i == NULL) {
+ i = loc;
+ }
+ *i = 0;
+ */
+ return TRUE;
+}
+
+
+ // Returns the display name of `currency` for `locale` in the requested style.
+ //
+ // @param currency        ISO currency code; also the fallback return value.
+ // @param locale          locale whose currency data is consulted.
+ // @param nameStyle       must convert to an integer in [0, 4]; otherwise *ec is
+ //                        set to U_ILLEGAL_ARGUMENT_ERROR and 0 is returned.
+ // @param isChoiceFormat  optional; set to FALSE when non-null (not written on
+ //                        the early error returns).
+ // @param len             receives the length of the returned string.
+ // @param ec              in/out error code; must not be null. Receives
+ //                        fallback/default warnings, or U_USING_DEFAULT_WARNING
+ //                        when the ISO code itself is returned.
+ // @return the name from the resource data, or `currency` itself when no name
+ //         could be found; 0 on an early error return.
+ U_CAPI const UChar* U_EXPORT2
+ ucurr_getName(const UChar* currency,
+ const char* locale,
+ UCurrNameStyle nameStyle,
+ UBool* isChoiceFormat, // fillin
+ int32_t* len, // fillin
+ UErrorCode* ec) {
+
+ // Look up the Currencies resource for the given locale. The
+ // Currencies locale data looks like this:
+ //|en {
+ //| Currencies {
+ //| USD { "US$", "US Dollar" }
+ //| CHF { "Sw F", "Swiss Franc" }
+ //| INR { "=0#Rs|1#Re|1<Rs", "=0#Rupees|1#Rupee|1<Rupees" }
+ //| //...
+ //| }
+ //|}
+
+ if (U_FAILURE(*ec)) {
+ return 0;
+ }
+
+ int32_t choice = (int32_t) nameStyle;
+ if (choice < 0 || choice > 4) {
+ *ec = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ // In the future, resource bundles may implement multi-level
+ // fallback. That is, if a currency is not found in the en_US
+ // Currencies data, then the en Currencies data will be searched.
+ // Currently, if a Currencies datum exists in en_US and en, the
+ // en_US entry hides that in en.
+
+ // We want multi-level fallback for this resource, so we implement
+ // it manually.
+
+ // Use a separate UErrorCode here that does not propagate out of
+ // this function.
+ UErrorCode ec2 = U_ZERO_ERROR;
+
+ char loc[ULOC_FULLNAME_CAPACITY];
+ uloc_getName(locale, loc, sizeof(loc), &ec2);
+ if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) {
+ *ec = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ char buf[ISO_CURRENCY_CODE_LENGTH+1];
+ myUCharsToChars(buf, currency);
+
+ /* Normalize the keyword value to uppercase */
+ T_CString_toUpperCase(buf);
+
+ const UChar* s = NULL;
+ ec2 = U_ZERO_ERROR;
+ LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_CURR, loc, &ec2));
+
+ // The narrow, formal and variant symbols live in their own tables and are
+ // addressed with a "<table>/<ISO code>" path.
+ if (nameStyle == UCURR_NARROW_SYMBOL_NAME || nameStyle == UCURR_FORMAL_SYMBOL_NAME || nameStyle == UCURR_VARIANT_SYMBOL_NAME) {
+ CharString key;
+ switch (nameStyle) {
+ case UCURR_NARROW_SYMBOL_NAME:
+ key.append(CURRENCIES_NARROW, ec2);
+ break;
+ case UCURR_FORMAL_SYMBOL_NAME:
+ key.append(CURRENCIES_FORMAL, ec2);
+ break;
+ case UCURR_VARIANT_SYMBOL_NAME:
+ key.append(CURRENCIES_VARIANT, ec2);
+ break;
+ default:
+ *ec = U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+ key.append("/", ec2);
+ key.append(buf, ec2);
+ s = ures_getStringByKeyWithFallback(rb.getAlias(), key.data(), len, &ec2);
+ // No dedicated symbol: warn and retry below with the regular symbol.
+ if (ec2 == U_MISSING_RESOURCE_ERROR) {
+ *ec = U_USING_FALLBACK_WARNING;
+ ec2 = U_ZERO_ERROR;
+ choice = UCURR_SYMBOL_NAME;
+ }
+ }
+ if (s == NULL) {
+ // Descend Currencies -> <ISO code>, reusing the bundle held by `rb` as the
+ // fill-in, then pick the array element selected by `choice`.
+ ures_getByKey(rb.getAlias(), CURRENCIES, rb.getAlias(), &ec2);
+ ures_getByKeyWithFallback(rb.getAlias(), buf, rb.getAlias(), &ec2);
+ s = ures_getStringByIndex(rb.getAlias(), choice, len, &ec2);
+ }
+
+ // If we've succeeded we're done. Otherwise, try to fallback.
+ // If that fails (because we are already at root) then exit.
+ if (U_SUCCESS(ec2)) {
+ // Propagate warnings only; a fallback warning never replaces a default
+ // warning already stored in *ec.
+ if (ec2 == U_USING_DEFAULT_WARNING
+ || (ec2 == U_USING_FALLBACK_WARNING && *ec != U_USING_DEFAULT_WARNING)) {
+ *ec = ec2;
+ }
+ }
+
+ // We no longer support choice format data in names. Data should not contain
+ // choice patterns.
+ if (isChoiceFormat != NULL) {
+ *isChoiceFormat = FALSE;
+ }
+ if (U_SUCCESS(ec2)) {
+ U_ASSERT(s != NULL);
+ return s;
+ }
+
+ // If we fail to find a match, use the ISO 4217 code
+ *len = u_strlen(currency); // Should == ISO_CURRENCY_CODE_LENGTH, but maybe not...?
+ *ec = U_USING_DEFAULT_WARNING;
+ return currency;
+ }
+
+ // Returns the plural display name of `currency` for `locale` and the plural
+ // category `pluralCount`.
+ //
+ // Lookup order: CurrencyPlurals/<code>/<pluralCount>, then
+ // CurrencyPlurals/<code>/other, then the long name via ucurr_getName().
+ //
+ // @param currency        ISO currency code.
+ // @param locale          locale whose currency data is consulted.
+ // @param isChoiceFormat  optional; set to FALSE when non-null, matching
+ //                        ucurr_getName() (not written on the early error
+ //                        returns).
+ // @param pluralCount     plural category keyword, e.g. "one" or "other".
+ // @param len             receives the length of the returned string.
+ // @param ec              in/out error code; must not be null.
+ // @return the plural name, the long name when no plural data exists, or 0 on
+ //         an early error return.
+ U_CAPI const UChar* U_EXPORT2
+ ucurr_getPluralName(const UChar* currency,
+                     const char* locale,
+                     UBool* isChoiceFormat,
+                     const char* pluralCount,
+                     int32_t* len, // fillin
+                     UErrorCode* ec) {
+     // Look up the Currencies resource for the given locale. The
+     // Currencies locale data looks like this:
+     //|en {
+     //| CurrencyPlurals {
+     //| USD{
+     //| one{"US dollar"}
+     //| other{"US dollars"}
+     //| }
+     //| }
+     //|}
+
+     if (U_FAILURE(*ec)) {
+         return 0;
+     }
+
+     // Use a separate UErrorCode here that does not propagate out of
+     // this function.
+     UErrorCode ec2 = U_ZERO_ERROR;
+
+     char loc[ULOC_FULLNAME_CAPACITY];
+     uloc_getName(locale, loc, sizeof(loc), &ec2);
+     if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) {
+         *ec = U_ILLEGAL_ARGUMENT_ERROR;
+         return 0;
+     }
+
+     char buf[ISO_CURRENCY_CODE_LENGTH+1];
+     myUCharsToChars(buf, currency);
+
+     const UChar* s = NULL;
+     ec2 = U_ZERO_ERROR;
+     UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2);
+
+     rb = ures_getByKey(rb, CURRENCYPLURALS, rb, &ec2);
+
+     // Fetch resource with multi-level resource inheritance fallback
+     rb = ures_getByKeyWithFallback(rb, buf, rb, &ec2);
+
+     s = ures_getStringByKeyWithFallback(rb, pluralCount, len, &ec2);
+     if (U_FAILURE(ec2)) {
+         // fall back to "other"
+         ec2 = U_ZERO_ERROR;
+         s = ures_getStringByKeyWithFallback(rb, "other", len, &ec2);
+         if (U_FAILURE(ec2)) {
+             ures_close(rb);
+             // fall back to long name in Currencies
+             // (ucurr_getName() takes care of *isChoiceFormat on this path)
+             return ucurr_getName(currency, locale, UCURR_LONG_NAME,
+                                  isChoiceFormat, len, ec);
+         }
+     }
+     ures_close(rb);
+
+     // Choice-format data is no longer supported in names. Report that
+     // explicitly so callers never read an uninitialized flag; previously the
+     // flag was only written when the ucurr_getName() fallback was taken.
+     if (isChoiceFormat != NULL) {
+         *isChoiceFormat = FALSE;
+     }
+
+     // If we've succeeded we're done. Otherwise, try to fallback.
+     // If that fails (because we are already at root) then exit.
+     if (U_SUCCESS(ec2)) {
+         // Propagate warnings only; a fallback warning never replaces a default
+         // warning already stored in *ec.
+         if (ec2 == U_USING_DEFAULT_WARNING
+             || (ec2 == U_USING_FALLBACK_WARNING && *ec != U_USING_DEFAULT_WARNING)) {
+             *ec = ec2;
+         }
+         U_ASSERT(s != NULL);
+         return s;
+     }
+
+     // Defensive: every failing lookup above has already returned, so this is
+     // not expected to be reached. If we fail to find a match, use the ISO 4217 code.
+     *len = u_strlen(currency); // Should == ISO_CURRENCY_CODE_LENGTH, but maybe not...?
+     *ec = U_USING_DEFAULT_WARNING;
+     return currency;
+ }
+
+
+//========================================================================
+// Following are structure and function for parsing currency names
+
+#define NEED_TO_BE_DELETED 0x1
+
+// TODO: a better way to define this?
+#define MAX_CURRENCY_NAME_LEN 100
+
+ // One searchable currency name or symbol. Arrays of these are sorted by
+ // currencyName (see currencyNameComparator) and searched during parsing.
+ typedef struct {
+ const char* IsoCode; // key
+ UChar* currencyName; // value
+ int32_t currencyNameLen; // value length
+ int32_t flag; // flags; NEED_TO_BE_DELETED marks a currencyName that must be freed with uprv_free()
+ } CurrencyNameStruct;
+
+
+#ifndef MIN
+#define MIN(a,b) (((a)<(b)) ? (a) : (b))
+#endif
+
+#ifndef MAX
+#define MAX(a,b) (((a)<(b)) ? (b) : (a))
+#endif
+
+
+ // Comparison function used with qsort() on CurrencyNameStruct arrays.
+ // Orders entries by the code units of currencyName; when one name is a
+ // prefix of the other, the shorter name sorts first.
+ // Returns -1, 0 or 1.
+ static int U_CALLCONV currencyNameComparator(const void* a, const void* b) {
+     const CurrencyNameStruct* lhs = (const CurrencyNameStruct*)a;
+     const CurrencyNameStruct* rhs = (const CurrencyNameStruct*)b;
+
+     const int32_t commonLen = MIN(lhs->currencyNameLen, rhs->currencyNameLen);
+     for (int32_t i = 0; i < commonLen; ++i) {
+         const UChar lc = lhs->currencyName[i];
+         const UChar rc = rhs->currencyName[i];
+         if (lc != rc) {
+             return (lc < rc) ? -1 : 1;
+         }
+     }
+
+     // Common prefix is identical: decide on length alone.
+     if (lhs->currencyNameLen == rhs->currencyNameLen) {
+         return 0;
+     }
+     return (lhs->currencyNameLen < rhs->currencyNameLen) ? -1 : 1;
+ }
+
+
+ // Computes upper bounds for the number of currency names and currency
+ // symbols available for `loc`.
+ // The locale's fallback chain is walked (via fallback()) and every entry at
+ // every level is counted, so names that repeat along the chain, for example
+ // in "en_US" and again in "en", are counted more than once. The results are
+ // therefore maxima, suitable for sizing arrays.
+ // @param loc                          locale name; copied, not modified.
+ // @param total_currency_name_count    receives the long-name + plural-name count.
+ // @param total_currency_symbol_count  receives the symbol + equivalent-symbol +
+ //                                     ISO-code count.
+ static void
+ getCurrencyNameCount(const char* loc, int32_t* total_currency_name_count, int32_t* total_currency_symbol_count) {
+     U_NAMESPACE_USE
+     *total_currency_name_count = 0;
+     *total_currency_symbol_count = 0;
+
+     char locale[ULOC_FULLNAME_CAPACITY] = "";
+     uprv_strcpy(locale, loc);
+     const icu::Hashtable *currencySymbolsEquiv = getCurrSymbolsEquiv();
+
+     do {
+         UErrorCode ec2 = U_ZERO_ERROR;
+         // TODO: ures_openDirect?
+         UResourceBundle* rb = ures_open(U_ICUDATA_CURR, locale, &ec2);
+         UResourceBundle* curr = ures_getByKey(rb, CURRENCIES, NULL, &ec2);
+         const int32_t currencyCount = ures_getSize(curr);
+         for (int32_t i = 0; i < currencyCount; ++i) {
+             UResourceBundle* names = ures_getByIndex(curr, i, NULL, &ec2);
+             int32_t len;
+             const UChar* symbol = ures_getStringByIndex(names, UCURR_SYMBOL_NAME, &len, &ec2);
+             ++(*total_currency_symbol_count);  // currency symbol
+             if (currencySymbolsEquiv != NULL) {
+                 *total_currency_symbol_count += countEquivalent(*currencySymbolsEquiv, UnicodeString(TRUE, symbol, len));
+             }
+             ++(*total_currency_symbol_count);  // iso code
+             ++(*total_currency_name_count);    // long name
+             ures_close(names);
+         }
+
+         // currency plurals: one name per plural form of each currency
+         UErrorCode ec3 = U_ZERO_ERROR;
+         UResourceBundle* curr_p = ures_getByKey(rb, CURRENCYPLURALS, NULL, &ec3);
+         const int32_t pluralCurrencyCount = ures_getSize(curr_p);
+         for (int32_t i = 0; i < pluralCurrencyCount; ++i) {
+             UResourceBundle* names = ures_getByIndex(curr_p, i, NULL, &ec3);
+             *total_currency_name_count += ures_getSize(names);
+             ures_close(names);
+         }
+         ures_close(curr_p);
+         ures_close(curr);
+         ures_close(rb);
+     } while (fallback(locale));
+ }
+
+ // Returns a newly uprv_malloc-ed buffer holding `source` uppercased for
+ // `locale`. If the conversion fails, the buffer holds a plain copy of the
+ // first `len` units of `source` instead. The buffer has room for
+ // MAX(uppercased length, len) units and is not NUL-terminated by this
+ // function; the caller owns it.
+ static UChar*
+ toUpperCase(const UChar* source, int32_t len, const char* locale) {
+     // Preflight with an empty destination to learn the uppercased length.
+     UErrorCode status = U_ZERO_ERROR;
+     const int32_t upperLen = u_strToUpper(NULL, 0, source, len, locale, &status);
+
+     UChar* upper = (UChar*)uprv_malloc(sizeof(UChar) * MAX(upperLen, len));
+
+     // The preflight status is discarded; only the real conversion matters.
+     status = U_ZERO_ERROR;
+     u_strToUpper(upper, upperLen, source, len, locale, &status);
+     if (U_FAILURE(status)) {
+         u_memcpy(upper, source, len);
+     }
+     return upper;
+ }
+
+
+ // Defined with the currency name cache code later in this file; declared here
+ // so that collectCurrencyNames() can release its arrays on failure.
+ static void
+ deleteCurrencyNames(CurrencyNameStruct* currencyNames, int32_t count);
+
+ // Collects all currency names and symbols available for `locale`, walking the
+ // locale fallback chain.
+ // Names are read from the "Currencies" and "CurrencyPlurals" resources. An ISO
+ // code already seen at a more specific locale level is skipped at the less
+ // specific levels. Both result arrays are sorted with currencyNameComparator.
+ //
+ // @param locale                       locale to collect names for.
+ // @param currencyNames                receives a uprv_malloc-ed array of
+ //                                     uppercased long and plural names.
+ // @param total_currency_name_count    receives the number of entries in
+ //                                     *currencyNames.
+ // @param currencySymbols              receives a uprv_malloc-ed array of symbols,
+ //                                     equivalent symbols and ISO codes.
+ // @param total_currency_symbol_count  receives the number of entries in
+ //                                     *currencySymbols.
+ // @param ec                           in/out error code. U_ILLEGAL_ARGUMENT_ERROR
+ //                                     for an unusable locale,
+ //                                     U_MEMORY_ALLOCATION_ERROR when an array
+ //                                     cannot be allocated, or the hashtable error.
+ //
+ // On failure nothing is handed to the caller: both array pointers are set to
+ // NULL and both counts to 0, and anything allocated here has been released.
+ // On success the caller owns both arrays (release with deleteCurrencyNames()).
+ static void
+ collectCurrencyNames(const char* locale,
+                      CurrencyNameStruct** currencyNames,
+                      int32_t* total_currency_name_count,
+                      CurrencyNameStruct** currencySymbols,
+                      int32_t* total_currency_symbol_count,
+                      UErrorCode& ec) {
+     U_NAMESPACE_USE
+     const icu::Hashtable *currencySymbolsEquiv = getCurrSymbolsEquiv();
+     // Look up the Currencies resource for the given locale.
+     UErrorCode ec2 = U_ZERO_ERROR;
+
+     char loc[ULOC_FULLNAME_CAPACITY] = "";
+     uloc_getName(locale, loc, sizeof(loc), &ec2);
+     if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) {
+         ec = U_ILLEGAL_ARGUMENT_ERROR;
+     }
+
+     // Stop before allocating anything: the caller discards the arrays whenever
+     // `ec` is a failure, so allocating first would leak them.
+     if (U_FAILURE(ec)) {
+         *currencyNames = NULL;
+         *currencySymbols = NULL;
+         *total_currency_name_count = 0;
+         *total_currency_symbol_count = 0;
+         return;
+     }
+
+     // Get maximum currency name count first.
+     getCurrencyNameCount(loc, total_currency_name_count, total_currency_symbol_count);
+
+     *currencyNames = (CurrencyNameStruct*)uprv_malloc
+         (sizeof(CurrencyNameStruct) * (*total_currency_name_count));
+     *currencySymbols = (CurrencyNameStruct*)uprv_malloc
+         (sizeof(CurrencyNameStruct) * (*total_currency_symbol_count));
+
+     // The counts are reused as fill cursors from here on.
+     *total_currency_name_count = 0;
+     *total_currency_symbol_count = 0;
+
+     // Check the allocations themselves (not the out-parameter addresses).
+     if (*currencyNames == NULL || *currencySymbols == NULL) {
+         uprv_free(*currencyNames);
+         uprv_free(*currencySymbols);
+         *currencyNames = NULL;
+         *currencySymbols = NULL;
+         ec = U_MEMORY_ALLOCATION_ERROR;
+         return;
+     }
+
+     const UChar* s = NULL; // currency name
+     char* iso = NULL;      // currency ISO code
+
+     UErrorCode ec3 = U_ZERO_ERROR;
+     UErrorCode ec4 = U_ZERO_ERROR;
+
+     // Using hash to remove duplicates caused by locale fallback
+     UHashtable* currencyIsoCodes = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &ec3);
+     UHashtable* currencyPluralIsoCodes = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &ec4);
+     if (U_FAILURE(ec3) || U_FAILURE(ec4)) {
+         // A table that failed to open must not be queried below. The arrays
+         // are still empty, so freeing the two blocks is sufficient.
+         uhash_close(currencyIsoCodes);
+         uhash_close(currencyPluralIsoCodes);
+         uprv_free(*currencyNames);
+         uprv_free(*currencySymbols);
+         *currencyNames = NULL;
+         *currencySymbols = NULL;
+         ec = U_FAILURE(ec3) ? ec3 : ec4;
+         return;
+     }
+
+     for (int32_t localeLevel = 0; ; ++localeLevel) {
+         ec2 = U_ZERO_ERROR;
+         // TODO: ures_openDirect
+         UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2);
+         UResourceBundle* curr = ures_getByKey(rb, CURRENCIES, NULL, &ec2);
+         int32_t n = ures_getSize(curr);
+         for (int32_t i=0; i<n; ++i) {
+             UResourceBundle* names = ures_getByIndex(curr, i, NULL, &ec2);
+             int32_t len;
+             s = ures_getStringByIndex(names, UCURR_SYMBOL_NAME, &len, &ec2);
+             // TODO: uhash_put wont change key/value?
+             iso = (char*)ures_getKey(names);
+             // At the most specific level every code is new; at later levels a
+             // code that was already recorded is a fallback duplicate.
+             if (localeLevel != 0 && uhash_get(currencyIsoCodes, iso) != NULL) {
+                 ures_close(names);
+                 continue;
+             }
+             uhash_put(currencyIsoCodes, iso, iso, &ec3);
+
+             // Add currency symbol.
+             CurrencyNameStruct& symbolEntry = (*currencySymbols)[(*total_currency_symbol_count)++];
+             symbolEntry.IsoCode = iso;
+             symbolEntry.currencyName = (UChar*)s;
+             symbolEntry.flag = 0;
+             symbolEntry.currencyNameLen = len;
+
+             // Add equivalent symbols
+             if (currencySymbolsEquiv != NULL) {
+                 UnicodeString str(TRUE, s, len);
+                 icu::EquivIterator iter(*currencySymbolsEquiv, str);
+                 const UnicodeString *symbol;
+                 while ((symbol = iter.next()) != NULL) {
+                     CurrencyNameStruct& equivEntry = (*currencySymbols)[(*total_currency_symbol_count)++];
+                     equivEntry.IsoCode = iso;
+                     equivEntry.currencyName = const_cast<UChar*>(symbol->getBuffer());
+                     equivEntry.flag = 0;
+                     equivEntry.currencyNameLen = symbol->length();
+                 }
+             }
+
+             // Add currency long name.
+             s = ures_getStringByIndex(names, UCURR_LONG_NAME, &len, &ec2);
+             CurrencyNameStruct& longNameEntry = (*currencyNames)[(*total_currency_name_count)++];
+             longNameEntry.IsoCode = iso;
+             longNameEntry.currencyName = toUpperCase(s, len, locale);
+             longNameEntry.flag = NEED_TO_BE_DELETED;
+             longNameEntry.currencyNameLen = len;
+
+             // put (iso, 3, and iso) in to array
+             // Add currency ISO code.
+             CurrencyNameStruct& isoEntry = (*currencySymbols)[(*total_currency_symbol_count)++];
+             isoEntry.IsoCode = iso;
+             isoEntry.currencyName = (UChar*)uprv_malloc(sizeof(UChar)*3);
+             // Must convert iso[] into Unicode
+             u_charsToUChars(iso, isoEntry.currencyName, 3);
+             isoEntry.flag = NEED_TO_BE_DELETED;
+             isoEntry.currencyNameLen = 3;
+
+             ures_close(names);
+         }
+
+         // currency plurals
+         UErrorCode ec5 = U_ZERO_ERROR;
+         UResourceBundle* curr_p = ures_getByKey(rb, CURRENCYPLURALS, NULL, &ec5);
+         n = ures_getSize(curr_p);
+         for (int32_t i=0; i<n; ++i) {
+             UResourceBundle* names = ures_getByIndex(curr_p, i, NULL, &ec5);
+             iso = (char*)ures_getKey(names);
+             // Using hash to remove duplicated ISO codes in fallback chain.
+             if (localeLevel != 0 && uhash_get(currencyPluralIsoCodes, iso) != NULL) {
+                 ures_close(names);
+                 continue;
+             }
+             uhash_put(currencyPluralIsoCodes, iso, iso, &ec4);
+
+             int32_t num = ures_getSize(names);
+             int32_t len;
+             for (int32_t j = 0; j < num; ++j) {
+                 // TODO: remove duplicates between singular name and
+                 // currency long name?
+                 s = ures_getStringByIndex(names, j, &len, &ec5);
+                 CurrencyNameStruct& pluralEntry = (*currencyNames)[(*total_currency_name_count)++];
+                 pluralEntry.IsoCode = iso;
+                 pluralEntry.currencyName = toUpperCase(s, len, locale);
+                 pluralEntry.flag = NEED_TO_BE_DELETED;
+                 pluralEntry.currencyNameLen = len;
+             }
+             ures_close(names);
+         }
+         ures_close(curr_p);
+         ures_close(curr);
+         ures_close(rb);
+
+         if (!fallback(loc)) {
+             break;
+         }
+     }
+
+     uhash_close(currencyIsoCodes);
+     uhash_close(currencyPluralIsoCodes);
+
+     // fail on hashtable errors
+     if (U_FAILURE(ec3) || U_FAILURE(ec4)) {
+         // The caller drops the arrays on failure, so release them (and the
+         // names they own) here instead of leaking them.
+         deleteCurrencyNames(*currencyNames, *total_currency_name_count);
+         deleteCurrencyNames(*currencySymbols, *total_currency_symbol_count);
+         *currencyNames = NULL;
+         *currencySymbols = NULL;
+         *total_currency_name_count = 0;
+         *total_currency_symbol_count = 0;
+         ec = U_FAILURE(ec3) ? ec3 : ec4;
+         return;
+     }
+
+     // quick sort the struct
+     qsort(*currencyNames, *total_currency_name_count,
+           sizeof(CurrencyNameStruct), currencyNameComparator);
+     qsort(*currencySymbols, *total_currency_symbol_count,
+           sizeof(CurrencyNameStruct), currencyNameComparator);
+
+ #ifdef UCURR_DEBUG
+     printf("currency name count: %d\n", *total_currency_name_count);
+     for (int32_t index = 0; index < *total_currency_name_count; ++index) {
+         printf("index: %d\n", index);
+         printf("iso: %s\n", (*currencyNames)[index].IsoCode);
+         char curNameBuf[1024];
+         memset(curNameBuf, 0, 1024);
+         u_austrncpy(curNameBuf, (*currencyNames)[index].currencyName, (*currencyNames)[index].currencyNameLen);
+         printf("currencyName: %s\n", curNameBuf);
+         printf("len: %d\n", (*currencyNames)[index].currencyNameLen);
+     }
+     printf("currency symbol count: %d\n", *total_currency_symbol_count);
+     for (int32_t index = 0; index < *total_currency_symbol_count; ++index) {
+         printf("index: %d\n", index);
+         printf("iso: %s\n", (*currencySymbols)[index].IsoCode);
+         char curNameBuf[1024];
+         memset(curNameBuf, 0, 1024);
+         u_austrncpy(curNameBuf, (*currencySymbols)[index].currencyName, (*currencySymbols)[index].currencyNameLen);
+         printf("currencySymbol: %s\n", curNameBuf);
+         printf("len: %d\n", (*currencySymbols)[index].currencyNameLen);
+     }
+ #endif
+ }
+
+ // Narrows a range of the sorted currency name array to the entries whose
+ // character at position `indexInCurrencyNames` equals `key`.
+ //
+ // @param currencyNames: currency names array
+ // @param indexInCurrencyNames: the index of the character in currency names
+ // array against which the comparison is done
+ // @param key: input text char to compare against
+ // @param begin(IN/OUT): the begin index of matching range in currency names array
+ // @param end(IN/OUT): the end index of matching range in currency names array.
+ // @return *begin when the first entry of the narrowed range is exactly
+ // indexInCurrencyNames + 1 characters long (an exact match for the text read
+ // so far); -1 otherwise. When no entry matches, *begin and *end are both set
+ // to -1 as well.
+ static int32_t
+ binarySearch(const CurrencyNameStruct* currencyNames,
+ int32_t indexInCurrencyNames,
+ const UChar key,
+ int32_t* begin, int32_t* end) {
+ #ifdef UCURR_DEBUG
+ printf("key = %x\n", key);
+ #endif
+ int32_t first = *begin;
+ int32_t last = *end;
+ while (first <= last) {
+ int32_t mid = (first + last) / 2; // compute mid point.
+ // Entries too short to have a character at this position are treated as
+ // sorting before the key.
+ if (indexInCurrencyNames >= currencyNames[mid].currencyNameLen) {
+ first = mid + 1;
+ } else {
+ if (key > currencyNames[mid].currencyName[indexInCurrencyNames]) {
+ first = mid + 1;
+ }
+ else if (key < currencyNames[mid].currencyName[indexInCurrencyNames]) {
+ last = mid - 1;
+ }
+ else {
+ // Find a match, and looking for ranges
+ // Now do two more binary searches. First, on the left side for
+ // the greatest L such that CurrencyNameStruct[L] < key.
+ int32_t L = *begin;
+ int32_t R = mid;
+
+ #ifdef UCURR_DEBUG
+ printf("mid = %d\n", mid);
+ #endif
+ while (L < R) {
+ int32_t M = (L + R) / 2;
+ #ifdef UCURR_DEBUG
+ printf("L = %d, R = %d, M = %d\n", L, R, M);
+ #endif
+ if (indexInCurrencyNames >= currencyNames[M].currencyNameLen) {
+ L = M + 1;
+ } else {
+ if (currencyNames[M].currencyName[indexInCurrencyNames] < key) {
+ L = M + 1;
+ } else {
+ #ifdef UCURR_DEBUG
+ U_ASSERT(currencyNames[M].currencyName[indexInCurrencyNames] == key);
+ #endif
+ R = M;
+ }
+ }
+ }
+ #ifdef UCURR_DEBUG
+ U_ASSERT(L == R);
+ #endif
+ *begin = L;
+ #ifdef UCURR_DEBUG
+ printf("begin = %d\n", *begin);
+ U_ASSERT(currencyNames[*begin].currencyName[indexInCurrencyNames] == key);
+ #endif
+
+ // Now for the second search, finding the least R such that
+ // key < CurrencyNameStruct[R].
+ L = mid;
+ R = *end;
+ while (L < R) {
+ int32_t M = (L + R) / 2;
+ #ifdef UCURR_DEBUG
+ printf("L = %d, R = %d, M = %d\n", L, R, M);
+ #endif
+ // NOTE(review): this length test uses '<' whereas the two searches above
+ // use '>=' on the same quantities; an entry whose length equals
+ // indexInCurrencyNames would be indexed one past its last character
+ // here. Presumably such entries cannot occur to the right of `mid` in a
+ // sorted range -- confirm.
+ if (currencyNames[M].currencyNameLen < indexInCurrencyNames) {
+ L = M + 1;
+ } else {
+ if (currencyNames[M].currencyName[indexInCurrencyNames] > key) {
+ R = M;
+ } else {
+ #ifdef UCURR_DEBUG
+ U_ASSERT(currencyNames[M].currencyName[indexInCurrencyNames] == key);
+ #endif
+ L = M + 1;
+ }
+ }
+ }
+ #ifdef UCURR_DEBUG
+ U_ASSERT(L == R);
+ #endif
+ // R is either the first entry past the matching run or, when the run
+ // extends to *end, its last entry; adjust so *end is inclusive.
+ if (currencyNames[R].currencyName[indexInCurrencyNames] > key) {
+ *end = R - 1;
+ } else {
+ *end = R;
+ }
+ #ifdef UCURR_DEBUG
+ printf("end = %d\n", *end);
+ #endif
+
+ // now, found the range. check whether there is exact match
+ if (currencyNames[*begin].currencyNameLen == indexInCurrencyNames + 1) {
+ return *begin; // find range and exact match.
+ }
+ return -1; // find range, but no exact match.
+ }
+ }
+ }
+ *begin = -1;
+ *end = -1;
+ return -1; // failed to find range.
+ }
+
+
+ // Scans currencyNames[begin..end] (inclusive) for the longest name that is a
+ // prefix of `text`.
+ // @param begin, end: the inclusive index range in currencyNames to scan.
+ // @param text, textLen: the input text and its length.
+ // @param partialMatchLen(IN/OUT): raised to the longest prefix of `text` that
+ //        is also a prefix of some scanned name; never lowered.
+ // @param maxMatchLen(IN/OUT): length of the best full match so far; only a
+ //        strictly longer full match replaces it.
+ // @param maxMatchIndex(OUT): index of the name that produced maxMatchLen;
+ //        written only when a longer full match is found.
+ static void
+ linearSearch(const CurrencyNameStruct* currencyNames,
+              int32_t begin, int32_t end,
+              const UChar* text, int32_t textLen,
+              int32_t *partialMatchLen,
+              int32_t *maxMatchLen, int32_t* maxMatchIndex) {
+     // Partial-match scanning always resumes from the length known on entry.
+     const int32_t partialStart = *partialMatchLen;
+     for (int32_t index = begin; index <= end; ++index) {
+         const CurrencyNameStruct& candidate = currencyNames[index];
+         const int32_t len = candidate.currencyNameLen;
+
+         const bool longerFullMatch =
+             len > *maxMatchLen && len <= textLen &&
+             uprv_memcmp(candidate.currencyName, text, len * sizeof(UChar)) == 0;
+         if (longerFullMatch) {
+             *partialMatchLen = MAX(*partialMatchLen, len);
+             *maxMatchIndex = index;
+             *maxMatchLen = len;
+ #ifdef UCURR_DEBUG
+             printf("maxMatchIndex = %d, maxMatchLen = %d\n",
+                    *maxMatchIndex, *maxMatchLen);
+ #endif
+             continue;
+         }
+
+         // Check for partial matches.
+         const int32_t limit = MIN(len, textLen);
+         for (int32_t i = partialStart; i < limit; i++) {
+             if (candidate.currencyName[i] != text[i]) {
+                 break;
+             }
+             *partialMatchLen = MAX(*partialMatchLen, i + 1);
+         }
+     }
+ }
+
+#define LINEAR_SEARCH_THRESHOLD 10
+
+ // Finds the longest currency name in the sorted array `currencyNames` that is
+ // a prefix of `text`.
+ // @param total_currency_count: number of entries in currencyNames.
+ // @param text, textLen: the input text and its length.
+ // @param partialMatchLen(IN/OUT): raised to the longest prefix of `text` that
+ //        matched the start of some name.
+ // @param maxMatchLen(OUT): length of the longest full match, 0 if none.
+ // @param maxMatchIndex(OUT): index of that match in currencyNames, -1 if none.
+ static void
+ searchCurrencyName(const CurrencyNameStruct* currencyNames,
+                    int32_t total_currency_count,
+                    const UChar* text, int32_t textLen,
+                    int32_t *partialMatchLen,
+                    int32_t* maxMatchLen, int32_t* maxMatchIndex) {
+     *maxMatchIndex = -1;
+     *maxMatchLen = 0;
+
+     // [rangeBegin, rangeEnd] is the inclusive range of names that still match
+     // the text consumed so far.
+     int32_t rangeBegin = 0;
+     int32_t rangeEnd = total_currency_count - 1;
+
+     // This is a variant of binary search, applied one text character at a time.
+     // For example, given the currency names in currencyNames array are:
+     // A AB ABC AD AZ B BB BBEX BBEXYZ BS C D E....
+     // and the input text is BBEXST
+     // Round 1 searches the first "B" of the text against the first char of
+     // the names and finds the range "B BB BBEX BBEXYZ BS" (maximum match "B").
+     // Round 2 searches the second "B" against the 2nd char of the names and
+     // narrows the range to "BB BBEX BBEXYZ" (maximum match "BB").
+     // Round 3 returns the range "BBEX BBEXYZ" (maximum match unchanged).
+     // Round 4 returns the same range (maximum match "BBEX").
+     // Round 5 returns no matching range.
+     for (int32_t index = 0; index < textLen; ++index) {
+         // exactIndex is the entry matching text[0..index] exactly, or -1.
+         const int32_t exactIndex = binarySearch(currencyNames, index,
+                                                 text[index],
+                                                 &rangeBegin, &rangeEnd);
+         if (rangeBegin == -1) { // did not find the range
+             break;
+         }
+         *partialMatchLen = MAX(*partialMatchLen, index + 1);
+         if (exactIndex != -1) {
+             // text[0..index] is itself a currency name.
+             *maxMatchLen = index + 1;
+             *maxMatchIndex = exactIndex;
+         }
+         if (rangeEnd - rangeBegin < LINEAR_SEARCH_THRESHOLD) {
+             // Few candidates left: finish with a linear scan.
+             linearSearch(currencyNames, rangeBegin, rangeEnd,
+                          text, textLen,
+                          partialMatchLen,
+                          maxMatchLen, maxMatchIndex);
+             break;
+         }
+     }
+ }
+
+//========================= currency name cache =====================
+ // One cached result of collectCurrencyNames() for a single locale.
+ typedef struct {
+ char locale[ULOC_FULLNAME_CAPACITY]; //key
+ // currency names, case insensitive
+ CurrencyNameStruct* currencyNames; // value
+ int32_t totalCurrencyNameCount; // currency name count
+ // currency symbols and ISO code, case sensitive
+ CurrencyNameStruct* currencySymbols; // value
+ int32_t totalCurrencySymbolCount; // count
+ // reference count.
+ // getCacheEntry() creates an entry with a count of 2: one reference held
+ // by the cache slot and one handed to the caller.
+ // it increases by 1 before accessing, and decreased by 1 after accessing.
+ // The entry is deleted when ref count is zero, which means
+ // the entry is replaced out of cache and no process is accessing it.
+ int32_t refCount;
+ } CurrencyNameCacheEntry;
+
+
+ // Number of slots in the currency name cache.
+ #define CURRENCY_NAME_CACHE_NUM 10
+
+ // Reserve 10 cache entries.
+ static CurrencyNameCacheEntry* currCache[CURRENCY_NAME_CACHE_NUM] = {NULL};
+ // Using an index to indicate which entry to be replaced when cache is full.
+ // It is a simple round-robin replacement strategy.
+ static int8_t currentCacheEntryIndex = 0;
+
+ // Locked by getCacheEntry() around its reads and writes of currCache and
+ // currentCacheEntryIndex.
+ static UMutex gCurrencyCacheMutex;
+
+ // Frees an array of `count` currency name entries: first every currencyName
+ // flagged NEED_TO_BE_DELETED, then the array itself. Names without the flag
+ // are not owned by the array and are left alone.
+ static void
+ deleteCurrencyNames(CurrencyNameStruct* currencyNames, int32_t count) {
+     for (int32_t i = 0; i < count; ++i) {
+         CurrencyNameStruct& entry = currencyNames[i];
+         if ((entry.flag & NEED_TO_BE_DELETED) != 0) {
+             uprv_free(entry.currencyName);
+         }
+     }
+     uprv_free(currencyNames);
+ }
+
+
+ // Releases a cache entry together with the name and symbol arrays it owns.
+ // The reference count is not consulted here; callers decide when to delete.
+ static void
+ deleteCacheEntry(CurrencyNameCacheEntry* entry) {
+     CurrencyNameStruct* const names = entry->currencyNames;
+     deleteCurrencyNames(names, entry->totalCurrencyNameCount);
+
+     CurrencyNameStruct* const symbols = entry->currencySymbols;
+     deleteCurrencyNames(symbols, entry->totalCurrencySymbolCount);
+
+     uprv_free(entry);
+ }
+
+
+ // Empties the currency name cache: every occupied slot is deleted (whatever
+ // its reference count) and reset. Always returns TRUE.
+ static UBool U_CALLCONV
+ currency_cache_cleanup(void) {
+     for (int32_t slot = 0; slot < CURRENCY_NAME_CACHE_NUM; ++slot) {
+         if (currCache[slot] == NULL) {
+             continue;
+         }
+         deleteCacheEntry(currCache[slot]);
+         currCache[slot] = 0;
+     }
+     return TRUE;
+ }
+
+
+/**
+ * Loads the currency name data from the cache, or from resource bundles if necessary.
+ * The refCount is automatically incremented. It is the caller's responsibility
+ * to decrement it when done!
+ */
+static CurrencyNameCacheEntry*
+getCacheEntry(const char* locale, UErrorCode& ec) {
+
+ int32_t total_currency_name_count = 0;
+ CurrencyNameStruct* currencyNames = NULL;
+ int32_t total_currency_symbol_count = 0;
+ CurrencyNameStruct* currencySymbols = NULL;
+ CurrencyNameCacheEntry* cacheEntry = NULL;
+
+ umtx_lock(&gCurrencyCacheMutex);
+ // in order to handle racing correctly,
+ // not putting 'search' in a separate function.
+ int8_t found = -1;
+ for (int8_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) {
+ if (currCache[i]!= NULL &&
+ uprv_strcmp(locale, currCache[i]->locale) == 0) {
+ found = i;
+ break;
+ }
+ }
+ if (found != -1) {
+ cacheEntry = currCache[found];
+ ++(cacheEntry->refCount);
+ }
+ umtx_unlock(&gCurrencyCacheMutex);
+ if (found == -1) {
+ collectCurrencyNames(locale, &currencyNames, &total_currency_name_count, &currencySymbols, &total_currency_symbol_count, ec);
+ if (U_FAILURE(ec)) {
+ return NULL;
+ }
+ umtx_lock(&gCurrencyCacheMutex);
+ // check again.
+ for (int8_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) {
+ if (currCache[i]!= NULL &&
+ uprv_strcmp(locale, currCache[i]->locale) == 0) {
+ found = i;
+ break;
+ }
+ }
+ if (found == -1) {
+ // insert new entry to
+ // currentCacheEntryIndex % CURRENCY_NAME_CACHE_NUM
+ // and remove the existing entry
+ // currentCacheEntryIndex % CURRENCY_NAME_CACHE_NUM
+ // from cache.
+ cacheEntry = currCache[currentCacheEntryIndex];
+ if (cacheEntry) {
+ --(cacheEntry->refCount);
+ // delete if the ref count is zero
+ if (cacheEntry->refCount == 0) {
+ deleteCacheEntry(cacheEntry);
+ }
+ }
+ cacheEntry = (CurrencyNameCacheEntry*)uprv_malloc(sizeof(CurrencyNameCacheEntry));
+ currCache[currentCacheEntryIndex] = cacheEntry;
+ uprv_strcpy(cacheEntry->locale, locale);
+ cacheEntry->currencyNames = currencyNames;
+ cacheEntry->totalCurrencyNameCount = total_currency_name_count;
+ cacheEntry->currencySymbols = currencySymbols;
+ cacheEntry->totalCurrencySymbolCount = total_currency_symbol_count;
+ cacheEntry->refCount = 2; // one for cache, one for reference
+ currentCacheEntryIndex = (currentCacheEntryIndex + 1) % CURRENCY_NAME_CACHE_NUM;
+ ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup);
+ } else {
+ deleteCurrencyNames(currencyNames, total_currency_name_count);
+ deleteCurrencyNames(currencySymbols, total_currency_symbol_count);
+ cacheEntry = currCache[found];
+ ++(cacheEntry->refCount);
+ }
+ umtx_unlock(&gCurrencyCacheMutex);
+ }
+
+ return cacheEntry;
+}
+
+ // Drops one reference to a cache entry under the cache mutex and deletes the
+ // entry once its reference count reaches zero.
+ static void releaseCacheEntry(CurrencyNameCacheEntry* cacheEntry) {
+     umtx_lock(&gCurrencyCacheMutex);
+     const int32_t remaining = --(cacheEntry->refCount);
+     if (remaining == 0) {  // remove
+         deleteCacheEntry(cacheEntry);
+     }
+     umtx_unlock(&gCurrencyCacheMutex);
+ }
+
+ /**
+  * Attempts to match a currency name, symbol or ISO code at pos in text.
+  *
+  * @param locale           Locale whose currency names are searched (also used for upper-casing).
+  * @param text             Input text.
+  * @param pos              On entry, the index at which matching starts; advanced past
+  *                         the longest match when one is found, otherwise unchanged.
+  * @param type             If UCURR_LONG_NAME, only currency names are searched;
+  *                         otherwise symbols/ISO codes are searched as well.
+  * @param partialMatchLen  Output; reset to 0, then updated by searchCurrencyName.
+  * @param result           Output; receives 4 UChars copied from the matched entry's
+  *                         IsoCode when a match is found.
+  * @param ec               In/out error code; the function is a no-op if already failed.
+  */
+ U_CAPI void
+ uprv_parseCurrency(const char* locale,
+                    const icu::UnicodeString& text,
+                    icu::ParsePosition& pos,
+                    int8_t type,
+                    int32_t* partialMatchLen,
+                    UChar* result,
+                    UErrorCode& ec) {
+     U_NAMESPACE_USE
+     if (U_FAILURE(ec)) {
+         return;
+     }
+     CurrencyNameCacheEntry* cacheEntry = getCacheEntry(locale, ec);
+     if (U_FAILURE(ec) || cacheEntry == NULL) {
+         return;
+     }
+
+     int32_t total_currency_name_count = cacheEntry->totalCurrencyNameCount;
+     CurrencyNameStruct* currencyNames = cacheEntry->currencyNames;
+     int32_t total_currency_symbol_count = cacheEntry->totalCurrencySymbolCount;
+     CurrencyNameStruct* currencySymbols = cacheEntry->currencySymbols;
+
+     int32_t start = pos.getIndex();
+
+     UChar inputText[MAX_CURRENCY_NAME_LEN];
+     UChar upperText[MAX_CURRENCY_NAME_LEN];
+     int32_t inputLen = MIN(MAX_CURRENCY_NAME_LEN, text.length() - start);
+     if (inputLen < 0) {
+         // pos is beyond the end of text: a negative length would otherwise be
+         // treated by u_strToUpper as "NUL-terminated" on an uninitialized buffer.
+         inputLen = 0;
+     }
+     text.extract(start, inputLen, inputText);
+     UErrorCode ec1 = U_ZERO_ERROR;
+     int32_t upperLen = u_strToUpper(upperText, MAX_CURRENCY_NAME_LEN, inputText, inputLen, locale, &ec1);
+     if (upperLen > MAX_CURRENCY_NAME_LEN) {
+         // Upper-casing can expand the text; the return value is then the required
+         // length, not the number of units available. Never read past the buffer.
+         upperLen = MAX_CURRENCY_NAME_LEN;
+     }
+
+     // Make sure partialMatchLen is initialized
+     *partialMatchLen = 0;
+
+     int32_t max = 0;
+     int32_t matchIndex = -1;
+     // case in-sensitive comparison against currency names
+     searchCurrencyName(currencyNames, total_currency_name_count,
+                        upperText, upperLen, partialMatchLen, &max, &matchIndex);
+
+ #ifdef UCURR_DEBUG
+     printf("search in names, max = %d, matchIndex = %d\n", max, matchIndex);
+ #endif
+
+     int32_t maxInSymbol = 0;
+     int32_t matchIndexInSymbol = -1;
+     if (type != UCURR_LONG_NAME) {  // not name only
+         // case sensitive comparison against currency symbols and ISO code.
+         // Uses the length of the original (not upper-cased) text, since that is
+         // the buffer being searched.
+         searchCurrencyName(currencySymbols, total_currency_symbol_count,
+                            inputText, inputLen,
+                            partialMatchLen,
+                            &maxInSymbol, &matchIndexInSymbol);
+     }
+
+ #ifdef UCURR_DEBUG
+     printf("search in symbols, maxInSymbol = %d, matchIndexInSymbol = %d\n", maxInSymbol, matchIndexInSymbol);
+     if(matchIndexInSymbol != -1) {
+         printf("== ISO=%s\n", currencySymbols[matchIndexInSymbol].IsoCode);
+     }
+ #endif
+
+     // Prefer the longer match; on a tie the name match wins.
+     // NOTE(review): `max` is measured in upper-cased code units; if upper-casing
+     // changed the length it may not equal the consumed input length -- confirm.
+     if (max >= maxInSymbol && matchIndex != -1) {
+         u_charsToUChars(currencyNames[matchIndex].IsoCode, result, 4);
+         pos.setIndex(start + max);
+     } else if (maxInSymbol >= max && matchIndexInSymbol != -1) {
+         u_charsToUChars(currencySymbols[matchIndexInSymbol].IsoCode, result, 4);
+         pos.setIndex(start + maxInSymbol);
+     }
+
+     // decrease reference count
+     releaseCacheEntry(cacheEntry);
+ }
+
+ // Adds to `result` the code point found at offset 0 of every currency symbol
+ // and every currency name cached for `locale`. Does nothing if `ec` already
+ // indicates failure or the cache entry cannot be obtained.
+ void uprv_currencyLeads(const char* locale, icu::UnicodeSet& result, UErrorCode& ec) {
+     U_NAMESPACE_USE
+     if (U_FAILURE(ec)) {
+         return;
+     }
+     CurrencyNameCacheEntry* cacheEntry = getCacheEntry(locale, ec);
+     if (U_FAILURE(ec)) {
+         return;
+     }
+
+     // Shared worker for both arrays: collect the first code point of each name.
+     auto addLeads = [&result](const CurrencyNameStruct* entries, int32_t count) {
+         for (int32_t idx = 0; idx < count; ++idx) {
+             const CurrencyNameStruct& info = entries[idx];
+             UChar32 lead;
+             U16_GET(info.currencyName, 0, 0, info.currencyNameLen, lead);
+             result.add(lead);
+         }
+     };
+
+     // Symbols first, then names (same order as before).
+     addLeads(cacheEntry->currencySymbols, cacheEntry->totalCurrencySymbolCount);
+     addLeads(cacheEntry->currencyNames, cacheEntry->totalCurrencyNameCount);
+
+     // decrease reference count
+     releaseCacheEntry(cacheEntry);
+ }
+
+
+/**
+ * Internal method. Given a currency ISO code and a locale, return
+ * the "static" currency name. This is usually the same as the
+ * UCURR_SYMBOL_NAME, but if the latter is a choice format, then the
+ * format is applied to the number 2.0 (to yield the more common
+ * plural) to return a static name.
+ *
+ * This is used for backward compatibility with old currency logic in
+ * DecimalFormat and DecimalFormatSymbols.
+ */
+U_CAPI void
+uprv_getStaticCurrencyName(const UChar* iso, const char* loc,
+ icu::UnicodeString& result, UErrorCode& ec)
+{
+ U_NAMESPACE_USE
+
+ int32_t len;
+ const UChar* currname = ucurr_getName(iso, loc, UCURR_SYMBOL_NAME,
+ nullptr /* isChoiceFormat */, &len, &ec);
+ if (U_SUCCESS(ec)) {
+ result.setTo(currname, len);
+ }
+}
+
+ // Convenience wrapper: default fraction digits for the standard usage.
+ U_CAPI int32_t U_EXPORT2
+ ucurr_getDefaultFractionDigits(const UChar* currency, UErrorCode* ec) {
+     const int32_t standardDigits =
+         ucurr_getDefaultFractionDigitsForUsage(currency, UCURR_USAGE_STANDARD, ec);
+     return standardDigits;
+ }
+
+ // Returns the fraction-digit count stored in the currency's metadata:
+ // slot 0 for UCURR_USAGE_STANDARD, slot 2 for UCURR_USAGE_CASH.
+ // Returns 0 if *ec already indicates failure; any other usage sets
+ // U_UNSUPPORTED_ERROR and returns 0.
+ U_CAPI int32_t U_EXPORT2
+ ucurr_getDefaultFractionDigitsForUsage(const UChar* currency, const UCurrencyUsage usage, UErrorCode* ec) {
+     if (U_FAILURE(*ec)) {
+         return 0;
+     }
+
+     int32_t metaSlot;
+     if (usage == UCURR_USAGE_STANDARD) {
+         metaSlot = 0;
+     } else if (usage == UCURR_USAGE_CASH) {
+         metaSlot = 2;
+     } else {
+         *ec = U_UNSUPPORTED_ERROR;
+         return 0;
+     }
+     return (_findMetaData(currency, *ec))[metaSlot];
+ }
+
+ // Convenience wrapper: rounding increment for the standard usage.
+ U_CAPI double U_EXPORT2
+ ucurr_getRoundingIncrement(const UChar* currency, UErrorCode* ec) {
+     const double standardIncrement =
+         ucurr_getRoundingIncrementForUsage(currency, UCURR_USAGE_STANDARD, ec);
+     return standardIncrement;
+ }
+
+ // Returns increment / 10^fracDigits from the currency's metadata, or 0.0 when
+ // no rounding applies or an error occurs. Metadata slots {0,1} are used for
+ // UCURR_USAGE_STANDARD and {2,3} for UCURR_USAGE_CASH; any other usage sets
+ // U_UNSUPPORTED_ERROR. Out-of-range fraction digits set U_INVALID_FORMAT_ERROR.
+ U_CAPI double U_EXPORT2
+ ucurr_getRoundingIncrementForUsage(const UChar* currency, const UCurrencyUsage usage, UErrorCode* ec) {
+     const int32_t *meta = _findMetaData(currency, *ec);
+     if (U_FAILURE(*ec)) {
+         return 0.0;
+     }
+
+     int32_t firstSlot;
+     if (usage == UCURR_USAGE_STANDARD) {
+         firstSlot = 0;
+     } else if (usage == UCURR_USAGE_CASH) {
+         firstSlot = 2;
+     } else {
+         *ec = U_UNSUPPORTED_ERROR;
+         return 0.0;
+     }
+     const int32_t digits = meta[firstSlot];
+     const int32_t step = meta[firstSlot + 1];
+
+     // If the meta data is invalid, return 0.0
+     if (digits < 0 || digits > MAX_POW10) {
+         *ec = U_INVALID_FORMAT_ERROR;
+         return 0.0;
+     }
+
+     // A rounding value of 0 or 1 indicates no rounding.
+     if (step < 2) {
+         return 0.0;
+     }
+
+     // Return (increment) / 10^(fracDigits). The only actual rounding data,
+     // as of this writing, is CHF { 2, 5 }.
+     return double(step) / POW10[digits];
+ }
+
+U_CDECL_BEGIN
+
+ /* Per-enumeration state for the ISO currency list enumeration. An instance is
+    allocated in ucurr_openISOCurrencies() and stored in the enumeration's context. */
+ typedef struct UCurrencyContext {
+ uint32_t currType; /* UCurrCurrencyType */
+ uint32_t listIdx; /* index into gCurrencyList of the next entry to examine */
+ } UCurrencyContext;
+
+/*
+Please keep this list in alphabetical order.
+You can look at the CLDR supplemental data or ISO-4217 for the meaning of some
+of these items.
+ISO-4217: http://www.iso.org/iso/en/prods-services/popstds/currencycodeslist.html
+*/
+static const struct CurrencyList {
+ const char *currency;
+ uint32_t currType;
+} gCurrencyList[] = {
+ {"ADP", UCURR_COMMON|UCURR_DEPRECATED},
+ {"AED", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"AFA", UCURR_COMMON|UCURR_DEPRECATED},
+ {"AFN", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"ALK", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ALL", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"AMD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"ANG", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"AOA", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"AOK", UCURR_COMMON|UCURR_DEPRECATED},
+ {"AON", UCURR_COMMON|UCURR_DEPRECATED},
+ {"AOR", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ARA", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ARL", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ARM", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ARP", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ARS", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"ATS", UCURR_COMMON|UCURR_DEPRECATED},
+ {"AUD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"AWG", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"AZM", UCURR_COMMON|UCURR_DEPRECATED},
+ {"AZN", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"BAD", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BAM", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"BAN", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BBD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"BDT", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"BEC", UCURR_UNCOMMON|UCURR_DEPRECATED},
+ {"BEF", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BEL", UCURR_UNCOMMON|UCURR_DEPRECATED},
+ {"BGL", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BGM", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BGN", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"BGO", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BHD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"BIF", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"BMD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"BND", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"BOB", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"BOL", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BOP", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BOV", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"BRB", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BRC", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BRE", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BRL", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"BRN", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BRR", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BRZ", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BSD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"BTN", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"BUK", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BWP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"BYB", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BYN", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"BYR", UCURR_COMMON|UCURR_DEPRECATED},
+ {"BZD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"CAD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"CDF", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"CHE", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"CHF", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"CHW", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"CLE", UCURR_COMMON|UCURR_DEPRECATED},
+ {"CLF", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"CLP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"CNH", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"CNX", UCURR_UNCOMMON|UCURR_DEPRECATED},
+ {"CNY", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"COP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"COU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"CRC", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"CSD", UCURR_COMMON|UCURR_DEPRECATED},
+ {"CSK", UCURR_COMMON|UCURR_DEPRECATED},
+ {"CUC", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"CUP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"CVE", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"CYP", UCURR_COMMON|UCURR_DEPRECATED},
+ {"CZK", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"DDM", UCURR_COMMON|UCURR_DEPRECATED},
+ {"DEM", UCURR_COMMON|UCURR_DEPRECATED},
+ {"DJF", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"DKK", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"DOP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"DZD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"ECS", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ECV", UCURR_UNCOMMON|UCURR_DEPRECATED},
+ {"EEK", UCURR_COMMON|UCURR_DEPRECATED},
+ {"EGP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"EQE", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove?
+ {"ERN", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"ESA", UCURR_UNCOMMON|UCURR_DEPRECATED},
+ {"ESB", UCURR_UNCOMMON|UCURR_DEPRECATED},
+ {"ESP", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ETB", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"EUR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"FIM", UCURR_COMMON|UCURR_DEPRECATED},
+ {"FJD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"FKP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"FRF", UCURR_COMMON|UCURR_DEPRECATED},
+ {"GBP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"GEK", UCURR_COMMON|UCURR_DEPRECATED},
+ {"GEL", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"GHC", UCURR_COMMON|UCURR_DEPRECATED},
+ {"GHS", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"GIP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"GMD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"GNF", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"GNS", UCURR_COMMON|UCURR_DEPRECATED},
+ {"GQE", UCURR_COMMON|UCURR_DEPRECATED},
+ {"GRD", UCURR_COMMON|UCURR_DEPRECATED},
+ {"GTQ", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"GWE", UCURR_COMMON|UCURR_DEPRECATED},
+ {"GWP", UCURR_COMMON|UCURR_DEPRECATED},
+ {"GYD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"HKD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"HNL", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"HRD", UCURR_COMMON|UCURR_DEPRECATED},
+ {"HRK", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"HTG", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"HUF", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"IDR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"IEP", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ILP", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ILR", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ILS", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"INR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"IQD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"IRR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"ISJ", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ISK", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"ITL", UCURR_COMMON|UCURR_DEPRECATED},
+ {"JMD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"JOD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"JPY", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"KES", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"KGS", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"KHR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"KMF", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"KPW", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"KRH", UCURR_COMMON|UCURR_DEPRECATED},
+ {"KRO", UCURR_COMMON|UCURR_DEPRECATED},
+ {"KRW", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"KWD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"KYD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"KZT", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"LAK", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"LBP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"LKR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"LRD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"LSL", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"LSM", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove?
+ {"LTL", UCURR_COMMON|UCURR_DEPRECATED},
+ {"LTT", UCURR_COMMON|UCURR_DEPRECATED},
+ {"LUC", UCURR_UNCOMMON|UCURR_DEPRECATED},
+ {"LUF", UCURR_COMMON|UCURR_DEPRECATED},
+ {"LUL", UCURR_UNCOMMON|UCURR_DEPRECATED},
+ {"LVL", UCURR_COMMON|UCURR_DEPRECATED},
+ {"LVR", UCURR_COMMON|UCURR_DEPRECATED},
+ {"LYD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"MAD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"MAF", UCURR_COMMON|UCURR_DEPRECATED},
+ {"MCF", UCURR_COMMON|UCURR_DEPRECATED},
+ {"MDC", UCURR_COMMON|UCURR_DEPRECATED},
+ {"MDL", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"MGA", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"MGF", UCURR_COMMON|UCURR_DEPRECATED},
+ {"MKD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"MKN", UCURR_COMMON|UCURR_DEPRECATED},
+ {"MLF", UCURR_COMMON|UCURR_DEPRECATED},
+ {"MMK", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"MNT", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"MOP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"MRO", UCURR_COMMON|UCURR_DEPRECATED},
+ {"MRU", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"MTL", UCURR_COMMON|UCURR_DEPRECATED},
+ {"MTP", UCURR_COMMON|UCURR_DEPRECATED},
+ {"MUR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"MVP", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove?
+ {"MVR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"MWK", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"MXN", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"MXP", UCURR_COMMON|UCURR_DEPRECATED},
+ {"MXV", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"MYR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"MZE", UCURR_COMMON|UCURR_DEPRECATED},
+ {"MZM", UCURR_COMMON|UCURR_DEPRECATED},
+ {"MZN", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"NAD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"NGN", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"NIC", UCURR_COMMON|UCURR_DEPRECATED},
+ {"NIO", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"NLG", UCURR_COMMON|UCURR_DEPRECATED},
+ {"NOK", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"NPR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"NZD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"OMR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"PAB", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"PEI", UCURR_COMMON|UCURR_DEPRECATED},
+ {"PEN", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"PES", UCURR_COMMON|UCURR_DEPRECATED},
+ {"PGK", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"PHP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"PKR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"PLN", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"PLZ", UCURR_COMMON|UCURR_DEPRECATED},
+ {"PTE", UCURR_COMMON|UCURR_DEPRECATED},
+ {"PYG", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"QAR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"RHD", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ROL", UCURR_COMMON|UCURR_DEPRECATED},
+ {"RON", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"RSD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"RUB", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"RUR", UCURR_COMMON|UCURR_DEPRECATED},
+ {"RWF", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"SAR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"SBD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"SCR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"SDD", UCURR_COMMON|UCURR_DEPRECATED},
+ {"SDG", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"SDP", UCURR_COMMON|UCURR_DEPRECATED},
+ {"SEK", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"SGD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"SHP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"SIT", UCURR_COMMON|UCURR_DEPRECATED},
+ {"SKK", UCURR_COMMON|UCURR_DEPRECATED},
+ {"SLL", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"SOS", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"SRD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"SRG", UCURR_COMMON|UCURR_DEPRECATED},
+ {"SSP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"STD", UCURR_COMMON|UCURR_DEPRECATED},
+ {"STN", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"SUR", UCURR_COMMON|UCURR_DEPRECATED},
+ {"SVC", UCURR_COMMON|UCURR_DEPRECATED},
+ {"SYP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"SZL", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"THB", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"TJR", UCURR_COMMON|UCURR_DEPRECATED},
+ {"TJS", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"TMM", UCURR_COMMON|UCURR_DEPRECATED},
+ {"TMT", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"TND", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"TOP", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"TPE", UCURR_COMMON|UCURR_DEPRECATED},
+ {"TRL", UCURR_COMMON|UCURR_DEPRECATED},
+ {"TRY", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"TTD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"TWD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"TZS", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"UAH", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"UAK", UCURR_COMMON|UCURR_DEPRECATED},
+ {"UGS", UCURR_COMMON|UCURR_DEPRECATED},
+ {"UGX", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"USD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"USN", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"USS", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"UYI", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"UYP", UCURR_COMMON|UCURR_DEPRECATED},
+ {"UYU", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"UZS", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"VEB", UCURR_COMMON|UCURR_DEPRECATED},
+ {"VEF", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"VND", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"VNN", UCURR_COMMON|UCURR_DEPRECATED},
+ {"VUV", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"WST", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"XAF", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"XAG", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"XAU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"XBA", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"XBB", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"XBC", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"XBD", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"XCD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"XDR", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"XEU", UCURR_UNCOMMON|UCURR_DEPRECATED},
+ {"XFO", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"XFU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"XOF", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"XPD", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"XPF", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"XPT", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"XRE", UCURR_UNCOMMON|UCURR_DEPRECATED},
+ {"XSU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"XTS", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"XUA", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"XXX", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
+ {"YDD", UCURR_COMMON|UCURR_DEPRECATED},
+ {"YER", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"YUD", UCURR_COMMON|UCURR_DEPRECATED},
+ {"YUM", UCURR_COMMON|UCURR_DEPRECATED},
+ {"YUN", UCURR_COMMON|UCURR_DEPRECATED},
+ {"YUR", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ZAL", UCURR_UNCOMMON|UCURR_DEPRECATED},
+ {"ZAR", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"ZMK", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ZMW", UCURR_COMMON|UCURR_NON_DEPRECATED},
+ {"ZRN", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ZRZ", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ZWD", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ZWL", UCURR_COMMON|UCURR_DEPRECATED},
+ {"ZWR", UCURR_COMMON|UCURR_DEPRECATED},
+ { NULL, 0 } // Leave here to denote the end of the list.
+};
+
+ // Evaluates to true when typeToMatch is UCURR_ALL, or when every bit set in
+ // typeToMatch is also set in variable.
+ #define UCURR_MATCHES_BITMASK(variable, typeToMatch) \
+ ((typeToMatch) == UCURR_ALL || ((variable) & (typeToMatch)) == (typeToMatch))
+
+ // Enumeration "count" callback: number of gCurrencyList entries whose type
+ // matches the type mask stored in the enumeration context.
+ static int32_t U_CALLCONV
+ ucurr_countCurrencyList(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
+     const UCurrencyContext *ctx = (UCurrencyContext *)(enumerator->context);
+     const uint32_t wantedType = ctx->currType;
+     int32_t matches = 0;
+
+     /* Walk the table up to its NULL sentinel, counting matching items. */
+     for (const struct CurrencyList *item = gCurrencyList; item->currency != NULL; ++item) {
+         if (UCURR_MATCHES_BITMASK(item->currType, wantedType)) {
+             ++matches;
+         }
+     }
+     return matches;
+ }
+
+ // Enumeration "next" callback: returns the next currency code whose type
+ // matches the context's type mask, advancing the context's list index.
+ // Returns NULL (and a length of 0) once the list is exhausted.
+ static const char* U_CALLCONV
+ ucurr_nextCurrencyList(UEnumeration *enumerator,
+                        int32_t* resultLength,
+                        UErrorCode * /*pErrorCode*/)
+ {
+     UCurrencyContext *ctx = (UCurrencyContext *)(enumerator->context);
+     const char *match = NULL;
+
+     /* Scan forward (excluding the trailing sentinel) until something matches. */
+     while (match == NULL && ctx->listIdx < UPRV_LENGTHOF(gCurrencyList)-1) {
+         const struct CurrencyList *candidate = &gCurrencyList[ctx->listIdx++];
+         if (UCURR_MATCHES_BITMASK(candidate->currType, ctx->currType)) {
+             match = candidate->currency;
+         }
+     }
+
+     if (resultLength) {
+         /* Currency codes are only 3 chars long; 0 means we enumerated too far. */
+         *resultLength = (match != NULL) ? 3 : 0;
+     }
+     return match;
+ }
+
+ // Enumeration "reset" callback: restart iteration from the first list entry.
+ static void U_CALLCONV
+ ucurr_resetCurrencyList(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
+     UCurrencyContext *ctx = (UCurrencyContext *)(enumerator->context);
+     ctx->listIdx = 0;
+ }
+
+ // Enumeration "close" callback: frees the context, then the enumeration itself.
+ static void U_CALLCONV
+ ucurr_closeCurrencyList(UEnumeration *enumerator) {
+     void *ctx = enumerator->context;
+     uprv_free(ctx);
+     uprv_free(enumerator);
+ }
+
+ // Decodes a date stored in an int-vector resource as {high 32 bits, low 32 bits}.
+ // Returns defaultDate if the vector cannot be read or has fewer than two elements.
+ static UDate
+ ucurr_readDateVector(const UResourceBundle *res, UDate defaultDate, UErrorCode *status) {
+     int32_t length = 0;
+     const int32_t *parts = ures_getIntVector(res, &length, status);
+     if (U_FAILURE(*status) || parts == NULL || length < 2) {
+         return defaultDate;
+     }
+     int64_t date64 = (int64_t)parts[0] << 32;
+     date64 |= ((int64_t)parts[1] & (int64_t)INT64_C(0x00000000FFFFFFFF));
+     return (UDate)date64;
+ }
+
+ // Fills isoCodes with one IsoCodeEntry per currency found in the CurrencyMap
+ // resource, keyed by ISO code. Each entry records the currency's "from" date
+ // (U_DATE_MIN if absent) and "to" date (U_DATE_MAX if absent).
+ //
+ // isoCodes  Hash table receiving the entries (entries are uprv_malloc'ed).
+ // status    Receives U_MEMORY_ALLOCATION_ERROR on allocation failure, or the
+ //           resource lookup status when the map or one of its arrays cannot be read.
+ //
+ // All resource bundles opened here are closed on every exit path.
+ static void U_CALLCONV
+ ucurr_createCurrencyList(UHashtable *isoCodes, UErrorCode* status){
+     UErrorCode localStatus = U_ZERO_ERROR;
+
+     // Look up the CurrencyMap element in the root bundle.
+     // (rb is reused as the fill-in, so closing currencyMapArray releases it.)
+     UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus);
+     UResourceBundle *currencyMapArray = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
+
+     if (U_SUCCESS(localStatus)) {
+         // process each entry in currency map
+         for (int32_t i=0; i<ures_getSize(currencyMapArray); i++) {
+             // get the currency resource
+             UResourceBundle *currencyArray = ures_getByIndex(currencyMapArray, i, NULL, &localStatus);
+             // process each currency
+             if (U_SUCCESS(localStatus)) {
+                 for (int32_t j=0; j<ures_getSize(currencyArray); j++) {
+                     // get the currency resource
+                     UResourceBundle *currencyRes = ures_getByIndex(currencyArray, j, NULL, &localStatus);
+
+                     // get the ISO code
+                     int32_t isoLength = 0;
+                     UResourceBundle *idRes = ures_getByKey(currencyRes, "id", NULL, &localStatus);
+                     if (idRes == NULL) {
+                         // Nothing has been allocated for this item yet; just
+                         // release the item's resource before skipping it.
+                         ures_close(currencyRes);
+                         continue;
+                     }
+                     const UChar *isoCode = ures_getString(idRes, &isoLength, &localStatus);
+
+                     // Allocate only once we know the item will be stored.
+                     IsoCodeEntry *entry = (IsoCodeEntry*)uprv_malloc(sizeof(IsoCodeEntry));
+                     if (entry == NULL) {
+                         *status = U_MEMORY_ALLOCATION_ERROR;
+                         ures_close(idRes);
+                         ures_close(currencyRes);
+                         ures_close(currencyArray);
+                         ures_close(currencyMapArray);
+                         return;
+                     }
+
+                     // get from date
+                     UDate fromDate = U_DATE_MIN;
+                     UResourceBundle *fromRes = ures_getByKey(currencyRes, "from", NULL, &localStatus);
+                     if (U_SUCCESS(localStatus)) {
+                         fromDate = ucurr_readDateVector(fromRes, U_DATE_MIN, &localStatus);
+                     }
+                     ures_close(fromRes);
+
+                     // get to date ("to" is optional, so start from a clean status)
+                     UDate toDate = U_DATE_MAX;
+                     localStatus = U_ZERO_ERROR;
+                     UResourceBundle *toRes = ures_getByKey(currencyRes, "to", NULL, &localStatus);
+                     if (U_SUCCESS(localStatus)) {
+                         toDate = ucurr_readDateVector(toRes, U_DATE_MAX, &localStatus);
+                     }
+                     ures_close(toRes);
+
+                     ures_close(idRes);
+                     ures_close(currencyRes);
+
+                     entry->isoCode = isoCode;
+                     entry->from = fromDate;
+                     entry->to = toDate;
+
+                     localStatus = U_ZERO_ERROR;
+                     uhash_put(isoCodes, (UChar *)isoCode, entry, &localStatus);
+                 }
+             } else {
+                 *status = localStatus;
+             }
+             ures_close(currencyArray);
+         }
+     } else {
+         *status = localStatus;
+     }
+
+     ures_close(currencyMapArray);
+ }
+
+ // Template UEnumeration for the ISO currency list. ucurr_openISOCurrencies()
+ // copies this structure and then sets the copy's `context` member.
+ // The initializer is positional, so the order of entries must match the
+ // UEnumeration member order (declared elsewhere).
+ static const UEnumeration gEnumCurrencyList = {
+ NULL,
+ NULL,
+ ucurr_closeCurrencyList,
+ ucurr_countCurrencyList,
+ uenum_unextDefault,
+ ucurr_nextCurrencyList,
+ ucurr_resetCurrencyList
+ };
+U_CDECL_END
+
+
+ // One-time initializer for gIsoCodes (invoked through umtx_initOnce).
+ // Builds the ISO code hash table and publishes it only if everything succeeded;
+ // on failure the partially built table is closed and gIsoCodes stays unset.
+ static void U_CALLCONV initIsoCodes(UErrorCode &status) {
+     U_ASSERT(gIsoCodes == NULL);
+     ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup);
+
+     UHashtable *table = uhash_open(uhash_hashUChars, uhash_compareUChars, NULL, &status);
+     if (U_SUCCESS(status)) {
+         uhash_setValueDeleter(table, deleteIsoCodeEntry);
+
+         ucurr_createCurrencyList(table, &status);
+         if (U_SUCCESS(status)) {
+             // Note: gIsoCodes is const. Once set up here it is never altered,
+             // and read only access is safe without synchronization.
+             gIsoCodes = table;
+         } else {
+             uhash_close(table);
+         }
+     }
+ }
+
+ // For every entry in unisets::kCurrencyEntries, registers each string of the
+ // entry's UnicodeSet (other than the exemplar itself) as equivalent to the
+ // entry's exemplar. Stops at the first failure or at the first missing set.
+ static void populateCurrSymbolsEquiv(icu::Hashtable *hash, UErrorCode &status) {
+     if (U_FAILURE(status)) { return; }
+     for (auto& currencyEntry : unisets::kCurrencyEntries) {
+         UnicodeString exemplar(currencyEntry.exemplar);
+         const UnicodeSet* symbols = unisets::get(currencyEntry.key);
+         if (symbols == nullptr) { return; }
+         UnicodeSetIterator iter(*symbols);
+         while (iter.next()) {
+             UnicodeString candidate = iter.getString();
+             // No need to mark the exemplar character as an equivalent
+             if (!(candidate == exemplar)) {
+                 makeEquivalent(exemplar, candidate, hash, status);
+                 if (U_FAILURE(status)) { return; }
+             }
+         }
+     }
+ }
+
+ // One-time initializer for gCurrSymbolsEquiv (invoked through umtx_initOnce).
+ // The table is published only when it was created and populated without error;
+ // otherwise it is deleted and gCurrSymbolsEquiv stays unset.
+ static void U_CALLCONV initCurrSymbolsEquiv() {
+     U_ASSERT(gCurrSymbolsEquiv == NULL);
+     UErrorCode status = U_ZERO_ERROR;
+     ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup);
+
+     icu::Hashtable *table = new icu::Hashtable(status);
+     if (table == NULL) {
+         return;
+     }
+     if (U_SUCCESS(status)) {
+         table->setValueDeleter(deleteUnicode);
+         populateCurrSymbolsEquiv(table, status);
+     }
+     if (U_FAILURE(status)) {
+         delete table;
+         return;
+     }
+     gCurrSymbolsEquiv = table;
+ }
+
+ // Returns TRUE when isoCode is a known currency whose [from, to] validity
+ // range (from the ISO code table) overlaps the requested [from, to] range.
+ // Sets U_ILLEGAL_ARGUMENT_ERROR (and returns FALSE) when from > to for a
+ // known currency; unknown currencies simply yield FALSE.
+ U_CAPI UBool U_EXPORT2
+ ucurr_isAvailable(const UChar* isoCode, UDate from, UDate to, UErrorCode* eErrorCode) {
+     umtx_initOnce(gIsoCodesInitOnce, &initIsoCodes, *eErrorCode);
+     if (U_FAILURE(*eErrorCode)) {
+         return FALSE;
+     }
+
+     const IsoCodeEntry* known = (IsoCodeEntry *) uhash_get(gIsoCodes, isoCode);
+     if (known == NULL) {
+         return FALSE;
+     }
+     if (from > to) {
+         *eErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+         return FALSE;
+     }
+     // The requested range lies entirely after or entirely before the currency's range.
+     if ((from > known->to) || (to < known->from)) {
+         return FALSE;
+     }
+     return TRUE;
+ }
+
+ // Returns the currency-symbol equivalence table, building it on first use.
+ // The result is whatever initCurrSymbolsEquiv() stored in gCurrSymbolsEquiv.
+ static const icu::Hashtable* getCurrSymbolsEquiv() {
+     umtx_initOnce(gCurrSymbolsEquivInitOnce, &initCurrSymbolsEquiv);
+     const icu::Hashtable* table = gCurrSymbolsEquiv;
+     return table;
+ }
+
+ // Creates an enumeration over gCurrencyList restricted to currType.
+ // Returns NULL and sets U_MEMORY_ALLOCATION_ERROR if either the enumeration
+ // or its context cannot be allocated. The result is released through the
+ // enumeration's close callback (ucurr_closeCurrencyList).
+ U_CAPI UEnumeration * U_EXPORT2
+ ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode) {
+     UEnumeration *result = (UEnumeration*)uprv_malloc(sizeof(UEnumeration));
+     if (result == NULL) {
+         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+         return NULL;
+     }
+     // Start from the shared template, then attach a private context.
+     uprv_memcpy(result, &gEnumCurrencyList, sizeof(UEnumeration));
+
+     UCurrencyContext *ctx = (UCurrencyContext*)uprv_malloc(sizeof(UCurrencyContext));
+     if (ctx == NULL) {
+         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+         uprv_free(result);
+         return NULL;
+     }
+     ctx->listIdx = 0;
+     ctx->currType = currType;
+     result->context = ctx;
+     return result;
+ }
+
+ // Counts the currencies listed for the locale's region in the CurrencyMap
+ // resource that are valid on `date`: those with from <= date, and
+ // additionally date < to when the currency item has more than two elements.
+ // Returns 0 if ec is NULL, already failed, or ends up in a failure state.
+ U_CAPI int32_t U_EXPORT2
+ ucurr_countCurrencies(const char* locale,
+                       UDate date,
+                       UErrorCode* ec)
+ {
+     // Either the error code is unusable or a previous call already failed.
+     if (ec == NULL || U_FAILURE(*ec))
+     {
+         return 0;
+     }
+
+     UErrorCode resStatus = U_ZERO_ERROR;
+     char regionId[ULOC_FULLNAME_CAPACITY];
+
+     // get country or country_variant in `regionId'
+     idForLocale(locale, regionId, sizeof(regionId), ec);
+     if (U_FAILURE(*ec))
+     {
+         return 0;
+     }
+
+     // Remove variants, which is only needed for registration.
+     char *variantStart = strchr(regionId, VAR_DELIM);
+     if (variantStart)
+     {
+         *variantStart = 0;
+     }
+
+     // Look up the CurrencyMap element in the root bundle.
+     UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &resStatus);
+     UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &resStatus);
+
+     // Using the id derived from the locale, get the currency data
+     UResourceBundle *countryArray = ures_getByKey(rb, regionId, cm, &resStatus);
+
+     // Dates are stored as two int32 values: high word first, then low word.
+     auto decodeDate = [](const int32_t *halves) -> UDate {
+         int64_t date64 = (int64_t)halves[0] << 32;
+         date64 |= ((int64_t)halves[1] & (int64_t)INT64_C(0x00000000FFFFFFFF));
+         return (UDate)date64;
+     };
+
+     // process each currency to see which one is valid for the given date
+     int32_t validCount = 0;
+     if (U_SUCCESS(resStatus))
+     {
+         for (int32_t i=0; i<ures_getSize(countryArray); i++)
+         {
+             // get the currency resource
+             UResourceBundle *currencyRes = ures_getByIndex(countryArray, i, NULL, &resStatus);
+
+             // get the from date
+             int32_t fromLength = 0;
+             UResourceBundle *fromRes = ures_getByKey(currencyRes, "from", NULL, &resStatus);
+             const int32_t *fromArray = ures_getIntVector(fromRes, &fromLength, &resStatus);
+             const UDate fromDate = decodeDate(fromArray);
+
+             bool isValid = (fromDate <= date);
+             if (ures_getSize(currencyRes)> 2)
+             {
+                 // An end date is present: the currency must not have expired yet.
+                 int32_t toLength = 0;
+                 UResourceBundle *toRes = ures_getByKey(currencyRes, "to", NULL, &resStatus);
+                 const int32_t *toArray = ures_getIntVector(toRes, &toLength, &resStatus);
+                 const UDate toDate = decodeDate(toArray);
+
+                 isValid = isValid && (date < toDate);
+
+                 ures_close(toRes);
+             }
+             if (isValid)
+             {
+                 validCount++;
+             }
+
+             // close open resources
+             ures_close(currencyRes);
+             ures_close(fromRes);
+         }
+     }
+
+     ures_close(countryArray);
+
+     // Check for errors
+     if (*ec == U_ZERO_ERROR || resStatus != U_ZERO_ERROR)
+     {
+         // There is nothing to fallback to.
+         // Report the failure/warning if possible.
+         *ec = resStatus;
+     }
+
+     // Only report a count when no error occurred.
+     return U_SUCCESS(*ec) ? validCount : 0;
+ }
+
+U_CAPI int32_t U_EXPORT2
+ucurr_forLocaleAndDate(const char* locale,
+                UDate date,
+                int32_t index,
+                UChar* buff,
+                int32_t buffCapacity,
+                UErrorCode* ec)
+{
+    // Finds the index-th (1-based) currency of the locale's region whose date range
+    // contains `date`, copies its code into buff and returns the code length.
+    // Returns 0 if there is no such currency or buff cannot hold it.
+    int32_t resLen = 0;
+    int32_t currIndex = 0;
+    const UChar* s = NULL;
+
+    if (ec != NULL && U_SUCCESS(*ec))
+    {
+        // check the arguments passed
+        if ((buff && buffCapacity) || !buffCapacity )
+        {
+            // local variables
+            UErrorCode localStatus = U_ZERO_ERROR;
+            char id[ULOC_FULLNAME_CAPACITY];
+
+            // get country or country_variant in `id'
+            idForLocale(locale, id, sizeof(id), ec);
+            if (U_FAILURE(*ec))
+            {
+                return 0;
+            }
+
+            // Remove variants, which is only needed for registration.
+            char *idDelim = strchr(id, VAR_DELIM);
+            if (idDelim)
+            {
+                idDelim[0] = 0;
+            }
+
+            // Look up the CurrencyMap element in the root bundle.
+            UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus);
+            UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
+
+            // Using the id derived from the local, get the currency data
+            UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus);
+
+            // process each currency to see which one is valid for the given date
+            bool matchFound = false;
+            if (U_SUCCESS(localStatus))
+            {
+                if ((index <= 0) || (index> ures_getSize(countryArray)))
+                {
+                    // requested index is out of bounds
+                    ures_close(countryArray);
+                    return 0;
+                }
+
+                for (int32_t i=0; i<ures_getSize(countryArray); i++)
+                {
+                    // get the currency resource
+                    UResourceBundle *currencyRes = ures_getByIndex(countryArray, i, NULL, &localStatus);
+                    s = ures_getStringByKey(currencyRes, "id", &resLen, &localStatus);
+                    // get the from date
+                    int32_t fromLength = 0;
+                    UResourceBundle *fromRes = ures_getByKey(currencyRes, "from", NULL, &localStatus);
+                    const int32_t *fromArray = ures_getIntVector(fromRes, &fromLength, &localStatus);
+                    // get the to date; it is only looked up when the entry has more than two items
+                    int32_t toLength = 0;
+                    UResourceBundle *toRes = NULL;
+                    const int32_t *toArray = NULL;
+                    const bool hasTo = ures_getSize(currencyRes)> 2;
+                    if (hasTo)
+                    {
+                        toRes = ures_getByKey(currencyRes, "to", NULL, &localStatus);
+                        toArray = ures_getIntVector(toRes, &toLength, &localStatus);
+                    }
+                    // each date is read as two 32-bit halves: never index a missing or short vector
+                    const bool datesValid = U_SUCCESS(localStatus) &&
+                        (fromArray != NULL) && (fromLength >= 2) &&
+                        (!hasTo || ((toArray != NULL) && (toLength >= 2)));
+                    if (datesValid)
+                    {
+                        // shift as unsigned: left-shifting a negative signed value is undefined
+                        int64_t currDate64 = (int64_t)((uint64_t)fromArray[0] << 32);
+                        currDate64 |= ((int64_t)fromArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF));
+                        UDate fromDate = (UDate)currDate64;
+                        bool inRange = (fromDate <= date);
+                        if (hasTo)
+                        {
+                            currDate64 = (int64_t)((uint64_t)toArray[0] << 32);
+                            currDate64 |= ((int64_t)toArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF));
+                            UDate toDate = (UDate)currDate64;
+                            inRange = inRange && (date < toDate);
+                        }
+                        if (inRange)
+                        {
+                            currIndex++;
+                            if (currIndex == index)
+                            {
+                                matchFound = true;
+                            }
+                        }
+                    }
+                    else if (U_SUCCESS(localStatus))
+                    {
+                        localStatus = U_INVALID_FORMAT_ERROR; // lookups succeeded but a vector is too short
+                    }
+                    // close open resources
+                    ures_close(toRes);
+                    ures_close(currencyRes);
+                    ures_close(fromRes);
+                    // check for loop exit
+                    if (matchFound || !datesValid)
+                    {
+                        break;
+                    }
+                } // end For loop
+            }
+
+            ures_close(countryArray);
+            // Check for errors
+            if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR)
+            {
+                // There is nothing to fallback to. Report the failure/warning if possible.
+                *ec = localStatus;
+            }
+
+            if (U_SUCCESS(*ec))
+            {
+                // no errors
+                if((buffCapacity> resLen) && matchFound)
+                {
+                    // write out the currency value
+                    u_strcpy(buff, s);
+                }
+                else
+                {
+                    return 0;
+                }
+            }
+
+            // return null terminated currency string
+            return u_terminateUChars(buff, buffCapacity, resLen, ec);
+        }
+        else
+        {
+            // illegal argument encountered
+            *ec = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+    }
+
+    // If we got here, either error code is invalid or some argument passed is no good.
+    return resLen;
+}
+
+static const UEnumeration defaultKeywordValues = { /* template that ucurr_getKeywordValuesForLocale copies into each enumeration it creates */
+ NULL,
+ NULL, /* NOTE(review): presumably the context slot, which is assigned per instance after the copy - confirm against UEnumeration */
+ ulist_close_keyword_values_iterator,
+ ulist_count_keyword_values,
+ uenum_unextDefault,
+ ulist_next_keyword_value,
+ ulist_reset_keyword_values_iterator
+};
+
+// Builds an enumeration of currency codes from supplementalData/CurrencyMap: codes of the locale's region
+// that have no "to" entry come first; when commonlyUsed is false all other codes follow. `key` is only
+// forwarded to the "und" fallback call. NULL is returned if setup or reading the bundle fails.
+U_CAPI UEnumeration *U_EXPORT2 ucurr_getKeywordValuesForLocale(const char *key, const char *locale, UBool commonlyUsed, UErrorCode* status) {
+    // Resolve region
+    char prefRegion[ULOC_COUNTRY_CAPACITY];
+    ulocimp_getRegionForSupplementalData(locale, TRUE, prefRegion, sizeof(prefRegion), status);
+    // Read value from supplementalData
+    UList *values = ulist_createEmptyList(status);
+    UList *otherValues = ulist_createEmptyList(status);
+    UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
+    if (U_FAILURE(*status) || en == NULL) {
+        if (en == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+        } else {
+            uprv_free(en);
+        }
+        ulist_deleteList(values);
+        ulist_deleteList(otherValues);
+        return NULL;
+    }
+    memcpy(en, &defaultKeywordValues, sizeof(UEnumeration));
+    en->context = values;
+
+    UResourceBundle *bundle = ures_openDirect(U_ICUDATA_CURR, "supplementalData", status);
+    ures_getByKey(bundle, "CurrencyMap", bundle, status);
+    UResourceBundle bundlekey, regbndl, curbndl, to;
+    ures_initStackObject(&bundlekey);
+    ures_initStackObject(&regbndl);
+    ures_initStackObject(&curbndl);
+    ures_initStackObject(&to);
+
+    while (U_SUCCESS(*status) && ures_hasNext(bundle)) {
+        ures_getNextResource(bundle, &bundlekey, status);
+        if (U_FAILURE(*status)) {
+            break;
+        }
+        const char *region = ures_getKey(&bundlekey);
+        UBool isPrefRegion = uprv_strcmp(region, prefRegion) == 0 ? TRUE : FALSE;
+        if (!isPrefRegion && commonlyUsed) {
+            // With commonlyUsed=true, we do not put currencies for other regions in the result list.
+            continue;
+        }
+        ures_getByKey(bundle, region, &regbndl, status);
+        if (U_FAILURE(*status)) {
+            break;
+        }
+        while (U_SUCCESS(*status) && ures_hasNext(&regbndl)) {
+            ures_getNextResource(&regbndl, &curbndl, status);
+            if (ures_getType(&curbndl) != URES_TABLE) {
+                // Currently, an empty ARRAY is mixed in.
+                continue;
+            }
+            char *curID = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY);
+            int32_t curIDLength = ULOC_KEYWORDS_CAPACITY;
+            if (curID == NULL) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                break;
+            }
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+            ures_getUTF8StringByKey(&curbndl, "id", curID, &curIDLength, TRUE, status);
+            /* optimize - use the utf-8 string */
+#else
+            {
+                const UChar* defString = ures_getStringByKey(&curbndl, "id", &curIDLength, status);
+                if(U_SUCCESS(*status)) {
+                    if(curIDLength+1 > ULOC_KEYWORDS_CAPACITY) {
+                        *status = U_BUFFER_OVERFLOW_ERROR;
+                    } else {
+                        u_UCharsToChars(defString, curID, curIDLength+1);
+                    }
+                }
+            }
+#endif
+            if (U_FAILURE(*status)) {
+                uprv_free(curID); // not yet handed to either list, so release it here
+                break;
+            }
+            UBool hasTo = FALSE;
+            ures_getByKey(&curbndl, "to", &to, status);
+            if (U_FAILURE(*status)) {
+                // No usable "to" entry: clear the error and treat the currency as having no end date.
+                *status = U_ZERO_ERROR;
+            } else {
+                hasTo = TRUE;
+            }
+            if (isPrefRegion && !hasTo && !ulist_containsString(values, curID, (int32_t)uprv_strlen(curID))) {
+                // Currently active currency for the target country
+                ulist_addItemEndList(values, curID, TRUE, status);
+            } else if (!ulist_containsString(otherValues, curID, (int32_t)uprv_strlen(curID)) && !commonlyUsed) {
+                ulist_addItemEndList(otherValues, curID, TRUE, status);
+            } else {
+                uprv_free(curID);
+            }
+        }
+    }
+    if (U_SUCCESS(*status)) {
+        if (commonlyUsed) {
+            if (ulist_getListSize(values) == 0) {
+                // No valid region was supplied in the input locale: use the CLDR default ("und").
+                uenum_close(en);
+                en = ucurr_getKeywordValuesForLocale(key, "und", TRUE, status);
+            }
+        } else {
+            // Consolidate the list
+            char *value = NULL;
+            ulist_resetList(otherValues);
+            while ((value = (char *)ulist_getNext(otherValues)) != NULL) {
+                if (!ulist_containsString(values, value, (int32_t)uprv_strlen(value))) {
+                    char *tmpValue = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY);
+                    if (tmpValue == NULL) {
+                        *status = U_MEMORY_ALLOCATION_ERROR;
+                        break;
+                    }
+                    uprv_memcpy(tmpValue, value, uprv_strlen(value) + 1);
+                    ulist_addItemEndList(values, tmpValue, TRUE, status);
+                    if (U_FAILURE(*status)) {
+                        break;
+                    }
+                }
+            }
+        }
+        if (en != NULL) { // en is NULL if the "und" fallback above failed
+            ulist_resetList((UList *)(en->context));
+        }
+    } else {
+        ulist_deleteList(values);
+        uprv_free(en);
+        values = NULL;
+        en = NULL;
+    }
+    ures_close(&to);
+    ures_close(&curbndl);
+    ures_close(&regbndl);
+    ures_close(&bundlekey);
+    ures_close(bundle);
+    ulist_deleteList(otherValues);
+    return en;
+}
+
+
+U_CAPI int32_t U_EXPORT2
+ucurr_getNumericCode(const UChar* currency) {
+    // Anything but a non-NULL code of exactly ISO_CURRENCY_CODE_LENGTH units maps to 0.
+    if (currency == NULL || u_strlen(currency) != ISO_CURRENCY_CODE_LENGTH) {
+        return 0;
+    }
+    int32_t numericCode = 0;
+    UErrorCode errorCode = U_ZERO_ERROR;
+    UResourceBundle *codeMap = ures_openDirect(0, "currencyNumericCodes", &errorCode);
+    ures_getByKey(codeMap, "codeMap", codeMap, &errorCode);
+    if (U_SUCCESS(errorCode)) {
+        // Look the code up by its upper-cased char form; keep 0 if any step fails.
+        char key[ISO_CURRENCY_CODE_LENGTH+1];
+        myUCharsToChars(key, currency);
+        T_CString_toUpperCase(key);
+        ures_getByKey(codeMap, key, codeMap, &errorCode);
+        const int lookedUp = ures_getInt(codeMap, &errorCode);
+        numericCode = U_SUCCESS(errorCode) ? lookedUp : 0;
+    }
+    ures_close(codeMap);
+    return numericCode;
+}
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+//eof
diff --git a/thirdparty/icu4c/common/ucurrimp.h b/thirdparty/icu4c/common/ucurrimp.h
new file mode 100644
index 0000000000..6d9588295d
--- /dev/null
+++ b/thirdparty/icu4c/common/ucurrimp.h
@@ -0,0 +1,78 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#ifndef _UCURR_IMP_H_
+#define _UCURR_IMP_H_
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/parsepos.h"
+#include "unicode/uniset.h"
+
+/**
+ * Internal method. Given a currency ISO code and a locale, return
+ * the "static" currency name. This is usually the same as the
+ * UCURR_SYMBOL_NAME, but if the latter is a choice format, then the
+ * format is applied to the number 2.0 (to yield the more common
+ * plural) to return a static name.
+ *
+ * This is used for backward compatibility with old currency logic in
+ * DecimalFormat and DecimalFormatSymbols.
+ */
+U_CAPI void
+uprv_getStaticCurrencyName(const UChar* iso, const char* loc,
+ icu::UnicodeString& result, UErrorCode& ec);
+
+/**
+ * Attempt to parse the given string as a currency, either as a
+ * display name in the given locale, or as a 3-letter ISO 4217
+ * code. If multiple display names match, then the longest one is
+ * selected. If both a display name and a 3-letter ISO code
+ * match, then the display name is preferred, unless its length
+ * is less than 3.
+ *
+ * The parameters must not be NULL.
+ *
+ * @param locale the locale of the display names to match
+ * @param text the text to parse
+ * @param pos input-output position; on input, the position within
+ * text to match; must have 0 <= pos.getIndex() < text.length();
+ * on output, the position after the last matched character. If
+ * the parse fails, the position is unchanged upon output.
+ * @param type currency type to parse against, LONG_NAME only or not
+ * @param partialMatchLen The length of the longest matching prefix;
+ * this may be nonzero even if no full currency was matched.
+ * @return the ISO 4217 code, as a string, of the best match, or
+ * null if there is no match
+ *
+ * @internal
+ */
+U_CAPI void
+uprv_parseCurrency(const char* locale,
+ const icu::UnicodeString& text,
+ icu::ParsePosition& pos,
+ int8_t type,
+ int32_t* partialMatchLen,
+ UChar* result,
+ UErrorCode& ec);
+
+/**
+ * Puts all possible first-characters of a currency into the
+ * specified UnicodeSet.
+ *
+ * @param locale the locale of the display names of interest
+ * @param result the UnicodeSet to which to add the starting characters
+ */
+void uprv_currencyLeads(const char* locale, icu::UnicodeSet& result, UErrorCode& ec);
+
+
+
+#endif /* #ifndef _UCURR_IMP_H_ */
+
+//eof
diff --git a/thirdparty/icu4c/common/udata.cpp b/thirdparty/icu4c/common/udata.cpp
new file mode 100644
index 0000000000..ec9c999cea
--- /dev/null
+++ b/thirdparty/icu4c/common/udata.cpp
@@ -0,0 +1,1460 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: udata.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999oct25
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h" /* U_PLATFORM etc. */
+
+#ifdef __GNUC__
+/* if gcc
+#define ATTRIBUTE_WEAK __attribute__ ((weak))
+might have to #include some other header
+*/
+#endif
+
+#include "unicode/putil.h"
+#include "unicode/udata.h"
+#include "unicode/uversion.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "mutex.h"
+#include "putilimp.h"
+#include "restrace.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "ucmndata.h"
+#include "udatamem.h"
+#include "uhash.h"
+#include "umapfile.h"
+#include "umutex.h"
+
+/***********************************************************************
+*
+* Notes on the organization of the ICU data implementation
+*
+* All of the public API is defined in udata.h
+*
+* The implementation is split into several files...
+*
+* - udata.cpp (this file) contains higher level code that knows about
+* the search paths for locating data, caching opened data, etc.
+*
+* - umapfile.c contains the low level platform-specific code for actually loading
+* (memory mapping, file reading, whatever) data into memory.
+*
+* - ucmndata.c deals with the tables of contents of ICU data items within
+* an ICU common format data file. The implementation includes
+* an abstract interface and support for multiple TOC formats.
+* All knowledge of any specific TOC format is encapsulated here.
+*
+* - udatamem.c has code for managing UDataMemory structs. These are little
+* descriptor objects for blocks of memory holding ICU data of
+* various types.
+*/
+
+/* configuration ---------------------------------------------------------- */
+
+/* If you are excruciatingly bored turn this on .. */
+/* #define UDATA_DEBUG 1 */
+
+#if defined(UDATA_DEBUG)
+# include <stdio.h>
+#endif
+
+U_NAMESPACE_USE
+
+/*
+ * Forward declarations
+ */
+static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err);
+
+/***********************************************************************
+*
+* static (Global) data
+*
+************************************************************************/
+
+/*
+ * Pointers to the common ICU data.
+ *
+ * We store multiple pointers to ICU data packages and iterate through them
+ * when looking for a data item.
+ *
+ * It is possible to combine this with dependency inversion:
+ * One or more data package libraries may export
+ * functions that each return a pointer to their piece of the ICU data,
+ * and this file would import them as weak functions, without a
+ * strong linker dependency from the common library on the data library.
+ *
+ * Then we can have applications depend on only that part of ICU's data
+ * that they really need, reducing the size of binaries that take advantage
+ * of this.
+ */
+static UDataMemory *gCommonICUDataArray[10] = { NULL }; // Access protected by icu global mutex.
+
+static u_atomic_int32_t gHaveTriedToLoadCommonData = ATOMIC_INT32_T_INITIALIZER(0); // See extendICUData().
+
+static UHashtable *gCommonDataCache = NULL; /* Global hash table of opened ICU data files. */
+static icu::UInitOnce gCommonDataCacheInitOnce = U_INITONCE_INITIALIZER;
+
+#if !defined(ICU_DATA_DIR_WINDOWS)
+static UDataFileAccess gDataFileAccess = UDATA_DEFAULT_ACCESS; // Access not synchronized.
+ // Modifying is documented as thread-unsafe.
+#else
+// If we are using the Windows data directory, then look in one spot only.
+static UDataFileAccess gDataFileAccess = UDATA_NO_FILES;
+#endif
+
+static UBool U_CALLCONV
+udata_cleanup(void)
+{
+    /* Delete the cache of user data mappings; the table owns the contents and will delete them. */
+    if (gCommonDataCache != NULL) {
+        uhash_close(gCommonDataCache);
+        gCommonDataCache = NULL;                /* Cleanup is not thread safe. */
+    }
+    gCommonDataCacheInitOnce.reset();
+    /* Close the common data entries, stopping at the first unused slot. */
+    int32_t slot = 0;
+    while (slot < UPRV_LENGTHOF(gCommonICUDataArray) && gCommonICUDataArray[slot] != NULL) {
+        udata_close(gCommonICUDataArray[slot]);
+        gCommonICUDataArray[slot] = NULL;
+        ++slot;
+    }
+    gHaveTriedToLoadCommonData = 0;
+    return TRUE;                   /* Everything was cleaned up */
+}
+
+static UBool U_CALLCONV
+findCommonICUDataByName(const char *inBasename, UErrorCode &err)
+{
+    /* Reports whether the data cached under inBasename is also present in gCommonICUDataArray. */
+    UDataMemory *cached = udata_findCachedData(inBasename, err);
+    if (U_FAILURE(err) || cached == NULL) {
+        return FALSE;
+    }
+
+    /* The array is scanned while `lock` is held. */
+    Mutex lock;
+    int32_t slot = 0;
+    while (slot < UPRV_LENGTHOF(gCommonICUDataArray)) {
+        const UDataMemory *entry = gCommonICUDataArray[slot];
+        if ((entry != NULL) && (entry->pHeader == cached->pHeader)) {
+            /* The data pointer is already in the array. */
+            return TRUE;
+        }
+        ++slot;
+    }
+    return FALSE;
+}
+
+
+/*
+ * setCommonICUData. Set a UDataMemory to be the global ICU Data
+ */
+static UBool
+setCommonICUData(UDataMemory *pData,     /*  The new common data.  Belongs to caller, we copy it. */
+                 UBool       warn,       /*  If true, set USING_DEFAULT warning if ICUData was    */
+                                         /*    changed by another thread before we got to it.     */
+                 UErrorCode *pErr)
+{
+    UDataMemory *copy = UDataMemory_createNewInstance(pErr);
+    if (U_FAILURE(*pErr)) {
+        return FALSE;
+    }
+    /* Other threads must cleanly see either the old entry or the new one, */
+    /* never a partially initialized one, so the copy is filled in before  */
+    /* it is stored. Old entries can not be deleted - someone may still    */
+    /* have a pointer to one lying around in their locals.                 */
+    UDatamemory_assign(copy, pData);
+    UBool installed = FALSE;
+    int32_t slot = 0;
+    umtx_lock(NULL);
+    while (slot < UPRV_LENGTHOF(gCommonICUDataArray)) {
+        if (gCommonICUDataArray[slot] == NULL) {
+            gCommonICUDataArray[slot] = copy;
+            installed = TRUE;
+            break;
+        }
+        if (gCommonICUDataArray[slot]->pHeader == pData->pHeader) {
+            /* The same data pointer is already in the array. */
+            break;
+        }
+        ++slot;
+    }
+    umtx_unlock(NULL);
+    if (warn && slot == UPRV_LENGTHOF(gCommonICUDataArray)) {
+        *pErr = U_USING_DEFAULT_WARNING;
+    }
+    if (!installed) {
+        uprv_free(copy);
+        return FALSE;
+    }
+    ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
+    return TRUE;
+}
+
+#if !defined(ICU_DATA_DIR_WINDOWS)
+
+static UBool
+setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) {
+ UDataMemory tData; /* temporary stack wrapper around the raw pData pointer */
+ UDataMemory_init(&tData);
+ UDataMemory_setData(&tData, pData);
+ udata_checkCommonData(&tData, pErrorCode);
+ return setCommonICUData(&tData, FALSE, pErrorCode); /* the warn argument is ignored: FALSE is always passed on */
+}
+
+#endif
+
+static const char *
+findBasename(const char *path) {
+    /*
+     * Returns a pointer into path: the character after the last U_FILE_SEP_CHAR,
+     * or path itself when it contains no separator.
+     */
+    const char *lastSep=uprv_strrchr(path, U_FILE_SEP_CHAR);
+    return (lastSep!=NULL) ? lastSep+1 : path;
+}
+
+#ifdef UDATA_DEBUG
+static const char *
+packageNameFromPath(const char *path)
+{
+    /* Debug helper: the basename of path, or U_ICUDATA_NAME when there is nothing usable. */
+    const char *name = NULL;
+
+    if((path != NULL) && (*path != 0)) {
+        name = findBasename(path);
+    }
+    if((name == NULL) || (*name == 0)) {
+        return U_ICUDATA_NAME;
+    }
+
+    return name;
+}
+#endif
+
+/*----------------------------------------------------------------------*
+ * *
+ * Cache for common data *
+ * Functions for looking up or adding entries to a cache of *
+ * data that has been previously opened. Avoids a potentially *
+ * expensive operation of re-opening the data for subsequent *
+ * uses. *
+ * *
+ * Data remains cached for the duration of the process. *
+ * *
+ *----------------------------------------------------------------------*/
+
+typedef struct DataCacheElement {
+ char *name; /* heap-allocated base name; also serves as the hash key */
+ UDataMemory *item; /* the cached data; closed by DataCacheElement_deleter */
+} DataCacheElement;
+
+
+
+/*
+ * Deleter function for DataCacheElements.
+ * udata cleanup function closes the hash table; hash table in turn calls back to
+ * here for each entry.
+ */
+static void U_CALLCONV DataCacheElement_deleter(void *pDCEl) {
+    DataCacheElement *element = static_cast<DataCacheElement *>(pDCEl);
+    udata_close(element->item);     /* unmaps storage */
+    uprv_free(element->name);       /* delete the hash key string. */
+    uprv_free(element);             /* delete the element itself */
+}
+
+static void U_CALLCONV udata_initHashTable(UErrorCode &err) {
+    /* Creates the cache table; on failure nothing else is set up. */
+    U_ASSERT(gCommonDataCache == NULL);
+    gCommonDataCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &err);
+    if (U_SUCCESS(err)) {
+        U_ASSERT(gCommonDataCache != NULL);
+        uhash_setValueDeleter(gCommonDataCache, DataCacheElement_deleter);
+        ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
+    }
+}
+
+ /* udata_getHashTable()
+ * Get the hash table used to store the data cache entries.
+ * Lazy create it if it doesn't yet exist.
+ */
+static UHashtable *udata_getHashTable(UErrorCode &err) {
+ umtx_initOnce(gCommonDataCacheInitOnce, &udata_initHashTable, err); /* runs udata_initHashTable through the init-once guard */
+ return gCommonDataCache;
+}
+
+
+
+static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err)
+{
+    /*
+     * Looks up previously cached data by the base name of path; NULL when absent or on error.
+     */
+    UHashtable *cache = udata_getHashTable(err);
+    if (U_FAILURE(err)) {
+        return NULL;
+    }
+
+    /* Cache remembers only the base name, not the full path. */
+    const char *key = findBasename(path);
+
+    /* Only the table lookup itself happens between umtx_lock and umtx_unlock. */
+    umtx_lock(NULL);
+    DataCacheElement *hit = (DataCacheElement *)uhash_get(cache, key);
+    umtx_unlock(NULL);
+
+    UDataMemory *found = (hit != NULL) ? hit->item : NULL;
+#ifdef UDATA_DEBUG
+    fprintf(stderr, "Cache: [%s] -> %p\n", key, (void*) found);
+#endif
+    return found;
+}
+
+
+static UDataMemory *udata_cacheDataItem(const char *path, UDataMemory *item, UErrorCode *pErr) {
+    DataCacheElement *newElement;
+    const char       *baseName;
+    int32_t           nameLen;
+    UHashtable       *htable;
+    DataCacheElement *oldValue = NULL;
+    UErrorCode        subErr = U_ZERO_ERROR;
+    /* Caches a copy of item under the base name of path and returns the cached UDataMemory. */
+    htable = udata_getHashTable(*pErr);
+    if (U_FAILURE(*pErr)) {
+        return NULL;
+    }
+
+    /* Create a new DataCacheElement - the thingy we store in the hash table -
+     * and copy the supplied path and UDataMemoryItems into it.
+     */
+    newElement = (DataCacheElement *)uprv_malloc(sizeof(DataCacheElement));
+    if (newElement == NULL) {
+        *pErr = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    newElement->item = UDataMemory_createNewInstance(pErr);
+    if (U_FAILURE(*pErr)) {
+        uprv_free(newElement);
+        return NULL;
+    }
+    UDatamemory_assign(newElement->item, item);
+
+    baseName = findBasename(path);
+    nameLen = (int32_t)uprv_strlen(baseName);
+    newElement->name = (char *)uprv_malloc(nameLen+1);
+    if (newElement->name == NULL) {
+        *pErr = U_MEMORY_ALLOCATION_ERROR;
+        uprv_free(newElement->item);
+        uprv_free(newElement);
+        return NULL;
+    }
+    uprv_strcpy(newElement->name, baseName);
+
+    /* Stick the new DataCacheElement into the hash table, unless one is already there. */
+    /* The lookup uses baseName because that is the key entries are stored and found under. */
+    umtx_lock(NULL);
+    oldValue = (DataCacheElement *)uhash_get(htable, baseName);
+    if (oldValue != NULL) {
+        subErr = U_USING_DEFAULT_WARNING;
+    }
+    else {
+        uhash_put(
+            htable,
+            newElement->name,               /* Key   */
+            newElement,                     /* Value */
+            &subErr);
+    }
+    umtx_unlock(NULL);
+
+#ifdef UDATA_DEBUG
+    fprintf(stderr, "Cache: [%s] <<< %p : %s. vFunc=%p\n", newElement->name,
+    (void*) newElement->item, u_errorName(subErr), (void*) newElement->item->vFuncs);
+#endif
+
+    if (subErr == U_USING_DEFAULT_WARNING || U_FAILURE(subErr)) {
+        *pErr = subErr; /* copy sub err unto fillin ONLY if something happens. */
+        uprv_free(newElement->name);
+        uprv_free(newElement->item);
+        uprv_free(newElement);
+        return oldValue ? oldValue->item : NULL; /* the entry that was already cached, if any */
+    }
+
+    return newElement->item;
+}
+
+/*----------------------------------------------------------------------*==============
+ * *
+ * Path management. Could be shared with other tools/etc if need be *
+ * later on. *
+ * *
+ *----------------------------------------------------------------------*/
+
+U_NAMESPACE_BEGIN
+
+class UDataPathIterator /* walks the candidate paths in which one data item may be found */
+{
+public:
+ UDataPathIterator(const char *path, const char *pkg,
+ const char *item, const char *suffix, UBool doCheckLastFour,
+ UErrorCode *pErrorCode);
+ const char *next(UErrorCode *pErrorCode); /* next candidate path, or NULL when there are no more */
+
+private:
+ const char *path; /* working path: the path argument, or u_getDataDirectory() if that was NULL */
+ const char *nextPath; /* path following this one; NULL once the list is exhausted */
+ const char *basename; /* item's basename (icudt22e_mt.res)*/
+
+ StringPiece suffix; /* item suffix (empty if NULL was passed in) */
+
+ uint32_t basenameLen; /* length of basename */
+
+ CharString itemPath; /* path passed in with item name */
+ CharString pathBuffer; /* output path for this iteration */
+ CharString packageStub; /* example: "/icudt28b". Will ignore that leaf in set paths. */
+
+ UBool checkLastFour; /* if TRUE then allow paths such as '/foo/myapp.dat'
+ * to match, checks last 4 chars of suffix with
+ * last 4 of path, then previous chars. */
+};
+
+/**
+ * Sets up iteration over the candidate paths for 'item'. Errors from building the internal strings are reported through pErrorCode.
+ * @param inPath The full pathname to be iterated over. If NULL, defaults to u_getDataDirectory()
+ * @param pkg Package which is being searched for, ex "icudt28l". Will ignore leaf directories such as /icudt28l
+ * @param item Item to be searched for. Can include full path, such as /a/b/foo.dat
+ * @param inSuffix Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly.
+ * Ex: 'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2. '/blarg/stuff.dat' would also be found.
+ * Note: inSuffix may also be the 'item' being searched for as well, (ex: "ibm-5348_P100-1997.cnv"), in which case
+ * the 'item' parameter is often the same as pkg. (Though sometimes might have a tree part as well, ex: "icudt62l-curr").
+ * @param doCheckLastFour Stored as checkLastFour; next() uses it to accept path entries that name the file explicitly.
+ */
+UDataPathIterator::UDataPathIterator(const char *inPath, const char *pkg,
+ const char *item, const char *inSuffix, UBool doCheckLastFour,
+ UErrorCode *pErrorCode)
+{
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "SUFFIX1=%s PATH=%s\n", inSuffix, inPath);
+#endif
+ /** Path **/
+ if(inPath == NULL) {
+ path = u_getDataDirectory();
+ } else {
+ path = inPath;
+ }
+
+ /** Package **/
+ if(pkg != NULL) {
+ packageStub.append(U_FILE_SEP_CHAR, *pErrorCode).append(pkg, *pErrorCode); /* stub is the separator followed by pkg */
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "STUB=%s [%d]\n", packageStub.data(), packageStub.length());
+#endif
+ }
+
+ /** Item **/
+ basename = findBasename(item);
+ basenameLen = (int32_t)uprv_strlen(basename);
+
+ /** Item path **/
+ if(basename == item) {
+ nextPath = path; /* item has no directory part: start with the regular path */
+ } else {
+ itemPath.append(item, (int32_t)(basename-item), *pErrorCode); /* directory part of item, tried first */
+ nextPath = itemPath.data();
+ }
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "SUFFIX=%s [%p]\n", inSuffix, (void*) inSuffix);
+#endif
+
+ /** Suffix **/
+ if(inSuffix != NULL) {
+ suffix = inSuffix;
+ } else {
+ suffix = "";
+ }
+
+ checkLastFour = doCheckLastFour;
+
+ /* pathBuffer will hold the output path strings returned by this iterator */
+
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "0: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n",
+ item,
+ path,
+ basename,
+ suffix.data(),
+ itemPath.data(),
+ nextPath,
+ checkLastFour?"TRUE":"false");
+#endif
+}
+
+/**
+ * Get the next path on the list.
+ *
+ * @param pErrorCode In/out error code. NULL is returned at once if it already indicates a failure;
+ * it is also passed to the string appends that build the returned path.
+ * @return Pointer to the next path (held in this iterator's pathBuffer), or NULL if there are no more.
+ */
+const char *UDataPathIterator::next(UErrorCode *pErrorCode)
+{
+ if(U_FAILURE(*pErrorCode)) {
+ return NULL;
+ }
+
+ const char *currentPath = NULL;
+ int32_t pathLen = 0;
+ const char *pathBasename;
+
+ do
+ {
+ if( nextPath == NULL ) {
+ break;
+ }
+ currentPath = nextPath;
+
+ if(nextPath == itemPath.data()) { /* we were processing item's path. */
+ nextPath = path; /* start with regular path next tm. */
+ pathLen = (int32_t)uprv_strlen(currentPath);
+ } else {
+ /* fix up next for next time */
+ nextPath = uprv_strchr(currentPath, U_PATH_SEP_CHAR);
+ if(nextPath == NULL) {
+ /* segment: entire path */
+ pathLen = (int32_t)uprv_strlen(currentPath);
+ } else {
+ /* segment: until next segment */
+ pathLen = (int32_t)(nextPath - currentPath);
+ /* skip divider */
+ nextPath ++;
+ }
+ }
+
+ if(pathLen == 0) {
+ continue; /* empty segment: move on to the next one */
+ }
+
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "rest of path (IDD) = %s\n", currentPath);
+ fprintf(stderr, " ");
+ {
+ int32_t qqq;
+ for(qqq=0;qqq<pathLen;qqq++)
+ {
+ fprintf(stderr, " ");
+ }
+
+ fprintf(stderr, "^\n");
+ }
+#endif
+ pathBuffer.clear().append(currentPath, pathLen, *pErrorCode);
+
+ /* check for .dat files */
+ pathBasename = findBasename(pathBuffer.data());
+
+ if(checkLastFour == TRUE &&
+ (pathLen>=4) &&
+ uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix.data(), 4)==0 && /* suffix matches */
+ uprv_strncmp(findBasename(pathBuffer.data()), basename, basenameLen)==0 && /* base matches */
+ uprv_strlen(pathBasename)==(basenameLen+4)) { /* base+suffix = full len */
+
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "Have %s file on the path: %s\n", suffix.data(), pathBuffer.data());
+#endif
+ /* do nothing */
+ }
+ else
+ { /* regular dir path */
+ if(pathBuffer[pathLen-1] != U_FILE_SEP_CHAR) {
+ if((pathLen>=4) &&
+ uprv_strncmp(pathBuffer.data()+(pathLen-4), ".dat", 4) == 0)
+ {
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "skipping non-directory .dat file %s\n", pathBuffer.data());
+#endif
+ continue;
+ }
+
+ /* Check if it is a directory with the same name as our package */
+ if(!packageStub.isEmpty() &&
+ (pathLen > packageStub.length()) &&
+ !uprv_strcmp(pathBuffer.data() + pathLen - packageStub.length(), packageStub.data())) {
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "Found stub %s (will add package %s of len %d)\n", packageStub.data(), basename, basenameLen);
+#endif
+ pathBuffer.truncate(pathLen - packageStub.length());
+ }
+ pathBuffer.append(U_FILE_SEP_CHAR, *pErrorCode);
+ }
+
+ /* + basename */
+ pathBuffer.append(packageStub.data()+1, packageStub.length()-1, *pErrorCode); /* NOTE(review): skips the stub's leading separator; assumes packageStub is non-empty (pkg != NULL) - confirm */
+
+ if (!suffix.empty()) /* tack on suffix */
+ {
+ if (suffix.length() > 4) {
+ // If the suffix is actually an item ("ibm-5348_P100-1997.cnv") and not an extension (".res")
+ // then we need to ensure that the path ends with a separator.
+ pathBuffer.ensureEndsWithFileSeparator(*pErrorCode);
+ }
+ pathBuffer.append(suffix, *pErrorCode);
+ }
+ }
+
+#ifdef UDATA_DEBUG
+ fprintf(stderr, " --> %s\n", pathBuffer.data());
+#endif
+
+ return pathBuffer.data();
+
+ } while(path);
+
+ /* fell way off the end */
+ return NULL;
+}
+
+U_NAMESPACE_END
+
+/* ==================================================================================*/
+
+
+/*----------------------------------------------------------------------*
+ * *
+ * Add a static reference to the common data library *
+ * Unless overridden by an explicit udata_setCommonData, this will be *
+ * our common data. *
+ * *
+ *----------------------------------------------------------------------*/
+#if !defined(ICU_DATA_DIR_WINDOWS)
+// When using the Windows system data, we expect only a single data file.
+extern "C" const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT;
+#endif
+
+/*
+ * This would be a good place for weak-linkage declarations of
+ * partial-data-library access functions where each returns a pointer
+ * to its data package, if it is linked in.
+ */
+/*
+extern const void *uprv_getICUData_collation(void) ATTRIBUTE_WEAK;
+extern const void *uprv_getICUData_conversion(void) ATTRIBUTE_WEAK;
+*/
+
+/*----------------------------------------------------------------------*
+ * *
+ * openCommonData Attempt to open a common format (.dat) file *
+ * Map it into memory (if it's not there already) *
+ * and return a UDataMemory object for it. *
+ * *
+ * If the requested data is already open and cached *
+ * just return the cached UDataMem object. *
+ * *
+ * commonDataIndex >= 0 selects a slot of gCommonICUDataArray: a cached *
+ * entry is returned as is and an out-of-range index yields NULL. Unless *
+ * ICU_DATA_DIR_WINDOWS is defined, an empty slot causes the linked-in *
+ * U_ICUDATA_ENTRY_POINT data to be registered and the slot returned, *
+ * or NULL if that data already sits in a lower slot. *
+ * *
+ * Otherwise the file is looked up by the basename of path: first in *
+ * the cache, then through a UDataPathIterator with the ".dat" suffix. *
+ * *
+ * Returns NULL on failure. *pErrorCode becomes U_FILE_ACCESS_ERROR when *
+ * path has no basename or when no file could be mapped. *
+ * *
+ *----------------------------------------------------------------------*/
+static UDataMemory *
+openCommonData(const char *path, /* Path from OpenChoice? */
+ int32_t commonDataIndex, /* ICU Data (index >= 0) if path == NULL */
+ UErrorCode *pErrorCode)
+{
+ UDataMemory tData;
+ const char *pathBuffer;
+ const char *inBasename;
+
+ if (U_FAILURE(*pErrorCode)) {
+ return NULL;
+ }
+
+ UDataMemory_init(&tData);
+
+ /* TODO: revisit this special case for the built-in ICU data (commonDataIndex >= 0). */
+ if (commonDataIndex >= 0) {
+ /* "mini-cache" for common ICU data */
+ if(commonDataIndex >= UPRV_LENGTHOF(gCommonICUDataArray)) {
+ return NULL;
+ }
+ {
+ Mutex lock;
+ if(gCommonICUDataArray[commonDataIndex] != NULL) {
+ return gCommonICUDataArray[commonDataIndex];
+ }
+#if !defined(ICU_DATA_DIR_WINDOWS)
+// When using the Windows system data, we expect only a single data file.
+ int32_t i;
+ for(i = 0; i < commonDataIndex; ++i) {
+ if(gCommonICUDataArray[i]->pHeader == &U_ICUDATA_ENTRY_POINT) { /* NOTE(review): slot i is dereferenced without a NULL check; presumably every slot below commonDataIndex is already filled - confirm against callers */
+ /* The linked-in data is already in the list. */
+ return NULL;
+ }
+ }
+#endif
+ }
+
+ /* Add the linked-in data to the list. */
+ /*
+ * This is where we would check and call weakly linked partial-data-library
+ * access functions.
+ */
+ /*
+ if (uprv_getICUData_collation) {
+ setCommonICUDataPointer(uprv_getICUData_collation(), FALSE, pErrorCode);
+ }
+ if (uprv_getICUData_conversion) {
+ setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode);
+ }
+ */
+#if !defined(ICU_DATA_DIR_WINDOWS)
+// When using the Windows system data, we expect only a single data file.
+ setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, FALSE, pErrorCode);
+ {
+ Mutex lock;
+ return gCommonICUDataArray[commonDataIndex];
+ }
+#endif
+ }
+
+
+ /* request is NOT for ICU Data. */
+ /* (With ICU_DATA_DIR_WINDOWS defined, an uncached commonDataIndex >= 0 request also reaches this point.) */
+
+ /* Find the base name portion of the supplied path. */
+ /* inBasename will be left pointing somewhere within the original path string. */
+ inBasename = findBasename(path);
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "inBasename = %s\n", inBasename);
+#endif
+
+ if(*inBasename==0) {
+ /* no basename. This will happen if the original path was a directory name, */
+ /* like "a/b/c/". (Fallback to separate files will still work.) */
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "ocd: no basename in %s, bailing.\n", path);
+#endif
+ if (U_SUCCESS(*pErrorCode)) {
+ *pErrorCode=U_FILE_ACCESS_ERROR;
+ }
+ return NULL;
+ }
+
+ /* Is the requested common data file already open and cached? */
+ /* Note that the cache is keyed by the base name only. The rest of the path, */
+ /* if any, is not considered. */
+ UDataMemory *dataToReturn = udata_findCachedData(inBasename, *pErrorCode);
+ if (dataToReturn != NULL || U_FAILURE(*pErrorCode)) {
+ return dataToReturn;
+ }
+
+ /* Requested item is not in the cache.
+ * Hunt it down, trying all the path locations
+ */
+
+ UDataPathIterator iter(u_getDataDirectory(), inBasename, path, ".dat", TRUE, pErrorCode);
+
+ /* Stop at the first candidate path that maps successfully, or when the iterator runs out. */
+ while ((UDataMemory_isLoaded(&tData)==FALSE) && (pathBuffer = iter.next(pErrorCode)) != NULL)
+ {
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "ocd: trying path %s - ", pathBuffer);
+#endif
+ uprv_mapFile(&tData, pathBuffer, pErrorCode);
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "%s\n", UDataMemory_isLoaded(&tData)?"LOADED":"not loaded");
+#endif
+ }
+ if (U_FAILURE(*pErrorCode)) {
+ return NULL;
+ }
+
+#if defined(OS390_STUBDATA) && defined(OS390BATCH)
+ if (!UDataMemory_isLoaded(&tData)) {
+ char ourPathBuffer[1024];
+ /* One more chance, for extendCommonData() */
+ /* At most 1019 path bytes are kept, leaving room for ".dat" and the terminating NUL. */
+ uprv_strncpy(ourPathBuffer, path, 1019);
+ ourPathBuffer[1019]=0;
+ uprv_strcat(ourPathBuffer, ".dat");
+ uprv_mapFile(&tData, ourPathBuffer, pErrorCode);
+ }
+#endif
+
+ if (U_FAILURE(*pErrorCode)) {
+ return NULL;
+ }
+ if (!UDataMemory_isLoaded(&tData)) {
+ /* no common data */
+ *pErrorCode=U_FILE_ACCESS_ERROR;
+ return NULL;
+ }
+
+ /* we have mapped a file, check its header */
+ udata_checkCommonData(&tData, pErrorCode);
+
+
+ /* Cache the UDataMemory struct for this .dat file,
+ * so we won't need to hunt it down and map it again next time
+ * something is needed from it. */
+ return udata_cacheDataItem(inBasename, &tData, pErrorCode);
+}
+
+
+/*----------------------------------------------------------------------*
+ * *
+ * extendICUData If the full set of ICU data was not loaded at *
+ * program startup, load it now. This function will *
+ * be called when the lookup of an ICU data item in *
+ * the common ICU data fails. *
+ * *
+ * return true if new data is loaded, false otherwise.*
+ * (The value actually returned is the result of *
+ * findCommonICUDataByName(U_ICUDATA_NAME), so it is *
+ * also true when another thread added the data.) *
+ * *
+ *----------------------------------------------------------------------*/
+static UBool extendICUData(UErrorCode *pErr)
+{
+ UDataMemory *pData;
+ UDataMemory copyPData;
+ UBool didUpdate = FALSE;
+
+ /*
+ * There is a chance for a race condition here.
+ * Normally, ICU data is loaded from a DLL or via mmap() and
+ * setCommonICUData() will detect if the same address is set twice.
+ * If ICU is built with data loading via fread() then the address will
+ * be different each time the common data is loaded and we may add
+ * multiple copies of the data.
+ * In this case, use a mutex to prevent the race.
+ * Use a specific mutex to avoid nested locks of the global mutex.
+ */
+#if MAP_IMPLEMENTATION==MAP_STDIO
+ static UMutex extendICUDataMutex;
+ umtx_lock(&extendICUDataMutex);
+#endif
+ if(!umtx_loadAcquire(gHaveTriedToLoadCommonData)) {
+ /* See if we can explicitly open a .dat file for the ICUData. */
+ pData = openCommonData(
+ U_ICUDATA_NAME, /* "icudt20l" , for example. */
+ -1, /* Pretend we're not opening ICUData */
+ pErr);
+
+ /* If pData is NULL, no .dat file could be opened and nothing is registered below. */
+
+ UDataMemory_init(&copyPData);
+ if(pData != NULL) {
+ UDatamemory_assign(&copyPData, pData);
+ copyPData.map = 0; /* The mapping for this data is owned by the hash table */
+ copyPData.mapAddr = 0; /* which will unmap it when ICU is shut down. */
+ /* CommonICUData is also unmapped when ICU is shut down.*/
+ /* To avoid unmapping the data twice, zero out the map */
+ /* fields in the UDataMemory that we're assigning */
+ /* to CommonICUData. */
+
+ didUpdate = /* no longer using this result */
+ setCommonICUData(&copyPData,/* The new common data. */
+ FALSE, /* No warnings if write didn't happen */
+ pErr); /* setCommonICUData honors errors; NOP if error set */
+ }
+
+ /* Record the attempt (successful or not) so the load above is tried only once. */
+ umtx_storeRelease(gHaveTriedToLoadCommonData, 1);
+ }
+
+ didUpdate = findCommonICUDataByName(U_ICUDATA_NAME, *pErr); /* Return 'true' when a racing thread writes out the extended */
+ /* data after another thread has failed to see it (in openCommonData), so */
+ /* extended data can be examined. */
+ /* Also handles a race through here before gHaveTriedToLoadCommonData is set. */
+
+#if MAP_IMPLEMENTATION==MAP_STDIO
+ umtx_unlock(&extendICUDataMutex);
+#endif
+ return didUpdate; /* Return true if ICUData pointer was updated. */
+ /* (Could potentially have been done by another thread racing */
+ /* us through here, but that's fine, we still return true */
+ /* so that current thread will also examine extended data.) */
+}
+
+/*----------------------------------------------------------------------*
+ * *
+ * udata_setCommonData Validate caller-supplied common data and, *
+ * if it checks out, install it as the ICU *
+ * common data. *
+ * *
+ *----------------------------------------------------------------------*/
+U_CAPI void U_EXPORT2
+udata_setCommonData(const void *data, UErrorCode *pErrorCode) {
+    /* Nothing can be reported without an error code; an earlier failure is left untouched. */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return;
+    }
+
+    if(data==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    /* Wrap the raw pointer in a temporary descriptor and validate its header. */
+    UDataMemory candidate;
+    UDataMemory_init(&candidate);
+    UDataMemory_setData(&candidate, data);
+    udata_checkCommonData(&candidate, pErrorCode);
+
+    /* Only data that passed the check becomes the ICU Common Data. */
+    if (U_SUCCESS(*pErrorCode)) {
+        setCommonICUData(&candidate, TRUE, pErrorCode);
+    }
+}
+
+/*---------------------------------------------------------------------------
+ *
+ * udata_setAppData Check caller-supplied application data and add it
+ * to the common data cache under the given path.
+ *
+ *---------------------------------------------------------------------------- */
+U_CAPI void U_EXPORT2
+udata_setAppData(const char *path, const void *data, UErrorCode *err)
+{
+    /* No error code to report through, or a failure is already pending. */
+    if(err==NULL || U_FAILURE(*err)) {
+        return;
+    }
+
+    if(data!=NULL) {
+        UDataMemory appData;
+
+        UDataMemory_init(&appData);
+        UDataMemory_setData(&appData, data);
+        udata_checkCommonData(&appData, err);
+        udata_cacheDataItem(path, &appData, err);
+    } else {
+        *err=U_ILLEGAL_ARGUMENT_ERROR;
+    }
+}
+
+/*----------------------------------------------------------------------------*
+ * *
+ * checkDataItem Validate a freshly located data item (an entry of a *
+ * common file or a separately loaded file): its header *
+ * must carry the 0xda 0x27 magic bytes and, when a *
+ * call-back is supplied, the call-back must accept it. *
+ * On success a new UDataMemory pointing at the header *
+ * is returned; on any failure the result is NULL. *
+ * *
+ *----------------------------------------------------------------------------*/
+static UDataMemory *
+checkDataItem
+(
+ const DataHeader *pHeader, /* The data item to be checked. */
+ UDataMemoryIsAcceptable *isAcceptable, /* App's call-back function */
+ void *context, /* pass-thru param for above. */
+ const char *type, /* pass-thru param for above. */
+ const char *name, /* pass-thru param for above. */
+ UErrorCode *nonFatalErr, /* Error code if this data was not acceptable */
+ /* but openChoice should continue with */
+ /* trying to get data from fallback path. */
+ UErrorCode *fatalErr /* Bad error, caller should return immediately */
+ )
+{
+    if (U_FAILURE(*fatalErr)) {
+        return NULL;
+    }
+
+    const UBool hasMagic = pHeader->dataHeader.magic1==0xda &&
+                           pHeader->dataHeader.magic2==0x27;
+
+    /* The call-back is consulted only for items whose magic bytes are right. */
+    if(!hasMagic ||
+       (isAcceptable!=NULL && !isAcceptable(context, type, name, &pHeader->info))) {
+        /* Not acceptable: the caller may keep looking. If something good */
+        /* turns up later, this error code will be cleared out. */
+        *nonFatalErr=U_INVALID_FORMAT_ERROR;
+        return NULL;
+    }
+
+    UDataMemory *accepted=UDataMemory_createNewInstance(fatalErr);
+    if (U_FAILURE(*fatalErr)) {
+        return NULL;
+    }
+    accepted->pHeader = pHeader;
+    return accepted;
+}
+
+/**
+ * Try each candidate path produced by UDataPathIterator as an individual data file.
+ * @return the UDataMemory of the first file that maps and passes checkDataItem();
+ * NULL if no file was acceptable or *pErrorCode was set to a failure.
+ * *subErrorCode is set to U_INVALID_FORMAT_ERROR when a file was found but rejected.
+ */
+static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
+ const char *dataPath, const char *tocEntryPathSuffix,
+ /* following arguments are the same as doOpenChoice itself */
+ const char *path, const char *type, const char *name,
+ UDataMemoryIsAcceptable *isAcceptable, void *context,
+ UErrorCode *subErrorCode,
+ UErrorCode *pErrorCode)
+{
+ const char *pathBuffer;
+ UDataMemory dataMemory;
+ UDataMemory *pEntryData;
+
+ /* look in ind. files: package\nam.typ ========================= */
+ /* init path iterator for individual files */
+ UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode);
+
+ while ((pathBuffer = iter.next(pErrorCode)) != NULL)
+ {
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer);
+#endif
+ if (uprv_mapFile(&dataMemory, pathBuffer, pErrorCode))
+ {
+ pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
+ if (pEntryData != NULL) {
+ /* Data is good.
+ * Hand off ownership of the backing memory to the user's UDataMemory.
+ * and return it. */
+ pEntryData->mapAddr = dataMemory.mapAddr;
+ pEntryData->map = dataMemory.map;
+
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "** Mapped file: %s\n", pathBuffer);
+#endif
+ return pEntryData;
+ }
+
+ /* the data is not acceptable, or some error occurred. Either way, unmap the memory */
+ udata_close(&dataMemory);
+
+ /* If we had a nasty error, bail out completely. */
+ if (U_FAILURE(*pErrorCode)) {
+ return NULL;
+ }
+
+ /* Otherwise remember that we found data but didn't like it for some reason */
+ *subErrorCode=U_INVALID_FORMAT_ERROR;
+ }
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "%s\n", UDataMemory_isLoaded(&dataMemory)?"LOADED":"not loaded");
+#endif
+ }
+ return NULL;
+}
+
+/**
+ * Look up tocEntryName in the common data package(s).
+ * @return the UDataMemory for the entry if it was found and accepted by checkDataItem();
+ * NULL if it was not found, was rejected, or an error occurred.
+ * A U_MEMORY_ALLOCATION_ERROR in *subErrorCode is copied to *pErrorCode.
+ */
+static UDataMemory *doLoadFromCommonData(UBool isICUData, const char * /*pkgName*/,
+ const char * /*dataPath*/, const char * /*tocEntryPathSuffix*/, const char *tocEntryName,
+ /* following arguments are the same as doOpenChoice itself */
+ const char *path, const char *type, const char *name,
+ UDataMemoryIsAcceptable *isAcceptable, void *context,
+ UErrorCode *subErrorCode,
+ UErrorCode *pErrorCode)
+{
+ UDataMemory *pEntryData;
+ const DataHeader *pHeader;
+ UDataMemory *pCommonData;
+ int32_t commonDataIndex;
+ UBool checkedExtendedICUData = FALSE;
+ /* try to get common data. The loop is for platforms such as the 390 that do
+ * not initially load the full set of ICU data. If the lookup of an ICU data item
+ * fails, the full (but slower to load) set is loaded, and the loop repeats,
+ * trying the lookup again. Once the full set of ICU data is loaded, the loop won't
+ * repeat because the full set will be checked the first time through.
+ *
+ * The loop also handles the fallback to a .dat file if the application linked
+ * to the stub data library rather than a real library.
+ */
+ for (commonDataIndex = isICUData ? 0 : -1;;) {
+ pCommonData=openCommonData(path, commonDataIndex, subErrorCode); /** search for pkg **/
+
+ if(U_SUCCESS(*subErrorCode) && pCommonData!=NULL) {
+ int32_t length;
+
+ /* look up the data piece in the common data */
+ pHeader=pCommonData->vFuncs->Lookup(pCommonData, tocEntryName, &length, subErrorCode);
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "%s: pHeader=%p - %s\n", tocEntryName, (void*) pHeader, u_errorName(*subErrorCode));
+#endif
+
+ if(pHeader!=NULL) {
+ pEntryData = checkDataItem(pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "pEntryData=%p\n", (void*) pEntryData);
+#endif
+ if (U_FAILURE(*pErrorCode)) {
+ return NULL;
+ }
+ if (pEntryData != NULL) {
+ pEntryData->length = length;
+ return pEntryData;
+ }
+ }
+ }
+ // If we failed due to being out-of-memory, then stop early and report the error.
+ if (*subErrorCode == U_MEMORY_ALLOCATION_ERROR) {
+ *pErrorCode = *subErrorCode;
+ return NULL;
+ }
+ /* Data wasn't found. If we were looking for an ICUData item and there is
+ * more data available, load it and try again,
+ * otherwise break out of this loop. */
+ if (!isICUData) {
+ return NULL;
+ } else if (pCommonData != NULL) {
+ ++commonDataIndex; /* try the next data package */
+ } else if ((!checkedExtendedICUData) && extendICUData(subErrorCode)) {
+ checkedExtendedICUData = TRUE; /* extendICUData() is attempted at most once per call */
+ /* try this data package slot again: it changed from NULL to non-NULL */
+ } else {
+ return NULL;
+ }
+ }
+}
+
+/*
+ * Tell whether (name, type) names one of the Time Zone resource bundles that are
+ * subject to special override data loading.
+ */
+static UBool isTimeZoneFile(const char *name, const char *type) {
+    static const char * const kTimeZoneBundles[] = {
+        "zoneinfo64",
+        "timezoneTypes",
+        "windowsZones",
+        "metaZones"
+    };
+
+    /* Only resource bundles qualify. */
+    if (uprv_strcmp(type, "res") != 0) {
+        return FALSE;
+    }
+    for (int32_t i = 0; i < UPRV_LENGTHOF(kTimeZoneBundles); ++i) {
+        if (uprv_strcmp(name, kTimeZoneBundles[i]) == 0) {
+            return TRUE;
+        }
+    }
+    return FALSE;
+}
+
+/*
+ * A note on the ownership of Mapped Memory
+ *
+ * For common format files, ownership resides with the UDataMemory object
+ * that lives in the cache of opened common data. These UDataMemorys are private
+ * to the udata implementation, and are never seen directly by users.
+ *
+ * The UDataMemory objects returned to users will have the address of some desired
+ * data within the mapped region, but they won't have the mapping info itself, and thus
+ * won't cause anything to be removed from memory when they are closed.
+ *
+ * For individual data files, the UDataMemory returned to the user holds the
+ * information necessary to unmap the data on close. If the user independently
+ * opens the same data file twice, two completely independent mappings will be made.
+ * (There is no cache of opened data items from individual files, only a cache of
+ * opened Common Data files, that is, files containing a collection of data items.)
+ *
+ * For common data passed in from the user via udata_setAppData() or
+ * udata_setCommonData(), ownership remains with the user.
+ *
+ * UDataMemory objects themselves, as opposed to the memory they describe,
+ * can be anywhere - heap, stack/local or global.
+ * They have a flag to indicate when they're heap allocated and thus
+ * must be deleted when closed.
+ */
+
+
+/*----------------------------------------------------------------------------*
+ * *
+ * main data loading functions *
+ * *
+ * doOpenChoice Derive the package name, tree name and TOC entry names *
+ * from path/type/name, then try the data sources in the *
+ * order selected by gDataFileAccess: the time zone files *
+ * directory (ICU time zone resources only), the common *
+ * package(s), and individual files. *
+ * Returns the opened UDataMemory, or NULL with *pErrorCode *
+ * set to the recorded sub-error or to U_FILE_ACCESS_ERROR. *
+ * *
+ *----------------------------------------------------------------------------*/
+static UDataMemory *
+doOpenChoice(const char *path, const char *type, const char *name,
+ UDataMemoryIsAcceptable *isAcceptable, void *context,
+ UErrorCode *pErrorCode)
+{
+ UDataMemory *retVal = NULL;
+
+ const char *dataPath;
+
+ int32_t tocEntrySuffixIndex;
+ const char *tocEntryPathSuffix;
+ UErrorCode subErrorCode=U_ZERO_ERROR;
+ const char *treeChar;
+
+ UBool isICUData = FALSE;
+
+
+ FileTracer::traceOpen(path, type, name);
+
+
+ /* Is this path ICU data? */
+ if(path == NULL ||
+ !strcmp(path, U_ICUDATA_ALIAS) || /* "ICUDATA" */
+ !uprv_strncmp(path, U_ICUDATA_NAME U_TREE_SEPARATOR_STRING, /* "icudt26e-" */
+ uprv_strlen(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING)) ||
+ !uprv_strncmp(path, U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING, /* "ICUDATA-" */
+ uprv_strlen(U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING))) {
+ isICUData = TRUE;
+ }
+
+#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) /* Windows: try "foo\bar" and "foo/bar" */
+ /* remap from alternate path char to the main one */
+ CharString altSepPath;
+ if(path) {
+ if(uprv_strchr(path,U_FILE_ALT_SEP_CHAR) != NULL) {
+ altSepPath.append(path, *pErrorCode);
+ char *p;
+ while ((p = uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR)) != NULL) {
+ *p = U_FILE_SEP_CHAR;
+ }
+#if defined (UDATA_DEBUG)
+ /* Use the data() accessor, as every other CharString access in this function does. */
+ fprintf(stderr, "Changed path from [%s] to [%s]\n", path, altSepPath.data());
+#endif
+ path = altSepPath.data();
+ }
+ }
+#endif
+
+ CharString tocEntryName; /* entry name in tree format. ex: 'icudt28b/coll/ar.res' */
+ CharString tocEntryPath; /* entry name in path format. ex: 'icudt28b\\coll\\ar.res' */
+
+ CharString pkgName;
+ CharString treeName;
+
+ /* ======= Set up strings */
+ if(path==NULL) {
+ pkgName.append(U_ICUDATA_NAME, *pErrorCode);
+ } else {
+ const char *pkg;
+ const char *first;
+ pkg = uprv_strrchr(path, U_FILE_SEP_CHAR);
+ first = uprv_strchr(path, U_FILE_SEP_CHAR);
+ if(uprv_pathIsAbsolute(path) || (pkg != first)) { /* more than one slash in the path- not a tree name */
+ /* see if this is an /absolute/path/to/package path */
+ if(pkg) {
+ pkgName.append(pkg+1, *pErrorCode);
+ } else {
+ pkgName.append(path, *pErrorCode);
+ }
+ } else {
+ treeChar = uprv_strchr(path, U_TREE_SEPARATOR);
+ if(treeChar) {
+ treeName.append(treeChar+1, *pErrorCode); /* following '-' */
+ if(isICUData) {
+ pkgName.append(U_ICUDATA_NAME, *pErrorCode);
+ } else {
+ pkgName.append(path, (int32_t)(treeChar-path), *pErrorCode);
+ if (first == NULL) {
+ /*
+ This user data has no path, but there is a tree name.
+ Look up the correct path from the data cache later.
+ */
+ path = pkgName.data();
+ }
+ }
+ } else {
+ if(isICUData) {
+ pkgName.append(U_ICUDATA_NAME, *pErrorCode);
+ } else {
+ pkgName.append(path, *pErrorCode);
+ }
+ }
+ }
+ }
+
+#ifdef UDATA_DEBUG
+ fprintf(stderr, " P=%s T=%s\n", pkgName.data(), treeName.data());
+#endif
+
+ /* setting up the entry name and file name
+ * Make up a full name by appending the type to the supplied
+ * name, assuming that a type was supplied.
+ */
+
+ /* prepend the package */
+ tocEntryName.append(pkgName, *pErrorCode);
+ tocEntryPath.append(pkgName, *pErrorCode);
+ tocEntrySuffixIndex = tocEntryName.length();
+
+ if(!treeName.isEmpty()) {
+ tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
+ tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
+ }
+
+ tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
+ tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
+ if(type!=NULL && *type!=0) {
+ tocEntryName.append(".", *pErrorCode).append(type, *pErrorCode);
+ tocEntryPath.append(".", *pErrorCode).append(type, *pErrorCode);
+ }
+ // The +1 is for the U_FILE_SEP_CHAR that is always appended above.
+ tocEntryPathSuffix = tocEntryPath.data() + tocEntrySuffixIndex + 1; /* suffix starts here */
+
+#ifdef UDATA_DEBUG
+ fprintf(stderr, " tocEntryName = %s\n", tocEntryName.data());
+ fprintf(stderr, " tocEntryPath = %s\n", tocEntryPath.data());
+#endif
+
+#if !defined(ICU_DATA_DIR_WINDOWS)
+ if(path == NULL) {
+ path = COMMON_DATA_NAME; /* "icudt26e" */
+ }
+#else
+ // When using the Windows system data, we expect only a single data file.
+ path = COMMON_DATA_NAME; /* "icudt26e" */
+#endif
+
+ /************************ Begin loop looking for ind. files ***************/
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "IND: inBasename = %s, pkg=%s\n", "(n/a)", packageNameFromPath(path));
+#endif
+
+ /* End of dealing with a null basename */
+ dataPath = u_getDataDirectory();
+
+ /**** Time zone individual files override */
+ if (isICUData && isTimeZoneFile(name, type)) {
+ const char *tzFilesDir = u_getTimeZoneFilesDirectory(pErrorCode);
+ if (tzFilesDir[0] != 0) {
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "Trying Time Zone Files directory = %s\n", tzFilesDir);
+#endif
+ retVal = doLoadFromIndividualFiles(/* pkgName.data() */ "", tzFilesDir, tocEntryPathSuffix,
+ /* path */ "", type, name, isAcceptable, context, &subErrorCode, pErrorCode);
+ if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
+ return retVal;
+ }
+ }
+ }
+
+ /**** COMMON PACKAGE - only if packages are first. */
+ if(gDataFileAccess == UDATA_PACKAGES_FIRST) {
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "Trying packages (UDATA_PACKAGES_FIRST)\n");
+#endif
+ /* #2 */
+ retVal = doLoadFromCommonData(isICUData,
+ pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
+ path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
+ if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
+ return retVal;
+ }
+ }
+
+ /**** INDIVIDUAL FILES */
+ if((gDataFileAccess==UDATA_PACKAGES_FIRST) ||
+ (gDataFileAccess==UDATA_FILES_FIRST)) {
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "Trying individual files\n");
+#endif
+ /* Check to make sure that there is a dataPath to iterate over */
+ if ((dataPath && *dataPath) || !isICUData) {
+ retVal = doLoadFromIndividualFiles(pkgName.data(), dataPath, tocEntryPathSuffix,
+ path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
+ if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
+ return retVal;
+ }
+ }
+ }
+
+ /**** COMMON PACKAGE */
+ if((gDataFileAccess==UDATA_ONLY_PACKAGES) ||
+ (gDataFileAccess==UDATA_FILES_FIRST)) {
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "Trying packages (UDATA_ONLY_PACKAGES || UDATA_FILES_FIRST)\n");
+#endif
+ retVal = doLoadFromCommonData(isICUData,
+ pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
+ path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
+ if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
+ return retVal;
+ }
+ }
+
+ /* Load from DLL. If we haven't attempted package load, we also haven't had any chance to
+ try a DLL (static or setCommonData/etc) load.
+ If we ever have a "UDATA_ONLY_FILES", add it to the or list here. */
+ if(gDataFileAccess==UDATA_NO_FILES) {
+#ifdef UDATA_DEBUG
+ fprintf(stderr, "Trying common data (UDATA_NO_FILES)\n");
+#endif
+ retVal = doLoadFromCommonData(isICUData,
+ pkgName.data(), "", tocEntryPathSuffix, tocEntryName.data(),
+ path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
+ if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
+ return retVal;
+ }
+ }
+
+ /* data not found */
+ if(U_SUCCESS(*pErrorCode)) {
+ if(U_SUCCESS(subErrorCode)) {
+ /* file not found */
+ *pErrorCode=U_FILE_ACCESS_ERROR;
+ } else {
+ /* entry point not found or rejected */
+ *pErrorCode=subErrorCode;
+ }
+ }
+ return retVal;
+}
+
+
+
+/* API ---------------------------------------------------------------------- */
+
+/*
+ * Open the data item name.type from the package designated by path, without an
+ * acceptance call-back.
+ * Returns NULL when pErrorCode is NULL or already a failure. An empty or NULL
+ * name sets U_ILLEGAL_ARGUMENT_ERROR. Everything else is delegated to doOpenChoice().
+ */
+U_CAPI UDataMemory * U_EXPORT2
+udata_open(const char *path, const char *type, const char *name,
+           UErrorCode *pErrorCode) {
+#ifdef UDATA_DEBUG
+    /* name is only validated below and type may legitimately be NULL, */
+    /* so neither may be handed to %s unguarded. */
+    fprintf(stderr, "udata_open(): Opening: %s : %s . %s\n",
+            (path?path:"NULL"), (name?name:"NULL"), (type?type:"NULL"));
+    fflush(stderr);
+#endif
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return NULL;
+    } else if(name==NULL || *name==0) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    } else {
+        return doOpenChoice(path, type, name, NULL, NULL, pErrorCode);
+    }
+}
+
+
+
+/*
+ * Open the data item name.type from the package designated by path, letting the
+ * isAcceptable call-back (invoked with context) decide whether a candidate is usable.
+ * Returns NULL when pErrorCode is NULL or already a failure. An empty or NULL
+ * name, or a NULL call-back, sets U_ILLEGAL_ARGUMENT_ERROR. Everything else is
+ * delegated to doOpenChoice().
+ */
+U_CAPI UDataMemory * U_EXPORT2
+udata_openChoice(const char *path, const char *type, const char *name,
+                 UDataMemoryIsAcceptable *isAcceptable, void *context,
+                 UErrorCode *pErrorCode) {
+#ifdef UDATA_DEBUG
+    /* name is only validated below and type may legitimately be NULL, */
+    /* so neither may be handed to %s unguarded. */
+    fprintf(stderr, "udata_openChoice(): Opening: %s : %s . %s\n",
+            (path?path:"NULL"), (name?name:"NULL"), (type?type:"NULL"));
+#endif
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return NULL;
+    } else if(name==NULL || *name==0 || isAcceptable==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    } else {
+        return doOpenChoice(path, type, name, isAcceptable, context, pErrorCode);
+    }
+}
+
+
+
+/*
+ * Copy the UDataInfo of an opened data item into the caller's structure.
+ *
+ * pInfo->size is read as the capacity of the caller's structure and is lowered
+ * to the size of the item's own info if that is smaller. The bytes after the
+ * size field are then copied. If the item was built for the opposite
+ * endianness, reservedWord is byte-swapped. Without a usable pData (NULL, or no
+ * header) pInfo->size is set to 0. A NULL pInfo is ignored.
+ */
+U_CAPI void U_EXPORT2
+udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) {
+    if(pInfo!=NULL) {
+        if(pData!=NULL && pData->pHeader!=NULL) {
+            const UDataInfo *info=&pData->pHeader->info;
+            uint16_t dataInfoSize=udata_getInfoSize(info);
+            if(pInfo->size>dataInfoSize) {
+                pInfo->size=dataInfoSize;
+            }
+            if(pInfo->size<2) {
+                /* No room beyond the size field itself. Without this check the */
+                /* size-2 below would go negative and turn into a huge copy length. */
+                return;
+            }
+            /* Skip the leading uint16_t size field on both sides. */
+            uprv_memcpy((uint16_t *)pInfo+1, (const uint16_t *)info+1, pInfo->size-2);
+            if(info->isBigEndian!=U_IS_BIG_ENDIAN) {
+                /* opposite endianness */
+                uint16_t x=info->reservedWord;
+                pInfo->reservedWord=(uint16_t)((x<<8)|(x>>8));
+            }
+        } else {
+            pInfo->size=0;
+        }
+    }
+}
+
+
+U_CAPI void U_EXPORT2 udata_setFileAccess(UDataFileAccess access, UErrorCode * /*status*/)
+{
+ // Note: this function is documented as not thread safe. The status argument is unused.
+ gDataFileAccess = access; /* doOpenChoice() reads this to decide which data sources to try, and in what order */
+}
diff --git a/thirdparty/icu4c/common/udatamem.cpp b/thirdparty/icu4c/common/udatamem.cpp
new file mode 100644
index 0000000000..6bf7c01235
--- /dev/null
+++ b/thirdparty/icu4c/common/udatamem.cpp
@@ -0,0 +1,161 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************/
+
+
+/*----------------------------------------------------------------------------------
+ *
+ * UDataMemory A class-like struct that serves as a handle to a piece of memory
+ * that contains some ICU data (resource, converters, whatever.)
+ *
+ * When an application opens ICU data (with udata_open, for example),
+ * a UDataMemory * is returned.
+ *
+ *----------------------------------------------------------------------------------*/
+
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "unicode/udata.h"
+
+#include "udatamem.h"
+
+/* Reset a UDataMemory: every field is zeroed, then the length is marked unknown (-1). */
+U_CFUNC void UDataMemory_init(UDataMemory *This) {
+    uprv_memset(This, 0, sizeof(*This));
+    This->length = -1;
+}
+
+
+/*
+ * UDataMemory assignment: copy every field of source into dest, except that
+ * dest keeps its own heapAllocated flag. dest must have been initialized first.
+ */
+U_CFUNC void UDatamemory_assign(UDataMemory *dest, UDataMemory *source) {
+    const UBool destIsOnHeap = dest->heapAllocated;
+
+    uprv_memcpy(dest, source, sizeof(*dest));
+    /* How dest itself was allocated is a property of dest, not of the copied data. */
+    dest->heapAllocated = destIsOnHeap;
+}
+
+/*
+ * Allocate and initialize a UDataMemory on the heap.
+ * Returns NULL if *pErr is already a failure, or if the allocation fails
+ * (in which case *pErr is set to U_MEMORY_ALLOCATION_ERROR).
+ */
+U_CFUNC UDataMemory *UDataMemory_createNewInstance(UErrorCode *pErr) {
+    if (U_FAILURE(*pErr)) {
+        return NULL;
+    }
+
+    UDataMemory *instance = (UDataMemory *)uprv_malloc(sizeof(UDataMemory));
+    if (instance != NULL) {
+        UDataMemory_init(instance);
+        /* Lets udata_close() know that the struct itself must be freed. */
+        instance->heapAllocated = TRUE;
+    } else {
+        *pErr = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return instance;
+}
+
+
+U_CFUNC const DataHeader *
+UDataMemory_normalizeDataPointer(const void *p) {
+ /* allow the data to be optionally prepended with an alignment-forcing double value
+ * A NULL pointer, or one that already points at the 0xda 0x27 magic bytes, is
+ * returned unchanged. Anything else is taken to start with that extra leading
+ * value, which is skipped (on OS/400 the pointer stored in the second
+ * pointer-sized slot is loaded instead). */
+ const DataHeader *pdh = (const DataHeader *)p;
+ if(pdh==NULL || (pdh->dataHeader.magic1==0xda && pdh->dataHeader.magic2==0x27)) {
+ return pdh;
+ } else {
+#if U_PLATFORM == U_PF_OS400
+ /*
+ TODO: Fix this once the compiler implements this feature. Keep in sync with genccode.c
+
+ This is here because this platform can't currently put
+ const data into the read-only pages of an object or
+ shared library (service program). Only strings are allowed in read-only
+ pages, so we use char * strings to store the data.
+
+ In order to prevent the beginning of the data from ever matching the
+ magic numbers we must skip the initial double.
+ [grhoten 4/24/2003]
+ */
+ return (const DataHeader *)*((const void **)p+1);
+#else
+ return (const DataHeader *)((const double *)p+1); /* header starts sizeof(double) bytes past p */
+#endif
+ }
+}
+
+
+U_CFUNC void UDataMemory_setData (UDataMemory *This, const void *dataAddr) {
+ This->pHeader = UDataMemory_normalizeDataPointer(dataAddr); /* stores the normalized header pointer; no other field is touched */
+}
+
+
+/*
+ * Close a data item: unmap whatever it has mapped, then either free the
+ * UDataMemory (when it was heap allocated) or reset it for reuse.
+ * A NULL pData is ignored.
+ */
+U_CAPI void U_EXPORT2
+udata_close(UDataMemory *pData) {
+    if(pData==NULL) {
+        return;
+    }
+
+    uprv_unmapFile(pData);
+    if(!pData->heapAllocated) {
+        /* The caller owns the struct: hand it back in its initial state. */
+        UDataMemory_init(pData);
+        return;
+    }
+    uprv_free(pData);
+}
+
+/*
+ * Return a pointer to the item's data proper, i.e. the first byte after its
+ * header, or NULL when there is no item or it has no header.
+ */
+U_CAPI const void * U_EXPORT2
+udata_getMemory(UDataMemory *pData) {
+    if(pData==NULL || pData->pHeader==NULL) {
+        return NULL;
+    }
+
+    const char *headerStart = (const char *)(pData->pHeader);
+    return headerStart+udata_getHeaderSize(pData->pHeader);
+}
+
+/**
+ * Get the length of the data item if possible.
+ * The length may be up to 15 bytes larger than the actual data.
+ *
+ * TODO Consider making this function public.
+ * It would have to return the actual length in more cases.
+ * For example, the length of the last item in a .dat package could be
+ * computed from the size of the whole .dat package minus the offset of the
+ * last item.
+ * The size of a file that was directly memory-mapped could be determined
+ * using some system API.
+ *
+ * In order to get perfect values for all data items, we may have to add a
+ * length field to UDataInfo, but that complicates data generation
+ * and may be overkill.
+ *
+ * @param pData The data item.
+ * @return the length of the data item, or -1 if not known
+ * @internal Currently used only in cintltst/udatatst.c
+ */
+U_CAPI int32_t U_EXPORT2
+udata_getLength(const UDataMemory *pData) {
+    /* Unknown when there is no item, no header, or no recorded length. */
+    if(pData==NULL || pData->pHeader==NULL || pData->length<0) {
+        return -1;
+    }
+
+    /*
+     * The stored length includes the header. Report only the size of the
+     * actual data starting at udata_getMemory().
+     */
+    return pData->length-udata_getHeaderSize(pData->pHeader);
+}
+
+/**
+ * Get the memory including the data header.
+ * Used in cintltst/udatatst.c
+ * @internal
+ */
+U_CAPI const void * U_EXPORT2
+udata_getRawMemory(const UDataMemory *pData) {
+    /* The raw memory starts at the header; NULL when there is no item or no header. */
+    if(pData==NULL) {
+        return NULL;
+    }
+    return pData->pHeader!=NULL ? pData->pHeader : NULL;
+}
+
+U_CFUNC UBool UDataMemory_isLoaded(const UDataMemory *This) {
+ return This->pHeader != NULL; /* "loaded" means a header pointer has been set; This itself must not be NULL */
+}
diff --git a/thirdparty/icu4c/common/udatamem.h b/thirdparty/icu4c/common/udatamem.h
new file mode 100644
index 0000000000..a05dd69756
--- /dev/null
+++ b/thirdparty/icu4c/common/udatamem.h
@@ -0,0 +1,61 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************/
+
+
+/*----------------------------------------------------------------------------------
+ *
+ * UDataMemory A class-like struct that serves as a handle to a piece of memory
+ * that contains some ICU data (resource, converters, whatever.)
+ *
+ * When an application opens ICU data (with udata_open, for example),
+ * a UDataMemory * is returned.
+ *
+ *----------------------------------------------------------------------------------*/
+#ifndef __UDATAMEM_H__
+#define __UDATAMEM_H__
+
+#include "unicode/udata.h"
+#include "ucmndata.h"
+
+/* Handle describing one piece of ICU data; see the overview comment at the top of this header. */
+struct UDataMemory {
+ const commonDataFuncs *vFuncs; /* Function Pointers for accessing TOC */
+
+ const DataHeader *pHeader; /* Header of the memory being described by this */
+ /* UDataMemory object. */
+ const void *toc; /* For common memory, table of contents for */
+ /* the pieces within. */
+ UBool heapAllocated; /* True if this UDataMemory Object is on the */
+ /* heap and thus needs to be deleted when closed. */
+
+ void *mapAddr; /* For mapped or allocated memory, the start addr. */
+ /* Only non-null if a close operation should unmap */
+ /* the associated data. */
+ void *map; /* Handle, or other data, OS dependent. */
+ /* Only non-null if a close operation should unmap */
+ /* the associated data, and additional info */
+ /* beyond the mapAddr is needed to do that. */
+ int32_t length; /* Length in bytes, header included (udata_getLength() subtracts the header size); -1 if unknown. */
+};
+
+U_CFUNC UDataMemory *UDataMemory_createNewInstance(UErrorCode *pErr);
+U_CFUNC void UDatamemory_assign (UDataMemory *dest, UDataMemory *source);
+U_CFUNC void UDataMemory_init (UDataMemory *This);
+U_CFUNC UBool UDataMemory_isLoaded(const UDataMemory *This);
+U_CFUNC void UDataMemory_setData (UDataMemory *This, const void *dataAddr);
+
+U_CFUNC const DataHeader *UDataMemory_normalizeDataPointer(const void *p);
+
+U_CAPI int32_t U_EXPORT2
+udata_getLength(const UDataMemory *pData);
+
+U_CAPI const void * U_EXPORT2
+udata_getRawMemory(const UDataMemory *pData);
+
+#endif
diff --git a/thirdparty/icu4c/common/udataswp.cpp b/thirdparty/icu4c/common/udataswp.cpp
new file mode 100644
index 0000000000..86f302bd9c
--- /dev/null
+++ b/thirdparty/icu4c/common/udataswp.cpp
@@ -0,0 +1,473 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: udataswp.c
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003jun05
+* created by: Markus W. Scherer
+*
+* Definitions for ICU data transformations for different platforms,
+* changing between big- and little-endian data and/or between
+* charset families (ASCII<->EBCDIC).
+*/
+
+#include <stdarg.h>
+#include "unicode/utypes.h"
+#include "unicode/udata.h" /* UDataInfo */
+#include "ucmndata.h" /* DataHeader */
+#include "cmemory.h"
+#include "udataswp.h"
+
+/* swapping primitives ------------------------------------------------------ */
+
+/* Exchange the two bytes of every 16-bit unit of inData into outData; returns length, or 0 on error. */
+static int32_t U_CALLCONV
+uprv_swapArray16(const UDataSwapper *ds,
+                 const void *inData, int32_t length, void *outData,
+                 UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    /* length counts bytes and must cover whole 16-bit units */
+    if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    const uint16_t *src=(const uint16_t *)inData;
+    uint16_t *dest=(uint16_t *)outData;
+    /* each unit is read before it is stored, so inData==outData is fine */
+    for(int32_t remaining=length/2; remaining>0; --remaining) {
+        const uint16_t unit=*src++;
+        *dest++=(uint16_t)((unit<<8)|(unit>>8));
+    }
+
+    return length;
+}
+
+/* Same-endianness counterpart of uprv_swapArray16: identical argument checks, then a plain copy. */
+static int32_t U_CALLCONV
+uprv_copyArray16(const UDataSwapper *ds,
+                 const void *inData, int32_t length, void *outData,
+                 UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    const bool argsOk= ds!=NULL && inData!=NULL && outData!=NULL && length>=0 && (length&1)==0;
+    if(!argsOk) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* nothing to move for an empty array or an in-place call */
+    if(length==0 || inData==outData) {
+        return length;
+    }
+    uprv_memcpy(outData, inData, length);
+    return length;
+}
+
+/* Reverse the byte order of every 32-bit unit of inData into outData; returns length, or 0 on error. */
+static int32_t U_CALLCONV
+uprv_swapArray32(const UDataSwapper *ds,
+                 const void *inData, int32_t length, void *outData,
+                 UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    /* length counts bytes and must be a multiple of 4 */
+    if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    const uint32_t *src=(const uint32_t *)inData;
+    uint32_t *dest=(uint32_t *)outData;
+    const int32_t unitCount=length/4;
+    /* src[i] is read before dest[i] is written, so in-place swapping works */
+    for(int32_t i=0; i<unitCount; ++i) {
+        const uint32_t word=src[i];
+        dest[i]=(uint32_t)((word<<24)|((word<<8)&0xff0000)|((word>>8)&0xff00)|(word>>24));
+    }
+
+    return length;
+}
+
+/* Same-endianness counterpart of uprv_swapArray32: identical argument checks, then a plain copy. */
+static int32_t U_CALLCONV
+uprv_copyArray32(const UDataSwapper *ds,
+                 const void *inData, int32_t length, void *outData,
+                 UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    const bool argsOk= ds!=NULL && inData!=NULL && outData!=NULL && length>=0 && (length&3)==0;
+    if(!argsOk) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* nothing to move for an empty array or an in-place call */
+    if(length==0 || inData==outData) {
+        return length;
+    }
+    uprv_memcpy(outData, inData, length);
+    return length;
+}
+
+/* Reverse the byte order of every 64-bit unit of inData into outData; returns length, or 0 on error. */
+static int32_t U_CALLCONV
+uprv_swapArray64(const UDataSwapper *ds,
+                 const void *inData, int32_t length, void *outData,
+                 UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    /* length counts bytes and must be a multiple of 8 */
+    if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    const uint64_t *src=(const uint64_t *)inData;
+    uint64_t *dest=(uint64_t *)outData;
+    /* each unit is read before it is stored, so inData==outData is fine */
+    for(int32_t remaining=length/8; remaining>0; --remaining) {
+        const uint64_t v=*src++;
+        *dest++=(v<<56)|((v&0xff00)<<40)|((v&0xff0000)<<24)|((v&0xff000000)<<8)|
+                ((v>>8)&0xff000000)|((v>>24)&0xff0000)|((v>>40)&0xff00)|(v>>56);
+    }
+
+    return length;
+}
+
+/* Same-endianness counterpart of uprv_swapArray64: identical argument checks, then a plain copy. */
+static int32_t U_CALLCONV
+uprv_copyArray64(const UDataSwapper *ds,
+                 const void *inData, int32_t length, void *outData,
+                 UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    const bool argsOk= ds!=NULL && inData!=NULL && outData!=NULL && length>=0 && (length&7)==0;
+    if(!argsOk) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* nothing to move for an empty array or an in-place call */
+    if(length==0 || inData==outData) {
+        return length;
+    }
+    uprv_memcpy(outData, inData, length);
+    return length;
+}
+
+/* Return x with its two bytes exchanged. */
+static uint16_t U_CALLCONV
+uprv_readSwapUInt16(uint16_t x) {
+    const uint16_t lowToHigh=(uint16_t)(x<<8);
+    const uint16_t highToLow=(uint16_t)(x>>8);
+    return (uint16_t)(lowToHigh|highToLow);
+}
+
+/* Identity reader: installed when the input byte order equals the platform's. */
+static uint16_t U_CALLCONV
+uprv_readDirectUInt16(uint16_t x) {
+    const uint16_t unchanged=x;
+    return unchanged;
+}
+
+/* Return x with its four bytes in reverse order. */
+static uint32_t U_CALLCONV
+uprv_readSwapUInt32(uint32_t x) {
+    const uint32_t byte0ToTop=x<<24;
+    const uint32_t byte1Up=(x<<8)&0xff0000;
+    const uint32_t byte2Down=(x>>8)&0xff00;
+    const uint32_t byte3ToBottom=x>>24;
+    return (uint32_t)(byte0ToTop|byte1Up|byte2Down|byte3ToBottom);
+}
+
+/* Identity reader: installed when the input byte order equals the platform's. */
+static uint32_t U_CALLCONV
+uprv_readDirectUInt32(uint32_t x) {
+    const uint32_t unchanged=x;
+    return unchanged;
+}
+
+/* Store x at *p with its two bytes exchanged. */
+static void U_CALLCONV
+uprv_writeSwapUInt16(uint16_t *p, uint16_t x) {
+    const uint16_t swapped=(uint16_t)((x<<8)|(x>>8));
+    *p=swapped;
+}
+
+/* Identity writer: installed when the output byte order equals the platform's. */
+static void U_CALLCONV
+uprv_writeDirectUInt16(uint16_t *p, uint16_t x) {
+    p[0]=x;
+}
+
+/* Store x at *p with its four bytes in reverse order. */
+static void U_CALLCONV
+uprv_writeSwapUInt32(uint32_t *p, uint32_t x) {
+    const uint32_t swapped=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24));
+    *p=swapped;
+}
+
+/* Identity writer: installed when the output byte order equals the platform's. */
+static void U_CALLCONV
+uprv_writeDirectUInt32(uint32_t *p, uint32_t x) {
+    p[0]=x;
+}
+
+/* Signed wrapper around ds->readUInt16: pass x as unsigned, cast the converted value back. */
+U_CAPI int16_t U_EXPORT2
+udata_readInt16(const UDataSwapper *ds, int16_t x) {
+    const uint16_t converted=ds->readUInt16((uint16_t)x);
+    return (int16_t)converted;
+}
+
+/* Signed wrapper around ds->readUInt32: pass x as unsigned, cast the converted value back. */
+U_CAPI int32_t U_EXPORT2
+udata_readInt32(const UDataSwapper *ds, int32_t x) {
+    const uint32_t converted=ds->readUInt32((uint32_t)x);
+    return (int32_t)converted;
+}
+
+/**
+ * Swap a block of invariant, NUL-terminated strings, but not padding
+ * bytes after the last string.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+udata_swapInvStringBlock(const UDataSwapper *ds,
+                         const void *inData, int32_t length, void *outData,
+                         UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* scan backwards for the final NUL: everything after it is padding */
+    const char *inBytes=(const char *)inData;
+    int32_t swapLength=length;
+    for(; swapLength>0 && inBytes[swapLength-1]!=0; --swapLength) {}
+
+    /* convert the strings, up to and including the last NUL */
+    ds->swapInvChars(ds, inData, swapLength, outData, pErrorCode);
+
+    /* carry the padding bytes over unchanged unless working in place */
+    const int32_t paddingLength=length-swapLength;
+    if(paddingLength>0 && inData!=outData) {
+        uprv_memcpy((char *)outData+swapLength, inBytes+swapLength, paddingLength);
+    }
+
+    /* on success report the whole block length, padding included */
+    return U_SUCCESS(*pErrorCode) ? length : 0;
+}
+
+/* Forward a printf-style message to the swapper's printError callback, if one is installed. */
+U_CAPI void U_EXPORT2
+udata_printError(const UDataSwapper *ds,
+                 const char *fmt,
+                 ...) {
+    if(ds->printError==NULL) {
+        return; /* no callback: the message is dropped */
+    }
+
+    va_list ap;
+    va_start(ap, fmt);
+    ds->printError(ds->printErrorContext, fmt, ap);
+    va_end(ap);
+}
+
+/* swap a data header ------------------------------------------------------- */
+
+/*
+ * Validate the ICU data header at inData and, when length>0, write a converted
+ * copy of it to outData.
+ * With length==-1 (preflight) the header is only validated; outData is never written.
+ * Returns the header size read from the input (via ds->readUInt16), or 0 with
+ * *pErrorCode set to U_ILLEGAL_ARGUMENT_ERROR, U_UNSUPPORTED_ERROR or
+ * U_INDEX_OUTOFBOUNDS_ERROR when a check fails.
+ */
+U_CAPI int32_t U_EXPORT2
+udata_swapDataHeader(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const DataHeader *pHeader;
+ uint16_t headerSize, infoSize;
+
+ /* argument checking */
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* check minimum length and magic bytes */
+ pHeader=(const DataHeader *)inData;
+ if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
+ pHeader->dataHeader.magic1!=0xda ||
+ pHeader->dataHeader.magic2!=0x27 ||
+ pHeader->info.sizeofUChar!=2
+ ) {
+ udata_printError(ds, "udata_swapDataHeader(): initial bytes do not look like ICU data\n");
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ /* both sizes are stored in input byte order; convert them before comparing */
+ headerSize=ds->readUInt16(pHeader->dataHeader.headerSize);
+ infoSize=ds->readUInt16(pHeader->info.size);
+
+ /* the sizes must be mutually consistent and fit into the supplied length */
+ if( headerSize<sizeof(DataHeader) ||
+ infoSize<sizeof(UDataInfo) ||
+ headerSize<(sizeof(pHeader->dataHeader)+infoSize) ||
+ (length>=0 && length<headerSize)
+ ) {
+ udata_printError(ds, "udata_swapDataHeader(): header size mismatch - headerSize %d infoSize %d length %d\n",
+ headerSize, infoSize, length);
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ if(length>0) {
+ DataHeader *outHeader;
+ const char *s;
+ int32_t maxLength;
+
+ /* Most of the fields are just bytes and need no swapping. */
+ if(inData!=outData) {
+ uprv_memcpy(outData, inData, headerSize);
+ }
+ outHeader=(DataHeader *)outData;
+
+ /* the output header advertises the output platform properties */
+ outHeader->info.isBigEndian = ds->outIsBigEndian;
+ outHeader->info.charsetFamily = ds->outCharset;
+
+ /* swap headerSize */
+ ds->swapArray16(ds, &pHeader->dataHeader.headerSize, 2, &outHeader->dataHeader.headerSize, pErrorCode);
+
+ /* swap UDataInfo size and reservedWord */
+ ds->swapArray16(ds, &pHeader->info.size, 4, &outHeader->info.size, pErrorCode);
+
+ /* swap copyright statement after the UDataInfo */
+ /* from here on infoSize is the byte offset of the copyright string within the header */
+ infoSize+=sizeof(pHeader->dataHeader);
+ s=(const char *)inData+infoSize;
+ maxLength=headerSize-infoSize;
+ /* get the length of the string */
+ /* (the length parameter is reused as the string length from this point) */
+ for(length=0; length<maxLength && s[length]!=0; ++length) {}
+ /* swap the string contents */
+ ds->swapInvChars(ds, s, length, (char *)outData+infoSize, pErrorCode);
+ }
+
+ /* NOTE(review): headerSize is returned even if a swap call above set *pErrorCode; callers presumably check the error code. */
+ return headerSize;
+}
+
+/* API functions ------------------------------------------------------------ */
+
+/*
+ * Allocate a UDataSwapper and install the conversion callbacks that match the
+ * requested input/output endianness and charset family.
+ * Returns NULL with *pErrorCode set if an argument is invalid or allocation fails.
+ */
+U_CAPI UDataSwapper * U_EXPORT2
+udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset,
+                  UBool outIsBigEndian, uint8_t outCharset,
+                  UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    if(inCharset>U_EBCDIC_FAMILY || outCharset>U_EBCDIC_FAMILY) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    /* zero-fill the new object so that printError and printErrorContext start out NULL */
+    UDataSwapper *ds=(UDataSwapper *)uprv_malloc(sizeof(UDataSwapper));
+    if(ds==NULL) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    uprv_memset(ds, 0, sizeof(UDataSwapper));
+
+    ds->inIsBigEndian=inIsBigEndian;
+    ds->inCharset=inCharset;
+    ds->outIsBigEndian=outIsBigEndian;
+    ds->outCharset=outCharset;
+
+    /* single-value readers convert input order -> platform order */
+    if(inIsBigEndian==U_IS_BIG_ENDIAN) {
+        ds->readUInt16=uprv_readDirectUInt16;
+        ds->readUInt32=uprv_readDirectUInt32;
+    } else {
+        ds->readUInt16=uprv_readSwapUInt16;
+        ds->readUInt32=uprv_readSwapUInt32;
+    }
+
+    /* single-value writers convert platform order -> output order */
+    if(outIsBigEndian==U_IS_BIG_ENDIAN) {
+        ds->writeUInt16=uprv_writeDirectUInt16;
+        ds->writeUInt32=uprv_writeDirectUInt32;
+    } else {
+        ds->writeUInt16=uprv_writeSwapUInt16;
+        ds->writeUInt32=uprv_writeSwapUInt32;
+    }
+
+    /* array functions go straight from input to output order */
+    if(inIsBigEndian!=outIsBigEndian) {
+        ds->swapArray16=uprv_swapArray16;
+        ds->swapArray32=uprv_swapArray32;
+        ds->swapArray64=uprv_swapArray64;
+    } else {
+        ds->swapArray16=uprv_copyArray16;
+        ds->swapArray32=uprv_copyArray32;
+        ds->swapArray64=uprv_copyArray64;
+    }
+
+    /* string comparison is done against already-converted output strings */
+    if(outCharset==U_ASCII_FAMILY) {
+        ds->compareInvChars=uprv_compareInvAscii;
+    } else {
+        ds->compareInvChars=uprv_compareInvEbcdic;
+    }
+
+    /* invariant-character conversion depends on both charset families */
+    if(inCharset==U_ASCII_FAMILY) {
+        if(outCharset==U_ASCII_FAMILY) {
+            ds->swapInvChars=uprv_copyAscii;
+        } else {
+            ds->swapInvChars=uprv_ebcdicFromAscii;
+        }
+    } else /* U_EBCDIC_FAMILY */ {
+        if(outCharset==U_EBCDIC_FAMILY) {
+            ds->swapInvChars=uprv_copyEbcdic;
+        } else {
+            ds->swapInvChars=uprv_asciiFromEbcdic;
+        }
+    }
+
+    return ds;
+}
+
+/*
+ * Open a UDataSwapper whose input properties (endianness, charset family) are
+ * taken from the ICU data header at the start of data.
+ *
+ * @param data           Start of the ICU data; must not be NULL.
+ * @param length         Number of bytes available at data, or a negative value to
+ *                       skip the length checks.
+ * @param outIsBigEndian Desired output endianness.
+ * @param outCharset     Desired output charset family; at most U_EBCDIC_FAMILY.
+ * @param pErrorCode     In/out error code; must not be NULL and must indicate success on input.
+ * @return A new swapper from udata_openSwapper(), or NULL with *pErrorCode set to
+ *         U_ILLEGAL_ARGUMENT_ERROR (bad arguments) or U_UNSUPPORTED_ERROR
+ *         (the bytes do not form a consistent ICU data header).
+ */
+U_CAPI UDataSwapper * U_EXPORT2
+udata_openSwapperForInputData(const void *data, int32_t length,
+                              UBool outIsBigEndian, uint8_t outCharset,
+                              UErrorCode *pErrorCode) {
+    const DataHeader *pHeader;
+    uint16_t headerSize, infoSize;
+    UBool inIsBigEndian;
+    uint8_t inCharset; /* unsigned, like the inCharset parameter of udata_openSwapper() */
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    if( data==NULL ||
+        (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
+        outCharset>U_EBCDIC_FAMILY
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    /* the minimum length was verified above; check the magic bytes and UChar size */
+    pHeader=(const DataHeader *)data;
+    if( pHeader->dataHeader.magic1!=0xda ||
+        pHeader->dataHeader.magic2!=0x27 ||
+        pHeader->info.sizeofUChar!=2
+    ) {
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return NULL;
+    }
+
+    inIsBigEndian=(UBool)pHeader->info.isBigEndian;
+    inCharset=pHeader->info.charsetFamily;
+
+    /* no swapper exists yet, so convert the two size fields by hand */
+    if(inIsBigEndian==U_IS_BIG_ENDIAN) {
+        headerSize=pHeader->dataHeader.headerSize;
+        infoSize=pHeader->info.size;
+    } else {
+        headerSize=uprv_readSwapUInt16(pHeader->dataHeader.headerSize);
+        infoSize=uprv_readSwapUInt16(pHeader->info.size);
+    }
+
+    /* the sizes must be mutually consistent and fit into the supplied length */
+    if( headerSize<sizeof(DataHeader) ||
+        infoSize<sizeof(UDataInfo) ||
+        headerSize<(sizeof(pHeader->dataHeader)+infoSize) ||
+        (length>=0 && length<headerSize)
+    ) {
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return NULL;
+    }
+
+    /* an out-of-range input charset family is rejected by udata_openSwapper() */
+    return udata_openSwapper(inIsBigEndian, inCharset, outIsBigEndian, outCharset, pErrorCode);
+}
+
+/* Free a swapper created by udata_openSwapper(), which obtains it from uprv_malloc(). */
+U_CAPI void U_EXPORT2
+udata_closeSwapper(UDataSwapper *ds) {
+ uprv_free(ds);
+}
diff --git a/thirdparty/icu4c/common/udataswp.h b/thirdparty/icu4c/common/udataswp.h
new file mode 100644
index 0000000000..5e7b043c4c
--- /dev/null
+++ b/thirdparty/icu4c/common/udataswp.h
@@ -0,0 +1,404 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: udataswp.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003jun05
+* created by: Markus W. Scherer
+*
+* Definitions for ICU data transformations for different platforms,
+* changing between big- and little-endian data and/or between
+* charset families (ASCII<->EBCDIC).
+*/
+
+#ifndef __UDATASWP_H__
+#define __UDATASWP_H__
+
+#include <stdarg.h>
+#include "unicode/utypes.h"
+
+/* forward declaration */
+
+U_CDECL_BEGIN
+
+struct UDataSwapper;
+typedef struct UDataSwapper UDataSwapper;
+
+/**
+ * Function type for data transformation.
+ * Transforms data, or just returns the length of the data if
+ * the input length is -1.
+ * Swap functions assume that their data pointers are aligned properly.
+ *
+ * Quick implementation outline:
+ * (best to copy and adapt an existing swapper implementation)
+ * check that the data looks like the expected format
+ * if(length<0) {
+ * preflight:
+ * never dereference outData
+ * read inData and determine the data size
+ * assume that inData is long enough for this
+ * } else {
+ * outData can be NULL if length==0
+ * inData==outData (in-place swapping) possible but not required!
+ * verify that length>=(actual size)
+ * if there is a chance that not every byte up to size is reached
+ * due to padding etc.:
+ * if(inData!=outData) {
+ * memcpy(outData, inData, actual size);
+ * }
+ * swap contents
+ * }
+ * return actual size
+ *
+ * Further implementation notes:
+ * - read integers from inData before swapping them
+ * because in-place swapping can make them unreadable
+ * - compareInvChars compares a local Unicode string with already-swapped
+ * output charset strings
+ *
+ * @param ds Pointer to UDataSwapper containing global data about the
+ * transformation and function pointers for handling primitive
+ * types.
+ * @param inData Pointer to the input data to be transformed or examined.
+ * @param length Length of the data, counting bytes. May be -1 for preflighting.
+ * If length>=0, then transform the data.
+ * If length==-1, then only determine the length of the data.
+ * The length cannot be determined from the data itself for all
+ * types of data (e.g., not for simple arrays of integers).
+ * @param outData Pointer to the output data buffer.
+ * If length>=0 (transformation), then the output buffer must
+ * have a capacity of at least length.
+ * If length==-1, then outData will not be used and can be NULL.
+ * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
+ * fulfill U_SUCCESS on input.
+ * @return The actual length of the data.
+ *
+ * @see UDataSwapper
+ * @internal ICU 2.8
+ */
+typedef int32_t U_CALLCONV
+UDataSwapFn(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert one uint16_t from input to platform endianness.
+ * @internal ICU 2.8
+ */
+typedef uint16_t U_CALLCONV
+UDataReadUInt16(uint16_t x);
+
+/**
+ * Convert one uint32_t from input to platform endianness.
+ * @internal ICU 2.8
+ */
+typedef uint32_t U_CALLCONV
+UDataReadUInt32(uint32_t x);
+
+/**
+ * Convert one uint16_t from platform to output endianness.
+ * @internal ICU 2.8
+ */
+typedef void U_CALLCONV
+UDataWriteUInt16(uint16_t *p, uint16_t x);
+
+/**
+ * Convert one uint32_t from platform to output endianness.
+ * @internal ICU 2.8
+ */
+typedef void U_CALLCONV
+UDataWriteUInt32(uint32_t *p, uint32_t x);
+
+/**
+ * Compare invariant-character strings, one in the output data and the
+ * other one caller-provided in Unicode.
+ * An output data string is compared because strings are usually swapped
+ * before the rest of the data, to allow for sorting of string tables
+ * according to the output charset.
+ * You can use -1 for the length parameters of NUL-terminated strings as usual.
+ * Returns Unicode code point order for invariant characters.
+ * @internal ICU 2.8
+ */
+typedef int32_t U_CALLCONV
+UDataCompareInvChars(const UDataSwapper *ds,
+ const char *outString, int32_t outLength,
+ const UChar *localString, int32_t localLength);
+
+/**
+ * Function for message output when an error occurs during data swapping.
+ * A format string and variable number of arguments are passed
+ * like for vprintf().
+ *
+ * @param context A function-specific context pointer.
+ * @param fmt The format string.
+ * @param args The arguments for format string inserts.
+ *
+ * @internal ICU 2.8
+ */
+typedef void U_CALLCONV
+UDataPrintError(void *context, const char *fmt, va_list args);
+
+/*
+ * Properties of one data transformation plus the callbacks that carry it out.
+ * udata_openSwapper() fills in everything except printError and printErrorContext,
+ * which it leaves zeroed.
+ */
+struct UDataSwapper {
+ /** Input endianness. @internal ICU 2.8 */
+ UBool inIsBigEndian;
+ /** Input charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
+ uint8_t inCharset;
+ /** Output endianness. @internal ICU 2.8 */
+ UBool outIsBigEndian;
+ /** Output charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
+ uint8_t outCharset;
+
+ /* basic functions for reading data values */
+
+ /** Convert one uint16_t from input to platform endianness. @internal ICU 2.8 */
+ UDataReadUInt16 *readUInt16;
+ /** Convert one uint32_t from input to platform endianness. @internal ICU 2.8 */
+ UDataReadUInt32 *readUInt32;
+ /** Compare an invariant-character output string with a local one. @internal ICU 2.8 */
+ UDataCompareInvChars *compareInvChars;
+
+ /* basic functions for writing data values */
+
+ /** Convert one uint16_t from platform to output endianness. @internal ICU 2.8 */
+ UDataWriteUInt16 *writeUInt16;
+ /** Convert one uint32_t from platform to output endianness. @internal ICU 2.8 */
+ UDataWriteUInt32 *writeUInt32;
+
+ /* basic functions for data transformations */
+
+ /** Transform an array of 16-bit integers. @internal ICU 2.8 */
+ UDataSwapFn *swapArray16;
+ /** Transform an array of 32-bit integers. @internal ICU 2.8 */
+ UDataSwapFn *swapArray32;
+ /** Transform an array of 64-bit integers. @internal ICU 53 */
+ UDataSwapFn *swapArray64;
+ /** Transform an invariant-character string. @internal ICU 2.8 */
+ UDataSwapFn *swapInvChars;
+
+ /**
+ * Function for message output when an error occurs during data swapping.
+ * Can be NULL.
+ * @internal ICU 2.8
+ */
+ UDataPrintError *printError;
+ /** Context pointer for printError. @internal ICU 2.8 */
+ void *printErrorContext;
+};
+
+U_CDECL_END
+
+U_CAPI UDataSwapper * U_EXPORT2
+udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset,
+ UBool outIsBigEndian, uint8_t outCharset,
+ UErrorCode *pErrorCode);
+
+/**
+ * Open a UDataSwapper for the given input data and the specified output
+ * characteristics.
+ * Values of -1 for any of the characteristics mean the local platform's
+ * characteristics.
+ *
+ * @see udata_swap
+ * @internal ICU 2.8
+ */
+U_CAPI UDataSwapper * U_EXPORT2
+udata_openSwapperForInputData(const void *data, int32_t length,
+ UBool outIsBigEndian, uint8_t outCharset,
+ UErrorCode *pErrorCode);
+
+U_CAPI void U_EXPORT2
+udata_closeSwapper(UDataSwapper *ds);
+
+/**
+ * Read the beginning of an ICU data piece, recognize magic bytes,
+ * swap the structure.
+ * Set a U_UNSUPPORTED_ERROR if it does not look like an ICU data piece.
+ *
+ * @return The size of the data header, in bytes.
+ *
+ * @internal ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+udata_swapDataHeader(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert one int16_t from input to platform endianness.
+ * @internal ICU 2.8
+ */
+U_CAPI int16_t U_EXPORT2
+udata_readInt16(const UDataSwapper *ds, int16_t x);
+
+/**
+ * Convert one int32_t from input to platform endianness.
+ * @internal ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+udata_readInt32(const UDataSwapper *ds, int32_t x);
+
+/**
+ * Swap a block of invariant, NUL-terminated strings, but not padding
+ * bytes after the last string.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+udata_swapInvStringBlock(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+U_CAPI void U_EXPORT2
+udata_printError(const UDataSwapper *ds,
+ const char *fmt,
+ ...);
+
+/* internal exports from putil.c -------------------------------------------- */
+
+/* declared here to keep them out of the public putil.h */
+
+/**
+ * Swap invariant char * strings ASCII->EBCDIC.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_ebcdicFromAscii(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Copy invariant ASCII char * strings and verify they are invariant.
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_copyAscii(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Swap invariant char * strings EBCDIC->ASCII.
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_asciiFromEbcdic(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Copy invariant EBCDIC char * strings and verify they are invariant.
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_copyEbcdic(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Compare ASCII invariant char * with Unicode invariant UChar *
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_compareInvAscii(const UDataSwapper *ds,
+ const char *outString, int32_t outLength,
+ const UChar *localString, int32_t localLength);
+
+/**
+ * Compare EBCDIC invariant char * with Unicode invariant UChar *
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_compareInvEbcdic(const UDataSwapper *ds,
+ const char *outString, int32_t outLength,
+ const UChar *localString, int32_t localLength);
+
+/**
+ * \def uprv_compareInvWithUChar
+ * Compare an invariant-character string with a UChar string
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define uprv_compareInvWithUChar uprv_compareInvAscii
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define uprv_compareInvWithUChar uprv_compareInvEbcdic
+#else
+# error Unknown charset family!
+#endif
+
+// utrie_swap.cpp -----------------------------------------------------------***
+
+/**
+ * Swaps a serialized UTrie.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+utrie_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Swaps a serialized UTrie2.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+utrie2_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Swaps a serialized UCPTrie.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+ucptrie_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/**
+ * Swaps a serialized UTrie, UTrie2, or UCPTrie.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+utrie_swapAnyVersion(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+/* material... -------------------------------------------------------------- */
+
+#if 0
+
+/* udata.h */
+
+/**
+ * Public API function in udata.c
+ *
+ * Same as udata_openChoice() but automatically swaps the data.
+ * isAcceptable, if not NULL, may accept data with endianness and charset family
+ * different from the current platform's properties.
+ * If the data is acceptable and the platform properties do not match, then
+ * the swap function is called to swap an allocated version of the data.
+ * Preflighting may or may not be performed depending on whether the size of
+ * the loaded data item is known.
+ *
+ * @param isAcceptable Same as for udata_openChoice(). May be NULL.
+ *
+ * @internal ICU 2.8
+ */
+U_CAPI UDataMemory * U_EXPORT2
+udata_openSwap(const char *path, const char *type, const char *name,
+ UDataMemoryIsAcceptable *isAcceptable, void *isAcceptableContext,
+ UDataSwapFn *swap,
+ UDataPrintError *printError, void *printErrorContext,
+ UErrorCode *pErrorCode);
+
+#endif
+
+#endif
diff --git a/thirdparty/icu4c/common/uelement.h b/thirdparty/icu4c/common/uelement.h
new file mode 100644
index 0000000000..88dd4d66fb
--- /dev/null
+++ b/thirdparty/icu4c/common/uelement.h
@@ -0,0 +1,91 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 1997-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: uelement.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011jul04
+* created by: Markus W. Scherer
+*
+* Common definitions for UHashTable and UVector.
+* UHashTok moved here from uhash.h and renamed UElement.
+* This allows users of UVector to avoid the confusing #include of uhash.h.
+* uhash.h aliases UElement to UHashTok,
+* so that we need not change all of its code and its users.
+*/
+
+#ifndef __UELEMENT_H__
+#define __UELEMENT_H__
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/**
+ * A UVector element, or a key or value within a UHashtable.
+ * It may be either a 32-bit integral value or an opaque void* pointer.
+ * The void* pointer may be smaller than 32 bits (e.g. 24 bits)
+ * or may be larger (e.g. 64 bits).
+ *
+ * Because a UElement is the size of a native pointer or a 32-bit
+ * integer, we pass it around by value.
+ */
+union UElement {
+ void* pointer; /* used when the element holds an opaque object pointer */
+ int32_t integer; /* used when the element holds a 32-bit integral value */
+};
+typedef union UElement UElement;
+
+/**
+ * An element-equality (boolean) comparison function.
+ * @param e1 An element (object or integer)
+ * @param e2 An element (object or integer)
+ * @return true if the two elements are equal.
+ */
+typedef UBool U_CALLCONV UElementsAreEqual(const UElement e1, const UElement e2);
+
+/**
+ * An element sorting (three-way) comparison function.
+ * @param e1 An element (object or integer)
+ * @param e2 An element (object or integer)
+ * @return 0 if the two elements are equal, -1 if e1 is < e2, or +1 if e1 is > e2.
+ */
+typedef int8_t U_CALLCONV UElementComparator(UElement e1, UElement e2);
+
+/**
+ * An element assignment function. It may copy an integer, copy
+ * a pointer, or clone a pointer, as appropriate.
+ * @param dst The element to be assigned to
+ * @param src The element to assign from
+ */
+typedef void U_CALLCONV UElementAssigner(UElement *dst, UElement *src);
+
+U_CDECL_END
+
+/**
+ * Comparator function for UnicodeString* keys. Implements UElementsAreEqual.
+ * @param key1 The string for comparison
+ * @param key2 The string for comparison
+ * @return true if key1 and key2 are equal, return false otherwise.
+ */
+U_CAPI UBool U_EXPORT2
+uhash_compareUnicodeString(const UElement key1, const UElement key2);
+
+/**
+ * Comparator function for UnicodeString* keys (case insensitive).
+ * Make sure to use together with uhash_hashCaselessUnicodeString.
+ * Implements UElementsAreEqual.
+ * @param key1 The string for comparison
+ * @param key2 The string for comparison
+ * @return true if key1 and key2 are equal, return false otherwise.
+ */
+U_CAPI UBool U_EXPORT2
+uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2);
+
+#endif /* __UELEMENT_H__ */
diff --git a/thirdparty/icu4c/common/uenum.cpp b/thirdparty/icu4c/common/uenum.cpp
new file mode 100644
index 0000000000..11d895ebcd
--- /dev/null
+++ b/thirdparty/icu4c/common/uenum.cpp
@@ -0,0 +1,189 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uenum.c
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2002jul08
+* created by: Vladimir Weinstein
+*/
+
+#include "unicode/putil.h"
+#include "uenumimp.h"
+#include "cmemory.h"
+
+/* Layout of the baseContext buffer: a length header followed directly
+ * by the payload. _getBuffer() allocates sizeof(int32_t) + capacity
+ * bytes for it and hands out the address of 'data'. */
+typedef struct {
+ int32_t len; /* number of bytes available starting at 'data' */
+ char data; /* actual data starts here */
+} _UEnumBuffer;
+
+/* Extra bytes to allocate in the baseContext buffer. _getBuffer() adds
+ * this to the requested capacity whenever it allocates or reallocates. */
+static const int32_t PAD = 8;
+
+/**
+ * Return a pointer to the data area of the baseContext buffer,
+ * allocating or growing the buffer if fewer than 'capacity' bytes are
+ * available. Every (re)allocation requests PAD extra bytes.
+ *
+ * @param en       enumeration owning the buffer; must not be NULL
+ * @param capacity number of data bytes the caller needs
+ * @return pointer to at least 'capacity' writable bytes, or NULL if
+ *         memory could not be obtained. When growing fails the
+ *         previous buffer stays attached to 'en' so that it can still
+ *         be released later instead of being leaked.
+ */
+static void* _getBuffer(UEnumeration* en, int32_t capacity) {
+    _UEnumBuffer* buffer = (_UEnumBuffer*) en->baseContext;
+
+    if (buffer != NULL && buffer->len >= capacity) {
+        return (void*) &buffer->data;   /* already large enough */
+    }
+
+    capacity += PAD;
+    if (buffer != NULL) {
+        /* Grow through a local: on failure uprv_realloc() returns NULL
+         * and the old block must remain reachable via en->baseContext. */
+        buffer = (_UEnumBuffer*) uprv_realloc(buffer, sizeof(int32_t) + capacity);
+    } else {
+        buffer = (_UEnumBuffer*) uprv_malloc(sizeof(int32_t) + capacity);
+    }
+    if (buffer == NULL) {
+        return NULL;   /* en->baseContext is left untouched */
+    }
+
+    buffer->len = capacity;
+    en->baseContext = buffer;
+    return (void*) &buffer->data;
+}
+
+/**
+ * Dispose of an enumeration. A NULL 'en' is ignored. When the
+ * enumeration has a close callback, the baseContext buffer is freed
+ * and the callback is invoked; otherwise the object itself is freed.
+ */
+U_CAPI void U_EXPORT2
+uenum_close(UEnumeration* en)
+{
+    if (!en) {
+        return;
+    }
+    if (en->close == NULL) {
+        /* this seems dangerous, but we better kill the object */
+        uprv_free(en);
+        return;
+    }
+    /* Drop the base-class buffer before handing over to the callback. */
+    if (en->baseContext) {
+        uprv_free(en->baseContext);
+    }
+    en->close(en);
+}
+
+/**
+ * Forward to the enumeration's count callback.
+ *
+ * @return the callback's result, or -1 when 'en' is NULL, *status
+ *         already indicates failure, or no callback is installed
+ *         (in which case *status becomes U_UNSUPPORTED_ERROR).
+ */
+U_CAPI int32_t U_EXPORT2
+uenum_count(UEnumeration* en, UErrorCode* status)
+{
+    if (!en || U_FAILURE(*status)) {
+        return -1;
+    }
+    if (en->count == NULL) {
+        *status = U_UNSUPPORTED_ERROR;
+        return -1;
+    }
+    return en->count(en, status);
+}
+
+/* Don't call this directly. Only uenum_unext should be calling this.
+ *
+ * Fetches the next char* element through en->next and converts it,
+ * including its terminator, into the baseContext buffer as UChars.
+ * 'resultLength' may be NULL; when given it receives the length
+ * reported by en->next (0 if nothing was fetched). */
+U_CAPI const UChar* U_EXPORT2
+uenum_unextDefault(UEnumeration* en,
+                   int32_t* resultLength,
+                   UErrorCode* status)
+{
+    UChar *converted = NULL;
+    int32_t length = 0;
+
+    if (en->next == NULL) {
+        *status = U_UNSUPPORTED_ERROR;
+    } else {
+        const char *chars = en->next(en, &length, status);
+        if (chars != NULL) {
+            converted = (UChar*) _getBuffer(en, (length+1) * sizeof(UChar));
+            if (converted != NULL) {
+                u_charsToUChars(chars, converted, length+1);
+            } else {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+            }
+        }
+    }
+
+    if (resultLength) {
+        *resultLength = length;
+    }
+    return converted;
+}
+
+/* Don't call this directly. Only uenum_next should be calling this.
+ *
+ * Fetches the next UChar* element through en->uNext and converts it,
+ * including its terminator, into the baseContext buffer as chars.
+ * 'resultLength' is dereferenced once an element has been fetched, so
+ * it must not be NULL. */
+U_CAPI const char* U_EXPORT2
+uenum_nextDefault(UEnumeration* en,
+                  int32_t* resultLength,
+                  UErrorCode* status)
+{
+    if (en->uNext == NULL) {
+        *status = U_UNSUPPORTED_ERROR;
+        return NULL;
+    }
+
+    const UChar *wide = en->uNext(en, resultLength, status);
+    if (wide == NULL) {
+        return NULL;
+    }
+
+    char *narrow = (char*) _getBuffer(en, (*resultLength+1) * sizeof(char));
+    if (!narrow) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    u_UCharsToChars(wide, narrow, *resultLength + 1);
+    return narrow;
+}
+
+/**
+ * Return the next element as a UChar string via the uNext callback.
+ * Yields NULL when 'en' is NULL or *status already indicates failure;
+ * without a callback *status becomes U_UNSUPPORTED_ERROR.
+ */
+U_CAPI const UChar* U_EXPORT2
+uenum_unext(UEnumeration* en,
+            int32_t* resultLength,
+            UErrorCode* status)
+{
+    if (!en || U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (en->uNext == NULL) {
+        *status = U_UNSUPPORTED_ERROR;
+        return NULL;
+    }
+    return en->uNext(en, resultLength, status);
+}
+
+/**
+ * Return the next element as a char string via the next callback.
+ * The callback always receives a valid length pointer: a local dummy
+ * is substituted when the caller passes a NULL 'resultLength'.
+ * Yields NULL when 'en' is NULL or *status already indicates failure;
+ * without a callback *status becomes U_UNSUPPORTED_ERROR.
+ */
+U_CAPI const char* U_EXPORT2
+uenum_next(UEnumeration* en,
+           int32_t* resultLength,
+           UErrorCode* status)
+{
+    int32_t dummyLength = 0;
+
+    if (!en || U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (en->next == NULL) {
+        *status = U_UNSUPPORTED_ERROR;
+        return NULL;
+    }
+    return en->next(en,
+                    resultLength != NULL ? resultLength : &dummyLength,
+                    status);
+}
+
+/**
+ * Forward to the enumeration's reset callback. Does nothing when 'en'
+ * is NULL or *status already indicates failure; without a callback
+ * *status becomes U_UNSUPPORTED_ERROR.
+ */
+U_CAPI void U_EXPORT2
+uenum_reset(UEnumeration* en, UErrorCode* status)
+{
+    if (!en || U_FAILURE(*status)) {
+        return;
+    }
+    if (en->reset == NULL) {
+        *status = U_UNSUPPORTED_ERROR;
+        return;
+    }
+    en->reset(en, status);
+}
diff --git a/thirdparty/icu4c/common/uenumimp.h b/thirdparty/icu4c/common/uenumimp.h
new file mode 100644
index 0000000000..9c9df75ae0
--- /dev/null
+++ b/thirdparty/icu4c/common/uenumimp.h
@@ -0,0 +1,155 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uenumimp.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2002jul08
+* created by: Vladimir Weinstein
+*/
+
+#ifndef __UENUMIMP_H
+#define __UENUMIMP_H
+
+#include "unicode/uenum.h"
+
+U_CDECL_BEGIN
+
+/**
+ * following are the type declarations for
+ * implementations of APIs. If any of these
+ * functions are NULL, U_UNSUPPORTED_ERROR
+ * is returned. If close is NULL, the enumeration
+ * object is going to be released.
+ * Initial error checking is done in the body
+ * of API function, so the implementations
+ * need not check the initial error condition.
+ */
+
+/**
+ * Function type declaration for uenum_close().
+ *
+ * This function should clean up the enumerator object.
+ * It is stored in UEnumeration::close.
+ *
+ * @param en enumeration to be closed
+ */
+typedef void U_CALLCONV
+UEnumClose(UEnumeration *en);
+
+/**
+ * Function type declaration for uenum_count().
+ *
+ * This function should count the number of elements
+ * in this enumeration.
+ * It is stored in UEnumeration::count.
+ *
+ * @param en enumeration to be counted
+ * @param status pointer to UErrorCode variable
+ * @return number of elements in enumeration
+ */
+typedef int32_t U_CALLCONV
+UEnumCount(UEnumeration *en, UErrorCode *status);
+
+/**
+ * Function type declaration for uenum_unext().
+ *
+ * This function returns the next element as a UChar *,
+ * or NULL after all elements have been enumerated.
+ * It is stored in UEnumeration::uNext.
+ *
+ * @param en enumeration
+ * @param resultLength pointer to result length
+ * @param status pointer to UErrorCode variable
+ * @return next element as UChar *,
+ * or NULL after all elements have been enumerated
+ */
+typedef const UChar* U_CALLCONV
+UEnumUNext(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/**
+ * Function type declaration for uenum_next().
+ *
+ * This function returns the next element as a char *,
+ * or NULL after all elements have been enumerated.
+ * It is stored in UEnumeration::next.
+ *
+ * @param en enumeration
+ * @param resultLength pointer to result length
+ * @param status pointer to UErrorCode variable
+ * @return next element as char *,
+ * or NULL after all elements have been enumerated
+ */
+typedef const char* U_CALLCONV
+UEnumNext(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/**
+ * Function type declaration for uenum_reset().
+ *
+ * This function should reset the enumeration
+ * object.
+ * It is stored in UEnumeration::reset.
+ *
+ * @param en enumeration
+ * @param status pointer to UErrorCode variable
+ */
+typedef void U_CALLCONV
+UEnumReset(UEnumeration* en,
+ UErrorCode* status);
+
+
+/* An enumeration object: two context pointers plus the table of
+ * callbacks that the public uenum_*() functions dispatch to. */
+struct UEnumeration {
+ /* baseContext. For the base class only. Don't touch! */
+ void *baseContext;
+
+ /* context. Use it for what you need */
+ void *context;
+
+ /**
+ * these are functions that will
+ * be used for APIs
+ */
+ /* called from uenum_close */
+ UEnumClose *close;
+ /* called from uenum_count */
+ UEnumCount *count;
+ /* called from uenum_unext */
+ UEnumUNext *uNext;
+ /* called from uenum_next */
+ UEnumNext *next;
+ /* called from uenum_reset */
+ UEnumReset *reset;
+};
+
+U_CDECL_END
+
+/* This is the default implementation for uenum_unext().
+ * It automatically converts the char * string to UChar *.
+ * Don't call this directly. This is called internally by uenum_unext
+ * when a UEnumeration is defined with 'uNext' pointing to this
+ * function. Such an enumeration therefore needs a 'next' callback
+ * that supplies the char * strings to convert.
+ */
+U_CAPI const UChar* U_EXPORT2
+uenum_unextDefault(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/* This is the default implementation for uenum_next().
+ * It automatically converts the UChar * string to char *.
+ * Don't call this directly. This is called internally by uenum_next
+ * when a UEnumeration is defined with 'next' pointing to this
+ * function. Such an enumeration therefore needs a 'uNext' callback
+ * that supplies the UChar * strings to convert.
+ */
+U_CAPI const char* U_EXPORT2
+uenum_nextDefault(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+#endif
diff --git a/thirdparty/icu4c/common/uhash.cpp b/thirdparty/icu4c/common/uhash.cpp
new file mode 100644
index 0000000000..86311ceb0b
--- /dev/null
+++ b/thirdparty/icu4c/common/uhash.cpp
@@ -0,0 +1,991 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* Date Name Description
+* 03/22/00 aliu Adapted from original C++ ICU Hashtable.
+* 07/06/01 aliu Modified to support int32_t keys on
+* platforms with sizeof(void*) < 32.
+******************************************************************************
+*/
+
+#include "uhash.h"
+#include "unicode/ustring.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "uassert.h"
+#include "ustr_imp.h"
+
+/* This hashtable is implemented as a double hash. All elements are
+ * stored in a single array with no secondary storage for collision
+ * resolution (no linked list, etc.). When there is a hash collision
+ * (when two unequal keys have the same hashcode) we resolve this by
+ * using a secondary hash. The secondary hash is an increment
+ * computed as a hash function (a different one) of the primary
+ * hashcode. This increment is added to the initial hash value to
+ * obtain further slots assigned to the same hash code. For this to
+ * work, the length of the array and the increment must be relatively
+ * prime. The easiest way to achieve this is to have the length of
+ * the array be prime, and the increment be any value from
+ * 1..length-1.
+ *
+ * Hashcodes are 32-bit integers. We make sure all hashcodes are
+ * non-negative by masking off the top bit. This has two effects: (1)
+ * modulo arithmetic is simplified. If we allowed negative hashcodes,
+ * then when we computed hashcode % length, we could get a negative
+ * result, which we would then have to adjust back into range. It's
+ * simpler to just make hashcodes non-negative. (2) It makes it easy
+ * to check for empty vs. occupied slots in the table. We just mark
+ * empty or deleted slots with a negative hashcode.
+ *
+ * The central function is _uhash_find(). This function looks for a
+ * slot matching the given key and hashcode. If one is found, it
+ * returns a pointer to that slot. If the table is full, and no match
+ * is found, it returns NULL -- in theory. This would make the code
+ * more complicated, since all callers of _uhash_find() would then
+ * have to check for a NULL result. To keep this from happening, we
+ * don't allow the table to fill. When there is only one
+ * empty/deleted slot left, uhash_put() will refuse to increase the
+ * count, and fail. This simplifies the code. In practice, one will
+ * seldom encounter this using default UHashtables. However, if a
+ * hashtable is set to a U_FIXED resize policy, or if memory is
+ * exhausted, then the table may fill.
+ *
+ * High and low water ratios control rehashing. They establish levels
+ * of fullness (from 0 to 1) outside of which the data array is
+ * reallocated and repopulated. Setting the low water ratio to zero
+ * means the table will never shrink. Setting the high water ratio to
+ * one means the table will never grow. The ratios should be
+ * coordinated with the ratio between successive elements of the
+ * PRIMES table, so that when the primeIndex is incremented or
+ * decremented during rehashing, it brings the ratio of count / length
+ * back into the desired range (between low and high water ratios).
+ */
+
+/********************************************************************
+ * PRIVATE Constants, Macros
+ ********************************************************************/
+
+/* This is a list of non-consecutive primes chosen such that
+ * PRIMES[i+1] ~ 2*PRIMES[i]. (Currently, the ratio ranges from 1.81
+ * to 2.18; the inverse ratio ranges from 0.459 to 0.552.) If this
+ * ratio is changed, the low and high water ratios should also be
+ * adjusted to suit.
+ *
+ * These prime numbers were also chosen so that they are the largest
+ * prime number while being less than a power of two.
+ *
+ * The table is indexed by UHashtable::primeIndex; the selected entry
+ * becomes the table length in _uhash_allocate().
+ */
+static const int32_t PRIMES[] = {
+ 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
+ 65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
+ 16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
+ 1073741789, 2147483647 /*, 4294967291 */
+};
+
+/* Number of entries in PRIMES. */
+#define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES)
+/* Index used by uhash_open() and uhash_init(); PRIMES[4] is 127. */
+#define DEFAULT_PRIME_INDEX 4
+
+/* These ratios are tuned to the PRIMES array such that a resize
+ * places the table back into the zone of non-resizing. That is,
+ * after a call to _uhash_rehash(), a subsequent call to
+ * _uhash_rehash() should do nothing (should not churn). This is only
+ * a potential problem with U_GROW_AND_SHRINK.
+ *
+ * Each policy owns one (low, high) pair:
+ * _uhash_internalSetResizePolicy() reads entry [policy * 2] as the
+ * low water ratio and entry [policy * 2 + 1] as the high water ratio.
+ */
+static const float RESIZE_POLICY_RATIO_TABLE[6] = {
+ /* low, high water ratio */
+ 0.0F, 0.5F, /* U_GROW: Grow on demand, do not shrink */
+ 0.1F, 0.5F, /* U_GROW_AND_SHRINK: Grow and shrink on demand */
+ 0.0F, 1.0F /* U_FIXED: Never change size */
+};
+
+/*
+ Invariants for hashcode values:
+
+ * DELETED < 0
+ * EMPTY < 0
+ * Real hashes >= 0
+
+ Hashcodes may not start out this way, but internally they are
+ adjusted so that they are always positive. We assume 32-bit
+ hashcodes; adjust these constants for other hashcode sizes.
+*/
+/* Marker stored in the hashcode field of a slot whose entry was removed. */
+#define HASH_DELETED ((int32_t) 0x80000000)
+/* Marker stored in the hashcode field of a slot that was never used. */
+#define HASH_EMPTY ((int32_t) HASH_DELETED + 1)
+
+/* True for both markers above, since real hashcodes are never negative. */
+#define IS_EMPTY_OR_DELETED(x) ((x) < 0)
+
+/* Invoke the table's key and value deleters on the given pointers.
+ * A deleter is skipped when it is NULL or when its pointer is NULL.
+ * This macro expects a UHashTok.pointer as its keypointer and
+ * valuepointer parameters. The arguments are expanded without
+ * parentheses, so pass simple expressions only. */
+#define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) UPRV_BLOCK_MACRO_BEGIN { \
+ if (hash->keyDeleter != NULL && keypointer != NULL) { \
+ (*hash->keyDeleter)(keypointer); \
+ } \
+ if (hash->valueDeleter != NULL && valuepointer != NULL) { \
+ (*hash->valueDeleter)(valuepointer); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/*
+ * Constants for hinting whether a key or value is an integer
+ * or a pointer. If a hint bit is zero, then the associated
+ * token is assumed to be an integer.
+ * The two bits are independent: the public put functions OR them
+ * together and _uhash_setElement() tests each one with '&'.
+ */
+#define HINT_KEY_POINTER (1)
+#define HINT_VALUE_POINTER (2)
+
+/********************************************************************
+ * PRIVATE Implementation
+ ********************************************************************/
+
+/**
+ * Overwrite slot 'e' with the given hashcode, key and value.
+ *
+ * The slot's previous key and value are handed to the table's
+ * deleters when those are installed, unless the old pointer is NULL
+ * or is the same pointer that is being stored.
+ *
+ * @param hint HINT_KEY_POINTER / HINT_VALUE_POINTER bits telling
+ *             which tokens the caller treats as pointers
+ * @return the slot's previous value; its pointer member is NULL
+ *         whenever a value deleter is installed.
+ */
+static UHashTok
+_uhash_setElement(UHashtable *hash, UHashElement* e,
+                  int32_t hashcode,
+                  UHashTok key, UHashTok value, int8_t hint) {
+
+    UHashTok previous = e->value;
+
+    if (hash->keyDeleter != NULL) {
+        void *staleKey = e->key.pointer;
+        if (staleKey != NULL && staleKey != key.pointer) { /* Avoid double deletion */
+            (*hash->keyDeleter)(staleKey);
+        }
+    }
+    if (hash->valueDeleter != NULL) {
+        void *staleValue = previous.pointer;
+        if (staleValue != NULL && staleValue != value.pointer) { /* Avoid double deletion */
+            (*hash->valueDeleter)(staleValue);
+        }
+        previous.pointer = NULL;
+    }
+
+    /* Compilers should copy the UHashTok union correctly, but even if
+     * they do, memory heap tools (e.g. BoundsChecker) can get
+     * confused when a pointer is cloaked in a union and then copied.
+     * TO ALLEVIATE THIS, we use hints (based on what API the user is
+     * calling) to copy pointers when we know the user thinks
+     * something is a pointer. */
+    if ((hint & HINT_KEY_POINTER) != 0) {
+        e->key.pointer = key.pointer;
+    } else {
+        e->key = key;
+    }
+    if ((hint & HINT_VALUE_POINTER) != 0) {
+        e->value.pointer = value.pointer;
+    } else {
+        e->value = value;
+    }
+    e->hashcode = hashcode;
+
+    return previous;
+}
+
+/**
+ * Turn an occupied slot into a deleted one and decrement the count.
+ * Assumes that the given element is not empty or deleted.
+ *
+ * @return whatever _uhash_setElement() reports as the old value.
+ */
+static UHashTok
+_uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) {
+    UHashTok blank;
+
+    U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode));
+
+    blank.pointer = NULL;
+    blank.integer = 0;
+    --hash->count;
+
+    return _uhash_setElement(hash, e, HASH_DELETED, blank, blank, 0);
+}
+
+/**
+ * Load the low and high water ratios for 'policy' from
+ * RESIZE_POLICY_RATIO_TABLE. The water marks themselves are not
+ * recomputed here.
+ */
+static void
+_uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
+    U_ASSERT(hash != NULL);
+    U_ASSERT(((int32_t)policy) >= 0);
+    U_ASSERT(((int32_t)policy) < 3);
+
+    const float *ratios = &RESIZE_POLICY_RATIO_TABLE[policy * 2];
+    hash->lowWaterRatio = ratios[0];
+    hash->highWaterRatio = ratios[1];
+}
+
+/**
+ * Allocate the internal data array with the size selected by the
+ * given prime index and mark every slot empty.
+ *
+ * Nothing happens if *status already indicates failure. Otherwise
+ * primeIndex, length and the elements pointer are overwritten before
+ * the allocation result is known; the previous array is not released
+ * here, so a caller that still needs it must keep its own pointer.
+ * If the allocation fails, *status is set to
+ * U_MEMORY_ALLOCATION_ERROR and hash->elements is NULL. On success
+ * the count is reset to 0 and both water marks are recomputed.
+ *
+ * Caller must ensure primeIndex is in range 0..PRIMES_LENGTH-1.
+ */
+static void
+_uhash_allocate(UHashtable *hash,
+                int32_t primeIndex,
+                UErrorCode *status) {
+
+    if (U_FAILURE(*status)) return;
+
+    U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH);
+
+    hash->primeIndex = static_cast<int8_t>(primeIndex);
+    hash->length = PRIMES[primeIndex];
+
+    hash->elements = (UHashElement*)
+        uprv_malloc(sizeof(UHashElement) * hash->length);
+    if (hash->elements == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    UHashTok blank;
+    blank.pointer = NULL; /* Only one of these two is needed */
+    blank.integer = 0;    /* but we don't know which one. */
+
+    for (int32_t i = 0; i < hash->length; ++i) {
+        UHashElement *slot = &hash->elements[i];
+        slot->key = blank;
+        slot->value = blank;
+        slot->hashcode = HASH_EMPTY;
+    }
+
+    hash->count = 0;
+    hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
+    hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
+}
+
+/**
+ * Initialize caller-provided table storage: install the callbacks,
+ * clear both deleters, select the U_GROW policy and allocate the slot
+ * array for 'primeIndex'. The table is marked as not heap-allocated.
+ *
+ * @return 'result', or NULL if *status indicates failure on entry or
+ *         after the allocation.
+ */
+static UHashtable*
+_uhash_init(UHashtable *result,
+            UHashFunction *keyHash,
+            UKeyComparator *keyComp,
+            UValueComparator *valueComp,
+            int32_t primeIndex,
+            UErrorCode *status)
+{
+    if (U_FAILURE(*status)) return NULL;
+    U_ASSERT(keyHash != NULL);
+    U_ASSERT(keyComp != NULL);
+
+    result->keyHasher       = keyHash;
+    result->keyComparator   = keyComp;
+    result->valueComparator = valueComp;
+    result->keyDeleter      = NULL;
+    result->valueDeleter    = NULL;
+    result->allocated       = FALSE;
+    _uhash_internalSetResizePolicy(result, U_GROW);
+
+    _uhash_allocate(result, primeIndex, status);
+
+    return U_FAILURE(*status) ? NULL : result;
+}
+
+/**
+ * Heap-allocate a table, initialize it through _uhash_init() and mark
+ * it as allocated so that uhash_close() frees the struct as well.
+ *
+ * @return the new table, or NULL with *status set on failure (the
+ *         struct is freed again if initialization fails).
+ */
+static UHashtable*
+_uhash_create(UHashFunction *keyHash,
+              UKeyComparator *keyComp,
+              UValueComparator *valueComp,
+              int32_t primeIndex,
+              UErrorCode *status) {
+    if (U_FAILURE(*status)) return NULL;
+
+    UHashtable *table = (UHashtable*) uprv_malloc(sizeof(UHashtable));
+    if (table == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    _uhash_init(table, keyHash, keyComp, valueComp, primeIndex, status);
+    table->allocated = TRUE;
+
+    if (!U_FAILURE(*status)) {
+        return table;
+    }
+    uprv_free(table);
+    return NULL;
+}
+
+/**
+ * Look for a key in the table, or if no such key exists, the first
+ * empty slot matching the given hashcode. Keys are compared using
+ * the keyComparator function.
+ *
+ * First find the start position, which is the hashcode modulo
+ * the length. Test it to see if it is:
+ *
+ * a. identical: First check the hash values for a quick check,
+ * then compare keys for equality using keyComparator.
+ * b. deleted
+ * c. empty
+ *
+ * Stop if it is identical or empty, otherwise continue by adding a
+ * "jump" value (moduloing by the length again to keep it within
+ * range) and retesting. For efficiency, there need to be enough empty
+ * slots so that the searches stop within a reasonable amount of time.
+ * This can be changed by changing the high/low water marks.
+ *
+ * When no matching key is found, the first deleted slot met along the
+ * probe sequence is returned in preference to the terminating empty
+ * slot.
+ *
+ * In theory, this function could fail to find any slot, if the table
+ * is full (no empty or deleted slots) and no matching key is found.
+ * In practice, we prevent this elsewhere (in uhash_put) by making sure
+ * the last slot in the table is never filled; the code below treats
+ * that situation as unreachable rather than returning NULL.
+ *
+ * The size of the table should be prime for this algorithm to work;
+ * otherwise we are not guaranteed that the jump value (the secondary
+ * hash) is relatively prime to the table length.
+ */
+static UHashElement*
+_uhash_find(const UHashtable *hash, UHashTok key,
+ int32_t hashcode) {
+
+ int32_t firstDeleted = -1; /* assume invalid index */
+ int32_t theIndex, startIndex;
+ int32_t jump = 0; /* lazy evaluate */
+ int32_t tableHash;
+ UHashElement *elements = hash->elements;
+
+ hashcode &= 0x7FFFFFFF; /* must be positive */
+ /* NOTE(review): one bit of the hashcode is flipped before the modulo;
+ * the rationale for the 0x4000000 constant is not visible here. */
+ startIndex = theIndex = (hashcode ^ 0x4000000) % hash->length;
+
+ do {
+ tableHash = elements[theIndex].hashcode;
+ if (tableHash == hashcode) { /* quick check */
+ if ((*hash->keyComparator)(key, elements[theIndex].key)) {
+ return &(elements[theIndex]);
+ }
+ } else if (!IS_EMPTY_OR_DELETED(tableHash)) {
+ /* We have hit a slot which contains a key-value pair,
+ * but for which the hash code does not match. Keep
+ * looking.
+ */
+ } else if (tableHash == HASH_EMPTY) { /* empty, end o' the line */
+ break;
+ } else if (firstDeleted < 0) { /* remember first deleted */
+ firstDeleted = theIndex;
+ }
+ if (jump == 0) { /* lazy compute jump */
+ /* The jump value must be relatively prime to the table
+ * length. As long as the length is prime, then any value
+ * 1..length-1 will be relatively prime to it.
+ */
+ jump = (hashcode % (hash->length - 1)) + 1;
+ }
+ theIndex = (theIndex + jump) % hash->length;
+ } while (theIndex != startIndex);
+
+ /* Here tableHash still holds the hashcode of the last slot examined. */
+ if (firstDeleted >= 0) {
+ theIndex = firstDeleted; /* reset if had deleted slot */
+ } else if (tableHash != HASH_EMPTY) {
+ /* We get to this point if the hashtable is full (no empty or
+ * deleted slots), and we've failed to find a match. THIS
+ * WILL NEVER HAPPEN as long as uhash_put() makes sure that
+ * count is always < length.
+ */
+ UPRV_UNREACHABLE;
+ }
+ return &(elements[theIndex]);
+}
+
+/**
+ * Attempt to grow or shrink the data array in order to make the
+ * count fit between the high and low water marks. _uhash_put() and
+ * _uhash_remove() call this method when the count exceeds the high or
+ * falls below the low water mark. This method may do nothing, if
+ * memory allocation fails, or if the count is already in range, or if
+ * the prime index is already at the low or high limit. In any case,
+ * upon return the array is valid.
+ *
+ * If the allocation fails, *status reports the error and the table
+ * keeps its previous array, length and prime index, so a later call
+ * retries from a consistent state.
+ */
+static void
+_uhash_rehash(UHashtable *hash, UErrorCode *status) {
+
+    UHashElement *old = hash->elements;
+    int32_t oldLength = hash->length;
+    int32_t oldPrimeIndex = hash->primeIndex;
+    int32_t newPrimeIndex = oldPrimeIndex;
+    int32_t i;
+
+    if (hash->count > hash->highWaterMark) {
+        if (++newPrimeIndex >= PRIMES_LENGTH) {
+            return;
+        }
+    } else if (hash->count < hash->lowWaterMark) {
+        if (--newPrimeIndex < 0) {
+            return;
+        }
+    } else {
+        return;
+    }
+
+    _uhash_allocate(hash, newPrimeIndex, status);
+
+    if (U_FAILURE(*status)) {
+        /* _uhash_allocate() overwrites primeIndex, length and elements
+         * before it knows whether the allocation succeeded; put all
+         * three back so they keep describing the old array. */
+        hash->elements = old;
+        hash->length = oldLength;
+        hash->primeIndex = static_cast<int8_t>(oldPrimeIndex);
+        return;
+    }
+
+    /* Re-insert every live entry; the new array starts with count 0. */
+    for (i = oldLength - 1; i >= 0; --i) {
+        if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) {
+            UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode);
+            U_ASSERT(e != NULL);
+            U_ASSERT(e->hashcode == HASH_EMPTY);
+            e->key = old[i].key;
+            e->value = old[i].value;
+            e->hashcode = old[i].hashcode;
+            ++hash->count;
+        }
+    }
+
+    uprv_free(old);
+}
+
+/**
+ * Remove the entry for 'key', if there is one.
+ *
+ * The slot is located with _uhash_find(). An occupied slot is marked
+ * deleted rather than empty, because later lookups must continue PAST
+ * deleted slots; the table's deleters, if any, are applied to the
+ * stored key and value. Afterwards the table is given a chance to
+ * shrink; a failure to do so is ignored.
+ *
+ * @return the removed value as reported by
+ *         _uhash_internalRemoveElement(), or a zeroed token if the
+ *         key was not present.
+ */
+static UHashTok
+_uhash_remove(UHashtable *hash,
+              UHashTok key) {
+    UHashTok removed;
+    UHashElement* slot = _uhash_find(hash, key, hash->keyHasher(key));
+    U_ASSERT(slot != NULL);
+
+    removed.pointer = NULL;
+    removed.integer = 0;
+    if (IS_EMPTY_OR_DELETED(slot->hashcode)) {
+        return removed;
+    }
+
+    removed = _uhash_internalRemoveElement(hash, slot);
+    if (hash->count < hash->lowWaterMark) {
+        UErrorCode shrinkStatus = U_ZERO_ERROR;
+        _uhash_rehash(hash, &shrinkStatus);
+    }
+    return removed;
+}
+
+/**
+ * Store 'value' under 'key' and return the value previously stored
+ * there, as reported by _uhash_setElement().
+ *
+ * 'hint' carries the HINT_KEY_POINTER / HINT_VALUE_POINTER bits that
+ * say which tokens the caller treats as pointers. A NULL value that is
+ * hinted as a pointer removes the key instead of storing it. On
+ * failure the passed key and value are handed to the table's deleters
+ * and a zeroed token is returned.
+ */
+static UHashTok
+_uhash_put(UHashtable *hash,
+ UHashTok key,
+ UHashTok value,
+ int8_t hint,
+ UErrorCode *status) {
+
+ /* Put finds the position in the table for the new value. If the
+ * key is already in the table, it is deleted, if there is a
+ * non-NULL keyDeleter. Then the key, the hash and the value are
+ * all put at the position in their respective arrays.
+ */
+ int32_t hashcode;
+ UHashElement* e;
+ UHashTok emptytok;
+
+ if (U_FAILURE(*status)) {
+ goto err;
+ }
+ U_ASSERT(hash != NULL);
+ /* Cannot always check pointer here or iSeries sees NULL every time. */
+ if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) {
+ /* Disallow storage of NULL values, since NULL is returned by
+ * get() to indicate an absent key. Storing NULL == removing.
+ * NOTE(review): this path returns without passing through 'err',
+ * so the key argument itself is not handed to keyDeleter here.
+ */
+ return _uhash_remove(hash, key);
+ }
+ /* Grow first, so that the slot found below belongs to the final array. */
+ if (hash->count > hash->highWaterMark) {
+ _uhash_rehash(hash, status);
+ if (U_FAILURE(*status)) {
+ goto err;
+ }
+ }
+
+ hashcode = (*hash->keyHasher)(key);
+ e = _uhash_find(hash, key, hashcode);
+ U_ASSERT(e != NULL);
+
+ if (IS_EMPTY_OR_DELETED(e->hashcode)) {
+ /* Important: We must never actually fill the table up. If we
+ * do so, then _uhash_find() will return NULL, and we'll have
+ * to check for NULL after every call to _uhash_find(). To
+ * avoid this we make sure there is always at least one empty
+ * or deleted slot in the table. This only is a problem if we
+ * are out of memory and rehash isn't working.
+ */
+ ++hash->count;
+ if (hash->count == hash->length) {
+ /* Don't allow count to reach length */
+ --hash->count;
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ goto err;
+ }
+ }
+
+ /* We must in all cases handle storage properly. If there was an
+ * old key, then it must be deleted (if the deleter != NULL).
+ * Make hashcodes stored in table positive.
+ */
+ return _uhash_setElement(hash, e, hashcode & 0x7FFFFFFF, key, value, hint);
+
+ err:
+ /* If the deleters are non-NULL, this method adopts its key and/or
+ * value arguments, and we must be sure to delete the key and/or
+ * value in all cases, even upon failure.
+ */
+ HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer);
+ emptytok.pointer = NULL; emptytok.integer = 0;
+ return emptytok;
+}
+
+
+/********************************************************************
+ * PUBLIC API
+ ********************************************************************/
+
+/**
+ * Create a heap-allocated table of the default initial size
+ * (PRIMES[DEFAULT_PRIME_INDEX] slots).
+ */
+U_CAPI UHashtable* U_EXPORT2
+uhash_open(UHashFunction *keyHash,
+           UKeyComparator *keyComp,
+           UValueComparator *valueComp,
+           UErrorCode *status) {
+
+    UHashtable *table =
+        _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
+    return table;
+}
+
+/**
+ * Create a heap-allocated table whose slot count is the smallest
+ * entry of PRIMES that is >= size, or the largest entry when no entry
+ * is big enough.
+ */
+U_CAPI UHashtable* U_EXPORT2
+uhash_openSize(UHashFunction *keyHash,
+               UKeyComparator *keyComp,
+               UValueComparator *valueComp,
+               int32_t size,
+               UErrorCode *status) {
+
+    /* Find the smallest index for which PRIMES[index] >= size. */
+    int32_t primeIndex;
+    for (primeIndex = 0; primeIndex < (PRIMES_LENGTH-1); ++primeIndex) {
+        if (PRIMES[primeIndex] >= size) {
+            break;
+        }
+    }
+
+    return _uhash_create(keyHash, keyComp, valueComp, primeIndex, status);
+}
+
+/**
+ * Initialize caller-provided table storage with the default initial
+ * size (PRIMES[DEFAULT_PRIME_INDEX] slots).
+ */
+U_CAPI UHashtable* U_EXPORT2
+uhash_init(UHashtable *fillinResult,
+           UHashFunction *keyHash,
+           UKeyComparator *keyComp,
+           UValueComparator *valueComp,
+           UErrorCode *status) {
+
+    UHashtable *table =
+        _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
+    return table;
+}
+
+/**
+ * Initialize caller-provided table storage whose slot count is the
+ * smallest entry of PRIMES that is >= size, or the largest entry when
+ * no entry is big enough.
+ */
+U_CAPI UHashtable* U_EXPORT2
+uhash_initSize(UHashtable *fillinResult,
+               UHashFunction *keyHash,
+               UKeyComparator *keyComp,
+               UValueComparator *valueComp,
+               int32_t size,
+               UErrorCode *status) {
+
+    // Find the smallest index for which PRIMES[index] >= size.
+    int32_t primeIndex;
+    for (primeIndex = 0; primeIndex < (PRIMES_LENGTH-1); ++primeIndex) {
+        if (PRIMES[primeIndex] >= size) {
+            break;
+        }
+    }
+    return _uhash_init(fillinResult, keyHash, keyComp, valueComp, primeIndex, status);
+}
+
+/**
+ * Release a table. NULL is ignored. Stored keys and values are passed
+ * to the installed deleters, the slot array is freed, and the struct
+ * itself is freed only if the table was heap-allocated.
+ */
+U_CAPI void U_EXPORT2
+uhash_close(UHashtable *hash) {
+    if (hash == NULL) {
+        return;
+    }
+    if (hash->elements != NULL) {
+        if (hash->keyDeleter != NULL || hash->valueDeleter != NULL) {
+            int32_t pos = UHASH_FIRST;
+            for (;;) {
+                UHashElement *e = (UHashElement*) uhash_nextElement(hash, &pos);
+                if (e == NULL) {
+                    break;
+                }
+                HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer);
+            }
+        }
+        uprv_free(hash->elements);
+        hash->elements = NULL;
+    }
+    if (hash->allocated) {
+        uprv_free(hash);
+    }
+}
+
+/** Install a new key hash function and return the one it replaces. */
+U_CAPI UHashFunction *U_EXPORT2
+uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) {
+    UHashFunction *previous = hash->keyHasher;
+
+    hash->keyHasher = fn;
+    return previous;
+}
+
+/** Install a new key comparator and return the one it replaces. */
+U_CAPI UKeyComparator *U_EXPORT2
+uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) {
+    UKeyComparator *previous = hash->keyComparator;
+
+    hash->keyComparator = fn;
+    return previous;
+}
+/** Install a new value comparator and return the one it replaces. */
+U_CAPI UValueComparator *U_EXPORT2
+uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){
+    UValueComparator *previous = hash->valueComparator;
+
+    hash->valueComparator = fn;
+    return previous;
+}
+
+/** Install a new key deleter and return the one it replaces. */
+U_CAPI UObjectDeleter *U_EXPORT2
+uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) {
+    UObjectDeleter *previous = hash->keyDeleter;
+
+    hash->keyDeleter = fn;
+    return previous;
+}
+
+/** Install a new value deleter and return the one it replaces. */
+U_CAPI UObjectDeleter *U_EXPORT2
+uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) {
+    UObjectDeleter *previous = hash->valueDeleter;
+
+    hash->valueDeleter = fn;
+    return previous;
+}
+
+/**
+ * Switch the table to another resize policy: load the policy's
+ * ratios, recompute both water marks for the current length and let
+ * the table resize at once if the count is now out of range. A failed
+ * resize is ignored.
+ */
+U_CAPI void U_EXPORT2
+uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
+    UErrorCode rehashStatus = U_ZERO_ERROR;
+
+    _uhash_internalSetResizePolicy(hash, policy);
+
+    hash->lowWaterMark  = (int32_t)(hash->length * hash->lowWaterRatio);
+    hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
+
+    _uhash_rehash(hash, &rehashStatus);
+}
+
+/** Return the number of entries currently stored in the table. */
+U_CAPI int32_t U_EXPORT2
+uhash_count(const UHashtable *hash) {
+    const int32_t entries = hash->count;
+    return entries;
+}
+
+/**
+ * Look up a pointer key and return the pointer value of the slot that
+ * _uhash_find() selects; for an absent key that is an empty or deleted
+ * slot, whose value is cleared.
+ */
+U_CAPI void* U_EXPORT2
+uhash_get(const UHashtable *hash,
+          const void* key) {
+    UHashTok lookupKey;
+    const UHashElement *slot;
+
+    lookupKey.pointer = (void*) key;
+    slot = _uhash_find(hash, lookupKey, hash->keyHasher(lookupKey));
+    return slot->value.pointer;
+}
+
+/**
+ * Look up an integer key and return the pointer value of the slot
+ * that _uhash_find() selects; for an absent key that is an empty or
+ * deleted slot, whose value is cleared.
+ */
+U_CAPI void* U_EXPORT2
+uhash_iget(const UHashtable *hash,
+           int32_t key) {
+    UHashTok lookupKey;
+    const UHashElement *slot;
+
+    lookupKey.integer = key;
+    slot = _uhash_find(hash, lookupKey, hash->keyHasher(lookupKey));
+    return slot->value.pointer;
+}
+
+/**
+ * Look up a pointer key and return the integer value of the slot that
+ * _uhash_find() selects; for an absent key that is an empty or deleted
+ * slot, whose value is cleared.
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_geti(const UHashtable *hash,
+           const void* key) {
+    UHashTok lookupKey;
+    const UHashElement *slot;
+
+    lookupKey.pointer = (void*) key;
+    slot = _uhash_find(hash, lookupKey, hash->keyHasher(lookupKey));
+    return slot->value.integer;
+}
+
+/**
+ * Look up an integer key and return the integer value of the slot
+ * that _uhash_find() selects; for an absent key that is an empty or
+ * deleted slot, whose value is cleared.
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_igeti(const UHashtable *hash,
+            int32_t key) {
+    UHashTok lookupKey;
+    const UHashElement *slot;
+
+    lookupKey.integer = key;
+    slot = _uhash_find(hash, lookupKey, hash->keyHasher(lookupKey));
+    return slot->value.integer;
+}
+
+/**
+ * Store a pointer value under a pointer key. Both tokens are hinted
+ * as pointers, so a NULL value removes the key.
+ *
+ * @return the pointer member of the token returned by _uhash_put().
+ */
+U_CAPI void* U_EXPORT2
+uhash_put(UHashtable *hash,
+          void* key,
+          void* value,
+          UErrorCode *status) {
+    UHashTok keyTok, valueTok, previous;
+
+    keyTok.pointer = key;
+    valueTok.pointer = value;
+    previous = _uhash_put(hash, keyTok, valueTok,
+                          HINT_KEY_POINTER | HINT_VALUE_POINTER,
+                          status);
+    return previous.pointer;
+}
+
+/**
+ * Store a pointer value under an integer key. Only the value is
+ * hinted as a pointer, so a NULL value removes the key.
+ *
+ * @return the pointer member of the token returned by _uhash_put().
+ */
+U_CAPI void* U_EXPORT2
+uhash_iput(UHashtable *hash,
+           int32_t key,
+           void* value,
+           UErrorCode *status) {
+    UHashTok keyTok, valueTok, previous;
+
+    keyTok.integer = key;
+    valueTok.pointer = value;
+    previous = _uhash_put(hash, keyTok, valueTok,
+                          HINT_VALUE_POINTER,
+                          status);
+    return previous.pointer;
+}
+
+/**
+ * Store an integer value under a pointer key. Only the key is hinted
+ * as a pointer.
+ *
+ * @return the integer member of the token returned by _uhash_put().
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_puti(UHashtable *hash,
+           void* key,
+           int32_t value,
+           UErrorCode *status) {
+    UHashTok keyTok, valueTok, previous;
+
+    keyTok.pointer = key;
+    valueTok.integer = value;
+    previous = _uhash_put(hash, keyTok, valueTok,
+                          HINT_KEY_POINTER,
+                          status);
+    return previous.integer;
+}
+
+
+/* Store an (integer key, integer value) pair; the hint is 0 because neither is a pointer. */
+U_CAPI int32_t U_EXPORT2
+uhash_iputi(UHashtable *hash, int32_t key, int32_t value, UErrorCode *status) {
+    UHashTok keyTok;
+    UHashTok valueTok;
+    UHashTok previous;
+
+    keyTok.integer = key;
+    valueTok.integer = value;
+    previous = _uhash_put(hash, keyTok, valueTok, 0, status);
+    return previous.integer;
+}
+
+U_CAPI void* U_EXPORT2
+uhash_remove(UHashtable *hash, const void* key) {
+    UHashTok victimKey; /* pointer key boxed as a token */
+    victimKey.pointer = (void*) key;
+    const UHashTok removed = _uhash_remove(hash, victimKey);
+    return removed.pointer;
+}
+
+U_CAPI void* U_EXPORT2
+uhash_iremove(UHashtable *hash, int32_t key) {
+    UHashTok victimKey; /* integer key boxed as a token */
+    victimKey.integer = key;
+    const UHashTok removed = _uhash_remove(hash, victimKey);
+    return removed.pointer;
+}
+
+U_CAPI int32_t U_EXPORT2
+uhash_removei(UHashtable *hash, const void* key) {
+    UHashTok victimKey; /* pointer key boxed as a token */
+    victimKey.pointer = (void*) key;
+    const UHashTok removed = _uhash_remove(hash, victimKey);
+    return removed.integer;
+}
+
+U_CAPI int32_t U_EXPORT2
+uhash_iremovei(UHashtable *hash, int32_t key) {
+    UHashTok victimKey; /* integer key boxed as a token */
+    victimKey.integer = key;
+    const UHashTok removed = _uhash_remove(hash, victimKey);
+    return removed.integer;
+}
+
+U_CAPI void U_EXPORT2
+uhash_removeAll(UHashtable *hash) {
+    U_ASSERT(hash != NULL);
+    if (hash->count != 0) { /* an already-empty table needs no sweep */
+        int32_t cursor = UHASH_FIRST;
+        for (const UHashElement *slot = uhash_nextElement(hash, &cursor); slot != NULL;
+             slot = uhash_nextElement(hash, &cursor)) {
+            uhash_removeElement(hash, slot);
+        }
+    }
+    U_ASSERT(hash->count == 0);
+}
+
+U_CAPI const UHashElement* U_EXPORT2
+uhash_find(const UHashtable *hash, const void* key) {
+    UHashTok probe;
+    probe.pointer = (void*) key;
+    const UHashElement *slot = _uhash_find(hash, probe, hash->keyHasher(probe));
+    if (IS_EMPTY_OR_DELETED(slot->hashcode)) { return NULL; } /* unused slot: report no match */
+    return slot;
+}
+
+U_CAPI const UHashElement* U_EXPORT2
+uhash_nextElement(const UHashtable *hash, int32_t *pos) {
+    U_ASSERT(hash != NULL);
+    /* Scan forward from the slot after *pos, skipping EMPTY and DELETED
+     * slots; *pos is only updated when an occupied slot is found. */
+    int32_t idx = *pos + 1;
+    while (idx < hash->length) {
+        const UHashElement *slot = &(hash->elements[idx]);
+        if (IS_EMPTY_OR_DELETED(slot->hashcode)) {
+            ++idx;
+            continue;
+        }
+        *pos = idx;
+        return slot;
+    }
+    return NULL; /* no more elements */
+}
+
+U_CAPI void* U_EXPORT2
+uhash_removeElement(UHashtable *hash, const UHashElement* e) {
+    U_ASSERT(hash != NULL);
+    U_ASSERT(e != NULL);
+    if (IS_EMPTY_OR_DELETED(e->hashcode)) {
+        return NULL; /* slot holds nothing to remove */
+    }
+    /* The API takes a const element, but the internal remover needs a mutable one. */
+    return _uhash_internalRemoveElement(hash, (UHashElement *)e).pointer;
+}
+
+/********************************************************************
+ * UHashTok convenience
+ ********************************************************************/
+
+/**
+ * Return a UHashTok for an integer.
+ */
+/*U_CAPI UHashTok U_EXPORT2
+uhash_toki(int32_t i) {
+ UHashTok tok;
+ tok.integer = i;
+ return tok;
+}*/
+
+/**
+ * Return a UHashTok for a pointer.
+ */
+/*U_CAPI UHashTok U_EXPORT2
+uhash_tokp(void* p) {
+ UHashTok tok;
+ tok.pointer = p;
+ return tok;
+}*/
+
+/********************************************************************
+ * PUBLIC Key Hash Functions
+ ********************************************************************/
+
+U_CAPI int32_t U_EXPORT2 uhash_hashUChars(const UHashTok key) {
+    const UChar *text = (const UChar *)key.pointer;
+    if (text == NULL) { return 0; } /* a NULL string hashes to 0 */
+    return ustr_hashUCharsN(text, u_strlen(text));
+}
+
+U_CAPI int32_t U_EXPORT2 uhash_hashChars(const UHashTok key) {
+    const char *text = (const char *)key.pointer;
+    if (text == NULL) { return 0; } /* a NULL string hashes to 0 */
+    return static_cast<int32_t>(ustr_hashCharsN(text, static_cast<int32_t>(uprv_strlen(text))));
+}
+
+U_CAPI int32_t U_EXPORT2 uhash_hashIChars(const UHashTok key) {
+    const char *text = (const char *)key.pointer;
+    if (text == NULL) { return 0; } /* a NULL string hashes to 0 */
+    return ustr_hashICharsN(text, static_cast<int32_t>(uprv_strlen(text)));
+}
+
+/**
+ * Report whether two hashtables hold equal contents: same comparators, same
+ * count, and for every key of hash1 an equal value stored in hash2.
+ */
+U_CAPI UBool U_EXPORT2
+uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
+    int32_t count1, pos, i;
+
+    if(hash1==hash2){
+        return TRUE;
+    }
+
+    /*
+     * Make sure that we are comparing 2 valid hashes of the same type
+     * with valid comparison functions.
+     * Without valid comparison functions, a binary comparison
+     * of the hash values will yield random results on machines
+     * with 64-bit pointers and 32-bit integer hashes.
+     * A valueComparator is normally optional, but is required here.
+     */
+    if (hash1==NULL || hash2==NULL ||
+        hash1->keyComparator != hash2->keyComparator ||
+        hash1->valueComparator != hash2->valueComparator ||
+        hash1->valueComparator == NULL)
+    {
+        /* Incompatible tables are reported as unequal rather than as an error. */
+        return FALSE;
+    }
+
+    count1 = uhash_count(hash1);
+    if(count1!=uhash_count(hash2)){
+        return FALSE;
+    }
+
+    pos=UHASH_FIRST;
+    for(i=0; i<count1; i++){
+        const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
+        /* The keys are not compared directly; instead the key from hash1 is
+         * used to fetch the value from hash2. If the tables are equal then
+         * both must contain equal values for the same key.
+         */
+        const UHashElement* elem2 = _uhash_find(hash2, elem1->key, hash2->keyHasher(elem1->key));
+        /* A missing key lands on an empty or deleted slot, whose value must
+         * not be handed to the comparator; the tables simply differ. */
+        if(IS_EMPTY_OR_DELETED(elem2->hashcode) ||
+           hash1->valueComparator(elem1->value, elem2->value)==FALSE){
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+/********************************************************************
+ * PUBLIC Comparator Functions
+ ********************************************************************/
+
+U_CAPI UBool U_EXPORT2
+uhash_compareUChars(const UHashTok key1, const UHashTok key2) {
+    const UChar *lhs = (const UChar*) key1.pointer;
+    const UChar *rhs = (const UChar*) key2.pointer;
+    /* Identical pointers (including two NULLs) are trivially equal. */
+    if (lhs == rhs) {
+        return TRUE;
+    }
+    /* Exactly one NULL can never match a real string. */
+    if (lhs == NULL || rhs == NULL) {
+        return FALSE;
+    }
+    /* Advance in lockstep until lhs ends or the code units differ. */
+    for (; *lhs != 0 && *lhs == *rhs; ++lhs, ++rhs) {}
+    return (UBool)(*lhs == *rhs);
+}
+
+U_CAPI UBool U_EXPORT2
+uhash_compareChars(const UHashTok key1, const UHashTok key2) {
+    const char *a = (const char*) key1.pointer;
+    const char *b = (const char*) key2.pointer;
+    if (a == b) {
+        return TRUE; /* same pointer, or both NULL */
+    }
+    if (a == NULL || b == NULL) {
+        return FALSE; /* only one side is NULL */
+    }
+    for (;;) {
+        if (*a == 0 || *a != *b) { break; } /* end of a, or first mismatch */
+        ++a; ++b;
+    }
+    return (UBool)(*a == *b);
+}
+
+U_CAPI UBool U_EXPORT2
+uhash_compareIChars(const UHashTok key1, const UHashTok key2) {
+    const char *lhs = (const char*) key1.pointer;
+    const char *rhs = (const char*) key2.pointer;
+    if (lhs == rhs) {
+        return TRUE; /* same pointer, or both NULL */
+    }
+    if (lhs == NULL || rhs == NULL) {
+        return FALSE; /* only one side is NULL */
+    }
+    /* Skip the prefix that matches after lowercasing each char. */
+    for (; *lhs != 0 && uprv_tolower(*lhs) == uprv_tolower(*rhs); ++lhs, ++rhs) {
+    }
+    /* Equal only when both strings ended at the same position. */
+    return (UBool)(*lhs == *rhs);
+}
+
+/********************************************************************
+ * PUBLIC int32_t Support Functions
+ ********************************************************************/
+
+U_CAPI int32_t U_EXPORT2 uhash_hashLong(const UHashTok key) {
+    /* The integer key is returned unchanged as its own hash code. */
+    return key.integer;
+}
+
+U_CAPI UBool U_EXPORT2 uhash_compareLong(const UHashTok key1, const UHashTok key2) {
+    /* Integer keys match exactly when their stored integers match. */
+    return (UBool)(key2.integer == key1.integer);
+}
diff --git a/thirdparty/icu4c/common/uhash.h b/thirdparty/icu4c/common/uhash.h
new file mode 100644
index 0000000000..b59d2711bb
--- /dev/null
+++ b/thirdparty/icu4c/common/uhash.h
@@ -0,0 +1,718 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1997-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* Date Name Description
+* 03/22/00 aliu Adapted from original C++ ICU Hashtable.
+* 07/06/01 aliu Modified to support int32_t keys on
+* platforms with sizeof(void*) < 32.
+******************************************************************************
+*/
+
+#ifndef UHASH_H
+#define UHASH_H
+
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "uelement.h"
+#include "unicode/localpointer.h"
+
+/**
+ * UHashtable stores key-value pairs and does moderately fast lookup
+ * based on keys. It provides a good tradeoff between access time and
+ * storage space. As elements are added to it, it grows to accommodate
+ * them. By default, the table never shrinks, even if all elements
+ * are removed from it.
+ *
+ * Keys and values are stored as void* pointers. These void* pointers
+ * may be actual pointers to strings, objects, or any other structure
+ * in memory, or they may simply be integral values cast to void*.
+ * UHashtable doesn't care and manipulates them via user-supplied
+ * functions. These functions hash keys, compare keys, delete keys,
+ * and delete values. Some function pointers are optional (may be
+ * NULL); others must be supplied. Several prebuilt functions exist
+ * to handle common key types.
+ *
+ * UHashtable ownership of keys and values is flexible, and controlled
+ * by whether or not the key deleter and value deleter functions are
+ * set. If a void* key is actually a pointer to a deletable object,
+ * then UHashtable can be made to delete that object by setting the
+ * key deleter function pointer to a non-NULL value. If this is done,
+ * then keys passed to uhash_put() are owned by the hashtable and will
+ * be deleted by it at some point, either as keys are replaced, or
+ * when uhash_close() is finally called. The same is true of values
+ * and the value deleter function pointer. Keys passed to methods
+ * other than uhash_put() are never owned by the hashtable.
+ *
+ * NULL values are not allowed. uhash_get() returns NULL to indicate
+ * a key that is not in the table, and having a NULL value in the
+ * table would generate an ambiguous result. If a key and a NULL
+ * value is passed to uhash_put(), this has the effect of doing a
+ * uhash_remove() on that key. This keeps uhash_get(), uhash_count(),
+ * and uhash_nextElement() consistent with one another.
+ *
+ * To see everything in a hashtable, use uhash_nextElement() to
+ * iterate through its contents. Each call to this function returns a
+ * UHashElement pointer. A hash element contains a key, value, and
+ * hashcode. During iteration an element may be deleted by calling
+ * uhash_removeElement(); iteration may safely continue thereafter.
+ * The uhash_remove() function may also be safely called in
+ * mid-iteration. If uhash_put() is called during iteration,
+ * the iteration is still guaranteed to terminate reasonably, but
+ * there is no guarantee that every element will be returned or that
+ * some won't be returned more than once.
+ *
+ * Under no circumstances should the UHashElement returned by
+ * uhash_nextElement be modified directly.
+ *
+ * By default, the hashtable grows when necessary, but never shrinks,
+ * even if all items are removed. For most applications this is
+ * optimal. However, in a highly dynamic usage where memory is at a
+ * premium, the table can be set to both grow and shrink by calling
+ * uhash_setResizePolicy() with the policy U_GROW_AND_SHRINK. In a
+ * situation where memory is critical and the client wants a table
+ * that does not grow at all, the constant U_FIXED can be used.
+ */
+
+/********************************************************************
+ * Data Structures
+ ********************************************************************/
+
+U_CDECL_BEGIN
+
+/**
+ * A key or value within a UHashtable.
+ * The hashing and comparison functions take UHashTok arguments by
+ * value, but the deleter receives the void* pointer within it.
+ */
+typedef UElement UHashTok;
+
+/**
+ * This is a single hash element: one slot of a UHashtable's elements array.
+ */
+struct UHashElement {
+ /* Reorder these elements to pack nicely if necessary */
+ int32_t hashcode; /* hash code; also marks a slot as empty or deleted */
+ UHashTok value; /* the stored value, as an integer or pointer token */
+ UHashTok key; /* the stored key, as an integer or pointer token */
+};
+typedef struct UHashElement UHashElement;
+
+/**
+ * A hashing function.
+ * @param key A key stored in a hashtable
+ * @return A NON-NEGATIVE hash code for key.
+ */
+typedef int32_t U_CALLCONV UHashFunction(const UHashTok key);
+
+/**
+ * A key equality (boolean) comparison function.
+ */
+typedef UElementsAreEqual UKeyComparator;
+
+/**
+ * A value equality (boolean) comparison function.
+ */
+typedef UElementsAreEqual UValueComparator;
+
+/* see cmemory.h for UObjectDeleter and uprv_deleteUObject() */
+
+/**
+ * This specifies whether or not, and how, the hashtable resizes itself.
+ * See uhash_setResizePolicy().
+ */
+enum UHashResizePolicy {
+ U_GROW, /* Grow on demand, do not shrink */
+ U_GROW_AND_SHRINK, /* Grow and shrink on demand */
+ U_FIXED /* Never change size */
+};
+
+/**
+ * The UHashtable struct. Clients should treat this as an opaque data
+ * type and manipulate it only through the uhash_... API.
+ */
+struct UHashtable {
+
+ /* Main key-value pair storage array */
+
+ UHashElement *elements;
+
+ /* Function pointers */
+
+ UHashFunction *keyHasher; /* Computes hash from key.
+ * Never null. */
+ UKeyComparator *keyComparator; /* Compares keys for equality.
+ * Never null. */
+ UValueComparator *valueComparator; /* Compares the values for equality; optional */
+
+ UObjectDeleter *keyDeleter; /* Deletes keys when required.
+ * If NULL won't do anything */
+ UObjectDeleter *valueDeleter; /* Deletes values when required.
+ * If NULL won't do anything */
+
+ /* Size parameters */
+
+ int32_t count; /* The number of key-value pairs in this table.
+ * 0 <= count <= length. In practice we
+ * never let count == length (see code). */
+ int32_t length; /* The physical size of the elements array.
+ * Must be prime. */
+
+ /* Rehashing thresholds */
+
+ int32_t highWaterMark; /* If count > highWaterMark, rehash */
+ int32_t lowWaterMark; /* If count < lowWaterMark, rehash */
+ float highWaterRatio; /* 0..1; high water as a fraction of length */
+ float lowWaterRatio; /* 0..1; low water as a fraction of length */
+
+ int8_t primeIndex; /* Index into our prime table for length.
+ * length == PRIMES[primeIndex] */
+ UBool allocated; /* Was this UHashtable allocated? */
+};
+typedef struct UHashtable UHashtable;
+
+U_CDECL_END
+
+/********************************************************************
+ * API
+ ********************************************************************/
+
+/**
+ * Initialize a new UHashtable.
+ * @param keyHash A pointer to the key hashing function. Must not be NULL.
+ * @param keyComp A pointer to the function that compares keys. Must not be NULL.
+ * @param valueComp A pointer to the function that compares values for
+ * equality.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return A pointer to a UHashtable, or 0 if an error occurred.
+ * @see uhash_openSize
+ */
+U_CAPI UHashtable* U_EXPORT2
+uhash_open(UHashFunction *keyHash,
+ UKeyComparator *keyComp,
+ UValueComparator *valueComp,
+ UErrorCode *status);
+
+/**
+ * Initialize a new UHashtable with a given initial size.
+ * @param keyHash A pointer to the key hashing function. Must not be NULL.
+ * @param keyComp A pointer to the function that compares keys. Must not be NULL.
+ * @param valueComp A pointer to the function that compares values for
+ * equality.
+ * @param size The initial capacity of this hash table.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return A pointer to a UHashtable, or 0 if an error occurred.
+ * @see uhash_open
+ */
+U_CAPI UHashtable* U_EXPORT2
+uhash_openSize(UHashFunction *keyHash,
+ UKeyComparator *keyComp,
+ UValueComparator *valueComp,
+ int32_t size,
+ UErrorCode *status);
+
+/**
+ * Initialize an existing UHashtable.
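+ * @param hash The existing UHashtable to initialize.
+ * @param keyHash A pointer to the key hashing function. Must not be NULL.
+ * @param keyComp A pointer to the function that compares keys. Must not be NULL.
+ * @param valueComp A pointer to the function that compares values for equality.
+ * @param status A pointer to an UErrorCode to receive any errors.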
+ * @return A pointer to a UHashtable, or 0 if an error occurred.
+ * @see uhash_openSize
+ */
+U_CAPI UHashtable* U_EXPORT2
+uhash_init(UHashtable *hash,
+ UHashFunction *keyHash,
+ UKeyComparator *keyComp,
+ UValueComparator *valueComp,
+ UErrorCode *status);
+
+/**
+ * Initialize an existing UHashtable.
+ * @param hash The existing UHashtable to initialize.
+ * @param keyHash A pointer to the key hashing function. Must not be NULL.
+ * @param keyComp A pointer to the function that compares keys. Must not be NULL.
+ * @param valueComp A pointer to the function that compares values for equality.
+ * @param size The initial capacity of this hash table.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return A pointer to a UHashtable, or 0 if an error occurred.
+ * @see uhash_openSize
+ */
+U_CAPI UHashtable* U_EXPORT2
+uhash_initSize(UHashtable *hash,
+ UHashFunction *keyHash,
+ UKeyComparator *keyComp,
+ UValueComparator *valueComp,
+ int32_t size,
+ UErrorCode *status);
+
+/**
+ * Close a UHashtable, releasing the memory used.
+ * @param hash The UHashtable to close. If hash is NULL no operation is performed.
+ */
+U_CAPI void U_EXPORT2
+uhash_close(UHashtable *hash);
+
+
+
+/**
+ * Set the function used to hash keys.
+ * @param hash The UHashtable to set
+ * @param fn the function to be used hash keys; must not be NULL
+ * @return the previous key hasher; non-NULL
+ */
+U_CAPI UHashFunction *U_EXPORT2
+uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn);
+
+/**
+ * Set the function used to compare keys. The default comparison is a
+ * void* pointer comparison.
+ * @param hash The UHashtable to set
+ * @param fn the function to be used compare keys; must not be NULL
+ * @return the previous key comparator; non-NULL
+ */
+U_CAPI UKeyComparator *U_EXPORT2
+uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn);
+
+/**
+ * Set the function used to compare values. The default comparison is a
+ * void* pointer comparison.
+ * @param hash The UHashtable to set
+ * @param fn the function to be used to compare values; must not be NULL
+ * @return the previous value comparator; non-NULL
+ */
+U_CAPI UValueComparator *U_EXPORT2
+uhash_setValueComparator(UHashtable *hash, UValueComparator *fn);
+
+/**
+ * Set the function used to delete keys. If this function pointer is
+ * NULL, this hashtable does not delete keys. If it is non-NULL, this
+ * hashtable does delete keys. This function should be set once
+ * before any elements are added to the hashtable and should not be
+ * changed thereafter.
+ * @param hash The UHashtable to set
+ * @param fn the function to be used delete keys, or NULL
+ * @return the previous key deleter; may be NULL
+ */
+U_CAPI UObjectDeleter *U_EXPORT2
+uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn);
+
+/**
+ * Set the function used to delete values. If this function pointer
+ * is NULL, this hashtable does not delete values. If it is non-NULL,
+ * this hashtable does delete values. This function should be set
+ * once before any elements are added to the hashtable and should not
+ * be changed thereafter.
+ * @param hash The UHashtable to set
+ * @param fn the function to be used delete values, or NULL
+ * @return the previous value deleter; may be NULL
+ */
+U_CAPI UObjectDeleter *U_EXPORT2
+uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn);
+
+/**
+ * Specify whether or not, and how, the hashtable resizes itself.
+ * By default, tables grow but do not shrink (policy U_GROW).
+ * See enum UHashResizePolicy.
+ * @param hash The UHashtable to set
+ * @param policy The way the hashtable resizes itself, {U_GROW, U_GROW_AND_SHRINK, U_FIXED}
+ */
+U_CAPI void U_EXPORT2
+uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy);
+
+/**
+ * Get the number of key-value pairs stored in a UHashtable.
+ * @param hash The UHashtable to query.
+ * @return The number of key-value pairs stored in hash.
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_count(const UHashtable *hash);
+
+/**
+ * Put a (key=pointer, value=pointer) item in a UHashtable. If the
+ * keyDeleter is non-NULL, then the hashtable owns 'key' after this
+ * call. If the valueDeleter is non-NULL, then the hashtable owns
+ * 'value' after this call. Storing a NULL value is the same as
+ * calling uhash_remove().
+ * @param hash The target UHashtable.
+ * @param key The key to store.
+ * @param value The value to store, may be NULL (see above).
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return The previous value, or NULL if none.
+ * @see uhash_get
+ */
+U_CAPI void* U_EXPORT2
+uhash_put(UHashtable *hash,
+ void *key,
+ void *value,
+ UErrorCode *status);
+
+/**
+ * Put a (key=integer, value=pointer) item in a UHashtable.
+ * keyDeleter must be NULL. If the valueDeleter is non-NULL, then the
+ * hashtable owns 'value' after this call. Storing a NULL value is
+ * the same as calling uhash_remove().
+ * @param hash The target UHashtable.
+ * @param key The integer key to store.
+ * @param value The value to store, may be NULL (see above).
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return The previous value, or NULL if none.
+ * @see uhash_get
+ */
+U_CAPI void* U_EXPORT2
+uhash_iput(UHashtable *hash,
+ int32_t key,
+ void* value,
+ UErrorCode *status);
+
+/**
+ * Put a (key=pointer, value=integer) item in a UHashtable. If the
+ * keyDeleter is non-NULL, then the hashtable owns 'key' after this
+ * call. valueDeleter must be NULL. Storing a 0 value is the same as
+ * calling uhash_remove().
+ * @param hash The target UHashtable.
+ * @param key The key to store.
+ * @param value The integer value to store.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return The previous value, or 0 if none.
+ * @see uhash_get
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_puti(UHashtable *hash,
+ void* key,
+ int32_t value,
+ UErrorCode *status);
+
+/**
+ * Put a (key=integer, value=integer) item in a UHashtable. Neither
+ * token is a pointer, so keyDeleter must be NULL (as for uhash_iput())
+ * and valueDeleter must be NULL. Storing a 0 value is the same as
+ * calling uhash_remove().
+ * @param hash The target UHashtable.
+ * @param key The integer key to store.
+ * @param value The integer value to store.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return The previous value, or 0 if none.
+ * @see uhash_get
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_iputi(UHashtable *hash,
+ int32_t key,
+ int32_t value,
+ UErrorCode *status);
+
+/**
+ * Retrieve a pointer value from a UHashtable using a pointer key,
+ * as previously stored by uhash_put().
+ * @param hash The target UHashtable.
+ * @param key A pointer key stored in a hashtable
+ * @return The requested item, or NULL if not found.
+ */
+U_CAPI void* U_EXPORT2
+uhash_get(const UHashtable *hash,
+ const void *key);
+
+/**
+ * Retrieve a pointer value from a UHashtable using an integer key,
+ * as previously stored by uhash_iput().
+ * @param hash The target UHashtable.
+ * @param key An integer key stored in a hashtable
+ * @return The requested item, or NULL if not found.
+ */
+U_CAPI void* U_EXPORT2
+uhash_iget(const UHashtable *hash,
+ int32_t key);
+
+/**
+ * Retrieve an integer value from a UHashtable using a pointer key,
+ * as previously stored by uhash_puti().
+ * @param hash The target UHashtable.
+ * @param key A pointer key stored in a hashtable
+ * @return The requested item, or 0 if not found.
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_geti(const UHashtable *hash,
+ const void* key);
+/**
+ * Retrieve an integer value from a UHashtable using an integer key,
+ * as previously stored by uhash_iputi().
+ * @param hash The target UHashtable.
+ * @param key An integer key stored in a hashtable
+ * @return The requested item, or 0 if not found.
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_igeti(const UHashtable *hash,
+ int32_t key);
+
+/**
+ * Remove an item from a UHashtable stored by uhash_put().
+ * @param hash The target UHashtable.
+ * @param key A key stored in a hashtable
+ * @return The item removed, or NULL if not found.
+ */
+U_CAPI void* U_EXPORT2
+uhash_remove(UHashtable *hash,
+ const void *key);
+
+/**
+ * Remove an item from a UHashtable stored by uhash_iput().
+ * @param hash The target UHashtable.
+ * @param key An integer key stored in a hashtable
+ * @return The item removed, or NULL if not found.
+ */
+U_CAPI void* U_EXPORT2
+uhash_iremove(UHashtable *hash,
+ int32_t key);
+
+/**
+ * Remove an item from a UHashtable stored by uhash_puti().
+ * @param hash The target UHashtable.
+ * @param key A key stored in a hashtable
+ * @return The item removed, or 0 if not found.
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_removei(UHashtable *hash,
+ const void* key);
+
+/**
+ * Remove an item from a UHashtable stored by uhash_iputi().
+ * @param hash The target UHashtable.
+ * @param key An integer key stored in a hashtable
+ * @return The item removed, or 0 if not found.
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_iremovei(UHashtable *hash,
+ int32_t key);
+
+/**
+ * Remove all items from a UHashtable.
+ * @param hash The target UHashtable.
+ */
+U_CAPI void U_EXPORT2
+uhash_removeAll(UHashtable *hash);
+
+/**
+ * Locate an element of a UHashtable. The caller must not modify the
+ * returned object. The primary use of this function is to obtain the
+ * stored key when it may not be identical to the search key. For
+ * example, if the compare function is a case-insensitive string
+ * compare, then the hash key may be desired in order to obtain the
+ * canonical case corresponding to a search key.
+ * @param hash The target UHashtable.
+ * @param key A key stored in a hashtable
+ * @return a hash element, or NULL if the key is not found.
+ */
+U_CAPI const UHashElement* U_EXPORT2
+uhash_find(const UHashtable *hash, const void* key);
+
+/**
+ * \def UHASH_FIRST
+ * Constant for use with uhash_nextElement
+ * @see uhash_nextElement
+ */
+#define UHASH_FIRST (-1)
+
+/**
+ * Iterate through the elements of a UHashtable. The caller must not
+ * modify the returned object. However, uhash_removeElement() may be
+ * called during iteration to remove an element from the table.
+ * Iteration may safely be resumed afterwards. If uhash_put() is
+ * called during iteration the iteration will then be out of sync and
+ * should be restarted.
+ * @param hash The target UHashtable.
+ * @param pos This should be set to UHASH_FIRST initially, and left untouched
+ * thereafter.
+ * @return a hash element, or NULL if no further key-value pairs
+ * exist in the table.
+ */
+U_CAPI const UHashElement* U_EXPORT2
+uhash_nextElement(const UHashtable *hash,
+ int32_t *pos);
+
+/**
+ * Remove an element, returned by uhash_nextElement(), from the table.
+ * Iteration may be safely continued afterwards.
+ * @param hash The hashtable
+ * @param e The element, returned by uhash_nextElement(), to remove.
+ * Must not be NULL. Must not be an empty or deleted element (as long
+ * as this was returned by uhash_nextElement() it will not be empty or
+ * deleted). Note: Although this parameter is const, it will be
+ * modified.
+ * @return the value that was removed.
+ */
+U_CAPI void* U_EXPORT2
+uhash_removeElement(UHashtable *hash, const UHashElement* e);
+
+/********************************************************************
+ * UHashTok convenience
+ ********************************************************************/
+
+/**
+ * Return a UHashTok for an integer.
+ * @param i The given integer
+ * @return a UHashTok for an integer.
+ */
+/*U_CAPI UHashTok U_EXPORT2
+uhash_toki(int32_t i);*/
+
+/**
+ * Return a UHashTok for a pointer.
+ * @param p The given pointer
+ * @return a UHashTok for a pointer.
+ */
+/*U_CAPI UHashTok U_EXPORT2
+uhash_tokp(void* p);*/
+
+/********************************************************************
+ * UChar* and char* Support Functions
+ ********************************************************************/
+
+/**
+ * Generate a hash code for a null-terminated UChar* string. If the
+ * string is not null-terminated do not use this function. Use
+ * together with uhash_compareUChars.
+ * @param key The string (const UChar*) to hash.
+ * @return A hash code for the key.
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_hashUChars(const UHashTok key);
+
+/**
+ * Generate a hash code for a null-terminated char* string. If the
+ * string is not null-terminated do not use this function. Use
+ * together with uhash_compareChars.
+ * @param key The string (const char*) to hash.
+ * @return A hash code for the key.
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_hashChars(const UHashTok key);
+
+/**
+ * Generate a case-insensitive hash code for a null-terminated char*
+ * string. If the string is not null-terminated do not use this
+ * function. Use together with uhash_compareIChars.
+ * @param key The string (const char*) to hash.
+ * @return A hash code for the key.
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_hashIChars(const UHashTok key);
+
+/**
+ * Comparator for null-terminated UChar* strings. Use together with
+ * uhash_hashUChars.
+ * @param key1 The string for comparison
+ * @param key2 The string for comparison
+ * @return true if key1 and key2 are equal, return false otherwise.
+ */
+U_CAPI UBool U_EXPORT2
+uhash_compareUChars(const UHashTok key1, const UHashTok key2);
+
+/**
+ * Comparator for null-terminated char* strings. Use together with
+ * uhash_hashChars.
+ * @param key1 The string for comparison
+ * @param key2 The string for comparison
+ * @return true if key1 and key2 are equal, return false otherwise.
+ */
+U_CAPI UBool U_EXPORT2
+uhash_compareChars(const UHashTok key1, const UHashTok key2);
+
+/**
+ * Case-insensitive comparator for null-terminated char* strings. Use
+ * together with uhash_hashIChars.
+ * @param key1 The string for comparison
+ * @param key2 The string for comparison
+ * @return true if key1 and key2 are equal, return false otherwise.
+ */
+U_CAPI UBool U_EXPORT2
+uhash_compareIChars(const UHashTok key1, const UHashTok key2);
+
+/********************************************************************
+ * UnicodeString Support Functions
+ ********************************************************************/
+
+/**
+ * Hash function for UnicodeString* keys.
+ * @param key The string (const UnicodeString*) to hash.
+ * @return A hash code for the key.
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_hashUnicodeString(const UElement key);
+
+/**
+ * Hash function for UnicodeString* keys (case insensitive).
+ * Make sure to use together with uhash_compareCaselessUnicodeString.
+ * @param key The string (const UnicodeString*) to hash.
+ * @return A hash code for the key.
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_hashCaselessUnicodeString(const UElement key);
+
+/********************************************************************
+ * int32_t Support Functions
+ ********************************************************************/
+
+/**
+ * Hash function for 32-bit integer keys.
+ * @param key The 32-bit integer key to hash.
+ * @return A hash code for the key.
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_hashLong(const UHashTok key);
+
+/**
+ * Comparator function for 32-bit integer keys.
+ * @param key1 The integer for comparison
+ * @param key2 The integer for comparison
+ * @return true if key1 and key2 are equal, return false otherwise
+ */
+U_CAPI UBool U_EXPORT2
+uhash_compareLong(const UHashTok key1, const UHashTok key2);
+
+/********************************************************************
+ * Other Support Functions
+ ********************************************************************/
+
+/**
+ * Deleter for Hashtable objects.
+ * @param obj The object to be deleted
+ */
+U_CAPI void U_EXPORT2
+uhash_deleteHashtable(void *obj);
+
+/* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */
+
+/**
+ * Checks if the given hash tables are equal or not.
+ * @param hash1 The first hashtable to compare; may be NULL.
+ * @param hash2 The second hashtable to compare; may be NULL.
+ * @return true if the hashtables are equal and false if not.
+ */
+U_CAPI UBool U_EXPORT2
+uhash_equals(const UHashtable* hash1, const UHashtable* hash2);
+
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUHashtablePointer
+ * "Smart pointer" class, closes a UHashtable via uhash_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUHashtablePointer, UHashtable, uhash_close);
+
+U_NAMESPACE_END
+
+#endif
+
+#endif
diff --git a/thirdparty/icu4c/common/uhash_us.cpp b/thirdparty/icu4c/common/uhash_us.cpp
new file mode 100644
index 0000000000..ef482c2746
--- /dev/null
+++ b/thirdparty/icu4c/common/uhash_us.cpp
@@ -0,0 +1,26 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1997-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* Date Name Description
+* 03/22/00 aliu Creation.
+* 07/06/01 aliu Modified to support int32_t keys on
+* platforms with sizeof(void*) < 32.
+******************************************************************************
+*/
+
+#include "hash.h"
+
+/**
+ * Deleter for Hashtable objects.
+ * Converts the untyped pointer back to a Hashtable pointer and destroys
+ * the object with delete.
+ * @param obj The Hashtable to be deleted
+ */
+U_CAPI void U_EXPORT2
+uhash_deleteHashtable(void *obj) {
+    U_NAMESPACE_USE
+    Hashtable *table = static_cast<Hashtable *>(obj);
+    delete table;
+}
+
+//eof
diff --git a/thirdparty/icu4c/common/uidna.cpp b/thirdparty/icu4c/common/uidna.cpp
new file mode 100644
index 0000000000..ac2f9c3c8c
--- /dev/null
+++ b/thirdparty/icu4c/common/uidna.cpp
@@ -0,0 +1,921 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *******************************************************************************
+ *
+ * Copyright (C) 2003-2014, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ *******************************************************************************
+ * file name: uidna.cpp
+ * encoding: UTF-8
+ * tab size: 8 (not used)
+ * indentation:4
+ *
+ * created on: 2003feb1
+ * created by: Ram Viswanadha
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_IDNA
+
+#include "unicode/uidna.h"
+#include "unicode/ustring.h"
+#include "unicode/usprep.h"
+#include "punycode.h"
+#include "ustr_imp.h"
+#include "cmemory.h"
+#include "uassert.h"
+#include "sprpimpl.h"
+
+/* The official IDNA ACE prefix is "xn--"; stored here as lower-case code units. */
+static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
+#define ACE_PREFIX_LENGTH 4
+
+/* A label must not be longer than MAX_LABEL_LENGTH code units. */
+#define MAX_LABEL_LENGTH 63
+/* Capacity of the per-label stack buffers; larger than MAX_LABEL_LENGTH so that
+ * most intermediate results fit without a heap allocation. */
+#define MAX_LABEL_BUFFER_SIZE 100
+
+/* A domain name must not be longer than MAX_DOMAIN_NAME_LENGTH code units. */
+#define MAX_DOMAIN_NAME_LENGTH 255
+/* Capacity of the whole-name stack buffers. The expansion is parenthesized so
+ * that the macro stays a single operand wherever it is used in an expression. */
+#define MAX_IDN_BUFFER_SIZE (MAX_DOMAIN_NAME_LENGTH+1)
+
+/* ASCII code points used by the helpers below. */
+#define LOWER_CASE_DELTA 0x0020
+#define HYPHEN 0x002D
+#define FULL_STOP 0x002E
+#define CAPITAL_A 0x0041
+#define CAPITAL_Z 0x005A
+
+/* Maps a code unit in the range CAPITAL_A..CAPITAL_Z to its lower-case form by
+ * adding LOWER_CASE_DELTA; every other code unit is returned unchanged. */
+inline static UChar
+toASCIILower(UChar ch){
+    const UBool isCapital = (UBool)(ch >= CAPITAL_A && ch <= CAPITAL_Z);
+    return isCapital ? (UChar)(ch + LOWER_CASE_DELTA) : ch;
+}
+
+/* Reports whether src begins with the ACE prefix, ignoring ASCII case.
+ * Returns FALSE when src holds fewer than ACE_PREFIX_LENGTH code units. */
+inline static UBool
+startsWithPrefix(const UChar* src , int32_t srcLength){
+    if(srcLength < ACE_PREFIX_LENGTH){
+        return FALSE;
+    }
+
+    // advance while the lower-cased source keeps matching the prefix
+    int32_t matched = 0;
+    while(matched < ACE_PREFIX_LENGTH && toASCIILower(src[matched]) == ACE_PREFIX[matched]){
+        ++matched;
+    }
+    return (UBool)(matched == ACE_PREFIX_LENGTH);
+}
+
+
+/* Compares two strings code unit by code unit, folding ASCII capitals to lower
+ * case first. Returns the difference of the first pair of folded code units
+ * that differ; if the shorter string is a prefix of the longer one the result
+ * is -1 (s1 shorter), 1 (s2 shorter) or 0 (same length). */
+inline static int32_t
+compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
+                            const UChar* s2, int32_t s2Len){
+
+    // result to report when no differing code unit is found
+    const int32_t lengthOrder = (s1Len == s2Len) ? 0 : ((s1Len < s2Len) ? -1 : 1);
+    // number of code units available in both strings
+    const int32_t common = (s1Len < s2Len) ? s1Len : s2Len;
+
+    for(int32_t idx = 0; idx != common; ++idx) {
+        const UChar left = s1[idx];
+        const UChar right = s2[idx];
+        if(left == right) {
+            continue;
+        }
+
+        /* Case-insensitive comparison */
+        const int32_t diff = (int32_t)toASCIILower(left) - (int32_t)toASCIILower(right);
+        if(diff != 0) {
+            return diff;
+        }
+    }
+
+    /* Reached the end of the shorter string without a difference */
+    return lengthOrder;
+}
+
+
+/**
+ * Tests whether a code unit is a label separator as defined by the
+ * IDNA RFC. The separators recognized here are U+002E, U+3002,
+ * U+FF0E and U+FF61.
+ *
+ * @param ch The code unit to test
+ * @return TRUE if ch is a label separator, FALSE otherwise
+ */
+static inline UBool isLabelSeparator(UChar ch){
+    return (UBool)(ch == 0x002e || ch == 0x3002 || ch == 0xFF0E || ch == 0xFF61);
+}
+
+// Scans src for the end of the current label and returns the label length,
+// not counting the separator.
+//   srcLength == -1 : src is NUL-terminated
+//   otherwise       : at most srcLength code units are examined
+// When a separator is found, *limit is set just past it and *done is left
+// untouched. When the end of the input is reached first, *limit is set to that
+// end and *done is set to TRUE.
+static inline int32_t
+getNextSeparator(UChar *src, int32_t srcLength,
+                 UChar **limit, UBool *done){
+    if(srcLength != -1){
+        int32_t pos = 0;
+        while(pos < srcLength){
+            if(isLabelSeparator(src[pos])){
+                *limit = src + pos + 1; // go past the delimiter
+                return pos;
+            }
+            ++pos;
+        }
+        // no delimiter in the counted input
+        *limit = src + srcLength;
+        *done = TRUE;
+        return pos;
+    }
+
+    int32_t pos = 0;
+    while(src[pos] != 0){
+        if(isLabelSeparator(src[pos])){
+            *limit = src + pos + 1; // go past the delimiter
+            return pos;
+        }
+        ++pos;
+    }
+    *limit = src + pos; // point to null
+    *done = TRUE;
+    return pos;
+}
+// Tests whether ch is a letter, digit or hyphen (LDH), i.e. a member of
+// [\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
+static inline UBool isLDHChar(UChar ch){
+    const UBool isHyphen    = (UBool)(ch == 0x002D);
+    const UBool isDigit     = (UBool)(ch >= 0x0030 && ch <= 0x0039);
+    const UBool isUpperCase = (UBool)(ch >= 0x0041 && ch <= 0x005A);
+    const UBool isLowerCase = (UBool)(ch >= 0x0061 && ch <= 0x007A);
+    return (UBool)(isHyphen || isDigit || isUpperCase || isLowerCase);
+}
+
+/**
+ * Applies the ToASCII operation to a single label, following the numbered
+ * steps marked in the body.
+ *
+ * @param src The label to convert
+ * @param srcLength Length of src, or -1 if src is NUL-terminated
+ * @param dest Output buffer
+ * @param destCapacity Capacity of dest in UChars
+ * @param options Bit set; UIDNA_ALLOW_UNASSIGNED and UIDNA_USE_STD3_RULES are read here
+ * @param nameprep The profile handed to usprep_prepare()
+ * @param parseError Passed to usprep_prepare() and uprv_syntaxError()
+ * @param status In/out error code. Values set in this function:
+ * U_MEMORY_ALLOCATION_ERROR, U_IDNA_ZERO_LENGTH_LABEL_ERROR,
+ * U_IDNA_STD3_ASCII_RULES_ERROR, U_IDNA_ACE_PREFIX_ERROR,
+ * U_BUFFER_OVERFLOW_ERROR and U_IDNA_LABEL_TOO_LONG_ERROR.
+ * @return The value returned by u_terminateUChars() for the required length
+ */
+static int32_t
+_internal_toASCII(const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UStringPrepProfile* nameprep,
+ UParseError* parseError,
+ UErrorCode* status)
+{
+
+ // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
+ UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
+ //initialize pointers to stack buffers
+ UChar *b1 = b1Stack, *b2 = b2Stack;
+ int32_t b1Len=0, b2Len,
+ b1Capacity = MAX_LABEL_BUFFER_SIZE,
+ b2Capacity = MAX_LABEL_BUFFER_SIZE ,
+ reqLength=0;
+
+ int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
+ // stays NULL: the code that would allocate it is commented out in step 6
+ UBool* caseFlags = NULL;
+
+ // the source contains all ascii codepoints
+ UBool srcIsASCII = TRUE;
+ // assume the source contains all LDH codepoints
+ UBool srcIsLDH = TRUE;
+
+ int32_t j=0;
+
+ //get the options
+ UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
+
+ // index of the last non-LDH ASCII code unit seen in b1; -1 if none
+ int32_t failPos = -1;
+
+ if(srcLength == -1){
+ srcLength = u_strlen(src);
+ }
+
+ // step 1 copies all of src into b1, so b1 must hold srcLength code units
+ if(srcLength > b1Capacity){
+ b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
+ if(b1==NULL){
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ goto CLEANUP;
+ }
+ b1Capacity = srcLength;
+ }
+
+ // step 1
+ for( j=0;j<srcLength;j++){
+ if(src[j] > 0x7F){
+ srcIsASCII = FALSE;
+ }
+ b1[b1Len++] = src[j];
+ }
+
+ // step 2 is performed only if the source contains non ASCII
+ if(srcIsASCII == FALSE){
+
+ // step 2
+ b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
+
+ if(*status == U_BUFFER_OVERFLOW_ERROR){
+ // redo processing of string
+ // we do not have enough room so grow the buffer
+ if(b1 != b1Stack){
+ uprv_free(b1);
+ }
+ b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
+ if(b1==NULL){
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ goto CLEANUP;
+ }
+
+ *status = U_ZERO_ERROR; // reset error
+
+ b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
+ }
+ }
+ // error bail out
+ if(U_FAILURE(*status)){
+ goto CLEANUP;
+ }
+ // an empty label is rejected; this also makes b1[0] and b1[b1Len-1] below valid
+ if(b1Len == 0){
+ *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
+ goto CLEANUP;
+ }
+
+ // for step 3 & 4
+ srcIsASCII = TRUE;
+ for( j=0;j<b1Len;j++){
+ // check if output of usprep_prepare is all ASCII
+ if(b1[j] > 0x7F){
+ srcIsASCII = FALSE;
+ }else if(isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character
+ srcIsLDH = FALSE;
+ failPos = j;
+ }
+ }
+ if(useSTD3ASCIIRules == TRUE){
+ // verify 3a and 3b
+ // 3(a) Verify the absence of non-LDH ASCII code points; that is, the
+ // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
+ // 3(b) Verify the absence of leading and trailing hyphen-minus; that
+ // is, the absence of U+002D at the beginning and end of the
+ // sequence.
+ if( srcIsLDH == FALSE /* source at this point should not contain any non-LDH characters */
+ || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){
+ *status = U_IDNA_STD3_ASCII_RULES_ERROR;
+
+ /* populate the parseError struct */
+ if(srcIsLDH==FALSE){
+ // failPos is always set the index of failure
+ uprv_syntaxError(b1,failPos, b1Len,parseError);
+ }else if(b1[0] == HYPHEN){
+ // fail position is 0
+ uprv_syntaxError(b1,0,b1Len,parseError);
+ }else{
+ // the last index in the source is always length-1
+ uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError);
+ }
+
+ goto CLEANUP;
+ }
+ }
+ // Step 4: if the source is ASCII then proceed to step 8
+ if(srcIsASCII){
+ if(b1Len <= destCapacity){
+ u_memmove(dest, b1, b1Len);
+ reqLength = b1Len;
+ }else{
+ // dest is too small: only report the required length; this path skips step 8
+ reqLength = b1Len;
+ goto CLEANUP;
+ }
+ }else{
+ // step 5 : verify the sequence does not begin with ACE prefix
+ if(!startsWithPrefix(b1,b1Len)){
+
+ //step 6: encode the sequence with punycode
+
+ // do not preserve the case flags for now!
+ // TODO: Preserve the case while implementing the RFE
+ // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
+ // uprv_memset(caseFlags,TRUE,b1Len);
+
+ b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status);
+
+ if(*status == U_BUFFER_OVERFLOW_ERROR){
+ // redo processing of string
+ /* we do not have enough room so grow the buffer*/
+ b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
+ if(b2 == NULL){
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ goto CLEANUP;
+ }
+
+ *status = U_ZERO_ERROR; // reset error
+
+ b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
+ }
+ //error bail out
+ if(U_FAILURE(*status)){
+ goto CLEANUP;
+ }
+ // TODO : Reconsider while implementing the case preserve RFE
+ // convert all codepoints to lower case ASCII
+ // toASCIILower(b2,b2Len);
+ reqLength = b2Len+ACE_PREFIX_LENGTH;
+
+ if(reqLength > destCapacity){
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ goto CLEANUP;
+ }
+ //Step 7: prepend the ACE prefix
+ u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH);
+ //Step 6: copy the contents in b2 into dest
+ u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len);
+
+ }else{
+ *status = U_IDNA_ACE_PREFIX_ERROR;
+ //position of failure is 0
+ uprv_syntaxError(b1,0,b1Len,parseError);
+ goto CLEANUP;
+ }
+ }
+ // step 8: verify the length of label
+ if(reqLength > MAX_LABEL_LENGTH){
+ *status = U_IDNA_LABEL_TOO_LONG_ERROR;
+ }
+
+CLEANUP:
+ // release only the buffers that were replaced by heap allocations
+ if(b1 != b1Stack){
+ uprv_free(b1);
+ }
+ if(b2 != b2Stack){
+ uprv_free(b2);
+ }
+ uprv_free(caseFlags);
+
+ return u_terminateUChars(dest, destCapacity, reqLength, status);
+}
+
+/**
+ * Applies the ToUnicode operation to a single label, following the numbered
+ * steps marked in the body.
+ *
+ * If any step fails, the original input is copied to dest (when it fits), the
+ * required length becomes srcLength and *status is reset to U_ZERO_ERROR
+ * before the final u_terminateUChars() call.
+ *
+ * @param src The label to convert
+ * @param srcLength Length of src, or -1 if src is NUL-terminated.
+ * A length of 0 returns 0 immediately.
+ * @param dest Output buffer
+ * @param destCapacity Capacity of dest in UChars
+ * @param options Bit set; UIDNA_ALLOW_UNASSIGNED is read here and the whole
+ * value is forwarded to uidna_toASCII() for verification
+ * @param nameprep The profile handed to usprep_prepare()
+ * @param parseError Passed to usprep_prepare() and uidna_toASCII()
+ * @param status In/out error code
+ * @return The value returned by u_terminateUChars() for the required length
+ */
+static int32_t
+_internal_toUnicode(const UChar* src, int32_t srcLength,
+                    UChar* dest, int32_t destCapacity,
+                    int32_t options,
+                    UStringPrepProfile* nameprep,
+                    UParseError* parseError,
+                    UErrorCode* status)
+{
+
+    //get the options
+    //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
+    int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
+
+    // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
+    UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
+
+    //initialize pointers to stack buffers
+    UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
+    int32_t b1Len = 0, b2Len, b1PrimeLen, b3Len,
+            b1Capacity = MAX_LABEL_BUFFER_SIZE,
+            b2Capacity = MAX_LABEL_BUFFER_SIZE,
+            b3Capacity = MAX_LABEL_BUFFER_SIZE,
+            reqLength=0;
+
+    // stays NULL: no case flags are requested from the punycode decoder
+    UBool* caseFlags = NULL;
+
+    UBool srcIsASCII = TRUE;
+    /*UBool srcIsLDH = TRUE;
+    int32_t failPos =0;*/
+
+    // step 1: find out if all the codepoints in src are ASCII
+    if(srcLength==-1){
+        // the whole string is walked (no early exit) because the length is computed here too
+        srcLength = 0;
+        for(;src[srcLength]!=0;){
+            if(src[srcLength]> 0x7f){
+                srcIsASCII = FALSE;
+            }/*else if(isLDHChar(src[srcLength])==FALSE){
+                // here we do not assemble surrogates
+                // since we know that LDH code points
+                // are in the ASCII range only
+                srcIsLDH = FALSE;
+                failPos = srcLength;
+            }*/
+            srcLength++;
+        }
+    }else if(srcLength > 0){
+        for(int32_t j=0; j<srcLength; j++){
+            if(src[j]> 0x7f){
+                srcIsASCII = FALSE;
+                break;
+            }/*else if(isLDHChar(src[j])==FALSE){
+                // here we do not assemble surrogates
+                // since we know that LDH code points
+                // are in the ASCII range only
+                srcIsLDH = FALSE;
+                failPos = j;
+            }*/
+        }
+    }else{
+        return 0;
+    }
+
+    if(srcIsASCII == FALSE){
+        // step 2: process the string
+        b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
+        if(*status == U_BUFFER_OVERFLOW_ERROR){
+            // redo processing of string
+            /* we do not have enough room so grow the buffer*/
+            b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
+            if(b1==NULL){
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                goto CLEANUP;
+            }
+
+            *status = U_ZERO_ERROR; // reset error
+
+            b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
+        }
+        //bail out on error
+        if(U_FAILURE(*status)){
+            goto CLEANUP;
+        }
+    }else{
+
+        //just point b1 at src; CLEANUP must not free it in this case
+        b1 = (UChar*) src;
+        b1Len = srcLength;
+    }
+
+    // The RFC states that
+    // <quote>
+    // ToUnicode never fails. If any step fails, then the original input
+    // is returned immediately in that step.
+    // </quote>
+
+    //step 3: verify ACE Prefix
+    if(startsWithPrefix(b1,b1Len)){
+
+        //step 4: Remove the ACE Prefix
+        b1Prime = b1 + ACE_PREFIX_LENGTH;
+        b1PrimeLen = b1Len - ACE_PREFIX_LENGTH;
+
+        //step 5: Decode using punycode
+        b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status);
+
+        if(*status == U_BUFFER_OVERFLOW_ERROR){
+            // redo processing of string
+            /* we do not have enough room so grow the buffer*/
+            b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
+            if(b2==NULL){
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                goto CLEANUP;
+            }
+
+            *status = U_ZERO_ERROR; // reset error
+
+            b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status);
+        }
+
+
+        //step 6:Apply toASCII
+        // (uidna_toASCII returns at once if *status already holds a failure)
+        b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status);
+
+        if(*status == U_BUFFER_OVERFLOW_ERROR){
+            // redo processing of string
+            /* we do not have enough room so grow the buffer*/
+            b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
+            if(b3==NULL){
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                goto CLEANUP;
+            }
+
+            *status = U_ZERO_ERROR; // reset error
+
+            b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status);
+
+        }
+        //bail out on error
+        if(U_FAILURE(*status)){
+            goto CLEANUP;
+        }
+
+        //step 7: verify
+        if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
+            // Cause the original to be returned.
+            *status = U_IDNA_VERIFICATION_ERROR;
+            goto CLEANUP;
+        }
+
+        //step 8: return output of step 5
+        reqLength = b2Len;
+        if(b2Len <= destCapacity) {
+            u_memmove(dest, b2, b2Len);
+        }
+    }
+    else{
+        // See the start of this if statement for why this is commented out.
+        // verify that STD3 ASCII rules are satisfied
+        /*if(useSTD3ASCIIRules == TRUE){
+            if( srcIsLDH == FALSE // source contains some non-LDH characters
+                || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){
+                *status = U_IDNA_STD3_ASCII_RULES_ERROR;
+
+                // populate the parseError struct
+                if(srcIsLDH==FALSE){
+                    // failPos is always set the index of failure
+                    uprv_syntaxError(src,failPos, srcLength,parseError);
+                }else if(src[0] == HYPHEN){
+                    // fail position is 0
+                    uprv_syntaxError(src,0,srcLength,parseError);
+                }else{
+                    // the last index in the source is always length-1
+                    uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
+                }
+
+                goto CLEANUP;
+            }
+        }*/
+        // just return the source
+        //copy the source to destination
+        if(srcLength <= destCapacity){
+            u_memmove(dest, src, srcLength);
+        }
+        reqLength = srcLength;
+    }
+
+
+CLEANUP:
+
+    // release only the buffers that were replaced by heap allocations;
+    // b1 may alias src, which is owned by the caller
+    if(b1 != b1Stack && b1!=src){
+        uprv_free(b1);
+    }
+    if(b2 != b2Stack){
+        uprv_free(b2);
+    }
+    // b3 is heap-allocated when the verification ToASCII overflows b3Stack
+    if(b3 != b3Stack){
+        uprv_free(b3);
+    }
+    uprv_free(caseFlags);
+
+    // The RFC states that
+    // <quote>
+    // ToUnicode never fails. If any step fails, then the original input
+    // is returned immediately in that step.
+    // </quote>
+    // So if any step fails lets copy source to destination
+    if(U_FAILURE(*status)){
+        //copy the source to destination
+        if(dest && srcLength <= destCapacity){
+            // srcLength should have already been set earlier.
+            U_ASSERT(srcLength >= 0);
+            u_memmove(dest, src, srcLength);
+        }
+        reqLength = srcLength;
+        *status = U_ZERO_ERROR;
+    }
+
+    return u_terminateUChars(dest, destCapacity, reqLength, status);
+}
+
+/*
+ * Public single-label ToASCII entry point: validates the arguments, opens the
+ * USPREP_RFC3491_NAMEPREP profile, runs _internal_toASCII() and closes the
+ * profile again.
+ * Returns 0 on a NULL or failed status or on illegal arguments
+ * (U_ILLEGAL_ARGUMENT_ERROR), -1 if the profile cannot be opened, and
+ * otherwise the length reported by _internal_toASCII().
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_toASCII(const UChar* src, int32_t srcLength,
+              UChar* dest, int32_t destCapacity,
+              int32_t options,
+              UParseError* parseError,
+              UErrorCode* status){
+
+    if(status == NULL || U_FAILURE(*status)){
+        return 0;
+    }
+
+    const UBool badSource = (UBool)(src == NULL || srcLength < -1);
+    const UBool badDest = (UBool)(destCapacity < 0 || (dest == NULL && destCapacity > 0));
+    if(badSource || badDest){
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    UStringPrepProfile* profile = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
+    if(U_FAILURE(*status)){
+        return -1;
+    }
+
+    const int32_t length = _internal_toASCII(src, srcLength, dest, destCapacity, options, profile, parseError, status);
+
+    /* the profile is only needed for the duration of the conversion */
+    usprep_close(profile);
+
+    return length;
+}
+
+/*
+ * Public single-label ToUnicode entry point: validates the arguments, opens
+ * the USPREP_RFC3491_NAMEPREP profile, runs _internal_toUnicode() and closes
+ * the profile again.
+ * Returns 0 on a NULL or failed status or on illegal arguments
+ * (U_ILLEGAL_ARGUMENT_ERROR), -1 if the profile cannot be opened, and
+ * otherwise the length reported by _internal_toUnicode().
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_toUnicode(const UChar* src, int32_t srcLength,
+                UChar* dest, int32_t destCapacity,
+                int32_t options,
+                UParseError* parseError,
+                UErrorCode* status){
+
+    if(status == NULL || U_FAILURE(*status)){
+        return 0;
+    }
+
+    const UBool badSource = (UBool)(src == NULL || srcLength < -1);
+    const UBool badDest = (UBool)(destCapacity < 0 || (dest == NULL && destCapacity > 0));
+    if(badSource || badDest){
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    UStringPrepProfile* profile = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
+    if(U_FAILURE(*status)){
+        return -1;
+    }
+
+    const int32_t length = _internal_toUnicode(src, srcLength, dest, destCapacity, options, profile, parseError, status);
+
+    /* the profile is only needed for the duration of the conversion */
+    usprep_close(profile);
+
+    return length;
+}
+
+
+/*
+ * Converts a whole domain name to ASCII: splits src at label separators,
+ * runs _internal_toASCII() on each label and joins the results with
+ * FULL_STOP. After the first buffer overflow nothing more is written to
+ * dest, but the required length keeps being accumulated.
+ * Returns 0 on a NULL or failed status, on illegal arguments, or when the
+ * nameprep profile cannot be opened; otherwise the value returned by
+ * u_terminateUChars() for the accumulated length.
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_IDNToASCII( const UChar *src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError *parseError,
+ UErrorCode *status){
+
+ if(status == NULL || U_FAILURE(*status)){
+ return 0;
+ }
+ if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ int32_t reqLength = 0;
+
+ UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
+
+ if(U_FAILURE(*status)){
+ return 0;
+ }
+
+ //initialize pointers
+ UChar *delimiter = (UChar*)src;
+ UChar *labelStart = (UChar*)src;
+ UChar *currentDest = (UChar*) dest;
+ int32_t remainingLen = srcLength;
+ int32_t remainingDestCapacity = destCapacity;
+ int32_t labelLen = 0, labelReqLength = 0;
+ UBool done = FALSE;
+
+
+ for(;;){
+
+ labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
+ labelReqLength = 0;
+ if(!(labelLen==0 && done)){// make sure this is not a root label separator.
+
+ labelReqLength = _internal_toASCII( labelStart, labelLen,
+ currentDest, remainingDestCapacity,
+ options, nameprep,
+ parseError, status);
+
+ if(*status == U_BUFFER_OVERFLOW_ERROR){
+
+ // keep going to compute the full required length, but stop writing
+ *status = U_ZERO_ERROR; // reset error
+ remainingDestCapacity = 0;
+ }
+ }
+
+
+ if(U_FAILURE(*status)){
+ break;
+ }
+
+ reqLength +=labelReqLength;
+ // adjust the destination pointer
+ if(labelReqLength < remainingDestCapacity){
+ currentDest = currentDest + labelReqLength;
+ remainingDestCapacity -= labelReqLength;
+ }else{
+ // the label used up (or exceeded) the remaining capacity: no more output is written
+ remainingDestCapacity = 0;
+ }
+
+ if(done == TRUE){
+ break;
+ }
+
+ // add the label separator
+ if(remainingDestCapacity > 0){
+ *currentDest++ = FULL_STOP;
+ remainingDestCapacity--;
+ }
+ reqLength++;
+
+ labelStart = delimiter;
+ // a NUL-terminated source (remainingLen == -1) keeps its -1 marker
+ if(remainingLen >0 ){
+ remainingLen = (int32_t)(srcLength - (delimiter - src));
+ }
+
+ }
+
+ if(reqLength > MAX_DOMAIN_NAME_LENGTH){
+ *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
+ }
+
+ usprep_close(nameprep);
+
+ return u_terminateUChars(dest, destCapacity, reqLength, status);
+}
+
+/*
+ * Converts a whole domain name to Unicode: splits src at label separators,
+ * runs _internal_toUnicode() on each label and copies the original separator
+ * code unit between the results. After the first buffer overflow nothing
+ * more is written to dest, but the required length keeps being accumulated.
+ * Returns 0 on a NULL or failed status, on illegal arguments, or when the
+ * nameprep profile cannot be opened; otherwise the value returned by
+ * u_terminateUChars() for the accumulated length.
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status){
+
+ if(status == NULL || U_FAILURE(*status)){
+ return 0;
+ }
+ if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ int32_t reqLength = 0;
+
+ UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
+
+ if(U_FAILURE(*status)){
+ return 0;
+ }
+
+ //initialize pointers
+ UChar *delimiter = (UChar*)src;
+ UChar *labelStart = (UChar*)src;
+ UChar *currentDest = (UChar*) dest;
+ int32_t remainingLen = srcLength;
+ int32_t remainingDestCapacity = destCapacity;
+ int32_t labelLen = 0, labelReqLength = 0;
+ UBool done = FALSE;
+
+ for(;;){
+
+ labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
+
+ // The RFC states that
+ // <quote>
+ // ToUnicode never fails. If any step fails, then the original input
+ // is returned immediately in that step.
+ // </quote>
+ // _internal_toUnicode will copy the label.
+ /*if(labelLen==0 && done==FALSE){
+ *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
+ break;
+ }*/
+
+ labelReqLength = _internal_toUnicode(labelStart, labelLen,
+ currentDest, remainingDestCapacity,
+ options, nameprep,
+ parseError, status);
+
+ if(*status == U_BUFFER_OVERFLOW_ERROR){
+ // keep going to compute the full required length, but stop writing
+ *status = U_ZERO_ERROR; // reset error
+ remainingDestCapacity = 0;
+ }
+
+ if(U_FAILURE(*status)){
+ break;
+ }
+
+ reqLength +=labelReqLength;
+ // adjust the destination pointer
+ if(labelReqLength < remainingDestCapacity){
+ currentDest = currentDest + labelReqLength;
+ remainingDestCapacity -= labelReqLength;
+ }else{
+ // the label used up (or exceeded) the remaining capacity: no more output is written
+ remainingDestCapacity = 0;
+ }
+
+ if(done == TRUE){
+ break;
+ }
+
+ // add the label separator
+ // Unlike the ToASCII operation we don't normalize the label separators
+ if(remainingDestCapacity > 0){
+ *currentDest++ = *(labelStart + labelLen);
+ remainingDestCapacity--;
+ }
+ reqLength++;
+
+ labelStart = delimiter;
+ // a NUL-terminated source (remainingLen == -1) keeps its -1 marker
+ if(remainingLen >0 ){
+ remainingLen = (int32_t)(srcLength - (delimiter - src));
+ }
+
+ }
+
+ if(reqLength > MAX_DOMAIN_NAME_LENGTH){
+ *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
+ }
+
+ usprep_close(nameprep);
+
+ return u_terminateUChars(dest, destCapacity, reqLength, status);
+}
+
+/*
+ * Compares two domain names by converting both with uidna_IDNToASCII() and
+ * comparing the results with compareCaseInsensitiveASCII().
+ * Each conversion first targets a stack buffer and is repeated into a heap
+ * buffer of the reported size if that overflows.
+ * Returns -1 on a NULL or failed status or when an allocation fails
+ * (U_MEMORY_ALLOCATION_ERROR); otherwise the comparison result.
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_compare(  const UChar *s1, int32_t length1,
+                const UChar *s2, int32_t length2,
+                int32_t options,
+                UErrorCode* status){
+
+    if(status == NULL || U_FAILURE(*status)){
+        return -1;
+    }
+
+    UChar firstStack[MAX_IDN_BUFFER_SIZE], secondStack[MAX_IDN_BUFFER_SIZE];
+    UChar *first = firstStack;
+    UChar *second = secondStack;
+    int32_t firstCapacity = MAX_IDN_BUFFER_SIZE;
+    int32_t secondCapacity = MAX_IDN_BUFFER_SIZE;
+    int32_t firstLen, secondLen;
+    int32_t outcome = -1;
+
+    UParseError parseError;
+
+    // first name
+    firstLen = uidna_IDNToASCII(s1, length1, first, firstCapacity, options, &parseError, status);
+    if(*status == U_BUFFER_OVERFLOW_ERROR){
+        // the stack buffer was too small: retry with a buffer of the reported size
+        first = (UChar*) uprv_malloc(firstLen * U_SIZEOF_UCHAR);
+        if(first == NULL){
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            goto CLEANUP;
+        }
+        *status = U_ZERO_ERROR; // reset error
+        firstLen = uidna_IDNToASCII(s1, length1, first, firstLen, options, &parseError, status);
+    }
+
+    // second name
+    secondLen = uidna_IDNToASCII(s2, length2, second, secondCapacity, options, &parseError, status);
+    if(*status == U_BUFFER_OVERFLOW_ERROR){
+        // the stack buffer was too small: retry with a buffer of the reported size
+        second = (UChar*) uprv_malloc(secondLen * U_SIZEOF_UCHAR);
+        if(second == NULL){
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            goto CLEANUP;
+        }
+        *status = U_ZERO_ERROR; // reset error
+        secondLen = uidna_IDNToASCII(s2, length2, second, secondLen, options, &parseError, status);
+    }
+
+    // when toASCII is applied all label separators are replaced with FULL_STOP
+    outcome = compareCaseInsensitiveASCII(first, firstLen, second, secondLen);
+
+CLEANUP:
+    if(first != firstStack){
+        uprv_free(first);
+    }
+    if(second != secondStack){
+        uprv_free(second);
+    }
+
+    return outcome;
+}
+
+#endif /* #if !UCONFIG_NO_IDNA */
diff --git a/thirdparty/icu4c/common/uinit.cpp b/thirdparty/icu4c/common/uinit.cpp
new file mode 100644
index 0000000000..624431be02
--- /dev/null
+++ b/thirdparty/icu4c/common/uinit.cpp
@@ -0,0 +1,74 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2001-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* file name: uinit.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001July05
+* created by: George Rhoten
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/icuplug.h"
+#include "unicode/uclean.h"
+#include "cmemory.h"
+#include "icuplugimp.h"
+#include "ucln_cmn.h"
+#include "ucnv_io.h"
+#include "umutex.h"
+#include "utracimp.h"
+
+U_NAMESPACE_BEGIN
+
+// Init-once state passed to umtx_initOnce() by u_init(); reset by uinit_cleanup().
+static UInitOnce gICUInitOnce = U_INITONCE_INITIALIZER;
+
+// Cleanup callback registered by initData(): resets gICUInitOnce and returns TRUE.
+static UBool U_CALLCONV uinit_cleanup() {
+ gICUInitOnce.reset();
+ return TRUE;
+}
+
+// Initialization routine handed to umtx_initOnce() by u_init().
+// Depending on the build configuration it initializes plugins and counts the
+// known converters, then registers uinit_cleanup under UCLN_COMMON_UINIT.
+// The cleanup registration is not conditional on the value of status.
+static void U_CALLCONV
+initData(UErrorCode &status)
+{
+#if UCONFIG_ENABLE_PLUGINS
+ /* initialize plugins */
+ uplug_init(&status);
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+ /*
+ * 2005-may-02
+ *
+ * ICU4C 3.4 (jitterbug 4497) hardcodes the data for Unicode character
+ * properties for APIs that want to be fast.
+ * Therefore, we need not load them here nor check for errors.
+ * Instead, we load the converter alias table to see if any ICU data
+ * is available.
+ * Users should really open the service objects they need and check
+ * for errors there, to make sure that the actual items they need are
+ * available.
+ */
+ ucnv_io_countKnownConverters(&status);
+#endif
+ ucln_common_registerCleanup(UCLN_COMMON_UINIT, uinit_cleanup);
+}
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
+/*
+ * ICU Initialization Function. Need not be called.
+ *
+ * Hands initData and gICUInitOnce to umtx_initOnce(), bracketed by the
+ * UTRACE entry/exit macros.
+ * NOTE(review): presumably umtx_initOnce() runs initData only on the first
+ * call - confirm against umutex.h.
+ * status is dereferenced without a NULL check, so callers must pass a valid
+ * pointer.
+ */
+U_CAPI void U_EXPORT2
+u_init(UErrorCode *status) {
+ UTRACE_ENTRY_OC(UTRACE_U_INIT);
+ umtx_initOnce(gICUInitOnce, &initData, *status);
+ UTRACE_EXIT_STATUS(*status);
+}
diff --git a/thirdparty/icu4c/common/uinvchar.cpp b/thirdparty/icu4c/common/uinvchar.cpp
new file mode 100644
index 0000000000..52b8906568
--- /dev/null
+++ b/thirdparty/icu4c/common/uinvchar.cpp
@@ -0,0 +1,627 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uinvchar.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2004sep14
+* created by: Markus W. Scherer
+*
+* Functions for handling invariant characters, moved here from putil.c
+* for better modularization.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "udataswp.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "uassert.h"
+#include "uinvchar.h"
+
+/* invariant-character handling --------------------------------------------- */
+
+/*
+ * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
+ * appropriately for most EBCDIC codepages.
+ *
+ * They currently also map most other ASCII graphic characters,
+ * appropriately for codepages 37 and 1047.
+ * Exceptions: The characters for []^ have different codes in 37 & 1047.
+ * Both versions are mapped to ASCII.
+ *
+ * ASCII 37 1047
+ * [ 5B BA AD
+ * ] 5D BB BD
+ * ^ 5E B0 5F
+ *
+ * There are no mappings for variant characters from Unicode to EBCDIC.
+ *
+ * Currently, C0 control codes are also included in these maps.
+ * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
+ * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
+ * but there is no mapping for ASCII LF back to EBCDIC.
+ *
+ * ASCII EBCDIC S/390-OE
+ * LF 0A 25 15
+ * NEL 85 15 25
+ *
+ * The maps below explicitly exclude the variant
+ * control and graphical characters that are in ASCII-based
+ * codepages at 0x80 and above.
+ * "No mapping" is expressed by mapping to a 00 byte.
+ *
+ * These tables do not establish a converter or a codepage.
+ */
+
+static const uint8_t asciiFromEbcdic[256]={
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
+ 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
+
+ 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
+ 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
+ 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
+
+ 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
+ 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
+
+ 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static const uint8_t ebcdicFromAscii[256]={
+ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,
+
+ 0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
+ 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
+ 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+ 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Same as asciiFromEbcdic[] except maps all letters to lowercase. */
+static const uint8_t lowercaseAsciiFromEbcdic[256]={
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
+ 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
+
+ 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
+ 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
+ 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
+
+ 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
+ 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
+
+ 0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+/*
+ * Bit sets indicating which characters of the ASCII repertoire
+ * (by ASCII/Unicode code) are "invariant".
+ * See utypes.h for more details.
+ *
+ * As invariant are considered the characters of the ASCII repertoire except
+ * for the following:
+ * 21 '!' <exclamation mark>
+ * 23 '#' <number sign>
+ * 24 '$' <dollar sign>
+ *
+ * 40 '@' <commercial at>
+ *
+ * 5b '[' <left bracket>
+ * 5c '\' <backslash>
+ * 5d ']' <right bracket>
+ * 5e '^' <circumflex>
+ *
+ * 60 '`' <grave accent>
+ *
+ * 7b '{' <left brace>
+ * 7c '|' <vertical line>
+ * 7d '}' <right brace>
+ * 7e '~' <tilde>
+ */
+static const uint32_t invariantChars[4]={
+ 0xfffffbff, /* 00..1f but not 0a */
+ 0xffffffe5, /* 20..3f but not 21 23 24 */
+ 0x87fffffe, /* 40..5f but not 40 5b..5e */
+ 0x87fffffe /* 60..7f but not 60 7b..7e */
+};
+
+/*
+ * test unsigned types (or values known to be non-negative) for invariant characters,
+ * tests ASCII-family character values
+ */
+#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)
+
+/* test signed types for invariant characters, adds test for positive values */
+#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))
+
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+#define CHAR_TO_UCHAR(c) c
+#define UCHAR_TO_CHAR(c) c
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+#define CHAR_TO_UCHAR(u) asciiFromEbcdic[u]
+#define UCHAR_TO_CHAR(u) ebcdicFromAscii[u]
+#else
+# error U_CHARSET_FAMILY is not valid
+#endif
+
+
+U_CAPI void U_EXPORT2
+u_charsToUChars(const char *cs, UChar *us, int32_t length) {
+ UChar u;
+ uint8_t c;
+
+ /*
+ * Allow the entire ASCII repertoire to be mapped _to_ Unicode.
+ * For EBCDIC systems, this works for characters with codes from
+ * codepages 37 and 1047 or compatible.
+ */
+ while(length>0) {
+ c=(uint8_t)(*cs++);
+ u=(UChar)CHAR_TO_UCHAR(c);
+ U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */
+ *us++=u;
+ --length;
+ }
+}
+
+U_CAPI void U_EXPORT2
+u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
+ UChar u;
+
+ while(length>0) {
+ u=*us++;
+ if(!UCHAR_IS_INVARIANT(u)) {
+ U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */
+ u=0;
+ }
+ *cs++=(char)UCHAR_TO_CHAR(u);
+ --length;
+ }
+}
+
+U_CAPI UBool U_EXPORT2
+uprv_isInvariantString(const char *s, int32_t length) {
+ uint8_t c;
+
+ for(;;) {
+ if(length<0) {
+ /* negative length: the string is NUL-terminated, stop at the first NUL */
+ c=(uint8_t)*s++;
+ if(c==0) {
+ break;
+ }
+ } else {
+ /* explicit length: examine exactly length bytes, counting down */
+ if(length==0) {
+ break;
+ }
+ --length;
+ c=(uint8_t)*s++;
+ if(c==0) {
+ continue; /* an embedded NUL is invariant, keep scanning */
+ }
+ }
+ /* c!=0 here, so a 0 produced by the EBCDIC table lookup below can only mean "no mapping" */
+
+ /*
+ * no assertions here because these functions are legitimately called
+ * for strings with variant characters
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+ if(!UCHAR_IS_INVARIANT(c)) {
+ return FALSE; /* found a variant char */
+ }
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+ c=CHAR_TO_UCHAR(c); /* map the EBCDIC byte to its ASCII code via asciiFromEbcdic[] */
+ if(c==0 || !UCHAR_IS_INVARIANT(c)) {
+ return FALSE; /* unmapped (0) or mapped to a variant char */
+ }
+#else
+# error U_CHARSET_FAMILY is not valid
+#endif
+ }
+ return TRUE; /* reached the end without meeting a variant char */
+}
+
+U_CAPI UBool U_EXPORT2
+uprv_isInvariantUString(const UChar *s, int32_t length) {
+ UChar c;
+
+ for(;;) {
+ if(length<0) {
+ /* NUL-terminated */
+ c=*s++;
+ if(c==0) {
+ break;
+ }
+ } else {
+ /* count length */
+ if(length==0) {
+ break;
+ }
+ --length;
+ c=*s++;
+ }
+
+ /*
+ * no assertions here because these functions are legitimately called
+ * for strings with variant characters
+ */
+ if(!UCHAR_IS_INVARIANT(c)) {
+ return FALSE; /* found a variant char */
+ }
+ }
+ return TRUE;
+}
+
+/* UDataSwapFn implementations used in udataswp.c ------- */
+
+/* convert ASCII to EBCDIC and verify that all characters are invariant */
+U_CAPI int32_t U_EXPORT2
+uprv_ebcdicFromAscii(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const uint8_t *s;
+ uint8_t *t;
+ uint8_t c;
+
+ int32_t count;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* setup and swapping */
+ s=(const uint8_t *)inData;
+ t=(uint8_t *)outData;
+ count=length;
+ while(count>0) {
+ c=*s++;
+ if(!UCHAR_IS_INVARIANT(c)) {
+ udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
+ length, length-count);
+ *pErrorCode=U_INVALID_CHAR_FOUND;
+ return 0;
+ }
+ *t++=ebcdicFromAscii[c];
+ --count;
+ }
+
+ return length;
+}
+
+/* Checks that an ASCII string has only invariant characters, then copies it without conversion; returns length, or 0 with *pErrorCode set on failure. */
+U_CFUNC int32_t
+uprv_copyAscii(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const uint8_t *s;
+ uint8_t c;
+
+ int32_t count;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0; /* no usable error code, or an earlier failure: do nothing */
+ }
+ if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* validate every input byte before anything is written to outData */
+ s=(const uint8_t *)inData;
+ count=length;
+ while(count>0) {
+ c=*s++;
+ if(!UCHAR_IS_INVARIANT(c)) {
+ udata_printError(ds, "uprv_copyAscii() string[%d] contains a variant character in position %d\n",
+ length, length-count); /* length-count is the 0-based index of the offending byte */
+ *pErrorCode=U_INVALID_CHAR_FOUND;
+ return 0;
+ }
+ --count;
+ }
+
+ if(length>0 && inData!=outData) { /* skip the copy when input and output are the same buffer */
+ uprv_memcpy(outData, inData, length);
+ }
+
+ return length;
+}
+
+/* convert EBCDIC to ASCII and verify that all characters are invariant */
+U_CFUNC int32_t
+uprv_asciiFromEbcdic(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const uint8_t *s;
+ uint8_t *t;
+ uint8_t c;
+
+ int32_t count;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* setup and swapping */
+ s=(const uint8_t *)inData;
+ t=(uint8_t *)outData;
+ count=length;
+ while(count>0) {
+ c=*s++;
+ if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
+ udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
+ length, length-count);
+ *pErrorCode=U_INVALID_CHAR_FOUND;
+ return 0;
+ }
+ *t++=c;
+ --count;
+ }
+
+ return length;
+}
+
+/* Checks that an EBCDIC string has only invariant characters, then copies it without conversion; returns length, or 0 with *pErrorCode set on failure. */
+U_CFUNC int32_t
+uprv_copyEbcdic(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const uint8_t *s;
+ uint8_t c;
+
+ int32_t count;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0; /* no usable error code, or an earlier failure: do nothing */
+ }
+ if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* validate every input byte before anything is written to outData */
+ s=(const uint8_t *)inData;
+ count=length;
+ while(count>0) {
+ c=*s++;
+ if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { /* NUL passes; a table result of 0 means "no mapping" */
+ udata_printError(ds, "uprv_copyEbcdic() string[%d] contains a variant character in position %d\n",
+ length, length-count); /* length-count is the 0-based index of the offending byte */
+ *pErrorCode=U_INVALID_CHAR_FOUND;
+ return 0;
+ }
+ --count;
+ }
+
+ if(length>0 && inData!=outData) { /* skip the copy when input and output are the same buffer */
+ uprv_memcpy(outData, inData, length);
+ }
+
+ return length;
+}
+
+U_CFUNC UBool
+uprv_isEbcdicAtSign(char c) {
+ static const uint8_t ebcdicAtSigns[] = {
+ 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
+ return c != 0 && uprv_strchr((const char *)ebcdicAtSigns, c) != nullptr;
+}
+
+/* compare invariant strings; variant characters compare less than others and unlike each other */
+U_CFUNC int32_t
+uprv_compareInvAscii(const UDataSwapper *ds,
+ const char *outString, int32_t outLength,
+ const UChar *localString, int32_t localLength) {
+ (void)ds;
+ int32_t minLength;
+ UChar32 c1, c2;
+ uint8_t c;
+
+ if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
+ return 0;
+ }
+
+ if(outLength<0) {
+ outLength=(int32_t)uprv_strlen(outString);
+ }
+ if(localLength<0) {
+ localLength=u_strlen(localString);
+ }
+
+ minLength= outLength<localLength ? outLength : localLength;
+
+ while(minLength>0) {
+ c=(uint8_t)*outString++;
+ if(UCHAR_IS_INVARIANT(c)) {
+ c1=c;
+ } else {
+ c1=-1;
+ }
+
+ c2=*localString++;
+ if(!UCHAR_IS_INVARIANT(c2)) {
+ c2=-2;
+ }
+
+ if((c1-=c2)!=0) {
+ return c1;
+ }
+
+ --minLength;
+ }
+
+ /* strings start with same prefix, compare lengths */
+ return outLength-localLength;
+}
+
+U_CFUNC int32_t
+uprv_compareInvEbcdic(const UDataSwapper *ds,
+ const char *outString, int32_t outLength,
+ const UChar *localString, int32_t localLength) {
+ (void)ds;
+ int32_t minLength;
+ UChar32 c1, c2;
+ uint8_t c;
+
+ if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
+ return 0;
+ }
+
+ if(outLength<0) {
+ outLength=(int32_t)uprv_strlen(outString);
+ }
+ if(localLength<0) {
+ localLength=u_strlen(localString);
+ }
+
+ minLength= outLength<localLength ? outLength : localLength;
+
+ while(minLength>0) {
+ c=(uint8_t)*outString++;
+ if(c==0) {
+ c1=0;
+ } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) {
+ /* c1 is set */
+ } else {
+ c1=-1;
+ }
+
+ c2=*localString++;
+ if(!UCHAR_IS_INVARIANT(c2)) {
+ c2=-2;
+ }
+
+ if((c1-=c2)!=0) {
+ return c1;
+ }
+
+ --minLength;
+ }
+
+ /* strings start with same prefix, compare lengths */
+ return outLength-localLength;
+}
+
+U_CAPI int32_t U_EXPORT2
+uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
+ int32_t c1, c2;
+
+ for(;; ++s1, ++s2) {
+ c1=(uint8_t)*s1;
+ c2=(uint8_t)*s2;
+ if(c1!=c2) {
+ if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) {
+ c1=-(int32_t)(uint8_t)*s1;
+ }
+ if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) {
+ c2=-(int32_t)(uint8_t)*s2;
+ }
+ return c1-c2;
+ } else if(c1==0) {
+ return 0;
+ }
+ }
+}
+
+U_CAPI char U_EXPORT2
+uprv_ebcdicToAscii(char c) {
+ return (char)asciiFromEbcdic[(uint8_t)c];
+}
+
+U_CAPI char U_EXPORT2
+uprv_ebcdicToLowercaseAscii(char c) {
+ return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
+}
+
+U_CAPI uint8_t* U_EXPORT2
+uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
+{
+ uint8_t *orig_dst = dst; /* remembered so the start of the destination can be returned */
+
+ if(n==-1) {
+ n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* n==-1: whole string; the +1 lets the padding loop write the NUL */
+ }
+ /* convert EBCDIC bytes to ASCII until the source NUL or until n bytes are written */
+ while(*src && n>0) {
+ *(dst++) = asciiFromEbcdic[*(src++)]; /* bytes without a mapping are written as 0 (the table's "no mapping" value) */
+ n--;
+ }
+ /* fill whatever remains of the n bytes with zeros */
+ while(n>0) {
+ *(dst++) = 0;
+ n--;
+ }
+ return orig_dst;
+}
+
+U_CAPI uint8_t* U_EXPORT2
+uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
+{
+ uint8_t *orig_dst = dst; /* remembered so the start of the destination can be returned */
+
+ if(n==-1) {
+ n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* n==-1: whole string; the +1 lets the padding loop write the NUL */
+ }
+ /* convert ASCII bytes to EBCDIC until the source NUL or until n bytes are written */
+ while(*src && n>0) {
+ char ch = ebcdicFromAscii[*(src++)];
+ if(ch == 0) { /* 0 is the table's "no mapping" value; the source byte itself is non-NUL here */
+ ch = ebcdicFromAscii[0x3f]; /* substitute the EBCDIC code of ASCII '?' (0x3f), i.e. 0x6f */
+ }
+ *(dst++) = ch;
+ n--;
+ }
+ /* fill whatever remains of the n bytes with zeros */
+ while(n>0) {
+ *(dst++) = 0;
+ n--;
+ }
+ return orig_dst;
+}
+
diff --git a/thirdparty/icu4c/common/uinvchar.h b/thirdparty/icu4c/common/uinvchar.h
new file mode 100644
index 0000000000..9b7a9bd114
--- /dev/null
+++ b/thirdparty/icu4c/common/uinvchar.h
@@ -0,0 +1,219 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uinvchar.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2004sep14
+* created by: Markus W. Scherer
+*
+* Definitions for handling invariant characters, moved here from putil.c
+* for better modularization.
+*/
+
+#ifndef __UINVCHAR_H__
+#define __UINVCHAR_H__
+
+#include "unicode/utypes.h"
+#ifdef __cplusplus
+#include "unicode/unistr.h"
+#endif
+
+/**
+ * Check if a char string only contains invariant characters.
+ * See utypes.h for details.
+ *
+ * @param s Input string pointer.
+ * @param length Length of the string, can be -1 if NUL-terminated.
+ * @return true if s contains only invariant characters.
+ *
+ * @internal (ICU 2.8)
+ */
+U_CAPI UBool U_EXPORT2
+uprv_isInvariantString(const char *s, int32_t length);
+
+/**
+ * Check if a Unicode string only contains invariant characters.
+ * See utypes.h for details.
+ *
+ * @param s Input string pointer.
+ * @param length Length of the string, can be -1 if NUL-terminated.
+ * @return true if s contains only invariant characters.
+ *
+ * @internal (ICU 2.8)
+ */
+U_CAPI UBool U_EXPORT2
+uprv_isInvariantUString(const UChar *s, int32_t length);
+
+/**
+ * \def U_UPPER_ORDINAL
+ * Get the ordinal number of an uppercase invariant character
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define U_UPPER_ORDINAL(x) ((x)-'A')
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define U_UPPER_ORDINAL(x) (((x) < 'J') ? ((x)-'A') : \
+ (((x) < 'S') ? ((x)-'J'+9) : \
+ ((x)-'S'+18)))
+#else
+# error Unknown charset family!
+#endif
+
+#ifdef __cplusplus
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Like U_UPPER_ORDINAL(x) but with validation.
+ * Returns 0..25 for A..Z else a value outside 0..25.
+ */
+inline int32_t uprv_upperOrdinal(int32_t c) {
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+ return c - 'A';
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+ // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8).
+ // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
+ if (c <= 'I') { return c - 'A'; } // A-I --> 0-8
+ if (c < 'J') { return -1; }
+ if (c <= 'R') { return c - 'J' + 9; } // J-R --> 9..17
+ if (c < 'S') { return -1; }
+ return c - 'S' + 18; // S-Z --> 18..25
+#else
+# error Unknown charset family!
+#endif
+}
+
+// Like U_UPPER_ORDINAL(x) but for lowercase and with validation.
+// Returns 0..25 for a..z else a value outside 0..25.
+inline int32_t uprv_lowerOrdinal(int32_t c) {
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+ return c - 'a';
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+ // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8).
+ // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
+ if (c <= 'i') { return c - 'a'; } // a-i --> 0-8
+ if (c < 'j') { return -1; }
+ if (c <= 'r') { return c - 'j' + 9; } // j-r --> 9..17
+ if (c < 's') { return -1; }
+ return c - 's' + 18; // s-z --> 18..25
+#else
+# error Unknown charset family!
+#endif
+}
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Returns true if c == '@' is possible.
+ * The @ sign is variant, and the @ sign used on one
+ * EBCDIC machine won't be compiled the same way on other EBCDIC based machines.
+ * @internal
+ */
+U_CFUNC UBool
+uprv_isEbcdicAtSign(char c);
+
+/**
+ * \def uprv_isAtSign
+ * Returns true if c == '@' is possible.
+ * For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign().
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define uprv_isAtSign(c) ((c)=='@')
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define uprv_isAtSign(c) uprv_isEbcdicAtSign(c)
+#else
+# error Unknown charset family!
+#endif
+
+/**
+ * Compare two EBCDIC invariant-character strings in ASCII order.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2);
+
+/**
+ * \def uprv_compareInvCharsAsAscii
+ * Compare two invariant-character strings in ASCII order.
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2)
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2)
+#else
+# error Unknown charset family!
+#endif
+
+/**
+ * Converts an EBCDIC invariant character to ASCII.
+ * @internal
+ */
+U_CAPI char U_EXPORT2
+uprv_ebcdicToAscii(char c);
+
+/**
+ * \def uprv_invCharToAscii
+ * Converts an invariant character to ASCII.
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define uprv_invCharToAscii(c) (c)
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c)
+#else
+# error Unknown charset family!
+#endif
+
+/**
+ * Converts an EBCDIC invariant character to lowercase ASCII.
+ * @internal
+ */
+U_CAPI char U_EXPORT2
+uprv_ebcdicToLowercaseAscii(char c);
+
+/**
+ * \def uprv_invCharToLowercaseAscii
+ * Converts an invariant character to lowercase ASCII.
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define uprv_invCharToLowercaseAscii uprv_asciitolower
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii
+#else
+# error Unknown charset family!
+#endif
+
+/**
+ * Copy EBCDIC to ASCII
+ * @internal
+ * @see uprv_strncpy
+ */
+U_CAPI uint8_t* U_EXPORT2
+uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
+
+
+/**
+ * Copy ASCII to EBCDIC
+ * @internal
+ * @see uprv_strncpy
+ */
+U_CAPI uint8_t* U_EXPORT2
+uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
+
+
+
+#endif
diff --git a/thirdparty/icu4c/common/uiter.cpp b/thirdparty/icu4c/common/uiter.cpp
new file mode 100644
index 0000000000..b9252d81c2
--- /dev/null
+++ b/thirdparty/icu4c/common/uiter.cpp
@@ -0,0 +1,1108 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uiter.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jan18
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "unicode/chariter.h"
+#include "unicode/rep.h"
+#include "unicode/uiter.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include "cstring.h"
+
+U_NAMESPACE_USE
+
+#define IS_EVEN(n) (((n)&1)==0)
+#define IS_POINTER_EVEN(p) IS_EVEN((size_t)p)
+
+U_CDECL_BEGIN
+
+/* No-Op UCharIterator implementation for illegal input --------------------- */
+
+static int32_t U_CALLCONV
+noopGetIndex(UCharIterator * /*iter*/, UCharIteratorOrigin /*origin*/) {
+ return 0;
+}
+
+static int32_t U_CALLCONV
+noopMove(UCharIterator * /*iter*/, int32_t /*delta*/, UCharIteratorOrigin /*origin*/) {
+ return 0;
+}
+
+static UBool U_CALLCONV
+noopHasNext(UCharIterator * /*iter*/) {
+ return FALSE;
+}
+
+static UChar32 U_CALLCONV
+noopCurrent(UCharIterator * /*iter*/) {
+ return U_SENTINEL;
+}
+
+static uint32_t U_CALLCONV
+noopGetState(const UCharIterator * /*iter*/) {
+ return UITER_NO_STATE;
+}
+
+static void U_CALLCONV
+noopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode *pErrorCode) {
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+}
+
+static const UCharIterator noopIterator={
+ 0, 0, 0, 0, 0, 0,
+ noopGetIndex,
+ noopMove,
+ noopHasNext,
+ noopHasNext,
+ noopCurrent,
+ noopCurrent,
+ noopCurrent,
+ NULL,
+ noopGetState,
+ noopSetState
+};
+
+/* UCharIterator implementation for simple strings -------------------------- */
+
+/*
+ * This is an implementation of a code unit (UChar) iterator
+ * for UChar * strings.
+ *
+ * The UCharIterator.context field holds a pointer to the string.
+ */
+
+static int32_t U_CALLCONV
+stringIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
+ switch(origin) {
+ case UITER_ZERO:
+ return 0;
+ case UITER_START:
+ return iter->start;
+ case UITER_CURRENT:
+ return iter->index;
+ case UITER_LIMIT:
+ return iter->limit;
+ case UITER_LENGTH:
+ return iter->length;
+ default:
+ /* not a valid origin */
+ /* Should never get here! */
+ return -1;
+ }
+}
+
+static int32_t U_CALLCONV
+stringIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
+ int32_t pos;
+
+ switch(origin) {
+ case UITER_ZERO:
+ pos=delta;
+ break;
+ case UITER_START:
+ pos=iter->start+delta;
+ break;
+ case UITER_CURRENT:
+ pos=iter->index+delta;
+ break;
+ case UITER_LIMIT:
+ pos=iter->limit+delta;
+ break;
+ case UITER_LENGTH:
+ pos=iter->length+delta;
+ break;
+ default:
+ return -1; /* Error */
+ }
+
+ if(pos<iter->start) {
+ pos=iter->start;
+ } else if(pos>iter->limit) {
+ pos=iter->limit;
+ }
+
+ return iter->index=pos;
+}
+
+static UBool U_CALLCONV
+stringIteratorHasNext(UCharIterator *iter) {
+ return iter->index<iter->limit;
+}
+
+static UBool U_CALLCONV
+stringIteratorHasPrevious(UCharIterator *iter) {
+ return iter->index>iter->start;
+}
+
+static UChar32 U_CALLCONV
+stringIteratorCurrent(UCharIterator *iter) {
+ if(iter->index<iter->limit) {
+ return ((const UChar *)(iter->context))[iter->index];
+ } else {
+ return U_SENTINEL;
+ }
+}
+
+static UChar32 U_CALLCONV
+stringIteratorNext(UCharIterator *iter) {
+ if(iter->index<iter->limit) {
+ return ((const UChar *)(iter->context))[iter->index++];
+ } else {
+ return U_SENTINEL;
+ }
+}
+
+static UChar32 U_CALLCONV
+stringIteratorPrevious(UCharIterator *iter) {
+ if(iter->index>iter->start) {
+ return ((const UChar *)(iter->context))[--iter->index];
+ } else {
+ return U_SENTINEL;
+ }
+}
+
+static uint32_t U_CALLCONV
+stringIteratorGetState(const UCharIterator *iter) {
+ return (uint32_t)iter->index;
+}
+
+static void U_CALLCONV
+stringIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) {
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ /* do nothing */
+ } else if(iter==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ } else if((int32_t)state<iter->start || iter->limit<(int32_t)state) {
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ } else {
+ iter->index=(int32_t)state;
+ }
+}
+
+static const UCharIterator stringIterator={
+ 0, 0, 0, 0, 0, 0,
+ stringIteratorGetIndex,
+ stringIteratorMove,
+ stringIteratorHasNext,
+ stringIteratorHasPrevious,
+ stringIteratorCurrent,
+ stringIteratorNext,
+ stringIteratorPrevious,
+ NULL,
+ stringIteratorGetState,
+ stringIteratorSetState
+};
+
+U_CAPI void U_EXPORT2
+uiter_setString(UCharIterator *iter, const UChar *s, int32_t length) {
+ if(iter!=0) {
+ if(s!=0 && length>=-1) {
+ *iter=stringIterator;
+ iter->context=s;
+ if(length>=0) {
+ iter->length=length;
+ } else {
+ iter->length=u_strlen(s);
+ }
+ iter->limit=iter->length;
+ } else {
+ *iter=noopIterator;
+ }
+ }
+}
+
+/* UCharIterator implementation for UTF-16BE strings ------------------------ */
+
+/*
+ * This is an implementation of a code unit (UChar) iterator
+ * for UTF-16BE strings, i.e., strings in byte-vectors where
+ * each UChar is stored as a big-endian pair of bytes.
+ *
+ * The UCharIterator.context field holds a pointer to the string.
+ * Everything works just like with a normal UChar iterator (uiter_setString),
+ * except that UChars are assembled from byte pairs.
+ */
+
+/* internal helper function */
+static inline UChar32
+utf16BEIteratorGet(UCharIterator *iter, int32_t index) {
+ const uint8_t *p=(const uint8_t *)iter->context;
+ return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1];
+}
+
+static UChar32 U_CALLCONV
+utf16BEIteratorCurrent(UCharIterator *iter) {
+ int32_t index;
+
+ if((index=iter->index)<iter->limit) {
+ return utf16BEIteratorGet(iter, index);
+ } else {
+ return U_SENTINEL;
+ }
+}
+
+static UChar32 U_CALLCONV
+utf16BEIteratorNext(UCharIterator *iter) {
+ int32_t index;
+
+ if((index=iter->index)<iter->limit) {
+ iter->index=index+1;
+ return utf16BEIteratorGet(iter, index);
+ } else {
+ return U_SENTINEL;
+ }
+}
+
+static UChar32 U_CALLCONV
+utf16BEIteratorPrevious(UCharIterator *iter) {
+ int32_t index;
+
+ if((index=iter->index)>iter->start) {
+ iter->index=--index;
+ return utf16BEIteratorGet(iter, index);
+ } else {
+ return U_SENTINEL;
+ }
+}
+
+static const UCharIterator utf16BEIterator={
+ 0, 0, 0, 0, 0, 0,
+ stringIteratorGetIndex,
+ stringIteratorMove,
+ stringIteratorHasNext,
+ stringIteratorHasPrevious,
+ utf16BEIteratorCurrent,
+ utf16BEIteratorNext,
+ utf16BEIteratorPrevious,
+ NULL,
+ stringIteratorGetState,
+ stringIteratorSetState
+};
+
+/*
+ * Count the number of UChars in a UTF-16BE string before a terminating UChar NUL,
+ * i.e., before a pair of 0 bytes where the first 0 byte is at an even
+ * offset from s.
+ */
+static int32_t
+utf16BE_strlen(const char *s) {
+ if(IS_POINTER_EVEN(s)) {
+ /*
+ * even-aligned, call u_strlen(s)
+ * we are probably on a little-endian machine, but searching for UChar NUL
+ * does not care about endianness
+ */
+ return u_strlen((const UChar *)s);
+ } else {
+ /* odd-aligned, search for pair of 0 bytes */
+ const char *p=s;
+
+ while(!(*p==0 && p[1]==0)) {
+ p+=2;
+ }
+ return (int32_t)((p-s)/2);
+ }
+}
+/*
+ * Sets up iter to iterate over the UTF-16BE byte string s.
+ * length counts bytes; -1 means that s is terminated by a UChar NUL.
+ * A NULL iter is ignored. If s is NULL, or length is odd or less than -1,
+ * iter becomes a copy of noopIterator.
+ */
+U_CAPI void U_EXPORT2
+uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length) {
+ if(iter!=NULL) {
+ /* allow only even-length strings (the input length counts bytes) */
+ if(s!=NULL && (length==-1 || (length>=0 && IS_EVEN(length)))) {
+ /* length/=2, except that >>=1 also works for -1 (-1/2==0, -1>>1==-1) */
+ length>>=1; /* from here on length counts UChars, or is still -1 */
+
+ if(U_IS_BIG_ENDIAN && IS_POINTER_EVEN(s)) {
+ /* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */
+ uiter_setString(iter, (const UChar *)s, length);
+ return;
+ }
+
+ *iter=utf16BEIterator; /* start from the UTF-16BE function table */
+ iter->context=s;
+ if(length>=0) {
+ iter->length=length;
+ } else {
+ iter->length=utf16BE_strlen(s); /* NUL-terminated input: count the UChars now */
+ }
+ iter->limit=iter->length;
+ } else {
+ *iter=noopIterator;
+ }
+ }
+}
+
+/* UCharIterator wrapper around CharacterIterator --------------------------- */
+
+/*
+ * This is wrapper code around a C++ CharacterIterator to
+ * look like a C UCharIterator.
+ *
+ * The UCharIterator.context field holds a pointer to the CharacterIterator.
+ */
+
+/*
+ * UCharIterator getIndex callback for the CharacterIterator wrapper.
+ * Answers each origin from the wrapped CharacterIterator:
+ * UITER_ZERO is always 0, UITER_START is startIndex(), UITER_CURRENT is
+ * getIndex(), UITER_LIMIT is endIndex() and UITER_LENGTH is getLength().
+ * Any other origin yields -1.
+ */
+static int32_t U_CALLCONV
+characterIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
+    if(origin==UITER_ZERO) {
+        return 0;
+    } else if(origin==UITER_START) {
+        return ((CharacterIterator *)(iter->context))->startIndex();
+    } else if(origin==UITER_CURRENT) {
+        return ((CharacterIterator *)(iter->context))->getIndex();
+    } else if(origin==UITER_LIMIT) {
+        return ((CharacterIterator *)(iter->context))->endIndex();
+    } else if(origin==UITER_LENGTH) {
+        return ((CharacterIterator *)(iter->context))->getLength();
+    } else {
+        /* not a valid origin; should never get here */
+        return -1;
+    }
+}
+
+/*
+ * UCharIterator move callback for the CharacterIterator wrapper.
+ * UITER_START, UITER_CURRENT and UITER_LIMIT are forwarded to
+ * CharacterIterator::move() with the origin cast to its EOrigin type.
+ * UITER_ZERO sets the index to delta, UITER_LENGTH to getLength()+delta;
+ * both then report the index that the CharacterIterator ended up with.
+ * Any other origin yields -1.
+ */
+static int32_t U_CALLCONV
+characterIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
+    if(origin==UITER_START || origin==UITER_CURRENT || origin==UITER_LIMIT) {
+        return ((CharacterIterator *)(iter->context))->move(delta, (CharacterIterator::EOrigin)origin);
+    }
+    if(origin!=UITER_ZERO && origin!=UITER_LENGTH) {
+        /* not a valid origin; should never get here */
+        return -1;
+    }
+
+    CharacterIterator *charIter=(CharacterIterator *)(iter->context);
+    if(origin==UITER_ZERO) {
+        charIter->setIndex(delta);
+    } else {
+        charIter->setIndex(charIter->getLength()+delta);
+    }
+    return charIter->getIndex();
+}
+
+/* UCharIterator hasNext callback: forwards to the wrapped CharacterIterator. */
+static UBool U_CALLCONV
+characterIteratorHasNext(UCharIterator *iter) {
+    CharacterIterator *charIter=(CharacterIterator *)(iter->context);
+    return charIter->hasNext();
+}
+
+/* UCharIterator hasPrevious callback: forwards to the wrapped CharacterIterator. */
+static UBool U_CALLCONV
+characterIteratorHasPrevious(UCharIterator *iter) {
+    CharacterIterator *charIter=(CharacterIterator *)(iter->context);
+    return charIter->hasPrevious();
+}
+
+/*
+ * UCharIterator current callback for the CharacterIterator wrapper.
+ * Returns what CharacterIterator::current() reports, except that the value
+ * 0xffff is turned into U_SENTINEL when hasNext() is false as well, so a
+ * 0xffff that is followed by more text is still returned as is.
+ */
+static UChar32 U_CALLCONV
+characterIteratorCurrent(UCharIterator *iter) {
+    CharacterIterator *charIter=(CharacterIterator *)(iter->context);
+    const UChar32 unit=charIter->current();
+
+    if(unit==0xffff && !charIter->hasNext()) {
+        return U_SENTINEL;
+    }
+    return unit;
+}
+
+/*
+ * UCharIterator next callback for the CharacterIterator wrapper.
+ * Returns nextPostInc() while the wrapped iterator has more text,
+ * U_SENTINEL otherwise.
+ */
+static UChar32 U_CALLCONV
+characterIteratorNext(UCharIterator *iter) {
+    CharacterIterator *charIter=(CharacterIterator *)(iter->context);
+
+    if(!charIter->hasNext()) {
+        return U_SENTINEL;
+    }
+    return charIter->nextPostInc();
+}
+
+/*
+ * UCharIterator previous callback for the CharacterIterator wrapper.
+ * Returns previous() while the wrapped iterator can move backward,
+ * U_SENTINEL otherwise.
+ */
+static UChar32 U_CALLCONV
+characterIteratorPrevious(UCharIterator *iter) {
+    CharacterIterator *charIter=(CharacterIterator *)(iter->context);
+
+    if(!charIter->hasPrevious()) {
+        return U_SENTINEL;
+    }
+    return charIter->previous();
+}
+
+/* UCharIterator getState callback: the state word is the wrapped iterator's current index. */
+static uint32_t U_CALLCONV
+characterIteratorGetState(const UCharIterator *iter) {
+    CharacterIterator *charIter=(CharacterIterator *)(iter->context);
+    return charIter->getIndex();
+}
+
+/*
+ * UCharIterator setState callback for the CharacterIterator wrapper.
+ * Interprets state as an index and passes it to setIndex().
+ * Does nothing if pErrorCode is NULL or already indicates a failure.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR if iter or its context is NULL, and
+ * U_INDEX_OUTOFBOUNDS_ERROR if the index lies outside
+ * [startIndex(), endIndex()].
+ */
+static void U_CALLCONV
+characterIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return;
+    }
+    if(iter==NULL || iter->context==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    CharacterIterator *charIter=(CharacterIterator *)(iter->context);
+    const int32_t target=(int32_t)state;
+    if(target<charIter->startIndex() || charIter->endIndex()<target) {
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return;
+    }
+    charIter->setIndex(target);
+}
+/*
+ * Template for iterators that wrap a CharacterIterator:
+ * uiter_setCharacterIterator() copies it into the caller's UCharIterator and
+ * then stores the CharacterIterator pointer in context. Every callback
+ * delegates to that object, so the numeric fields stay 0.
+ */
+static const UCharIterator characterIteratorWrapper={
+ 0, 0, 0, 0, 0, 0, /* leading data fields start out zeroed */
+ characterIteratorGetIndex,
+ characterIteratorMove,
+ characterIteratorHasNext,
+ characterIteratorHasPrevious,
+ characterIteratorCurrent,
+ characterIteratorNext,
+ characterIteratorPrevious,
+ NULL, /* NOTE(review): presumably the unused reserved function slot - confirm against the UCharIterator declaration */
+ characterIteratorGetState,
+ characterIteratorSetState
+};
+
+/*
+ * Sets up iter as a wrapper around charIter.
+ * A NULL iter is ignored; a NULL charIter turns iter into a copy of
+ * noopIterator.
+ */
+U_CAPI void U_EXPORT2
+uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter) {
+    if(iter==NULL) {
+        return;
+    }
+    if(charIter==NULL) {
+        *iter=noopIterator;
+        return;
+    }
+
+    *iter=characterIteratorWrapper;
+    iter->context=charIter;
+}
+
+/* UCharIterator wrapper around Replaceable --------------------------------- */
+
+/*
+ * This is an implementation of a code unit (UChar) iterator
+ * based on a Replaceable object.
+ *
+ * The UCharIterator.context field holds a pointer to the Replaceable.
+ * UCharIterator.length and UCharIterator.index hold Replaceable.length()
+ * and the iteration index.
+ */
+
+/*
+ * UCharIterator current callback for the Replaceable iterator.
+ * Returns charAt(index) while the index is below the limit, else U_SENTINEL.
+ */
+static UChar32 U_CALLCONV
+replaceableIteratorCurrent(UCharIterator *iter) {
+    if(iter->index>=iter->limit) {
+        return U_SENTINEL;
+    }
+    return ((Replaceable *)(iter->context))->charAt(iter->index);
+}
+
+/*
+ * UCharIterator next callback for the Replaceable iterator.
+ * Returns the code unit at the current index and advances the index by one;
+ * returns U_SENTINEL without moving once the index has reached the limit.
+ */
+static UChar32 U_CALLCONV
+replaceableIteratorNext(UCharIterator *iter) {
+    if(iter->index>=iter->limit) {
+        return U_SENTINEL;
+    }
+
+    const int32_t pos=iter->index;
+    iter->index=pos+1;
+    return ((Replaceable *)(iter->context))->charAt(pos);
+}
+
+/*
+ * UCharIterator previous callback for the Replaceable iterator.
+ * Steps the index back by one and returns the code unit found there;
+ * returns U_SENTINEL without moving when the index is already at the start.
+ */
+static UChar32 U_CALLCONV
+replaceableIteratorPrevious(UCharIterator *iter) {
+    if(iter->index<=iter->start) {
+        return U_SENTINEL;
+    }
+
+    const int32_t pos=iter->index-1;
+    iter->index=pos;
+    return ((Replaceable *)(iter->context))->charAt(pos);
+}
+/*
+ * Template for iterators over a Replaceable: uiter_setReplaceable() copies it
+ * into the caller's UCharIterator and then fills in context, length and limit.
+ * Index handling reuses the stringIterator* callbacks; only current, next and
+ * previous go through Replaceable::charAt().
+ */
+static const UCharIterator replaceableIterator={
+ 0, 0, 0, 0, 0, 0, /* leading data fields start out zeroed */
+ stringIteratorGetIndex,
+ stringIteratorMove,
+ stringIteratorHasNext,
+ stringIteratorHasPrevious,
+ replaceableIteratorCurrent,
+ replaceableIteratorNext,
+ replaceableIteratorPrevious,
+ NULL, /* NOTE(review): presumably the unused reserved function slot - confirm against the UCharIterator declaration */
+ stringIteratorGetState,
+ stringIteratorSetState
+};
+
+/*
+ * Sets up iter to iterate over the code units of rep.
+ * A NULL iter is ignored; a NULL rep turns iter into a copy of noopIterator.
+ * The length of rep is read once here and stored as both length and limit.
+ */
+U_CAPI void U_EXPORT2
+uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep) {
+    if(iter==NULL) {
+        return;
+    }
+    if(rep==NULL) {
+        *iter=noopIterator;
+        return;
+    }
+
+    *iter=replaceableIterator;
+    iter->context=rep;
+    const int32_t repLength=rep->length();
+    iter->length=repLength;
+    iter->limit=repLength;
+}
+
+/* UCharIterator implementation for UTF-8 strings --------------------------- */
+
+/*
+ * Possible extension, probably necessary only for an implementation for
+ * arbitrary converters:
+ * Maintain a buffer (ring buffer?) for a piece of converted 16-bit text.
+ * This would require turning reservedFn into a close function and
+ * introducing a uiter_close(iter).
+ */
+
+#define UITER_CNV_CAPACITY 16
+
+/*
+ * Minimal implementation:
+ * Maintain a single-UChar buffer for an additional surrogate.
+ * The caller must not modify start and limit because they are used internally.
+ *
+ * Use UCharIterator fields as follows:
+ *   context        - pointer to UTF-8 string
+ *   length         - UTF-16 length of the string; -1 until lazy evaluation
+ *   start          - current UTF-8 index
+ *   index          - current UTF-16 index; may be -1="unknown" after setState()
+ *   limit          - UTF-8 length of the string
+ *   reservedField  - supplementary code point
+ *
+ * Since UCharIterator delivers 16-bit code units, the iteration can be
+ * currently in the middle of the byte sequence for a supplementary code point.
+ * In this case, reservedField will contain that code point and start will
+ * point to after the corresponding byte sequence. The UTF-16 index will be
+ * one less than what it would otherwise be corresponding to the UTF-8 index.
+ * Otherwise, reservedField will be 0.
+ */
+
+/*
+ * Possible optimization for NUL-terminated UTF-8 and UTF-16 strings:
+ * Add implementations that do not call strlen() for iteration but check for NUL.
+ */
+/*
+ * UCharIterator getIndex callback for the UTF-8 iterator.
+ * UITER_ZERO and UITER_START are always 0.
+ * UITER_CURRENT returns the UTF-16 index; if it is unknown (iter->index<0) it
+ * is recomputed by decoding from byte 0 up to the UTF-8 index and then cached.
+ * UITER_LIMIT and UITER_LENGTH return the UTF-16 length; if it is unknown
+ * (iter->length<0) it is computed by decoding the string and then cached.
+ * Any other origin yields -1.
+ * Side effects: may update iter->start, iter->index and iter->length.
+ */
+static int32_t U_CALLCONV
+utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
+ switch(origin) {
+ case UITER_ZERO:
+ case UITER_START:
+ return 0;
+ case UITER_CURRENT:
+ if(iter->index<0) {
+ /* the current UTF-16 index is unknown after setState(), count from the beginning */
+ const uint8_t *s;
+ UChar32 c;
+ int32_t i, limit, index;
+
+ s=(const uint8_t *)iter->context;
+ i=index=0;
+ limit=iter->start; /* count up to the UTF-8 index */
+ while(i<limit) {
+ U8_NEXT_OR_FFFD(s, i, limit, c);
+ index+=U16_LENGTH(c);
+ }
+
+ iter->start=i; /* just in case setState() did not get us to a code point boundary */
+ if(i==iter->limit) {
+ iter->length=index; /* in case it was <0 or wrong */
+ }
+ if(iter->reservedField!=0) {
+ --index; /* we are in the middle of a supplementary code point */
+ }
+ iter->index=index; /* cache the result so that later calls skip the counting */
+ }
+ return iter->index;
+ case UITER_LIMIT:
+ case UITER_LENGTH:
+ if(iter->length<0) {
+ const uint8_t *s;
+ UChar32 c;
+ int32_t i, limit, length;
+
+ s=(const uint8_t *)iter->context;
+ if(iter->index<0) {
+ /*
+ * the current UTF-16 index is unknown after setState(),
+ * we must first count from the beginning to here
+ */
+ i=length=0;
+ limit=iter->start;
+
+ /* count from the beginning to the current index */
+ while(i<limit) {
+ U8_NEXT_OR_FFFD(s, i, limit, c);
+ length+=U16_LENGTH(c);
+ }
+
+ /* assume i==limit==iter->start, set the UTF-16 index */
+ iter->start=i; /* just in case setState() did not get us to a code point boundary */
+ iter->index= iter->reservedField!=0 ? length-1 : length;
+ } else {
+ /* the UTF-16 index is known: start counting from the current position */
+ i=iter->start;
+ length=iter->index;
+ if(iter->reservedField!=0) {
+ ++length; /* start is already behind the supplementary code point, index is not */
+ }
+ }
+
+ /* count from the current index to the end */
+ limit=iter->limit;
+ while(i<limit) {
+ U8_NEXT_OR_FFFD(s, i, limit, c);
+ length+=U16_LENGTH(c);
+ }
+ iter->length=length; /* cache the result */
+ }
+ return iter->length;
+ default:
+ /* not a valid origin */
+ /* Should never get here! */
+ return -1;
+ }
+}
+/*
+ * UCharIterator move callback for the UTF-8 iterator.
+ * Moves by delta UTF-16 code units relative to origin and pins the result to
+ * the ends of the string. Because the UTF-16 index and length may be unknown
+ * (<0), the function either works with an absolute target position (havePos)
+ * or moves purely relative to the current UTF-8 byte index.
+ * Returns the new UTF-16 index, UITER_UNKNOWN_INDEX if that index is still
+ * unknown after the move, or -1 for an invalid origin.
+ */
+static int32_t U_CALLCONV
+utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
+ const uint8_t *s;
+ UChar32 c;
+ int32_t pos; /* requested UTF-16 index */
+ int32_t i; /* UTF-8 index */
+ UBool havePos; /* TRUE if pos holds an absolute target, FALSE if only delta is usable */
+
+ /* calculate the requested UTF-16 index */
+ switch(origin) {
+ case UITER_ZERO:
+ case UITER_START:
+ pos=delta;
+ havePos=TRUE;
+ /* iter->index<0 (unknown) is possible */
+ break;
+ case UITER_CURRENT:
+ if(iter->index>=0) {
+ pos=iter->index+delta;
+ havePos=TRUE;
+ } else {
+ /* the current UTF-16 index is unknown after setState(), use only delta */
+ pos=0;
+ havePos=FALSE;
+ }
+ break;
+ case UITER_LIMIT:
+ case UITER_LENGTH:
+ if(iter->length>=0) {
+ pos=iter->length+delta;
+ havePos=TRUE;
+ } else {
+ /* pin to the end, avoid counting the length */
+ iter->index=-1;
+ iter->start=iter->limit;
+ iter->reservedField=0;
+ if(delta>=0) {
+ return UITER_UNKNOWN_INDEX;
+ } else {
+ /* the current UTF-16 index is unknown, use only delta */
+ pos=0;
+ havePos=FALSE;
+ }
+ }
+ break;
+ default:
+ return -1; /* Error */
+ }
+
+ if(havePos) {
+ /* shortcuts: pinning to the edges of the string */
+ if(pos<=0) {
+ iter->index=iter->start=iter->reservedField=0;
+ return 0;
+ } else if(iter->length>=0 && pos>=iter->length) {
+ iter->index=iter->length;
+ iter->start=iter->limit;
+ iter->reservedField=0;
+ return iter->index;
+ }
+
+ /* minimize the number of U8_NEXT/PREV operations */
+ if(iter->index<0 || pos<iter->index/2) {
+ /* go forward from the start instead of backward from the current index */
+ iter->index=iter->start=iter->reservedField=0;
+ } else if(iter->length>=0 && (iter->length-pos)<(pos-iter->index)) {
+ /*
+ * if we have the UTF-16 index and length and the new position is
+ * closer to the end than the current index,
+ * then go backward from the end instead of forward from the current index
+ */
+ iter->index=iter->length;
+ iter->start=iter->limit;
+ iter->reservedField=0;
+ }
+
+ delta=pos-iter->index; /* from here on delta is relative to the (possibly reset) current index */
+ if(delta==0) {
+ return iter->index; /* nothing to do */
+ }
+ } else {
+ /* move relative to unknown UTF-16 index */
+ if(delta==0) {
+ return UITER_UNKNOWN_INDEX; /* nothing to do */
+ } else if(-delta>=iter->start) {
+ /* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */
+ iter->index=iter->start=iter->reservedField=0;
+ return 0;
+ } else if(delta>=(iter->limit-iter->start)) {
+ /* moving forward by more UChars than the remaining UTF-8 bytes, pin to the end */
+ iter->index=iter->length; /* may or may not be <0 (unknown) */
+ iter->start=iter->limit;
+ iter->reservedField=0;
+ return iter->index>=0 ? iter->index : (int32_t)UITER_UNKNOWN_INDEX;
+ }
+ }
+
+ /* delta!=0 */
+
+ /* move towards the requested position, pin to the edges of the string */
+ s=(const uint8_t *)iter->context;
+ pos=iter->index; /* could be <0 (unknown) */
+ i=iter->start;
+ if(delta>0) {
+ /* go forward */
+ int32_t limit=iter->limit;
+ if(iter->reservedField!=0) {
+ /* first step over the pending second half of a supplementary code point */
+ iter->reservedField=0;
+ ++pos;
+ --delta;
+ }
+ while(delta>0 && i<limit) {
+ U8_NEXT_OR_FFFD(s, i, limit, c);
+ if(c<=0xffff) {
+ ++pos;
+ --delta;
+ } else if(delta>=2) {
+ pos+=2;
+ delta-=2;
+ } else /* delta==1 */ {
+ /* stop in the middle of a supplementary code point */
+ iter->reservedField=c;
+ ++pos;
+ break; /* delta=0; */
+ }
+ }
+ if(i==limit) {
+ /* reached the end: derive whichever of length/index was unknown from the other */
+ if(iter->length<0 && iter->index>=0) {
+ iter->length= iter->reservedField==0 ? pos : pos+1;
+ } else if(iter->index<0 && iter->length>=0) {
+ iter->index= iter->reservedField==0 ? iter->length : iter->length-1;
+ }
+ }
+ } else /* delta<0 */ {
+ /* go backward */
+ if(iter->reservedField!=0) {
+ iter->reservedField=0;
+ i-=4; /* we stayed behind the supplementary code point; go before it now */
+ --pos;
+ ++delta;
+ }
+ while(delta<0 && i>0) {
+ U8_PREV_OR_FFFD(s, 0, i, c);
+ if(c<=0xffff) {
+ --pos;
+ ++delta;
+ } else if(delta<=-2) {
+ pos-=2;
+ delta+=2;
+ } else /* delta==-1 */ {
+ /* stop in the middle of a supplementary code point */
+ i+=4; /* back to behind this supplementary code point for consistent state */
+ iter->reservedField=c;
+ --pos;
+ break; /* delta=0; */
+ }
+ }
+ }
+
+ iter->start=i; /* store the new UTF-8 index */
+ if(iter->index>=0) {
+ return iter->index=pos;
+ } else {
+ /* we started with index<0 (unknown) so pos is bogus */
+ if(i<=1) {
+ return iter->index=i; /* reached the beginning */
+ } else {
+ /* we still don't know the UTF-16 index */
+ return UITER_UNKNOWN_INDEX;
+ }
+ }
+}
+
+/*
+ * UCharIterator hasNext callback for the UTF-8 iterator.
+ * There is more text if bytes remain before the limit, or if the second half
+ * of a supplementary code point is still pending in reservedField.
+ */
+static UBool U_CALLCONV
+utf8IteratorHasNext(UCharIterator *iter) {
+    if(iter->start<iter->limit) {
+        return TRUE;
+    }
+    return iter->reservedField!=0;
+}
+
+/*
+ * UCharIterator hasPrevious callback for the UTF-8 iterator.
+ * There is text before the current position whenever the UTF-8 index
+ * (kept in start) is greater than 0.
+ */
+static UBool U_CALLCONV
+utf8IteratorHasPrevious(UCharIterator *iter) {
+    const int32_t byteIndex=iter->start;
+    return byteIndex>0;
+}
+/*
+ * UCharIterator current callback for the UTF-8 iterator.
+ * Returns the UTF-16 code unit at the current position without moving:
+ * the trail surrogate of the pending code point if reservedField is set,
+ * otherwise the code point decoded at the UTF-8 index (or its lead surrogate
+ * if it is above 0xffff), or U_SENTINEL at the end of the string.
+ */
+static UChar32 U_CALLCONV
+utf8IteratorCurrent(UCharIterator *iter) {
+ if(iter->reservedField!=0) {
+ return U16_TRAIL(iter->reservedField);
+ } else if(iter->start<iter->limit) {
+ const uint8_t *s=(const uint8_t *)iter->context;
+ UChar32 c;
+ int32_t i=iter->start; /* decode with a local copy so that iter->start does not move */
+
+ U8_NEXT_OR_FFFD(s, i, iter->limit, c);
+ if(c<=0xffff) {
+ return c;
+ } else {
+ return U16_LEAD(c);
+ }
+ } else {
+ return U_SENTINEL;
+ }
+}
+/*
+ * UCharIterator next callback for the UTF-8 iterator.
+ * Returns the UTF-16 code unit at the current position and moves past it.
+ * A code point above 0xffff is delivered as two calls: the first returns the
+ * lead surrogate and parks the code point in reservedField, the second returns
+ * the trail surrogate and clears reservedField.
+ * The UTF-16 index is only maintained while it is known (>=0); reaching the
+ * end of the bytes is used to fill in an unknown length or index.
+ * Returns U_SENTINEL at the end of the string.
+ */
+static UChar32 U_CALLCONV
+utf8IteratorNext(UCharIterator *iter) {
+ int32_t index;
+
+ if(iter->reservedField!=0) {
+ UChar trail=U16_TRAIL(iter->reservedField);
+ iter->reservedField=0;
+ if((index=iter->index)>=0) {
+ iter->index=index+1;
+ }
+ return trail;
+ } else if(iter->start<iter->limit) {
+ const uint8_t *s=(const uint8_t *)iter->context;
+ UChar32 c;
+
+ U8_NEXT_OR_FFFD(s, iter->start, iter->limit, c); /* advances iter->start past the code point */
+ if((index=iter->index)>=0) {
+ iter->index=++index;
+ if(iter->length<0 && iter->start==iter->limit) {
+ /* just consumed the last bytes: the UTF-16 length is now known */
+ iter->length= c<=0xffff ? index : index+1;
+ }
+ } else if(iter->start==iter->limit && iter->length>=0) {
+ /* index was unknown, but at the end it follows from the known length */
+ iter->index= c<=0xffff ? iter->length : iter->length-1;
+ }
+ if(c<=0xffff) {
+ return c;
+ } else {
+ iter->reservedField=c; /* remember the code point for the trail surrogate */
+ return U16_LEAD(c);
+ }
+ } else {
+ return U_SENTINEL;
+ }
+}
+/*
+ * UCharIterator previous callback for the UTF-8 iterator.
+ * Moves back by one UTF-16 code unit and returns it.
+ * A code point above 0xffff is delivered as two calls: the first returns the
+ * trail surrogate, parks the code point in reservedField and keeps start
+ * behind its bytes; the second returns the lead surrogate, clears
+ * reservedField and moves start in front of the bytes.
+ * Returns U_SENTINEL at the beginning of the string.
+ */
+static UChar32 U_CALLCONV
+utf8IteratorPrevious(UCharIterator *iter) {
+ int32_t index;
+
+ if(iter->reservedField!=0) {
+ UChar lead=U16_LEAD(iter->reservedField);
+ iter->reservedField=0;
+ iter->start-=4; /* we stayed behind the supplementary code point; go before it now */
+ if((index=iter->index)>0) {
+ iter->index=index-1;
+ }
+ return lead;
+ } else if(iter->start>0) {
+ const uint8_t *s=(const uint8_t *)iter->context;
+ UChar32 c;
+
+ U8_PREV_OR_FFFD(s, 0, iter->start, c); /* moves iter->start back before the code point */
+ if((index=iter->index)>0) {
+ iter->index=index-1;
+ } else if(iter->start<=1) {
+ /* index was not usable, but this close to the beginning it can be derived from start */
+ iter->index= c<=0xffff ? iter->start : iter->start+1;
+ }
+ if(c<=0xffff) {
+ return c;
+ } else {
+ iter->start+=4; /* back to behind this supplementary code point for consistent state */
+ iter->reservedField=c;
+ return U16_TRAIL(c);
+ }
+ } else {
+ return U_SENTINEL;
+ }
+}
+
+/*
+ * UCharIterator getState callback for the UTF-8 iterator.
+ * The state word is the UTF-8 index shifted left by one bit; bit 0 is set
+ * when the iterator is in the middle of a supplementary code point
+ * (reservedField!=0).
+ */
+static uint32_t U_CALLCONV
+utf8IteratorGetState(const UCharIterator *iter) {
+    const uint32_t byteIndexBits=(uint32_t)(iter->start<<1);
+
+    if(iter->reservedField==0) {
+        return byteIndexBits;
+    }
+    return byteIndexBits|1;
+}
+/*
+ * UCharIterator setState callback for the UTF-8 iterator.
+ * Restores a position from a state word as built by utf8IteratorGetState():
+ * the UTF-8 index shifted left by one, with bit 0 set when the position is in
+ * the middle of a supplementary code point.
+ * Does nothing if pErrorCode is NULL or already indicates a failure, or if
+ * state equals the current state. Sets U_ILLEGAL_ARGUMENT_ERROR for a NULL
+ * iter and U_INDEX_OUTOFBOUNDS_ERROR for a state that does not fit the string.
+ * After a successful restore the UTF-16 index is unknown (-1) unless the UTF-8
+ * index is 0 or 1.
+ */
+static void U_CALLCONV
+utf8IteratorSetState(UCharIterator *iter,
+ uint32_t state,
+ UErrorCode *pErrorCode)
+{
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ /* do nothing */
+ } else if(iter==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ } else if(state==utf8IteratorGetState(iter)) {
+ /* setting to the current state: no-op */
+ } else {
+ int32_t index=(int32_t)(state>>1); /* UTF-8 index */
+ state&=1; /* 1 if in surrogate pair, must be index>=4 */
+
+ if((state==0 ? index<0 : index<4) || iter->limit<index) {
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ } else {
+ iter->start=index; /* restore UTF-8 byte index */
+ if(index<=1) {
+ iter->index=index;
+ } else {
+ iter->index=-1; /* unknown UTF-16 index */
+ }
+ if(state==0) {
+ iter->reservedField=0;
+ } else {
+ /* verified index>=4 above */
+ UChar32 c;
+ U8_PREV_OR_FFFD((const uint8_t *)iter->context, 0, index, c); /* only the local index moves, iter->start stays */
+ if(c<=0xffff) {
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; /* NOTE(review): start and index were already overwritten above when this error is reported */
+ } else {
+ iter->reservedField=c;
+ }
+ }
+ }
+ }
+}
+/*
+ * Template for UTF-8 iterators: uiter_setUTF8() copies it into the caller's
+ * UCharIterator and then fills in context, limit and length.
+ * All callbacks are the UTF-8 specific implementations.
+ */
+static const UCharIterator utf8Iterator={
+ 0, 0, 0, 0, 0, 0, /* leading data fields start out zeroed */
+ utf8IteratorGetIndex,
+ utf8IteratorMove,
+ utf8IteratorHasNext,
+ utf8IteratorHasPrevious,
+ utf8IteratorCurrent,
+ utf8IteratorNext,
+ utf8IteratorPrevious,
+ NULL, /* NOTE(review): presumably the unused reserved function slot - confirm against the UCharIterator declaration */
+ utf8IteratorGetState,
+ utf8IteratorSetState
+};
+
+/*
+ * Sets up iter to iterate over the UTF-8 string s.
+ * length counts bytes; -1 means that s is NUL-terminated and is measured with
+ * uprv_strlen(). A NULL iter is ignored. If s is NULL or length is less than
+ * -1, iter becomes a copy of noopIterator.
+ * The UTF-16 length is set right away only for strings of at most one byte;
+ * otherwise it stays -1 (unknown) until it is needed.
+ */
+U_CAPI void U_EXPORT2
+uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length) {
+    if(iter==NULL) {
+        return;
+    }
+    if(s==NULL || length<-1) {
+        *iter=noopIterator;
+        return;
+    }
+
+    *iter=utf8Iterator;
+    iter->context=s;
+    iter->limit= length>=0 ? length : (int32_t)uprv_strlen(s);
+    if(iter->limit<=1) {
+        iter->length=iter->limit;
+    } else {
+        iter->length=-1;
+    }
+}
+
+/* Helper functions --------------------------------------------------------- */
+/*
+ * Returns the code point at the current position of iter without changing
+ * the position. If the current code unit is a lead surrogate, the following
+ * unit is inspected; if it is a trail surrogate, the preceding unit is
+ * inspected. A matching pair is combined into a supplementary code point;
+ * otherwise the code unit (or U_SENTINEL) is returned as is.
+ * The iterator is moved temporarily and moved back before returning.
+ */
+U_CAPI UChar32 U_EXPORT2
+uiter_current32(UCharIterator *iter) {
+ UChar32 c, c2;
+
+ c=iter->current(iter);
+ if(U16_IS_SURROGATE(c)) {
+ if(U16_IS_SURROGATE_LEAD(c)) {
+ /*
+ * go to the next code unit
+ * we know that we are not at the limit because c!=U_SENTINEL
+ */
+ iter->move(iter, 1, UITER_CURRENT);
+ if(U16_IS_TRAIL(c2=iter->current(iter))) {
+ c=U16_GET_SUPPLEMENTARY(c, c2);
+ }
+
+ /* undo index movement */
+ iter->move(iter, -1, UITER_CURRENT);
+ } else {
+ if(U16_IS_LEAD(c2=iter->previous(iter))) {
+ c=U16_GET_SUPPLEMENTARY(c2, c);
+ }
+ if(c2>=0) { /* previous() really moved, i.e., it did not return U_SENTINEL */
+ /* undo index movement */
+ iter->move(iter, 1, UITER_CURRENT);
+ }
+ }
+ }
+ return c;
+}
+
+/*
+ * Returns the code point at the current position of iter and moves behind it.
+ * A lead surrogate followed by a trail surrogate is combined into one
+ * supplementary code point. If a lead surrogate is followed by anything else,
+ * the lead surrogate itself is returned and the extra step forward is undone
+ * (unless the second read hit the end and did not move).
+ */
+U_CAPI UChar32 U_EXPORT2
+uiter_next32(UCharIterator *iter) {
+    UChar32 result=iter->next(iter);
+
+    if(U16_IS_LEAD(result)) {
+        const UChar32 second=iter->next(iter);
+        if(U16_IS_TRAIL(second)) {
+            result=U16_GET_SUPPLEMENTARY(result, second);
+        } else if(second>=0) {
+            /* unmatched first surrogate, undo index movement */
+            iter->move(iter, -1, UITER_CURRENT);
+        }
+    }
+    return result;
+}
+
+/*
+ * Moves iter back by one code point and returns that code point.
+ * A trail surrogate preceded by a lead surrogate is combined into one
+ * supplementary code point. If a trail surrogate is preceded by anything
+ * else, the trail surrogate itself is returned and the extra step backward is
+ * undone (unless the second read hit the start and did not move).
+ */
+U_CAPI UChar32 U_EXPORT2
+uiter_previous32(UCharIterator *iter) {
+    UChar32 result=iter->previous(iter);
+
+    if(U16_IS_TRAIL(result)) {
+        const UChar32 before=iter->previous(iter);
+        if(U16_IS_LEAD(before)) {
+            result=U16_GET_SUPPLEMENTARY(before, result);
+        } else if(before>=0) {
+            /* unmatched second surrogate, undo index movement */
+            iter->move(iter, 1, UITER_CURRENT);
+        }
+    }
+    return result;
+}
+
+/*
+ * Returns the state word of iter as produced by its getState callback,
+ * or UITER_NO_STATE if iter is NULL or has no getState callback.
+ */
+U_CAPI uint32_t U_EXPORT2
+uiter_getState(const UCharIterator *iter) {
+    if(iter!=NULL && iter->getState!=NULL) {
+        return iter->getState(iter);
+    }
+    return UITER_NO_STATE;
+}
+
+/*
+ * Restores a state word on iter through its setState callback.
+ * Does nothing if pErrorCode is NULL or already indicates a failure.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR for a NULL iter and U_UNSUPPORTED_ERROR if
+ * the iterator has no setState callback.
+ */
+U_CAPI void U_EXPORT2
+uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return;
+    }
+    if(iter==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    if(iter->setState==NULL) {
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return;
+    }
+    iter->setState(iter, state, pErrorCode);
+}
+
+U_CDECL_END
diff --git a/thirdparty/icu4c/common/ulayout_props.h b/thirdparty/icu4c/common/ulayout_props.h
new file mode 100644
index 0000000000..c0f028c713
--- /dev/null
+++ b/thirdparty/icu4c/common/ulayout_props.h
@@ -0,0 +1,46 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// ulayout_props.h
+// created: 2019feb12 Markus W. Scherer
+
+#ifndef __ULAYOUT_PROPS_H__
+#define __ULAYOUT_PROPS_H__
+
+#include "unicode/utypes.h"
+
+// file definitions ------------------------------------------------------------
+
+#define ULAYOUT_DATA_NAME "ulayout"
+#define ULAYOUT_DATA_TYPE "icu"
+
+// data format "Layo"
+#define ULAYOUT_FMT_0 0x4c
+#define ULAYOUT_FMT_1 0x61
+#define ULAYOUT_FMT_2 0x79
+#define ULAYOUT_FMT_3 0x6f
+
+// indexes into indexes[]
+// Enumerators without an explicit value continue counting from 0.
+enum {
+ // Element 0 stores the length of the indexes[] array.
+ ULAYOUT_IX_INDEXES_LENGTH, // = 0
+ // Elements 1..7 store the tops of consecutive code point tries.
+ // No trie is stored if the difference between two of these is less than 16.
+ ULAYOUT_IX_INPC_TRIE_TOP, // = 1
+ ULAYOUT_IX_INSC_TRIE_TOP, // = 2
+ ULAYOUT_IX_VO_TRIE_TOP, // = 3
+ ULAYOUT_IX_RESERVED_TOP, // = 4; values 5 and 6 have no named enumerator
+
+ ULAYOUT_IX_TRIES_TOP = 7,
+
+ ULAYOUT_IX_MAX_VALUES = 9, // value 8 has no named enumerator
+
+ // Length of indexes[]. Multiple of 4 to 16-align the tries.
+ ULAYOUT_IX_COUNT = 12
+};
+// NOTE(review): presumably bit positions of the InPC/InSC/vo maxima packed into indexes[ULAYOUT_IX_MAX_VALUES] - confirm against the code that reads it.
+constexpr int32_t ULAYOUT_MAX_INPC_SHIFT = 24;
+constexpr int32_t ULAYOUT_MAX_INSC_SHIFT = 16;
+constexpr int32_t ULAYOUT_MAX_VO_SHIFT = 8;
+
+#endif // __ULAYOUT_PROPS_H__
diff --git a/thirdparty/icu4c/common/ulist.cpp b/thirdparty/icu4c/common/ulist.cpp
new file mode 100644
index 0000000000..c5180431c3
--- /dev/null
+++ b/thirdparty/icu4c/common/ulist.cpp
@@ -0,0 +1,270 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2009-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+*/
+
+#include "ulist.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uenumimp.h"
+
+typedef struct UListNode UListNode;
+struct UListNode { /* one element of the doubly linked UList */
+ void *data; /* payload pointer; the string functions treat it as a NUL-terminated char string */
+
+ UListNode *next; /* following node, NULL for the tail */
+ UListNode *previous; /* preceding node, NULL for the head */
+
+ /* If TRUE, data is released with uprv_free() when the node is removed or the list is deleted. */
+ UBool forceDelete;
+};
+
+struct UList { /* doubly linked list with a built-in iteration cursor */
+ UListNode *curr; /* node that ulist_getNext() returns next; NULL when exhausted or not yet reset */
+ UListNode *head; /* first node, NULL for an empty list */
+ UListNode *tail; /* last node, NULL for an empty list */
+
+ int32_t size; /* number of nodes */
+};
+
+static void ulist_addFirstItem(UList *list, UListNode *newItem);
+
+/*
+ * Allocates an empty list with uprv_malloc().
+ * Returns NULL if status already indicates a failure, or if the allocation
+ * fails, in which case status is set to U_MEMORY_ALLOCATION_ERROR.
+ */
+U_CAPI UList *U_EXPORT2 ulist_createEmptyList(UErrorCode *status) {
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    UList *created = (UList *)uprv_malloc(sizeof(UList));
+    if (created != NULL) {
+        created->size = 0;
+        created->head = NULL;
+        created->tail = NULL;
+        created->curr = NULL;
+    } else {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return created;
+}
+
+/*
+ * Helper for ulist_addItemEndList and ulist_addItemBeginList: installs newItem
+ * as the only node of a list that has no nodes yet, making it both head and
+ * tail with no neighbours. The list size is not touched here.
+ */
+static void ulist_addFirstItem(UList *list, UListNode *newItem) {
+    list->head = newItem;
+    list->tail = newItem;
+    newItem->previous = NULL;
+    newItem->next = NULL;
+}
+
+/*
+ * Unlinks node p from the list and frees it, together with its data if the
+ * node was added with forceDelete. If the iteration cursor points at p, it is
+ * advanced to the node that followed p.
+ */
+static void ulist_removeItem(UList *list, UListNode *p) {
+    UListNode *before = p->previous;
+    UListNode *after = p->next;
+
+    if (before != NULL) {
+        before->next = after;
+    } else {
+        // p is the list head.
+        list->head = after;
+    }
+    if (after != NULL) {
+        after->previous = before;
+    } else {
+        // p is the list tail.
+        list->tail = before;
+    }
+    if (list->curr == p) {
+        list->curr = after;
+    }
+    --list->size;
+
+    if (p->forceDelete) {
+        uprv_free(p->data);
+    }
+    uprv_free(p);
+}
+
+/*
+ * Appends data as a new node at the end of the list.
+ * If forceDelete is TRUE the list takes ownership of data: it is freed with
+ * uprv_free() when the node goes away, and also right here whenever the item
+ * cannot be added (failed status, NULL list or data, or allocation failure).
+ * An allocation failure sets status to U_MEMORY_ALLOCATION_ERROR.
+ */
+U_CAPI void U_EXPORT2 ulist_addItemEndList(UList *list, const void *data, UBool forceDelete, UErrorCode *status) {
+    if (U_FAILURE(*status) || list == NULL || data == NULL) {
+        if (forceDelete) {
+            uprv_free((void *)data);
+        }
+        return;
+    }
+
+    UListNode *node = (UListNode *)uprv_malloc(sizeof(UListNode));
+    if (node == NULL) {
+        if (forceDelete) {
+            uprv_free((void *)data);
+        }
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    node->forceDelete = forceDelete;
+    node->data = (void *)(data);
+
+    if (list->size != 0) {
+        UListNode *oldTail = list->tail;
+        node->previous = oldTail;
+        node->next = NULL;
+        oldTail->next = node;
+        list->tail = node;
+    } else {
+        ulist_addFirstItem(list, node);
+    }
+
+    ++list->size;
+}
+
+/*
+ * Inserts data as a new node at the beginning of the list.
+ * If forceDelete is TRUE the list takes ownership of data: it is freed with
+ * uprv_free() when the node goes away, and also right here whenever the item
+ * cannot be added (failed status, NULL list or data, or allocation failure).
+ * An allocation failure sets status to U_MEMORY_ALLOCATION_ERROR.
+ */
+U_CAPI void U_EXPORT2 ulist_addItemBeginList(UList *list, const void *data, UBool forceDelete, UErrorCode *status) {
+    if (U_FAILURE(*status) || list == NULL || data == NULL) {
+        if (forceDelete) {
+            uprv_free((void *)data);
+        }
+        return;
+    }
+
+    UListNode *node = (UListNode *)uprv_malloc(sizeof(UListNode));
+    if (node == NULL) {
+        if (forceDelete) {
+            uprv_free((void *)data);
+        }
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    node->forceDelete = forceDelete;
+    node->data = (void *)(data);
+
+    if (list->size != 0) {
+        UListNode *oldHead = list->head;
+        node->next = oldHead;
+        node->previous = NULL;
+        oldHead->previous = node;
+        list->head = node;
+    } else {
+        ulist_addFirstItem(list, node);
+    }
+
+    ++list->size;
+}
+
+/*
+ * Returns TRUE if some node's data, read as a NUL-terminated string, has
+ * exactly length bytes and those bytes equal the first length bytes of data.
+ * Returns FALSE for a NULL list or when nothing matches.
+ */
+U_CAPI UBool U_EXPORT2 ulist_containsString(const UList *list, const char *data, int32_t length) {
+    if (list == NULL) {
+        return FALSE;
+    }
+
+    const UListNode *node = list->head;
+    while (node != NULL) {
+        const char *candidate = (const char *)node->data;
+        if (length == (int32_t)uprv_strlen(candidate) &&
+                uprv_memcmp(data, candidate, length) == 0) {
+            return TRUE;
+        }
+        node = node->next;
+    }
+    return FALSE;
+}
+
+/*
+ * Removes the first node whose data, read as a NUL-terminated string, equals
+ * data. Returns TRUE if a node was removed, FALSE for a NULL list or when
+ * nothing matches.
+ */
+U_CAPI UBool U_EXPORT2 ulist_removeString(UList *list, const char *data) {
+    if (list == NULL) {
+        return FALSE;
+    }
+
+    UListNode *node = list->head;
+    while (node != NULL) {
+        if (uprv_strcmp(data, (const char *)node->data) == 0) {
+            // Remove only the first occurrence, like Java LinkedList.remove(Object).
+            ulist_removeItem(list, node);
+            return TRUE;
+        }
+        node = node->next;
+    }
+    return FALSE;
+}
+
+/*
+ * Returns the data of the node under the iteration cursor and advances the
+ * cursor to the following node. Returns NULL for a NULL list or when the
+ * cursor is NULL.
+ */
+U_CAPI void *U_EXPORT2 ulist_getNext(UList *list) {
+    if (list == NULL) {
+        return NULL;
+    }
+
+    UListNode *visited = list->curr;
+    if (visited == NULL) {
+        return NULL;
+    }
+    list->curr = visited->next;
+    return visited->data;
+}
+
+/* Returns the number of nodes in the list, or -1 for a NULL list. */
+U_CAPI int32_t U_EXPORT2 ulist_getListSize(const UList *list) {
+    if (list == NULL) {
+        return -1;
+    }
+    return list->size;
+}
+
+/* Moves the iteration cursor back to the head of the list; a NULL list is ignored. */
+U_CAPI void U_EXPORT2 ulist_resetList(UList *list) {
+    if (list == NULL) {
+        return;
+    }
+    list->curr = list->head;
+}
+
+/*
+ * Frees every node of the list, the data of the nodes that were added with
+ * forceDelete, and finally the UList itself. A NULL list is ignored.
+ * The list pointer is passed by value, so the caller's copy is left dangling
+ * and must not be used after this call.
+ */
+U_CAPI void U_EXPORT2 ulist_deleteList(UList *list) {
+    if (list == NULL) {
+        return;
+    }
+
+    UListNode *node = list->head;
+    while (node != NULL) {
+        /* read the link before the node is freed */
+        UListNode *following = node->next;
+
+        if (node->forceDelete) {
+            uprv_free(node->data);
+        }
+        uprv_free(node);
+        node = following;
+    }
+    uprv_free(list);
+}
+
+/*
+ * UEnumeration close function for enumerations backed by a UList:
+ * deletes the list stored in en->context and frees en itself.
+ * A NULL en is ignored.
+ */
+U_CAPI void U_EXPORT2 ulist_close_keyword_values_iterator(UEnumeration *en) {
+    if (en == NULL) {
+        return;
+    }
+    ulist_deleteList((UList *)(en->context));
+    uprv_free(en);
+}
+
+/*
+ * UEnumeration count function for enumerations backed by a UList:
+ * returns the size of the list stored in en->context, or -1 if status already
+ * indicates a failure.
+ */
+U_CAPI int32_t U_EXPORT2 ulist_count_keyword_values(UEnumeration *en, UErrorCode *status) {
+    if (!U_FAILURE(*status)) {
+        return ulist_getListSize((UList *)(en->context));
+    }
+    return -1;
+}
+
+/*
+ * UEnumeration next function for enumerations backed by a UList:
+ * returns the next string of the list stored in en->context, or NULL when the
+ * list is exhausted or status already indicates a failure.
+ * If a string is returned and resultLength is not NULL, *resultLength receives
+ * the string's length.
+ */
+U_CAPI const char * U_EXPORT2 ulist_next_keyword_value(UEnumeration *en, int32_t *resultLength, UErrorCode *status) {
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    const char *value = (const char *)ulist_getNext((UList *)(en->context));
+    if (value == NULL) {
+        return NULL;
+    }
+    if (resultLength != NULL) {
+        *resultLength = static_cast<int32_t>(uprv_strlen(value));
+    }
+    return value;
+}
+
+/*
+ * UEnumeration reset function for enumerations backed by a UList:
+ * rewinds the list stored in en->context to its head, unless status already
+ * indicates a failure.
+ */
+U_CAPI void U_EXPORT2 ulist_reset_keyword_values_iterator(UEnumeration *en, UErrorCode *status) {
+    if (!U_FAILURE(*status)) {
+        ulist_resetList((UList *)(en->context));
+    }
+}
+
+/* Returns the UList stored in the context field of en; en must not be NULL. */
+U_CAPI UList * U_EXPORT2 ulist_getListFromEnum(UEnumeration *en) {
+    UList *backing = (UList *)(en->context);
+    return backing;
+}
diff --git a/thirdparty/icu4c/common/ulist.h b/thirdparty/icu4c/common/ulist.h
new file mode 100644
index 0000000000..de58a4ad02
--- /dev/null
+++ b/thirdparty/icu4c/common/ulist.h
@@ -0,0 +1,50 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2009-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+*/
+
+#ifndef ULIST_H
+#define ULIST_H
+
+#include "unicode/utypes.h"
+#include "unicode/uenum.h"
+/* Opaque doubly linked list with a built-in iteration cursor; defined in ulist.cpp. */
+struct UList;
+typedef struct UList UList;
+/* Allocates an empty list; returns NULL and sets U_MEMORY_ALLOCATION_ERROR if the allocation fails. */
+U_CAPI UList * U_EXPORT2 ulist_createEmptyList(UErrorCode *status);
+/* Appends data at the end; with forceDelete the list frees data with uprv_free(), also if the item cannot be added. */
+U_CAPI void U_EXPORT2 ulist_addItemEndList(UList *list, const void *data, UBool forceDelete, UErrorCode *status);
+/* Inserts data at the beginning; with forceDelete the list frees data with uprv_free(), also if the item cannot be added. */
+U_CAPI void U_EXPORT2 ulist_addItemBeginList(UList *list, const void *data, UBool forceDelete, UErrorCode *status);
+/* Returns TRUE if an item, read as a NUL-terminated string, has exactly length bytes equal to those of data. */
+U_CAPI UBool U_EXPORT2 ulist_containsString(const UList *list, const char *data, int32_t length);
+/* Removes the first item that compares equal to data as a string; returns TRUE if one was removed. */
+U_CAPI UBool U_EXPORT2 ulist_removeString(UList *list, const char *data);
+/* Returns the item under the iteration cursor and advances the cursor; NULL when there is none. */
+U_CAPI void *U_EXPORT2 ulist_getNext(UList *list);
+/* Returns the number of items, or -1 for a NULL list. */
+U_CAPI int32_t U_EXPORT2 ulist_getListSize(const UList *list);
+/* Moves the iteration cursor back to the first item. */
+U_CAPI void U_EXPORT2 ulist_resetList(UList *list);
+/* Frees all nodes, the data of items added with forceDelete, and the list itself. */
+U_CAPI void U_EXPORT2 ulist_deleteList(UList *list);
+
+/*
+ * The following are for use when creating a UEnumeration object backed by a UList.
+ * They expect the UList pointer in the context field of the UEnumeration.
+ */
+U_CAPI void U_EXPORT2 ulist_close_keyword_values_iterator(UEnumeration *en);
+/* Returns the size of the backing list, or -1 if status already indicates a failure. */
+U_CAPI int32_t U_EXPORT2 ulist_count_keyword_values(UEnumeration *en, UErrorCode *status);
+/* Returns the next string of the backing list and, if resultLength is not NULL, its length. */
+U_CAPI const char * U_EXPORT2 ulist_next_keyword_value(UEnumeration* en, int32_t *resultLength, UErrorCode* status);
+/* Rewinds the backing list to its first item. */
+U_CAPI void U_EXPORT2 ulist_reset_keyword_values_iterator(UEnumeration* en, UErrorCode* status);
+/* Returns the backing list stored in en->context. */
+U_CAPI UList * U_EXPORT2 ulist_getListFromEnum(UEnumeration *en);
+
+#endif
diff --git a/thirdparty/icu4c/common/uloc.cpp b/thirdparty/icu4c/common/uloc.cpp
new file mode 100644
index 0000000000..522f33dbe2
--- /dev/null
+++ b/thirdparty/icu4c/common/uloc.cpp
@@ -0,0 +1,2176 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File ULOC.CPP
+*
+* Modification History:
+*
+* Date Name Description
+* 04/01/97 aliu Creation.
+* 08/21/98 stephen JDK 1.2 sync
+* 12/08/98 rtg New Locale implementation and C API
+* 03/15/99 damiba overhaul.
+* 04/06/99 stephen changed setDefault() to realloc and copy
+* 06/14/99 stephen Changed calls to ures_open for new params
+* 07/21/99 stephen Modified setDefault() to propagate to C++
+* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
+* brought canonicalization code into line with spec
+*****************************************************************************/
+
+/*
+ POSIX's locale format, from putil.c: [no spaces]
+
+ ll [ _CC ] [ . MM ] [ @ VV]
+
+ l = lang, C = ctry, M = charmap, V = variant
+*/
+
+#include "unicode/bytestream.h"
+#include "unicode/errorcode.h"
+#include "unicode/stringpiece.h"
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "unicode/uloc.h"
+
+#include "bytesinkutil.h"
+#include "putilimp.h"
+#include "ustr_imp.h"
+#include "ulocimp.h"
+#include "umutex.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "locmap.h"
+#include "uarrsort.h"
+#include "uenumimp.h"
+#include "uassert.h"
+#include "charstr.h"
+
+U_NAMESPACE_USE
+
+/* ### Declarations **************************************************/
+
+/* Locale stuff from locid.cpp */
+U_CFUNC void locale_set_default(const char *id);
+U_CFUNC const char *locale_get_default(void);
+
+/* ### Data tables **************************************************/
+
+/**
+ * Table of language codes, both 2- and 3-letter, with preference
+ * given to 2-letter codes where possible. Includes 3-letter codes
+ * that lack a 2-letter equivalent.
+ *
+ * This list must be in sorted order. This list is returned directly
+ * to the user by some API.
+ *
+ * This list must be kept in sync with LANGUAGES_3, with corresponding
+ * entries matched.
+ *
+ * This table should be terminated with a NULL entry, followed by a
+ * second list, and another NULL entry. The first list is visible to
+ * user code when this array is returned by API. The second list
+ * contains codes we support, but do not expose through user API.
+ *
+ * Notes
+ *
+ * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
+ * include the revisions up to 2001/7/27 *CWB*
+ *
+ * The 3 character codes are the terminology codes like RFC 3066. This
+ * is compatible with prior ICU codes
+ *
+ * "in", "iw", "ji", "jw" and "sh" have been withdrawn. They are still
+ * in the table, but now at the end of the table, because their
+ * 3-character codes are duplicates. This avoids bad searches going
+ * from 3- to 2-character codes.
+ *
+ * The range qaa-qtz is reserved for local use
+ */
+/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
+/* ISO639 table version is 20150505 */
+/* Subsequent hand addition of selected languages */
+static const char * const LANGUAGES[] = {
+ "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
+ "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
+ "aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
+ "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
+ "asa", "ase", "ast", "av", "avk", "awa", "ay", "az",
+ "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
+ "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
+ "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
+ "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
+ "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
+ "ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg",
+ "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
+ "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
+ "cs", "csb", "cu", "cv", "cy",
+ "da", "dak", "dar", "dav", "de", "del", "den", "dgr",
+ "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
+ "dyo", "dyu", "dz", "dzg",
+ "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx",
+ "en", "enm", "eo", "es", "esu", "et", "eu", "ewo",
+ "ext",
+ "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj",
+ "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr",
+ "frs", "fur", "fy",
+ "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
+ "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom",
+ "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc",
+ "gur", "guz", "gv", "gwi",
+ "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil",
+ "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu",
+ "hup", "hy", "hz",
+ "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik",
+ "ilo", "inh", "io", "is", "it", "iu", "izh",
+ "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
+ "jv",
+ "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
+ "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp",
+ "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk",
+ "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
+ "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
+ "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
+ "kv", "kw", "ky",
+ "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
+ "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
+ "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
+ "lun", "luo", "lus", "luy", "lv", "lzh", "lzz",
+ "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
+ "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
+ "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
+ "ml", "mn", "mnc", "mni", "mo",
+ "moh", "mos", "mr", "mrj",
+ "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
+ "my", "mye", "myv", "mzn",
+ "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne",
+ "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn",
+ "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso",
+ "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi",
+ "oc", "oj", "om", "or", "os", "osa", "ota",
+ "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
+ "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt",
+ "pon", "prg", "pro", "ps", "pt",
+ "qu", "quc", "qug",
+ "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro",
+ "rof", "rom", "rtm", "ru", "rue", "rug", "rup",
+ "rw", "rwk",
+ "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
+ "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh",
+ "se", "see", "seh", "sei", "sel", "ses", "sg", "sga",
+ "sgs", "shi", "shn", "shu", "si", "sid", "sk",
+ "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
+ "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
+ "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
+ "sv", "sw", "swb", "swc", "syc", "syr", "szl",
+ "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
+ "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
+ "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
+ "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
+ "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
+ "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
+ "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
+ "vot", "vro", "vun",
+ "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
+ "xal", "xh", "xmf", "xog",
+ "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
+ "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
+ "zun", "zxx", "zza",
+NULL, /* terminates the first list (the one visible to user code) */
+ "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
+NULL /* terminates the second list (supported, but not exposed through user API) */
+};
+
+static const char* const DEPRECATED_LANGUAGES[]={
+ "in", "iw", "ji", "jw", NULL, NULL /* deprecated language codes, double-NULL terminated */
+};
+static const char* const REPLACEMENT_LANGUAGES[]={
+ "id", "he", "yi", "jv", NULL, NULL /* presumably index-matched with DEPRECATED_LANGUAGES (in, iw, ji, jw) - confirm against the lookup code */
+};
+
+/**
+ * Table of 3-letter language codes.
+ *
+ * This is a lookup table used to convert 3-letter language codes to
+ * their 2-letter equivalent, where possible. It must be kept in sync
+ * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
+ * same language as LANGUAGES_3[i]. The commented-out lines are
+ * copied from LANGUAGES to make eyeballing this baby easier.
+ *
+ * Where a 3-letter language code has no 2-letter equivalent, the
+ * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
+ *
+ * This table should be terminated with a NULL entry, followed by a
+ * second list, and another NULL entry. The two lists correspond to
+ * the two lists in LANGUAGES.
+ */
+/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
+/* ISO639 table version is 20150505 */
+/* Subsequent hand addition of selected languages */
+static const char * const LANGUAGES_3[] = {
+ "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
+ "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
+ "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
+ "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
+ "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
+ "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
+ "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
+ "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
+ "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
+ "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
+ "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
+ "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
+ "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
+ "ces", "csb", "chu", "chv", "cym",
+ "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
+ "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
+ "dyo", "dyu", "dzo", "dzg",
+ "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
+ "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
+ "ext",
+ "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
+ "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
+ "frs", "fur", "fry",
+ "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
+ "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
+ "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
+ "gur", "guz", "glv", "gwi",
+ "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
+ "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
+ "hup", "hye", "her",
+ "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
+ "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
+ "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
+ "jav",
+ "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
+ "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
+ "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
+ "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
+ "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
+ "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
+ "kom", "cor", "kir",
+ "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
+ "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
+ "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
+ "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
+ "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
+ "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
+ "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
+ "mal", "mon", "mnc", "mni", "mol",
+ "moh", "mos", "mar", "mrj",
+ "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
+ "mya", "mye", "myv", "mzn",
+ "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
+ "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
+ "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
+ "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
+ "oci", "oji", "orm", "ori", "oss", "osa", "ota",
+ "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
+ "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
+ "pon", "prg", "pro", "pus", "por",
+ "que", "quc", "qug",
+ "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
+ "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
+ "kin", "rwk",
+ "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
+ "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
+ "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
+ "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
+ "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
+ "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
+ "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
+ "swe", "swa", "swb", "swc", "syc", "syr", "szl",
+ "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
+ "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
+ "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
+ "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
+ "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
+ "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
+ "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
+ "vot", "vro", "vun",
+ "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
+ "xal", "xho", "xmf", "xog",
+ "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
+ "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
+ "zun", "zxx", "zza",
+NULL, /* terminates the first list; corresponds to the first list in LANGUAGES */
+/* "in", "iw", "ji", "jw", "sh", <- the matching entries of the second list in LANGUAGES */
+ "ind", "heb", "yid", "jaw", "srp",
+NULL /* terminates the second list */
+};
+
+/**
+ * Table of 2-letter country codes.
+ *
+ * This list must be in sorted order. This list is returned directly
+ * to the user by some API.
+ *
+ * This list must be kept in sync with COUNTRIES_3, with corresponding
+ * entries matched.
+ *
+ * This table should be terminated with a NULL entry, followed by a
+ * second list, and another NULL entry. The first list is visible to
+ * user code when this array is returned by API. The second list
+ * contains codes we support, but do not expose through user API.
+ *
+ * Notes:
+ *
+ * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE), as per
+ * http://www.evertype.com/standards/iso3166/iso3166-1-en.html
+ * New codes were added, keeping the old ones for compatibility;
+ * updated to include the 1999/12/03 revisions. *CWB*
+ *
+ * RO(ROM) is now RO(ROU) according to
+ * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
+ */
+static const char * const COUNTRIES[] = {
+ "AD", "AE", "AF", "AG", "AI", "AL", "AM",
+ "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
+ "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
+ "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
+ "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
+ "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
+ "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
+ "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
+ "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
+ "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
+ "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
+ "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
+ "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
+ "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
+ "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
+ "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
+ "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
+ "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
+ "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
+ "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
+ "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
+ "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
+ "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
+ "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
+ "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
+ "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
+ "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
+ "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
+ "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
+ "WS", "YE", "YT", "ZA", "ZM", "ZW",
+NULL, /* terminates the first list (the one visible to user code) */
+ "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
+NULL /* terminates the second list (supported, but not exposed through user API) */
+};
+
+static const char* const DEPRECATED_COUNTRIES[] = {
+ "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country codes, double-NULL terminated */
+};
+static const char* const REPLACEMENT_COUNTRIES[] = {
+/* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" <- the deprecated codes, in the same order */
+ "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes, double-NULL terminated */
+};
+
+/**
+ * Table of 3-letter country codes.
+ *
+ * This is a lookup table used to convert 3-letter country codes to
+ * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
+ * For all valid i, COUNTRIES[i] must refer to the same country as
+ * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
+ * to make eyeballing this baby easier.
+ *
+ * This table should be terminated with a NULL entry, followed by a
+ * second list, and another NULL entry. The two lists correspond to
+ * the two lists in COUNTRIES.
+ */
+static const char * const COUNTRIES_3[] = {
+/* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
+ "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
+/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
+ "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
+/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
+ "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
+/* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
+ "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
+/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
+ "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
+/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
+ "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
+/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
+ "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
+/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
+ "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
+/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
+ "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
+/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
+ "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
+/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
+ "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
+/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
+ "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
+/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", */
+ "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
+/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
+ "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
+/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
+ "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
+/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
+ "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
+/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
+ "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
+/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
+ "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
+/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
+ "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
+/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
+ "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
+/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
+ "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
+/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
+ "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
+/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
+ "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
+/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
+ "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
+/* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
+ "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
+/* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
+ "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
+/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
+ "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
+/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
+ "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
+/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
+ "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
+/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
+ "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
+NULL, /* terminates the first list; corresponds to the first list in COUNTRIES */
+/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
+ "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
+NULL /* terminates the second list (the obsolete codes) */
+};
+
+typedef struct CanonicalizationMap { /* element type of CANONICALIZE_MAP */
+ const char *id; /* input ID */
+ const char *canonicalID; /* canonicalized output ID */
+} CanonicalizationMap;
+
+/**
+ * A map to canonicalize locale IDs: each entry pairs an input ID with its
+ * canonicalized output ID. It covers several semantic kinds of transformations.
+ */
+static const CanonicalizationMap CANONICALIZE_MAP[] = {
+ { "art__LOJBAN", "jbo" }, /* registered name */
+ { "hy__AREVELA", "hy" }, /* Registered IANA variant */
+ { "hy__AREVMDA", "hyw" }, /* Registered IANA variant */
+ { "zh__GUOYU", "zh" }, /* registered name */
+ { "zh__HAKKA", "hak" }, /* registered name */
+ { "zh__XIANG", "hsn" }, /* registered name */
+ // Subtags with 3 chars won't be treated as variants (presumably why the IDs below use a single '_').
+ { "zh_GAN", "gan" }, /* registered name */
+ { "zh_MIN_NAN", "nan" }, /* registered name */
+ { "zh_WUU", "wuu" }, /* registered name */
+ { "zh_YUE", "yue" }, /* registered name */
+};
+
+/* ### BCP47 Conversion *******************************************/
+/* True if id is non-NULL, has no '@', and its shortest separator-terminated subtag is 1 char long (BCP47 extension singleton). Examines the macro argument itself. */
+#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
+/* Converts the BCP47 id to a Unicode locale id via uloc_forLanguageTag. On success finalID is set to buffer; on failure (including an unterminated result, which is turned into U_BUFFER_OVERFLOW_ERROR) finalID is set to the unchanged id. */
+#define _ConvertBCP47(finalID, id, buffer, length,err) UPRV_BLOCK_MACRO_BEGIN { \
+ if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \
+ U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \
+ finalID=id; \
+ if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \
+ } else { \
+ finalID=buffer; \
+ } \
+} UPRV_BLOCK_MACRO_END
+/* Length of the shortest non-empty subtag that is followed by '_' or '-'; the whole string length if there is none. */
+static int32_t getShortestSubtagLength(const char *localeID) {
+    const int32_t idLength = static_cast<int32_t>(uprv_strlen(localeID));
+    int32_t shortest = idLength;
+    int32_t runLength = 0;
+    UBool startNewRun = TRUE;
+
+    for (int32_t idx = 0; idx < idLength; ++idx) {
+        const char ch = localeID[idx];
+        if (ch == '_' || ch == '-') {
+            if (runLength != 0 && runLength < shortest) {
+                shortest = runLength;
+            }
+            startNewRun = TRUE;
+            continue;
+        }
+        if (startNewRun) {
+            runLength = 0;
+            startNewRun = FALSE;
+        }
+        ++runLength;
+    }
+
+    return shortest;
+}
+
+/* ### Keywords **************************************************/
+#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9')) /* true for the decimal digits '0'..'9' */
+#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) ) /* letter (per uprv_isASCIILetter) or decimal digit */
+/* Punctuation/symbols allowed in legacy key values */
+#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')
+
+#define ULOC_KEYWORD_BUFFER_LEN 25 /* size of the keyword-name buffers, terminating NUL included */
+#define ULOC_MAX_NO_KEYWORDS 25 /* capacity of the keyword list built by ulocimp_getKeywords */
+
+/* Returns a pointer to the first '@' in localeID (where the keywords start), or NULL if there is none. */
+U_CAPI const char * U_EXPORT2
+locale_getKeywordsStart(const char *localeID) {
+    /* Common case: the '@' of the compiler's own charset. */
+    const char *atSign = uprv_strchr(localeID, '@');
+    if (atSign != NULL) {
+        return atSign;
+    }
+#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
+    /* The @ sign is variant, and the @ sign used on one EBCDIC machine
+       won't be compiled the same way on other EBCDIC based machines,
+       so each candidate code point is searched for in turn. */
+    static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
+    for (const uint8_t *candidate = ebcdicSigns; *candidate != 0; ++candidate) {
+        const char *match = uprv_strchr(localeID, *candidate);
+        if (match != NULL) {
+            return match;
+        }
+    }
+#endif
+    /* No keyword section present. */
+    return NULL;
+}
+
+/**
+ * Canonicalizes a keyword name: checks that it is alphanumeric and lowercases it.
+ * @param buf output buffer of size [ULOC_KEYWORD_BUFFER_LEN]; NUL-terminated on success
+ * @param keywordName incoming NUL-terminated name to be canonicalized
+ * @param status U_ILLEGAL_ARGUMENT_ERROR (empty/malformed) or U_INTERNAL_PROGRAM_ERROR (too long)
+ * @return length of the keyword name, or 0 on error
+ */
+static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
+{
+    int32_t written = 0;
+
+    while (*keywordName != 0) {
+        const char ch = *keywordName++;
+        if (!UPRV_ISALPHANUM(ch)) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
+            return 0;
+        }
+        if (written >= ULOC_KEYWORD_BUFFER_LEN - 1) {
+            /* keyword name too long for internal buffer */
+            *status = U_INTERNAL_PROGRAM_ERROR;
+            return 0;
+        }
+        buf[written++] = uprv_tolower(ch);
+    }
+    if (written == 0) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
+        return 0;
+    }
+    buf[written] = 0; /* terminate */
+    return written;
+}
+
+typedef struct { /* one keyword=value pair as parsed by ulocimp_getKeywords */
+ char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* lowercased keyword name with spaces removed, NUL-terminated */
+ int32_t keywordLen; /* number of characters in keyword, NUL excluded */
+ const char *valueStart; /* first non-space character of the value, pointing into the parsed string */
+ int32_t valueLen; /* length of the value with trailing spaces excluded */
+} KeywordStruct;
+
+/* Comparator passed to uprv_sortArray: orders KeywordStruct entries by keyword name via uprv_strcmp. */
+static int32_t U_CALLCONV
+compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
+    return uprv_strcmp(static_cast<const KeywordStruct *>(left)->keyword,
+                       static_cast<const KeywordStruct *>(right)->keyword);
+}
+
+U_CFUNC void /* parses "key=value;key=value..." text and appends the keywords, sorted by name, to sink */
+ulocimp_getKeywords(const char *localeID, /* keyword text to parse; expected to start right after the '@' */
+ char prev, /* character that preceded localeID; nothing is parsed unless it is '@' */
+ ByteSink& sink, /* receives the output */
+ UBool valuesToo, /* if true emit "key=value" pairs joined by ';', otherwise each key followed by a NUL byte */
+ UErrorCode *status) /* set on malformed input or when an internal limit is exceeded */
+{
+ KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
+
+ int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
+ int32_t numKeywords = 0;
+ const char* pos = localeID;
+ const char* equalSign = NULL;
+ const char* semicolon = NULL;
+ int32_t i = 0, j, n;
+
+ if(prev == '@') { /* start of keyword definition */
+ /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
+ do {
+ UBool duplicate = FALSE;
+ /* skip leading spaces */
+ while(*pos == ' ') {
+ pos++;
+ }
+ if (!*pos) { /* handle trailing "; " */
+ break;
+ }
+ if(numKeywords == maxKeywords) { /* keywordList is already full */
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ return;
+ }
+ equalSign = uprv_strchr(pos, '=');
+ semicolon = uprv_strchr(pos, ';');
+ /* lack of '=' [foo@currency] is illegal */
+ /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
+ if(!equalSign || (semicolon && semicolon<equalSign)) {
+ *status = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+ /* need to normalize both keyword and keyword name */
+ if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
+ /* keyword name too long for internal buffer */
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ return;
+ }
+ for(i = 0, n = 0; i < equalSign - pos; ++i) { /* copy the name, dropping spaces and lowercasing */
+ if (pos[i] != ' ') {
+ keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
+ }
+ }
+
+ /* zero-length keyword is an error. */
+ if (n == 0) {
+ *status = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+
+ keywordList[numKeywords].keyword[n] = 0;
+ keywordList[numKeywords].keywordLen = n;
+ /* now grab the value part. First we skip the '=' */
+ equalSign++;
+ /* then we skip leading spaces */
+ while(*equalSign == ' ') {
+ equalSign++;
+ }
+
+ /* Premature end or zero-length value */
+ if (!*equalSign || equalSign == semicolon) {
+ *status = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+
+ keywordList[numKeywords].valueStart = equalSign;
+
+ pos = semicolon; /* NULL when this was the last pair, which ends the do-while loop */
+ i = 0;
+ if(pos) {
+ while(*(pos - i - 1) == ' ') { /* count the trailing spaces in front of the ';' */
+ i++;
+ }
+ keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
+ pos++;
+ } else {
+ i = (int32_t)uprv_strlen(equalSign);
+ while(i && equalSign[i-1] == ' ') { /* drop trailing spaces at the end of the string */
+ i--;
+ }
+ keywordList[numKeywords].valueLen = i;
+ }
+ /* If this is a duplicate keyword, then ignore it */
+ for (j=0; j<numKeywords; ++j) {
+ if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
+ duplicate = TRUE;
+ break;
+ }
+ }
+ if (!duplicate) { /* a duplicate leaves numKeywords unchanged, so its slot is overwritten by the next pair */
+ ++numKeywords;
+ }
+ } while(pos);
+
+ /* now we have a list of keywords */
+ /* we need to sort it */
+ uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
+
+ /* Now construct the keyword part */
+ for(i = 0; i < numKeywords; i++) {
+ sink.Append(keywordList[i].keyword, keywordList[i].keywordLen);
+ if(valuesToo) {
+ sink.Append("=", 1);
+ sink.Append(keywordList[i].valueStart, keywordList[i].valueLen);
+ if(i < numKeywords - 1) { /* no ';' after the last pair */
+ sink.Append(";", 1);
+ }
+ } else {
+ sink.Append("\0", 1);
+ }
+ }
+ }
+}
+
+/* Copies the value of keywordName in localeID into buffer; returns the number of bytes the value needs. */
+U_CAPI int32_t U_EXPORT2
+uloc_getKeywordValue(const char* localeID,
+                     const char* keywordName,
+                     char* buffer, int32_t bufferCapacity,
+                     UErrorCode* status)
+{
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    /* Collect the value through a CheckedArrayByteSink wrapping the caller's buffer. */
+    CheckedArrayByteSink valueSink(buffer, bufferCapacity);
+    ulocimp_getKeywordValue(localeID, keywordName, valueSink, status);
+    const int32_t valueLength = valueSink.NumberOfBytesAppended();
+
+    if (U_SUCCESS(*status)) {
+        if (valueSink.Overflowed()) {
+            *status = U_BUFFER_OVERFLOW_ERROR;
+        } else {
+            /* Leave NUL termination (and any related status) to u_terminateChars. */
+            u_terminateChars(buffer, bufferCapacity, valueLength, status);
+        }
+    }
+
+    return valueLength;
+}
+
+U_CAPI void U_EXPORT2 /* appends to sink the value of keywordName from localeID's keyword list, if present */
+ulocimp_getKeywordValue(const char* localeID, /* locale ID to search; nothing happens when NULL */
+ const char* keywordName, /* keyword to look up; matched after canonicalization (lowercasing) */
+ icu::ByteSink& sink, /* receives the value as-is; untouched when the keyword is absent */
+ UErrorCode* status) /* nothing happens when NULL or already a failure; set on malformed input */
+{
+ const char* startSearchHere = NULL;
+ const char* nextSeparator = NULL;
+ char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
+ char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
+
+ if(status && U_SUCCESS(*status) && localeID) {
+ char tempBuffer[ULOC_FULLNAME_CAPACITY];
+ const char* tmpLocaleID;
+
+ if (keywordName == NULL || keywordName[0] == 0) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ locale_canonKeywordName(keywordNameBuffer, keywordName, status); /* validated, lowercased copy of the requested name */
+ if(U_FAILURE(*status)) {
+ return;
+ }
+
+ if (_hasBCP47Extension(localeID)) { /* looks like a BCP47 tag: try converting it to a Unicode locale ID first */
+ _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
+ } else {
+ tmpLocaleID=localeID;
+ }
+
+ startSearchHere = locale_getKeywordsStart(tmpLocaleID);
+ if(startSearchHere == NULL) {
+ /* no keywords, return at once */
+ return;
+ }
+
+ /* walk the keywords; startSearchHere points at the '@' or ';' in front of the next one */
+ while(startSearchHere) {
+ const char* keyValueTail; /* end of the key name first, later reused as end of the value */
+ int32_t keyValueLen; /* despite the name, counts the characters of the key name */
+
+ startSearchHere++; /* skip @ or ; */
+ nextSeparator = uprv_strchr(startSearchHere, '=');
+ if(!nextSeparator) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
+ return;
+ }
+ /* strip leading & trailing spaces (TC decided to tolerate these) */
+ while(*startSearchHere == ' ') {
+ startSearchHere++;
+ }
+ keyValueTail = nextSeparator;
+ while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
+ keyValueTail--;
+ }
+ /* now keyValueTail points to first char after the keyName */
+ /* copy & normalize keyName from locale */
+ if (startSearchHere == keyValueTail) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
+ return;
+ }
+ keyValueLen = 0;
+ while (startSearchHere < keyValueTail) {
+ if (!UPRV_ISALPHANUM(*startSearchHere)) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
+ return;
+ }
+ if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
+ localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
+ } else {
+ /* keyword name too long for internal buffer */
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ return;
+ }
+ }
+ localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
+
+ startSearchHere = uprv_strchr(nextSeparator, ';'); /* ';' in front of the next pair, or NULL if this is the last one */
+
+ if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
+ /* current entry matches the keyword. */
+ nextSeparator++; /* skip '=' */
+ /* First strip leading & trailing spaces (TC decided to tolerate these) */
+ while(*nextSeparator == ' ') {
+ nextSeparator++;
+ }
+ keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
+ while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
+ keyValueTail--;
+ }
+ /* Now copy the value, but check well-formedness */
+ if (nextSeparator == keyValueTail) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
+ return;
+ }
+ while (nextSeparator < keyValueTail) {
+ if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
+ return;
+ }
+ /* Should we lowercase value to return here? Tests expect as-is. */
+ sink.Append(nextSeparator++, 1);
+ }
+ return;
+ }
+ }
+ }
+}
+
+ U_CAPI int32_t U_EXPORT2
+ uloc_setKeywordValue(const char* keywordName,
+ const char* keywordValue,
+ char* buffer, int32_t bufferCapacity,
+ UErrorCode* status)
+ {
+ /* Sets, replaces or removes one keyword in the NUL-terminated locale ID held in
+ * 'buffer', rewriting the keyword section (from '@' on) in place.
+ *
+ * keywordName: must be non-NULL and non-empty; it is passed through
+ * locale_canonKeywordName() before being compared.
+ * keywordValue: NULL or "" requests removal; otherwise only alphanumerics and
+ * characters accepted by UPRV_OK_VALUE_PUNCTUATION are allowed.
+ * buffer/bufferCapacity: in/out locale ID and its capacity; the capacity must be
+ * greater than 1 and not smaller than the current string length.
+ *
+ * Returns -1 if *status already indicated failure on entry, 0 on argument or
+ * syntax errors, the unchanged length when there is nothing to do, and otherwise
+ * the length of the part before '@' plus whatever CharString::extract reports
+ * (or the required length on U_BUFFER_OVERFLOW_ERROR in the no-keywords shortcut).
+ *
+ * Existing entries are re-emitted with lower-cased names and without surrounding
+ * spaces; a new entry is inserted before the first existing key that compares
+ * greater than it, or appended if there is none.
+ * (Original note: "TODO: sorting. removal.")
+ */
+ int32_t keywordNameLen;
+ int32_t keywordValueLen;
+ int32_t bufLen;
+ int32_t needLen = 0;
+ char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
+ char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
+ char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
+ int32_t rc;
+ char* nextSeparator = NULL;
+ char* nextEqualsign = NULL;
+ char* startSearchHere = NULL;
+ char* keywordStart = NULL;
+ CharString updatedKeysAndValues;
+ UBool handledInputKeyAndValue = FALSE;
+ char keyValuePrefix = '@';
+
+ if(U_FAILURE(*status)) {
+ return -1;
+ }
+ if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ bufLen = (int32_t)uprv_strlen(buffer);
+ if(bufferCapacity<bufLen) {
+ /* The capacity is less than the length?! Is this NULL terminated? */
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status); /* form of the requested name used for all comparisons below */
+ if(U_FAILURE(*status)) {
+ return 0;
+ }
+
+ keywordValueLen = 0; /* stays 0 for a NULL or empty value, which means "remove" */
+ if(keywordValue) {
+ while (*keywordValue != 0) {
+ if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
+ return 0;
+ }
+ if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
+ /* Should we force lowercase in value to set? */
+ keywordValueBuffer[keywordValueLen++] = *keywordValue++;
+ } else {
+ /* keywordValue too long for internal buffer */
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ return 0;
+ }
+ }
+ }
+ keywordValueBuffer[keywordValueLen] = 0; /* terminate */
+
+ startSearchHere = (char*)locale_getKeywordsStart(buffer);
+ if(startSearchHere == NULL || (startSearchHere[1]==0)) { /* no keyword section, or a bare trailing '@' */
+ if(keywordValueLen == 0) { /* no keywords = nothing to remove */
+ return bufLen;
+ }
+
+ needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
+ if(startSearchHere) { /* had a single @ */
+ needLen--; /* already had the @ */
+ /* startSearchHere points at the @ */
+ } else {
+ startSearchHere=buffer+bufLen;
+ }
+ if(needLen >= bufferCapacity) {
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ return needLen; /* no change */
+ }
+ *startSearchHere++ = '@';
+ uprv_strcpy(startSearchHere, keywordNameBuffer);
+ startSearchHere += keywordNameLen;
+ *startSearchHere++ = '=';
+ uprv_strcpy(startSearchHere, keywordValueBuffer);
+ return needLen;
+ } /* end shortcut - no @ */
+
+ keywordStart = startSearchHere;
+ /* search for keyword */
+ while(keywordStart) {
+ const char* keyValueTail;
+ int32_t keyValueLen;
+
+ keywordStart++; /* skip @ or ; */
+ nextEqualsign = uprv_strchr(keywordStart, '=');
+ if (!nextEqualsign) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
+ return 0;
+ }
+ /* strip leading & trailing spaces (TC decided to tolerate these) */
+ while(*keywordStart == ' ') {
+ keywordStart++;
+ }
+ keyValueTail = nextEqualsign;
+ while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
+ keyValueTail--;
+ }
+ /* now keyValueTail points to first char after the keyName */
+ /* copy & normalize keyName from locale */
+ if (keywordStart == keyValueTail) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
+ return 0;
+ }
+ keyValueLen = 0;
+ while (keywordStart < keyValueTail) {
+ if (!UPRV_ISALPHANUM(*keywordStart)) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
+ return 0;
+ }
+ if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
+ localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
+ } else {
+ /* keyword name too long for internal buffer */
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ return 0;
+ }
+ }
+ localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
+
+ nextSeparator = uprv_strchr(nextEqualsign, ';');
+
+ /* start processing the value part */
+ nextEqualsign++; /* skip '=' */
+ /* First strip leading & trailing spaces (TC decided to tolerate these) */
+ while(*nextEqualsign == ' ') {
+ nextEqualsign++;
+ }
+ keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
+ while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
+ keyValueTail--;
+ }
+ if (nextEqualsign == keyValueTail) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
+ return 0;
+ }
+
+ rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer); /* 0: same key; <0: input key sorts before this entry */
+ if(rc == 0) {
+ /* Current entry matches the input keyword. Update the entry */
+ if(keywordValueLen > 0) { /* updating a value */
+ updatedKeysAndValues.append(keyValuePrefix, *status);
+ keyValuePrefix = ';'; /* for any subsequent key-value pair */
+ updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
+ updatedKeysAndValues.append('=', *status);
+ updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
+ } /* else removing this entry, don't emit anything */
+ handledInputKeyAndValue = TRUE;
+ } else {
+ /* input keyword sorts earlier than current entry, add before current entry */
+ if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
+ /* insert new entry at this location */
+ updatedKeysAndValues.append(keyValuePrefix, *status);
+ keyValuePrefix = ';'; /* for any subsequent key-value pair */
+ updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
+ updatedKeysAndValues.append('=', *status);
+ updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
+ handledInputKeyAndValue = TRUE;
+ }
+ /* copy the current entry */
+ updatedKeysAndValues.append(keyValuePrefix, *status);
+ keyValuePrefix = ';'; /* for any subsequent key-value pair */
+ updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
+ updatedKeysAndValues.append('=', *status);
+ updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
+ }
+ if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
+ /* append new entry at the end, it sorts later than existing entries */
+ updatedKeysAndValues.append(keyValuePrefix, *status);
+ /* skip keyValuePrefix update, no subsequent key-value pair */
+ updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
+ updatedKeysAndValues.append('=', *status);
+ updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
+ handledInputKeyAndValue = TRUE;
+ }
+ keywordStart = nextSeparator; /* NULL after the last entry, which ends the loop */
+ } /* end loop searching */
+
+ /* Any error from updatedKeysAndValues.append above would be internal and not due to
+ * problems with the passed-in locale. So if we did encounter problems with the
+ * passed-in locale above, those errors took precedence and overrode any error
+ * status from updatedKeysAndValues.append, and also caused a return of 0. If there
+ * are errors here they are from updatedKeysAndValues.append; they do cause an
+ * error return but the passed-in locale is unmodified and the original bufLen is
+ * returned.
+ */
+ if (!handledInputKeyAndValue || U_FAILURE(*status)) {
+ /* if input key/value specified removal of a keyword not present in locale, or
+ * there was an error in CharString.append, leave original locale alone. */
+ return bufLen;
+ }
+
+ // needLen = length of the part before '@'
+ needLen = (int32_t)(startSearchHere - buffer);
+ return needLen + updatedKeysAndValues.extract(
+ startSearchHere, bufferCapacity - needLen, *status);
+ }
+
+/* ### ID parsing implementation **************************************************/
+
+ #define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I')) /* 'x' or 'i', either case */
+
+ /* Returns TRUE if one of the special prefixes is here (s=string):
+ a prefix letter ('x' or 'i', either case) followed by an ID separator, e.g. 'x-' or 'i-' */
+ #define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))
+
+ /* TRUE for NUL, '.' and '@'.
+ * Dot terminates it because of POSIX form where dot precedes the codepage
+ * except for variant
+ */
+ #define _isTerminator(a) ((a==0)||(a=='.')||(a=='@'))
+
+/**
+ * Lookup 'key' in the array 'list'. The array 'list' should contain
+ * a NULL entry, followed by more entries, and a second NULL entry.
+ *
+ * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
+ * COUNTRIES_3.
+ */
+static int16_t _findIndex(const char* const* list, const char* key)
+{
+ const char* const* anchor = list;
+ int32_t pass = 0;
+
+ /* Make two passes through two NULL-terminated arrays at 'list' */
+ while (pass++ < 2) {
+ while (*list) {
+ if (uprv_strcmp(key, *list) == 0) {
+ return (int16_t)(list - anchor);
+ }
+ list++;
+ }
+ ++list; /* skip final NULL *CWB*/
+ }
+ return -1;
+}
+
+ /**
+  * Maps a country code listed in DEPRECATED_COUNTRIES to the entry at the same
+  * index of REPLACEMENT_COUNTRIES; any other code is returned unchanged.
+  */
+ U_CFUNC const char*
+ uloc_getCurrentCountryID(const char* oldID){
+     const int32_t deprecatedIndex = _findIndex(DEPRECATED_COUNTRIES, oldID);
+     return (deprecatedIndex < 0) ? oldID : REPLACEMENT_COUNTRIES[deprecatedIndex];
+ }
+ /**
+  * Maps a language code listed in DEPRECATED_LANGUAGES to the entry at the same
+  * index of REPLACEMENT_LANGUAGES; any other code is returned unchanged.
+  */
+ U_CFUNC const char*
+ uloc_getCurrentLanguageID(const char* oldID){
+     const int32_t deprecatedIndex = _findIndex(DEPRECATED_LANGUAGES, oldID);
+     return (deprecatedIndex < 0) ? oldID : REPLACEMENT_LANGUAGES[deprecatedIndex];
+ }
+/*
+ * the internal functions _getLanguage(), _getCountry(), _getVariant()
+ * avoid duplicating code to handle the earlier locale ID pieces
+ * in the functions for the later ones by
+ * setting the *pEnd pointer to where they stopped parsing
+ *
+ * TODO try to use this in Locale
+ */
+CharString U_EXPORT2
+ulocimp_getLanguage(const char *localeID,
+ const char **pEnd,
+ UErrorCode &status) {
+ CharString result;
+
+ if (uprv_stricmp(localeID, "root") == 0) {
+ localeID += 4;
+ } else if (uprv_strnicmp(localeID, "und", 3) == 0 &&
+ (localeID[3] == '\0' ||
+ localeID[3] == '-' ||
+ localeID[3] == '_' ||
+ localeID[3] == '@')) {
+ localeID += 3;
+ }
+
+ /* if it starts with i- or x- then copy that prefix */
+ if(_isIDPrefix(localeID)) {
+ result.append((char)uprv_tolower(*localeID), status);
+ result.append('-', status);
+ localeID+=2;
+ }
+
+ /* copy the language as far as possible and count its length */
+ while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
+ result.append((char)uprv_tolower(*localeID), status);
+ localeID++;
+ }
+
+ if(result.length()==3) {
+ /* convert 3 character code to 2 character code if possible *CWB*/
+ int32_t offset = _findIndex(LANGUAGES_3, result.data());
+ if(offset>=0) {
+ result.clear();
+ result.append(LANGUAGES[offset], status);
+ }
+ }
+
+ if(pEnd!=NULL) {
+ *pEnd=localeID;
+ }
+
+ return result;
+}
+
+ /**
+  * Extracts a script subtag from the start of localeID.
+  *
+  * A script is recognized only when the leading run of ASCII letters is
+  * exactly four characters long; it is returned title-cased (first letter
+  * upper case, the rest lower case). Otherwise the result is empty.
+  *
+  * @param localeID text to parse (the part after the language separator)
+  * @param pEnd     if not NULL, receives the position after the script, or
+  *                 localeID itself when no script was found
+  * @param status   error code handed to every CharString::append call
+  */
+ CharString U_EXPORT2
+ ulocimp_getScript(const char *localeID,
+                   const char **pEnd,
+                   UErrorCode &status) {
+     CharString script;
+
+     if (pEnd != NULL) {
+         *pEnd = localeID;
+     }
+
+     /* measure the leading run of letters */
+     int32_t letters = 0;
+     while (!_isTerminator(localeID[letters]) && !_isIDSeparator(localeID[letters])
+             && uprv_isASCIILetter(localeID[letters])) {
+         ++letters;
+     }
+
+     /* If it's exactly 4 characters long, then it's a script and not a country. */
+     if (letters != 4) {
+         return script;
+     }
+     if (pEnd != NULL) {
+         *pEnd = localeID + letters;
+     }
+     script.append((char)uprv_toupper(localeID[0]), status);
+     for (int32_t i = 1; i < letters; ++i) {
+         script.append((char)uprv_tolower(localeID[i]), status);
+     }
+     return script;
+ }
+
+ /**
+  * Extracts an upper-cased country subtag from the start of localeID.
+  *
+  * The subtag is accepted only if it is two or three characters long; a
+  * three-character code found in COUNTRIES_3 is replaced by the entry at the
+  * same index of COUNTRIES. Any other length yields an empty result.
+  *
+  * @param localeID text to parse (the part after the preceding separator)
+  * @param pEnd     if not NULL, receives the position after the country, or
+  *                 localeID itself when no country was accepted
+  * @param status   error code handed to every CharString::append call
+  */
+ CharString U_EXPORT2
+ ulocimp_getCountry(const char *localeID,
+                    const char **pEnd,
+                    UErrorCode &status) {
+     CharString country;
+     int32_t length = 0;
+
+     /* copy the candidate up to the next separator or terminator */
+     for (; !_isTerminator(localeID[length]) && !_isIDSeparator(localeID[length]); ++length) {
+         country.append((char)uprv_toupper(localeID[length]), status);
+     }
+
+     const UBool plausibleLength = (length == 2 || length == 3);
+     if (!plausibleLength) {
+         country.clear();
+     } else {
+         if (length == 3) {
+             /* convert 3 character code to 2 character code if possible *CWB*/
+             const int32_t offset = _findIndex(COUNTRIES_3, country.data());
+             if (offset >= 0) {
+                 country.clear();
+                 country.append(COUNTRIES[offset], status);
+             }
+         }
+         localeID += length;
+     }
+
+     if (pEnd != NULL) {
+         *pEnd = localeID;
+     }
+     return country;
+ }
+
+/**
+ * @param needSeparator if true, then add leading '_' if any variants
+ * are added to 'variant'
+ */
+static void
+_getVariant(const char *localeID,
+ char prev,
+ ByteSink& sink,
+ UBool needSeparator) {
+ UBool hasVariant = FALSE;
+
+ /* get one or more variant tags and separate them with '_' */
+ if(_isIDSeparator(prev)) {
+ /* get a variant string after a '-' or '_' */
+ while(!_isTerminator(*localeID)) {
+ if (needSeparator) {
+ sink.Append("_", 1);
+ needSeparator = FALSE;
+ }
+ char c = (char)uprv_toupper(*localeID);
+ if (c == '-') c = '_';
+ sink.Append(&c, 1);
+ hasVariant = TRUE;
+ localeID++;
+ }
+ }
+
+ /* if there is no variant tag after a '-' or '_' then look for '@' */
+ if(!hasVariant) {
+ if(prev=='@') {
+ /* keep localeID */
+ } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
+ ++localeID; /* point after the '@' */
+ } else {
+ return;
+ }
+ while(!_isTerminator(*localeID)) {
+ if (needSeparator) {
+ sink.Append("_", 1);
+ needSeparator = FALSE;
+ }
+ char c = (char)uprv_toupper(*localeID);
+ if (c == '-' || c == ',') c = '_';
+ sink.Append(&c, 1);
+ localeID++;
+ }
+ }
+}
+
+/* Keyword enumeration */
+
+ typedef struct UKeywordsContext {
+ char* keywords; /* owned buffer, freed by uloc_kw_closeKeywords(); walked as consecutive NUL-terminated strings ending at an empty string */
+ char* current; /* iteration cursor into 'keywords'; advanced by uloc_kw_nextKeyword(), rewound by uloc_kw_resetKeywords() */
+ } UKeywordsContext;
+
+U_CDECL_BEGIN
+
+ /** Close callback: releases the keyword buffer, the context and the enumeration itself. */
+ static void U_CALLCONV
+ uloc_kw_closeKeywords(UEnumeration *enumerator) {
+     UKeywordsContext *context = (UKeywordsContext *)enumerator->context;
+     uprv_free(context->keywords);
+     uprv_free(context);
+     uprv_free(enumerator);
+ }
+
+ /**
+  * Count callback: counts the NUL-terminated strings stored back to back in
+  * the context's keyword buffer, stopping at the first empty string.
+  */
+ static int32_t U_CALLCONV
+ uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
+     const char *entry = ((UKeywordsContext *)en->context)->keywords;
+     int32_t count = 0;
+     for (; *entry != 0; entry += uprv_strlen(entry) + 1) {
+         ++count;
+     }
+     return count;
+ }
+
+ /**
+  * Next callback: returns the keyword at the cursor and advances past it,
+  * or returns NULL once the cursor sits on the empty string that ends the list.
+  *
+  * @param resultLength if not NULL, receives the keyword length (0 at the end)
+  */
+ static const char * U_CALLCONV
+ uloc_kw_nextKeyword(UEnumeration* en,
+                     int32_t* resultLength,
+                     UErrorCode* /*status*/) {
+     UKeywordsContext *context = (UKeywordsContext *)en->context;
+     const char *keyword = context->current;
+     int32_t length = 0;
+
+     if (*keyword == 0) {
+         keyword = NULL;
+     } else {
+         length = (int32_t)uprv_strlen(keyword);
+         context->current += length + 1; /* skip the keyword and its NUL */
+     }
+     if (resultLength != NULL) {
+         *resultLength = length;
+     }
+     return keyword;
+ }
+
+ /** Reset callback: moves the cursor back to the start of the keyword buffer. */
+ static void U_CALLCONV
+ uloc_kw_resetKeywords(UEnumeration* en,
+                       UErrorCode* /*status*/) {
+     UKeywordsContext *context = (UKeywordsContext *)en->context;
+     context->current = context->keywords;
+ }
+
+U_CDECL_END
+
+
+ static const UEnumeration gKeywordsEnum = { /* template that uloc_openKeywordList() copies into each new enumeration */
+ NULL, /* NOTE(review): slot meanings follow the UEnumeration declaration, which is not visible here - confirm */
+ NULL, /* presumably the context slot; uloc_openKeywordList() assigns result->context after copying */
+ uloc_kw_closeKeywords,
+ uloc_kw_countKeywords,
+ uenum_unextDefault,
+ uloc_kw_nextKeyword,
+ uloc_kw_resetKeywords
+ };
+
+ /**
+  * Creates an enumeration over a keyword list.
+  *
+  * @param keywordList     source bytes; keywordListSize bytes are copied and
+  *                        one extra NUL is appended to the copy
+  * @param keywordListSize number of bytes to copy from keywordList
+  * @param status          in/out error code; U_MEMORY_ALLOCATION_ERROR is set
+  *                        when any allocation fails
+  * @return the new enumeration, or nullptr on failure
+  */
+ U_CAPI UEnumeration* U_EXPORT2
+ uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
+ {
+     if (U_FAILURE(*status)) {
+         return nullptr;
+     }
+
+     LocalMemory<UKeywordsContext> context;
+     LocalMemory<UEnumeration> enumeration;
+     context.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
+     enumeration.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
+     if (context.isNull() || enumeration.isNull()) {
+         *status = U_MEMORY_ALLOCATION_ERROR;
+         return nullptr;
+     }
+     /* start from the shared callback table */
+     uprv_memcpy(enumeration.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));
+
+     char *listCopy = static_cast<char *>(uprv_malloc(keywordListSize+1));
+     context->keywords = listCopy;
+     if (listCopy == nullptr) {
+         *status = U_MEMORY_ALLOCATION_ERROR;
+         return nullptr;
+     }
+     uprv_memcpy(listCopy, keywordList, keywordListSize);
+     listCopy[keywordListSize] = 0;
+     context->current = listCopy;
+
+     enumeration->context = context.orphan();
+     return enumeration.orphan();
+ }
+
+ /**
+  * Opens an enumeration over the keywords of a locale ID.
+  *
+  * Language, optional script and optional country are skipped first; the
+  * keyword section is then located with locale_getKeywordsStart().
+  *
+  * @param localeID locale ID; NULL means uloc_getDefault()
+  * @param status   in/out error code; NULL or an existing failure returns NULL
+  * @return the enumeration, or NULL when there is no keyword section or an
+  *         error occurred
+  */
+ U_CAPI UEnumeration* U_EXPORT2
+ uloc_openKeywords(const char* localeID,
+                   UErrorCode* status)
+ {
+     char tempBuffer[ULOC_FULLNAME_CAPACITY];
+     const char* tmpLocaleID;
+
+     if (status == NULL || U_FAILURE(*status)) {
+         return NULL;
+     }
+
+     if (_hasBCP47Extension(localeID)) {
+         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
+     } else {
+         tmpLocaleID = (localeID != NULL) ? localeID : uloc_getDefault();
+     }
+
+     /* Skip the language */
+     ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *status);
+     if (U_FAILURE(*status)) {
+         return NULL;
+     }
+
+     if (_isIDSeparator(*tmpLocaleID)) {
+         /* Skip the script if available */
+         const char *afterScript;
+         ulocimp_getScript(tmpLocaleID+1, &afterScript, *status);
+         if (U_FAILURE(*status)) {
+             return NULL;
+         }
+         if (afterScript != tmpLocaleID+1) {
+             tmpLocaleID = afterScript; /* Found optional script */
+         }
+         /* Skip the Country */
+         if (_isIDSeparator(*tmpLocaleID)) {
+             ulocimp_getCountry(tmpLocaleID+1, &tmpLocaleID, *status);
+             if (U_FAILURE(*status)) {
+                 return NULL;
+             }
+         }
+     }
+
+     /* keywords are located after '@' */
+     tmpLocaleID = locale_getKeywordsStart(tmpLocaleID);
+     if (tmpLocaleID == NULL) {
+         return NULL;
+     }
+
+     CharString keywords;
+     CharStringByteSink sink(&keywords);
+     ulocimp_getKeywords(tmpLocaleID+1, '@', sink, FALSE, status);
+     if (U_FAILURE(*status)) {
+         return NULL;
+     }
+     return uloc_openKeywordList(keywords.data(), keywords.length(), status);
+ }
+
+
+ /* bit-flags for 'options' parameter of _canonicalize */
+ #define _ULOC_STRIP_KEYWORDS 0x2 /* when set, _canonicalize does not append the keyword section */
+ #define _ULOC_CANONICALIZE 0x1 /* level 2: fold a POSIX "@VARIANT" into the variant and consult CANONICALIZE_MAP */
+
+ #define OPTION_SET(options, mask) ((options & mask) != 0)
+
+ static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}; /* no NUL terminator: only compared with an explicit length */
+ #define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
+
+/**
+ * Canonicalize the given localeID, to level 1 or to level 2,
+ * depending on the options. To specify level 1, pass in options=0.
+ * To specify level 2, pass in options=_ULOC_CANONICALIZE.
+ *
+ * This is the code underlying uloc_getName and uloc_canonicalize.
+ */
+static void
+_canonicalize(const char* localeID,
+ ByteSink& sink,
+ uint32_t options,
+ UErrorCode* err) {
+ int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
+ char tempBuffer[ULOC_FULLNAME_CAPACITY];
+ const char* origLocaleID;
+ const char* tmpLocaleID;
+ const char* keywordAssign = NULL;
+ const char* separatorIndicator = NULL;
+
+ if (U_FAILURE(*err)) {
+ return;
+ }
+
+ if (_hasBCP47Extension(localeID)) {
+ _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
+ } else {
+ if (localeID==NULL) {
+ localeID=uloc_getDefault();
+ }
+ tmpLocaleID=localeID;
+ }
+
+ origLocaleID=tmpLocaleID;
+
+ /* get all pieces, one after another, and separate with '_' */
+ CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
+
+ if (tag.length() == I_DEFAULT_LENGTH &&
+ uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
+ tag.clear();
+ tag.append(uloc_getDefault(), *err);
+ } else if(_isIDSeparator(*tmpLocaleID)) {
+ const char *scriptID;
+
+ ++fieldCount;
+ tag.append('_', *err);
+
+ CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
+ tag.append(script, *err);
+ scriptSize = script.length();
+ if(scriptSize > 0) {
+ /* Found optional script */
+ tmpLocaleID = scriptID;
+ ++fieldCount;
+ if (_isIDSeparator(*tmpLocaleID)) {
+ /* If there is something else, then we add the _ */
+ tag.append('_', *err);
+ }
+ }
+
+ if (_isIDSeparator(*tmpLocaleID)) {
+ const char *cntryID;
+
+ CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
+ tag.append(country, *err);
+ if (!country.isEmpty()) {
+ /* Found optional country */
+ tmpLocaleID = cntryID;
+ }
+ if(_isIDSeparator(*tmpLocaleID)) {
+ /* If there is something else, then we add the _ if we found country before. */
+ if (!_isIDSeparator(*(tmpLocaleID+1))) {
+ ++fieldCount;
+ tag.append('_', *err);
+ }
+
+ variantSize = -tag.length();
+ {
+ CharStringByteSink s(&tag);
+ _getVariant(tmpLocaleID+1, *tmpLocaleID, s, FALSE);
+ }
+ variantSize += tag.length();
+ if (variantSize > 0) {
+ tmpLocaleID += variantSize + 1; /* skip '_' and variant */
+ }
+ }
+ }
+ }
+
+ /* Copy POSIX-style charset specifier, if any [mr.utf8] */
+ if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
+ UBool done = FALSE;
+ do {
+ char c = *tmpLocaleID;
+ switch (c) {
+ case 0:
+ case '@':
+ done = TRUE;
+ break;
+ default:
+ tag.append(c, *err);
+ ++tmpLocaleID;
+ break;
+ }
+ } while (!done);
+ }
+
+ /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
+ After this, tmpLocaleID either points to '@' or is NULL */
+ if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
+ keywordAssign = uprv_strchr(tmpLocaleID, '=');
+ separatorIndicator = uprv_strchr(tmpLocaleID, ';');
+ }
+
+ /* Copy POSIX-style variant, if any [mr@FOO] */
+ if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
+ tmpLocaleID != NULL && keywordAssign == NULL) {
+ for (;;) {
+ char c = *tmpLocaleID;
+ if (c == 0) {
+ break;
+ }
+ tag.append(c, *err);
+ ++tmpLocaleID;
+ }
+ }
+
+ if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
+ /* Handle @FOO variant if @ is present and not followed by = */
+ if (tmpLocaleID!=NULL && keywordAssign==NULL) {
+ /* Add missing '_' if needed */
+ if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
+ do {
+ tag.append('_', *err);
+ ++fieldCount;
+ } while(fieldCount<2);
+ }
+
+ int32_t posixVariantSize = -tag.length();
+ {
+ CharStringByteSink s(&tag);
+ _getVariant(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0));
+ }
+ posixVariantSize += tag.length();
+ if (posixVariantSize > 0) {
+ variantSize += posixVariantSize;
+ }
+ }
+
+ /* Look up the ID in the canonicalization map */
+ for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
+ StringPiece id(CANONICALIZE_MAP[j].id);
+ if (tag == id) {
+ if (id.empty() && tmpLocaleID != NULL) {
+ break; /* Don't remap "" if keywords present */
+ }
+ tag.clear();
+ tag.append(CANONICALIZE_MAP[j].canonicalID, *err);
+ break;
+ }
+ }
+ }
+
+ sink.Append(tag.data(), tag.length());
+
+ if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
+ if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
+ (!separatorIndicator || separatorIndicator > keywordAssign)) {
+ sink.Append("@", 1);
+ ++fieldCount;
+ ulocimp_getKeywords(tmpLocaleID+1, '@', sink, TRUE, err);
+ }
+ }
+}
+
+/* ### ID parsing API **************************************************/
+
+ /**
+  * Writes the parent of localeID - everything before its last '_' - to 'parent'.
+  *
+  * An ID without '_' (or starting with it) has the empty string as parent.
+  * When the ID starts with "und_" (any case), the leading "und" is dropped
+  * from the result.
+  *
+  * @param localeID       locale ID; NULL means uloc_getDefault()
+  * @param parent         destination buffer; may be the same pointer as localeID
+  * @param parentCapacity capacity of 'parent'
+  * @param err            in/out error code
+  * @return the value returned by u_terminateChars() for the parent length,
+  *         or 0 if *err already indicated failure
+  */
+ U_CAPI int32_t U_EXPORT2
+ uloc_getParent(const char* localeID,
+                char* parent,
+                int32_t parentCapacity,
+                UErrorCode* err)
+ {
+     if (U_FAILURE(*err)) {
+         return 0;
+     }
+     if (localeID == NULL) {
+         localeID = uloc_getDefault();
+     }
+
+     const char *lastUnderscore = uprv_strrchr(localeID, '_');
+     int32_t parentLength =
+         (lastUnderscore == NULL) ? 0 : (int32_t)(lastUnderscore - localeID);
+
+     if (parentLength > 0) {
+         if (uprv_strnicmp(localeID, "und_", 4) == 0) {
+             /* drop the "und" but keep what follows it */
+             localeID += 3;
+             parentLength -= 3;
+             uprv_memmove(parent, localeID, uprv_min(parentLength, parentCapacity));
+         } else if (parent != localeID) {
+             uprv_memcpy(parent, localeID, uprv_min(parentLength, parentCapacity));
+         }
+     }
+
+     return u_terminateChars(parent, parentCapacity, parentLength, err);
+ }
+
+ /**
+  * Copies the language of localeID into 'language'.
+  * uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*
+  *
+  * @param localeID         locale ID; NULL means uloc_getDefault()
+  * @param language         destination buffer
+  * @param languageCapacity capacity of 'language'
+  * @param err              in/out error code; NULL or an existing failure returns 0
+  * @return the value returned by CharString::extract() for the language
+  */
+ U_CAPI int32_t U_EXPORT2
+ uloc_getLanguage(const char* localeID,
+                  char* language,
+                  int32_t languageCapacity,
+                  UErrorCode* err)
+ {
+     if (err == NULL || U_FAILURE(*err)) {
+         return 0;
+     }
+
+     const char *id = (localeID != NULL) ? localeID : uloc_getDefault();
+     CharString parsed = ulocimp_getLanguage(id, NULL, *err);
+     return parsed.extract(language, languageCapacity, *err);
+ }
+
+ /**
+  * Copies the script of localeID into 'script'; the result is empty when the
+  * language is not followed by an ID separator.
+  *
+  * @param localeID       locale ID; NULL means uloc_getDefault()
+  * @param script         destination buffer
+  * @param scriptCapacity capacity of 'script'
+  * @param err            in/out error code; NULL or an existing failure returns 0
+  */
+ U_CAPI int32_t U_EXPORT2
+ uloc_getScript(const char* localeID,
+                char* script,
+                int32_t scriptCapacity,
+                UErrorCode* err)
+ {
+     if (err == NULL || U_FAILURE(*err)) {
+         return 0;
+     }
+
+     const char *cursor = (localeID != NULL) ? localeID : uloc_getDefault();
+
+     /* skip the language */
+     ulocimp_getLanguage(cursor, &cursor, *err);
+     if (U_FAILURE(*err)) {
+         return 0;
+     }
+
+     if (!_isIDSeparator(*cursor)) {
+         return u_terminateChars(script, scriptCapacity, 0, err);
+     }
+     return ulocimp_getScript(cursor+1, NULL, *err).extract(script, scriptCapacity, *err);
+ }
+
+ /**
+  * Copies the country of localeID into 'country'; the result is empty when
+  * no separator introduces a country after the language and optional script.
+  *
+  * @param localeID        locale ID; NULL means uloc_getDefault()
+  * @param country         destination buffer
+  * @param countryCapacity capacity of 'country'
+  * @param err             in/out error code; NULL or an existing failure returns 0
+  */
+ U_CAPI int32_t U_EXPORT2
+ uloc_getCountry(const char* localeID,
+                 char* country,
+                 int32_t countryCapacity,
+                 UErrorCode* err)
+ {
+     if (err == NULL || U_FAILURE(*err)) {
+         return 0;
+     }
+
+     const char *cursor = (localeID != NULL) ? localeID : uloc_getDefault();
+
+     /* Skip the language */
+     ulocimp_getLanguage(cursor, &cursor, *err);
+     if (U_FAILURE(*err)) {
+         return 0;
+     }
+     if (!_isIDSeparator(*cursor)) {
+         return u_terminateChars(country, countryCapacity, 0, err);
+     }
+
+     /* Skip the script if available */
+     const char *afterScript;
+     ulocimp_getScript(cursor+1, &afterScript, *err);
+     if (U_FAILURE(*err)) {
+         return 0;
+     }
+     if (afterScript != cursor+1) {
+         cursor = afterScript; /* Found optional script */
+     }
+     if (!_isIDSeparator(*cursor)) {
+         return u_terminateChars(country, countryCapacity, 0, err);
+     }
+     return ulocimp_getCountry(cursor+1, NULL, *err).extract(country, countryCapacity, *err);
+ }
+
+ /**
+  * Copies the variant of localeID into 'variant'.
+  *
+  * Language, optional script and optional country are skipped; a variant is
+  * only looked for when a separator follows at each of those steps, otherwise
+  * the result is empty.
+  *
+  * @param localeID        locale ID; NULL (without a BCP47 extension) means uloc_getDefault()
+  * @param variant         destination buffer
+  * @param variantCapacity capacity of 'variant'
+  * @param err             in/out error code; NULL or an existing failure returns 0;
+  *                        set to U_BUFFER_OVERFLOW_ERROR when the sink overflowed
+  * @return the number of bytes appended by _getVariant() when it ran, otherwise
+  *         the value returned by u_terminateChars() for length 0
+  */
+ U_CAPI int32_t U_EXPORT2
+ uloc_getVariant(const char* localeID,
+                 char* variant,
+                 int32_t variantCapacity,
+                 UErrorCode* err)
+ {
+     char tempBuffer[ULOC_FULLNAME_CAPACITY];
+     const char* cursor;
+
+     if (err == NULL || U_FAILURE(*err)) {
+         return 0;
+     }
+
+     if (_hasBCP47Extension(localeID)) {
+         _ConvertBCP47(cursor, localeID, tempBuffer, sizeof(tempBuffer), err);
+     } else {
+         cursor = (localeID != NULL) ? localeID : uloc_getDefault();
+     }
+
+     /* Skip the language */
+     ulocimp_getLanguage(cursor, &cursor, *err);
+     if (U_FAILURE(*err)) {
+         return 0;
+     }
+     if (!_isIDSeparator(*cursor)) {
+         return u_terminateChars(variant, variantCapacity, 0, err);
+     }
+
+     /* Skip the script if available */
+     const char *afterScript;
+     ulocimp_getScript(cursor+1, &afterScript, *err);
+     if (U_FAILURE(*err)) {
+         return 0;
+     }
+     if (afterScript != cursor+1) {
+         cursor = afterScript; /* Found optional script */
+     }
+     if (!_isIDSeparator(*cursor)) {
+         return u_terminateChars(variant, variantCapacity, 0, err);
+     }
+
+     /* Skip the Country */
+     const char *afterCountry;
+     ulocimp_getCountry(cursor+1, &afterCountry, *err);
+     if (U_FAILURE(*err)) {
+         return 0;
+     }
+     if (afterCountry != cursor+1) {
+         cursor = afterCountry; /* Found optional country */
+     }
+     if (!_isIDSeparator(*cursor)) {
+         return u_terminateChars(variant, variantCapacity, 0, err);
+     }
+
+     /* If there was no country ID, skip a possible extra IDSeparator */
+     if (cursor != afterCountry && _isIDSeparator(cursor[1])) {
+         ++cursor;
+     }
+
+     CheckedArrayByteSink sink(variant, variantCapacity);
+     _getVariant(cursor+1, *cursor, sink, FALSE);
+     const int32_t written = sink.NumberOfBytesAppended();
+
+     if (U_FAILURE(*err)) {
+         return written;
+     }
+     if (sink.Overflowed()) {
+         *err = U_BUFFER_OVERFLOW_ERROR;
+         return written;
+     }
+     return u_terminateChars(variant, variantCapacity, written, err);
+ }
+
+ /**
+  * Writes the result of ulocimp_getName() for localeID into 'name'.
+  *
+  * @param localeID     locale ID to process
+  * @param name         destination buffer
+  * @param nameCapacity capacity of 'name'
+  * @param err          in/out error code; set to U_BUFFER_OVERFLOW_ERROR when
+  *                     the output did not fit
+  * @return the number of bytes appended to the sink, or 0 if *err already
+  *         indicated failure on entry
+  */
+ U_CAPI int32_t U_EXPORT2
+ uloc_getName(const char* localeID,
+              char* name,
+              int32_t nameCapacity,
+              UErrorCode* err)
+ {
+     if (U_FAILURE(*err)) {
+         return 0;
+     }
+
+     CheckedArrayByteSink sink(name, nameCapacity);
+     ulocimp_getName(localeID, sink, err);
+     const int32_t length = sink.NumberOfBytesAppended();
+
+     if (U_SUCCESS(*err)) {
+         if (sink.Overflowed()) {
+             *err = U_BUFFER_OVERFLOW_ERROR;
+         } else {
+             u_terminateChars(name, nameCapacity, length, err);
+         }
+     }
+     return length;
+ }
+
+ U_CAPI void U_EXPORT2
+ ulocimp_getName(const char* localeID,
+ ByteSink& sink,
+ UErrorCode* err)
+ {
+ _canonicalize(localeID, sink, 0, err); /* options=0: level 1, keywords are kept */
+ }
+
+ /**
+  * Writes the result of ulocimp_getBaseName() for localeID into 'name'.
+  *
+  * @param localeID     locale ID to process
+  * @param name         destination buffer
+  * @param nameCapacity capacity of 'name'
+  * @param err          in/out error code; set to U_BUFFER_OVERFLOW_ERROR when
+  *                     the output did not fit
+  * @return the number of bytes appended to the sink, or 0 if *err already
+  *         indicated failure on entry
+  */
+ U_CAPI int32_t U_EXPORT2
+ uloc_getBaseName(const char* localeID,
+                  char* name,
+                  int32_t nameCapacity,
+                  UErrorCode* err)
+ {
+     if (U_FAILURE(*err)) {
+         return 0;
+     }
+
+     CheckedArrayByteSink sink(name, nameCapacity);
+     ulocimp_getBaseName(localeID, sink, err);
+     const int32_t length = sink.NumberOfBytesAppended();
+
+     if (U_SUCCESS(*err)) {
+         if (sink.Overflowed()) {
+             *err = U_BUFFER_OVERFLOW_ERROR;
+         } else {
+             u_terminateChars(name, nameCapacity, length, err);
+         }
+     }
+     return length;
+ }
+
+ U_CAPI void U_EXPORT2
+ ulocimp_getBaseName(const char* localeID,
+ ByteSink& sink,
+ UErrorCode* err)
+ {
+ _canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err); /* level 1, keyword section omitted */
+ }
+
+ /**
+  * Writes the result of ulocimp_canonicalize() for localeID into 'name'.
+  *
+  * @param localeID     locale ID to process
+  * @param name         destination buffer
+  * @param nameCapacity capacity of 'name'
+  * @param err          in/out error code; set to U_BUFFER_OVERFLOW_ERROR when
+  *                     the output did not fit
+  * @return the number of bytes appended to the sink, or 0 if *err already
+  *         indicated failure on entry
+  */
+ U_CAPI int32_t U_EXPORT2
+ uloc_canonicalize(const char* localeID,
+                   char* name,
+                   int32_t nameCapacity,
+                   UErrorCode* err)
+ {
+     if (U_FAILURE(*err)) {
+         return 0;
+     }
+
+     CheckedArrayByteSink sink(name, nameCapacity);
+     ulocimp_canonicalize(localeID, sink, err);
+     const int32_t length = sink.NumberOfBytesAppended();
+
+     if (U_SUCCESS(*err)) {
+         if (sink.Overflowed()) {
+             *err = U_BUFFER_OVERFLOW_ERROR;
+         } else {
+             u_terminateChars(name, nameCapacity, length, err);
+         }
+     }
+     return length;
+ }
+
+ U_CAPI void U_EXPORT2
+ ulocimp_canonicalize(const char* localeID,
+ ByteSink& sink,
+ UErrorCode* err)
+ {
+ _canonicalize(localeID, sink, _ULOC_CANONICALIZE, err); /* level 2, keywords are kept */
+ }
+
+ /**
+  * Returns the LANGUAGES_3 entry that corresponds to the language of localeID.
+  *
+  * @param localeID locale ID; NULL means uloc_getDefault()
+  * @return the entry of LANGUAGES_3 at the index where the language is found
+  *         in LANGUAGES, or "" when the language cannot be extracted or is
+  *         not listed
+  */
+ U_CAPI const char* U_EXPORT2
+ uloc_getISO3Language(const char* localeID)
+ {
+     int16_t offset;
+     char lang[ULOC_LANG_CAPACITY];
+     UErrorCode err = U_ZERO_ERROR;
+
+     if (localeID == NULL)
+     {
+         localeID = uloc_getDefault();
+     }
+     uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
+     /* U_STRING_NOT_TERMINATED_WARNING is not a failure, but it means 'lang'
+      * was filled completely without a terminating NUL; _findIndex() would
+      * then read past the array. uloc_getLCID() guards the same call the
+      * same way. */
+     if (U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING)
+         return "";
+     offset = _findIndex(LANGUAGES, lang);
+     if (offset < 0)
+         return "";
+     return LANGUAGES_3[offset];
+ }
+
+ /**
+  * Returns the COUNTRIES_3 entry that corresponds to the country of localeID.
+  *
+  * @param localeID locale ID; NULL means uloc_getDefault()
+  * @return the entry of COUNTRIES_3 at the index where the country is found
+  *         in COUNTRIES, or "" when the country cannot be extracted or is
+  *         not listed
+  */
+ U_CAPI const char* U_EXPORT2
+ uloc_getISO3Country(const char* localeID)
+ {
+     char cntry[ULOC_LANG_CAPACITY];
+     UErrorCode err = U_ZERO_ERROR;
+
+     if (localeID == NULL) {
+         localeID = uloc_getDefault();
+     }
+     uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
+     if (U_FAILURE(err)) {
+         return "";
+     }
+
+     const int16_t offset = _findIndex(COUNTRIES, cntry);
+     return (offset < 0) ? "" : COUNTRIES_3[offset];
+ }
+
+ /**
+  * Returns the Windows LCID for localeID, or 0 when it cannot be determined.
+  *
+  * The platform lookup (uprv_convertToLCIDPlatform) is tried first. Otherwise
+  * the ID is handed to uprv_convertToLCID(); if the ID has a keyword section,
+  * only a non-empty "collation" keyword is retained on top of the base name,
+  * and when that rewrite fails the original localeID is passed on unchanged.
+  *
+  * @param localeID locale ID; NULL or an ID shorter than two characters
+  *                 returns 0
+  */
+ U_CAPI uint32_t U_EXPORT2
+ uloc_getLCID(const char* localeID)
+ {
+     UErrorCode status = U_ZERO_ERROR;
+     char langID[ULOC_FULLNAME_CAPACITY];
+     uint32_t lcid = 0;
+
+     /* Check for incomplete id. */
+     if (!localeID || uprv_strlen(localeID) < 2) {
+         return 0;
+     }
+
+     // First, attempt Windows platform lookup if available, but fall
+     // through to catch any special cases (ICU vs Windows name differences).
+     lcid = uprv_convertToLCIDPlatform(localeID, &status);
+     if (U_FAILURE(status)) {
+         return 0;
+     }
+     if (lcid > 0) {
+         // Windows found an LCID, return that
+         return lcid;
+     }
+
+     uloc_getLanguage(localeID, langID, sizeof(langID), &status);
+     if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
+         return 0;
+     }
+
+     if (uprv_strchr(localeID, '@')) {
+         // uprv_convertToLCID does not support keywords other than collation.
+         // Remove all keywords except collation.
+         int32_t len;
+         char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
+
+         CharString collVal;
+         {
+             CharStringByteSink sink(&collVal);
+             ulocimp_getKeywordValue(localeID, "collation", sink, &status);
+         }
+
+         if (U_SUCCESS(status) && !collVal.isEmpty()) {
+             len = uloc_getBaseName(localeID, tmpLocaleID,
+                     UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
+
+             if (U_SUCCESS(status) && len > 0) {
+                 tmpLocaleID[len] = 0;
+
+                 // uloc_setKeywordValue() takes the capacity of the whole
+                 // buffer (it compares it with the current string length and
+                 // with the total needed length), not the space left after
+                 // the base name. Passing only the remainder made the call
+                 // fail for base names longer than about half the buffer,
+                 // silently dropping the collation keyword. One slot stays
+                 // reserved for the terminator written below.
+                 len = uloc_setKeywordValue("collation", collVal.data(), tmpLocaleID,
+                         UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
+
+                 if (U_SUCCESS(status) && len > 0) {
+                     tmpLocaleID[len] = 0;
+                     return uprv_convertToLCID(langID, tmpLocaleID, &status);
+                 }
+             }
+         }
+
+         // fall through - all keywords are simply ignored
+         status = U_ZERO_ERROR;
+     }
+
+     return uprv_convertToLCID(langID, localeID, &status);
+ }
+
+ U_CAPI int32_t U_EXPORT2
+ uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
+ UErrorCode *status)
+ {
+ return uprv_convertToPosix(hostid, locale, localeCapacity, status); /* thin wrapper: all arguments are forwarded unchanged */
+ }
+
+/* ### Default locale **************************************************/
+
+ U_CAPI const char* U_EXPORT2
+ uloc_getDefault()
+ {
+ return locale_get_default(); /* thin wrapper around locale_get_default() */
+ }
+
+ /**
+  * Sets the default locale by forwarding to locale_set_default().
+  *
+  * @param newDefaultLocale value handed on unchanged
+  * @param err              only consulted on entry: nothing happens if it
+  *                         already indicates failure; it is never written
+  */
+ U_CAPI void U_EXPORT2
+ uloc_setDefault(const char* newDefaultLocale,
+                 UErrorCode* err)
+ {
+     /* the error code isn't currently used for anything else by this function */
+     if (U_SUCCESS(*err)) {
+         /* propagate change to C++ */
+         locale_set_default(newDefaultLocale);
+     }
+ }
+
+/**
+ * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
+ * to an array of pointers to arrays of char. All of these pointers are owned
+ * by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ */
+U_CAPI const char* const* U_EXPORT2
+uloc_getISOLanguages()
+{
+ return LANGUAGES;
+}
+
+/**
+ * Returns a list of all 2-letter country codes defined in ISO 639. This is a
+ * pointer to an array of pointers to arrays of char. All of these pointers are
+ * owned by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ */
+U_CAPI const char* const* U_EXPORT2
+uloc_getISOCountries()
+{
+ return COUNTRIES;
+}
+
+ /**
+  * Converts a keyword to its BCP 47 key via ulocimp_toBcpKey().
+  *
+  * @return the mapped key; the input pointer itself when there is no mapping
+  *         but ultag_isUnicodeLocaleKey() accepts the keyword; NULL otherwise
+  */
+ U_CAPI const char* U_EXPORT2
+ uloc_toUnicodeLocaleKey(const char* keyword)
+ {
+     const char* bcpKey = ulocimp_toBcpKey(keyword);
+     if (bcpKey != NULL) {
+         return bcpKey;
+     }
+     // unknown keyword: hand it back unchanged if its syntax is fine
+     return ultag_isUnicodeLocaleKey(keyword, -1) ? keyword : NULL;
+ }
+
+ /**
+  * Converts a keyword value to its BCP 47 type via ulocimp_toBcpType().
+  *
+  * @return the mapped type; the input 'value' pointer itself when there is no
+  *         mapping but ultag_isUnicodeLocaleType() accepts the value; NULL
+  *         otherwise
+  */
+ U_CAPI const char* U_EXPORT2
+ uloc_toUnicodeLocaleType(const char* keyword, const char* value)
+ {
+     const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
+     if (bcpType != NULL) {
+         return bcpType;
+     }
+     // unknown value: hand it back unchanged if its syntax is fine
+     return ultag_isUnicodeLocaleType(value, -1) ? value : NULL;
+ }
+
+ /**
+  * Returns TRUE if every character of legacyKey passes UPRV_ISALPHANUM
+  * (an empty string is accepted as well).
+  */
+ static UBool
+ isWellFormedLegacyKey(const char* legacyKey)
+ {
+     for (const char* p = legacyKey; *p != 0; ++p) {
+         if (!UPRV_ISALPHANUM(*p)) {
+             return FALSE;
+         }
+     }
+     return TRUE;
+ }
+
+ /**
+  * Returns TRUE if legacyType consists of one or more non-empty runs of
+  * UPRV_ISALPHANUM characters separated by single '_', '/' or '-' characters.
+  * An empty string, a leading or trailing separator, two separators in a row
+  * or any other character make it FALSE.
+  */
+ static UBool
+ isWellFormedLegacyType(const char* legacyType)
+ {
+     int32_t runLength = 0; /* alphanumerics seen since the last separator */
+
+     for (const char* p = legacyType; *p != 0; ++p) {
+         const char c = *p;
+         if (c == '_' || c == '/' || c == '-') {
+             if (runLength == 0) {
+                 return FALSE; /* separator without a preceding run */
+             }
+             runLength = 0;
+         } else if (!UPRV_ISALPHANUM(c)) {
+             return FALSE;
+         } else {
+             ++runLength;
+         }
+     }
+     return (runLength != 0);
+ }
+
+ /**
+  * Converts a keyword to its legacy key via ulocimp_toLegacyKey().
+  *
+  * @return the mapped key; the input pointer itself when there is no mapping
+  *         but the keyword is well-formed per isWellFormedLegacyKey(); NULL
+  *         otherwise
+  */
+ U_CAPI const char* U_EXPORT2
+ uloc_toLegacyKey(const char* keyword)
+ {
+     const char* legacyKey = ulocimp_toLegacyKey(keyword);
+     if (legacyKey != NULL) {
+         return legacyKey;
+     }
+
+     // No mapping: accept the keyword as-is if it is well-formed with the
+     // legacy locale syntax.
+     //
+     // Note:
+     //  LDML/CLDR provides some definition of keyword syntax in
+     //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
+     //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
+     //  Keys can only consist of [0-9a-zA-Z].
+     return isWellFormedLegacyKey(keyword) ? keyword : NULL;
+ }
+
+ /**
+  * Converts a keyword value to its legacy type via ulocimp_toLegacyType().
+  *
+  * @return the mapped type; the input 'value' pointer itself when there is no
+  *         mapping but the value is well-formed per isWellFormedLegacyType();
+  *         NULL otherwise
+  */
+ U_CAPI const char* U_EXPORT2
+ uloc_toLegacyType(const char* keyword, const char* value)
+ {
+     const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
+     if (legacyType != NULL) {
+         return legacyType;
+     }
+
+     // No mapping: accept the value as-is if it is well-formed with the
+     // legacy locale syntax.
+     //
+     // Note:
+     //  LDML/CLDR provides some definition of keyword syntax in
+     //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
+     //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
+     //  Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
+     //  we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
+     //  NOTE(review): isWellFormedLegacyType() lists only '_', '/' and '-' as
+     //  separators, so '+' is accepted only if UPRV_ISALPHANUM accepts it - confirm.
+     return isWellFormedLegacyType(value) ? value : NULL;
+ }
+
+/*eof*/
diff --git a/thirdparty/icu4c/common/uloc_keytype.cpp b/thirdparty/icu4c/common/uloc_keytype.cpp
new file mode 100644
index 0000000000..019da058cf
--- /dev/null
+++ b/thirdparty/icu4c/common/uloc_keytype.cpp
@@ -0,0 +1,534 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2014-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+#include <algorithm>
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/uobject.h"
+
+#include "charstr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uhash.h"
+#include "umutex.h"
+#include "uresimp.h"
+#include "uvector.h"
+#include "udataswp.h" /* for InvChar functions */
+
+static UHashtable* gLocExtKeyMap = NULL;
+static icu::UInitOnce gLocExtKeyMapInitOnce = U_INITONCE_INITIALIZER;
+
+// bit flags for special types
+// The flags are OR-ed together in LocExtKeyData::specialTypes.
+// initFromResourceBundle() sets a flag when a key's type map contains the
+// matching pseudo entry ("CODEPOINTS", "REORDER_CODE" or "RG_KEY_VALUE");
+// ulocimp_toBcpType()/ulocimp_toLegacyType() then validate such values with
+// isSpecialTypeCodepoints(), isSpecialTypeReorderCode() and
+// isSpecialTypeRgKeyValue() instead of a table lookup.
+typedef enum {
+ SPECIALTYPE_NONE = 0,
+ SPECIALTYPE_CODEPOINTS = 1,
+ SPECIALTYPE_REORDER_CODE = 2,
+ SPECIALTYPE_RG_KEY_VALUE = 4
+} SpecialType;
+
+// Mapping data for one locale extension key. initFromResourceBundle()
+// registers the same entry in gLocExtKeyMap under its legacy id and, when
+// different, under its BCP id.
+struct LocExtKeyData : public icu::UMemory {
+ const char* legacyId;
+ const char* bcpId;
+ // per-key table of LocExtType entries (looked up by type string)
+ icu::LocalUHashtablePointer typeMap;
+ // OR-ed SpecialType flags for this key
+ uint32_t specialTypes;
+};
+
+// Mapping data for one type (value) of a locale extension key: the legacy
+// type id and the corresponding BCP type id. Entries are created from
+// gLocExtTypeEntries and stored in LocExtKeyData::typeMap.
+struct LocExtType : public icu::UMemory {
+ const char* legacyId;
+ const char* bcpId;
+};
+
+static icu::MemoryPool<icu::CharString>* gKeyTypeStringPool = NULL;
+static icu::MemoryPool<LocExtKeyData>* gLocExtKeyDataEntries = NULL;
+static icu::MemoryPool<LocExtType>* gLocExtTypeEntries = NULL;
+
+U_CDECL_BEGIN
+
+/**
+ * Library cleanup callback (registered by initFromResourceBundle()).
+ * Closes the key map, deletes the three memory pools, resets every global
+ * to NULL and re-arms the init-once guard.
+ *
+ * @return always TRUE.
+ */
+static UBool U_CALLCONV
+uloc_key_type_cleanup(void) {
+    UHashtable* keyMap = gLocExtKeyMap;
+    gLocExtKeyMap = NULL;
+    if (keyMap != NULL) {
+        uhash_close(keyMap);
+    }
+
+    // Pools are released in the same order as before: key data, type
+    // entries, then the string pool.
+    delete gLocExtKeyDataEntries;
+    gLocExtKeyDataEntries = NULL;
+    delete gLocExtTypeEntries;
+    gLocExtTypeEntries = NULL;
+    delete gKeyTypeStringPool;
+    gKeyTypeStringPool = NULL;
+
+    gLocExtKeyMapInitOnce.reset();
+    return TRUE;
+}
+
+U_CDECL_END
+
+
+/**
+ * One-time loader for the key/type mapping tables. init() runs it through
+ * umtx_initOnce(); its first statement registers uloc_key_type_cleanup().
+ *
+ * Reads the "keyTypeData" resource bundle and fills gLocExtKeyMap:
+ *  - every entry of "keyMap" becomes a LocExtKeyData registered under its
+ *    legacy key id and, when different, its BCP key id;
+ *  - the matching "typeMap" table becomes that key's typeMap of LocExtType
+ *    entries, registered under the legacy type id, the BCP type id and any
+ *    alias found in the optional "typeAlias" / "bcpTypeAlias" tables;
+ *  - the pseudo types CODEPOINTS, REORDER_CODE and RG_KEY_VALUE are not
+ *    stored but recorded as SpecialType bit flags on the key.
+ *
+ * Strings that must be built at run time are kept in gKeyTypeStringPool; the
+ * other ids are the pointers returned by ures_getKey().
+ *
+ * The per-key type table is held in a LocalUHashtablePointer until it is
+ * handed to the LocExtKeyData, so it is closed on every early exit instead
+ * of being leaked.
+ *
+ * @param sts in/out error code; set to a failure code when resource access,
+ *            a string conversion or an allocation fails. Loading stops at
+ *            the first failure.
+ */
+static void U_CALLCONV
+initFromResourceBundle(UErrorCode& sts) {
+    U_NAMESPACE_USE
+    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);
+
+    gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
+
+    LocalUResourceBundlePointer keyTypeDataRes(ures_openDirect(NULL, "keyTypeData", &sts));
+    LocalUResourceBundlePointer keyMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "keyMap", NULL, &sts));
+    LocalUResourceBundlePointer typeMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeMap", NULL, &sts));
+
+    if (U_FAILURE(sts)) {
+        return;
+    }
+
+    // The alias tables are optional: failures are tracked in tmpSts and ignored.
+    UErrorCode tmpSts = U_ZERO_ERROR;
+    LocalUResourceBundlePointer typeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeAlias", NULL, &tmpSts));
+    tmpSts = U_ZERO_ERROR;
+    LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias", NULL, &tmpSts));
+
+    // initialize pools storing dynamically allocated objects
+    gKeyTypeStringPool = new icu::MemoryPool<icu::CharString>;
+    if (gKeyTypeStringPool == NULL) {
+        sts = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    gLocExtKeyDataEntries = new icu::MemoryPool<LocExtKeyData>;
+    if (gLocExtKeyDataEntries == NULL) {
+        sts = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    gLocExtTypeEntries = new icu::MemoryPool<LocExtType>;
+    if (gLocExtTypeEntries == NULL) {
+        sts = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    // iterate through keyMap resource
+    LocalUResourceBundlePointer keyMapEntry;
+
+    while (ures_hasNext(keyMapRes.getAlias())) {
+        keyMapEntry.adoptInstead(ures_getNextResource(keyMapRes.getAlias(), keyMapEntry.orphan(), &sts));
+        if (U_FAILURE(sts)) {
+            break;
+        }
+        const char* legacyKeyId = ures_getKey(keyMapEntry.getAlias());
+        UnicodeString uBcpKeyId = ures_getUnicodeString(keyMapEntry.getAlias(), &sts);
+        if (U_FAILURE(sts)) {
+            break;
+        }
+
+        // empty value indicates that BCP key is same with the legacy key.
+        const char* bcpKeyId = legacyKeyId;
+        if (!uBcpKeyId.isEmpty()) {
+            icu::CharString* bcpKeyIdBuf = gKeyTypeStringPool->create();
+            if (bcpKeyIdBuf == NULL) {
+                sts = U_MEMORY_ALLOCATION_ERROR;
+                break;
+            }
+            bcpKeyIdBuf->appendInvariantChars(uBcpKeyId, sts);
+            if (U_FAILURE(sts)) {
+                break;
+            }
+            bcpKeyId = bcpKeyIdBuf->data();
+        }
+
+        UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;
+
+        // Owned locally until adopted by keyData below, so that every early
+        // 'break' closes the table.
+        LocalUHashtablePointer typeDataMap(uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts));
+        if (U_FAILURE(sts)) {
+            break;
+        }
+        uint32_t specialTypes = SPECIALTYPE_NONE;
+
+        LocalUResourceBundlePointer typeAliasResByKey;
+        LocalUResourceBundlePointer bcpTypeAliasResByKey;
+
+        if (typeAliasRes.isValid()) {
+            tmpSts = U_ZERO_ERROR;
+            typeAliasResByKey.adoptInstead(ures_getByKey(typeAliasRes.getAlias(), legacyKeyId, NULL, &tmpSts));
+            if (U_FAILURE(tmpSts)) {
+                typeAliasResByKey.orphan();
+            }
+        }
+        if (bcpTypeAliasRes.isValid()) {
+            tmpSts = U_ZERO_ERROR;
+            bcpTypeAliasResByKey.adoptInstead(ures_getByKey(bcpTypeAliasRes.getAlias(), bcpKeyId, NULL, &tmpSts));
+            if (U_FAILURE(tmpSts)) {
+                bcpTypeAliasResByKey.orphan();
+            }
+        }
+
+        // look up type map for the key, and walk through the mapping data
+        tmpSts = U_ZERO_ERROR;
+        LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, NULL, &tmpSts));
+        if (U_FAILURE(tmpSts)) {
+            // type map for each key must exist
+            UPRV_UNREACHABLE;
+        } else {
+            LocalUResourceBundlePointer typeMapEntry;
+
+            while (ures_hasNext(typeMapResByKey.getAlias())) {
+                typeMapEntry.adoptInstead(ures_getNextResource(typeMapResByKey.getAlias(), typeMapEntry.orphan(), &sts));
+                if (U_FAILURE(sts)) {
+                    break;
+                }
+                const char* legacyTypeId = ures_getKey(typeMapEntry.getAlias());
+
+                // special types
+                if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) {
+                    specialTypes |= SPECIALTYPE_CODEPOINTS;
+                    continue;
+                }
+                if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) {
+                    specialTypes |= SPECIALTYPE_REORDER_CODE;
+                    continue;
+                }
+                if (uprv_strcmp(legacyTypeId, "RG_KEY_VALUE") == 0) {
+                    specialTypes |= SPECIALTYPE_RG_KEY_VALUE;
+                    continue;
+                }
+
+                if (isTZ) {
+                    // a timezone key uses a colon instead of a slash in the resource.
+                    // e.g. America:Los_Angeles
+                    if (uprv_strchr(legacyTypeId, ':') != NULL) {
+                        icu::CharString* legacyTypeIdBuf =
+                                gKeyTypeStringPool->create(legacyTypeId, sts);
+                        if (legacyTypeIdBuf == NULL) {
+                            sts = U_MEMORY_ALLOCATION_ERROR;
+                            break;
+                        }
+                        if (U_FAILURE(sts)) {
+                            break;
+                        }
+                        std::replace(
+                                legacyTypeIdBuf->data(),
+                                legacyTypeIdBuf->data() + legacyTypeIdBuf->length(),
+                                ':', '/');
+                        legacyTypeId = legacyTypeIdBuf->data();
+                    }
+                }
+
+                UnicodeString uBcpTypeId = ures_getUnicodeString(typeMapEntry.getAlias(), &sts);
+                if (U_FAILURE(sts)) {
+                    break;
+                }
+
+                // empty value indicates that BCP type is same with the legacy type.
+                const char* bcpTypeId = legacyTypeId;
+                if (!uBcpTypeId.isEmpty()) {
+                    icu::CharString* bcpTypeIdBuf = gKeyTypeStringPool->create();
+                    if (bcpTypeIdBuf == NULL) {
+                        sts = U_MEMORY_ALLOCATION_ERROR;
+                        break;
+                    }
+                    bcpTypeIdBuf->appendInvariantChars(uBcpTypeId, sts);
+                    if (U_FAILURE(sts)) {
+                        break;
+                    }
+                    bcpTypeId = bcpTypeIdBuf->data();
+                }
+
+                // Note: legacy type value should never be
+                // equivalent to bcp type value of a different
+                // type under the same key. So we use a single
+                // map for lookup.
+                LocExtType* t = gLocExtTypeEntries->create();
+                if (t == NULL) {
+                    sts = U_MEMORY_ALLOCATION_ERROR;
+                    break;
+                }
+                t->bcpId = bcpTypeId;
+                t->legacyId = legacyTypeId;
+
+                uhash_put(typeDataMap.getAlias(), (void*)legacyTypeId, t, &sts);
+                if (bcpTypeId != legacyTypeId) {
+                    // different type value
+                    uhash_put(typeDataMap.getAlias(), (void*)bcpTypeId, t, &sts);
+                }
+                if (U_FAILURE(sts)) {
+                    break;
+                }
+
+                // also put aliases in the map
+                if (typeAliasResByKey.isValid()) {
+                    LocalUResourceBundlePointer typeAliasDataEntry;
+
+                    ures_resetIterator(typeAliasResByKey.getAlias());
+                    while (ures_hasNext(typeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
+                        int32_t toLen;
+                        typeAliasDataEntry.adoptInstead(ures_getNextResource(typeAliasResByKey.getAlias(), typeAliasDataEntry.orphan(), &sts));
+                        const UChar* to = ures_getString(typeAliasDataEntry.getAlias(), &toLen, &sts);
+                        if (U_FAILURE(sts)) {
+                            break;
+                        }
+                        // check if this is an alias of canonical legacy type
+                        if (uprv_compareInvWithUChar(NULL, legacyTypeId, -1, to, toLen) == 0) {
+                            const char* from = ures_getKey(typeAliasDataEntry.getAlias());
+                            if (isTZ) {
+                                // replace colon with slash if necessary
+                                if (uprv_strchr(from, ':') != NULL) {
+                                    icu::CharString* fromBuf =
+                                            gKeyTypeStringPool->create(from, sts);
+                                    if (fromBuf == NULL) {
+                                        sts = U_MEMORY_ALLOCATION_ERROR;
+                                        break;
+                                    }
+                                    if (U_FAILURE(sts)) {
+                                        break;
+                                    }
+                                    std::replace(
+                                            fromBuf->data(),
+                                            fromBuf->data() + fromBuf->length(),
+                                            ':', '/');
+                                    from = fromBuf->data();
+                                }
+                            }
+                            uhash_put(typeDataMap.getAlias(), (void*)from, t, &sts);
+                        }
+                    }
+                    if (U_FAILURE(sts)) {
+                        break;
+                    }
+                }
+
+                if (bcpTypeAliasResByKey.isValid()) {
+                    LocalUResourceBundlePointer bcpTypeAliasDataEntry;
+
+                    ures_resetIterator(bcpTypeAliasResByKey.getAlias());
+                    while (ures_hasNext(bcpTypeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
+                        int32_t toLen;
+                        bcpTypeAliasDataEntry.adoptInstead(ures_getNextResource(bcpTypeAliasResByKey.getAlias(), bcpTypeAliasDataEntry.orphan(), &sts));
+                        const UChar* to = ures_getString(bcpTypeAliasDataEntry.getAlias(), &toLen, &sts);
+                        if (U_FAILURE(sts)) {
+                            break;
+                        }
+                        // check if this is an alias of bcp type
+                        if (uprv_compareInvWithUChar(NULL, bcpTypeId, -1, to, toLen) == 0) {
+                            const char* from = ures_getKey(bcpTypeAliasDataEntry.getAlias());
+                            uhash_put(typeDataMap.getAlias(), (void*)from, t, &sts);
+                        }
+                    }
+                    if (U_FAILURE(sts)) {
+                        break;
+                    }
+                }
+            }
+        }
+        if (U_FAILURE(sts)) {
+            break;
+        }
+
+        LocExtKeyData* keyData = gLocExtKeyDataEntries->create();
+        if (keyData == NULL) {
+            sts = U_MEMORY_ALLOCATION_ERROR;
+            break;
+        }
+        keyData->bcpId = bcpKeyId;
+        keyData->legacyId = legacyKeyId;
+        keyData->specialTypes = specialTypes;
+        // ownership of the type table moves to keyData
+        keyData->typeMap.adoptInstead(typeDataMap.orphan());
+
+        uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts);
+        if (legacyKeyId != bcpKeyId) {
+            // different key value
+            uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts);
+        }
+        if (U_FAILURE(sts)) {
+            break;
+        }
+    }
+}
+
+/**
+ * Makes sure the key/type tables have been loaded by running
+ * initFromResourceBundle() through the init-once guard.
+ *
+ * @return TRUE when the resulting error code is a success code, else FALSE.
+ */
+static UBool
+init() {
+    UErrorCode status = U_ZERO_ERROR;
+    umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, status);
+    return U_SUCCESS(status) ? TRUE : FALSE;
+}
+
+/**
+ * Validates a CODEPOINTS special type value: one or more '-'-separated
+ * subtags, each made of 4 to 6 hexadecimal digits (0-9, A-F, a-f).
+ *
+ * @param val NUL-terminated value to check.
+ * @return TRUE when every subtag (including the last) is valid.
+ */
+static UBool
+isSpecialTypeCodepoints(const char* val) {
+    int32_t hexDigits = 0;  // digits in the current subtag
+    for (const char* cur = val; *cur != 0; ++cur) {
+        const char c = *cur;
+        if (c == '-') {
+            if (hexDigits < 4 || hexDigits > 6) {
+                return FALSE;
+            }
+            hexDigits = 0;
+            continue;
+        }
+        // A-F/a-f are contiguous also in EBCDIC
+        const bool isHexDigit = (c >= '0' && c <= '9') ||
+                                (c >= 'A' && c <= 'F') ||
+                                (c >= 'a' && c <= 'f');
+        if (!isHexDigit) {
+            return FALSE;
+        }
+        ++hexDigits;
+    }
+    return (hexDigits >= 4 && hexDigits <= 6);
+}
+
+/**
+ * Validates a REORDER_CODE special type value: one or more '-'-separated
+ * subtags, each made of 3 to 8 ASCII letters.
+ *
+ * @param val NUL-terminated value to check.
+ * @return TRUE when every subtag (including the last) is valid.
+ */
+static UBool
+isSpecialTypeReorderCode(const char* val) {
+    int32_t letters = 0;  // letters in the current subtag
+    for (const char* cur = val; *cur != 0; ++cur) {
+        if (*cur == '-') {
+            if (letters < 3 || letters > 8) {
+                return FALSE;
+            }
+            letters = 0;
+            continue;
+        }
+        if (!uprv_isASCIILetter(*cur)) {
+            return FALSE;
+        }
+        ++letters;
+    }
+    return (letters >=3 && letters <=8);
+}
+
+/**
+ * Validates an RG_KEY_VALUE special type value: exactly six characters,
+ * two ASCII letters followed by four 'Z'/'z' characters.
+ *
+ * @param val NUL-terminated value to check.
+ * @return TRUE when the value has that shape, FALSE otherwise.
+ */
+static UBool
+isSpecialTypeRgKeyValue(const char* val) {
+    int32_t pos = 0;
+    for (; val[pos] != 0; ++pos) {
+        const char c = val[pos];
+        if (pos < 2) {
+            if (!uprv_isASCIILetter(c)) {
+                return FALSE;
+            }
+        } else if (c != 'Z' && c != 'z') {
+            return FALSE;
+        }
+    }
+    return (pos == 6);
+}
+
+/**
+ * Looks up a locale extension key in gLocExtKeyMap and returns its BCP id.
+ *
+ * @param key key to look up.
+ * @return the BCP key id, or NULL when initialization failed or the key is
+ *         not in the map.
+ */
+U_CFUNC const char*
+ulocimp_toBcpKey(const char* key) {
+    if (!init()) {
+        return NULL;
+    }
+
+    const LocExtKeyData* entry = (const LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
+    return (entry == NULL) ? NULL : entry->bcpId;
+}
+
+/**
+ * Looks up a locale extension key in gLocExtKeyMap and returns its legacy id.
+ *
+ * @param key key to look up.
+ * @return the legacy key id, or NULL when initialization failed or the key
+ *         is not in the map.
+ */
+U_CFUNC const char*
+ulocimp_toLegacyKey(const char* key) {
+    if (!init()) {
+        return NULL;
+    }
+
+    const LocExtKeyData* entry = (const LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
+    return (entry == NULL) ? NULL : entry->legacyId;
+}
+
+/**
+ * Maps a type (value) of a locale extension key to its BCP type id.
+ *
+ * @param key           key whose type table is consulted.
+ * @param type          type string to map.
+ * @param isKnownKey    optional output; FALSE on entry, TRUE once the key is
+ *                      found in gLocExtKeyMap.
+ * @param isSpecialType optional output; FALSE on entry, TRUE when the type
+ *                      is not in the table but passes one of the key's
+ *                      special-type validators.
+ * @return the mapped BCP type id, the input type itself for an accepted
+ *         special type, or NULL otherwise (including failed initialization).
+ */
+U_CFUNC const char*
+ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
+    if (isKnownKey != NULL) {
+        *isKnownKey = FALSE;
+    }
+    if (isSpecialType != NULL) {
+        *isSpecialType = FALSE;
+    }
+
+    if (!init()) {
+        return NULL;
+    }
+
+    LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
+    if (keyData == NULL) {
+        return NULL;
+    }
+    if (isKnownKey != NULL) {
+        *isKnownKey = TRUE;
+    }
+
+    LocExtType* entry = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type);
+    if (entry != NULL) {
+        return entry->bcpId;
+    }
+
+    // Not a listed type: try the validators enabled for this key, in the
+    // order code points, reorder code, region key value.
+    const uint32_t flags = keyData->specialTypes;
+    const bool accepted =
+            ((flags & SPECIALTYPE_CODEPOINTS) && isSpecialTypeCodepoints(type)) ||
+            ((flags & SPECIALTYPE_REORDER_CODE) && isSpecialTypeReorderCode(type)) ||
+            ((flags & SPECIALTYPE_RG_KEY_VALUE) && isSpecialTypeRgKeyValue(type));
+    if (!accepted) {
+        return NULL;
+    }
+    if (isSpecialType != NULL) {
+        *isSpecialType = TRUE;
+    }
+    return type;
+}
+
+
+/**
+ * Maps a type (value) of a locale extension key to its legacy type id.
+ *
+ * @param key           key whose type table is consulted.
+ * @param type          type string to map.
+ * @param isKnownKey    optional output; FALSE on entry, TRUE once the key is
+ *                      found in gLocExtKeyMap.
+ * @param isSpecialType optional output; FALSE on entry, TRUE when the type
+ *                      is not in the table but passes one of the key's
+ *                      special-type validators.
+ * @return the mapped legacy type id, the input type itself for an accepted
+ *         special type, or NULL otherwise (including failed initialization).
+ */
+U_CFUNC const char*
+ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
+    if (isKnownKey != NULL) {
+        *isKnownKey = FALSE;
+    }
+    if (isSpecialType != NULL) {
+        *isSpecialType = FALSE;
+    }
+
+    if (!init()) {
+        return NULL;
+    }
+
+    LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
+    if (keyData == NULL) {
+        return NULL;
+    }
+    if (isKnownKey != NULL) {
+        *isKnownKey = TRUE;
+    }
+
+    LocExtType* entry = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type);
+    if (entry != NULL) {
+        return entry->legacyId;
+    }
+
+    // Not a listed type: try the validators enabled for this key, in the
+    // order code points, reorder code, region key value.
+    const uint32_t flags = keyData->specialTypes;
+    const bool accepted =
+            ((flags & SPECIALTYPE_CODEPOINTS) && isSpecialTypeCodepoints(type)) ||
+            ((flags & SPECIALTYPE_REORDER_CODE) && isSpecialTypeReorderCode(type)) ||
+            ((flags & SPECIALTYPE_RG_KEY_VALUE) && isSpecialTypeRgKeyValue(type));
+    if (!accepted) {
+        return NULL;
+    }
+    if (isSpecialType != NULL) {
+        *isSpecialType = TRUE;
+    }
+    return type;
+}
+
diff --git a/thirdparty/icu4c/common/uloc_tag.cpp b/thirdparty/icu4c/common/uloc_tag.cpp
new file mode 100644
index 0000000000..7f7fd9119e
--- /dev/null
+++ b/thirdparty/icu4c/common/uloc_tag.cpp
@@ -0,0 +1,2844 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2009-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#include "unicode/bytestream.h"
+#include "unicode/utypes.h"
+#include "unicode/ures.h"
+#include "unicode/localpointer.h"
+#include "unicode/putil.h"
+#include "unicode/uenum.h"
+#include "unicode/uloc.h"
+#include "ustr_imp.h"
+#include "bytesinkutil.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "putilimp.h"
+#include "uinvchar.h"
+#include "ulocimp.h"
+#include "uassert.h"
+
+
+/* struct holding a single variant */
+/* Node of a singly linked list; _addVariantToList() appends nodes at the
+   tail and rejects a node whose variant string is already in the list. */
+typedef struct VariantListEntry {
+ const char *variant;
+ struct VariantListEntry *next;
+} VariantListEntry;
+
+/* struct holding a single attribute value */
+/* Node of a singly linked list, chained through `next`. */
+struct AttributeListEntry : public icu::UMemory {
+ const char *attribute;
+ struct AttributeListEntry *next;
+};
+
+/* struct holding a single extension */
+/* A key/value pair; node of a singly linked list, chained through `next`. */
+struct ExtensionListEntry : public icu::UMemory {
+ const char *key;
+ const char *value;
+ struct ExtensionListEntry *next;
+};
+
+/* Capacity of ULanguageTag::extlang. */
+#define MAXEXTLANG 3
+/* Parsed form of a language tag. Instances are returned by ultag_parse(),
+   read through the ultag_get*() accessors and released with ultag_close()
+   (all declared further down in this file). */
+typedef struct ULanguageTag {
+ char *buf; /* holding parsed subtags */
+ const char *language;
+ const char *extlang[MAXEXTLANG];
+ const char *script;
+ const char *region;
+ VariantListEntry *variants;
+ ExtensionListEntry *extensions;
+ const char *privateuse;
+ const char *legacy;
+} ULanguageTag;
+
+#define MINLEN 2
+#define SEP '-'
+#define PRIVATEUSE 'x'
+#define LDMLEXT 'u'
+
+#define LOCALE_SEP '_'
+#define LOCALE_EXT_SEP '@'
+#define LOCALE_KEYWORD_SEP ';'
+#define LOCALE_KEY_TYPE_SEP '='
+
+#define ISALPHA(c) uprv_isASCIILetter(c)
+#define ISNUMERIC(c) ((c)>='0' && (c)<='9')
+
+static const char EMPTY[] = "";
+static const char LANG_UND[] = "und";
+static const char PRIVATEUSE_KEY[] = "x";
+static const char _POSIX[] = "_POSIX";
+static const char POSIX_KEY[] = "va";
+static const char POSIX_VALUE[] = "posix";
+static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
+static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
+static const char LOCALE_TYPE_YES[] = "yes";
+
+#define LANG_UND_LEN 3
+
+/*
+ Updated on 2018-09-12 from
+ https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
+
+ This table has 2 parts. The part for
+ legacy language tags (marked as "Type: grandfathered" in BCP 47)
+ is generated by the following scripts from the IANA language tag registry.
+
+ curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
+ egrep -A 7 'Type: grandfathered' | \
+ egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
+ awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
+ tr 'A-Z' 'a-z'
+
+
+ The 2nd part is made of five ICU-specific entries. They're kept for
+ the backward compatibility for now, even though there are no preferred
+ values. They may have to be removed for the strict BCP 47 compliance.
+
+*/
+static const char* const LEGACY[] = {
+/* legacy preferred */
+ "art-lojban", "jbo",
+ "en-gb-oed", "en-gb-oxendict",
+ "i-ami", "ami",
+ "i-bnn", "bnn",
+ "i-hak", "hak",
+ "i-klingon", "tlh",
+ "i-lux", "lb",
+ "i-navajo", "nv",
+ "i-pwn", "pwn",
+ "i-tao", "tao",
+ "i-tay", "tay",
+ "i-tsu", "tsu",
+ "no-bok", "nb",
+ "no-nyn", "nn",
+ "sgn-be-fr", "sfb",
+ "sgn-be-nl", "vgt",
+ "sgn-ch-de", "sgg",
+ "zh-guoyu", "cmn",
+ "zh-hakka", "hak",
+ "zh-min-nan", "nan",
+ "zh-xiang", "hsn",
+
+ // Legacy tags with no preferred value in the IANA
+ // registry. Kept for now for the backward compatibility
+ // because ICU has mapped them this way.
+ "cel-gaulish", "xtg-x-cel-gaulish",
+ "i-default", "en-x-i-default",
+ "i-enochian", "und-x-i-enochian",
+ "i-mingo", "see-x-i-mingo",
+ "zh-min", "nan-x-zh-min",
+};
+
+/*
+ Updated on 2018-09-12 from
+ https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
+
+ The table lists redundant tags with preferred value in the IANA language tag registry.
+ It's generated with the following command:
+
+ curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
+ grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
+ awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
+ tr 'A-Z' 'a-z'
+
+ In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
+ a variant tag 'hepburn-heploc' has the preferred subtag, 'alalc97'.
+*/
+
+static const char* const REDUNDANT[] = {
+// redundant preferred
+ "sgn-br", "bzs",
+ "sgn-co", "csn",
+ "sgn-de", "gsg",
+ "sgn-dk", "dsl",
+ "sgn-es", "ssp",
+ "sgn-fr", "fsl",
+ "sgn-gb", "bfi",
+ "sgn-gr", "gss",
+ "sgn-ie", "isg",
+ "sgn-it", "ise",
+ "sgn-jp", "jsl",
+ "sgn-mx", "mfs",
+ "sgn-ni", "ncs",
+ "sgn-nl", "dse",
+ "sgn-no", "nsl",
+ "sgn-pt", "psr",
+ "sgn-se", "swl",
+ "sgn-us", "ase",
+ "sgn-za", "sfs",
+ "zh-cmn", "cmn",
+ "zh-cmn-hans", "cmn-hans",
+ "zh-cmn-hant", "cmn-hant",
+ "zh-gan", "gan",
+ "zh-wuu", "wuu",
+ "zh-yue", "yue",
+
+ // variant tag with preferred value
+ "ja-latn-hepburn-heploc", "ja-latn-alalc97",
+};
+
+/*
+ Updated on 2018-09-12 from
+ https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
+
+ grep 'Type: language' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \
+ grep -B1 'Preferred' | grep -v '^--' | \
+ awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
+
+ Make sure that 2-letter language subtags come before 3-letter subtags.
+*/
+static const char DEPRECATEDLANGS[][4] = {
+/* deprecated new */
+ "in", "id",
+ "iw", "he",
+ "ji", "yi",
+ "jw", "jv",
+ "mo", "ro",
+ "aam", "aas",
+ "adp", "dz",
+ "aue", "ktz",
+ "ayx", "nun",
+ "bgm", "bcg",
+ "bjd", "drl",
+ "ccq", "rki",
+ "cjr", "mom",
+ "cka", "cmr",
+ "cmk", "xch",
+ "coy", "pij",
+ "cqu", "quh",
+ "drh", "khk",
+ "drw", "prs",
+ "gav", "dev",
+ "gfx", "vaj",
+ "ggn", "gvr",
+ "gti", "nyc",
+ "guv", "duz",
+ "hrr", "jal",
+ "ibi", "opa",
+ "ilw", "gal",
+ "jeg", "oyb",
+ "kgc", "tdf",
+ "kgh", "kml",
+ "koj", "kwv",
+ "krm", "bmf",
+ "ktr", "dtp",
+ "kvs", "gdj",
+ "kwq", "yam",
+ "kxe", "tvd",
+ "kzj", "dtp",
+ "kzt", "dtp",
+ "lii", "raq",
+ "lmm", "rmx",
+ "meg", "cir",
+ "mst", "mry",
+ "mwj", "vaj",
+ "myt", "mry",
+ "nad", "xny",
+ "ncp", "kdz",
+ "nnx", "ngv",
+ "nts", "pij",
+ "oun", "vaj",
+ "pcr", "adx",
+ "pmc", "huw",
+ "pmu", "phr",
+ "ppa", "bfy",
+ "ppr", "lcq",
+ "pry", "prt",
+ "puz", "pub",
+ "sca", "hle",
+ "skk", "oyb",
+ "tdu", "dtp",
+ "thc", "tpo",
+ "thx", "oyb",
+ "tie", "ras",
+ "tkk", "twm",
+ "tlw", "weo",
+ "tmp", "tyj",
+ "tne", "kak",
+ "tnf", "prs",
+ "tsf", "taj",
+ "uok", "ema",
+ "xba", "cax",
+ "xia", "acn",
+ "xkh", "waw",
+ "xsj", "suj",
+ "ybd", "rki",
+ "yma", "lrr",
+ "ymt", "mtm",
+ "yos", "zom",
+ "yuu", "yug",
+};
+
+/*
+ Updated on 2018-04-24 from
+
+ curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
+ grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
+ grep -B1 'Preferred' | \
+ awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
+*/
+static const char DEPRECATEDREGIONS[][3] = {
+/* deprecated new */
+ "BU", "MM",
+ "DD", "DE",
+ "FX", "FR",
+ "TP", "TL",
+ "YD", "YE",
+ "ZR", "CD",
+};
+
+/*
+* -------------------------------------------------
+*
+* These ultag_ functions may be exposed as APIs later
+*
+* -------------------------------------------------
+*/
+
+static ULanguageTag*
+ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
+
+static void
+ultag_close(ULanguageTag* langtag);
+
+static const char*
+ultag_getLanguage(const ULanguageTag* langtag);
+
+#if 0
+static const char*
+ultag_getJDKLanguage(const ULanguageTag* langtag);
+#endif
+
+static const char*
+ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
+
+static int32_t
+ultag_getExtlangSize(const ULanguageTag* langtag);
+
+static const char*
+ultag_getScript(const ULanguageTag* langtag);
+
+static const char*
+ultag_getRegion(const ULanguageTag* langtag);
+
+static const char*
+ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
+
+static int32_t
+ultag_getVariantsSize(const ULanguageTag* langtag);
+
+static const char*
+ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
+
+static const char*
+ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
+
+static int32_t
+ultag_getExtensionsSize(const ULanguageTag* langtag);
+
+static const char*
+ultag_getPrivateUse(const ULanguageTag* langtag);
+
+#if 0
+static const char*
+ultag_getLegacy(const ULanguageTag* langtag);
+#endif
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalULanguageTagPointer
+ * "Smart pointer" class, closes a ULanguageTag via ultag_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @internal
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalULanguageTagPointer, ULanguageTag, ultag_close);
+
+U_NAMESPACE_END
+
+/*
+* -------------------------------------------------
+*
+* Language subtag syntax validation functions
+*
+* -------------------------------------------------
+*/
+
+/**
+ * Tests whether the first len characters of s are all ASCII letters.
+ * Returns TRUE when len is zero or negative (nothing is examined).
+ */
+static UBool
+_isAlphaString(const char* s, int32_t len) {
+    for (int32_t remaining = len; remaining > 0; --remaining, ++s) {
+        if (!ISALPHA(*s)) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+/**
+ * Tests whether the first len characters of s are all digits '0'..'9'.
+ * Returns TRUE when len is zero or negative (nothing is examined).
+ */
+static UBool
+_isNumericString(const char* s, int32_t len) {
+    for (int32_t remaining = len; remaining > 0; --remaining, ++s) {
+        if (!ISNUMERIC(*s)) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+/**
+ * Tests whether the first len characters of s are all ASCII letters or
+ * digits. Returns TRUE when len is zero or negative (nothing is examined).
+ */
+static UBool
+_isAlphaNumericString(const char* s, int32_t len) {
+    for (int32_t remaining = len; remaining > 0; --remaining, ++s) {
+        const char c = *s;
+        if (ISALPHA(c) || ISNUMERIC(c)) {
+            continue;
+        }
+        return FALSE;
+    }
+    return TRUE;
+}
+
+/**
+ * Tests whether s is an alphanumeric string whose length lies in
+ * [min, max].
+ *
+ * @param s   string to test.
+ * @param len number of characters to test; negative means s is
+ *            NUL-terminated and its length is measured.
+ * @param min smallest accepted length.
+ * @param max largest accepted length.
+ */
+static UBool
+_isAlphaNumericStringLimitedLength(const char* s, int32_t len, int32_t min, int32_t max) {
+    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+    if (n < min || n > max) {
+        return FALSE;
+    }
+    return _isAlphaNumericString(s, n) ? TRUE : FALSE;
+}
+
+/**
+ * Tests whether s is a language subtag: 2 to 8 ASCII letters.
+ *
+ * unicode_language_subtag = alpha{2,3} | alpha{5,8};
+ * NOTE: Per ICUTC 2019/01/23- accepting alpha 4
+ * See ICU-20372
+ *
+ * @param len length of s; negative means s is NUL-terminated.
+ */
+U_CFUNC UBool
+ultag_isLanguageSubtag(const char* s, int32_t len) {
+    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+    if (n < 2 || n > 8) {
+        return FALSE;
+    }
+    return _isAlphaString(s, n) ? TRUE : FALSE;
+}
+
+/**
+ * Tests whether s is an extlang subtag: exactly 3 ASCII letters.
+ *
+ * extlang = 3ALPHA              ; selected ISO 639 codes
+ *           *2("-" 3ALPHA)      ; permanently reserved
+ *
+ * @param len length of s; negative means s is NUL-terminated.
+ */
+static UBool
+_isExtlangSubtag(const char* s, int32_t len) {
+    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+    if (n != 3) {
+        return FALSE;
+    }
+    return _isAlphaString(s, n) ? TRUE : FALSE;
+}
+
+/**
+ * Tests whether s is a script subtag: exactly 4 ASCII letters.
+ *
+ * script = 4ALPHA              ; ISO 15924 code
+ *
+ * @param len length of s; negative means s is NUL-terminated.
+ */
+U_CFUNC UBool
+ultag_isScriptSubtag(const char* s, int32_t len) {
+    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+    if (n != 4) {
+        return FALSE;
+    }
+    return _isAlphaString(s, n) ? TRUE : FALSE;
+}
+
+/**
+ * Tests whether s is a region subtag: 2 ASCII letters or 3 digits.
+ *
+ * region = 2ALPHA              ; ISO 3166-1 code
+ *        / 3DIGIT              ; UN M.49 code
+ *
+ * @param len length of s; negative means s is NUL-terminated.
+ */
+U_CFUNC UBool
+ultag_isRegionSubtag(const char* s, int32_t len) {
+    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+    switch (n) {
+    case 2:
+        return _isAlphaString(s, n) ? TRUE : FALSE;
+    case 3:
+        return _isNumericString(s, n) ? TRUE : FALSE;
+    default:
+        return FALSE;
+    }
+}
+
+/**
+ * Tests whether s is a variant subtag: 5 to 8 alphanumerics, or a digit
+ * followed by 3 alphanumerics.
+ *
+ * variant = 5*8alphanum         ; registered variants
+ *         / (DIGIT 3alphanum)
+ *
+ * @param len length of s; negative means s is NUL-terminated.
+ */
+static UBool
+_isVariantSubtag(const char* s, int32_t len) {
+    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+    if (n == 4) {
+        // four characters are only valid in the DIGIT 3alphanum form
+        return (ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) ? TRUE : FALSE;
+    }
+    return _isAlphaNumericStringLimitedLength(s, n, 5, 8);
+}
+
+/**
+ * Tests whether s is a list of subtags separated by SEP in which every
+ * subtag satisfies `test`. The list must contain at least one subtag and no
+ * subtag may be empty, so an empty string, a leading or trailing separator,
+ * or two adjacent separators all yield FALSE.
+ *
+ * @param test predicate applied to each subtag as (start, length).
+ * @param s    string to split.
+ * @param len  length of s; negative means s is NUL-terminated.
+ */
+static UBool
+_isSepListOf(UBool (*test)(const char*, int32_t), const char* s, int32_t len) {
+    if (len < 0) {
+        len = (int32_t)uprv_strlen(s);
+    }
+
+    int32_t subtagStart = -1;  // index of the open subtag, -1 when none
+    for (int32_t i = 0; i < len; ++i) {
+        if (s[i] != SEP) {
+            if (subtagStart < 0) {
+                subtagStart = i;
+            }
+            continue;
+        }
+        if (subtagStart < 0) {
+            return FALSE;
+        }
+        if (!test(s + subtagStart, i - subtagStart)) {
+            return FALSE;
+        }
+        subtagStart = -1;
+    }
+
+    if (subtagStart < 0) {
+        return FALSE;
+    }
+    return test(s + subtagStart, len - subtagStart);
+}
+
+/* Tests whether s is a SEP-separated list of variant subtags
+   (each checked with _isVariantSubtag). */
+U_CFUNC UBool
+ultag_isVariantSubtags(const char* s, int32_t len) {
+ return _isSepListOf(&_isVariantSubtag, s, len);
+}
+
+// This is for the ICU-specific "lvariant" handling.
+// Accepts a single subtag of 1 to 8 alphanumeric characters.
+static UBool
+_isPrivateuseVariantSubtag(const char* s, int32_t len) {
+ /*
+ * variant = 1*8alphanum
+ * (the code checks only this form; there is no separate
+ * DIGIT 3alphanum case here)
+ */
+ return _isAlphaNumericStringLimitedLength(s, len , 1, 8);
+}
+
+/**
+ * Tests whether s is an extension singleton: one alphanumeric character
+ * other than the private use singleton 'x'/'X'.
+ *
+ * extension = singleton 1*("-" (2*8alphanum))
+ *
+ * singleton = DIGIT               ; 0 - 9
+ *           / %x41-57             ; A - W
+ *           / %x59-5A             ; Y - Z
+ *           / %x61-77             ; a - w
+ *           / %x79-7A             ; y - z
+ *
+ * @param len length of s; negative means s is NUL-terminated.
+ */
+static UBool
+_isExtensionSingleton(const char* s, int32_t len) {
+    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+    if (n != 1) {
+        return FALSE;
+    }
+    const char c = *s;
+    if (!ISALPHA(c) && !ISNUMERIC(c)) {
+        return FALSE;
+    }
+    return (uprv_tolower(c) != PRIVATEUSE) ? TRUE : FALSE;
+}
+
+/* Tests whether s is one extension subtag: 2 to 8 alphanumeric characters. */
+static UBool
+_isExtensionSubtag(const char* s, int32_t len) {
+ /*
+ * extension = singleton 1*("-" (2*8alphanum))
+ */
+ return _isAlphaNumericStringLimitedLength(s, len, 2, 8);
+}
+
+/* Tests whether s is a SEP-separated list of extension subtags
+   (each checked with _isExtensionSubtag). */
+U_CFUNC UBool
+ultag_isExtensionSubtags(const char* s, int32_t len) {
+ return _isSepListOf(&_isExtensionSubtag, s, len);
+}
+
+/* Tests whether s is one private use value subtag: 1 to 8 alphanumeric
+   characters. */
+static UBool
+_isPrivateuseValueSubtag(const char* s, int32_t len) {
+ /*
+ * privateuse = "x" 1*("-" (1*8alphanum))
+ */
+ return _isAlphaNumericStringLimitedLength(s, len, 1, 8);
+}
+
+/* Tests whether s is a SEP-separated list of private use value subtags
+   (each checked with _isPrivateuseValueSubtag). */
+U_CFUNC UBool
+ultag_isPrivateuseValueSubtags(const char* s, int32_t len) {
+ return _isSepListOf(&_isPrivateuseValueSubtag, s, len);
+}
+
+/* Tests whether s is one Unicode locale attribute: 3 to 8 alphanumeric
+   characters. */
+U_CFUNC UBool
+ultag_isUnicodeLocaleAttribute(const char* s, int32_t len) {
+ /*
+ * attribute = alphanum{3,8} ;
+ */
+ return _isAlphaNumericStringLimitedLength(s, len , 3, 8);
+}
+
+/* Tests whether s is a SEP-separated list of Unicode locale attributes
+   (each checked with ultag_isUnicodeLocaleAttribute). */
+U_CFUNC UBool
+ultag_isUnicodeLocaleAttributes(const char* s, int32_t len) {
+ return _isSepListOf(&ultag_isUnicodeLocaleAttribute, s, len);
+}
+
+/**
+ * Tests whether s is a Unicode locale key: two characters, an alphanumeric
+ * followed by an ASCII letter.
+ *
+ * key = alphanum alpha ;
+ *
+ * @param len length of s; negative means s is NUL-terminated.
+ */
+U_CFUNC UBool
+ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
+    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+    if (n != 2) {
+        return FALSE;
+    }
+    const char first = s[0];
+    if (!ISALPHA(first) && !ISNUMERIC(first)) {
+        return FALSE;
+    }
+    return ISALPHA(s[1]) ? TRUE : FALSE;
+}
+
+/* Tests whether s is one subtag of a Unicode locale type: 3 to 8
+   alphanumeric characters. */
+U_CFUNC UBool
+_isUnicodeLocaleTypeSubtag(const char*s, int32_t len) {
+ /*
+ * alphanum{3,8}
+ */
+ return _isAlphaNumericStringLimitedLength(s, len , 3, 8);
+}
+
+/* Tests whether s is a Unicode locale type: a SEP-separated list of
+   subtags, each checked with _isUnicodeLocaleTypeSubtag. */
+U_CFUNC UBool
+ultag_isUnicodeLocaleType(const char*s, int32_t len) {
+ /*
+ * type = alphanum{3,8} (sep alphanum{3,8})* ;
+ */
+ return _isSepListOf(&_isUnicodeLocaleTypeSubtag, s, len);
+}
+
+/**
+ * Tests whether s is a transformed extension key: two characters, an ASCII
+ * letter followed by a digit.
+ *
+ * tkey = alpha digit ;
+ *
+ * @param len length of s; negative means s is NUL-terminated.
+ */
+static UBool
+_isTKey(const char* s, int32_t len)
+{
+    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+    if (n != 2) {
+        return FALSE;
+    }
+    if (!ISALPHA(s[0])) {
+        return FALSE;
+    }
+    return ISNUMERIC(s[1]) ? TRUE : FALSE;
+}
+
+/* Tests whether s is one subtag of a transformed extension value: 3 to 8
+   alphanumeric characters. The grammar below describes a whole tvalue;
+   this function checks a single subtag of it. */
+static UBool
+_isTValue(const char* s, int32_t len)
+{
+ /*
+ * tvalue = (sep alphanum{3,8})+ ;
+ */
+ return _isAlphaNumericStringLimitedLength(s, len , 3, 8);
+}
+
+/*
+ * State machine step for validating the subtags of a transformed ("t")
+ * extension. Called once per subtag by _isStatefulSepListOf(), which starts
+ * with state 0 (kStart).
+ *
+ * state: in/out parser state; updated when the subtag moves the machine to
+ *        another state.
+ * s/len: the subtag to classify.
+ * Returns TRUE when the subtag is acceptable in the current state, FALSE
+ * otherwise (including an unrecognized state value).
+ *
+ * kGotTKey is negative on purpose: _isStatefulSepListOf() rejects a list
+ * that ends while state < 0, i.e. a tkey with no tvalue after it.
+ */
+static UBool
+_isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len)
+{
+ const int32_t kStart = 0; // Start, wait for unicode_language_subtag, tkey or end
+ const int32_t kGotLanguage = 1; // Got unicode_language_subtag, wait for unicode_script_subtag,
+ // unicode_region_subtag, unicode_variant_subtag, tkey or end
+ const int32_t kGotScript = 2; // Got unicode_script_subtag, wait for unicode_region_subtag,
+ // unicode_variant_subtag, tkey, or end
+ const int32_t kGotRegion = 3; // Got unicode_region_subtag, wait for unicode_variant_subtag,
+ // tkey, or end.
+ const int32_t kGotVariant = 4; // Got unicode_variant_subtag, wait for unicode_variant_subtag
+ // tkey or end.
+ const int32_t kGotTKey = -1; // Got tkey, wait for tvalue. ERROR if stop here.
+ const int32_t kGotTValue = 6; // Got tvalue, wait for tkey, tvalue or end
+
+ switch (state) {
+ case kStart:
+ if (ultag_isLanguageSubtag(s, len)) {
+ state = kGotLanguage;
+ return TRUE;
+ }
+ if (_isTKey(s, len)) {
+ state = kGotTKey;
+ return TRUE;
+ }
+ return FALSE;
+ // The next four cases deliberately fall through: each later state accepts
+ // everything the following states accept.
+ case kGotLanguage:
+ if (ultag_isScriptSubtag(s, len)) {
+ state = kGotScript;
+ return TRUE;
+ }
+ U_FALLTHROUGH;
+ case kGotScript:
+ if (ultag_isRegionSubtag(s, len)) {
+ state = kGotRegion;
+ return TRUE;
+ }
+ U_FALLTHROUGH;
+ case kGotRegion:
+ U_FALLTHROUGH;
+ case kGotVariant:
+ if (_isVariantSubtag(s, len)) {
+ state = kGotVariant;
+ return TRUE;
+ }
+ if (_isTKey(s, len)) {
+ state = kGotTKey;
+ return TRUE;
+ }
+ return FALSE;
+ case kGotTKey:
+ if (_isTValue(s, len)) {
+ state = kGotTValue;
+ return TRUE;
+ }
+ return FALSE;
+ case kGotTValue:
+ if (_isTKey(s, len)) {
+ state = kGotTKey;
+ return TRUE;
+ }
+ if (_isTValue(s, len)) {
+ return TRUE;
+ }
+ return FALSE;
+ }
+ return FALSE;
+}
+
+/**
+ * State machine step for validating the subtags of a Unicode ("u")
+ * extension. Called once per subtag by _isStatefulSepListOf(), which starts
+ * with state 0 (kStart).
+ *
+ * @param state in/out parser state.
+ * @param s     subtag to classify.
+ * @param len   length of the subtag.
+ * @return TRUE when the subtag is acceptable in the current state, FALSE
+ *         otherwise (including an unrecognized state value).
+ */
+static UBool
+_isUnicodeExtensionSubtag(int32_t& state, const char* s, int32_t len)
+{
+    const int32_t kStart = 0;         // Start, wait for a key or attribute or end
+    const int32_t kGotKey = 1;        // Got a key, wait for type or key or end
+    const int32_t kGotType = 2;       // Got a type, wait for key or end
+
+    if (state != kStart && state != kGotKey && state != kGotType) {
+        return FALSE;
+    }
+
+    // A key is accepted in every state and always leads to kGotKey.
+    if (ultag_isUnicodeLocaleKey(s, len)) {
+        state = kGotKey;
+        return TRUE;
+    }
+
+    // Before the first key only attributes are allowed; state is unchanged.
+    if (state == kStart) {
+        return ultag_isUnicodeLocaleAttribute(s, len) ? TRUE : FALSE;
+    }
+
+    // After a key or a type, a further type subtag may follow.
+    if (_isUnicodeLocaleTypeSubtag(s, len)) {
+        state = kGotType;
+        return TRUE;
+    }
+    return FALSE;
+}
+
+/**
+ * Feeds every SEP-delimited subtag of s to a stateful predicate.
+ *
+ * @param test  step function; receives the shared state (initially 0), the
+ *              start of a subtag and its length.
+ * @param s     the subtag list.
+ * @param len   number of bytes of s to examine; a negative value means that
+ *              s is NUL-terminated and its length is measured here.
+ * @return TRUE only if every subtag was accepted and the state after the
+ *         last subtag is not negative.
+ */
+static UBool
+_isStatefulSepListOf(UBool (*test)(int32_t&, const char*, int32_t), const char* s, int32_t len)
+{
+    int32_t state = 0;
+
+    if (len < 0) {
+        len = (int32_t)uprv_strlen(s);
+    }
+
+    const char* subtag = s;              /* start of the subtag being scanned */
+    const char* const limit = s + len;   /* one past the last byte to examine */
+
+    for (const char* cur = s; cur != limit; ++cur) {
+        if (*cur != SEP) {
+            continue;
+        }
+        /* a separator closes the current subtag */
+        if (!test(state, subtag, (int32_t)(cur - subtag))) {
+            return FALSE;
+        }
+        subtag = cur + 1;
+    }
+
+    /* the trailing subtag (possibly empty) is tested as well */
+    if (!test(state, subtag, (int32_t)(limit - subtag))) {
+        return FALSE;
+    }
+    return (state >= 0) ? TRUE : FALSE;
+}
+
+/**
+ * Checks a SEP-separated subtag list by running it through
+ * _isStatefulSepListOf() with _isTransformedExtensionSubtag() as the step
+ * function. A negative len means that s is NUL-terminated.
+ */
+U_CFUNC UBool
+ultag_isTransformedExtensionSubtags(const char* s, int32_t len)
+{
+    UBool (* const step)(int32_t&, const char*, int32_t) = _isTransformedExtensionSubtag;
+    return _isStatefulSepListOf(step, s, len);
+}
+
+/**
+ * Checks a SEP-separated subtag list by running it through
+ * _isStatefulSepListOf() with _isUnicodeExtensionSubtag() as the step
+ * function. A negative len means that s is NUL-terminated.
+ */
+U_CFUNC UBool
+ultag_isUnicodeExtensionSubtags(const char* s, int32_t len) {
+    UBool (* const step)(int32_t&, const char*, int32_t) = _isUnicodeExtensionSubtag;
+    return _isStatefulSepListOf(step, s, len);
+}
+
+
+/*
+* -------------------------------------------------
+*
+* Helper functions
+*
+* -------------------------------------------------
+*/
+
+/**
+ * Appends var to the end of the singly linked list *first, keeping the
+ * existing order, unless an entry with the same variant string is already
+ * present.
+ *
+ * @return TRUE if var was linked in, FALSE if it is a duplicate (the list
+ *         and var are left untouched in that case).
+ */
+static UBool
+_addVariantToList(VariantListEntry **first, VariantListEntry *var) {
+    /* link always addresses the pointer that would have to reference var */
+    VariantListEntry **link = first;
+
+    while (*link != NULL) {
+        if (uprv_compareInvCharsAsAscii(var->variant, (*link)->variant) == 0) {
+            /* duplicated variant */
+            return FALSE;
+        }
+        link = &(*link)->next;
+    }
+
+    /* reached the tail (or the list was empty): hook the new entry in */
+    var->next = NULL;
+    *link = var;
+    return TRUE;
+}
+
+/**
+ * Inserts attr into the singly linked list *first so that the attribute
+ * strings stay in ascending uprv_compareInvCharsAsAscii() order.
+ *
+ * @return TRUE if attr was linked in, FALSE if an entry with the same
+ *         attribute string already exists (nothing is modified then).
+ */
+static UBool
+_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
+    /* link always addresses the pointer that would have to reference attr */
+    AttributeListEntry **link = first;
+
+    for (; *link != NULL; link = &(*link)->next) {
+        const int32_t order = uprv_compareInvCharsAsAscii(attr->attribute, (*link)->attribute);
+        if (order == 0) {
+            /* duplicated attribute */
+            return FALSE;
+        }
+        if (order < 0) {
+            /* attr sorts before the current entry - insert right here */
+            break;
+        }
+    }
+
+    attr->next = *link;
+    *link = attr;
+    return TRUE;
+}
+
+
+/**
+ * Inserts ext into the sorted singly linked list *first.
+ *
+ * With localeToBCP == FALSE the keys are ordered purely by
+ * uprv_compareInvCharsAsAscii(). With localeToBCP == TRUE the ordering used
+ * for locale ID -> language tag conversion applies:
+ *  - two one-character keys compare by character, except that PRIVATEUSE
+ *    sorts after every other one-character key;
+ *  - a one-character key is compared against LDMLEXT when the other key is
+ *    longer than one character;
+ *  - two longer keys compare by uprv_compareInvCharsAsAscii(), except that
+ *    LOCALE_ATTRIBUTE_KEY sorts before any other such key.
+ *
+ * @return TRUE if ext was linked in, FALSE if an entry comparing equal is
+ *         already present (nothing is modified then).
+ */
+static UBool
+_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
+    /* link always addresses the pointer that would have to reference ext */
+    ExtensionListEntry **link = first;
+
+    for (; *link != NULL; link = &(*link)->next) {
+        const ExtensionListEntry *cur = *link;
+        int32_t order;
+
+        if (!localeToBCP) {
+            order = uprv_compareInvCharsAsAscii(ext->key, cur->key);
+        } else {
+            /* special handling for locale to bcp conversion */
+            const UBool extIsSingleChar = ((int32_t)uprv_strlen(ext->key) == 1);
+            const UBool curIsSingleChar = ((int32_t)uprv_strlen(cur->key) == 1);
+
+            if (extIsSingleChar && curIsSingleChar) {
+                if (*(ext->key) == *(cur->key)) {
+                    order = 0;
+                } else if (*(ext->key) == PRIVATEUSE) {
+                    order = 1;
+                } else if (*(cur->key) == PRIVATEUSE) {
+                    order = -1;
+                } else {
+                    order = *(ext->key) - *(cur->key);
+                }
+            } else if (extIsSingleChar) {
+                order = *(ext->key) - LDMLEXT;
+            } else if (curIsSingleChar) {
+                order = LDMLEXT - *(cur->key);
+            } else {
+                order = uprv_compareInvCharsAsAscii(ext->key, cur->key);
+                /* Both are u extension keys - 'attribute' always goes first */
+                if (order != 0) {
+                    if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) {
+                        order = 1;
+                    } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
+                        order = -1;
+                    }
+                }
+            }
+        }
+
+        if (order == 0) {
+            /* duplicated extension key */
+            return FALSE;
+        }
+        if (order < 0) {
+            /* ext sorts before the current entry - insert right here */
+            break;
+        }
+    }
+
+    ext->next = *link;
+    *link = ext;
+    return TRUE;
+}
+
+/**
+ * Puts a freshly allocated ULanguageTag into its "empty" state: no buffer,
+ * no list entries, and every string field set to EMPTY.
+ */
+static void
+_initializeULanguageTag(ULanguageTag* langtag) {
+    /* owned storage and lists */
+    langtag->buf = NULL;
+    langtag->variants = NULL;
+    langtag->extensions = NULL;
+
+    /* string fields */
+    langtag->language = EMPTY;
+    langtag->script = EMPTY;
+    langtag->region = EMPTY;
+    langtag->legacy = EMPTY;
+    langtag->privateuse = EMPTY;
+
+    /* extlang slots */
+    for (int32_t slot = 0; slot < MAXEXTLANG; ++slot) {
+        langtag->extlang[slot] = NULL;
+    }
+}
+
+/**
+ * Appends the language subtag for localeID to sink.
+ *
+ * The language is taken from uloc_getLanguage(). If it cannot be extracted
+ * or is not a well-formed language subtag, strict mode sets *status to
+ * U_ILLEGAL_ARGUMENT_ERROR and appends nothing, while lenient mode appends
+ * LANG_UND. An empty language always yields LANG_UND. Well-formed codes
+ * listed in DEPRECATEDLANGS are replaced by their current equivalent.
+ * Nothing is done if *status already indicates a failure.
+ */
+static void
+_appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {
+    if (U_FAILURE(*status)) {
+        return;
+    }
+
+    char lang[ULOC_LANG_CAPACITY];
+    UErrorCode extractStatus = U_ZERO_ERROR;
+    int32_t langLen = uloc_getLanguage(localeID, lang, sizeof(lang), &extractStatus);
+
+    if (U_FAILURE(extractStatus) || extractStatus == U_STRING_NOT_TERMINATED_WARNING) {
+        if (strict) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        langLen = 0;
+    }
+
+    /* Note: returned language code is in lower case letters */
+
+    if (langLen != 0 && !ultag_isLanguageSubtag(lang, langLen)) {
+        /* invalid language code */
+        if (strict) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        langLen = 0;   /* lenient mode: treat it like a missing language */
+    }
+
+    if (langLen == 0) {
+        sink.Append(LANG_UND, LANG_UND_LEN);
+        return;
+    }
+
+    /* resolve deprecated */
+    const size_t inputLen = uprv_strlen(lang);
+    for (int32_t idx = 0; idx < UPRV_LENGTHOF(DEPRECATEDLANGS); idx += 2) {
+        // 2-letter deprecated subtags are listed before 3-letter ones in
+        // DEPRECATEDLANGS[]. Once an entry longer than the input shows up
+        // there cannot be a match any more, so stop searching.
+        if (inputLen < uprv_strlen(DEPRECATEDLANGS[idx])) {
+            break;
+        }
+        if (uprv_compareInvCharsAsAscii(lang, DEPRECATEDLANGS[idx]) == 0) {
+            uprv_strcpy(lang, DEPRECATEDLANGS[idx + 1]);
+            langLen = (int32_t)uprv_strlen(lang);
+            break;
+        }
+    }
+    sink.Append(lang, langLen);
+}
+
+/**
+ * Appends "-" followed by the script subtag of localeID to sink, if the
+ * locale has a script.
+ *
+ * If the script cannot be extracted or is not a well-formed script subtag,
+ * nothing is appended; in strict mode *status is additionally set to
+ * U_ILLEGAL_ARGUMENT_ERROR. Nothing is done if *status already indicates a
+ * failure.
+ */
+static void
+_appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {
+    if (U_FAILURE(*status)) {
+        return;
+    }
+
+    char script[ULOC_SCRIPT_CAPACITY];
+    UErrorCode extractStatus = U_ZERO_ERROR;
+    const int32_t scriptLen = uloc_getScript(localeID, script, sizeof(script), &extractStatus);
+
+    UBool usable = TRUE;
+    if (U_FAILURE(extractStatus) || extractStatus == U_STRING_NOT_TERMINATED_WARNING) {
+        usable = FALSE;
+    } else if (scriptLen <= 0) {
+        /* no script - nothing to append, and not an error */
+        return;
+    } else if (!ultag_isScriptSubtag(script, scriptLen)) {
+        /* invalid script code */
+        usable = FALSE;
+    }
+
+    if (!usable) {
+        if (strict) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+        return;
+    }
+
+    sink.Append("-", 1);
+    sink.Append(script, scriptLen);
+}
+
+/**
+ * Appends "-" followed by the region subtag of localeID to sink, if the
+ * locale has a region. Regions listed in DEPRECATEDREGIONS are replaced by
+ * their current equivalent.
+ *
+ * If the region cannot be extracted or is not a well-formed region subtag,
+ * nothing is appended; in strict mode *status is additionally set to
+ * U_ILLEGAL_ARGUMENT_ERROR. Nothing is done if *status already indicates a
+ * failure.
+ */
+static void
+_appendRegionToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {
+    if (U_FAILURE(*status)) {
+        return;
+    }
+
+    char region[ULOC_COUNTRY_CAPACITY];
+    UErrorCode extractStatus = U_ZERO_ERROR;
+    int32_t regionLen = uloc_getCountry(localeID, region, sizeof(region), &extractStatus);
+
+    UBool usable = TRUE;
+    if (U_FAILURE(extractStatus) || extractStatus == U_STRING_NOT_TERMINATED_WARNING) {
+        usable = FALSE;
+    } else if (regionLen <= 0) {
+        /* no region - nothing to append, and not an error */
+        return;
+    } else if (!ultag_isRegionSubtag(region, regionLen)) {
+        /* invalid region code */
+        usable = FALSE;
+    }
+
+    if (!usable) {
+        if (strict) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+        return;
+    }
+
+    sink.Append("-", 1);
+
+    /* resolve deprecated */
+    int idx = 0;
+    while (idx < UPRV_LENGTHOF(DEPRECATEDREGIONS)) {
+        if (uprv_compareInvCharsAsAscii(region, DEPRECATEDREGIONS[idx]) == 0) {
+            uprv_strcpy(region, DEPRECATEDREGIONS[idx + 1]);
+            regionLen = (int32_t)uprv_strlen(region);
+            break;
+        }
+        idx += 2;
+    }
+    sink.Append(region, regionLen);
+}
+
+/**
+ * Sorts the variant strings of the list starting at first into ascending
+ * uprv_compareInvCharsAsAscii() order. The list nodes stay where they are;
+ * only their variant pointers are exchanged.
+ */
+static void _sortVariants(VariantListEntry* first) {
+    VariantListEntry* slot = first;
+    while (slot != NULL) {
+        /* move the smallest remaining variant into slot */
+        VariantListEntry* probe = slot->next;
+        while (probe != NULL) {
+            if (uprv_compareInvCharsAsAscii(slot->variant, probe->variant) > 0) {
+                const char* smaller = probe->variant;
+                probe->variant = slot->variant;
+                slot->variant = smaller;
+            }
+            probe = probe->next;
+        }
+        slot = slot->next;
+    }
+}
+
+/**
+ * Appends the variant subtags of localeID to sink, each preceded by "-".
+ *
+ * The variant returned by uloc_getVariant() is split at SEP / LOCALE_SEP,
+ * lower-cased in place and validated subtag by subtag:
+ *  - well-formed variant subtags are collected (duplicates are dropped; in
+ *    strict mode a duplicate is an error), sorted and written out;
+ *  - if the whole variant is exactly POSIX_VALUE, *hadPosix is set to TRUE
+ *    instead of emitting a variant;
+ *  - an empty or ill-formed subtag is an error in strict mode; in lenient
+ *    mode it is skipped, except that an ill-formed subtag which is a
+ *    private use value subtag stops the scan.
+ *
+ * On error *status is set (U_ILLEGAL_ARGUMENT_ERROR or
+ * U_MEMORY_ALLOCATION_ERROR) and nothing is appended. Nothing is done if
+ * *status already indicates a failure.
+ */
+static void
+_appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool *hadPosix, UErrorCode* status) {
+    if (U_FAILURE(*status)) {
+        return;
+    }
+
+    char variantBuf[ULOC_FULLNAME_CAPACITY];
+    UErrorCode extractStatus = U_ZERO_ERROR;
+    const int32_t variantLen = uloc_getVariant(localeID, variantBuf, sizeof(variantBuf), &extractStatus);
+    if (U_FAILURE(extractStatus) || extractStatus == U_STRING_NOT_TERMINATED_WARNING) {
+        if (strict) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+        return;
+    }
+    if (variantLen <= 0) {
+        return;
+    }
+
+    VariantListEntry *listHead = NULL;
+    VariantListEntry *entry;
+    char *subtagStart = NULL;   /* first char of the subtag being scanned, if any */
+    UBool more = TRUE;
+
+    for (char *cursor = variantBuf; more; cursor++) {
+        if (*cursor != SEP && *cursor != LOCALE_SEP && *cursor != 0) {
+            /* ordinary character: remember where the subtag begins */
+            if (subtagStart == NULL) {
+                subtagStart = cursor;
+            }
+            continue;
+        }
+
+        /* separator or end of string: close the current subtag */
+        if (*cursor == 0) {
+            more = FALSE;
+        } else {
+            *cursor = 0; /* terminate */
+        }
+        char *subtag = subtagStart;
+        subtagStart = NULL;
+
+        if (subtag == NULL) {
+            if (strict) {
+                *status = U_ILLEGAL_ARGUMENT_ERROR;
+                break;
+            }
+            /* ignore empty variant */
+            continue;
+        }
+
+        /* ICU uses upper case letters for variants, but
+           the canonical format is lowercase in BCP47 */
+        for (char *c = subtag; *c != 0; c++) {
+            *c = uprv_tolower(*c);
+        }
+
+        /* validate */
+        if (!_isVariantSubtag(subtag, -1)) {
+            if (strict) {
+                *status = U_ILLEGAL_ARGUMENT_ERROR;
+                break;
+            }
+            if (_isPrivateuseValueSubtag(subtag, -1)) {
+                /* Handle private use subtags separately */
+                break;
+            }
+            continue;
+        }
+
+        if (uprv_strcmp(subtag, POSIX_VALUE) == 0 && variantLen == (int32_t)uprv_strlen(POSIX_VALUE)) {
+            /* Special handling for POSIX variant, need to remember that we had it and then */
+            /* treat it like an extension later. */
+            *hadPosix = TRUE;
+            continue;
+        }
+
+        /* emit the variant to the list */
+        entry = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
+        if (entry == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            break;
+        }
+        entry->variant = subtag;
+        if (!_addVariantToList(&listHead, entry)) {
+            /* duplicated variant */
+            uprv_free(entry);
+            if (strict) {
+                *status = U_ILLEGAL_ARGUMENT_ERROR;
+                break;
+            }
+        }
+    }
+
+    if (U_SUCCESS(*status) && listHead != NULL) {
+        /* per UTS35, we should sort the variants */
+        _sortVariants(listHead);
+
+        /* write out validated/normalized variants to the target */
+        for (entry = listHead; entry != NULL; entry = entry->next) {
+            sink.Append("-", 1);
+            sink.Append(entry->variant, (int32_t)uprv_strlen(entry->variant));
+        }
+    }
+
+    /* clean up */
+    entry = listHead;
+    while (entry != NULL) {
+        VariantListEntry *following = entry->next;
+        uprv_free(entry);
+        entry = following;
+    }
+}
+
+/**
+ * Appends the extension part of a language tag, derived from the keywords of
+ * localeID, to sink.
+ *
+ * Every keyword of the locale is classified by its key:
+ *  - LOCALE_ATTRIBUTE_KEY: the '-' separated value is split into attributes,
+ *    which are collected in sorted order;
+ *  - any other key longer than one character: mapped to a "u" extension
+ *    key/type pair via uloc_toUnicodeLocaleKey() / uloc_toUnicodeLocaleType();
+ *  - one-character keys: PRIVATEUSE or another extension singleton, whose
+ *    value is validated and passed through.
+ * The resulting entries are sorted with _addExtensionToList(..., TRUE) and
+ * written out; "-u" is emitted once before the first entry whose key is
+ * longer than one character, and a type of "true" or "yes" is omitted.
+ *
+ * @param localeID  locale ID whose keywords are converted.
+ * @param sink      receives the generated text.
+ * @param strict    if TRUE, keywords that cannot be converted (and duplicate
+ *                  keys/attributes) set U_ILLEGAL_ARGUMENT_ERROR; if FALSE
+ *                  they are skipped.
+ * @param hadPosix  if TRUE, a POSIX_KEY/POSIX_VALUE entry is added even when
+ *                  the keyword enumeration could not be opened.
+ * @param status    in/out error code. An attribute that does not fit into
+ *                  the internal attribute buffer always sets
+ *                  U_ILLEGAL_ARGUMENT_ERROR, regardless of strict.
+ */
+static void
+_appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) {
+    char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
+    int32_t attrBufLength = 0;
+
+    icu::MemoryPool<AttributeListEntry> attrPool;
+    icu::MemoryPool<ExtensionListEntry> extPool;
+    icu::MemoryPool<icu::CharString> strPool;
+
+    icu::LocalUEnumerationPointer keywordEnum(uloc_openKeywords(localeID, status));
+    if (U_FAILURE(*status) && !hadPosix) {
+        return;
+    }
+    if (keywordEnum.isValid() || hadPosix) {
+        /* reorder extensions */
+        int32_t len;
+        const char *key;
+        ExtensionListEntry *firstExt = NULL;
+        ExtensionListEntry *ext;
+        AttributeListEntry *firstAttr = NULL;
+        AttributeListEntry *attr;
+        icu::MemoryPool<icu::CharString> extBufPool;
+        const char *bcpKey=nullptr, *bcpValue=nullptr;
+        UErrorCode tmpStatus = U_ZERO_ERROR;
+        int32_t keylen;
+        UBool isBcpUExt;
+
+        while (TRUE) {
+            key = uenum_next(keywordEnum.getAlias(), NULL, status);
+            if (key == NULL) {
+                break;
+            }
+
+            icu::CharString buf;
+            {
+                /* scoped so that the sink is gone before buf is read */
+                icu::CharStringByteSink valueSink(&buf);
+                ulocimp_getKeywordValue(localeID, key, valueSink, &tmpStatus);
+            }
+            len = buf.length();
+
+            if (U_FAILURE(tmpStatus)) {
+                if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
+                    *status = U_MEMORY_ALLOCATION_ERROR;
+                    break;
+                }
+                if (strict) {
+                    *status = U_ILLEGAL_ARGUMENT_ERROR;
+                    break;
+                }
+                /* ignore this keyword */
+                tmpStatus = U_ZERO_ERROR;
+                continue;
+            }
+
+            keylen = (int32_t)uprv_strlen(key);
+            isBcpUExt = (keylen > 1);
+
+            /* special keyword used for representing Unicode locale attributes */
+            if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
+                /* NOTE(review): when len == 0, bcpKey/bcpValue keep whatever the
+                   previous keyword left in them (or nullptr) - confirm that an
+                   empty attribute value cannot reach this point. */
+                if (len > 0) {
+                    int32_t i = 0;
+                    while (TRUE) {
+                        attrBufLength = 0;
+                        for (; i < len; i++) {
+                            if (buf[i] != '-') {
+                                /* The keyword value is not limited in length, but attrBuf
+                                   is: refuse an attribute that would not fit (one byte is
+                                   kept free for the terminating NUL) instead of writing
+                                   past the end of the buffer. */
+                                if (attrBufLength >= (int32_t)sizeof(attrBuf) - 1) {
+                                    *status = U_ILLEGAL_ARGUMENT_ERROR;
+                                    return;
+                                }
+                                attrBuf[attrBufLength++] = buf[i];
+                            } else {
+                                i++;
+                                break;
+                            }
+                        }
+                        if (attrBufLength > 0) {
+                            attrBuf[attrBufLength] = 0;
+
+                        } else if (i >= len){
+                            break;
+                        }
+
+                        /* create AttributeListEntry */
+                        attr = attrPool.create();
+                        if (attr == NULL) {
+                            *status = U_MEMORY_ALLOCATION_ERROR;
+                            break;
+                        }
+                        icu::CharString* attrValue =
+                                strPool.create(attrBuf, attrBufLength, *status);
+                        if (attrValue == NULL) {
+                            *status = U_MEMORY_ALLOCATION_ERROR;
+                            break;
+                        }
+                        if (U_FAILURE(*status)) {
+                            break;
+                        }
+                        attr->attribute = attrValue->data();
+
+                        if (!_addAttributeToList(&firstAttr, attr)) {
+                            if (strict) {
+                                *status = U_ILLEGAL_ARGUMENT_ERROR;
+                                break;
+                            }
+                        }
+                    }
+                    /* for a place holder ExtensionListEntry */
+                    bcpKey = LOCALE_ATTRIBUTE_KEY;
+                    bcpValue = NULL;
+                }
+            } else if (isBcpUExt) {
+                bcpKey = uloc_toUnicodeLocaleKey(key);
+                if (bcpKey == NULL) {
+                    if (strict) {
+                        *status = U_ILLEGAL_ARGUMENT_ERROR;
+                        break;
+                    }
+                    continue;
+                }
+
+                /* we've checked buf is null-terminated above */
+                bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
+                if (bcpValue == NULL) {
+                    if (strict) {
+                        *status = U_ILLEGAL_ARGUMENT_ERROR;
+                        break;
+                    }
+                    continue;
+                }
+                if (bcpValue == buf.data()) {
+                    /*
+                    When uloc_toUnicodeLocaleType(key, buf) returns the
+                    input value as is, the value is well-formed, but has
+                    no known mapping. This implementation normalizes the
+                    value to lower case
+                    */
+                    icu::CharString* extBuf = extBufPool.create(buf, tmpStatus);
+
+                    if (extBuf == nullptr) {
+                        *status = U_MEMORY_ALLOCATION_ERROR;
+                        break;
+                    }
+                    if (U_FAILURE(tmpStatus)) {
+                        *status = tmpStatus;
+                        break;
+                    }
+
+                    T_CString_toLowerCase(extBuf->data());
+                    bcpValue = extBuf->data();
+                }
+            } else {
+                if (*key == PRIVATEUSE) {
+                    if (!ultag_isPrivateuseValueSubtags(buf.data(), len)) {
+                        if (strict) {
+                            *status = U_ILLEGAL_ARGUMENT_ERROR;
+                            break;
+                        }
+                        continue;
+                    }
+                } else {
+                    if (!_isExtensionSingleton(key, keylen) || !ultag_isExtensionSubtags(buf.data(), len)) {
+                        if (strict) {
+                            *status = U_ILLEGAL_ARGUMENT_ERROR;
+                            break;
+                        }
+                        continue;
+                    }
+                }
+                bcpKey = key;
+                /* buf goes out of scope at the end of this iteration - keep a copy */
+                icu::CharString* extBuf =
+                    extBufPool.create(buf.data(), len, tmpStatus);
+                if (extBuf == nullptr) {
+                    *status = U_MEMORY_ALLOCATION_ERROR;
+                    break;
+                }
+                if (U_FAILURE(tmpStatus)) {
+                    *status = tmpStatus;
+                    break;
+                }
+                bcpValue = extBuf->data();
+            }
+
+            /* create ExtensionListEntry */
+            ext = extPool.create();
+            if (ext == NULL) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                break;
+            }
+            ext->key = bcpKey;
+            ext->value = bcpValue;
+
+            if (!_addExtensionToList(&firstExt, ext, TRUE)) {
+                if (strict) {
+                    *status = U_ILLEGAL_ARGUMENT_ERROR;
+                    break;
+                }
+            }
+        }
+
+        /* Special handling for POSIX variant - add the keywords for POSIX */
+        if (hadPosix) {
+            /* create ExtensionListEntry for POSIX */
+            ext = extPool.create();
+            if (ext == NULL) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            ext->key = POSIX_KEY;
+            ext->value = POSIX_VALUE;
+
+            if (!_addExtensionToList(&firstExt, ext, TRUE)) {
+                // Silently ignore errors.
+            }
+        }
+
+        if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
+            UBool startLDMLExtension = FALSE;
+            for (ext = firstExt; ext; ext = ext->next) {
+                if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
+                    /* first LDML u singleton extension */
+                    sink.Append("-u", 2);
+                    startLDMLExtension = TRUE;
+                }
+
+                /* write out the sorted BCP47 attributes, extensions and private use */
+                if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
+                    /* the placeholder entry stands for the whole attribute list */
+                    for (attr = firstAttr; attr; attr = attr->next) {
+                        sink.Append("-", 1);
+                        sink.Append(
+                                attr->attribute, static_cast<int32_t>(uprv_strlen(attr->attribute)));
+                    }
+                } else {
+                    sink.Append("-", 1);
+                    sink.Append(ext->key, static_cast<int32_t>(uprv_strlen(ext->key)));
+                    /* a type of "true" or "yes" is implied by the key alone */
+                    if (uprv_strcmp(ext->value, "true") != 0 &&
+                            uprv_strcmp(ext->value, "yes") != 0) {
+                        sink.Append("-", 1);
+                        sink.Append(ext->value, static_cast<int32_t>(uprv_strlen(ext->value)));
+                    }
+                }
+            }
+        }
+    }
+}
+
+/**
+ * Append keywords parsed from LDML extension value
+ * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
+ *
+ * The value is processed in two phases:
+ *  1. Leading subtags up to (not including) the first subtag accepted by
+ *     ultag_isUnicodeLocaleKey() are attributes. They are collected in sorted
+ *     order (duplicates ignored), joined with '-' and stored as one entry
+ *     whose key is LOCALE_ATTRIBUTE_KEY.
+ *  2. The remaining subtags are key/type pairs. Each key is mapped with
+ *     uloc_toLegacyKey() and each type with uloc_toLegacyType(); a key
+ *     without type gets LOCALE_TYPE_YES. Results that merely echo the input
+ *     buffer are lower-cased and copied into kwdBuf.
+ * All entries are first gathered in a local sorted list and finally merged
+ * into *appendTo with _addExtensionToList(); entries whose key is already
+ * present there are silently dropped.
+ *
+ * @param ldmlext       the LDML extension value to parse (NUL-terminated).
+ * @param appendTo      in/out head of the keyword list that receives the result.
+ * @param extPool       pool that owns the created ExtensionListEntry objects.
+ * @param kwdBuf        pool that owns the strings created for keys and types.
+ * @param posixVariant  on input, whether a variant already exists; it is
+ *                      always reset to FALSE and set to TRUE only if no
+ *                      variant existed and a POSIX_KEY/POSIX_VALUE pair was
+ *                      found (that pair is then not added as a keyword).
+ * @param status        set to U_MEMORY_ALLOCATION_ERROR or
+ *                      U_ILLEGAL_ARGUMENT_ERROR on failure. This function
+ *                      does not test *status on entry.
+ */
+static void
+_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, icu::MemoryPool<ExtensionListEntry>& extPool, icu::MemoryPool<icu::CharString>& kwdBuf, UBool *posixVariant, UErrorCode *status) {
+ const char *pTag; /* beginning of current subtag */
+ const char *pKwds; /* beginning of key-type pairs */
+ UBool variantExists = *posixVariant; /* remember the caller's input before it is reset below */
+
+ ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */
+ ExtensionListEntry *kwd, *nextKwd;
+
+ int32_t len;
+
+ /* Reset the posixVariant value */
+ *posixVariant = FALSE;
+
+ pTag = ldmlext;
+ pKwds = NULL;
+
+ {
+ AttributeListEntry *attrFirst = NULL; /* first attribute */
+ AttributeListEntry *attr, *nextAttr;
+
+ char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; /* NUL-terminated copies of all attributes, back to back */
+ int32_t attrBufIdx = 0; /* next free position in attrBuf */
+
+ icu::MemoryPool<AttributeListEntry> attrPool;
+
+ /* Iterate through u extension attributes */
+ while (*pTag) {
+ /* locate next separator char */
+ for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
+
+ if (ultag_isUnicodeLocaleKey(pTag, len)) {
+ pKwds = pTag; /* the first key ends the attribute section */
+ break;
+ }
+
+ /* add this attribute to the list */
+ attr = attrPool.create();
+ if (attr == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+
+ if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) { /* room for the subtag plus its NUL? */
+ uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
+ attrBuf[attrBufIdx + len] = 0;
+ attr->attribute = &attrBuf[attrBufIdx];
+ attrBufIdx += (len + 1);
+ } else {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ // duplicate attribute is ignored, causes no error.
+ _addAttributeToList(&attrFirst, attr);
+
+ /* next tag */
+ pTag += len;
+ if (*pTag) {
+ /* next to the separator */
+ pTag++;
+ }
+ }
+
+ if (attrFirst) {
+ /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
+
+ kwd = extPool.create();
+ if (kwd == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+
+ icu::CharString* value = kwdBuf.create();
+ if (value == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+
+ /* attribute subtags sorted in alphabetical order as type */
+ attr = attrFirst;
+ while (attr != NULL) {
+ nextAttr = attr->next;
+ if (attr != attrFirst) {
+ value->append('-', *status);
+ }
+ value->append(attr->attribute, *status);
+ attr = nextAttr;
+ }
+ if (U_FAILURE(*status)) {
+ return;
+ }
+
+ kwd->key = LOCALE_ATTRIBUTE_KEY;
+ kwd->value = value->data();
+
+ if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ }
+ }
+
+ if (pKwds) {
+ const char *pBcpKey = NULL; /* u extension key subtag */
+ const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */
+ int32_t bcpKeyLen = 0;
+ int32_t bcpTypeLen = 0;
+ UBool isDone = FALSE;
+
+ pTag = pKwds;
+ /* BCP47 representation of LDML key/type pairs */
+ while (!isDone) {
+ const char *pNextBcpKey = NULL; /* key that terminated the pending pair, if any */
+ int32_t nextBcpKeyLen = 0;
+ UBool emitKeyword = FALSE;
+
+ if (*pTag) {
+ /* locate next separator char */
+ for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
+
+ if (ultag_isUnicodeLocaleKey(pTag, len)) {
+ if (pBcpKey) {
+ emitKeyword = TRUE; /* a new key completes the pending key/type pair */
+ pNextBcpKey = pTag;
+ nextBcpKeyLen = len;
+ } else {
+ pBcpKey = pTag;
+ bcpKeyLen = len;
+ }
+ } else {
+ U_ASSERT(pBcpKey != NULL);
+ /* within LDML type subtags */
+ if (pBcpType) {
+ bcpTypeLen += (len + 1); /* +1 for the separator between type subtags */
+ } else {
+ pBcpType = pTag;
+ bcpTypeLen = len;
+ }
+ }
+
+ /* next tag */
+ pTag += len;
+ if (*pTag) {
+ /* next to the separator */
+ pTag++;
+ }
+ } else {
+ /* processing last one */
+ emitKeyword = TRUE;
+ isDone = TRUE;
+ }
+
+ if (emitKeyword) {
+ const char *pKey = NULL; /* LDML key */
+ const char *pType = NULL; /* LDML type */
+
+ char bcpKeyBuf[3]; /* BCP key length is always 2 for now */
+
+ U_ASSERT(pBcpKey != NULL);
+
+ if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
+ /* the BCP key is invalid */
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ U_ASSERT(bcpKeyLen <= 2);
+
+ uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
+ bcpKeyBuf[bcpKeyLen] = 0;
+
+ /* u extension key to LDML key */
+ pKey = uloc_toLegacyKey(bcpKeyBuf);
+ if (pKey == NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ if (pKey == bcpKeyBuf) {
+ /*
+ The key returned by toLegacyKey points to the input buffer.
+ We normalize the result key to lower case.
+ The copy in kwdBuf is needed because bcpKeyBuf is a local
+ buffer that does not outlive this block.
+ */
+ T_CString_toLowerCase(bcpKeyBuf);
+ icu::CharString* key = kwdBuf.create(bcpKeyBuf, bcpKeyLen, *status);
+ if (key == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ pKey = key->data();
+ }
+
+ if (pBcpType) {
+ char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */
+ if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) {
+ /* the BCP type is too long */
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
+ bcpTypeBuf[bcpTypeLen] = 0;
+
+ /* BCP type to locale type */
+ pType = uloc_toLegacyType(pKey, bcpTypeBuf);
+ if (pType == NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ if (pType == bcpTypeBuf) {
+ /*
+ The type returned by toLegacyType points to the input buffer.
+ We normalize the result type to lower case.
+ The copy in kwdBuf is needed because bcpTypeBuf is a local
+ buffer that does not outlive this block.
+ */
+ /* normalize to lower case */
+ T_CString_toLowerCase(bcpTypeBuf);
+ icu::CharString* type = kwdBuf.create(bcpTypeBuf, bcpTypeLen, *status);
+ if (type == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ pType = type->data();
+ }
+ } else {
+ /* typeless - default type value is "yes" */
+ pType = LOCALE_TYPE_YES;
+ }
+
+ /* Special handling for u-va-posix, since we want to treat this as a variant,
+ not as a keyword */
+ if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
+ *posixVariant = TRUE;
+ } else {
+ /* create an ExtensionListEntry for this keyword */
+ kwd = extPool.create();
+ if (kwd == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+
+ kwd->key = pKey;
+ kwd->value = pType;
+
+ if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
+ // duplicate keyword is allowed, Only the first
+ // is honored.
+ }
+ }
+
+ /* continue with the key that ended this pair (none after the last pair) */
+ pBcpKey = pNextBcpKey;
+ bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
+ pBcpType = NULL;
+ bcpTypeLen = 0;
+ }
+ }
+ }
+
+ /* merge the local list into the caller's list; the next pointer is saved
+ first because _addExtensionToList() relinks the entry */
+ kwd = kwdFirst;
+ while (kwd != NULL) {
+ nextKwd = kwd->next;
+ _addExtensionToList(appendTo, kwd, FALSE);
+ kwd = nextKwd;
+ }
+}
+
+
+/**
+ * Appends the locale ID keyword section for a parsed language tag to sink.
+ *
+ * The extensions of langtag are turned into a sorted keyword list: an
+ * LDMLEXT extension is expanded by _appendLDMLExtensionAsKeywords(), any
+ * other extension becomes one key=value keyword, and a non-empty private
+ * use value is added under PRIVATEUSE_KEY. A repeated key from a non-LDML
+ * extension or from private use sets U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * On success _POSIX is written first if the LDML extension requested the
+ * POSIX variant, followed by the keywords as "@key=value;key=value...".
+ * Nothing is written on failure, or if *status already indicates one.
+ */
+static void
+_appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status) {
+    icu::MemoryPool<ExtensionListEntry> extPool;
+    icu::MemoryPool<icu::CharString> kwdBuf;
+
+    if (U_FAILURE(*status)) {
+        return;
+    }
+
+    ExtensionListEntry *sorted = NULL;   /* head of the sorted keyword list */
+    ExtensionListEntry *entry;
+
+    /* Determine if variants already exists */
+    UBool posixVariant = ultag_getVariantsSize(langtag) ? TRUE : FALSE;
+
+    /* resolve locale keywords and reordering keys */
+    const int32_t extCount = ultag_getExtensionsSize(langtag);
+    for (int32_t idx = 0; idx < extCount; idx++) {
+        const char *extKey = ultag_getExtensionKey(langtag, idx);
+        const char *extValue = ultag_getExtensionValue(langtag, idx);
+
+        if (*extKey == LDMLEXT) {
+            _appendLDMLExtensionAsKeywords(extValue, &sorted, extPool, kwdBuf, &posixVariant, status);
+        } else {
+            entry = extPool.create();
+            if (entry == NULL) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+            } else {
+                entry->key = extKey;
+                entry->value = extValue;
+                if (!_addExtensionToList(&sorted, entry, FALSE)) {
+                    *status = U_ILLEGAL_ARGUMENT_ERROR;
+                }
+            }
+        }
+        if (U_FAILURE(*status)) {
+            break;
+        }
+    }
+    if (U_FAILURE(*status)) {
+        return;
+    }
+
+    /* add private use as a keyword */
+    const char *privateUse = ultag_getPrivateUse(langtag);
+    if ((int32_t)uprv_strlen(privateUse) > 0) {
+        entry = extPool.create();
+        if (entry == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        entry->key = PRIVATEUSE_KEY;
+        entry->value = privateUse;
+        if (!_addExtensionToList(&sorted, entry, FALSE)) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+    }
+
+    /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
+    if (posixVariant) {
+        sink.Append(_POSIX, (int32_t) uprv_strlen(_POSIX));
+    }
+
+    /* write out the sorted keywords: '@' before the first one, ';' before the others */
+    const char *delimiter = "@";
+    for (entry = sorted; entry != NULL; entry = entry->next) {
+        sink.Append(delimiter, 1);
+        delimiter = ";";
+
+        /* key */
+        sink.Append(entry->key, (int32_t)uprv_strlen(entry->key));
+        sink.Append("=", 1);
+
+        /* type */
+        sink.Append(entry->value, (int32_t)uprv_strlen(entry->value));
+    }
+}
+
+/**
+ * Appends the part of the locale variant that can only be represented as
+ * private use, in the form "-x-" PRIVUSE_VARIANT_PREFIX "-subtag-subtag...".
+ *
+ * The variant returned by uloc_getVariant() is split at SEP / LOCALE_SEP and
+ * lower-cased in place. Leading subtags that are well-formed variant subtags
+ * are skipped; starting with the first subtag that is a private use value
+ * subtag but not a variant subtag, every subtag is written. A subtag that is
+ * not even a private use value subtag stops the scan; in strict mode it also
+ * sets U_ILLEGAL_ARGUMENT_ERROR, in which case nothing is appended.
+ *
+ * The text is assembled in a fixed-size buffer. reslen tracks the length the
+ * complete text would need, which can exceed that buffer; the output is
+ * therefore limited to what was actually stored, so that no bytes beyond the
+ * buffer are ever handed to the sink.
+ *
+ * @param localeID  locale ID whose variant is examined.
+ * @param sink      receives the generated text.
+ * @param strict    whether ill-formed input is an error.
+ * @param hadPosix  unused.
+ * @param status    in/out error code; nothing is done if it already
+ *                  indicates a failure.
+ */
+static void
+_appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) {
+    (void)hadPosix;
+    char buf[ULOC_FULLNAME_CAPACITY];
+    char tmpAppend[ULOC_FULLNAME_CAPACITY];
+    UErrorCode tmpStatus = U_ZERO_ERROR;
+    int32_t len, i;
+    int32_t reslen = 0;
+    int32_t capacity = sizeof tmpAppend;
+
+    if (U_FAILURE(*status)) {
+        return;
+    }
+
+    len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
+    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+        if (strict) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+        return;
+    }
+
+    if (len > 0) {
+        char *p, *pPriv;
+        UBool bNext = TRUE;
+        UBool firstValue = TRUE;
+        UBool writeValue;
+
+        pPriv = NULL;
+        p = buf;
+        while (bNext) {
+            writeValue = FALSE;
+            if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
+                if (*p == 0) {
+                    bNext = FALSE;
+                } else {
+                    *p = 0; /* terminate */
+                }
+                if (pPriv != NULL) {
+                    /* Private use in the canonical format is lowercase in BCP47 */
+                    for (i = 0; *(pPriv + i) != 0; i++) {
+                        *(pPriv + i) = uprv_tolower(*(pPriv + i));
+                    }
+
+                    /* validate */
+                    if (_isPrivateuseValueSubtag(pPriv, -1)) {
+                        if (firstValue) {
+                            /* well-formed variants in front are not private use */
+                            if (!_isVariantSubtag(pPriv, -1)) {
+                                writeValue = TRUE;
+                            }
+                        } else {
+                            writeValue = TRUE;
+                        }
+                    } else if (strict) {
+                        *status = U_ILLEGAL_ARGUMENT_ERROR;
+                        break;
+                    } else {
+                        break;
+                    }
+
+                    if (writeValue) {
+                        if (reslen < capacity) {
+                            tmpAppend[reslen++] = SEP;
+                        }
+
+                        if (firstValue) {
+                            if (reslen < capacity) {
+                                tmpAppend[reslen++] = *PRIVATEUSE_KEY;
+                            }
+
+                            if (reslen < capacity) {
+                                tmpAppend[reslen++] = SEP;
+                            }
+
+                            len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
+                            if (reslen < capacity) {
+                                uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
+                            }
+                            reslen += len;
+
+                            if (reslen < capacity) {
+                                tmpAppend[reslen++] = SEP;
+                            }
+
+                            firstValue = FALSE;
+                        }
+
+                        len = (int32_t)uprv_strlen(pPriv);
+                        if (reslen < capacity) {
+                            uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
+                        }
+                        /* counts the full subtag even if only part of it was stored */
+                        reslen += len;
+                    }
+                }
+                /* reset private use starting position */
+                pPriv = NULL;
+            } else if (pPriv == NULL) {
+                pPriv = p;
+            }
+            p++;
+        }
+
+        if (U_FAILURE(*status)) {
+            return;
+        }
+    }
+
+    if (U_SUCCESS(*status)) {
+        /* reslen may be larger than the buffer (see above): never pass more
+           than the bytes that really are in tmpAppend */
+        sink.Append(tmpAppend, uprv_min(reslen, capacity));
+    }
+}
+
+/*
+* -------------------------------------------------
+*
+* ultag_ functions
+*
+* -------------------------------------------------
+*/
+
+/* Bit flags used by the parser
+ *
+ * ultag_parse() keeps a mask of these flags (its local variable "next") that
+ * names the kinds of subtag it is prepared to accept at the current
+ * position, and tests the mask bit by bit for each subtag:
+ *   LANG - language subtag (ultag_isLanguageSubtag)
+ *   EXTL - extlang subtag (_isExtlangSubtag)
+ *   SCRT - script subtag (ultag_isScriptSubtag)
+ *   REGN - region subtag (ultag_isRegionSubtag)
+ *   VART - variant subtag (_isVariantSubtag)
+ *   EXTS, EXTV, PRIV - presumably extension singleton, extension value and
+ *          private use; their handling is further down in ultag_parse()
+ *          (TODO confirm there).
+ */
+#define LANG 0x0001
+#define EXTL 0x0002
+#define SCRT 0x0004
+#define REGN 0x0008
+#define VART 0x0010
+#define EXTS 0x0020
+#define EXTV 0x0040
+#define PRIV 0x0080
+
+/**
+ * Ticket #12705 - The optimizer in Visual Studio 2015 Update 3 has problems optimizing this function.
+ * As a work-around, optimization is disabled for this function on VS2015 and VS2017.
+ * This work-around should be removed once the following versions of Visual Studio are no
+ * longer supported: All versions of VS2015/VS2017, and versions of VS2019 below 16.4.
+ */
+#if defined(_MSC_VER) && (_MSC_VER >= 1900) && (_MSC_VER < 1924)
+#pragma optimize( "", off )
+#endif
+
+static ULanguageTag*
+ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
+ char *tagBuf;
+ int16_t next;
+ char *pSubtag, *pNext, *pLastGoodPosition;
+ int32_t subtagLen;
+ int32_t extlangIdx;
+ ExtensionListEntry *pExtension;
+ char *pExtValueSubtag, *pExtValueSubtagEnd;
+ int32_t i;
+ UBool privateuseVar = FALSE;
+ int32_t legacyLen = 0;
+
+ if (parsedLen != NULL) {
+ *parsedLen = 0;
+ }
+
+ if (U_FAILURE(*status)) {
+ return NULL;
+ }
+
+ if (tagLen < 0) {
+ tagLen = (int32_t)uprv_strlen(tag);
+ }
+
+ /* copy the entire string */
+ tagBuf = (char*)uprv_malloc(tagLen + 1);
+ if (tagBuf == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ uprv_memcpy(tagBuf, tag, tagLen);
+ *(tagBuf + tagLen) = 0;
+
+ /* create a ULanguageTag */
+ icu::LocalULanguageTagPointer t(
+ (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)));
+ if (t.isNull()) {
+ uprv_free(tagBuf);
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ _initializeULanguageTag(t.getAlias());
+ t->buf = tagBuf;
+
+ if (tagLen < MINLEN) {
+ /* the input tag is too short - return empty ULanguageTag */
+ return t.orphan();
+ }
+
+ size_t parsedLenDelta = 0;
+ // Legacy tag will be consider together. Legacy tag with intervening
+ // script and region such as art-DE-lojban or art-Latn-lojban won't be
+ // matched.
+ /* check if the tag is legacy */
+ for (i = 0; i < UPRV_LENGTHOF(LEGACY); i += 2) {
+ int32_t checkLegacyLen = static_cast<int32_t>(uprv_strlen(LEGACY[i]));
+ if (tagLen < checkLegacyLen) {
+ continue;
+ }
+ if (tagLen > checkLegacyLen && tagBuf[checkLegacyLen] != '-') {
+ // make sure next char is '-'.
+ continue;
+ }
+ if (uprv_strnicmp(LEGACY[i], tagBuf, checkLegacyLen) == 0) {
+ int32_t newTagLength;
+
+ legacyLen = checkLegacyLen; /* back up for output parsedLen */
+ int32_t replacementLen = static_cast<int32_t>(uprv_strlen(LEGACY[i+1]));
+ newTagLength = replacementLen + tagLen - checkLegacyLen;
+ if (tagLen < newTagLength) {
+ uprv_free(tagBuf);
+ tagBuf = (char*)uprv_malloc(newTagLength + 1);
+ if (tagBuf == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ t->buf = tagBuf;
+ tagLen = newTagLength;
+ }
+ parsedLenDelta = checkLegacyLen - replacementLen;
+ uprv_strcpy(t->buf, LEGACY[i + 1]);
+ if (checkLegacyLen != tagLen) {
+ uprv_strcpy(t->buf + replacementLen, tag + checkLegacyLen);
+ }
+ break;
+ }
+ }
+
+ if (legacyLen == 0) {
+ for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
+ const char* redundantTag = REDUNDANT[i];
+ size_t redundantTagLen = uprv_strlen(redundantTag);
+ // The preferred tag for a redundant tag is always shorter than redundant
+ // tag. A redundant tag may or may not be followed by other subtags.
+ // (i.e. "zh-yue" or "zh-yue-u-co-pinyin").
+ if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) {
+ const char* redundantTagEnd = tagBuf + redundantTagLen;
+ if (*redundantTagEnd == '\0' || *redundantTagEnd == SEP) {
+ const char* preferredTag = REDUNDANT[i + 1];
+ size_t preferredTagLen = uprv_strlen(preferredTag);
+ uprv_strncpy(t->buf, preferredTag, preferredTagLen);
+ if (*redundantTagEnd == SEP) {
+ uprv_memmove(tagBuf + preferredTagLen,
+ redundantTagEnd,
+ tagLen - redundantTagLen + 1);
+ } else {
+ tagBuf[preferredTagLen] = '\0';
+ }
+ // parsedLen should be the length of the input
+ // before redundantTag is replaced by preferredTag.
+ // Save the delta to add it back later.
+ parsedLenDelta = redundantTagLen - preferredTagLen;
+ break;
+ }
+ }
+ }
+ }
+
+ /*
+ * langtag = language
+ * ["-" script]
+ * ["-" region]
+ * *("-" variant)
+ * *("-" extension)
+ * ["-" privateuse]
+ */
+
+ next = LANG | PRIV;
+ pNext = pLastGoodPosition = tagBuf;
+ extlangIdx = 0;
+ pExtension = NULL;
+ pExtValueSubtag = NULL;
+ pExtValueSubtagEnd = NULL;
+
+ while (pNext) {
+ char *pSep;
+
+ pSubtag = pNext;
+
+ /* locate next separator char */
+ pSep = pSubtag;
+ while (*pSep) {
+ if (*pSep == SEP) {
+ break;
+ }
+ pSep++;
+ }
+ if (*pSep == 0) {
+ /* last subtag */
+ pNext = NULL;
+ } else {
+ pNext = pSep + 1;
+ }
+ subtagLen = (int32_t)(pSep - pSubtag);
+
+ if (next & LANG) {
+ if (ultag_isLanguageSubtag(pSubtag, subtagLen)) {
+ *pSep = 0; /* terminate */
+ // TODO: move deprecated language code handling here.
+ t->language = T_CString_toLowerCase(pSubtag);
+
+ pLastGoodPosition = pSep;
+ next = SCRT | REGN | VART | EXTS | PRIV;
+ if (subtagLen <= 3)
+ next |= EXTL;
+ continue;
+ }
+ }
+ if (next & EXTL) {
+ if (_isExtlangSubtag(pSubtag, subtagLen)) {
+ *pSep = 0;
+ t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
+
+ pLastGoodPosition = pSep;
+ if (extlangIdx < 3) {
+ next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
+ } else {
+ next = SCRT | REGN | VART | EXTS | PRIV;
+ }
+ continue;
+ }
+ }
+ if (next & SCRT) {
+ if (ultag_isScriptSubtag(pSubtag, subtagLen)) {
+ char *p = pSubtag;
+
+ *pSep = 0;
+
+ /* to title case */
+ *p = uprv_toupper(*p);
+ p++;
+ for (; *p; p++) {
+ *p = uprv_tolower(*p);
+ }
+
+ t->script = pSubtag;
+
+ pLastGoodPosition = pSep;
+ next = REGN | VART | EXTS | PRIV;
+ continue;
+ }
+ }
+ if (next & REGN) {
+ if (ultag_isRegionSubtag(pSubtag, subtagLen)) {
+ *pSep = 0;
+ // TODO: move deprecated region code handling here.
+ t->region = T_CString_toUpperCase(pSubtag);
+
+ pLastGoodPosition = pSep;
+ next = VART | EXTS | PRIV;
+ continue;
+ }
+ }
+ if (next & VART) {
+ if (_isVariantSubtag(pSubtag, subtagLen) ||
+ (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
+ VariantListEntry *var;
+ UBool isAdded;
+
+ var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
+ if (var == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ *pSep = 0;
+ var->variant = T_CString_toUpperCase(pSubtag);
+ isAdded = _addVariantToList(&(t->variants), var);
+ if (!isAdded) {
+ /* duplicated variant entry */
+ uprv_free(var);
+ break;
+ }
+ pLastGoodPosition = pSep;
+ next = VART | EXTS | PRIV;
+ continue;
+ }
+ }
+ if (next & EXTS) {
+ if (_isExtensionSingleton(pSubtag, subtagLen)) {
+ if (pExtension != NULL) {
+ if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
+ /* the previous extension is incomplete */
+ uprv_free(pExtension);
+ pExtension = NULL;
+ break;
+ }
+
+ /* terminate the previous extension value */
+ *pExtValueSubtagEnd = 0;
+ pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
+
+ /* insert the extension to the list */
+ if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
+ pLastGoodPosition = pExtValueSubtagEnd;
+ } else {
+ /* stop parsing here */
+ uprv_free(pExtension);
+ pExtension = NULL;
+ break;
+ }
+ }
+
+ /* create a new extension */
+ pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
+ if (pExtension == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ *pSep = 0;
+ pExtension->key = T_CString_toLowerCase(pSubtag);
+ pExtension->value = NULL; /* will be set later */
+
+ /*
+ * reset the start and the end location of extension value
+ * subtags for this extension
+ */
+ pExtValueSubtag = NULL;
+ pExtValueSubtagEnd = NULL;
+
+ next = EXTV;
+ continue;
+ }
+ }
+ if (next & EXTV) {
+ if (_isExtensionSubtag(pSubtag, subtagLen)) {
+ if (pExtValueSubtag == NULL) {
+ /* if the start position of this extension's value is not yet set,
+ this one is the first value subtag */
+ pExtValueSubtag = pSubtag;
+ }
+
+ /* Mark the end of this subtag */
+ pExtValueSubtagEnd = pSep;
+ next = EXTS | EXTV | PRIV;
+
+ continue;
+ }
+ }
+ if (next & PRIV) {
+ if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
+ char *pPrivuseVal;
+
+ if (pExtension != NULL) {
+ /* Process the last extension */
+ if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
+ /* the previous extension is incomplete */
+ uprv_free(pExtension);
+ pExtension = NULL;
+ break;
+ } else {
+ /* terminate the previous extension value */
+ *pExtValueSubtagEnd = 0;
+ pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
+
+ /* insert the extension to the list */
+ if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
+ pLastGoodPosition = pExtValueSubtagEnd;
+ pExtension = NULL;
+ } else {
+ /* stop parsing here */
+ uprv_free(pExtension);
+ pExtension = NULL;
+ break;
+ }
+ }
+ }
+
+ /* The rest of part will be private use value subtags */
+ if (pNext == NULL) {
+ /* empty private use subtag */
+ break;
+ }
+ /* back up the private use value start position */
+ pPrivuseVal = pNext;
+
+ /* validate private use value subtags */
+ while (pNext) {
+ pSubtag = pNext;
+ pSep = pSubtag;
+ while (*pSep) {
+ if (*pSep == SEP) {
+ break;
+ }
+ pSep++;
+ }
+ if (*pSep == 0) {
+ /* last subtag */
+ pNext = NULL;
+ } else {
+ pNext = pSep + 1;
+ }
+ subtagLen = (int32_t)(pSep - pSubtag);
+
+ if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
+ *pSep = 0;
+ next = VART;
+ privateuseVar = TRUE;
+ break;
+ } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
+ pLastGoodPosition = pSep;
+ } else {
+ break;
+ }
+ }
+
+ if (next == VART) {
+ continue;
+ }
+
+ if (pLastGoodPosition - pPrivuseVal > 0) {
+ *pLastGoodPosition = 0;
+ t->privateuse = T_CString_toLowerCase(pPrivuseVal);
+ }
+ /* No more subtags, exiting the parse loop */
+ break;
+ }
+ break;
+ }
+
+ /* If we fell through here, it means this subtag is illegal - quit parsing */
+ break;
+ }
+
+ if (pExtension != NULL) {
+ /* Process the last extension */
+ if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
+ /* the previous extension is incomplete */
+ uprv_free(pExtension);
+ } else {
+ /* terminate the previous extension value */
+ *pExtValueSubtagEnd = 0;
+ pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
+ /* insert the extension to the list */
+ if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
+ pLastGoodPosition = pExtValueSubtagEnd;
+ } else {
+ uprv_free(pExtension);
+ }
+ }
+ }
+
+ if (parsedLen != NULL) {
+ *parsedLen = (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
+ }
+
+ return t.orphan();
+}
+
+// Ticket #12705 - Turn optimization back on.
+#if defined(_MSC_VER) && (_MSC_VER >= 1900) && (_MSC_VER < 1924)
+#pragma optimize( "", on )
+#endif
+
+ /**
+  * Releases a ULanguageTag and the memory hanging off it: the tag buffer,
+  * every node of the variant list, every node of the extension list and
+  * finally the tag structure itself. A NULL langtag is ignored.
+  */
+ static void
+ ultag_close(ULanguageTag* langtag) {
+     if (langtag == NULL) {
+         return;
+     }
+
+     uprv_free(langtag->buf);
+
+     /* Read each node's link before the node is freed. */
+     for (VariantListEntry *varNode = langtag->variants; varNode != NULL; ) {
+         VariantListEntry *following = varNode->next;
+         uprv_free(varNode);
+         varNode = following;
+     }
+
+     for (ExtensionListEntry *extNode = langtag->extensions; extNode != NULL; ) {
+         ExtensionListEntry *following = extNode->next;
+         uprv_free(extNode);
+         extNode = following;
+     }
+
+     uprv_free(langtag);
+ }
+
+ /**
+  * Returns the language field stored on a parsed tag.
+  * langtag is dereferenced without a NULL check; the pointer is returned
+  * exactly as stored.
+  */
+ static const char*
+ ultag_getLanguage(const ULanguageTag* langtag) {
+ return langtag->language;
+ }
+
+ #if 0
+ /*
+  * Compiled out. Looks the tag's language up in DEPRECATEDLANGS, which is
+  * walked two entries at a time until a NULL entry: when an even-indexed
+  * entry equals the language, the entry right after it is returned.
+  * Otherwise the tag's own language is returned.
+  */
+ static const char*
+ ultag_getJDKLanguage(const ULanguageTag* langtag) {
+     for (int32_t pairIdx = 0; DEPRECATEDLANGS[pairIdx] != NULL; pairIdx += 2) {
+         const UBool matches =
+             uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pairIdx], langtag->language) == 0;
+         if (matches) {
+             return DEPRECATEDLANGS[pairIdx + 1];
+         }
+     }
+     return langtag->language;
+ }
+ #endif
+
+ /**
+  * Returns the extlang entry at position idx, or NULL when idx lies
+  * outside [0, MAXEXTLANG).
+  */
+ static const char*
+ ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
+     if (idx < 0 || idx >= MAXEXTLANG) {
+         return NULL;
+     }
+     return langtag->extlang[idx];
+ }
+
+ /**
+  * Counts how many of the MAXEXTLANG extlang slots on the tag are non-NULL.
+  */
+ static int32_t
+ ultag_getExtlangSize(const ULanguageTag* langtag) {
+     int32_t filled = 0;
+     for (int32_t slot = 0; slot < MAXEXTLANG; ++slot) {
+         filled += langtag->extlang[slot] ? 1 : 0;
+     }
+     return filled;
+ }
+
+ /**
+  * Returns the script field stored on a parsed tag.
+  * langtag is dereferenced without a NULL check.
+  */
+ static const char*
+ ultag_getScript(const ULanguageTag* langtag) {
+ return langtag->script;
+ }
+
+ /**
+  * Returns the region field stored on a parsed tag.
+  * langtag is dereferenced without a NULL check.
+  */
+ static const char*
+ ultag_getRegion(const ULanguageTag* langtag) {
+ return langtag->region;
+ }
+
+ /**
+  * Returns the variant string held by the idx-th node (0-based) of the
+  * tag's variant list, or NULL when the list has no node at that position.
+  */
+ static const char*
+ ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
+     int32_t position = 0;
+     for (VariantListEntry *node = langtag->variants; node != NULL; node = node->next) {
+         if (position == idx) {
+             return node->variant;
+         }
+         ++position;
+     }
+     return NULL;
+ }
+
+ /**
+  * Returns the number of nodes in the tag's variant list.
+  */
+ static int32_t
+ ultag_getVariantsSize(const ULanguageTag* langtag) {
+     int32_t count = 0;
+     for (VariantListEntry *node = langtag->variants; node != NULL; node = node->next) {
+         ++count;
+     }
+     return count;
+ }
+
+ /**
+  * Returns the key held by the idx-th node (0-based) of the tag's extension
+  * list, or NULL when the list has no node at that position.
+  */
+ static const char*
+ ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
+     int32_t position = 0;
+     for (ExtensionListEntry *node = langtag->extensions; node != NULL; node = node->next) {
+         if (position == idx) {
+             return node->key;
+         }
+         ++position;
+     }
+     return NULL;
+ }
+
+ /**
+  * Returns the value held by the idx-th node (0-based) of the tag's
+  * extension list, or NULL when the list has no node at that position.
+  */
+ static const char*
+ ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
+     int32_t position = 0;
+     for (ExtensionListEntry *node = langtag->extensions; node != NULL; node = node->next) {
+         if (position == idx) {
+             return node->value;
+         }
+         ++position;
+     }
+     return NULL;
+ }
+
+ /**
+  * Returns the number of nodes in the tag's extension list.
+  */
+ static int32_t
+ ultag_getExtensionsSize(const ULanguageTag* langtag) {
+     int32_t count = 0;
+     for (ExtensionListEntry *node = langtag->extensions; node != NULL; node = node->next) {
+         ++count;
+     }
+     return count;
+ }
+
+ /**
+  * Returns the privateuse field stored on a parsed tag.
+  * langtag is dereferenced without a NULL check.
+  */
+ static const char*
+ ultag_getPrivateUse(const ULanguageTag* langtag) {
+ return langtag->privateuse;
+ }
+
+ #if 0
+ /*
+  * Compiled out by the surrounding #if 0.
+  * Returns the legacy field stored on a parsed tag.
+  */
+ static const char*
+ ultag_getLegacy(const ULanguageTag* langtag) {
+ return langtag->legacy;
+ }
+ #endif
+
+
+/*
+* -------------------------------------------------
+*
+* Locale/BCP47 conversion APIs, exposed as uloc_*
+*
+* -------------------------------------------------
+*/
+ /*
+  * Buffer-based wrapper around ulocimp_toLanguageTag().
+  *
+  * Returns 0 without doing any work when *status already holds a failure.
+  * Otherwise the tag is produced through a CheckedArrayByteSink over
+  * langtag/langtagCapacity and the sink's NumberOfBytesAppended() value is
+  * returned. If the conversion succeeded, *status becomes
+  * U_BUFFER_OVERFLOW_ERROR when the sink overflowed; otherwise the output is
+  * terminated through u_terminateChars().
+  */
+ U_CAPI int32_t U_EXPORT2
+ uloc_toLanguageTag(const char* localeID,
+                    char* langtag,
+                    int32_t langtagCapacity,
+                    UBool strict,
+                    UErrorCode* status) {
+     if (U_FAILURE(*status)) {
+         return 0;
+     }
+
+     icu::CheckedArrayByteSink tagSink(langtag, langtagCapacity);
+     ulocimp_toLanguageTag(localeID, tagSink, strict, status);
+
+     const int32_t written = tagSink.NumberOfBytesAppended();
+
+     if (U_SUCCESS(*status)) {
+         if (tagSink.Overflowed()) {
+             *status = U_BUFFER_OVERFLOW_ERROR;
+         } else {
+             u_terminateChars(langtag, langtagCapacity, written, status);
+         }
+     }
+
+     return written;
+ }
+
+
+ /*
+  * Writes the language tag for localeID to sink.
+  *
+  * Steps, in order:
+  *  1. A non-empty localeID is canonicalized with uloc_canonicalize() into
+  *     the local CharString `canonical`; an empty localeID skips this step
+  *     and leaves `canonical` empty.
+  *  2. If the keywords start at the very beginning of the canonical ID and
+  *     there is exactly one keyword whose name is the single PRIVATEUSE
+  *     character, the result is either a private-use-only tag, an error
+  *     (strict mode), or a fall-through to step 3.
+  *  3. Language, script, region, variants, keywords and private use are
+  *     appended by the _append*ToLanguageTag helpers.
+  *
+  * NOTE(review): this function does not itself test *status on entry; the
+  * wrapper uloc_toLanguageTag() does that before calling it.
+  */
+ U_CAPI void U_EXPORT2
+ ulocimp_toLanguageTag(const char* localeID,
+ icu::ByteSink& sink,
+ UBool strict,
+ UErrorCode* status) {
+ icu::CharString canonical;
+ int32_t reslen;
+ UErrorCode tmpStatus = U_ZERO_ERROR;
+ UBool hadPosix = FALSE;
+ const char* pKeywordStart;
+
+ /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
+ int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID));
+ if (resultCapacity > 0) {
+ char* buffer;
+
+ /* Retry loop: start with a buffer as long as the input and, whenever
+    uloc_canonicalize reports U_BUFFER_OVERFLOW_ERROR, ask again using
+    the length it returned. */
+ for (;;) {
+ buffer = canonical.getAppendBuffer(
+ /*minCapacity=*/resultCapacity,
+ /*desiredCapacityHint=*/resultCapacity,
+ resultCapacity,
+ tmpStatus);
+
+ if (U_FAILURE(tmpStatus)) {
+ *status = tmpStatus;
+ return;
+ }
+
+ reslen =
+ uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
+
+ if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
+ break;
+ }
+
+ resultCapacity = reslen;
+ tmpStatus = U_ZERO_ERROR;
+ }
+
+ /* Any canonicalization failure other than overflow is reported to the
+    caller as U_ILLEGAL_ARGUMENT_ERROR, not as the original code. */
+ if (U_FAILURE(tmpStatus)) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ canonical.append(buffer, reslen, tmpStatus);
+ if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+ tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
+ }
+
+ if (U_FAILURE(tmpStatus)) {
+ *status = tmpStatus;
+ return;
+ }
+ }
+
+ /* For handling special case - private use only tag */
+ pKeywordStart = locale_getKeywordsStart(canonical.data());
+ if (pKeywordStart == canonical.data()) {
+ int kwdCnt = 0;
+ UBool done = FALSE;
+
+ icu::LocalUEnumerationPointer kwdEnum(uloc_openKeywords(canonical.data(), &tmpStatus));
+ if (U_SUCCESS(tmpStatus)) {
+ kwdCnt = uenum_count(kwdEnum.getAlias(), &tmpStatus);
+ if (kwdCnt == 1) {
+ const char *key;
+ int32_t len = 0;
+
+ key = uenum_next(kwdEnum.getAlias(), &len, &tmpStatus);
+ if (len == 1 && *key == PRIVATEUSE) {
+ icu::CharString buf;
+ {
+ /* This local `sink` shadows the function parameter inside the
+    braces only; presumably the scope ends the CharStringByteSink
+    before buf is read - TODO confirm. Note the value is looked up
+    in the original localeID, not in `canonical`. */
+ icu::CharStringByteSink sink(&buf);
+ ulocimp_getKeywordValue(localeID, key, sink, &tmpStatus);
+ }
+ if (U_SUCCESS(tmpStatus)) {
+ if (ultag_isPrivateuseValueSubtags(buf.data(), buf.length())) {
+ /* return private use only tag */
+ static const char PREFIX[] = { PRIVATEUSE, SEP };
+ sink.Append(PREFIX, sizeof(PREFIX));
+ sink.Append(buf.data(), buf.length());
+ done = TRUE;
+ } else if (strict) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ done = TRUE;
+ }
+ /* if not strict mode, then "und" will be returned */
+ } else {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ done = TRUE;
+ }
+ }
+ }
+ if (done) {
+ return;
+ }
+ }
+ }
+
+ /* General case: each helper receives the canonical ID, the output sink,
+    the strict flag and the caller's status. hadPosix is filled in by the
+    variants helper and passed on to the two helpers after it. */
+ _appendLanguageToLanguageTag(canonical.data(), sink, strict, status);
+ _appendScriptToLanguageTag(canonical.data(), sink, strict, status);
+ _appendRegionToLanguageTag(canonical.data(), sink, strict, status);
+ _appendVariantsToLanguageTag(canonical.data(), sink, strict, &hadPosix, status);
+ _appendKeywordsToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
+ _appendPrivateuseToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
+ }
+
+
+ /*
+  * Buffer-based wrapper around ulocimp_forLanguageTag().
+  *
+  * Returns 0 without doing any work when *status already holds a failure.
+  * Otherwise langtag is converted (its length is passed as -1) through a
+  * CheckedArrayByteSink over localeID/localeIDCapacity, and the sink's
+  * NumberOfBytesAppended() value is returned. If the conversion succeeded,
+  * *status becomes U_BUFFER_OVERFLOW_ERROR when the sink overflowed;
+  * otherwise the output is terminated through u_terminateChars().
+  */
+ U_CAPI int32_t U_EXPORT2
+ uloc_forLanguageTag(const char* langtag,
+                     char* localeID,
+                     int32_t localeIDCapacity,
+                     int32_t* parsedLength,
+                     UErrorCode* status) {
+     if (U_FAILURE(*status)) {
+         return 0;
+     }
+
+     icu::CheckedArrayByteSink idSink(localeID, localeIDCapacity);
+     ulocimp_forLanguageTag(langtag, -1, idSink, parsedLength, status);
+
+     const int32_t written = idSink.NumberOfBytesAppended();
+
+     if (U_SUCCESS(*status)) {
+         if (idSink.Overflowed()) {
+             *status = U_BUFFER_OVERFLOW_ERROR;
+         } else {
+             u_terminateChars(localeID, localeIDCapacity, written, status);
+         }
+     }
+
+     return written;
+ }
+
+
+ /*
+  * Parses langtag with ultag_parse() and writes the pieces of the parsed
+  * tag to sink, in this order:
+  *   - the first extlang if there is one, else the language, unless it
+  *     compares equal to LANG_UND or is empty;
+  *   - "_" + script, first character upper-cased;
+  *   - "_" + region, upper-cased;
+  *   - the variants (sorted first), each as "_" + upper-cased variant, with
+  *     one extra "_" in front when no region was written;
+  *   - the keywords through _appendKeywords(), preceded by LANG_UND when
+  *     nothing has been written so far and there is at least one extension.
+  * Returns early, writing nothing, when parsing leaves a failure in *status.
+  */
+ U_CAPI void U_EXPORT2
+ ulocimp_forLanguageTag(const char* langtag,
+                        int32_t tagLen,
+                        icu::ByteSink& sink,
+                        int32_t* parsedLength,
+                        UErrorCode* status) {
+     icu::LocalULanguageTagPointer lt(ultag_parse(langtag, tagLen, parsedLength, status));
+     if (U_FAILURE(*status)) {
+         return;
+     }
+
+     ULanguageTag* parsed = lt.getAlias();
+     UBool nothingWritten = TRUE;
+     UBool hasRegion = FALSE;
+
+     /* language */
+     const char* piece = (ultag_getExtlangSize(parsed) > 0)
+         ? ultag_getExtlang(parsed, 0)
+         : ultag_getLanguage(parsed);
+     if (uprv_compareInvCharsAsAscii(piece, LANG_UND) != 0) {
+         const int32_t languageLen = static_cast<int32_t>(uprv_strlen(piece));
+         if (languageLen > 0) {
+             sink.Append(piece, languageLen);
+             nothingWritten = FALSE;
+         }
+     }
+
+     /* script */
+     piece = ultag_getScript(parsed);
+     const int32_t scriptLen = static_cast<int32_t>(uprv_strlen(piece));
+     if (scriptLen > 0) {
+         sink.Append("_", 1);
+         nothingWritten = FALSE;
+
+         /* write out the script in title case */
+         char initial = uprv_toupper(*piece);
+         sink.Append(&initial, 1);
+         sink.Append(piece + 1, scriptLen - 1);
+     }
+
+     /* region */
+     piece = ultag_getRegion(parsed);
+     const int32_t regionLen = static_cast<int32_t>(uprv_strlen(piece));
+     if (regionLen > 0) {
+         sink.Append("_", 1);
+         nothingWritten = FALSE;
+
+         /* write out the region in upper case */
+         for (const char* cursor = piece; *cursor; ++cursor) {
+             char upper = uprv_toupper(*cursor);
+             sink.Append(&upper, 1);
+         }
+         hasRegion = TRUE;
+     }
+
+     /* variants */
+     _sortVariants(parsed->variants);
+     const int32_t variantCount = ultag_getVariantsSize(parsed);
+     if (variantCount > 0) {
+         if (!hasRegion) {
+             sink.Append("_", 1);
+             nothingWritten = FALSE;
+         }
+
+         for (int32_t v = 0; v < variantCount; ++v) {
+             piece = ultag_getVariant(parsed, v);
+             sink.Append("_", 1);
+
+             /* write out the variant in upper case */
+             for (const char* cursor = piece; *cursor; ++cursor) {
+                 char upper = uprv_toupper(*cursor);
+                 sink.Append(&upper, 1);
+             }
+         }
+     }
+
+     /* keywords */
+     const int32_t extensionCount = ultag_getExtensionsSize(parsed);
+     piece = ultag_getPrivateUse(parsed);
+     if (extensionCount > 0 || uprv_strlen(piece) > 0) {
+         if (nothingWritten && extensionCount > 0) {
+             /* need a language */
+             sink.Append(LANG_UND, LANG_UND_LEN);
+         }
+         _appendKeywords(parsed, sink, status);
+     }
+ }
diff --git a/thirdparty/icu4c/common/ulocimp.h b/thirdparty/icu4c/common/ulocimp.h
new file mode 100644
index 0000000000..5691fe9a77
--- /dev/null
+++ b/thirdparty/icu4c/common/ulocimp.h
@@ -0,0 +1,307 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2004-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#ifndef ULOCIMP_H
+#define ULOCIMP_H
+
+#include "unicode/bytestream.h"
+#include "unicode/uloc.h"
+
+#include "charstr.h"
+
+/**
+ * Create an iterator over the specified keywords list
+ * @param keywordList double-null terminated list. Will be copied.
+ * @param keywordListSize size in bytes of keywordList
+ * @param status err code
+ * @return enumeration (owned by caller) of the keyword list.
+ * @internal ICU 3.0
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);
+
+/**
+ * Look up a resource bundle table item with fallback on the table level.
+ * This is accessible so it can be called by C++ code.
+ */
+U_CAPI const UChar * U_EXPORT2
+uloc_getTableStringWithFallback(
+ const char *path,
+ const char *locale,
+ const char *tableKey,
+ const char *subTableKey,
+ const char *itemKey,
+ int32_t *pLength,
+ UErrorCode *pErrorCode);
+
+ /* Returns true if a is an ID separator ('_' or '-'), false otherwise.
+  * The argument is parenthesized so that an expression argument (for
+  * example a conditional expression) is compared as a whole. Note that a
+  * may be evaluated twice, so it should be free of side effects. */
+ #define _isIDSeparator(a) ((a) == '_' || (a) == '-')
+
+U_CFUNC const char*
+uloc_getCurrentCountryID(const char* oldID);
+
+U_CFUNC const char*
+uloc_getCurrentLanguageID(const char* oldID);
+
+U_CFUNC void
+ulocimp_getKeywords(const char *localeID,
+ char prev,
+ icu::ByteSink& sink,
+ UBool valuesToo,
+ UErrorCode *status);
+
+icu::CharString U_EXPORT2
+ulocimp_getLanguage(const char *localeID,
+ const char **pEnd,
+ UErrorCode &status);
+
+icu::CharString U_EXPORT2
+ulocimp_getScript(const char *localeID,
+ const char **pEnd,
+ UErrorCode &status);
+
+icu::CharString U_EXPORT2
+ulocimp_getCountry(const char *localeID,
+ const char **pEnd,
+ UErrorCode &status);
+
+U_CAPI void U_EXPORT2
+ulocimp_getName(const char* localeID,
+ icu::ByteSink& sink,
+ UErrorCode* err);
+
+U_CAPI void U_EXPORT2
+ulocimp_getBaseName(const char* localeID,
+ icu::ByteSink& sink,
+ UErrorCode* err);
+
+U_CAPI void U_EXPORT2
+ulocimp_canonicalize(const char* localeID,
+ icu::ByteSink& sink,
+ UErrorCode* err);
+
+U_CAPI void U_EXPORT2
+ulocimp_getKeywordValue(const char* localeID,
+ const char* keywordName,
+ icu::ByteSink& sink,
+ UErrorCode* status);
+
+/**
+ * Writes a well-formed language tag for this locale ID.
+ *
+ * **Note**: When `strict` is false, any locale fields which do not satisfy the
+ * BCP47 syntax requirement will be omitted from the result. When `strict` is
+ * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the `err` if any locale
+ * fields do not satisfy the BCP47 syntax requirement.
+ *
+ * @param localeID the input locale ID
+ * @param sink the output sink receiving the BCP47 language
+ * tag for this Locale.
+ * @param strict boolean value indicating if the function returns
+ * an error for an ill-formed input locale ID.
+ * @param err error information if receiving the language
+ * tag failed.
+ * (No value is returned; the language tag is delivered through sink.)
+ *
+ * @internal ICU 64
+ */
+U_CAPI void U_EXPORT2
+ulocimp_toLanguageTag(const char* localeID,
+ icu::ByteSink& sink,
+ UBool strict,
+ UErrorCode* err);
+
+/**
+ * Returns a locale ID for the specified BCP47 language tag string.
+ * If the specified language tag contains any ill-formed subtags,
+ * the first such subtag and all following subtags are ignored.
+ * <p>
+ * This implements the 'Language-Tag' production of BCP 47, and so
+ * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * (regular and irregular) as well as private use language tags.
+ *
+ * Private use tags are represented as 'x-whatever',
+ * and legacy tags are converted to their canonical replacements where they exist.
+ *
+ * Note that a few legacy tags have no modern replacement;
+ * these will be converted using the fallback described in
+ * the first paragraph, so some information might be lost.
+ *
+ * @param langtag the input BCP47 language tag.
+ * @param tagLen the length of langtag, or -1 to call uprv_strlen().
+ * @param sink the output sink receiving a locale ID for the
+ * specified BCP47 language tag.
+ * @param parsedLength if not NULL, successfully parsed length
+ * for the input language tag is set.
+ * @param err error information if receiving the locale ID
+ * failed.
+ * @internal ICU 63
+ */
+U_CAPI void U_EXPORT2
+ulocimp_forLanguageTag(const char* langtag,
+ int32_t tagLen,
+ icu::ByteSink& sink,
+ int32_t* parsedLength,
+ UErrorCode* err);
+
+/**
+ * Get the region to use for supplemental data lookup. Uses
+ * (1) any region specified by locale tag "rg"; if none then
+ * (2) any unicode_region_tag in the locale ID; if none then
+ * (3) if inferRegion is true, the region suggested by
+ * getLikelySubtags on the localeID.
+ * If no region is found, returns length 0.
+ *
+ * @param localeID
+ * The complete locale ID (with keywords) from which
+ * to get the region to use for supplemental data.
+ * @param inferRegion
+ * If true, will try to infer region from localeID if
+ * no other region is found.
+ * @param region
+ * Buffer in which to put the region ID found; should
+ * have a capacity at least ULOC_COUNTRY_CAPACITY.
+ * @param regionCapacity
+ * The actual capacity of the region buffer.
+ * @param status
+ * Pointer to in/out UErrorCode value for latest status.
+ * @return
+ * The length of any region code found, or 0 if none.
+ * @internal ICU 57
+ */
+U_CAPI int32_t U_EXPORT2
+ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
+ char *region, int32_t regionCapacity, UErrorCode* status);
+
+/**
+ * Add the likely subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the maximal form, or there is no data available
+ * for maximization, it will be copied to the output buffer. For example,
+ * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
+ *
+ * Examples:
+ *
+ * "en" maximizes to "en_Latn_US"
+ *
+ * "de" maximizes to "de_Latn_DE"
+ *
+ * "sr" maximizes to "sr_Cyrl_RS"
+ *
+ * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
+ *
+ * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
+ *
+ * @param localeID The locale to maximize
+ * @param sink The output sink receiving the maximized locale
+ * @param err Error information if maximizing the locale failed. If the length
+ * of the localeID and the null-terminator is greater than the maximum allowed size,
+ * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @internal ICU 64
+ */
+U_CAPI void U_EXPORT2
+ulocimp_addLikelySubtags(const char* localeID,
+ icu::ByteSink& sink,
+ UErrorCode* err);
+
+/**
+ * Minimize the subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the minimal form, or there is no data available
+ * for minimization, it will be copied to the output buffer. Since the
+ * minimization algorithm relies on proper maximization, see the comments
+ * for ulocimp_addLikelySubtags for reasons why there might not be any data.
+ *
+ * Examples:
+ *
+ * "en_Latn_US" minimizes to "en"
+ *
+ * "de_Latn_DE" minimizes to "de"
+ *
+ * "sr_Cyrl_RS" minimizes to "sr"
+ *
+ * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
+ * script, and minimizing to "zh" would imply "zh_Hans_CN".)
+ *
+ * @param localeID The locale to minimize
+ * @param sink The output sink receiving the minimized locale
+ * @param err Error information if minimizing the locale failed. If the length
+ * of the localeID and the null-terminator is greater than the maximum allowed size,
+ * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @internal ICU 64
+ */
+U_CAPI void U_EXPORT2
+ulocimp_minimizeSubtags(const char* localeID,
+ icu::ByteSink& sink,
+ UErrorCode* err);
+
+U_CAPI const char * U_EXPORT2
+locale_getKeywordsStart(const char *localeID);
+
+U_CFUNC UBool
+ultag_isExtensionSubtags(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isLanguageSubtag(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isRegionSubtag(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isScriptSubtag(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isUnicodeLocaleKey(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isUnicodeLocaleType(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isVariantSubtags(const char* s, int32_t len);
+
+U_CFUNC const char*
+ulocimp_toBcpKey(const char* key);
+
+U_CFUNC const char*
+ulocimp_toLegacyKey(const char* key);
+
+U_CFUNC const char*
+ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
+
+U_CFUNC const char*
+ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
+
+/* Function for testing purpose */
+U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length);
+
+// Return true if the value is already canonicalized.
+U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
+
+#endif
diff --git a/thirdparty/icu4c/common/umapfile.cpp b/thirdparty/icu4c/common/umapfile.cpp
new file mode 100644
index 0000000000..3e714876a4
--- /dev/null
+++ b/thirdparty/icu4c/common/umapfile.cpp
@@ -0,0 +1,530 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************/
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Memory mapped file wrappers for use by the ICU Data Implementation
+ * All of the platform-specific implementation for mapping data files
+ * is here. The rest of the ICU Data implementation uses only the
+ * wrapper functions.
+ *
+ *----------------------------------------------------------------------------*/
+/* Defines _XOPEN_SOURCE for access to POSIX functions.
+ * Must be before any other #includes. */
+#include "uposixdefs.h"
+
+#include "unicode/putil.h"
+#include "unicode/ustring.h"
+#include "udatamem.h"
+#include "umapfile.h"
+
+/* memory-mapping base definitions ------------------------------------------ */
+
+#if MAP_IMPLEMENTATION==MAP_WIN32
+#ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+#endif
+# define VC_EXTRALEAN
+# define NOUSER
+# define NOSERVICE
+# define NOIME
+# define NOMCX
+
+# if U_PLATFORM_HAS_WINUWP_API == 1
+ // Some previous versions of the Windows 10 SDK don't expose various APIs for UWP applications
+ // to use, even though UWP apps are allowed to call and use them. Temporarily change the
+ // WINAPI family partition below to Desktop, so that function declarations are visible for UWP.
+# include <winapifamily.h>
+# if !(WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_SYSTEM))
+# pragma push_macro("WINAPI_PARTITION_DESKTOP")
+# undef WINAPI_PARTITION_DESKTOP
+# define WINAPI_PARTITION_DESKTOP 1
+# define CHANGED_WINAPI_PARTITION_DESKTOP_VALUE
+# endif
+# endif
+
+# include <windows.h>
+
+# if U_PLATFORM_HAS_WINUWP_API == 1 && defined(CHANGED_WINAPI_PARTITION_DESKTOP_VALUE)
+# pragma pop_macro("WINAPI_PARTITION_DESKTOP")
+# endif
+
+# include "cmemory.h"
+
+typedef HANDLE MemoryMap;
+
+# define IS_MAP(map) ((map)!=nullptr)
+
+#elif MAP_IMPLEMENTATION==MAP_POSIX || MAP_IMPLEMENTATION==MAP_390DLL
+ typedef size_t MemoryMap;
+
+# define IS_MAP(map) ((map)!=0)
+
+# include <unistd.h>
+# include <sys/mman.h>
+# include <sys/stat.h>
+# include <fcntl.h>
+
+# ifndef MAP_FAILED
+# define MAP_FAILED ((void*)-1)
+# endif
+
+# if MAP_IMPLEMENTATION==MAP_390DLL
+ /* No memory mapping for 390 batch mode. Fake it using dll loading. */
+# include <dll.h>
+# include "cstring.h"
+# include "cmemory.h"
+# include "unicode/udata.h"
+# define LIB_PREFIX "lib"
+# define LIB_SUFFIX ".dll"
+ /* This is inconvenient until we figure out what to do with U_ICUDATA_NAME in utypes.h */
+# define U_ICUDATA_ENTRY_NAME "icudt" U_ICU_VERSION_SHORT U_LIB_SUFFIX_C_NAME_STRING "_dat"
+# endif
+#elif MAP_IMPLEMENTATION==MAP_STDIO
+# include <stdio.h>
+# include "cmemory.h"
+
+ typedef void *MemoryMap;
+
+# define IS_MAP(map) ((map)!=nullptr)
+#endif
+
+/*----------------------------------------------------------------------------*
+ * *
+ * Memory Mapped File support. Platform dependent implementation of *
+ * functions used by the rest of the implementation.*
+ * *
+ *----------------------------------------------------------------------------*/
+#if MAP_IMPLEMENTATION==MAP_NONE
+ /*
+  * MAP_NONE build: there is no file access, so mapping never succeeds.
+  * The output struct is cleared unless an error was already pending on entry.
+  * Always returns FALSE.
+  */
+ U_CFUNC UBool
+ uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
+     if (!U_FAILURE(*status)) {
+         UDataMemory_init(pData); /* Reset the caller's struct. */
+     }
+     return FALSE; /* no file access on either path */
+ }
+
+ /* MAP_NONE build: nothing is ever mapped, so there is nothing to release. */
+ U_CFUNC void
+ uprv_unmapFile(UDataMemory *pData) {
+     (void)pData; /* intentionally unused */
+ }
+#elif MAP_IMPLEMENTATION==MAP_WIN32
+ /*
+  * Win32 implementation: opens `path` read-only, creates an unnamed read-only
+  * file mapping for it and maps a view of the whole file.
+  *
+  * On success returns TRUE with pData->pHeader pointing at the mapped view and
+  * pData->map holding the mapping handle. On any failure returns FALSE.
+  * *status is changed only for the cases handled explicitly below: an
+  * out-of-memory result from CreateFile/CreateFileMapping
+  * (U_MEMORY_ALLOCATION_ERROR) and, in the UWP code path, a path that fails
+  * UTF-8 -> UTF-16 conversion or does not fit the MAX_PATH buffer.
+  * Returns FALSE immediately, without touching pData, if *status already
+  * holds a failure on entry.
+  */
+ U_CFUNC UBool
+ uprv_mapFile(
+ UDataMemory *pData, /* Fill in with info on the result doing the mapping. */
+ /* Output only; any original contents are cleared. */
+ const char *path, /* File path to be opened/mapped. */
+ UErrorCode *status /* Error status, used to report out-of-memory errors. */
+ )
+ {
+ if (U_FAILURE(*status)) {
+ return FALSE;
+ }
+
+ HANDLE map = nullptr;
+ HANDLE file = INVALID_HANDLE_VALUE;
+
+ UDataMemory_init(pData); /* Clear the output struct. */
+
+ /* open the input file */
+#if U_PLATFORM_HAS_WINUWP_API == 0
+ // Note: In the non-UWP code-path (ie: Win32), the value of the path variable might have come from
+ // the CRT 'getenv' function, and would be therefore be encoded in the default ANSI code page.
+ // This means that we can't call the *W version of API below, whereas in the UWP code-path
+ // there is no 'getenv' call, and thus the string will be only UTF-8/Invariant characters.
+ file=CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, nullptr,
+ OPEN_EXISTING,
+ FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, nullptr);
+#else
+ // Convert from UTF-8 string to UTF-16 string.
+ wchar_t utf16Path[MAX_PATH];
+ int32_t pathUtf16Len = 0;
+ u_strFromUTF8(reinterpret_cast<UChar*>(utf16Path), static_cast<int32_t>(UPRV_LENGTHOF(utf16Path)), &pathUtf16Len, path, -1, status);
+
+ if (U_FAILURE(*status)) {
+ return FALSE;
+ }
+ if (*status == U_STRING_NOT_TERMINATED_WARNING) {
+ // Report back an error instead of a warning.
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ return FALSE;
+ }
+
+ file = CreateFileW(utf16Path, GENERIC_READ, FILE_SHARE_READ, nullptr,
+ OPEN_EXISTING,
+ FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS, nullptr);
+#endif
+ if (file == INVALID_HANDLE_VALUE) {
+ // If we failed to open the file due to an out-of-memory error, then we want
+ // to report that error back to the caller.
+ if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ return FALSE;
+ }
+
+ // Note: We use NULL/nullptr for lpAttributes parameter below.
+ // This means our handle cannot be inherited and we will get the default security descriptor.
+ /* create an unnamed Windows file-mapping object for the specified file */
+ map = CreateFileMappingW(file, nullptr, PAGE_READONLY, 0, 0, nullptr);
+
+ // The file handle is closed here whether or not the mapping was created;
+ // only the mapping handle is kept from this point on.
+ CloseHandle(file);
+ if (map == nullptr) {
+ // If we failed to create the mapping due to an out-of-memory error, then
+ // we want to report that error back to the caller.
+ if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ return FALSE;
+ }
+
+ /* map a view of the file into our address space */
+ pData->pHeader = reinterpret_cast<const DataHeader *>(MapViewOfFile(map, FILE_MAP_READ, 0, 0, 0));
+ if (pData->pHeader == nullptr) {
+ // Unlike the two failure paths above, no error code is set here:
+ // the failure is reported only through the FALSE return value.
+ CloseHandle(map);
+ return FALSE;
+ }
+ pData->map = map;
+ return TRUE;
+ }
+
+ /*
+  * Win32 implementation: unmaps the view at pData->pHeader, closes the
+  * mapping handle in pData->map and clears both fields.
+  * Does nothing when pData is null or holds no mapping handle.
+  */
+ U_CFUNC void
+ uprv_unmapFile(UDataMemory *pData) {
+     if (pData == nullptr || pData->map == nullptr) {
+         return;
+     }
+     UnmapViewOfFile(pData->pHeader);
+     CloseHandle(pData->map);
+     pData->map = nullptr;
+     pData->pHeader = nullptr;
+ }
+
+
+
+#elif MAP_IMPLEMENTATION==MAP_POSIX
+ /*
+  * POSIX implementation: maps the whole file at `path` read-only with mmap().
+  *
+  * On success returns TRUE and fills pData:
+  *   pHeader / mapAddr - start of the mapping
+  *   map               - one past the end of the mapping (start + length), which
+  *                       is how the mapping length is remembered for unmapping.
+  * Returns FALSE if *status already holds a failure, if the file cannot be
+  * stat'ed or opened, if it is empty, if its size does not fit in size_t,
+  * or if mmap() fails. *status is not modified by this function.
+  */
+ U_CFUNC UBool
+ uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
+     int fd;
+     size_t length;
+     struct stat mystat;
+     void *data;
+
+     if (U_FAILURE(*status)) {
+         return FALSE;
+     }
+
+     UDataMemory_init(pData); /* Clear the output struct. */
+
+     /* determine the length of the file */
+     if(stat(path, &mystat)!=0 || mystat.st_size<=0) {
+         return FALSE;
+     }
+     /* st_size is an off_t; keep the length in size_t (the type mmap() takes)
+      * instead of narrowing it to int, and refuse files whose size cannot be
+      * represented rather than mapping a truncated length. */
+     length=static_cast<size_t>(mystat.st_size);
+     if(static_cast<off_t>(length)!=mystat.st_size) {
+         return FALSE;
+     }
+
+     /* open the file */
+     fd=open(path, O_RDONLY);
+     if(fd==-1) {
+         return FALSE;
+     }
+
+     /* get a view of the mapping */
+#if U_PLATFORM != U_PF_HPUX
+     data=mmap(0, length, PROT_READ, MAP_SHARED, fd, 0);
+#else
+     data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0);
+#endif
+     close(fd); /* no longer needed */
+     if(data==MAP_FAILED) {
+         // Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR?
+         return FALSE;
+     }
+
+     pData->map = (char *)data + length;
+     pData->pHeader=(const DataHeader *)data;
+     pData->mapAddr = data;
+#if U_PLATFORM == U_PF_IPHONE
+     posix_madvise(data, length, POSIX_MADV_RANDOM);
+#endif
+     return TRUE;
+ }
+
+ /*
+  * POSIX implementation: releases the mapping described by pData and clears
+  * its fields. The mapping length is recovered as the distance between
+  * pData->mapAddr and pData->map. Does nothing when pData is null or
+  * pData->map is null.
+  */
+ U_CFUNC void
+ uprv_unmapFile(UDataMemory *pData) {
+     if(pData==nullptr || pData->map==nullptr) {
+         return;
+     }
+     char *mapBegin = (char *)pData->mapAddr;
+     char *mapEnd = (char *)pData->map;
+     size_t mappedBytes = mapEnd - mapBegin;
+     /* A munmap() failure is deliberately ignored; the fields are cleared regardless. */
+     (void)munmap(pData->mapAddr, mappedBytes);
+     pData->mapAddr=nullptr;
+     pData->map=nullptr;
+     pData->pHeader=nullptr;
+ }
+
+
+
+#elif MAP_IMPLEMENTATION==MAP_STDIO
+ /*
+  * Returns the result of ftell() at the end of stream f, as an int32_t.
+  * The stream position found on entry is saved first and sought back to
+  * afterwards. Return values of fseek() are not checked.
+  * (Copy of the filestrm.c/T_FileStream_size() implementation.)
+  */
+ static int32_t
+ umap_fsize(FILE *f) {
+     const int32_t originalOffset = ftell(f);
+
+     /* Go to the end of the file before asking for the position. */
+     fseek(f, 0, SEEK_END);
+     const int32_t byteCount = (int32_t)ftell(f);
+
+     /* Put the stream back where the caller left it. */
+     fseek(f, originalOffset, SEEK_SET);
+     return byteCount;
+ }
+
+ /*
+  * stdio implementation: instead of mapping, reads the whole file at `path`
+  * into a buffer obtained from uprv_malloc().
+  *
+  * On success returns TRUE with pData->map, pData->mapAddr and pData->pHeader
+  * all pointing at that buffer. Returns FALSE when *status already holds a
+  * failure, the file cannot be opened, the stream reports an error, the size
+  * is 20 bytes or less, or the read comes up short. *status is set only when
+  * the allocation fails (U_MEMORY_ALLOCATION_ERROR).
+  */
+ U_CFUNC UBool
+ uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
+     if (U_FAILURE(*status)) {
+         return FALSE;
+     }
+
+     UDataMemory_init(pData); /* Clear the output struct. */
+
+     /* open the input file */
+     FILE *stream = fopen(path, "rb");
+     if (stream == nullptr) {
+         return FALSE;
+     }
+
+     /* get the file length */
+     const int32_t byteCount = umap_fsize(stream);
+     if (ferror(stream) || byteCount <= 20) {
+         fclose(stream);
+         return FALSE;
+     }
+
+     /* allocate the memory to hold the file data */
+     void *buffer = uprv_malloc(byteCount);
+     if (buffer == nullptr) {
+         fclose(stream);
+         *status = U_MEMORY_ALLOCATION_ERROR;
+         return FALSE;
+     }
+
+     /* read the file; the stream is closed whether or not the read was complete */
+     const bool readAll = (byteCount == fread(buffer, 1, byteCount, stream));
+     fclose(stream);
+     if (!readAll) {
+         uprv_free(buffer);
+         return FALSE;
+     }
+
+     pData->mapAddr = buffer;
+     pData->pHeader = (const DataHeader *)buffer;
+     pData->map = buffer;
+     return TRUE;
+ }
+
+ /*
+  * stdio implementation: frees the buffer held in pData->map with uprv_free()
+  * and clears the pointer fields. Does nothing when pData is null or
+  * pData->map is null.
+  */
+ U_CFUNC void
+ uprv_unmapFile(UDataMemory *pData) {
+     if (pData == nullptr || pData->map == nullptr) {
+         return;
+     }
+     uprv_free(pData->map);
+     pData->pHeader = nullptr;
+     pData->mapAddr = nullptr;
+     pData->map = nullptr;
+ }
+
+
+#elif MAP_IMPLEMENTATION==MAP_390DLL
+ /* 390 specific Library Loading.
+ * This is the only platform left that dynamically loads an ICU Data Library.
+ * All other platforms use .data files when dynamic loading is required, but
+ * this turns out to be awkward to support in 390 batch mode.
+ *
+ * The idea here is to hide the fact that 390 is using dll loading from the
+ * rest of ICU, and make it look like there is file loading happening.
+ *
+ */
+
+ /*
+  * Copies the NUL-terminated string src (terminator included) into dest and
+  * returns a pointer to the terminator that was written into dest.
+  * No bounds checking is performed on dest.
+  */
+ static char *strcpy_returnEnd(char *dest, const char *src)
+ {
+     for (;;) {
+         *dest = *src;
+         if (*dest == 0) {
+             return dest;
+         }
+         ++dest;
+         ++src;
+     }
+ }
+
+ /*------------------------------------------------------------------------------
+ *
+ * computeDirPath given a user-supplied path of an item to be opened,
+ * compute and return
+ * - the full directory path to be used
+ * when opening the file.
+ * - Pointer to null at end of above returned path
+ *
+ * Parameters:
+ * path: input path. Buffer is not altered.
+ * pathBuffer: Output buffer. Any contents are overwritten.
+ *
+ * Returns:
+ * Pointer to null termination in returned pathBuffer.
+ *
+ * TODO: This works the way ICU historically has, but the
+ * whole data fallback search path is so complicated that
+ * probably almost no one will ever really understand it,
+ * the potential for confusion is large. (It's not just
+ * this one function, but the whole scheme.)
+ *
+ *------------------------------------------------------------------------------*/
+ static char *uprv_computeDirPath(const char *path, char *pathBuffer)
+ {
+     /* Last directory separator in the input path, or null if there is none. */
+     char *lastSeparator = 0;
+     if (path != 0) {
+         lastSeparator = uprv_strrchr(path, U_FILE_SEP_CHAR);
+     }
+
+     /* Start from an empty output string. */
+     *pathBuffer = 0;
+
+     if (lastSeparator != 0) {
+         /* The user-supplied path has a directory portion:
+          * copy everything up to and including the last separator. */
+         int32_t dirLength = (int32_t)(lastSeparator - path + 1);
+         uprv_memcpy(pathBuffer, path, dirLength);
+         char *terminator = pathBuffer + dirLength;
+         *terminator = 0;
+         return terminator;
+     }
+
+     /* No directory in the user-supplied path: fall back to the ICU data directory. */
+     const char *icuDataDir = u_getDataDirectory();
+     if (icuDataDir == nullptr || *icuDataDir == 0) {
+         /* There is no icuDataDir either. Just return the empty pathBuffer. */
+         return pathBuffer;
+     }
+     return strcpy_returnEnd(pathBuffer, icuDataDir);
+ }
+
+
+# define DATA_TYPE "dat"
+
+ /*
+  * 390 DLL implementation of uprv_mapFile.
+  *
+  * If the basename of `path` is anything other than U_ICUDATA_NAME".dat", the
+  * file is mapped with mmap() and pData->map / pHeader / mapAddr are filled in.
+  * Otherwise a data DLL is loaded with dllload() and U_ICUDATA_ENTRY_NAME is
+  * looked up in it; in that case only pData->pHeader is set, so pData->map
+  * stays as UDataMemory_init() left it.
+  *
+  * Returns TRUE on success and FALSE on any failure. *status is only read,
+  * never modified, by this function.
+  */
+ U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
+ const char *inBasename;
+ char *basename;
+ char pathBuffer[1024];
+ const DataHeader *pHeader;
+ dllhandle *handle;
+ void *val=0;
+
+ if (U_FAILURE(*status)) {
+ return FALSE;
+ }
+
+ /* inBasename: the part of path after the last separator, or all of path */
+ inBasename=uprv_strrchr(path, U_FILE_SEP_CHAR);
+ if(inBasename==nullptr) {
+ inBasename = path;
+ } else {
+ inBasename++;
+ }
+ /* basename: points at the terminator of the directory written into pathBuffer */
+ basename=uprv_computeDirPath(path, pathBuffer);
+ if(uprv_strcmp(inBasename, U_ICUDATA_NAME".dat") != 0) {
+ /* must mmap file... for build */
+ int fd;
+ /* NOTE(review): st_size is narrowed to int below; a file of 2 GiB or more
+  * would not be represented correctly - confirm whether that matters here. */
+ int length;
+ struct stat mystat;
+ void *data;
+ UDataMemory_init(pData); /* Clear the output struct. */
+
+ /* determine the length of the file */
+ if(stat(path, &mystat)!=0 || mystat.st_size<=0) {
+ return FALSE;
+ }
+ length=mystat.st_size;
+
+ /* open the file */
+ fd=open(path, O_RDONLY);
+ if(fd==-1) {
+ return FALSE;
+ }
+
+ /* get a view of the mapping */
+ data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0);
+ close(fd); /* no longer needed */
+ if(data==MAP_FAILED) {
+ // Possibly check the errorno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR?
+ return FALSE;
+ }
+ /* map records the END of the mapping (start + length); mapAddr records the start */
+ pData->map = (char *)data + length;
+ pData->pHeader=(const DataHeader *)data;
+ pData->mapAddr = data;
+ return TRUE;
+ }
+
+# ifdef OS390BATCH
+ /* ### hack: we still need to get u_getDataDirectory() fixed
+ for OS/390 (batch mode - always return "//"? )
+ and this here straightened out with LIB_PREFIX and LIB_SUFFIX (both empty?!)
+ This is probably due to the strange file system on OS/390. It's more like
+ a database with short entry names than a typical file system. */
+ /* U_ICUDATA_NAME should always have the correct name */
+ /* BUT FOR BATCH MODE IT IS AN EXCEPTION BECAUSE */
+ /* THE FIRST THREE LETTERS ARE PREASSIGNED TO THE */
+ /* PROJECT!!!!! */
+ /* In this branch the whole pathBuffer is overwritten; the directory computed above is discarded. */
+ uprv_strcpy(pathBuffer, "IXMI" U_ICU_VERSION_SHORT "DA");
+# else
+ /* set up the library name */
+ /* Appends the library file name after the directory portion already in pathBuffer. */
+ uprv_strcpy(basename, LIB_PREFIX U_LIBICUDATA_NAME U_ICU_VERSION_SHORT LIB_SUFFIX);
+# endif
+
+# ifdef UDATA_DEBUG
+ fprintf(stderr, "dllload: %s ", pathBuffer);
+# endif
+
+ handle=dllload(pathBuffer);
+
+# ifdef UDATA_DEBUG
+ fprintf(stderr, " -> %08X\n", handle );
+# endif
+
+ if(handle != nullptr) {
+ /* we have a data DLL - what kind of lookup do we need here? */
+ /* try to find the Table of Contents */
+ UDataMemory_init(pData); /* Clear the output struct. */
+ val=dllqueryvar((dllhandle*)handle, U_ICUDATA_ENTRY_NAME);
+ if(val == 0) {
+ /* failed... so keep looking */
+ /* NOTE(review): the handle returned by dllload() is not released on this path - confirm whether that is intended. */
+ return FALSE;
+ }
+# ifdef UDATA_DEBUG
+ fprintf(stderr, "dllqueryvar(%08X, %s) -> %08X\n", handle, U_ICUDATA_ENTRY_NAME, val);
+# endif
+
+ /* Only pHeader is set for DLL-backed data; the DLL handle is not stored in pData. */
+ pData->pHeader=(const DataHeader *)val;
+ return TRUE;
+ } else {
+ return FALSE; /* no handle */
+ }
+ }
+
+ /*
+  * 390 DLL implementation of uprv_unmapFile.
+  *
+  * In this implementation pData->map is only ever set by the mmap() branch of
+  * uprv_mapFile, where it holds the END address of the mapping and
+  * pData->mapAddr holds its start. That memory comes from mmap(), not from
+  * uprv_malloc(), so it must be released with munmap(); passing it to
+  * uprv_free() would free a pointer that was never heap-allocated.
+  * DLL-backed data leaves pData->map null and is therefore left untouched.
+  * Does nothing when pData is null or pData->map is null.
+  */
+ U_CFUNC void uprv_unmapFile(UDataMemory *pData) {
+     if(pData!=nullptr && pData->map!=nullptr) {
+         /* Recover the mapping length from the start/end pair stored at map time. */
+         size_t dataLen = (char *)pData->map - (char *)pData->mapAddr;
+         (void)munmap(pData->mapAddr, dataLen); /* failure is ignored, fields are cleared regardless */
+         pData->map = nullptr;
+         pData->mapAddr = nullptr;
+         pData->pHeader = nullptr;
+     }
+ }
+
+#else
+# error MAP_IMPLEMENTATION is set incorrectly
+#endif
diff --git a/thirdparty/icu4c/common/umapfile.h b/thirdparty/icu4c/common/umapfile.h
new file mode 100644
index 0000000000..92bd567a2a
--- /dev/null
+++ b/thirdparty/icu4c/common/umapfile.h
@@ -0,0 +1,57 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************/
+
+/*----------------------------------------------------------------------------------
+ *
+ * Memory mapped file wrappers for use by the ICU Data Implementation
+ *
+ * Porting note: The implementation of these functions is very platform specific.
+ * Not all platforms can do real memory mapping. Those that can't
+ * still must implement these functions, getting the data into memory using
+ * whatever means are available.
+ *
+ * These functions are part of the ICU internal implementation, and
+ * are not intended to be used directly by applications.
+ *
+ *----------------------------------------------------------------------------------*/
+
+#ifndef __UMAPFILE_H__
+#define __UMAPFILE_H__
+
+#include "unicode/putil.h"
+#include "unicode/udata.h"
+#include "putilimp.h"
+
+U_CFUNC UBool uprv_mapFile(UDataMemory *pdm, const char *path, UErrorCode *status);
+U_CFUNC void uprv_unmapFile(UDataMemory *pData);
+
+/* MAP_NONE: no memory mapping, no file access at all */
+#define MAP_NONE 0
+#define MAP_WIN32 1
+#define MAP_POSIX 2
+#define MAP_STDIO 3
+#define MAP_390DLL 4
+
+#if UCONFIG_NO_FILE_IO
+# define MAP_IMPLEMENTATION MAP_NONE
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# define MAP_IMPLEMENTATION MAP_WIN32
+#elif U_HAVE_MMAP || U_PLATFORM == U_PF_OS390
+# if U_PLATFORM == U_PF_OS390 && defined (OS390_STUBDATA)
+ /* No memory mapping for 390 batch mode. Fake it using dll loading. */
+# define MAP_IMPLEMENTATION MAP_390DLL
+# else
+# define MAP_IMPLEMENTATION MAP_POSIX
+# endif
+#else /* unknown platform, no memory map implementation: use stdio.h and uprv_malloc() instead */
+# define MAP_IMPLEMENTATION MAP_STDIO
+#endif
+
+#endif
diff --git a/thirdparty/icu4c/common/umath.cpp b/thirdparty/icu4c/common/umath.cpp
new file mode 100644
index 0000000000..7cf4b31749
--- /dev/null
+++ b/thirdparty/icu4c/common/umath.cpp
@@ -0,0 +1,26 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* This file contains platform independent math.
+*/
+
+#include "putilimp.h"
+
+/** Returns x when x is greater than y, otherwise y. */
+U_CAPI int32_t U_EXPORT2
+uprv_max(int32_t x, int32_t y)
+{
+    if (x > y) {
+        return x;
+    }
+    return y;
+}
+
+/** Returns y when x is greater than y, otherwise x. */
+U_CAPI int32_t U_EXPORT2
+uprv_min(int32_t x, int32_t y)
+{
+    if (x > y) {
+        return y;
+    }
+    return x;
+}
+
diff --git a/thirdparty/icu4c/common/umutablecptrie.cpp b/thirdparty/icu4c/common/umutablecptrie.cpp
new file mode 100644
index 0000000000..cdbe27080b
--- /dev/null
+++ b/thirdparty/icu4c/common/umutablecptrie.cpp
@@ -0,0 +1,1852 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// umutablecptrie.cpp (inspired by utrie2_builder.cpp)
+// created: 2017dec29 Markus W. Scherer
+
+// #define UCPTRIE_DEBUG
+#ifdef UCPTRIE_DEBUG
+# include <stdio.h>
+#endif
+
+#include "unicode/utypes.h"
+#include "unicode/ucptrie.h"
+#include "unicode/umutablecptrie.h"
+#include "unicode/uobject.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "uassert.h"
+#include "ucptrie_impl.h"
+
+// ICU-20235 In case Microsoft math.h has defined this, undefine it.
+#ifdef OVERFLOW
+#undef OVERFLOW
+#endif
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+/** Highest code point accepted by get()/set(); larger or negative inputs are rejected there. */
+constexpr int32_t MAX_UNICODE = 0x10ffff;
+
+// Exclusive code point limits of the ranges named below.
+constexpr int32_t UNICODE_LIMIT = 0x110000;
+constexpr int32_t BMP_LIMIT = 0x10000;
+constexpr int32_t ASCII_LIMIT = 0x80;
+
+// The same limits expressed as counts of index entries:
+// one entry per (1 << UCPTRIE_SHIFT_3) code points.
+constexpr int32_t I_LIMIT = UNICODE_LIMIT >> UCPTRIE_SHIFT_3;
+constexpr int32_t BMP_I_LIMIT = BMP_LIMIT >> UCPTRIE_SHIFT_3;
+constexpr int32_t ASCII_I_LIMIT = ASCII_LIMIT >> UCPTRIE_SHIFT_3;
+
+/** Number of consecutive index entries that getDataBlock() converts together for entries below BMP_I_LIMIT. */
+constexpr int32_t SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3));
+
+// Flag values for data blocks.
+// ALL_SAME: index[i] holds the block's single value itself.
+// MIXED: index[i] holds the offset of the block's values in the data array.
+// SAME_AS: not used in the code visible around these definitions;
+//          presumably used during compaction - TODO confirm.
+constexpr uint8_t ALL_SAME = 0;
+constexpr uint8_t MIXED = 1;
+constexpr uint8_t SAME_AS = 2;
+
+/** Start with allocation of 16k data entries. */
+constexpr int32_t INITIAL_DATA_LENGTH = ((int32_t)1 << 14);
+
+/** Grow about 8x each time. */
+constexpr int32_t MEDIUM_DATA_LENGTH = ((int32_t)1 << 17);
+
+/**
+ * Maximum length of the build-time data array.
+ * One entry per 0x110000 code points.
+ */
+constexpr int32_t MAX_DATA_LENGTH = UNICODE_LIMIT;
+
+// Flag values for index-3 blocks while compacting/building.
+constexpr uint8_t I3_NULL = 0;
+constexpr uint8_t I3_BMP = 1;
+constexpr uint8_t I3_16 = 2;
+constexpr uint8_t I3_18 = 3;
+
+/** An index-3 block length plus one eighth of it; presumably the size of an 18-bit-encoded block - TODO confirm. */
+constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8;
+
+class AllSameBlocks;
+class MixedBlocks;
+
+/**
+ * Mutable, build-time map from code points (0..0x10ffff) to uint32_t values.
+ *
+ * Code points are grouped into small blocks of (1 << UCPTRIE_SHIFT_3) entries.
+ * For block i, flags[i] tells how index[i] is to be read: as the block's single
+ * value (ALL_SAME) or as the offset of the block's values in data (MIXED).
+ * Code points at or above highStart all map to highValue.
+ * Neither copyable nor assignable; cloning goes through the constructor that
+ * takes an error code.
+ */
+class MutableCodePointTrie : public UMemory {
+public:
+ /** Creates an empty trie; reports allocation failure through errorCode. */
+ MutableCodePointTrie(uint32_t initialValue, uint32_t errorValue, UErrorCode &errorCode);
+ /** Clones other; reports allocation failure through errorCode. */
+ MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode);
+ MutableCodePointTrie(const MutableCodePointTrie &other) = delete;
+ ~MutableCodePointTrie();
+
+ MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete;
+
+ // Factories: return a new heap-allocated trie, or nullptr with errorCode set on failure.
+ static MutableCodePointTrie *fromUCPMap(const UCPMap *map, UErrorCode &errorCode);
+ static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode);
+
+ /** Returns the value for c, or errorValue if c is outside 0..0x10ffff. */
+ uint32_t get(UChar32 c) const;
+ /** Returns the last code point of the same-valued range beginning at start. */
+ int32_t getRange(UChar32 start, UCPMapValueFilter *filter, const void *context,
+ uint32_t *pValue) const;
+
+ void set(UChar32 c, uint32_t value, UErrorCode &errorCode);
+ void setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode);
+
+ UCPTrie *build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode);
+
+private:
+ /** Resets the trie to its initial, empty state (allocated capacity is kept). */
+ void clear();
+
+ /** Extends highStart beyond c if necessary; returns false if out of memory. */
+ bool ensureHighStart(UChar32 c);
+ /** Reserves blockLength entries at the end of data; returns their offset or -1. */
+ int32_t allocDataBlock(int32_t blockLength);
+ /** Returns the data offset of block i, converting it to MIXED first if needed; -1 on failure. */
+ int32_t getDataBlock(int32_t i);
+
+ void maskValues(uint32_t mask);
+ UChar32 findHighStart() const;
+ int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks);
+ int32_t compactData(
+ int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
+ int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
+ int32_t compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
+ int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode);
+
+ /** Per small block: its single value (ALL_SAME) or its offset in data (MIXED). */
+ uint32_t *index = nullptr;
+ /** Number of entries allocated for index. */
+ int32_t indexCapacity = 0;
+ /** -1 until set elsewhere; reset to -1 by clear(). */
+ int32_t index3NullOffset = -1;
+ /** Values of all MIXED blocks. */
+ uint32_t *data = nullptr;
+ /** Number of entries allocated for data. */
+ int32_t dataCapacity = 0;
+ /** Number of entries of data handed out by allocDataBlock() so far. */
+ int32_t dataLength = 0;
+ /** -1 until set elsewhere; reset to -1 by clear(). */
+ int32_t dataNullOffset = -1;
+
+ /** The initial value as passed to the constructor; clear() restores initialValue from it. */
+ uint32_t origInitialValue;
+ uint32_t initialValue;
+ /** Returned by get() for out-of-range code points. */
+ uint32_t errorValue;
+ /** Code points at or above this limit all map to highValue. */
+ UChar32 highStart;
+ uint32_t highValue;
+#ifdef UCPTRIE_DEBUG
+public:
+ const char *name;
+#endif
+private:
+ /** Temporary array while building the final data. */
+ uint16_t *index16 = nullptr;
+ /** One flag (ALL_SAME, MIXED, ...) per small block of the whole code point range. */
+ uint8_t flags[UNICODE_LIMIT >> UCPTRIE_SHIFT_3];
+};
+
+/**
+ * Creates an empty trie in which every code point maps to iniValue and
+ * out-of-range lookups return errValue.
+ * Allocates the index for BMP_I_LIMIT entries and the data array for
+ * INITIAL_DATA_LENGTH entries. If either allocation fails, errorCode is set to
+ * U_MEMORY_ALLOCATION_ERROR and both capacities stay 0.
+ * Nothing is allocated if errorCode already holds a failure.
+ */
+MutableCodePointTrie::MutableCodePointTrie(uint32_t iniValue, uint32_t errValue, UErrorCode &errorCode) :
+        origInitialValue(iniValue), initialValue(iniValue), errorValue(errValue),
+        highStart(0), highValue(iniValue)
+#ifdef UCPTRIE_DEBUG
+        , name("open")
+#endif
+        {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    index = (uint32_t *)uprv_malloc(BMP_I_LIMIT * 4);
+    data = (uint32_t *)uprv_malloc(INITIAL_DATA_LENGTH * 4);
+    if (index != nullptr && data != nullptr) {
+        // Record the capacities only once both arrays exist.
+        indexCapacity = BMP_I_LIMIT;
+        dataCapacity = INITIAL_DATA_LENGTH;
+    } else {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+/**
+ * Cloning constructor: copies the scalar state of other, then allocates fresh
+ * index and data arrays and copies the used portions (entries below
+ * highStart >> UCPTRIE_SHIFT_3, and the first dataLength data entries).
+ * The index is sized for the BMP only when highStart does not exceed BMP_LIMIT.
+ * If either allocation fails, errorCode is set to U_MEMORY_ALLOCATION_ERROR.
+ * Asserts that other has no temporary index16 array.
+ */
+MutableCodePointTrie::MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode) :
+        index3NullOffset(other.index3NullOffset),
+        dataNullOffset(other.dataNullOffset),
+        origInitialValue(other.origInitialValue), initialValue(other.initialValue),
+        errorValue(other.errorValue),
+        highStart(other.highStart), highValue(other.highValue)
+#ifdef UCPTRIE_DEBUG
+        , name("mutable clone")
+#endif
+        {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+
+    int32_t entryCapacity;
+    if (highStart <= BMP_LIMIT) {
+        entryCapacity = BMP_I_LIMIT;
+    } else {
+        entryCapacity = I_LIMIT;
+    }
+    index = (uint32_t *)uprv_malloc(entryCapacity * 4);
+    data = (uint32_t *)uprv_malloc(other.dataCapacity * 4);
+    if (index == nullptr || data == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    indexCapacity = entryCapacity;
+    dataCapacity = other.dataCapacity;
+
+    // Only the entries below highStart are in use.
+    int32_t usedEntries = highStart >> UCPTRIE_SHIFT_3;
+    uprv_memcpy(flags, other.flags, usedEntries);
+    uprv_memcpy(index, other.index, usedEntries * 4);
+    uprv_memcpy(data, other.data, (size_t)other.dataLength * 4);
+    dataLength = other.dataLength;
+    U_ASSERT(other.index16 == nullptr);
+}
+
+/** Frees the temporary index16 array, the data array and the index array. */
+MutableCodePointTrie::~MutableCodePointTrie() {
+    uprv_free(index16);
+    uprv_free(data);
+    uprv_free(index);
+}
+
+/**
+ * Builds a new mutable trie holding the same mappings as map.
+ * The value of U+10FFFF is used as the new trie's initial value, so ranges
+ * carrying that value need not be stored; all other ranges are copied.
+ * Returns the new trie (owned by the caller), or nullptr if errorCode
+ * indicates a failure.
+ */
+MutableCodePointTrie *MutableCodePointTrie::fromUCPMap(const UCPMap *map, UErrorCode &errorCode) {
+    // Use the highValue as the initialValue to reduce the highStart.
+    const uint32_t errorValue = ucpmap_get(map, -1);
+    const uint32_t initialValue = ucpmap_get(map, 0x10ffff);
+    LocalPointer<MutableCodePointTrie> mutableTrie(
+        new MutableCodePointTrie(initialValue, errorValue, errorCode),
+        errorCode);
+    if (U_FAILURE(errorCode)) {
+        return nullptr;
+    }
+
+    // Walk the source map one same-valued range at a time.
+    UChar32 rangeStart = 0;
+    for (;;) {
+        uint32_t rangeValue;
+        const UChar32 rangeEnd = ucpmap_getRange(map, rangeStart, UCPMAP_RANGE_NORMAL, 0,
+                                                 nullptr, nullptr, &rangeValue);
+        if (rangeEnd < 0) {
+            break;
+        }
+        if (rangeValue != initialValue) {
+            if (rangeStart != rangeEnd) {
+                mutableTrie->setRange(rangeStart, rangeEnd, rangeValue, errorCode);
+            } else {
+                mutableTrie->set(rangeStart, rangeValue, errorCode);
+            }
+        }
+        rangeStart = rangeEnd + 1;
+    }
+
+    // On failure the LocalPointer deletes the partially filled trie.
+    return U_SUCCESS(errorCode) ? mutableTrie.orphan() : nullptr;
+}
+
+/**
+ * Builds a new mutable trie holding the same mappings as the immutable trie.
+ * The error value and the high value are read from the end of the trie's data
+ * array according to its value width; the high value becomes the new trie's
+ * initial value, and every range with a different value is copied.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR for an unknown value width.
+ * Returns the new trie (owned by the caller), or nullptr if errorCode
+ * indicates a failure.
+ */
+MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) {
+    // Use the highValue as the initialValue to reduce the highStart.
+    const int32_t errorValueIndex = trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET;
+    const int32_t highValueIndex = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
+    uint32_t errorValue;
+    uint32_t initialValue;
+    switch (trie->valueWidth) {
+    case UCPTRIE_VALUE_BITS_8:
+        errorValue = trie->data.ptr8[errorValueIndex];
+        initialValue = trie->data.ptr8[highValueIndex];
+        break;
+    case UCPTRIE_VALUE_BITS_16:
+        errorValue = trie->data.ptr16[errorValueIndex];
+        initialValue = trie->data.ptr16[highValueIndex];
+        break;
+    case UCPTRIE_VALUE_BITS_32:
+        errorValue = trie->data.ptr32[errorValueIndex];
+        initialValue = trie->data.ptr32[highValueIndex];
+        break;
+    default:
+        // Unreachable if the trie is properly initialized.
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    LocalPointer<MutableCodePointTrie> mutableTrie(
+        new MutableCodePointTrie(initialValue, errorValue, errorCode),
+        errorCode);
+    if (U_FAILURE(errorCode)) {
+        return nullptr;
+    }
+
+    // Walk the source trie one same-valued range at a time.
+    UChar32 rangeStart = 0;
+    for (;;) {
+        uint32_t rangeValue;
+        const UChar32 rangeEnd = ucptrie_getRange(trie, rangeStart, UCPMAP_RANGE_NORMAL, 0,
+                                                  nullptr, nullptr, &rangeValue);
+        if (rangeEnd < 0) {
+            break;
+        }
+        if (rangeValue != initialValue) {
+            if (rangeStart != rangeEnd) {
+                mutableTrie->setRange(rangeStart, rangeEnd, rangeValue, errorCode);
+            } else {
+                mutableTrie->set(rangeStart, rangeValue, errorCode);
+            }
+        }
+        rangeStart = rangeEnd + 1;
+    }
+
+    // On failure the LocalPointer deletes the partially filled trie.
+    return U_SUCCESS(errorCode) ? mutableTrie.orphan() : nullptr;
+}
+
+/**
+ * Returns the trie to its empty state: no data in use, highStart 0, the
+ * initial and high values restored from origInitialValue, both null offsets
+ * reset to -1, and the temporary index16 array released.
+ * The index and data allocations themselves are kept.
+ */
+void MutableCodePointTrie::clear() {
+    uprv_free(index16);
+    index16 = nullptr;
+
+    dataNullOffset = -1;
+    index3NullOffset = -1;
+    dataLength = 0;
+    highStart = 0;
+    initialValue = origInitialValue;
+    highValue = origInitialValue;
+}
+
+/**
+ * Returns the value stored for code point c.
+ * Out-of-range code points (negative or above MAX_UNICODE) yield errorValue;
+ * code points at or above highStart yield highValue.
+ */
+uint32_t MutableCodePointTrie::get(UChar32 c) const {
+    if ((uint32_t)c > MAX_UNICODE) {
+        return errorValue;
+    }
+    if (c >= highStart) {
+        return highValue;
+    }
+    const int32_t blockIndex = c >> UCPTRIE_SHIFT_3;
+    if (flags[blockIndex] != ALL_SAME) {
+        // The index entry is the offset of this block's values in the data array.
+        return data[index[blockIndex] + (c & UCPTRIE_SMALL_DATA_MASK)];
+    }
+    // The index entry is the single value shared by the whole block.
+    return index[blockIndex];
+}
+
+/**
+ * Maps a raw trie value to the value reported to the caller:
+ * nullValue when value equals initialValue, otherwise filter(context, value)
+ * when a filter is given, otherwise value unchanged.
+ */
+inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue,
+                                 UCPMapValueFilter *filter, const void *context) {
+    if (value == initialValue) {
+        return nullValue;
+    }
+    if (filter == nullptr) {
+        return value;
+    }
+    return filter(context, value);
+}
+
+/**
+ * Finds the range of code points that starts at `start` and whose members all
+ * have the same reported value.
+ *
+ * The reported value of a code point is its trie value passed through
+ * maybeFilterValue(): trie values equal to initialValue are reported as
+ * nullValue (initialValue itself, or filter(context, initialValue) when a
+ * filter is given); other values are passed through the filter if there is one.
+ *
+ * @param start   first code point of the range
+ * @param filter  optional value filter, may be nullptr
+ * @param context opaque pointer handed to filter
+ * @param pValue  if not nullptr, receives the reported value of the range
+ * @return the last code point of the range, or U_SENTINEL if start is
+ *         negative or above MAX_UNICODE
+ */
+UChar32 MutableCodePointTrie::getRange(
+ UChar32 start, UCPMapValueFilter *filter, const void *context,
+ uint32_t *pValue) const {
+ if ((uint32_t)start > MAX_UNICODE) {
+ return U_SENTINEL;
+ }
+ if (start >= highStart) {
+ // Everything from highStart upwards shares highValue,
+ // so the range runs to the end of the code point space.
+ if (pValue != nullptr) {
+ uint32_t value = highValue;
+ if (filter != nullptr) { value = filter(context, value); }
+ *pValue = value;
+ }
+ return MAX_UNICODE;
+ }
+ uint32_t nullValue = initialValue;
+ if (filter != nullptr) { nullValue = filter(context, nullValue); }
+ UChar32 c = start;
+ // trieValue: last raw value seen; value: reported value of the range.
+ // Both are assigned on the first loop iteration (haveValue becomes true).
+ uint32_t trieValue, value;
+ bool haveValue = false;
+ int32_t i = c >> UCPTRIE_SHIFT_3;
+ do {
+ if (flags[i] == ALL_SAME) {
+ // The whole block has one value, stored directly in the index entry.
+ uint32_t trieValue2 = index[i];
+ if (haveValue) {
+ if (trieValue2 != trieValue) {
+ // Different raw values end the range unless a filter
+ // maps them to the same reported value.
+ if (filter == nullptr ||
+ maybeFilterValue(trieValue2, initialValue, nullValue,
+ filter, context) != value) {
+ return c - 1;
+ }
+ trieValue = trieValue2; // may or may not help
+ }
+ } else {
+ trieValue = trieValue2;
+ value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
+ if (pValue != nullptr) { *pValue = value; }
+ haveValue = true;
+ }
+ // Skip to the first code point of the next small block.
+ c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK;
+ } else /* MIXED */ {
+ // The index entry is the block's offset in data; di addresses c's own entry.
+ int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK);
+ uint32_t trieValue2 = data[di];
+ if (haveValue) {
+ if (trieValue2 != trieValue) {
+ if (filter == nullptr ||
+ maybeFilterValue(trieValue2, initialValue, nullValue,
+ filter, context) != value) {
+ return c - 1;
+ }
+ trieValue = trieValue2; // may or may not help
+ }
+ } else {
+ trieValue = trieValue2;
+ value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
+ if (pValue != nullptr) { *pValue = value; }
+ haveValue = true;
+ }
+ // Check the remaining entries of this block, up to the block boundary.
+ while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) {
+ trieValue2 = data[++di];
+ if (trieValue2 != trieValue) {
+ if (filter == nullptr ||
+ maybeFilterValue(trieValue2, initialValue, nullValue,
+ filter, context) != value) {
+ return c - 1;
+ }
+ }
+ trieValue = trieValue2; // may or may not help
+ }
+ }
+ ++i;
+ } while (c < highStart);
+ U_ASSERT(haveValue);
+ // The range reached highStart: it continues to the end of the code point
+ // space only if highValue is reported as the same value.
+ if (maybeFilterValue(highValue, initialValue, nullValue,
+ filter, context) != value) {
+ return c - 1;
+ } else {
+ return MAX_UNICODE;
+ }
+}
+
+/** Sets all UCPTRIE_SMALL_DATA_BLOCK_LENGTH entries starting at block to value. */
+void
+writeBlock(uint32_t *block, uint32_t value) {
+    for (int32_t offset = 0; offset < UCPTRIE_SMALL_DATA_BLOCK_LENGTH; ++offset) {
+        block[offset] = value;
+    }
+}
+
+/**
+ * Makes sure that code point c lies below highStart.
+ * If it does not, highStart is moved up to the next
+ * UCPTRIE_CP_PER_INDEX_2_ENTRY boundary above c, the index array is
+ * reallocated to I_LIMIT entries when it is too small, and every newly covered
+ * block is initialized as ALL_SAME with initialValue.
+ *
+ * @return false if the index array could not be reallocated, else true
+ */
+bool MutableCodePointTrie::ensureHighStart(UChar32 c) {
+    if (c < highStart) {
+        return true;
+    }
+
+    // Round up to a UCPTRIE_CP_PER_INDEX_2_ENTRY boundary to simplify compaction.
+    const UChar32 newHighStart =
+        (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
+    int32_t entry = highStart >> UCPTRIE_SHIFT_3;
+    const int32_t entryLimit = newHighStart >> UCPTRIE_SHIFT_3;
+
+    if (entryLimit > indexCapacity) {
+        uint32_t *grownIndex = (uint32_t *)uprv_malloc(I_LIMIT * 4);
+        if (grownIndex == nullptr) {
+            return false;
+        }
+        // Only the entries below the old highStart carry information.
+        uprv_memcpy(grownIndex, index, entry * 4);
+        uprv_free(index);
+        index = grownIndex;
+        indexCapacity = I_LIMIT;
+    }
+
+    do {
+        flags[entry] = ALL_SAME;
+        index[entry] = initialValue;
+    } while (++entry < entryLimit);
+
+    highStart = newHighStart;
+    return true;
+}
+
+/**
+ * Reserves blockLength consecutive entries at the end of the data array.
+ * When the array is too small it is reallocated, first to MEDIUM_DATA_LENGTH
+ * and then to MAX_DATA_LENGTH entries, keeping the entries already in use.
+ *
+ * @return the offset of the reserved block, or -1 if the array is already at
+ *         its maximum capacity or the reallocation fails
+ */
+int32_t MutableCodePointTrie::allocDataBlock(int32_t blockLength) {
+    const int32_t blockStart = dataLength;
+    const int32_t blockEnd = blockStart + blockLength;
+
+    if (blockEnd > dataCapacity) {
+        int32_t grownCapacity;
+        if (dataCapacity < MEDIUM_DATA_LENGTH) {
+            grownCapacity = MEDIUM_DATA_LENGTH;
+        } else if (dataCapacity < MAX_DATA_LENGTH) {
+            grownCapacity = MAX_DATA_LENGTH;
+        } else {
+            // Should never occur.
+            // Either MAX_DATA_LENGTH is incorrect,
+            // or the code writes more values than should be possible.
+            return -1;
+        }
+        uint32_t *grownData = (uint32_t *)uprv_malloc(grownCapacity * 4);
+        if (grownData == nullptr) {
+            return -1;
+        }
+        uprv_memcpy(grownData, data, (size_t)dataLength * 4);
+        uprv_free(data);
+        data = grownData;
+        dataCapacity = grownCapacity;
+    }
+
+    dataLength = blockEnd;
+    return blockStart;
+}
+
+/**
+ * Returns the offset in the data array of the values for index entry i,
+ * converting the entry from ALL_SAME to MIXED first when necessary.
+ *
+ * Entries below BMP_I_LIMIT are converted together with the other entries of
+ * their aligned group of SMALL_DATA_BLOCKS_PER_BMP_BLOCK entries, using one
+ * allocation of UCPTRIE_FAST_DATA_BLOCK_LENGTH; other entries are converted
+ * individually.
+ *
+ * No error checking for illegal arguments.
+ *
+ * @return -1 if no new data block available (out of memory in data array)
+ * @internal
+ */
+int32_t MutableCodePointTrie::getDataBlock(int32_t i) {
+    if (flags[i] == MIXED) {
+        return index[i];
+    }
+
+    if (i >= BMP_I_LIMIT) {
+        // Convert just this one small block.
+        const int32_t blockOffset = allocDataBlock(UCPTRIE_SMALL_DATA_BLOCK_LENGTH);
+        if (blockOffset < 0) {
+            return blockOffset;
+        }
+        writeBlock(data + blockOffset, index[i]);
+        flags[i] = MIXED;
+        index[i] = blockOffset;
+        return blockOffset;
+    }
+
+    // Convert the whole aligned group of small blocks that contains i.
+    int32_t blockOffset = allocDataBlock(UCPTRIE_FAST_DATA_BLOCK_LENGTH);
+    if (blockOffset < 0) {
+        return blockOffset;
+    }
+    const int32_t groupStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK - 1);
+    const int32_t groupLimit = groupStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+    for (int32_t entry = groupStart; entry < groupLimit; ++entry) {
+        U_ASSERT(flags[entry] == ALL_SAME);
+        writeBlock(data + blockOffset, index[entry]);
+        flags[entry] = MIXED;
+        index[entry] = blockOffset;
+        blockOffset += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
+    }
+    return index[i];
+}
+
+/**
+ * Sets the value for a single code point.
+ * Does nothing if errorCode already indicates a failure.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR if c is outside 0..MAX_UNICODE, and
+ * U_MEMORY_ALLOCATION_ERROR if the index or data arrays cannot be extended.
+ */
+void MutableCodePointTrie::set(UChar32 c, uint32_t value, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    if ((uint32_t)c > MAX_UNICODE) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    if (!ensureHighStart(c)) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    const int32_t blockOffset = getDataBlock(c >> UCPTRIE_SHIFT_3);
+    if (blockOffset < 0) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    const int32_t offsetInBlock = c & UCPTRIE_SMALL_DATA_MASK;
+    data[blockOffset + offsetInBlock] = value;
+}
+
+/** Stores value into block[start..limit[ (limit is exclusive). */
+void
+fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value) {
+    for (UChar32 k = start; k < limit; ++k) {
+        block[k] = value;
+    }
+}
+/**
+ * Sets value for every code point in the inclusive range [start..end].
+ *
+ * Does nothing if errorCode already indicates a failure.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR if start or end exceeds MAX_UNICODE or if start > end.
+ * Sets U_MEMORY_ALLOCATION_ERROR if ensureHighStart() or getDataBlock() fails.
+ *
+ * Partial blocks at either end of the range are written into data blocks
+ * obtained from getDataBlock(). Whole blocks in between are updated in place:
+ * an ALL_SAME block just gets the new value stored in its index entry,
+ * a MIXED block has its data overwritten.
+ */
+void MutableCodePointTrie::setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode)) {
+ return;
+ }
+ if ((uint32_t)start > MAX_UNICODE || (uint32_t)end > MAX_UNICODE || start > end) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ if (!ensureHighStart(end)) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+
+ UChar32 limit = end + 1; // Exclusive end of the range.
+ if (start & UCPTRIE_SMALL_DATA_MASK) {
+ // Set partial block at [start..following block boundary[.
+ int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
+ if (block < 0) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+
+ UChar32 nextStart = (start + UCPTRIE_SMALL_DATA_MASK) & ~UCPTRIE_SMALL_DATA_MASK; // start rounded up to a block boundary
+ if (nextStart <= limit) {
+ fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, UCPTRIE_SMALL_DATA_BLOCK_LENGTH,
+ value);
+ start = nextStart;
+ } else {
+ fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, limit & UCPTRIE_SMALL_DATA_MASK,
+ value); // The whole range lies inside this one block.
+ return;
+ }
+ }
+
+ // Number of positions in the last, partial block.
+ int32_t rest = limit & UCPTRIE_SMALL_DATA_MASK;
+
+ // Round down limit to a block boundary.
+ limit &= ~UCPTRIE_SMALL_DATA_MASK;
+
+ // Iterate over all-value blocks.
+ while (start < limit) {
+ int32_t i = start >> UCPTRIE_SHIFT_3;
+ if (flags[i] == ALL_SAME) {
+ index[i] = value; // For ALL_SAME, the index entry holds the block's value.
+ } else /* MIXED */ {
+ fillBlock(data + index[i], 0, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, value);
+ }
+ start += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
+ }
+
+ if (rest > 0) {
+ // Set partial block at [last block boundary..limit[.
+ int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
+ if (block < 0) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+
+ fillBlock(data + block, 0, rest, value);
+ }
+}
+
+/* compaction --------------------------------------------------------------- */
+
+/**
+ * ANDs mask into every value stored in the build-time trie: the special values,
+ * the values held in ALL_SAME index entries below highStart, and all data values.
+ */
+void MutableCodePointTrie::maskValues(uint32_t mask) {
+    initialValue &= mask;
+    errorValue &= mask;
+    highValue &= mask;
+    const int32_t entryLimit = highStart >> UCPTRIE_SHIFT_3;
+    for (int32_t entry = 0; entry < entryLimit; ++entry) {
+        // Entries with other flags hold data offsets, not values.
+        if (flags[entry] != ALL_SAME) {
+            continue;
+        }
+        index[entry] &= mask;
+    }
+    for (int32_t k = 0; k < dataLength; ++k) {
+        data[k] = data[k] & mask;
+    }
+}
+
+/**
+ * Compares the first length units of s and t.
+ * @return true if all of them are equal (always true for length 0,
+ *         always false for a negative length)
+ */
+template<typename UIntA, typename UIntB>
+bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) {
+    int32_t matched = 0;
+    while (matched < length && s[matched] == t[matched]) {
+        ++matched;
+    }
+    return matched == length;
+}
+
+/** Returns true if each of the length values starting at p equals value. */
+bool allValuesSameAs(const uint32_t *p, int32_t length, uint32_t value) {
+    for (int32_t k = 0; k < length; ++k) {
+        if (p[k] != value) {
+            return false;
+        }
+    }
+    // A negative length never counts as a match.
+    return length >= 0;
+}
+
+/**
+ * Search for an identical block.
+ * Looks in p[pStart..length[ for blockLength units equal to those at q + qStart.
+ * @return the start offset of the first match in p, or -1 if there is none
+ */
+int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
+                      const uint16_t *q, int32_t qStart, int32_t blockLength) {
+    const uint16_t *wanted = q + qStart;
+    // Last start offset at which a whole block still fits below length.
+    const int32_t lastStart = length - blockLength;
+    for (int32_t candidate = pStart; candidate <= lastStart; ++candidate) {
+        if (equalBlocks(p + candidate, wanted, blockLength)) {
+            return candidate;
+        }
+    }
+    return -1;
+}
+
+/**
+ * Searches p[start..limit[ for blockLength consecutive units equal to value.
+ * @return the start offset of the first such run, or -1 if there is none
+ */
+int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit,
+                         uint32_t value, int32_t blockLength) {
+    // Ensure that we do not even partially get past limit.
+    const int32_t lastStart = limit - blockLength;
+
+    int32_t candidate = start;
+    while (candidate <= lastStart) {
+        if (p[candidate] != value) {
+            ++candidate;
+            continue;
+        }
+        int32_t run = 1;
+        while (run != blockLength && p[candidate + run] == value) {
+            ++run;
+        }
+        if (run == blockLength) {
+            return candidate;
+        }
+        // p[candidate + run] is a mismatch: resume right after it.
+        candidate += run + 1;
+    }
+    return -1;
+}
+
+/**
+ * Look for maximum overlap of the beginning of the other block
+ * with the previous, adjacent block.
+ *
+ * @return the largest n < blockLength such that the last n units of p[0..length[
+ *         equal the first n units at q + qStart; 0 if there is no overlap
+ */
+template<typename UIntA, typename UIntB>
+int32_t getOverlap(const UIntA *p, int32_t length,
+                   const UIntB *q, int32_t qStart, int32_t blockLength) {
+    const UIntB *next = q + qStart;
+    int32_t overlap = blockLength - 1;
+    U_ASSERT(overlap <= length);
+    for (; overlap > 0; --overlap) {
+        if (equalBlocks(p + (length - overlap), next, overlap)) {
+            break;
+        }
+    }
+    return overlap;
+}
+
+/**
+ * Counts how many units at the end of p[0..length[ equal value,
+ * looking at no more than blockLength - 1 units.
+ */
+int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value,
+                          int32_t blockLength) {
+    const int32_t maxOverlap = blockLength - 1;
+    int32_t overlap = 0;
+    while (overlap < maxOverlap && p[length - 1 - overlap] == value) {
+        ++overlap;
+    }
+    return overlap;
+}
+
+/**
+ * Returns true if dataOffset equals one of the index entries that are visited
+ * when stepping through index[0..fastILimit[ in increments of
+ * SMALL_DATA_BLOCKS_PER_BMP_BLOCK.
+ */
+bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) {
+    int32_t entry = 0;
+    while (entry < fastILimit) {
+        if (dataOffset == index[entry]) {
+            return true;
+        }
+        entry += SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+    }
+    return false;
+}
+
+/**
+ * Finds the start of the last range in the trie by enumerating backward.
+ * Indexes for code points higher than this will be omitted.
+ *
+ * @return the code point just after the highest block that contains a value
+ *         other than highValue, or 0 if every block holds only highValue
+ */
+UChar32 MutableCodePointTrie::findHighStart() const {
+    for (int32_t i = (highStart >> UCPTRIE_SHIFT_3) - 1; i >= 0; --i) {
+        bool onlyHighValue;
+        if (flags[i] == ALL_SAME) {
+            onlyHighValue = index[i] == highValue;
+        } else /* MIXED */ {
+            const uint32_t *block = data + index[i];
+            int32_t j = 0;
+            while (j != UCPTRIE_SMALL_DATA_BLOCK_LENGTH && block[j] == highValue) {
+                ++j;
+            }
+            onlyHighValue = j == UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
+        }
+        if (!onlyHighValue) {
+            return (i + 1) << UCPTRIE_SHIFT_3;
+        }
+    }
+    return 0;
+}
+
+/**
+ * Small fixed-capacity cache of same-value blocks seen so far.
+ * For each cached value it remembers the index that was passed in when the
+ * value was added, and a reference count which is the sum of the counts passed in.
+ */
+class AllSameBlocks {
+public:
+    static constexpr int32_t NEW_UNIQUE = -1;
+    static constexpr int32_t OVERFLOW = -2;
+
+    AllSameBlocks() : length(0), mostRecent(-1) {}
+
+    /**
+     * Looks up value. If it is cached, adds count to its reference count.
+     * Otherwise stores (index, count, value) if there is room.
+     *
+     * @return the stored index for a known value, NEW_UNIQUE if the value
+     *         was just added, or OVERFLOW if it is unknown and the cache is full
+     */
+    int32_t findOrAdd(int32_t index, int32_t count, uint32_t value) {
+        int32_t slot = -1;
+        if (mostRecent >= 0 && values[mostRecent] == value) {
+            // Fast path: same value as in the previous call.
+            slot = mostRecent;
+        } else {
+            for (int32_t i = 0; i < length; ++i) {
+                if (values[i] == value) {
+                    slot = i;
+                    break;
+                }
+            }
+        }
+        if (slot >= 0) {
+            mostRecent = slot;
+            refCounts[slot] += count;
+            return indexes[slot];
+        }
+        if (length == CAPACITY) {
+            return OVERFLOW;
+        }
+        mostRecent = length;
+        indexes[length] = index;
+        values[length] = value;
+        refCounts[length] = count;
+        ++length;
+        return NEW_UNIQUE;
+    }
+
+    /** Replaces the block which has the lowest reference count. */
+    void add(int32_t index, int32_t count, uint32_t value) {
+        U_ASSERT(length == CAPACITY);
+        int32_t victim = -1;
+        int32_t victimCount = I_LIMIT;
+        for (int32_t i = 0; i < length; ++i) {
+            U_ASSERT(values[i] != value);
+            const int32_t refCount = refCounts[i];
+            if (refCount < victimCount) {
+                victim = i;
+                victimCount = refCount;
+            }
+        }
+        U_ASSERT(victim >= 0);
+        mostRecent = victim;
+        indexes[victim] = index;
+        values[victim] = value;
+        refCounts[victim] = count;
+    }
+
+    /** Returns the stored index of the entry with the highest reference count, or -1 if empty. */
+    int32_t findMostUsed() const {
+        if (length == 0) { return -1; }
+        int32_t best = -1;
+        int32_t bestCount = 0;
+        for (int32_t i = 0; i < length; ++i) {
+            const int32_t refCount = refCounts[i];
+            if (refCount > bestCount) {
+                best = i;
+                bestCount = refCount;
+            }
+        }
+        return indexes[best];
+    }
+
+private:
+    static constexpr int32_t CAPACITY = 32;
+
+    int32_t length;
+    int32_t mostRecent;
+
+    int32_t indexes[CAPACITY];
+    uint32_t values[CAPACITY];
+    int32_t refCounts[CAPACITY];
+};
+
+// Custom hash table for mixed-value blocks to be found anywhere in the
+// compacted data or index so far.
+//
+// Each non-zero table entry packs (partial hash code << shift) | (data index + 1);
+// 0 marks an empty slot. Collisions are resolved by repeatedly stepping by the
+// initial entry index, modulo the table length (see nextIndex()).
+class MixedBlocks {
+public:
+    MixedBlocks() {}
+    ~MixedBlocks() {
+        uprv_free(table);
+    }
+
+    /**
+     * (Re)initializes the table for blocks of newBlockLength units in an array
+     * of up to maxLength units. Reuses the existing allocation when it is
+     * large enough, and clears all entries.
+     *
+     * @return false if the table could not be allocated; the object must then
+     *         be initialized successfully again before any other use
+     */
+    bool init(int32_t maxLength, int32_t newBlockLength) {
+        // We store actual data indexes + 1 to reserve 0 for empty entries.
+        int32_t maxDataIndex = maxLength - newBlockLength + 1;
+        int32_t newLength;
+        if (maxDataIndex <= 0xfff) {  // 4k
+            newLength = 6007;
+            shift = 12;
+            mask = 0xfff;
+        } else if (maxDataIndex <= 0x7fff) {  // 32k
+            newLength = 50021;
+            shift = 15;
+            mask = 0x7fff;
+        } else if (maxDataIndex <= 0x1ffff) {  // 128k
+            newLength = 200003;
+            shift = 17;
+            mask = 0x1ffff;
+        } else {
+            // maxDataIndex up to around MAX_DATA_LENGTH, ca. 1.1M
+            newLength = 1500007;
+            shift = 21;
+            mask = 0x1fffff;
+        }
+        if (newLength > capacity) {
+            uprv_free(table);
+            table = (uint32_t *)uprv_malloc(newLength * 4);
+            if (table == nullptr) {
+                // The old table has been freed. Forget its capacity so that a
+                // later init() cannot skip the allocation and clear a null table.
+                capacity = 0;
+                return false;
+            }
+            capacity = newLength;
+        }
+        length = newLength;
+        uprv_memset(table, 0, length * 4);
+
+        blockLength = newBlockLength;
+        return true;
+    }
+
+    /**
+     * Adds table entries for the blocks that became complete when the array
+     * grew from prevDataLength to newDataLength units.
+     * Blocks starting before minStart are not entered.
+     */
+    template<typename UInt>
+    void extend(const UInt *data, int32_t minStart, int32_t prevDataLength, int32_t newDataLength) {
+        int32_t start = prevDataLength - blockLength;
+        if (start >= minStart) {
+            ++start;  // Skip the last block that we added last time.
+        } else {
+            start = minStart;  // Begin with the first full block.
+        }
+        for (int32_t end = newDataLength - blockLength; start <= end; ++start) {
+            uint32_t hashCode = makeHashCode(data, start);
+            addEntry(data, start, hashCode, start);
+        }
+    }
+
+    /**
+     * Looks for a block in data that equals the blockLength units at
+     * blockData + blockStart.
+     * @return the start offset of such a block in data, or -1 if none was entered
+     */
+    template<typename UIntA, typename UIntB>
+    int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const {
+        uint32_t hashCode = makeHashCode(blockData, blockStart);
+        int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode);
+        if (entryIndex >= 0) {
+            return (table[entryIndex] & mask) - 1;
+        } else {
+            return -1;
+        }
+    }
+
+    /**
+     * Looks for a block in data whose blockLength units all equal blockValue.
+     * @return the start offset of such a block in data, or -1 if none was entered
+     */
+    int32_t findAllSameBlock(const uint32_t *data, uint32_t blockValue) const {
+        uint32_t hashCode = makeHashCode(blockValue);
+        int32_t entryIndex = findEntry(data, blockValue, hashCode);
+        if (entryIndex >= 0) {
+            return (table[entryIndex] & mask) - 1;
+        } else {
+            return -1;
+        }
+    }
+
+private:
+    /**
+     * Hash code over the blockLength units starting at blockData[blockStart].
+     * Always reads at least two units, so blockLength must be at least 2.
+     */
+    template<typename UInt>
+    uint32_t makeHashCode(const UInt *blockData, int32_t blockStart) const {
+        int32_t blockLimit = blockStart + blockLength;
+        uint32_t hashCode = blockData[blockStart++];
+        do {
+            hashCode = 37 * hashCode + blockData[blockStart++];
+        } while (blockStart < blockLimit);
+        return hashCode;
+    }
+
+    /** Hash code of a block of blockLength units that all equal blockValue. */
+    uint32_t makeHashCode(uint32_t blockValue) const {
+        uint32_t hashCode = blockValue;
+        for (int32_t i = 1; i < blockLength; ++i) {
+            hashCode = 37 * hashCode + blockValue;
+        }
+        return hashCode;
+    }
+
+    /** Enters the block at data + blockStart unless an equal block is already present. */
+    template<typename UInt>
+    void addEntry(const UInt *data, int32_t blockStart, uint32_t hashCode, int32_t dataIndex) {
+        U_ASSERT(0 <= dataIndex && dataIndex < (int32_t)mask);
+        int32_t entryIndex = findEntry(data, data, blockStart, hashCode);
+        if (entryIndex < 0) {
+            // ~entryIndex is the empty slot where the probe sequence ended.
+            table[~entryIndex] = (hashCode << shift) | (dataIndex + 1);
+        }
+    }
+
+    /**
+     * Probes for an entry whose block in data equals the block at blockData + blockStart.
+     * @return the entry index if found, otherwise the bit-inverted index (negative)
+     *         of the empty slot where the search ended
+     */
+    template<typename UIntA, typename UIntB>
+    int32_t findEntry(const UIntA *data, const UIntB *blockData, int32_t blockStart,
+                      uint32_t hashCode) const {
+        uint32_t shiftedHashCode = hashCode << shift;
+        int32_t initialEntryIndex = (hashCode % (length - 1)) + 1;  // 1..length-1
+        for (int32_t entryIndex = initialEntryIndex;;) {
+            uint32_t entry = table[entryIndex];
+            if (entry == 0) {
+                return ~entryIndex;
+            }
+            // Compare the stored partial hash code before comparing block contents.
+            if ((entry & ~mask) == shiftedHashCode) {
+                int32_t dataIndex = (entry & mask) - 1;
+                if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) {
+                    return entryIndex;
+                }
+            }
+            entryIndex = nextIndex(initialEntryIndex, entryIndex);
+        }
+    }
+
+    /** Same as the other findEntry(), for a block whose units all equal blockValue. */
+    int32_t findEntry(const uint32_t *data, uint32_t blockValue, uint32_t hashCode) const {
+        uint32_t shiftedHashCode = hashCode << shift;
+        int32_t initialEntryIndex = (hashCode % (length - 1)) + 1;  // 1..length-1
+        for (int32_t entryIndex = initialEntryIndex;;) {
+            uint32_t entry = table[entryIndex];
+            if (entry == 0) {
+                return ~entryIndex;
+            }
+            if ((entry & ~mask) == shiftedHashCode) {
+                int32_t dataIndex = (entry & mask) - 1;
+                if (allValuesSameAs(data + dataIndex, blockLength, blockValue)) {
+                    return entryIndex;
+                }
+            }
+            entryIndex = nextIndex(initialEntryIndex, entryIndex);
+        }
+    }
+
+    /** Next slot in the probe sequence: steps by the initial index, modulo the table length. */
+    inline int32_t nextIndex(int32_t initialEntryIndex, int32_t entryIndex) const {
+        // U_ASSERT(0 < initialEntryIndex && initialEntryIndex < length);
+        return (entryIndex + initialEntryIndex) % length;
+    }
+
+    // Hash table.
+    // The length is a prime number, larger than the maximum data length.
+    // The "shift" lower bits store a data index + 1.
+    // The remaining upper bits store a partial hashCode of the block data values.
+    uint32_t *table = nullptr;
+    int32_t capacity = 0;
+    int32_t length = 0;
+    int32_t shift = 0;
+    uint32_t mask = 0;
+
+    int32_t blockLength = 0;
+};
+/**
+ * First compaction pass over the index entries below highStart.
+ *
+ * - A MIXED block whose values all turn out to be equal becomes ALL_SAME.
+ * - A fast-range block whose ALL_SAME parts hold different values is turned
+ *   into a MIXED block via getDataBlock().
+ * - Each ALL_SAME block is looked up in allSameBlocks; if an earlier block
+ *   with the same value is found, this block gets flags[i] = SAME_AS and
+ *   index[i] = the index of that earlier block.
+ *
+ * @return the capacity to allocate for the compacted data array,
+ *         or -1 if getDataBlock() failed
+ */
+int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) {
+#ifdef UCPTRIE_DEBUG
+ bool overflow = false;
+#endif
+
+ // ASCII data will be stored as a linear table, even if the following code
+ // does not yet count it that way.
+ int32_t newDataCapacity = ASCII_LIMIT;
+ // Add room for a small data null block in case it would match the start of
+ // a fast data block where dataNullOffset must not be set in that case.
+ newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
+ // Add room for special values (errorValue, highValue) and padding.
+ newDataCapacity += 4;
+ int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
+ int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
+ int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+ for (int32_t i = 0; i < iLimit; i += inc) {
+ if (i == fastILimit) { // Past the fast range: switch to single small blocks.
+ blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
+ inc = 1;
+ }
+ uint32_t value = index[i];
+ if (flags[i] == MIXED) {
+ // Really mixed?
+ const uint32_t *p = data + value;
+ value = *p;
+ if (allValuesSameAs(p + 1, blockLength - 1, value)) {
+ flags[i] = ALL_SAME;
+ index[i] = value;
+ // Fall through to ALL_SAME handling.
+ } else {
+ newDataCapacity += blockLength;
+ continue;
+ }
+ } else {
+ U_ASSERT(flags[i] == ALL_SAME);
+ if (inc > 1) {
+ // Do all of the fast-range data block's ALL_SAME parts have the same value?
+ bool allSame = true;
+ int32_t next_i = i + inc;
+ for (int32_t j = i + 1; j < next_i; ++j) {
+ U_ASSERT(flags[j] == ALL_SAME);
+ if (index[j] != value) {
+ allSame = false;
+ break;
+ }
+ }
+ if (!allSame) {
+ // Turn it into a MIXED block.
+ if (getDataBlock(i) < 0) {
+ return -1;
+ }
+ newDataCapacity += blockLength;
+ continue;
+ }
+ }
+ }
+ // Is there another ALL_SAME block with the same value?
+ int32_t other = allSameBlocks.findOrAdd(i, inc, value);
+ if (other == AllSameBlocks::OVERFLOW) {
+ // The fixed-size array overflowed. Slow check for a duplicate block.
+#ifdef UCPTRIE_DEBUG
+ if (!overflow) {
+ puts("UCPTrie AllSameBlocks overflow");
+ overflow = true;
+ }
+#endif
+ int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+ for (int32_t j = 0;; j += jInc) {
+ if (j == i) { // No earlier duplicate: cache this block itself.
+ allSameBlocks.add(i, inc, value);
+ break;
+ }
+ if (j == fastILimit) {
+ jInc = 1;
+ }
+ if (flags[j] == ALL_SAME && index[j] == value) {
+ allSameBlocks.add(j, jInc + inc, value); // Count both the earlier block and this one.
+ other = j;
+ break;
+ // We could keep counting blocks with the same value
+ // before we add the first one, which may improve compaction in rare cases,
+ // but it would make it slower.
+ }
+ }
+ }
+ if (other >= 0) {
+ flags[i] = SAME_AS;
+ index[i] = other;
+ } else {
+ // New unique same-value block.
+ newDataCapacity += blockLength;
+ }
+ }
+ return newDataCapacity;
+}
+
+#ifdef UCPTRIE_DEBUG
+# define DEBUG_DO(expr) expr
+#else
+# define DEBUG_DO(expr)
+#endif
+
+#ifdef UCPTRIE_DEBUG
+// Braille symbols: U+28xx = UTF-8 E2 A0 80..E2 A3 BF
+// Debug helper: folds value down to 8 bits and appends the matching Braille
+// symbol as 3 UTF-8 bytes at s[length]. Returns the new length (length + 3).
+int32_t appendValue(char s[], int32_t length, uint32_t value) {
+ value ^= value >> 16; // Fold the upper bits into the low byte.
+ value ^= value >> 8;
+ s[length] = 0xE2;
+ s[length + 1] = (char)(0xA0 + ((value >> 6) & 3));
+ s[length + 2] = (char)(0x80 + (value & 0x3F));
+ return length + 3;
+}
+/**
+ * Debug helper: prints one line that visualizes a data block.
+ * The first overlap positions are shown as Braille blanks, followed by '|'
+ * and one Braille symbol per remaining value. If block is nullptr, value is
+ * used for every position. Positions equal to initialValue are drawn as
+ * a single lower left dot. The line is prefixed with start + overlap in hex.
+ */
+void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value,
+ UChar32 start, int32_t overlap, uint32_t initialValue) {
+ char s[UCPTRIE_FAST_DATA_BLOCK_LENGTH * 3 + 3]; // Sized for 3 bytes per symbol plus 3 spare bytes.
+ int32_t length = 0;
+ int32_t i;
+ for (i = 0; i < overlap; ++i) {
+ length = appendValue(s, length, 0); // Braille blank
+ }
+ s[length++] = '|';
+ for (; i < blockLength; ++i) {
+ if (block != nullptr) {
+ value = block[i];
+ }
+ if (value == initialValue) {
+ value = 0x40; // Braille lower left dot
+ }
+ length = appendValue(s, length, value);
+ }
+ s[length] = 0;
+ start += overlap;
+ if (start <= 0xffff) {
+ printf(" %04lX %s|\n", (long)start, s);
+ } else if (start <= 0xfffff) {
+ printf(" %5lX %s|\n", (long)start, s);
+ } else {
+ printf(" %6lX %s|\n", (long)start, s);
+ }
+}
+#endif
+
+/**
+ * Compacts a build-time trie.
+ *
+ * The compaction
+ * - removes blocks that are identical with earlier ones
+ * - overlaps each new non-duplicate block as much as possible with the previously-written one
+ * - works with fast-range data blocks whose length is a multiple of that of
+ * higher-code-point data blocks
+ *
+ * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.
+ *
+ * On return, each visited index entry holds the offset of its block in newData.
+ * The caller must have copied the linear ASCII data into newData already.
+ *
+ * @param fastILimit index limit of the fast range, which uses the longer fast data blocks
+ * @param newData output array for the compacted data
+ * @param newDataCapacity capacity of newData; used to size the mixedBlocks hash table
+ * @param dataNullIndex index of the data null block (compared against i)
+ * @param mixedBlocks hash table used for finding duplicate blocks; initialized here
+ * @param errorCode set to U_MEMORY_ALLOCATION_ERROR if mixedBlocks.init() fails
+ * @return the length of the compacted data in newData; 0 on failure
+ */
+int32_t MutableCodePointTrie::compactData(
+ int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
+ int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode) {
+#ifdef UCPTRIE_DEBUG
+ int32_t countSame=0, sumOverlaps=0;
+ bool printData = dataLength == 29088 /* line.brk */ ||
+ // dataLength == 30048 /* CanonIterData */ ||
+ dataLength == 50400 /* zh.txt~stroke */;
+#endif
+
+ // The linear ASCII data has been copied into newData already.
+ int32_t newDataLength = 0;
+ for (int32_t i = 0; newDataLength < ASCII_LIMIT;
+ newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
+ index[i] = newDataLength;
+#ifdef UCPTRIE_DEBUG
+ if (printData) {
+ printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue);
+ }
+#endif
+ }
+
+ int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
+ if (!mixedBlocks.init(newDataCapacity, blockLength)) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ mixedBlocks.extend(newData, 0, 0, newDataLength);
+
+ int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
+ int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+ int32_t fastLength = 0;
+ for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) {
+ if (i == fastILimit) { // Switch to small blocks and rebuild the hash table for their length.
+ blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
+ inc = 1;
+ fastLength = newDataLength;
+ if (!mixedBlocks.init(newDataCapacity, blockLength)) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ mixedBlocks.extend(newData, 0, 0, newDataLength);
+ }
+ if (flags[i] == ALL_SAME) {
+ uint32_t value = index[i];
+ // Find an earlier part of the data array of length blockLength
+ // that is filled with this value.
+ int32_t n = mixedBlocks.findAllSameBlock(newData, value);
+ // If we find a match, and the current block is the data null block,
+ // and it is not a fast block but matches the start of a fast block,
+ // then we need to continue looking.
+ // This is because this small block is shorter than the fast block,
+ // and not all of the rest of the fast block is filled with this value.
+ // Otherwise trie.getRange() would detect that the fast block starts at
+ // dataNullOffset and assume incorrectly that it is filled with the null value.
+ while (n >= 0 && i == dataNullIndex && i >= fastILimit && n < fastLength &&
+ isStartOfSomeFastBlock(n, index, fastILimit)) {
+ n = findAllSameBlock(newData, n + 1, newDataLength, value, blockLength);
+ }
+ if (n >= 0) {
+ DEBUG_DO(++countSame);
+ index[i] = n;
+ } else {
+ n = getAllSameOverlap(newData, newDataLength, value, blockLength);
+ DEBUG_DO(sumOverlaps += n);
+#ifdef UCPTRIE_DEBUG
+ if (printData) {
+ printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue);
+ }
+#endif
+ index[i] = newDataLength - n; // The block starts inside the overlapping tail.
+ int32_t prevDataLength = newDataLength;
+ while (n < blockLength) {
+ newData[newDataLength++] = value;
+ ++n;
+ }
+ mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
+ }
+ } else if (flags[i] == MIXED) {
+ const uint32_t *block = data + index[i];
+ int32_t n = mixedBlocks.findBlock(newData, block, 0);
+ if (n >= 0) {
+ DEBUG_DO(++countSame);
+ index[i] = n;
+ } else {
+ n = getOverlap(newData, newDataLength, block, 0, blockLength);
+ DEBUG_DO(sumOverlaps += n);
+#ifdef UCPTRIE_DEBUG
+ if (printData) {
+ printBlock(block, blockLength, 0, i << UCPTRIE_SHIFT_3, n, initialValue);
+ }
+#endif
+ index[i] = newDataLength - n;
+ int32_t prevDataLength = newDataLength;
+ while (n < blockLength) { // Append only the part that does not overlap.
+ newData[newDataLength++] = block[n++];
+ }
+ mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
+ }
+ } else /* SAME_AS */ {
+ uint32_t j = index[i]; // Index of the earlier block with the same value.
+ index[i] = index[j];
+ }
+ }
+
+#ifdef UCPTRIE_DEBUG
+ /* we saved some space */
+ printf("compacting UCPTrie: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n",
+ (long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps);
+#endif
+ return newDataLength;
+}
+/**
+ * Builds the compacted 16-bit index from the build-time index.
+ *
+ * If highStart lies within the range covered by the fast index, only the
+ * linear fast index is needed: index3NullOffset is set to
+ * UCPTRIE_NO_INDEX3_NULL_OFFSET and nothing is allocated here.
+ * Otherwise index16 is allocated and filled with the fast index, index-1,
+ * index-3 and index-2 tables, with duplicate blocks removed and new blocks
+ * overlapped with the previously written one where possible.
+ *
+ * @param fastILimit index limit of the fast range
+ * @param mixedBlocks hash table used for finding duplicate blocks; re-initialized here
+ * @param errorCode set to U_MEMORY_ALLOCATION_ERROR on allocation failure, or to
+ * U_INDEX_OUTOFBOUNDS_ERROR if the index-3 offsets do not fit
+ * @return the index length in 16-bit units; 0 on failure
+ */
+int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks,
+ UErrorCode &errorCode) {
+ int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3);
+ if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) {
+ // Only the linear fast index, no multi-stage index tables.
+ index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
+ return fastIndexLength;
+ }
+
+ // Condense the fast index table.
+ // Also, does it contain an index-3 block with all dataNullOffset?
+ uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH]; // fastIndexLength
+ int32_t i3FirstNull = -1;
+ for (int32_t i = 0, j = 0; i < fastILimit; ++j) {
+ uint32_t i3 = index[i];
+ fastIndex[j] = (uint16_t)i3;
+ if (i3 == (uint32_t)dataNullOffset) {
+ if (i3FirstNull < 0) {
+ i3FirstNull = j;
+ } else if (index3NullOffset < 0 &&
+ (j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) {
+ index3NullOffset = i3FirstNull;
+ }
+ } else {
+ i3FirstNull = -1;
+ }
+ // Set the index entries that compactData() skipped.
+ // Needed when the multi-stage index covers the fast index range as well.
+ int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+ while (++i < iNext) {
+ i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
+ index[i] = i3;
+ }
+ }
+
+ if (!mixedBlocks.init(fastIndexLength, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ mixedBlocks.extend(fastIndex, 0, 0, fastIndexLength);
+
+ // Examine index-3 blocks. For each determine one of:
+ // - same as the index-3 null block
+ // - same as a fast-index block
+ // - 16-bit indexes
+ // - 18-bit indexes
+ // We store this in the first flags entry for the index-3 block.
+ //
+ // Also determine an upper limit for the index-3 table length.
+ int32_t index3Capacity = 0;
+ i3FirstNull = index3NullOffset;
+ bool hasLongI3Blocks = false;
+ // If the fast index covers the whole BMP, then
+ // the multi-stage index is only for supplementary code points.
+ // Otherwise, the multi-stage index covers all of Unicode.
+ int32_t iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT;
+ int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
+ for (int32_t i = iStart; i < iLimit;) {
+ int32_t j = i;
+ int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
+ uint32_t oredI3 = 0; // OR of all data offsets in this block, to test whether they fit into 16 bits.
+ bool isNull = true;
+ do {
+ uint32_t i3 = index[j];
+ oredI3 |= i3;
+ if (i3 != (uint32_t)dataNullOffset) {
+ isNull = false;
+ }
+ } while (++j < jLimit);
+ if (isNull) {
+ flags[i] = I3_NULL;
+ if (i3FirstNull < 0) { // Reserve room for the first null block only.
+ if (oredI3 <= 0xffff) {
+ index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
+ } else {
+ index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
+ hasLongI3Blocks = true;
+ }
+ i3FirstNull = 0;
+ }
+ } else {
+ if (oredI3 <= 0xffff) {
+ int32_t n = mixedBlocks.findBlock(fastIndex, index, i);
+ if (n >= 0) {
+ flags[i] = I3_BMP;
+ index[i] = n;
+ } else {
+ flags[i] = I3_16;
+ index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
+ }
+ } else {
+ flags[i] = I3_18;
+ index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
+ hasLongI3Blocks = true;
+ }
+ }
+ i = j;
+ }
+
+ int32_t index2Capacity = (iLimit - iStart) >> UCPTRIE_SHIFT_2_3;
+
+ // Length of the index-1 table, rounded up.
+ int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2;
+
+ // Index table: Fast index, index-1, index-3, index-2.
+ // +1 for possible index table padding.
+ int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1;
+ index16 = (uint16_t *)uprv_malloc(index16Capacity * 2);
+ if (index16 == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ uprv_memcpy(index16, fastIndex, fastIndexLength * 2);
+
+ if (!mixedBlocks.init(index16Capacity, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ MixedBlocks longI3Blocks;
+ if (hasLongI3Blocks) {
+ if (!longI3Blocks.init(index16Capacity, INDEX_3_18BIT_BLOCK_LENGTH)) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ }
+
+ // Compact the index-3 table and write an uncompacted version of the index-2 table.
+ uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2]; // index2Capacity
+ int32_t i2Length = 0;
+ i3FirstNull = index3NullOffset;
+ int32_t index3Start = fastIndexLength + index1Length;
+ int32_t indexLength = index3Start;
+ for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) {
+ int32_t i3;
+ uint8_t f = flags[i];
+ if (f == I3_NULL && i3FirstNull < 0) {
+ // First index-3 null block. Write & overlap it like a normal block, then remember it.
+ f = dataNullOffset <= 0xffff ? I3_16 : I3_18;
+ i3FirstNull = 0;
+ }
+ if (f == I3_NULL) {
+ i3 = index3NullOffset;
+ } else if (f == I3_BMP) {
+ i3 = index[i];
+ } else if (f == I3_16) {
+ int32_t n = mixedBlocks.findBlock(index16, index, i);
+ if (n >= 0) {
+ i3 = n;
+ } else {
+ if (indexLength == index3Start) {
+ // No overlap at the boundary between the index-1 and index-3 tables.
+ n = 0;
+ } else {
+ n = getOverlap(index16, indexLength,
+ index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
+ }
+ i3 = indexLength - n;
+ int32_t prevIndexLength = indexLength;
+ while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) {
+ index16[indexLength++] = index[i + n++];
+ }
+ mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
+ if (hasLongI3Blocks) {
+ longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
+ }
+ }
+ } else {
+ U_ASSERT(f == I3_18);
+ U_ASSERT(hasLongI3Blocks);
+ // Encode an index-3 block that contains one or more data indexes exceeding 16 bits.
+ int32_t j = i;
+ int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
+ int32_t k = indexLength; // Scratch position past the current end of index16.
+ do {
+ ++k; // Leave room for the unit that collects the upper bits of the next 8 entries.
+ uint32_t v = index[j++];
+ uint32_t upperBits = (v & 0x30000) >> 2;
+ index16[k++] = v;
+ v = index[j++];
+ upperBits |= (v & 0x30000) >> 4;
+ index16[k++] = v;
+ v = index[j++];
+ upperBits |= (v & 0x30000) >> 6;
+ index16[k++] = v;
+ v = index[j++];
+ upperBits |= (v & 0x30000) >> 8;
+ index16[k++] = v;
+ v = index[j++];
+ upperBits |= (v & 0x30000) >> 10;
+ index16[k++] = v;
+ v = index[j++];
+ upperBits |= (v & 0x30000) >> 12;
+ index16[k++] = v;
+ v = index[j++];
+ upperBits |= (v & 0x30000) >> 14;
+ index16[k++] = v;
+ v = index[j++];
+ upperBits |= (v & 0x30000) >> 16;
+ index16[k++] = v;
+ index16[k - 9] = upperBits; // Fill the unit reserved before these 8 entries.
+ } while (j < jLimit);
+ int32_t n = longI3Blocks.findBlock(index16, index16, indexLength);
+ if (n >= 0) {
+ i3 = n | 0x8000; // Bit 15 is set in the offset of every 18-bit block.
+ } else {
+ if (indexLength == index3Start) {
+ // No overlap at the boundary between the index-1 and index-3 tables.
+ n = 0;
+ } else {
+ n = getOverlap(index16, indexLength,
+ index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
+ }
+ i3 = (indexLength - n) | 0x8000;
+ int32_t prevIndexLength = indexLength;
+ if (n > 0) {
+ int32_t start = indexLength;
+ while (n < INDEX_3_18BIT_BLOCK_LENGTH) { // Move the non-overlapping part down.
+ index16[indexLength++] = index16[start + n++];
+ }
+ } else {
+ indexLength += INDEX_3_18BIT_BLOCK_LENGTH; // The encoded block is already in place.
+ }
+ mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
+ if (hasLongI3Blocks) {
+ longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
+ }
+ }
+ }
+ if (index3NullOffset < 0 && i3FirstNull >= 0) {
+ index3NullOffset = i3;
+ }
+ // Set the index-2 table entry.
+ index2[i2Length++] = i3;
+ }
+ U_ASSERT(i2Length == index2Capacity);
+ U_ASSERT(indexLength <= index3Start + index3Capacity);
+
+ if (index3NullOffset < 0) {
+ index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
+ }
+ if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
+ // The index-3 offsets exceed 15 bits, or
+ // the last one cannot be distinguished from the no-null-block value.
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ // Compact the index-2 table and write the index-1 table.
+ static_assert(UCPTRIE_INDEX_2_BLOCK_LENGTH == UCPTRIE_INDEX_3_BLOCK_LENGTH,
+ "must re-init mixedBlocks");
+ int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH;
+ int32_t i1 = fastIndexLength; // The index-1 table follows the fast index.
+ for (int32_t i = 0; i < i2Length; i += blockLength) {
+ int32_t n;
+ if ((i2Length - i) >= blockLength) {
+ // normal block
+ U_ASSERT(blockLength == UCPTRIE_INDEX_2_BLOCK_LENGTH);
+ n = mixedBlocks.findBlock(index16, index2, i);
+ } else {
+ // highStart is inside the last index-2 block. Shorten it.
+ blockLength = i2Length - i;
+ n = findSameBlock(index16, index3Start, indexLength,
+ index2, i, blockLength);
+ }
+ int32_t i2;
+ if (n >= 0) {
+ i2 = n;
+ } else {
+ if (indexLength == index3Start) {
+ // No overlap at the boundary between the index-1 and index-3/2 tables.
+ n = 0;
+ } else {
+ n = getOverlap(index16, indexLength, index2, i, blockLength);
+ }
+ i2 = indexLength - n;
+ int32_t prevIndexLength = indexLength;
+ while (n < blockLength) {
+ index16[indexLength++] = index2[i + n++];
+ }
+ mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
+ }
+ // Set the index-1 table entry.
+ index16[i1++] = i2;
+ }
+ U_ASSERT(i1 == index3Start);
+ U_ASSERT(indexLength <= index16Capacity);
+
+#ifdef UCPTRIE_DEBUG
+ /* we saved some space */
+ printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n",
+ (long)iLimit, (long)indexLength);
+#endif
+
+ return indexLength;
+}
+
+/**
+ * Compacts the build-time trie in place.
+ *
+ * Determines highValue and the real highStart, compacts the data array into a
+ * newly allocated array that replaces the old one, chooses the data null
+ * block (the most-used same-value block, whose value becomes initialValue),
+ * and finally compacts the index via compactIndex().
+ *
+ * @param fastILimit index limit of the fast range; indexes and data values
+ *        are always stored for this range
+ * @param errorCode set to U_MEMORY_ALLOCATION_ERROR on allocation failure, or to
+ *        U_INDEX_OUTOFBOUNDS_ERROR if the compacted data is too long
+ * @return the compacted index length as returned by compactIndex(); 0 on failure
+ */
+int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) {
+    // Find the real highStart and round it up.
+    U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);
+    highValue = get(MAX_UNICODE);
+    int32_t realHighStart = findHighStart();
+    realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) &
+        ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
+    if (realHighStart == UNICODE_LIMIT) {
+        highValue = initialValue;
+    }
+
+#ifdef UCPTRIE_DEBUG
+    printf("UCPTrie: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n",
+            (long)realHighStart, (long)highValue, (long)initialValue);
+#endif
+
+    // We always store indexes and data values for the fast range.
+    // Pin highStart to the top of that range while building.
+    UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3;
+    if (realHighStart < fastLimit) {
+        for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) {
+            flags[i] = ALL_SAME;
+            index[i] = highValue;
+        }
+        highStart = fastLimit;
+    } else {
+        highStart = realHighStart;
+    }
+
+    // Capture the ASCII values before compaction rewrites flags and index.
+    uint32_t asciiData[ASCII_LIMIT];
+    for (int32_t i = 0; i < ASCII_LIMIT; ++i) {
+        asciiData[i] = get(i);
+    }
+
+    // First we look for which data blocks have the same value repeated over the whole block,
+    // deduplicate such blocks, find a good null data block (for faster enumeration),
+    // and get an upper bound for the necessary data array length.
+    AllSameBlocks allSameBlocks;
+    int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks);
+    if (newDataCapacity < 0) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    uint32_t *newData = (uint32_t *)uprv_malloc(newDataCapacity * 4);
+    if (newData == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    uprv_memcpy(newData, asciiData, sizeof(asciiData));
+
+    int32_t dataNullIndex = allSameBlocks.findMostUsed();
+
+    MixedBlocks mixedBlocks;
+    int32_t newDataLength = compactData(fastILimit, newData, newDataCapacity,
+                                        dataNullIndex, mixedBlocks, errorCode);
+    if (U_FAILURE(errorCode)) {
+        // newData has not been adopted as the data array yet;
+        // release it here so that it does not leak.
+        uprv_free(newData);
+        return 0;
+    }
+    U_ASSERT(newDataLength <= newDataCapacity);
+    // From here on the trie owns newData.
+    uprv_free(data);
+    data = newData;
+    dataCapacity = newDataCapacity;
+    dataLength = newDataLength;
+    if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) {
+        // The offset of the last data block is too high to be stored in the index table.
+        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    if (dataNullIndex >= 0) {
+        dataNullOffset = index[dataNullIndex];
+#ifdef UCPTRIE_DEBUG
+        if (data[dataNullOffset] != initialValue) {
+            printf("UCPTrie initialValue %lx -> more common nullValue %lx\n",
+                    (long)initialValue, (long)data[dataNullOffset]);
+        }
+#endif
+        initialValue = data[dataNullOffset];
+    } else {
+        dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET;
+    }
+
+    int32_t indexLength = compactIndex(fastILimit, mixedBlocks, errorCode);
+    // Undo the pinning of highStart to the top of the fast range.
+    highStart = realHighStart;
+    return indexLength;
+}
+// Compacts this mutable trie and serializes it into one newly allocated UCPTrie block; returns nullptr with errorCode set on failure.
+UCPTrie *MutableCodePointTrie::build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode)) {
+ return nullptr;
+ }
+ if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type ||
+ valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR; // invalid arguments are rejected before the trie is modified
+ return nullptr;
+ }
+
+ // The mutable trie always stores 32-bit values.
+ // When we build a UCPTrie for a smaller value width, we first mask off unused bits
+ // before compacting the data.
+ switch (valueWidth) {
+ case UCPTRIE_VALUE_BITS_32:
+ break;
+ case UCPTRIE_VALUE_BITS_16:
+ maskValues(0xffff);
+ break;
+ case UCPTRIE_VALUE_BITS_8:
+ maskValues(0xff);
+ break;
+ default: // not reached: valueWidth was range-checked above
+ break;
+ }
+
+ UChar32 fastLimit = type == UCPTRIE_TYPE_FAST ? BMP_LIMIT : UCPTRIE_SMALL_LIMIT;
+ int32_t indexLength = compactTrie(fastLimit >> UCPTRIE_SHIFT_3, errorCode); // argument is the fast limit expressed in index entries
+ if (U_FAILURE(errorCode)) {
+ clear(); // from here on, every exit path calls clear()
+ return nullptr;
+ }
+
+ // Ensure data table alignment: The index length must be even for uint32_t data.
+ if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) {
+ index16[indexLength++] = 0xffee; // arbitrary value
+ }
+
+ // Make the total trie structure length a multiple of 4 bytes by padding the data table,
+ // and store special values as the last two data values.
+ int32_t length = indexLength * 2; // running byte count; each index entry is 2 bytes
+ if (valueWidth == UCPTRIE_VALUE_BITS_16) {
+ if (((indexLength ^ dataLength) & 1) != 0) {
+ // padding
+ data[dataLength++] = errorValue;
+ }
+ if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { // reuse an existing highValue,errorValue tail if present
+ data[dataLength++] = highValue;
+ data[dataLength++] = errorValue;
+ }
+ length += dataLength * 2;
+ } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
+ // 32-bit data words never need padding to a multiple of 4 bytes.
+ if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
+ if (data[dataLength - 1] != highValue) {
+ data[dataLength++] = highValue;
+ }
+ data[dataLength++] = errorValue;
+ }
+ length += dataLength * 4;
+ } else {
+ int32_t and3 = (length + dataLength) & 3; // 8-bit values: byte length modulo 4 before the special values are added
+ if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) {
+ // all set
+ } else if(and3 == 3 && data[dataLength - 1] == highValue) {
+ data[dataLength++] = errorValue;
+ } else {
+ while (and3 != 2) { // pad with highValue so that the two special values end on a 4-byte boundary
+ data[dataLength++] = highValue;
+ and3 = (and3 + 1) & 3;
+ }
+ data[dataLength++] = highValue;
+ data[dataLength++] = errorValue;
+ }
+ length += dataLength;
+ }
+
+ // Calculate the total length of the UCPTrie as a single memory block.
+ length += sizeof(UCPTrie);
+ U_ASSERT((length & 3) == 0);
+
+ uint8_t *bytes = (uint8_t *)uprv_malloc(length);
+ if (bytes == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ clear();
+ return nullptr;
+ }
+ UCPTrie *trie = reinterpret_cast<UCPTrie *>(bytes); // the header struct sits at the start of the allocation
+ uprv_memset(trie, 0, sizeof(UCPTrie));
+ trie->indexLength = indexLength;
+ trie->dataLength = dataLength;
+
+ trie->highStart = highStart;
+ // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes.
+ // Runtime code needs to then test for the real highStart as well.
+ trie->shifted12HighStart = (highStart + 0xfff) >> 12;
+ trie->type = type;
+ trie->valueWidth = valueWidth;
+
+ trie->index3NullOffset = index3NullOffset;
+ trie->dataNullOffset = dataNullOffset;
+ trie->nullValue = initialValue;
+
+ bytes += sizeof(UCPTrie); // the index array follows the header directly
+
+ // Fill the index and data arrays.
+ uint16_t *dest16 = (uint16_t *)bytes;
+ trie->index = dest16;
+
+ if (highStart <= fastLimit) {
+ // Condense only the fast index from the mutable-trie index.
+ for (int32_t i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) {
+ *dest16++ = (uint16_t)index[i]; // dest16[j]
+ }
+ } else {
+ uprv_memcpy(dest16, index16, indexLength * 2);
+ dest16 += indexLength;
+ }
+ bytes += indexLength * 2; // bytes and dest16 now both point just past the index
+
+ // Write the data array.
+ const uint32_t *p = data;
+ switch (valueWidth) {
+ case UCPTRIE_VALUE_BITS_16:
+ // Write 16-bit data values.
+ trie->data.ptr16 = dest16;
+ for (int32_t i = dataLength; i > 0; --i) {
+ *dest16++ = (uint16_t)*p++;
+ }
+ break;
+ case UCPTRIE_VALUE_BITS_32:
+ // Write 32-bit data values.
+ trie->data.ptr32 = (uint32_t *)bytes;
+ uprv_memcpy(bytes, p, (size_t)dataLength * 4);
+ break;
+ case UCPTRIE_VALUE_BITS_8:
+ // Write 8-bit data values.
+ trie->data.ptr8 = bytes;
+ for (int32_t i = dataLength; i > 0; --i) {
+ *bytes++ = (uint8_t)*p++;
+ }
+ break;
+ default:
+ // Will not occur, valueWidth checked at the beginning.
+ break;
+ }
+
+#ifdef UCPTRIE_DEBUG
+ trie->name = name;
+
+ ucptrie_printLengths(trie, "");
+#endif
+
+ clear(); // the mutable trie is cleared on success as well, after its contents were copied out
+ return trie;
+}
+
+} // namespace
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
+U_CAPI UMutableCPTrie * U_EXPORT2
+umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
+    // Creates a new mutable trie; returns nullptr if *pErrorCode is, or becomes, a failure.
+    if (U_FAILURE(*pErrorCode)) { return nullptr; }
+    UErrorCode &status = *pErrorCode;
+    // The LocalPointer owns the new object until it is released to the caller,
+    // so nothing is handed out when construction reports an error.
+    LocalPointer<MutableCodePointTrie> owner(
+        new MutableCodePointTrie(initialValue, errorValue, status), status);
+    return U_SUCCESS(status) ?
+        reinterpret_cast<UMutableCPTrie *>(owner.orphan()) : nullptr;
+}
+
+U_CAPI UMutableCPTrie * U_EXPORT2
+umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode) {
+    // Copy-constructs a new trie from `other`. A null `other` yields nullptr and leaves *pErrorCode unchanged.
+    if (U_FAILURE(*pErrorCode) || other == nullptr) {
+        return nullptr;
+    }
+    UErrorCode &status = *pErrorCode;
+    const auto &source = *reinterpret_cast<const MutableCodePointTrie *>(other);
+    // Keep the copy in a LocalPointer until the copy constructor is known to have succeeded.
+    LocalPointer<MutableCodePointTrie> copy(new MutableCodePointTrie(source, status), status);
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+    return reinterpret_cast<UMutableCPTrie *>(copy.orphan());
+}
+
+U_CAPI void U_EXPORT2 // Destroys a trie created by one of the umutablecptrie_ factory functions.
+umutablecptrie_close(UMutableCPTrie *trie) {
+ delete reinterpret_cast<MutableCodePointTrie *>(trie); // delete on a null pointer is a no-op, so nullptr is accepted
+}
+
+U_CAPI UMutableCPTrie * U_EXPORT2
+umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode) {
+    // C wrapper around MutableCodePointTrie::fromUCPMap().
+    // A pending failure wins over the null-argument check.
+    if (U_FAILURE(*pErrorCode)) { return nullptr; }
+    if (map != nullptr) {
+        return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPMap(map, *pErrorCode));
+    }
+    *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+    return nullptr;
+}
+
+U_CAPI UMutableCPTrie * U_EXPORT2
+umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) {
+    // C wrapper around MutableCodePointTrie::fromUCPTrie().
+    // A pending failure wins over the null-argument check.
+    if (U_FAILURE(*pErrorCode)) { return nullptr; }
+    if (trie != nullptr) {
+        return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPTrie(trie, *pErrorCode));
+    }
+    *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+    return nullptr;
+}
+
+U_CAPI uint32_t U_EXPORT2 // Returns what MutableCodePointTrie::get() reports for code point c.
+umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) {
+ return reinterpret_cast<const MutableCodePointTrie *>(trie)->get(c); // trie is dereferenced without a null check
+}
+
+namespace {
+// Adapter with an opaque-pointer signature: casts trie back to MutableCodePointTrie and forwards to its getRange().
+UChar32 getRange(const void *trie, UChar32 start,
+ UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
+ return reinterpret_cast<const MutableCodePointTrie *>(trie)->
+ getRange(start, filter, context, pValue);
+}
+
+} // namespace
+
+U_CAPI UChar32 U_EXPORT2 // Range lookup; option and surrogateValue are handled by ucptrie_internalGetRange().
+umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
+ UCPMapRangeOption option, uint32_t surrogateValue,
+ UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
+ return ucptrie_internalGetRange(getRange, trie, start, // the file-local getRange() adapter serves as the callback
+ option, surrogateValue,
+ filter, context, pValue);
+}
+
+U_CAPI void U_EXPORT2
+umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
+    // Forwards to MutableCodePointTrie::set() unless an error is already pending.
+    if (U_SUCCESS(*pErrorCode)) {
+        reinterpret_cast<MutableCodePointTrie *>(trie)->set(c, value, *pErrorCode);
+    }
+}
+
+U_CAPI void U_EXPORT2
+umutablecptrie_setRange(UMutableCPTrie *trie, UChar32 start, UChar32 end,
+        uint32_t value, UErrorCode *pErrorCode) {
+    // Forwards to MutableCodePointTrie::setRange() unless an error is already pending.
+    if (U_SUCCESS(*pErrorCode)) {
+        reinterpret_cast<MutableCodePointTrie *>(trie)->setRange(start, end, value, *pErrorCode);
+    }
+}
+
+/* Compact and internally serialize the trie. */
+U_CAPI UCPTrie * U_EXPORT2
+umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth,
+        UErrorCode *pErrorCode) {
+    // Nothing is built while an earlier failure is pending;
+    // otherwise the work is done by MutableCodePointTrie::build().
+    return U_FAILURE(*pErrorCode) ? nullptr :
+        reinterpret_cast<MutableCodePointTrie *>(trie)->build(type, valueWidth, *pErrorCode);
+}
+
+#ifdef UCPTRIE_DEBUG
+U_CFUNC void umutablecptrie_setName(UMutableCPTrie *trie, const char *name) { // debug builds only
+ reinterpret_cast<MutableCodePointTrie *>(trie)->name = name; // stores the pointer itself; the string is not copied here
+}
+#endif
diff --git a/thirdparty/icu4c/common/umutex.cpp b/thirdparty/icu4c/common/umutex.cpp
new file mode 100644
index 0000000000..ccbee9960a
--- /dev/null
+++ b/thirdparty/icu4c/common/umutex.cpp
@@ -0,0 +1,204 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File umutex.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 04/02/97 aliu Creation.
+* 04/07/99 srl updated
+* 05/13/99 stephen Changed to umutex (from cmutex).
+* 11/22/99 aliu Make non-global mutex autoinitialize [j151]
+******************************************************************************
+*/
+
+#include "umutex.h"
+
+#include "unicode/utypes.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+
+#if defined(U_USER_MUTEX_CPP)
+// Support for including an alternate implementation of mutexes has been withdrawn.
+// See issue ICU-20185.
+#error U_USER_MUTEX_CPP not supported
+#endif
+
+
+/*************************************************************************************************
+ *
+ * ICU Mutex wrappers.
+ *
+ *************************************************************************************************/
+
+namespace {
+std::mutex *initMutex; // set by umtx_init(); held for UInitOnce state changes and for lazy UMutex construction
+std::condition_variable *initCondition; // set by umtx_init(); notified by umtx_initImplPostInit()
+
+// The ICU global mutex.
+// Used when ICU implementation code passes nullptr for the mutex pointer.
+UMutex globalMutex;
+
+std::once_flag initFlag; // ensures umtx_init() runs once; re-created in place by umtx_cleanup()
+std::once_flag *pInitFlag = &initFlag; // all call_once() calls go through this pointer
+
+} // Anonymous namespace
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV umtx_cleanup() { // cleanup callback registered by umtx_init()
+ initMutex->~mutex(); // explicit destructor calls: the objects were created with STATIC_NEW in umtx_init()
+ initCondition->~condition_variable();
+ UMutex::cleanup(); // destroys the std::mutex inside every UMutex on the gListHead list
+
+ // Reset the once_flag, by destructing it and creating a fresh one in its place.
+ // Do not use this trick anywhere else in ICU; use umtx_initOnce, not std::call_once().
+ pInitFlag->~once_flag();
+ pInitFlag = new(&initFlag) std::once_flag(); // after this, the next call_once() runs umtx_init() again
+ return true;
+}
+
+static void U_CALLCONV umtx_init() { // invoked through std::call_once(*pInitFlag, ...)
+ initMutex = STATIC_NEW(std::mutex);
+ initCondition = STATIC_NEW(std::condition_variable);
+ ucln_common_registerCleanup(UCLN_COMMON_MUTEX, umtx_cleanup); // umtx_cleanup() destroys both objects again
+}
+U_CDECL_END
+
+
+std::mutex *UMutex::getMutex() { // Lazily constructs this UMutex's std::mutex on first use; never returns nullptr.
+ std::mutex *retPtr = fMutex.load(std::memory_order_acquire);
+ if (retPtr == nullptr) {
+ std::call_once(*pInitFlag, umtx_init); // make sure initMutex exists before it is locked
+ std::lock_guard<std::mutex> guard(*initMutex);
+ retPtr = fMutex.load(std::memory_order_acquire); // re-check under the lock: another thread may have won the race
+ if (retPtr == nullptr) {
+ fMutex = new(fStorage) std::mutex(); // placement new into this object's own fStorage buffer
+ retPtr = fMutex;
+ fListLink = gListHead; // push onto the list that UMutex::cleanup() walks
+ gListHead = this;
+ }
+ }
+ U_ASSERT(retPtr != nullptr);
+ return retPtr;
+}
+
+UMutex *UMutex::gListHead = nullptr;
+
+void UMutex::cleanup() {
+    // Pop each registered UMutex off the list, destroying its std::mutex in place.
+    while (gListHead != nullptr) {
+        UMutex *current = gListHead;
+        current->fMutex.load()->~mutex();
+        current->fMutex = nullptr;
+        gListHead = current->fListLink;
+        current->fListLink = nullptr;
+    }
+}
+
+
+U_CAPI void U_EXPORT2
+umtx_lock(UMutex *mutex) {
+    // A null argument selects the ICU global mutex.
+    UMutex &target = (mutex != nullptr) ? *mutex : globalMutex;
+    // UMutex::lock() constructs the underlying std::mutex on first use.
+    target.lock();
+}
+
+
+U_CAPI void U_EXPORT2
+umtx_unlock(UMutex* mutex)
+{
+    // A null argument selects the ICU global mutex, mirroring umtx_lock().
+    UMutex &target = (mutex != nullptr) ? *mutex : globalMutex;
+    // UMutex::unlock() does not null-check its std::mutex, so a lock must have come first.
+    target.unlock();
+}
+
+
+/*************************************************************************************************
+ *
+ * UInitOnce Implementation
+ *
+ *************************************************************************************************/
+
+// This function is called when a test of a UInitOnce::fState reveals that
+// initialization has not completed, that we either need to call the init
+// function on this thread, or wait for some other thread to complete.
+//
+// The actual call to the init function is made inline by template code
+// that knows the C++ types involved. This function returns true if
+// the caller needs to call the Init function, and false once another
+// thread's initialization has completed (fState == 2).
+U_COMMON_API UBool U_EXPORT2
+umtx_initImplPreInit(UInitOnce &uio) {
+ std::call_once(*pInitFlag, umtx_init); // make sure initMutex and initCondition exist
+ std::unique_lock<std::mutex> lock(*initMutex);
+ if (umtx_loadAcquire(uio.fState) == 0) { // 0: nobody has started the initialization
+ umtx_storeRelease(uio.fState, 1); // 1: claimed by this thread, initialization in progress
+ return true; // Caller will next call the init function.
+ } else {
+ while (umtx_loadAcquire(uio.fState) == 1) {
+ // Another thread is currently running the initialization.
+ // Wait until it completes.
+ initCondition->wait(lock); // releases initMutex while blocked; woken by umtx_initImplPostInit()
+ }
+ U_ASSERT(uio.fState == 2); // 2: initialization complete
+ return false;
+ }
+}
+
+
+// This function is called by the thread that ran an initialization function,
+// just after completing the function.
+// Some threads may be waiting on the condition, requiring the broadcast wakeup.
+// Some threads may be racing to test the fState variable outside of the mutex,
+// requiring the use of store/release when changing its value.
+
+U_COMMON_API void U_EXPORT2
+umtx_initImplPostInit(UInitOnce &uio) {
+ { // inner scope: initMutex is released before the waiters are notified
+ std::unique_lock<std::mutex> lock(*initMutex);
+ umtx_storeRelease(uio.fState, 2); // 2: initialization complete
+ }
+ initCondition->notify_all(); // wakes every thread blocked in umtx_initImplPreInit()
+}
+
+U_NAMESPACE_END
+
+/*************************************************************************************************
+ *
+ * Deprecated functions for setting user mutexes.
+ *
+ *************************************************************************************************/
+
+U_DEPRECATED void U_EXPORT2
+u_setMutexFunctions(const void * /*context */, UMtxInitFn *, UMtxFn *,
+        UMtxFn *, UMtxFn *, UErrorCode *status) {
+    // All arguments except status are ignored: report "unsupported" unless a failure is already set.
+    if (!U_FAILURE(*status)) {
+        *status = U_UNSUPPORTED_ERROR;
+    }
+}
+
+
+
+U_DEPRECATED void U_EXPORT2
+u_setAtomicIncDecFunctions(const void * /*context */, UMtxAtomicFn *, UMtxAtomicFn *,
+        UErrorCode *status) {
+    // All arguments except status are ignored: report "unsupported" unless a failure is already set.
+    if (!U_FAILURE(*status)) {
+        *status = U_UNSUPPORTED_ERROR;
+    }
+}
diff --git a/thirdparty/icu4c/common/umutex.h b/thirdparty/icu4c/common/umutex.h
new file mode 100644
index 0000000000..8d76b3f3e6
--- /dev/null
+++ b/thirdparty/icu4c/common/umutex.h
@@ -0,0 +1,277 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1997-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File UMUTEX.H
+*
+* Modification History:
+*
+* Date Name Description
+* 04/02/97 aliu Creation.
+* 04/07/99 srl rewrite - C interface, multiple mutices
+* 05/13/99 stephen Changed to umutex (from cmutex)
+******************************************************************************
+*/
+
+#ifndef UMUTEX_H
+#define UMUTEX_H
+
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <type_traits>
+
+#include "unicode/utypes.h"
+#include "unicode/uclean.h"
+#include "unicode/uobject.h"
+
+#include "putilimp.h"
+
+#if defined(U_USER_ATOMICS_H) || defined(U_USER_MUTEX_H)
+// Support for including an alternate implementation of atomic & mutex operations has been withdrawn.
+// See issue ICU-20185.
+#error U_USER_ATOMICS and U_USER_MUTEX_H are not supported
+#endif
+
+// Export an explicit template instantiation of std::atomic<int32_t>.
+// When building DLLs for Windows this is required as it is used as a data member of the exported SharedObject class.
+// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.
+//
+// Similar story for std::atomic<std::mutex *>, and the exported UMutex class.
+#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN)
+#if defined(__clang__) || defined(_MSC_VER)
+ #if defined(__clang__)
+ // Suppress the warning that the explicit instantiation after explicit specialization has no effect.
+ #pragma clang diagnostic push
+ #pragma clang diagnostic ignored "-Winstantiation-after-specialization"
+ #endif
+template struct U_COMMON_API std::atomic<int32_t>;
+template struct U_COMMON_API std::atomic<std::mutex *>;
+ #if defined(__clang__)
+ #pragma clang diagnostic pop
+ #endif
+#elif defined(__GNUC__)
+// For GCC this class is already exported/visible, so no need for U_COMMON_API.
+template struct std::atomic<int32_t>;
+template struct std::atomic<std::mutex *>;
+#endif
+#endif
+
+
+U_NAMESPACE_BEGIN
+
+/****************************************************************************
+ *
+ * ICU wrappers for low-level atomic operations.
+ *
+ ****************************************************************************/
+
+typedef std::atomic<int32_t> u_atomic_int32_t;
+#define ATOMIC_INT32_T_INITIALIZER(val) ATOMIC_VAR_INIT(val) // NOTE(review): ATOMIC_VAR_INIT is deprecated as of C++20
+
+inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) { // atomic load with acquire ordering
+ return var.load(std::memory_order_acquire);
+}
+
+inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) { // atomic store with release ordering
+ var.store(val, std::memory_order_release);
+}
+
+inline int32_t umtx_atomic_inc(u_atomic_int32_t *var) {
+ return var->fetch_add(1) + 1; // fetch_add() returns the old value, so this is the incremented value
+}
+
+inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) {
+ return var->fetch_sub(1) - 1; // fetch_sub() returns the old value, so this is the decremented value
+}
+
+
+/*************************************************************************************************
+ *
+ * UInitOnce Definitions.
+ *
+ *************************************************************************************************/
+
+struct UInitOnce {
+ u_atomic_int32_t fState; // 0 = not initialized, 1 = initialization in progress, 2 = initialization complete
+ UErrorCode fErrCode; // error code stored by the umtx_initOnce() variants that take a UErrorCode
+ void reset() {fState = 0;}
+ UBool isReset() {return umtx_loadAcquire(fState) == 0;}
+// Note: isReset() is used by service registration code.
+// Thread safety of this usage needs review.
+};
+
+#define U_INITONCE_INITIALIZER {ATOMIC_INT32_T_INITIALIZER(0), U_ZERO_ERROR}
+
+
+U_COMMON_API UBool U_EXPORT2 umtx_initImplPreInit(UInitOnce &);
+U_COMMON_API void U_EXPORT2 umtx_initImplPostInit(UInitOnce &);
+
+template<class T> void umtx_initOnce(UInitOnce &uio, T *obj, void (U_CALLCONV T::*fp)()) {
+    // Member-function variant. State 2 means the initialization already completed.
+    const bool alreadyDone = umtx_loadAcquire(uio.fState) == 2;
+    if (!alreadyDone && umtx_initImplPreInit(uio)) {
+        // This thread was chosen to run the init function.
+        (obj->*fp)();
+        umtx_initImplPostInit(uio);
+    }
+}
+
+
+// umtx_initOnce variant for plain functions, or static class functions.
+// No context parameter.
+inline void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)()) {
+    // State 2 means the initialization already completed: nothing to do.
+    const bool alreadyDone = umtx_loadAcquire(uio.fState) == 2;
+    if (!alreadyDone && umtx_initImplPreInit(uio)) {
+        // This thread was chosen to run the init function.
+        fp();
+        umtx_initImplPostInit(uio);
+    }
+}
+
+// umtx_initOnce variant for plain functions, or static class functions.
+// With ErrorCode, No context parameter.
+inline void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(UErrorCode &), UErrorCode &errCode) {
+ if (U_FAILURE(errCode)) { // a failure passed in by the caller prevents the initialization from running
+ return;
+ }
+ if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) {
+ // We run the initialization.
+ (*fp)(errCode);
+ uio.fErrCode = errCode; // remembered so that later callers see the same outcome
+ umtx_initImplPostInit(uio);
+ } else {
+ // Someone else already ran the initialization.
+ if (U_FAILURE(uio.fErrCode)) { // only a stored failure is copied; a stored success leaves errCode untouched
+ errCode = uio.fErrCode;
+ }
+ }
+}
+
+// umtx_initOnce variant for plain functions, or static class functions,
+// with a context parameter.
+template<class T> void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T), T context) {
+    // State 2 means the initialization already completed: nothing to do.
+    const bool alreadyDone = umtx_loadAcquire(uio.fState) == 2;
+    if (!alreadyDone && umtx_initImplPreInit(uio)) {
+        // This thread was chosen to run the init function.
+        fp(context);
+        umtx_initImplPostInit(uio);
+    }
+}
+
+// umtx_initOnce variant for plain functions, or static class functions,
+// with a context parameter and an error code.
+template<class T> void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T, UErrorCode &), T context, UErrorCode &errCode) {
+ if (U_FAILURE(errCode)) { // a failure passed in by the caller prevents the initialization from running
+ return;
+ }
+ if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) {
+ // We run the initialization.
+ (*fp)(context, errCode);
+ uio.fErrCode = errCode; // remembered so that later callers see the same outcome
+ umtx_initImplPostInit(uio);
+ } else {
+ // Someone else already ran the initialization.
+ if (U_FAILURE(uio.fErrCode)) { // only a stored failure is copied; a stored success leaves errCode untouched
+ errCode = uio.fErrCode;
+ }
+ }
+}
+
+// UMutex should be constexpr-constructible, so that no initialization code
+// is run during startup.
+// This works on all C++ libraries except MS VS before VS2019.
+#if (defined(_CPPLIB_VER) && !defined(_MSVC_STL_VERSION)) || \
+ (defined(_MSVC_STL_VERSION) && _MSVC_STL_VERSION < 142)
+ // (VS std lib older than VS2017) || (VS std lib version < VS2019)
+# define UMUTEX_CONSTEXPR
+#else
+# define UMUTEX_CONSTEXPR constexpr
+#endif
+
+/**
+ * UMutex - ICU Mutex class.
+ *
+ * This is the preferred Mutex class for use within ICU implementation code.
+ * It is a thin wrapper over C++ std::mutex, with these additions:
+ * - Static instances are safe, not triggering static construction or destruction,
+ * and the associated order of construction or destruction issues.
+ * - Plumbed into u_cleanup() for destructing the underlying std::mutex,
+ * which frees any OS level resources they may be holding.
+ *
+ * Limitations:
+ * - Static or global instances only. Cannot be heap allocated. Cannot appear as a
+ * member of another class.
+ * - No condition variables or other advanced features. If needed, you will need to use
+ * std::mutex and std::condition_variable directly. For an example, see unifiedcache.cpp
+ *
+ * Typical Usage:
+ * static UMutex myMutex;
+ *
+ * {
+ * Mutex lock(myMutex);
+ * ... // Do stuff that is protected by myMutex;
+ * } // myMutex is released when lock goes out of scope.
+ */
+
+class U_COMMON_API UMutex {
+public:
+ UMUTEX_CONSTEXPR UMutex() {}
+ ~UMutex() = default; // does not destroy the lazily created std::mutex; UMutex::cleanup() does that
+
+ UMutex(const UMutex &other) = delete;
+ UMutex &operator =(const UMutex &other) = delete;
+ void *operator new(size_t) = delete; // enforces the "cannot be heap allocated" limitation above
+
+ // requirements for C++ BasicLockable, allows UMutex to work with std::lock_guard
+ void lock() {
+ std::mutex *m = fMutex.load(std::memory_order_acquire);
+ if (m == nullptr) { m = getMutex(); } // first use: construct and register the std::mutex
+ m->lock();
+ }
+ void unlock() { fMutex.load(std::memory_order_relaxed)->unlock(); } // no null check: lock() must have been called first
+
+ static void cleanup(); // destroys the std::mutex of every UMutex on the gListHead list
+
+private:
+ alignas(std::mutex) char fStorage[sizeof(std::mutex)] {}; // raw storage into which getMutex() placement-news the std::mutex
+ std::atomic<std::mutex *> fMutex { nullptr }; // nullptr until getMutex() has run, and again after cleanup()
+
+ /** All initialized UMutexes are kept in a linked list, so that they can be found,
+ * and the underlying std::mutex destructed, by u_cleanup().
+ */
+ UMutex *fListLink { nullptr };
+ static UMutex *gListHead;
+
+ /** Out-of-line function to lazily initialize a UMutex on first use.
+ * Initial fast check is inline, in lock(). The returned value may never
+ * be nullptr.
+ */
+ std::mutex *getMutex();
+};
+
+
+/* Lock a mutex.
+ * @param mutex The given mutex to be locked. Pass NULL to specify
+ * the global ICU mutex. Recursive locks are an error
+ * and may cause a deadlock on some platforms.
+ */
+U_CAPI void U_EXPORT2 umtx_lock(UMutex* mutex);
+
+/* Unlock a mutex.
+ * @param mutex The given mutex to be unlocked. Pass NULL to specify
+ * the global ICU mutex.
+ */
+U_CAPI void U_EXPORT2 umtx_unlock (UMutex* mutex);
+
+
+U_NAMESPACE_END
+
+#endif /* UMUTEX_H */
+/*eof*/
diff --git a/thirdparty/icu4c/common/unames.cpp b/thirdparty/icu4c/common/unames.cpp
new file mode 100644
index 0000000000..5776058f95
--- /dev/null
+++ b/thirdparty/icu4c/common/unames.cpp
@@ -0,0 +1,2108 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: unames.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999oct04
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/uchar.h"
+#include "unicode/udata.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
+#include "uassert.h"
+#include "ustr_imp.h"
+#include "umutex.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "ucln_cmn.h"
+#include "udataswp.h"
+#include "uprops.h"
+
+U_NAMESPACE_BEGIN
+
+/* prototypes ------------------------------------------------------------- */
+
+static const char DATA_NAME[] = "unames"; // name argument passed to udata_openChoice() in loadCharNames()
+static const char DATA_TYPE[] = "icu"; // type argument passed to udata_openChoice() in loadCharNames()
+
+#define GROUP_SHIFT 5
+#define LINES_PER_GROUP (1L<<GROUP_SHIFT) // 32
+#define GROUP_MASK (LINES_PER_GROUP-1) // 0x1f
+
+/*
+ * This struct was replaced by explicitly accessing equivalent
+ * fields from triples of uint16_t.
+ * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,
+ * which broke the assumption that sizeof(Group)==6 and that the ++ operator
+ * would advance by 6 bytes (3 uint16_t).
+ *
+ * We can't just change the data structure because it's loaded from a data file,
+ * and we don't want to make it less compact, so we changed the access code.
+ *
+ * For details see ICU tickets 6331 and 6008.
+typedef struct {
+ uint16_t groupMSB,
+ offsetHigh, offsetLow; / * avoid padding * /
+} Group;
+ */
+enum { // indexes into one group, which is a triple of uint16_t
+ GROUP_MSB,
+ GROUP_OFFSET_HIGH,
+ GROUP_OFFSET_LOW,
+ GROUP_LENGTH // number of uint16_t per group (3)
+};
+
+/*
+ * Get the 32-bit group offset.
+ * @param group (const uint16_t *) pointer to a Group triple of uint16_t
+ * @return group offset (int32_t), combined from the high and low 16-bit halves
+ */
+#define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW])
+
+#define NEXT_GROUP(group) ((group)+GROUP_LENGTH) // pointer arithmetic in uint16_t units
+#define PREV_GROUP(group) ((group)-GROUP_LENGTH)
+
+typedef struct {
+ uint32_t start, end;
+ uint8_t type, variant;
+ uint16_t size;
+} AlgorithmicRange;
+
+typedef struct { // the uint16_t token table follows these four offsets directly (see expandName())
+ uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset; // byte offsets from the start of this struct, as used by GET_GROUPS() and expandName()
+} UCharNames;
+
+/*
+ * Get the groups table from a UCharNames struct.
+ * The groups table consists of one uint16_t groupCount followed by
+ * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH
+ * and the comment for the old struct Group above.
+ * The argument and the whole expansion are parenthesized so that any expression can be passed.
+ * @param names (const UCharNames *) pointer to the UCharNames indexes
+ * @return (const uint16_t *) pointer to the groups table
+ */
+#define GET_GROUPS(names) ((const uint16_t *)((const char *)(names)+(names)->groupsOffset))
+
+typedef struct {
+ const char *otherName;
+ UChar32 code;
+} FindName;
+
+#define DO_FIND_NAME NULL
+
+static UDataMemory *uCharNamesData=NULL; // opened by loadCharNames(), closed by unames_cleanup()
+static UCharNames *uCharNames=NULL; // points into uCharNamesData (result of udata_getMemory())
+static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER; // makes loadCharNames() run once; reset by unames_cleanup()
+
+/*
+ * Maximum length of character names (regular & 1.0).
+ */
+static int32_t gMaxNameLength=0; // reset to 0 by unames_cleanup()
+
+/*
+ * Set of chars used in character names (regular & 1.0).
+ * Chars are platform-dependent (can be EBCDIC).
+ */
+static uint32_t gNameSet[8]={ 0 }; // presumably a 256-bit set, one bit per char value — TODO confirm against its users
+
+#define U_NONCHARACTER_CODE_POINT (U_CHAR_CATEGORY_COUNT)
+#define U_LEAD_SURROGATE (U_CHAR_CATEGORY_COUNT + 1)
+#define U_TRAIL_SURROGATE (U_CHAR_CATEGORY_COUNT + 2)
+// The three extended values follow the UCharCategory values; parenthesized so they expand safely inside larger expressions.
+#define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
+
+static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = { // one name per category value; the last three are the extended values defined above
+ "unassigned",
+ "uppercase letter",
+ "lowercase letter",
+ "titlecase letter",
+ "modifier letter",
+ "other letter",
+ "non spacing mark",
+ "enclosing mark",
+ "combining spacing mark",
+ "decimal digit number",
+ "letter number",
+ "other number",
+ "space separator",
+ "line separator",
+ "paragraph separator",
+ "control",
+ "format",
+ "private use area",
+ "surrogate",
+ "dash punctuation",
+ "start punctuation",
+ "end punctuation",
+ "connector punctuation",
+ "other punctuation",
+ "math symbol",
+ "currency symbol",
+ "modifier symbol",
+ "other symbol",
+ "initial punctuation",
+ "final punctuation",
+ "noncharacter", // U_NONCHARACTER_CODE_POINT
+ "lead surrogate", // U_LEAD_SURROGATE
+ "trail surrogate" // U_TRAIL_SURROGATE
+};
+
+/* implementation ----------------------------------------------------------- */
+
+static UBool U_CALLCONV unames_cleanup(void)
+{
+    // Cleanup hook registered by loadCharNames(): close the data and reset the cached state.
+    if(uCharNamesData != NULL) {
+        udata_close(uCharNamesData);
+    }
+    uCharNamesData = NULL;
+    // uCharNames pointed into the data that was just closed.
+    uCharNames = NULL;
+    gMaxNameLength=0;
+    gCharNamesInitOnce.reset();
+    return TRUE;
+}
+
+static UBool U_CALLCONV // callback passed to udata_openChoice() by loadCharNames()
+isAcceptable(void * /*context*/,
+ const char * /*type*/, const char * /*name*/,
+ const UDataInfo *pInfo) {
+ return (UBool)(
+ pInfo->size>=20 &&
+ pInfo->isBigEndian==U_IS_BIG_ENDIAN && // the data must match this platform's endianness
+ pInfo->charsetFamily==U_CHARSET_FAMILY && // and its charset family
+ pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */
+ pInfo->dataFormat[1]==0x6e &&
+ pInfo->dataFormat[2]==0x61 &&
+ pInfo->dataFormat[3]==0x6d &&
+ pInfo->formatVersion[0]==1); // only major format version 1 is accepted
+}
+
+static void U_CALLCONV // one-time initializer run through umtx_initOnce() by isDataLoaded()
+loadCharNames(UErrorCode &status) {
+ U_ASSERT(uCharNamesData == NULL);
+ U_ASSERT(uCharNames == NULL);
+
+ uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status);
+ if(U_FAILURE(status)) {
+ uCharNamesData = NULL; // uCharNames also stays NULL on failure
+ } else {
+ uCharNames = (UCharNames *)udata_getMemory(uCharNamesData);
+ }
+ ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup); // registered on failure too, so the init-once state gets reset
+}
+
+
+static UBool // TRUE when *pErrorCode is a success after the one-time data load
+isDataLoaded(UErrorCode *pErrorCode) {
+ umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode); // a failure stored by an earlier load is copied into *pErrorCode
+ return U_SUCCESS(*pErrorCode);
+}
+// Appends c to buffer while space remains, and always advances bufferPos so that callers learn the full length.
+#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) UPRV_BLOCK_MACRO_BEGIN { \
+ if((bufferLength)>0) { \
+ *(buffer)++=c; \
+ --(bufferLength); \
+ } \
+ ++(bufferPos); /* counted even when nothing was written */ \
+} UPRV_BLOCK_MACRO_END
+// Extra name-choice value used in this file; expandName() maps it to field index 2.
+#define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
+
+/* Expands one stored name into buffer and returns the full name length (which may exceed bufferLength).
+ * Important: expandName() and compareName() are almost the same -
+ * apply fixes to both.
+ *
+ * UnicodeData.txt uses ';' as a field separator, so no
+ * field can contain ';' as part of its contents.
+ * In unames.dat, it is marked as token[';']==-1 only if the
+ * semicolon is used in the data file - which is iff we
+ * have Unicode 1.0 names or ISO comments or aliases.
+ * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
+ * although we know that it will never be part of a name.
+ */
+static uint16_t
+expandName(UCharNames *names,
+ const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
+ char *buffer, uint16_t bufferLength) {
+ uint16_t *tokens=(uint16_t *)names+8; // skip 8 uint16_t = 16 bytes, the four uint32_t offsets of UCharNames
+ uint16_t token, tokenCount=*tokens++, bufferPos=0; // the first uint16_t is the token count; tokens then points at token 0
+ uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
+ uint8_t c;
+
+ if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
+ /*
+ * skip the modern name if it is not requested _and_
+ * if the semicolon byte value is a character, not a token number
+ */
+ if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
+ int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice; // number of ';'-terminated fields to skip
+ do {
+ while(nameLength>0) {
+ --nameLength;
+ if(*name++==';') {
+ break;
+ }
+ }
+ } while(--fieldIndex>0);
+ } else {
+ /*
+ * the semicolon byte value is a token number, therefore
+ * only modern names are stored in unames.dat and there is no
+ * such requested alternate name here
+ */
+ nameLength=0;
+ }
+ }
+
+ /* write each letter directly, and write a token word per token */
+ while(nameLength>0) {
+ --nameLength;
+ c=*name++;
+
+ if(c>=tokenCount) {
+ if(c!=';') {
+ /* implicit letter */
+ WRITE_CHAR(buffer, bufferLength, bufferPos, c);
+ } else {
+ /* finished */
+ break;
+ }
+ } else {
+ token=tokens[c];
+ if(token==(uint16_t)(-2)) {
+ /* this is a lead byte for a double-byte token */
+ token=tokens[c<<8|*name++]; // NOTE(review): reads the trail byte without checking nameLength>0 — presumably guaranteed by the data; confirm
+ --nameLength;
+ }
+ if(token==(uint16_t)(-1)) {
+ if(c!=';') {
+ /* explicit letter */
+ WRITE_CHAR(buffer, bufferLength, bufferPos, c);
+ } else {
+ /* stop, but skip the semicolon if we are seeking
+ extended names and there was no 2.0 name but there
+ is a 1.0 name. */
+ if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
+ if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
+ continue;
+ }
+ }
+ /* finished */
+ break;
+ }
+ } else {
+ /* write token word */
+ uint8_t *tokenString=tokenStrings+token; // token is used as a byte offset into the token strings
+ while((c=*tokenString++)!=0) { // token words are NUL-terminated
+ WRITE_CHAR(buffer, bufferLength, bufferPos, c);
+ }
+ }
+ }
+ }
+
+ /* zero-terminate */
+ if(bufferLength>0) { // only when space is left; the terminator is not counted in the result
+ *buffer=0;
+ }
+
+ return bufferPos; // full name length, even if only part of it fit into buffer
+}
+
+/*
+ * compareName() is almost the same as expandName() except that it compares
+ * the currently expanded name to an input name.
+ * It returns the match/no match result as soon as possible.
+ *
+ * The stored name line (name, nameLength) is decoded byte by byte exactly as
+ * in expandName(), but every produced character is compared with the next
+ * character of otherName instead of being written to a buffer.
+ * Returns FALSE at the first mismatch; otherwise returns whether otherName
+ * ends exactly where the expanded name ends.
+ */
+static UBool
+compareName(UCharNames *names,
+ const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
+ const char *otherName) {
+ uint16_t *tokens=(uint16_t *)names+8;
+ uint16_t token, tokenCount=*tokens++;
+ uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
+ uint8_t c;
+ const char *origOtherName = otherName; /* used below to detect that nothing has been matched yet */
+
+ if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
+ /*
+ * skip the modern name if it is not requested _and_
+ * if the semicolon byte value is a character, not a token number
+ */
+ if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
+ int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
+ do {
+ while(nameLength>0) {
+ --nameLength;
+ if(*name++==';') {
+ break;
+ }
+ }
+ } while(--fieldIndex>0);
+ } else {
+ /*
+ * the semicolon byte value is a token number, therefore
+ * only modern names are stored in unames.dat and there is no
+ * such requested alternate name here
+ */
+ nameLength=0;
+ }
+ }
+
+ /* compare each letter directly, and compare a token word per token */
+ while(nameLength>0) {
+ --nameLength;
+ c=*name++;
+
+ if(c>=tokenCount) {
+ if(c!=';') {
+ /* implicit letter */
+ if((char)c!=*otherName++) {
+ return FALSE;
+ }
+ } else {
+ /* finished */
+ break;
+ }
+ } else {
+ token=tokens[c];
+ if(token==(uint16_t)(-2)) {
+ /* this is a lead byte for a double-byte token */
+ token=tokens[c<<8|*name++];
+ --nameLength;
+ }
+ if(token==(uint16_t)(-1)) {
+ if(c!=';') {
+ /* explicit letter */
+ if((char)c!=*otherName++) {
+ return FALSE;
+ }
+ } else {
+ /* stop, but skip the semicolon if we are seeking
+ extended names and there was no 2.0 name but there
+ is a 1.0 name. */
+ if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
+ if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
+ continue;
+ }
+ }
+ /* finished */
+ break;
+ }
+ } else {
+ /* compare token word */
+ uint8_t *tokenString=tokenStrings+token;
+ while((c=*tokenString++)!=0) {
+ if((char)c!=*otherName++) {
+ return FALSE;
+ }
+ }
+ }
+ }
+ }
+
+ /* complete match? */
+ return (UBool)(*otherName==0);
+}
+
+/*
+ * Returns the category value used for extended names:
+ * U_NONCHARACTER_CODE_POINT for noncharacters, U_LEAD_SURROGATE or
+ * U_TRAIL_SURROGATE for surrogates, and otherwise the u_charType() value.
+ */
+static uint8_t getCharCat(UChar32 cp) {
+    if (U_IS_UNICODE_NONCHAR(cp)) {
+        return U_NONCHARACTER_CODE_POINT;
+    }
+
+    const uint8_t generalCategory = u_charType(cp);
+    if (generalCategory != U_SURROGATE) {
+        return generalCategory;
+    }
+
+    /* split the single surrogate category into lead and trail */
+    return U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
+}
+
+/*
+ * Maps the getCharCat() value of cp to its entry in charCatNames.
+ */
+static const char *getCharCatName(UChar32 cp) {
+    const uint8_t cat = getCharCat(cp);
+
+    /* Return unknown if the table of names above is not up to
+       date, i.e. has no entry for this category value. */
+    return cat < UPRV_LENGTHOF(charCatNames) ? charCatNames[cat] : "unknown";
+}
+
+/*
+ * Writes the extended name "<category-HHHH>" of code into buffer, where
+ * category comes from getCharCatName() and HHHH is the code point in
+ * uppercase hexadecimal with at least 4 digits.
+ *
+ * At most bufferLength chars are stored and no terminating NUL is written;
+ * the return value is the full length of the name even when it did not fit,
+ * so callers can detect truncation.
+ */
+static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
+    const char *catname = getCharCatName(code);
+    uint16_t length = 0;
+
+    /* unsigned so that the shifts below always terminate */
+    uint32_t cp;
+    int ndigits, i;
+
+    WRITE_CHAR(buffer, bufferLength, length, '<');
+    /* length is 1 after '<', so catname[length - 1] walks the category name */
+    while (catname[length - 1]) {
+        WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
+    }
+    WRITE_CHAR(buffer, bufferLength, length, '-');
+
+    /* count the hex digits, padding to a minimum of 4 */
+    for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
+        ;
+    if (ndigits < 4)
+        ndigits = 4;
+
+    /*
+     * Generate the digits from least to most significant, but store only
+     * those whose position lies inside the remaining capacity. Writing
+     * buffer[--i] unconditionally would store past the end of the buffer
+     * whenever fewer than ndigits chars remain.
+     */
+    for (cp = code, i = ndigits; i > 0; cp >>= 4) {
+        uint8_t v = (uint8_t)(cp & 0xf);
+        if (--i < bufferLength) {
+            buffer[i] = (char)(v < 10 ? '0' + v : 'A' + v - 10);
+        }
+    }
+    if (ndigits <= bufferLength) {
+        buffer += ndigits;
+        bufferLength = (uint16_t)(bufferLength - ndigits);
+    } else {
+        buffer += bufferLength;
+        bufferLength = 0;
+    }
+    length += static_cast<uint16_t>(ndigits);
+    WRITE_CHAR(buffer, bufferLength, length, '>');
+
+    return length;
+}
+
+/*
+ * getGroup() binary-searches the groups table for the group whose MSB
+ * value matches "code">>GROUP_SHIFT.
+ * The result is always a pointer to a group: either the one that may
+ * contain "code", or the highest group before "code".
+ * If even the lowest group lies after "code", that lowest group is returned.
+ */
+static const uint16_t *
+getGroup(UCharNames *names, uint32_t code) {
+    const uint16_t *groups=GET_GROUPS(names);
+    const uint16_t wantedMSB=(uint16_t)(code>>GROUP_SHIFT);
+    const uint16_t groupCount=*groups++;   /* the table starts with its length */
+    uint16_t lo=0, hi=groupCount;
+
+    /* narrow [lo, hi) until a single candidate group is left */
+    while(lo<hi-1) {
+        const uint16_t mid=(uint16_t)((lo+hi)/2);
+        if(groups[mid*GROUP_LENGTH+GROUP_MSB]>wantedMSB) {
+            hi=mid;
+        } else {
+            lo=mid;
+        }
+    }
+
+    /* the caller checks whether this is an exact match */
+    return groups+lo*GROUP_LENGTH;
+}
+
+/*
+ * expandGroupLengths() reads a block of compressed lengths of 32 strings and
+ * expands them into offsets and lengths for each string.
+ * Lengths are stored with a variable-width encoding in consecutive nibbles:
+ * If a nibble<0xc, then it is the length itself (0=empty string).
+ * If a nibble>=0xc, then it forms a length value with the following nibble.
+ * Calculation see below.
+ * The offsets and lengths arrays must be at least 33 (one more) long because
+ * there is no check here at the end if the last nibble is still used.
+ *
+ * The return value points just behind the length bytes that were read, which
+ * is where the first string of the group starts; the values stored in
+ * offsets[] start at 0 and accumulate the lengths, so callers add them to
+ * the returned pointer.
+ */
+static const uint8_t *
+expandGroupLengths(const uint8_t *s,
+ uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
+ /* read the lengths of the 32 strings in this group and get each string's offset */
+ uint16_t i=0, offset=0, length=0; /* length>=12 on loop entry means a double-nibble length is pending from the previous byte */
+ uint8_t lengthByte;
+
+ /* all 32 lengths must be read to get the offset of the first group string */
+ while(i<LINES_PER_GROUP) {
+ lengthByte=*s++;
+
+ /* read even nibble - MSBs of lengthByte */
+ if(length>=12) {
+ /* double-nibble length spread across two bytes */
+ length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
+ lengthByte&=0xf;
+ } else if((lengthByte /* &0xf0 */)>=0xc0) {
+ /* double-nibble length spread across this one byte */
+ length=(uint16_t)((lengthByte&0x3f)+12);
+ } else {
+ /* single-nibble length in MSBs */
+ length=(uint16_t)(lengthByte>>4);
+ lengthByte&=0xf;
+ }
+
+ *offsets++=offset;
+ *lengths++=length;
+
+ offset+=length;
+ ++i;
+
+ /* read odd nibble - LSBs of lengthByte */
+ if((lengthByte&0xf0)==0) {
+ /* this nibble was not consumed for a double-nibble length above */
+ length=lengthByte;
+ if(length<12) {
+ /* single-nibble length in LSBs */
+ *offsets++=offset;
+ *lengths++=length;
+
+ offset+=length;
+ ++i;
+ }
+ } else {
+ length=0; /* prevent double-nibble detection in the next iteration */
+ }
+ }
+
+ /* now, s is at the first group string */
+ return s;
+}
+
+/*
+ * Expands the name stored on line lineNumber of the given group into buffer.
+ * The group's compressed line lengths are decoded first to locate the line;
+ * the result of expandName() for that line is returned.
+ */
+static uint16_t
+expandGroupName(UCharNames *names, const uint16_t *group,
+                uint16_t lineNumber, UCharNameChoice nameChoice,
+                char *buffer, uint16_t bufferLength) {
+    uint16_t lineOffsets[LINES_PER_GROUP+2], lineLengths[LINES_PER_GROUP+2];
+    const uint8_t *groupData=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
+    const uint8_t *firstLine=expandGroupLengths(groupData, lineOffsets, lineLengths);
+    const uint8_t *line=firstLine+lineOffsets[lineNumber];
+
+    return expandName(names, line, lineLengths[lineNumber], nameChoice,
+                      buffer, bufferLength);
+}
+
+/*
+ * Looks up the stored name of code. If no group exists for
+ * code>>GROUP_SHIFT, the buffer is set to the empty string (if it has room)
+ * and 0 is returned; otherwise the result of expandGroupName() is returned.
+ */
+static uint16_t
+getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
+        char *buffer, uint16_t bufferLength) {
+    const uint16_t *group=getGroup(names, code);
+
+    if((uint16_t)(code>>GROUP_SHIFT)!=group[GROUP_MSB]) {
+        /* group not found: return an empty, zero-terminated result */
+        if(bufferLength>0) {
+            *buffer=0;
+        }
+        return 0;
+    }
+
+    return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
+                           buffer, bufferLength);
+}
+
+/*
+ * enumGroupNames() walks the code points start..end (inclusive) of one
+ * 32-line group.
+ * With a real enumerator function, each non-empty name is expanded and
+ * passed to fn; for U_EXTENDED_CHAR_NAME an empty stored name is replaced
+ * with the synthetic extended name.
+ * With fn==DO_FIND_NAME, context is a FindName: each stored name is compared
+ * with its otherName and the matching code point is stored in its code.
+ * Returns FALSE when enumeration should stop (fn returned FALSE, or the
+ * name was found), TRUE otherwise.
+ */
+static UBool
+enumGroupNames(UCharNames *names, const uint16_t *group,
+               UChar32 start, UChar32 end,
+               UEnumCharNamesFn *fn, void *context,
+               UCharNameChoice nameChoice) {
+    uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
+    const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
+
+    s=expandGroupLengths(s, offsets, lengths);
+
+    if(fn==DO_FIND_NAME) {
+        FindName *findName=(FindName *)context;
+        const char *otherName=findName->otherName;
+
+        for(; start<=end; ++start) {
+            const UChar32 line=start&GROUP_MASK;
+            if(compareName(names, s+offsets[line], lengths[line], nameChoice, otherName)) {
+                findName->code=start;
+                return FALSE;
+            }
+        }
+        return TRUE;
+    }
+
+    char buffer[200];
+    for(; start<=end; ++start) {
+        const UChar32 line=start&GROUP_MASK;
+        uint16_t length=expandName(names, s+offsets[line], lengths[line], nameChoice, buffer, sizeof(buffer));
+        if(length==0 && nameChoice==U_EXTENDED_CHAR_NAME) {
+            length=getExtName(start, buffer, sizeof(buffer));
+            buffer[length]=0;
+        }
+        /* here, we assume that the buffer is large enough */
+        if(length>0 && !fn(context, start, nameChoice, buffer, length)) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+/*
+ * enumExtNames() enumerates the synthetic extended names for the code
+ * points start..end (inclusive).
+ * Nothing is done for the dummy DO_FIND_NAME function, because
+ * u_charFromName() does a check for extended names by itself.
+ * Returns FALSE if fn asked to stop, TRUE otherwise.
+ */
+static UBool
+enumExtNames(UChar32 start, UChar32 end,
+             UEnumCharNamesFn *fn, void *context)
+{
+    if(fn==DO_FIND_NAME) {
+        return TRUE;
+    }
+
+    char buffer[200];
+    for(; start<=end; ++start) {
+        const uint16_t length=getExtName(start, buffer, sizeof(buffer));
+        buffer[length]=0;
+        /* here, we assume that the buffer is large enough */
+        if(length>0 && !fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+static UBool
+enumNames(UCharNames *names,
+ UChar32 start, UChar32 limit,
+ UEnumCharNamesFn *fn, void *context,
+ UCharNameChoice nameChoice) { /*
+ * Enumerates the names of the code points from start up to, but not
+ * including, limit (limit-1 is what gets passed on as the inclusive end).
+ * Code points that fall into a group of the groups table are handled by
+ * enumGroupNames(); when nameChoice is U_EXTENDED_CHAR_NAME, the ranges
+ * before, between and after the groups are handled by enumExtNames().
+ * Returns FALSE as soon as one of these helpers returns FALSE, otherwise TRUE.
+ */
+ uint16_t startGroupMSB, endGroupMSB, groupCount;
+ const uint16_t *group, *groupLimit;
+
+ startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
+ endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);
+
+ /* find the group that contains start, or the highest before it */
+ group=getGroup(names, start);
+
+ if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) {
+ /* enumerate synthetic names between start and the group start */
+ UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);
+ if(extLimit>limit) {
+ extLimit=limit;
+ }
+ if(!enumExtNames(start, extLimit-1, fn, context)) {
+ return FALSE;
+ }
+ start=extLimit;
+ }
+
+ if(startGroupMSB==endGroupMSB) {
+ if(startGroupMSB==group[GROUP_MSB]) {
+ /* if start and limit-1 are in the same group, then enumerate only in that one */
+ return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
+ }
+ } else {
+ const uint16_t *groups=GET_GROUPS(names);
+ groupCount=*groups++;
+ groupLimit=groups+groupCount*GROUP_LENGTH; /* one past the last group */
+
+ if(startGroupMSB==group[GROUP_MSB]) {
+ /* enumerate characters in the partial start group */
+ if((start&GROUP_MASK)!=0) {
+ if(!enumGroupNames(names, group,
+ start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
+ fn, context, nameChoice)) {
+ return FALSE;
+ }
+ group=NEXT_GROUP(group); /* continue with the next group */
+ }
+ } else if(startGroupMSB>group[GROUP_MSB]) {
+ /* make sure that we start enumerating with the first group after start */
+ const uint16_t *nextGroup=NEXT_GROUP(group);
+ if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
+ UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
+ if (end > limit) {
+ end = limit;
+ }
+ if (!enumExtNames(start, end - 1, fn, context)) {
+ return FALSE;
+ }
+ }
+ group=nextGroup;
+ }
+
+ /* enumerate entire groups between the start- and end-groups */
+ while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {
+ const uint16_t *nextGroup;
+ start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;
+ if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
+ return FALSE;
+ }
+ nextGroup=NEXT_GROUP(group);
+ if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
+ /* there is a gap before the next group: enumerate synthetic names for it */
+ UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
+ if (end > limit) {
+ end = limit;
+ }
+ if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) {
+ return FALSE;
+ }
+ }
+ group=nextGroup;
+ }
+
+ /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */
+ if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {
+ return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
+ } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
+ /* ran off the end of the table: continue after the last group, handled below */
+ UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;
+ if (next > start) {
+ start = next;
+ }
+ } else {
+ return TRUE;
+ }
+ }
+
+ /* we have not found a group, which means everything is made of
+ extended names. */
+ if (nameChoice == U_EXTENDED_CHAR_NAME) {
+ if (limit > UCHAR_MAX_VALUE + 1) {
+ limit = UCHAR_MAX_VALUE + 1;
+ }
+ return enumExtNames(start, limit - 1, fn, context);
+ }
+
+ return TRUE;
+}
+
+static uint16_t
+writeFactorSuffix(const uint16_t *factors, uint16_t count,
+ const char *s, /* suffix elements */
+ uint32_t code,
+ uint16_t indexes[8], /* output fields from here */
+ const char *elementBases[8], const char *elements[8],
+ char *buffer, uint16_t bufferLength) { /*
+ * Splits code into one index per factor (factors[count-1] is the
+ * fastest-changing position) and stores them in indexes[].
+ * s holds, for each factor in order, factors[i] zero-terminated strings;
+ * for each factor the indexes[i]-th string is appended to buffer.
+ * If not NULL, elementBases[] receives the first string of each factor's
+ * list and elements[] receives the selected string.
+ * Returns the length of the suffix, counting chars that did not fit into
+ * bufferLength; the output is zero-terminated only if room remains.
+ */
+ uint16_t i, factor, bufferPos=0;
+ char c;
+
+ /* write elements according to the factors */
+
+ /*
+ * the factorized elements are determined by modulo arithmetic
+ * with the factors of this algorithm
+ *
+ * note that for fewer operations, count is decremented here
+ */
+ --count;
+ for(i=count; i>0; --i) {
+ factor=factors[i];
+ indexes[i]=(uint16_t)(code%factor);
+ code/=factor;
+ }
+ /*
+ * we don't need to calculate the last modulus because start<=code<=end
+ * guarantees here that code<=factors[0]
+ */
+ indexes[0]=(uint16_t)code;
+
+ /* write each element; i is 0 here and is advanced at the end of each pass */
+ for(;;) {
+ if(elementBases!=NULL) {
+ *elementBases++=s;
+ }
+
+ /* skip indexes[i] strings */
+ factor=indexes[i];
+ while(factor>0) {
+ while(*s++!=0) {}
+ --factor;
+ }
+ if(elements!=NULL) {
+ *elements++=s;
+ }
+
+ /* write element */
+ while((c=*s++)!=0) {
+ WRITE_CHAR(buffer, bufferLength, bufferPos, c);
+ }
+
+ /* we do not need to perform the rest of this loop for i==count - break here */
+ if(i>=count) {
+ break;
+ }
+
+ /* skip the rest of the strings for this factors[i] */
+ factor=(uint16_t)(factors[i]-indexes[i]-1);
+ while(factor>0) {
+ while(*s++!=0) {}
+ --factor;
+ }
+
+ ++i;
+ }
+
+ /* zero-terminate */
+ if(bufferLength>0) {
+ *buffer=0;
+ }
+
+ return bufferPos;
+}
+
+/*
+ * Important:
+ * Parts of findAlgName() are almost the same as some of getAlgName().
+ * Fixes must be applied to both.
+ *
+ * getAlgName() writes the algorithmic name of code for the given range.
+ * range->type 0: the prefix string stored behind the range, followed by
+ * range->variant uppercase hex digits of code.
+ * range->type 1: the prefix string stored behind the factors, followed by
+ * the factorized suffix for code-range->start.
+ * For name choices other than U_UNICODE_CHAR_NAME and U_EXTENDED_CHAR_NAME
+ * and for unknown range types, an empty string is produced.
+ * Returns the full name length, counting chars that did not fit.
+ */
+static uint16_t
+getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
+ char *buffer, uint16_t bufferLength) {
+ uint16_t bufferPos=0;
+
+ /* Only the normative character name can be algorithmic. */
+ if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
+ /* zero-terminate */
+ if(bufferLength>0) {
+ *buffer=0;
+ }
+ return 0;
+ }
+
+ switch(range->type) {
+ case 0: {
+ /* name = prefix hex-digits */
+ const char *s=(const char *)(range+1);
+ char c;
+
+ uint16_t i, count;
+
+ /* copy prefix */
+ while((c=*s++)!=0) {
+ WRITE_CHAR(buffer, bufferLength, bufferPos, c);
+ }
+
+ /* write hexadecimal code point value */
+ count=range->variant;
+
+ /* zero-terminate */
+ if(count<bufferLength) {
+ buffer[count]=0;
+ }
+
+ /* fill the digits from the last position backwards, storing only those inside the remaining capacity */
+ for(i=count; i>0;) {
+ if(--i<bufferLength) {
+ c=(char)(code&0xf);
+ if(c<10) {
+ c+='0';
+ } else {
+ c+='A'-10;
+ }
+ buffer[i]=c;
+ }
+ code>>=4;
+ }
+
+ bufferPos+=count;
+ break;
+ }
+ case 1: {
+ /* name = prefix factorized-elements */
+ uint16_t indexes[8];
+ const uint16_t *factors=(const uint16_t *)(range+1);
+ uint16_t count=range->variant;
+ const char *s=(const char *)(factors+count);
+ char c;
+
+ /* copy prefix */
+ while((c=*s++)!=0) {
+ WRITE_CHAR(buffer, bufferLength, bufferPos, c);
+ }
+
+ bufferPos+=writeFactorSuffix(factors, count,
+ s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
+ break;
+ }
+ default:
+ /* undefined type */
+ /* zero-terminate */
+ if(bufferLength>0) {
+ *buffer=0;
+ }
+ break;
+ }
+
+ return bufferPos;
+}
+
+/*
+ * Important: enumAlgNames() and findAlgName() are almost the same.
+ * Any fix must be applied to both.
+ *
+ * enumAlgNames() calls fn with the algorithmic name of each code point from
+ * start up to, but not including, limit.
+ * The name of the first code point is generated in full; each following
+ * name is derived from the previous one, by incrementing the hex digits
+ * (type 0) or the factor indexes (type 1).
+ * Returns FALSE as soon as fn returns FALSE, otherwise TRUE. Nothing is
+ * enumerated for name choices other than U_UNICODE_CHAR_NAME and
+ * U_EXTENDED_CHAR_NAME.
+ * Names are built in a local 200-char buffer; the type 1 copies below are
+ * not bounds-checked.
+ */
+static UBool
+enumAlgNames(AlgorithmicRange *range,
+ UChar32 start, UChar32 limit,
+ UEnumCharNamesFn *fn, void *context,
+ UCharNameChoice nameChoice) {
+ char buffer[200];
+ uint16_t length;
+
+ if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
+ return TRUE;
+ }
+
+ switch(range->type) {
+ case 0: {
+ char *s, *end;
+ char c;
+
+ /* get the full name of the start character */
+ length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
+ if(length<=0) {
+ return TRUE;
+ }
+
+ /* call the enumerator function with this first character */
+ if(!fn(context, start, nameChoice, buffer, length)) {
+ return FALSE;
+ }
+
+ /* go to the end of the name; all these names have the same length */
+ end=buffer;
+ while(*end!=0) {
+ ++end;
+ }
+
+ /* enumerate the rest of the names */
+ while(++start<limit) {
+ /* increment the hexadecimal number on a character-basis */
+ s=end;
+ for (;;) {
+ c=*--s;
+ if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
+ *s=(char)(c+1);
+ break;
+ } else if(c=='9') {
+ *s='A';
+ break;
+ } else if(c=='F') {
+ *s='0'; /* carry into the next digit to the left */
+ }
+ }
+
+ if(!fn(context, start, nameChoice, buffer, length)) {
+ return FALSE;
+ }
+ }
+ break;
+ }
+ case 1: {
+ uint16_t indexes[8];
+ const char *elementBases[8], *elements[8];
+ const uint16_t *factors=(const uint16_t *)(range+1);
+ uint16_t count=range->variant;
+ const char *s=(const char *)(factors+count);
+ char *suffix, *t;
+ uint16_t prefixLength, i, idx;
+
+ char c;
+
+ /* name = prefix factorized-elements */
+
+ /* copy prefix */
+ suffix=buffer;
+ prefixLength=0;
+ while((c=*s++)!=0) {
+ *suffix++=c;
+ ++prefixLength;
+ }
+
+ /* append the suffix of the start character */
+ length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
+ s, (uint32_t)start-range->start,
+ indexes, elementBases, elements,
+ suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
+
+ /* call the enumerator function with this first character */
+ if(!fn(context, start, nameChoice, buffer, length)) {
+ return FALSE;
+ }
+
+ /* enumerate the rest of the names */
+ while(++start<limit) {
+ /* increment the indexes in lexical order bound by the factors */
+ i=count;
+ for (;;) {
+ idx=(uint16_t)(indexes[--i]+1);
+ if(idx<factors[i]) {
+ /* skip one index and its element string */
+ indexes[i]=idx;
+ s=elements[i];
+ while(*s++!=0) {
+ }
+ elements[i]=s;
+ break;
+ } else {
+ /* reset this index to 0 and its element string to the first one */
+ indexes[i]=0;
+ elements[i]=elementBases[i];
+ }
+ }
+
+ /* to make matters a little easier, just append all elements to the suffix */
+ t=suffix;
+ length=prefixLength;
+ for(i=0; i<count; ++i) {
+ s=elements[i];
+ while((c=*s++)!=0) {
+ *t++=c;
+ ++length;
+ }
+ }
+ /* zero-terminate */
+ *t=0;
+
+ if(!fn(context, start, nameChoice, buffer, length)) {
+ return FALSE;
+ }
+ }
+ break;
+ }
+ default:
+ /* undefined type */
+ break;
+ }
+
+ return TRUE;
+}
+
+/*
+ * findAlgName() is almost the same as enumAlgNames() except that it
+ * returns the code point for a name if it fits into the range.
+ * It returns 0xffff otherwise.
+ *
+ * otherName is compared as given (the comparisons below are case-sensitive
+ * and the hex digits accepted for type 0 are 0-9 and A-F).
+ * For type 0 the code point is parsed from the range->variant hex digits
+ * after the prefix and checked against range->start..range->end.
+ * For type 1 the suffixes of all code points of the range are generated one
+ * after another and compared with the rest of otherName.
+ */
+static UChar32
+findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
+ UChar32 code;
+
+ if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
+ return 0xffff;
+ }
+
+ switch(range->type) {
+ case 0: {
+ /* name = prefix hex-digits */
+ const char *s=(const char *)(range+1);
+ char c;
+
+ uint16_t i, count;
+
+ /* compare prefix */
+ while((c=*s++)!=0) {
+ if((char)c!=*otherName++) {
+ return 0xffff;
+ }
+ }
+
+ /* read hexadecimal code point value */
+ count=range->variant;
+ code=0;
+ for(i=0; i<count; ++i) {
+ c=*otherName++;
+ if('0'<=c && c<='9') {
+ code=(code<<4)|(c-'0');
+ } else if('A'<=c && c<='F') {
+ code=(code<<4)|(c-'A'+10);
+ } else {
+ return 0xffff;
+ }
+ }
+
+ /* does it fit into the range? */
+ if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
+ return code;
+ }
+ break;
+ }
+ case 1: {
+ char buffer[64];
+ uint16_t indexes[8];
+ const char *elementBases[8], *elements[8];
+ const uint16_t *factors=(const uint16_t *)(range+1);
+ uint16_t count=range->variant;
+ const char *s=(const char *)(factors+count), *t;
+ UChar32 start, limit;
+ uint16_t i, idx;
+
+ char c;
+
+ /* name = prefix factorized-elements */
+
+ /* compare prefix */
+ while((c=*s++)!=0) {
+ if((char)c!=*otherName++) {
+ return 0xffff;
+ }
+ }
+
+ start=(UChar32)range->start;
+ limit=(UChar32)(range->end+1);
+
+ /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
+ writeFactorSuffix(factors, count, s, 0,
+ indexes, elementBases, elements, buffer, sizeof(buffer));
+
+ /* compare the first suffix */
+ if(0==uprv_strcmp(otherName, buffer)) {
+ return start;
+ }
+
+ /* enumerate and compare the rest of the suffixes */
+ while(++start<limit) {
+ /* increment the indexes in lexical order bound by the factors */
+ i=count;
+ for (;;) {
+ idx=(uint16_t)(indexes[--i]+1);
+ if(idx<factors[i]) {
+ /* skip one index and its element string */
+ indexes[i]=idx;
+ s=elements[i];
+ while(*s++!=0) {}
+ elements[i]=s;
+ break;
+ } else {
+ /* reset this index to 0 and its element string to the first one */
+ indexes[i]=0;
+ elements[i]=elementBases[i];
+ }
+ }
+
+ /* to make matters a little easier, just compare all elements of the suffix */
+ t=otherName;
+ for(i=0; i<count; ++i) {
+ s=elements[i];
+ while((c=*s++)!=0) {
+ if(c!=*t++) {
+ s=""; /* does not match */
+ i=99; /* sentinel: ends both loops and fails the i<99 test below */
+ }
+ }
+ }
+ if(i<99 && *t==0) {
+ return start;
+ }
+ }
+ break;
+ }
+ default:
+ /* undefined type */
+ break;
+ }
+
+ return 0xffff;
+}
+
+/* sets of name characters, maximum name lengths ---------------------------- */
+
+/*
+ * A set is an array of 8 uint32_t words, i.e. one bit per uint8_t value:
+ * the upper 3 bits of the value select the word, the lower 5 bits the bit.
+ * The c argument is parenthesized so that the uint8_t cast applies to the
+ * whole argument expression, not just to its first operand.
+ */
+#define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
+#define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
+
+/*
+ * Adds every char of the zero-terminated string s to set and returns the
+ * length of s.
+ */
+static int32_t
+calcStringSetLength(uint32_t set[8], const char *s) {
+    int32_t length=0;
+
+    for(char c=*s; c!=0; c=*++s) {
+        SET_ADD(set, c);
+        ++length;
+    }
+    return length;
+}
+
+/*
+ * Visits all algorithmic ranges in uCharNames, adds the chars of their
+ * prefixes and factor elements to gNameSet, and returns the larger of
+ * maxNameLength and the longest name any range can produce.
+ */
+static int32_t
+calcAlgNameSetsLengths(int32_t maxNameLength) {
+    /* the algorithmic-names area starts with the number of ranges */
+    uint32_t *p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
+    uint32_t remaining=*p;
+    AlgorithmicRange *range=(AlgorithmicRange *)(p+1);
+
+    for(; remaining>0;
+          --remaining, range=(AlgorithmicRange *)((uint8_t *)range+range->size)) {
+        int32_t length;
+
+        if(range->type==0) {
+            /* name = prefix + (range->variant times) hex-digits */
+            length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
+        } else if(range->type==1) {
+            /* name = prefix factorized-elements */
+            const uint16_t *factors=(const uint16_t *)(range+1);
+            const int32_t count=range->variant;
+            const char *s=(const char *)(factors+count);
+
+            /* prefix length */
+            length=calcStringSetLength(gNameSet, s);
+            s+=length+1; /* start of factor suffixes */
+
+            /* add the longest element of each factor */
+            for(int32_t i=0; i<count; ++i) {
+                int32_t longest=0;
+                for(int32_t factor=factors[i]; factor>0; --factor) {
+                    const int32_t elementLength=calcStringSetLength(gNameSet, s);
+                    s+=elementLength+1;
+                    if(elementLength>longest) {
+                        longest=elementLength;
+                    }
+                }
+                length+=longest;
+            }
+        } else {
+            /* unknown type */
+            continue;
+        }
+
+        if(length>maxNameLength) {
+            maxNameLength=length;
+        }
+    }
+    return maxNameLength;
+}
+
+/*
+ * Adds the chars of all category names in charCatNames to gNameSet and
+ * returns the larger of maxNameLength and the longest extended name length.
+ */
+static int32_t
+calcExtNameSetsLengths(int32_t maxNameLength) {
+    int32_t longest=maxNameLength;
+
+    for(int32_t i=0; i<UPRV_LENGTHOF(charCatNames); ++i) {
+        /*
+         * for each category, count the length of the category name
+         * plus 9=
+         * 2 for <>
+         * 1 for -
+         * 6 for most hex digits per code point
+         */
+        const int32_t length=9+calcStringSetLength(gNameSet, charCatNames[i]);
+        longest= length>longest ? length : longest;
+    }
+    return longest;
+}
+
+static int32_t
+calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
+ uint32_t set[8],
+ const uint8_t **pLine, const uint8_t *lineLimit) { /*
+ * Decodes one ';'-terminated field starting at *pLine (stopping at
+ * lineLimit at the latest), adds every char of the expanded text to set,
+ * and returns the expanded length. *pLine is advanced past the field,
+ * including its ';' if one was read.
+ * tokenLengths may be NULL; otherwise it caches token word lengths by
+ * token index, with 0 meaning "not computed yet".
+ */
+ const uint8_t *line=*pLine;
+ int32_t length=0, tokenLength;
+ uint16_t c, token;
+
+ while(line!=lineLimit && (c=*line++)!=(uint8_t)';') {
+ if(c>=tokenCount) {
+ /* implicit letter */
+ SET_ADD(set, c);
+ ++length;
+ } else {
+ token=tokens[c];
+ if(token==(uint16_t)(-2)) {
+ /* this is a lead byte for a double-byte token */
+ c=c<<8|*line++;
+ token=tokens[c];
+ }
+ if(token==(uint16_t)(-1)) {
+ /* explicit letter */
+ SET_ADD(set, c);
+ ++length;
+ } else {
+ /* count token word */
+ if(tokenLengths!=NULL) {
+ /* use cached token length */
+ tokenLength=tokenLengths[c];
+ if(tokenLength==0) {
+ tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
+ tokenLengths[c]=(int8_t)tokenLength;
+ }
+ } else {
+ tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
+ }
+ length+=tokenLength;
+ }
+ }
+ }
+
+ *pLine=line;
+ return length;
+}
+
+/*
+ * Walks every line of every group in uCharNames. For the first two fields
+ * of each non-empty line (regular name, Unicode 1.0 name) the chars are
+ * added to gNameSet and the expanded length is folded into maxNameLength.
+ * The final maximum is stored in gMaxNameLength.
+ */
+static void
+calcGroupNameSetsLengths(int32_t maxNameLength) {
+    uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
+
+    uint16_t *tokens=(uint16_t *)uCharNames+8;
+    const uint16_t tokenCount=*tokens++;
+    uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;
+
+    /* cache of token word lengths; calcNameSetLength() also works without it */
+    int8_t *tokenLengths=(int8_t *)uprv_malloc(tokenCount);
+    if(tokenLengths!=NULL) {
+        uprv_memset(tokenLengths, 0, tokenCount);
+    }
+
+    const uint16_t *group=GET_GROUPS(uCharNames);
+    int32_t groupCount=*group++;
+
+    /* enumerate all groups */
+    for(; groupCount>0; --groupCount, group=NEXT_GROUP(group)) {
+        const uint8_t *s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);
+        s=expandGroupLengths(s, offsets, lengths);
+
+        /* enumerate all lines in each group */
+        for(int32_t lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
+            const int32_t lineLength=lengths[lineNumber];
+            if(lineLength==0) {
+                continue;
+            }
+
+            const uint8_t *line=s+offsets[lineNumber];
+            const uint8_t *lineLimit=line+lineLength;
+
+            /*
+             * field 0 is the regular name, field 1 the Unicode 1.0 name;
+             * the ISO comment that may follow is not processed
+             */
+            for(int32_t field=0; field<2 && line!=lineLimit; ++field) {
+                const int32_t length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
+                if(length>maxNameLength) {
+                    maxNameLength=length;
+                }
+            }
+        }
+    }
+
+    if(tokenLengths!=NULL) {
+        uprv_free(tokenLengths);
+    }
+
+    /* set gMax... - name length last for threading */
+    gMaxNameLength=maxNameLength;
+}
+
+/*
+ * Computes gNameSet and gMaxNameLength unless gMaxNameLength is already
+ * non-zero. Returns FALSE only if the names data could not be loaded.
+ */
+static UBool
+calcNameSetsLengths(UErrorCode *pErrorCode) {
+    static const char extChars[]="0123456789ABCDEF<>-";
+
+    if(gMaxNameLength!=0) {
+        return TRUE;
+    }
+    if(!isDataLoaded(pErrorCode)) {
+        return FALSE;
+    }
+
+    /* set hex digits, used in various names, and <>-, used in extended names */
+    for(const char *p=extChars; *p!=0; ++p) {
+        const char c=*p;
+        SET_ADD(gNameSet, c);
+    }
+
+    /*
+     * fold in algorithmic names, then extended names, then group names;
+     * the last step also sets the global maximum
+     */
+    const int32_t algMax=calcAlgNameSetsLengths(0);
+    const int32_t extMax=calcExtNameSetsLengths(algMax);
+    calcGroupNameSetsLengths(extMax);
+
+    return TRUE;
+}
+
+U_NAMESPACE_END
+
+/* public API --------------------------------------------------------------- */
+
+U_NAMESPACE_USE
+
+/*
+ * Writes the name of code point "code" for the requested nameChoice into
+ * buffer and returns its length.
+ *
+ * code         the code point; values above UCHAR_MAX_VALUE yield an empty name
+ * nameChoice   must be below U_CHAR_NAME_CHOICE_COUNT
+ * buffer       destination, may be NULL only if bufferLength is 0
+ * bufferLength capacity of buffer, must not be negative
+ * pErrorCode   in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for bad
+ *              arguments; termination and overflow reporting are delegated
+ *              to u_terminateChars()
+ *
+ * Algorithmic ranges are tried first; otherwise the stored name is used,
+ * and for U_EXTENDED_CHAR_NAME an empty stored name falls back to the
+ * synthetic extended name.
+ */
+U_CAPI int32_t U_EXPORT2
+u_charName(UChar32 code, UCharNameChoice nameChoice,
+           char *buffer, int32_t bufferLength,
+           UErrorCode *pErrorCode) {
+    AlgorithmicRange *algRange;
+    uint32_t *p;
+    uint32_t i;
+    int32_t length;
+    uint16_t capacity;
+
+    /* check the argument values */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
+              bufferLength<0 || (bufferLength>0 && buffer==NULL)
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
+        return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
+    }
+
+    /*
+     * The internal writers take a uint16_t capacity. Clamp rather than
+     * truncate: a plain cast would turn e.g. 0x10000 into 0, so that nothing
+     * is written while u_terminateChars() below still sees a large buffer
+     * and reports success.
+     */
+    capacity=(uint16_t)(bufferLength>0xffff ? 0xffff : bufferLength);
+
+    length=0;
+
+    /* try algorithmic names first */
+    p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
+    i=*p;
+    algRange=(AlgorithmicRange *)(p+1);
+    while(i>0) {
+        if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
+            length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, capacity);
+            break;
+        }
+        algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
+        --i;
+    }
+
+    /* i==0 means that no algorithmic range contains code */
+    if(i==0) {
+        if (nameChoice == U_EXTENDED_CHAR_NAME) {
+            length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, capacity);
+            if (!length) {
+                /* extended character name */
+                length = getExtName((uint32_t) code, buffer, capacity);
+            }
+        } else {
+            /* normal character name */
+            length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, capacity);
+        }
+    }
+
+    return u_terminateChars(buffer, bufferLength, length, pErrorCode);
+}
+
+/*
+ * Always produces an empty string: after the argument checks the result is
+ * u_terminateChars() with length 0. The code point argument is unused.
+ */
+U_CAPI int32_t U_EXPORT2
+u_getISOComment(UChar32 /*c*/,
+                char *dest, int32_t destCapacity,
+                UErrorCode *pErrorCode) {
+    /* check the argument values */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    const UBool badDestination = destCapacity<0 || (destCapacity>0 && dest==NULL);
+    if(badDestination) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    return u_terminateChars(dest, destCapacity, 0, pErrorCode);
+}
+
+U_CAPI UChar32 U_EXPORT2 /* returns the code point found for name, or 0xffff (see error below) with *pErrorCode set on failure */
+u_charFromName(UCharNameChoice nameChoice,
+ const char *name,
+ UErrorCode *pErrorCode) {
+ char upper[120] = {0}; /* uppercase copy of name: used for the algorithmic and data-driven lookups */
+ char lower[120] = {0}; /* lowercase copy of name: used only for the "<category-HHHH>" form */
+ FindName findName;
+ AlgorithmicRange *algRange;
+ uint32_t *p;
+ uint32_t i;
+ UChar32 cp = 0;
+ char c0;
+ static constexpr UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return error;
+ }
+
+ if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) { /* a NULL or empty name is an argument error */
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return error;
+ }
+
+ if(!isDataLoaded(pErrorCode)) {
+ return error;
+ }
+
+ /* construct the uppercase and lowercase of the name first */
+ for(i=0; i<sizeof(upper); ++i) {
+ if((c0=*name++)!=0) {
+ upper[i]=uprv_toupper(c0);
+ lower[i]=uprv_tolower(c0);
+ } else {
+ upper[i]=lower[i]=0;
+ break;
+ }
+ }
+ if(i==sizeof(upper)) { /* the loop filled the whole buffer without reaching the terminating NUL */
+ /* name too long, there is no such character */
+ *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+ return error;
+ }
+ // i==strlen(name)==strlen(lower)==strlen(upper)
+
+ /* try extended names first; a name starting with '<' never reaches the lookups further down */
+ if (lower[0] == '<') {
+ if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == '>') { /* inside this branch i indexes the closing '>' */
+ // Parse a string like "<category-HHHH>" where HHHH is a hex code point.
+ uint32_t limit = i;
+ while (i >= 3 && lower[--i] != '-') {} /* scan backwards for the '-' that precedes the hex digits */
+
+ // There should be 1 to 8 hex digits.
+ int32_t hexLength = limit - (i + 1);
+ if (i >= 2 && lower[i] == '-' && 1 <= hexLength && hexLength <= 8) {
+ uint32_t cIdx;
+
+ lower[i] = 0; /* cut the string at the '-' so that lower+1 is just the category name */
+
+ for (++i; i < limit; ++i) {
+ if (lower[i] >= '0' && lower[i] <= '9') {
+ cp = (cp << 4) + lower[i] - '0';
+ } else if (lower[i] >= 'a' && lower[i] <= 'f') { /* only a-f is tested: this is the lowercase copy */
+ cp = (cp << 4) + lower[i] - 'a' + 10;
+ } else {
+ *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+ return error;
+ }
+ // Prevent signed-integer overflow and out-of-range code points.
+ if (cp > UCHAR_MAX_VALUE) {
+ *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+ return error;
+ }
+ }
+
+ /* Now validate the category name.
+ We could use a binary search, or a trie, if
+ we really wanted to. */
+ uint8_t cat = getCharCat(cp);
+ for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) { /* i==limit here, so lower[i]=0 overwrites the closing '>' */
+
+ if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
+ if (cat == cIdx) { /* accept only if the matched name's index equals getCharCat(cp) */
+ return cp;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+ return error;
+ }
+
+ /* try algorithmic names now */
+ p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
+ i=*p; /* the first 32-bit word is used as the number of ranges */
+ algRange=(AlgorithmicRange *)(p+1);
+ while(i>0) {
+ if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) { /* 0xffff is treated as "not found in this range" */
+ return cp;
+ }
+ algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); /* advance by the record's own byte size */
+ --i;
+ }
+
+ /* normal character name */
+ findName.otherName=upper;
+ findName.code=error;
+ enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice); /* presumably stores a match in findName.code; enumNames is not shown here */
+ if (findName.code == error) { /* still the initial value: nothing matched */
+ *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+ }
+ return findName.code;
+}
+
+U_CAPI void U_EXPORT2 /* enumerates names for [start, limit), handing fn and context to enumNames()/enumAlgNames() */
+u_enumCharNames(UChar32 start, UChar32 limit,
+ UEnumCharNamesFn *fn,
+ void *context,
+ UCharNameChoice nameChoice,
+ UErrorCode *pErrorCode) {
+ AlgorithmicRange *algRange;
+ uint32_t *p;
+ uint32_t i;
+
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return;
+ }
+
+ if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ if((uint32_t) limit > UCHAR_MAX_VALUE + 1) { /* the unsigned cast makes a negative limit clamp as well */
+ limit = UCHAR_MAX_VALUE + 1;
+ }
+ if((uint32_t)start>=(uint32_t)limit) { /* empty range: return without setting an error */
+ return;
+ }
+
+ if(!isDataLoaded(pErrorCode)) {
+ return;
+ }
+
+ /* interleave the data-driven ones with the algorithmic ones */
+ /* iterate over all algorithmic ranges; assume that they are in ascending order */
+ p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
+ i=*p; /* the first 32-bit word is used as the number of ranges */
+ algRange=(AlgorithmicRange *)(p+1);
+ while(i>0) {
+ /* enumerate the character names before the current algorithmic range */
+ /* here: start<limit */
+ if((uint32_t)start<algRange->start) {
+ if((uint32_t)limit<=algRange->start) { /* the requested range ends before this algorithmic range begins */
+ enumNames(uCharNames, start, limit, fn, context, nameChoice);
+ return;
+ }
+ if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) { /* a false result ends the whole enumeration */
+ return;
+ }
+ start=(UChar32)algRange->start;
+ }
+ /* enumerate the character names in the current algorithmic range */
+ /* here: algRange->start<=start<limit */
+ if((uint32_t)start<=algRange->end) {
+ if((uint32_t)limit<=(algRange->end+1)) { /* the requested range ends inside this algorithmic range */
+ enumAlgNames(algRange, start, limit, fn, context, nameChoice);
+ return;
+ }
+ if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) { /* a false result ends the whole enumeration */
+ return;
+ }
+ start=(UChar32)algRange->end+1;
+ }
+ /* continue to the next algorithmic range (here: start<limit) */
+ algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); /* advance by the record's own byte size */
+ --i;
+ }
+ /* enumerate the character names after the last algorithmic range */
+ enumNames(uCharNames, start, limit, fn, context, nameChoice);
+}
+
+U_CAPI int32_t U_EXPORT2 /* returns gMaxNameLength, or 0 when calcNameSetsLengths() reports failure */
+uprv_getMaxCharNameLength() {
+ UErrorCode errorCode=U_ZERO_ERROR; /* local only: the error code itself is not reported to the caller */
+ if(calcNameSetsLengths(&errorCode)) {
+ return gMaxNameLength;
+ } else {
+ return 0;
+ }
+}
+
+/**
+ * Converts the chars flagged in cset to UChars and passes them to sa->add().
+ * @param cset Set of 256 bit flags corresponding to a set of chars.
+ * @param sa USetAdder; each converted character is handed to sa->add(sa->set, c). Nothing is removed here.
+ */
+static void
+charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
+ UChar us[256];
+ char cs[256];
+
+ int32_t i, length;
+ UErrorCode errorCode;
+
+ errorCode=U_ZERO_ERROR;
+
+ if(!calcNameSetsLengths(&errorCode)) { /* on failure return quietly: nothing is added and no error is reported */
+ return;
+ }
+
+ /* build a char string with all chars that are flagged in cset */
+ length=0;
+ for(i=0; i<256; ++i) {
+ if(SET_CONTAINS(cset, i)) {
+ cs[length++]=(char)i;
+ }
+ }
+
+ /* convert the char string to a UChar string */
+ u_charsToUChars(cs, us, length);
+
+ /* pass each UChar to the USetAdder */
+ for(i=0; i<length; ++i) {
+ if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0: skip those, but keep a genuine NUL char */
+ sa->add(sa->set, us[i]);
+ }
+ }
+}
+
+/**
+ * Adds the characters that are used in Unicode character names (taken from gNameSet).
+ * @param sa USetAdder that receives the characters.
+ */
+U_CAPI void U_EXPORT2
+uprv_getCharNameCharacters(const USetAdder *sa) {
+ charSetToUSet(gNameSet, sa);
+}
+
+/* data swapping ------------------------------------------------------------ */
+
+/*
+ * The token table contains non-negative entries for token bytes,
+ * and -1 for bytes that represent themselves in the data file's charset.
+ * -2 entries are used for lead bytes.
+ *
+ * Direct bytes (-1 entries) must be translated from the input charset family
+ * to the output charset family.
+ * makeTokenMap() writes a permutation mapping for this.
+ * Use it once for single-/lead-byte tokens and once more for all trail byte
+ * tokens. (';' is an unused trail byte marked with -1.)
+ */
+static void
+makeTokenMap(const UDataSwapper *ds,
+ int16_t tokens[], uint16_t tokenCount, /* token table entries; at most the first 256 are examined */
+ uint8_t map[256], /* out: output byte for each input byte value */
+ UErrorCode *pErrorCode) { /* dereferenced without a NULL check */
+ UBool usedOutChar[256];
+ uint16_t i, j;
+ uint8_t c1, c2;
+
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+
+ if(ds->inCharset==ds->outCharset) {
+ /* Same charset family: identity permutation */
+ for(i=0; i<256; ++i) {
+ map[i]=(uint8_t)i;
+ }
+ } else {
+ uprv_memset(map, 0, 256); /* 0 doubles as the "not yet mapped" marker tested below */
+ uprv_memset(usedOutChar, 0, 256); /* NOTE(review): clears the whole array only if sizeof(UBool)==1 - confirm */
+
+ if(tokenCount>256) { /* map[] has only 256 entries */
+ tokenCount=256;
+ }
+
+ /* set the direct bytes (byte 0 always maps to itself) */
+ for(i=1; i<tokenCount; ++i) {
+ if(tokens[i]==-1) {
+ /* convert the direct byte character */
+ c1=(uint8_t)i;
+ ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
+ i, ds->inCharset);
+ return;
+ }
+
+ /* enter the converted character into the map and mark it used */
+ map[c1]=c2;
+ usedOutChar[c2]=TRUE;
+ }
+ }
+
+ /* set the mappings for the rest of the permutation */
+ for(i=j=1; i<tokenCount; ++i) { /* j only moves forward, so each free output byte is handed out once */
+ /* set mappings that were not set for direct bytes */
+ if(map[i]==0) {
+ /* set an output byte value that was not used as an output byte above */
+ while(usedOutChar[j]) {
+ ++j;
+ }
+ map[i]=(uint8_t)j++;
+ }
+ }
+
+ /*
+ * leave mappings at tokenCount and above unset if tokenCount<256
+ * because they won't be used
+ */
+ }
+}
+
+U_CAPI int32_t U_EXPORT2 /* returns headerSize plus the end offset of the algorithmic ranges, or 0 on error; length<0 only preflights */
+uchar_swapNames(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const UDataInfo *pInfo;
+ int32_t headerSize;
+
+ const uint8_t *inBytes;
+ uint8_t *outBytes;
+
+ uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
+ offset, i, count, stringsCount;
+
+ const AlgorithmicRange *inRange;
+ AlgorithmicRange *outRange;
+
+ /* udata_swapDataHeader checks the arguments */
+ headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
+ /* check data format and format version */
+ pInfo=(const UDataInfo *)((const char *)inData+4); /* the UDataInfo is read from 4 bytes into the data */
+ if(!(
+ pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */
+ pInfo->dataFormat[1]==0x6e &&
+ pInfo->dataFormat[2]==0x61 &&
+ pInfo->dataFormat[3]==0x6d &&
+ pInfo->formatVersion[0]==1
+ )) {
+ udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
+ pInfo->dataFormat[0], pInfo->dataFormat[1],
+ pInfo->dataFormat[2], pInfo->dataFormat[3],
+ pInfo->formatVersion[0]);
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ inBytes=(const uint8_t *)inData+headerSize;
+ outBytes=(uint8_t *)outData+headerSize;
+ if(length<0) { /* preflighting: only the algorithmic-names offset (4th 32-bit word) is read */
+ algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
+ } else {
+ length-=headerSize; /* from here on length counts the bytes after the header */
+ if( length<20 ||
+ (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
+ ) {
+ udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
+ length);
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ }
+
+ if(length<0) {
+ /* preflighting: iterate through algorithmic ranges */
+ offset=algNamesOffset;
+ count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
+ offset+=4;
+
+ for(i=0; i<count; ++i) {
+ inRange=(const AlgorithmicRange *)(inBytes+offset);
+ offset+=ds->readUInt16(inRange->size);
+ }
+ } else {
+ /* swap data */
+ const uint16_t *p;
+ uint16_t *q, *temp;
+
+ int16_t tokens[512];
+ uint16_t tokenCount;
+
+ uint8_t map[256], trailMap[256]; /* byte permutations: map for single/lead bytes, trailMap for trail bytes */
+
+ /* copy the data for inaccessible bytes */
+ if(inBytes!=outBytes) {
+ uprv_memcpy(outBytes, inBytes, length);
+ }
+
+ /* the initial 4 offsets first */
+ tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
+ groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
+ groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
+ ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode); /* 16 bytes = the 4 initial 32-bit offsets */
+
+ /*
+ * now the tokens table
+ * it needs to be permuted along with the compressed name strings
+ */
+ p=(const uint16_t *)(inBytes+16);
+ q=(uint16_t *)(outBytes+16);
+
+ /* read and swap the tokenCount */
+ tokenCount=ds->readUInt16(*p);
+ ds->swapArray16(ds, p, 2, q, pErrorCode);
+ ++p;
+ ++q;
+
+ /* read the first 512 tokens and make the token maps */
+ if(tokenCount<=512) {
+ count=tokenCount;
+ } else {
+ count=512;
+ }
+ for(i=0; i<count; ++i) {
+ tokens[i]=udata_readInt16(ds, p[i]);
+ }
+ for(; i<512; ++i) {
+ tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
+ }
+ makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
+ makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode); /* trail-byte map is built from tokens 256 and up */
+ if(U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
+ /*
+ * swap and permute the tokens
+ * go through a temporary array to support in-place swapping
+ */
+ temp=(uint16_t *)uprv_malloc(tokenCount*2); /* 2 bytes per 16-bit token */
+ if(temp==NULL) {
+ udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
+ tokenCount);
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+
+ /* swap and permute single-/lead-byte tokens */
+ for(i=0; i<tokenCount && i<256; ++i) {
+ ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
+ }
+
+ /* swap and permute trail-byte tokens */
+ for(; i<tokenCount; ++i) {
+ ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode); /* keep the block of 256, permute only the low byte of the index */
+ }
+
+ /* copy the result into the output and free the temporary array */
+ uprv_memcpy(q, temp, tokenCount*2);
+ uprv_free(temp);
+
+ /*
+ * swap the token strings but not a possible padding byte after
+ * the terminating NUL of the last string
+ */
+ udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
+ outBytes+tokenStringOffset, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ udata_printError(ds, "uchar_swapNames(token strings) failed\n");
+ return 0;
+ }
+
+ /* swap the group table */
+ count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
+ ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2), /* 1 count word plus 3 16-bit words per group */
+ outBytes+groupsOffset, pErrorCode);
+
+ /*
+ * swap the group strings
+ * swap the string bytes but not the nibble-encoded string lengths
+ */
+ if(ds->inCharset!=ds->outCharset) {
+ uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];
+
+ const uint8_t *inStrings, *nextInStrings;
+ uint8_t *outStrings;
+
+ uint8_t c;
+
+ inStrings=inBytes+groupStringOffset;
+ outStrings=outBytes+groupStringOffset;
+
+ stringsCount=algNamesOffset-groupStringOffset;
+
+ /* iterate through string groups until only a few padding bytes are left */
+ while(stringsCount>32) {
+ nextInStrings=expandGroupLengths(inStrings, offsets, lengths);
+
+ /* move past the length bytes */
+ stringsCount-=(uint32_t)(nextInStrings-inStrings);
+ outStrings+=nextInStrings-inStrings;
+ inStrings=nextInStrings;
+
+ count=offsets[31]+lengths[31]; /* total number of string bytes in this group; 31 is presumably LINES_PER_GROUP-1 - confirm */
+ stringsCount-=count;
+
+ /* swap the string bytes using map[] and trailMap[] */
+ while(count>0) { /* NOTE(review): count is unsigned, so count-=2 below would wrap if a lead byte were the last byte; assumes well-formed data */
+ c=*inStrings++;
+ *outStrings++=map[c];
+ if(tokens[c]!=-2) { /* not a lead byte: one byte consumed */
+ --count;
+ } else {
+ /* token lead byte: swap the trail byte, too */
+ *outStrings++=trailMap[*inStrings++];
+ count-=2;
+ }
+ }
+ }
+ }
+
+ /* swap the algorithmic ranges */
+ offset=algNamesOffset;
+ count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
+ ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
+ offset+=4;
+
+ for(i=0; i<count; ++i) {
+ if(offset>(uint32_t)length) {
+ udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
+ length, i);
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ inRange=(const AlgorithmicRange *)(inBytes+offset);
+ outRange=(AlgorithmicRange *)(outBytes+offset);
+ offset+=ds->readUInt16(inRange->size); /* offset now points past this range record */
+
+ ds->swapArray32(ds, inRange, 8, outRange, pErrorCode); /* the first 8 bytes of the record, as two 32-bit values */
+ ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
+ switch(inRange->type) {
+ case 0:
+ /* swap prefix string */
+ ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
+ outRange+1, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
+ i);
+ return 0;
+ }
+ break;
+ case 1:
+ {
+ /* swap factors and the prefix and factor strings */
+ uint32_t factorsCount;
+
+ factorsCount=inRange->variant; /* for type 1 the variant field is used as the number of factors */
+ p=(const uint16_t *)(inRange+1);
+ q=(uint16_t *)(outRange+1);
+ ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);
+
+ /* swap the strings, up to the last terminating NUL */
+ p+=factorsCount;
+ q+=factorsCount;
+ stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p); /* bytes from the end of the factors to the end of this record */
+ while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
+ --stringsCount;
+ }
+ ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
+ }
+ break;
+ default:
+ udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
+ inRange->type, i);
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+ }
+ }
+
+ return headerSize+(int32_t)offset; /* in both branches offset ends just past the last algorithmic range */
+}
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
diff --git a/thirdparty/icu4c/common/unicode/appendable.h b/thirdparty/icu4c/common/unicode/appendable.h
new file mode 100644
index 0000000000..fc99254de1
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/appendable.h
@@ -0,0 +1,239 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2011-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: appendable.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010dec07
+* created by: Markus W. Scherer
+*/
+
+#ifndef __APPENDABLE_H__
+#define __APPENDABLE_H__
+
+/**
+ * \file
+ * \brief C++ API: Appendable class: Sink for Unicode code points and 16-bit code units (char16_ts).
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+/**
+ * Base class for objects to which Unicode characters and strings can be appended.
+ * Combines elements of Java Appendable and ICU4C ByteSink.
+ *
+ * This class can be used in APIs where it does not matter whether the actual destination is
+ * a UnicodeString, a char16_t[] array, a UnicodeSet, or any other object
+ * that receives and processes characters and/or strings.
+ *
+ * Implementation classes must implement at least appendCodeUnit(char16_t).
+ * The base class provides default implementations for the other methods.
+ *
+ * The methods do not take UErrorCode parameters.
+ * If an error occurs (e.g., out-of-memory),
+ * in addition to returning false from failing operations,
+ * the implementation must prevent unexpected behavior (e.g., crashes)
+ * from further calls and should make the error condition available separately
+ * (e.g., store a UErrorCode, make/keep a UnicodeString bogus).
+ * @stable ICU 4.8
+ */
+class U_COMMON_API Appendable : public UObject {
+public:
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ ~Appendable(); // not marked virtual here; NOTE(review): presumably virtual through UObject - confirm
+
+ /**
+ * Appends a 16-bit code unit.
+ * @param c code unit
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendCodeUnit(char16_t c) = 0; // the only pure virtual function declared in this class
+
+ /**
+ * Appends a code point.
+ * The default implementation calls appendCodeUnit(char16_t) once or twice.
+ * @param c code point 0..0x10ffff
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendCodePoint(UChar32 c);
+
+ /**
+ * Appends a string.
+ * The default implementation calls appendCodeUnit(char16_t) for each code unit.
+ * @param s string, must not be NULL if length!=0
+ * @param length string length, or -1 if NUL-terminated
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendString(const char16_t *s, int32_t length);
+
+ /**
+ * Tells the object that the caller is going to append roughly
+ * appendCapacity char16_ts. A subclass might use this to pre-allocate
+ * a larger buffer if necessary.
+ * The default implementation does nothing. (It always returns true.)
+ * @param appendCapacity estimated number of char16_ts that will be appended
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool reserveAppendCapacity(int32_t appendCapacity);
+
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * *resultCapacity. Guarantees *resultCapacity>=minCapacity.
+ * May return a pointer to the caller-owned scratch buffer which must have
+ * scratchCapacity>=minCapacity.
+ * The returned buffer is only valid until the next operation
+ * on this Appendable.
+ *
+ * After writing at most *resultCapacity char16_ts, call appendString() with the
+ * pointer returned from this function and the number of char16_ts written.
+ * Many appendString() implementations will avoid copying char16_ts if this function
+ * returned an internal buffer.
+ *
+ * Partial usage example:
+ * \code
+ * int32_t capacity;
+ * char16_t* buffer = app.getAppendBuffer(..., &capacity);
+ * ... Write n char16_ts into buffer, with n <= capacity.
+ * app.appendString(buffer, n);
+ * \endcode
+ * In many implementations, that call to appendString() will avoid copying char16_ts.
+ *
+ * If the Appendable allocates or reallocates an internal buffer, it should use
+ * the desiredCapacityHint if appropriate.
+ * If a caller cannot provide a reasonable guess at the desired capacity,
+ * it should pass desiredCapacityHint=0.
+ *
+ * If a non-scratch buffer is returned, the caller may only pass
+ * a prefix of it to appendString().
+ * That is, it is not correct to pass an interior pointer to appendString().
+ *
+ * The default implementation always returns the scratch buffer.
+ *
+ * @param minCapacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desiredCapacityHint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param scratch default caller-owned buffer
+ * @param scratchCapacity capacity of the scratch buffer
+ * @param resultCapacity pointer to an integer which will be set to the
+ * capacity of the returned buffer
+ * @return a buffer with *resultCapacity>=minCapacity
+ * @stable ICU 4.8
+ */
+ virtual char16_t *getAppendBuffer(int32_t minCapacity,
+ int32_t desiredCapacityHint,
+ char16_t *scratch, int32_t scratchCapacity,
+ int32_t *resultCapacity);
+};
+
+/**
+ * An Appendable implementation which writes to a UnicodeString.
+ *
+ * This class is not intended for public subclassing.
+ * @stable ICU 4.8
+ */
+class U_COMMON_API UnicodeStringAppendable : public Appendable {
+public:
+ /**
+ * Aliases the UnicodeString (keeps its reference) for writing.
+ * @param s The UnicodeString to which this Appendable will write.
+ * @stable ICU 4.8
+ */
+ explicit UnicodeStringAppendable(UnicodeString &s) : str(s) {} // binds the reference only; s is not copied
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ ~UnicodeStringAppendable();
+
+ /**
+ * Appends a 16-bit code unit to the string.
+ * @param c code unit
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendCodeUnit(char16_t c); // implements the pure virtual function of Appendable
+
+ /**
+ * Appends a code point to the string.
+ * @param c code point 0..0x10ffff
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendCodePoint(UChar32 c); // same signature as the Appendable member it overrides
+
+ /**
+ * Appends a string to the UnicodeString.
+ * @param s string, must not be NULL if length!=0
+ * @param length string length, or -1 if NUL-terminated
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendString(const char16_t *s, int32_t length); // same signature as the Appendable member it overrides
+
+ /**
+ * Tells the UnicodeString that the caller is going to append roughly
+ * appendCapacity char16_ts.
+ * @param appendCapacity estimated number of char16_ts that will be appended
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool reserveAppendCapacity(int32_t appendCapacity); // same signature as the Appendable member it overrides
+
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * *resultCapacity. Guarantees *resultCapacity>=minCapacity.
+ * May return a pointer to the caller-owned scratch buffer which must have
+ * scratchCapacity>=minCapacity.
+ * The returned buffer is only valid until the next write operation
+ * on the UnicodeString.
+ *
+ * For details see Appendable::getAppendBuffer().
+ *
+ * @param minCapacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desiredCapacityHint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param scratch default caller-owned buffer
+ * @param scratchCapacity capacity of the scratch buffer
+ * @param resultCapacity pointer to an integer which will be set to the
+ * capacity of the returned buffer
+ * @return a buffer with *resultCapacity>=minCapacity
+ * @stable ICU 4.8
+ */
+ virtual char16_t *getAppendBuffer(int32_t minCapacity,
+ int32_t desiredCapacityHint,
+ char16_t *scratch, int32_t scratchCapacity,
+ int32_t *resultCapacity); // same signature as the Appendable member it overrides
+
+private:
+ UnicodeString &str; // the aliased destination string, bound in the constructor
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __APPENDABLE_H__
diff --git a/thirdparty/icu4c/common/unicode/brkiter.h b/thirdparty/icu4c/common/unicode/brkiter.h
new file mode 100644
index 0000000000..9bba5fcccc
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/brkiter.h
@@ -0,0 +1,670 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+********************************************************************************
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+********************************************************************************
+*
+* File brkiter.h
+*
+* Modification History:
+*
+* Date Name Description
+* 02/18/97 aliu Added typedef for TextCount. Made DONE const.
+* 05/07/97 aliu Fixed DLL declaration.
+* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
+* 08/11/98 helena Sync-up JDK1.2.
+* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
+********************************************************************************
+*/
+
+#ifndef BRKITER_H
+#define BRKITER_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C++ API: Break Iterator.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#if UCONFIG_NO_BREAK_ITERATION
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Allow the declaration of APIs with pointers to BreakIterator
+ * even when break iteration is removed from the build.
+ */
+class BreakIterator;
+
+U_NAMESPACE_END
+
+#else
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/chariter.h"
+#include "unicode/locid.h"
+#include "unicode/ubrk.h"
+#include "unicode/strenum.h"
+#include "unicode/utext.h"
+#include "unicode/umisc.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * The BreakIterator class implements methods for finding the location
+ * of boundaries in text. BreakIterator is an abstract base class.
+ * Instances of BreakIterator maintain a current position and scan over
+ * text returning the index of characters where boundaries occur.
+ * <p>
+ * Line boundary analysis determines where a text string can be broken
+ * when line-wrapping. The mechanism correctly handles punctuation and
+ * hyphenated words.
+ * <p>
+ * Sentence boundary analysis allows selection with correct
+ * interpretation of periods within numbers and abbreviations, and
+ * trailing punctuation marks such as quotation marks and parentheses.
+ * <p>
+ * Word boundary analysis is used by search and replace functions, as
+ * well as within text editing applications that allow the user to
+ * select words with a double click. Word selection provides correct
+ * interpretation of punctuation marks within and following
+ * words. Characters that are not part of a word, such as symbols or
+ * punctuation marks, have word-breaks on both sides.
+ * <p>
+ * Character boundary analysis allows users to interact with
+ * characters as they expect to, for example, when moving the cursor
+ * through a text string. Character boundary analysis provides correct
+ * navigation through character strings, regardless of how the
+ * character is stored. For example, an accented character might be
+ * stored as a base character and a diacritical mark. What users
+ * consider to be a character can differ between languages.
+ * <p>
+ * The text boundary positions are found according to the rules
+ * described in Unicode Standard Annex #29, Text Boundaries, and
+ * Unicode Standard Annex #14, Line Breaking Properties. These
+ * are available at http://www.unicode.org/reports/tr14/ and
+ * http://www.unicode.org/reports/tr29/.
+ * <p>
+ * In addition to the C++ API defined in this header file, a
+ * plain C API with equivalent functionality is defined in the
+ * file ubrk.h
+ * <p>
+ * Code snippets illustrating the use of the Break Iterator APIs
+ * are available in the ICU User Guide,
+ * http://icu-project.org/userguide/boundaryAnalysis.html
+ * and in the sample program icu/source/samples/break/break.cpp
+ *
+ */
+class U_COMMON_API BreakIterator : public UObject {
+public:
+ /**
+ * destructor
+ * @stable ICU 2.0
+ */
+ virtual ~BreakIterator();
+
+ /**
+ * Return true if another object is semantically equal to this
+ * one. The other object should be an instance of the same subclass of
+ * BreakIterator. Objects of different subclasses are considered
+ * unequal.
+ * <P>
+ * Return true if this BreakIterator is at the same position in the
+ * same text, and is the same class and type (word, line, etc.) of
+ * BreakIterator, as the argument. Text is considered the same if
+ * it contains the same characters, it need not be the same
+ * object, and styles are not considered.
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const BreakIterator&) const = 0;
+
+ /**
+ * Returns the complement of the result of operator==
+ * @param rhs The BreakIterator to be compared for inequality
+ * @return the complement of the result of operator==
+ * @stable ICU 2.0
+ */
+ UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
+
+ /**
+ * Return a polymorphic copy of this object. This is an abstract
+ * method which subclasses implement.
+ * @stable ICU 2.0
+ */
+ virtual BreakIterator* clone() const = 0;
+
+ /**
+ * Return a polymorphic class ID for this object. Different subclasses
+ * will return distinct unequal values.
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const = 0;
+
+ /**
+ * Return a CharacterIterator over the text being analyzed.
+ * @stable ICU 2.0
+ */
+ virtual CharacterIterator& getText(void) const = 0;
+
+
+ /**
+ * Get a UText for the text being analyzed.
+ * The returned UText is a shallow clone of the UText used internally
+ * by the break iterator implementation. It can safely be used to
+ * access the text without impacting any break iterator operations,
+ * but the underlying text itself must not be altered.
+ *
+ * @param fillIn A UText to be filled in. If NULL, a new UText will be
+ * allocated to hold the result.
+ * @param status receives any error codes.
+ * @return The current UText for this break iterator. If an input
+ * UText was provided, it will always be returned.
+ * @stable ICU 3.4
+ */
+ virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
+
+ /**
+ * Change the text over which this operates. The text boundary is
+ * reset to the start.
+ *
+ * The BreakIterator will retain a reference to the supplied string.
+ * The caller must not modify or delete the text while the BreakIterator
+ * retains the reference.
+ *
+ * @param text The UnicodeString used to change the text.
+ * @stable ICU 2.0
+ */
+ virtual void setText(const UnicodeString &text) = 0;
+
+ /**
+ * Reset the break iterator to operate over the text represented by
+ * the UText. The iterator position is reset to the start.
+ *
+ * This function makes a shallow clone of the supplied UText. This means
+ * that the caller is free to immediately close or otherwise reuse the
+ * UText that was passed as a parameter, but that the underlying text itself
+ * must not be altered while being referenced by the break iterator.
+ *
+ * All index positions returned by break iterator functions are
+ * native indices from the UText. For example, when breaking UTF-8
+ * encoded text, the break positions returned by next(), previous(), etc.
+ * will be UTF-8 string indices, not UTF-16 positions.
+ *
+ * @param text The UText used to change the text.
+ * @param status receives any error codes.
+ * @stable ICU 3.4
+ */
+ virtual void setText(UText *text, UErrorCode &status) = 0;
+
+ /**
+ * Change the text over which this operates. The text boundary is
+ * reset to the start.
+ * Note that setText(UText *) provides similar functionality to this function,
+ * and is more efficient.
+ * @param it The CharacterIterator used to change the text.
+ * @stable ICU 2.0
+ */
+ virtual void adoptText(CharacterIterator* it) = 0;
+
+ enum {
+ /**
+ * DONE is returned by previous() and next() after all valid
+ * boundaries have been returned.
+ * @stable ICU 2.0
+ */
+ DONE = (int32_t)-1
+ };
+
+ /**
+ * Sets the current iteration position to the beginning of the text, position zero.
+ * @return The offset of the beginning of the text, zero.
+ * @stable ICU 2.0
+ */
+ virtual int32_t first(void) = 0;
+
+ /**
+ * Set the iterator position to the index immediately BEYOND the last character in the text being scanned.
+ * @return The index immediately BEYOND the last character in the text being scanned.
+ * @stable ICU 2.0
+ */
+ virtual int32_t last(void) = 0;
+
+ /**
+ * Set the iterator position to the boundary preceding the current boundary.
+ * @return The character index of the previous text boundary or DONE if all
+ * boundaries have been returned.
+ * @stable ICU 2.0
+ */
+ virtual int32_t previous(void) = 0;
+
+ /**
+ * Advance the iterator to the boundary following the current boundary.
+ * @return The character index of the next text boundary or DONE if all
+ * boundaries have been returned.
+ * @stable ICU 2.0
+ */
+ virtual int32_t next(void) = 0;
+
+ /**
+ * Return character index of the current iterator position within the text.
+ * @return The boundary most recently returned.
+ * @stable ICU 2.0
+ */
+ virtual int32_t current(void) const = 0;
+
+ /**
+ * Advance the iterator to the first boundary following the specified offset.
+ * The value returned is always greater than the offset or
+ * the value BreakIterator::DONE
+ * @param offset the offset to begin scanning.
+ * @return The first boundary after the specified offset.
+ * @stable ICU 2.0
+ */
+ virtual int32_t following(int32_t offset) = 0;
+
+ /**
+ * Set the iterator position to the first boundary preceding the specified offset.
+ * The value returned is always smaller than the offset or
+ * the value BreakIterator::DONE
+ * @param offset the offset to begin scanning.
+ * @return The first boundary before the specified offset.
+ * @stable ICU 2.0
+ */
+ virtual int32_t preceding(int32_t offset) = 0;
+
+ /**
+ * Return true if the specified position is a boundary position.
+ * As a side effect, the current position of the iterator is set
+ * to the first boundary position at or following the specified offset.
+ * @param offset the offset to check.
+ * @return True if "offset" is a boundary position.
+ * @stable ICU 2.0
+ */
+ virtual UBool isBoundary(int32_t offset) = 0;
+
+ /**
+ * Set the iterator position to the nth boundary from the current boundary
+ * @param n the number of boundaries to move by. A value of 0
+ * does nothing. Negative values move to previous boundaries
+ * and positive values move to later boundaries.
+ * @return The new iterator position, or
+ * DONE if there are fewer than |n| boundaries in the specified direction.
+ * @stable ICU 2.0
+ */
+ virtual int32_t next(int32_t n) = 0;
+
+ /**
+ * For RuleBasedBreakIterators, return the status tag from the break rule
+ * that determined the boundary at the current iteration position.
+ * <p>
+ * For break iterator types that do not support a rule status,
+ * a default value of 0 is returned.
+ * <p>
+ * @return the status from the break rule that determined the boundary at
+ * the current iteration position.
+ * @see RuleBasedBreakIterator::getRuleStatus()
+ * @see UWordBreak
+ * @stable ICU 52
+ */
+ virtual int32_t getRuleStatus() const;
+
+ /**
+ * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
+ * that determined the boundary at the current iteration position.
+ * <p>
+ * For break iterator types that do not support rule status,
+ * no values are returned.
+ * <p>
+ * The returned status value(s) are stored into an array provided by the caller.
+ * The values are stored in sorted (ascending) order.
+ * If the capacity of the output array is insufficient to hold the data,
+ * the output will be truncated to the available length, and a
+ * U_BUFFER_OVERFLOW_ERROR will be signaled.
+ * <p>
+ * @see RuleBasedBreakIterator::getRuleStatusVec
+ *
+ * @param fillInVec an array to be filled in with the status values.
+ * @param capacity the length of the supplied vector. A length of zero causes
+ * the function to return the number of status values, in the
+ * normal way, without attempting to store any values.
+ * @param status receives error codes.
+ * @return The number of rule status values from rules that determined
+ * the boundary at the current iteration position.
+ * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
+ * is the total number of status values that were available,
+ * not the reduced number that were actually returned.
+ * @see getRuleStatus
+ * @stable ICU 52
+ */
+ virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
+
+ /**
+ * Create BreakIterator for word-breaks using the given locale.
+ * Returns an instance of a BreakIterator implementing word breaks.
+ * WordBreak is useful for word selection (ex. double click)
+ * @param where the locale.
+ * @param status the error code
+ * @return A BreakIterator for word-breaks. The UErrorCode& status
+ * parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(err). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @stable ICU 2.0
+ */
+ static BreakIterator* U_EXPORT2
+ createWordInstance(const Locale& where, UErrorCode& status);
+
+ /**
+ * Create BreakIterator for line-breaks using specified locale.
+ * Returns an instance of a BreakIterator implementing line breaks. Line
+ * breaks are logically possible line breaks, actual line breaks are
+ * usually determined based on display width.
+ * LineBreak is useful for word wrapping text.
+ * @param where the locale.
+ * @param status The error code.
+ * @return A BreakIterator for line-breaks. The UErrorCode& status
+ * parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(err). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @stable ICU 2.0
+ */
+ static BreakIterator* U_EXPORT2
+ createLineInstance(const Locale& where, UErrorCode& status);
+
+ /**
+ * Create BreakIterator for character-breaks using specified locale
+ * Returns an instance of a BreakIterator implementing character breaks.
+ * Character breaks are boundaries of combining character sequences.
+ * @param where the locale.
+ * @param status The error code.
+ * @return A BreakIterator for character-breaks. The UErrorCode& status
+ * parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(err). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @stable ICU 2.0
+ */
+ static BreakIterator* U_EXPORT2
+ createCharacterInstance(const Locale& where, UErrorCode& status);
+
+ /**
+ * Create BreakIterator for sentence-breaks using specified locale
+ * Returns an instance of a BreakIterator implementing sentence breaks.
+ * @param where the locale.
+ * @param status The error code.
+ * @return A BreakIterator for sentence-breaks. The UErrorCode& status
+ * parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(err). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @stable ICU 2.0
+ */
+ static BreakIterator* U_EXPORT2
+ createSentenceInstance(const Locale& where, UErrorCode& status);
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Create BreakIterator for title-casing breaks using the specified locale
+ * Returns an instance of a BreakIterator implementing title breaks.
+ * The iterator returned locates title boundaries as described for
+ * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
+ * please use a word boundary iterator. See {@link #createWordInstance }.
+ *
+ * @param where the locale.
+ * @param status The error code.
+ * @return A BreakIterator for title-breaks. The UErrorCode& status
+ * parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(err). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @deprecated ICU 64 Use createWordInstance instead.
+ */
+ static BreakIterator* U_EXPORT2
+ createTitleInstance(const Locale& where, UErrorCode& status);
+#endif /* U_HIDE_DEPRECATED_API */
+
+ /**
+ * Get the set of Locales for which TextBoundaries are installed.
+ * <p><b>Note:</b> this will not return locales added through the register
+ * call. To see the registered locales too, use the getAvailableLocales
+ * function that returns a StringEnumeration object </p>
+ * @param count the output parameter of number of elements in the locale list
+ * @return available locales
+ * @stable ICU 2.0
+ */
+ static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
+
+ /**
+ * Get name of the object for the desired Locale, in the desired language.
+ * @param objectLocale must be from getAvailableLocales.
+ * @param displayLocale specifies the desired locale for output.
+ * @param name the fill-in parameter of the return value
+ * Uses best match.
+ * @return user-displayable name
+ * @stable ICU 2.0
+ */
+ static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
+ const Locale& displayLocale,
+ UnicodeString& name);
+
+ /**
+ * Get name of the object for the desired Locale, in the language of the
+ * default locale.
+ * @param objectLocale must be from getAvailableLocales
+ * @param name the fill-in parameter of the return value
+ * @return user-displayable name
+ * @stable ICU 2.0
+ */
+ static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
+ UnicodeString& name);
+
+#ifndef U_FORCE_HIDE_DEPRECATED_API
+ /**
+ * Deprecated functionality. Use clone() instead.
+ *
+ * Thread safe client-buffer-based cloning operation
+ * Do NOT call delete on a safeclone, since 'new' is not used to create it.
+ * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
+ * If buffer is not large enough, new memory will be allocated.
+ * @param BufferSize reference to size of allocated space.
+ * If BufferSize == 0, a sufficient size for use in cloning will
+ * be returned ('pre-flighting')
+ * If BufferSize is not enough for a stack-based safe clone,
+ * new memory will be allocated.
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were
+ * necessary.
+ * @return pointer to the new clone
+ *
+ * @deprecated ICU 52. Use clone() instead.
+ */
+ virtual BreakIterator * createBufferClone(void *stackBuffer,
+ int32_t &BufferSize,
+ UErrorCode &status) = 0;
+#endif // U_FORCE_HIDE_DEPRECATED_API
+
+#ifndef U_HIDE_DEPRECATED_API
+
+ /**
+ * Determine whether the BreakIterator was created in user memory by
+ * createBufferClone(), and thus should not be deleted. Such objects
+ * must be closed by an explicit call to the destructor (not delete).
+ * @deprecated ICU 52. Always delete the BreakIterator.
+ */
+ inline UBool isBufferClone(void);
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+#if !UCONFIG_NO_SERVICE
+ /**
+ * Register a new break iterator of the indicated kind, to use in the given locale.
+ * The break iterator will be adopted. Clones of the iterator will be returned
+ * if a request for a break iterator of the given kind matches or falls back to
+ * this locale.
+ * Because ICU may choose to cache BreakIterators internally, this must
+ * be called at application startup, prior to any calls to
+ * BreakIterator::createXXXInstance to avoid undefined behavior.
+ * @param toAdopt the BreakIterator instance to be adopted
+ * @param locale the Locale for which this instance is to be registered
+ * @param kind the type of iterator for which this instance is to be registered
+ * @param status the in/out status code, no special meanings are assigned
+ * @return a registry key that can be used to unregister this instance
+ * @stable ICU 2.4
+ */
+ static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
+ const Locale& locale,
+ UBreakIteratorType kind,
+ UErrorCode& status);
+
+ /**
+ * Unregister a previously-registered BreakIterator using the key returned from the
+ * register call. Key becomes invalid after a successful call and should not be used again.
+ * The BreakIterator corresponding to the key will be deleted.
+ * Because ICU may choose to cache BreakIterators internally, this should
+ * be called during application shutdown, after all calls to
+ * BreakIterator::createXXXInstance to avoid undefined behavior.
+ * @param key the registry key returned by a previous call to registerInstance
+ * @param status the in/out status code, no special meanings are assigned
+ * @return true if the iterator for the key was successfully unregistered
+ * @stable ICU 2.4
+ */
+ static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
+
+ /**
+ * Return a StringEnumeration over the locales available at the time of the call,
+ * including registered locales.
+ * @return a StringEnumeration over the locales available at the time of the call
+ * @stable ICU 2.4
+ */
+ static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
+#endif
+
+ /**
+ * Returns the locale for this break iterator. Two flavors are available: valid and
+ * actual locale.
+ * @stable ICU 2.8
+ */
+ Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
+
+#ifndef U_HIDE_INTERNAL_API
+ /** Get the locale for this break iterator object. You can choose between valid and actual locale.
+ * @param type type of the locale we're looking for (valid or actual)
+ * @param status error code for the operation
+ * @return the locale
+ * @internal
+ */
+ const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
+#endif /* U_HIDE_INTERNAL_API */
+
+ /**
+ * Set the subject text string upon which the break iterator is operating
+ * without changing any other aspect of the matching state.
+ * The new and previous text strings must have the same content.
+ *
+ * This function is intended for use in environments where ICU is operating on
+ * strings that may move around in memory. It provides a mechanism for notifying
+ * ICU that the string has been relocated, and providing a new UText to access the
+ * string in its new position.
+ *
+ * Note that the break iterator implementation never copies the underlying text
+ * of a string being processed, but always operates directly on the original text
+ * provided by the user. Refreshing simply drops the references to the old text
+ * and replaces them with references to the new.
+ *
+ * Caution: this function is normally used only by very specialized,
+ * system-level code. One example use case is with garbage collection that moves
+ * the text in memory.
+ *
+ * @param input The new (moved) text string.
+ * @param status Receives errors detected by this function.
+ * @return *this
+ *
+ * @stable ICU 49
+ */
+ virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
+
+ private:
+ static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
+ static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
+ static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
+
+ friend class ICUBreakIteratorFactory;
+ friend class ICUBreakIteratorService;
+
+protected:
+ // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
+ // or else the compiler will create public ones.
+ /** @internal */
+ BreakIterator();
+ /** @internal */
+ BreakIterator (const BreakIterator &other);
+#ifndef U_HIDE_INTERNAL_API
+ /** @internal */
+ BreakIterator (const Locale& valid, const Locale &actual);
+ /** @internal. Assignment Operator, used by RuleBasedBreakIterator. */
+ BreakIterator &operator = (const BreakIterator &other);
+#endif /* U_HIDE_INTERNAL_API */
+
+private:
+
+ /** @internal (private) */
+ char actualLocale[ULOC_FULLNAME_CAPACITY];
+ char validLocale[ULOC_FULLNAME_CAPACITY];
+};
+
+#ifndef U_HIDE_DEPRECATED_API
+
+// Inline definition of the deprecated isBufferClone() query:
+// it unconditionally reports false.
+inline UBool BreakIterator::isBufferClone() { return false; }
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // BRKITER_H
+//eof
diff --git a/thirdparty/icu4c/common/unicode/bytestream.h b/thirdparty/icu4c/common/unicode/bytestream.h
new file mode 100644
index 0000000000..044f7a77e7
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/bytestream.h
@@ -0,0 +1,309 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+// Copyright (C) 2009-2012, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// Copyright 2007 Google Inc. All Rights Reserved.
+// Author: sanjay@google.com (Sanjay Ghemawat)
+//
+// Abstract interface that consumes a sequence of bytes (ByteSink).
+//
+// Used so that we can write a single piece of code that can operate
+// on a variety of output string types.
+//
+// Various implementations of this interface are provided:
+// ByteSink:
+// CheckedArrayByteSink Write to a flat array, with bounds checking
+// StringByteSink Write to an STL string
+
+// This code is a contribution of Google code, and the style used here is
+// a compromise between the original Google code and the ICU coding guidelines.
+// For example, data types are ICU-ified (size_t,int->int32_t),
+// and API comments doxygen-ified, but function names and behavior are
+// as in the original, if possible.
+// Assertion-style error handling, not available in ICU, was changed to
+// parameter "pinning" similar to UnicodeString.
+//
+// In addition, this is only a partial port of the original Google code,
+// limited to what was needed so far. The (nearly) complete original code
+// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
+// (see ICU ticket 6765, r25517).
+
+#ifndef __BYTESTREAM_H__
+#define __BYTESTREAM_H__
+
+/**
+ * \file
+ * \brief C++ API: Interface for writing bytes, and implementation classes.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+#include "unicode/std_string.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A ByteSink can be filled with bytes.
+ *
+ * This is an abstract interface: Append() is pure virtual and must be
+ * implemented by subclasses, while GetAppendBuffer() and Flush() have
+ * default implementations (see their descriptions below).
+ * Instances cannot be copied; the copy constructor and the copy assignment
+ * operator are deleted.
+ * @stable ICU 4.2
+ */
+class U_COMMON_API ByteSink : public UMemory {
+public:
+ /**
+ * Default constructor.
+ * @stable ICU 4.2
+ */
+ ByteSink() { }
+ /**
+ * Virtual destructor.
+ * @stable ICU 4.2
+ */
+ virtual ~ByteSink();
+
+ /**
+ * Append "bytes[0,n-1]" to this.
+ * @param bytes the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @stable ICU 4.2
+ */
+ virtual void Append(const char* bytes, int32_t n) = 0;
+
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Appends n bytes to this. Same as Append().
+ * Call AppendU8() with u8"string literals" which are const char * in C++11
+ * but const char8_t * in C++20.
+ * If the compiler does support char8_t as a distinct type,
+ * then an AppendU8() overload for that is defined and will be chosen.
+ *
+ * @param bytes the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @draft ICU 67
+ */
+ inline void AppendU8(const char* bytes, int32_t n) {
+ // Plain forwarder to the virtual Append().
+ Append(bytes, n);
+ }
+
+#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
+ /**
+ * Appends n bytes to this. Same as Append() but for a const char8_t * pointer.
+ * Call AppendU8() with u8"string literals" which are const char * in C++11
+ * but const char8_t * in C++20.
+ * If the compiler does support char8_t as a distinct type,
+ * then this AppendU8() overload for that is defined and will be chosen.
+ *
+ * @param bytes the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @draft ICU 67
+ */
+ inline void AppendU8(const char8_t* bytes, int32_t n) {
+ // char8_t is a distinct type from char, so the pointer is reinterpreted
+ // before forwarding to the virtual Append().
+ Append(reinterpret_cast<const char*>(bytes), n);
+ }
+#endif
+#endif // U_HIDE_DRAFT_API
+
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * *result_capacity. Guarantees *result_capacity>=min_capacity.
+ * May return a pointer to the caller-owned scratch buffer which must have
+ * scratch_capacity>=min_capacity.
+ * The returned buffer is only valid until the next operation
+ * on this ByteSink.
+ *
+ * After writing at most *result_capacity bytes, call Append() with the
+ * pointer returned from this function and the number of bytes written.
+ * Many Append() implementations will avoid copying bytes if this function
+ * returned an internal buffer.
+ *
+ * Partial usage example:
+ * int32_t capacity;
+ * char* buffer = sink->GetAppendBuffer(..., &capacity);
+ * ... Write n bytes into buffer, with n <= capacity.
+ * sink->Append(buffer, n);
+ * In many implementations, that call to Append will avoid copying bytes.
+ *
+ * If the ByteSink allocates or reallocates an internal buffer, it should use
+ * the desired_capacity_hint if appropriate.
+ * If a caller cannot provide a reasonable guess at the desired capacity,
+ * it should pass desired_capacity_hint=0.
+ *
+ * If a non-scratch buffer is returned, the caller may only pass
+ * a prefix to it to Append().
+ * That is, it is not correct to pass an interior pointer to Append().
+ *
+ * The default implementation always returns the scratch buffer.
+ *
+ * @param min_capacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desired_capacity_hint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param scratch default caller-owned buffer
+ * @param scratch_capacity capacity of the scratch buffer
+ * @param result_capacity pointer to an integer which will be set to the
+ * capacity of the returned buffer
+ * @return a buffer with *result_capacity>=min_capacity
+ * @stable ICU 4.2
+ */
+ virtual char* GetAppendBuffer(int32_t min_capacity,
+ int32_t desired_capacity_hint,
+ char* scratch, int32_t scratch_capacity,
+ int32_t* result_capacity);
+
+ /**
+ * Flush internal buffers.
+ * Some byte sinks use internal buffers or provide buffering
+ * and require calling Flush() at the end of the stream.
+ * The ByteSink should be ready for further Append() calls after Flush().
+ * The default implementation of Flush() does nothing.
+ * @stable ICU 4.2
+ */
+ virtual void Flush();
+
+private:
+ // Copying is disabled: both copy operations are explicitly deleted.
+ ByteSink(const ByteSink &) = delete;
+ ByteSink &operator=(const ByteSink &) = delete;
+};
+
+// -------------------------------------------------------------
+// Some standard implementations
+
+/**
+ * Implementation of ByteSink that writes to a flat byte array,
+ * with bounds-checking:
+ * This sink will not write more than capacity bytes to outbuf.
+ * If more than capacity bytes are Append()ed, then excess bytes are ignored,
+ * and Overflowed() will return true.
+ * Overflow does not cause a runtime error.
+ * @stable ICU 4.2
+ */
+class U_COMMON_API CheckedArrayByteSink : public ByteSink {
+public:
+ /**
+ * Constructs a ByteSink that will write to outbuf[0..capacity-1].
+ * @param outbuf buffer to write to
+ * @param capacity size of the buffer
+ * @stable ICU 4.2
+ */
+ CheckedArrayByteSink(char* outbuf, int32_t capacity);
+ /**
+ * Destructor.
+ * @stable ICU 4.2
+ */
+ virtual ~CheckedArrayByteSink();
+ /**
+ * Returns the sink to its original state, without modifying the buffer.
+ * Useful for reusing both the buffer and the sink for multiple streams.
+ * Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0
+ * and Overflowed()=false.
+ * @return *this
+ * @stable ICU 4.6
+ */
+ virtual CheckedArrayByteSink& Reset();
+ /**
+ * Append "bytes[0,n-1]" to this.
+ * Overrides ByteSink::Append(); the override specifier makes the compiler
+ * verify that the signature still matches the base class.
+ * @param bytes the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @stable ICU 4.2
+ */
+ virtual void Append(const char* bytes, int32_t n) override;
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * *result_capacity. For details see the base class documentation.
+ * Overrides ByteSink::GetAppendBuffer().
+ * @param min_capacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desired_capacity_hint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param scratch default caller-owned buffer
+ * @param scratch_capacity capacity of the scratch buffer
+ * @param result_capacity pointer to an integer which will be set to the
+ * capacity of the returned buffer
+ * @return a buffer with *result_capacity>=min_capacity
+ * @stable ICU 4.2
+ */
+ virtual char* GetAppendBuffer(int32_t min_capacity,
+ int32_t desired_capacity_hint,
+ char* scratch, int32_t scratch_capacity,
+ int32_t* result_capacity) override;
+ /**
+ * Returns the number of bytes actually written to the sink.
+ * @return number of bytes written to the buffer
+ * @stable ICU 4.2
+ */
+ int32_t NumberOfBytesWritten() const { return size_; }
+ /**
+ * Returns true if any bytes were discarded, i.e., if there was an
+ * attempt to write more than 'capacity' bytes.
+ * @return true if more than 'capacity' bytes were Append()ed
+ * @stable ICU 4.2
+ */
+ UBool Overflowed() const { return overflowed_; }
+ /**
+ * Returns the number of bytes appended to the sink.
+ * If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten()
+ * else they return the same number.
+ * @return number of bytes appended to the sink, including any bytes that
+ * were discarded because of overflow
+ * @stable ICU 4.6
+ */
+ int32_t NumberOfBytesAppended() const { return appended_; }
+private:
+ // Destination array and its fixed size; presumably the constructor's
+ // outbuf/capacity arguments (the constructor body is not in this header).
+ char* outbuf_;
+ const int32_t capacity_;
+ // Value reported by NumberOfBytesWritten().
+ int32_t size_;
+ // Value reported by NumberOfBytesAppended().
+ int32_t appended_;
+ // Value reported by Overflowed().
+ UBool overflowed_;
+
+ // Neither default-constructible nor copyable.
+ CheckedArrayByteSink() = delete;
+ CheckedArrayByteSink(const CheckedArrayByteSink &) = delete;
+ CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete;
+};
+
+/**
+ * Implementation of ByteSink that writes to a "string".
+ * The StringClass is usually instantiated with a std::string.
+ * @stable ICU 4.2
+ */
+template<typename StringClass>
+class StringByteSink : public ByteSink {
+ public:
+ /**
+ * Constructs a ByteSink that will append bytes to the dest string.
+ * Only the pointer is stored; every Append() call dereferences it, so the
+ * string object must remain valid for as long as the sink is used.
+ * @param dest pointer to string object to append to
+ * @stable ICU 4.2
+ */
+ StringByteSink(StringClass* dest) : dest_(dest) { }
+ /**
+ * Constructs a ByteSink that reserves append capacity and will append bytes to the dest string.
+ *
+ * @param dest pointer to string object to append to
+ * @param initialAppendCapacity capacity beyond dest->length() to be reserve()d
+ * @stable ICU 60
+ */
+ StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
+ // Reserve only for a positive request that exceeds the spare capacity
+ // the string already has; the sign check comes first so the conversion
+ // to unsigned is only applied to positive values.
+ if (initialAppendCapacity > 0 &&
+ static_cast<uint32_t>(initialAppendCapacity) > (dest->capacity() - dest->length())) {
+ dest->reserve(dest->length() + initialAppendCapacity);
+ }
+ }
+ /**
+ * Append "bytes[0,n-1]" to this.
+ * Overrides ByteSink::Append() by forwarding to dest->append(data, n).
+ * @param data the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @stable ICU 4.2
+ */
+ virtual void Append(const char* data, int32_t n) override { dest_->append(data, n); }
+ private:
+ // String object that receives the appended bytes.
+ StringClass* dest_;
+
+ // Neither default-constructible nor copyable.
+ StringByteSink() = delete;
+ StringByteSink(const StringByteSink &) = delete;
+ StringByteSink &operator=(const StringByteSink &) = delete;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __BYTESTREAM_H__
diff --git a/thirdparty/icu4c/common/unicode/bytestrie.h b/thirdparty/icu4c/common/unicode/bytestrie.h
new file mode 100644
index 0000000000..85f802df42
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/bytestrie.h
@@ -0,0 +1,565 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: bytestrie.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010sep25
+* created by: Markus W. Scherer
+*/
+
+#ifndef __BYTESTRIE_H__
+#define __BYTESTRIE_H__
+
+/**
+ * \file
+ * \brief C++ API: Trie for mapping byte sequences to integer values.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+#include "unicode/ustringtrie.h"
+
+U_NAMESPACE_BEGIN
+
+class ByteSink;
+class BytesTrieBuilder;
+class CharString;
+class UVector32;
+
+/**
+ * Light-weight, non-const reader class for a BytesTrie.
+ * Traverses a byte-serialized data structure with minimal state,
+ * for mapping byte sequences to non-negative integer values.
+ *
+ * This class owns the serialized trie data only if it was constructed by
+ * the builder's build() method.
+ * The public constructor and the copy constructor only alias the data (only copy the pointer).
+ * There is no assignment operator.
+ *
+ * This class is not intended for public subclassing.
+ * @stable ICU 4.8
+ */
+class U_COMMON_API BytesTrie : public UMemory {
+public:
+ /**
+ * Constructs a BytesTrie reader instance.
+ *
+ * The trieBytes must contain a copy of a byte sequence from the BytesTrieBuilder,
+ * starting with the first byte of that sequence.
+ * The BytesTrie object will not read more bytes than
+ * the BytesTrieBuilder generated in the corresponding build() call.
+ *
+ * The array is not copied/cloned and must not be modified while
+ * the BytesTrie object is in use.
+ *
+ * @param trieBytes The byte array that contains the serialized trie.
+ * @stable ICU 4.8
+ */
+ BytesTrie(const void *trieBytes)
+ : ownedArray_(NULL), bytes_(static_cast<const uint8_t *>(trieBytes)),
+ pos_(bytes_), remainingMatchLength_(-1) {}
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ ~BytesTrie();
+
+ /**
+ * Copy constructor, copies the other trie reader object and its state,
+ * but not the byte array which will be shared. (Shallow copy.)
+ * The copy never owns the array (its ownedArray_ is set to NULL).
+ * @param other Another BytesTrie object.
+ * @stable ICU 4.8
+ */
+ BytesTrie(const BytesTrie &other)
+ : ownedArray_(NULL), bytes_(other.bytes_),
+ pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
+
+ /**
+ * Resets this trie to its initial state.
+ * @return *this
+ * @stable ICU 4.8
+ */
+ BytesTrie &reset() {
+ pos_=bytes_;
+ remainingMatchLength_=-1;
+ return *this;
+ }
+
+ /**
+ * Returns the state of this trie as a 64-bit integer.
+ * The state value is never 0.
+ *
+ * @return opaque state value
+ * @see resetToState64
+ * @stable ICU 65
+ */
+ uint64_t getState64() const {
+ return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |  // +2: stored as 1..16
+ (uint64_t)(pos_ - bytes_);
+ }
+
+ /**
+ * Resets this trie to the saved state.
+ * Unlike resetToState(State), the 64-bit state value
+ * must be from getState64() from the same trie object or
+ * from one initialized the exact same way.
+ * Because of no validation, this method is faster.
+ *
+ * @param state The opaque trie state value from getState64().
+ * @return *this
+ * @see getState64
+ * @see resetToState
+ * @see reset
+ * @stable ICU 65
+ */
+ BytesTrie &resetToState64(uint64_t state) {
+ remainingMatchLength_ = static_cast<int32_t>(state >> kState64RemainingShift) - 2;  // undo the +2 bias
+ pos_ = bytes_ + (state & kState64PosMask);
+ return *this;
+ }
+
+ /**
+ * BytesTrie state object, for saving a trie's current state
+ * and resetting the trie back to this state later.
+ * @stable ICU 4.8
+ */
+ class State : public UMemory {
+ public:
+ /**
+ * Constructs an empty State.
+ * @stable ICU 4.8
+ */
+ State() { bytes=NULL; }  // NULL bytes marks "no saved state"; resetToState() then leaves the trie unchanged
+ private:
+ friend class BytesTrie;
+
+ const uint8_t *bytes;
+ const uint8_t *pos;
+ int32_t remainingMatchLength;
+ };
+
+ /**
+ * Saves the state of this trie.
+ * @param state The State object to hold the trie's state.
+ * @return *this
+ * @see resetToState
+ * @stable ICU 4.8
+ */
+ const BytesTrie &saveState(State &state) const {
+ state.bytes=bytes_;
+ state.pos=pos_;
+ state.remainingMatchLength=remainingMatchLength_;
+ return *this;
+ }
+
+ /**
+ * Resets this trie to the saved state.
+ * If the state object contains no state, or the state of a different trie,
+ * then this trie remains unchanged.
+ * @param state The State object which holds a saved trie state.
+ * @return *this
+ * @see saveState
+ * @see reset
+ * @stable ICU 4.8
+ */
+ BytesTrie &resetToState(const State &state) {
+ if(bytes_==state.bytes && bytes_!=NULL) {  // only accept a state saved from the same byte array
+ pos_=state.pos;
+ remainingMatchLength_=state.remainingMatchLength;
+ }
+ return *this;
+ }
+
+ /**
+ * Determines whether the byte sequence so far matches, whether it has a value,
+ * and whether another input byte can continue a matching byte sequence.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult current() const;
+
+ /**
+ * Traverses the trie from the initial state for this input byte.
+ * Equivalent to reset().next(inByte).
+ * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
+ * Values below -0x100 and above 0xff will never match.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ inline UStringTrieResult first(int32_t inByte) {
+ remainingMatchLength_=-1;
+ if(inByte<0) {
+ inByte+=0x100;  // map -0x100..-1 onto 0..0xff
+ }
+ return nextImpl(bytes_, inByte);
+ }
+
+ /**
+ * Traverses the trie from the current state for this input byte.
+ * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
+ * Values below -0x100 and above 0xff will never match.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult next(int32_t inByte);
+
+ /**
+ * Traverses the trie from the current state for this byte sequence.
+ * Equivalent to
+ * \code
+ * Result result=current();
+ * for(each c in s)
+ * if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH;
+ * result=next(c);
+ * return result;
+ * \endcode
+ * @param s A string or byte sequence. Can be NULL if length is 0.
+ * @param length The length of the byte sequence. Can be -1 if NUL-terminated.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult next(const char *s, int32_t length);
+
+ /**
+ * Returns a matching byte sequence's value if called immediately after
+ * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
+ * getValue() can be called multiple times.
+ *
+ * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE!
+ * @return The value for the byte sequence so far.
+ * @stable ICU 4.8
+ */
+ inline int32_t getValue() const {
+ const uint8_t *pos=pos_;
+ int32_t leadByte=*pos++;
+ // U_ASSERT(leadByte>=kMinValueLead);
+ return readValue(pos, leadByte>>1);  // readValue() expects the lead byte without its is-final bit
+ }
+
+ /**
+ * Determines whether all byte sequences reachable from the current state
+ * map to the same value.
+ * @param uniqueValue Receives the unique value, if this function returns true.
+ * (output-only)
+ * @return true if all byte sequences reachable from the current state
+ * map to the same value.
+ * @stable ICU 4.8
+ */
+ inline UBool hasUniqueValue(int32_t &uniqueValue) const {
+ const uint8_t *pos=pos_;
+ // Skip the rest of a pending linear-match node.
+ return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, false, uniqueValue);
+ }
+
+ /**
+ * Finds each byte which continues the byte sequence from the current state.
+ * That is, each byte b for which it would be next(b)!=USTRINGTRIE_NO_MATCH now.
+ * @param out Each next byte is appended to this object.
+ * (Only uses the out.Append(s, length) method.)
+ * @return the number of bytes which continue the byte sequence from here
+ * @stable ICU 4.8
+ */
+ int32_t getNextBytes(ByteSink &out) const;
+
+ /**
+ * Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
+ * @stable ICU 4.8
+ */
+ class U_COMMON_API Iterator : public UMemory {
+ public:
+ /**
+ * Iterates from the root of a byte-serialized BytesTrie.
+ * @param trieBytes The trie bytes.
+ * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
+ * Otherwise, the iterator returns strings with this maximum length.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 4.8
+ */
+ Iterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode);
+
+ /**
+ * Iterates from the current state of the specified BytesTrie.
+ * @param trie The trie whose state will be copied for iteration.
+ * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
+ * Otherwise, the iterator returns strings with this maximum length.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 4.8
+ */
+ Iterator(const BytesTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ ~Iterator();
+
+ /**
+ * Resets this iterator to its initial state.
+ * @return *this
+ * @stable ICU 4.8
+ */
+ Iterator &reset();
+
+ /**
+ * @return true if there are more elements.
+ * @stable ICU 4.8
+ */
+ UBool hasNext() const;
+
+ /**
+ * Finds the next (byte sequence, value) pair if there is one.
+ *
+ * If the byte sequence is truncated to the maximum length and does not
+ * have a real value, then the value is set to -1.
+ * In this case, this "not a real value" is indistinguishable from
+ * a real value of -1.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return true if there is another element.
+ * @stable ICU 4.8
+ */
+ UBool next(UErrorCode &errorCode);
+
+ /**
+ * @return The NUL-terminated byte sequence for the last successful next().
+ * @stable ICU 4.8
+ */
+ StringPiece getString() const;
+ /**
+ * @return The value for the last successful next().
+ * @stable ICU 4.8
+ */
+ int32_t getValue() const { return value_; }
+
+ private:
+ UBool truncateAndStop();
+
+ const uint8_t *branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode);
+
+ const uint8_t *bytes_;
+ const uint8_t *pos_;
+ const uint8_t *initialPos_;
+ int32_t remainingMatchLength_;
+ int32_t initialRemainingMatchLength_;
+
+ CharString *str_;
+ int32_t maxLength_;
+ int32_t value_;
+
+ // The stack stores pairs of integers for backtracking to another
+ // outbound edge of a branch node.
+ // The first integer is an offset from bytes_.
+ // The second integer has the str_->length() from before the node in bits 15..0,
+ // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
+ // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
+ // but the code looks more confusing that way.)
+ UVector32 *stack_;
+ };
+
+private:
+ friend class BytesTrieBuilder;
+
+ /**
+ * Constructs a BytesTrie reader instance.
+ * Unlike the public constructor which just aliases an array,
+ * this constructor adopts the builder's array.
+ * This constructor is only called by the builder.
+ */
+ BytesTrie(void *adoptBytes, const void *trieBytes)
+ : ownedArray_(static_cast<uint8_t *>(adoptBytes)),
+ bytes_(static_cast<const uint8_t *>(trieBytes)),
+ pos_(bytes_), remainingMatchLength_(-1) {}
+
+ // No assignment operator.
+ BytesTrie &operator=(const BytesTrie &other);
+
+ inline void stop() {
+ pos_=NULL;  // NULL pos_ means "no more matches"
+ }
+
+ // Reads a compact 32-bit integer.
+ // pos is already after the leadByte, and the lead byte is already shifted right by 1.
+ static int32_t readValue(const uint8_t *pos, int32_t leadByte);
+ static inline const uint8_t *skipValue(const uint8_t *pos, int32_t leadByte) {  // leadByte is NOT shifted here
+ // U_ASSERT(leadByte>=kMinValueLead);
+ if(leadByte>=(kMinTwoByteValueLead<<1)) {
+ if(leadByte<(kMinThreeByteValueLead<<1)) {
+ ++pos;
+ } else if(leadByte<(kFourByteValueLead<<1)) {
+ pos+=2;
+ } else {
+ pos+=3+((leadByte>>1)&1);  // 3 more bytes for kFourByteValueLead, 4 for kFiveByteValueLead
+ }
+ }
+ return pos;
+ }
+ static inline const uint8_t *skipValue(const uint8_t *pos) {
+ int32_t leadByte=*pos++;
+ return skipValue(pos, leadByte);
+ }
+
+ // Reads a jump delta and jumps.
+ static const uint8_t *jumpByDelta(const uint8_t *pos);
+
+ static inline const uint8_t *skipDelta(const uint8_t *pos) {  // skips a compact delta without jumping
+ int32_t delta=*pos++;
+ if(delta>=kMinTwoByteDeltaLead) {
+ if(delta<kMinThreeByteDeltaLead) {
+ ++pos;
+ } else if(delta<kFourByteDeltaLead) {
+ pos+=2;
+ } else {
+ pos+=3+(delta&1);  // 3 more bytes for kFourByteDeltaLead, 4 for kFiveByteDeltaLead
+ }
+ }
+ return pos;
+ }
+
+ static inline UStringTrieResult valueResult(int32_t node) {
+ return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node&kValueIsFinal));  // NOTE(review): presumably FINAL_VALUE==INTERMEDIATE_VALUE-1 — confirm in ustringtrie.h
+ }
+
+ // Handles a branch node for both next(byte) and next(string).
+ UStringTrieResult branchNext(const uint8_t *pos, int32_t length, int32_t inByte);
+
+ // Requires remainingMatchLength_<0.
+ UStringTrieResult nextImpl(const uint8_t *pos, int32_t inByte);
+
+ // Helper functions for hasUniqueValue().
+ // Recursively finds a unique value (or whether there is not a unique one)
+ // from a branch.
+ static const uint8_t *findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
+ UBool haveUniqueValue, int32_t &uniqueValue);
+ // Recursively finds a unique value (or whether there is not a unique one)
+ // starting from a position on a node lead byte.
+ static UBool findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue);
+
+ // Helper functions for getNextBytes().
+ // getNextBytes() when pos is on a branch node.
+ static void getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out);
+ static void append(ByteSink &out, int c);
+
+ // BytesTrie data structure
+ //
+ // The trie consists of a series of byte-serialized nodes for incremental
+ // string/byte sequence matching. The root node is at the beginning of the trie data.
+ //
+ // Types of nodes are distinguished by their node lead byte ranges.
+ // After each node, except a final-value node, another node follows to
+ // encode match values or continue matching further bytes.
+ //
+ // Node types:
+ // - Value node: Stores a 32-bit integer in a compact, variable-length format.
+ // The value is for the string/byte sequence so far.
+ // One node bit indicates whether the value is final or whether
+ // matching continues with the next node.
+ // - Linear-match node: Matches a number of bytes.
+ // - Branch node: Branches to other nodes according to the current input byte.
+ // The node byte is the length of the branch (number of bytes to select from)
+ // minus 1. It is followed by a sub-node:
+ // - If the length is at most kMaxBranchLinearSubNodeLength, then
+ // there are length-1 (key, value) pairs and then one more comparison byte.
+ // If one of the key bytes matches, then the value is either a final value for
+ // the string/byte sequence so far, or a "jump" delta to the next node.
+ // If the last byte matches, then matching continues with the next node.
+ // (Values have the same encoding as value nodes.)
+ // - If the length is greater than kMaxBranchLinearSubNodeLength, then
+ // there is one byte and one "jump" delta.
+ // If the input byte is less than the sub-node byte, then "jump" by delta to
+ // the next sub-node which will have a length of length/2.
+ // (The delta has its own compact encoding.)
+ // Otherwise, skip the "jump" delta to the next sub-node
+ // which will have a length of length-length/2.
+
+ // Node lead byte values.
+
+ // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise
+ // the length is one more than the next byte.
+
+ // For a branch sub-node with at most this many entries, we drop down
+ // to a linear search.
+ static const int32_t kMaxBranchLinearSubNodeLength=5;
+
+ // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node.
+ static const int32_t kMinLinearMatch=0x10;
+ static const int32_t kMaxLinearMatchLength=0x10;
+
+ // 20..ff: Variable-length value node.
+ // If odd, the value is final. (Otherwise, intermediate value or jump delta.)
+ // Then shift-right by 1 bit.
+ // The remaining lead byte value indicates the number of following bytes (0..4)
+ // and contains the value's top bits.
+ static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength; // 0x20
+ // It is a final value if bit 0 is set.
+ static const int32_t kValueIsFinal=1;
+
+ // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds.
+ static const int32_t kMinOneByteValueLead=kMinValueLead/2; // 0x10
+ static const int32_t kMaxOneByteValue=0x40; // At least 6 bits in the first byte.
+
+ static const int32_t kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1; // 0x51
+ static const int32_t kMaxTwoByteValue=0x1aff;
+
+ static const int32_t kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1; // 0x6c
+ static const int32_t kFourByteValueLead=0x7e;
+
+ // A little more than Unicode code points. (0x11ffff)
+ static const int32_t kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;
+
+ static const int32_t kFiveByteValueLead=0x7f;
+
+ // Compact delta integers.
+ static const int32_t kMaxOneByteDelta=0xbf;
+ static const int32_t kMinTwoByteDeltaLead=kMaxOneByteDelta+1; // 0xc0
+ static const int32_t kMinThreeByteDeltaLead=0xf0;
+ static const int32_t kFourByteDeltaLead=0xfe;
+ static const int32_t kFiveByteDeltaLead=0xff;
+
+ static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff
+ static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff
+
+ // For getState64():
+ // The remainingMatchLength_ is -1..14=(kMaxLinearMatchLength=0x10)-2
+ // so we need at least 5 bits for that.
+ // We add 2 to store it as a positive value 1..16=kMaxLinearMatchLength.
+ static constexpr int32_t kState64RemainingShift = 59;
+ static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1;
+
+ uint8_t *ownedArray_;  // non-NULL only when constructed via the private adopting constructor
+
+ // Fixed value referencing the BytesTrie bytes.
+ const uint8_t *bytes_;
+
+ // Iterator variables.
+
+ // Pointer to next trie byte to read. NULL if no more matches.
+ const uint8_t *pos_;
+ // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
+ int32_t remainingMatchLength_;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __BYTESTRIE_H__
diff --git a/thirdparty/icu4c/common/unicode/bytestriebuilder.h b/thirdparty/icu4c/common/unicode/bytestriebuilder.h
new file mode 100644
index 0000000000..cae16e48b4
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/bytestriebuilder.h
@@ -0,0 +1,188 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: bytestriebuilder.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010sep25
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C++ API: Builder for icu::BytesTrie
+ */
+
+#ifndef __BYTESTRIEBUILDER_H__
+#define __BYTESTRIEBUILDER_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/bytestrie.h"
+#include "unicode/stringpiece.h"
+#include "unicode/stringtriebuilder.h"
+
+U_NAMESPACE_BEGIN
+
+class BytesTrieElement;
+class CharString;
+/**
+ * Builder class for BytesTrie.
+ *
+ * This class is not intended for public subclassing.
+ * @stable ICU 4.8
+ */
+class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder {
+public:
+ /**
+ * Constructs an empty builder.
+ * @param errorCode Standard ICU error code.
+ * @stable ICU 4.8
+ */
+ BytesTrieBuilder(UErrorCode &errorCode);
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ virtual ~BytesTrieBuilder();
+
+ /**
+ * Adds a (byte sequence, value) pair.
+ * The byte sequence must be unique.
+ * The bytes will be copied; the builder does not keep
+ * a reference to the input StringPiece or its data().
+ * @param s The input byte sequence.
+ * @param value The value associated with this byte sequence.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return *this
+ * @stable ICU 4.8
+ */
+ BytesTrieBuilder &add(StringPiece s, int32_t value, UErrorCode &errorCode);
+
+ /**
+ * Builds a BytesTrie for the add()ed data.
+ * Once built, no further data can be add()ed until clear() is called.
+ *
+ * A BytesTrie cannot be empty. At least one (byte sequence, value) pair
+ * must have been add()ed.
+ *
+ * This method passes ownership of the builder's internal result array to the new trie object.
+ * Another call to any build() variant will re-serialize the trie.
+ * After clear() has been called, a new array will be used as well.
+ * @param buildOption Build option, see UStringTrieBuildOption.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return A new BytesTrie for the add()ed data.
+ * @stable ICU 4.8
+ */
+ BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
+
+ /**
+ * Builds a BytesTrie for the add()ed data and byte-serializes it.
+ * Once built, no further data can be add()ed until clear() is called.
+ *
+ * A BytesTrie cannot be empty. At least one (byte sequence, value) pair
+ * must have been add()ed.
+ *
+ * Multiple calls to buildStringPiece() return StringPieces referring to the
+ * builder's same byte array, without rebuilding.
+ * If buildStringPiece() is called after build(), the trie will be
+ * re-serialized into a new array (because build() passes on ownership).
+ * If build() is called after buildStringPiece(), the trie object returned
+ * by build() will become the owner of the underlying string for the
+ * previously returned StringPiece.
+ * After clear() has been called, a new array will be used as well.
+ * @param buildOption Build option, see UStringTrieBuildOption.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data.
+ * @stable ICU 4.8
+ */
+ StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
+
+ /**
+ * Removes all (byte sequence, value) pairs.
+ * New data can then be add()ed and a new trie can be built.
+ * @return *this
+ * @stable ICU 4.8
+ */
+ BytesTrieBuilder &clear();
+
+private:
+ BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor
+ BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator
+
+ void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
+
+ virtual int32_t getElementStringLength(int32_t i) const;
+ virtual char16_t getElementUnit(int32_t i, int32_t byteIndex) const;  // NOTE(review): char16_t presumably dictated by the StringTrieBuilder base; the unit is a byte — confirm
+ virtual int32_t getElementValue(int32_t i) const;
+
+ virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const;
+
+ virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const;
+ virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const;
+ virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const;
+
+ virtual UBool matchNodesCanHaveValues() const { return false; }
+
+ virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; }
+ virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; }
+ virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; }
+
+ /**
+ * Linear-match node specialization that holds a pointer to the bytes to match.
+ * @internal (private)
+ */
+ class BTLinearMatchNode : public LinearMatchNode {
+ public:
+ BTLinearMatchNode(const char *units, int32_t len, Node *nextNode);
+ virtual UBool operator==(const Node &other) const;
+ virtual void write(StringTrieBuilder &builder);
+ private:
+ const char *s;  // NOTE(review): presumably aliases the bytes passed as `units` (not a copy) — confirm in bytestriebuilder.cpp
+ };
+
+ virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
+ Node *nextNode) const;
+
+ UBool ensureCapacity(int32_t length);
+ virtual int32_t write(int32_t byte);
+ int32_t write(const char *b, int32_t length);
+ virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length);
+ virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);
+ virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
+ virtual int32_t writeDeltaTo(int32_t jumpTarget);
+
+ CharString *strings; // Held by pointer (not by value) so that this header need not #include internal charstr.h.
+ BytesTrieElement *elements;
+ int32_t elementsCapacity;
+ int32_t elementsLength;
+
+ // Byte serialization of the trie.
+ // Grows from the back: bytesLength measures from the end of the buffer!
+ char *bytes;
+ int32_t bytesCapacity;
+ int32_t bytesLength;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __BYTESTRIEBUILDER_H__
diff --git a/thirdparty/icu4c/common/unicode/caniter.h b/thirdparty/icu4c/common/unicode/caniter.h
new file mode 100644
index 0000000000..4ed2b74b10
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/caniter.h
@@ -0,0 +1,214 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+
+#ifndef CANITER_H
+#define CANITER_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file
+ * \brief C++ API: Canonical Iterator
+ */
+
+/** Whether permutation should skip characters with combining class zero.
+ * Must be either true or false. This is a compile-time option.
+ * @stable ICU 2.4
+ */
+#ifndef CANITER_SKIP_ZEROES
+#define CANITER_SKIP_ZEROES true
+#endif
+
+U_NAMESPACE_BEGIN
+
+class Hashtable;
+class Normalizer2;
+class Normalizer2Impl;
+
+/**
+ * This class allows one to iterate through all the strings that are canonically equivalent to a given
+ * string. For example, here are some sample results:
+Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+1: \\u0041\\u030A\\u0064\\u0307\\u0327
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+2: \\u0041\\u030A\\u0064\\u0327\\u0307
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
+3: \\u0041\\u030A\\u1E0B\\u0327
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
+4: \\u0041\\u030A\\u1E11\\u0307
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
+5: \\u00C5\\u0064\\u0307\\u0327
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+6: \\u00C5\\u0064\\u0327\\u0307
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
+7: \\u00C5\\u1E0B\\u0327
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
+8: \\u00C5\\u1E11\\u0307
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
+9: \\u212B\\u0064\\u0307\\u0327
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+10: \\u212B\\u0064\\u0327\\u0307
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
+11: \\u212B\\u1E0B\\u0327
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
+12: \\u212B\\u1E11\\u0307
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
+ *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
+ * since it has not been optimized for that situation.
+ * Note, CanonicalIterator is not intended to be subclassed.
+ * @author M. Davis
+ * @author C++ port by V. Weinstein
+ * @stable ICU 2.4
+ */
+class U_COMMON_API CanonicalIterator U_FINAL : public UObject {
+public:
+ /**
+ * Construct a CanonicalIterator object
+ * @param source string to get results for
+ * @param status Fill-in parameter which receives the status of this operation.
+ * @stable ICU 2.4
+ */
+ CanonicalIterator(const UnicodeString &source, UErrorCode &status);
+
+ /** Destructor
+ * Cleans pieces
+ * @stable ICU 2.4
+ */
+ virtual ~CanonicalIterator();
+
+ /**
+ * Gets the NFD form of the current source we are iterating over.
+ * @return the source. NOTE: it is the NFD form of the source
+ * @stable ICU 2.4
+ */
+ UnicodeString getSource();
+
+ /**
+ * Resets the iterator so that one can start again from the beginning.
+ * @stable ICU 2.4
+ */
+ void reset();
+
+ /**
+ * Get the next canonically equivalent string.
+ * <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
+ * @return the next string that is canonically equivalent. A bogus string is returned when
+ * the iteration is done.
+ * @stable ICU 2.4
+ */
+ UnicodeString next();
+
+ /**
+ * Set a new source for this iterator. Allows object reuse.
+ * @param newSource the source string to iterate against. This allows the same iterator to be used
+ * while changing the source string, saving object creation.
+ * @param status Fill-in parameter which receives the status of this operation.
+ * @stable ICU 2.4
+ */
+ void setSource(const UnicodeString &newSource, UErrorCode &status);
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Dumb recursive implementation of permutation.
+ * TODO: optimize
+ * @param source the string to find permutations for
+ * @param skipZeros whether to skip zeros
+ * @param result the results in a set.
+ * @param status Fill-in parameter which receives the status of this operation.
+ * @internal
+ */
+ static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
+#endif /* U_HIDE_INTERNAL_API */
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+private:
+ // ===================== PRIVATES ==============================
+ // private default constructor
+ CanonicalIterator();
+
+
+ /**
+ * Copy constructor. Private for now.
+ * @internal (private)
+ */
+ CanonicalIterator(const CanonicalIterator& other);
+
+ /**
+ * Assignment operator. Private for now.
+ * @internal (private)
+ */
+ CanonicalIterator& operator=(const CanonicalIterator& other);
+
+ // fields
+ UnicodeString source;
+ UBool done;
+
+ // 2 dimensional array holds the pieces of the string with
+ // their different canonically equivalent representations
+ UnicodeString **pieces;
+ int32_t pieces_length;
+ int32_t *pieces_lengths;
+
+ // current is used in iterating to combine pieces
+ int32_t *current;
+ int32_t current_length;
+
+ // transient fields
+ UnicodeString buffer;
+
+ const Normalizer2 &nfd;
+ const Normalizer2Impl &nfcImpl;
+
+ // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
+ UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)
+
+ //Set getEquivalents2(String segment);
+ Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status);
+ //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);
+
+ /**
+ * See if the decomposition of comp is at segment starting at segmentPos
+ * (with canonical rearrangement!)
+ * If so, take the remainder, and return the equivalents
+ */
+ //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);
+ Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
+ //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
+
+ void cleanPieces();
+
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/casemap.h b/thirdparty/icu4c/common/unicode/casemap.h
new file mode 100644
index 0000000000..53af84fa74
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/casemap.h
@@ -0,0 +1,497 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// casemap.h
+// created: 2017jan12 Markus W. Scherer
+
+#ifndef __CASEMAP_H__
+#define __CASEMAP_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Low-level C++ case mapping functions.
+ */
+
+U_NAMESPACE_BEGIN
+
+class BreakIterator;
+class ByteSink;
+class Edits;
+
+/**
+ * Low-level C++ case mapping functions.
+ *
+ * Every member function is static, and the default constructor, copy constructor
+ * and copy assignment operator are deleted, so this class is used purely as a
+ * scope for the functions below and cannot be instantiated or copied.
+ *
+ * There are three families of overloads:
+ * - UTF-16: char16_t source and destination buffers (toLower, toUpper, toTitle, fold).
+ * - UTF-8 with a StringPiece source and a ByteSink destination
+ * (utf8ToLower, utf8ToUpper, utf8ToTitle, utf8Fold returning void).
+ * - UTF-8 with char source and destination buffers
+ * (utf8ToLower, utf8ToUpper, utf8ToTitle, utf8Fold returning int32_t).
+ *
+ * The title-casing functions are only declared when UCONFIG_NO_BREAK_ITERATION is 0.
+ *
+ * @stable ICU 59
+ */
+class U_COMMON_API CaseMap U_FINAL : public UMemory {
+public:
+ /**
+ * Lowercases a UTF-16 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param locale The locale ID. ("" = root locale, NULL = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents are undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents are undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see u_strToLower
+ * @stable ICU 59
+ */
+ static int32_t toLower(
+ const char *locale, uint32_t options,
+ const char16_t *src, int32_t srcLength,
+ char16_t *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+ /**
+ * Uppercases a UTF-16 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param locale The locale ID. ("" = root locale, NULL = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents are undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents are undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see u_strToUpper
+ * @stable ICU 59
+ */
+ static int32_t toUpper(
+ const char *locale, uint32_t options,
+ const char16_t *src, int32_t srcLength,
+ char16_t *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Titlecases a UTF-16 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options bits.)
+ *
+ * @param locale The locale ID. ("" = root locale, NULL = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
+ * U_TITLECASE_NO_LOWERCASE,
+ * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
+ * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
+ * @param iter A break iterator to find the first characters of words that are to be titlecased.
+ * It is set to the source string (setText())
+ * and used one or more times for iteration (first() and next()).
+ * If NULL, then a word break iterator for the locale is used
+ * (or something equivalent).
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents are undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents are undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see u_strToTitle
+ * @see ucasemap_toTitle
+ * @stable ICU 59
+ */
+ static int32_t toTitle(
+ const char *locale, uint32_t options, BreakIterator *iter,
+ const char16_t *src, int32_t srcLength,
+ char16_t *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+#endif // UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Case-folds a UTF-16 string and optionally records edits.
+ *
+ * Case folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
+ * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents are undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents are undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see u_strFoldCase
+ * @stable ICU 59
+ */
+ static int32_t fold(
+ uint32_t options,
+ const char16_t *src, int32_t srcLength,
+ char16_t *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+ /**
+ * Lowercases a UTF-8 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * @param locale The locale ID. ("" = root locale, NULL = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param sink A ByteSink to which the result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents are undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_utf8ToLower
+ * @stable ICU 60
+ */
+ static void utf8ToLower(
+ const char *locale, uint32_t options,
+ StringPiece src, ByteSink &sink, Edits *edits,
+ UErrorCode &errorCode);
+
+ /**
+ * Uppercases a UTF-8 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * @param locale The locale ID. ("" = root locale, NULL = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param sink A ByteSink to which the result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents are undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_utf8ToUpper
+ * @stable ICU 60
+ */
+ static void utf8ToUpper(
+ const char *locale, uint32_t options,
+ StringPiece src, ByteSink &sink, Edits *edits,
+ UErrorCode &errorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Titlecases a UTF-8 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options bits.)
+ *
+ * @param locale The locale ID. ("" = root locale, NULL = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
+ * U_TITLECASE_NO_LOWERCASE,
+ * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
+ * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
+ * @param iter A break iterator to find the first characters of words that are to be titlecased.
+ * It is set to the source string (setUText())
+ * and used one or more times for iteration (first() and next()).
+ * If NULL, then a word break iterator for the locale is used
+ * (or something equivalent).
+ * @param src The original string.
+ * @param sink A ByteSink to which the result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents are undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 60
+ */
+ static void utf8ToTitle(
+ const char *locale, uint32_t options, BreakIterator *iter,
+ StringPiece src, ByteSink &sink, Edits *edits,
+ UErrorCode &errorCode);
+
+#endif // UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Case-folds a UTF-8 string and optionally records edits.
+ *
+ * Case folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ *
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
+ * @param src The original string.
+ * @param sink A ByteSink to which the result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents are undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_utf8FoldCase
+ * @stable ICU 60
+ */
+ static void utf8Fold(
+ uint32_t options,
+ StringPiece src, ByteSink &sink, Edits *edits,
+ UErrorCode &errorCode);
+
+ /**
+ * Lowercases a UTF-8 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param locale The locale ID. ("" = root locale, NULL = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents are undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents are undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucasemap_utf8ToLower
+ * @stable ICU 59
+ */
+ static int32_t utf8ToLower(
+ const char *locale, uint32_t options,
+ const char *src, int32_t srcLength,
+ char *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+ /**
+ * Uppercases a UTF-8 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param locale The locale ID. ("" = root locale, NULL = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents are undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents are undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucasemap_utf8ToUpper
+ * @stable ICU 59
+ */
+ static int32_t utf8ToUpper(
+ const char *locale, uint32_t options,
+ const char *src, int32_t srcLength,
+ char *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Titlecases a UTF-8 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options bits.)
+ *
+ * @param locale The locale ID. ("" = root locale, NULL = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
+ * U_TITLECASE_NO_LOWERCASE,
+ * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
+ * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
+ * @param iter A break iterator to find the first characters of words that are to be titlecased.
+ * It is set to the source string (setUText())
+ * and used one or more times for iteration (first() and next()).
+ * If NULL, then a word break iterator for the locale is used
+ * (or something equivalent).
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents are undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents are undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 59
+ */
+ static int32_t utf8ToTitle(
+ const char *locale, uint32_t options, BreakIterator *iter,
+ const char *src, int32_t srcLength,
+ char *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+#endif // UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Case-folds a UTF-8 string and optionally records edits.
+ *
+ * Case folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
+ * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents are undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents are undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucasemap_utf8FoldCase
+ * @stable ICU 59
+ */
+ static int32_t utf8Fold(
+ uint32_t options,
+ const char *src, int32_t srcLength,
+ char *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+private:
+ CaseMap() = delete; // static-only class: no instances can be created
+ CaseMap(const CaseMap &other) = delete; // not copy-constructible
+ CaseMap &operator=(const CaseMap &other) = delete; // not copy-assignable
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __CASEMAP_H__
diff --git a/thirdparty/icu4c/common/unicode/char16ptr.h b/thirdparty/icu4c/common/unicode/char16ptr.h
new file mode 100644
index 0000000000..c8a9ae6c35
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/char16ptr.h
@@ -0,0 +1,313 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// char16ptr.h
+// created: 2017feb28 Markus W. Scherer
+
+#ifndef __CHAR16PTR_H__
+#define __CHAR16PTR_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include <cstddef>
+
+/**
+ * \file
+ * \brief C++ API: char16_t pointer wrappers with
+ * implicit conversion from bit-compatible raw pointer types.
+ * Also conversion functions from char16_t * to UChar * and OldUChar *.
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \def U_ALIASING_BARRIER
+ * Barrier for pointer anti-aliasing optimizations even across function boundaries.
+ *
+ * Selection, in order:
+ * - If the macro is already defined, that definition is kept unchanged.
+ * - With clang or GCC, except when U_PLATFORM is U_PF_BROWSER_NATIVE_CLIENT, it expands
+ * to an empty asm volatile statement that takes ptr as an input operand and
+ * declares a "memory" clobber.
+ * - When U_IN_DOXYGEN is defined, it expands to nothing.
+ * - Otherwise the macro stays undefined; code in this header tests for that
+ * with \#ifdef U_ALIASING_BARRIER.
+ * @internal
+ */
+#ifdef U_ALIASING_BARRIER
+ // Use the predefined value.
+#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT
+# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory")
+#elif defined(U_IN_DOXYGEN)
+# define U_ALIASING_BARRIER(ptr)
+#endif
+
+/**
+ * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
+ *
+ * The converting constructors are intentionally non-explicit so that a
+ * uint16_t * (when U_CHAR16_IS_TYPEDEF is 0) or a wchar_t * (when
+ * U_SIZEOF_WCHAR_T is 2) can be passed where a Char16Ptr is expected.
+ *
+ * Storage depends on whether U_ALIASING_BARRIER is defined:
+ * - defined: a single char16_t * member; foreign pointer types are converted by
+ * the private cast() helper, which issues the barrier and then reinterpret_casts.
+ * - not defined: a union of the supported pointer types; the constructor writes
+ * the member matching its argument type and get() reads the char16_t * member.
+ * @stable ICU 59
+ */
+class U_COMMON_API Char16Ptr U_FINAL {
+public:
+ /**
+ * Copies the pointer.
+ * @param p pointer
+ * @stable ICU 59
+ */
+ inline Char16Ptr(char16_t *p);
+#if !U_CHAR16_IS_TYPEDEF
+ /**
+ * Converts the pointer to char16_t *.
+ * (Only declared if U_CHAR16_IS_TYPEDEF is 0.)
+ * @param p pointer to be converted
+ * @stable ICU 59
+ */
+ inline Char16Ptr(uint16_t *p);
+#endif
+#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
+ /**
+ * Converts the pointer to char16_t *.
+ * (Only defined if U_SIZEOF_WCHAR_T==2.)
+ * @param p pointer to be converted
+ * @stable ICU 59
+ */
+ inline Char16Ptr(wchar_t *p);
+#endif
+ /**
+ * nullptr constructor.
+ * @param p nullptr
+ * @stable ICU 59
+ */
+ inline Char16Ptr(std::nullptr_t p);
+ /**
+ * Destructor.
+ * @stable ICU 59
+ */
+ inline ~Char16Ptr();
+
+ /**
+ * Pointer access.
+ * @return the wrapped pointer
+ * @stable ICU 59
+ */
+ inline char16_t *get() const;
+ /**
+ * char16_t pointer access via type conversion (e.g., static_cast).
+ * Forwards to get().
+ * @return the wrapped pointer
+ * @stable ICU 59
+ */
+ inline operator char16_t *() const { return get(); }
+
+private:
+ Char16Ptr() = delete; // a wrapper must always be built from a pointer (or nullptr)
+
+#ifdef U_ALIASING_BARRIER
+ template<typename T> static char16_t *cast(T *t) { // barrier first, then reinterpret the pointer
+ U_ALIASING_BARRIER(t);
+ return reinterpret_cast<char16_t *>(t);
+ }
+
+ char16_t *p_; // the wrapped pointer
+#else
+ union { // no barrier macro: store through the argument's own type, read back via cp
+ char16_t *cp;
+ uint16_t *up;
+ wchar_t *wp;
+ } u_;
+#endif
+};
+
+/// \cond
+#ifdef U_ALIASING_BARRIER // barrier variant: single p_ member
+
+Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {} // same type: plain copy, no barrier
+#if !U_CHAR16_IS_TYPEDEF
+Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {} // cast() issues the barrier before converting
+#endif
+#if U_SIZEOF_WCHAR_T==2
+Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {} // cast() issues the barrier before converting
+#endif
+Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {}
+Char16Ptr::~Char16Ptr() {
+ U_ALIASING_BARRIER(p_); // NOTE(review): second barrier at end of lifetime; presumably covers writes made through the wrapped pointer -- confirm
+}
+
+char16_t *Char16Ptr::get() const { return p_; }
+
+#else // union variant: U_ALIASING_BARRIER is not defined
+
+Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; }
+#if !U_CHAR16_IS_TYPEDEF
+Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; } // stored via up, later read back via cp in get()
+#endif
+#if U_SIZEOF_WCHAR_T==2
+Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; } // stored via wp, later read back via cp in get()
+#endif
+Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; }
+Char16Ptr::~Char16Ptr() {} // nothing to do without a barrier macro
+
+char16_t *Char16Ptr::get() const { return u_.cp; }
+
+#endif
+/// \endcond
+
+/**
+ * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
+ *
+ * This is the const counterpart of Char16Ptr: the converting constructors are
+ * non-explicit and accept const uint16_t * (when U_CHAR16_IS_TYPEDEF is 0) or
+ * const wchar_t * (when U_SIZEOF_WCHAR_T is 2).
+ *
+ * Storage depends on whether U_ALIASING_BARRIER is defined:
+ * - defined: a single const char16_t * member; foreign pointer types are converted
+ * by the private cast() helper, which issues the barrier and then reinterpret_casts.
+ * - not defined: a union of the supported const pointer types; the constructor writes
+ * the member matching its argument type and get() reads the const char16_t * member.
+ * @stable ICU 59
+ */
+class U_COMMON_API ConstChar16Ptr U_FINAL {
+public:
+ /**
+ * Copies the pointer.
+ * @param p pointer
+ * @stable ICU 59
+ */
+ inline ConstChar16Ptr(const char16_t *p);
+#if !U_CHAR16_IS_TYPEDEF
+ /**
+ * Converts the pointer to const char16_t *.
+ * (Only declared if U_CHAR16_IS_TYPEDEF is 0.)
+ * @param p pointer to be converted
+ * @stable ICU 59
+ */
+ inline ConstChar16Ptr(const uint16_t *p);
+#endif
+#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
+ /**
+ * Converts the pointer to const char16_t *.
+ * (Only defined if U_SIZEOF_WCHAR_T==2.)
+ * @param p pointer to be converted
+ * @stable ICU 59
+ */
+ inline ConstChar16Ptr(const wchar_t *p);
+#endif
+ /**
+ * nullptr constructor.
+ * @param p nullptr
+ * @stable ICU 59
+ */
+ inline ConstChar16Ptr(const std::nullptr_t p);
+
+ /**
+ * Destructor.
+ * @stable ICU 59
+ */
+ inline ~ConstChar16Ptr();
+
+ /**
+ * Pointer access.
+ * @return the wrapped pointer
+ * @stable ICU 59
+ */
+ inline const char16_t *get() const;
+ /**
+ * const char16_t pointer access via type conversion (e.g., static_cast).
+ * Forwards to get().
+ * @return the wrapped pointer
+ * @stable ICU 59
+ */
+ inline operator const char16_t *() const { return get(); }
+
+private:
+ ConstChar16Ptr() = delete; // a wrapper must always be built from a pointer (or nullptr)
+
+#ifdef U_ALIASING_BARRIER
+ template<typename T> static const char16_t *cast(const T *t) { // barrier first, then reinterpret the pointer
+ U_ALIASING_BARRIER(t);
+ return reinterpret_cast<const char16_t *>(t);
+ }
+
+ const char16_t *p_; // the wrapped pointer
+#else
+ union { // no barrier macro: store through the argument's own type, read back via cp
+ const char16_t *cp;
+ const uint16_t *up;
+ const wchar_t *wp;
+ } u_;
+#endif
+};
+
+/// \cond
+#ifdef U_ALIASING_BARRIER // barrier variant: single p_ member
+
+ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {} // same type: plain copy, no barrier
+#if !U_CHAR16_IS_TYPEDEF
+ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {} // cast() issues the barrier before converting
+#endif
+#if U_SIZEOF_WCHAR_T==2
+ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {} // cast() issues the barrier before converting
+#endif
+ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {}
+ConstChar16Ptr::~ConstChar16Ptr() {
+ U_ALIASING_BARRIER(p_); // NOTE(review): second barrier at end of lifetime; rationale is not visible here -- confirm
+}
+
+const char16_t *ConstChar16Ptr::get() const { return p_; }
+
+#else // union variant: U_ALIASING_BARRIER is not defined
+
+ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; }
+#if !U_CHAR16_IS_TYPEDEF
+ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; } // stored via up, later read back via cp in get()
+#endif
+#if U_SIZEOF_WCHAR_T==2
+ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; } // stored via wp, later read back via cp in get()
+#endif
+ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; }
+ConstChar16Ptr::~ConstChar16Ptr() {} // nothing to do without a barrier macro
+
+const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
+
+#endif
+/// \endcond
+
+/**
+ * Converts from const char16_t * to const UChar *.
+ * Issues U_ALIASING_BARRIER(p) first when that macro is defined, then
+ * reinterpret_casts the pointer; the pointed-to data is not copied.
+ * @param p pointer
+ * @return p as const UChar *
+ * @stable ICU 59
+ */
+inline const UChar *toUCharPtr(const char16_t *p) {
+#ifdef U_ALIASING_BARRIER
+ U_ALIASING_BARRIER(p); // must precede the cast below
+#endif
+ return reinterpret_cast<const UChar *>(p);
+}
+
+/**
+ * Converts from char16_t * to UChar *.
+ * Issues U_ALIASING_BARRIER(p) first when that macro is defined, then
+ * reinterpret_casts the pointer; the pointed-to data is not copied.
+ * @param p pointer
+ * @return p as UChar *
+ * @stable ICU 59
+ */
+inline UChar *toUCharPtr(char16_t *p) {
+#ifdef U_ALIASING_BARRIER
+ U_ALIASING_BARRIER(p); // must precede the cast below
+#endif
+ return reinterpret_cast<UChar *>(p);
+}
+
+/**
+ * Converts from const char16_t * to const OldUChar *.
+ * Issues U_ALIASING_BARRIER(p) first when that macro is defined, then
+ * reinterpret_casts the pointer; the pointed-to data is not copied.
+ * @param p pointer
+ * @return p as const OldUChar *
+ * @stable ICU 59
+ */
+inline const OldUChar *toOldUCharPtr(const char16_t *p) {
+#ifdef U_ALIASING_BARRIER
+ U_ALIASING_BARRIER(p); // must precede the cast below
+#endif
+ return reinterpret_cast<const OldUChar *>(p);
+}
+
+/**
+ * Converts from char16_t * to OldUChar *.
+ * Issues U_ALIASING_BARRIER(p) first when that macro is defined, then
+ * reinterpret_casts the pointer; the pointed-to data is not copied.
+ * @param p pointer
+ * @return p as OldUChar *
+ * @stable ICU 59
+ */
+inline OldUChar *toOldUCharPtr(char16_t *p) {
+#ifdef U_ALIASING_BARRIER
+ U_ALIASING_BARRIER(p); // must precede the cast below
+#endif
+ return reinterpret_cast<OldUChar *>(p);
+}
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __CHAR16PTR_H__
diff --git a/thirdparty/icu4c/common/unicode/chariter.h b/thirdparty/icu4c/common/unicode/chariter.h
new file mode 100644
index 0000000000..96dc5db7c9
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/chariter.h
@@ -0,0 +1,734 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+********************************************************************
+*
+* Copyright (C) 1997-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+********************************************************************
+*/
+
+#ifndef CHARITER_H
+#define CHARITER_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+/**
+ * \file
+ * \brief C++ API: Character Iterator
+ */
+
+U_NAMESPACE_BEGIN
+/**
+ * Abstract class that defines an API for forward-only iteration
+ * on text objects.
+ * This is a minimal interface for iteration without random access
+ * or backwards iteration. It is especially useful for wrapping
+ * streams with converters into an object for collation or
+ * normalization.
+ *
+ * <p>Characters can be accessed in two ways: as code units or as
+ * code points.
+ * Unicode code points are 21-bit integers and are the scalar values
+ * of Unicode characters. ICU uses the type UChar32 for them.
+ * Unicode code units are the storage units of a given
+ * Unicode/UCS Transformation Format (a character encoding scheme).
+ * With UTF-16, all code points can be represented with either one
+ * or two code units ("surrogates").
+ * String storage is typically based on code units, while properties
+ * of characters are typically determined using code point values.
+ * Some processes may be designed to work with sequences of code units,
+ * or it may be known that all characters that are important to an
+ * algorithm can be represented with single code units.
+ * Other processes will need to use the code point access functions.</p>
+ *
+ * <p>ForwardCharacterIterator provides nextPostInc() to access
+ * a code unit and advance an internal position into the text object,
+ * similar to a <code>return text[position++]</code>.<br>
+ * It provides next32PostInc() to access a code point and advance an internal
+ * position.</p>
+ *
+ * <p>next32PostInc() assumes that the current position is that of
+ * the beginning of a code point, i.e., of its first code unit.
+ * After next32PostInc(), this will be true again.
+ * In general, access to code units and code points in the same
+ * iteration loop should not be mixed. In UTF-16, if the current position
+ * is on a second code unit (Low Surrogate), then only that code unit
+ * is returned even by next32PostInc().</p>
+ *
+ * <p>For iteration with either function, there are two ways to
+ * check for the end of the iteration. When there are no more
+ * characters in the text object:
+ * <ul>
+ * <li>The hasNext() function returns false.</li>
+ * <li>nextPostInc() and next32PostInc() return DONE
+ * when one attempts to read beyond the end of the text object.</li>
+ * </ul>
+ *
+ * Example:
+ * \code
+ * void function1(ForwardCharacterIterator &it) {
+ * UChar32 c;
+ * while(it.hasNext()) {
+ * c=it.next32PostInc();
+ * // use c
+ * }
+ * }
+ *
+ * void function2(ForwardCharacterIterator &it) {
+ * char16_t c;
+ * while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
+ * // use c
+ * }
+ * }
+ * \endcode
+ * </p>
+ *
+ * @stable ICU 2.0
+ */
+class U_COMMON_API ForwardCharacterIterator : public UObject {
+public:
+ /**
+ * Value (0xffff) returned by most of ForwardCharacterIterator's functions
+ * when the iterator has reached the limits of its iteration.
+ * @stable ICU 2.0
+ */
+ enum { DONE = 0xffff };
+
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~ForwardCharacterIterator();
+
+ /**
+ * Returns true when both iterators refer to the same
+ * character in the same character-storage object.
+ * @param that The ForwardCharacterIterator to be compared for equality
+ * @return true when both iterators refer to the same
+ * character in the same character-storage object
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;
+
+ /**
+ * Returns true when the iterators refer to different
+ * text-storage objects, or to different characters in the
+ * same text-storage object.
+ * @param that The ForwardCharacterIterator to be compared for inequality
+ * @return true when the iterators refer to different
+ * text-storage objects, or to different characters in the
+ * same text-storage object
+ * @stable ICU 2.0
+ */
+ inline UBool operator!=(const ForwardCharacterIterator& that) const;
+
+ /**
+ * Generates a hash code for this iterator.
+ * @return the hash code.
+ * @stable ICU 2.0
+ */
+ virtual int32_t hashCode(void) const = 0;
+
+ /**
+ * Returns a UClassID for this ForwardCharacterIterator ("poor man's
+ * RTTI").<P> Despite the fact that this function is public,
+ * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API!
+ * @return a UClassID for this ForwardCharacterIterator
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const = 0;
+
+ /**
+ * Gets the current code unit for returning and advances to the next code unit
+ * in the iteration range
+ * (toward endIndex()). If there are
+ * no more code units to return, returns DONE.
+ * @return the current code unit.
+ * @stable ICU 2.0
+ */
+ virtual char16_t nextPostInc(void) = 0;
+
+ /**
+ * Gets the current code point for returning and advances to the next code point
+ * in the iteration range
+ * (toward endIndex()). If there are
+ * no more code points to return, returns DONE.
+ * @return the current code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 next32PostInc(void) = 0;
+
+ /**
+ * Returns false if there are no more code units or code points
+ * at or after the current position in the iteration range.
+ * This is used with nextPostInc() or next32PostInc() in forward
+ * iteration.
+ * @return false if there are no more code units or code points
+ * at or after the current position in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UBool hasNext() = 0;
+
+protected:
+ /** Default constructor to be overridden in the implementing class. @stable ICU 2.0 */
+ ForwardCharacterIterator();
+
+ /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0 */
+ ForwardCharacterIterator(const ForwardCharacterIterator &other);
+
+ /**
+ * Assignment operator to be overridden in the implementing class; this base version copies nothing and returns *this.
+ * @stable ICU 2.0
+ */
+ ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
+};
+
+/**
+ * Abstract class that defines an API for iteration
+ * on text objects.
+ * This is an interface for forward and backward iteration
+ * and random access into a text object.
+ *
+ * <p>The API provides backward compatibility to the Java and older ICU
+ * CharacterIterator classes but extends them significantly:
+ * <ol>
+ * <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
+ * <li>While the old API functions provided forward iteration with
+ * "pre-increment" semantics, the new one also provides functions
+ * with "post-increment" semantics. They are more efficient and should
+ * be the preferred iterator functions for new implementations.
+ * The backward iteration always had "pre-decrement" semantics, which
+ * are efficient.</li>
+ * <li>Just like ForwardCharacterIterator, it provides access to
+ * both code units and code points. Code point access versions are available
+ * for the old and the new iteration semantics.</li>
+ * <li>There are new functions for setting and moving the current position
+ * without returning a character, for efficiency.</li>
+ * </ol>
+ *
+ * See ForwardCharacterIterator for examples for using the new forward iteration
+ * functions. For backward iteration, there is also a hasPrevious() function
+ * that can be used analogously to hasNext().
+ * The old functions work as before and are shown below.</p>
+ *
+ * <p>Examples for some of the new functions:</p>
+ *
+ * Forward iteration with hasNext():
+ * \code
+ * void forward1(CharacterIterator &it) {
+ * UChar32 c;
+ * for(it.setToStart(); it.hasNext();) {
+ * c=it.next32PostInc();
+ * // use c
+ * }
+ * }
+ * \endcode
+ * Forward iteration more similar to loops with the old forward iteration,
+ * showing a way to convert simple for() loops:
+ * \code
+ * void forward2(CharacterIterator &it) {
+ * char16_t c;
+ * for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
+ * // use c
+ * }
+ * }
+ * \endcode
+ * Backward iteration with setToEnd() and hasPrevious():
+ * \code
+ * void backward1(CharacterIterator &it) {
+ * UChar32 c;
+ * for(it.setToEnd(); it.hasPrevious();) {
+ * c=it.previous32();
+ * // use c
+ * }
+ * }
+ * \endcode
+ * Backward iteration with a more traditional for() loop:
+ * \code
+ * void backward2(CharacterIterator &it) {
+ * char16_t c;
+ * for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
+ * // use c
+ * }
+ * }
+ * \endcode
+ *
+ * Example for random access:
+ * \code
+ * void random(CharacterIterator &it) {
+ * // set to the third code point from the beginning
+ * it.move32(3, CharacterIterator::kStart);
+ * // get a code point from here without moving the position
+ * UChar32 c=it.current32();
+ * // get the position
+ * int32_t pos=it.getIndex();
+ * // get the previous code unit
+ * char16_t u=it.previous();
+ * // move back one more code unit
+ * it.move(-1, CharacterIterator::kCurrent);
+ * // set the position back to where it was
+ * // and read the same code point c and move beyond it
+ * it.setIndex(pos);
+ * if(c!=it.next32PostInc()) {
+ * exit(1); // CharacterIterator inconsistent
+ * }
+ * }
+ * \endcode
+ *
+ * <p>Examples, especially for the old API:</p>
+ *
+ * Function processing characters, in this example simple output
+ * <pre>
+ * \code
+ * void processChar( char16_t c )
+ * {
+ * cout << " " << c;
+ * }
+ * \endcode
+ * </pre>
+ * Traverse the text from start to finish
+ * <pre>
+ * \code
+ * void traverseForward(CharacterIterator& iter)
+ * {
+ * for(char16_t c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
+ * processChar(c);
+ * }
+ * }
+ * \endcode
+ * </pre>
+ * Traverse the text backwards, from end to start
+ * <pre>
+ * \code
+ * void traverseBackward(CharacterIterator& iter)
+ * {
+ * for(char16_t c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
+ * processChar(c);
+ * }
+ * }
+ * \endcode
+ * </pre>
+ * Traverse both forward and backward from a given position in the text.
+ * The calls to Unicode::isLetter() and Unicode::isDigit() in this example represent additional stopping criteria.
+ * <pre>
+ * \code
+ * void traverseOut(CharacterIterator& iter, int32_t pos)
+ * {
+ * char16_t c;
+ * for (c = iter.setIndex(pos);
+ * c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
+ * c = iter.next()) {}
+ * int32_t end = iter.getIndex();
+ * for (c = iter.setIndex(pos);
+ * c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
+ * c = iter.previous()) {}
+ * int32_t start = iter.getIndex() + 1;
+ *
+ * cout << "start: " << start << " end: " << end << endl;
+ * for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
+ * processChar(c);
+ * }
+ * }
+ * \endcode
+ * </pre>
+ * Creating a StringCharacterIterator and calling the test functions
+ * <pre>
+ * \code
+ * void CharacterIterator_Example( void )
+ * {
+ * cout << endl << "===== CharacterIterator_Example: =====" << endl;
+ * UnicodeString text("Ein kleiner Satz.");
+ * StringCharacterIterator iterator(text);
+ * cout << "----- traverseForward: -----------" << endl;
+ * traverseForward( iterator );
+ * cout << endl << endl << "----- traverseBackward: ----------" << endl;
+ * traverseBackward( iterator );
+ * cout << endl << endl << "----- traverseOut: ---------------" << endl;
+ * traverseOut( iterator, 7 );
+ * cout << endl << endl << "-----" << endl;
+ * }
+ * \endcode
+ * </pre>
+ *
+ * @stable ICU 2.0
+ */
+class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
+public:
+ /**
+ * Origin enumeration for the move() and move32() functions.
+ * @stable ICU 2.0
+ */
+ enum EOrigin { kStart, kCurrent, kEnd };
+
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~CharacterIterator();
+
+ /**
+ * Returns a pointer to a new CharacterIterator of the same
+ * concrete class as this one, and referring to the same
+ * character in the same text-storage object as this one. The
+ * caller is responsible for deleting the new clone.
+ * @return a pointer to a new CharacterIterator
+ * @stable ICU 2.0
+ */
+ virtual CharacterIterator* clone() const = 0;
+
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with next().
+ * @return the first code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual char16_t first(void) = 0;
+
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, returns that code unit, and moves the position
+ * to the second code unit. This is an alternative to setToStart()
+ * for forward iteration with nextPostInc().
+ * @return the first code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual char16_t firstPostInc(void);
+
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, and returns that code point.
+ * This can be used to begin an iteration with next32().
+ * Note that an iteration with next32PostInc(), beginning with,
+ * e.g., setToStart() or firstPostInc(), is more efficient.
+ * @return the first code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32(void) = 0;
+
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, returns that code point, and moves the position
+ * to the second code point. This is an alternative to setToStart()
+ * for forward iteration with next32PostInc().
+ * @return the first code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32PostInc(void);
+
+ /**
+ * Sets the iterator to refer to the first code unit or code point in its
+ * iteration range. This can be used to begin a forward
+ * iteration with nextPostInc() or next32PostInc().
+ * @return the start position of the iteration range
+ * @stable ICU 2.0
+ */
+ inline int32_t setToStart();
+
+ /**
+ * Sets the iterator to refer to the last code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with previous().
+ * @return the last code unit.
+ * @stable ICU 2.0
+ */
+ virtual char16_t last(void) = 0;
+
+ /**
+ * Sets the iterator to refer to the last code point in its
+ * iteration range, and returns that code point.
+ * This can be used to begin an iteration with previous32().
+ * @return the last code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 last32(void) = 0;
+
+ /**
+ * Sets the iterator to the end of its iteration range, just behind
+ * the last code unit or code point. This can be used to begin a backward
+ * iteration with previous() or previous32().
+ * @return the end position of the iteration range
+ * @stable ICU 2.0
+ */
+ inline int32_t setToEnd();
+
+ /**
+ * Sets the iterator to refer to the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code unit.
+ * @param position the "position"-th code unit in the text-storage object
+ * @return the "position"-th code unit.
+ * @stable ICU 2.0
+ */
+ virtual char16_t setIndex(int32_t position) = 0;
+
+ /**
+ * Sets the iterator to refer to the beginning of the code point
+ * that contains the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code point.
+ * The current position is adjusted to the beginning of the code point
+ * (its first code unit).
+ * @param position the "position"-th code unit in the text-storage object
+ * @return the code point that contains the "position"-th code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 setIndex32(int32_t position) = 0;
+
+ /**
+ * Returns the code unit the iterator currently refers to.
+ * @return the current code unit.
+ * @stable ICU 2.0
+ */
+ virtual char16_t current(void) const = 0;
+
+ /**
+ * Returns the code point the iterator currently refers to.
+ * @return the current code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 current32(void) const = 0;
+
+ /**
+ * Advances to the next code unit in the iteration range
+ * (toward endIndex()), and returns that code unit. If there are
+ * no more code units to return, returns DONE.
+ * @return the next code unit.
+ * @stable ICU 2.0
+ */
+ virtual char16_t next(void) = 0;
+
+ /**
+ * Advances to the next code point in the iteration range
+ * (toward endIndex()), and returns that code point. If there are
+ * no more code points to return, returns DONE.
+ * Note that iteration with "pre-increment" semantics is less
+ * efficient than iteration with "post-increment" semantics
+ * that is provided by next32PostInc().
+ * @return the next code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 next32(void) = 0;
+
+ /**
+ * Advances to the previous code unit in the iteration range
+ * (toward startIndex()), and returns that code unit. If there are
+ * no more code units to return, returns DONE.
+ * @return the previous code unit.
+ * @stable ICU 2.0
+ */
+ virtual char16_t previous(void) = 0;
+
+ /**
+ * Advances to the previous code point in the iteration range
+ * (toward startIndex()), and returns that code point. If there are
+ * no more code points to return, returns DONE.
+ * @return the previous code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 previous32(void) = 0;
+
+ /**
+ * Returns false if there are no more code units or code points
+ * before the current position in the iteration range.
+ * This is used with previous() or previous32() in backward
+ * iteration.
+ * @return false if there are no more code units or code points
+ * before the current position in the iteration range, return true otherwise.
+ * @stable ICU 2.0
+ */
+ virtual UBool hasPrevious() = 0;
+
+ /**
+ * Returns the numeric index in the underlying text-storage
+ * object of the character returned by first(). Since it's
+ * possible to create an iterator that iterates across only
+ * part of a text-storage object, this number isn't
+ * necessarily 0.
+ * @return the numeric index in the underlying text-storage
+ * object of the character returned by first().
+ * @stable ICU 2.0
+ */
+ inline int32_t startIndex(void) const;
+
+ /**
+ * Returns the numeric index in the underlying text-storage
+ * object of the position immediately BEYOND the character
+ * returned by last().
+ * @return the numeric index in the underlying text-storage
+ * object of the position immediately BEYOND the character
+ * returned by last().
+ * @stable ICU 2.0
+ */
+ inline int32_t endIndex(void) const;
+
+ /**
+ * Returns the numeric index in the underlying text-storage
+ * object of the character the iterator currently refers to
+ * (i.e., the character returned by current()).
+ * @return the numeric index in the text-storage object of
+ * the character the iterator currently refers to
+ * @stable ICU 2.0
+ */
+ inline int32_t getIndex(void) const;
+
+ /**
+ * Returns the length of the entire text in the underlying
+ * text-storage object.
+ * @return the length of the entire text in the text-storage object
+ * @stable ICU 2.0
+ */
+ inline int32_t getLength() const;
+
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+ virtual int32_t move(int32_t delta, EOrigin origin) = 0;
+
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code points forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+#ifdef move32
+ // One of the system headers right now is sometimes defining a conflicting macro we don't use
+#undef move32
+#endif
+ virtual int32_t move32(int32_t delta, EOrigin origin) = 0;
+
+ /**
+ * Copies the text under iteration into the UnicodeString
+ * referred to by "result".
+ * @param result Receives a copy of the text under iteration.
+ * @stable ICU 2.0
+ */
+ virtual void getText(UnicodeString& result) = 0;
+
+protected:
+ /**
+ * Empty constructor.
+ * @stable ICU 2.0
+ */
+ CharacterIterator();
+
+ /**
+ * Constructor, just setting the length field in this base class.
+ * @stable ICU 2.0
+ */
+ CharacterIterator(int32_t length);
+
+ /**
+ * Constructor, just setting the length and position fields in this base class.
+ * @stable ICU 2.0
+ */
+ CharacterIterator(int32_t length, int32_t position);
+
+ /**
+ * Constructor, just setting the length, start, end, and position fields in this base class.
+ * @stable ICU 2.0
+ */
+ CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);
+
+ /**
+ * Copy constructor.
+ *
+ * @param that The CharacterIterator to be copied
+ * @stable ICU 2.0
+ */
+ CharacterIterator(const CharacterIterator &that);
+
+ /**
+ * Assignment operator. Sets this CharacterIterator to have the same behavior
+ * as the one passed in.
+ * @param that The CharacterIterator passed in.
+ * @return the newly set CharacterIterator.
+ * @stable ICU 2.0
+ */
+ CharacterIterator &operator=(const CharacterIterator &that);
+
+ /**
+ * Base class text length field.
+ * Necessary for correct getText() and hashCode().
+ * @stable ICU 2.0
+ */
+ int32_t textLength;
+
+ /**
+ * Base class field for the current position.
+ * @stable ICU 2.0
+ */
+ int32_t pos;
+
+ /**
+ * Base class field for the start of the iteration range.
+ * @stable ICU 2.0
+ */
+ int32_t begin;
+
+ /**
+ * Base class field for the end of the iteration range.
+ * @stable ICU 2.0
+ */
+ int32_t end;
+};
+
+inline UBool
+ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
+ return !operator==(that); // negation of the pure-virtual operator==
+}
+
+inline int32_t
+CharacterIterator::setToStart() {
+ return move(0, kStart); // zero offset from kStart; forwards move()'s result
+}
+
+inline int32_t
+CharacterIterator::setToEnd() {
+ return move(0, kEnd); // zero offset from kEnd; forwards move()'s result
+}
+
+inline int32_t
+CharacterIterator::startIndex(void) const {
+ return begin; // protected field: start of the iteration range
+}
+
+inline int32_t
+CharacterIterator::endIndex(void) const {
+ return end; // protected field: end of the iteration range
+}
+
+inline int32_t
+CharacterIterator::getIndex(void) const {
+ return pos; // protected field: current position
+}
+
+inline int32_t
+CharacterIterator::getLength(void) const {
+ return textLength; // protected field: base class text length
+}
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/dbbi.h b/thirdparty/icu4c/common/unicode/dbbi.h
new file mode 100644
index 0000000000..3de9cc3814
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/dbbi.h
@@ -0,0 +1,48 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2006,2013 IBM Corp. All rights reserved.
+**********************************************************************
+* Date Name Description
+* 12/1/99 rgillam Complete port from Java.
+* 01/13/2000 helena Added UErrorCode to ctors.
+**********************************************************************
+*/
+
+#ifndef DBBI_H
+#define DBBI_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/rbbi.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * \file
+ * \brief C++ API: Dictionary Based Break Iterator
+ */
+
+U_NAMESPACE_BEGIN
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * An obsolete name for RuleBasedBreakIterator, kept as a typedef. Handling of dictionary-
+ * based break iteration has been folded into the base class. This name
+ * is deprecated as of ICU 3.6.
+ * @deprecated ICU 3.6
+ */
+typedef RuleBasedBreakIterator DictionaryBasedBreakIterator;
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/docmain.h b/thirdparty/icu4c/common/unicode/docmain.h
new file mode 100644
index 0000000000..b7984ada03
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/docmain.h
@@ -0,0 +1,232 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 1997-2012, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *
+ * FILE NAME: DOCMAIN.h
+ *
+ * Date Name Description
+ * 12/11/2000 Ram Creation.
+ */
+
+/**
+ * \file
+ * \brief (Non API- contains Doxygen definitions)
+ *
+ * This file contains documentation for Doxygen and does not have
+ * any significance with respect to the C or C++ API.
+ */
+
+/*! \mainpage
+ *
+ * \section API API Reference Usage
+ *
+ * <h3>C++ Programmers:</h3>
+ * <p>Use <a href="hierarchy.html">Class Hierarchy</a> or <a href="classes.html"> Alphabetical List </a>
+ * or <a href="annotated.html"> Compound List</a>
+ * to find the class you are interested in. For example, to find BreakIterator,
+ * you can go to the <a href="classes.html"> Alphabetical List</a>, then click on
+ * "BreakIterator". Once you are at the class, you will find an inheritance
+ * chart, a list of the public members, a detailed description of the class,
+ * then detailed member descriptions.</p>
+ *
+ * <h3>C Programmers:</h3>
+ * <p>Use <a href="#Module">Module List</a> or <a href="globals_u.html">File Members</a>
+ * to find a list of all the functions and constants.
+ * For example, to find BreakIterator functions you would click on
+ * <a href="files.html"> File List</a>,
+ * then find "ubrk.h" and click on it. You will find descriptions of Defines,
+ * Typedefs, Enumerations, and Functions, with detailed descriptions below.
+ * If you want to find a specific function, such as ubrk_next(), then click
+ * first on <a href="globals.html"> File Members</a>, then use your browser
+ * Find dialog to search for "ubrk_next()".</p>
+ *
+ *
+ * <h3>API References for Previous Releases</h3>
+ * <p>The API References for each release of ICU are also available as
+ * a zip file from the ICU
+ * <a href="http://site.icu-project.org/download">download page</a>.</p>
+ *
+ * <hr>
+ *
+ * <h2>Architecture (User's Guide)</h2>
+ * <ul>
+ * <li><a href="https://unicode-org.github.io/icu/userguide/">Introduction</a></li>
+ * <li><a href="https://unicode-org.github.io/icu/userguide/i18n">Internationalization</a></li>
+ * <li><a href="https://unicode-org.github.io/icu/userguide/design">Locale Model, Multithreading, Error Handling, etc.</a></li>
+ * <li><a href="https://unicode-org.github.io/icu/userguide/conversion">Conversion</a></li>
+ * </ul>
+ *
+ * <hr>
+ *\htmlonly <h2><a NAME="Module">Module List</a></h2> \endhtmlonly
+ * <table border="1" cols="3" align="center">
+ * <tr>
+ * <td><strong>Module Name</strong></td>
+ * <td><strong>C</strong></td>
+ * <td><strong>C++</strong></td>
+ * </tr>
+ * <tr>
+ * <td>Basic Types and Constants</td>
+ * <td>utypes.h</td>
+ * <td>utypes.h</td>
+ * </tr>
+ * <tr>
+ * <td>Strings and Character Iteration</td>
+ * <td>ustring.h, utf8.h, utf16.h, UText, UCharIterator</td>
+ * <td>icu::UnicodeString, icu::CharacterIterator, icu::Appendable, icu::StringPiece,icu::ByteSink</td>
+ * </tr>
+ * <tr>
+ * <td>Unicode Character<br/>Properties and Names</td>
+ * <td>uchar.h, uscript.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Sets of Unicode Code Points and Strings</td>
+ * <td>uset.h</td>
+ * <td>icu::UnicodeSet</td>
+ * </tr>
+ * <tr>
+ * <td>Maps from Unicode Code Points to Integer Values</td>
+ * <td>ucptrie.h, umutablecptrie.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Maps from Strings to Integer Values</td>
+ * <td>(no C API)</td>
+ * <td>icu::BytesTrie, icu::UCharsTrie</td>
+ * </tr>
+ * <tr>
+ * <td>Codepage Conversion</td>
+ * <td>ucnv.h, ucnvsel.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Codepage Detection</td>
+ * <td>ucsdet.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Unicode Text Compression</td>
+ * <td>ucnv.h<br/>(encoding name "SCSU" or "BOCU-1")</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Locales </td>
+ * <td>uloc.h</td>
+ * <td>icu::Locale, icu::LocaleBuilder, icu::LocaleMatcher</td>
+ * </tr>
+ * <tr>
+ * <td>Resource Bundles</td>
+ * <td>ures.h</td>
+ * <td>icu::ResourceBundle</td>
+ * </tr>
+ * <tr>
+ * <td>Normalization</td>
+ * <td>unorm2.h</td>
+ * <td>icu::Normalizer2</td>
+ * </tr>
+ * <tr>
+ * <td>Calendars</td>
+ * <td>ucal.h</td>
+ * <td>icu::Calendar</td>
+ * </tr>
+ * <tr>
+ * <td>Date and Time Formatting</td>
+ * <td>udat.h</td>
+ * <td>icu::DateFormat</td>
+ * </tr>
+ * <tr>
+ * <td>Message Formatting</td>
+ * <td>umsg.h</td>
+ * <td>icu::MessageFormat</td>
+ * </tr>
+ * <tr>
+ * <td>Number Formatting<br/>(includes currency and unit formatting)</td>
+ * <td>unumberformatter.h, unum.h</td>
+ * <td>icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)</td>
+ * </tr>
+ * <tr>
+ * <td>Number Range Formatting<br />(includes currency and unit ranges)</td>
+ * <td>unumberrangeformatter.h</td>
+ * <td>icu::number::NumberRangeFormatter</td>
+ * </tr>
+ * <tr>
+ * <td>Number Spellout<br/>(Rule Based Number Formatting)</td>
+ * <td>unum.h<br/>(use UNUM_SPELLOUT)</td>
+ * <td>icu::RuleBasedNumberFormat</td>
+ * </tr>
+ * <tr>
+ * <td>Text Transformation<br/>(Transliteration)</td>
+ * <td>utrans.h</td>
+ * <td>icu::Transliterator</td>
+ * </tr>
+ * <tr>
+ * <td>Bidirectional Algorithm</td>
+ * <td>ubidi.h, ubiditransform.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Arabic Shaping</td>
+ * <td>ushape.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Collation</td>
+ * <td>ucol.h</td>
+ * <td>icu::Collator</td>
+ * </tr>
+ * <tr>
+ * <td>String Searching</td>
+ * <td>usearch.h</td>
+ * <td>icu::StringSearch</td>
+ * </tr>
+ * <tr>
+ * <td>Index Characters/<br/>Bucketing for Sorted Lists</td>
+ * <td>(no C API)</td>
+ * <td>icu::AlphabeticIndex</td>
+ * </tr>
+ * <tr>
+ * <td>Text Boundary Analysis<br/>(Break Iteration)</td>
+ * <td>ubrk.h</td>
+ * <td>icu::BreakIterator</td>
+ * </tr>
+ * <tr>
+ * <td>Regular Expressions</td>
+ * <td>uregex.h</td>
+ * <td>icu::RegexPattern, icu::RegexMatcher</td>
+ * </tr>
+ * <tr>
+ * <td>StringPrep</td>
+ * <td>usprep.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>International Domain Names in Applications:<br/>
+ * UTS #46 in C/C++, IDNA2003 only via C API</td>
+ * <td>uidna.h</td>
+ * <td>idna.h</td>
+ * </tr>
+ * <tr>
+ * <td>Identifier Spoofing &amp; Confusability</td>
+ * <td>uspoof.h</td>
+ * <td>C API</td></tr>
+ * <tr>
+ * <td>Universal Time Scale</td>
+ * <td>utmscale.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Paragraph Layout / Complex Text Layout</td>
+ * <td>playout.h</td>
+ * <td>icu::ParagraphLayout</td>
+ * </tr>
+ * <tr>
+ * <td>ICU I/O</td>
+ * <td>ustdio.h</td>
+ * <td>ustream.h</td>
+ * </tr>
+ * </table>
+ * <i>This main page is generated from docmain.h</i>
+ */
diff --git a/thirdparty/icu4c/common/unicode/dtintrv.h b/thirdparty/icu4c/common/unicode/dtintrv.h
new file mode 100644
index 0000000000..4f4b6bf7f4
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/dtintrv.h
@@ -0,0 +1,164 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2008-2009, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*
+* File DTINTRV.H
+*
+*******************************************************************************
+*/
+
+#ifndef __DTINTRV_H__
+#define __DTINTRV_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Date Interval data type
+ */
+
+U_NAMESPACE_BEGIN
+
+
+/**
+ * This class represents a date interval.
+ * It is a pair of UDate representing from UDate 1 to UDate 2.
+ * @stable ICU 4.0
+**/
+class U_COMMON_API DateInterval : public UObject {
+public:
+
+ /**
+ * Construct a DateInterval given a from date and a to date.
+ * @param fromDate The from date in date interval.
+ * @param toDate The to date in date interval.
+ * @stable ICU 4.0
+ */
+ DateInterval(UDate fromDate, UDate toDate);
+
+ /**
+ * Destructor.
+ * @stable ICU 4.0
+ */
+ virtual ~DateInterval();
+
+ /**
+ * Get the from date.
+ * @return the from date in dateInterval.
+ * @stable ICU 4.0
+ */
+ inline UDate getFromDate() const;
+
+ /**
+ * Get the to date.
+ * @return the to date in dateInterval.
+ * @stable ICU 4.0
+ */
+ inline UDate getToDate() const;
+
+
+ /**
+ * Return the class ID for this class. This is useful only for comparing to
+ * a return value from getDynamicClassID(). For example:
+ * <pre>
+ * . Base* polymorphic_pointer = createPolymorphicObject();
+ * . if (polymorphic_pointer->getDynamicClassID() ==
+ * . derived::getStaticClassID()) ...
+ * </pre>
+ * @return The class ID for all objects of this class.
+ * @stable ICU 4.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+ /**
+ * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+ * method is to implement a simple version of RTTI, since not all C++
+ * compilers support genuine RTTI. Polymorphic operator==() and clone()
+ * methods call this method.
+ *
+ * @return The class ID for this object. All objects of a
+ * given class have the same class ID. Objects of
+ * other classes have different class IDs.
+ * @stable ICU 4.0
+ */
+ virtual UClassID getDynamicClassID(void) const;
+
+
+ /**
+ * Copy constructor.
+ * @param other the DateInterval to copy.
+ * @stable ICU 4.0
+ */
+ DateInterval(const DateInterval& other);
+
+ /**
+ * Default assignment operator
+ * @stable ICU 4.0
+ */
+ DateInterval& operator=(const DateInterval&);
+
+ /**
+ * Equality operator.
+ * @param other the DateInterval to compare with this one.
+ * @return true if the two DateIntervals are the same
+ * @stable ICU 4.0
+ */
+ virtual UBool operator==(const DateInterval& other) const;
+
+ /**
+ * Non-equality operator. Defined inline as the negation of operator==().
+ * @param other the DateInterval to compare with this one.
+ * @return true if the two DateIntervals are not the same
+ * @stable ICU 4.0
+ */
+ inline UBool operator!=(const DateInterval& other) const;
+
+
+ /**
+ * Clone this object.
+ * The caller owns the result and should delete it when done.
+ * @return a cloned DateInterval
+ * @stable ICU 4.0
+ */
+ virtual DateInterval* clone() const;
+
+private:
+ /**
+ * Default constructor, not implemented.
+ * Declared private so that a DateInterval cannot be created without dates.
+ */
+ DateInterval();
+
+ // The from date; returned by getFromDate().
+ UDate fromDate;
+ // The to date; returned by getToDate().
+ UDate toDate;
+
+} ;// end class DateInterval
+
+
+// Accessor: hands back the stored from date unchanged.
+inline UDate DateInterval::getFromDate() const {
+ return this->fromDate;
+}
+
+
+// Accessor: hands back the stored to date unchanged.
+inline UDate DateInterval::getToDate() const {
+ return this->toDate;
+}
+
+
+// Inequality is the negation of this object's operator==().
+inline UBool DateInterval::operator!=(const DateInterval& other) const {
+ const UBool same = this->operator==(other);
+ return !same;
+}
+
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/edits.h b/thirdparty/icu4c/common/unicode/edits.h
new file mode 100644
index 0000000000..bfa07fa676
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/edits.h
@@ -0,0 +1,531 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// edits.h
+// created: 2016dec30 Markus W. Scherer
+
+#ifndef __EDITS_H__
+#define __EDITS_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: C++ class Edits for low-level string transformations on styled text.
+ */
+
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+/**
+ * Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions
+ * in linear progression. Does not support moving/reordering of text.
+ *
+ * There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
+ * instances of this class using {@link #addReplace(int32_t, int32_t)} (for change edits) and
+ * {@link #addUnchanged(int32_t)} (for no-change edits). Change edits are retained with full granularity,
+ * whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
+ * mapping between code points in the source and destination strings.
+ *
+ * After all edits have been added, instances of this class should be considered immutable, and an
+ * {@link Edits::Iterator} can be used for queries.
+ *
+ * There are four flavors of Edits::Iterator:
+ *
+ * <ul>
+ * <li>{@link #getFineIterator()} retains full granularity of change edits.
+ * <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling
+ * next() on the iterator, skips over no-change edits (unchanged regions).
+ * <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change
+ * edits are automatically merged during the construction phase.)
+ * <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when
+ * calling next() on the iterator, skips over no-change edits (unchanged regions).
+ * </ul>
+ *
+ * For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the
+ * following fine edits:
+ * <ul>
+ * <li>abc ⇨ abc (no-change)
+ * <li>ß ⇨ ss (change)
+ * <li>D ⇨ d (change)
+ * <li>e ⇨ e (no-change)
+ * <li>F ⇨ f (change)
+ * </ul>
+ * and the following coarse edits (note how adjacent change edits get merged together):
+ * <ul>
+ * <li>abc ⇨ abc (no-change)
+ * <li>ßD ⇨ ssd (change)
+ * <li>e ⇨ e (no-change)
+ * <li>F ⇨ f (change)
+ * </ul>
+ *
+ * The "fine changes" and "coarse changes" iterators will step through only the change edits when their
+ * `Edits::Iterator::next()` methods are called. They are identical to the non-change iterators when
+ * their `Edits::Iterator::findSourceIndex()` or `Edits::Iterator::findDestinationIndex()`
+ * methods are used to walk through the string.
+ *
+ * For examples of how to use this class, see the test `TestCaseMapEditsIteratorDocs` in
+ * UCharacterCaseTest.java.
+ *
+ * An Edits object tracks a separate UErrorCode, but ICU string transformation functions
+ * (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
+ *
+ * @stable ICU 59
+ */
+class U_COMMON_API Edits U_FINAL : public UMemory {
+public:
+ /**
+ * Constructs an empty object.
+ * @stable ICU 59
+ */
+ Edits() :
+ array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
+ errorCode_(U_ZERO_ERROR) {}
+ /**
+ * Copy constructor.
+ * @param other source edits
+ * @stable ICU 60
+ */
+ Edits(const Edits &other) :
+ array(stackArray), capacity(STACK_CAPACITY), length(other.length),
+ delta(other.delta), numChanges(other.numChanges),
+ errorCode_(other.errorCode_) {
+ copyArray(other);
+ }
+ /**
+ * Move constructor, might leave src empty.
+ * This object will have the same contents that the source object had.
+ * @param src source edits
+ * @stable ICU 60
+ */
+ Edits(Edits &&src) U_NOEXCEPT :
+ array(stackArray), capacity(STACK_CAPACITY), length(src.length),
+ delta(src.delta), numChanges(src.numChanges),
+ errorCode_(src.errorCode_) {
+ moveArray(src);
+ }
+
+ /**
+ * Destructor.
+ * @stable ICU 59
+ */
+ ~Edits();
+
+ /**
+ * Assignment operator.
+ * @param other source edits
+ * @return *this
+ * @stable ICU 60
+ */
+ Edits &operator=(const Edits &other);
+
+ /**
+ * Move assignment operator, might leave src empty.
+ * This object will have the same contents that the source object had.
+ * The behavior is undefined if *this and src are the same object.
+ * @param src source edits
+ * @return *this
+ * @stable ICU 60
+ */
+ Edits &operator=(Edits &&src) U_NOEXCEPT;
+
+ /**
+ * Resets the data but may not release memory.
+ * @stable ICU 59
+ */
+ void reset() U_NOEXCEPT;
+
+ /**
+ * Adds a no-change edit: a record for an unchanged segment of text.
+ * Normally called from inside ICU string transformation functions, not user code.
+ * @param unchangedLength length of the unchanged segment
+ * @stable ICU 59
+ */
+ void addUnchanged(int32_t unchangedLength);
+ /**
+ * Adds a change edit: a record for a text replacement/insertion/deletion.
+ * Normally called from inside ICU string transformation functions, not user code.
+ * @param oldLength length of the replaced segment in the source text
+ * @param newLength length of the replacement segment in the destination text
+ * @stable ICU 59
+ */
+ void addReplace(int32_t oldLength, int32_t newLength);
+ /**
+ * Sets the UErrorCode if an error occurred while recording edits.
+ * Preserves older error codes in the outErrorCode.
+ * Normally called from inside ICU string transformation functions, not user code.
+ * @param outErrorCode Set to an error code if it does not contain one already
+ * and an error occurred while recording edits.
+ * Otherwise unchanged.
+ * @return true if U_FAILURE(outErrorCode)
+ * @stable ICU 59
+ */
+ UBool copyErrorTo(UErrorCode &outErrorCode) const;
+
+ /**
+ * How much longer is the new text compared with the old text?
+ * @return new length minus old length
+ * @stable ICU 59
+ */
+ int32_t lengthDelta() const { return delta; }
+ /**
+ * @return true if there are any change edits
+ * @stable ICU 59
+ */
+ UBool hasChanges() const { return numChanges != 0; }
+
+ /**
+ * @return the number of change edits
+ * @stable ICU 60
+ */
+ int32_t numberOfChanges() const { return numChanges; }
+
+ /**
+ * Access to the list of edits.
+ *
+ * At any moment in time, an instance of this class points to a single edit: a "window" into a span
+ * of the source string and the corresponding span of the destination string. The source string span
+ * starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
+ * span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
+ *
+ * The iterator can be moved between edits using the `next()`, `findSourceIndex(int32_t, UErrorCode &)`,
+ * and `findDestinationIndex(int32_t, UErrorCode &)` methods.
+ * Calling any of these methods mutates the iterator to make it point to the corresponding edit.
+ *
+ * For more information, see the documentation for {@link Edits}.
+ *
+ * @see getCoarseIterator
+ * @see getFineIterator
+ * @stable ICU 59
+ */
+ struct U_COMMON_API Iterator U_FINAL : public UMemory {
+ /**
+ * Default constructor, empty iterator.
+ * @stable ICU 60
+ */
+ Iterator() :
+ array(nullptr), index(0), length(0),
+ remaining(0), onlyChanges_(false), coarse(false),
+ dir(0), changed(false), oldLength_(0), newLength_(0),
+ srcIndex(0), replIndex(0), destIndex(0) {}
+ /**
+ * Copy constructor.
+ * @stable ICU 59
+ */
+ Iterator(const Iterator &other) = default;
+ /**
+ * Assignment operator.
+ * @stable ICU 59
+ */
+ Iterator &operator=(const Iterator &other) = default;
+
+ /**
+ * Advances the iterator to the next edit.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return true if there is another edit
+ * @stable ICU 59
+ */
+ UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
+
+ /**
+ * Moves the iterator to the edit that contains the source index.
+ * The source index may be found in a no-change edit
+ * even if normal iteration would skip no-change edits.
+ * Normal iteration can continue from a found edit.
+ *
+ * The iterator state before this search logically does not matter.
+ * (It may affect the performance of the search.)
+ *
+ * The iterator state after this search is undefined
+ * if the source index is out of bounds for the source string.
+ *
+ * @param i source index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return true if the edit for the source index was found
+ * @stable ICU 59
+ */
+ UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
+ return findIndex(i, true, errorCode) == 0;
+ }
+
+ /**
+ * Moves the iterator to the edit that contains the destination index.
+ * The destination index may be found in a no-change edit
+ * even if normal iteration would skip no-change edits.
+ * Normal iteration can continue from a found edit.
+ *
+ * The iterator state before this search logically does not matter.
+ * (It may affect the performance of the search.)
+ *
+ * The iterator state after this search is undefined
+ * if the destination index is out of bounds for the destination string.
+ *
+ * @param i destination index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return true if the edit for the destination index was found
+ * @stable ICU 60
+ */
+ UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) {
+ return findIndex(i, false, errorCode) == 0;
+ }
+
+ /**
+ * Computes the destination index corresponding to the given source index.
+ * If the source index is inside a change edit (not at its start),
+ * then the destination index at the end of that edit is returned,
+ * since there is no information about index mapping inside a change edit.
+ *
+ * (This means that indexes to the start and middle of an edit,
+ * for example around a grapheme cluster, are mapped to indexes
+ * encompassing the entire edit.
+ * The alternative, mapping an interior index to the start,
+ * would map such an interval to an empty one.)
+ *
+ * This operation will usually but not always modify this object.
+ * The iterator state after this search is undefined.
+ *
+ * @param i source index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return destination index; undefined if i is not 0..string length
+ * @stable ICU 60
+ */
+ int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
+
+ /**
+ * Computes the source index corresponding to the given destination index.
+ * If the destination index is inside a change edit (not at its start),
+ * then the source index at the end of that edit is returned,
+ * since there is no information about index mapping inside a change edit.
+ *
+ * (This means that indexes to the start and middle of an edit,
+ * for example around a grapheme cluster, are mapped to indexes
+ * encompassing the entire edit.
+ * The alternative, mapping an interior index to the start,
+ * would map such an interval to an empty one.)
+ *
+ * This operation will usually but not always modify this object.
+ * The iterator state after this search is undefined.
+ *
+ * @param i destination index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return source index; undefined if i is not 0..string length
+ * @stable ICU 60
+ */
+ int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
+
+ /**
+ * Returns whether the edit currently represented by the iterator is a change edit.
+ *
+ * @return true if this edit replaces oldLength() units with newLength() different ones.
+ * false if oldLength units remain unchanged.
+ * @stable ICU 59
+ */
+ UBool hasChange() const { return changed; }
+
+ /**
+ * The length of the current span in the source string, which starts at {@link #sourceIndex}.
+ *
+ * @return the number of units in the original string which are replaced or remain unchanged.
+ * @stable ICU 59
+ */
+ int32_t oldLength() const { return oldLength_; }
+
+ /**
+ * The length of the current span in the destination string, which starts at
+ * {@link #destinationIndex}, or in the replacement string, which starts at
+ * {@link #replacementIndex}.
+ *
+ * @return the number of units in the modified string, if hasChange() is true.
+ * Same as oldLength if hasChange() is false.
+ * @stable ICU 59
+ */
+ int32_t newLength() const { return newLength_; }
+
+ /**
+ * The start index of the current span in the source string; the span has length
+ * {@link #oldLength}.
+ *
+ * @return the current index into the source string
+ * @stable ICU 59
+ */
+ int32_t sourceIndex() const { return srcIndex; }
+
+ /**
+ * The start index of the current span in the replacement string; the span has length
+ * {@link #newLength}. Well-defined only if the current edit is a change edit.
+ *
+ * The *replacement string* is the concatenation of all substrings of the destination
+ * string corresponding to change edits.
+ *
+ * This method is intended to be used together with operations that write only replacement
+ * characters (e.g. operations specifying the \ref U_OMIT_UNCHANGED_TEXT option).
+ * The source string can then be modified in-place.
+ *
+ * @return the current index into the replacement-characters-only string,
+ * not counting unchanged spans
+ * @stable ICU 59
+ */
+ int32_t replacementIndex() const {
+ // TODO: Throw an exception if we aren't in a change edit?
+ return replIndex;
+ }
+
+ /**
+ * The start index of the current span in the destination string; the span has length
+ * {@link #newLength}.
+ *
+ * @return the current index into the full destination string
+ * @stable ICU 59
+ */
+ int32_t destinationIndex() const { return destIndex; }
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * A string representation of the current edit represented by the iterator for debugging. You
+ * should not depend on the contents of the return string.
+ * @internal
+ */
+ UnicodeString& toString(UnicodeString& appendTo) const;
+#endif // U_HIDE_INTERNAL_API
+
+ private:
+ friend class Edits;
+
+ /**
+ * Private constructor, called by the Edits::get...Iterator() factory functions
+ * with the Edits object's array and length.
+ * @param a the array of edit units to iterate over
+ * @param len the number of units in a
+ * @param oc presumably "only changes": the factories pass true for the iterators
+ * that are documented to skip no-change edits
+ * @param crs presumably "coarse": the factories pass true for the coarse iterators
+ */
+ Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
+
+ int32_t readLength(int32_t head);
+ void updateNextIndexes();
+ void updatePreviousIndexes();
+ UBool noNext();
+ UBool next(UBool onlyChanges, UErrorCode &errorCode);
+ UBool previous(UErrorCode &errorCode);
+ /** @return -1: error or i<0; 0: found; 1: i>=string length */
+ int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode);
+
+ const uint16_t *array;
+ int32_t index, length;
+ // 0 if we are not within compressed equal-length changes.
+ // Otherwise the number of remaining changes, including the current one.
+ int32_t remaining;
+ UBool onlyChanges_, coarse;
+
+ int8_t dir; // iteration direction: back(<0), initial(0), forward(>0)
+ UBool changed;
+ int32_t oldLength_, newLength_;
+ int32_t srcIndex, replIndex, destIndex;
+ };
+
+ /**
+ * Returns an Iterator for coarse-grained change edits
+ * (adjacent change edits are treated as one).
+ * Can be used to perform simple string updates.
+ * Skips no-change edits.
+ * @return an Iterator that merges adjacent changes.
+ * @stable ICU 59
+ */
+ Iterator getCoarseChangesIterator() const {
+ return Iterator(array, length, true, true);
+ }
+
+ /**
+ * Returns an Iterator for coarse-grained change and no-change edits
+ * (adjacent change edits are treated as one).
+ * Can be used to perform simple string updates.
+ * Adjacent change edits are treated as one edit.
+ * @return an Iterator that merges adjacent changes.
+ * @stable ICU 59
+ */
+ Iterator getCoarseIterator() const {
+ return Iterator(array, length, false, true);
+ }
+
+ /**
+ * Returns an Iterator for fine-grained change edits
+ * (full granularity of change edits is retained).
+ * Can be used for modifying styled text.
+ * Skips no-change edits.
+ * @return an Iterator that separates adjacent changes.
+ * @stable ICU 59
+ */
+ Iterator getFineChangesIterator() const {
+ return Iterator(array, length, true, false);
+ }
+
+ /**
+ * Returns an Iterator for fine-grained change and no-change edits
+ * (full granularity of change edits is retained).
+ * Can be used for modifying styled text.
+ * @return an Iterator that separates adjacent changes.
+ * @stable ICU 59
+ */
+ Iterator getFineIterator() const {
+ return Iterator(array, length, false, false);
+ }
+
+ /**
+ * Merges the two input Edits and appends the result to this object.
+ *
+ * Consider two string transformations (for example, normalization and case mapping)
+ * where each records Edits in addition to writing an output string.<br>
+ * Edits ab reflect how substrings of input string a
+ * map to substrings of intermediate string b.<br>
+ * Edits bc reflect how substrings of intermediate string b
+ * map to substrings of output string c.<br>
+ * This function merges ab and bc such that the additional edits
+ * recorded in this object reflect how substrings of input string a
+ * map to substrings of output string c.
+ *
+ * If unrelated Edits are passed in where the output string of the first
+ * has a different length than the input string of the second,
+ * then a U_ILLEGAL_ARGUMENT_ERROR is reported.
+ *
+ * @param ab reflects how substrings of input string a
+ * map to substrings of intermediate string b.
+ * @param bc reflects how substrings of intermediate string b
+ * map to substrings of output string c.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return *this, with the merged edits appended
+ * @stable ICU 60
+ */
+ Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
+
+private:
+ void releaseArray() U_NOEXCEPT;
+ Edits &copyArray(const Edits &other);
+ Edits &moveArray(Edits &src) U_NOEXCEPT;
+
+ // Overwrites the last array unit. Does not check for an empty array:
+ // callers must make sure that length > 0.
+ void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
+ // Returns the last array unit, or 0xffff if the array is empty.
+ int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
+
+ void append(int32_t r);
+ UBool growArray();
+
+ // Number of units in the built-in stackArray.
+ static const int32_t STACK_CAPACITY = 100;
+ // Storage for the recorded edit units; the inline constructors point it at stackArray.
+ uint16_t *array;
+ // The inline constructors set capacity to STACK_CAPACITY;
+ // length is the value passed to Iterator as the number of units.
+ int32_t capacity;
+ int32_t length;
+ // Returned by lengthDelta(): new length minus old length.
+ int32_t delta;
+ // Returned by numberOfChanges(): the number of change edits.
+ int32_t numChanges;
+ UErrorCode errorCode_;
+ uint16_t stackArray[STACK_CAPACITY];
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __EDITS_H__
diff --git a/thirdparty/icu4c/common/unicode/enumset.h b/thirdparty/icu4c/common/unicode/enumset.h
new file mode 100644
index 0000000000..bde8c455c0
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/enumset.h
@@ -0,0 +1,69 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2012,2014 International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*/
+
+/**
+ * \file
+ * \brief C++: internal template EnumSet<>
+ */
+
+#ifndef ENUMSET_H
+#define ENUMSET_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/* Can't use #ifndef U_HIDE_INTERNAL_API for the entire EnumSet class, needed in .h file declarations */
+/**
+ * enum bitset for boolean fields. Similar to Java EnumSet<>.
+ * Needs to range check. Used for private instance variables.
+ * @internal
+ * \cond
+ */
+template<typename T, uint32_t minValue, uint32_t limitValue>
+class EnumSet {
+public:
+ /** Constructs an empty set: no flag is set. */
+ inline EnumSet() : fBools(0) {}
+ /** Copy constructor: takes over the flag bits of other. */
+ inline EnumSet(const EnumSet<T,minValue,limitValue>& other) : fBools(other.fBools) {}
+ inline ~EnumSet() {}
+#ifndef U_HIDE_INTERNAL_API
+ /** Clears all flags. */
+ inline void clear() { fBools=0; }
+ /** Sets the flag for toAdd. */
+ inline void add(T toAdd) { set(toAdd, 1); }
+ /** Clears the flag for toRemove. */
+ inline void remove(T toRemove) { set(toRemove, 0); }
+ /** @return 1 if the flag for toCheck is set, otherwise 0 (same as get()). */
+ inline int32_t contains(T toCheck) const { return get(toCheck); }
+ /** Sets the flag for toSet if v is non-zero, clears it if v is 0. */
+ inline void set(T toSet, int32_t v) { fBools=(fBools&(~flag(toSet)))|(v?(flag(toSet)):0); }
+ /** @return 1 if the flag for toCheck is set, otherwise 0. */
+ inline int32_t get(T toCheck) const { return (fBools & flag(toCheck))?1:0; }
+ /** @return true if toCheck lies in the range minValue (inclusive) to limitValue (exclusive). */
+ inline UBool isValidEnum(T toCheck) const { return (toCheck>=minValue&&toCheck<limitValue); }
+ /** @return true if v is 0 or 1. */
+ inline UBool isValidValue(int32_t v) const { return (v==0||v==1); }
+ /** Assignment: takes over the flag bits of other. */
+ inline const EnumSet<T,minValue,limitValue>& operator=(const EnumSet<T,minValue,limitValue>& other) {
+ fBools = other.fBools;
+ return *this;
+ }
+
+ /** @return all flags as one bit mask; bit 0 is the flag for minValue. */
+ inline uint32_t getAll() const {
+ return fBools;
+ }
+#endif /* U_HIDE_INTERNAL_API */
+
+private:
+ /**
+ * Returns the single-bit mask for toCheck, that is, bit (toCheck-minValue).
+ * The shifted constant is a uint32_t so that the shift is done on an unsigned
+ * type: shifting the signed int literal 1 into bit 31 is not portable.
+ * No range check is done here; the shift count (toCheck-minValue) must be
+ * less than 32.
+ */
+ inline uint32_t flag(T toCheck) const { return (static_cast<uint32_t>(1)<<(toCheck-minValue)); }
+private:
+ /** The flags; bit i is the flag for the enum value minValue+i. */
+ uint32_t fBools;
+};
+
+/** \endcond */
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+#endif /* ENUMSET_H */
diff --git a/thirdparty/icu4c/common/unicode/errorcode.h b/thirdparty/icu4c/common/unicode/errorcode.h
new file mode 100644
index 0000000000..fe7b518323
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/errorcode.h
@@ -0,0 +1,144 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: errorcode.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009mar10
+* created by: Markus W. Scherer
+*/
+
+#ifndef __ERRORCODE_H__
+#define __ERRORCODE_H__
+
+/**
+ * \file
+ * \brief C++ API: ErrorCode class intended to make it easier to use
+ * ICU C and C++ APIs from C++ user code.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Wrapper class for UErrorCode, with conversion operators for direct use
+ * in ICU C and C++ APIs.
+ * Intended to be used as a base class, where a subclass overrides
+ * the handleFailure() function so that it throws an exception,
+ * does an assert(), logs an error, etc.
+ * This is not an abstract base class. This class can be used and instantiated
+ * by itself, although it will be more useful when subclassed.
+ *
+ * Features:
+ * - The constructor initializes the internal UErrorCode to U_ZERO_ERROR,
+ * removing one common source of errors.
+ * - Same use in C APIs taking a UErrorCode * (pointer)
+ * and C++ taking UErrorCode & (reference) via conversion operators.
+ * - Possible automatic checking for success when it goes out of scope.
+ *
+ * Note: For automatic checking for success in the destructor, a subclass
+ * must implement such logic in its own destructor because the base class
+ * destructor cannot call a subclass function (like handleFailure()).
+ * The ErrorCode base class destructor does nothing.
+ *
+ * Note also: While it is possible for a destructor to throw an exception,
+ * it is generally unsafe to do so. This means that in a subclass the destructor
+ * and the handleFailure() function may need to take different actions.
+ *
+ * Sample code:
+ * \code
+ * class IcuErrorCode: public icu::ErrorCode {
+ * public:
+ * virtual ~IcuErrorCode() { // should be defined in .cpp as "key function"
+ * // Safe because our handleFailure() does not throw exceptions.
+ * if(isFailure()) { handleFailure(); }
+ * }
+ * protected:
+ * virtual void handleFailure() const {
+ * log_failure(u_errorName(errorCode));
+ * exit(errorCode);
+ * }
+ * };
+ * IcuErrorCode error_code;
+ * UConverter *cnv = ucnv_open("Shift-JIS", error_code);
+ * length = ucnv_fromUChars(dest, capacity, src, length, error_code);
+ * ucnv_close(cnv);
+ * // IcuErrorCode destructor checks for success.
+ * \endcode
+ *
+ * @stable ICU 4.2
+ */
+class U_COMMON_API ErrorCode: public UMemory {
+public:
+ /**
+ * Default constructor. The wrapped UErrorCode starts out as U_ZERO_ERROR.
+ * @stable ICU 4.2
+ */
+ ErrorCode() : errorCode(U_ZERO_ERROR) {
+ }
+ /**
+ * Destructor. Does nothing; see the class documentation for details.
+ * @stable ICU 4.2
+ */
+ virtual ~ErrorCode();
+ /**
+ * Conversion operator for APIs that take a UErrorCode reference.
+ * @return a reference to the wrapped UErrorCode
+ * @stable ICU 4.2
+ */
+ operator UErrorCode & () {
+ return this->errorCode;
+ }
+ /**
+ * Conversion operator for APIs that take a UErrorCode pointer.
+ * @return a pointer to the wrapped UErrorCode
+ * @stable ICU 4.2
+ */
+ operator UErrorCode * () {
+ return &this->errorCode;
+ }
+ /**
+ * @return the result of U_SUCCESS() for the wrapped UErrorCode
+ * @stable ICU 4.2
+ */
+ UBool isSuccess() const {
+ return U_SUCCESS(this->errorCode);
+ }
+ /**
+ * @return the result of U_FAILURE() for the wrapped UErrorCode
+ * @stable ICU 4.2
+ */
+ UBool isFailure() const {
+ return U_FAILURE(this->errorCode);
+ }
+ /**
+ * @return the wrapped UErrorCode value
+ * @stable ICU 4.2
+ */
+ UErrorCode get() const {
+ return this->errorCode;
+ }
+ /**
+ * Replaces the wrapped UErrorCode value.
+ * @param value the new UErrorCode value
+ * @stable ICU 4.2
+ */
+ void set(UErrorCode value) {
+ this->errorCode = value;
+ }
+ /**
+ * Returns the UErrorCode value and resets it to U_ZERO_ERROR.
+ * @stable ICU 4.2
+ */
+ UErrorCode reset();
+ /**
+ * Asserts isSuccess().
+ * That is, this method checks for a failure code,
+ * and the base class handles it like this:
+ * \code
+ * if(isFailure()) { handleFailure(); }
+ * \endcode
+ * @stable ICU 4.4
+ */
+ void assertSuccess() const;
+ /**
+ * Returns a string for the UErrorCode value.
+ * The string is the same as the name of the error code constant
+ * in the UErrorCode enum.
+ * @stable ICU 4.4
+ */
+ const char* errorName() const;
+
+protected:
+ /**
+ * The wrapped UErrorCode, accessible to subclasses.
+ * @stable ICU 4.2
+ */
+ UErrorCode errorCode;
+ /**
+ * Called by assertSuccess() if isFailure() is true.
+ * The base class implementation does nothing.
+ * A subclass should override this function to deal with a failure code:
+ * Throw an exception, log an error, terminate the program, or similar.
+ * @stable ICU 4.2
+ */
+ virtual void handleFailure() const {
+ }
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __ERRORCODE_H__
diff --git a/thirdparty/icu4c/common/unicode/filteredbrk.h b/thirdparty/icu4c/common/unicode/filteredbrk.h
new file mode 100644
index 0000000000..8b07e39ae1
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/filteredbrk.h
@@ -0,0 +1,152 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+********************************************************************************
+* Copyright (C) 1997-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+********************************************************************************
+*/
+
+#ifndef FILTEREDBRK_H
+#define FILTEREDBRK_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/brkiter.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \file
+ * \brief C++ API: FilteredBreakIteratorBuilder
+ */
+
+/**
+ * The FilteredBreakIteratorBuilder is used to modify the behavior of a BreakIterator
+ * by constructing a new BreakIterator which suppresses certain segment boundaries.
+ * See http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions .
+ * For example, a typical English Sentence Break Iterator would break on the space
+ * in the string "Mr. Smith" (resulting in two segments),
+ * but with "Mr." as an exception, a filtered break iterator
+ * would consider the string "Mr. Smith" to be a single segment.
+ *
+ * @stable ICU 56
+ */
+class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
+ public:
+ /**
+ * destructor.
+ * @stable ICU 56
+ */
+ virtual ~FilteredBreakIteratorBuilder();
+
+ /**
+ * Construct a FilteredBreakIteratorBuilder based on rules in a locale.
+ * The rules are taken from CLDR exception data for the locale,
+ * see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions
+ * This is the equivalent of calling createInstance(UErrorCode&)
+ * and then repeatedly calling suppressBreakAfter(...) with the contents
+ * of the CLDR exception data.
+ * @param where the locale.
+ * @param status The error code.
+ * @return the new builder
+ * @stable ICU 56
+ */
+ static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status);
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * This function has been deprecated in favor of createEmptyInstance, which has
+ * identical behavior.
+ * @param status The error code.
+ * @return the new builder
+ * @deprecated ICU 60 use createEmptyInstance instead
+ * @see createEmptyInstance()
+ */
+ static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status);
+#endif /* U_HIDE_DEPRECATED_API */
+
+ /**
+ * Construct an empty FilteredBreakIteratorBuilder.
+ * In this state, it will not suppress any segment boundaries.
+ * @param status The error code.
+ * @return the new builder
+ * @stable ICU 60
+ */
+ static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status);
+
+ /**
+ * Suppress a certain string from being the end of a segment.
+ * For example, after suppressing "Mr.", segments ending in "Mr." will not be returned
+ * by the iterator.
+ * @param string the string to suppress, such as "Mr."
+ * @param status error code
+ * @return returns true if the string was not present and now added,
+ * false if the call was a no-op because the string was already being suppressed.
+ * @stable ICU 56
+ */
+ virtual UBool suppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
+
+ /**
+ * Stop suppressing a certain string from being the end of the segment.
+ * This function does not create any new segment boundaries, but only serves to un-do
+ * the effect of earlier calls to suppressBreakAfter, or to un-do the effect of
+ * locale data which may be suppressing certain strings.
+ * @param string the exception to remove
+ * @param status error code
+ * @return returns true if the string was present and now removed,
+ * false if the call was a no-op because the string was not being suppressed.
+ * @stable ICU 56
+ */
+ virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
+
+#ifndef U_FORCE_HIDE_DEPRECATED_API
+ /**
+ * This function has been deprecated in favor of wrapIteratorWithFilter()
+ * The behavior is identical.
+ * @param adoptBreakIterator the break iterator to adopt
+ * @param status error code
+ * @return the new BreakIterator, owned by the caller.
+ * @deprecated ICU 60 use wrapIteratorWithFilter() instead
+ * @see wrapIteratorWithFilter()
+ */
+ virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
+#endif // U_FORCE_HIDE_DEPRECATED_API
+
+ /**
+ * Wrap (adopt) an existing break iterator in a new filtered instance.
+ * The resulting BreakIterator is owned by the caller.
+ * The FilteredBreakIteratorBuilder may be destroyed before the BreakIterator is destroyed.
+ * Note that the adoptBreakIterator is adopted by the new BreakIterator
+ * and should no longer be used by the caller.
+ * The FilteredBreakIteratorBuilder may be reused.
+ * This function is an alias for build()
+ * @param adoptBreakIterator the break iterator to adopt
+ * @param status error code
+ * @return the new BreakIterator, owned by the caller.
+ * @stable ICU 60
+ */
+ inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) {
+ return build(adoptBreakIterator, status);
+ }
+
+ protected:
+ /**
+ * For subclass use
+ * @stable ICU 56
+ */
+ FilteredBreakIteratorBuilder();
+};
+
+
+U_NAMESPACE_END
+
+#endif // #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // #ifndef FILTEREDBRK_H
diff --git a/thirdparty/icu4c/common/unicode/icudataver.h b/thirdparty/icu4c/common/unicode/icudataver.h
new file mode 100644
index 0000000000..f218ed8ebc
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/icudataver.h
@@ -0,0 +1,43 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2009-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*/
+
+
+/**
+ * \file
+ * \brief C API: access to ICU Data Version number
+ */
+
+#ifndef __ICU_DATA_VER_H__
+#define __ICU_DATA_VER_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * @stable ICU 49
+ */
+#define U_ICU_VERSION_BUNDLE "icuver"
+
+/**
+ * @stable ICU 49
+ */
+#define U_ICU_DATA_KEY "DataVersion"
+
+/**
+ * Retrieves the data version from icuver and stores it in dataVersionFillin.
+ *
+ * @param dataVersionFillin icuver data version information to be filled in if not-null
+ * @param status stores the error code from the calls to resource bundle
+ *
+ * @stable ICU 49
+ */
+U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status);
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/icuplug.h b/thirdparty/icu4c/common/unicode/icuplug.h
new file mode 100644
index 0000000000..52f810da57
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/icuplug.h
@@ -0,0 +1,388 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2009-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : icuplug.h
+*
+* Date Name Description
+* 10/29/2009 sl New.
+******************************************************************************
+*/
+
+/**
+ * \file
+ * \brief C API: ICU Plugin API
+ *
+ * <h2>C API: ICU Plugin API</h2>
+ *
+ * <p>C API allowing run-time loadable modules that extend or modify ICU functionality.</p>
+ *
+ * <h3>Loading and Configuration</h3>
+ *
+ * <p>At ICU startup time, the environment variable "ICU_PLUGINS" will be
+ * queried for a directory name. If it is not set, the preprocessor symbol
+ * "DEFAULT_ICU_PLUGINS" will be checked for a default value.</p>
+ *
+ * <p>Within the above-named directory, the file "icuplugins##.txt" will be
+ * opened, if present, where ## is the major+minor number of the currently
+ * running ICU (such as, 44 for ICU 4.4, thus icuplugins44.txt)</p>
+ *
+ * <p>The configuration file has this format:</p>
+ *
+ * <ul>
+ * <li>Hash (#) begins a comment line</li>
+ *
+ * <li>Non-comment lines have two or three components:
+ * LIBRARYNAME ENTRYPOINT [ CONFIGURATION .. ]</li>
+ *
+ * <li>Tabs or spaces separate the three items.</li>
+ *
+ * <li>LIBRARYNAME is the name of a shared library, either a short name if
+ * it is on the loader path, or a full pathname.</li>
+ *
+ * <li>ENTRYPOINT is the short (undecorated) symbol name of the plugin's
+ * entrypoint, as above.</li>
+ *
+ * <li>CONFIGURATION is the entire rest of the line. It's passed as-is to
+ * the plugin.</li>
+ * </ul>
+ *
+ * <p>An example configuration file is, in its entirety:</p>
+ *
+ * \code
+ * # this is icuplugins44.txt
+ * testplug.dll myPlugin hello=world
+ * \endcode
+ * <p>Plugins are categorized as "high" or "low" level. Low level are those
+ * which must be run BEFORE high level plugins, and before any operations
+ * which cause ICU to be 'initialized'. If a plugin is low level but
+ * causes ICU to allocate memory or become initialized, that plugin is said
+ * to cause a 'level change'. </p>
+ *
+ * <p>At load time, ICU first queries all plugins to determine their level,
+ * then loads all 'low' plugins first, and then loads all 'high' plugins.
+ * Plugins are otherwise loaded in the order listed in the configuration file.</p>
+ *
+ * <h3>Implementing a Plugin</h3>
+ * \code
+ * U_CAPI UPlugTokenReturn U_EXPORT2
+ * myPlugin (UPlugData *plug, UPlugReason reason, UErrorCode *status) {
+ * if(reason==UPLUG_REASON_QUERY) {
+ * uplug_setPlugName(plug, "Simple Plugin");
+ * uplug_setPlugLevel(plug, UPLUG_LEVEL_HIGH);
+ * } else if(reason==UPLUG_REASON_LOAD) {
+ * ... Set up some ICU things here....
+ * } else if(reason==UPLUG_REASON_UNLOAD) {
+ * ... unload, clean up ...
+ * }
+ * return UPLUG_TOKEN;
+ * }
+ * \endcode
+ *
+ * <p>The UPlugData* is an opaque pointer to the plugin-specific data, and is
+ * used in all other API calls.</p>
+ *
+ * <p>The API contract is:</p>
+ * <ol><li>The plugin MUST always return UPLUG_TOKEN as a return value- to
+ * indicate that it is a valid plugin.</li>
+ *
+ * <li>When the 'reason' parameter is set to UPLUG_REASON_QUERY, the
+ * plugin MUST call uplug_setPlugLevel() to indicate whether it is a high
+ * level or low level plugin.</li>
+ *
+ * <li>When the 'reason' parameter is UPLUG_REASON_QUERY, the plugin
+ * SHOULD call uplug_setPlugName to indicate a human readable plugin name.</li></ol>
+ *
+ *
+ * \internal ICU 4.4 Technology Preview
+ */
+
+
+#ifndef ICUPLUG_H
+#define ICUPLUG_H
+
+#include "unicode/utypes.h"
+
+
+#if UCONFIG_ENABLE_PLUGINS || defined(U_IN_DOXYGEN)
+
+
+
+/* === Basic types === */
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * @{
+ * Opaque structure passed to/from a plugin.
+ * use the APIs to access it.
+ * @internal ICU 4.4 Technology Preview
+ */
+
+struct UPlugData;
+typedef struct UPlugData UPlugData;
+
+/** @} */
+
+/**
+ * Random Token to identify a valid ICU plugin. Plugins must return this
+ * from the entrypoint.
+ * @internal ICU 4.4 Technology Preview
+ */
+#define UPLUG_TOKEN 0x54762486
+
+/**
+ * Max width of names, symbols, and configuration strings
+ * @internal ICU 4.4 Technology Preview
+ */
+#define UPLUG_NAME_MAX 100
+
+
+/**
+ * Return value from a plugin entrypoint.
+ * Must always be set to UPLUG_TOKEN
+ * @see UPLUG_TOKEN
+ * @internal ICU 4.4 Technology Preview
+ */
+typedef uint32_t UPlugTokenReturn;
+
+/**
+ * Reason code passed to a plugin entrypoint: says why it is being called.
+ * @internal ICU 4.4 Technology Preview
+ */
+typedef enum {
+ UPLUG_REASON_QUERY = 0, /**< The plugin is being queried for info. **/
+ UPLUG_REASON_LOAD = 1, /**< The plugin is being loaded. **/
+ UPLUG_REASON_UNLOAD = 2, /**< The plugin is being unloaded. **/
+ /**
+ * Number of known reasons.
+ * @internal The numeric value may change over time, see ICU ticket #12420.
+ */
+ UPLUG_REASON_COUNT
+} UPlugReason;
+
+
+/**
+ * Level of plugin loading
+ * INITIAL: UNKNOWN
+ * QUERY: INVALID -> { LOW | HIGH }
+ * ERR -> INVALID
+ * @internal ICU 4.4 Technology Preview
+ */
+typedef enum {
+ UPLUG_LEVEL_INVALID = 0, /**< The plugin is invalid, hasn't called uplug_setPlugLevel, or can't load. **/
+ UPLUG_LEVEL_UNKNOWN = 1, /**< The plugin is waiting to be installed. **/
+ UPLUG_LEVEL_LOW = 2, /**< The plugin must be called before u_init completes **/
+ UPLUG_LEVEL_HIGH = 3, /**< The plugin can run at any time. **/
+ /**
+ * Number of known levels.
+ * @internal The numeric value may change over time, see ICU ticket #12420.
+ */
+ UPLUG_LEVEL_COUNT
+} UPlugLevel;
+
+/**
+ * Entrypoint for an ICU plugin; reason (a UPlugReason) says why it is being called.
+ * @param plug the UPlugData handle.
+ * @param status the plugin's extended status code.
+ * @return A valid plugin must return UPLUG_TOKEN
+ * @internal ICU 4.4 Technology Preview
+ */
+typedef UPlugTokenReturn (U_EXPORT2 UPlugEntrypoint) (
+ UPlugData *plug,
+ UPlugReason reason,
+ UErrorCode *status);
+
+/* === Needed for Implementing === */
+
+/**
+ * Request that this plugin not be unloaded at cleanup time.
+ * This is appropriate for plugins which cannot be cleaned up.
+ * @see u_cleanup()
+ * @param plug plugin
+ * @param dontUnload set true if this plugin can't be unloaded
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI void U_EXPORT2
+uplug_setPlugNoUnload(UPlugData *plug, UBool dontUnload);
+
+/**
+ * Set the level of this plugin.
+ * @param plug plugin data handle
+ * @param level the level of this plugin
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI void U_EXPORT2
+uplug_setPlugLevel(UPlugData *plug, UPlugLevel level);
+
+/**
+ * Get the level of this plugin.
+ * @param plug plugin data handle
+ * @return the level of this plugin
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI UPlugLevel U_EXPORT2
+uplug_getPlugLevel(UPlugData *plug);
+
+/**
+ * Get the lowest level of plug which can currently load.
+ * For example, if UPLUG_LEVEL_LOW is returned, then low level plugins may load;
+ * if UPLUG_LEVEL_HIGH is returned, then only high level plugins may load.
+ * @return the lowest level of plug which can currently load
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI UPlugLevel U_EXPORT2
+uplug_getCurrentLevel(void);
+
+
+/**
+ * Get the load status of the plugin identified by plug.
+ * @return The error code of this plugin's load attempt.
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI UErrorCode U_EXPORT2
+uplug_getPlugLoadStatus(UPlugData *plug);
+
+/**
+ * Set the human-readable name of this plugin.
+ * @param plug plugin data handle
+ * @param name the name of this plugin. The first UPLUG_NAME_MAX characters will be copied into a new buffer.
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI void U_EXPORT2
+uplug_setPlugName(UPlugData *plug, const char *name);
+
+/**
+ * Get the human-readable name of this plugin.
+ * @param plug plugin data handle
+ * @return the name of this plugin
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI const char * U_EXPORT2
+uplug_getPlugName(UPlugData *plug);
+
+/**
+ * Return the symbol name for this plugin, if known.
+ * @param plug plugin data handle
+ * @return the symbol name, or NULL
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI const char * U_EXPORT2
+uplug_getSymbolName(UPlugData *plug);
+
+/**
+ * Return the library name for this plugin, if known.
+ * @param plug plugin data handle
+ * @param status error code
+ * @return the library name, or NULL
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI const char * U_EXPORT2
+uplug_getLibraryName(UPlugData *plug, UErrorCode *status);
+
+/**
+ * Return the library used for this plugin, if known.
+ * Plugins could use this to load data out of their own library.
+ * @param plug plugin data handle
+ * @return the library, or NULL
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI void * U_EXPORT2
+uplug_getLibrary(UPlugData *plug);
+
+/**
+ * Return the plugin-specific context data.
+ * @param plug plugin data handle
+ * @return the context, or NULL if not set
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI void * U_EXPORT2
+uplug_getContext(UPlugData *plug);
+
+/**
+ * Set the plugin-specific context data.
+ * @param plug plugin data handle
+ * @param context new context to set
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI void U_EXPORT2
+uplug_setContext(UPlugData *plug, void *context);
+
+
+/**
+ * Get the configuration string, if available.
+ * The string is in the platform default codepage.
+ * @param plug plugin data handle
+ * @return configuration string, or else null.
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI const char * U_EXPORT2
+uplug_getConfiguration(UPlugData *plug);
+
+/**
+ * Return all currently installed plugins, from newest to oldest
+ * Usage Example:
+ * \code
+ * UPlugData *plug = NULL;
+ * while((plug=uplug_nextPlug(plug)) != NULL) {
+ * ... do something with 'plug' ...
+ * }
+ * \endcode
+ * Not thread safe- do not call while plugs are added or removed.
+ * @param prior pass in 'NULL' to get the first (most recent) plug,
+ * otherwise pass the value returned on a prior call to uplug_nextPlug
+ * @return the next oldest plugin, or NULL if no more.
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI UPlugData* U_EXPORT2
+uplug_nextPlug(UPlugData *prior);
+
+/**
+ * Inject a plugin as if it were loaded from a library.
+ * This is useful for testing plugins.
+ * Note that it will have a 'NULL' library pointer associated
+ * with it, and therefore no library will be closed at cleanup time.
+ * Low level plugins may not be able to load, as ordering can't be enforced.
+ * @param entrypoint entrypoint to install
+ * @param config user specified configuration string, if available, or NULL.
+ * @param status error result
+ * @return the new UPlugData associated with this plugin, or NULL if error.
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI UPlugData* U_EXPORT2
+uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status);
+
+
+/**
+ * Inject a plugin from a library, as if the information came from a config file.
+ * Low level plugins may not be able to load, and ordering can't be enforced.
+ * @param libName DLL name to load
+ * @param sym symbol of plugin (UPlugEntrypoint function)
+ * @param config configuration string, or NULL
+ * @param status error result
+ * @return the new UPlugData associated with this plugin, or NULL if error.
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI UPlugData* U_EXPORT2
+uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status);
+
+/**
+ * Remove a plugin.
+ * Will request the plugin to be unloaded, and close the library if needed
+ * @param plug plugin handle to close
+ * @param status error result
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI void U_EXPORT2
+uplug_removePlug(UPlugData *plug, UErrorCode *status);
+#endif /* U_HIDE_INTERNAL_API */
+
+#endif /* UCONFIG_ENABLE_PLUGINS || defined(U_IN_DOXYGEN) */
+
+#endif /* ICUPLUG_H */
+
diff --git a/thirdparty/icu4c/common/unicode/idna.h b/thirdparty/icu4c/common/unicode/idna.h
new file mode 100644
index 0000000000..1305dc6048
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/idna.h
@@ -0,0 +1,330 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: idna.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010mar05
+* created by: Markus W. Scherer
+*/
+
+#ifndef __IDNA_H__
+#define __IDNA_H__
+
+/**
+ * \file
+ * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#if !UCONFIG_NO_IDNA
+
+#include "unicode/bytestream.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uidna.h"
+#include "unicode/unistr.h"
+
+U_NAMESPACE_BEGIN
+
+class IDNAInfo;
+
+/**
+ * Abstract base class for IDNA processing.
+ * See http://www.unicode.org/reports/tr46/
+ * and http://www.ietf.org/rfc/rfc3490.txt
+ *
+ * The IDNA class is not intended for public subclassing.
+ *
+ * This C++ API currently only implements UTS #46.
+ * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
+ * and IDNA2003 (functions that do not use a service object).
+ * @stable ICU 4.6
+ */
+class U_COMMON_API IDNA : public UObject {
+public:
+ /**
+ * Destructor.
+ * @stable ICU 4.6
+ */
+ ~IDNA();
+
+ /**
+ * Returns an IDNA instance which implements UTS #46.
+ * Returns an unmodifiable instance, owned by the caller.
+ * Cache it for multiple operations, and delete it when done.
+ * The instance is thread-safe, that is, it can be used concurrently.
+ *
+ * UTS #46 defines Unicode IDNA Compatibility Processing,
+ * updated to the latest version of Unicode and compatible with both
+ * IDNA2003 and IDNA2008.
+ *
+ * The worker functions use transitional processing, including deviation mappings,
+ * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
+ * is used in which case the deviation characters are passed through without change.
+ *
+ * Disallowed characters are mapped to U+FFFD.
+ *
+ * For available options see the uidna.h header.
+ * Operations with the UTS #46 instance do not support the
+ * UIDNA_ALLOW_UNASSIGNED option.
+ *
+ * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
+ * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
+ * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
+ *
+ * @param options Bit set to modify the processing and error checking.
+ * See option bit set values in uidna.h.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the UTS #46 IDNA instance, if successful
+ * @stable ICU 4.6
+ */
+ static IDNA *
+ createUTS46Instance(uint32_t options, UErrorCode &errorCode);
+
+ /**
+ * Converts a single domain name label into its ASCII form for DNS lookup.
+ * If any processing step fails, then info.hasErrors() will be true and
+ * the result might not be an ASCII string.
+ * The label might be modified according to the types of errors.
+ * Labels with severe errors will be left in (or turned into) their Unicode form.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param label Input domain name label
+ * @param dest Destination string object
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.6
+ */
+ virtual UnicodeString &
+ labelToASCII(const UnicodeString &label, UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const = 0;
+
+ /**
+ * Converts a single domain name label into its Unicode form for human-readable display.
+ * If any processing step fails, then info.hasErrors() will be true.
+ * The label might be modified according to the types of errors.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param label Input domain name label
+ * @param dest Destination string object
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.6
+ */
+ virtual UnicodeString &
+ labelToUnicode(const UnicodeString &label, UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const = 0;
+
+ /**
+ * Converts a whole domain name into its ASCII form for DNS lookup.
+ * If any processing step fails, then info.hasErrors() will be true and
+ * the result might not be an ASCII string.
+ * The domain name might be modified according to the types of errors.
+ * Labels with severe errors will be left in (or turned into) their Unicode form.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param name Input domain name
+ * @param dest Destination string object
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.6
+ */
+ virtual UnicodeString &
+ nameToASCII(const UnicodeString &name, UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const = 0;
+
+ /**
+ * Converts a whole domain name into its Unicode form for human-readable display.
+ * If any processing step fails, then info.hasErrors() will be true.
+ * The domain name might be modified according to the types of errors.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param name Input domain name
+ * @param dest Destination string object
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.6
+ */
+ virtual UnicodeString &
+ nameToUnicode(const UnicodeString &name, UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const = 0;
+
+ // UTF-8 versions of the processing methods ---------------------------- ***
+
+ /**
+ * Converts a single domain name label into its ASCII form for DNS lookup.
+ * UTF-8 version of labelToASCII(), same behavior.
+ *
+ * @param label Input domain name label
+ * @param dest Destination byte sink; Flush()ed if successful
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * (No return value: the method returns void; output goes to dest.)
+ * @stable ICU 4.6
+ */
+ virtual void
+ labelToASCII_UTF8(StringPiece label, ByteSink &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ /**
+ * Converts a single domain name label into its Unicode form for human-readable display.
+ * UTF-8 version of labelToUnicode(), same behavior.
+ *
+ * @param label Input domain name label
+ * @param dest Destination byte sink; Flush()ed if successful
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * (No return value: the method returns void; output goes to dest.)
+ * @stable ICU 4.6
+ */
+ virtual void
+ labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ /**
+ * Converts a whole domain name into its ASCII form for DNS lookup.
+ * UTF-8 version of nameToASCII(), same behavior.
+ *
+ * @param name Input domain name
+ * @param dest Destination byte sink; Flush()ed if successful
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * (No return value: the method returns void; output goes to dest.)
+ * @stable ICU 4.6
+ */
+ virtual void
+ nameToASCII_UTF8(StringPiece name, ByteSink &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ /**
+ * Converts a whole domain name into its Unicode form for human-readable display.
+ * UTF-8 version of nameToUnicode(), same behavior.
+ *
+ * @param name Input domain name
+ * @param dest Destination byte sink; Flush()ed if successful
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * (No return value: the method returns void; output goes to dest.)
+ * @stable ICU 4.6
+ */
+ virtual void
+ nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+};
+
+class UTS46;
+
+/**
+ * Output container for IDNA processing errors.
+ * The IDNAInfo class is not suitable for subclassing.
+ * @stable ICU 4.6
+ */
+class U_COMMON_API IDNAInfo : public UMemory {
+public:
+ /**
+ * Constructor for stack allocation.
+ * @stable ICU 4.6
+ */
+ IDNAInfo() : errors(0), labelErrors(0), isTransDiff(false), isBiDi(false), isOkBiDi(true) {}
+ /**
+ * Were there IDNA processing errors?
+ * @return true if there were processing errors
+ * @stable ICU 4.6
+ */
+ UBool hasErrors() const { return errors!=0; }
+ /**
+ * Returns a bit set indicating IDNA processing errors.
+ * See UIDNA_ERROR_... constants in uidna.h.
+ * @return bit set of processing errors
+ * @stable ICU 4.6
+ */
+ uint32_t getErrors() const { return errors; }
+ /**
+ * Returns true if transitional and nontransitional processing produce different results.
+ * This is the case when the input label or domain name contains
+ * one or more deviation characters outside a Punycode label (see UTS #46).
+ * <ul>
+ * <li>With nontransitional processing, such characters are
+ * copied to the destination string.
+ * <li>With transitional processing, such characters are
+ * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
+ * </ul>
+ * @return true if transitional and nontransitional processing produce different results
+ * @stable ICU 4.6
+ */
+ UBool isTransitionalDifferent() const { return isTransDiff; }
+
+private:
+ friend class UTS46;
+
+ IDNAInfo(const IDNAInfo &other); // private copy constructor: no copying
+ IDNAInfo &operator=(const IDNAInfo &other); // private copy assignment: no copying
+
+ void reset() {
+ errors=labelErrors=0;
+ isTransDiff=false;
+ isBiDi=false;
+ isOkBiDi=true;
+ }
+
+ uint32_t errors, labelErrors;
+ UBool isTransDiff;
+ UBool isBiDi;
+ UBool isOkBiDi;
+};
+
+U_NAMESPACE_END
+
+#endif // UCONFIG_NO_IDNA
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __IDNA_H__
diff --git a/thirdparty/icu4c/common/unicode/localebuilder.h b/thirdparty/icu4c/common/unicode/localebuilder.h
new file mode 100644
index 0000000000..27a894de10
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/localebuilder.h
@@ -0,0 +1,311 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+#ifndef __LOCALEBUILDER_H__
+#define __LOCALEBUILDER_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/locid.h"
+#include "unicode/localematcher.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Builder API for Locale
+ */
+
+U_NAMESPACE_BEGIN
+class CharString;
+
+/**
+ * <code>LocaleBuilder</code> is used to build instances of <code>Locale</code>
+ * from values configured by the setters. Unlike the <code>Locale</code>
+ * constructors, the <code>LocaleBuilder</code> checks if a value configured by a
+ * setter satisfies the syntax requirements defined by the <code>Locale</code>
+ * class. A <code>Locale</code> object created by a <code>LocaleBuilder</code> is
+ * well-formed and can be transformed to a well-formed IETF BCP 47 language tag
+ * without losing information.
+ *
+ * <p>The following example shows how to create a <code>Locale</code> object
+ * with the <code>LocaleBuilder</code>.
+ * <blockquote>
+ * <pre>
+ * UErrorCode status = U_ZERO_ERROR;
+ * Locale aLocale = LocaleBuilder()
+ * .setLanguage("sr")
+ * .setScript("Latn")
+ * .setRegion("RS")
+ * .build(status);
+ * if (U_SUCCESS(status)) {
+ * // ...
+ * }
+ * </pre>
+ * </blockquote>
+ *
+ * <p>LocaleBuilders can be reused; <code>clear()</code> resets all
+ * fields to their default values.
+ *
+ * <p>LocaleBuilder tracks errors in an internal UErrorCode. For all setters,
+ * except setLanguageTag and setLocale, LocaleBuilder will return immediately
+ * if the internal UErrorCode is in error state.
+ * To reset internal state and error code, call clear method.
+ * The setLanguageTag and setLocale method will first clear the internal
+ * UErrorCode, then track the error of the validation of the input parameter
+ * into the internal UErrorCode.
+ *
+ * @stable ICU 64
+ */
+class U_COMMON_API LocaleBuilder : public UObject {
+public:
+ /**
+ * Constructs an empty LocaleBuilder. The default value of all
+ * fields, extensions, and private use information is the
+ * empty string.
+ *
+ * @stable ICU 64
+ */
+ LocaleBuilder();
+
+ /**
+ * Destructor
+ * @stable ICU 64
+ */
+ virtual ~LocaleBuilder();
+
+ /**
+ * Resets the <code>LocaleBuilder</code> to match the provided
+ * <code>locale</code>. Existing state is discarded.
+ *
+ * <p>All fields of the locale must be well-formed.
+ * <p>This method clears the internal UErrorCode.
+ *
+ * @param locale the locale
+ * @return This builder.
+ *
+ * @stable ICU 64
+ */
+ LocaleBuilder& setLocale(const Locale& locale);
+
+ /**
+ * Resets the LocaleBuilder to match the provided
+ * [Unicode Locale Identifier](http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_id) .
+ * Discards the existing state.
+ * The empty string causes the builder to be reset, like {@link #clear}.
+ * Legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * are converted to their canonical form before being processed.
+ * Otherwise, the <code>language tag</code> must be well-formed,
+ * or else the build() method will later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>This method clears the internal UErrorCode.
+ *
+ * @param tag the language tag, defined as
+ * [unicode_locale_id](http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_id).
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& setLanguageTag(StringPiece tag);
+
+ /**
+ * Sets the language. If <code>language</code> is the empty string, the
+ * language in this <code>LocaleBuilder</code> is removed. Otherwise, the
+ * <code>language</code> must be well-formed, or else the build() method will
+ * later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>The syntax of language value is defined as
+ * [unicode_language_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag).
+ *
+ * @param language the language
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& setLanguage(StringPiece language);
+
+ /**
+ * Sets the script. If <code>script</code> is the empty string, the script in
+ * this <code>LocaleBuilder</code> is removed.
+ * Otherwise, the <code>script</code> must be well-formed, or else the build()
+ * method will later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>The script value is a four-letter script code as
+ * [unicode_script_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag)
+ * defined by ISO 15924.
+ *
+ * @param script the script
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& setScript(StringPiece script);
+
+ /**
+ * Sets the region. If region is the empty string, the region in this
+ * <code>LocaleBuilder</code> is removed. Otherwise, the <code>region</code>
+ * must be well-formed, or else the build() method will later report an
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>The region value is defined by
+ * [unicode_region_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag)
+ * as a two-letter ISO 3166 code or a three-digit UN M.49 area code.
+ *
+ * <p>The region value in the <code>Locale</code> created by the
+ * <code>LocaleBuilder</code> is always normalized to upper case.
+ *
+ * @param region the region
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& setRegion(StringPiece region);
+
+ /**
+ * Sets the variant. If variant is the empty string, the variant in this
+ * <code>LocaleBuilder</code> is removed. Otherwise, the <code>variant</code>
+ * must be well-formed, or else the build() method will later report an
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p><b>Note:</b> This method checks if <code>variant</code>
+ * satisfies the
+ * [unicode_variant_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag)
+ * syntax requirements, and normalizes the value to lowercase letters. However,
+ * the <code>Locale</code> class does not impose any syntactic
+ * restriction on variant. To set an ill-formed variant, use a Locale constructor.
+ * If there are multiple unicode_variant_subtag, the caller must concatenate
+ * them with '-' as separator (ex: "foobar-fibar").
+ *
+ * @param variant the variant
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& setVariant(StringPiece variant);
+
+ /**
+ * Sets the extension for the given key. If the value is the empty string,
+ * the extension is removed. Otherwise, the <code>key</code> and
+ * <code>value</code> must be well-formed, or else the build() method will
+ * later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p><b>Note:</b> The key ('u') is used for the Unicode locale extension.
+ * Setting a value for this key replaces any existing Unicode locale key/type
+ * pairs with those defined in the extension.
+ *
+ * <p><b>Note:</b> The key ('x') is used for the private use code. To be
+ * well-formed, the value for this key needs only to have subtags of one to
+ * eight alphanumeric characters, not two to eight as in the general case.
+ *
+ * @param key the extension key
+ * @param value the extension value
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& setExtension(char key, StringPiece value);
+
+ /**
+ * Sets the Unicode locale keyword type for the given key. If the type
+ * StringPiece is constructed with a nullptr, the keyword is removed.
+ * If the type is the empty string, the keyword is set without type subtags.
+ * Otherwise, the key and type must be well-formed, or else the build()
+ * method will later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>Keys and types are converted to lower case.
+ *
+ * <p><b>Note:</b> Setting the 'u' extension via {@link #setExtension}
+ * replaces all Unicode locale keywords with those defined in the
+ * extension.
+ *
+ * @param key the Unicode locale key
+ * @param type the Unicode locale type
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& setUnicodeLocaleKeyword(
+ StringPiece key, StringPiece type);
+
+ /**
+ * Adds a unicode locale attribute, if not already present, otherwise
+ * has no effect. The attribute must not be empty string and must be
+ * well-formed or U_ILLEGAL_ARGUMENT_ERROR will be set to status
+ * during the build() call.
+ *
+ * @param attribute the attribute
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& addUnicodeLocaleAttribute(StringPiece attribute);
+
+ /**
+ * Removes a unicode locale attribute, if present, otherwise has no
+ * effect. The attribute must not be empty string and must be well-formed
+ * or U_ILLEGAL_ARGUMENT_ERROR will be set to status during the build() call.
+ *
+ * <p>Attribute comparison for removal is case-insensitive.
+ *
+ * @param attribute the attribute
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& removeUnicodeLocaleAttribute(StringPiece attribute);
+
+ /**
+ * Resets the builder to its initial, empty state.
+ * <p>This method clears the internal UErrorCode.
+ *
+ * @return this builder
+ * @stable ICU 64
+ */
+ LocaleBuilder& clear();
+
+ /**
+ * Resets the extensions to their initial, empty state.
+ * Language, script, region and variant are unchanged.
+ *
+ * @return this builder
+ * @stable ICU 64
+ */
+ LocaleBuilder& clearExtensions();
+
+ /**
+ * Returns an instance of <code>Locale</code> created from the fields set
+ * on this builder.
+ * If a setter or the build() call itself needs to allocate memory and the
+ * allocation fails, the status will be set to U_MEMORY_ALLOCATION_ERROR.
+ * If any of the fields set by the setters are not well-formed, the status
+ * will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will
+ * not change after the build() call and the caller is free to keep using
+ * the same builder to build more locales.
+ *
+ * @return a new Locale
+ * @stable ICU 64
+ */
+ Locale build(UErrorCode& status);
+
+ /**
+ * Sets the UErrorCode if an error occurred while recording the setter calls.
+ * Preserves older error codes in the outErrorCode.
+ * @param outErrorCode Set to an error code that occurred while setting subtags.
+ * Unchanged if there is no such error or if outErrorCode
+ * already contained an error.
+ * @return true if U_FAILURE(outErrorCode)
+ * @stable ICU 65
+ */
+ UBool copyErrorTo(UErrorCode &outErrorCode) const;
+
+private:
+ friend class LocaleMatcher::Result; // NOTE(review): presumably so Result can call copyExtensionsFrom() — confirm
+
+ void copyExtensionsFrom(const Locale& src, UErrorCode& errorCode); // NOTE(review): undocumented; by its name copies src's extensions into this builder — confirm
+
+ UErrorCode status_; // presumably the "internal UErrorCode" described in the class comment
+ char language_[9]; // NOTE(review): presumably an up-to-8-character language subtag plus NUL — confirm
+ char script_[5]; // four-letter script code (see setScript) plus, presumably, a terminating NUL
+ char region_[4]; // two-letter or three-digit region (see setRegion) plus, presumably, a terminating NUL
+ CharString *variant_; // Pointer not object so we need not #include internal charstr.h.
+ icu::Locale *extensions_; // Pointer not object. Storage for all other fields.
+
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __LOCALEBUILDER_H__
diff --git a/thirdparty/icu4c/common/unicode/localematcher.h b/thirdparty/icu4c/common/unicode/localematcher.h
new file mode 100644
index 0000000000..63a68b0b7f
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/localematcher.h
@@ -0,0 +1,720 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// localematcher.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCALEMATCHER_H__
+#define __LOCALEMATCHER_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales.
+ */
+
+/**
+ * Builder option for whether the language subtag or the script subtag is most important.
+ *
+ * @see LocaleMatcher::Builder#setFavorSubtag(ULocMatchFavorSubtag)
+ * @stable ICU 65
+ */
+enum ULocMatchFavorSubtag {
+ /**
+ * Language differences are most important, then script differences, then region differences.
+ * (This is the default behavior.)
+ *
+ * @stable ICU 65
+ */
+ ULOCMATCH_FAVOR_LANGUAGE, // first enumerator, so its value is 0
+ /**
+ * Makes script differences matter relatively more than language differences.
+ *
+ * @stable ICU 65
+ */
+ ULOCMATCH_FAVOR_SCRIPT
+};
+#ifndef U_IN_DOXYGEN
+typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag; // type alias for the enum; compiled out when U_IN_DOXYGEN is defined
+#endif // U_IN_DOXYGEN
+
+/**
+ * Builder option for whether all desired locales are treated equally or
+ * earlier ones are preferred.
+ *
+ * @see LocaleMatcher::Builder#setDemotionPerDesiredLocale(ULocMatchDemotion)
+ * @stable ICU 65
+ */
+enum ULocMatchDemotion {
+ /**
+ * All desired locales are treated equally.
+ *
+ * @stable ICU 65
+ */
+ ULOCMATCH_DEMOTION_NONE, // first enumerator, so its value is 0
+ /**
+ * Earlier desired locales are preferred. (This is the default behavior.)
+ *
+ * <p>From each desired locale to the next,
+ * the distance to any supported locale is increased by an additional amount
+ * which is at least as large as most region mismatches.
+ * A later desired locale has to have a better match with some supported locale
+ * due to more than merely having the same region subtag.
+ *
+ * <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code>
+ * yields <code>Result(en-GB, en)</code> because
+ * with the demotion of sv its perfect match is no better than
+ * the region distance between the earlier desired locale en-GB and en=en-US.
+ *
+ * <p>Notes:
+ * <ul>
+ * <li>In some cases, language and/or script differences can be as small as
+ * the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
+ * <li>It is possible for certain region differences to be larger than usual,
+ * and larger than the demotion.
+ * (As of CLDR 35 there is no such case, but
+ * this is possible in future versions of the data.)
+ * </ul>
+ *
+ * @stable ICU 65
+ */
+ ULOCMATCH_DEMOTION_REGION
+};
+#ifndef U_IN_DOXYGEN
+typedef enum ULocMatchDemotion ULocMatchDemotion; // type alias for the enum; compiled out when U_IN_DOXYGEN is defined
+#endif // U_IN_DOXYGEN
+
+#ifndef U_FORCE_HIDE_DRAFT_API
+
+/**
+ * Builder option for whether to include or ignore one-way (fallback) match data.
+ * The LocaleMatcher uses CLDR languageMatch data which includes fallback (oneway=true) entries.
+ * Sometimes it is desirable to ignore those.
+ *
+ * <p>For example, consider a web application with the UI in a given language,
+ * with a link to another, related web app.
+ * The link should include the UI language, and the target server may also use
+ * the client’s Accept-Language header data.
+ * The target server has its own list of supported languages.
+ * One may want to favor UI language consistency, that is,
+ * if there is a decent match for the original UI language, we want to use it,
+ * but not if it is merely a fallback.
+ *
+ * @see LocaleMatcher::Builder#setDirection(ULocMatchDirection)
+ * @draft ICU 67
+ */
+enum ULocMatchDirection {
+ /**
+ * Locale matching includes one-way matches such as Breton→French. (default)
+ *
+ * @draft ICU 67
+ */
+ ULOCMATCH_DIRECTION_WITH_ONE_WAY, // first enumerator, so its value is 0
+ /**
+ * Locale matching limited to two-way matches including e.g. Danish↔Norwegian
+ * but ignoring one-way matches.
+ *
+ * @draft ICU 67
+ */
+ ULOCMATCH_DIRECTION_ONLY_TWO_WAY
+};
+#ifndef U_IN_DOXYGEN
+typedef enum ULocMatchDirection ULocMatchDirection; // type alias for the enum; compiled out when U_IN_DOXYGEN is defined
+#endif // U_IN_DOXYGEN
+
+#endif // U_FORCE_HIDE_DRAFT_API
+
+struct UHashtable;
+
+U_NAMESPACE_BEGIN
+
+struct LSR;
+
+class LocaleDistance;
+class LocaleLsrIterator;
+class UVector;
+class XLikelySubtags;
+
+/**
+ * Immutable class that picks the best match between a user's desired locales and
+ * an application's supported locales.
+ * Movable but not copyable.
+ *
+ * <p>Example:
+ * <pre>
+ * UErrorCode errorCode = U_ZERO_ERROR;
+ * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocalesFromListString("fr, en-GB, en").build(errorCode);
+ * const Locale *bestSupported = matcher.getBestMatch(Locale::getUS(), errorCode); // "en"
+ * </pre>
+ *
+ * <p>A matcher takes into account when languages are close to one another,
+ * such as Danish and Norwegian,
+ * and when regional variants are close, like en-GB and en-AU as opposed to en-US.
+ *
+ * <p>If there are multiple supported locales with the same (language, script, region)
+ * likely subtags, then the current implementation returns the first of those locales.
+ * It ignores variant subtags (except for pseudolocale variants) and extensions.
+ * This may change in future versions.
+ *
+ * <p>For example, the current implementation does not distinguish between
+ * de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
+ *
+ * <p>If you prefer one equivalent locale over another, then provide only the preferred one,
+ * or place it earlier in the list of supported locales.
+ *
+ * <p>Otherwise, the order of supported locales may have no effect on the best-match results.
+ * The current implementation compares each desired locale with supported locales
+ * in the following order:
+ * 1. Default locale, if supported;
+ * 2. CLDR "paradigm locales" like en-GB and es-419;
+ * 3. other supported locales.
+ * This may change in future versions.
+ *
+ * <p>Often a product will just need one matcher instance, built with the languages
+ * that it supports. However, it may want multiple instances with different
+ * default languages based on additional information, such as the domain.
+ *
+ * <p>This class is not intended for public subclassing.
+ *
+ * @stable ICU 65
+ */
+class U_COMMON_API LocaleMatcher : public UMemory {
+public:
+ /**
+ * Data for the best-matching pair of a desired and a supported locale.
+ * Movable but not copyable.
+ *
+ * @stable ICU 65
+ */
+ class U_COMMON_API Result : public UMemory {
+ public:
+ /**
+ * Move constructor; might modify the source.
+ * This object will have the same contents that the source object had.
+ *
+ * @param src Result to move contents from.
+ * @stable ICU 65
+ */
+ Result(Result &&src) U_NOEXCEPT;
+
+ /**
+ * Destructor.
+ *
+ * @stable ICU 65
+ */
+ ~Result();
+
+ /**
+ * Move assignment; might modify the source.
+ * This object will have the same contents that the source object had.
+ *
+ * @param src Result to move contents from.
+ * @stable ICU 65
+ */
+ Result &operator=(Result &&src) U_NOEXCEPT;
+
+ /**
+ * Returns the best-matching desired locale.
+ * nullptr if the list of desired locales is empty or if none matched well enough.
+ *
+ * @return the best-matching desired locale, or nullptr.
+ * @stable ICU 65
+ */
+ inline const Locale *getDesiredLocale() const { return desiredLocale; }
+
+ /**
+ * Returns the best-matching supported locale.
+ * If none matched well enough, this is the default locale.
+ * The default locale is nullptr if Builder::setNoDefaultLocale() was called,
+ * or if the list of supported locales is empty and no explicit default locale is set.
+ *
+ * @return the best-matching supported locale, or nullptr.
+ * @stable ICU 65
+ */
+ inline const Locale *getSupportedLocale() const { return supportedLocale; }
+
+ /**
+ * Returns the index of the best-matching desired locale in the input Iterable order.
+ * -1 if the list of desired locales is empty or if none matched well enough.
+ *
+ * @return the index of the best-matching desired locale, or -1.
+ * @stable ICU 65
+ */
+ inline int32_t getDesiredIndex() const { return desiredIndex; }
+
+ /**
+ * Returns the index of the best-matching supported locale in the
+ * constructor’s or builder’s input order (“set” Collection plus “added” locales).
+ * If the matcher was built from a locale list string, then the iteration order is that
+ * of a LocalePriorityList built from the same string.
+ * -1 if the list of supported locales is empty or if none matched well enough.
+ *
+ * @return the index of the best-matching supported locale, or -1.
+ * @stable ICU 65
+ */
+ inline int32_t getSupportedIndex() const { return supportedIndex; }
+
+ /**
+ * Takes the best-matching supported locale and adds relevant fields of the
+ * best-matching desired locale, such as the -t- and -u- extensions.
+ * May replace some fields of the supported locale.
+ * The result is the locale that should be used for date and number formatting, collation, etc.
+ * Returns the root locale if getSupportedLocale() returns nullptr.
+ *
+ * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
+ * @param errorCode ICU error code (NOTE(review): presumably the usual in/out U_SUCCESS() contract — confirm).
+ * @return a locale combining the best-matching desired and supported locales.
+ * @stable ICU 65
+ */
+ Locale makeResolvedLocale(UErrorCode &errorCode) const;
+
+ private:
+ Result(const Locale *desired, const Locale *supported,
+ int32_t desIndex, int32_t suppIndex, UBool owned) :
+ desiredLocale(desired), supportedLocale(supported),
+ desiredIndex(desIndex), supportedIndex(suppIndex),
+ desiredIsOwned(owned) {} // stores the arguments as given; nothing is cloned here
+
+ Result(const Result &other) = delete; // movable but not copyable
+ Result &operator=(const Result &other) = delete; // movable but not copyable
+
+ const Locale *desiredLocale; // returned by getDesiredLocale(); may be nullptr
+ const Locale *supportedLocale; // returned by getSupportedLocale(); may be nullptr
+ int32_t desiredIndex; // returned by getDesiredIndex(); -1 when there is no match
+ int32_t supportedIndex; // returned by getSupportedIndex(); -1 when there is no match
+ UBool desiredIsOwned; // NOTE(review): presumably true when desiredLocale is a clone this Result must delete — confirm in localematcher.cpp
+
+ friend class LocaleMatcher; // LocaleMatcher needs the private constructor
+ };
+
+ /**
+ * LocaleMatcher builder.
+ * Movable but not copyable.
+ *
+ * @stable ICU 65
+ */
+ class U_COMMON_API Builder : public UMemory {
+ public:
+ /**
+ * Constructs a builder used in chaining parameters for building a LocaleMatcher.
+ *
+ * @return a new Builder object
+ * @stable ICU 65
+ */
+ Builder() {}
+
+ /**
+ * Move constructor; might modify the source.
+ * This builder will have the same contents that the source builder had.
+ *
+ * @param src Builder to move contents from.
+ * @stable ICU 65
+ */
+ Builder(Builder &&src) U_NOEXCEPT;
+
+ /**
+ * Destructor.
+ *
+ * @stable ICU 65
+ */
+ ~Builder();
+
+ /**
+ * Move assignment; might modify the source.
+ * This builder will have the same contents that the source builder had.
+ *
+ * @param src Builder to move contents from.
+ * @stable ICU 65
+ */
+ Builder &operator=(Builder &&src) U_NOEXCEPT;
+
+ /**
+ * Parses an Accept-Language string
+ * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
+ * such as "af, en, fr;q=0.9", and sets the supported locales accordingly.
+ * Allows whitespace in more places but does not allow "*".
+ * Clears any previously set/added supported locales first.
+ *
+ * @param locales the Accept-Language string of locales to set
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ Builder &setSupportedLocalesFromListString(StringPiece locales);
+
+ /**
+ * Copies the supported locales, preserving iteration order.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locales the list of locales
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ Builder &setSupportedLocales(Locale::Iterator &locales);
+
+ /**
+ * Copies the supported locales from the begin/end range, preserving iteration order.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * Each of the iterator parameter values must be an
+ * input iterator whose value is convertible to const Locale &.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ template<typename Iter>
+ Builder &setSupportedLocales(Iter begin, Iter end) {
+ if (U_FAILURE(errorCode_)) { return *this; } // already failed: leave the current list untouched
+ clearSupportedLocales();
+ while (begin != end) {
+ addSupportedLocale(*begin++);
+ }
+ return *this;
+ }
+
+ /**
+ * Copies the supported locales from the begin/end range, preserving iteration order.
+ * Calls the converter to convert each *begin to a Locale or const Locale &.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * Each of the iterator parameter values must be an
+ * input iterator whose value is accepted by the converter.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @param converter Converter from *begin to const Locale & or compatible.
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ template<typename Iter, typename Conv>
+ Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) {
+ if (U_FAILURE(errorCode_)) { return *this; } // already failed: leave the current list untouched
+ clearSupportedLocales();
+ while (begin != end) {
+ addSupportedLocale(converter(*begin++));
+ }
+ return *this;
+ }
+
+ /**
+ * Adds another supported locale.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locale another locale
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ Builder &addSupportedLocale(const Locale &locale);
+
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Sets no default locale.
+ * There will be no explicit or implicit default locale.
+ * If there is no good match, then the matcher will return nullptr for the
+ * best supported locale.
+ * @return this Builder object
+ * @draft ICU 68
+ */
+ Builder &setNoDefaultLocale();
+#endif // U_HIDE_DRAFT_API
+
+ /**
+ * Sets the default locale; if nullptr, or if it is not set explicitly,
+ * then the first supported locale is used as the default locale.
+ * There is no default locale at all (nullptr will be returned instead)
+ * if setNoDefaultLocale() is called.
+ *
+ * @param defaultLocale the default locale (will be copied)
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ Builder &setDefaultLocale(const Locale *defaultLocale);
+
+ /**
+ * If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script
+ * differences.
+ * This is used in situations (such as maps) where
+ * it is better to fall back to the same script than a similar language.
+ *
+ * @param subtag the subtag to favor
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ Builder &setFavorSubtag(ULocMatchFavorSubtag subtag);
+
+ /**
+ * Option for whether all desired locales are treated equally or
+ * earlier ones are preferred (this is the default).
+ *
+ * @param demotion the demotion per desired locale to set.
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);
+
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Option for whether to include or ignore one-way (fallback) match data.
+ * By default, they are included.
+ *
+ * @param direction the match direction to set.
+ * @return this Builder object
+ * @draft ICU 67
+ */
+ Builder &setDirection(ULocMatchDirection direction) {
+ if (U_SUCCESS(errorCode_)) { // the setting is ignored once the builder holds an error
+ direction_ = direction;
+ }
+ return *this;
+ }
+#endif // U_HIDE_DRAFT_API
+
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Sets the maximum distance for an acceptable match.
+ * The matcher will return a match for a pair of locales only if
+ * they match at least as well as the pair given here.
+ *
+ * For example, setMaxDistance(en-US, en-GB) limits matches to ones where the
+ * (desired, support) locales have a distance no greater than a region subtag difference.
+ * This is much stricter than the CLDR default.
+ *
+ * The details of locale matching are subject to changes in
+ * CLDR data and in the algorithm.
+ * Specifying a maximum distance in relative terms via a sample pair of locales
+ * insulates from changes that affect all distance metrics similarly,
+ * but some changes will necessarily affect relative distances between
+ * different pairs of locales.
+ *
+ * @param desired the desired locale for distance comparison.
+ * @param supported the supported locale for distance comparison.
+ * @return this Builder object
+ * @draft ICU 68
+ */
+ Builder &setMaxDistance(const Locale &desired, const Locale &supported);
+#endif // U_HIDE_DRAFT_API
+
+ /**
+ * Sets the UErrorCode if an error occurred while setting parameters.
+ * Preserves older error codes in the outErrorCode.
+ *
+ * @param outErrorCode Set to an error code if it does not contain one already
+ * and an error occurred while setting parameters.
+ * Otherwise unchanged.
+ * @return true if U_FAILURE(outErrorCode)
+ * @stable ICU 65
+ */
+ UBool copyErrorTo(UErrorCode &outErrorCode) const;
+
+ /**
+ * Builds and returns a new locale matcher.
+ * This builder can continue to be used.
+ *
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return LocaleMatcher
+ * @stable ICU 65
+ */
+ LocaleMatcher build(UErrorCode &errorCode) const;
+
+ private:
+ friend class LocaleMatcher;
+
+ Builder(const Builder &other) = delete; // movable but not copyable
+ Builder &operator=(const Builder &other) = delete; // movable but not copyable
+
+ void clearSupportedLocales();
+ bool ensureSupportedLocaleVector();
+
+ UErrorCode errorCode_ = U_ZERO_ERROR; // checked by the inline setters above, which do nothing once it holds a failure
+ UVector *supportedLocales_ = nullptr; // NOTE(review): presumably created on demand by ensureSupportedLocaleVector() — confirm
+ int32_t thresholdDistance_ = -1; // NOTE(review): -1 presumably means "not set" — confirm
+ ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION; // default: earlier desired locales are preferred
+ Locale *defaultLocale_ = nullptr; // starts unset; see setDefaultLocale()
+ bool withDefault_ = true; // NOTE(review): presumably cleared by setNoDefaultLocale() — confirm
+ ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE; // default: language differences matter most
+ ULocMatchDirection direction_ = ULOCMATCH_DIRECTION_WITH_ONE_WAY; // default: one-way (fallback) matches are included
+ Locale *maxDistanceDesired_ = nullptr; // NOTE(review): presumably the desired half of the setMaxDistance() pair — confirm
+ Locale *maxDistanceSupported_ = nullptr; // NOTE(review): presumably the supported half of the setMaxDistance() pair — confirm
+ };
+
+ // FYI No public LocaleMatcher constructors in C++; use the Builder.
+
+ /**
+ * Move constructor; might modify the source.
+ * This matcher will have the same settings that the source matcher had.
+ * @param src source matcher
+ * @stable ICU 65
+ */
+ LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT;
+
+ /**
+ * Destructor.
+ * @stable ICU 65
+ */
+ ~LocaleMatcher();
+
+ /**
+ * Move assignment operator; might modify the source.
+ * This matcher will have the same settings that the source matcher had.
+ * The behavior is undefined if *this and src are the same object.
+ * @param src source matcher
+ * @return *this
+ * @stable ICU 65
+ */
+ LocaleMatcher &operator=(LocaleMatcher &&src) U_NOEXCEPT;
+
+ /**
+ * Returns the supported locale which best matches the desired locale.
+ *
+ * @param desiredLocale Typically a user's language.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching supported locale.
+ * @stable ICU 65
+ */
+ const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const;
+
+ /**
+ * Returns the supported locale which best matches one of the desired locales.
+ *
+ * @param desiredLocales Typically a user's languages, in order of preference (descending).
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching supported locale.
+ * @stable ICU 65
+ */
+ const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
+
+ /**
+ * Parses an Accept-Language string
+ * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
+ * such as "af, en, fr;q=0.9",
+ * and returns the supported locale which best matches one of the desired locales.
+ * Allows whitespace in more places but does not allow "*".
+ *
+ * @param desiredLocaleList Typically a user's languages, as an Accept-Language string.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching supported locale.
+ * @stable ICU 65
+ */
+ const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const;
+
+ /**
+ * Returns the best match between the desired locale and the supported locales.
+ * If the result's desired locale is not nullptr, then it is the address of the input locale.
+ * It has not been cloned.
+ *
+ * @param desiredLocale Typically a user's language.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching pair of the desired and a supported locale.
+ * @stable ICU 65
+ */
+ Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const;
+
+ /**
+ * Returns the best match between the desired and supported locales.
+ * If the result's desired locale is not nullptr, then it is a clone of
+ * the best-matching desired locale. The Result object owns the clone.
+ *
+ * @param desiredLocales Typically a user's languages, in order of preference (descending).
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching pair of a desired and a supported locale.
+ * @stable ICU 65
+ */
+ Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
+
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Returns true if the pair of locales matches acceptably.
+ * This is influenced by Builder options such as setDirection(), setFavorSubtag(),
+ * and setMaxDistance().
+ *
+ * @param desired The desired locale.
+ * @param supported The supported locale.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return true if the pair of locales matches acceptably.
+ * @draft ICU 68
+ */
+ UBool isMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
+#endif // U_HIDE_DRAFT_API
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Returns a fraction between 0 and 1, where 1 means that the languages are a
+ * perfect match, and 0 means that they are completely different.
+ *
+ * <p>This is mostly an implementation detail, and the precise values may change over time.
+ * The implementation may use either the maximized forms or the other ones, or both.
+ * The implementation may or may not rely on the forms to be consistent with each other.
+ *
+ * <p>Callers should construct and use a matcher rather than match pairs of locales directly.
+ *
+ * @param desired Desired locale.
+ * @param supported Supported locale.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return value between 0 and 1, inclusive.
+ * @internal (has a known user)
+ */
+ double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
+#endif // U_HIDE_INTERNAL_API
+
+private:
+ LocaleMatcher(const Builder &builder, UErrorCode &errorCode);
+ LocaleMatcher(const LocaleMatcher &other) = delete;
+ LocaleMatcher &operator=(const LocaleMatcher &other) = delete;
+
+ int32_t putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength, UErrorCode &errorCode);
+
+ int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;
+
+ const XLikelySubtags &likelySubtags;
+ const LocaleDistance &localeDistance;
+ int32_t thresholdDistance;
+ int32_t demotionPerDesiredLocale;
+ ULocMatchFavorSubtag favorSubtag;
+ ULocMatchDirection direction;
+
+ // These are in input order.
+ const Locale ** supportedLocales;
+ LSR *lsrs;
+ int32_t supportedLocalesLength;
+ // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
+ UHashtable *supportedLsrToIndex; // Map<LSR, Integer> stores index+1 because 0 is "not found"
+ // Array versions of the supportedLsrToIndex keys and values.
+ // The distance lookup loops over the supportedLSRs and returns the index of the best match.
+ const LSR **supportedLSRs;
+ int32_t *supportedIndexes;
+ int32_t supportedLSRsLength;
+ Locale *ownedDefaultLocale;
+ const Locale *defaultLocale;
+};
+
+U_NAMESPACE_END
+
+#endif // U_SHOW_CPLUSPLUS_API
+#endif // __LOCALEMATCHER_H__
diff --git a/thirdparty/icu4c/common/unicode/localpointer.h b/thirdparty/icu4c/common/unicode/localpointer.h
new file mode 100644
index 0000000000..2a65f2d382
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/localpointer.h
@@ -0,0 +1,595 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: localpointer.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009nov13
+* created by: Markus W. Scherer
+*/
+
+#ifndef __LOCALPOINTER_H__
+#define __LOCALPOINTER_H__
+
+/**
+ * \file
+ * \brief C++ API: "Smart pointers" for use with and in ICU4C C++ code.
+ *
+ * These classes are inspired by
+ * - std::auto_ptr
+ * - boost::scoped_ptr & boost::scoped_array
+ * - Taligent Safe Pointers (TOnlyPointerTo)
+ *
+ * but none of those provide for all of the goals for ICU smart pointers:
+ * - Smart pointer owns the object and releases it when it goes out of scope.
+ * - No transfer of ownership via copy/assignment to reduce misuse. Simpler & more robust.
+ * - ICU-compatible: No exceptions.
+ * - Need to be able to orphan/release the pointer and its ownership.
+ * - Need variants for normal C++ object pointers, C++ arrays, and ICU C service objects.
+ *
+ * For details see http://site.icu-project.org/design/cpp/scoped_ptr
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include <memory>
+
+U_NAMESPACE_BEGIN
+
+/**
+ * "Smart pointer" base class; do not use directly: use LocalPointer etc.
+ *
+ * Base class for smart pointer classes that do not throw exceptions.
+ *
+ * Do not use this base class directly, since it does not delete its pointer.
+ * A subclass must implement methods that delete the pointer:
+ * Destructor and adoptInstead().
+ *
+ * There is no operator T *() provided because the programmer must decide
+ * whether to use getAlias() (without transfer of ownership) or orphan()
+ * (with transfer of ownership and NULLing of the pointer).
+ *
+ * @see LocalPointer
+ * @see LocalArray
+ * @see U_DEFINE_LOCAL_OPEN_POINTER
+ * @stable ICU 4.4
+ */
+template<typename T>
+class LocalPointerBase {
+public:
+ // No heap allocation: the operator new overloads below are deleted. Use only on the stack.
+ static void* U_EXPORT2 operator new(size_t) = delete;
+ static void* U_EXPORT2 operator new[](size_t) = delete;
+#if U_HAVE_PLACEMENT_NEW
+ static void* U_EXPORT2 operator new(size_t, void*) = delete;
+#endif
+
+ /**
+ * Constructor takes ownership.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ explicit LocalPointerBase(T *p=NULL) : ptr(p) {}
+ /**
+ * Destructor; a subclass destructor deletes the object it owns.
+ * Subclass must override: Base class does nothing.
+ * @stable ICU 4.4
+ */
+ ~LocalPointerBase() { /* base class intentionally does not delete ptr */ }
+ /**
+ * NULL check.
+ * @return true if the internal pointer ==NULL
+ * @stable ICU 4.4
+ */
+ UBool isNull() const { return ptr==NULL; }
+ /**
+ * NULL check.
+ * @return true if the internal pointer !=NULL
+ * @stable ICU 4.4
+ */
+ UBool isValid() const { return ptr!=NULL; }
+ /**
+ * Comparison with a simple pointer, so that existing code
+ * with ==NULL need not be changed.
+ * @param other simple pointer for comparison
+ * @return true if this pointer value equals other
+ * @stable ICU 4.4
+ */
+ bool operator==(const T *other) const { return ptr==other; }
+ /**
+ * Comparison with a simple pointer, so that existing code
+ * with !=NULL need not be changed.
+ * @param other simple pointer for comparison
+ * @return true if this pointer value differs from other
+ * @stable ICU 4.4
+ */
+ bool operator!=(const T *other) const { return ptr!=other; }
+ /**
+ * Access without ownership change.
+ * @return the pointer value
+ * @stable ICU 4.4
+ */
+ T *getAlias() const { return ptr; }
+ /**
+ * Access without ownership change.
+ * @return reference to the object pointed to (the pointer is not checked for NULL)
+ * @stable ICU 4.4
+ */
+ T &operator*() const { return *ptr; }
+ /**
+ * Access without ownership change.
+ * @return the pointer value
+ * @stable ICU 4.4
+ */
+ T *operator->() const { return ptr; }
+ /**
+ * Gives up ownership; the internal pointer becomes NULL.
+ * @return the pointer value;
+ * caller becomes responsible for deleting the object
+ * @stable ICU 4.4
+ */
+ T *orphan() {
+ T *p=ptr;
+ ptr=NULL;
+ return p;
+ }
+ /**
+ * Deletes the object it owns,
+ * and adopts (takes ownership of) the one passed in.
+ * Subclass must override: Base class does not delete the object.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ void adoptInstead(T *p) {
+ // delete ptr; (deletion is left to the subclass; the base only replaces the pointer)
+ ptr=p;
+ }
+protected:
+ /**
+ * Actual pointer.
+ * @internal
+ */
+ T *ptr;
+private:
+ // No comparison operators with other LocalPointerBases: declared private, no body given here.
+ bool operator==(const LocalPointerBase<T> &other);
+ bool operator!=(const LocalPointerBase<T> &other);
+ // No ownership sharing: No copy constructor, no assignment operator (declared private, no body given here).
+ LocalPointerBase(const LocalPointerBase<T> &other);
+ void operator=(const LocalPointerBase<T> &other);
+};
+
+/**
+ * "Smart pointer" class, deletes objects via the standard C++ delete operator.
+ * For most methods see the LocalPointerBase base class.
+ *
+ * Usage example:
+ * \code
+ * LocalPointer<UnicodeString> s(new UnicodeString((UChar32)0x50005));
+ * int32_t length=s->length(); // 2
+ * char16_t lead=s->charAt(0); // 0xd900
+ * if(some condition) { return; } // no need to explicitly delete the pointer
+ * s.adoptInstead(new UnicodeString((char16_t)0xfffc));
+ * length=s->length(); // 1
+ * // no need to explicitly delete the pointer
+ * \endcode
+ *
+ * @see LocalPointerBase
+ * @stable ICU 4.4
+ */
+template<typename T>
+class LocalPointer : public LocalPointerBase<T> {
+public:
+ using LocalPointerBase<T>::operator*;
+ using LocalPointerBase<T>::operator->;
+ /**
+ * Constructor takes ownership.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ explicit LocalPointer(T *p=NULL) : LocalPointerBase<T>(p) {}
+ /**
+ * Constructor takes ownership and reports an error if NULL.
+ *
+ * This constructor is intended to be used with other-class constructors
+ * that may report a failure UErrorCode,
+ * so that callers need to check only for U_FAILURE(errorCode)
+ * and not also separately for isNull().
+ *
+ * @param p simple pointer to an object that is adopted
+ * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
+ * if p==NULL and no other failure code had been set
+ * @stable ICU 55
+ */
+ LocalPointer(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) {
+ if(p==NULL && U_SUCCESS(errorCode)) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ }
+ }
+ /**
+ * Move constructor, leaves src with isNull().
+ * @param src source smart pointer
+ * @stable ICU 56
+ */
+ LocalPointer(LocalPointer<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
+ src.ptr=NULL;
+ }
+
+ /**
+ * Constructs a LocalPointer from a C++11 std::unique_ptr.
+ * The LocalPointer steals the object owned by the std::unique_ptr.
+ *
+ * This constructor works via move semantics. If your std::unique_ptr is
+ * in a local variable, you must use std::move.
+ *
+ * @param p The std::unique_ptr from which the pointer will be stolen.
+ * @stable ICU 64
+ */
+ explicit LocalPointer(std::unique_ptr<T> &&p)
+ : LocalPointerBase<T>(p.release()) {}
+
+ /**
+ * Destructor deletes the object it owns.
+ * @stable ICU 4.4
+ */
+ ~LocalPointer() {
+ delete LocalPointerBase<T>::ptr;
+ }
+ /**
+ * Move assignment operator, leaves src with isNull().
+ * The behavior is undefined if *this and src are the same object.
+ * @param src source smart pointer
+ * @return *this
+ * @stable ICU 56
+ */
+ LocalPointer<T> &operator=(LocalPointer<T> &&src) U_NOEXCEPT {
+ delete LocalPointerBase<T>::ptr; // release the currently owned object before taking src's
+ LocalPointerBase<T>::ptr=src.ptr;
+ src.ptr=NULL;
+ return *this;
+ }
+
+ /**
+ * Move-assign from an std::unique_ptr to this LocalPointer.
+ * Steals the pointer from the std::unique_ptr.
+ *
+ * @param p The std::unique_ptr from which the pointer will be stolen.
+ * @return *this
+ * @stable ICU 64
+ */
+ LocalPointer<T> &operator=(std::unique_ptr<T> &&p) U_NOEXCEPT {
+ adoptInstead(p.release()); // adoptInstead() deletes the currently owned object first
+ return *this;
+ }
+
+ /**
+ * Swap pointers.
+ * @param other other smart pointer
+ * @stable ICU 56
+ */
+ void swap(LocalPointer<T> &other) U_NOEXCEPT {
+ T *temp=LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=other.ptr;
+ other.ptr=temp;
+ }
+ /**
+ * Non-member LocalPointer swap function.
+ * @param p1 will get p2's pointer
+ * @param p2 will get p1's pointer
+ * @stable ICU 56
+ */
+ friend inline void swap(LocalPointer<T> &p1, LocalPointer<T> &p2) U_NOEXCEPT {
+ p1.swap(p2);
+ }
+ /**
+ * Deletes the object it owns,
+ * and adopts (takes ownership of) the one passed in.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ void adoptInstead(T *p) {
+ delete LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=p;
+ }
+ /**
+ * Deletes the object it owns,
+ * and adopts (takes ownership of) the one passed in.
+ *
+ * If U_FAILURE(errorCode), then the current object is retained and the new one deleted.
+ *
+ * If U_SUCCESS(errorCode) but the input pointer is NULL,
+ * then U_MEMORY_ALLOCATION_ERROR is set,
+ * the current object is deleted, and NULL is set.
+ *
+ * @param p simple pointer to an object that is adopted
+ * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
+ * if p==NULL and no other failure code had been set
+ * @stable ICU 55
+ */
+ void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) {
+ if(U_SUCCESS(errorCode)) {
+ delete LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=p;
+ if(p==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ }
+ } else {
+ delete p; // earlier failure: keep the current object, discard the new one
+ }
+ }
+
+ /**
+ * Conversion operator to a C++11 std::unique_ptr.
+ * Disowns the object and gives it to the returned std::unique_ptr.
+ *
+ * This operator works via move semantics. If your LocalPointer is
+ * in a local variable, you must use std::move.
+ *
+ * @return An std::unique_ptr owning the pointer previously owned by this
+ * icu::LocalPointer.
+ * @stable ICU 64
+ */
+ operator std::unique_ptr<T> () && {
+ return std::unique_ptr<T>(LocalPointerBase<T>::orphan()); // orphan() leaves this pointer NULL
+ }
+};
+
+/**
+ * "Smart pointer" class, deletes objects via the C++ array delete[] operator.
+ * For most methods see the LocalPointerBase base class.
+ * Adds operator[] for array item access.
+ *
+ * Usage example:
+ * \code
+ * LocalArray<UnicodeString> a(new UnicodeString[2]);
+ * a[0].append((char16_t)0x61);
+ * if(some condition) { return; } // no need to explicitly delete the array
+ * a.adoptInstead(new UnicodeString[4]);
+ * a[3].append((char16_t)0x62).append((char16_t)0x63).reverse();
+ * // no need to explicitly delete the array
+ * \endcode
+ *
+ * @see LocalPointerBase
+ * @stable ICU 4.4
+ */
+template<typename T>
+class LocalArray : public LocalPointerBase<T> {
+public:
+ using LocalPointerBase<T>::operator*;
+ using LocalPointerBase<T>::operator->;
+ /**
+ * Constructor takes ownership.
+ * @param p simple pointer to an array of T objects that is adopted
+ * @stable ICU 4.4
+ */
+ explicit LocalArray(T *p=NULL) : LocalPointerBase<T>(p) {}
+ /**
+ * Constructor takes ownership and reports an error if NULL.
+ *
+ * This constructor is intended to be used with other-class constructors
+ * that may report a failure UErrorCode,
+ * so that callers need to check only for U_FAILURE(errorCode)
+ * and not also separately for isNull().
+ *
+ * @param p simple pointer to an array of T objects that is adopted
+ * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
+ * if p==NULL and no other failure code had been set
+ * @stable ICU 56
+ */
+ LocalArray(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) {
+ if(p==NULL && U_SUCCESS(errorCode)) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ }
+ }
+ /**
+ * Move constructor, leaves src with isNull().
+ * @param src source smart pointer
+ * @stable ICU 56
+ */
+ LocalArray(LocalArray<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
+ src.ptr=NULL;
+ }
+
+ /**
+ * Constructs a LocalArray from a C++11 std::unique_ptr of an array type.
+ * The LocalArray steals the array owned by the std::unique_ptr.
+ *
+ * This constructor works via move semantics. If your std::unique_ptr is
+ * in a local variable, you must use std::move.
+ *
+ * @param p The std::unique_ptr from which the array will be stolen.
+ * @stable ICU 64
+ */
+ explicit LocalArray(std::unique_ptr<T[]> &&p)
+ : LocalPointerBase<T>(p.release()) {}
+
+ /**
+ * Destructor deletes the array it owns.
+ * @stable ICU 4.4
+ */
+ ~LocalArray() {
+ delete[] LocalPointerBase<T>::ptr;
+ }
+ /**
+ * Move assignment operator, leaves src with isNull().
+ * The behavior is undefined if *this and src are the same object.
+ * @param src source smart pointer
+ * @return *this
+ * @stable ICU 56
+ */
+ LocalArray<T> &operator=(LocalArray<T> &&src) U_NOEXCEPT {
+ delete[] LocalPointerBase<T>::ptr; // release the currently owned array before taking src's
+ LocalPointerBase<T>::ptr=src.ptr;
+ src.ptr=NULL;
+ return *this;
+ }
+
+ /**
+ * Move-assign from an std::unique_ptr to this LocalArray.
+ * Steals the array from the std::unique_ptr.
+ *
+ * @param p The std::unique_ptr from which the array will be stolen.
+ * @return *this
+ * @stable ICU 64
+ */
+ LocalArray<T> &operator=(std::unique_ptr<T[]> &&p) U_NOEXCEPT {
+ adoptInstead(p.release()); // adoptInstead() delete[]s the currently owned array first
+ return *this;
+ }
+
+ /**
+ * Swap pointers.
+ * @param other other smart pointer
+ * @stable ICU 56
+ */
+ void swap(LocalArray<T> &other) U_NOEXCEPT {
+ T *temp=LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=other.ptr;
+ other.ptr=temp;
+ }
+ /**
+ * Non-member LocalArray swap function.
+ * @param p1 will get p2's pointer
+ * @param p2 will get p1's pointer
+ * @stable ICU 56
+ */
+ friend inline void swap(LocalArray<T> &p1, LocalArray<T> &p2) U_NOEXCEPT {
+ p1.swap(p2);
+ }
+ /**
+ * Deletes the array it owns,
+ * and adopts (takes ownership of) the one passed in.
+ * @param p simple pointer to an array of T objects that is adopted
+ * @stable ICU 4.4
+ */
+ void adoptInstead(T *p) {
+ delete[] LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=p;
+ }
+ /**
+ * Deletes the array it owns,
+ * and adopts (takes ownership of) the one passed in.
+ *
+ * If U_FAILURE(errorCode), then the current array is retained and the new one deleted.
+ *
+ * If U_SUCCESS(errorCode) but the input pointer is NULL,
+ * then U_MEMORY_ALLOCATION_ERROR is set,
+ * the current array is deleted, and NULL is set.
+ *
+ * @param p simple pointer to an array of T objects that is adopted
+ * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
+ * if p==NULL and no other failure code had been set
+ * @stable ICU 56
+ */
+ void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) {
+ if(U_SUCCESS(errorCode)) {
+ delete[] LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=p;
+ if(p==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ }
+ } else {
+ delete[] p; // earlier failure: keep the current array, discard the new one
+ }
+ }
+ /**
+ * Array item access (writable).
+ * No index bounds check.
+ * @param i array index
+ * @return reference to the array item
+ * @stable ICU 4.4
+ */
+ T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; }
+
+ /**
+ * Conversion operator to a C++11 std::unique_ptr.
+ * Disowns the array and gives it to the returned std::unique_ptr.
+ *
+ * This operator works via move semantics. If your LocalArray is
+ * in a local variable, you must use std::move.
+ *
+ * @return An std::unique_ptr owning the array previously owned by this
+ * icu::LocalArray.
+ * @stable ICU 64
+ */
+ operator std::unique_ptr<T[]> () && {
+ return std::unique_ptr<T[]>(LocalPointerBase<T>::orphan());
+ }
+};
+
+/**
+ * \def U_DEFINE_LOCAL_OPEN_POINTER
+ * "Smart pointer" definition macro, deletes objects via the closeFunction.
+ * Defines a subclass of LocalPointerBase which works just
+ * like LocalPointer<Type> except that this subclass will use the closeFunction
+ * rather than the C++ delete operator (closeFunction is not called for a NULL pointer).
+ *
+ * Usage example:
+ * \code
+ * LocalUCaseMapPointer csm(ucasemap_open(localeID, options, &errorCode));
+ * utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
+ * utf8Out, (int32_t)sizeof(utf8Out),
+ * utf8In, utf8InLength, &errorCode);
+ * if(U_FAILURE(errorCode)) { return; } // no need to explicitly delete the UCaseMap
+ * \endcode
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
+ class LocalPointerClassName : public LocalPointerBase<Type> { \
+ public: \
+ using LocalPointerBase<Type>::operator*; \
+ using LocalPointerBase<Type>::operator->; \
+ explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \
+ LocalPointerClassName(LocalPointerClassName &&src) U_NOEXCEPT \
+ : LocalPointerBase<Type>(src.ptr) { \
+ src.ptr=NULL; \
+ } \
+ /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \
+ explicit LocalPointerClassName(std::unique_ptr<Type, decltype(&closeFunction)> &&p) \
+ : LocalPointerBase<Type>(p.release()) {} \
+ ~LocalPointerClassName() { if (ptr != NULL) { closeFunction(ptr); } } \
+ LocalPointerClassName &operator=(LocalPointerClassName &&src) U_NOEXCEPT { \
+ if (ptr != NULL) { closeFunction(ptr); } \
+ LocalPointerBase<Type>::ptr=src.ptr; \
+ src.ptr=NULL; \
+ return *this; \
+ } \
+ /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \
+ LocalPointerClassName &operator=(std::unique_ptr<Type, decltype(&closeFunction)> &&p) { \
+ adoptInstead(p.release()); \
+ return *this; \
+ } \
+ void swap(LocalPointerClassName &other) U_NOEXCEPT { \
+ Type *temp=LocalPointerBase<Type>::ptr; \
+ LocalPointerBase<Type>::ptr=other.ptr; \
+ other.ptr=temp; \
+ } \
+ friend inline void swap(LocalPointerClassName &p1, LocalPointerClassName &p2) U_NOEXCEPT { \
+ p1.swap(p2); \
+ } \
+ void adoptInstead(Type *p) { \
+ if (ptr != NULL) { closeFunction(ptr); } \
+ ptr=p; \
+ } \
+ operator std::unique_ptr<Type, decltype(&closeFunction)> () && { \
+ return std::unique_ptr<Type, decltype(&closeFunction)>(LocalPointerBase<Type>::orphan(), closeFunction); \
+ } \
+ }
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+#endif /* __LOCALPOINTER_H__ */
diff --git a/thirdparty/icu4c/common/unicode/locdspnm.h b/thirdparty/icu4c/common/unicode/locdspnm.h
new file mode 100644
index 0000000000..4f06f85704
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/locdspnm.h
@@ -0,0 +1,211 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2010-2016, International Business Machines Corporation and
+* others. All Rights Reserved.
+******************************************************************************
+*/
+
+#ifndef LOCDSPNM_H
+#define LOCDSPNM_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C++ API: Provides display names of Locale and its components.
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/locid.h"
+#include "unicode/strenum.h"
+#include "unicode/uscript.h"
+#include "unicode/uldnames.h"
+#include "unicode/udisplaycontext.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Returns display names of Locales and components of Locales. For
+ * more information on language, script, region, variant, key, and
+ * values, see Locale.
+ * @stable ICU 4.4
+ */
+class U_COMMON_API LocaleDisplayNames : public UObject {
+public:
+ /**
+ * Destructor.
+ * @stable ICU 4.4
+ */
+ virtual ~LocaleDisplayNames();
+
+ /**
+ * Convenience overload of
+ * {@link #createInstance(const Locale& locale, UDialectHandling dialectHandling)}
+ * that specifies STANDARD dialect handling.
+ * @param locale the display locale
+ * @return a LocaleDisplayNames instance
+ * @stable ICU 4.4
+ */
+ inline static LocaleDisplayNames* U_EXPORT2 createInstance(const Locale& locale);
+
+ /**
+ * Returns an instance of LocaleDisplayNames that returns names
+ * formatted for the provided locale, using the provided
+ * dialectHandling.
+ *
+ * @param locale the display locale
+ * @param dialectHandling how to select names for locales
+ * @return a LocaleDisplayNames instance
+ * @stable ICU 4.4
+ */
+ static LocaleDisplayNames* U_EXPORT2 createInstance(const Locale& locale,
+ UDialectHandling dialectHandling);
+
+ /**
+ * Returns an instance of LocaleDisplayNames that returns names formatted
+ * for the provided locale, using the provided UDisplayContext settings.
+ *
+ * @param locale the display locale
+ * @param contexts List of one or more context settings (e.g. for dialect
+ * handling, capitalization, etc.)
+ * @param length Number of items in the contexts list
+ * @return a LocaleDisplayNames instance
+ * @stable ICU 51
+ */
+ static LocaleDisplayNames* U_EXPORT2 createInstance(const Locale& locale,
+ UDisplayContext *contexts, int32_t length);
+
+ // getters for state
+ /**
+ * Returns the locale used to determine the display names. This is
+ * not necessarily the same locale passed to {@link #createInstance}.
+ * @return the display locale
+ * @stable ICU 4.4
+ */
+ virtual const Locale& getLocale() const = 0;
+
+ /**
+ * Returns the dialect handling used in the display names.
+ * @return the dialect handling enum
+ * @stable ICU 4.4
+ */
+ virtual UDialectHandling getDialectHandling() const = 0;
+
+ /**
+ * Returns the UDisplayContext value for the specified UDisplayContextType.
+ * @param type the UDisplayContextType whose value to return
+ * @return the UDisplayContext for the specified type.
+ * @stable ICU 51
+ */
+ virtual UDisplayContext getContext(UDisplayContextType type) const = 0;
+
+ // names for entire locales
+ /**
+ * Returns the display name of the provided locale.
+ * @param locale the locale whose display name to return
+ * @param result receives the locale's display name
+ * @return the display name of the provided locale
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& localeDisplayName(const Locale& locale,
+ UnicodeString& result) const = 0;
+
+ /**
+ * Returns the display name of the provided locale id.
+ * @param localeId the id of the locale whose display name to return
+ * @param result receives the locale's display name
+ * @return the display name of the provided locale
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& localeDisplayName(const char* localeId,
+ UnicodeString& result) const = 0;
+
+ // names for components of a locale id
+ /**
+ * Returns the display name of the provided language code.
+ * @param lang the language code
+ * @param result receives the language code's display name
+ * @return the display name of the provided language code
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& languageDisplayName(const char* lang,
+ UnicodeString& result) const = 0;
+
+ /**
+ * Returns the display name of the provided script code (given as a string).
+ * @param script the script code
+ * @param result receives the script code's display name
+ * @return the display name of the provided script code
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& scriptDisplayName(const char* script,
+ UnicodeString& result) const = 0;
+
+ /**
+ * Returns the display name of the provided script code (given as a UScriptCode number).
+ * @param scriptCode the script code number
+ * @param result receives the script code's display name
+ * @return the display name of the provided script code
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode,
+ UnicodeString& result) const = 0;
+
+ /**
+ * Returns the display name of the provided region code.
+ * @param region the region code
+ * @param result receives the region code's display name
+ * @return the display name of the provided region code
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& regionDisplayName(const char* region,
+ UnicodeString& result) const = 0;
+
+ /**
+ * Returns the display name of the provided variant.
+ * @param variant the variant string
+ * @param result receives the variant's display name
+ * @return the display name of the provided variant
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& variantDisplayName(const char* variant,
+ UnicodeString& result) const = 0;
+
+ /**
+ * Returns the display name of the provided locale key.
+ * @param key the locale key name
+ * @param result receives the locale key's display name
+ * @return the display name of the provided locale key
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& keyDisplayName(const char* key,
+ UnicodeString& result) const = 0;
+
+ /**
+ * Returns the display name of the provided value (used with the provided key).
+ * @param key the locale key name
+ * @param value the locale key's value
+ * @param result receives the value's display name
+ * @return the display name of the provided value
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& keyValueDisplayName(const char* key, const char* value,
+ UnicodeString& result) const = 0;
+};
+
+inline LocaleDisplayNames* LocaleDisplayNames::createInstance(const Locale& locale) {
+ return LocaleDisplayNames::createInstance(locale, ULDN_STANDARD_NAMES); // forwards to the two-argument overload with standard dialect handling
+}
+
+U_NAMESPACE_END
+
+#endif
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/locid.h b/thirdparty/icu4c/common/unicode/locid.h
new file mode 100644
index 0000000000..ba858d702a
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/locid.h
@@ -0,0 +1,1274 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1996-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File locid.h
+*
+* Created by: Helena Shih
+*
+* Modification History:
+*
+* Date Name Description
+* 02/11/97 aliu Changed gLocPath to fgLocPath and added methods to
+* get and set it.
+* 04/02/97 aliu Made operator!= inline; fixed return value of getName().
+* 04/15/97 aliu Cleanup for AIX/Win32.
+* 04/24/97 aliu Numerous changes per code review.
+* 08/18/98 stephen Added tokenizeString(),changed getDisplayName()
+* 09/08/98 stephen Moved definition of kEmptyString for Mac Port
+* 11/09/99 weiv Added const char * getName() const;
+* 04/12/00 srl removing unicodestring api's and cached hash code
+* 08/10/01 grhoten Change the static Locales to accessor functions
+******************************************************************************
+*/
+
+#ifndef LOCID_H
+#define LOCID_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/bytestream.h"
+#include "unicode/localpointer.h"
+#include "unicode/strenum.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+#include "unicode/putil.h"
+#include "unicode/uloc.h"
+
+/**
+ * \file
+ * \brief C++ API: Locale ID object.
+ */
+
+U_NAMESPACE_BEGIN
+
+// Forward Declarations
+void U_CALLCONV locale_available_init(); /**< @internal */
+
+class StringEnumeration;
+class UnicodeString;
+
+/**
+ * A <code>Locale</code> object represents a specific geographical, political,
+ * or cultural region. An operation that requires a <code>Locale</code> to perform
+ * its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
+ * to tailor information for the user. For example, displaying a number
+ * is a locale-sensitive operation--the number should be formatted
+ * according to the customs/conventions of the user's native country,
+ * region, or culture.
+ *
+ * The Locale class is not suitable for subclassing.
+ *
+ * <P>
+ * You can create a <code>Locale</code> object using the constructor in
+ * this class:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * Locale( const char* language,
+ * const char* country,
+ * const char* variant);
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * The first argument to the constructors is a valid <STRONG>ISO
+ * Language Code.</STRONG> These codes are the lower-case two-letter
+ * codes as defined by ISO-639.
+ * You can find a full list of these codes at:
+ * <BR><a href ="http://www.loc.gov/standards/iso639-2/">
+ * http://www.loc.gov/standards/iso639-2/</a>
+ *
+ * <P>
+ * The second argument to the constructors is a valid <STRONG>ISO Country
+ * Code.</STRONG> These codes are the upper-case two-letter codes
+ * as defined by ISO-3166.
+ * You can find a full list of these codes at a number of sites, such as:
+ * <BR><a href="http://www.iso.org/iso/en/prods-services/iso3166ma/index.html">
+ * http://www.iso.org/iso/en/prods-services/iso3166ma/index.html</a>
+ *
+ * <P>
+ * The third constructor requires a third argument--the <STRONG>Variant.</STRONG>
+ * The Variant codes are vendor and browser-specific.
+ * For example, use REVISED for a language's revised script orthography, and POSIX for POSIX.
+ * Where there are two variants, separate them with an underscore, and
+ * put the most important one first. For
+ * example, a Traditional Spanish collation might be referenced, with
+ * "es", "ES", "Traditional_POSIX".
+ *
+ * <P>
+ * Because a <code>Locale</code> object is just an identifier for a region,
+ * no validity check is performed when you construct a <code>Locale</code>.
+ * If you want to see whether particular resources are available for the
+ * <code>Locale</code> you construct, you must query those resources. For
+ * example, ask the <code>NumberFormat</code> for the locales it supports
+ * using its <code>getAvailableLocales</code> method.
+ * <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
+ * locale, you get back the best available match, not necessarily
+ * precisely what you asked for. For more information, look at
+ * <code>ResourceBundle</code>.
+ *
+ * <P>
+ * The <code>Locale</code> class provides a number of convenient constants
+ * that you can use to create <code>Locale</code> objects for commonly used
+ * locales. For example, the following refers to a <code>Locale</code> object
+ * for the United States:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * Locale::getUS()
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * <P>
+ * Once you've created a <code>Locale</code> you can query it for information about
+ * itself. Use <code>getCountry</code> to get the ISO Country Code and
+ * <code>getLanguage</code> to get the ISO Language Code. You can
+ * use <code>getDisplayCountry</code> to get the
+ * name of the country suitable for displaying to the user. Similarly,
+ * you can use <code>getDisplayLanguage</code> to get the name of
+ * the language suitable for displaying to the user. Interestingly,
+ * the <code>getDisplayXXX</code> methods are themselves locale-sensitive
+ * and have two versions: one that uses the default locale and one
+ * that takes a locale as an argument and displays the name or country in
+ * a language appropriate to that locale.
+ *
+ * <P>
+ * ICU provides a number of classes that perform locale-sensitive
+ * operations. For example, the <code>NumberFormat</code> class formats
+ * numbers, currency, or percentages in a locale-sensitive manner. Classes
+ * such as <code>NumberFormat</code> have a number of convenience methods
+ * for creating a default object of that type. For example, the
+ * <code>NumberFormat</code> class provides these three convenience methods
+ * for creating a default <code>NumberFormat</code> object:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * UErrorCode success = U_ZERO_ERROR;
+ * Locale myLocale;
+ * NumberFormat *nf;
+ *
+ * nf = NumberFormat::createInstance( success ); delete nf;
+ * nf = NumberFormat::createCurrencyInstance( success ); delete nf;
+ * nf = NumberFormat::createPercentInstance( success ); delete nf;
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * Each of these methods has two variants; one with an explicit locale
+ * and one without; the latter using the default locale.
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * nf = NumberFormat::createInstance( myLocale, success ); delete nf;
+ * nf = NumberFormat::createCurrencyInstance( myLocale, success ); delete nf;
+ * nf = NumberFormat::createPercentInstance( myLocale, success ); delete nf;
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * A <code>Locale</code> is the mechanism for identifying the kind of object
+ * (<code>NumberFormat</code>) that you would like to get. The locale is
+ * <STRONG>just</STRONG> a mechanism for identifying objects,
+ * <STRONG>not</STRONG> a container for the objects themselves.
+ *
+ * <P>
+ * Each class that performs locale-sensitive operations allows you
+ * to get all the available objects of that type. You can sift
+ * through these objects by language, country, or variant,
+ * and use the display names to present a menu to the user.
+ * For example, you can create a menu of all the collation objects
+ * suitable for a given language. Such classes implement these
+ * three class methods:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * static Locale* getAvailableLocales(int32_t& numLocales)
+ * static UnicodeString& getDisplayName(const Locale& objectLocale,
+ * const Locale& displayLocale,
+ * UnicodeString& displayName)
+ * static UnicodeString& getDisplayName(const Locale& objectLocale,
+ * UnicodeString& displayName)
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * @stable ICU 2.0
+ * @see ResourceBundle
+ */
+class U_COMMON_API Locale : public UObject {
+public:
+ /** Useful constant for the Root locale. @stable ICU 4.4 */
+ static const Locale &U_EXPORT2 getRoot(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getEnglish(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getFrench(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getGerman(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getItalian(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getJapanese(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getKorean(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getChinese(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getSimplifiedChinese(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getTraditionalChinese(void);
+
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getFrance(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getGermany(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getItaly(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getJapan(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getKorea(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getChina(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getPRC(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getTaiwan(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getUK(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getUS(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getCanada(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getCanadaFrench(void);
+
+
+ /**
+ * Construct a default locale object, a Locale for the default locale ID.
+ *
+ * @see getDefault
+ * @see uloc_getDefault
+ * @stable ICU 2.0
+ */
+ Locale();
+
+ /**
+ * Construct a locale from language, country, variant.
+ * If an error occurs, then the constructed object will be "bogus"
+ * (isBogus() will return true).
+ *
+ * @param language Lowercase two-letter or three-letter ISO-639 code.
+ * This parameter can instead be an ICU style C locale (e.g. "en_US"),
+ * but the other parameters must not be used.
+ * This parameter can be NULL; if so,
+ * the locale is initialized to match the current default locale.
+ * (This is the same as using the default constructor.)
+ * Please note: The Java Locale class does NOT accept the form
+ * 'new Locale("en_US")' but only 'new Locale("en","US")'
+ *
+ * @param country Uppercase two-letter ISO-3166 code. (optional)
+ * @param variant Uppercase vendor and browser specific code. See class
+ * description. (optional)
+ * @param keywordsAndValues A string consisting of keyword/values pairs, such as
+ * "collation=phonebook;currency=euro"
+ *
+ * @see getDefault
+ * @see uloc_getDefault
+ * @stable ICU 2.0
+ */
+ Locale( const char * language,
+ const char * country = 0,
+ const char * variant = 0,
+ const char * keywordsAndValues = 0);
+
+ /**
+ * Initializes a Locale object from another Locale object.
+ *
+ * @param other The Locale object being copied in.
+ * @stable ICU 2.0
+ */
+ Locale(const Locale& other);
+
+ /**
+ * Move constructor; might leave source in bogus state.
+ * This locale will have the same contents that the source locale had.
+ *
+ * @param other The Locale object being moved in.
+ * @stable ICU 63
+ */
+ Locale(Locale&& other) U_NOEXCEPT;
+
+ /**
+ * Destructor
+ * @stable ICU 2.0
+ */
+ virtual ~Locale() ;
+
+ /**
+ * Replaces the entire contents of *this with the specified value.
+ *
+ * @param other The Locale object being copied in.
+ * @return *this
+ * @stable ICU 2.0
+ */
+ Locale& operator=(const Locale& other);
+
+ /**
+ * Move assignment operator; might leave source in bogus state.
+ * This locale will have the same contents that the source locale had.
+ * The behavior is undefined if *this and the source are the same object.
+ *
+ * @param other The Locale object being moved in.
+ * @return *this
+ * @stable ICU 63
+ */
+ Locale& operator=(Locale&& other) U_NOEXCEPT;
+
+ /**
+ * Checks if two locale keys are the same.
+ *
+ * @param other The locale key object to be compared with this.
+ * @return True if the two locale keys are the same, false otherwise.
+ * @stable ICU 2.0
+ */
+ UBool operator==(const Locale& other) const;
+
+ /**
+ * Checks if two locale keys are not the same.
+ *
+ * @param other The locale key object to be compared with this.
+ * @return True if the two locale keys are not the same, false
+ * otherwise.
+ * @stable ICU 2.0
+ */
+ inline UBool operator!=(const Locale& other) const;
+
+ /**
+ * Clone this object.
+ * Clones can be used concurrently in multiple threads.
+ * If an error occurs, then NULL is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.8
+ */
+ Locale *clone() const;
+
+#ifndef U_HIDE_SYSTEM_API
+ /**
+ * Common methods of getting the current default Locale. Used for the
+ * presentation: menus, dialogs, etc. Generally set once when your applet or
+ * application is initialized, then never reset. (If you do reset the
+ * default locale, you probably want to reload your GUI, so that the change
+ * is reflected in your interface.)
+ *
+ * More advanced programs will allow users to use different locales for
+ * different fields, e.g. in a spreadsheet.
+ *
+ * Note that the initial setting will match the host system.
+ * @return a reference to the Locale object for the default locale ID
+ * @system
+ * @stable ICU 2.0
+ */
+ static const Locale& U_EXPORT2 getDefault(void);
+
+ /**
+ * Sets the default. Normally set once at the beginning of a process,
+ * then never reset.
+ * setDefault() only changes ICU's default locale ID, <strong>not</strong>
+ * the default locale ID of the runtime environment.
+ *
+ * @param newLocale Locale to set to. If NULL, set to the value obtained
+ * from the runtime environment.
+ * @param success The error code.
+ * @system
+ * @stable ICU 2.0
+ */
+ static void U_EXPORT2 setDefault(const Locale& newLocale,
+ UErrorCode& success);
+#endif /* U_HIDE_SYSTEM_API */
+
+ /**
+ * Returns a Locale for the specified BCP47 language tag string.
+ * If the specified language tag contains any ill-formed subtags,
+ * the first such subtag and all following subtags are ignored.
+ * <p>
+ * This implements the 'Language-Tag' production of BCP 47, and so
+ * supports legacy language tags (marked as "Type: grandfathered" in BCP 47)
+ * (regular and irregular) as well as private use language tags.
+ *
+ * Private use tags are represented as 'x-whatever',
+ * and legacy tags are converted to their canonical replacements where they exist.
+ *
+ * Note that a few legacy tags have no modern replacement;
+ * these will be converted using the fallback described in
+ * the first paragraph, so some information might be lost.
+ *
+ * @param tag the input BCP47 language tag.
+ * @param status error information if creating the Locale failed.
+ * @return the Locale for the specified BCP47 language tag.
+ * @stable ICU 63
+ */
+ static Locale U_EXPORT2 forLanguageTag(StringPiece tag, UErrorCode& status);
+
+ /**
+ * Returns a well-formed language tag for this Locale.
+ * <p>
+ * <b>Note</b>: Any locale fields which do not satisfy the BCP47 syntax
+ * requirement will be silently omitted from the result.
+ *
+ * If this function fails, partial output may have been written to the sink.
+ *
+ * @param sink the output sink receiving the BCP47 language
+ * tag for this Locale.
+ * @param status error information if creating the language tag failed.
+ * @stable ICU 63
+ */
+ void toLanguageTag(ByteSink& sink, UErrorCode& status) const;
+
+ /**
+ * Returns a well-formed language tag for this Locale.
+ * <p>
+ * <b>Note</b>: Any locale fields which do not satisfy the BCP47 syntax
+ * requirement will be silently omitted from the result.
+ *
+ * @param status error information if creating the language tag failed.
+ * @return the BCP47 language tag for this Locale.
+ * @stable ICU 63
+ */
+ template<typename StringClass>
+ inline StringClass toLanguageTag(UErrorCode& status) const;
+
+ /**
+ * Creates a locale which has had minimal canonicalization
+ * as per uloc_getName().
+ * @param name The name to create from. If name is null,
+ * the default Locale is used.
+ * @return new locale object
+ * @stable ICU 2.0
+ * @see uloc_getName
+ */
+ static Locale U_EXPORT2 createFromName(const char *name);
+
+ /**
+ * Creates a locale from the given string after canonicalizing
+ * the string according to CLDR by calling uloc_canonicalize().
+ * @param name the locale ID to create from. Must not be NULL.
+ * @return a new locale object corresponding to the given name
+ * @stable ICU 3.0
+ * @see uloc_canonicalize
+ */
+ static Locale U_EXPORT2 createCanonical(const char* name);
+
+ /**
+ * Returns the locale's ISO-639 language code.
+ * @return An alias to the code
+ * @stable ICU 2.0
+ */
+ inline const char * getLanguage( ) const;
+
+ /**
+ * Returns the locale's ISO-15924 abbreviation script code.
+ * @return An alias to the code
+ * @see uscript_getShortName
+ * @see uscript_getCode
+ * @stable ICU 2.8
+ */
+ inline const char * getScript( ) const;
+
+ /**
+ * Returns the locale's ISO-3166 country code.
+ * @return An alias to the code
+ * @stable ICU 2.0
+ */
+ inline const char * getCountry( ) const;
+
+ /**
+ * Returns the locale's variant code.
+ * @return An alias to the code
+ * @stable ICU 2.0
+ */
+ inline const char * getVariant( ) const;
+
+ /**
+ * Returns the programmatic name of the entire locale, with the language,
+ * country and variant separated by underbars. If a field is missing, up
+ * to two leading underbars will occur. Example: "en", "de_DE", "en_US_WIN",
+ * "de__POSIX", "fr__MAC", "__MAC", "_MT", "_FR_EURO"
+ * @return A pointer to "name".
+ * @stable ICU 2.0
+ */
+ inline const char * getName() const;
+
+ /**
+ * Returns the programmatic name of the entire locale as getName() would return,
+ * but without keywords.
+ * @return A pointer to "name".
+ * @see getName
+ * @stable ICU 2.8
+ */
+ const char * getBaseName() const;
+
+ /**
+ * Add the likely subtags for this Locale, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If this Locale is already in the maximal form, or not valid, or there is
+ * no data available for maximization, the Locale will be unchanged.
+ *
+ * For example, "und-Zzzz" cannot be maximized, since there is no
+ * reasonable maximization.
+ *
+ * Examples:
+ *
+ * "en" maximizes to "en_Latn_US"
+ *
+ * "de" maximizes to "de_Latn_DE"
+ *
+ * "sr" maximizes to "sr_Cyrl_RS"
+ *
+ * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
+ *
+ * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
+ *
+ * @param status error information if maximizing this Locale failed.
+ * If this Locale is not well-formed, the error code is
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ * @stable ICU 63
+ */
+ void addLikelySubtags(UErrorCode& status);
+
+ /**
+ * Minimize the subtags for this Locale, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If this Locale is already in the minimal form, or not valid, or there is
+ * no data available for minimization, the Locale will be unchanged.
+ *
+ * Since the minimization algorithm relies on proper maximization, see the
+ * comments for addLikelySubtags for reasons why there might not be any
+ * data.
+ *
+ * Examples:
+ *
+ * "en_Latn_US" minimizes to "en"
+ *
+ * "de_Latn_DE" minimizes to "de"
+ *
+ * "sr_Cyrl_RS" minimizes to "sr"
+ *
+ * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
+ * script, and minimizing to "zh" would imply "zh_Hans_CN".)
+ *
+ * @param status error information if minimizing this Locale failed.
+ * If this Locale is not well-formed, the error code is
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ * @stable ICU 63
+ */
+ void minimizeSubtags(UErrorCode& status);
+
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Canonicalize the locale ID of this object according to CLDR.
+ * @param status the status code
+ * @draft ICU 67
+ * @see createCanonical
+ */
+ void canonicalize(UErrorCode& status);
+#endif // U_HIDE_DRAFT_API
+
+ /**
+ * Gets the list of keywords for the specified locale.
+ *
+ * @param status the status code
+ * @return pointer to StringEnumeration class, or NULL if there are no keywords.
+ * Client must dispose of it by calling delete.
+ * @see getKeywords
+ * @stable ICU 2.8
+ */
+ StringEnumeration * createKeywords(UErrorCode &status) const;
+
+ /**
+ * Gets the list of Unicode keywords for the specified locale.
+ *
+ * @param status the status code
+ * @return pointer to StringEnumeration class, or NULL if there are no keywords.
+ * Client must dispose of it by calling delete.
+ * @see getUnicodeKeywords
+ * @stable ICU 63
+ */
+ StringEnumeration * createUnicodeKeywords(UErrorCode &status) const;
+
+ /**
+ * Gets the set of keywords for this Locale.
+ *
+ * A wrapper to call createKeywords() and write the resulting
+ * keywords as standard strings (or compatible objects) into any kind of
+ * container that can be written to by an STL style output iterator.
+ *
+ * @param iterator an STL style output iterator to write the keywords to.
+ * @param status error information if creating set of keywords failed.
+ * @stable ICU 63
+ */
+ template<typename StringClass, typename OutputIterator>
+ inline void getKeywords(OutputIterator iterator, UErrorCode& status) const;
+
+ /**
+ * Gets the set of Unicode keywords for this Locale.
+ *
+ * A wrapper to call createUnicodeKeywords() and write the resulting
+ * keywords as standard strings (or compatible objects) into any kind of
+ * container that can be written to by an STL style output iterator.
+ *
+ * @param iterator an STL style output iterator to write the keywords to.
+ * @param status error information if creating set of keywords failed.
+ * @stable ICU 63
+ */
+ template<typename StringClass, typename OutputIterator>
+ inline void getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const;
+
+ /**
+ * Gets the value for a keyword.
+ *
+ * This uses legacy keyword=value pairs, like "collation=phonebook".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword for which we want the value. Case insensitive.
+ * @param buffer The buffer to receive the keyword value.
+ * @param bufferCapacity The capacity of receiving buffer
+ * @param status Returns any error information while performing this operation.
+ * @return the length of the keyword value
+ *
+ * @stable ICU 2.8
+ */
+ int32_t getKeywordValue(const char* keywordName, char *buffer, int32_t bufferCapacity, UErrorCode &status) const;
+
+ /**
+ * Gets the value for a keyword.
+ *
+ * This uses legacy keyword=value pairs, like "collation=phonebook".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword for which we want the value.
+ * @param sink the sink to receive the keyword value.
+ * @param status error information if getting the value failed.
+ * @stable ICU 63
+ */
+ void getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const;
+
+ /**
+ * Gets the value for a keyword.
+ *
+ * This uses legacy keyword=value pairs, like "collation=phonebook".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword for which we want the value.
+ * @param status error information if getting the value failed.
+ * @return the keyword value.
+ * @stable ICU 63
+ */
+ template<typename StringClass>
+ inline StringClass getKeywordValue(StringPiece keywordName, UErrorCode& status) const;
+
+ /**
+ * Gets the Unicode value for a Unicode keyword.
+ *
+ * This uses Unicode key-value pairs, like "co-phonebk".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword for which we want the value.
+ * @param sink the sink to receive the keyword value.
+ * @param status error information if getting the value failed.
+ * @stable ICU 63
+ */
+ void getUnicodeKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const;
+
+ /**
+ * Gets the Unicode value for a Unicode keyword.
+ *
+ * This uses Unicode key-value pairs, like "co-phonebk".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword for which we want the value.
+ * @param status error information if getting the value failed.
+ * @return the keyword value.
+ * @stable ICU 63
+ */
+ template<typename StringClass>
+ inline StringClass getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const;
+
+ /**
+ * Sets or removes the value for a keyword.
+ *
+ * For removing all keywords, use getBaseName(),
+ * and construct a new Locale if it differs from getName().
+ *
+ * This uses legacy keyword=value pairs, like "collation=phonebook".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword to be set. Case insensitive.
+ * @param keywordValue value of the keyword to be set. If 0-length or
+ * NULL, will result in the keyword being removed. No error is given if
+ * that keyword does not exist.
+ * @param status Returns any error information while performing this operation.
+ *
+ * @stable ICU 49
+ */
+ void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status);
+
+ /**
+ * Sets or removes the value for a keyword.
+ *
+ * For removing all keywords, use getBaseName(),
+ * and construct a new Locale if it differs from getName().
+ *
+ * This uses legacy keyword=value pairs, like "collation=phonebook".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword to be set.
+ * @param keywordValue value of the keyword to be set. If 0-length or
+ * NULL, will result in the keyword being removed. No error is given if
+ * that keyword does not exist.
+ * @param status Returns any error information while performing this operation.
+ * @stable ICU 63
+ */
+ void setKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status);
+
+ /**
+ * Sets or removes the Unicode value for a Unicode keyword.
+ *
+ * For removing all keywords, use getBaseName(),
+ * and construct a new Locale if it differs from getName().
+ *
+ * This uses Unicode key-value pairs, like "co-phonebk".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword to be set.
+ * @param keywordValue value of the keyword to be set. If 0-length or
+ * NULL, will result in the keyword being removed. No error is given if
+ * that keyword does not exist.
+ * @param status Returns any error information while performing this operation.
+ * @stable ICU 63
+ */
+ void setUnicodeKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status);
+
+ /**
+ * Returns the locale's three-letter language code, as specified
+ * in ISO draft standard ISO-639-2.
+ * @return An alias to the code, or an empty string
+ * @stable ICU 2.0
+ */
+ const char * getISO3Language() const;
+
+ /**
+ * Returns the locale's three-letter ISO-3166 country code.
+ * @return An alias to the code, or an empty string
+ * @stable ICU 2.0
+ */
+ const char * getISO3Country() const;
+
+ /**
+ * Returns the Windows LCID value corresponding to this locale.
+ * This value is stored in the resource data for the locale as a one-to-four-digit
+ * hexadecimal number. If the resource is missing, in the wrong format, or
+ * there is no Windows LCID value that corresponds to this locale, returns 0.
+ * @stable ICU 2.0
+ */
+ uint32_t getLCID(void) const;
+
+ /**
+ * Returns whether this locale's script is written right-to-left.
+ * If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags().
+ * If no likely script is known, then false is returned.
+ *
+ * A script is right-to-left according to the CLDR script metadata
+ * which corresponds to whether the script's letters have Bidi_Class=R or AL.
+ *
+ * Returns true for "ar" and "en-Hebr", false for "zh" and "fa-Cyrl".
+ *
+ * @return true if the locale's script is written right-to-left
+ * @stable ICU 54
+ */
+ UBool isRightToLeft() const;
+
+ /**
+ * Fills in "dispLang" with the name of this locale's language in a format suitable for
+ * user display in the default locale. For example, if the locale's language code is
+ * "fr" and the default locale's language code is "en", this function would set
+ * dispLang to "French".
+ * @param dispLang Receives the language's display name.
+ * @return A reference to "dispLang".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayLanguage(UnicodeString& dispLang) const;
+
+ /**
+ * Fills in "dispLang" with the name of this locale's language in a format suitable for
+ * user display in the locale specified by "displayLocale". For example, if the locale's
+ * language code is "en" and displayLocale's language code is "fr", this function would set
+ * dispLang to "Anglais".
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * displayLocale would result in "Anglais", while passing Locale::getGerman()
+ * for displayLocale would result in "Englisch".
+ * @param dispLang Receives the language's display name.
+ * @return A reference to "dispLang".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayLanguage( const Locale& displayLocale,
+ UnicodeString& dispLang) const;
+
+ /**
+ * Fills in "dispScript" with the name of this locale's script in a format suitable
+ * for user display in the default locale. For example, if the locale's script code
+ * is "LATN" and the default locale's language code is "en", this function would set
+ * dispScript to "Latin".
+ * @param dispScript Receives the script's display name.
+ * @return A reference to "dispScript".
+ * @stable ICU 2.8
+ */
+ UnicodeString& getDisplayScript( UnicodeString& dispScript) const;
+
+ /**
+ * Fills in "dispScript" with the name of this locale's script in a format suitable
+ * for user display in the locale specified by "displayLocale". For example, if the locale's
+ * script code is "LATN" and displayLocale's language code is "en", this function would set
+ * dispScript to "Latin".
+ * @param displayLocale Specifies the locale to be used to display the name. In other
+ * words, if the locale's script code is "LATN", passing
+ * Locale::getFrench() for displayLocale would result in "", while
+ * passing Locale::getGerman() for displayLocale would result in
+ * "".
+ * @param dispScript Receives the script's display name.
+ * @return A reference to "dispScript".
+ * @stable ICU 2.8
+ */
+ UnicodeString& getDisplayScript( const Locale& displayLocale,
+ UnicodeString& dispScript) const;
+
+ /**
+ * Fills in "dispCountry" with the name of this locale's country in a format suitable
+ * for user display in the default locale. For example, if the locale's country code
+ * is "FR" and the default locale's language code is "en", this function would set
+ * dispCountry to "France".
+ * @param dispCountry Receives the country's display name.
+ * @return A reference to "dispCountry".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayCountry( UnicodeString& dispCountry) const;
+
+ /**
+ * Fills in "dispCountry" with the name of this locale's country in a format suitable
+ * for user display in the locale specified by "displayLocale". For example, if the locale's
+ * country code is "US" and displayLocale's language code is "fr", this function would set
+ * dispCountry to "&Eacute;tats-Unis".
+ * @param displayLocale Specifies the locale to be used to display the name. In other
+ * words, if the locale's country code is "US", passing
+ * Locale::getFrench() for displayLocale would result in "&Eacute;tats-Unis", while
+ * passing Locale::getGerman() for displayLocale would result in
+ * "Vereinigte Staaten".
+ * @param dispCountry Receives the country's display name.
+ * @return A reference to "dispCountry".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayCountry( const Locale& displayLocale,
+ UnicodeString& dispCountry) const;
+
+ /**
+ * Fills in "dispVar" with the name of this locale's variant code in a format suitable
+ * for user display in the default locale.
+ * @param dispVar Receives the variant's name.
+ * @return A reference to "dispVar".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayVariant( UnicodeString& dispVar) const;
+
+ /**
+ * Fills in "dispVar" with the name of this locale's variant code in a format
+ * suitable for user display in the locale specified by "displayLocale".
+ * @param displayLocale Specifies the locale to be used to display the name.
+ * @param dispVar Receives the variant's display name.
+ * @return A reference to "dispVar".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayVariant( const Locale& displayLocale,
+ UnicodeString& dispVar) const;
+
+ /**
+ * Fills in "name" with the name of this locale in a format suitable for user display
+ * in the default locale. This function uses getDisplayLanguage(), getDisplayCountry(),
+ * and getDisplayVariant() to do its work, and outputs the display name in the format
+ * "language (country[,variant])". For example, if the default locale is en_US, then
+ * fr_FR's display name would be "French (France)", and es_MX_Traditional's display name
+ * would be "Spanish (Mexico,Traditional)".
+ * @param name Receives the locale's display name.
+ * @return A reference to "name".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayName( UnicodeString& name) const;
+
+ /**
+ * Fills in "name" with the name of this locale in a format suitable for user display
+ * in the locale specified by "displayLocale". This function uses getDisplayLanguage(),
+ * getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display
+ * name in the format "language (country[,variant])". For example, if displayLocale is
+ * fr_FR, then en_US's display name would be "Anglais (&Eacute;tats-Unis)", and no_NO_NY's
+ * display name would be "norv&eacute;gien (Norv&egrave;ge,NY)".
+ * @param displayLocale Specifies the locale to be used to display the name.
+ * @param name Receives the locale's display name.
+ * @return A reference to "name".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayName( const Locale& displayLocale,
+ UnicodeString& name) const;
+
+ /**
+ * Generates a hash code for the locale.
+ * @stable ICU 2.0
+ */
+ int32_t hashCode(void) const;
+
+ /**
+ * Sets the locale to bogus.
+ * A bogus locale represents a non-existing locale associated
+ * with services that can be instantiated from non-locale data
+ * in addition to locale (for example, collation can be
+ * instantiated from a locale and from a rule set).
+ * @stable ICU 2.1
+ */
+ void setToBogus();
+
+ /**
+ * Gets the bogus state. Locale object can be bogus if it doesn't exist
+ * @return false if it is a real locale, true if it is a bogus locale
+ * @stable ICU 2.1
+ */
+ inline UBool isBogus(void) const;
+
+ /**
+ * Returns a list of all installed locales.
+ * @param count Receives the number of locales in the list.
+ * @return A pointer to an array of Locale objects. This array is the list
+ * of all locales with installed resource files. The caller does NOT
+ * get ownership of this list, and must NOT delete it.
+ * @stable ICU 2.0
+ */
+ static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
+
+ /**
+ * Gets a list of all available 2-letter country codes defined in ISO 3166. This is a
+ * pointer to an array of pointers to arrays of char. All of these pointers are
+ * owned by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ * @return a list of all available country codes
+ * @stable ICU 2.0
+ */
+ static const char* const* U_EXPORT2 getISOCountries();
+
+ /**
+ * Gets a list of all available language codes defined in ISO 639. This is a pointer
+ * to an array of pointers to arrays of char. All of these pointers are owned
+ * by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ * @return a list of all available language codes
+ * @stable ICU 2.0
+ */
+ static const char* const* U_EXPORT2 getISOLanguages();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+ /**
+ * A Locale iterator interface similar to a Java Iterator<Locale>.
+ * @stable ICU 65
+ */
+ class U_COMMON_API Iterator /* not : public UObject because this is an interface/mixin class */ {
+ public:
+ /** Virtual destructor; only declared here (no inline body). @stable ICU 65 */
+ virtual ~Iterator();
+
+ /**
+ * @return true if next() can be called again (pure virtual: every implementation must override).
+ * @stable ICU 65
+ */
+ virtual UBool hasNext() const = 0;
+
+ /**
+ * @return the next locale (pure virtual: every implementation must override).
+ * @stable ICU 65
+ */
+ virtual const Locale &next() = 0;
+ };
+
+ /**
+ * A generic Locale iterator implementation over Locale input iterators.
+ * @stable ICU 65
+ */
+ template<typename Iter>
+ class RangeIterator : public Iterator, public UMemory {
+ public:
+ /**
+ * Constructs an iterator from a begin/end range; both values are copied into this object.
+ * Each of the iterator parameter values must be an
+ * input iterator whose value is convertible to const Locale &.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @stable ICU 65
+ */
+ RangeIterator(Iter begin, Iter end) : it_(begin), end_(end) {}
+
+ /**
+ * @return true if next() can be called again, i.e. the current position differs from the end.
+ * @stable ICU 65
+ */
+ UBool hasNext() const override { return it_ != end_; }
+
+ /**
+ * @return the next locale: the element at the current position, which is then advanced. hasNext() is not checked here.
+ * @stable ICU 65
+ */
+ const Locale &next() override { return *it_++; }
+
+ private:
+ Iter it_; // current position; advanced by next()
+ const Iter end_; // exclusive end of the range; never modified
+ };
+
+ /**
+ * A generic Locale iterator implementation over Locale input iterators.
+ * Calls the converter to convert each *begin to a const Locale &.
+ * @stable ICU 65
+ */
+ template<typename Iter, typename Conv>
+ class ConvertingIterator : public Iterator, public UMemory {
+ public:
+ /**
+ * Constructs an iterator from a begin/end range; the iterators and the converter are copied into this object.
+ * Each of the iterator parameter values must be an
+ * input iterator whose value the converter converts to const Locale &.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @param converter Converter from *begin to const Locale & or compatible. next() returns its result by reference, so a converter that returns a Locale by value would leave next() returning a dangling reference.
+ * @stable ICU 65
+ */
+ ConvertingIterator(Iter begin, Iter end, Conv converter) :
+ it_(begin), end_(end), converter_(converter) {}
+
+ /**
+ * @return true if next() can be called again, i.e. the current position differs from the end.
+ * @stable ICU 65
+ */
+ UBool hasNext() const override { return it_ != end_; }
+
+ /**
+ * @return the next locale: the converter applied to the current element; the position is then advanced. hasNext() is not checked here.
+ * @stable ICU 65
+ */
+ const Locale &next() override { return converter_(*it_++); }
+
+ private:
+ Iter it_; // current position; advanced by next()
+ const Iter end_; // exclusive end of the range; never modified
+ Conv converter_; // callable applied to each dereferenced element in next()
+ };
+
+protected: /* only protected for testing purposes. DO NOT USE. */
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Set this from a single POSIX style locale string.
+ * @internal
+ */
+ void setFromPOSIXID(const char *posixID);
+#endif /* U_HIDE_INTERNAL_API */
+
+private:
+ /**
+ * Initialize the locale object with a new name.
+ * Was deprecated - used in implementation - moved internal
+ *
+ * @param cLocaleID The new locale name.
+ * @param canonicalize whether to call uloc_canonicalize on cLocaleID
+ */
+ Locale& init(const char* cLocaleID, UBool canonicalize);
+
+ /*
+ * Internal constructor to allow construction of a locale object with
+ * NO side effects. (Default constructor tries to get
+ * the default locale.)
+ */
+ enum ELocaleType { // tag type accepted by the private Locale(ELocaleType) constructor
+ eBOGUS // sole enumerator
+ };
+ Locale(ELocaleType);
+
+ /**
+ * Initialize the locale cache for commonly used locales
+ */
+ static Locale *getLocaleCache(void);
+
+ char language[ULOC_LANG_CAPACITY];
+ char script[ULOC_SCRIPT_CAPACITY];
+ char country[ULOC_COUNTRY_CAPACITY];
+ int32_t variantBegin;
+ char* fullName;
+ char fullNameBuffer[ULOC_FULLNAME_CAPACITY];
+ // name without keywords
+ char* baseName;
+ void initBaseName(UErrorCode& status);
+
+ UBool fIsBogus;
+
+ static const Locale &getLocale(int locid);
+
+ /**
+ * A friend to allow the default locale to be set by either the C or C++ API.
+ * @internal (private)
+ */
+ friend Locale *locale_set_default_internal(const char *, UErrorCode& status);
+
+ /**
+ * @internal (private)
+ */
+ friend void U_CALLCONV locale_available_init();
+};
+
+inline UBool
+Locale::operator!=(const Locale& other) const
+{
+ return !operator==(other); // exact negation of operator==, so the two can never disagree
+}
+
+template<typename StringClass> inline StringClass
+Locale::toLanguageTag(UErrorCode& status) const
+{
+ StringClass result; // default-constructed output string
+ StringByteSink<StringClass> sink(&result); // sink is bound to result
+ toLanguageTag(sink, status); // delegates to the ByteSink overload; status is passed through
+ return result; // returned without inspecting status; callers must check it themselves
+}
+
+inline const char *
+Locale::getCountry() const
+{
+ return country; // pointer to this object's own country[] member array, not a copy
+}
+
+inline const char *
+Locale::getLanguage() const
+{
+ return language; // pointer to this object's own language[] member array, not a copy
+}
+
+inline const char *
+Locale::getScript() const
+{
+ return script; // pointer to this object's own script[] member array, not a copy
+}
+
+inline const char *
+Locale::getVariant() const
+{
+ return &baseName[variantBegin]; // the variant is the tail of baseName starting at offset variantBegin
+}
+
+inline const char *
+Locale::getName() const
+{
+ return fullName; // the fullName member pointer itself; nothing is copied
+}
+
+template<typename StringClass, typename OutputIterator> inline void
+Locale::getKeywords(OutputIterator iterator, UErrorCode& status) const
+{
+ LocalPointer<StringEnumeration> keys(createKeywords(status)); // holds the enumeration returned by createKeywords()
+ if (U_FAILURE(status) || keys.isNull()) { // failure or no enumeration: nothing is written to iterator
+ return;
+ }
+ for (;;) { // left only through the return below
+ int32_t resultLength;
+ const char* buffer = keys->next(&resultLength, status); // next keyword plus its length
+ if (U_FAILURE(status) || buffer == nullptr) { // error, or null buffer (presumably end of enumeration)
+ return;
+ }
+ *iterator++ = StringClass(buffer, resultLength); // one StringClass per keyword, built from pointer + length
+ }
+}
+
+template<typename StringClass, typename OutputIterator> inline void
+Locale::getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const
+{
+ LocalPointer<StringEnumeration> keys(createUnicodeKeywords(status)); // holds the enumeration returned by createUnicodeKeywords()
+ if (U_FAILURE(status) || keys.isNull()) { // failure or no enumeration: nothing is written to iterator
+ return;
+ }
+ for (;;) { // left only through the return below
+ int32_t resultLength;
+ const char* buffer = keys->next(&resultLength, status); // next keyword plus its length
+ if (U_FAILURE(status) || buffer == nullptr) { // error, or null buffer (presumably end of enumeration)
+ return;
+ }
+ *iterator++ = StringClass(buffer, resultLength); // one StringClass per keyword, built from pointer + length
+ }
+}
+
+template<typename StringClass> inline StringClass
+Locale::getKeywordValue(StringPiece keywordName, UErrorCode& status) const
+{
+ StringClass result; // default-constructed output string
+ StringByteSink<StringClass> sink(&result); // sink is bound to result
+ getKeywordValue(keywordName, sink, status); // delegates to the ByteSink overload; status is passed through
+ return result; // returned without inspecting status; callers must check it themselves
+}
+
+template<typename StringClass> inline StringClass
+Locale::getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const
+{
+ StringClass result; // default-constructed output string
+ StringByteSink<StringClass> sink(&result); // sink is bound to result
+ getUnicodeKeywordValue(keywordName, sink, status); // delegates to the ByteSink overload; status is passed through
+ return result; // returned without inspecting status; callers must check it themselves
+}
+
+inline UBool
+Locale::isBogus(void) const {
+ return fIsBogus; // plain read of the fIsBogus member flag
+}
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/messagepattern.h b/thirdparty/icu4c/common/unicode/messagepattern.h
new file mode 100644
index 0000000000..98e7b70b1f
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/messagepattern.h
@@ -0,0 +1,949 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2011-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: messagepattern.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011mar14
+* created by: Markus W. Scherer
+*/
+
+#ifndef __MESSAGEPATTERN_H__
+#define __MESSAGEPATTERN_H__
+
+/**
+ * \file
+ * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/parseerr.h"
+#include "unicode/unistr.h"
+
+/**
+ * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
+ * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
+ * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
+ * <p>
+ * A pair of adjacent apostrophes always results in a single apostrophe in the output,
+ * even when the pair is between two single, text-quoting apostrophes.
+ * <p>
+ * The following table shows examples of desired MessageFormat.format() output
+ * with the pattern strings that yield that output.
+ * <p>
+ * <table>
+ * <tr>
+ * <th>Desired output</th>
+ * <th>DOUBLE_OPTIONAL</th>
+ * <th>DOUBLE_REQUIRED</th>
+ * </tr>
+ * <tr>
+ * <td>I see {many}</td>
+ * <td>I see '{many}'</td>
+ * <td>(same)</td>
+ * </tr>
+ * <tr>
+ * <td>I said {'Wow!'}</td>
+ * <td>I said '{''Wow!''}'</td>
+ * <td>(same)</td>
+ * </tr>
+ * <tr>
+ * <td>I don't know</td>
+ * <td>I don't know OR<br> I don''t know</td>
+ * <td>I don''t know</td>
+ * </tr>
+ * </table>
+ * @stable ICU 4.8
+ * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
+ */
+enum UMessagePatternApostropheMode {
+ /**
+ * A literal apostrophe is represented by
+ * either a single or a double apostrophe pattern character.
+ * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
+ * if it immediately precedes a curly brace {},
+ * or a pipe symbol | if inside a choice format,
+ * or a pound symbol # if inside a plural format.
+ * <p>
+ * This is the default behavior starting with ICU 4.8.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_APOS_DOUBLE_OPTIONAL, // = 0 (no explicit initializer)
+ /**
+ * A literal apostrophe must be represented by
+ * a double apostrophe pattern character.
+ * A single apostrophe always starts quoted literal text.
+ * <p>
+ * This is the behavior of ICU 4.6 and earlier, and of the JDK.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_APOS_DOUBLE_REQUIRED // = 1
+};
+/**
+ * Typedef so that the type can be named without the enum keyword.
+ * @stable ICU 4.8
+ */
+typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
+
+/**
+ * MessagePattern::Part type constants.
+ * @stable ICU 4.8
+ */
+enum UMessagePatternPartType {
+ /**
+ * Start of a message pattern (main or nested).
+ * The length is 0 for the top-level message
+ * and for a choice argument sub-message, otherwise 1 for the '{'.
+ * The value indicates the nesting level, starting with 0 for the main message.
+ * <p>
+ * There is always a later MSG_LIMIT part.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_MSG_START, // = 0
+ /**
+ * End of a message pattern (main or nested).
+ * The length is 0 for the top-level message and
+ * the last sub-message of a choice argument,
+ * otherwise 1 for the '}' or (in a choice argument style) the '|'.
+ * The value indicates the nesting level, starting with 0 for the main message.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_MSG_LIMIT, // = 1
+ /**
+ * Indicates a substring of the pattern string which is to be skipped when formatting.
+ * For example, an apostrophe that begins or ends quoted text
+ * would be indicated with such a part.
+ * The value is undefined and currently always 0.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_SKIP_SYNTAX, // = 2
+ /**
+ * Indicates that a syntax character needs to be inserted for auto-quoting.
+ * The length is 0.
+ * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_INSERT_CHAR, // = 3
+ /**
+ * Indicates a syntactic (non-escaped) # symbol in a plural variant.
+ * When formatting, replace this part's substring with the
+ * (value-offset) for the plural argument value.
+ * The value is undefined and currently always 0.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_REPLACE_NUMBER, // = 4
+ /**
+ * Start of an argument.
+ * The length is 1 for the '{'.
+ * The value is the ordinal value of the ArgType. Use getArgType().
+ * <p>
+ * This part is followed by either an ARG_NUMBER or ARG_NAME,
+ * followed by optional argument sub-parts (see UMessagePatternArgType constants)
+ * and finally an ARG_LIMIT part.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_START, // = 5
+ /**
+ * End of an argument.
+ * The length is 1 for the '}'.
+ * The value is the ordinal value of the ArgType. Use getArgType().
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_LIMIT, // = 6
+ /**
+ * The argument number, provided by the value.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_NUMBER, // = 7
+ /**
+ * The argument name.
+ * The value is undefined and currently always 0.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_NAME, // = 8
+ /**
+ * The argument type.
+ * The value is undefined and currently always 0.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_TYPE, // = 9
+ /**
+ * The argument style text.
+ * The value is undefined and currently always 0.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_STYLE, // = 10
+ /**
+ * A selector substring in a "complex" argument style.
+ * The value is undefined and currently always 0.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_SELECTOR, // = 11
+ /**
+ * An integer value, for example the offset or an explicit selector value
+ * in a PluralFormat style.
+ * The part value is the integer value.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_INT, // = 12
+ /**
+ * A numeric value, for example the offset or an explicit selector value
+ * in a PluralFormat style.
+ * The part value is an index into an internal array of numeric values;
+ * use getNumericValue().
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_DOUBLE // = 13
+};
+/**
+ * Typedef so that the type can be named without the enum keyword.
+ * @stable ICU 4.8
+ */
+typedef enum UMessagePatternPartType UMessagePatternPartType;
+
+/**
+ * Argument type constants.
+ * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
+ *
+ * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
+ * with a nesting level one greater than the surrounding message.
+ * @stable ICU 4.8
+ */
+enum UMessagePatternArgType {
+ /**
+ * The argument has no specified type.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_ARG_TYPE_NONE, // = 0
+ /**
+ * The argument has a "simple" type which is provided by the ARG_TYPE part.
+ * An ARG_STYLE part might follow that.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_ARG_TYPE_SIMPLE, // = 1
+ /**
+ * The argument is a ChoiceFormat with one or more
+ * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_ARG_TYPE_CHOICE, // = 2
+ /**
+ * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
+ * (e.g., offset:1)
+ * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
+ * If the selector has an explicit value (e.g., =2), then
+ * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
+ * Otherwise the message immediately follows the ARG_SELECTOR.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_ARG_TYPE_PLURAL, // = 3
+ /**
+ * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_ARG_TYPE_SELECT, // = 4
+ /**
+ * The argument is an ordinal-number PluralFormat
+ * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
+ * @stable ICU 50
+ */
+ UMSGPAT_ARG_TYPE_SELECTORDINAL // = 5
+};
+/**
+ * Typedef so that the type can be named without the enum keyword.
+ * @stable ICU 4.8
+ */
+typedef enum UMessagePatternArgType UMessagePatternArgType;
+
+/**
+ * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
+ * Returns true if the argument type has a plural style part sequence and semantics,
+ * that is, exactly for UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL. The macro expands argType twice, so pass an expression without side effects.
+ * @stable ICU 50
+ */
+#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
+ ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
+
+enum {
+ /**
+ * Return value from MessagePattern.validateArgumentName() for when
+ * the string is a valid "pattern identifier" but not a number.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
+
+ /**
+ * Return value from MessagePattern.validateArgumentName() for when
+ * the string is invalid.
+ * It might not be a valid "pattern identifier",
+ * or it has only ASCII digits but there is a leading zero or the number is too large.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_ARG_NAME_NOT_VALID=-2
+};
+
+/**
+ * Special value that is returned by getNumericValue(Part) when no
+ * numeric value is defined for a part. The sentinel is the double value -123456789.
+ * @see MessagePattern.getNumericValue()
+ * @stable ICU 4.8
+ */
+#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
+
+U_NAMESPACE_BEGIN
+
+class MessagePatternDoubleList;
+class MessagePatternPartsList;
+
+/**
+ * Parses and represents ICU MessageFormat patterns.
+ * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
+ * Used in the implementations of those classes as well as in tools
+ * for message validation, translation and format conversion.
+ * <p>
+ * The parser handles all syntax relevant for identifying message arguments.
+ * This includes "complex" arguments whose style strings contain
+ * nested MessageFormat pattern substrings.
+ * For "simple" arguments (with no nested MessageFormat pattern substrings),
+ * the argument style is not parsed any further.
+ * <p>
+ * The parser handles named and numbered message arguments and allows both in one message.
+ * <p>
+ * Once a pattern has been parsed successfully, iterate through the parsed data
+ * with countParts(), getPart() and related methods.
+ * <p>
+ * The data logically represents a parse tree, but is stored and accessed
+ * as a list of "parts" for fast and simple parsing and to minimize object allocations.
+ * Arguments and nested messages are best handled via recursion.
+ * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
+ * the index of the corresponding _LIMIT "part".
+ * <p>
+ * List of "parts":
+ * <pre>
+ * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
+ * argument = noneArg | simpleArg | complexArg
+ * complexArg = choiceArg | pluralArg | selectArg
+ *
+ * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
+ * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
+ * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
+ * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
+ * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
+ *
+ * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
+ * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
+ * selectStyle = (ARG_SELECTOR message)+
+ * </pre>
+ * <ul>
+ * <li>Literal output text is not represented directly by "parts" but accessed
+ * between parts of a message, from one part's getLimit() to the next part's getIndex().
+ * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
+ * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
+ * the less-than-or-equal-to sign (U+2264).
+ * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
+ * The optional numeric Part between each (ARG_SELECTOR, message) pair
+ * is the value of an explicit-number selector like "=2",
+ * otherwise the selector is a non-numeric identifier.
+ * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
+ * </ul>
+ * <p>
+ * This class is not intended for public subclassing.
+ *
+ * @stable ICU 4.8
+ */
+class U_COMMON_API MessagePattern : public UObject {
+public:
+ /**
+ * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 4.8
+ */
+ MessagePattern(UErrorCode &errorCode);
+
+ /**
+ * Constructs an empty MessagePattern.
+ * @param mode Explicit UMessagePatternApostropheMode.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 4.8
+ */
+ MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
+
+ /**
+ * Constructs a MessagePattern with default UMessagePatternApostropheMode and
+ * parses the MessageFormat pattern string.
+ * @param pattern a MessageFormat pattern string
+ * @param parseError Struct to receive information on the position
+ * of an error within the pattern.
+ * Can be NULL.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * TODO: turn @throws into UErrorCode specifics?
+ * @throws IllegalArgumentException for syntax errors in the pattern string
+ * @throws IndexOutOfBoundsException if certain limits are exceeded
+ * (e.g., argument number too high, argument name too long, etc.)
+ * @throws NumberFormatException if a number could not be parsed
+ * @stable ICU 4.8
+ */
+ MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
+
+ /**
+ * Copy constructor.
+ * @param other Object to copy.
+ * @stable ICU 4.8
+ */
+ MessagePattern(const MessagePattern &other);
+
+ /**
+ * Assignment operator.
+ * @param other Object to copy.
+ * @return *this=other
+ * @stable ICU 4.8
+ */
+ MessagePattern &operator=(const MessagePattern &other);
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ virtual ~MessagePattern();
+
+ /**
+ * Parses a MessageFormat pattern string.
+ * @param pattern a MessageFormat pattern string
+ * @param parseError Struct to receive information on the position
+ * of an error within the pattern.
+ * Can be NULL.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return *this
+ * @throws IllegalArgumentException for syntax errors in the pattern string
+ * @throws IndexOutOfBoundsException if certain limits are exceeded
+ * (e.g., argument number too high, argument name too long, etc.)
+ * @throws NumberFormatException if a number could not be parsed
+ * @stable ICU 4.8
+ */
+ MessagePattern &parse(const UnicodeString &pattern,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ /**
+ * Parses a ChoiceFormat pattern string.
+ * @param pattern a ChoiceFormat pattern string
+ * @param parseError Struct to receive information on the position
+ * of an error within the pattern.
+ * Can be NULL.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return *this
+ * @throws IllegalArgumentException for syntax errors in the pattern string
+ * @throws IndexOutOfBoundsException if certain limits are exceeded
+ * (e.g., argument number too high, argument name too long, etc.)
+ * @throws NumberFormatException if a number could not be parsed
+ * @stable ICU 4.8
+ */
+ MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ /**
+ * Parses a PluralFormat pattern string.
+ * @param pattern a PluralFormat pattern string
+ * @param parseError Struct to receive information on the position
+ * of an error within the pattern.
+ * Can be NULL.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return *this
+ * @throws IllegalArgumentException for syntax errors in the pattern string
+ * @throws IndexOutOfBoundsException if certain limits are exceeded
+ * (e.g., argument number too high, argument name too long, etc.)
+ * @throws NumberFormatException if a number could not be parsed
+ * @stable ICU 4.8
+ */
+ MessagePattern &parsePluralStyle(const UnicodeString &pattern,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ /**
+ * Parses a SelectFormat pattern string.
+ * @param pattern a SelectFormat pattern string
+ * @param parseError Struct to receive information on the position
+ * of an error within the pattern.
+ * Can be NULL.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return *this
+ * @throws IllegalArgumentException for syntax errors in the pattern string
+ * @throws IndexOutOfBoundsException if certain limits are exceeded
+ * (e.g., argument number too high, argument name too long, etc.)
+ * @throws NumberFormatException if a number could not be parsed
+ * @stable ICU 4.8
+ */
+ MessagePattern &parseSelectStyle(const UnicodeString &pattern,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ /**
+ * Clears this MessagePattern.
+ * countParts() will return 0.
+ * @stable ICU 4.8
+ */
+ void clear();
+
+ /**
+ * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
+ * countParts() will return 0.
+ * @param mode The new UMessagePatternApostropheMode.
+ * @stable ICU 4.8
+ */
+ void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
+ clear(); // clear first ...
+ aposMode=mode; // ... then store the new mode in the aposMode member
+ }
+
+ /**
+ * @param other another object to compare with.
+ * @return true if this object is equivalent to the other one.
+ * @stable ICU 4.8
+ */
+ UBool operator==(const MessagePattern &other) const;
+
+ /**
+ * @param other another object to compare with.
+ * @return true if this object is not equivalent to the other one (the exact negation of operator==).
+ * @stable ICU 4.8
+ */
+ inline UBool operator!=(const MessagePattern &other) const {
+ return !operator==(other);
+ }
+
+ /**
+ * @return A hash code for this object.
+ * @stable ICU 4.8
+ */
+ int32_t hashCode() const;
+
+ /**
+ * @return the UMessagePatternApostropheMode currently stored in this instance.
+ * @stable ICU 4.8
+ */
+ UMessagePatternApostropheMode getApostropheMode() const {
+ return this->aposMode;
+ }
+
+ // Java has package-private jdkAposMode() here.
+ // In C++, this is declared in the MessageImpl class.
+
+ /**
+ * @return the stored pattern string, by const reference (so it is never a null value).
+ * @stable ICU 4.8
+ */
+ const UnicodeString &getPatternString() const {
+ return this->msg;
+ }
+
+ /**
+ * Reports whether the parsed pattern contains named arguments such as {first_name}.
+ * @return the hasArgNames flag: true if the pattern has at least one named argument.
+ * @stable ICU 4.8
+ */
+ UBool hasNamedArguments() const {
+ return this->hasArgNames;
+ }
+
+ /**
+ * Reports whether the parsed pattern contains numbered arguments such as {2}.
+ * @return the hasArgNumbers flag: true if the pattern has at least one numbered argument.
+ * @stable ICU 4.8
+ */
+ UBool hasNumberedArguments() const {
+ return this->hasArgNumbers;
+ }
+
+ /**
+ * Validates and parses an argument name or argument number string.
+ * An argument name must be a "pattern identifier", that is, it must contain
+ * no Unicode Pattern_Syntax or Pattern_White_Space characters.
+ * If it only contains ASCII digits, then it must be a small integer with no leading zero.
+ * @param name Input string.
+ * @return &gt;=0 if the name is a valid number,
+ * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
+ * ARG_NAME_NOT_VALID (-2) if it is neither.
+ * @stable ICU 4.8
+ */
+ static int32_t validateArgumentName(const UnicodeString &name);
+
+ /**
+ * Returns a version of the parsed pattern string where each ASCII apostrophe
+ * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
+ * <p>
+ * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
+ * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
+ * @return the deep-auto-quoted version of the parsed pattern string.
+ * @see MessageFormat.autoQuoteApostrophe()
+ * @stable ICU 4.8
+ */
+ UnicodeString autoQuoteApostropheDeep() const;
+
+ class Part;
+
+ /**
+ * Reports how many "parts" parsing the pattern string has produced.
+ * The count is 0 when no pattern has been parsed or after clear() was called.
+ * @return the number of pattern parts.
+ * @stable ICU 4.8
+ */
+ int32_t countParts() const {
+ return this->partsLength;
+ }
+
+ /**
+ * Looks up one pattern "part" by position; the index is not range-checked here.
+ * @param i The index of the Part data. (0..countParts()-1)
+ * @return a reference to the i-th pattern "part".
+ * @stable ICU 4.8
+ */
+ const Part &getPart(int32_t i) const {
+ return this->parts[i];
+ }
+
+ /**
+ * Fetches the UMessagePatternPartType of the i-th pattern "part".
+ * Shorthand for getPart(i).getType().
+ * @param i The index of the Part data. (0..countParts()-1)
+ * @return The UMessagePatternPartType of the i-th Part.
+ * @stable ICU 4.8
+ */
+ UMessagePatternPartType getPartType(int32_t i) const {
+ return parts[i].type;
+ }
+
+ /**
+ * Fetches the pattern-string index recorded in the given pattern "part".
+ * Shorthand for getPart(partIndex).getIndex().
+ * @param partIndex The index of the Part data. (0..countParts()-1)
+ * @return The pattern index of this Part.
+ * @stable ICU 4.8
+ */
+ int32_t getPatternIndex(int32_t partIndex) const {
+ return parts[partIndex].index;
+ }
+
+ /**
+ * Extracts the piece of the pattern string that the given Part covers,
+ * i.e. the range that starts at the part's index and spans the part's length.
+ * @param part a part of this MessagePattern.
+ * @return the substring associated with part.
+ * @stable ICU 4.8
+ */
+ UnicodeString getSubstring(const Part &part) const {
+ return msg.tempSubString(part.getIndex(), part.getLength());
+ }
+
+ /**
+ * Checks whether the pattern substring covered by part is equal to s.
+ * @param part a part of this MessagePattern.
+ * @param s a string.
+ * @return true if comparing the part's range of the pattern string with s yields 0.
+ * @stable ICU 4.8
+ */
+ UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
+ return msg.compare(part.getIndex(), part.getLength(), s)==0;
+ }
+
+ /**
+ * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
+ * @param part a part of this MessagePattern.
+ * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
+ * @stable ICU 4.8
+ */
+ double getNumericValue(const Part &part) const;
+
+ /**
+ * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
+ * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
+ * @return the "offset:" value.
+ * @stable ICU 4.8
+ */
+ double getPluralOffset(int32_t pluralStart) const;
+
+ /**
+ * Finds the index of the ARG|MSG_LIMIT part that closes the ARG|MSG_START part at start.
+ * @param start The index of some Part data (0..countParts()-1);
+ * this Part should be of Type ARG_START or MSG_START.
+ * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
+ * or start itself if getPartType(start)!=ARG|MSG_START.
+ * @stable ICU 4.8
+ */
+ int32_t getLimitPartIndex(int32_t start) const {
+ const int32_t limit=parts[start].limitPartIndex;
+ if(limit>=start) {
+ return limit;
+ }
+ return start;
+ }
+
+ /**
+ * A message pattern "part", representing a pattern parsing event.
+ * There is a part for the start and end of a message or argument,
+ * for quoting and escaping of and with ASCII apostrophes,
+ * and for syntax elements of "complex" arguments.
+ * @stable ICU 4.8
+ */
+ class Part : public UMemory {
+ public:
+ /**
+ * Default constructor, do not use. It leaves every field uninitialized.
+ * @internal
+ */
+ Part() {}
+
+ /**
+ * Returns the type of this part.
+ * @return the part type.
+ * @stable ICU 4.8
+ */
+ UMessagePatternPartType getType() const {
+ return type;
+ }
+
+ /**
+ * Returns the pattern string index associated with this Part.
+ * @return this part's pattern string index.
+ * @stable ICU 4.8
+ */
+ int32_t getIndex() const {
+ return index;
+ }
+
+ /**
+ * Returns the length of the pattern substring associated with this Part.
+ * This is 0 for some parts.
+ * @return this part's pattern substring length.
+ * @stable ICU 4.8
+ */
+ int32_t getLength() const {
+ return length;
+ }
+
+ /**
+ * Returns the pattern string limit (exclusive-end) index associated with this Part.
+ * Convenience method for getIndex()+getLength().
+ * @return this part's pattern string limit index, same as getIndex()+getLength().
+ * @stable ICU 4.8
+ */
+ int32_t getLimit() const {
+ return index+length;
+ }
+
+ /**
+ * Returns a value associated with this part.
+ * See the documentation of each part type for details.
+ * @return the part value.
+ * @stable ICU 4.8
+ */
+ int32_t getValue() const {
+ return value;
+ }
+
+ /**
+ * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
+ * otherwise UMSGPAT_ARG_TYPE_NONE.
+ * @return the argument type for this part.
+ * @stable ICU 4.8
+ */
+ UMessagePatternArgType getArgType() const {
+ UMessagePatternPartType msgType=getType();
+ if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) {
+ return static_cast<UMessagePatternArgType>(value);
+ } else {
+ return UMSGPAT_ARG_TYPE_NONE;
+ }
+ }
+
+ /**
+ * Indicates whether the Part type has a numeric value.
+ * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
+ * @param type The Part type to be tested.
+ * @return true if the Part type has a numeric value.
+ * @stable ICU 4.8
+ */
+ static UBool hasNumericValue(UMessagePatternPartType type) {
+ return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
+ }
+
+ /**
+ * @param other another object to compare with.
+ * @return true if this object is equivalent to the other one.
+ * @stable ICU 4.8
+ */
+ UBool operator==(const Part &other) const;
+
+ /**
+ * @param other another object to compare with.
+ * @return false if this object is equivalent to the other one.
+ * @stable ICU 4.8
+ */
+ inline UBool operator!=(const Part &other) const {
+ return !operator==(other);
+ }
+
+ /**
+ * @return A hash code for this object; uses wrapping unsigned math so a large index cannot overflow a signed int.
+ * @stable ICU 4.8
+ */
+ int32_t hashCode() const {
+ return static_cast<int32_t>(((static_cast<uint32_t>(type)*37u+static_cast<uint32_t>(index))*37u+length)*37u+static_cast<uint32_t>(value));
+ }
+
+ private:
+ friend class MessagePattern;
+
+ static const int32_t MAX_LENGTH=0xffff;
+ static const int32_t MAX_VALUE=0x7fff;
+
+ // Some fields are not final because they are modified during pattern parsing.
+ // After pattern parsing, the parts are effectively immutable.
+ UMessagePatternPartType type;
+ int32_t index;
+ uint16_t length;
+ int16_t value;
+ int32_t limitPartIndex;
+ };
+
+private:
+ void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
+
+ void postParse();
+
+ int32_t parseMessage(int32_t index, int32_t msgStartLength,
+ int32_t nestingLevel, UMessagePatternArgType parentType,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
+
+ int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ /**
+ * Validates and parses an argument name or argument number string.
+ * This internal method assumes that the input substring is a "pattern identifier".
+ * @return &gt;=0 if the name is a valid number,
+ * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
+ * ARG_NAME_NOT_VALID (-2) if it is neither.
+ * @see validateArgumentName(const UnicodeString &)
+ */
+ static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
+
+ int32_t parseArgNumber(int32_t start, int32_t limit) {
+ return MessagePattern::parseArgNumber(this->msg, start, limit);
+ }
+
+ /**
+ * Parses a number from the specified message substring.
+ * @param start start index into the message string
+ * @param limit limit index into the message string, must be start<limit
+ * @param allowInfinity true if U+221E is allowed (for ChoiceFormat)
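+ * @param parseError Struct to receive information on the position of an error within the pattern.
+ * @param errorCode Standard ICU error code.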
+ */
+ void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ // Java has package-private appendReducedApostrophes() here.
+ // In C++, this is declared in the MessageImpl class.
+
+ int32_t skipWhiteSpace(int32_t index);
+
+ int32_t skipIdentifier(int32_t index);
+
+ /**
+ * Skips a sequence of characters that could occur in a double value.
+ * Does not fully parse or validate the value.
+ */
+ int32_t skipDouble(int32_t index);
+
+ static UBool isArgTypeChar(UChar32 c);
+
+ UBool isChoice(int32_t index);
+
+ UBool isPlural(int32_t index);
+
+ UBool isSelect(int32_t index);
+
+ UBool isOrdinal(int32_t index);
+
+ /**
+ * @return true if we are inside a MessageFormat (sub-)pattern,
+ * as opposed to inside a top-level choice/plural/select pattern.
+ */
+ UBool inMessageFormatPattern(int32_t nestingLevel);
+
+ /**
+ * @return true if we are in a MessageFormat sub-pattern
+ * of a top-level ChoiceFormat pattern.
+ */
+ UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
+
+ void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
+ int32_t value, UErrorCode &errorCode);
+
+ void addLimitPart(int32_t start,
+ UMessagePatternPartType type, int32_t index, int32_t length,
+ int32_t value, UErrorCode &errorCode);
+
+ void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
+
+ void setParseError(UParseError *parseError, int32_t index);
+
+ UBool init(UErrorCode &errorCode);
+ UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
+
+ UMessagePatternApostropheMode aposMode;
+ UnicodeString msg;
+ // ArrayList<Part> parts=new ArrayList<Part>();
+ MessagePatternPartsList *partsList;
+ Part *parts;
+ int32_t partsLength;
+ // ArrayList<Double> numericValues;
+ MessagePatternDoubleList *numericValuesList;
+ double *numericValues;
+ int32_t numericValuesLength;
+ UBool hasArgNames;
+ UBool hasArgNumbers;
+ UBool needsAutoQuoting;
+};
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_FORMATTING
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __MESSAGEPATTERN_H__
diff --git a/thirdparty/icu4c/common/unicode/normalizer2.h b/thirdparty/icu4c/common/unicode/normalizer2.h
new file mode 100644
index 0000000000..5eb1d95caf
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/normalizer2.h
@@ -0,0 +1,779 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: normalizer2.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009nov22
+* created by: Markus W. Scherer
+*/
+
+#ifndef __NORMALIZER2_H__
+#define __NORMALIZER2_H__
+
+/**
+ * \file
+ * \brief C++ API: New API for Unicode Normalization.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/stringpiece.h"
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+#include "unicode/unorm2.h"
+
+U_NAMESPACE_BEGIN
+
+class ByteSink;
+
+/**
+ * Unicode normalization functionality for standard Unicode normalization or
+ * for using custom mapping tables.
+ * All instances of this class are unmodifiable/immutable.
+ * Instances returned by getInstance() are singletons that must not be deleted by the caller.
+ * The Normalizer2 class is not intended for public subclassing.
+ *
+ * The primary functions are to produce a normalized string and to detect whether
+ * a string is already normalized.
+ * The most commonly used normalization forms are those defined in
+ * http://www.unicode.org/unicode/reports/tr15/
+ * However, this API supports additional normalization forms for specialized purposes.
+ * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE)
+ * and can be used in implementations of UTS #46.
+ *
+ * Not only are the standard compose and decompose modes supplied,
+ * but additional modes are provided as documented in the Mode enum.
+ *
+ * Some of the functions in this class identify normalization boundaries.
+ * At a normalization boundary, the portions of the string
+ * before it and starting from it do not interact and can be handled independently.
+ *
+ * The spanQuickCheckYes() stops at a normalization boundary.
+ * When the goal is a normalized string, then the text before the boundary
+ * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
+ *
+ * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
+ * a character is guaranteed to be at a normalization boundary,
+ * regardless of context.
+ * This is used for moving from one normalization boundary to the next
+ * or preceding boundary, and for performing iterative normalization.
+ *
+ * Iterative normalization is useful when only a small portion of a
+ * longer string needs to be processed.
+ * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
+ * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
+ * (to process only the substring for which sort key bytes are computed).
+ *
+ * The set of normalization boundaries returned by these functions may not be
+ * complete: There may be more boundaries that could be returned.
+ * Different functions may return different boundaries.
+ * @stable ICU 4.4
+ */
+class U_COMMON_API Normalizer2 : public UObject {
+public:
+ /**
+ * Destructor.
+ * @stable ICU 4.4
+ */
+ ~Normalizer2();
+
+ /**
+ * Returns a Normalizer2 instance for Unicode NFC normalization.
+ * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+ static const Normalizer2 *
+ getNFCInstance(UErrorCode &errorCode);
+
+ /**
+ * Returns a Normalizer2 instance for Unicode NFD normalization.
+ * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+ static const Normalizer2 *
+ getNFDInstance(UErrorCode &errorCode);
+
+ /**
+ * Returns a Normalizer2 instance for Unicode NFKC normalization.
+ * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+ static const Normalizer2 *
+ getNFKCInstance(UErrorCode &errorCode);
+
+ /**
+ * Returns a Normalizer2 instance for Unicode NFKD normalization.
+ * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+ static const Normalizer2 *
+ getNFKDInstance(UErrorCode &errorCode);
+
+ /**
+ * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
+ * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+ static const Normalizer2 *
+ getNFKCCasefoldInstance(UErrorCode &errorCode);
+
+ /**
+ * Returns a Normalizer2 instance which uses the specified data file
+ * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
+ * and which composes or decomposes text according to the specified mode.
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ *
+ * Use packageName=NULL for data files that are part of ICU's own data.
+ * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
+ * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
+ * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
+ *
+ * @param packageName NULL for ICU built-in data, otherwise application data package name
+ * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
+ * @param mode normalization mode (compose or decompose etc.)
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 4.4
+ */
+ static const Normalizer2 *
+ getInstance(const char *packageName,
+ const char *name,
+ UNormalization2Mode mode,
+ UErrorCode &errorCode);
+
+ /**
+ * Returns the normalized form of the source string.
+ * @param src source string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return normalized src
+ * @stable ICU 4.4
+ */
+ UnicodeString
+ normalize(const UnicodeString &src, UErrorCode &errorCode) const {
+ UnicodeString result;
+ normalize(src, result, errorCode);
+ return result;
+ }
+ /**
+ * Writes the normalized form of the source string to the destination string
+ * (replacing its contents) and returns the destination string.
+ * The source and destination strings must be different objects.
+ * @param src source string
+ * @param dest destination string; its contents are replaced with normalized src
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString &
+ normalize(const UnicodeString &src,
+ UnicodeString &dest,
+ UErrorCode &errorCode) const = 0;
+
+ /**
+ * Normalizes a UTF-8 string and optionally records how source substrings
+ * relate to changed and unchanged result substrings.
+ *
+ * Currently implemented completely only for "compose" modes,
+ * such as for NFC, NFKC, and NFKC_Casefold
+ * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
+ * Otherwise currently converts to & from UTF-16 and does not support edits.
+ *
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src Source UTF-8 string.
+ * @param sink A ByteSink to which the normalized UTF-8 result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents are undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 60
+ */
+ virtual void
+ normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
+ Edits *edits, UErrorCode &errorCode) const;
+
+ /**
+ * Appends the normalized form of the second string to the first string
+ * (merging them at the boundary) and returns the first string.
+ * The result is normalized if the first string was normalized.
+ * The first and second strings must be different objects.
+ * @param first string, should be normalized
+ * @param second string, will be normalized
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return first
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString &
+ normalizeSecondAndAppend(UnicodeString &first,
+ const UnicodeString &second,
+ UErrorCode &errorCode) const = 0;
+ /**
+ * Appends the second string to the first string
+ * (merging them at the boundary) and returns the first string.
+ * The result is normalized if both the strings were normalized.
+ * The first and second strings must be different objects.
+ * @param first string, should be normalized
+ * @param second string, should be normalized
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return first
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString &
+ append(UnicodeString &first,
+ const UnicodeString &second,
+ UErrorCode &errorCode) const = 0;
+
+ /**
+ * Gets the decomposition mapping of c.
+ * Roughly equivalent to normalizing the String form of c
+ * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function
+ * returns false and does not write a string
+ * if c does not have a decomposition mapping in this instance's data.
+ * This function is independent of the mode of the Normalizer2.
+ * @param c code point
+ * @param decomposition String object which will be set to c's
+ * decomposition mapping, if there is one.
+ * @return true if c has a decomposition, otherwise false
+ * @stable ICU 4.6
+ */
+ virtual UBool
+ getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
+
+ /**
+ * Gets the raw decomposition mapping of c.
+ *
+ * This is similar to the getDecomposition() method but returns the
+ * raw decomposition mapping as specified in UnicodeData.txt or
+ * (for custom data) in the mapping files processed by the gennorm2 tool.
+ * By contrast, getDecomposition() returns the processed,
+ * recursively-decomposed version of this mapping.
+ *
+ * When used on a standard NFKC Normalizer2 instance,
+ * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
+ *
+ * When used on a standard NFC Normalizer2 instance,
+ * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
+ * in this case, the result contains either one or two code points (=1..4 char16_ts).
+ *
+ * This function is independent of the mode of the Normalizer2.
+ * The default implementation returns false.
+ * @param c code point
+ * @param decomposition String object which will be set to c's
+ * raw decomposition mapping, if there is one.
+ * @return true if c has a decomposition, otherwise false
+ * @stable ICU 49
+ */
+ virtual UBool
+ getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
+
+ /**
+ * Performs pairwise composition of a & b and returns the composite if there is one.
+ *
+ * Returns a composite code point c only if c has a two-way mapping to a+b.
+ * In standard Unicode normalization, this means that
+ * c has a canonical decomposition to a+b
+ * and c does not have the Full_Composition_Exclusion property.
+ *
+ * This function is independent of the mode of the Normalizer2.
+ * The default implementation returns a negative value.
+ * @param a A (normalization starter) code point.
+ * @param b Another code point.
+ * @return The non-negative composite code point if there is one; otherwise a negative value.
+ * @stable ICU 49
+ */
+ virtual UChar32
+ composePair(UChar32 a, UChar32 b) const;
+
+ /**
+ * Gets the combining class of c.
+ * The default implementation returns 0
+ * but all standard implementations return the Unicode Canonical_Combining_Class value.
+ * @param c code point
+ * @return c's combining class
+ * @stable ICU 49
+ */
+ virtual uint8_t
+ getCombiningClass(UChar32 c) const;
+
+ /**
+ * Tests if the string is normalized.
+ * Internally, in cases where the quickCheck() method would return "maybe"
+ * (which is only possible for the two COMPOSE modes) this method
+ * resolves to "yes" or "no" to provide a definitive result,
+ * at the cost of doing more work in those cases.
+ * @param s input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return true if s is normalized
+ * @stable ICU 4.4
+ */
+ virtual UBool
+ isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
+ /**
+ * Tests if the UTF-8 string is normalized.
+ * Internally, in cases where the quickCheck() method would return "maybe"
+ * (which is only possible for the two COMPOSE modes) this method
+ * resolves to "yes" or "no" to provide a definitive result,
+ * at the cost of doing more work in those cases.
+ *
+ * This works for all normalization modes,
+ * but it is currently optimized for UTF-8 only for "compose" modes,
+ * such as for NFC, NFKC, and NFKC_Casefold
+ * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
+ * For other modes it currently converts to UTF-16 and calls isNormalized().
+ *
+ * @param s UTF-8 input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return true if s is normalized
+ * @stable ICU 60
+ */
+ virtual UBool
+ isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
+
+
+ /**
+ * Tests if the string is normalized.
+ * For the two COMPOSE modes, the result could be "maybe" in cases that
+ * would take a little more work to resolve definitively.
+ * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
+ * combination of quick check + normalization, to avoid
+ * re-checking the "yes" prefix.
+ * @param s input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return UNormalizationCheckResult
+ * @stable ICU 4.4
+ */
+ virtual UNormalizationCheckResult
+ quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
+
+ /**
+ * Returns the end of the normalized substring of the input string.
+ * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
+ * the substring <code>UnicodeString(s, 0, end)</code>
+ * will pass the quick check with a "yes" result.
+ *
+ * The returned end index is usually one or more characters before the
+ * "no" or "maybe" character: The end index is at a normalization boundary.
+ * (See the class documentation for more about normalization boundaries.)
+ *
+ * When the goal is a normalized string and most input strings are expected
+ * to be normalized already, then call this method,
+ * and if it returns a prefix shorter than the input string,
+ * copy that prefix and use normalizeSecondAndAppend() for the remainder.
+ * @param s input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return "yes" span end index
+ * @stable ICU 4.4
+ */
+ virtual int32_t
+ spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
+
+ /**
+ * Tests if the character always has a normalization boundary before it,
+ * regardless of context.
+ * If true, then the character does not normalization-interact with
+ * preceding characters.
+ * In other words, a string containing this character can be normalized
+ * by processing portions before this character and starting from this
+ * character independently.
+ * This is used for iterative normalization. See the class documentation for details.
+ * @param c character to test
+ * @return true if c has a normalization boundary before it
+ * @stable ICU 4.4
+ */
+ virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
+
+ /**
+ * Tests if the character always has a normalization boundary after it,
+ * regardless of context.
+ * If true, then the character does not normalization-interact with
+ * following characters.
+ * In other words, a string containing this character can be normalized
+ * by processing portions up to this character and after this
+ * character independently.
+ * This is used for iterative normalization. See the class documentation for details.
+ * Note that this operation may be significantly slower than hasBoundaryBefore().
+ * @param c character to test
+ * @return true if c has a normalization boundary after it
+ * @stable ICU 4.4
+ */
+ virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
+
+ /**
+ * Tests if the character is normalization-inert.
+ * If true, then the character does not change, nor normalization-interact with
+ * preceding or following characters.
+ * In other words, a string containing this character can be normalized
+ * by processing portions before this character and after this
+ * character independently.
+ * This is used for iterative normalization. See the class documentation for details.
+ * Note that this operation may be significantly slower than hasBoundaryBefore().
+ * @param c character to test
+ * @return true if c is normalization-inert
+ * @stable ICU 4.4
+ */
+ virtual UBool isInert(UChar32 c) const = 0;
+};
+
+/**
+ * Normalization filtered by a UnicodeSet.
+ * Normalizes portions of the text contained in the filter set and leaves
+ * portions not contained in the filter set unchanged.
+ * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
+ * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
+ * This class implements all of (and only) the Normalizer2 API.
+ * An instance of this class is unmodifiable/immutable but is constructed and
+ * must be destructed by the owner.
+ * @stable ICU 4.4
+ */
+class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
+public:
+ /**
+ * Constructs a filtered normalizer wrapping any Normalizer2 instance
+ * and a filter set.
+ * Both are aliased and must not be modified or deleted while this object
+ * is used.
+ * The filter set should be frozen; otherwise the performance will suffer greatly.
+ * @param n2 wrapped Normalizer2 instance
+ * @param filterSet UnicodeSet which determines the characters to be normalized
+ * @stable ICU 4.4
+ */
+ FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
+ norm2(n2), set(filterSet) {}
+
+ /**
+ * Destructor.
+ * @stable ICU 4.4
+ */
+ ~FilteredNormalizer2();
+
+ /**
+ * Writes the normalized form of the source string to the destination string
+ * (replacing its contents) and returns the destination string.
+ * The source and destination strings must be different objects.
+ * @param src source string
+ * @param dest destination string; its contents are replaced with normalized src
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString &
+ normalize(const UnicodeString &src,
+ UnicodeString &dest,
+ UErrorCode &errorCode) const U_OVERRIDE;
+
+ /**
+ * Normalizes a UTF-8 string and optionally records how source substrings
+ * relate to changed and unchanged result substrings.
+ *
+ * Currently implemented completely only for "compose" modes,
+ * such as for NFC, NFKC, and NFKC_Casefold
+ * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
+ * Otherwise currently converts to & from UTF-16 and does not support edits.
+ *
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src Source UTF-8 string.
+ * @param sink A ByteSink to which the normalized UTF-8 result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents are undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 60
+ */
+ virtual void
+ normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
+ Edits *edits, UErrorCode &errorCode) const U_OVERRIDE;
+
+ /**
+ * Appends the normalized form of the second string to the first string
+ * (merging them at the boundary) and returns the first string.
+ * The result is normalized if the first string was normalized.
+ * The first and second strings must be different objects.
+ * @param first string, should be normalized
+ * @param second string, will be normalized
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return first
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString &
+ normalizeSecondAndAppend(UnicodeString &first,
+ const UnicodeString &second,
+ UErrorCode &errorCode) const U_OVERRIDE;
+ /**
+ * Appends the second string to the first string
+ * (merging them at the boundary) and returns the first string.
+ * The result is normalized if both the strings were normalized.
+ * The first and second strings must be different objects.
+ * @param first string, should be normalized
+ * @param second string, should be normalized
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return first
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString &
+ append(UnicodeString &first,
+ const UnicodeString &second,
+ UErrorCode &errorCode) const U_OVERRIDE;
+
+ /**
+ * Gets the decomposition mapping of c.
+ * For details see the base class documentation.
+ *
+ * This function is independent of the mode of the Normalizer2.
+ * @param c code point
+ * @param decomposition String object which will be set to c's
+ * decomposition mapping, if there is one.
+ * @return true if c has a decomposition, otherwise false
+ * @stable ICU 4.6
+ */
+ virtual UBool
+ getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
+
+ /**
+ * Gets the raw decomposition mapping of c.
+ * For details see the base class documentation.
+ *
+ * This function is independent of the mode of the Normalizer2.
+ * @param c code point
+ * @param decomposition String object which will be set to c's
+ * raw decomposition mapping, if there is one.
+ * @return true if c has a decomposition, otherwise false
+ * @stable ICU 49
+ */
+ virtual UBool
+ getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
+
+ /**
+ * Performs pairwise composition of a & b and returns the composite if there is one.
+ * For details see the base class documentation.
+ *
+ * This function is independent of the mode of the Normalizer2.
+ * @param a A (normalization starter) code point.
+ * @param b Another code point.
+ * @return The non-negative composite code point if there is one; otherwise a negative value.
+ * @stable ICU 49
+ */
+ virtual UChar32
+ composePair(UChar32 a, UChar32 b) const U_OVERRIDE;
+
+ /**
+ * Gets the combining class of c.
+ * The default implementation returns 0
+ * but all standard implementations return the Unicode Canonical_Combining_Class value.
+ * @param c code point
+ * @return c's combining class
+ * @stable ICU 49
+ */
+ virtual uint8_t
+ getCombiningClass(UChar32 c) const U_OVERRIDE;
+
+ /**
+ * Tests if the string is normalized.
+ * For details see the Normalizer2 base class documentation.
+ * @param s input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return true if s is normalized
+ * @stable ICU 4.4
+ */
+ virtual UBool
+ isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
+ /**
+ * Tests if the UTF-8 string is normalized.
+ * Internally, in cases where the quickCheck() method would return "maybe"
+ * (which is only possible for the two COMPOSE modes) this method
+ * resolves to "yes" or "no" to provide a definitive result,
+ * at the cost of doing more work in those cases.
+ *
+ * This works for all normalization modes,
+ * but it is currently optimized for UTF-8 only for "compose" modes,
+ * such as for NFC, NFKC, and NFKC_Casefold
+ * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
+ * For other modes it currently converts to UTF-16 and calls isNormalized().
+ *
+ * @param s UTF-8 input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return true if s is normalized
+ * @stable ICU 60
+ */
+ virtual UBool
+ isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE;
+ /**
+ * Tests if the string is normalized.
+ * For details see the Normalizer2 base class documentation.
+ * @param s input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return UNormalizationCheckResult
+ * @stable ICU 4.4
+ */
+ virtual UNormalizationCheckResult
+ quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
+ /**
+ * Returns the end of the normalized substring of the input string.
+ * For details see the Normalizer2 base class documentation.
+ * @param s input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return "yes" span end index
+ * @stable ICU 4.4
+ */
+ virtual int32_t
+ spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
+
+ /**
+ * Tests if the character always has a normalization boundary before it,
+ * regardless of context.
+ * For details see the Normalizer2 base class documentation.
+ * @param c character to test
+ * @return true if c has a normalization boundary before it
+ * @stable ICU 4.4
+ */
+ virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE;
+
+ /**
+ * Tests if the character always has a normalization boundary after it,
+ * regardless of context.
+ * For details see the Normalizer2 base class documentation.
+ * @param c character to test
+ * @return true if c has a normalization boundary after it
+ * @stable ICU 4.4
+ */
+ virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE;
+
+ /**
+ * Tests if the character is normalization-inert.
+ * For details see the Normalizer2 base class documentation.
+ * @param c character to test
+ * @return true if c is normalization-inert
+ * @stable ICU 4.4
+ */
+ virtual UBool isInert(UChar32 c) const U_OVERRIDE;
+private: // Internal overloads of the public methods with extra control parameters (spanCondition, doNormalize); only declared here.
+ UnicodeString &
+ normalize(const UnicodeString &src,
+ UnicodeString &dest,
+ USetSpanCondition spanCondition,
+ UErrorCode &errorCode) const;
+
+ void
+ normalizeUTF8(uint32_t options, const char *src, int32_t length,
+ ByteSink &sink, Edits *edits,
+ USetSpanCondition spanCondition,
+ UErrorCode &errorCode) const;
+
+ UnicodeString &
+ normalizeSecondAndAppend(UnicodeString &first,
+ const UnicodeString &second,
+ UBool doNormalize,
+ UErrorCode &errorCode) const;
+
+ const Normalizer2 &norm2; // wrapped normalizer; a reference bound by the constructor (aliased, not copied)
+ const UnicodeSet &set; // filter set; a reference bound by the constructor (aliased, not copied)
+};
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_NORMALIZATION
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __NORMALIZER2_H__
diff --git a/thirdparty/icu4c/common/unicode/normlzr.h b/thirdparty/icu4c/common/unicode/normlzr.h
new file mode 100644
index 0000000000..3352983cdc
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/normlzr.h
@@ -0,0 +1,816 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ ********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 1996-2015, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************
+ */
+
+#ifndef NORMLZR_H
+#define NORMLZR_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C++ API: Unicode Normalization
+ */
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/chariter.h"
+#include "unicode/normalizer2.h"
+#include "unicode/unistr.h"
+#include "unicode/unorm.h"
+#include "unicode/uobject.h"
+
+U_NAMESPACE_BEGIN
+/**
+ * Old Unicode normalization API.
+ *
+ * This API has been replaced by the Normalizer2 class and is only available
+ * for backward compatibility. This class simply delegates to the Normalizer2 class.
+ * There is one exception: The new API does not provide a replacement for Normalizer::compare().
+ *
+ * The Normalizer class supports the standard normalization forms described in
+ * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
+ * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
+ *
+ * The Normalizer class consists of two parts:
+ * - static functions that normalize strings or test if strings are normalized
+ * - a Normalizer object is an iterator that takes any kind of text and
+ * provides iteration over its normalized form
+ *
+ * The Normalizer class is not suitable for subclassing.
+ *
+ * For basic information about normalization forms and details about the C API
+ * please see the documentation in unorm.h.
+ *
+ * The iterator API with the Normalizer constructors and the non-static functions
+ * use a CharacterIterator as input. It is possible to pass a string which
+ * is then internally wrapped in a CharacterIterator.
+ * The input text is not normalized all at once, but incrementally where needed
+ * (providing efficient random access).
+ * This allows passing in a large text while spending only a small amount of time
+ * normalizing a small part of that text.
+ * However, if the entire text is normalized, then the iterator will be
+ * slower than normalizing the entire text at once and iterating over the result.
+ * A possible use of the Normalizer iterator is also to report an index into the
+ * original text that is close to where the normalized characters come from.
+ *
+ * <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0.
+ * The earlier implementation reported the getIndex() inconsistently,
+ * and previous() could not be used after setIndex(), next(), first(), and current().
+ *
+ * Normalizer allows starting normalization from anywhere in the input text by
+ * calling setIndexOnly(), first(), or last().
+ * Without calling any of these, the iterator will start at the beginning of the text.
+ *
+ * At any time, next() returns the next normalized code point (UChar32),
+ * with post-increment semantics (like CharacterIterator::next32PostInc()).
+ * previous() returns the previous normalized code point (UChar32),
+ * with pre-decrement semantics (like CharacterIterator::previous32()).
+ *
+ * current() returns the current code point
+ * (respectively the one at the newly set index) without moving
+ * the getIndex(). Note that if the text at the current position
+ * needs to be normalized, then these functions will do that.
+ * (This is why current() is not const.)
+ * It is more efficient to call setIndexOnly() instead, which does not
+ * normalize.
+ *
+ * getIndex() always refers to the position in the input text where the normalized
+ * code points are returned from. It does not always change with each returned
+ * code point.
+ * The code point that is returned from any of the functions
+ * corresponds to text at or after getIndex(), according to the
+ * function's iteration semantics (post-increment or pre-decrement).
+ *
+ * next() returns a code point from at or after the getIndex()
+ * from before the next() call. After the next() call, the getIndex()
+ * might have moved to where the next code point will be returned from
+ * (from a next() or current() call).
+ * This is semantically equivalent to array access with array[index++]
+ * (post-increment semantics).
+ *
+ * previous() returns a code point from at or after the getIndex()
+ * from after the previous() call.
+ * This is semantically equivalent to array access with array[--index]
+ * (pre-decrement semantics).
+ *
+ * Internally, the Normalizer iterator normalizes a small piece of text
+ * starting at the getIndex() and ending at a following "safe" index.
+ * The normalized result is stored in an internal string buffer, and
+ * the code points are iterated from there.
+ * With multiple iteration calls, this is repeated until the next piece
+ * of text needs to be normalized, and the getIndex() needs to be moved.
+ *
+ * The following "safe" index, the internal buffer, and the secondary
+ * iteration index into that buffer are not exposed on the API.
+ * This also means that it is currently not practical to return to
+ * a particular, arbitrary position in the text because one would need to
+ * know, and be able to set, in addition to the getIndex(), at least also the
+ * current index into the internal buffer.
+ * It is currently only possible to observe when getIndex() changes
+ * (with careful consideration of the iteration semantics),
+ * at which time the internal index will be 0.
+ * For example, if getIndex() is different after next() than before it,
+ * then the internal index is 0 and one can return to this getIndex()
+ * later with setIndexOnly().
+ *
+ * Note: While the setIndex() and getIndex() refer to indices in the
+ * underlying Unicode input text, the next() and previous() methods
+ * iterate through characters in the normalized output.
+ * This means that there is not necessarily a one-to-one correspondence
+ * between characters returned by next() and previous() and the indices
+ * passed to and returned from setIndex() and getIndex().
+ * It is for this reason that Normalizer does not implement the CharacterIterator interface.
+ *
+ * @author Laura Werner, Mark Davis, Markus Scherer
+ * @stable ICU 2.0
+ */
+class U_COMMON_API Normalizer : public UObject {
+public:
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * If DONE is returned from an iteration function that returns a code point,
+ * then there are no more normalization results available.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ enum {
+ DONE=0xffff
+ };
+
+ // Constructors
+
+ /**
+ * Creates a new <code>Normalizer</code> object for iterating over the
+ * normalized form of a given string.
+ * <p>
+ * @param str The string to be normalized. The normalization
+ * will start at the beginning of the string.
+ *
+ * @param mode The normalization mode.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ Normalizer(const UnicodeString& str, UNormalizationMode mode);
+
+ /**
+ * Creates a new <code>Normalizer</code> object for iterating over the
+ * normalized form of a given string.
+ * <p>
+ * @param str The string to be normalized. The normalization
+ * will start at the beginning of the string.
+ *
+ * @param length Length of the string, or -1 if NUL-terminated.
+ * @param mode The normalization mode.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode);
+
+ /**
+ * Creates a new <code>Normalizer</code> object for iterating over the
+ * normalized form of the given text.
+ * <p>
+ * @param iter The input text to be normalized. The normalization
+ * will start at the beginning of the string.
+ *
+ * @param mode The normalization mode.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
+#endif /* U_HIDE_DEPRECATED_API */
+
+#ifndef U_FORCE_HIDE_DEPRECATED_API
+ /**
+ * Copy constructor.
+ * @param copy The object to be copied.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ Normalizer(const Normalizer& copy);
+
+ /**
+ * Destructor
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ virtual ~Normalizer();
+#endif // U_FORCE_HIDE_DEPRECATED_API
+
+ //-------------------------------------------------------------------------
+ // Static utility methods
+ //-------------------------------------------------------------------------
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Normalizes a <code>UnicodeString</code> according to the specified normalization mode.
+ * This is a wrapper for unorm_normalize(), using UnicodeString's.
+ *
+ * The <code>options</code> parameter specifies which optional
+ * <code>Normalizer</code> features are to be enabled for this operation.
+ *
+ * @param source the input string to be normalized.
+ * @param mode the normalization mode
+ * @param options the optional features to be enabled (0 for no options)
+ * @param result The normalized string (on output).
+ * @param status The error code.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static void U_EXPORT2 normalize(const UnicodeString& source,
+ UNormalizationMode mode, int32_t options,
+ UnicodeString& result,
+ UErrorCode &status);
+
+ /**
+ * Compose a <code>UnicodeString</code>.
+ * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.
+ * This is a wrapper for unorm_normalize(), using UnicodeString's.
+ *
+ * The <code>options</code> parameter specifies which optional
+ * <code>Normalizer</code> features are to be enabled for this operation.
+ *
+ * @param source the string to be composed.
+ * @param compat Perform compatibility decomposition before composition.
+ * If this argument is <code>false</code>, only canonical
+ * decomposition will be performed.
+ * @param options the optional features to be enabled (0 for no options)
+ * @param result The composed string (on output).
+ * @param status The error code.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static void U_EXPORT2 compose(const UnicodeString& source,
+ UBool compat, int32_t options,
+ UnicodeString& result,
+ UErrorCode &status);
+
+ /**
+ * Static method to decompose a <code>UnicodeString</code>.
+ * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.
+ * This is a wrapper for unorm_normalize(), using UnicodeString's.
+ *
+ * The <code>options</code> parameter specifies which optional
+ * <code>Normalizer</code> features are to be enabled for this operation.
+ *
+ * @param source the string to be decomposed.
+ * @param compat Perform compatibility decomposition.
+ * If this argument is <code>false</code>, only canonical
+ * decomposition will be performed.
+ * @param options the optional features to be enabled (0 for no options)
+ * @param result The decomposed string (on output).
+ * @param status The error code.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static void U_EXPORT2 decompose(const UnicodeString& source,
+ UBool compat, int32_t options,
+ UnicodeString& result,
+ UErrorCode &status);
+
+ /**
+ * Performs a quick check on a string, to quickly determine if the string is
+ * in a particular normalization format.
+ * This is a wrapper for unorm_quickCheck(), using a UnicodeString.
+ *
+ * Three types of result can be returned UNORM_YES, UNORM_NO or
+ * UNORM_MAYBE. Result UNORM_YES indicates that the argument
+ * string is in the desired normalized format, UNORM_NO indicates that the
+ * argument string is not in the desired normalized format. A
+ * UNORM_MAYBE result indicates that a more thorough check is required,
+ * the user may have to put the string in its normalized form and compare the
+ * results.
+ * @param source string for determining if it is in a normalized format
+ * @param mode normalization format
+ * @param status A reference to a UErrorCode to receive any errors
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see isNormalized
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static inline UNormalizationCheckResult
+ quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
+
+ /**
+ * Performs a quick check on a string; same as the other version of quickCheck
+ * but takes an extra options parameter like most normalization functions.
+ *
+ * @param source string for determining if it is in a normalized format
+ * @param mode normalization format
+ * @param options the optional features to be enabled (0 for no options)
+ * @param status A reference to a UErrorCode to receive any errors
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see isNormalized
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static UNormalizationCheckResult
+ quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
+
+ /**
+ * Test if a string is in a given normalization form.
+ * This is semantically equivalent to source.equals(normalize(source, mode)) .
+ *
+ * Unlike unorm_quickCheck(), this function returns a definitive result,
+ * never a "maybe".
+ * For NFD, NFKD, and FCD, both functions work exactly the same.
+ * For NFC and NFKC where quickCheck may return "maybe", this function will
+ * perform further tests to arrive at a true/false result.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param mode Which normalization form to test for.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode" normalization form.
+ *
+ * @see quickCheck
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static inline UBool
+ isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
+
+ /**
+ * Test if a string is in a given normalization form; same as the other version of isNormalized
+ * but takes an extra options parameter like most normalization functions.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param mode Which normalization form to test for.
+ * @param options the optional features to be enabled (0 for no options)
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode" normalization form.
+ *
+ * @see quickCheck
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static UBool
+ isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
+
+ /**
+ * Concatenate normalized strings, making sure that the result is normalized as well.
+ *
+ * If both the left and the right strings are in
+ * the normalization form according to "mode/options",
+ * then the result will be
+ *
+ * \code
+ * dest=normalize(left+right, mode, options)
+ * \endcode
+ *
+ * For details see unorm_concatenate in unorm.h.
+ *
+ * @param left Left source string.
+ * @param right Right source string.
+ * @param result The output string.
+ * @param mode The normalization mode.
+ * @param options A bit set of normalization options.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return result
+ *
+ * @see unorm_concatenate
+ * @see normalize
+ * @see unorm_next
+ * @see unorm_previous
+ *
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static UnicodeString &
+ U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
+ UnicodeString &result,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode &errorCode);
+#endif /* U_HIDE_DEPRECATED_API */
+
+ /**
+ * Compare two strings for canonical equivalence.
+ * Further options include case-insensitive comparison and
+ * code point order (as opposed to code unit order).
+ *
+ * Canonical equivalence between two strings is defined as their normalized
+ * forms (NFD or NFC) being identical.
+ * This function compares strings incrementally instead of normalizing
+ * (and optionally case-folding) both strings entirely,
+ * improving performance significantly.
+ *
+ * Bulk normalization is only necessary if the strings do not fulfill the FCD
+ * conditions. Only in this case, and only if the strings are relatively long,
+ * is memory allocated temporarily.
+ * For FCD strings and short non-FCD strings there is no memory allocation.
+ *
+ * Semantically, this is equivalent to
+ * strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
+ * where code point order and foldCase are all optional.
+ *
+ * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
+ * the case folding must be performed first, then the normalization.
+ *
+ * @param s1 First source string.
+ * @param s2 Second source string.
+ *
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Case-sensitive comparison in code unit order, and the input strings
+ * are quick-checked for FCD.
+ *
+ * - UNORM_INPUT_IS_FCD
+ * Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
+ * If not set, the function will quickCheck for FCD
+ * and normalize if necessary.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_COMPARE_IGNORE_CASE
+ * Set to compare strings case-insensitively using case folding,
+ * instead of case-sensitively.
+ * If set, then the following case folding options are used.
+ *
+ * - Options as used with case-insensitive comparisons, currently:
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * (see u_strCaseCompare for details)
+ *
+ * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
+ *
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see unorm_compare
+ * @see normalize
+ * @see UNORM_FCD
+ * @see u_strCompare
+ * @see u_strCaseCompare
+ *
+ * @stable ICU 2.2
+ */
+ static inline int32_t
+ compare(const UnicodeString &s1, const UnicodeString &s2,
+ uint32_t options,
+ UErrorCode &errorCode);
+
+#ifndef U_HIDE_DEPRECATED_API
+ //-------------------------------------------------------------------------
+ // Iteration API
+ //-------------------------------------------------------------------------
+
+ /**
+ * Return the current character in the normalized text.
+ * current() may need to normalize some text at getIndex().
+ * The getIndex() is not changed.
+ *
+ * @return the current normalized code point
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ UChar32 current(void);
+
+ /**
+ * Return the first character in the normalized text.
+ * This is equivalent to setIndexOnly(startIndex()) followed by next().
+ * (Post-increment semantics.)
+ *
+ * @return the first normalized code point
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ UChar32 first(void);
+
+ /**
+ * Return the last character in the normalized text.
+ * This is equivalent to setIndexOnly(endIndex()) followed by previous().
+ * (Pre-decrement semantics.)
+ *
+ * @return the last normalized code point
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ UChar32 last(void);
+
+ /**
+ * Return the next character in the normalized text.
+ * (Post-increment semantics.)
+ * If the end of the text has already been reached, DONE is returned.
+ * The DONE value could be confused with a U+FFFF non-character code point
+ * in the text. If this is possible, you can test getIndex()<endIndex()
+ * before calling next(), or (getIndex()<endIndex() || last()!=DONE)
+ * after calling next(). (Calling last() will change the iterator state!)
+ *
+ * The C API unorm_next() is more efficient and does not have this ambiguity.
+ *
+ * @return the next normalized code point
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ UChar32 next(void);
+
+ /**
+ * Return the previous character in the normalized text and decrement.
+ * (Pre-decrement semantics.)
+ * If the beginning of the text has already been reached, DONE is returned.
+ * The DONE value could be confused with a U+FFFF non-character code point
+ * in the text. If this is possible, you can test
+ * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change
+ * the iterator state!)
+ *
+ * The C API unorm_previous() is more efficient and does not have this ambiguity.
+ *
+ * @return the previous normalized code point
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ UChar32 previous(void);
+
+ /**
+ * Set the iteration position in the input text that is being normalized,
+ * without any immediate normalization.
+ * After setIndexOnly(), getIndex() will return the same index that is
+ * specified here.
+ *
+ * @param index the desired index in the input text.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void setIndexOnly(int32_t index);
+
+ /**
+ * Reset the index to the beginning of the text.
+ * This is equivalent to setIndexOnly(startIndex()).
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void reset(void);
+
+ /**
+ * Retrieve the current iteration position in the input text that is
+ * being normalized.
+ *
+ * A following call to next() will return a normalized code point from
+ * the input text at or after this index.
+ *
+ * After a call to previous(), getIndex() will point at or before the
+ * position in the input text where the normalized code point
+ * was returned from with previous().
+ *
+ * @return the current index in the input text
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ int32_t getIndex(void) const;
+
+ /**
+ * Retrieve the index of the start of the input text. This is the begin index
+ * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string
+ * over which this <code>Normalizer</code> is iterating.
+ *
+ * @return the smallest index in the input text where the Normalizer operates
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ int32_t startIndex(void) const;
+
+ /**
+ * Retrieve the index of the end of the input text. This is the end index
+ * of the <code>CharacterIterator</code> or the length of the string
+ * over which this <code>Normalizer</code> is iterating.
+ * This end index is exclusive, i.e., the Normalizer operates only on characters
+ * before this index.
+ *
+ * @return the first index in the input text where the Normalizer does not operate
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ int32_t endIndex(void) const;
+
+ /**
+ * Returns true when both iterators refer to the same character in the same
+ * input text.
+ *
+ * @param that a Normalizer object to compare this one to
+ * @return comparison result
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ UBool operator==(const Normalizer& that) const;
+
+ /**
+ * Returns false when both iterators refer to the same character in the same
+ * input text.
+ *
+ * @param that a Normalizer object to compare this one to
+ * @return comparison result
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ inline UBool operator!=(const Normalizer& that) const;
+
+ /**
+ * Returns a pointer to a new Normalizer that is a clone of this one.
+ * The caller is responsible for deleting the new clone.
+ * @return a pointer to a new Normalizer
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ Normalizer* clone() const;
+
+ /**
+ * Generates a hash code for this iterator.
+ *
+ * @return the hash code
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ int32_t hashCode(void) const;
+
+ //-------------------------------------------------------------------------
+ // Property access methods
+ //-------------------------------------------------------------------------
+
+ /**
+ * Set the normalization mode for this object.
+ * <p>
+ * <b>Note:</b>If the normalization mode is changed while iterating
+ * over a string, calls to {@link #next() } and {@link #previous() } may
+ * return previously buffered characters in the old normalization mode
+ * until the iteration is able to re-sync at the next base character.
+ * It is safest to call {@link #setIndexOnly }, {@link #reset() },
+ * {@link #setText }, {@link #first() },
+ * {@link #last() }, etc. after calling <code>setMode</code>.
+ * <p>
+ * @param newMode the new mode for this <code>Normalizer</code>.
+ * @see #getUMode
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void setMode(UNormalizationMode newMode);
+
+ /**
+ * Return the normalization mode for this object.
+ *
+ * This is an unusual name because there used to be a getMode() that
+ * returned a different type.
+ *
+ * @return the mode for this <code>Normalizer</code>
+ * @see #setMode
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ UNormalizationMode getUMode(void) const;
+
+ /**
+ * Set options that affect this <code>Normalizer</code>'s operation.
+ * Options do not change the basic composition or decomposition operation
+ * that is being performed, but they control whether
+ * certain optional portions of the operation are done.
+ * Currently the only available option is obsolete.
+ *
+ * It is possible to specify multiple options that are all turned on or off.
+ *
+ * @param option the option(s) whose value is/are to be set.
+ * @param value the new setting for the option. Use <code>true</code> to
+ * turn the option(s) on and <code>false</code> to turn it/them off.
+ *
+ * @see #getOption
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void setOption(int32_t option,
+ UBool value);
+
+ /**
+ * Determine whether an option is turned on or off.
+ * If multiple options are specified, then the result is true if any
+ * of them are set.
+ * <p>
+ * @param option the option(s) that are to be checked
+ * @return true if any of the option(s) are set
+ * @see #setOption
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ UBool getOption(int32_t option) const;
+
+ /**
+ * Set the input text over which this <code>Normalizer</code> will iterate.
+ * The iteration position is set to the beginning.
+ *
+ * @param newText a string that replaces the current input text
+ * @param status a UErrorCode
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void setText(const UnicodeString& newText,
+ UErrorCode &status);
+
+ /**
+ * Set the input text over which this <code>Normalizer</code> will iterate.
+ * The iteration position is set to the beginning.
+ *
+ * @param newText a CharacterIterator object that replaces the current input text
+ * @param status a UErrorCode
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void setText(const CharacterIterator& newText,
+ UErrorCode &status);
+
+ /**
+ * Set the input text over which this <code>Normalizer</code> will iterate.
+ * The iteration position is set to the beginning.
+ *
+ * @param newText a string that replaces the current input text
+ * @param length the length of the string, or -1 if NUL-terminated
+ * @param status a UErrorCode
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void setText(ConstChar16Ptr newText,
+ int32_t length,
+ UErrorCode &status);
+ /**
+ * Copies the input text into the UnicodeString argument.
+ *
+ * @param result Receives a copy of the text under iteration.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void getText(UnicodeString& result);
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ * @returns a UClassID for this class.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+#endif /* U_HIDE_DEPRECATED_API */
+
+#ifndef U_FORCE_HIDE_DEPRECATED_API
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * @return a UClassID for the actual class.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ virtual UClassID getDynamicClassID() const;
+#endif // U_FORCE_HIDE_DEPRECATED_API
+
+private:
+ //-------------------------------------------------------------------------
+ // Private functions
+ //-------------------------------------------------------------------------
+
+ Normalizer(); // default constructor not implemented
+ Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
+
+ // Private utility methods for iteration
+ // For documentation, see the source code
+ UBool nextNormalize();
+ UBool previousNormalize();
+
+ void init();
+ void clearBuffer(void);
+
+ //-------------------------------------------------------------------------
+ // Private data
+ //-------------------------------------------------------------------------
+
+ FilteredNormalizer2*fFilteredNorm2; // owned if not NULL
+ const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2
+ UNormalizationMode fUMode; // deprecated
+ int32_t fOptions;
+
+ // The input text and our position in it
+ CharacterIterator *text;
+
+ // The normalization buffer is the result of normalization
+ // of the source in [currentIndex..nextIndex[ .
+ int32_t currentIndex, nextIndex;
+
+ // A buffer for holding intermediate results
+ UnicodeString buffer;
+ int32_t bufferPos;
+};
+
+//-------------------------------------------------------------------------
+// Inline implementations
+//-------------------------------------------------------------------------
+
+#ifndef U_HIDE_DEPRECATED_API
+inline UBool
+Normalizer::operator!= (const Normalizer& other) const
+{ return ! operator==(other); }
+
+inline UNormalizationCheckResult
+Normalizer::quickCheck(const UnicodeString& source,
+ UNormalizationMode mode,
+ UErrorCode &status) {
+ return quickCheck(source, mode, 0, status);
+}
+
+inline UBool
+Normalizer::isNormalized(const UnicodeString& source,
+ UNormalizationMode mode,
+ UErrorCode &status) {
+ return isNormalized(source, mode, 0, status);
+}
+#endif /* U_HIDE_DEPRECATED_API */
+
+inline int32_t
+Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
+ uint32_t options,
+ UErrorCode &errorCode) {
+ // all argument checking is done in unorm_compare
+ return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(),
+ toUCharPtr(s2.getBuffer()), s2.length(),
+ options,
+ &errorCode);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+
+#endif // NORMLZR_H
+
+#endif /* U_SHOW_CPLUSPLUS_API */
diff --git a/thirdparty/icu4c/common/unicode/parseerr.h b/thirdparty/icu4c/common/unicode/parseerr.h
new file mode 100644
index 0000000000..c23cc273b8
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/parseerr.h
@@ -0,0 +1,94 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 03/14/00 aliu Creation.
+* 06/27/00 aliu Change from C++ class to C struct
+**********************************************************************
+*/
+#ifndef PARSEERR_H
+#define PARSEERR_H
+
+#include "unicode/utypes.h"
+
+
+/**
+ * \file
+ * \brief C API: Parse Error Information
+ */
+/**
+ * The capacity of the context strings in UParseError.
+ * @stable ICU 2.0
+ */
+enum { U_PARSE_CONTEXT_LEN = 16 };
+
+/**
+ * A UParseError struct is used to return detailed information about
+ * parsing errors. It is used by ICU parsing engines that parse long
+ * rules, patterns, or programs, where the text being parsed is long
+ * enough that more information than a UErrorCode is needed to
+ * localize the error.
+ *
+ * <p>The line, offset, and context fields are optional; parsing
+ * engines may choose not to use them.
+ *
+ * <p>The preContext and postContext strings include some part of the
+ * context surrounding the error. If the source text is "let for=7"
+ * and "for" is the error (e.g., because it is a reserved word), then
+ * some examples of what a parser might produce are the following:
+ *
+ * <pre>
+ * preContext postContext
+ * "" "" The parser does not support context
+ * "let " "=7" Pre- and post-context only
+ * "let " "for=7" Pre- and post-context and error text
+ * "" "for" Error text only
+ * </pre>
+ *
+ * <p>Examples of engines which use UParseError (or may use it in the
+ * future) are Transliterator, RuleBasedBreakIterator, and
+ * RegexPattern.
+ *
+ * @stable ICU 2.0
+ */
+typedef struct UParseError {
+
+ /**
+ * The line on which the error occurred. If the parser uses this
+ * field, it sets it to the line number of the source text line on
+ * which the error appears, which will be a value >= 1. If the
+ * parser does not support line numbers, the value will be <= 0.
+ * @stable ICU 2.0
+ */
+ int32_t line;
+
+ /**
+ * The character offset to the error. If the line field is >= 1,
+ * then this is the offset from the start of the line. Otherwise,
+ * this is the offset from the start of the text. If the parser
+ * does not support this field, it will have a value < 0.
+ * @stable ICU 2.0
+ */
+ int32_t offset;
+
+ /**
+ * Textual context before the error. Null-terminated. The empty
+ * string if not supported by parser.
+ * @stable ICU 2.0
+ */
+ UChar preContext[U_PARSE_CONTEXT_LEN];
+
+ /**
+ * The error itself and/or textual context after the error.
+ * Null-terminated. The empty string if not supported by parser.
+ * @stable ICU 2.0
+ */
+ UChar postContext[U_PARSE_CONTEXT_LEN];
+
+} UParseError;
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/parsepos.h b/thirdparty/icu4c/common/unicode/parsepos.h
new file mode 100644
index 0000000000..260ed4cbb8
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/parsepos.h
@@ -0,0 +1,237 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+* Copyright (C) 1997-2005, International Business Machines Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* File PARSEPOS.H
+*
+* Modification History:
+*
+* Date Name Description
+* 07/09/97 helena Converted from java.
+* 07/17/98 stephen Added errorIndex support.
+* 05/11/99 stephen Cleaned up.
+*******************************************************************************
+*/
+
+#ifndef PARSEPOS_H
+#define PARSEPOS_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \file
+ * \brief C++ API: Parse Position
+ */
+/**
+ * <code>ParsePosition</code> is a simple class used by <code>Format</code>
+ * and its subclasses to keep track of the current position during parsing.
+ * The <code>parseObject</code> method in the various <code>Format</code>
+ * classes requires a <code>ParsePosition</code> object as an argument.
+ *
+ * <p>
+ * By design, as you parse through a string with different formats,
+ * you can use the same <code>ParsePosition</code>, since the index parameter
+ * records the current position.
+ *
+ * The ParsePosition class is not suitable for subclassing.
+ *
+ * @version 1.3 10/30/97
+ * @author Mark Davis, Helena Shih
+ * @see java.text.Format
+ */
+
+class U_COMMON_API ParsePosition : public UObject {
+public:
+ /**
+ * Default constructor, the index starts with 0 as default.
+ * @stable ICU 2.0
+ */
+ ParsePosition()
+ : UObject(),
+ index(0),
+ errorIndex(-1)
+ {}
+
+ /**
+ * Create a new ParsePosition with the given initial index.
+ * @param newIndex the new text offset.
+ * @stable ICU 2.0
+ */
+ ParsePosition(int32_t newIndex)
+ : UObject(),
+ index(newIndex),
+ errorIndex(-1)
+ {}
+
+ /**
+ * Copy constructor
+ * @param copy the object to be copied from.
+ * @stable ICU 2.0
+ */
+ ParsePosition(const ParsePosition& copy)
+ : UObject(copy),
+ index(copy.index),
+ errorIndex(copy.errorIndex)
+ {}
+
+ /**
+ * Destructor
+ * @stable ICU 2.0
+ */
+ virtual ~ParsePosition();
+
+ /**
+ * Assignment operator
+ * @stable ICU 2.0
+ */
+ inline ParsePosition& operator=(const ParsePosition& copy);
+
+ /**
+ * Equality operator.
+ * @return true if the two parse positions are equal, false otherwise.
+ * @stable ICU 2.0
+ */
+ inline UBool operator==(const ParsePosition& that) const;
+
+ /**
+ * Inequality operator.
+ * @return true if the two parse positions are not equal, false otherwise.
+ * @stable ICU 2.0
+ */
+ inline UBool operator!=(const ParsePosition& that) const;
+
+ /**
+ * Clone this object.
+ * Clones can be used concurrently in multiple threads.
+ * If an error occurs, then NULL is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.8
+ */
+ ParsePosition *clone() const;
+
+ /**
+ * Retrieve the current parse position. On input to a parse method, this
+ * is the index of the character at which parsing will begin; on output, it
+ * is the index of the character following the last character parsed.
+ * @return the current index.
+ * @stable ICU 2.0
+ */
+ inline int32_t getIndex(void) const;
+
+ /**
+ * Set the current parse position.
+ * @param index the new index.
+ * @stable ICU 2.0
+ */
+ inline void setIndex(int32_t index);
+
+ /**
+ * Set the index at which a parse error occurred. Formatters
+ * should set this before returning an error code from their
+ * parseObject method. The default value is -1 if this is not
+ * set.
+ * @stable ICU 2.0
+ */
+ inline void setErrorIndex(int32_t ei);
+
+ /**
+ * Retrieve the index at which an error occurred, or -1 if the
+ * error index has not been set.
+ * @stable ICU 2.0
+ */
+ inline int32_t getErrorIndex(void) const;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+private:
+ /**
+ * Input: the place you start parsing.
+ * <br>Output: position where the parse stopped.
+ * This is designed to be used serially,
+ * with each call setting index up for the next one.
+ */
+ int32_t index;
+
+ /**
+ * The index at which a parse error occurred.
+ */
+ int32_t errorIndex;
+
+};
+
+inline ParsePosition&
+ParsePosition::operator=(const ParsePosition& copy)
+{
+ index = copy.index;
+ errorIndex = copy.errorIndex;
+ return *this;
+}
+
+inline UBool
+ParsePosition::operator==(const ParsePosition& copy) const
+{
+ if(index != copy.index || errorIndex != copy.errorIndex)
+ return false;
+ else
+ return true;
+}
+
+inline UBool
+ParsePosition::operator!=(const ParsePosition& copy) const
+{
+ return !operator==(copy);
+}
+
+inline int32_t
+ParsePosition::getIndex() const
+{
+ return index;
+}
+
+inline void
+ParsePosition::setIndex(int32_t offset)
+{
+ this->index = offset;
+}
+
+inline int32_t
+ParsePosition::getErrorIndex() const
+{
+ return errorIndex;
+}
+
+inline void
+ParsePosition::setErrorIndex(int32_t ei)
+{
+ this->errorIndex = ei;
+}
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/platform.h b/thirdparty/icu4c/common/unicode/platform.h
new file mode 100644
index 0000000000..2bb2f8b318
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/platform.h
@@ -0,0 +1,885 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : platform.h
+*
+* Date Name Description
+* 05/13/98 nos Creation (content moved here from ptypes.h).
+* 03/02/99 stephen Added AS400 support.
+* 03/30/99 stephen Added Linux support.
+* 04/13/99 stephen Reworked for autoconf.
+******************************************************************************
+*/
+
+#ifndef _PLATFORM_H
+#define _PLATFORM_H
+
+#include "unicode/uconfig.h"
+#include "unicode/uvernum.h"
+
+/**
+ * \file
+ * \brief Basic types for the platform.
+ *
+ * This file used to be generated by autoconf/configure.
+ * Starting with ICU 49, platform.h is a normal source file,
+ * to simplify cross-compiling and working with non-autoconf/make build systems.
+ *
+ * When a value in this file does not work on a platform, then please
+ * try to derive it from the U_PLATFORM value
+ * (for which we might need a new value constant in rare cases)
+ * and/or from other macros that are predefined by the compiler
+ * or defined in standard (POSIX or platform or compiler) headers.
+ *
+ * As a temporary workaround, you can add an explicit \#define for some macros
+ * before it is first tested, or add an equivalent -D macro definition
+ * to the compiler's command line.
+ *
+ * Note: Some compilers provide ways to show the predefined macros.
+ * For example, with gcc you can compile an empty .c file and have the compiler
+ * print the predefined macros with
+ * \code
+ * gcc -E -dM -x c /dev/null | sort
+ * \endcode
+ * (You can provide an actual empty .c file rather than /dev/null.
+ * <code>-x c++</code> is for C++.)
+ */
+
+/**
+ * Define some things so that they can be documented.
+ * @internal
+ */
+#ifdef U_IN_DOXYGEN
+/*
+ * Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented.
+ * Solution: #define any defines for non @internal API here, so that they are visible in the docs. If you just set PREDEFINED in Doxyfile.in, they won't be documented.
+ */
+
+/* None for now. */
+#endif
+
+/**
+ * \def U_PLATFORM
+ * The U_PLATFORM macro defines the platform we're on.
+ *
+ * We used to define one different, value-less macro per platform.
+ * That made it hard to know the set of relevant platforms and macros,
+ * and hard to deal with variants of platforms.
+ *
+ * Starting with ICU 49, we define platforms as numeric macros,
+ * with ranges of values for related platforms and their variants.
+ * The U_PLATFORM macro is set to one of these values.
+ *
+ * Historical note from the Solaris Wikipedia article:
+ * AT&T and Sun collaborated on a project to merge the most popular Unix variants
+ * on the market at that time: BSD, System V, and Xenix.
+ * This became Unix System V Release 4 (SVR4).
+ *
+ * @internal
+ */
+
+/** Unknown platform. @internal */
+#define U_PF_UNKNOWN 0
+/** Windows @internal */
+#define U_PF_WINDOWS 1000
+/** MinGW. Windows, calls to Win32 API, but using GNU gcc and binutils. @internal */
+#define U_PF_MINGW 1800
+/**
+ * Cygwin. Windows, calls to cygwin1.dll for Posix functions,
+ * using MSVC or GNU gcc and binutils.
+ * @internal
+ */
+#define U_PF_CYGWIN 1900
+/* Reserve 2000 for U_PF_UNIX? */
+/** HP-UX is based on UNIX System V. @internal */
+#define U_PF_HPUX 2100
+/** Solaris is a Unix operating system based on SVR4. @internal */
+#define U_PF_SOLARIS 2600
+/** BSD is a UNIX operating system derivative. @internal */
+#define U_PF_BSD 3000
+/** AIX is based on UNIX System V Releases and 4.3 BSD. @internal */
+#define U_PF_AIX 3100
+/** IRIX is based on UNIX System V with BSD extensions. @internal */
+#define U_PF_IRIX 3200
+/**
+ * Darwin is a POSIX-compliant operating system, composed of code developed by Apple,
+ * as well as code derived from NeXTSTEP, BSD, and other projects,
+ * built around the Mach kernel.
+ * Darwin forms the core set of components upon which Mac OS X, Apple TV, and iOS are based.
+ * (Original description modified from Wikipedia.)
+ * @internal
+ */
+#define U_PF_DARWIN 3500
+/** iPhone OS (iOS) is a derivative of Mac OS X. @internal */
+#define U_PF_IPHONE 3550
+/** QNX is a commercial Unix-like real-time operating system related to BSD. @internal */
+#define U_PF_QNX 3700
+/** Linux is a Unix-like operating system. @internal */
+#define U_PF_LINUX 4000
+/**
+ * Native Client is pretty close to Linux.
+ * See https://developer.chrome.com/native-client and
+ * http://www.chromium.org/nativeclient
+ * @internal
+ */
+#define U_PF_BROWSER_NATIVE_CLIENT 4020
+/** Android is based on Linux. @internal */
+#define U_PF_ANDROID 4050
+/** Fuchsia is a POSIX-ish platform. @internal */
+#define U_PF_FUCHSIA 4100
+/* Maximum value for Linux-based platform is 4499 */
+/**
+ * Emscripten is a C++ transpiler for the Web that can target asm.js or
+ * WebAssembly. It provides some POSIX-compatible wrappers and stubs and
+ * some Linux-like functionality, but is not fully compatible with
+ * either.
+ * @internal
+ */
+#define U_PF_EMSCRIPTEN 5010
+/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */
+#define U_PF_OS390 9000
+/** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. @internal */
+#define U_PF_OS400 9400
+
+#ifdef U_PLATFORM
+ /* Use the predefined value. */
+#elif defined(__MINGW32__)
+# define U_PLATFORM U_PF_MINGW
+#elif defined(__CYGWIN__)
+# define U_PLATFORM U_PF_CYGWIN
+#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+# define U_PLATFORM U_PF_WINDOWS
+#elif defined(__ANDROID__)
+# define U_PLATFORM U_PF_ANDROID
+ /* Android wchar_t support depends on the API level. */
+# include <android/api-level.h>
+#elif defined(__pnacl__) || defined(__native_client__)
+# define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT
+#elif defined(__Fuchsia__)
+# define U_PLATFORM U_PF_FUCHSIA
+#elif defined(linux) || defined(__linux__) || defined(__linux)
+# define U_PLATFORM U_PF_LINUX
+#elif defined(__APPLE__) && defined(__MACH__)
+# include <TargetConditionals.h>
+# if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE /* variant of TARGET_OS_MAC */
+# define U_PLATFORM U_PF_IPHONE
+# else
+# define U_PLATFORM U_PF_DARWIN
+# endif
+#elif defined(BSD) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MirBSD__)
+# if defined(__FreeBSD__)
+# include <sys/endian.h>
+# endif
+# define U_PLATFORM U_PF_BSD
+#elif defined(sun) || defined(__sun)
+ /* Check defined(__SVR4) || defined(__svr4__) to distinguish Solaris from SunOS? */
+# define U_PLATFORM U_PF_SOLARIS
+# if defined(__GNUC__)
+ /* Solaris/GCC needs this header file to get the proper endianness. Normally, this
+ * header file is included with stddef.h but on Solaris/GCC, the GCC version of stddef.h
+ * is included which does not include this header file.
+ */
+# include <sys/isa_defs.h>
+# endif
+#elif defined(_AIX) || defined(__TOS_AIX__)
+# define U_PLATFORM U_PF_AIX
+#elif defined(_hpux) || defined(hpux) || defined(__hpux)
+# define U_PLATFORM U_PF_HPUX
+#elif defined(sgi) || defined(__sgi)
+# define U_PLATFORM U_PF_IRIX
+#elif defined(__QNX__) || defined(__QNXNTO__)
+# define U_PLATFORM U_PF_QNX
+#elif defined(__TOS_MVS__)
+# define U_PLATFORM U_PF_OS390
+#elif defined(__OS400__) || defined(__TOS_OS400__)
+# define U_PLATFORM U_PF_OS400
+#elif defined(__EMSCRIPTEN__)
+# define U_PLATFORM U_PF_EMSCRIPTEN
+#else
+# define U_PLATFORM U_PF_UNKNOWN
+#endif
+
+/**
+ * \def CYGWINMSVC
+ * Defined if this is Windows with Cygwin, but using MSVC rather than gcc.
+ * Otherwise undefined.
+ * @internal
+ */
+/* Commented out because this is already set in mh-cygwin-msvc
+#if U_PLATFORM == U_PF_CYGWIN && defined(_MSC_VER)
+# define CYGWINMSVC
+#endif
+*/
+#ifdef U_IN_DOXYGEN
+# define CYGWINMSVC
+#endif
+
+/**
+ * \def U_PLATFORM_USES_ONLY_WIN32_API
+ * Defines whether the platform uses only the Win32 API.
+ * Set to 1 for Windows/MSVC and MinGW but not Cygwin.
+ * @internal
+ */
+#ifdef U_PLATFORM_USES_ONLY_WIN32_API
+ /* Use the predefined value. */
+#elif (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_MINGW) || defined(CYGWINMSVC)
+# define U_PLATFORM_USES_ONLY_WIN32_API 1
+#else
+ /* Cygwin implements POSIX. */
+# define U_PLATFORM_USES_ONLY_WIN32_API 0
+#endif
+
+/**
+ * \def U_PLATFORM_HAS_WIN32_API
+ * Defines whether the Win32 API is available on the platform.
+ * Set to 1 for Windows/MSVC, MinGW and Cygwin.
+ * @internal
+ */
+#ifdef U_PLATFORM_HAS_WIN32_API
+ /* Use the predefined value. */
+#elif U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
+# define U_PLATFORM_HAS_WIN32_API 1
+#else
+# define U_PLATFORM_HAS_WIN32_API 0
+#endif
+
+/**
+ * \def U_PLATFORM_HAS_WINUWP_API
+ * Defines whether target is intended for Universal Windows Platform API
+ * Set to 1 for Windows10 Release Solution Configuration
+ * @internal
+ */
+#ifdef U_PLATFORM_HAS_WINUWP_API
+ /* Use the predefined value. */
+#else
+# define U_PLATFORM_HAS_WINUWP_API 0
+#endif
+
+/**
+ * \def U_PLATFORM_IMPLEMENTS_POSIX
+ * Defines whether the platform implements (most of) the POSIX API.
+ * Set to 1 for Cygwin and most other platforms.
+ * @internal
+ */
+#ifdef U_PLATFORM_IMPLEMENTS_POSIX
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# define U_PLATFORM_IMPLEMENTS_POSIX 0
+#else
+# define U_PLATFORM_IMPLEMENTS_POSIX 1
+#endif
+
+/**
+ * \def U_PLATFORM_IS_LINUX_BASED
+ * Defines whether the platform is Linux or one of its derivatives.
+ * @internal
+ */
+#ifdef U_PLATFORM_IS_LINUX_BASED
+ /* Use the predefined value. */
+#elif U_PF_LINUX <= U_PLATFORM && U_PLATFORM <= 4499
+# define U_PLATFORM_IS_LINUX_BASED 1
+#else
+# define U_PLATFORM_IS_LINUX_BASED 0
+#endif
+
+/**
+ * \def U_PLATFORM_IS_DARWIN_BASED
+ * Defines whether the platform is Darwin or one of its derivatives.
+ * @internal
+ */
+#ifdef U_PLATFORM_IS_DARWIN_BASED
+ /* Use the predefined value. */
+#elif U_PF_DARWIN <= U_PLATFORM && U_PLATFORM <= U_PF_IPHONE
+# define U_PLATFORM_IS_DARWIN_BASED 1
+#else
+# define U_PLATFORM_IS_DARWIN_BASED 0
+#endif
+
+/**
+ * \def U_HAVE_STDINT_H
+ * Defines whether stdint.h is available. It is a C99 standard header.
+ * We used to include inttypes.h which includes stdint.h but we usually do not need
+ * the additional definitions from inttypes.h.
+ * @internal
+ */
+#ifdef U_HAVE_STDINT_H
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# if defined(__BORLANDC__) || U_PLATFORM == U_PF_MINGW || (defined(_MSC_VER) && _MSC_VER>=1600)
+ /* Windows Visual Studio 9 and below do not have stdint.h & inttypes.h, but VS 2010 adds them. */
+# define U_HAVE_STDINT_H 1
+# else
+# define U_HAVE_STDINT_H 0
+# endif
+#elif U_PLATFORM == U_PF_SOLARIS
+ /* Solaris has inttypes.h but not stdint.h. */
+# define U_HAVE_STDINT_H 0
+#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
+ /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
+# define U_HAVE_STDINT_H 0
+#else
+# define U_HAVE_STDINT_H 1
+#endif
+
+/**
+ * \def U_HAVE_INTTYPES_H
+ * Defines whether inttypes.h is available. It is a C99 standard header.
+ * We include inttypes.h where it is available but stdint.h is not.
+ * @internal
+ */
+#ifdef U_HAVE_INTTYPES_H
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_SOLARIS
+ /* Solaris has inttypes.h but not stdint.h. */
+# define U_HAVE_INTTYPES_H 1
+#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
+ /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
+# define U_HAVE_INTTYPES_H 1
+#else
+ /* Most platforms have both inttypes.h and stdint.h, or neither. */
+# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H
+#endif
+
+/*===========================================================================*/
+/** @{ Compiler and environment features */
+/*===========================================================================*/
+
+/**
+ * \def U_GCC_MAJOR_MINOR
+ * Indicates whether the compiler is gcc (test for != 0),
+ * and if so, contains its major (times 100) and minor version numbers.
+ * If the compiler is not gcc, then U_GCC_MAJOR_MINOR == 0.
+ *
+ * For example, for testing for whether we have gcc, and whether it's 4.6 or higher,
+ * use "#if U_GCC_MAJOR_MINOR >= 406".
+ * @internal
+ */
+#ifdef __GNUC__
+# define U_GCC_MAJOR_MINOR (__GNUC__ * 100 + __GNUC_MINOR__)
+#else
+# define U_GCC_MAJOR_MINOR 0
+#endif
+
+/**
+ * \def U_IS_BIG_ENDIAN
+ * Determines the endianness of the platform.
+ * @internal
+ */
+#ifdef U_IS_BIG_ENDIAN
+ /* Use the predefined value. */
+#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN)
+# define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN)
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
+ /* gcc */
+# define U_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN)
+# define U_IS_BIG_ENDIAN 1
+#elif defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN)
+# define U_IS_BIG_ENDIAN 0
+#elif U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_OS400 || defined(__s390__) || defined(__s390x__)
+ /* These platforms do not appear to predefine any endianness macros. */
+# define U_IS_BIG_ENDIAN 1
+#elif defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0)
+ /* HPPA does not appear to predefine any endianness macros. */
+# define U_IS_BIG_ENDIAN 1
+#elif defined(sparc) || defined(__sparc) || defined(__sparc__)
+ /* Some sparc based systems (e.g. Linux) do not predefine any endianness macros. */
+# define U_IS_BIG_ENDIAN 1
+#else
+# define U_IS_BIG_ENDIAN 0
+#endif
+
+/**
+ * \def U_HAVE_PLACEMENT_NEW
+ * Determines whether to override placement new and delete for STL.
+ * @stable ICU 2.6
+ */
+#ifdef U_HAVE_PLACEMENT_NEW
+ /* Use the predefined value. */
+#elif defined(__BORLANDC__)
+# define U_HAVE_PLACEMENT_NEW 0
+#else
+# define U_HAVE_PLACEMENT_NEW 1
+#endif
+
+/**
+ * \def U_HAVE_DEBUG_LOCATION_NEW
+ * Define this to define the MFC debug version of the operator new.
+ *
+ * @stable ICU 3.4
+ */
+#ifdef U_HAVE_DEBUG_LOCATION_NEW
+ /* Use the predefined value. */
+#elif defined(_MSC_VER)
+# define U_HAVE_DEBUG_LOCATION_NEW 1
+#else
+# define U_HAVE_DEBUG_LOCATION_NEW 0
+#endif
+
+/* Compatibility with compilers other than clang: http://clang.llvm.org/docs/LanguageExtensions.html */
+#ifdef __has_attribute
+# define UPRV_HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+# define UPRV_HAS_ATTRIBUTE(x) 0
+#endif
+#ifdef __has_cpp_attribute
+# define UPRV_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
+#else
+# define UPRV_HAS_CPP_ATTRIBUTE(x) 0
+#endif
+#ifdef __has_declspec_attribute
+# define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) __has_declspec_attribute(x)
+#else
+# define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) 0
+#endif
+#ifdef __has_builtin
+# define UPRV_HAS_BUILTIN(x) __has_builtin(x)
+#else
+# define UPRV_HAS_BUILTIN(x) 0
+#endif
+#ifdef __has_feature
+# define UPRV_HAS_FEATURE(x) __has_feature(x)
+#else
+# define UPRV_HAS_FEATURE(x) 0
+#endif
+#ifdef __has_extension
+# define UPRV_HAS_EXTENSION(x) __has_extension(x)
+#else
+# define UPRV_HAS_EXTENSION(x) 0
+#endif
+#ifdef __has_warning
+# define UPRV_HAS_WARNING(x) __has_warning(x)
+#else
+# define UPRV_HAS_WARNING(x) 0
+#endif
+
+/**
+ * \def U_MALLOC_ATTR
+ * Attribute to mark functions as malloc-like
+ * @internal
+ */
+#if defined(__GNUC__) && __GNUC__>=3
+# define U_MALLOC_ATTR __attribute__ ((__malloc__))
+#else
+# define U_MALLOC_ATTR
+#endif
+
+/**
+ * \def U_ALLOC_SIZE_ATTR
+ * Attribute to specify the size of the allocated buffer for malloc-like functions
+ * @internal
+ */
+#if (defined(__GNUC__) && \
+ (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || \
+ UPRV_HAS_ATTRIBUTE(alloc_size)
+# define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X)))
+# define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y)))
+#else
+# define U_ALLOC_SIZE_ATTR(X)
+# define U_ALLOC_SIZE_ATTR2(X,Y)
+#endif
+
+/**
+ * \def U_CPLUSPLUS_VERSION
+ * 0 if no C++; 1, 11, 14, ... if C++.
+ * Support for specific features cannot always be determined by the C++ version alone.
+ * @internal
+ */
+#ifdef U_CPLUSPLUS_VERSION
+# if U_CPLUSPLUS_VERSION != 0 && !defined(__cplusplus)
+# undef U_CPLUSPLUS_VERSION
+# define U_CPLUSPLUS_VERSION 0
+# endif
+ /* Otherwise use the predefined value. */
+#elif !defined(__cplusplus)
+# define U_CPLUSPLUS_VERSION 0
+#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
+# define U_CPLUSPLUS_VERSION 14
+#elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
+# define U_CPLUSPLUS_VERSION 11
+#else
+ // C++98 or C++03
+# define U_CPLUSPLUS_VERSION 1
+#endif
+
+#if (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
+// add in std::nullptr_t
+namespace std {
+ typedef decltype(nullptr) nullptr_t;
+};
+#endif
+
+/**
+ * \def U_NOEXCEPT
+ * "noexcept" if supported, otherwise empty.
+ * Some code, especially STL containers, uses move semantics of objects only
+ * if the move constructor and the move operator are declared as not throwing exceptions.
+ * @internal
+ */
+#ifdef U_NOEXCEPT
+ /* Use the predefined value. */
+#else
+# define U_NOEXCEPT noexcept
+#endif
+
+/**
+ * \def U_FALLTHROUGH
+ * Annotate intentional fall-through between switch labels.
+ * http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
+ * @internal
+ */
+#ifndef __cplusplus
+ // Not for C.
+#elif defined(U_FALLTHROUGH)
+ // Use the predefined value.
+#elif defined(__clang__)
+ // Test for compiler vs. feature separately.
+ // Other compilers might choke on the feature test.
+# if UPRV_HAS_CPP_ATTRIBUTE(clang::fallthrough) || \
+ (UPRV_HAS_FEATURE(cxx_attributes) && \
+ UPRV_HAS_WARNING("-Wimplicit-fallthrough"))
+# define U_FALLTHROUGH [[clang::fallthrough]]
+# endif
+#elif defined(__GNUC__) && (__GNUC__ >= 7)
+# define U_FALLTHROUGH __attribute__((fallthrough))
+#endif
+
+#ifndef U_FALLTHROUGH
+# define U_FALLTHROUGH
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ Character data types */
+/*===========================================================================*/
+
+/**
+ * U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform.
+ * @stable ICU 2.0
+ */
+#define U_ASCII_FAMILY 0
+
+/**
+ * U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform.
+ * @stable ICU 2.0
+ */
+#define U_EBCDIC_FAMILY 1
+
+/**
+ * \def U_CHARSET_FAMILY
+ *
+ * <p>These definitions allow specifying the encoding of text
+ * in the char data type as defined by the platform and the compiler.
+ * It is enough to determine the code point values of "invariant characters",
+ * which are the ones shared by all encodings that are in use
+ * on a given platform.</p>
+ *
+ * <p>Those "invariant characters" should be all the uppercase and lowercase
+ * latin letters, the digits, the space, and "basic punctuation".
+ * Also, '\\n', '\\r', '\\t' should be available.</p>
+ *
+ * <p>The list of "invariant characters" is:<br>
+ * \code
+ * A-Z a-z 0-9 SPACE " % &amp; ' ( ) * + , - . / : ; < = > ? _
+ * \endcode
+ * <br>
+ * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p>
+ *
+ * <p>This matches the IBM Syntactic Character Set (CS 640).</p>
+ *
+ * <p>In other words, all the graphic characters in 7-bit ASCII should
+ * be safely accessible except the following:</p>
+ *
+ * \code
+ * '\' <backslash>
+ * '[' <left bracket>
+ * ']' <right bracket>
+ * '{' <left brace>
+ * '}' <right brace>
+ * '^' <circumflex>
+ * '~' <tilde>
+ * '!' <exclamation mark>
+ * '#' <number sign>
+ * '|' <vertical line>
+ * '$' <dollar sign>
+ * '@' <commercial at>
+ * '`' <grave accent>
+ * \endcode
+ * @stable ICU 2.0
+ */
+#ifdef U_CHARSET_FAMILY
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_OS390 && (!defined(__CHARSET_LIB) || !__CHARSET_LIB)
+# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
+#elif U_PLATFORM == U_PF_OS400 && !defined(__UTF32__)
+# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
+#else
+# define U_CHARSET_FAMILY U_ASCII_FAMILY
+#endif
+
+/**
+ * \def U_CHARSET_IS_UTF8
+ *
+ * Hardcode the default charset to UTF-8.
+ *
+ * If this is set to 1, then
+ * - ICU will assume that all non-invariant char*, StringPiece, std::string etc.
+ * contain UTF-8 text, regardless of what the system API uses
+ * - some ICU code will use fast functions like u_strFromUTF8()
+ * rather than the more general and more heavy-weight conversion API (ucnv.h)
+ * - ucnv_getDefaultName() always returns "UTF-8"
+ * - ucnv_setDefaultName() is disabled and will not change the default charset
+ * - static builds of ICU are smaller
+ * - more functionality is available with the UCONFIG_NO_CONVERSION build-time
+ * configuration option (see unicode/uconfig.h)
+ * - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable
+ *
+ * @stable ICU 4.2
+ * @see UCONFIG_NO_CONVERSION
+ */
+#ifdef U_CHARSET_IS_UTF8
+ /* Use the predefined value. */
+#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED || \
+ U_PLATFORM == U_PF_EMSCRIPTEN
+# define U_CHARSET_IS_UTF8 1
+#else
+# define U_CHARSET_IS_UTF8 0
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ Information about wchar support */
+/*===========================================================================*/
+
+/**
+ * \def U_HAVE_WCHAR_H
+ * Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default.
+ *
+ * @stable ICU 2.0
+ */
+#ifdef U_HAVE_WCHAR_H
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9
+ /*
+ * Android before Gingerbread (Android 2.3, API level 9) did not support wchar_t.
+ * The type and header existed, but the library functions did not work as expected.
+ * The size of wchar_t was 1 but L"xyz" string literals had 32-bit units anyway.
+ */
+# define U_HAVE_WCHAR_H 0
+#else
+# define U_HAVE_WCHAR_H 1
+#endif
+
+/**
+ * \def U_SIZEOF_WCHAR_T
+ * U_SIZEOF_WCHAR_T==sizeof(wchar_t)
+ *
+ * @stable ICU 2.0
+ */
+#ifdef U_SIZEOF_WCHAR_T
+ /* Use the predefined value. */
+#elif (U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9)
+ /*
+ * Android before Gingerbread (Android 2.3, API level 9) did not properly support wchar_t;
+ * sizeof(wchar_t) was 1 there. See the note at U_HAVE_WCHAR_H.
+ */
+# define U_SIZEOF_WCHAR_T 1
+#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_CYGWIN
+# define U_SIZEOF_WCHAR_T 2
+#elif U_PLATFORM == U_PF_AIX
+ /*
+ * AIX 6.1 information, section "Wide character data representation":
+ * "... the wchar_t datatype is 32-bit in the 64-bit environment and
+ * 16-bit in the 32-bit environment."
+ * and
+ * "All locales use Unicode for their wide character code values (process code),
+ * except the IBM-eucTW codeset."
+ */
+# ifdef __64BIT__
+# define U_SIZEOF_WCHAR_T 4
+# else
+# define U_SIZEOF_WCHAR_T 2
+# endif
+#elif U_PLATFORM == U_PF_OS390
+ /*
+ * z/OS V1R11 information center, section "LP64 | ILP32":
+ * "In 31-bit mode, the size of long and pointers is 4 bytes and the size of wchar_t is 2 bytes.
+ * Under LP64, the size of long and pointer is 8 bytes and the size of wchar_t is 4 bytes."
+ */
+# ifdef _LP64
+# define U_SIZEOF_WCHAR_T 4
+# else
+# define U_SIZEOF_WCHAR_T 2
+# endif
+#elif U_PLATFORM == U_PF_OS400
+# if defined(__UTF32__)
+ /*
+ * LOCALETYPE(*LOCALEUTF) is specified.
+ * Wide-character strings are in UTF-32,
+ * narrow-character strings are in UTF-8.
+ */
+# define U_SIZEOF_WCHAR_T 4
+# elif defined(__UCS2__)
+ /*
+ * LOCALETYPE(*LOCALEUCS2) is specified.
+ * Wide-character strings are in UCS-2,
+ * narrow-character strings are in EBCDIC.
+ */
+# define U_SIZEOF_WCHAR_T 2
+# else
+ /*
+ * LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified.
+ * Wide-character strings are in 16-bit EBCDIC,
+ * narrow-character strings are in EBCDIC.
+ */
+# define U_SIZEOF_WCHAR_T 2
+# endif
+#else
+# define U_SIZEOF_WCHAR_T 4
+#endif
+
+#ifndef U_HAVE_WCSCPY
+#define U_HAVE_WCSCPY U_HAVE_WCHAR_H
+#endif
+
+/** @} */
+
+/**
+ * \def U_HAVE_CHAR16_T
+ * Defines whether the char16_t type is available for UTF-16
+ * and u"abc" UTF-16 string literals are supported.
+ * This is a new standard type and standard string literal syntax in C++0x
+ * but has been available in some compilers before.
+ * @internal
+ */
+#ifdef U_HAVE_CHAR16_T
+ /* Use the predefined value. */
+#else
+ /*
+ * Notes:
+ * Visual Studio 2010 (_MSC_VER==1600) defines char16_t as a typedef
+ * and does not support u"abc" string literals.
+ * Visual Studio 2015 (_MSC_VER>=1900) and above adds support for
+ * both char16_t and u"abc" string literals.
+ * gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but
+ * does not support u"abc" string literals.
+ * C++11 and C11 require support for UTF-16 literals
+ * TODO: Fix for plain C. Doesn't work on Mac.
+ */
+# if U_CPLUSPLUS_VERSION >= 11 || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
+# define U_HAVE_CHAR16_T 1
+# else
+# define U_HAVE_CHAR16_T 0
+# endif
+#endif
+
+/**
+ * @{
+ * \def U_DECLARE_UTF16
+ * Do not use this macro because it is not defined on all platforms.
+ * Use the UNICODE_STRING or U_STRING_DECL macros instead.
+ * @internal
+ */
+#ifdef U_DECLARE_UTF16
+ /* Use the predefined value. */
+#elif U_HAVE_CHAR16_T \
+ || (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \
+ || (defined(__HP_aCC) && __HP_aCC >= 035000) \
+ || (defined(__HP_cc) && __HP_cc >= 111106) \
+ || (defined(U_IN_DOXYGEN))
+# define U_DECLARE_UTF16(string) u ## string
+#elif U_SIZEOF_WCHAR_T == 2 \
+ && (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__)))
+# define U_DECLARE_UTF16(string) L ## string
+#else
+ /* Leave U_DECLARE_UTF16 undefined. See unistr.h. */
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ Symbol import-export control */
+/*===========================================================================*/
+
+#ifdef U_EXPORT
+ /* Use the predefined value. */
+#elif defined(U_STATIC_IMPLEMENTATION)
+# define U_EXPORT
+#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(dllexport) && \
+ UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport))
+# define U_EXPORT __declspec(dllexport)
+#elif defined(__GNUC__)
+# define U_EXPORT __attribute__((visibility("default")))
+#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
+ || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550)
+# define U_EXPORT __global
+/*#elif defined(__HP_aCC) || defined(__HP_cc)
+# define U_EXPORT __declspec(dllexport)*/
+#else
+# define U_EXPORT
+#endif
+
+/* U_CALLCONV is related to U_EXPORT2 */
+#ifdef U_EXPORT2
+ /* Use the predefined value. */
+#elif defined(_MSC_VER)
+# define U_EXPORT2 __cdecl
+#else
+# define U_EXPORT2
+#endif
+
+#ifdef U_IMPORT
+ /* Use the predefined value. */
+#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(dllexport) && \
+ UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport))
+ /* Windows needs to export/import data. */
+# define U_IMPORT __declspec(dllimport)
+#else
+# define U_IMPORT
+#endif
+
+/**
+ * \def U_CALLCONV
+ * Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary
+ * in callback function typedefs to make sure that the calling convention
+ * is compatible.
+ *
+ * This is only used for non-ICU-API functions.
+ * When a function is a public ICU API,
+ * you must use the U_CAPI and U_EXPORT2 qualifiers.
+ *
+ * Please note, you need to use U_CALLCONV after the *.
+ *
+ * NO : "static const char U_CALLCONV *func( . . . )"
+ * YES: "static const char* U_CALLCONV func( . . . )"
+ *
+ * @stable ICU 2.0
+ */
+#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
+# define U_CALLCONV __cdecl
+#else
+# define U_CALLCONV U_EXPORT2
+#endif
+
+/**
+ * \def U_CALLCONV_FPTR
+ * Similar to U_CALLCONV, but only used on function pointers.
+ * @internal
+ */
+#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
+# define U_CALLCONV_FPTR U_CALLCONV
+#else
+# define U_CALLCONV_FPTR
+#endif
+/** @} */
+
+#endif // _PLATFORM_H
diff --git a/thirdparty/icu4c/common/unicode/ptypes.h b/thirdparty/icu4c/common/unicode/ptypes.h
new file mode 100644
index 0000000000..70324ffee3
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ptypes.h
@@ -0,0 +1,130 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : ptypes.h
+*
+* Date Name Description
+* 05/13/98 nos Creation (content moved here from ptypes.h).
+* 03/02/99 stephen Added AS400 support.
+* 03/30/99 stephen Added Linux support.
+* 04/13/99 stephen Reworked for autoconf.
+* 09/18/08 srl Moved basic types back to ptypes.h from platform.h
+******************************************************************************
+*/
+
+/**
+ * \file
+ * \brief C API: Definitions of integer types of various widths
+ */
+
+#ifndef _PTYPES_H
+#define _PTYPES_H
+
+/**
+ * \def __STDC_LIMIT_MACROS
+ * According to the Linux stdint.h, the ISO C99 standard specifies that in C++ implementations
+ * macros like INT32_MIN and UINTPTR_MAX should only be defined if explicitly requested.
+ * We need to define __STDC_LIMIT_MACROS before including stdint.h in C++ code
+ * that uses such limit macros.
+ * @internal
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS
+#endif
+
+/* NULL, size_t, wchar_t */
+#include <stddef.h>
+
+/*
+ * If all compilers provided all of the C99 headers and types,
+ * we would just unconditionally #include <stdint.h> here
+ * and not need any of the stuff after including platform.h.
+ */
+
+/* Find out if we have stdint.h etc. */
+#include "unicode/platform.h"
+
+/*===========================================================================*/
+/* Generic data types */
+/*===========================================================================*/
+
+/* If your platform does not have the <stdint.h> header, you may
+ need to edit the typedefs in the #else section below.
+ Use #if...#else...#endif with predefined compiler macros if possible. */
+#if U_HAVE_STDINT_H
+
+/*
+ * We mostly need <stdint.h> (which defines the standard integer types) but not <inttypes.h>.
+ * <inttypes.h> includes <stdint.h> and adds the printf/scanf helpers PRId32, SCNx16 etc.
+ * which we almost never use, plus stuff like imaxabs() which we never use.
+ */
+#include <stdint.h>
+
+#if U_PLATFORM == U_PF_OS390
+/* The features header is needed to get (u)int64_t sometimes. */
+#include <features.h>
+/* z/OS has <stdint.h>, but some versions are missing uint8_t (APAR PK62248). */
+#if !defined(__uint8_t)
+#define __uint8_t 1
+typedef unsigned char uint8_t;
+#endif
+#endif /* U_PLATFORM == U_PF_OS390 */
+
+#elif U_HAVE_INTTYPES_H
+
+# include <inttypes.h>
+
+#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */
+
+/// \cond
+#if ! U_HAVE_INT8_T
+typedef signed char int8_t;
+#endif
+
+#if ! U_HAVE_UINT8_T
+typedef unsigned char uint8_t;
+#endif
+
+#if ! U_HAVE_INT16_T
+typedef signed short int16_t;
+#endif
+
+#if ! U_HAVE_UINT16_T
+typedef unsigned short uint16_t;
+#endif
+
+#if ! U_HAVE_INT32_T
+typedef signed int int32_t;
+#endif
+
+#if ! U_HAVE_UINT32_T
+typedef unsigned int uint32_t;
+#endif
+
+#if ! U_HAVE_INT64_T
+#ifdef _MSC_VER
+ typedef signed __int64 int64_t;
+#else
+ typedef signed long long int64_t;
+#endif
+#endif
+
+#if ! U_HAVE_UINT64_T
+#ifdef _MSC_VER
+ typedef unsigned __int64 uint64_t;
+#else
+ typedef unsigned long long uint64_t;
+#endif
+#endif
+/// \endcond
+
+#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */
+
+#endif /* _PTYPES_H */
diff --git a/thirdparty/icu4c/common/unicode/putil.h b/thirdparty/icu4c/common/unicode/putil.h
new file mode 100644
index 0000000000..500c21252f
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/putil.h
@@ -0,0 +1,183 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : putil.h
+*
+* Date Name Description
+* 05/14/98 nos Creation (content moved here from utypes.h).
+* 06/17/99 erm Added IEEE_754
+* 07/22/98 stephen Added IEEEremainder, max, min, trunc
+* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
+* 08/24/98 stephen Added longBitsFromDouble
+* 03/02/99 stephen Removed openFile(). Added AS400 support.
+* 04/15/99 stephen Converted to C
+* 11/15/99 helena Integrated S/390 changes for IEEE support.
+* 01/11/00 helena Added u_getVersion.
+******************************************************************************
+*/
+
+#ifndef PUTIL_H
+#define PUTIL_H
+
+#include "unicode/utypes.h"
+ /**
+ * \file
+ * \brief C API: Platform Utilities
+ */
+
+/*==========================================================================*/
+/* Platform utilities */
+/*==========================================================================*/
+
+/**
+ * Platform utilities isolates the platform dependencies of the
+ * library. For each platform which this code is ported to, these
+ * functions may have to be re-implemented.
+ */
+
+/**
+ * Return the ICU data directory.
+ * The data directory is where common format ICU data files (.dat files)
+ * are loaded from. Note that normal use of the built-in ICU
+ * facilities does not require loading of an external data file;
+ * unless you are adding custom data to ICU, the data directory
+ * does not need to be set.
+ *
+ * The data directory is determined as follows:
+ * If u_setDataDirectory() has been called, that is it, otherwise
+ * if the ICU_DATA environment variable is set, use that, otherwise
+ * If a data directory was specified at ICU build time
+ * <code>
+ * \code
+ * #define ICU_DATA_DIR "path"
+ * \endcode
+ * </code> use that,
+ * otherwise no data directory is available.
+ *
+ * @return the data directory, or an empty string ("") if no data directory has
+ * been specified.
+ *
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2 u_getDataDirectory(void);
+
+
+/**
+ * Set the ICU data directory.
+ * The data directory is where common format ICU data files (.dat files)
+ * are loaded from. Note that normal use of the built-in ICU
+ * facilities does not require loading of an external data file;
+ * unless you are adding custom data to ICU, the data directory
+ * does not need to be set.
+ *
+ * This function should be called at most once in a process, before the
+ * first ICU operation (e.g., u_init()) that will require the loading of an
+ * ICU data file.
+ * This function is not thread-safe. Use it before calling ICU APIs from
+ * multiple threads.
+ *
+ * @param directory The directory to be set.
+ *
+ * @see u_init
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 u_setDataDirectory(const char *directory);
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Return the time zone files override directory, or an empty string if
+ * no directory was specified. Certain time zone resources will be preferentially
+ * loaded from individual files in this directory.
+ *
+ * @return the time zone data override directory.
+ * @internal
+ */
+U_CAPI const char * U_EXPORT2 u_getTimeZoneFilesDirectory(UErrorCode *status);
+
+/**
+ * Set the time zone files override directory.
+ * This function is not thread safe; it must not be called concurrently with
+ * u_getTimeZoneFilesDirectory() or any other use of ICU time zone functions.
+ * This function should only be called before using any ICU service that
+ * will access the time zone data.
+ * @internal
+ */
+U_CAPI void U_EXPORT2 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status);
+#endif /* U_HIDE_INTERNAL_API */
+
+
+/**
+ * @{
+ * Filesystem file and path separator characters.
+ * Example: '/' and ':' on Unix, '\\' and ';' on Windows.
+ * @stable ICU 2.0
+ */
+#if U_PLATFORM_USES_ONLY_WIN32_API
+# define U_FILE_SEP_CHAR '\\'
+# define U_FILE_ALT_SEP_CHAR '/'
+# define U_PATH_SEP_CHAR ';'
+# define U_FILE_SEP_STRING "\\"
+# define U_FILE_ALT_SEP_STRING "/"
+# define U_PATH_SEP_STRING ";"
+#else
+# define U_FILE_SEP_CHAR '/'
+# define U_FILE_ALT_SEP_CHAR '/'
+# define U_PATH_SEP_CHAR ':'
+# define U_FILE_SEP_STRING "/"
+# define U_FILE_ALT_SEP_STRING "/"
+# define U_PATH_SEP_STRING ":"
+#endif
+
+/** @} */
+
+/**
+ * Convert char characters to UChar characters.
+ * This utility function is useful only for "invariant characters"
+ * that are encoded in the platform default encoding.
+ * They are a small, constant subset of the encoding and include
+ * just the latin letters, digits, and some punctuation.
+ * For details, see U_CHARSET_FAMILY.
+ *
+ * @param cs Input string, points to <code>length</code>
+ * character bytes from a subset of the platform encoding.
+ * @param us Output string, points to memory for <code>length</code>
+ * Unicode characters.
+ * @param length The number of characters to convert; this may
+ * include the terminating <code>NUL</code>.
+ *
+ * @see U_CHARSET_FAMILY
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+u_charsToUChars(const char *cs, UChar *us, int32_t length);
+
+/**
+ * Convert UChar characters to char characters.
+ * This utility function is useful only for "invariant characters"
+ * that can be encoded in the platform default encoding.
+ * They are a small, constant subset of the encoding and include
+ * just the latin letters, digits, and some punctuation.
+ * For details, see U_CHARSET_FAMILY.
+ *
+ * @param us Input string, points to <code>length</code>
+ * Unicode characters that can be encoded with the
+ * codepage-invariant subset of the platform encoding.
+ * @param cs Output string, points to memory for <code>length</code>
+ * character bytes.
+ * @param length The number of characters to convert; this may
+ * include the terminating <code>NUL</code>.
+ *
+ * @see U_CHARSET_FAMILY
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+u_UCharsToChars(const UChar *us, char *cs, int32_t length);
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/rbbi.h b/thirdparty/icu4c/common/unicode/rbbi.h
new file mode 100644
index 0000000000..65117f616c
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/rbbi.h
@@ -0,0 +1,732 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+***************************************************************************
+* Copyright (C) 1999-2016 International Business Machines Corporation *
+* and others. All rights reserved. *
+***************************************************************************
+
+**********************************************************************
+* Date Name Description
+* 10/22/99 alan Creation.
+* 11/11/99 rgillam Complete port from Java.
+**********************************************************************
+*/
+
+#ifndef RBBI_H
+#define RBBI_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C++ API: Rule Based Break Iterator
+ */
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/udata.h"
+#include "unicode/parseerr.h"
+#include "unicode/schriter.h"
+
+struct UCPTrie;
+
+U_NAMESPACE_BEGIN
+
+/** @internal */
+class LanguageBreakEngine;
+struct RBBIDataHeader;
+class RBBIDataWrapper;
+class UnhandledEngine;
+class UStack;
+
+/**
+ *
+ * A subclass of BreakIterator whose behavior is specified using a list of rules.
+ * <p>Instances of this class are most commonly created by the factory methods of
+ * BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc.,
+ * and then used via the abstract API in class BreakIterator.</p>
+ *
+ * <p>See the ICU User Guide for information on Break Iterator Rules.</p>
+ *
+ * <p>This class is not intended to be subclassed.</p>
+ */
+class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator {
+
+private:
+ /**
+ * The UText through which this BreakIterator accesses the text
+ * @internal (private)
+ */
+ UText fText;
+
+#ifndef U_HIDE_INTERNAL_API
+public:
+#endif /* U_HIDE_INTERNAL_API */
+ /**
+ * The rule data for this BreakIterator instance.
+ * Not for general use; Public only for testing purposes.
+ * @internal
+ */
+ RBBIDataWrapper *fData;
+private:
+
+ /**
+ * The current position of the iterator. Pinned, 0 <= fPosition <= text.length
+ * (first() moves the iterator to position zero).
+ * Never has the value UBRK_DONE (-1).
+ */
+ int32_t fPosition;
+
+ /**
+ * Info from the state table indicating which rules caused the boundary
+ * at the current position; it is an output of handleNext().
+ * NOTE(review): presumably an index into the rule status table that backs
+ * getRuleStatus() / getRuleStatusVec() -- confirm against the implementation.
+ */
+ int32_t fRuleStatusIndex;
+
+ /**
+ * Cache of previously determined boundary positions.
+ */
+ class BreakCache;
+ BreakCache *fBreakCache;
+
+ /**
+ * Cache of boundary positions within a region of text that has been
+ * sub-divided by dictionary based breaking.
+ */
+ class DictionaryCache;
+ DictionaryCache *fDictionaryCache;
+
+ /**
+ *
+ * If present, UStack of LanguageBreakEngine objects that might handle
+ * dictionary characters. Searched from top to bottom to find an object to
+ * handle a given character.
+ * @internal (private)
+ */
+ UStack *fLanguageBreakEngines;
+
+ /**
+ *
+ * If present, the special LanguageBreakEngine used for handling
+ * characters that are in the dictionary set, but not handled by any
+ * LanguageBreakEngine.
+ * @internal (private)
+ */
+ UnhandledEngine *fUnhandledBreakEngine;
+
+ /**
+ * Counter for the number of characters encountered with the "dictionary"
+ * flag set.
+ * @internal (private)
+ */
+ uint32_t fDictionaryCharCount;
+
+ /**
+ * A character iterator that refers to the same text as the UText, above.
+ * Only included for compatibility with old API, which was based on CharacterIterators.
+ * Value may be adopted from outside, or may point to fSCharIter, below.
+ */
+ CharacterIterator *fCharIter;
+
+ /**
+ * When the input text is provided by a UnicodeString, this will point to
+ * a characterIterator that wraps that data. Needed only for the
+ * implementation of getText(), a backwards compatibility issue.
+ */
+ StringCharacterIterator fSCharIter;
+
+ /**
+ * True when iteration has run off the end, and iterator functions should return UBRK_DONE.
+ */
+ UBool fDone;
+
+ /**
+ * Array of look-ahead tentative results.
+ */
+ int32_t *fLookAheadMatches;
+
+ //=======================================================================
+ // constructors
+ //=======================================================================
+
+ /**
+ * Constructor from a flattened set of RBBI data in malloced memory.
+ * RuleBasedBreakIterators built from a custom set of rules
+ * are created via this constructor; the rules are compiled
+ * into memory, then the break iterator is constructed here.
+ *
+ * The break iterator adopts the memory, and will
+ * free it when done.
+ * @internal (private)
+ */
+ RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
+
+ /** @internal */
+ friend class RBBIRuleBuilder;
+ /** @internal */
+ friend class BreakIterator;
+
+public:
+
+ /** Default constructor. Creates an empty shell of an iterator, with no
+ * rules or text to iterate over. Object can subsequently be assigned to.
+ * @stable ICU 2.2
+ */
+ RuleBasedBreakIterator();
+
+ /**
+ * Copy constructor. Will produce a break iterator with the same behavior,
+ * and which iterates over the same text, as the one passed in.
+ * @param that The RuleBasedBreakIterator passed to be copied
+ * @stable ICU 2.0
+ */
+ RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
+
+ /**
+ * Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
+ * @param rules The break rules to be used.
+ * @param parseError In the event of a syntax error in the rules, provides the location
+ * within the rules of the problem.
+ * @param status Information on any errors encountered.
+ * @stable ICU 2.2
+ */
+ RuleBasedBreakIterator( const UnicodeString &rules,
+ UParseError &parseError,
+ UErrorCode &status);
+
+ /**
+ * Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
+ * Binary rules are obtained from RuleBasedBreakIterator::getBinaryRules().
+ * Construction of a break iterator in this way is substantially faster than
+ * construction from source rules.
+ *
+ * Ownership of the storage containing the compiled rules remains with the
+ * caller of this function. The compiled rules must not be modified or
+ * deleted during the life of the break iterator.
+ *
+ * The compiled rules are not compatible across different major versions of ICU.
+ * The compiled rules are compatible only between machines with the same
+ * byte ordering (little or big endian) and the same base character set family
+ * (ASCII or EBCDIC).
+ *
+ * @see #getBinaryRules
+ * @param compiledRules A pointer to the compiled break rules to be used.
+ * @param ruleLength The length of the compiled break rules, in bytes. This
+ * corresponds to the length value produced by getBinaryRules().
+ * @param status Information on any errors encountered, including invalid
+ * binary rules.
+ * @stable ICU 4.8
+ */
+ RuleBasedBreakIterator(const uint8_t *compiledRules,
+ uint32_t ruleLength,
+ UErrorCode &status);
+
+ /**
+ * This constructor uses the udata interface to create a BreakIterator
+ * whose internal tables live in a memory-mapped file. "image" is an
+ * ICU UDataMemory handle for the pre-compiled break iterator tables.
+ * @param image handle to the memory image for the break iterator data.
+ * Ownership of the UDataMemory handle passes to the Break Iterator,
+ * which will be responsible for closing it when it is no longer needed.
+ * @param status Information on any errors encountered.
+ * @see udata_open
+ * @see #getBinaryRules
+ * @stable ICU 2.8
+ */
+ RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
+
+ /**
+ * Destructor
+ * @stable ICU 2.0
+ */
+ virtual ~RuleBasedBreakIterator();
+
+ /**
+ * Assignment operator. Sets this iterator to have the same behavior,
+ * and iterate over the same text, as the one passed in.
+ * @param that The RuleBasedBreakIterator passed in
+ * @return the newly created RuleBasedBreakIterator
+ * @stable ICU 2.0
+ */
+ RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
+
+ /**
+ * Equality operator. Returns true if both BreakIterators are of the
+ * same class, have the same behavior, and iterate over the same text.
+ * @param that The BreakIterator to be compared for equality
+ * @return true if both BreakIterators are of the
+ * same class, have the same behavior, and iterate over the same text.
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const BreakIterator& that) const;
+
+ /**
+ * Not-equal operator. If operator== returns true, this returns false,
+ * and vice versa.
+ * @param that The BreakIterator to be compared for inequality
+ * @return true if both BreakIterators are not same.
+ * @stable ICU 2.0
+ */
+ inline UBool operator!=(const BreakIterator& that) const;
+
+ /**
+ * Returns a newly-constructed RuleBasedBreakIterator with the same
+ * behavior, and iterating over the same text, as this one.
+ * Differs from the copy constructor in that it is polymorphic, and
+ * will correctly clone (copy) a derived class.
+ * clone() is thread safe. Multiple threads may simultaneously
+ * clone the same source break iterator.
+ * @return a newly-constructed RuleBasedBreakIterator
+ * @stable ICU 2.0
+ */
+ virtual RuleBasedBreakIterator* clone() const;
+
+ /**
+ * Compute a hash code for this BreakIterator
+ * @return A hash code
+ * @stable ICU 2.0
+ */
+ virtual int32_t hashCode(void) const;
+
+ /**
+ * Returns the description used to create this iterator
+ * @return the description used to create this iterator
+ * @stable ICU 2.0
+ */
+ virtual const UnicodeString& getRules(void) const;
+
+ //=======================================================================
+ // BreakIterator overrides
+ //=======================================================================
+
+ /**
+ * <p>
+ * Return a CharacterIterator over the text being analyzed.
+ * The returned character iterator is owned by the break iterator, and must
+ * not be deleted by the caller. Repeated calls to this function may
+ * return the same CharacterIterator.
+ * </p>
+ * <p>
+ * The returned character iterator must not be used concurrently with
+ * the break iterator. If concurrent operation is needed, clone the
+ * returned character iterator first and operate on the clone.
+ * </p>
+ * <p>
+ * When the break iterator is operating on text supplied via a UText,
+ * this function will fail. Lacking any way to signal failures, it
+ * returns a CharacterIterator containing no text.
+ * The function getUText() provides similar functionality,
+ * is reliable, and is more efficient.
+ * </p>
+ *
+ * TODO: deprecate this function?
+ *
+ * @return An iterator over the text being analyzed.
+ * @stable ICU 2.0
+ */
+ virtual CharacterIterator& getText(void) const;
+
+
+ /**
+ * Get a UText for the text being analyzed.
+ * The returned UText is a shallow clone of the UText used internally
+ * by the break iterator implementation. It can safely be used to
+ * access the text without impacting any break iterator operations,
+ * but the underlying text itself must not be altered.
+ *
+ * @param fillIn A UText to be filled in. If NULL, a new UText will be
+ * allocated to hold the result.
+ * @param status receives any error codes.
+ * @return The current UText for this break iterator. If an input
+ * UText was provided, it will always be returned.
+ * @stable ICU 3.4
+ */
+ virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
+
+ /**
+ * Set the iterator to analyze a new piece of text. This function resets
+ * the current iteration position to the beginning of the text.
+ * @param newText An iterator over the text to analyze. The BreakIterator
+ * takes ownership of the character iterator. The caller MUST NOT delete it!
+ * @stable ICU 2.0
+ */
+ virtual void adoptText(CharacterIterator* newText);
+
+ /**
+ * Set the iterator to analyze a new piece of text. This function resets
+ * the current iteration position to the beginning of the text.
+ *
+ * The BreakIterator will retain a reference to the supplied string.
+ * The caller must not modify or delete the text while the BreakIterator
+ * retains the reference.
+ *
+ * @param newText The text to analyze.
+ * @stable ICU 2.0
+ */
+ virtual void setText(const UnicodeString& newText);
+
+ /**
+ * Reset the break iterator to operate over the text represented by
+ * the UText. The iterator position is reset to the start.
+ *
+ * This function makes a shallow clone of the supplied UText. This means
+ * that the caller is free to immediately close or otherwise reuse the
+ * UText that was passed as a parameter, but that the underlying text itself
+ * must not be altered while being referenced by the break iterator.
+ *
+ * @param text The UText used to change the text.
+ * @param status Receives any error codes.
+ * @stable ICU 3.4
+ */
+ virtual void setText(UText *text, UErrorCode &status);
+
+ /**
+ * Sets the current iteration position to the beginning of the text, position zero.
+ * @return The offset of the beginning of the text, zero.
+ * @stable ICU 2.0
+ */
+ virtual int32_t first(void);
+
+ /**
+ * Sets the current iteration position to the end of the text.
+ * @return The text's past-the-end offset.
+ * @stable ICU 2.0
+ */
+ virtual int32_t last(void);
+
+ /**
+ * Advances the iterator either forward or backward the specified number of steps.
+ * Negative values move backward, and positive values move forward. This is
+ * equivalent to repeatedly calling next() or previous().
+ * @param n The number of steps to move. The sign indicates the direction
+ * (negative is backwards, and positive is forwards).
+ * @return The character offset of the boundary position n boundaries away from
+ * the current one.
+ * @stable ICU 2.0
+ */
+ virtual int32_t next(int32_t n);
+
+ /**
+ * Advances the iterator to the next boundary position.
+ * @return The position of the first boundary after this one.
+ * @stable ICU 2.0
+ */
+ virtual int32_t next(void);
+
+ /**
+ * Moves the iterator backwards, to the last boundary preceding this one.
+ * @return The position of the last boundary position preceding this one.
+ * @stable ICU 2.0
+ */
+ virtual int32_t previous(void);
+
+ /**
+ * Sets the iterator to refer to the first boundary position following
+ * the specified position.
+ * @param offset The position from which to begin searching for a break position.
+ * @return The position of the first break after the current position.
+ * @stable ICU 2.0
+ */
+ virtual int32_t following(int32_t offset);
+
+ /**
+ * Sets the iterator to refer to the last boundary position before the
+ * specified position.
+ * @param offset The position to begin searching for a break from.
+ * @return The position of the last boundary before the starting position.
+ * @stable ICU 2.0
+ */
+ virtual int32_t preceding(int32_t offset);
+
+ /**
+ * Returns true if the specified position is a boundary position. As a side
+ * effect, leaves the iterator pointing to the first boundary position at
+ * or after "offset".
+ * @param offset the offset to check.
+ * @return True if "offset" is a boundary position.
+ * @stable ICU 2.0
+ */
+ virtual UBool isBoundary(int32_t offset);
+
+ /**
+ * Returns the current iteration position. Note that UBRK_DONE is never
+ * returned from this function; if iteration has run to the end of a
+ * string, current() will return the length of the string while
+ * next() will return UBRK_DONE.
+ * @return The current iteration position.
+ * @stable ICU 2.0
+ */
+ virtual int32_t current(void) const;
+
+
+ /**
+ * Return the status tag from the break rule that determined the boundary at
+ * the current iteration position. For break rules that do not specify a
+ * status, a default value of 0 is returned. If more than one break rule
+ * would cause a boundary to be located at some position in the text,
+ * the numerically largest of the applicable status values is returned.
+ * <p>
+ * Of the standard types of ICU break iterators, only word break and
+ * line break provide status values. The values are defined in
+ * the header file ubrk.h. For Word breaks, the status allows distinguishing between words
+ * that contain alphabetic letters, "words" that appear to be numbers,
+ * punctuation and spaces, words containing ideographic characters, and
+ * more. For Line Break, the status distinguishes between hard (mandatory) breaks
+ * and soft (potential) break positions.
+ * <p>
+ * <code>getRuleStatus()</code> can be called after obtaining a boundary
+ * position from <code>next()</code>, <code>previous()</code>, or
+ * any other break iterator function that returns a boundary position.
+ * <p>
+ * Note that <code>getRuleStatus()</code> returns the value corresponding to
+ * <code>current()</code> index even after <code>next()</code> has returned UBRK_DONE.
+ * <p>
+ * When creating custom break rules, one is free to define whatever
+ * status values may be convenient for the application.
+ * <p>
+ * @return the status from the break rule that determined the boundary
+ * at the current iteration position.
+ *
+ * @see UWordBreak
+ * @stable ICU 2.2
+ */
+ virtual int32_t getRuleStatus() const;
+
+ /**
+ * Get the status (tag) values from the break rule(s) that determined the boundary
+ * at the current iteration position.
+ * <p>
+ * The returned status value(s) are stored into an array provided by the caller.
+ * The values are stored in sorted (ascending) order.
+ * If the capacity of the output array is insufficient to hold the data,
+ * the output will be truncated to the available length, and a
+ * U_BUFFER_OVERFLOW_ERROR will be signaled.
+ *
+ * @param fillInVec an array to be filled in with the status values.
+ * @param capacity the length of the supplied vector. A length of zero causes
+ * the function to return the number of status values, in the
+ * normal way, without attempting to store any values.
+ * @param status receives error codes.
+ * @return The number of rule status values from the rules that determined
+ * the boundary at the current iteration position.
+ * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
+ * is the total number of status values that were available,
+ * not the reduced number that were actually returned.
+ * @see getRuleStatus
+ * @stable ICU 3.0
+ */
+ virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
+
+ /**
+ * Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
+ * This method is to implement a simple version of RTTI, since not all
+ * C++ compilers support genuine RTTI. Polymorphic operator==() and
+ * clone() methods call this method.
+ *
+ * @return The class ID for this object. All objects of a
+ * given class have the same class ID. Objects of
+ * other classes have different class IDs.
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const;
+
+ /**
+ * Returns the class ID for this class. This is useful only for
+ * comparing to a return value from getDynamicClassID(). For example:
+ *
+ * Base* polymorphic_pointer = createPolymorphicObject();
+ * if (polymorphic_pointer->getDynamicClassID() ==
+ * Derived::getStaticClassID()) ...
+ *
+ * @return The class ID for all objects of this class.
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+#ifndef U_FORCE_HIDE_DEPRECATED_API
+ /**
+ * Deprecated functionality. Use clone() instead.
+ *
+ * Create a clone (copy) of this break iterator in memory provided
+ * by the caller. The idea is to increase performance by avoiding
+ * a storage allocation. Use of this function is NOT RECOMMENDED.
+ * Performance gains are minimal, and correct buffer management is
+ * tricky. Use clone() instead.
+ *
+ * @param stackBuffer The pointer to the memory into which the cloned object
+ * should be placed. If NULL, allocate heap memory
+ * for the cloned object.
+ * @param BufferSize The size of the buffer. If zero, return the required
+ * buffer size, but do not clone the object. If the
+ * size was too small (but not zero), allocate heap
+ * storage for the cloned object.
+ *
+ * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be
+ * returned if the provided buffer was too small, and
+ * the clone was therefore put on the heap.
+ *
+ * @return Pointer to the clone object. This may differ from the stackBuffer
+ * address if the byte alignment of the stack buffer was not suitable
+ * or if the stackBuffer was too small to hold the clone.
+ * @deprecated ICU 52. Use clone() instead.
+ */
+ virtual RuleBasedBreakIterator *createBufferClone(void *stackBuffer,
+ int32_t &BufferSize,
+ UErrorCode &status);
+#endif // U_FORCE_HIDE_DEPRECATED_API
+
+ /**
+ * Return the binary form of compiled break rules,
+ * which can then be used to create a new break iterator at some
+ * time in the future. Creating a break iterator from pre-compiled rules
+ * is much faster than building one from the source form of the
+ * break rules.
+ *
+ * The binary data can only be used with the same version of ICU
+ * and on the same platform type (processor endian-ness).
+ *
+ * @param length Returns the length of the binary data. (Out parameter.)
+ *
+ * @return A pointer to the binary (compiled) rule data. The storage
+ * belongs to the RuleBasedBreakIterator object, not the
+ * caller, and must not be modified or deleted.
+ * @stable ICU 4.8
+ */
+ virtual const uint8_t *getBinaryRules(uint32_t &length);
+
+ /**
+ * Set the subject text string upon which the break iterator is operating
+ * without changing any other aspect of the matching state.
+ * The new and previous text strings must have the same content.
+ *
+ * This function is intended for use in environments where ICU is operating on
+ * strings that may move around in memory. It provides a mechanism for notifying
+ * ICU that the string has been relocated, and providing a new UText to access the
+ * string in its new position.
+ *
+ * Note that the break iterator implementation never copies the underlying text
+ * of a string being processed, but always operates directly on the original text
+ * provided by the user. Refreshing simply drops the references to the old text
+ * and replaces them with references to the new.
+ *
+ * Caution: this function is normally used only by very specialized,
+ * system-level code. One example use case is with garbage collection that moves
+ * the text in memory.
+ *
+ * @param input The new (moved) text string.
+ * @param status Receives errors detected by this function.
+ * @return *this
+ *
+ * @stable ICU 49
+ */
+ virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status);
+
+
+private:
+ //=======================================================================
+ // implementation
+ //=======================================================================
+ /**
+ * Dumps caches and performs other actions associated with a complete change
+ * in text or iteration position.
+ * @internal (private)
+ */
+ void reset(void);
+
+ /**
+ * Common initialization function, used by constructors and bufferClone.
+ * @internal (private)
+ */
+ void init(UErrorCode &status);
+
+ /**
+ * Iterate backwards from an arbitrary position in the input text using the
+ * synthesized Safe Reverse rules.
+ * This locates a "Safe Position" from which the forward break rules
+ * will operate correctly. A Safe Position is not necessarily a boundary itself.
+ *
+ * @param fromPosition the position in the input text to begin the iteration.
+ * @internal (private)
+ */
+ int32_t handleSafePrevious(int32_t fromPosition);
+
+ /**
+ * Find a rule-based boundary by running the state machine.
+ * Input
+ * fPosition, the position in the text to begin from.
+ * Output
+ * fPosition: the boundary following the starting position.
+ * fDictionaryCharCount the number of dictionary characters encountered.
+ * If > 0, the segment will be further subdivided
+ * fRuleStatusIndex Info from the state table indicating which rules caused the boundary.
+ *
+ * @internal (private)
+ */
+ int32_t handleNext();
+
+ /*
+ * Templatized version of handleNext() and handleSafePrevious().
+ *
+ * There will be exactly four instantiations, two each for 8 and 16 bit tables,
+ * two each for 8 and 16 bit trie.
+ * Having separate instantiations for the table types keeps conditional tests of
+ * the table type out of the inner loops, at the expense of replicated code.
+ *
+ * The template parameter for the Trie access function is a value, not a type.
+ * Doing it this way, the compiler will inline the Trie function in the
+ * expanded functions. (Both the 8 and 16 bit access functions have the same type
+ * signature)
+ */
+
+ typedef uint16_t (*PTrieFunc)(const UCPTrie *, UChar32);
+
+ template<typename RowType, PTrieFunc trieFunc>
+ int32_t handleSafePrevious(int32_t fromPosition);
+
+ template<typename RowType, PTrieFunc trieFunc>
+ int32_t handleNext();
+
+
+ /**
+ * This function returns the appropriate LanguageBreakEngine for a
+ * given character c.
+ * @param c A character in the dictionary set
+ * @internal (private)
+ */
+ const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
+
+ public:
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Debugging function only.
+ * @internal
+ */
+ void dumpCache();
+
+ /**
+ * Debugging function only.
+ * @internal
+ */
+ void dumpTables();
+#endif /* U_HIDE_INTERNAL_API */
+};
+
+//------------------------------------------------------------------------------
+//
+// Inline Functions Definitions ...
+//
+//------------------------------------------------------------------------------
+
+/**
+ * Inequality is defined strictly as the logical negation of operator==(),
+ * so the two operators can never disagree.
+ */
+inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
+ const UBool sameAsThat = this->operator==(that);
+ return !sameAsThat;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/rep.h b/thirdparty/icu4c/common/unicode/rep.h
new file mode 100644
index 0000000000..6dd4530647
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/rep.h
@@ -0,0 +1,266 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**************************************************************************
+* Copyright (C) 1999-2012, International Business Machines Corporation and
+* others. All Rights Reserved.
+**************************************************************************
+* Date Name Description
+* 11/17/99 aliu Creation. Ported from java. Modified to
+* match current UnicodeString API. Forced
+* to use name "handleReplaceBetween" because
+* of existing methods in UnicodeString.
+**************************************************************************
+*/
+
+#ifndef REP_H
+#define REP_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Replaceable String
+ */
+
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+/**
+ * <code>Replaceable</code> is an abstract base class representing a
+ * string of characters that supports the replacement of a range of
+ * itself with a new string of characters. It is used by APIs that
+ * change a piece of text while retaining metadata. Metadata is data
+ * other than the Unicode characters returned by char32At(). One
+ * example of metadata is style attributes; another is an edit
+ * history, marking each character with an author and revision number.
+ *
+ * <p>An implicit aspect of the <code>Replaceable</code> API is that
+ * during a replace operation, new characters take on the metadata of
+ * the old characters. For example, if the string "the <b>bold</b>
+ * font" has range (4, 8) replaced with "strong", then it becomes "the
+ * <b>strong</b> font".
+ *
+ * <p><code>Replaceable</code> specifies ranges using a start
+ * offset and a limit offset. The range of characters thus specified
+ * includes the characters at offset start..limit-1. That is, the
+ * start offset is inclusive, and the limit offset is exclusive.
+ *
+ * <p><code>Replaceable</code> also includes API to access characters
+ * in the string: <code>length()</code>, <code>charAt()</code>,
+ * <code>char32At()</code>, and <code>extractBetween()</code>.
+ *
+ * <p>For a subclass to support metadata, typical behavior of
+ * <code>replace()</code> is the following:
+ * <ul>
+ * <li>Set the metadata of the new text to the metadata of the first
+ * character replaced</li>
+ * <li>If no characters are replaced, use the metadata of the
+ * previous character</li>
+ * <li>If there is no previous character (i.e. start == 0), use the
+ * following character</li>
+ * <li>If there is no following character (i.e. the replaceable was
+ * empty), use default metadata.</li>
+ * <li>If the code point U+FFFF is seen, it should be interpreted as
+ * a special marker having no metadata
+ * </li>
+ * </ul>
+ * If this is not the behavior, the subclass should document any differences.
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+class U_COMMON_API Replaceable : public UObject {
+
+public:
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~Replaceable();
+
+ /**
+ * Returns the number of 16-bit code units in the text.
+ * @return number of 16-bit code units in text
+ * @stable ICU 1.8
+ */
+ inline int32_t length() const;
+
+ /**
+ * Returns the 16-bit code unit at the given offset into the text.
+ * @param offset an integer between 0 and <code>length()</code>-1
+ * inclusive
+ * @return 16-bit code unit of text at given offset
+ * @stable ICU 1.8
+ */
+ inline char16_t charAt(int32_t offset) const;
+
+ /**
+ * Returns the 32-bit code point at the given 16-bit offset into
+ * the text. This assumes the text is stored as 16-bit code units
+ * with surrogate pairs intermixed. If the offset of a leading or
+ * trailing code unit of a surrogate pair is given, return the
+ * code point of the surrogate pair.
+ *
+ * @param offset an integer between 0 and <code>length()</code>-1
+ * inclusive
+ * @return 32-bit code point of text at given offset
+ * @stable ICU 1.8
+ */
+ inline UChar32 char32At(int32_t offset) const;
+
+ /**
+ * Copies characters in the range [<tt>start</tt>, <tt>limit</tt>)
+ * into the UnicodeString <tt>target</tt>.
+ * Nothing is returned (the function is declared void); the copied
+ * characters are delivered through <tt>target</tt>.
+ * @param start offset of first character which will be copied
+ * @param limit offset immediately following the last character to
+ * be copied
+ * @param target UnicodeString into which to copy characters.
+ * @stable ICU 2.1
+ */
+ virtual void extractBetween(int32_t start,
+ int32_t limit,
+ UnicodeString& target) const = 0;
+
+ /**
+ * Replaces a substring of this object with the given text. If the
+ * characters being replaced have metadata, the new characters
+ * that replace them should be given the same metadata.
+ *
+ * <p>Subclasses must ensure that if the text between start and
+ * limit is equal to the replacement text, that replace has no
+ * effect. That is, any metadata
+ * should be unaffected. In addition, subclasses are encouraged to
+ * check for initial and trailing identical characters, and make a
+ * smaller replacement if possible. This will preserve as much
+ * metadata as possible.
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= length()</code>.
+ * @param text the text to replace characters <code>start</code>
+ * to <code>limit - 1</code>
+ * @stable ICU 2.0
+ */
+ virtual void handleReplaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& text) = 0;
+ // Note: All other methods in this class take the names of
+ // existing UnicodeString methods. This method is the exception.
+ // It is named differently because all replace methods of
+ // UnicodeString return a UnicodeString&. The 'between' is
+ // required in order to conform to the UnicodeString naming
+ // convention; API taking start/length are named <operation>, and
+ // those taking start/limit are named <operationBetween>. The
+ // 'handle' is added because 'replaceBetween' and
+ // 'doReplaceBetween' are already taken.
+
+ /**
+ * Copies a substring of this object, retaining metadata.
+ * This method is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * @param start the beginning index, inclusive; <code>0 <= start <=
+ * limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit <=
+ * length()</code>.
+ * @param dest the destination index. The characters from
+ * <code>start..limit-1</code> will be copied to <code>dest</code>.
+ * Implementations of this method may assume that <code>dest <= start ||
+ * dest >= limit</code>.
+ * @stable ICU 2.0
+ */
+ virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0;
+
+ /**
+ * Returns true if this object contains metadata. If a
+ * Replaceable object has metadata, calls to the Replaceable API
+ * must be made so as to preserve metadata. If it does not, calls
+ * to the Replaceable API may be optimized to improve performance.
+ * The default implementation returns true.
+ * @return true if this object contains metadata
+ * @stable ICU 2.2
+ */
+ virtual UBool hasMetaData() const;
+
+ /**
+ * Clone this object, an instance of a subclass of Replaceable.
+ * Clones can be used concurrently in multiple threads.
+ * If a subclass does not implement clone(), or if an error occurs,
+ * then NULL is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.6
+ */
+ virtual Replaceable *clone() const;
+
+protected:
+
+ /**
+ * Default constructor.
+ * @stable ICU 2.4
+ */
+ inline Replaceable();
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ Replaceable &Replaceable::operator=(const Replaceable &);
+ */
+
+ /**
+ * Virtual version of length().
+ * @stable ICU 2.4
+ */
+ virtual int32_t getLength() const = 0;
+
+ /**
+ * Virtual version of charAt().
+ * @stable ICU 2.4
+ */
+ virtual char16_t getCharAt(int32_t offset) const = 0;
+
+ /**
+ * Virtual version of char32At().
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getChar32At(int32_t offset) const = 0;
+};
+
+/** Default constructor: the body is intentionally empty. */
+inline Replaceable::Replaceable()
+{
+}
+
+/** Non-virtual front end: forwards to the virtual getLength(). */
+inline int32_t Replaceable::length() const {
+ const int32_t codeUnitCount = this->getLength();
+ return codeUnitCount;
+}
+
+/** Non-virtual front end: forwards to the virtual getCharAt(). */
+inline char16_t Replaceable::charAt(int32_t offset) const {
+ const char16_t codeUnit = this->getCharAt(offset);
+ return codeUnit;
+}
+
+/** Non-virtual front end: forwards to the virtual getChar32At(). */
+inline UChar32 Replaceable::char32At(int32_t offset) const {
+ const UChar32 codePoint = this->getChar32At(offset);
+ return codePoint;
+}
+
+// There is no rep.cpp, see unistr.cpp for Replaceable function implementations.
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/resbund.h b/thirdparty/icu4c/common/unicode/resbund.h
new file mode 100644
index 0000000000..37738e277b
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/resbund.h
@@ -0,0 +1,498 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1996-2013, International Business Machines Corporation
+* and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File resbund.h
+*
+* CREATED BY
+* Richard Gillam
+*
+* Modification History:
+*
+* Date Name Description
+* 2/5/97 aliu Added scanForLocaleInFile. Added
+* constructor which attempts to read resource bundle
+* from a specific file, without searching other files.
+* 2/11/97 aliu Added UErrorCode return values to constructors. Fixed
+* infinite loops in scanForFile and scanForLocale.
+* Modified getRawResourceData to not delete storage
+* in localeData and resourceData which it doesn't own.
+* Added Mac compatibility #ifdefs for tellp() and
+* ios::nocreate.
+* 2/18/97 helena Updated with 100% documentation coverage.
+* 3/13/97 aliu Rewrote to load in entire resource bundle and store
+* it as a Hashtable of ResourceBundleData objects.
+* Added state table to govern parsing of files.
+* Modified to load locale index out of new file
+* distinct from default.txt.
+* 3/25/97 aliu Modified to support 2-d arrays, needed for timezone
+* data. Added support for custom file suffixes. Again,
+* needed to support timezone data.
+* 4/7/97 aliu Cleaned up.
+* 03/02/99 stephen Removed dependency on FILE*.
+* 03/29/99 helena Merged Bertrand and Stephen's changes.
+* 06/11/99 stephen Removed parsing of .txt files.
+* Reworked to use new binary format.
+* Cleaned up.
+* 06/14/99 stephen Removed methods taking a filename suffix.
+* 11/09/99 weiv Added getLocale(), fRealLocale, removed fRealLocaleID
+******************************************************************************
+*/
+
+#ifndef RESBUND_H
+#define RESBUND_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "unicode/unistr.h"
+#include "unicode/locid.h"
+
+/**
+ * \file
+ * \brief C++ API: Resource Bundle
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A class representing a collection of resource information pertaining to a given
+ * locale. A resource bundle provides a way of accessing locale-specific information in
+ * a data file. You create a resource bundle that manages the resources for a given
+ * locale and then ask it for individual resources.
+ * <P>
+ * Resource bundles in ICU4C are currently defined using text files which conform to the following
+ * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/bnf_rb.txt">BNF definition</a>.
+ * More on resource bundle concepts and syntax can be found in the
+ * <a href="http://icu-project.org/userguide/ResourceManagement.html">Users Guide</a>.
+ * <P>
+ *
+ * The ResourceBundle class is not suitable for subclassing.
+ *
+ * @stable ICU 2.0
+ */
+class U_COMMON_API ResourceBundle : public UObject {
+public:
+ /**
+ * Constructor
+ *
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by <code> udata_open( packageName, "res", locale, err) </code>
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated.
+ * @param locale This is the locale this resource bundle is for. To get resources
+ * for the French locale, for example, you would create a
+ * ResourceBundle passing Locale::FRENCH for the "locale" parameter,
+ * and all subsequent calls to that resource bundle will return
+ * resources that pertain to the French locale. If the caller doesn't
+ * pass a locale parameter, the default locale for the system (as
+ * returned by Locale::getDefault()) will be used.
+ * @param err The Error Code.
+ * The UErrorCode& err parameter is used to return status information to the user. To
+ * check whether the construction succeeded or not, you should check the value of
+ * U_SUCCESS(err). If you wish more detailed information, you can check for
+ * informational error results which still indicate success. U_USING_FALLBACK_WARNING
+ * indicates that a fall back locale was used. For example, 'de_CH' was requested,
+ * but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that
+ * the default locale data was used; neither the requested locale nor any of its
+ * fall back locales could be found.
+ * @stable ICU 2.0
+ */
+ ResourceBundle(const UnicodeString& packageName,
+ const Locale& locale,
+ UErrorCode& err);
+
+ /**
+ * Construct a resource bundle for the default bundle in the specified package.
+ *
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by <code> udata_open( packageName, "res", locale, err) </code>
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated.
+ * @param err A UErrorCode value
+ * @stable ICU 2.0
+ */
+ ResourceBundle(const UnicodeString& packageName,
+ UErrorCode& err);
+
+ /**
+ * Construct a resource bundle for the ICU default bundle.
+ *
+ * @param err A UErrorCode value
+ * @stable ICU 2.0
+ */
+ ResourceBundle(UErrorCode &err);
+
+ /**
+ * Standard constructor, constructs a resource bundle for the locale-specific
+ * bundle in the specified package.
+ *
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by <code> udata_open( packageName, "res", locale, err) </code>
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated.
+ * NULL is used to refer to ICU data.
+ * @param locale The locale for which to open a resource bundle.
+ * @param err A UErrorCode value
+ * @stable ICU 2.0
+ */
+ ResourceBundle(const char* packageName,
+ const Locale& locale,
+ UErrorCode& err);
+
+ /**
+ * Copy constructor.
+ *
+ * @param original The resource bundle to copy.
+ * @stable ICU 2.0
+ */
+ ResourceBundle(const ResourceBundle &original);
+
+ /**
+ * Constructor from a C UResourceBundle. The resource bundle is
+ * copied and not adopted. ures_close will still need to be used on the
+ * original resource bundle.
+ *
+ * @param res A pointer to the C resource bundle.
+ * @param status A UErrorCode value.
+ * @stable ICU 2.0
+ */
+ ResourceBundle(UResourceBundle *res,
+ UErrorCode &status);
+
+ /**
+ * Assignment operator.
+ *
+ * @param other The resource bundle to copy.
+ * @stable ICU 2.0
+ */
+ ResourceBundle&
+ operator=(const ResourceBundle& other);
+
+ /** Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~ResourceBundle();
+
+ /**
+ * Clone this object.
+ * Clones can be used concurrently in multiple threads.
+ * If an error occurs, then NULL is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.8
+ */
+ ResourceBundle *clone() const;
+
+ /**
+ * Returns the size of a resource. Size for scalar types is always 1, and for vector/table types is
+ * the number of child resources.
+ * @warning Integer array is treated as a scalar type. There are no
+ * APIs to access individual members of an integer array. It
+ * is always returned as a whole.
+ *
+ * @return number of resources in a given resource.
+ * @stable ICU 2.0
+ */
+ int32_t
+ getSize(void) const;
+
+ /**
+ * returns a string from a string resource type
+ *
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a warning
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return the string as a UnicodeString object, returned by value (not a raw char16_t pointer).
+ * @stable ICU 2.0
+ */
+ UnicodeString
+ getString(UErrorCode& status) const;
+
+ /**
+ * Returns binary data from a resource. Can be used on most primitive resource types (binaries,
+ * strings, ints)
+ *
+ * @param len fills in the length of resulting byte chunk
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a warning
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+ const uint8_t*
+ getBinary(int32_t& len, UErrorCode& status) const;
+
+
+ /**
+ * returns an integer vector from a resource.
+ *
+ * @param len fills in the length of resulting integer vector
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a warning
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to a vector of integers that lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+ const int32_t*
+ getIntVector(int32_t& len, UErrorCode& status) const;
+
+ /**
+ * returns an unsigned integer from a resource.
+ * This integer is originally 28 bits.
+ *
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a warning
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return an unsigned integer value
+ * @stable ICU 2.0
+ */
+ uint32_t
+ getUInt(UErrorCode& status) const;
+
+ /**
+ * returns a signed integer from a resource.
+ * This integer is originally 28 bit and the sign gets propagated.
+ *
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a warning
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a signed integer value
+ * @stable ICU 2.0
+ */
+ int32_t
+ getInt(UErrorCode& status) const;
+
+ /**
+ * Checks whether the resource has another element to iterate over.
+ *
+ * @return true if there are more elements, false if there are no more elements
+ * @stable ICU 2.0
+ */
+ UBool
+ hasNext(void) const;
+
+ /**
+ * Resets the internal context of a resource so that iteration starts from the first element.
+ *
+ * @stable ICU 2.0
+ */
+ void
+ resetIterator(void);
+
+ /**
+ * Returns the key associated with this resource. Not all the resources have a key - only
+ * those that are members of a table.
+ *
+ * @return a key associated to this resource, or NULL if it doesn't have a key
+ * @stable ICU 2.0
+ */
+ const char*
+ getKey(void) const;
+
+ /**
+ * Gets the locale ID of the resource bundle as a string.
+ * Same as getLocale().getName() .
+ *
+ * @return the locale ID of the resource bundle as a string
+ * @stable ICU 2.0
+ */
+ const char*
+ getName(void) const;
+
+
+ /**
+ * Returns the type of a resource. Available types are defined in enum UResType
+ *
+ * @return type of the given resource.
+ * @stable ICU 2.0
+ */
+ UResType
+ getType(void) const;
+
+ /**
+ * Returns the next resource in a given resource or NULL if there are no more resources
+ *
+ * @param status fills in the outgoing error code
+ * @return ResourceBundle object, by value. NOTE(review): a by-value result cannot be NULL; confirm what is returned when exhausted.
+ * @stable ICU 2.0
+ */
+ ResourceBundle
+ getNext(UErrorCode& status);
+
+ /**
+ * Returns the next string in a resource or NULL if there are no more resources
+ * to iterate over.
+ *
+ * @param status fills in the outgoing error code
+ * @return a UnicodeString object, by value. NOTE(review): a by-value result cannot be NULL; confirm what is returned when exhausted.
+ * @stable ICU 2.0
+ */
+ UnicodeString
+ getNextString(UErrorCode& status);
+
+ /**
+ * Returns the next string in a resource or NULL if there are no more resources
+ * to iterate over.
+ *
+ * @param key fill in for key associated with this string
+ * @param status fills in the outgoing error code
+ * @return a UnicodeString object, by value. NOTE(review): a by-value result cannot be NULL; confirm what is returned when exhausted.
+ * @stable ICU 2.0
+ */
+ UnicodeString
+ getNextString(const char ** key,
+ UErrorCode& status);
+
+ /**
+ * Returns the resource in a resource at the specified index.
+ *
+ * @param index an index to the wanted resource.
+ * @param status fills in the outgoing error code
+ * @return ResourceBundle object. If there is an error, resource is invalid.
+ * @stable ICU 2.0
+ */
+ ResourceBundle
+ get(int32_t index,
+ UErrorCode& status) const;
+
+ /**
+ * Returns the string in a given resource at the specified index.
+ *
+ * @param index an index to the wanted string.
+ * @param status fills in the outgoing error code
+ * @return an UnicodeString object. If there is an error, string is bogus
+ * @stable ICU 2.0
+ */
+ UnicodeString
+ getStringEx(int32_t index,
+ UErrorCode& status) const;
+
+ /**
+ * Returns a resource in a resource that has a given key. This procedure works only with table
+ * resources.
+ *
+ * @param key a key associated with the wanted resource
+ * @param status fills in the outgoing error code.
+ * @return ResourceBundle object. If there is an error, resource is invalid.
+ * @stable ICU 2.0
+ */
+ ResourceBundle
+ get(const char* key,
+ UErrorCode& status) const;
+
+ /**
+ * Returns a string in a resource that has a given key. This procedure works only with table
+ * resources.
+ *
+ * @param key a key associated with the wanted string
+ * @param status fills in the outgoing error code
+ * @return an UnicodeString object. If there is an error, string is bogus
+ * @stable ICU 2.0
+ */
+ UnicodeString
+ getStringEx(const char* key,
+ UErrorCode& status) const;
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Return the version number associated with this ResourceBundle as a string. Please
+ * use getVersion, as this method is going to be deprecated.
+ *
+ * @return A version number string as specified in the resource bundle or its parent.
+ * The caller does not own this string.
+ * @see getVersion
+ * @deprecated ICU 2.8 Use getVersion instead.
+ */
+ const char*
+ getVersionNumber(void) const;
+#endif /* U_HIDE_DEPRECATED_API */
+
+ /**
+ * Return the version number associated with this ResourceBundle as a UVersionInfo array.
+ *
+ * @param versionInfo A UVersionInfo array that is filled with the version number
+ * as specified in the resource bundle or its parent.
+ * @stable ICU 2.0
+ */
+ void
+ getVersion(UVersionInfo versionInfo) const;
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Return the Locale associated with this ResourceBundle.
+ *
+ * @return a Locale object
+ * @deprecated ICU 2.8 Use getLocale(ULocDataLocaleType type, UErrorCode &status) overload instead.
+ */
+ const Locale&
+ getLocale(void) const;
+#endif /* U_HIDE_DEPRECATED_API */
+
+ /**
+ * Return the Locale associated with this ResourceBundle.
+ * @param type You can choose between requested, valid and actual
+ * locale. For description see the definition of
+ * ULocDataLocaleType in uloc.h
+ * @param status just for catching illegal arguments
+ *
+ * @return a Locale object
+ * @stable ICU 2.8
+ */
+ const Locale
+ getLocale(ULocDataLocaleType type, UErrorCode &status) const;
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * This API implements multilevel fallback
+ * @internal
+ */
+ ResourceBundle
+ getWithFallback(const char* key, UErrorCode& status);
+#endif /* U_HIDE_INTERNAL_API */
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+private:
+ ResourceBundle(); // default constructor not implemented
+
+ UResourceBundle *fResource;
+ void constructForLocale(const UnicodeString& path, const Locale& locale, UErrorCode& error);
+ Locale *fLocale;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/schriter.h b/thirdparty/icu4c/common/unicode/schriter.h
new file mode 100644
index 0000000000..1ca5b70fca
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/schriter.h
@@ -0,0 +1,195 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1998-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File schriter.h
+*
+* Modification History:
+*
+* Date Name Description
+* 05/05/99 stephen Cleaned up.
+******************************************************************************
+*/
+
+#ifndef SCHRITER_H
+#define SCHRITER_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/chariter.h"
+#include "unicode/uchriter.h"
+
+/**
+ * \file
+ * \brief C++ API: String Character Iterator
+ */
+
+U_NAMESPACE_BEGIN
+/**
+ * A concrete subclass of CharacterIterator that iterates over the
+ * characters (code units or code points) in a UnicodeString.
+ * It's possible not only to create an
+ * iterator that iterates over an entire UnicodeString, but also to
+ * create one that iterates over only a subrange of a UnicodeString
+ * (iterators over different subranges of the same UnicodeString don't
+ * compare equal).
+ * @see CharacterIterator
+ * @see ForwardCharacterIterator
+ * @stable ICU 2.0
+ */
+class U_COMMON_API StringCharacterIterator : public UCharCharacterIterator {
+public:
+ /**
+ * Create an iterator over the UnicodeString referred to by "textStr".
+ * The UnicodeString object is copied.
+ * The iteration range is the whole string, and the starting position is 0.
+ * @param textStr The unicode string used to create an iterator
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator(const UnicodeString& textStr);
+
+ /**
+ * Create an iterator over the UnicodeString referred to by "textStr".
+ * The iteration range is the whole string, and the starting
+ * position is specified by "textPos". If "textPos" is outside the valid
+ * iteration range, the behavior of this object is undefined.
+ * @param textStr The unicode string used to create an iterator
+ * @param textPos The starting position of the iteration
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator(const UnicodeString& textStr,
+ int32_t textPos);
+
+ /**
+ * Create an iterator over the UnicodeString referred to by "textStr".
+ * The UnicodeString object is copied.
+ * The iteration range begins with the code unit specified by
+ * "textBegin" and ends with the code unit BEFORE the code unit specified
+ * by "textEnd". The starting position is specified by "textPos". If
+ * "textBegin" and "textEnd" don't form a valid range on "text" (i.e.,
+ * textBegin >= textEnd or either is negative or greater than text.size()),
+ * or "textPos" is outside the range defined by "textBegin" and "textEnd",
+ * the behavior of this iterator is undefined.
+ * @param textStr The unicode string used to create the StringCharacterIterator
+ * @param textBegin The begin position of the iteration range
+ * @param textEnd The end position of the iteration range
+ * @param textPos The starting position of the iteration
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator(const UnicodeString& textStr,
+ int32_t textBegin,
+ int32_t textEnd,
+ int32_t textPos);
+
+ /**
+ * Copy constructor. The new iterator iterates over the same range
+ * of the same string as "that", and its initial position is the
+ * same as "that"'s current position.
+ * The UnicodeString object in "that" is copied.
+ * @param that The StringCharacterIterator to be copied
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator(const StringCharacterIterator& that);
+
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~StringCharacterIterator();
+
+ /**
+ * Assignment operator. *this is altered to iterate over the same
+ * range of the same string as "that", and refers to the same
+ * character within that string as "that" does.
+ * @param that The object to be copied.
+ * @return a reference to a StringCharacterIterator (NOTE(review): presumably *this, not a newly created object - confirm).
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator&
+ operator=(const StringCharacterIterator& that);
+
+ /**
+ * Returns true if the iterators iterate over the same range of the
+ * same string and are pointing at the same character.
+ * @param that The ForwardCharacterIterator to be compared for equality
+ * @return true if the iterators iterate over the same range of the
+ * same string and are pointing at the same character.
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const ForwardCharacterIterator& that) const;
+
+ /**
+ * Returns a new StringCharacterIterator referring to the same
+ * character in the same range of the same string as this one. The
+ * caller must delete the new iterator.
+ * @return the newly cloned object.
+ * @stable ICU 2.0
+ */
+ virtual StringCharacterIterator* clone() const;
+
+ /**
+ * Sets the iterator to iterate over the provided string.
+ * @param newText The string to be iterated over
+ * @stable ICU 2.0
+ */
+ void setText(const UnicodeString& newText);
+
+ /**
+ * Copies the UnicodeString under iteration into the UnicodeString
+ * referred to by "result". Even if this iterator iterates across
+ * only a part of this string, the whole string is copied.
+ * @param result Receives a copy of the text under iteration.
+ * @stable ICU 2.0
+ */
+ virtual void getText(UnicodeString& result);
+
+ /**
+ * Return a class ID for this object (not really public)
+ * @return a class ID for this object.
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const;
+
+ /**
+ * Return a class ID for this class (not really public)
+ * @return a class ID for this class
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+protected:
+ /**
+ * Default constructor, iteration over empty string.
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator();
+
+ /**
+ * Sets the iterator to iterate over the provided string.
+ * @param newText The string to be iterated over
+ * @param newTextLength The length of the String
+ * @stable ICU 2.0
+ */
+ void setText(const char16_t* newText, int32_t newTextLength);
+
+ /**
+ * Copy of the iterated string object.
+ * @stable ICU 2.0
+ */
+ UnicodeString text;
+
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/simpleformatter.h b/thirdparty/icu4c/common/unicode/simpleformatter.h
new file mode 100644
index 0000000000..6d9c04ace2
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/simpleformatter.h
@@ -0,0 +1,341 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2014-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* simpleformatter.h
+*/
+
+#ifndef __SIMPLEFORMATTER_H__
+#define __SIMPLEFORMATTER_H__
+
+/**
+ * \file
+ * \brief C++ API: Simple formatter, minimal subset of MessageFormat.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/unistr.h"
+
+U_NAMESPACE_BEGIN
+
+// Forward declaration:
+namespace number {
+namespace impl {
+class SimpleModifier;
+}
+}
+
+/**
+ * Formats simple patterns like "{1} was born in {0}".
+ * Minimal subset of MessageFormat; fast, simple, minimal dependencies.
+ * Supports only numbered arguments with no type nor style parameters,
+ * and formats only string values.
+ * Quoting via ASCII apostrophe compatible with ICU MessageFormat default behavior.
+ *
+ * Factory methods set error codes for syntax errors
+ * and for too few or too many arguments/placeholders.
+ *
+ * SimpleFormatter objects are thread-safe except for assignment and applying new patterns.
+ *
+ * Example:
+ * <pre>
+ * UErrorCode errorCode = U_ZERO_ERROR;
+ * SimpleFormatter fmt("{1} '{born}' in {0}", errorCode);
+ * UnicodeString result;
+ *
+ * // Output: "paul {born} in england"
+ * fmt.format("england", "paul", result, errorCode);
+ * </pre>
+ *
+ * This class is not intended for public subclassing.
+ *
+ * @see MessageFormat
+ * @see UMessagePatternApostropheMode
+ * @stable ICU 57
+ */
+class U_COMMON_API SimpleFormatter U_FINAL : public UMemory {
+public:
+ /**
+ * Default constructor. Seeds compiledPattern with the code unit 0 (presumably an argument limit of 0 and no segments - TODO confirm).
+ * @stable ICU 57
+ */
+ SimpleFormatter() : compiledPattern((char16_t)0) {}
+
+ /**
+ * Constructs a formatter from the pattern string.
+ *
+ * @param pattern The pattern string.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
+ * @stable ICU 57
+ */
+ SimpleFormatter(const UnicodeString& pattern, UErrorCode &errorCode) {
+ applyPattern(pattern, errorCode);
+ }
+
+ /**
+ * Constructs a formatter from the pattern string.
+ * The number of arguments checked against the given limits is the
+ * highest argument number plus one, not the number of occurrences of arguments.
+ *
+ * @param pattern The pattern string.
+ * @param min The pattern must have at least this many arguments.
+ * @param max The pattern must have at most this many arguments.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
+ * too few or too many arguments.
+ * @stable ICU 57
+ */
+ SimpleFormatter(const UnicodeString& pattern, int32_t min, int32_t max,
+ UErrorCode &errorCode) {
+ applyPatternMinMaxArguments(pattern, min, max, errorCode);
+ }
+
+ /**
+ * Copy constructor.
+ * @stable ICU 57
+ */
+ SimpleFormatter(const SimpleFormatter& other)
+ : compiledPattern(other.compiledPattern) {}
+
+ /**
+ * Assignment operator.
+ * @stable ICU 57
+ */
+ SimpleFormatter &operator=(const SimpleFormatter& other);
+
+ /**
+ * Destructor.
+ * @stable ICU 57
+ */
+ ~SimpleFormatter();
+
+ /**
+ * Changes this object according to the new pattern.
+ *
+ * @param pattern The pattern string.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
+ * @return true if U_SUCCESS(errorCode).
+ * @stable ICU 57
+ */
+ UBool applyPattern(const UnicodeString &pattern, UErrorCode &errorCode) {
+ return applyPatternMinMaxArguments(pattern, 0, INT32_MAX, errorCode);
+ }
+
+ /**
+ * Changes this object according to the new pattern.
+ * The number of arguments checked against the given limits is the
+ * highest argument number plus one, not the number of occurrences of arguments.
+ *
+ * @param pattern The pattern string.
+ * @param min The pattern must have at least this many arguments.
+ * @param max The pattern must have at most this many arguments.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
+ * too few or too many arguments.
+ * @return true if U_SUCCESS(errorCode).
+ * @stable ICU 57
+ */
+ UBool applyPatternMinMaxArguments(const UnicodeString &pattern,
+ int32_t min, int32_t max, UErrorCode &errorCode);
+
+ /**
+ * @return The max argument number + 1.
+ * @stable ICU 57
+ */
+ int32_t getArgumentLimit() const {
+ return getArgumentLimit(compiledPattern.getBuffer(), compiledPattern.length());
+ }
+
+ /**
+ * Formats the given value, appending to the appendTo builder.
+ * The argument value must not be the same object as appendTo.
+ * getArgumentLimit() must be at most 1.
+ *
+ * @param value0 Value for argument {0}.
+ * @param appendTo Gets the formatted pattern and value appended.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return appendTo
+ * @stable ICU 57
+ */
+ UnicodeString &format(
+ const UnicodeString &value0,
+ UnicodeString &appendTo, UErrorCode &errorCode) const;
+
+ /**
+ * Formats the given values, appending to the appendTo builder.
+ * An argument value must not be the same object as appendTo.
+ * getArgumentLimit() must be at most 2.
+ *
+ * @param value0 Value for argument {0}.
+ * @param value1 Value for argument {1}.
+ * @param appendTo Gets the formatted pattern and values appended.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return appendTo
+ * @stable ICU 57
+ */
+ UnicodeString &format(
+ const UnicodeString &value0,
+ const UnicodeString &value1,
+ UnicodeString &appendTo, UErrorCode &errorCode) const;
+
+ /**
+ * Formats the given values, appending to the appendTo builder.
+ * An argument value must not be the same object as appendTo.
+ * getArgumentLimit() must be at most 3.
+ *
+ * @param value0 Value for argument {0}.
+ * @param value1 Value for argument {1}.
+ * @param value2 Value for argument {2}.
+ * @param appendTo Gets the formatted pattern and values appended.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return appendTo
+ * @stable ICU 57
+ */
+ UnicodeString &format(
+ const UnicodeString &value0,
+ const UnicodeString &value1,
+ const UnicodeString &value2,
+ UnicodeString &appendTo, UErrorCode &errorCode) const;
+
+ /**
+ * Formats the given values, appending to the appendTo string.
+ *
+ * @param values The argument values.
+ * An argument value must not be the same object as appendTo.
+ * Can be NULL if valuesLength==getArgumentLimit()==0.
+ * @param valuesLength The length of the values array.
+ * Must be at least getArgumentLimit().
+ * @param appendTo Gets the formatted pattern and values appended.
+ * @param offsets offsets[i] receives the offset of where
+ * values[i] replaced pattern argument {i}.
+ * Can be shorter or longer than values. Can be NULL if offsetsLength==0.
+ * If there is no {i} in the pattern, then offsets[i] is set to -1.
+ * @param offsetsLength The length of the offsets array.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return appendTo
+ * @stable ICU 57
+ */
+ UnicodeString &formatAndAppend(
+ const UnicodeString *const *values, int32_t valuesLength,
+ UnicodeString &appendTo,
+ int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
+
+ /**
+ * Formats the given values, replacing the contents of the result string.
+ * May optimize by actually appending to the result if it is the same object
+ * as the value corresponding to the initial argument in the pattern.
+ *
+ * @param values The argument values.
+ * An argument value may be the same object as result.
+ * Can be NULL if valuesLength==getArgumentLimit()==0.
+ * @param valuesLength The length of the values array.
+ * Must be at least getArgumentLimit().
+ * @param result Gets its contents replaced by the formatted pattern and values.
+ * @param offsets offsets[i] receives the offset of where
+ * values[i] replaced pattern argument {i}.
+ * Can be shorter or longer than values. Can be NULL if offsetsLength==0.
+ * If there is no {i} in the pattern, then offsets[i] is set to -1.
+ * @param offsetsLength The length of the offsets array.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return result
+ * @stable ICU 57
+ */
+ UnicodeString &formatAndReplace(
+ const UnicodeString *const *values, int32_t valuesLength,
+ UnicodeString &result,
+ int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
+
+ /**
+ * Returns the pattern text with none of the arguments.
+ * Like formatting with all-empty string values.
+ * @stable ICU 57
+ */
+ UnicodeString getTextWithNoArguments() const {
+ return getTextWithNoArguments(
+ compiledPattern.getBuffer(),
+ compiledPattern.length(),
+ nullptr,
+ 0);
+ }
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Returns the pattern text with none of the arguments.
+ * Like formatting with all-empty string values.
+ *
+ * TODO(ICU-20406): Replace this with an Iterator interface.
+ *
+ * @param offsets offsets[i] receives the offset of where {i} was located
+ * before it was replaced by an empty string.
+ * For example, "a{0}b{1}" produces offset 1 for i=0 and 2 for i=1.
+ * Can be nullptr if offsetsLength==0.
+ * If there is no {i} in the pattern, then offsets[i] is set to -1.
+ * @param offsetsLength The length of the offsets array.
+ *
+ * @internal
+ */
+ UnicodeString getTextWithNoArguments(int32_t *offsets, int32_t offsetsLength) const {
+ return getTextWithNoArguments(
+ compiledPattern.getBuffer(),
+ compiledPattern.length(),
+ offsets,
+ offsetsLength);
+ }
+#endif // U_HIDE_INTERNAL_API
+
+private:
+ /**
+ * Binary representation of the compiled pattern.
+ * Index 0: One more than the highest argument number.
+ * Followed by zero or more arguments or literal-text segments.
+ *
+ * An argument is stored as its number, less than ARG_NUM_LIMIT.
+ * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
+ * followed by that many chars.
+ */
+ UnicodeString compiledPattern;
+
+ static inline int32_t getArgumentLimit(const char16_t *compiledPattern,
+ int32_t compiledPatternLength) {
+ return compiledPatternLength == 0 ? 0 : compiledPattern[0]; // unit 0 stores "highest argument number + 1"; an empty buffer means no arguments
+ }
+
+ static UnicodeString getTextWithNoArguments(
+ const char16_t *compiledPattern,
+ int32_t compiledPatternLength,
+ int32_t *offsets,
+ int32_t offsetsLength);
+
+ static UnicodeString &format(
+ const char16_t *compiledPattern, int32_t compiledPatternLength,
+ const UnicodeString *const *values,
+ UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
+ int32_t *offsets, int32_t offsetsLength,
+ UErrorCode &errorCode);
+
+ // Give access to internals to SimpleModifier for number formatting
+ friend class number::impl::SimpleModifier;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __SIMPLEFORMATTER_H__
diff --git a/thirdparty/icu4c/common/unicode/std_string.h b/thirdparty/icu4c/common/unicode/std_string.h
new file mode 100644
index 0000000000..bf87230167
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/std_string.h
@@ -0,0 +1,41 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: std_string.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009feb19
+* created by: Markus W. Scherer
+*/
+
+#ifndef __STD_STRING_H__
+#define __STD_STRING_H__
+
+/**
+ * \file
+ * \brief C++ API: Central ICU header for including the C++ standard &lt;string&gt;
+ * header and for related definitions.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+// Workaround for a libstdc++ bug before libstdc++4.6 (2011).
+// https://bugs.llvm.org/show_bug.cgi?id=13364
+#if defined(__GLIBCXX__)
+namespace std { class type_info; }
+#endif
+#include <string>
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __STD_STRING_H__
diff --git a/thirdparty/icu4c/common/unicode/strenum.h b/thirdparty/icu4c/common/unicode/strenum.h
new file mode 100644
index 0000000000..df72b4b7e8
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/strenum.h
@@ -0,0 +1,281 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*/
+
+#ifndef STRENUM_H
+#define STRENUM_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file
+ * \brief C++ API: String Enumeration
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Base class for 'pure' C++ implementations of uenum api. Adds a
+ * method that returns the next UnicodeString since in C++ this can
+ * be a common storage format for strings.
+ *
+ * <p>The model is that the enumeration is over strings maintained by
+ * a 'service.' At any point, the service might change, invalidating
+ * the enumerator (though this is expected to be rare). The iterator
+ * returns an error if this has occurred. Lack of the error is no
+ * guarantee that the service didn't change immediately after the
+ * call, so the returned string still might not be 'valid' on
+ * subsequent use.</p>
+ *
+ * <p>Strings may take the form of const char*, const char16_t*, or const
+ * UnicodeString*. The type you get is determined by the variant of
+ * 'next' that you call. In general the StringEnumeration is
+ * optimized for one of these types, but all StringEnumerations can
+ * return all types. Returned strings are each terminated with a NUL.
+ * Depending on the service data, they might also include embedded NUL
+ * characters, so API is provided to optionally return the true
+ * length, counting the embedded NULs but not counting the terminating
+ * NUL.</p>
+ *
+ * <p>The pointers returned by next, unext, and snext become invalid
+ * upon any subsequent call to the enumeration's destructor, next,
+ * unext, snext, or reset.</p>
+ *
+ * ICU 2.8 adds some default implementations and helper functions
+ * for subclasses.
+ *
+ * @stable ICU 2.4
+ */
+class U_COMMON_API StringEnumeration : public UObject {
+public:
+ /**
+ * Destructor.
+ * @stable ICU 2.4
+ */
+ virtual ~StringEnumeration();
+
+ /**
+ * Clone this object, an instance of a subclass of StringEnumeration.
+ * Clones can be used concurrently in multiple threads.
+ * If a subclass does not implement clone(), or if an error occurs,
+ * then NULL is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.8
+ */
+ virtual StringEnumeration *clone() const;
+
+ /**
+ * <p>Return the number of elements that the iterator traverses. If
+ * the iterator is out of sync with its service, status is set to
+ * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
+ *
+ * <p>The return value will not change except possibly as a result of
+ * a subsequent call to reset, or if the iterator becomes out of sync.</p>
+ *
+ * <p>This is a convenience function. It can end up being very
+ * expensive as all the items might have to be pre-fetched
+ * (depending on the storage format of the data being
+ * traversed).</p>
+ *
+ * @param status the error code.
+ * @return number of elements in the iterator.
+ *
+ * @stable ICU 2.4 */
+ virtual int32_t count(UErrorCode& status) const = 0;
+
+ /**
+ * <p>Returns the next element as a NUL-terminated char*. If there
+ * are no more elements, returns NULL. If the resultLength pointer
+ * is not NULL, the length of the string (not counting the
+ * terminating NUL) is returned at that address. If an error
+ * status is returned, the value at resultLength is undefined.</p>
+ *
+ * <p>The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor.</p>
+ *
+ * <p>If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+ *
+ * <p>If the native service string is a char16_t* string, it is
+ * converted to char* with the invariant converter. If the
+ * conversion fails (because a character cannot be converted) then
+ * status is set to U_INVARIANT_CONVERSION_ERROR and the return
+ * value is undefined (though not NULL).</p>
+ *
+ * Starting with ICU 2.8, the default implementation calls snext()
+ * and handles the conversion.
+ * Either next() or snext() must be implemented differently by a subclass.
+ *
+ * @param status the error code.
+ * @param resultLength a pointer to receive the length, can be NULL.
+ * @return a pointer to the string, or NULL.
+ *
+ * @stable ICU 2.4
+ */
+ virtual const char* next(int32_t *resultLength, UErrorCode& status);
+
+ /**
+ * <p>Returns the next element as a NUL-terminated char16_t*. If there
+ * are no more elements, returns NULL. If the resultLength pointer
+ * is not NULL, the length of the string (not counting the
+ * terminating NUL) is returned at that address. If an error
+ * status is returned, the value at resultLength is undefined.</p>
+ *
+ * <p>The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor.</p>
+ *
+ * <p>If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+ *
+ * Starting with ICU 2.8, the default implementation calls snext()
+ * and handles the conversion.
+ *
+ * @param status the error code.
+ * @param resultLength a pointer to receive the length, can be NULL.
+ * @return a pointer to the string, or NULL.
+ *
+ * @stable ICU 2.4
+ */
+ virtual const char16_t* unext(int32_t *resultLength, UErrorCode& status);
+
+ /**
+ * <p>Returns the next element as a UnicodeString*. If there are no
+ * more elements, returns NULL.</p>
+ *
+ * <p>The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor.</p>
+ *
+ * <p>If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+ *
+ * Starting with ICU 2.8, the default implementation calls next()
+ * and handles the conversion.
+ * Either next() or snext() must be implemented differently by a subclass.
+ *
+ * @param status the error code.
+ * @return a pointer to the string, or NULL.
+ *
+ * @stable ICU 2.4
+ */
+ virtual const UnicodeString* snext(UErrorCode& status);
+
+ /**
+ * <p>Resets the iterator. This re-establishes sync with the
+ * service and rewinds the iterator to start at the first
+ * element.</p>
+ *
+ * <p>Previous pointers returned by next, unext, or snext become
+ * invalid, and the value returned by count might change.</p>
+ *
+ * @param status the error code.
+ *
+ * @stable ICU 2.4
+ */
+ virtual void reset(UErrorCode& status) = 0;
+
+ /**
+ * Compares this enumeration to other to check if both are equal
+ *
+ * @param that The other string enumeration to compare this object to
+ * @return true if the enumerations are equal. false if not.
+ * @stable ICU 3.6
+ */
+ virtual UBool operator==(const StringEnumeration& that)const;
+ /**
+ * Compares this enumeration to other to check if both are not equal
+ *
+ * @param that The other string enumeration to compare this object to
+ * @return true if the enumerations are not equal. false if they are equal.
+ * @stable ICU 3.6
+ */
+ virtual UBool operator!=(const StringEnumeration& that)const;
+
+protected:
+ /**
+ * UnicodeString field for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ UnicodeString unistr;
+ /**
+ * char * default buffer for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ char charsBuffer[32];
+ /**
+ * char * buffer for use with default implementations and subclasses.
+ * Allocated in constructor and in ensureCharsCapacity().
+ * @stable ICU 2.8
+ */
+ char *chars;
+ /**
+ * Capacity of chars, for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ int32_t charsCapacity;
+
+ /**
+ * Default constructor for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ StringEnumeration();
+
+ /**
+ * Ensures that chars is at least as large as the requested capacity.
+ * For use with default implementations and subclasses.
+ *
+ * @param capacity Requested capacity.
+ * @param status ICU in/out error code.
+ * @stable ICU 2.8
+ */
+ void ensureCharsCapacity(int32_t capacity, UErrorCode &status);
+
+ /**
+ * Converts s to Unicode and sets unistr to the result.
+ * For use with default implementations and subclasses,
+ * especially for implementations of snext() in terms of next().
+ * This is provided with a helper function instead of a default implementation
+ * of snext() to avoid potential infinite loops between next() and snext().
+ *
+ * For example:
+ * \code
+ * const UnicodeString* snext(UErrorCode& status) {
+ * int32_t resultLength=0;
+ * const char *s=next(&resultLength, status);
+ * return setChars(s, resultLength, status);
+ * }
+ * \endcode
+ *
+ * @param s String to be converted to Unicode.
+ * @param length Length of the string.
+ * @param status ICU in/out error code.
+ * @return A pointer to unistr.
+ * @stable ICU 2.8
+ */
+ UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status);
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+/* STRENUM_H */
+#endif
diff --git a/thirdparty/icu4c/common/unicode/stringoptions.h b/thirdparty/icu4c/common/unicode/stringoptions.h
new file mode 100644
index 0000000000..7b9f70944f
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/stringoptions.h
@@ -0,0 +1,190 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// stringoptions.h
+// created: 2017jun08 Markus W. Scherer
+
+#ifndef __STRINGOPTIONS_H__
+#define __STRINGOPTIONS_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Bit set option bit constants for various string and character processing functions.
+ */
+
+/**
+ * Option value for case folding: Use default mappings defined in CaseFolding.txt.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_DEFAULT 0
+
+/**
+ * Option value for case folding:
+ *
+ * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
+ * and dotless i appropriately for Turkic languages (tr, az).
+ *
+ * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
+ * are to be included for default mappings and
+ * excluded for the Turkic-specific mappings.
+ *
+ * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
+ * are to be excluded for default mappings and
+ * included for the Turkic-specific mappings.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
+
+/**
+ * Titlecase the string as a whole rather than each word.
+ * (Titlecase only the character at index 0, possibly adjusted.)
+ * Option bits value for titlecasing APIs that take an options bit set.
+ *
+ * It is an error to specify multiple titlecasing iterator options together,
+ * including both an options bit and an explicit BreakIterator.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @stable ICU 60
+ */
+#define U_TITLECASE_WHOLE_STRING 0x20
+
+/**
+ * Titlecase sentences rather than words.
+ * (Titlecase only the first character of each sentence, possibly adjusted.)
+ * Option bits value for titlecasing APIs that take an options bit set.
+ *
+ * It is an error to specify multiple titlecasing iterator options together,
+ * including both an options bit and an explicit BreakIterator.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @stable ICU 60
+ */
+#define U_TITLECASE_SENTENCES 0x40
+
+/**
+ * Do not lowercase non-initial parts of words when titlecasing.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will titlecase the character at each
+ * (possibly adjusted) BreakIterator index and
+ * lowercase all other characters up to the next iterator index.
+ * With this option, the other characters will not be modified.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @see UnicodeString::toTitle
+ * @see CaseMap::toTitle
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_LOWERCASE 0x100
+
+/**
+ * Do not adjust the titlecasing BreakIterator indexes;
+ * titlecase exactly the characters at breaks from the iterator.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will take each break iterator index,
+ * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
+ * and titlecase that one.
+ *
+ * Other characters are lowercased.
+ *
+ * It is an error to specify multiple titlecasing adjustment options together.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see UnicodeString::toTitle
+ * @see CaseMap::toTitle
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
+
+/**
+ * Adjust each titlecasing BreakIterator index to the next cased character.
+ * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * This used to be the default index adjustment in ICU.
+ * Since ICU 60, the default index adjustment is to the next character that is
+ * a letter, number, symbol, or private use code point.
+ * (Uncased modifier letters are skipped.)
+ * The difference in behavior is small for word titlecasing,
+ * but the new adjustment is much better for whole-string and sentence titlecasing:
+ * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
+ *
+ * It is an error to specify multiple titlecasing adjustment options together.
+ *
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @stable ICU 60
+ */
+#define U_TITLECASE_ADJUST_TO_CASED 0x400
+
+/**
+ * Option for string transformation functions to not first reset the Edits object.
+ * Used for example in some case-mapping and normalization functions.
+ *
+ * @see CaseMap
+ * @see Edits
+ * @see Normalizer2
+ * @stable ICU 60
+ */
+#define U_EDITS_NO_RESET 0x2000
+
+/**
+ * Omit unchanged text when recording how source substrings
+ * relate to changed and unchanged result substrings.
+ * Used for example in some case-mapping and normalization functions.
+ *
+ * @see CaseMap
+ * @see Edits
+ * @see Normalizer2
+ * @stable ICU 60
+ */
+#define U_OMIT_UNCHANGED_TEXT 0x4000
+
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER 0x8000
+
+/**
+ * Option bit for unorm_compare:
+ * Perform case-insensitive comparison.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_IGNORE_CASE 0x10000
+
+/**
+ * Option bit for unorm_compare:
+ * Both input strings are assumed to fulfill FCD conditions.
+ * @stable ICU 2.2
+ */
+#define UNORM_INPUT_IS_FCD 0x20000
+
+// Related definitions elsewhere.
+// Options that are not meaningful in the same functions
+// can share the same bits.
+//
+// Public:
+// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
+//
+// Internal: (may change or be removed)
+// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
+// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
+// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
+// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
+// ustr_imp.h #define _STRNCMP_STYLE 0x1000
+// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
+
+#endif // __STRINGOPTIONS_H__
diff --git a/thirdparty/icu4c/common/unicode/stringpiece.h b/thirdparty/icu4c/common/unicode/stringpiece.h
new file mode 100644
index 0000000000..7d7d871e1f
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/stringpiece.h
@@ -0,0 +1,353 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+// Copyright (C) 2009-2013, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// Copyright 2001 and onwards Google Inc.
+// Author: Sanjay Ghemawat
+
+// This code is a contribution of Google code, and the style used here is
+// a compromise between the original Google code and the ICU coding guidelines.
+// For example, data types are ICU-ified (size_t,int->int32_t),
+// and API comments doxygen-ified, but function names and behavior are
+// as in the original, if possible.
+// Assertion-style error handling, not available in ICU, was changed to
+// parameter "pinning" similar to UnicodeString.
+//
+// In addition, this is only a partial port of the original Google code,
+// limited to what was needed so far. The (nearly) complete original code
+// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
+// (see ICU ticket 6765, r25517).
+
+#ifndef __STRINGPIECE_H__
+#define __STRINGPIECE_H__
+
+/**
+ * \file
+ * \brief C++ API: StringPiece: Read-only byte string wrapper class.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include <cstddef>
+#include <type_traits>
+
+#include "unicode/uobject.h"
+#include "unicode/std_string.h"
+
+// Arghh! I wish C++ literals were "string".
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A string-like object that points to a sized piece of memory.
+ *
+ * We provide non-explicit singleton constructors so users can pass
+ * in a "const char*" or a "string" wherever a "StringPiece" is
+ * expected.
+ *
+ * Functions or methods may use StringPiece parameters to accept either a
+ * "const char*" or a "string" value that will be implicitly converted to a
+ * StringPiece.
+ *
+ * Systematic usage of StringPiece is encouraged as it will reduce unnecessary
+ * conversions from "const char*" to "string" and back again.
+ *
+ * @stable ICU 4.2
+ */
+class U_COMMON_API StringPiece : public UMemory {
+ private:
+ const char* ptr_;
+ int32_t length_;
+
+ public:
+ /**
+ * Default constructor, creates an empty StringPiece.
+ * @stable ICU 4.2
+ */
+ StringPiece() : ptr_(nullptr), length_(0) { }
+
+ /**
+ * Constructs from a NUL-terminated const char * pointer.
+ * @param str a NUL-terminated const char * pointer
+ * @stable ICU 4.2
+ */
+ StringPiece(const char* str);
+#ifndef U_HIDE_DRAFT_API
+#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
+ /**
+ * Constructs from a NUL-terminated const char8_t * pointer.
+ * @param str a NUL-terminated const char8_t * pointer
+ * @draft ICU 67
+ */
+ StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {}
+#endif
+ /**
+ * Constructs an empty StringPiece.
+ * Needed for type disambiguation from multiple other overloads.
+ * @param p nullptr
+ * @draft ICU 67
+ */
+ StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {}
+#endif // U_HIDE_DRAFT_API
+
+ /**
+ * Constructs from a std::string.
+ * @stable ICU 4.2
+ */
+ StringPiece(const std::string& str)
+ : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
+#ifndef U_HIDE_DRAFT_API
+#if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN)
+ /**
+ * Constructs from a std::u8string.
+ * @draft ICU 67
+ */
+ StringPiece(const std::u8string& str)
+ : ptr_(reinterpret_cast<const char*>(str.data())),
+ length_(static_cast<int32_t>(str.size())) { }
+#endif
+#endif // U_HIDE_DRAFT_API
+
+ /**
+ * Constructs from some other implementation of a string piece class, from any
+ * C++ record type that has these two methods:
+ *
+ * \code{.cpp}
+ *
+ * struct OtherStringPieceClass {
+ * const char* data(); // or const char8_t*
+ * size_t size();
+ * };
+ *
+ * \endcode
+ *
+ * The other string piece class will typically be std::string_view from C++17
+ * or absl::string_view from Abseil.
+ *
+ * Starting with C++20, data() may also return a const char8_t* pointer,
+ * as from std::u8string_view.
+ *
+ * @param str the other string piece
+ * @stable ICU 65
+ */
+ template <typename T,
+ typename = typename std::enable_if<
+ (std::is_same<decltype(T().data()), const char*>::value
+#if defined(__cpp_char8_t)
+ || std::is_same<decltype(T().data()), const char8_t*>::value
+#endif
+ ) &&
+ std::is_same<decltype(T().size()), size_t>::value>::type>
+ StringPiece(T str)
+ : ptr_(reinterpret_cast<const char*>(str.data())),
+ length_(static_cast<int32_t>(str.size())) {}
+
+ /**
+ * Constructs from a const char * pointer and a specified length.
+ * @param offset a const char * pointer (need not be terminated)
+ * @param len the length of the string; must be non-negative
+ * @stable ICU 4.2
+ */
+ StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
+#ifndef U_HIDE_DRAFT_API
+#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
+ /**
+ * Constructs from a const char8_t * pointer and a specified length.
+ * @param str a const char8_t * pointer (need not be terminated)
+ * @param len the length of the string; must be non-negative
+ * @draft ICU 67
+ */
+ StringPiece(const char8_t* str, int32_t len) :
+ StringPiece(reinterpret_cast<const char*>(str), len) {}
+#endif
+#endif // U_HIDE_DRAFT_API
+
+ /**
+ * Substring of another StringPiece.
+ * @param x the other StringPiece
+ * @param pos start position in x; must be non-negative and <= x.length().
+ * @stable ICU 4.2
+ */
+ StringPiece(const StringPiece& x, int32_t pos);
+ /**
+ * Substring of another StringPiece.
+ * @param x the other StringPiece
+ * @param pos start position in x; must be non-negative and <= x.length().
+ * @param len length of the substring;
+ * must be non-negative and will be pinned to at most x.length() - pos.
+ * @stable ICU 4.2
+ */
+ StringPiece(const StringPiece& x, int32_t pos, int32_t len);
+
+ /**
+ * Returns the string pointer. May be nullptr if it is empty.
+ *
+ * data() may return a pointer to a buffer with embedded NULs, and the
+ * returned buffer may or may not be null terminated. Therefore it is
+ * typically a mistake to pass data() to a routine that expects a NUL
+ * terminated string.
+ * @return the string pointer
+ * @stable ICU 4.2
+ */
+ const char* data() const { return ptr_; }
+ /**
+ * Returns the string length. Same as length().
+ * @return the string length
+ * @stable ICU 4.2
+ */
+ int32_t size() const { return length_; }
+ /**
+ * Returns the string length. Same as size().
+ * @return the string length
+ * @stable ICU 4.2
+ */
+ int32_t length() const { return length_; }
+ /**
+ * Returns whether the string is empty.
+ * @return true if the string is empty
+ * @stable ICU 4.2
+ */
+ UBool empty() const { return length_ == 0; }
+
+ /**
+ * Sets to an empty string.
+ * @stable ICU 4.2
+ */
+ void clear() { ptr_ = nullptr; length_ = 0; }
+
+ /**
+ * Reset the stringpiece to refer to new data.
+ * @param xdata pointer to the new string data. Need not be NUL-terminated.
+ * @param len the length of the new data
+ * @stable ICU 4.8
+ */
+ void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; }
+
+ /**
+ * Reset the stringpiece to refer to new data.
+ * @param str a pointer to a NUL-terminated string.
+ * @stable ICU 4.8
+ */
+ void set(const char* str);
+
+#ifndef U_HIDE_DRAFT_API
+#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
+ /**
+ * Resets the stringpiece to refer to new data.
+ * @param xdata pointer to the new string data. Need not be NUL-terminated.
+ * @param len the length of the new data
+ * @draft ICU 67
+ */
+ inline void set(const char8_t* xdata, int32_t len) {
+ set(reinterpret_cast<const char*>(xdata), len);
+ }
+
+ /**
+ * Resets the stringpiece to refer to new data.
+ * @param str a pointer to a NUL-terminated string.
+ * @draft ICU 67
+ */
+ inline void set(const char8_t* str) {
+ set(reinterpret_cast<const char*>(str));
+ }
+#endif
+#endif // U_HIDE_DRAFT_API
+
+ /**
+ * Removes the first n string units.
+ * @param n prefix length, must be non-negative and <=length(); a negative n is ignored, a larger n is pinned to length()
+ * @stable ICU 4.2
+ */
+ void remove_prefix(int32_t n) {
+ if (n >= 0) { // a negative n leaves the piece unchanged
+ if (n > length_) {
+ n = length_; // pin n to the current length
+ }
+ ptr_ += n;
+ length_ -= n;
+ }
+ }
+
+ /**
+ * Removes the last n string units.
+ * @param n suffix length, must be non-negative and <=length(); a negative n is ignored, a larger n empties the piece
+ * @stable ICU 4.2
+ */
+ void remove_suffix(int32_t n) {
+ if (n >= 0) { // a negative n leaves the piece unchanged
+ if (n <= length_) {
+ length_ -= n;
+ } else {
+ length_ = 0; // n exceeds the length: only length_ is zeroed, ptr_ is kept
+ }
+ }
+ }
+
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Searches the StringPiece for the given search string (needle).
+ * @param needle The string for which to search.
+ * @param offset Where to start searching within this string (haystack).
+ * @return The offset of needle in haystack, or -1 if not found.
+ * @draft ICU 67
+ */
+ int32_t find(StringPiece needle, int32_t offset);
+
+ /**
+ * Compares this StringPiece with the other StringPiece, with semantics
+ * similar to std::string::compare().
+ * @param other The string to compare to.
+ * @return below zero if this < other; above zero if this > other; 0 if this == other.
+ * @draft ICU 67
+ */
+ int32_t compare(StringPiece other);
+#endif // U_HIDE_DRAFT_API
+
+ /**
+ * Maximum integer, used as a default value for substring methods.
+ * @stable ICU 4.2
+ */
+ static const int32_t npos; // = 0x7fffffff;
+
+ /**
+ * Returns a substring of this StringPiece.
+ * @param pos start position; must be non-negative and <= length().
+ * @param len length of the substring;
+ * must be non-negative and will be pinned to at most length() - pos.
+ * @return the substring StringPiece
+ * @stable ICU 4.2
+ */
+ StringPiece substr(int32_t pos, int32_t len = npos) const {
+ return StringPiece(*this, pos, len);
+ }
+};
+
+/**
+ * Global operator == for StringPiece
+ * @param x The first StringPiece to compare.
+ * @param y The second StringPiece to compare.
+ * @return true if the string data is equal
+ * @stable ICU 4.8
+ */
+U_EXPORT UBool U_EXPORT2
+operator==(const StringPiece& x, const StringPiece& y);
+
+/**
+ * Global operator != for StringPiece
+ * @param x The first StringPiece to compare.
+ * @param y The second StringPiece to compare.
+ * @return true if the string data is not equal
+ * @stable ICU 4.8
+ */
+inline UBool operator!=(const StringPiece& x, const StringPiece& y) {
+ return !(x == y); // exact negation of the StringPiece operator==
+}
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __STRINGPIECE_H__
diff --git a/thirdparty/icu4c/common/unicode/stringtriebuilder.h b/thirdparty/icu4c/common/unicode/stringtriebuilder.h
new file mode 100644
index 0000000000..fe471bbbf9
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/stringtriebuilder.h
@@ -0,0 +1,426 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012,2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: stringtriebuilder.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010dec24
+* created by: Markus W. Scherer
+*/
+
+#ifndef __STRINGTRIEBUILDER_H__
+#define __STRINGTRIEBUILDER_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Builder API for trie builders
+ */
+
+// Forward declaration.
+/// \cond
+struct UHashtable;
+typedef struct UHashtable UHashtable;
+/// \endcond
+
+/**
+ * Build options for BytesTrieBuilder and CharsTrieBuilder; passed to StringTrieBuilder::build().
+ * @stable ICU 4.8
+ */
+enum UStringTrieBuildOption {
+ /**
+ * Builds a trie quickly. First enumerator, so its numeric value is 0.
+ * @stable ICU 4.8
+ */
+ USTRINGTRIE_BUILD_FAST,
+ /**
+ * Builds a trie more slowly, attempting to generate
+ * a shorter but equivalent serialization.
+ * This build option also uses more memory.
+ *
+ * This option can be effective when many integer values are the same
+ * and string/byte sequence suffixes can be shared.
+ * Runtime speed is not expected to improve. Numeric value is 1.
+ * @stable ICU 4.8
+ */
+ USTRINGTRIE_BUILD_SMALL
+};
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Base class for string trie builder classes.
+ *
+ * This class is not intended for public subclassing.
+ * @stable ICU 4.8
+ */
+class U_COMMON_API StringTrieBuilder : public UObject {
+public:
+#ifndef U_HIDE_INTERNAL_API
+ /** @internal */
+ static int32_t hashNode(const void *node);
+ /** @internal */
+ static UBool equalNodes(const void *left, const void *right);
+#endif /* U_HIDE_INTERNAL_API */
+
+protected:
+ // Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API
+ // or else the compiler will create a public default constructor.
+ /** @internal */
+ StringTrieBuilder();
+ /** @internal */
+ virtual ~StringTrieBuilder();
+
+#ifndef U_HIDE_INTERNAL_API
+ /** @internal */
+ void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
+ /** @internal */
+ void deleteCompactBuilder();
+
+ /** @internal */
+ void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
+
+ /** @internal */
+ int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
+ /** @internal */
+ int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
+#endif /* U_HIDE_INTERNAL_API */
+
+ class Node;
+
+#ifndef U_HIDE_INTERNAL_API
+ /** @internal */
+ Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
+ /** @internal */
+ Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
+ int32_t length, UErrorCode &errorCode);
+#endif /* U_HIDE_INTERNAL_API */
+
+ /** @internal */
+ virtual int32_t getElementStringLength(int32_t i) const = 0;
+ /** @internal */
+ virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0;
+ /** @internal */
+ virtual int32_t getElementValue(int32_t i) const = 0;
+
+ // Finds the first unit index after this one where
+ // the first and last element have different units again.
+ /** @internal */
+ virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
+
+ // Number of different units at unitIndex.
+ /** @internal */
+ virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
+ /** @internal */
+ virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
+ /** @internal */
+ virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0;
+
+ /** @internal */
+ virtual UBool matchNodesCanHaveValues() const = 0;
+
+ /** @internal */
+ virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
+ /** @internal */
+ virtual int32_t getMinLinearMatch() const = 0;
+ /** @internal */
+ virtual int32_t getMaxLinearMatchLength() const = 0;
+
+#ifndef U_HIDE_INTERNAL_API
+ // max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength).
+ /** @internal */
+ static const int32_t kMaxBranchLinearSubNodeLength=5;
+
+ // Maximum number of nested split-branch levels for a branch on all 2^16 possible char16_t units.
+ // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.
+ /** @internal */
+ static const int32_t kMaxSplitBranchLevels=14;
+
+ /**
+ * Makes sure that there is only one unique node registered that is
+ * equivalent to newNode.
+ * @param newNode Input node. The builder takes ownership.
+ * @param errorCode ICU in/out UErrorCode.
+ * Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
+ * @return newNode if it is the first of its kind, or
+ * an equivalent node if newNode is a duplicate.
+ * @internal
+ */
+ Node *registerNode(Node *newNode, UErrorCode &errorCode);
+ /**
+ * Makes sure that there is only one unique FinalValueNode registered
+ * with this value.
+ * Avoids creating a node if the value is a duplicate.
+ * @param value A final value.
+ * @param errorCode ICU in/out UErrorCode.
+ * Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
+ * @return A FinalValueNode with the given value.
+ * @internal
+ */
+ Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
+#endif /* U_HIDE_INTERNAL_API */
+
+ /*
+ * C++ note:
+ * registerNode() and registerFinalValue() take ownership of their input nodes,
+ * and only return owned nodes.
+ * If they see a failure UErrorCode, they will delete the input node.
+ * If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR.
+ * If there is a failure, they return NULL.
+ *
+ * NULL Node pointers can be safely passed into other Nodes because
+ * they call the static Node::hashCode() which checks for a NULL pointer first.
+ *
+ * Therefore, as long as builder functions register a new node,
+ * they need to check for failures only before explicitly dereferencing
+ * a Node pointer, or before setting a new UErrorCode.
+ */
+
+ // Hash set of nodes, maps from nodes to integer 1.
+ /** @internal */
+ UHashtable *nodes;
+
+ // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
+ // it is needed for layout of other objects.
+ /**
+ * @internal
+ * \cond
+ */
+ class Node : public UObject {
+ public:
+ Node(int32_t initialHash) : hash(initialHash), offset(0) {} // offset 0: not yet edge-numbered (negative) or written (positive)
+ inline int32_t hashCode() const { return hash; }
+ // Handles node==NULL.
+ static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
+ // Base class operator==() compares the actual class types.
+ virtual UBool operator==(const Node &other) const;
+ inline UBool operator!=(const Node &other) const { return !operator==(other); }
+ /**
+ * Traverses the Node graph and numbers branch edges, with rightmost edges first.
+ * This is to avoid writing a duplicate node twice.
+ *
+ * Branch nodes in this trie data structure are not symmetric.
+ * Most branch edges "jump" to other nodes but the rightmost branch edges
+ * just continue without a jump.
+ * Therefore, write() must write the rightmost branch edge last
+ * (trie units are written backwards), and must write it at that point even if
+ * it is a duplicate of a node previously written elsewhere.
+ *
+ * This function visits and marks right branch edges first.
+ * Edges are numbered with increasingly negative values because we share the
+ * offset field which gets positive values when nodes are written.
+ * A branch edge also remembers the first number for any of its edges.
+ *
+ * When a further-left branch edge has a number in the range of the rightmost
+ * edge's numbers, then it will be written as part of the required right edge
+ * and we can avoid writing it first.
+ *
+ * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative
+ * edge numbers.
+ *
+ * @param edgeNumber The first edge number for this node and its sub-nodes.
+ * @return An edge number that is at least the maximum-negative
+ * of the input edge number and the numbers of this node and all of its sub-nodes.
+ */
+ virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
+ // write() must set the offset to a positive value.
+ virtual void write(StringTrieBuilder &builder) = 0;
+ // See markRightEdgesFirst.
+ inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
+ StringTrieBuilder &builder) {
+ // Note: Edge numbers are negative, lastRight<=firstRight.
+ // If offset>0 then this node and its sub-nodes have been written already
+ // and we need not write them again.
+ // If this node is part of the unwritten right branch edge,
+ // then we wait until that is written.
+ if(offset<0 && (offset<lastRight || firstRight<offset)) {
+ write(builder);
+ }
+ }
+ inline int32_t getOffset() const { return offset; }
+ protected:
+ int32_t hash;
+ int32_t offset;
+ };
+
+#ifndef U_HIDE_INTERNAL_API
+ // This class should not be subclassed because
+ // registerFinalValue() compares a stack-allocated FinalValueNode
+ // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)
+ // with the input node, and the
+ // !Node::operator==(other) used inside FinalValueNode::operator==(other)
+ // will be false if the typeid's are different.
+ /** @internal */
+ class FinalValueNode : public Node {
+ public:
+ FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {}
+ virtual UBool operator==(const Node &other) const;
+ virtual void write(StringTrieBuilder &builder);
+ protected:
+ int32_t value;
+ };
+#endif /* U_HIDE_INTERNAL_API */
+
+ // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
+ // it is needed for layout of other objects.
+ /**
+ * @internal
+ */
+ class ValueNode : public Node {
+ public:
+ ValueNode(int32_t initialHash) : Node(initialHash), hasValue(false), value(0) {}
+ virtual UBool operator==(const Node &other) const;
+ void setValue(int32_t v) {
+ hasValue=true;
+ value=v;
+ hash=hash*37u+v; // 37u: the arithmetic is done unsigned (presumably for well-defined wraparound)
+ }
+ protected:
+ UBool hasValue;
+ int32_t value;
+ };
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * @internal
+ */
+ class IntermediateValueNode : public ValueNode {
+ public:
+ IntermediateValueNode(int32_t v, Node *nextNode)
+ : ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); }
+ virtual UBool operator==(const Node &other) const;
+ virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
+ virtual void write(StringTrieBuilder &builder);
+ protected:
+ Node *next;
+ };
+#endif /* U_HIDE_INTERNAL_API */
+
+ // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
+ // it is needed for layout of other objects.
+ /**
+ * @internal
+ */
+ class LinearMatchNode : public ValueNode {
+ public:
+ LinearMatchNode(int32_t len, Node *nextNode)
+ : ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)),
+ length(len), next(nextNode) {}
+ virtual UBool operator==(const Node &other) const;
+ virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
+ protected:
+ int32_t length;
+ Node *next;
+ };
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * @internal
+ */
+ class BranchNode : public Node {
+ public:
+ BranchNode(int32_t initialHash) : Node(initialHash) {}
+ protected:
+ int32_t firstEdgeNumber; // Not initialized by the constructor; presumably set by markRightEdgesFirst() in subclasses.
+ };
+
+ /**
+ * @internal
+ */
+ class ListBranchNode : public BranchNode {
+ public:
+ ListBranchNode() : BranchNode(0x444444), length(0) {}
+ virtual UBool operator==(const Node &other) const;
+ virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
+ virtual void write(StringTrieBuilder &builder);
+ // Adds a unit with a final value. No bounds check: callers must keep length below kMaxBranchLinearSubNodeLength.
+ void add(int32_t c, int32_t value) {
+ units[length]=(char16_t)c;
+ equal[length]=NULL;
+ values[length]=value;
+ ++length;
+ hash=(hash*37u+c)*37u+value;
+ }
+ // Adds a unit which leads to another match node. No bounds check here either (same array capacity).
+ void add(int32_t c, Node *node) {
+ units[length]=(char16_t)c;
+ equal[length]=node;
+ values[length]=0;
+ ++length;
+ hash=(hash*37u+c)*37u+hashCode(node);
+ }
+ protected:
+ Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value".
+ int32_t length;
+ int32_t values[kMaxBranchLinearSubNodeLength];
+ char16_t units[kMaxBranchLinearSubNodeLength];
+ };
+
+ /**
+ * @internal
+ */
+ class SplitBranchNode : public BranchNode {
+ public:
+ SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
+ : BranchNode(((0x555555u*37u+middleUnit)*37u+
+ hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)),
+ unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
+ virtual UBool operator==(const Node &other) const;
+ virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
+ virtual void write(StringTrieBuilder &builder);
+ protected:
+ char16_t unit;
+ Node *lessThan;
+ Node *greaterOrEqual;
+ };
+
+ // Branch head node, for writing the actual node lead unit.
+ /** @internal */
+ class BranchHeadNode : public ValueNode {
+ public:
+ BranchHeadNode(int32_t len, Node *subNode)
+ : ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)),
+ length(len), next(subNode) {}
+ virtual UBool operator==(const Node &other) const;
+ virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
+ virtual void write(StringTrieBuilder &builder);
+ protected:
+ int32_t length;
+ Node *next; // A branch sub-node.
+ };
+
+#endif /* U_HIDE_INTERNAL_API */
+ /// \endcond
+
+ /** @internal */
+ virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
+ Node *nextNode) const = 0;
+
+ /** @internal */
+ virtual int32_t write(int32_t unit) = 0;
+ /** @internal */
+ virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
+ /** @internal */
+ virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
+ /** @internal */
+ virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
+ /** @internal */
+ virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __STRINGTRIEBUILDER_H__
diff --git a/thirdparty/icu4c/common/unicode/symtable.h b/thirdparty/icu4c/common/unicode/symtable.h
new file mode 100644
index 0000000000..b64d877f97
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/symtable.h
@@ -0,0 +1,119 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2000-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 02/04/00 aliu Creation.
+**********************************************************************
+*/
+#ifndef SYMTABLE_H
+#define SYMTABLE_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: An interface that defines both lookup protocol and parsing of
+ * symbolic names.
+ */
+
+U_NAMESPACE_BEGIN
+
+class ParsePosition;
+class UnicodeFunctor;
+class UnicodeSet;
+class UnicodeString;
+
+/**
+ * An interface that defines both lookup protocol and parsing of
+ * symbolic names.
+ *
+ * <p>A symbol table maintains two kinds of mappings. The first is
+ * between symbolic names and their values. For example, if the
+ * variable with the name "start" is set to the value "alpha"
+ * (perhaps, though not necessarily, through an expression such as
+ * "$start=alpha"), then the call lookup("start") will return the
+ * char[] array ['a', 'l', 'p', 'h', 'a'].
+ *
+ * <p>The second kind of mapping is between character values and
+ * UnicodeMatcher objects. This is used by RuleBasedTransliterator,
+ * which uses characters in the private use area to represent objects
+ * such as UnicodeSets. If U+E015 is mapped to the UnicodeSet [a-z],
+ * then lookupMatcher(0xE015) will return the UnicodeSet [a-z].
+ *
+ * <p>Finally, a symbol table defines parsing behavior for symbolic
+ * names. All symbolic names start with the SYMBOL_REF character.
+ * When a parser encounters this character, it calls parseReference()
+ * with the position immediately following the SYMBOL_REF. The symbol
+ * table parses the name, if there is one, and returns it.
+ *
+ * @stable ICU 2.8
+ */
+class U_COMMON_API SymbolTable /* not : public UObject because this is an interface/mixin class */ {
+public:
+
+ /**
+ * The character preceding a symbol reference name.
+ * @stable ICU 2.8
+ */
+ enum { SYMBOL_REF = 0x0024 /*$*/ };
+
+ /**
+ * Destructor.
+ * @stable ICU 2.8
+ */
+ virtual ~SymbolTable();
+
+ /**
+ * Look up the characters associated with this string and return them.
+ * Return <tt>NULL</tt> if no such name exists. The resultant
+ * string may have length zero.
+ * @param s the symbolic name to look up
+ * @return a string containing the name's value, or <tt>NULL</tt> if
+ * there is no mapping for s.
+ * @stable ICU 2.8
+ */
+ virtual const UnicodeString* lookup(const UnicodeString& s) const = 0;
+
+ /**
+ * Look up the UnicodeMatcher associated with the given character, and
+ * return it. Return <tt>NULL</tt> if not found.
+ * @param ch a 32-bit code point from 0 to 0x10FFFF inclusive.
+ * @return the UnicodeMatcher object represented by the given
+ * character (as a UnicodeFunctor pointer), or NULL if there is no mapping for ch.
+ * @stable ICU 2.8
+ */
+ virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const = 0;
+
+ /**
+ * Parse a symbol reference name from the given string, starting
+ * at the given position. If no valid symbol reference name is
+ * found, return the empty string and leave pos unchanged. That is, if the
+ * character at pos cannot start a name, or if pos is at or after
+ * text.length(), then return an empty string. This indicates an
+ * isolated SYMBOL_REF character.
+ * @param text the text to parse for the name
+ * @param pos on entry, the index of the first character to parse.
+ * This is the character following the SYMBOL_REF character. On
+ * exit, the index after the last parsed character. If the parse
+ * failed, pos is unchanged on exit.
+ * @param limit the index after the last character to be parsed.
+ * @return the parsed name, or an empty string if there is no
+ * valid symbolic name at the given position.
+ * @stable ICU 2.8
+ */
+ virtual UnicodeString parseReference(const UnicodeString& text,
+ ParsePosition& pos, int32_t limit) const = 0;
+};
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/ubidi.h b/thirdparty/icu4c/common/unicode/ubidi.h
new file mode 100644
index 0000000000..63d0e45cb7
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ubidi.h
@@ -0,0 +1,2210 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ubidi.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999jul27
+* created by: Markus W. Scherer, updated by Matitiahu Allouche
+*/
+
+#ifndef UBIDI_H
+#define UBIDI_H
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ *\file
+ * \brief C API: Bidi algorithm
+ *
+ * <h2>Bidi algorithm for ICU</h2>
+ *
+ * This is an implementation of the Unicode Bidirectional Algorithm.
+ * The algorithm is defined in the
+ * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>.<p>
+ *
+ * Note: Libraries that perform a bidirectional algorithm and
+ * reorder strings accordingly are sometimes called "Storage Layout Engines".
+ * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such
+ * "Storage Layout Engines".
+ *
+ * <h3>General remarks about the API:</h3>
+ *
+ * In functions with an error code parameter,
+ * the <code>pErrorCode</code> pointer must be valid
+ * and the value that it points to must not indicate a failure before
+ * the function call. Otherwise, the function returns immediately.
+ * After the function call, the value indicates success or failure.<p>
+ *
+ * The &quot;limit&quot; of a sequence of characters is the position just after their
+ * last character, i.e., one more than that position.<p>
+ *
+ * Some of the API functions provide access to &quot;runs&quot;.
+ * Such a &quot;run&quot; is defined as a sequence of characters
+ * that are at the same embedding level
+ * after performing the Bidi algorithm.<p>
+ *
+ * @author Markus W. Scherer
+ * @version 1.0
+ *
+ *
+ * <h4> Sample code for the ICU Bidi API </h4>
+ *
+ * <h5>Rendering a paragraph with the ICU Bidi API</h5>
+ *
+ * This is (hypothetical) sample code that illustrates
+ * how the ICU Bidi API could be used to render a paragraph of text.
+ * Rendering code depends highly on the graphics system,
+ * therefore this sample code must make a lot of assumptions,
+ * which may or may not match any existing graphics system's properties.
+ *
+ * <p>The basic assumptions are:</p>
+ * <ul>
+ * <li>Rendering is done from left to right on a horizontal line.</li>
+ * <li>A run of single-style, unidirectional text can be rendered at once.</li>
+ * <li>Such a run of text is passed to the graphics system with
+ * characters (code units) in logical order.</li>
+ * <li>The line-breaking algorithm is very complicated
+ * and Locale-dependent -
+ * and therefore its implementation omitted from this sample code.</li>
+ * </ul>
+ *
+ * <pre>
+ * \code
+ *#include "unicode/ubidi.h"
+ *
+ *typedef enum {
+ * styleNormal=0, styleSelected=1,
+ * styleBold=2, styleItalics=4,
+ * styleSuper=8, styleSub=16
+ *} Style;
+ *
+ *typedef struct { int32_t limit; Style style; } StyleRun;
+ *
+ *int getTextWidth(const UChar *text, int32_t start, int32_t limit,
+ * const StyleRun *styleRuns, int styleRunCount);
+ *
+ * // set *pLimit and *pStyleRunLimit for a line
+ * // from text[start] and from styleRuns[styleRunStart]
+ * // using ubidi_getLogicalRun(para, ...)
+ *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit,
+ * UBiDi *para,
+ * const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
+ * int *pLineWidth);
+ *
+ * // render runs on a line sequentially, always from left to right
+ *
+ * // prepare rendering a new line
+ * void startLine(UBiDiDirection textDirection, int lineWidth);
+ *
+ * // render a run of text and advance to the right by the run width
+ * // the text[start..limit-1] is always in logical order
+ * void renderRun(const UChar *text, int32_t start, int32_t limit,
+ * UBiDiDirection textDirection, Style style);
+ *
+ * // We could compute a cross-product
+ * // from the style runs with the directional runs
+ * // and then reorder it.
+ * // Instead, here we iterate over each run type
+ * // and render the intersections -
+ * // with shortcuts in simple (and common) cases.
+ * // renderParagraph() is the main function.
+ *
+ * // render a directional run with
+ * // (possibly) multiple style runs intersecting with it
+ * void renderDirectionalRun(const UChar *text,
+ * int32_t start, int32_t limit,
+ * UBiDiDirection direction,
+ * const StyleRun *styleRuns, int styleRunCount) {
+ * int i;
+ *
+ * // iterate over style runs
+ * if(direction==UBIDI_LTR) {
+ * int styleLimit;
+ *
+ * for(i=0; i<styleRunCount; ++i) {
+ * styleLimit=styleRuns[i].limit;
+ * if(start<styleLimit) {
+ * if(styleLimit>limit) { styleLimit=limit; }
+ * renderRun(text, start, styleLimit,
+ * direction, styleRuns[i].style);
+ * if(styleLimit==limit) { break; }
+ * start=styleLimit;
+ * }
+ * }
+ * } else {
+ * int styleStart;
+ *
+ * for(i=styleRunCount-1; i>=0; --i) {
+ * if(i>0) {
+ * styleStart=styleRuns[i-1].limit;
+ * } else {
+ * styleStart=0;
+ * }
+ * if(limit>=styleStart) {
+ * if(styleStart<start) { styleStart=start; }
+ * renderRun(text, styleStart, limit,
+ * direction, styleRuns[i].style);
+ * if(styleStart==start) { break; }
+ * limit=styleStart;
+ * }
+ * }
+ * }
+ * }
+ *
+ * // the line object represents text[start..limit-1]
+ * void renderLine(UBiDi *line, const UChar *text,
+ * int32_t start, int32_t limit,
+ * const StyleRun *styleRuns, int styleRunCount) {
+ * UBiDiDirection direction=ubidi_getDirection(line);
+ * if(direction!=UBIDI_MIXED) {
+ * // unidirectional
+ * if(styleRunCount<=1) {
+ * renderRun(text, start, limit, direction, styleRuns[0].style);
+ * } else {
+ * renderDirectionalRun(text, start, limit,
+ * direction, styleRuns, styleRunCount);
+ * }
+ * } else {
+ * // mixed-directional
+ * int32_t count, i, length;
+ * UBiDiLevel level;
+ *
+ * count=ubidi_countRuns(line, pErrorCode);
+ * if(U_SUCCESS(*pErrorCode)) {
+ * if(styleRunCount<=1) {
+ * Style style=styleRuns[0].style;
+ *
+ * // iterate over directional runs
+ * for(i=0; i<count; ++i) {
+ * direction=ubidi_getVisualRun(line, i, &start, &length);
+ * renderRun(text, start, start+length, direction, style);
+ * }
+ * } else {
+ * int32_t j;
+ *
+ * // iterate over both directional and style runs
+ * for(i=0; i<count; ++i) {
+ * direction=ubidi_getVisualRun(line, i, &start, &length);
+ * renderDirectionalRun(text, start, start+length,
+ * direction, styleRuns, styleRunCount);
+ * }
+ * }
+ * }
+ * }
+ * }
+ *
+ *void renderParagraph(const UChar *text, int32_t length,
+ * UBiDiDirection textDirection,
+ * const StyleRun *styleRuns, int styleRunCount,
+ * int lineWidth,
+ * UErrorCode *pErrorCode) {
+ * UBiDi *para;
+ *
+ * if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) {
+ * return;
+ * }
+ *
+ * para=ubidi_openSized(length, 0, pErrorCode);
+ * if(para==NULL) { return; }
+ *
+ * ubidi_setPara(para, text, length,
+ * textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
+ * NULL, pErrorCode);
+ * if(U_SUCCESS(*pErrorCode)) {
+ * UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
+ * StyleRun styleRun={ length, styleNormal };
+ * int width;
+ *
+ * if(styleRuns==NULL || styleRunCount<=0) {
+ * styleRunCount=1;
+ * styleRuns=&styleRun;
+ * }
+ *
+ * // assume styleRuns[styleRunCount-1].limit>=length
+ *
+ * width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
+ * if(width<=lineWidth) {
+ * // everything fits onto one line
+ *
+ * // prepare rendering a new line from either left or right
+ * startLine(paraLevel, width);
+ *
+ * renderLine(para, text, 0, length,
+ * styleRuns, styleRunCount);
+ * } else {
+ * UBiDi *line;
+ *
+ * // we need to render several lines
+ * line=ubidi_openSized(length, 0, pErrorCode);
+ * if(line!=NULL) {
+ * int32_t start=0, limit;
+ * int styleRunStart=0, styleRunLimit;
+ *
+ * for(;;) {
+ * limit=length;
+ * styleRunLimit=styleRunCount;
+ * getLineBreak(text, start, &limit, para,
+ * styleRuns, styleRunStart, &styleRunLimit,
+ * &width);
+ * ubidi_setLine(para, start, limit, line, pErrorCode);
+ * if(U_SUCCESS(*pErrorCode)) {
+ * // prepare rendering a new line
+ * // from either left or right
+ * startLine(paraLevel, width);
+ *
+ * renderLine(line, text, start, limit,
+ * styleRuns+styleRunStart,
+ * styleRunLimit-styleRunStart);
+ * }
+ * if(limit==length) { break; }
+ * start=limit;
+ * styleRunStart=styleRunLimit-1;
+ * if(start>=styleRuns[styleRunStart].limit) {
+ * ++styleRunStart;
+ * }
+ * }
+ *
+ * ubidi_close(line);
+ * }
+ * }
+ * }
+ *
+ * ubidi_close(para);
+ *}
+ *\endcode
+ * </pre>
+ */
+
+/*DOCXX_TAG*/
+/*@{*/
+
+/**
+ * UBiDiLevel is the type of the level values in this
+ * Bidi implementation.
+ * It holds an embedding level and indicates the visual direction
+ * by its bit&nbsp;0 (even/odd value).<p>
+ *
+ * It can also hold non-level values for the
+ * <code>paraLevel</code> and <code>embeddingLevels</code>
+ * arguments of <code>ubidi_setPara()</code>; there:
+ * <ul>
+ * <li>bit&nbsp;7 of an <code>embeddingLevels[]</code>
+ * value indicates whether the using application is
+ * specifying the level of a character to <i>override</i> whatever the
+ * Bidi implementation would resolve it to.</li>
+ * <li><code>paraLevel</code> can be set to the
+ * pseudo-level values <code>UBIDI_DEFAULT_LTR</code>
+ * and <code>UBIDI_DEFAULT_RTL</code>.</li>
+ * </ul>
+ *
+ * @see ubidi_setPara
+ *
+ * <p>The related constants are not real, valid level values.
+ * <code>UBIDI_DEFAULT_XXX</code> can be used to specify
+ * a default for the paragraph level for
+ * when the <code>ubidi_setPara()</code> function
+ * shall determine it but there is no
+ * strongly typed character in the input.<p>
+ *
+ * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even
+ * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd,
+ * just like with normal LTR and RTL level values -
+ * these special values are designed that way. Also, the implementation
+ * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
+ *
+ * Note: The numeric values of the related constants will not change:
+ * They are tied to the use of 7-bit byte values (plus the override bit)
+ * and of the UBiDiLevel=uint8_t data type in this API.
+ *
+ * @see UBIDI_DEFAULT_LTR
+ * @see UBIDI_DEFAULT_RTL
+ * @see UBIDI_LEVEL_OVERRIDE
+ * @see UBIDI_MAX_EXPLICIT_LEVEL
+ * @stable ICU 2.0
+ */
+typedef uint8_t UBiDiLevel;
+
+/** Paragraph level setting.<p>
+ *
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present,
+ * then set the paragraph level to 0 (left-to-right).<p>
+ *
+ * If this value is used in conjunction with reordering modes
+ * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
+ * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
+ * is assumed to be visual LTR, and the text after reordering is required
+ * to be the corresponding logical string with appropriate contextual
+ * direction. The direction of the result string will be RTL if either
+ * the rightmost or leftmost strong character of the source text is RTL
+ * or Arabic Letter, the direction will be LTR otherwise.<p>
+ *
+ * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
+ * be added at the beginning of the result string to ensure round trip
+ * (that the result string, when reordered back to visual, will produce
+ * the original source text).
+ * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
+ * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ * @stable ICU 2.0
+ */
+#define UBIDI_DEFAULT_LTR 0xfe
+
+/** Paragraph level setting.<p>
+ *
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present,
+ * then set the paragraph level to 1 (right-to-left).<p>
+ *
+ * If this value is used in conjunction with reordering modes
+ * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
+ * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
+ * is assumed to be visual LTR, and the text after reordering is required
+ * to be the corresponding logical string with appropriate contextual
+ * direction. The direction of the result string will be RTL if either
+ * the rightmost or leftmost strong character of the source text is RTL
+ * or Arabic Letter, or if the text contains no strong character;
+ * the direction will be LTR otherwise.<p>
+ *
+ * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
+ * be added at the beginning of the result string to ensure round trip
+ * (that the result string, when reordered back to visual, will produce
+ * the original source text).
+ * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
+ * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ * @stable ICU 2.0
+ */
+#define UBIDI_DEFAULT_RTL 0xff
+
+/**
+ * Maximum explicit embedding level.
+ * Same as the max_depth value in the
+ * <a href="http://www.unicode.org/reports/tr9/#BD2">Unicode Bidirectional Algorithm</a>.
+ * (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
+ * @stable ICU 2.0
+ */
+#define UBIDI_MAX_EXPLICIT_LEVEL 125
+
+/** Bit flag for level input.
+ * Overrides directional properties.
+ * @stable ICU 2.0
+ */
+#define UBIDI_LEVEL_OVERRIDE 0x80
+
+/**
+ * Special value which can be returned by the mapping functions when a logical
+ * index has no corresponding visual index or vice-versa. This may happen
+ * for the logical-to-visual mapping of a Bidi control when option
+ * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is specified. This can also happen
+ * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted
+ * by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
+ * @see ubidi_getVisualIndex
+ * @see ubidi_getVisualMap
+ * @see ubidi_getLogicalIndex
+ * @see ubidi_getLogicalMap
+ * @stable ICU 3.6
+ */
+#define UBIDI_MAP_NOWHERE (-1)
+
+/**
+ * <code>UBiDiDirection</code> values indicate the text direction.
+ * @stable ICU 2.0
+ */
+enum UBiDiDirection {
+ /** Left-to-right text. This is a 0 value.
+ * <ul>
+ * <li>As return value for <code>ubidi_getDirection()</code>, it means
+ * that the source string contains no right-to-left characters, or
+ * that the source string is empty and the paragraph level is even.
+ * <li> As return value for <code>ubidi_getBaseDirection()</code>, it
+ * means that the first strong character of the source string has
+ * a left-to-right direction.
+ * </ul>
+ * @stable ICU 2.0
+ */
+ UBIDI_LTR,
+ /** Right-to-left text. This is a 1 value.
+ * <ul>
+ * <li>As return value for <code>ubidi_getDirection()</code>, it means
+ * that the source string contains no left-to-right characters, or
+ * that the source string is empty and the paragraph level is odd.
+ * <li> As return value for <code>ubidi_getBaseDirection()</code>, it
+ * means that the first strong character of the source string has
+ * a right-to-left direction.
+ * </ul>
+ * @stable ICU 2.0
+ */
+ UBIDI_RTL,
+ /** Mixed-directional text. This is a 2 value.
+ * <p>As return value for <code>ubidi_getDirection()</code>, it means
+ * that the source string contains both left-to-right and
+ * right-to-left characters.
+ * @stable ICU 2.0
+ */
+ UBIDI_MIXED,
+ /** No strongly directional text. This is a 3 value.
+ * <p>As return value for <code>ubidi_getBaseDirection()</code>, it means
+ * that the source string is missing or empty, or contains neither left-to-right
+ * nor right-to-left characters.
+ * @stable ICU 4.6
+ */
+ UBIDI_NEUTRAL
+};
+
+/** @stable ICU 2.0 */
+typedef enum UBiDiDirection UBiDiDirection;
+
+/**
+ * Forward declaration of the <code>UBiDi</code> structure for the declaration of
+ * the API functions. Its fields are implementation-specific.<p>
+ * This structure holds information about a paragraph (or multiple paragraphs)
+ * of text with Bidi-algorithm-related details, or about one line of
+ * such a paragraph.<p>
+ * Reordering can be done on a line, or on one or more paragraphs which are
+ * then interpreted each as one single line.
+ * @stable ICU 2.0
+ */
+struct UBiDi;
+
+/** @stable ICU 2.0 */
+typedef struct UBiDi UBiDi;
+
+/**
+ * Allocate a <code>UBiDi</code> structure.
+ * Such an object is initially empty. It is assigned
+ * the Bidi properties of a piece of text containing one or more paragraphs
+ * by <code>ubidi_setPara()</code>
+ * or the Bidi properties of a line within a paragraph by
+ * <code>ubidi_setLine()</code>.<p>
+ * This object can be reused for as long as it is not deallocated
+ * by calling <code>ubidi_close()</code>.<p>
+ * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate
+ * additional memory for internal structures as necessary.
+ *
+ * @return An empty <code>UBiDi</code> object.
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDi * U_EXPORT2
+ubidi_open(void);
+
+/**
+ * Allocate a <code>UBiDi</code> structure with preallocated memory
+ * for internal structures.
+ * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code>
+ * with no arguments, but it also preallocates memory for internal structures
+ * according to the sizings supplied by the caller.<p>
+ * Subsequent functions will not allocate any more memory, and are thus
+ * guaranteed not to fail because of lack of memory.<p>
+ * The preallocation can be limited to some of the internal memory
+ * by setting some values to 0 here. That means that if, e.g.,
+ * <code>maxRunCount</code> cannot be reasonably predetermined and should not
+ * be set to <code>maxLength</code> (the only failproof value) to avoid
+ * wasting memory, then <code>maxRunCount</code> could be set to 0 here
+ * and the internal structures that are associated with it will be allocated
+ * on demand, just like with <code>ubidi_open()</code>.
+ *
+ * @param maxLength is the maximum text or line length that internal memory
+ * will be preallocated for. An attempt to associate this object with a
+ * longer text will fail, unless this value is 0, which leaves the allocation
+ * up to the implementation.
+ *
+ * @param maxRunCount is the maximum anticipated number of same-level runs
+ * that internal memory will be preallocated for. An attempt to access
+ * visual runs on an object that was not preallocated for as many runs
+ * as the text was actually resolved to will fail,
+ * unless this value is 0, which leaves the allocation up to the implementation.<br><br>
+ * The number of runs depends on the actual text and may be anywhere between
+ * 1 and <code>maxLength</code>. It is typically small.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return An empty <code>UBiDi</code> object with preallocated memory.
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDi * U_EXPORT2
+ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode);
+
+/**
+ * <code>ubidi_close()</code> must be called to free the memory
+ * associated with a UBiDi object.<p>
+ *
+ * <strong>Important: </strong>
+ * A parent <code>UBiDi</code> object must not be destroyed or reused if
+ * it still has children.
+ * If a <code>UBiDi</code> object has become the <i>child</i>
+ * of another one (its <i>parent</i>) by calling
+ * <code>ubidi_setLine()</code>, then the child object must
+ * be destroyed (closed) or reused (by calling
+ * <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>)
+ * before the parent object.
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_setLine
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_close(UBiDi *pBiDi);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUBiDiPointer
+ * "Smart pointer" class, closes a UBiDi via ubidi_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiPointer, UBiDi, ubidi_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Modify the operation of the Bidi algorithm such that it
+ * approximates an "inverse Bidi" algorithm. This function
+ * must be called before <code>ubidi_setPara()</code>.
+ *
+ * <p>The normal operation of the Bidi algorithm as described
+ * in the Unicode Technical Report is to take text stored in logical
+ * (keyboard, typing) order and to determine the reordering of it for visual
+ * rendering.
+ * Some legacy systems store text in visual order, and for operations
+ * with standard, Unicode-based algorithms, the text needs to be transformed
+ * to logical order. This is effectively the inverse algorithm of the
+ * described Bidi algorithm. Note that there is no standard algorithm for
+ * this "inverse Bidi" and that the current implementation provides only an
+ * approximation of "inverse Bidi".</p>
+ *
+ * <p>With <code>isInverse</code> set to <code>true</code>,
+ * this function changes the behavior of some of the subsequent functions
+ * in a way that they can be used for the inverse Bidi algorithm.
+ * Specifically, runs of text with numeric characters will be treated in a
+ * special way and may need to be surrounded with LRM characters when they are
+ * written in reordered sequence.</p>
+ *
+ * <p>Output runs should be retrieved using <code>ubidi_getVisualRun()</code>.
+ * Since the actual input for "inverse Bidi" is visually ordered text and
+ * <code>ubidi_getVisualRun()</code> gets the reordered runs, these are actually
+ * the runs of the logically ordered output.</p>
+ *
+ * <p>Calling this function with argument <code>isInverse</code> set to
+ * <code>true</code> is equivalent to calling
+ * <code>ubidi_setReorderingMode</code> with argument
+ * <code>reorderingMode</code>
+ * set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
+ * Calling this function with argument <code>isInverse</code> set to
+ * <code>false</code> is equivalent to calling
+ * <code>ubidi_setReorderingMode</code> with argument
+ * <code>reorderingMode</code>
+ * set to <code>#UBIDI_REORDER_DEFAULT</code>.
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ *
+ * @param isInverse specifies "forward" or "inverse" Bidi operation.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_writeReordered
+ * @see ubidi_setReorderingMode
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_setInverse(UBiDi *pBiDi, UBool isInverse);
+
+/**
+ * Is this Bidi object set to perform the inverse Bidi algorithm?
+ * <p>Note: calling this function after setting the reordering mode with
+ * <code>ubidi_setReorderingMode</code> will return <code>true</code> if the
+ * reordering mode was set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>,
+ * <code>false</code> for all other values.</p>
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @return true if the Bidi object is set to perform the inverse Bidi algorithm
+ * by handling numbers as L.
+ *
+ * @see ubidi_setInverse
+ * @see ubidi_setReorderingMode
+ * @stable ICU 2.0
+ */
+
+U_CAPI UBool U_EXPORT2
+ubidi_isInverse(UBiDi *pBiDi);
+
+/**
+ * Specify whether block separators must be allocated level zero,
+ * so that successive paragraphs will progress from left to right.
+ * This function must be called before <code>ubidi_setPara()</code>.
+ * Paragraph separators (B) may appear in the text. Setting them to level zero
+ * means that all paragraph separators (including one possibly appearing
+ * in the last text position) are kept in the reordered text after the text
+ * that they follow in the source text.
+ * When this feature is not enabled, a paragraph separator at the last
+ * position of the text before reordering will go to the first position
+ * of the reordered text when the paragraph level is odd.
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ *
+ * @param orderParagraphsLTR specifies whether paragraph separators (B) must
+ * receive level 0, so that successive paragraphs progress from left to right.
+ *
+ * @see ubidi_setPara
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR);
+
+/**
+ * Is this Bidi object set to allocate level 0 to block separators so that
+ * successive paragraphs progress from left to right?
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @return true if the Bidi object is set to allocate level 0 to block
+ * separators.
+ *
+ * @see ubidi_orderParagraphsLTR
+ * @stable ICU 3.4
+ */
+U_CAPI UBool U_EXPORT2
+ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);
+
+/**
+ * <code>UBiDiReorderingMode</code> values indicate which variant of the Bidi
+ * algorithm to use.
+ *
+ * @see ubidi_setReorderingMode
+ * @stable ICU 3.6
+ */
+typedef enum UBiDiReorderingMode {
+ /** Regular Logical to Visual Bidi algorithm according to Unicode.
+ * This is a 0 value.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_DEFAULT = 0,
+ /** Logical to Visual algorithm which handles numbers in a way which
+ * mimics the behavior of Windows XP.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_NUMBERS_SPECIAL,
+ /** Logical to Visual algorithm grouping numbers with adjacent R characters
+ * (reversible algorithm).
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_GROUP_NUMBERS_WITH_R,
+ /** Reorder runs only to transform a Logical LTR string to the Logical RTL
+ * string with the same display, or vice-versa.<br>
+ * If this mode is set together with option
+ * <code>#UBIDI_OPTION_INSERT_MARKS</code>, some Bidi controls in the source
+ * text may be removed and other controls may be added to produce the
+ * minimum combination which has the required display.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_RUNS_ONLY,
+ /** Visual to Logical algorithm which handles numbers like L
+ * (same algorithm as selected by <code>ubidi_setInverse(true)</code>).
+ * @see ubidi_setInverse
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_INVERSE_NUMBERS_AS_L,
+ /** Visual to Logical algorithm equivalent to the regular Logical to Visual
+ * algorithm.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_INVERSE_LIKE_DIRECT,
+ /** Inverse Bidi (Visual to Logical) algorithm for the
+ * <code>UBIDI_REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Number of values for reordering mode.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UBIDI_REORDER_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UBiDiReorderingMode;
+
+/**
+ * Modify the operation of the Bidi algorithm such that it implements some
+ * variant to the basic Bidi algorithm or approximates an "inverse Bidi"
+ * algorithm, depending on different values of the "reordering mode".
+ * This function must be called before <code>ubidi_setPara()</code>, and stays
+ * in effect until called again with a different argument.
+ *
+ * <p>The normal operation of the Bidi algorithm as described
+ * in the Unicode Standard Annex #9 is to take text stored in logical
+ * (keyboard, typing) order and to determine how to reorder it for visual
+ * rendering.</p>
+ *
+ * <p>With the reordering mode set to a value other than
+ * <code>#UBIDI_REORDER_DEFAULT</code>, this function changes the behavior of
+ * some of the subsequent functions in a way such that they implement an
+ * inverse Bidi algorithm or some other algorithm variants.</p>
+ *
+ * <p>Some legacy systems store text in visual order, and for operations
+ * with standard, Unicode-based algorithms, the text needs to be transformed
+ * into logical order. This is effectively the inverse algorithm of the
+ * described Bidi algorithm. Note that there is no standard algorithm for
+ * this "inverse Bidi", so a number of variants are implemented here.</p>
+ *
+ * <p>In other cases, it may be desirable to emulate some variant of the
+ * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
+ * Logical to Logical transformation.</p>
+ *
+ * <ul>
+ * <li>When the reordering mode is set to <code>#UBIDI_REORDER_DEFAULT</code>,
+ * the standard Bidi Logical to Visual algorithm is applied.</li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_NUMBERS_SPECIAL</code>,
+ * the algorithm used to perform Bidi transformations when calling
+ * <code>ubidi_setPara</code> should approximate the algorithm used in
+ * Microsoft Windows XP rather than strictly conform to the Unicode Bidi
+ * algorithm.
+ * <br>
+ * The differences between the basic algorithm and the algorithm addressed
+ * by this option are as follows:
+ * <ul>
+ * <li>Within text at an even embedding level, the sequence "123AB"
+ * (where AB represent R or AL letters) is transformed to "123BA" by the
+ * Unicode algorithm and to "BA123" by the Windows algorithm.</li>
+ * <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just
+ * like regular numbers (EN).</li>
+ * </ul></li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_GROUP_NUMBERS_WITH_R</code>,
+ * numbers located between LTR text and RTL text are associated with the RTL
+ * text. For instance, an LTR paragraph with content "abc 123 DEF" (where
+ * upper case letters represent RTL characters) will be transformed to
+ * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
+ * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
+ * This makes the algorithm reversible and makes it useful when round trip
+ * (from visual to logical and back to visual) must be achieved without
+ * adding LRM characters. However, this is a variation from the standard
+ * Unicode Bidi algorithm.<br>
+ * The source text should not contain Bidi control characters other than LRM
+ * or RLM.</li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_RUNS_ONLY</code>,
+ * a "Logical to Logical" transformation must be performed:
+ * <ul>
+ * <li>If the default text level of the source text (argument <code>paraLevel</code>
+ * in <code>ubidi_setPara</code>) is even, the source text will be handled as
+ * LTR logical text and will be transformed to the RTL logical text which has
+ * the same LTR visual display.</li>
+ * <li>If the default level of the source text is odd, the source text
+ * will be handled as RTL logical text and will be transformed to the
+ * LTR logical text which has the same LTR visual display.</li>
+ * </ul>
+ * This mode may be needed when logical text which is basically Arabic or
+ * Hebrew, with possible included numbers or phrases in English, has to be
+ * displayed as if it had an even embedding level (this can happen if the
+ * displaying application treats all text as if it was basically LTR).
+ * <br>
+ * This mode may also be needed in the reverse case, when logical text which is
+ * basically English, with possible included phrases in Arabic or Hebrew, has to
+ * be displayed as if it had an odd embedding level.
+ * <br>
+ * Both cases could be handled by adding LRE or RLE at the head of the text,
+ * if the display subsystem supports these formatting controls. If it does not,
+ * the problem may be handled by transforming the source text in this mode
+ * before displaying it, so that it will be displayed properly.<br>
+ * The source text should not contain Bidi control characters other than LRM
+ * or RLM.</li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi" algorithm
+ * is applied.
+ * Runs of text with numeric characters will be treated like LTR letters and
+ * may need to be surrounded with LRM characters when they are written in
+ * reordered sequence (the option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> can
+ * be used with function <code>ubidi_writeReordered</code> to this end). This
+ * mode is equivalent to calling <code>ubidi_setInverse()</code> with
+ * argument <code>isInverse</code> set to <code>true</code>.</li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to Visual
+ * Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm.
+ * This mode is similar to mode <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>
+ * but is closer to the regular Bidi algorithm.
+ * <br>
+ * For example, an LTR paragraph with the content "FED 123 456 CBA" (where
+ * upper case represents RTL characters) will be transformed to
+ * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
+ * with mode <code>UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
+ * When used in conjunction with option
+ * <code>#UBIDI_OPTION_INSERT_MARKS</code>, this mode generally
+ * adds Bidi marks to the output significantly more sparingly than mode
+ * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> with option
+ * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to
+ * <code>ubidi_writeReordered</code>.</li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual
+ * Bidi algorithm used in Windows XP is used as an approximation of an "inverse Bidi" algorithm.
+ * <br>
+ * For example, an LTR paragraph with the content "abc FED123" (where
+ * upper case represents RTL characters) will be transformed to "abc 123DEF."</li>
+ * </ul>
+ *
+ * <p>In all the reordering modes specifying an "inverse Bidi" algorithm
+ * (i.e. those with a name starting with <code>UBIDI_REORDER_INVERSE</code>),
+ * output runs should be retrieved using
+ * <code>ubidi_getVisualRun()</code>, and the output text with
+ * <code>ubidi_writeReordered()</code>. The caller should keep in mind that in
+ * "inverse Bidi" modes the input is actually visually ordered text and the
+ * reordered output returned by <code>ubidi_getVisualRun()</code> or
+ * <code>ubidi_writeReordered()</code> is actually runs or a character string
+ * of logically ordered output.<br>
+ * For all the "inverse Bidi" modes, the source text should not contain
+ * Bidi control characters other than LRM or RLM.</p>
+ *
+ * <p>Note that option <code>#UBIDI_OUTPUT_REVERSE</code> of
+ * <code>ubidi_writeReordered</code> has no useful meaning and should not be
+ * used in conjunction with any value of the reordering mode specifying
+ * "inverse Bidi" or with value <code>UBIDI_REORDER_RUNS_ONLY</code>.
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @param reorderingMode specifies the required variant of the Bidi algorithm.
+ *
+ * @see UBiDiReorderingMode
+ * @see ubidi_setInverse
+ * @see ubidi_setPara
+ * @see ubidi_writeReordered
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode);
+
+/**
+ * What is the requested reordering mode for a given Bidi object?
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @return the current reordering mode of the Bidi object
+ * @see ubidi_setReorderingMode
+ * @stable ICU 3.6
+ */
+U_CAPI UBiDiReorderingMode U_EXPORT2
+ubidi_getReorderingMode(UBiDi *pBiDi);
+
+/**
+ * <code>UBiDiReorderingOption</code> values indicate which options are
+ * specified to affect the Bidi algorithm.
+ *
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+typedef enum UBiDiReorderingOption {
+ /**
+ * option value for <code>ubidi_setReorderingOptions</code>:
+ * disable all the options which can be set with this function
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+ UBIDI_OPTION_DEFAULT = 0,
+
+ /**
+ * option bit for <code>ubidi_setReorderingOptions</code>:
+ * insert Bidi marks (LRM or RLM) when needed to ensure correct result of
+ * a reordering to a Logical order
+ *
+ * <p>This option must be set or reset before calling
+ * <code>ubidi_setPara</code>.</p>
+ *
+ * <p>This option is significant only with reordering modes which generate
+ * a result with Logical order, specifically:</p>
+ * <ul>
+ * <li><code>#UBIDI_REORDER_RUNS_ONLY</code></li>
+ * <li><code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code></li>
+ * <li><code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code></li>
+ * <li><code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
+ * </ul>
+ *
+ * <p>If this option is set in conjunction with reordering mode
+ * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
+ * <code>ubidi_setInverse(true)</code>, it implies
+ * option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>
+ * in calls to function <code>ubidi_writeReordered()</code>.</p>
+ *
+ * <p>For other reordering modes, a minimum number of LRM or RLM characters
+ * will be added to the source text after reordering it so as to ensure
+ * round trip, i.e. when applying the inverse reordering mode on the
+ * resulting logical text with removal of Bidi marks
+ * (option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> set before calling
+ * <code>ubidi_setPara()</code> or option <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
+ * in <code>ubidi_writeReordered</code>), the result will be identical to the
+ * source text in the first transformation.
+ *
+ * <p>This option will be ignored if specified together with option
+ * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. It inhibits option
+ * <code>UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to function
+ * <code>ubidi_writeReordered()</code> and it implies option
+ * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to function
+ * <code>ubidi_writeReordered()</code> if the reordering mode is
+ * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.</p>
+ *
+ * @see ubidi_setReorderingMode
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+ UBIDI_OPTION_INSERT_MARKS = 1,
+
+ /**
+ * option bit for <code>ubidi_setReorderingOptions</code>:
+ * remove Bidi control characters
+ *
+ * <p>This option must be set or reset before calling
+ * <code>ubidi_setPara</code>.</p>
+ *
+ * <p>This option nullifies option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
+ * It inhibits option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls
+ * to function <code>ubidi_writeReordered()</code> and it implies option
+ * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to that function.</p>
+ *
+ * @see ubidi_setReorderingMode
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+ UBIDI_OPTION_REMOVE_CONTROLS = 2,
+
+ /**
+ * option bit for <code>ubidi_setReorderingOptions</code>:
+ * process the output as part of a stream to be continued
+ *
+ * <p>This option must be set or reset before calling
+ * <code>ubidi_setPara</code>.</p>
+ *
+ * <p>This option specifies that the caller is interested in processing a large
+ * text object in parts.
+ * The results of the successive calls are expected to be concatenated by the
+ * caller. Only the call for the last part will have this option bit off.</p>
+ *
+ * <p>When this option bit is on, <code>ubidi_setPara()</code> may process
+ * less than the full source text in order to truncate the text at a meaningful
+ * boundary. The caller should call <code>ubidi_getProcessedLength()</code>
+ * immediately after calling <code>ubidi_setPara()</code> in order to
+ * determine how much of the source text has been processed.
+ * Source text beyond that length should be resubmitted in following calls to
+ * <code>ubidi_setPara</code>. The processed length may be less than
+ * the length of the source text if a character preceding the last character of
+ * the source text constitutes a reasonable boundary (like a block separator)
+ * for text to be continued.<br>
+ * If the last character of the source text constitutes a reasonable
+ * boundary, the whole text will be processed at once.<br>
+ * If nowhere in the source text there exists
+ * such a reasonable boundary, the processed length will be zero.<br>
+ * The caller should check for such an occurrence and do one of the following:
+ * <ul><li>submit a larger amount of text with a better chance to include
+ * a reasonable boundary.</li>
+ * <li>resubmit the same text after turning off option
+ * <code>UBIDI_OPTION_STREAMING</code>.</li></ul>
+ * In all cases, this option should be turned off before processing the last
+ * part of the text.</p>
+ *
+ * <p>When the <code>UBIDI_OPTION_STREAMING</code> option is used,
+ * it is recommended to call <code>ubidi_orderParagraphsLTR()</code> with
+ * argument <code>orderParagraphsLTR</code> set to <code>true</code> before
+ * calling <code>ubidi_setPara</code> so that later paragraphs may be
+ * concatenated to previous paragraphs on the right.</p>
+ *
+ * @see ubidi_setReorderingMode
+ * @see ubidi_setReorderingOptions
+ * @see ubidi_getProcessedLength
+ * @see ubidi_orderParagraphsLTR
+ * @stable ICU 3.6
+ */
+ UBIDI_OPTION_STREAMING = 4
+} UBiDiReorderingOption;
+
+/**
+ * Specify which of the reordering options
+ * should be applied during Bidi transformations.
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @param reorderingOptions is a combination of zero or more of the following
+ * options:
+ * <code>#UBIDI_OPTION_DEFAULT</code>, <code>#UBIDI_OPTION_INSERT_MARKS</code>,
+ * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>, <code>#UBIDI_OPTION_STREAMING</code>.
+ *
+ * @see ubidi_getReorderingOptions
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions);
+
+/**
+ * What are the reordering options applied to a given Bidi object?
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @return the current reordering options of the Bidi object
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+U_CAPI uint32_t U_EXPORT2
+ubidi_getReorderingOptions(UBiDi *pBiDi);
+
+/**
+ * Set the context before a call to ubidi_setPara().<p>
+ *
+ * ubidi_setPara() computes the left-right directionality for a given piece
+ * of text which is supplied as one of its arguments. Sometimes this piece
+ * of text (the "main text") should be considered in context, because text
+ * appearing before ("prologue") and/or after ("epilogue") the main text
+ * may affect the result of this computation.<p>
+ *
+ * This function specifies the prologue and/or the epilogue for the next
+ * call to ubidi_setPara(). The characters specified as prologue and
+ * epilogue should not be modified by the calling program until the call
+ * to ubidi_setPara() has returned. If successive calls to ubidi_setPara()
+ * all need specification of a context, ubidi_setContext() must be called
+ * before each call to ubidi_setPara(). In other words, a context is not
+ * "remembered" after the following successful call to ubidi_setPara().<p>
+ *
+ * If a call to ubidi_setPara() specifies UBIDI_DEFAULT_LTR or
+ * UBIDI_DEFAULT_RTL as paraLevel and is preceded by a call to
+ * ubidi_setContext() which specifies a prologue, the paragraph level will
+ * be computed taking into consideration the text in the prologue.<p>
+ *
+ * When ubidi_setPara() is called without a previous call to
+ * ubidi_setContext, the main text is handled as if preceded and followed
+ * by strong directional characters at the current paragraph level.
+ * Calling ubidi_setContext() with specification of a prologue will change
+ * this behavior by handling the main text as if preceded by the last
+ * strong character appearing in the prologue, if any.
+ * Calling ubidi_setContext() with specification of an epilogue will change
+ * the behavior of ubidi_setPara() by handling the main text as if followed
+ * by the first strong character or digit appearing in the epilogue, if any.<p>
+ *
+ * Note 1: if <code>ubidi_setContext</code> is called repeatedly without
+ * calling <code>ubidi_setPara</code>, the earlier calls have no effect,
+ * only the last call will be remembered for the next call to
+ * <code>ubidi_setPara</code>.<p>
+ *
+ * Note 2: calling <code>ubidi_setContext(pBiDi, NULL, 0, NULL, 0, &errorCode)</code>
+ * cancels any previous setting of non-empty prologue or epilogue.
+ * The next call to <code>ubidi_setPara()</code> will process no
+ * prologue or epilogue.<p>
+ *
+ * Note 3: users must be aware that even after setting the context
+ * before a call to ubidi_setPara() to perform e.g. a logical to visual
+ * transformation, the resulting string may not be identical to what it
+ * would have been if all the text, including prologue and epilogue, had
+ * been processed together.<br>
+ * Example (upper case letters represent RTL characters):<br>
+ * &nbsp;&nbsp;prologue = "<code>abc DE</code>"<br>
+ * &nbsp;&nbsp;epilogue = none<br>
+ * &nbsp;&nbsp;main text = "<code>FGH xyz</code>"<br>
+ * &nbsp;&nbsp;paraLevel = UBIDI_LTR<br>
+ * &nbsp;&nbsp;display without prologue = "<code>HGF xyz</code>"
+ * ("HGF" is adjacent to "xyz")<br>
+ * &nbsp;&nbsp;display with prologue = "<code>abc HGFED xyz</code>"
+ * ("HGF" is not adjacent to "xyz")<br>
+ *
+ * @param pBiDi is a paragraph <code>UBiDi</code> object.
+ *
+ * @param prologue is a pointer to the text which precedes the text that
+ * will be specified in a coming call to ubidi_setPara().
+ * If there is no prologue to consider, then <code>proLength</code>
+ * must be zero and this pointer can be NULL.
+ *
+ * @param proLength is the length of the prologue; if <code>proLength==-1</code>
+ * then the prologue must be zero-terminated.
+ * Otherwise proLength must be >= 0. If <code>proLength==0</code>, it means
+ * that there is no prologue to consider.
+ *
+ * @param epilogue is a pointer to the text which follows the text that
+ * will be specified in a coming call to ubidi_setPara().
+ * If there is no epilogue to consider, then <code>epiLength</code>
+ * must be zero and this pointer can be NULL.
+ *
+ * @param epiLength is the length of the epilogue; if <code>epiLength==-1</code>
+ * then the epilogue must be zero-terminated.
+ * Otherwise epiLength must be >= 0. If <code>epiLength==0</code>, it means
+ * that there is no epilogue to consider.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_setPara
+ * @stable ICU 4.8
+ */
+U_CAPI void U_EXPORT2
+ubidi_setContext(UBiDi *pBiDi,
+ const UChar *prologue, int32_t proLength,
+ const UChar *epilogue, int32_t epiLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Perform the Unicode Bidi algorithm. It is defined in the
+ * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
+ * version 13,
+ * also described in The Unicode Standard, Version 4.0.<p>
+ *
+ * This function takes a piece of plain text containing one or more paragraphs,
+ * with or without externally specified embedding levels from <i>styled</i>
+ * text and computes the left-right-directionality of each character.<p>
+ *
+ * If the entire text is all of the same directionality, then
+ * the function may not perform all the steps described by the algorithm,
+ * i.e., some levels may not be the same as if all steps were performed.
+ * This is not relevant for unidirectional text.<br>
+ * For example, in pure LTR text with numbers the numbers would get
+ * a resolved level of 2 higher than the surrounding text according to
+ * the algorithm. This implementation may set all resolved levels to
+ * the same value in such a case.<p>
+ *
+ * The text can be composed of multiple paragraphs. Occurrence of a block
+ * separator in the text terminates a paragraph, and whatever comes next starts
+ * a new paragraph. The exception to this rule is when a Carriage Return (CR)
+ * is followed by a Line Feed (LF). Both CR and LF are block separators, but
+ * in that case, the pair of characters is considered as terminating the
+ * preceding paragraph, and a new paragraph will be started by a character
+ * coming after the LF.
+ *
+ * @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code>
+ * which will be set to contain the reordering information,
+ * especially the resolved levels for all the characters in <code>text</code>.
+ *
+ * @param text is a pointer to the text that the Bidi algorithm will be performed on.
+ * This pointer is stored in the UBiDi object and can be retrieved
+ * with <code>ubidi_getText()</code>.<br>
+ * <strong>Note:</strong> the text must be (at least) <code>length</code> long.
+ *
+ * @param length is the length of the text; if <code>length==-1</code> then
+ * the text must be zero-terminated.
+ *
+ * @param paraLevel specifies the default level for the text;
+ * it is typically 0 (LTR) or 1 (RTL).
+ * If the function shall determine the paragraph level from the text,
+ * then <code>paraLevel</code> can be set to
+ * either <code>#UBIDI_DEFAULT_LTR</code>
+ * or <code>#UBIDI_DEFAULT_RTL</code>; if the text contains multiple
+ * paragraphs, the paragraph level shall be determined separately for
+ * each paragraph; if a paragraph does not include any strongly typed
+ * character, then the desired default is used (0 for LTR or 1 for RTL).
+ * Any other value between 0 and <code>#UBIDI_MAX_EXPLICIT_LEVEL</code>
+ * is also valid, with odd levels indicating RTL.
+ *
+ * @param embeddingLevels (in) may be used to preset the embedding and override levels,
+ * ignoring characters like LRE and PDF in the text.
+ * A level overrides the directional property of its corresponding
+ * (same index) character if the level has the
+ * <code>#UBIDI_LEVEL_OVERRIDE</code> bit set.<br><br>
+ * Aside from that bit, it must be
+ * <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>,
+ * except that level 0 is always allowed.
+ * Level 0 for a paragraph separator prevents reordering of paragraphs;
+ * this only works reliably if <code>#UBIDI_LEVEL_OVERRIDE</code>
+ * is also set for paragraph separators.
+ * Level 0 for other characters is treated as a wildcard
+ * and is lifted up to the resolved level of the surrounding paragraph.<br><br>
+ * <strong>Caution: </strong>A copy of this pointer, not of the levels,
+ * will be stored in the <code>UBiDi</code> object;
+ * the <code>embeddingLevels</code> array must not be
+ * deallocated before the <code>UBiDi</code> structure is destroyed or reused,
+ * and the <code>embeddingLevels</code>
+ * should not be modified to avoid unexpected results on subsequent Bidi operations.
+ * However, the <code>ubidi_setPara()</code> and
+ * <code>ubidi_setLine()</code> functions may modify some or all of the levels.<br><br>
+ * After the <code>UBiDi</code> object is reused or destroyed, the caller
+ * must take care of the deallocation of the <code>embeddingLevels</code> array.<br><br>
+ * <strong>Note:</strong> the <code>embeddingLevels</code> array must be
+ * at least <code>length</code> long.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
+ UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
+ UErrorCode *pErrorCode);
+
+/**
+ * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to
+ * contain the reordering information, especially the resolved levels,
+ * for all the characters in a line of text. This line of text is
+ * specified by referring to a <code>UBiDi</code> object representing
+ * this information for a piece of text containing one or more paragraphs,
+ * and by specifying a range of indexes in this text.<p>
+ * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
+ *
+ * This is used after calling <code>ubidi_setPara()</code>
+ * for a piece of text, and after line-breaking on that text.
+ * It is not necessary if each paragraph is treated as a single line.<p>
+ *
+ * After line-breaking, rules (L1) and (L2) for the treatment of
+ * trailing WS and for reordering are performed on
+ * a <code>UBiDi</code> object that represents a line.<p>
+ *
+ * <strong>Important: </strong><code>pLineBiDi</code> shares data with
+ * <code>pParaBiDi</code>.
+ * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>.
+ * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line
+ * before the object for its parent paragraph.<p>
+ *
+ * The text pointer that was stored in <code>pParaBiDi</code> is also copied,
+ * and <code>start</code> is added to it so that it points to the beginning of the
+ * line for this object.
+ *
+ * @param pParaBiDi is the parent paragraph object. It must have been set
+ * by a successful call to ubidi_setPara.
+ *
+ * @param start is the line's first index into the text.
+ *
+ * @param limit is just behind the line's last index into the text
+ * (its last index +1).<br>
+ * It must be <code>0<=start<limit<=</code>containing paragraph limit.
+ * If the specified line crosses a paragraph boundary, the function
+ * will terminate with error code U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * @param pLineBiDi is the object that will now represent a line of the text.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_setLine(const UBiDi *pParaBiDi,
+ int32_t start, int32_t limit,
+ UBiDi *pLineBiDi,
+ UErrorCode *pErrorCode);
+
+/**
+ * Get the directionality of the text.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return a value of <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>
+ * or <code>UBIDI_MIXED</code>
+ * that indicates if the entire text
+ * represented by this object is unidirectional,
+ * and which direction, or if it is mixed-directional.
+ * Note - The value <code>UBIDI_NEUTRAL</code> is never returned from this method.
+ *
+ * @see UBiDiDirection
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDiDirection U_EXPORT2
+ubidi_getDirection(const UBiDi *pBiDi);
+
+/**
+ * Gets the base direction of the text provided according
+ * to the Unicode Bidirectional Algorithm. The base direction
+ * is derived from the first character in the string with bidirectional
+ * character type L, R, or AL. If the first such character has type L,
+ * <code>UBIDI_LTR</code> is returned. If the first such character has
+ * type R or AL, <code>UBIDI_RTL</code> is returned. If the string does
+ * not contain any character of these types, then
+ * <code>UBIDI_NEUTRAL</code> is returned.
+ *
+ * This is a lightweight function for use when only the base direction
+ * is needed and no further bidi processing of the text is needed.
+ *
+ * @param text is a pointer to the text whose base
+ * direction is needed.
+ * Note: the text must be (at least) @c length long.
+ *
+ * @param length is the length of the text;
+ * if <code>length==-1</code> then the text
+ * must be zero-terminated.
+ *
+ * @return <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>,
+ * <code>UBIDI_NEUTRAL</code>
+ *
+ * @see UBiDiDirection
+ * @stable ICU 4.6
+ */
+U_CAPI UBiDiDirection U_EXPORT2
+ubidi_getBaseDirection(const UChar *text, int32_t length );
+
+/**
+ * Get the pointer to the text.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The pointer to the text that the UBiDi object was created for.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_setLine
+ * @stable ICU 2.0
+ */
+U_CAPI const UChar * U_EXPORT2
+ubidi_getText(const UBiDi *pBiDi);
+
+/**
+ * Get the length of the text.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The length of the text that the UBiDi object was created for.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getLength(const UBiDi *pBiDi);
+
+/**
+ * Get the paragraph level of the text.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The paragraph level. If there are multiple paragraphs, their
+ * level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or
+ * UBIDI_DEFAULT_RTL. In that case, the level of the first paragraph
+ * is returned.
+ *
+ * @see UBiDiLevel
+ * @see ubidi_getParagraph
+ * @see ubidi_getParagraphByIndex
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDiLevel U_EXPORT2
+ubidi_getParaLevel(const UBiDi *pBiDi);
+
+/**
+ * Get the number of paragraphs.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The number of paragraphs.
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_countParagraphs(UBiDi *pBiDi);
+
+/**
+ * Get a paragraph, given a position within the text.
+ * This function returns information about a paragraph.<br>
+ * Note: if the paragraph index is known, it is more efficient to
+ * retrieve the paragraph information using ubidi_getParagraphByIndex().<p>
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param charIndex is the index of a character within the text, in the
+ * range <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>.
+ *
+ * @param pParaStart will receive the index of the first character of the
+ * paragraph in the text.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pParaLimit will receive the limit of the paragraph.
+ * The l-value that you point to here may be the
+ * same expression (variable) as the one for
+ * <code>charIndex</code>.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pParaLevel will receive the level of the paragraph.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The index of the paragraph containing the specified position.
+ *
+ * @see ubidi_getProcessedLength
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart,
+ int32_t *pParaLimit, UBiDiLevel *pParaLevel,
+ UErrorCode *pErrorCode);
+
+/**
+ * Get a paragraph, given the index of this paragraph.
+ *
+ * This function returns information about a paragraph.<p>
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @param paraIndex is the number of the paragraph, in the
+ * range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>.
+ *
+ * @param pParaStart will receive the index of the first character of the
+ * paragraph in the text.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pParaLimit will receive the limit of the paragraph.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pParaLevel will receive the level of the paragraph.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
+ int32_t *pParaStart, int32_t *pParaLimit,
+ UBiDiLevel *pParaLevel, UErrorCode *pErrorCode);
+
+/**
+ * Get the level for one character.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param charIndex the index of a character. It must be in the range
+ * [0..ubidi_getProcessedLength(pBiDi)-1].
+ *
+ * @return The level for the character at charIndex (0 if charIndex is not
+ * in the valid range).
+ *
+ * @see UBiDiLevel
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDiLevel U_EXPORT2
+ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex);
+
+/**
+ * Get an array of levels for each character.<p>
+ *
+ * Note that this function may allocate memory under some
+ * circumstances, unlike <code>ubidi_getLevelAt()</code>.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object, whose
+ * text length must be strictly positive.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The levels array for the text,
+ * or <code>NULL</code> if an error occurs.
+ *
+ * @see UBiDiLevel
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI const UBiDiLevel * U_EXPORT2
+ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
+
+/**
+ * Get a logical run.
+ * This function returns information about a run and is used
+ * to retrieve runs in logical order.<p>
+ * This is especially useful for line-breaking on a paragraph.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param logicalPosition is a logical position within the source text.
+ *
+ * @param pLogicalLimit will receive the limit of the corresponding run.
+ * The l-value that you point to here may be the
+ * same expression (variable) as the one for
+ * <code>logicalPosition</code>.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pLevel will receive the level of the corresponding run.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
+ int32_t *pLogicalLimit, UBiDiLevel *pLevel);
+
+/**
+ * Get the number of runs.
+ * This function may invoke the actual reordering on the
+ * <code>UBiDi</code> object, after <code>ubidi_setPara()</code>
+ * may have resolved only the levels of the text. Therefore,
+ * <code>ubidi_countRuns()</code> may have to allocate memory,
+ * and may fail doing so.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The number of runs.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
+
+/**
+ * Get one run's logical start, length, and directionality,
+ * which can be 0 for LTR or 1 for RTL.
+ * In an RTL run, the character at the logical start is
+ * visually on the right of the displayed run.
+ * The length is the number of characters in the run.<p>
+ * <code>ubidi_countRuns()</code> should be called
+ * before the runs are retrieved.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param runIndex is the number of the run in visual order, in the
+ * range <code>[0..ubidi_countRuns(pBiDi)-1]</code>.
+ *
+ * @param pLogicalStart is the first logical character index in the text.
+ * The pointer may be <code>NULL</code> if this index is not needed.
+ *
+ * @param pLength is the number of characters (at least one) in the run.
+ * The pointer may be <code>NULL</code> if this is not needed.
+ *
+ * @return the directionality of the run,
+ * <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>,
+ * never <code>UBIDI_MIXED</code>,
+ * never <code>UBIDI_NEUTRAL</code>.
+ *
+ * @see ubidi_countRuns
+ *
+ * Example:
+ * <pre>
+ * \code
+ * int32_t i, count=ubidi_countRuns(pBiDi),
+ * logicalStart, visualIndex=0, length;
+ * for(i=0; i<count; ++i) {
+ * if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) {
+ * do { // LTR
+ * show_char(text[logicalStart++], visualIndex++);
+ * } while(--length>0);
+ * } else {
+ * logicalStart+=length; // logicalLimit
+ * do { // RTL
+ * show_char(text[--logicalStart], visualIndex++);
+ * } while(--length>0);
+ * }
+ * }
+ *\endcode
+ * </pre>
+ *
+ * Note that in right-to-left runs, code like this places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ * <p>
+ * Use of <code>ubidi_writeReordered()</code>, optionally with the
+ * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered in order
+ * to avoid these issues.
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDiDirection U_EXPORT2
+ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
+ int32_t *pLogicalStart, int32_t *pLength);
+
+/**
+ * Get the visual position from a logical text position.
+ * If such a mapping is used many times on the same
+ * <code>UBiDi</code> object, then calling
+ * <code>ubidi_getLogicalMap()</code> is more efficient.<p>
+ *
+ * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
+ * visual position because the corresponding text character is a Bidi control
+ * removed from output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ * <p>
+ * When the visual output is altered by using options of
+ * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
+ * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
+ * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual position returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ * <p>
+ * Note that in right-to-left runs, this mapping places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ * Use of <code>ubidi_writeReordered()</code>, optionally with the
+ * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead
+ * of using the mapping, in order to avoid these issues.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param logicalIndex is the index of a character in the text.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The visual position of this character.
+ *
+ * @see ubidi_getLogicalMap
+ * @see ubidi_getLogicalIndex
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode);
+
+/**
+ * Get the logical text position from a visual position.
+ * If such a mapping is used many times on the same
+ * <code>UBiDi</code> object, then calling
+ * <code>ubidi_getVisualMap()</code> is more efficient.<p>
+ *
+ * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
+ * logical position because the corresponding text character is a Bidi mark
+ * inserted in the output by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
+ * <p>
+ * This is the inverse function to <code>ubidi_getVisualIndex()</code>.
+ * <p>
+ * When the visual output is altered by using options of
+ * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
+ * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
+ * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical position returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param visualIndex is the visual position of a character.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The index of this character in the text.
+ *
+ * @see ubidi_getVisualMap
+ * @see ubidi_getVisualIndex
+ * @see ubidi_getResultLength
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode);
+
+/**
+ * Get a logical-to-visual index map (array) for the characters in the UBiDi
+ * (paragraph or line) object.
+ * <p>
+ * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
+ * corresponding text characters are Bidi controls removed from the visual
+ * output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ * <p>
+ * When the visual output is altered by using options of
+ * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
+ * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
+ * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ * <p>
+ * Note that in right-to-left runs, this mapping places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ * Use of <code>ubidi_writeReordered()</code>, optionally with the
+ * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead
+ * of using the mapping, in order to avoid these issues.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param indexMap is a pointer to an array of <code>ubidi_getProcessedLength()</code>
+ * indexes which will reflect the reordering of the characters.
+ * If option <code>#UBIDI_OPTION_INSERT_MARKS</code> is set, the number
+ * of elements allocated in <code>indexMap</code> must be no less than
+ * <code>ubidi_getResultLength()</code>.
+ * The array does not need to be initialized.<br><br>
+ * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_getVisualMap
+ * @see ubidi_getVisualIndex
+ * @see ubidi_getProcessedLength
+ * @see ubidi_getResultLength
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
+
+/**
+ * Get a visual-to-logical index map (array) for the characters in the UBiDi
+ * (paragraph or line) object.
+ * <p>
+ * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
+ * corresponding text characters are Bidi marks inserted in the visual output
+ * by the option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
+ * <p>
+ * When the visual output is altered by using options of
+ * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
+ * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
+ * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param indexMap is a pointer to an array of <code>ubidi_getResultLength()</code>
+ * indexes which will reflect the reordering of the characters.
+ * If option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is set, the number
+ * of elements allocated in <code>indexMap</code> must be no less than
+ * <code>ubidi_getProcessedLength()</code>.
+ * The array does not need to be initialized.<br><br>
+ * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_getLogicalMap
+ * @see ubidi_getLogicalIndex
+ * @see ubidi_getProcessedLength
+ * @see ubidi_getResultLength
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
+
+/**
+ * This is a convenience function that does not use a UBiDi object.
+ * It is intended to be used when an application has determined the levels
+ * of objects (character sequences) and just needs to have them reordered (L2).
+ * This is equivalent to using <code>ubidi_getLogicalMap()</code> on a
+ * <code>UBiDi</code> object.
+ *
+ * @param levels is an array with <code>length</code> levels that have been determined by
+ * the application.
+ *
+ * @param length is the number of levels in the array, or, semantically,
+ * the number of objects to be reordered.
+ * It must be <code>length>0</code>.
+ *
+ * @param indexMap is a pointer to an array of <code>length</code>
+ * indexes which will reflect the reordering of the characters.
+ * The array does not need to be initialized.<p>
+ * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
+
+/**
+ * This is a convenience function that does not use a UBiDi object.
+ * It is intended to be used when an application has determined the levels
+ * of objects (character sequences) and just needs to have them reordered (L2).
+ * This is equivalent to using <code>ubidi_getVisualMap()</code> on a
+ * <code>UBiDi</code> object.
+ *
+ * @param levels is an array with <code>length</code> levels that have been determined by
+ * the application.
+ *
+ * @param length is the number of levels in the array, or, semantically,
+ * the number of objects to be reordered.
+ * It must be <code>length>0</code>.
+ *
+ * @param indexMap is a pointer to an array of <code>length</code>
+ * indexes which will reflect the reordering of the characters.
+ * The array does not need to be initialized.<p>
+ * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
+
+/**
+ * Invert an index map.
+ * The index mapping of the first map is inverted and written to
+ * the second one.
+ *
+ * @param srcMap is an array with <code>length</code> elements
+ * which defines the original mapping from a source array containing
+ * <code>length</code> elements to a destination array.
+ * Some elements of the source array may have no mapping in the
+ * destination array. In that case, their value will be
+ * the special value <code>UBIDI_MAP_NOWHERE</code>.
+ * All elements must be >=0 or equal to <code>UBIDI_MAP_NOWHERE</code>.
+ * Some elements may have a value >= <code>length</code>, if the
+ * destination array has more elements than the source array.
+ * There must be no duplicate indexes (two or more elements with the
+ * same value except <code>UBIDI_MAP_NOWHERE</code>).
+ *
+ * @param destMap is an array with a number of elements equal to 1 + the highest
+ * value in <code>srcMap</code>.
+ * <code>destMap</code> will be filled with the inverse mapping.
+ * If element with index i in <code>srcMap</code> has a value k different
+ * from <code>UBIDI_MAP_NOWHERE</code>, this means that element i of
+ * the source array maps to element k in the destination array.
+ * The inverse map will have value i in its k-th element.
+ * For all elements of the destination array which do not map to
+ * an element in the source array, the corresponding element in the
+ * inverse map will have a value equal to <code>UBIDI_MAP_NOWHERE</code>.
+ *
+ * @param length is the number of elements in the <code>srcMap</code> array.
+ * @see UBIDI_MAP_NOWHERE
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length);
+
+/** option flags for ubidi_writeReordered() */
+
+/**
+ * option bit for ubidi_writeReordered():
+ * keep combining characters after their base characters in RTL runs
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_KEEP_BASE_COMBINING 1
+
+/**
+ * option bit for ubidi_writeReordered():
+ * replace characters with the "mirrored" property in RTL runs
+ * by their mirror-image mappings
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_DO_MIRRORING 2
+
+/**
+ * option bit for ubidi_writeReordered():
+ * surround the run with LRMs if necessary;
+ * this is part of the approximate "inverse Bidi" algorithm
+ *
+ * <p>This option does not imply corresponding adjustment of the index
+ * mappings.</p>
+ *
+ * @see ubidi_setInverse
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_INSERT_LRM_FOR_NUMERIC 4
+
+/**
+ * option bit for ubidi_writeReordered():
+ * remove Bidi control characters
+ * (this does not affect #UBIDI_INSERT_LRM_FOR_NUMERIC)
+ *
+ * <p>This option does not imply corresponding adjustment of the index
+ * mappings.</p>
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_REMOVE_BIDI_CONTROLS 8
+
+/**
+ * option bit for ubidi_writeReordered():
+ * write the output in reverse order
+ *
+ * <p>This has the same effect as calling <code>ubidi_writeReordered()</code>
+ * first without this option, and then calling
+ * <code>ubidi_writeReverse()</code> without mirroring.
+ * Doing this in the same step is faster and avoids a temporary buffer.
+ * An example for using this option is output to a character terminal that
+ * is designed for RTL scripts and stores text in reverse order.</p>
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_OUTPUT_REVERSE 16
+
+/**
+ * Get the length of the source text processed by the last call to
+ * <code>ubidi_setPara()</code>. This length may be different from the length
+ * of the source text if option <code>#UBIDI_OPTION_STREAMING</code>
+ * has been set.
+ * <br>
+ * Note that whenever the length of the text affects the execution or the
+ * result of a function, it is the processed length which must be considered,
+ * except for <code>ubidi_setPara</code> (which receives unprocessed source
+ * text) and <code>ubidi_getLength</code> (which returns the original length
+ * of the source text).<br>
+ * In particular, the processed length is the one to consider in the following
+ * cases:
+ * <ul>
+ * <li>maximum value of the <code>limit</code> argument of
+ * <code>ubidi_setLine</code></li>
+ * <li>maximum value of the <code>charIndex</code> argument of
+ * <code>ubidi_getParagraph</code></li>
+ * <li>maximum value of the <code>charIndex</code> argument of
+ * <code>ubidi_getLevelAt</code></li>
+ * <li>number of elements in the array returned by <code>ubidi_getLevels</code></li>
+ * <li>maximum value of the <code>logicalStart</code> argument of
+ * <code>ubidi_getLogicalRun</code></li>
+ * <li>maximum value of the <code>logicalIndex</code> argument of
+ * <code>ubidi_getVisualIndex</code></li>
+ * <li>number of elements filled in the <code>*indexMap</code> argument of
+ * <code>ubidi_getLogicalMap</code></li>
+ * <li>length of text processed by <code>ubidi_writeReordered</code></li>
+ * </ul>
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @return The length of the part of the source text processed by
+ * the last call to <code>ubidi_setPara</code>.
+ * @see ubidi_setPara
+ * @see UBIDI_OPTION_STREAMING
+ * @stable ICU 3.6
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getProcessedLength(const UBiDi *pBiDi);
+
+/**
+ * Get the length of the reordered text resulting from the last call to
+ * <code>ubidi_setPara()</code>. This length may be different from the length
+ * of the source text if option <code>#UBIDI_OPTION_INSERT_MARKS</code>
+ * or option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> has been set.
+ * <br>
+ * This resulting length is the one to consider in the following cases:
+ * <ul>
+ * <li>maximum value of the <code>visualIndex</code> argument of
+ * <code>ubidi_getLogicalIndex</code></li>
+ * <li>number of elements of the <code>*indexMap</code> argument of
+ * <code>ubidi_getVisualMap</code></li>
+ * </ul>
+ * Note that this length stays identical to the source text length if
+ * Bidi marks are inserted or removed using option bits of
+ * <code>ubidi_writeReordered</code>, or if option
+ * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> has been set.
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @return The length of the reordered text resulting from
+ * the last call to <code>ubidi_setPara</code>.
+ * @see ubidi_setPara
+ * @see UBIDI_OPTION_INSERT_MARKS
+ * @see UBIDI_OPTION_REMOVE_CONTROLS
+ * @stable ICU 3.6
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getResultLength(const UBiDi *pBiDi);
+
+U_CDECL_BEGIN
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Value returned by <code>UBiDiClassCallback</code> callbacks when
+ * there is no need to override the standard Bidi class for a given code point.
+ *
+ * This constant is deprecated; use u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1 instead.
+ *
+ * @see UBiDiClassCallback
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+#define U_BIDI_CLASS_DEFAULT U_CHAR_DIRECTION_COUNT
+#endif // U_HIDE_DEPRECATED_API
+
+/**
+ * Callback type declaration for overriding default Bidi class values with
+ * custom ones.
+ * <p>Usually, the function pointer will be propagated to a <code>UBiDi</code>
+ * object by calling the <code>ubidi_setClassCallback()</code> function;
+ * then the callback will be invoked by the UBA implementation any time the
+ * class of a character is to be determined.</p>
+ *
+ * @param context is a pointer to the callback private data.
+ *
+ * @param c is the code point to get a Bidi class for.
+ *
+ * @return The directional property / Bidi class for the given code point
+ * <code>c</code> if the default class has been overridden, or
+ * <code>u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>
+ * if the standard Bidi class value for <code>c</code> is to be used.
+ * @see ubidi_setClassCallback
+ * @see ubidi_getClassCallback
+ * @stable ICU 3.6
+ */
+typedef UCharDirection U_CALLCONV
+UBiDiClassCallback(const void *context, UChar32 c);
+
+U_CDECL_END
+
+/**
+ * Retrieve the Bidi class for a given code point.
+ * <p>If a <code>#UBiDiClassCallback</code> callback is defined and returns a
+ * value other than <code>u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>,
+ * that value is used; otherwise the default class determination mechanism is invoked.</p>
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @param c is the code point whose Bidi class must be retrieved.
+ *
+ * @return The Bidi class for character <code>c</code> based
+ * on the given <code>pBiDi</code> instance.
+ * @see UBiDiClassCallback
+ * @stable ICU 3.6
+ */
+U_CAPI UCharDirection U_EXPORT2
+ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c);
+
+/**
+ * Set the callback function and callback data used by the UBA
+ * implementation for Bidi class determination.
+ * <p>This may be useful for assigning Bidi classes to PUA characters, or
+ * for special application needs. For instance, an application may want to
+ * handle all spaces like L or R characters (according to the base direction)
+ * when creating the visual ordering of logical lines which are part of a report
+ * organized in columns: there should not be interaction between adjacent
+ * cells.</p>
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @param newFn is the new callback function pointer.
+ *
+ * @param newContext is the new callback context pointer. This can be NULL.
+ *
+ * @param oldFn fillin: Returns the old callback function pointer. This can be
+ * NULL.
+ *
+ * @param oldContext fillin: Returns the old callback's context. This can be
+ * NULL.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_getClassCallback
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
+ const void *newContext, UBiDiClassCallback **oldFn,
+ const void **oldContext, UErrorCode *pErrorCode);
+
+/**
+ * Get the current callback function used for Bidi class determination.
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @param fn fillin: Returns the callback function pointer.
+ *
+ * @param context fillin: Returns the callback's private context.
+ *
+ * @see ubidi_setClassCallback
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context);
+
+/**
+ * Take a <code>UBiDi</code> object containing the reordering
+ * information for a piece of text (one or more paragraphs) set by
+ * <code>ubidi_setPara()</code> or for a line of text set by
+ * <code>ubidi_setLine()</code> and write a reordered string to the
+ * destination buffer.
+ *
+ * This function preserves the integrity of characters with multiple
+ * code units and (optionally) combining characters.
+ * Characters in RTL runs can be replaced by mirror-image characters
+ * in the destination buffer. Note that "real" mirroring has
+ * to be done in a rendering engine by glyph selection
+ * and that for many "mirrored" characters there are no
+ * Unicode characters as mirror-image equivalents.
+ * There are also options to insert or remove Bidi control
+ * characters; see the description of the <code>destSize</code>
+ * and <code>options</code> parameters and of the option bit flags.
+ *
+ * @param pBiDi A pointer to a <code>UBiDi</code> object that
+ * is set by <code>ubidi_setPara()</code> or
+ * <code>ubidi_setLine()</code> and contains the reordering
+ * information for the text that it was defined for,
+ * as well as a pointer to that text.<br><br>
+ * The text was aliased (only the pointer was stored
+ * without copying the contents) and must not have been modified
+ * since the <code>ubidi_setPara()</code> call.
+ *
+ * @param dest A pointer to where the reordered text is to be copied.
+ * The source text and <code>dest[destSize]</code>
+ * must not overlap.
+ *
+ * @param destSize The size of the <code>dest</code> buffer,
+ * in number of UChars.
+ * If the <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>
+ * option is set, then the destination length could be
+ * as large as
+ * <code>ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)</code>.
+ * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
+ * is set, then the destination length may be less than
+ * <code>ubidi_getLength(pBiDi)</code>.
+ * If none of these options is set, then the destination length
+ * will be exactly <code>ubidi_getProcessedLength(pBiDi)</code>.
+ *
+ * @param options A bit set of options for the reordering that control
+ * how the reordered text is written.
+ * The options include mirroring the characters on a code
+ * point basis and inserting LRM characters, which is used
+ * especially for transforming visually stored text
+ * to logically stored text (although this is still an
+ * imperfect implementation of an "inverse Bidi" algorithm
+ * because it uses the "forward Bidi" algorithm at its core).
+ * The available options are:
+ * <code>#UBIDI_DO_MIRRORING</code>,
+ * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
+ * <code>#UBIDI_KEEP_BASE_COMBINING</code>,
+ * <code>#UBIDI_OUTPUT_REVERSE</code>,
+ * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The length of the output string.
+ *
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_writeReordered(UBiDi *pBiDi,
+ UChar *dest, int32_t destSize,
+ uint16_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Reverse a Right-To-Left run of Unicode text.
+ *
+ * This function preserves the integrity of characters with multiple
+ * code units and (optionally) combining characters.
+ * Characters can be replaced by mirror-image characters
+ * in the destination buffer. Note that "real" mirroring has
+ * to be done in a rendering engine by glyph selection
+ * and that for many "mirrored" characters there are no
+ * Unicode characters as mirror-image equivalents.
+ * There are also options to insert or remove Bidi control
+ * characters.
+ *
+ * This function is the implementation for reversing RTL runs as part
+ * of <code>ubidi_writeReordered()</code>. For detailed descriptions
+ * of the parameters, see there.
+ * Since no Bidi controls are inserted here, the output string length
+ * will never exceed <code>srcLength</code>.
+ *
+ * @see ubidi_writeReordered
+ *
+ * @param src A pointer to the RTL run text.
+ *
+ * @param srcLength The length of the RTL run.
+ *
+ * @param dest A pointer to where the reordered text is to be copied.
+ * <code>src[srcLength]</code> and <code>dest[destSize]</code>
+ * must not overlap.
+ *
+ * @param destSize The size of the <code>dest</code> buffer,
+ * in number of UChars.
+ * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
+ * is set, then the destination length may be less than
+ * <code>srcLength</code>.
+ * If this option is not set, then the destination length
+ * will be exactly <code>srcLength</code>.
+ *
+ * @param options A bit set of options for the reordering that control
+ * how the reordered text is written.
+ * See the <code>options</code> parameter in <code>ubidi_writeReordered()</code>.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The length of the output string.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_writeReverse(const UChar *src, int32_t srcLength,
+ UChar *dest, int32_t destSize,
+ uint16_t options,
+ UErrorCode *pErrorCode);
+
+/*#define BIDI_SAMPLE_CODE*/
+/*@}*/
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/ubiditransform.h b/thirdparty/icu4c/common/unicode/ubiditransform.h
new file mode 100644
index 0000000000..2dd7564010
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ubiditransform.h
@@ -0,0 +1,326 @@
+/*
+******************************************************************************
+*
+* © 2016 and later: Unicode, Inc. and others.
+* License & terms of use: http://www.unicode.org/copyright.html
+*
+******************************************************************************
+* file name: ubiditransform.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2016jul24
+* created by: Lina Kemmel
+*
+*/
+
+#ifndef UBIDITRANSFORM_H
+#define UBIDITRANSFORM_H
+
+#include "unicode/utypes.h"
+#include "unicode/ubidi.h"
+#include "unicode/uchar.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief Bidi Transformations
+ */
+
+/**
+ * `UBiDiOrder` indicates the order of text.
+ *
+ * This bidi transformation engine supports all possible combinations (4 in
+ * total) of input and output text order:
+ *
+ * - <logical input, visual output>: unless the output direction is RTL, this
+ * corresponds to a normal operation of the Bidi algorithm as described in the
+ * Unicode Technical Report and implemented by `UBiDi` when the
+ * reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL
+ * mode is not supported by `UBiDi` and is accomplished through
+ * reversing a visual LTR string,
+ *
+ * - <visual input, logical output>: unless the input direction is RTL, this
+ * corresponds to an "inverse bidi algorithm" in `UBiDi` with the
+ * reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`.
+ * Visual RTL mode is not supported by `UBiDi` and is
+ * accomplished through reversing a visual LTR string,
+ *
+ * - <logical input, logical output>: if the input and output base directions
+ * mismatch, this corresponds to the `UBiDi` implementation with the
+ * reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the
+ * input and output base directions are identical, the transformation engine
+ * will only handle character mirroring and Arabic shaping operations without
+ * reordering,
+ *
+ * - <visual input, visual output>: this reordering mode is not supported by
+ * the `UBiDi` engine; it implies character mirroring, Arabic
+ * shaping, and - if the input/output base directions mismatch - string
+ * reverse operations.
+ * @see ubidi_setInverse
+ * @see ubidi_setReorderingMode
+ * @see UBIDI_REORDER_DEFAULT
+ * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
+ * @see UBIDI_REORDER_RUNS_ONLY
+ * @stable ICU 58
+ */
+typedef enum {
+ /** 0: Constant indicating a logical order.
+ * This is the default for input text.
+ * @stable ICU 58
+ */
+ UBIDI_LOGICAL = 0,
+ /** 1: Constant indicating a visual order.
+ * This is a default for output text.
+ * @stable ICU 58
+ */
+ UBIDI_VISUAL
+} UBiDiOrder;
+
+/**
+ * <code>UBiDiMirroring</code> indicates whether or not characters with the
+ * "mirrored" property in RTL runs should be replaced with their mirror-image
+ * counterparts.
+ * @see UBIDI_DO_MIRRORING
+ * @see ubidi_setReorderingOptions
+ * @see ubidi_writeReordered
+ * @see ubidi_writeReverse
+ * @stable ICU 58
+ */
+typedef enum {
+ /** 0: Constant indicating that character mirroring should not be
+ * performed.
+ * This is the default.
+ * @stable ICU 58
+ */
+ UBIDI_MIRRORING_OFF = 0,
+ /** 1: Constant indicating that character mirroring should be performed.
+ * This corresponds to calling <code>ubidi_writeReordered</code> or
+ * <code>ubidi_writeReverse</code> with the
+ * <code>UBIDI_DO_MIRRORING</code> option bit set.
+ * @stable ICU 58
+ */
+ UBIDI_MIRRORING_ON
+} UBiDiMirroring;
+
+/**
+ * Forward declaration of the <code>UBiDiTransform</code> structure that stores
+ * information used by the layout transformation engine.
+ * @stable ICU 58
+ */
+typedef struct UBiDiTransform UBiDiTransform;
+
+/**
+ * Performs transformation of text from the bidi layout defined by the input
+ * ordering scheme to the bidi layout defined by the output ordering scheme,
+ * and applies character mirroring and Arabic shaping operations.<p>
+ * In terms of <code>UBiDi</code>, such a transformation implies:
+ * <ul>
+ * <li>calling <code>ubidi_setReorderingMode</code> as needed (when the
+ * reordering mode is other than normal),</li>
+ * <li>calling <code>ubidi_setInverse</code> as needed (when text should be
+ * transformed from a visual to a logical form),</li>
+ * <li>resolving embedding levels of each character in the input text by
+ * calling <code>ubidi_setPara</code>,</li>
+ * <li>reordering the characters based on the computed embedding levels, also
+ * performing character mirroring as needed, and streaming the result to the
+ * output, by calling <code>ubidi_writeReordered</code>,</li>
+ * <li>performing Arabic digit and letter shaping on the output text by calling
+ * <code>u_shapeArabic</code>.</li>
+ * </ul>
+ * An "ordering scheme" encompasses the base direction and the order of text,
+ * and these characteristics must be defined by the caller for both input and
+ * output explicitly.<p>
+ * There are 36 possible combinations of <input, output> ordering schemes,
+ * which are partially supported by <code>UBiDi</code> already. Examples of the
+ * currently supported combinations:
+ * <ul>
+ * <li><Logical LTR, Visual LTR>: this is equivalent to calling
+ * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
+ * <li><Logical RTL, Visual LTR>: this is equivalent to calling
+ * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>,</li>
+ * <li><Logical Default ("Auto") LTR, Visual LTR>: this is equivalent to
+ * calling <code>ubidi_setPara</code> with
+ * <code>paraLevel == UBIDI_DEFAULT_LTR</code>,</li>
+ * <li><Logical Default ("Auto") RTL, Visual LTR>: this is equivalent to
+ * calling <code>ubidi_setPara</code> with
+ * <code>paraLevel == UBIDI_DEFAULT_RTL</code>,</li>
+ * <li><Visual LTR, Logical LTR>: this is equivalent to
+ * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then
+ * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
+ * <li><Visual LTR, Logical RTL>: this is equivalent to
+ * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then
+ * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>.</li>
+ * </ul>
+ * All combinations that involve the Visual RTL scheme are unsupported by
+ * <code>UBiDi</code>, for instance:
+ * <ul>
+ * <li><Logical LTR, Visual RTL>,</li>
+ * <li><Visual RTL, Logical RTL>.</li>
+ * </ul>
+ * <p>Example of usage of the transformation engine:<br>
+ * <pre>
+ * \code
+ * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0};
+ * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0};
+ * UErrorCode errorCode = U_ZERO_ERROR;
+ * // Run a transformation.
+ * ubiditransform_transform(pBidiTransform,
+ * text1, -1, text2, -1,
+ * UBIDI_LTR, UBIDI_VISUAL,
+ * UBIDI_RTL, UBIDI_LOGICAL,
+ * UBIDI_MIRRORING_OFF,
+ * U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
+ * &errorCode);
+ * // Do something with text2.
+ * text2[4] = '2';
+ * // Run a reverse transformation.
+ * ubiditransform_transform(pBidiTransform,
+ * text2, -1, text1, -1,
+ * UBIDI_RTL, UBIDI_LOGICAL,
+ * UBIDI_LTR, UBIDI_VISUAL,
+ * UBIDI_MIRRORING_OFF,
+ * U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
+ * &errorCode);
+ *\endcode
+ * </pre>
+ * </p>
+ *
+ * @param pBiDiTransform A pointer to a <code>UBiDiTransform</code> object
+ * allocated with <code>ubiditransform_open()</code> or
+ * <code>NULL</code>.<p>
+ * This object serves for one-time setup to amortize initialization
+ * overheads. Use of this object is not thread-safe. All other threads
+ * should allocate a new <code>UBiDiTransform</code> object by calling
+ * <code>ubiditransform_open()</code> before using it. Alternatively,
+ * a caller can set this parameter to <code>NULL</code>, in which case
+ * the object will be allocated by the engine on the fly.</p>
+ * @param src A pointer to the text that the Bidi layout transformations will
+ * be performed on.
+ * <p><strong>Note:</strong> the text must be (at least)
+ * <code>srcLength</code> long.</p>
+ * @param srcLength The length of the text, in number of UChars. If
+ * <code>srcLength == -1</code> then the text must be zero-terminated.
+ * @param dest A pointer to where the processed text is to be copied.
+ * @param destSize The size of the <code>dest</code> buffer, in number of
+ * UChars. If the <code>U_SHAPE_LETTERS_UNSHAPE</code> option is set,
+ * then the destination length could be as large as
+ * <code>srcLength * 2</code>. Otherwise, the destination length will
+ * not exceed <code>srcLength</code>. If the caller reserves the last
+ * position for zero-termination, it should be excluded from
+ * <code>destSize</code>.
+ * <p><code>destSize == -1</code> is allowed and makes sense when
+ * <code>dest</code> holds some meaningful value, e.g. that of
+ * <code>src</code>. In this case <code>dest</code> must be
+ * zero-terminated.</p>
+ * @param inParaLevel A base embedding level of the input as defined in
+ * <code>ubidi_setPara</code> documentation for the
+ * <code>paraLevel</code> parameter.
+ * @param inOrder An order of the input, which can be one of the
+ * <code>UBiDiOrder</code> values.
+ * @param outParaLevel A base embedding level of the output as defined in
+ * <code>ubidi_setPara</code> documentation for the
+ * <code>paraLevel</code> parameter.
+ * @param outOrder An order of the output, which can be one of the
+ * <code>UBiDiOrder</code> values.
+ * @param doMirroring Indicates whether or not to perform character mirroring,
+ * and can accept one of the <code>UBiDiMirroring</code> values.
+ * @param shapingOptions Arabic digit and letter shaping options defined in the
+ * ushape.h documentation.
+ * <p><strong>Note:</strong> Direction indicator options are computed by
+ * the transformation engine based on the effective ordering schemes, so
+ * user-defined direction indicators will be ignored.</p>
+ * @param pErrorCode A pointer to an error code value.
+ *
+ * @return The destination length, i.e. the number of UChars written to
+ * <code>dest</code>. If the transformation fails, the return value
+ * will be 0 (and the error code will be written to
+ * <code>pErrorCode</code>).
+ *
+ * @see UBiDiLevel
+ * @see UBiDiOrder
+ * @see UBiDiMirroring
+ * @see ubidi_setPara
+ * @see u_shapeArabic
+ * @stable ICU 58
+ */
+U_CAPI uint32_t U_EXPORT2
+ubiditransform_transform(UBiDiTransform *pBiDiTransform,
+ const UChar *src, int32_t srcLength,
+ UChar *dest, int32_t destSize,
+ UBiDiLevel inParaLevel, UBiDiOrder inOrder,
+ UBiDiLevel outParaLevel, UBiDiOrder outOrder,
+ UBiDiMirroring doMirroring, uint32_t shapingOptions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Allocates a <code>UBiDiTransform</code> object. This object can be reused,
+ * e.g. with different ordering schemes, mirroring or shaping options.<p>
+ * <strong>Note:</strong> The object can only be reused in the same thread.
+ * All other threads should allocate a new <code>UBiDiTransform</code> object
+ * before using it.<p>
+ * Example of usage:<p>
+ * <pre>
+ * \code
+ * UErrorCode errorCode = U_ZERO_ERROR;
+ * // Open a new UBiDiTransform.
+ * UBiDiTransform* transform = ubiditransform_open(&errorCode);
+ * // Run a transformation.
+ * ubiditransform_transform(transform,
+ * text1, -1, text2, -1,
+ * UBIDI_RTL, UBIDI_LOGICAL,
+ * UBIDI_LTR, UBIDI_VISUAL,
+ * UBIDI_MIRRORING_ON,
+ * U_SHAPE_DIGITS_EN2AN,
+ * &errorCode);
+ * // Do something with the output text and invoke another transformation using
+ * // that text as input.
+ * ubiditransform_transform(transform,
+ * text2, -1, text3, -1,
+ * UBIDI_LTR, UBIDI_VISUAL,
+ * UBIDI_RTL, UBIDI_VISUAL,
+ * UBIDI_MIRRORING_ON,
+ * 0, &errorCode);
+ *\endcode
+ * </pre>
+ * <p>
+ * The <code>UBiDiTransform</code> object must be deallocated by calling
+ * <code>ubiditransform_close()</code>.
+ *
+ * @return An empty <code>UBiDiTransform</code> object.
+ * @stable ICU 58
+ */
+U_CAPI UBiDiTransform* U_EXPORT2
+ubiditransform_open(UErrorCode *pErrorCode);
+
+/**
+ * Deallocates the given <code>UBiDiTransform</code> object.
+ * @stable ICU 58
+ */
+U_CAPI void U_EXPORT2
+ubiditransform_close(UBiDiTransform *pBidiTransform);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUBiDiTransformPointer
+ * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 58
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);
+
+U_NAMESPACE_END
+
+#endif
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/ubrk.h b/thirdparty/icu4c/common/unicode/ubrk.h
new file mode 100644
index 0000000000..37189a8598
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ubrk.h
@@ -0,0 +1,631 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1996-2015, International Business Machines Corporation and others.
+* All Rights Reserved.
+******************************************************************************
+*/
+
+#ifndef UBRK_H
+#define UBRK_H
+
+#include "unicode/utypes.h"
+#include "unicode/uloc.h"
+#include "unicode/utext.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * A text-break iterator.
+ * For usage in C programs.
+ */
+#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
+# define UBRK_TYPEDEF_UBREAK_ITERATOR
+ /**
+ * Opaque type representing an ICU Break iterator object.
+ * @stable ICU 2.0
+ */
+ typedef struct UBreakIterator UBreakIterator;
+#endif
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/parseerr.h"
+
+/**
+ * \file
+ * \brief C API: BreakIterator
+ *
+ * <h2> BreakIterator C API </h2>
+ *
+ * The BreakIterator C API defines methods for finding the location
+ * of boundaries in text. A UBreakIterator maintains a
+ * current position and scans over text, returning the index of characters
+ * where boundaries occur.
+ * <p>
+ * Line boundary analysis determines where a text string can be broken
+ * when line-wrapping. The mechanism correctly handles punctuation and
+ * hyphenated words.
+ * <p>
+ * Note: The locale keyword "lb" can be used to modify line break
+ * behavior according to the CSS level 3 line-break options, see
+ * <http://dev.w3.org/csswg/css-text/#line-breaking>. For example:
+ * "ja@lb=strict", "zh@lb=loose".
+ * <p>
+ * Sentence boundary analysis allows selection with correct
+ * interpretation of periods within numbers and abbreviations, and
+ * trailing punctuation marks such as quotation marks and parentheses.
+ * <p>
+ * Note: The locale keyword "ss" can be used to enable use of
+ * segmentation suppression data (preventing breaks in English after
+ * abbreviations such as "Mr." or "Est.", for example), as follows:
+ * "en@ss=standard".
+ * <p>
+ * Word boundary analysis is used by search and replace functions, as
+ * well as within text editing applications that allow the user to
+ * select words with a double click. Word selection provides correct
+ * interpretation of punctuation marks within and following
+ * words. Characters that are not part of a word, such as symbols or
+ * punctuation marks, have word-breaks on both sides.
+ * <p>
+ * Character boundary analysis identifies the boundaries of
+ * "Extended Grapheme Clusters", which are groupings of codepoints
+ * that should be treated as character-like units for many text operations.
+ * Please see Unicode Standard Annex #29, Unicode Text Segmentation,
+ * http://www.unicode.org/reports/tr29/ for additional information
+ * on grapheme clusters and guidelines on their use.
+ * <p>
+ * Title boundary analysis locates all positions,
+ * typically starts of words, that should be set to Title Case
+ * when title casing the text.
+ * <p>
+ * The text boundary positions are found according to the rules
+ * described in Unicode Standard Annex #29, Text Boundaries, and
+ * Unicode Standard Annex #14, Line Breaking Properties. These
+ * are available at http://www.unicode.org/reports/tr14/ and
+ * http://www.unicode.org/reports/tr29/.
+ * <p>
+ * In addition to the plain C API defined in this header file, an
+ * object oriented C++ API with equivalent functionality is defined in the
+ * file brkiter.h.
+ * <p>
+ * Code snippets illustrating the use of the Break Iterator APIs
+ * are available in the ICU User Guide,
+ * http://icu-project.org/userguide/boundaryAnalysis.html
+ * and in the sample program icu/source/samples/break/break.cpp
+ */
+
+/** The possible types of text boundaries. @stable ICU 2.0 */
+typedef enum UBreakIteratorType {
+ /** Character breaks @stable ICU 2.0 */
+ UBRK_CHARACTER = 0,
+ /** Word breaks @stable ICU 2.0 */
+ UBRK_WORD = 1,
+ /** Line breaks @stable ICU 2.0 */
+ UBRK_LINE = 2,
+ /** Sentence breaks @stable ICU 2.0 */
+ UBRK_SENTENCE = 3,
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Title Case breaks
+ * The iterator created using this type locates title boundaries as described for
+ * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
+ * please use Word Boundary iterator.
+ *
+ * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
+ */
+ UBRK_TITLE = 4,
+ /**
+ * One more than the highest normal UBreakIteratorType value.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UBRK_COUNT = 5
+#endif // U_HIDE_DEPRECATED_API
+} UBreakIteratorType;
+
+/** Value indicating all text boundaries have been returned.
+ * @stable ICU 2.0
+ */
+#define UBRK_DONE ((int32_t) -1)
+
+
+/**
+ * Enum constants for the word break tags returned by
+ * getRuleStatus(). A range of values is defined for each category of
+ * word, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ *
+ * The numeric values of all of these constants are stable (will not change).
+ *
+ * @stable ICU 2.2
+*/
+typedef enum UWordBreak {
+ /** Tag value for "words" that do not fit into any of other categories.
+ * Includes spaces and most punctuation. */
+ UBRK_WORD_NONE = 0,
+ /** Upper bound for tags for uncategorized words. */
+ UBRK_WORD_NONE_LIMIT = 100,
+ /** Tag value for words that appear to be numbers, lower limit. */
+ UBRK_WORD_NUMBER = 100,
+ /** Tag value for words that appear to be numbers, upper limit. */
+ UBRK_WORD_NUMBER_LIMIT = 200,
+ /** Tag value for words that contain letters, excluding
+ * hiragana, katakana or ideographic characters, lower limit. */
+ UBRK_WORD_LETTER = 200,
+ /** Tag value for words containing letters, upper limit */
+ UBRK_WORD_LETTER_LIMIT = 300,
+ /** Tag value for words containing kana characters, lower limit */
+ UBRK_WORD_KANA = 300,
+ /** Tag value for words containing kana characters, upper limit */
+ UBRK_WORD_KANA_LIMIT = 400,
+ /** Tag value for words containing ideographic characters, lower limit */
+ UBRK_WORD_IDEO = 400,
+ /** Tag value for words containing ideographic characters, upper limit */
+ UBRK_WORD_IDEO_LIMIT = 500
+} UWordBreak;
+
+/**
+ * Enum constants for the line break tags returned by getRuleStatus().
+ * A range of values is defined for each category of
+ * line break, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ *
+ * The numeric values of all of these constants are stable (will not change).
+ *
+ * @stable ICU 2.8
+*/
+typedef enum ULineBreakTag {
+ /** Tag value for soft line breaks, positions at which a line break
+ * is acceptable but not required */
+ UBRK_LINE_SOFT = 0,
+ /** Upper bound for soft line breaks. */
+ UBRK_LINE_SOFT_LIMIT = 100,
+ /** Tag value for a hard, or mandatory line break */
+ UBRK_LINE_HARD = 100,
+ /** Upper bound for hard line breaks. */
+ UBRK_LINE_HARD_LIMIT = 200
+} ULineBreakTag;
+
+
+
+/**
+ * Enum constants for the sentence break tags returned by getRuleStatus().
+ * A range of values is defined for each category of
+ * sentence, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ *
+ * The numeric values of all of these constants are stable (will not change).
+ *
+ * @stable ICU 2.8
+*/
+typedef enum USentenceBreakTag {
+ /** Tag value for sentences ending with a sentence terminator
+ * ('.', '?', '!', etc.) character, possibly followed by a
+ * hard separator (CR, LF, PS, etc.)
+ */
+ UBRK_SENTENCE_TERM = 0,
+ /** Upper bound for tags for sentences ended by sentence terminators
+ * (exclusive; same value as UBRK_SENTENCE_SEP). */
+ UBRK_SENTENCE_TERM_LIMIT = 100,
+ /** Tag value for sentences that do not contain an ending
+ * sentence terminator ('.', '?', '!', etc.) character, but
+ * are ended only by a hard separator (CR, LF, PS, etc.) or end of input. */
+ UBRK_SENTENCE_SEP = 100,
+ /** Upper bound for tags for sentences ended by a separator. */
+ UBRK_SENTENCE_SEP_LIMIT = 200
+
+} USentenceBreakTag;
+
+
+/**
+ * Open a new UBreakIterator for locating text boundaries for a specified locale.
+ * A UBreakIterator may be used for detecting character, line, word,
+ * and sentence breaks in text.
+ * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD,
+ * UBRK_LINE, UBRK_SENTENCE
+ * @param locale The locale specifying the text-breaking conventions. Note that
+ * locale keys such as "lb" and "ss" may be used to modify text break behavior,
+ * see general discussion of BreakIterator C API.
+ * @param text The text to be iterated over. May be null, in which case ubrk_setText() is
+ * used to specify the text to be iterated.
+ * @param textLength The number of characters in text, or -1 if null-terminated.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified locale.
+ * @see ubrk_openRules
+ * @stable ICU 2.0
+ */
+U_CAPI UBreakIterator* U_EXPORT2
+ubrk_open(UBreakIteratorType type,
+ const char *locale,
+ const UChar *text,
+ int32_t textLength,
+ UErrorCode *status);
+
+/**
+ * Open a new UBreakIterator for locating text boundaries using specified breaking rules.
+ * The rule syntax is ... (TBD)
+ * @param rules A set of rules specifying the text breaking conventions.
+ * @param rulesLength The number of characters in rules, or -1 if null-terminated.
+ * @param text The text to be iterated over. May be null, in which case ubrk_setText() is
+ * used to specify the text to be iterated.
+ * @param textLength The number of characters in text, or -1 if null-terminated.
+ * @param parseErr Receives position and context information for any syntax errors
+ * detected while parsing the rules.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified rules.
+ * @see ubrk_open
+ * @stable ICU 2.2
+ */
+U_CAPI UBreakIterator* U_EXPORT2
+ubrk_openRules(const UChar *rules,
+ int32_t rulesLength,
+ const UChar *text,
+ int32_t textLength,
+ UParseError *parseErr,
+ UErrorCode *status);
+
+/**
+ * Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
+ * Opening a UBreakIterator this way is substantially faster than using ubrk_openRules.
+ * Binary rules may be obtained using ubrk_getBinaryRules. The compiled rules are not
+ * compatible across different major versions of ICU, nor across platforms of different
+ * endianness or different base character set family (ASCII vs EBCDIC).
+ * @param binaryRules A set of compiled binary rules specifying the text breaking
+ * conventions. Ownership of the storage containing the compiled
+ * rules remains with the caller of this function. The compiled
+ * rules must not be modified or deleted during the life of the
+ * break iterator.
+ * @param rulesLength The length of binaryRules in bytes; must be >= 0.
+ * @param text The text to be iterated over. May be null, in which case
+ * ubrk_setText() is used to specify the text to be iterated.
+ * @param textLength The number of characters in text, or -1 if null-terminated.
+ * @param status Pointer to UErrorCode to receive any errors.
+ * @return UBreakIterator for the specified rules.
+ * @see ubrk_getBinaryRules
+ * @stable ICU 59
+ */
+U_CAPI UBreakIterator* U_EXPORT2
+ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
+ const UChar * text, int32_t textLength,
+ UErrorCode * status);
+
+/**
+ * Thread safe cloning operation
+ * @param bi iterator to be cloned
+ * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
+ * user allocated space for the new clone. If NULL new memory will be allocated.
+ * If buffer is not large enough, new memory will be allocated.
+ * Clients can use the U_BRK_SAFECLONE_BUFFERSIZE.
+ * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
+ * pointer to size of allocated space.
+ * If *pBufferSize == 0, a sufficient size for use in cloning will
+ * be returned ('pre-flighting')
+ * If *pBufferSize is not enough for a stack-based safe clone,
+ * new memory will be allocated.
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
+ * @return pointer to the new clone
+ * @stable ICU 2.0
+ */
+U_CAPI UBreakIterator * U_EXPORT2
+ubrk_safeClone(
+ const UBreakIterator *bi,
+ void *stackBuffer,
+ int32_t *pBufferSize,
+ UErrorCode *status);
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/**
+ * A recommended size (in bytes) for the memory buffer to be passed to ubrk_safeClone().
+ * @deprecated ICU 52. Do not rely on ubrk_safeClone() cloning into any provided buffer.
+ */
+#define U_BRK_SAFECLONE_BUFFERSIZE 1
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+* Close a UBreakIterator.
+* Once closed, a UBreakIterator may no longer be used.
+* @param bi The break iterator to close.
+ * @stable ICU 2.0
+*/
+U_CAPI void U_EXPORT2
+ubrk_close(UBreakIterator *bi);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUBreakIteratorPointer
+ * "Smart pointer" class, closes a UBreakIterator via ubrk_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Sets an existing iterator to point to a new piece of text.
+ * The break iterator retains a pointer to the supplied text.
+ * The caller must not modify or delete the text while the BreakIterator
+ * retains the reference.
+ *
+ * @param bi The iterator to use
+ * @param text The text to be set
+ * @param textLength The length of the text
+ * @param status The error code
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubrk_setText(UBreakIterator* bi,
+ const UChar* text,
+ int32_t textLength,
+ UErrorCode* status);
+
+
+/**
+ * Sets an existing iterator to point to a new piece of text.
+ *
+ * All index positions returned by break iterator functions are
+ * native indices from the UText. For example, when breaking UTF-8
+ * encoded text, the break positions returned by \ref ubrk_next, \ref ubrk_previous, etc.
+ * will be UTF-8 string indices, not UTF-16 positions.
+ *
+ * @param bi The iterator to use
+ * @param text The text to be set.
+ * This function makes a shallow clone of the supplied UText. This means
+ * that the caller is free to immediately close or otherwise reuse the
+ * UText that was passed as a parameter, but that the underlying text itself
+ * must not be altered while being referenced by the break iterator.
+ * @param status The error code
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+ubrk_setUText(UBreakIterator* bi,
+ UText* text,
+ UErrorCode* status);
+
+
+
+/**
+ * Determine the most recently-returned text boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous,
+ * \ref ubrk_first, or \ref ubrk_last.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_current(const UBreakIterator *bi);
+
+/**
+ * Advance the iterator to the boundary following the current boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index of the next text boundary, or UBRK_DONE
+ * if all text boundaries have been returned.
+ * @see ubrk_previous
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_next(UBreakIterator *bi);
+
+/**
+ * Set the iterator position to the boundary preceding the current boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index of the preceding text boundary, or UBRK_DONE
+ * if all text boundaries have been returned.
+ * @see ubrk_next
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_previous(UBreakIterator *bi);
+
+/**
+ * Set the iterator position to zero, the start of the text being scanned.
+ * @param bi The break iterator to use.
+ * @return The new iterator position (zero).
+ * @see ubrk_last
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_first(UBreakIterator *bi);
+
+/**
+ * Set the iterator position to the index immediately <EM>beyond</EM> the last character in the text being scanned.
+ * This is not the same as the last character.
+ * @param bi The break iterator to use.
+ * @return The character offset immediately <EM>beyond</EM> the last character in the
+ * text being scanned.
+ * @see ubrk_first
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_last(UBreakIterator *bi);
+
+/**
+ * Set the iterator position to the first boundary preceding the specified offset.
+ * The new position is always smaller than offset, or UBRK_DONE.
+ * @param bi The break iterator to use.
+ * @param offset The offset to begin scanning.
+ * @return The text boundary preceding offset, or UBRK_DONE.
+ * @see ubrk_following
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_preceding(UBreakIterator *bi,
+ int32_t offset);
+
+/**
+ * Advance the iterator to the first boundary following the specified offset.
+ * The value returned is always greater than offset, or UBRK_DONE.
+ * @param bi The break iterator to use.
+ * @param offset The offset to begin scanning.
+ * @return The text boundary following offset, or UBRK_DONE.
+ * @see ubrk_preceding
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_following(UBreakIterator *bi,
+ int32_t offset);
+
+/**
+* Get a locale for which text breaking information is available.
+* A UBreakIterator in a locale returned by this function will perform the correct
+* text breaking for the locale.
+* @param index The index of the desired locale.
+* @return A locale for which text breaking information is available, or 0 if none.
+* @see ubrk_countAvailable
+* @stable ICU 2.0
+*/
+U_CAPI const char* U_EXPORT2
+ubrk_getAvailable(int32_t index);
+
+/**
+* Determine how many locales have text breaking information available.
+* This function is most useful as determining the loop ending condition for
+* calls to \ref ubrk_getAvailable.
+* @return The number of locales for which text breaking information is available.
+* @see ubrk_getAvailable
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+ubrk_countAvailable(void);
+
+
+/**
+* Returns true if the specified position is a boundary position. As a side
+* effect, leaves the iterator pointing to the first boundary position at
+* or after "offset".
+* @param bi The break iterator to use.
+* @param offset the offset to check.
+* @return True if "offset" is a boundary position.
+* @stable ICU 2.0
+*/
+U_CAPI UBool U_EXPORT2
+ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
+
+/**
+ * Return the status from the break rule that determined the most recently
+ * returned break position. The values appear in the rule source
+ * within brackets, {123}, for example. For rules that do not specify a
+ * status, a default value of 0 is returned.
+ * <p>
+ * For word break iterators, the possible values are defined in enum UWordBreak.
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_getRuleStatus(UBreakIterator *bi);
+
+/**
+ * Get the statuses from the break rules that determined the most recently
+ * returned break position. The values appear in the rule source
+ * within brackets, {123}, for example. The default status value for rules
+ * that do not explicitly provide one is zero.
+ * <p>
+ * For word break iterators, the possible values are defined in enum UWordBreak.
+ * @param bi The break iterator to use
+ * @param fillInVec an array to be filled in with the status values.
+ * @param capacity the length of the supplied vector. A length of zero causes
+ * the function to return the number of status values, in the
+ * normal way, without attempting to store any values.
+ * @param status receives error codes.
+ * @return The number of rule status values from rules that determined
+ * the most recent boundary returned by the break iterator.
+ * @stable ICU 3.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
+
+/**
+ * Return the locale of the break iterator. You can choose between the valid and
+ * the actual locale.
+ * @param bi break iterator
+ * @param type locale type (valid or actual)
+ * @param status error code
+ * @return locale string
+ * @stable ICU 2.8
+ */
+U_CAPI const char* U_EXPORT2
+ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
+
+/**
+ * Set the subject text string upon which the break iterator is operating
+ * without changing any other aspect of the state.
+ * The new and previous text strings must have the same content.
+ *
+ * This function is intended for use in environments where ICU is operating on
+ * strings that may move around in memory. It provides a mechanism for notifying
+ * ICU that the string has been relocated, and providing a new UText to access the
+ * string in its new position.
+ *
+ * Note that the break iterator never copies the underlying text
+ * of a string being processed, but always operates directly on the original text
+ * provided by the user. Refreshing simply drops the references to the old text
+ * and replaces them with references to the new.
+ *
+ * Caution: this function is normally used only by very specialized
+ * system-level code. One example use case is with garbage collection
+ * that moves the text in memory.
+ *
+ * @param bi The break iterator.
+ * @param text The new (moved) text string.
+ * @param status Receives errors detected by this function.
+ *
+ * @stable ICU 49
+ */
+U_CAPI void U_EXPORT2
+ubrk_refreshUText(UBreakIterator *bi,
+ UText *text,
+ UErrorCode *status);
+
+
+/**
+ * Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
+ * The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator
+ * more quickly than using ubrk_openRules. The compiled rules are not compatible across
+ * different major versions of ICU, nor across platforms of different endianness or
+ * different base character set family (ASCII vs EBCDIC). Supports preflighting (with
+ * binaryRules=NULL and rulesCapacity=0) to get the rules length without copying them to
+ * the binaryRules buffer. However, whether preflighting or not, if the actual length
+ * is greater than INT32_MAX, then the function returns 0 and sets *status to
+ * U_INDEX_OUTOFBOUNDS_ERROR.
+ *
+ * @param bi The break iterator to use.
+ * @param binaryRules Buffer to receive the compiled binary rules; set to NULL for
+ * preflighting.
+ * @param rulesCapacity Capacity (in bytes) of the binaryRules buffer; set to 0 for
+ * preflighting. Must be >= 0.
+ * @param status Pointer to UErrorCode to receive any errors, such as
+ * U_BUFFER_OVERFLOW_ERROR, U_INDEX_OUTOFBOUNDS_ERROR, or
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ * @return The actual byte length of the binary rules, if <= INT32_MAX;
+ * otherwise 0. If not preflighting and this is larger than
+ * rulesCapacity, *status will be set to an error.
+ * @see ubrk_openBinaryRules
+ * @stable ICU 59
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_getBinaryRules(UBreakIterator *bi,
+ uint8_t * binaryRules, int32_t rulesCapacity,
+ UErrorCode * status);
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/ucasemap.h b/thirdparty/icu4c/common/unicode/ucasemap.h
new file mode 100644
index 0000000000..d1c1b483ab
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ucasemap.h
@@ -0,0 +1,388 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2005-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ucasemap.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2005may06
+* created by: Markus W. Scherer
+*
+* Case mapping service object and functions using it.
+*/
+
+#ifndef __UCASEMAP_H__
+#define __UCASEMAP_H__
+
+#include "unicode/utypes.h"
+#include "unicode/stringoptions.h"
+#include "unicode/ustring.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C API: Unicode case mapping functions using a UCaseMap service object.
+ *
+ * The service object takes care of memory allocations, data loading, and setup
+ * for the attributes, as usual.
+ *
+ * Currently, the functionality provided here does not overlap with uchar.h
+ * and ustring.h, except for ucasemap_toTitle().
+ *
+ * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
+ */
+
+/**
+ * UCaseMap is an opaque service object for newer ICU case mapping functions.
+ * Older functions did not use a service object.
+ * @stable ICU 3.4
+ */
+struct UCaseMap;
+typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
+
+/**
+ * Open a UCaseMap service object for a locale and a set of options.
+ * The locale ID and options are preprocessed so that functions using the
+ * service object need not process them in each call.
+ *
+ * @param locale ICU locale ID, used for language-dependent
+ * upper-/lower-/title-casing according to the Unicode standard.
+ * Usual semantics: ""=root, NULL=default locale, etc.
+ * @param options Options bit set, used for case folding and string comparisons.
+ * Same flags as for u_foldCase(), u_strFoldCase(),
+ * u_strCaseCompare(), etc.
+ * Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return Pointer to a UCaseMap service object, if successful.
+ *
+ * @see U_FOLD_CASE_DEFAULT
+ * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @stable ICU 3.4
+ */
+U_CAPI UCaseMap * U_EXPORT2
+ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
+
+/**
+ * Close a UCaseMap service object.
+ * @param csm Object to be closed.
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+ucasemap_close(UCaseMap *csm);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUCaseMapPointer
+ * "Smart pointer" class, closes a UCaseMap via ucasemap_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Get the locale ID that is used for language-dependent case mappings.
+ * @param csm UCaseMap service object.
+ * @return locale ID
+ * @stable ICU 3.4
+ */
+U_CAPI const char * U_EXPORT2
+ucasemap_getLocale(const UCaseMap *csm);
+
+/**
+ * Get the options bit set that is used for case folding and string comparisons.
+ * @param csm UCaseMap service object.
+ * @return options bit set
+ * @stable ICU 3.4
+ */
+U_CAPI uint32_t U_EXPORT2
+ucasemap_getOptions(const UCaseMap *csm);
+
+/**
+ * Set the locale ID that is used for language-dependent case mappings.
+ *
+ * @param csm UCaseMap service object.
+ * @param locale Locale ID, see ucasemap_open().
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_open
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
+
+/**
+ * Set the options bit set that is used for case folding and string comparisons.
+ *
+ * @param csm UCaseMap service object.
+ * @param options Options bit set, see ucasemap_open().
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_open
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Get the break iterator that is used for titlecasing.
+ * Do not modify the returned break iterator.
+ * @param csm UCaseMap service object.
+ * @return titlecasing break iterator
+ * @stable ICU 3.8
+ */
+U_CAPI const UBreakIterator * U_EXPORT2
+ucasemap_getBreakIterator(const UCaseMap *csm);
+
+/**
+ * Set the break iterator that is used for titlecasing.
+ * The UCaseMap service object releases a previously set break iterator
+ * and "adopts" this new one, taking ownership of it.
+ * It will be released in a subsequent call to ucasemap_setBreakIterator()
+ * or ucasemap_close().
+ *
+ * Break iterator operations are not thread-safe. Therefore, titlecasing
+ * functions use non-const UCaseMap objects. It is not possible to titlecase
+ * strings concurrently using the same UCaseMap.
+ *
+ * @param csm UCaseMap service object.
+ * @param iterToAdopt Break iterator to be adopted for titlecasing.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 3.8
+ */
+U_CAPI void U_EXPORT2
+ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
+
+/**
+ * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
+ * except that it takes ucasemap_setOptions() into account and has performance
+ * advantages from being able to use a UCaseMap object for multiple case mapping
+ * operations, saving setup time.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with ucasemap_setOptions().)
+ *
+ * Note: This function takes a non-const UCaseMap pointer because it will
+ * open a default break iterator if no break iterator was set yet,
+ * and effectively call ucasemap_setBreakIterator();
+ * also because the break iterator is stateful and will be modified during
+ * the iteration.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object. This pointer is non-const!
+ * See the note above for details.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents are undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToTitle
+ * @stable ICU 3.8
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_toTitle(UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif // UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Lowercase the characters in a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents are undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToLower
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_utf8ToLower(const UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Uppercase the characters in a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents are undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToUpper
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_utf8ToUpper(const UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecase a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with ucasemap_setOptions().)
+ *
+ * Note: This function takes a non-const UCaseMap pointer because it will
+ * open a default break iterator if no break iterator was set yet,
+ * and effectively call ucasemap_setBreakIterator();
+ * also because the break iterator is stateful and will be modified during
+ * the iteration.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setUText(), first(), next() and close() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object. This pointer is non-const!
+ * See the note above for details.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents are undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToTitle
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @stable ICU 3.8
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_utf8ToTitle(UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Case-folds the characters in a UTF-8 string.
+ *
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents are undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strFoldCase
+ * @see ucasemap_setOptions
+ * @see U_FOLD_CASE_DEFAULT
+ * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @stable ICU 3.8
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_utf8FoldCase(const UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/ucat.h b/thirdparty/icu4c/common/unicode/ucat.h
new file mode 100644
index 0000000000..93850348ff
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ucat.h
@@ -0,0 +1,160 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2003-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: March 19 2003
+* Since: ICU 2.6
+**********************************************************************
+*/
+#ifndef UCAT_H
+#define UCAT_H
+
+#include "unicode/utypes.h"
+#include "unicode/ures.h"
+
+/**
+ * \file
+ * \brief C API: Message Catalog Wrappers
+ *
+ * This C API provides look-alike functions that deliberately resemble
+ * the POSIX catopen, catclose, and catgets functions. The underlying
+ * implementation is in terms of ICU resource bundles, rather than
+ * POSIX message catalogs.
+ *
+ * The ICU resource bundles obey standard ICU inheritance policies.
+ * To facilitate this, sets and messages are flattened into one tier.
+ * This is done by creating resource bundle keys of the form
+ * &lt;set_num&gt;%&lt;msg_num&gt; where set_num is the set number and msg_num is
+ * the message number, formatted as decimal strings.
+ *
+ * Example: Consider a message catalog containing two sets:
+ *
+ * Set 1: Message 4 = "Good morning."
+ * Message 5 = "Good afternoon."
+ * Message 7 = "Good evening."
+ * Message 8 = "Good night."
+ * Set 4: Message 14 = "Please "
+ * Message 19 = "Thank you."
+ * Message 20 = "Sincerely,"
+ *
+ * The ICU resource bundle source file, assuming it is named
+ * "greet.txt", would look like this:
+ *
+ * greet
+ * {
+ * 1%4 { "Good morning." }
+ * 1%5 { "Good afternoon." }
+ * 1%7 { "Good evening." }
+ * 1%8 { "Good night." }
+ *
+ * 4%14 { "Please " }
+ * 4%19 { "Thank you." }
+ * 4%20 { "Sincerely," }
+ * }
+ *
+ * The catgets function is commonly used in combination with functions
+ * like printf and strftime. ICU components like message format can
+ * be used instead, although they use a different format syntax.
+ * There is an ICU package, icuio, that provides some of
+ * the POSIX-style formatting API.
+ */
+
+U_CDECL_BEGIN
+
+/**
+ * An ICU message catalog descriptor (a UResourceBundle pointer), analogous to POSIX nl_catd.
+ *
+ * @stable ICU 2.6
+ */
+typedef UResourceBundle* u_nl_catd;
+
+/**
+ * Open and return an ICU message catalog descriptor. The descriptor
+ * may be passed to u_catgets() to retrieve localized strings.
+ *
+ * @param name string containing the full path pointing to the
+ * directory where the resources reside, followed by the package name,
+ * e.g. "/usr/resource/my_app/resources/guimessages" on a Unix system.
+ * If NULL, ICU default data files will be used.
+ *
+ * Unlike POSIX, environment variables are not interpolated within the
+ * name.
+ *
+ * @param locale the locale for which we want to open the resource. If
+ * NULL, the default ICU locale will be used (see uloc_getDefault). If
+ * strlen(locale) == 0, the root locale will be used.
+ *
+ * @param ec input/output error code. Upon output,
+ * U_USING_FALLBACK_WARNING indicates that a fallback locale was
+ * used. For example, 'de_CH' was requested, but nothing was found
+ * there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that the
+ * default locale data or root locale data was used; neither the
+ * requested locale nor any of its fallback locales were found.
+ *
+ * @return a message catalog descriptor that may be passed to
+ * u_catgets(). If the ec parameter indicates success, then the caller
+ * is responsible for calling u_catclose() to close the message
+ * catalog. If the ec parameter indicates failure, then NULL will be
+ * returned.
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI u_nl_catd U_EXPORT2
+u_catopen(const char* name, const char* locale, UErrorCode* ec);
+
+/**
+ * Close an ICU message catalog, given its descriptor.
+ *
+ * @param catd a message catalog descriptor to be closed. May be NULL,
+ * in which case no action is taken.
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+u_catclose(u_nl_catd catd);
+
+/**
+ * Retrieve a localized string from an ICU message catalog.
+ *
+ * @param catd a message catalog descriptor returned by u_catopen.
+ *
+ * @param set_num the message catalog set number. Sets need not be
+ * numbered consecutively.
+ *
+ * @param msg_num the message catalog message number within the
+ * set. Messages need not be numbered consecutively.
+ *
+ * @param s the default string. This is returned if the string
+ * specified by the set_num and msg_num is not found. It must be
+ * zero-terminated.
+ *
+ * @param len fill-in parameter to receive the length of the result.
+ * May be NULL, in which case it is ignored.
+ *
+ * @param ec input/output error code. Upon output, may be
+ * U_USING_FALLBACK_WARNING or U_USING_DEFAULT_WARNING.
+ * U_MISSING_RESOURCE_ERROR indicates that the set_num/msg_num tuple
+ * does not specify a valid message string in this catalog.
+ *
+ * @return a pointer to a zero-terminated UChar array which lives in
+ * an internal buffer area, typically a memory mapped/DLL file. The
+ * caller must NOT delete this pointer. If the call is unsuccessful
+ * for any reason, then s is returned. This includes the situation in
+ * which ec indicates a failing error code upon entry to this
+ * function.
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI const UChar* U_EXPORT2
+u_catgets(u_nl_catd catd, int32_t set_num, int32_t msg_num,
+ const UChar* s,
+ int32_t* len, UErrorCode* ec);
+
+U_CDECL_END
+
+#endif /*UCAT_H*/
+/*eof*/
diff --git a/thirdparty/icu4c/common/unicode/uchar.h b/thirdparty/icu4c/common/unicode/uchar.h
new file mode 100644
index 0000000000..1e0f82e706
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/uchar.h
@@ -0,0 +1,4056 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File UCHAR.H
+*
+* Modification History:
+*
+* Date Name Description
+* 04/02/97 aliu Creation.
+* 03/29/99 helena Updated for C APIs.
+* 4/15/99 Madhu Updated for C Implementation and Javadoc
+* 5/20/99 Madhu Added the function u_getVersion()
+* 8/19/1999 srl Upgraded scripts to Unicode 3.0
+* 8/27/1999 schererm UCharDirection constants: U_...
+* 11/11/1999 weiv added u_isalnum(), cleaned comments
+* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion().
+******************************************************************************
+*/
+
+#ifndef UCHAR_H
+#define UCHAR_H
+
+#include "unicode/utypes.h"
+#include "unicode/stringoptions.h"
+#include "unicode/ucpmap.h"
+
+#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
+
+#define USET_DEFINED
+
+/**
+ * USet is the C API type corresponding to C++ class UnicodeSet.
+ * It is forward-declared here to avoid including the unicode/uset.h header if
+ * the related APIs are not used.
+ *
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.4
+ */
+typedef struct USet USet;
+
+#endif
+
+
+U_CDECL_BEGIN
+
+/*==========================================================================*/
+/* Unicode version number */
+/*==========================================================================*/
+/**
+ * Unicode version number, default for the current ICU version.
+ * The actual Unicode Character Database (UCD) data is stored in uprops.dat
+ * and may be generated from UCD files from a different Unicode version.
+ * Call u_getUnicodeVersion to get the actual Unicode version of the data.
+ *
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.0
+ */
+#define U_UNICODE_VERSION "13.0"
+
+/**
+ * \file
+ * \brief C API: Unicode Properties
+ *
+ * This C API provides low-level access to the Unicode Character Database.
+ * In addition to raw property values, some convenience functions calculate
+ * derived properties, for example for Java-style programming.
+ *
+ * Unicode assigns each code point (not just each assigned character) values for
+ * many properties.
+ * Most of them are simple boolean flags, or constants from a small enumerated list.
+ * For some properties, values are strings or other relatively more complex types.
+ *
+ * For more information see
+ * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
+ * and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html).
+ *
+ * Many properties are accessible via generic functions that take a UProperty selector.
+ * - u_hasBinaryProperty() returns a binary value (true/false) per property and code point.
+ * - u_getIntPropertyValue() returns an integer value per property and code point.
+ * For each supported enumerated or catalog property, there is
+ * an enum type for all of the property's values, and
+ * u_getIntPropertyValue() returns the numeric values of those constants.
+ * - u_getBinaryPropertySet() returns a set for each ICU-supported binary property with
+ * all code points for which the property is true.
+ * - u_getIntPropertyMap() returns a map for each
+ * ICU-supported enumerated/catalog/int-valued property which
+ * maps all Unicode code points to their values for that property.
+ *
+ * Many functions are designed to match java.lang.Character functions.
+ * See the individual function documentation,
+ * and see the JDK 1.4 java.lang.Character documentation
+ * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html
+ *
+ * There are also functions that provide easy migration from C/POSIX functions
+ * like isblank(). Their use is generally discouraged because the C/POSIX
+ * standards do not define their semantics beyond the ASCII range, which means
+ * that different implementations exhibit very different behavior.
+ * Instead, Unicode properties should be used directly.
+ *
+ * There are also only a few, broad C/POSIX character classes, and they tend
+ * to be used for conflicting purposes. For example, the "isalpha()" class
+ * is sometimes used to determine word boundaries, while a more sophisticated
+ * approach would at least distinguish initial letters from continuation
+ * characters (the latter including combining marks).
+ * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
+ * Another example: There is no "istitle()" class for titlecase characters.
+ *
+ * ICU 3.4 and later provide API access for all twelve C/POSIX character classes.
+ * ICU implements them according to the Standard Recommendations in
+ * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
+ * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
+ *
+ * API access for C/POSIX character classes is as follows:
+ * - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC)
+ * - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE)
+ * - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE)
+ * - punct: u_ispunct(c)
+ * - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER
+ * - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT)
+ * - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM)
+ * - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)
+ * - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK)
+ * - cntrl: u_charType(c)==U_CONTROL_CHAR
+ * - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH)
+ * - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT)
+ *
+ * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match,
+ * the Standard Recommendations in UTS #18. Instead, they match Java
+ * functions according to their API documentation.
+ *
+ * \htmlonly
+ * The C/POSIX character classes are also available in UnicodeSet patterns,
+ * using patterns like [:graph:] or \p{graph}.
+ * \endhtmlonly
+ *
+ * Note: There are several ICU whitespace functions.
+ * Comparison:
+ * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
+ * most of general categories "Z" (separators) + most whitespace ISO controls
+ * (including no-break spaces, but excluding IS1..IS4)
+ * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
+ * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
+ * - u_isspace: Z + whitespace ISO controls (including no-break spaces)
+ * - u_isblank: "horizontal spaces" = TAB + Zs
+ */
+
+/**
+ * Constants.
+ */
+
+/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */
+#define UCHAR_MIN_VALUE 0
+
+/**
+ * The highest Unicode code point value (scalar value) according to
+ * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).
+ * For a single character, UChar32 is a simple type that can hold any code point value.
+ *
+ * @see UChar32
+ * @stable ICU 2.0
+ */
+#define UCHAR_MAX_VALUE 0x10ffff
+
+/**
+ * Build a uint32_t flag mask in which only bit number x (0..31) is set.
+ * @stable ICU 2.1
+ */
+#define U_MASK(x) ((uint32_t)1<<(x))
+
+/**
+ * Selection constants for Unicode properties.
+ * These constants are used in functions like u_hasBinaryProperty to select
+ * one of the Unicode properties.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ *
+ * For details about the properties see
+ * UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).
+ *
+ * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
+ * then properties marked with "new in Unicode 3.2" are not or not fully available.
+ * Check u_getUnicodeVersion to be sure.
+ *
+ * @see u_hasBinaryProperty
+ * @see u_getIntPropertyValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.1
+ */
+typedef enum UProperty {
+ /*
+ * Note: UProperty constants are parsed by preparseucd.py.
+ * It matches lines like
+ * UCHAR_<Unicode property name>=<integer>,
+ */
+
+ /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
+ debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
+ rather than UCHAR_BINARY_START. Likewise for other *_START
+ identifiers. */
+
+ /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.
+ Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */
+ UCHAR_ALPHABETIC=0,
+ /** First constant for binary Unicode properties. @stable ICU 2.1 */
+ UCHAR_BINARY_START=UCHAR_ALPHABETIC,
+ /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */
+ UCHAR_ASCII_HEX_DIGIT=1,
+ /** Binary property Bidi_Control.
+ Format controls which have specific functions
+ in the Bidi Algorithm. @stable ICU 2.1 */
+ UCHAR_BIDI_CONTROL=2,
+ /** Binary property Bidi_Mirrored.
+ Characters that may change display in RTL text.
+ Same as u_isMirrored.
+ See Bidi Algorithm, UTR 9. @stable ICU 2.1 */
+ UCHAR_BIDI_MIRRORED=3,
+ /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */
+ UCHAR_DASH=4,
+ /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
+ Ignorable in most processing.
+ <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */
+ UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5,
+ /** Binary property Deprecated (new in Unicode 3.2).
+ The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */
+ UCHAR_DEPRECATED=6,
+ /** Binary property Diacritic. Characters that linguistically modify
+ the meaning of another character to which they apply. @stable ICU 2.1 */
+ UCHAR_DIACRITIC=7,
+ /** Binary property Extender.
+ Extend the value or shape of a preceding alphabetic character,
+ e.g., length and iteration marks. @stable ICU 2.1 */
+ UCHAR_EXTENDER=8,
+ /** Binary property Full_Composition_Exclusion.
+ CompositionExclusions.txt+Singleton Decompositions+
+ Non-Starter Decompositions. @stable ICU 2.1 */
+ UCHAR_FULL_COMPOSITION_EXCLUSION=9,
+ /** Binary property Grapheme_Base (new in Unicode 3.2).
+ For programmatic determination of grapheme cluster boundaries.
+ [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */
+ UCHAR_GRAPHEME_BASE=10,
+ /** Binary property Grapheme_Extend (new in Unicode 3.2).
+ For programmatic determination of grapheme cluster boundaries.
+ Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */
+ UCHAR_GRAPHEME_EXTEND=11,
+ /** Binary property Grapheme_Link (new in Unicode 3.2).
+ For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */
+ UCHAR_GRAPHEME_LINK=12,
+ /** Binary property Hex_Digit.
+ Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */
+ UCHAR_HEX_DIGIT=13,
+ /** Binary property Hyphen. Dashes used to mark connections
+ between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */
+ UCHAR_HYPHEN=14,
+ /** Binary property ID_Continue.
+ Characters that can continue an identifier.
+ DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out."
+ ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */
+ UCHAR_ID_CONTINUE=15,
+ /** Binary property ID_Start.
+ Characters that can start an identifier.
+ Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */
+ UCHAR_ID_START=16,
+ /** Binary property Ideographic.
+ CJKV ideographs. @stable ICU 2.1 */
+ UCHAR_IDEOGRAPHIC=17,
+ /** Binary property IDS_Binary_Operator (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_IDS_BINARY_OPERATOR=18,
+ /** Binary property IDS_Trinary_Operator (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_IDS_TRINARY_OPERATOR=19,
+ /** Binary property Join_Control.
+ Format controls for cursive joining and ligation. @stable ICU 2.1 */
+ UCHAR_JOIN_CONTROL=20,
+ /** Binary property Logical_Order_Exception (new in Unicode 3.2).
+ Characters that do not use logical order and
+ require special handling in most processing. @stable ICU 2.1 */
+ UCHAR_LOGICAL_ORDER_EXCEPTION=21,
+ /** Binary property Lowercase. Same as u_isULowercase, different from u_islower.
+ Ll+Other_Lowercase @stable ICU 2.1 */
+ UCHAR_LOWERCASE=22,
+ /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */
+ UCHAR_MATH=23,
+ /** Binary property Noncharacter_Code_Point.
+ Code points that are explicitly defined as illegal
+ for the encoding of characters. @stable ICU 2.1 */
+ UCHAR_NONCHARACTER_CODE_POINT=24,
+ /** Binary property Quotation_Mark. @stable ICU 2.1 */
+ UCHAR_QUOTATION_MARK=25,
+ /** Binary property Radical (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_RADICAL=26,
+ /** Binary property Soft_Dotted (new in Unicode 3.2).
+ Characters with a "soft dot", like i or j.
+ An accent placed on these characters causes
+ the dot to disappear. @stable ICU 2.1 */
+ UCHAR_SOFT_DOTTED=27,
+ /** Binary property Terminal_Punctuation.
+ Punctuation characters that generally mark
+ the end of textual units. @stable ICU 2.1 */
+ UCHAR_TERMINAL_PUNCTUATION=28,
+ /** Binary property Unified_Ideograph (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_UNIFIED_IDEOGRAPH=29,
+ /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper.
+ Lu+Other_Uppercase @stable ICU 2.1 */
+ UCHAR_UPPERCASE=30,
+ /** Binary property White_Space.
+ Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace.
+ Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */
+ UCHAR_WHITE_SPACE=31,
+ /** Binary property XID_Continue.
+ ID_Continue modified to allow closure under
+ normalization forms NFKC and NFKD. @stable ICU 2.1 */
+ UCHAR_XID_CONTINUE=32,
+ /** Binary property XID_Start. ID_Start modified to allow
+ closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */
+ UCHAR_XID_START=33,
+ /** Binary property Case_Sensitive. Either the source of a case
+ mapping or _in_ the target of a case mapping. Not the same as
+ the general category Cased_Letter. @stable ICU 2.6 */
+ UCHAR_CASE_SENSITIVE=34,
+ /** Binary property STerm (new in Unicode 4.0.1).
+ Sentence Terminal. Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ @stable ICU 3.0 */
+ UCHAR_S_TERM=35,
+ /** Binary property Variation_Selector (new in Unicode 4.0.1).
+ Indicates all those characters that qualify as Variation Selectors.
+ For details on the behavior of these characters,
+ see StandardizedVariants.html and 15.6 Variation Selectors.
+ @stable ICU 3.0 */
+ UCHAR_VARIATION_SELECTOR=36,
+ /** Binary property NFD_Inert.
+ ICU-specific property for characters that are inert under NFD,
+ i.e., they do not interact with adjacent characters.
+ See the documentation for the Normalizer2 class and the
+ Normalizer2::isInert() method.
+ @stable ICU 3.0 */
+ UCHAR_NFD_INERT=37,
+ /** Binary property NFKD_Inert.
+ ICU-specific property for characters that are inert under NFKD,
+ i.e., they do not interact with adjacent characters.
+ See the documentation for the Normalizer2 class and the
+ Normalizer2::isInert() method.
+ @stable ICU 3.0 */
+ UCHAR_NFKD_INERT=38,
+ /** Binary property NFC_Inert.
+ ICU-specific property for characters that are inert under NFC,
+ i.e., they do not interact with adjacent characters.
+ See the documentation for the Normalizer2 class and the
+ Normalizer2::isInert() method.
+ @stable ICU 3.0 */
+ UCHAR_NFC_INERT=39,
+ /** Binary property NFKC_Inert.
+ ICU-specific property for characters that are inert under NFKC,
+ i.e., they do not interact with adjacent characters.
+ See the documentation for the Normalizer2 class and the
+ Normalizer2::isInert() method.
+ @stable ICU 3.0 */
+ UCHAR_NFKC_INERT=40,
+ /** Binary Property Segment_Starter.
+ ICU-specific property for characters that are starters in terms of
+ Unicode normalization and combining character sequences.
+ They have ccc=0 and do not occur in non-initial position of the
+ canonical decomposition of any character
+ (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)).
+ ICU uses this property for segmenting a string for generating a set of
+ canonically equivalent strings, e.g. for canonical closure while
+ processing collation tailoring rules.
+ @stable ICU 3.0 */
+ UCHAR_SEGMENT_STARTER=41,
+ /** Binary property Pattern_Syntax (new in Unicode 4.1).
+ See UAX #31 Identifier and Pattern Syntax
+ (http://www.unicode.org/reports/tr31/)
+ @stable ICU 3.4 */
+ UCHAR_PATTERN_SYNTAX=42,
+ /** Binary property Pattern_White_Space (new in Unicode 4.1).
+ See UAX #31 Identifier and Pattern Syntax
+ (http://www.unicode.org/reports/tr31/)
+ @stable ICU 3.4 */
+ UCHAR_PATTERN_WHITE_SPACE=43,
+ /** Binary property alnum (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_ALNUM=44,
+ /** Binary property blank (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_BLANK=45,
+ /** Binary property graph (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_GRAPH=46,
+ /** Binary property print (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_PRINT=47,
+ /** Binary property xdigit (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_XDIGIT=48,
+ /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */
+ UCHAR_CASED=49,
+ /** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */
+ UCHAR_CASE_IGNORABLE=50,
+ /** Binary property Changes_When_Lowercased. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_LOWERCASED=51,
+ /** Binary property Changes_When_Uppercased. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_UPPERCASED=52,
+ /** Binary property Changes_When_Titlecased. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_TITLECASED=53,
+ /** Binary property Changes_When_Casefolded. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_CASEFOLDED=54,
+ /** Binary property Changes_When_Casemapped. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_CASEMAPPED=55,
+ /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56,
+ /**
+ * Binary property Emoji.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @stable ICU 57
+ */
+ UCHAR_EMOJI=57,
+ /**
+ * Binary property Emoji_Presentation.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @stable ICU 57
+ */
+ UCHAR_EMOJI_PRESENTATION=58,
+ /**
+ * Binary property Emoji_Modifier.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @stable ICU 57
+ */
+ UCHAR_EMOJI_MODIFIER=59,
+ /**
+ * Binary property Emoji_Modifier_Base.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @stable ICU 57
+ */
+ UCHAR_EMOJI_MODIFIER_BASE=60,
+ /**
+ * Binary property Emoji_Component.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @stable ICU 60
+ */
+ UCHAR_EMOJI_COMPONENT=61,
+ /**
+ * Binary property Regional_Indicator.
+ * @stable ICU 60
+ */
+ UCHAR_REGIONAL_INDICATOR=62,
+ /**
+ * Binary property Prepended_Concatenation_Mark.
+ * @stable ICU 60
+ */
+ UCHAR_PREPENDED_CONCATENATION_MARK=63,
+ /**
+ * Binary property Extended_Pictographic.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @stable ICU 62
+ */
+ UCHAR_EXTENDED_PICTOGRAPHIC=64,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the last constant for binary Unicode properties.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCHAR_BINARY_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+
+ /** Enumerated property Bidi_Class.
+ Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
+ UCHAR_BIDI_CLASS=0x1000,
+ /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
+ UCHAR_INT_START=UCHAR_BIDI_CLASS,
+ /** Enumerated property Block.
+ Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */
+ UCHAR_BLOCK=0x1001,
+ /** Enumerated property Canonical_Combining_Class.
+ Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */
+ UCHAR_CANONICAL_COMBINING_CLASS=0x1002,
+ /** Enumerated property Decomposition_Type.
+ Returns UDecompositionType values. @stable ICU 2.2 */
+ UCHAR_DECOMPOSITION_TYPE=0x1003,
+ /** Enumerated property East_Asian_Width.
+ See http://www.unicode.org/reports/tr11/
+ Returns UEastAsianWidth values. @stable ICU 2.2 */
+ UCHAR_EAST_ASIAN_WIDTH=0x1004,
+ /** Enumerated property General_Category.
+ Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */
+ UCHAR_GENERAL_CATEGORY=0x1005,
+ /** Enumerated property Joining_Group.
+ Returns UJoiningGroup values. @stable ICU 2.2 */
+ UCHAR_JOINING_GROUP=0x1006,
+ /** Enumerated property Joining_Type.
+ Returns UJoiningType values. @stable ICU 2.2 */
+ UCHAR_JOINING_TYPE=0x1007,
+ /** Enumerated property Line_Break.
+ Returns ULineBreak values. @stable ICU 2.2 */
+ UCHAR_LINE_BREAK=0x1008,
+ /** Enumerated property Numeric_Type.
+ Returns UNumericType values. @stable ICU 2.2 */
+ UCHAR_NUMERIC_TYPE=0x1009,
+ /** Enumerated property Script.
+ Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */
+ UCHAR_SCRIPT=0x100A,
+ /** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
+ Returns UHangulSyllableType values. @stable ICU 2.6 */
+ UCHAR_HANGUL_SYLLABLE_TYPE=0x100B,
+ /** Enumerated property NFD_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFD_QUICK_CHECK=0x100C,
+ /** Enumerated property NFKD_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFKD_QUICK_CHECK=0x100D,
+ /** Enumerated property NFC_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFC_QUICK_CHECK=0x100E,
+ /** Enumerated property NFKC_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFKC_QUICK_CHECK=0x100F,
+ /** Enumerated property Lead_Canonical_Combining_Class.
+ ICU-specific property for the ccc of the first code point
+ of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
+ Useful for checking for canonically ordered text;
+ see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
+ Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
+ UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010,
+ /** Enumerated property Trail_Canonical_Combining_Class.
+ ICU-specific property for the ccc of the last code point
+ of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
+ Useful for checking for canonically ordered text;
+ see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
+ Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
+ UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011,
+ /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
+ Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ Returns UGraphemeClusterBreak values. @stable ICU 3.4 */
+ UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012,
+ /** Enumerated property Sentence_Break (new in Unicode 4.1).
+ Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ Returns USentenceBreak values. @stable ICU 3.4 */
+ UCHAR_SENTENCE_BREAK=0x1013,
+ /** Enumerated property Word_Break (new in Unicode 4.1).
+ Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ Returns UWordBreakValues values. @stable ICU 3.4 */
+ UCHAR_WORD_BREAK=0x1014,
+ /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
+ Used in UAX #9: Unicode Bidirectional Algorithm
+ (http://www.unicode.org/reports/tr9/)
+ Returns UBidiPairedBracketType values. @stable ICU 52 */
+ UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015,
+ /**
+ * Enumerated property Indic_Positional_Category.
+ * New in Unicode 6.0 as provisional property Indic_Matra_Category;
+ * renamed and changed to informative in Unicode 8.0.
+ * See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt
+ * @stable ICU 63
+ */
+ UCHAR_INDIC_POSITIONAL_CATEGORY=0x1016,
+ /**
+ * Enumerated property Indic_Syllabic_Category.
+ * New in Unicode 6.0 as provisional; informative since Unicode 8.0.
+ * See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt
+ * @stable ICU 63
+ */
+ UCHAR_INDIC_SYLLABIC_CATEGORY=0x1017,
+ /**
+ * Enumerated property Vertical_Orientation.
+ * Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/).
+ * New as a UCD property in Unicode 10.0.
+ * @stable ICU 63
+ */
+ UCHAR_VERTICAL_ORIENTATION=0x1018,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the last constant for enumerated/integer Unicode properties.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCHAR_INT_LIMIT=0x1019,
+#endif // U_HIDE_DEPRECATED_API
+
+ /** Bitmask property General_Category_Mask.
+ This is the General_Category property returned as a bit mask.
+ When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)),
+ returns bit masks for UCharCategory values where exactly one bit is set.
+ When used with u_getPropertyValueName() and u_getPropertyValueEnum(),
+ a multi-bit mask is used for sets of categories like "Letters".
+ Mask values should be cast to uint32_t.
+ @stable ICU 2.4 */
+ UCHAR_GENERAL_CATEGORY_MASK=0x2000,
+ /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */
+ UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the last constant for bit-mask Unicode properties.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCHAR_MASK_LIMIT=0x2001,
+#endif // U_HIDE_DEPRECATED_API
+
+ /** Double property Numeric_Value.
+ Corresponds to u_getNumericValue. @stable ICU 2.4 */
+ UCHAR_NUMERIC_VALUE=0x3000,
+ /** First constant for double Unicode properties. @stable ICU 2.4 */
+ UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the last constant for double Unicode properties.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCHAR_DOUBLE_LIMIT=0x3001,
+#endif // U_HIDE_DEPRECATED_API
+
+ /** String property Age.
+ Corresponds to u_charAge. @stable ICU 2.4 */
+ UCHAR_AGE=0x4000,
+ /** First constant for string Unicode properties. @stable ICU 2.4 */
+ UCHAR_STRING_START=UCHAR_AGE,
+ /** String property Bidi_Mirroring_Glyph.
+ Corresponds to u_charMirror. @stable ICU 2.4 */
+ UCHAR_BIDI_MIRRORING_GLYPH=0x4001,
+ /** String property Case_Folding.
+ Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
+ UCHAR_CASE_FOLDING=0x4002,
+#ifndef U_HIDE_DEPRECATED_API
+ /** Deprecated string property ISO_Comment.
+ Corresponds to u_getISOComment. @deprecated ICU 49 */
+ UCHAR_ISO_COMMENT=0x4003,
+#endif /* U_HIDE_DEPRECATED_API */
+ /** String property Lowercase_Mapping.
+ Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
+ UCHAR_LOWERCASE_MAPPING=0x4004,
+ /** String property Name.
+ Corresponds to u_charName. @stable ICU 2.4 */
+ UCHAR_NAME=0x4005,
+ /** String property Simple_Case_Folding.
+ Corresponds to u_foldCase. @stable ICU 2.4 */
+ UCHAR_SIMPLE_CASE_FOLDING=0x4006,
+ /** String property Simple_Lowercase_Mapping.
+ Corresponds to u_tolower. @stable ICU 2.4 */
+ UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007,
+ /** String property Simple_Titlecase_Mapping.
+ Corresponds to u_totitle. @stable ICU 2.4 */
+ UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008,
+ /** String property Simple_Uppercase_Mapping.
+ Corresponds to u_toupper. @stable ICU 2.4 */
+ UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009,
+ /** String property Titlecase_Mapping.
+ Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
+ UCHAR_TITLECASE_MAPPING=0x400A,
+#ifndef U_HIDE_DEPRECATED_API
+ /** String property Unicode_1_Name.
+ This property is of little practical value.
+ Beginning with ICU 49, ICU APIs return an empty string for this property.
+ Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */
+ UCHAR_UNICODE_1_NAME=0x400B,
+#endif /* U_HIDE_DEPRECATED_API */
+ /** String property Uppercase_Mapping.
+ Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
+ UCHAR_UPPERCASE_MAPPING=0x400C,
+ /** String property Bidi_Paired_Bracket (new in Unicode 6.3).
+ Corresponds to u_getBidiPairedBracket. @stable ICU 52 */
+ UCHAR_BIDI_PAIRED_BRACKET=0x400D,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the last constant for string Unicode properties.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCHAR_STRING_LIMIT=0x400E,
+#endif // U_HIDE_DEPRECATED_API
+
+ /** Miscellaneous property Script_Extensions (new in Unicode 6.0).
+ Some characters are commonly used in multiple scripts.
+ For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
+ Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.
+ @stable ICU 4.6 */
+ UCHAR_SCRIPT_EXTENSIONS=0x7000,
+ /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */
+ UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the last constant for Unicode properties with unusual value types.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCHAR_OTHER_PROPERTY_LIMIT=0x7001,
+#endif // U_HIDE_DEPRECATED_API
+
+ /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
+ UCHAR_INVALID_CODE = -1
+} UProperty;
+
+/**
+ * Data for enumerated Unicode general category types.
+ * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
+ *
+ * The constants are numbered consecutively from 0 to 29.
+ * U_UNASSIGNED and U_GENERAL_OTHER_TYPES are two names for the same value 0.
+ * The U_GC_XX_MASK macros are built from these constants via U_MASK().
+ * @stable ICU 2.0
+ */
+typedef enum UCharCategory
+{
+ /*
+ * Note: UCharCategory constants and their API comments are parsed by preparseucd.py.
+ * It matches pairs of lines like
+ * / ** <Unicode 2-letter General_Category value> comment... * /
+ * U_<[A-Z_]+> = <integer>,
+ */
+
+ /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */
+ U_UNASSIGNED = 0,
+ /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */
+ U_GENERAL_OTHER_TYPES = 0,
+ /** Lu @stable ICU 2.0 */
+ U_UPPERCASE_LETTER = 1,
+ /** Ll @stable ICU 2.0 */
+ U_LOWERCASE_LETTER = 2,
+ /** Lt @stable ICU 2.0 */
+ U_TITLECASE_LETTER = 3,
+ /** Lm @stable ICU 2.0 */
+ U_MODIFIER_LETTER = 4,
+ /** Lo @stable ICU 2.0 */
+ U_OTHER_LETTER = 5,
+ /** Mn @stable ICU 2.0 */
+ U_NON_SPACING_MARK = 6,
+ /** Me @stable ICU 2.0 */
+ U_ENCLOSING_MARK = 7,
+ /** Mc @stable ICU 2.0 */
+ U_COMBINING_SPACING_MARK = 8,
+ /** Nd @stable ICU 2.0 */
+ U_DECIMAL_DIGIT_NUMBER = 9,
+ /** Nl @stable ICU 2.0 */
+ U_LETTER_NUMBER = 10,
+ /** No @stable ICU 2.0 */
+ U_OTHER_NUMBER = 11,
+ /** Zs @stable ICU 2.0 */
+ U_SPACE_SEPARATOR = 12,
+ /** Zl @stable ICU 2.0 */
+ U_LINE_SEPARATOR = 13,
+ /** Zp @stable ICU 2.0 */
+ U_PARAGRAPH_SEPARATOR = 14,
+ /** Cc @stable ICU 2.0 */
+ U_CONTROL_CHAR = 15,
+ /** Cf @stable ICU 2.0 */
+ U_FORMAT_CHAR = 16,
+ /** Co @stable ICU 2.0 */
+ U_PRIVATE_USE_CHAR = 17,
+ /** Cs @stable ICU 2.0 */
+ U_SURROGATE = 18,
+ /** Pd @stable ICU 2.0 */
+ U_DASH_PUNCTUATION = 19,
+ /** Ps @stable ICU 2.0 */
+ U_START_PUNCTUATION = 20,
+ /** Pe @stable ICU 2.0 */
+ U_END_PUNCTUATION = 21,
+ /** Pc @stable ICU 2.0 */
+ U_CONNECTOR_PUNCTUATION = 22,
+ /** Po @stable ICU 2.0 */
+ U_OTHER_PUNCTUATION = 23,
+ /** Sm @stable ICU 2.0 */
+ U_MATH_SYMBOL = 24,
+ /** Sc @stable ICU 2.0 */
+ U_CURRENCY_SYMBOL = 25,
+ /** Sk @stable ICU 2.0 */
+ U_MODIFIER_SYMBOL = 26,
+ /** So @stable ICU 2.0 */
+ U_OTHER_SYMBOL = 27,
+ /** Pi @stable ICU 2.0 */
+ U_INITIAL_PUNCTUATION = 28,
+ /** Pf @stable ICU 2.0 */
+ U_FINAL_PUNCTUATION = 29,
+ /**
+ * One higher than the last enum UCharCategory constant.
+ * It has no explicit initializer and follows U_FINAL_PUNCTUATION = 29,
+ * so it evaluates to 30.
+ * This numeric value is stable (will not change), see
+ * http://www.unicode.org/policies/stability_policy.html#Property_Value
+ *
+ * @stable ICU 2.0
+ */
+ U_CHAR_CATEGORY_COUNT
+} UCharCategory;
+
+/**
+ * U_GC_XX_MASK constants are bit flags corresponding to Unicode
+ * general category values.
+ * For each category, the nth bit is set if the numeric value of the
+ * corresponding UCharCategory constant is n.
+ *
+ * There are also some U_GC_Y_MASK constants for groups of general categories
+ * like L for all letter categories.
+ *
+ * The macro documented here, U_GC_CN_MASK, is the mask for
+ * U_GENERAL_OTHER_TYPES (Cn).
+ *
+ * @see u_charType
+ * @see U_GET_GC_MASK
+ * @see UCharCategory
+ * @stable ICU 2.1
+ */
+#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES)
+
+/** Mask constant for UCharCategory U_UPPERCASE_LETTER (Lu). @stable ICU 2.1 */
+#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER)
+/** Mask constant for UCharCategory U_LOWERCASE_LETTER (Ll). @stable ICU 2.1 */
+#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER)
+/** Mask constant for UCharCategory U_TITLECASE_LETTER (Lt). @stable ICU 2.1 */
+#define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER)
+/** Mask constant for UCharCategory U_MODIFIER_LETTER (Lm). @stable ICU 2.1 */
+#define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER)
+/** Mask constant for UCharCategory U_OTHER_LETTER (Lo). @stable ICU 2.1 */
+#define U_GC_LO_MASK U_MASK(U_OTHER_LETTER)
+
+/** Mask constant for UCharCategory U_NON_SPACING_MARK (Mn). @stable ICU 2.1 */
+#define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK)
+/** Mask constant for UCharCategory U_ENCLOSING_MARK (Me). @stable ICU 2.1 */
+#define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK)
+/** Mask constant for UCharCategory U_COMBINING_SPACING_MARK (Mc). @stable ICU 2.1 */
+#define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK)
+
+/** Mask constant for UCharCategory U_DECIMAL_DIGIT_NUMBER (Nd). @stable ICU 2.1 */
+#define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER)
+/** Mask constant for UCharCategory U_LETTER_NUMBER (Nl). @stable ICU 2.1 */
+#define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER)
+/** Mask constant for UCharCategory U_OTHER_NUMBER (No). @stable ICU 2.1 */
+#define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER)
+
+/** Mask constant for UCharCategory U_SPACE_SEPARATOR (Zs). @stable ICU 2.1 */
+#define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR)
+/** Mask constant for UCharCategory U_LINE_SEPARATOR (Zl). @stable ICU 2.1 */
+#define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR)
+/** Mask constant for UCharCategory U_PARAGRAPH_SEPARATOR (Zp). @stable ICU 2.1 */
+#define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR)
+
+/** Mask constant for UCharCategory U_CONTROL_CHAR (Cc). @stable ICU 2.1 */
+#define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR)
+/** Mask constant for UCharCategory U_FORMAT_CHAR (Cf). @stable ICU 2.1 */
+#define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR)
+/** Mask constant for UCharCategory U_PRIVATE_USE_CHAR (Co). @stable ICU 2.1 */
+#define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR)
+/** Mask constant for UCharCategory U_SURROGATE (Cs). @stable ICU 2.1 */
+#define U_GC_CS_MASK U_MASK(U_SURROGATE)
+
+/** Mask constant for UCharCategory U_DASH_PUNCTUATION (Pd). @stable ICU 2.1 */
+#define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION)
+/** Mask constant for UCharCategory U_START_PUNCTUATION (Ps). @stable ICU 2.1 */
+#define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION)
+/** Mask constant for UCharCategory U_END_PUNCTUATION (Pe). @stable ICU 2.1 */
+#define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION)
+/** Mask constant for UCharCategory U_CONNECTOR_PUNCTUATION (Pc). @stable ICU 2.1 */
+#define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION)
+/** Mask constant for UCharCategory U_OTHER_PUNCTUATION (Po). @stable ICU 2.1 */
+#define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION)
+
+/** Mask constant for UCharCategory U_MATH_SYMBOL (Sm). @stable ICU 2.1 */
+#define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL)
+/** Mask constant for UCharCategory U_CURRENCY_SYMBOL (Sc). @stable ICU 2.1 */
+#define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL)
+/** Mask constant for UCharCategory U_MODIFIER_SYMBOL (Sk). @stable ICU 2.1 */
+#define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL)
+/** Mask constant for UCharCategory U_OTHER_SYMBOL (So). @stable ICU 2.1 */
+#define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL)
+
+/** Mask constant for UCharCategory U_INITIAL_PUNCTUATION (Pi). @stable ICU 2.1 */
+#define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION)
+/** Mask constant for UCharCategory U_FINAL_PUNCTUATION (Pf). @stable ICU 2.1 */
+#define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION)
+
+
+/** Mask constant for multiple UCharCategory bits (L Letters: Lu, Ll, Lt, Lm, Lo). @stable ICU 2.1 */
+#define U_GC_L_MASK \
+ (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
+
+/** Mask constant for multiple UCharCategory bits (LC Cased Letters: Lu, Ll, Lt). @stable ICU 2.1 */
+#define U_GC_LC_MASK \
+ (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
+
+/** Mask constant for multiple UCharCategory bits (M Marks: Mn, Me, Mc). @stable ICU 2.1 */
+#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
+
+/** Mask constant for multiple UCharCategory bits (N Numbers: Nd, Nl, No). @stable ICU 2.1 */
+#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
+
+/** Mask constant for multiple UCharCategory bits (Z Separators: Zs, Zl, Zp). @stable ICU 2.1 */
+#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
+
+/** Mask constant for multiple UCharCategory bits (C Others: Cn, Cc, Cf, Co, Cs). @stable ICU 2.1 */
+#define U_GC_C_MASK \
+ (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
+
+/** Mask constant for multiple UCharCategory bits (P Punctuation: Pd, Ps, Pe, Pc, Po, Pi, Pf). @stable ICU 2.1 */
+#define U_GC_P_MASK \
+ (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
+ U_GC_PI_MASK|U_GC_PF_MASK)
+
+/** Mask constant for multiple UCharCategory bits (S Symbols: Sm, Sc, Sk, So). @stable ICU 2.1 */
+#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
+
+/**
+ * Constants for the Unicode Bidi_Class (directional) property of a character.
+ * The constants are numbered consecutively from 0 (U_LEFT_TO_RIGHT)
+ * to 22 (U_POP_DIRECTIONAL_ISOLATE).
+ * @stable ICU 2.0
+ */
+typedef enum UCharDirection {
+ /*
+ * Note: UCharDirection constants and their API comments are parsed by preparseucd.py.
+ * It matches pairs of lines like
+ * / ** <Unicode 1..3-letter Bidi_Class value> comment... * /
+ * U_<[A-Z_]+> = <integer>,
+ */
+
+ /** L @stable ICU 2.0 */
+ U_LEFT_TO_RIGHT = 0,
+ /** R @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT = 1,
+ /** EN @stable ICU 2.0 */
+ U_EUROPEAN_NUMBER = 2,
+ /** ES @stable ICU 2.0 */
+ U_EUROPEAN_NUMBER_SEPARATOR = 3,
+ /** ET @stable ICU 2.0 */
+ U_EUROPEAN_NUMBER_TERMINATOR = 4,
+ /** AN @stable ICU 2.0 */
+ U_ARABIC_NUMBER = 5,
+ /** CS @stable ICU 2.0 */
+ U_COMMON_NUMBER_SEPARATOR = 6,
+ /** B @stable ICU 2.0 */
+ U_BLOCK_SEPARATOR = 7,
+ /** S @stable ICU 2.0 */
+ U_SEGMENT_SEPARATOR = 8,
+ /** WS @stable ICU 2.0 */
+ U_WHITE_SPACE_NEUTRAL = 9,
+ /** ON @stable ICU 2.0 */
+ U_OTHER_NEUTRAL = 10,
+ /** LRE @stable ICU 2.0 */
+ U_LEFT_TO_RIGHT_EMBEDDING = 11,
+ /** LRO @stable ICU 2.0 */
+ U_LEFT_TO_RIGHT_OVERRIDE = 12,
+ /** AL @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT_ARABIC = 13,
+ /** RLE @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT_EMBEDDING = 14,
+ /** RLO @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT_OVERRIDE = 15,
+ /** PDF @stable ICU 2.0 */
+ U_POP_DIRECTIONAL_FORMAT = 16,
+ /** NSM @stable ICU 2.0 */
+ U_DIR_NON_SPACING_MARK = 17,
+ /** BN @stable ICU 2.0 */
+ U_BOUNDARY_NEUTRAL = 18,
+ /** FSI @stable ICU 52 */
+ U_FIRST_STRONG_ISOLATE = 19,
+ /** LRI @stable ICU 52 */
+ U_LEFT_TO_RIGHT_ISOLATE = 20,
+ /** RLI @stable ICU 52 */
+ U_RIGHT_TO_LEFT_ISOLATE = 21,
+ /** PDI @stable ICU 52 */
+ U_POP_DIRECTIONAL_ISOLATE = 22,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest UCharDirection value.
+ * Declared only when U_HIDE_DEPRECATED_API is not defined; it has no
+ * explicit initializer and follows U_POP_DIRECTIONAL_ISOLATE = 22,
+ * so it evaluates to 23 in this version of the header.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_CHAR_DIRECTION_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UCharDirection;
+
+/**
+ * Bidi Paired Bracket Type constants.
+ * The constants have no explicit initializers, so they take the
+ * consecutive values 0 (U_BPT_NONE), 1 (U_BPT_OPEN) and 2 (U_BPT_CLOSE).
+ *
+ * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
+ * @stable ICU 52
+ */
+typedef enum UBidiPairedBracketType {
+ /*
+ * Note: UBidiPairedBracketType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_BPT_<Unicode Bidi_Paired_Bracket_Type value name>
+ */
+
+ /** Not a paired bracket (value 0). @stable ICU 52 */
+ U_BPT_NONE,
+ /** Open paired bracket (value 1). @stable ICU 52 */
+ U_BPT_OPEN,
+ /** Close paired bracket (value 2). @stable ICU 52 */
+ U_BPT_CLOSE,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UBidiPairedBracketType value.
+ * Declared only when U_HIDE_DEPRECATED_API is not defined.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_BPT_COUNT /* 3 */
+#endif // U_HIDE_DEPRECATED_API
+} UBidiPairedBracketType;
+
+/**
+ * Constants for Unicode blocks, see the Unicode Data file Blocks.txt
+ * @stable ICU 2.0
+ */
+enum UBlockCode {
+ /*
+ * Note: UBlockCode constants are parsed by preparseucd.py.
+ * It matches lines like
+ * UBLOCK_<Unicode Block value name> = <integer>,
+ */
+
+ /** New No_Block value in Unicode 4. @stable ICU 2.6 */
+ UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BASIC_LATIN = 1, /*[0000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/
+
+ /**
+ * Unicode 3.2 renames this block to "Greek and Coptic".
+ * @stable ICU 2.0
+ */
+ UBLOCK_GREEK =8, /*[0370]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CYRILLIC =9, /*[0400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARMENIAN =10, /*[0530]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HEBREW =11, /*[0590]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARABIC =12, /*[0600]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SYRIAC =13, /*[0700]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_THAANA =14, /*[0780]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_DEVANAGARI =15, /*[0900]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BENGALI =16, /*[0980]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GURMUKHI =17, /*[0A00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GUJARATI =18, /*[0A80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ORIYA =19, /*[0B00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_TAMIL =20, /*[0B80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_TELUGU =21, /*[0C00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KANNADA =22, /*[0C80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MALAYALAM =23, /*[0D00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SINHALA =24, /*[0D80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_THAI =25, /*[0E00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LAO =26, /*[0E80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_TIBETAN =27, /*[0F00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MYANMAR =28, /*[1000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GEORGIAN =29, /*[10A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HANGUL_JAMO =30, /*[1100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ETHIOPIC =31, /*[1200]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CHEROKEE =32, /*[13A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_OGHAM =34, /*[1680]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_RUNIC =35, /*[16A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KHMER =36, /*[1780]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MONGOLIAN =37, /*[1800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/
+
+ /**
+ * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
+ * @stable ICU 2.0
+ */
+ UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_NUMBER_FORMS =45, /*[2150]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARROWS =46, /*[2190]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CONTROL_PICTURES =49, /*[2400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BOX_DRAWING =52, /*[2500]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_DINGBATS =56, /*[2700]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HIRAGANA =62, /*[3040]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KATAKANA =63, /*[30A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BOPOMOFO =64, /*[3100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KANBUN =66, /*[3190]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_YI_SYLLABLES =72, /*[A000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_YI_RADICALS =73, /*[A490]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HIGH_SURROGATES =75, /*[D800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
+
+ /**
+ * Same as UBLOCK_PRIVATE_USE.
+ * Until Unicode 3.1.1, the corresponding block name was "Private Use",
+ * and multiple code point ranges had this block.
+ * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
+ * adds separate blocks for the supplementary PUAs.
+ *
+ * @stable ICU 2.0
+ */
+ UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/
+ /**
+ * Same as UBLOCK_PRIVATE_USE_AREA.
+ * Until Unicode 3.1.1, the corresponding block name was "Private Use",
+ * and multiple code point ranges had this block.
+ * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
+ * adds separate blocks for the supplementary PUAs.
+ *
+ * @stable ICU 2.0
+ */
+ UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA,
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SPECIALS =86, /*[FFF0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/
+
+ /* New blocks in Unicode 3.1 */
+
+ /** @stable ICU 2.0 */
+ UBLOCK_OLD_ITALIC = 88, /*[10300]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_GOTHIC = 89, /*[10330]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_DESERET = 90, /*[10400]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_TAGS = 96, /*[E0000]*/
+
+ /* New blocks in Unicode 3.2 */
+
+ /** @stable ICU 3.0 */
+ UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/
+ /**
+ * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
+ * @stable ICU 2.2
+ */
+ UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT,
+ /** @stable ICU 2.2 */
+ UBLOCK_TAGALOG = 98, /*[1700]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_HANUNOO = 99, /*[1720]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_BUHID = 100, /*[1740]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_TAGBANWA = 101, /*[1760]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/
+
+ /* New blocks in Unicode 4 */
+
+ /** @stable ICU 2.6 */
+ UBLOCK_LIMBU = 111, /*[1900]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_TAI_LE = 112, /*[1950]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_UGARITIC = 120, /*[10380]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_SHAVIAN = 121, /*[10450]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_OSMANYA = 122, /*[10480]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/
+
+ /* New blocks in Unicode 4.1 */
+
+ /** @stable ICU 3.4 */
+ UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_BUGINESE = 129, /*[1A00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_CJK_STROKES = 130, /*[31C0]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_COPTIC = 132, /*[2C80]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_GLAGOLITIC = 136, /*[2C00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_KHAROSHTHI = 137, /*[10A00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_TIFINAGH = 144, /*[2D30]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/
+
+ /* New blocks in Unicode 5.0 */
+
+ /** @stable ICU 3.6 */
+ UBLOCK_NKO = 146, /*[07C0]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_BALINESE = 147, /*[1B00]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_PHAGS_PA = 150, /*[A840]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_PHOENICIAN = 151, /*[10900]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_CUNEIFORM = 152, /*[12000]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/
+
+ /* New blocks in Unicode 5.1 */
+
+ /** @stable ICU 4.0 */
+ UBLOCK_SUNDANESE = 155, /*[1B80]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_LEPCHA = 156, /*[1C00]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_OL_CHIKI = 157, /*[1C50]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_VAI = 159, /*[A500]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_SAURASHTRA = 161, /*[A880]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_KAYAH_LI = 162, /*[A900]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_REJANG = 163, /*[A930]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_CHAM = 164, /*[AA00]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_LYCIAN = 167, /*[10280]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_CARIAN = 168, /*[102A0]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_LYDIAN = 169, /*[10920]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_DOMINO_TILES = 171, /*[1F030]*/
+
+ /* New blocks in Unicode 5.2 */
+
+ /** @stable ICU 4.4 */
+ UBLOCK_SAMARITAN = 172, /*[0800]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_TAI_THAM = 174, /*[1A20]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_LISU = 176, /*[A4D0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_BAMUM = 177, /*[A6A0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_HANGUL_JAMO_EXTENDED_A = 180, /*[A960]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_JAVANESE = 181, /*[A980]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_TAI_VIET = 183, /*[AA80]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_AVESTAN = 188, /*[10B00]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_OLD_TURKIC = 191, /*[10C00]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_KAITHI = 193, /*[11080]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/
+
+ /* New blocks in Unicode 6.0 */
+
+ /** @stable ICU 4.6 */
+ UBLOCK_MANDAIC = 198, /*[0840]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_BATAK = 199, /*[1BC0]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_BRAHMI = 201, /*[11000]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_EMOTICONS = 206, /*[1F600]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/
+
+ /* New blocks in Unicode 6.1 */
+
+ /** @stable ICU 49 */
+ UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/
+ /** @stable ICU 49 */
+ UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/
+ /** @stable ICU 49 */
+ UBLOCK_CHAKMA = 212, /*[11100]*/
+ /** @stable ICU 49 */
+ UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/
+ /** @stable ICU 49 */
+ UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/
+ /** @stable ICU 49 */
+ UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/
+ /** @stable ICU 49 */
+ UBLOCK_MIAO = 216, /*[16F00]*/
+ /** @stable ICU 49 */
+ UBLOCK_SHARADA = 217, /*[11180]*/
+ /** @stable ICU 49 */
+ UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/
+ /** @stable ICU 49 */
+ UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/
+ /** @stable ICU 49 */
+ UBLOCK_TAKRI = 220, /*[11680]*/
+
+ /* New blocks in Unicode 7.0 */
+
+ /** @stable ICU 54 */
+ UBLOCK_BASSA_VAH = 221, /*[16AD0]*/
+ /** @stable ICU 54 */
+ UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/
+ /** @stable ICU 54 */
+ UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/
+ /** @stable ICU 54 */
+ UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224, /*[1AB0]*/
+ /** @stable ICU 54 */
+ UBLOCK_DUPLOYAN = 225, /*[1BC00]*/
+ /** @stable ICU 54 */
+ UBLOCK_ELBASAN = 226, /*[10500]*/
+ /** @stable ICU 54 */
+ UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227, /*[1F780]*/
+ /** @stable ICU 54 */
+ UBLOCK_GRANTHA = 228, /*[11300]*/
+ /** @stable ICU 54 */
+ UBLOCK_KHOJKI = 229, /*[11200]*/
+ /** @stable ICU 54 */
+ UBLOCK_KHUDAWADI = 230, /*[112B0]*/
+ /** @stable ICU 54 */
+ UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/
+ /** @stable ICU 54 */
+ UBLOCK_LINEAR_A = 232, /*[10600]*/
+ /** @stable ICU 54 */
+ UBLOCK_MAHAJANI = 233, /*[11150]*/
+ /** @stable ICU 54 */
+ UBLOCK_MANICHAEAN = 234, /*[10AC0]*/
+ /** @stable ICU 54 */
+ UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/
+ /** @stable ICU 54 */
+ UBLOCK_MODI = 236, /*[11600]*/
+ /** @stable ICU 54 */
+ UBLOCK_MRO = 237, /*[16A40]*/
+ /** @stable ICU 54 */
+ UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/
+ /** @stable ICU 54 */
+ UBLOCK_NABATAEAN = 239, /*[10880]*/
+ /** @stable ICU 54 */
+ UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/
+ /** @stable ICU 54 */
+ UBLOCK_OLD_PERMIC = 241, /*[10350]*/
+ /** @stable ICU 54 */
+ UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/
+ /** @stable ICU 54 */
+ UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/
+ /** @stable ICU 54 */
+ UBLOCK_PALMYRENE = 244, /*[10860]*/
+ /** @stable ICU 54 */
+ UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/
+ /** @stable ICU 54 */
+ UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/
+ /** @stable ICU 54 */
+ UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247, /*[1BCA0]*/
+ /** @stable ICU 54 */
+ UBLOCK_SIDDHAM = 248, /*[11580]*/
+ /** @stable ICU 54 */
+ UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249, /*[111E0]*/
+ /** @stable ICU 54 */
+ UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/
+ /** @stable ICU 54 */
+ UBLOCK_TIRHUTA = 251, /*[11480]*/
+ /** @stable ICU 54 */
+ UBLOCK_WARANG_CITI = 252, /*[118A0]*/
+
+ /* New blocks in Unicode 8.0 */
+
+ /** @stable ICU 56 */
+ UBLOCK_AHOM = 253, /*[11700]*/
+ /** @stable ICU 56 */
+ UBLOCK_ANATOLIAN_HIEROGLYPHS = 254, /*[14400]*/
+ /** @stable ICU 56 */
+ UBLOCK_CHEROKEE_SUPPLEMENT = 255, /*[AB70]*/
+ /** @stable ICU 56 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 256, /*[2B820]*/
+ /** @stable ICU 56 */
+ UBLOCK_EARLY_DYNASTIC_CUNEIFORM = 257, /*[12480]*/
+ /** @stable ICU 56 */
+ UBLOCK_HATRAN = 258, /*[108E0]*/
+ /** @stable ICU 56 */
+ UBLOCK_MULTANI = 259, /*[11280]*/
+ /** @stable ICU 56 */
+ UBLOCK_OLD_HUNGARIAN = 260, /*[10C80]*/
+ /** @stable ICU 56 */
+ UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 261, /*[1F900]*/
+ /** @stable ICU 56 */
+ UBLOCK_SUTTON_SIGNWRITING = 262, /*[1D800]*/
+
+ /* New blocks in Unicode 9.0 */
+
+ /** @stable ICU 58 */
+ UBLOCK_ADLAM = 263, /*[1E900]*/
+ /** @stable ICU 58 */
+ UBLOCK_BHAIKSUKI = 264, /*[11C00]*/
+ /** @stable ICU 58 */
+ UBLOCK_CYRILLIC_EXTENDED_C = 265, /*[1C80]*/
+ /** @stable ICU 58 */
+ UBLOCK_GLAGOLITIC_SUPPLEMENT = 266, /*[1E000]*/
+ /** @stable ICU 58 */
+ UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 267, /*[16FE0]*/
+ /** @stable ICU 58 */
+ UBLOCK_MARCHEN = 268, /*[11C70]*/
+ /** @stable ICU 58 */
+ UBLOCK_MONGOLIAN_SUPPLEMENT = 269, /*[11660]*/
+ /** @stable ICU 58 */
+ UBLOCK_NEWA = 270, /*[11400]*/
+ /** @stable ICU 58 */
+ UBLOCK_OSAGE = 271, /*[104B0]*/
+ /** @stable ICU 58 */
+ UBLOCK_TANGUT = 272, /*[17000]*/
+ /** @stable ICU 58 */
+ UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/
+
+ // New blocks in Unicode 10.0
+
+ /** @stable ICU 60 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 274, /*[2CEB0]*/
+ /** @stable ICU 60 */
+ UBLOCK_KANA_EXTENDED_A = 275, /*[1B100]*/
+ /** @stable ICU 60 */
+ UBLOCK_MASARAM_GONDI = 276, /*[11D00]*/
+ /** @stable ICU 60 */
+ UBLOCK_NUSHU = 277, /*[1B170]*/
+ /** @stable ICU 60 */
+ UBLOCK_SOYOMBO = 278, /*[11A50]*/
+ /** @stable ICU 60 */
+ UBLOCK_SYRIAC_SUPPLEMENT = 279, /*[0860]*/
+ /** @stable ICU 60 */
+ UBLOCK_ZANABAZAR_SQUARE = 280, /*[11A00]*/
+
+ // New blocks in Unicode 11.0
+
+ /** @stable ICU 62 */
+ UBLOCK_CHESS_SYMBOLS = 281, /*[1FA00]*/
+ /** @stable ICU 62 */
+ UBLOCK_DOGRA = 282, /*[11800]*/
+ /** @stable ICU 62 */
+ UBLOCK_GEORGIAN_EXTENDED = 283, /*[1C90]*/
+ /** @stable ICU 62 */
+ UBLOCK_GUNJALA_GONDI = 284, /*[11D60]*/
+ /** @stable ICU 62 */
+ UBLOCK_HANIFI_ROHINGYA = 285, /*[10D00]*/
+ /** @stable ICU 62 */
+ UBLOCK_INDIC_SIYAQ_NUMBERS = 286, /*[1EC70]*/
+ /** @stable ICU 62 */
+ UBLOCK_MAKASAR = 287, /*[11EE0]*/
+ /** @stable ICU 62 */
+ UBLOCK_MAYAN_NUMERALS = 288, /*[1D2E0]*/
+ /** @stable ICU 62 */
+ UBLOCK_MEDEFAIDRIN = 289, /*[16E40]*/
+ /** @stable ICU 62 */
+ UBLOCK_OLD_SOGDIAN = 290, /*[10F00]*/
+ /** @stable ICU 62 */
+ UBLOCK_SOGDIAN = 291, /*[10F30]*/
+
+ // New blocks in Unicode 12.0
+
+ /** @stable ICU 64 */
+ UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 292, /*[13430]*/
+ /** @stable ICU 64 */
+ UBLOCK_ELYMAIC = 293, /*[10FE0]*/
+ /** @stable ICU 64 */
+ UBLOCK_NANDINAGARI = 294, /*[119A0]*/
+ /** @stable ICU 64 */
+ UBLOCK_NYIAKENG_PUACHUE_HMONG = 295, /*[1E100]*/
+ /** @stable ICU 64 */
+ UBLOCK_OTTOMAN_SIYAQ_NUMBERS = 296, /*[1ED00]*/
+ /** @stable ICU 64 */
+ UBLOCK_SMALL_KANA_EXTENSION = 297, /*[1B130]*/
+ /** @stable ICU 64 */
+ UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 298, /*[1FA70]*/
+ /** @stable ICU 64 */
+ UBLOCK_TAMIL_SUPPLEMENT = 299, /*[11FC0]*/
+ /** @stable ICU 64 */
+ UBLOCK_WANCHO = 300, /*[1E2C0]*/
+
+ // New blocks in Unicode 13.0
+
+ /** @stable ICU 66 */
+ UBLOCK_CHORASMIAN = 301, /*[10FB0]*/
+ /** @stable ICU 66 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 302, /*[30000]*/
+ /** @stable ICU 66 */
+ UBLOCK_DIVES_AKURU = 303, /*[11900]*/
+ /** @stable ICU 66 */
+ UBLOCK_KHITAN_SMALL_SCRIPT = 304, /*[18B00]*/
+ /** @stable ICU 66 */
+ UBLOCK_LISU_SUPPLEMENT = 305, /*[11FB0]*/
+ /** @stable ICU 66 */
+ UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING = 306, /*[1FB00]*/
+ /** @stable ICU 66 */
+ UBLOCK_TANGUT_SUPPLEMENT = 307, /*[18D00]*/
+ /** @stable ICU 66 */
+ UBLOCK_YEZIDI = 308, /*[10E80]*/
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UBlockCode value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BLOCK).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UBLOCK_COUNT = 309,
+#endif // U_HIDE_DEPRECATED_API
+
+ /** @stable ICU 2.0 */
+ UBLOCK_INVALID_CODE=-1
+};
+
+/**
+ * Convenience typedef so that the block-code enum can be written as plain
+ * UBlockCode (without the "enum" keyword) in C code.
+ * @stable ICU 2.0
+ */
+typedef enum UBlockCode UBlockCode;
+
+/**
+ * East Asian Width constants.
+ *
+ * The enumerators have no explicit initializers, so they are numbered
+ * consecutively in declaration order: U_EA_NEUTRAL is 0 and U_EA_WIDE is 5.
+ *
+ * @see UCHAR_EAST_ASIAN_WIDTH
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+typedef enum UEastAsianWidth {
+ /*
+ * Note: UEastAsianWidth constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_EA_<Unicode East_Asian_Width value name>
+ */
+
+ /* NOTE(review): the bracketed tag after each constant is presumably the
+ * short UCD alias of the value; confirm against PropertyValueAliases.txt. */
+ U_EA_NEUTRAL, /*[N]*/
+ U_EA_AMBIGUOUS, /*[A]*/
+ U_EA_HALFWIDTH, /*[H]*/
+ U_EA_FULLWIDTH, /*[F]*/
+ U_EA_NARROW, /*[Na]*/
+ U_EA_WIDE, /*[W]*/
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UEastAsianWidth value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH).
+ * Only declared when U_HIDE_DEPRECATED_API is not defined.
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_EA_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UEastAsianWidth;
+
+/**
+ * Selector constants for u_charName().
+ * u_charName() returns the "modern" name of a
+ * Unicode character; or the name that was defined in
+ * Unicode version 1.0, before the Unicode standard merged
+ * with ISO-10646; or an "extended" name that gives each
+ * Unicode code point a unique name.
+ *
+ * @see u_charName
+ * @stable ICU 2.0
+ */
+typedef enum UCharNameChoice {
+ /** Unicode character name (Name property). @stable ICU 2.0 */
+ U_UNICODE_CHAR_NAME,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * The Unicode_1_Name property value which is of little practical value.
+ * Beginning with ICU 49, ICU APIs return an empty string for this name choice.
+ * @deprecated ICU 49
+ */
+ U_UNICODE_10_CHAR_NAME,
+#endif /* U_HIDE_DEPRECATED_API */
+ /*
+ * The explicit "+2" pins U_EXTENDED_CHAR_NAME to the same numeric value
+ * whether or not the deprecated U_UNICODE_10_CHAR_NAME enumerator above is
+ * compiled in; without it the value would shift when
+ * U_HIDE_DEPRECATED_API is defined.
+ */
+ /** Standard or synthetic character name. @stable ICU 2.0 */
+ U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2,
+ /** Corrected name from NameAliases.txt. @stable ICU 4.4 */
+ U_CHAR_NAME_ALIAS,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UCharNameChoice value.
+ * Only declared when U_HIDE_DEPRECATED_API is not defined.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_CHAR_NAME_CHOICE_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UCharNameChoice;
+
+/**
+ * Selector constants for u_getPropertyName() and
+ * u_getPropertyValueName(). These selectors are used to choose which
+ * name is returned for a given property or value. All properties and
+ * values have a long name. Most have a short name, but some do not.
+ * Unicode allows for additional names, beyond the long and short
+ * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where
+ * i=1, 2,...
+ *
+ * @see u_getPropertyName()
+ * @see u_getPropertyValueName()
+ * @stable ICU 2.4
+ */
+typedef enum UPropertyNameChoice {
+ /** Selects the short name (numeric value 0); not every property or value has one. */
+ U_SHORT_PROPERTY_NAME,
+ /** Selects the long name (numeric value 1); every property and value has one. */
+ U_LONG_PROPERTY_NAME,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UPropertyNameChoice value.
+ * Only declared when U_HIDE_DEPRECATED_API is not defined.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_PROPERTY_NAME_CHOICE_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UPropertyNameChoice;
+
+/**
+ * Decomposition Type constants.
+ *
+ * The enumerators have no explicit initializers, so they are numbered
+ * consecutively in declaration order: U_DT_NONE is 0 and U_DT_WIDE is 17.
+ *
+ * @see UCHAR_DECOMPOSITION_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UDecompositionType {
+ /*
+ * Note: UDecompositionType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_DT_<Unicode Decomposition_Type value name>
+ */
+
+ U_DT_NONE, /*[none]*/
+ U_DT_CANONICAL, /*[can]*/
+ U_DT_COMPAT, /*[com]*/
+ U_DT_CIRCLE, /*[enc]*/
+ U_DT_FINAL, /*[fin]*/
+ U_DT_FONT, /*[font]*/
+ U_DT_FRACTION, /*[fra]*/
+ U_DT_INITIAL, /*[init]*/
+ U_DT_ISOLATED, /*[iso]*/
+ U_DT_MEDIAL, /*[med]*/
+ U_DT_NARROW, /*[nar]*/
+ U_DT_NOBREAK, /*[nb]*/
+ U_DT_SMALL, /*[sml]*/
+ U_DT_SQUARE, /*[sqr]*/
+ U_DT_SUB, /*[sub]*/
+ U_DT_SUPER, /*[sup]*/
+ U_DT_VERTICAL, /*[vert]*/
+ U_DT_WIDE, /*[wide]*/
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UDecompositionType value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE).
+ * Only declared when U_HIDE_DEPRECATED_API is not defined.
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_DT_COUNT /* 18 */
+#endif // U_HIDE_DEPRECATED_API
+} UDecompositionType;
+
+/**
+ * Joining Type constants.
+ *
+ * The enumerators have no explicit initializers, so they are numbered
+ * consecutively in declaration order: U_JT_NON_JOINING is 0 and
+ * U_JT_TRANSPARENT is 5.
+ *
+ * @see UCHAR_JOINING_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UJoiningType {
+ /*
+ * Note: UJoiningType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_JT_<Unicode Joining_Type value name>
+ */
+
+ U_JT_NON_JOINING, /*[U]*/
+ U_JT_JOIN_CAUSING, /*[C]*/
+ U_JT_DUAL_JOINING, /*[D]*/
+ U_JT_LEFT_JOINING, /*[L]*/
+ U_JT_RIGHT_JOINING, /*[R]*/
+ U_JT_TRANSPARENT, /*[T]*/
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UJoiningType value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE).
+ * Only declared when U_HIDE_DEPRECATED_API is not defined.
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_JT_COUNT /* 6 */
+#endif // U_HIDE_DEPRECATED_API
+} UJoiningType;
+
+/**
+ * Joining Group constants.
+ *
+ * The enumerators are numbered in declaration order starting at
+ * U_JG_NO_JOINING_GROUP=0. The only explicit initializer is the
+ * U_JG_HAMZA_ON_HEH_GOAL alias, which does not consume a new number.
+ *
+ * @see UCHAR_JOINING_GROUP
+ * @stable ICU 2.2
+ */
+typedef enum UJoiningGroup {
+ /*
+ * Note: UJoiningGroup constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_JG_<Unicode Joining_Group value name>
+ */
+
+ U_JG_NO_JOINING_GROUP,
+ U_JG_AIN,
+ U_JG_ALAPH,
+ U_JG_ALEF,
+ U_JG_BEH,
+ U_JG_BETH,
+ U_JG_DAL,
+ U_JG_DALATH_RISH,
+ U_JG_E,
+ U_JG_FEH,
+ U_JG_FINAL_SEMKATH,
+ U_JG_GAF,
+ U_JG_GAMAL,
+ U_JG_HAH,
+ U_JG_TEH_MARBUTA_GOAL, /**< @stable ICU 4.6 */
+ /*
+ * Second name for the same numeric value as U_JG_TEH_MARBUTA_GOAL.
+ * Because an enumerator without an initializer is "previous + 1",
+ * U_JG_HE below continues counting from this shared value.
+ */
+ U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL,
+ U_JG_HE,
+ U_JG_HEH,
+ U_JG_HEH_GOAL,
+ U_JG_HETH,
+ U_JG_KAF,
+ U_JG_KAPH,
+ U_JG_KNOTTED_HEH,
+ U_JG_LAM,
+ U_JG_LAMADH,
+ U_JG_MEEM,
+ U_JG_MIM,
+ U_JG_NOON,
+ U_JG_NUN,
+ U_JG_PE,
+ U_JG_QAF,
+ U_JG_QAPH,
+ U_JG_REH,
+ U_JG_REVERSED_PE,
+ U_JG_SAD,
+ U_JG_SADHE,
+ U_JG_SEEN,
+ U_JG_SEMKATH,
+ U_JG_SHIN,
+ U_JG_SWASH_KAF,
+ U_JG_SYRIAC_WAW,
+ U_JG_TAH,
+ U_JG_TAW,
+ U_JG_TEH_MARBUTA,
+ U_JG_TETH,
+ U_JG_WAW,
+ U_JG_YEH,
+ U_JG_YEH_BARREE,
+ U_JG_YEH_WITH_TAIL,
+ U_JG_YUDH,
+ U_JG_YUDH_HE,
+ U_JG_ZAIN,
+ U_JG_FE, /**< @stable ICU 2.6 */
+ U_JG_KHAPH, /**< @stable ICU 2.6 */
+ U_JG_ZHAIN, /**< @stable ICU 2.6 */
+ U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */
+ U_JG_FARSI_YEH, /**< @stable ICU 4.4 */
+ U_JG_NYA, /**< @stable ICU 4.4 */
+ U_JG_ROHINGYA_YEH, /**< @stable ICU 49 */
+ U_JG_MANICHAEAN_ALEPH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_AYIN, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_BETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_DALETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_DHAMEDH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_FIVE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_GIMEL, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_HETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_HUNDRED, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_KAPH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_LAMEDH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_MEM, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_NUN, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_ONE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_PE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_QOPH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_RESH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_SADHE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_SAMEKH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TAW, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TEN, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_THAMEDH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TWENTY, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_WAW, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_YODH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_ZAYIN, /**< @stable ICU 54 */
+ U_JG_STRAIGHT_WAW, /**< @stable ICU 54 */
+ U_JG_AFRICAN_FEH, /**< @stable ICU 58 */
+ U_JG_AFRICAN_NOON, /**< @stable ICU 58 */
+ U_JG_AFRICAN_QAF, /**< @stable ICU 58 */
+
+ U_JG_MALAYALAM_BHA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_JA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_LLA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_LLLA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_NGA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_NNA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_NNNA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_NYA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_RA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_SSA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_TTA, /**< @stable ICU 60 */
+
+ U_JG_HANIFI_ROHINGYA_KINNA_YA, /**< @stable ICU 62 */
+ U_JG_HANIFI_ROHINGYA_PA, /**< @stable ICU 62 */
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UJoiningGroup value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP).
+ * Only declared when U_HIDE_DEPRECATED_API is not defined.
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_JG_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UJoiningGroup;
+
+/**
+ * Grapheme Cluster Break constants.
+ *
+ * Every enumerator has an explicit numeric value (0..17); the deprecated
+ * U_GCB_COUNT (18) is one past the last value, U_GCB_ZWJ.
+ *
+ * @see UCHAR_GRAPHEME_CLUSTER_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum UGraphemeClusterBreak {
+ /*
+ * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_GCB_<Unicode Grapheme_Cluster_Break value name>
+ */
+
+ U_GCB_OTHER = 0, /*[XX]*/
+ U_GCB_CONTROL = 1, /*[CN]*/
+ U_GCB_CR = 2, /*[CR]*/
+ U_GCB_EXTEND = 3, /*[EX]*/
+ U_GCB_L = 4, /*[L]*/
+ U_GCB_LF = 5, /*[LF]*/
+ U_GCB_LV = 6, /*[LV]*/
+ U_GCB_LVT = 7, /*[LVT]*/
+ U_GCB_T = 8, /*[T]*/
+ U_GCB_V = 9, /*[V]*/
+ /** @stable ICU 4.0 */
+ U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
+ /** @stable ICU 4.0 */
+ U_GCB_PREPEND = 11, /*[PP]*/
+ /** @stable ICU 50 */
+ U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+ /** @stable ICU 58 */
+ U_GCB_E_BASE = 13, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
+ /** @stable ICU 58 */
+ U_GCB_E_BASE_GAZ = 14, /*[EBG]*/
+ /** @stable ICU 58 */
+ U_GCB_E_MODIFIER = 15, /*[EM]*/
+ /** @stable ICU 58 */
+ U_GCB_GLUE_AFTER_ZWJ = 16, /*[GAZ]*/
+ /** @stable ICU 58 */
+ U_GCB_ZWJ = 17, /*[ZWJ]*/
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UGraphemeClusterBreak value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK).
+ * Only declared when U_HIDE_DEPRECATED_API is not defined.
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_GCB_COUNT = 18
+#endif // U_HIDE_DEPRECATED_API
+} UGraphemeClusterBreak;
+
+/**
+ * Word Break constants.
+ * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)
+ *
+ * Every enumerator has an explicit numeric value (0..22); the deprecated
+ * U_WB_COUNT (23) is one past the last value, U_WB_WSEGSPACE.
+ *
+ * @see UCHAR_WORD_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum UWordBreakValues {
+ /*
+ * Note: UWordBreakValues constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_WB_<Unicode Word_Break value name>
+ */
+
+ U_WB_OTHER = 0, /*[XX]*/
+ U_WB_ALETTER = 1, /*[LE]*/
+ U_WB_FORMAT = 2, /*[FO]*/
+ U_WB_KATAKANA = 3, /*[KA]*/
+ U_WB_MIDLETTER = 4, /*[ML]*/
+ U_WB_MIDNUM = 5, /*[MN]*/
+ U_WB_NUMERIC = 6, /*[NU]*/
+ U_WB_EXTENDNUMLET = 7, /*[EX]*/
+ /** @stable ICU 4.0 */
+ U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
+ /** @stable ICU 4.0 */
+ U_WB_EXTEND = 9, /*[Extend]*/
+ /** @stable ICU 4.0 */
+ U_WB_LF = 10, /*[LF]*/
+ /** @stable ICU 4.0 */
+ U_WB_MIDNUMLET =11, /*[MB]*/
+ /** @stable ICU 4.0 */
+ U_WB_NEWLINE =12, /*[NL]*/
+ /** @stable ICU 50 */
+ U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+ /** @stable ICU 52 */
+ U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
+ /** @stable ICU 52 */
+ U_WB_SINGLE_QUOTE = 15, /*[SQ]*/
+ /** @stable ICU 52 */
+ U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/
+ /** @stable ICU 58 */
+ U_WB_E_BASE = 17, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
+ /** @stable ICU 58 */
+ U_WB_E_BASE_GAZ = 18, /*[EBG]*/
+ /** @stable ICU 58 */
+ U_WB_E_MODIFIER = 19, /*[EM]*/
+ /** @stable ICU 58 */
+ U_WB_GLUE_AFTER_ZWJ = 20, /*[GAZ]*/
+ /** @stable ICU 58 */
+ U_WB_ZWJ = 21, /*[ZWJ]*/
+ /** @stable ICU 62 */
+ U_WB_WSEGSPACE = 22, /*[WSEGSPACE]*/
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UWordBreakValues value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_WORD_BREAK).
+ * Only declared when U_HIDE_DEPRECATED_API is not defined.
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_WB_COUNT = 23
+#endif // U_HIDE_DEPRECATED_API
+} UWordBreakValues;
+
+/**
+ * Sentence Break constants.
+ *
+ * Every enumerator has an explicit numeric value (0..14); the deprecated
+ * U_SB_COUNT (15) is one past the last value, U_SB_SCONTINUE.
+ *
+ * @see UCHAR_SENTENCE_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum USentenceBreak {
+ /*
+ * Note: USentenceBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_SB_<Unicode Sentence_Break value name>
+ */
+
+ U_SB_OTHER = 0, /*[XX]*/
+ U_SB_ATERM = 1, /*[AT]*/
+ U_SB_CLOSE = 2, /*[CL]*/
+ U_SB_FORMAT = 3, /*[FO]*/
+ U_SB_LOWER = 4, /*[LO]*/
+ U_SB_NUMERIC = 5, /*[NU]*/
+ U_SB_OLETTER = 6, /*[LE]*/
+ U_SB_SEP = 7, /*[SE]*/
+ U_SB_SP = 8, /*[SP]*/
+ U_SB_STERM = 9, /*[ST]*/
+ U_SB_UPPER = 10, /*[UP]*/
+ U_SB_CR = 11, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
+ U_SB_EXTEND = 12, /*[EX]*/
+ U_SB_LF = 13, /*[LF]*/
+ U_SB_SCONTINUE = 14, /*[SC]*/
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal USentenceBreak value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK).
+ * Only declared when U_HIDE_DEPRECATED_API is not defined.
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_SB_COUNT = 15
+#endif // U_HIDE_DEPRECATED_API
+} USentenceBreak;
+
+/**
+ * Line Break constants.
+ *
+ * Every value enumerator has an explicit numeric value (0..42); the
+ * deprecated U_LB_COUNT (43) is one past the last value, U_LB_ZWJ.
+ * U_LB_INSEPERABLE is an alias and does not add a new value.
+ *
+ * @see UCHAR_LINE_BREAK
+ * @stable ICU 2.2
+ */
+typedef enum ULineBreak {
+ /*
+ * Note: ULineBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_LB_<Unicode Line_Break value name>
+ */
+
+ U_LB_UNKNOWN = 0, /*[XX]*/
+ U_LB_AMBIGUOUS = 1, /*[AI]*/
+ U_LB_ALPHABETIC = 2, /*[AL]*/
+ U_LB_BREAK_BOTH = 3, /*[B2]*/
+ U_LB_BREAK_AFTER = 4, /*[BA]*/
+ U_LB_BREAK_BEFORE = 5, /*[BB]*/
+ U_LB_MANDATORY_BREAK = 6, /*[BK]*/
+ U_LB_CONTINGENT_BREAK = 7, /*[CB]*/
+ U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/
+ U_LB_COMBINING_MARK = 9, /*[CM]*/
+ U_LB_CARRIAGE_RETURN = 10, /*[CR]*/
+ U_LB_EXCLAMATION = 11, /*[EX]*/
+ U_LB_GLUE = 12, /*[GL]*/
+ U_LB_HYPHEN = 13, /*[HY]*/
+ U_LB_IDEOGRAPHIC = 14, /*[ID]*/
+ /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */
+ U_LB_INSEPARABLE = 15, /*[IN]*/
+ /* Old misspelled name, kept as an alias with the same value (15) as U_LB_INSEPARABLE. */
+ U_LB_INSEPERABLE = U_LB_INSEPARABLE,
+ U_LB_INFIX_NUMERIC = 16, /*[IS]*/
+ U_LB_LINE_FEED = 17, /*[LF]*/
+ U_LB_NONSTARTER = 18, /*[NS]*/
+ U_LB_NUMERIC = 19, /*[NU]*/
+ U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/
+ U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/
+ U_LB_PREFIX_NUMERIC = 22, /*[PR]*/
+ U_LB_QUOTATION = 23, /*[QU]*/
+ U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/
+ U_LB_SURROGATE = 25, /*[SG]*/
+ U_LB_SPACE = 26, /*[SP]*/
+ U_LB_BREAK_SYMBOLS = 27, /*[SY]*/
+ U_LB_ZWSPACE = 28, /*[ZW]*/
+ /** @stable ICU 2.6 */
+ U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
+ /** @stable ICU 2.6 */
+ U_LB_WORD_JOINER = 30, /*[WJ]*/
+ /** @stable ICU 3.4 */
+ U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
+ /** @stable ICU 3.4 */
+ U_LB_H3 = 32, /*[H3]*/
+ /** @stable ICU 3.4 */
+ U_LB_JL = 33, /*[JL]*/
+ /** @stable ICU 3.4 */
+ U_LB_JT = 34, /*[JT]*/
+ /** @stable ICU 3.4 */
+ U_LB_JV = 35, /*[JV]*/
+ /** @stable ICU 4.4 */
+ U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
+ /** @stable ICU 49 */
+ U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
+ /** @stable ICU 49 */
+ U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
+ /** @stable ICU 50 */
+ U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+ /** @stable ICU 58 */
+ U_LB_E_BASE = 40, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
+ /** @stable ICU 58 */
+ U_LB_E_MODIFIER = 41, /*[EM]*/
+ /** @stable ICU 58 */
+ U_LB_ZWJ = 42, /*[ZWJ]*/
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal ULineBreak value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_LINE_BREAK).
+ * Only declared when U_HIDE_DEPRECATED_API is not defined.
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_LB_COUNT = 43
+#endif // U_HIDE_DEPRECATED_API
+} ULineBreak;
+
+/**
+ * Numeric Type constants.
+ *
+ * The enumerators have no explicit initializers, so they are numbered
+ * consecutively in declaration order: U_NT_NONE is 0 and U_NT_NUMERIC is 3.
+ *
+ * @see UCHAR_NUMERIC_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UNumericType {
+ /*
+ * Note: UNumericType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_NT_<Unicode Numeric_Type value name>
+ */
+
+ U_NT_NONE, /*[None]*/
+ U_NT_DECIMAL, /*[de]*/
+ U_NT_DIGIT, /*[di]*/
+ U_NT_NUMERIC, /*[nu]*/
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UNumericType value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE).
+ * Only declared when U_HIDE_DEPRECATED_API is not defined.
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_NT_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UNumericType;
+
+/**
+ * Hangul Syllable Type constants.
+ *
+ * The enumerators have no explicit initializers, so they are numbered
+ * consecutively in declaration order: U_HST_NOT_APPLICABLE is 0 and
+ * U_HST_LVT_SYLLABLE is 5.
+ *
+ * @see UCHAR_HANGUL_SYLLABLE_TYPE
+ * @stable ICU 2.6
+ */
+typedef enum UHangulSyllableType {
+ /*
+ * Note: UHangulSyllableType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_HST_<Unicode Hangul_Syllable_Type value name>
+ */
+
+ U_HST_NOT_APPLICABLE, /*[NA]*/
+ U_HST_LEADING_JAMO, /*[L]*/
+ U_HST_VOWEL_JAMO, /*[V]*/
+ U_HST_TRAILING_JAMO, /*[T]*/
+ U_HST_LV_SYLLABLE, /*[LV]*/
+ U_HST_LVT_SYLLABLE, /*[LVT]*/
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UHangulSyllableType value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE).
+ * Only declared when U_HIDE_DEPRECATED_API is not defined.
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_HST_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UHangulSyllableType;
+
+/**
+ * Indic Positional Category constants.
+ *
+ * The enumerators have no explicit initializers, so they are numbered
+ * consecutively in declaration order: U_INPC_NA is 0 and
+ * U_INPC_TOP_AND_BOTTOM_AND_LEFT is 15.
+ * Unlike the older property enums in this header, no deprecated *_COUNT
+ * constant is declared for this type.
+ *
+ * @see UCHAR_INDIC_POSITIONAL_CATEGORY
+ * @stable ICU 63
+ */
+typedef enum UIndicPositionalCategory {
+ /*
+ * Note: UIndicPositionalCategory constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_INPC_<Unicode Indic_Positional_Category value name>
+ */
+
+ /** @stable ICU 63 */
+ U_INPC_NA,
+ /** @stable ICU 63 */
+ U_INPC_BOTTOM,
+ /** @stable ICU 63 */
+ U_INPC_BOTTOM_AND_LEFT,
+ /** @stable ICU 63 */
+ U_INPC_BOTTOM_AND_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_LEFT,
+ /** @stable ICU 63 */
+ U_INPC_LEFT_AND_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_OVERSTRUCK,
+ /** @stable ICU 63 */
+ U_INPC_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_TOP,
+ /** @stable ICU 63 */
+ U_INPC_TOP_AND_BOTTOM,
+ /** @stable ICU 63 */
+ U_INPC_TOP_AND_BOTTOM_AND_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_TOP_AND_LEFT,
+ /** @stable ICU 63 */
+ U_INPC_TOP_AND_LEFT_AND_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_TOP_AND_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_VISUAL_ORDER_LEFT,
+ /*
+ * Added later (ICU 66) and placed at the end rather than in alphabetical
+ * position; presumably so the numeric values of the ICU 63 constants
+ * above did not change (TODO confirm).
+ */
+ /** @stable ICU 66 */
+ U_INPC_TOP_AND_BOTTOM_AND_LEFT,
+} UIndicPositionalCategory;
+
+/**
+ * Indic Syllabic Category constants.
+ *
+ * The enumerators have no explicit initializers, so they are numbered
+ * consecutively in declaration order: U_INSC_OTHER is 0 and
+ * U_INSC_VOWEL_INDEPENDENT is 35.
+ * No deprecated *_COUNT constant is declared for this type.
+ *
+ * @see UCHAR_INDIC_SYLLABIC_CATEGORY
+ * @stable ICU 63
+ */
+typedef enum UIndicSyllabicCategory {
+ /*
+ * Note: UIndicSyllabicCategory constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_INSC_<Unicode Indic_Syllabic_Category value name>
+ */
+
+ /** @stable ICU 63 */
+ U_INSC_OTHER,
+ /** @stable ICU 63 */
+ U_INSC_AVAGRAHA,
+ /** @stable ICU 63 */
+ U_INSC_BINDU,
+ /** @stable ICU 63 */
+ U_INSC_BRAHMI_JOINING_NUMBER,
+ /** @stable ICU 63 */
+ U_INSC_CANTILLATION_MARK,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_DEAD,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_FINAL,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_HEAD_LETTER,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_INITIAL_POSTFIXED,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_KILLER,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_MEDIAL,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_PLACEHOLDER,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_PRECEDING_REPHA,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_PREFIXED,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_SUBJOINED,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_SUCCEEDING_REPHA,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_WITH_STACKER,
+ /** @stable ICU 63 */
+ U_INSC_GEMINATION_MARK,
+ /** @stable ICU 63 */
+ U_INSC_INVISIBLE_STACKER,
+ /** @stable ICU 63 */
+ U_INSC_JOINER,
+ /** @stable ICU 63 */
+ U_INSC_MODIFYING_LETTER,
+ /** @stable ICU 63 */
+ U_INSC_NON_JOINER,
+ /** @stable ICU 63 */
+ U_INSC_NUKTA,
+ /** @stable ICU 63 */
+ U_INSC_NUMBER,
+ /** @stable ICU 63 */
+ U_INSC_NUMBER_JOINER,
+ /** @stable ICU 63 */
+ U_INSC_PURE_KILLER,
+ /** @stable ICU 63 */
+ U_INSC_REGISTER_SHIFTER,
+ /** @stable ICU 63 */
+ U_INSC_SYLLABLE_MODIFIER,
+ /** @stable ICU 63 */
+ U_INSC_TONE_LETTER,
+ /** @stable ICU 63 */
+ U_INSC_TONE_MARK,
+ /** @stable ICU 63 */
+ U_INSC_VIRAMA,
+ /** @stable ICU 63 */
+ U_INSC_VISARGA,
+ /** @stable ICU 63 */
+ U_INSC_VOWEL,
+ /** @stable ICU 63 */
+ U_INSC_VOWEL_DEPENDENT,
+ /** @stable ICU 63 */
+ U_INSC_VOWEL_INDEPENDENT,
+} UIndicSyllabicCategory;
+
+/**
+ * Vertical Orientation constants.
+ *
+ * The enumerators have no explicit initializers, so they are numbered
+ * consecutively in declaration order: U_VO_ROTATED is 0 and U_VO_UPRIGHT is 3.
+ * No deprecated *_COUNT constant is declared for this type.
+ *
+ * @see UCHAR_VERTICAL_ORIENTATION
+ * @stable ICU 63
+ */
+typedef enum UVerticalOrientation {
+ /*
+ * Note: UVerticalOrientation constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_VO_<Unicode Vertical_Orientation value name>
+ */
+
+ /** @stable ICU 63 */
+ U_VO_ROTATED,
+ /** @stable ICU 63 */
+ U_VO_TRANSFORMED_ROTATED,
+ /** @stable ICU 63 */
+ U_VO_TRANSFORMED_UPRIGHT,
+ /** @stable ICU 63 */
+ U_VO_UPRIGHT,
+} UVerticalOrientation;
+
+/**
+ * Check a binary Unicode property for a code point.
+ *
+ * Unicode, especially in version 3.2, defines many more properties than the
+ * original set in UnicodeData.txt.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ucd/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Important: If ICU is built with UCD files from Unicode versions below 3.2,
+ * then properties marked with "new in Unicode 3.2" are not or not fully available.
+ *
+ * Sample usage:
+ * UBool isAlpha=u_hasBinaryProperty(c, UCHAR_ALPHABETIC);
+ *
+ * @param c Code point to test.
+ * @param which UProperty selector constant, identifies which binary property to check.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT.
+ * @return true or false according to the binary Unicode property value for c.
+ * Also false if 'which' is out of bounds or if the Unicode version
+ * does not have data for the property at all, or not for this code point.
+ * Note that a false result therefore does not distinguish
+ * "property is false" from "selector invalid / no data".
+ *
+ * @see UProperty
+ * @see u_getBinaryPropertySet
+ * @see u_getIntPropertyValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.1
+ */
+U_CAPI UBool U_EXPORT2
+u_hasBinaryProperty(UChar32 c, UProperty which);
+
+/**
+ * Returns a frozen USet for a binary property.
+ * The library retains ownership over the returned object, so the caller
+ * must not close or otherwise dispose of it.
+ * Sets an error code if the property number is not one for a binary property.
+ *
+ * The returned set contains all code points for which the property is true.
+ *
+ * @param property UCHAR_BINARY_START..UCHAR_BINARY_LIMIT-1
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the property as a set
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see Unicode::fromUSet
+ * (NOTE(review): probably meant to be UnicodeSet::fromUSet, the C++
+ * wrapper for a USet; confirm against uniset.h.)
+ * @stable ICU 63
+ */
+U_CAPI const USet * U_EXPORT2
+u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode);
+
+/**
+ * Check if a code point has the Alphabetic Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
+ * Note: this is not the same test as u_isalpha(); do not use the two
+ * interchangeably.
+ * @param c Code point to test
+ * @return true if the code point has the Alphabetic Unicode property, false otherwise
+ *
+ * @see UCHAR_ALPHABETIC
+ * @see u_isalpha
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_CAPI UBool U_EXPORT2
+u_isUAlphabetic(UChar32 c);
+
+/**
+ * Check if a code point has the Lowercase Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE).
+ * Note: this is not the same test as u_islower(); do not use the two
+ * interchangeably.
+ * @param c Code point to test
+ * @return true if the code point has the Lowercase Unicode property, false otherwise
+ *
+ * @see UCHAR_LOWERCASE
+ * @see u_islower
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_CAPI UBool U_EXPORT2
+u_isULowercase(UChar32 c);
+
+/**
+ * Check if a code point has the Uppercase Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE).
+ * Note: this is not the same test as u_isupper(); do not use the two
+ * interchangeably.
+ * @param c Code point to test
+ * @return true if the code point has the Uppercase Unicode property, false otherwise
+ *
+ * @see UCHAR_UPPERCASE
+ * @see u_isupper
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_CAPI UBool U_EXPORT2
+u_isUUppercase(UChar32 c);
+
+/**
+ * Check if a code point has the White_Space Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE).
+ * Note: this is not the same test as either u_isspace() or
+ * u_isWhitespace(); do not use them interchangeably.
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c Code point to test
+ * @return true if the code point has the White_Space Unicode property, false otherwise.
+ *
+ * @see UCHAR_WHITE_SPACE
+ * @see u_isWhitespace
+ * @see u_isspace
+ * @see u_isJavaSpaceChar
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_CAPI UBool U_EXPORT2
+u_isUWhiteSpace(UChar32 c);
+
+/**
+ * Get the property value for an enumerated or integer Unicode property for a code point.
+ * Also returns binary and mask property values.
+ *
+ * Unicode, especially in version 3.2, defines many more properties than the
+ * original set in UnicodeData.txt.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Sample usage (the int32_t result is cast to the type that matches the
+ * requested property):
+ * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH);
+ * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);
+ *
+ * @param c Code point to test.
+ * @param which UProperty selector constant, identifies which property to check.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ * @return Numeric value that is directly the property value or,
+ * for enumerated properties, corresponds to the numeric value of the enumerated
+ * constant of the respective property value enumeration type
+ * (cast to enum type if necessary).
+ * Returns 0 or 1 (for false/true) for binary Unicode properties.
+ * Returns a bit-mask for mask properties.
+ * Returns 0 if 'which' is out of bounds or if the Unicode version
+ * does not have data for the property at all, or not for this code point.
+ * A result of 0 therefore does not by itself indicate an error.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getIntPropertyMinValue
+ * @see u_getIntPropertyMaxValue
+ * @see u_getIntPropertyMap
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+u_getIntPropertyValue(UChar32 c, UProperty which);
+
+/**
+ * Get the minimum value for an enumerated/integer/binary Unicode property.
+ * Can be used together with u_getIntPropertyMaxValue
+ * to allocate arrays of UnicodeSet or similar.
+ *
+ * @param which UProperty selector constant, identifies which property to check.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
+ * @return Minimum value returned by u_getIntPropertyValue for a Unicode property.
+ * 0 if the property selector is out of range.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getUnicodeVersion
+ * @see u_getIntPropertyMaxValue
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+u_getIntPropertyMinValue(UProperty which);
+
+/**
+ * Get the maximum value for an enumerated/integer/binary Unicode property.
+ * Can be used together with u_getIntPropertyMinValue
+ * to allocate arrays of UnicodeSet or similar.
+ *
+ * Examples for min/max values (for Unicode 3.2):
+ *
+ * - UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL)
+ * - UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA)
+ * - UCHAR_IDEOGRAPHIC: 0/1 (false/true)
+ *
+ * For undefined UProperty constant values, min/max values will be 0/-1.
+ *
+ * @param which UProperty selector constant, identifies which property to check.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
+ * @return Maximum value returned by u_getIntPropertyValue for a Unicode property.
+ * <=0 if the property selector is out of range.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getUnicodeVersion
+ * @see u_getIntPropertyMinValue
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+u_getIntPropertyMaxValue(UProperty which);
+
+/**
+ * Returns an immutable UCPMap for an enumerated/catalog/int-valued property.
+ * The library retains ownership over the returned object.
+ * Sets an error code if the property number is not one for an "int property".
+ *
+ * The returned object maps all Unicode code points to their values for that property.
+ * For documentation of the integer values see u_getIntPropertyValue().
+ *
+ * @param property UCHAR_INT_START..UCHAR_INT_LIMIT-1
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the property as a map
+ * @see UProperty
+ * @see u_getIntPropertyValue
+ * @stable ICU 63
+ */
+U_CAPI const UCPMap * U_EXPORT2
+u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode);
+
+/**
+ * Get the numeric value for a Unicode code point as defined in the
+ * Unicode Character Database.
+ *
+ * A "double" return type is necessary because
+ * some numeric values are fractions, negative, or too large for int32_t.
+ *
+ * For characters without any numeric values in the Unicode Character Database,
+ * this function will return U_NO_NUMERIC_VALUE.
+ * Note: This is different from the Unicode Standard which specifies NaN as the default value.
+ * (NaN is not available on all platforms.)
+ *
+ * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue()
+ * also supports negative values, large values, and fractions,
+ * while Java's getNumericValue() returns values 10..35 for ASCII letters.
+ *
+ * @param c Code point to get the numeric value for.
+ * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.
+ *
+ * @see U_NO_NUMERIC_VALUE
+ * @stable ICU 2.2
+ */
+U_CAPI double U_EXPORT2
+u_getNumericValue(UChar32 c);
+
+/**
+ * Special value that is returned by u_getNumericValue when
+ * no numeric value is defined for a code point.
+ *
+ * @see u_getNumericValue
+ * @stable ICU 2.2
+ */
+#define U_NO_NUMERIC_VALUE ((double)-123456789.)
+
+/**
+ * Determines whether the specified code point has the general category "Ll"
+ * (lowercase letter).
+ *
+ * Same as java.lang.Character.isLowerCase().
+ *
+ * This misses some characters that are also lowercase but
+ * have a different general category value.
+ * In order to include those, use UCHAR_LOWERCASE.
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is an Ll lowercase letter
+ *
+ * @see UCHAR_LOWERCASE
+ * @see u_isupper
+ * @see u_istitle
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_islower(UChar32 c);
+
+/**
+ * Determines whether the specified code point has the general category "Lu"
+ * (uppercase letter).
+ *
+ * Same as java.lang.Character.isUpperCase().
+ *
+ * This misses some characters that are also uppercase but
+ * have a different general category value.
+ * In order to include those, use UCHAR_UPPERCASE.
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is an Lu uppercase letter
+ *
+ * @see UCHAR_UPPERCASE
+ * @see u_islower
+ * @see u_istitle
+ * @see u_tolower
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isupper(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a titlecase letter.
+ * True for general category "Lt" (titlecase letter).
+ *
+ * Same as java.lang.Character.isTitleCase().
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is an Lt titlecase letter
+ *
+ * @see u_isupper
+ * @see u_islower
+ * @see u_totitle
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_istitle(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a digit character according to Java.
+ * True for characters with general category "Nd" (decimal digit numbers).
+ * Beginning with Unicode 4, this is the same as
+ * testing for the Numeric_Type of Decimal.
+ *
+ * Same as java.lang.Character.isDigit().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a digit character according to Character.isDigit()
+ *
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isdigit(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a letter character.
+ * True for general categories "L" (letters).
+ *
+ * Same as java.lang.Character.isLetter().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a letter character
+ *
+ * @see u_isdigit
+ * @see u_isalnum
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isalpha(UChar32 c);
+
+/**
+ * Determines whether the specified code point is an alphanumeric character
+ * (letter or digit) according to Java.
+ * True for characters with general categories
+ * "L" (letters) and "Nd" (decimal digit numbers).
+ *
+ * Same as java.lang.Character.isLetterOrDigit().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is an alphanumeric character according to Character.isLetterOrDigit()
+ *
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isalnum(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a hexadecimal digit.
+ * This is equivalent to u_digit(c, 16)>=0.
+ * True for characters with general category "Nd" (decimal digit numbers)
+ * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII.
+ * (That is, for letters with code points
+ * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)
+ *
+ * In order to narrow the definition of hexadecimal digits to only ASCII
+ * characters, use (c<=0x7f && u_isxdigit(c)).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a hexadecimal digit
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI UBool U_EXPORT2
+u_isxdigit(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a punctuation character.
+ * True for characters with general categories "P" (punctuation).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a punctuation character
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI UBool U_EXPORT2
+u_ispunct(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a "graphic" character
+ * (printable, excluding spaces).
+ * True for all characters except those with general categories
+ * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates),
+ * "Cn" (unassigned), and "Z" (separators).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a "graphic" character
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI UBool U_EXPORT2
+u_isgraph(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a "blank" or "horizontal space",
+ * a character that visibly separates words on a line.
+ * The following are equivalent definitions:
+ *
+ * true for Unicode White_Space characters except for "vertical space controls"
+ * where "vertical space controls" are the following characters:
+ * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS)
+ *
+ * same as
+ *
+ * true for U+0009 (TAB) and characters with general category "Zs" (space separators).
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a "blank"
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI UBool U_EXPORT2
+u_isblank(UChar32 c);
+
+/**
+ * Determines whether the specified code point is "defined",
+ * which usually means that it is assigned a character.
+ * True for general categories other than "Cn" (other, not assigned),
+ * i.e., true for all code points mentioned in UnicodeData.txt.
+ *
+ * Note that non-character code points (e.g., U+FDD0) are not "defined"
+ * (they are Cn), but surrogate code points are "defined" (Cs).
+ *
+ * Same as java.lang.Character.isDefined().
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is assigned a character
+ *
+ * @see u_isdigit
+ * @see u_isalpha
+ * @see u_isalnum
+ * @see u_isupper
+ * @see u_islower
+ * @see u_istitle
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isdefined(UChar32 c);
+
+/**
+ * Determines if the specified character is a space character or not.
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the character to be tested
+ * @return true if the character is a space character; false otherwise.
+ *
+ * @see u_isJavaSpaceChar
+ * @see u_isWhitespace
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isspace(UChar32 c);
+
+/**
+ * Determine if the specified code point is a space character according to Java.
+ * True for characters with general categories "Z" (separators),
+ * which does not include control codes (e.g., TAB or Line Feed).
+ *
+ * Same as java.lang.Character.isSpaceChar().
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a space character according to Character.isSpaceChar()
+ *
+ * @see u_isspace
+ * @see u_isWhitespace
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.6
+ */
+U_CAPI UBool U_EXPORT2
+u_isJavaSpaceChar(UChar32 c);
+
+/**
+ * Determines if the specified code point is a whitespace character according to Java/ICU.
+ * A character is considered to be a Java whitespace character if and only
+ * if it satisfies one of the following criteria:
+ *
+ * - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
+ * also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
+ * - It is U+0009 HORIZONTAL TABULATION.
+ * - It is U+000A LINE FEED.
+ * - It is U+000B VERTICAL TABULATION.
+ * - It is U+000C FORM FEED.
+ * - It is U+000D CARRIAGE RETURN.
+ * - It is U+001C FILE SEPARATOR.
+ * - It is U+001D GROUP SEPARATOR.
+ * - It is U+001E RECORD SEPARATOR.
+ * - It is U+001F UNIT SEPARATOR.
+ *
+ * This API tries to sync with the semantics of Java's
+ * java.lang.Character.isWhitespace(), but it may not return
+ * the exact same results because of the Unicode version
+ * difference.
+ *
+ * Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
+ * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
+ * See http://www.unicode.org/versions/Unicode4.0.1/
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a whitespace character according to Java/ICU
+ *
+ * @see u_isspace
+ * @see u_isJavaSpaceChar
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isWhitespace(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a control character
+ * (as defined by this function).
+ * A control character is one of the following:
+ * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
+ * - U_CONTROL_CHAR (Cc)
+ * - U_FORMAT_CHAR (Cf)
+ * - U_LINE_SEPARATOR (Zl)
+ * - U_PARAGRAPH_SEPARATOR (Zp)
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a control character
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_isprint
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_iscntrl(UChar32 c);
+
+/**
+ * Determines whether the specified code point is an ISO control code.
+ * True for U+0000..U+001f and U+007f..U+009f (general category "Cc").
+ *
+ * Same as java.lang.Character.isISOControl().
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is an ISO control code
+ *
+ * @see u_iscntrl
+ * @stable ICU 2.6
+ */
+U_CAPI UBool U_EXPORT2
+u_isISOControl(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a printable character.
+ * True for general categories <em>other</em> than "C" (controls).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a printable character
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_iscntrl
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isprint(UChar32 c);
+
+/**
+ * Non-standard: Determines whether the specified code point is a base character.
+ * True for general categories "L" (letters), "N" (numbers),
+ * "Mc" (spacing combining marks), and "Me" (enclosing marks).
+ *
+ * Note that this is different from the Unicode Standard definition in
+ * chapter 3.6, conformance clause D51 "Base character",
+ * which defines base characters as the code points with general categories
+ * Letter (L), Number (N), Punctuation (P), Symbol (S), or Space Separator (Zs).
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a base character according to this function
+ *
+ * @see u_isalpha
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isbase(UChar32 c);
+
+/**
+ * Returns the bidirectional category value for the code point,
+ * which is used in the Unicode bidirectional algorithm
+ * (UAX #9 http://www.unicode.org/reports/tr9/).
+ * Note that some <em>unassigned</em> code points have bidi values
+ * of R or AL because they are in blocks that are reserved
+ * for Right-To-Left scripts.
+ *
+ * Same as java.lang.Character.getDirectionality()
+ *
+ * @param c the code point to be tested
+ * @return the bidirectional category (UCharDirection) value
+ *
+ * @see UCharDirection
+ * @stable ICU 2.0
+ */
+U_CAPI UCharDirection U_EXPORT2
+u_charDirection(UChar32 c);
+
+/**
+ * Determines whether the code point has the Bidi_Mirrored property.
+ * This property is set for characters that are commonly used in
+ * Right-To-Left contexts and need to be displayed with a "mirrored"
+ * glyph.
+ *
+ * Same as java.lang.Character.isMirrored().
+ * Same as u_hasBinaryProperty(c, UCHAR_BIDI_MIRRORED).
+ *
+ * @param c the code point to be tested
+ * @return true if the character has the Bidi_Mirrored property
+ *
+ * @see UCHAR_BIDI_MIRRORED
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isMirrored(UChar32 c);
+
+/**
+ * Maps the specified character to a "mirror-image" character.
+ * For characters with the Bidi_Mirrored property, implementations
+ * sometimes need a "poor man's" mapping to another Unicode
+ * character (code point) such that the default glyph may serve
+ * as the mirror-image of the default glyph of the specified
+ * character. This is useful for text conversion to and from
+ * codepages with visual order, and for displays without glyph
+ * selection capabilities.
+ *
+ * @param c the code point to be mapped
+ * @return another Unicode code point that may serve as a mirror-image
+ * substitute, or c itself if there is no such mapping or c
+ * does not have the Bidi_Mirrored property
+ *
+ * @see UCHAR_BIDI_MIRRORED
+ * @see u_isMirrored
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+u_charMirror(UChar32 c);
+
+/**
+ * Maps the specified character to its paired bracket character.
+ * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror().
+ * Otherwise c itself is returned.
+ * See http://www.unicode.org/reports/tr9/
+ *
+ * @param c the code point to be mapped
+ * @return the paired bracket code point,
+ * or c itself if there is no such mapping
+ * (Bidi_Paired_Bracket_Type=None)
+ *
+ * @see UCHAR_BIDI_PAIRED_BRACKET
+ * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
+ * @see u_charMirror
+ * @stable ICU 52
+ */
+U_CAPI UChar32 U_EXPORT2
+u_getBidiPairedBracket(UChar32 c);
+
+/**
+ * Returns the general category value for the code point.
+ *
+ * Same as java.lang.Character.getType().
+ *
+ * @param c the code point to be tested
+ * @return the general category (UCharCategory) value
+ *
+ * @see UCharCategory
+ * @stable ICU 2.0
+ */
+U_CAPI int8_t U_EXPORT2
+u_charType(UChar32 c);
+
+/**
+ * Get a single-bit bit set for the general category of a character.
+ * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc.
+ * Same as U_MASK(u_charType(c)).
+ *
+ * @param c the code point to be tested
+ * @return a single-bit mask corresponding to the general category (UCharCategory) value
+ *
+ * @see u_charType
+ * @see UCharCategory
+ * @see U_GC_CN_MASK
+ * @stable ICU 2.1
+ */
+#define U_GET_GC_MASK(c) U_MASK(u_charType(c))
+
+/**
+ * Callback from u_enumCharTypes(), is called for each contiguous range
+ * of code points c (where start<=c<limit)
+ * with the same Unicode general category ("character type").
+ *
+ * The callback function can stop the enumeration by returning false.
+ *
+ * @param context an opaque pointer, as passed into u_enumCharTypes()
+ * @param start the first code point in a contiguous range with value
+ * @param limit one past the last code point in a contiguous range with value
+ * @param type the general category for all code points in [start..limit[
+ * @return false to stop the enumeration
+ *
+ * @stable ICU 2.1
+ * @see UCharCategory
+ * @see u_enumCharTypes
+ */
+typedef UBool U_CALLCONV
+UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
+
+/**
+ * Enumerate efficiently all code points with their Unicode general categories.
+ *
+ * This is useful for building data structures (e.g., UnicodeSet's),
+ * for enumerating all assigned code points (type!=U_UNASSIGNED), etc.
+ *
+ * For each contiguous range of code points with a given general category ("character type"),
+ * the UCharEnumTypeRange function is called.
+ * Adjacent ranges have different types.
+ * The Unicode Standard guarantees that the numeric value of the type is 0..31.
+ *
+ * @param enumRange a pointer to a function that is called for each contiguous range
+ * of code points with the same general category
+ * @param context an opaque pointer that is passed on to the callback function
+ *
+ * @stable ICU 2.1
+ * @see UCharCategory
+ * @see UCharEnumTypeRange
+ */
+U_CAPI void U_EXPORT2
+u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/**
+ * Returns the combining class of the code point as specified in UnicodeData.txt.
+ *
+ * @param c the code point of the character
+ * @return the combining class of the character
+ * @stable ICU 2.0
+ */
+U_CAPI uint8_t U_EXPORT2
+u_getCombiningClass(UChar32 c);
+
+#endif
+
+/**
+ * Returns the decimal digit value of a decimal digit character.
+ * Such characters have the general category "Nd" (decimal digit numbers)
+ * and a Numeric_Type of Decimal.
+ *
+ * Unlike ICU releases before 2.6, no digit values are returned for any
+ * Han characters because Han number characters are often used with a special
+ * Chinese-style number format (with characters for powers of 10 in between)
+ * instead of in decimal-positional notation.
+ * Unicode 4 explicitly assigns Han number characters the Numeric_Type
+ * Numeric instead of Decimal.
+ * See Jitterbug 1483 for more details.
+ *
+ * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue()
+ * for complete numeric Unicode properties.
+ *
+ * @param c the code point for which to get the decimal digit value
+ * @return the decimal digit value of c,
+ * or -1 if c is not a decimal digit character
+ *
+ * @see u_getNumericValue
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_charDigitValue(UChar32 c);
+
+/**
+ * Returns the Unicode allocation block that contains the character.
+ *
+ * @param c the code point to be tested
+ * @return the block value (UBlockCode) for c
+ *
+ * @see UBlockCode
+ * @stable ICU 2.0
+ */
+U_CAPI UBlockCode U_EXPORT2
+ublock_getCode(UChar32 c);
+
+/**
+ * Retrieve the name of a Unicode character.
+ * Depending on <code>nameChoice</code>, the character name written
+ * into the buffer is the "modern" name or the name that was defined
+ * in Unicode version 1.0.
+ * The name contains only "invariant" characters
+ * like A-Z, 0-9, space, and '-'.
+ * Unicode 1.0 names are only retrieved if they are different from the modern
+ * names and if the data file contains the data for them. gennames may or may
+ * not be called with a command line option to include 1.0 names in unames.dat.
+ *
+ * @param code The character (code point) for which to get the name.
+ * It must be <code>0<=code<=0x10ffff</code>.
+ * @param nameChoice Selector for which name to get.
+ * @param buffer Destination address for copying the name.
+ * The name will always be zero-terminated.
+ * If there is no name, then the buffer will be set to the empty string.
+ * @param bufferLength <code>==sizeof(buffer)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable;
+ * check for <code>U_SUCCESS()</code> after <code>u_charName()</code>
+ * returns.
+ * @return The length of the name, or 0 if there is no name for this character.
+ * If the bufferLength is less than or equal to the length, then the buffer
+ * contains the truncated name and the returned length indicates the full
+ * length of the name.
+ * The length does not include the zero-termination.
+ *
+ * @see UCharNameChoice
+ * @see u_charFromName
+ * @see u_enumCharNames
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_charName(UChar32 code, UCharNameChoice nameChoice,
+ char *buffer, int32_t bufferLength,
+ UErrorCode *pErrorCode);
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Returns an empty string.
+ * Used to return the ISO 10646 comment for a character.
+ * The Unicode ISO_Comment property is deprecated and has no values.
+ *
+ * @param c The character (code point) for which to get the ISO comment.
+ * It must be <code>0<=c<=0x10ffff</code>.
+ * @param dest Destination address for copying the comment.
+ * The comment will be zero-terminated if possible.
+ * If there is no comment, then the buffer will be set to the empty string.
+ * @param destCapacity <code>==sizeof(dest)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable;
+ * check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code>
+ * returns.
+ * @return 0
+ *
+ * @deprecated ICU 49
+ */
+U_DEPRECATED int32_t U_EXPORT2
+u_getISOComment(UChar32 c,
+ char *dest, int32_t destCapacity,
+ UErrorCode *pErrorCode);
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Find a Unicode character by its name and return its code point value.
+ * The name is matched exactly and completely.
+ * If the name does not correspond to a code point, <i>pErrorCode</i>
+ * is set to <code>U_INVALID_CHAR_FOUND</code>.
+ * A Unicode 1.0 name is matched only if it differs from the modern name.
+ * Unicode names are all uppercase. Extended names are lowercase followed
+ * by an uppercase hexadecimal number, and within angle brackets.
+ *
+ * @param nameChoice Selector for which name to match.
+ * @param name The name to match.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ * @return The Unicode value of the code point with the given name,
+ * or an undefined value if there is no such code point.
+ *
+ * @see UCharNameChoice
+ * @see u_charName
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+U_CAPI UChar32 U_EXPORT2
+u_charFromName(UCharNameChoice nameChoice,
+ const char *name,
+ UErrorCode *pErrorCode);
+
+/**
+ * Type of a callback function for u_enumCharNames() that gets called
+ * for each Unicode character with the code point value and
+ * the character name.
+ * If such a function returns false, then the enumeration is stopped.
+ *
+ * @param context The context pointer that was passed to u_enumCharNames().
+ * @param code The Unicode code point for the character with this name.
+ * @param nameChoice Selector for which kind of names is enumerated.
+ * @param name The character's name, zero-terminated.
+ * @param length The length of the name.
+ * @return true if the enumeration should continue, false to stop it.
+ *
+ * @see UCharNameChoice
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,
+ UChar32 code,
+ UCharNameChoice nameChoice,
+ const char *name,
+ int32_t length);
+
+/**
+ * Enumerate all assigned Unicode characters between the start and limit
+ * code points (start inclusive, limit exclusive) and call a function
+ * for each, passing the code point value and the character name.
+ * For Unicode 1.0 names, only those are enumerated that differ from the
+ * modern names.
+ *
+ * @param start The first code point in the enumeration range.
+ * @param limit One more than the last code point in the enumeration range
+ * (the first one after the range).
+ * @param fn The function that is to be called for each character name.
+ * @param context An arbitrary pointer that is passed to the function.
+ * @param nameChoice Selector for which kind of names to enumerate.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ *
+ * @see UCharNameChoice
+ * @see UEnumCharNamesFn
+ * @see u_charName
+ * @see u_charFromName
+ * @stable ICU 1.7
+ */
+U_CAPI void U_EXPORT2
+u_enumCharNames(UChar32 start, UChar32 limit,
+ UEnumCharNamesFn *fn,
+ void *context,
+ UCharNameChoice nameChoice,
+ UErrorCode *pErrorCode);
+
+/**
+ * Return the Unicode name for a given property, as given in the
+ * Unicode database file PropertyAliases.txt.
+ *
+ * In addition, this function maps the property
+ * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
+ * "General_Category_Mask". These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param property UProperty selector other than UCHAR_INVALID_CODE.
+ * If out of range, NULL is returned.
+ *
+ * @param nameChoice selector for which name to get. If out of range,
+ * NULL is returned. All properties have a long name. Most
+ * have a short name, but some do not. Unicode allows for
+ * additional names; if present these will be returned by
+ * U_LONG_PROPERTY_NAME + i, where i=1, 2,...
+ *
+ * @return a pointer to the name, or NULL if either the
+ * property or the nameChoice is out of range. If a given
+ * nameChoice returns NULL, then all larger values of
+ * nameChoice will return NULL, with one exception: if NULL is
+ * returned for U_SHORT_PROPERTY_NAME, then
+ * U_LONG_PROPERTY_NAME (and higher) may still return a
+ * non-NULL value. The returned pointer is valid until
+ * u_cleanup() is called.
+ *
+ * @see UProperty
+ * @see UPropertyNameChoice
+ * @stable ICU 2.4
+ */
+U_CAPI const char* U_EXPORT2
+u_getPropertyName(UProperty property,
+ UPropertyNameChoice nameChoice);
+
+/**
+ * Return the UProperty enum for a given property name, as specified
+ * in the Unicode database file PropertyAliases.txt. Short, long, and
+ * any other variants are recognized.
+ *
+ * In addition, this function maps the synthetic names "gcm" /
+ * "General_Category_Mask" to the property
+ * UCHAR_GENERAL_CATEGORY_MASK. These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param alias the property name to be matched. The name is compared
+ * using "loose matching" as described in PropertyAliases.txt.
+ *
+ * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name
+ * does not match any property.
+ *
+ * @see UProperty
+ * @stable ICU 2.4
+ */
+U_CAPI UProperty U_EXPORT2
+u_getPropertyEnum(const char* alias);
+
+/**
+ * Return the Unicode name for a given property value, as given in the
+ * Unicode database file PropertyValueAliases.txt.
+ *
+ * Note: Some of the names in PropertyValueAliases.txt can only be
+ * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not
+ * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
+ * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
+ * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
+ *
+ * @param property UProperty selector constant.
+ * Must be UCHAR_BINARY_START<=property<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=property<UCHAR_INT_LIMIT
+ * or UCHAR_MASK_START<=property<UCHAR_MASK_LIMIT.
+ * If out of range, NULL is returned.
+ *
+ * @param value selector for a value for the given property. If out
+ * of range, NULL is returned. In general, valid values range
+ * from 0 up to some maximum. There are a few exceptions:
+ * (1.) UCHAR_BLOCK values begin at the non-zero value
+ * UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS
+ * values are not contiguous and range from 0..240. (3.)
+ * UCHAR_GENERAL_CATEGORY_MASK values are not values of
+ * UCharCategory, but rather mask values produced by
+ * U_GET_GC_MASK(). This allows grouped categories such as
+ * [:L:] to be represented. Mask values range
+ * non-contiguously from 1..U_GC_P_MASK.
+ *
+ * @param nameChoice selector for which name to get. If out of range,
+ * NULL is returned. All values have a long name. Most have
+ * a short name, but some do not. Unicode allows for
+ * additional names; if present these will be returned by
+ * U_LONG_PROPERTY_NAME + i, where i=1, 2,...
+ *
+ * @return a pointer to the name, or NULL if either the
+ * property or the nameChoice is out of range. If a given
+ * nameChoice returns NULL, then all larger values of
+ * nameChoice will return NULL, with one exception: if NULL is
+ * returned for U_SHORT_PROPERTY_NAME, then
+ * U_LONG_PROPERTY_NAME (and higher) may still return a
+ * non-NULL value. The returned pointer is valid until
+ * u_cleanup() is called.
+ *
+ * @see UProperty
+ * @see UPropertyNameChoice
+ * @stable ICU 2.4
+ */
+U_CAPI const char* U_EXPORT2
+u_getPropertyValueName(UProperty property,
+ int32_t value,
+ UPropertyNameChoice nameChoice);
+
+/**
+ * Return the property value integer for a given value name, as
+ * specified in the Unicode database file PropertyValueAliases.txt.
+ * Short, long, and any other variants are recognized.
+ *
+ * Note: Some of the names in PropertyValueAliases.txt will only be
+ * recognized with UCHAR_GENERAL_CATEGORY_MASK, not
+ * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
+ * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
+ * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
+ *
+ * @param property UProperty selector constant.
+ * Must be UCHAR_BINARY_START<=property<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=property<UCHAR_INT_LIMIT
+ * or UCHAR_MASK_START<=property<UCHAR_MASK_LIMIT.
+ * If out of range, UCHAR_INVALID_CODE is returned.
+ *
+ * @param alias the value name to be matched. The name is compared
+ * using "loose matching" as described in
+ * PropertyValueAliases.txt.
+ *
+ * @return a value integer or UCHAR_INVALID_CODE if the given name
+ * does not match any value of the given property, or if the
+ * property is invalid. Note: UCHAR_GENERAL_CATEGORY_MASK values
+ * are not values of UCharCategory, but rather mask values
+ * produced by U_GET_GC_MASK(). This allows grouped
+ * categories such as [:L:] to be represented.
+ *
+ * @see UProperty
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+u_getPropertyValueEnum(UProperty property,
+ const char* alias);
+
+/**
+ * Determines if the specified character is permissible as the
+ * first character in an identifier according to Unicode
+ * (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).
+ * True for characters with general categories "L" (letters) and "Nl" (letter numbers).
+ *
+ * Same as java.lang.Character.isUnicodeIdentifierStart().
+ * Same as UCHAR_ID_START
+ *
+ * @param c the code point to be tested
+ * @return true if the code point may start an identifier
+ *
+ * @see UCHAR_ID_START
+ * @see u_isalpha
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isIDStart(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible
+ * in an identifier according to Java.
+ * True for characters with general categories "L" (letters),
+ * "Nl" (letter numbers), "Nd" (decimal digits),
+ * "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and
+ * u_isIDIgnorable(c).
+ *
+ * Same as java.lang.Character.isUnicodeIdentifierPart().
+ * Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE),
+ * except that Unicode recommends ignoring only Cf, which covers fewer
+ * characters than u_isIDIgnorable(c).
+ *
+ * @param c the code point to be tested
+ * @return true if the code point may occur in an identifier according to Java
+ *
+ * @see UCHAR_ID_CONTINUE
+ * @see u_isIDStart
+ * @see u_isIDIgnorable
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isIDPart(UChar32 c);
+
+/**
+ * Determines if the specified character should be regarded
+ * as an ignorable character in an identifier,
+ * according to Java.
+ * True for characters with general category "Cf" (format controls) as well as
+ * non-whitespace ISO controls
+ * (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F).
+ *
+ * Same as java.lang.Character.isIdentifierIgnorable().
+ *
+ * Note that Unicode just recommends to ignore Cf (format controls).
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is ignorable in identifiers according to Java
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_isIDStart
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isIDIgnorable(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible as the
+ * first character in a Java identifier.
+ * In addition to u_isIDStart(c), true for characters with
+ * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation).
+ *
+ * Same as java.lang.Character.isJavaIdentifierStart().
+ *
+ * @param c the code point to be tested
+ * @return true if the code point may start a Java identifier
+ *
+ * @see u_isJavaIDPart
+ * @see u_isalpha
+ * @see u_isIDStart
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isJavaIDStart(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible
+ * in a Java identifier.
+ * In addition to u_isIDPart(c), true for characters with
+ * general category "Sc" (currency symbols).
+ *
+ * Same as java.lang.Character.isJavaIdentifierPart().
+ *
+ * @param c the code point to be tested
+ * @return true if the code point may occur in a Java identifier
+ *
+ * @see u_isIDIgnorable
+ * @see u_isJavaIDStart
+ * @see u_isalpha
+ * @see u_isdigit
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isJavaIDPart(UChar32 c);
+
+/**
+ * The given character is mapped to its lowercase equivalent according to
+ * UnicodeData.txt; if the character has no lowercase equivalent, the character
+ * itself is returned.
+ *
+ * Same as java.lang.Character.toLowerCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Lowercase_Mapping of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+u_tolower(UChar32 c);
+
+/**
+ * The given character is mapped to its uppercase equivalent according to UnicodeData.txt;
+ * if the character has no uppercase equivalent, the character itself is
+ * returned.
+ *
+ * Same as java.lang.Character.toUpperCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Uppercase_Mapping of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+u_toupper(UChar32 c);
+
+/**
+ * The given character is mapped to its titlecase equivalent
+ * according to UnicodeData.txt;
+ * if none is defined, the character itself is returned.
+ *
+ * Same as java.lang.Character.toTitleCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Titlecase_Mapping of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+u_totitle(UChar32 c);
+
+/**
+ * The given character is mapped to its case folding equivalent according to
+ * UnicodeData.txt and CaseFolding.txt;
+ * if the character has no case folding equivalent, the character
+ * itself is returned.
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return the Simple_Case_Folding of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+u_foldCase(UChar32 c, uint32_t options);
+
+/**
+ * Returns the decimal digit value of the code point in the
+ * specified radix.
+ *
+ * If the radix is not in the range <code>2<=radix<=36</code> or if the
+ * value of <code>ch</code> is not a valid digit in the specified
+ * radix, <code>-1</code> is returned. A character is a valid digit
+ * if at least one of the following is true:
+ * <ul>
+ * <li>The character has a decimal digit value.
+ * Such characters have the general category "Nd" (decimal digit numbers)
+ * and a Numeric_Type of Decimal.
+ * In this case the value is the character's decimal digit value.</li>
+ * <li>The character is one of the uppercase Latin letters
+ * <code>'A'</code> through <code>'Z'</code>.
+ * In this case the value is <code>ch-'A'+10</code>.</li>
+ * <li>The character is one of the lowercase Latin letters
+ * <code>'a'</code> through <code>'z'</code>.
+ * In this case the value is <code>ch-'a'+10</code>.</li>
+ * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A)
+ * as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A)
+ * are recognized.</li>
+ * </ul>
+ *
+ * Same as java.lang.Character.digit().
+ *
+ * @param ch the code point to be tested.
+ * @param radix the radix.
+ * @return the numeric value represented by the character in the
+ * specified radix,
+ * or -1 if there is no value or if the value exceeds the radix.
+ *
+ * @see UCHAR_NUMERIC_TYPE
+ * @see u_forDigit
+ * @see u_charDigitValue
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_digit(UChar32 ch, int8_t radix);
+
+/**
+ * Determines the character representation for a specific digit in
+ * the specified radix. If the value of <code>radix</code> is not a
+ * valid radix, or the value of <code>digit</code> is not a valid
+ * digit in the specified radix, the null character
+ * (<code>U+0000</code>) is returned.
+ * <p>
+ * The <code>radix</code> argument is valid if it is greater than or
+ * equal to 2 and less than or equal to 36.
+ * The <code>digit</code> argument is valid if
+ * <code>0 <= digit < radix</code>.
+ * <p>
+ * If the digit is less than 10, then
+ * <code>'0' + digit</code> is returned. Otherwise, the value
+ * <code>'a' + digit - 10</code> is returned.
+ *
+ * Same as java.lang.Character.forDigit().
+ *
+ * @param digit the number to convert to a character.
+ * @param radix the radix.
+ * @return the <code>char</code> representation of the specified digit
+ * in the specified radix.
+ *
+ * @see u_digit
+ * @see u_charDigitValue
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+u_forDigit(int32_t digit, int8_t radix);
+
+/**
+ * Get the "age" of the code point.
+ * The "age" is the Unicode version when the code point was first
+ * designated (as a non-character or for Private Use)
+ * or assigned a character.
+ * This can be useful to avoid emitting code points to receiving
+ * processes that do not accept newer characters.
+ * The data is from the UCD file DerivedAge.txt.
+ *
+ * @param c The code point.
+ * @param versionArray The Unicode version number array, to be filled in.
+ *
+ * @stable ICU 2.1
+ */
+U_CAPI void U_EXPORT2
+u_charAge(UChar32 c, UVersionInfo versionArray);
+
+/**
+ * Gets the Unicode version information.
+ * The version array is filled in with the version information
+ * for the Unicode standard that is currently used by ICU.
+ * For example, Unicode version 3.1.1 is represented as an array with
+ * the values { 3, 1, 1, 0 }.
+ *
+ * @param versionArray an output array that will be filled in with
+ * the Unicode version number
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+u_getUnicodeVersion(UVersionInfo versionArray);
+
+#if !UCONFIG_NO_NORMALIZATION
+/**
+ * Get the FC_NFKC_Closure property string for a character.
+ * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure"
+ * or for "FNC": http://www.unicode.org/reports/tr15/
+ *
+ * @param c The character (code point) for which to get the FC_NFKC_Closure string.
+ * It must be <code>0<=c<=0x10ffff</code>.
+ * @param dest Destination address for copying the string.
+ * The string will be zero-terminated if possible.
+ * If there is no FC_NFKC_Closure string,
+ * then the buffer will be set to the empty string.
+ * @param destCapacity <code>==sizeof(dest)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable.
+ * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character.
+ * If the destCapacity is less than or equal to the length, then the buffer
+ * contains the truncated string and the returned length indicates the full
+ * length of the string.
+ * The length does not include the zero-termination.
+ *
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
+
+#endif
+
+
+U_CDECL_END
+
+#endif /*_UCHAR*/
+/*eof*/
diff --git a/thirdparty/icu4c/common/unicode/ucharstrie.h b/thirdparty/icu4c/common/unicode/ucharstrie.h
new file mode 100644
index 0000000000..b6f9e3e075
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ucharstrie.h
@@ -0,0 +1,623 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ucharstrie.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010nov14
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UCHARSTRIE_H__
+#define __UCHARSTRIE_H__
+
+/**
+ * \file
+ * \brief C++ API: Trie for mapping Unicode strings (or 16-bit-unit sequences)
+ * to integer values.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/unistr.h"
+#include "unicode/uobject.h"
+#include "unicode/ustringtrie.h"
+
+U_NAMESPACE_BEGIN
+
+class Appendable;
+class UCharsTrieBuilder;
+class UVector32;
+
+/**
+ * Light-weight, non-const reader class for a UCharsTrie.
+ * Traverses a char16_t-serialized data structure with minimal state,
+ * for mapping strings (16-bit-unit sequences) to non-negative integer values.
+ *
+ * This class owns the serialized trie data only if it was constructed by
+ * the builder's build() method.
+ * The public constructor and the copy constructor only alias the data (only copy the pointer).
+ * There is no assignment operator.
+ *
+ * This class is not intended for public subclassing.
+ * @stable ICU 4.8
+ */
+class U_COMMON_API UCharsTrie : public UMemory {
+public:
+ /**
+ * Constructs a UCharsTrie reader instance.
+ *
+ * The trieUChars must contain a copy of a char16_t sequence from the UCharsTrieBuilder,
+ * starting with the first char16_t of that sequence.
+ * The UCharsTrie object will not read more char16_ts than
+ * the UCharsTrieBuilder generated in the corresponding build() call.
+ *
+ * The array is not copied/cloned and must not be modified while
+ * the UCharsTrie object is in use.
+ *
+ * The new reader does not own the array (ownedArray_ is NULL). It starts at
+ * the beginning of the array: pos_ is set to uchars_ and
+ * remainingMatchLength_ to -1, the same values that reset() assigns.
+ *
+ * @param trieUChars The char16_t array that contains the serialized trie.
+ * @stable ICU 4.8
+ */
+ UCharsTrie(ConstChar16Ptr trieUChars)
+ : ownedArray_(NULL), uchars_(trieUChars),
+ pos_(uchars_), remainingMatchLength_(-1) {}
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ ~UCharsTrie();
+
+ /**
+ * Copy constructor, copies the other trie reader object and its state,
+ * but not the char16_t array which will be shared. (Shallow copy.)
+ *
+ * The copy never owns the array: its ownedArray_ is NULL regardless of
+ * whether other owns its data. pos_ and remainingMatchLength_ are taken
+ * from other, so the copy continues from other's current position.
+ * NOTE(review): presumably the copy must not be used after an owning
+ * original has been destroyed; the destructor is not defined in this
+ * header, so confirm against its implementation.
+ *
+ * @param other Another UCharsTrie object.
+ * @stable ICU 4.8
+ */
+ UCharsTrie(const UCharsTrie &other)
+ : ownedArray_(NULL), uchars_(other.uchars_),
+ pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
+
+ /**
+ * Resets this trie to its initial state: pos_ goes back to the start of
+ * the array (uchars_) and remainingMatchLength_ becomes -1.
+ * @return *this
+ * @stable ICU 4.8
+ */
+ UCharsTrie &reset() {
+     // The two assignments do not depend on each other.
+     remainingMatchLength_ = -1;
+     pos_ = uchars_;
+     return *this;
+ }
+
+ /**
+ * Packs the current traversal state into one 64-bit integer.
+ * The result combines (remainingMatchLength_ + 2), shifted left by
+ * kState64RemainingShift, with the offset of pos_ from uchars_.
+ * The state value is never 0.
+ *
+ * @return opaque state value
+ * @see resetToState64
+ * @stable ICU 65
+ */
+ uint64_t getState64() const {
+     const uint64_t biasedRemaining = static_cast<uint64_t>(remainingMatchLength_ + 2);
+     const uint64_t offset = static_cast<uint64_t>(pos_ - uchars_);
+     return offset | (biasedRemaining << kState64RemainingShift);
+ }
+
+ /**
+ * Restores a state previously obtained from getState64().
+ * Unlike resetToState(State), the 64-bit state value
+ * must be from getState64() from the same trie object or
+ * from one initialized the exact same way.
+ * The value is not validated, which is why this method is faster.
+ *
+ * @param state The opaque trie state value from getState64().
+ * @return *this
+ * @see getState64
+ * @see resetToState
+ * @see reset
+ * @stable ICU 65
+ */
+ UCharsTrie &resetToState64(uint64_t state) {
+     // Undo the packing done by getState64(): the bits selected by
+     // kState64PosMask are the offset from uchars_, and the bits from
+     // kState64RemainingShift upwards are remainingMatchLength_ + 2.
+     pos_ = uchars_ + (state & kState64PosMask);
+     const int32_t biasedRemaining = static_cast<int32_t>(state >> kState64RemainingShift);
+     remainingMatchLength_ = biasedRemaining - 2;
+     return *this;
+ }
+
+ /**
+ * Snapshot of a UCharsTrie's traversal state. saveState() fills it in and
+ * resetToState() reads it back.
+ * @stable ICU 4.8
+ */
+ class State : public UMemory {
+ public:
+     /**
+      * Constructs an empty State.
+      * Only uchars is initialized (to NULL); pos and remainingMatchLength
+      * are left unset until saveState() writes them.
+      * @stable ICU 4.8
+      */
+     State() : uchars(NULL) {}
+ private:
+     friend class UCharsTrie;
+
+     const char16_t *uchars;
+     const char16_t *pos;
+     int32_t remainingMatchLength;
+ };
+
+ /**
+ * Copies this trie's array pointer, current position and remaining match
+ * length into the given State object.
+ * @param state The State object to hold the trie's state.
+ * @return *this
+ * @see resetToState
+ * @stable ICU 4.8
+ */
+ const UCharsTrie &saveState(State &state) const {
+     // Three independent field copies; the order is irrelevant.
+     state.remainingMatchLength = remainingMatchLength_;
+     state.pos = pos_;
+     state.uchars = uchars_;
+     return *this;
+ }
+
+ /**
+ * Restores the position and remaining match length stored in a State.
+ * If the state object contains no state, or the state of a different trie,
+ * then this trie remains unchanged.
+ * @param state The State object which holds a saved trie state.
+ * @return *this
+ * @see saveState
+ * @see reset
+ * @stable ICU 4.8
+ */
+ UCharsTrie &resetToState(const State &state) {
+     // Leave this trie untouched unless the State was saved from the same,
+     // non-NULL array.
+     if(uchars_==NULL || uchars_!=state.uchars) {
+         return *this;
+     }
+     remainingMatchLength_=state.remainingMatchLength;
+     pos_=state.pos;
+     return *this;
+ }
+
+ /**
+ * Determines whether the string so far matches, whether it has a value,
+ * and whether another input char16_t can continue a matching string.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult current() const;
+
+ /**
+ * Traverses the trie from the initial state for this input char16_t.
+ * Equivalent to reset().next(uchar).
+ *
+ * Sets remainingMatchLength_ to -1 and then calls nextImpl() starting at
+ * uchars_, the beginning of the array. The assignment comes first because
+ * nextImpl() is declared with the note that it requires a negative
+ * remaining match length. pos_ is not assigned in this function itself.
+ *
+ * @param uchar Input char value. Values below 0 and above 0xffff will never match.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ inline UStringTrieResult first(int32_t uchar) {
+ remainingMatchLength_=-1;
+ return nextImpl(uchars_, uchar);
+ }
+
+ /**
+ * Traverses the trie from the initial state for the
+ * one or two UTF-16 code units for this input code point.
+ * Equivalent to reset().nextForCodePoint(cp).
+ * @param cp A Unicode code point 0..0x10ffff.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult firstForCodePoint(UChar32 cp);
+
+ /**
+ * Traverses the trie from the current state for this input char16_t.
+ * @param uchar Input char value. Values below 0 and above 0xffff will never match.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult next(int32_t uchar);
+
+ /**
+ * Traverses the trie from the current state for the
+ * one or two UTF-16 code units for this input code point.
+ * @param cp A Unicode code point 0..0x10ffff.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult nextForCodePoint(UChar32 cp);
+
+ /**
+ * Traverses the trie from the current state for this string.
+ * Equivalent to
+ * \code
+ * Result result=current();
+ * for(each c in s)
+ * if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH;
+ * result=next(c);
+ * return result;
+ * \endcode
+ * @param s A string. Can be NULL if length is 0.
+ * @param length The length of the string. Can be -1 if NUL-terminated.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult next(ConstChar16Ptr s, int32_t length);
+
+ /**
+ * Returns a matching string's value if called immediately after
+ * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
+ * getValue() can be called multiple times, because it only reads from pos_
+ * and never moves it.
+ *
+ * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE!
+ * @return The value for the string so far.
+ * @stable ICU 4.8
+ */
+ inline int32_t getValue() const {
+     const int32_t leadUnit=*pos_;
+     const char16_t *valuePos=pos_+1;  // first unit after the lead unit
+     // U_ASSERT(leadUnit>=kMinValueLead);
+     if(leadUnit&kValueIsFinal) {
+         // Final value: decode with the kValueIsFinal bit masked off.
+         return readValue(valuePos, leadUnit&0x7fff);
+     }
+     // Otherwise the lead unit carries a node value.
+     return readNodeValue(valuePos, leadUnit);
+ }
+
+ /**
+ * Determines whether all strings reachable from the current state
+ * map to the same value.
+ *
+ * Returns false without calling findUniqueValue() when pos_ is NULL
+ * (the value that stop() assigns). Otherwise the search starts at
+ * pos_ + remainingMatchLength_ + 1 with no unique value found yet.
+ *
+ * @param uniqueValue Receives the unique value, if this function returns true.
+ * (output-only)
+ * @return true if all strings reachable from the current state
+ * map to the same value.
+ * @stable ICU 4.8
+ */
+ inline UBool hasUniqueValue(int32_t &uniqueValue) const {
+ const char16_t *pos=pos_;
+ // Skip the rest of a pending linear-match node.
+ return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, false, uniqueValue);
+ }
+
+ /**
+ * Finds each char16_t which continues the string from the current state.
+ * That is, each char16_t c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now.
+ * @param out Each next char16_t is appended to this object.
+ * @return the number of char16_ts which continue the string from here
+ * @stable ICU 4.8
+ */
+ int32_t getNextUChars(Appendable &out) const;
+
+ /**
+ * Iterator for all of the (string, value) pairs in a UCharsTrie.
+ *
+ * Only getString(), getValue() and the private truncateAndStop() are
+ * defined inline here; the constructors, destructor, reset(), hasNext(),
+ * next() and branchNext() are declared only.
+ * @stable ICU 4.8
+ */
+ class U_COMMON_API Iterator : public UMemory {
+ public:
+ /**
+ * Iterates from the root of a char16_t-serialized UCharsTrie.
+ * @param trieUChars The trie char16_ts.
+ * @param maxStringLength If 0, the iterator returns full strings.
+ * Otherwise, the iterator returns strings with this maximum length.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 4.8
+ */
+ Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
+
+ /**
+ * Iterates from the current state of the specified UCharsTrie.
+ * @param trie The trie whose state will be copied for iteration.
+ * @param maxStringLength If 0, the iterator returns full strings.
+ * Otherwise, the iterator returns strings with this maximum length.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 4.8
+ */
+ Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ ~Iterator();
+
+ /**
+ * Resets this iterator to its initial state.
+ * @return *this
+ * @stable ICU 4.8
+ */
+ Iterator &reset();
+
+ /**
+ * @return true if there are more elements.
+ * @stable ICU 4.8
+ */
+ UBool hasNext() const;
+
+ /**
+ * Finds the next (string, value) pair if there is one.
+ *
+ * If the string is truncated to the maximum length and does not
+ * have a real value, then the value is set to -1.
+ * In this case, this "not a real value" is indistinguishable from
+ * a real value of -1.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return true if there is another element.
+ * @stable ICU 4.8
+ */
+ UBool next(UErrorCode &errorCode);
+
+ /**
+ * Returns a reference to the iterator's own str_ member.
+ * @return The string for the last successful next().
+ * @stable ICU 4.8
+ */
+ const UnicodeString &getString() const { return str_; }
+ /**
+ * Returns the iterator's value_ member.
+ * @return The value for the last successful next().
+ * @stable ICU 4.8
+ */
+ int32_t getValue() const { return value_; }
+
+ private:
+ UBool truncateAndStop() { // clears pos_, sets value_ to -1, and always returns true
+ pos_=NULL;
+ value_=-1; // no real value for str
+ return true;
+ }
+
+ const char16_t *branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode);
+
+ const char16_t *uchars_;
+ const char16_t *pos_;
+ const char16_t *initialPos_;
+ int32_t remainingMatchLength_;
+ int32_t initialRemainingMatchLength_;
+ UBool skipValue_; // Skip intermediate value which was already delivered.
+
+ UnicodeString str_;
+ int32_t maxLength_;
+ int32_t value_;
+
+ // The stack stores pairs of integers for backtracking to another
+ // outbound edge of a branch node.
+ // The first integer is an offset from uchars_.
+ // The second integer has the str_.length() from before the node in bits 15..0,
+ // and the remaining branch length in bits 31..16.
+ // (We could store the remaining branch length minus 1 in bits 30..16 and not use the sign bit,
+ // but the code looks more confusing that way.)
+ UVector32 *stack_;
+ };
+
+private:
+ friend class UCharsTrieBuilder;
+
+ /**
+ * Constructs a UCharsTrie reader instance.
+ * Unlike the public constructor which just aliases an array,
+ * this constructor adopts the builder's array.
+ * This constructor is only called by the builder.
+ *
+ * @param adoptUChars Array stored in ownedArray_.
+ * @param trieUChars Start of the serialized trie, stored in uchars_; pos_
+ * starts here too and remainingMatchLength_ starts at -1.
+ * NOTE(review): presumably trieUChars points into adoptUChars -- confirm
+ * against the builder, which is not visible here.
+ */
+ UCharsTrie(char16_t *adoptUChars, const char16_t *trieUChars)
+ : ownedArray_(adoptUChars), uchars_(trieUChars),
+ pos_(uchars_), remainingMatchLength_(-1) {}
+
+ // No assignment operator.
+ UCharsTrie &operator=(const UCharsTrie &other);
+
+ // Clears pos_. hasUniqueValue() returns false while pos_ is NULL.
+ inline void stop() { pos_=NULL; }
+
+ // Decodes a compact 32-bit integer.
+ // pos points just past the lead unit, and bit 15 of leadUnit has already been cleared.
+ static inline int32_t readValue(const char16_t *pos, int32_t leadUnit) {
+     if(leadUnit<kMinTwoUnitValueLead) {
+         // The lead unit alone is the value.
+         return leadUnit;
+     }
+     if(leadUnit<kThreeUnitValueLead) {
+         // High bits come from the lead unit, the low 16 bits from the next unit.
+         return ((leadUnit-kMinTwoUnitValueLead)<<16)|*pos;
+     }
+     // The two units after the lead unit hold the whole value.
+     return (pos[0]<<16)|pos[1];
+ }
+ // Returns pos advanced past the 0, 1 or 2 units that follow the given value lead unit.
+ static inline const char16_t *skipValue(const char16_t *pos, int32_t leadUnit) {
+     if(leadUnit<kMinTwoUnitValueLead) {
+         return pos;
+     }
+     return leadUnit<kThreeUnitValueLead ? pos+1 : pos+2;
+ }
+ // Reads the lead unit at pos, masks it with 0x7fff and skips the whole value.
+ static inline const char16_t *skipValue(const char16_t *pos) {
+     const int32_t leadUnit=*pos;
+     return skipValue(pos+1, leadUnit&0x7fff);
+ }
+
+ // Decodes the value carried by a node lead unit.
+ // pos points just past the lead unit.
+ static inline int32_t readNodeValue(const char16_t *pos, int32_t leadUnit) {
+     // U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
+     if(leadUnit<kMinTwoUnitNodeValueLead) {
+         // The value is stored entirely in the lead unit, above bit 5.
+         return (leadUnit>>6)-1;
+     }
+     if(leadUnit<kThreeUnitNodeValueLead) {
+         // High bits come from the lead unit, the low 16 bits from the next unit.
+         return (((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos;
+     }
+     // The two units after the lead unit hold the whole value.
+     return (pos[0]<<16)|pos[1];
+ }
+ // Returns pos advanced past the 0, 1 or 2 units that follow a node-value lead unit.
+ static inline const char16_t *skipNodeValue(const char16_t *pos, int32_t leadUnit) {
+     // U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
+     if(leadUnit<kMinTwoUnitNodeValueLead) {
+         return pos;
+     }
+     return leadUnit<kThreeUnitNodeValueLead ? pos+1 : pos+2;
+ }
+
+ static inline const char16_t *jumpByDelta(const char16_t *pos) {
+ int32_t delta=*pos++; // lead unit of the encoded delta; pos now points past it
+ if(delta>=kMinTwoUnitDeltaLead) {
+ if(delta==kThreeUnitDeltaLead) {
+ delta=(pos[0]<<16)|pos[1]; // the whole delta is stored in the next two units
+ pos+=2;
+ } else {
+ delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++; // high bits from the lead unit, low 16 bits from the next unit
+ }
+ }
+ return pos+delta; // the jump is relative to the position just after the encoded delta
+ }
+
+ // Returns pos advanced past an encoded delta without decoding it.
+ static const char16_t *skipDelta(const char16_t *pos) {
+     const int32_t leadUnit=*pos++;
+     if(leadUnit<kMinTwoUnitDeltaLead) {
+         return pos;
+     }
+     return leadUnit==kThreeUnitDeltaLead ? pos+2 : pos+1;
+ }
+
+ // Maps a node lead unit to a result code by subtracting (node>>15)
+ // from USTRINGTRIE_INTERMEDIATE_VALUE.
+ static inline UStringTrieResult valueResult(int32_t node) {
+     const int32_t shifted=node>>15;
+     return static_cast<UStringTrieResult>(USTRINGTRIE_INTERMEDIATE_VALUE-shifted);
+ }
+
+ // Handles a branch node for both next(uchar) and next(string).
+ UStringTrieResult branchNext(const char16_t *pos, int32_t length, int32_t uchar);
+
+ // Requires remainingMatchLength_<0.
+ UStringTrieResult nextImpl(const char16_t *pos, int32_t uchar);
+
+ // Helper functions for hasUniqueValue().
+ // Recursively finds a unique value (or whether there is not a unique one)
+ // from a branch.
+ static const char16_t *findUniqueValueFromBranch(const char16_t *pos, int32_t length,
+ UBool haveUniqueValue, int32_t &uniqueValue);
+ // Recursively finds a unique value (or whether there is not a unique one)
+ // starting from a position on a node lead unit.
+ static UBool findUniqueValue(const char16_t *pos, UBool haveUniqueValue, int32_t &uniqueValue);
+
+ // Helper functions for getNextUChars().
+ // getNextUChars() when pos is on a branch node.
+ static void getNextBranchUChars(const char16_t *pos, int32_t length, Appendable &out);
+
+ // UCharsTrie data structure
+ //
+ // The trie consists of a series of char16_t-serialized nodes for incremental
+ // Unicode string/char16_t sequence matching. (char16_t=16-bit unsigned integer)
+ // The root node is at the beginning of the trie data.
+ //
+ // Types of nodes are distinguished by their node lead unit ranges.
+ // After each node, except a final-value node, another node follows to
+ // encode match values or continue matching further units.
+ //
+ // Node types:
+ // - Final-value node: Stores a 32-bit integer in a compact, variable-length format.
+ // The value is for the string/char16_t sequence so far.
+ // - Match node, optionally with an intermediate value in a different compact format.
+ // The value, if present, is for the string/char16_t sequence so far.
+ //
+ // Aside from the value, which uses the node lead unit's high bits:
+ //
+ // - Linear-match node: Matches a number of units.
+ // - Branch node: Branches to other nodes according to the current input unit.
+ // The node unit is the length of the branch (number of units to select from)
+ // minus 1. It is followed by a sub-node:
+ // - If the length is at most kMaxBranchLinearSubNodeLength, then
+ // there are length-1 (key, value) pairs and then one more comparison unit.
+ // If one of the key units matches, then the value is either a final value for
+ // the string so far, or a "jump" delta to the next node.
+ // If the last unit matches, then matching continues with the next node.
+ // (Values have the same encoding as final-value nodes.)
+ // - If the length is greater than kMaxBranchLinearSubNodeLength, then
+ // there is one unit and one "jump" delta.
+ // If the input unit is less than the sub-node unit, then "jump" by delta to
+ // the next sub-node which will have a length of length/2.
+ // (The delta has its own compact encoding.)
+ // Otherwise, skip the "jump" delta to the next sub-node
+ // which will have a length of length-length/2.
+
+ // Match-node lead unit values, after masking off intermediate-value bits:
+
+ // 0000..002f: Branch node. If node!=0 then the length is node+1, otherwise
+ // the length is one more than the next unit.
+
+ // For a branch sub-node with at most this many entries, we drop down
+ // to a linear search.
+ static const int32_t kMaxBranchLinearSubNodeLength=5;
+
+ // 0030..003f: Linear-match node, match 1..16 units and continue reading the next node.
+ static const int32_t kMinLinearMatch=0x30;
+ static const int32_t kMaxLinearMatchLength=0x10;
+
+ // Match-node lead unit bits 14..6 for the optional intermediate value.
+ // If these bits are 0, then there is no intermediate value.
+ // Otherwise, see the *NodeValue* constants below.
+ static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength; // 0x0040
+ static const int32_t kNodeTypeMask=kMinValueLead-1; // 0x003f
+
+ // A final-value node has bit 15 set.
+ static const int32_t kValueIsFinal=0x8000;
+
+ // Compact value: After testing and masking off bit 15, use the following thresholds.
+ static const int32_t kMaxOneUnitValue=0x3fff;
+
+ static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1; // 0x4000
+ static const int32_t kThreeUnitValueLead=0x7fff;
+
+ static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1; // 0x3ffeffff
+
+ // Compact intermediate-value integer, lead unit shared with a branch or linear-match node.
+ static const int32_t kMaxOneUnitNodeValue=0xff;
+ static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6); // 0x4040
+ static const int32_t kThreeUnitNodeValueLead=0x7fc0;
+
+ static const int32_t kMaxTwoUnitNodeValue=
+ ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1; // 0xfdffff
+
+ // Compact delta integers.
+ static const int32_t kMaxOneUnitDelta=0xfbff;
+ static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1; // 0xfc00
+ static const int32_t kThreeUnitDeltaLead=0xffff;
+
+ static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff
+
+ // For getState64():
+ // The remainingMatchLength_ is -1..14=(kMaxLinearMatchLength=0x10)-2
+ // so we need at least 5 bits for that.
+ // We add 2 to store it as a positive value 1..16=kMaxLinearMatchLength.
+ static constexpr int32_t kState64RemainingShift = 59;
+ static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1;
+
+ char16_t *ownedArray_;
+
+ // Fixed value referencing the UCharsTrie words.
+ const char16_t *uchars_;
+
+ // Iterator variables.
+
+ // Pointer to next trie unit to read. NULL if no more matches.
+ const char16_t *pos_;
+ // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
+ int32_t remainingMatchLength_;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __UCHARSTRIE_H__
diff --git a/thirdparty/icu4c/common/unicode/ucharstriebuilder.h b/thirdparty/icu4c/common/unicode/ucharstriebuilder.h
new file mode 100644
index 0000000000..15657702f9
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ucharstriebuilder.h
@@ -0,0 +1,193 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ucharstriebuilder.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010nov14
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UCHARSTRIEBUILDER_H__
+#define __UCHARSTRIEBUILDER_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/stringtriebuilder.h"
+#include "unicode/ucharstrie.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file
+ * \brief C++ API: Builder for icu::UCharsTrie
+ */
+
+U_NAMESPACE_BEGIN
+
+class UCharsTrieElement;
+
+/**
+ * Builder class for UCharsTrie.
+ *
+ * This class is not intended for public subclassing.
+ * @stable ICU 4.8
+ */
+ class U_COMMON_API UCharsTrieBuilder : public StringTrieBuilder {
+ public:
+ /**
+ * Constructs an empty builder.
+ * @param errorCode Standard ICU error code.
+ * @stable ICU 4.8
+ */
+ UCharsTrieBuilder(UErrorCode &errorCode);
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ virtual ~UCharsTrieBuilder();
+
+ /**
+ * Adds a (string, value) pair.
+ * The string must be unique.
+ * The string contents will be copied; the builder does not keep
+ * a reference to the input UnicodeString or its buffer.
+ * @param s The input string.
+ * @param value The value associated with this string.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return *this
+ * @stable ICU 4.8
+ */
+ UCharsTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode);
+
+ /**
+ * Builds a UCharsTrie for the add()ed data.
+ * Once built, no further data can be add()ed until clear() is called.
+ *
+ * A UCharsTrie cannot be empty. At least one (string, value) pair
+ * must have been add()ed.
+ *
+ * This method passes ownership of the builder's internal result array to the new trie object.
+ * Another call to any build() variant will re-serialize the trie.
+ * After clear() has been called, a new array will be used as well.
+ * @param buildOption Build option, see UStringTrieBuildOption.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return A new UCharsTrie for the add()ed data.
+ * @stable ICU 4.8
+ */
+ UCharsTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
+
+ /**
+ * Builds a UCharsTrie for the add()ed data and char16_t-serializes it.
+ * Once built, no further data can be add()ed until clear() is called.
+ *
+ * A UCharsTrie cannot be empty. At least one (string, value) pair
+ * must have been add()ed.
+ *
+ * Multiple calls to buildUnicodeString() set the UnicodeStrings to the
+ * builder's same char16_t array, without rebuilding.
+ * If buildUnicodeString() is called after build(), the trie will be
+ * re-serialized into a new array (because build() passes on ownership).
+ * If build() is called after buildUnicodeString(), the trie object returned
+ * by build() will become the owner of the underlying data for the
+ * previously returned UnicodeString.
+ * After clear() has been called, a new array will be used as well.
+ * @param buildOption Build option, see UStringTrieBuildOption.
+ * @param result A UnicodeString which will be set to the char16_t-serialized
+ * UCharsTrie for the add()ed data.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return result
+ * @stable ICU 4.8
+ */
+ UnicodeString &buildUnicodeString(UStringTrieBuildOption buildOption, UnicodeString &result,
+ UErrorCode &errorCode);
+
+ /**
+ * Removes all (string, value) pairs.
+ * New data can then be add()ed and a new trie can be built.
+ * @return *this
+ * @stable ICU 4.8
+ */
+ UCharsTrieBuilder &clear() {
+ // Empties the string storage and resets both length counters.
+ // The elements/uchars pointers and their capacities are not modified here.
+ strings.remove();
+ elementsLength=0;
+ ucharsLength=0;
+ return *this;
+ }
+
+ private:
+ // Declared but not defined in this header: copying and assignment are disabled.
+ UCharsTrieBuilder(const UCharsTrieBuilder &other); // no copy constructor
+ UCharsTrieBuilder &operator=(const UCharsTrieBuilder &other); // no assignment operator
+
+ // NOTE(review): presumably the serialization step shared by build() and
+ // buildUnicodeString(); the implementation is not visible in this header.
+ void buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
+
+ // Element accessors, indexed by element number i (and unit index where given).
+ // NOTE(review): these virtuals look like implementations of StringTrieBuilder
+ // hooks; the base class declaration is not visible here - confirm.
+ virtual int32_t getElementStringLength(int32_t i) const;
+ virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const;
+ virtual int32_t getElementValue(int32_t i) const;
+
+ virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const;
+
+ virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const;
+ virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const;
+ virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const;
+
+ // Always true for this builder.
+ virtual UBool matchNodesCanHaveValues() const { return true; }
+
+ // The following three accessors return the corresponding UCharsTrie constants.
+ virtual int32_t getMaxBranchLinearSubNodeLength() const { return UCharsTrie::kMaxBranchLinearSubNodeLength; }
+ virtual int32_t getMinLinearMatch() const { return UCharsTrie::kMinLinearMatch; }
+ virtual int32_t getMaxLinearMatchLength() const { return UCharsTrie::kMaxLinearMatchLength; }
+
+ // LinearMatchNode subclass that additionally holds a pointer (s) to char16_t units.
+ // NOTE(review): ownership/lifetime of the pointed-to units is not visible here.
+ class UCTLinearMatchNode : public LinearMatchNode {
+ public:
+ UCTLinearMatchNode(const char16_t *units, int32_t len, Node *nextNode);
+ virtual UBool operator==(const Node &other) const;
+ virtual void write(StringTrieBuilder &builder);
+ private:
+ const char16_t *s;
+ };
+
+ virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
+ Node *nextNode) const;
+
+ // Output helpers for the uchars buffer declared below.
+ // NOTE(review): the int32_t results are presumably the new ucharsLength - confirm
+ // in the implementation file.
+ UBool ensureCapacity(int32_t length);
+ virtual int32_t write(int32_t unit);
+ int32_t write(const char16_t *s, int32_t length);
+ virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length);
+ virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);
+ virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
+ virtual int32_t writeDeltaTo(int32_t jumpTarget);
+
+ // Input data. clear() empties strings and sets elementsLength to 0.
+ UnicodeString strings;
+ UCharsTrieElement *elements;
+ int32_t elementsCapacity;
+ int32_t elementsLength;
+
+ // char16_t serialization of the trie.
+ // Grows from the back: ucharsLength measures from the end of the buffer!
+ char16_t *uchars;
+ int32_t ucharsCapacity;
+ int32_t ucharsLength;
+ };
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __UCHARSTRIEBUILDER_H__
diff --git a/thirdparty/icu4c/common/unicode/uchriter.h b/thirdparty/icu4c/common/unicode/uchriter.h
new file mode 100644
index 0000000000..f5083561a8
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/uchriter.h
@@ -0,0 +1,393 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1998-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#ifndef UCHRITER_H
+#define UCHRITER_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/chariter.h"
+
+/**
+ * \file
+ * \brief C++ API: char16_t Character Iterator
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A concrete subclass of CharacterIterator that iterates over the
+ * characters (code units or code points) in a char16_t array.
+ * It's possible not only to create an
+ * iterator that iterates over an entire char16_t array, but also to
+ * create one that iterates over only a subrange of a char16_t array
+ * (iterators over different subranges of the same char16_t array don't
+ * compare equal).
+ * @see CharacterIterator
+ * @see ForwardCharacterIterator
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UCharCharacterIterator : public CharacterIterator {
+public:
+ /**
+ * Create an iterator over the char16_t array referred to by "textPtr".
+ * The iteration range is 0 to <code>length-1</code>.
+ * text is only aliased, not adopted (the
+ * destructor will not delete it).
+ * @param textPtr The char16_t array to be iterated over
+ * @param length The length of the char16_t array
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length);
+
+ /**
+ * Create an iterator over the char16_t array referred to by "textPtr".
+ * The iteration range is 0 to <code>length-1</code>.
+ * text is only aliased, not adopted (the
+ * destructor will not delete it).
+ * The starting
+ * position is specified by "position". If "position" is outside the valid
+ * iteration range, the behavior of this object is undefined.
+ * @param textPtr The char16_t array to be iterated over
+ * @param length The length of the char16_t array
+ * @param position The starting position of the iteration
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length,
+ int32_t position);
+
+ /**
+ * Create an iterator over the char16_t array referred to by "textPtr".
+ * The iteration range is <code>textBegin</code> to <code>textEnd-1</code>.
+ * text is only aliased, not adopted (the
+ * destructor will not delete it).
+ * The starting
+ * position is specified by "position". If begin and end do not
+ * form a valid iteration range or "position" is outside the valid
+ * iteration range, the behavior of this object is undefined.
+ * @param textPtr The char16_t array to be iterated over
+ * @param length The length of the char16_t array
+ * @param textBegin The begin position of the iteration range
+ * @param textEnd The end position of the iteration range
+ * @param position The starting position of the iteration
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length,
+ int32_t textBegin,
+ int32_t textEnd,
+ int32_t position);
+
+ /**
+ * Copy constructor. The new iterator iterates over the same range
+ * of the same string as "that", and its initial position is the
+ * same as "that"'s current position.
+ * @param that The UCharCharacterIterator to be copied
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator(const UCharCharacterIterator& that);
+
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~UCharCharacterIterator();
+
+ /**
+ * Assignment operator. *this is altered to iterate over the same
+ * range of the same string as "that", and refers to the same
+ * character within that string as "that" does.
+ * @param that The object to be copied
+ * @return the newly created object
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator&
+ operator=(const UCharCharacterIterator& that);
+
+ /**
+ * Returns true if the iterators iterate over the same range of the
+ * same string and are pointing at the same character.
+ * @param that The ForwardCharacterIterator used to be compared for equality
+ * @return true if the iterators iterate over the same range of the
+ * same string and are pointing at the same character.
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const ForwardCharacterIterator& that) const;
+
+ /**
+ * Generates a hash code for this iterator.
+ * @return the hash code.
+ * @stable ICU 2.0
+ */
+ virtual int32_t hashCode(void) const;
+
+ /**
+ * Returns a new UCharCharacterIterator referring to the same
+ * character in the same range of the same string as this one. The
+ * caller must delete the new iterator.
+ * @return the CharacterIterator newly created
+ * @stable ICU 2.0
+ */
+ virtual UCharCharacterIterator* clone() const;
+
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with next().
+ * @return the first code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual char16_t first(void);
+
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, returns that code unit, and moves the position
+ * to the second code unit. This is an alternative to setToStart()
+ * for forward iteration with nextPostInc().
+ * @return the first code unit in its iteration range
+ * @stable ICU 2.0
+ */
+ virtual char16_t firstPostInc(void);
+
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, and returns that code point.
+ * This can be used to begin an iteration with next32().
+ * Note that an iteration with next32PostInc(), beginning with,
+ * e.g., setToStart() or firstPostInc(), is more efficient.
+ * @return the first code point in its iteration range
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32(void);
+
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, returns that code point, and moves the position
+ * to the second code point. This is an alternative to setToStart()
+ * for forward iteration with next32PostInc().
+ * @return the first code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32PostInc(void);
+
+ /**
+ * Sets the iterator to refer to the last code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with previous().
+ * @return the last code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual char16_t last(void);
+
+ /**
+ * Sets the iterator to refer to the last code point in its
+ * iteration range, and returns that code point.
+ * This can be used to begin an iteration with previous32().
+ * @return the last code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 last32(void);
+
+ /**
+ * Sets the iterator to refer to the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code unit.
+ * @param position the position within the text-storage object
+ * @return the code unit
+ * @stable ICU 2.0
+ */
+ virtual char16_t setIndex(int32_t position);
+
+ /**
+ * Sets the iterator to refer to the beginning of the code point
+ * that contains the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code point.
+ * The current position is adjusted to the beginning of the code point
+ * (its first code unit).
+ * @param position the position within the text-storage object
+ * @return the code point
+ * @stable ICU 2.0
+ */
+ virtual UChar32 setIndex32(int32_t position);
+
+ /**
+ * Returns the code unit the iterator currently refers to.
+ * @return the code unit the iterator currently refers to.
+ * @stable ICU 2.0
+ */
+ virtual char16_t current(void) const;
+
+ /**
+ * Returns the code point the iterator currently refers to.
+ * @return the code point the iterator currently refers to.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 current32(void) const;
+
+ /**
+ * Advances to the next code unit in the iteration range (toward
+ * endIndex()), and returns that code unit. If there are no more
+ * code units to return, returns DONE.
+ * @return the next code unit in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual char16_t next(void);
+
+ /**
+ * Gets the current code unit for returning and advances to the next code unit
+ * in the iteration range
+ * (toward endIndex()). If there are
+ * no more code units to return, returns DONE.
+ * @return the current code unit.
+ * @stable ICU 2.0
+ */
+ virtual char16_t nextPostInc(void);
+
+ /**
+ * Advances to the next code point in the iteration range (toward
+ * endIndex()), and returns that code point. If there are no more
+ * code points to return, returns DONE.
+ * Note that iteration with "pre-increment" semantics is less
+ * efficient than iteration with "post-increment" semantics
+ * that is provided by next32PostInc().
+ * @return the next code point in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 next32(void);
+
+ /**
+ * Gets the current code point for returning and advances to the next code point
+ * in the iteration range
+ * (toward endIndex()). If there are
+ * no more code points to return, returns DONE.
+ * @return the current code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 next32PostInc(void);
+
+ /**
+ * Returns false if there are no more code units or code points
+ * at or after the current position in the iteration range.
+ * This is used with nextPostInc() or next32PostInc() in forward
+ * iteration.
+ * @return false if there are no more code units or code points
+ * at or after the current position in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UBool hasNext();
+
+ /**
+ * Advances to the previous code unit in the iteration range (toward
+ * startIndex()), and returns that code unit. If there are no more
+ * code units to return, returns DONE.
+ * @return the previous code unit in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual char16_t previous(void);
+
+ /**
+ * Advances to the previous code point in the iteration range (toward
+ * startIndex()), and returns that code point. If there are no more
+ * code points to return, returns DONE.
+ * @return the previous code point in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 previous32(void);
+
+ /**
+ * Returns false if there are no more code units or code points
+ * before the current position in the iteration range.
+ * This is used with previous() or previous32() in backward
+ * iteration.
+ * @return false if there are no more code units or code points
+ * before the current position in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UBool hasPrevious();
+
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+ virtual int32_t move(int32_t delta, EOrigin origin);
+
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code points forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+#ifdef move32
+ // One of the system headers right now is sometimes defining a conflicting macro we don't use
+#undef move32
+#endif
+ virtual int32_t move32(int32_t delta, EOrigin origin);
+
+ /**
+ * Sets the iterator to iterate over a new range of text
+ * @stable ICU 2.0
+ */
+ void setText(ConstChar16Ptr newText, int32_t newTextLength);
+
+ /**
+ * Copies the char16_t array under iteration into the UnicodeString
+ * referred to by "result". Even if this iterator iterates across
+ * only a part of this string, the whole string is copied.
+ * @param result Receives a copy of the text under iteration.
+ * @stable ICU 2.0
+ */
+ virtual void getText(UnicodeString& result);
+
+ /**
+ * Return a class ID for this class (not really public)
+ * @return a class ID for this class
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+ /**
+ * Return a class ID for this object (not really public)
+ * @return a class ID for this object.
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const;
+
+protected:
+ /**
+ * Protected constructor
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator();
+ /**
+ * Protected member text
+ * @stable ICU 2.0
+ */
+ const char16_t* text;
+
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/uclean.h b/thirdparty/icu4c/common/unicode/uclean.h
new file mode 100644
index 0000000000..c2d920a16e
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/uclean.h
@@ -0,0 +1,262 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2001-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* file name: uclean.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001July05
+* created by: George Rhoten
+*/
+
+#ifndef __UCLEAN_H__
+#define __UCLEAN_H__
+
+#include "unicode/utypes.h"
+/**
+ * \file
+ * \brief C API: Initialize and clean up ICU
+ */
+
+/**
+ * Initialize ICU.
+ *
+ * Use of this function is optional. It is OK to simply use ICU
+ * services and functions without first having initialized
+ * ICU by calling u_init().
+ *
+ * u_init() will attempt to load some part of ICU's data, and is
+ * useful as a test for configuration or installation problems that
+ * leave the ICU data inaccessible. A successful invocation of u_init()
+ * does not, however, guarantee that all ICU data is accessible.
+ *
+ * Multiple calls to u_init() cause no harm, aside from the small amount
+ * of time required.
+ *
+ * In old versions of ICU, u_init() was required in multi-threaded applications
+ * to ensure the thread safety of ICU. u_init() is no longer needed for this purpose.
+ *
+ * @param status An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ * An Error will be returned if some required part of ICU data can not
+ * be loaded or initialized.
+ * The function returns immediately if the input error code indicates a
+ * failure, as usual.
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+u_init(UErrorCode *status);
+
+#ifndef U_HIDE_SYSTEM_API
+/**
+ * Clean up the system resources, such as allocated memory or open files,
+ * used in all ICU libraries. This will free/delete all memory owned by the
+ * ICU libraries, and return them to their original load state. All open ICU
+ * items (collators, resource bundles, converters, etc.) must be closed before
+ * calling this function, otherwise ICU may not free its allocated memory
+ * (e.g. close your converters and resource bundles before calling this
+ * function). Generally, this function should be called once just before
+ * an application exits. For applications that dynamically load and unload
+ * the ICU libraries (relatively uncommon), u_cleanup() should be called
+ * just before the library unload.
+ * <p>
+ * u_cleanup() also clears any ICU heap functions, mutex functions or
+ * trace functions that may have been set for the process.
+ * This has the effect of restoring ICU to its initial condition, before
+ * any of these override functions were installed. Refer to
+ * u_setMemoryFunctions(), u_setMutexFunctions and
+ * utrace_setFunctions(). If ICU is to be reinitialized after
+ * calling u_cleanup(), these runtime override functions will need to
+ * be set up again if they are still required.
+ * <p>
+ * u_cleanup() is not thread safe. All other threads should stop using ICU
+ * before calling this function.
+ * <p>
+ * Any open ICU items will be left in an undefined state by u_cleanup(),
+ * and any subsequent attempt to use such an item will give unpredictable
+ * results.
+ * <p>
+ * After calling u_cleanup(), an application may continue to use ICU by
+ * calling u_init(). An application must invoke u_init() first from one single
+ * thread before allowing other threads to call u_init(). All threads existing
+ * at the time of the first thread's call to u_init() must also call
+ * u_init() themselves before continuing with other ICU operations.
+ * <p>
+ * The use of u_cleanup() just before an application terminates is optional,
+ * but it should be called only once for performance reasons. The primary
+ * benefit is to eliminate reports of memory or resource leaks originating
+ * in ICU code from the results generated by heap analysis tools.
+ * <p>
+ * <strong>Use this function with great care!</strong>
+ * </p>
+ *
+ * @stable ICU 2.0
+ * @system
+ */
+U_CAPI void U_EXPORT2
+u_cleanup(void);
+
+U_CDECL_BEGIN
+/**
+ * Pointer type for a user supplied memory allocation function.
+ * @param context user supplied value, obtained from u_setMemoryFunctions().
+ * @param size The number of bytes to be allocated
+ * @return Pointer to the newly allocated memory, or NULL if the allocation failed.
+ * @stable ICU 2.8
+ * @system
+ */
+typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size);
+/**
+ * Pointer type for a user supplied memory re-allocation function.
+ * @param context user supplied value, obtained from u_setMemoryFunctions().
+ * @param mem Pointer to the memory block to be resized
+ * @param size The new size for the block
+ * @return Pointer to the resized memory block, or NULL if the resizing failed.
+ * @stable ICU 2.8
+ * @system
+ */
+typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t size);
+/**
+ * Pointer type for a user supplied memory free function. Behavior should be
+ * similar to the standard C library free().
+ * @param context user supplied value, obtained from u_setMemoryFunctions().
+ * @param mem Pointer to the memory block to be freed
+ * @stable ICU 2.8
+ * @system
+ */
+typedef void U_CALLCONV UMemFreeFn (const void *context, void *mem);
+
+/**
+ * Set the functions that ICU will use for memory allocation.
+ * Use of this function is optional; by default (without this function), ICU will
+ * use the standard C library malloc(), realloc() and free() functions.
+ * This function can only be used when ICU is in an initial, unused state, before
+ * u_init() has been called.
+ * @param context This pointer value will be saved, and then (later) passed as
+ * a parameter to the memory functions each time they
+ * are called.
+ * @param a Pointer to a user-supplied malloc function.
+ * @param r Pointer to a user-supplied realloc function.
+ * @param f Pointer to a user-supplied free function.
+ * @param status Receives error values.
+ * @stable ICU 2.8
+ * @system
+ */
+U_CAPI void U_EXPORT2
+u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV_FPTR a, UMemReallocFn * U_CALLCONV_FPTR r, UMemFreeFn * U_CALLCONV_FPTR f,
+ UErrorCode *status);
+
+U_CDECL_END
+
+#ifndef U_HIDE_DEPRECATED_API
+/*********************************************************************************
+ *
+ * Deprecated Functions
+ *
+ * The following functions for user supplied mutexes are no longer supported.
+ * Any attempt to use them will return a U_UNSUPPORTED_ERROR.
+ *
+ **********************************************************************************/
+
+/**
+ * An opaque pointer type that represents an ICU mutex.
+ * For user-implemented mutexes, the value will typically point to a
+ * struct or object that implements the mutex.
+ * @deprecated ICU 52. This type is no longer supported.
+ * @system
+ */
+typedef void *UMTX;
+
+U_CDECL_BEGIN
+/**
+ * Function Pointer type for a user supplied mutex initialization function.
+ * The user-supplied function will be called by ICU whenever ICU needs to create a
+ * new mutex. The function implementation should create a mutex, and store a pointer
+ * to something that uniquely identifies the mutex into the UMTX that is supplied
+ * as a parameter.
+ * @param context user supplied value, obtained from u_setMutexFunctions().
+ * @param mutex Receives a pointer that identifies the new mutex.
+ * The mutex init function must set the UMTX to a non-null value.
+ * Subsequent calls by ICU to lock, unlock, or destroy a mutex will
+ * identify the mutex by the UMTX value.
+ * @param status Error status. Report errors back to ICU by setting this variable
+ * with an error code.
+ * @deprecated ICU 52. This function is no longer supported.
+ * @system
+ */
+typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX *mutex, UErrorCode* status);
+
+
+/**
+ * Function pointer type for the user-supplied mutex functions.
+ * One of the user-supplied functions with this signature will be called by ICU
+ * whenever ICU needs to lock, unlock, or destroy a mutex.
+ * @param context user supplied value, obtained from u_setMutexFunctions().
+ * @param mutex specifies the mutex on which to operate.
+ * @deprecated ICU 52. This function is no longer supported.
+ * @system
+ */
+typedef void U_CALLCONV UMtxFn (const void *context, UMTX *mutex);
+U_CDECL_END
+
+/**
+ * Set the functions that ICU will use for mutex operations.
+ * Use of this function is optional; by default (without this function), ICU will
+ * directly access system functions for mutex operations.
+ * This function can only be used when ICU is in an initial, unused state, before
+ * u_init() has been called.
+ * @param context This pointer value will be saved, and then (later) passed as
+ * a parameter to the user-supplied mutex functions each time they
+ * are called.
+ * @param init Pointer to a mutex initialization function. Must be non-null.
+ * @param destroy Pointer to the mutex destroy function. Must be non-null.
+ * @param lock Pointer to the mutex lock function. Must be non-null.
+ * @param unlock Pointer to the mutex unlock function. Must be non-null.
+ * @param status Receives error values.
+ * @deprecated ICU 52. This function is no longer supported.
+ * @system
+ */
+U_DEPRECATED void U_EXPORT2
+u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtxFn *lock, UMtxFn *unlock,
+ UErrorCode *status);
+
+
+/**
+ * Pointer type for a user supplied atomic increment or decrement function.
+ * @param context user supplied value, obtained from u_setAtomicIncDecFunctions().
+ * @param p Pointer to a 32 bit int to be incremented or decremented
+ * @return The value of the variable after the inc or dec operation.
+ * @deprecated ICU 52. This function is no longer supported.
+ * @system
+ */
+typedef int32_t U_CALLCONV UMtxAtomicFn(const void *context, int32_t *p);
+
+/**
+ * Set the functions that ICU will use for atomic increment and decrement of int32_t values.
+ * Use of this function is optional; by default (without this function), ICU will
+ * use its own internal implementation of atomic increment/decrement.
+ * This function can only be used when ICU is in an initial, unused state, before
+ * u_init() has been called.
+ * @param context This pointer value will be saved, and then (later) passed as
+ * a parameter to the increment and decrement functions each time they
+ * are called.
+ * @param inc Pointer to a function to do an atomic increment operation. Must be non-null.
+ * @param dec Pointer to a function to do an atomic decrement operation. Must be non-null.
+ * @param status Receives error values.
+ * @deprecated ICU 52. This function is no longer supported.
+ * @system
+ */
+U_DEPRECATED void U_EXPORT2
+u_setAtomicIncDecFunctions(const void *context, UMtxAtomicFn *inc, UMtxAtomicFn *dec,
+ UErrorCode *status);
+
+#endif /* U_HIDE_DEPRECATED_API */
+#endif /* U_HIDE_SYSTEM_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/ucnv.h b/thirdparty/icu4c/common/unicode/ucnv.h
new file mode 100644
index 0000000000..58f271cfb5
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ucnv.h
@@ -0,0 +1,2045 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+ * ucnv.h:
+ * External APIs for the ICU's codeset conversion library
+ * Bertrand A. Damiba
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ * 04/04/99 helena Fixed internal header inclusion.
+ * 05/11/00 helena Added setFallback and usesFallback APIs.
+ * 06/29/2000 helena Major rewrite of the callback APIs.
+ * 12/07/2000 srl Update of documentation
+ */
+
+/**
+ * \file
+ * \brief C API: Character conversion
+ *
+ * <h2>Character Conversion C API</h2>
+ *
+ * <p>This API is used to convert codepage or character encoded data to and
+ * from UTF-16. You can open a converter with {@link ucnv_open() }. With that
+ * converter, you can get its properties, set options, convert your data and
+ * close the converter.</p>
+ *
+ * <p>Since many software programs recognize different converter names for
+ * different types of converters, there are other functions in this API to
+ * iterate over the converter aliases. The functions {@link ucnv_getAvailableName() },
+ * {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the
+ * more frequently used alias functions to get this information.</p>
+ *
+ * <p>When a converter encounters an illegal, irregular, invalid or unmappable character
+ * its default behavior is to use a substitution character to replace the
+ * bad byte sequence. This behavior can be changed by using {@link ucnv_setFromUCallBack() }
+ * or {@link ucnv_setToUCallBack() } on the converter. The header ucnv_err.h defines
+ * many other callback actions that can be used instead of a character substitution.</p>
+ *
+ * <p>More information about this API can be found in our
+ * <a href="http://icu-project.org/userguide/conversion.html">User's
+ * Guide</a>.</p>
+ */
+
+#ifndef UCNV_H
+#define UCNV_H
+
+#include "unicode/ucnv_err.h"
+#include "unicode/uenum.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
+
+#define USET_DEFINED
+
+/**
+ * USet is the C API type corresponding to C++ class UnicodeSet.
+ * It is forward-declared here to avoid including unicode/uset.h file if related
+ * conversion APIs are not used.
+ *
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.4
+ */
+typedef struct USet USet;
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+U_CDECL_BEGIN
+
+/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */
+#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
+/** Maximum length of a converter name including path and terminating NULL @stable ICU 2.0 */
+#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
+
+/** Shift in for EBCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
+#define UCNV_SI 0x0F
+/** Shift out for EBCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
+#define UCNV_SO 0x0E
+
+/**
+ * Enum for specifying basic types of converters
+ * @see ucnv_getType
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** @stable ICU 2.0 */
+ UCNV_UNSUPPORTED_CONVERTER = -1,
+ /** @stable ICU 2.0 */
+ UCNV_SBCS = 0,
+ /** @stable ICU 2.0 */
+ UCNV_DBCS = 1,
+ /** @stable ICU 2.0 */
+ UCNV_MBCS = 2,
+ /** @stable ICU 2.0 */
+ UCNV_LATIN_1 = 3,
+ /** @stable ICU 2.0 */
+ UCNV_UTF8 = 4,
+ /** @stable ICU 2.0 */
+ UCNV_UTF16_BigEndian = 5,
+ /** @stable ICU 2.0 */
+ UCNV_UTF16_LittleEndian = 6,
+ /** @stable ICU 2.0 */
+ UCNV_UTF32_BigEndian = 7,
+ /** @stable ICU 2.0 */
+ UCNV_UTF32_LittleEndian = 8,
+ /** @stable ICU 2.0 */
+ UCNV_EBCDIC_STATEFUL = 9,
+ /** @stable ICU 2.0 */
+ UCNV_ISO_2022 = 10,
+
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_1 = 11,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_2,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_3,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_4,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_5,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_6,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_8,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_11,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_16,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_17,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_18,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_19,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_LAST = UCNV_LMBCS_19,
+ /** @stable ICU 2.0 */
+ UCNV_HZ,
+ /** @stable ICU 2.0 */
+ UCNV_SCSU,
+ /** @stable ICU 2.0 */
+ UCNV_ISCII,
+ /** @stable ICU 2.0 */
+ UCNV_US_ASCII,
+ /** @stable ICU 2.0 */
+ UCNV_UTF7,
+ /** @stable ICU 2.2 */
+ UCNV_BOCU1,
+ /** @stable ICU 2.2 */
+ UCNV_UTF16,
+ /** @stable ICU 2.2 */
+ UCNV_UTF32,
+ /** @stable ICU 2.2 */
+ UCNV_CESU8,
+ /** @stable ICU 2.4 */
+ UCNV_IMAP_MAILBOX,
+ /** @stable ICU 4.8 */
+ UCNV_COMPOUND_TEXT,
+
+ /* Number of converter types for which we have conversion routines. */
+ UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
+} UConverterType;
+
+/**
+ * Enum for specifying which platform a converter ID refers to.
+ * The use of platform/CCSID is not recommended. See ucnv_openCCSID().
+ *
+ * @see ucnv_getPlatform
+ * @see ucnv_openCCSID
+ * @see ucnv_getCCSID
+ * @stable ICU 2.0
+ */
+typedef enum {
+ UCNV_UNKNOWN = -1,
+ UCNV_IBM = 0
+} UConverterPlatform;
+
+/**
+ * Function pointer for error callback in the codepage to unicode direction.
+ * Called when an error has occurred in conversion to unicode, or on open/close of the callback (see reason).
+ * @param context Pointer to the callback's private data
+ * @param args Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param pErrorCode ICU error code in/out parameter.
+ * For converter callback functions, set to a conversion error
+ * before the call, and the callback may reset it to U_ZERO_ERROR.
+ * @see ucnv_setToUCallBack
+ * @see UConverterToUnicodeArgs
+ * @stable ICU 2.0
+ */
+typedef void (U_EXPORT2 *UConverterToUCallback) (
+ const void* context,
+ UConverterToUnicodeArgs *args,
+ const char *codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode *pErrorCode);
+
+/**
+ * Function pointer for error callback in the unicode to codepage direction.
+ * Called when an error has occurred in conversion from unicode, or on open/close of the callback (see reason).
+ * @param context Pointer to the callback's private data
+ * @param args Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in UChars) of the concerned Unicode sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param pErrorCode ICU error code in/out parameter.
+ * For converter callback functions, set to a conversion error
+ * before the call, and the callback may reset it to U_ZERO_ERROR.
+ * @see ucnv_setFromUCallBack
+ * @stable ICU 2.0
+ */
+typedef void (U_EXPORT2 *UConverterFromUCallback) (
+ const void* context,
+ UConverterFromUnicodeArgs *args,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode *pErrorCode);
+
+U_CDECL_END
+
+/**
+ * Character that separates converter names from options and options from each other.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_OPTION_SEP_CHAR ','
+
+/**
+ * String version of UCNV_OPTION_SEP_CHAR.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_OPTION_SEP_STRING ","
+
+/**
+ * Character that separates a converter option from its value.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_VALUE_SEP_CHAR '='
+
+/**
+ * String version of UCNV_VALUE_SEP_CHAR.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_VALUE_SEP_STRING "="
+
+/**
+ * Converter option for specifying a locale.
+ * For example, ucnv_open("SCSU,locale=ja", &errorCode);
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_LOCALE_OPTION_STRING ",locale="
+
+/**
+ * Converter option for specifying a version selector (0..9) for some converters.
+ * For example,
+ * \code
+ * ucnv_open("UTF-7,version=1", &errorCode);
+ * \endcode
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.4
+ */
+#define UCNV_VERSION_OPTION_STRING ",version="
+
+/**
+ * Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages.
+ * Swaps Unicode mappings for EBCDIC LF and NL codes, as used on
+ * S/390 (z/OS) Unix System Services (Open Edition).
+ * For example, ucnv_open("ibm-1047,swaplfnl", &errorCode);
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.4
+ */
+#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl"
+
+/**
+ * Do a fuzzy compare of two converter/alias names.
+ * The comparison is case-insensitive, ignores leading zeroes if they are not
+ * followed by further digits, and ignores all but letters and digits.
+ * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
+ * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
+ * at http://www.unicode.org/reports/tr22/
+ *
+ * @param name1 a converter name or alias, zero-terminated
+ * @param name2 a converter name or alias, zero-terminated
+ * @return 0 if the names match, or a negative value if the name1
+ * lexically precedes name2, or a positive value if the name1
+ * lexically follows name2.
+ * @stable ICU 2.0
+ */
+U_CAPI int U_EXPORT2
+ucnv_compareNames(const char *name1, const char *name2);
+
+
+/**
+ * Creates a UConverter object with the name of a coded character set specified as a C string.
+ * The actual name will be resolved with the alias file
+ * using a case-insensitive string comparison that ignores
+ * leading zeroes and all non-alphanumeric characters.
+ * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent.
+ * (See also ucnv_compareNames().)
+ * If <code>NULL</code> is passed for the converter name, it will create one with the
+ * getDefaultName return value.
+ *
+ * <p>A converter name for ICU 1.5 and above may contain options
+ * like a locale specification to control the specific behavior of
+ * the newly instantiated converter.
+ * The meaning of the options depends on the particular converter.
+ * If an option is not defined for or recognized by a given converter, then it is ignored.</p>
+ *
+ * <p>Options are appended to the converter name string, with a
+ * <code>UCNV_OPTION_SEP_CHAR</code> between the name and the first option and
+ * also between adjacent options.</p>
+ *
+ * <p>If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.</p>
+ *
+ * <p>The conversion behavior and names can vary between platforms. ICU may
+ * convert some characters differently from other platforms. Details on this topic
+ * are in the <a href="http://icu-project.org/userguide/conversion.html">User's
+ * Guide</a>. Aliases starting with a "cp" prefix have no specific meaning
+ * other than being an alias starting with the letters "cp". Please do not
+ * associate any meaning to these aliases.</p>
+ *
+ * \snippet samples/ucnv/convsamp.cpp ucnv_open
+ *
+ * @param converterName Name of the coded character set table.
+ * This may have options appended to the string.
+ * IANA alias character set names, IBM CCSIDs starting with "ibm-",
+ * Windows codepage numbers starting with "windows-" are frequently
+ * used for this parameter. See ucnv_getAvailableName and
+ * ucnv_getAlias for a complete list that is available.
+ * If this parameter is NULL, the default converter will be used.
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
+ * @see ucnv_openU
+ * @see ucnv_openCCSID
+ * @see ucnv_getAvailableName
+ * @see ucnv_getAlias
+ * @see ucnv_getDefaultName
+ * @see ucnv_close
+ * @see ucnv_compareNames
+ * @stable ICU 2.0
+ */
+U_CAPI UConverter* U_EXPORT2
+ucnv_open(const char *converterName, UErrorCode *err);
+
+
+/**
+ * Creates a Unicode converter with the name specified as a Unicode string.
+ * The name should be limited to the ASCII-7 alphanumerics range.
+ * The actual name will be resolved with the alias file
+ * using a case-insensitive string comparison that ignores
+ * leading zeroes and all non-alphanumeric characters.
+ * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent.
+ * (See also ucnv_compareNames().)
+ * If <TT>NULL</TT> is passed for the converter name, it will create
+ * one with the ucnv_getDefaultName() return value.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ *
+ * <p>See ucnv_open for the complete details</p>
+ * @param name Name of the UConverter table in a zero terminated
+ * Unicode string
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR,
+ * U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an
+ * error occurred
+ * @see ucnv_open
+ * @see ucnv_openCCSID
+ * @see ucnv_close
+ * @see ucnv_compareNames
+ * @stable ICU 2.0
+ */
+U_CAPI UConverter* U_EXPORT2
+ucnv_openU(const UChar *name,
+ UErrorCode *err);
+
+/**
+ * Creates a UConverter object from a CCSID number and platform pair.
+ * Note that the usefulness of this function is limited to platforms with numeric
+ * encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for
+ * encodings.
+ *
+ * In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related.
+ * For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and
+ * for some Unicode conversion tables there are multiple CCSIDs.
+ * Some "alternate" Unicode conversion tables are provided by the
+ * IBM CDRA conversion table registry.
+ * The most prominent example of a systematic modification of conversion tables that is
+ * not provided in the form of conversion table files in the repository is
+ * that S/390 Unix System Services swaps the codes for Line Feed and New Line in all
+ * EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well.
+ *
+ * Only IBM default conversion tables are accessible with ucnv_openCCSID().
+ * ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated
+ * with that CCSID.
+ *
+ * Currently, the only "platform" supported in the ICU converter API is UCNV_IBM.
+ *
+ * In summary, the use of CCSIDs and the associated API functions is not recommended.
+ *
+ * In order to open a converter with the default IBM CDRA Unicode conversion table,
+ * you can use this function or use the prefix "ibm-":
+ * \code
+ * char name[20];
+ * sprintf(name, "ibm-%hu", ccsid);
+ * cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * In order to open a converter with the IBM S/390 Unix System Services variant
+ * of a Unicode/EBCDIC conversion table,
+ * you can use the prefix "ibm-" together with the option string UCNV_SWAP_LFNL_OPTION_STRING:
+ * \code
+ * char name[20];
+ * sprintf(name, "ibm-%hu" UCNV_SWAP_LFNL_OPTION_STRING, ccsid);
+ * cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * In order to open a converter from a Microsoft codepage number, use the prefix "cp":
+ * \code
+ * char name[20];
+ * sprintf(name, "cp%hu", codepageID);
+ * cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ *
+ * @param codepage codepage number to create
+ * @param platform the platform in which the codepage number exists
+ * @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error
+ * occurred.
+ * @see ucnv_open
+ * @see ucnv_openU
+ * @see ucnv_close
+ * @see ucnv_getCCSID
+ * @see ucnv_getPlatform
+ * @see UConverterPlatform
+ * @stable ICU 2.0
+ */
+U_CAPI UConverter* U_EXPORT2
+ucnv_openCCSID(int32_t codepage,
+ UConverterPlatform platform,
+ UErrorCode * err);
+
+/**
+ * <p>Creates a UConverter object specified from a packageName and a converterName.</p>
+ *
+ * <p>The packageName and converterName must point to an ICU udata object, as defined by
+ * <code> udata_open( packageName, "cnv", converterName, err) </code> or equivalent.
+ * Typically, packageName will refer to a (.dat) file, or to a package registered with
+ * udata_setAppData(). Using a full file or directory pathname for packageName is deprecated.</p>
+ *
+ * <p>The name will NOT be looked up in the alias mechanism, nor will the converter be
+ * stored in the converter cache or the alias table. The only way to open further converters
+ * is call this function multiple times, or use the ucnv_safeClone() function to clone a
+ * 'primary' converter.</p>
+ *
+ * <p>A future version of ICU may add alias table lookups and/or caching
+ * to this function.</p>
+ *
+ * <p>Example Use:
+ * <code>cnv = ucnv_openPackage("myapp", "myconverter", &err);</code>
+ * </p>
+ *
+ * @param packageName name of the package (equivalent to 'path' in udata_open() call)
+ * @param converterName name of the data item to be used, without suffix.
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
+ * @see udata_open
+ * @see ucnv_open
+ * @see ucnv_safeClone
+ * @see ucnv_close
+ * @stable ICU 2.2
+ */
+U_CAPI UConverter* U_EXPORT2
+ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err);
+
+/**
+ * Thread safe converter cloning operation.
+ * For most efficient operation, pass in a stackBuffer (and a *pBufferSize)
+ * with at least U_CNV_SAFECLONE_BUFFERSIZE bytes of space.
+ * If the buffer size is sufficient, then the clone will use the stack buffer;
+ * otherwise, it will be allocated, and *pBufferSize will indicate
+ * the actual size. (This should not occur with U_CNV_SAFECLONE_BUFFERSIZE.)
+ *
+ * You must ucnv_close() the clone in any case.
+ *
+ * If *pBufferSize==0, (regardless of whether stackBuffer==NULL or not)
+ * then *pBufferSize will be changed to a sufficient size
+ * for cloning this converter,
+ * without actually cloning the converter ("pure pre-flighting").
+ *
+ * If *pBufferSize is greater than zero but not large enough for a stack-based
+ * clone, then the converter is cloned using newly allocated memory
+ * and *pBufferSize is changed to the necessary size.
+ *
+ * If the converter clone fits into the stack buffer but the stack buffer is not
+ * sufficiently aligned for the clone, then the clone will use an
+ * adjusted pointer and use an accordingly smaller buffer size.
+ *
+ * @param cnv converter to be cloned
+ * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
+ * user allocated space for the new clone. If NULL new memory will be allocated.
+ * If buffer is not large enough, new memory will be allocated.
+ * Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
+ * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
+ * pointer to size of allocated space.
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * An informational status value, U_SAFECLONE_ALLOCATED_WARNING,
+ * is used if any allocations were necessary.
+ * However, it is better to check if *pBufferSize grew for checking for
+ * allocations because warning codes can be overridden by subsequent
+ * function calls.
+ * @return pointer to the new clone
+ * @stable ICU 2.0
+ */
+U_CAPI UConverter * U_EXPORT2
+ucnv_safeClone(const UConverter *cnv,
+ void *stackBuffer,
+ int32_t *pBufferSize,
+ UErrorCode *status);
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/**
+ * \def U_CNV_SAFECLONE_BUFFERSIZE
+ * Definition of a buffer size that is designed to be large enough for
+ * converters to be cloned with ucnv_safeClone().
+ * @deprecated ICU 52. Do not rely on ucnv_safeClone() cloning into any provided buffer.
+ */
+#define U_CNV_SAFECLONE_BUFFERSIZE 1024
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Deletes the unicode converter and releases resources associated
+ * with just this instance.
+ * Does not free up shared converter tables.
+ *
+ * @param converter the converter object to be deleted
+ * @see ucnv_open
+ * @see ucnv_openU
+ * @see ucnv_openCCSID
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_close(UConverter * converter);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUConverterPointer
+ * "Smart pointer" class, closes a UConverter via ucnv_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterPointer, UConverter, ucnv_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Fills in the output parameter, subChars, with the substitution characters
+ * as multiple bytes.
+ * If ucnv_setSubstString() set a Unicode string because the converter is
+ * stateful, then subChars will be an empty string.
+ *
+ * @param converter the Unicode converter
+ * @param subChars the substitution characters
+ * @param len on input the capacity of subChars, on output the number
+ * of bytes copied to it
+ * @param err the outgoing error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @see ucnv_setSubstString
+ * @see ucnv_setSubstChars
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_getSubstChars(const UConverter *converter,
+ char *subChars,
+ int8_t *len,
+ UErrorCode *err);
+
+/**
+ * Sets the substitution chars when converting from unicode to a codepage. The
+ * substitution is specified as a string of 1-4 bytes, and may contain
+ * <TT>NULL</TT> bytes.
+ * The subChars must represent a single character. The caller needs to know the
+ * byte sequence of a valid character in the converter's charset.
+ * For some converters, for example some ISO 2022 variants, only single-byte
+ * substitution characters may be supported.
+ * The newer ucnv_setSubstString() function relaxes these limitations.
+ *
+ * @param converter the Unicode converter
+ * @param subChars the substitution character byte sequence we want set
+ * @param len the number of bytes in subChars
+ * @param err the error status code. <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if
+ * len is bigger than the maximum number of bytes allowed in subchars
+ * @see ucnv_setSubstString
+ * @see ucnv_getSubstChars
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_setSubstChars(UConverter *converter,
+ const char *subChars,
+ int8_t len,
+ UErrorCode *err);
+
+/**
+ * Set a substitution string for converting from Unicode to a charset.
+ * The caller need not know the charset byte sequence for each charset.
+ *
+ * Unlike ucnv_setSubstChars() which is designed to set a charset byte sequence
+ * for a single character, this function takes a Unicode string with
+ * zero, one or more characters, and immediately verifies that the string can be
+ * converted to the charset.
+ * If not, or if the result is too long (more than 32 bytes as of ICU 3.6),
+ * then the function returns with an error accordingly.
+ *
+ * Also unlike ucnv_setSubstChars(), this function works for stateful charsets
+ * by converting on the fly at the point of substitution rather than setting
+ * a fixed byte sequence.
+ *
+ * @param cnv The UConverter object.
+ * @param s The Unicode string.
+ * @param length The number of UChars in s, or -1 for a NUL-terminated string.
+ * @param err Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ *
+ * @see ucnv_setSubstChars
+ * @see ucnv_getSubstChars
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+ucnv_setSubstString(UConverter *cnv,
+ const UChar *s,
+ int32_t length,
+ UErrorCode *err);
+
+/**
+ * Fills in the output parameter, errBytes, with the error characters from the
+ * last failing conversion.
+ *
+ * @param converter the Unicode converter
+ * @param errBytes the codepage bytes which were in error
+ * @param len on input the capacity of errBytes, on output the number of
+ * bytes which were copied to it
+ * @param err the error status code.
+ * If the errBytes array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_getInvalidChars(const UConverter *converter,
+ char *errBytes,
+ int8_t *len,
+ UErrorCode *err);
+
+/**
+ * Fills in the output parameter, errUChars, with the error characters from the
+ * last failing conversion.
+ *
+ * @param converter the Unicode converter
+ * @param errUChars the UChars which were in error
+ * @param len on input the capacity of errUChars, on output the number of
+ * UChars which were copied to it
+ * @param err the error status code.
+ * If the errUChars array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_getInvalidUChars(const UConverter *converter,
+ UChar *errUChars,
+ int8_t *len,
+ UErrorCode *err);
+
+/**
+ * Resets the state of a converter to the default state. This is used
+ * in the case of an error, to restart a conversion from a known default state.
+ * It will also empty the internal output buffers.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_reset(UConverter *converter);
+
+/**
+ * Resets the to-Unicode part of a converter state to the default state.
+ * This is used in the case of an error to restart a conversion to
+ * Unicode to a known default state. It will also empty the internal
+ * output buffers used for the conversion to Unicode codepoints.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_resetToUnicode(UConverter *converter);
+
+/**
+ * Resets the from-Unicode part of a converter state to the default state.
+ * This is used in the case of an error to restart a conversion from
+ * Unicode to a known default state. It will also empty the internal output
+ * buffers used for the conversion from Unicode codepoints.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_resetFromUnicode(UConverter *converter);
+
+/**
+ * Returns the maximum number of bytes that are output per UChar in conversion
+ * from Unicode using this converter.
+ * The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING
+ * to calculate the size of a target buffer for conversion from Unicode.
+ *
+ * Note: Before ICU 2.8, this function did not return reliable numbers for
+ * some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS.
+ *
+ * This number may not be the same as the maximum number of bytes per
+ * "conversion unit". In other words, it may not be the intuitively expected
+ * number of bytes per character that would be published for a charset,
+ * and may not fulfill any other purpose than the allocation of an output
+ * buffer of guaranteed sufficient size for a given input length and converter.
+ *
+ * Examples for special cases that are taken into account:
+ * - Supplementary code points may convert to more bytes than BMP code points.
+ * This function returns bytes per UChar (UTF-16 code unit), not per
+ * Unicode code point, for efficient buffer allocation.
+ * - State-shifting output (SI/SO, escapes, etc.) from stateful converters.
+ * - When m input UChars are converted to n output bytes, then the maximum n/m
+ * is taken into account.
+ *
+ * The number returned here does not take into account
+ * (see UCNV_GET_MAX_BYTES_FOR_STRING):
+ * - callbacks which output more than one charset character sequence per call,
+ * like escape callbacks
+ * - initial and final non-character bytes that are output by some converters
+ * (automatic BOMs, initial escape sequence, final SI, etc.)
+ *
+ * Examples for returned values:
+ * - SBCS charsets: 1
+ * - Shift-JIS: 2
+ * - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted)
+ * - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_)
+ * - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS)
+ * - ISO-2022: 3 (always outputs UTF-8)
+ * - ISO-2022-JP: 6 (4-byte escape sequences + DBCS)
+ * - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS)
+ *
+ * @param converter The Unicode converter.
+ * @return The maximum number of bytes per UChar (16 bit code unit)
+ * that are output by ucnv_fromUnicode(),
+ * to be used together with UCNV_GET_MAX_BYTES_FOR_STRING
+ * for buffer allocation.
+ *
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
+ * @see ucnv_getMinCharSize
+ * @stable ICU 2.0
+ */
+U_CAPI int8_t U_EXPORT2
+ucnv_getMaxCharSize(const UConverter *converter);
+
+/**
+ * Calculates the size of a buffer for conversion from Unicode to a charset.
+ * The calculated size is guaranteed to be sufficient for this conversion.
+ *
+ * It takes into account initial and final non-character bytes that are output
+ * by some converters.
+ * It does not take into account callbacks which output more than one charset
+ * character sequence per call, like escape callbacks.
+ * The default (substitution) callback only outputs one charset character sequence.
+ *
+ * The macro expands to (length+10)*maxCharSize, with each argument cast to
+ * int32_t and evaluated exactly once. The extra 10 units are presumably the
+ * allowance for the initial and final non-character bytes mentioned above
+ * (assumption; the rationale for the constant is not stated here).
+ * The arithmetic is performed in int32_t, so the caller must make sure that
+ * the result fits into an int32_t for very large lengths.
+ *
+ * @param length Number of UChars to be converted.
+ * @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter
+ * that will be used.
+ * @return Size of a buffer that will be large enough to hold the output bytes of
+ * converting length UChars with the converter that returned the maxCharSize.
+ *
+ * @see ucnv_getMaxCharSize
+ * @stable ICU 2.8
+ */
+#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \
+ (((int32_t)(length)+10)*(int32_t)(maxCharSize))
+
+/**
+ * Returns the minimum byte length (per codepoint) for characters in this codepage.
+ * This is usually either 1 or 2.
+ * @param converter the Unicode converter
+ * @return the minimum number of bytes per codepoint allowed by this particular converter
+ * @see ucnv_getMaxCharSize
+ * @stable ICU 2.0
+ */
+U_CAPI int8_t U_EXPORT2
+ucnv_getMinCharSize(const UConverter *converter);
+
+/**
+ * Returns the display name of the converter passed in based on the Locale
+ * passed in. If the locale contains no display name, the internal ASCII
+ * name will be filled in.
+ *
+ * @param converter the Unicode converter.
+ * @param displayLocale is the specific Locale we want to localize for
+ * @param displayName user provided buffer to be filled in
+ * @param displayNameCapacity size of displayName Buffer
+ * @param err error status code
+ * @return displayNameLength number of UChar needed in displayName
+ * @see ucnv_getName
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_getDisplayName(const UConverter *converter,
+ const char *displayLocale,
+ UChar *displayName,
+ int32_t displayNameCapacity,
+ UErrorCode *err);
+
+/**
+ * Gets the internal, canonical name of the converter (zero-terminated).
+ * The lifetime of the returned string will be that of the converter
+ * passed to this function.
+ * @param converter the Unicode converter
+ * @param err UErrorCode status
+ * @return the internal name of the converter
+ * @see ucnv_getDisplayName
+ * @stable ICU 2.0
+ */
+U_CAPI const char * U_EXPORT2
+ucnv_getName(const UConverter *converter, UErrorCode *err);
+
+/**
+ * Gets a codepage number associated with the converter. This is not guaranteed
+ * to be the one used to create the converter. Some converters do not represent
+ * platform registered codepages and return zero for the codepage number.
+ * The error code fill-in parameter indicates if the codepage number
+ * is available.
+ * Does not check if the converter is <TT>NULL</TT> or if converter's data
+ * table is <TT>NULL</TT>.
+ *
+ * Important: The use of CCSIDs is not recommended because it is limited
+ * to only two platforms in principle and only one (UCNV_IBM) in the current
+ * ICU converter API.
+ * Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely.
+ * For more details see ucnv_openCCSID().
+ *
+ * @param converter the Unicode converter
+ * @param err the error status code.
+ * @return If any error occurs, -1 will be returned; otherwise, the codepage number
+ * will be returned
+ * @see ucnv_openCCSID
+ * @see ucnv_getPlatform
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_getCCSID(const UConverter *converter,
+ UErrorCode *err);
+
+/**
+ * Gets a codepage platform associated with the converter. Currently,
+ * only <TT>UCNV_IBM</TT> will be returned.
+ * Does not test if the converter is <TT>NULL</TT> or if converter's data
+ * table is <TT>NULL</TT>.
+ * @param converter the Unicode converter
+ * @param err the error status code.
+ * @return The codepage platform
+ * @stable ICU 2.0
+ */
+U_CAPI UConverterPlatform U_EXPORT2
+ucnv_getPlatform(const UConverter *converter,
+ UErrorCode *err);
+
+/**
+ * Gets the type of the converter
+ * e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022,
+ * EBCDIC_STATEFUL, LATIN_1
+ * @param converter a valid, opened converter
+ * @return the type of the converter
+ * @stable ICU 2.0
+ */
+U_CAPI UConverterType U_EXPORT2
+ucnv_getType(const UConverter * converter);
+
+/**
+ * Gets the "starter" (lead) bytes for converters of type MBCS.
+ * Will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in
+ * is not MBCS. Fills in an array of type UBool, with the value of the byte
+ * as offset to the array. For example, if (starters[0x20] == true) at return,
+ * it means that the byte 0x20 is a starter byte in this converter.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter a valid, opened converter of type MBCS
+ * @param starters an array of size 256 to be filled in
+ * @param err error status, <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the
+ * converter is not a type which can return starters.
+ * @see ucnv_getType
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_getStarters(const UConverter* converter,
+ UBool starters[256],
+ UErrorCode* err);
+
+
+/**
+ * Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet().
+ * The enumerators have no explicit initializers, so they take consecutive
+ * values starting at 0 in declaration order.
+ * UCNV_SET_COUNT is only declared when U_HIDE_DEPRECATED_API is not defined.
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.6
+ */
+typedef enum UConverterUnicodeSet {
+ /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */
+ UCNV_ROUNDTRIP_SET,
+ /** Select the set of Unicode code points with roundtrip or fallback mappings. @stable ICU 4.0 */
+ UCNV_ROUNDTRIP_AND_FALLBACK_SET,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Number of UConverterUnicodeSet selectors.
+ * Do not rely on this value (for example, to size arrays); it is not a
+ * valid selector itself.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCNV_SET_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UConverterUnicodeSet;
+
+
+/**
+ * Returns the set of Unicode code points that can be converted by an ICU converter.
+ *
+ * Returns one of several kinds of set:
+ *
+ * 1. UCNV_ROUNDTRIP_SET
+ *
+ * The set of all Unicode code points that can be roundtrip-converted
+ * (converted without any data loss) with the converter (ucnv_fromUnicode()).
+ * This set will not include code points that have fallback mappings
+ * or are only the result of reverse fallback mappings.
+ * This set will also not include PUA code points with fallbacks, although
+ * ucnv_fromUnicode() will always use those mappings despite ucnv_setFallback().
+ * See UTR #22 "Character Mapping Markup Language"
+ * at http://www.unicode.org/reports/tr22/
+ *
+ * This is useful for example for
+ * - checking that a string or document can be roundtrip-converted with a converter,
+ * without/before actually performing the conversion
+ * - testing if a converter can be used for typical text for a certain locale,
+ * by comparing its roundtrip set with the set of ExemplarCharacters from
+ * ICU's locale data or other sources
+ *
+ * 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET
+ *
+ * The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode())
+ * when fallbacks are turned on (see ucnv_setFallback()).
+ * This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks).
+ *
+ * In the future, there may be more UConverterUnicodeSet choices to select
+ * sets with different properties.
+ *
+ * @param cnv The converter for which a set is requested.
+ * @param setFillIn A valid USet *. It will be cleared by this function before
+ * the converter's specific set is filled into the USet.
+ * @param whichSet A UConverterUnicodeSet selector:
+ * UCNV_ROUNDTRIP_SET or UCNV_ROUNDTRIP_AND_FALLBACK_SET.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ *
+ * @see UConverterUnicodeSet
+ * @see uset_open
+ * @see uset_close
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+ucnv_getUnicodeSet(const UConverter *cnv,
+ USet *setFillIn,
+ UConverterUnicodeSet whichSet,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gets the current callback function used by the converter when an illegal
+ * or invalid codepage sequence is found.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter the unicode converter
+ * @param action fillin: returns the callback function pointer
+ * @param context fillin: returns the callback's private void* context
+ * @see ucnv_setToUCallBack
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_getToUCallBack (const UConverter * converter,
+ UConverterToUCallback *action,
+ const void **context);
+
+/**
+ * Gets the current callback function used by the converter when an illegal
+ * or invalid Unicode sequence is found.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter the unicode converter
+ * @param action fillin: returns the callback function pointer
+ * @param context fillin: returns the callback's private void* context
+ * @see ucnv_setFromUCallBack
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_getFromUCallBack (const UConverter * converter,
+ UConverterFromUCallback *action,
+ const void **context);
+
+/**
+ * Changes the callback function used by the converter when
+ * an illegal or invalid sequence is found.
+ * Context pointers are always owned by the caller.
+ * Predefined actions and contexts can be found in the ucnv_err.h header.
+ *
+ * @param converter the unicode converter
+ * @param newAction the new callback function
+ * @param newContext the new toUnicode callback context pointer. This can be NULL.
+ * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
+ * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
+ * @param err The error code status
+ * @see ucnv_getToUCallBack
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_setToUCallBack (UConverter * converter,
+ UConverterToUCallback newAction,
+ const void* newContext,
+ UConverterToUCallback *oldAction,
+ const void** oldContext,
+ UErrorCode * err);
+
+/**
+ * Changes the current callback function used by the converter when
+ * an illegal or invalid sequence is found.
+ * Context pointers are always owned by the caller.
+ * Predefined actions and contexts can be found in the ucnv_err.h header.
+ *
+ * @param converter the unicode converter
+ * @param newAction the new callback function
+ * @param newContext the new fromUnicode callback context pointer. This can be NULL.
+ * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
+ * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
+ * @param err The error code status
+ * @see ucnv_getFromUCallBack
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_setFromUCallBack (UConverter * converter,
+ UConverterFromUCallback newAction,
+ const void *newContext,
+ UConverterFromUCallback *oldAction,
+ const void **oldContext,
+ UErrorCode * err);
+
+/**
+ * Converts an array of unicode characters to an array of codepage
+ * characters. This function is optimized for converting a continuous
+ * stream of data in buffer-sized chunks, where the entire source and
+ * target does not fit in available buffers.
+ *
+ * The source pointer is an in/out parameter. It starts out pointing where the
+ * conversion is to begin, and ends up pointing after the last UChar consumed.
+ *
+ * Target similarly starts out pointing at the first available byte in the output
+ * buffer, and ends up pointing after the last byte written to the output.
+ *
+ * The converter always attempts to consume the entire source buffer, unless
+ * (1.) the target buffer is full, or (2.) a failing error is returned from the
+ * current callback function. When a successful error status has been
+ * returned, it means that all of the source buffer has been
+ * consumed. At that point, the caller should reset the source and
+ * sourceLimit pointers to point to the next chunk.
+ *
+ * At the end of the stream (flush==true), the input is completely consumed
+ * when *source==sourceLimit and no error code is set.
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
+ *
+ * This is a <I>stateful</I> conversion. Additionally, even when all source data has
+ * been consumed, some data may be in the converter's internal state.
+ * Call this function repeatedly, updating the target pointers with
+ * the next empty chunk of target in case of a
+ * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers
+ * with the next chunk of source when a successful error status is
+ * returned, until there are no more chunks of source data.
+ * @param converter the Unicode converter
+ * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
+ * codepage characters to. Output : points to after the last codepage character copied
+ * to <TT>target</TT>.
+ * @param targetLimit the pointer just after the last of the <TT>target</TT> buffer
+ * @param source I/O parameter, pointer to pointer to the source Unicode character buffer.
+ * @param sourceLimit the pointer just after the last of the source buffer
+ * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
+ * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
+ * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
+ * For output data carried across calls, and other data without a specific source character
+ * (such as from escape sequences or callbacks) -1 will be placed for offsets.
+ * @param flush set to <TT>true</TT> if the current source buffer is the last available
+ * chunk of the source, <TT>false</TT> otherwise. Note that if a failing status is returned,
+ * this function may have to be called multiple times with flush set to <TT>true</TT> until
+ * the source buffer is consumed.
+ * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
+ * converter is <TT>NULL</TT>.
+ * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is
+ * still data to be written to the target.
+ * @see ucnv_fromUChars
+ * @see ucnv_convert
+ * @see ucnv_getMinCharSize
+ * @see ucnv_setFromUCallBack
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_fromUnicode (UConverter * converter,
+ char **target,
+ const char *targetLimit,
+ const UChar ** source,
+ const UChar * sourceLimit,
+ int32_t* offsets,
+ UBool flush,
+ UErrorCode * err);
+
+/**
+ * Converts a buffer of codepage bytes into an array of unicode UChars
+ * characters. This function is optimized for converting a continuous
+ * stream of data in buffer-sized chunks, where the entire source and
+ * target does not fit in available buffers.
+ *
+ * The source pointer is an in/out parameter. It starts out pointing where the
+ * conversion is to begin, and ends up pointing after the last byte of source consumed.
+ *
+ * Target similarly starts out pointing at the first available UChar in the output
+ * buffer, and ends up pointing after the last UChar written to the output.
+ * It does NOT necessarily keep UChar sequences together.
+ *
+ * The converter always attempts to consume the entire source buffer, unless
+ * (1.) the target buffer is full, or (2.) a failing error is returned from the
+ * current callback function. When a successful error status has been
+ * returned, it means that all of the source buffer has been
+ * consumed. At that point, the caller should reset the source and
+ * sourceLimit pointers to point to the next chunk.
+ *
+ * At the end of the stream (flush==true), the input is completely consumed
+ * when *source==sourceLimit and no error code is set.
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
+ *
+ * This is a <I>stateful</I> conversion. Additionally, even when all source data has
+ * been consumed, some data may be in the converter's internal state.
+ * Call this function repeatedly, updating the target pointers with
+ * the next empty chunk of target in case of a
+ * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers
+ * with the next chunk of source when a successful error status is
+ * returned, until there are no more chunks of source data.
+ * @param converter the Unicode converter
+ * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
+ * UChars into. Output : points to after the last UChar copied.
+ * @param targetLimit the pointer just after the end of the <TT>target</TT> buffer
+ * @param source I/O parameter, pointer to pointer to the source codepage buffer.
+ * @param sourceLimit the pointer to the byte after the end of the source buffer
+ * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
+ * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
+ * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
+ * For output data carried across calls, and other data without a specific source character
+ * (such as from escape sequences or callbacks) -1 will be placed for offsets.
+ * @param flush set to <TT>true</TT> if the current source buffer is the last available
+ * chunk of the source, <TT>false</TT> otherwise. Note that if a failing status is returned,
+ * this function may have to be called multiple times with flush set to <TT>true</TT> until
+ * the source buffer is consumed.
+ * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
+ * converter is <TT>NULL</TT>.
+ * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is
+ * still data to be written to the target.
+ * @see ucnv_fromUChars
+ * @see ucnv_convert
+ * @see ucnv_getMinCharSize
+ * @see ucnv_setToUCallBack
+ * @see ucnv_getNextUChar
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_toUnicode(UConverter *converter,
+ UChar **target,
+ const UChar *targetLimit,
+ const char **source,
+ const char *sourceLimit,
+ int32_t *offsets,
+ UBool flush,
+ UErrorCode *err);
+
+/**
+ * Convert the Unicode string into a codepage string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function is a more convenient but less powerful version of ucnv_fromUnicode().
+ * It is only useful for whole strings, not for streaming conversion.
+ *
+ * The maximum output buffer capacity required (barring output from callbacks) will be
+ * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)).
+ *
+ * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called)
+ * @param src the input Unicode string
+ * @param srcLength the input string length, or -1 if NUL-terminated
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of chars available at dest
+ * @param pErrorCode normal ICU error code;
+ * common error codes that may be set by this function include
+ * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
+ * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than destCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @see ucnv_fromUnicode
+ * @see ucnv_convert
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_fromUChars(UConverter *cnv,
+ char *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert the codepage string into a Unicode string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function is a more convenient but less powerful version of ucnv_toUnicode().
+ * It is only useful for whole strings, not for streaming conversion.
+ *
+ * The maximum output buffer capacity required (barring output from callbacks) will be
+ * 2*srcLength (each char may be converted into a surrogate pair).
+ *
+ * @param cnv the converter object to be used (ucnv_resetToUnicode() will be called)
+ * @param src the input codepage string
+ * @param srcLength the input string length, or -1 if NUL-terminated
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of UChars available at dest
+ * @param pErrorCode normal ICU error code;
+ * common error codes that may be set by this function include
+ * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
+ * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than destCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @see ucnv_toUnicode
+ * @see ucnv_convert
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_toUChars(UConverter *cnv,
+ UChar *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a codepage buffer into Unicode one character at a time.
+ * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set.
+ *
+ * Advantage compared to ucnv_toUnicode() or ucnv_toUChars():
+ * - Faster for small amounts of data, for most converters, e.g.,
+ * US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets.
+ * (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants,
+ * it uses ucnv_toUnicode() internally.)
+ * - Convenient.
+ *
+ * Limitations compared to ucnv_toUnicode():
+ * - Always assumes flush=true.
+ * This makes ucnv_getNextUChar() unsuitable for "streaming" conversion,
+ * that is, for where the input is supplied in multiple buffers,
+ * because ucnv_getNextUChar() will assume the end of the input at the end
+ * of the first buffer.
+ * - Does not provide offset output.
+ *
+ * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because
+ * ucnv_getNextUChar() uses the current state of the converter
+ * (unlike ucnv_toUChars() which always resets first).
+ * However, if ucnv_getNextUChar() is called after ucnv_toUnicode()
+ * stopped in the middle of a character sequence (with flush=false),
+ * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode()
+ * internally until the next character boundary.
+ * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to
+ * start at a character boundary.)
+ *
+ * Instead of using ucnv_getNextUChar(), it is recommended
+ * to convert using ucnv_toUnicode() or ucnv_toUChars()
+ * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h)
+ * or a C++ CharacterIterator or similar.
+ * This allows streaming conversion and offset output, for example.
+ *
+ * <p>Handling of surrogate pairs and supplementary-plane code points:<br>
+ * There are two different kinds of codepages that provide mappings for surrogate characters:
+ * <ul>
+ * <li>Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode
+ * code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff.
+ * Each valid sequence will result in exactly one returned code point.
+ * If a sequence results in a single surrogate, then that will be returned
+ * by itself, even if a neighboring sequence encodes the matching surrogate.</li>
+ * <li>Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points
+ * including surrogates. Code points in supplementary planes are represented with
+ * two sequences, each encoding a surrogate.
+ * For these codepages, matching pairs of surrogates will be combined into single
+ * code points for returning from this function.
+ * (Note that SCSU is actually a mix of these codepage types.)</li>
+ * </ul></p>
+ *
+ * @param converter an open UConverter
+ * @param source the address of a pointer to the codepage buffer, will be
+ * updated to point after the bytes consumed in the conversion call.
+ * @param sourceLimit points to the end of the input buffer
+ * @param err fills in error status (see ucnv_toUnicode)
+ * <code>U_INDEX_OUTOFBOUNDS_ERROR</code> will be set if the input
+ * is empty or does not convert to any output (e.g.: pure state-change
+ * codes SI/SO, escape sequences for ISO 2022,
+ * or if the callback did not output anything, ...).
+ * This function will not set a <code>U_BUFFER_OVERFLOW_ERROR</code> because
+ * the "buffer" is the return code. However, there might be subsequent output
+ * stored in the converter object
+ * that will be returned in following calls to this function.
+ * @return a UChar32 resulting from the partial conversion of source
+ * @see ucnv_toUnicode
+ * @see ucnv_toUChars
+ * @see ucnv_convert
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+ucnv_getNextUChar(UConverter * converter,
+ const char **source,
+ const char * sourceLimit,
+ UErrorCode * err);
+
+/**
+ * Convert from one external charset to another using two existing UConverters.
+ * Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() -
+ * are used, "pivoting" through 16-bit Unicode.
+ *
+ * Important: For streaming conversion (multiple function calls for successive
+ * parts of a text stream), the caller must provide a pivot buffer explicitly,
+ * and must preserve the pivot buffer and associated pointers from one
+ * call to another. (The buffer may be moved if its contents and the relative
+ * pointer positions are preserved.)
+ *
+ * There is a similar function, ucnv_convert(),
+ * which has the following limitations:
+ * - it takes charset names, not converter objects, so that
+ * - two converters are opened for each call
+ * - only single-string conversion is possible, not streaming operation
+ * - it does not provide enough information to find out,
+ * in case of failure, whether the toUnicode or
+ * the fromUnicode conversion failed
+ *
+ * By contrast, ucnv_convertEx()
+ * - takes UConverter parameters instead of charset names
+ * - fully exposes the pivot buffer for streaming conversion and complete error handling
+ *
+ * ucnv_convertEx() also provides further convenience:
+ * - an option to reset the converters at the beginning
+ * (if reset==true, see parameters;
+ * also sets *pivotTarget=*pivotSource=pivotStart)
+ * - allow NUL-terminated input
+ * (only a single NUL byte, will not work for charsets with multi-byte NULs)
+ * (if sourceLimit==NULL, see parameters)
+ * - terminate with a NUL on output
+ * (only a single NUL byte, not useful for charsets with multi-byte NULs),
+ * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
+ * the target buffer
+ * - the pivot buffer can be provided internally;
+ * possible only for whole-string conversion, not streaming conversion;
+ * in this case, the caller will not be able to get details about where an
+ * error occurred
+ * (if pivotStart==NULL, see below)
+ *
+ * The function returns when one of the following is true:
+ * - the entire source text has been converted successfully to the target buffer
+ * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
+ * - a conversion error occurred
+ * (other U_FAILURE(), see description of pErrorCode)
+ *
+ * Limitation compared to the direct use of
+ * ucnv_fromUnicode() and ucnv_toUnicode():
+ * ucnv_convertEx() does not provide offset information.
+ *
+ * Limitation compared to ucnv_fromUChars() and ucnv_toUChars():
+ * ucnv_convertEx() does not support preflighting directly.
+ *
+ * Sample code for converting a single string from
+ * one external charset to UTF-8, ignoring the location of errors:
+ *
+ * \code
+ * int32_t
+ * myToUTF8(UConverter *cnv,
+ * const char *s, int32_t length,
+ * char *u8, int32_t capacity,
+ * UErrorCode *pErrorCode) {
+ * UConverter *utf8Cnv;
+ * char *target;
+ *
+ * if(U_FAILURE(*pErrorCode)) {
+ * return 0;
+ * }
+ *
+ * utf8Cnv=myGetCachedUTF8Converter(pErrorCode);
+ * if(U_FAILURE(*pErrorCode)) {
+ * return 0;
+ * }
+ *
+ * if(length<0) {
+ * length=strlen(s);
+ * }
+ * target=u8;
+ * ucnv_convertEx(utf8Cnv, cnv,
+ * &target, u8+capacity,
+ * &s, s+length,
+ * NULL, NULL, NULL, NULL,
+ * true, true,
+ * pErrorCode);
+ *
+ * myReleaseCachedUTF8Converter(utf8Cnv);
+ *
+ * // return the output string length, but without preflighting
+ * return (int32_t)(target-u8);
+ * }
+ * \endcode
+ *
+ * @param targetCnv Output converter, used to convert from the UTF-16 pivot
+ * to the target using ucnv_fromUnicode().
+ * @param sourceCnv Input converter, used to convert from the source to
+ * the UTF-16 pivot using ucnv_toUnicode().
+ * @param target I/O parameter, same as for ucnv_fromUChars().
+ * Input: *target points to the beginning of the target buffer.
+ * Output: *target points to the first unit after the last char written.
+ * @param targetLimit Pointer to the first unit after the target buffer.
+ * @param source I/O parameter, same as for ucnv_toUChars().
+ * Input: *source points to the beginning of the source buffer.
+ * Output: *source points to the first unit after the last char read.
+ * @param sourceLimit Pointer to the first unit after the source buffer.
+ * @param pivotStart Pointer to the UTF-16 pivot buffer. If pivotStart==NULL,
+ * then an internal buffer is used and the other pivot
+ * arguments are ignored and can be NULL as well.
+ * @param pivotSource I/O parameter, same as source in ucnv_fromUChars() for
+ * conversion from the pivot buffer to the target buffer.
+ * @param pivotTarget I/O parameter, same as target in ucnv_toUChars() for
+ * conversion from the source buffer to the pivot buffer.
+ * It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit
+ * and pivotStart<pivotLimit (unless pivotStart==NULL).
+ * @param pivotLimit Pointer to the first unit after the pivot buffer.
+ * @param reset If true, then ucnv_resetToUnicode(sourceCnv) and
+ * ucnv_resetFromUnicode(targetCnv) are called, and the
+ * pivot pointers are reset (*pivotTarget=*pivotSource=pivotStart).
+ * @param flush If true, indicates the end of the input.
+ * Passed directly to ucnv_toUnicode(), and carried over to
+ * ucnv_fromUnicode() when the source is empty as well.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * U_BUFFER_OVERFLOW_ERROR always refers to the target buffer
+ * because overflows into the pivot buffer are handled internally.
+ * Other conversion errors are from the source-to-pivot
+ * conversion if *pivotSource==pivotStart, otherwise from
+ * the pivot-to-target conversion.
+ *
+ * @see ucnv_convert
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_toAlgorithmic
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
+ char **target, const char *targetLimit,
+ const char **source, const char *sourceLimit,
+ UChar *pivotStart, UChar **pivotSource,
+ UChar **pivotTarget, const UChar *pivotLimit,
+ UBool reset, UBool flush,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, two converters are opened according to the name arguments,
+ * then the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(), then the converters are closed again.
+ *
+ * This is a convenience function, not an efficient way to convert a lot of text:
+ * ucnv_convert()
+ * - takes charset names, not converter objects, so that
+ * - two converters are opened for each call
+ * - only single-string conversion is possible, not streaming operation
+ * - does not provide enough information to find out,
+ * in case of failure, whether the toUnicode or
+ * the fromUnicode conversion failed
+ * - allows NUL-terminated input
+ * (only a single NUL byte, will not work for charsets with multi-byte NULs)
+ * (if sourceLength==-1, see parameters)
+ * - terminate with a NUL on output
+ * (only a single NUL byte, not useful for charsets with multi-byte NULs),
+ * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
+ * the target buffer
+ * - a pivot buffer is provided internally
+ *
+ * The function returns when one of the following is true:
+ * - the entire source text has been converted successfully to the target buffer
+ * and either the target buffer is terminated with a single NUL byte
+ * or the error code is set to U_STRING_NOT_TERMINATED_WARNING
+ * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
+ * and the full output string length is returned ("preflighting")
+ * - a conversion error occurred
+ * (other U_FAILURE(), see description of pErrorCode)
+ *
+ * @param toConverterName The name of the converter that is used to convert
+ * from the UTF-16 pivot buffer to the target.
+ * @param fromConverterName The name of the converter that is used to convert
+ * from the source to the UTF-16 pivot buffer.
+ * @param target Pointer to the output buffer.
+ * @param targetCapacity Capacity of the target, in bytes.
+ * @param source Pointer to the input buffer.
+ * @param sourceLength Length of the input text, in bytes, or -1 for NUL-terminated input.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ * and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_convertEx
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_toAlgorithmic
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @see ucnv_getNextUChar
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_convert(const char *toConverterName,
+ const char *fromConverterName,
+ char *target,
+ int32_t targetCapacity,
+ const char *source,
+ int32_t sourceLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(). ucnv_toAlgorithmic() works exactly like ucnv_convert()
+ * except that the two converters need not be looked up and opened completely.
+ *
+ * The source-to-pivot conversion uses the cnv converter parameter.
+ * The pivot-to-target conversion uses a purely algorithmic converter
+ * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
+ *
+ * Internally, the algorithmic converter is opened and closed for each
+ * function call, which is more efficient than using the public ucnv_open()
+ * but somewhat less efficient than only resetting an existing converter
+ * and using ucnv_convertEx().
+ *
+ * This function is more convenient than ucnv_convertEx() for single-string
+ * conversions, especially when "preflighting" is desired (returning the length
+ * of the complete output even if it does not fit into the target buffer;
+ * see the User Guide Strings chapter). See ucnv_convert() for details.
+ *
+ * @param algorithmicType UConverterType constant identifying the desired target
+ * charset as a purely algorithmic converter.
+ * Those are converters for Unicode charsets like
+ * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
+ * as well as US-ASCII and ISO-8859-1.
+ * @param cnv The converter that is used to convert
+ * from the source to the UTF-16 pivot buffer.
+ * @param target Pointer to the output buffer.
+ * @param targetCapacity Capacity of the target, in bytes.
+ * @param source Pointer to the input buffer.
+ * @param sourceLength Length of the input text, in bytes
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ * and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_convert
+ * @see ucnv_convertEx
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_toAlgorithmic(UConverterType algorithmicType,
+ UConverter *cnv,
+ char *target, int32_t targetCapacity,
+ const char *source, int32_t sourceLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(). ucnv_fromAlgorithmic() works exactly like ucnv_convert()
+ * except that the two converters need not be looked up and opened completely.
+ *
+ * The source-to-pivot conversion uses a purely algorithmic converter
+ * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
+ * The pivot-to-target conversion uses the cnv converter parameter.
+ *
+ * Internally, the algorithmic converter is opened and closed for each
+ * function call, which is more efficient than using the public ucnv_open()
+ * but somewhat less efficient than only resetting an existing converter
+ * and using ucnv_convertEx().
+ *
+ * This function is more convenient than ucnv_convertEx() for single-string
+ * conversions, especially when "preflighting" is desired (returning the length
+ * of the complete output even if it does not fit into the target buffer;
+ * see the User Guide Strings chapter). See ucnv_convert() for details.
+ *
+ * @param cnv The converter that is used to convert
+ * from the UTF-16 pivot buffer to the target.
+ * @param algorithmicType UConverterType constant identifying the desired source
+ * charset as a purely algorithmic converter.
+ * Those are converters for Unicode charsets like
+ * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
+ * as well as US-ASCII and ISO-8859-1.
+ * @param target Pointer to the output buffer.
+ * @param targetCapacity Capacity of the target, in bytes.
+ * @param source Pointer to the input buffer.
+ * @param sourceLength Length of the input text, in bytes
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ * and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_convert
+ * @see ucnv_convertEx
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_fromAlgorithmic(UConverter *cnv,
+ UConverterType algorithmicType,
+ char *target, int32_t targetCapacity,
+ const char *source, int32_t sourceLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Frees up memory occupied by unused, cached converter shared data.
+ *
+ * @return the number of cached converters successfully deleted
+ * @see ucnv_close
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_flushCache(void);
+
+/**
+ * Returns the number of available converters, as per the alias file.
+ *
+ * @return the number of available converters
+ * @see ucnv_getAvailableName
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_countAvailable(void);
+
+/**
+ * Gets the canonical converter name of the specified converter from a list of
+ * all available converters contained in the alias file. All converters
+ * in this list can be opened.
+ *
+ * @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvailable()-1]</TT>)
+ * @return a pointer to a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
+ * @see ucnv_countAvailable
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+ucnv_getAvailableName(int32_t n);
+
+/**
+ * Returns a UEnumeration to enumerate all of the canonical converter
+ * names, as per the alias file, regardless of the ability to open each
+ * converter.
+ *
+ * @return A UEnumeration object for getting all the recognized canonical
+ * converter names.
+ * @see ucnv_getAvailableName
+ * @see uenum_close
+ * @see uenum_next
+ * @stable ICU 2.4
+ */
+U_CAPI UEnumeration * U_EXPORT2
+ucnv_openAllNames(UErrorCode *pErrorCode);
+
+/**
+ * Gives the number of aliases for a given converter or alias name.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * This method only enumerates the listed entries in the alias file.
+ * @param alias alias name
+ * @param pErrorCode error status
+ * @return number of names on alias list for given alias
+ * @stable ICU 2.0
+ */
+U_CAPI uint16_t U_EXPORT2
+ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
+
+/**
+ * Gives the name of the alias at given index of alias list.
+ * This method only enumerates the listed entries in the alias file.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * @param alias alias name
+ * @param n index in alias list
+ * @param pErrorCode result of operation
+ * @return returns the name of the alias at given index
+ * @see ucnv_countAliases
+ * @stable ICU 2.0
+ */
+U_CAPI const char * U_EXPORT2
+ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Fill-up the list of alias names for the given alias.
+ * This method only enumerates the listed entries in the alias file.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * @param alias alias name
+ * @param aliases fill-in list, aliases is a pointer to an array of
+ * <code>ucnv_countAliases()</code> string-pointers
+ * (<code>const char *</code>) that will be filled in.
+ * The strings themselves are owned by the library.
+ * @param pErrorCode result of operation
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
+
+/**
+ * Return a new UEnumeration object for enumerating all the
+ * alias names for a given converter that are recognized by a standard.
+ * This method only enumerates the listed entries in the alias file.
+ * The convrtrs.txt file can be modified to change the results of
+ * this function.
+ * The first result in this list is the same result given by
+ * <code>ucnv_getStandardName</code>, which is the default alias for
+ * the specified standard name. The returned object must be closed with
+ * <code>uenum_close</code> when you are done with the object.
+ *
+ * @param convName original converter name
+ * @param standard name of the standard governing the names; MIME and IANA
+ * are such standards
+ * @param pErrorCode The error code
+ * @return A UEnumeration object for getting all aliases that are recognized
+ * by a standard. If any of the parameters are invalid, NULL
+ * is returned.
+ * @see ucnv_getStandardName
+ * @see uenum_close
+ * @see uenum_next
+ * @stable ICU 2.2
+ */
+U_CAPI UEnumeration * U_EXPORT2
+ucnv_openStandardNames(const char *convName,
+ const char *standard,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gives the number of standards associated to converter names.
+ * @return number of standards
+ * @stable ICU 2.0
+ */
+U_CAPI uint16_t U_EXPORT2
+ucnv_countStandards(void);
+
+/**
+ * Gives the name of the standard at given index of standard list.
+ * @param n index in standard list
+ * @param pErrorCode result of operation
+ * @return returns the name of the standard at given index. Owned by the library.
+ * @stable ICU 2.0
+ */
+U_CAPI const char * U_EXPORT2
+ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Returns a standard name for a given converter name.
+ * <p>
+ * Example alias table:<br>
+ * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
+ * <p>
+ * Result of ucnv_getStandardName("conv", "STANDARD1") from example
+ * alias table:<br>
+ * <b>"alias2"</b>
+ *
+ * @param name original converter name
+ * @param standard name of the standard governing the names; MIME and IANA
+ * are such standards
+ * @param pErrorCode result of operation
+ * @return returns the standard converter name;
+ * if a standard converter name cannot be determined,
+ * then <code>NULL</code> is returned. Owned by the library.
+ * @stable ICU 2.0
+ */
+U_CAPI const char * U_EXPORT2
+ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
+
+/**
+ * This function will return the internal canonical converter name of the
+ * tagged alias. This is the opposite of ucnv_openStandardNames, which
+ * returns the tagged alias given the canonical name.
+ * <p>
+ * Example alias table:<br>
+ * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
+ * <p>
+ * Result of ucnv_getCanonicalName("alias1", "STANDARD1") from example
+ * alias table:<br>
+ * <b>"conv"</b>
+ *
+ * @return returns the canonical converter name;
+ * if a standard or alias name cannot be determined,
+ * then <code>NULL</code> is returned. The returned string is
+ * owned by the library.
+ * @see ucnv_getStandardName
+ * @stable ICU 2.4
+ */
+U_CAPI const char * U_EXPORT2
+ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode);
+
+/**
+ * Returns the current default converter name. If you want to open
+ * a default converter, you do not need to use this function.
+ * It is faster to pass a NULL argument to ucnv_open to open the
+ * default converter.
+ *
+ * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
+ * always returns "UTF-8".
+ *
+ * @return returns the current default converter name.
+ * Storage owned by the library
+ * @see ucnv_setDefaultName
+ * @stable ICU 2.0
+ */
+U_CAPI const char * U_EXPORT2
+ucnv_getDefaultName(void);
+
+#ifndef U_HIDE_SYSTEM_API
+/**
+ * This function is not thread safe. DO NOT call this function when ANY ICU
+ * function is being used from more than one thread! This function sets the
+ * current default converter name. If this function needs to be called, it
+ * should be called during application initialization. Most of the time, the
+ * results from ucnv_getDefaultName() or ucnv_open with a NULL string argument
+ * are sufficient for your application.
+ *
+ * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
+ * does nothing.
+ *
+ * @param name the converter name to be the default (must be known by ICU).
+ * @see ucnv_getDefaultName
+ * @system
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_setDefaultName(const char *name);
+#endif /* U_HIDE_SYSTEM_API */
+
+/**
+ * Fixes the backslash character mismapping. For example, in SJIS, the backslash
+ * character in the ASCII portion is also used to represent the yen currency sign.
+ * When mapping from Unicode character 0x005C, it's unclear whether to map the
+ * character back to yen or backslash in SJIS. This function will take the input
+ * buffer and replace all the yen sign characters with backslash. This is necessary
+ * when the user tries to open a file with the input buffer on Windows.
+ * This function will test the converter to see whether such mapping is
+ * required. You can sometimes avoid using this function by using the correct version
+ * of Shift-JIS.
+ *
+ * @param cnv The converter representing the target codepage.
+ * @param source the input buffer to be fixed
+ * @param sourceLen the length of the input buffer
+ * @see ucnv_isAmbiguous
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen);
+
+/**
+ * Determines if the converter contains ambiguous mappings of the same
+ * character or not.
+ * @param cnv the converter to be tested
+ * @return true if the converter contains ambiguous mapping of the same
+ * character, false otherwise.
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+ucnv_isAmbiguous(const UConverter *cnv);
+
+/**
+ * Sets the converter to use fallback mappings or not.
+ * Regardless of this flag, the converter will always use
+ * fallbacks from Unicode Private Use code points, as well as
+ * reverse fallbacks (to Unicode).
+ * For details see ".ucm File Format"
+ * in the Conversion Data chapter of the ICU User Guide:
+ * http://www.icu-project.org/userguide/conversion-data.html#ucmformat
+ *
+ * @param cnv The converter to set the fallback mapping usage on.
+ * @param usesFallback true if the user wants the converter to take advantage of the fallback
+ * mapping, false otherwise.
+ * @stable ICU 2.0
+ * @see ucnv_usesFallback
+ */
+U_CAPI void U_EXPORT2
+ucnv_setFallback(UConverter *cnv, UBool usesFallback);
+
+/**
+ * Determines if the converter uses fallback mappings or not.
+ * This flag has restrictions, see ucnv_setFallback().
+ *
+ * @param cnv The converter to be tested
+ * @return true if the converter uses fallback, false otherwise.
+ * @stable ICU 2.0
+ * @see ucnv_setFallback
+ */
+U_CAPI UBool U_EXPORT2
+ucnv_usesFallback(const UConverter *cnv);
+
+/**
+ * Detects Unicode signature byte sequences at the start of the byte stream
+ * and returns the charset name of the indicated Unicode charset.
+ * NULL is returned when no Unicode signature is recognized.
+ * The number of bytes in the signature is output as well.
+ *
+ * The caller can ucnv_open() a converter using the charset name.
+ * The first code unit (UChar) from the start of the stream will be U+FEFF
+ * (the Unicode BOM/signature character) and can usually be ignored.
+ *
+ * For most Unicode charsets it is also possible to ignore the indicated
+ * number of initial stream bytes and start converting after them.
+ * However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which
+ * this will not work. Therefore, it is best to ignore the first output UChar
+ * instead of the input signature bytes.
+ * <p>
+ * Usage:
+ * \snippet samples/ucnv/convsamp.cpp ucnv_detectUnicodeSignature
+ *
+ * @param source The source string in which the signature should be detected.
+ * @param sourceLength Length of the input string, or -1 if terminated with a NUL byte.
+ * @param signatureLength A pointer to int32_t to receive the number of bytes that make up the signature
+ * of the detected UTF. 0 if not detected.
+ * Can be a NULL pointer.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return The name of the encoding detected. NULL if encoding is not detected.
+ * @stable ICU 2.4
+ */
+U_CAPI const char* U_EXPORT2
+ucnv_detectUnicodeSignature(const char* source,
+ int32_t sourceLength,
+ int32_t *signatureLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the number of UChars held in the converter's internal state
+ * because more input is needed for completing the conversion. This function is
+ * useful for mapping semantics of ICU's converter interface to those of iconv,
+ * and this information is not needed for normal conversion.
+ * @param cnv The converter in which the input is held
+ * @param status ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return The number of UChars in the state. -1 if an error is encountered.
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status);
+
+/**
+ * Returns the number of chars held in the converter's internal state
+ * because more input is needed for completing the conversion. This function is
+ * useful for mapping semantics of ICU's converter interface to those of iconv,
+ * and this information is not needed for normal conversion.
+ * @param cnv The converter in which the input is held as internal state
+ * @param status ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return The number of chars in the state. -1 if an error is encountered.
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status);
+
+/**
+ * Returns whether or not the charset of the converter has a fixed number of bytes
+ * per charset character.
+ * Examples of this are converters that are of the type UCNV_SBCS or UCNV_DBCS.
+ * Another example is UTF-32 which is always 4 bytes per character.
+ * A Unicode code point may be represented by more than one UTF-8 or UTF-16 code unit
+ * but a UTF-32 converter encodes each code point with 4 bytes.
+ * Note: This method is not intended to be used to determine whether the charset has a
+ * fixed ratio of bytes to Unicode code <i>units</i> for any particular Unicode encoding form.
+ * false is returned with the UErrorCode if error occurs or cnv is NULL.
+ * @param cnv The converter to be tested
+ * @param status ICU error code in/out parameter
+ * @return true if the converter is fixed-width
+ * @stable ICU 4.8
+ */
+U_CAPI UBool U_EXPORT2
+ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status);
+
+#endif
+
+#endif
+/*_UCNV*/
diff --git a/thirdparty/icu4c/common/unicode/ucnv_cb.h b/thirdparty/icu4c/common/unicode/ucnv_cb.h
new file mode 100644
index 0000000000..41845d1bca
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ucnv_cb.h
@@ -0,0 +1,164 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2000-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+ * ucnv_cb.h:
+ * External APIs for the ICU's codeset conversion library
+ * Helena Shih
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ */
+
+/**
+ * \file
+ * \brief C UConverter functions to aid the writers of callbacks
+ *
+ * <h2> Callback API for UConverter </h2>
+ *
+ * These functions are provided here for the convenience of the callback
+ * writer. If you are just looking for callback functions to use, please
+ * see ucnv_err.h. DO NOT call these functions directly when you are
+ * working with converters, unless your code has been called as a callback
+ * via ucnv_setFromUCallback or ucnv_setToUCallback !!
+ *
+ * A note about error codes and overflow. Unlike other ICU functions,
+ * these functions do not expect the error status to be U_ZERO_ERROR.
+ * Callbacks must be much more careful about their error codes.
+ * The error codes used here are in/out parameters, which should be passed
+ * back in the callback's error parameter.
+ *
+ * For example, if you call ucnv_cbFromUWriteBytes to write data out
+ * to the output codepage, it may return U_BUFFER_OVERFLOW_ERROR if
+ * the data did not fit in the target. But this isn't a failing error,
+ * in fact, ucnv_cbFromUWriteBytes may be called AGAIN with the error
+ * status still U_BUFFER_OVERFLOW_ERROR to attempt to write further bytes,
+ * which will also go into the internal overflow buffers.
+ *
+ * Concerning offsets, the 'offset' parameters here are relative to the start
+ * of SOURCE. For example, suppose the string "ABCD" was being converted
+ * from Unicode into a codepage which doesn't have a mapping for 'B'.
+ * 'A' will be written out correctly, but
+ * The FromU Callback will be called on an unassigned character for 'B'.
+ * At this point, this is the state of the world:
+ * Target: A [..] [points after A]
+ * Source: A B [C] D [points to C - B has been consumed]
+ * 0 1 2 3
+ * codePoint = "B" [the unassigned codepoint]
+ *
+ * Now, suppose a callback wants to write the substitution character '?' to
+ * the target. It calls ucnv_cbFromUWriteBytes() to write the ?.
+ * It should pass ZERO as the offset, because the offset as far as the
+ * callback is concerned is relative to the SOURCE pointer [which points
+ * before 'C'.] If the callback goes into the args and consumes 'C' also,
+ * it would call FromUWriteBytes with an offset of 1 (and advance the source
+ * pointer).
+ *
+ */
+
+#ifndef UCNV_CB_H
+#define UCNV_CB_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_err.h"
+
+/**
+ * ONLY used by FromU callback functions.
+ * Writes out the specified output bytes to the target byte buffer or to converter internal buffers.
+ *
+ * @param args callback fromUnicode arguments
+ * @param source source bytes to write
+ * @param length length of bytes to write
+ * @param offsetIndex the relative offset index from callback.
+ * @param err error status. If <TT>U_BUFFER_OVERFLOW_ERROR</TT> is returned, then U_BUFFER_OVERFLOW_ERROR <STRONG>must</STRONG>
+ * be returned to the user, because it means that not all data could be written into the target buffer, and some is
+ * in the converter error buffer.
+ * @see ucnv_cbFromUWriteSub
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
+ const char* source,
+ int32_t length,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by FromU callback functions.
+ * This function will write out the correct substitution character sequence
+ * to the target.
+ *
+ * @param args callback fromUnicode arguments
+ * @param offsetIndex the relative offset index from the current source pointer to be used
+ * @param err error status. If <TT>U_BUFFER_OVERFLOW_ERROR</TT> is returned, then U_BUFFER_OVERFLOW_ERROR <STRONG>must</STRONG>
+ * be returned to the user, because it means that not all data could be written into the target buffer, and some is
+ * in the converter error buffer.
+ * @see ucnv_cbFromUWriteBytes
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by fromU callback functions.
+ * This function will write out the error character(s) to the target UChar buffer.
+ *
+ * @param args callback fromUnicode arguments
+ * @param source pointer to pointer to first UChar to write [on exit: 1 after last UChar processed]
+ * @param sourceLimit pointer after last UChar to write
+ * @param offsetIndex the relative offset index from callback which will be set
+ * @param err error status <TT>U_BUFFER_OVERFLOW_ERROR</TT>
+ * @see ucnv_cbToUWriteSub
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
+ const UChar** source,
+ const UChar* sourceLimit,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by ToU callback functions.
+ * This function will write out the specified characters to the target
+ * UChar buffer.
+ *
+ * @param args callback toUnicode arguments
+ * @param source source string to write
+ * @param length the length of source string
+ * @param offsetIndex the relative offset index which will be written.
+ * @param err error status <TT>U_BUFFER_OVERFLOW_ERROR</TT>
+ * @see ucnv_cbToUWriteSub
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
+ const UChar* source,
+ int32_t length,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by ToU callback functions.
+ * This function will write out the Unicode substitution character (U+FFFD).
+ *
+ * @param args callback toUnicode arguments
+ * @param offsetIndex the relative offset index from callback.
+ * @param err error status <TT>U_BUFFER_OVERFLOW_ERROR</TT>
+ * @see ucnv_cbToUWriteUChars
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
+ int32_t offsetIndex,
+ UErrorCode * err);
+#endif
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/ucnv_err.h b/thirdparty/icu4c/common/unicode/ucnv_err.h
new file mode 100644
index 0000000000..7209ba5f7b
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ucnv_err.h
@@ -0,0 +1,465 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2009, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+ *
+ *
+ * ucnv_err.h:
+ */
+
+/**
+ * \file
+ * \brief C UConverter predefined error callbacks
+ *
+ * <h2>Error Behaviour Functions</h2>
+ * Defines some error behaviour functions called by ucnv_{from,to}Unicode
+ * These are provided as part of ICU and many are stable, but they
+ * can also be considered only as an example of what can be done with
+ * callbacks. You may of course write your own.
+ *
+ * If you want to write your own, you may also find the functions from
+ * ucnv_cb.h useful when writing your own callbacks.
+ *
+ * These functions, although public, should NEVER be called directly.
+ * They should be used as parameters to the ucnv_setFromUCallBack
+ * and ucnv_setToUCallBack functions, to set the behaviour of a converter
+ * when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
+ *
+ * usage example: 'STOP' doesn't need any context, but newContext
+ * could be set to something other than 'NULL' if needed. The available
+ * contexts in this header can modify the default behavior of the callback.
+ *
+ * \code
+ * UErrorCode err = U_ZERO_ERROR;
+ * UConverter *myConverter = ucnv_open("ibm-949", &err);
+ * const void *oldContext;
+ * UConverterFromUCallback oldAction;
+ *
+ *
+ * if (U_SUCCESS(err))
+ * {
+ * ucnv_setFromUCallBack(myConverter,
+ * UCNV_FROM_U_CALLBACK_STOP,
+ * NULL,
+ * &oldAction,
+ * &oldContext,
+ * &err);
+ * }
+ * \endcode
+ *
+ * The code above tells "myConverter" to stop when it encounters
+ * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
+ * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
+ * and ucnv_setToUCallBack would need to be called in order to change
+ * that behavior too.
+ *
+ * Here is an example with a context:
+ *
+ * \code
+ * UErrorCode err = U_ZERO_ERROR;
+ * UConverter *myConverter = ucnv_open("ibm-949", &err);
+ * const void *oldContext;
+ * UConverterToUCallback oldAction;
+ *
+ *
+ * if (U_SUCCESS(err))
+ * {
+ * ucnv_setToUCallBack(myConverter,
+ * UCNV_TO_U_CALLBACK_SUBSTITUTE,
+ * UCNV_SUB_STOP_ON_ILLEGAL,
+ * &oldAction,
+ * &oldContext,
+ * &err);
+ * }
+ * \endcode
+ *
+ * The code above tells "myConverter" to stop when it encounters
+ * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
+ * Codepage -> Unicode. Any unmapped and legal characters will be
+ * substituted to be the default substitution character.
+ */
+
+#ifndef UCNV_ERR_H
+#define UCNV_ERR_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+/** Forward declaring the UConverter structure. @stable ICU 2.0 */
+struct UConverter;
+
+/** @stable ICU 2.0 */
+typedef struct UConverter UConverter;
+
+/**
+ * FROM_U, TO_U context options for sub callback
+ * @stable ICU 2.0
+ */
+#define UCNV_SUB_STOP_ON_ILLEGAL "i"
+
+/**
+ * FROM_U, TO_U context options for skip callback
+ * @stable ICU 2.0
+ */
+#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
+
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_ICU NULL
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_JAVA "J"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
+ * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_C "C"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_XML_DEC "D"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_XML_HEX "X"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_UNICODE "U"
+
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
+ * a backslash, 1..6 hex digits, and a space)
+ * @stable ICU 4.0
+ */
+#define UCNV_ESCAPE_CSS2 "S"
+
+/**
+ * The process condition code to be used with the callbacks.
+ * Codes which are greater than UCNV_IRREGULAR should be
+ * passed on to any chained callbacks.
+ * @stable ICU 2.0
+ */
+typedef enum {
+ UCNV_UNASSIGNED = 0, /**< The code point is unassigned.
+ The error code U_INVALID_CHAR_FOUND will be set. */
+ UCNV_ILLEGAL = 1, /**< The code point is illegal. For example,
+ \\x81\\x2E is illegal in SJIS because \\x2E
+ is not a valid trail byte for the \\x81
+ lead byte.
+ Also, starting with Unicode 3.0.1, non-shortest byte sequences
+ in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
+ are also illegal, not just irregular.
+ The error code U_ILLEGAL_CHAR_FOUND will be set. */
+ UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in
+ the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
+ are irregular UTF-8 byte sequences for single surrogate
+ code points.
+ The error code U_INVALID_CHAR_FOUND will be set. */
+ UCNV_RESET = 3, /**< The callback is called with this reason when a
+ 'reset' has occurred. Callback should reset all
+ state. */
+ UCNV_CLOSE = 4, /**< Called when the converter is closed. The
+ callback should release any allocated memory. */
+ UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the
+ converter. The pointer available as the
+ 'context' is an alias to the original converter's
+ context pointer. If the context must be owned
+ by the new converter, the callback must clone
+ the data and call ucnv_setFromUCallBack
+ (or ucnv_setToUCallBack) with the correct pointer.
+ @stable ICU 2.2
+ */
+} UConverterCallbackReason;
+
+
+/**
+ * The structure for the fromUnicode callback function parameter.
+ * @stable ICU 2.0
+ */
+typedef struct {
+ uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
+ UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
+ UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
+ const UChar *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
+ const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
+ char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
+ const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
+ int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
+} UConverterFromUnicodeArgs;
+
+
+/**
+ * The structure for the toUnicode callback function parameter.
+ * @stable ICU 2.0
+ */
+typedef struct {
+ uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
+ UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
+ UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
+ const char *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
+ const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
+ UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
+ const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
+ int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
+} UConverterToUnicodeArgs;
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ *
+ * @param context Pointer to the callback's private data
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Number of UChars in the concerned Unicode sequence (codeUnits)
+ * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err This should always be set to a failure status prior to calling.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ *
+ * @param context Pointer to the callback's private data
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err This should always be set to a failure status prior to calling.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
+ * skips only UNASSIGNED_SEQUENCE depending on the context parameter
+ * simply ignoring those characters.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Skips any ILLEGAL_SEQUENCE
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Number of UChars in the concerned Unicode sequence (codeUnits)
+ * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
+ * UNASSIGNED_SEQUENCE depending on context parameter, with the
+ * current substitution string for the converter. This is the default
+ * callback.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Substitutes any ILLEGAL_SEQUENCE
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Number of UChars in the concerned Unicode sequence (codeUnits)
+ * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @see ucnv_setSubstChars
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
+ * hexadecimal representation of the illegal codepoints
+ *
+ * @param context The function currently recognizes the callback options:
+ * <ul>
+ * <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
+ * representation in the format %UXXXX (e.g. "%UFFFE%U00AC%UC8FE").
+ * In the event the converter doesn't support the characters {%,U}[A-F][0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * %UD84D%UDC56</li>
+ * <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
+ * representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE").
+ * In the event the converter doesn't support the characters {\,u}[A-F][0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * \\uD84D\\uDC56</li>
+ * <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
+ * representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE").
+ * In the event the converter doesn't support the characters {\,u,U}[A-F][0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * \\U00023456</li>
+ * <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
+ * representation in the format \htmlonly&amp;#DDDDDDDD; (e.g. "&amp;#65534;&amp;#172;&amp;#51454;")\endhtmlonly.
+ * In the event the converter doesn't support the characters {&amp;,#}[0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * &amp;#144470; and Zero padding is ignored.</li>
+ * <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
+ * representation in the format \htmlonly&amp;#xXXXX; (e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;")\endhtmlonly.
+ * In the Event the converter doesn't support the characters {&,#,x}[0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * \htmlonly&amp;#x23456;\endhtmlonly</li>
+ * </ul>
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Number of UChars in the concerned Unicode sequence (codeUnits)
+ * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
+ * skips only UNASSIGNED_SEQUENCE depending on the context parameter
+ * simply ignoring those characters.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Skips any ILLEGAL_SEQUENCE
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or
+ * UNASSIGNED_SEQUENCE depending on context parameter, with the
+ * Unicode substitution character, U+FFFD.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Substitutes any ILLEGAL_SEQUENCE
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
+ * hexadecimal representation of the illegal bytes
+ * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
+ *
+ * @param context This function currently recognizes the callback options:
+ * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
+ * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+
+U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+#endif
+
+#endif
+
+/*UCNV_ERR_H*/
diff --git a/thirdparty/icu4c/common/unicode/ucnvsel.h b/thirdparty/icu4c/common/unicode/ucnvsel.h
new file mode 100644
index 0000000000..5e0a71cf35
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ucnvsel.h
@@ -0,0 +1,192 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2008-2011, International Business Machines
+* Corporation, Google and others. All Rights Reserved.
+*
+*******************************************************************************
+*/
+/*
+ * Author : eldawy@google.com (Mohamed Eldawy)
+ * ucnvsel.h
+ *
+ * Purpose: To generate a list of encodings capable of handling
+ * a given Unicode text
+ *
+ * Started 09-April-2008
+ */
+
+#ifndef __ICU_UCNV_SEL_H__
+#define __ICU_UCNV_SEL_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/uset.h"
+#include "unicode/utf16.h"
+#include "unicode/uenum.h"
+#include "unicode/ucnv.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ *
+ * A converter selector is built with a set of encoding/charset names
+ * and given an input string returns the set of names of the
+ * corresponding converters which can convert the string.
+ *
+ * A converter selector can be serialized into a buffer and reopened
+ * from the serialized form.
+ */
+
+/**
+ * @{
+ * The selector data structure
+ */
+struct UConverterSelector;
+typedef struct UConverterSelector UConverterSelector;
+/** @} */
+
+/**
+ * Open a selector.
+ * If converterListSize is 0, build for all available converters.
+ * If excludedCodePoints is NULL, don't exclude any code points.
+ *
+ * @param converterList a pointer to encoding names needed to be involved.
+ * Can be NULL if converterListSize==0.
+ * The list and the names will be cloned, and the caller
+ * retains ownership of the original.
+ * @param converterListSize number of encodings in above list.
+ * If 0, builds a selector for all available converters.
+ * @param excludedCodePoints a set of code points to be excluded from consideration.
+ * That is, excluded code points in a string do not change
+ * the selection result. (They might be handled by a callback.)
+ * Use NULL to exclude nothing.
+ * @param whichSet what converter set to use? Use this to determine whether
+ * to consider only roundtrip mappings or also fallbacks.
+ * @param status an in/out ICU UErrorCode
+ * @return the new selector
+ *
+ * @stable ICU 4.2
+ */
+U_CAPI UConverterSelector* U_EXPORT2
+ucnvsel_open(const char* const* converterList, int32_t converterListSize,
+ const USet* excludedCodePoints,
+ const UConverterUnicodeSet whichSet, UErrorCode* status);
+
+/**
+ * Closes a selector.
+ * If any Enumerations were returned by ucnvsel_select*, they become invalid.
+ * They can be closed before or after calling ucnvsel_close,
+ * but should never be used after the selector is closed.
+ *
+ * @see ucnvsel_selectForString
+ * @see ucnvsel_selectForUTF8
+ *
+ * @param sel selector to close
+ *
+ * @stable ICU 4.2
+ */
+U_CAPI void U_EXPORT2
+ucnvsel_close(UConverterSelector *sel);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUConverterSelectorPointer
+ * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Open a selector from its serialized form.
+ * The buffer must remain valid and unchanged for the lifetime of the selector.
+ * This is much faster than creating a selector from scratch.
+ * Using a serialized form from a different machine (endianness/charset) is supported.
+ *
+ * @param buffer pointer to the serialized form of a converter selector;
+ * must be 32-bit-aligned
+ * @param length the capacity of this buffer (can be equal to or larger than
+ * the actual data length)
+ * @param status an in/out ICU UErrorCode
+ * @return the new selector
+ *
+ * @stable ICU 4.2
+ */
+U_CAPI UConverterSelector* U_EXPORT2
+ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
+
+/**
+ * Serialize a selector into a linear buffer.
+ * The serialized form is portable to different machines.
+ *
+ * @param sel selector to consider
+ * @param buffer pointer to 32-bit-aligned memory to be filled with the
+ * serialized form of this converter selector
+ * @param bufferCapacity the capacity of this buffer
+ * @param status an in/out ICU UErrorCode
+ * @return the required buffer capacity to hold serialized data (even if the call fails
+ * with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
+ *
+ * @stable ICU 4.2
+ */
+U_CAPI int32_t U_EXPORT2
+ucnvsel_serialize(const UConverterSelector* sel,
+ void* buffer, int32_t bufferCapacity, UErrorCode* status);
+
+/**
+ * Select converters that can map all characters in a UTF-16 string,
+ * ignoring the excluded code points.
+ *
+ * @param sel a selector
+ * @param s UTF-16 string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param status an in/out ICU UErrorCode
+ * @return an enumeration containing encoding names.
+ * The returned encoding names and their order will be the same as
+ * supplied when building the selector.
+ *
+ * @stable ICU 4.2
+ */
+U_CAPI UEnumeration * U_EXPORT2
+ucnvsel_selectForString(const UConverterSelector* sel,
+ const UChar *s, int32_t length, UErrorCode *status);
+
+/**
+ * Select converters that can map all characters in a UTF-8 string,
+ * ignoring the excluded code points.
+ *
+ * @param sel a selector
+ * @param s UTF-8 string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param status an in/out ICU UErrorCode
+ * @return an enumeration containing encoding names.
+ * The returned encoding names and their order will be the same as
+ * supplied when building the selector.
+ *
+ * @stable ICU 4.2
+ */
+U_CAPI UEnumeration * U_EXPORT2
+ucnvsel_selectForUTF8(const UConverterSelector* sel,
+ const char *s, int32_t length, UErrorCode *status);
+
+#endif /* !UCONFIG_NO_CONVERSION */
+
+#endif /* __ICU_UCNV_SEL_H__ */
diff --git a/thirdparty/icu4c/common/unicode/uconfig.h b/thirdparty/icu4c/common/unicode/uconfig.h
new file mode 100644
index 0000000000..bbc232d1ed
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/uconfig.h
@@ -0,0 +1,456 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: uconfig.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002sep19
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UCONFIG_H__
+#define __UCONFIG_H__
+
+
+/*!
+ * \file
+ * \brief User-configurable settings
+ *
+ * Miscellaneous switches:
+ *
+ * A number of macros affect a variety of minor aspects of ICU.
+ * Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h)
+ * and moved here to make them easier to find.
+ *
+ * Switches for excluding parts of ICU library code modules:
+ *
+ * Changing these macros allows building partial, smaller libraries for special purposes.
+ * By default, all modules are built.
+ * The switches are fairly coarse, controlling large modules.
+ * Basic services cannot be turned off.
+ *
+ * Building with any of these options does not guarantee that the
+ * ICU build process will completely work. It is recommended that
+ * the ICU libraries and data be built using the normal build.
+ * At that time you should remove the data used by those services.
+ * After building the ICU data library, you should rebuild the ICU
+ * libraries with these switches customized to your needs.
+ *
+ * @stable ICU 2.4
+ */
+
+/**
+ * If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h"
+ * prior to determining default settings for uconfig variables.
+ *
+ * @internal ICU 4.0
+ */
+#if defined(UCONFIG_USE_LOCAL)
+#include "uconfig_local.h"
+#endif
+
+/**
+ * \def U_DEBUG
+ * Determines whether to include debugging code.
+ * Automatically set on Windows, but most compilers do not have
+ * related predefined macros.
+ * @internal
+ */
+#ifdef U_DEBUG
+ /* Use the predefined value. */
+#elif defined(_DEBUG)
+ /*
+ * _DEBUG is defined by Visual Studio debug compilation.
+ * Do *not* test for its NDEBUG macro: It is an orthogonal macro
+ * which disables assert().
+ */
+# define U_DEBUG 1
+# else
+# define U_DEBUG 0
+#endif
+
+/**
+ * Determines whether to enable auto cleanup of libraries.
+ * @internal
+ */
+#ifndef UCLN_NO_AUTO_CLEANUP
+#define UCLN_NO_AUTO_CLEANUP 1
+#endif
+
+/**
+ * \def U_DISABLE_RENAMING
+ * Determines whether to disable renaming or not.
+ * @internal
+ */
+#ifndef U_DISABLE_RENAMING
+#define U_DISABLE_RENAMING 0
+#endif
+
+/**
+ * \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+ * Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h.
+ * utypes.h includes those headers if this macro is defined to 0.
+ * Otherwise, each of those headers must be included explicitly when using one of their macros.
+ * Defaults to 0 for backward compatibility, except inside ICU.
+ * @stable ICU 49
+ */
+#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+ /* Use the predefined value. */
+#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
+ defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
+ defined(U_TOOLUTIL_IMPLEMENTATION)
+# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1
+#else
+# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0
+#endif
+
+/**
+ * \def U_OVERRIDE_CXX_ALLOCATION
+ * Determines whether to override new and delete.
+ * ICU is normally built such that all of its C++ classes, via their UMemory base,
+ * override operators new and delete to use its internal, customizable,
+ * non-exception-throwing memory allocation functions. (Default value 1 for this macro.)
+ *
+ * This is especially important when the application and its libraries use multiple heaps.
+ * For example, on Windows, this allows the ICU DLL to be used by
+ * applications that statically link the C Runtime library.
+ *
+ * @stable ICU 2.2
+ */
+#ifndef U_OVERRIDE_CXX_ALLOCATION
+#define U_OVERRIDE_CXX_ALLOCATION 1
+#endif
+
+/**
+ * \def U_ENABLE_TRACING
+ * Determines whether to enable tracing.
+ * @internal
+ */
+#ifndef U_ENABLE_TRACING
+#define U_ENABLE_TRACING 0
+#endif
+
+/**
+ * \def UCONFIG_ENABLE_PLUGINS
+ * Determines whether to enable ICU plugins.
+ * @internal
+ */
+#ifndef UCONFIG_ENABLE_PLUGINS
+#define UCONFIG_ENABLE_PLUGINS 0
+#endif
+
+/**
+ * \def U_ENABLE_DYLOAD
+ * Whether to enable Dynamic loading in ICU.
+ * @internal
+ */
+#ifndef U_ENABLE_DYLOAD
+#define U_ENABLE_DYLOAD 1
+#endif
+
+/**
+ * \def U_CHECK_DYLOAD
+ * Whether to test Dynamic loading as an OS capability.
+ * @internal
+ */
+#ifndef U_CHECK_DYLOAD
+#define U_CHECK_DYLOAD 1
+#endif
+
+/**
+ * \def U_DEFAULT_SHOW_DRAFT
+ * Do we allow ICU users to use the draft APIs by default?
+ * @internal
+ */
+#ifndef U_DEFAULT_SHOW_DRAFT
+#define U_DEFAULT_SHOW_DRAFT 1
+#endif
+
+/*===========================================================================*/
+/* Custom icu entry point renaming */
+/*===========================================================================*/
+
+/**
+ * \def U_HAVE_LIB_SUFFIX
+ * 1 if a custom library suffix is set.
+ * @internal
+ */
+#ifdef U_HAVE_LIB_SUFFIX
+ /* Use the predefined value. */
+#elif defined(U_LIB_SUFFIX_C_NAME) || defined(U_IN_DOXYGEN)
+# define U_HAVE_LIB_SUFFIX 1
+#endif
+
+/**
+ * \def U_LIB_SUFFIX_C_NAME_STRING
+ * Defines the library suffix as a string with C syntax.
+ * @internal
+ */
+#ifdef U_LIB_SUFFIX_C_NAME_STRING
+ /* Use the predefined value. */
+#elif defined(U_LIB_SUFFIX_C_NAME)
+# define CONVERT_TO_STRING(s) #s
+# define U_LIB_SUFFIX_C_NAME_STRING CONVERT_TO_STRING(U_LIB_SUFFIX_C_NAME)
+#else
+# define U_LIB_SUFFIX_C_NAME_STRING ""
+#endif
+
+/* common/i18n library switches --------------------------------------------- */
+
+/**
+ * \def UCONFIG_ONLY_COLLATION
+ * This switch turns off modules that are not needed for collation.
+ *
+ * It does not turn off legacy conversion because that is necessary
+ * for ICU to work on EBCDIC platforms (for the default converter).
+ * If you want "only collation" and do not build for EBCDIC,
+ * then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to 1 as well.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_ONLY_COLLATION
+# define UCONFIG_ONLY_COLLATION 0
+#endif
+
+#if UCONFIG_ONLY_COLLATION
+ /* common library */
+# define UCONFIG_NO_BREAK_ITERATION 1
+# define UCONFIG_NO_IDNA 1
+
+ /* i18n library */
+# if UCONFIG_NO_COLLATION
+# error Contradictory collation switches in uconfig.h.
+# endif
+# define UCONFIG_NO_FORMATTING 1
+# define UCONFIG_NO_TRANSLITERATION 1
+# define UCONFIG_NO_REGULAR_EXPRESSIONS 1
+#endif
+
+/* common library switches -------------------------------------------------- */
+
+/**
+ * \def UCONFIG_NO_FILE_IO
+ * This switch turns off all file access in the common library
+ * where file access is only used for data loading.
+ * ICU data must then be provided in the form of a data DLL (or with an
+ * equivalent way to link to the data residing in an executable,
+ * as in building a combined library with both the common library's code and
+ * the data), or via udata_setCommonData().
+ * Application data must be provided via udata_setAppData() or by using
+ * "open" functions that take pointers to data, for example ucol_openBinary().
+ *
+ * File access is not used at all in the i18n library.
+ *
+ * File access cannot be turned off for the icuio library or for the ICU
+ * test suites and ICU tools.
+ *
+ * @stable ICU 3.6
+ */
+#ifndef UCONFIG_NO_FILE_IO
+# define UCONFIG_NO_FILE_IO 0
+#endif
+
+#if UCONFIG_NO_FILE_IO && defined(U_TIMEZONE_FILES_DIR)
+# error Contradictory file io switches in uconfig.h.
+#endif
+
+/**
+ * \def UCONFIG_NO_CONVERSION
+ * ICU will not completely build (compiling the tools fails) with this
+ * switch turned on.
+ * This switch turns off all converters.
+ *
+ * You may want to use this together with U_CHARSET_IS_UTF8 defined to 1
+ * in utypes.h if char* strings in your environment are always in UTF-8.
+ *
+ * @stable ICU 3.2
+ * @see U_CHARSET_IS_UTF8
+ */
+#ifndef UCONFIG_NO_CONVERSION
+# define UCONFIG_NO_CONVERSION 0
+#endif
+
+#if UCONFIG_NO_CONVERSION
+# define UCONFIG_NO_LEGACY_CONVERSION 1
+#endif
+
+/**
+ * \def UCONFIG_ONLY_HTML_CONVERSION
+ * This switch turns off all of the converters NOT listed in
+ * the HTML encoding standard:
+ * http://www.w3.org/TR/encoding/#names-and-labels
+ *
+ * This is not possible on EBCDIC platforms
+ * because they need ibm-37 or ibm-1047 default converters.
+ *
+ * @stable ICU 55
+ */
+#ifndef UCONFIG_ONLY_HTML_CONVERSION
+# define UCONFIG_ONLY_HTML_CONVERSION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_LEGACY_CONVERSION
+ * This switch turns off all converters except for
+ * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
+ * - US-ASCII
+ * - ISO-8859-1
+ *
+ * Turning off legacy conversion is not possible on EBCDIC platforms
+ * because they need ibm-37 or ibm-1047 default converters.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_LEGACY_CONVERSION
+# define UCONFIG_NO_LEGACY_CONVERSION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_NORMALIZATION
+ * This switch turns off normalization.
+ * It implies turning off several other services as well, for example
+ * collation and IDNA.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef UCONFIG_NO_NORMALIZATION
+# define UCONFIG_NO_NORMALIZATION 0
+#endif
+
+#if UCONFIG_NO_NORMALIZATION
+ /* common library */
+ /* ICU 50 CJK dictionary BreakIterator uses normalization */
+# define UCONFIG_NO_BREAK_ITERATION 1
+ /* IDNA (UTS #46) is implemented via normalization */
+# define UCONFIG_NO_IDNA 1
+
+ /* i18n library */
+# if UCONFIG_ONLY_COLLATION
+# error Contradictory collation switches in uconfig.h.
+# endif
+# define UCONFIG_NO_COLLATION 1
+# define UCONFIG_NO_TRANSLITERATION 1
+#endif
+
+/**
+ * \def UCONFIG_NO_BREAK_ITERATION
+ * This switch turns off break iteration.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_BREAK_ITERATION
+# define UCONFIG_NO_BREAK_ITERATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_IDNA
+ * This switch turns off IDNA.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef UCONFIG_NO_IDNA
+# define UCONFIG_NO_IDNA 0
+#endif
+
+/**
+ * \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
+ * Determines the default UMessagePatternApostropheMode.
+ * See the documentation for that enum.
+ *
+ * @stable ICU 4.8
+ */
+#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
+# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL
+#endif
+
+/**
+ * \def UCONFIG_USE_WINDOWS_LCID_MAPPING_API
+ * On platforms where U_PLATFORM_HAS_WIN32_API is true, this switch determines
+ * if the Windows platform APIs are used for LCID<->Locale Name conversions.
+ * Otherwise, only the built-in ICU tables are used.
+ *
+ * @internal ICU 64
+ */
+#ifndef UCONFIG_USE_WINDOWS_LCID_MAPPING_API
+# define UCONFIG_USE_WINDOWS_LCID_MAPPING_API 1
+#endif
+
+/* i18n library switches ---------------------------------------------------- */
+
+/**
+ * \def UCONFIG_NO_COLLATION
+ * This switch turns off collation and collation-based string search.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_COLLATION
+# define UCONFIG_NO_COLLATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_FORMATTING
+ * This switch turns off formatting and calendar/timezone services.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_FORMATTING
+# define UCONFIG_NO_FORMATTING 0
+#endif
+
+/**
+ * \def UCONFIG_NO_TRANSLITERATION
+ * This switch turns off transliteration.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_TRANSLITERATION
+# define UCONFIG_NO_TRANSLITERATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_REGULAR_EXPRESSIONS
+ * This switch turns off regular expressions.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS
+# define UCONFIG_NO_REGULAR_EXPRESSIONS 0
+#endif
+
+/**
+ * \def UCONFIG_NO_SERVICE
+ * This switch turns off service registration.
+ *
+ * @stable ICU 3.2
+ */
+#ifndef UCONFIG_NO_SERVICE
+# define UCONFIG_NO_SERVICE 0
+#endif
+
+/**
+ * \def UCONFIG_HAVE_PARSEALLINPUT
+ * This switch turns on the "parse all input" attribute. Binary incompatible.
+ *
+ * @internal
+ */
+#ifndef UCONFIG_HAVE_PARSEALLINPUT
+# define UCONFIG_HAVE_PARSEALLINPUT 1
+#endif
+
+/**
+ * \def UCONFIG_NO_FILTERED_BREAK_ITERATION
+ * This switch turns off filtered break iteration code.
+ *
+ * @internal
+ */
+#ifndef UCONFIG_NO_FILTERED_BREAK_ITERATION
+# define UCONFIG_NO_FILTERED_BREAK_ITERATION 0
+#endif
+
+#endif // __UCONFIG_H__
diff --git a/thirdparty/icu4c/common/unicode/ucpmap.h b/thirdparty/icu4c/common/unicode/ucpmap.h
new file mode 100644
index 0000000000..31e1365cac
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ucpmap.h
@@ -0,0 +1,159 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// ucpmap.h
+// created: 2018sep03 Markus W. Scherer
+
+#ifndef __UCPMAP_H__
+#define __UCPMAP_H__
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/**
+ * \file
+ *
+ * This file defines an abstract map from Unicode code points to integer values.
+ *
+ * @see UCPMap
+ * @see UCPTrie
+ * @see UMutableCPTrie
+ */
+
+/**
+ * Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
+ *
+ * @see UCPTrie
+ * @see UMutableCPTrie
+ * @stable ICU 63
+ */
+typedef struct UCPMap UCPMap;
+
+/**
+ * Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.
+ * Most users should use UCPMAP_RANGE_NORMAL.
+ *
+ * @see ucpmap_getRange
+ * @see ucptrie_getRange
+ * @see umutablecptrie_getRange
+ * @stable ICU 63
+ */
+enum UCPMapRangeOption {
+ /**
+ * ucpmap_getRange() enumerates all same-value ranges as stored in the map.
+ * Most users should use this option.
+ * @stable ICU 63
+ */
+ UCPMAP_RANGE_NORMAL,
+ /**
+ * ucpmap_getRange() enumerates all same-value ranges as stored in the map,
+ * except that lead surrogates (U+D800..U+DBFF) are treated as having the
+ * surrogateValue, which is passed to getRange() as a separate parameter.
+ * The surrogateValue is not transformed via filter().
+ * See U_IS_LEAD(c).
+ *
+ * Most users should use UCPMAP_RANGE_NORMAL instead.
+ *
+ * This option is useful for maps that map surrogate code *units* to
+ * special values optimized for UTF-16 string processing
+ * or for special error behavior for unpaired surrogates,
+ * but those values are not to be associated with the lead surrogate code *points*.
+ * @stable ICU 63
+ */
+ UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
+ /**
+ * ucpmap_getRange() enumerates all same-value ranges as stored in the map,
+ * except that all surrogates (U+D800..U+DFFF) are treated as having the
+ * surrogateValue, which is passed to getRange() as a separate parameter.
+ * The surrogateValue is not transformed via filter().
+ * See U_IS_SURROGATE(c).
+ *
+ * Most users should use UCPMAP_RANGE_NORMAL instead.
+ *
+ * This option is useful for maps that map surrogate code *units* to
+ * special values optimized for UTF-16 string processing
+ * or for special error behavior for unpaired surrogates,
+ * but those values are not to be associated with the surrogate code *points*.
+ * @stable ICU 63
+ */
+ UCPMAP_RANGE_FIXED_ALL_SURROGATES
+};
+#ifndef U_IN_DOXYGEN
+typedef enum UCPMapRangeOption UCPMapRangeOption;
+#endif
+
+/**
+ * Returns the value for a code point as stored in the map, with range checking.
+ * Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.
+ *
+ * @param map the map
+ * @param c the code point
+ * @return the map value,
+ * or an implementation-defined error value if the code point is not in the range 0..U+10FFFF
+ * @stable ICU 63
+ */
+U_CAPI uint32_t U_EXPORT2
+ucpmap_get(const UCPMap *map, UChar32 c);
+
+/**
+ * Callback function type: Modifies a map value.
+ * Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange().
+ * The modified value will be returned by the getRange function.
+ *
+ * Can be used to ignore some of the value bits,
+ * make a filter for one of several values,
+ * return a value index computed from the map value, etc.
+ *
+ * @param context an opaque pointer, as passed into the getRange function
+ * @param value a value from the map
+ * @return the modified value
+ * @stable ICU 63
+ */
+typedef uint32_t U_CALLCONV
+UCPMapValueFilter(const void *context, uint32_t value);
+
+/**
+ * Returns the last code point such that all those from start to there have the same value.
+ * Can be used to efficiently iterate over all same-value ranges in a map.
+ * (This is normally faster than iterating over code points and get()ting each value,
+ * but much slower than a data structure that stores ranges directly.)
+ *
+ * If the UCPMapValueFilter function pointer is not NULL, then
+ * the value to be delivered is passed through that function, and the return value is the end
+ * of the range where all values are modified to the same actual value.
+ * The value is unchanged if that function pointer is NULL.
+ *
+ * Example:
+ * \code
+ * UChar32 start = 0, end;
+ * uint32_t value;
+ * while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
+ * NULL, NULL, &value)) >= 0) {
+ * // Work with the range start..end and its value.
+ * start = end + 1;
+ * }
+ * \endcode
+ *
+ * @param map the map
+ * @param start range start
+ * @param option defines whether surrogates are treated normally,
+ * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
+ * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
+ * @param filter a pointer to a function that may modify the map data value,
+ * or NULL if the values from the map are to be used unmodified
+ * @param context an opaque pointer that is passed on to the filter function
+ * @param pValue if not NULL, receives the value that every code point start..end has;
+ * may have been modified by filter(context, map value)
+ * if that function pointer is not NULL
+ * @return the range end code point, or -1 if start is not a valid code point
+ * @stable ICU 63
+ */
+U_CAPI UChar32 U_EXPORT2
+ucpmap_getRange(const UCPMap *map, UChar32 start,
+ UCPMapRangeOption option, uint32_t surrogateValue,
+ UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
+
+U_CDECL_END
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/ucptrie.h b/thirdparty/icu4c/common/unicode/ucptrie.h
new file mode 100644
index 0000000000..b95491b183
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ucptrie.h
@@ -0,0 +1,646 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// ucptrie.h (modified from utrie2.h)
+// created: 2017dec29 Markus W. Scherer
+
+#ifndef __UCPTRIE_H__
+#define __UCPTRIE_H__
+
+#include "unicode/utypes.h"
+#include "unicode/ucpmap.h"
+#include "unicode/utf8.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+U_CDECL_BEGIN
+
+/**
+ * \file
+ *
+ * This file defines an immutable Unicode code point trie.
+ *
+ * @see UCPTrie
+ * @see UMutableCPTrie
+ */
+
+#ifndef U_IN_DOXYGEN
+/** @internal */
+typedef union UCPTrieData {
+ /** @internal */
+ const void *ptr0;
+ /** @internal */
+ const uint16_t *ptr16;
+ /** @internal */
+ const uint32_t *ptr32;
+ /** @internal */
+ const uint8_t *ptr8;
+} UCPTrieData;
+#endif
+
+/**
+ * Immutable Unicode code point trie structure.
+ * Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values.
+ * For details see http://site.icu-project.org/design/struct/utrie
+ *
+ * Do not access UCPTrie fields directly; use public functions and macros.
+ * Functions are easy to use: They support all trie types and value widths.
+ *
+ * When performance is really important, macros provide faster access.
+ * Most macros are specific to either "fast" or "small" tries, see UCPTrieType.
+ * There are "fast" macros for special optimized use cases.
+ *
+ * The macros will return bogus values, or may crash, if used on the wrong type or value width.
+ *
+ * @see UMutableCPTrie
+ * @stable ICU 63
+ */
+struct UCPTrie {
+#ifndef U_IN_DOXYGEN
+ /** @internal */
+ const uint16_t *index;
+ /** @internal */
+ UCPTrieData data;
+
+ /** @internal */
+ int32_t indexLength;
+ /** @internal */
+ int32_t dataLength;
+ /** Start of the last range which ends at U+10FFFF. @internal */
+ UChar32 highStart;
+ /** highStart>>12 @internal */
+ uint16_t shifted12HighStart;
+
+ /** @internal */
+ int8_t type; // UCPTrieType
+ /** @internal */
+ int8_t valueWidth; // UCPTrieValueWidth
+
+ /** padding/reserved @internal */
+ uint32_t reserved32;
+ /** padding/reserved @internal */
+ uint16_t reserved16;
+
+ /**
+ * Internal index-3 null block offset.
+ * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block.
+ * @internal
+ */
+ uint16_t index3NullOffset;
+ /**
+ * Internal data null block offset, not shifted.
+ * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block.
+ * @internal
+ */
+ int32_t dataNullOffset;
+ /** @internal */
+ uint32_t nullValue;
+
+#ifdef UCPTRIE_DEBUG
+ /** @internal */
+ const char *name;
+#endif
+#endif
+};
+#ifndef U_IN_DOXYGEN
+typedef struct UCPTrie UCPTrie;
+#endif
+
+/**
+ * Selectors for the type of a UCPTrie.
+ * Different trade-offs for size vs. speed.
+ *
+ * @see umutablecptrie_buildImmutable
+ * @see ucptrie_openFromBinary
+ * @see ucptrie_getType
+ * @stable ICU 63
+ */
+enum UCPTrieType {
+ /**
+ * For ucptrie_openFromBinary() to accept any type.
+ * ucptrie_getType() will return the actual type.
+ * @stable ICU 63
+ */
+ UCPTRIE_TYPE_ANY = -1,
+ /**
+ * Fast/simple/larger BMP data structure. Use functions and "fast" macros.
+ * @stable ICU 63
+ */
+ UCPTRIE_TYPE_FAST,
+ /**
+ * Small/slower BMP data structure. Use functions and "small" macros.
+ * @stable ICU 63
+ */
+ UCPTRIE_TYPE_SMALL
+};
+#ifndef U_IN_DOXYGEN
+typedef enum UCPTrieType UCPTrieType;
+#endif
+
+/**
+ * Selectors for the number of bits in a UCPTrie data value.
+ *
+ * @see umutablecptrie_buildImmutable
+ * @see ucptrie_openFromBinary
+ * @see ucptrie_getValueWidth
+ * @stable ICU 63
+ */
+enum UCPTrieValueWidth {
+ /**
+ * For ucptrie_openFromBinary() to accept any data value width.
+ * ucptrie_getValueWidth() will return the actual data value width.
+ * @stable ICU 63
+ */
+ UCPTRIE_VALUE_BITS_ANY = -1,
+ /**
+ * The trie stores 16 bits per data value.
+ * It returns them as unsigned values 0..0xffff=65535.
+ * @stable ICU 63
+ */
+ UCPTRIE_VALUE_BITS_16,
+ /**
+ * The trie stores 32 bits per data value.
+ * @stable ICU 63
+ */
+ UCPTRIE_VALUE_BITS_32,
+ /**
+ * The trie stores 8 bits per data value.
+ * It returns them as unsigned values 0..0xff=255.
+ * @stable ICU 63
+ */
+ UCPTRIE_VALUE_BITS_8
+};
+#ifndef U_IN_DOXYGEN
+typedef enum UCPTrieValueWidth UCPTrieValueWidth;
+#endif
+
+/**
+ * Opens a trie from its binary form, stored in 32-bit-aligned memory.
+ * Inverse of ucptrie_toBinary().
+ *
+ * The memory must remain valid and unchanged as long as the trie is used.
+ * You must ucptrie_close() the trie once you are done using it.
+ *
+ * @param type selects the trie type; results in an
+ * U_INVALID_FORMAT_ERROR if it does not match the binary data;
+ * use UCPTRIE_TYPE_ANY to accept any type
+ * @param valueWidth selects the number of bits in a data value; results in an
+ * U_INVALID_FORMAT_ERROR if it does not match the binary data;
+ * use UCPTRIE_VALUE_BITS_ANY to accept any data value width
+ * @param data a pointer to 32-bit-aligned memory containing the binary data of a UCPTrie
+ * @param length the number of bytes available at data;
+ * can be more than necessary
+ * @param pActualLength receives the actual number of bytes at data taken up by the trie data;
+ * can be NULL
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the trie
+ *
+ * @see umutablecptrie_open
+ * @see umutablecptrie_buildImmutable
+ * @see ucptrie_toBinary
+ * @stable ICU 63
+ */
+U_CAPI UCPTrie * U_EXPORT2
+ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth,
+ const void *data, int32_t length, int32_t *pActualLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Closes a trie and releases associated memory.
+ *
+ * @param trie the trie
+ * @stable ICU 63
+ */
+U_CAPI void U_EXPORT2
+ucptrie_close(UCPTrie *trie);
+
+/**
+ * Returns the trie type.
+ *
+ * @param trie the trie
+ * @return the trie type
+ * @see ucptrie_openFromBinary
+ * @see UCPTRIE_TYPE_ANY
+ * @stable ICU 63
+ */
+U_CAPI UCPTrieType U_EXPORT2
+ucptrie_getType(const UCPTrie *trie);
+
+/**
+ * Returns the number of bits in a trie data value.
+ *
+ * @param trie the trie
+ * @return the number of bits in a trie data value
+ * @see ucptrie_openFromBinary
+ * @see UCPTRIE_VALUE_BITS_ANY
+ * @stable ICU 63
+ */
+U_CAPI UCPTrieValueWidth U_EXPORT2
+ucptrie_getValueWidth(const UCPTrie *trie);
+
+/**
+ * Returns the value for a code point as stored in the trie, with range checking.
+ * Returns the trie error value if c is not in the range 0..U+10FFFF.
+ *
+ * Easier to use than UCPTRIE_FAST_GET() and similar macros but slower.
+ * Easier to use because, unlike the macros, this function works on all UCPTrie
+ * objects, for all types and value widths.
+ *
+ * @param trie the trie
+ * @param c the code point
+ * @return the trie value,
+ * or the trie error value if the code point is not in the range 0..U+10FFFF
+ * @stable ICU 63
+ */
+U_CAPI uint32_t U_EXPORT2
+ucptrie_get(const UCPTrie *trie, UChar32 c);
+
+/**
+ * Returns the last code point such that all those from start to there have the same value.
+ * Can be used to efficiently iterate over all same-value ranges in a trie.
+ * (This is normally faster than iterating over code points and get()ting each value,
+ * but much slower than a data structure that stores ranges directly.)
+ *
+ * If the UCPMapValueFilter function pointer is not NULL, then
+ * the value to be delivered is passed through that function, and the return value is the end
+ * of the range where all values are modified to the same actual value.
+ * The value is unchanged if that function pointer is NULL.
+ *
+ * Example:
+ * \code
+ * UChar32 start = 0, end;
+ * uint32_t value;
+ * while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
+ * NULL, NULL, &value)) >= 0) {
+ * // Work with the range start..end and its value.
+ * start = end + 1;
+ * }
+ * \endcode
+ *
+ * @param trie the trie
+ * @param start range start
+ * @param option defines whether surrogates are treated normally,
+ * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
+ * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
+ * @param filter a pointer to a function that may modify the trie data value,
+ * or NULL if the values from the trie are to be used unmodified
+ * @param context an opaque pointer that is passed on to the filter function
+ * @param pValue if not NULL, receives the value that every code point start..end has;
+ * may have been modified by filter(context, trie value)
+ * if that function pointer is not NULL
+ * @return the range end code point, or -1 if start is not a valid code point
+ * @stable ICU 63
+ */
+U_CAPI UChar32 U_EXPORT2
+ucptrie_getRange(const UCPTrie *trie, UChar32 start,
+ UCPMapRangeOption option, uint32_t surrogateValue,
+ UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
+
+/**
+ * Writes a memory-mappable form of the trie into 32-bit aligned memory.
+ * Inverse of ucptrie_openFromBinary().
+ *
+ * @param trie the trie
+ * @param data a pointer to 32-bit-aligned memory to be filled with the trie data;
+ * can be NULL if capacity==0
+ * @param capacity the number of bytes available at data, or 0 for pure preflighting
+ * @param pErrorCode an in/out ICU UErrorCode;
+ * U_BUFFER_OVERFLOW_ERROR if the capacity is too small
+ * @return the number of bytes written or (if buffer overflow) needed for the trie
+ *
+ * @see ucptrie_openFromBinary()
+ * @stable ICU 63
+ */
+U_CAPI int32_t U_EXPORT2
+ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode *pErrorCode);
+
+/**
+ * Macro parameter value for a trie with 16-bit data values.
+ * Use the name of this macro as a "dataAccess" parameter in other macros.
+ * Do not use this macro in any other way.
+ *
+ * @see UCPTRIE_VALUE_BITS_16
+ * @stable ICU 63
+ */
+#define UCPTRIE_16(trie, i) ((trie)->data.ptr16[i])
+
+/**
+ * Macro parameter value for a trie with 32-bit data values.
+ * Use the name of this macro as a "dataAccess" parameter in other macros.
+ * Do not use this macro in any other way.
+ *
+ * @see UCPTRIE_VALUE_BITS_32
+ * @stable ICU 63
+ */
+#define UCPTRIE_32(trie, i) ((trie)->data.ptr32[i])
+
+/**
+ * Macro parameter value for a trie with 8-bit data values.
+ * Use the name of this macro as a "dataAccess" parameter in other macros.
+ * Do not use this macro in any other way.
+ *
+ * @see UCPTRIE_VALUE_BITS_8
+ * @stable ICU 63
+ */
+#define UCPTRIE_8(trie, i) ((trie)->data.ptr8[i])
+
+/**
+ * Returns a trie value for a code point, with range checking.
+ * Returns the trie error value if c is not in the range 0..U+10FFFF.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param c (UChar32, in) the input code point
+ * @return The code point's trie value.
+ * @stable ICU 63
+ */
+#define UCPTRIE_FAST_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_CP_INDEX(trie, 0xffff, c))
+
+/**
+ * Returns a trie value for a code point, with range checking.
+ * Returns the trie error value if c is not in the range U+0000..U+10FFFF.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_SMALL
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param c (UChar32, in) the input code point
+ * @return The code point's trie value.
+ * @stable ICU 63
+ */
+#define UCPTRIE_SMALL_GET(trie, dataAccess, c) \
+ dataAccess(trie, _UCPTRIE_CP_INDEX(trie, UCPTRIE_SMALL_MAX, c))
+
+/**
+ * UTF-16: Reads the next code point (UChar32 c, out), post-increments src,
+ * and gets a value from the trie.
+ * Sets the trie error value if c is an unpaired surrogate.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param src (const UChar *, in/out) the source text pointer
+ * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated
+ * @param c (UChar32, out) variable for the code point
+ * @param result (out) variable for the trie lookup result
+ * @stable ICU 63
+ */
+#define UCPTRIE_FAST_U16_NEXT(trie, dataAccess, src, limit, c, result) UPRV_BLOCK_MACRO_BEGIN { \
+ (c) = *(src)++; \
+ int32_t __index; \
+ if (!U16_IS_SURROGATE(c)) { \
+ __index = _UCPTRIE_FAST_INDEX(trie, c); \
+ } else { \
+ uint16_t __c2; \
+ if (U16_IS_SURROGATE_LEAD(c) && (src) != (limit) && U16_IS_TRAIL(__c2 = *(src))) { \
+ ++(src); \
+ (c) = U16_GET_SUPPLEMENTARY((c), __c2); \
+ __index = _UCPTRIE_SMALL_INDEX(trie, c); \
+ } else { \
+ __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \
+ } \
+ } \
+ (result) = dataAccess(trie, __index); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * UTF-16: Reads the previous code point (UChar32 c, out), pre-decrements src,
+ * and gets a value from the trie.
+ * Sets the trie error value if c is an unpaired surrogate.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param start (const UChar *, in) the start pointer for the text
+ * @param src (const UChar *, in/out) the source text pointer
+ * @param c (UChar32, out) variable for the code point
+ * @param result (out) variable for the trie lookup result
+ * @stable ICU 63
+ */
+#define UCPTRIE_FAST_U16_PREV(trie, dataAccess, start, src, c, result) UPRV_BLOCK_MACRO_BEGIN { \
+ (c) = *--(src); \
+ int32_t __index; \
+ if (!U16_IS_SURROGATE(c)) { \
+ __index = _UCPTRIE_FAST_INDEX(trie, c); \
+ } else { \
+ uint16_t __c2; \
+ if (U16_IS_SURROGATE_TRAIL(c) && (src) != (start) && U16_IS_LEAD(__c2 = *((src) - 1))) { \
+ --(src); \
+ (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ __index = _UCPTRIE_SMALL_INDEX(trie, c); \
+ } else { \
+ __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \
+ } \
+ } \
+ (result) = dataAccess(trie, __index); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * UTF-8: Post-increments src and gets a value from the trie.
+ * Sets the trie error value for an ill-formed byte sequence.
+ *
+ * Unlike UCPTRIE_FAST_U16_NEXT() this UTF-8 macro does not provide the code point
+ * because it would be more work to do so and is often not needed.
+ * If the trie value differs from the error value, then the byte sequence is well-formed,
+ * and the code point can be assembled without revalidation.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param src (const char *, in/out) the source text pointer
+ * @param limit (const char *, in) the limit pointer for the text (must not be NULL)
+ * @param result (out) variable for the trie lookup result
+ * @stable ICU 63
+ */
+#define UCPTRIE_FAST_U8_NEXT(trie, dataAccess, src, limit, result) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __lead = (uint8_t)*(src)++; \
+ if (!U8_IS_SINGLE(__lead)) { \
+ uint8_t __t1, __t2, __t3; \
+ if ((src) != (limit) && \
+ (__lead >= 0xe0 ? \
+ __lead < 0xf0 ? /* U+0800..U+FFFF except surrogates */ \
+ U8_LEAD3_T1_BITS[__lead &= 0xf] & (1 << ((__t1 = *(src)) >> 5)) && \
+ ++(src) != (limit) && (__t2 = *(src) - 0x80) <= 0x3f && \
+ (__lead = ((int32_t)(trie)->index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) \
+ : /* U+10000..U+10FFFF */ \
+ (__lead -= 0xf0) <= 4 && \
+ U8_LEAD4_T1_BITS[(__t1 = *(src)) >> 4] & (1 << __lead) && \
+ (__lead = (__lead << 6) | (__t1 & 0x3f), ++(src) != (limit)) && \
+ (__t2 = *(src) - 0x80) <= 0x3f && \
+ ++(src) != (limit) && (__t3 = *(src) - 0x80) <= 0x3f && \
+ (__lead = __lead >= (trie)->shifted12HighStart ? \
+ (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \
+ ucptrie_internalSmallU8Index((trie), __lead, __t2, __t3), 1) \
+ : /* U+0080..U+07FF */ \
+ __lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && \
+ (__lead = (int32_t)(trie)->index[__lead & 0x1f] + __t1, 1))) { \
+ ++(src); \
+ } else { \
+ __lead = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; /* ill-formed*/ \
+ } \
+ } \
+ (result) = dataAccess(trie, __lead); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * UTF-8: Pre-decrements src and gets a value from the trie.
+ * Sets the trie error value for an ill-formed byte sequence.
+ *
+ * Unlike UCPTRIE_FAST_U16_PREV() this UTF-8 macro does not provide the code point
+ * because it would be more work to do so and is often not needed.
+ * If the trie value differs from the error value, then the byte sequence is well-formed,
+ * and the code point can be assembled without revalidation.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param start (const char *, in) the start pointer for the text
+ * @param src (const char *, in/out) the source text pointer
+ * @param result (out) variable for the trie lookup result
+ * @stable ICU 63
+ */
+#define UCPTRIE_FAST_U8_PREV(trie, dataAccess, start, src, result) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __index = (uint8_t)*--(src); \
+ if (!U8_IS_SINGLE(__index)) { \
+ __index = ucptrie_internalU8PrevIndex((trie), __index, (const uint8_t *)(start), \
+ (const uint8_t *)(src)); \
+ (src) -= __index & 7; \
+ __index >>= 3; \
+ } \
+ (result) = dataAccess(trie, __index); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Returns a trie value for an ASCII code point, without range checking.
+ *
+ * @param trie (const UCPTrie *, in) the trie (of either fast or small type)
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param c (UChar32, in) the input code point; must be U+0000..U+007F
+ * @return The ASCII code point's trie value.
+ * @stable ICU 63
+ */
+#define UCPTRIE_ASCII_GET(trie, dataAccess, c) dataAccess(trie, c)
+
+/**
+ * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking.
+ * Can be used to look up a value for a UTF-16 code unit if other parts of
+ * the string processing check for surrogates.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param c (UChar32, in) the input code point, must be U+0000..U+FFFF
+ * @return The BMP code point's trie value.
+ * @stable ICU 63
+ */
+#define UCPTRIE_FAST_BMP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_FAST_INDEX(trie, c))
+
+/**
+ * Returns a trie value for a supplementary code point (U+10000..U+10FFFF),
+ * without range checking.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param c (UChar32, in) the input code point, must be U+10000..U+10FFFF
+ * @return The supplementary code point's trie value.
+ * @stable ICU 63
+ */
+#define UCPTRIE_FAST_SUPP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_SMALL_INDEX(trie, c))
+
+/* Internal definitions ----------------------------------------------------- */
+
+#ifndef U_IN_DOXYGEN
+
+/**
+ * Internal implementation constants.
+ * These are needed for the API macros, but users should not use these directly.
+ * @internal
+ */
+enum {
+ /** _UCPTRIE_FAST_INDEX() shifts a code point right by this many bits to select its index-array entry. @internal */
+ UCPTRIE_FAST_SHIFT = 6,
+
+ /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */
+ UCPTRIE_FAST_DATA_BLOCK_LENGTH = 1 << UCPTRIE_FAST_SHIFT,
+
+ /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */
+ UCPTRIE_FAST_DATA_MASK = UCPTRIE_FAST_DATA_BLOCK_LENGTH - 1,
+
+ /** Passed as fastMax by UCPTRIE_SMALL_GET(): code points up to this value use _UCPTRIE_FAST_INDEX(). @internal */
+ UCPTRIE_SMALL_MAX = 0xfff,
+
+ /**
+ * Offset from dataLength (to be subtracted) for fetching the
+ * value returned for out-of-range code points and ill-formed UTF-8/16.
+ * @internal
+ */
+ UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET = 1,
+ /**
+ * Offset from dataLength (to be subtracted) for fetching the
+ * value returned for code points highStart..U+10FFFF.
+ * @internal
+ */
+ UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET = 2
+};
+
+/* Internal functions and macros -------------------------------------------- */
+// Do not conditionalize with #ifndef U_HIDE_INTERNAL_API, needed for public API
+
+/** @internal */
+U_CAPI int32_t U_EXPORT2
+ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c);
+
+/** @internal */
+U_CAPI int32_t U_EXPORT2
+ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3);
+
+/**
+ * Internal function for part of the UCPTRIE_FAST_U8_PREV() macro implementation.
+ * Do not call directly. The macro subtracts (result & 7) from src and uses (result >> 3) as the data index.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c,
+ const uint8_t *start, const uint8_t *src);
+
+/** Internal trie getter for a code point below the fast limit. Returns the data index. @internal */
+#define _UCPTRIE_FAST_INDEX(trie, c) \
+ ((int32_t)(trie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK))
+
+/** Internal trie getter for a code point at or above the fast limit. Returns the data index. @internal */
+#define _UCPTRIE_SMALL_INDEX(trie, c) \
+ ((c) >= (trie)->highStart ? \
+ (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \
+ ucptrie_internalSmallIndex(trie, c))
+
+/**
+ * Internal trie getter for a code point, with checking that c is in U+0000..U+10FFFF.
+ * Returns the data index: fast index if c<=fastMax, small index if c<=U+10FFFF, else (also for negative c, via the unsigned casts) the error value index.
+ * @internal
+ */
+#define _UCPTRIE_CP_INDEX(trie, fastMax, c) \
+ ((uint32_t)(c) <= (uint32_t)(fastMax) ? \
+ _UCPTRIE_FAST_INDEX(trie, c) : \
+ (uint32_t)(c) <= 0x10ffff ? \
+ _UCPTRIE_SMALL_INDEX(trie, c) : \
+ (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET)
+
+U_CDECL_END
+
+#endif // U_IN_DOXYGEN
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUCPTriePointer
+ * "Smart pointer" class, closes a UCPTrie via ucptrie_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 63
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUCPTriePointer, UCPTrie, ucptrie_close);
+
+U_NAMESPACE_END
+
+#endif // U_SHOW_CPLUSPLUS_API
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/ucurr.h b/thirdparty/icu4c/common/unicode/ucurr.h
new file mode 100644
index 0000000000..35c2a39389
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ucurr.h
@@ -0,0 +1,468 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+#ifndef _UCURR_H_
+#define _UCURR_H_
+
+#include "unicode/utypes.h"
+#include "unicode/uenum.h"
+
+/**
+ * \file
+ * \brief C API: Encapsulates information about a currency.
+ *
+ * The ucurr API encapsulates information about a currency, as defined by
+ * ISO 4217. A currency is represented by a 3-character string
+ * containing its ISO 4217 code. This API can return various data
+ * necessary for the proper display of a currency:
+ *
+ * <ul><li>A display symbol, for a specific locale
+ * <li>The number of fraction digits to display
+ * <li>A rounding increment
+ * </ul>
+ *
+ * The <tt>DecimalFormat</tt> class uses these data to display
+ * currencies.
+ * @author Alan Liu
+ * @since ICU 2.2
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+/**
+ * Currency usage setting used for decimal formatting.
+ * @stable ICU 54
+ */
+enum UCurrencyUsage {
+ /**
+ * Standard usage: determines the currency fraction digits
+ * and rounding for standard usage, for example: "50.00 NT$".
+ * This is the default value.
+ * @stable ICU 54
+ */
+ UCURR_USAGE_STANDARD=0,
+ /**
+ * Cash usage: determines the currency fraction digits
+ * and rounding for cash usage, for example: "50 NT$".
+ * @stable ICU 54
+ */
+ UCURR_USAGE_CASH=1,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One higher than the last enum UCurrencyUsage constant.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCURR_USAGE_COUNT=2
+#endif // U_HIDE_DEPRECATED_API
+};
+/** Typedef for enum UCurrencyUsage, the currency usage setting used for decimal formatting. */
+typedef enum UCurrencyUsage UCurrencyUsage;
+
+/**
+ * Finds a currency code for the given locale.
+ * @param locale the locale for which to retrieve a currency code.
+ * Currency can be specified by the "currency" keyword
+ * in which case it overrides the default currency code
+ * @param buff fill in buffer. Can be NULL for preflighting.
+ * @param buffCapacity capacity of the fill in buffer. Can be 0 for
+ * preflighting. If it is non-zero, the buff parameter
+ * must not be NULL.
+ * @param ec error code
+ * @return length of the currency string. It should always be 3. If 0,
+ * currency couldn't be found or the input values are
+ * invalid.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+ucurr_forLocale(const char* locale,
+ UChar* buff,
+ int32_t buffCapacity,
+ UErrorCode* ec);
+
+/**
+ * Selector constants for ucurr_getName().
+ *
+ * @see ucurr_getName
+ * @stable ICU 2.6
+ */
+typedef enum UCurrNameStyle {
+ /**
+ * Selector for ucurr_getName() indicating a symbolic name for a
+ * currency, such as "$" for USD.
+ * @stable ICU 2.6
+ */
+ UCURR_SYMBOL_NAME,
+
+ /**
+ * Selector for ucurr_getName() indicating the long name for a
+ * currency, such as "US Dollar" for USD.
+ * @stable ICU 2.6
+ */
+ UCURR_LONG_NAME,
+
+ /**
+ * Selector for ucurr_getName() indicating the narrow currency symbol.
+ * The narrow currency symbol is similar to the regular currency
+ * symbol, but it always takes the shortest form: for example,
+ * "$" instead of "US$" for USD in en-CA.
+ *
+ * @stable ICU 61
+ */
+ UCURR_NARROW_SYMBOL_NAME,
+
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Selector for ucurr_getName() indicating the formal currency symbol.
+ * The formal currency symbol is similar to the regular currency
+ * symbol, but it always takes the form used in formal settings
+ * such as banking; for example, "NT$" instead of "$" for TWD in zh-TW.
+ *
+ * @draft ICU 68
+ */
+ UCURR_FORMAL_SYMBOL_NAME,
+
+ /**
+ * Selector for ucurr_getName() indicating the variant currency symbol.
+ * The variant symbol for a currency is an alternative symbol
+ * that is not necessarily as widely used as the regular symbol.
+ *
+ * @draft ICU 68
+ */
+ UCURR_VARIANT_SYMBOL_NAME
+#endif // U_HIDE_DRAFT_API
+
+} UCurrNameStyle;
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * @stable ICU 2.6
+ */
+typedef const void* UCurrRegistryKey;
+
+/**
+ * Register an (existing) ISO 4217 currency code for the given locale.
+ * Only the country code and the two variants EURO and PRE_EURO are
+ * recognized.
+ * @param isoCode the three-letter ISO 4217 currency code
+ * @param locale the locale for which to register this currency code
+ * @param status the in/out status code
+ * @return a registry key that can be used to unregister this currency code, or NULL
+ * if there was an error.
+ * @stable ICU 2.6
+ */
+U_CAPI UCurrRegistryKey U_EXPORT2
+ucurr_register(const UChar* isoCode,
+ const char* locale,
+ UErrorCode* status);
+/**
+ * Unregister the previously-registered currency definitions using the
+ * UCurrRegistryKey returned from ucurr_register. Key becomes invalid after
+ * a successful call and should not be used again. Any currency
+ * that might have been hidden by the original ucurr_register call is
+ * restored.
+ * @param key the registry key returned by a previous call to ucurr_register
+ * @param status the in/out status code, no special meanings are assigned
+ * @return true if the currency for this key was successfully unregistered
+ * @stable ICU 2.6
+ */
+U_CAPI UBool U_EXPORT2
+ucurr_unregister(UCurrRegistryKey key, UErrorCode* status);
+#endif /* UCONFIG_NO_SERVICE */
+
+/**
+ * Returns the display name for the given currency in the
+ * given locale. For example, the display name for the USD
+ * currency object in the en_US locale is "$".
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param locale locale in which to display currency
+ * @param nameStyle selector for which kind of name to return
+ * @param isChoiceFormat always set to false, or can be NULL;
+ * display names are static strings;
+ * since ICU 4.4, ChoiceFormat patterns are no longer supported
+ * @param len fill-in parameter to receive length of result
+ * @param ec error code
+ * @return pointer to display string of 'len' UChars. If the resource
+ * data contains no entry for 'currency', then 'currency' itself is
+ * returned.
+ * @stable ICU 2.6
+ */
+U_CAPI const UChar* U_EXPORT2
+ucurr_getName(const UChar* currency,
+ const char* locale,
+ UCurrNameStyle nameStyle,
+ UBool* isChoiceFormat,
+ int32_t* len,
+ UErrorCode* ec);
+
+/**
+ * Returns the plural name for the given currency in the
+ * given locale. For example, the plural name for the USD
+ * currency object in the en_US locale is "US dollar" or "US dollars".
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param locale locale in which to display currency
+ * @param isChoiceFormat always set to false, or can be NULL;
+ * display names are static strings;
+ * since ICU 4.4, ChoiceFormat patterns are no longer supported
+ * @param pluralCount plural count
+ * @param len fill-in parameter to receive length of result
+ * @param ec error code
+ * @return pointer to display string of 'len' UChars. If the resource
+ * data contains no entry for 'currency', then 'currency' itself is
+ * returned.
+ * @stable ICU 4.2
+ */
+U_CAPI const UChar* U_EXPORT2
+ucurr_getPluralName(const UChar* currency,
+ const char* locale,
+ UBool* isChoiceFormat,
+ const char* pluralCount,
+ int32_t* len,
+ UErrorCode* ec);
+
+/**
+ * Returns the number of fraction digits that should
+ * be displayed for the given currency.
+ * This is equivalent to ucurr_getDefaultFractionDigitsForUsage(currency,UCURR_USAGE_STANDARD,ec);
+ *
+ * Important: The number of fraction digits for a given currency is NOT
+ * guaranteed to be constant across versions of ICU or CLDR. For example,
+ * do NOT use this value as a mechanism for deciding the magnitude used
+ * to store currency values in a database. You should use this value for
+ * display purposes only.
+ *
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param ec input-output error code
+ * @return a non-negative number of fraction digits to be
+ * displayed, or 0 if there is an error
+ * @stable ICU 3.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucurr_getDefaultFractionDigits(const UChar* currency,
+ UErrorCode* ec);
+
+/**
+ * Returns the number of fraction digits that should
+ * be displayed for the given currency with usage.
+ *
+ * Important: The number of fraction digits for a given currency is NOT
+ * guaranteed to be constant across versions of ICU or CLDR. For example,
+ * do NOT use this value as a mechanism for deciding the magnitude used
+ * to store currency values in a database. You should use this value for
+ * display purposes only.
+ *
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param usage enum usage for the currency
+ * @param ec input-output error code
+ * @return a non-negative number of fraction digits to be
+ * displayed, or 0 if there is an error
+ * @stable ICU 54
+ */
+U_CAPI int32_t U_EXPORT2
+ucurr_getDefaultFractionDigitsForUsage(const UChar* currency,
+ const UCurrencyUsage usage,
+ UErrorCode* ec);
+
+/**
+ * Returns the rounding increment for the given currency, or 0.0 if no
+ * rounding is done by the currency.
+ * This is equivalent to ucurr_getRoundingIncrementForUsage(currency,UCURR_USAGE_STANDARD,ec);
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param ec input-output error code
+ * @return the non-negative rounding increment, or 0.0 if none,
+ * or 0.0 if there is an error
+ * @stable ICU 3.0
+ */
+U_CAPI double U_EXPORT2
+ucurr_getRoundingIncrement(const UChar* currency,
+ UErrorCode* ec);
+
+/**
+ * Returns the rounding increment for the given currency and usage, or 0.0
+ * if no rounding is done by the currency for that usage.
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param usage enum usage for the currency
+ * @param ec input-output error code
+ * @return the non-negative rounding increment, or 0.0 if none,
+ * or 0.0 if there is an error
+ * @stable ICU 54
+ */
+U_CAPI double U_EXPORT2
+ucurr_getRoundingIncrementForUsage(const UChar* currency,
+ const UCurrencyUsage usage,
+ UErrorCode* ec);
+
+/**
+ * Selector constants for ucurr_openISOCurrencies().
+ *
+ * @see ucurr_openISOCurrencies
+ * @stable ICU 3.2
+ */
+typedef enum UCurrCurrencyType {
+ /**
+ * Select all ISO-4217 currency codes.
+ * @stable ICU 3.2
+ */
+ UCURR_ALL = INT32_MAX,
+ /**
+ * Select only ISO-4217 commonly used currency codes.
+ * These currencies can be found in common use, and they usually have
+ * bank notes or coins associated with the currency code.
+ * This does not include fund codes, precious metals and other
+ * various ISO-4217 codes limited to special financial products.
+ * @stable ICU 3.2
+ */
+ UCURR_COMMON = 1,
+ /**
+ * Select ISO-4217 uncommon currency codes.
+ * These codes represent fund codes, precious metals and other
+ * various ISO-4217 codes limited to special financial products.
+ * A fund code is a monetary resource associated with a currency.
+ * @stable ICU 3.2
+ */
+ UCURR_UNCOMMON = 2,
+ /**
+ * Select only deprecated ISO-4217 codes.
+ * These codes are no longer in general public use.
+ * @stable ICU 3.2
+ */
+ UCURR_DEPRECATED = 4,
+ /**
+ * Select only non-deprecated ISO-4217 codes.
+ * These codes are in general public use.
+ * @stable ICU 3.2
+ */
+ UCURR_NON_DEPRECATED = 8
+} UCurrCurrencyType;
+
+/**
+ * Provides a UEnumeration object for listing ISO-4217 codes.
+ * @param currType You can use one of several UCurrCurrencyType values for this
+ * variable. You can also | (or) them together to get a specific list of
+ * currencies. Most people will want to use the (UCURR_COMMON|UCURR_NON_DEPRECATED) value to
+ * get a list of current currencies.
+ * @param pErrorCode Error code
+ * @stable ICU 3.2
+ */
+U_CAPI UEnumeration * U_EXPORT2
+ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode);
+
+/**
+ * Queries if the given ISO 4217 3-letter code is available on the specified date range.
+ *
+ * Note: For checking availability of a currency on a specific date, specify the date on both 'from' and 'to'
+ *
+ * When 'from' is U_DATE_MIN and 'to' is U_DATE_MAX, this method checks if the specified currency is available any time.
+ * If 'from' and 'to' are same UDate value, this method checks if the specified currency is available on that date.
+ *
+ * @param isoCode
+ * The ISO 4217 3-letter code.
+ *
+ * @param from
+ * The lower bound of the date range, inclusive. When 'from' is U_DATE_MIN, checks the availability
+ * of the currency on any date before 'to'.
+ *
+ * @param to
+ * The upper bound of the date range, inclusive. When 'to' is U_DATE_MAX, checks the availability of
+ * the currency on any date after 'from'.
+ *
+ * @param errorCode
+ * ICU error code
+ *
+ * @return true if the given ISO 4217 3-letter code is supported on the specified date range.
+ *
+ * @stable ICU 4.8
+ */
+U_CAPI UBool U_EXPORT2
+ucurr_isAvailable(const UChar* isoCode,
+ UDate from,
+ UDate to,
+ UErrorCode* errorCode);
+
+/**
+ * Finds the number of valid currency codes for the
+ * given locale and date.
+ * @param locale the locale for which to retrieve the
+ * currency count.
+ * @param date the date for which to retrieve the
+ * currency count for the given locale.
+ * @param ec error code
+ * @return the number of currency codes for the
+ * given locale and date. If 0, currency
+ * codes couldn't be found or the input
+ * values are invalid.
+ * @stable ICU 4.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucurr_countCurrencies(const char* locale,
+ UDate date,
+ UErrorCode* ec);
+
+/**
+ * Finds a currency code for the given locale and date
+ * @param locale the locale for which to retrieve a currency code.
+ * Currency can be specified by the "currency" keyword
+ * in which case it overrides the default currency code
+ * @param date the date for which to retrieve a currency code for
+ * the given locale.
+ * @param index the index within the available list of currency codes
+ * for the given locale on the given date.
+ * @param buff fill in buffer. Can be NULL for preflighting.
+ * @param buffCapacity capacity of the fill in buffer. Can be 0 for
+ * preflighting. If it is non-zero, the buff parameter
+ * must not be NULL.
+ * @param ec error code
+ * @return length of the currency string. It should always be 3.
+ * If 0, currency couldn't be found or the input values are
+ * invalid.
+ * @stable ICU 4.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucurr_forLocaleAndDate(const char* locale,
+ UDate date,
+ int32_t index,
+ UChar* buff,
+ int32_t buffCapacity,
+ UErrorCode* ec);
+
+/**
+ * Given a key and a locale, returns an array of string values in a preferred
+ * order that would make a difference. These are all and only those values where
+ * the open (creation) of the service with the locale formed from the input locale
+ * plus input keyword and that value has different behavior than creation with the
+ * input locale alone.
+ * @param key one of the keys supported by this service. For now, only
+ * "currency" is supported.
+ * @param locale the locale
+ * @param commonlyUsed if set to true it will return only commonly used values
+ * with the given locale in preferred order. Otherwise,
+ * it will return all the available values for the locale.
+ * @param status error status
+ * @return a string enumeration over keyword values for the given key and the locale.
+ * @stable ICU 4.2
+ */
+U_CAPI UEnumeration* U_EXPORT2
+ucurr_getKeywordValuesForLocale(const char* key,
+ const char* locale,
+ UBool commonlyUsed,
+ UErrorCode* status);
+
+/**
+ * Returns the ISO 4217 numeric code for the currency.
+ * <p>Note: If the ISO 4217 numeric code is not assigned for the currency or
+ * the currency is unknown, this function returns 0.
+ *
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @return The ISO 4217 numeric code of the currency
+ * @stable ICU 49
+ */
+U_CAPI int32_t U_EXPORT2
+ucurr_getNumericCode(const UChar* currency);
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/udata.h b/thirdparty/icu4c/common/unicode/udata.h
new file mode 100644
index 0000000000..6caa849c42
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/udata.h
@@ -0,0 +1,440 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: udata.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999oct25
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UDATA_H__
+#define __UDATA_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+U_CDECL_BEGIN
+
+/**
+ * \file
+ * \brief C API: Data loading interface
+ *
+ * <h2>Information about data loading interface</h2>
+ *
+ * This API is used to find and efficiently load data for ICU and applications
+ * using ICU. It provides an abstract interface that specifies a data type and
+ * name to find and load the data. Normally this API is used by other ICU APIs
+ * to load required data out of the ICU data library, but it can be used to
+ * load data out of other places.
+ *
+ * See the User Guide Data Management chapter.
+ */
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Character used to separate package names from tree names
+ * @internal ICU 3.0
+ */
+#define U_TREE_SEPARATOR '-'
+
+/**
+ * String used to separate package names from tree names
+ * @internal ICU 3.0
+ */
+#define U_TREE_SEPARATOR_STRING "-"
+
+/**
+ * Character used to separate parts of entry names
+ * @internal ICU 3.0
+ */
+#define U_TREE_ENTRY_SEP_CHAR '/'
+
+/**
+ * String used to separate parts of entry names
+ * @internal ICU 3.0
+ */
+#define U_TREE_ENTRY_SEP_STRING "/"
+
+/**
+ * Alias for standard ICU data
+ * @internal ICU 3.0
+ */
+#define U_ICUDATA_ALIAS "ICUDATA"
+
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * UDataInfo contains the properties about the requested data.
+ * This is meta data.
+ *
+ * <p>This structure may grow in the future, indicated by the
+ * <code>size</code> field.</p>
+ *
+ * <p>ICU data must be at least 8-aligned, and should be 16-aligned.
+ * The UDataInfo struct begins 4 bytes after the start of the data item,
+ * so it is 4-aligned.</p>
+ *
+ * <p>The platform data property fields help determine if a data
+ * file can be efficiently used on a given machine.
+ * The particular fields are of importance only if the data
+ * is affected by the properties - if there is integer data
+ * with word sizes > 1 byte, char* text, or UChar* text.</p>
+ *
+ * <p>The implementation for the <code>udata_open[Choice]()</code>
+ * functions may reject data based on the value in <code>isBigEndian</code>.
+ * No other field is used by the <code>udata</code> API implementation.</p>
+ *
+ * <p>The <code>dataFormat</code> may be used to identify
+ * the kind of data, e.g. a converter table.</p>
+ *
+ * <p>The <code>formatVersion</code> field should be used to
+ * make sure that the format can be interpreted.
+ * It may be a good idea to check only for the one or two highest
+ * of the version elements to allow the data memory to
+ * get more or somewhat rearranged contents, for as long
+ * as the using code can still interpret the older contents.</p>
+ *
+ * <p>The <code>dataVersion</code> field is intended to be a
+ * common place to store the source version of the data;
+ * for data from the Unicode character database, this could
+ * reflect the Unicode version.</p>
+ *
+ * @stable ICU 2.0
+ */
+typedef struct {
+ /** sizeof(UDataInfo)
+ * @stable ICU 2.0 */
+ uint16_t size;
+
+ /** Reserved: unused, set to 0
+ * @stable ICU 2.0 */
+ uint16_t reservedWord;
+
+ /* platform data properties */
+ /** 0 for little-endian machine, 1 for big-endian
+ * @stable ICU 2.0 */
+ uint8_t isBigEndian;
+
+ /** see U_CHARSET_FAMILY values in utypes.h
+ * @stable ICU 2.0 */
+ uint8_t charsetFamily;
+
+ /** sizeof(UChar), one of { 1, 2, 4 }
+ * @stable ICU 2.0 */
+ uint8_t sizeofUChar;
+
+ /** Reserved: unused, set to 0
+ * @stable ICU 2.0 */
+ uint8_t reservedByte;
+
+ /** Data format identifier; may be used to identify the kind of data
+ * @stable ICU 2.0 */
+ uint8_t dataFormat[4];
+
+ /** Format version: [0] major [1] minor [2] milli [3] micro
+ * @stable ICU 2.0 */
+ uint8_t formatVersion[4];
+
+ /** Data version: [0] major [1] minor [2] milli [3] micro
+ * @stable ICU 2.0 */
+ uint8_t dataVersion[4];
+} UDataInfo;
+
+/* API for reading data -----------------------------------------------------*/
+
+/**
+ * Forward declaration of the data memory type.
+ * @stable ICU 2.0
+ */
+typedef struct UDataMemory UDataMemory;
+
+/**
+ * Callback function for udata_openChoice().
+ * @param context parameter passed into <code>udata_openChoice()</code>.
+ * @param type The type of the data as passed into <code>udata_openChoice()</code>.
+ * It may be <code>NULL</code>.
+ * @param name The name of the data as passed into <code>udata_openChoice()</code>.
+ * @param pInfo A pointer to the <code>UDataInfo</code> structure
+ * of data that has been loaded and will be returned
+ * by <code>udata_openChoice()</code> if this function
+ * returns <code>true</code>.
+ * @return true if the current data memory is acceptable
+ * @stable ICU 2.0
+ */
+typedef UBool U_CALLCONV
+UDataMemoryIsAcceptable(void *context,
+ const char *type, const char *name,
+ const UDataInfo *pInfo);
+
+
+/**
+ * Convenience function.
+ * This function works the same as <code>udata_openChoice</code>
+ * except that any data that matches the type and name
+ * is assumed to be acceptable.
+ * @param path Specifies an absolute path and/or a basename for the
+ * finding of the data in the file system.
+ * <code>NULL</code> for ICU data.
+ * @param type A string that specifies the type of data to be loaded.
+ * For example, resource bundles are loaded with type "res",
+ * conversion tables with type "cnv".
+ * This may be <code>NULL</code> or empty.
+ * @param name A string that specifies the name of the data.
+ * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ * @return A pointer (handle) to a data memory object, or <code>NULL</code>
+ * if an error occurs. Call <code>udata_getMemory()</code>
+ * to get a pointer to the actual data.
+ *
+ * @see udata_openChoice
+ * @stable ICU 2.0
+ */
+U_CAPI UDataMemory * U_EXPORT2
+udata_open(const char *path, const char *type, const char *name,
+ UErrorCode *pErrorCode);
+
+/**
+ * Data loading function.
+ * This function is used to find and efficiently load data for
+ * ICU and applications using ICU.
+ * It provides an abstract interface that specifies a data
+ * type and name to find and load the data.
+ *
+ * <p>The implementation depends on platform properties and user preferences
+ * and may involve loading shared libraries (DLLs), mapping
+ * files into memory, or fopen()/fread() files.
+ * It may also involve using static memory or database queries etc.
+ * Several or all data items may be combined into one entity
+ * (DLL, memory-mappable file).</p>
+ *
+ * <p>The data is always preceded by a header that includes
+ * a <code>UDataInfo</code> structure.
+ * The caller's <code>isAcceptable()</code> function is called to make
+ * sure that the data is useful. It may be called several times if it
+ * rejects the data and there is more than one location with data
+ * matching the type and name.</p>
+ *
+ * <p>If <code>path==NULL</code>, then ICU data is loaded.
+ * Otherwise, it is separated into a basename and a basename-less directory string.
+ * The basename is used as the data package name, and the directory is
+ * logically prepended to the ICU data directory string.</p>
+ *
+ * <p>For details about ICU data loading see the User Guide
+ * Data Management chapter. (https://unicode-org.github.io/icu/userguide/icudata)</p>
+ *
+ * @param path Specifies an absolute path and/or a basename for the
+ * finding of the data in the file system.
+ * <code>NULL</code> for ICU data.
+ * @param type A string that specifies the type of data to be loaded.
+ * For example, resource bundles are loaded with type "res",
+ * conversion tables with type "cnv".
+ * This may be <code>NULL</code> or empty.
+ * @param name A string that specifies the name of the data.
+ * @param isAcceptable This function is called to verify that loaded data
+ * is useful for the client code. If it returns false
+ * for all data items, then <code>udata_openChoice()</code>
+ * will return with an error.
+ * @param context Arbitrary parameter to be passed into isAcceptable.
+ * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ * @return A pointer (handle) to a data memory object, or <code>NULL</code>
+ * if an error occurs. Call <code>udata_getMemory()</code>
+ * to get a pointer to the actual data.
+ * @stable ICU 2.0
+ */
+U_CAPI UDataMemory * U_EXPORT2
+udata_openChoice(const char *path, const char *type, const char *name,
+ UDataMemoryIsAcceptable *isAcceptable, void *context,
+ UErrorCode *pErrorCode);
+
+/**
+ * Close the data memory.
+ * This function must be called to allow the system to
+ * release resources associated with this data memory.
+ * @param pData The pointer to data memory object
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+udata_close(UDataMemory *pData);
+
+/**
+ * Get the pointer to the actual data inside the data memory.
+ * The data is read-only.
+ *
+ * ICU data must be at least 8-aligned, and should be 16-aligned.
+ *
+ * @param pData The pointer to data memory object
+ * @stable ICU 2.0
+ */
+U_CAPI const void * U_EXPORT2
+udata_getMemory(UDataMemory *pData);
+
+/**
+ * Get the information from the data memory header.
+ * This gives access to the header containing
+ * platform data properties etc., which is not part of
+ * the data itself and therefore cannot be accessed
+ * via the pointer that <code>udata_getMemory()</code> returns.
+ *
+ * @param pData pointer to the data memory object
+ * @param pInfo pointer to a UDataInfo object;
+ * its <code>size</code> field must be set correctly,
+ * typically to <code>sizeof(UDataInfo)</code>.
+ *
+ * <code>*pInfo</code> will be filled with the UDataInfo structure
+ * in the data memory object. If this structure is smaller than
+ * <code>pInfo->size</code>, then the <code>size</code> will be
+ * adjusted and only part of the structure will be filled.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);
+
+/**
+ * This function bypasses the normal ICU data loading process and
+ * allows you to force ICU's system data to come out of a user-specified
+ * area in memory.
+ *
+ * ICU data must be at least 8-aligned, and should be 16-aligned.
+ * See https://unicode-org.github.io/icu/userguide/icudata
+ *
+ * The format of this data is that of the icu common data file, as is
+ * generated by the pkgdata tool with mode=common or mode=dll.
+ * You can read in a whole common mode file and pass the address to the start of the
+ * data, or (with the appropriate link options) pass in the pointer to
+ * the data that has been loaded from a dll by the operating system,
+ * as shown in this code:
+ *
+ * extern const char U_IMPORT U_ICUDATA_ENTRY_POINT [];
+ * // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool
+ * UErrorCode status = U_ZERO_ERROR;
+ *
+ * udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status);
+ *
+ * It is important that the declaration be as above. The entry point
+ * must not be declared as an extern void*.
+ *
+ * Starting with ICU 4.4, it is possible to set several data packages,
+ * one per call to this function.
+ * udata_open() will look for data in the multiple data packages in the order
+ * in which they were set.
+ * The position of the linked-in or default-name ICU .data package in the
+ * search list depends on when the first data item is loaded that is not contained
+ * in the already explicitly set packages.
+ * If data was loaded implicitly before the first call to this function
+ * (for example, via opening a converter, constructing a UnicodeString
+ * from default-codepage data, using formatting or collation APIs, etc.),
+ * then the default data will be first in the list.
+ *
+ * This function has no effect on application (non ICU) data. See udata_setAppData()
+ * for similar functionality for application data.
+ *
+ * @param data pointer to ICU common data
+ * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+udata_setCommonData(const void *data, UErrorCode *err);
+
+
+/**
+ * This function bypasses the normal ICU data loading process for application-specific
+ * data and allows you to force it to come out of a user-specified
+ * pointer.
+ *
+ * ICU data must be at least 8-aligned, and should be 16-aligned.
+ * See https://unicode-org.github.io/icu/userguide/icudata
+ *
+ * The format of this data is that of the icu common data file, like 'icudt26l.dat'
+ * or the corresponding shared library (DLL) file.
+ * The application must read in or otherwise construct an image of the data and then
+ * pass the address of it to this function.
+ *
+ *
+ * Warning: setAppData will set a U_USING_DEFAULT_WARNING code if
+ * data with the specified path has already been opened, or
+ * if setAppData with the same path has already been called.
+ * Any such calls to setAppData will have no effect.
+ *
+ *
+ * @param packageName the package name by which the application will refer
+ * to (open) this data
+ * @param data pointer to the data
+ * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
+ * @see udata_setCommonData
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+udata_setAppData(const char *packageName, const void *data, UErrorCode *err);
+
+/**
+ * Possible settings for udata_setFileAccess().
+ * @see udata_setFileAccess
+ * @stable ICU 3.4
+ */
+typedef enum UDataFileAccess {
+ /** ICU looks for data in single files first, then in packages. (default) @stable ICU 3.4 */
+ UDATA_FILES_FIRST,
+ /** An alias for the default access mode; same value as UDATA_FILES_FIRST. @stable ICU 3.4 */
+ UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST,
+ /** ICU only loads data from packages, not from single files. @stable ICU 3.4 */
+ UDATA_ONLY_PACKAGES,
+ /** ICU loads data from packages first, and only from single files
+ if the data cannot be found in a package. @stable ICU 3.4 */
+ UDATA_PACKAGES_FIRST,
+ /** ICU does not access the file system for data loading. @stable ICU 3.4 */
+ UDATA_NO_FILES,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Number of real UDataFileAccess values.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UDATA_FILE_ACCESS_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UDataFileAccess;
+
+/**
+ * This function may be called to control how ICU loads data. It must be called
+ * before any ICU data is loaded, including application data loaded with
+ * ures/ResourceBundle or udata APIs. This function is not multithread safe.
+ * The results of calling it while other threads are loading data are undefined.
+ * @param access The type of file access to be used
+ * @param status Error code.
+ * @see UDataFileAccess
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+udata_setFileAccess(UDataFileAccess access, UErrorCode *status);
+
+U_CDECL_END
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUDataMemoryPointer
+ * "Smart pointer" class, closes a UDataMemory via udata_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUDataMemoryPointer, UDataMemory, udata_close);
+
+U_NAMESPACE_END
+
+#endif // U_SHOW_CPLUSPLUS_API
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/udisplaycontext.h b/thirdparty/icu4c/common/unicode/udisplaycontext.h
new file mode 100644
index 0000000000..6e14217980
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/udisplaycontext.h
@@ -0,0 +1,173 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*****************************************************************************************
+* Copyright (C) 2014-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*****************************************************************************************
+*/
+
+#ifndef UDISPLAYCONTEXT_H
+#define UDISPLAYCONTEXT_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+/**
+ * \file
+ * \brief C API: Display context types (enum values)
+ */
+
+/**
+ * Display context types, for getting values of a particular setting.
+ * Note, the specific numeric values are internal and may change.
+ * @stable ICU 51
+ */
+enum UDisplayContextType {
+ /**
+ * Type to retrieve the dialect handling setting, e.g.
+ * UDISPCTX_STANDARD_NAMES or UDISPCTX_DIALECT_NAMES.
+ * @stable ICU 51
+ */
+ UDISPCTX_TYPE_DIALECT_HANDLING = 0,
+ /**
+ * Type to retrieve the capitalization context setting, e.g.
+ * UDISPCTX_CAPITALIZATION_NONE, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE,
+ * UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, etc.
+ * @stable ICU 51
+ */
+ UDISPCTX_TYPE_CAPITALIZATION = 1,
+ /**
+ * Type to retrieve the display length setting, e.g.
+ * UDISPCTX_LENGTH_FULL, UDISPCTX_LENGTH_SHORT.
+ * @stable ICU 54
+ */
+ UDISPCTX_TYPE_DISPLAY_LENGTH = 2,
+ /**
+ * Type to retrieve the substitute handling setting, e.g.
+ * UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE.
+ * @stable ICU 58
+ */
+ UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3
+};
+/**
+* @stable ICU 51
+*/
+typedef enum UDisplayContextType UDisplayContextType;
+
+/**
+ * Display context settings.
+ * Note, the specific numeric values are internal and may change.
+ * @stable ICU 51
+ */
+enum UDisplayContext {
+ /**
+ * ================================
+ * DIALECT_HANDLING can be set to one of UDISPCTX_STANDARD_NAMES or
+ * UDISPCTX_DIALECT_NAMES. Use UDisplayContextType UDISPCTX_TYPE_DIALECT_HANDLING
+ * to get the value.
+ */
+ /**
+ * A possible setting for DIALECT_HANDLING:
+ * use standard names when generating a locale name,
+ * e.g. en_GB displays as 'English (United Kingdom)'.
+ * @stable ICU 51
+ */
+ UDISPCTX_STANDARD_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 0,
+ /**
+ * A possible setting for DIALECT_HANDLING:
+ * use dialect names, when generating a locale name,
+ * e.g. en_GB displays as 'British English'.
+ * @stable ICU 51
+ */
+ UDISPCTX_DIALECT_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 1,
+ /**
+ * ================================
+ * CAPITALIZATION can be set to one of UDISPCTX_CAPITALIZATION_NONE,
+ * UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE,
+ * UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE,
+ * UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, or
+ * UDISPCTX_CAPITALIZATION_FOR_STANDALONE.
+ * Use UDisplayContextType UDISPCTX_TYPE_CAPITALIZATION to get the value.
+ */
+ /**
+ * The capitalization context to be used is unknown (this is the default value).
+ * @stable ICU 51
+ */
+ UDISPCTX_CAPITALIZATION_NONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 0,
+ /**
+ * The capitalization context if a date, date symbol or display name is to be
+ * formatted with capitalization appropriate for the middle of a sentence.
+ * @stable ICU 51
+ */
+ UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 1,
+ /**
+ * The capitalization context if a date, date symbol or display name is to be
+ * formatted with capitalization appropriate for the beginning of a sentence.
+ * @stable ICU 51
+ */
+ UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 2,
+ /**
+ * The capitalization context if a date, date symbol or display name is to be
+ * formatted with capitalization appropriate for a user-interface list or menu item.
+ * @stable ICU 51
+ */
+ UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 3,
+ /**
+ * The capitalization context if a date, date symbol or display name is to be
+ * formatted with capitalization appropriate for stand-alone usage such as an
+ * isolated name on a calendar page.
+ * @stable ICU 51
+ */
+ UDISPCTX_CAPITALIZATION_FOR_STANDALONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 4,
+ /**
+ * ================================
+ * DISPLAY_LENGTH can be set to one of UDISPCTX_LENGTH_FULL or
+ * UDISPCTX_LENGTH_SHORT. Use UDisplayContextType UDISPCTX_TYPE_DISPLAY_LENGTH
+ * to get the value.
+ */
+ /**
+ * A possible setting for DISPLAY_LENGTH:
+ * use full names when generating a locale name,
+ * e.g. "United States" for US.
+ * @stable ICU 54
+ */
+ UDISPCTX_LENGTH_FULL = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 0,
+ /**
+ * A possible setting for DISPLAY_LENGTH:
+ * use short names when generating a locale name,
+ * e.g. "U.S." for US.
+ * @stable ICU 54
+ */
+ UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1,
+ /**
+ * ================================
+ * SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or
+ * UDISPCTX_NO_SUBSTITUTE. Use UDisplayContextType UDISPCTX_TYPE_SUBSTITUTE_HANDLING
+ * to get the value.
+ */
+ /**
+ * A possible setting for SUBSTITUTE_HANDLING:
+ * Returns a fallback value (e.g., the input code) when no data is available.
+ * This is the default value.
+ * @stable ICU 58
+ */
+ UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0,
+ /**
+ * A possible setting for SUBSTITUTE_HANDLING:
+ * Returns a null value with error code set to U_ILLEGAL_ARGUMENT_ERROR when no
+ * data is available.
+ * @stable ICU 58
+ */
+ UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1
+
+};
+/**
+* @stable ICU 51
+*/
+typedef enum UDisplayContext UDisplayContext;
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/uenum.h b/thirdparty/icu4c/common/unicode/uenum.h
new file mode 100644
index 0000000000..d9c893e06d
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/uenum.h
@@ -0,0 +1,209 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uenum.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2002jul08
+* created by: Vladimir Weinstein
+*/
+
+#ifndef __UENUM_H
+#define __UENUM_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+
+U_NAMESPACE_BEGIN
+class StringEnumeration;
+U_NAMESPACE_END
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C API: String Enumeration
+ */
+
+/**
+ * An enumeration object.
+ * For usage in C programs.
+ * @stable ICU 2.2
+ */
+struct UEnumeration;
+/** structure representing an enumeration object instance @stable ICU 2.2 */
+typedef struct UEnumeration UEnumeration;
+
+/**
+ * Disposes of resources in use by the iterator. If en is NULL,
+ * does nothing. After this call, any char* or UChar* pointer
+ * returned by uenum_unext() or uenum_next() is invalid.
+ * @param en UEnumeration structure pointer
+ * @stable ICU 2.2
+ */
+U_CAPI void U_EXPORT2
+uenum_close(UEnumeration* en);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUEnumerationPointer
+ * "Smart pointer" class, closes a UEnumeration via uenum_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUEnumerationPointer, UEnumeration, uenum_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Returns the number of elements that the iterator traverses. If
+ * the iterator is out-of-sync with its service, status is set to
+ * U_ENUM_OUT_OF_SYNC_ERROR.
+ * This is a convenience function. It can end up being very
+ * expensive as all the items might have to be pre-fetched (depending
+ * on the type of data being traversed). Use with caution and only
+ * when necessary.
+ * @param en UEnumeration structure pointer
+ * @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the
+ * iterator is out of sync.
+ * @return number of elements in the iterator
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+uenum_count(UEnumeration* en, UErrorCode* status);
+
+/**
+ * Returns the next element in the iterator's list. If there are
+ * no more elements, returns NULL. If the iterator is out-of-sync
+ * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
+ * NULL is returned. If the native service string is a char* string,
+ * it is converted to UChar* with the invariant converter.
+ * The result is terminated by (UChar)0.
+ * @param en the iterator object
+ * @param resultLength pointer to receive the length of the result
+ * (not including the terminating \\0).
+ * If the pointer is NULL it is ignored.
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ * the iterator is out of sync with its service.
+ * @return a pointer to the string. The string will be
+ * zero-terminated. The return pointer is owned by this iterator
+ * and must not be deleted by the caller. The pointer is valid
+ * until the next call to any uenum_... method, including
+ * uenum_next() or uenum_unext(). When all strings have been
+ * traversed, returns NULL.
+ * @stable ICU 2.2
+ */
+U_CAPI const UChar* U_EXPORT2
+uenum_unext(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/**
+ * Returns the next element in the iterator's list. If there are
+ * no more elements, returns NULL. If the iterator is out-of-sync
+ * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
+ * NULL is returned. If the native service string is a UChar*
+ * string, it is converted to char* with the invariant converter.
+ * The result is terminated by (char)0. If the conversion fails
+ * (because a character cannot be converted) then status is set to
+ * U_INVARIANT_CONVERSION_ERROR and the return value is undefined
+ * (but non-NULL).
+ * @param en the iterator object
+ * @param resultLength pointer to receive the length of the result
+ * (not including the terminating \\0).
+ * If the pointer is NULL it is ignored.
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ * the iterator is out of sync with its service. Set to
+ * U_INVARIANT_CONVERSION_ERROR if the underlying native string is
+ * UChar* and conversion to char* with the invariant converter
+ * fails. This error pertains only to the current string, so iteration
+ * might be able to continue successfully.
+ * @return a pointer to the string. The string will be
+ * zero-terminated. The return pointer is owned by this iterator
+ * and must not be deleted by the caller. The pointer is valid
+ * until the next call to any uenum_... method, including
+ * uenum_next() or uenum_unext(). When all strings have been
+ * traversed, returns NULL.
+ * @stable ICU 2.2
+ */
+U_CAPI const char* U_EXPORT2
+uenum_next(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/**
+ * Resets the iterator to the current list of service IDs. This
+ * re-establishes sync with the service and rewinds the iterator
+ * to start at the first element.
+ * @param en the iterator object
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ * the iterator is out of sync with its service.
+ * @stable ICU 2.2
+ */
+U_CAPI void U_EXPORT2
+uenum_reset(UEnumeration* en, UErrorCode* status);
+
+#if U_SHOW_CPLUSPLUS_API
+
+/**
+ * Given a StringEnumeration, wrap it in a UEnumeration. The
+ * StringEnumeration is adopted; after this call, the caller must not
+ * delete it (regardless of error status).
+ * @param adopted the C++ StringEnumeration to be wrapped in a UEnumeration.
+ * @param ec the error code.
+ * @return a UEnumeration wrapping the adopted StringEnumeration.
+ * @stable ICU 4.2
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec);
+
+#endif
+
+/**
+ * Given an array of const UChar* strings, return a UEnumeration. String pointers from 0..count-1 must not be null.
+ * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
+ * \snippet test/cintltst/uenumtst.c uenum_openUCharStringsEnumeration
+ * @param strings array of const UChar* strings (each null terminated). All storage is owned by the caller.
+ * @param count length of the array
+ * @param ec error code
+ * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.
+ * @see uenum_close
+ * @stable ICU 50
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count,
+ UErrorCode* ec);
+
+/**
+ * Given an array of const char* strings (invariant chars only), return a UEnumeration. String pointers from 0..count-1 must not be null.
+ * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
+ * \snippet test/cintltst/uenumtst.c uenum_openCharStringsEnumeration
+ * @param strings array of char* strings (each null terminated). All storage is owned by the caller.
+ * @param count length of the array
+ * @param ec error code
+ * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.
+ * @see uenum_close
+ * @stable ICU 50
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uenum_openCharStringsEnumeration(const char* const strings[], int32_t count,
+ UErrorCode* ec);
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/uidna.h b/thirdparty/icu4c/common/unicode/uidna.h
new file mode 100644
index 0000000000..24a81ceadd
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/uidna.h
@@ -0,0 +1,776 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *******************************************************************************
+ *
+ * Copyright (C) 2003-2014, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ *******************************************************************************
+ * file name: uidna.h
+ * encoding: UTF-8
+ * tab size: 8 (not used)
+ * indentation:4
+ *
+ * created on: 2003feb1
+ * created by: Ram Viswanadha
+ */
+
+#ifndef __UIDNA_H__
+#define __UIDNA_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_IDNA
+
+#include <stdbool.h>
+#include "unicode/parseerr.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C API: Internationalizing Domain Names in Applications (IDNA)
+ *
+ * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
+ *
+ * The C API functions which do take a UIDNA * service object pointer
+ * implement UTS #46 and IDNA2008.
+ *
+ * IDNA2003 is obsolete.
+ * The C API functions which do not take a service object pointer
+ * implement IDNA2003. They are all deprecated.
+ */
+
+/*
+ * IDNA option bit set values.
+ */
+enum {
+ /**
+ * Default options value: None of the other options are set.
+ * For use in static worker and factory methods.
+ * @stable ICU 2.6
+ */
+ UIDNA_DEFAULT=0,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Option to allow unassigned code points in domain names and labels.
+ * For use in static worker and factory methods.
+ * <p>This option is ignored by the UTS46 implementation.
+ * (UTS #46 disallows unassigned code points.)
+ * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
+ */
+ UIDNA_ALLOW_UNASSIGNED=1,
+#endif /* U_HIDE_DEPRECATED_API */
+ /**
+ * Option to check whether the input conforms to the STD3 ASCII rules,
+ * for example the restriction of labels to LDH characters
+ * (ASCII Letters, Digits and Hyphen-Minus).
+ * For use in static worker and factory methods.
+ * @stable ICU 2.6
+ */
+ UIDNA_USE_STD3_RULES=2,
+ /**
+ * IDNA option to check whether the input conforms to the BiDi rules.
+ * For use in static worker and factory methods.
+ * <p>This option is ignored by the IDNA2003 implementation.
+ * (IDNA2003 always performs a BiDi check.)
+ * @stable ICU 4.6
+ */
+ UIDNA_CHECK_BIDI=4,
+ /**
+ * IDNA option to check whether the input conforms to the CONTEXTJ rules.
+ * For use in static worker and factory methods.
+ * <p>This option is ignored by the IDNA2003 implementation.
+ * (The CONTEXTJ check is new in IDNA2008.)
+ * @stable ICU 4.6
+ */
+ UIDNA_CHECK_CONTEXTJ=8,
+ /**
+ * IDNA option for nontransitional processing in ToASCII().
+ * For use in static worker and factory methods.
+ * <p>By default, ToASCII() uses transitional processing.
+ * <p>This option is ignored by the IDNA2003 implementation.
+ * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
+ * @stable ICU 4.6
+ */
+ UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
+ /**
+ * IDNA option for nontransitional processing in ToUnicode().
+ * For use in static worker and factory methods.
+ * <p>By default, ToUnicode() uses transitional processing.
+ * <p>This option is ignored by the IDNA2003 implementation.
+ * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
+ * @stable ICU 4.6
+ */
+ UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
+ /**
+ * IDNA option to check whether the input conforms to the CONTEXTO rules.
+ * For use in static worker and factory methods.
+ * <p>This option is ignored by the IDNA2003 implementation.
+ * (The CONTEXTO check is new in IDNA2008.)
+ * <p>This is for use by registries for IDNA2008 conformance.
+ * UTS #46 does not require the CONTEXTO check.
+ * @stable ICU 49
+ */
+ UIDNA_CHECK_CONTEXTO=0x40
+};
+
+/**
+ * Opaque C service object type for the new IDNA API.
+ * @stable ICU 4.6
+ */
+struct UIDNA;
+typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */
+
+/**
+ * Returns a UIDNA instance which implements UTS #46.
+ * Returns an unmodifiable instance, owned by the caller.
+ * Cache it for multiple operations, and uidna_close() it when done.
+ * The instance is thread-safe, that is, it can be used concurrently.
+ *
+ * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
+ *
+ * @param options Bit set to modify the processing and error checking.
+ * See option bit set values in uidna.h.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the UTS #46 UIDNA instance, if successful
+ * @stable ICU 4.6
+ */
+U_CAPI UIDNA * U_EXPORT2
+uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
+
+/**
+ * Closes a UIDNA instance.
+ * @param idna UIDNA instance to be closed
+ * @stable ICU 4.6
+ */
+U_CAPI void U_EXPORT2
+uidna_close(UIDNA *idna);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUIDNAPointer
+ * "Smart pointer" class, closes a UIDNA via uidna_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.6
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Output container for IDNA processing errors.
+ * Initialize with UIDNA_INFO_INITIALIZER:
+ * \code
+ * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
+ * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
+ * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
+ * \endcode
+ * @stable ICU 4.6
+ */
+typedef struct UIDNAInfo {
+ /** sizeof(UIDNAInfo) @stable ICU 4.6 */
+ int16_t size;
+ /**
+ * Set to true if transitional and nontransitional processing produce different results.
+ * For details see C++ IDNAInfo::isTransitionalDifferent().
+ * @stable ICU 4.6
+ */
+ UBool isTransitionalDifferent;
+ UBool reservedB3; /**< Reserved field, do not use. @internal */
+ /**
+ * Bit set indicating IDNA processing errors. 0 if no errors.
+ * See UIDNA_ERROR_... constants.
+ * @stable ICU 4.6
+ */
+ uint32_t errors;
+ int32_t reservedI2; /**< Reserved field, do not use. @internal */
+ int32_t reservedI3; /**< Reserved field, do not use. @internal */
+} UIDNAInfo;
+
+/**
+ * Static initializer for a UIDNAInfo struct.
+ * @stable ICU 4.6
+ */
+#define UIDNA_INFO_INITIALIZER { \
+ (int16_t)sizeof(UIDNAInfo), \
+ false, false, \
+ 0, 0, 0 }
+
+/**
+ * Converts a single domain name label into its ASCII form for DNS lookup.
+ * If any processing step fails, then pInfo->errors will be non-zero and
+ * the result might not be an ASCII string.
+ * The label might be modified according to the types of errors.
+ * Labels with severe errors will be left in (or turned into) their Unicode form.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param idna UIDNA instance
+ * @param label Input domain name label
+ * @param length Label length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_labelToASCII(const UIDNA *idna,
+ const UChar *label, int32_t length,
+ UChar *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/**
+ * Converts a single domain name label into its Unicode form for human-readable display.
+ * If any processing step fails, then pInfo->errors will be non-zero.
+ * The label might be modified according to the types of errors.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param idna UIDNA instance
+ * @param label Input domain name label
+ * @param length Label length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_labelToUnicode(const UIDNA *idna,
+ const UChar *label, int32_t length,
+ UChar *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/**
+ * Converts a whole domain name into its ASCII form for DNS lookup.
+ * If any processing step fails, then pInfo->errors will be non-zero and
+ * the result might not be an ASCII string.
+ * The domain name might be modified according to the types of errors.
+ * Labels with severe errors will be left in (or turned into) their Unicode form.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param idna UIDNA instance
+ * @param name Input domain name
+ * @param length Domain name length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_nameToASCII(const UIDNA *idna,
+ const UChar *name, int32_t length,
+ UChar *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/**
+ * Converts a whole domain name into its Unicode form for human-readable display.
+ * If any processing step fails, then pInfo->errors will be non-zero.
+ * The domain name might be modified according to the types of errors.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param idna UIDNA instance
+ * @param name Input domain name
+ * @param length Domain name length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_nameToUnicode(const UIDNA *idna,
+ const UChar *name, int32_t length,
+ UChar *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/* UTF-8 versions of the processing methods --------------------------------- */
+
+/**
+ * Converts a single domain name label into its ASCII form for DNS lookup.
+ * UTF-8 version of uidna_labelToASCII(), same behavior.
+ *
+ * @param idna UIDNA instance
+ * @param label Input domain name label
+ * @param length Label length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_labelToASCII_UTF8(const UIDNA *idna,
+ const char *label, int32_t length,
+ char *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/**
+ * Converts a single domain name label into its Unicode form for human-readable display.
+ * UTF-8 version of uidna_labelToUnicode(), same behavior.
+ *
+ * @param idna UIDNA instance
+ * @param label Input domain name label
+ * @param length Label length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_labelToUnicodeUTF8(const UIDNA *idna,
+ const char *label, int32_t length,
+ char *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/**
+ * Converts a whole domain name into its ASCII form for DNS lookup.
+ * UTF-8 version of uidna_nameToASCII(), same behavior.
+ *
+ * @param idna UIDNA instance
+ * @param name Input domain name
+ * @param length Domain name length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_nameToASCII_UTF8(const UIDNA *idna,
+ const char *name, int32_t length,
+ char *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/**
+ * Converts a whole domain name into its Unicode form for human-readable display.
+ * UTF-8 version of uidna_nameToUnicode(), same behavior.
+ *
+ * @param idna UIDNA instance
+ * @param name Input domain name
+ * @param length Domain name length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_nameToUnicodeUTF8(const UIDNA *idna,
+ const char *name, int32_t length,
+ char *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/*
+ * IDNA error bit set values.
+ * When a domain name or label fails a processing step or does not meet the
+ * validity criteria, then one or more of these error bits are set.
+ */
+enum {
+ /**
+ * A non-final domain name label (or the whole domain name) is empty.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_EMPTY_LABEL=1,
+ /**
+ * A domain name label is longer than 63 bytes.
+ * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
+ * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_LABEL_TOO_LONG=2,
+ /**
+ * A domain name is longer than 255 bytes in its storage form.
+ * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
+ * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
+ /**
+ * A label starts with a hyphen-minus ('-').
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_LEADING_HYPHEN=8,
+ /**
+ * A label ends with a hyphen-minus ('-').
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_TRAILING_HYPHEN=0x10,
+ /**
+ * A label contains hyphen-minus ('-') in the third and fourth positions.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_HYPHEN_3_4=0x20,
+ /**
+ * A label starts with a combining mark.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
+ /**
+ * A label or domain name contains disallowed characters.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_DISALLOWED=0x80,
+ /**
+ * A label starts with "xn--" but does not contain valid Punycode.
+ * That is, an xn-- label failed Punycode decoding.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_PUNYCODE=0x100,
+ /**
+ * A label contains a dot (full stop).
+ * This can occur in an input string for a single-label function.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_LABEL_HAS_DOT=0x200,
+ /**
+ * An ACE label does not contain a valid label string.
+ * The label was successfully ACE (Punycode) decoded but the resulting
+ * string had severe validation errors. For example,
+ * it might contain characters that are not allowed in ACE labels,
+ * or it might not be normalized.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
+ /**
+ * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_BIDI=0x800,
+ /**
+ * A label does not meet the IDNA CONTEXTJ requirements.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_CONTEXTJ=0x1000,
+ /**
+ * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
+ * Some punctuation characters "Would otherwise have been DISALLOWED"
+ * but are allowed in certain contexts. (RFC 5892)
+ * @stable ICU 49
+ */
+ UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
+ /**
+ * A label does not meet the IDNA CONTEXTO requirements for digits.
+ * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
+ * @stable ICU 49
+ */
+ UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
+};
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/* IDNA2003 API ------------------------------------------------------------- */
+
+/**
+ * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
+ * This operation is done on <b>single labels</b> before sending it to something that expects
+ * ASCII names. A label is an individual part of a domain name. Labels are usually
+ * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
+ *
+ * IDNA2003 API Overview:
+ *
+ * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
+ * (http://www.ietf.org/rfc/rfc3490.txt).
+ * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
+ * containing non-ASCII code points are processed by the
+ * ToASCII operation before passing it to resolver libraries. Domain names
+ * that are obtained from resolver libraries are processed by the
+ * ToUnicode operation before displaying the domain name to the user.
+ * IDNA requires that implementations process input strings with Nameprep
+ * (http://www.ietf.org/rfc/rfc3491.txt),
+ * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
+ * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
+ * Implementations of IDNA MUST fully implement Nameprep and Punycode;
+ * neither Nameprep nor Punycode are optional.
+ * The input and output of ToASCII and ToUnicode operations are Unicode
+ * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
+ * multiple times to an input string will yield the same result as applying the operation
+ * once.
+ * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
+ * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
+ *
+ * @param src Input UChar array containing label in Unicode.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output UChar array with ASCII (ACE encoded) label.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_IDNA_UNASSIGNED_ERROR error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful or in case of a buffer overflow;
+ * in the overflow case it will be greater than destCapacity.
+ * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+uidna_toASCII(const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+
+/**
+ * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
+ * This operation is done on <b>single labels</b> before sending it to something that expects
+ * Unicode names. A label is an individual part of a domain name. Labels are usually
+ * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
+ *
+ * @param src Input UChar array containing ASCII (ACE encoded) label.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output Converted UChar array containing Unicode equivalent of label.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_IDNA_UNASSIGNED_ERROR error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points in the input
+ * are treated as normal Unicode code points. <b> Note: </b> This option is
+ * required on toUnicode operation because the RFC mandates
+ * verification of decoded ACE input by applying toASCII and comparing
+ * its output with source
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful or in case of a buffer overflow;
+ * in the overflow case it will be greater than destCapacity.
+ * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+uidna_toUnicode(const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+
+/**
+ * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
+ * This operation is done on complete domain names, e.g: "www.example.com".
+ * It is important to note that this operation can fail. If it fails, then the input
+ * domain name cannot be used as an Internationalized Domain Name and the application
+ * should have methods defined to deal with the failure.
+ *
+ * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
+ * and then convert. This function does not offer that level of granularity. The options once
+ * set will apply to all labels in the domain name
+ *
+ * @param src Input UChar array containing IDN in Unicode.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output UChar array with ASCII (ACE encoded) IDN.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_IDNA_UNASSIGNED_ERROR error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful or in case of a buffer overflow;
+ * in the overflow case it will be greater than destCapacity.
+ * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+uidna_IDNToASCII( const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+/**
+ * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
+ * This operation is done on complete domain names, e.g: "www.example.com".
+ *
+ * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
+ * and then convert. This function does not offer that level of granularity. The options once
+ * set will apply to all labels in the domain name
+ *
+ * @param src Input UChar array containing IDN in ASCII (ACE encoded) form.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output UChar array containing Unicode equivalent of source IDN.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_IDNA_UNASSIGNED_ERROR error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful or in case of a buffer overflow;
+ * in the overflow case it will be greater than destCapacity.
+ * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+/**
+ * IDNA2003: Compare two IDN strings for equivalence.
+ * This function splits the domain names into labels and compares them.
+ * According to IDN RFC, whenever two labels are compared, they are
+ * considered equal if and only if their ASCII forms (obtained by
+ * applying toASCII) match using a case-insensitive ASCII comparison.
+ * Two domain names are considered a match if and only if all labels
+ * match regardless of whether label separators match.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_IDNA_UNASSIGNED_ERROR error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param status ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+uidna_compare( const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ int32_t options,
+ UErrorCode* status);
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/uiter.h b/thirdparty/icu4c/common/unicode/uiter.h
new file mode 100644
index 0000000000..be232c774d
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/uiter.h
@@ -0,0 +1,709 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2011 International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uiter.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jan18
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UITER_H__
+#define __UITER_H__
+
+/**
+ * \file
+ * \brief C API: Unicode Character Iteration
+ *
+ * @see UCharIterator
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+ U_NAMESPACE_BEGIN
+
+ class CharacterIterator;
+ class Replaceable;
+
+ U_NAMESPACE_END
+#endif
+
+U_CDECL_BEGIN
+
+struct UCharIterator;
+typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
+
+/**
+ * Origin constants for UCharIterator.getIndex() and UCharIterator.move().
+ * @see UCharIteratorMove
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef enum UCharIteratorOrigin {
+ UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
+} UCharIteratorOrigin;
+
+/** Constants for UCharIterator. @stable ICU 2.6 */
+enum {
+ /**
+ * Constant value that may be returned by UCharIteratorMove
+ * indicating that the final UTF-16 index is not known, but that the move succeeded.
+ * This can occur when moving relative to limit or length, or
+ * when moving relative to the current index after a setState()
+ * when the current UTF-16 index is not known.
+ *
+ * It would be very inefficient to have to count from the beginning of the text
+ * just to get the current/limit/length index after moving relative to it.
+ * The actual index can be determined with getIndex(UITER_CURRENT)
+ * which will count the UChars if necessary.
+ *
+ * @stable ICU 2.6
+ */
+ UITER_UNKNOWN_INDEX=-2
+};
+
+
+/**
+ * Constant for UCharIterator getState() indicating an error or
+ * an unknown state.
+ * Returned by uiter_getState()/UCharIteratorGetState
+ * when an error occurs.
+ * Also, some UCharIterator implementations may not be able to return
+ * a valid state for each position. This will be clearly documented
+ * for each such iterator (none of the public ones here).
+ *
+ * @stable ICU 2.6
+ */
+#define UITER_NO_STATE ((uint32_t)0xffffffff)
+
+/**
+ * Function type declaration for UCharIterator.getIndex().
+ *
+ * Gets the current position, or the start or limit of the
+ * iteration range.
+ *
+ * This function may perform slowly for UITER_CURRENT after setState() was called,
+ * or for UITER_LENGTH, because an iterator implementation may have to count
+ * UChars if the underlying storage is not UTF-16.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param origin get the 0, start, limit, length, or current index
+ * @return the requested index, or U_SENTINEL in an error condition
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.move().
+ *
+ * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
+ *
+ * Moves the current position relative to the start or limit of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * Out of bounds movement will be pinned to the start or limit.
+ *
+ * This function may perform slowly for moving relative to UITER_LENGTH
+ * because an iterator implementation may have to count the rest of the
+ * UChars if the native storage is not UTF-16.
+ *
+ * When moving relative to the limit or length, or
+ * relative to the current position after setState() was called,
+ * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
+ * determination of the actual UTF-16 index.
+ * The actual index can be determined with getIndex(UITER_CURRENT)
+ * which will count the UChars if necessary.
+ * See UITER_UNKNOWN_INDEX for details.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param delta can be positive, zero, or negative
+ * @param origin move relative to the 0, start, limit, length, or current index
+ * @return the new index, or U_SENTINEL on an error condition,
+ * or UITER_UNKNOWN_INDEX when the index is not known.
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @see UITER_UNKNOWN_INDEX
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.hasNext().
+ *
+ * Check if current() and next() can still
+ * return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether current() and next() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.hasPrevious().
+ *
+ * Check if previous() can still return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether previous() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasPrevious(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.current().
+ *
+ * Return the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorCurrent(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.next().
+ *
+ * Return the code unit at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.previous().
+ *
+ * Decrement the index and return the code unit from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code unit (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorPrevious(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.reservedFn().
+ * Reserved for future use.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param something some integer argument
+ * @return some integer
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorReserved(UCharIterator *iter, int32_t something);
+
+/**
+ * Function type declaration for UCharIterator.getState().
+ *
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * It is recommended that the state value be calculated to be as small as
+ * is feasible. For strings with limited lengths, fewer than 32 bits may
+ * be sufficient.
+ *
+ * This is used together with setState()/UCharIteratorSetState
+ * to save and restore the iterator position more efficiently than with
+ * getIndex()/move().
+ *
+ * The iterator state is defined as a uint32_t value because it is designed
+ * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
+ * of the character iterator.
+ *
+ * With some UCharIterator implementations (e.g., UTF-8),
+ * getting and setting the UTF-16 index with existing functions
+ * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
+ * relatively slow because the iterator has to "walk" from a known index
+ * to the requested one.
+ * This takes more time the farther it needs to go.
+ *
+ * An opaque state value allows an iterator implementation to provide
+ * an internal index (UTF-8: the source byte array index) for
+ * fast, constant-time restoration.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+typedef uint32_t U_CALLCONV
+UCharIteratorGetState(const UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.setState().
+ *
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * The iterator object need not be the same one as for which getState() was called,
+ * but it must be of the same type (set up using the same uiter_setXYZ function)
+ * and it must iterate over the same string
+ * (binary identical regardless of memory address).
+ * For more about the state word see UCharIteratorGetState.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ * on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @stable ICU 2.6
+ */
+typedef void U_CALLCONV
+UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+
+/**
+ * C API for code unit iteration.
+ * This can be used as a C wrapper around
+ * CharacterIterator, Replaceable, or implemented using simple strings, etc.
+ *
+ * There are two roles for using UCharIterator:
+ *
+ * A "provider" sets the necessary function pointers and controls the "protected"
+ * fields of the UCharIterator structure. A "provider" passes a UCharIterator
+ * into C APIs that need a UCharIterator as an abstract, flexible string interface.
+ *
+ * Implementations of such C APIs are "callers" of UCharIterator functions;
+ * they only use the "public" function pointers and never access the "protected"
+ * fields directly.
+ *
+ * The current() and next() functions only check the current index against the
+ * limit, and previous() only checks the current index against the start,
+ * to see if the iterator already reached the end of the iteration range.
+ *
+ * The assumption - in all iterators - is that the index is moved via the API,
+ * which means it won't go out of bounds, or the index is modified by
+ * user code that knows enough about the iterator implementation to set valid
+ * index values.
+ *
+ * UCharIterator functions return code unit values 0..0xffff,
+ * or U_SENTINEL if the iteration bounds are reached.
+ *
+ * @stable ICU 2.1
+ */
+struct UCharIterator {
+ /**
+ * (protected) Pointer to string or wrapped object or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ const void *context;
+
+ /**
+ * (protected) Length of string or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t length;
+
+ /**
+ * (protected) Start index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t start;
+
+ /**
+ * (protected) Current index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t index;
+
+ /**
+ * (protected) Limit index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t limit;
+
+ /**
+ * (protected) Used by UTF-8 iterators and possibly others.
+ * @stable ICU 2.1
+ */
+ int32_t reservedField;
+
+ /**
+ * (public) Returns the current position or the
+ * start or limit index of the iteration range.
+ *
+ * @see UCharIteratorGetIndex
+ * @stable ICU 2.1
+ */
+ UCharIteratorGetIndex *getIndex;
+
+ /**
+ * (public) Moves the current position relative to the start or limit of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ *
+ * @see UCharIteratorMove
+ * @stable ICU 2.1
+ */
+ UCharIteratorMove *move;
+
+ /**
+ * (public) Check if current() and next() can still
+ * return another code unit.
+ *
+ * @see UCharIteratorHasNext
+ * @stable ICU 2.1
+ */
+ UCharIteratorHasNext *hasNext;
+
+ /**
+ * (public) Check if previous() can still return another code unit.
+ *
+ * @see UCharIteratorHasPrevious
+ * @stable ICU 2.1
+ */
+ UCharIteratorHasPrevious *hasPrevious;
+
+ /**
+ * (public) Return the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ *
+ * @see UCharIteratorCurrent
+ * @stable ICU 2.1
+ */
+ UCharIteratorCurrent *current;
+
+ /**
+ * (public) Return the code unit at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @see UCharIteratorNext
+ * @stable ICU 2.1
+ */
+ UCharIteratorNext *next;
+
+ /**
+ * (public) Decrement the index and return the code unit from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @see UCharIteratorPrevious
+ * @stable ICU 2.1
+ */
+ UCharIteratorPrevious *previous;
+
+ /**
+ * (public) Reserved for future use. Currently NULL.
+ *
+ * @see UCharIteratorReserved
+ * @stable ICU 2.1
+ */
+ UCharIteratorReserved *reservedFn;
+
+ /**
+ * (public) Return the state of the iterator, to be restored later with setState().
+ * This function pointer is NULL if the iterator does not implement it.
+ *
+ * @see UCharIteratorGetState
+ * @stable ICU 2.6
+ */
+ UCharIteratorGetState *getState;
+
+ /**
+ * (public) Restore the iterator state from the state word from a call
+ * to getState().
+ * This function pointer is NULL if the iterator does not implement it.
+ *
+ * @see UCharIteratorSetState
+ * @stable ICU 2.6
+ */
+ UCharIteratorSetState *setState;
+};
+
+/**
+ * Helper function for UCharIterator to get the code point
+ * at the current index.
+ *
+ * Return the code point that includes the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ * If the current code unit is a lead or trail surrogate,
+ * then the following or preceding surrogate is used to form
+ * the code point value.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point
+ *
+ * @see UCharIterator
+ * @see U16_GET
+ * @see UnicodeString::char32At()
+ * @stable ICU 2.1
+ */
+U_CAPI UChar32 U_EXPORT2
+uiter_current32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the next code point.
+ *
+ * Return the code point at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @see U16_NEXT
+ * @stable ICU 2.1
+ */
+U_CAPI UChar32 U_EXPORT2
+uiter_next32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the previous code point.
+ *
+ * Decrement the index and return the code point from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code point (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @see U16_PREV
+ * @stable ICU 2.1
+ */
+U_CAPI UChar32 U_EXPORT2
+uiter_previous32(UCharIterator *iter);
+
+/**
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * This is a convenience function that calls iter->getState(iter)
+ * if iter->getState is not NULL;
+ * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+U_CAPI uint32_t U_EXPORT2
+uiter_getState(const UCharIterator *iter);
+
+/**
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
+ * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ * on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+/**
+ * Set up a UCharIterator to iterate over a string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the string s
+ * with iteration boundaries start=index=0 and length=limit=string length.
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length.
+ * The length field will be ignored.
+ *
+ * The string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s String to iterate over
+ * @param length Length of s, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_CAPI void U_EXPORT2
+uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-16BE string
+ * (byte vector with a big-endian pair of bytes per UChar).
+ *
+ * Everything works just like with a normal UChar iterator (uiter_setString),
+ * except that UChars are assembled from byte pairs,
+ * and that the length argument here indicates an even number of bytes.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-16BE string to iterate over
+ * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
+ * (NUL means pair of 0 bytes at even index from s)
+ *
+ * @see UCharIterator
+ * @see uiter_setString
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-8 string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
+ * with UTF-8 iteration boundaries 0 and length.
+ * The implementation counts the UTF-16 index on the fly and
+ * lazily evaluates the UTF-16 length of the text.
+ *
+ * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
+ * When the reservedField is not 0, then it contains a supplementary code point
+ * and the UTF-16 index is between the two corresponding surrogates.
+ * At that point, the UTF-8 index is behind that code point.
+ *
+ * The UTF-8 string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() returns a state value consisting of
+ * - the current UTF-8 source byte index (bits 31..1)
+ * - a flag (bit 0) that indicates whether the UChar position is in the middle
+ * of a surrogate pair
+ * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
+ *
+ * getState() cannot also encode the UTF-16 index in the state value.
+ * move(relative to limit or length), or
+ * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-8 string to iterate over
+ * @param length Length of s in bytes, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
+
+#if U_SHOW_CPLUSPLUS_API
+
+/**
+ * Set up a UCharIterator to wrap around a C++ CharacterIterator.
+ *
+ * Sets the UCharIterator function pointers for iteration using the
+ * CharacterIterator charIter.
+ *
+ * The CharacterIterator pointer charIter is set into UCharIterator.context
+ * without copying or cloning the CharacterIterator object.
+ * The other "protected" UCharIterator fields are set to 0 and will be ignored.
+ * The iteration index and boundaries are controlled by the CharacterIterator.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param charIter CharacterIterator to wrap
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_CAPI void U_EXPORT2
+uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter);
+
+/**
+ * Set up a UCharIterator to iterate over a C++ Replaceable.
+ *
+ * Sets the UCharIterator function pointers for iteration over the
+ * Replaceable rep with iteration boundaries start=index=0 and
+ * length=limit=rep->length().
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length=rep->length().
+ * The length field will be ignored.
+ *
+ * The Replaceable pointer rep is set into UCharIterator.context without copying
+ * or cloning/reallocating the Replaceable object.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param rep Replaceable to iterate over
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_CAPI void U_EXPORT2
+uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep);
+
+#endif
+
+U_CDECL_END
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/uldnames.h b/thirdparty/icu4c/common/unicode/uldnames.h
new file mode 100644
index 0000000000..47b047ece9
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/uldnames.h
@@ -0,0 +1,307 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2016, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*/
+
+#ifndef __ULDNAMES_H__
+#define __ULDNAMES_H__
+
+/**
+ * \file
+ * \brief C API: Provides display names of Locale ids and their components.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uscript.h"
+#include "unicode/udisplaycontext.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * Enum used in LocaleDisplayNames::createInstance.
+ * @stable ICU 4.4
+ */
+typedef enum {
+ /**
+ * Use standard names when generating a locale name,
+ * e.g. en_GB displays as 'English (United Kingdom)'.
+ * @stable ICU 4.4
+ */
+ ULDN_STANDARD_NAMES = 0,
+ /**
+ * Use dialect names when generating a locale name,
+ * e.g. en_GB displays as 'British English'.
+ * @stable ICU 4.4
+ */
+ ULDN_DIALECT_NAMES
+} UDialectHandling;
+
+/**
+ * Opaque C service object type for the locale display names API
+ * @stable ICU 4.4
+ */
+struct ULocaleDisplayNames;
+
+/**
+ * C typedef for struct ULocaleDisplayNames.
+ * @stable ICU 4.4
+ */
+typedef struct ULocaleDisplayNames ULocaleDisplayNames;
+
+#if !UCONFIG_NO_FORMATTING
+
+/**
+ * Returns an instance of LocaleDisplayNames that returns names
+ * formatted for the provided locale, using the provided
+ * dialectHandling. The usual value for dialectHandling is
+ * ULDN_STANDARD_NAMES.
+ *
+ * @param locale the display locale
+ * @param dialectHandling how to select names for locales
+ * @param pErrorCode the status code
+ * @return a ULocaleDisplayNames instance
+ * @stable ICU 4.4
+ */
+U_CAPI ULocaleDisplayNames * U_EXPORT2
+uldn_open(const char * locale,
+ UDialectHandling dialectHandling,
+ UErrorCode *pErrorCode);
+
+/**
+ * Closes a ULocaleDisplayNames instance obtained from uldn_open().
+ * @param ldn the ULocaleDisplayNames instance to be closed
+ * @stable ICU 4.4
+ */
+U_CAPI void U_EXPORT2
+uldn_close(ULocaleDisplayNames *ldn);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalULocaleDisplayNamesPointer
+ * "Smart pointer" class, closes a ULocaleDisplayNames via uldn_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalULocaleDisplayNamesPointer, ULocaleDisplayNames, uldn_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/* getters for state */
+
+/**
+ * Returns the locale used to determine the display names. This is
+ * not necessarily the same locale passed to {@link #uldn_open}.
+ * @param ldn the LocaleDisplayNames instance
+ * @return the display locale
+ * @stable ICU 4.4
+ */
+U_CAPI const char * U_EXPORT2
+uldn_getLocale(const ULocaleDisplayNames *ldn);
+
+/**
+ * Returns the dialect handling used in the display names.
+ * @param ldn the LocaleDisplayNames instance
+ * @return the dialect handling enum
+ * @stable ICU 4.4
+ */
+U_CAPI UDialectHandling U_EXPORT2
+uldn_getDialectHandling(const ULocaleDisplayNames *ldn);
+
+/* names for entire locales */
+
+/**
+ * Returns the display name of the provided locale.
+ * @param ldn the LocaleDisplayNames instance
+ * @param locale the locale whose display name to return
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_localeDisplayName(const ULocaleDisplayNames *ldn,
+ const char *locale,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/* names for components of a locale */
+
+/**
+ * Returns the display name of the provided language code.
+ * @param ldn the LocaleDisplayNames instance
+ * @param lang the language code whose display name to return
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_languageDisplayName(const ULocaleDisplayNames *ldn,
+ const char *lang,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the display name of the provided script.
+ * @param ldn the LocaleDisplayNames instance
+ * @param script the script whose display name to return
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_scriptDisplayName(const ULocaleDisplayNames *ldn,
+ const char *script,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the display name of the provided script code.
+ * @param ldn the LocaleDisplayNames instance
+ * @param scriptCode the script code whose display name to return
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn,
+ UScriptCode scriptCode,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the display name of the provided region code.
+ * @param ldn the LocaleDisplayNames instance
+ * @param region the region code whose display name to return
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_regionDisplayName(const ULocaleDisplayNames *ldn,
+ const char *region,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the display name of the provided variant.
+ * @param ldn the LocaleDisplayNames instance
+ * @param variant the variant whose display name to return
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_variantDisplayName(const ULocaleDisplayNames *ldn,
+ const char *variant,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the display name of the provided locale key.
+ * @param ldn the LocaleDisplayNames instance
+ * @param key the locale key whose display name to return
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_keyDisplayName(const ULocaleDisplayNames *ldn,
+ const char *key,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the display name of the provided value (used with the provided key).
+ * @param ldn the LocaleDisplayNames instance
+ * @param key the locale key
+ * @param value the locale key's value
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn,
+ const char *key,
+ const char *value,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/**
+* Returns an instance of LocaleDisplayNames that returns names formatted
+* for the provided locale, using the provided UDisplayContext settings.
+*
+* @param locale The display locale
+* @param contexts List of one or more context settings (e.g. for dialect
+* handling, capitalization, etc.)
+* @param length Number of items in the contexts list
+* @param pErrorCode Pointer to UErrorCode input/output status. If at entry this indicates
+* a failure status, the function will do nothing; otherwise this will be
+* updated with any new status from the function.
+* @return a ULocaleDisplayNames instance
+* @stable ICU 51
+*/
+U_CAPI ULocaleDisplayNames * U_EXPORT2
+uldn_openForContext(const char * locale, UDisplayContext *contexts,
+ int32_t length, UErrorCode *pErrorCode);
+
+/**
+* Returns the UDisplayContext value for the specified UDisplayContextType.
+* @param ldn the ULocaleDisplayNames instance
+* @param type the UDisplayContextType whose value to return
+* @param pErrorCode Pointer to UErrorCode input/output status. If at entry this indicates
+* a failure status, the function will do nothing; otherwise this will be
+* updated with any new status from the function.
+* @return the UDisplayContextValue for the specified type.
+* @stable ICU 51
+*/
+U_CAPI UDisplayContext U_EXPORT2
+uldn_getContext(const ULocaleDisplayNames *ldn, UDisplayContextType type,
+ UErrorCode *pErrorCode);
+
+#endif /* !UCONFIG_NO_FORMATTING */
+#endif /* __ULDNAMES_H__ */
diff --git a/thirdparty/icu4c/common/unicode/uloc.h b/thirdparty/icu4c/common/unicode/uloc.h
new file mode 100644
index 0000000000..3addb847e7
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/uloc.h
@@ -0,0 +1,1393 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File ULOC.H
+*
+* Modification History:
+*
+* Date Name Description
+* 04/01/97 aliu Creation.
+* 08/22/98 stephen JDK 1.2 sync.
+* 12/08/98 rtg New C API for Locale
+* 03/30/99 damiba overhaul
+* 03/31/99 helena Javadoc for uloc functions.
+* 04/15/99 Madhu Updated Javadoc
+********************************************************************************
+*/
+
+#ifndef ULOC_H
+#define ULOC_H
+
+#include "unicode/utypes.h"
+#include "unicode/uenum.h"
+
+/**
+ * \file
+ * \brief C API: Locale
+ *
+ * <h2> ULoc C API for Locale </h2>
+ * A <code>Locale</code> represents a specific geographical, political,
+ * or cultural region. An operation that requires a <code>Locale</code> to perform
+ * its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
+ * to tailor information for the user. For example, displaying a number
+ * is a locale-sensitive operation--the number should be formatted
+ * according to the customs/conventions of the user's native country,
+ * region, or culture. In the C APIs, a locale is simply a const char string.
+ *
+ * <P>
+ * You create a <code>Locale</code> with one of the three options listed below.
+ * Each of the components is separated by '_' in the locale string.
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ * newLanguage
+ *
+ * newLanguage + newCountry
+ *
+ * newLanguage + newCountry + newVariant
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * The first option is a valid <STRONG>ISO
+ * Language Code.</STRONG> These codes are the lower-case two-letter
+ * codes as defined by ISO-639.
+ * You can find a full list of these codes at a number of sites, such as:
+ * <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt">
+ * http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</a>
+ *
+ * <P>
+ * The second option includes an additional <STRONG>ISO Country
+ * Code.</STRONG> These codes are the upper-case two-letter codes
+ * as defined by ISO-3166.
+ * You can find a full list of these codes at a number of sites, such as:
+ * <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html">
+ * http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</a>
+ *
+ * <P>
+ * The third option requires one more piece of information--the
+ * <STRONG>Variant.</STRONG>
+ * The Variant codes are vendor and browser-specific.
+ * For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
+ * Where there are two variants, separate them with an underscore, and
+ * put the most important one first. For
+ * example, a Traditional Spanish collation might be referenced, with
+ * "ES", "ES", "Traditional_WIN".
+ *
+ * <P>
+ * Because a <code>Locale</code> is just an identifier for a region,
+ * no validity check is performed when you specify a <code>Locale</code>.
+ * If you want to see whether particular resources are available for the
+ * <code>Locale</code> you asked for, you must query those resources. For
+ * example, ask the <code>UNumberFormat</code> for the locales it supports
+ * using its <code>getAvailable</code> method.
+ * <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
+ * locale, you get back the best available match, not necessarily
+ * precisely what you asked for. For more information, look at
+ * <code>UResourceBundle</code>.
+ *
+ * <P>
+ * The <code>Locale</code> provides a number of convenient constants
+ * that you can use to specify the commonly used
+ * locales. For example, the following refers to a locale
+ * for the United States:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ * ULOC_US
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * <P>
+ * Once you've specified a locale you can query it for information about
+ * itself. Use <code>uloc_getCountry</code> to get the ISO Country Code and
+ * <code>uloc_getLanguage</code> to get the ISO Language Code. You can
+ * use <code>uloc_getDisplayCountry</code> to get the
+ * name of the country suitable for displaying to the user. Similarly,
+ * you can use <code>uloc_getDisplayLanguage</code> to get the name of
+ * the language suitable for displaying to the user. Interestingly,
+ * the <code>uloc_getDisplayXXX</code> methods are themselves locale-sensitive
+ * and have two versions: one that uses the default locale and one
+ * that takes a locale as an argument and displays the name or country in
+ * a language appropriate to that locale.
+ *
+ * <P>
+ * The ICU provides a number of services that perform locale-sensitive
+ * operations. For example, the <code>unum_xxx</code> functions format
+ * numbers, currency, or percentages in a locale-sensitive manner.
+ * </P>
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ * UErrorCode success = U_ZERO_ERROR;
+ * UNumberFormat *nf;
+ * const char* myLocale = "fr_FR";
+ *
+ * nf = unum_open( UNUM_DEFAULT, NULL, 0, NULL, NULL, &success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_CURRENCY, NULL, 0, NULL, NULL, &success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_PERCENT, NULL, 0, NULL, NULL, &success );
+ * unum_close(nf);
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * Each of these methods has two variants; one with an explicit locale
+ * and one without; the latter using the default locale.
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ *
+ * nf = unum_open( UNUM_DEFAULT, NULL, 0, myLocale, NULL, &success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_CURRENCY, NULL, 0, myLocale, NULL, &success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_PERCENT, NULL, 0, myLocale, NULL, &success );
+ * unum_close(nf);
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * A <code>Locale</code> is the mechanism for identifying the kind of services
+ * (<code>UNumberFormat</code>) that you would like to get. The locale is
+ * <STRONG>just</STRONG> a mechanism for identifying these services.
+ *
+ * <P>
+ * Each international service that performs locale-sensitive operations
+ * allows you
+ * to get all the available objects of that type. You can sift
+ * through these objects by language, country, or variant,
+ * and use the display names to present a menu to the user.
+ * For example, you can create a menu of all the collation objects
+ * suitable for a given language. Such classes implement these
+ * three class methods:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ * const char* uloc_getAvailable(int32_t index);
+ * int32_t uloc_countAvailable();
+ * int32_t
+ * uloc_getDisplayName(const char* localeID,
+ * const char* inLocaleID,
+ * UChar* result,
+ * int32_t maxResultSize,
+ * UErrorCode* err);
+ *
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * <P>
+ * Concerning POSIX/RFC1766 Locale IDs,
+ * the getLanguage/getCountry/getVariant/getName functions do understand
+ * the POSIX type form of language_COUNTRY.ENCODING\@VARIANT
+ * and if there is not an ICU-style variant, uloc_getVariant() for example
+ * will return the one listed after the \@ sign. As well, the hyphen
+ * "-" is recognized as a country/variant separator similarly to RFC1766.
+ * So for example, "en-us" will be interpreted as en_US.
+ * As a result, uloc_getName() is far from a no-op, and will have the
+ * effect of converting POSIX/RFC1766 IDs into ICU form, although it does
+ * NOT map any of the actual codes (i.e. russian->ru) in any way.
+ * Applications should call uloc_getName() at the point where a locale ID
+ * is coming from an external source (user entry, OS, web browser)
+ * and pass the resulting string to other ICU functions. For example,
+ * don't use de-de\@EURO as an argument to resourcebundle.
+ *
+ * @see UResourceBundle
+ */
+
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_CHINESE "zh"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_ENGLISH "en"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_FRENCH "fr"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_GERMAN "de"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_ITALIAN "it"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_JAPANESE "ja"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_KOREAN "ko"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_SIMPLIFIED_CHINESE "zh_CN"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_TRADITIONAL_CHINESE "zh_TW"
+
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CANADA "en_CA"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CANADA_FRENCH "fr_CA"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CHINA "zh_CN"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_PRC "zh_CN"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_FRANCE "fr_FR"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_GERMANY "de_DE"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_ITALY "it_IT"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_JAPAN "ja_JP"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_KOREA "ko_KR"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_TAIWAN "zh_TW"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_UK "en_GB"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_US "en_US"
+
+/**
+ * Useful constant for the maximum size of the language part of a locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.0
+ */
+#define ULOC_LANG_CAPACITY 12
+
+/**
+ * Useful constant for the maximum size of the country part of a locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.0
+ */
+#define ULOC_COUNTRY_CAPACITY 4
+/**
+ * Useful constant for the maximum size of the whole locale ID
+ * (including the terminating NULL and all keywords).
+ * @stable ICU 2.0
+ */
+#define ULOC_FULLNAME_CAPACITY 157
+
+/**
+ * Useful constant for the maximum size of the script part of a locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.8
+ */
+#define ULOC_SCRIPT_CAPACITY 6
+
+/**
+ * Useful constant for the maximum size of keywords in a locale
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORDS_CAPACITY 96
+
+/**
+ * Useful constant for the maximum total size of keywords and their values in a locale
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
+
+/**
+ * Invariant character separating keywords from the locale string
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_SEPARATOR '@'
+
+/**
+ * Unicode code point for '@' separating keywords from the locale string.
+ * @see ULOC_KEYWORD_SEPARATOR
+ * @stable ICU 4.6
+ */
+#define ULOC_KEYWORD_SEPARATOR_UNICODE 0x40
+
+/**
+ * Invariant character for assigning value to a keyword
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_ASSIGN '='
+
+/**
+ * Unicode code point for '=' for assigning value to a keyword.
+ * @see ULOC_KEYWORD_ASSIGN
+ * @stable ICU 4.6
+ */
+#define ULOC_KEYWORD_ASSIGN_UNICODE 0x3D
+
+/**
+ * Invariant character separating keywords
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_ITEM_SEPARATOR ';'
+
+/**
+ * Unicode code point for ';' separating keywords
+ * @see ULOC_KEYWORD_ITEM_SEPARATOR
+ * @stable ICU 4.6
+ */
+#define ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE 0x3B
+
+/**
+ * Constants for *_getLocale()
+ * Allow user to select whether she wants information on
+ * requested, valid or actual locale.
+ * For example, a collator for "en_US_CALIFORNIA" was
+ * requested. In the current state of ICU (2.0),
+ * the requested locale is "en_US_CALIFORNIA",
+ * the valid locale is "en_US" (most specific locale supported by ICU)
+ * and the actual locale is "root" (the collation data comes unmodified
+ * from the UCA)
+ * The locale is considered supported by ICU if there is a core ICU bundle
+ * for that locale (although it may be empty).
+ * @stable ICU 2.1
+ */
+typedef enum {
+ /** This is the locale the data actually comes from
+ * @stable ICU 2.1
+ */
+ ULOC_ACTUAL_LOCALE = 0,
+ /** This is the most specific locale supported by ICU
+ * @stable ICU 2.1
+ */
+ ULOC_VALID_LOCALE = 1,
+
+#ifndef U_HIDE_DEPRECATED_API
+ /** This is the requested locale
+ * @deprecated ICU 2.8
+ */
+ ULOC_REQUESTED_LOCALE = 2,
+
+ /**
+ * One more than the highest normal ULocDataLocaleType value.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ ULOC_DATA_LOCALE_TYPE_LIMIT = 3
+#endif // U_HIDE_DEPRECATED_API
+} ULocDataLocaleType;
+
+#ifndef U_HIDE_SYSTEM_API
+/**
+ * Gets ICU's default locale.
+ * The returned string is a snapshot in time, and will remain valid
+ * and unchanged even when uloc_setDefault() is called.
+ * The returned storage is owned by ICU, and must not be altered or deleted
+ * by the caller.
+ *
+ * @return the ICU default locale
+ * @system
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+uloc_getDefault(void);
+
+/**
+ * Sets ICU's default locale.
+ * By default (without calling this function), ICU's default locale will be based
+ * on information obtained from the underlying system environment.
+ * <p>
+ * Changes to ICU's default locale do not propagate back to the
+ * system environment.
+ * <p>
+ * Changes to ICU's default locale do not affect any ICU services that
+ * may already be open based on the previous default locale value.
+ *
+ * @param localeID the new ICU default locale. A value of NULL will try to get
+ * the system's default locale.
+ * @param status the error information if the setting of default locale fails
+ * @system
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+uloc_setDefault(const char* localeID,
+ UErrorCode* status);
+#endif /* U_HIDE_SYSTEM_API */
+
+/**
+ * Gets the language code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO language code with
+ * @param language the language code for localeID
+ * @param languageCapacity the size of the language buffer to store the
+ * language code with
+ * @param err error information if retrieving the language code failed
+ * @return the actual buffer size needed for the language code. If it's greater
+ * than languageCapacity, the returned language code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getLanguage(const char* localeID,
+ char* language,
+ int32_t languageCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the script code for the specified locale.
+ *
+ * @param localeID the locale to get the script code with
+ * @param script the script code for localeID
+ * @param scriptCapacity the size of the script buffer to store the
+ * script code with
+ * @param err error information if retrieving the script code failed
+ * @return the actual buffer size needed for the script code. If it's greater
+ * than scriptCapacity, the returned script code will be truncated.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getScript(const char* localeID,
+ char* script,
+ int32_t scriptCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the country code for the specified locale.
+ *
+ * @param localeID the locale to get the country code with
+ * @param country the country code for localeID
+ * @param countryCapacity the size of the country buffer to store the
+ * country code with
+ * @param err error information if retrieving the country code failed
+ * @return the actual buffer size needed for the country code. If it's greater
+ * than countryCapacity, the returned country code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getCountry(const char* localeID,
+ char* country,
+ int32_t countryCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the variant code for the specified locale.
+ *
+ * @param localeID the locale to get the variant code with
+ * @param variant the variant code for localeID
+ * @param variantCapacity the size of the variant buffer to store the
+ * variant code with
+ * @param err error information if retrieving the variant code failed
+ * @return the actual buffer size needed for the variant code. If it's greater
+ * than variantCapacity, the returned variant code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getVariant(const char* localeID,
+ char* variant,
+ int32_t variantCapacity,
+ UErrorCode* err);
+
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the ICU locale ID to
+ * a certain extent. Upper and lower case are set as needed.
+ * It does NOT map aliased names in any way.
+ * See the top of this header file.
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getName(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format. It does NOT map aliased names in any way.
+ * See the top of this header file.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name the full name for localeID
+ * @param nameCapacity the size of the name buffer to store the
+ * full name with
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_canonicalize(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the ISO language code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO language code with
+ * @return the ISO language code for localeID
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+uloc_getISO3Language(const char* localeID);
+
+
+/**
+ * Gets the ISO country code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO country code with
+ * @return the ISO country code for localeID
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+uloc_getISO3Country(const char* localeID);
+
+/**
+ * Gets the Win32 LCID value for the specified locale.
+ * If the ICU locale is not recognized by Windows, 0 will be returned.
+ *
+ * LCIDs were deprecated with Windows Vista and Microsoft recommends
+ * that developers use BCP47 style tags instead (uloc_toLanguageTag).
+ *
+ * @param localeID the locale to get the Win32 LCID value with
+ * @return the Win32 LCID for localeID
+ * @stable ICU 2.0
+ */
+U_CAPI uint32_t U_EXPORT2
+uloc_getLCID(const char* localeID);
+
+/**
+ * Gets the language name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable language code with
+ * @param displayLocale Specifies the locale to be used to display the name. In
+ * other words, if the locale's language code is "en", passing
+ * Locale::getFrench() for displayLocale would result in "Anglais",
+ * while passing Locale::getGerman() for displayLocale would result
+ * in "Englisch".
+ * @param language the displayable language code for localeID
+ * @param languageCapacity the size of the language buffer to store the
+ * displayable language code with.
+ * @param status error information if retrieving the displayable language code
+ * failed. U_USING_DEFAULT_WARNING indicates that no data was
+ * found from the locale resources and a case canonicalized
+ * language code is placed into language as fallback.
+ * @return the actual buffer size needed for the displayable language code. If
+ * it's greater than languageCapacity, the returned language
+ * code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayLanguage(const char* locale,
+ const char* displayLocale,
+ UChar* language,
+ int32_t languageCapacity,
+ UErrorCode* status);
+
+/**
+ * Gets the script name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable script code with. NULL may be
+ * used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In
+ * other words, if the locale's language code is "en", passing
+ * Locale::getFrench() for inLocale would result in "", while
+ * passing Locale::getGerman() for inLocale would result in "".
+ * NULL may be used to specify the default.
+ * @param script the displayable script for the localeID.
+ * @param scriptCapacity the size of the script buffer to store the displayable
+ * script code with.
+ * @param status error information if retrieving the displayable script code
+ * failed. U_USING_DEFAULT_WARNING indicates that no data was
+ * found from the locale resources and a case canonicalized
+ * script code is placed into script as fallback.
+ * @return the actual buffer size needed for the displayable script code. If
+ * it's greater than scriptCapacity, the returned displayable
+ * script code will be truncated.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayScript(const char* locale,
+ const char* displayLocale,
+ UChar* script,
+ int32_t scriptCapacity,
+ UErrorCode* status);
+
+/**
+ * Gets the country name suitable for display for the specified locale.
+ * Warning: this is for the region part of a valid locale ID; it cannot just be
+ * the region code (like "FR"). To get the display name for a region alone, or
+ * for other options, use ULocaleDisplayNames instead.
+ *
+ * @param locale the locale to get the displayable country code with. NULL may
+ * be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In
+ * other words, if the locale's language code is "en", passing
+ * Locale::getFrench() for inLocale would result in "Anglais",
+ * while passing Locale::getGerman() for inLocale would result
+ * in "Englisch". NULL may be used to specify the default.
+ * @param country the displayable country code for localeID.
+ * @param countryCapacity the size of the country buffer to store the
+ * displayable country code with.
+ * @param status error information if retrieving the displayable country code
+ * failed. U_USING_DEFAULT_WARNING indicates that no data was
+ * found from the locale resources and a case canonicalized
+ * country code is placed into country as fallback.
+ * @return the actual buffer size needed for the displayable country code. If
+ * it's greater than countryCapacity, the returned displayable
+ * country code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayCountry(const char* locale,
+ const char* displayLocale,
+ UChar* country,
+ int32_t countryCapacity,
+ UErrorCode* status);
+
+
+/**
+ * Gets the variant name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable variant code with. NULL may
+ * be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In
+ * other words, if the locale's language code is "en", passing
+ * Locale::getFrench() for inLocale would result in "Anglais",
+ * while passing Locale::getGerman() for inLocale would result
+ * in "Englisch". NULL may be used to specify the default.
+ * @param variant the displayable variant code for localeID.
+ * @param variantCapacity the size of the variant buffer to store the
+ * displayable variant code with.
+ * @param status error information if retrieving the displayable variant code
+ * failed. U_USING_DEFAULT_WARNING indicates that no data was
+ * found from the locale resources and a case canonicalized
+ * variant code is placed into variant as fallback.
+ * @return the actual buffer size needed for the displayable variant code. If
+ * it's greater than variantCapacity, the returned displayable
+ * variant code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayVariant(const char* locale,
+ const char* displayLocale,
+ UChar* variant,
+ int32_t variantCapacity,
+ UErrorCode* status);
+
+/**
+ * Gets the keyword name suitable for display for the specified locale. E.g:
+ * for the locale string de_DE\@collation=PHONEBOOK, this API gets the display
+ * string for the keyword collation.
+ * Usage:
+ * <code>
+ * UErrorCode status = U_ZERO_ERROR;
+ * const char* keyword =NULL;
+ * int32_t keywordLen = 0;
+ * int32_t keywordCount = 0;
+ * UChar displayKeyword[256];
+ * int32_t displayKeywordLen = 0;
+ * UEnumeration* keywordEnum = uloc_openKeywords("de_DE@collation=PHONEBOOK;calendar=TRADITIONAL", &status);
+ * for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){
+ * if(U_FAILURE(status)){
+ * ...something went wrong so handle the error...
+ * break;
+ * }
+ * // the uenum_next returns NUL terminated string
+ * keyword = uenum_next(keywordEnum, &keywordLen, &status);
+ * displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256, &status);
+ * ... do something interesting .....
+ * }
+ * uenum_close(keywordEnum);
+ * </code>
+ * @param keyword The keyword whose display string needs to be returned.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "Anglais", while passing Locale::getGerman()
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest the buffer to which the displayable keyword should be written.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param status error information if retrieving the displayable string failed.
+ * Should not be NULL and should not indicate failure on entry.
+ * U_USING_DEFAULT_WARNING indicates that no data was found from the locale
+ * resources and the keyword is placed into dest as fallback.
+ * @return the actual buffer size needed for the displayable keyword.
+ * @see #uloc_openKeywords
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayKeyword(const char* keyword,
+ const char* displayLocale,
+ UChar* dest,
+ int32_t destCapacity,
+ UErrorCode* status);
+/**
+ * Gets the value of the keyword suitable for display for the specified locale.
+ * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display
+ * string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword.
+ *
+ * @param locale The locale to get the displayable keyword value with. NULL may be used to specify the default.
+ * @param keyword The keyword whose value should be displayed.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "Anglais", while passing Locale::getGerman()
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest the buffer to which the displayable keyword should be written.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param status error information if retrieving the displayable string failed.
+ * Should not be NULL and must not indicate failure on entry.
+ * U_USING_DEFAULT_WARNING indicates that no data was found from the locale
+ * resources and the value of the keyword is placed into dest as fallback.
+ * @return the actual buffer size needed for the displayable keyword value.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayKeywordValue( const char* locale,
+ const char* keyword,
+ const char* displayLocale,
+ UChar* dest,
+ int32_t destCapacity,
+ UErrorCode* status);
+/**
+ * Gets the full name suitable for display for the specified locale.
+ *
+ * @param localeID the locale to get the displayable name with. NULL may be used to specify the default.
+ * @param inLocaleID Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "Anglais", while passing Locale::getGerman()
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param result the displayable name for localeID
+ * @param maxResultSize the size of the name buffer to store the
+ * displayable full name with
+ * @param err error information if retrieving the displayable name failed
+ * @return the actual buffer size needed for the displayable name. If it's greater
+ * than maxResultSize, the returned displayable name will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayName(const char* localeID,
+ const char* inLocaleID,
+ UChar* result,
+ int32_t maxResultSize,
+ UErrorCode* err);
+
+
+/**
+ * Gets the specified locale from a list of available locales.
+ *
+ * This method corresponds to uloc_openAvailableByType called with the
+ * ULOC_AVAILABLE_DEFAULT type argument.
+ *
+ * The return value is a pointer to an item of a locale name array. Both this
+ * array and the pointers it contains are owned by ICU and should not be
+ * deleted or written through by the caller. The locale name is a
+ * NUL-terminated string.
+ *
+ * @param n the specific locale name index of the available locale list;
+ * should not exceed the number returned by uloc_countAvailable.
+ * @return a specified locale name of all available locales
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+uloc_getAvailable(int32_t n);
+
+/**
+ * Gets the size of the list of all available locales.
+ *
+ * @return the size of the locale list
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2 uloc_countAvailable(void);
+
+/**
+ * Selects which set of available locales uloc_openAvailableByType()
+ * enumerates.
+ *
+ * @stable ICU 65
+ */
+typedef enum ULocAvailableType {
+ /**
+ * Locales that return data when passed to ICU APIs,
+ * but not including legacy or alias locales.
+ *
+ * @stable ICU 65
+ */
+ ULOC_AVAILABLE_DEFAULT,
+
+ /**
+ * Legacy or alias locales that return data when passed to ICU APIs.
+ * Examples of supported legacy or alias locales:
+ *
+ * - iw (alias to he)
+ * - mo (alias to ro)
+ * - zh_CN (alias to zh_Hans_CN)
+ * - sr_BA (alias to sr_Cyrl_BA)
+ * - ars (alias to ar_SA)
+ *
+ * The locales in this set are disjoint from the ones in
+ * ULOC_AVAILABLE_DEFAULT. To get both sets at the same time, use
+ * ULOC_AVAILABLE_WITH_LEGACY_ALIASES.
+ *
+ * @stable ICU 65
+ */
+ ULOC_AVAILABLE_ONLY_LEGACY_ALIASES,
+
+ /**
+ * The union of the locales in ULOC_AVAILABLE_DEFAULT and
+ * ULOC_AVAILABLE_ONLY_LEGACY_ALIASES.
+ *
+ * @stable ICU 65
+ */
+ ULOC_AVAILABLE_WITH_LEGACY_ALIASES,
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Last enumerator; its value equals the number of locale-set types
+ * declared above. Presumably used only for bounds checking and not a
+ * valid type argument -- confirm against the implementation.
+ * @internal
+ */
+ ULOC_AVAILABLE_COUNT
+#endif
+} ULocAvailableType;
+
+/**
+ * Gets a list of available locales according to the type argument, allowing
+ * the user to access different sets of supported locales in ICU.
+ *
+ * The returned UEnumeration must be closed by the caller.
+ *
+ * @param type Type choice from ULocAvailableType.
+ * @param status Set if an error occurred.
+ * @return a UEnumeration owned by the caller, or nullptr on failure.
+ * @stable ICU 65
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uloc_openAvailableByType(ULocAvailableType type, UErrorCode* status);
+
+/**
+ *
+ * Gets a list of all available 2-letter language codes defined in ISO 639,
+ * plus additional 3-letter codes determined to be useful for locale generation as
+ * defined by Unicode CLDR. This is a pointer
+ * to an array of pointers to arrays of char. All of these pointers are owned
+ * by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ * @return a list of all available language codes
+ * @stable ICU 2.0
+ */
+U_CAPI const char* const* U_EXPORT2
+uloc_getISOLanguages(void);
+
+/**
+ *
+ * Gets a list of all available 2-letter country codes defined in ISO 3166. This is a
+ * pointer to an array of pointers to arrays of char. All of these pointers are
+ * owned by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ * @return a list of all available country codes
+ * @stable ICU 2.0
+ */
+U_CAPI const char* const* U_EXPORT2
+uloc_getISOCountries(void);
+
+/**
+ * Truncate the locale ID string to get the parent locale ID.
+ * Copies the part of the string before the last underscore.
+ * The parent locale ID will be an empty string if there is no
+ * underscore, or if there is only one underscore at localeID[0].
+ *
+ * @param localeID Input locale ID string.
+ * @param parent Output string buffer for the parent locale ID.
+ * @param parentCapacity Size of the output buffer.
+ * @param err A UErrorCode value.
+ * @return The length of the parent locale ID.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getParent(const char* localeID,
+ char* parent,
+ int32_t parentCapacity,
+ UErrorCode* err);
+
+
+
+
+/**
+ * Gets the full name for the specified locale, like uloc_getName(),
+ * but without keywords.
+ *
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format. It does NOT map aliased names in any way.
+ * See the top of this header file.
+ *
+ * This API strips off the keyword part, so "de_DE\@collation=phonebook"
+ * will become "de_DE".
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getBaseName(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets an enumeration of keywords for the specified locale. Enumeration
+ * must get disposed of by the client using uenum_close function.
+ *
+ * @param localeID the locale to get the keywords from
+ * @param status error information if retrieving the keywords failed
+ * @return enumeration of keywords or NULL if there are no keywords.
+ * @stable ICU 2.8
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uloc_openKeywords(const char* localeID,
+ UErrorCode* status);
+
+/**
+ * Get the value for a keyword. Locale name does not need to be normalized.
+ *
+ * @param localeID locale name containing the keyword ("de_DE@currency=EURO;collation=PHONEBOOK")
+ * @param keywordName name of the keyword for which we want the value; must not be
+ * NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive.
+ * @param buffer receiving buffer
+ * @param bufferCapacity capacity of receiving buffer
+ * @param status containing error code: e.g. buffer not big enough or ill-formed localeID
+ * or keywordName parameters.
+ * @return the length of keyword value
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getKeywordValue(const char* localeID,
+ const char* keywordName,
+ char* buffer, int32_t bufferCapacity,
+ UErrorCode* status);
+
+
+/**
+ * Sets or removes the value of the specified keyword.
+ *
+ * For removing all keywords, use uloc_getBaseName().
+ *
+ * NOTE: Unlike almost every other ICU function which takes a
+ * buffer, this function will NOT truncate the output text, and will
+ * not update the buffer with unterminated text setting a status of
+ * U_STRING_NOT_TERMINATED_WARNING. If a BUFFER_OVERFLOW_ERROR is received,
+ * it means a terminated version of the updated locale ID would not fit
+ * in the buffer, and the original buffer is untouched. This is done to
+ * prevent incorrect or possibly even malformed locales from being generated
+ * and used.
+ *
+ * @param keywordName name of the keyword to be set; must not be
+ * NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive.
+ * @param keywordValue value of the keyword to be set. If 0-length or
+ * NULL, will result in the keyword being removed; no error is given if
+ * that keyword does not exist. Otherwise, must consist only of
+ * [A-Za-z0-9] and [/_+-].
+ * @param buffer input buffer containing well-formed locale ID to be
+ * modified.
+ * @param bufferCapacity capacity of receiving buffer
+ * @param status containing error code: e.g. buffer not big enough
+ * or ill-formed keywordName or keywordValue parameters, or ill-formed
+ * locale ID in buffer on input.
+ * @return the length needed for the buffer
+ * @see uloc_getKeywordValue
+ * @stable ICU 3.2
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_setKeywordValue(const char* keywordName,
+ const char* keywordValue,
+ char* buffer, int32_t bufferCapacity,
+ UErrorCode* status);
+
+/**
+ * Returns whether the locale's script is written right-to-left.
+ * If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags().
+ * If no likely script is known, then false is returned.
+ *
+ * A script is right-to-left according to the CLDR script metadata
+ * which corresponds to whether the script's letters have Bidi_Class=R or AL.
+ *
+ * Returns true for "ar" and "en-Hebr", false for "zh" and "fa-Cyrl".
+ *
+ * @param locale input locale ID
+ * @return true if the locale's script is written right-to-left
+ * @stable ICU 54
+ */
+U_CAPI UBool U_EXPORT2
+uloc_isRightToLeft(const char *locale);
+
+/**
+ * Layout orientation values returned by uloc_getCharacterOrientation()
+ * and uloc_getLineOrientation().
+ * @stable ICU 4.0
+ */
+typedef enum {
+ ULOC_LAYOUT_LTR = 0, /* left-to-right. */
+ ULOC_LAYOUT_RTL = 1, /* right-to-left. */
+ ULOC_LAYOUT_TTB = 2, /* top-to-bottom. */
+ ULOC_LAYOUT_BTT = 3, /* bottom-to-top. */
+ ULOC_LAYOUT_UNKNOWN /* no explicit value: follows ULOC_LAYOUT_BTT, i.e. 4. */
+} ULayoutType;
+
+/**
+ * Get the layout character orientation for the specified locale.
+ *
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for characters.
+ * @stable ICU 4.0
+ */
+U_CAPI ULayoutType U_EXPORT2
+uloc_getCharacterOrientation(const char* localeId,
+ UErrorCode *status);
+
+/**
+ * Get the layout line orientation for the specified locale.
+ *
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for lines.
+ * @stable ICU 4.0
+ */
+U_CAPI ULayoutType U_EXPORT2
+uloc_getLineOrientation(const char* localeId,
+ UErrorCode *status);
+
+/**
+ * Output values which uloc_acceptLanguage() and uloc_acceptLanguageFromHTTP()
+ * report through their 'outResult' parameter.
+ *
+ * @see uloc_acceptLanguageFromHTTP
+ * @see uloc_acceptLanguage
+ * @stable ICU 3.2
+ */
+typedef enum {
+ /**
+ * No exact match was found.
+ * @stable ICU 3.2
+ */
+ ULOC_ACCEPT_FAILED = 0,
+ /**
+ * An exact match was found.
+ * @stable ICU 3.2
+ */
+ ULOC_ACCEPT_VALID = 1,
+ /**
+ * A fallback was found. For example, the Accept-Language list includes 'ja_JP'
+ * and is matched with available locale 'ja'.
+ * @stable ICU 3.2
+ */
+ ULOC_ACCEPT_FALLBACK = 2
+} UAcceptResult;
+
+/**
+ * Based on a HTTP header from a web browser and a list of available locales,
+ * determine an acceptable locale for the user.
+ *
+ * This is a thin wrapper over C++ class LocaleMatcher.
+ *
+ * @param result - buffer to accept the result locale
+ * @param resultAvailable the size of the result buffer.
+ * @param outResult - An out parameter that contains the fallback status
+ * @param httpAcceptLanguage - "Accept-Language:" header as per HTTP.
+ * @param availableLocales - list of available locales to match
+ * @param status ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return length needed for the locale.
+ * @stable ICU 3.2
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable,
+ UAcceptResult *outResult,
+ const char *httpAcceptLanguage,
+ UEnumeration* availableLocales,
+ UErrorCode *status);
+
+/**
+ * Based on a list of available locales,
+ * determine an acceptable locale for the user.
+ *
+ * This is a thin wrapper over C++ class LocaleMatcher.
+ *
+ * @param result - buffer to accept the result locale
+ * @param resultAvailable the size of the result buffer.
+ * @param outResult - An out parameter that contains the fallback status
+ * @param acceptList - list of acceptable languages
+ * @param acceptListCount - count of acceptList items
+ * @param availableLocales - list of available locales to match
+ * @param status ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return length needed for the locale.
+ * @stable ICU 3.2
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_acceptLanguage(char *result, int32_t resultAvailable,
+ UAcceptResult *outResult, const char **acceptList,
+ int32_t acceptListCount,
+ UEnumeration* availableLocales,
+ UErrorCode *status);
+
+
+/**
+ * Gets the ICU locale ID for the specified Win32 LCID value.
+ *
+ * @param hostID the Win32 LCID to translate
+ * @param locale the output buffer for the ICU locale ID, which will be NUL-terminated
+ * if there is room.
+ * @param localeCapacity the size of the output buffer
+ * @param status an error is returned if the LCID is unrecognized or the output buffer
+ * is too small
+ * @return the actual size of the locale ID, not including NUL-termination
+ * @stable ICU 3.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity,
+ UErrorCode *status);
+
+
+/**
+ * Add the likely subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the maximal form, or there is no data available
+ * for maximization, it will be copied to the output buffer. For example,
+ * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
+ *
+ * Examples:
+ *
+ * "en" maximizes to "en_Latn_US"
+ *
+ * "de" maximizes to "de_Latn_DE"
+ *
+ * "sr" maximizes to "sr_Cyrl_RS"
+ *
+ * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
+ *
+ * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
+ *
+ * @param localeID The locale to maximize
+ * @param maximizedLocaleID The maximized locale
+ * @param maximizedLocaleIDCapacity The capacity of the maximizedLocaleID buffer
+ * @param err Error information if maximizing the locale failed. If the length
+ * of the localeID and the null-terminator is greater than the maximum allowed size,
+ * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @return The actual buffer size needed for the maximized locale. If it's
+ * greater than maximizedLocaleIDCapacity, the returned ID will be truncated.
+ * On error, the return value is -1.
+ * @stable ICU 4.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_addLikelySubtags(const char* localeID,
+ char* maximizedLocaleID,
+ int32_t maximizedLocaleIDCapacity,
+ UErrorCode* err);
+
+
+/**
+ * Minimize the subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the minimal form, or there is no data available
+ * for minimization, it will be copied to the output buffer. Since the
+ * minimization algorithm relies on proper maximization, see the comments
+ * for uloc_addLikelySubtags for reasons why there might not be any data.
+ *
+ * Examples:
+ *
+ * "en_Latn_US" minimizes to "en"
+ *
+ * "de_Latn_DE" minimizes to "de"
+ *
+ * "sr_Cyrl_RS" minimizes to "sr"
+ *
+ * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
+ * script, and minimizing to "zh" would imply "zh_Hans_CN".)
+ *
+ * @param localeID The locale to minimize
+ * @param minimizedLocaleID The minimized locale
+ * @param minimizedLocaleIDCapacity The capacity of the minimizedLocaleID buffer
+ * @param err Error information if minimizing the locale failed. If the length
+ * of the localeID and the null-terminator is greater than the maximum allowed size,
+ * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @return The actual buffer size needed for the minimized locale. If it's
+ * greater than minimizedLocaleIDCapacity, the returned ID will be truncated.
+ * On error, the return value is -1.
+ * @stable ICU 4.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_minimizeSubtags(const char* localeID,
+ char* minimizedLocaleID,
+ int32_t minimizedLocaleIDCapacity,
+ UErrorCode* err);
+
+/**
+ * Returns a locale ID for the specified BCP47 language tag string.
+ * If the specified language tag contains any ill-formed subtags,
+ * the first such subtag and all following subtags are ignored.
+ * <p>
+ * This implements the 'Language-Tag' production of BCP 47, and so
+ * supports legacy language tags (marked as "Type: grandfathered" in BCP 47)
+ * (regular and irregular) as well as private use language tags.
+ *
+ * Private use tags are represented as 'x-whatever',
+ * and legacy tags are converted to their canonical replacements where they exist.
+ *
+ * Note that a few legacy tags have no modern replacement;
+ * these will be converted using the fallback described in
+ * the first paragraph, so some information might be lost.
+ *
+ * @param langtag the input BCP47 language tag.
+ * @param localeID the output buffer receiving a locale ID for the
+ * specified BCP47 language tag.
+ * @param localeIDCapacity the size of the locale ID output buffer.
+ * @param parsedLength if not NULL, successfully parsed length
+ * for the input language tag is set.
+ * @param err error information if receiving the locale ID
+ * failed.
+ * @return the length of the locale ID.
+ * @stable ICU 4.2
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_forLanguageTag(const char* langtag,
+ char* localeID,
+ int32_t localeIDCapacity,
+ int32_t* parsedLength,
+ UErrorCode* err);
+
+/**
+ * Returns a well-formed language tag for this locale ID.
+ * <p>
+ * <b>Note</b>: When <code>strict</code> is false, any locale
+ * fields which do not satisfy the BCP47 syntax requirement will
+ * be omitted from the result. When <code>strict</code> is
+ * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the
+ * <code>err</code> if any locale fields do not satisfy the
+ * BCP47 syntax requirement.
+ * @param localeID the input locale ID
+ * @param langtag the output buffer receiving BCP47 language
+ * tag for the locale ID.
+ * @param langtagCapacity the size of the BCP47 language tag
+ * output buffer.
+ * @param strict boolean value indicating if the function returns
+ * an error for an ill-formed input locale ID.
+ * @param err error information if receiving the language
+ * tag failed.
+ * @return The length of the BCP47 language tag.
+ * @stable ICU 4.2
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_toLanguageTag(const char* localeID,
+ char* langtag,
+ int32_t langtagCapacity,
+ UBool strict,
+ UErrorCode* err);
+
+/**
+ * Converts the specified keyword (legacy key, or BCP 47 Unicode locale
+ * extension key) to the equivalent BCP 47 Unicode locale extension key.
+ * For example, BCP 47 Unicode locale extension key "co" is returned for
+ * the input keyword "collation".
+ * <p>
+ * When the specified keyword is unknown, but satisfies the BCP syntax,
+ * then the pointer to the input keyword itself will be returned.
+ * For example,
+ * <code>uloc_toUnicodeLocaleKey("ZZ")</code> returns "ZZ".
+ *
+ * @param keyword the input locale keyword (either legacy key
+ * such as "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @return the well-formed BCP 47 Unicode locale extension key,
+ * or NULL if the specified locale keyword cannot be
+ * mapped to a well-formed BCP 47 Unicode locale extension
+ * key.
+ * @see uloc_toLegacyKey
+ * @stable ICU 54
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toUnicodeLocaleKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (legacy type, or BCP 47
+ * Unicode locale extension type) to the well-formed BCP 47 Unicode locale
+ * extension type for the specified keyword (category). For example, BCP 47
+ * Unicode locale extension type "phonebk" is returned for the input
+ * keyword value "phonebook", with the keyword "collation" (or "co").
+ * <p>
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of the BCP 47 Unicode locale extension type,
+ * or when the specified keyword allows 'variable' type and the specified
+ * value satisfies the syntax, then the pointer to the input type value itself
+ * will be returned.
+ * For example,
+ * <code>uloc_toUnicodeLocaleType("Foo", "Bar")</code> returns "Bar",
+ * <code>uloc_toUnicodeLocaleType("variableTop", "00A4")</code> returns "00A4".
+ *
+ * @param keyword the locale keyword (either legacy key such as
+ * "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @param value the locale keyword value (either legacy type
+ * such as "phonebook" or BCP 47 Unicode locale extension
+ * type such as "phonebk").
+ * @return the well-formed BCP47 Unicode locale extension type,
+ * or NULL if the locale keyword value cannot be mapped to
+ * a well-formed BCP 47 Unicode locale extension type.
+ * @see uloc_toLegacyType
+ * @stable ICU 54
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toUnicodeLocaleType(const char* keyword, const char* value);
+
+/**
+ * Converts the specified keyword (BCP 47 Unicode locale extension key, or
+ * legacy key) to the legacy key. For example, legacy key "collation" is
+ * returned for the input BCP 47 Unicode locale extension key "co".
+ *
+ * @param keyword the input locale keyword (either BCP 47 Unicode locale
+ * extension key or legacy key).
+ * @return the well-formed legacy key, or NULL if the specified
+ * keyword cannot be mapped to a well-formed legacy key.
+ * @see uloc_toUnicodeLocaleKey
+ * @stable ICU 54
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toLegacyKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (BCP 47 Unicode locale extension type,
+ * or legacy type or type alias) to the canonical legacy type. For example,
+ * the legacy type "phonebook" is returned for the input BCP 47 Unicode
+ * locale extension type "phonebk" with the keyword "collation" (or "co").
+ * <p>
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of legacy key, or when the specified keyword
+ * allows 'variable' type and the specified value satisfies the syntax,
+ * then the pointer to the input type value itself will be returned.
+ * For example,
+ * <code>uloc_toLegacyType("Foo", "Bar")</code> returns "Bar",
+ * <code>uloc_toLegacyType("vt", "00A4")</code> returns "00A4".
+ *
+ * @param keyword the locale keyword (either legacy keyword such as
+ * "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @param value the locale keyword value (either BCP 47 Unicode locale
+ * extension type such as "phonebk" or legacy keyword value
+ * such as "phonebook").
+ * @return the well-formed legacy type, or NULL if the specified
+ * keyword value cannot be mapped to a well-formed legacy
+ * type.
+ * @see uloc_toUnicodeLocaleType
+ * @stable ICU 54
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toLegacyType(const char* keyword, const char* value);
+
+#endif /*_ULOC*/
diff --git a/thirdparty/icu4c/common/unicode/umachine.h b/thirdparty/icu4c/common/unicode/umachine.h
new file mode 100644
index 0000000000..09c887c80e
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/umachine.h
@@ -0,0 +1,491 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: umachine.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep13
+* created by: Markus W. Scherer
+*
+* This file defines basic types and constants for ICU to be
+* platform-independent. umachine.h and utf.h are included into
+* utypes.h to provide all the general definitions for ICU.
+* All of these definitions used to be in utypes.h before
+* the UTF-handling macros made this unmaintainable.
+*/
+
+#ifndef __UMACHINE_H__
+#define __UMACHINE_H__
+
+
+/**
+ * \file
+ * \brief Basic types and constants for UTF
+ *
+ * <h2> Basic types and constants for UTF </h2>
+ * This file defines basic types and constants for utf.h to be
+ * platform-independent. umachine.h and utf.h are included into
+ * utypes.h to provide all the general definitions for ICU.
+ * All of these definitions used to be in utypes.h before
+ * the UTF-handling macros made this unmaintainable.
+ *
+ */
+/*==========================================================================*/
+/* Include platform-dependent definitions */
+/* which are contained in the platform-specific file platform.h */
+/*==========================================================================*/
+
+#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
+
+/*
+ * ANSI C headers:
+ * stddef.h defines wchar_t
+ */
+#include <stdbool.h>
+#include <stddef.h>
+
+/*==========================================================================*/
+/* For C wrappers, we use the symbol U_CAPI. */
+/* This works properly if the includer is C or C++. */
+/* Functions are declared U_CAPI return-type U_EXPORT2 function-name()... */
+/*==========================================================================*/
+
+/**
+ * \def U_CFUNC
+ * This is used in a declaration of a library private ICU C function.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_CDECL_BEGIN
+ * This is used to begin a declaration of a library private ICU C API.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_CDECL_END
+ * This is used to end a declaration of a library private ICU C API
+ * @stable ICU 2.4
+ */
+
+#ifdef __cplusplus
+# define U_CFUNC extern "C"
+# define U_CDECL_BEGIN extern "C" {
+# define U_CDECL_END }
+#else
+# define U_CFUNC extern
+# define U_CDECL_BEGIN
+# define U_CDECL_END
+#endif
+
+#ifndef U_ATTRIBUTE_DEPRECATED
+/**
+ * \def U_ATTRIBUTE_DEPRECATED
+ * This is used for GCC specific attributes
+ * @internal
+ */
+#if U_GCC_MAJOR_MINOR >= 302
+# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
+/**
+ * \def U_ATTRIBUTE_DEPRECATED
+ * This is used for Visual C++ specific attributes
+ * @internal
+ */
+#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
+# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
+#else
+# define U_ATTRIBUTE_DEPRECATED
+#endif
+#endif
+
+/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
+#define U_CAPI U_CFUNC U_EXPORT
+/** Obsolete/same as U_CAPI; was used to declare a function as a stable public ICU C API*/
+#define U_STABLE U_CAPI
+/** Obsolete/same as U_CAPI; was used to declare a function as a draft public ICU C API */
+#define U_DRAFT U_CAPI
+/** This is used to declare a function as a deprecated public ICU C API */
+#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
+/** Obsolete/same as U_CAPI; was used to declare a function as an obsolete public ICU C API */
+#define U_OBSOLETE U_CAPI
+/** Obsolete/same as U_CAPI; was used to declare a function as an internal ICU C API */
+#define U_INTERNAL U_CAPI
+
+/**
+ * \def U_OVERRIDE
+ * Defined to the C++11 "override" keyword if available.
+ * Denotes a class or member which is an override of the base class.
+ * May result in an error if it is applied to something not an override.
+ * @internal
+ */
+#ifndef U_OVERRIDE
+#define U_OVERRIDE override
+#endif
+
+/**
+ * \def U_FINAL
+ * Defined to the C++11 "final" keyword if available.
+ * Denotes a class or member which may not be overridden in subclasses.
+ * May result in an error if subclasses attempt to override.
+ * @internal
+ */
+#if !defined(U_FINAL) || defined(U_IN_DOXYGEN)
+#define U_FINAL final
+#endif
+
+// Before ICU 65, function-like, multi-statement ICU macros were just defined as
+// series of statements wrapped in { } blocks and the caller could choose to
+// either treat them as if they were actual functions and end the invocation
+// with a trailing ; creating an empty statement after the block or else omit
+// this trailing ; using the knowledge that the macro would expand to { }.
+//
+// But doing so doesn't work well with macros that look like functions and
+// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
+// switches to the standard solution of wrapping such macros in do { } while.
+//
+// This will however break existing code that depends on being able to invoke
+// these macros without a trailing ; so to be able to remain compatible with
+// such code the wrapper is itself defined as macros so that it's possible to
+// build ICU 65 and later with the old macro behaviour, like this:
+//
+// export CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
+// runConfigureICU ...
+//
+
+/**
+ * \def UPRV_BLOCK_MACRO_BEGIN
+ * Defined as the "do" keyword by default.
+ * @internal
+ */
+#ifndef UPRV_BLOCK_MACRO_BEGIN
+#define UPRV_BLOCK_MACRO_BEGIN do
+#endif
+
+/**
+ * \def UPRV_BLOCK_MACRO_END
+ * Defined as "while (false)" by default.
+ * @internal
+ */
+#ifndef UPRV_BLOCK_MACRO_END
+#define UPRV_BLOCK_MACRO_END while (false)
+#endif
+
+/*==========================================================================*/
+/* limits for int32_t etc., like in POSIX inttypes.h */
+/*==========================================================================*/
+
+#ifndef INT8_MIN
+/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
+# define INT8_MIN ((int8_t)(-128))
+#endif
+#ifndef INT16_MIN
+/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
+# define INT16_MIN ((int16_t)(-32767-1))
+#endif
+#ifndef INT32_MIN
+/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
+# define INT32_MIN ((int32_t)(-2147483647-1))
+#endif
+
+#ifndef INT8_MAX
+/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
+# define INT8_MAX ((int8_t)(127))
+#endif
+#ifndef INT16_MAX
+/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
+# define INT16_MAX ((int16_t)(32767))
+#endif
+#ifndef INT32_MAX
+/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
+# define INT32_MAX ((int32_t)(2147483647))
+#endif
+
+#ifndef UINT8_MAX
+/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
+# define UINT8_MAX ((uint8_t)(255U))
+#endif
+#ifndef UINT16_MAX
+/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
+# define UINT16_MAX ((uint16_t)(65535U))
+#endif
+#ifndef UINT32_MAX
+/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
+# define UINT32_MAX ((uint32_t)(4294967295U))
+#endif
+
+#if defined(U_INT64_T_UNAVAILABLE)
+# error int64_t is required for decimal format and rule-based number format.
+#else
+# ifndef INT64_C
+/**
+ * Provides a platform independent way to specify a signed 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
+ * @stable ICU 2.8
+ */
+# define INT64_C(c) c ## LL
+# endif
+# ifndef UINT64_C
+/**
+ * Provides a platform independent way to specify an unsigned 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
+ * @stable ICU 2.8
+ */
+# define UINT64_C(c) c ## ULL
+# endif
+# ifndef U_INT64_MIN
+/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
+# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
+# endif
+# ifndef U_INT64_MAX
+/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
+# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
+# endif
+# ifndef U_UINT64_MAX
+/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
+# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
+# endif
+#endif
+
+/*==========================================================================*/
+/* Boolean data type */
+/*==========================================================================*/
+
+/**
+ * The ICU boolean type, a signed-byte integer.
+ * ICU-specific for historical reasons: The C and C++ standards used to not define type bool.
+ * Also provides a fixed type definition, as opposed to
+ * type bool whose details (e.g., sizeof) may vary by compiler and between C and C++.
+ *
+ * @stable ICU 2.0
+ */
+typedef int8_t UBool;
+
+/**
+ * \def U_DEFINE_FALSE_AND_TRUE
+ * Normally turns off defining macros FALSE=0 & TRUE=1 in public ICU headers.
+ * These obsolete macros sometimes break compilation of other code that
+ * defines enum constants or similar with these names.
+ * C++ has long defined bool/false/true.
+ * C99 also added definitions for these, although as macros; see stdbool.h.
+ *
+ * You may transitionally define U_DEFINE_FALSE_AND_TRUE=1 if you need time to migrate code.
+ *
+ * @internal ICU 68
+ */
+#ifdef U_DEFINE_FALSE_AND_TRUE
+ // Use the predefined value.
+#elif defined(U_COMBINED_IMPLEMENTATION) || \
+ defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
+ defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
+ defined(U_TOOLUTIL_IMPLEMENTATION)
+ // Inside ICU: Keep FALSE & TRUE available.
+# define U_DEFINE_FALSE_AND_TRUE 1
+#else
+ // Outside ICU: Avoid collision with non-macro definitions of FALSE & TRUE.
+# define U_DEFINE_FALSE_AND_TRUE 0
+#endif
+
+#if U_DEFINE_FALSE_AND_TRUE || defined(U_IN_DOXYGEN)
+#ifndef TRUE
+/**
+ * The TRUE value of a UBool.
+ *
+ * @deprecated ICU 68 Use standard "true" instead.
+ */
+# define TRUE 1
+#endif
+#ifndef FALSE
+/**
+ * The FALSE value of a UBool.
+ *
+ * @deprecated ICU 68 Use standard "false" instead.
+ */
+# define FALSE 0
+#endif
+#endif // U_DEFINE_FALSE_AND_TRUE
+
+/*==========================================================================*/
+/* Unicode data types */
+/*==========================================================================*/
+
+/* wchar_t-related definitions -------------------------------------------- */
+
+/*
+ * \def U_WCHAR_IS_UTF16
+ * Defined if wchar_t uses UTF-16.
+ *
+ * @stable ICU 2.0
+ */
+/*
+ * \def U_WCHAR_IS_UTF32
+ * Defined if wchar_t uses UTF-32.
+ *
+ * @stable ICU 2.0
+ */
+#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
+# ifdef __STDC_ISO_10646__
+# if (U_SIZEOF_WCHAR_T==2)
+# define U_WCHAR_IS_UTF16
+# elif (U_SIZEOF_WCHAR_T==4)
+# define U_WCHAR_IS_UTF32
+# endif
+# elif defined __UCS2__
+# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
+# define U_WCHAR_IS_UTF16
+# endif
+# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
+# if (U_SIZEOF_WCHAR_T==4)
+# define U_WCHAR_IS_UTF32
+# endif
+# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
+# define U_WCHAR_IS_UTF32
+# elif U_PLATFORM_HAS_WIN32_API
+# define U_WCHAR_IS_UTF16
+# endif
+#endif
+
+/* UChar and UChar32 definitions -------------------------------------------- */
+
+/** Number of bytes in a UChar. @stable ICU 2.0 */
+#define U_SIZEOF_UCHAR 2
+
+/**
+ * \def U_CHAR16_IS_TYPEDEF
+ * If 1, then char16_t is a typedef and not a real type (yet)
+ * @internal
+ */
+#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
+// for AIX, uchar.h needs to be included
+# include <uchar.h>
+# define U_CHAR16_IS_TYPEDEF 1
+#elif defined(_MSC_VER) && (_MSC_VER < 1900)
+// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
+// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
+# define U_CHAR16_IS_TYPEDEF 1
+#else
+# define U_CHAR16_IS_TYPEDEF 0
+#endif
+
+
+/**
+ * \var UChar
+ *
+ * The base type for UTF-16 code units and pointers.
+ * Unsigned 16-bit integer.
+ * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
+ *
+ * UChar is configurable by defining the macro UCHAR_TYPE
+ * on the preprocessor or compiler command line:
+ * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
+ * (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
+ * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
+ *
+ * The default is UChar=char16_t.
+ *
+ * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
+ *
+ * In C, char16_t is a simple typedef of uint_least16_t.
+ * ICU requires uint_least16_t=uint16_t for data memory mapping.
+ * On macOS, char16_t is not available because the uchar.h standard header is missing.
+ *
+ * @stable ICU 4.4
+ */
+
+#if 1
+ // #if 1 is normal. UChar defaults to char16_t in C++.
+ // For configuration testing of UChar=uint16_t temporarily change this to #if 0.
+ // The intltest Makefile #defines UCHAR_TYPE=char16_t,
+ // so we only #define it to uint16_t if it is undefined so far.
+#elif !defined(UCHAR_TYPE)
+# define UCHAR_TYPE uint16_t
+#endif
+
+#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
+ defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
+ // Inside the ICU library code, never configurable.
+ typedef char16_t UChar;
+#elif defined(UCHAR_TYPE)
+ typedef UCHAR_TYPE UChar;
+#elif (U_CPLUSPLUS_VERSION >= 11)
+ typedef char16_t UChar;
+#else
+ typedef uint16_t UChar;
+#endif
+
+/**
+ * \var OldUChar
+ * Default ICU 58 definition of UChar.
+ * A base type for UTF-16 code units and pointers.
+ * Unsigned 16-bit integer.
+ *
+ * Define OldUChar to be wchar_t if that is 16 bits wide.
+ * If wchar_t is not 16 bits wide, then define OldUChar to be __CHAR16_TYPE__ if available, or else uint16_t.
+ *
+ * This makes the definition of OldUChar platform-dependent
+ * but allows direct string type compatibility with platforms with
+ * 16-bit wchar_t types.
+ *
+ * This is how UChar was defined in ICU 58, for transition convenience.
+ * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
+ * The current UChar responds to UCHAR_TYPE but OldUChar does not.
+ *
+ * @stable ICU 59
+ */
+#if U_SIZEOF_WCHAR_T==2
+ typedef wchar_t OldUChar;
+#elif defined(__CHAR16_TYPE__)
+ typedef __CHAR16_TYPE__ OldUChar;
+#else
+ typedef uint16_t OldUChar;
+#endif
+
+/**
+ * Define UChar32 as a type for single Unicode code points.
+ * UChar32 is a signed 32-bit integer (same as int32_t).
+ *
+ * The Unicode code point range is 0..0x10ffff.
+ * All other values (negative or >=0x110000) are illegal as Unicode code points.
+ * They may be used as sentinel values to indicate "done", "error"
+ * or similar non-code point conditions.
+ *
+ * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
+ * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
+ * or else to be uint32_t.
+ * That is, the definition of UChar32 was platform-dependent.
+ *
+ * @see U_SENTINEL
+ * @stable ICU 2.4
+ */
+typedef int32_t UChar32;
+
+/**
+ * This value is intended for sentinel values for APIs that
+ * (take or) return single code points (UChar32).
+ * It is outside of the Unicode code point range 0..0x10ffff.
+ *
+ * For example, a "done" or "error" value in a new API
+ * could be indicated with U_SENTINEL.
+ *
+ * ICU APIs designed before ICU 2.4 usually define service-specific "done"
+ * values, mostly 0xffff.
+ * Those may need to be distinguished from
+ * actual U+ffff text contents by calling functions like
+ * CharacterIterator::hasNext() or UnicodeString::length().
+ *
+ * @return -1
+ * @see UChar32
+ * @stable ICU 2.4
+ */
+#define U_SENTINEL (-1)
+
+#include "unicode/urename.h"
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/umisc.h b/thirdparty/icu4c/common/unicode/umisc.h
new file mode 100644
index 0000000000..213290b9af
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/umisc.h
@@ -0,0 +1,62 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: umisc.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999oct15
+* created by: Markus W. Scherer
+*/
+
+#ifndef UMISC_H
+#define UMISC_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API:misc definitions
+ *
+ * This file contains miscellaneous definitions for the C APIs.
+ */
+
+U_CDECL_BEGIN
+
+/** A struct representing a range of text containing a specific field
+ * @stable ICU 2.0
+ */
+typedef struct UFieldPosition {
+ /**
+ * The field
+ * @stable ICU 2.0
+ */
+ int32_t field;
+ /**
+ * The start of the text range containing field
+ * @stable ICU 2.0
+ */
+ int32_t beginIndex;
+ /**
+ * The limit of the text range containing field
+ * @stable ICU 2.0
+ */
+ int32_t endIndex;
+} UFieldPosition;
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * Opaque type returned by registerInstance, registerFactory and unregister for service registration.
+ * @stable ICU 2.6
+ */
+typedef const void* URegistryKey;
+#endif
+
+U_CDECL_END
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/umutablecptrie.h b/thirdparty/icu4c/common/unicode/umutablecptrie.h
new file mode 100644
index 0000000000..5325d58147
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/umutablecptrie.h
@@ -0,0 +1,241 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// umutablecptrie.h (split out of ucptrie.h)
+// created: 2018jan24 Markus W. Scherer
+
+#ifndef __UMUTABLECPTRIE_H__
+#define __UMUTABLECPTRIE_H__
+
+#include "unicode/utypes.h"
+
+#include "unicode/ucpmap.h"
+#include "unicode/ucptrie.h"
+#include "unicode/utf8.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+U_CDECL_BEGIN
+
+/**
+ * \file
+ *
+ * This file defines a mutable Unicode code point trie.
+ *
+ * @see UCPTrie
+ * @see UMutableCPTrie
+ */
+
+/**
+ * Mutable Unicode code point trie.
+ * Fast map from Unicode code points (U+0000..U+10FFFF) to 32-bit integer values.
+ * For details see http://site.icu-project.org/design/struct/utrie
+ *
+ * Setting values (especially ranges) and lookup is fast.
+ * The mutable trie is only somewhat space-efficient.
+ * It builds a compacted, immutable UCPTrie.
+ *
+ * This trie can be modified while iterating over its contents.
+ * For example, it is possible to merge its values with those from another
+ * set of ranges (e.g., another mutable or immutable trie):
+ * Iterate over those source ranges; for each of them iterate over this trie;
+ * add the source value into the value of each trie range.
+ *
+ * @see UCPTrie
+ * @see umutablecptrie_buildImmutable
+ * @stable ICU 63
+ */
+typedef struct UMutableCPTrie UMutableCPTrie;
+
+/**
+ * Creates a mutable trie that initially maps each Unicode code point to the same value.
+ * It uses 32-bit data values until umutablecptrie_buildImmutable() is called.
+ * umutablecptrie_buildImmutable() takes a valueWidth parameter which
+ * determines the number of bits in the data value in the resulting UCPTrie.
+ * You must umutablecptrie_close() the trie once you are done using it.
+ *
+ * @param initialValue the initial value that is set for all code points
+ * @param errorValue the value for out-of-range code points and ill-formed UTF-8/16
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the trie
+ * @stable ICU 63
+ */
+U_CAPI UMutableCPTrie * U_EXPORT2
+umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode);
+
+/**
+ * Clones a mutable trie.
+ * You must umutablecptrie_close() the clone once you are done using it.
+ *
+ * @param other the trie to clone
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the trie clone
+ * @stable ICU 63
+ */
+U_CAPI UMutableCPTrie * U_EXPORT2
+umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode);
+
+/**
+ * Closes a mutable trie and releases associated memory.
+ *
+ * @param trie the trie
+ * @stable ICU 63
+ */
+U_CAPI void U_EXPORT2
+umutablecptrie_close(UMutableCPTrie *trie);
+
+/**
+ * Creates a mutable trie with the same contents as the UCPMap.
+ * You must umutablecptrie_close() the mutable trie once you are done using it.
+ *
+ * @param map the source map
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the mutable trie
+ * @stable ICU 63
+ */
+U_CAPI UMutableCPTrie * U_EXPORT2
+umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode);
+
+/**
+ * Creates a mutable trie with the same contents as the immutable one.
+ * You must umutablecptrie_close() the mutable trie once you are done using it.
+ *
+ * @param trie the immutable trie
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the mutable trie
+ * @stable ICU 63
+ */
+U_CAPI UMutableCPTrie * U_EXPORT2
+umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode);
+
+/**
+ * Returns the value for a code point as stored in the trie.
+ *
+ * @param trie the trie
+ * @param c the code point
+ * @return the value
+ * @stable ICU 63
+ */
+U_CAPI uint32_t U_EXPORT2
+umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c);
+
+/**
+ * Returns the last code point such that all those from start to there have the same value.
+ * Can be used to efficiently iterate over all same-value ranges in a trie.
+ * (This is normally faster than iterating over code points and get()ting each value,
+ * but much slower than a data structure that stores ranges directly.)
+ *
+ * The trie can be modified between calls to this function.
+ *
+ * If the UCPMapValueFilter function pointer is not NULL, then
+ * the value to be delivered is passed through that function, and the return value is the end
+ * of the range where all values are modified to the same actual value.
+ * The value is unchanged if that function pointer is NULL.
+ *
+ * See the same-signature ucptrie_getRange() for a code sample.
+ *
+ * @param trie the trie
+ * @param start range start
+ * @param option defines whether surrogates are treated normally,
+ * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
+ * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
+ * @param filter a pointer to a function that may modify the trie data value,
+ * or NULL if the values from the trie are to be used unmodified
+ * @param context an opaque pointer that is passed on to the filter function
+ * @param pValue if not NULL, receives the value that every code point start..end has;
+ * may have been modified by filter(context, trie value)
+ * if that function pointer is not NULL
+ * @return the range end code point, or -1 if start is not a valid code point
+ * @stable ICU 63
+ */
+U_CAPI UChar32 U_EXPORT2
+umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
+ UCPMapRangeOption option, uint32_t surrogateValue,
+ UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
+
+/**
+ * Sets a value for a code point.
+ *
+ * @param trie the trie
+ * @param c the code point
+ * @param value the value
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @stable ICU 63
+ */
+U_CAPI void U_EXPORT2
+umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode);
+
+/**
+ * Sets a value for each code point [start..end].
+ * Faster and more space-efficient than setting the value for each code point separately.
+ *
+ * @param trie the trie
+ * @param start the first code point to get the value
+ * @param end the last code point to get the value (inclusive)
+ * @param value the value
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @stable ICU 63
+ */
+U_CAPI void U_EXPORT2
+umutablecptrie_setRange(UMutableCPTrie *trie,
+ UChar32 start, UChar32 end,
+ uint32_t value, UErrorCode *pErrorCode);
+
+/**
+ * Compacts the data and builds an immutable UCPTrie according to the parameters.
+ * After this, the mutable trie will be empty.
+ *
+ * The mutable trie stores 32-bit values until buildImmutable() is called.
+ * If values shorter than 32 bits are to be stored in the immutable trie,
+ * then the upper bits are discarded.
+ * For example, when the mutable trie contains values 0x81, -0x7f, and 0xa581,
+ * and the value width is 8 bits, then each of these is stored as 0x81
+ * and the immutable trie will return that as an unsigned value.
+ * (Some implementations may want to make productive temporary use of the upper bits
+ * until buildImmutable() discards them.)
+ *
+ * Not every possible set of mappings can be built into a UCPTrie,
+ * because of limitations resulting from speed and space optimizations.
+ * Every Unicode assigned character can be mapped to a unique value.
+ * Typical data yields data structures far smaller than the limitations.
+ *
+ * It is possible to construct extremely unusual mappings that exceed the data structure limits.
+ * In such a case this function will fail with a U_INDEX_OUTOFBOUNDS_ERROR.
+ *
+ * @param trie the trie
+ * @param type selects the trie type
+ * @param valueWidth selects the number of bits in a trie data value; if smaller than 32 bits,
+ * then the values stored in the trie will be truncated first
+ * @param pErrorCode an in/out ICU UErrorCode
+ *
+ * @see umutablecptrie_fromUCPTrie
+ * @stable ICU 63
+ */
+U_CAPI UCPTrie * U_EXPORT2
+umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth,
+ UErrorCode *pErrorCode);
+
+U_CDECL_END
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUMutableCPTriePointer
+ * "Smart pointer" class, closes a UMutableCPTrie via umutablecptrie_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 63
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUMutableCPTriePointer, UMutableCPTrie, umutablecptrie_close);
+
+U_NAMESPACE_END
+
+#endif
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/unifilt.h b/thirdparty/icu4c/common/unicode/unifilt.h
new file mode 100644
index 0000000000..420e1a1905
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/unifilt.h
@@ -0,0 +1,136 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2010, International Business Machines Corporation and others.
+* All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 11/17/99 aliu Creation.
+**********************************************************************
+*/
+#ifndef UNIFILT_H
+#define UNIFILT_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/unifunct.h"
+#include "unicode/unimatch.h"
+
+/**
+ * \file
+ * \brief C++ API: Unicode Filter
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * U_ETHER is used to represent character values for positions outside
+ * a range. For example, transliterator uses this to represent
+ * characters outside the range contextStart..contextLimit-1. This
+ * allows explicit matching by rules and UnicodeSets of text outside a
+ * defined range.
+ * @stable ICU 3.0
+ */
+#define U_ETHER ((char16_t)0xFFFF)
+
+/**
+ *
+ * <code>UnicodeFilter</code> defines a protocol for selecting a
+ * subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
+ * Currently, filters are used in conjunction with classes like {@link
+ * Transliterator} to only process selected characters through a
+ * transformation.
+ *
+ * <p>Note: UnicodeFilter currently stubs out two pure virtual methods
+ * of its base class, UnicodeMatcher. These methods are toPattern()
+ * and matchesIndexValue(). This is done so that filter classes that
+ * are not actually used as matchers -- specifically, those in the
+ * UnicodeFilterLogic component, and those in tests -- can continue to
+ * work without defining these methods. As long as a filter is not
+ * used in an RBT during real transliteration, these methods will not
+ * be called. However, this breaks the UnicodeMatcher base class
+ * protocol, and it is not a correct solution.
+ *
+ * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter
+ * hierarchy and either redesign it, or simply remove the stubs in
+ * UnicodeFilter and force subclasses to implement the full
+ * UnicodeMatcher protocol.
+ *
+ * @see UnicodeFilterLogic
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
+
+public:
+ /**
+ * Destructor
+ * @stable ICU 2.0
+ */
+ virtual ~UnicodeFilter();
+
+ /**
+ * Clones this object polymorphically.
+ * The caller owns the result and should delete it when done.
+ * @return clone, or nullptr if an error occurred
+ * @stable ICU 2.4
+ */
+ virtual UnicodeFilter* clone() const = 0;
+
+ /**
+ * Returns <tt>true</tt> for characters that are in the selected
+ * subset. In other words, if a character is <b>to be
+ * filtered</b>, then <tt>contains()</tt> returns
+ * <b><tt>false</tt></b>.
+ * @stable ICU 2.0
+ */
+ virtual UBool contains(UChar32 c) const = 0;
+
+ /**
+ * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer
+ * and return the pointer.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeMatcher* toMatcher() const;
+
+ /**
+ * Implement UnicodeMatcher API.
+ * @stable ICU 2.4
+ */
+ virtual UMatchDegree matches(const Replaceable& text,
+ int32_t& offset,
+ int32_t limit,
+ UBool incremental);
+
+ /**
+ * UnicodeFunctor API. Nothing to do.
+ * @stable ICU 2.4
+ */
+ virtual void setData(const TransliterationRuleData*);
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+protected:
+
+ /*
+ * Since this class has pure virtual functions,
+ * a constructor can't be used.
+ * @stable ICU 2.0
+ */
+/* UnicodeFilter();*/
+};
+
+/*inline UnicodeFilter::UnicodeFilter() {}*/
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/unifunct.h b/thirdparty/icu4c/common/unicode/unifunct.h
new file mode 100644
index 0000000000..7d31af7daf
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/unifunct.h
@@ -0,0 +1,132 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2002-2005, International Business Machines Corporation
+* and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 01/14/2002 aliu Creation.
+**********************************************************************
+*/
+#ifndef UNIFUNCT_H
+#define UNIFUNCT_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Unicode Functor
+ */
+
+U_NAMESPACE_BEGIN
+
+class UnicodeMatcher;
+class UnicodeReplacer;
+class TransliterationRuleData;
+
+/**
+ * <code>UnicodeFunctor</code> is an abstract base class for objects
+ * that perform match and/or replace operations on Unicode strings.
+ * @author Alan Liu
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UnicodeFunctor : public UObject {
+
+public:
+
+ /**
+ * Destructor
+ * @stable ICU 2.4
+ */
+ virtual ~UnicodeFunctor();
+
+ /**
+ * Return a copy of this object. All UnicodeFunctor objects
+ * have to support cloning in order to allow classes using
+ * UnicodeFunctor to implement cloning.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeFunctor* clone() const = 0;
+
+ /**
+ * Cast 'this' to a UnicodeMatcher* pointer and return the
+ * pointer, or null if this is not a UnicodeMatcher*. Subclasses
+ * that mix in UnicodeMatcher as a base class must override this.
+ * This protocol is required because a pointer to a UnicodeFunctor
+ * cannot be cast to a pointer to a UnicodeMatcher, since
+ * UnicodeMatcher is a mixin that does not derive from
+ * UnicodeFunctor.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeMatcher* toMatcher() const;
+
+ /**
+ * Cast 'this' to a UnicodeReplacer* pointer and return the
+ * pointer, or null if this is not a UnicodeReplacer*. Subclasses
+ * that mix in UnicodeReplacer as a base class must override this.
+ * This protocol is required because a pointer to a UnicodeFunctor
+ * cannot be cast to a pointer to a UnicodeReplacer, since
+ * UnicodeReplacer is a mixin that does not derive from
+ * UnicodeFunctor.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeReplacer* toReplacer() const;
+
+ /**
+ * Return the class ID for this class. This is useful only for
+ * comparing to a return value from getDynamicClassID().
+ * @return The class ID for all objects of this class.
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+ /**
+ * Returns a unique class ID <b>polymorphically</b>. This method
+ * is to implement a simple version of RTTI, since not all C++
+ * compilers support genuine RTTI. Polymorphic operator==() and
+ * clone() methods call this method.
+ *
+ * <p>Concrete subclasses of UnicodeFunctor should use the macro
+ * UOBJECT_DEFINE_RTTI_IMPLEMENTATION from uobject.h to
+ * provide definitions of getStaticClassID and getDynamicClassID.
+ *
+ * @return The class ID for this object. All objects of a given
+ * class have the same class ID. Objects of other classes have
+ * different class IDs.
+ * @stable ICU 2.4
+ */
+ virtual UClassID getDynamicClassID(void) const = 0;
+
+ /**
+ * Set the data object associated with this functor. The data
+ * object provides context for functor-to-standin mapping. This
+ * method is required when assigning a functor to a different data
+ * object. This function MAY GO AWAY later if the architecture is
+ * changed to pass data object pointers through the API.
+ * @internal ICU 2.1
+ */
+ virtual void setData(const TransliterationRuleData*) = 0;
+
+protected:
+
+ /**
+ * Since this class has pure virtual functions,
+ * a constructor can't be used.
+ * @stable ICU 2.0
+ */
+ /*UnicodeFunctor();*/
+
+};
+
+/*inline UnicodeFunctor::UnicodeFunctor() {}*/
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/unimatch.h b/thirdparty/icu4c/common/unicode/unimatch.h
new file mode 100644
index 0000000000..302332f455
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/unimatch.h
@@ -0,0 +1,168 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+* Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 07/18/01 aliu Creation.
+**********************************************************************
+*/
+#ifndef UNIMATCH_H
+#define UNIMATCH_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C++ API: Unicode Matcher
+ */
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+class Replaceable;
+class UnicodeString;
+class UnicodeSet;
+
+/**
+ * Constants returned by <code>UnicodeMatcher::matches()</code>
+ * indicating the degree of match.
+ * @stable ICU 2.4
+ */
+enum UMatchDegree {
+ /**
+ * Constant returned by <code>matches()</code> indicating a
+ * mismatch between the text and this matcher. The text contains
+ * a character which does not match, or the text does not contain
+ * all desired characters for a non-incremental match.
+ * @stable ICU 2.4
+ */
+ U_MISMATCH,
+
+ /**
+ * Constant returned by <code>matches()</code> indicating a
+ * partial match between the text and this matcher. This value is
+ * only returned for incremental match operations. All characters
+ * of the text match, but more characters are required for a
+ * complete match. Alternatively, for variable-length matchers,
+ * all characters of the text match, and if more characters were
+ * supplied at limit, they might also match.
+ * @stable ICU 2.4
+ */
+ U_PARTIAL_MATCH,
+
+ /**
+ * Constant returned by <code>matches()</code> indicating a
+ * complete match between the text and this matcher. For an
+ * incremental variable-length match, this value is returned if
+ * the given text matches, and it is known that additional
+ * characters would not alter the extent of the match.
+ * @stable ICU 2.4
+ */
+ U_MATCH
+};
+
+/**
+ * <code>UnicodeMatcher</code> defines a protocol for objects that can
+ * match a range of characters in a Replaceable string.
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ {
+
+public:
+ /**
+ * Destructor.
+ * @stable ICU 2.4
+ */
+ virtual ~UnicodeMatcher();
+
+ /**
+ * Return a UMatchDegree value indicating the degree of match for
+ * the given text at the given offset. Zero, one, or more
+ * characters may be matched.
+ *
+ * Matching in the forward direction is indicated by limit >
+ * offset. Characters from offset forwards to limit-1 will be
+ * considered for matching.
+ *
+ * Matching in the reverse direction is indicated by limit <
+ * offset. Characters from offset backwards to limit+1 will be
+ * considered for matching.
+ *
+ * If limit == offset then the only match possible is a zero
+ * character match (which subclasses may implement if desired).
+ *
+ * As a side effect, advance the offset parameter to the limit of
+ * the matched substring. In the forward direction, this will be
+ * the index of the last matched character plus one. In the
+ * reverse direction, this will be the index of the last matched
+ * character minus one.
+ *
+ * <p>Note: This method is not const because some classes may
+ * modify their state as the result of a match.
+ *
+ * @param text the text to be matched
+ * @param offset on input, the index into text at which to begin
+ * matching. On output, the limit of the matched text. The
+ * number of matched characters is the output value of offset
+ * minus the input value. Offset should always point to the
+ * HIGH SURROGATE (leading code unit) of a pair of surrogates,
+ * both on entry and upon return.
+ * @param limit the limit index of text to be matched. Greater
+ * than offset for a forward direction match, less than offset for
+ * a backward direction match. The last character to be
+ * considered for matching will be text.charAt(limit-1) in the
+ * forward direction or text.charAt(limit+1) in the backward
+ * direction.
+ * @param incremental if true, then assume further characters may
+ * be inserted at limit and check for partial matching. Otherwise
+ * assume the text as given is complete.
+ * @return a match degree value indicating a full match, a partial
+ * match, or a mismatch. If incremental is false then
+ * U_PARTIAL_MATCH should never be returned.
+ * @stable ICU 2.4
+ */
+ virtual UMatchDegree matches(const Replaceable& text,
+ int32_t& offset,
+ int32_t limit,
+ UBool incremental) = 0;
+
+ /**
+ * Returns a string representation of this matcher. If the result of
+ * calling this function is passed to the appropriate parser, it
+ * will produce another matcher that is equal to this one.
+ * @param result the string to receive the pattern. Previous
+ * contents will be deleted.
+ * @param escapeUnprintable if true then convert unprintable
+ * character to their hex escape representations, \\uxxxx or
+ * \\Uxxxxxxxx. Unprintable characters are those other than
+ * U+000A, U+0020..U+007E.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeString& toPattern(UnicodeString& result,
+ UBool escapeUnprintable = false) const = 0;
+
+ /**
+ * Returns true if this matcher will match a character c, where c
+ * & 0xFF == v, at offset, in the forward direction (with limit >
+ * offset). This is used by <tt>RuleBasedTransliterator</tt> for
+ * indexing.
+ * @stable ICU 2.4
+ */
+ virtual UBool matchesIndexValue(uint8_t v) const = 0;
+
+ /**
+ * Union the set of all characters that may be matched by this object
+ * into the given set.
+ * @param toUnionTo the set into which to union the source characters
+ * @stable ICU 2.4
+ */
+ virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/uniset.h b/thirdparty/icu4c/common/unicode/uniset.h
new file mode 100644
index 0000000000..50b6360f3a
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/uniset.h
@@ -0,0 +1,1744 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+***************************************************************************
+* Copyright (C) 1999-2016, International Business Machines Corporation
+* and others. All Rights Reserved.
+***************************************************************************
+* Date Name Description
+* 10/20/99 alan Creation.
+***************************************************************************
+*/
+
+#ifndef UNICODESET_H
+#define UNICODESET_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/ucpmap.h"
+#include "unicode/unifilt.h"
+#include "unicode/unistr.h"
+#include "unicode/uset.h"
+
+/**
+ * \file
+ * \brief C++ API: Unicode Set
+ */
+
+U_NAMESPACE_BEGIN
+
+// Forward Declarations.
+class BMPSet;
+class ParsePosition;
+class RBBIRuleScanner;
+class SymbolTable;
+class UnicodeSetStringSpan;
+class UVector;
+class RuleCharacterIterator;
+
+/**
+ * A mutable set of Unicode characters and multicharacter strings. Objects of this class
+ * represent <em>character classes</em> used in regular expressions.
+ * A character class specifies a subset of Unicode code points. Legal
+ * code points are U+0000 to U+10FFFF, inclusive.
+ *
+ * <p>The UnicodeSet class is not designed to be subclassed.
+ *
+ * <p><code>UnicodeSet</code> supports two APIs. The first is the
+ * <em>operand</em> API that allows the caller to modify the value of
+ * a <code>UnicodeSet</code> object. It conforms to Java 2's
+ * <code>java.util.Set</code> interface, although
+ * <code>UnicodeSet</code> does not actually implement that
+ * interface. All methods of <code>Set</code> are supported, with the
+ * modification that they take a character range or single character
+ * instead of an <code>Object</code>, and they take a
+ * <code>UnicodeSet</code> instead of a <code>Collection</code>. The
+ * operand API may be thought of in terms of boolean logic: a boolean
+ * OR is implemented by <code>add</code>, a boolean AND is implemented
+ * by <code>retain</code>, a boolean XOR is implemented by
+ * <code>complement</code> taking an argument, and a boolean NOT is
+ * implemented by <code>complement</code> with no argument. In terms
+ * of traditional set theory function names, <code>add</code> is a
+ * union, <code>retain</code> is an intersection, <code>remove</code>
+ * is an asymmetric difference, and <code>complement</code> with no
+ * argument is a set complement with respect to the superset range
+ * <code>MIN_VALUE-MAX_VALUE</code>
+ *
+ * <p>The second API is the
+ * <code>applyPattern()</code>/<code>toPattern()</code> API from the
+ * <code>java.text.Format</code>-derived classes. Unlike the
+ * methods that add characters, add categories, and control the logic
+ * of the set, the method <code>applyPattern()</code> sets all
+ * attributes of a <code>UnicodeSet</code> at once, based on a
+ * string pattern.
+ *
+ * <p><b>Pattern syntax</b></p>
+ *
+ * Patterns are accepted by the constructors and the
+ * <code>applyPattern()</code> methods and returned by the
+ * <code>toPattern()</code> method. These patterns follow a syntax
+ * similar to that employed by version 8 regular expression character
+ * classes. Here are some simple examples:
+ *
+ * \htmlonly<blockquote>\endhtmlonly
+ * <table>
+ * <tr align="top">
+ * <td nowrap valign="top" align="left"><code>[]</code></td>
+ * <td valign="top">No characters</td>
+ * </tr><tr align="top">
+ * <td nowrap valign="top" align="left"><code>[a]</code></td>
+ * <td valign="top">The character 'a'</td>
+ * </tr><tr align="top">
+ * <td nowrap valign="top" align="left"><code>[ae]</code></td>
+ * <td valign="top">The characters 'a' and 'e'</td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top" align="left"><code>[a-e]</code></td>
+ * <td valign="top">The characters 'a' through 'e' inclusive, in Unicode code
+ * point order</td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top" align="left"><code>[\\u4E01]</code></td>
+ * <td valign="top">The character U+4E01</td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top" align="left"><code>[a{ab}{ac}]</code></td>
+ * <td valign="top">The character 'a' and the multicharacter strings &quot;ab&quot; and
+ * &quot;ac&quot;</td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top" align="left"><code>[\\p{Lu}]</code></td>
+ * <td valign="top">All characters in the general category Uppercase Letter</td>
+ * </tr>
+ * </table>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * Any character may be preceded by a backslash in order to remove any special
+ * meaning. White space characters, as defined by UCharacter.isWhitespace(), are
+ * ignored, unless they are escaped.
+ *
+ * <p>Property patterns specify a set of characters having a certain
+ * property as defined by the Unicode standard. Both the POSIX-like
+ * "[:Lu:]" and the Perl-like syntax "\\p{Lu}" are recognized. For a
+ * complete list of supported property patterns, see the User's Guide
+ * for UnicodeSet at
+ * <a href="http://icu-project.org/userguide/unicodeSet.html">
+ * http://icu-project.org/userguide/unicodeSet.html</a>.
+ * Actual determination of property data is defined by the underlying
+ * Unicode database as implemented by UCharacter.
+ *
+ * <p>Patterns specify individual characters, ranges of characters, and
+ * Unicode property sets. When elements are concatenated, they
+ * specify their union. To complement a set, place a '^' immediately
+ * after the opening '['. Property patterns are inverted by modifying
+ * their delimiters; "[:^foo:]" and "\\P{foo}". In any other location,
+ * '^' has no special meaning.
+ *
+ * <p>Ranges are indicated by placing a '-' between two
+ * characters, as in "a-z". This specifies the range of all
+ * characters from the left to the right, in Unicode order. If the
+ * left character is greater than or equal to the
+ * right character it is a syntax error. If a '-' occurs as the first
+ * character after the opening '[' or '[^', or if it occurs as the
+ * last character before the closing ']', then it is taken as a
+ * literal. Thus "[a\-b]", "[-ab]", and "[ab-]" all indicate the same
+ * set of three characters, 'a', 'b', and '-'.
+ *
+ * <p>Sets may be intersected using the '&' operator or the asymmetric
+ * set difference may be taken using the '-' operator, for example,
+ * "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters
+ * with values less than 4096. Operators ('&' and '-') have equal
+ * precedence and bind left-to-right. Thus
+ * "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to
+ * "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]". This only really matters for
+ * difference; intersection is commutative.
+ *
+ * <table>
+ * <tr valign=top><td nowrap><code>[a]</code><td>The set containing 'a'
+ * <tr valign=top><td nowrap><code>[a-z]</code><td>The set containing 'a'
+ * through 'z' and all letters in between, in Unicode order
+ * <tr valign=top><td nowrap><code>[^a-z]</code><td>The set containing
+ * all characters but 'a' through 'z',
+ * that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>][<em>pat2</em>]]</code>
+ * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code>
+ * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code>
+ * <td>The asymmetric difference of sets specified by <em>pat1</em> and
+ * <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[:Lu:] or \\p{Lu}</code>
+ * <td>The set of characters having the specified
+ * Unicode property; in
+ * this case, Unicode uppercase letters
+ * <tr valign=top><td nowrap><code>[:^Lu:] or \\P{Lu}</code>
+ * <td>The set of characters <em>not</em> having the given
+ * Unicode property
+ * </table>
+ *
+ * <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
+ *
+ * <p><b>Formal syntax</b></p>
+ *
+ * \htmlonly<blockquote>\endhtmlonly
+ * <table>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>pattern :=&nbsp; </code></td>
+ * <td valign="top"><code>('[' '^'? item* ']') |
+ * property</code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>item :=&nbsp; </code></td>
+ * <td valign="top"><code>char | (char '-' char) | pattern-expr<br>
+ * </code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>pattern-expr :=&nbsp; </code></td>
+ * <td valign="top"><code>pattern | pattern-expr pattern |
+ * pattern-expr op pattern<br>
+ * </code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>op :=&nbsp; </code></td>
+ * <td valign="top"><code>'&amp;' | '-'<br>
+ * </code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>special :=&nbsp; </code></td>
+ * <td valign="top"><code>'[' | ']' | '-'<br>
+ * </code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>char :=&nbsp; </code></td>
+ * <td valign="top"><em>any character that is not</em><code> special<br>
+ * | ('\' </code><em>any character</em><code>)<br>
+ * | ('\\u' hex hex hex hex)<br>
+ * </code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>hex :=&nbsp; </code></td>
+ * <td valign="top"><em>any character for which
+ * </em><code>Character.digit(c, 16)</code><em>
+ * returns a non-negative result</em></td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top" align="right"><code>property :=&nbsp; </code></td>
+ * <td valign="top"><em>a Unicode property set pattern</em></td>
+ * </tr>
+ * </table>
+ * <br>
+ * <table border="1">
+ * <tr>
+ * <td>Legend: <table>
+ * <tr>
+ * <td nowrap valign="top"><code>a := b</code></td>
+ * <td width="20" valign="top">&nbsp; </td>
+ * <td valign="top"><code>a</code> may be replaced by <code>b</code> </td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top"><code>a?</code></td>
+ * <td valign="top"></td>
+ * <td valign="top">zero or one instance of <code>a</code><br>
+ * </td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top"><code>a*</code></td>
+ * <td valign="top"></td>
+ * <td valign="top">zero or more instances of <code>a</code><br>
+ * </td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top"><code>a | b</code></td>
+ * <td valign="top"></td>
+ * <td valign="top">either <code>a</code> or <code>b</code><br>
+ * </td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top"><code>'a'</code></td>
+ * <td valign="top"></td>
+ * <td valign="top">the literal string between the quotes </td>
+ * </tr>
+ * </table>
+ * </td>
+ * </tr>
+ * </table>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * <p>Note:
+ * - Most UnicodeSet methods do not take a UErrorCode parameter because
+ * there are usually very few opportunities for failure other than a shortage
+ * of memory, error codes in low-level C++ string methods would be inconvenient,
+ * and the error code as the last parameter (ICU convention) would prevent
+ * the use of default parameter values.
+ * Instead, such methods set the UnicodeSet into a "bogus" state
+ * (see isBogus()) if an error occurs.
+ *
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter {
+private:
+ /**
+ * Enough for sets with few ranges.
+ * For example, White_Space has 10 ranges, list length 21.
+ */
+ static constexpr int32_t INITIAL_CAPACITY = 25;
+ // fFlags constant
+ static constexpr uint8_t kIsBogus = 1; // This set is bogus (i.e. not valid)
+
+ UChar32* list = stackList; // MUST be terminated with HIGH
+ int32_t capacity = INITIAL_CAPACITY; // capacity of list
+ int32_t len = 1; // length of list used; 1 <= len <= capacity
+ uint8_t fFlags = 0; // Bit flag (see constants above)
+
+ BMPSet *bmpSet = nullptr; // The set is frozen iff either bmpSet or stringSpan is not NULL.
+ UChar32* buffer = nullptr; // internal buffer, may be NULL
+ int32_t bufferCapacity = 0; // capacity of buffer
+
+ /**
+ * The pattern representation of this set. This may not be the
+ * most economical pattern. It is the pattern supplied to
+ * applyPattern(), with variables substituted and whitespace
+ * removed. For sets constructed without applyPattern(), or
+ * modified using the non-pattern API, this string will be empty,
+ * indicating that toPattern() must generate a pattern
+ * representation from the inversion list.
+ */
+ char16_t *pat = nullptr;
+ int32_t patLen = 0;
+
+ UVector* strings = nullptr; // maintained in sorted order
+ UnicodeSetStringSpan *stringSpan = nullptr;
+
+ /**
+ * Initial list array.
+ * Avoids some heap allocations, and list is never nullptr.
+ * Increases the object size a bit.
+ */
+ UChar32 stackList[INITIAL_CAPACITY];
+
+public:
+ /**
+ * Determine if this object contains a valid set.
+ * A bogus set has no value. It is different from an empty set.
+ * It can be used to indicate that no set value is available.
+ *
+ * @return true if the set is bogus/invalid, false otherwise
+ * @see setToBogus()
+ * @stable ICU 4.0
+ */
+ inline UBool isBogus(void) const;
+
+ /**
+ * Make this UnicodeSet object invalid.
+ * The set will test true with isBogus().
+ *
+ * A bogus set has no value. It is different from an empty set.
+ * It can be used to indicate that no set value is available.
+ *
+ * This utility function is used throughout the UnicodeSet
+ * implementation to indicate that a UnicodeSet operation failed,
+ * and may be used in other functions,
+ * especially but not exclusively when such functions do not
+ * take a UErrorCode for simplicity.
+ *
+ * @see isBogus()
+ * @stable ICU 4.0
+ */
+ void setToBogus();
+
+public:
+
+ enum {
+ /**
+ * Minimum value that can be stored in a UnicodeSet.
+ * @stable ICU 2.4
+ */
+ MIN_VALUE = 0,
+
+ /**
+ * Maximum value that can be stored in a UnicodeSet.
+ * @stable ICU 2.4
+ */
+ MAX_VALUE = 0x10ffff
+ };
+
+ //----------------------------------------------------------------
+ // Constructors &c
+ //----------------------------------------------------------------
+
+public:
+
+ /**
+ * Constructs an empty set.
+ * @stable ICU 2.0
+ */
+ UnicodeSet();
+
+ /**
+ * Constructs a set containing the given range. If <code>end <
+ * start</code> then an empty set is created.
+ *
+ * @param start first character, inclusive, of range
+ * @param end last character, inclusive, of range
+ * @stable ICU 2.4
+ */
+ UnicodeSet(UChar32 start, UChar32 end);
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * @internal
+ */
+ enum ESerialization {
+ kSerialized /* result of serialize() */
+ };
+
+ /**
+ * Constructs a set from the output of serialize().
+ *
+ * @param buffer the 16 bit array
+ * @param bufferLen the original length returned from serialize()
+ * @param serialization the value 'kSerialized'
+ * @param status error code
+ *
+ * @internal
+ */
+ UnicodeSet(const uint16_t buffer[], int32_t bufferLen,
+ ESerialization serialization, UErrorCode &status);
+#endif /* U_HIDE_INTERNAL_API */
+
+ /**
+ * Constructs a set from the given pattern. See the class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+ * contains a syntax error.
+ * @stable ICU 2.0
+ */
+ UnicodeSet(const UnicodeString& pattern,
+ UErrorCode& status);
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Constructs a set from the given pattern. See the class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param symbols a symbol table mapping variable names to values
+ * and stand-in characters to UnicodeSets; may be NULL
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+ * contains a syntax error.
+ * @internal
+ */
+ UnicodeSet(const UnicodeString& pattern,
+ uint32_t options,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+#endif /* U_HIDE_INTERNAL_API */
+
+ /**
+ * Constructs a set from the given pattern. See the class description
+ * for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param pos on input, the position in pattern at which to start parsing.
+ * On output, the position after the last character parsed.
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param symbols a symbol table mapping variable names to values
+ * and stand-in characters to UnicodeSets; may be NULL
+ * @param status input-output error code
+ * @stable ICU 2.8
+ */
+ UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
+ uint32_t options,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+
+ /**
+ * Constructs a set that is identical to the given UnicodeSet.
+ * @stable ICU 2.0
+ */
+ UnicodeSet(const UnicodeSet& o);
+
+ /**
+ * Destructs the set.
+ * @stable ICU 2.0
+ */
+ virtual ~UnicodeSet();
+
+ /**
+ * Assigns this object to be a copy of another.
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ UnicodeSet& operator=(const UnicodeSet& o);
+
+ /**
+ * Compares the specified object with this set for equality. Returns
+ * <tt>true</tt> if the two sets
+ * have the same size, and every member of the specified set is
+ * contained in this set (or equivalently, every member of this set is
+ * contained in the specified set).
+ *
+ * @param o set to be compared for equality with this set.
+ * @return <tt>true</tt> if the specified set is equal to this set.
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const UnicodeSet& o) const;
+
+ /**
+ * Compares the specified object with this set for inequality. Returns
+ * <tt>true</tt> if the specified set is not equal to this set.
+ * @stable ICU 2.0
+ */
+ inline UBool operator!=(const UnicodeSet& o) const;
+
+ /**
+ * Returns a copy of this object. All UnicodeFunctor objects have
+ * to support cloning in order to allow classes using
+ * UnicodeFunctors, such as Transliterator, to implement cloning.
+ * If this set is frozen, then the clone will be frozen as well.
+ * Use cloneAsThawed() for a mutable clone of a frozen set.
+ * @see cloneAsThawed
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet* clone() const;
+
+ /**
+ * Returns the hash code value for this set.
+ *
+ * @return the hash code value for this set.
+ * @see Object#hashCode()
+ * @stable ICU 2.0
+ */
+ virtual int32_t hashCode(void) const;
+
+ /**
+ * Get a UnicodeSet pointer from a USet
+ *
+ * @param uset a USet (the ICU plain C type for UnicodeSet)
+ * @return the corresponding UnicodeSet pointer.
+ *
+ * @stable ICU 4.2
+ */
+ inline static UnicodeSet *fromUSet(USet *uset);
+
+ /**
+ * Get a UnicodeSet pointer from a const USet
+ *
+ * @param uset a const USet (the ICU plain C type for UnicodeSet)
+ * @return the corresponding UnicodeSet pointer.
+ *
+ * @stable ICU 4.2
+ */
+ inline static const UnicodeSet *fromUSet(const USet *uset);
+
+ /**
+ * Produce a USet * pointer for this UnicodeSet.
+ * USet is the plain C type for UnicodeSet
+ *
+ * @return a USet pointer for this UnicodeSet
+ * @stable ICU 4.2
+ */
+ inline USet *toUSet();
+
+
+ /**
+ * Produce a const USet * pointer for this UnicodeSet.
+ * USet is the plain C type for UnicodeSet
+ *
+ * @return a const USet pointer for this UnicodeSet
+ * @stable ICU 4.2
+ */
+ inline const USet * toUSet() const;
+
+
+ //----------------------------------------------------------------
+ // Freezable API
+ //----------------------------------------------------------------
+
+ /**
+ * Determines whether the set has been frozen (made immutable) or not.
+ * See the ICU4J Freezable interface for details.
+ * @return true/false for whether the set has been frozen
+ * @see freeze
+ * @see cloneAsThawed
+ * @stable ICU 3.8
+ */
+ inline UBool isFrozen() const;
+
+ /**
+ * Freeze the set (make it immutable).
+ * Once frozen, it cannot be unfrozen and is therefore thread-safe
+ * until it is deleted.
+ * See the ICU4J Freezable interface for details.
+ * Freezing the set may also make some operations faster, for example
+ * contains() and span().
+ * A frozen set will not be modified. (It remains frozen.)
+ * @return this set.
+ * @see isFrozen
+ * @see cloneAsThawed
+ * @stable ICU 3.8
+ */
+ UnicodeSet *freeze();
+
+ /**
+ * Clone the set and make the clone mutable.
+ * See the ICU4J Freezable interface for details.
+ * @return the mutable clone
+ * @see freeze
+ * @see isFrozen
+ * @stable ICU 3.8
+ */
+ UnicodeSet *cloneAsThawed() const;
+
+ //----------------------------------------------------------------
+ // Public API
+ //----------------------------------------------------------------
+
+ /**
+ * Make this object represent the range `start - end`.
+ * If `start > end` then this object is set to an empty range.
+ * A frozen set will not be modified.
+ *
+ * @param start first character in the set, inclusive
+ * @param end last character in the set, inclusive
+ * @stable ICU 2.4
+ */
+ UnicodeSet& set(UChar32 start, UChar32 end);
+
+ /**
+ * Return true if the given position, in the given pattern, appears
+ * to be the start of a UnicodeSet pattern.
+ * @stable ICU 2.4
+ */
+ static UBool resemblesPattern(const UnicodeString& pattern,
+ int32_t pos);
+
+ /**
+ * Modifies this set to represent the set specified by the given
+ * pattern, ignoring Unicode Pattern_White_Space characters.
+ * See the class description for the syntax of the pattern language.
+ * A frozen set will not be modified.
+ * @param pattern a string specifying what characters are in the set
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+ * contains a syntax error.
+ * <em> Empties the set passed before applying the pattern.</em>
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeSet& applyPattern(const UnicodeString& pattern,
+ UErrorCode& status);
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Modifies this set to represent the set specified by the given
+ * pattern, optionally ignoring Unicode Pattern_White_Space characters.
+ * See the class description for the syntax of the pattern language.
+ * A frozen set will not be modified.
+ * @param pattern a string specifying what characters are in the set
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param symbols a symbol table mapping variable names to
+ * values and stand-ins to UnicodeSets; may be NULL
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+ * contains a syntax error.
+ *<em> Empties the set passed before applying the pattern.</em>
+ * @return a reference to this
+ * @internal
+ */
+ UnicodeSet& applyPattern(const UnicodeString& pattern,
+ uint32_t options,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+#endif /* U_HIDE_INTERNAL_API */
+
+ /**
+ * Parses the given pattern, starting at the given position. The
+ * character at pattern.charAt(pos.getIndex()) must be '[', or the
+ * parse fails. Parsing continues until the corresponding closing
+ * ']'. If a syntax error is encountered between the opening and
+ * closing bracket, the parse fails. Upon return from a successful
+ * parse, the ParsePosition is updated to point to the character
+ * following the closing ']', and a reference to this set, now
+ * representing the parsed pattern, is returned. This method calls
+ * itself recursively to parse embedded subpatterns.
+ *<em> Empties the set passed before applying the pattern.</em>
+ * A frozen set will not be modified.
+ *
+ * @param pattern the string containing the pattern to be parsed.
+ * The portion of the string from pos.getIndex(), which must be a
+ * '[', to the corresponding closing ']', is parsed.
+ * @param pos upon entry, the position at which to begin parsing.
+ * The character at pattern.charAt(pos.getIndex()) must be a '['.
+ * Upon return from a successful parse, pos.getIndex() is either
+ * the character after the closing ']' of the parsed pattern, or
+ * pattern.length() if the closing ']' is the last character of
+ * the pattern string.
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param symbols a symbol table mapping variable names to
+ * values and stand-ins to UnicodeSets; may be NULL
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+ * contains a syntax error.
+ * @return a reference to this
+ * @stable ICU 2.8
+ */
+ UnicodeSet& applyPattern(const UnicodeString& pattern,
+ ParsePosition& pos,
+ uint32_t options,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+
+ /**
+ * Returns a string representation of this set. If the result of
+ * calling this function is passed to a UnicodeSet constructor, it
+ * will produce another set that is equal to this one.
+ * A frozen set will not be modified.
+ * @param result the string to receive the rules. Previous
+ * contents will be deleted.
+ * @param escapeUnprintable if true then convert unprintable
+ * characters to their hex escape representations, \\uxxxx or
+ * \\Uxxxxxxxx. Unprintable characters are those other than
+ * U+000A, U+0020..U+007E.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeString& toPattern(UnicodeString& result,
+ UBool escapeUnprintable = false) const;
+
+ /**
+ * Modifies this set to contain those code points which have the given value
+ * for the given binary or enumerated property, as returned by
+ * u_getIntPropertyValue. Prior contents of this set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
+ * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
+ * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
+ *
+ * @param value a value in the range u_getIntPropertyMinValue(prop)..
+ * u_getIntPropertyMaxValue(prop), with one exception. If prop is
+ * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
+ * rather a mask value produced by U_GET_GC_MASK(). This allows grouped
+ * categories such as [:L:] to be represented.
+ *
+ * @param ec error code input/output parameter
+ *
+ * @return a reference to this set
+ *
+ * @stable ICU 2.4
+ */
+ UnicodeSet& applyIntPropertyValue(UProperty prop,
+ int32_t value,
+ UErrorCode& ec);
+
+ /**
+ * Modifies this set to contain those code points which have the
+ * given value for the given property. Prior contents of this
+ * set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param prop a property alias, either short or long. The name is matched
+ * loosely. See PropertyAliases.txt for names and a description of loose
+ * matching. If the value string is empty, then this string is interpreted
+ * as either a General_Category value alias, a Script value alias, a binary
+ * property alias, or a special ID. Special IDs are matched loosely and
+ * correspond to the following sets:
+ *
+ * "ANY" = [\\u0000-\\U0010FFFF],
+ * "ASCII" = [\\u0000-\\u007F],
+ * "Assigned" = [:^Cn:].
+ *
+ * @param value a value alias, either short or long. The name is matched
+ * loosely. See PropertyValueAliases.txt for names and a description of
+ * loose matching. In addition to aliases listed, numeric values and
+ * canonical combining classes may be expressed numerically, e.g., ("nv",
+ * "0.5") or ("ccc", "220"). The value string may also be empty.
+ *
+ * @param ec error code input/output parameter
+ *
+ * @return a reference to this set
+ *
+ * @stable ICU 2.4
+ */
+ UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
+ const UnicodeString& value,
+ UErrorCode& ec);
+
+ /**
+ * Returns the number of elements in this set (its cardinality).
+ * Note that the elements of a set may include both individual
+ * codepoints and strings.
+ *
+ * @return the number of elements in this set (its cardinality).
+ * @stable ICU 2.0
+ */
+ virtual int32_t size(void) const;
+
+ /**
+ * Returns <tt>true</tt> if this set contains no elements.
+ *
+ * @return <tt>true</tt> if this set contains no elements.
+ * @stable ICU 2.0
+ */
+ virtual UBool isEmpty(void) const;
+
+ /**
+ * Returns true if this set contains the given character.
+ * This function works faster with a frozen set.
+ * @param c character to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 2.0
+ */
+ virtual UBool contains(UChar32 c) const;
+
+ /**
+ * Returns true if this set contains every character
+ * of the given range.
+ * @param start first character, inclusive, of the range
+ * @param end last character, inclusive, of the range
+ * @return true if the test condition is met
+ * @stable ICU 2.0
+ */
+ virtual UBool contains(UChar32 start, UChar32 end) const;
+
+ /**
+ * Returns <tt>true</tt> if this set contains the given
+ * multicharacter string.
+ * @param s string to be checked for containment
+ * @return <tt>true</tt> if this set contains the specified string
+ * @stable ICU 2.4
+ */
+ UBool contains(const UnicodeString& s) const;
+
+ /**
+ * Returns true if this set contains all the characters and strings
+ * of the given set.
+ * @param c set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 2.4
+ */
+ virtual UBool containsAll(const UnicodeSet& c) const;
+
+ /**
+ * Returns true if this set contains all the characters
+ * of the given string.
+ * @param s string containing characters to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 2.4
+ */
+ UBool containsAll(const UnicodeString& s) const;
+
+ /**
+ * Returns true if this set contains none of the characters
+ * of the given range.
+ * @param start first character, inclusive, of the range
+ * @param end last character, inclusive, of the range
+ * @return true if the test condition is met
+ * @stable ICU 2.4
+ */
+ UBool containsNone(UChar32 start, UChar32 end) const;
+
+ /**
+ * Returns true if this set contains none of the characters and strings
+ * of the given set.
+ * @param c set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 2.4
+ */
+ UBool containsNone(const UnicodeSet& c) const;
+
+ /**
+ * Returns true if this set contains none of the characters
+ * of the given string.
+ * @param s string containing characters to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 2.4
+ */
+ UBool containsNone(const UnicodeString& s) const;
+
+ /**
+ * Returns true if this set contains one or more of the characters
+ * in the given range.
+ * @param start first character, inclusive, of the range
+ * @param end last character, inclusive, of the range
+ * @return true if the condition is met
+ * @stable ICU 2.4
+ */
+ inline UBool containsSome(UChar32 start, UChar32 end) const;
+
+ /**
+ * Returns true if this set contains one or more of the characters
+ * and strings of the given set.
+ * @param s The set to be checked for containment
+ * @return true if the condition is met
+ * @stable ICU 2.4
+ */
+ inline UBool containsSome(const UnicodeSet& s) const;
+
+ /**
+ * Returns true if this set contains one or more of the characters
+ * of the given string.
+ * @param s string containing characters to be checked for containment
+ * @return true if the condition is met
+ * @stable ICU 2.4
+ */
+ inline UBool containsSome(const UnicodeString& s) const;
+
+ /**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ * 0 if the start of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+ int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ /**
+ * Returns the end of the substring of the input string according to the USetSpanCondition.
+ * Same as <code>start+span(s.getBuffer()+start, s.length()-start, spanCondition)</code>
+ * after pinning start to 0<=start<=s.length().
+ * @param s the string
+ * @param start the start index in the string for the span operation
+ * @param spanCondition specifies the containment condition
+ * @return the exclusive end of the substring according to the spanCondition;
+ * the substring s.tempSubStringBetween(start, end) fulfills the spanCondition
+ * @stable ICU 4.4
+ * @see USetSpanCondition
+ */
+ inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;
+
+ /**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ * the string length if the end of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+ int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ /**
+ * Returns the start of the substring of the input string according to the USetSpanCondition.
+ * Same as <code>spanBack(s.getBuffer(), limit, spanCondition)</code>
+ * after pinning limit to 0<=limit<=s.length().
+ * @param s the string
+ * @param limit the exclusive-end index in the string for the span operation
+ * (use s.length() or INT32_MAX for spanning back from the end of the string)
+ * @param spanCondition specifies the containment condition
+ * @return the start of the substring according to the spanCondition;
+ * the substring s.tempSubStringBetween(start, limit) fulfills the spanCondition
+ * @stable ICU 4.4
+ * @see USetSpanCondition
+ */
+ inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;
+
+ /**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ * 0 if the start of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+ int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ /**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ * the string length if the end of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+ int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ /**
+ * Implement UnicodeMatcher::matches()
+ * @stable ICU 2.4
+ */
+ virtual UMatchDegree matches(const Replaceable& text,
+ int32_t& offset,
+ int32_t limit,
+ UBool incremental);
+
+private:
+ /**
+ * Returns the longest match for s in text at the given position.
+ * If limit > start then match forward from start+1 to limit
+ * matching all characters except s.charAt(0). If limit < start,
+ * go backward starting from start-1 matching all characters
+ * except s.charAt(s.length()-1). This method assumes that the
+ * first character, text.charAt(start), matches s, so it does not
+ * check it.
+ * @param text the text to match
+ * @param start the first character to match. In the forward
+ * direction, text.charAt(start) is matched against s.charAt(0).
+ * In the reverse direction, it is matched against
+ * s.charAt(s.length()-1).
+ * @param limit the limit offset for matching, either last+1 in
+ * the forward direction, or last-1 in the reverse direction,
+ * where last is the index of the last character to match.
+ * @param s
+ * @return If part of s matches up to the limit, return |limit -
+ * start|. If all of s matches before reaching the limit, return
+ * s.length(). If there is a mismatch between s and text, return
+ * 0
+ */
+ static int32_t matchRest(const Replaceable& text,
+ int32_t start, int32_t limit,
+ const UnicodeString& s);
+
+ /**
+ * Returns the smallest value i such that c < list[i]. Caller
+ * must ensure that c is a legal value or this method will enter
+ * an infinite loop. This method performs a binary search.
+ * @param c a character in the range MIN_VALUE..MAX_VALUE
+ * inclusive
+ * @return the smallest integer i in the range 0..len-1,
+ * inclusive, such that c < list[i]
+ */
+ int32_t findCodePoint(UChar32 c) const;
+
+public:
+
+ /**
+ * Implementation of UnicodeMatcher API. Union the set of all
+ * characters that may be matched by this object into the given
+ * set.
+ * @param toUnionTo the set into which to union the source characters
+ * @stable ICU 2.4
+ */
+ virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
+
+ /**
+ * Returns the index of the given character within this set, where
+ * the set is ordered by ascending code point. If the character
+ * is not in this set, return -1. The inverse of this method is
+ * <code>charAt()</code>.
+ * @return an index from 0..size()-1, or -1
+ * @stable ICU 2.4
+ */
+ int32_t indexOf(UChar32 c) const;
+
+ /**
+ * Returns the character at the given index within this set, where
+ * the set is ordered by ascending code point. If the index is
+ * out of range, return (UChar32)-1. The inverse of this method is
+ * <code>indexOf()</code>.
+ * @param index an index from 0..size()-1
+ * @return the character at the given index, or (UChar32)-1.
+ * @stable ICU 2.4
+ */
+ UChar32 charAt(int32_t index) const;
+
+ /**
+ * Adds the specified range to this set if it is not already
+ * present. If this set already contains the specified range,
+ * the call leaves this set unchanged. If <code>start > end</code>
+ * then an empty range is added, leaving the set unchanged.
+ * This is equivalent to a boolean logic OR, or a set UNION.
+ * A frozen set will not be modified.
+ *
+ * @param start first character, inclusive, of range to be added
+ * to this set.
+ * @param end last character, inclusive, of range to be added
+ * to this set.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& add(UChar32 start, UChar32 end);
+
+ /**
+ * Adds the specified character to this set if it is not already
+ * present. If this set already contains the specified character,
+ * the call leaves this set unchanged.
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ UnicodeSet& add(UChar32 c);
+
+ /**
+ * Adds the specified multicharacter to this set if it is not already
+ * present. If this set already contains the multicharacter,
+ * the call leaves this set unchanged.
+ * Thus "ch" => {"ch"}
+ * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& add(const UnicodeString& s);
+
+ private:
+ /**
+ * @return a code point IF the string consists of a single one.
+ * otherwise returns -1.
+ * @param s string to test
+ */
+ static int32_t getSingleCP(const UnicodeString& s);
+
+ void _add(const UnicodeString& s);
+
+ public:
+ /**
+ * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
+ * If this set already contains any particular character, it has no effect on that character.
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& addAll(const UnicodeString& s);
+
+ /**
+ * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * Characters of this set that do not occur in the string are removed.
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& retainAll(const UnicodeString& s);
+
+ /**
+ * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * Each such character is removed if it is in this set, or added if it is not.
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& complementAll(const UnicodeString& s);
+
+ /**
+ * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * Characters of the string that are not in this set have no effect.
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& removeAll(const UnicodeString& s);
+
+ /**
+ * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
+ * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+ * @param s the source string
+ * @return a newly created set containing the given string.
+ * The caller owns the return object and is responsible for deleting it.
+ * @stable ICU 2.4
+ */
+ static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
+
+
+ /**
+ * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
+ * @param s the source string
+ * @return a newly created set containing the given characters
+ * The caller owns the return object and is responsible for deleting it.
+ * @stable ICU 2.4
+ */
+ static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
+
+ /**
+ * Retain only the elements in this set that are contained in the
+ * specified range. If <code>start > end</code> then an empty range is
+ * retained, leaving the set empty. This is equivalent to
+ * a boolean logic AND, or a set INTERSECTION.
+ * A frozen set will not be modified.
+ *
+ * @param start first character, inclusive, of range to be retained
+ * in this set.
+ * @param end last character, inclusive, of range to be retained
+ * in this set.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& retain(UChar32 start, UChar32 end);
+
+
+ /**
+ * Retain the specified character from this set if it is present.
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ UnicodeSet& retain(UChar32 c);
+
+ /**
+ * Removes the specified range from this set if it is present.
+ * The set will not contain the specified range once the call
+ * returns. If <code>start > end</code> then an empty range is
+ * removed, leaving the set unchanged.
+ * A frozen set will not be modified.
+ *
+ * @param start first character, inclusive, of range to be removed
+ * from this set.
+ * @param end last character, inclusive, of range to be removed
+ * from this set.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& remove(UChar32 start, UChar32 end);
+
+ /**
+ * Removes the specified character from this set if it is present.
+ * The set will not contain the specified character once the call
+ * returns.
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ UnicodeSet& remove(UChar32 c);
+
+ /**
+ * Removes the specified string from this set if it is present.
+ * The set will not contain the specified string once the call
+ * returns.
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& remove(const UnicodeString& s);
+
+ /**
+ * Inverts this set. This operation modifies this set so that
+ * its value is its complement. This is equivalent to
+ * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& complement(void);
+
+ /**
+ * Complements the specified range in this set. Any character in
+ * the range will be removed if it is in this set, or will be
+ * added if it is not in this set. If <code>start > end</code>
+ * then an empty range is complemented, leaving the set unchanged.
+ * This is equivalent to a boolean logic XOR.
+ * A frozen set will not be modified.
+ *
+ * @param start first character, inclusive, of range to be complemented
+ * in this set.
+ * @param end last character, inclusive, of range to be complemented
+ * in this set.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& complement(UChar32 start, UChar32 end);
+
+ /**
+ * Complements the specified character in this set. The character
+ * will be removed if it is in this set, or will be added if it is
+ * not in this set.
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ UnicodeSet& complement(UChar32 c);
+
+ /**
+ * Complement the specified string in this set.
+ * The string will be removed if it is in this set, or will be added
+ * if it is not in this set.
+ * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+ * A frozen set will not be modified.
+ * @param s the string to complement
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& complement(const UnicodeString& s);
+
+ /**
+ * Adds all of the elements in the specified set to this set if
+ * they're not already present. This operation effectively
+ * modifies this set so that its value is the <i>union</i> of the two
+ * sets. The behavior of this operation is unspecified if the specified
+ * collection is modified while the operation is in progress.
+ * A frozen set will not be modified.
+ *
+ * @param c set whose elements are to be added to this set.
+ * @see #add(UChar32, UChar32)
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& addAll(const UnicodeSet& c);
+
+ /**
+ * Retains only the elements in this set that are contained in the
+ * specified set. In other words, removes from this set all of
+ * its elements that are not contained in the specified set. This
+ * operation effectively modifies this set so that its value is
+ * the <i>intersection</i> of the two sets.
+ * A frozen set will not be modified.
+ *
+ * @param c set that defines which elements this set will retain.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& retainAll(const UnicodeSet& c);
+
+ /**
+ * Removes from this set all of its elements that are contained in the
+ * specified set. This operation effectively modifies this
+ * set so that its value is the <i>asymmetric set difference</i> of
+ * the two sets.
+ * A frozen set will not be modified.
+ *
+ * @param c set that defines which elements will be removed from
+ * this set.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& removeAll(const UnicodeSet& c);
+
+ /**
+ * Complements in this set all elements contained in the specified
+ * set. Any character in the other set will be removed if it is
+ * in this set, or will be added if it is not in this set.
+ * A frozen set will not be modified.
+ *
+ * @param c set that defines which elements will be xor'ed from
+ * this set.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeSet& complementAll(const UnicodeSet& c);
+
+ /**
+ * Removes all of the elements from this set. This set will be
+ * empty after this call returns.
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& clear(void);
+
+ /**
+ * Close this set over the given attribute. For the attribute
+ * USET_CASE, the result is to modify this set so that:
+ *
+ * 1. For each character or string 'a' in this set, all strings or
+ * characters 'b' such that foldCase(a) == foldCase(b) are added
+ * to this set.
+ *
+ * 2. For each string 'e' in the resulting set, if e !=
+ * foldCase(e), 'e' will be removed.
+ *
+ * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
+ *
+ * (Here foldCase(x) refers to the operation u_strFoldCase, and a
+ * == b denotes that the contents are the same, not pointer
+ * comparison.)
+ *
+ * A frozen set will not be modified.
+ *
+ * @param attribute bitmask for attributes to close over.
+ * Currently only the USET_CASE bit is supported. Any undefined bits
+ * are ignored.
+ * @return a reference to this set.
+ * @stable ICU 4.2
+ */
+ UnicodeSet& closeOver(int32_t attribute);
+
+ /**
+ * Remove all strings from this set.
+ *
+ * @return a reference to this set.
+ * @stable ICU 4.2
+ */
+ virtual UnicodeSet &removeAllStrings();
+
+ /**
+ * Iteration method that returns the number of ranges contained in
+ * this set.
+ * @see #getRangeStart
+ * @see #getRangeEnd
+ * @stable ICU 2.4
+ */
+ virtual int32_t getRangeCount(void) const;
+
+ /**
+ * Iteration method that returns the first character in the
+ * specified range of this set.
+ * @see #getRangeCount
+ * @see #getRangeEnd
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getRangeStart(int32_t index) const;
+
+ /**
+ * Iteration method that returns the last character in the
+ * specified range of this set.
+ * @see #getRangeStart
+ * @see #getRangeCount
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getRangeEnd(int32_t index) const;
+
+ /**
+ * Serializes this set into an array of 16-bit integers. Serialization
+ * (currently) only records the characters in the set; multicharacter
+ * strings are ignored.
+ *
+ * The array has the following format (each line is one 16-bit
+ * integer):
+ *
+ * length = (n+2*m) | (m!=0?0x8000:0)
+ * bmpLength = n; present if m!=0
+ * bmp[0]
+ * bmp[1]
+ * ...
+ * bmp[n-1]
+ * supp-high[0]
+ * supp-low[0]
+ * supp-high[1]
+ * supp-low[1]
+ * ...
+ * supp-high[m-1]
+ * supp-low[m-1]
+ *
+ * The array starts with a header. After the header are n bmp
+ * code points, then m supplementary code points. Either n or m
+ * or both may be zero. n+2*m is always <= 0x7FFF.
+ *
+ * If there are no supplementary characters (if m==0) then the
+ * header is one 16-bit integer, 'length', with value n.
+ *
+ * If there are supplementary characters (if m!=0) then the header
+ * is two 16-bit integers. The first, 'length', has value
+ * (n+2*m)|0x8000. The second, 'bmpLength', has value n.
+ *
+ * After the header the code points are stored in ascending order.
+ * Supplementary code points are stored as most significant 16
+ * bits followed by least significant 16 bits.
+ *
+ * @param dest pointer to buffer of destCapacity 16-bit integers.
+ * May be NULL only if destCapacity is zero.
+ * @param destCapacity size of dest, or zero. Must not be negative.
+ * @param ec error code. Will be set to U_INDEX_OUTOFBOUNDS_ERROR
+ * if n+2*m > 0x7FFF. Will be set to U_BUFFER_OVERFLOW_ERROR if
+ * n+2*m+(m!=0?2:1) > destCapacity.
+ * @return the total length of the serialized format, including
+ * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
+ * than U_BUFFER_OVERFLOW_ERROR.
+ * @stable ICU 2.4
+ */
+ int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
+
+ /**
+ * Reallocate this object's internal structures to take up the least
+ * possible space, without changing this object's value.
+ * A frozen set will not be modified.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeSet& compact();
+
+ /**
+ * Return the class ID for this class. This is useful only for
+ * comparing to a return value from getDynamicClassID(). For example:
+ * <pre>
+ * . Base* polymorphic_pointer = createPolymorphicObject();
+ * . if (polymorphic_pointer->getDynamicClassID() ==
+ * . Derived::getStaticClassID()) ...
+ * </pre>
+ * @return The class ID for all objects of this class.
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+ /**
+ * Implement UnicodeFunctor API.
+ *
+ * @return The class ID for this object. All objects of a given
+ * class have the same class ID. Objects of other classes have
+ * different class IDs.
+ * @stable ICU 2.4
+ */
+ virtual UClassID getDynamicClassID(void) const;
+
+private:
+
+ // Private API for the USet API
+
+ friend class USetAccess;
+
+ const UnicodeString* getString(int32_t index) const;
+
+ //----------------------------------------------------------------
+ // RuleBasedTransliterator support
+ //----------------------------------------------------------------
+
+private:
+
+ /**
+ * Returns <tt>true</tt> if this set contains any character whose low byte
+ * is the given value. This is used by <tt>RuleBasedTransliterator</tt> for
+ * indexing.
+ */
+ virtual UBool matchesIndexValue(uint8_t v) const;
+
+private:
+ friend class RBBIRuleScanner;
+
+ //----------------------------------------------------------------
+ // Implementation: Clone as thawed (see ICU4J Freezable)
+ //----------------------------------------------------------------
+
+ UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
+ UnicodeSet& copyFrom(const UnicodeSet& o, UBool asThawed);
+
+ //----------------------------------------------------------------
+ // Implementation: Pattern parsing
+ //----------------------------------------------------------------
+
+ void applyPatternIgnoreSpace(const UnicodeString& pattern,
+ ParsePosition& pos,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+
+ void applyPattern(RuleCharacterIterator& chars,
+ const SymbolTable* symbols,
+ UnicodeString& rebuiltPat,
+ uint32_t options,
+ UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
+ int32_t depth,
+ UErrorCode& ec);
+
+ //----------------------------------------------------------------
+ // Implementation: Utility methods
+ //----------------------------------------------------------------
+
+ static int32_t nextCapacity(int32_t minCapacity);
+
+ bool ensureCapacity(int32_t newLen);
+
+ bool ensureBufferCapacity(int32_t newLen);
+
+ void swapBuffers(void);
+
+ UBool allocateStrings(UErrorCode &status);
+ UBool hasStrings() const;
+ int32_t stringsSize() const;
+ UBool stringsContains(const UnicodeString &s) const;
+
+ UnicodeString& _toPattern(UnicodeString& result,
+ UBool escapeUnprintable) const;
+
+ UnicodeString& _generatePattern(UnicodeString& result,
+ UBool escapeUnprintable) const;
+
+ static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
+
+ static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
+
+ //----------------------------------------------------------------
+ // Implementation: Fundamental operators
+ //----------------------------------------------------------------
+
+ void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
+
+ void add(const UChar32* other, int32_t otherLen, int8_t polarity);
+
+ void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
+
+ /**
+ * Return true if the given position, in the given pattern, appears
+ * to be the start of a property set pattern [:foo:], \\p{foo}, or
+ * \\P{foo}, or \\N{name}.
+ */
+ static UBool resemblesPropertyPattern(const UnicodeString& pattern,
+ int32_t pos);
+
+ static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
+ int32_t iterOpts);
+
+ /**
+ * Parse the given property pattern at the given parse position
+ * and set this UnicodeSet to the result.
+ *
+ * The original design document is out of date, but still useful.
+ * Ignore the property and value names:
+ * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/unicodeset_properties.html
+ *
+ * Recognized syntax:
+ *
+ * [:foo:] [:^foo:] - white space not allowed within "[:" or ":]"
+ * \\p{foo} \\P{foo} - white space not allowed within "\\p" or "\\P"
+ * \\N{name} - white space not allowed within "\\N"
+ *
+ * Other than the above restrictions, Unicode Pattern_White_Space characters are ignored.
+ * Case is ignored except in "\\p" and "\\P" and "\\N". In 'name' leading
+ * and trailing space is deleted, and internal runs of whitespace
+ * are collapsed to a single space.
+ *
+ * We support binary properties, enumerated properties, and the
+ * following non-enumerated properties:
+ *
+ * Numeric_Value
+ * Name
+ * Unicode_1_Name
+ *
+ * @param pattern the pattern string
+ * @param ppos on entry, the position at which to begin parsing.
+ * This should be one of the locations marked '^':
+ *
+ * [:blah:] \\p{blah} \\P{blah} \\N{name}
+ * ^ % ^ % ^ % ^ %
+ *
+ * On return, the position after the last character parsed, that is,
+ * the locations marked '%'. If the parse fails, ppos is returned
+ * unchanged.
+ * @param ec status
+ * @return a reference to this.
+ */
+ UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
+ ParsePosition& ppos,
+ UErrorCode &ec);
+
+ void applyPropertyPattern(RuleCharacterIterator& chars,
+ UnicodeString& rebuiltPat,
+ UErrorCode& ec);
+
+ static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
+
+ /**
+ * A filter that returns true if the given code point should be
+ * included in the UnicodeSet being constructed.
+ */
+ typedef UBool (*Filter)(UChar32 codePoint, void* context);
+
+ /**
+ * Given a filter, set this UnicodeSet to the code points
+ * contained by that filter. The filter MUST be
+ * property-conformant. That is, if it returns value v for one
+ * code point, then it must return v for all affiliated code
+ * points, as defined by the inclusions list. See
+ * getInclusions().
+ * The src argument of getInclusions() is a UPropertySource value.
+ */
+ void applyFilter(Filter filter,
+ void* context,
+ const UnicodeSet* inclusions,
+ UErrorCode &status);
+
+ // UCPMap is now stable ICU 63
+ void applyIntPropertyValue(const UCPMap *map,
+ UCPMapValueFilter *filter, const void *context,
+ UErrorCode &errorCode);
+
+ /**
+ * Set the new pattern to cache.
+ */
+ void setPattern(const UnicodeString& newPat) {
+ setPattern(newPat.getBuffer(), newPat.length());
+ }
+ void setPattern(const char16_t *newPat, int32_t newPatLen);
+ /**
+ * Release existing cached pattern.
+ */
+ void releasePattern();
+
+ friend class UnicodeSetIterator;
+};
+
+
+
+inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const { // Inequality: the logical negation of operator==.
+ return !operator==(o);
+}
+
+inline UBool UnicodeSet::isFrozen() const { // Reports the frozen state from the two pointers tested below.
+ return (UBool)(bmpSet!=NULL || stringSpan!=NULL); // true when bmpSet or stringSpan is non-NULL
+}
+
+inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const { // Range overload: negation of containsNone(start, end).
+ return !containsNone(start, end);
+}
+
+inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const { // Set overload: negation of containsNone(s).
+ return !containsNone(s);
+}
+
+inline UBool UnicodeSet::containsSome(const UnicodeString& s) const { // String overload: negation of containsNone(s).
+ return !containsNone(s);
+}
+
+inline UBool UnicodeSet::isBogus() const { // Reports whether the kIsBogus flag is set on this object.
+ return (UBool)(fFlags & kIsBogus); // masks fFlags with kIsBogus and narrows the result to UBool
+}
+
+inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) { // Views a USet pointer as a UnicodeSet pointer.
+ return reinterpret_cast<UnicodeSet *>(uset); // pointer reinterpretation only; nothing is copied or converted
+}
+
+inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) { // Const overload: views a const USet pointer as a const UnicodeSet pointer.
+ return reinterpret_cast<const UnicodeSet *>(uset); // pointer reinterpretation only; constness is kept
+}
+
+inline USet *UnicodeSet::toUSet() { // Views this object as a USet pointer (inverse of fromUSet).
+ return reinterpret_cast<USet *>(this); // pointer reinterpretation only; nothing is copied or converted
+}
+
+inline const USet *UnicodeSet::toUSet() const { // Const overload: views this object as a const USet pointer.
+ return reinterpret_cast<const USet *>(this); // pointer reinterpretation only; constness is kept
+}
+
+inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const { // UnicodeString overload: pins start, then delegates to the pointer-based span().
+ int32_t sLength=s.length();
+ if(start<0) { // pin start into [0, sLength] before it is used as a buffer offset
+ start=0;
+ } else if(start>sLength) {
+ start=sLength;
+ }
+ return start+span(s.getBuffer()+start, sLength-start, spanCondition); // span the tail from start; adding start back makes the result an index into s
+}
+
+inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const { // UnicodeString overload: pins limit, then delegates to the pointer-based spanBack().
+ int32_t sLength=s.length();
+ if(limit<0) { // pin limit into [0, sLength]
+ limit=0;
+ } else if(limit>sLength) {
+ limit=sLength;
+ }
+ return spanBack(s.getBuffer(), limit, spanCondition); // the pinned limit is passed as the second argument, with the buffer start as the first
+}
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/unistr.h b/thirdparty/icu4c/common/unicode/unistr.h
new file mode 100644
index 0000000000..456389f265
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/unistr.h
@@ -0,0 +1,4757 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1998-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File unistr.h
+*
+* Modification History:
+*
+* Date Name Description
+* 09/25/98 stephen Creation.
+* 11/11/98 stephen Changed per 11/9 code review.
+* 04/20/99 stephen Overhauled per 4/16 code review.
+* 11/18/99 aliu Made to inherit from Replaceable. Added method
+* handleReplaceBetween(); other methods unchanged.
+* 06/25/01 grhoten Remove dependency on iostream.
+******************************************************************************
+*/
+
+#ifndef UNISTR_H
+#define UNISTR_H
+
+/**
+ * \file
+ * \brief C++ API: Unicode String
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include <cstddef>
+#include "unicode/char16ptr.h"
+#include "unicode/rep.h"
+#include "unicode/std_string.h"
+#include "unicode/stringpiece.h"
+#include "unicode/bytestream.h"
+
+struct UConverter; // unicode/ucnv.h
+
+#ifndef USTRING_H
+/**
+ * \ingroup ustring_ustrlen
+ */
+U_CAPI int32_t U_EXPORT2
+u_strlen(const UChar *s);
+#endif
+
+U_NAMESPACE_BEGIN
+
+#if !UCONFIG_NO_BREAK_ITERATION
+class BreakIterator; // unicode/brkiter.h
+#endif
+class Edits;
+
+U_NAMESPACE_END
+
+// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
+/**
+ * Internal string case mapping function type.
+ * All error checking must be done.
+ * src and dest must not overlap.
+ * @internal
+ */
+typedef int32_t U_CALLCONV
+UStringCaseMapper(int32_t caseLocale, uint32_t options,
+#if !UCONFIG_NO_BREAK_ITERATION
+ icu::BreakIterator *iter,
+#endif
+ char16_t *dest, int32_t destCapacity,
+ const char16_t *src, int32_t srcLength,
+ icu::Edits *edits,
+ UErrorCode &errorCode);
+
+U_NAMESPACE_BEGIN
+
+class Locale; // unicode/locid.h
+class StringCharacterIterator;
+class UnicodeStringAppendable; // unicode/appendable.h
+
+/* The <iostream> include has been moved to unicode/ustream.h */
+
+/**
+ * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
+ * which constructs a Unicode string from an invariant-character char * string.
+ * About invariant characters see utypes.h.
+ * This constructor has no runtime dependency on conversion code and is
+ * therefore recommended over ones taking a charset name string
+ * (where the empty string "" indicates invariant-character conversion).
+ *
+ * @stable ICU 3.2
+ */
+#define US_INV icu::UnicodeString::kInvariant
+
+/**
+ * Unicode String literals in C++.
+ *
+ * Note: these macros are not recommended for new code.
+ * Prior to the availability of C++11 and u"unicode string literals",
+ * these macros were provided for portability and efficiency when
+ * initializing UnicodeStrings from literals.
+ *
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * The string parameter must be a C string literal.
+ * The length of the string, not including the terminating
+ * `NUL`, must be specified as a constant.
+ * @stable ICU 2.0
+ */
+#if !U_CHAR16_IS_TYPEDEF
+# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length)
+#else
+# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length)
+#endif
+
+/**
+ * Unicode String literals in C++.
+ * Dependent on the platform properties, different UnicodeString
+ * constructors should be used to create a UnicodeString object from
+ * a string literal.
+ * The macros are defined for improved performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * The string parameter must be a C string literal.
+ * @stable ICU 2.0
+ */
+#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
+
+/**
+ * \def UNISTR_FROM_CHAR_EXPLICIT
+ * This can be defined to be empty or "explicit".
+ * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32)
+ * constructors are marked as explicit, preventing their inadvertent use.
+ * @stable ICU 49
+ */
+#ifndef UNISTR_FROM_CHAR_EXPLICIT
+# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
+ // Auto-"explicit" in ICU library code.
+# define UNISTR_FROM_CHAR_EXPLICIT explicit
+# else
+ // Empty by default for source code compatibility.
+# define UNISTR_FROM_CHAR_EXPLICIT
+# endif
+#endif
+
+/**
+ * \def UNISTR_FROM_STRING_EXPLICIT
+ * This can be defined to be empty or "explicit".
+ * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *)
+ * constructors are marked as explicit, preventing their inadvertent use.
+ *
+ * In particular, this helps prevent accidentally depending on ICU conversion code
+ * by passing a string literal into an API with a const UnicodeString & parameter.
+ * @stable ICU 49
+ */
+#ifndef UNISTR_FROM_STRING_EXPLICIT
+# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
+ // Auto-"explicit" in ICU library code.
+# define UNISTR_FROM_STRING_EXPLICIT explicit
+# else
+ // Empty by default for source code compatibility.
+# define UNISTR_FROM_STRING_EXPLICIT
+# endif
+#endif
+
+/**
+ * \def UNISTR_OBJECT_SIZE
+ * Desired sizeof(UnicodeString) in bytes.
+ * It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
+ * The object size may want to be a multiple of 16 bytes,
+ * which is a common granularity for heap allocation.
+ *
+ * Any space inside the object beyond sizeof(vtable pointer) + 2
+ * is available for storing short strings inside the object.
+ * The bigger the object, the longer a string that can be stored inside the object,
+ * without additional heap allocation.
+ *
+ * Depending on a platform's pointer size, pointer alignment requirements,
+ * and struct padding, the compiler will usually round up sizeof(UnicodeString)
+ * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
+ * to hold the fields for heap-allocated strings.
+ * Such a minimum size also ensures that the object is easily large enough
+ * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH).
+ *
+ * sizeof(UnicodeString) >= 48 should work for all known platforms.
+ *
+ * For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
+ * sizeof(UnicodeString) = 64 would leave space for
+ * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
+ * char16_ts stored inside the object.
+ *
+ * The minimum object size on a 64-bit machine would be
+ * 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
+ * and the internal buffer would hold up to 11 char16_ts in that case.
+ *
+ * @see U16_MAX_LENGTH
+ * @stable ICU 56
+ */
+#ifndef UNISTR_OBJECT_SIZE
+# define UNISTR_OBJECT_SIZE 64
+#endif
+
+/**
+ * UnicodeString is a string class that stores Unicode characters directly and provides
+ * functionality similar to that of the Java String and StringBuffer/StringBuilder classes.
+ * It is a concrete implementation of the abstract class Replaceable (for transliteration).
+ *
+ * The UnicodeString equivalent of std::string’s clear() is remove().
+ *
+ * A UnicodeString may "alias" an external array of characters
+ * (that is, point to it, rather than own the array)
+ * whose lifetime must then at least match the lifetime of the aliasing object.
+ * This aliasing may be preserved when returning a UnicodeString by value,
+ * depending on the compiler and the function implementation,
+ * via Return Value Optimization (RVO) or the move assignment operator.
+ * (However, the copy assignment operator does not preserve aliasing.)
+ * For details see the description of storage models at the end of the class API docs
+ * and in the User Guide chapter linked from there.
+ *
+ * The UnicodeString class is not suitable for subclassing.
+ *
+ * For an overview of Unicode strings in C and C++ see the
+ * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#strings-in-cc).
+ *
+ * In ICU, a Unicode string consists of 16-bit Unicode *code units*.
+ * A Unicode character may be stored with either one code unit
+ * (the most common case) or with a matched pair of special code units
+ * ("surrogates"). The data type for code units is char16_t.
+ * For single-character handling, a Unicode character code *point* is a value
+ * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.
+ *
+ * Indexes and offsets into and lengths of strings always count code units, not code points.
+ * This is the same as with multi-byte char* strings in traditional string handling.
+ * Operations on partial strings typically do not test for code point boundaries.
+ * If necessary, the user needs to take care of such boundaries by testing for the code unit
+ * values or by using functions like
+ * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
+ * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).
+ *
+ * UnicodeString methods are more lenient with regard to input parameter values
+ * than other ICU APIs. In particular:
+ * - If indexes are out of bounds for a UnicodeString object
+ * (< 0 or > length()) then they are "pinned" to the nearest boundary.
+ * - If the buffer passed to an insert/append/replace operation is owned by the
+ * target object, e.g., calling str.append(str), an extra copy may take place
+ * to ensure safety.
+ * - If primitive string pointer values (e.g., const char16_t * or char *)
+ * for input strings are NULL, then those input string parameters are treated
+ * as if they pointed to an empty string.
+ * However, this is *not* the case for char * parameters for charset names
+ * or other IDs.
+ * - Most UnicodeString methods do not take a UErrorCode parameter because
+ * there are usually very few opportunities for failure other than a shortage
+ * of memory, error codes in low-level C++ string methods would be inconvenient,
+ * and the error code as the last parameter (ICU convention) would prevent
+ * the use of default parameter values.
+ * Instead, such methods set the UnicodeString into a "bogus" state
+ * (see isBogus()) if an error occurs.
+ *
+ * In string comparisons, two UnicodeString objects that are both "bogus"
+ * compare equal (to be transitive and prevent endless loops in sorting),
+ * and a "bogus" string compares less than any non-"bogus" one.
+ *
+ * Const UnicodeString methods are thread-safe. Multiple threads can use
+ * const methods on the same UnicodeString object simultaneously,
+ * but non-const methods must not be called concurrently (in multiple threads)
+ * with any other (const or non-const) methods.
+ *
+ * Similarly, const UnicodeString & parameters are thread-safe.
+ * One object may be passed in as such a parameter concurrently in multiple threads.
+ * This includes the const UnicodeString & parameters for
+ * copy construction, assignment, and cloning.
+ *
+ * UnicodeString uses several storage methods.
+ * String contents can be stored inside the UnicodeString object itself,
+ * in an allocated and shared buffer, or in an outside buffer that is "aliased".
+ * Most of this is done transparently, but careful aliasing in particular provides
+ * significant performance improvements.
+ * Also, the internal buffer is accessible via special functions.
+ * For details see the
+ * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#maximizing-performance-with-the-unicodestring-storage-model).
+ *
+ * @see utf.h
+ * @see CharacterIterator
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UnicodeString : public Replaceable
+{
+public:
+
+ /**
+ * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
+ * which constructs a Unicode string from an invariant-character char * string.
+ * Use the macro US_INV instead of the full qualification for this value.
+ *
+ * @see US_INV
+ * @stable ICU 3.2
+ */
+ enum EInvariant {
+ /**
+ * @see EInvariant
+ * @stable ICU 3.2
+ */
+ kInvariant
+ };
+
+ //========================================
+ // Read-only operations
+ //========================================
+
+ /* Comparison - bitwise only - for international comparison use collation */
+
+ /**
+ * Equality operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return true if `text` contains the same characters as this one,
+ * false otherwise.
+ * @stable ICU 2.0
+ */
+ inline UBool operator== (const UnicodeString& text) const;
+
+ /**
+ * Inequality operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return false if `text` contains the same characters as this one,
+ * true otherwise.
+ * @stable ICU 2.0
+ */
+ inline UBool operator!= (const UnicodeString& text) const;
+
+ /**
+ * Greater than operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return true if the characters in this are bitwise
+ * greater than the characters in `text`, false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator> (const UnicodeString& text) const;
+
+ /**
+ * Less than operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return true if the characters in this are bitwise
+ * less than the characters in `text`, false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator< (const UnicodeString& text) const;
+
+ /**
+ * Greater than or equal operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return true if the characters in this are bitwise
+ * greater than or equal to the characters in `text`, false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator>= (const UnicodeString& text) const;
+
+ /**
+ * Less than or equal operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return true if the characters in this are bitwise
+ * less than or equal to the characters in `text`, false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator<= (const UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in this UnicodeString to
+ * the characters in `text`.
+ * @param text The UnicodeString to compare to this one.
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as `text`, -1 if the characters in
+ * this are bitwise less than the characters in `text`, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in `text`.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(const UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [`start`, `start + length`) with the characters
+ * in the **entire string** `text`.
+ * (The parameters "start" and "length" are not applied to the other text "text".)
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters of text to compare.
+ * @param text the other text to be compared against this string.
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as `text`, -1 if the characters in
+ * this are bitwise less than the characters in `text`, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in `text`.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [`start`, `start + length`) with the characters
+ * in `srcText` in the range
+ * [`srcStart`, `srcStart + srcLength`).
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters in this to compare.
+ * @param srcText the text to be compared
+ * @param srcStart the offset into `srcText` to start comparison
+ * @param srcLength the number of characters in `srcText` to compare
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as `srcText`, -1 if the characters in
+ * this are bitwise less than the characters in `srcText`, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in `srcText`.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in this UnicodeString with the first
+ * `srcLength` characters in `srcChars`.
+ * @param srcChars The characters to compare to this UnicodeString.
+ * @param srcLength the number of characters in `srcChars` to compare
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as `srcChars`, -1 if the characters in
+ * this are bitwise less than the characters in `srcChars`, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in `srcChars`.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(ConstChar16Ptr srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [`start`, `start + length`) with the first
+ * `length` characters in `srcChars`
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters to compare.
+ * @param srcChars the characters to be compared
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as `srcChars`, -1 if the characters in
+ * this are bitwise less than the characters in `srcChars`, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in `srcChars`.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const char16_t *srcChars) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [`start`, `start + length`) with the characters
+ * in `srcChars` in the range
+ * [`srcStart`, `srcStart + srcLength`).
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters in this to compare
+ * @param srcChars the characters to be compared
+ * @param srcStart the offset into `srcChars` to start comparison
+ * @param srcLength the number of characters in `srcChars` to compare
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as `srcChars`, -1 if the characters in
+ * this are bitwise less than the characters in `srcChars`, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in `srcChars`.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [`start`, `limit`) with the characters
+ * in `srcText` in the range
+ * [`srcStart`, `srcLimit`).
+ * @param start the offset at which the compare operation begins
+ * @param limit the offset immediately following the compare operation
+ * @param srcText the text to be compared
+ * @param srcStart the offset into `srcText` to start comparison
+ * @param srcLimit the offset into `srcText` to limit comparison
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as `srcText`, -1 if the characters in
+ * this are bitwise less than the characters in `srcText`, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in `srcText`.
+ * @stable ICU 2.0
+ */
+ inline int8_t compareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param text Another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(const UnicodeString& text) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const char16_t *srcChars) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param limit The offset after the last code unit from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLimit The offset after the last code unit from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrderBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
+ *
+ * @param text Another string to compare this one to.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(ConstChar16Ptr srcChars,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param limit The offset after the last code unit from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLimit The offset after the last code unit from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit,
+ uint32_t options) const;
+
+ /**
+ * Determine if this starts with the characters in `text`
+ * @param text The text to match.
+ * @return true if this starts with the characters in `text`,
+ * false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UnicodeString& text) const;
+
+ /**
+ * Determine if this starts with the characters in `srcText`
+ * in the range [`srcStart`, `srcStart + srcLength`).
+ * @param srcText The text to match.
+ * @param srcStart the offset into `srcText` to start matching
+ * @param srcLength the number of characters in `srcText` to match
+ * @return true if this starts with the characters in that range of `srcText`,
+ * false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this starts with the characters in `srcChars`
+ * @param srcChars The characters to match.
+ * @param srcLength the number of characters in `srcChars`
+ * @return true if this starts with the characters in `srcChars`,
+ * false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(ConstChar16Ptr srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this starts with the characters in `srcChars`
+ * in the range [`srcStart`, `srcStart + srcLength`).
+ * @param srcChars The characters to match.
+ * @param srcStart the offset into `srcChars` to start matching
+ * @param srcLength the number of characters in `srcChars` to match
+ * @return true if this starts with the characters in `srcChars`, false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in `text`
+ * @param text The text to match.
+ * @return true if this ends with the characters in `text`,
+ * false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UnicodeString& text) const;
+
+ /**
+ * Determine if this ends with the characters in `srcText`
+ * in the range [`srcStart`, `srcStart + srcLength`).
+ * @param srcText The text to match.
+ * @param srcStart the offset into `srcText` to start matching
+ * @param srcLength the number of characters in `srcText` to match
+ * @return true if this ends with the characters in that range of `srcText`,
+ * false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in `srcChars`
+ * @param srcChars The characters to match.
+ * @param srcLength the number of characters in `srcChars`
+ * @return true if this ends with the characters in `srcChars`,
+ * false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(ConstChar16Ptr srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in `srcChars`
+ * in the range [`srcStart`, `srcStart + srcLength`).
+ * @param srcChars The characters to match.
+ * @param srcStart the offset into `srcChars` to start matching
+ * @param srcLength the number of characters in `srcChars` to match
+ * @return true if this ends with the characters in `srcChars`,
+ * false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+
+ /* Searching - bitwise only */
+
+ /**
+ * Locate in this the first occurrence of the characters in `text`,
+ * using bitwise comparison.
+ * @param text The text to search for.
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& text) const;
+
+ /**
+ * Locate in this the first occurrence of the characters in `text`
+ * starting at offset `start`, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& text,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `text`, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& text,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `srcText` in the range
+ * [`srcStart`, `srcStart + srcLength`),
+ * using bitwise comparison.
+ * @param srcText The text to search for.
+ * @param srcStart the offset into `srcText` at which
+ * to start matching
+ * @param srcLength the number of characters in `srcText` to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of the matched range of `srcText`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the characters in
+ * `srcChars`
+ * starting at offset `start`, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in `srcChars` to match
+ * @param start the offset into this at which to start matching
+ * @return The offset into this of the start of `srcChars`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const char16_t *srcChars,
+ int32_t srcLength,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `srcChars`, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in `srcChars`
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of `srcChars`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(ConstChar16Ptr srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `srcChars` in the range
+ * [`srcStart`, `srcStart + srcLength`),
+ * using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcStart the offset into `srcChars` at which
+ * to start matching
+ * @param srcLength the number of characters in `srcChars` to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of the matched range of `srcChars`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ int32_t indexOf(const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the BMP code point `c`,
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(char16_t c) const;
+
+ /**
+ * Locate in this the first occurrence of the code point `c`,
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar32 c) const;
+
+ /**
+ * Locate in this the first occurrence of the BMP code point `c`,
+ * starting at offset `start`, using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(char16_t c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence of the code point `c`
+ * starting at offset `start`, using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar32 c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence of the BMP code point `c`
+ * in the range [`start`, `start + length`),
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(char16_t c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the code point `c`
+ * in the range [`start`, `start + length`),
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in `text`,
+ * using bitwise comparison.
+ * @param text The text to search for.
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& text) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in `text`
+ * starting at offset `start`, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& text,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `text`, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& text,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `srcText` in the range
+ * [`srcStart`, `srcStart + srcLength`),
+ * using bitwise comparison.
+ * @param srcText The text to search for.
+ * @param srcStart the offset into `srcText` at which
+ * to start matching
+ * @param srcLength the number of characters in `srcText` to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of the matched range of `srcText`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in `srcChars`
+ * starting at offset `start`, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in `srcChars` to match
+ * @param start the offset into this at which to start matching
+ * @return The offset into this of the start of `srcChars`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const char16_t *srcChars,
+ int32_t srcLength,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `srcChars`, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in `srcChars`
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of `srcChars`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `srcChars` in the range
+ * [`srcStart`, `srcStart + srcLength`),
+ * using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcStart the offset into `srcChars` at which
+ * to start matching
+ * @param srcLength the number of characters in `srcChars` to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of the matched range of `srcChars`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ int32_t lastIndexOf(const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the BMP code point `c`,
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(char16_t c) const;
+
+ /**
+ * Locate in this the last occurrence of the code point `c`,
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar32 c) const;
+
+ /**
+ * Locate in this the last occurrence of the BMP code point `c`
+ * starting at offset `start`, using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(char16_t c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence of the code point `c`
+ * starting at offset `start`, using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar32 c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence of the BMP code point `c`
+ * in the range [`start`, `start + length`),
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(char16_t c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the code point `c`
+ * in the range [`start`, `start + length`),
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+
+ /* Character access */
+
+ /**
+ * Return the code unit at offset `offset`.
+ * If the offset is not valid (0..length()-1) then U+ffff is returned.
+ * @param offset a valid offset into the text
+ * @return the code unit at offset `offset`
+ * or 0xffff if the offset is not valid for this string
+ * @stable ICU 2.0
+ */
+ inline char16_t charAt(int32_t offset) const;
+
+ /**
+ * Return the code unit at offset `offset`.
+ * If the offset is not valid (0..length()-1) then U+ffff is returned.
+ * @param offset a valid offset into the text
+ * @return the code unit at offset `offset`
+ * or 0xffff if the offset is not valid for this string
+ * @stable ICU 2.0
+ */
+ inline char16_t operator[] (int32_t offset) const;
+
+ /**
+ * Return the code point that contains the code unit
+ * at offset `offset`.
+ * If the offset is not valid (0..length()-1) then U+ffff is returned.
+ * @param offset a valid offset into the text
+ * that indicates the text offset of any of the code units
+ * that will be assembled into a code point (21-bit value) and returned
+ * @return the code point of text at `offset`
+ * or 0xffff if the offset is not valid for this string
+ * @stable ICU 2.0
+ */
+ UChar32 char32At(int32_t offset) const;
+
+ /**
+ * Adjust a random-access offset so that
+ * it points to the beginning of a Unicode character.
+ * The offset that is passed in points to
+ * any code unit of a code point,
+ * while the returned offset will point to the first code unit
+ * of the same code point.
+ * In UTF-16, if the input offset points to a second surrogate
+ * of a surrogate pair, then the returned offset will point
+ * to the first surrogate.
+ * @param offset a valid offset into one code point of the text
+ * @return offset of the first code unit of the same code point
+ * @see U16_SET_CP_START
+ * @stable ICU 2.0
+ */
+ int32_t getChar32Start(int32_t offset) const;
+
+ /**
+ * Adjust a random-access offset so that
+ * it points behind a Unicode character.
+ * The offset that is passed in points behind
+ * any code unit of a code point,
+ * while the returned offset will point behind the last code unit
+ * of the same code point.
+ * In UTF-16, if the input offset points behind the first surrogate
+ * (i.e., to the second surrogate)
+ * of a surrogate pair, then the returned offset will point
+ * behind the second surrogate (i.e., to the first code unit after the pair).
+ * @param offset a valid offset after any code unit of a code point of the text
+ * @return offset of the first code unit after the same code point
+ * @see U16_SET_CP_LIMIT
+ * @stable ICU 2.0
+ */
+ int32_t getChar32Limit(int32_t offset) const;
+
+ /**
+ * Move the code unit index along the string by delta code points.
+ * Interpret the input index as a code unit-based offset into the string,
+ * move the index forward or backward by delta code points, and
+ * return the resulting index.
+ * The input index should point to the first code unit of a code point,
+ * if there is more than one.
+ *
+ * Both input and output indexes are code unit-based as for all
+ * string indexes/offsets in ICU (and other libraries, like MBCS char*).
+ * If delta<0 then the index is moved backward (toward the start of the string).
+ * If delta>0 then the index is moved forward (toward the end of the string).
+ *
+ * This behaves like CharacterIterator::move32(delta, kCurrent).
+ *
+ * Behavior for out-of-bounds indexes:
+ * `moveIndex32` pins the input index to 0..length(), i.e.,
+ * if the input index<0 then it is pinned to 0;
+ * if it is index>length() then it is pinned to length().
+ * Afterwards, the index is moved by `delta` code points
+ * forward or backward,
+ * but no further backward than to 0 and no further forward than to length().
+ * The resulting index return value will be in between 0 and length(), inclusively.
+ *
+ * Examples:
+ * \code
+ * // s has code points 'a' U+10000 'b' U+10ffff U+2029
+ * UnicodeString s(u"a\U00010000b\U0010ffff\u2029");
+ *
+ * // initial index: position of U+10000
+ * int32_t index=1;
+ *
+ * // the following examples will all result in index==4, position of U+10ffff
+ *
+ * // skip 2 code points from some position in the string
+ * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
+ *
+ * // go to the 3rd code point from the start of s (0-based)
+ * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
+ *
+ * // go to the next-to-last code point of s
+ * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
+ * \endcode
+ *
+ * @param index input code unit index
+ * @param delta (signed) code point count to move the index forward or backward
+ * in the string
+ * @return the resulting code unit index
+ * @stable ICU 2.0
+ */
+ int32_t moveIndex32(int32_t index, int32_t delta) const;
+
+ /* Substring extraction */
+
+ /**
+ * Copy the characters in the range
+ * [`start`, `start + length`) into the array `dst`,
+ * beginning at `dstStart`.
+ * If the string aliases to `dst` itself as an external buffer,
+ * then extract() will not copy the contents.
+ *
+ * @param start offset of first character which will be copied into the array
+ * @param length the number of characters to extract
+ * @param dst array in which to copy characters. The length of `dst`
+ * must be at least (`dstStart + length`).
+ * @param dstStart the offset in `dst` where the first character
+ * will be extracted
+ * @stable ICU 2.0
+ */
+ inline void extract(int32_t start,
+ int32_t length,
+ Char16Ptr dst,
+ int32_t dstStart = 0) const;
+
+ /**
+ * Copy the contents of the string into dest.
+ * This is a convenience function that
+ * checks if there is enough space in dest,
+ * extracts the entire string if possible,
+ * and NUL-terminates dest if possible.
+ *
+ * If the string fits into dest but cannot be NUL-terminated
+ * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
+ * If the string itself does not fit into dest
+ * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+ *
+ * If the string aliases to `dest` itself as an external buffer,
+ * then extract() will not copy the contents.
+ *
+ * @param dest Destination string buffer.
+ * @param destCapacity Number of char16_ts available at dest.
+ * @param errorCode ICU error code.
+ * @return length()
+ * @stable ICU 2.0
+ */
+ int32_t
+ extract(Char16Ptr dest, int32_t destCapacity,
+ UErrorCode &errorCode) const;
+
+ /**
+ * Copy the characters in the range
+ * [`start`, `start + length`) into the UnicodeString
+ * `target`.
+ * @param start offset of first character which will be copied
+ * @param length the number of characters to extract
+ * @param target UnicodeString into which to copy characters.
+ * @stable ICU 2.0
+ */
+ inline void extract(int32_t start,
+ int32_t length,
+ UnicodeString& target) const;
+
+ /**
+ * Copy the characters in the range [`start`, `limit`)
+ * into the array `dst`, beginning at `dstStart`.
+ * @param start offset of first character which will be copied into the array
+ * @param limit offset immediately following the last character to be copied
+ * @param dst array in which to copy characters. The length of `dst`
+ * must be at least (`dstStart + (limit - start)`).
+ * @param dstStart the offset in `dst` where the first character
+ * will be extracted
+ * @stable ICU 2.0
+ */
+ inline void extractBetween(int32_t start,
+ int32_t limit,
+ char16_t *dst,
+ int32_t dstStart = 0) const;
+
+ /**
+ * Copy the characters in the range [`start`, `limit`)
+ * into the UnicodeString `target`. Replaceable API.
+ * @param start offset of first character which will be copied
+ * @param limit offset immediately following the last character to be copied
+ * @param target UnicodeString into which to copy characters.
+ * @stable ICU 2.0
+ */
+ virtual void extractBetween(int32_t start,
+ int32_t limit,
+ UnicodeString& target) const;
+
+ /**
+ * Copy the characters in the range
+ * [`start`, `start + startLength`) into an array of characters.
+ * All characters must be invariant (see utypes.h).
+ * Use US_INV as the last, signature-distinguishing parameter.
+ *
+ * This function does not write any more than `targetCapacity`
+ * characters but returns the length of the entire output string
+ * so that one can allocate a larger buffer and call the function again
+ * if necessary.
+ * The output string is NUL-terminated if possible.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction, can be NULL
+ * if targetCapacity is 0
+ * @param targetCapacity the length of the target buffer
+ * @param inv Signature-distinguishing parameter, use US_INV.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 3.2
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ int32_t targetCapacity,
+ enum EInvariant inv) const;
+
+#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
+
+ /**
+ * Copy the characters in the range
+ * [`start`, `start + startLength`) into an array of characters
+ * in the platform's default codepage.
+ * This function does not write any more than `targetLength`
+ * characters but returns the length of the entire output string
+ * so that one can allocate a larger buffer and call the function again
+ * if necessary.
+ * The output string is NUL-terminated if possible.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param targetLength the length of the target buffer
+ * If `target` is NULL, then the number of bytes required for
+ * `target` is returned.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 2.0
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ uint32_t targetLength) const;
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+ /**
+ * Copy the characters in the range
+ * [`start`, `start + startLength`) into an array of characters
+ * in a specified codepage.
+ * The output string is NUL-terminated.
+ *
+ * Recommendation: For invariant-character strings use
+ * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param codepage the desired codepage for the characters. 0 has
+ * the special meaning of the default codepage
+ * If `codepage` is an empty string (`""`),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * If `target` is NULL, then the number of bytes required for
+ * `target` is returned. It is assumed that the target is big enough
+ * to fit all of the characters.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 2.0
+ */
+ inline int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ const char *codepage = 0) const;
+
+ /**
+ * Copy the characters in the range
+ * [`start`, `start + startLength`) into an array of characters
+ * in a specified codepage.
+ * This function does not write any more than `targetLength`
+ * characters but returns the length of the entire output string
+ * so that one can allocate a larger buffer and call the function again
+ * if necessary.
+ * The output string is NUL-terminated if possible.
+ *
+ * Recommendation: For invariant-character strings use
+ * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param targetLength the length of the target buffer
+ * @param codepage the desired codepage for the characters. 0 has
+ * the special meaning of the default codepage
+ * If `codepage` is an empty string (`""`),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * If `target` is NULL, then the number of bytes required for
+ * `target` is returned.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 2.0
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ uint32_t targetLength,
+ const char *codepage) const;
+
+ /**
+ * Convert the UnicodeString into a codepage string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function avoids the overhead of opening and closing a converter if
+ * multiple strings are extracted.
+ *
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of chars available at dest
+ * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
+ * or NULL for the default converter
+ * @param errorCode normal ICU error code
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than destCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @stable ICU 2.0
+ */
+ int32_t extract(char *dest, int32_t destCapacity,
+ UConverter *cnv,
+ UErrorCode &errorCode) const;
+
+#endif
+
+ /**
+ * Create a temporary substring for the specified range.
+ * Unlike the substring constructor and setTo() functions,
+ * the object returned here will be a read-only alias (using getBuffer())
+ * rather than copying the text.
+ * As a result, this substring operation is much faster but requires
+ * that the original string not be modified or deleted during the lifetime
+ * of the returned substring object.
+ * @param start offset of the first character visible in the substring
+ * @param length length of the substring
+ * @return a read-only alias UnicodeString object for the substring
+ * @stable ICU 4.4
+ */
+ UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
+
+ /**
+ * Create a temporary substring for the specified range.
+ * Same as tempSubString(start, length) except that the substring range
+ * is specified as a (start, limit) pair (with an exclusive limit index)
+ * rather than a (start, length) pair.
+ * @param start offset of the first character visible in the substring
+ * @param limit offset immediately following the last character visible in the substring
+ * @return a read-only alias UnicodeString object for the substring
+ * @stable ICU 4.4
+ */
+ inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
+
+ /**
+ * Convert the UnicodeString to UTF-8 and write the result
+ * to a ByteSink. This is called by toUTF8String().
+ * Unpaired surrogates are replaced with U+FFFD.
+ * Calls u_strToUTF8WithSub().
+ *
+ * @param sink A ByteSink to which the UTF-8 version of the string is written.
+ * sink.Flush() is called at the end.
+ * @stable ICU 4.2
+ * @see toUTF8String
+ */
+ void toUTF8(ByteSink &sink) const;
+
+ /**
+ * Convert the UnicodeString to UTF-8 and append the result
+ * to a standard string.
+ * Unpaired surrogates are replaced with U+FFFD.
+ * Calls toUTF8().
+ *
+ * @param result A standard string (or a compatible object)
+ * to which the UTF-8 version of the string is appended.
+ * @return The string object.
+ * @stable ICU 4.2
+ * @see toUTF8
+ */
+ template<typename StringClass>
+ StringClass &toUTF8String(StringClass &result) const {
+ // Wrap the caller's string in a ByteSink (constructed with length() as its
+ // second argument) and let toUTF8() write the converted bytes into it.
+ StringByteSink<StringClass> utf8Sink(&result, length());
+ toUTF8(utf8Sink);
+ return result;
+ }
+
+ /**
+ * Convert the UnicodeString to UTF-32.
+ * Unpaired surrogates are replaced with U+FFFD.
+ * Calls u_strToUTF32WithSub().
+ *
+ * @param utf32 destination string buffer, can be NULL if capacity==0
+ * @param capacity the number of UChar32s available at utf32
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The length of the UTF-32 string.
+ * @see fromUTF32
+ * @stable ICU 4.2
+ */
+ int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
+
+ /* Length operations */
+
+ /**
+ * Return the length of the UnicodeString object.
+ * The length is the number of char16_t code units in the UnicodeString.
+ * If you want the number of code points, please use countChar32().
+ * @return the length of the UnicodeString object
+ * @see countChar32
+ * @stable ICU 2.0
+ */
+ inline int32_t length(void) const;
+
+ /**
+ * Count Unicode code points in the length char16_t code units of the string.
+ * A code point may occupy either one or two char16_t code units.
+ * Counting code points involves reading all code units.
+ *
+ * This function is basically the inverse of moveIndex32().
+ *
+ * @param start the index of the first code unit to check
+ * @param length the number of char16_t code units to check
+ * @return the number of code points in the specified code units
+ * @see length
+ * @stable ICU 2.0
+ */
+ int32_t
+ countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
+
+ /**
+ * Check if the length char16_t code units of the string
+ * contain more Unicode code points than a certain number.
+ * This is more efficient than counting all code points in this part of the string
+ * and comparing that number with a threshold.
+ * This function may not need to scan the string at all if the length
+ * falls within a certain range, and
+ * never needs to count more than 'number+1' code points.
+ * Logically equivalent to (countChar32(start, length)>number).
+ * A Unicode code point may occupy either one or two char16_t code units.
+ *
+ * @param start the index of the first code unit to check (0 for the entire string)
+ * @param length the number of char16_t code units to check
+ * (use INT32_MAX for the entire string; remember that start/length
+ * values are pinned)
+ * @param number The number of code points in the (sub)string is compared against
+ * the 'number' parameter.
+ * @return Boolean value for whether the string contains more Unicode code points
+ * than 'number'. Same as (u_countChar32(s, length)>number).
+ * @see countChar32
+ * @see u_strHasMoreChar32Than
+ * @stable ICU 2.4
+ */
+ UBool
+ hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
+
+ /**
+ * Determine if this string is empty.
+ * @return true if this string contains 0 characters, false otherwise.
+ * @stable ICU 2.0
+ */
+ inline UBool isEmpty(void) const;
+
+ /**
+ * Return the capacity of the internal buffer of the UnicodeString object.
+ * This is useful together with the getBuffer functions.
+ * See there for details.
+ *
+ * @return the number of char16_ts available in the internal buffer
+ * @see getBuffer
+ * @stable ICU 2.0
+ */
+ inline int32_t getCapacity(void) const;
+
+ /* Other operations */
+
+ /**
+ * Generate a hash code for this object.
+ * @return The hash code of this UnicodeString.
+ * @stable ICU 2.0
+ */
+ inline int32_t hashCode(void) const;
+
+ /**
+ * Determine if this object contains a valid string.
+ * A bogus string has no value. It is different from an empty string,
+ * although in both cases isEmpty() returns true and length() returns 0.
+ * setToBogus() and isBogus() can be used to indicate that no string value is available.
+ * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
+ * length() returns 0.
+ *
+ * @return true if the string is bogus/invalid, false otherwise
+ * @see setToBogus()
+ * @stable ICU 2.0
+ */
+ inline UBool isBogus(void) const;
+
+
+ //========================================
+ // Write operations
+ //========================================
+
+ /* Assignment operations */
+
+ /**
+ * Assignment operator. Replace the characters in this UnicodeString
+ * with the characters from `srcText`.
+ *
+ * Starting with ICU 2.4, the assignment operator and the copy constructor
+ * allocate a new buffer and copy the buffer contents even for readonly aliases.
+ * By contrast, the fastCopyFrom() function implements the old,
+ * more efficient but less safe behavior
+ * of making this string also a readonly alias to the same buffer.
+ *
+ * If the source object has an "open" buffer from getBuffer(minCapacity),
+ * then the copy is an empty string.
+ *
+ * @param srcText The text containing the characters to replace
+ * @return a reference to this
+ * @stable ICU 2.0
+ * @see fastCopyFrom
+ */
+ UnicodeString &operator=(const UnicodeString &srcText);
+
+ /**
+ * Almost the same as the assignment operator.
+ * Replace the characters in this UnicodeString
+ * with the characters from `srcText`.
+ *
+ * This function works the same as the assignment operator
+ * for all strings except for ones that are readonly aliases.
+ *
+ * Starting with ICU 2.4, the assignment operator and the copy constructor
+ * allocate a new buffer and copy the buffer contents even for readonly aliases.
+ * This function implements the old, more efficient but less safe behavior
+ * of making this string also a readonly alias to the same buffer.
+ *
+ * The fastCopyFrom function must be used only if it is known that the lifetime of
+ * this UnicodeString does not exceed the lifetime of the aliased buffer
+ * including its contents, for example for strings from resource bundles
+ * or aliases to string constants.
+ *
+ * If the source object has an "open" buffer from getBuffer(minCapacity),
+ * then the copy is an empty string.
+ *
+ * @param src The text containing the characters to replace.
+ * @return a reference to this
+ * @stable ICU 2.4
+ */
+ UnicodeString &fastCopyFrom(const UnicodeString &src);
+
+ /**
+ * Move assignment operator; might leave src in bogus state.
+ * This string will have the same contents and state that the source string had.
+ * The behavior is undefined if *this and src are the same object.
+ * @param src source string
+ * @return *this
+ * @stable ICU 56
+ */
+ UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT;
+
+ /**
+ * Swap strings.
+ * @param other other string
+ * @stable ICU 56
+ */
+ void swap(UnicodeString &other) U_NOEXCEPT;
+
+ /**
+ * Non-member UnicodeString swap function.
+ * @param s1 will get s2's contents and state
+ * @param s2 will get s1's contents and state
+ * @stable ICU 56
+ */
+ friend inline void U_EXPORT2
+ swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT {
+ s1.swap(s2); // forwards to the member function swap(UnicodeString &)
+ }
+
+ /**
+ * Assignment operator. Replace the characters in this UnicodeString
+ * with the code unit `ch`.
+ * @param ch the code unit to replace
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator= (char16_t ch);
+
+ /**
+ * Assignment operator. Replace the characters in this UnicodeString
+ * with the code point `ch`.
+ * @param ch the code point to replace
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator= (UChar32 ch);
+
+ /**
+ * Set the text in the UnicodeString object to the characters
+ * in `srcText` in the range
+ * [`srcStart`, `srcText.length()`).
+ * `srcText` is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into `srcText` where new characters
+ * will be obtained
+ * @return a reference to this
+ * @stable ICU 2.2
+ */
+ inline UnicodeString& setTo(const UnicodeString& srcText,
+ int32_t srcStart);
+
+ /**
+ * Set the text in the UnicodeString object to the characters
+ * in `srcText` in the range
+ * [`srcStart`, `srcStart + srcLength`).
+ * `srcText` is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into `srcText` where new characters
+ * will be obtained
+ * @param srcLength the number of characters in `srcText` in the
+ * replace string.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Set the text in the UnicodeString object to the characters in
+ * `srcText`.
+ * `srcText` is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(const UnicodeString& srcText);
+
+ /**
+ * Set the characters in the UnicodeString object to the characters
+ * in `srcChars`. `srcChars` is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(const char16_t *srcChars,
+ int32_t srcLength);
+
+ /**
+ * Set the characters in the UnicodeString object to the code unit
+ * `srcChar`.
+ * @param srcChar the code unit which becomes the UnicodeString's character
+ * content
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(char16_t srcChar);
+
+ /**
+ * Set the characters in the UnicodeString object to the code point
+ * `srcChar`.
+ * @param srcChar the code point which becomes the UnicodeString's character
+ * content
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(UChar32 srcChar);
+
+ /**
+ * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has copy-on-write semantics:
+ * When the string is modified, then the buffer is first copied into
+ * newly allocated memory.
+ * The aliased buffer is never modified.
+ *
+ * In an assignment to another UnicodeString, when using the copy constructor
+ * or the assignment operator, the text will be copied.
+ * When using fastCopyFrom(), the text will be aliased again,
+ * so that both strings then alias the same readonly-text.
+ *
+ * @param isTerminated specifies if `text` is `NUL`-terminated.
+ * This must be true if `textLength==-1`.
+ * @param text The characters to alias for the UnicodeString.
+ * @param textLength The number of Unicode characters in `text` to alias.
+ * If -1, then this function will determine the length
+ * by calling `u_strlen()`.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString &setTo(UBool isTerminated,
+ ConstChar16Ptr text,
+ int32_t textLength);
+
+ /**
+ * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has write-through semantics:
+ * For as long as the capacity of the buffer is sufficient, write operations
+ * will directly affect the buffer. When more capacity is necessary, then
+ * a new buffer will be allocated and the contents copied as with regularly
+ * constructed strings.
+ * In an assignment to another UnicodeString, the buffer will be copied.
+ * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
+ * as the string buffer itself and will in this case not copy the contents.
+ *
+ * @param buffer The characters to alias for the UnicodeString.
+ * @param buffLength The number of Unicode characters in `buffer` to alias.
+ * @param buffCapacity The size of `buffer` in char16_ts.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString &setTo(char16_t *buffer,
+ int32_t buffLength,
+ int32_t buffCapacity);
+
+ /**
+ * Make this UnicodeString object invalid.
+ * The string will test true with isBogus().
+ *
+ * A bogus string has no value. It is different from an empty string.
+ * It can be used to indicate that no string value is available.
+ * getBuffer() and getTerminatedBuffer() return NULL, and
+ * length() returns 0.
+ *
+ * This utility function is used throughout the UnicodeString
+ * implementation to indicate that a UnicodeString operation failed,
+ * and may be used in other functions,
+ * especially but not exclusively when such functions do not
+ * take a UErrorCode for simplicity.
+ *
+ * The following methods, and no others, will clear a string object's bogus flag:
+ * - remove()
+ * - remove(0, INT32_MAX)
+ * - truncate(0)
+ * - operator=() (assignment operator)
+ * - setTo(...)
+ *
+ * The simplest way to turn a bogus string into an empty one
+ * is to use the remove() function.
+ * Examples for other functions that are equivalent to "set to empty string":
+ * \code
+ * if(s.isBogus()) {
+ * s.remove(); // set to an empty string (remove all), or
+ * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
+ * s.truncate(0); // set to an empty string (complete truncation), or
+ * s=UnicodeString(); // assign an empty string, or
+ * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
+ * s.setTo(u"", 0); // set to an empty C Unicode string
+ * }
+ * \endcode
+ *
+ * @see isBogus()
+ * @stable ICU 2.0
+ */
+ void setToBogus();
+
+ /**
+ * Set the character at the specified offset to the specified character.
+ * @param offset A valid offset into the text of the character to set
+ * @param ch The new character
+ * @return A reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& setCharAt(int32_t offset,
+ char16_t ch);
+
+
+ /* Append operations */
+
+ /**
+ * Append operator. Append the code unit `ch` to the UnicodeString
+ * object.
+ * @param ch the code unit to be appended
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator+= (char16_t ch);
+
+ /**
+ * Append operator. Append the code point `ch` to the UnicodeString
+ * object.
+ * @param ch the code point to be appended
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator+= (UChar32 ch);
+
+ /**
+ * Append operator. Append the characters in `srcText` to the
+ * UnicodeString object. `srcText` is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator+= (const UnicodeString& srcText);
+
+ /**
+ * Append the characters
+ * in `srcText` in the range
+ * [`srcStart`, `srcStart + srcLength`) to the
+ * UnicodeString object. `srcText`
+ * is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into `srcText` where new characters
+ * will be obtained
+ * @param srcLength the number of characters in `srcText` in
+ * the append string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Append the characters in `srcText` to the UnicodeString object.
+ * `srcText` is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UnicodeString& srcText);
+
+ /**
+ * Append the characters in `srcChars` in the range
+ * [`srcStart`, `srcStart + srcLength`) to the UnicodeString
+ * object.
+ * `srcChars` is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcStart the offset into `srcChars` where new characters
+ * will be obtained
+ * @param srcLength the number of characters in `srcChars` in
+ * the append string; can be -1 if `srcChars` is NUL-terminated
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Append the characters in `srcChars` to the UnicodeString object.
+ * `srcChars` is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in `srcChars`;
+ * can be -1 if `srcChars` is NUL-terminated
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(ConstChar16Ptr srcChars,
+ int32_t srcLength);
+
+ /**
+ * Append the code unit `srcChar` to the UnicodeString object.
+ * @param srcChar the code unit to append
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(char16_t srcChar);
+
+ /**
+ * Append the code point `srcChar` to the UnicodeString object.
+ * @param srcChar the code point to append
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& append(UChar32 srcChar);
+
+
+ /* Insert operations */
+
+ /**
+ * Insert the characters in `srcText` in the range
+ * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
+ * object at offset `start`. `srcText` is not modified.
+ * @param start the offset where the insertion begins
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into `srcText` where new characters
+ * will be obtained
+ * @param srcLength the number of characters in `srcText` in
+ * the insert string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Insert the characters in `srcText` into the UnicodeString object
+ * at offset `start`. `srcText` is not modified.
+ * @param start the offset where the insertion begins
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UnicodeString& srcText);
+
+ /**
+ * Insert the characters in `srcChars` in the range
+ * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
+ * object at offset `start`. `srcChars` is not modified.
+ * @param start the offset at which the insertion begins
+ * @param srcChars the source for the new characters
+ * @param srcStart the offset into `srcChars` where new characters
+ * will be obtained
+ * @param srcLength the number of characters in `srcChars`
+ * in the insert string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Insert the characters in `srcChars` into the UnicodeString object
+ * at offset `start`. `srcChars` is not modified.
+ * @param start the offset where the insertion begins
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ ConstChar16Ptr srcChars,
+ int32_t srcLength);
+
+ /**
+ * Insert the code unit `srcChar` into the UnicodeString object at
+ * offset `start`.
+ * @param start the offset at which the insertion occurs
+ * @param srcChar the code unit to insert
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ char16_t srcChar);
+
+ /**
+ * Insert the code point `srcChar` into the UnicodeString object at
+ * offset `start`.
+ * @param start the offset at which the insertion occurs
+ * @param srcChar the code point to insert
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ UChar32 srcChar);
+
+
+ /* Replace operations */
+
+ /**
+ * Replace the characters in the range
+ * [`start`, `start + length`) with the characters in
+ * `srcText` in the range
+ * [`srcStart`, `srcStart + srcLength`).
+ * `srcText` is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * `start + length` is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into `srcText` where new characters
+ * will be obtained
+ * @param srcLength the number of characters in `srcText` in
+ * the replace string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Replace the characters in the range
+ * [`start`, `start + length`)
+ * with the characters in `srcText`. `srcText` is
+ * not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * `start + length` is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText);
+
+ /**
+ * Replace the characters in the range
+ * [`start`, `start + length`) with the characters in
+ * `srcChars` in the range
+ * [`srcStart`, `srcStart + srcLength`). `srcChars`
+ * is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * `start + length` is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcStart the offset into `srcChars` where new characters
+ * will be obtained
+ * @param srcLength the number of characters in `srcChars`
+ * in the replace string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Replace the characters in the range
+ * [`start`, `start + length`) with the characters in
+ * `srcChars`. `srcChars` is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length number of characters to replace. The character at
+ * `start + length` is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ ConstChar16Ptr srcChars,
+ int32_t srcLength);
+
+ /**
+ * Replace the characters in the range
+ * [`start`, `start + length`) with the code unit
+ * `srcChar`.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * `start + length` is not modified.
+ * @param srcChar the new code unit
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ char16_t srcChar);
+
+ /**
+ * Replace the characters in the range
+ * [`start`, `start + length`) with the code point
+ * `srcChar`.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * `start + length` is not modified.
+ * @param srcChar the new code point
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
+
+ /**
+ * Replace the characters in the range [`start`, `limit`)
+ * with the characters in `srcText`. `srcText` is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param limit the offset immediately following the replace range
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText);
+
+ /**
+ * Replace the characters in the range [`start`, `limit`)
+ * with the characters in `srcText` in the range
+ * [`srcStart`, `srcLimit`). `srcText` is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param limit the offset immediately following the replace range
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into `srcText` where new characters
+ * will be obtained
+ * @param srcLimit the offset immediately following the range to copy
+ * in `srcText`
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit);
+
+ /**
+ * Replace a substring of this object with the given text.
+ * @param start the beginning index, inclusive; `0 <= start <= limit`.
+ * @param limit the ending index, exclusive; `start <= limit <= length()`.
+ * @param text the text to replace characters `start` to `limit - 1`
+ * @stable ICU 2.0
+ */
+ virtual void handleReplaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& text);
+
+ /**
+ * Replaceable API
+ * @return true if it has MetaData
+ * @stable ICU 2.4
+ */
+ virtual UBool hasMetaData() const;
+
+ /**
+ * Copy a substring of this object, retaining attribute (out-of-band)
+ * information. This method is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * @param start the beginning index, inclusive; `0 <= start <= limit`.
+ * @param limit the ending index, exclusive; `start <= limit <= length()`.
+ * @param dest the destination index. The characters from
+ * `start..limit-1` will be copied to `dest`.
+ * Implementations of this method may assume that `dest <= start ||
+ * dest >= limit`.
+ * @stable ICU 2.0
+ */
+ virtual void copy(int32_t start, int32_t limit, int32_t dest);
+
+ /* Search and replace operations */
+
+ /**
+ * Replace all occurrences of characters in oldText with the characters
+ * in newText
+ * @param oldText the text containing the search text
+ * @param newText the text containing the replacement text
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& findAndReplace(const UnicodeString& oldText,
+ const UnicodeString& newText);
+
+ /**
+ * Replace all occurrences of characters in oldText with characters
+ * in newText
+ * in the range [`start`, `start + length`).
+ * @param start the start of the range in which replace will be performed
+ * @param length the length of the range in which replace will be performed
+ * @param oldText the text containing the search text
+ * @param newText the text containing the replacement text
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& findAndReplace(int32_t start,
+ int32_t length,
+ const UnicodeString& oldText,
+ const UnicodeString& newText);
+
+ /**
+ * Replace all occurrences of characters in oldText in the range
+ * [`oldStart`, `oldStart + oldLength`) with the characters
+ * in newText in the range
+ * [`newStart`, `newStart + newLength`)
+ * in the range [`start`, `start + length`).
+ * @param start the start of the range in which replace will be performed
+ * @param length the length of the range in which replace will be performed
+ * @param oldText the text containing the search text
+ * @param oldStart the start of the search range in `oldText`
+ * @param oldLength the length of the search range in `oldText`
+ * @param newText the text containing the replacement text
+ * @param newStart the start of the replacement range in `newText`
+ * @param newLength the length of the replacement range in `newText`
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& findAndReplace(int32_t start,
+ int32_t length,
+ const UnicodeString& oldText,
+ int32_t oldStart,
+ int32_t oldLength,
+ const UnicodeString& newText,
+ int32_t newStart,
+ int32_t newLength);
+
+
+ /* Remove operations */
+
+ /**
+ * Removes all characters from the UnicodeString object and clears the bogus flag.
+ * This is the UnicodeString equivalent of std::string’s clear().
+ *
+ * @return a reference to this
+ * @see setToBogus
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& remove();
+
+ /**
+ * Remove the characters in the range
+ * [`start`, `start + length`) from the UnicodeString object.
+ * @param start the offset of the first character to remove
+ * @param length the number of characters to remove
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& remove(int32_t start,
+ int32_t length = (int32_t)INT32_MAX);
+
+ /**
+ * Remove the characters in the range
+ * [`start`, `limit`) from the UnicodeString object.
+ * @param start the offset of the first character to remove
+ * @param limit the offset immediately following the range to remove
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& removeBetween(int32_t start,
+ int32_t limit = (int32_t)INT32_MAX);
+
+ /**
+ * Retain only the characters in the range
+ * [`start`, `limit`) from the UnicodeString object.
+ * Removes characters before `start` and at and after `limit`.
+ * @param start the offset of the first character to retain
+ * @param limit the offset immediately following the range to retain
+ * @return a reference to this
+ * @stable ICU 4.4
+ */
+ inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
+
+ /* Length operations */
+
+ /**
+ * Pad the start of this UnicodeString with the character `padChar`.
+ * If the length of this UnicodeString is less than targetLength,
+ * targetLength - length() copies of padChar will be added to the
+ * beginning of this UnicodeString.
+ * @param targetLength the desired length of the string
+ * @param padChar the character to use for padding. Defaults to
+ * space (U+0020)
+ * @return true if the text was padded, false otherwise.
+ * @stable ICU 2.0
+ */
+ UBool padLeading(int32_t targetLength,
+ char16_t padChar = 0x0020);
+
+ /**
+ * Pad the end of this UnicodeString with the character `padChar`.
+ * If the length of this UnicodeString is less than targetLength,
+ * targetLength - length() copies of padChar will be added to the
+ * end of this UnicodeString.
+ * @param targetLength the desired length of the string
+ * @param padChar the character to use for padding. Defaults to
+ * space (U+0020)
+ * @return true if the text was padded, false otherwise.
+ * @stable ICU 2.0
+ */
+ UBool padTrailing(int32_t targetLength,
+ char16_t padChar = 0x0020);
+
+ /**
+ * Truncate this UnicodeString to the `targetLength`.
+ * @param targetLength the desired length of this UnicodeString.
+ * @return true if the text was truncated, false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool truncate(int32_t targetLength);
+
+ /**
+ * Trims leading and trailing whitespace from this UnicodeString.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& trim(void);
+
+
+ /* Miscellaneous operations */
+
+ /**
+ * Reverse this UnicodeString in place.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& reverse(void);
+
+ /**
+ * Reverse the range [`start`, `start + length`) in
+ * this UnicodeString.
+ * @param start the start of the range to reverse
+ * @param length the number of characters to reverse
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& reverse(int32_t start,
+ int32_t length);
+
+ /**
+ * Convert the characters in this to UPPER CASE following the conventions of
+ * the default locale.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toUpper(void);
+
+ /**
+ * Convert the characters in this to UPPER CASE following the conventions of
+ * a specific locale.
+ * @param locale The locale containing the conventions to use.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toUpper(const Locale& locale);
+
+ /**
+ * Convert the characters in this to lower case following the conventions of
+ * the default locale.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toLower(void);
+
+ /**
+ * Convert the characters in this to lower case following the conventions of
+ * a specific locale.
+ * @param locale The locale containing the conventions to use.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toLower(const Locale& locale);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Titlecase this string, convenience function using the default locale.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @return A reference to this.
+ * @stable ICU 2.1
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter);
+
+ /**
+ * Titlecase this string.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @param locale The locale to consider.
+ * @return A reference to this.
+ * @stable ICU 2.1
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
+
+ /**
+ * Titlecase this string, with options.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options.)
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @param locale The locale to consider.
+ * @param options Options bit set, usually 0. See ucasemap_open(),
+ * U_TITLECASE_NO_LOWERCASE,
+ * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
+ * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
+ * @return A reference to this.
+ * @stable ICU 3.8
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
+
+#endif
+
+ /**
+ * Case-folds the characters in this string.
+ *
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ *
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
+
+ //========================================
+ // Access to the internal buffer
+ //========================================
+
+ /**
+ * Get a read/write pointer to the internal buffer.
+ * The buffer is guaranteed to be large enough for at least minCapacity char16_ts,
+ * writable, and is still owned by the UnicodeString object.
+ * Calls to getBuffer(minCapacity) must not be nested, and
+ * must be matched with calls to releaseBuffer(newLength).
+ * If the string buffer was read-only or shared,
+ * then it will be reallocated and copied.
+ *
+ * An attempted nested call will return 0, and will not further modify the
+ * state of the UnicodeString object.
+ * It also returns 0 if the string is bogus.
+ *
+ * The actual capacity of the string buffer may be larger than minCapacity.
+ * getCapacity() returns the actual capacity.
+ * For many operations, the full capacity should be used to avoid reallocations.
+ *
+ * While the buffer is "open" between getBuffer(minCapacity)
+ * and releaseBuffer(newLength), the following applies:
+ * - The string length is set to 0.
+ * - Any read API call on the UnicodeString object will behave like on a 0-length string.
+ * - Any write API call on the UnicodeString object is disallowed and will have no effect.
+ * - You can read from and write to the returned buffer.
+ * - The previous string contents will still be in the buffer;
+ * if you want to use it, then you need to call length() before getBuffer(minCapacity).
+ * If the length() was greater than minCapacity, then any contents after minCapacity
+ * may be lost.
+ * The buffer contents is not NUL-terminated by getBuffer().
+ * If length() < getCapacity() then you can terminate it by writing a NUL
+ * at index length().
+ * - You must call releaseBuffer(newLength) before and in order to
+ * return to normal UnicodeString operation.
+ *
+ * @param minCapacity the minimum number of char16_ts that are to be available
+ * in the buffer, starting at the returned pointer;
+ * default to the current string capacity if minCapacity==-1
+ * @return a writable pointer to the internal string buffer,
+ * or nullptr if an error occurs (nested calls, out of memory)
+ *
+ * @see releaseBuffer
+ * @see getTerminatedBuffer()
+ * @stable ICU 2.0
+ */
+ char16_t *getBuffer(int32_t minCapacity);
+
+ /**
+ * Release a read/write buffer on a UnicodeString object with an
+ * "open" getBuffer(minCapacity).
+ * This function must be called in a matched pair with getBuffer(minCapacity).
+ * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
+ *
+ * It will set the string length to newLength, at most to the current capacity.
+ * If newLength==-1 then it will set the length according to the
+ * first NUL in the buffer, or to the capacity if there is no NUL.
+ *
+ * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
+ *
+ * @param newLength the new length of the UnicodeString object;
+ * defaults to the current capacity if newLength is greater than that;
+ * if newLength==-1, it defaults to u_strlen(buffer) but not more than
+ * the current capacity of the string
+ *
+ * @see getBuffer(int32_t minCapacity)
+ * @stable ICU 2.0
+ */
+ void releaseBuffer(int32_t newLength=-1);
+
+ /**
+ * Get a read-only pointer to the internal buffer.
+ * This can be called at any time on a valid UnicodeString.
+ *
+ * It returns 0 if the string is bogus, or
+ * during an "open" getBuffer(minCapacity).
+ *
+ * It can be called as many times as desired.
+ * The pointer that it returns will remain valid until the UnicodeString object is modified,
+ * at which time the pointer is semantically invalidated and must not be used any more.
+ *
+ * The capacity of the buffer can be determined with getCapacity().
+ * The part after length() may or may not be initialized and valid,
+ * depending on the history of the UnicodeString object.
+ *
+ * The buffer contents is (probably) not NUL-terminated.
+ * You can check if it is with
+ * `(s.length() < s.getCapacity() && buffer[s.length()]==0)`.
+ * (See getTerminatedBuffer().)
+ *
+ * The buffer may reside in read-only memory. Its contents must not
+ * be modified.
+ *
+ * @return a read-only pointer to the internal string buffer,
+ * or nullptr if the string is empty or bogus
+ *
+ * @see getBuffer(int32_t minCapacity)
+ * @see getTerminatedBuffer()
+ * @stable ICU 2.0
+ */
+ inline const char16_t *getBuffer() const;
+
+ /**
+ * Get a read-only pointer to the internal buffer,
+ * making sure that it is NUL-terminated.
+ * This can be called at any time on a valid UnicodeString.
+ *
+ * It returns 0 if the string is bogus, or
+ * during an "open" getBuffer(minCapacity), or if the buffer cannot
+ * be NUL-terminated (because memory allocation failed).
+ *
+ * It can be called as many times as desired.
+ * The pointer that it returns will remain valid until the UnicodeString object is modified,
+ * at which time the pointer is semantically invalidated and must not be used any more.
+ *
+ * The capacity of the buffer can be determined with getCapacity().
+ * The part after length()+1 may or may not be initialized and valid,
+ * depending on the history of the UnicodeString object.
+ *
+ * The buffer contents is guaranteed to be NUL-terminated.
+ * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
+ * is written.
+ * For this reason, this function is not const, unlike getBuffer().
+ * Note that a UnicodeString may also contain NUL characters as part of its contents.
+ *
+ * The buffer may reside in read-only memory. Its contents must not
+ * be modified.
+ *
+ * @return a read-only pointer to the internal string buffer,
+ * or 0 if the string is empty or bogus
+ *
+ * @see getBuffer(int32_t minCapacity)
+ * @see getBuffer()
+ * @stable ICU 2.2
+ */
+ const char16_t *getTerminatedBuffer();
+
+ //========================================
+ // Constructors
+ //========================================
+
+ /** Construct an empty UnicodeString.
+ * @stable ICU 2.0
+ */
+ inline UnicodeString();
+
+ /**
+ * Construct a UnicodeString with capacity to hold `capacity` char16_ts
+ * @param capacity the number of char16_ts this UnicodeString should hold
+ * before a resize is necessary; if count is greater than 0 and count
+ * code points c take up more space than capacity, then capacity is adjusted
+ * accordingly.
+ * @param c is used to initially fill the string
+ * @param count specifies how many code points c are to be written in the
+ * string
+ * @stable ICU 2.0
+ */
+ UnicodeString(int32_t capacity, UChar32 c, int32_t count);
+
+ /**
+ * Single char16_t (code unit) constructor.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
+ * on the compiler command line or similar.
+ * @param ch the character to place in the UnicodeString
+ * @stable ICU 2.0
+ */
+ UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch);
+
+ /**
+ * Single UChar32 (code point) constructor.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
+ * on the compiler command line or similar.
+ * @param ch the character to place in the UnicodeString
+ * @stable ICU 2.0
+ */
+ UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
+
+ /**
+ * char16_t* constructor.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
+ * on the compiler command line or similar.
+ * @param text The characters to place in the UnicodeString. `text`
+ * must be NUL (U+0000) terminated.
+ * @stable ICU 2.0
+ */
+ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
+
+#if !U_CHAR16_IS_TYPEDEF
+ /**
+ * uint16_t * constructor.
+ * Delegates to UnicodeString(const char16_t *).
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
+ * on the compiler command line or similar.
+ * @param text NUL-terminated UTF-16 string
+ * @stable ICU 59
+ */
+ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
+ UnicodeString(ConstChar16Ptr(text)) {}
+#endif
+
+#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
+ /**
+ * wchar_t * constructor.
+ * (Only defined if U_SIZEOF_WCHAR_T==2.)
+ * Delegates to UnicodeString(const char16_t *).
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
+ * on the compiler command line or similar.
+ * @param text NUL-terminated UTF-16 string
+ * @stable ICU 59
+ */
+ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
+ UnicodeString(ConstChar16Ptr(text)) {}
+#endif
+
+ /**
+ * nullptr_t constructor.
+ * Effectively the same as the default constructor, makes an empty string object.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
+ * on the compiler command line or similar.
+ * @param text nullptr
+ * @stable ICU 59
+ */
+ UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
+
+ /**
+ * char16_t* constructor.
+ * @param text The characters to place in the UnicodeString.
+ * @param textLength The number of Unicode characters in `text`
+ * to copy.
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char16_t *text,
+ int32_t textLength);
+
+#if !U_CHAR16_IS_TYPEDEF
+ /**
+ * uint16_t * constructor.
+ * Delegates to UnicodeString(const char16_t *, int32_t).
+ * @param text UTF-16 string
+ * @param textLength string length
+ * @stable ICU 59
+ */
+ UnicodeString(const uint16_t *text, int32_t textLength) :
+ UnicodeString(ConstChar16Ptr(text), textLength) {}
+#endif
+
+#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
+ /**
+ * wchar_t * constructor.
+ * (Only defined if U_SIZEOF_WCHAR_T==2.)
+ * Delegates to UnicodeString(const char16_t *, int32_t).
+ * @param text UTF-16 string
+ * @param textLength string length
+ * @stable ICU 59
+ */
+ UnicodeString(const wchar_t *text, int32_t textLength) :
+ UnicodeString(ConstChar16Ptr(text), textLength) {}
+#endif
+
+ /**
+ * nullptr_t constructor.
+ * Effectively the same as the default constructor, makes an empty string object.
+ * @param text nullptr
+ * @param textLength ignored
+ * @stable ICU 59
+ */
+ inline UnicodeString(const std::nullptr_t text, int32_t textLength);
+
+ /**
+ * Readonly-aliasing char16_t* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has copy-on-write semantics:
+ * When the string is modified, then the buffer is first copied into
+ * newly allocated memory.
+ * The aliased buffer is never modified.
+ *
+ * In an assignment to another UnicodeString, when using the copy constructor
+ * or the assignment operator, the text will be copied.
+ * When using fastCopyFrom(), the text will be aliased again,
+ * so that both strings then alias the same readonly-text.
+ *
+ * @param isTerminated specifies if `text` is `NUL`-terminated.
+ * This must be true if `textLength==-1`.
+ * @param text The characters to alias for the UnicodeString.
+ * @param textLength The number of Unicode characters in `text` to alias.
+ * If -1, then this constructor will determine the length
+ * by calling `u_strlen()`.
+ * @stable ICU 2.0
+ */
+ UnicodeString(UBool isTerminated,
+ ConstChar16Ptr text,
+ int32_t textLength);
+
+ /**
+ * Writable-aliasing char16_t* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has write-through semantics:
+ * For as long as the capacity of the buffer is sufficient, write operations
+ * will directly affect the buffer. When more capacity is necessary, then
+ * a new buffer will be allocated and the contents copied as with regularly
+ * constructed strings.
+ * In an assignment to another UnicodeString, the buffer will be copied.
+ * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
+ * as the string buffer itself and will in this case not copy the contents.
+ *
+ * @param buffer The characters to alias for the UnicodeString.
+ * @param buffLength The number of Unicode characters in `buffer` to alias.
+ * @param buffCapacity The size of `buffer` in char16_ts.
+ * @stable ICU 2.0
+ */
+ UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
+
+#if !U_CHAR16_IS_TYPEDEF
+ /**
+ * Writable-aliasing uint16_t * constructor.
+ * Wraps `buffer` in a Char16Ptr and delegates to
+ * UnicodeString(char16_t *, int32_t, int32_t).
+ * @param buffer writable buffer of/for UTF-16 text
+ * @param buffLength length of the current buffer contents
+ * @param buffCapacity buffer capacity
+ * @stable ICU 59
+ */
+ UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
+ UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
+#endif
+
+#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
+ /**
+ * Writable-aliasing wchar_t * constructor.
+ * (Only defined if U_SIZEOF_WCHAR_T==2.)
+ * Wraps `buffer` in a Char16Ptr and delegates to
+ * UnicodeString(char16_t *, int32_t, int32_t).
+ * @param buffer writable buffer of/for UTF-16 text
+ * @param buffLength length of the current buffer contents
+ * @param buffCapacity buffer capacity
+ * @stable ICU 59
+ */
+ UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
+ UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
+#endif
+
+ /**
+ * Writable-aliasing nullptr_t constructor.
+ * Effectively the same as the default constructor, makes an empty string object.
+ * @param buffer nullptr
+ * @param buffLength ignored
+ * @param buffCapacity ignored
+ * @stable ICU 59
+ */
+ inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
+
+#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
+
+ /**
+ * char* constructor.
+ * Uses the default converter (and thus depends on the ICU conversion code)
+ * unless U_CHARSET_IS_UTF8 is set to 1.
+ *
+ * For ASCII (really "invariant character") strings it is more efficient to use
+ * the constructor that takes a US_INV (for its enum EInvariant).
+ * For ASCII (invariant-character) string literals, see UNICODE_STRING and
+ * UNICODE_STRING_SIMPLE.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
+ * on the compiler command line or similar.
+ * @param codepageData an array of bytes, null-terminated,
+ * in the platform's default codepage.
+ * @stable ICU 2.0
+ * @see UNICODE_STRING
+ * @see UNICODE_STRING_SIMPLE
+ */
+ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
+
+ /**
+ * char* constructor.
+ * Uses the default converter (and thus depends on the ICU conversion code)
+ * unless U_CHARSET_IS_UTF8 is set to 1.
+ * @param codepageData an array of bytes in the platform's default codepage.
+ * @param dataLength The number of bytes in `codepageData`.
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char *codepageData, int32_t dataLength);
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+ /**
+ * char* constructor.
+ * @param codepageData an array of bytes, null-terminated
+ * @param codepage the encoding of `codepageData`. The special
+ * value 0 for `codepage` indicates that the text is in the
+ * platform's default codepage.
+ *
+ * If `codepage` is an empty string (`""`),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * Recommendation: For invariant-character strings use the constructor
+ * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char *codepageData, const char *codepage);
+
+ /**
+ * char* constructor.
+ * @param codepageData an array of bytes.
+ * @param dataLength The number of bytes in `codepageData`.
+ * @param codepage the encoding of `codepageData`. The special
+ * value 0 for `codepage` indicates that the text is in the
+ * platform's default codepage.
+ * If `codepage` is an empty string (`""`),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * Recommendation: For invariant-character strings use the constructor
+ * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
+
+ /**
+ * char * / UConverter constructor.
+ * This constructor uses an existing UConverter object to
+ * convert the codepage string to Unicode and construct a UnicodeString
+ * from that.
+ *
+ * The converter is reset at first.
+ * If the error code indicates a failure before this constructor is called,
+ * or if an error occurs during conversion or construction,
+ * then the string will be bogus.
+ *
+ * This function avoids the overhead of opening and closing a converter if
+ * multiple strings are constructed.
+ *
+ * @param src input codepage string
+ * @param srcLength length of the input string, can be -1 for NUL-terminated strings
+ * @param cnv converter object (ucnv_resetToUnicode() will be called),
+ * can be NULL for the default converter
+ * @param errorCode normal ICU error code
+ * @stable ICU 2.0
+ */
+ UnicodeString(
+ const char *src, int32_t srcLength,
+ UConverter *cnv,
+ UErrorCode &errorCode);
+
+#endif
+
+ /**
+ * Constructs a Unicode string from an invariant-character char * string.
+ * About invariant characters see utypes.h.
+ * This constructor has no runtime dependency on conversion code and is
+ * therefore recommended over ones taking a charset name string
+ * (where the empty string "" indicates invariant-character conversion).
+ *
+ * Use the macro US_INV as the third, signature-distinguishing parameter.
+ *
+ * For example:
+ * \code
+ * void fn(const char *s) {
+ * UnicodeString ustr(s, -1, US_INV);
+ * // use ustr ...
+ * }
+ * \endcode
+ * @param src String using only invariant characters.
+ * @param textLength Length of src, or -1 if NUL-terminated.
+ * @param inv Signature-distinguishing parameter, use US_INV.
+ *
+ * @see US_INV
+ * @stable ICU 3.2
+ */
+ UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);
+
+
+ /**
+ * Copy constructor.
+ *
+ * Starting with ICU 2.4, the assignment operator and the copy constructor
+ * allocate a new buffer and copy the buffer contents even for readonly aliases.
+ * By contrast, the fastCopyFrom() function implements the old,
+ * more efficient but less safe behavior
+ * of making this string also a readonly alias to the same buffer.
+ *
+ * If the source object has an "open" buffer from getBuffer(minCapacity),
+ * then the copy is an empty string.
+ *
+ * @param that The UnicodeString object to copy.
+ * @stable ICU 2.0
+ * @see fastCopyFrom
+ */
+ UnicodeString(const UnicodeString& that);
+
+ /**
+ * Move constructor; might leave src in bogus state.
+ * This string will have the same contents and state that the source string had.
+ * @param src source string
+ * @stable ICU 56
+ */
+ UnicodeString(UnicodeString &&src) U_NOEXCEPT;
+
+ /**
+ * 'Substring' constructor from tail of source string.
+ * @param src The UnicodeString object to copy.
+ * @param srcStart The offset into `src` at which to start copying.
+ * @stable ICU 2.2
+ */
+ UnicodeString(const UnicodeString& src, int32_t srcStart);
+
+ /**
+ * 'Substring' constructor from subrange of source string.
+ * @param src The UnicodeString object to copy.
+ * @param srcStart The offset into `src` at which to start copying.
+ * @param srcLength The number of characters from `src` to copy.
+ * @stable ICU 2.2
+ */
+ UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
+
+ /**
+ * Clone this object, an instance of a subclass of Replaceable.
+ * Clones can be used concurrently in multiple threads.
+ * If a subclass does not implement clone(), or if an error occurs,
+ * then NULL is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see Replaceable::clone
+ * @see getDynamicClassID
+ * @stable ICU 2.6
+ */
+ virtual UnicodeString *clone() const;
+
+ /** Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~UnicodeString();
+
+ /**
+ * Create a UnicodeString from a UTF-8 string.
+ * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
+ * Calls u_strFromUTF8WithSub().
+ *
+ * @param utf8 UTF-8 input string.
+ * Note that a StringPiece can be implicitly constructed
+ * from a std::string or a NUL-terminated const char * string.
+ * @return A UnicodeString with equivalent UTF-16 contents.
+ * @see toUTF8
+ * @see toUTF8String
+ * @stable ICU 4.2
+ */
+ static UnicodeString fromUTF8(StringPiece utf8);
+
+ /**
+ * Create a UnicodeString from a UTF-32 string.
+ * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
+ * Calls u_strFromUTF32WithSub().
+ *
+ * @param utf32 UTF-32 input string. Must not be NULL.
+ * @param length Length of the input string, or -1 if NUL-terminated.
+ * @return A UnicodeString with equivalent UTF-16 contents.
+ * @see toUTF32
+ * @stable ICU 4.2
+ */
+ static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
+
+ /* Miscellaneous operations */
+
+ /**
+ * Unescape a string of characters and return a string containing
+ * the result. The following escape sequences are recognized:
+ *
+ * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
+ * \\Uhhhhhhhh 8 hex digits
+ * \\xhh 1-2 hex digits
+ * \\ooo 1-3 octal digits; o in [0-7]
+ * \\cX control-X; X is masked with 0x1F
+ *
+ * as well as the standard ANSI C escapes:
+ *
+ * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+ * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+ * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+ *
+ * Anything else following a backslash is generically escaped. For
+ * example, "[a\\-z]" returns "[a-z]".
+ *
+ * If an escape sequence is ill-formed, this method returns an empty
+ * string. An example of an ill-formed sequence is "\\u" followed by
+ * fewer than 4 hex digits.
+ *
+ * This function is similar to u_unescape() but not identical to it.
+ * The latter takes a source char*, so it does escape recognition
+ * and also invariant conversion.
+ *
+ * @return a string with backslash escapes interpreted, or an
+ * empty string on error.
+ * @see UnicodeString#unescapeAt()
+ * @see u_unescape()
+ * @see u_unescapeAt()
+ * @stable ICU 2.0
+ */
+ UnicodeString unescape() const;
+
+ /**
+ * Unescape a single escape sequence and return the represented
+ * character. See unescape() for a listing of the recognized escape
+ * sequences. The character at offset-1 is assumed (without
+ * checking) to be a backslash. If the escape sequence is
+ * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
+ * returned.
+ *
+ * @param offset an input output parameter. On input, it is the
+ * offset into this string where the escape sequence is located,
+ * after the initial backslash. On output, it is advanced after the
+ * last character parsed. On error, it is not advanced at all.
+ * @return the character represented by the escape sequence at
+ * offset, or U_SENTINEL=-1 on error.
+ * @see UnicodeString#unescape()
+ * @see u_unescape()
+ * @see u_unescapeAt()
+ * @stable ICU 2.0
+ */
+ UChar32 unescapeAt(int32_t &offset) const;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+ //========================================
+ // Implementation methods
+ //========================================
+
+protected:
+ /**
+ * Implement Replaceable::getLength() (see jitterbug 1027).
+ * @stable ICU 2.4
+ */
+ virtual int32_t getLength() const;
+
+ /**
+ * The change in Replaceable to use virtual getCharAt() allows
+ * UnicodeString::charAt() to be inline again (see jitterbug 709).
+ * @stable ICU 2.4
+ */
+ virtual char16_t getCharAt(int32_t offset) const;
+
+ /**
+ * The change in Replaceable to use virtual getChar32At() allows
+ * UnicodeString::char32At() to be inline again (see jitterbug 709).
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getChar32At(int32_t offset) const;
+
+private:
+ // For char* constructors. Could be made public.
+ UnicodeString &setToUTF8(StringPiece utf8);
+ // For extract(char*).
+ // We could make a toUTF8(target, capacity, errorCode) public but not
+ // this version: New API will be cleaner if we make callers create substrings
+ // rather than having start+length on every method,
+ // and it should take a UErrorCode&.
+ int32_t
+ toUTF8(int32_t start, int32_t len,
+ char *target, int32_t capacity) const;
+
+ /**
+ * Internal string contents comparison, called by operator==.
+ * Requires: this & text not bogus and have same lengths.
+ */
+ UBool doEquals(const UnicodeString &text, int32_t len) const;
+
+ inline int8_t
+ doCompare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ int8_t doCompare(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ inline int8_t
+ doCompareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ int8_t doCompareCodePointOrder(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ inline int8_t
+ doCaseCompare(int32_t start,
+ int32_t length,
+ const UnicodeString &srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ int8_t
+ doCaseCompare(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ int32_t doIndexOf(char16_t c,
+ int32_t start,
+ int32_t length) const;
+
+ int32_t doIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ int32_t doLastIndexOf(char16_t c,
+ int32_t start,
+ int32_t length) const;
+
+ int32_t doLastIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ void doExtract(int32_t start,
+ int32_t length,
+ char16_t *dst,
+ int32_t dstStart) const;
+
+ inline void doExtract(int32_t start,
+ int32_t length,
+ UnicodeString& target) const;
+
+ inline char16_t doCharAt(int32_t offset) const;
+
+ UnicodeString& doReplace(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ UnicodeString& doReplace(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
+ UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
+
+ UnicodeString& doReverse(int32_t start,
+ int32_t length);
+
+ // calculate hash code
+ int32_t doHashCode(void) const;
+
+ // get pointer to start of array
+ // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
+ inline char16_t* getArrayStart(void);
+ inline const char16_t* getArrayStart(void) const;
+
+ inline UBool hasShortLength() const;
+ inline int32_t getShortLength() const;
+
+ // A UnicodeString object (not necessarily its current buffer)
+ // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
+ inline UBool isWritable() const;
+
+ // Is the current buffer writable?
+ inline UBool isBufferWritable() const;
+
+ // None of the following does releaseArray().
+ inline void setZeroLength();
+ inline void setShortLength(int32_t len);
+ inline void setLength(int32_t len);
+ inline void setToEmpty();
+ inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
+
+ // allocate the array; result may be the stack buffer
+ // sets refCount to 1 if appropriate
+ // sets fArray, fCapacity, and flags
+ // sets length to 0
+ // returns boolean for success or failure
+ UBool allocate(int32_t capacity);
+
+ // release the array if owned
+ void releaseArray(void);
+
+ // turn a bogus string into an empty one
+ void unBogus();
+
+ // implements assignment operator, copy constructor, and fastCopyFrom()
+ UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false);
+
+ // Copies just the fields without memory management.
+ void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;
+
+ // Pin start and limit to acceptable values.
+ inline void pinIndex(int32_t& start) const;
+ inline void pinIndices(int32_t& start,
+ int32_t& length) const;
+
+#if !UCONFIG_NO_CONVERSION
+
+ /* Internal extract() using UConverter. */
+ int32_t doExtract(int32_t start, int32_t length,
+ char *dest, int32_t destCapacity,
+ UConverter *cnv,
+ UErrorCode &errorCode) const;
+
+ /*
+ * Real constructor for converting from codepage data.
+ * It assumes that it is called with !fRefCounted.
+ *
+ * If `codepage==0`, then the default converter
+ * is used for the platform encoding.
+ * If `codepage` is an empty string (`""`),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ */
+ void doCodepageCreate(const char *codepageData,
+ int32_t dataLength,
+ const char *codepage);
+
+ /*
+ * Worker function for creating a UnicodeString from
+ * a codepage string using a UConverter.
+ */
+ void
+ doCodepageCreate(const char *codepageData,
+ int32_t dataLength,
+ UConverter *converter,
+ UErrorCode &status);
+
+#endif
+
+ /*
+ * This function is called when write access to the array
+ * is necessary.
+ *
+ * We need to make a copy of the array if
+ * the buffer is read-only, or
+ * the buffer is refCounted (shared), and refCount>1, or
+ * the buffer is too small.
+ *
+ * Return false if memory could not be allocated.
+ */
+ UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
+ int32_t growCapacity = -1,
+ UBool doCopyArray = true,
+ int32_t **pBufferToDelete = 0,
+ UBool forceClone = false);
+
+ /**
+ * Common function for UnicodeString case mappings.
+ * The stringCaseMapper has the same type UStringCaseMapper
+ * as in ustr_imp.h for ustrcase_map().
+ */
+ UnicodeString &
+ caseMap(int32_t caseLocale, uint32_t options,
+#if !UCONFIG_NO_BREAK_ITERATION
+ BreakIterator *iter,
+#endif
+ UStringCaseMapper *stringCaseMapper);
+
+ // ref counting
+ void addRef(void);
+ int32_t removeRef(void);
+ int32_t refCount(void) const;
+
+ // constants
+ enum {
+ /**
+ * Size of stack buffer for short strings.
+ * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
+ * Computed from the object size minus one pointer and minus 2 bytes,
+ * in units of U_SIZEOF_UCHAR bytes.
+ * @see UNISTR_OBJECT_SIZE
+ */
+ US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
+ kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
+ kInvalidHashCode=0, // invalid hash code
+ kEmptyHashCode=1, // hash code for empty string
+
+ // bit flag values for fLengthAndFlags
+ kIsBogus=1, // this string is bogus, i.e., not valid or NULL
+ kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
+ kRefCounted=4, // there is a refCount field before the characters in fArray
+ kBufferIsReadonly=8,// do not write to this buffer
+ kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
+ // and releaseBuffer(newLength) must be called
+ kAllStorageFlags=0x1f, // mask covering the five flag bits above (1|2|4|8|16)
+
+ kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long
+ kLength1=1<<kLengthShift, // a short length of 1, as stored above the flag bits
+ kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0)
+ kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength
+
+ // combined values for convenience
+ kShortString=kUsingStackBuffer, // same value as kUsingStackBuffer
+ kLongString=kRefCounted, // same value as kRefCounted
+ kReadonlyAlias=kBufferIsReadonly, // same value as kBufferIsReadonly
+ kWritableAlias=0 // no storage flag set
+ };
+
+ friend class UnicodeStringAppendable;
+
+ union StackBufferOrFields; // forward declaration necessary before friend declaration
+ friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
+
+ /*
+ * The following are all the class fields that are stored
+ * in each UnicodeString object.
+ * Note that UnicodeString has virtual functions,
+ * therefore there is an implicit vtable pointer
+ * as the first real field.
+ * The fields should be aligned such that no padding is necessary.
+ * On 32-bit machines, the size should be 32 bytes,
+ * on 64-bit machines (8-byte pointers), it should be 40 bytes.
+ *
+ * We use a hack to achieve this.
+ *
+ * With at least some compilers, each of the following is forced to
+ * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
+ * rounded up with additional padding if the fields do not already fit that requirement:
+ * - sizeof(class UnicodeString)
+ * - offsetof(UnicodeString, fUnion)
+ * - sizeof(fUnion)
+ * - sizeof(fStackFields)
+ *
+ * We optimize for the longest possible internal buffer for short strings.
+ * fUnion.fStackFields begins with 2 bytes for storage flags
+ * and the length of relatively short strings,
+ * followed by the buffer for short string contents.
+ * There is no padding inside fStackFields.
+ *
+ * Heap-allocated and aliased strings use fUnion.fFields.
+ * Both fStackFields and fFields must begin with the same fields for flags and short length,
+ * that is, those must have the same memory offsets inside the object,
+ * because the flags must be inspected in order to decide which half of fUnion is being used.
+ * We assume that the compiler does not reorder the fields.
+ *
+ * (Padding at the end of fFields is ok:
+ * As long as it is no larger than fStackFields, it is not wasted space.)
+ *
+ * For some of the history of the UnicodeString class fields layout, see
+ * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
+ * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
+ * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
+ */
+ // (implicit) *vtable;
+ union StackBufferOrFields {
+ // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
+ // Each struct of the union must begin with fLengthAndFlags.
+ struct {
+ int16_t fLengthAndFlags; // bit fields: see constants above
+ char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
+ } fStackFields;
+ struct {
+ int16_t fLengthAndFlags; // bit fields: see constants above
+ // Number of characters in fArray. length() reads this field only when
+ // fLengthAndFlags is negative, i.e. when the length is not held in the
+ // short-length bits; otherwise it is undefined.
+ int32_t fLength;
+ int32_t fCapacity; // capacity of fArray (in char16_ts)
+ // array pointer last to minimize padding for machines with P128 data model
+ // or pointer sizes that are not a power of 2
+ char16_t *fArray; // the Unicode data
+ } fFields;
+ } fUnion;
+};
+
+/**
+ * Create a new UnicodeString with the concatenation of two others.
+ *
+ * @param s1 The first string to be copied to the new one.
+ * @param s2 The second string to be copied to the new one, after s1.
+ * @return UnicodeString(s1).append(s2)
+ * @stable ICU 2.8
+ */
+U_COMMON_API UnicodeString U_EXPORT2
+operator+ (const UnicodeString &s1, const UnicodeString &s2);
+
+//========================================
+// Inline members
+//========================================
+
+//========================================
+// Privates
+//========================================
+
+// Clamps start in place: negative values become 0,
+// values beyond length() become length().
+inline void
+UnicodeString::pinIndex(int32_t& start) const
+{
+  if(start < 0) {
+    start = 0;
+    return;
+  }
+  const int32_t len = length();
+  if(len < start) {
+    start = len;
+  }
+}
+
+// Clamps start into [0, length()] and then _length into
+// [0, length() - start], both in place.
+inline void
+UnicodeString::pinIndices(int32_t& start,
+                          int32_t& _length) const
+{
+  const int32_t total = length();
+  if(start < 0) {
+    start = 0;
+  } else if(total < start) {
+    start = total;
+  }
+
+  // What is left between the pinned start and the end of the string.
+  const int32_t remaining = total - start;
+  if(_length < 0) {
+    _length = 0;
+  } else if(remaining < _length) {
+    _length = remaining;
+  }
+}
+
+// Returns the start of the character storage: the in-object buffer when
+// kUsingStackBuffer is set, otherwise the fArray pointer.
+// No bogus or open-buffer checks are made here.
+inline char16_t*
+UnicodeString::getArrayStart() {
+  if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
+    return fUnion.fStackFields.fBuffer;
+  }
+  return fUnion.fFields.fArray;
+}
+
+// Const overload: returns the in-object buffer when kUsingStackBuffer is
+// set, otherwise the fArray pointer. No bogus or open-buffer checks.
+inline const char16_t*
+UnicodeString::getArrayStart() const {
+  if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
+    return fUnion.fStackFields.fBuffer;
+  }
+  return fUnion.fFields.fArray;
+}
+
+//========================================
+// Default constructor
+//========================================
+
+// Starts out as kShortString: stack buffer in use, short length 0.
+inline
+UnicodeString::UnicodeString()
+{
+  fUnion.fStackFields.fLengthAndFlags = kShortString;
+}
+
+// nullptr text: initialized exactly like the default constructor.
+inline
+UnicodeString::UnicodeString(const std::nullptr_t /*text*/)
+{
+  fUnion.fStackFields.fLengthAndFlags = kShortString;
+}
+
+// nullptr text with a length: the length is ignored and the object is
+// initialized exactly like the default constructor.
+inline
+UnicodeString::UnicodeString(const std::nullptr_t /*text*/,
+                             int32_t /*length*/)
+{
+  fUnion.fStackFields.fLengthAndFlags = kShortString;
+}
+
+// nullptr writable buffer: length and capacity are ignored and the object
+// is initialized exactly like the default constructor.
+inline
+UnicodeString::UnicodeString(std::nullptr_t /*buffer*/,
+                             int32_t /*buffLength*/,
+                             int32_t /*buffCapacity*/)
+{
+  fUnion.fStackFields.fLengthAndFlags = kShortString;
+}
+
+//========================================
+// Read-only implementation methods
+//========================================
+// True when fLengthAndFlags is non-negative, in which case length()
+// takes the length from getShortLength() rather than from fLength.
+inline UBool
+UnicodeString::hasShortLength() const
+{
+  const int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags;
+  return lengthAndFlags >= 0;
+}
+
+// Extracts the short length stored above the flag bits.
+// Callers are expected to have checked hasShortLength(): for a non-negative
+// fLengthAndFlags the kind of right shift (arithmetic/logical) is irrelevant.
+inline int32_t
+UnicodeString::getShortLength() const
+{
+  const int32_t lengthAndFlags = fUnion.fFields.fLengthAndFlags;
+  return lengthAndFlags >> kLengthShift;
+}
+
+// Returns the short length when one is stored in fLengthAndFlags,
+// otherwise the separate fLength field.
+inline int32_t
+UnicodeString::length() const
+{
+  if(hasShortLength()) {
+    return getShortLength();
+  }
+  return fUnion.fFields.fLength;
+}
+
+// Capacity is US_STACKBUF_SIZE while the stack buffer is in use,
+// otherwise the stored fCapacity.
+inline int32_t
+UnicodeString::getCapacity() const
+{
+  if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
+    return US_STACKBUF_SIZE;
+  }
+  return fUnion.fFields.fCapacity;
+}
+
+// Public entry point; the work is done by doHashCode().
+inline int32_t
+UnicodeString::hashCode() const
+{
+  return doHashCode();
+}
+
+// Reports whether the kIsBogus flag is set.
+inline UBool
+UnicodeString::isBogus() const
+{
+  const int32_t bogusBit = fUnion.fFields.fLengthAndFlags & kIsBogus;
+  return static_cast<UBool>(bogusBit);
+}
+
+// The object is writable unless it is bogus or has an open getBuffer().
+inline UBool
+UnicodeString::isWritable() const
+{
+  const int32_t blocking =
+      fUnion.fFields.fLengthAndFlags & (kOpenGetBuffer|kIsBogus);
+  return static_cast<UBool>(blocking == 0);
+}
+
+// The current buffer is writable when none of the open-buffer, bogus or
+// read-only flags is set and, if the buffer is ref-counted, this object
+// holds the only reference.
+inline UBool
+UnicodeString::isBufferWritable() const
+{
+  const int16_t flags = fUnion.fFields.fLengthAndFlags;
+  if(flags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) {
+    return false;
+  }
+  if(!(flags&kRefCounted)) {
+    return true;
+  }
+  // refCount() is consulted only for ref-counted buffers.
+  return refCount()==1;
+}
+
+// Returns nullptr for a bogus string or one with an open getBuffer();
+// otherwise the stack buffer or fArray, depending on kUsingStackBuffer.
+inline const char16_t *
+UnicodeString::getBuffer() const
+{
+  const int16_t flags = fUnion.fFields.fLengthAndFlags;
+  if(flags&(kIsBogus|kOpenGetBuffer)) {
+    return nullptr;
+  }
+  if(flags&kUsingStackBuffer) {
+    return fUnion.fStackFields.fBuffer;
+  }
+  return fUnion.fFields.fArray;
+}
+
+//========================================
+// Read-only alias methods
+//========================================
+// UnicodeString-source overload. A bogus srcText yields 0 if this string is
+// bogus too and 1 otherwise; for any other srcText the source range is
+// pinned and the work is handed to the char16_t* overload.
+inline int8_t
+UnicodeString::doCompare(int32_t start,
+                         int32_t thisLength,
+                         const UnicodeString& srcText,
+                         int32_t srcStart,
+                         int32_t srcLength) const
+{
+  if(!srcText.isBogus()) {
+    srcText.pinIndices(srcStart, srcLength);
+    const char16_t *srcChars = srcText.getArrayStart();
+    return doCompare(start, thisLength, srcChars, srcStart, srcLength);
+  }
+  return (int8_t)!isBogus();
+}
+
+// Two bogus strings are equal; a bogus and a non-bogus string are not.
+// Otherwise the lengths must match and doEquals() must agree.
+inline UBool
+UnicodeString::operator== (const UnicodeString& text) const
+{
+  if(isBogus()) {
+    return text.isBogus();
+  }
+  if(text.isBogus()) {
+    return false;
+  }
+  const int32_t len = length();
+  return len == text.length() && doEquals(text, len);
+}
+
+// Negation of operator==.
+inline UBool
+UnicodeString::operator!= (const UnicodeString& text) const
+{
+  return !operator==(text);
+}
+
+// True when doCompare() over both full strings yields 1.
+inline UBool
+UnicodeString::operator> (const UnicodeString& text) const
+{
+  const int8_t order = doCompare(0, length(), text, 0, text.length());
+  return order == 1;
+}
+
+// True when doCompare() over both full strings yields -1.
+inline UBool
+UnicodeString::operator< (const UnicodeString& text) const
+{
+  const int8_t order = doCompare(0, length(), text, 0, text.length());
+  return order == -1;
+}
+
+// True unless doCompare() over both full strings yields -1.
+inline UBool
+UnicodeString::operator>= (const UnicodeString& text) const
+{
+  const int8_t order = doCompare(0, length(), text, 0, text.length());
+  return order != -1;
+}
+
+// True unless doCompare() over both full strings yields 1.
+inline UBool
+UnicodeString::operator<= (const UnicodeString& text) const
+{
+  const int8_t order = doCompare(0, length(), text, 0, text.length());
+  return order != 1;
+}
+
+// Whole string against whole string.
+inline int8_t
+UnicodeString::compare(const UnicodeString& text) const
+{
+  const int32_t thisLength = length();
+  const int32_t textLength = text.length();
+  return doCompare(0, thisLength, text, 0, textLength);
+}
+
+// Range [start, start + _length) of this string against all of srcText.
+inline int8_t
+UnicodeString::compare(int32_t start,
+                       int32_t _length,
+                       const UnicodeString& srcText) const
+{
+  const int32_t srcLength = srcText.length();
+  return doCompare(start, _length, srcText, 0, srcLength);
+}
+
+// Whole string against srcChars, with srcLength passed through unchanged.
+inline int8_t
+UnicodeString::compare(ConstChar16Ptr srcChars,
+                       int32_t srcLength) const
+{
+  const int32_t thisLength = length();
+  return doCompare(0, thisLength, srcChars, 0, srcLength);
+}
+
+// Range against range; all arguments are forwarded unchanged to doCompare().
+inline int8_t
+UnicodeString::compare(int32_t start,
+                       int32_t _length,
+                       const UnicodeString& srcText,
+                       int32_t srcStart,
+                       int32_t srcLength) const
+{
+  return doCompare(start, _length,
+                   srcText, srcStart, srcLength);
+}
+
+// Range of this string against srcChars; _length is used both as this
+// string's range length and as the source length, with source start 0.
+inline int8_t
+UnicodeString::compare(int32_t start,
+                       int32_t _length,
+                       const char16_t *srcChars) const
+{
+  return doCompare(start, _length,
+                   srcChars, 0, _length);
+}
+
+// Range against a char16_t range; all arguments are forwarded unchanged.
+inline int8_t
+UnicodeString::compare(int32_t start,
+                       int32_t _length,
+                       const char16_t *srcChars,
+                       int32_t srcStart,
+                       int32_t srcLength) const
+{
+  return doCompare(start, _length,
+                   srcChars, srcStart, srcLength);
+}
+
+// Start/limit flavour of compare(): each limit is turned into a length
+// by subtracting the matching start before calling doCompare().
+inline int8_t
+UnicodeString::compareBetween(int32_t start,
+                              int32_t limit,
+                              const UnicodeString& srcText,
+                              int32_t srcStart,
+                              int32_t srcLimit) const
+{
+  const int32_t thisLength = limit - start;
+  const int32_t srcLength = srcLimit - srcStart;
+  return doCompare(start, thisLength, srcText, srcStart, srcLength);
+}
+
+// UnicodeString-source overload. A bogus srcText yields 0 if this string is
+// bogus too and 1 otherwise; for any other srcText the source range is
+// pinned and the work is handed to the char16_t* overload.
+inline int8_t
+UnicodeString::doCompareCodePointOrder(int32_t start,
+                                       int32_t thisLength,
+                                       const UnicodeString& srcText,
+                                       int32_t srcStart,
+                                       int32_t srcLength) const
+{
+  if(!srcText.isBogus()) {
+    srcText.pinIndices(srcStart, srcLength);
+    const char16_t *srcChars = srcText.getArrayStart();
+    return doCompareCodePointOrder(start, thisLength, srcChars, srcStart, srcLength);
+  }
+  return (int8_t)!isBogus();
+}
+
+// Whole string against whole string.
+inline int8_t
+UnicodeString::compareCodePointOrder(const UnicodeString& text) const
+{
+  const int32_t thisLength = length();
+  const int32_t textLength = text.length();
+  return doCompareCodePointOrder(0, thisLength, text, 0, textLength);
+}
+
+// Range [start, start + _length) of this string against all of srcText.
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const UnicodeString& srcText) const
+{
+  const int32_t srcLength = srcText.length();
+  return doCompareCodePointOrder(start, _length, srcText, 0, srcLength);
+}
+
+// Whole string against srcChars, with srcLength passed through unchanged.
+inline int8_t
+UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
+                                     int32_t srcLength) const
+{
+  const int32_t thisLength = length();
+  return doCompareCodePointOrder(0, thisLength, srcChars, 0, srcLength);
+}
+
+// Range against range; all arguments are forwarded unchanged.
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const UnicodeString& srcText,
+                                     int32_t srcStart,
+                                     int32_t srcLength) const
+{
+  return doCompareCodePointOrder(start, _length,
+                                 srcText, srcStart, srcLength);
+}
+
+// Range of this string against srcChars; _length is used both as this
+// string's range length and as the source length, with source start 0.
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const char16_t *srcChars) const
+{
+  return doCompareCodePointOrder(start, _length,
+                                 srcChars, 0, _length);
+}
+
+// Range against a char16_t range; all arguments are forwarded unchanged.
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const char16_t *srcChars,
+                                     int32_t srcStart,
+                                     int32_t srcLength) const
+{
+  return doCompareCodePointOrder(start, _length,
+                                 srcChars, srcStart, srcLength);
+}
+
+// Start/limit flavour: each limit is turned into a length by subtracting
+// the matching start before calling doCompareCodePointOrder().
+inline int8_t
+UnicodeString::compareCodePointOrderBetween(int32_t start,
+                                            int32_t limit,
+                                            const UnicodeString& srcText,
+                                            int32_t srcStart,
+                                            int32_t srcLimit) const
+{
+  const int32_t thisLength = limit - start;
+  const int32_t srcLength = srcLimit - srcStart;
+  return doCompareCodePointOrder(start, thisLength, srcText, srcStart, srcLength);
+}
+
+// UnicodeString-source overload. A bogus srcText yields 0 if this string is
+// bogus too and 1 otherwise; for any other srcText the source range is
+// pinned and the work, including options, is handed to the char16_t* overload.
+inline int8_t
+UnicodeString::doCaseCompare(int32_t start,
+                             int32_t thisLength,
+                             const UnicodeString &srcText,
+                             int32_t srcStart,
+                             int32_t srcLength,
+                             uint32_t options) const
+{
+  if(!srcText.isBogus()) {
+    srcText.pinIndices(srcStart, srcLength);
+    const char16_t *srcChars = srcText.getArrayStart();
+    return doCaseCompare(start, thisLength, srcChars, srcStart, srcLength, options);
+  }
+  return (int8_t)!isBogus();
+}
+
+// Whole string against whole string, with options passed through.
+inline int8_t
+UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const
+{
+  const int32_t thisLength = length();
+  const int32_t textLength = text.length();
+  return doCaseCompare(0, thisLength, text, 0, textLength, options);
+}
+
+// Range [start, start + _length) of this string against all of srcText.
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+                           int32_t _length,
+                           const UnicodeString &srcText,
+                           uint32_t options) const
+{
+  const int32_t srcLength = srcText.length();
+  return doCaseCompare(start, _length, srcText, 0, srcLength, options);
+}
+
+// Whole string against srcChars; srcLength and options pass through unchanged.
+inline int8_t
+UnicodeString::caseCompare(ConstChar16Ptr srcChars,
+                           int32_t srcLength,
+                           uint32_t options) const
+{
+  const int32_t thisLength = length();
+  return doCaseCompare(0, thisLength, srcChars, 0, srcLength, options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+ int32_t _length,
+ const UnicodeString &srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const {
+ return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+ int32_t _length,
+ const char16_t *srcChars,
+ uint32_t options) const {
+ return doCaseCompare(start, _length, srcChars, 0, _length, options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+ int32_t _length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const {
+ return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
+}
+
+// Start/limit flavour of caseCompare(): converts both [start, limit) pairs
+// to start/length form and delegates to doCaseCompare().
+inline int8_t
+UnicodeString::caseCompareBetween(int32_t start,
+                                  int32_t limit,
+                                  const UnicodeString &srcText,
+                                  int32_t srcStart,
+                                  int32_t srcLimit,
+                                  uint32_t options) const
+{
+  const int32_t thisSpan = limit - start;
+  const int32_t srcSpan = srcLimit - srcStart;
+  return doCaseCompare(start, thisSpan, srcText, srcStart, srcSpan, options);
+}
+
+// Substring search with a UnicodeString pattern.
+// Returns -1 without searching when srcText is bogus or when the pattern
+// range is empty after srcText.pinIndices(); otherwise delegates to the
+// raw-buffer overload.
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& srcText,
+                       int32_t srcStart,
+                       int32_t srcLength,
+                       int32_t start,
+                       int32_t _length) const
+{
+  if(srcText.isBogus()) {
+    return -1;
+  }
+  srcText.pinIndices(srcStart, srcLength);
+  if(srcLength <= 0) {
+    return -1;
+  }
+  return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
+}
+
+// Whole pattern, whole string.
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text) const {
+  return indexOf(text, 0, text.length(), 0, length());
+}
+
+// Whole pattern, searching from start to the end of this string.
+// start goes through pinIndex() first, so the remaining length is computed
+// from the adjusted value.
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text,
+                       int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return indexOf(text, 0, text.length(), start, remaining);
+}
+
+// Whole pattern, searching the given range of this string.
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text,
+                       int32_t start,
+                       int32_t _length) const {
+  return indexOf(text, 0, text.length(), start, _length);
+}
+
+// Raw-buffer pattern read from offset 0, searching from start (after
+// pinIndex()) to the end of this string.
+inline int32_t
+UnicodeString::indexOf(const char16_t *srcChars,
+                       int32_t srcLength,
+                       int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return indexOf(srcChars, 0, srcLength, start, remaining);
+}
+
+// Raw-buffer pattern read from offset 0, searching the given range.
+inline int32_t
+UnicodeString::indexOf(ConstChar16Ptr srcChars,
+                       int32_t srcLength,
+                       int32_t start,
+                       int32_t _length) const {
+  return indexOf(srcChars, 0, srcLength, start, _length);
+}
+
+// Single code unit in a range; forwards to doIndexOf().
+inline int32_t
+UnicodeString::indexOf(char16_t c,
+                       int32_t start,
+                       int32_t _length) const {
+  return doIndexOf(c, start, _length);
+}
+
+// Single code point in a range; forwards to doIndexOf().
+inline int32_t
+UnicodeString::indexOf(UChar32 c,
+                       int32_t start,
+                       int32_t _length) const {
+  return doIndexOf(c, start, _length);
+}
+
+// Single code unit in the whole string.
+inline int32_t
+UnicodeString::indexOf(char16_t c) const {
+  return doIndexOf(c, 0, length());
+}
+
+// Single code point in the whole string; goes through the ranged overload.
+inline int32_t
+UnicodeString::indexOf(UChar32 c) const {
+  return indexOf(c, 0, length());
+}
+
+// Single code unit from start (after pinIndex()) to the end of the string.
+inline int32_t
+UnicodeString::indexOf(char16_t c,
+                       int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return doIndexOf(c, start, remaining);
+}
+
+// Single code point from start (after pinIndex()) to the end of the string.
+inline int32_t
+UnicodeString::indexOf(UChar32 c,
+                       int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return indexOf(c, start, remaining);
+}
+
+// Raw-buffer pattern read from offset 0, searching the given range.
+inline int32_t
+UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
+                           int32_t srcLength,
+                           int32_t start,
+                           int32_t _length) const {
+  return lastIndexOf(srcChars, 0, srcLength, start, _length);
+}
+
+// Raw-buffer pattern read from offset 0, searching from start (after
+// pinIndex()) to the end of this string.
+inline int32_t
+UnicodeString::lastIndexOf(const char16_t *srcChars,
+                           int32_t srcLength,
+                           int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return lastIndexOf(srcChars, 0, srcLength, start, remaining);
+}
+
+// Backward substring search with a UnicodeString pattern.
+// Returns -1 without searching when srcText is bogus or when the pattern
+// range is empty after srcText.pinIndices(); otherwise delegates to the
+// raw-buffer overload.
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& srcText,
+                           int32_t srcStart,
+                           int32_t srcLength,
+                           int32_t start,
+                           int32_t _length) const
+{
+  if(srcText.isBogus()) {
+    return -1;
+  }
+  srcText.pinIndices(srcStart, srcLength);
+  if(srcLength <= 0) {
+    return -1;
+  }
+  return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
+}
+
+// Whole pattern, searching the given range of this string.
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text,
+                           int32_t start,
+                           int32_t _length) const {
+  return lastIndexOf(text, 0, text.length(), start, _length);
+}
+
+// Whole pattern, searching from start (after pinIndex()) to the end.
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text,
+                           int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return lastIndexOf(text, 0, text.length(), start, remaining);
+}
+
+// Whole pattern, whole string.
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text) const {
+  return lastIndexOf(text, 0, text.length(), 0, length());
+}
+
+// Single code unit in a range; forwards to doLastIndexOf().
+inline int32_t
+UnicodeString::lastIndexOf(char16_t c,
+                           int32_t start,
+                           int32_t _length) const {
+  return doLastIndexOf(c, start, _length);
+}
+
+// Single code point in a range; forwards to doLastIndexOf().
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c,
+                           int32_t start,
+                           int32_t _length) const
+{ return doLastIndexOf(c, start, _length); }
+
+// Single code unit in the whole string.
+inline int32_t
+UnicodeString::lastIndexOf(char16_t c) const {
+  return doLastIndexOf(c, 0, length());
+}
+
+// Single code point in the whole string; goes through the ranged overload.
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c) const
+{ return lastIndexOf(c, 0, length()); }
+
+// Single code unit from start (after pinIndex()) to the end of the string.
+inline int32_t
+UnicodeString::lastIndexOf(char16_t c,
+                           int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return doLastIndexOf(c, start, remaining);
+}
+
+// Single code point from start (after pinIndex()) to the end of the string.
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c,
+                           int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return lastIndexOf(c, start, remaining);
+}
+
+// Prefix test against a whole UnicodeString: compares the first
+// text.length() units of this string with all of text.
+inline UBool
+UnicodeString::startsWith(const UnicodeString& text) const {
+  const int32_t textLength = text.length();
+  return compare(0, textLength, text, 0, textLength) == 0;
+}
+
+// Prefix test against a range of srcText; srcLength is used both as the
+// length taken from this string and as the source length.
+inline UBool
+UnicodeString::startsWith(const UnicodeString& srcText,
+                          int32_t srcStart,
+                          int32_t srcLength) const {
+  return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0;
+}
+
+// Prefix test against a raw buffer. A negative srcLength is replaced by
+// u_strlen() of the buffer before comparing.
+inline UBool
+UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const
+{
+  if(srcLength < 0) {
+    srcLength = u_strlen(toUCharPtr(srcChars));
+  }
+  return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
+}
+
+// Prefix test against the UTF-16 text that begins at srcChars + srcStart.
+// A negative srcLength means the length has to be determined with u_strlen().
+// It is measured from srcChars + srcStart, i.e. from the first unit that is
+// actually compared, exactly as endsWith() does. Measuring from srcChars
+// would over-count by srcStart units and let the comparison read past the
+// terminator.
+inline UBool
+UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
+  if(srcLength < 0) {
+    srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
+  }
+  return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
+}
+
+// Suffix test against a whole UnicodeString: compares the last
+// text.length() units of this string with all of text.
+inline UBool
+UnicodeString::endsWith(const UnicodeString& text) const {
+  const int32_t textLength = text.length();
+  return doCompare(length() - textLength, textLength,
+                   text, 0, textLength) == 0;
+}
+
+// Suffix test against a range of srcText. The range goes through
+// srcText.pinIndices() first so that the offset into this string is
+// computed from the adjusted srcLength.
+inline UBool
+UnicodeString::endsWith(const UnicodeString& srcText,
+                        int32_t srcStart,
+                        int32_t srcLength) const
+{
+  srcText.pinIndices(srcStart, srcLength);
+  const int32_t tailStart = length() - srcLength;
+  return doCompare(tailStart, srcLength,
+                   srcText, srcStart, srcLength) == 0;
+}
+
+// Suffix test against a raw buffer. A negative srcLength is replaced by
+// u_strlen() of the buffer before comparing.
+inline UBool
+UnicodeString::endsWith(ConstChar16Ptr srcChars,
+                        int32_t srcLength) const
+{
+  if(srcLength < 0) {
+    srcLength = u_strlen(toUCharPtr(srcChars));
+  }
+  const int32_t tailStart = length() - srcLength;
+  return doCompare(tailStart, srcLength,
+                   srcChars, 0, srcLength) == 0;
+}
+
+// Suffix test against a raw buffer starting at srcStart. A negative
+// srcLength is replaced by u_strlen() measured from srcChars + srcStart.
+inline UBool
+UnicodeString::endsWith(const char16_t *srcChars,
+                        int32_t srcStart,
+                        int32_t srcLength) const
+{
+  if(srcLength < 0) {
+    srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
+  }
+  const int32_t tailStart = length() - srcLength;
+  return doCompare(tailStart, srcLength,
+                   srcChars, srcStart, srcLength) == 0;
+}
+
+//========================================
+// replace
+//========================================
+// Replace a range with the whole of srcText.
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+                       int32_t _length,
+                       const UnicodeString& srcText) {
+  return doReplace(start, _length, srcText, 0, srcText.length());
+}
+
+// Replace a range with a range of srcText; pure forwarding.
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+                       int32_t _length,
+                       const UnicodeString& srcText,
+                       int32_t srcStart,
+                       int32_t srcLength) {
+  return doReplace(start, _length, srcText, srcStart, srcLength);
+}
+
+// Replace a range with srcLength units of a raw buffer, read from offset 0.
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+                       int32_t _length,
+                       ConstChar16Ptr srcChars,
+                       int32_t srcLength) {
+  return doReplace(start, _length, srcChars, 0, srcLength);
+}
+
+// Replace a range with a range of a raw buffer; pure forwarding.
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+                       int32_t _length,
+                       const char16_t *srcChars,
+                       int32_t srcStart,
+                       int32_t srcLength) {
+  return doReplace(start, _length, srcChars, srcStart, srcLength);
+}
+
+// Replace a range with a single code unit, passed to doReplace() as a
+// one-element buffer.
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+                       int32_t _length,
+                       char16_t srcChar) {
+  return doReplace(start, _length, &srcChar, 0, 1);
+}
+
+// Replace [start, limit) with the whole of srcText.
+inline UnicodeString&
+UnicodeString::replaceBetween(int32_t start,
+                              int32_t limit,
+                              const UnicodeString& srcText) {
+  const int32_t span = limit - start;
+  return doReplace(start, span, srcText, 0, srcText.length());
+}
+
+// Replace [start, limit) with [srcStart, srcLimit) of srcText.
+inline UnicodeString&
+UnicodeString::replaceBetween(int32_t start,
+                              int32_t limit,
+                              const UnicodeString& srcText,
+                              int32_t srcStart,
+                              int32_t srcLimit) {
+  const int32_t span = limit - start;
+  const int32_t srcSpan = srcLimit - srcStart;
+  return doReplace(start, span, srcText, srcStart, srcSpan);
+}
+
+// Whole-string convenience form: searches all of this string and uses the
+// full oldText and newText.
+inline UnicodeString&
+UnicodeString::findAndReplace(const UnicodeString& oldText,
+                              const UnicodeString& newText) {
+  return findAndReplace(0, length(),
+                        oldText, 0, oldText.length(),
+                        newText, 0, newText.length());
+}
+
+// Ranged convenience form: searches the given range of this string and
+// uses the full oldText and newText.
+inline UnicodeString&
+UnicodeString::findAndReplace(int32_t start,
+                              int32_t _length,
+                              const UnicodeString& oldText,
+                              const UnicodeString& newText) {
+  return findAndReplace(start, _length,
+                        oldText, 0, oldText.length(),
+                        newText, 0, newText.length());
+}
+
+// ============================
+// extract
+// ============================
+// Extraction into another UnicodeString: the entire current content of
+// target is replaced with the requested range of this string.
+inline void
+UnicodeString::doExtract(int32_t start,
+                         int32_t _length,
+                         UnicodeString& target) const {
+  target.replace(0, target.length(), *this, start, _length);
+}
+
+// Extract a range into a raw buffer at targetStart; forwards to doExtract().
+inline void
+UnicodeString::extract(int32_t start,
+                       int32_t _length,
+                       Char16Ptr target,
+                       int32_t targetStart) const {
+  doExtract(start, _length, target, targetStart);
+}
+
+// Extract a range into another UnicodeString; forwards to doExtract().
+inline void
+UnicodeString::extract(int32_t start,
+                       int32_t _length,
+                       UnicodeString& target) const {
+  doExtract(start, _length, target);
+}
+
+#if !UCONFIG_NO_CONVERSION
+
+// Codepage extraction without a caller-supplied destination size.
+// Delegates to the sized overload, passing 0xffffffff as the size when dst
+// is non-null and 0 when it is null.
+inline int32_t
+UnicodeString::extract(int32_t start,
+                       int32_t _length,
+                       char *dst,
+                       const char *codepage) const
+{
+  // The callee is expected to check this dstSize value explicitly.
+  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
+}
+
+#endif
+
+// Start/limit flavour of extract() into a raw buffer.
+// Both bounds go through pinIndex() before the length is derived from them.
+inline void
+UnicodeString::extractBetween(int32_t start,
+                              int32_t limit,
+                              char16_t *dst,
+                              int32_t dstStart) const
+{
+  pinIndex(start);
+  pinIndex(limit);
+  const int32_t span = limit - start;
+  doExtract(start, span, dst, dstStart);
+}
+
+// Start/limit flavour of tempSubString(): converts [start, limit) to
+// start/length form and delegates.
+inline UnicodeString
+UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const
+{
+  const int32_t span = limit - start;
+  return tempSubString(start, span);
+}
+
+// Bounds-checked code unit access.
+// The unsigned comparison rejects negative offsets and offsets >= length()
+// in a single test; rejected offsets yield kInvalidUChar.
+inline char16_t
+UnicodeString::doCharAt(int32_t offset) const
+{
+  if((uint32_t)offset >= (uint32_t)length()) {
+    return kInvalidUChar;
+  }
+  return getArrayStart()[offset];
+}
+
+// Public accessor; simply forwards to doCharAt().
+inline char16_t
+UnicodeString::charAt(int32_t offset) const {
+  return doCharAt(offset);
+}
+
+// Subscript access; same as charAt(), forwards to doCharAt().
+inline char16_t
+UnicodeString::operator[] (int32_t offset) const {
+  return doCharAt(offset);
+}
+
+// Emptiness test that looks only at fLengthAndFlags: true when everything
+// above kLengthShift is zero.
+inline UBool
+UnicodeString::isEmpty() const
+{
+  // Whether the shift is arithmetic or logical is irrelevant here because
+  // the result is only compared against 0.
+  const int32_t lengthBits = fUnion.fFields.fLengthAndFlags>>kLengthShift;
+  return lengthBits == 0;
+}
+
+//========================================
+// Write implementation methods
+//========================================
+// Masks fLengthAndFlags down to the bits in kAllStorageFlags, clearing every
+// other bit of the field (presumably the stored short length and any
+// non-storage flags -- confirm against the flag layout in the class).
+inline void
+UnicodeString::setZeroLength() {
+ fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
+}
+
+// Stores len directly inside fLengthAndFlags.
+// Precondition (not checked here): 0 <= len <= kMaxShortLength.
+// The bits selected by kAllStorageFlags are kept; everything else is
+// replaced by len shifted up by kLengthShift.
+inline void
+UnicodeString::setShortLength(int32_t len)
+{
+  const int32_t storageBits = fUnion.fFields.fLengthAndFlags & kAllStorageFlags;
+  const int32_t lengthBits = len << kLengthShift;
+  fUnion.fFields.fLengthAndFlags = (int16_t)(storageBits | lengthBits);
+}
+
+// Records a new length.
+// Lengths up to kMaxShortLength are stored via setShortLength(); larger
+// ones set the kLengthIsLarge flag and are written to the separate fLength
+// field.
+inline void
+UnicodeString::setLength(int32_t len)
+{
+  if(len > kMaxShortLength) {
+    fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
+    fUnion.fFields.fLength = len;
+    return;
+  }
+  setShortLength(len);
+}
+
+// Overwrites fLengthAndFlags with exactly kShortString, discarding whatever
+// length and flag bits were stored before. No other field is touched here.
+inline void
+UnicodeString::setToEmpty() {
+ fUnion.fFields.fLengthAndFlags = kShortString;
+}
+
+// Installs an external buffer description: records len via setLength(), then
+// stores the array pointer and its capacity in the fFields members.
+// NOTE(review): nothing here releases a previously referenced array or
+// updates storage flags -- presumably the caller handles that; confirm.
+inline void
+UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
+ setLength(len);
+ fUnion.fFields.fArray = array;
+ fUnion.fFields.fCapacity = capacity;
+}
+
+// Assign a single code unit: the whole current content is replaced with a
+// one-element buffer holding ch.
+inline UnicodeString&
+UnicodeString::operator= (char16_t ch) {
+  return doReplace(0, length(), &ch, 0, 1);
+}
+
+// Assign a single code point: the whole current content is replaced via
+// replace().
+inline UnicodeString&
+UnicodeString::operator= (UChar32 ch) {
+  return replace(0, length(), ch);
+}
+
+// Every overload below except the plain copy calls unBogus() first and then
+// replaces the entire current content.
+
+// Set to a range of srcText.
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText,
+                     int32_t srcStart,
+                     int32_t srcLength) {
+  unBogus();
+  return doReplace(0, length(), srcText, srcStart, srcLength);
+}
+
+// Set to the tail of srcText beginning at srcStart. srcStart goes through
+// srcText.pinIndex() before the tail length is computed from it.
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText,
+                     int32_t srcStart) {
+  unBogus();
+  srcText.pinIndex(srcStart);
+  const int32_t tailLength = srcText.length() - srcStart;
+  return doReplace(0, length(), srcText, srcStart, tailLength);
+}
+
+// Set to a whole UnicodeString; delegates to copyFrom().
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText) {
+  return copyFrom(srcText);
+}
+
+// Set to srcLength units of a raw buffer, read from offset 0.
+inline UnicodeString&
+UnicodeString::setTo(const char16_t *srcChars,
+                     int32_t srcLength) {
+  unBogus();
+  return doReplace(0, length(), srcChars, 0, srcLength);
+}
+
+// Set to a single code unit.
+inline UnicodeString&
+UnicodeString::setTo(char16_t srcChar) {
+  unBogus();
+  return doReplace(0, length(), &srcChar, 0, 1);
+}
+
+// Set to a single code point.
+inline UnicodeString&
+UnicodeString::setTo(UChar32 srcChar) {
+  unBogus();
+  return replace(0, length(), srcChar);
+}
+
+// Append a range of srcText; pure forwarding to doAppend().
+inline UnicodeString&
+UnicodeString::append(const UnicodeString& srcText,
+                      int32_t srcStart,
+                      int32_t srcLength) {
+  return doAppend(srcText, srcStart, srcLength);
+}
+
+// Append the whole of srcText.
+inline UnicodeString&
+UnicodeString::append(const UnicodeString& srcText) {
+  return doAppend(srcText, 0, srcText.length());
+}
+
+// Append a range of a raw buffer; pure forwarding to doAppend().
+inline UnicodeString&
+UnicodeString::append(const char16_t *srcChars,
+                      int32_t srcStart,
+                      int32_t srcLength) {
+  return doAppend(srcChars, srcStart, srcLength);
+}
+
+// Append srcLength units of a raw buffer, read from offset 0.
+inline UnicodeString&
+UnicodeString::append(ConstChar16Ptr srcChars,
+                      int32_t srcLength) {
+  return doAppend(srcChars, 0, srcLength);
+}
+
+// Append a single code unit, passed to doAppend() as a one-element buffer.
+inline UnicodeString&
+UnicodeString::append(char16_t srcChar) {
+  return doAppend(&srcChar, 0, 1);
+}
+
+// Append a single code unit, passed to doAppend() as a one-element buffer.
+inline UnicodeString&
+UnicodeString::operator+= (char16_t ch) {
+  return doAppend(&ch, 0, 1);
+}
+
+// Append a single code point; delegates to append().
+inline UnicodeString&
+UnicodeString::operator+= (UChar32 ch)
+{ return append(ch); }
+
+// Append the whole of srcText.
+inline UnicodeString&
+UnicodeString::operator+= (const UnicodeString& srcText) {
+  return doAppend(srcText, 0, srcText.length());
+}
+
+// All insert() overloads are expressed as a replacement of a zero-length
+// range at start.
+
+// Insert a range of srcText.
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+                      const UnicodeString& srcText,
+                      int32_t srcStart,
+                      int32_t srcLength) {
+  return doReplace(start, 0, srcText, srcStart, srcLength);
+}
+
+// Insert the whole of srcText.
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+                      const UnicodeString& srcText) {
+  return doReplace(start, 0, srcText, 0, srcText.length());
+}
+
+// Insert a range of a raw buffer.
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+                      const char16_t *srcChars,
+                      int32_t srcStart,
+                      int32_t srcLength) {
+  return doReplace(start, 0, srcChars, srcStart, srcLength);
+}
+
+// Insert srcLength units of a raw buffer, read from offset 0.
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+                      ConstChar16Ptr srcChars,
+                      int32_t srcLength) {
+  return doReplace(start, 0, srcChars, 0, srcLength);
+}
+
+// Insert a single code unit, passed as a one-element buffer.
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+                      char16_t srcChar) {
+  return doReplace(start, 0, &srcChar, 0, 1);
+}
+
+// Insert a single code point; goes through replace().
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+                      UChar32 srcChar) {
+  return replace(start, 0, srcChar);
+}
+
+
+// Remove everything.
+// A bogus string is reset with setToEmpty(), which makes it empty and
+// non-bogus; any other string just has its length zeroed.
+inline UnicodeString&
+UnicodeString::remove()
+{
+  if(!isBogus()) {
+    setZeroLength();
+  } else {
+    setToEmpty();
+  }
+  return *this;
+}
+
+// Remove a range.
+// When the arguments are guaranteed to cover the whole string (start <= 0
+// and _length == INT32_MAX) this takes the remove() path, so that a bogus
+// string becomes empty and non-bogus. Otherwise the range is replaced with
+// nothing.
+inline UnicodeString&
+UnicodeString::remove(int32_t start,
+                      int32_t _length)
+{
+  const bool coversEverything = (start <= 0 && _length == INT32_MAX);
+  if(coversEverything) {
+    return remove();
+  }
+  return doReplace(start, _length, NULL, 0, 0);
+}
+
+// Start/limit flavour of remove(): replaces [start, limit) with nothing.
+inline UnicodeString&
+UnicodeString::removeBetween(int32_t start,
+                             int32_t limit)
+{
+  const int32_t span = limit - start;
+  return doReplace(start, span, NULL, 0, 0);
+}
+
+// Keep only [start, limit): first cut off everything from limit onwards,
+// then delete the leading start units.
+inline UnicodeString &
+UnicodeString::retainBetween(int32_t start, int32_t limit)
+{
+  truncate(limit);                         // drop the tail
+  return doReplace(0, start, NULL, 0, 0);  // drop the head
+}
+
+// Shorten the string to targetLength.
+// Returns true only when the string was actually shortened.
+inline UBool
+UnicodeString::truncate(int32_t targetLength)
+{
+  if(isBogus() && targetLength == 0) {
+    // truncate(0) on a bogus string turns it into an empty, non-bogus one;
+    // this does not count as a truncation.
+    unBogus();
+    return false;
+  }
+  // The unsigned comparison also rejects negative target lengths.
+  if((uint32_t)targetLength < (uint32_t)length()) {
+    setLength(targetLength);
+    return true;
+  }
+  return false;
+}
+
+// Reverse the whole string.
+inline UnicodeString&
+UnicodeString::reverse() {
+  return doReverse(0, length());
+}
+
+// Reverse a range; pure forwarding to doReverse().
+inline UnicodeString&
+UnicodeString::reverse(int32_t start,
+                       int32_t _length) {
+  return doReverse(start, _length);
+}
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/unorm.h b/thirdparty/icu4c/common/unicode/unorm.h
new file mode 100644
index 0000000000..c3c57582d4
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/unorm.h
@@ -0,0 +1,476 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (c) 1996-2016, International Business Machines Corporation
+* and others. All Rights Reserved.
+*******************************************************************************
+* File unorm.h
+*
+* Created by: Vladimir Weinstein 12052000
+*
+* Modification history :
+*
+* Date Name Description
+* 02/01/01 synwee Added normalization quickcheck enum and method.
+*/
+#ifndef UNORM_H
+#define UNORM_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/uiter.h"
+#include "unicode/unorm2.h"
+
+/**
+ * \file
+ * \brief C API: Unicode Normalization
+ *
+ * Old Unicode normalization API.
+ *
+ * This API has been replaced by the unorm2.h API and is only available
+ * for backward compatibility. The functions here simply delegate to the
+ * unorm2.h functions, for example unorm2_getInstance() and unorm2_normalize().
+ * There is one exception: The new API does not provide a replacement for unorm_compare().
+ * Its declaration has been moved to unorm2.h.
+ *
+ * <code>unorm_normalize</code> transforms Unicode text into an equivalent composed or
+ * decomposed form, allowing for easier sorting and searching of text.
+ * <code>unorm_normalize</code> supports the standard normalization forms described in
+ * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
+ * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
+ *
+ * Characters with accents or other adornments can be encoded in
+ * several different ways in Unicode. For example, take the character A-acute.
+ * In Unicode, this can be encoded as a single character (the
+ * "composed" form):
+ *
+ * \code
+ * 00C1 LATIN CAPITAL LETTER A WITH ACUTE
+ * \endcode
+ *
+ * or as two separate characters (the "decomposed" form):
+ *
+ * \code
+ * 0041 LATIN CAPITAL LETTER A
+ * 0301 COMBINING ACUTE ACCENT
+ * \endcode
+ *
+ * To a user of your program, however, both of these sequences should be
+ * treated as the same "user-level" character "A with acute accent". When you are searching or
+ * comparing text, you must ensure that these two sequences are treated
+ * equivalently. In addition, you must handle characters with more than one
+ * accent. Sometimes the order of a character's combining accents is
+ * significant, while in other cases accent sequences in different orders are
+ * really equivalent.
+ *
+ * Similarly, the string "ffi" can be encoded as three separate letters:
+ *
+ * \code
+ * 0066 LATIN SMALL LETTER F
+ * 0066 LATIN SMALL LETTER F
+ * 0069 LATIN SMALL LETTER I
+ * \endcode
+ *
+ * or as the single character
+ *
+ * \code
+ * FB03 LATIN SMALL LIGATURE FFI
+ * \endcode
+ *
+ * The ffi ligature is not a distinct semantic character, and strictly speaking
+ * it shouldn't be in Unicode at all, but it was included for compatibility
+ * with existing character sets that already provided it. The Unicode standard
+ * identifies such characters by giving them "compatibility" decompositions
+ * into the corresponding semantic characters. When sorting and searching, you
+ * will often want to use these mappings.
+ *
+ * <code>unorm_normalize</code> helps solve these problems by transforming text into the
+ * canonical composed and decomposed forms as shown in the first example above.
+ * In addition, you can have it perform compatibility decompositions so that
+ * you can treat compatibility characters the same as their equivalents.
+ * Finally, <code>unorm_normalize</code> rearranges accents into the proper canonical
+ * order, so that you do not have to worry about accent rearrangement on your
+ * own.
+ *
+ * Form FCD, "Fast C or D", is also designed for collation.
+ * It allows working on strings that are not necessarily normalized
+ * with an algorithm (like in collation) that works under "canonical closure", i.e., it treats precomposed
+ * characters and their decomposed equivalents the same.
+ *
+ * It is not a normalization form because it does not provide for uniqueness of representation. Multiple strings
+ * may be canonically equivalent (their NFDs are identical) and may all conform to FCD without being identical
+ * themselves.
+ *
+ * The form is defined such that the "raw decomposition", the recursive canonical decomposition of each character,
+ * results in a string that is canonically ordered. This means that precomposed characters are allowed for as long
+ * as their decompositions do not need canonical reordering.
+ *
+ * Its advantage for a process like collation is that all NFD and most NFC texts - and many unnormalized texts -
+ * already conform to FCD and do not need to be normalized (NFD) for such a process. The FCD quick check will
+ * return UNORM_YES for most strings in practice.
+ *
+ * unorm_normalize(UNORM_FCD) may be implemented with UNORM_NFD.
+ *
+ * For more details on FCD see the collation design document:
+ * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
+ *
+ * ICU collation performs either NFD or FCD normalization automatically if normalization
+ * is turned on for the collator object.
+ * Beyond collation and string search, normalized strings may be useful for string equivalence comparisons,
+ * transliteration/transcription, unique representations, etc.
+ *
+ * The W3C generally recommends exchanging texts in NFC.
+ * Note also that most legacy character encodings use only precomposed forms and often do not
+ * encode any combining marks by themselves. For conversion to such character encodings the
+ * Unicode text needs to be normalized to NFC.
+ * For more usage examples, see the Unicode Standard Annex.
+ */
+
+// Do not conditionalize the following enum with #ifndef U_HIDE_DEPRECATED_API,
+// it is needed for layout of Normalizer object.
+#ifndef U_FORCE_HIDE_DEPRECATED_API
+
+/**
+ * Constants for normalization modes.
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+typedef enum {
+ // Explicit values start at 1, not 0.
+ /** No decomposition/composition. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_NONE = 1,
+ /** Canonical decomposition. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_NFD = 2,
+ /** Compatibility decomposition. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_NFKD = 3,
+ /** Canonical decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_NFC = 4,
+ // UNORM_DEFAULT is an alias: it shares the value 4 with UNORM_NFC, which is
+ // why UNORM_NFKC below is given its value explicitly.
+ /** Default normalization. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_DEFAULT = UNORM_NFC,
+ /** Compatibility decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_NFKC =5,
+ /** "Fast C or D" form. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_FCD = 6,
+
+ // No explicit value: takes the value following UNORM_FCD, i.e. 7.
+ /** One more than the highest normalization mode constant. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_MODE_COUNT
+} UNormalizationMode;
+
+#endif // U_FORCE_HIDE_DEPRECATED_API
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/**
+ * Constants for options flags for normalization.
+ * Use 0 for default options,
+ * including normalization according to the Unicode version
+ * that is currently supported by ICU (see u_getUnicodeVersion).
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+enum {
+ /**
+ * Options bit set value to select Unicode 3.2 normalization
+ * (except NormalizationCorrections).
+ * At most one Unicode version can be selected at a time.
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+ // 0x20 is a single bit (1 << 5).
+ UNORM_UNICODE_3_2=0x20
+};
+
+/**
+ * Lowest-order bit number of unorm_compare() options bits corresponding to
+ * normalization options bits.
+ *
+ * The options parameter for unorm_compare() uses most bits for
+ * itself and for various comparison and folding flags.
+ * The most significant bits, however, are shifted down and passed on
+ * to the normalization implementation.
+ * (That is, from unorm_compare(..., options, ...),
+ * options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT will be passed on to the
+ * internal normalization functions.)
+ *
+ * @see unorm_compare
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+#define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
+
+/**
+ * Normalize a string.
+ * The string will be normalized according to the specified normalization mode
+ * and options.
+ * The source and result buffers must not be the same, nor overlap.
+ *
+ * @param source The string to normalize.
+ * @param sourceLength The length of source, or -1 if NUL-terminated.
+ * @param mode The normalization mode; one of UNORM_NONE,
+ * UNORM_NFD, UNORM_NFC, UNORM_NFKC, UNORM_NFKD, UNORM_DEFAULT.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param result A pointer to a buffer to receive the result string.
+ * The result string is NUL-terminated if possible.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to a UErrorCode to receive any errors.
+ * @return The total buffer size needed; if greater than resultLength,
+ * the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR.
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+unorm_normalize(const UChar *source, int32_t sourceLength,
+ UNormalizationMode mode, int32_t options,
+ UChar *result, int32_t resultLength,
+ UErrorCode *status);
+
+/**
+ * Performs a quick check on a string to determine whether the string is
+ * in a particular normalization format.
+ * Three types of result can be returned: UNORM_YES, UNORM_NO or
+ * UNORM_MAYBE. Result UNORM_YES indicates that the argument
+ * string is in the desired normalized format; UNORM_NO indicates that the
+ * argument string is not in the desired normalized format. A
+ * UNORM_MAYBE result indicates that a more thorough check is required:
+ * the user may have to put the string in its normalized form and compare the
+ * results.
+ *
+ * @param source string for determining if it is in a normalized format
+ * @param sourcelength length of source to test, or -1 if NUL-terminated
+ * @param mode which normalization form to test for
+ * @param status a pointer to a UErrorCode to receive any errors
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see unorm_isNormalized
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED UNormalizationCheckResult U_EXPORT2
+unorm_quickCheck(const UChar *source, int32_t sourcelength,
+ UNormalizationMode mode,
+ UErrorCode *status);
+
+/**
+ * Performs a quick check on a string; same as unorm_quickCheck but
+ * takes an extra options parameter like most normalization functions.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param srcLength Length of source to test, or -1 if NUL-terminated.
+ * @param mode Which normalization form to test for.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see unorm_quickCheck
+ * @see unorm_isNormalized
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED UNormalizationCheckResult U_EXPORT2
+unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Test if a string is in a given normalization form.
+ * This is semantically equivalent to source.equals(normalize(source, mode)) .
+ *
+ * Unlike unorm_quickCheck(), this function returns a definitive result,
+ * never a "maybe".
+ * For NFD, NFKD, and FCD, both functions work exactly the same.
+ * For NFC and NFKC where quickCheck may return "maybe", this function will
+ * perform further tests to arrive at a true/false result.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param srcLength Length of source to test, or -1 if NUL-terminated.
+ * @param mode Which normalization form to test for.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode" normalization form.
+ *
+ * @see unorm_quickCheck
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED UBool U_EXPORT2
+unorm_isNormalized(const UChar *src, int32_t srcLength,
+ UNormalizationMode mode,
+ UErrorCode *pErrorCode);
+
+/**
+ * Test if a string is in a given normalization form; same as unorm_isNormalized but
+ * takes an extra options parameter like most normalization functions.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param srcLength Length of source to test, or -1 if NUL-terminated.
+ * @param mode Which normalization form to test for.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode/options" normalization form.
+ *
+ * @see unorm_quickCheck
+ * @see unorm_isNormalized
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED UBool U_EXPORT2
+unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Iterative normalization forward.
+ * This function (together with unorm_previous) is somewhat
+ * similar to the C++ Normalizer class (see its non-static functions).
+ *
+ * Iterative normalization is useful when only a small portion of a longer
+ * string/text needs to be processed.
+ *
+ * For example, the likelihood may be high that processing the first 10% of some
+ * text will be sufficient to find certain data.
+ * Another example: When one wants to concatenate two normalized strings and get a
+ * normalized result, it is much more efficient to normalize just a small part of
+ * the result around the concatenation place instead of re-normalizing everything.
+ *
+ * The input text is an instance of the C character iteration API UCharIterator.
+ * It may wrap around a simple string, a CharacterIterator, a Replaceable, or any
+ * other kind of text object.
+ *
+ * If a buffer overflow occurs, then the caller needs to reset the iterator to the
+ * old index and call the function again with a larger buffer - if the caller cares
+ * for the actual output.
+ * Regardless of the output buffer, the iterator will always be moved to the next
+ * normalization boundary.
+ *
+ * This function (like unorm_previous) serves two purposes:
+ *
+ * 1) To find the next boundary so that the normalization of the part of the text
+ * from the current position to that boundary does not affect and is not affected
+ * by the part of the text beyond that boundary.
+ *
+ * 2) To normalize the text up to the boundary.
+ *
+ * The second step is optional, per the doNormalize parameter.
+ * It is omitted for operations like string concatenation, where the two adjacent
+ * string ends need to be normalized together.
+ * In such a case, the output buffer will just contain a copy of the text up to the
+ * boundary.
+ *
+ * pNeededToNormalize is an output-only parameter. Its output value is only defined
+ * if normalization was requested (doNormalize) and successful (especially, no
+ * buffer overflow).
+ * It is useful for operations like a normalizing transliterator, where one would
+ * not want to replace a piece of text if it is not modified.
+ *
+ * If doNormalize==true and pNeededToNormalize!=NULL then *pNeededToNormalize is set to true
+ * if the normalization was necessary.
+ *
+ * If doNormalize==false then *pNeededToNormalize will be set to false.
+ *
+ * If the buffer overflows, then *pNeededToNormalize will be undefined;
+ * essentially, whenever U_FAILURE is true (like in buffer overflows), this result
+ * will be undefined.
+ *
+ * @param src The input text in the form of a C character iterator.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param doNormalize Indicates if the source text up to the next boundary
+ * is to be normalized (true) or just copied (false).
+ * @param pNeededToNormalize Output flag indicating if the normalization resulted in
+ * different text from the input.
+ * Not defined if an error occurs including buffer overflow.
+ * Always false if !doNormalize.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_previous
+ * @see unorm_normalize
+ *
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+unorm_next(UCharIterator *src,
+ UChar *dest, int32_t destCapacity,
+ UNormalizationMode mode, int32_t options,
+ UBool doNormalize, UBool *pNeededToNormalize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Iterative normalization backward.
+ * This function (together with unorm_next) is somewhat
+ * similar to the C++ Normalizer class (see its non-static functions).
+ * For all details see unorm_next.
+ *
+ * @param src The input text in the form of a C character iterator.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param doNormalize Indicates if the source text up to the next boundary
+ * is to be normalized (true) or just copied (false).
+ * @param pNeededToNormalize Output flag indicating if the normalization resulted in
+ * different text from the input.
+ * Not defined if an error occurs including buffer overflow.
+ * Always false if !doNormalize.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_next
+ * @see unorm_normalize
+ *
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+unorm_previous(UCharIterator *src,
+ UChar *dest, int32_t destCapacity,
+ UNormalizationMode mode, int32_t options,
+ UBool doNormalize, UBool *pNeededToNormalize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Concatenate normalized strings, making sure that the result is normalized as well.
+ *
+ * If both the left and the right strings are in
+ * the normalization form according to "mode/options",
+ * then the result will be
+ *
+ * \code
+ * dest=normalize(left+right, mode, options)
+ * \endcode
+ *
+ * With the input strings already being normalized,
+ * this function will use unorm_next() and unorm_previous()
+ * to find the adjacent end pieces of the input strings.
+ * Only the concatenation of these end pieces will be normalized and
+ * then concatenated with the remaining parts of the input strings.
+ *
+ * It is allowed to have dest==left to avoid copying the entire left string.
+ *
+ * @param left Left source string, may be same as dest.
+ * @param leftLength Length of left source string, or -1 if NUL-terminated.
+ * @param right Right source string. Must not be the same as dest, nor overlap.
+ * @param rightLength Length of right source string, or -1 if NUL-terminated.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_normalize
+ * @see unorm_next
+ * @see unorm_previous
+ *
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+unorm_concatenate(const UChar *left, int32_t leftLength,
+ const UChar *right, int32_t rightLength,
+ UChar *dest, int32_t destCapacity,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode *pErrorCode);
+
+#endif /* U_HIDE_DEPRECATED_API */
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+#endif
diff --git a/thirdparty/icu4c/common/unicode/unorm2.h b/thirdparty/icu4c/common/unicode/unorm2.h
new file mode 100644
index 0000000000..24417b7103
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/unorm2.h
@@ -0,0 +1,606 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: unorm2.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009dec15
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UNORM2_H__
+#define __UNORM2_H__
+
+/**
+ * \file
+ * \brief C API: New API for Unicode Normalization.
+ *
+ * Unicode normalization functionality for standard Unicode normalization or
+ * for using custom mapping tables.
+ * All instances of UNormalizer2 are unmodifiable/immutable.
+ * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller.
+ * For more details see the Normalizer2 C++ class.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/stringoptions.h"
+#include "unicode/uset.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * Constants for normalization modes.
+ * For details about standard Unicode normalization forms
+ * and about the algorithms which are also used with custom mapping tables
+ * see http://www.unicode.org/unicode/reports/tr15/
+ * @stable ICU 4.4
+ */
+typedef enum {
+ /**
+ * Decomposition followed by composition.
+ * Same as standard NFC when using an "nfc" instance.
+ * Same as standard NFKC when using an "nfkc" instance.
+ * For details about standard Unicode normalization forms
+ * see http://www.unicode.org/unicode/reports/tr15/
+ * @stable ICU 4.4
+ */
+ UNORM2_COMPOSE,
+ /**
+ * Map, and reorder canonically.
+ * Same as standard NFD when using an "nfc" instance.
+ * Same as standard NFKD when using an "nfkc" instance.
+ * For details about standard Unicode normalization forms
+ * see http://www.unicode.org/unicode/reports/tr15/
+ * @stable ICU 4.4
+ */
+ UNORM2_DECOMPOSE,
+ /**
+ * "Fast C or D" form.
+ * If a string is in this form, then further decomposition <i>without reordering</i>
+ * would yield the same form as DECOMPOSE.
+ * Text in "Fast C or D" form can be processed efficiently with data tables
+ * that are "canonically closed", that is, that provide equivalent data for
+ * equivalent text, without having to be fully normalized.
+ * Not a standard Unicode normalization form.
+ * Not a unique form: Different FCD strings can be canonically equivalent.
+ * For details see http://www.unicode.org/notes/tn5/#FCD
+ * @stable ICU 4.4
+ */
+ UNORM2_FCD,
+ /**
+ * Compose only contiguously.
+ * Also known as "FCC" or "Fast C Contiguous".
+ * The result will often but not always be in NFC.
+ * The result will conform to FCD which is useful for processing.
+ * Not a standard Unicode normalization form.
+ * For details see http://www.unicode.org/notes/tn5/#FCC
+ * @stable ICU 4.4
+ */
+ UNORM2_COMPOSE_CONTIGUOUS
+} UNormalization2Mode;
+
+/**
+ * Result values for normalization quick check functions.
+ * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
+ * @stable ICU 2.0
+ */
+typedef enum UNormalizationCheckResult {
+ /**
+ * The input string is not in the normalization form.
+ * @stable ICU 2.0
+ */
+ UNORM_NO,
+ /**
+ * The input string is in the normalization form.
+ * @stable ICU 2.0
+ */
+ UNORM_YES,
+ /**
+ * The input string may or may not be in the normalization form.
+ * This value is only returned for composition forms like NFC and FCC,
+ * when a backward-combining character is found for which the surrounding text
+ * would have to be analyzed further.
+ * @stable ICU 2.0
+ */
+ UNORM_MAYBE
+} UNormalizationCheckResult;
+
+/**
+ * Opaque C service object type for the new normalization API.
+ * @stable ICU 4.4
+ */
+struct UNormalizer2;
+typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFC normalization.
+ * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFCInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFD normalization.
+ * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFDInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFKC normalization.
+ * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFKD normalization.
+ * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKDInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
+ * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance which uses the specified data file
+ * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
+ * and which composes or decomposes text according to the specified mode.
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ *
+ * Use packageName=NULL for data files that are part of ICU's own data.
+ * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
+ * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
+ * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
+ *
+ * @param packageName NULL for ICU built-in data, otherwise application data package name
+ * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
+ * @param mode normalization mode (compose or decompose etc.)
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested UNormalizer2, if successful
+ * @stable ICU 4.4
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getInstance(const char *packageName,
+ const char *name,
+ UNormalization2Mode mode,
+ UErrorCode *pErrorCode);
+
+/**
+ * Constructs a filtered normalizer wrapping any UNormalizer2 instance
+ * and a filter set.
+ * Both are aliased and must not be modified or deleted while this object
+ * is used.
+ * The filter set should be frozen; otherwise the performance will suffer greatly.
+ * @param norm2 wrapped UNormalizer2 instance
+ * @param filterSet USet which determines the characters to be normalized
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested UNormalizer2, if successful
+ * @stable ICU 4.4
+ */
+U_CAPI UNormalizer2 * U_EXPORT2
+unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
+
+/**
+ * Closes a UNormalizer2 instance from unorm2_openFiltered().
+ * Do not close instances from unorm2_getInstance()!
+ * @param norm2 UNormalizer2 instance to be closed
+ * @stable ICU 4.4
+ */
+U_CAPI void U_EXPORT2
+unorm2_close(UNormalizer2 *norm2);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUNormalizer2Pointer
+ * "Smart pointer" class, closes a UNormalizer2 via unorm2_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Writes the normalized form of the source string to the destination string
+ * (replacing its contents) and returns the length of the destination string.
+ * The source and destination strings must be different buffers.
+ * @param norm2 UNormalizer2 instance
+ * @param src source string
+ * @param length length of the source string, or -1 if NUL-terminated
+ * @param dest destination string; its contents are replaced with normalized src
+ * @param capacity number of UChars that can be written to dest
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the length of the destination string
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_normalize(const UNormalizer2 *norm2,
+ const UChar *src, int32_t length,
+ UChar *dest, int32_t capacity,
+ UErrorCode *pErrorCode);
+/**
+ * Appends the normalized form of the second string to the first string
+ * (merging them at the boundary) and returns the length of the first string.
+ * The result is normalized if the first string was normalized.
+ * The first and second strings must be different buffers.
+ * @param norm2 UNormalizer2 instance
+ * @param first string, should be normalized
+ * @param firstLength length of the first string, or -1 if NUL-terminated
+ * @param firstCapacity number of UChars that can be written to first
+ * @param second string, will be normalized
+ * @param secondLength length of the second string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the resulting length of the first string
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
+ UChar *first, int32_t firstLength, int32_t firstCapacity,
+ const UChar *second, int32_t secondLength,
+ UErrorCode *pErrorCode);
+/**
+ * Appends the second string to the first string
+ * (merging them at the boundary) and returns the length of the first string.
+ * The result is normalized if both the strings were normalized.
+ * The first and second strings must be different buffers.
+ * @param norm2 UNormalizer2 instance
+ * @param first string, should be normalized
+ * @param firstLength length of the first string, or -1 if NUL-terminated
+ * @param firstCapacity number of UChars that can be written to first
+ * @param second string, should be normalized
+ * @param secondLength length of the second string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the resulting length of the first string
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_append(const UNormalizer2 *norm2,
+ UChar *first, int32_t firstLength, int32_t firstCapacity,
+ const UChar *second, int32_t secondLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gets the decomposition mapping of c.
+ * Roughly equivalent to normalizing the String form of c
+ * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function
+ * returns a negative value and does not write a string
+ * if c does not have a decomposition mapping in this instance's data.
+ * This function is independent of the mode of the UNormalizer2.
+ * @param norm2 UNormalizer2 instance
+ * @param c code point
+ * @param decomposition String buffer which will be set to c's
+ * decomposition mapping, if there is one.
+ * @param capacity number of UChars that can be written to decomposition
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_getDecomposition(const UNormalizer2 *norm2,
+ UChar32 c, UChar *decomposition, int32_t capacity,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gets the raw decomposition mapping of c.
+ *
+ * This is similar to the unorm2_getDecomposition() function but returns the
+ * raw decomposition mapping as specified in UnicodeData.txt or
+ * (for custom data) in the mapping files processed by the gennorm2 tool.
+ * By contrast, unorm2_getDecomposition() returns the processed,
+ * recursively-decomposed version of this mapping.
+ *
+ * When used on a standard NFKC Normalizer2 instance,
+ * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
+ *
+ * When used on a standard NFC Normalizer2 instance,
+ * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
+ * in this case, the result contains either one or two code points (=1..4 UChars).
+ *
+ * This function is independent of the mode of the UNormalizer2.
+ * @param norm2 UNormalizer2 instance
+ * @param c code point
+ * @param decomposition String buffer which will be set to c's
+ * raw decomposition mapping, if there is one.
+ * @param capacity number of UChars that can be written to decomposition
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value
+ * @stable ICU 49
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_getRawDecomposition(const UNormalizer2 *norm2,
+ UChar32 c, UChar *decomposition, int32_t capacity,
+ UErrorCode *pErrorCode);
+
+/**
+ * Performs pairwise composition of a & b and returns the composite if there is one.
+ *
+ * Returns a composite code point c only if c has a two-way mapping to a+b.
+ * In standard Unicode normalization, this means that
+ * c has a canonical decomposition to a+b
+ * and c does not have the Full_Composition_Exclusion property.
+ *
+ * This function is independent of the mode of the UNormalizer2.
+ * @param norm2 UNormalizer2 instance
+ * @param a A (normalization starter) code point.
+ * @param b Another code point.
+ * @return The non-negative composite code point if there is one; otherwise a negative value.
+ * @stable ICU 49
+ */
+U_CAPI UChar32 U_EXPORT2
+unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
+
+/**
+ * Gets the combining class of c.
+ * The default implementation returns 0
+ * but all standard implementations return the Unicode Canonical_Combining_Class value.
+ * @param norm2 UNormalizer2 instance
+ * @param c code point
+ * @return c's combining class
+ * @stable ICU 49
+ */
+U_CAPI uint8_t U_EXPORT2
+unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Tests if the string is normalized.
+ * Internally, in cases where unorm2_quickCheck() would return "maybe"
+ * (which is only possible for the two COMPOSE modes) this function
+ * resolves to "yes" or "no" to provide a definitive result,
+ * at the cost of doing more work in those cases.
+ * @param norm2 UNormalizer2 instance
+ * @param s input string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return true if s is normalized
+ * @stable ICU 4.4
+ */
+U_CAPI UBool U_EXPORT2
+unorm2_isNormalized(const UNormalizer2 *norm2,
+ const UChar *s, int32_t length,
+ UErrorCode *pErrorCode);
+
+/**
+ * Tests if the string is normalized.
+ * For the two COMPOSE modes, the result could be "maybe" in cases that
+ * would take a little more work to resolve definitively.
+ * Use unorm2_spanQuickCheckYes() and unorm2_normalizeSecondAndAppend() for a faster
+ * combination of quick check + normalization, to avoid
+ * re-checking the "yes" prefix.
+ * @param norm2 UNormalizer2 instance
+ * @param s input string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return UNormalizationCheckResult
+ * @stable ICU 4.4
+ */
+U_CAPI UNormalizationCheckResult U_EXPORT2
+unorm2_quickCheck(const UNormalizer2 *norm2,
+ const UChar *s, int32_t length,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the end of the normalized substring of the input string.
+ * In other words, with <code>end=unorm2_spanQuickCheckYes(norm2, s, length, pErrorCode);</code>
+ * the substring consisting of the first <code>end</code> UChars of s
+ * will pass the quick check with a "yes" result.
+ *
+ * The returned end index is usually one or more characters before the
+ * "no" or "maybe" character: The end index is at a normalization boundary.
+ * (See the Normalizer2 class documentation for more about normalization boundaries.)
+ *
+ * When the goal is a normalized string and most input strings are expected
+ * to be normalized already, then call this function,
+ * and if it returns a prefix shorter than the input string,
+ * copy that prefix and use unorm2_normalizeSecondAndAppend() for the remainder.
+ * @param norm2 UNormalizer2 instance
+ * @param s input string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return "yes" span end index
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
+ const UChar *s, int32_t length,
+ UErrorCode *pErrorCode);
+
+/**
+ * Tests if the character always has a normalization boundary before it,
+ * regardless of context.
+ * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance
+ * @param c character to test
+ * @return true if c has a normalization boundary before it
+ * @stable ICU 4.4
+ */
+U_CAPI UBool U_EXPORT2
+unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Tests if the character always has a normalization boundary after it,
+ * regardless of context.
+ * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance
+ * @param c character to test
+ * @return true if c has a normalization boundary after it
+ * @stable ICU 4.4
+ */
+U_CAPI UBool U_EXPORT2
+unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Tests if the character is normalization-inert.
+ * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance
+ * @param c character to test
+ * @return true if c is normalization-inert
+ * @stable ICU 4.4
+ */
+U_CAPI UBool U_EXPORT2
+unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Compares two strings for canonical equivalence.
+ * Further options include case-insensitive comparison and
+ * code point order (as opposed to code unit order).
+ *
+ * Canonical equivalence between two strings is defined as their normalized
+ * forms (NFD or NFC) being identical.
+ * This function compares strings incrementally instead of normalizing
+ * (and optionally case-folding) both strings entirely,
+ * improving performance significantly.
+ *
+ * Bulk normalization is only necessary if the strings do not fulfill the FCD
+ * conditions. Only in this case, and only if the strings are relatively long,
+ * is memory allocated temporarily.
+ * For FCD strings and short non-FCD strings there is no memory allocation.
+ *
+ * Semantically, this is equivalent to
+ * strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
+ * where code point order and foldCase are all optional.
+ *
+ * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
+ * the case folding must be performed first, then the normalization.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Case-sensitive comparison in code unit order, and the input strings
+ * are quick-checked for FCD.
+ *
+ * - UNORM_INPUT_IS_FCD
+ * Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
+ * If not set, the function will quickCheck for FCD
+ * and normalize if necessary.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_COMPARE_IGNORE_CASE
+ * Set to compare strings case-insensitively using case folding,
+ * instead of case-sensitively.
+ * If set, then the following case folding options are used.
+ *
+ * - Options as used with case-insensitive comparisons, currently:
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * (see u_strCaseCompare for details)
+ *
+ * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
+ *
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see unorm_normalize
+ * @see UNORM_FCD
+ * @see u_strCompare
+ * @see u_strCaseCompare
+ *
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+unorm_compare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+#endif /* !UCONFIG_NO_NORMALIZATION */
+#endif /* __UNORM2_H__ */
diff --git a/thirdparty/icu4c/common/unicode/uobject.h b/thirdparty/icu4c/common/unicode/uobject.h
new file mode 100644
index 0000000000..eeb331ce97
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/uobject.h
@@ -0,0 +1,324 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: uobject.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jun26
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UOBJECT_H__
+#define __UOBJECT_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/platform.h"
+
+/**
+ * \file
+ * \brief C++ API: Common ICU base class UObject.
+ */
+
+/**
+ * \def U_NO_THROW
+ * Since ICU 64, use U_NOEXCEPT instead; unless already defined, U_NO_THROW expands to U_NOEXCEPT.
+ *
+ * Previously, this was defined to provide the throw() specification so that
+ * certain functions do not throw any exceptions.
+ *
+ * UMemory operator new methods should have the throw() specification
+ * appended to them, so that the compiler adds the additional NULL check
+ * before calling constructors. Without it, if <code>operator new</code> returns NULL, the
+ * constructor is still called, and if the constructor references member
+ * data (which it typically does), the result is a segmentation violation.
+ *
+ * @stable ICU 4.2. Since ICU 64, use U_NOEXCEPT instead. See ICU-20422.
+ */
+#ifndef U_NO_THROW
+#define U_NO_THROW U_NOEXCEPT
+#endif
+
+/*===========================================================================*/
+/* UClassID-based RTTI */
+/*===========================================================================*/
+
+/**
+ * UClassID is used to identify classes without using the compiler's RTTI.
+ * This was used before C++ compilers consistently supported RTTI.
+ * ICU 4.6 requires compiler RTTI to be turned on.
+ *
+ * Each class hierarchy which needs
+ * to implement polymorphic clone() or operator==() defines two methods,
+ * described in detail below. UClassID values can be compared using
+ * operator==(). Nothing else should be done with them.
+ *
+ * \par
+ * In class hierarchies that implement "poor man's RTTI",
+ * each concrete subclass implements getDynamicClassID() in the same way:
+ *
+ * \code
+ * class Derived {
+ * public:
+ * virtual UClassID getDynamicClassID() const
+ * { return Derived::getStaticClassID(); }
+ * };
+ * \endcode
+ *
+ * Each concrete class implements getStaticClassID() as well, which allows
+ * clients to test for a specific type.
+ *
+ * \code
+ * class Derived {
+ * public:
+ * static UClassID U_EXPORT2 getStaticClassID();
+ * private:
+ * static char fgClassID;
+ * };
+ *
+ * // In Derived.cpp:
+ * UClassID Derived::getStaticClassID()
+ * { return (UClassID)&Derived::fgClassID; }
+ * char Derived::fgClassID = 0; // Value is irrelevant
+ * \endcode
+ * @stable ICU 2.0
+ */
+typedef void* UClassID;
+
+U_NAMESPACE_BEGIN
+
+/**
+ * UMemory is the common ICU base class.
+ * All other ICU C++ classes are derived from UMemory (starting with ICU 2.4).
+ *
+ * This is primarily to make it possible and simple to override the
+ * C++ memory management by adding new/delete operators to this base class.
+ *
+ * To override ALL ICU memory management, including that from plain C code,
+ * replace the allocation functions declared in cmemory.h
+ *
+ * UMemory does not contain any virtual functions.
+ * Common "boilerplate" functions are defined in UObject.
+ *
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UMemory {
+public:
+
+/* Test-only array helpers for debugging shaper heap memory problems; declared only when SHAPER_MEMORY_DEBUG is defined. */
+#ifdef SHAPER_MEMORY_DEBUG
+ static void * NewArray(int size, int count);
+ static void * GrowArray(void * array, int newSize );
+ static void FreeArray(void * array );
+#endif
+
+#if U_OVERRIDE_CXX_ALLOCATION
+ /**
+ * Class-specific operator new: override for ICU4C C++ memory management.
+ * Simple, non-class types are allocated using the macros in common/cmemory.h
+ * (uprv_malloc(), uprv_free(), uprv_realloc());
+ * they or something else could be used here to implement C++ new/delete
+ * for ICU4C C++ classes.
+ * @stable ICU 2.4
+ */
+ static void * U_EXPORT2 operator new(size_t size) U_NOEXCEPT;
+
+ /**
+ * Array form of operator new: override for ICU4C C++ memory management.
+ * See new().
+ * @stable ICU 2.4
+ */
+ static void * U_EXPORT2 operator new[](size_t size) U_NOEXCEPT;
+
+ /**
+ * Class-specific operator delete: override for ICU4C C++ memory management.
+ * Simple, non-class types are allocated using the macros in common/cmemory.h
+ * (uprv_malloc(), uprv_free(), uprv_realloc());
+ * they or something else could be used here to implement C++ new/delete
+ * for ICU4C C++ classes.
+ * @stable ICU 2.4
+ */
+ static void U_EXPORT2 operator delete(void *p) U_NOEXCEPT;
+
+ /**
+ * Array form of operator delete: override for ICU4C C++ memory management.
+ * See delete().
+ * @stable ICU 2.4
+ */
+ static void U_EXPORT2 operator delete[](void *p) U_NOEXCEPT;
+
+#if U_HAVE_PLACEMENT_NEW
+ /**
+ * Placement new override for ICU4C C++ memory management for STL.
+ * Performs no allocation and returns ptr unchanged. See new().
+ * @stable ICU 2.6
+ */
+ static inline void * U_EXPORT2 operator new(size_t, void *ptr) U_NOEXCEPT { return ptr; }
+
+ /**
+ * Placement delete override for ICU4C C++ memory management for STL.
+ * Matches the placement new above and does nothing. See delete().
+ * @stable ICU 2.6
+ */
+ static inline void U_EXPORT2 operator delete(void *, void *) U_NOEXCEPT {}
+#endif /* U_HAVE_PLACEMENT_NEW */
+#if U_HAVE_DEBUG_LOCATION_NEW
+ /**
+ * This method overrides the MFC debug version of the operator new.
+ *
+ * @param size The requested memory size
+ * @param file The file where the allocation was requested
+ * @param line The line where the allocation was requested
+ */
+ static void * U_EXPORT2 operator new(size_t size, const char* file, int line) U_NOEXCEPT;
+ /**
+ * This method provides a matching delete for the MFC debug new.
+ *
+ * @param p The pointer to the allocated memory
+ * @param file The file where the allocation was requested
+ * @param line The line where the allocation was requested
+ */
+ static void U_EXPORT2 operator delete(void* p, const char* file, int line) U_NOEXCEPT;
+#endif /* U_HAVE_DEBUG_LOCATION_NEW */
+#endif /* U_OVERRIDE_CXX_ALLOCATION */
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ UMemory &UMemory::operator=(const UMemory &);
+ */
+};
+
+/**
+ * UObject is the common ICU "boilerplate" class.
+ * UObject inherits UMemory (starting with ICU 2.4),
+ * and all other public ICU C++ classes
+ * are derived from UObject (starting with ICU 2.2).
+ *
+ * UObject contains common virtual functions, in particular a virtual destructor.
+ *
+ * The clone() function is not available in UObject because it is not
+ * implemented by all ICU classes.
+ * Many ICU services provide a clone() function for their class trees,
+ * defined on the service's C++ base class
+ * (which itself is a subclass of UObject).
+ *
+ * @stable ICU 2.2
+ */
+class U_COMMON_API UObject : public UMemory {
+public:
+ /**
+ * Virtual destructor.
+ *
+ * @stable ICU 2.2
+ */
+ virtual ~UObject();
+
+ /**
+ * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
+ * The base class implementation returns a dummy value.
+ *
+ * Use compiler RTTI rather than ICU's "poor man's RTTI".
+ * Since ICU 4.6, new ICU C++ class hierarchies do not implement "poor man's RTTI".
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+protected:
+ // the following functions are protected to prevent instantiation and
+ // direct use of UObject itself
+
+ // default constructor (kept commented out, so none is declared here)
+ // inline UObject() {}
+
+ // copy constructor (kept commented out, so none is declared here)
+ // inline UObject(const UObject &other) {}
+
+#if 0
+ // TODO Sometime in the future. Implement operator==().
+ // (This comment inserted in 2.2)
+ // some or all of the following "boilerplate" functions may be made public
+ // in a future ICU4C release when all subclasses implement them
+
+ // assignment operator
+ // (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74)
+ // commented out because the implementation is the same as a compiler's default
+ // UObject &operator=(const UObject &other) { return *this; }
+
+ // comparison operators
+ virtual inline UBool operator==(const UObject &other) const { return this==&other; }
+ inline UBool operator!=(const UObject &other) const { return !operator==(other); }
+
+ // clone() commented out from the base class:
+ // some compilers do not support co-variant return types
+ // (i.e., subclasses would have to return UObject * as well, instead of SubClass *)
+ // see also UObject class documentation.
+ // virtual UObject *clone() const;
+#endif
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ UObject &UObject::operator=(const UObject &);
+ */
+};
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Adds ICU RTTI to an ICU object implementation: defines myClass::getStaticClassID() (which returns the address
+ * of a function-local static char) and myClass::getDynamicClassID() (which calls it). Use only in *.cpp files, not in headers.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
+ UClassID U_EXPORT2 myClass::getStaticClassID() { \
+ static char classID = 0; \
+ return (UClassID)&classID; \
+ } \
+ UClassID myClass::getDynamicClassID() const \
+ { return myClass::getStaticClassID(); }
+
+
+/**
+ * This macro adds ICU RTTI to an ICU abstract class implementation: it defines only
+ * myClass::getStaticClassID() (no getDynamicClassID()). Invoke it in *.cpp files; the corresponding
+ * header should declare getStaticClassID.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \
+ UClassID U_EXPORT2 myClass::getStaticClassID() { \
+ static char classID = 0; \
+ return (UClassID)&classID; \
+ }
+
+#endif /* U_HIDE_INTERNAL_API */
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/urename.h b/thirdparty/icu4c/common/unicode/urename.h
new file mode 100644
index 0000000000..20232cd209
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/urename.h
@@ -0,0 +1,1922 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name: urename.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: Perl script tools/genren.pl written by Vladimir Weinstein
+*
+* Contains data for renaming ICU exports.
+* Gets included by umachine.h
+*
+* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef URENAME_H
+#define URENAME_H
+
+/* U_DISABLE_RENAMING can be defined in the following ways:
+ * - when running configure, e.g.
+ * runConfigureICU Linux --disable-renaming
+ * - by changing the default setting of U_DISABLE_RENAMING in uconfig.h
+ */
+
+#include "unicode/uconfig.h"
+
+#if !U_DISABLE_RENAMING
+
+// Disable Renaming for Visual Studio's IntelliSense feature, so that 'Go-to-Definition' (F12) will work.
+#if !(defined(_MSC_VER) && defined(__INTELLISENSE__))
+
+/* We need the U_ICU_ENTRY_POINT_RENAME definition. There's a default one in unicode/uvernum.h we can use, but we will give
+ the platform a chance to define it first.
+ Normally (if utypes.h or umachine.h was included first) this will not be necessary as it will already be defined.
+ */
+
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#include "unicode/umachine.h"
+#endif
+
+/* If we still don't have U_ICU_ENTRY_POINT_RENAME use the default. */
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#include "unicode/uvernum.h"
+#endif
+
+/* Error out before the following defines cause very strange and unexpected code breakage */
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#error U_ICU_ENTRY_POINT_RENAME is not defined - cannot continue. Consider defining U_DISABLE_RENAMING if renaming should not be used.
+#endif
+
+
+/* C exports renaming data */
+
+#define T_CString_int64ToString U_ICU_ENTRY_POINT_RENAME(T_CString_int64ToString)
+#define T_CString_integerToString U_ICU_ENTRY_POINT_RENAME(T_CString_integerToString)
+#define T_CString_stringToInteger U_ICU_ENTRY_POINT_RENAME(T_CString_stringToInteger)
+#define T_CString_toLowerCase U_ICU_ENTRY_POINT_RENAME(T_CString_toLowerCase)
+#define T_CString_toUpperCase U_ICU_ENTRY_POINT_RENAME(T_CString_toUpperCase)
+#define UCNV_FROM_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_ESCAPE)
+#define UCNV_FROM_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SKIP)
+#define UCNV_FROM_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_STOP)
+#define UCNV_FROM_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SUBSTITUTE)
+#define UCNV_TO_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_ESCAPE)
+#define UCNV_TO_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SKIP)
+#define UCNV_TO_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_STOP)
+#define UCNV_TO_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SUBSTITUTE)
+#define UDataMemory_createNewInstance U_ICU_ENTRY_POINT_RENAME(UDataMemory_createNewInstance)
+#define UDataMemory_init U_ICU_ENTRY_POINT_RENAME(UDataMemory_init)
+#define UDataMemory_isLoaded U_ICU_ENTRY_POINT_RENAME(UDataMemory_isLoaded)
+#define UDataMemory_normalizeDataPointer U_ICU_ENTRY_POINT_RENAME(UDataMemory_normalizeDataPointer)
+#define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData)
+#define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign)
+#define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData)
+#define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data)
+#define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data)
+#define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData)
+#define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData)
+#define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData)
+#define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData)
+#define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data)
+#define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1)
+#define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11)
+#define _LMBCSData16 U_ICU_ENTRY_POINT_RENAME(_LMBCSData16)
+#define _LMBCSData17 U_ICU_ENTRY_POINT_RENAME(_LMBCSData17)
+#define _LMBCSData18 U_ICU_ENTRY_POINT_RENAME(_LMBCSData18)
+#define _LMBCSData19 U_ICU_ENTRY_POINT_RENAME(_LMBCSData19)
+#define _LMBCSData2 U_ICU_ENTRY_POINT_RENAME(_LMBCSData2)
+#define _LMBCSData3 U_ICU_ENTRY_POINT_RENAME(_LMBCSData3)
+#define _LMBCSData4 U_ICU_ENTRY_POINT_RENAME(_LMBCSData4)
+#define _LMBCSData5 U_ICU_ENTRY_POINT_RENAME(_LMBCSData5)
+#define _LMBCSData6 U_ICU_ENTRY_POINT_RENAME(_LMBCSData6)
+#define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8)
+#define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data)
+#define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData)
+#define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData)
+#define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData)
+#define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data)
+#define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData)
+#define _UTF16v2Data U_ICU_ENTRY_POINT_RENAME(_UTF16v2Data)
+#define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData)
+#define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data)
+#define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData)
+#define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)
+#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
+#define _isUnicodeLocaleTypeSubtag U_ICU_ENTRY_POINT_RENAME(_isUnicodeLocaleTypeSubtag)
+#define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup)
+#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
+#define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup)
+#define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats)
+#define gTimeZoneFilesInitOnce U_ICU_ENTRY_POINT_RENAME(gTimeZoneFilesInitOnce)
+#define initNumsysNames U_ICU_ENTRY_POINT_RENAME(initNumsysNames)
+#define izrule_clone U_ICU_ENTRY_POINT_RENAME(izrule_clone)
+#define izrule_close U_ICU_ENTRY_POINT_RENAME(izrule_close)
+#define izrule_equals U_ICU_ENTRY_POINT_RENAME(izrule_equals)
+#define izrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(izrule_getDSTSavings)
+#define izrule_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(izrule_getDynamicClassID)
+#define izrule_getFinalStart U_ICU_ENTRY_POINT_RENAME(izrule_getFinalStart)
+#define izrule_getFirstStart U_ICU_ENTRY_POINT_RENAME(izrule_getFirstStart)
+#define izrule_getName U_ICU_ENTRY_POINT_RENAME(izrule_getName)
+#define izrule_getNextStart U_ICU_ENTRY_POINT_RENAME(izrule_getNextStart)
+#define izrule_getPreviousStart U_ICU_ENTRY_POINT_RENAME(izrule_getPreviousStart)
+#define izrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(izrule_getRawOffset)
+#define izrule_getStaticClassID U_ICU_ENTRY_POINT_RENAME(izrule_getStaticClassID)
+#define izrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(izrule_isEquivalentTo)
+#define izrule_open U_ICU_ENTRY_POINT_RENAME(izrule_open)
+#define locale_getKeywordsStart U_ICU_ENTRY_POINT_RENAME(locale_getKeywordsStart)
+#define locale_get_default U_ICU_ENTRY_POINT_RENAME(locale_get_default)
+#define locale_set_default U_ICU_ENTRY_POINT_RENAME(locale_set_default)
+#define numSysCleanup U_ICU_ENTRY_POINT_RENAME(numSysCleanup)
+#define rbbi_cleanup U_ICU_ENTRY_POINT_RENAME(rbbi_cleanup)
+#define pl_addFontRun U_ICU_ENTRY_POINT_RENAME(pl_addFontRun)
+#define pl_addLocaleRun U_ICU_ENTRY_POINT_RENAME(pl_addLocaleRun)
+#define pl_addValueRun U_ICU_ENTRY_POINT_RENAME(pl_addValueRun)
+#define pl_close U_ICU_ENTRY_POINT_RENAME(pl_close)
+#define pl_closeFontRuns U_ICU_ENTRY_POINT_RENAME(pl_closeFontRuns)
+#define pl_closeLine U_ICU_ENTRY_POINT_RENAME(pl_closeLine)
+#define pl_closeLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_closeLocaleRuns)
+#define pl_closeValueRuns U_ICU_ENTRY_POINT_RENAME(pl_closeValueRuns)
+#define pl_countLineRuns U_ICU_ENTRY_POINT_RENAME(pl_countLineRuns)
+#define pl_create U_ICU_ENTRY_POINT_RENAME(pl_create)
+#define pl_getAscent U_ICU_ENTRY_POINT_RENAME(pl_getAscent)
+#define pl_getDescent U_ICU_ENTRY_POINT_RENAME(pl_getDescent)
+#define pl_getFontRunCount U_ICU_ENTRY_POINT_RENAME(pl_getFontRunCount)
+#define pl_getFontRunFont U_ICU_ENTRY_POINT_RENAME(pl_getFontRunFont)
+#define pl_getFontRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLastLimit)
+#define pl_getFontRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLimit)
+#define pl_getLeading U_ICU_ENTRY_POINT_RENAME(pl_getLeading)
+#define pl_getLineAscent U_ICU_ENTRY_POINT_RENAME(pl_getLineAscent)
+#define pl_getLineDescent U_ICU_ENTRY_POINT_RENAME(pl_getLineDescent)
+#define pl_getLineLeading U_ICU_ENTRY_POINT_RENAME(pl_getLineLeading)
+#define pl_getLineVisualRun U_ICU_ENTRY_POINT_RENAME(pl_getLineVisualRun)
+#define pl_getLineWidth U_ICU_ENTRY_POINT_RENAME(pl_getLineWidth)
+#define pl_getLocaleRunCount U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunCount)
+#define pl_getLocaleRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLastLimit)
+#define pl_getLocaleRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLimit)
+#define pl_getLocaleRunLocale U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLocale)
+#define pl_getParagraphLevel U_ICU_ENTRY_POINT_RENAME(pl_getParagraphLevel)
+#define pl_getTextDirection U_ICU_ENTRY_POINT_RENAME(pl_getTextDirection)
+#define pl_getValueRunCount U_ICU_ENTRY_POINT_RENAME(pl_getValueRunCount)
+#define pl_getValueRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLastLimit)
+#define pl_getValueRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLimit)
+#define pl_getValueRunValue U_ICU_ENTRY_POINT_RENAME(pl_getValueRunValue)
+#define pl_getVisualRunAscent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunAscent)
+#define pl_getVisualRunDescent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDescent)
+#define pl_getVisualRunDirection U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDirection)
+#define pl_getVisualRunFont U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunFont)
+#define pl_getVisualRunGlyphCount U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphCount)
+#define pl_getVisualRunGlyphToCharMap U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphToCharMap)
+#define pl_getVisualRunGlyphs U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphs)
+#define pl_getVisualRunLeading U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunLeading)
+#define pl_getVisualRunPositions U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunPositions)
+#define pl_isComplex U_ICU_ENTRY_POINT_RENAME(pl_isComplex)
+#define pl_nextLine U_ICU_ENTRY_POINT_RENAME(pl_nextLine)
+#define pl_openEmptyFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyFontRuns)
+#define pl_openEmptyLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyLocaleRuns)
+#define pl_openEmptyValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyValueRuns)
+#define pl_openFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openFontRuns)
+#define pl_openLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openLocaleRuns)
+#define pl_openValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openValueRuns)
+#define pl_reflow U_ICU_ENTRY_POINT_RENAME(pl_reflow)
+#define pl_resetFontRuns U_ICU_ENTRY_POINT_RENAME(pl_resetFontRuns)
+#define pl_resetLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_resetLocaleRuns)
+#define pl_resetValueRuns U_ICU_ENTRY_POINT_RENAME(pl_resetValueRuns)
+#define res_countArrayItems U_ICU_ENTRY_POINT_RENAME(res_countArrayItems)
+#define res_findResource U_ICU_ENTRY_POINT_RENAME(res_findResource)
+#define res_getAlias U_ICU_ENTRY_POINT_RENAME(res_getAlias)
+#define res_getArrayItem U_ICU_ENTRY_POINT_RENAME(res_getArrayItem)
+#define res_getBinaryNoTrace U_ICU_ENTRY_POINT_RENAME(res_getBinaryNoTrace)
+#define res_getIntVectorNoTrace U_ICU_ENTRY_POINT_RENAME(res_getIntVectorNoTrace)
+#define res_getPublicType U_ICU_ENTRY_POINT_RENAME(res_getPublicType)
+#define res_getResource U_ICU_ENTRY_POINT_RENAME(res_getResource)
+#define res_getStringNoTrace U_ICU_ENTRY_POINT_RENAME(res_getStringNoTrace)
+#define res_getTableItemByIndex U_ICU_ENTRY_POINT_RENAME(res_getTableItemByIndex)
+#define res_getTableItemByKey U_ICU_ENTRY_POINT_RENAME(res_getTableItemByKey)
+#define res_load U_ICU_ENTRY_POINT_RENAME(res_load)
+#define res_read U_ICU_ENTRY_POINT_RENAME(res_read)
+#define res_unload U_ICU_ENTRY_POINT_RENAME(res_unload)
+#define u_UCharsToChars U_ICU_ENTRY_POINT_RENAME(u_UCharsToChars)
+#define u_asciiToUpper U_ICU_ENTRY_POINT_RENAME(u_asciiToUpper)
+#define u_austrcpy U_ICU_ENTRY_POINT_RENAME(u_austrcpy)
+#define u_austrncpy U_ICU_ENTRY_POINT_RENAME(u_austrncpy)
+#define u_caseInsensitivePrefixMatch U_ICU_ENTRY_POINT_RENAME(u_caseInsensitivePrefixMatch)
+#define u_catclose U_ICU_ENTRY_POINT_RENAME(u_catclose)
+#define u_catgets U_ICU_ENTRY_POINT_RENAME(u_catgets)
+#define u_catopen U_ICU_ENTRY_POINT_RENAME(u_catopen)
+#define u_charAge U_ICU_ENTRY_POINT_RENAME(u_charAge)
+#define u_charDigitValue U_ICU_ENTRY_POINT_RENAME(u_charDigitValue)
+#define u_charDirection U_ICU_ENTRY_POINT_RENAME(u_charDirection)
+#define u_charFromName U_ICU_ENTRY_POINT_RENAME(u_charFromName)
+#define u_charMirror U_ICU_ENTRY_POINT_RENAME(u_charMirror)
+#define u_charName U_ICU_ENTRY_POINT_RENAME(u_charName)
+#define u_charType U_ICU_ENTRY_POINT_RENAME(u_charType)
+#define u_charsToUChars U_ICU_ENTRY_POINT_RENAME(u_charsToUChars)
+#define u_cleanup U_ICU_ENTRY_POINT_RENAME(u_cleanup)
+#define u_countChar32 U_ICU_ENTRY_POINT_RENAME(u_countChar32)
+#define u_digit U_ICU_ENTRY_POINT_RENAME(u_digit)
+#define u_enumCharNames U_ICU_ENTRY_POINT_RENAME(u_enumCharNames)
+#define u_enumCharTypes U_ICU_ENTRY_POINT_RENAME(u_enumCharTypes)
+#define u_errorName U_ICU_ENTRY_POINT_RENAME(u_errorName)
+#define u_fadopt U_ICU_ENTRY_POINT_RENAME(u_fadopt)
+#define u_fclose U_ICU_ENTRY_POINT_RENAME(u_fclose)
+#define u_feof U_ICU_ENTRY_POINT_RENAME(u_feof)
+#define u_fflush U_ICU_ENTRY_POINT_RENAME(u_fflush)
+#define u_fgetConverter U_ICU_ENTRY_POINT_RENAME(u_fgetConverter)
+#define u_fgetNumberFormat U_ICU_ENTRY_POINT_RENAME(u_fgetNumberFormat)
+#define u_fgetc U_ICU_ENTRY_POINT_RENAME(u_fgetc)
+#define u_fgetcodepage U_ICU_ENTRY_POINT_RENAME(u_fgetcodepage)
+#define u_fgetcx U_ICU_ENTRY_POINT_RENAME(u_fgetcx)
+#define u_fgetfile U_ICU_ENTRY_POINT_RENAME(u_fgetfile)
+#define u_fgetlocale U_ICU_ENTRY_POINT_RENAME(u_fgetlocale)
+#define u_fgets U_ICU_ENTRY_POINT_RENAME(u_fgets)
+#define u_file_read U_ICU_ENTRY_POINT_RENAME(u_file_read)
+#define u_file_write U_ICU_ENTRY_POINT_RENAME(u_file_write)
+#define u_file_write_flush U_ICU_ENTRY_POINT_RENAME(u_file_write_flush)
+#define u_finit U_ICU_ENTRY_POINT_RENAME(u_finit)
+#define u_flushDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_flushDefaultConverter)
+#define u_foldCase U_ICU_ENTRY_POINT_RENAME(u_foldCase)
+#define u_fopen U_ICU_ENTRY_POINT_RENAME(u_fopen)
+#define u_fopen_u U_ICU_ENTRY_POINT_RENAME(u_fopen_u)
+#define u_forDigit U_ICU_ENTRY_POINT_RENAME(u_forDigit)
+#define u_formatMessage U_ICU_ENTRY_POINT_RENAME(u_formatMessage)
+#define u_formatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_formatMessageWithError)
+#define u_fprintf U_ICU_ENTRY_POINT_RENAME(u_fprintf)
+#define u_fprintf_u U_ICU_ENTRY_POINT_RENAME(u_fprintf_u)
+#define u_fputc U_ICU_ENTRY_POINT_RENAME(u_fputc)
+#define u_fputs U_ICU_ENTRY_POINT_RENAME(u_fputs)
+#define u_frewind U_ICU_ENTRY_POINT_RENAME(u_frewind)
+#define u_fscanf U_ICU_ENTRY_POINT_RENAME(u_fscanf)
+#define u_fscanf_u U_ICU_ENTRY_POINT_RENAME(u_fscanf_u)
+#define u_fsetcodepage U_ICU_ENTRY_POINT_RENAME(u_fsetcodepage)
+#define u_fsetlocale U_ICU_ENTRY_POINT_RENAME(u_fsetlocale)
+#define u_fsettransliterator U_ICU_ENTRY_POINT_RENAME(u_fsettransliterator)
+#define u_fstropen U_ICU_ENTRY_POINT_RENAME(u_fstropen)
+#define u_fungetc U_ICU_ENTRY_POINT_RENAME(u_fungetc)
+#define u_getBidiPairedBracket U_ICU_ENTRY_POINT_RENAME(u_getBidiPairedBracket)
+#define u_getBinaryPropertySet U_ICU_ENTRY_POINT_RENAME(u_getBinaryPropertySet)
+#define u_getCombiningClass U_ICU_ENTRY_POINT_RENAME(u_getCombiningClass)
+#define u_getDataDirectory U_ICU_ENTRY_POINT_RENAME(u_getDataDirectory)
+#define u_getDataVersion U_ICU_ENTRY_POINT_RENAME(u_getDataVersion)
+#define u_getDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_getDefaultConverter)
+#define u_getFC_NFKC_Closure U_ICU_ENTRY_POINT_RENAME(u_getFC_NFKC_Closure)
+#define u_getISOComment U_ICU_ENTRY_POINT_RENAME(u_getISOComment)
+#define u_getIntPropertyMap U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMap)
+#define u_getIntPropertyMaxValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMaxValue)
+#define u_getIntPropertyMinValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMinValue)
+#define u_getIntPropertyValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyValue)
+#define u_getMainProperties U_ICU_ENTRY_POINT_RENAME(u_getMainProperties)
+#define u_getNumericValue U_ICU_ENTRY_POINT_RENAME(u_getNumericValue)
+#define u_getPropertyEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyEnum)
+#define u_getPropertyName U_ICU_ENTRY_POINT_RENAME(u_getPropertyName)
+#define u_getPropertyValueEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueEnum)
+#define u_getPropertyValueName U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueName)
+#define u_getTimeZoneFilesDirectory U_ICU_ENTRY_POINT_RENAME(u_getTimeZoneFilesDirectory)
+#define u_getUnicodeProperties U_ICU_ENTRY_POINT_RENAME(u_getUnicodeProperties)
+#define u_getUnicodeVersion U_ICU_ENTRY_POINT_RENAME(u_getUnicodeVersion)
+#define u_getVersion U_ICU_ENTRY_POINT_RENAME(u_getVersion)
+#define u_get_stdout U_ICU_ENTRY_POINT_RENAME(u_get_stdout)
+#define u_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(u_hasBinaryProperty)
+#define u_init U_ICU_ENTRY_POINT_RENAME(u_init)
+#define u_isIDIgnorable U_ICU_ENTRY_POINT_RENAME(u_isIDIgnorable)
+#define u_isIDPart U_ICU_ENTRY_POINT_RENAME(u_isIDPart)
+#define u_isIDStart U_ICU_ENTRY_POINT_RENAME(u_isIDStart)
+#define u_isISOControl U_ICU_ENTRY_POINT_RENAME(u_isISOControl)
+#define u_isJavaIDPart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDPart)
+#define u_isJavaIDStart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDStart)
+#define u_isJavaSpaceChar U_ICU_ENTRY_POINT_RENAME(u_isJavaSpaceChar)
+#define u_isMirrored U_ICU_ENTRY_POINT_RENAME(u_isMirrored)
+#define u_isUAlphabetic U_ICU_ENTRY_POINT_RENAME(u_isUAlphabetic)
+#define u_isULowercase U_ICU_ENTRY_POINT_RENAME(u_isULowercase)
+#define u_isUUppercase U_ICU_ENTRY_POINT_RENAME(u_isUUppercase)
+#define u_isUWhiteSpace U_ICU_ENTRY_POINT_RENAME(u_isUWhiteSpace)
+#define u_isWhitespace U_ICU_ENTRY_POINT_RENAME(u_isWhitespace)
+#define u_isalnum U_ICU_ENTRY_POINT_RENAME(u_isalnum)
+#define u_isalnumPOSIX U_ICU_ENTRY_POINT_RENAME(u_isalnumPOSIX)
+#define u_isalpha U_ICU_ENTRY_POINT_RENAME(u_isalpha)
+#define u_isbase U_ICU_ENTRY_POINT_RENAME(u_isbase)
+#define u_isblank U_ICU_ENTRY_POINT_RENAME(u_isblank)
+#define u_iscntrl U_ICU_ENTRY_POINT_RENAME(u_iscntrl)
+#define u_isdefined U_ICU_ENTRY_POINT_RENAME(u_isdefined)
+#define u_isdigit U_ICU_ENTRY_POINT_RENAME(u_isdigit)
+#define u_isgraph U_ICU_ENTRY_POINT_RENAME(u_isgraph)
+#define u_isgraphPOSIX U_ICU_ENTRY_POINT_RENAME(u_isgraphPOSIX)
+#define u_islower U_ICU_ENTRY_POINT_RENAME(u_islower)
+#define u_isprint U_ICU_ENTRY_POINT_RENAME(u_isprint)
+#define u_isprintPOSIX U_ICU_ENTRY_POINT_RENAME(u_isprintPOSIX)
+#define u_ispunct U_ICU_ENTRY_POINT_RENAME(u_ispunct)
+#define u_isspace U_ICU_ENTRY_POINT_RENAME(u_isspace)
+#define u_istitle U_ICU_ENTRY_POINT_RENAME(u_istitle)
+#define u_isupper U_ICU_ENTRY_POINT_RENAME(u_isupper)
+#define u_isxdigit U_ICU_ENTRY_POINT_RENAME(u_isxdigit)
+#define u_locbund_close U_ICU_ENTRY_POINT_RENAME(u_locbund_close)
+#define u_locbund_getNumberFormat U_ICU_ENTRY_POINT_RENAME(u_locbund_getNumberFormat)
+#define u_locbund_init U_ICU_ENTRY_POINT_RENAME(u_locbund_init)
+#define u_memcasecmp U_ICU_ENTRY_POINT_RENAME(u_memcasecmp)
+#define u_memchr U_ICU_ENTRY_POINT_RENAME(u_memchr)
+#define u_memchr32 U_ICU_ENTRY_POINT_RENAME(u_memchr32)
+#define u_memcmp U_ICU_ENTRY_POINT_RENAME(u_memcmp)
+#define u_memcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_memcmpCodePointOrder)
+#define u_memcpy U_ICU_ENTRY_POINT_RENAME(u_memcpy)
+#define u_memmove U_ICU_ENTRY_POINT_RENAME(u_memmove)
+#define u_memrchr U_ICU_ENTRY_POINT_RENAME(u_memrchr)
+#define u_memrchr32 U_ICU_ENTRY_POINT_RENAME(u_memrchr32)
+#define u_memset U_ICU_ENTRY_POINT_RENAME(u_memset)
+#define u_parseMessage U_ICU_ENTRY_POINT_RENAME(u_parseMessage)
+#define u_parseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_parseMessageWithError)
+#define u_printf U_ICU_ENTRY_POINT_RENAME(u_printf)
+#define u_printf_parse U_ICU_ENTRY_POINT_RENAME(u_printf_parse)
+#define u_printf_u U_ICU_ENTRY_POINT_RENAME(u_printf_u)
+#define u_releaseDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_releaseDefaultConverter)
+#define u_scanf_parse U_ICU_ENTRY_POINT_RENAME(u_scanf_parse)
+#define u_setAtomicIncDecFunctions U_ICU_ENTRY_POINT_RENAME(u_setAtomicIncDecFunctions)
+#define u_setDataDirectory U_ICU_ENTRY_POINT_RENAME(u_setDataDirectory)
+#define u_setMemoryFunctions U_ICU_ENTRY_POINT_RENAME(u_setMemoryFunctions)
+#define u_setMutexFunctions U_ICU_ENTRY_POINT_RENAME(u_setMutexFunctions)
+#define u_setTimeZoneFilesDirectory U_ICU_ENTRY_POINT_RENAME(u_setTimeZoneFilesDirectory)
+#define u_shapeArabic U_ICU_ENTRY_POINT_RENAME(u_shapeArabic)
+#define u_snprintf U_ICU_ENTRY_POINT_RENAME(u_snprintf)
+#define u_snprintf_u U_ICU_ENTRY_POINT_RENAME(u_snprintf_u)
+#define u_sprintf U_ICU_ENTRY_POINT_RENAME(u_sprintf)
+#define u_sprintf_u U_ICU_ENTRY_POINT_RENAME(u_sprintf_u)
+#define u_sscanf U_ICU_ENTRY_POINT_RENAME(u_sscanf)
+#define u_sscanf_u U_ICU_ENTRY_POINT_RENAME(u_sscanf_u)
+#define u_strCaseCompare U_ICU_ENTRY_POINT_RENAME(u_strCaseCompare)
+#define u_strCompare U_ICU_ENTRY_POINT_RENAME(u_strCompare)
+#define u_strCompareIter U_ICU_ENTRY_POINT_RENAME(u_strCompareIter)
+#define u_strFindFirst U_ICU_ENTRY_POINT_RENAME(u_strFindFirst)
+#define u_strFindLast U_ICU_ENTRY_POINT_RENAME(u_strFindLast)
+#define u_strFoldCase U_ICU_ENTRY_POINT_RENAME(u_strFoldCase)
+#define u_strFromJavaModifiedUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromJavaModifiedUTF8WithSub)
+#define u_strFromPunycode U_ICU_ENTRY_POINT_RENAME(u_strFromPunycode)
+#define u_strFromUTF32 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32)
+#define u_strFromUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32WithSub)
+#define u_strFromUTF8 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8)
+#define u_strFromUTF8Lenient U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8Lenient)
+#define u_strFromUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8WithSub)
+#define u_strFromWCS U_ICU_ENTRY_POINT_RENAME(u_strFromWCS)
+#define u_strHasMoreChar32Than U_ICU_ENTRY_POINT_RENAME(u_strHasMoreChar32Than)
+#define u_strToJavaModifiedUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToJavaModifiedUTF8)
+#define u_strToLower U_ICU_ENTRY_POINT_RENAME(u_strToLower)
+#define u_strToPunycode U_ICU_ENTRY_POINT_RENAME(u_strToPunycode)
+#define u_strToTitle U_ICU_ENTRY_POINT_RENAME(u_strToTitle)
+#define u_strToUTF32 U_ICU_ENTRY_POINT_RENAME(u_strToUTF32)
+#define u_strToUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF32WithSub)
+#define u_strToUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToUTF8)
+#define u_strToUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF8WithSub)
+#define u_strToUpper U_ICU_ENTRY_POINT_RENAME(u_strToUpper)
+#define u_strToWCS U_ICU_ENTRY_POINT_RENAME(u_strToWCS)
+#define u_strcasecmp U_ICU_ENTRY_POINT_RENAME(u_strcasecmp)
+#define u_strcat U_ICU_ENTRY_POINT_RENAME(u_strcat)
+#define u_strchr U_ICU_ENTRY_POINT_RENAME(u_strchr)
+#define u_strchr32 U_ICU_ENTRY_POINT_RENAME(u_strchr32)
+#define u_strcmp U_ICU_ENTRY_POINT_RENAME(u_strcmp)
+#define u_strcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strcmpCodePointOrder)
+#define u_strcmpFold U_ICU_ENTRY_POINT_RENAME(u_strcmpFold)
+#define u_strcpy U_ICU_ENTRY_POINT_RENAME(u_strcpy)
+#define u_strcspn U_ICU_ENTRY_POINT_RENAME(u_strcspn)
+#define u_strlen U_ICU_ENTRY_POINT_RENAME(u_strlen)
+#define u_strncasecmp U_ICU_ENTRY_POINT_RENAME(u_strncasecmp)
+#define u_strncat U_ICU_ENTRY_POINT_RENAME(u_strncat)
+#define u_strncmp U_ICU_ENTRY_POINT_RENAME(u_strncmp)
+#define u_strncmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strncmpCodePointOrder)
+#define u_strncpy U_ICU_ENTRY_POINT_RENAME(u_strncpy)
+#define u_strpbrk U_ICU_ENTRY_POINT_RENAME(u_strpbrk)
+#define u_strrchr U_ICU_ENTRY_POINT_RENAME(u_strrchr)
+#define u_strrchr32 U_ICU_ENTRY_POINT_RENAME(u_strrchr32)
+#define u_strrstr U_ICU_ENTRY_POINT_RENAME(u_strrstr)
+#define u_strspn U_ICU_ENTRY_POINT_RENAME(u_strspn)
+#define u_strstr U_ICU_ENTRY_POINT_RENAME(u_strstr)
+#define u_strtok_r U_ICU_ENTRY_POINT_RENAME(u_strtok_r)
+#define u_terminateChars U_ICU_ENTRY_POINT_RENAME(u_terminateChars)
+#define u_terminateUChar32s U_ICU_ENTRY_POINT_RENAME(u_terminateUChar32s)
+#define u_terminateUChars U_ICU_ENTRY_POINT_RENAME(u_terminateUChars)
+#define u_terminateWChars U_ICU_ENTRY_POINT_RENAME(u_terminateWChars)
+#define u_tolower U_ICU_ENTRY_POINT_RENAME(u_tolower)
+#define u_totitle U_ICU_ENTRY_POINT_RENAME(u_totitle)
+#define u_toupper U_ICU_ENTRY_POINT_RENAME(u_toupper)
+#define u_uastrcpy U_ICU_ENTRY_POINT_RENAME(u_uastrcpy)
+#define u_uastrncpy U_ICU_ENTRY_POINT_RENAME(u_uastrncpy)
+#define u_unescape U_ICU_ENTRY_POINT_RENAME(u_unescape)
+#define u_unescapeAt U_ICU_ENTRY_POINT_RENAME(u_unescapeAt)
+#define u_versionFromString U_ICU_ENTRY_POINT_RENAME(u_versionFromString)
+#define u_versionFromUString U_ICU_ENTRY_POINT_RENAME(u_versionFromUString)
+#define u_versionToString U_ICU_ENTRY_POINT_RENAME(u_versionToString)
+#define u_vformatMessage U_ICU_ENTRY_POINT_RENAME(u_vformatMessage)
+#define u_vformatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vformatMessageWithError)
+#define u_vfprintf U_ICU_ENTRY_POINT_RENAME(u_vfprintf)
+#define u_vfprintf_u U_ICU_ENTRY_POINT_RENAME(u_vfprintf_u)
+#define u_vfscanf U_ICU_ENTRY_POINT_RENAME(u_vfscanf)
+#define u_vfscanf_u U_ICU_ENTRY_POINT_RENAME(u_vfscanf_u)
+#define u_vparseMessage U_ICU_ENTRY_POINT_RENAME(u_vparseMessage)
+#define u_vparseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vparseMessageWithError)
+#define u_vsnprintf U_ICU_ENTRY_POINT_RENAME(u_vsnprintf)
+#define u_vsnprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsnprintf_u)
+#define u_vsprintf U_ICU_ENTRY_POINT_RENAME(u_vsprintf)
+#define u_vsprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsprintf_u)
+#define u_vsscanf U_ICU_ENTRY_POINT_RENAME(u_vsscanf)
+#define u_vsscanf_u U_ICU_ENTRY_POINT_RENAME(u_vsscanf_u)
+#define u_writeIdenticalLevelRun U_ICU_ENTRY_POINT_RENAME(u_writeIdenticalLevelRun)
+#define ubidi_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ubidi_addPropertyStarts)
+#define ubidi_close U_ICU_ENTRY_POINT_RENAME(ubidi_close)
+#define ubidi_countParagraphs U_ICU_ENTRY_POINT_RENAME(ubidi_countParagraphs)
+#define ubidi_countRuns U_ICU_ENTRY_POINT_RENAME(ubidi_countRuns)
+#define ubidi_getBaseDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getBaseDirection)
+#define ubidi_getClass U_ICU_ENTRY_POINT_RENAME(ubidi_getClass)
+#define ubidi_getClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_getClassCallback)
+#define ubidi_getCustomizedClass U_ICU_ENTRY_POINT_RENAME(ubidi_getCustomizedClass)
+#define ubidi_getDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getDirection)
+#define ubidi_getJoiningGroup U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningGroup)
+#define ubidi_getJoiningType U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningType)
+#define ubidi_getLength U_ICU_ENTRY_POINT_RENAME(ubidi_getLength)
+#define ubidi_getLevelAt U_ICU_ENTRY_POINT_RENAME(ubidi_getLevelAt)
+#define ubidi_getLevels U_ICU_ENTRY_POINT_RENAME(ubidi_getLevels)
+#define ubidi_getLogicalIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalIndex)
+#define ubidi_getLogicalMap U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalMap)
+#define ubidi_getLogicalRun U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalRun)
+#define ubidi_getMaxValue U_ICU_ENTRY_POINT_RENAME(ubidi_getMaxValue)
+#define ubidi_getMemory U_ICU_ENTRY_POINT_RENAME(ubidi_getMemory)
+#define ubidi_getMirror U_ICU_ENTRY_POINT_RENAME(ubidi_getMirror)
+#define ubidi_getPairedBracket U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracket)
+#define ubidi_getPairedBracketType U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracketType)
+#define ubidi_getParaLevel U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevel)
+#define ubidi_getParaLevelAtIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevelAtIndex)
+#define ubidi_getParagraph U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraph)
+#define ubidi_getParagraphByIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraphByIndex)
+#define ubidi_getProcessedLength U_ICU_ENTRY_POINT_RENAME(ubidi_getProcessedLength)
+#define ubidi_getReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingMode)
+#define ubidi_getReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingOptions)
+#define ubidi_getResultLength U_ICU_ENTRY_POINT_RENAME(ubidi_getResultLength)
+#define ubidi_getRuns U_ICU_ENTRY_POINT_RENAME(ubidi_getRuns)
+#define ubidi_getText U_ICU_ENTRY_POINT_RENAME(ubidi_getText)
+#define ubidi_getVisualIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualIndex)
+#define ubidi_getVisualMap U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualMap)
+#define ubidi_getVisualRun U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualRun)
+#define ubidi_invertMap U_ICU_ENTRY_POINT_RENAME(ubidi_invertMap)
+#define ubidi_isBidiControl U_ICU_ENTRY_POINT_RENAME(ubidi_isBidiControl)
+#define ubidi_isInverse U_ICU_ENTRY_POINT_RENAME(ubidi_isInverse)
+#define ubidi_isJoinControl U_ICU_ENTRY_POINT_RENAME(ubidi_isJoinControl)
+#define ubidi_isMirrored U_ICU_ENTRY_POINT_RENAME(ubidi_isMirrored)
+#define ubidi_isOrderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_isOrderParagraphsLTR)
+#define ubidi_open U_ICU_ENTRY_POINT_RENAME(ubidi_open)
+#define ubidi_openSized U_ICU_ENTRY_POINT_RENAME(ubidi_openSized)
+#define ubidi_orderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_orderParagraphsLTR)
+#define ubidi_reorderLogical U_ICU_ENTRY_POINT_RENAME(ubidi_reorderLogical)
+#define ubidi_reorderVisual U_ICU_ENTRY_POINT_RENAME(ubidi_reorderVisual)
+#define ubidi_setClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_setClassCallback)
+#define ubidi_setContext U_ICU_ENTRY_POINT_RENAME(ubidi_setContext)
+#define ubidi_setInverse U_ICU_ENTRY_POINT_RENAME(ubidi_setInverse)
+#define ubidi_setLine U_ICU_ENTRY_POINT_RENAME(ubidi_setLine)
+#define ubidi_setPara U_ICU_ENTRY_POINT_RENAME(ubidi_setPara)
+#define ubidi_setReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingMode)
+#define ubidi_setReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingOptions)
+#define ubidi_writeReordered U_ICU_ENTRY_POINT_RENAME(ubidi_writeReordered)
+#define ubidi_writeReverse U_ICU_ENTRY_POINT_RENAME(ubidi_writeReverse)
+#define ubiditransform_close U_ICU_ENTRY_POINT_RENAME(ubiditransform_close)
+#define ubiditransform_open U_ICU_ENTRY_POINT_RENAME(ubiditransform_open)
+#define ubiditransform_transform U_ICU_ENTRY_POINT_RENAME(ubiditransform_transform)
+#define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode)
+#define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close)
+#define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable)
+#define ubrk_current U_ICU_ENTRY_POINT_RENAME(ubrk_current)
+#define ubrk_first U_ICU_ENTRY_POINT_RENAME(ubrk_first)
+#define ubrk_following U_ICU_ENTRY_POINT_RENAME(ubrk_following)
+#define ubrk_getAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_getAvailable)
+#define ubrk_getBinaryRules U_ICU_ENTRY_POINT_RENAME(ubrk_getBinaryRules)
+#define ubrk_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ubrk_getLocaleByType)
+#define ubrk_getRuleStatus U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatus)
+#define ubrk_getRuleStatusVec U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatusVec)
+#define ubrk_isBoundary U_ICU_ENTRY_POINT_RENAME(ubrk_isBoundary)
+#define ubrk_last U_ICU_ENTRY_POINT_RENAME(ubrk_last)
+#define ubrk_next U_ICU_ENTRY_POINT_RENAME(ubrk_next)
+#define ubrk_open U_ICU_ENTRY_POINT_RENAME(ubrk_open)
+#define ubrk_openBinaryRules U_ICU_ENTRY_POINT_RENAME(ubrk_openBinaryRules)
+#define ubrk_openRules U_ICU_ENTRY_POINT_RENAME(ubrk_openRules)
+#define ubrk_preceding U_ICU_ENTRY_POINT_RENAME(ubrk_preceding)
+#define ubrk_previous U_ICU_ENTRY_POINT_RENAME(ubrk_previous)
+#define ubrk_refreshUText U_ICU_ENTRY_POINT_RENAME(ubrk_refreshUText)
+#define ubrk_safeClone U_ICU_ENTRY_POINT_RENAME(ubrk_safeClone)
+#define ubrk_setText U_ICU_ENTRY_POINT_RENAME(ubrk_setText)
+#define ubrk_setUText U_ICU_ENTRY_POINT_RENAME(ubrk_setUText)
+#define ubrk_swap U_ICU_ENTRY_POINT_RENAME(ubrk_swap)
+#define ucache_compareKeys U_ICU_ENTRY_POINT_RENAME(ucache_compareKeys)
+#define ucache_deleteKey U_ICU_ENTRY_POINT_RENAME(ucache_deleteKey)
+#define ucache_hashKeys U_ICU_ENTRY_POINT_RENAME(ucache_hashKeys)
+#define ucal_add U_ICU_ENTRY_POINT_RENAME(ucal_add)
+#define ucal_clear U_ICU_ENTRY_POINT_RENAME(ucal_clear)
+#define ucal_clearField U_ICU_ENTRY_POINT_RENAME(ucal_clearField)
+#define ucal_clone U_ICU_ENTRY_POINT_RENAME(ucal_clone)
+#define ucal_close U_ICU_ENTRY_POINT_RENAME(ucal_close)
+#define ucal_countAvailable U_ICU_ENTRY_POINT_RENAME(ucal_countAvailable)
+#define ucal_equivalentTo U_ICU_ENTRY_POINT_RENAME(ucal_equivalentTo)
+#define ucal_get U_ICU_ENTRY_POINT_RENAME(ucal_get)
+#define ucal_getAttribute U_ICU_ENTRY_POINT_RENAME(ucal_getAttribute)
+#define ucal_getAvailable U_ICU_ENTRY_POINT_RENAME(ucal_getAvailable)
+#define ucal_getCanonicalTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getCanonicalTimeZoneID)
+#define ucal_getDSTSavings U_ICU_ENTRY_POINT_RENAME(ucal_getDSTSavings)
+#define ucal_getDayOfWeekType U_ICU_ENTRY_POINT_RENAME(ucal_getDayOfWeekType)
+#define ucal_getDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getDefaultTimeZone)
+#define ucal_getFieldDifference U_ICU_ENTRY_POINT_RENAME(ucal_getFieldDifference)
+#define ucal_getGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_getGregorianChange)
+#define ucal_getHostTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getHostTimeZone)
+#define ucal_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucal_getKeywordValuesForLocale)
+#define ucal_getLimit U_ICU_ENTRY_POINT_RENAME(ucal_getLimit)
+#define ucal_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucal_getLocaleByType)
+#define ucal_getMillis U_ICU_ENTRY_POINT_RENAME(ucal_getMillis)
+#define ucal_getNow U_ICU_ENTRY_POINT_RENAME(ucal_getNow)
+#define ucal_getTZDataVersion U_ICU_ENTRY_POINT_RENAME(ucal_getTZDataVersion)
+#define ucal_getTimeZoneDisplayName U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneDisplayName)
+#define ucal_getTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneID)
+#define ucal_getTimeZoneIDForWindowsID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneIDForWindowsID)
+#define ucal_getTimeZoneTransitionDate U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneTransitionDate)
+#define ucal_getType U_ICU_ENTRY_POINT_RENAME(ucal_getType)
+#define ucal_getWeekendTransition U_ICU_ENTRY_POINT_RENAME(ucal_getWeekendTransition)
+#define ucal_getWindowsTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getWindowsTimeZoneID)
+#define ucal_inDaylightTime U_ICU_ENTRY_POINT_RENAME(ucal_inDaylightTime)
+#define ucal_isSet U_ICU_ENTRY_POINT_RENAME(ucal_isSet)
+#define ucal_isWeekend U_ICU_ENTRY_POINT_RENAME(ucal_isWeekend)
+#define ucal_open U_ICU_ENTRY_POINT_RENAME(ucal_open)
+#define ucal_openCountryTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openCountryTimeZones)
+#define ucal_openTimeZoneIDEnumeration U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZoneIDEnumeration)
+#define ucal_openTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZones)
+#define ucal_roll U_ICU_ENTRY_POINT_RENAME(ucal_roll)
+#define ucal_set U_ICU_ENTRY_POINT_RENAME(ucal_set)
+#define ucal_setAttribute U_ICU_ENTRY_POINT_RENAME(ucal_setAttribute)
+#define ucal_setDate U_ICU_ENTRY_POINT_RENAME(ucal_setDate)
+#define ucal_setDateTime U_ICU_ENTRY_POINT_RENAME(ucal_setDateTime)
+#define ucal_setDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setDefaultTimeZone)
+#define ucal_setGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_setGregorianChange)
+#define ucal_setMillis U_ICU_ENTRY_POINT_RENAME(ucal_setMillis)
+#define ucal_setTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setTimeZone)
+#define ucase_addCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addCaseClosure)
+#define ucase_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ucase_addPropertyStarts)
+#define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure)
+#define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold)
+#define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale)
+#define ucase_getTrie U_ICU_ENTRY_POINT_RENAME(ucase_getTrie)
+#define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType)
+#define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable)
+#define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty)
+#define ucase_isCaseSensitive U_ICU_ENTRY_POINT_RENAME(ucase_isCaseSensitive)
+#define ucase_isSoftDotted U_ICU_ENTRY_POINT_RENAME(ucase_isSoftDotted)
+#define ucase_toFullFolding U_ICU_ENTRY_POINT_RENAME(ucase_toFullFolding)
+#define ucase_toFullLower U_ICU_ENTRY_POINT_RENAME(ucase_toFullLower)
+#define ucase_toFullTitle U_ICU_ENTRY_POINT_RENAME(ucase_toFullTitle)
+#define ucase_toFullUpper U_ICU_ENTRY_POINT_RENAME(ucase_toFullUpper)
+#define ucase_tolower U_ICU_ENTRY_POINT_RENAME(ucase_tolower)
+#define ucase_totitle U_ICU_ENTRY_POINT_RENAME(ucase_totitle)
+#define ucase_toupper U_ICU_ENTRY_POINT_RENAME(ucase_toupper)
+#define ucasemap_close U_ICU_ENTRY_POINT_RENAME(ucasemap_close)
+#define ucasemap_getBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_getBreakIterator)
+#define ucasemap_getLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_getLocale)
+#define ucasemap_getOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_getOptions)
+#define ucasemap_internalUTF8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_internalUTF8ToTitle)
+#define ucasemap_open U_ICU_ENTRY_POINT_RENAME(ucasemap_open)
+#define ucasemap_setBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_setBreakIterator)
+#define ucasemap_setLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_setLocale)
+#define ucasemap_setOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_setOptions)
+#define ucasemap_toTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_toTitle)
+#define ucasemap_utf8FoldCase U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8FoldCase)
+#define ucasemap_utf8ToLower U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToLower)
+#define ucasemap_utf8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToTitle)
+#define ucasemap_utf8ToUpper U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToUpper)
+#define ucfpos_close U_ICU_ENTRY_POINT_RENAME(ucfpos_close)
+#define ucfpos_constrainCategory U_ICU_ENTRY_POINT_RENAME(ucfpos_constrainCategory)
+#define ucfpos_constrainField U_ICU_ENTRY_POINT_RENAME(ucfpos_constrainField)
+#define ucfpos_getCategory U_ICU_ENTRY_POINT_RENAME(ucfpos_getCategory)
+#define ucfpos_getField U_ICU_ENTRY_POINT_RENAME(ucfpos_getField)
+#define ucfpos_getIndexes U_ICU_ENTRY_POINT_RENAME(ucfpos_getIndexes)
+#define ucfpos_getInt64IterationContext U_ICU_ENTRY_POINT_RENAME(ucfpos_getInt64IterationContext)
+#define ucfpos_matchesField U_ICU_ENTRY_POINT_RENAME(ucfpos_matchesField)
+#define ucfpos_open U_ICU_ENTRY_POINT_RENAME(ucfpos_open)
+#define ucfpos_reset U_ICU_ENTRY_POINT_RENAME(ucfpos_reset)
+#define ucfpos_setInt64IterationContext U_ICU_ENTRY_POINT_RENAME(ucfpos_setInt64IterationContext)
+#define ucfpos_setState U_ICU_ENTRY_POINT_RENAME(ucfpos_setState)
+#define uchar_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(uchar_addPropertyStarts)
+#define uchar_swapNames U_ICU_ENTRY_POINT_RENAME(uchar_swapNames)
+#define ucln_cleanupOne U_ICU_ENTRY_POINT_RENAME(ucln_cleanupOne)
+#define ucln_common_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_common_registerCleanup)
+#define ucln_i18n_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_i18n_registerCleanup)
+#define ucln_io_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_io_registerCleanup)
+#define ucln_lib_cleanup U_ICU_ENTRY_POINT_RENAME(ucln_lib_cleanup)
+#define ucln_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_registerCleanup)
+#define ucnv_MBCSFromUChar32 U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUChar32)
+#define ucnv_MBCSFromUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUnicodeWithOffsets)
+#define ucnv_MBCSGetFilteredUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetFilteredUnicodeSetForUnicode)
+#define ucnv_MBCSGetType U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetType)
+#define ucnv_MBCSGetUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetUnicodeSetForUnicode)
+#define ucnv_MBCSIsLeadByte U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSIsLeadByte)
+#define ucnv_MBCSSimpleGetNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSSimpleGetNextUChar)
+#define ucnv_MBCSToUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSToUnicodeWithOffsets)
+#define ucnv_bld_countAvailableConverters U_ICU_ENTRY_POINT_RENAME(ucnv_bld_countAvailableConverters)
+#define ucnv_bld_getAvailableConverter U_ICU_ENTRY_POINT_RENAME(ucnv_bld_getAvailableConverter)
+#define ucnv_canCreateConverter U_ICU_ENTRY_POINT_RENAME(ucnv_canCreateConverter)
+#define ucnv_cbFromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteBytes)
+#define ucnv_cbFromUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteSub)
+#define ucnv_cbFromUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteUChars)
+#define ucnv_cbToUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteSub)
+#define ucnv_cbToUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteUChars)
+#define ucnv_close U_ICU_ENTRY_POINT_RENAME(ucnv_close)
+#define ucnv_compareNames U_ICU_ENTRY_POINT_RENAME(ucnv_compareNames)
+#define ucnv_convert U_ICU_ENTRY_POINT_RENAME(ucnv_convert)
+#define ucnv_convertEx U_ICU_ENTRY_POINT_RENAME(ucnv_convertEx)
+#define ucnv_countAliases U_ICU_ENTRY_POINT_RENAME(ucnv_countAliases)
+#define ucnv_countAvailable U_ICU_ENTRY_POINT_RENAME(ucnv_countAvailable)
+#define ucnv_countStandards U_ICU_ENTRY_POINT_RENAME(ucnv_countStandards)
+#define ucnv_createAlgorithmicConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createAlgorithmicConverter)
+#define ucnv_createConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createConverter)
+#define ucnv_createConverterFromPackage U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromPackage)
+#define ucnv_createConverterFromSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromSharedData)
+#define ucnv_detectUnicodeSignature U_ICU_ENTRY_POINT_RENAME(ucnv_detectUnicodeSignature)
+#define ucnv_enableCleanup U_ICU_ENTRY_POINT_RENAME(ucnv_enableCleanup)
+#define ucnv_extContinueMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchFromU)
+#define ucnv_extContinueMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchToU)
+#define ucnv_extGetUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_extGetUnicodeSet)
+#define ucnv_extInitialMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchFromU)
+#define ucnv_extInitialMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchToU)
+#define ucnv_extSimpleMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchFromU)
+#define ucnv_extSimpleMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchToU)
+#define ucnv_fixFileSeparator U_ICU_ENTRY_POINT_RENAME(ucnv_fixFileSeparator)
+#define ucnv_flushCache U_ICU_ENTRY_POINT_RENAME(ucnv_flushCache)
+#define ucnv_fromAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_fromAlgorithmic)
+#define ucnv_fromUChars U_ICU_ENTRY_POINT_RENAME(ucnv_fromUChars)
+#define ucnv_fromUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_fromUCountPending)
+#define ucnv_fromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_fromUWriteBytes)
+#define ucnv_fromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode)
+#define ucnv_fromUnicode_UTF8 U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8)
+#define ucnv_fromUnicode_UTF8_OFFSETS_LOGIC U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8_OFFSETS_LOGIC)
+#define ucnv_getAlias U_ICU_ENTRY_POINT_RENAME(ucnv_getAlias)
+#define ucnv_getAliases U_ICU_ENTRY_POINT_RENAME(ucnv_getAliases)
+#define ucnv_getAvailableName U_ICU_ENTRY_POINT_RENAME(ucnv_getAvailableName)
+#define ucnv_getCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_getCCSID)
+#define ucnv_getCanonicalName U_ICU_ENTRY_POINT_RENAME(ucnv_getCanonicalName)
+#define ucnv_getCompleteUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getCompleteUnicodeSet)
+#define ucnv_getDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_getDefaultName)
+#define ucnv_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucnv_getDisplayName)
+#define ucnv_getFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getFromUCallBack)
+#define ucnv_getInvalidChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidChars)
+#define ucnv_getInvalidUChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidUChars)
+#define ucnv_getMaxCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMaxCharSize)
+#define ucnv_getMinCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMinCharSize)
+#define ucnv_getName U_ICU_ENTRY_POINT_RENAME(ucnv_getName)
+#define ucnv_getNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_getNextUChar)
+#define ucnv_getNonSurrogateUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getNonSurrogateUnicodeSet)
+#define ucnv_getPlatform U_ICU_ENTRY_POINT_RENAME(ucnv_getPlatform)
+#define ucnv_getStandard U_ICU_ENTRY_POINT_RENAME(ucnv_getStandard)
+#define ucnv_getStandardName U_ICU_ENTRY_POINT_RENAME(ucnv_getStandardName)
+#define ucnv_getStarters U_ICU_ENTRY_POINT_RENAME(ucnv_getStarters)
+#define ucnv_getSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_getSubstChars)
+#define ucnv_getToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getToUCallBack)
+#define ucnv_getType U_ICU_ENTRY_POINT_RENAME(ucnv_getType)
+#define ucnv_getUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getUnicodeSet)
+#define ucnv_incrementRefCount U_ICU_ENTRY_POINT_RENAME(ucnv_incrementRefCount)
+#define ucnv_io_countKnownConverters U_ICU_ENTRY_POINT_RENAME(ucnv_io_countKnownConverters)
+#define ucnv_io_getConverterName U_ICU_ENTRY_POINT_RENAME(ucnv_io_getConverterName)
+#define ucnv_io_stripASCIIForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripASCIIForCompare)
+#define ucnv_io_stripEBCDICForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripEBCDICForCompare)
+#define ucnv_isAmbiguous U_ICU_ENTRY_POINT_RENAME(ucnv_isAmbiguous)
+#define ucnv_isFixedWidth U_ICU_ENTRY_POINT_RENAME(ucnv_isFixedWidth)
+#define ucnv_load U_ICU_ENTRY_POINT_RENAME(ucnv_load)
+#define ucnv_loadSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_loadSharedData)
+#define ucnv_open U_ICU_ENTRY_POINT_RENAME(ucnv_open)
+#define ucnv_openAllNames U_ICU_ENTRY_POINT_RENAME(ucnv_openAllNames)
+#define ucnv_openCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_openCCSID)
+#define ucnv_openPackage U_ICU_ENTRY_POINT_RENAME(ucnv_openPackage)
+#define ucnv_openStandardNames U_ICU_ENTRY_POINT_RENAME(ucnv_openStandardNames)
+#define ucnv_openU U_ICU_ENTRY_POINT_RENAME(ucnv_openU)
+#define ucnv_reset U_ICU_ENTRY_POINT_RENAME(ucnv_reset)
+#define ucnv_resetFromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetFromUnicode)
+#define ucnv_resetToUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetToUnicode)
+#define ucnv_safeClone U_ICU_ENTRY_POINT_RENAME(ucnv_safeClone)
+#define ucnv_setDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_setDefaultName)
+#define ucnv_setFallback U_ICU_ENTRY_POINT_RENAME(ucnv_setFallback)
+#define ucnv_setFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setFromUCallBack)
+#define ucnv_setSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstChars)
+#define ucnv_setSubstString U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstString)
+#define ucnv_setToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setToUCallBack)
+#define ucnv_swap U_ICU_ENTRY_POINT_RENAME(ucnv_swap)
+#define ucnv_swapAliases U_ICU_ENTRY_POINT_RENAME(ucnv_swapAliases)
+#define ucnv_toAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_toAlgorithmic)
+#define ucnv_toUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUChars)
+#define ucnv_toUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_toUCountPending)
+#define ucnv_toUWriteCodePoint U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteCodePoint)
+#define ucnv_toUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteUChars)
+#define ucnv_toUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_toUnicode)
+#define ucnv_unload U_ICU_ENTRY_POINT_RENAME(ucnv_unload)
+#define ucnv_unloadSharedDataIfReady U_ICU_ENTRY_POINT_RENAME(ucnv_unloadSharedDataIfReady)
+#define ucnv_usesFallback U_ICU_ENTRY_POINT_RENAME(ucnv_usesFallback)
+#define ucnvsel_close U_ICU_ENTRY_POINT_RENAME(ucnvsel_close)
+#define ucnvsel_open U_ICU_ENTRY_POINT_RENAME(ucnvsel_open)
+#define ucnvsel_openFromSerialized U_ICU_ENTRY_POINT_RENAME(ucnvsel_openFromSerialized)
+#define ucnvsel_selectForString U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForString)
+#define ucnvsel_selectForUTF8 U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForUTF8)
+#define ucnvsel_serialize U_ICU_ENTRY_POINT_RENAME(ucnvsel_serialize)
+#define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary)
+#define ucol_close U_ICU_ENTRY_POINT_RENAME(ucol_close)
+#define ucol_closeElements U_ICU_ENTRY_POINT_RENAME(ucol_closeElements)
+#define ucol_countAvailable U_ICU_ENTRY_POINT_RENAME(ucol_countAvailable)
+#define ucol_equal U_ICU_ENTRY_POINT_RENAME(ucol_equal)
+#define ucol_equals U_ICU_ENTRY_POINT_RENAME(ucol_equals)
+#define ucol_getAttribute U_ICU_ENTRY_POINT_RENAME(ucol_getAttribute)
+#define ucol_getAvailable U_ICU_ENTRY_POINT_RENAME(ucol_getAvailable)
+#define ucol_getBound U_ICU_ENTRY_POINT_RENAME(ucol_getBound)
+#define ucol_getContractions U_ICU_ENTRY_POINT_RENAME(ucol_getContractions)
+#define ucol_getContractionsAndExpansions U_ICU_ENTRY_POINT_RENAME(ucol_getContractionsAndExpansions)
+#define ucol_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucol_getDisplayName)
+#define ucol_getEquivalentReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getEquivalentReorderCodes)
+#define ucol_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ucol_getFunctionalEquivalent)
+#define ucol_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValues)
+#define ucol_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValuesForLocale)
+#define ucol_getKeywords U_ICU_ENTRY_POINT_RENAME(ucol_getKeywords)
+#define ucol_getLocale U_ICU_ENTRY_POINT_RENAME(ucol_getLocale)
+#define ucol_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucol_getLocaleByType)
+#define ucol_getMaxExpansion U_ICU_ENTRY_POINT_RENAME(ucol_getMaxExpansion)
+#define ucol_getMaxVariable U_ICU_ENTRY_POINT_RENAME(ucol_getMaxVariable)
+#define ucol_getOffset U_ICU_ENTRY_POINT_RENAME(ucol_getOffset)
+#define ucol_getReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getReorderCodes)
+#define ucol_getRules U_ICU_ENTRY_POINT_RENAME(ucol_getRules)
+#define ucol_getRulesEx U_ICU_ENTRY_POINT_RENAME(ucol_getRulesEx)
+#define ucol_getShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_getShortDefinitionString)
+#define ucol_getSortKey U_ICU_ENTRY_POINT_RENAME(ucol_getSortKey)
+#define ucol_getStrength U_ICU_ENTRY_POINT_RENAME(ucol_getStrength)
+#define ucol_getTailoredSet U_ICU_ENTRY_POINT_RENAME(ucol_getTailoredSet)
+#define ucol_getUCAVersion U_ICU_ENTRY_POINT_RENAME(ucol_getUCAVersion)
+#define ucol_getUnsafeSet U_ICU_ENTRY_POINT_RENAME(ucol_getUnsafeSet)
+#define ucol_getVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_getVariableTop)
+#define ucol_getVersion U_ICU_ENTRY_POINT_RENAME(ucol_getVersion)
+#define ucol_greater U_ICU_ENTRY_POINT_RENAME(ucol_greater)
+#define ucol_greaterOrEqual U_ICU_ENTRY_POINT_RENAME(ucol_greaterOrEqual)
+#define ucol_keyHashCode U_ICU_ENTRY_POINT_RENAME(ucol_keyHashCode)
+#define ucol_looksLikeCollationBinary U_ICU_ENTRY_POINT_RENAME(ucol_looksLikeCollationBinary)
+#define ucol_mergeSortkeys U_ICU_ENTRY_POINT_RENAME(ucol_mergeSortkeys)
+#define ucol_next U_ICU_ENTRY_POINT_RENAME(ucol_next)
+#define ucol_nextSortKeyPart U_ICU_ENTRY_POINT_RENAME(ucol_nextSortKeyPart)
+#define ucol_normalizeShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_normalizeShortDefinitionString)
+#define ucol_open U_ICU_ENTRY_POINT_RENAME(ucol_open)
+#define ucol_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ucol_openAvailableLocales)
+#define ucol_openBinary U_ICU_ENTRY_POINT_RENAME(ucol_openBinary)
+#define ucol_openElements U_ICU_ENTRY_POINT_RENAME(ucol_openElements)
+#define ucol_openFromShortString U_ICU_ENTRY_POINT_RENAME(ucol_openFromShortString)
+#define ucol_openRules U_ICU_ENTRY_POINT_RENAME(ucol_openRules)
+#define ucol_prepareShortStringOpen U_ICU_ENTRY_POINT_RENAME(ucol_prepareShortStringOpen)
+#define ucol_previous U_ICU_ENTRY_POINT_RENAME(ucol_previous)
+#define ucol_primaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_primaryOrder)
+#define ucol_reset U_ICU_ENTRY_POINT_RENAME(ucol_reset)
+#define ucol_restoreVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_restoreVariableTop)
+#define ucol_safeClone U_ICU_ENTRY_POINT_RENAME(ucol_safeClone)
+#define ucol_secondaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_secondaryOrder)
+#define ucol_setAttribute U_ICU_ENTRY_POINT_RENAME(ucol_setAttribute)
+#define ucol_setMaxVariable U_ICU_ENTRY_POINT_RENAME(ucol_setMaxVariable)
+#define ucol_setOffset U_ICU_ENTRY_POINT_RENAME(ucol_setOffset)
+#define ucol_setReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_setReorderCodes)
+#define ucol_setStrength U_ICU_ENTRY_POINT_RENAME(ucol_setStrength)
+#define ucol_setText U_ICU_ENTRY_POINT_RENAME(ucol_setText)
+#define ucol_setVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_setVariableTop)
+#define ucol_strcoll U_ICU_ENTRY_POINT_RENAME(ucol_strcoll)
+#define ucol_strcollIter U_ICU_ENTRY_POINT_RENAME(ucol_strcollIter)
+#define ucol_strcollUTF8 U_ICU_ENTRY_POINT_RENAME(ucol_strcollUTF8)
+#define ucol_swap U_ICU_ENTRY_POINT_RENAME(ucol_swap)
+#define ucol_swapInverseUCA U_ICU_ENTRY_POINT_RENAME(ucol_swapInverseUCA)
+#define ucol_tertiaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_tertiaryOrder)
+#define ucpmap_get U_ICU_ENTRY_POINT_RENAME(ucpmap_get)
+#define ucpmap_getRange U_ICU_ENTRY_POINT_RENAME(ucpmap_getRange)
+#define ucptrie_close U_ICU_ENTRY_POINT_RENAME(ucptrie_close)
+#define ucptrie_get U_ICU_ENTRY_POINT_RENAME(ucptrie_get)
+#define ucptrie_getRange U_ICU_ENTRY_POINT_RENAME(ucptrie_getRange)
+#define ucptrie_getType U_ICU_ENTRY_POINT_RENAME(ucptrie_getType)
+#define ucptrie_getValueWidth U_ICU_ENTRY_POINT_RENAME(ucptrie_getValueWidth)
+#define ucptrie_internalGetRange U_ICU_ENTRY_POINT_RENAME(ucptrie_internalGetRange)
+#define ucptrie_internalSmallIndex U_ICU_ENTRY_POINT_RENAME(ucptrie_internalSmallIndex)
+#define ucptrie_internalSmallU8Index U_ICU_ENTRY_POINT_RENAME(ucptrie_internalSmallU8Index)
+#define ucptrie_internalU8PrevIndex U_ICU_ENTRY_POINT_RENAME(ucptrie_internalU8PrevIndex)
+#define ucptrie_openFromBinary U_ICU_ENTRY_POINT_RENAME(ucptrie_openFromBinary)
+#define ucptrie_swap U_ICU_ENTRY_POINT_RENAME(ucptrie_swap)
+#define ucptrie_toBinary U_ICU_ENTRY_POINT_RENAME(ucptrie_toBinary)
+#define ucsdet_close U_ICU_ENTRY_POINT_RENAME(ucsdet_close)
+#define ucsdet_detect U_ICU_ENTRY_POINT_RENAME(ucsdet_detect)
+#define ucsdet_detectAll U_ICU_ENTRY_POINT_RENAME(ucsdet_detectAll)
+#define ucsdet_enableInputFilter U_ICU_ENTRY_POINT_RENAME(ucsdet_enableInputFilter)
+#define ucsdet_getAllDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getAllDetectableCharsets)
+#define ucsdet_getConfidence U_ICU_ENTRY_POINT_RENAME(ucsdet_getConfidence)
+#define ucsdet_getDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getDetectableCharsets)
+#define ucsdet_getLanguage U_ICU_ENTRY_POINT_RENAME(ucsdet_getLanguage)
+#define ucsdet_getName U_ICU_ENTRY_POINT_RENAME(ucsdet_getName)
+#define ucsdet_getUChars U_ICU_ENTRY_POINT_RENAME(ucsdet_getUChars)
+#define ucsdet_isInputFilterEnabled U_ICU_ENTRY_POINT_RENAME(ucsdet_isInputFilterEnabled)
+#define ucsdet_open U_ICU_ENTRY_POINT_RENAME(ucsdet_open)
+#define ucsdet_setDeclaredEncoding U_ICU_ENTRY_POINT_RENAME(ucsdet_setDeclaredEncoding)
+#define ucsdet_setDetectableCharset U_ICU_ENTRY_POINT_RENAME(ucsdet_setDetectableCharset)
+#define ucsdet_setText U_ICU_ENTRY_POINT_RENAME(ucsdet_setText)
+#define ucurr_countCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_countCurrencies)
+#define ucurr_forLocale U_ICU_ENTRY_POINT_RENAME(ucurr_forLocale)
+#define ucurr_forLocaleAndDate U_ICU_ENTRY_POINT_RENAME(ucurr_forLocaleAndDate)
+#define ucurr_getDefaultFractionDigits U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigits)
+#define ucurr_getDefaultFractionDigitsForUsage U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigitsForUsage)
+#define ucurr_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucurr_getKeywordValuesForLocale)
+#define ucurr_getName U_ICU_ENTRY_POINT_RENAME(ucurr_getName)
+#define ucurr_getNumericCode U_ICU_ENTRY_POINT_RENAME(ucurr_getNumericCode)
+#define ucurr_getPluralName U_ICU_ENTRY_POINT_RENAME(ucurr_getPluralName)
+#define ucurr_getRoundingIncrement U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrement)
+#define ucurr_getRoundingIncrementForUsage U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrementForUsage)
+#define ucurr_isAvailable U_ICU_ENTRY_POINT_RENAME(ucurr_isAvailable)
+#define ucurr_openISOCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_openISOCurrencies)
+#define ucurr_register U_ICU_ENTRY_POINT_RENAME(ucurr_register)
+#define ucurr_unregister U_ICU_ENTRY_POINT_RENAME(ucurr_unregister)
+#define udat_adoptNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_adoptNumberFormat)
+#define udat_adoptNumberFormatForFields U_ICU_ENTRY_POINT_RENAME(udat_adoptNumberFormatForFields)
+#define udat_applyPattern U_ICU_ENTRY_POINT_RENAME(udat_applyPattern)
+#define udat_applyPatternRelative U_ICU_ENTRY_POINT_RENAME(udat_applyPatternRelative)
+#define udat_clone U_ICU_ENTRY_POINT_RENAME(udat_clone)
+#define udat_close U_ICU_ENTRY_POINT_RENAME(udat_close)
+#define udat_countAvailable U_ICU_ENTRY_POINT_RENAME(udat_countAvailable)
+#define udat_countSymbols U_ICU_ENTRY_POINT_RENAME(udat_countSymbols)
+#define udat_format U_ICU_ENTRY_POINT_RENAME(udat_format)
+#define udat_formatCalendar U_ICU_ENTRY_POINT_RENAME(udat_formatCalendar)
+#define udat_formatCalendarForFields U_ICU_ENTRY_POINT_RENAME(udat_formatCalendarForFields)
+#define udat_formatForFields U_ICU_ENTRY_POINT_RENAME(udat_formatForFields)
+#define udat_get2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_get2DigitYearStart)
+#define udat_getAvailable U_ICU_ENTRY_POINT_RENAME(udat_getAvailable)
+#define udat_getBooleanAttribute U_ICU_ENTRY_POINT_RENAME(udat_getBooleanAttribute)
+#define udat_getCalendar U_ICU_ENTRY_POINT_RENAME(udat_getCalendar)
+#define udat_getContext U_ICU_ENTRY_POINT_RENAME(udat_getContext)
+#define udat_getLocaleByType U_ICU_ENTRY_POINT_RENAME(udat_getLocaleByType)
+#define udat_getNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormat)
+#define udat_getNumberFormatForField U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormatForField)
+#define udat_getSymbols U_ICU_ENTRY_POINT_RENAME(udat_getSymbols)
+#define udat_isLenient U_ICU_ENTRY_POINT_RENAME(udat_isLenient)
+#define udat_open U_ICU_ENTRY_POINT_RENAME(udat_open)
+#define udat_parse U_ICU_ENTRY_POINT_RENAME(udat_parse)
+#define udat_parseCalendar U_ICU_ENTRY_POINT_RENAME(udat_parseCalendar)
+#define udat_registerOpener U_ICU_ENTRY_POINT_RENAME(udat_registerOpener)
+#define udat_set2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_set2DigitYearStart)
+#define udat_setBooleanAttribute U_ICU_ENTRY_POINT_RENAME(udat_setBooleanAttribute)
+#define udat_setCalendar U_ICU_ENTRY_POINT_RENAME(udat_setCalendar)
+#define udat_setContext U_ICU_ENTRY_POINT_RENAME(udat_setContext)
+#define udat_setLenient U_ICU_ENTRY_POINT_RENAME(udat_setLenient)
+#define udat_setNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_setNumberFormat)
+#define udat_setSymbols U_ICU_ENTRY_POINT_RENAME(udat_setSymbols)
+#define udat_toCalendarDateField U_ICU_ENTRY_POINT_RENAME(udat_toCalendarDateField)
+#define udat_toPattern U_ICU_ENTRY_POINT_RENAME(udat_toPattern)
+#define udat_toPatternRelativeDate U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeDate)
+#define udat_toPatternRelativeTime U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeTime)
+#define udat_unregisterOpener U_ICU_ENTRY_POINT_RENAME(udat_unregisterOpener)
+#define udata_checkCommonData U_ICU_ENTRY_POINT_RENAME(udata_checkCommonData)
+#define udata_close U_ICU_ENTRY_POINT_RENAME(udata_close)
+#define udata_closeSwapper U_ICU_ENTRY_POINT_RENAME(udata_closeSwapper)
+#define udata_getHeaderSize U_ICU_ENTRY_POINT_RENAME(udata_getHeaderSize)
+#define udata_getInfo U_ICU_ENTRY_POINT_RENAME(udata_getInfo)
+#define udata_getInfoSize U_ICU_ENTRY_POINT_RENAME(udata_getInfoSize)
+#define udata_getLength U_ICU_ENTRY_POINT_RENAME(udata_getLength)
+#define udata_getMemory U_ICU_ENTRY_POINT_RENAME(udata_getMemory)
+#define udata_getRawMemory U_ICU_ENTRY_POINT_RENAME(udata_getRawMemory)
+#define udata_open U_ICU_ENTRY_POINT_RENAME(udata_open)
+#define udata_openChoice U_ICU_ENTRY_POINT_RENAME(udata_openChoice)
+#define udata_openSwapper U_ICU_ENTRY_POINT_RENAME(udata_openSwapper)
+#define udata_openSwapperForInputData U_ICU_ENTRY_POINT_RENAME(udata_openSwapperForInputData)
+#define udata_printError U_ICU_ENTRY_POINT_RENAME(udata_printError)
+#define udata_readInt16 U_ICU_ENTRY_POINT_RENAME(udata_readInt16)
+#define udata_readInt32 U_ICU_ENTRY_POINT_RENAME(udata_readInt32)
+#define udata_setAppData U_ICU_ENTRY_POINT_RENAME(udata_setAppData)
+#define udata_setCommonData U_ICU_ENTRY_POINT_RENAME(udata_setCommonData)
+#define udata_setFileAccess U_ICU_ENTRY_POINT_RENAME(udata_setFileAccess)
+#define udata_swapDataHeader U_ICU_ENTRY_POINT_RENAME(udata_swapDataHeader)
+#define udata_swapInvStringBlock U_ICU_ENTRY_POINT_RENAME(udata_swapInvStringBlock)
+#define udatpg_addPattern U_ICU_ENTRY_POINT_RENAME(udatpg_addPattern)
+#define udatpg_clone U_ICU_ENTRY_POINT_RENAME(udatpg_clone)
+#define udatpg_close U_ICU_ENTRY_POINT_RENAME(udatpg_close)
+#define udatpg_getAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemFormat)
+#define udatpg_getAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemName)
+#define udatpg_getBaseSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getBaseSkeleton)
+#define udatpg_getBestPattern U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPattern)
+#define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions)
+#define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat)
+#define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal)
+#define udatpg_getDefaultHourCycle U_ICU_ENTRY_POINT_RENAME(udatpg_getDefaultHourCycle)
+#define udatpg_getFieldDisplayName U_ICU_ENTRY_POINT_RENAME(udatpg_getFieldDisplayName)
+#define udatpg_getPatternForSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getPatternForSkeleton)
+#define udatpg_getSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getSkeleton)
+#define udatpg_open U_ICU_ENTRY_POINT_RENAME(udatpg_open)
+#define udatpg_openBaseSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openBaseSkeletons)
+#define udatpg_openEmpty U_ICU_ENTRY_POINT_RENAME(udatpg_openEmpty)
+#define udatpg_openSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openSkeletons)
+#define udatpg_replaceFieldTypes U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypes)
+#define udatpg_replaceFieldTypesWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypesWithOptions)
+#define udatpg_setAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemFormat)
+#define udatpg_setAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemName)
+#define udatpg_setDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormat)
+#define udatpg_setDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_setDecimal)
+#define udict_swap U_ICU_ENTRY_POINT_RENAME(udict_swap)
+#define udtitvfmt_close U_ICU_ENTRY_POINT_RENAME(udtitvfmt_close)
+#define udtitvfmt_closeResult U_ICU_ENTRY_POINT_RENAME(udtitvfmt_closeResult)
+#define udtitvfmt_format U_ICU_ENTRY_POINT_RENAME(udtitvfmt_format)
+#define udtitvfmt_formatCalendarToResult U_ICU_ENTRY_POINT_RENAME(udtitvfmt_formatCalendarToResult)
+#define udtitvfmt_formatToResult U_ICU_ENTRY_POINT_RENAME(udtitvfmt_formatToResult)
+#define udtitvfmt_getContext U_ICU_ENTRY_POINT_RENAME(udtitvfmt_getContext)
+#define udtitvfmt_open U_ICU_ENTRY_POINT_RENAME(udtitvfmt_open)
+#define udtitvfmt_openResult U_ICU_ENTRY_POINT_RENAME(udtitvfmt_openResult)
+#define udtitvfmt_resultAsValue U_ICU_ENTRY_POINT_RENAME(udtitvfmt_resultAsValue)
+#define udtitvfmt_setContext U_ICU_ENTRY_POINT_RENAME(udtitvfmt_setContext)
+#define uenum_close U_ICU_ENTRY_POINT_RENAME(uenum_close)
+#define uenum_count U_ICU_ENTRY_POINT_RENAME(uenum_count)
+#define uenum_next U_ICU_ENTRY_POINT_RENAME(uenum_next)
+#define uenum_nextDefault U_ICU_ENTRY_POINT_RENAME(uenum_nextDefault)
+#define uenum_openCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openCharStringsEnumeration)
+#define uenum_openFromStringEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openFromStringEnumeration)
+#define uenum_openUCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openUCharStringsEnumeration)
+#define uenum_reset U_ICU_ENTRY_POINT_RENAME(uenum_reset)
+#define uenum_unext U_ICU_ENTRY_POINT_RENAME(uenum_unext)
+#define uenum_unextDefault U_ICU_ENTRY_POINT_RENAME(uenum_unextDefault)
+#define ufieldpositer_close U_ICU_ENTRY_POINT_RENAME(ufieldpositer_close)
+#define ufieldpositer_next U_ICU_ENTRY_POINT_RENAME(ufieldpositer_next)
+#define ufieldpositer_open U_ICU_ENTRY_POINT_RENAME(ufieldpositer_open)
+#define ufile_getch U_ICU_ENTRY_POINT_RENAME(ufile_getch)
+#define ufile_getch32 U_ICU_ENTRY_POINT_RENAME(ufile_getch32)
+#define ufmt_close U_ICU_ENTRY_POINT_RENAME(ufmt_close)
+#define ufmt_getArrayItemByIndex U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayItemByIndex)
+#define ufmt_getArrayLength U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayLength)
+#define ufmt_getDate U_ICU_ENTRY_POINT_RENAME(ufmt_getDate)
+#define ufmt_getDecNumChars U_ICU_ENTRY_POINT_RENAME(ufmt_getDecNumChars)
+#define ufmt_getDouble U_ICU_ENTRY_POINT_RENAME(ufmt_getDouble)
+#define ufmt_getInt64 U_ICU_ENTRY_POINT_RENAME(ufmt_getInt64)
+#define ufmt_getLong U_ICU_ENTRY_POINT_RENAME(ufmt_getLong)
+#define ufmt_getObject U_ICU_ENTRY_POINT_RENAME(ufmt_getObject)
+#define ufmt_getType U_ICU_ENTRY_POINT_RENAME(ufmt_getType)
+#define ufmt_getUChars U_ICU_ENTRY_POINT_RENAME(ufmt_getUChars)
+#define ufmt_isNumeric U_ICU_ENTRY_POINT_RENAME(ufmt_isNumeric)
+#define ufmt_open U_ICU_ENTRY_POINT_RENAME(ufmt_open)
+#define ufmtval_getString U_ICU_ENTRY_POINT_RENAME(ufmtval_getString)
+#define ufmtval_nextPosition U_ICU_ENTRY_POINT_RENAME(ufmtval_nextPosition)
+#define ugender_getInstance U_ICU_ENTRY_POINT_RENAME(ugender_getInstance)
+#define ugender_getListGender U_ICU_ENTRY_POINT_RENAME(ugender_getListGender)
+#define uhash_close U_ICU_ENTRY_POINT_RENAME(uhash_close)
+#define uhash_compareCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareCaselessUnicodeString)
+#define uhash_compareChars U_ICU_ENTRY_POINT_RENAME(uhash_compareChars)
+#define uhash_compareIChars U_ICU_ENTRY_POINT_RENAME(uhash_compareIChars)
+#define uhash_compareLong U_ICU_ENTRY_POINT_RENAME(uhash_compareLong)
+#define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet)
+#define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars)
+#define uhash_compareUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareUnicodeString)
+#define uhash_count U_ICU_ENTRY_POINT_RENAME(uhash_count)
+#define uhash_deleteHashtable U_ICU_ENTRY_POINT_RENAME(uhash_deleteHashtable)
+#define uhash_deleteScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_deleteScriptSet)
+#define uhash_equals U_ICU_ENTRY_POINT_RENAME(uhash_equals)
+#define uhash_equalsScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_equalsScriptSet)
+#define uhash_find U_ICU_ENTRY_POINT_RENAME(uhash_find)
+#define uhash_get U_ICU_ENTRY_POINT_RENAME(uhash_get)
+#define uhash_geti U_ICU_ENTRY_POINT_RENAME(uhash_geti)
+#define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString)
+#define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars)
+#define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars)
+#define uhash_hashLong U_ICU_ENTRY_POINT_RENAME(uhash_hashLong)
+#define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet)
+#define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars)
+#define uhash_hashUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashUnicodeString)
+#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget)
+#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti)
+#define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init)
+#define uhash_initSize U_ICU_ENTRY_POINT_RENAME(uhash_initSize)
+#define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput)
+#define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi)
+#define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove)
+#define uhash_iremovei U_ICU_ENTRY_POINT_RENAME(uhash_iremovei)
+#define uhash_nextElement U_ICU_ENTRY_POINT_RENAME(uhash_nextElement)
+#define uhash_open U_ICU_ENTRY_POINT_RENAME(uhash_open)
+#define uhash_openSize U_ICU_ENTRY_POINT_RENAME(uhash_openSize)
+#define uhash_put U_ICU_ENTRY_POINT_RENAME(uhash_put)
+#define uhash_puti U_ICU_ENTRY_POINT_RENAME(uhash_puti)
+#define uhash_remove U_ICU_ENTRY_POINT_RENAME(uhash_remove)
+#define uhash_removeAll U_ICU_ENTRY_POINT_RENAME(uhash_removeAll)
+#define uhash_removeElement U_ICU_ENTRY_POINT_RENAME(uhash_removeElement)
+#define uhash_removei U_ICU_ENTRY_POINT_RENAME(uhash_removei)
+#define uhash_setKeyComparator U_ICU_ENTRY_POINT_RENAME(uhash_setKeyComparator)
+#define uhash_setKeyDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setKeyDeleter)
+#define uhash_setKeyHasher U_ICU_ENTRY_POINT_RENAME(uhash_setKeyHasher)
+#define uhash_setResizePolicy U_ICU_ENTRY_POINT_RENAME(uhash_setResizePolicy)
+#define uhash_setValueComparator U_ICU_ENTRY_POINT_RENAME(uhash_setValueComparator)
+#define uhash_setValueDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setValueDeleter)
+#define uidna_IDNToASCII U_ICU_ENTRY_POINT_RENAME(uidna_IDNToASCII)
+#define uidna_IDNToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_IDNToUnicode)
+#define uidna_close U_ICU_ENTRY_POINT_RENAME(uidna_close)
+#define uidna_compare U_ICU_ENTRY_POINT_RENAME(uidna_compare)
+#define uidna_labelToASCII U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII)
+#define uidna_labelToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII_UTF8)
+#define uidna_labelToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicode)
+#define uidna_labelToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicodeUTF8)
+#define uidna_nameToASCII U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII)
+#define uidna_nameToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII_UTF8)
+#define uidna_nameToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicode)
+#define uidna_nameToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicodeUTF8)
+#define uidna_openUTS46 U_ICU_ENTRY_POINT_RENAME(uidna_openUTS46)
+#define uidna_toASCII U_ICU_ENTRY_POINT_RENAME(uidna_toASCII)
+#define uidna_toUnicode U_ICU_ENTRY_POINT_RENAME(uidna_toUnicode)
+#define uiter_current32 U_ICU_ENTRY_POINT_RENAME(uiter_current32)
+#define uiter_getState U_ICU_ENTRY_POINT_RENAME(uiter_getState)
+#define uiter_next32 U_ICU_ENTRY_POINT_RENAME(uiter_next32)
+#define uiter_previous32 U_ICU_ENTRY_POINT_RENAME(uiter_previous32)
+#define uiter_setCharacterIterator U_ICU_ENTRY_POINT_RENAME(uiter_setCharacterIterator)
+#define uiter_setReplaceable U_ICU_ENTRY_POINT_RENAME(uiter_setReplaceable)
+#define uiter_setState U_ICU_ENTRY_POINT_RENAME(uiter_setState)
+#define uiter_setString U_ICU_ENTRY_POINT_RENAME(uiter_setString)
+#define uiter_setUTF16BE U_ICU_ENTRY_POINT_RENAME(uiter_setUTF16BE)
+#define uiter_setUTF8 U_ICU_ENTRY_POINT_RENAME(uiter_setUTF8)
+#define uldn_close U_ICU_ENTRY_POINT_RENAME(uldn_close)
+#define uldn_getContext U_ICU_ENTRY_POINT_RENAME(uldn_getContext)
+#define uldn_getDialectHandling U_ICU_ENTRY_POINT_RENAME(uldn_getDialectHandling)
+#define uldn_getLocale U_ICU_ENTRY_POINT_RENAME(uldn_getLocale)
+#define uldn_keyDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyDisplayName)
+#define uldn_keyValueDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyValueDisplayName)
+#define uldn_languageDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_languageDisplayName)
+#define uldn_localeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_localeDisplayName)
+#define uldn_open U_ICU_ENTRY_POINT_RENAME(uldn_open)
+#define uldn_openForContext U_ICU_ENTRY_POINT_RENAME(uldn_openForContext)
+#define uldn_regionDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_regionDisplayName)
+#define uldn_scriptCodeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptCodeDisplayName)
+#define uldn_scriptDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptDisplayName)
+#define uldn_variantDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_variantDisplayName)
+#define ulist_addItemBeginList U_ICU_ENTRY_POINT_RENAME(ulist_addItemBeginList)
+#define ulist_addItemEndList U_ICU_ENTRY_POINT_RENAME(ulist_addItemEndList)
+#define ulist_close_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_close_keyword_values_iterator)
+#define ulist_containsString U_ICU_ENTRY_POINT_RENAME(ulist_containsString)
+#define ulist_count_keyword_values U_ICU_ENTRY_POINT_RENAME(ulist_count_keyword_values)
+#define ulist_createEmptyList U_ICU_ENTRY_POINT_RENAME(ulist_createEmptyList)
+#define ulist_deleteList U_ICU_ENTRY_POINT_RENAME(ulist_deleteList)
+#define ulist_getListFromEnum U_ICU_ENTRY_POINT_RENAME(ulist_getListFromEnum)
+#define ulist_getListSize U_ICU_ENTRY_POINT_RENAME(ulist_getListSize)
+#define ulist_getNext U_ICU_ENTRY_POINT_RENAME(ulist_getNext)
+#define ulist_next_keyword_value U_ICU_ENTRY_POINT_RENAME(ulist_next_keyword_value)
+#define ulist_removeString U_ICU_ENTRY_POINT_RENAME(ulist_removeString)
+#define ulist_resetList U_ICU_ENTRY_POINT_RENAME(ulist_resetList)
+#define ulist_reset_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_reset_keyword_values_iterator)
+#define ulistfmt_close U_ICU_ENTRY_POINT_RENAME(ulistfmt_close)
+#define ulistfmt_closeResult U_ICU_ENTRY_POINT_RENAME(ulistfmt_closeResult)
+#define ulistfmt_format U_ICU_ENTRY_POINT_RENAME(ulistfmt_format)
+#define ulistfmt_formatStringsToResult U_ICU_ENTRY_POINT_RENAME(ulistfmt_formatStringsToResult)
+#define ulistfmt_open U_ICU_ENTRY_POINT_RENAME(ulistfmt_open)
+#define ulistfmt_openForType U_ICU_ENTRY_POINT_RENAME(ulistfmt_openForType)
+#define ulistfmt_openResult U_ICU_ENTRY_POINT_RENAME(ulistfmt_openResult)
+#define ulistfmt_resultAsValue U_ICU_ENTRY_POINT_RENAME(ulistfmt_resultAsValue)
+#define uloc_acceptLanguage U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguage)
+#define uloc_acceptLanguageFromHTTP U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguageFromHTTP)
+#define uloc_addLikelySubtags U_ICU_ENTRY_POINT_RENAME(uloc_addLikelySubtags)
+#define uloc_canonicalize U_ICU_ENTRY_POINT_RENAME(uloc_canonicalize)
+#define uloc_countAvailable U_ICU_ENTRY_POINT_RENAME(uloc_countAvailable)
+#define uloc_forLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_forLanguageTag)
+#define uloc_getAvailable U_ICU_ENTRY_POINT_RENAME(uloc_getAvailable)
+#define uloc_getBaseName U_ICU_ENTRY_POINT_RENAME(uloc_getBaseName)
+#define uloc_getCharacterOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getCharacterOrientation)
+#define uloc_getCountry U_ICU_ENTRY_POINT_RENAME(uloc_getCountry)
+#define uloc_getCurrentCountryID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentCountryID)
+#define uloc_getCurrentLanguageID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentLanguageID)
+#define uloc_getDefault U_ICU_ENTRY_POINT_RENAME(uloc_getDefault)
+#define uloc_getDisplayCountry U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayCountry)
+#define uloc_getDisplayKeyword U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeyword)
+#define uloc_getDisplayKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeywordValue)
+#define uloc_getDisplayLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayLanguage)
+#define uloc_getDisplayName U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayName)
+#define uloc_getDisplayScript U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScript)
+#define uloc_getDisplayVariant U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayVariant)
+#define uloc_getISO3Country U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Country)
+#define uloc_getISO3Language U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Language)
+#define uloc_getISOCountries U_ICU_ENTRY_POINT_RENAME(uloc_getISOCountries)
+#define uloc_getISOLanguages U_ICU_ENTRY_POINT_RENAME(uloc_getISOLanguages)
+#define uloc_getKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getKeywordValue)
+#define uloc_getLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLCID)
+#define uloc_getLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getLanguage)
+#define uloc_getLineOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getLineOrientation)
+#define uloc_getLocaleForLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLocaleForLCID)
+#define uloc_getName U_ICU_ENTRY_POINT_RENAME(uloc_getName)
+#define uloc_getParent U_ICU_ENTRY_POINT_RENAME(uloc_getParent)
+#define uloc_getScript U_ICU_ENTRY_POINT_RENAME(uloc_getScript)
+#define uloc_getTableStringWithFallback U_ICU_ENTRY_POINT_RENAME(uloc_getTableStringWithFallback)
+#define uloc_getVariant U_ICU_ENTRY_POINT_RENAME(uloc_getVariant)
+#define uloc_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uloc_isRightToLeft)
+#define uloc_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(uloc_minimizeSubtags)
+#define uloc_openAvailableByType U_ICU_ENTRY_POINT_RENAME(uloc_openAvailableByType)
+#define uloc_openKeywordList U_ICU_ENTRY_POINT_RENAME(uloc_openKeywordList)
+#define uloc_openKeywords U_ICU_ENTRY_POINT_RENAME(uloc_openKeywords)
+#define uloc_setDefault U_ICU_ENTRY_POINT_RENAME(uloc_setDefault)
+#define uloc_setKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_setKeywordValue)
+#define uloc_toLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_toLanguageTag)
+#define uloc_toLegacyKey U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyKey)
+#define uloc_toLegacyType U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyType)
+#define uloc_toUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleKey)
+#define uloc_toUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleType)
+#define ulocdata_close U_ICU_ENTRY_POINT_RENAME(ulocdata_close)
+#define ulocdata_getCLDRVersion U_ICU_ENTRY_POINT_RENAME(ulocdata_getCLDRVersion)
+#define ulocdata_getDelimiter U_ICU_ENTRY_POINT_RENAME(ulocdata_getDelimiter)
+#define ulocdata_getExemplarSet U_ICU_ENTRY_POINT_RENAME(ulocdata_getExemplarSet)
+#define ulocdata_getLocaleDisplayPattern U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleDisplayPattern)
+#define ulocdata_getLocaleSeparator U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleSeparator)
+#define ulocdata_getMeasurementSystem U_ICU_ENTRY_POINT_RENAME(ulocdata_getMeasurementSystem)
+#define ulocdata_getNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_getNoSubstitute)
+#define ulocdata_getPaperSize U_ICU_ENTRY_POINT_RENAME(ulocdata_getPaperSize)
+#define ulocdata_open U_ICU_ENTRY_POINT_RENAME(ulocdata_open)
+#define ulocdata_setNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_setNoSubstitute)
+#define ulocimp_addLikelySubtags U_ICU_ENTRY_POINT_RENAME(ulocimp_addLikelySubtags)
+#define ulocimp_canonicalize U_ICU_ENTRY_POINT_RENAME(ulocimp_canonicalize)
+#define ulocimp_forLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocimp_forLanguageTag)
+#define ulocimp_getBaseName U_ICU_ENTRY_POINT_RENAME(ulocimp_getBaseName)
+#define ulocimp_getCountry U_ICU_ENTRY_POINT_RENAME(ulocimp_getCountry)
+#define ulocimp_getKeywordValue U_ICU_ENTRY_POINT_RENAME(ulocimp_getKeywordValue)
+#define ulocimp_getKeywords U_ICU_ENTRY_POINT_RENAME(ulocimp_getKeywords)
+#define ulocimp_getKnownCanonicalizedLocaleForTest U_ICU_ENTRY_POINT_RENAME(ulocimp_getKnownCanonicalizedLocaleForTest)
+#define ulocimp_getLanguage U_ICU_ENTRY_POINT_RENAME(ulocimp_getLanguage)
+#define ulocimp_getName U_ICU_ENTRY_POINT_RENAME(ulocimp_getName)
+#define ulocimp_getRegionForSupplementalData U_ICU_ENTRY_POINT_RENAME(ulocimp_getRegionForSupplementalData)
+#define ulocimp_getScript U_ICU_ENTRY_POINT_RENAME(ulocimp_getScript)
+#define ulocimp_isCanonicalizedLocaleForTest U_ICU_ENTRY_POINT_RENAME(ulocimp_isCanonicalizedLocaleForTest)
+#define ulocimp_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(ulocimp_minimizeSubtags)
+#define ulocimp_toBcpKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpKey)
+#define ulocimp_toBcpType U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpType)
+#define ulocimp_toLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocimp_toLanguageTag)
+#define ulocimp_toLegacyKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyKey)
+#define ulocimp_toLegacyType U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyType)
+#define ultag_isExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isExtensionSubtags)
+#define ultag_isLanguageSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isLanguageSubtag)
+#define ultag_isPrivateuseValueSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isPrivateuseValueSubtags)
+#define ultag_isRegionSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isRegionSubtag)
+#define ultag_isScriptSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isScriptSubtag)
+#define ultag_isTransformedExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isTransformedExtensionSubtags)
+#define ultag_isUnicodeExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeExtensionSubtags)
+#define ultag_isUnicodeLocaleAttribute U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleAttribute)
+#define ultag_isUnicodeLocaleAttributes U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleAttributes)
+#define ultag_isUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleKey)
+#define ultag_isUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleType)
+#define ultag_isVariantSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isVariantSubtags)
+#define umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern)
+#define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe)
+#define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone)
+#define umsg_close U_ICU_ENTRY_POINT_RENAME(umsg_close)
+#define umsg_format U_ICU_ENTRY_POINT_RENAME(umsg_format)
+#define umsg_getLocale U_ICU_ENTRY_POINT_RENAME(umsg_getLocale)
+#define umsg_open U_ICU_ENTRY_POINT_RENAME(umsg_open)
+#define umsg_parse U_ICU_ENTRY_POINT_RENAME(umsg_parse)
+#define umsg_setLocale U_ICU_ENTRY_POINT_RENAME(umsg_setLocale)
+#define umsg_toPattern U_ICU_ENTRY_POINT_RENAME(umsg_toPattern)
+#define umsg_vformat U_ICU_ENTRY_POINT_RENAME(umsg_vformat)
+#define umsg_vparse U_ICU_ENTRY_POINT_RENAME(umsg_vparse)
+#define umtx_lock U_ICU_ENTRY_POINT_RENAME(umtx_lock)
+#define umtx_unlock U_ICU_ENTRY_POINT_RENAME(umtx_unlock)
+#define umutablecptrie_buildImmutable U_ICU_ENTRY_POINT_RENAME(umutablecptrie_buildImmutable)
+#define umutablecptrie_clone U_ICU_ENTRY_POINT_RENAME(umutablecptrie_clone)
+#define umutablecptrie_close U_ICU_ENTRY_POINT_RENAME(umutablecptrie_close)
+#define umutablecptrie_fromUCPMap U_ICU_ENTRY_POINT_RENAME(umutablecptrie_fromUCPMap)
+#define umutablecptrie_fromUCPTrie U_ICU_ENTRY_POINT_RENAME(umutablecptrie_fromUCPTrie)
+#define umutablecptrie_get U_ICU_ENTRY_POINT_RENAME(umutablecptrie_get)
+#define umutablecptrie_getRange U_ICU_ENTRY_POINT_RENAME(umutablecptrie_getRange)
+#define umutablecptrie_open U_ICU_ENTRY_POINT_RENAME(umutablecptrie_open)
+#define umutablecptrie_set U_ICU_ENTRY_POINT_RENAME(umutablecptrie_set)
+#define umutablecptrie_setRange U_ICU_ENTRY_POINT_RENAME(umutablecptrie_setRange)
+#define uniset_getUnicode32Instance U_ICU_ENTRY_POINT_RENAME(uniset_getUnicode32Instance)
+#define unorm2_append U_ICU_ENTRY_POINT_RENAME(unorm2_append)
+#define unorm2_close U_ICU_ENTRY_POINT_RENAME(unorm2_close)
+#define unorm2_composePair U_ICU_ENTRY_POINT_RENAME(unorm2_composePair)
+#define unorm2_getCombiningClass U_ICU_ENTRY_POINT_RENAME(unorm2_getCombiningClass)
+#define unorm2_getDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getDecomposition)
+#define unorm2_getInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getInstance)
+#define unorm2_getNFCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFCInstance)
+#define unorm2_getNFDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFDInstance)
+#define unorm2_getNFKCCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCCasefoldInstance)
+#define unorm2_getNFKCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCInstance)
+#define unorm2_getNFKDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKDInstance)
+#define unorm2_getRawDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getRawDecomposition)
+#define unorm2_hasBoundaryAfter U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryAfter)
+#define unorm2_hasBoundaryBefore U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryBefore)
+#define unorm2_isInert U_ICU_ENTRY_POINT_RENAME(unorm2_isInert)
+#define unorm2_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm2_isNormalized)
+#define unorm2_normalize U_ICU_ENTRY_POINT_RENAME(unorm2_normalize)
+#define unorm2_normalizeSecondAndAppend U_ICU_ENTRY_POINT_RENAME(unorm2_normalizeSecondAndAppend)
+#define unorm2_openFiltered U_ICU_ENTRY_POINT_RENAME(unorm2_openFiltered)
+#define unorm2_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm2_quickCheck)
+#define unorm2_spanQuickCheckYes U_ICU_ENTRY_POINT_RENAME(unorm2_spanQuickCheckYes)
+#define unorm2_swap U_ICU_ENTRY_POINT_RENAME(unorm2_swap)
+#define unorm_compare U_ICU_ENTRY_POINT_RENAME(unorm_compare)
+#define unorm_concatenate U_ICU_ENTRY_POINT_RENAME(unorm_concatenate)
+#define unorm_getFCD16 U_ICU_ENTRY_POINT_RENAME(unorm_getFCD16)
+#define unorm_getQuickCheck U_ICU_ENTRY_POINT_RENAME(unorm_getQuickCheck)
+#define unorm_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm_isNormalized)
+#define unorm_isNormalizedWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_isNormalizedWithOptions)
+#define unorm_next U_ICU_ENTRY_POINT_RENAME(unorm_next)
+#define unorm_normalize U_ICU_ENTRY_POINT_RENAME(unorm_normalize)
+#define unorm_previous U_ICU_ENTRY_POINT_RENAME(unorm_previous)
+#define unorm_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm_quickCheck)
+#define unorm_quickCheckWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_quickCheckWithOptions)
+#define unum_applyPattern U_ICU_ENTRY_POINT_RENAME(unum_applyPattern)
+#define unum_clone U_ICU_ENTRY_POINT_RENAME(unum_clone)
+#define unum_close U_ICU_ENTRY_POINT_RENAME(unum_close)
+#define unum_countAvailable U_ICU_ENTRY_POINT_RENAME(unum_countAvailable)
+#define unum_format U_ICU_ENTRY_POINT_RENAME(unum_format)
+#define unum_formatDecimal U_ICU_ENTRY_POINT_RENAME(unum_formatDecimal)
+#define unum_formatDouble U_ICU_ENTRY_POINT_RENAME(unum_formatDouble)
+#define unum_formatDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleCurrency)
+#define unum_formatDoubleForFields U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleForFields)
+#define unum_formatInt64 U_ICU_ENTRY_POINT_RENAME(unum_formatInt64)
+#define unum_formatUFormattable U_ICU_ENTRY_POINT_RENAME(unum_formatUFormattable)
+#define unum_getAttribute U_ICU_ENTRY_POINT_RENAME(unum_getAttribute)
+#define unum_getAvailable U_ICU_ENTRY_POINT_RENAME(unum_getAvailable)
+#define unum_getContext U_ICU_ENTRY_POINT_RENAME(unum_getContext)
+#define unum_getDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_getDoubleAttribute)
+#define unum_getLocaleByType U_ICU_ENTRY_POINT_RENAME(unum_getLocaleByType)
+#define unum_getSymbol U_ICU_ENTRY_POINT_RENAME(unum_getSymbol)
+#define unum_getTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_getTextAttribute)
+#define unum_open U_ICU_ENTRY_POINT_RENAME(unum_open)
+#define unum_parse U_ICU_ENTRY_POINT_RENAME(unum_parse)
+#define unum_parseDecimal U_ICU_ENTRY_POINT_RENAME(unum_parseDecimal)
+#define unum_parseDouble U_ICU_ENTRY_POINT_RENAME(unum_parseDouble)
+#define unum_parseDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_parseDoubleCurrency)
+#define unum_parseInt64 U_ICU_ENTRY_POINT_RENAME(unum_parseInt64)
+#define unum_parseToUFormattable U_ICU_ENTRY_POINT_RENAME(unum_parseToUFormattable)
+#define unum_setAttribute U_ICU_ENTRY_POINT_RENAME(unum_setAttribute)
+#define unum_setContext U_ICU_ENTRY_POINT_RENAME(unum_setContext)
+#define unum_setDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_setDoubleAttribute)
+#define unum_setSymbol U_ICU_ENTRY_POINT_RENAME(unum_setSymbol)
+#define unum_setTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_setTextAttribute)
+#define unum_toPattern U_ICU_ENTRY_POINT_RENAME(unum_toPattern)
+#define unumf_close U_ICU_ENTRY_POINT_RENAME(unumf_close)
+#define unumf_closeResult U_ICU_ENTRY_POINT_RENAME(unumf_closeResult)
+#define unumf_formatDecimal U_ICU_ENTRY_POINT_RENAME(unumf_formatDecimal)
+#define unumf_formatDouble U_ICU_ENTRY_POINT_RENAME(unumf_formatDouble)
+#define unumf_formatInt U_ICU_ENTRY_POINT_RENAME(unumf_formatInt)
+#define unumf_openForSkeletonAndLocale U_ICU_ENTRY_POINT_RENAME(unumf_openForSkeletonAndLocale)
+#define unumf_openForSkeletonAndLocaleWithError U_ICU_ENTRY_POINT_RENAME(unumf_openForSkeletonAndLocaleWithError)
+#define unumf_openResult U_ICU_ENTRY_POINT_RENAME(unumf_openResult)
+#define unumf_resultAsValue U_ICU_ENTRY_POINT_RENAME(unumf_resultAsValue)
+#define unumf_resultGetAllFieldPositions U_ICU_ENTRY_POINT_RENAME(unumf_resultGetAllFieldPositions)
+#define unumf_resultNextFieldPosition U_ICU_ENTRY_POINT_RENAME(unumf_resultNextFieldPosition)
+#define unumf_resultToDecimalNumber U_ICU_ENTRY_POINT_RENAME(unumf_resultToDecimalNumber)
+#define unumf_resultToString U_ICU_ENTRY_POINT_RENAME(unumf_resultToString)
+#define unumrf_close U_ICU_ENTRY_POINT_RENAME(unumrf_close)
+#define unumrf_closeResult U_ICU_ENTRY_POINT_RENAME(unumrf_closeResult)
+#define unumrf_formatDecimalRange U_ICU_ENTRY_POINT_RENAME(unumrf_formatDecimalRange)
+#define unumrf_formatDoubleRange U_ICU_ENTRY_POINT_RENAME(unumrf_formatDoubleRange)
+#define unumrf_openForSkeletonWithCollapseAndIdentityFallback U_ICU_ENTRY_POINT_RENAME(unumrf_openForSkeletonWithCollapseAndIdentityFallback)
+#define unumrf_openResult U_ICU_ENTRY_POINT_RENAME(unumrf_openResult)
+#define unumrf_resultAsValue U_ICU_ENTRY_POINT_RENAME(unumrf_resultAsValue)
+#define unumrf_resultGetFirstDecimalNumber U_ICU_ENTRY_POINT_RENAME(unumrf_resultGetFirstDecimalNumber)
+#define unumrf_resultGetIdentityResult U_ICU_ENTRY_POINT_RENAME(unumrf_resultGetIdentityResult)
+#define unumrf_resultGetSecondDecimalNumber U_ICU_ENTRY_POINT_RENAME(unumrf_resultGetSecondDecimalNumber)
+#define unumsys_close U_ICU_ENTRY_POINT_RENAME(unumsys_close)
+#define unumsys_getDescription U_ICU_ENTRY_POINT_RENAME(unumsys_getDescription)
+#define unumsys_getName U_ICU_ENTRY_POINT_RENAME(unumsys_getName)
+#define unumsys_getRadix U_ICU_ENTRY_POINT_RENAME(unumsys_getRadix)
+#define unumsys_isAlgorithmic U_ICU_ENTRY_POINT_RENAME(unumsys_isAlgorithmic)
+#define unumsys_open U_ICU_ENTRY_POINT_RENAME(unumsys_open)
+#define unumsys_openAvailableNames U_ICU_ENTRY_POINT_RENAME(unumsys_openAvailableNames)
+#define unumsys_openByName U_ICU_ENTRY_POINT_RENAME(unumsys_openByName)
+#define uplrules_close U_ICU_ENTRY_POINT_RENAME(uplrules_close)
+#define uplrules_getKeywords U_ICU_ENTRY_POINT_RENAME(uplrules_getKeywords)
+#define uplrules_open U_ICU_ENTRY_POINT_RENAME(uplrules_open)
+#define uplrules_openForType U_ICU_ENTRY_POINT_RENAME(uplrules_openForType)
+#define uplrules_select U_ICU_ENTRY_POINT_RENAME(uplrules_select)
+#define uplrules_selectForRange U_ICU_ENTRY_POINT_RENAME(uplrules_selectForRange)
+#define uplrules_selectFormatted U_ICU_ENTRY_POINT_RENAME(uplrules_selectFormatted)
+#define uplrules_selectWithFormat U_ICU_ENTRY_POINT_RENAME(uplrules_selectWithFormat)
+#define uplug_closeLibrary U_ICU_ENTRY_POINT_RENAME(uplug_closeLibrary)
+#define uplug_findLibrary U_ICU_ENTRY_POINT_RENAME(uplug_findLibrary)
+#define uplug_getConfiguration U_ICU_ENTRY_POINT_RENAME(uplug_getConfiguration)
+#define uplug_getContext U_ICU_ENTRY_POINT_RENAME(uplug_getContext)
+#define uplug_getCurrentLevel U_ICU_ENTRY_POINT_RENAME(uplug_getCurrentLevel)
+#define uplug_getLibrary U_ICU_ENTRY_POINT_RENAME(uplug_getLibrary)
+#define uplug_getLibraryName U_ICU_ENTRY_POINT_RENAME(uplug_getLibraryName)
+#define uplug_getPlugInternal U_ICU_ENTRY_POINT_RENAME(uplug_getPlugInternal)
+#define uplug_getPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLevel)
+#define uplug_getPlugLoadStatus U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLoadStatus)
+#define uplug_getPlugName U_ICU_ENTRY_POINT_RENAME(uplug_getPlugName)
+#define uplug_getPluginFile U_ICU_ENTRY_POINT_RENAME(uplug_getPluginFile)
+#define uplug_getSymbolName U_ICU_ENTRY_POINT_RENAME(uplug_getSymbolName)
+#define uplug_init U_ICU_ENTRY_POINT_RENAME(uplug_init)
+#define uplug_loadPlugFromEntrypoint U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromEntrypoint)
+#define uplug_loadPlugFromLibrary U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromLibrary)
+#define uplug_nextPlug U_ICU_ENTRY_POINT_RENAME(uplug_nextPlug)
+#define uplug_openLibrary U_ICU_ENTRY_POINT_RENAME(uplug_openLibrary)
+#define uplug_removePlug U_ICU_ENTRY_POINT_RENAME(uplug_removePlug)
+#define uplug_setContext U_ICU_ENTRY_POINT_RENAME(uplug_setContext)
+#define uplug_setPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_setPlugLevel)
+#define uplug_setPlugName U_ICU_ENTRY_POINT_RENAME(uplug_setPlugName)
+#define uplug_setPlugNoUnload U_ICU_ENTRY_POINT_RENAME(uplug_setPlugNoUnload)
+#define uprops_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(uprops_addPropertyStarts)
+#define uprops_getSource U_ICU_ENTRY_POINT_RENAME(uprops_getSource)
+#define upropsvec_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(upropsvec_addPropertyStarts)
+#define uprv_add32_overflow U_ICU_ENTRY_POINT_RENAME(uprv_add32_overflow)
+#define uprv_aestrncpy U_ICU_ENTRY_POINT_RENAME(uprv_aestrncpy)
+#define uprv_asciiFromEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_asciiFromEbcdic)
+#define uprv_asciitolower U_ICU_ENTRY_POINT_RENAME(uprv_asciitolower)
+#define uprv_calloc U_ICU_ENTRY_POINT_RENAME(uprv_calloc)
+#define uprv_ceil U_ICU_ENTRY_POINT_RENAME(uprv_ceil)
+#define uprv_compareASCIIPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareASCIIPropertyNames)
+#define uprv_compareEBCDICPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareEBCDICPropertyNames)
+#define uprv_compareInvAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvAscii)
+#define uprv_compareInvEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdic)
+#define uprv_compareInvEbcdicAsAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdicAsAscii)
+#define uprv_convertToLCID U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCID)
+#define uprv_convertToLCIDPlatform U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCIDPlatform)
+#define uprv_convertToPosix U_ICU_ENTRY_POINT_RENAME(uprv_convertToPosix)
+#define uprv_copyAscii U_ICU_ENTRY_POINT_RENAME(uprv_copyAscii)
+#define uprv_copyEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_copyEbcdic)
+#define uprv_decContextClearStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextClearStatus)
+#define uprv_decContextDefault U_ICU_ENTRY_POINT_RENAME(uprv_decContextDefault)
+#define uprv_decContextGetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetRounding)
+#define uprv_decContextGetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetStatus)
+#define uprv_decContextRestoreStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextRestoreStatus)
+#define uprv_decContextSaveStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSaveStatus)
+#define uprv_decContextSetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetRounding)
+#define uprv_decContextSetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatus)
+#define uprv_decContextSetStatusFromString U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromString)
+#define uprv_decContextSetStatusFromStringQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromStringQuiet)
+#define uprv_decContextSetStatusQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusQuiet)
+#define uprv_decContextStatusToString U_ICU_ENTRY_POINT_RENAME(uprv_decContextStatusToString)
+#define uprv_decContextTestSavedStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestSavedStatus)
+#define uprv_decContextTestStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestStatus)
+#define uprv_decContextZeroStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextZeroStatus)
+#define uprv_decNumberAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAbs)
+#define uprv_decNumberAdd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAdd)
+#define uprv_decNumberAnd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAnd)
+#define uprv_decNumberClassToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClassToString)
+#define uprv_decNumberCompare U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompare)
+#define uprv_decNumberCompareSignal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareSignal)
+#define uprv_decNumberCompareTotal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotal)
+#define uprv_decNumberCompareTotalMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotalMag)
+#define uprv_decNumberCopy U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopy)
+#define uprv_decNumberCopyAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyAbs)
+#define uprv_decNumberCopyNegate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyNegate)
+#define uprv_decNumberCopySign U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopySign)
+#define uprv_decNumberDivide U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivide)
+#define uprv_decNumberDivideInteger U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivideInteger)
+#define uprv_decNumberExp U_ICU_ENTRY_POINT_RENAME(uprv_decNumberExp)
+#define uprv_decNumberFMA U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFMA)
+#define uprv_decNumberFromInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromInt32)
+#define uprv_decNumberFromString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromString)
+#define uprv_decNumberFromUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromUInt32)
+#define uprv_decNumberGetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberGetBCD)
+#define uprv_decNumberInvert U_ICU_ENTRY_POINT_RENAME(uprv_decNumberInvert)
+#define uprv_decNumberIsNormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsNormal)
+#define uprv_decNumberIsSubnormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsSubnormal)
+#define uprv_decNumberLn U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLn)
+#define uprv_decNumberLog10 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLog10)
+#define uprv_decNumberLogB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLogB)
+#define uprv_decNumberMax U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMax)
+#define uprv_decNumberMaxMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMaxMag)
+#define uprv_decNumberMin U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMin)
+#define uprv_decNumberMinMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinMag)
+#define uprv_decNumberMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinus)
+#define uprv_decNumberMultiply U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMultiply)
+#define uprv_decNumberNextMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextMinus)
+#define uprv_decNumberNextPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextPlus)
+#define uprv_decNumberNextToward U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextToward)
+#define uprv_decNumberNormalize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNormalize)
+#define uprv_decNumberOr U_ICU_ENTRY_POINT_RENAME(uprv_decNumberOr)
+#define uprv_decNumberPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPlus)
+#define uprv_decNumberPower U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPower)
+#define uprv_decNumberQuantize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberQuantize)
+#define uprv_decNumberReduce U_ICU_ENTRY_POINT_RENAME(uprv_decNumberReduce)
+#define uprv_decNumberRemainder U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainder)
+#define uprv_decNumberRemainderNear U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainderNear)
+#define uprv_decNumberRescale U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRescale)
+#define uprv_decNumberRotate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRotate)
+#define uprv_decNumberSameQuantum U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSameQuantum)
+#define uprv_decNumberScaleB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberScaleB)
+#define uprv_decNumberSetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSetBCD)
+#define uprv_decNumberShift U_ICU_ENTRY_POINT_RENAME(uprv_decNumberShift)
+#define uprv_decNumberSquareRoot U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSquareRoot)
+#define uprv_decNumberSubtract U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSubtract)
+#define uprv_decNumberToEngString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToEngString)
+#define uprv_decNumberToInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToInt32)
+#define uprv_decNumberToIntegralExact U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralExact)
+#define uprv_decNumberToIntegralValue U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralValue)
+#define uprv_decNumberToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToString)
+#define uprv_decNumberToUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToUInt32)
+#define uprv_decNumberTrim U_ICU_ENTRY_POINT_RENAME(uprv_decNumberTrim)
+#define uprv_decNumberVersion U_ICU_ENTRY_POINT_RENAME(uprv_decNumberVersion)
+#define uprv_decNumberXor U_ICU_ENTRY_POINT_RENAME(uprv_decNumberXor)
+#define uprv_decNumberZero U_ICU_ENTRY_POINT_RENAME(uprv_decNumberZero)
+#define uprv_deleteConditionalCE32 U_ICU_ENTRY_POINT_RENAME(uprv_deleteConditionalCE32)
+#define uprv_deleteUObject U_ICU_ENTRY_POINT_RENAME(uprv_deleteUObject)
+#define uprv_dl_close U_ICU_ENTRY_POINT_RENAME(uprv_dl_close)
+#define uprv_dl_open U_ICU_ENTRY_POINT_RENAME(uprv_dl_open)
+#define uprv_dlsym_func U_ICU_ENTRY_POINT_RENAME(uprv_dlsym_func)
+#define uprv_eastrncpy U_ICU_ENTRY_POINT_RENAME(uprv_eastrncpy)
+#define uprv_ebcdicFromAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicFromAscii)
+#define uprv_ebcdicToAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicToAscii)
+#define uprv_ebcdicToLowercaseAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicToLowercaseAscii)
+#define uprv_ebcdictolower U_ICU_ENTRY_POINT_RENAME(uprv_ebcdictolower)
+#define uprv_fabs U_ICU_ENTRY_POINT_RENAME(uprv_fabs)
+#define uprv_floor U_ICU_ENTRY_POINT_RENAME(uprv_floor)
+#define uprv_fmax U_ICU_ENTRY_POINT_RENAME(uprv_fmax)
+#define uprv_fmin U_ICU_ENTRY_POINT_RENAME(uprv_fmin)
+#define uprv_fmod U_ICU_ENTRY_POINT_RENAME(uprv_fmod)
+#define uprv_free U_ICU_ENTRY_POINT_RENAME(uprv_free)
+#define uprv_getCharNameCharacters U_ICU_ENTRY_POINT_RENAME(uprv_getCharNameCharacters)
+#define uprv_getDefaultLocaleID U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultLocaleID)
+#define uprv_getInfinity U_ICU_ENTRY_POINT_RENAME(uprv_getInfinity)
+#define uprv_getMaxCharNameLength U_ICU_ENTRY_POINT_RENAME(uprv_getMaxCharNameLength)
+#define uprv_getMaxValues U_ICU_ENTRY_POINT_RENAME(uprv_getMaxValues)
+#define uprv_getNaN U_ICU_ENTRY_POINT_RENAME(uprv_getNaN)
+#define uprv_getRawUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getRawUTCtime)
+#define uprv_getStaticCurrencyName U_ICU_ENTRY_POINT_RENAME(uprv_getStaticCurrencyName)
+#define uprv_getUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime)
+#define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator)
+#define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter)
+#define uprv_isEbcdicAtSign U_ICU_ENTRY_POINT_RENAME(uprv_isEbcdicAtSign)
+#define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite)
+#define uprv_isInvariantString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantString)
+#define uprv_isInvariantUString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantUString)
+#define uprv_isNaN U_ICU_ENTRY_POINT_RENAME(uprv_isNaN)
+#define uprv_isNegativeInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isNegativeInfinity)
+#define uprv_isPositiveInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isPositiveInfinity)
+#define uprv_itou U_ICU_ENTRY_POINT_RENAME(uprv_itou)
+#define uprv_log U_ICU_ENTRY_POINT_RENAME(uprv_log)
+#define uprv_malloc U_ICU_ENTRY_POINT_RENAME(uprv_malloc)
+#define uprv_mapFile U_ICU_ENTRY_POINT_RENAME(uprv_mapFile)
+#define uprv_max U_ICU_ENTRY_POINT_RENAME(uprv_max)
+#define uprv_maxMantissa U_ICU_ENTRY_POINT_RENAME(uprv_maxMantissa)
+#define uprv_maximumPtr U_ICU_ENTRY_POINT_RENAME(uprv_maximumPtr)
+#define uprv_min U_ICU_ENTRY_POINT_RENAME(uprv_min)
+#define uprv_modf U_ICU_ENTRY_POINT_RENAME(uprv_modf)
+#define uprv_mul32_overflow U_ICU_ENTRY_POINT_RENAME(uprv_mul32_overflow)
+#define uprv_parseCurrency U_ICU_ENTRY_POINT_RENAME(uprv_parseCurrency)
+#define uprv_pathIsAbsolute U_ICU_ENTRY_POINT_RENAME(uprv_pathIsAbsolute)
+#define uprv_pow U_ICU_ENTRY_POINT_RENAME(uprv_pow)
+#define uprv_pow10 U_ICU_ENTRY_POINT_RENAME(uprv_pow10)
+#define uprv_realloc U_ICU_ENTRY_POINT_RENAME(uprv_realloc)
+#define uprv_round U_ICU_ENTRY_POINT_RENAME(uprv_round)
+#define uprv_sortArray U_ICU_ENTRY_POINT_RENAME(uprv_sortArray)
+#define uprv_stableBinarySearch U_ICU_ENTRY_POINT_RENAME(uprv_stableBinarySearch)
+#define uprv_strCompare U_ICU_ENTRY_POINT_RENAME(uprv_strCompare)
+#define uprv_strdup U_ICU_ENTRY_POINT_RENAME(uprv_strdup)
+#define uprv_stricmp U_ICU_ENTRY_POINT_RENAME(uprv_stricmp)
+#define uprv_strndup U_ICU_ENTRY_POINT_RENAME(uprv_strndup)
+#define uprv_strnicmp U_ICU_ENTRY_POINT_RENAME(uprv_strnicmp)
+#define uprv_syntaxError U_ICU_ENTRY_POINT_RENAME(uprv_syntaxError)
+#define uprv_timezone U_ICU_ENTRY_POINT_RENAME(uprv_timezone)
+#define uprv_toupper U_ICU_ENTRY_POINT_RENAME(uprv_toupper)
+#define uprv_trunc U_ICU_ENTRY_POINT_RENAME(uprv_trunc)
+#define uprv_tzname U_ICU_ENTRY_POINT_RENAME(uprv_tzname)
+#define uprv_tzname_clear_cache U_ICU_ENTRY_POINT_RENAME(uprv_tzname_clear_cache)
+#define uprv_tzset U_ICU_ENTRY_POINT_RENAME(uprv_tzset)
+#define uprv_uint16Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint16Comparator)
+#define uprv_uint32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint32Comparator)
+#define uprv_unmapFile U_ICU_ENTRY_POINT_RENAME(uprv_unmapFile)
+#define upvec_cloneArray U_ICU_ENTRY_POINT_RENAME(upvec_cloneArray)
+#define upvec_close U_ICU_ENTRY_POINT_RENAME(upvec_close)
+#define upvec_compact U_ICU_ENTRY_POINT_RENAME(upvec_compact)
+#define upvec_compactToUTrie2Handler U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2Handler)
+#define upvec_compactToUTrie2WithRowIndexes U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2WithRowIndexes)
+#define upvec_getArray U_ICU_ENTRY_POINT_RENAME(upvec_getArray)
+#define upvec_getRow U_ICU_ENTRY_POINT_RENAME(upvec_getRow)
+#define upvec_getValue U_ICU_ENTRY_POINT_RENAME(upvec_getValue)
+#define upvec_open U_ICU_ENTRY_POINT_RENAME(upvec_open)
+#define upvec_setValue U_ICU_ENTRY_POINT_RENAME(upvec_setValue)
+#define uregex_appendReplacement U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacement)
+#define uregex_appendReplacementUText U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacementUText)
+#define uregex_appendTail U_ICU_ENTRY_POINT_RENAME(uregex_appendTail)
+#define uregex_appendTailUText U_ICU_ENTRY_POINT_RENAME(uregex_appendTailUText)
+#define uregex_clone U_ICU_ENTRY_POINT_RENAME(uregex_clone)
+#define uregex_close U_ICU_ENTRY_POINT_RENAME(uregex_close)
+#define uregex_end U_ICU_ENTRY_POINT_RENAME(uregex_end)
+#define uregex_end64 U_ICU_ENTRY_POINT_RENAME(uregex_end64)
+#define uregex_find U_ICU_ENTRY_POINT_RENAME(uregex_find)
+#define uregex_find64 U_ICU_ENTRY_POINT_RENAME(uregex_find64)
+#define uregex_findNext U_ICU_ENTRY_POINT_RENAME(uregex_findNext)
+#define uregex_flags U_ICU_ENTRY_POINT_RENAME(uregex_flags)
+#define uregex_getFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_getFindProgressCallback)
+#define uregex_getMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_getMatchCallback)
+#define uregex_getStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_getStackLimit)
+#define uregex_getText U_ICU_ENTRY_POINT_RENAME(uregex_getText)
+#define uregex_getTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_getTimeLimit)
+#define uregex_getUText U_ICU_ENTRY_POINT_RENAME(uregex_getUText)
+#define uregex_group U_ICU_ENTRY_POINT_RENAME(uregex_group)
+#define uregex_groupCount U_ICU_ENTRY_POINT_RENAME(uregex_groupCount)
+#define uregex_groupNumberFromCName U_ICU_ENTRY_POINT_RENAME(uregex_groupNumberFromCName)
+#define uregex_groupNumberFromName U_ICU_ENTRY_POINT_RENAME(uregex_groupNumberFromName)
+#define uregex_groupUText U_ICU_ENTRY_POINT_RENAME(uregex_groupUText)
+#define uregex_hasAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasAnchoringBounds)
+#define uregex_hasTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasTransparentBounds)
+#define uregex_hitEnd U_ICU_ENTRY_POINT_RENAME(uregex_hitEnd)
+#define uregex_lookingAt U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt)
+#define uregex_lookingAt64 U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt64)
+#define uregex_matches U_ICU_ENTRY_POINT_RENAME(uregex_matches)
+#define uregex_matches64 U_ICU_ENTRY_POINT_RENAME(uregex_matches64)
+#define uregex_open U_ICU_ENTRY_POINT_RENAME(uregex_open)
+#define uregex_openC U_ICU_ENTRY_POINT_RENAME(uregex_openC)
+#define uregex_openUText U_ICU_ENTRY_POINT_RENAME(uregex_openUText)
+#define uregex_pattern U_ICU_ENTRY_POINT_RENAME(uregex_pattern)
+#define uregex_patternUText U_ICU_ENTRY_POINT_RENAME(uregex_patternUText)
+#define uregex_refreshUText U_ICU_ENTRY_POINT_RENAME(uregex_refreshUText)
+#define uregex_regionEnd U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd)
+#define uregex_regionEnd64 U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd64)
+#define uregex_regionStart U_ICU_ENTRY_POINT_RENAME(uregex_regionStart)
+#define uregex_regionStart64 U_ICU_ENTRY_POINT_RENAME(uregex_regionStart64)
+#define uregex_replaceAll U_ICU_ENTRY_POINT_RENAME(uregex_replaceAll)
+#define uregex_replaceAllUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceAllUText)
+#define uregex_replaceFirst U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirst)
+#define uregex_replaceFirstUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirstUText)
+#define uregex_requireEnd U_ICU_ENTRY_POINT_RENAME(uregex_requireEnd)
+#define uregex_reset U_ICU_ENTRY_POINT_RENAME(uregex_reset)
+#define uregex_reset64 U_ICU_ENTRY_POINT_RENAME(uregex_reset64)
+#define uregex_setFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_setFindProgressCallback)
+#define uregex_setMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_setMatchCallback)
+#define uregex_setRegion U_ICU_ENTRY_POINT_RENAME(uregex_setRegion)
+#define uregex_setRegion64 U_ICU_ENTRY_POINT_RENAME(uregex_setRegion64)
+#define uregex_setRegionAndStart U_ICU_ENTRY_POINT_RENAME(uregex_setRegionAndStart)
+#define uregex_setStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_setStackLimit)
+#define uregex_setText U_ICU_ENTRY_POINT_RENAME(uregex_setText)
+#define uregex_setTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_setTimeLimit)
+#define uregex_setUText U_ICU_ENTRY_POINT_RENAME(uregex_setUText)
+#define uregex_split U_ICU_ENTRY_POINT_RENAME(uregex_split)
+#define uregex_splitUText U_ICU_ENTRY_POINT_RENAME(uregex_splitUText)
+#define uregex_start U_ICU_ENTRY_POINT_RENAME(uregex_start)
+#define uregex_start64 U_ICU_ENTRY_POINT_RENAME(uregex_start64)
+#define uregex_ucstr_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_ucstr_unescape_charAt)
+#define uregex_useAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_useAnchoringBounds)
+#define uregex_useTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_useTransparentBounds)
+#define uregex_utext_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_utext_unescape_charAt)
+#define uregion_areEqual U_ICU_ENTRY_POINT_RENAME(uregion_areEqual)
+#define uregion_contains U_ICU_ENTRY_POINT_RENAME(uregion_contains)
+#define uregion_getAvailable U_ICU_ENTRY_POINT_RENAME(uregion_getAvailable)
+#define uregion_getContainedRegions U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegions)
+#define uregion_getContainedRegionsOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegionsOfType)
+#define uregion_getContainingRegion U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegion)
+#define uregion_getContainingRegionOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegionOfType)
+#define uregion_getNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getNumericCode)
+#define uregion_getPreferredValues U_ICU_ENTRY_POINT_RENAME(uregion_getPreferredValues)
+#define uregion_getRegionCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionCode)
+#define uregion_getRegionFromCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromCode)
+#define uregion_getRegionFromNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromNumericCode)
+#define uregion_getType U_ICU_ENTRY_POINT_RENAME(uregion_getType)
+#define ureldatefmt_close U_ICU_ENTRY_POINT_RENAME(ureldatefmt_close)
+#define ureldatefmt_closeResult U_ICU_ENTRY_POINT_RENAME(ureldatefmt_closeResult)
+#define ureldatefmt_combineDateAndTime U_ICU_ENTRY_POINT_RENAME(ureldatefmt_combineDateAndTime)
+#define ureldatefmt_format U_ICU_ENTRY_POINT_RENAME(ureldatefmt_format)
+#define ureldatefmt_formatNumeric U_ICU_ENTRY_POINT_RENAME(ureldatefmt_formatNumeric)
+#define ureldatefmt_formatNumericToResult U_ICU_ENTRY_POINT_RENAME(ureldatefmt_formatNumericToResult)
+#define ureldatefmt_formatToResult U_ICU_ENTRY_POINT_RENAME(ureldatefmt_formatToResult)
+#define ureldatefmt_open U_ICU_ENTRY_POINT_RENAME(ureldatefmt_open)
+#define ureldatefmt_openResult U_ICU_ENTRY_POINT_RENAME(ureldatefmt_openResult)
+#define ureldatefmt_resultAsValue U_ICU_ENTRY_POINT_RENAME(ureldatefmt_resultAsValue)
+#define ures_close U_ICU_ENTRY_POINT_RENAME(ures_close)
+#define ures_copyResb U_ICU_ENTRY_POINT_RENAME(ures_copyResb)
+#define ures_countArrayItems U_ICU_ENTRY_POINT_RENAME(ures_countArrayItems)
+#define ures_findResource U_ICU_ENTRY_POINT_RENAME(ures_findResource)
+#define ures_findSubResource U_ICU_ENTRY_POINT_RENAME(ures_findSubResource)
+#define ures_getAllItemsWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getAllItemsWithFallback)
+#define ures_getBinary U_ICU_ENTRY_POINT_RENAME(ures_getBinary)
+#define ures_getByIndex U_ICU_ENTRY_POINT_RENAME(ures_getByIndex)
+#define ures_getByKey U_ICU_ENTRY_POINT_RENAME(ures_getByKey)
+#define ures_getByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getByKeyWithFallback)
+#define ures_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ures_getFunctionalEquivalent)
+#define ures_getInt U_ICU_ENTRY_POINT_RENAME(ures_getInt)
+#define ures_getIntVector U_ICU_ENTRY_POINT_RENAME(ures_getIntVector)
+#define ures_getKey U_ICU_ENTRY_POINT_RENAME(ures_getKey)
+#define ures_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ures_getKeywordValues)
+#define ures_getLocale U_ICU_ENTRY_POINT_RENAME(ures_getLocale)
+#define ures_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ures_getLocaleByType)
+#define ures_getLocaleInternal U_ICU_ENTRY_POINT_RENAME(ures_getLocaleInternal)
+#define ures_getName U_ICU_ENTRY_POINT_RENAME(ures_getName)
+#define ures_getNextResource U_ICU_ENTRY_POINT_RENAME(ures_getNextResource)
+#define ures_getNextString U_ICU_ENTRY_POINT_RENAME(ures_getNextString)
+#define ures_getSize U_ICU_ENTRY_POINT_RENAME(ures_getSize)
+#define ures_getString U_ICU_ENTRY_POINT_RENAME(ures_getString)
+#define ures_getStringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getStringByIndex)
+#define ures_getStringByKey U_ICU_ENTRY_POINT_RENAME(ures_getStringByKey)
+#define ures_getStringByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getStringByKeyWithFallback)
+#define ures_getType U_ICU_ENTRY_POINT_RENAME(ures_getType)
+#define ures_getUInt U_ICU_ENTRY_POINT_RENAME(ures_getUInt)
+#define ures_getUTF8String U_ICU_ENTRY_POINT_RENAME(ures_getUTF8String)
+#define ures_getUTF8StringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByIndex)
+#define ures_getUTF8StringByKey U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByKey)
+#define ures_getValueWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getValueWithFallback)
+#define ures_getVersion U_ICU_ENTRY_POINT_RENAME(ures_getVersion)
+#define ures_getVersionByKey U_ICU_ENTRY_POINT_RENAME(ures_getVersionByKey)
+#define ures_getVersionNumber U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumber)
+#define ures_getVersionNumberInternal U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumberInternal)
+#define ures_hasNext U_ICU_ENTRY_POINT_RENAME(ures_hasNext)
+#define ures_initStackObject U_ICU_ENTRY_POINT_RENAME(ures_initStackObject)
+#define ures_open U_ICU_ENTRY_POINT_RENAME(ures_open)
+#define ures_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ures_openAvailableLocales)
+#define ures_openDirect U_ICU_ENTRY_POINT_RENAME(ures_openDirect)
+#define ures_openDirectFillIn U_ICU_ENTRY_POINT_RENAME(ures_openDirectFillIn)
+#define ures_openFillIn U_ICU_ENTRY_POINT_RENAME(ures_openFillIn)
+#define ures_openNoDefault U_ICU_ENTRY_POINT_RENAME(ures_openNoDefault)
+#define ures_openU U_ICU_ENTRY_POINT_RENAME(ures_openU)
+#define ures_resetIterator U_ICU_ENTRY_POINT_RENAME(ures_resetIterator)
+#define ures_swap U_ICU_ENTRY_POINT_RENAME(ures_swap)
+#define uscript_breaksBetweenLetters U_ICU_ENTRY_POINT_RENAME(uscript_breaksBetweenLetters)
+#define uscript_closeRun U_ICU_ENTRY_POINT_RENAME(uscript_closeRun)
+#define uscript_getCode U_ICU_ENTRY_POINT_RENAME(uscript_getCode)
+#define uscript_getName U_ICU_ENTRY_POINT_RENAME(uscript_getName)
+#define uscript_getSampleString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleString)
+#define uscript_getSampleUnicodeString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleUnicodeString)
+#define uscript_getScript U_ICU_ENTRY_POINT_RENAME(uscript_getScript)
+#define uscript_getScriptExtensions U_ICU_ENTRY_POINT_RENAME(uscript_getScriptExtensions)
+#define uscript_getShortName U_ICU_ENTRY_POINT_RENAME(uscript_getShortName)
+#define uscript_getUsage U_ICU_ENTRY_POINT_RENAME(uscript_getUsage)
+#define uscript_hasScript U_ICU_ENTRY_POINT_RENAME(uscript_hasScript)
+#define uscript_isCased U_ICU_ENTRY_POINT_RENAME(uscript_isCased)
+#define uscript_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uscript_isRightToLeft)
+#define uscript_nextRun U_ICU_ENTRY_POINT_RENAME(uscript_nextRun)
+#define uscript_openRun U_ICU_ENTRY_POINT_RENAME(uscript_openRun)
+#define uscript_resetRun U_ICU_ENTRY_POINT_RENAME(uscript_resetRun)
+#define uscript_setRunText U_ICU_ENTRY_POINT_RENAME(uscript_setRunText)
+#define usearch_close U_ICU_ENTRY_POINT_RENAME(usearch_close)
+#define usearch_first U_ICU_ENTRY_POINT_RENAME(usearch_first)
+#define usearch_following U_ICU_ENTRY_POINT_RENAME(usearch_following)
+#define usearch_getAttribute U_ICU_ENTRY_POINT_RENAME(usearch_getAttribute)
+#define usearch_getBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_getBreakIterator)
+#define usearch_getCollator U_ICU_ENTRY_POINT_RENAME(usearch_getCollator)
+#define usearch_getMatchedLength U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedLength)
+#define usearch_getMatchedStart U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedStart)
+#define usearch_getMatchedText U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedText)
+#define usearch_getOffset U_ICU_ENTRY_POINT_RENAME(usearch_getOffset)
+#define usearch_getPattern U_ICU_ENTRY_POINT_RENAME(usearch_getPattern)
+#define usearch_getText U_ICU_ENTRY_POINT_RENAME(usearch_getText)
+#define usearch_handleNextCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handleNextCanonical)
+#define usearch_handleNextExact U_ICU_ENTRY_POINT_RENAME(usearch_handleNextExact)
+#define usearch_handlePreviousCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousCanonical)
+#define usearch_handlePreviousExact U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousExact)
+#define usearch_last U_ICU_ENTRY_POINT_RENAME(usearch_last)
+#define usearch_next U_ICU_ENTRY_POINT_RENAME(usearch_next)
+#define usearch_open U_ICU_ENTRY_POINT_RENAME(usearch_open)
+#define usearch_openFromCollator U_ICU_ENTRY_POINT_RENAME(usearch_openFromCollator)
+#define usearch_preceding U_ICU_ENTRY_POINT_RENAME(usearch_preceding)
+#define usearch_previous U_ICU_ENTRY_POINT_RENAME(usearch_previous)
+#define usearch_reset U_ICU_ENTRY_POINT_RENAME(usearch_reset)
+#define usearch_search U_ICU_ENTRY_POINT_RENAME(usearch_search)
+#define usearch_searchBackwards U_ICU_ENTRY_POINT_RENAME(usearch_searchBackwards)
+#define usearch_setAttribute U_ICU_ENTRY_POINT_RENAME(usearch_setAttribute)
+#define usearch_setBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_setBreakIterator)
+#define usearch_setCollator U_ICU_ENTRY_POINT_RENAME(usearch_setCollator)
+#define usearch_setOffset U_ICU_ENTRY_POINT_RENAME(usearch_setOffset)
+#define usearch_setPattern U_ICU_ENTRY_POINT_RENAME(usearch_setPattern)
+#define usearch_setText U_ICU_ENTRY_POINT_RENAME(usearch_setText)
+#define uset_add U_ICU_ENTRY_POINT_RENAME(uset_add)
+#define uset_addAll U_ICU_ENTRY_POINT_RENAME(uset_addAll)
+#define uset_addAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_addAllCodePoints)
+#define uset_addRange U_ICU_ENTRY_POINT_RENAME(uset_addRange)
+#define uset_addString U_ICU_ENTRY_POINT_RENAME(uset_addString)
+#define uset_applyIntPropertyValue U_ICU_ENTRY_POINT_RENAME(uset_applyIntPropertyValue)
+#define uset_applyPattern U_ICU_ENTRY_POINT_RENAME(uset_applyPattern)
+#define uset_applyPropertyAlias U_ICU_ENTRY_POINT_RENAME(uset_applyPropertyAlias)
+#define uset_charAt U_ICU_ENTRY_POINT_RENAME(uset_charAt)
+#define uset_clear U_ICU_ENTRY_POINT_RENAME(uset_clear)
+#define uset_clone U_ICU_ENTRY_POINT_RENAME(uset_clone)
+#define uset_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(uset_cloneAsThawed)
+#define uset_close U_ICU_ENTRY_POINT_RENAME(uset_close)
+#define uset_closeOver U_ICU_ENTRY_POINT_RENAME(uset_closeOver)
+#define uset_compact U_ICU_ENTRY_POINT_RENAME(uset_compact)
+#define uset_complement U_ICU_ENTRY_POINT_RENAME(uset_complement)
+#define uset_complementAll U_ICU_ENTRY_POINT_RENAME(uset_complementAll)
+#define uset_contains U_ICU_ENTRY_POINT_RENAME(uset_contains)
+#define uset_containsAll U_ICU_ENTRY_POINT_RENAME(uset_containsAll)
+#define uset_containsAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_containsAllCodePoints)
+#define uset_containsNone U_ICU_ENTRY_POINT_RENAME(uset_containsNone)
+#define uset_containsRange U_ICU_ENTRY_POINT_RENAME(uset_containsRange)
+#define uset_containsSome U_ICU_ENTRY_POINT_RENAME(uset_containsSome)
+#define uset_containsString U_ICU_ENTRY_POINT_RENAME(uset_containsString)
+#define uset_equals U_ICU_ENTRY_POINT_RENAME(uset_equals)
+#define uset_freeze U_ICU_ENTRY_POINT_RENAME(uset_freeze)
+#define uset_getItem U_ICU_ENTRY_POINT_RENAME(uset_getItem)
+#define uset_getItemCount U_ICU_ENTRY_POINT_RENAME(uset_getItemCount)
+#define uset_getSerializedRange U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRange)
+#define uset_getSerializedRangeCount U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRangeCount)
+#define uset_getSerializedSet U_ICU_ENTRY_POINT_RENAME(uset_getSerializedSet)
+#define uset_indexOf U_ICU_ENTRY_POINT_RENAME(uset_indexOf)
+#define uset_isEmpty U_ICU_ENTRY_POINT_RENAME(uset_isEmpty)
+#define uset_isFrozen U_ICU_ENTRY_POINT_RENAME(uset_isFrozen)
+#define uset_open U_ICU_ENTRY_POINT_RENAME(uset_open)
+#define uset_openEmpty U_ICU_ENTRY_POINT_RENAME(uset_openEmpty)
+#define uset_openPattern U_ICU_ENTRY_POINT_RENAME(uset_openPattern)
+#define uset_openPatternOptions U_ICU_ENTRY_POINT_RENAME(uset_openPatternOptions)
+#define uset_remove U_ICU_ENTRY_POINT_RENAME(uset_remove)
+#define uset_removeAll U_ICU_ENTRY_POINT_RENAME(uset_removeAll)
+#define uset_removeAllStrings U_ICU_ENTRY_POINT_RENAME(uset_removeAllStrings)
+#define uset_removeRange U_ICU_ENTRY_POINT_RENAME(uset_removeRange)
+#define uset_removeString U_ICU_ENTRY_POINT_RENAME(uset_removeString)
+#define uset_resemblesPattern U_ICU_ENTRY_POINT_RENAME(uset_resemblesPattern)
+#define uset_retain U_ICU_ENTRY_POINT_RENAME(uset_retain)
+#define uset_retainAll U_ICU_ENTRY_POINT_RENAME(uset_retainAll)
+#define uset_serialize U_ICU_ENTRY_POINT_RENAME(uset_serialize)
+#define uset_serializedContains U_ICU_ENTRY_POINT_RENAME(uset_serializedContains)
+#define uset_set U_ICU_ENTRY_POINT_RENAME(uset_set)
+#define uset_setSerializedToOne U_ICU_ENTRY_POINT_RENAME(uset_setSerializedToOne)
+#define uset_size U_ICU_ENTRY_POINT_RENAME(uset_size)
+#define uset_span U_ICU_ENTRY_POINT_RENAME(uset_span)
+#define uset_spanBack U_ICU_ENTRY_POINT_RENAME(uset_spanBack)
+#define uset_spanBackUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanBackUTF8)
+#define uset_spanUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanUTF8)
+#define uset_toPattern U_ICU_ENTRY_POINT_RENAME(uset_toPattern)
+#define uspoof_areConfusable U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusable)
+#define uspoof_areConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUTF8)
+#define uspoof_areConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUnicodeString)
+#define uspoof_check U_ICU_ENTRY_POINT_RENAME(uspoof_check)
+#define uspoof_check2 U_ICU_ENTRY_POINT_RENAME(uspoof_check2)
+#define uspoof_check2UTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_check2UTF8)
+#define uspoof_check2UnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_check2UnicodeString)
+#define uspoof_checkUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_checkUTF8)
+#define uspoof_checkUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_checkUnicodeString)
+#define uspoof_clone U_ICU_ENTRY_POINT_RENAME(uspoof_clone)
+#define uspoof_close U_ICU_ENTRY_POINT_RENAME(uspoof_close)
+#define uspoof_closeCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_closeCheckResult)
+#define uspoof_getAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedChars)
+#define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales)
+#define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet)
+#define uspoof_getCheckResultChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultChecks)
+#define uspoof_getCheckResultNumerics U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultNumerics)
+#define uspoof_getCheckResultRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultRestrictionLevel)
+#define uspoof_getChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getChecks)
+#define uspoof_getInclusionSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionSet)
+#define uspoof_getInclusionUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionUnicodeSet)
+#define uspoof_getRecommendedSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedSet)
+#define uspoof_getRecommendedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedUnicodeSet)
+#define uspoof_getRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getRestrictionLevel)
+#define uspoof_getSkeleton U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeleton)
+#define uspoof_getSkeletonUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUTF8)
+#define uspoof_getSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUnicodeString)
+#define uspoof_internalInitStatics U_ICU_ENTRY_POINT_RENAME(uspoof_internalInitStatics)
+#define uspoof_open U_ICU_ENTRY_POINT_RENAME(uspoof_open)
+#define uspoof_openCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_openCheckResult)
+#define uspoof_openFromSerialized U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSerialized)
+#define uspoof_openFromSource U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSource)
+#define uspoof_serialize U_ICU_ENTRY_POINT_RENAME(uspoof_serialize)
+#define uspoof_setAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedChars)
+#define uspoof_setAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedLocales)
+#define uspoof_setAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedUnicodeSet)
+#define uspoof_setChecks U_ICU_ENTRY_POINT_RENAME(uspoof_setChecks)
+#define uspoof_setRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_setRestrictionLevel)
+#define uspoof_swap U_ICU_ENTRY_POINT_RENAME(uspoof_swap)
+#define usprep_close U_ICU_ENTRY_POINT_RENAME(usprep_close)
+#define usprep_open U_ICU_ENTRY_POINT_RENAME(usprep_open)
+#define usprep_openByType U_ICU_ENTRY_POINT_RENAME(usprep_openByType)
+#define usprep_prepare U_ICU_ENTRY_POINT_RENAME(usprep_prepare)
+#define usprep_swap U_ICU_ENTRY_POINT_RENAME(usprep_swap)
+#define ustr_hashCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashCharsN)
+#define ustr_hashICharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashICharsN)
+#define ustr_hashUCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashUCharsN)
+#define ustrcase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_getCaseLocale)
+#define ustrcase_getTitleBreakIterator U_ICU_ENTRY_POINT_RENAME(ustrcase_getTitleBreakIterator)
+#define ustrcase_internalFold U_ICU_ENTRY_POINT_RENAME(ustrcase_internalFold)
+#define ustrcase_internalToLower U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToLower)
+#define ustrcase_internalToTitle U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToTitle)
+#define ustrcase_internalToUpper U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToUpper)
+#define ustrcase_map U_ICU_ENTRY_POINT_RENAME(ustrcase_map)
+#define ustrcase_mapWithOverlap U_ICU_ENTRY_POINT_RENAME(ustrcase_mapWithOverlap)
+#define utext_char32At U_ICU_ENTRY_POINT_RENAME(utext_char32At)
+#define utext_clone U_ICU_ENTRY_POINT_RENAME(utext_clone)
+#define utext_close U_ICU_ENTRY_POINT_RENAME(utext_close)
+#define utext_copy U_ICU_ENTRY_POINT_RENAME(utext_copy)
+#define utext_current32 U_ICU_ENTRY_POINT_RENAME(utext_current32)
+#define utext_equals U_ICU_ENTRY_POINT_RENAME(utext_equals)
+#define utext_extract U_ICU_ENTRY_POINT_RENAME(utext_extract)
+#define utext_freeze U_ICU_ENTRY_POINT_RENAME(utext_freeze)
+#define utext_getNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getNativeIndex)
+#define utext_getPreviousNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getPreviousNativeIndex)
+#define utext_hasMetaData U_ICU_ENTRY_POINT_RENAME(utext_hasMetaData)
+#define utext_isLengthExpensive U_ICU_ENTRY_POINT_RENAME(utext_isLengthExpensive)
+#define utext_isWritable U_ICU_ENTRY_POINT_RENAME(utext_isWritable)
+#define utext_moveIndex32 U_ICU_ENTRY_POINT_RENAME(utext_moveIndex32)
+#define utext_nativeLength U_ICU_ENTRY_POINT_RENAME(utext_nativeLength)
+#define utext_next32 U_ICU_ENTRY_POINT_RENAME(utext_next32)
+#define utext_next32From U_ICU_ENTRY_POINT_RENAME(utext_next32From)
+#define utext_openCharacterIterator U_ICU_ENTRY_POINT_RENAME(utext_openCharacterIterator)
+#define utext_openConstUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openConstUnicodeString)
+#define utext_openReplaceable U_ICU_ENTRY_POINT_RENAME(utext_openReplaceable)
+#define utext_openUChars U_ICU_ENTRY_POINT_RENAME(utext_openUChars)
+#define utext_openUTF8 U_ICU_ENTRY_POINT_RENAME(utext_openUTF8)
+#define utext_openUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openUnicodeString)
+#define utext_previous32 U_ICU_ENTRY_POINT_RENAME(utext_previous32)
+#define utext_previous32From U_ICU_ENTRY_POINT_RENAME(utext_previous32From)
+#define utext_replace U_ICU_ENTRY_POINT_RENAME(utext_replace)
+#define utext_setNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_setNativeIndex)
+#define utext_setup U_ICU_ENTRY_POINT_RENAME(utext_setup)
+#define utf8_appendCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_appendCharSafeBody)
+#define utf8_back1SafeBody U_ICU_ENTRY_POINT_RENAME(utf8_back1SafeBody)
+#define utf8_countTrailBytes U_ICU_ENTRY_POINT_RENAME(utf8_countTrailBytes)
+#define utf8_nextCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_nextCharSafeBody)
+#define utf8_prevCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_prevCharSafeBody)
+#define utmscale_fromInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_fromInt64)
+#define utmscale_getTimeScaleValue U_ICU_ENTRY_POINT_RENAME(utmscale_getTimeScaleValue)
+#define utmscale_toInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_toInt64)
+#define utrace_cleanup U_ICU_ENTRY_POINT_RENAME(utrace_cleanup)
+#define utrace_data U_ICU_ENTRY_POINT_RENAME(utrace_data)
+#define utrace_entry U_ICU_ENTRY_POINT_RENAME(utrace_entry)
+#define utrace_exit U_ICU_ENTRY_POINT_RENAME(utrace_exit)
+#define utrace_format U_ICU_ENTRY_POINT_RENAME(utrace_format)
+#define utrace_functionName U_ICU_ENTRY_POINT_RENAME(utrace_functionName)
+#define utrace_getFunctions U_ICU_ENTRY_POINT_RENAME(utrace_getFunctions)
+#define utrace_getLevel U_ICU_ENTRY_POINT_RENAME(utrace_getLevel)
+#define utrace_setFunctions U_ICU_ENTRY_POINT_RENAME(utrace_setFunctions)
+#define utrace_setLevel U_ICU_ENTRY_POINT_RENAME(utrace_setLevel)
+#define utrace_vformat U_ICU_ENTRY_POINT_RENAME(utrace_vformat)
+#define utrans_clone U_ICU_ENTRY_POINT_RENAME(utrans_clone)
+#define utrans_close U_ICU_ENTRY_POINT_RENAME(utrans_close)
+#define utrans_countAvailableIDs U_ICU_ENTRY_POINT_RENAME(utrans_countAvailableIDs)
+#define utrans_getAvailableID U_ICU_ENTRY_POINT_RENAME(utrans_getAvailableID)
+#define utrans_getID U_ICU_ENTRY_POINT_RENAME(utrans_getID)
+#define utrans_getSourceSet U_ICU_ENTRY_POINT_RENAME(utrans_getSourceSet)
+#define utrans_getUnicodeID U_ICU_ENTRY_POINT_RENAME(utrans_getUnicodeID)
+#define utrans_open U_ICU_ENTRY_POINT_RENAME(utrans_open)
+#define utrans_openIDs U_ICU_ENTRY_POINT_RENAME(utrans_openIDs)
+#define utrans_openInverse U_ICU_ENTRY_POINT_RENAME(utrans_openInverse)
+#define utrans_openU U_ICU_ENTRY_POINT_RENAME(utrans_openU)
+#define utrans_register U_ICU_ENTRY_POINT_RENAME(utrans_register)
+#define utrans_rep_caseContextIterator U_ICU_ENTRY_POINT_RENAME(utrans_rep_caseContextIterator)
+#define utrans_setFilter U_ICU_ENTRY_POINT_RENAME(utrans_setFilter)
+#define utrans_stripRules U_ICU_ENTRY_POINT_RENAME(utrans_stripRules)
+#define utrans_toRules U_ICU_ENTRY_POINT_RENAME(utrans_toRules)
+#define utrans_trans U_ICU_ENTRY_POINT_RENAME(utrans_trans)
+#define utrans_transIncremental U_ICU_ENTRY_POINT_RENAME(utrans_transIncremental)
+#define utrans_transIncrementalUChars U_ICU_ENTRY_POINT_RENAME(utrans_transIncrementalUChars)
+#define utrans_transUChars U_ICU_ENTRY_POINT_RENAME(utrans_transUChars)
+#define utrans_transliterator_cleanup U_ICU_ENTRY_POINT_RENAME(utrans_transliterator_cleanup)
+#define utrans_unregister U_ICU_ENTRY_POINT_RENAME(utrans_unregister)
+#define utrans_unregisterID U_ICU_ENTRY_POINT_RENAME(utrans_unregisterID)
+#define utrie2_clone U_ICU_ENTRY_POINT_RENAME(utrie2_clone)
+#define utrie2_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(utrie2_cloneAsThawed)
+#define utrie2_close U_ICU_ENTRY_POINT_RENAME(utrie2_close)
+#define utrie2_enum U_ICU_ENTRY_POINT_RENAME(utrie2_enum)
+#define utrie2_enumForLeadSurrogate U_ICU_ENTRY_POINT_RENAME(utrie2_enumForLeadSurrogate)
+#define utrie2_freeze U_ICU_ENTRY_POINT_RENAME(utrie2_freeze)
+#define utrie2_fromUTrie U_ICU_ENTRY_POINT_RENAME(utrie2_fromUTrie)
+#define utrie2_get32 U_ICU_ENTRY_POINT_RENAME(utrie2_get32)
+#define utrie2_get32FromLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_get32FromLeadSurrogateCodeUnit)
+#define utrie2_internalU8NextIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8NextIndex)
+#define utrie2_internalU8PrevIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8PrevIndex)
+#define utrie2_isFrozen U_ICU_ENTRY_POINT_RENAME(utrie2_isFrozen)
+#define utrie2_open U_ICU_ENTRY_POINT_RENAME(utrie2_open)
+#define utrie2_openDummy U_ICU_ENTRY_POINT_RENAME(utrie2_openDummy)
+#define utrie2_openFromSerialized U_ICU_ENTRY_POINT_RENAME(utrie2_openFromSerialized)
+#define utrie2_serialize U_ICU_ENTRY_POINT_RENAME(utrie2_serialize)
+#define utrie2_set32 U_ICU_ENTRY_POINT_RENAME(utrie2_set32)
+#define utrie2_set32ForLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_set32ForLeadSurrogateCodeUnit)
+#define utrie2_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie2_setRange32)
+#define utrie2_swap U_ICU_ENTRY_POINT_RENAME(utrie2_swap)
+#define utrie_clone U_ICU_ENTRY_POINT_RENAME(utrie_clone)
+#define utrie_close U_ICU_ENTRY_POINT_RENAME(utrie_close)
+#define utrie_defaultGetFoldingOffset U_ICU_ENTRY_POINT_RENAME(utrie_defaultGetFoldingOffset)
+#define utrie_enum U_ICU_ENTRY_POINT_RENAME(utrie_enum)
+#define utrie_get32 U_ICU_ENTRY_POINT_RENAME(utrie_get32)
+#define utrie_getData U_ICU_ENTRY_POINT_RENAME(utrie_getData)
+#define utrie_open U_ICU_ENTRY_POINT_RENAME(utrie_open)
+#define utrie_serialize U_ICU_ENTRY_POINT_RENAME(utrie_serialize)
+#define utrie_set32 U_ICU_ENTRY_POINT_RENAME(utrie_set32)
+#define utrie_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie_setRange32)
+#define utrie_swap U_ICU_ENTRY_POINT_RENAME(utrie_swap)
+#define utrie_swapAnyVersion U_ICU_ENTRY_POINT_RENAME(utrie_swapAnyVersion)
+#define utrie_unserialize U_ICU_ENTRY_POINT_RENAME(utrie_unserialize)
+#define utrie_unserializeDummy U_ICU_ENTRY_POINT_RENAME(utrie_unserializeDummy)
+#define vzone_clone U_ICU_ENTRY_POINT_RENAME(vzone_clone)
+#define vzone_close U_ICU_ENTRY_POINT_RENAME(vzone_close)
+#define vzone_countTransitionRules U_ICU_ENTRY_POINT_RENAME(vzone_countTransitionRules)
+#define vzone_equals U_ICU_ENTRY_POINT_RENAME(vzone_equals)
+#define vzone_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(vzone_getDynamicClassID)
+#define vzone_getLastModified U_ICU_ENTRY_POINT_RENAME(vzone_getLastModified)
+#define vzone_getNextTransition U_ICU_ENTRY_POINT_RENAME(vzone_getNextTransition)
+#define vzone_getOffset U_ICU_ENTRY_POINT_RENAME(vzone_getOffset)
+#define vzone_getOffset2 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset2)
+#define vzone_getOffset3 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset3)
+#define vzone_getPreviousTransition U_ICU_ENTRY_POINT_RENAME(vzone_getPreviousTransition)
+#define vzone_getRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_getRawOffset)
+#define vzone_getStaticClassID U_ICU_ENTRY_POINT_RENAME(vzone_getStaticClassID)
+#define vzone_getTZURL U_ICU_ENTRY_POINT_RENAME(vzone_getTZURL)
+#define vzone_hasSameRules U_ICU_ENTRY_POINT_RENAME(vzone_hasSameRules)
+#define vzone_inDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_inDaylightTime)
+#define vzone_openData U_ICU_ENTRY_POINT_RENAME(vzone_openData)
+#define vzone_openID U_ICU_ENTRY_POINT_RENAME(vzone_openID)
+#define vzone_setLastModified U_ICU_ENTRY_POINT_RENAME(vzone_setLastModified)
+#define vzone_setRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_setRawOffset)
+#define vzone_setTZURL U_ICU_ENTRY_POINT_RENAME(vzone_setTZURL)
+#define vzone_useDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_useDaylightTime)
+#define vzone_write U_ICU_ENTRY_POINT_RENAME(vzone_write)
+#define vzone_writeFromStart U_ICU_ENTRY_POINT_RENAME(vzone_writeFromStart)
+#define vzone_writeSimple U_ICU_ENTRY_POINT_RENAME(vzone_writeSimple)
+#define zrule_close U_ICU_ENTRY_POINT_RENAME(zrule_close)
+#define zrule_equals U_ICU_ENTRY_POINT_RENAME(zrule_equals)
+#define zrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(zrule_getDSTSavings)
+#define zrule_getName U_ICU_ENTRY_POINT_RENAME(zrule_getName)
+#define zrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(zrule_getRawOffset)
+#define zrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(zrule_isEquivalentTo)
+#define ztrans_adoptFrom U_ICU_ENTRY_POINT_RENAME(ztrans_adoptFrom)
+#define ztrans_adoptTo U_ICU_ENTRY_POINT_RENAME(ztrans_adoptTo)
+#define ztrans_clone U_ICU_ENTRY_POINT_RENAME(ztrans_clone)
+#define ztrans_close U_ICU_ENTRY_POINT_RENAME(ztrans_close)
+#define ztrans_equals U_ICU_ENTRY_POINT_RENAME(ztrans_equals)
+#define ztrans_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getDynamicClassID)
+#define ztrans_getFrom U_ICU_ENTRY_POINT_RENAME(ztrans_getFrom)
+#define ztrans_getStaticClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getStaticClassID)
+#define ztrans_getTime U_ICU_ENTRY_POINT_RENAME(ztrans_getTime)
+#define ztrans_getTo U_ICU_ENTRY_POINT_RENAME(ztrans_getTo)
+#define ztrans_open U_ICU_ENTRY_POINT_RENAME(ztrans_open)
+#define ztrans_openEmpty U_ICU_ENTRY_POINT_RENAME(ztrans_openEmpty)
+#define ztrans_setFrom U_ICU_ENTRY_POINT_RENAME(ztrans_setFrom)
+#define ztrans_setTime U_ICU_ENTRY_POINT_RENAME(ztrans_setTime)
+#define ztrans_setTo U_ICU_ENTRY_POINT_RENAME(ztrans_setTo)
+
+#endif /* !(defined(_MSC_VER) && defined(__INTELLISENSE__)) */
+#endif /* U_DISABLE_RENAMING */
+#endif /* URENAME_H */
+
diff --git a/thirdparty/icu4c/common/unicode/urep.h b/thirdparty/icu4c/common/unicode/urep.h
new file mode 100644
index 0000000000..932202ddb0
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/urep.h
@@ -0,0 +1,157 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1997-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* Date Name Description
+* 06/23/00 aliu Creation.
+******************************************************************************
+*/
+
+#ifndef __UREP_H
+#define __UREP_H
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/********************************************************************
+ * General Notes
+ ********************************************************************
+ * TODO
+ * Add usage scenario
+ * Add test code
+ * Talk about pinning
+ * Talk about "can truncate result if out of memory"
+ */
+
+/********************************************************************
+ * Data Structures
+ ********************************************************************/
+/**
+ * \file
+ * \brief C API: Callbacks for UReplaceable
+ */
+/**
+ * An opaque replaceable text object. This will be manipulated only
+ * through the caller-supplied UReplaceableCallbacks struct. Related
+ * to the C++ class Replaceable.
+ * This is currently only used in the Transliterator C API, see utrans.h .
+ * @stable ICU 2.0
+ */
+typedef void* UReplaceable;
+
+/**
+ * A set of function pointers that transliterators use to manipulate a
+ * UReplaceable. The caller should supply the required functions to
+ * manipulate their text appropriately. Related to the C++ class
+ * Replaceable.
+ * @stable ICU 2.0
+ */
+typedef struct UReplaceableCallbacks {
+
+ /**
+ * Function pointer that returns the number of UChar code units in
+ * this text.
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @return The length of the text.
+ * @stable ICU 2.0
+ */
+ int32_t (*length)(const UReplaceable* rep);
+
+ /**
+ * Function pointer that returns a UChar code unit at the given
+ * offset into this text; 0 <= offset < n, where n is the value
+ * returned by (*length)(rep). See unistr.h for a description of
+ * charAt() vs. char32At().
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @param offset The index at which to fetch the UChar (code unit).
+ * @return The UChar (code unit) at offset, or U+FFFF if the offset is out of bounds.
+ * @stable ICU 2.0
+ */
+ UChar (*charAt)(const UReplaceable* rep,
+ int32_t offset);
+
+ /**
+ * Function pointer that returns a UChar32 code point at the given
+ * offset into this text. See unistr.h for a description of
+ * charAt() vs. char32At().
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @param offset The index at which to fetch the UChar32 (code point).
+ * @return The UChar32 (code point) at offset, or U+FFFF if the offset is out of bounds.
+ * @stable ICU 2.0
+ */
+ UChar32 (*char32At)(const UReplaceable* rep,
+ int32_t offset);
+
+ /**
+ * Function pointer that replaces text between start and limit in
+ * this text with the given text. Attributes (out of band info)
+ * should be retained.
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @param start the starting index of the text to be replaced,
+ * inclusive.
+ * @param limit the ending index of the text to be replaced,
+ * exclusive.
+ * @param text the new text to replace the UChars from
+ * start..limit-1.
+ * @param textLength the number of UChars at text, or -1 if text
+ * is null-terminated.
+ * @stable ICU 2.0
+ */
+ void (*replace)(UReplaceable* rep,
+ int32_t start,
+ int32_t limit,
+ const UChar* text,
+ int32_t textLength);
+
+ /**
+ * Function pointer that copies the characters in the range
+ * [<tt>start</tt>, <tt>limit</tt>) into the array <tt>dst</tt>.
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @param start offset of first character which will be copied
+ * into the array
+ * @param limit offset immediately following the last character to
+ * be copied
+ * @param dst array in which to copy characters. The length of
+ * <tt>dst</tt> must be at least <tt>(limit - start)</tt>.
+ * @stable ICU 2.1
+ */
+ void (*extract)(UReplaceable* rep,
+ int32_t start,
+ int32_t limit,
+ UChar* dst);
+
+ /**
+ * Function pointer that copies text between start and limit in
+ * this text to another index in the text. Attributes (out of
+ * band info) should be retained. After this call, there will be
+ * (at least) two copies of the characters originally located at
+ * start..limit-1.
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @param start the starting index of the text to be copied,
+ * inclusive.
+ * @param limit the ending index of the text to be copied,
+ * exclusive.
+ * @param dest the index at which the copy of the UChars should be
+ * inserted.
+ * @stable ICU 2.0
+ */
+ void (*copy)(UReplaceable* rep,
+ int32_t start,
+ int32_t limit,
+ int32_t dest);
+
+} UReplaceableCallbacks;
+
+U_CDECL_END
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/ures.h b/thirdparty/icu4c/common/unicode/ures.h
new file mode 100644
index 0000000000..fff84043e8
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ures.h
@@ -0,0 +1,911 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File URES.H (formerly CRESBUND.H)
+*
+* Modification History:
+*
+* Date Name Description
+* 04/01/97 aliu Creation.
+* 02/22/99 damiba overhaul.
+* 04/04/99 helena Fixed internal header inclusion.
+* 04/15/99 Madhu Updated Javadoc
+* 06/14/99 stephen Removed functions taking a filename suffix.
+* 07/20/99 stephen Language-independent typedef to void*
+* 11/09/99 weiv Added ures_getLocale()
+* 06/24/02 weiv Added support for resource sharing
+******************************************************************************
+*/
+
+#ifndef URES_H
+#define URES_H
+
+#include "unicode/utypes.h"
+#include "unicode/uloc.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C API: Resource Bundle
+ *
+ * <h2>C API: Resource Bundle</h2>
+ *
+ * C API representing a collection of resource information pertaining to a given
+ * locale. A resource bundle provides a way of accessing locale-specific information in
+ * a data file. You create a resource bundle that manages the resources for a given
+ * locale and then ask it for individual resources.
+ * <P>
+ * Resource bundles in ICU4C are currently defined using text files which conform to the following
+ * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/bnf_rb.txt">BNF definition</a>.
+ * More on resource bundle concepts and syntax can be found in the
+ * <a href="http://icu-project.org/userguide/ResourceManagement.html">Users Guide</a>.
+ * <P>
+ */
+
+/**
+ * UResourceBundle is an opaque type for handles for resource bundles in C APIs.
+ * @stable ICU 2.0
+ */
+struct UResourceBundle;
+
+/**
+ * @stable ICU 2.0
+ */
+typedef struct UResourceBundle UResourceBundle;
+
+/**
+ * Numeric constants for types of resource items. Values are sparse: 4..6 and 9..13 are not defined here.
+ * @see ures_getType
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** Resource type constant for "no resource". @stable ICU 2.6 */
+ URES_NONE=-1,
+
+ /** Resource type constant for 16-bit Unicode strings. @stable ICU 2.6 */
+ URES_STRING=0,
+
+ /** Resource type constant for binary data. @stable ICU 2.6 */
+ URES_BINARY=1,
+
+ /** Resource type constant for tables of key-value pairs. @stable ICU 2.6 */
+ URES_TABLE=2,
+
+ /**
+ * Resource type constant for aliases;
+ * internally stores a string which identifies the actual resource
+ * storing the data (can be in a different resource bundle).
+ * Resolved internally before delivering the actual resource through the API.
+ * @stable ICU 2.6
+ */
+ URES_ALIAS=3,
+
+ /**
+ * Resource type constant for a single 28-bit integer, interpreted as
+ * signed or unsigned by the ures_getInt() or ures_getUInt() function.
+ * @see ures_getInt
+ * @see ures_getUInt
+ * @stable ICU 2.6
+ */
+ URES_INT=7,
+
+ /** Resource type constant for arrays of resources. @stable ICU 2.6 */
+ URES_ARRAY=8,
+
+ /**
+ * Resource type constant for vectors of 32-bit integers.
+ * @see ures_getIntVector
+ * @stable ICU 2.6
+ */
+ URES_INT_VECTOR = 14,
+#ifndef U_HIDE_DEPRECATED_API
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_NONE=URES_NONE,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_STRING=URES_STRING,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_BINARY=URES_BINARY,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_TABLE=URES_TABLE,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_ALIAS=URES_ALIAS,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_INT=URES_INT,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_ARRAY=URES_ARRAY,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_INT_VECTOR=URES_INT_VECTOR,
+ /** @deprecated ICU 2.6 Not used. */
+ RES_RESERVED=15,
+
+ /**
+ * One more than the highest normal UResType value.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ URES_LIMIT = 16
+#endif // U_HIDE_DEPRECATED_API
+} UResType;
+
+/*
+ * Functions to create and destroy resource bundles.
+ */
+
+/**
+ * Opens a UResourceBundle, from which users can extract strings by using
+ * their corresponding keys.
+ * Note that the caller is responsible for calling <TT>ures_close</TT> on each successfully
+ * opened resource bundle.
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by <code> udata_open( packageName, "res", locale, err) </code>
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param locale specifies the locale for which we want to open the resource
+ * if NULL, the default locale will be used. If strlen(locale) == 0
+ * root locale will be used.
+ *
+ * @param status fills in the outgoing error code.
+ * The UErrorCode err parameter is used to return status information to the user. To
+ * check whether the construction succeeded or not, you should check the value of
+ * U_SUCCESS(err). If you wish more detailed information, you can check for
+ * informational status results which still indicate success. U_USING_FALLBACK_WARNING
+ * indicates that a fall back locale was used. For example, 'de_CH' was requested,
+ * but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that
+ * the default locale data or root locale data was used; neither the requested locale
+ * nor any of its fall back locales could be found. Please see the users guide for more
+ * information on this topic.
+ * @return a newly allocated resource bundle.
+ * @see ures_close
+ * @stable ICU 2.0
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_open(const char* packageName,
+ const char* locale,
+ UErrorCode* status);
+
+
+/** This function does not care what kind of localeID is passed in. It simply opens a bundle with
+ * that name. Fallback mechanism is disabled for the new bundle. If the requested bundle contains
+ * an %%ALIAS directive, the results are undefined.
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by <code> udata_open( packageName, "res", locale, err) </code>
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param locale specifies the locale for which we want to open the resource
+ * if NULL, the default locale will be used. If strlen(locale) == 0
+ * root locale will be used.
+ *
+ * @param status fills in the outgoing error code. Either U_ZERO_ERROR or U_MISSING_RESOURCE_ERROR
+ * @return a newly allocated resource bundle or NULL if it doesn't exist.
+ * @see ures_close
+ * @stable ICU 2.0
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_openDirect(const char* packageName,
+ const char* locale,
+ UErrorCode* status);
+
+/**
+ * Same as ures_open() but takes a const UChar *path.
+ * This path will be converted to char * using the default converter,
+ * then ures_open() is called.
+ *
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by <code> udata_open( packageName, "res", locale, err) </code>
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param locale specifies the locale for which we want to open the resource
+ * if NULL, the default locale will be used. If strlen(locale) == 0
+ * root locale will be used.
+ * @param status fills in the outgoing error code.
+ * @return a newly allocated resource bundle.
+ * @see ures_open
+ * @stable ICU 2.0
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_openU(const UChar* packageName,
+ const char* locale,
+ UErrorCode* status);
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Returns the number of strings/arrays in resource bundles.
+ * Better to use ures_getSize, as this function will be deprecated.
+ *
+ *@param resourceBundle resource bundle containing the desired strings
+ *@param resourceKey key tagging the resource
+ *@param err fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ *@return: for <STRONG>Arrays</STRONG>: returns the number of resources in the array
+ * <STRONG>Tables</STRONG>: returns the number of resources in the table
+ * <STRONG>single string</STRONG>: returns 1
+ *@see ures_getSize
+ * @deprecated ICU 2.8 Use ures_getSize instead
+ */
+U_DEPRECATED int32_t U_EXPORT2
+ures_countArrayItems(const UResourceBundle* resourceBundle,
+ const char* resourceKey,
+ UErrorCode* err);
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Close a resource bundle, all pointers returned from the various ures_getXXX calls
+ * on this particular bundle should be considered invalid henceforth.
+ *
+ * @param resourceBundle a pointer to a resourceBundle struct. Can be NULL.
+ * @see ures_open
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ures_close(UResourceBundle* resourceBundle);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUResourceBundlePointer
+ * "Smart pointer" class, closes a UResourceBundle via ures_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUResourceBundlePointer, UResourceBundle, ures_close);
+
+U_NAMESPACE_END
+
+#endif
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Return the version number associated with this ResourceBundle as a string. Please
+ * use ures_getVersion as this function is going to be deprecated.
+ *
+ * @param resourceBundle The resource bundle for which the version is checked.
+ * @return A version number string as specified in the resource bundle or its parent.
+ * The caller does not own this string.
+ * @see ures_getVersion
+ * @deprecated ICU 2.8 Use ures_getVersion instead.
+ */
+U_DEPRECATED const char* U_EXPORT2
+ures_getVersionNumber(const UResourceBundle* resourceBundle);
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Return the version number associated with this ResourceBundle as a
+ * UVersionInfo array.
+ *
+ * @param resB The resource bundle for which the version is checked.
+ * @param versionInfo A UVersionInfo array that is filled with the version number
+ * as specified in the resource bundle or its parent.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ures_getVersion(const UResourceBundle* resB,
+ UVersionInfo versionInfo);
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Return the name of the Locale associated with this ResourceBundle. This API allows
+ * you to query for the real locale of the resource. For example, if you requested
+ * "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned.
+ * For subresources, the locale where this resource comes from will be returned.
+ * If fallback has occurred, getLocale will reflect this.
+ *
+ * @param resourceBundle resource bundle in question
+ * @param status just for catching illegal arguments
+ * @return A Locale name
+ * @deprecated ICU 2.8 Use ures_getLocaleByType instead.
+ */
+U_DEPRECATED const char* U_EXPORT2
+ures_getLocale(const UResourceBundle* resourceBundle,
+ UErrorCode* status);
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Return the name of the Locale associated with this ResourceBundle.
+ * You can choose between requested, valid and real locale.
+ *
+ * @param resourceBundle resource bundle in question
+ * @param type You can choose between requested, valid and actual
+ * locale. For description see the definition of
+ * ULocDataLocaleType in uloc.h
+ * @param status just for catching illegal arguments
+ * @return A Locale name
+ * @stable ICU 2.8
+ */
+U_CAPI const char* U_EXPORT2
+ures_getLocaleByType(const UResourceBundle* resourceBundle,
+ ULocDataLocaleType type,
+ UErrorCode* status);
+
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Same as ures_open() but uses the fill-in parameter instead of allocating a new bundle.
+ *
+ * TODO need to revisit usefulness of this function
+ * and usage model for fillIn parameters without knowing sizeof(UResourceBundle)
+ * @param r The existing UResourceBundle to fill in. If NULL then status will be
+ * set to U_ILLEGAL_ARGUMENT_ERROR.
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by <code> udata_open( packageName, "res", locale, err) </code>
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param localeID specifies the locale for which we want to open the resource
+ * @param status The error code.
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+ures_openFillIn(UResourceBundle *r,
+ const char* packageName,
+ const char* localeID,
+ UErrorCode* status);
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * Returns a string from a string resource type
+ *
+ * @param resourceBundle a string resource
+ * @param len fills in the length of resulting string
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * Always check the value of status. Don't count on returning NULL.
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @see ures_getBinary
+ * @see ures_getIntVector
+ * @see ures_getInt
+ * @see ures_getUInt
+ * @stable ICU 2.0
+ */
+U_CAPI const UChar* U_EXPORT2
+ures_getString(const UResourceBundle* resourceBundle,
+ int32_t* len,
+ UErrorCode* status);
+
+/**
+ * Returns a UTF-8 string from a string resource.
+ * The UTF-8 string may be returnable directly as a pointer, or
+ * it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
+ * or equivalent.
+ *
+ * If forceCopy==true, then the string is always written to the dest buffer
+ * and dest is returned.
+ *
+ * If forceCopy==false, then the string is returned as a pointer if possible,
+ * without needing a dest buffer (it can be NULL). If the string needs to be
+ * copied or transformed, then it may be placed into dest at an arbitrary offset.
+ *
+ * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
+ *
+ * If the string is transformed from UTF-16, then a conversion error may occur
+ * if an unpaired surrogate is encountered. If the function is successful, then
+ * the output UTF-8 string is always well-formed.
+ *
+ * @param resB Resource bundle.
+ * @param dest Destination buffer. Can be NULL only if capacity=*length==0.
+ * @param length Input: Capacity of destination buffer.
+ * Output: Actual length of the UTF-8 string, not counting the
+ * terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
+ * Can be NULL, meaning capacity=0 and the string length is not
+ * returned to the caller.
+ * @param forceCopy If true, then the output string will always be written to
+ * dest, with U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING set if appropriate.
+ * If false, then the dest buffer may or may not contain a
+ * copy of the string. dest may or may not be modified.
+ * If a copy needs to be written, then the UErrorCode parameter
+ * indicates overflow etc. as usual.
+ * @param status Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to the UTF-8 string. It may be dest, or at some offset
+ * from dest (only if !forceCopy), or in unrelated memory.
+ * Always NUL-terminated unless the string was written to dest and
+ * length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
+ *
+ * @see ures_getString
+ * @see u_strToUTF8
+ * @stable ICU 3.6
+ */
+U_CAPI const char * U_EXPORT2
+ures_getUTF8String(const UResourceBundle *resB,
+ char *dest, int32_t *length,
+ UBool forceCopy,
+ UErrorCode *status);
+
+/**
+ * Returns binary data from a binary resource.
+ *
+ * @param resourceBundle a binary resource
+ * @param len fills in the length of resulting byte chunk
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * Always check the value of status. Don't count on returning NULL.
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file.
+ * @see ures_getString
+ * @see ures_getIntVector
+ * @see ures_getInt
+ * @see ures_getUInt
+ * @stable ICU 2.0
+ */
+U_CAPI const uint8_t* U_EXPORT2
+ures_getBinary(const UResourceBundle* resourceBundle,
+ int32_t* len,
+ UErrorCode* status);
+
+/**
+ * Returns a 32 bit integer array from a resource.
+ *
+ * @param resourceBundle an int vector resource
+ * @param len fills in the length of resulting byte chunk
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * Always check the value of status. Don't count on returning NULL.
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to a chunk of integers which live in a memory mapped/DLL file.
+ * @see ures_getBinary
+ * @see ures_getString
+ * @see ures_getInt
+ * @see ures_getUInt
+ * @stable ICU 2.0
+ */
+U_CAPI const int32_t* U_EXPORT2
+ures_getIntVector(const UResourceBundle* resourceBundle,
+ int32_t* len,
+ UErrorCode* status);
+
+/**
+ * Returns an unsigned integer from a resource.
+ * This integer is originally 28 bits.
+ *
+ * @param resourceBundle an integer resource
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return an integer value
+ * @see ures_getInt
+ * @see ures_getIntVector
+ * @see ures_getBinary
+ * @see ures_getString
+ * @stable ICU 2.0
+ */
+U_CAPI uint32_t U_EXPORT2
+ures_getUInt(const UResourceBundle* resourceBundle,
+ UErrorCode *status);
+
+/**
+ * Returns a signed integer from a resource.
+ * This integer is originally 28 bit and the sign gets propagated.
+ *
+ * @param resourceBundle an integer resource
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return an integer value
+ * @see ures_getUInt
+ * @see ures_getIntVector
+ * @see ures_getBinary
+ * @see ures_getString
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ures_getInt(const UResourceBundle* resourceBundle,
+ UErrorCode *status);
+
+/**
+ * Returns the size of a resource. Size for scalar types is always 1,
+ * and for vector/table types is the number of child resources.
+ * @warning Integer array is treated as a scalar type. There are no
+ * APIs to access individual members of an integer array. It
+ * is always returned as a whole.
+ * @param resourceBundle a resource
+ * @return number of resources in a given resource.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ures_getSize(const UResourceBundle *resourceBundle);
+
+/**
+ * Returns the type of a resource. Available types are defined in enum UResType
+ *
+ * @param resourceBundle a resource
+ * @return type of the given resource.
+ * @see UResType
+ * @stable ICU 2.0
+ */
+U_CAPI UResType U_EXPORT2
+ures_getType(const UResourceBundle *resourceBundle);
+
+/**
+ * Returns the key associated with a given resource. Not all the resources have a key - only
+ * those that are members of a table.
+ *
+ * @param resourceBundle a resource
+ * @return a key associated to this resource, or NULL if it doesn't have a key
+ * @stable ICU 2.0
+ */
+U_CAPI const char * U_EXPORT2
+ures_getKey(const UResourceBundle *resourceBundle);
+
+/* ITERATION API
+ This API provides means for iterating through a resource
+*/
+
+/**
+ * Resets the internal context of a resource so that iteration starts from the first element.
+ *
+ * @param resourceBundle a resource
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ures_resetIterator(UResourceBundle *resourceBundle);
+
+/**
+ * Checks whether the given resource has another element to iterate over.
+ *
+ * @param resourceBundle a resource
+ * @return true if there are more elements, false if there are no more elements
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+ures_hasNext(const UResourceBundle *resourceBundle);
+
+/**
+ * Returns the next resource in a given resource or NULL if there are no more resources
+ * to iterate over. Features a fill-in parameter.
+ *
+ * @param resourceBundle a resource
+ * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
+ * Alternatively, you can supply a struct to be filled by this function.
+ * @param status fills in the outgoing error code. You may still get a non NULL result even if an
+ * error occurred. Check status instead.
+ * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
+ * @stable ICU 2.0
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_getNextResource(UResourceBundle *resourceBundle,
+ UResourceBundle *fillIn,
+ UErrorCode *status);
+
+/**
+ * Returns the next string in a given resource or NULL if there are no more resources
+ * to iterate over.
+ *
+ * @param resourceBundle a resource
+ * @param len fill in length of the string
+ * @param key fill in for key associated with this string. NULL if no key
+ * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
+ * count on it. Check status instead!
+ * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+U_CAPI const UChar* U_EXPORT2
+ures_getNextString(UResourceBundle *resourceBundle,
+ int32_t* len,
+ const char ** key,
+ UErrorCode *status);
+
+/**
+ * Returns the resource in a given resource at the specified index. Features a fill-in parameter.
+ *
+ * @param resourceBundle the resource bundle from which to get a sub-resource
+ * @param indexR an index to the wanted resource.
+ * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
+ * Alternatively, you can supply a struct to be filled by this function.
+ * @param status fills in the outgoing error code. Don't count on NULL being returned if an error has
+ * occurred. Check status instead.
+ * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
+ * @stable ICU 2.0
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_getByIndex(const UResourceBundle *resourceBundle,
+ int32_t indexR,
+ UResourceBundle *fillIn,
+ UErrorCode *status);
+
+/**
+ * Returns the string in a given resource at the specified index.
+ *
+ * @param resourceBundle a resource
+ * @param indexS an index to the wanted string.
+ * @param len fill in length of the string
+ * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
+ * count on it. Check status instead!
+ * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+U_CAPI const UChar* U_EXPORT2
+ures_getStringByIndex(const UResourceBundle *resourceBundle,
+ int32_t indexS,
+ int32_t* len,
+ UErrorCode *status);
+
+/**
+ * Returns a UTF-8 string from a resource at the specified index.
+ * The UTF-8 string may be returnable directly as a pointer, or
+ * it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
+ * or equivalent.
+ *
+ * If forceCopy==true, then the string is always written to the dest buffer
+ * and dest is returned.
+ *
+ * If forceCopy==false, then the string is returned as a pointer if possible,
+ * without needing a dest buffer (it can be NULL). If the string needs to be
+ * copied or transformed, then it may be placed into dest at an arbitrary offset.
+ *
+ * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
+ *
+ * If the string is transformed from UTF-16, then a conversion error may occur
+ * if an unpaired surrogate is encountered. If the function is successful, then
+ * the output UTF-8 string is always well-formed.
+ *
+ * @param resB Resource bundle.
+ * @param stringIndex An index to the wanted string.
+ * @param dest Destination buffer. Can be NULL only if capacity=*length==0.
+ * @param pLength Input: Capacity of destination buffer.
+ * Output: Actual length of the UTF-8 string, not counting the
+ * terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
+ * Can be NULL, meaning capacity=0 and the string length is not
+ * returned to the caller.
+ * @param forceCopy If true, then the output string will always be written to
+ * dest, with U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING set if appropriate.
+ * If false, then the dest buffer may or may not contain a
+ * copy of the string. dest may or may not be modified.
+ * If a copy needs to be written, then the UErrorCode parameter
+ * indicates overflow etc. as usual.
+ * @param status Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to the UTF-8 string. It may be dest, or at some offset
+ * from dest (only if !forceCopy), or in unrelated memory.
+ * Always NUL-terminated unless the string was written to dest and
+ * length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
+ *
+ * @see ures_getStringByIndex
+ * @see u_strToUTF8
+ * @stable ICU 3.6
+ */
+U_CAPI const char * U_EXPORT2
+ures_getUTF8StringByIndex(const UResourceBundle *resB,
+ int32_t stringIndex,
+ char *dest, int32_t *pLength,
+ UBool forceCopy,
+ UErrorCode *status);
+
+/**
+ * Returns a resource in a given resource that has a given key. This procedure works only with table
+ * resources. Features a fill-in parameter.
+ *
+ * @param resourceBundle a resource
+ * @param key a key associated with the wanted resource
+ * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
+ * Alternatively, you can supply a struct to be filled by this function.
+ * @param status fills in the outgoing error code.
+ * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
+ * @stable ICU 2.0
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_getByKey(const UResourceBundle *resourceBundle,
+ const char* key,
+ UResourceBundle *fillIn,
+ UErrorCode *status);
+
+/**
+ * Returns a string in a given resource that has a given key. This procedure works only with table
+ * resources.
+ *
+ * @param resB a resource
+ * @param key a key associated with the wanted string
+ * @param len fill in length of the string
+ * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
+ * count on it. Check status instead!
+ * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+U_CAPI const UChar* U_EXPORT2
+ures_getStringByKey(const UResourceBundle *resB,
+ const char* key,
+ int32_t* len,
+ UErrorCode *status);
+
+/**
+ * Returns a UTF-8 string from a resource and a key.
+ * This function works only with table resources.
+ *
+ * The UTF-8 string may be returnable directly as a pointer, or
+ * it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
+ * or equivalent.
+ *
+ * If forceCopy==true, then the string is always written to the dest buffer
+ * and dest is returned.
+ *
+ * If forceCopy==false, then the string is returned as a pointer if possible,
+ * without needing a dest buffer (it can be NULL). If the string needs to be
+ * copied or transformed, then it may be placed into dest at an arbitrary offset.
+ *
+ * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
+ *
+ * If the string is transformed from UTF-16, then a conversion error may occur
+ * if an unpaired surrogate is encountered. If the function is successful, then
+ * the output UTF-8 string is always well-formed.
+ *
+ * @param resB Resource bundle.
+ * @param key A key associated with the wanted resource
+ * @param dest Destination buffer. Can be NULL only if capacity=*length==0.
+ * @param pLength Input: Capacity of destination buffer.
+ * Output: Actual length of the UTF-8 string, not counting the
+ * terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
+ * Can be NULL, meaning capacity=0 and the string length is not
+ * returned to the caller.
+ * @param forceCopy If true, then the output string will always be written to
+ * dest, with U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING set if appropriate.
+ * If false, then the dest buffer may or may not contain a
+ * copy of the string. dest may or may not be modified.
+ * If a copy needs to be written, then the UErrorCode parameter
+ * indicates overflow etc. as usual.
+ * @param status Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to the UTF-8 string. It may be dest, or at some offset
+ * from dest (only if !forceCopy), or in unrelated memory.
+ * Always NUL-terminated unless the string was written to dest and
+ * length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
+ *
+ * @see ures_getStringByKey
+ * @see u_strToUTF8
+ * @stable ICU 3.6
+ */
+U_CAPI const char * U_EXPORT2
+ures_getUTF8StringByKey(const UResourceBundle *resB,
+ const char *key,
+ char *dest, int32_t *pLength,
+ UBool forceCopy,
+ UErrorCode *status);
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/unistr.h"
+
+U_NAMESPACE_BEGIN
+/**
+ * Returns the string value from a string resource bundle, wrapping ures_getString().
+ *
+ * @param resB a resource, should have type URES_STRING
+ * @param status fills in the outgoing error code; it is dereferenced here, so it must not be NULL.
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return The string value, or a bogus string if there is a failure UErrorCode.
+ * @stable ICU 2.0
+ */
+inline UnicodeString
+ures_getUnicodeString(const UResourceBundle *resB, UErrorCode* status) {
+ UnicodeString result;
+ int32_t len = 0;
+ const UChar *r = ures_getString(resB, &len, status);
+ if(U_SUCCESS(*status)) {
+ result.setTo(true, r, len); // NOTE(review): looks like the aliasing setTo() overload (r is not copied) - confirm in unistr.h
+ } else {
+ result.setToBogus();
+ }
+ return result;
+}
+
+/**
+ * Returns the next string in a resource, or an empty string if there are no more resources
+ * to iterate over. Wraps ures_getNextString().
+ * Use ures_getNextString() instead to distinguish between
+ * the end of the iteration and a real empty string value.
+ *
+ * @param resB a resource
+ * @param key fill in for key associated with this string
+ * @param status fills in the outgoing error code; it is dereferenced here, so it must not be NULL.
+ * @return The string value, or a bogus string if there is a failure UErrorCode.
+ * @stable ICU 2.0
+ */
+inline UnicodeString
+ures_getNextUnicodeString(UResourceBundle *resB, const char ** key, UErrorCode* status) {
+ UnicodeString result;
+ int32_t len = 0;
+ const UChar* r = ures_getNextString(resB, &len, key, status);
+ if(U_SUCCESS(*status)) {
+ result.setTo(true, r, len); // NOTE(review): looks like the aliasing setTo() overload (r is not copied) - confirm in unistr.h
+ } else {
+ result.setToBogus();
+ }
+ return result;
+}
+
+/**
+ * Returns the string in a given resource array or table at the specified index, wrapping ures_getStringByIndex().
+ *
+ * @param resB a resource
+ * @param indexS an index to the wanted string.
+ * @param status fills in the outgoing error code; it is dereferenced here, so it must not be NULL.
+ * @return The string value, or a bogus string if there is a failure UErrorCode.
+ * @stable ICU 2.0
+ */
+inline UnicodeString
+ures_getUnicodeStringByIndex(const UResourceBundle *resB, int32_t indexS, UErrorCode* status) {
+ UnicodeString result;
+ int32_t len = 0;
+ const UChar* r = ures_getStringByIndex(resB, indexS, &len, status);
+ if(U_SUCCESS(*status)) {
+ result.setTo(true, r, len); // NOTE(review): looks like the aliasing setTo() overload (r is not copied) - confirm in unistr.h
+ } else {
+ result.setToBogus();
+ }
+ return result;
+}
+
+/**
+ * Returns a string in a resource that has a given key, wrapping ures_getStringByKey().
+ * This procedure works only with table resources.
+ *
+ * @param resB a resource
+ * @param key a key associated with the wanted string
+ * @param status fills in the outgoing error code; it is dereferenced here, so it must not be NULL.
+ * @return The string value, or a bogus string if there is a failure UErrorCode.
+ * @stable ICU 2.0
+ */
+inline UnicodeString
+ures_getUnicodeStringByKey(const UResourceBundle *resB, const char* key, UErrorCode* status) {
+ UnicodeString result;
+ int32_t len = 0;
+ const UChar* r = ures_getStringByKey(resB, key, &len, status);
+ if(U_SUCCESS(*status)) {
+ result.setTo(true, r, len); // NOTE(review): looks like the aliasing setTo() overload (r is not copied) - confirm in unistr.h
+ } else {
+ result.setToBogus();
+ }
+ return result;
+}
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Create a string enumerator, owned by the caller, of all locales located within
+ * the specified resource tree.
+ * @param packageName name of the tree, such as (NULL) or U_ICUDATA_ALIAS or "ICUDATA-coll"
+ * This call is similar to uloc_getAvailable().
+ * @param status error code
+ * @stable ICU 3.2
+ */
+U_CAPI UEnumeration* U_EXPORT2
+ures_openAvailableLocales(const char *packageName, UErrorCode *status);
+
+
+#endif /*_URES*/
+/*eof*/
diff --git a/thirdparty/icu4c/common/unicode/uscript.h b/thirdparty/icu4c/common/unicode/uscript.h
new file mode 100644
index 0000000000..8448afda76
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/uscript.h
@@ -0,0 +1,708 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ **********************************************************************
+ * Copyright (C) 1997-2016, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ *
+ * File USCRIPT.H
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ * 07/06/2001 Ram Creation.
+ ******************************************************************************
+ */
+
+#ifndef USCRIPT_H
+#define USCRIPT_H
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Unicode Script Information
+ */
+
+/**
+ * Constants for ISO 15924 script codes.
+ *
+ * The current set of script code constants supports at least all scripts
+ * that are encoded in the version of Unicode which ICU currently supports.
+ * The names of the constants are usually derived from the
+ * Unicode script property value aliases.
+ * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/)
+ * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
+ *
+ * In addition, constants for many ISO 15924 script codes
+ * are included, for use with language tags, CLDR data, and similar.
+ * Some of those codes are not used in the Unicode Character Database (UCD).
+ * For example, there are no characters that have a UCD script property value of
+ * Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
+ *
+ * Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.
+ *
+ * Starting with ICU 55, script codes are only added when their scripts
+ * have been or will certainly be encoded in Unicode,
+ * and have been assigned Unicode script property value aliases,
+ * to ensure that their script names are stable and match the names of the constants.
+ * Script codes like Latf and Aran that are not subject to separate encoding
+ * may be added at any time.
+ *
+ * @stable ICU 2.2
+ */
+typedef enum UScriptCode {
+ /*
+ * Note: UScriptCode constants and their ISO script code comments
+ * are parsed by preparseucd.py.
+ * It matches lines like
+ * USCRIPT_<Unicode Script value name> = <integer>, / * <ISO script code> * /
+ */
+
+ /** @stable ICU 2.2 */
+ USCRIPT_INVALID_CODE = -1,
+ /** @stable ICU 2.2 */
+ USCRIPT_COMMON = 0, /* Zyyy */
+ /** @stable ICU 2.2 */
+ USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
+ /** @stable ICU 2.2 */
+ USCRIPT_ARABIC = 2, /* Arab */
+ /** @stable ICU 2.2 */
+ USCRIPT_ARMENIAN = 3, /* Armn */
+ /** @stable ICU 2.2 */
+ USCRIPT_BENGALI = 4, /* Beng */
+ /** @stable ICU 2.2 */
+ USCRIPT_BOPOMOFO = 5, /* Bopo */
+ /** @stable ICU 2.2 */
+ USCRIPT_CHEROKEE = 6, /* Cher */
+ /** @stable ICU 2.2 */
+ USCRIPT_COPTIC = 7, /* Copt */
+ /** @stable ICU 2.2 */
+ USCRIPT_CYRILLIC = 8, /* Cyrl */
+ /** @stable ICU 2.2 */
+ USCRIPT_DESERET = 9, /* Dsrt */
+ /** @stable ICU 2.2 */
+ USCRIPT_DEVANAGARI = 10, /* Deva */
+ /** @stable ICU 2.2 */
+ USCRIPT_ETHIOPIC = 11, /* Ethi */
+ /** @stable ICU 2.2 */
+ USCRIPT_GEORGIAN = 12, /* Geor */
+ /** @stable ICU 2.2 */
+ USCRIPT_GOTHIC = 13, /* Goth */
+ /** @stable ICU 2.2 */
+ USCRIPT_GREEK = 14, /* Grek */
+ /** @stable ICU 2.2 */
+ USCRIPT_GUJARATI = 15, /* Gujr */
+ /** @stable ICU 2.2 */
+ USCRIPT_GURMUKHI = 16, /* Guru */
+ /** @stable ICU 2.2 */
+ USCRIPT_HAN = 17, /* Hani */
+ /** @stable ICU 2.2 */
+ USCRIPT_HANGUL = 18, /* Hang */
+ /** @stable ICU 2.2 */
+ USCRIPT_HEBREW = 19, /* Hebr */
+ /** @stable ICU 2.2 */
+ USCRIPT_HIRAGANA = 20, /* Hira */
+ /** @stable ICU 2.2 */
+ USCRIPT_KANNADA = 21, /* Knda */
+ /** @stable ICU 2.2 */
+ USCRIPT_KATAKANA = 22, /* Kana */
+ /** @stable ICU 2.2 */
+ USCRIPT_KHMER = 23, /* Khmr */
+ /** @stable ICU 2.2 */
+ USCRIPT_LAO = 24, /* Laoo */
+ /** @stable ICU 2.2 */
+ USCRIPT_LATIN = 25, /* Latn */
+ /** @stable ICU 2.2 */
+ USCRIPT_MALAYALAM = 26, /* Mlym */
+ /** @stable ICU 2.2 */
+ USCRIPT_MONGOLIAN = 27, /* Mong */
+ /** @stable ICU 2.2 */
+ USCRIPT_MYANMAR = 28, /* Mymr */
+ /** @stable ICU 2.2 */
+ USCRIPT_OGHAM = 29, /* Ogam */
+ /** @stable ICU 2.2 */
+ USCRIPT_OLD_ITALIC = 30, /* Ital */
+ /** @stable ICU 2.2 */
+ USCRIPT_ORIYA = 31, /* Orya */
+ /** @stable ICU 2.2 */
+ USCRIPT_RUNIC = 32, /* Runr */
+ /** @stable ICU 2.2 */
+ USCRIPT_SINHALA = 33, /* Sinh */
+ /** @stable ICU 2.2 */
+ USCRIPT_SYRIAC = 34, /* Syrc */
+ /** @stable ICU 2.2 */
+ USCRIPT_TAMIL = 35, /* Taml */
+ /** @stable ICU 2.2 */
+ USCRIPT_TELUGU = 36, /* Telu */
+ /** @stable ICU 2.2 */
+ USCRIPT_THAANA = 37, /* Thaa */
+ /** @stable ICU 2.2 */
+ USCRIPT_THAI = 38, /* Thai */
+ /** @stable ICU 2.2 */
+ USCRIPT_TIBETAN = 39, /* Tibt */
+ /** Canadian_Aboriginal script. @stable ICU 2.6 */
+ USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */
+ /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
+ USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
+ /** @stable ICU 2.2 */
+ USCRIPT_YI = 41, /* Yiii */
+ /* New scripts in Unicode 3.2 */
+ /** @stable ICU 2.2 */
+ USCRIPT_TAGALOG = 42, /* Tglg */
+ /** @stable ICU 2.2 */
+ USCRIPT_HANUNOO = 43, /* Hano */
+ /** @stable ICU 2.2 */
+ USCRIPT_BUHID = 44, /* Buhd */
+ /** @stable ICU 2.2 */
+ USCRIPT_TAGBANWA = 45, /* Tagb */
+
+ /* New scripts in Unicode 4 */
+ /** @stable ICU 2.6 */
+ USCRIPT_BRAILLE = 46, /* Brai */
+ /** @stable ICU 2.6 */
+ USCRIPT_CYPRIOT = 47, /* Cprt */
+ /** @stable ICU 2.6 */
+ USCRIPT_LIMBU = 48, /* Limb */
+ /** @stable ICU 2.6 */
+ USCRIPT_LINEAR_B = 49, /* Linb */
+ /** @stable ICU 2.6 */
+ USCRIPT_OSMANYA = 50, /* Osma */
+ /** @stable ICU 2.6 */
+ USCRIPT_SHAVIAN = 51, /* Shaw */
+ /** @stable ICU 2.6 */
+ USCRIPT_TAI_LE = 52, /* Tale */
+ /** @stable ICU 2.6 */
+ USCRIPT_UGARITIC = 53, /* Ugar */
+
+ /** New script code in Unicode 4.0.1 @stable ICU 3.0 */
+ USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */
+
+ /* New scripts in Unicode 4.1 */
+ /** @stable ICU 3.4 */
+ USCRIPT_BUGINESE = 55, /* Bugi */
+ /** @stable ICU 3.4 */
+ USCRIPT_GLAGOLITIC = 56, /* Glag */
+ /** @stable ICU 3.4 */
+ USCRIPT_KHAROSHTHI = 57, /* Khar */
+ /** @stable ICU 3.4 */
+ USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */
+ /** @stable ICU 3.4 */
+ USCRIPT_NEW_TAI_LUE = 59, /* Talu */
+ /** @stable ICU 3.4 */
+ USCRIPT_TIFINAGH = 60, /* Tfng */
+ /** @stable ICU 3.4 */
+ USCRIPT_OLD_PERSIAN = 61, /* Xpeo */
+
+ /* New script codes from Unicode and ISO 15924 */
+ /** @stable ICU 3.6 */
+ USCRIPT_BALINESE = 62, /* Bali */
+ /** @stable ICU 3.6 */
+ USCRIPT_BATAK = 63, /* Batk */
+ /** @stable ICU 3.6 */
+ USCRIPT_BLISSYMBOLS = 64, /* Blis */
+ /** @stable ICU 3.6 */
+ USCRIPT_BRAHMI = 65, /* Brah */
+ /** @stable ICU 3.6 */
+ USCRIPT_CHAM = 66, /* Cham */
+ /** @stable ICU 3.6 */
+ USCRIPT_CIRTH = 67, /* Cirt */
+ /** @stable ICU 3.6 */
+ USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */
+ /** @stable ICU 3.6 */
+ USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */
+ /** @stable ICU 3.6 */
+ USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */
+ /** @stable ICU 3.6 */
+ USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */
+ /** @stable ICU 3.6 */
+ USCRIPT_KHUTSURI = 72, /* Geok */
+ /** @stable ICU 3.6 */
+ USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */
+ /** @stable ICU 3.6 */
+ USCRIPT_TRADITIONAL_HAN = 74, /* Hant */
+ /** @stable ICU 3.6 */
+ USCRIPT_PAHAWH_HMONG = 75, /* Hmng */
+ /** @stable ICU 3.6 */
+ USCRIPT_OLD_HUNGARIAN = 76, /* Hung */
+ /** @stable ICU 3.6 */
+ USCRIPT_HARAPPAN_INDUS = 77, /* Inds */
+ /** @stable ICU 3.6 */
+ USCRIPT_JAVANESE = 78, /* Java */
+ /** @stable ICU 3.6 */
+ USCRIPT_KAYAH_LI = 79, /* Kali */
+ /** @stable ICU 3.6 */
+ USCRIPT_LATIN_FRAKTUR = 80, /* Latf */
+ /** @stable ICU 3.6 */
+ USCRIPT_LATIN_GAELIC = 81, /* Latg */
+ /** @stable ICU 3.6 */
+ USCRIPT_LEPCHA = 82, /* Lepc */
+ /** @stable ICU 3.6 */
+ USCRIPT_LINEAR_A = 83, /* Lina */
+ /** @stable ICU 4.6 */
+ USCRIPT_MANDAIC = 84, /* Mand */
+ /** Alias for USCRIPT_MANDAIC (same value). @stable ICU 3.6 */
+ USCRIPT_MANDAEAN = USCRIPT_MANDAIC,
+ /** @stable ICU 3.6 */
+ USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */
+ /** @stable ICU 4.6 */
+ USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */
+ /** Alias for USCRIPT_MEROITIC_HIEROGLYPHS (same value). @stable ICU 3.6 */
+ USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS,
+ /** @stable ICU 3.6 */
+ USCRIPT_NKO = 87, /* Nkoo */
+ /** @stable ICU 3.6 */
+ USCRIPT_ORKHON = 88, /* Orkh */
+ /** @stable ICU 3.6 */
+ USCRIPT_OLD_PERMIC = 89, /* Perm */
+ /** @stable ICU 3.6 */
+ USCRIPT_PHAGS_PA = 90, /* Phag */
+ /** @stable ICU 3.6 */
+ USCRIPT_PHOENICIAN = 91, /* Phnx */
+ /** @stable ICU 52 */
+ USCRIPT_MIAO = 92, /* Plrd */
+ /** Alias for USCRIPT_MIAO (same value). @stable ICU 3.6 */
+ USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO,
+ /** @stable ICU 3.6 */
+ USCRIPT_RONGORONGO = 93, /* Roro */
+ /** @stable ICU 3.6 */
+ USCRIPT_SARATI = 94, /* Sara */
+ /** @stable ICU 3.6 */
+ USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */
+ /** @stable ICU 3.6 */
+ USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */
+ /** @stable ICU 3.6 */
+ USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */
+ /** @stable ICU 3.6 */
+ USCRIPT_TENGWAR = 98, /* Teng */
+ /** @stable ICU 3.6 */
+ USCRIPT_VAI = 99, /* Vaii */
+ /** @stable ICU 3.6 */
+ USCRIPT_VISIBLE_SPEECH = 100,/* Visp */
+ /** @stable ICU 3.6 */
+ USCRIPT_CUNEIFORM = 101,/* Xsux */
+ /** @stable ICU 3.6 */
+ USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */
+ /** @stable ICU 3.6 */
+ USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
+
+ /** @stable ICU 3.8 */
+ USCRIPT_CARIAN = 104,/* Cari */
+ /** @stable ICU 3.8 */
+ USCRIPT_JAPANESE = 105,/* Jpan */
+ /** @stable ICU 3.8 */
+ USCRIPT_LANNA = 106,/* Lana */
+ /** @stable ICU 3.8 */
+ USCRIPT_LYCIAN = 107,/* Lyci */
+ /** @stable ICU 3.8 */
+ USCRIPT_LYDIAN = 108,/* Lydi */
+ /** @stable ICU 3.8 */
+ USCRIPT_OL_CHIKI = 109,/* Olck */
+ /** @stable ICU 3.8 */
+ USCRIPT_REJANG = 110,/* Rjng */
+ /** @stable ICU 3.8 */
+ USCRIPT_SAURASHTRA = 111,/* Saur */
+ /** Sutton SignWriting @stable ICU 3.8 */
+ USCRIPT_SIGN_WRITING = 112,/* Sgnw */
+ /** @stable ICU 3.8 */
+ USCRIPT_SUNDANESE = 113,/* Sund */
+ /** @stable ICU 3.8 */
+ USCRIPT_MOON = 114,/* Moon */
+ /** @stable ICU 3.8 */
+ USCRIPT_MEITEI_MAYEK = 115,/* Mtei */
+
+ /** @stable ICU 4.0 */
+ USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */
+ /** @stable ICU 4.0 */
+ USCRIPT_AVESTAN = 117,/* Avst */
+ /** @stable ICU 4.0 */
+ USCRIPT_CHAKMA = 118,/* Cakm */
+ /** @stable ICU 4.0 */
+ USCRIPT_KOREAN = 119,/* Kore */
+ /** @stable ICU 4.0 */
+ USCRIPT_KAITHI = 120,/* Kthi */
+ /** @stable ICU 4.0 */
+ USCRIPT_MANICHAEAN = 121,/* Mani */
+ /** @stable ICU 4.0 */
+ USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */
+ /** @stable ICU 4.0 */
+ USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */
+ /** @stable ICU 4.0 */
+ USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */
+ /** @stable ICU 4.0 */
+ USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */
+ /** @stable ICU 4.0 */
+ USCRIPT_SAMARITAN = 126,/* Samr */
+ /** @stable ICU 4.0 */
+ USCRIPT_TAI_VIET = 127,/* Tavt */
+ /** @stable ICU 4.0 */
+ USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */
+ /** @stable ICU 4.0 */
+ USCRIPT_SYMBOLS = 129,/* Zsym */
+
+ /** @stable ICU 4.4 */
+ USCRIPT_BAMUM = 130,/* Bamu */
+ /** @stable ICU 4.4 */
+ USCRIPT_LISU = 131,/* Lisu */
+ /** @stable ICU 4.4 */
+ USCRIPT_NAKHI_GEBA = 132,/* Nkgb */
+ /** @stable ICU 4.4 */
+ USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */
+
+ /** @stable ICU 4.6 */
+ USCRIPT_BASSA_VAH = 134,/* Bass */
+ /** @stable ICU 54 */
+ USCRIPT_DUPLOYAN = 135,/* Dupl */
+#ifndef U_HIDE_DEPRECATED_API
+ /** @deprecated ICU 54 Typo, use USCRIPT_DUPLOYAN */
+ USCRIPT_DUPLOYAN_SHORTAND = USCRIPT_DUPLOYAN,
+#endif /* U_HIDE_DEPRECATED_API */
+ /** @stable ICU 4.6 */
+ USCRIPT_ELBASAN = 136,/* Elba */
+ /** @stable ICU 4.6 */
+ USCRIPT_GRANTHA = 137,/* Gran */
+ /** @stable ICU 4.6 */
+ USCRIPT_KPELLE = 138,/* Kpel */
+ /** @stable ICU 4.6 */
+ USCRIPT_LOMA = 139,/* Loma */
+ /** Mende Kikakui @stable ICU 4.6 */
+ USCRIPT_MENDE = 140,/* Mend */
+ /** @stable ICU 4.6 */
+ USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */
+ /** @stable ICU 4.6 */
+ USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */
+ /** @stable ICU 4.6 */
+ USCRIPT_NABATAEAN = 143,/* Nbat */
+ /** @stable ICU 4.6 */
+ USCRIPT_PALMYRENE = 144,/* Palm */
+ /** @stable ICU 54 */
+ USCRIPT_KHUDAWADI = 145,/* Sind */
+ /** Alias for USCRIPT_KHUDAWADI (same value). @stable ICU 4.6 */
+ USCRIPT_SINDHI = USCRIPT_KHUDAWADI,
+ /** @stable ICU 4.6 */
+ USCRIPT_WARANG_CITI = 146,/* Wara */
+
+ /** @stable ICU 4.8 */
+ USCRIPT_AFAKA = 147,/* Afak */
+ /** @stable ICU 4.8 */
+ USCRIPT_JURCHEN = 148,/* Jurc */
+ /** @stable ICU 4.8 */
+ USCRIPT_MRO = 149,/* Mroo */
+ /** @stable ICU 4.8 */
+ USCRIPT_NUSHU = 150,/* Nshu */
+ /** @stable ICU 4.8 */
+ USCRIPT_SHARADA = 151,/* Shrd */
+ /** @stable ICU 4.8 */
+ USCRIPT_SORA_SOMPENG = 152,/* Sora */
+ /** @stable ICU 4.8 */
+ USCRIPT_TAKRI = 153,/* Takr */
+ /** @stable ICU 4.8 */
+ USCRIPT_TANGUT = 154,/* Tang */
+ /** @stable ICU 4.8 */
+ USCRIPT_WOLEAI = 155,/* Wole */
+
+ /** @stable ICU 49 */
+ USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */
+ /** @stable ICU 49 */
+ USCRIPT_KHOJKI = 157,/* Khoj */
+ /** @stable ICU 49 */
+ USCRIPT_TIRHUTA = 158,/* Tirh */
+
+ /** @stable ICU 52 */
+ USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */
+ /** @stable ICU 52 */
+ USCRIPT_MAHAJANI = 160,/* Mahj */
+
+ /** @stable ICU 54 */
+ USCRIPT_AHOM = 161,/* Ahom */
+ /** @stable ICU 54 */
+ USCRIPT_HATRAN = 162,/* Hatr */
+ /** @stable ICU 54 */
+ USCRIPT_MODI = 163,/* Modi */
+ /** @stable ICU 54 */
+ USCRIPT_MULTANI = 164,/* Mult */
+ /** @stable ICU 54 */
+ USCRIPT_PAU_CIN_HAU = 165,/* Pauc */
+ /** @stable ICU 54 */
+ USCRIPT_SIDDHAM = 166,/* Sidd */
+
+ /** @stable ICU 58 */
+ USCRIPT_ADLAM = 167,/* Adlm */
+ /** @stable ICU 58 */
+ USCRIPT_BHAIKSUKI = 168,/* Bhks */
+ /** @stable ICU 58 */
+ USCRIPT_MARCHEN = 169,/* Marc */
+ /** @stable ICU 58 */
+ USCRIPT_NEWA = 170,/* Newa */
+ /** @stable ICU 58 */
+ USCRIPT_OSAGE = 171,/* Osge */
+
+ /** @stable ICU 58 */
+ USCRIPT_HAN_WITH_BOPOMOFO = 172,/* Hanb */
+ /** @stable ICU 58 */
+ USCRIPT_JAMO = 173,/* Jamo */
+ /** @stable ICU 58 */
+ USCRIPT_SYMBOLS_EMOJI = 174,/* Zsye */
+
+ /** @stable ICU 60 */
+ USCRIPT_MASARAM_GONDI = 175,/* Gonm */
+ /** @stable ICU 60 */
+ USCRIPT_SOYOMBO = 176,/* Soyo */
+ /** @stable ICU 60 */
+ USCRIPT_ZANABAZAR_SQUARE = 177,/* Zanb */
+
+ /** @stable ICU 62 */
+ USCRIPT_DOGRA = 178,/* Dogr */
+ /** @stable ICU 62 */
+ USCRIPT_GUNJALA_GONDI = 179,/* Gong */
+ /** @stable ICU 62 */
+ USCRIPT_MAKASAR = 180,/* Maka */
+ /** @stable ICU 62 */
+ USCRIPT_MEDEFAIDRIN = 181,/* Medf */
+ /** @stable ICU 62 */
+ USCRIPT_HANIFI_ROHINGYA = 182,/* Rohg */
+ /** @stable ICU 62 */
+ USCRIPT_SOGDIAN = 183,/* Sogd */
+ /** @stable ICU 62 */
+ USCRIPT_OLD_SOGDIAN = 184,/* Sogo */
+
+ /** @stable ICU 64 */
+ USCRIPT_ELYMAIC = 185,/* Elym */
+ /** @stable ICU 64 */
+ USCRIPT_NYIAKENG_PUACHUE_HMONG = 186,/* Hmnp */
+ /** @stable ICU 64 */
+ USCRIPT_NANDINAGARI = 187,/* Nand */
+ /** @stable ICU 64 */
+ USCRIPT_WANCHO = 188,/* Wcho */
+
+ /** @stable ICU 66 */
+ USCRIPT_CHORASMIAN = 189,/* Chrs */
+ /** @stable ICU 66 */
+ USCRIPT_DIVES_AKURU = 190,/* Diak */
+ /** @stable ICU 66 */
+ USCRIPT_KHITAN_SMALL_SCRIPT = 191,/* Kits */
+ /** @stable ICU 66 */
+ USCRIPT_YEZIDI = 192,/* Yezi */
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UScriptCode value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ USCRIPT_CODE_LIMIT = 193
+#endif // U_HIDE_DEPRECATED_API
+} UScriptCode;
+
+/**
+ * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
+ * Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym".
+ * Fills in USCRIPT_LATIN given "en" OR "en_US"
+ * If the required capacity is greater than the capacity of the destination buffer,
+ * then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.
+ *
+ * <p>Note: To search by short or long script alias only, use
+ * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does
+ * a fast lookup with no access of the locale data.
+ *
+ * @param nameOrAbbrOrLocale name of the script, as given in
+ * PropertyValueAliases.txt, or ISO 15924 code or locale
+ * @param fillIn the UScriptCode buffer to fill in the script code
+ * @param capacity the capacity (size) of UScriptCode buffer passed in.
+ * @param err the error status code.
+ * @return The number of script codes filled in the buffer passed in
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
+
+/**
+ * Returns the long Unicode script name, if there is one.
+ * Otherwise returns the 4-letter ISO 15924 script code.
+ * Returns "Malayalam" given USCRIPT_MALAYALAM.
+ *
+ * @param scriptCode UScriptCode enum
+ * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code,
+ * or NULL if scriptCode is invalid
+ * @stable ICU 2.4
+ */
+U_CAPI const char* U_EXPORT2
+uscript_getName(UScriptCode scriptCode);
+
+/**
+ * Returns the 4-letter ISO 15924 script code,
+ * which is the same as the short Unicode script name if Unicode has names for the script.
+ * Returns "Mlym" given USCRIPT_MALAYALAM.
+ *
+ * @param scriptCode UScriptCode enum
+ * @return short script name (4-letter code), or NULL if scriptCode is invalid
+ * @stable ICU 2.4
+ */
+U_CAPI const char* U_EXPORT2
+uscript_getShortName(UScriptCode scriptCode);
+
+/**
+ * Gets the script code associated with the given codepoint.
+ * Returns USCRIPT_MALAYALAM given 0x0D02
+ * @param codepoint UChar32 codepoint
+ * @param err the error status code.
+ * @return The UScriptCode, or 0 if codepoint is invalid
+ * @stable ICU 2.4
+ */
+U_CAPI UScriptCode U_EXPORT2
+uscript_getScript(UChar32 codepoint, UErrorCode *err);
+
+/**
+ * Do the Script_Extensions of code point c contain script sc?
+ * If c does not have explicit Script_Extensions, then this tests whether
+ * c has the Script property value sc.
+ *
+ * Some characters are commonly used in multiple scripts.
+ * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
+ * @param c code point
+ * @param sc script code
+ * @return true if sc is in Script_Extensions(c)
+ * @stable ICU 49
+ */
+U_CAPI UBool U_EXPORT2
+uscript_hasScript(UChar32 c, UScriptCode sc);
+
+/**
+ * Writes code point c's Script_Extensions as a list of UScriptCode values
+ * to the output scripts array and returns the number of script codes.
+ * - If c does have Script_Extensions, then the Script property value
+ * (normally Common or Inherited) is not included.
+ * - If c does not have Script_Extensions, then the one Script code is written to the output array.
+ * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written.
+ * In other words, if the return value is 1,
+ * then the output array contains exactly c's single Script code.
+ * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.
+ *
+ * Some characters are commonly used in multiple scripts.
+ * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
+ *
+ * If there are more than capacity script codes to be written, then
+ * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned.
+ * (Usual ICU buffer handling behavior.)
+ *
+ * @param c code point
+ * @param scripts output script code array
+ * @param capacity capacity of the scripts array
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return number of script codes in c's Script_Extensions, or 1 for the single Script value,
+ * written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
+ * @stable ICU 49
+ */
+U_CAPI int32_t U_EXPORT2
+uscript_getScriptExtensions(UChar32 c,
+ UScriptCode *scripts, int32_t capacity,
+ UErrorCode *errorCode);
+
+/**
+ * Script usage constants; no enumerator has an explicit value, so they number 0..5 in declaration order.
+ * See UAX #31 Unicode Identifier and Pattern Syntax.
+ * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
+ *
+ * @stable ICU 51
+ */
+typedef enum UScriptUsage {
+ /** Not encoded in Unicode. @stable ICU 51 */
+ USCRIPT_USAGE_NOT_ENCODED,
+ /** Unknown script usage. @stable ICU 51 */
+ USCRIPT_USAGE_UNKNOWN,
+ /** Candidate for Exclusion from Identifiers. @stable ICU 51 */
+ USCRIPT_USAGE_EXCLUDED,
+ /** Limited Use script. @stable ICU 51 */
+ USCRIPT_USAGE_LIMITED_USE,
+ /** Aspirational Use script. @stable ICU 51 */
+ USCRIPT_USAGE_ASPIRATIONAL,
+ /** Recommended script. @stable ICU 51 */
+ USCRIPT_USAGE_RECOMMENDED
+} UScriptUsage;
+
+/**
+ * Writes the script sample character string.
+ * This string normally consists of one code point but might be longer.
+ * The string is empty if the script is not encoded.
+ *
+ * @param script script code
+ * @param dest output string array
+ * @param capacity number of UChars in the dest array
+ * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input
+ * @return the string length, even if U_BUFFER_OVERFLOW_ERROR
+ * @stable ICU 51
+ */
+U_CAPI int32_t U_EXPORT2
+uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+class UnicodeString;
+U_NAMESPACE_END
+
+/**
+ * Returns the script sample character string.
+ * This string normally consists of one code point but might be longer.
+ * The string is empty if the script is not encoded.
+ *
+ * @param script script code
+ * @return the sample character string
+ * @stable ICU 51
+ */
+U_COMMON_API icu::UnicodeString U_EXPORT2
+uscript_getSampleUnicodeString(UScriptCode script);
+
+#endif
+
+/**
+ * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
+ * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
+ *
+ * @param script script code
+ * @return script usage
+ * @see UScriptUsage
+ * @stable ICU 51
+ */
+U_CAPI UScriptUsage U_EXPORT2
+uscript_getUsage(UScriptCode script);
+
+/**
+ * Returns true if the script is written right-to-left.
+ * For example, Arab and Hebr.
+ *
+ * @param script script code
+ * @return true if the script is right-to-left
+ * @stable ICU 51
+ */
+U_CAPI UBool U_EXPORT2
+uscript_isRightToLeft(UScriptCode script);
+
+/**
+ * Returns true if the script allows line breaks between letters (excluding hyphenation).
+ * Such a script typically requires dictionary-based line breaking.
+ * For example, Hani and Thai.
+ *
+ * @param script script code
+ * @return true if the script allows line breaks between letters
+ * @stable ICU 51
+ */
+U_CAPI UBool U_EXPORT2
+uscript_breaksBetweenLetters(UScriptCode script);
+
+/**
+ * Returns true if in modern (or most recent) usage of the script case distinctions are customary.
+ * For example, Latn and Cyrl.
+ *
+ * @param script script code
+ * @return true if the script is cased
+ * @stable ICU 51
+ */
+U_CAPI UBool U_EXPORT2
+uscript_isCased(UScriptCode script);
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/uset.h b/thirdparty/icu4c/common/unicode/uset.h
new file mode 100644
index 0000000000..502ea8dc14
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/uset.h
@@ -0,0 +1,1137 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uset.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002mar07
+* created by: Markus W. Scherer
+*
+* C version of UnicodeSet.
+*/
+
+
+/**
+ * \file
+ * \brief C API: Unicode Set
+ *
+ * <p>This is a C wrapper around the C++ UnicodeSet class.</p>
+ */
+
+#ifndef __USET_H__
+#define __USET_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+#ifndef USET_DEFINED
+
+#ifndef U_IN_DOXYGEN
+#define USET_DEFINED
+#endif
+/**
+ * USet is the C API type corresponding to C++ class UnicodeSet.
+ * Use the uset_* API to manipulate. Create with
+ * uset_open*, and destroy with uset_close.
+ * @stable ICU 2.4
+ */
+typedef struct USet USet;
+#endif
+
+/**
+ * Bitmask values to be passed to uset_openPatternOptions() or
+ * uset_applyPattern() taking an option parameter. Each constant is a distinct bit (1, 2, 4).
+ * @stable ICU 2.4
+ */
+enum {
+ /**
+ * Ignore white space within patterns unless quoted or escaped.
+ * @stable ICU 2.4
+ */
+ USET_IGNORE_SPACE = 1,
+
+ /**
+ * Enable case insensitive matching. E.g., "[ab]" with this flag
+ * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will
+ * match all except 'a', 'A', 'b', and 'B'. This performs a full
+ * closure over case mappings, e.g. U+017F for s.
+ *
+ * The resulting set is a superset of the input for the code points but
+ * not for the strings.
+ * It performs a case mapping closure of the code points and adds
+ * full case folding strings for the code points, and reduces strings of
+ * the original set to their full case folding equivalents.
+ *
+ * This is designed for case-insensitive matches, for example
+ * in regular expressions. The full code point case closure allows checking of
+ * an input character directly against the closure set.
+ * Strings are matched by comparing the case-folded form from the closure
+ * set with an incremental case folding of the string in question.
+ *
+ * The closure set will also contain single code points if the original
+ * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
+ * This is not necessary (that is, redundant) for the above matching method
+ * but results in the same closure sets regardless of whether the original
+ * set contained the code point or a string.
+ *
+ * @stable ICU 2.4
+ */
+ USET_CASE_INSENSITIVE = 2,
+
+ /**
+ * Enable case insensitive matching. E.g., "[ab]" with this flag
+ * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will
+ * match all except 'a', 'A', 'b', and 'B'. This adds the lower-,
+ * title-, and uppercase mappings as well as the case folding
+ * of each existing element in the set.
+ * @stable ICU 3.2
+ */
+ USET_ADD_CASE_MAPPINGS = 4
+};
+
+/**
+ * Argument values for whether span() and similar functions continue while
+ * the current character is contained vs. not contained in the set.
+ *
+ * The functionality is straightforward for sets with only single code points,
+ * without strings (which is the common case):
+ * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same.
+ * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED.
+ * - span() and spanBack() partition any string the same way when
+ * alternating between span(USET_SPAN_NOT_CONTAINED) and
+ * span(either "contained" condition).
+ * - Using a complemented (inverted) set and the opposite span conditions
+ * yields the same results.
+ *
+ * When a set contains multi-code point strings, then these statements may not
+ * be true, depending on the strings in the set (for example, whether they
+ * overlap with each other) and the string that is processed.
+ * For a set with strings:
+ * - The complement of the set contains the opposite set of code points,
+ * but the same set of strings.
+ * Therefore, complementing both the set and the span conditions
+ * may yield different results.
+ * - When starting spans at different positions in a string
+ * (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different
+ * because a set string may start before the later position.
+ * - span(USET_SPAN_SIMPLE) may be shorter than
+ * span(USET_SPAN_CONTAINED) because it will not recursively try
+ * all possible paths.
+ * For example, with a set which contains the three strings "xy", "xya" and "ax",
+ * span("xyax", USET_SPAN_CONTAINED) will return 4 but
+ * span("xyax", USET_SPAN_SIMPLE) will return 3.
+ * span(USET_SPAN_SIMPLE) will never be longer than
+ * span(USET_SPAN_CONTAINED).
+ * - With either "contained" condition, span() and spanBack() may partition
+ * a string in different ways.
+ * For example, with a set which contains the two strings "ab" and "ba",
+ * and when processing the string "aba",
+ * span() will yield contained/not-contained boundaries of { 0, 2, 3 }
+ * while spanBack() will yield boundaries of { 0, 1, 3 }.
+ *
+ * Note: If it is important to get the same boundaries whether iterating forward
+ * or backward through a string, then either only span() should be used and
+ * the boundaries cached for backward operation, or an ICU BreakIterator
+ * could be used.
+ *
+ * Note: Unpaired surrogates are treated like surrogate code points.
+ * Similarly, set strings match only on code point boundaries,
+ * never in the middle of a surrogate pair.
+ * Illegal UTF-8 sequences are treated like U+FFFD.
+ * When processing UTF-8 strings, malformed set strings
+ * (strings with unpaired surrogates which cannot be converted to UTF-8)
+ * are ignored.
+ *
+ * @stable ICU 3.8
+ */
+typedef enum USetSpanCondition {
+ /**
+ * Continues a span() while there is no set element at the current position.
+ * Increments by one code point at a time.
+ * Stops before the first set element (character or string).
+ * (For code points only, this is like while contains(current)==false).
+ *
+ * When span() returns, the substring between where it started and the position
+ * it returned consists only of characters that are not in the set,
+ * and none of its strings overlap with the span.
+ *
+ * @stable ICU 3.8
+ */
+ USET_SPAN_NOT_CONTAINED = 0,
+ /**
+ * Spans the longest substring that is a concatenation of set elements (characters or strings).
+ * (For characters only, this is like while contains(current)==true).
+ *
+ * When span() returns, the substring between where it started and the position
+ * it returned consists only of set elements (characters or strings) that are in the set.
+ *
+ * If a set contains strings, then the span will be the longest substring for which there
+ * exists at least one non-overlapping concatenation of set elements (characters or strings).
+ * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>.
+ * (Java/ICU/Perl regex stops at the first match of an OR.)
+ *
+ * @stable ICU 3.8
+ */
+ USET_SPAN_CONTAINED = 1,
+ /**
+ * Continues a span() while there is a set element at the current position.
+ * Increments by the longest matching element at each position.
+ * (For characters only, this is like while contains(current)==true).
+ *
+ * When span() returns, the substring between where it started and the position
+ * it returned consists only of set elements (characters or strings) that are in the set.
+ *
+ * If a set only contains single characters, then this is the same
+ * as USET_SPAN_CONTAINED.
+ *
+ * If a set contains strings, then the span will be the longest substring
+ * with a match at each position with the longest single set element (character or string).
+ *
+ * Use this span condition together with other longest-match algorithms,
+ * such as ICU converters (ucnv_getUnicodeSet()).
+ *
+ * @stable ICU 3.8
+ */
+ USET_SPAN_SIMPLE = 2,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the last span condition (implicitly 3, since it follows USET_SPAN_SIMPLE = 2).
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ USET_SPAN_CONDITION_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} USetSpanCondition;
+
+enum {
+ /**
+ * Capacity of USerializedSet::staticArray, counted in uint16_t elements.
+ * Enough for any single-code point set.
+ * Also provides padding for nice sizeof(USerializedSet).
+ * @stable ICU 2.4
+ */
+ USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
+};
+
+/**
+ * A serialized form of a Unicode set. Limited manipulations are
+ * possible directly on a serialized set; see the "Serialized set API"
+ * functions declared later in this header (uset_getSerializedSet(),
+ * uset_setSerializedToOne(), uset_serializedContains(),
+ * uset_getSerializedRangeCount(), uset_getSerializedRange()).
+ * @stable ICU 2.4
+ */
+typedef struct USerializedSet {
+ /**
+ * The serialized Unicode Set, as an array of 16-bit units.
+ * @stable ICU 2.4
+ */
+ const uint16_t *array;
+ /**
+ * The length of the part of the array that contains BMP characters.
+ * @stable ICU 2.4
+ */
+ int32_t bmpLength;
+ /**
+ * The total length of the array.
+ * @stable ICU 2.4
+ */
+ int32_t length;
+ /**
+ * A small buffer for the array to reduce memory allocations.
+ * Holds USET_SERIALIZED_STATIC_ARRAY_CAPACITY 16-bit units.
+ * @stable ICU 2.4
+ */
+ uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
+} USerializedSet;
+
+/*********************************************************************
+ * USet API
+ *********************************************************************/
+
+/**
+ * Create an empty USet object.
+ * Equivalent to uset_open(1, 0).
+ * @return a newly created USet. The caller must call uset_close() on
+ * it when done.
+ * @stable ICU 4.2
+ */
+U_CAPI USet* U_EXPORT2
+uset_openEmpty(void);
+
+/**
+ * Creates a USet object that contains the range of characters
+ * start..end, inclusive. If <code>start > end</code>
+ * then an empty set is created (same as using uset_openEmpty()).
+ * @param start first character of the range, inclusive
+ * @param end last character of the range, inclusive
+ * @return a newly created USet. The caller must call uset_close() on
+ * it when done.
+ * @stable ICU 2.4
+ */
+U_CAPI USet* U_EXPORT2
+uset_open(UChar32 start, UChar32 end);
+
+/**
+ * Creates a set from the given pattern. See the UnicodeSet class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param patternLength the length of the pattern, or -1 if null
+ * terminated
+ * @param ec the error code
+ * @stable ICU 2.4
+ */
+U_CAPI USet* U_EXPORT2
+uset_openPattern(const UChar* pattern, int32_t patternLength,
+ UErrorCode* ec);
+
+/**
+ * Creates a set from the given pattern. See the UnicodeSet class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param patternLength the length of the pattern, or -1 if null
+ * terminated
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param ec the error code
+ * @stable ICU 2.4
+ */
+U_CAPI USet* U_EXPORT2
+uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
+ uint32_t options,
+ UErrorCode* ec);
+
+/**
+ * Disposes of the storage used by a USet object. This function should
+ * be called exactly once for objects returned by uset_open().
+ * @param set the object to dispose of
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_close(USet* set);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUSetPointer
+ * "Smart pointer" class, closes a USet via uset_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Returns a copy of this object.
+ * If this set is frozen, then the clone will be frozen as well.
+ * Use uset_cloneAsThawed() for a mutable clone of a frozen set.
+ * @param set the original set
+ * @return the newly allocated copy of the set
+ * @see uset_cloneAsThawed
+ * @stable ICU 3.8
+ */
+U_CAPI USet * U_EXPORT2
+uset_clone(const USet *set);
+
+/**
+ * Determines whether the set has been frozen (made immutable) or not.
+ * See the ICU4J Freezable interface for details.
+ * @param set the set
+ * @return true/false for whether the set has been frozen
+ * @see uset_freeze
+ * @see uset_cloneAsThawed
+ * @stable ICU 3.8
+ */
+U_CAPI UBool U_EXPORT2
+uset_isFrozen(const USet *set);
+
+/**
+ * Freeze the set (make it immutable).
+ * Once frozen, it cannot be unfrozen and is therefore thread-safe
+ * until it is deleted.
+ * See the ICU4J Freezable interface for details.
+ * Freezing the set may also make some operations faster, for example
+ * uset_contains() and uset_span().
+ * A frozen set will not be modified. (It remains frozen.)
+ * @param set the set; it is frozen in place
+ * (this function is declared void and returns nothing)
+ * @see uset_isFrozen
+ * @see uset_cloneAsThawed
+ * @stable ICU 3.8
+ */
+U_CAPI void U_EXPORT2
+uset_freeze(USet *set);
+
+/**
+ * Clone the set and make the clone mutable.
+ * See the ICU4J Freezable interface for details.
+ * @param set the set
+ * @return the mutable clone
+ * @see uset_freeze
+ * @see uset_isFrozen
+ * @see uset_clone
+ * @stable ICU 3.8
+ */
+U_CAPI USet * U_EXPORT2
+uset_cloneAsThawed(const USet *set);
+
+/**
+ * Causes the USet object to represent the range <code>start - end</code>.
+ * If <code>start > end</code> then this USet is set to an empty range.
+ * A frozen set will not be modified.
+ * @param set the object to set to the given range
+ * @param start first character in the set, inclusive
+ * @param end last character in the set, inclusive
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_set(USet* set,
+ UChar32 start, UChar32 end);
+
+/**
+ * Modifies the set to represent the set specified by the given
+ * pattern. See the UnicodeSet class description for the syntax of
+ * the pattern language. See also the User Guide chapter about UnicodeSet.
+ * <em>Empties the set passed before applying the pattern.</em>
+ * A frozen set will not be modified.
+ * @param set The set to which the pattern is to be applied.
+ * @param pattern A pointer to UChar string specifying what characters are in the set.
+ * The character at pattern[0] must be a '['.
+ * @param patternLength The length of the UChar string. -1 if NUL terminated.
+ * @param options A bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param status Returns an error if the pattern cannot be parsed.
+ * @return Upon successful parse, the value is
+ * the index of the character after the closing ']'
+ * of the parsed pattern.
+ * If the status code indicates failure, then the return value
+ * is the index of the error in the source.
+ *
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uset_applyPattern(USet *set,
+ const UChar *pattern, int32_t patternLength,
+ uint32_t options,
+ UErrorCode *status);
+
+/**
+ * Modifies the set to contain those code points which have the given value
+ * for the given binary or enumerated property, as returned by
+ * u_getIntPropertyValue. Prior contents of this set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to contain the code points defined by the property
+ *
+ * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
+ * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
+ * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
+ *
+ * @param value a value in the range u_getIntPropertyMinValue(prop)..
+ * u_getIntPropertyMaxValue(prop), with one exception. If prop is
+ * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
+ * rather a mask value produced by U_GET_GC_MASK(). This allows grouped
+ * categories such as [:L:] to be represented.
+ *
+ * @param ec error code input/output parameter
+ *
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_applyIntPropertyValue(USet* set,
+ UProperty prop, int32_t value, UErrorCode* ec);
+
+/**
+ * Modifies the set to contain those code points which have the
+ * given value for the given property. Prior contents of this
+ * set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to contain the code points defined by the given
+ * property and value alias
+ *
+ * @param prop a string specifying a property alias, either short or long.
+ * The name is matched loosely. See PropertyAliases.txt for names and a
+ * description of loose matching. If the value string is empty, then this
+ * string is interpreted as either a General_Category value alias, a Script
+ * value alias, a binary property alias, or a special ID. Special IDs are
+ * matched loosely and correspond to the following sets:
+ *
+ * "ANY" = [\\u0000-\\U0010FFFF],
+ * "ASCII" = [\\u0000-\\u007F],
+ * "Assigned" = [:^Cn:].
+ *
+ * @param propLength the length of the prop, or -1 if NUL-terminated
+ *
+ * @param value a string specifying a value alias, either short or long.
+ * The name is matched loosely. See PropertyValueAliases.txt for names
+ * and a description of loose matching. In addition to aliases listed,
+ * numeric values and canonical combining classes may be expressed
+ * numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string
+ * may also be empty.
+ *
+ * @param valueLength the length of the value, or -1 if NUL-terminated
+ *
+ * @param ec error code input/output parameter
+ *
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_applyPropertyAlias(USet* set,
+ const UChar *prop, int32_t propLength,
+ const UChar *value, int32_t valueLength,
+ UErrorCode* ec);
+
+/**
+ * Return true if the given position, in the given pattern, appears
+ * to be the start of a UnicodeSet pattern.
+ *
+ * @param pattern a string specifying the pattern
+ * @param patternLength the length of the pattern, or -1 if NUL-terminated
+ * @param pos the given position
+ * @stable ICU 3.2
+ */
+U_CAPI UBool U_EXPORT2
+uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
+ int32_t pos);
+
+/**
+ * Returns a string representation of this set. If the result of
+ * calling this function is passed to a uset_openPattern(), it
+ * will produce another set that is equal to this one.
+ * @param set the set
+ * @param result the string to receive the rules, may be NULL
+ * @param resultCapacity the capacity of result, may be 0 if result is NULL
+ * @param escapeUnprintable if true then convert unprintable
+ * character to their hex escape representations, \\uxxxx or
+ * \\Uxxxxxxxx. Unprintable characters are those other than
+ * U+000A, U+0020..U+007E.
+ * @param ec error code.
+ * @return length of string, possibly larger than resultCapacity
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uset_toPattern(const USet* set,
+ UChar* result, int32_t resultCapacity,
+ UBool escapeUnprintable,
+ UErrorCode* ec);
+
+/**
+ * Adds the given character to the given USet. After this call,
+ * uset_contains(set, c) will return true.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param c the character to add
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_add(USet* set, UChar32 c);
+
+/**
+ * Adds all of the elements in the specified set to this set if
+ * they're not already present. This operation effectively
+ * modifies this set so that its value is the <i>union</i> of the two
+ * sets. The behavior of this operation is unspecified if the specified
+ * collection is modified while the operation is in progress.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to which to add the set
+ * @param additionalSet the source set whose elements are to be added to this set.
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+uset_addAll(USet* set, const USet *additionalSet);
+
+/**
+ * Adds the given range of characters to the given USet. After this call,
+ * uset_containsRange(set, start, end) will return true.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the range
+ * @param start the first character of the range to add, inclusive
+ * @param end the last character of the range to add, inclusive
+ * @stable ICU 2.2
+ */
+U_CAPI void U_EXPORT2
+uset_addRange(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Adds the given string to the given USet. After this call,
+ * uset_containsString(set, str, strLen) will return true.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the string
+ * @param str the string to add
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_addString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
+ * If this set already contains any particular character, it has no effect on that character.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param str the source string
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
+
+/**
+ * Removes the given character from the given USet. After this call,
+ * uset_contains(set, c) will return false.
+ * A frozen set will not be modified.
+ * @param set the object from which to remove the character
+ * @param c the character to remove
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_remove(USet* set, UChar32 c);
+
+/**
+ * Removes the given range of characters from the given USet. After this call,
+ * uset_containsRange(set, start, end) will return false.
+ * A frozen set will not be modified.
+ * @param set the object from which to remove the range
+ * @param start the first character of the range to remove, inclusive
+ * @param end the last character of the range to remove, inclusive
+ * @stable ICU 2.2
+ */
+U_CAPI void U_EXPORT2
+uset_removeRange(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Removes the given string from the given USet. After this call,
+ * uset_containsString(set, str, strLen) will return false.
+ * A frozen set will not be modified.
+ * @param set the object from which to remove the string
+ * @param str the string to remove
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_removeString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Removes from this set all of its elements that are contained in the
+ * specified set. This operation effectively modifies this
+ * set so that its value is the <i>asymmetric set difference</i> of
+ * the two sets.
+ * A frozen set will not be modified.
+ * @param set the object from which the elements are to be removed
+ * @param removeSet the object that defines which elements will be
+ * removed from this set
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_removeAll(USet* set, const USet* removeSet);
+
+/**
+ * Retain only the elements in this set that are contained in the
+ * specified range. If <code>start > end</code> then an empty range is
+ * retained, leaving the set empty. This is equivalent to
+ * a boolean logic AND, or a set INTERSECTION.
+ * A frozen set will not be modified.
+ *
+ * @param set the object for which to retain only the specified range
+ * @param start first character, inclusive, of range to be retained
+ * to this set.
+ * @param end last character, inclusive, of range to be retained
+ * to this set.
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_retain(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Retains only the elements in this set that are contained in the
+ * specified set. In other words, removes from this set all of
+ * its elements that are not contained in the specified set. This
+ * operation effectively modifies this set so that its value is
+ * the <i>intersection</i> of the two sets.
+ * A frozen set will not be modified.
+ *
+ * @param set the object on which to perform the retain
+ * @param retain set that defines which elements this set will retain
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_retainAll(USet* set, const USet* retain);
+
+/**
+ * Reallocate this object's internal structures to take up the least
+ * possible space, without changing this object's value.
+ * A frozen set will not be modified.
+ *
+ * @param set the object on which to perform the compact
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_compact(USet* set);
+
+/**
+ * Inverts this set. This operation modifies this set so that
+ * its value is its complement. This operation does not affect
+ * the multicharacter strings, if any.
+ * A frozen set will not be modified.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_complement(USet* set);
+
+/**
+ * Complements in this set all elements contained in the specified
+ * set. Any character in the other set will be removed if it is
+ * in this set, or will be added if it is not in this set.
+ * A frozen set will not be modified.
+ *
+ * @param set the set with which to complement
+ * @param complement set that defines which elements will be xor'ed
+ * from this set.
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_complementAll(USet* set, const USet* complement);
+
+/**
+ * Removes all of the elements from this set. This set will be
+ * empty after this call returns.
+ * A frozen set will not be modified.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_clear(USet* set);
+
+/**
+ * Close this set over the given attribute. For the attribute
+ * USET_CASE, the result is to modify this set so that:
+ *
+ * 1. For each character or string 'a' in this set, all strings or
+ * characters 'b' such that foldCase(a) == foldCase(b) are added
+ * to this set.
+ *
+ * 2. For each string 'e' in the resulting set, if e !=
+ * foldCase(e), 'e' will be removed.
+ *
+ * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
+ *
+ * (Here foldCase(x) refers to the operation u_strFoldCase, and a
+ * == b denotes that the contents are the same, not pointer
+ * comparison.)
+ *
+ * A frozen set will not be modified.
+ *
+ * @param set the set
+ *
+ * @param attributes bitmask for attributes to close over.
+ * Currently only the USET_CASE bit is supported. Any undefined bits
+ * are ignored.
+ * @stable ICU 4.2
+ */
+U_CAPI void U_EXPORT2
+uset_closeOver(USet* set, int32_t attributes);
+
+/**
+ * Remove all strings from this set.
+ *
+ * @param set the set
+ * @stable ICU 4.2
+ */
+U_CAPI void U_EXPORT2
+uset_removeAllStrings(USet* set);
+
+/**
+ * Returns true if the given USet contains no characters and no
+ * strings.
+ * @param set the set
+ * @return true if set is empty
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_isEmpty(const USet* set);
+
+/**
+ * Returns true if the given USet contains the given character.
+ * This function works faster with a frozen set.
+ * @param set the set
+ * @param c The codepoint to check for within the set
+ * @return true if set contains c
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_contains(const USet* set, UChar32 c);
+
+/**
+ * Returns true if the given USet contains all characters c
+ * where start <= c && c <= end.
+ * @param set the set
+ * @param start the first character of the range to test, inclusive
+ * @param end the last character of the range to test, inclusive
+ * @return true if set contains the range
+ * @stable ICU 2.2
+ */
+U_CAPI UBool U_EXPORT2
+uset_containsRange(const USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Returns true if the given USet contains the given string.
+ * @param set the set
+ * @param str the string
+ * @param strLen the length of the string or -1 if null terminated.
+ * @return true if set contains str
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_containsString(const USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Returns the index of the given character within this set, where
+ * the set is ordered by ascending code point. If the character
+ * is not in this set, return -1. The inverse of this method is
+ * <code>charAt()</code>.
+ * @param set the set
+ * @param c the character to obtain the index for
+ * @return an index from 0..size()-1, or -1
+ * @stable ICU 3.2
+ */
+U_CAPI int32_t U_EXPORT2
+uset_indexOf(const USet* set, UChar32 c);
+
+/**
+ * Returns the character at the given index within this set, where
+ * the set is ordered by ascending code point. If the index is
+ * out of range, return (UChar32)-1. The inverse of this method is
+ * <code>indexOf()</code>.
+ * @param set the set
+ * @param charIndex an index from 0..size()-1 to obtain the char for
+ * @return the character at the given index, or (UChar32)-1.
+ * @stable ICU 3.2
+ */
+U_CAPI UChar32 U_EXPORT2
+uset_charAt(const USet* set, int32_t charIndex);
+
+/**
+ * Returns the number of characters and strings contained in the given
+ * USet.
+ * @param set the set
+ * @return a non-negative integer counting the characters and strings
+ * contained in set
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uset_size(const USet* set);
+
+/**
+ * Returns the number of items in this set. An item is either a range
+ * of characters or a single multicharacter string.
+ * @param set the set
+ * @return a non-negative integer counting the character ranges
+ * and/or strings contained in set
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uset_getItemCount(const USet* set);
+
+/**
+ * Returns an item of this set. An item is either a range of
+ * characters or a single multicharacter string.
+ * @param set the set
+ * @param itemIndex a non-negative integer in the range 0..
+ * uset_getItemCount(set)-1
+ * @param start pointer to variable to receive first character
+ * in range, inclusive
+ * @param end pointer to variable to receive last character in range,
+ * inclusive
+ * @param str buffer to receive the string, may be NULL
+ * @param strCapacity capacity of str, or 0 if str is NULL
+ * @param ec error code
+ * @return the length of the string (>= 2), or 0 if the item is a
+ * range, in which case it is the range *start..*end, or -1 if
+ * itemIndex is out of range
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uset_getItem(const USet* set, int32_t itemIndex,
+ UChar32* start, UChar32* end,
+ UChar* str, int32_t strCapacity,
+ UErrorCode* ec);
+
+/**
+ * Returns true if set1 contains all the characters and strings
+ * of set2. It answers the question, 'Is set1 a superset of set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_CAPI UBool U_EXPORT2
+uset_containsAll(const USet* set1, const USet* set2);
+
+/**
+ * Returns true if this set contains all the characters
+ * of the given string. This does not check containment of grapheme
+ * clusters (multicharacter strings), unlike uset_containsString.
+ * @param set set of characters to be checked for containment
+ * @param str string containing codepoints to be checked for containment
+ * @param strLen the length of the string or -1 if null terminated.
+ * @return true if the test condition is met
+ * @stable ICU 3.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
+
+/**
+ * Returns true if set1 contains none of the characters and strings
+ * of set2. It answers the question, 'Is set1 disjoint from set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_CAPI UBool U_EXPORT2
+uset_containsNone(const USet* set1, const USet* set2);
+
+/**
+ * Returns true if set1 contains some of the characters and strings
+ * of set2. It answers the question, 'Do set1 and set2 have an intersection?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_CAPI UBool U_EXPORT2
+uset_containsSome(const USet* set1, const USet* set2);
+
+/**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ * 0 if the start of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_CAPI int32_t U_EXPORT2
+uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ * the string length if the end of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_CAPI int32_t U_EXPORT2
+uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ * 0 if the start of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_CAPI int32_t U_EXPORT2
+uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ * the string length if the end of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_CAPI int32_t U_EXPORT2
+uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns true if set1 contains all of the characters and strings
+ * of set2, and vice versa. It answers the question, 'Is set1 equal to set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_CAPI UBool U_EXPORT2
+uset_equals(const USet* set1, const USet* set2);
+
+/*********************************************************************
+ * Serialized set API
+ *********************************************************************/
+
+/**
+ * Serializes this set into an array of 16-bit integers. Serialization
+ * (currently) only records the characters in the set; multicharacter
+ * strings are ignored.
+ *
+ * The array
+ * has the following format (each line is one 16-bit integer):
+ *
+ * length = (n+2*m) | (m!=0?0x8000:0)
+ * bmpLength = n; present if m!=0
+ * bmp[0]
+ * bmp[1]
+ * ...
+ * bmp[n-1]
+ * supp-high[0]
+ * supp-low[0]
+ * supp-high[1]
+ * supp-low[1]
+ * ...
+ * supp-high[m-1]
+ * supp-low[m-1]
+ *
+ * The array starts with a header. After the header are n bmp
+ * code points, then m supplementary code points. Either n or m
+ * or both may be zero. n+2*m is always <= 0x7FFF.
+ *
+ * If there are no supplementary characters (if m==0) then the
+ * header is one 16-bit integer, 'length', with value n.
+ *
+ * If there are supplementary characters (if m!=0) then the header
+ * is two 16-bit integers. The first, 'length', has value
+ * (n+2*m)|0x8000. The second, 'bmpLength', has value n.
+ *
+ * After the header the code points are stored in ascending order.
+ * Supplementary code points are stored as most significant 16
+ * bits followed by least significant 16 bits.
+ *
+ * @param set the set
+ * @param dest pointer to buffer of destCapacity 16-bit integers.
+ * May be NULL only if destCapacity is zero.
+ * @param destCapacity size of dest, or zero. Must not be negative.
+ * @param pErrorCode pointer to the error code. Will be set to
+ * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to
+ * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
+ * @return the total length of the serialized format, including
+ * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
+ * than U_BUFFER_OVERFLOW_ERROR.
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
+
+/**
+ * Given a serialized array, fill in the given serialized set object.
+ * @param fillSet pointer to result
+ * @param src pointer to start of array
+ * @param srcLength length of array
+ * @return true if the given array is valid, otherwise false
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
+
+/**
+ * Set the USerializedSet to contain the given character (and nothing
+ * else).
+ * @param fillSet pointer to result
+ * @param c The codepoint to set
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
+
+/**
+ * Returns true if the given USerializedSet contains the given
+ * character.
+ * @param set the serialized set
+ * @param c The codepoint to check for within the set
+ * @return true if set contains c
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_serializedContains(const USerializedSet* set, UChar32 c);
+
+/**
+ * Returns the number of disjoint ranges of characters contained in
+ * the given serialized set. Ignores any strings contained in the
+ * set.
+ * @param set the serialized set
+ * @return a non-negative integer counting the character ranges
+ * contained in set
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uset_getSerializedRangeCount(const USerializedSet* set);
+
+/**
+ * Returns a range of characters contained in the given serialized
+ * set.
+ * @param set the serialized set
+ * @param rangeIndex a non-negative integer in the range 0..
+ * uset_getSerializedRangeCount(set)-1
+ * @param pStart pointer to variable to receive first character
+ * in range, inclusive
+ * @param pEnd pointer to variable to receive last character in range,
+ * inclusive
+ * @return true if rangeIndex is valid, otherwise false
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
+ UChar32* pStart, UChar32* pEnd);
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/usetiter.h b/thirdparty/icu4c/common/unicode/usetiter.h
new file mode 100644
index 0000000000..a817ef72b3
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/usetiter.h
@@ -0,0 +1,325 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2002-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+#ifndef USETITER_H
+#define USETITER_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file
+ * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
+ */
+
+U_NAMESPACE_BEGIN
+
+class UnicodeSet;
+class UnicodeString;
+
+/**
+ *
+ * UnicodeSetIterator iterates over the contents of a UnicodeSet. It
+ * iterates over either code points or code point ranges. After all
+ * code points or ranges have been returned, it returns the
+ * multicharacter strings of the UnicodeSet, if any.
+ *
+ * This class is not intended to be subclassed. Consider any fields
+ * or methods declared as "protected" to be private. The use of
+ * protected in this class is an artifact of history.
+ *
+ * <p>To iterate over code points and strings, use a loop like this:
+ * <pre>
+ * UnicodeSetIterator it(set);
+ * while (it.next()) {
+ * processItem(it.getString());
+ * }
+ * </pre>
+ * <p>Each item in the set is accessed as a string. Set elements
+ * consisting of single code points are returned as strings containing
+ * just the one code point.
+ *
+ * <p>To iterate over code point ranges, instead of individual code points,
+ * use a loop like this:
+ * <pre>
+ * UnicodeSetIterator it(set);
+ * while (it.nextRange()) {
+ * if (it.isString()) {
+ * processString(it.getString());
+ * } else {
+ * processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
+ * }
+ * }
+ * </pre>
+ * @author M. Davis
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UnicodeSetIterator : public UObject {
+
+ protected:
+
+ /**
+ * Value of <tt>codepoint</tt> if the iterator points to a string.
+ * If <tt>codepoint == IS_STRING</tt>, then examine
+ * <tt>string</tt> for the current iteration result.
+ * @stable ICU 2.4
+ */
+ enum { IS_STRING = -1 };
+
+ /**
+ * Current code point, or the special value <tt>IS_STRING</tt>, if
+ * the iterator points to a string.
+ * @stable ICU 2.4
+ */
+ UChar32 codepoint;
+
+ /**
+ * When iterating over ranges using <tt>nextRange()</tt>,
+ * <tt>codepointEnd</tt> contains the inclusive end of the
+ * iteration range, if <tt>codepoint != IS_STRING</tt>. If
+ * iterating over code points using <tt>next()</tt>, or if
+ * <tt>codepoint == IS_STRING</tt>, then the value of
+ * <tt>codepointEnd</tt> is undefined.
+ * @stable ICU 2.4
+ */
+ UChar32 codepointEnd;
+
+ /**
+ * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
+ * to the current string. If <tt>codepoint != IS_STRING</tt>, the
+ * value of <tt>string</tt> is undefined.
+ * @stable ICU 2.4
+ */
+ const UnicodeString* string;
+
+ public:
+
+ /**
+ * Create an iterator over the given set. The iterator is valid
+ * only so long as <tt>set</tt> is valid.
+ * @param set set to iterate over
+ * @stable ICU 2.4
+ */
+ UnicodeSetIterator(const UnicodeSet& set);
+
+ /**
+ * Create an iterator over nothing. <tt>next()</tt> and
+ * <tt>nextRange()</tt> return false. This is a convenience
+ * constructor allowing the target to be set later.
+ * @stable ICU 2.4
+ */
+ UnicodeSetIterator();
+
+ /**
+ * Destructor.
+ * @stable ICU 2.4
+ */
+ virtual ~UnicodeSetIterator();
+
+ /**
+ * Returns true if the current element is a string. If so, the
+ * caller can retrieve it with <tt>getString()</tt>. If this
+ * method returns false, the current element is a code point or
+ * code point range, depending on whether <tt>next()</tt> or
+ * <tt>nextRange()</tt> was called.
+ * Elements of types string and codepoint can both be retrieved
+ * with the function <tt>getString()</tt>.
+ * Elements of type codepoint can also be retrieved with
+ * <tt>getCodepoint()</tt>.
+ * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
+ * of the range, and <tt>getCodepointEnd()</tt> returns the end
+ * of the range.
+ * @stable ICU 2.4
+ */
+ inline UBool isString() const;
+
+ /**
+ * Returns the current code point, if <tt>isString()</tt> returned
+ * false. Otherwise returns an undefined result.
+ * @stable ICU 2.4
+ */
+ inline UChar32 getCodepoint() const;
+
+ /**
+ * Returns the end of the current code point range, if
+ * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
+ * called. Otherwise returns an undefined result.
+ * @stable ICU 2.4
+ */
+ inline UChar32 getCodepointEnd() const;
+
+ /**
+ * Returns the current string, if <tt>isString()</tt> returned
+ * true. If the current iteration item is a code point, a UnicodeString
+ * containing that single code point is returned.
+ *
+ * Ownership of the returned string remains with the iterator.
+ * The string is guaranteed to remain valid only until the iterator is
+ * advanced to the next item, or until the iterator is deleted.
+ *
+ * @stable ICU 2.4
+ */
+ const UnicodeString& getString();
+
+ /**
+ * Advances the iteration position to the next element in the set,
+ * which can be either a single code point or a string.
+ * If there are no more elements in the set, return false.
+ *
+ * <p>
+ * If <tt>isString() == true</tt>, the value is a
+ * string, otherwise the value is a
+ * single code point. Elements of either type can be retrieved
+ * with the function <tt>getString()</tt>, while elements
+ * consisting of a single code point can be retrieved with
+ * <tt>getCodepoint()</tt>.
+ *
+ * <p>The order of iteration is all code points in sorted order,
+ * followed by all strings in sorted order. Do not mix
+ * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
+ * calling <tt>reset()</tt> between them. The results of doing so
+ * are undefined.
+ *
+ * @return true if there was another element in the set.
+ * @stable ICU 2.4
+ */
+ UBool next();
+
+ /**
+ * Returns the next element in the set, either a code point range
+ * or a string. If there are no more elements in the set, return
+ * false. If <tt>isString() == true</tt>, the value is a
+ * string and can be accessed with <tt>getString()</tt>. Otherwise the value is a
+ * range of one or more code points from <tt>getCodepoint()</tt> to
+ * <tt>getCodepointEnd()</tt> inclusive.
+ *
+ * <p>The order of iteration is all code point ranges in sorted
+ * order, followed by all strings in sorted order. Ranges are
+ * disjoint and non-contiguous. The value returned from <tt>getString()</tt>
+ * is undefined unless <tt>isString() == true</tt>. Do not mix calls to
+ * <tt>next()</tt> and <tt>nextRange()</tt> without calling
+ * <tt>reset()</tt> between them. The results of doing so are
+ * undefined.
+ *
+ * @return true if there was another element in the set.
+ * @stable ICU 2.4
+ */
+ UBool nextRange();
+
+ /**
+ * Sets this iterator to visit the elements of the given set and
+ * resets it to the start of that set. The iterator is valid only
+ * so long as <tt>set</tt> is valid.
+ * @param set the set to iterate over.
+ * @stable ICU 2.4
+ */
+ void reset(const UnicodeSet& set);
+
+ /**
+ * Resets this iterator to the start of the set.
+ * @stable ICU 2.4
+ */
+ void reset();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.4
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.4
+ */
+ virtual UClassID getDynamicClassID() const;
+
+ // ======================= PRIVATES ===========================
+
+ protected:
+
+ // endElement and nextElement are really UChar32's, but we keep
+ // them as signed int32_t's so we can do comparisons with
+ // endElement set to -1. Leave them as int32_t's.
+ /** The set
+ * @stable ICU 2.4
+ */
+ const UnicodeSet* set;
+ /** End range
+ * @stable ICU 2.4
+ */
+ int32_t endRange;
+ /** Range
+ * @stable ICU 2.4
+ */
+ int32_t range;
+ /** End element
+ * @stable ICU 2.4
+ */
+ int32_t endElement;
+ /** Next element
+ * @stable ICU 2.4
+ */
+ int32_t nextElement;
+ //UBool abbreviated;
+ /** Next string
+ * @stable ICU 2.4
+ */
+ int32_t nextString;
+ /** String count
+ * @stable ICU 2.4
+ */
+ int32_t stringCount;
+
+ /**
+ * Points to the string to use when the caller asks for a
+ * string and the current iteration item is a code point, not a string.
+ * @internal
+ */
+ UnicodeString *cpString;
+
+ /** Copy constructor. Disallowed.
+ * @stable ICU 2.4
+ */
+ UnicodeSetIterator(const UnicodeSetIterator&); // disallow
+
+ /** Assignment operator. Disallowed.
+ * @stable ICU 2.4
+ */
+ UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
+
+ /** Load range
+ * @stable ICU 2.4
+ */
+ virtual void loadRange(int32_t range);
+
+};
+
+inline UBool UnicodeSetIterator::isString() const { // string items are flagged by the IS_STRING sentinel in codepoint
+ return codepoint == (UChar32)IS_STRING;
+}
+
+inline UChar32 UnicodeSetIterator::getCodepoint() const { // plain read of the codepoint member
+ return codepoint;
+}
+
+inline UChar32 UnicodeSetIterator::getCodepointEnd() const { // plain read of the codepointEnd member
+ return codepointEnd;
+}
+
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/ushape.h b/thirdparty/icu4c/common/unicode/ushape.h
new file mode 100644
index 0000000000..fed4869abd
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ushape.h
@@ -0,0 +1,476 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2000-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ushape.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2000jun29
+* created by: Markus W. Scherer
+*/
+
+#ifndef __USHAPE_H__
+#define __USHAPE_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Arabic shaping
+ *
+ */
+
+/**
+ * Shape Arabic text on a character basis.
+ *
+ * <p>This function performs basic operations for "shaping" Arabic text. It is most
+ * useful for use with legacy data formats and legacy display technology
+ * (simple terminals). All operations are performed on Unicode characters.</p>
+ *
+ * <p>Text-based shaping means that some character code points in the text are
+ * replaced by others depending on the context. It transforms one kind of text
+ * into another. In comparison, modern displays for Arabic text select
+ * appropriate, context-dependent font glyphs for each text element, which means
+ * that they transform text into a glyph vector.</p>
+ *
+ * <p>Text transformations are necessary when modern display technology is not
+ * available or when text needs to be transformed to or from legacy formats that
+ * use "shaped" characters. Since the Arabic script is cursive, connecting
+ * adjacent letters to each other, computers select images for each letter based
+ * on the surrounding letters. This usually results in four images per Arabic
+ * letter: initial, middle, final, and isolated forms. In Unicode, on the other
+ * hand, letters are normally stored abstract, and a display system is expected
+ * to select the necessary glyphs. (This makes searching and other text
+ * processing easier because the same letter has only one code.) It is possible
+ * to mimic this with text transformations because there are characters in
+ * Unicode that are rendered as letters with a specific shape
+ * (or cursive connectivity). They were included for interoperability with
+ * legacy systems and codepages, and for unsophisticated display systems.</p>
+ *
+ * <p>A second kind of text transformations is supported for Arabic digits:
+ * For compatibility with legacy codepages that only include European digits,
+ * it is possible to replace one set of digits by another, changing the
+ * character code points. These operations can be performed for either
+ * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic
+ * digits (U+06f0...U+06f9).</p>
+ *
+ * <p>Some replacements may result in more or fewer characters (code points).
+ * By default, this means that the destination buffer may receive text with a
+ * length different from the source length. Some legacy systems rely on the
+ * length of the text to be constant. They expect extra spaces to be added
+ * or consumed either next to the affected character or at the end of the
+ * text.</p>
+ *
+ * <p>For details about the available operations, see the description of the
+ * <code>U_SHAPE_...</code> options.</p>
+ *
+ * @param source The input text.
+ *
+ * @param sourceLength The number of UChars in <code>source</code>.
+ *
+ * @param dest The destination buffer that will receive the results of the
+ * requested operations. It may be <code>NULL</code> only if
+ * <code>destSize</code> is 0. The source and destination must not
+ * overlap.
+ *
+ * @param destSize The size (capacity) of the destination buffer in UChars.
+ * If <code>destSize</code> is 0, then no output is produced,
+ * but the necessary buffer size is returned ("preflighting").
+ *
+ * @param options This is a 32-bit set of flags that specify the operations
+ * that are performed on the input text. If no error occurs,
+ * then the result will always be written to the destination
+ * buffer.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @return The number of UChars written to the destination buffer.
+ * If an error occurred, then no output was written, or it may be
+ * incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then
+ * the return value indicates the necessary destination buffer size.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_shapeArabic(const UChar *source, int32_t sourceLength,
+ UChar *dest, int32_t destSize,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Memory option: allow the result to have a different length than the source.
+ * Affects: LamAlef options
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_GROW_SHRINK 0
+
+/**
+ * Memory option: allow the result to have a different length than the source.
+ * Affects: LamAlef options
+ * This option is an alias to U_SHAPE_LENGTH_GROW_SHRINK
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_LAMALEF_RESIZE 0
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces next to modified characters.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_FIXED_SPACES_NEAR 1
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces next to modified characters.
+ * Affects: LamAlef options
+ * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_NEAR
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_LAMALEF_NEAR 1
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces at the end of the text.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_FIXED_SPACES_AT_END 2
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces at the end of the text.
+ * Affects: LamAlef options
+ * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_END
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_LAMALEF_END 2
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces at the beginning of the text.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING 3
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces at the beginning of the text.
+ * Affects: LamAlef options
+ * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_LAMALEF_BEGIN 3
+
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * Shaping Mode: For each LAMALEF character found, expand LAMALEF using space at end.
+ * If there is no space at end, use spaces at beginning of the buffer. If there
+ * is no space at beginning of the buffer, use spaces at the near (i.e. the space
+ * after the LAMALEF character).
+ * If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h)
+ * will be set in pErrorCode
+ *
+ * Deshaping Mode: Perform the same function as when the flag equals U_SHAPE_LAMALEF_END.
+ * Affects: LamAlef options
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_LAMALEF_AUTO 0x10000
+
+/** Bit mask for memory options. @stable ICU 2.0 */
+#define U_SHAPE_LENGTH_MASK 0x10003 /* Changed old value 3 */
+
+
+/**
+ * Bit mask for LamAlef memory options.
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_LAMALEF_MASK 0x10003 /* updated */
+
+/** Direction indicator: the source is in logical (keyboard) order. @stable ICU 2.0 */
+#define U_SHAPE_TEXT_DIRECTION_LOGICAL 0
+
+/**
+ * Direction indicator:
+ * the source is in visual RTL order,
+ * the rightmost displayed character stored first.
+ * This option is an alias to U_SHAPE_TEXT_DIRECTION_LOGICAL
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_TEXT_DIRECTION_VISUAL_RTL 0
+
+/**
+ * Direction indicator:
+ * the source is in visual LTR order,
+ * the leftmost displayed character stored first.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_TEXT_DIRECTION_VISUAL_LTR 4
+
+/** Bit mask for direction indicators. @stable ICU 2.0 */
+#define U_SHAPE_TEXT_DIRECTION_MASK 4
+
+
+/** Letter shaping option: do not perform letter shaping. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_NOOP 0
+
+/** Letter shaping option: replace abstract letter characters by "shaped" ones. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_SHAPE 8
+
+/** Letter shaping option: replace "shaped" letter characters by abstract ones. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_UNSHAPE 0x10
+
+/**
+ * Letter shaping option: replace abstract letter characters by "shaped" ones.
+ * The only difference with U_SHAPE_LETTERS_SHAPE is that Tashkeel letters
+ * are always "shaped" into the isolated form instead of the medial form
+ * (selecting code points from the Arabic Presentation Forms-B block).
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED 0x18
+
+
+/** Bit mask for letter shaping options. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_MASK 0x18
+
+
+/** Digit shaping option: do not perform digit shaping. @stable ICU 2.0 */
+#define U_SHAPE_DIGITS_NOOP 0
+
+/**
+ * Digit shaping option:
+ * Replace European digits (U+0030...) by Arabic-Indic digits.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_EN2AN 0x20
+
+/**
+ * Digit shaping option:
+ * Replace Arabic-Indic digits by European digits (U+0030...).
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_AN2EN 0x40
+
+/**
+ * Digit shaping option:
+ * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
+ * strongly directional character is an Arabic letter
+ * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
+ * The direction of "preceding" depends on the direction indicator option.
+ * For the first characters, the preceding strongly directional character
+ * (initial state) is assumed to be not an Arabic letter
+ * (it is <code>U_LEFT_TO_RIGHT</code> [L] or <code>U_RIGHT_TO_LEFT</code> [R]).
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_ALEN2AN_INIT_LR 0x60
+
+/**
+ * Digit shaping option:
+ * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
+ * strongly directional character is an Arabic letter
+ * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
+ * The direction of "preceding" depends on the direction indicator option.
+ * For the first characters, the preceding strongly directional character
+ * (initial state) is assumed to be an Arabic letter.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_ALEN2AN_INIT_AL 0x80
+
+/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
+#define U_SHAPE_DIGITS_RESERVED 0xa0
+
+/** Bit mask for digit shaping options. @stable ICU 2.0 */
+#define U_SHAPE_DIGITS_MASK 0xe0
+
+
+/** Digit type option: Use Arabic-Indic digits (U+0660...U+0669). @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_AN 0
+
+/** Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_AN_EXTENDED 0x100
+
+/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_RESERVED 0x200
+
+/** Bit mask for digit type options. @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_MASK 0x300 /* I need to change this from 0x3f00 to 0x300 */
+
+/**
+ * Tashkeel aggregation option:
+ * Replaces any combination of U+0651 with one of
+ * U+064C, U+064D, U+064E, U+064F, U+0650 with
+ * U+FC5E, U+FC5F, U+FC60, U+FC61, U+FC62 consecutively.
+ * @stable ICU 3.6
+ */
+#define U_SHAPE_AGGREGATE_TASHKEEL 0x4000
+/** Tashkeel aggregation option: do not aggregate tashkeels. @stable ICU 3.6 */
+#define U_SHAPE_AGGREGATE_TASHKEEL_NOOP 0
+/** Bit mask for tashkeel aggregation. @stable ICU 3.6 */
+#define U_SHAPE_AGGREGATE_TASHKEEL_MASK 0x4000
+
+/**
+ * Presentation form option:
+ * Don't replace Arabic Presentation Forms-A and Arabic Presentation Forms-B
+ * characters with U+06xx characters, before shaping.
+ * @stable ICU 3.6
+ */
+#define U_SHAPE_PRESERVE_PRESENTATION 0x8000
+/** Presentation form option:
+ * Replace Arabic Presentation Forms-A and Arabic Presentation Forms-B with
+ * their unshaped correspondents in range U+06xx, before shaping.
+ * @stable ICU 3.6
+ */
+#define U_SHAPE_PRESERVE_PRESENTATION_NOOP 0
+/** Bit mask for preserve presentation form. @stable ICU 3.6 */
+#define U_SHAPE_PRESERVE_PRESENTATION_MASK 0x8000
+
+/* Seen Tail option */
+/**
+ * Memory option: the result must have the same length as the source.
+ * Shaping mode: The SEEN family character will expand into two characters using space near
+ * the SEEN family character(i.e. the space after the character).
+ * If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h)
+ * will be set in pErrorCode
+ *
+ * De-shaping mode: Any Seen character followed by Tail character will be
+ * replaced by one cell Seen and a space will replace the Tail.
+ * Affects: Seen options
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_SEEN_TWOCELL_NEAR 0x200000
+
+/**
+ * Bit mask for Seen memory options.
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_SEEN_MASK 0x700000
+
+/* YehHamza option */
+/**
+ * Memory option: the result must have the same length as the source.
+ * Shaping mode: The YEHHAMZA character will expand into two characters using space near it
+ * (i.e. the space after the character).
+ * If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h)
+ * will be set in pErrorCode
+ *
+ * De-shaping mode: Any Yeh (final or isolated) character followed by Hamza character will be
+ * replaced by one cell YehHamza and space will replace the Hamza.
+ * Affects: YehHamza options
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_YEHHAMZA_TWOCELL_NEAR 0x1000000
+
+
+/**
+ * Bit mask for YehHamza memory options.
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_YEHHAMZA_MASK 0x3800000
+
+/* New Tashkeel options */
+/**
+ * Memory option: the result must have the same length as the source.
+ * Shaping mode: Tashkeel characters will be replaced by spaces.
+ * Spaces will be placed at beginning of the buffer
+ *
+ * De-shaping mode: N/A
+ * Affects: Tashkeel options
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_TASHKEEL_BEGIN 0x40000
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * Shaping mode: Tashkeel characters will be replaced by spaces.
+ * Spaces will be placed at end of the buffer
+ *
+ * De-shaping mode: N/A
+ * Affects: Tashkeel options
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_TASHKEEL_END 0x60000
+
+/**
+ * Memory option: allow the result to have a different length than the source.
+ * Shaping mode: Tashkeel characters will be removed, buffer length will shrink.
+ * De-shaping mode: N/A
+ *
+ * Affects: Tashkeel options
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_TASHKEEL_RESIZE 0x80000
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * Shaping mode: Tashkeel characters will be replaced by Tatweel if it is connected to adjacent
+ * characters (i.e. shaped on Tatweel) or replaced by space if it is not connected.
+ *
+ * De-shaping mode: N/A
+ * Affects: Tashkeel options
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL 0xC0000
+
+/**
+ * Bit mask for Tashkeel replacement with Space or Tatweel memory options.
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_TASHKEEL_MASK 0xE0000
+
+
+/* Space location Control options */
+/**
+ * This option affects the meaning of the BEGIN and END options. If this option is not used, the default
+ * for BEGIN and END will be as follows:
+ * The Default (for both Visual LTR, Visual RTL and Logical Text)
+ * 1. BEGIN always refers to the start address of physical memory.
+ * 2. END always refers to the end address of physical memory.
+ *
+ * If this option is used it will swap the meaning of BEGIN and END only for Visual LTR text.
+ *
+ * The effect on BEGIN and END Memory Options will be as follows:
+ * A. BEGIN For Visual LTR text: This will be the beginning (right side) of the visual text
+ * (corresponding to the physical memory address end for Visual LTR text, Same as END in
+ * default behavior)
+ * B. BEGIN For Logical text: Same as BEGIN in default behavior.
+ * C. END For Visual LTR text: This will be the end (left side) of the visual text (corresponding
+ * to the physical memory address beginning for Visual LTR text, Same as BEGIN in default behavior)
+ * D. END For Logical text: Same as END in default behavior.
+ * Affects: All LamAlef BEGIN, END and AUTO options.
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END 0x4000000
+
+/**
+ * Bit mask for swapping BEGIN and END for Visual LTR text
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK 0x4000000
+
+/**
+ * If this option is used, shaping will use the new Unicode code point for TAIL (i.e. 0xFE73).
+ * If this option is not specified (Default), old unofficial Unicode TAIL code point is used (i.e. 0x200B)
+ * De-shaping will not use this option as it will always search for both the new Unicode code point for the
+ * TAIL (i.e. 0xFE73) or the old unofficial Unicode TAIL code point (i.e. 0x200B) and de-shape the
+ * Seen-Family letter accordingly.
+ *
+ * Shaping Mode: Only shaping.
+ * De-shaping Mode: N/A.
+ * Affects: All Seen options
+ * @stable ICU 4.8
+ */
+#define U_SHAPE_TAIL_NEW_UNICODE 0x8000000
+
+/**
+ * Bit mask for new Unicode Tail option
+ * @stable ICU 4.8
+ */
+#define U_SHAPE_TAIL_TYPE_MASK 0x8000000
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/usprep.h b/thirdparty/icu4c/common/unicode/usprep.h
new file mode 100644
index 0000000000..f8a0f58e0d
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/usprep.h
@@ -0,0 +1,274 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *******************************************************************************
+ *
+ * Copyright (C) 2003-2014, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ *******************************************************************************
+ * file name: usprep.h
+ * encoding: UTF-8
+ * tab size: 8 (not used)
+ * indentation:4
+ *
+ * created on: 2003jul2
+ * created by: Ram Viswanadha
+ */
+
+#ifndef __USPREP_H__
+#define __USPREP_H__
+
+/**
+ * \file
+ * \brief C API: Implements the StringPrep algorithm.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ *
+ * StringPrep API implements the StringPrep framework as described by RFC 3454.
+ * StringPrep prepares Unicode strings for use in network protocols.
+ * Profiles of StringPrep are sets of rules and data according to which the
+ * Unicode strings are prepared. Each profile contains tables which describe
+ * how a code point should be treated. The tables are broadly classified into
+ * <ul>
+ * <li> Unassigned Table: Contains code points that are unassigned
+ * in the Unicode Version supported by StringPrep. Currently
+ * RFC 3454 supports Unicode 3.2. </li>
+ * <li> Prohibited Table: Contains code points that are prohibited from
+ * the output of the StringPrep processing function. </li>
+ * <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
+ * </ul>
+ *
+ * The procedure for preparing Unicode strings:
+ * <ol>
+ * <li> Map: For each character in the input, check if it has a mapping
+ * and, if so, replace it with its mapping. </li>
+ * <li> Normalize: Possibly normalize the result of step 1 using Unicode
+ * normalization. </li>
+ * <li> Prohibit: Check for any characters that are not allowed in the
+ * output. If any are found, return an error.</li>
+ * <li> Check bidi: Possibly check for right-to-left characters, and if
+ * any are found, make sure that the whole string satisfies the
+ * requirements for bidirectional strings. If the string does not
+ * satisfy the requirements for bidirectional strings, return an
+ * error. </li>
+ * </ol>
+ * @author Ram Viswanadha
+ */
+#if !UCONFIG_NO_IDNA
+
+#include "unicode/parseerr.h"
+
+/**
+ * The StringPrep profile
+ * @stable ICU 2.8
+ */
+typedef struct UStringPrepProfile UStringPrepProfile;
+
+
+/**
+ * Option to prohibit processing of unassigned code points in the input
+ *
+ * @see usprep_prepare
+ * @stable ICU 2.8
+ */
+#define USPREP_DEFAULT 0x0000
+
+/**
+ * Option to allow processing of unassigned code points in the input
+ *
+ * @see usprep_prepare
+ * @stable ICU 2.8
+ */
+#define USPREP_ALLOW_UNASSIGNED 0x0001
+
+/**
+ * enums for the standard stringprep profile types
+ * supported by usprep_openByType.
+ * @see usprep_openByType
+ * @stable ICU 4.2
+ */
+typedef enum UStringPrepProfileType {
+ /**
+ * RFC3491 Nameprep
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3491_NAMEPREP,
+ /**
+ * RFC3530 nfs4_cs_prep
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3530_NFS4_CS_PREP,
+ /**
+ * RFC3530 nfs4_cs_prep with case insensitive option
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3530_NFS4_CS_PREP_CI,
+ /**
+ * RFC3530 nfs4_cis_prep
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3530_NFS4_CIS_PREP,
+ /**
+ * RFC3530 nfs4_mixed_prep for prefix
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX,
+ /**
+ * RFC3530 nfs4_mixed_prep for suffix
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX,
+ /**
+ * RFC3722 iSCSI
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3722_ISCSI,
+ /**
+ * RFC3920 XMPP Nodeprep
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3920_NODEPREP,
+ /**
+ * RFC3920 XMPP Resourceprep
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3920_RESOURCEPREP,
+ /**
+ * RFC4011 Policy MIB Stringprep
+ * @stable ICU 4.2
+ */
+ USPREP_RFC4011_MIB,
+ /**
+ * RFC4013 SASLprep
+ * @stable ICU 4.2
+ */
+ USPREP_RFC4013_SASLPREP,
+ /**
+ * RFC4505 trace
+ * @stable ICU 4.2
+ */
+ USPREP_RFC4505_TRACE,
+ /**
+ * RFC4518 LDAP
+ * @stable ICU 4.2
+ */
+ USPREP_RFC4518_LDAP,
+ /**
+ * RFC4518 LDAP for case ignore, numeric and stored prefix
+ * matching rules
+ * @stable ICU 4.2
+ */
+ USPREP_RFC4518_LDAP_CI
+} UStringPrepProfileType;
+
+/**
+ * Creates a StringPrep profile from the data file.
+ *
+ * @param path string containing the full path pointing to the directory
+ * where the profile resides followed by the package name
+ * e.g. "/usr/resource/my_app/profiles/mydata" on a Unix system.
+ * if NULL, ICU default data files will be used.
+ * @param fileName name of the profile file to be opened
+ * @param status ICU error code in/out parameter. Must not be NULL.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Pointer to UStringPrepProfile that is opened. Should be closed by
+ * calling usprep_close()
+ * @see usprep_close()
+ * @stable ICU 2.8
+ */
+U_CAPI UStringPrepProfile* U_EXPORT2
+usprep_open(const char* path,
+ const char* fileName,
+ UErrorCode* status);
+
+/**
+ * Creates a StringPrep profile for the specified profile type.
+ *
+ * @param type The profile type
+ * @param status ICU error code in/out parameter. Must not be NULL.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Pointer to UStringPrepProfile that is opened. Should be closed by
+ * calling usprep_close()
+ * @see usprep_close()
+ * @stable ICU 4.2
+ */
+U_CAPI UStringPrepProfile* U_EXPORT2
+usprep_openByType(UStringPrepProfileType type,
+ UErrorCode* status);
+
+/**
+ * Closes the profile
+ * @param profile The profile to close
+ * @stable ICU 2.8
+ */
+U_CAPI void U_EXPORT2
+usprep_close(UStringPrepProfile* profile);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUStringPrepProfilePointer
+ * "Smart pointer" class, closes a UStringPrepProfile via usprep_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUStringPrepProfilePointer, UStringPrepProfile, usprep_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes (NFKC),
+ * checks for prohibited and BiDi characters in the order defined by RFC 3454
+ * depending on the options specified in the profile.
+ *
+ * @param prep The profile to use
+ * @param src Pointer to UChar buffer containing the string to prepare
+ * @param srcLength Number of characters in the source string
+ * @param dest Pointer to the destination buffer to receive the output
+ * @param destCapacity The capacity of destination array
+ * @param options A bit set of options:
+ *
+ * - USPREP_DEFAULT Prohibit processing of unassigned code points in the input
+ *
+ * - USPREP_ALLOW_UNASSIGNED Treat the unassigned code points in the input
+ * as normal Unicode code points.
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The number of UChars in the destination buffer
+ * @stable ICU 2.8
+ */
+
+U_CAPI int32_t U_EXPORT2
+usprep_prepare( const UStringPrepProfile* prep,
+ const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status );
+
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/ustring.h b/thirdparty/icu4c/common/unicode/ustring.h
new file mode 100644
index 0000000000..10ea45ead1
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ustring.h
@@ -0,0 +1,1689 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1998-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File ustring.h
+*
+* Modification History:
+*
+* Date Name Description
+* 12/07/98 bertrand Creation.
+******************************************************************************
+*/
+
+#ifndef USTRING_H
+#define USTRING_H
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/uiter.h"
+
+/**
+ * \def UBRK_TYPEDEF_UBREAK_ITERATOR
+ * @internal
+ */
+
+#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
+# define UBRK_TYPEDEF_UBREAK_ITERATOR
+/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
+ typedef struct UBreakIterator UBreakIterator;
+#endif
+
+/**
+ * \file
+ * \brief C API: Unicode string handling functions
+ *
+ * These C API functions provide general Unicode string handling.
+ *
+ * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
+ * functions. (For example, they do not check for bad arguments like NULL string pointers.)
+ * In some cases, only the thread-safe variant of such a function is implemented here
+ * (see u_strtok_r()).
+ *
+ * Other functions provide more Unicode-specific functionality like locale-specific
+ * upper/lower-casing and string comparison in code point order.
+ *
+ * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
+ * UTF-16 encodes each Unicode code point with either one or two UChar code units.
+ * (This is the default form of Unicode, and a forward-compatible extension of the original,
+ * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
+ * in 1996.)
+ *
+ * Some APIs accept a 32-bit UChar32 value for a single code point.
+ *
+ * ICU also handles 16-bit Unicode text with unpaired surrogates.
+ * Such text is not well-formed UTF-16.
+ * Code-point-related functions treat unpaired surrogates as surrogate code points,
+ * i.e., as separate units.
+ *
+ * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
+ * it is much more efficient even for random access because the code unit values
+ * for single-unit characters vs. lead units vs. trail units are completely disjoint.
+ * This means that it is easy to determine character (code point) boundaries from
+ * random offsets in the string.
+ *
+ * Unicode (UTF-16) string processing is optimized for the single-unit case.
+ * Although it is important to support supplementary characters
+ * (which use pairs of lead/trail code units called "surrogates"),
+ * their occurrence is rare. Almost all characters in modern use require only
+ * a single UChar code unit (i.e., their code point values are <=0xffff).
+ *
+ * For more details see the User Guide Strings chapter (http://icu-project.org/userguide/strings.html).
+ * For a discussion of the handling of unpaired surrogates see also
+ * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
+ */
+
+/**
+ * \defgroup ustring_ustrlen String Length
+ * \ingroup ustring_strlen
+ */
+/*@{*/
+/**
+ * Determine the length of an array of UChar.
+ *
+ * @param s The array of UChars, NUL (U+0000) terminated.
+ * @return The number of UChars in <code>s</code>, not counting the terminator.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strlen(const UChar *s);
+/*@}*/
+
+/**
+ * Count Unicode code points in the length UChar code units of the string.
+ * A code point may occupy either one or two UChar code units.
+ * Counting code points involves reading all code units.
+ *
+ * This function is basically the inverse of the U16_FWD_N() macro (see utf.h).
+ *
+ * @param s The input string.
+ * @param length The number of UChar code units to be checked, or -1 to count all
+ * code points before the first NUL (U+0000).
+ * @return The number of code points in the specified code units.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_countChar32(const UChar *s, int32_t length);
+
+/**
+ * Check if the string contains more Unicode code points than a certain number.
+ * This is more efficient than counting all code points in the entire string
+ * and comparing that number with a threshold.
+ * This function may not need to scan the string at all if the length is known
+ * (not -1 for NUL-termination) and falls within a certain range, and
+ * never needs to count more than 'number+1' code points.
+ * Logically equivalent to (u_countChar32(s, length)>number).
+ * A Unicode code point may occupy either one or two UChar code units.
+ *
+ * @param s The input string.
+ * @param length The length of the string, or -1 if it is NUL-terminated.
+ * @param number The number of code points in the string is compared against
+ * the 'number' parameter.
+ * @return Boolean value for whether the string contains more Unicode code points
+ * than 'number'. Same as (u_countChar32(s, length)>number).
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
+
+/**
+ * Concatenate two ustrings. Appends a copy of <code>src</code>,
+ * including the null terminator, to <code>dst</code>. The initial copied
+ * character from <code>src</code> overwrites the null terminator in <code>dst</code>.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_strcat(UChar *dst,
+ const UChar *src);
+
+/**
+ * Concatenate two ustrings.
+ * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.
+ * Adds a terminating NUL.
+ * If src is too long, then only <code>n-1</code> characters will be copied
+ * before the terminating NUL.
+ * If <code>n&lt;=0</code> then dst is not modified.
+ *
+ * @param dst The destination string.
+ * @param src The source string (can be NULL/invalid if n<=0).
+ * @param n The maximum number of characters to append; no-op if <=0.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_strncat(UChar *dst,
+ const UChar *src,
+ int32_t n);
+
+/**
+ * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strrstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
+ */
+U_CAPI UChar * U_EXPORT2
+u_strstr(const UChar *s, const UChar *substring);
+
+/**
+ * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (NUL-terminated if subLength is -1).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindLast
+ */
+U_CAPI UChar * U_EXPORT2
+u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_CAPI UChar * U_EXPORT2
+u_strchr(const UChar *s, UChar c);
+
+/**
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_CAPI UChar * U_EXPORT2
+u_strchr32(const UChar *s, UChar32 c);
+
+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
+ */
+U_CAPI UChar * U_EXPORT2
+u_strrstr(const UChar *s, const UChar *substring);
+
+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (NUL-terminated if subLength is -1).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_CAPI UChar * U_EXPORT2
+u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_CAPI UChar * U_EXPORT2
+u_strrchr(const UChar *s, UChar c);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr
+ * @see u_memrchr32
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_CAPI UChar * U_EXPORT2
+u_strrchr32(const UChar *s, UChar32 c);
+
+/**
+ * Locates the first occurrence in the string <code>string</code> of any of the characters
+ * in the string <code>matchSet</code>.
+ * Works just like C's strpbrk but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ * for which to search in the text string.
+ * @return A pointer to the character in <code>string</code> that matches one of the
+ * characters in <code>matchSet</code>, or NULL if no such character is found.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar * U_EXPORT2
+u_strpbrk(const UChar *string, const UChar *matchSet);
+
+/**
+ * Returns the number of consecutive characters in <code>string</code>,
+ * beginning with the first, that do not occur somewhere in <code>matchSet</code>.
+ * Works just like C's strcspn but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ * for which to search in the text string.
+ * @return The number of initial characters in <code>string</code> that do not
+ * occur in <code>matchSet</code>.
+ * @see u_strspn
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strcspn(const UChar *string, const UChar *matchSet);
+
+/**
+ * Returns the number of consecutive characters in <code>string</code>,
+ * beginning with the first, that occur somewhere in <code>matchSet</code>.
+ * Works just like C's strspn but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ * for which to search in the text string.
+ * @return The number of initial characters in <code>string</code> that do
+ * occur in <code>matchSet</code>.
+ * @see u_strcspn
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strspn(const UChar *string, const UChar *matchSet);
+
+/**
+ * The string tokenizer API allows an application to break a string into
+ * tokens. Unlike strtok(), the tokenizer state (the current pointer within the
+ * original string) is maintained in saveState. In the first call, the
+ * argument src is a pointer to the string. In subsequent calls to
+ * return successive tokens of that string, src must be specified as
+ * NULL. The value saveState is set by this function to maintain the
+ * function's position within the string, and on each subsequent call
+ * you must give this argument the same variable. This function does
+ * handle surrogate pairs. This function is similar to strtok_r(),
+ * the POSIX Threads Extension (1003.1c-1995) version.
+ *
+ * @param src String containing token(s). This string will be modified.
+ * After the first call to u_strtok_r(), this argument must
+ * be NULL to get to the next token.
+ * @param delim Set of delimiter characters (Unicode code points).
+ * @param saveState The current pointer within the original string,
+ * which is set by this function. The saveState
+ * parameter should be the address of a local variable of type
+ * UChar *. (i.e. define "UChar *myLocalSaveState" and use
+ * &myLocalSaveState for this parameter).
+ * @return A pointer to the next token found in src, or NULL
+ * when there are no more tokens.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar * U_EXPORT2
+u_strtok_r(UChar *src,
+ const UChar *delim,
+ UChar **saveState);
+
+/**
+ * Compare two Unicode strings for bitwise equality (code unit order).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
+ * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
+ * value if <code>s1</code> is bitwise greater than <code>s2</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strcmp(const UChar *s1,
+ const UChar *s2);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * See u_strCompare for details.
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
+
+/**
+ * Compare two Unicode strings (binary order).
+ *
+ * The comparison can be done in code unit order or in code point order.
+ * They differ only in UTF-16 when
+ * comparing supplementary code points (U+10000..U+10ffff)
+ * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
+ * In code unit order, high BMP code points sort after supplementary code points
+ * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
+ *
+ * This function works with strings of different explicitly specified lengths
+ * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
+ * NUL-terminated strings are possible with length arguments of -1.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param codePointOrder Choose between code unit order (false)
+ * and code point order (true).
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+u_strCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ UBool codePointOrder);
+
+/**
+ * Compare two Unicode strings (binary order)
+ * as presented by UCharIterator objects.
+ * Works otherwise just like u_strCompare().
+ *
+ * Both iterators are reset to their start positions.
+ * When the function returns, it is undefined where the iterators
+ * have stopped.
+ *
+ * @param iter1 First source string iterator.
+ * @param iter2 Second source string iterator.
+ * @param codePointOrder Choose between code unit order (false)
+ * and code point order (true).
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see u_strCompare
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI int32_t U_EXPORT2
+u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to
+ * u_strCompare(u_strFoldCase(s1, options),
+ * u_strFoldCase(s2, options),
+ * (options&U_COMPARE_CODE_POINT_ORDER)!=0).
+ *
+ * The comparison can be done in UTF-16 code unit order or in code point order.
+ * They differ only when comparing supplementary code points (U+10000..U+10ffff)
+ * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
+ * In code unit order, high BMP code points sort after supplementary code points
+ * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
+ *
+ * This function works with strings of different explicitly specified lengths
+ * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
+ * NUL-terminated strings are possible with length arguments of -1.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+u_strCaseCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Compare two ustrings for bitwise equality.
+ * Compares at most <code>n</code> characters.
+ *
+ * @param ucs1 A string to compare (can be NULL/invalid if n<=0).
+ * @param ucs2 A string to compare (can be NULL/invalid if n<=0).
+ * @param n The maximum number of characters to compare; always returns 0 if n<=0.
+ * @return 0 if <code>ucs1</code> and <code>ucs2</code> are bitwise equal; a negative
+ * value if <code>ucs1</code> is bitwise less than <code>ucs2</code>; a positive
+ * value if <code>ucs1</code> is bitwise greater than <code>ucs2</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strncmp(const UChar *ucs1,
+ const UChar *ucs2,
+ int32_t n);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from u_strncmp() if supplementary characters are present.
+ * For details, see u_strCompare().
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param n The maximum number of characters to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
+ * u_strFoldCase(s2, at most n, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param n The maximum number of characters in each string to case-fold and then compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, n, options),
+ * u_strFoldCase(s2, n, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param length The number of characters in each string to case-fold and then compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
+
+/**
+ * Copy a ustring. Adds a null terminator.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_strcpy(UChar *dst,
+ const UChar *src);
+
+/**
+ * Copy a ustring.
+ * Copies at most <code>n</code> characters. The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ *
+ * @param dst The destination string.
+ * @param src The source string (can be NULL/invalid if n<=0).
+ * @param n The maximum number of characters to copy; no-op if <=0.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_strncpy(UChar *dst,
+ const UChar *src,
+ int32_t n);
+
+#if !UCONFIG_NO_CONVERSION
+
+/**
+ * Copy a byte string encoded in the default codepage to a ustring.
+ * Adds a null terminator.
+ * Performs a host byte to UChar conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
+ const char *src );
+
+/**
+ * Copy a byte string encoded in the default codepage to a ustring.
+ * Copies at most <code>n</code> characters. The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ * Performs a host byte to UChar conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
+ const char *src,
+ int32_t n);
+
+/**
+ * Copy ustring to a byte string encoded in the default codepage.
+ * Adds a null terminator.
+ * Performs a UChar to host byte conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI char* U_EXPORT2 u_austrcpy(char *dst,
+ const UChar *src );
+
+/**
+ * Copy ustring to a byte string encoded in the default codepage.
+ * Copies at most <code>n</code> characters. The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ * Performs a UChar to host byte conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI char* U_EXPORT2 u_austrncpy(char *dst,
+ const UChar *src,
+ int32_t n );
+
+#endif
+
+/**
+ * Synonym for memcpy(), but with UChars only.
+ * @param dest The destination string
+ * @param src The source string (can be NULL/invalid if count<=0)
+ * @param count The number of characters to copy; no-op if <=0
+ * @return A pointer to <code>dest</code>
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_memcpy(UChar *dest, const UChar *src, int32_t count);
+
+/**
+ * Synonym for memmove(), but with UChars only.
+ * @param dest The destination string
+ * @param src The source string (can be NULL/invalid if count<=0)
+ * @param count The number of characters to move; no-op if <=0
+ * @return A pointer to <code>dest</code>
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_memmove(UChar *dest, const UChar *src, int32_t count);
+
+/**
+ * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.
+ *
+ * @param dest The destination string.
+ * @param c The character to initialize the string.
+ * @param count The maximum number of characters to set.
+ * @return A pointer to <code>dest</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_memset(UChar *dest, UChar c, int32_t count);
+
+/**
+ * Compare the first <code>count</code> UChars of each buffer.
+ *
+ * @param buf1 The first string to compare.
+ * @param buf2 The second string to compare.
+ * @param count The maximum number of UChars to compare.
+ * @return When buf1 < buf2, a negative number is returned.
+ * When buf1 == buf2, 0 is returned.
+ * When buf1 > buf2, a positive number is returned.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from u_memcmp() if supplementary characters are present.
+ * For details, see u_strCompare().
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param count The maximum number of characters to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
+
+/**
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strFindFirst
+ */
+U_CAPI UChar* U_EXPORT2
+u_memchr(const UChar *s, UChar c, int32_t count);
+
+/**
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strFindFirst
+ */
+U_CAPI UChar* U_EXPORT2
+u_memchr32(const UChar *s, UChar32 c, int32_t count);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr
+ * @see u_memrchr32
+ * @see u_strFindLast
+ */
+U_CAPI UChar* U_EXPORT2
+u_memrchr(const UChar *s, UChar c, int32_t count);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strFindLast
+ */
+U_CAPI UChar* U_EXPORT2
+u_memrchr32(const UChar *s, UChar32 c, int32_t count);
+
+/**
+ * Unicode String literals in C.
+ * We need one macro to declare a variable for the string
+ * and to statically preinitialize it if possible,
+ * and a second macro to dynamically initialize such a string variable if necessary.
+ *
+ * The macros are defined for maximum performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only Latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * A pair of macros for a single string must be used with the same
+ * parameters.
+ * The string parameter must be a C string literal.
+ * The length of the string, not including the terminating
+ * `NUL`, must be specified as a constant.
+ * The U_STRING_DECL macro should be invoked exactly once for one
+ * such string variable before it is used.
+ *
+ * Usage:
+ *
+ * U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
+ * U_STRING_DECL(ustringVar2, "jumps 5%", 8);
+ * static UBool didInit=false;
+ *
+ * int32_t function() {
+ * if(!didInit) {
+ * U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
+ * U_STRING_INIT(ustringVar2, "jumps 5%", 8);
+ * didInit=true;
+ * }
+ * return u_strcmp(ustringVar1, ustringVar2);
+ * }
+ *
+ * Note that the macros will NOT consistently work if their argument is another `#define`.
+ * The following will not work on all platforms, don't use it.
+ *
+ * #define GLUCK "Mr. Gluck"
+ * U_STRING_DECL(var, GLUCK, 9)
+ * U_STRING_INIT(var, GLUCK, 9)
+ *
+ * Instead, use the string literal "Mr. Gluck" as the argument to both macro
+ * calls.
+ *
+ *
+ * @stable ICU 2.0
+ */
+#if defined(U_DECLARE_UTF16)
+# define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs)
+ /** No-op here: U_STRING_DECL already initialized var statically. @stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length)
+#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
+# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
+ /** No-op here: U_STRING_DECL already initialized var statically. @stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length)
+#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
+ /** No-op here: U_STRING_DECL already initialized var statically. @stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length)
+#else
+# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
+ /** Converts cs, including its terminating NUL, into var at run time. @stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, (length)+1)
+#endif
+
+/**
+ * Unescape a string of characters and write the resulting
+ * Unicode characters to the destination buffer. The following escape
+ * sequences are recognized:
+ *
+ * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
+ * \\Uhhhhhhhh 8 hex digits
+ * \\xhh 1-2 hex digits
+ * \\x{h...} 1-8 hex digits
+ * \\ooo 1-3 octal digits; o in [0-7]
+ * \\cX control-X; X is masked with 0x1F
+ *
+ * as well as the standard ANSI C escapes:
+ *
+ * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+ * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+ * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+ *
+ * Anything else following a backslash is generically escaped. For
+ * example, "[a\\-z]" returns "[a-z]".
+ *
+ * If an escape sequence is ill-formed, this method returns an empty
+ * string. An example of an ill-formed sequence is "\\u" followed by
+ * fewer than 4 hex digits.
+ *
+ * The above characters are recognized in the compiler's codepage,
+ * that is, they are coded as 'u', '\\', etc. Characters that are
+ * not parts of escape sequences are converted using u_charsToUChars().
+ *
+ * This function is similar to UnicodeString::unescape() but not
+ * identical to it. The latter takes a source UnicodeString, so it
+ * does escape recognition but no conversion.
+ *
+ * @param src a zero-terminated string of invariant characters
+ * @param dest pointer to buffer to receive converted and unescaped
+ * text and, if there is room, a zero terminator. May be NULL for
+ * preflighting, in which case no UChars will be written, but the
+ * return value will still be valid. On error, an empty string is
+ * stored here (if possible).
+ * @param destCapacity the number of UChars that may be written at
+ * dest. Ignored if dest == NULL.
+ * @return the length of unescaped string.
+ * @see u_unescapeAt
+ * @see UnicodeString#unescape()
+ * @see UnicodeString#unescapeAt()
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_unescape(const char *src,
+ UChar *dest, int32_t destCapacity);
+
+U_CDECL_BEGIN
+/**
+ * Callback function for u_unescapeAt() that returns a character of
+ * the source text given an offset and a context pointer. The context
+ * pointer will be whatever is passed into u_unescapeAt().
+ *
+ * @param offset the offset of the requested character in the source text
+ * (passed by value, unlike the offset pointer given to u_unescapeAt()).
+ * @param context an opaque pointer passed directly into u_unescapeAt()
+ * @return the UChar of the source text at offset
+ * @see u_unescapeAt
+ * @stable ICU 2.0
+ */
+typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
+U_CDECL_END
+
+/**
+ * Unescape a single sequence. The character at offset-1 is assumed
+ * (without checking) to be a backslash. This method takes a callback
+ * pointer to a function that returns the UChar at a given offset. By
+ * varying this callback, ICU functions are able to unescape char*
+ * strings, UnicodeString objects, and UFILE pointers.
+ *
+ * If offset is out of range, or if the escape sequence is ill-formed,
+ * (UChar32)0xFFFFFFFF is returned. See documentation of u_unescape()
+ * for a list of recognized sequences.
+ *
+ * @param charAt callback function that returns a UChar of the source
+ * text given an offset and a context pointer.
+ * @param offset pointer to the offset that will be passed to charAt.
+ * The offset value will be updated upon return to point after the
+ * last parsed character of the escape sequence. On error the offset
+ * is unchanged.
+ * @param length the number of characters in the source text. The
+ * last character of the source text is considered to be at offset
+ * length-1.
+ * @param context an opaque pointer passed directly into charAt.
+ * @return the character represented by the escape sequence at
+ * offset, or (UChar32)0xFFFFFFFF on error.
+ * @see u_unescape()
+ * @see UnicodeString#unescape()
+ * @see UnicodeString#unescapeAt()
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+u_unescapeAt(UNESCAPE_CHAR_AT charAt,
+ int32_t *offset,
+ int32_t length,
+ void *context);
+
+/**
+ * Uppercase the characters in a string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strToUpper(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+/**
+ * Lowercase the characters in a string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strToLower(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecase a string.
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (NULL), then a standard titlecase
+ * break iterator is opened.
+ * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.1
+ */
+U_CAPI int32_t U_EXPORT2
+u_strToTitle(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UBreakIterator *titleIter,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Case-folds the characters in a string.
+ *
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strFoldCase(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
+/**
+ * Convert a UTF-16 string to a wchar_t string.
+ * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
+ * this function simply calls the fast, dedicated function for that.
+ * Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of wchar_t's). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_CAPI wchar_t* U_EXPORT2
+u_strToWCS(wchar_t *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+/**
+ * Convert a wchar_t string to UTF-16.
+ * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
+ * this function simply calls the fast, dedicated function for that.
+ * Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromWCS(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const wchar_t *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
+
+/**
+ * Convert a UTF-16 string to UTF-8.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of chars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ * @see u_strToUTF8WithSub
+ * @see u_strFromUTF8
+ */
+U_CAPI char* U_EXPORT2
+u_strToUTF8(char *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-8 string to UTF-16.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ * @see u_strFromUTF8WithSub
+ * @see u_strFromUTF8Lenient
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromUTF8(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-16 string to UTF-8.
+ *
+ * Same as u_strToUTF8() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of chars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strToUTF8
+ * @see u_strFromUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_CAPI char* U_EXPORT2
+u_strToUTF8WithSub(char *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-8 string to UTF-16.
+ *
+ * Same as u_strFromUTF8() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF8
+ * @see u_strFromUTF8Lenient
+ * @see u_strToUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromUTF8WithSub(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-8 string to UTF-16.
+ *
+ * Same as u_strFromUTF8() except that this function is designed to be very fast,
+ * which it achieves by being lenient about malformed UTF-8 sequences.
+ * This function is intended for use in environments where UTF-8 text is
+ * expected to be well-formed.
+ *
+ * Its semantics are:
+ * - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
+ * - The function will not read beyond the input string, nor write beyond
+ * the destCapacity.
+ * - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not
+ * be well-formed UTF-16.
+ * The function will resynchronize to valid code point boundaries
+ * within a small number of code points after an illegal sequence.
+ * - Non-shortest forms are not detected and will result in "spoofing" output.
+ *
+ * For further performance improvement, if srcLength is given (>=0),
+ * then destCapacity must be at least srcLength (destCapacity>=srcLength).
+ *
+ * There is no inverse u_strToUTF8Lenient() function because there is practically
+ * no performance gain from not checking that a UTF-16 string is well-formed.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * Unlike for other ICU functions, if srcLength>=0 then
+ * destCapacity must be at least srcLength.
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * Unlike for other ICU functions, if srcLength>=0 but
+ * destCapacity<srcLength, then *pDestLength will be set to srcLength
+ * (and U_BUFFER_OVERFLOW_ERROR will be set)
+ * regardless of the actual result length.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF8
+ * @see u_strFromUTF8WithSub
+ * @see u_strToUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_CAPI UChar * U_EXPORT2
+u_strFromUTF8Lenient(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-16 string to UTF-32.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @see u_strToUTF32WithSub
+ * @see u_strFromUTF32
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32* U_EXPORT2
+u_strToUTF32(UChar32 *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-32 string to UTF-16.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF32WithSub
+ * @see u_strToUTF32
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromUTF32(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar32 *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-16 string to UTF-32.
+ *
+ * Same as u_strToUTF32() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32().
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strToUTF32
+ * @see u_strFromUTF32WithSub
+ * @stable ICU 4.2
+ */
+U_CAPI UChar32* U_EXPORT2
+u_strToUTF32WithSub(UChar32 *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-32 string to UTF-16.
+ *
+ * Same as u_strFromUTF32() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32().
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF32
+ * @see u_strToUTF32WithSub
+ * @stable ICU 4.2
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromUTF32WithSub(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar32 *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a 16-bit Unicode string to Java Modified UTF-8.
+ * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8
+ *
+ * This function behaves according to the documentation for Java DataOutput.writeUTF()
+ * except that it does not encode the output length in the destination buffer
+ * and does not have an output length restriction.
+ * See http://java.sun.com/javase/6/docs/api/java/io/DataOutput.html#writeUTF(java.lang.String)
+ *
+ * The input string need not be well-formed UTF-16.
+ * (Therefore there is no subchar parameter.)
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of chars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @stable ICU 4.4
+ * @see u_strToUTF8WithSub
+ * @see u_strFromJavaModifiedUTF8WithSub
+ */
+U_CAPI char* U_EXPORT2
+u_strToJavaModifiedUTF8(
+ char *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a Java Modified UTF-8 string to a 16-bit Unicode string.
+ * If the input string is not well-formed and no substitution char is specified,
+ * then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * This function behaves according to the documentation for Java DataInput.readUTF()
+ * except that it takes a length parameter rather than
+ * interpreting the first two input bytes as the length.
+ * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#readUTF()
+ *
+ * The output string may not be well-formed UTF-16.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF8WithSub
+ * @see u_strFromUTF8Lenient
+ * @see u_strToJavaModifiedUTF8
+ * @stable ICU 4.4
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromJavaModifiedUTF8WithSub(
+ UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/ustringtrie.h b/thirdparty/icu4c/common/unicode/ustringtrie.h
new file mode 100644
index 0000000000..fd85648225
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/ustringtrie.h
@@ -0,0 +1,97 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ustringtrie.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010dec17
+* created by: Markus W. Scherer
+*/
+
+#ifndef __USTRINGTRIE_H__
+#define __USTRINGTRIE_H__
+
+/**
+ * \file
+ * \brief C API: Helper definitions for dictionary trie APIs.
+ */
+
+#include "unicode/utypes.h"
+
+
+/**
+ * Return values for BytesTrie::next(), UCharsTrie::next() and similar methods.
+ * @see USTRINGTRIE_MATCHES
+ * @see USTRINGTRIE_HAS_VALUE
+ * @see USTRINGTRIE_HAS_NEXT
+ * @stable ICU 4.8
+ */
+enum UStringTrieResult {
+ /**
+ * The input unit(s) did not continue a matching string.
+ * Once current()/next() return USTRINGTRIE_NO_MATCH,
+ * all further calls to current()/next() will also return USTRINGTRIE_NO_MATCH,
+ * until the trie is reset to its original state or to a saved state.
+ * @stable ICU 4.8
+ */
+ USTRINGTRIE_NO_MATCH,
+ /**
+ * The input unit(s) continued a matching string
+ * but there is no value for the string so far.
+ * (It is a prefix of a longer string.)
+ * @stable ICU 4.8
+ */
+ USTRINGTRIE_NO_VALUE,
+ /**
+ * The input unit(s) continued a matching string
+ * and there is a value for the string so far.
+ * This value will be returned by getValue().
+ * No further input byte/unit can continue a matching string.
+ * @stable ICU 4.8
+ */
+ USTRINGTRIE_FINAL_VALUE,
+ /**
+ * The input unit(s) continued a matching string
+ * and there is a value for the string so far.
+ * This value will be returned by getValue().
+ * Another input byte/unit can continue a matching string.
+ * @stable ICU 4.8
+ */
+ USTRINGTRIE_INTERMEDIATE_VALUE
+};
+
+/**
+ * Same as (result!=USTRINGTRIE_NO_MATCH).
+ * @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
+ * @return true if the input bytes/units so far are part of a matching string/byte sequence.
+ * @stable ICU 4.8
+ */
+#define USTRINGTRIE_MATCHES(result) ((result)!=USTRINGTRIE_NO_MATCH)
+
+/**
+ * Equivalent to (result==USTRINGTRIE_INTERMEDIATE_VALUE || result==USTRINGTRIE_FINAL_VALUE) but
+ * this macro evaluates result exactly once (it relies on these being the two largest enum values).
+ * @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
+ * @return true if there is a value for the input bytes/units so far.
+ * @see BytesTrie::getValue
+ * @see UCharsTrie::getValue
+ * @stable ICU 4.8
+ */
+#define USTRINGTRIE_HAS_VALUE(result) ((result)>=USTRINGTRIE_FINAL_VALUE)
+
+/**
+ * Equivalent to (result==USTRINGTRIE_NO_VALUE || result==USTRINGTRIE_INTERMEDIATE_VALUE) but
+ * this macro evaluates result exactly once (it tests the low bit, which is set only for these two values).
+ * @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
+ * @return true if another input byte/unit can continue a matching string.
+ * @stable ICU 4.8
+ */
+#define USTRINGTRIE_HAS_NEXT(result) ((result)&1)
+
+#endif /* __USTRINGTRIE_H__ */
diff --git a/thirdparty/icu4c/common/unicode/utext.h b/thirdparty/icu4c/common/unicode/utext.h
new file mode 100644
index 0000000000..c6d1e53a8c
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/utext.h
@@ -0,0 +1,1603 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2004-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utext.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004oct06
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UTEXT_H__
+#define __UTEXT_H__
+
+/**
+ * \file
+ * \brief C API: Abstract Unicode Text API
+ *
+ * The Text Access API provides a means to allow text that is stored in alternative
+ * formats to work with ICU services. ICU normally operates on text that is
+ * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type
+ * UnicodeString for C++ APIs.
+ *
+ * ICU Text Access allows other formats, such as UTF-8 or non-contiguous
+ * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services.
+ *
+ * There are three general classes of usage for UText:
+ *
+ * Application Level Use. This is the simplest usage - applications would
+ * use one of the utext_open() functions on their input text, and pass
+ * the resulting UText to the desired ICU service.
+ *
+ * Second is usage in ICU Services, such as break iteration, that will need to
+ * operate on input presented to them as a UText. These implementations
+ * will need to use the iteration and related UText functions to gain
+ * access to the actual text.
+ *
+ * The third class of UText users are "text providers." These are the
+ * UText implementations for the various text storage formats. An application
+ * or system with a unique text storage format can implement a set of
+ * UText provider functions for that format, which will then allow
+ * ICU services to operate on that format.
+ *
+ *
+ * <em>Iterating over text</em>
+ *
+ * Here is sample code for a forward iteration over the contents of a UText
+ *
+ * \code
+ * UChar32 c;
+ * UText *ut = whatever();
+ *
+ * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) {
+ * // do whatever with the codepoint c here.
+ * }
+ * \endcode
+ *
+ * And here is similar code to iterate in the reverse direction, from the end
+ * of the text towards the beginning.
+ *
+ * \code
+ * UChar32 c;
+ * UText *ut = whatever();
+ * int64_t textLength = utext_nativeLength(ut);
+ * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) {
+ * // do whatever with the codepoint c here.
+ * }
+ * \endcode
+ *
+ * <em>Characters and Indexing</em>
+ *
+ * Indexing into text by UText functions is nearly always in terms of the native
+ * indexing of the underlying text storage. The storage format could be UTF-8
+ * or UTF-32, for example. When coding to the UText access API, no assumptions
+ * can be made regarding the size of characters, or how far an index
+ * may move when iterating between characters.
+ *
+ * All indices supplied to UText functions are pinned to the length of the
+ * text. An out-of-bounds index is not considered to be an error, but is
+ * adjusted to be in the range 0 <= index <= length of input text.
+ *
+ *
+ * When an index position is returned from a UText function, it will be
+ * a native index to the underlying text. In the case of multi-unit characters,
+ * it will always refer to the first position of the character,
+ * never to the interior. This is essentially the same thing as saying that
+ * a returned index will always point to a boundary between characters.
+ *
+ * When a native index is supplied to a UText function, all indices that
+ * refer to any part of a multi-unit character representation are considered
+ * to be equivalent. In the case of multi-unit characters, an incoming index
+ * will be logically normalized to refer to the start of the character.
+ *
+ * It is possible to test whether a native index is on a code point boundary
+ * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex().
+ * If the index is returned unchanged, it was on a code point boundary. If
+ * an adjusted index is returned, the original index referred to the
+ * interior of a character.
+ *
+ * <em>Conventions for calling UText functions</em>
+ *
+ * Most UText access functions have as their first parameter a (UText *) pointer,
+ * which specifies the UText to be used. Unless otherwise noted, the
+ * pointer must refer to a valid, open UText. Attempting to
+ * use a closed UText or passing a NULL pointer is a programming error and
+ * will produce undefined results or NULL pointer exceptions.
+ *
+ * The utext_open family of functions can either open an existing (closed)
+ * UText, or heap allocate a new UText. Here is sample code for creating
+ * a stack-allocated UText.
+ *
+ * \code
+ * char *s = whatever(); // A utf-8 string
+ * UErrorCode status = U_ZERO_ERROR;
+ * UText ut = UTEXT_INITIALIZER;
+ * utext_openUTF8(&ut, s, -1, &status);
+ * if (U_FAILURE(status)) {
+ * // error handling
+ * } else {
+ * // work with the UText
+ * }
+ * \endcode
+ *
+ * Any existing UText passed to an open function _must_ have been initialized,
+ * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated
+ * by an open function. Passing NULL will cause the open function to
+ * heap-allocate and fully initialize a new UText.
+ *
+ */
+
+
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#include "unicode/rep.h"
+#include "unicode/unistr.h"
+#include "unicode/chariter.h"
+#endif
+
+
+U_CDECL_BEGIN
+
+struct UText;
+typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */
+
+
+/***************************************************************************************
+ *
+ * C Functions for creating UText wrappers around various kinds of text strings.
+ *
+ ****************************************************************************************/
+
+
+/**
+ * Close function for UText instances.
+ * Cleans up, releases any resources being held by an open UText.
+ * <p>
+ * If the UText was originally allocated by one of the utext_open functions,
+ * the storage associated with the utext will also be freed.
+ * If the UText storage originated with the application, as it would with
+ * a local or static instance, the storage will not be deleted.
+ *
+ * An open UText can be reset to refer to a new string by using one of the utext_open()
+ * functions without first closing the UText.
+ *
+ * @param ut The UText to be closed.
+ * @return NULL if the UText struct was deleted by the close. If the UText struct
+ * was originally provided by the caller to the open function, it is
+ * returned by this function, and may be safely used again in
+ * a subsequent utext_open.
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_close(UText *ut);
+
+/**
+ * Open a read-only UText implementation for UTF-8 strings.
+ *
+ * \htmlonly
+ * Any invalid UTF-8 in the input will be handled in this way:
+ * a sequence of bytes that has the form of a truncated, but otherwise valid,
+ * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD.
+ * Any other illegal bytes will each be replaced by a \uFFFD.
+ * \endhtmlonly
+ *
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified UTF-8 string.
+ * @param s A UTF-8 string. Must not be NULL.
+ * @param length The length of the UTF-8 string in bytes, or -1 if the string is
+ * zero terminated.
+ * @param status Errors are returned here.
+ * @return A pointer to the UText. If a pre-allocated UText was provided, it
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
+
+
+/**
+ * Open a read-only UText for a UChar * string.
+ *
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified UChar string.
+ * @param s A UChar (UTF-16) string
+ * @param length The number of UChars in the input string, or -1 if the string is
+ * zero terminated.
+ * @param status Errors are returned here.
+ * @return A pointer to the UText. If a pre-allocated UText was provided, it
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
+
+
+#if U_SHOW_CPLUSPLUS_API
+/**
+ * Open a writable UText for a non-const UnicodeString.
+ *
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified input string.
+ * @param s A UnicodeString.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status);
+
+
+/**
+ * Open a UText for a const UnicodeString. The resulting UText will not be writable.
+ *
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified input string.
+ * @param s A const UnicodeString to be wrapped.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status);
+
+
+/**
+ * Open a writable UText implementation for an ICU Replaceable object.
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an already existing UText, which will then
+ * be reset to reference the specified replaceable text.
+ * @param rep A Replaceable text object.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @see Replaceable
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status);
+
+/**
+ * Open a UText implementation over an ICU CharacterIterator.
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an already existing UText, which will then
+ * be reset to reference the specified character iterator.
+ * @param ci A Character Iterator.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @see CharacterIterator
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status);
+
+#endif
+
+
+/**
+ * Clone a UText. This is much like opening a UText where the source text is itself
+ * another UText.
+ *
+ * A deep clone will copy both the UText data structures and the underlying text.
+ * The original and cloned UText will operate completely independently; modifications
+ * made to the text in one will not affect the other. Text providers are not
+ * required to support deep clones. The user of clone() must check the status return
+ * and be prepared to handle failures.
+ *
+ * The standard UText implementations for UTF8, UChar *, UnicodeString and
+ * Replaceable all support deep cloning.
+ *
+ * The UText returned from a deep clone will be writable, assuming that the text
+ * provider is able to support writing, even if the source UText had been made
+ * non-writable by means of utext_freeze().
+ *
+ * A shallow clone replicates only the UText data structures; it does not make
+ * a copy of the underlying text. Shallow clones can be used as an efficient way to
+ * have multiple iterators active in a single text string that is not being
+ * modified.
+ *
+ * A shallow clone operation will not fail, barring truly exceptional conditions such
+ * as memory allocation failures.
+ *
+ * Shallow UText clones should be avoided if the UText functions that modify the
+ * text are expected to be used, either on the original or the cloned UText.
+ * Any such modifications can cause unpredictable behavior. Read Only
+ * shallow clones provide some protection against errors of this type by
+ * disabling text modification via the cloned UText.
+ *
+ * A shallow clone made with the readOnly parameter == false will preserve the
+ * utext_isWritable() state of the source object. Note, however, that
+ * write operations must be avoided while more than one UText exists that refer
+ * to the same underlying text.
+ *
+ * A UText and its clone may be safely concurrently accessed by separate threads.
+ * This is true for read access only with shallow clones, and for both read and
+ * write access with deep clones.
+ * It is the responsibility of the Text Provider to ensure that this thread safety
+ * constraint is met.
+ *
+ * @param dest A UText struct to be filled in with the result of the clone operation,
+ * or NULL if the clone function should heap-allocate a new UText struct.
+ * If non-NULL, must refer to an already existing UText, which will then
+ * be reset to become the clone.
+ * @param src The UText to be cloned.
+ * @param deep true to request a deep clone, false for a shallow clone.
+ * @param readOnly true to request that the cloned UText have read only access to the
+ * underlying text.
+
+ * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR
+ * will be returned if the text provider is unable to clone the
+ * original text.
+ * @return The newly created clone, or NULL if the clone operation failed.
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
+
+
+/**
+ * Compare two UText objects for equality.
+ * UTexts are equal if they are iterating over the same text, and
+ * have the same iteration position within the text.
+ * If either or both of the parameters are NULL, the comparison is false.
+ *
+ * @param a The first of the two UTexts to compare.
+ * @param b The other UText to be compared.
+ * @return true if the two UTexts are equal.
+ * @stable ICU 3.6
+ */
+U_CAPI UBool U_EXPORT2
+utext_equals(const UText *a, const UText *b);
+
+
+/*****************************************************************************
+ *
+ * Functions to work with the text represented by a UText wrapper
+ *
+ *****************************************************************************/
+
+/**
+ * Get the length of the text. Depending on the characteristics
+ * of the underlying text representation, this may be expensive.
+ * @see utext_isLengthExpensive()
+ *
+ *
+ * @param ut the text to be accessed.
+ * @return the length of the text, expressed in native units.
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI int64_t U_EXPORT2
+utext_nativeLength(UText *ut);
+
+/**
+ * Return true if calculating the length of the text could be expensive.
+ * Finding the length of NUL terminated strings is considered to be expensive.
+ *
+ * Note that the value of this function may change
+ * as the result of other operations on a UText.
+ * Once the length of a string has been discovered, it will no longer
+ * be expensive to report it.
+ *
+ * @param ut the text to be accessed.
+ * @return true if determining the length of the text could be time consuming.
+ * @stable ICU 3.4
+ */
+U_CAPI UBool U_EXPORT2
+utext_isLengthExpensive(const UText *ut);
+
+/**
+ * Returns the code point at the requested index,
+ * or U_SENTINEL (-1) if it is out of bounds.
+ *
+ * If the specified index points to the interior of a multi-unit
+ * character - one of the trail bytes of a UTF-8 sequence, for example -
+ * the complete code point will be returned.
+ *
+ * The iteration position will be set to the start of the returned code point.
+ *
+ * This function is roughly equivalent to the sequence
+ * utext_setNativeIndex(index);
+ * utext_current32();
+ * (There is a subtle difference if the index is out of bounds by being less than zero -
+ * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32()
+ * will return the char at zero. utext_char32At(negative index), on the other hand, will
+ * return the U_SENTINEL value of -1.)
+ *
+ * @param ut the text to be accessed
+ * @param nativeIndex the native index of the character to be accessed. If the index points
+ * to other than the first unit of a multi-unit character, it will be adjusted
+ * to the start of the character.
+ * @return the code point at the specified index.
+ * @stable ICU 3.4
+ */
+U_CAPI UChar32 U_EXPORT2
+utext_char32At(UText *ut, int64_t nativeIndex);
+
+
+/**
+ *
+ * Get the code point at the current iteration position,
+ * or U_SENTINEL (-1) if the iteration has reached the end of
+ * the input text.
+ *
+ * @param ut the text to be accessed.
+ * @return the Unicode code point at the current iterator position.
+ * @stable ICU 3.4
+ */
+U_CAPI UChar32 U_EXPORT2
+utext_current32(UText *ut);
+
+
+/**
+ * Get the code point at the current iteration position of the UText, and
+ * advance the position to the first index following the character.
+ *
+ * If the position is at the end of the text (the index following
+ * the last character, which is also the length of the text),
+ * return U_SENTINEL (-1) and do not advance the index.
+ *
+ * This is a post-increment operation.
+ *
+ * An inline macro version of this function, UTEXT_NEXT32(),
+ * is available for performance critical use.
+ *
+ * @param ut the text to be accessed.
+ * @return the Unicode code point at the iteration position.
+ * @see UTEXT_NEXT32
+ * @stable ICU 3.4
+ */
+U_CAPI UChar32 U_EXPORT2
+utext_next32(UText *ut);
+
+
+/**
+ * Move the iterator position to the character (code point) whose
+ * index precedes the current position, and return that character.
+ * This is a pre-decrement operation.
+ *
+ * If the initial position is at the start of the text (index of 0)
+ * return U_SENTINEL (-1), and leave the position unchanged.
+ *
+ * An inline macro version of this function, UTEXT_PREVIOUS32(),
+ * is available for performance critical use.
+ *
+ * @param ut the text to be accessed.
+ * @return the previous UChar32 code point, or U_SENTINEL (-1)
+ * if the iteration has reached the start of the text.
+ * @see UTEXT_PREVIOUS32
+ * @stable ICU 3.4
+ */
+U_CAPI UChar32 U_EXPORT2
+utext_previous32(UText *ut);
+
+
+/**
+ * Set the iteration index and return the code point at that index.
+ * Leave the iteration index at the start of the following code point.
+ *
+ * This function is the most efficient and convenient way to
+ * begin a forward iteration. The results are identical to those
+ * from the sequence
+ * \code
+ * utext_setNativeIndex();
+ * utext_next32();
+ * \endcode
+ *
+ * @param ut the text to be accessed.
+ * @param nativeIndex Iteration index, in the native units of the text provider.
+ * @return Code point which starts at or before index,
+ * or U_SENTINEL (-1) if it is out of bounds.
+ * @stable ICU 3.4
+ */
+U_CAPI UChar32 U_EXPORT2
+utext_next32From(UText *ut, int64_t nativeIndex);
+
+
+
+/**
+ * Set the iteration index, and return the code point preceding the
+ * one specified by the initial index. Leave the iteration position
+ * at the start of the returned code point.
+ *
+ * This function is the most efficient and convenient way to
+ * begin a backwards iteration.
+ *
+ * @param ut the text to be accessed.
+ * @param nativeIndex Iteration index in the native units of the text provider.
+ * @return Code point preceding the one at the initial index,
+ * or U_SENTINEL (-1) if it is out of bounds.
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI UChar32 U_EXPORT2
+utext_previous32From(UText *ut, int64_t nativeIndex);
+
+/**
+ * Get the current iterator position, which can range from 0 to
+ * the length of the text.
+ * The position is a native index into the input text, in whatever format it
+ * may have (possibly UTF-8 for example), and may not always be the same as
+ * the corresponding UChar (UTF-16) index.
+ * The returned position will always be aligned to a code point boundary.
+ *
+ * @param ut the text to be accessed.
+ * @return the current index position, in the native units of the text provider.
+ * @stable ICU 3.4
+ */
+U_CAPI int64_t U_EXPORT2
+utext_getNativeIndex(const UText *ut);
+
+/**
+ * Set the current iteration position to the nearest code point
+ * boundary at or preceding the specified index.
+ * The index is in the native units of the original input text.
+ * If the index is out of range, it will be pinned to be within
+ * the range of the input text.
+ * <p>
+ * It will usually be more efficient to begin an iteration
+ * using the functions utext_next32From() or utext_previous32From()
+ * rather than utext_setNativeIndex().
+ * <p>
+ * Moving the index position to an adjacent character is best done
+ * with utext_next32(), utext_previous32() or utext_moveIndex32().
+ * Attempting to do direct arithmetic on the index position is
+ * complicated by the fact that the size (in native units) of a
+ * character depends on the underlying representation of the character
+ * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not
+ * easily knowable.
+ *
+ * @param ut the text to be accessed.
+ * @param nativeIndex the native unit index of the new iteration position.
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+utext_setNativeIndex(UText *ut, int64_t nativeIndex);
+
+/**
+ * Move the iterator position by delta code points. The number of code points
+ * is a signed number; a negative delta will move the iterator backwards,
+ * towards the start of the text.
+ * <p>
+ * The index is moved by <code>delta</code> code points
+ * forward or backward, but no further backward than to 0 and
+ * no further forward than to utext_nativeLength().
+ * The resulting index value will be in between 0 and length, inclusive.
+ *
+ * @param ut the text to be accessed.
+ * @param delta the signed number of code points to move the iteration position.
+ * @return true if the position could be moved the requested number of positions while
+ * staying within the range [0 - text length].
+ * @stable ICU 3.4
+ */
+U_CAPI UBool U_EXPORT2
+utext_moveIndex32(UText *ut, int32_t delta);
+
+/**
+ * Get the native index of the character preceding the current position.
+ * If the iteration position is already at the start of the text, zero
+ * is returned.
+ * The value returned is the same as that obtained from the following sequence,
+ * but without the side effect of changing the iteration position.
+ *
+ * \code
+ * UText *ut = whatever;
+ * ...
+ * utext_previous32(ut);
+ * utext_getNativeIndex(ut);
+ * \endcode
+ *
+ * This function is most useful during forwards iteration, where it will get the
+ * native index of the character most recently returned from utext_next32().
+ *
+ * @param ut the text to be accessed
+ * @return the native index of the character preceding the current index position,
+ * or zero if the current position is at the start of the text.
+ * @stable ICU 3.6
+ */
+U_CAPI int64_t U_EXPORT2
+utext_getPreviousNativeIndex(UText *ut);
+
+
+/**
+ *
+ * Extract text from a UText into a UChar buffer. The range of text to be extracted
+ * is specified in the native indices of the UText provider. These may not necessarily
+ * be UTF-16 indices.
+ * <p>
+ * The size (number of 16 bit UChars) of the data to be extracted is returned. The
+ * full number of UChars is returned, even when the extracted text is truncated
+ * because the specified buffer size is too small.
+ * <p>
+ * The extracted string will (if you are a user) / must (if you are a text provider)
+ * be NUL-terminated if there is sufficient space in the destination buffer. This
+ * terminating NUL is not included in the returned length.
+ * <p>
+ * The iteration index is left at the position following the last extracted character.
+ *
+ * @param ut the UText from which to extract data.
+ * @param nativeStart the native index of the first character to extract.
+ * If the specified index is out of range,
+ * it will be pinned to be within 0 <= index <= textLength.
+ * @param nativeLimit the native string index of the position following the last
+ * character to extract. If the specified index is out of range,
+ * it will be pinned to be within 0 <= index <= textLength.
+ * nativeLimit must be >= nativeStart.
+ * @param dest the UChar (UTF-16) buffer into which the extracted text is placed
+ * @param destCapacity The size, in UChars, of the destination buffer. May be zero
+ * for precomputing the required size.
+ * @param status receives any error status.
+ * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the
+ * buffer was too small. Returns number of UChars for preflighting.
+ * @return Number of UChars in the data to be extracted. Does not include a trailing NUL.
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+utext_extract(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ UChar *dest, int32_t destCapacity,
+ UErrorCode *status);
+
+
+
+/************************************************************************************
+ *
+ * #define inline versions of selected performance-critical text access functions
+ * Caution: do not use auto increment++ or decrement-- expressions
+ * as parameters to these macros.
+ *
+ * For most use, where there is no extreme performance constraint, the
+ * normal, non-inline functions are a better choice. The resulting code
+ * will be smaller, and, if the need ever arises, easier to debug.
+ *
+ * These are implemented as #defines rather than real functions
+ * because there is no fully portable way to do inline functions in plain C.
+ *
+ ************************************************************************************/
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * inline version of utext_current32(), for performance-critical situations.
+ *
+ * Get the code point at the current iteration position of the UText.
+ * Returns U_SENTINEL (-1) if the position is at the end of the
+ * text.
+ *
+ * @internal ICU 4.4 technology preview
+ */
+#define UTEXT_CURRENT32(ut) \
+ ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
+ ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * inline version of utext_next32(), for performance-critical situations.
+ *
+ * Get the code point at the current iteration position of the UText, and
+ * advance the position to the first index following the character.
+ * This is a post-increment operation.
+ * Returns U_SENTINEL (-1) if the position is at the end of the
+ * text.
+ *
+ * @stable ICU 3.4
+ */
+#define UTEXT_NEXT32(ut) \
+ ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
+ ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
+
+/**
+ * inline version of utext_previous32(), for performance-critical situations.
+ *
+ * Move the iterator position to the character (code point) whose
+ * index precedes the current position, and return that character.
+ * This is a pre-decrement operation.
+ * Returns U_SENTINEL (-1) if the position is at the start of the text.
+ *
+ * @stable ICU 3.4
+ */
+#define UTEXT_PREVIOUS32(ut) \
+ ((ut)->chunkOffset > 0 && \
+ (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
+ (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
+
+/**
+ * inline version of utext_getNativeIndex(), for performance-critical situations.
+ *
+ * Get the current iterator position, which can range from 0 to
+ * the length of the text.
+ * The position is a native index into the input text, in whatever format it
+ * may have (possibly UTF-8 for example), and may not always be the same as
+ * the corresponding UChar (UTF-16) index.
+ * The returned position will always be aligned to a code point boundary.
+ *
+ * @stable ICU 3.6
+ */
+#define UTEXT_GETNATIVEINDEX(ut) \
+ ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \
+ (ut)->chunkNativeStart+(ut)->chunkOffset : \
+ (ut)->pFuncs->mapOffsetToNative(ut))
+
+/**
+ * inline version of utext_setNativeIndex(), for performance-critical situations.
+ *
+ * Set the current iteration position to the nearest code point
+ * boundary at or preceding the specified index.
+ * The index is in the native units of the original input text.
+ * If the index is out of range, it will be pinned to be within
+ * the range of the input text.
+ *
+ * @stable ICU 3.8
+ */
+#define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \
+ int64_t __offset = (ix) - (ut)->chunkNativeStart; \
+ if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
+ (ut)->chunkOffset=(int32_t)__offset; \
+ } else { \
+ utext_setNativeIndex((ut), (ix)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+
+
+/************************************************************************************
+ *
+ * Functions related to writing or modifying the text.
+ * These will work only with modifiable UTexts. Attempting to
+ * modify a read-only UText will return an error status.
+ *
+ ************************************************************************************/
+
+
+/**
+ * Return true if the text can be written (modified) with utext_replace() or
+ * utext_copy(). For the text to be writable, the text provider must
+ * be of a type that supports writing and the UText must not be frozen.
+ *
+ * Attempting to modify text when utext_isWritable() is false will fail -
+ * the text will not be modified, and an error will be returned from the function
+ * that attempted the modification.
+ *
+ * @param ut the UText to be tested.
+ * @return true if the text is modifiable.
+ *
+ * @see utext_freeze()
+ * @see utext_replace()
+ * @see utext_copy()
+ * @stable ICU 3.4
+ *
+ */
+U_CAPI UBool U_EXPORT2
+utext_isWritable(const UText *ut);
+
+
+/**
+ * Test whether there is meta data associated with the text.
+ * @see Replaceable::hasMetaData()
+ *
+ * @param ut The UText to be tested
+ * @return true if the underlying text includes meta data.
+ * @stable ICU 3.4
+ */
+U_CAPI UBool U_EXPORT2
+utext_hasMetaData(const UText *ut);
+
+
+/**
+ * Replace a range of the original text with a replacement text.
+ *
+ * Leaves the current iteration position at the position following the
+ * newly inserted replacement text.
+ *
+ * This function is only available on UText types that support writing,
+ * that is, ones where utext_isWritable() returns true.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. Behavior after a replace operation
+ * on a UText is undefined for any other additional UTexts that refer to the
+ * modified string.
+ *
+ * @param ut the UText representing the text to be operated on.
+ * @param nativeStart the native index of the start of the region to be replaced
+ * @param nativeLimit the native index of the character following the region to be replaced.
+ * @param replacementText pointer to the replacement text
+ * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated.
+ * @param status receives any error status. Possible errors include
+ * U_NO_WRITE_PERMISSION
+ *
+ * @return The signed number of (native) storage units by which
+ * the length of the text expanded or contracted.
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+utext_replace(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ const UChar *replacementText, int32_t replacementLength,
+ UErrorCode *status);
+
+
+
+/**
+ *
+ * Copy or move a substring from one position to another within the text,
+ * while retaining any metadata associated with the text.
+ * This function is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * The text to be copied or moved is inserted at destIndex;
+ * it does not replace or overwrite any existing text.
+ *
+ * The iteration position is left following the newly inserted text
+ * at the destination position.
+ *
+ * This function is only available on UText types that support writing,
+ * that is, ones where utext_isWritable() returns true.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. Behavior after a copy operation
+ * on a UText is undefined in any other additional UTexts that refer to the
+ * modified string.
+ *
+ * @param ut The UText representing the text to be operated on.
+ * @param nativeStart The native index of the start of the region to be copied or moved
+ * @param nativeLimit The native index of the character position following the region
+ * to be copied.
+ * @param destIndex The native destination index to which the source substring is
+ * copied or moved.
+ * @param move If true, then the substring is moved, not copied/duplicated.
+ * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+utext_copy(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ int64_t destIndex,
+ UBool move,
+ UErrorCode *status);
+
+
+/**
+ * <p>
+ * Freeze a UText. This prevents any modification to the underlying text itself
+ * by means of functions operating on this UText.
+ * </p>
+ * <p>
+ * Once frozen, a UText can not be unfrozen. The intent is to ensure
+ * that the text underlying a frozen UText wrapper cannot be modified via that UText.
+ * </p>
+ * <p>
+ * Caution: freezing a UText will disable changes made via the specific
+ * frozen UText wrapper only; it will not have any effect on the ability to
+ * directly modify the text by bypassing the UText. Any such backdoor modifications
+ * are always an error while UText access is occurring because the underlying
+ * text can get out of sync with UText's buffering.
+ * </p>
+ *
+ * @param ut The UText to be frozen.
+ * @see utext_isWritable()
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+utext_freeze(UText *ut);
+
+
+/**
+ * UText provider properties (bit field indexes).
+ *
+ * @see UText
+ * @stable ICU 3.4
+ */
+enum {
+ /**
+ * It is potentially time consuming for the provider to determine the length of the text.
+ * @stable ICU 3.4
+ */
+ UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1,
+ /**
+ * Text chunks remain valid and usable until the text object is modified or
+ * deleted, not just until the next time the access() function is called
+ * (which is the default).
+ * @stable ICU 3.4
+ */
+ UTEXT_PROVIDER_STABLE_CHUNKS = 2,
+ /**
+ * The provider supports modifying the text via the replace() and copy()
+ * functions.
+ * @see Replaceable
+ * @stable ICU 3.4
+ */
+ UTEXT_PROVIDER_WRITABLE = 3,
+ /**
+ * There is meta data associated with the text.
+ * @see Replaceable::hasMetaData()
+ * @stable ICU 3.4
+ */
+ UTEXT_PROVIDER_HAS_META_DATA = 4,
+ /**
+ * Text provider owns the text storage.
+ * Generally occurs as the result of a deep clone of the UText.
+ * When closing the UText, the associated text must
+ * also be closed/deleted/freed/ whatever is appropriate.
+ * @stable ICU 3.6
+ */
+ UTEXT_PROVIDER_OWNS_TEXT = 5
+};
+
+/**
+ * Function type declaration for UText.clone().
+ *
+ * clone a UText. Much like opening a UText where the source text is itself
+ * another UText.
+ *
+ * A deep clone will copy both the UText data structures and the underlying text.
+ * The original and cloned UText will operate completely independently; modifications
+ * made to the text in one will not affect the other. Text providers are not
+ * required to support deep clones. The user of clone() must check the status return
+ * and be prepared to handle failures.
+ *
+ * A shallow clone replicates only the UText data structures; it does not make
+ * a copy of the underlying text. Shallow clones can be used as an efficient way to
+ * have multiple iterators active in a single text string that is not being
+ * modified.
+ *
+ * A shallow clone operation must not fail except for truly exceptional conditions such
+ * as memory allocation failures.
+ *
+ * A UText and its clone may be safely concurrently accessed by separate threads.
+ * This is true for both shallow and deep clones.
+ * It is the responsibility of the Text Provider to ensure that this thread safety
+ * constraint is met.
+ *
+ *
+ * @param dest A UText struct to be filled in with the result of the clone operation,
+ * or NULL if the clone function should heap-allocate a new UText struct.
+ * @param src The UText to be cloned.
+ * @param deep true to request a deep clone, false for a shallow clone.
+ * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR
+ * should be returned if the text provider is unable to clone the
+ * original text.
+ * @return The newly created clone, or NULL if the clone operation failed.
+ *
+ * @stable ICU 3.4
+ */
+typedef UText * U_CALLCONV
+UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
+
+
+/**
+ * Function type declaration for UText.nativeLength().
+ *
+ * @param ut the UText to get the length of.
+ * @return the length, in the native units of the original text string.
+ * @see UText
+ * @stable ICU 3.4
+ */
+typedef int64_t U_CALLCONV
+UTextNativeLength(UText *ut);
+
+/**
+ * Function type declaration for UText.access(). Get the description of the text chunk
+ * containing the text at a requested native index. The UText's iteration
+ * position will be left at the requested index. If the index is out
+ * of bounds, the iteration position will be left at the start or end
+ * of the string, as appropriate.
+ *
+ * Chunks must begin and end on code point boundaries. A single code point
+ * comprised of multiple storage units must never span a chunk boundary.
+ *
+ *
+ * @param ut the UText being accessed.
+ * @param nativeIndex Requested index of the text to be accessed.
+ * @param forward If true, then the returned chunk must contain text
+ * starting from the index, so that start<=index<limit.
+ * If false, then the returned chunk must contain text
+ * before the index, so that start<index<=limit.
+ * @return True if the requested index could be accessed. The chunk
+ * will contain the requested text.
+ * False value if a chunk cannot be accessed
+ * (the requested index is out of bounds).
+ *
+ * @see UText
+ * @stable ICU 3.4
+ */
+typedef UBool U_CALLCONV
+UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
+
+/**
+ * Function type declaration for UText.extract().
+ *
+ * Extract text from a UText into a UChar buffer. The range of text to be extracted
+ * is specified in the native indices of the UText provider. These may not necessarily
+ * be UTF-16 indices.
+ * <p>
+ * The size (number of 16 bit UChars) in the data to be extracted is returned. The
+ * full amount is returned, even when the specified buffer size is smaller.
+ * <p>
+ * The extracted string will (if you are a user) / must (if you are a text provider)
+ * be NUL-terminated if there is sufficient space in the destination buffer.
+ *
+ * @param ut the UText from which to extract data.
+ * @param nativeStart the native index of the first character to extract.
+ * @param nativeLimit the native string index of the position following the last
+ * character to extract.
+ * @param dest the UChar (UTF-16) buffer into which the extracted text is placed
+ * @param destCapacity The size, in UChars, of the destination buffer. May be zero
+ * for precomputing the required size.
+ * @param status receives any error status.
+ * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
+ * preflighting.
+ * @return Number of UChars in the data. Does not include a trailing NUL.
+ *
+ * @stable ICU 3.4
+ */
+typedef int32_t U_CALLCONV
+UTextExtract(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ UChar *dest, int32_t destCapacity,
+ UErrorCode *status);
+
+/**
+ * Function type declaration for UText.replace().
+ *
+ * Replace a range of the original text with a replacement text.
+ *
+ * Leaves the current iteration position at the position following the
+ * newly inserted replacement text.
+ *
+ * This function need only be implemented on UText types that support writing.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. The function is responsible for updating the
+ * text chunk within the UText to reflect the updated iteration position,
+ * taking into account any changes to the underlying string's structure caused
+ * by the replace operation.
+ *
+ * @param ut the UText representing the text to be operated on.
+ * @param nativeStart the index of the start of the region to be replaced
+ * @param nativeLimit the index of the character following the region to be replaced.
+ * @param replacementText pointer to the replacement text
+ * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
+ * @param status receives any error status. Possible errors include
+ * U_NO_WRITE_PERMISSION
+ *
+ * @return The signed number of (native) storage units by which
+ * the length of the text expanded or contracted.
+ *
+ * @stable ICU 3.4
+ */
+typedef int32_t U_CALLCONV
+UTextReplace(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ const UChar *replacementText, int32_t replacmentLength,
+ UErrorCode *status);
+
+/**
+ * Function type declaration for UText.copy().
+ *
+ * Copy or move a substring from one position to another within the text,
+ * while retaining any metadata associated with the text.
+ * This function is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * The text to be copied or moved is inserted at destIndex;
+ * it does not replace or overwrite any existing text.
+ *
+ * This function need only be implemented for UText types that support writing.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. The function is responsible for updating the
+ * text chunk within the UText to reflect the updated iteration position,
+ * taking into account any changes to the underlying string's structure caused
+ * by the copy operation.
+ *
+ * @param ut The UText representing the text to be operated on.
+ * @param nativeStart The index of the start of the region to be copied or moved
+ * @param nativeLimit The index of the character following the region to be copied or moved.
+ * @param nativeDest The destination index to which the source substring is copied or moved.
+ * @param move If true, then the substring is moved, not copied/duplicated.
+ * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
+ *
+ * @stable ICU 3.4
+ */
+typedef void U_CALLCONV
+UTextCopy(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ int64_t nativeDest,
+ UBool move,
+ UErrorCode *status);
+
+/**
+ * Function type declaration for UText.mapOffsetToNative().
+ * Map from the current UChar offset within the current text chunk to
+ * the corresponding native index in the original source text.
+ *
+ * This is required only for text providers that do not use native UTF-16 indexes.
+ *
+ * @param ut the UText.
+ * @return Absolute (native) index corresponding to chunkOffset in the current chunk.
+ * The returned native index should always be to a code point boundary.
+ *
+ * @stable ICU 3.4
+ */
+typedef int64_t U_CALLCONV
+UTextMapOffsetToNative(const UText *ut);
+
+/**
+ * Function type declaration for UText.mapIndexToUTF16().
+ * Map from a native index to a UChar offset within a text chunk.
+ * Behavior is undefined if the native index does not fall within the
+ * current chunk.
+ *
+ * This function is required only for text providers that do not use native UTF-16 indexes.
+ *
+ * @param ut The UText containing the text chunk.
+ * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit.
+ * @return Chunk-relative UTF-16 offset corresponding to the specified native
+ * index.
+ *
+ * @stable ICU 3.4
+ */
+typedef int32_t U_CALLCONV
+UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
+
+
+/**
+ * Function type declaration for UText.close().
+ *
+ * A Text Provider close function is only required for provider types that make
+ * allocations in their open function (or other functions) that must be
+ * cleaned when the UText is closed.
+ *
+ * The allocation of the UText struct itself and any "extra" storage
+ * associated with the UText is handled by the common UText implementation
+ * and does not require provider specific cleanup in a close function.
+ *
+ * Most UText provider implementations do not need to implement this function.
+ *
+ * @param ut A UText object to be closed.
+ *
+ * @stable ICU 3.4
+ */
+typedef void U_CALLCONV
+UTextClose(UText *ut);
+
+
+/**
+ * (public) Function dispatch table for UText.
+ * Conceptually very much like a C++ Virtual Function Table.
+ * This struct defines the organization of the table.
+ * Each text provider implementation must provide an
+ * actual table that is initialized with the appropriate functions
+ * for the type of text being handled.
+ * @stable ICU 3.6
+ */
+struct UTextFuncs {
+ /**
+ * (public) Function table size, sizeof(UTextFuncs)
+ * Intended for use should the table grow to accommodate added
+ * functions in the future, to allow tests for older format
+ * function tables that do not contain the extensions.
+ *
+ * Fields are placed for optimal alignment on
+ * 32/64/128-bit-pointer machines, by normally grouping together
+ * 4 32-bit fields,
+ * 4 pointers,
+ * 2 64-bit fields
+ * in sequence.
+ * @stable ICU 3.6
+ */
+ int32_t tableSize;
+
+ /**
+ * (private) Alignment padding.
+ * Do not use, reserved for use by the UText framework only.
+ * @internal
+ */
+ int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3;
+
+
+ /**
+ * (public) Function pointer for UTextClone
+ *
+ * @see UTextClone
+ * @stable ICU 3.6
+ */
+ UTextClone *clone;
+
+ /**
+ * (public) Function pointer for UTextNativeLength.
+ * May be expensive to compute!
+ *
+ * @see UTextNativeLength
+ * @stable ICU 3.6
+ */
+ UTextNativeLength *nativeLength;
+
+ /**
+ * (public) Function pointer for UTextAccess.
+ *
+ * @see UTextAccess
+ * @stable ICU 3.6
+ */
+ UTextAccess *access;
+
+ /**
+ * (public) Function pointer for UTextExtract.
+ *
+ * @see UTextExtract
+ * @stable ICU 3.6
+ */
+ UTextExtract *extract;
+
+ /**
+ * (public) Function pointer for UTextReplace.
+ *
+ * @see UTextReplace
+ * @stable ICU 3.6
+ */
+ UTextReplace *replace;
+
+ /**
+ * (public) Function pointer for UTextCopy.
+ *
+ * @see UTextCopy
+ * @stable ICU 3.6
+ */
+ UTextCopy *copy;
+
+ /**
+ * (public) Function pointer for UTextMapOffsetToNative.
+ *
+ * @see UTextMapOffsetToNative
+ * @stable ICU 3.6
+ */
+ UTextMapOffsetToNative *mapOffsetToNative;
+
+ /**
+ * (public) Function pointer for UTextMapNativeIndexToUTF16.
+ *
+ * @see UTextMapNativeIndexToUTF16
+ * @stable ICU 3.6
+ */
+ UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16;
+
+ /**
+ * (public) Function pointer for UTextClose.
+ *
+ * @see UTextClose
+ * @stable ICU 3.6
+ */
+ UTextClose *close;
+
+ /**
+ * (private) Spare function pointer
+ * @internal
+ */
+ UTextClose *spare1;
+
+ /**
+ * (private) Spare function pointer
+ * @internal
+ */
+ UTextClose *spare2;
+
+ /**
+ * (private) Spare function pointer
+ * @internal
+ */
+ UTextClose *spare3;
+
+};
+/**
+ * Function dispatch table for UText
+ * @see UTextFuncs
+ */
+typedef struct UTextFuncs UTextFuncs;
+
+ /**
+ * UText struct. Provides the interface between the generic UText access code
+ * and the UText provider code that works on specific kinds of
+ * text (UTF-8, noncontiguous UTF-16, whatever.)
+ *
+ * Applications that are using predefined types of text providers
+ * to pass text data to ICU services will have no need to view the
+ * internals of the UText structs that they open.
+ *
+ * @stable ICU 3.6
+ */
+struct UText {
+ /**
+ * (private) Magic. Used to help detect when UText functions are handed
+ * invalid or uninitialized UText structs.
+ * utext_openXYZ() functions take an initialized,
+ * but not necessarily open, UText struct as an
+ * optional fill-in parameter. This magic field
+ * is used to check for that initialization.
+ * Text provider close functions must NOT clear
+ * the magic field because that would prevent
+ * reuse of the UText struct.
+ * @internal
+ */
+ uint32_t magic;
+
+
+ /**
+ * (private) Flags for managing the allocation and freeing of
+ * memory associated with this UText.
+ * @internal
+ */
+ int32_t flags;
+
+
+ /**
+ * Text provider properties. This set of flags is maintained by the
+ * text provider implementation.
+ * @stable ICU 3.4
+ */
+ int32_t providerProperties;
+
+ /**
+ * (public) sizeOfStruct=sizeof(UText)
+ * Allows possible backward compatible extension.
+ *
+ * @stable ICU 3.4
+ */
+ int32_t sizeOfStruct;
+
+ /* ------ 16 byte alignment boundary ----------- */
+
+
+ /**
+ * (protected) Native index of the first character position following
+ * the current chunk.
+ * @stable ICU 3.6
+ */
+ int64_t chunkNativeLimit;
+
+ /**
+ * (protected) Size in bytes of the extra space (pExtra).
+ * @stable ICU 3.4
+ */
+ int32_t extraSize;
+
+ /**
+ * (protected) The highest chunk offset where native indexing and
+ * chunk (UTF-16) indexing correspond. For UTF-16 sources, value
+ * will be equal to chunkLength.
+ *
+ * @stable ICU 3.6
+ */
+ int32_t nativeIndexingLimit;
+
+ /* ---- 16 byte alignment boundary------ */
+
+ /**
+ * (protected) Native index of the first character in the text chunk.
+ * @stable ICU 3.6
+ */
+ int64_t chunkNativeStart;
+
+ /**
+ * (protected) Current iteration position within the text chunk (UTF-16 buffer).
+ * This is the index to the character that will be returned by utext_next32().
+ * @stable ICU 3.6
+ */
+ int32_t chunkOffset;
+
+ /**
+ * (protected) Length of the text chunk (UTF-16 buffer), in UChars.
+ * @stable ICU 3.6
+ */
+ int32_t chunkLength;
+
+ /* ---- 16 byte alignment boundary-- */
+
+
+ /**
+ * (protected) pointer to a chunk of text in UTF-16 format.
+ * May refer either to original storage of the source of the text, or
+ * if conversion was required, to a buffer owned by the UText.
+ * @stable ICU 3.6
+ */
+ const UChar *chunkContents;
+
+ /**
+ * (public) Pointer to Dispatch table for accessing functions for this UText.
+ * @stable ICU 3.6
+ */
+ const UTextFuncs *pFuncs;
+
+ /**
+ * (protected) Pointer to additional space requested by the
+ * text provider during the utext_open operation.
+ * @stable ICU 3.4
+ */
+ void *pExtra;
+
+ /**
+ * (protected) Pointer to string or text-containing object or similar.
+ * This is the source of the text that this UText is wrapping, in a format
+ * that is known to the text provider functions.
+ * @stable ICU 3.4
+ */
+ const void *context;
+
+ /* --- 16 byte alignment boundary--- */
+
+ /**
+ * (protected) Pointer fields available for use by the text provider.
+ * Not used by UText common code.
+ * @stable ICU 3.6
+ */
+ const void *p;
+ /**
+ * (protected) Pointer fields available for use by the text provider.
+ * Not used by UText common code.
+ * @stable ICU 3.6
+ */
+ const void *q;
+ /**
+ * (protected) Pointer fields available for use by the text provider.
+ * Not used by UText common code.
+ * @stable ICU 3.6
+ */
+ const void *r;
+
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ void *privP;
+
+
+ /* --- 16 byte alignment boundary--- */
+
+
+ /**
+ * (protected) Integer field reserved for use by the text provider.
+ * Not used by the UText framework, or by the client (user) of the UText.
+ * @stable ICU 3.4
+ */
+ int64_t a;
+
+ /**
+ * (protected) Integer field reserved for use by the text provider.
+ * Not used by the UText framework, or by the client (user) of the UText.
+ * @stable ICU 3.4
+ */
+ int32_t b;
+
+ /**
+ * (protected) Integer field reserved for use by the text provider.
+ * Not used by the UText framework, or by the client (user) of the UText.
+ * @stable ICU 3.4
+ */
+ int32_t c;
+
+ /* ---- 16 byte alignment boundary---- */
+
+
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ int64_t privA;
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ int32_t privB;
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ int32_t privC;
+};
+
+
+/**
+ * Common function for use by Text Provider implementations to allocate and/or initialize
+ * a new UText struct. To be called in the implementation of utext_open() functions.
+ * If the supplied UText parameter is null, a new UText struct will be allocated on the heap.
+ * If the supplied UText is already open, the provider's close function will be called
+ * so that the struct can be reused by the open that is in progress.
+ *
+ * @param ut pointer to a UText struct to be re-used, or null if a new UText
+ * should be allocated.
+ * @param extraSpace The amount of additional space to be allocated as part
+ * of this UText, for use by types of providers that require
+ * additional storage.
+ * @param status Errors are returned here.
+ * @return pointer to the UText, allocated if necessary, with extra space set up if requested.
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
+
+// do not use #ifndef U_HIDE_INTERNAL_API around the following!
+/**
+ * @internal
+ * Value used to help identify correctly initialized UText structs.
+ * Note: must be publicly visible so that UTEXT_INITIALIZER can access it.
+ */
+enum {
+ UTEXT_MAGIC = 0x345ad82c
+};
+
+/**
+ * initializer to be used with local (stack) instances of a UText
+ * struct. UText structs must be initialized before passing
+ * them to one of the utext_open functions.
+ *
+ * @stable ICU 3.6
+ */
+#define UTEXT_INITIALIZER { \
+ UTEXT_MAGIC, /* magic */ \
+ 0, /* flags */ \
+ 0, /* providerProps */ \
+ sizeof(UText), /* sizeOfStruct */ \
+ 0, /* chunkNativeLimit */ \
+ 0, /* extraSize */ \
+ 0, /* nativeIndexingLimit */ \
+ 0, /* chunkNativeStart */ \
+ 0, /* chunkOffset */ \
+ 0, /* chunkLength */ \
+ NULL, /* chunkContents */ \
+ NULL, /* pFuncs */ \
+ NULL, /* pExtra */ \
+ NULL, /* context */ \
+ NULL, NULL, NULL, /* p, q, r */ \
+ NULL, /* privP */ \
+ 0, 0, 0, /* a, b, c */ \
+ 0, 0, 0 /* privA,B,C, */ \
+ }
+
+
+U_CDECL_END
+
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUTextPointer
+ * "Smart pointer" class, closes a UText via utext_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close);
+
+U_NAMESPACE_END
+
+#endif
+
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/utf.h b/thirdparty/icu4c/common/unicode/utf.h
new file mode 100644
index 0000000000..c9d5f5785c
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/utf.h
@@ -0,0 +1,225 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep09
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: Code point macros
+ *
+ * This file defines macros for checking whether a code point is
+ * a surrogate or a non-character etc.
+ *
+ * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h
+ * and itself includes utf8.h and utf16.h after some
+ * common definitions.
+ * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 1 then each of these headers must be
+ * included explicitly if their definitions are used.
+ *
+ * utf8.h and utf16.h define macros for efficiently getting code points
+ * in and out of UTF-8/16 strings.
+ * utf16.h macros have "U16_" prefixes.
+ * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling.
+ *
+ * ICU mostly processes 16-bit Unicode strings.
+ * Most of the time, such strings are well-formed UTF-16.
+ * Single, unpaired surrogates must be handled as well, and are treated in ICU
+ * like regular code points where possible.
+ * (Pairs of surrogate code points are indistinguishable from supplementary
+ * code points encoded as pairs of surrogate code units.)
+ *
+ * In fact, almost all Unicode code points in normal text (>99%)
+ * are on the BMP (<=U+ffff) and even <=U+d7ff.
+ * ICU functions handle supplementary code points (U+10000..U+10ffff)
+ * but are optimized for the much more frequently occurring BMP code points.
+ *
+ * umachine.h defines UChar to be an unsigned 16-bit integer.
+ * Since ICU 59, ICU uses char16_t in C++, UChar only in C,
+ * and defines UChar=char16_t by default. See the UChar API docs for details.
+ *
+ * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
+ * Unicode code point (Unicode scalar value, 0..0x10ffff) and U_SENTINEL (-1).
+ * Before ICU 2.4, the definition of UChar32 was platform-dependent, much like
+ * the definition of UChar. For details see the documentation for UChar32 itself.
+ *
+ * utf.h defines a small number of C macros for single Unicode code points.
+ * These are simple checks for surrogates and non-characters.
+ * For actual Unicode character properties see uchar.h.
+ *
+ * By default, string operations must be done with error checking in case
+ * a string is not well-formed UTF-16 or UTF-8.
+ *
+ * The U16_ macros detect if a surrogate code unit is unpaired
+ * (lead unit without trail unit or vice versa) and just return the unit itself
+ * as the code point.
+ *
+ * The U8_ macros detect illegal byte sequences and return a negative value.
+ * Starting with ICU 60, the observable length of a single illegal byte sequence
+ * skipped by one of these macros follows the Unicode 6+ recommendation
+ * which is consistent with the W3C Encoding Standard.
+ *
+ * There are ..._OR_FFFD versions of both U16_ and U8_ macros
+ * that return U+FFFD for illegal code unit sequences.
+ *
+ * The regular "safe" macros require that the initial, passed-in string index
+ * is within bounds. They only check the index when they read more than one
+ * code unit. This is usually done with code similar to the following loop:
+ * <pre>while(i<length) {
+ * U16_NEXT(s, i, length, c);
+ * // use c
+ * }</pre>
+ *
+ * When it is safe to assume that text is well-formed UTF-16
+ * (does not contain single, unpaired surrogates), then one can use
+ * U16_..._UNSAFE macros.
+ * These do not check for proper code unit sequences or truncated text and may
+ * yield wrong results or even cause a crash if they are used with "malformed"
+ * text.
+ * In practice, U16_..._UNSAFE macros will produce slightly less code but
+ * should not be faster because the processing is only different when a
+ * surrogate code unit is detected, which will be rare.
+ *
+ * Similarly for UTF-8, there are "safe" macros without a suffix,
+ * and U8_..._UNSAFE versions.
+ * The performance differences are much larger here because UTF-8 provides so
+ * many opportunities for malformed sequences.
+ * The unsafe UTF-8 macros are entirely implemented inside the macro definitions
+ * and are fast, while the safe UTF-8 macros call functions for some complicated cases.
+ *
+ * Unlike with UTF-16, malformed sequences cannot be expressed with distinct
+ * code point values (0..U+10ffff). They are indicated with negative values instead.
+ *
+ * For more information see the ICU User Guide Strings chapter
+ * (https://unicode-org.github.io/icu/userguide/strings).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ *
+ * @stable ICU 2.4
+ */
+
+#ifndef __UTF_H__
+#define __UTF_H__
+
+#include "unicode/umachine.h"
+/* include the utfXX.h after the following definitions */
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Is this code point a Unicode noncharacter, i.e. U+fdd0..U+fdef or a value up to U+10ffff whose low 16 bits are fffe or ffff?
+ * @param c 32-bit code point; expanded several times by the macro, so it must not have side effects
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U_IS_UNICODE_NONCHAR(c) \
+ ((c)>=0xfdd0 && /* no noncharacter lies below U+fdd0 */ \
+ ((c)<=0xfdef || ((c)&0xfffe)==0xfffe) && (c)<=0x10ffff) /* fdd0..fdef block, or __fffe/__ffff within the code point range */
+
+/**
+ * Is c a Unicode code point value (0..U+10ffff)
+ * that can be assigned a character?
+ *
+ * Values for which this macro yields false:
+ * - single surrogate code points (U+d800..U+dfff, 2048 code points)
+ * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
+ * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
+ * - anything above U+10ffff, the highest Unicode code point value
+ *
+ * This means that all code points below U+d800 are character code points,
+ * and that boundary is tested first for performance.
+ *
+ * @param c 32-bit code point; expanded several times by the macro, so it must not have side effects
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U_IS_UNICODE_CHAR(c) \
+ ((uint32_t)(c)<0xd800 || /* fast path; the unsigned cast makes negative values fail this test */ \
+ (0xdfff<(c) && (c)<=0x10ffff && !U_IS_UNICODE_NONCHAR(c))) /* above the surrogates, in range, not a noncharacter */
+
+/**
+ * Is this code point a BMP code point (U+0000..U+ffff)? Surrogates are included; this is a pure range test.
+ * @param c 32-bit code point (expanded once; negative values become large after the uint32_t cast and yield false)
+ * @return true or false
+ * @stable ICU 2.8
+ */
+#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff)
+
+/**
+ * Is this code point a supplementary code point (U+10000..U+10ffff)? Done as one unsigned comparison of c-0x10000 against 0xfffff.
+ * @param c 32-bit code point (expanded once; values below 0x10000 wrap to large unsigned numbers and yield false)
+ * @return true or false
+ * @stable ICU 2.8
+ */
+#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff)
+
+/**
+ * Is this code point a lead surrogate (U+d800..U+dbff)? The mask clears the low 10 bits; what remains must equal 0xd800.
+ * @param c 32-bit code point (expanded once)
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+
+/**
+ * Is this code point a trail surrogate (U+dc00..U+dfff)? The mask clears the low 10 bits; what remains must equal 0xdc00.
+ * @param c 32-bit code point (expanded once)
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+
+/**
+ * Is this code point a surrogate (U+d800..U+dfff)? The mask clears the low 11 bits, covering lead and trail ranges in one test.
+ * @param c 32-bit code point (expanded once)
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
+
+/**
+ * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
+ * is it a lead surrogate? Only bit 0x400 is inspected (clear for U+d800..U+dbff), so the result is meaningless for non-surrogates.
+ * @param c 32-bit code point (expanded once)
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+/**
+ * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
+ * is it a trail surrogate? Only bit 0x400 is inspected (set for U+dc00..U+dfff), so the result is meaningless for non-surrogates.
+ * @param c 32-bit code point (expanded once)
+ * @return true or false
+ * @stable ICU 4.2
+ */
+#define U_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
+
+/* include the utfXX.h ------------------------------------------------------ */
+
+#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+
+/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */
+#include "unicode/utf_old.h"
+
+#endif /* !U_NO_DEFAULT_INCLUDE_UTF_HEADERS */
+
+#endif /* __UTF_H__ */
diff --git a/thirdparty/icu4c/common/unicode/utf16.h b/thirdparty/icu4c/common/unicode/utf16.h
new file mode 100644
index 0000000000..3902c60e95
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/utf16.h
@@ -0,0 +1,734 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf16.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep09
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: 16-bit Unicode handling macros
+ *
+ * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (https://unicode-org.github.io/icu/userguide/strings).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ */
+
+#ifndef __UTF16_H__
+#define __UTF16_H__
+
+#include <stdbool.h>
+#include "unicode/umachine.h"
+#ifndef __UTF_H__
+# include "unicode/utf.h"
+#endif
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Does this code unit alone encode a code point (BMP, not a surrogate)? Simply the negation of U_IS_SURROGATE(c).
+ * @param c 16-bit code unit (expanded once)
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
+
+/**
+ * Is this code unit a lead surrogate (U+d800..U+dbff)? Same mask-and-compare as U_IS_LEAD: the low 10 bits are ignored.
+ * @param c 16-bit code unit (expanded once)
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+
+/**
+ * Is this code unit a trail surrogate (U+dc00..U+dfff)? Same mask-and-compare as U_IS_TRAIL: the low 10 bits are ignored.
+ * @param c 16-bit code unit (expanded once)
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+
+/**
+ * Is this code unit a surrogate (U+d800..U+dfff)? Forwards to U_IS_SURROGATE from utf.h.
+ * @param c 16-bit code unit (expanded once)
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
+
+/**
+ * Assuming c is a surrogate code unit (U16_IS_SURROGATE(c)),
+ * is it a lead surrogate? Only bit 0x400 is inspected (clear for U+d800..U+dbff), so the result is meaningless for non-surrogates.
+ * @param c 16-bit code unit (expanded once)
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+/**
+ * Assuming c is a surrogate code unit (U16_IS_SURROGATE(c)),
+ * is it a trail surrogate? Only bit 0x400 is inspected (set for U+dc00..U+dfff), so the result is meaningless for non-surrogates.
+ * @param c 16-bit code unit (expanded once)
+ * @return true or false
+ * @stable ICU 4.2
+ */
+#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
+
+/**
+ * Helper constant for U16_GET_SUPPLEMENTARY (value 0x35fdc00): removes both surrogate base values and adds the 0x10000 bias in one subtraction.
+ * @internal
+ */
+#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+
+/**
+ * Get a supplementary code point value (U+10000..U+10ffff)
+ * from its lead and trail surrogates.
+ * The result is undefined if the input values are not
+ * lead and trail surrogates; the macro performs no validation.
+ * Arithmetically this equals ((lead-0xd800)<<10)+(trail-0xdc00)+0x10000, folded into a single subtraction of U16_SURROGATE_OFFSET.
+ * @param lead lead surrogate (U+d800..U+dbff), expanded once
+ * @param trail trail surrogate (U+dc00..U+dfff), expanded once
+ * @return supplementary code point (U+10000..U+10ffff)
+ * @stable ICU 2.4
+ */
+#define U16_GET_SUPPLEMENTARY(lead, trail) \
+ (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
+
+
+/**
+ * Get the lead surrogate (0xd800..0xdbff) for a
+ * supplementary code point (0x10000..0x10ffff). The constant 0xd7c0 is 0xd800-(0x10000>>10); the input range is not checked.
+ * @param supplementary 32-bit code point (U+10000..U+10ffff), expanded once
+ * @return lead surrogate (U+d800..U+dbff) for supplementary
+ * @stable ICU 2.4
+ */
+#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
+
+/**
+ * Get the trail surrogate (0xdc00..0xdfff) for a
+ * supplementary code point (0x10000..0x10ffff). Keeps the low 10 bits and ORs in 0xdc00; the input range is not checked.
+ * @param supplementary 32-bit code point (U+10000..U+10ffff), expanded once
+ * @return trail surrogate (U+dc00..U+dfff) for supplementary
+ * @stable ICU 2.4
+ */
+#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
+
+/**
+ * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
+ * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff); any value above 0xffff after the cast simply yields 2.
+ * @param c 32-bit code point (expanded once)
+ * @return 1 or 2
+ * @stable ICU 2.4
+ */
+#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
+
+/**
+ * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff), i.e. the size of a surrogate pair.
+ * @return 2
+ * @stable ICU 2.4
+ */
+#define U16_MAX_LENGTH 2
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well (at i+1 or i-1, with no bounds check).
+ * The result is undefined if the offset points to a single, unpaired surrogate.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string (expanded more than once)
+ * @param i string offset (expanded more than once, never modified)
+ * @param c output UChar32 variable
+ * @see U16_GET
+ * @stable ICU 2.4
+ */
+#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[i]; /* start with the unit at the offset */ \
+ if(U16_IS_SURROGATE(c)) { \
+ if(U16_IS_SURROGATE_LEAD(c)) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); /* lead: the trail is assumed to follow */ \
+ } else { \
+ (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); /* trail: the lead is assumed to precede */ \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ *
+ * The length can be negative for a NUL-terminated string (i+1 then never equals length, and a NUL unit is not a trail surrogate).
+ *
+ * If the offset points to a single, unpaired surrogate, then
+ * c is set to that unpaired surrogate.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string (expanded more than once)
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i<length (expanded more than once, never modified)
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_GET_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[i]; \
+ if(U16_IS_SURROGATE(c)) { \
+ uint16_t __c2; /* the neighbouring unit, read only after the bounds check passes */ \
+ if(U16_IS_SURROGATE_LEAD(c)) { \
+ if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } \
+ } else { \
+ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ *
+ * The length can be negative for a NUL-terminated string (i+1 then never equals length, and a NUL unit is not a trail surrogate).
+ *
+ * If the offset points to a single, unpaired surrogate, then
+ * c is set to U+FFFD. This is the only difference from U16_GET.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
+ *
+ * @param s const UChar * string (expanded more than once)
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i<length (expanded more than once, never modified)
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_GET_UNSAFE
+ * @stable ICU 60
+ */
+#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[i]; \
+ if(U16_IS_SURROGATE(c)) { \
+ uint16_t __c2; /* the neighbouring unit, read only after the bounds check passes */ \
+ if(U16_IS_SURROGATE_LEAD(c)) { \
+ if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } else { \
+ (c)=0xfffd; /* lead without a following trail */ \
+ } \
+ } else { \
+ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } else { \
+ (c)=0xfffd; /* trail without a preceding lead */ \
+ } \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with forward iteration --------------------------------------- */
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well (without checking that it is one, or that it is in bounds).
+ * If the offset points to a trail surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset points to a single, unpaired lead surrogate.
+ *
+ * @param s const UChar * string (expanded more than once)
+ * @param i string offset; a modifiable variable, advanced by 1 or 2
+ * @param c output UChar32 variable
+ * @see U16_NEXT
+ * @stable ICU 2.4
+ */
+#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if(U16_IS_LEAD(c)) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); /* consume the trail unit too */ \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string (i then never equals length, and a NUL unit is not a trail surrogate).
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate or
+ * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
+ *
+ * @param s const UChar * string (expanded more than once)
+ * @param i string offset, must be i<length; a modifiable variable, advanced by 1 or 2
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if(U16_IS_LEAD(c)) { \
+ uint16_t __c2; \
+ if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { /* peek at the next unit only if one exists */ \
+ ++(i); \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string (i then never equals length, and a NUL unit is not a trail surrogate).
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate or
+ * to a single, unpaired lead surrogate, then c is set to U+FFFD (the offset still advances by one unit).
+ *
+ * @param s const UChar * string (expanded more than once)
+ * @param i string offset, must be i<length; a modifiable variable, advanced by 1 or 2
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_NEXT_UNSAFE
+ * @stable ICU 60
+ */
+#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if(U16_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
+ ++(i); \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } else { \
+ (c)=0xfffd; /* stray trail, or lead without a trail */ \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s UChar * string buffer (it is written to, so it must be modifiable)
+ * @param i string offset; a modifiable variable, advanced by 1 or 2
+ * @param c code point to append (expanded more than once)
+ * @see U16_APPEND
+ * @stable ICU 2.4
+ */
+#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); /* lead surrogate, same formula as U16_LEAD */ \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); /* trail surrogate, same formula as U16_TRAIL */ \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a surrogate pair is written, checks for sufficient space in the string.
+ * If the code point is not valid or a trail surrogate does not fit,
+ * then isError is set to true and nothing is written. A single (BMP) unit is written without a capacity check, hence i<capacity.
+ *
+ * @param s UChar * string buffer (it is written to, so it must be modifiable)
+ * @param i string offset, must be i<capacity; a modifiable variable, advanced by 1 or 2 on success
+ * @param capacity size of the string buffer
+ * @param c code point to append (expanded more than once)
+ * @param isError output UBool set to true if an error occurs, otherwise not modified (so initialize it to false beforehand)
+ * @see U16_APPEND_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } else /* c>0x10ffff or not enough space */ { \
+ (isError)=true; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16: after a lead surrogate the next unit is skipped without being examined.
+ *
+ * @param s const UChar * string
+ * @param i string offset; a modifiable variable, advanced by 1 or 2
+ * @see U16_FWD_1
+ * @stable ICU 2.4
+ */
+#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U16_IS_LEAD((s)[(i)++])) { \
+ ++(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ * A second unit is skipped only when a lead surrogate is followed, within bounds, by a trail surrogate.
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const UChar * string (expanded more than once)
+ * @param i string offset, must be i<length; a modifiable variable, advanced by 1 or 2
+ * @param length string length
+ * @see U16_FWD_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16 and that at least n code points follow; there is no end-of-string check.
+ *
+ * @param s const UChar * string
+ * @param i string offset; a modifiable variable
+ * @param n number of code points to skip (evaluated once; nothing happens if n<=0)
+ * @see U16_FWD_N
+ * @stable ICU 2.4
+ */
+#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); /* local countdown copy of n */ \
+ while(__N>0) { \
+ U16_FWD_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ * Stops early, without error, when the end of the string is reached before n code points were skipped.
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const UChar * string
+ * @param i int32_t string offset, must be i<length; a modifiable variable
+ * @param length int32_t string length
+ * @param n number of code points to skip (evaluated once; nothing happens if n<=0)
+ * @see U16_FWD_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); /* local countdown copy of n */ \
+ while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { /* end = length reached, or NUL when length<0 */ \
+ U16_FWD_1(s, i, length); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-16: any trail surrogate is taken to have a lead before it, which is not verified.
+ *
+ * @param s const UChar * string
+ * @param i string offset; a modifiable variable
+ * @see U16_SET_CP_START
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U16_IS_TRAIL((s)[i])) { \
+ --(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries: the unit at i-1 is read only when i>start.
+ *
+ * @param s const UChar * string (expanded more than once)
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i; a modifiable variable
+ * @see U16_SET_CP_START_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
+ --(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with backward iteration -------------------------------------- */
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well (without checking that it is one, or that it is in bounds).
+ * If the offset is behind a lead surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind a single, unpaired trail surrogate.
+ *
+ * @param s const UChar * string (expanded more than once)
+ * @param i string offset; a modifiable variable, moved back by 1 or 2
+ * @param c output UChar32 variable
+ * @see U16_PREV
+ * @stable ICU 2.4
+ */
+#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if(U16_IS_TRAIL(c)) { \
+ (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); /* step back onto the lead unit too */ \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate or behind a single, unpaired
+ * trail surrogate, then c is set to that unpaired surrogate.
+ *
+ * @param s const UChar * string (expanded more than once)
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i; a modifiable variable, moved back by 1 or 2
+ * @param c output UChar32 variable
+ * @see U16_PREV_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if(U16_IS_TRAIL(c)) { \
+ uint16_t __c2; \
+ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { /* peek at the previous unit only if one exists */ \
+ --(i); \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate or behind a single, unpaired
+ * trail surrogate, then c is set to U+FFFD (the offset still moves back by one unit).
+ *
+ * @param s const UChar * string (expanded more than once)
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i; a modifiable variable, moved back by 1 or 2
+ * @param c output UChar32 variable
+ * @see U16_PREV_UNSAFE
+ * @stable ICU 60
+ */
+#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if(U16_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+ --(i); \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } else { \
+ (c)=0xfffd; /* stray lead, or trail without a lead */ \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16: before a trail surrogate one more unit is skipped without being examined.
+ *
+ * @param s const UChar * string
+ * @param i string offset; a modifiable variable, moved back by 1 or 2
+ * @see U16_BACK_1
+ * @stable ICU 2.4
+ */
+#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U16_IS_TRAIL((s)[--(i)])) { \
+ --(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries: a second step back needs a lead at i-1 with i>start.
+ *
+ * @param s const UChar * string (expanded more than once)
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i; a modifiable variable, moved back by 1 or 2
+ * @see U16_BACK_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
+ --(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16 and that at least n code points precede i; there is no start-of-string check.
+ *
+ * @param s const UChar * string
+ * @param i string offset; a modifiable variable
+ * @param n number of code points to skip (evaluated once; nothing happens if n<=0)
+ * @see U16_BACK_N
+ * @stable ICU 2.4
+ */
+#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); /* local countdown copy of n */ \
+ while(__N>0) { \
+ U16_BACK_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries: it stops early, without error, once i reaches start.
+ *
+ * @param s const UChar * string
+ * @param start start of string
+ * @param i string offset, must be start<i; a modifiable variable
+ * @param n number of code points to skip (evaluated once; nothing happens if n<=0)
+ * @see U16_BACK_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); /* local countdown copy of n */ \
+ while(__N>0 && (i)>(start)) { \
+ U16_BACK_1(s, start, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16: reads the unit at i-1 unconditionally and does not look at the unit at i.
+ *
+ * @param s const UChar * string
+ * @param i string offset; a modifiable variable
+ * @see U16_SET_CP_LIMIT
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U16_IS_LEAD((s)[(i)-1])) { \
+ ++(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ * The offset moves only if start<i, i is not at the end, the unit at i-1 is a lead and the unit at i is a trail.
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const UChar * string (expanded more than once)
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, start<=i<=length; a modifiable variable
+ * @param length int32_t string length
+ * @see U16_SET_CP_LIMIT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+ if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/utf32.h b/thirdparty/icu4c/common/unicode/utf32.h
new file mode 100644
index 0000000000..8822c4dd09
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/utf32.h
@@ -0,0 +1,25 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2001, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf32.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep20
+* created by: Markus W. Scherer
+*/
+/**
+ * \file
+ * \brief C API: UTF-32 macros
+ *
+ * This file is obsolete and its contents moved to utf_old.h.
+ * See utf_old.h and Jitterbug 2150 and its discussion on the ICU mailing list
+ * in September 2002.
+ */
diff --git a/thirdparty/icu4c/common/unicode/utf8.h b/thirdparty/icu4c/common/unicode/utf8.h
new file mode 100644
index 0000000000..5a07435fcf
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/utf8.h
@@ -0,0 +1,882 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf8.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep13
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: 8-bit Unicode handling macros
+ *
+ * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (https://unicode-org.github.io/icu/userguide/strings).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ */
+
+#ifndef __UTF8_H__
+#define __UTF8_H__
+
+#include <stdbool.h>
+#include "unicode/umachine.h"
+#ifndef __UTF_H__
+# include "unicode/utf.h"
+#endif
+
+/* internal definitions ----------------------------------------------------- */
+
+/**
+ * Counts the trail bytes for a UTF-8 lead byte.
+ * Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
+ * leadByte might be evaluated multiple times.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @internal
+ */
+#define U8_COUNT_TRAIL_BYTES(leadByte) \
+    (!U8_IS_LEAD(leadByte) ? 0 : \
+        1+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
+
+/**
+ * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
+ * Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
+ * leadByte might be evaluated multiple times.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @internal
+ */
+#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
+    ((0xc1<(uint8_t)(leadByte))+(0xdf<(uint8_t)(leadByte))+(0xef<(uint8_t)(leadByte)))
+
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ * @internal
+ */
+#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) /* modifies leadByte in place, keeping its low (6-countTrailBytes) bits */
+
+/**
+ * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
+ * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
+ * Lead byte E0..EF bits 3..0 are used as byte index,
+ * first trail byte bits 7..5 are used as bit index into that byte.
+ * @see U8_IS_VALID_LEAD3_AND_T1
+ * @internal
+ */
+#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" /* E0 accepts t1 A0..BF only; ED accepts t1 80..9F only; every other lead accepts t1 80..BF */
+
+/**
+ * Internal 3-byte UTF-8 validity check.
+ * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
+ * @internal
+ */
+#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) ((1<<((uint8_t)(t1)>>5))&U8_LEAD3_T1_BITS[(lead)&0xf])
+
+/**
+ * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
+ * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
+ * First trail byte bits 7..4 are used as byte index,
+ * lead byte F0..F4 bits 2..0 are used as bit index into that byte.
+ * @see U8_IS_VALID_LEAD4_AND_T1
+ * @internal
+ */
+#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00" /* t1 80..8F pairs with leads F1..F4; t1 90..BF pairs with leads F0..F3; any other t1 pairs with none */
+
+/**
+ * Internal 4-byte UTF-8 validity check.
+ * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
+ * @internal
+ */
+#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) ((1<<((lead)&7))&U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4])
+
+/**
+ * Function for handling "next code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_CAPI UChar32 U_EXPORT2
+utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
+
+/**
+ * Function for handling "append code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
+
+/**
+ * Function for handling "previous code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_CAPI UChar32 U_EXPORT2
+utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
+
+/**
+ * Function for handling "skip backward one code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
+ * @param c 8-bit code unit (byte)
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U8_IS_SINGLE(c) (!((c)&0x80))
+
+/**
+ * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
+ * @param c 8-bit code unit (byte)
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=(0xf4-0xc2))
+// the uint8_t cast wraps bytes below 0xc2 around to values above 0x32, so one comparison checks both bounds
+
+/**
+ * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
+ * @param c 8-bit code unit (byte)
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U8_IS_TRAIL(c) ((int8_t)(c)<=-0x41)
+
+/**
+ * How many code units (bytes) are used for the UTF-8 encoding
+ * of this Unicode code point?
+ * @param c 32-bit code point
+ * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
+ * @stable ICU 2.4
+ */
+#define U8_LENGTH(c) \
+    ( /* tests run in ascending code point order; c may be evaluated several times */ \
+      (uint32_t)(c)<0x80 ? 1 : \
+      (uint32_t)(c)<0x800 ? 2 : \
+      (uint32_t)(c)<0xd800 ? 3 : \
+      /* surrogates and values above U+10FFFF have no UTF-8 form */ \
+      ((uint32_t)(c)<0xe000 || 0x10ffff<(uint32_t)(c)) ? 0 : \
+      (uint32_t)(c)<0x10000 ? 3 : \
+      4 \
+    )
+
+/**
+ * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
+ * @return 4
+ * @stable ICU 2.4
+ */
+#define U8_MAX_LENGTH 4 /* equals the largest result U8_LENGTH can produce */
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ * The result is undefined if the offset points to an illegal UTF-8
+ * byte sequence.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_GET
+ * @stable ICU 2.4
+ */
+#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t _u8_get_unsafe_index=(int32_t)(i); /* scratch copy: the caller's i is never modified */ \
+ U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); /* move the copy back off any trail bytes */ \
+ U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); /* decode forward from there into c */ \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to an illegal UTF-8 byte sequence, then
+ * c is set to a negative value.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset
+ * @param i int32_t string offset, must be start<=i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_GET_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t _u8_get_index=(i); /* scratch copy: the caller's i is never modified */ \
+ U8_SET_CP_START(s, start, _u8_get_index); /* move the copy back if it sits on a trail byte; start is handed on as the lower bound */ \
+ U8_NEXT(s, _u8_get_index, length, c); /* checked decode from the adjusted copy into c */ \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to an illegal UTF-8 byte sequence, then
+ * c is set to U+FFFD.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_GET() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset
+ * @param i int32_t string offset, must be start<=i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_GET
+ * @stable ICU 51
+ */
+#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t _u8_get_index=(i); /* scratch copy: the caller's i is never modified */ \
+ U8_SET_CP_START(s, start, _u8_get_index); /* move the copy back if it sits on a trail byte; start is handed on as the lower bound */ \
+ U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); /* same as U8_GET except for the decode macro used here */ \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with forward iteration --------------------------------------- */
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * The result is undefined if the offset points to a trail byte
+ * or an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_NEXT
+ * @stable ICU 2.4
+ */
+#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(uint8_t)(s)[(i)++]; /* read the first byte and step past it */ \
+ if(!U8_IS_SINGLE(c)) { \
+ if((c)<0xe0) { /* first byte below 0xe0: exactly one more byte is read */ \
+ (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
+ } else if((c)<0xf0) { /* first byte 0xe0..0xef: two more bytes are read */ \
+ /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
+ (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
+ (i)+=2; \
+ } else { /* first byte 0xf0 or above: three more bytes are read */ \
+ (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
+ (i)+=3; \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL) /* ill-formed input stores U_SENTINEL in c */
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to U+FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_NEXT() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_NEXT
+ * @stable ICU 51
+ */
+#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd) /* ill-formed input stores 0xfffd in c */
+
+/** @internal */
+#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(uint8_t)(s)[(i)++]; /* first byte; i now indexes the unit after it */ \
+ if(!U8_IS_SINGLE(c)) { \
+ uint8_t __t = 0; /* holds the trail byte currently being examined */ \
+ if((i)!=(length) && /* '!=' rather than '<': a negative length (NUL-terminated input) always passes this test */ \
+ /* fetch/validate/assemble all but last trail byte */ \
+ ((c)>=0xe0 ? \
+ ((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \
+ U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
+ (__t&=0x3f, 1) \
+ : /* U+10000..U+10FFFF */ \
+ ((c)-=0xf0)<=4 && \
+ U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
+ ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
+ (__t=(s)[i]-0x80)<=0x3f) && \
+ /* valid second-to-last trail byte */ \
+ ((c)=((c)<<6)|__t, ++(i)!=(length)) \
+ : /* U+0080..U+07FF */ \
+ (c)>=0xc2 && ((c)&=0x1f, 1)) && \
+ /* last trail byte */ \
+ (__t=(s)[i]-0x80)<=0x3f && \
+ ((c)=((c)<<6)|__t, ++(i), 1)) { \
+ } else { /* any failed test above lands here; i keeps whatever increments were already applied */ \
+ (c)=(sub); /* ill-formed*/ \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s uint8_t * string buffer (written to, so it must not be const)
+ * @param i string offset
+ * @param c code point to append
+ * @see U8_APPEND
+ * @stable ICU 2.4
+ */
+#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+    uint32_t __uc=(c); \
+    /* one branch per encoded length; no validity or capacity checks are made */ \
+    if(__uc<=0x7f) { \
+        (s)[(i)++]=(uint8_t)__uc; \
+    } else if(__uc<=0x7ff) { \
+        (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
+        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+    } else if(__uc<=0xffff) { \
+        (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
+        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
+        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+    } else { \
+        (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
+        (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
+        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
+        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a non-ASCII code point is written, checks for sufficient space in the string.
+ * If the code point is not valid or trail bytes do not fit,
+ * then isError is set to true.
+ *
+ * @param s uint8_t * string buffer (written to, so it must not be const)
+ * @param i int32_t string offset, must be i<capacity
+ * @param capacity int32_t size of the string buffer
+ * @param c UChar32 code point to append
+ * @param isError output UBool set to true if an error occurs, otherwise not modified
+ * @see U8_APPEND_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
+ uint32_t __uc=(c); /* unsigned copy: a negative c becomes a huge value and ends up in the error branch */ \
+ if(__uc<=0x7f) { /* ASCII: stored without consulting capacity */ \
+ (s)[(i)++]=(uint8_t)__uc; \
+ } else if(__uc<=0x7ff && (i)+1<(capacity)) { /* two bytes, and both fit */ \
+ (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
+ (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+ } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { /* three bytes; U+D800..U+DFFF is excluded */ \
+ (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
+ (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
+ (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+ } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { /* four bytes, and all fit */ \
+ (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
+ (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
+ (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
+ (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+ } else { /* surrogate, above U+10FFFF, or not enough room: nothing is written */ \
+ (isError)=true; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_FWD_1
+ * @stable ICU 2.4
+ */
+#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); /* the byte at i plus however many trail bytes it announces */ \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @see U8_FWD_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+ uint8_t __b=(s)[(i)++]; /* one byte is always consumed */ \
+ if(U8_IS_LEAD(__b) && (i)!=(length)) { \
+ uint8_t __t1=(s)[i]; /* candidate first trail byte; consumed only if it validates below */ \
+ if((0xe0<=__b && __b<0xf0)) { /* three-byte lead */ \
+ if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \
+ ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ } \
+ } else if(__b<0xe0) { /* two-byte lead */ \
+ if(U8_IS_TRAIL(__t1)) { \
+ ++(i); \
+ } \
+ } else /* __b>=0xf0 */ { \
+ if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
+ ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
+ ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ } \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_FWD_N
+ * @stable ICU 2.4
+ */
+#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+    int32_t __N; \
+    /* no bounds check here: exactly n steps are taken whenever n>0 */ \
+    for(__N=(n); __N>0; --__N) { \
+        U8_FWD_1_UNSAFE(s, i); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param n number of code points to skip
+ * @see U8_FWD_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
+    int32_t __N; \
+    /* ends early at length, or at a NUL byte when length is negative */ \
+    for(__N=(n); __N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); --__N) { \
+        U8_FWD_1(s, i, length); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_START
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+    for(; U8_IS_TRAIL((s)[i]); --(i)) {} /* no lower bound: relies on a non-trail byte existing at or before i */ \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ *
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<=i
+ * @see U8_SET_CP_START_UNSAFE
+ * @see U8_TRUNCATE_IF_INCOMPLETE
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U8_IS_TRAIL((s)[(i)])) { /* s[i] is always read; any non-trail byte leaves i unchanged */ \
+ (i)=utf8_back1SafeBody(s, start, (i)); /* the out-of-line helper returns the adjusted offset */ \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * If the string ends with a UTF-8 byte sequence that is valid so far
+ * but incomplete, then reduce the length of the string to end before
+ * the lead byte of that incomplete sequence.
+ * For example, if the string ends with E1 80, the length is reduced by 2.
+ *
+ * In all other cases (the string ends with a complete sequence, or it is not
+ * possible for any further trail byte to extend the trailing sequence)
+ * the length remains unchanged.
+ *
+ * Useful for processing text split across multiple buffers
+ * (save the incomplete sequence for later)
+ * and for optimizing iteration
+ * (check for string length only once per character).
+ *
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ * Unlike U8_SET_CP_START(), this macro never reads s[length].
+ *
+ * (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param length int32_t string length (usually start<=length)
+ * @see U8_SET_CP_START
+ * @stable ICU 61
+ */
+#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
+    if((length)>(start)) { \
+        uint8_t __b1=(s)[(length)-1]; /* last byte; (s) is parenthesized so expression arguments index correctly */ \
+        if(U8_IS_SINGLE(__b1)) { \
+            /* common ASCII character */ \
+        } else if(U8_IS_LEAD(__b1)) { \
+            --(length); /* the string ends in a bare lead byte: cut it off */ \
+        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
+            uint8_t __b2=(s)[(length)-2]; /* byte before the final trail byte */ \
+            if(0xe0<=__b2 && __b2<=0xf4) { /* a 3- or 4-byte lead followed by one trail byte */ \
+                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
+                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
+                    (length)-=2; \
+                } \
+            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
+                uint8_t __b3=(s)[(length)-3]; /* two trail bytes at the end: only a 4-byte lead can still be incomplete */ \
+                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
+                    (length)-=3; \
+                } \
+            } \
+        } \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with backward iteration -------------------------------------- */
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_PREV
+ * @stable ICU 2.4
+ */
+#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+    (c)=(uint8_t)(s)[--(i)]; \
+    if(U8_IS_TRAIL(c)) { \
+        uint8_t __b; \
+        uint8_t __count=1; \
+        uint8_t __shift=6; \
+\
+        /* c starts out as the final trail byte: keep its six payload bits */ \
+        (c)&=0x3f; \
+        /* walk backward, folding in every byte below 0xc0 */ \
+        while((__b=(s)[--(i)])<0xc0) { \
+            (c)|=(UChar32)(__b&0x3f)<<__shift; \
+            ++__count; \
+            __shift+=6; \
+        } \
+\
+        /* __b is now the first byte >=0xc0; __count bytes were consumed after it */ \
+        U8_MASK_LEAD_BYTE(__b, __count); \
+        (c)|=(UChar32)__b<<__shift; \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_PREV_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+    (c)=(uint8_t)(s)[--(i)]; /* i is decremented before the read */ \
+    if(!U8_IS_SINGLE(c)) { \
+        /* (s) is parenthesized so the cast covers the whole argument expression */ (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -1); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_PREV() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_PREV
+ * @stable ICU 51
+ */
+#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+    (c)=(uint8_t)(s)[--(i)]; /* i is decremented before the read */ \
+    if(!U8_IS_SINGLE(c)) { \
+        /* (s) is parenthesized so the cast covers the whole argument expression */ (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -3); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_BACK_1
+ * @stable ICU 2.4
+ */
+#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+    do { --(i); } while(U8_IS_TRAIL((s)[i])); /* always moves at least one byte, then keeps going over trail bytes */ \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @see U8_BACK_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U8_IS_TRAIL((s)[--(i)])) { /* i is decremented unconditionally, before the test */ \
+ (i)=utf8_back1SafeBody(s, start, (i)); /* the out-of-line helper returns the adjusted offset */ \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_BACK_N
+ * @stable ICU 2.4
+ */
+#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+    int32_t __N; \
+    /* no bounds check here: exactly n steps are taken whenever n>0 */ \
+    for(__N=(n); __N>0; --__N) { \
+        U8_BACK_1_UNSAFE(s, i); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t index of the start of the string
+ * @param i int32_t string offset, must be start<i
+ * @param n number of code points to skip
+ * @see U8_BACK_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+    int32_t __N; \
+    /* at most n single steps backward; ends early once i is no longer above start */ \
+    for(__N=(n); __N>0 && (i)>(start); --__N) { \
+        U8_BACK_1(s, start, i); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_LIMIT
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ U8_BACK_1_UNSAFE(s, i); /* back to the nearest non-trail byte before i */ \
+ U8_FWD_1_UNSAFE(s, i); /* then forward past the sequence that byte starts */ \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<=i<=length
+ * @param length int32_t string length
+ * @see U8_SET_CP_LIMIT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+ if((start)<(i) && ((i)<(length) || (length)<0)) { /* i is left alone when it is at start or not below a non-negative length */ \
+ U8_BACK_1(s, start, i); /* step back one code point, bounded by start */ \
+ U8_FWD_1(s, i, length); /* then forward over it again, bounded by length */ \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/utf_old.h b/thirdparty/icu4c/common/unicode/utf_old.h
new file mode 100644
index 0000000000..160f5ad0a9
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/utf_old.h
@@ -0,0 +1,1201 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf_old.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002sep21
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: Deprecated macros for Unicode string handling
+ *
+ * The macros in utf_old.h are all deprecated and their use discouraged.
+ * Some of the design principles behind the set of UTF macros
+ * have changed or proved impractical.
+ * Almost all of the old "UTF macros" are at least renamed.
+ * If you are looking for a new equivalent to an old macro, please see the
+ * comment at the old one.
+ *
+ * Brief summary of reasons for deprecation:
+ * - Switch on UTF_SIZE (selection of UTF-8/16/32 default string processing)
+ * was impractical.
+ * - Switch on UTF_SAFE etc. (selection of unsafe/safe/strict default string processing)
+ * was of little use and impractical.
+ * - Whole classes of macros became obsolete outside of the UTF_SIZE/UTF_SAFE
+ * selection framework: UTF32_ macros (all trivial)
+ * and UTF_ default and intermediate macros (all aliases).
+ * - The selection framework also caused many macro aliases.
+ * - Change in Unicode standard: "irregular" sequences (3.0) became illegal (3.2).
+ * - Change of language in Unicode standard:
+ * Growing distinction between internal x-bit Unicode strings and external UTF-x
+ * forms, with the former more lenient.
+ * Suggests renaming of UTF16_ macros to U16_.
+ * - The prefix "UTF_" without a width number confused some users.
+ * - "Safe" append macros needed the addition of an error indicator output.
+ * - "Safe" UTF-8 macros used legitimate (if rarely used) code point values
+ * to indicate error conditions.
+ * - The use of the "_CHAR" infix for code point operations confused some users.
+ *
+ * More details:
+ *
+ * Until ICU 2.2, utf.h theoretically allowed to choose among UTF-8/16/32
+ * for string processing, and among unsafe/safe/strict default macros for that.
+ *
+ * It proved nearly impossible to write non-trivial, high-performance code
+ * that is UTF-generic.
+ * Unsafe default macros would be dangerous for default string processing,
+ * and the main reason for the "strict" versions disappeared:
+ * Between Unicode 3.0 and 3.2 all "irregular" UTF-8 sequences became illegal.
+ * The only other conditions that "strict" checked for were non-characters,
+ * which are valid during processing. Only during text input/output should they
+ * be checked, and at that time other well-formedness checks may be
+ * necessary or useful as well.
+ * This can still be done by using U16_NEXT and U_IS_UNICODE_NONCHAR
+ * or U_IS_UNICODE_CHAR.
+ *
+ * The old UTF8_..._SAFE macros also used some normal Unicode code points
+ * to indicate malformed sequences.
+ * The new UTF8_ macros without suffix use negative values instead.
+ *
+ * The entire contents of utf32.h was moved here without replacement
+ * because all those macros were trivial and
+ * were meaningful only in the framework of choosing the UTF size.
+ *
+ * See Jitterbug 2150 and its discussion on the ICU mailing list
+ * in September 2002.
+ *
+ * <hr>
+ *
+ * <em>Obsolete part</em> of pre-ICU 2.4 utf.h file documentation:
+ *
+ * <p>The original concept for these files was for ICU to allow
+ * in principle to set which UTF (UTF-8/16/32) is used internally
+ * by defining UTF_SIZE to either 8, 16, or 32. utf.h would then define the UChar type
+ * accordingly. UTF-16 was the default.</p>
+ *
+ * <p>This concept has been abandoned.
+ * A lot of the ICU source code assumes UChar strings are in UTF-16.
+ * This is especially true for low-level code like
+ * conversion, normalization, and collation.
+ * The utf.h header enforces the default of UTF-16.
+ * The UTF-8 and UTF-32 macros remain for now for completeness and backward compatibility.</p>
+ *
+ * <p>Accordingly, utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
+ * UChar is defined to be exactly wchar_t, otherwise uint16_t.</p>
+ *
+ * <p>UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
+ * Unicode code point (Unicode scalar value, 0..0x10ffff).
+ * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
+ * the definition of UChar. For details see the documentation for UChar32 itself.</p>
+ *
+ * <p>utf.h also defines a number of C macros for handling single Unicode code points and
+ * for using UTF Unicode strings. It includes utf8.h, utf16.h, and utf32.h for the actual
+ * implementations of those macros and then aliases one set of them (for UTF-16) for general use.
+ * The UTF-specific macros have the UTF size in the macro name prefixes (UTF16_...), while
+ * the general alias macros always begin with UTF_...</p>
+ *
+ * <p>Many string operations can be done with or without error checking.
+ * Where such a distinction is useful, there are two versions of the macros, "unsafe" and "safe"
+ * ones with ..._UNSAFE and ..._SAFE suffixes. The unsafe macros are fast but may cause
+ * program failures if the strings are not well-formed. The safe macros have an additional, boolean
+ * parameter "strict". If strict is false, then only illegal sequences are detected.
+ * Otherwise, irregular sequences and non-characters are detected as well (like single surrogates).
+ * Safe macros return special error code points for illegal/irregular sequences:
+ * Typically, U+ffff, or values that would result in a code unit sequence of the same length
+ * as the erroneous input sequence.<br>
+ * Note that _UNSAFE macros have fewer parameters: They do not have the strictness parameter, and
+ * they do not have start/length parameters for boundary checking.</p>
+ *
+ * <p>Here, the macros are aliased in two steps:
+ * In the first step, the UTF-specific macros with UTF16_ prefix and _UNSAFE and _SAFE suffixes are
+ * aliased according to the UTF_SIZE to macros with UTF_ prefix and the same suffixes and signatures.
+ * Then, in a second step, the default, general alias macros are set to use either the unsafe or
+ * the safe/not strict (default) or the safe/strict macro;
+ * these general macros do not have a strictness parameter.</p>
+ *
+ * <p>It is possible to change the default choice for the general alias macros to be unsafe, safe/not strict or safe/strict.
+ * The default is safe/not strict. It is not recommended to select the unsafe macros as the basis for
+ * Unicode string handling in ICU! To select this, define UTF_SAFE, UTF_STRICT, or UTF_UNSAFE.</p>
+ *
+ * <p>For general use, one should use the default, general macros with UTF_ prefix and no _SAFE/_UNSAFE suffix.
+ * Only in some cases it may be necessary to control the choice of macro directly and use a less generic alias.
+ * For example, if it can be assumed that a string is well-formed and the index will stay within the bounds,
+ * then the _UNSAFE version may be used.
+ * If a UTF-8 string is to be processed, then the macros with UTF8_ prefixes need to be used.</p>
+ *
+ * <hr>
+ *
+ * Deprecated ICU 2.4. Use the macros in utf.h, utf16.h, utf8.h instead.
+ */
+
+#ifndef __UTF_OLD_H__
+#define __UTF_OLD_H__
+
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+
+/**
+ * \def U_HIDE_OBSOLETE_UTF_OLD_H
+ *
+ * Hides the obsolete definitions in unicode/utf_old.h.
+ * Recommended to be set to 1 at compile time to make sure
+ * the long-deprecated macros are no longer used.
+ *
+ * For reasons for the deprecation see the utf_old.h file comments.
+ *
+ * @internal
+ */
+#ifndef U_HIDE_OBSOLETE_UTF_OLD_H
+# define U_HIDE_OBSOLETE_UTF_OLD_H 0
+#endif
+
+#if !defined(U_HIDE_DEPRECATED_API) && !U_HIDE_OBSOLETE_UTF_OLD_H
+
+/* Formerly utf.h, part 1 --------------------------------------------------- */
+
+#ifdef U_USE_UTF_DEPRECATES
+/**
+ * Unicode string and array offset and index type.
+ * ICU always counts Unicode code units (UChars) for
+ * string offsets, indexes, and lengths, not Unicode code points.
+ *
+ * @obsolete ICU 2.6. Use int32_t directly instead since this API will be removed in that release.
+ */
+typedef int32_t UTextOffset;
+#endif
+
+/** Number of bits in a Unicode string code unit - ICU uses 16-bit Unicode. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF_SIZE 16
+
+/**
+ * The default choice for general Unicode string macros is to use the ..._SAFE macro implementations
+ * with strict=false.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_SAFE
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#undef UTF_UNSAFE
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#undef UTF_STRICT
+
+/**
+ * UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8,
+ * which need 1 or 2 bytes in UTF-8:
+ * \code
+ * U+0015 = NAK = Negative Acknowledge, C0 control character
+ * U+009f = highest C1 control character
+ * \endcode
+ *
+ * These are used by UTF8_..._SAFE macros so that they can return an error value
+ * that needs the same number of code units (bytes) as were seen by
+ * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID().
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF8_ERROR_VALUE_1 0x15
+
+/**
+ * See documentation on UTF8_ERROR_VALUE_1 for details.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF8_ERROR_VALUE_2 0x9f
+
+/**
+ * Error value for all UTFs. This code point value will be set by macros with error
+ * checking if an error is detected.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_ERROR_VALUE 0xffff
+
+/**
+ * Is a given 32-bit code an error value
+ * as returned by one of the macros for any UTF?
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_IS_ERROR(c) \
+ (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
+
+/**
+ * This is a combined macro: Is c a valid Unicode value _and_ not an error code?
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_IS_VALID(c) \
+ (UTF_IS_UNICODE_CHAR(c) && \
+ (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
+
+/**
+ * Is this code unit or code point a surrogate (U+d800..U+dfff)?
+ * @deprecated ICU 2.4. Renamed to U_IS_SURROGATE and U16_IS_SURROGATE, see utf_old.h.
+ */
+#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
+
+/**
+ * Is a given 32-bit code point a Unicode noncharacter?
+ *
+ * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_NONCHAR, see utf_old.h.
+ */
+#define UTF_IS_UNICODE_NONCHAR(c) \
+ ((c)>=0xfdd0 && \
+ ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
+ (uint32_t)(c)<=0x10ffff)
+
+/**
+ * Is a given 32-bit value a Unicode code point value (0..U+10ffff)
+ * that can be assigned a character?
+ *
+ * Code points that are not characters include:
+ * - single surrogate code points (U+d800..U+dfff, 2048 code points)
+ * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
+ * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
+ * - the highest Unicode code point value is U+10ffff
+ *
+ * This means that all code points below U+d800 are character code points,
+ * and that boundary is tested first for performance.
+ *
+ * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_CHAR, see utf_old.h.
+ */
+#define UTF_IS_UNICODE_CHAR(c) \
+ ((uint32_t)(c)<0xd800 || \
+ ((uint32_t)(c)>0xdfff && \
+ (uint32_t)(c)<=0x10ffff && \
+ !UTF_IS_UNICODE_NONCHAR(c)))
+
+/* Formerly utf8.h ---------------------------------------------------------- */
+
+/**
+* \var utf8_countTrailBytes
+* Internal array with numbers of trail bytes for any given byte used in
+* lead byte position.
+*
+* This is internal since it is not meant to be called directly by external clients;
+* however it is called by public macros in this file and thus must remain stable,
+* and should not be hidden when other internal functions are hidden (otherwise
+* public macros would fail to compile).
+* @internal
+*/
+#ifdef U_UTF8_IMPL
+// No forward declaration if compiling utf_impl.cpp, which defines utf8_countTrailBytes.
+#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION)
+U_CFUNC const uint8_t utf8_countTrailBytes[];
+#else
+U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_IMPORT*/
+#endif
+
+/**
+ * Count the trail bytes for a UTF-8 lead byte: looks up utf8_countTrailBytes[] using the argument truncated to uint8_t.
+ * @deprecated ICU 2.4. Renamed to U8_COUNT_TRAIL_BYTES, see utf_old.h.
+ */
+#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])
+
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ * @deprecated ICU 2.4. Renamed to U8_MASK_LEAD_BYTE, see utf_old.h.
+ */
+#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
+
+/** Is this code point a single code unit (byte)? @deprecated ICU 2.4. Renamed to U8_IS_SINGLE, see utf_old.h. */
+#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)
+/** Is this code unit the lead code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_LEAD, see utf_old.h. */
+#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)
+/** Is this code unit a trailing code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_TRAIL, see utf_old.h. */
+#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)
+
+/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U8_LENGTH or test ((uint32_t)(c)>0x7f) instead, see utf_old.h. */
+#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)
+
+/**
+ * Given the lead character, how many bytes are taken by this code point.
+ * ICU does not deal with code points >0x10ffff
+ * unless necessary for advancing in the byte stream.
+ *
+ * These length macros take into account that for values >0x10ffff
+ * the UTF8_APPEND_CHAR_SAFE macros would write the error code point 0xffff
+ * with 3 bytes.
+ * Code point comparisons need to be in uint32_t because UChar32
+ * may be a signed type, and negative values must be recognized.
+ *
+ * @deprecated ICU 2.4. Use U8_LENGTH instead, see utf.h.
+ */
+#if 1
+# define UTF8_CHAR_LENGTH(c) \
+ ((uint32_t)(c)<=0x7f ? 1 : \
+ ((uint32_t)(c)<=0x7ff ? 2 : \
+ ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
+ ) \
+ )
+#else
+# define UTF8_CHAR_LENGTH(c) \
+ ((uint32_t)(c)<=0x7f ? 1 : \
+ ((uint32_t)(c)<=0x7ff ? 2 : \
+ ((uint32_t)(c)<=0xffff ? 3 : \
+ ((uint32_t)(c)<=0x10ffff ? 4 : \
+ ((uint32_t)(c)<=0x3ffffff ? 5 : \
+ ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
+ ) \
+ ) \
+ ) \
+ ) \
+ )
+#endif
+
+/** The maximum number of bytes per code point. @deprecated ICU 2.4. Renamed to U8_MAX_LENGTH, see utf_old.h. */
+#define UTF8_MAX_CHAR_LENGTH 4
+
+/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
+
+/** Read into c the code point that contains byte offset i; i itself is left unchanged because a local copy of the index is moved back to the sequence start and then advanced. @deprecated ICU 2.4. Renamed to U8_GET_UNSAFE, see utf_old.h. */
+#define UTF8_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
+ UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
+ UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Use U8_GET instead, see utf_old.h. */
+#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t _utf8_get_char_safe_index=(int32_t)(i); \
+ UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
+ UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
+} UPRV_BLOCK_MACRO_END
+
+/** Read s[i] into c and post-increment i; if that byte is in 0xc0..0xf4, mask it as a lead byte and merge in the next 1 to 3 bytes (count from UTF8_COUNT_TRAIL_BYTES), advancing i past each. No validation or bounds checks. @deprecated ICU 2.4. Renamed to U8_NEXT_UNSAFE, see utf_old.h. */
+#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if((uint8_t)((c)-0xc0)<0x35) { \
+ uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
+ UTF8_MASK_LEAD_BYTE(c, __count); \
+ switch(__count) { \
+ /* each following branch falls through to the next one */ \
+ case 3: \
+ (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+ case 2: \
+ (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+ case 1: \
+ (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+ /* no other branches to optimize switch() */ \
+ break; \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** Write code point c to s starting at offset i as 1 byte (c<=0x7f), 2 bytes (c<=0x7ff), 3 bytes (c<=0xffff) or 4 bytes (otherwise), advancing i past them. Neither the value of c nor the buffer capacity is checked. @deprecated ICU 2.4. Renamed to U8_APPEND_UNSAFE, see utf_old.h. */
+#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ if((uint32_t)(c)<=0x7f) { \
+ (s)[(i)++]=(uint8_t)(c); \
+ } else { \
+ if((uint32_t)(c)<=0x7ff) { \
+ (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
+ } else { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
+ } else { \
+ (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
+ (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
+ } \
+ (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
+ } \
+ (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF8_FWD_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF8_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
+} UPRV_BLOCK_MACRO_END
+
+/** Read s[i] into c and post-increment i; for c>=0x80, a lead byte (per UTF8_IS_LEAD) is decoded by utf8_nextCharSafeBody(), which may advance i further, and any other byte yields UTF8_ERROR_VALUE_1. @deprecated ICU 2.4. Use U8_NEXT instead, see utf_old.h. */
+#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if((c)>=0x80) { \
+ if(UTF8_IS_LEAD(c)) { \
+ (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
+ } else { \
+ (c)=UTF8_ERROR_VALUE_1; \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Use U8_APPEND instead, see utf_old.h. */
+#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ if((uint32_t)(c)<=0x7f) { \
+ (s)[(i)++]=(uint8_t)(c); \
+ } else { \
+ (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_1, see utf_old.h. */
+#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_N, see utf_old.h. */
+#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START, see utf_old.h. */
+#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
+
+/** Pre-decrement i and read s[i] into c; if it is a trail byte, keep stepping i backwards, OR-ing in 6 bits per further trail byte, until a byte >=0xc0 is reached and merged in as the masked lead byte. No validation or bounds checks. @deprecated ICU 2.4. Renamed to U8_PREV_UNSAFE, see utf_old.h. */
+#define UTF8_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if(UTF8_IS_TRAIL(c)) { \
+ uint8_t __b, __count=1, __shift=6; \
+\
+ /* c is a trail byte */ \
+ (c)&=0x3f; \
+ for(;;) { \
+ __b=(s)[--(i)]; \
+ if(__b>=0xc0) { \
+ UTF8_MASK_LEAD_BYTE(__b, __count); \
+ (c)|=(UChar32)__b<<__shift; \
+ break; \
+ } else { \
+ (c)|=(UChar32)(__b&0x3f)<<__shift; \
+ ++__count; \
+ __shift+=6; \
+ } \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ while(UTF8_IS_TRAIL((s)[--(i)])) {} \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF8_BACK_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ UTF8_BACK_1_UNSAFE(s, i); \
+ UTF8_FWD_1_UNSAFE(s, i); \
+} UPRV_BLOCK_MACRO_END
+
+/** Pre-decrement i and read s[i] into c; for c>=0x80, a byte up to 0xbf is decoded by utf8_prevCharSafeBody(), which may move i further back, and any byte above 0xbf yields UTF8_ERROR_VALUE_1. @deprecated ICU 2.4. Use U8_PREV instead, see utf_old.h. */
+#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if((c)>=0x80) { \
+ if((c)<=0xbf) { \
+ (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
+ } else { \
+ (c)=UTF8_ERROR_VALUE_1; \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_1, see utf_old.h. */
+#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_N, see utf_old.h. */
+#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT, see utf_old.h. */
+#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
+
+/* Formerly utf16.h --------------------------------------------------------- */
+
+/** Is uchar a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. */
+#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)
+
+/** Is uchar a second/trail surrogate? @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. */
+#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)
+
+/** Assuming c is a surrogate, is it a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_SURROGATE_LEAD and U16_IS_SURROGATE_LEAD, see utf_old.h. */
+#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)
+
+/** Helper constant for UTF16_GET_PAIR_VALUE. @deprecated ICU 2.4. Renamed to U16_SURROGATE_OFFSET, see utf_old.h. */
+#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+
+/** Get the UTF-32 value from the surrogate code units. @deprecated ICU 2.4. Renamed to U16_GET_SUPPLEMENTARY, see utf_old.h. */
+#define UTF16_GET_PAIR_VALUE(first, second) \
+ (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
+
+/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */
+#define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
+
+/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */
+#define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
+
+/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */
+#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)
+
+/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */
+#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)
+
+/** @deprecated ICU 2.4. Renamed to U16_IS_SINGLE, see utf_old.h. */
+#define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar)
+
+/** @deprecated ICU 2.4. Renamed to U16_IS_LEAD, see utf_old.h. */
+#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)
+
+/** @deprecated ICU 2.4. Renamed to U16_IS_TRAIL, see utf_old.h. */
+#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
+
+/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead, see utf_old.h. */
+#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)
+
+/** @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. */
+#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
+
+/** @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. */
+#define UTF16_MAX_CHAR_LENGTH 2
+
+/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF16_ARRAY_SIZE(size) (size)
+
+/**
+ * Get a single code point from an offset that points to any
+ * of the code units that belong to that code point.
+ * Assume 0<=i<length.
+ *
+ * This could be used for iteration together with
+ * UTF16_CHAR_LENGTH() and UTF_IS_ERROR(),
+ * but the use of UTF16_NEXT_CHAR[_UNSAFE]() and
+ * UTF16_PREV_CHAR[_UNSAFE]() is more efficient for that.
+ * @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h.
+ */
+#define UTF16_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[i]; \
+ if(UTF_IS_SURROGATE(c)) { \
+ if(UTF_IS_SURROGATE_FIRST(c)) { \
+ (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
+ } else { \
+ (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */
+#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[i]; \
+ if(UTF_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(UTF_IS_SURROGATE_FIRST(c)) { \
+ if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
+ (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched first surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } else { \
+ if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
+ (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched second surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } \
+ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */
+#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if(UTF_IS_FIRST_SURROGATE(c)) { \
+ (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */
+#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
+ ++(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF16_FWD_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF16_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
+ --(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** Read s[i] into c and post-increment i; a lead surrogate followed by a trail surrogate at i<length is combined into a supplementary code point and i advances once more. If strict is true, an unpaired surrogate or any value failing UTF_IS_UNICODE_CHAR becomes UTF_ERROR_VALUE. @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */
+#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if(UTF_IS_FIRST_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
+ ++(i); \
+ (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched first surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+ /* unmatched second surrogate or other non-character */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** Append c at s[i], advancing i: one unit for c<=0xffff; a surrogate pair for c<=0x10ffff when (i)+1<(length); otherwise a single UTF_ERROR_VALUE unit. Note that the single-unit writes (BMP value or error value) are not checked against length. @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */
+#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else if((uint32_t)(c)<=0x10ffff) { \
+ if((i)+1<(length)) { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } else /* not enough space */ { \
+ (s)[(i)++]=UTF_ERROR_VALUE; \
+ } \
+ } else /* c>0x10ffff, write error value */ { \
+ (s)[(i)++]=UTF_ERROR_VALUE; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
+#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
+#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
+#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */
+#define UTF16_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if(UTF_IS_SECOND_SURROGATE(c)) { \
+ (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
+ --(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF16_BACK_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
+ ++(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** Pre-decrement i and read s[i] into c; a trail surrogate preceded by a lead surrogate at i>start is combined into a supplementary code point and i is decremented once more. If strict is true, an unpaired surrogate or any value failing UTF_IS_UNICODE_CHAR becomes UTF_ERROR_VALUE. @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */
+#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if(UTF_IS_SECOND_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
+ --(i); \
+ (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched second surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+ /* unmatched first surrogate or other non-character */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
+#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
+#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
+#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
+
+/* Formerly utf32.h --------------------------------------------------------- */
+
+/*
+* Old documentation:
+*
+* This file defines macros to deal with UTF-32 code units and code points.
+* Signatures and semantics are the same as for the similarly named macros
+* in utf16.h.
+* utf32.h is included by utf.h after unicode/umachine.h
+* and some common definitions.
+* <p><b>Usage:</b> ICU coding guidelines for if() statements should be followed when using these macros.
+* Compound statements (curly braces {}) must be used for if-else-while...
+* bodies and all macro statements should be terminated with semicolon.</p>
+*/
+
+/* internal definitions ----------------------------------------------------- */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_SAFE(c, strict) \
+ (!(strict) ? \
+ (uint32_t)(c)<=0x10ffff : \
+ UTF_IS_UNICODE_CHAR(c))
+
+/*
+ * For the semantics of all of these macros, see utf16.h.
+ * The UTF-32 versions are trivial because any code point is
+ * encoded using exactly one code unit.
+ */
+
+/* single-code point definitions -------------------------------------------- */
+
+/* classes of code unit values */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_SINGLE(uchar) 1
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_LEAD(uchar) 0
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_TRAIL(uchar) 0
+
+/* number of code units per code point */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEED_MULTIPLE_UCHAR(c) 0
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_CHAR_LENGTH(c) 1
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_MAX_CHAR_LENGTH 1
+
+/* average number of code units compared to UTF-16 */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_ARRAY_SIZE(size) (size)
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[i]; \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[i]; \
+ if(!UTF32_IS_SAFE(c, strict)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with forward iteration --------------------------------------- */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (s)[(i)++]=(c); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ ++(i); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ (i)+=(n); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if(!UTF32_IS_SAFE(c, strict)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ if((uint32_t)(c)<=0x10ffff) { \
+ (s)[(i)++]=(c); \
+ } else /* c>0x10ffff, write 0xfffd */ { \
+ (s)[(i)++]=0xfffd; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_1_SAFE(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+ ++(i); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_N_SAFE(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
+ if(((i)+=(n))>(length)) { \
+ (i)=(length); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_START_SAFE(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with backward iteration -------------------------------------- */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ --(i); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ (i)-=(n); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if(!UTF32_IS_SAFE(c, strict)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_1_SAFE(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+ --(i); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_N_SAFE(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ (i)-=(n); \
+ if((i)<(start)) { \
+ (i)=(start); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+} UPRV_BLOCK_MACRO_END
+
+/* Formerly utf.h, part 2 --------------------------------------------------- */
+
+/**
+ * Estimate the number of code units for a string based on the number of UTF-16 code units.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)
+
+/** @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. */
+#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */
+#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */
+#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */
+#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */
+#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */
+#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
+#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
+#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
+#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */
+#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */
+#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
+#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
+#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
+#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
+
+/* Define default macros (UTF-16 "safe") ------------------------------------ */
+
+/**
+ * Does this code unit alone encode a code point (BMP, not a surrogate)?
+ * Same as UTF16_IS_SINGLE.
+ * @deprecated ICU 2.4. Renamed to U_IS_SINGLE and U16_IS_SINGLE, see utf_old.h.
+ */
+#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)
+
+/**
+ * Is this code unit the first one of several (a lead surrogate)?
+ * Same as UTF16_IS_LEAD.
+ * @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h.
+ */
+#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)
+
+/**
+ * Is this code unit one of several but not the first one (a trail surrogate)?
+ * Same as UTF16_IS_TRAIL.
+ * @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h.
+ */
+#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)
+
+/**
+ * Does this code point require multiple code units (is it a supplementary code point)?
+ * Same as UTF16_NEED_MULTIPLE_UCHAR.
+ * @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead.
+ */
+#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
+
+/**
+ * How many code units are used to encode this code point (1 or 2)?
+ * Same as UTF16_CHAR_LENGTH.
+ * @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h.
+ */
+#define UTF_CHAR_LENGTH(c) U16_LENGTH(c)
+
+/**
+ * How many code units are used at most for any Unicode code point (2)?
+ * Same as UTF16_MAX_CHAR_LENGTH.
+ * @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h.
+ */
+#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH
+
+/**
+ * Set c to the code point that contains the code unit i.
+ * i could point to the lead or the trail surrogate for the code point.
+ * i is not modified.
+ * Same as UTF16_GET_CHAR.
+ * \pre 0<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_GET, see utf_old.h.
+ */
+#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)
+
+/**
+ * Set c to the code point that starts at code unit i
+ * and advance i to beyond the code units of this code point (post-increment).
+ * i must point to the first code unit of a code point.
+ * Otherwise c is set to the trail unit (surrogate) itself.
+ * Same as UTF16_NEXT_CHAR.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_NEXT, see utf_old.h.
+ */
+#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)
+
+/**
+ * Append the code units of code point c to the string at index i
+ * and advance i to beyond the new code units (post-increment).
+ * The code units beginning at index i will be overwritten.
+ * Same as UTF16_APPEND_CHAR.
+ * \pre 0<=c<=0x10ffff
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h.
+ */
+#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
+
+/**
+ * Advance i to beyond the code units of the code point that begins at i.
+ * I.e., advance i by one code point.
+ * Same as UTF16_FWD_1.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h.
+ */
+#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)
+
+/**
+ * Advance i to beyond the code units of the n code points where the first one begins at i.
+ * I.e., advance i by n code points.
+ * Same as UTF16_FWD_N.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h.
+ */
+#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)
+
+/**
+ * Take the random-access index i and adjust it so that it points to the beginning
+ * of a code point.
+ * The input index points to any code unit of a code point and is moved to point to
+ * the first code unit of the same code point. i is never incremented.
+ * In other words, if i points to a trail surrogate that is preceded by a matching
+ * lead surrogate, then i is decremented. Otherwise it is not modified.
+ * This can be used to start an iteration with UTF_NEXT_CHAR() from a random index.
+ * Same as UTF16_SET_CHAR_START.
+ * \pre start<=i<length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h.
+ */
+#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)
+
+/**
+ * Set c to the code point that has code units before i
+ * and move i backward (towards the beginning of the string)
+ * to the first code unit of this code point (pre-decrement).
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_PREV_CHAR.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_PREV, see utf_old.h.
+ */
+#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)
+
+/**
+ * Move i backward (towards the beginning of the string)
+ * to the first code unit of the code point that has code units before i.
+ * I.e., move i backward by one code point.
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_BACK_1.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h.
+ */
+#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)
+
+/**
+ * Move i backward (towards the beginning of the string)
+ * to the first code unit of the n code points that have code units before i.
+ * I.e., move i backward by n code points.
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_BACK_N.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h.
+ */
+#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)
+
+/**
+ * Take the random-access index i and adjust it so that it points beyond
+ * a code point. The input index points beyond any code unit
+ * of a code point and is moved to point beyond the last code unit of the same
+ * code point. i is never decremented.
+ * In other words, if i points to a trail surrogate that is preceded by a matching
+ * lead surrogate, then i is incremented. Otherwise it is not modified.
+ * This can be used to start an iteration with UTF_PREV_CHAR() from a random index.
+ * Same as UTF16_SET_CHAR_LIMIT.
+ * \pre start<i<=length
+ * \post start<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h.
+ */
+#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
+
+#endif // !U_HIDE_DEPRECATED_API && !U_HIDE_OBSOLETE_UTF_OLD_H
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/utrace.h b/thirdparty/icu4c/common/unicode/utrace.h
new file mode 100644
index 0000000000..28c313c582
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/utrace.h
@@ -0,0 +1,509 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utrace.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003aug06
+* created by: Markus W. Scherer
+*
+* Definitions for ICU tracing/logging.
+*
+*/
+
+#ifndef __UTRACE_H__
+#define __UTRACE_H__
+
+#include <stdarg.h>
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Definitions for ICU tracing/logging.
+ *
+ * This provides API for debugging the internals of ICU without the use of
+ * a traditional debugger.
+ *
+ * By default, tracing is disabled in ICU. If you need to debug ICU with
+ * tracing, please compile ICU with the --enable-tracing configure option.
+ */
+
+U_CDECL_BEGIN
+
+/**
+ * Trace severity levels. Higher levels increase the verbosity of the trace output.
+ * @see utrace_setLevel
+ * @stable ICU 2.8
+ */
+typedef enum UTraceLevel {
+ /** Disable all tracing @stable ICU 2.8*/
+ UTRACE_OFF=-1,
+ /** Trace error conditions only @stable ICU 2.8*/
+ UTRACE_ERROR=0,
+ /** Trace errors and warnings @stable ICU 2.8*/
+ UTRACE_WARNING=3,
+ /** Trace opens and closes of ICU services @stable ICU 2.8*/
+ UTRACE_OPEN_CLOSE=5,
+ /** Trace an intermediate number of ICU operations @stable ICU 2.8*/
+ UTRACE_INFO=7,
+ /** Trace the maximum number of ICU operations @stable ICU 2.8*/
+ UTRACE_VERBOSE=9
+} UTraceLevel;
+
+/**
+ * These are the ICU functions that will be traced when tracing is enabled.
+ * @stable ICU 2.8
+ */
+typedef enum UTraceFunctionNumber {
+ UTRACE_FUNCTION_START=0,
+ UTRACE_U_INIT=UTRACE_FUNCTION_START,
+ UTRACE_U_CLEANUP,
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal function trace location.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UTRACE_FUNCTION_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+
+ UTRACE_CONVERSION_START=0x1000,
+ UTRACE_UCNV_OPEN=UTRACE_CONVERSION_START,
+ UTRACE_UCNV_OPEN_PACKAGE,
+ UTRACE_UCNV_OPEN_ALGORITHMIC,
+ UTRACE_UCNV_CLONE,
+ UTRACE_UCNV_CLOSE,
+ UTRACE_UCNV_FLUSH_CACHE,
+ UTRACE_UCNV_LOAD,
+ UTRACE_UCNV_UNLOAD,
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal conversion trace location.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UTRACE_CONVERSION_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+
+ UTRACE_COLLATION_START=0x2000,
+ UTRACE_UCOL_OPEN=UTRACE_COLLATION_START,
+ UTRACE_UCOL_CLOSE,
+ UTRACE_UCOL_STRCOLL,
+ UTRACE_UCOL_GET_SORTKEY,
+ UTRACE_UCOL_GETLOCALE,
+ UTRACE_UCOL_NEXTSORTKEYPART,
+ UTRACE_UCOL_STRCOLLITER,
+ UTRACE_UCOL_OPEN_FROM_SHORT_STRING,
+ UTRACE_UCOL_STRCOLLUTF8, /**< @stable ICU 50 */
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal collation trace location.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UTRACE_COLLATION_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+
+ /**
+ * The lowest resource/data location.
+ * @stable ICU 65
+ */
+ UTRACE_UDATA_START=0x3000,
+
+ /**
+ * Indicates that a value was read from a resource bundle. Provides three
+ * C-style strings to UTraceData: type, file name, and resource path. The
+ * possible types are:
+ *
+ * - "string" (a string value was accessed)
+ * - "binary" (a binary value was accessed)
+ * - "intvector" (an integer vector value was accessed)
+ * - "int" (a signed integer value was accessed)
+ * - "uint" (an unsigned integer value was accessed)
+ * - "get" (a path was loaded, but the value was not accessed)
+ * - "getalias" (a path was loaded, and an alias was resolved)
+ *
+ * @stable ICU 65
+ */
+ UTRACE_UDATA_RESOURCE=UTRACE_UDATA_START,
+
+ /**
+ * Indicates that a resource bundle was opened.
+ *
+ * Provides one C-style string to UTraceData: file name.
+ * @stable ICU 65
+ */
+ UTRACE_UDATA_BUNDLE,
+
+ /**
+ * Indicates that a data file was opened, but not *.res files.
+ *
+ * Provides one C-style string to UTraceData: file name.
+ *
+ * @stable ICU 65
+ */
+ UTRACE_UDATA_DATA_FILE,
+
+ /**
+ * Indicates that a *.res file was opened.
+ *
+ * This differs from UTRACE_UDATA_BUNDLE because a res file is typically
+ * opened only once per application runtime, but the bundle corresponding
+ * to that res file may be opened many times.
+ *
+ * Provides one C-style string to UTraceData: file name.
+ *
+ * @stable ICU 65
+ */
+ UTRACE_UDATA_RES_FILE,
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * One more than the highest normal resource/data trace location.
+ * @internal The numeric value may change over time, see ICU ticket #12420.
+ */
+ UTRACE_RES_DATA_LIMIT,
+#endif // U_HIDE_INTERNAL_API
+
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * The lowest break iterator location.
+ * @draft ICU 67
+ */
+ UTRACE_UBRK_START=0x4000,
+
+ /**
+ * Indicates that a character instance of break iterator was created.
+ *
+ * @draft ICU 67
+ */
+ UTRACE_UBRK_CREATE_CHARACTER = UTRACE_UBRK_START,
+
+ /**
+ * Indicates that a word instance of break iterator was created.
+ *
+ * @draft ICU 67
+ */
+ UTRACE_UBRK_CREATE_WORD,
+
+ /**
+ * Indicates that a line instance of break iterator was created.
+ *
+ * Provides one C-style string to UTraceData: the lb value ("",
+ * "loose", "strict", or "normal").
+ *
+ * @draft ICU 67
+ */
+ UTRACE_UBRK_CREATE_LINE,
+
+ /**
+ * Indicates that a sentence instance of break iterator was created.
+ *
+ * @draft ICU 67
+ */
+ UTRACE_UBRK_CREATE_SENTENCE,
+
+ /**
+ * Indicates that a title instance of break iterator was created.
+ *
+ * @draft ICU 67
+ */
+ UTRACE_UBRK_CREATE_TITLE,
+
+ /**
+ * Indicates that an internal dictionary break engine was created.
+ *
+ * Provides one C-style string to UTraceData: the script code of what
+ * the break engine covers ("Hani", "Khmr", "Laoo", "Mymr", or "Thai").
+ *
+ * @draft ICU 67
+ */
+ UTRACE_UBRK_CREATE_BREAK_ENGINE,
+
+#endif // U_HIDE_DRAFT_API
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * One more than the highest normal break iterator trace location.
+ * @internal The numeric value may change over time, see ICU ticket #12420.
+ */
+ UTRACE_UBRK_LIMIT,
+#endif // U_HIDE_INTERNAL_API
+
+} UTraceFunctionNumber;
+
+/**
+ * Setter for the trace level.
+ * @param traceLevel A UTraceLevel value.
+ * @stable ICU 2.8
+ */
+U_CAPI void U_EXPORT2
+utrace_setLevel(int32_t traceLevel);
+
+/**
+ * Getter for the trace level.
+ * @return The UTraceLevel value being used by ICU.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+utrace_getLevel(void);
+
+/* Trace function pointers types ----------------------------- */
+
+/**
+ * Type signature for the trace function to be called when entering a function.
+ * @param context value supplied at the time the trace functions are set.
+ * @param fnNumber Enum value indicating the ICU function being entered.
+ * @stable ICU 2.8
+ */
+typedef void U_CALLCONV
+UTraceEntry(const void *context, int32_t fnNumber);
+
+/**
+ * Type signature for the trace function to be called when exiting from a function.
+ * @param context value supplied at the time the trace functions are set.
+ * @param fnNumber Enum value indicating the ICU function being exited.
+ * @param fmt A formatting string that describes the number and types
+ * of arguments included with the variable args. The fmt
+ * string has the same form as the utrace_vformat format
+ * string.
+ * @param args A variable arguments list. Contents are described by
+ * the fmt parameter.
+ * @see utrace_vformat
+ * @stable ICU 2.8
+ */
+typedef void U_CALLCONV
+UTraceExit(const void *context, int32_t fnNumber,
+ const char *fmt, va_list args);
+
+/**
+ * Type signature for the trace function to be called from within an ICU function
+ * to display data or messages.
+ * @param context value supplied at the time the trace functions are set.
+ * @param fnNumber Enum value indicating the ICU function being traced.
+ * @param level The current tracing level
+ * @param fmt A format string describing the tracing data that is supplied
+ * as variable args
+ * @param args The data being traced, passed as variable args.
+ * @stable ICU 2.8
+ */
+typedef void U_CALLCONV
+UTraceData(const void *context, int32_t fnNumber, int32_t level,
+ const char *fmt, va_list args);
+
+/**
+ * Set ICU Tracing functions. Installs application-provided tracing
+ * functions into ICU. After doing this, subsequent ICU operations
+ * will call back to the installed functions, providing a trace
+ * of the use of ICU. Passing a NULL pointer for a tracing function
+ * is allowed, and inhibits tracing action at points where that function
+ * would be called.
+ * <p>
+ * Tracing and Threads: Tracing functions are global to a process, and
+ * will be called in response to ICU operations performed by any
+ * thread. If tracing of an individual thread is desired, the
+ * tracing functions must themselves filter by checking that the
+ * current thread is the desired thread.
+ *
+ * @param context an uninterpreted pointer. Whatever is passed in
+ * here will in turn be passed to each of the tracing
+ * functions UTraceEntry, UTraceExit and UTraceData.
+ * ICU does not use or alter this pointer.
+ * @param e Callback function to be called on entry to a
+ * traced ICU function.
+ * @param x Callback function to be called on exit from a
+ * traced ICU function.
+ * @param d Callback function to be called from within a
+ * traced ICU function, for the purpose of providing
+ * data to the trace.
+ *
+ * @stable ICU 2.8
+ */
+U_CAPI void U_EXPORT2
+utrace_setFunctions(const void *context,
+ UTraceEntry *e, UTraceExit *x, UTraceData *d);
+
+/**
+ * Get the currently installed ICU tracing functions. Note that a null function
+ * pointer will be returned if no trace function has been set.
+ *
+ * @param context The currently installed tracing context.
+ * @param e The currently installed UTraceEntry function.
+ * @param x The currently installed UTraceExit function.
+ * @param d The currently installed UTraceData function.
+ * @stable ICU 2.8
+ */
+U_CAPI void U_EXPORT2
+utrace_getFunctions(const void **context,
+ UTraceEntry **e, UTraceExit **x, UTraceData **d);
+
+
+
+/*
+ *
+ * ICU trace format string syntax
+ *
+ * Format Strings are passed to UTraceData functions, and define the
+ * number and types of the trace data being passed on each call.
+ *
+ * The UTraceData function, which is supplied by the application,
+ * not by ICU, can either forward the trace data (passed via
+ * varargs) and the format string back to ICU for formatting into
+ * a displayable string, or it can interpret the format itself,
+ * and do as it wishes with the trace data.
+ *
+ *
+ * Goals for the format string
+ * - basic data output
+ * - easy to use for trace programmer
+ * - sufficient provision for data types for trace output readability
+ * - well-defined types and binary portable APIs
+ *
+ * Non-goals
+ * - printf compatibility
+ * - fancy formatting
+ * - argument reordering and other internationalization features
+ *
+ * ICU trace format strings contain plain text with argument inserts,
+ * much like standard printf format strings.
+ * Each insert begins with a '%', then optionally contains a 'v',
+ * then exactly one type character.
+ * Two '%' in a row represent a '%' instead of an insert.
+ * The trace format strings need not have \n at the end.
+ *
+ *
+ * Types
+ * -----
+ *
+ * Type characters:
+ * - c A char character in the default codepage.
+ * - s A NUL-terminated char * string in the default codepage.
+ * - S A UChar * string. Requires two params, (ptr, length). Length=-1 for nul term.
+ * - b A byte (8-bit integer).
+ * - h A 16-bit integer. Also a 16 bit Unicode code unit.
+ * - d A 32-bit integer. Also a 21-bit Unicode code point value.
+ * - l A 64-bit integer.
+ * - p A data pointer.
+ *
+ * Vectors
+ * -------
+ *
+ * If the 'v' is not specified, then one item of the specified type
+ * is passed in.
+ * If the 'v' (for "vector") is specified, then a vector of items of the
+ * specified type is passed in, via a pointer to the first item
+ * and an int32_t value for the length of the vector.
+ * Length==-1 means zero or NUL termination. Works for vectors of all types.
+ *
+ * Note: %vS is a vector of (UChar *) strings. The strings must
+ * be nul terminated as there is no way to provide a
+ * separate length parameter for each string. The length
+ * parameter (required for all vectors) is the number of
+ * strings, not the length of the strings.
+ *
+ * Examples
+ * --------
+ *
+ * These examples show the parameters that will be passed to an application's
+ * UTraceData() function for various formats.
+ *
+ * - the precise formatting is up to the application!
+ * - the examples use type casts for arguments only to _show_ the types of
+ * arguments without needing variable declarations in the examples;
+ * the type casts will not be necessary in actual code
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ * "There is a character %c in the string %s.", // Format String
+ * (char)c, (const char *)s); // varargs parameters
+ * -> There is a character 0x42 'B' in the string "Bravo".
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ * "Vector of bytes %vb vector of chars %vc",
+ * (const uint8_t *)bytes, (int32_t)bytesLength,
+ * (const char *)chars, (int32_t)charsLength);
+ * -> Vector of bytes
+ * 42 63 64 3f [4]
+ * vector of chars
+ * "Bcd?"[4]
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ * "An int32_t %d and a whole bunch of them %vd",
+ * (int32_t)-5, (const int32_t *)ints, (int32_t)intsLength);
+ * -> An int32_t 0xfffffffb and a whole bunch of them
+ * fffffffb 00000005 0000010a [3]
+ *
+ */
+
+
+
+/**
+ * Trace output Formatter. An application's UTraceData tracing functions may call
+ * back to this function to format the trace output in a
+ * human readable form. Note that a UTraceData function may choose
+ * to not format the data; it could, for example, save it in
+ * the raw form it was received (more compact), leaving
+ * formatting for a later trace analysis tool.
+ * @param outBuf pointer to a buffer to receive the formatted output. Output
+ * will be nul terminated if there is space in the buffer -
+ * if the length of the requested output < the output buffer size.
+ * @param capacity Length of the output buffer.
+ * @param indent Number of spaces to indent the output. Intended to allow
+ * data displayed from nested functions to be indented for readability.
+ * @param fmt Format specification for the data to output
+ * @param args Data to be formatted.
+ * @return Length of formatted output, including the terminating NUL.
+ * If buffer capacity is insufficient, the required capacity is returned.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+utrace_vformat(char *outBuf, int32_t capacity,
+ int32_t indent, const char *fmt, va_list args);
+
+/**
+ * Trace output Formatter. An application's UTraceData tracing functions may call
+ * this function to format any additional trace data, beyond that
+ * provided by default, in human readable form with the same
+ * formatting conventions used by utrace_vformat().
+ * @param outBuf pointer to a buffer to receive the formatted output. Output
+ * will be nul terminated if there is space in the buffer -
+ * if the length of the requested output < the output buffer size.
+ * @param capacity Length of the output buffer.
+ * @param indent Number of spaces to indent the output. Intended to allow
+ * data displayed from nested functions to be indented for readability.
+ * @param fmt Format specification for the data to output
+ * @param ... Data to be formatted.
+ * @return Length of formatted output, including the terminating NUL.
+ * If buffer capacity is insufficient, the required capacity is returned.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+utrace_format(char *outBuf, int32_t capacity,
+ int32_t indent, const char *fmt, ...);
+
+
+
+/* Trace function numbers --------------------------------------------------- */
+
+/**
+ * Get the name of a function from its trace function number.
+ *
+ * @param fnNumber The trace number for an ICU function.
+ * @return The name string for the function.
+ *
+ * @see UTraceFunctionNumber
+ * @stable ICU 2.8
+ */
+U_CAPI const char * U_EXPORT2
+utrace_functionName(int32_t fnNumber);
+
+U_CDECL_END
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/utypes.h b/thirdparty/icu4c/common/unicode/utypes.h
new file mode 100644
index 0000000000..7c4ea7ac28
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/utypes.h
@@ -0,0 +1,732 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1996-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* FILE NAME : UTYPES.H (formerly ptypes.h)
+*
+* Date Name Description
+* 12/11/96 helena Creation.
+* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32,
+* uint8, uint16, and uint32.
+* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as
+* well as C++.
+* Modified to use memcpy() for uprv_arrayCopy() fns.
+* 04/14/97 aliu Added TPlatformUtilities.
+* 05/07/97 aliu Added import/export specifiers (replacing the old
+* broken EXT_CLASS). Added version number for our
+* code. Cleaned up header.
+* 6/20/97 helena Java class name change.
+* 08/11/98 stephen UErrorCode changed from typedef to enum
+* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3
+* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t
+* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066)
+* 04/20/99 stephen Cleaned up & reworked for autoconf.
+* Renamed to utypes.h.
+* 05/05/99 stephen Changed to use <inttypes.h>
+* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here.
+*******************************************************************************
+*/
+
+#ifndef UTYPES_H
+#define UTYPES_H
+
+
+#include "unicode/umachine.h"
+#include "unicode/uversion.h"
+#include "unicode/uconfig.h"
+#include <float.h>
+
+#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+# include "unicode/utf.h"
+#endif
+
+/*!
+ * \file
+ * \brief Basic definitions for ICU, for both C and C++ APIs
+ *
+ * This file defines basic types, constants, and enumerations directly or
+ * indirectly by including other header files, especially utf.h for the
+ * basic character and string definitions and umachine.h for consistent
+ * integer and other types.
+ */
+
+
+/**
+ * \def U_SHOW_CPLUSPLUS_API
+ * @internal
+ */
+#ifdef __cplusplus
+# ifndef U_SHOW_CPLUSPLUS_API
+# define U_SHOW_CPLUSPLUS_API 1
+# endif
+#else
+# undef U_SHOW_CPLUSPLUS_API
+# define U_SHOW_CPLUSPLUS_API 0
+#endif
+
+/** @{ API visibility control */
+
+/**
+ * \def U_HIDE_DRAFT_API
+ * Define this to 1 to request that draft API be "hidden"
+ * @internal
+ */
+/**
+ * \def U_HIDE_INTERNAL_API
+ * Define this to 1 to request that internal API be "hidden"
+ * @internal
+ */
+#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API)
+#define U_HIDE_DRAFT_API 1
+#endif
+#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_INTERNAL_API)
+#define U_HIDE_INTERNAL_API 1
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/* ICUDATA naming scheme */
+/*===========================================================================*/
+
+/**
+ * \def U_ICUDATA_TYPE_LETTER
+ *
+ * This is a platform-dependent string containing one letter:
+ * - b for big-endian, ASCII-family platforms
+ * - l for little-endian, ASCII-family platforms
+ * - e for big-endian, EBCDIC-family platforms
+ * This letter is part of the common data file name.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_ICUDATA_TYPE_LITLETTER
+ * The non-string form of U_ICUDATA_TYPE_LETTER
+ * @stable ICU 2.0
+ */
+#if U_CHARSET_FAMILY
+# if U_IS_BIG_ENDIAN
+ /* EBCDIC - should always be BE */
+# define U_ICUDATA_TYPE_LETTER "e"
+# define U_ICUDATA_TYPE_LITLETTER e
+# else
+# error "Don't know what to do with little endian EBCDIC!"
+# define U_ICUDATA_TYPE_LETTER "x"
+# define U_ICUDATA_TYPE_LITLETTER x
+# endif
+#else
+# if U_IS_BIG_ENDIAN
+ /* Big-endian ASCII */
+# define U_ICUDATA_TYPE_LETTER "b"
+# define U_ICUDATA_TYPE_LITLETTER b
+# else
+ /* Little-endian ASCII */
+# define U_ICUDATA_TYPE_LETTER "l"
+# define U_ICUDATA_TYPE_LITLETTER l
+# endif
+#endif
+
+/**
+ * A single string literal containing the icudata stub name, e.g. 'icudt18e' for
+ * ICU 1.8.x on EBCDIC.
+ * @stable ICU 2.0
+ */
+#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER
+#ifndef U_HIDE_INTERNAL_API
+#define U_USRDATA_NAME "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */
+#define U_USE_USRDATA 0 /**< @internal */
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * U_ICUDATA_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
+ * Defined as a literal, not a string.
+ * Tricky Preprocessor use - ## operator replaces macro parameters with the literal string
+ * from the corresponding macro invocation, _before_ other macro substitutions.
+ * Nested \#defines are needed to get the actual version numbers rather than
+ * the literal text U_ICU_VERSION_MAJOR_NUM into the name.
+ * The net result will be something of the form
+ * \#define U_ICUDATA_ENTRY_POINT icudt19_dat
+ * @stable ICU 2.4
+ */
+#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME)
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Do not use. Note that it's OK for the 2nd argument to be undefined (literal).
+ * @internal
+ */
+#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff)
+
+/**
+ * Do not use.
+ * @internal
+ */
+#ifndef U_DEF_ICUDATA_ENTRY_POINT
+/* affected by symbol renaming. See platform.h */
+#ifndef U_LIB_SUFFIX_C_NAME
+#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat
+#else
+#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat
+#endif
+#endif
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * \def NULL
+ * Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C.
+ * @stable ICU 2.0
+ */
+#ifndef NULL
+#ifdef __cplusplus
+#define NULL nullptr
+#else
+#define NULL ((void *)0)
+#endif
+#endif
+
+/*===========================================================================*/
+/* Calendar/TimeZone data types */
+/*===========================================================================*/
+
+/**
+ * Date and Time data type.
+ * This is a primitive data type that holds the date and time
+ * as the number of milliseconds since 1970-jan-01, 00:00 UTC.
+ * UTC leap seconds are ignored.
+ * @stable ICU 2.0
+ */
+typedef double UDate;
+
+/** The number of milliseconds per second @stable ICU 2.0 */
+#define U_MILLIS_PER_SECOND (1000)
+/** The number of milliseconds per minute @stable ICU 2.0 */
+#define U_MILLIS_PER_MINUTE (60000)
+/** The number of milliseconds per hour @stable ICU 2.0 */
+#define U_MILLIS_PER_HOUR (3600000)
+/** The number of milliseconds per day @stable ICU 2.0 */
+#define U_MILLIS_PER_DAY (86400000)
+
+/**
+ * Maximum UDate value
+ * @stable ICU 4.8
+ */
+#define U_DATE_MAX DBL_MAX
+
+/**
+ * Minimum UDate value
+ * @stable ICU 4.8
+ */
+#define U_DATE_MIN -U_DATE_MAX
+
+/*===========================================================================*/
+/* Shared library/DLL import-export API control */
+/*===========================================================================*/
+
+/*
+ * Control of symbol import/export.
+ * ICU is separated into several libraries (data, common, i18n, layout, layoutex, io, toolutil).
+ */
+
+/**
+ * \def U_COMBINED_IMPLEMENTATION
+ * Set to export library symbols from inside the ICU library
+ * when all of ICU is in a single library.
+ * This can be set as a compiler option while building ICU, and it
+ * needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_DATA_API
+ * Set to export library symbols from inside the stubdata library,
+ * and to import them from outside.
+ * @stable ICU 3.0
+ */
+
+/**
+ * \def U_COMMON_API
+ * Set to export library symbols from inside the common library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_I18N_API
+ * Set to export library symbols from inside the i18n library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_LAYOUT_API
+ * Set to export library symbols from inside the layout engine library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_LAYOUTEX_API
+ * Set to export library symbols from inside the layout extensions library,
+ * and to import them from outside.
+ * @stable ICU 2.6
+ */
+
+/**
+ * \def U_IO_API
+ * Set to export library symbols from inside the ustdio library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_TOOLUTIL_API
+ * Set to export library symbols from inside the toolutil library,
+ * and to import them from outside.
+ * @stable ICU 3.4
+ */
+
+#ifdef U_IN_DOXYGEN
+// This definition is required when generating the API docs.
+#define U_COMBINED_IMPLEMENTATION 1
+#endif
+
+#if defined(U_COMBINED_IMPLEMENTATION)
+#define U_DATA_API U_EXPORT
+#define U_COMMON_API U_EXPORT
+#define U_I18N_API U_EXPORT
+#define U_LAYOUT_API U_EXPORT
+#define U_LAYOUTEX_API U_EXPORT
+#define U_IO_API U_EXPORT
+#define U_TOOLUTIL_API U_EXPORT
+#elif defined(U_STATIC_IMPLEMENTATION)
+#define U_DATA_API
+#define U_COMMON_API
+#define U_I18N_API
+#define U_LAYOUT_API
+#define U_LAYOUTEX_API
+#define U_IO_API
+#define U_TOOLUTIL_API
+#elif defined(U_COMMON_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_EXPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_I18N_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_EXPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_LAYOUT_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_EXPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_LAYOUTEX_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_EXPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_IO_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_EXPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_TOOLUTIL_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_EXPORT
+#else
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#endif
+
+/**
+ * \def U_STANDARD_CPP_NAMESPACE
+ * Control of C++ Namespace: expands to `::` when compiled as C++ and to nothing in C.
+ * @stable ICU 2.0
+ */
+#ifdef __cplusplus
+#define U_STANDARD_CPP_NAMESPACE ::
+#else
+#define U_STANDARD_CPP_NAMESPACE
+#endif
+
+/*===========================================================================*/
+/* UErrorCode */
+/*===========================================================================*/
+
+/**
+ * Standard ICU4C error code type, a substitute for exceptions.
+ *
+ * Initialize the UErrorCode with U_ZERO_ERROR, and check for success or
+ * failure using U_SUCCESS() or U_FAILURE():
+ *
+ * UErrorCode errorCode = U_ZERO_ERROR;
+ * // call ICU API that needs an error code parameter.
+ * if (U_FAILURE(errorCode)) {
+ * // An error occurred. Handle it here.
+ * }
+ *
+ * C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a
+ * suitable subclass.
+ *
+ * For more information, see:
+ * http://icu-project.org/userguide/conventions
+ *
+ * Note: By convention, ICU functions that take a reference (C++) or a pointer
+ * (C) to a UErrorCode first test:
+ *
+ * if (U_FAILURE(errorCode)) { return immediately; }
+ *
+ * so that in a chain of such functions the first one that sets an error code
+ * causes the following ones to not perform any operations.
+ *
+ * @stable ICU 2.0
+ */
+typedef enum UErrorCode {
+ /* The ordering of U_ERROR_WARNING_START vs. U_USING_FALLBACK_WARNING looks weird
+ * and is that way because the VC++ debugger displays the first encountered constant
+ * for a value, and the range marker is not what the code is used for.
+ */
+
+ U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */
+
+ U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */
+
+ U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */
+
+ U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */
+
+ U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
+
+ U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
+
+ U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */
+
+ U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */
+
+ U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */
+
+ U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UErrorCode warning value.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_ERROR_WARNING_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+
+ U_ZERO_ERROR = 0, /**< No error, no warning. */
+
+ U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */
+ U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */
+ U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */
+ U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */
+ U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */
+ U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */
+ U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */
+ U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */
+ U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */
+ U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
+ U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */
+ U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. */
+ U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */
+ U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */
+ U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */
+ U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */
+ U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */
+ U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */
+ U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
+ U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */
+ U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */
+ U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */
+ U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */
+ U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource.
+ It is very possible that a circular alias definition has occurred */
+ U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */
+ U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */
+ U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */
+ U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */
+ U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */
+ U_NO_WRITE_PERMISSION = 30, /**< Attempt to modify read-only or constant data. */
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * The input is impractically long for an operation.
+ * It is rejected because it may lead to problems such as excessive
+ * processing time, stack depth, or heap memory requirements.
+ *
+ * @draft ICU 68
+ */
+ U_INPUT_TOO_LONG_ERROR = 31,
+#endif // U_HIDE_DRAFT_API
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest standard error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_STANDARD_ERROR_LIMIT = 32,
+#endif // U_HIDE_DEPRECATED_API
+
+ /*
+ * Error codes in the range 0x10000-0x100ff are reserved for Transliterator.
+ */
+ U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */
+ U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */
+ U_MALFORMED_RULE, /**< Elements of a rule are misplaced */
+ U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/
+ U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */
+ U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/
+ U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */
+ U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */
+ U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */
+ U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */
+ U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */
+ U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */
+ U_MISSING_OPERATOR, /**< A rule contains no operator */
+ U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */
+ U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */
+ U_MULTIPLE_CURSORS, /**< More than one cursor */
+ U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */
+ U_TRAILING_BACKSLASH, /**< A dangling backslash */
+ U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */
+ U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */
+ U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */
+ U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */
+ U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */
+ U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */
+ U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */
+ U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
+ U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */
+ U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */
+ U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */
+ U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */
+ U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */
+ U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */
+ U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */
+ U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */
+ U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */
+ U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal Transliterator error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_PARSE_ERROR_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+
+ /*
+ * Error codes in the range 0x10100-0x101ff are reserved for the formatting API.
+ */
+ U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */
+ U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */
+ U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */
+ U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */
+ U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */
+ U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */
+ U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */
+ U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */
+ U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */
+ U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */
+ U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */
+ U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */
+ U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */
+ U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */
+ U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */
+ U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */
+ U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */
+ U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */
+ U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */
+ U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
+ U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @stable ICU 61 */
+ U_NUMBER_SKELETON_SYNTAX_ERROR, /**< The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. @stable ICU 62 */
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal formatting API error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_FMT_PARSE_ERROR_LIMIT = 0x10114,
+#endif // U_HIDE_DEPRECATED_API
+
+ /*
+ * Error codes in the range 0x10200-0x102ff are reserved for BreakIterator.
+ */
+ U_BRK_INTERNAL_ERROR=0x10200, /**< An internal error (bug) was detected. */
+ U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */
+ U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */
+ U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */
+ U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */
+ U_BRK_UNCLOSED_SET, /**< A UnicodeSet within an RBBI rule is missing a closing ']'. */
+ U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */
+ U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */
+ U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */
+ U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */
+ U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */
+ U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
+ U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
+ U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */
+ U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal BreakIterator error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_BRK_ERROR_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+
+ /*
+ * Error codes in the range 0x10300-0x103ff are reserved for regular expression related errors.
+ */
+ U_REGEX_INTERNAL_ERROR=0x10300, /**< An internal error (bug) was detected. */
+ U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */
+ U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */
+ U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */
+ U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */
+ U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */
+ U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */
+ U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */
+ U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */
+ U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */
+ U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */
+ U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */
+ U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */
+ U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */
+ U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/
+#ifndef U_HIDE_DEPRECATED_API
+ U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. @deprecated ICU 54. This error cannot occur. */
+#endif /* U_HIDE_DEPRECATED_API */
+ U_REGEX_MISSING_CLOSE_BRACKET=U_REGEX_SET_CONTAINS_STRING+2, /**< Missing closing bracket on a bracket expression. */
+ U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */
+ U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */
+ U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
+ U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
+ U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @stable ICU 55 */
+ U_REGEX_INVALID_CAPTURE_GROUP_NAME, /**< Invalid capture group name. @stable ICU 55 */
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal regular expression error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_REGEX_ERROR_LIMIT=U_REGEX_STOPPED_BY_CALLER+3,
+#endif // U_HIDE_DEPRECATED_API
+
+ /*
+ * Error codes in the range 0x10400-0x104ff are reserved for IDNA related error codes.
+ */
+ U_IDNA_PROHIBITED_ERROR=0x10400,
+ U_IDNA_ERROR_START=0x10400,
+ U_IDNA_UNASSIGNED_ERROR,
+ U_IDNA_CHECK_BIDI_ERROR,
+ U_IDNA_STD3_ASCII_RULES_ERROR,
+ U_IDNA_ACE_PREFIX_ERROR,
+ U_IDNA_VERIFICATION_ERROR,
+ U_IDNA_LABEL_TOO_LONG_ERROR,
+ U_IDNA_ZERO_LENGTH_LABEL_ERROR,
+ U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal IDNA error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_IDNA_ERROR_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+ /*
+ * Aliases for StringPrep
+ */
+ U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
+ U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
+ U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
+
+ /*
+ * Error codes in the range 0x10500-0x105ff are reserved for Plugin related error codes.
+ */
+ U_PLUGIN_ERROR_START=0x10500, /**< Start of codes indicating plugin failures */
+ U_PLUGIN_TOO_HIGH=0x10500, /**< The plugin's level is too high to be loaded right now. */
+ U_PLUGIN_DIDNT_SET_LEVEL, /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal plug-in error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_PLUGIN_ERROR_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT
+#endif // U_HIDE_DEPRECATED_API
+} UErrorCode;
+
+/* Use the following to determine if an UErrorCode represents */
+/* operational success or failure. */
+
+#ifdef __cplusplus
+ /**
+ * Does the error code indicate success? True for U_ZERO_ERROR and every value below it (code <= U_ZERO_ERROR).
+ * @stable ICU 2.0
+ */
+ static
+ inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); }
+ /**
+ * Does the error code indicate a failure? True only for values strictly above U_ZERO_ERROR.
+ * @stable ICU 2.0
+ */
+ static
+ inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); }
+#else
+ /**
+ * Does the error code indicate success? C macro form of the same test: (x) <= U_ZERO_ERROR.
+ * @stable ICU 2.0
+ */
+# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR)
+ /**
+ * Does the error code indicate a failure? C macro form of the same test: (x) > U_ZERO_ERROR.
+ * @stable ICU 2.0
+ */
+# define U_FAILURE(x) ((x)>U_ZERO_ERROR)
+#endif
+
+/**
+ * Return a string for a UErrorCode value.
+ * The string will be the same as the name of the error code constant
+ * in the UErrorCode enum above.
+ * @stable ICU 2.0
+ */
+U_CAPI const char * U_EXPORT2
+u_errorName(UErrorCode code);
+
+
+#endif /* UTYPES_H */
diff --git a/thirdparty/icu4c/common/unicode/uvernum.h b/thirdparty/icu4c/common/unicode/uvernum.h
new file mode 100644
index 0000000000..a4cbb9e0fe
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/uvernum.h
@@ -0,0 +1,198 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2000-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name: uvernum.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: Vladimir Weinstein
+* Updated by: Steven R. Loomis
+*
+*/
+
+/**
+ * \file
+ * \brief C API: definitions of ICU version numbers
+ *
+ * This file is included by uversion.h and other files. This file contains only
+ * macros and definitions. The actual version numbers are defined here.
+ */
+
+ /*
+ * IMPORTANT: When updating version, the following things need to be done:
+ * source/common/unicode/uvernum.h - this file: update major, minor,
+ * patchlevel, suffix, version, short version constants, namespace,
+ * renaming macro, and copyright
+ *
+ * The following files need to be updated as well, which can be done
+ * by running the UNIX makefile target 'update-windows-makefiles' in icu/source.
+ *
+ *
+ * source/common/common_uwp.vcxproj
+ * source/common/common.vcxproj - update 'Output file name' on the link tab so
+ * that it contains the new major/minor combination
+ * source/i18n/i18n.vcxproj - same as for the common.vcxproj
+ * source/i18n/i18n_uwp.vcxproj - same as for the common_uwp.vcxproj
+ * source/layoutex/layoutex.vcproj - same
+ * source/stubdata/stubdata.vcproj - same as for the common.vcxproj
+ * source/io/io.vcproj - same as for the common.vcxproj
+ * source/data/makedata.mak - change U_ICUDATA_NAME so that it contains
+ * the new major/minor combination and the Unicode version.
+ */
+
+#ifndef UVERNUM_H
+#define UVERNUM_H
+
+/** The standard copyright notice that gets compiled into each library.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_COPYRIGHT_STRING \
+ " Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html "
+
+/** The current ICU major version as an integer.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_ICU_VERSION_MAJOR_NUM 68
+
+/** The current ICU minor version as an integer.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.6
+ */
+#define U_ICU_VERSION_MINOR_NUM 1
+
+/** The current ICU patchlevel version as an integer.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_ICU_VERSION_PATCHLEVEL_NUM 0
+
+/** The current ICU build level version as an integer.
+ * This value is for use by ICU clients. It defaults to 0.
+ * @stable ICU 4.0
+ */
+#ifndef U_ICU_VERSION_BUILDLEVEL_NUM
+#define U_ICU_VERSION_BUILDLEVEL_NUM 0
+#endif
+
+/** Glued version suffix for renamers
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.6
+ */
+#define U_ICU_VERSION_SUFFIX _68
+
+/**
+ * \def U_DEF2_ICU_ENTRY_POINT_RENAME
+ * @internal
+ */
+/**
+ * \def U_DEF_ICU_ENTRY_POINT_RENAME
+ * @internal
+ */
+/** Glued version suffix function for renamers
+ * This value will change in the subsequent releases of ICU.
+ * If a custom suffix (such as matching library suffixes) is desired, this can be modified.
+ * Note that if present, platform.h may contain an earlier definition of this macro.
+ * \def U_ICU_ENTRY_POINT_RENAME
+ * @stable ICU 4.2
+ */
+/**
+ * Disable the version suffix. Use the custom suffix if it exists.
+ * \def U_DISABLE_VERSION_SUFFIX
+ * @internal
+ */
+#ifndef U_DISABLE_VERSION_SUFFIX
+#define U_DISABLE_VERSION_SUFFIX 0
+#endif
+
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#ifdef U_HAVE_LIB_SUFFIX
+# if !U_DISABLE_VERSION_SUFFIX
+# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) x ## y ## z
+# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y,z) U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z)
+# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX,U_LIB_SUFFIX_C_NAME)
+# else
+# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
+# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
+# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_LIB_SUFFIX_C_NAME)
+# endif
+#else
+# if !U_DISABLE_VERSION_SUFFIX
+# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
+# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
+# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX)
+# else
+# define U_ICU_ENTRY_POINT_RENAME(x) x
+# endif
+#endif
+#endif
+
+/** The current ICU library version as a dotted-decimal string. The patchlevel
+ * only appears in this string if it is non-zero.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_ICU_VERSION "68.1"
+
+/**
+ * The current ICU library major version number as a string, for library name suffixes.
+ * This value will change in subsequent releases of ICU.
+ *
+ * Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers
+ * into one string without dots ("48").
+ * Since ICU 49, it is the double-digit major ICU version number.
+ * See https://unicode-org.github.io/icu/userguide/design#version-numbers-in-icu
+ *
+ * @stable ICU 2.6
+ */
+#define U_ICU_VERSION_SHORT "68"
+
+#ifndef U_HIDE_INTERNAL_API
+/** Data version in ICU4C.
+ * @internal ICU 4.4 Internal Use Only
+ **/
+#define U_ICU_DATA_VERSION "68.1"
+#endif /* U_HIDE_INTERNAL_API */
+
+/*===========================================================================
+ * ICU collation framework version information
+ * Version info that can be obtained from a collator is affected by these
+ * numbers in a secret and magic way. Please use the collator version as a whole.
+ *===========================================================================
+ */
+
+/**
+ * Collation runtime version (sort key generator, strcoll).
+ * If the version is different, sort keys for the same string could be different.
+ * This value may change in subsequent releases of ICU.
+ * @stable ICU 2.4
+ */
+#define UCOL_RUNTIME_VERSION 9
+
+/**
+ * Collation builder code version.
+ * When this is different, the same tailoring might result
+ * in assigning different collation elements to code points.
+ * This value may change in subsequent releases of ICU.
+ * @stable ICU 2.4
+ */
+#define UCOL_BUILDER_VERSION 9
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Constant 1.
+ * This was intended to be the version of collation tailorings,
+ * but instead the tailoring data carries a version number.
+ * @deprecated ICU 54
+ */
+#define UCOL_TAILORINGS_VERSION 1
+#endif /* U_HIDE_DEPRECATED_API */
+
+#endif
diff --git a/thirdparty/icu4c/common/unicode/uversion.h b/thirdparty/icu4c/common/unicode/uversion.h
new file mode 100644
index 0000000000..113568df8c
--- /dev/null
+++ b/thirdparty/icu4c/common/unicode/uversion.h
@@ -0,0 +1,187 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2000-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name: uversion.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: Vladimir Weinstein
+*
+* Gets included by utypes.h and Windows .rc files
+*/
+
+/**
+ * \file
+ * \brief C API: API for accessing ICU version numbers.
+ */
+/*===========================================================================*/
+/* Main ICU version information */
+/*===========================================================================*/
+
+#ifndef UVERSION_H
+#define UVERSION_H
+
+#include "unicode/umachine.h"
+
+/* Actual version info lives in uvernum.h */
+#include "unicode/uvernum.h"
+
+/** Maximum length of the copyright string.
+ * @stable ICU 2.4
+ */
+#define U_COPYRIGHT_STRING_LENGTH 128
+
+/** An ICU version consists of up to 4 numbers from 0..255.
+ * @stable ICU 2.4
+ */
+#define U_MAX_VERSION_LENGTH 4
+
+/** In a string, ICU version fields are delimited by dots.
+ * @stable ICU 2.4
+ */
+#define U_VERSION_DELIMITER '.'
+
+/** The maximum length of an ICU version string.
+ * @stable ICU 2.4
+ */
+#define U_MAX_VERSION_STRING_LENGTH 20
+
+/** The binary form of a version on ICU APIs is an array of 4 uint8_t.
+ * To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)).
+ * @stable ICU 2.4
+ */
+typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
+
+/*===========================================================================*/
+/* C++ namespace if supported. Versioned unless versioning is disabled. */
+/*===========================================================================*/
+
+/* Define C++ namespace symbols. */
+#ifdef __cplusplus
+
+/**
+ * \def U_NAMESPACE_BEGIN
+ * This is used to begin a declaration of a public ICU C++ API within
+ * a versioned-ICU-namespace block.
+ *
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_END
+ * This is used to end a declaration of a public ICU C++ API.
+ * It ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN.
+ *
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_USE
+ * This is used to specify that the rest of the code uses the
+ * public ICU C++ API namespace.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_QUALIFIER
+ * This is used to qualify that a function or class is part of
+ * the public ICU C++ API namespace.
+ *
+ * This macro is unnecessary since ICU 49 requires namespace support.
+ * You can just use "icu::" instead.
+ * @stable ICU 2.4
+ */
+
+# if U_DISABLE_RENAMING
+# define U_ICU_NAMESPACE icu
+ namespace U_ICU_NAMESPACE { }
+# else
+# define U_ICU_NAMESPACE U_ICU_ENTRY_POINT_RENAME(icu)
+ namespace U_ICU_NAMESPACE { }
+ namespace icu = U_ICU_NAMESPACE;
+# endif
+
+# define U_NAMESPACE_BEGIN namespace U_ICU_NAMESPACE {
+# define U_NAMESPACE_END }
+# define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE;
+# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE::
+
+# ifndef U_USING_ICU_NAMESPACE
+# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
+ defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \
+ defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION)
+# define U_USING_ICU_NAMESPACE 0
+# else
+# define U_USING_ICU_NAMESPACE 0
+# endif
+# endif
+# if U_USING_ICU_NAMESPACE
+ U_NAMESPACE_USE
+# endif
+#endif /* __cplusplus */
+
+/*===========================================================================*/
+/* General version helper functions. Definitions in putil.c */
+/*===========================================================================*/
+
+/**
+ * Parse a string with dotted-decimal version information and
+ * fill in a UVersionInfo structure with the result.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The destination structure for the version information.
+ * @param versionString A string with dotted-decimal version information,
+ * with up to four non-negative number fields with
+ * values of up to 255 each.
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+u_versionFromString(UVersionInfo versionArray, const char *versionString);
+
+/**
+ * Parse a Unicode string with dotted-decimal version information and
+ * fill in a UVersionInfo structure with the result.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The destination structure for the version information.
+ * @param versionString A Unicode string with dotted-decimal version
+ * information, with up to four non-negative number
+ * fields with values of up to 255 each.
+ * @stable ICU 4.2
+ */
+U_CAPI void U_EXPORT2
+u_versionFromUString(UVersionInfo versionArray, const UChar *versionString);
+
+
+/**
+ * Write a string with dotted-decimal version information according
+ * to the input UVersionInfo.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The version information to be written as a string.
+ * @param versionString A string buffer that will be filled in with
+ * a string corresponding to the numeric version
+ * information in versionArray.
+ * The buffer size must be at least U_MAX_VERSION_STRING_LENGTH.
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+u_versionToString(const UVersionInfo versionArray, char *versionString);
+
+/**
+ * Gets the ICU release version. The version array stores the version information
+ * for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray the version # information, the result will be filled in
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+u_getVersion(UVersionInfo versionArray);
+#endif
diff --git a/thirdparty/icu4c/common/unifiedcache.cpp b/thirdparty/icu4c/common/unifiedcache.cpp
new file mode 100644
index 0000000000..493ab79f6d
--- /dev/null
+++ b/thirdparty/icu4c/common/unifiedcache.cpp
@@ -0,0 +1,522 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2015, International Business Machines Corporation and
+* others. All Rights Reserved.
+******************************************************************************
+*
+* File unifiedcache.cpp
+******************************************************************************
+*/
+
+#include "unifiedcache.h"
+
+#include <algorithm> // For std::max()
+#include <mutex>
+
+#include "uassert.h"
+#include "uhash.h"
+#include "ucln_cmn.h"
+
+static icu::UnifiedCache *gCache = NULL;
+static std::mutex *gCacheMutex = nullptr;
+static std::condition_variable *gInProgressValueAddedCond;
+static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER;
+
+static const int32_t MAX_EVICT_ITERATIONS = 10;
+static const int32_t DEFAULT_MAX_UNUSED = 1000;
+static const int32_t DEFAULT_PERCENTAGE_OF_IN_USE = 100;
+
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV unifiedcache_cleanup() {  // Cleanup hook registered by cacheInit(); tears down all global cache state.
+ gCacheInitOnce.reset();  // presumably re-arms the init-once guard so a later getInstance() rebuilds the cache - confirm UInitOnce::reset()
+ delete gCache;
+ gCache = nullptr;
+ gCacheMutex->~mutex();  // explicit destructor call: the mutex was created with STATIC_NEW in cacheInit(), not with plain new
+ gCacheMutex = nullptr;
+ gInProgressValueAddedCond->~condition_variable();  // likewise created with STATIC_NEW
+ gInProgressValueAddedCond = nullptr;
+ return TRUE;
+}
+U_CDECL_END
+
+
+U_NAMESPACE_BEGIN
+
+U_CAPI int32_t U_EXPORT2
+ucache_hashKeys(const UHashTok key) {  // Hash callback handed to uhash_open(): hashes the CacheKeyBase that key.pointer refers to.
+ const CacheKeyBase *ckey = (const CacheKeyBase *) key.pointer;
+ return ckey->hashCode();  // virtual: each concrete key type supplies its own hash
+}
+
+U_CAPI UBool U_EXPORT2
+ucache_compareKeys(const UHashTok key1, const UHashTok key2) {  // Key-equality callback handed to uhash_open().
+ const CacheKeyBase *p1 = (const CacheKeyBase *) key1.pointer;
+ const CacheKeyBase *p2 = (const CacheKeyBase *) key2.pointer;
+ return *p1 == *p2;  // dispatches to the virtual CacheKeyBase::operator==
+}
+
+U_CAPI void U_EXPORT2
+ucache_deleteKey(void *obj) {  // Key deleter installed with uhash_setKeyDeleter(): destroys a key stored in the cache's hash table.
+ CacheKeyBase *p = (CacheKeyBase *) obj;
+ delete p;  // CacheKeyBase has a virtual destructor, so the concrete key type is destroyed
+}
+
+CacheKeyBase::~CacheKeyBase() {  // Out-of-line definition of the virtual destructor; nothing to release here.
+}
+
+static void U_CALLCONV cacheInit(UErrorCode &status) {  // One-time initializer run through umtx_initOnce() by UnifiedCache::getInstance().
+ U_ASSERT(gCache == NULL);
+ ucln_common_registerCleanup(
+ UCLN_COMMON_UNIFIED_CACHE, unifiedcache_cleanup);  // registered first, before any global below is created
+
+ gCacheMutex = STATIC_NEW(std::mutex);
+ gInProgressValueAddedCond = STATIC_NEW(std::condition_variable);
+ gCache = new UnifiedCache(status);
+ if (gCache == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ if (U_FAILURE(status)) {  // covers both a failed allocation and a constructor that reported an error
+ delete gCache;
+ gCache = NULL;
+ return;
+ }
+}
+
+UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) {  // Returns the global cache (gCache), creating it through cacheInit() on first use.
+ umtx_initOnce(gCacheInitOnce, &cacheInit, status);
+ if (U_FAILURE(status)) {
+ return NULL;  // status reports a failure, so no cache is handed out
+ }
+ U_ASSERT(gCache != NULL);
+ return gCache;
+}
+
+UnifiedCache::UnifiedCache(UErrorCode &status) :  // Builds an empty cache; on failure sets status and returns with the remaining members at their initial values.
+ fHashtable(NULL),
+ fEvictPos(UHASH_FIRST),
+ fNumValuesTotal(0),
+ fNumValuesInUse(0),
+ fMaxUnused(DEFAULT_MAX_UNUSED),
+ fMaxPercentageOfInUse(DEFAULT_PERCENTAGE_OF_IN_USE),
+ fAutoEvictedCount(0),
+ fNoValue(nullptr) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+ fNoValue = new SharedObject();  // sentinel stored for keys that are in progress or whose creation produced no object
+ if (fNoValue == nullptr) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ fNoValue->softRefCount = 1; // Add fake references to prevent fNoValue from being deleted
+ fNoValue->hardRefCount = 1; // when other references to it are removed.
+ fNoValue->cachePtr = this;
+
+ fHashtable = uhash_open(
+ &ucache_hashKeys,
+ &ucache_compareKeys,
+ NULL,
+ &status);
+ if (U_FAILURE(status)) {
+ return;
+ }
+ uhash_setKeyDeleter(fHashtable, &ucache_deleteKey);  // stored keys are the clones made in _putNew(); this deleter destroys them
+}
+
+void UnifiedCache::setEvictionPolicy(  // Sets the limits read by _computeCountOfItemsToEvict(): count -> fMaxUnused, percentageOfInUseItems -> fMaxPercentageOfInUse.
+ int32_t count, int32_t percentageOfInUseItems, UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if (count < 0 || percentageOfInUseItems < 0) {  // negative limits are rejected; no upper bound is enforced
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ std::lock_guard<std::mutex> lock(*gCacheMutex);  // both limits change together under the cache mutex
+ fMaxUnused = count;
+ fMaxPercentageOfInUse = percentageOfInUseItems;
+}
+
+int32_t UnifiedCache::unusedCount() const {  // Number of hash-table entries minus fNumValuesInUse, read under the cache mutex.
+ std::lock_guard<std::mutex> lock(*gCacheMutex);
+ return uhash_count(fHashtable) - fNumValuesInUse;
+}
+
+int64_t UnifiedCache::autoEvictedCount() const {  // Running total of entries removed by _runEvictionSlice(), read under the cache mutex.
+ std::lock_guard<std::mutex> lock(*gCacheMutex);
+ return fAutoEvictedCount;
+}
+
+int32_t UnifiedCache::keyCount() const {  // Number of keys currently in the hash table, read under the cache mutex.
+ std::lock_guard<std::mutex> lock(*gCacheMutex);
+ return uhash_count(fHashtable);
+}
+
+void UnifiedCache::flush() const {  // Removes every entry that _isEvictable() accepts, holding the cache mutex throughout.
+ std::lock_guard<std::mutex> lock(*gCacheMutex);
+
+ // Use a loop in case cache items that are flushed held hard references to
+ // other cache items making those additional cache items eligible for
+ // flushing.
+ while (_flush(FALSE));  // _flush() returns TRUE as long as a pass removed at least one entry
+}
+
+void UnifiedCache::handleUnreferencedObject() const {  // NOTE(review): presumably invoked when a cached value drops its last hard reference - confirm against SharedObject.
+ std::lock_guard<std::mutex> lock(*gCacheMutex);
+ --fNumValuesInUse;  // one fewer value counts as in use
+ _runEvictionSlice();  // the newly unused value may put the cache over its unused-entry limit
+}
+
+#ifdef UNIFIED_CACHE_DEBUG
+#include <stdio.h>
+
+void UnifiedCache::dump() {  // Debug only (UNIFIED_CACHE_DEBUG): writes the contents of the global cache to stderr.
+ UErrorCode status = U_ZERO_ERROR;
+ const UnifiedCache *cache = getInstance(status);  // creates the cache if it does not exist yet
+ if (U_FAILURE(status)) {
+ fprintf(stderr, "Unified Cache: Error fetching cache.\n");
+ return;
+ }
+ cache->dumpContents();
+}
+
+void UnifiedCache::dumpContents() const {  // Debug only: takes the cache mutex, then delegates to _dumpContents().
+ std::lock_guard<std::mutex> lock(*gCacheMutex);
+ _dumpContents();
+}
+
+// Debug helper: writes one line to stderr for every cache entry that still has hard
+// references, followed by a summary count. On entry, gCacheMutex must be held.
+// Reads the key's fCreationStatus and the value's softRefCount directly, like the rest of this class.
+void UnifiedCache::_dumpContents() const {
+ int32_t pos = UHASH_FIRST;
+ const UHashElement *element = uhash_nextElement(fHashtable, &pos);
+ char buffer[256];
+ int32_t cnt = 0;  // entries that still have hard references
+ for (; element != NULL; element = uhash_nextElement(fHashtable, &pos)) {
+ const SharedObject *sharedObject =
+ (const SharedObject *) element->value.pointer;
+ const CacheKeyBase *key =
+ (const CacheKeyBase *) element->key.pointer;
+ if (sharedObject->hasHardReferences()) {
+ ++cnt;
+ fprintf(
+ stderr,
+ "Unified Cache: Key '%s', error %d, value %p, total refcount %d, soft refcount %d\n",
+ key->writeDescription(buffer, 256),
+ key->fCreationStatus,
+ sharedObject == fNoValue ? NULL :sharedObject,  // the fNoValue sentinel is printed as a null pointer
+ sharedObject->getRefCount(),
+ sharedObject->softRefCount);
+ }
+ }
+ fprintf(stderr, "Unified Cache: %d out of a total of %d still have hard references\n", cnt, uhash_count(fHashtable));
+}
+#endif
+
+UnifiedCache::~UnifiedCache() {  // NOTE(review): if the constructor failed before uhash_open(), fHashtable is NULL here - confirm the flush path tolerates that.
+ // Try our best to clean up first.
+ flush();
+ {
+ // Now all that should be left in the cache are entries that refer to
+ // each other and entries with hard references from outside the cache.
+ // Nothing we can do about these so proceed to wipe out the cache.
+ std::lock_guard<std::mutex> lock(*gCacheMutex);  // flush() above takes this mutex itself, so it is only acquired here afterwards
+ _flush(TRUE);
+ }
+ uhash_close(fHashtable);
+ fHashtable = nullptr;
+ delete fNoValue;
+ fNoValue = nullptr;
+}
+
+const UHashElement *
+UnifiedCache::_nextElement() const {  // Steps the persistent eviction cursor fEvictPos and returns the element it lands on.
+ const UHashElement *element = uhash_nextElement(fHashtable, &fEvictPos);
+ if (element == NULL) {  // nothing after the cursor: restart from UHASH_FIRST and try once more
+ fEvictPos = UHASH_FIRST;
+ return uhash_nextElement(fHashtable, &fEvictPos);  // may still be NULL; callers treat that as "nothing to visit"
+ }
+ return element;
+}
+
+UBool UnifiedCache::_flush(UBool all) const {  // One pass over the table: removes evictable entries, or every entry when all is TRUE. Returns TRUE if anything was removed.
+ UBool result = FALSE;
+ int32_t origSize = uhash_count(fHashtable);  // iteration budget: the entry count at the start of the pass
+ for (int32_t i = 0; i < origSize; ++i) {
+ const UHashElement *element = _nextElement();
+ if (element == nullptr) {
+ break;
+ }
+ if (all || _isEvictable(element)) {
+ const SharedObject *sharedObject =
+ (const SharedObject *) element->value.pointer;  // read the value before the element is removed
+ U_ASSERT(sharedObject->cachePtr == this);
+ uhash_removeElement(fHashtable, element);
+ removeSoftRef(sharedObject); // Deletes the sharedObject when softRefCount goes to zero.
+ result = TRUE;
+ }
+ }
+ return result;
+}
+
+int32_t UnifiedCache::_computeCountOfItemsToEvict() const {  // How many unused entries exceed the configured limits; never negative.
+ int32_t totalItems = uhash_count(fHashtable);
+ int32_t evictableItems = totalItems - fNumValuesInUse;
+ // Widen to 64 bits: setEvictionPolicy() puts no upper bound on the percentage, so the product can exceed int32_t.
+ int64_t unusedLimitByPercentage = static_cast<int64_t>(fNumValuesInUse) * fMaxPercentageOfInUse / 100;
+ int64_t unusedLimit = std::max<int64_t>(unusedLimitByPercentage, fMaxUnused);  // the more permissive of the two limits wins
+ int64_t countOfItemsToEvict = std::max<int64_t>(0, evictableItems - unusedLimit);
+ return static_cast<int32_t>(countOfItemsToEvict);  // at most evictableItems because unusedLimit >= fMaxUnused >= 0, so it fits
+}
+
+void UnifiedCache::_runEvictionSlice() const {  // Evicts a bounded number of unused entries; every caller in this file holds gCacheMutex.
+ int32_t maxItemsToEvict = _computeCountOfItemsToEvict();
+ if (maxItemsToEvict <= 0) {
+ return;
+ }
+ for (int32_t i = 0; i < MAX_EVICT_ITERATIONS; ++i) {  // examine at most MAX_EVICT_ITERATIONS elements per call
+ const UHashElement *element = _nextElement();
+ if (element == nullptr) {
+ break;
+ }
+ if (_isEvictable(element)) {
+ const SharedObject *sharedObject =
+ (const SharedObject *) element->value.pointer;  // read the value before the element is removed
+ uhash_removeElement(fHashtable, element);
+ removeSoftRef(sharedObject); // Deletes sharedObject when SoftRefCount goes to zero.
+ ++fAutoEvictedCount;
+ if (--maxItemsToEvict == 0) {  // quota for this slice reached
+ break;
+ }
+ }
+ }
+}
+
+void UnifiedCache::_putNew(  // Inserts a clone of key mapped to value; the key must not already be in the table (asserted).
+ const CacheKeyBase &key,
+ const SharedObject *value,
+ const UErrorCode creationStatus,
+ UErrorCode &status) const {
+ if (U_FAILURE(status)) {
+ return;
+ }
+ CacheKeyBase *keyToAdopt = key.clone();  // this copy is what gets stored in the hash table
+ if (keyToAdopt == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ keyToAdopt->fCreationStatus = creationStatus;  // stored with the key; _fetch() reports it back to callers
+ if (value->softRefCount == 0) {  // value is not yet referenced by any table entry
+ _registerPrimary(keyToAdopt, value);
+ }
+ void *oldValue = uhash_put(fHashtable, keyToAdopt, (void *) value, &status);
+ U_ASSERT(oldValue == nullptr);
+ (void)oldValue;  // presumably silences an unused-variable warning when U_ASSERT compiles to nothing
+ if (U_SUCCESS(status)) {
+ value->softRefCount++;  // the new table entry counts as one soft reference
+ }
+}
+
+void UnifiedCache::_putIfAbsentAndGet(  // Publishes value/status for key unless a finished entry already exists; then value and status are replaced by the cached ones.
+ const CacheKeyBase &key,
+ const SharedObject *&value,
+ UErrorCode &status) const {
+ std::lock_guard<std::mutex> lock(*gCacheMutex);
+ const UHashElement *element = uhash_find(fHashtable, &key);
+ if (element != NULL && !_inProgress(element)) {  // a completed entry exists already: use it
+ _fetch(element, value, status);
+ return;
+ }
+ if (element == NULL) {
+ UErrorCode putError = U_ZERO_ERROR;  // insertion errors are deliberately not reported through status
+ // best-effort basis only.
+ _putNew(key, value, status, putError);  // status is passed as the entry's creation status
+ } else {
+ _put(element, value, status);  // replace the in-progress placeholder
+ }
+ // Run an eviction slice. This will run even if we added a primary entry
+ // which doesn't increase the unused count, but that is still o.k
+ _runEvictionSlice();
+}
+
+
+UBool UnifiedCache::_poll(  // TRUE: value/status were filled from the cache. FALSE: nothing was cached and an in-progress placeholder insert was attempted.
+ const CacheKeyBase &key,
+ const SharedObject *&value,
+ UErrorCode &status) const {
+ U_ASSERT(value == NULL);
+ U_ASSERT(status == U_ZERO_ERROR);
+ std::unique_lock<std::mutex> lock(*gCacheMutex);  // unique_lock because the condition variable wait below needs one
+ const UHashElement *element = uhash_find(fHashtable, &key);
+
+ // If the hash table contains an inProgress placeholder entry for this key,
+ // this means that another thread is currently constructing the value object.
+ // Loop, waiting for that construction to complete.
+ while (element != NULL && _inProgress(element)) {
+ gInProgressValueAddedCond->wait(lock);
+ element = uhash_find(fHashtable, &key);  // look the key up again after every wake-up
+ }
+
+ // If the hash table contains an entry for the key,
+ // fetch out the contents and return them.
+ if (element != NULL) {
+ _fetch(element, value, status);
+ return TRUE;
+ }
+
+ // The hash table contained nothing for this key.
+ // Insert an inProgress place holder value.
+ // Our caller will create the final value and update the hash table.
+ _putNew(key, fNoValue, U_ZERO_ERROR, status);  // fNoValue plus U_ZERO_ERROR is exactly what _inProgress() recognizes
+ return FALSE;
+}
+
+void UnifiedCache::_get(  // Looks up key; on a miss creates the object with key.createObject() and publishes it in the cache.
+ const CacheKeyBase &key,
+ const SharedObject *&value,
+ const void *creationContext,
+ UErrorCode &status) const {
+ U_ASSERT(value == NULL);
+ U_ASSERT(status == U_ZERO_ERROR);
+ if (_poll(key, value, status)) {
+ if (value == fNoValue) {  // cached "no object" result: never hand the sentinel itself to the caller
+ SharedObject::clearPtr(value);
+ }
+ return;
+ }
+ if (U_FAILURE(status)) {  // _poll() could not insert the in-progress placeholder
+ return;
+ }
+ value = key.createObject(creationContext, status);  // runs without gCacheMutex: the lock taken inside _poll() has been released
+ U_ASSERT(value == NULL || value->hasHardReferences());
+ U_ASSERT(value != NULL || status != U_ZERO_ERROR);
+ if (value == NULL) {  // creation produced nothing: cache the sentinel together with the error status
+ SharedObject::copyPtr(fNoValue, value);
+ }
+ _putIfAbsentAndGet(key, value, status);
+ if (value == fNoValue) {  // again, never hand the sentinel itself to the caller
+ SharedObject::clearPtr(value);
+ }
+}
+
+void UnifiedCache::_registerPrimary(  // Marks theKey as primary, ties value to this cache, and counts value as both present and in use.
+ const CacheKeyBase *theKey, const SharedObject *value) const {
+ theKey->fIsPrimary = true;  // _isEvictable() treats primary and non-primary keys differently
+ value->cachePtr = this;
+ ++fNumValuesTotal;
+ ++fNumValuesInUse;
+}
+
+void UnifiedCache::_put(  // Replaces the in-progress placeholder stored in element with value and records status as its creation status.
+ const UHashElement *element,
+ const SharedObject *value,
+ const UErrorCode status) const {
+ U_ASSERT(_inProgress(element));
+ const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer;
+ const SharedObject *oldValue = (const SharedObject *) element->value.pointer;
+ theKey->fCreationStatus = status;
+ if (value->softRefCount == 0) {  // value is not yet referenced by any table entry
+ _registerPrimary(theKey, value);
+ }
+ value->softRefCount++;
+ UHashElement *ptr = const_cast<UHashElement *>(element);  // the value is overwritten in place inside the existing element
+ ptr->value.pointer = (void *) value;
+ U_ASSERT(oldValue == fNoValue);
+ removeSoftRef(oldValue);  // the placeholder entry no longer references fNoValue
+
+ // Wake the threads waiting in _poll(): the in-progress placeholder has been
+ // replaced with the final value (or with fNoValue plus an error status).
+ gInProgressValueAddedCond->notify_all();
+}
+
+void UnifiedCache::_fetch(  // Copies the entry's stored creation status into status and its value into value, taking a hard reference on the new value.
+ const UHashElement *element,
+ const SharedObject *&value,
+ UErrorCode &status) const {
+ const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer;
+ status = theKey->fCreationStatus;
+
+ // Since we have the cache lock, calling regular SharedObject add/removeRef
+ // could cause us to deadlock on ourselves since they may need to lock
+ // the cache mutex.
+ removeHardRef(value);  // release whatever the caller passed in; a null value is ignored
+ value = static_cast<const SharedObject *>(element->value.pointer);
+ addHardRef(value);
+}
+
+
+UBool UnifiedCache::_inProgress(const UHashElement* element) const {  // TRUE if element still holds the placeholder inserted by _poll().
+ UErrorCode status = U_ZERO_ERROR;
+ const SharedObject * value = NULL;
+ _fetch(element, value, status);  // takes a temporary hard reference on the stored value
+ UBool result = _inProgress(value, status);
+ removeHardRef(value);  // drop the temporary reference again
+ return result;
+}
+
+UBool UnifiedCache::_inProgress(  // In progress means: the fNoValue sentinel stored with U_ZERO_ERROR. fNoValue with an error status is a finished failure.
+ const SharedObject* theValue, UErrorCode creationStatus) const {
+ return (theValue == fNoValue && creationStatus == U_ZERO_ERROR);
+}
+
+UBool UnifiedCache::_isEvictable(const UHashElement *element) const  // Decides whether the entry may be dropped from the cache.
+{
+ const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer;
+ const SharedObject *theValue =
+ (const SharedObject *) element->value.pointer;
+
+ // Entries that are under construction are never evictable
+ if (_inProgress(theValue, theKey->fCreationStatus)) {
+ return FALSE;
+ }
+
+ // We can evict entries that are either not a primary or have just
+ // one reference (The one reference being from the cache itself).
+ return (!theKey->fIsPrimary || (theValue->softRefCount == 1 && theValue->noHardReferences()));
+}
+
+void UnifiedCache::removeSoftRef(const SharedObject *value) const {  // Drops one table-entry (soft) reference to value; at zero the value leaves the cache's bookkeeping.
+ U_ASSERT(value->cachePtr == this);
+ U_ASSERT(value->softRefCount > 0);
+ if (--value->softRefCount == 0) {
+ --fNumValuesTotal;
+ if (value->noHardReferences()) {  // no hard references remain either, so the object is deleted now
+ delete value;
+ } else {
+ // This path only happens from flush(all). Which only happens from the
+ // UnifiedCache destructor. Nulling out value.cacheptr changes the behavior
+ // of value.removeRef(), causing the deletion to be done there.
+ value->cachePtr = nullptr;
+ }
+ }
+}
+
+int32_t UnifiedCache::removeHardRef(const SharedObject *value) const {  // Drops one hard reference; returns the new count, or 0 when value is null.
+ int refCount = 0;
+ if (value) {
+ refCount = umtx_atomic_dec(&value->hardRefCount);
+ U_ASSERT(refCount >= 0);
+ if (refCount == 0) {
+ --fNumValuesInUse;  // the last hard reference is gone: the value is no longer in use
+ }
+ }
+ return refCount;
+}
+
+int32_t UnifiedCache::addHardRef(const SharedObject *value) const {  // Adds one hard reference; returns the new count, or 0 when value is null.
+ int refCount = 0;
+ if (value) {
+ refCount = umtx_atomic_inc(&value->hardRefCount);
+ U_ASSERT(refCount >= 1);
+ if (refCount == 1) {
+ fNumValuesInUse++;  // first hard reference: the value is in use again
+ }
+ }
+ return refCount;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/unifiedcache.h b/thirdparty/icu4c/common/unifiedcache.h
new file mode 100644
index 0000000000..a31998db20
--- /dev/null
+++ b/thirdparty/icu4c/common/unifiedcache.h
@@ -0,0 +1,556 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2015, International Business Machines Corporation and
+* others. All Rights Reserved.
+******************************************************************************
+*
+* File UNIFIEDCACHE.H - The ICU Unified cache.
+******************************************************************************
+*/
+
+#ifndef __UNIFIED_CACHE_H__
+#define __UNIFIED_CACHE_H__
+
+#include "utypeinfo.h" // for 'typeid' to work
+
+#include "unicode/uobject.h"
+#include "unicode/locid.h"
+#include "sharedobject.h"
+#include "unicode/unistr.h"
+#include "cstring.h"
+#include "ustr_imp.h"
+
+struct UHashtable;
+struct UHashElement;
+
+U_NAMESPACE_BEGIN
+
+class UnifiedCache;
+
+/**
+ * A base class for all cache keys.
+ */
+class U_COMMON_API CacheKeyBase : public UObject {
+ public:
+ CacheKeyBase() : fCreationStatus(U_ZERO_ERROR), fIsPrimary(false) {} // starts with U_ZERO_ERROR and not primary
+
+ /**
+ * Copy constructor. Needed to support cloning. Copies the creation status; the copy is never primary.
+ */
+ CacheKeyBase(const CacheKeyBase &other)
+ : UObject(other), fCreationStatus(other.fCreationStatus), fIsPrimary(false) { }
+ virtual ~CacheKeyBase();
+
+ /**
+ * Returns the hash code for this object.
+ */
+ virtual int32_t hashCode() const = 0;
+
+ /**
+ * Clones this object polymorphically. Caller owns returned value.
+ */
+ virtual CacheKeyBase *clone() const = 0;
+
+ /**
+ * Equality operator.
+ */
+ virtual UBool operator == (const CacheKeyBase &other) const = 0;
+
+ /**
+ * Create a new object for this key. Called by cache on cache miss.
+ * createObject must add a reference to the object it returns. Note
+ * that getting an object from the cache and returning it without calling
+ * removeRef on it satisfies this requirement. It can also return NULL
+ * and set status to an error.
+ *
+ * @param creationContext the context in which the object is being
+ * created. May be NULL.
+ * @param status Implementations can return a failure here.
+ * In addition, implementations may return a
+ * non NULL object and set a warning status.
+ */
+ virtual const SharedObject *createObject(
+ const void *creationContext, UErrorCode &status) const = 0;
+
+ /**
+ * Writes a description of this key to buffer and returns buffer. Written
+ * description is NUL terminated.
+ */
+ virtual char *writeDescription(char *buffer, int32_t bufSize) const = 0;
+
+ /**
+ * Inequality operator. Defined as the negation of operator==.
+ */
+ UBool operator != (const CacheKeyBase &other) const {
+ return !(*this == other);
+ }
+ private:
+ mutable UErrorCode fCreationStatus; // read by UnifiedCache (friend) when testing for in-progress entries
+ mutable UBool fIsPrimary; // read by UnifiedCache::_isEvictable; a finished non-primary entry is evictable
+ friend class UnifiedCache;
+};
+
+
+
+/**
+ * Templated version of CacheKeyBase.
+ * A key of type CacheKey<T> maps to a value of type T.
+ */
+template<typename T>
+class CacheKey : public CacheKeyBase {
+ public:
+ virtual ~CacheKey() { }
+ /**
+ * The template parameter, T, determines the hash code returned.
+ */
+ virtual int32_t hashCode() const {
+ const char *s = typeid(T).name(); // the hash is taken over the type's name string
+ return ustr_hashCharsN(s, static_cast<int32_t>(uprv_strlen(s)));
+ }
+
+ /**
+ * Use the value type, T, as the description. Assumes bufLen >= 1: the last byte is always written.
+ */
+ virtual char *writeDescription(char *buffer, int32_t bufLen) const {
+ const char *s = typeid(T).name();
+ uprv_strncpy(buffer, s, bufLen);
+ buffer[bufLen - 1] = 0; // force a NUL into the final byte of the buffer
+ return buffer;
+ }
+
+ /**
+ * Two objects are equal if they are of the same type.
+ */
+ virtual UBool operator == (const CacheKeyBase &other) const {
+ return typeid(*this) == typeid(other); // compares the dynamic types of both keys
+ }
+};
+
+/**
+ * Cache key based on locale.
+ * A key of type LocaleCacheKey<T> maps to a value of type T.
+ */
+template<typename T>
+class LocaleCacheKey : public CacheKey<T> {
+ protected:
+ Locale fLoc; // the locale this key was built from
+ public:
+ LocaleCacheKey(const Locale &loc) : fLoc(loc) {}
+ LocaleCacheKey(const LocaleCacheKey<T> &other)
+ : CacheKey<T>(other), fLoc(other.fLoc) { }
+ virtual ~LocaleCacheKey() { }
+ virtual int32_t hashCode() const { // combines the type hash and the locale hash in unsigned arithmetic
+ return (int32_t)(37u * (uint32_t)CacheKey<T>::hashCode() + (uint32_t)fLoc.hashCode());
+ }
+ virtual UBool operator == (const CacheKeyBase &other) const {
+ // reflexive
+ if (this == &other) {
+ return true;
+ }
+ if (!CacheKey<T>::operator == (other)) {
+ return false;
+ }
+ // We know this and other are of the same class because operator== on
+ // CacheKey returned true, so the static_cast below is safe.
+ const LocaleCacheKey<T> *fOther =
+ static_cast<const LocaleCacheKey<T> *>(&other);
+ return fLoc == fOther->fLoc;
+ }
+ virtual CacheKeyBase *clone() const { // returns a heap-allocated copy of this key
+ return new LocaleCacheKey<T>(*this);
+ }
+ virtual const T *createObject(
+ const void *creationContext, UErrorCode &status) const; // declared only; no definition in this header
+ /**
+ * Use the locale id as the description. Assumes bufLen >= 1: the last byte is always written.
+ */
+ virtual char *writeDescription(char *buffer, int32_t bufLen) const {
+ const char *s = fLoc.getName();
+ uprv_strncpy(buffer, s, bufLen);
+ buffer[bufLen - 1] = 0; // force a NUL into the final byte of the buffer
+ return buffer;
+ }
+
+};
+
+/**
+ * The unified cache. A singleton type.
+ * Design doc here:
+ * https://docs.google.com/document/d/1RwGQJs4N4tawNbf809iYDRCvXoMKqDJihxzYt1ysmd8/edit?usp=sharing
+ */
+class U_COMMON_API UnifiedCache : public UnifiedCacheBase {
+ public:
+ /**
+ * @internal
+ * Do not call directly. Instead use UnifiedCache::getInstance() as
+ * there should be only one UnifiedCache in an application.
+ */
+ UnifiedCache(UErrorCode &status);
+
+ /**
+ * Return a pointer to the global cache instance.
+ */
+ static UnifiedCache *getInstance(UErrorCode &status);
+
+ /**
+ * Fetches a value from the cache by key. Equivalent to
+ * get(key, NULL, ptr, status);
+ */
+ template<typename T>
+ void get(
+ const CacheKey<T>& key,
+ const T *&ptr,
+ UErrorCode &status) const {
+ get(key, NULL, ptr, status); // forwards with a NULL creationContext
+ }
+
+ /**
+ * Fetches value from the cache by key.
+ *
+ * @param key the cache key.
+ * @param creationContext passed verbatim to createObject method of key
+ * @param ptr On entry, ptr must be NULL or be included in
+ * the reference count of the object it points
+ * to. On exit, ptr points to the fetched object
+ * from the cache or is left unchanged on
+ * failure. Caller must call removeRef on ptr
+ * if set to a non NULL value.
+ * @param status Any error returned here. May be set to a
+ * warning value even if ptr is set.
+ */
+ template<typename T>
+ void get(
+ const CacheKey<T>& key,
+ const void *creationContext,
+ const T *&ptr,
+ UErrorCode &status) const {
+ if (U_FAILURE(status)) {
+ return; // an incoming failure leaves ptr and status untouched
+ }
+ UErrorCode creationStatus = U_ZERO_ERROR;
+ const SharedObject *value = NULL;
+ _get(key, value, creationContext, creationStatus);
+ const T *tvalue = (const T *) value;
+ if (U_SUCCESS(creationStatus)) {
+ SharedObject::copyPtr(tvalue, ptr); // ptr is only updated when creationStatus is a success
+ }
+ SharedObject::clearPtr(tvalue); // presumably drops the reference handed back by _get
+ // Take care not to overwrite a warning status passed in with
+ // another warning or U_ZERO_ERROR.
+ if (status == U_ZERO_ERROR || U_FAILURE(creationStatus)) {
+ status = creationStatus;
+ }
+ }
+
+#ifdef UNIFIED_CACHE_DEBUG
+ /**
+ * Dumps the contents of this cache to standard error. Used for testing of
+ * cache only.
+ */
+ void dumpContents() const;
+#endif
+
+ /**
+ * Convenience method to get a value of type T from cache for a
+ * particular locale with creationContext == NULL.
+ * @param loc the locale
+ * @param ptr On entry, must be NULL or included in the ref count
+ * of the object to which it points.
+ * On exit, fetched value stored here or is left
+ * unchanged on failure. Caller must call removeRef on
+ * ptr if set to a non NULL value.
+ * @param status Any error returned here. May be set to a
+ * warning value even if ptr is set.
+ */
+ template<typename T>
+ static void getByLocale(
+ const Locale &loc, const T *&ptr, UErrorCode &status) {
+ const UnifiedCache *cache = getInstance(status);
+ if (U_FAILURE(status)) {
+ return; // no lookup is attempted once status holds a failure
+ }
+ cache->get(LocaleCacheKey<T>(loc), ptr, status); // looks up with a temporary LocaleCacheKey<T> built from loc
+ }
+
+#ifdef UNIFIED_CACHE_DEBUG
+ /**
+ * Dumps the cache contents to stderr. For testing only.
+ */
+ static void dump();
+#endif
+
+ /**
+ * Returns the number of keys in this cache. For testing only.
+ */
+ int32_t keyCount() const;
+
+ /**
+ * Removes any values from cache that are not referenced outside
+ * the cache.
+ */
+ void flush() const;
+
+ /**
+ * Configures at what point eviction of unused entries will begin.
+ * Eviction is triggered whenever the number of evictable keys exceeds
+ * BOTH count AND (number of in-use items) * (percentageOfInUseItems / 100).
+ * Once the number of unused entries drops below one of these,
+ * eviction ceases. Because eviction happens incrementally,
+ * the actual unused entry count may exceed both these numbers
+ * from time to time.
+ *
+ * A cache entry is defined as unused if it is not essential to guarantee
+ * that for a given key X, the cache returns the same reference to the
+ * same value as long as the client already holds a reference to that
+ * value.
+ *
+ * If this method is never called, the default settings are 1000 and 100%.
+ *
+ * Although this method is thread-safe, it is designed to be called at
+ * application startup. If it is called in the middle of execution, it
+ * will have no immediate effect on the cache. However over time, the
+ * cache will perform eviction slices in an attempt to honor the new
+ * settings.
+ *
+ * If a client already holds references to many different unique values
+ * in the cache such that the number of those unique values far exceeds
+ * "count" then the cache may not be able to maintain this maximum.
+ * However, if this happens, the cache still guarantees that the number of
+ * unused entries will remain only a small percentage of the total cache
+ * size.
+ *
+ * If the parameters passed are negative, setEvictionPolicy sets status to
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ */
+ void setEvictionPolicy(
+ int32_t count, int32_t percentageOfInUseItems, UErrorCode &status);
+
+
+ /**
+ * Returns how many entries have been auto evicted during the lifetime
+ * of this cache. This only includes auto evicted entries, not
+ * entries evicted because of a call to flush().
+ */
+ int64_t autoEvictedCount() const;
+
+ /**
+ * Returns the unused entry count in this cache. For testing only,
+ * Regular clients will not need this.
+ */
+ int32_t unusedCount() const;
+
+ virtual void handleUnreferencedObject() const;
+ virtual ~UnifiedCache();
+
+ private:
+ UHashtable *fHashtable;
+ mutable int32_t fEvictPos;
+ mutable int32_t fNumValuesTotal;
+ mutable int32_t fNumValuesInUse;
+ int32_t fMaxUnused;
+ int32_t fMaxPercentageOfInUse;
+ mutable int64_t fAutoEvictedCount;
+ SharedObject *fNoValue;
+
+ UnifiedCache(const UnifiedCache &other);
+ UnifiedCache &operator=(const UnifiedCache &other);
+
+ /**
+ * Flushes the contents of the cache. If cache values hold references to other
+ * cache values then _flush should be called in a loop until it returns false.
+ *
+ * On entry, gCacheMutex must be held.
+ * On exit, those values which are evictable are flushed.
+ *
+ * @param all if false flush evictable items only, which are those with no external
+ * references, plus those that can be safely recreated.<br>
+ * if true, flush all elements. Any values (sharedObjects) with remaining
+ * hard (external) references are not deleted, but are detached from
+ * the cache, so that a subsequent removeRefs can delete them.
+ * _flush is not thread safe when all is true.
+ * @return true if any value in cache was flushed or false otherwise.
+ */
+ UBool _flush(UBool all) const;
+
+ /**
+ * Gets value out of cache.
+ * On entry. gCacheMutex must not be held. value must be NULL. status
+ * must be U_ZERO_ERROR.
+ * On exit. value and status set to what is in cache at key or on cache
+ * miss the key's createObject() is called and value and status are set to
+ * the result of that. In this latter case, best effort is made to add the
+ * value and status to the cache. If createObject() fails to create a value,
+ * fNoValue is stored in cache, and value is set to NULL. Caller must call
+ * removeRef on value if non NULL.
+ */
+ void _get(
+ const CacheKeyBase &key,
+ const SharedObject *&value,
+ const void *creationContext,
+ UErrorCode &status) const;
+
+ /**
+ * Attempts to fetch value and status for key from cache.
+ * On entry, gCacheMutex must not be held, value must be NULL, and status must
+ * be U_ZERO_ERROR.
+ * On exit, either returns false (In this
+ * case caller should try to create the object) or returns true with value
+ * pointing to the fetched value and status set to fetched status. When
+ * false is returned status may be set to failure if an in progress hash
+ * entry could not be made but value will remain unchanged. When true is
+ * returned, caller must call removeRef() on value.
+ */
+ UBool _poll(
+ const CacheKeyBase &key,
+ const SharedObject *&value,
+ UErrorCode &status) const;
+
+ /**
+ * Places a new value and creationStatus in the cache for the given key.
+ * On entry, gCacheMutex must be held. key must not exist in the cache.
+ * On exit, value and creation status placed under key. Soft reference added
+ * to value on successful add. On error sets status.
+ */
+ void _putNew(
+ const CacheKeyBase &key,
+ const SharedObject *value,
+ const UErrorCode creationStatus,
+ UErrorCode &status) const;
+
+ /**
+ * Places value and status at key if there is no value at key or if cache
+ * entry for key is in progress. Otherwise, it leaves the current value and
+ * status there.
+ *
+ * On entry. gCacheMutex must not be held. Value must be
+ * included in the reference count of the object to which it points.
+ *
+ * On exit, value and status are changed to what was already in the cache if
+ * something was there and not in progress. Otherwise, value and status are left
+ * unchanged in which case they are placed in the cache on a best-effort basis.
+ * Caller must call removeRef() on value.
+ */
+ void _putIfAbsentAndGet(
+ const CacheKeyBase &key,
+ const SharedObject *&value,
+ UErrorCode &status) const;
+
+ /**
+ * Returns the next element in the cache round robin style.
+ * Returns nullptr if the cache is empty.
+ * On entry, gCacheMutex must be held.
+ */
+ const UHashElement *_nextElement() const;
+
+ /**
+ * Return the number of cache items that would need to be evicted
+ * to bring usage into conformance with eviction policy.
+ *
+ * An item corresponds to an entry in the hash table, a hash table element.
+ *
+ * On entry, gCacheMutex must be held.
+ */
+ int32_t _computeCountOfItemsToEvict() const;
+
+ /**
+ * Run an eviction slice.
+ * On entry, gCacheMutex must be held.
+ * _runEvictionSlice runs a slice of the evict pipeline by examining the next
+ * 10 entries in the cache round robin style evicting them if they are eligible.
+ */
+ void _runEvictionSlice() const;
+
+ /**
+ * Register a primary cache entry. A primary key is the first key to create
+ * a given SharedObject value. Subsequent keys whose create function
+ * produce references to an already existing SharedObject are not primary -
+ * they can be evicted and subsequently recreated.
+ *
+ * On entry, gCacheMutex must be held.
+ * On exit, items in use count incremented, entry is marked as a primary
+ * entry, and value registered with cache so that subsequent calls to
+ * addRef() and removeRef() on it correctly interact with the cache.
+ */
+ void _registerPrimary(const CacheKeyBase *theKey, const SharedObject *value) const;
+
+ /**
+ * Store a value and creation error status in given hash entry.
+ * On entry, gCacheMutex must be held. Hash entry element must be in progress.
+ * value must be non NULL.
+ * On Exit, soft reference added to value. value and status stored in hash
+ * entry. Soft reference removed from previous stored value. Waiting
+ * threads notified.
+ */
+ void _put(
+ const UHashElement *element,
+ const SharedObject *value,
+ const UErrorCode status) const;
+ /**
+ * Remove a soft reference, and delete the SharedObject if no references remain.
+ * To be used from within the UnifiedCache implementation only.
+ * gCacheMutex must be held by caller.
+ * @param value the SharedObject to be acted on.
+ */
+ void removeSoftRef(const SharedObject *value) const;
+
+ /**
+ * Increment the hard reference count of the given SharedObject.
+ * gCacheMutex must be held by the caller.
+ * Updates fNumValuesInUse on transitions between zero and one reference.
+ *
+ * @param value The SharedObject to be referenced.
+ * @return the hard reference count after the addition.
+ */
+ int32_t addHardRef(const SharedObject *value) const;
+
+ /**
+ * Decrement the hard reference count of the given SharedObject.
+ * gCacheMutex must be held by the caller.
+ * Updates fNumValuesInUse on transitions between one and zero reference.
+ *
+ * @param value The SharedObject to be dereferenced.
+ * @return the hard reference count after the removal.
+ */
+ int32_t removeHardRef(const SharedObject *value) const;
+
+
+#ifdef UNIFIED_CACHE_DEBUG
+ void _dumpContents() const;
+#endif
+
+ /**
+ * Fetch value and error code from a particular hash entry.
+ * On entry, gCacheMutex must be held. value must be either NULL or must be
+ * included in the ref count of the object to which it points.
+ * On exit, value and status set to what is in the hash entry. Caller must
+ * eventually call removeRef on value.
+ * If hash entry is in progress, value will be set to fNoValue and status will
+ * be set to U_ZERO_ERROR.
+ */
+ void _fetch(const UHashElement *element, const SharedObject *&value,
+ UErrorCode &status) const;
+
+ /**
+ * Determine if given hash entry is in progress.
+ * On entry, gCacheMutex must be held.
+ */
+ UBool _inProgress(const UHashElement *element) const;
+
+ /**
+ * Determine if given hash entry is in progress.
+ * On entry, gCacheMutex must be held.
+ */
+ UBool _inProgress(const SharedObject *theValue, UErrorCode creationStatus) const;
+
+ /**
+ * Determine if given hash entry is eligible for eviction.
+ * On entry, gCacheMutex must be held.
+ */
+ UBool _isEvictable(const UHashElement *element) const;
+};
+
+U_NAMESPACE_END
+
+#endif
diff --git a/thirdparty/icu4c/common/unifilt.cpp b/thirdparty/icu4c/common/unifilt.cpp
new file mode 100644
index 0000000000..4ab0d9b5f9
--- /dev/null
+++ b/thirdparty/icu4c/common/unifilt.cpp
@@ -0,0 +1,71 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2001-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 07/18/01 aliu Creation.
+**********************************************************************
+*/
+
+#include "unicode/unifilt.h"
+#include "unicode/rep.h"
+#include "unicode/utf16.h"
+
+U_NAMESPACE_BEGIN
+UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFilter)
+
+
+/* UnicodeMatcher's destructor is defined here due to the lack of another
+ source file for it. It can't be defined in the header. */
+UnicodeMatcher::~UnicodeMatcher() {}
+
+UnicodeFilter::~UnicodeFilter() {} // out-of-line destructor with an empty body
+
+/**
+ * UnicodeFunctor API. Returns this filter itself, viewed as a UnicodeMatcher.
+ * Note that UnicodeMatcher is a base class of UnicodeFilter.
+ */
+UnicodeMatcher* UnicodeFilter::toMatcher() const {
+ return const_cast<UnicodeFilter *>(this); // const is cast away; no new object is created
+}
+
+void UnicodeFilter::setData(const TransliterationRuleData*) {} // the argument is unnamed and ignored
+
+/**
+ * Default implementation of UnicodeMatcher::matches() for Unicode
+ * filters. Matches a single code point at offset (either one or
+ * two 16-bit code units). offset < limit is handled as a forward match, offset > limit as a backward one.
+ */
+UMatchDegree UnicodeFilter::matches(const Replaceable& text,
+ int32_t& offset,
+ int32_t limit,
+ UBool incremental) {
+ UChar32 c;
+ if (offset < limit &&
+ contains(c = text.char32At(offset))) {
+ offset += U16_LENGTH(c); // forward: step over the matched code point
+ return U_MATCH;
+ }
+ if (offset > limit &&
+ contains(c = text.char32At(offset))) {
+ // Backup offset by 1, unless the preceding character is a
+ // surrogate pair -- then backup by 2 (keep offset pointing at
+ // the lead surrogate).
+ --offset;
+ if (offset >= 0) { // only look at the preceding character when there is one
+ offset -= U16_LENGTH(text.char32At(offset)) - 1;
+ }
+ return U_MATCH;
+ }
+ if (incremental && offset == limit) {
+ return U_PARTIAL_MATCH; // nothing left to examine, but the caller asked for incremental matching
+ }
+ return U_MISMATCH; // offset is left unchanged on every non-match path
+}
+
+U_NAMESPACE_END
+
+//eof
diff --git a/thirdparty/icu4c/common/unifunct.cpp b/thirdparty/icu4c/common/unifunct.cpp
new file mode 100644
index 0000000000..f3995b298d
--- /dev/null
+++ b/thirdparty/icu4c/common/unifunct.cpp
@@ -0,0 +1,28 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2002-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#include "unicode/unifunct.h"
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFunctor)
+
+UnicodeFunctor::~UnicodeFunctor() {} // out-of-line destructor with an empty body
+
+UnicodeMatcher* UnicodeFunctor::toMatcher() const { // base-class version
+ return 0; // null pointer: this version supplies no matcher
+}
+
+UnicodeReplacer* UnicodeFunctor::toReplacer() const { // base-class version
+ return 0; // null pointer: this version supplies no replacer
+}
+
+U_NAMESPACE_END
+
+//eof
diff --git a/thirdparty/icu4c/common/uniquecharstr.h b/thirdparty/icu4c/common/uniquecharstr.h
new file mode 100644
index 0000000000..10cc924f7f
--- /dev/null
+++ b/thirdparty/icu4c/common/uniquecharstr.h
@@ -0,0 +1,98 @@
+// © 2020 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// uniquecharstr.h
+// created: 2020sep01 Frank Yung-Fong Tang
+
+#ifndef __UNIQUECHARSTR_H__
+#define __UNIQUECHARSTR_H__
+
+#include "charstr.h"
+#include "uassert.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Stores NUL-terminated strings with duplicate elimination.
+ * Checks for unique UTF-16 string pointers and converts to invariant characters.
+ *
+ * Intended to be stack-allocated. Add strings, get a unique number for each,
+ * freeze the object, get a char * pointer for each string,
+ * call orphanCharStrings() to capture the string storage, and let this object go out of scope.
+ */
+class UniqueCharStrings {
+public:
+ UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
+ // Note: We hash on string contents but store stable char16_t * pointers.
+ // If the strings are stored in resource bundles which should be built with
+ // duplicate elimination, then we should be able to hash on just the pointer values.
+ uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
+ if (U_FAILURE(errorCode)) { return; } // strings stays nullptr in this case
+ strings = new CharString();
+ if (strings == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ }
+ }
+ ~UniqueCharStrings() {
+ uhash_close(&map);
+ delete strings; // nullptr here if the storage was orphaned
+ }
+
+ /** Returns/orphans the CharString that contains all strings. Afterwards this object's strings member is nullptr. */
+ CharString *orphanCharStrings() {
+ CharString *result = strings;
+ strings = nullptr;
+ return result;
+ }
+
+ /**
+ * Adds a string and returns a unique number for it (0 on an incoming failure or when frozen).
+ * The string's buffer contents must not change, nor move around in memory,
+ * while this UniqueCharStrings is in use.
+ * The string contents must be NUL-terminated exactly at s.length().
+ *
+ * Best used with read-only-alias UnicodeString objects that point to
+ * stable storage, such as strings returned by resource bundle functions.
+ */
+ int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode)) { return 0; }
+ if (isFrozen) {
+ errorCode = U_NO_WRITE_PERMISSION;
+ return 0;
+ }
+ // The string points into the resource bundle.
+ const char16_t *p = s.getBuffer();
+ int32_t oldIndex = uhash_geti(&map, p);
+ if (oldIndex != 0) { // found duplicate
+ return oldIndex;
+ }
+ // Explicit NUL terminator for the previous string.
+ // The strings object is also terminated with one implicit NUL.
+ strings->append(0, errorCode);
+ int32_t newIndex = strings->length(); // offset at which this string starts inside *strings
+ strings->appendInvariantChars(s, errorCode);
+ uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
+ return newIndex; // returned even if errorCode was set by the calls above
+ }
+
+ void freeze() { isFrozen = true; } // after this, add() fails with U_NO_WRITE_PERMISSION
+
+ /**
+ * Returns a string pointer for its unique number, if this object is frozen.
+ * Otherwise nullptr. Also returns nullptr for i <= 0.
+ */
+ const char *get(int32_t i) const {
+ U_ASSERT(isFrozen);
+ return isFrozen && i > 0 ? strings->data() + i : nullptr; // NOTE(review): dereferences strings, so it looks unsafe after orphanCharStrings() - confirm
+ }
+
+private:
+ UHashtable map; // NOTE(review): no copy operations are declared; presumably instances are never copied - confirm
+ CharString *strings; // owned: deleted in the destructor unless orphaned first
+ bool isFrozen = false;
+};
+
+U_NAMESPACE_END
+
+#endif // __UNIQUECHARSTR_H__
diff --git a/thirdparty/icu4c/common/uniset.cpp b/thirdparty/icu4c/common/uniset.cpp
new file mode 100644
index 0000000000..b73d612f24
--- /dev/null
+++ b/thirdparty/icu4c/common/uniset.cpp
@@ -0,0 +1,2356 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 10/20/99 alan Creation.
+**********************************************************************
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/parsepos.h"
+#include "unicode/symtable.h"
+#include "unicode/uniset.h"
+#include "unicode/ustring.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include "ruleiter.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "patternprops.h"
+#include "uelement.h"
+#include "util.h"
+#include "uvector.h"
+#include "charstr.h"
+#include "ustrfmt.h"
+#include "uassert.h"
+#include "bmpset.h"
+#include "unisetspan.h"
+
+// Define UChar constants using hex for EBCDIC compatibility
+// Used #define to reduce private static exports and memory access time.
+#define SET_OPEN ((UChar)0x005B) /*[*/
+#define SET_CLOSE ((UChar)0x005D) /*]*/
+#define HYPHEN ((UChar)0x002D) /*-*/
+#define COMPLEMENT ((UChar)0x005E) /*^*/
+#define COLON ((UChar)0x003A) /*:*/
+#define BACKSLASH ((UChar)0x005C) /*\*/
+#define INTERSECTION ((UChar)0x0026) /*&*/
+#define UPPER_U ((UChar)0x0055) /*U*/
+#define LOWER_U ((UChar)0x0075) /*u*/
+#define OPEN_BRACE ((UChar)123) /*{*/
+#define CLOSE_BRACE ((UChar)125) /*}*/
+#define UPPER_P ((UChar)0x0050) /*P*/
+#define LOWER_P ((UChar)0x0070) /*p*/
+#define UPPER_N ((UChar)78) /*N*/
+#define EQUALS ((UChar)0x003D) /*=*/
+
+// HIGH_VALUE > all valid values. 110000 for codepoints
+#define UNICODESET_HIGH 0x0110000
+
+// LOW <= all valid values. ZERO for codepoints
+#define UNICODESET_LOW 0x000000
+
+/** Max list [0, 1, 2, ..., max code point, HIGH] */
+constexpr int32_t MAX_LENGTH = UNICODESET_HIGH + 1;
+
+U_NAMESPACE_BEGIN
+
+SymbolTable::~SymbolTable() {} // out-of-line destructor with an empty body
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSet)
+
+/**
+ * Modify the given UChar32 variable so that it is in range, by
+ * pinning values < UNICODESET_LOW to UNICODESET_LOW, and
+ * pinning values > UNICODESET_HIGH-1 to UNICODESET_HIGH-1.
+ * It modifies its argument in-place and also returns the pinned value.
+ */
+static inline UChar32 pinCodePoint(UChar32& c) {
+ if (c < UNICODESET_LOW) {
+ c = UNICODESET_LOW; // clamp from below
+ } else if (c > (UNICODESET_HIGH-1)) {
+ c = (UNICODESET_HIGH-1); // clamp from above
+ }
+ return c;
+}
+
+//----------------------------------------------------------------
+// Debugging
+//----------------------------------------------------------------
+
+// DO NOT DELETE THIS CODE. This code is used to debug memory leaks.
+// To enable the debugging, define the symbol DEBUG_MEM in the line
+// below. This will result in text being sent to stdout that looks
+// like this:
+// DEBUG UnicodeSet: ct 0x00A39B20; 397 [\u0A81-\u0A83\u0A85-
+// DEBUG UnicodeSet: dt 0x00A39B20; 396 [\u0A81-\u0A83\u0A85-
+// Each line lists a construction (ct) or destruction (dt) event, the
+// object address, the number of outstanding objects after the event,
+// and the pattern of the object in question.
+
+// #define DEBUG_MEM
+
+#ifdef DEBUG_MEM
+#include <stdio.h>
+static int32_t _dbgCount = 0;
+
+static inline void _dbgct(UnicodeSet* set) { // DEBUG_MEM builds only: logs a construction event for set
+ UnicodeString str;
+ set->toPattern(str, TRUE);
+ char buf[40];
+ str.extract(0, 39, buf, ""); // at most 39 units of the pattern go into buf
+ printf("DEBUG UnicodeSet: ct %p; %d %s\n", (void*)set, ++_dbgCount, buf); // %p with void*: a pointer must not be passed for %X
+}
+
+static inline void _dbgdt(UnicodeSet* set) { // DEBUG_MEM builds only: logs a destruction event for set
+ UnicodeString str;
+ set->toPattern(str, TRUE);
+ char buf[40];
+ str.extract(0, 39, buf, ""); // at most 39 units of the pattern go into buf
+ printf("DEBUG UnicodeSet: dt %p; %d %s\n", (void*)set, --_dbgCount, buf); // %p with void*: a pointer must not be passed for %X
+}
+
+#else
+
+#define _dbgct(set)
+#define _dbgdt(set)
+
+#endif
+
+//----------------------------------------------------------------
+// UnicodeString in UVector support
+//----------------------------------------------------------------
+
+static void U_CALLCONV cloneUnicodeString(UElement *dst, UElement *src) { // element-copy callback handed to strings->assign()
+ dst->pointer = new UnicodeString(*(UnicodeString*)src->pointer); // NOTE(review): the allocation result is not checked here
+}
+
+static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { // both elements are read as UnicodeString pointers
+ const UnicodeString &a = *(const UnicodeString*)t1.pointer;
+ const UnicodeString &b = *(const UnicodeString*)t2.pointer;
+ return a.compare(b); // result of UnicodeString::compare() is returned unchanged
+}
+
+UBool UnicodeSet::hasStrings() const { // true only when the strings vector exists and is non-empty
+ return strings != nullptr && !strings->isEmpty();
+}
+
+int32_t UnicodeSet::stringsSize() const { // a missing strings vector counts as size 0
+ return strings == nullptr ? 0 : strings->size();
+}
+
+UBool UnicodeSet::stringsContains(const UnicodeString &s) const { // false when there is no strings vector
+ return strings != nullptr && strings->contains((void*) &s);
+}
+
+//----------------------------------------------------------------
+// Constructors &c
+//----------------------------------------------------------------
+
+/**
+ * Constructs an empty set.
+ */
+UnicodeSet::UnicodeSet() {
+ list[0] = UNICODESET_HIGH; // the first list slot holds UNICODESET_HIGH
+ _dbgct(this); // expands to nothing unless DEBUG_MEM is defined
+}
+
+/**
+ * Constructs a set containing the given range. If <code>start >
+ * end</code> then an empty set is created.
+ *
+ * @param start first character, inclusive, of range
+ * @param end last character, inclusive, of range
+ */
+UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) {
+ list[0] = UNICODESET_HIGH; // same starting state as the default constructor
+ add(start, end);
+ _dbgct(this);
+}
+
+/**
+ * Constructs a set that is identical to the given UnicodeSet.
+ */
+UnicodeSet::UnicodeSet(const UnicodeSet& o) : UnicodeFilter(o) {
+ *this = o; // the copying itself is done by operator=, i.e. copyFrom(o, FALSE)
+ _dbgct(this);
+}
+
+// Copy-construct as thawed. The second parameter is unnamed and unused; it only selects this overload.
+UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) : UnicodeFilter(o) {
+ if (ensureCapacity(o.len)) {
+ // *this = o except for bmpSet and stringSpan
+ len = o.len;
+ uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32));
+ if (o.hasStrings()) {
+ UErrorCode status = U_ZERO_ERROR;
+ if (!allocateStrings(status) ||
+ (strings->assign(*o.strings, cloneUnicodeString, status), U_FAILURE(status))) {
+ setToBogus();
+ return; // NOTE(review): leaves without _dbgct(this), unlike the success path
+ }
+ }
+ if (o.pat) {
+ setPattern(o.pat, o.patLen);
+ }
+ _dbgct(this);
+ }
+}
+
+/**
+ * Destructs the set.
+ */
+UnicodeSet::~UnicodeSet() {
+ _dbgdt(this); // first!
+ if (list != stackList) { // list is freed only when it does not point at stackList
+ uprv_free(list);
+ }
+ delete bmpSet;
+ if (buffer != stackList) { // same check for the secondary buffer
+ uprv_free(buffer);
+ }
+ delete strings;
+ delete stringSpan;
+ releasePattern();
+}
+
+/**
+ * Assigns this object to be a copy of another.
+ */
+UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) {
+ return copyFrom(o, FALSE); // FALSE: bmpSet and stringSpan are copied too when o has them
+}
+
+UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) { // asThawed: skip copying bmpSet and stringSpan
+ if (this == &o) {
+ return *this; // self-assignment is a no-op
+ }
+ if (isFrozen()) {
+ return *this; // a frozen set is left unmodified
+ }
+ if (o.isBogus()) {
+ setToBogus();
+ return *this;
+ }
+ if (!ensureCapacity(o.len)) {
+ // ensureCapacity will mark the UnicodeSet as Bogus if OOM failure happens.
+ return *this;
+ }
+ len = o.len;
+ uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32));
+ if (o.bmpSet != nullptr && !asThawed) {
+ bmpSet = new BMPSet(*o.bmpSet, list, len); // built against this set's own list, not o's
+ if (bmpSet == NULL) { // Check for memory allocation error.
+ setToBogus();
+ return *this;
+ }
+ }
+ if (o.hasStrings()) {
+ UErrorCode status = U_ZERO_ERROR;
+ if ((strings == nullptr && !allocateStrings(status)) ||
+ (strings->assign(*o.strings, cloneUnicodeString, status), U_FAILURE(status))) {
+ setToBogus();
+ return *this;
+ }
+ } else if (hasStrings()) {
+ strings->removeAllElements(); // o has no strings, so drop the ones held here
+ }
+ if (o.stringSpan != nullptr && !asThawed) {
+ stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings);
+ if (stringSpan == NULL) { // Check for memory allocation error.
+ setToBogus();
+ return *this;
+ }
+ }
+ releasePattern(); // the old pattern is released before o's pattern, if any, is set
+ if (o.pat) {
+ setPattern(o.pat, o.patLen);
+ }
+ return *this;
+}
+
+/**
+ * Creates a heap-allocated copy of this set via the copy constructor.
+ * Every UnicodeMatcher has to be cloneable so that classes built on
+ * UnicodeMatchers, such as Transliterator, can implement cloning.
+ */
+UnicodeSet* UnicodeSet::clone() const {
+    UnicodeSet* copy = new UnicodeSet(*this);
+    return copy;
+}
+
+UnicodeSet *UnicodeSet::cloneAsThawed() const {
+    // The two-argument copy constructor skips bmpSet and stringSpan.
+    UnicodeSet *thawedCopy = new UnicodeSet(*this, TRUE);
+    return thawedCopy;
+}
+
+/**
+ * Tests this set against another set for equality. Two sets are equal
+ * when their inversion lists have the same length and entries, and
+ * when either neither has strings or both have equal strings.
+ *
+ * @param o set to be compared for equality with this set.
+ * @return <tt>true</tt> if the specified set is equal to this set.
+ */
+UBool UnicodeSet::operator==(const UnicodeSet& o) const {
+    if (len != o.len) {
+        return FALSE;
+    }
+    int32_t idx = 0;
+    while (idx < len) {
+        if (list[idx] != o.list[idx]) {
+            return FALSE;
+        }
+        ++idx;
+    }
+    const UBool mineHasStrings = hasStrings();
+    if (mineHasStrings != o.hasStrings()) {
+        return FALSE;
+    }
+    if (mineHasStrings && *strings != *o.strings) {
+        return FALSE;
+    }
+    return TRUE;
+}
+
+/**
+ * Computes a hash code from the length and the entries of the
+ * inversion list.
+ *
+ * @return the hash code value for this set.
+ * @see Object#hashCode()
+ */
+int32_t UnicodeSet::hashCode(void) const {
+    uint32_t hash = static_cast<uint32_t>(len);
+    int32_t idx = 0;
+    while (idx < len) {
+        // Multiply-and-add in unsigned arithmetic.
+        hash = hash * 1000003u + static_cast<uint32_t>(list[idx]);
+        ++idx;
+    }
+    return static_cast<int32_t>(hash);
+}
+
+//----------------------------------------------------------------
+// Public API
+//----------------------------------------------------------------
+
+/**
+ * Returns the number of elements in this set (its cardinality).
+ * Note that the elements of a set may include both individual
+ * code points and strings.
+ *
+ * @return the number of elements in this set (its cardinality).
+ */
+int32_t UnicodeSet::size(void) const {
+    // Start from the string count, then add the width of every range.
+    int32_t total = stringsSize();
+    for (int32_t r = getRangeCount() - 1; r >= 0; --r) {
+        total += getRangeEnd(r) - getRangeStart(r) + 1;
+    }
+    return total;
+}
+
+/**
+ * Reports whether this set has no elements: no strings, and a list
+ * of length 1.
+ *
+ * @return <tt>true</tt> if this set contains no elements.
+ */
+UBool UnicodeSet::isEmpty(void) const {
+    if (hasStrings()) {
+        return FALSE;
+    }
+    return len == 1;
+}
+
+/**
+ * Tests whether the given character is a member of this set.
+ * @param c character to be checked for containment
+ * @return true if the test condition is met
+ */
+UBool UnicodeSet::contains(UChar32 c) const {
+    // When a bmpSet or stringSpan helper exists, it answers the query.
+    if (bmpSet != NULL) {
+        return bmpSet->contains(c);
+    }
+    if (stringSpan != NULL) {
+        return stringSpan->contains(c);
+    }
+    // Only the upper bound needs checking here, not the lower one.
+    if (c >= UNICODESET_HIGH) {
+        return FALSE;
+    }
+    // An odd index from findCodePoint() means c is in the set.
+    const int32_t idx = findCodePoint(c);
+    return (UBool)(idx & 1);
+}
+
+/**
+ * Returns the smallest value i such that c < list[i]. Caller
+ * must ensure that c is a legal value or this method will enter
+ * an infinite loop. This method performs a binary search.
+ * Callers in this file treat an odd result as "c is in the set"
+ * and an even result as "c is not in the set".
+ * @param c a character in the range MIN_VALUE..MAX_VALUE
+ * inclusive
+ * @return the smallest integer i in the range 0..len-1,
+ * inclusive, such that c < list[i]
+ */
+int32_t UnicodeSet::findCodePoint(UChar32 c) const {
+ /* Examples:
+ findCodePoint(c)
+ set list[] c=0 1 3 4 7 8
+ === ============== ===========
+ [] [110000] 0 0 0 0 0 0
+ [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2
+ [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2
+ [:Any:] [0, 110000] 1 1 1 1 1 1
+ */
+
+ // Return the smallest i such that c < list[i]. Assume
+ // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
+ if (c < list[0])
+ return 0;
+ // High runner test. c is often after the last range, so an
+ // initial check for this condition pays off.
+ int32_t lo = 0;
+ int32_t hi = len - 1;
+ if (lo >= hi || c >= list[hi-1]) // lo >= hi: the list has a single entry; otherwise c is at or past the entry before the last one
+ return hi;
+ // invariant: c >= list[lo]
+ // invariant: c < list[hi]
+ for (;;) {
+ int32_t i = (lo + hi) >> 1; // midpoint, rounded down
+ if (i == lo) { // lo and hi are adjacent, so hi is the answer
+ break; // Found!
+ } else if (c < list[i]) {
+ hi = i;
+ } else {
+ lo = i;
+ }
+ }
+ return hi;
+}
+
+/**
+ * Tests whether every character of the given range is a member of
+ * this set.
+ * @param start first character, inclusive, of the range
+ * @param end last character, inclusive, of the range
+ * @return true if the test condition is met
+ */
+UBool UnicodeSet::contains(UChar32 start, UChar32 end) const {
+    const int32_t idx = findCodePoint(start);
+    if ((idx & 1) == 0) {
+        // An even index means start is not in the set.
+        return FALSE;
+    }
+    // start is in a range; the range must also extend past end.
+    return end < list[idx];
+}
+
+/**
+ * Tests whether this set contains the given string. A string that
+ * getSingleCP() maps to a code point is looked up as that code point;
+ * any other non-empty string is looked up among the strings.
+ * @param s string to be checked for containment
+ * @return <tt>true</tt> if this set contains the specified string
+ */
+UBool UnicodeSet::contains(const UnicodeString& s) const {
+    if (s.length() == 0) {
+        return FALSE;
+    }
+    const int32_t cp = getSingleCP(s);
+    return cp < 0 ? stringsContains(s) : contains((UChar32) cp);
+}
+
+/**
+ * Tests whether all the characters and strings of the given set are
+ * members of this set.
+ * @param c set to be checked for containment
+ * @return true if the test condition is met
+ */
+UBool UnicodeSet::containsAll(const UnicodeSet& c) const {
+    // c is a subset when each of its ranges is contained in this set.
+    // Working directly on the inversion lists could be faster if the
+    // need ever arises.
+    const int32_t rangeCount = c.getRangeCount();
+    for (int32_t r = 0; r < rangeCount; ++r) {
+        if (!contains(c.getRangeStart(r), c.getRangeEnd(r))) {
+            return FALSE;
+        }
+    }
+    if (!c.hasStrings()) {
+        return TRUE;
+    }
+    return strings != nullptr && strings->containsAll(*c.strings);
+}
+
+/**
+ * Tests whether all the characters of the given string are members of
+ * this set, i.e. whether a USET_SPAN_CONTAINED span covers the whole
+ * string.
+ * @param s string containing characters to be checked for containment
+ * @return true if the test condition is met
+ */
+UBool UnicodeSet::containsAll(const UnicodeString& s) const {
+    const int32_t spanned = span(s.getBuffer(), s.length(), USET_SPAN_CONTAINED);
+    return (UBool)(spanned == s.length());
+}
+
+/**
+ * Tests whether no character of the given range is a member of this
+ * set.
+ * @param start first character, inclusive, of the range
+ * @param end last character, inclusive, of the range
+ * @return true if the test condition is met
+ */
+UBool UnicodeSet::containsNone(UChar32 start, UChar32 end) const {
+    const int32_t idx = findCodePoint(start);
+    if ((idx & 1) != 0) {
+        // An odd index means start itself is in the set.
+        return FALSE;
+    }
+    // start is in a gap; the gap must also extend past end.
+    return end < list[idx];
+}
+
+/**
+ * Tests whether none of the characters and strings of the given set
+ * are members of this set.
+ * @param c set to be checked for containment
+ * @return true if the test condition is met
+ */
+UBool UnicodeSet::containsNone(const UnicodeSet& c) const {
+    // The sets are disjoint when none of c's ranges overlaps this set.
+    // Working directly on the inversion lists could be faster if the
+    // need ever arises.
+    const int32_t rangeCount = c.getRangeCount();
+    for (int32_t r = 0; r < rangeCount; ++r) {
+        if (!containsNone(c.getRangeStart(r), c.getRangeEnd(r))) {
+            return FALSE;
+        }
+    }
+    // Strings can only collide when both sets have some.
+    if (strings == nullptr || !c.hasStrings()) {
+        return TRUE;
+    }
+    return strings->containsNone(*c.strings) != FALSE;
+}
+
+/**
+ * Tests whether none of the characters of the given string are
+ * members of this set, i.e. whether a USET_SPAN_NOT_CONTAINED span
+ * covers the whole string.
+ * @param s string containing characters to be checked for containment
+ * @return true if the test condition is met
+ */
+UBool UnicodeSet::containsNone(const UnicodeString& s) const {
+    const int32_t spanned = span(s.getBuffer(), s.length(), USET_SPAN_NOT_CONTAINED);
+    return (UBool)(spanned == s.length());
+}
+
+/**
+ * Returns <tt>true</tt> if this set contains any character whose low byte
+ * is the given value. This is used by <tt>RuleBasedTransliterator</tt> for
+ * indexing.
+ * @param v the low byte to look for, in the range [0,255]
+ * @return TRUE if some code point in a range of this set has low byte v,
+ *         or if some string of this set starts with such a code point
+ */
+UBool UnicodeSet::matchesIndexValue(uint8_t v) const {
+    /* The index value v, in the range [0,255], is contained in this set if
+     * it is contained in any range of this set. Write a range as
+     * aaxx..bbyy, where xx and yy are the low bytes and aa, bb are the
+     * remaining high parts ("pages" of 256 code points):
+     *  - bb == aa:     v is contained if xx <= v && v <= yy.
+     *  - bb == aa + 1: v is contained if xx <= v || v <= yy.
+     *  - bb >  aa + 1: at least one whole page lies inside the range, so
+     *                  every v is contained. The previous code applied
+     *                  the bb == aa + 1 test here and missed values with
+     *                  yy < v < xx.
+     */
+    const int32_t rangeCount = getRangeCount();
+    for (int32_t i = 0; i < rangeCount; ++i) {
+        const UChar32 low = getRangeStart(i);
+        const UChar32 high = getRangeEnd(i);
+        const int32_t pageSpan = (high >> 8) - (low >> 8);
+        const int32_t lowByte = low & 0xFF;
+        const int32_t highByte = high & 0xFF;
+        if (pageSpan == 0) {
+            if (lowByte <= v && v <= highByte) {
+                return TRUE;
+            }
+        } else if (pageSpan > 1 || lowByte <= v || v <= highByte) {
+            return TRUE;
+        }
+    }
+    if (hasStrings()) {
+        for (int32_t i = 0; i < strings->size(); ++i) {
+            const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i);
+            // Strings are never empty here; that is enforced elsewhere,
+            // so reading the first code point is safe.
+            const UChar32 c = s.char32At(0);
+            if ((c & 0xFF) == v) {
+                return TRUE;
+            }
+        }
+    }
+    return FALSE;
+}
+
+/**
+ * Implementation of UnicodeMatcher::matches(). Always matches the
+ * longest possible multichar string.
+ * Strings are tried first; if no string matches completely, the call
+ * falls through to UnicodeFilter::matches(). On a full string match,
+ * offset is advanced (or moved back, when limit < offset) by the
+ * length of the longest matching string.
+ */
+UMatchDegree UnicodeSet::matches(const Replaceable& text,
+ int32_t& offset,
+ int32_t limit,
+ UBool incremental) {
+ if (offset == limit) {
+ // Strings, if any, have length != 0, so we don't worry
+ // about them here. If we ever allow zero-length strings
+ // we must check for them here.
+ if (contains(U_ETHER)) {
+ return incremental ? U_PARTIAL_MATCH : U_MATCH;
+ } else {
+ return U_MISMATCH;
+ }
+ } else {
+ if (hasStrings()) { // try strings first
+
+ // might separate forward and backward loops later
+ // for now they are combined
+
+ // TODO Improve efficiency of this, at least in the forward
+ // direction, if not in both. In the forward direction we
+ // can assume the strings are sorted.
+
+ int32_t i;
+ UBool forward = offset < limit;
+
+ // firstChar is the leftmost char to match in the
+ // forward direction or the rightmost char to match in
+ // the reverse direction.
+ UChar firstChar = text.charAt(offset);
+
+ // If there are multiple strings that can match we
+ // return the longest match.
+ int32_t highWaterLength = 0;
+
+ for (i=0; i<strings->size(); ++i) {
+ const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i);
+
+ //if (trial.length() == 0) {
+ // return U_MATCH; // null-string always matches
+ //}
+ // assert(trial.length() != 0); // We ensure this elsewhere
+
+ UChar c = trial.charAt(forward ? 0 : trial.length() - 1); // the end of trial that lines up with text.charAt(offset)
+
+ // Strings are sorted, so we can optimize in the
+ // forward direction.
+ if (forward && c > firstChar) break;
+ if (c != firstChar) continue;
+
+ int32_t matchLen = matchRest(text, offset, limit, trial);
+
+ if (incremental) {
+ int32_t maxLen = forward ? limit-offset : offset-limit; // number of text units between offset and limit
+ if (matchLen == maxLen) {
+ // We have successfully matched but only up to limit.
+ return U_PARTIAL_MATCH;
+ }
+ }
+
+ if (matchLen == trial.length()) {
+ // We have successfully matched the whole string.
+ if (matchLen > highWaterLength) {
+ highWaterLength = matchLen;
+ }
+ // In the forward direction we know strings
+ // are sorted so we can bail early.
+ if (forward && matchLen < highWaterLength) {
+ break;
+ }
+ continue;
+ }
+ }
+
+ // We've checked all strings without a partial match.
+ // If we have full matches, return the longest one.
+ if (highWaterLength != 0) {
+ offset += forward ? highWaterLength : -highWaterLength;
+ return U_MATCH;
+ }
+ }
+ return UnicodeFilter::matches(text, offset, limit, incremental);
+ }
+}
+
+/**
+ * Returns the longest match for s in text at the given position.
+ * If limit > start, matching runs forward from start+1 towards limit
+ * and compares everything except s.charAt(0). Otherwise matching runs
+ * backward from start-1 and compares everything except
+ * s.charAt(s.length()-1). The character at text.charAt(start) is
+ * assumed to match s already and is not checked.
+ * @param text the text to match
+ * @param start the first character to match. In the forward
+ * direction, text.charAt(start) is matched against s.charAt(0).
+ * In the reverse direction, it is matched against
+ * s.charAt(s.length()-1).
+ * @param limit the limit offset for matching, either last+1 in
+ * the forward direction, or last-1 in the reverse direction,
+ * where last is the index of the last character to match.
+ * @param s the string to match against text
+ * @return If part of s matches up to the limit, return |limit -
+ * start|. If all of s matches before reaching the limit, return
+ * s.length(). If there is a mismatch between s and text, return
+ * 0
+ */
+int32_t UnicodeSet::matchRest(const Replaceable& text,
+                              int32_t start, int32_t limit,
+                              const UnicodeString& s) {
+    const int32_t slen = s.length();
+    const UBool forward = start < limit;
+    // Compare at most as many units as fit between start and limit.
+    int32_t maxLen = forward ? limit - start : start - limit;
+    if (maxLen > slen) {
+        maxLen = slen;
+    }
+    if (forward) {
+        for (int32_t k = 1; k < maxLen; ++k) {
+            if (text.charAt(start + k) != s.charAt(k)) {
+                return 0;
+            }
+        }
+    } else {
+        const int32_t lastIndex = slen - 1;
+        for (int32_t k = 1; k < maxLen; ++k) {
+            if (text.charAt(start - k) != s.charAt(lastIndex - k)) {
+                return 0;
+            }
+        }
+    }
+    return maxLen;
+}
+
+/**
+ * Implementation of UnicodeMatcher: adds all elements of this set to
+ * toUnionTo.
+ */
+void UnicodeSet::addMatchSetTo(UnicodeSet& toUnionTo) const {
+    const UnicodeSet& self = *this;
+    toUnionTo.addAll(self);
+}
+
+/**
+ * Returns the position of the given character within this set, with
+ * the set ordered by ascending code point. Returns -1 when the
+ * character is not in this set. The inverse of this method is
+ * <code>charAt()</code>.
+ * @return an index from 0..size()-1, or -1
+ */
+int32_t UnicodeSet::indexOf(UChar32 c) const {
+    if (c < MIN_VALUE || c > MAX_VALUE) {
+        return -1;
+    }
+    // Number of set members in the ranges that lie entirely below c.
+    int32_t preceding = 0;
+    for (const UChar32* range = list; ; range += 2) {
+        const UChar32 start = range[0];
+        if (c < start) {
+            return -1;
+        }
+        const UChar32 limit = range[1];
+        if (c < limit) {
+            return preceding + c - start;
+        }
+        preceding += limit - start;
+    }
+}
+
+/**
+ * Returns the character at the given position within this set, with
+ * the set ordered by ascending code point. Returns (UChar32)-1 when
+ * the index is out of range. The inverse of this method is
+ * <code>indexOf()</code>.
+ * @param index an index from 0..size()-1
+ * @return the character at the given index, or (UChar32)-1.
+ */
+UChar32 UnicodeSet::charAt(int32_t index) const {
+    if (index < 0) {
+        return (UChar32)-1;
+    }
+    // Largest even integer <= len: len itself when even, len-1 when odd.
+    // For odd lengths the last entry is UNICODESET_HIGH.
+    const int32_t pairedLen = len & ~1;
+    for (int32_t i = 0; i < pairedLen; i += 2) {
+        const UChar32 start = list[i];
+        const int32_t count = list[i + 1] - start;
+        if (index < count) {
+            return (UChar32)(start + index);
+        }
+        index -= count;
+    }
+    return (UChar32)-1;
+}
+
+/**
+ * Make this object represent the range <code>start - end</code>.
+ * If <code>end < start</code> then this object is set to an
+ * empty range.
+ *
+ * @param start first character in the set, inclusive
+ * @param end last character in the set, inclusive
+ */
+UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) {
+    // Empty the set, then toggle the requested range on.
+    return clear().complement(start, end);
+}
+
+/**
+ * Adds the specified range to this set if it is not already
+ * present. If this set already contains the specified range,
+ * the call leaves this set unchanged. If <code>end < start</code>
+ * then an empty range is added, leaving the set unchanged.
+ * When <code>start == end</code> the call is forwarded to the
+ * single-character add().
+ *
+ * @param start first character, inclusive, of range to be added
+ * to this set.
+ * @param end last character, inclusive, of range to be added
+ * to this set.
+ */
+UnicodeSet& UnicodeSet::add(UChar32 start, UChar32 end) {
+ if (pinCodePoint(start) < pinCodePoint(end)) {
+ UChar32 limit = end + 1; // exclusive upper bound of the new range
+ // Fast path for adding a new range after the last one.
+ // Odd list length: [..., lastStart, lastLimit, HIGH]
+ if ((len & 1) != 0) {
+ // If the list is empty, set lastLimit low enough to not be adjacent to 0.
+ UChar32 lastLimit = len == 1 ? -2 : list[len - 2];
+ if (lastLimit <= start && !isFrozen() && !isBogus()) {
+ if (lastLimit == start) {
+ // Extend the last range.
+ list[len - 2] = limit;
+ if (limit == UNICODESET_HIGH) { // the range now ends at HIGH, so the trailing HIGH entry is dropped
+ --len;
+ }
+ } else {
+ list[len - 1] = start; // overwrite the trailing HIGH entry with the new range start
+ if (limit < UNICODESET_HIGH) {
+ if (ensureCapacity(len + 2)) {
+ list[len++] = limit;
+ list[len++] = UNICODESET_HIGH;
+ }
+ } else { // limit == UNICODESET_HIGH
+ if (ensureCapacity(len + 1)) {
+ list[len++] = UNICODESET_HIGH;
+ }
+ }
+ }
+ releasePattern();
+ return *this;
+ }
+ }
+ // This is slow. Could be much faster using findCodePoint(start)
+ // and modifying the list, dealing with adjacent & overlapping ranges.
+ UChar32 range[3] = { start, limit, UNICODESET_HIGH };
+ add(range, 2, 0);
+ } else if (start == end) {
+ add(start);
+ }
+ return *this;
+}
+
+// #define DEBUG_US_ADD
+
+#ifdef DEBUG_US_ADD
+#include <stdio.h>
+void dump(UChar32 c) { // debug helper: prints c with "%c" when c <= 0xFF, otherwise as "U+%04X"
+ if (c <= 0xFF) {
+ printf("%c", (char)c);
+ } else {
+ printf("U+%04X", c);
+ }
+}
+void dump(const UChar32* list, int32_t len) { // debug helper: prints the len entries of list as "[a, b, ...]"
+ printf("[");
+ for (int32_t i=0; i<len; ++i) {
+ if (i != 0) printf(", ");
+ dump(list[i]);
+ }
+ printf("]");
+}
+#endif
+
+/**
+ * Adds the specified character to this set if it is not already
+ * present. If this set already contains the specified character,
+ * the call leaves this set unchanged.
+ * The call also leaves the set unchanged when it is frozen or bogus.
+ * Three cases are handled: c directly precedes the start of the next
+ * range, c directly follows the end of the prior range, or c touches
+ * no existing range and a new one-character range is inserted.
+ */
+UnicodeSet& UnicodeSet::add(UChar32 c) {
+ // find smallest i such that c < list[i]
+ // if odd, then it is IN the set
+ // if even, then it is OUT of the set
+ int32_t i = findCodePoint(pinCodePoint(c));
+
+ // already in set?
+ if ((i & 1) != 0 || isFrozen() || isBogus()) return *this;
+
+ // HIGH is 0x110000
+ // assert(list[len-1] == HIGH);
+
+ // empty = [HIGH]
+ // [start_0, limit_0, start_1, limit_1, HIGH]
+
+ // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
+ // ^
+ // list[i]
+
+ // i == 0 means c is before the first range
+
+#ifdef DEBUG_US_ADD
+ printf("Add of ");
+ dump(c);
+ printf(" found at %d", i);
+ printf(": ");
+ dump(list, len);
+ printf(" => ");
+#endif
+
+ if (c == list[i]-1) {
+ // c is before start of next range
+ list[i] = c;
+ // if we touched the HIGH mark, then add a new one
+ if (c == (UNICODESET_HIGH - 1)) {
+ if (!ensureCapacity(len+1)) {
+ // ensureCapacity will mark the object as Bogus if OOM failure happens.
+ return *this;
+ }
+ list[len++] = UNICODESET_HIGH;
+ }
+ if (i > 0 && c == list[i-1]) {
+ // collapse adjacent ranges
+
+ // [..., start_k-1, c, c, limit_k, ..., HIGH]
+ // ^
+ // list[i]
+
+ //for (int32_t k=i-1; k<len-2; ++k) {
+ // list[k] = list[k+2];
+ //}
+ UChar32* dst = list + i - 1;
+ UChar32* src = dst + 2;
+ UChar32* srclimit = list + len;
+ while (src < srclimit) *(dst++) = *(src++); // shift the tail down by two entries, dropping the duplicate c, c pair
+
+ len -= 2;
+ }
+ }
+
+ else if (i > 0 && c == list[i-1]) {
+ // c is after end of prior range
+ list[i-1]++;
+ // no need to check for collapse here
+ }
+
+ else {
+ // At this point we know the new char is not adjacent to
+ // any existing ranges, and it is not 10FFFF.
+
+
+ // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
+ // ^
+ // list[i]
+
+ // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH]
+ // ^
+ // list[i]
+
+ if (!ensureCapacity(len+2)) {
+ // ensureCapacity will mark the object as Bogus if OOM failure happens.
+ return *this;
+ }
+
+ UChar32 *p = list + i;
+ uprv_memmove(p + 2, p, (len - i) * sizeof(*p)); // open a two-entry gap at position i
+ list[i] = c;
+ list[i+1] = c+1;
+ len += 2;
+ }
+
+#ifdef DEBUG_US_ADD
+ dump(list, len);
+ printf("\n");
+
+ for (i=1; i<len; ++i) {
+ if (list[i] <= list[i-1]) {
+ // Corrupt array!
+ printf("ERROR: list has been corrupted\n");
+ exit(1);
+ }
+ }
+#endif
+
+ releasePattern();
+ return *this;
+}
+
+/**
+ * Adds the specified multicharacter string to this set if it is not
+ * already present. If this set already contains the string, the call
+ * leaves this set unchanged. A string that getSingleCP() maps to a
+ * code point is added as that code point.
+ * Thus "ch" => {"ch"}
+ * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+ * @param s the source string
+ * @return the modified set, for chaining
+ */
+UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
+    if (s.length() == 0 || isFrozen() || isBogus()) {
+        return *this;
+    }
+    const int32_t cp = getSingleCP(s);
+    if (cp >= 0) {
+        add((UChar32)cp);
+    } else if (!stringsContains(s)) {
+        _add(s);
+        releasePattern();
+    }
+    return *this;
+}
+
+/**
+ * Inserts a copy of the given string into 'strings', keeping the
+ * sorted order. The caller must already have checked that the string
+ * is not empty and is not yet in 'strings'. Any failure marks this
+ * set as bogus.
+ */
+void UnicodeSet::_add(const UnicodeString& s) {
+    if (isFrozen() || isBogus()) {
+        return;
+    }
+    UErrorCode ec = U_ZERO_ERROR;
+    if (strings == nullptr && !allocateStrings(ec)) {
+        setToBogus();
+        return;
+    }
+    UnicodeString* copy = new UnicodeString(s);
+    if (copy != NULL) {
+        strings->sortedInsert(copy, compareUnicodeString, ec);
+        if (U_FAILURE(ec)) {
+            setToBogus();
+            delete copy;
+        }
+    } else {
+        // Memory allocation error.
+        setToBogus();
+    }
+}
+
+/**
+ * @return a code point IF the string consists of a single one,
+ * otherwise returns -1.
+ * @param s the string to test
+ */
+int32_t UnicodeSet::getSingleCP(const UnicodeString& s) {
+    const int32_t length = s.length();
+    if (length > 2) {
+        return -1;
+    }
+    if (length == 1) {
+        return s.charAt(0);
+    }
+    // Remaining case: the string is a single code point only if it
+    // starts with a code point above 0xFFFF (a surrogate pair).
+    const UChar32 cp = s.char32At(0);
+    return cp > 0xFFFF ? cp : -1;
+}
+
+/**
+ * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
+ * If this set already contains any particular character, it has no effect on that character.
+ * @param s the source string
+ * @return the modified set, for chaining
+ */
+UnicodeSet& UnicodeSet::addAll(const UnicodeString& s) {
+    int32_t pos = 0;
+    while (pos < s.length()) {
+        const UChar32 cp = s.char32At(pos);
+        add(cp);
+        // Step over one or two code units, depending on the code point.
+        pos += U16_LENGTH(cp);
+    }
+    return *this;
+}
+
+/**
+ * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * The string is first turned into a set of its characters, which is
+ * then intersected with this set.
+ * @param s the source string
+ * @return the modified set, for chaining
+ */
+UnicodeSet& UnicodeSet::retainAll(const UnicodeString& s) {
+    UnicodeSet chars;
+    chars.addAll(s);
+    return retainAll(chars);
+}
+
+/**
+ * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * The string is first turned into a set of its characters, which is
+ * then xor'ed with this set.
+ * @param s the source string
+ * @return the modified set, for chaining
+ */
+UnicodeSet& UnicodeSet::complementAll(const UnicodeString& s) {
+    UnicodeSet chars;
+    chars.addAll(s);
+    return complementAll(chars);
+}
+
+/**
+ * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * The string is first turned into a set of its characters, which is
+ * then removed from this set.
+ * @param s the source string
+ * @return the modified set, for chaining
+ */
+UnicodeSet& UnicodeSet::removeAll(const UnicodeString& s) {
+    UnicodeSet chars;
+    chars.addAll(s);
+    return removeAll(chars);
+}
+
+UnicodeSet& UnicodeSet::removeAllStrings() {
+    // Nothing to do for a frozen set or a set without strings.
+    if (isFrozen() || !hasStrings()) {
+        return *this;
+    }
+    strings->removeAllElements();
+    releasePattern();
+    return *this;
+}
+
+
+/**
+ * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
+ * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+ * @param s the source string
+ * @return a newly created set containing the given string, or NULL
+ * if the allocation failed
+ */
+UnicodeSet* U_EXPORT2 UnicodeSet::createFrom(const UnicodeString& s) {
+    UnicodeSet *result = new UnicodeSet();
+    if (result == NULL) { // Memory allocation error.
+        return NULL;
+    }
+    result->add(s);
+    return result;
+}
+
+
+/**
+ * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
+ * @param s the source string
+ * @return a newly created set containing the given characters, or
+ * NULL if the allocation failed
+ */
+UnicodeSet* U_EXPORT2 UnicodeSet::createFromAll(const UnicodeString& s) {
+    UnicodeSet *result = new UnicodeSet();
+    if (result == NULL) { // Memory allocation error.
+        return NULL;
+    }
+    result->addAll(s);
+    return result;
+}
+
+/**
+ * Retain only the elements in this set that are contained in the
+ * specified range. If <code>end < start</code> then an empty range is
+ * retained, leaving the set empty.
+ *
+ * @param start first character, inclusive, of range to be retained
+ * to this set.
+ * @param end last character, inclusive, of range to be retained
+ * to this set.
+ */
+UnicodeSet& UnicodeSet::retain(UChar32 start, UChar32 end) {
+    if (pinCodePoint(start) > pinCodePoint(end)) {
+        // Empty range: nothing is retained.
+        clear();
+        return *this;
+    }
+    UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
+    retain(range, 2, 0);
+    return *this;
+}
+
+UnicodeSet& UnicodeSet::retain(UChar32 c) {
+    // A single character is the range c..c.
+    retain(c, c);
+    return *this;
+}
+
+/**
+ * Removes the specified range from this set if it is present.
+ * The set will not contain the specified range once the call
+ * returns. If <code>end < start</code> then an empty range is
+ * removed, leaving the set unchanged.
+ *
+ * @param start first character, inclusive, of range to be removed
+ * from this set.
+ * @param end last character, inclusive, of range to be removed
+ * from this set.
+ */
+UnicodeSet& UnicodeSet::remove(UChar32 start, UChar32 end) {
+    if (pinCodePoint(start) > pinCodePoint(end)) {
+        return *this;
+    }
+    UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
+    retain(range, 2, 2);
+    return *this;
+}
+
+/**
+ * Removes the specified character from this set if it is present.
+ * The set will not contain the specified character once the call
+ * returns.
+ */
+UnicodeSet& UnicodeSet::remove(UChar32 c) {
+    // A single character is the range c..c.
+    remove(c, c);
+    return *this;
+}
+
+/**
+ * Removes the specified string from this set if it is present.
+ * The set will not contain the specified string once the call
+ * returns. A string that getSingleCP() maps to a code point is
+ * removed as that code point.
+ * @param s the source string
+ * @return the modified set, for chaining
+ */
+UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
+    if (s.length() == 0 || isFrozen() || isBogus()) {
+        return *this;
+    }
+    const int32_t cp = getSingleCP(s);
+    if (cp >= 0) {
+        remove((UChar32)cp, (UChar32)cp);
+    } else if (strings != nullptr && strings->removeElement((void*) &s)) {
+        // The pattern is only released when a string was actually removed.
+        releasePattern();
+    }
+    return *this;
+}
+
+/**
+ * Complements the specified range in this set. Any character in
+ * the range will be removed if it is in this set, or will be
+ * added if it is not in this set. If <code>end < start</code>
+ * then an empty range is xor'ed, leaving the set unchanged.
+ *
+ * @param start first character, inclusive, of range to be complemented
+ * in this set.
+ * @param end last character, inclusive, of range to be complemented
+ * in this set.
+ */
+UnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) {
+    if (isFrozen() || isBogus()) {
+        return *this;
+    }
+    const UBool nonEmptyRange = pinCodePoint(start) <= pinCodePoint(end);
+    if (nonEmptyRange) {
+        UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
+        exclusiveOr(range, 2, 0);
+    }
+    // The pattern is released even when the range was empty.
+    releasePattern();
+    return *this;
+}
+
+UnicodeSet& UnicodeSet::complement(UChar32 c) {
+    // A single character is the range c..c.
+    complement(c, c);
+    return *this;
+}
+
+/**
+ * This is equivalent to
+ * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
+ * It toggles a leading UNICODESET_LOW entry in the list: the entry is
+ * removed when present and inserted when absent.
+ */
+UnicodeSet& UnicodeSet::complement(void) {
+    if (isFrozen() || isBogus()) {
+        return *this;
+    }
+    const UBool startsAtLow = (list[0] == UNICODESET_LOW);
+    if (!startsAtLow) {
+        if (!ensureCapacity(len+1)) {
+            return *this;
+        }
+        // Shift everything up by one and put LOW in front.
+        uprv_memmove(list + 1, list, (size_t)len*sizeof(UChar32));
+        list[0] = UNICODESET_LOW;
+        ++len;
+    } else {
+        // Drop the leading LOW entry.
+        uprv_memmove(list, list + 1, (size_t)(len-1)*sizeof(UChar32));
+        --len;
+    }
+    releasePattern();
+    return *this;
+}
+
+/**
+ * Complement the specified string in this set: the string is removed
+ * if it is in this set, and added if it is not. A string that
+ * getSingleCP() maps to a code point is complemented as that code
+ * point.
+ * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+ * @param s the string to complement
+ * @return this object, for chaining
+ */
+UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
+    if (s.length() == 0 || isFrozen() || isBogus()) {
+        return *this;
+    }
+    const int32_t cp = getSingleCP(s);
+    if (cp >= 0) {
+        complement((UChar32)cp, (UChar32)cp);
+        return *this;
+    }
+    if (stringsContains(s)) {
+        strings->removeElement((void*) &s);
+    } else {
+        _add(s);
+    }
+    releasePattern();
+    return *this;
+}
+
+/**
+ * Adds all of the elements in the specified set to this set if
+ * they're not already present. This operation effectively
+ * modifies this set so that its value is the <i>union</i> of the two
+ * sets. The behavior of this operation is unspecified if the specified
+ * collection is modified while the operation is in progress.
+ *
+ * @param c set whose elements are to be added to this set.
+ * @see #add(char, char)
+ */
+UnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) {
+    if (c.len > 0 && c.list != NULL) {
+        add(c.list, c.len, 0);
+    }
+    if (c.strings == NULL) {
+        return *this;
+    }
+    // Add the strings in order, skipping those already present.
+    for (int32_t k = 0; k < c.strings->size(); ++k) {
+        const UnicodeString* str = (const UnicodeString*)c.strings->elementAt(k);
+        if (stringsContains(*str)) {
+            continue;
+        }
+        _add(*str);
+    }
+    return *this;
+}
+
+/**
+ * Retains only the elements in this set that are contained in the
+ * specified set. In other words, removes from this set all of
+ * its elements that are not contained in the specified set. This
+ * operation effectively modifies this set so that its value is
+ * the <i>intersection</i> of the two sets.
+ *
+ * @param c set that defines which elements this set will retain.
+ */
+UnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) {
+    if (isFrozen() || isBogus()) {
+        return *this;
+    }
+    retain(c.list, c.len, 0);
+    if (!hasStrings()) {
+        return *this;
+    }
+    if (c.hasStrings()) {
+        strings->retainAll(*c.strings);
+    } else {
+        // c has no strings, so none of ours survive.
+        strings->removeAllElements();
+    }
+    return *this;
+}
+
+/**
+ * Removes from this set all of its elements that are contained in the
+ * specified set. This operation effectively modifies this
+ * set so that its value is the <i>asymmetric set difference</i> of
+ * the two sets.
+ *
+ * @param c set that defines which elements will be removed from
+ * this set.
+ */
+UnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) {
+    if (isFrozen() || isBogus()) {
+        return *this;
+    }
+    retain(c.list, c.len, 2);
+    // Strings only need attention when both sets have some.
+    const UBool bothHaveStrings = hasStrings() && c.hasStrings();
+    if (bothHaveStrings) {
+        strings->removeAll(*c.strings);
+    }
+    return *this;
+}
+
+/**
+ * Complements in this set all elements contained in the specified
+ * set. Any character or string in the other set will be removed if it
+ * is in this set, or will be added if it is not in this set.
+ *
+ * @param c set that defines which elements will be xor'ed from
+ * this set.
+ */
+UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) {
+    if (isFrozen() || isBogus()) {
+        return *this;
+    }
+    exclusiveOr(c.list, c.len, 0);
+
+    if (c.strings == nullptr) {
+        return *this;
+    }
+    for (int32_t k = 0; k < c.strings->size(); ++k) {
+        void* element = c.strings->elementAt(k);
+        // Try to remove the string; if it was not there, add it instead.
+        const UBool removed = strings != nullptr && strings->removeElement(element);
+        if (!removed) {
+            _add(*(const UnicodeString*)element);
+        }
+    }
+    return *this;
+}
+
+/**
+ * Removes all of the elements from this set. This set will be
+ * empty after this call returns. A frozen set is left unchanged.
+ */
+UnicodeSet& UnicodeSet::clear(void) {
+    if (!isFrozen()) {
+        // An empty inversion list is just [HIGH].
+        list[0] = UNICODESET_HIGH;
+        len = 1;
+        releasePattern();
+        if (strings != NULL) {
+            strings->removeAllElements();
+        }
+        // Resetting the flags also removes the bogus state.
+        fFlags = 0;
+    }
+    return *this;
+}
+
+/**
+ * Iteration method that returns the number of ranges contained in
+ * this set, computed as half the list length (rounded down).
+ * @see #getRangeStart
+ * @see #getRangeEnd
+ */
+int32_t UnicodeSet::getRangeCount() const {
+    const int32_t rangeCount = len / 2;
+    return rangeCount;
+}
+
+/**
+ * Iteration method that returns the first character in the
+ * specified range of this set.
+ * @see #getRangeCount
+ * @see #getRangeEnd
+ */
+UChar32 UnicodeSet::getRangeStart(int32_t index) const {
+    // Range starts sit at the even list positions.
+    const int32_t pos = index * 2;
+    return list[pos];
+}
+
+/**
+ * Iteration method: yields the last character of the range with the
+ * given index.  The list stores the exclusive limit of each range, so
+ * the stored value is decremented by one.  The index is not validated
+ * here.
+ * @see #getRangeCount
+ * @see #getRangeStart
+ */
+UChar32 UnicodeSet::getRangeEnd(int32_t index) const {
+    const UChar32 limit = list[index*2 + 1];
+    return limit - 1;
+}
+
+const UnicodeString* UnicodeSet::getString(int32_t index) const {
+    // strings is dereferenced without a null check; callers have to make
+    // sure that this set actually holds strings.
+    void* element = strings->elementAt(index);
+    return (const UnicodeString*) element;
+}
+
+/**
+ * Reallocate this object's internal structures to take up the least
+ * possible space, without changing this object's value.
+ * A frozen or bogus set is returned unchanged.
+ *
+ * @return a reference to this set.
+ */
+UnicodeSet& UnicodeSet::compact() {
+    if (isFrozen() || isBogus()) {
+        return *this;
+    }
+    // Delete buffer first to defragment memory less.
+    if (buffer != stackList) {
+        uprv_free(buffer);
+        buffer = NULL;
+        bufferCapacity = 0;
+    }
+    if (list == stackList) {
+        // pass
+    } else if (len <= INITIAL_CAPACITY) {
+        uprv_memcpy(stackList, list, len * sizeof(UChar32));
+        uprv_free(list);
+        list = stackList;
+        capacity = INITIAL_CAPACITY;
+        // swapBuffers() may have left buffer pointing at stackList.  Now
+        // that list lives there, the scratch buffer must not alias it:
+        // a later merge would overwrite list entries it has not read yet.
+        if (buffer == stackList) {
+            buffer = NULL;
+            bufferCapacity = 0;
+        }
+    } else if ((len + 7) < capacity) {
+        // If we have more than a little unused capacity, shrink it to len.
+        UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * len);
+        if (temp) {
+            list = temp;
+            capacity = len;
+        }
+        // else what the heck happened?! We allocated less memory!
+        // Oh well. We'll keep our original array.
+    }
+    // An empty strings vector carries no information; release it.
+    if (strings != nullptr && strings->isEmpty()) {
+        delete strings;
+        strings = nullptr;
+    }
+    return *this;
+}
+
+#ifdef DEBUG_SERIALIZE
+#include <stdio.h>
+#endif
+
+/**
+ * Deserialize constructor.
+ *
+ * Rebuilds the inversion list from 16-bit units as written by
+ * serialize(): one header unit holding the payload length (bit 15 set
+ * if a second header unit with the BMP length follows), then the BMP
+ * values as single units, then the supplementary values as pairs of
+ * units (high half first).
+ *
+ * @param data          the serialized units
+ * @param dataLen       number of units available in data
+ * @param serialization must be kSerialized
+ * @param ec            in/out error code.  Set to
+ *                      U_ILLEGAL_ARGUMENT_ERROR for bad arguments or for
+ *                      data that is shorter than its header announces,
+ *                      and to U_MEMORY_ALLOCATION_ERROR if the list
+ *                      cannot be allocated.  The set is bogus on failure.
+ */
+UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization serialization,
+                       UErrorCode &ec) {
+
+    if(U_FAILURE(ec)) {
+        setToBogus();
+        return;
+    }
+
+    if( (serialization != kSerialized)
+        || (data==NULL)
+        || (dataLen < 1)) {
+        ec = U_ILLEGAL_ARGUMENT_ERROR;
+        setToBogus();
+        return;
+    }
+
+    // bmp?
+    int32_t headerSize = ((data[0]&0x8000)) ?2:1;
+    if (dataLen < headerSize) {
+        // The second header unit is announced but not present.
+        ec = U_ILLEGAL_ARGUMENT_ERROR;
+        setToBogus();
+        return;
+    }
+    int32_t serializedLength = data[0]&0x7FFF;
+    int32_t bmpLength = (headerSize==1)?data[0]:data[1];
+    if (bmpLength > serializedLength || (dataLen - headerSize) < serializedLength) {
+        // Inconsistent header, or fewer units than the header announces:
+        // reading on would run past the end of data.
+        ec = U_ILLEGAL_ARGUMENT_ERROR;
+        setToBogus();
+        return;
+    }
+
+    int32_t newLength = ((serializedLength-bmpLength)/2)+bmpLength;
+#ifdef DEBUG_SERIALIZE
+    printf("dataLen %d headerSize %d bmpLen %d len %d. data[0]=%X/%X/%X/%X\n", dataLen,headerSize,bmpLength,newLength, data[0],data[1],data[2],data[3]);
+#endif
+    if(!ensureCapacity(newLength + 1)) {  // +1 for HIGH
+        // ensureCapacity() already made the set bogus; report the cause.
+        ec = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    // copy bmp
+    int32_t i;
+    for(i = 0; i< bmpLength;i++) {
+        list[i] = data[i+headerSize];
+#ifdef DEBUG_SERIALIZE
+        printf("<<16@%d[%d] %X\n", i+headerSize, i, list[i]);
+#endif
+    }
+    // copy smp
+    for(i=bmpLength;i<newLength;i++) {
+        list[i] = ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+0] << 16) +
+                  ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+1]);
+#ifdef DEBUG_SERIALIZE
+        printf("<<32@%d+[%d] %lX\n", headerSize+bmpLength+i, i, list[i]);
+#endif
+    }
+    U_ASSERT(i == newLength);
+    // The serialized form omits the terminator; restore it if missing.
+    if (i == 0 || list[i - 1] != UNICODESET_HIGH) {
+        list[i++] = UNICODESET_HIGH;
+    }
+    len = i;
+}
+
+
+int32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const { // writes the list as 16-bit units into dest; returns the number of units needed
+ int32_t bmpLength, length, destLength;
+
+ if (U_FAILURE(ec)) {
+ return 0;
+ }
+
+ if (destCapacity<0 || (destCapacity>0 && dest==NULL)) { // a NULL dest is only accepted together with a zero capacity
+ ec=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* count necessary 16-bit units */
+ length=this->len-1; // Subtract 1 to ignore final UNICODESET_HIGH
+ // assert(length>=0);
+ if (length==0) {
+ /* empty set: serialized as a single zero length unit */
+ if (destCapacity>0) {
+ *dest=0;
+ } else {
+ ec=U_BUFFER_OVERFLOW_ERROR;
+ }
+ return 1;
+ }
+ /* now length>0 */
+
+ if (this->list[length-1]<=0xffff) {
+ /* all BMP: one unit per list entry */
+ bmpLength=length;
+ } else if (this->list[0]>=0x10000) {
+ /* all supplementary: two units per list entry */
+ bmpLength=0;
+ length*=2;
+ } else {
+ /* some BMP, some supplementary: count the leading BMP entries */
+ for (bmpLength=0; bmpLength<length && this->list[bmpLength]<=0xffff; ++bmpLength) {}
+ length=bmpLength+2*(length-bmpLength);
+ }
+#ifdef DEBUG_SERIALIZE
+ printf(">> bmpLength%d length%d len%d\n", bmpLength, length, len);
+#endif
+ /* length: number of 16-bit array units */
+ if (length>0x7fff) {
+ /* there are only 15 bits for the length in the first serialized word */
+ ec=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ /*
+ * total serialized length:
+ * number of 16-bit array units (length) +
+ * 1 length unit (always) +
+ * 1 bmpLength unit (if there are supplementary values)
+ */
+ destLength=length+((length>bmpLength)?2:1);
+ if (destLength<=destCapacity) {
+ const UChar32 *p;
+ int32_t i;
+
+#ifdef DEBUG_SERIALIZE
+ printf("writeHdr\n");
+#endif
+ *dest=(uint16_t)length;
+ if (length>bmpLength) { // supplementary values present: flag it in bit 15 and add the bmpLength unit
+ *dest|=0x8000;
+ *++dest=(uint16_t)bmpLength;
+ }
+ ++dest;
+
+ /* write the BMP part of the array */
+ p=this->list;
+ for (i=0; i<bmpLength; ++i) {
+#ifdef DEBUG_SERIALIZE
+ printf("writebmp: %x\n", (int)*p);
+#endif
+ *dest++=(uint16_t)*p++;
+ }
+
+ /* write the supplementary part of the array, upper 16 bits first */
+ for (; i<length; i+=2) {
+#ifdef DEBUG_SERIALIZE
+ printf("write32: %x\n", (int)*p);
+#endif
+ *dest++=(uint16_t)(*p>>16);
+ *dest++=(uint16_t)*p++;
+ }
+ } else {
+ ec=U_BUFFER_OVERFLOW_ERROR; // nothing is written; destLength still reports the required size
+ }
+ return destLength;
+}
+
+//----------------------------------------------------------------
+// Implementation: Utility methods
+//----------------------------------------------------------------
+
+/**
+ * Creates the strings vector.
+ *
+ * @param status in/out error code; set to U_MEMORY_ALLOCATION_ERROR if
+ *               the vector object itself cannot be allocated.
+ * @return TRUE if the vector exists afterwards, FALSE if status was
+ *         already a failure or the allocation failed (strings is then
+ *         NULL unless it was never touched).
+ */
+UBool UnicodeSet::allocateStrings(UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return FALSE;
+    }
+    strings = new UVector(uprv_deleteUObject,
+                          uhash_compareUnicodeString, 1, status);
+    if (strings == NULL) {
+        // operator new itself failed
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return FALSE;
+    } else if (U_FAILURE(status)) {
+        // the vector's constructor reported an error
+        delete strings;
+        strings = NULL;
+        return FALSE;
+    } else {
+        return TRUE;
+    }
+}
+
+int32_t UnicodeSet::nextCapacity(int32_t minCapacity) {
+    // Over-allocate so that repeated growth needs few reallocations:
+    // small requests get a fixed increment, medium ones grow five-fold,
+    // large ones double but never exceed MAX_LENGTH.
+    if (minCapacity < INITIAL_CAPACITY) {
+        return minCapacity + INITIAL_CAPACITY;
+    }
+    if (minCapacity <= 2500) {
+        return 5 * minCapacity;
+    }
+    const int32_t doubled = 2 * minCapacity;
+    return (doubled > MAX_LENGTH) ? MAX_LENGTH : doubled;
+}
+
+bool UnicodeSet::ensureCapacity(int32_t newLen) {
+    // Requests beyond MAX_LENGTH are pinned to MAX_LENGTH.
+    const int32_t wanted = (newLen > MAX_LENGTH) ? MAX_LENGTH : newLen;
+    if (wanted <= capacity) {
+        return true;
+    }
+    const int32_t grown = nextCapacity(wanted);
+    UChar32* fresh = (UChar32*) uprv_malloc(grown * sizeof(UChar32));
+    if (fresh == NULL) {
+        // Out of memory: the set becomes bogus and the caller must stop.
+        setToBogus();
+        return false;
+    }
+    // Carry over the used part of the old list only.
+    uprv_memcpy(fresh, list, len * sizeof(UChar32));
+    if (list != stackList) {
+        uprv_free(list);
+    }
+    capacity = grown;
+    list = fresh;
+    return true;
+}
+
+bool UnicodeSet::ensureBufferCapacity(int32_t newLen) {
+    // Requests beyond MAX_LENGTH are pinned to MAX_LENGTH.
+    const int32_t wanted = (newLen > MAX_LENGTH) ? MAX_LENGTH : newLen;
+    if (wanted <= bufferCapacity) {
+        return true;
+    }
+    const int32_t grown = nextCapacity(wanted);
+    UChar32* fresh = (UChar32*) uprv_malloc(grown * sizeof(UChar32));
+    if (fresh == NULL) {
+        setToBogus();
+        return false;
+    }
+    // Nothing is copied: the old buffer contents are simply discarded.
+    if (buffer != stackList) {
+        uprv_free(buffer);
+    }
+    bufferCapacity = grown;
+    buffer = fresh;
+    return true;
+}
+
+/**
+ * Exchanges list with buffer, and capacity with bufferCapacity.
+ * len is not touched.
+ */
+void UnicodeSet::swapBuffers(void) {
+    UChar32* const previousList = list;
+    const int32_t previousCapacity = capacity;
+
+    list = buffer;
+    capacity = bufferCapacity;
+
+    buffer = previousList;
+    bufferCapacity = previousCapacity;
+}
+
+void UnicodeSet::setToBogus() {
+    // Empty the set first; clear() resets fFlags, so the bogus marker
+    // has to be written afterwards.
+    clear();
+    fFlags = kIsBogus;
+}
+
+//----------------------------------------------------------------
+// Implementation: Fundamental operators
+//----------------------------------------------------------------
+
+static inline UChar32 max(UChar32 a, UChar32 b) {
+    // Returns the larger of the two code points (b when they are equal).
+    if (a > b) {
+        return a;
+    }
+    return b;
+}
+
+// polarity = 0, 3 is normal: x xor y
+// polarity = 1, 2: x xor ~y == x === y
+
+void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity) { // merges list with other into buffer, then swaps buffer in as the new list
+ if (isFrozen() || isBogus()) {
+ return;
+ }
+ if (!ensureBufferCapacity(len + otherLen)) { // on failure the set has been made bogus
+ return;
+ }
+
+ int32_t i = 0, j = 0, k = 0; // i: read index in list, j: read index in other, k: write index in buffer
+ UChar32 a = list[i++];
+ UChar32 b;
+ if (polarity == 1 || polarity == 2) {
+ b = UNICODESET_LOW;
+ if (other[j] == UNICODESET_LOW) { // skip base if already LOW
+ ++j;
+ b = other[j]; // NOTE(review): j is not advanced past this element, so the loop below will read it again via other[j++] -- confirm this is intended
+ }
+ } else {
+ b = other[j++];
+ }
+ // simplest of all the routines
+ // sort the values, discarding identicals!
+ for (;;) {
+ if (a < b) {
+ buffer[k++] = a;
+ a = list[i++];
+ } else if (b < a) {
+ buffer[k++] = b;
+ b = other[j++];
+ } else if (a != UNICODESET_HIGH) { // at this point, a == b
+ // discard both values!
+ a = list[i++];
+ b = other[j++];
+ } else { // DONE! both inputs have reached their terminator
+ buffer[k++] = UNICODESET_HIGH;
+ len = k;
+ break;
+ }
+ }
+ swapBuffers();
+ releasePattern(); // the cached pattern no longer describes the set
+}
+
+// polarity = 0 is normal: x union y
+// polarity = 2: x union ~y
+// polarity = 1: ~x union y
+// polarity = 3: ~x union ~y
+
+void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) { // merges list with other into buffer, then swaps buffer in as the new list
+ if (isFrozen() || isBogus() || other==NULL) {
+ return;
+ }
+ if (!ensureBufferCapacity(len + otherLen)) { // on failure the set has been made bogus
+ return;
+ }
+
+ int32_t i = 0, j = 0, k = 0; // i: read index in list, j: read index in other, k: write index in buffer
+ UChar32 a = list[i++];
+ UChar32 b = other[j++];
+ // change from xor is that we have to check overlapping pairs
+ // polarity bit 1 means a is second, bit 2 means b is.
+ for (;;) {
+ switch (polarity) {
+ case 0: // both first; take lower if unequal
+ if (a < b) { // take a
+ // Back up over overlapping ranges in buffer[]
+ if (k > 0 && a <= buffer[k-1]) {
+ // Pick latter end value in buffer[] vs. list[]
+ a = max(list[i], buffer[--k]);
+ } else {
+ // No overlap
+ buffer[k++] = a;
+ a = list[i];
+ }
+ i++; // Common if/else code factored out
+ polarity ^= 1;
+ } else if (b < a) { // take b
+ if (k > 0 && b <= buffer[k-1]) {
+ b = max(other[j], buffer[--k]);
+ } else {
+ buffer[k++] = b;
+ b = other[j];
+ }
+ j++;
+ polarity ^= 2;
+ } else { // a == b, take a, drop b
+ if (a == UNICODESET_HIGH) goto loop_end;
+ // This is symmetrical; it doesn't matter if
+ // we backtrack with a or b. - liu
+ if (k > 0 && a <= buffer[k-1]) {
+ a = max(list[i], buffer[--k]);
+ } else {
+ // No overlap
+ buffer[k++] = a;
+ a = list[i];
+ }
+ i++;
+ polarity ^= 1;
+ b = other[j++];
+ polarity ^= 2;
+ }
+ break;
+ case 3: // both second; take higher if unequal, and drop other
+ if (b <= a) { // take a
+ if (a == UNICODESET_HIGH) goto loop_end;
+ buffer[k++] = a;
+ } else { // take b
+ if (b == UNICODESET_HIGH) goto loop_end;
+ buffer[k++] = b;
+ }
+ a = list[i++];
+ polarity ^= 1; // factored common code
+ b = other[j++];
+ polarity ^= 2;
+ break;
+ case 1: // a second, b first; if b < a, overlap
+ if (a < b) { // no overlap, take a
+ buffer[k++] = a; a = list[i++]; polarity ^= 1;
+ } else if (b < a) { // OVERLAP, drop b
+ b = other[j++];
+ polarity ^= 2;
+ } else { // a == b, drop both!
+ if (a == UNICODESET_HIGH) goto loop_end;
+ a = list[i++];
+ polarity ^= 1;
+ b = other[j++];
+ polarity ^= 2;
+ }
+ break;
+ case 2: // a first, b second; if a < b, overlap
+ if (b < a) { // no overlap, take b
+ buffer[k++] = b;
+ b = other[j++];
+ polarity ^= 2;
+ } else if (a < b) { // OVERLAP, drop a
+ a = list[i++];
+ polarity ^= 1;
+ } else { // a == b, drop both!
+ if (a == UNICODESET_HIGH) goto loop_end;
+ a = list[i++];
+ polarity ^= 1;
+ b = other[j++];
+ polarity ^= 2;
+ }
+ break;
+ }
+ }
+ loop_end:
+ buffer[k++] = UNICODESET_HIGH; // terminate
+ len = k;
+ swapBuffers();
+ releasePattern(); // the cached pattern no longer describes the set
+}
+
+// polarity = 0 is normal: x intersect y
+// polarity = 2: x intersect ~y == set-minus
+// polarity = 1: ~x intersect y
+// polarity = 3: ~x intersect ~y
+
+void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity) { // merges list with other into buffer, then swaps buffer in as the new list
+ if (isFrozen() || isBogus()) {
+ return;
+ }
+ if (!ensureBufferCapacity(len + otherLen)) { // on failure the set has been made bogus
+ return;
+ }
+
+ int32_t i = 0, j = 0, k = 0; // i: read index in list, j: read index in other, k: write index in buffer
+ UChar32 a = list[i++];
+ UChar32 b = other[j++];
+ // change from xor is that we have to check overlapping pairs
+ // polarity bit 1 means a is second, bit 2 means b is.
+ for (;;) {
+ switch (polarity) {
+ case 0: // both first; drop the smaller
+ if (a < b) { // drop a
+ a = list[i++];
+ polarity ^= 1;
+ } else if (b < a) { // drop b
+ b = other[j++];
+ polarity ^= 2;
+ } else { // a == b, take one, drop other
+ if (a == UNICODESET_HIGH) goto loop_end;
+ buffer[k++] = a;
+ a = list[i++];
+ polarity ^= 1;
+ b = other[j++];
+ polarity ^= 2;
+ }
+ break;
+ case 3: // both second; take lower if unequal
+ if (a < b) { // take a
+ buffer[k++] = a;
+ a = list[i++];
+ polarity ^= 1;
+ } else if (b < a) { // take b
+ buffer[k++] = b;
+ b = other[j++];
+ polarity ^= 2;
+ } else { // a == b, take one, drop other
+ if (a == UNICODESET_HIGH) goto loop_end;
+ buffer[k++] = a;
+ a = list[i++];
+ polarity ^= 1;
+ b = other[j++];
+ polarity ^= 2;
+ }
+ break;
+ case 1: // a second, b first;
+ if (a < b) { // NO OVERLAP, drop a
+ a = list[i++];
+ polarity ^= 1;
+ } else if (b < a) { // OVERLAP, take b
+ buffer[k++] = b;
+ b = other[j++];
+ polarity ^= 2;
+ } else { // a == b, drop both!
+ if (a == UNICODESET_HIGH) goto loop_end;
+ a = list[i++];
+ polarity ^= 1;
+ b = other[j++];
+ polarity ^= 2;
+ }
+ break;
+ case 2: // a first, b second; if a < b, overlap
+ if (b < a) { // no overlap, drop b
+ b = other[j++];
+ polarity ^= 2;
+ } else if (a < b) { // OVERLAP, take a
+ buffer[k++] = a;
+ a = list[i++];
+ polarity ^= 1;
+ } else { // a == b, drop both!
+ if (a == UNICODESET_HIGH) goto loop_end;
+ a = list[i++];
+ polarity ^= 1;
+ b = other[j++];
+ polarity ^= 2;
+ }
+ break;
+ }
+ }
+ loop_end:
+ buffer[k++] = UNICODESET_HIGH; // terminate
+ len = k;
+ swapBuffers();
+ releasePattern(); // the cached pattern no longer describes the set
+}
+
+/**
+ * Appends the pattern representation of a whole string to
+ * <code>buf</code> by handing each of its code points in turn to the
+ * single-character overload.
+ */
+void UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s,
+                              UBool escapeUnprintable) {
+    int32_t offset = 0;
+    while (offset < s.length()) {
+        const UChar32 cp = s.char32At(offset);
+        _appendToPat(buf, cp, escapeUnprintable);
+        // Step over one or two code units, depending on the code point.
+        offset += U16_LENGTH(cp);
+    }
+}
+
+/**
+ * Appends the pattern representation of a single code point to
+ * <code>buf</code>.  If requested, an unprintable code point is written
+ * as a hex escape; otherwise pattern syntax characters and pattern
+ * white space are preceded by a backslash.
+ */
+void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c,
+                              UBool escapeUnprintable) {
+    // Hex escape notation (\uxxxx or \Uxxxxxxxx) for anything unprintable;
+    // fall through to the literal form if the utility declines.
+    if (escapeUnprintable && ICU_Utility::isUnprintable(c) &&
+            ICU_Utility::escapeUnprintable(buf, c)) {
+        return;
+    }
+    UBool needsBackslash;
+    switch (c) {
+    case SET_OPEN:
+    case SET_CLOSE:
+    case HYPHEN:
+    case COMPLEMENT:
+    case INTERSECTION:
+    case BACKSLASH:
+    case OPEN_BRACE:
+    case CLOSE_BRACE:
+    case COLON:
+    case SymbolTable::SYMBOL_REF:
+        // Characters with a meaning in pattern syntax
+        needsBackslash = TRUE;
+        break;
+    default:
+        // White space is escaped as well
+        needsBackslash = PatternProps::isWhiteSpace(c);
+        break;
+    }
+    if (needsBackslash) {
+        buf.append(BACKSLASH);
+    }
+    buf.append(c);
+}
+
+/**
+ * Appends a pattern for this set to <code>result</code>.  When a
+ * cached pattern exists it is copied, with unprintable characters
+ * replaced by escapes on request; otherwise a pattern is generated
+ * from the set contents.
+ */
+UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
+                                      UBool escapeUnprintable) const
+{
+    if (pat == NULL) {
+        return _generatePattern(result, escapeUnprintable);
+    }
+
+    int32_t pendingBackslashes = 0;
+    int32_t i = 0;
+    while (i < patLen) {
+        UChar32 c;
+        U16_NEXT(pat, i, patLen, c);
+        if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
+            // An odd run of backslashes in front means the character was
+            // already escaped in the pattern: drop the last backslash
+            // before writing the hex escape.
+            if ((pendingBackslashes % 2) == 1) {
+                result.truncate(result.length() - 1);
+            }
+            ICU_Utility::escapeUnprintable(result, c);
+            pendingBackslashes = 0;
+        } else {
+            result.append(c);
+            pendingBackslashes = (c == BACKSLASH) ? pendingBackslashes + 1 : 0;
+        }
+    }
+    return result;
+}
+
+/**
+ * Writes a string representation of this set into
+ * <code>result</code>, replacing its previous contents.  Passing the
+ * returned pattern to a UnicodeSet constructor is meant to produce a
+ * set equal to this one.
+ */
+UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
+                                     UBool escapeUnprintable) const
+{
+    // Start from an empty string; _toPattern() only appends.
+    result.truncate(0);
+    UnicodeString& out = _toPattern(result, escapeUnprintable);
+    return out;
+}
+
+/**
+ * Builds a pattern from the set contents and appends it to
+ * <code>result</code>.  The cached pattern (this.pat) is ignored.
+ * Ranges are written either directly or, when that is shorter, as the
+ * complement of the gaps between them; strings follow in braces.
+ */
+UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
+                                            UBool escapeUnprintable) const
+{
+    result.append(SET_OPEN);
+
+//  // Check against the predefined categories.  We implicitly build
+//  // up ALL category sets the first time toPattern() is called.
+//  for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) {
+//      if (*this == getCategorySet(cat)) {
+//          result.append(COLON);
+//          result.append(CATEGORY_NAMES, cat*2, 2);
+//          return result.append(CATEGORY_CLOSE);
+//      }
+//  }
+
+    const int32_t count = getRangeCount();
+
+    // With at least two ranges that together touch both MIN_VALUE and
+    // MAX_VALUE, listing the gaps after a '^' is the shorter form.
+    const UBool inverse = count > 1 &&
+        getRangeStart(0) == MIN_VALUE &&
+        getRangeEnd(count-1) == MAX_VALUE;
+    if (inverse) {
+        result.append(COMPLEMENT);
+    }
+
+    for (int32_t i = inverse ? 1 : 0; i < count; ++i) {
+        UChar32 start;
+        UChar32 end;
+        if (inverse) {
+            // The gap between range i-1 and range i
+            start = getRangeEnd(i-1)+1;
+            end = getRangeStart(i)-1;
+        } else {
+            start = getRangeStart(i);
+            end = getRangeEnd(i);
+        }
+        _appendToPat(result, start, escapeUnprintable);
+        if (start == end) {
+            continue;
+        }
+        // Two adjacent characters are written without a hyphen.
+        if ((start+1) != end) {
+            result.append(HYPHEN);
+        }
+        _appendToPat(result, end, escapeUnprintable);
+    }
+
+    if (strings != nullptr) {
+        const int32_t stringCount = strings->size();
+        for (int32_t i = 0; i < stringCount; ++i) {
+            const UnicodeString* str = (const UnicodeString*) strings->elementAt(i);
+            result.append(OPEN_BRACE);
+            _appendToPat(result, *str, escapeUnprintable);
+            result.append(CLOSE_BRACE);
+        }
+    }
+    return result.append(SET_CLOSE);
+}
+
+/**
+* Frees the cached pattern, if there is one, and resets its length.
+*/
+void UnicodeSet::releasePattern() {
+    if (pat == NULL) {
+        return;
+    }
+    uprv_free(pat);
+    pat = NULL;
+    patLen = 0;
+}
+
+/**
+* Replaces the cached pattern with a NUL-terminated copy of newPat.
+* If the copy cannot be allocated the set simply has no cached pattern.
+*/
+void UnicodeSet::setPattern(const char16_t *newPat, int32_t newPatLen) {
+    releasePattern();
+    pat = (UChar *)uprv_malloc((newPatLen + 1) * sizeof(UChar));
+    if (pat == NULL) {
+        // Not an error: this is only a cache, and an equivalent pattern
+        // can be generated later on request.
+        return;
+    }
+    patLen = newPatLen;
+    u_memcpy(pat, newPat, patLen);
+    pat[patLen] = 0;
+}
+
+UnicodeSet *UnicodeSet::freeze() {
+    // Nothing to do for a set that is already frozen or is bogus.
+    if (isFrozen() || isBogus()) {
+        return this;
+    }
+    compact();
+
+    // Optimize contains() and span() and similar functions.
+    if (hasStrings()) {
+        stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL);
+        if (stringSpan == nullptr) {
+            setToBogus();
+            return this;
+        }
+        if (!stringSpan->needsStringSpanUTF16()) {
+            // No string matters for span() etc. because all code points of
+            // every string are already in this set.  needsStringSpanUTF8()
+            // need not be consulted: UTF-8 has at most as many relevant
+            // strings as UTF-16, so it implies needsStringSpanUTF16().
+            delete stringSpan;
+            stringSpan = NULL;
+        }
+    }
+    if (stringSpan == NULL) {
+        // No span-relevant strings: Optimize for code point spans.
+        bmpSet=new BMPSet(list, len);
+        if (bmpSet == NULL) {
+            // Memory allocation failed.
+            setToBogus();
+        }
+    }
+    return this;
+}
+
+int32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
+    // Fast path: a BMPSet is available.
+    if(length>0 && bmpSet!=NULL) {
+        return (int32_t)(bmpSet->span(s, s+length, spanCondition)-s);
+    }
+    // A negative length means the text is NUL-terminated.
+    if(length<0) {
+        length=u_strlen(s);
+    }
+    if(length==0) {
+        return 0;
+    }
+    if(stringSpan!=NULL) {
+        return stringSpan->span(s, length, spanCondition);
+    }
+    if(hasStrings()) {
+        // Build a temporary string-span helper for just this direction
+        // and condition.
+        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
+                            UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED :
+                            UnicodeSetStringSpan::FWD_UTF16_CONTAINED;
+        UnicodeSetStringSpan strSpan(*this, *strings, which);
+        if(strSpan.needsStringSpanUTF16()) {
+            return strSpan.span(s, length, spanCondition);
+        }
+    }
+
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+    }
+
+    // Plain code point loop: stop at the first code point whose
+    // membership differs from the requested condition.
+    UChar32 c;
+    int32_t start=0, prev=0;
+    for(;;) {
+        U16_NEXT(s, start, length, c);
+        if(spanCondition!=contains(c)) {
+            break;
+        }
+        prev=start;
+        if(prev>=length) {
+            break;
+        }
+    }
+    return prev;
+}
+
+int32_t UnicodeSet::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
+    // Fast path: a BMPSet is available.
+    if(length>0 && bmpSet!=NULL) {
+        return (int32_t)(bmpSet->spanBack(s, s+length, spanCondition)-s);
+    }
+    // A negative length means the text is NUL-terminated.
+    if(length<0) {
+        length=u_strlen(s);
+    }
+    if(length==0) {
+        return 0;
+    }
+    if(stringSpan!=NULL) {
+        return stringSpan->spanBack(s, length, spanCondition);
+    }
+    if(hasStrings()) {
+        // Build a temporary string-span helper for just this direction
+        // and condition.
+        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
+                            UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED :
+                            UnicodeSetStringSpan::BACK_UTF16_CONTAINED;
+        UnicodeSetStringSpan strSpan(*this, *strings, which);
+        if(strSpan.needsStringSpanUTF16()) {
+            return strSpan.spanBack(s, length, spanCondition);
+        }
+    }
+
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+    }
+
+    // Walk backwards one code point at a time; length doubles as the
+    // current position.
+    UChar32 c;
+    int32_t prev=length;
+    for(;;) {
+        U16_PREV(s, 0, length, c);
+        if(spanCondition!=contains(c)) {
+            break;
+        }
+        prev=length;
+        if(prev<=0) {
+            break;
+        }
+    }
+    return prev;
+}
+
+int32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
+    // Fast path: a BMPSet is available.
+    if(length>0 && bmpSet!=NULL) {
+        const uint8_t *s0=(const uint8_t *)s;
+        return (int32_t)(bmpSet->spanUTF8(s0, length, spanCondition)-s0);
+    }
+    // A negative length means the text is NUL-terminated.
+    if(length<0) {
+        length=(int32_t)uprv_strlen(s);
+    }
+    if(length==0) {
+        return 0;
+    }
+    if(stringSpan!=NULL) {
+        return stringSpan->spanUTF8((const uint8_t *)s, length, spanCondition);
+    }
+    if(hasStrings()) {
+        // Build a temporary string-span helper for just this direction
+        // and condition.
+        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
+                            UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED :
+                            UnicodeSetStringSpan::FWD_UTF8_CONTAINED;
+        UnicodeSetStringSpan strSpan(*this, *strings, which);
+        if(strSpan.needsStringSpanUTF8()) {
+            return strSpan.spanUTF8((const uint8_t *)s, length, spanCondition);
+        }
+    }
+
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+    }
+
+    // Plain code point loop: stop at the first code point whose
+    // membership differs from the requested condition.
+    UChar32 c;
+    int32_t start=0, prev=0;
+    for(;;) {
+        U8_NEXT_OR_FFFD(s, start, length, c);
+        if(spanCondition!=contains(c)) {
+            break;
+        }
+        prev=start;
+        if(prev>=length) {
+            break;
+        }
+    }
+    return prev;
+}
+
+int32_t UnicodeSet::spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
+    // Fast path: a BMPSet is available.
+    if(length>0 && bmpSet!=NULL) {
+        const uint8_t *s0=(const uint8_t *)s;
+        return bmpSet->spanBackUTF8(s0, length, spanCondition);
+    }
+    // A negative length means the text is NUL-terminated.
+    if(length<0) {
+        length=(int32_t)uprv_strlen(s);
+    }
+    if(length==0) {
+        return 0;
+    }
+    if(stringSpan!=NULL) {
+        return stringSpan->spanBackUTF8((const uint8_t *)s, length, spanCondition);
+    }
+    if(hasStrings()) {
+        // Build a temporary string-span helper for just this direction
+        // and condition.
+        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
+                            UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED :
+                            UnicodeSetStringSpan::BACK_UTF8_CONTAINED;
+        UnicodeSetStringSpan strSpan(*this, *strings, which);
+        if(strSpan.needsStringSpanUTF8()) {
+            return strSpan.spanBackUTF8((const uint8_t *)s, length, spanCondition);
+        }
+    }
+
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+    }
+
+    // Walk backwards one code point at a time; length doubles as the
+    // current position.
+    UChar32 c;
+    int32_t prev=length;
+    for(;;) {
+        U8_PREV_OR_FFFD(s, 0, length, c);
+        if(spanCondition!=contains(c)) {
+            break;
+        }
+        prev=length;
+        if(prev<=0) {
+            break;
+        }
+    }
+    return prev;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/uniset_closure.cpp b/thirdparty/icu4c/common/uniset_closure.cpp
new file mode 100644
index 0000000000..882231ba1a
--- /dev/null
+++ b/thirdparty/icu4c/common/uniset_closure.cpp
@@ -0,0 +1,250 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uniset_closure.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011may30
+* created by: Markus W. Scherer
+*
+* UnicodeSet::closeOver() and related methods moved here from uniset_props.cpp
+* to simplify dependencies.
+* In particular, this depends on the BreakIterator, but the BreakIterator
+* code also builds UnicodeSets from patterns and needs uniset_props.
+*/
+
+#include "unicode/brkiter.h"
+#include "unicode/locid.h"
+#include "unicode/parsepos.h"
+#include "unicode/uniset.h"
+#include "cmemory.h"
+#include "ruleiter.h"
+#include "ucase.h"
+#include "util.h"
+#include "uvector.h"
+
+U_NAMESPACE_BEGIN
+
+// TODO memory debugging provided inside uniset.cpp
+// could be made available here but probably obsolete with use of modern
+// memory leak checker tools
+#define _dbgct(me)
+
+//----------------------------------------------------------------
+// Constructors &c
+//----------------------------------------------------------------
+
+// Builds a set from a complete pattern string; any problem is reported
+// through status by applyPattern().
+UnicodeSet::UnicodeSet(const UnicodeString& pattern,
+                       uint32_t options,
+                       const SymbolTable* symbols,
+                       UErrorCode& status)
+{
+    applyPattern(pattern, options, symbols, status);
+    _dbgct(this);
+}
+
+// Builds a set from the pattern that starts at pos inside a longer
+// string; any problem is reported through status by applyPattern().
+UnicodeSet::UnicodeSet(const UnicodeString& pattern,
+                       ParsePosition& pos,
+                       uint32_t options,
+                       const SymbolTable* symbols,
+                       UErrorCode& status)
+{
+    applyPattern(pattern, pos, options, symbols, status);
+    _dbgct(this);
+}
+
+//----------------------------------------------------------------
+// Public API
+//----------------------------------------------------------------
+
+// Applies a pattern that must span the whole string: after parsing,
+// anything left over (other than trailing white space when
+// USET_IGNORE_SPACE is set) yields U_ILLEGAL_ARGUMENT_ERROR.
+UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
+                                     uint32_t options,
+                                     const SymbolTable* symbols,
+                                     UErrorCode& status) {
+    ParsePosition pos(0);
+    applyPattern(pattern, pos, options, symbols, status);
+    if (!U_FAILURE(status)) {
+        int32_t parsedEnd = pos.getIndex();
+
+        if ((options & USET_IGNORE_SPACE) != 0) {
+            // Skip over trailing whitespace
+            ICU_Utility::skipWhitespace(pattern, parsedEnd, TRUE);
+        }
+
+        if (parsedEnd != pattern.length()) {
+            status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+    }
+    return *this;
+}
+
+// Parses the pattern starting at pos and replaces this set's contents
+// with the result.  A frozen set yields U_NO_WRITE_PERMISSION; a parse
+// that ends inside a variable value yields U_MALFORMED_SET.
+UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
+                                     ParsePosition& pos,
+                                     uint32_t options,
+                                     const SymbolTable* symbols,
+                                     UErrorCode& status) {
+    if (U_FAILURE(status)) {
+        return *this;
+    }
+    if (isFrozen()) {
+        status = U_NO_WRITE_PERMISSION;
+        return *this;
+    }
+    // The pattern is rebuilt in a temporary string because the parser
+    // calls add() etc., which discard the cached pattern.
+    UnicodeString rebuiltPat;
+    RuleCharacterIterator chars(pattern, symbols, pos);
+    applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status);
+    if (!U_FAILURE(status)) {
+        if (chars.inVariable()) {
+            // syntaxError(chars, "Extra chars in variable value");
+            status = U_MALFORMED_SET;
+        } else {
+            setPattern(rebuiltPat);
+        }
+    }
+    return *this;
+}
+
+// USetAdder callbacks.
+// They cast the opaque USet pointer back to UnicodeSet directly instead
+// of going through uset.h, to keep code dependencies down.
+static void U_CALLCONV
+_set_add(USet *set, UChar32 c) {
+    UnicodeSet *target = (UnicodeSet *)set;
+    target->add(c);
+}
+
+static void U_CALLCONV
+_set_addRange(USet *set, UChar32 start, UChar32 end) {
+    UnicodeSet *target = (UnicodeSet *)set;
+    target->add(start, end);
+}
+
+static void U_CALLCONV
+_set_addString(USet *set, const UChar *str, int32_t length) {
+    UnicodeSet *target = (UnicodeSet *)set;
+    // The first constructor argument is TRUE exactly when length is negative.
+    target->add(UnicodeString((UBool)(length<0), str, length));
+}
+
+//----------------------------------------------------------------
+// Case folding API
+//----------------------------------------------------------------
+
+// Adds the outcome of one full case mapping call to the set.
+// str is scratch space supplied by the caller so that no temporary
+// string has to be constructed here.
+static inline void
+addCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString &str) {
+    if(result < 0) {
+        // the code point mapped to itself, no need to add it
+        // see ucase.h
+        return;
+    }
+    if(result > UCASE_MAX_STRING_LENGTH) {
+        // result is the single code point the input maps to
+        set.add(result);
+        return;
+    }
+    // result is the length of the string mapping stored in full
+    str.setTo((UBool)FALSE, full, result);
+    set.add(str);
+}
+
+UnicodeSet& UnicodeSet::closeOver(int32_t attribute) { // adds case variants of this set's code points and strings; no-op if frozen or bogus
+ if (isFrozen() || isBogus()) {
+ return *this;
+ }
+ if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) { // any other attribute value leaves the set unchanged
+ {
+ UnicodeSet foldSet(*this); // work on a copy; it is assigned back to *this at the end
+ UnicodeString str; // scratch string reused for every mapping
+ USetAdder sa = {
+ foldSet.toUSet(),
+ _set_add,
+ _set_addRange,
+ _set_addString,
+ NULL, // don't need remove()
+ NULL // don't need removeRange()
+ };
+
+ // start with input set to guarantee inclusion
+ // USET_CASE: remove strings because the strings will actually be reduced (folded);
+ // therefore, start with no strings and add only those needed
+ if ((attribute & USET_CASE_INSENSITIVE) && foldSet.hasStrings()) {
+ foldSet.strings->removeAllElements();
+ }
+
+ int32_t n = getRangeCount();
+ UChar32 result;
+ const UChar *full;
+
+ for (int32_t i=0; i<n; ++i) { // visit every code point of every range of the original set
+ UChar32 start = getRangeStart(i);
+ UChar32 end = getRangeEnd(i);
+
+ if (attribute & USET_CASE_INSENSITIVE) {
+ // full case closure
+ for (UChar32 cp=start; cp<=end; ++cp) {
+ ucase_addCaseClosure(cp, &sa);
+ }
+ } else {
+ // add case mappings
+ // (does not add long s for regular s, or Kelvin for k, for example)
+ for (UChar32 cp=start; cp<=end; ++cp) {
+ result = ucase_toFullLower(cp, NULL, NULL, &full, UCASE_LOC_ROOT);
+ addCaseMapping(foldSet, result, full, str);
+
+ result = ucase_toFullTitle(cp, NULL, NULL, &full, UCASE_LOC_ROOT);
+ addCaseMapping(foldSet, result, full, str);
+
+ result = ucase_toFullUpper(cp, NULL, NULL, &full, UCASE_LOC_ROOT);
+ addCaseMapping(foldSet, result, full, str);
+
+ result = ucase_toFullFolding(cp, &full, 0);
+ addCaseMapping(foldSet, result, full, str);
+ }
+ }
+ }
+ if (hasStrings()) { // now the strings of the original set
+ if (attribute & USET_CASE_INSENSITIVE) {
+ for (int32_t j=0; j<strings->size(); ++j) {
+ str = *(const UnicodeString *) strings->elementAt(j);
+ str.foldCase();
+ if(!ucase_addStringCaseClosure(str.getBuffer(), str.length(), &sa)) {
+ foldSet.add(str); // does not map to code points: add the folded string itself
+ }
+ }
+ } else {
+ Locale root("");
+#if !UCONFIG_NO_BREAK_ITERATION
+ UErrorCode status = U_ZERO_ERROR;
+ BreakIterator *bi = BreakIterator::createWordInstance(root, status); // passed to toTitle() below
+ if (U_SUCCESS(status)) { // if the iterator cannot be created, no string mappings are added at all
+#endif
+ const UnicodeString *pStr;
+
+ for (int32_t j=0; j<strings->size(); ++j) {
+ pStr = (const UnicodeString *) strings->elementAt(j);
+ (str = *pStr).toLower(root);
+ foldSet.add(str);
+#if !UCONFIG_NO_BREAK_ITERATION
+ (str = *pStr).toTitle(bi, root);
+ foldSet.add(str);
+#endif
+ (str = *pStr).toUpper(root);
+ foldSet.add(str);
+ (str = *pStr).foldCase();
+ foldSet.add(str);
+ }
+#if !UCONFIG_NO_BREAK_ITERATION
+ }
+ delete bi;
+#endif
+ }
+ }
+ *this = foldSet; // replace the contents of this set with the closed-over copy
+ }
+ }
+ return *this;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/uniset_props.cpp b/thirdparty/icu4c/common/uniset_props.cpp
new file mode 100644
index 0000000000..37277fcb75
--- /dev/null
+++ b/thirdparty/icu4c/common/uniset_props.cpp
@@ -0,0 +1,1174 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uniset_props.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004aug25
+* created by: Markus W. Scherer
+*
+* Character property dependent functions moved here from uniset.cpp
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uniset.h"
+#include "unicode/parsepos.h"
+#include "unicode/uchar.h"
+#include "unicode/uscript.h"
+#include "unicode/symtable.h"
+#include "unicode/uset.h"
+#include "unicode/locid.h"
+#include "unicode/brkiter.h"
+#include "uset_imp.h"
+#include "ruleiter.h"
+#include "cmemory.h"
+#include "ucln_cmn.h"
+#include "util.h"
+#include "uvector.h"
+#include "uprops.h"
+#include "propname.h"
+#include "normalizer2impl.h"
+#include "uinvchar.h"
+#include "uprops.h"
+#include "charstr.h"
+#include "cstring.h"
+#include "mutex.h"
+#include "umutex.h"
+#include "uassert.h"
+#include "hash.h"
+
+U_NAMESPACE_USE
+
+// Define UChar constants numerically (mostly hex, a few in decimal) for EBCDIC compatibility
+// Used #define to reduce private static exports and memory access time.
+#define SET_OPEN ((UChar)0x005B) /*[*/
+#define SET_CLOSE ((UChar)0x005D) /*]*/
+#define HYPHEN ((UChar)0x002D) /*-*/
+#define COMPLEMENT ((UChar)0x005E) /*^*/
+#define COLON ((UChar)0x003A) /*:*/
+#define BACKSLASH ((UChar)0x005C) /*\*/
+#define INTERSECTION ((UChar)0x0026) /*&*/
+#define UPPER_U ((UChar)0x0055) /*U*/
+#define LOWER_U ((UChar)0x0075) /*u*/
+#define OPEN_BRACE ((UChar)123) /*{ (decimal 123 = 0x7B)*/
+#define CLOSE_BRACE ((UChar)125) /*} (decimal 125 = 0x7D)*/
+#define UPPER_P ((UChar)0x0050) /*P*/
+#define LOWER_P ((UChar)0x0070) /*p*/
+#define UPPER_N ((UChar)78) /*N (decimal 78 = 0x4E)*/
+#define EQUALS ((UChar)0x003D) /*=*/
+
+//static const UChar POSIX_OPEN[] = { SET_OPEN,COLON,0 }; // "[:"
+static const UChar POSIX_CLOSE[] = { COLON,SET_CLOSE,0 }; // ":]"
+//static const UChar PERL_OPEN[] = { BACKSLASH,LOWER_P,0 }; // "\\p"
+//static const UChar PERL_CLOSE[] = { CLOSE_BRACE,0 }; // "}"
+//static const UChar NAME_OPEN[] = { BACKSLASH,UPPER_N,0 }; // "\\N"
+static const UChar HYPHEN_RIGHT_BRACE[] = {HYPHEN,SET_CLOSE,0}; /*-] (hyphen plus SET_CLOSE, i.e. a bracket despite the name)*/
+
+// Special property set IDs (compared against the property name in applyPropertyAlias)
+static const char ANY[] = "ANY"; // [\u0000-\U0010FFFF]
+static const char ASCII[] = "ASCII"; // [\u0000-\u007F]
+static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
+
+// Unicode name property alias
+#define NAME_PROP "na" // property name substituted for the backslash-N{...} form
+#define NAME_PROP_LENGTH 2 // character count of NAME_PROP, terminator excluded
+
+/**
+ * Delimiter string used in patterns to close a category reference:
+ * ":]". Example: "[:Lu:]".
+ */
+//static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */
+
+// Cached sets ------------------------------------------------------------- ***
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV uset_cleanup(); // forward declaration; defined a few lines below
+
+static UnicodeSet *uni32Singleton; // cached set created by createUni32Set()
+static icu::UInitOnce uni32InitOnce = U_INITONCE_INITIALIZER; // passed to umtx_initOnce() to run createUni32Set()
+
+/**
+ * Cleanup function for UnicodeSet: deletes the cached uni32Singleton, nulls the pointer, resets uni32InitOnce, and returns TRUE.
+ */
+static UBool U_CALLCONV uset_cleanup(void) {
+ delete uni32Singleton;
+ uni32Singleton = NULL; // createUni32Set() asserts that the pointer is NULL on entry
+ uni32InitOnce.reset(); // presumably lets the singleton be rebuilt later; confirm against UInitOnce
+ return TRUE;
+}
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+// Cache some sets for other services -------------------------------------- ***
+void U_CALLCONV createUni32Set(UErrorCode &errorCode) { // init-once callback used by uniset_getUnicode32Instance()
+ U_ASSERT(uni32Singleton == NULL);
+ uni32Singleton = new UnicodeSet(UNICODE_STRING_SIMPLE("[:age=3.2:]"), errorCode);
+ if(uni32Singleton==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ } else {
+ uni32Singleton->freeze(); // frozen even if the pattern constructor reported an error in errorCode
+ }
+ ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup); // registered whether or not the allocation succeeded
+}
+
+
+U_CFUNC UnicodeSet *
+uniset_getUnicode32Instance(UErrorCode &errorCode) { // returns the cached set; NULL if its allocation failed
+ umtx_initOnce(uni32InitOnce, &createUni32Set, errorCode);
+ return uni32Singleton;
+}
+
+// helper functions for matching of pattern syntax pieces ------------------ ***
+// these functions are parallel to the PERL_OPEN etc. strings above
+
+// using these functions is not only faster than UnicodeString::compare() and
+// caseCompare(), but they also make UnicodeSet work for simple patterns when
+// no Unicode properties data is available - when caseCompare() fails
+
+static inline UBool
+isPerlOpen(const UnicodeString &pattern, int32_t pos) { // true for BACKSLASH followed by LOWER_P or UPPER_P at pos
+ UChar c;
+ return pattern.charAt(pos)==BACKSLASH && ((c=pattern.charAt(pos+1))==LOWER_P || c==UPPER_P);
+}
+
+/*static inline UBool
+isPerlClose(const UnicodeString &pattern, int32_t pos) {
+ return pattern.charAt(pos)==CLOSE_BRACE;
+}*/
+
+static inline UBool
+isNameOpen(const UnicodeString &pattern, int32_t pos) { // true for BACKSLASH followed by UPPER_N at pos
+ return pattern.charAt(pos)==BACKSLASH && pattern.charAt(pos+1)==UPPER_N;
+}
+
+static inline UBool
+isPOSIXOpen(const UnicodeString &pattern, int32_t pos) { // true for SET_OPEN followed by COLON at pos
+ return pattern.charAt(pos)==SET_OPEN && pattern.charAt(pos+1)==COLON;
+}
+
+/*static inline UBool
+isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
+ return pattern.charAt(pos)==COLON && pattern.charAt(pos+1)==SET_CLOSE;
+}*/
+
+// TODO memory debugging provided inside uniset.cpp
+// could be made available here but probably obsolete with use of modern
+// memory leak checker tools
+#define _dbgct(me) // expands to nothing in this file
+
+} // namespace
+
+//----------------------------------------------------------------
+// Constructors &c
+//----------------------------------------------------------------
+
+/**
+ * Constructs a set from the given pattern by delegating to
+ * applyPattern(pattern, status); see the class description for the syntax.
+ * @param pattern a string specifying what characters are in the set
+ * @param status error code passed straight through to applyPattern()
+ */
+UnicodeSet::UnicodeSet(const UnicodeString& pattern,
+ UErrorCode& status) {
+ applyPattern(pattern, status);
+ _dbgct(this); // _dbgct is defined as an empty macro earlier in this file
+}
+
+//----------------------------------------------------------------
+// Public API
+//----------------------------------------------------------------
+
+UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
+ UErrorCode& status) {
+ // Equivalent to
+ // return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
+ // but without dependency on closeOver().
+ ParsePosition pos(0); // parsing starts at the beginning of the pattern
+ applyPatternIgnoreSpace(pattern, pos, NULL, status);
+ if (U_FAILURE(status)) return *this;
+
+ int32_t i = pos.getIndex();
+ // Skip over trailing whitespace
+ ICU_Utility::skipWhitespace(pattern, i, TRUE);
+ if (i != pattern.length()) { // text remains after the parsed set and its trailing whitespace
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+ return *this;
+}
+
+void
+UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern,
+ ParsePosition& pos,
+ const SymbolTable* symbols,
+ UErrorCode& status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if (isFrozen()) { // a frozen set is not modified; the caller gets U_NO_WRITE_PERMISSION
+ status = U_NO_WRITE_PERMISSION;
+ return;
+ }
+ // Need to build the pattern in a temporary string because
+ // _applyPattern calls add() etc., which set pat to empty.
+ UnicodeString rebuiltPat;
+ RuleCharacterIterator chars(pattern, symbols, pos);
+ applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status); // no case closure (NULL), nesting depth 0
+ if (U_FAILURE(status)) return;
+ if (chars.inVariable()) {
+ // syntaxError(chars, "Extra chars in variable value");
+ status = U_MALFORMED_SET;
+ return;
+ }
+ setPattern(rebuiltPat);
+}
+
+/**
+ * Return true if the given position, in the given pattern, appears
+ * to be the start of a UnicodeSet pattern: a '[' with at least one more character after it, or anything resemblesPropertyPattern() accepts.
+ */
+UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
+ return ((pos+1) < pattern.length() &&
+ pattern.charAt(pos) == (UChar)91/*[*/) ||
+ resemblesPropertyPattern(pattern, pos);
+}
+
+//----------------------------------------------------------------
+// Implementation: Pattern parsing
+//----------------------------------------------------------------
+
+namespace {
+
+/**
+ * A small all-inline class to manage a UnicodeSet pointer. Add
+ * operator->() etc. as needed. The managed set is deleted by the destructor.
+ */
+class UnicodeSetPointer {
+ UnicodeSet* p;
+public:
+ inline UnicodeSetPointer() : p(0) {}
+ inline ~UnicodeSetPointer() { delete p; }
+ inline UnicodeSet* pointer() { return p; } // null until allocate() has succeeded
+ inline UBool allocate() { // creates the set on first use; later calls keep the existing one
+ if (p == 0) {
+ p = new UnicodeSet();
+ }
+ return p != 0; // false only if the allocation produced a null pointer
+ }
+};
+
+constexpr int32_t MAX_DEPTH = 100; // nesting limit checked on entry to applyPattern(RuleCharacterIterator&, ...)
+
+} // namespace
+
+/**
+ * Parse the pattern from the given RuleCharacterIterator. The
+ * iterator is advanced over the parsed pattern.
+ * @param chars iterator over the pattern characters. Upon return it is
+ * advanced to the first character after the parsed pattern, or to the
+ * end of the iteration if all characters are parsed.
+ * @param symbols symbol table to use to parse and dereference variables, or null if none.
+ * @param rebuiltPat receives the pattern that was parsed, rebuilt or
+ * copied from the input pattern, as appropriate.
+ * @param options a bit mask of zero or more of USET_IGNORE_SPACE, USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS.
+ * @param caseClosure member function called for the two case options; it is invoked unchecked, so it must be non-null when one is set.
+ * @param depth current nesting depth; a value above MAX_DEPTH fails with U_ILLEGAL_ARGUMENT_ERROR.
+ * @param ec error code; nothing is done if it already indicates failure, and syntax errors set U_MALFORMED_SET.
+ */
+void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
+ const SymbolTable* symbols,
+ UnicodeString& rebuiltPat,
+ uint32_t options,
+ UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
+ int32_t depth,
+ UErrorCode& ec) {
+ if (U_FAILURE(ec)) return;
+ if (depth > MAX_DEPTH) {
+ ec = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ // Syntax characters: [ ] ^ - & { }
+
+ // Recognized special forms for chars, sets: c-c s-s s&s
+
+ int32_t opts = RuleCharacterIterator::PARSE_VARIABLES |
+ RuleCharacterIterator::PARSE_ESCAPES;
+ if ((options & USET_IGNORE_SPACE) != 0) {
+ opts |= RuleCharacterIterator::SKIP_WHITESPACE;
+ }
+
+ UnicodeString patLocal, buf;
+ UBool usePat = FALSE;
+ UnicodeSetPointer scratch;
+ RuleCharacterIterator::Pos backup;
+
+ // mode: 0=before [, 1=between [...], 2=after ]
+ // lastItem: 0=none, 1=char, 2=set
+ int8_t lastItem = 0, mode = 0;
+ UChar32 lastChar = 0;
+ UChar op = 0;
+
+ UBool invert = FALSE;
+
+ clear(); // start from an empty set; everything parsed below is applied to it
+
+ while (mode != 2 && !chars.atEnd()) {
+ U_ASSERT((lastItem == 0 && op == 0) ||
+ (lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) ||
+ (lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ ||
+ op == INTERSECTION /*'&'*/)));
+
+ UChar32 c = 0;
+ UBool literal = FALSE;
+ UnicodeSet* nested = 0; // alias - do not delete
+
+ // -------- Check for property pattern
+
+ // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed
+ int8_t setMode = 0;
+ if (resemblesPropertyPattern(chars, opts)) {
+ setMode = 2;
+ }
+
+ // -------- Parse '[' of opening delimiter OR nested set.
+ // If there is a nested set, use `setMode' to define how
+ // the set should be parsed. If the '[' is part of the
+ // opening delimiter for this pattern, parse special
+ // strings "[", "[^", "[-", and "[^-". Check for stand-in
+ // characters representing a nested set in the symbol
+ // table.
+
+ else {
+ // Prepare to backup if necessary
+ chars.getPos(backup);
+ c = chars.next(opts, literal, ec);
+ if (U_FAILURE(ec)) return;
+
+ if (c == 0x5B /*'['*/ && !literal) {
+ if (mode == 1) {
+ chars.setPos(backup); // backup
+ setMode = 1;
+ } else {
+ // Handle opening '[' delimiter
+ mode = 1;
+ patLocal.append((UChar) 0x5B /*'['*/);
+ chars.getPos(backup); // prepare to backup
+ c = chars.next(opts, literal, ec);
+ if (U_FAILURE(ec)) return;
+ if (c == 0x5E /*'^'*/ && !literal) {
+ invert = TRUE;
+ patLocal.append((UChar) 0x5E /*'^'*/);
+ chars.getPos(backup); // prepare to backup
+ c = chars.next(opts, literal, ec);
+ if (U_FAILURE(ec)) return;
+ }
+ // Fall through to handle special leading '-';
+ // otherwise restart loop for nested [], \p{}, etc.
+ if (c == HYPHEN /*'-'*/) {
+ literal = TRUE;
+ // Fall through to handle literal '-' below
+ } else {
+ chars.setPos(backup); // backup
+ continue;
+ }
+ }
+ } else if (symbols != 0) {
+ const UnicodeFunctor *m = symbols->lookupMatcher(c);
+ if (m != 0) {
+ const UnicodeSet *ms = dynamic_cast<const UnicodeSet *>(m);
+ if (ms == NULL) {
+ ec = U_MALFORMED_SET;
+ return;
+ }
+ // casting away const, but `nested' won't be modified
+ // (important not to modify stored set)
+ nested = const_cast<UnicodeSet*>(ms);
+ setMode = 3;
+ }
+ }
+ }
+
+ // -------- Handle a nested set. This either is inline in
+ // the pattern or represented by a stand-in that has
+ // previously been parsed and was looked up in the symbol
+ // table.
+
+ if (setMode != 0) {
+ if (lastItem == 1) {
+ if (op != 0) {
+ // syntaxError(chars, "Char expected after operator");
+ ec = U_MALFORMED_SET;
+ return;
+ }
+ add(lastChar, lastChar);
+ _appendToPat(patLocal, lastChar, FALSE);
+ lastItem = 0;
+ op = 0;
+ }
+
+ if (op == HYPHEN /*'-'*/ || op == INTERSECTION /*'&'*/) {
+ patLocal.append(op);
+ }
+
+ if (nested == 0) {
+ // lazy allocation
+ if (!scratch.allocate()) {
+ ec = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ nested = scratch.pointer();
+ }
+ switch (setMode) {
+ case 1:
+ nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec); // NOTE(review): ec is not checked right here, unlike case 2; confirm a later check catches a failed nested parse
+ break;
+ case 2:
+ chars.skipIgnored(opts);
+ nested->applyPropertyPattern(chars, patLocal, ec);
+ if (U_FAILURE(ec)) return;
+ break;
+ case 3: // `nested' already parsed
+ nested->_toPattern(patLocal, FALSE);
+ break;
+ }
+
+ usePat = TRUE;
+
+ if (mode == 0) {
+ // Entire pattern is a category; leave parse loop
+ *this = *nested;
+ mode = 2;
+ break;
+ }
+
+ switch (op) { // combine the nested set with this one according to the pending operator
+ case HYPHEN: /*'-'*/
+ removeAll(*nested);
+ break;
+ case INTERSECTION: /*'&'*/
+ retainAll(*nested);
+ break;
+ case 0:
+ addAll(*nested);
+ break;
+ }
+
+ op = 0;
+ lastItem = 2;
+
+ continue;
+ }
+
+ if (mode == 0) {
+ // syntaxError(chars, "Missing '['");
+ ec = U_MALFORMED_SET;
+ return;
+ }
+
+ // -------- Parse special (syntax) characters. If the
+ // current character is not special, or if it is escaped,
+ // then fall through and handle it below.
+
+ if (!literal) {
+ switch (c) {
+ case 0x5D /*']'*/:
+ if (lastItem == 1) {
+ add(lastChar, lastChar);
+ _appendToPat(patLocal, lastChar, FALSE);
+ }
+ // Treat final trailing '-' as a literal
+ if (op == HYPHEN /*'-'*/) {
+ add(op, op);
+ patLocal.append(op);
+ } else if (op == INTERSECTION /*'&'*/) {
+ // syntaxError(chars, "Trailing '&'");
+ ec = U_MALFORMED_SET;
+ return;
+ }
+ patLocal.append((UChar) 0x5D /*']'*/);
+ mode = 2;
+ continue;
+ case HYPHEN /*'-'*/:
+ if (op == 0) {
+ if (lastItem != 0) {
+ op = (UChar) c;
+ continue;
+ } else {
+ // Treat final trailing '-' as a literal
+ add(c, c);
+ c = chars.next(opts, literal, ec);
+ if (U_FAILURE(ec)) return;
+ if (c == 0x5D /*']'*/ && !literal) {
+ patLocal.append(HYPHEN_RIGHT_BRACE, 2);
+ mode = 2;
+ continue;
+ }
+ }
+ }
+ // syntaxError(chars, "'-' not after char or set");
+ ec = U_MALFORMED_SET;
+ return;
+ case INTERSECTION /*'&'*/:
+ if (lastItem == 2 && op == 0) {
+ op = (UChar) c;
+ continue;
+ }
+ // syntaxError(chars, "'&' not after set");
+ ec = U_MALFORMED_SET;
+ return;
+ case 0x5E /*'^'*/:
+ // syntaxError(chars, "'^' not after '['");
+ ec = U_MALFORMED_SET;
+ return;
+ case 0x7B /*'{'*/:
+ if (op != 0) {
+ // syntaxError(chars, "Missing operand after operator");
+ ec = U_MALFORMED_SET;
+ return;
+ }
+ if (lastItem == 1) {
+ add(lastChar, lastChar);
+ _appendToPat(patLocal, lastChar, FALSE);
+ }
+ lastItem = 0;
+ buf.truncate(0);
+ {
+ UBool ok = FALSE;
+ while (!chars.atEnd()) {
+ c = chars.next(opts, literal, ec);
+ if (U_FAILURE(ec)) return;
+ if (c == 0x7D /*'}'*/ && !literal) {
+ ok = TRUE;
+ break;
+ }
+ buf.append(c);
+ }
+ if (buf.length() < 1 || !ok) {
+ // syntaxError(chars, "Invalid multicharacter string");
+ ec = U_MALFORMED_SET;
+ return;
+ }
+ }
+ // We have new string. Add it to set and continue;
+ // we don't need to drop through to the further
+ // processing
+ add(buf);
+ patLocal.append((UChar) 0x7B /*'{'*/);
+ _appendToPat(patLocal, buf, FALSE);
+ patLocal.append((UChar) 0x7D /*'}'*/);
+ continue;
+ case SymbolTable::SYMBOL_REF:
+ // symbols nosymbols
+ // [a-$] error error (ambiguous)
+ // [a$] anchor anchor
+ // [a-$x] var "x"* literal '$'
+ // [a-$.] error literal '$'
+ // *We won't get here in the case of var "x"
+ {
+ chars.getPos(backup);
+ c = chars.next(opts, literal, ec);
+ if (U_FAILURE(ec)) return;
+ UBool anchor = (c == 0x5D /*']'*/ && !literal);
+ if (symbols == 0 && !anchor) {
+ c = SymbolTable::SYMBOL_REF;
+ chars.setPos(backup);
+ break; // literal '$'
+ }
+ if (anchor && op == 0) {
+ if (lastItem == 1) {
+ add(lastChar, lastChar);
+ _appendToPat(patLocal, lastChar, FALSE);
+ }
+ add(U_ETHER);
+ usePat = TRUE;
+ patLocal.append((UChar) SymbolTable::SYMBOL_REF);
+ patLocal.append((UChar) 0x5D /*']'*/);
+ mode = 2;
+ continue;
+ }
+ // syntaxError(chars, "Unquoted '$'");
+ ec = U_MALFORMED_SET;
+ return;
+ }
+ default:
+ break;
+ }
+ }
+
+ // -------- Parse literal characters. This includes both
+ // escaped chars ("\u4E01") and non-syntax characters
+ // ("a").
+
+ switch (lastItem) {
+ case 0:
+ lastItem = 1;
+ lastChar = c;
+ break;
+ case 1:
+ if (op == HYPHEN /*'-'*/) {
+ if (lastChar >= c) {
+ // Don't allow redundant (a-a) or empty (b-a) ranges;
+ // these are most likely typos.
+ // syntaxError(chars, "Invalid range");
+ ec = U_MALFORMED_SET;
+ return;
+ }
+ add(lastChar, c);
+ _appendToPat(patLocal, lastChar, FALSE);
+ patLocal.append(op);
+ _appendToPat(patLocal, c, FALSE);
+ lastItem = 0;
+ op = 0;
+ } else {
+ add(lastChar, lastChar);
+ _appendToPat(patLocal, lastChar, FALSE);
+ lastChar = c;
+ }
+ break;
+ case 2:
+ if (op != 0) {
+ // syntaxError(chars, "Set expected after operator");
+ ec = U_MALFORMED_SET;
+ return;
+ }
+ lastChar = c;
+ lastItem = 1;
+ break;
+ }
+ }
+
+ if (mode != 2) {
+ // syntaxError(chars, "Missing ']'");
+ ec = U_MALFORMED_SET;
+ return;
+ }
+
+ chars.skipIgnored(opts);
+
+ /**
+ * Handle global flags (invert, case insensitivity). If this
+ * pattern should be compiled case-insensitive, then we need
+ * to close over case BEFORE COMPLEMENTING. This makes
+ * patterns like /[^abc]/i work.
+ */
+ if ((options & USET_CASE_INSENSITIVE) != 0) {
+ (this->*caseClosure)(USET_CASE_INSENSITIVE); // caseClosure is called without a null check
+ }
+ else if ((options & USET_ADD_CASE_MAPPINGS) != 0) {
+ (this->*caseClosure)(USET_ADD_CASE_MAPPINGS);
+ }
+ if (invert) {
+ complement();
+ }
+
+ // Use the rebuilt pattern (patLocal) only if necessary. Prefer the
+ // generated pattern.
+ if (usePat) {
+ rebuiltPat.append(patLocal);
+ } else {
+ _generatePattern(rebuiltPat, FALSE);
+ }
+ if (isBogus() && U_SUCCESS(ec)) {
+ // We likely ran out of memory. AHHH!
+ ec = U_MEMORY_ALLOCATION_ERROR;
+ }
+}
+
+//----------------------------------------------------------------
+// Property set implementation
+//----------------------------------------------------------------
+
+namespace {
+
+static UBool numericValueFilter(UChar32 ch, void* context) { // context points to the double to compare against
+ return u_getNumericValue(ch) == *(double*)context;
+}
+
+static UBool generalCategoryMaskFilter(UChar32 ch, void* context) { // context points to an int32_t category mask
+ int32_t value = *(int32_t*)context;
+ return (U_GET_GC_MASK((UChar32) ch) & value) != 0;
+}
+
+static UBool versionFilter(UChar32 ch, void* context) { // context points to a UVersionInfo upper bound
+ static const UVersionInfo none = { 0, 0, 0, 0 };
+ UVersionInfo v;
+ u_charAge(ch, v);
+ UVersionInfo* version = (UVersionInfo*)context;
+ return uprv_memcmp(&v, &none, sizeof(v)) > 0 && uprv_memcmp(&v, version, sizeof(v)) <= 0; // age is not all-zero and compares <= the requested version
+}
+
+typedef struct { // context for intPropertyFilter: the property and the value it must equal
+ UProperty prop;
+ int32_t value;
+} IntPropertyContext;
+
+static UBool intPropertyFilter(UChar32 ch, void* context) { // context points to an IntPropertyContext
+ IntPropertyContext* c = (IntPropertyContext*)context;
+ return u_getIntPropertyValue((UChar32) ch, c->prop) == c->value;
+}
+
+static UBool scriptExtensionsFilter(UChar32 ch, void* context) { // context points to a UScriptCode
+ return uscript_hasScript(ch, *(UScriptCode*)context);
+}
+
+} // namespace
+
+/**
+ * Generic filter-based scanning code for UCD property UnicodeSets: clears this set, then adds every range of code points for which the filter returns true.
+ */
+void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
+ void* context,
+ const UnicodeSet* inclusions,
+ UErrorCode &status) {
+ if (U_FAILURE(status)) return;
+
+ // Logically, walk through all Unicode characters, noting the start
+ // and end of each range for which (*filter)(c, context) is
+ // true. Add each range to a set.
+ //
+ // To improve performance, use an inclusions set which
+ // encodes information about character ranges that are known
+ // to have identical properties.
+ // inclusions contains the first characters of
+ // same-value ranges for the given property.
+
+ clear();
+
+ UChar32 startHasProperty = -1; // -1 means no matching range is currently open
+ int32_t limitRange = inclusions->getRangeCount(); // inclusions is dereferenced without a null check
+
+ for (int j=0; j<limitRange; ++j) {
+ // get current range
+ UChar32 start = inclusions->getRangeStart(j);
+ UChar32 end = inclusions->getRangeEnd(j);
+
+ // for all the code points in the range, process
+ for (UChar32 ch = start; ch <= end; ++ch) {
+ // only add to this UnicodeSet on inflection points --
+ // where the hasProperty value changes to false
+ if ((*filter)(ch, context)) {
+ if (startHasProperty < 0) {
+ startHasProperty = ch;
+ }
+ } else if (startHasProperty >= 0) {
+ add(startHasProperty, ch-1);
+ startHasProperty = -1;
+ }
+ }
+ }
+ if (startHasProperty >= 0) { // a range still open after the last inclusion is closed at U+10FFFF
+ add((UChar32)startHasProperty, (UChar32)0x10FFFF);
+ }
+ if (isBogus() && U_SUCCESS(status)) {
+ // We likely ran out of memory. AHHH!
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+}
+
+namespace {
+
+static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { // copies src to dst with leading, trailing and repeated spaces removed
+ /* Note: we use ' ' in compiler code page */
+ int32_t j = 0;
+ char ch;
+ --dstCapacity; /* make room for term. zero */
+ while ((ch = *src++) != 0) {
+ if (ch == ' ' && (j==0 || (j>0 && dst[j-1]==' '))) { // skip a space at the start or directly after another space
+ continue;
+ }
+ if (j >= dstCapacity) return FALSE; // dst is too small; it is left without a terminator
+ dst[j++] = ch;
+ }
+ if (j > 0 && dst[j-1] == ' ') --j; // drop a single trailing space
+ dst[j] = 0;
+ return TRUE;
+}
+
+} // namespace
+
+//----------------------------------------------------------------
+// Property set API
+//----------------------------------------------------------------
+
+#define FAIL(ec) UPRV_BLOCK_MACRO_BEGIN { \
+ ec=U_ILLEGAL_ARGUMENT_ERROR; \
+ return *this; \
+} UPRV_BLOCK_MACRO_END /* sets ec to U_ILLEGAL_ARGUMENT_ERROR and returns *this, so it is only usable inside member functions returning the object */
+
+UnicodeSet&
+UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) {
+ if (U_FAILURE(ec) || isFrozen()) { return *this; } // a frozen set is returned untouched and ec is not changed
+ if (prop == UCHAR_GENERAL_CATEGORY_MASK) {
+ const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
+ applyFilter(generalCategoryMaskFilter, &value, inclusions, ec); // value is used as a category bit mask
+ } else if (prop == UCHAR_SCRIPT_EXTENSIONS) {
+ const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
+ UScriptCode script = (UScriptCode)value;
+ applyFilter(scriptExtensionsFilter, &script, inclusions, ec);
+ } else if (0 <= prop && prop < UCHAR_BINARY_LIMIT) {
+ if (value == 0 || value == 1) {
+ const USet *set = u_getBinaryPropertySet(prop, &ec);
+ if (U_FAILURE(ec)) { return *this; }
+ copyFrom(*UnicodeSet::fromUSet(set), TRUE);
+ if (value == 0) { // value 0 selects the code points that do not have the property
+ complement();
+ }
+ } else {
+ clear(); // any other value for a binary property yields the empty set
+ }
+ } else if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
+ const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
+ IntPropertyContext c = {prop, value};
+ applyFilter(intPropertyFilter, &c, inclusions, ec);
+ } else {
+ ec = U_ILLEGAL_ARGUMENT_ERROR; // property is outside every range handled above
+ }
+ return *this;
+}
+
+UnicodeSet&
+UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
+ const UnicodeString& value,
+ UErrorCode& ec) {
+ if (U_FAILURE(ec) || isFrozen()) return *this; // a frozen set is returned untouched and ec is not changed
+
+ // prop and value used to be converted to char * using the default
+ // converter instead of the invariant conversion.
+ // This should not be necessary because all Unicode property and value
+ // names use only invariant characters.
+ // If there are any variant characters, then we won't find them anyway.
+ // Checking first avoids assertion failures in the conversion.
+ if( !uprv_isInvariantUString(prop.getBuffer(), prop.length()) ||
+ !uprv_isInvariantUString(value.getBuffer(), value.length())
+ ) {
+ FAIL(ec);
+ }
+ CharString pname, vname;
+ pname.appendInvariantChars(prop, ec);
+ vname.appendInvariantChars(value, ec);
+ if (U_FAILURE(ec)) return *this;
+
+ UProperty p;
+ int32_t v;
+ UBool invert = FALSE; // set only for the special name Assigned below
+
+ if (value.length() > 0) {
+ p = u_getPropertyEnum(pname.data());
+ if (p == UCHAR_INVALID_CODE) FAIL(ec);
+
+ // Treat gc as gcm
+ if (p == UCHAR_GENERAL_CATEGORY) {
+ p = UCHAR_GENERAL_CATEGORY_MASK;
+ }
+
+ if ((p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) ||
+ (p >= UCHAR_INT_START && p < UCHAR_INT_LIMIT) ||
+ (p >= UCHAR_MASK_START && p < UCHAR_MASK_LIMIT)) {
+ v = u_getPropertyValueEnum(p, vname.data());
+ if (v == UCHAR_INVALID_CODE) {
+ // Handle numeric CCC
+ if (p == UCHAR_CANONICAL_COMBINING_CLASS ||
+ p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS ||
+ p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) {
+ char* end;
+ double val = uprv_strtod(vname.data(), &end);
+ // Anything between 0 and 255 is valid even if unused.
+ // Cast double->int only after range check.
+ // We catch NaN here because comparing it with both 0 and 255 will be false
+ // (as are all comparisons with NaN).
+ if (*end != 0 || !(0 <= val && val <= 255) ||
+ (v = (int32_t)val) != val) {
+ // non-integral value or outside 0..255, or trailing junk
+ FAIL(ec);
+ }
+ } else {
+ FAIL(ec);
+ }
+ }
+ }
+
+ else {
+
+ switch (p) {
+ case UCHAR_NUMERIC_VALUE:
+ {
+ char* end;
+ double val = uprv_strtod(vname.data(), &end);
+ if (*end != 0) {
+ FAIL(ec);
+ }
+ applyFilter(numericValueFilter, &val,
+ CharacterProperties::getInclusionsForProperty(p, ec), ec);
+ return *this;
+ }
+ case UCHAR_NAME:
+ {
+ // Must munge name, since u_charFromName() does not do
+ // 'loose' matching.
+ char buf[128]; // it suffices that this be > uprv_getMaxCharNameLength
+ if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec);
+ UChar32 ch = u_charFromName(U_EXTENDED_CHAR_NAME, buf, &ec);
+ if (U_SUCCESS(ec)) {
+ clear();
+ add(ch);
+ return *this;
+ } else {
+ FAIL(ec);
+ }
+ }
+ case UCHAR_UNICODE_1_NAME:
+ // ICU 49 deprecates the Unicode_1_Name property APIs.
+ FAIL(ec);
+ case UCHAR_AGE:
+ {
+ // Must munge name, since u_versionFromString() does not do
+ // 'loose' matching.
+ char buf[128];
+ if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec);
+ UVersionInfo version;
+ u_versionFromString(version, buf);
+ applyFilter(versionFilter, &version,
+ CharacterProperties::getInclusionsForProperty(p, ec), ec);
+ return *this;
+ }
+ case UCHAR_SCRIPT_EXTENSIONS:
+ v = u_getPropertyValueEnum(UCHAR_SCRIPT, vname.data()); // the value is looked up as a Script value name
+ if (v == UCHAR_INVALID_CODE) {
+ FAIL(ec);
+ }
+ // fall through to calling applyIntPropertyValue()
+ break;
+ default:
+ // p is a non-binary, non-enumerated property that we
+ // don't support (yet).
+ FAIL(ec);
+ }
+ }
+ }
+
+ else {
+ // value is empty. Interpret as General Category, Script, Binary
+ // property, or one of the special names ANY, ASCII, Assigned.
+ p = UCHAR_GENERAL_CATEGORY_MASK;
+ v = u_getPropertyValueEnum(p, pname.data());
+ if (v == UCHAR_INVALID_CODE) {
+ p = UCHAR_SCRIPT;
+ v = u_getPropertyValueEnum(p, pname.data());
+ if (v == UCHAR_INVALID_CODE) {
+ p = u_getPropertyEnum(pname.data());
+ if (p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) {
+ v = 1;
+ } else if (0 == uprv_comparePropertyNames(ANY, pname.data())) {
+ set(MIN_VALUE, MAX_VALUE);
+ return *this;
+ } else if (0 == uprv_comparePropertyNames(ASCII, pname.data())) {
+ set(0, 0x7F);
+ return *this;
+ } else if (0 == uprv_comparePropertyNames(ASSIGNED, pname.data())) {
+ // [:Assigned:]=[:^Cn:]
+ p = UCHAR_GENERAL_CATEGORY_MASK;
+ v = U_GC_CN_MASK;
+ invert = TRUE;
+ } else {
+ FAIL(ec);
+ }
+ }
+ }
+ }
+
+ applyIntPropertyValue(p, v, ec); // shared exit for every branch that produced a (p, v) pair
+ if(invert) {
+ complement();
+ }
+
+ if (isBogus() && U_SUCCESS(ec)) {
+ // We likely ran out of memory. AHHH!
+ ec = U_MEMORY_ALLOCATION_ERROR;
+ }
+ return *this;
+}
+
+//----------------------------------------------------------------
+// Property set patterns
+//----------------------------------------------------------------
+
+/**
+ * Return true if the given position, in the given pattern, appears
+ * to be the start of a property set pattern.
+ */
+UBool UnicodeSet::resemblesPropertyPattern(const UnicodeString& pattern,
+ int32_t pos) {
+ // Patterns are at least 5 characters long
+ if ((pos+5) > pattern.length()) {
+ return FALSE;
+ }
+
+ // Look for an opening [:, [:^, \p, \P, or \N
+ return isPOSIXOpen(pattern, pos) || isPerlOpen(pattern, pos) || isNameOpen(pattern, pos);
+}
+
+/**
+ * Return true if the given iterator appears to point at a
+ * property pattern. Regardless of the result, return with the
+ * iterator unchanged.
+ * @param chars iterator over the pattern characters. Upon return
+ * it will be unchanged.
+ * @param iterOpts RuleCharacterIterator options; PARSE_ESCAPES is always cleared, and SKIP_WHITESPACE is cleared for the second character
+ */
+UBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars,
+ int32_t iterOpts) {
+ // NOTE: literal will always be FALSE, because we don't parse escapes.
+ UBool result = FALSE, literal;
+ UErrorCode ec = U_ZERO_ERROR; // local error code; a failure here only turns the answer into FALSE
+ iterOpts &= ~RuleCharacterIterator::PARSE_ESCAPES;
+ RuleCharacterIterator::Pos pos;
+ chars.getPos(pos);
+ UChar32 c = chars.next(iterOpts, literal, ec);
+ if (c == 0x5B /*'['*/ || c == 0x5C /*'\\'*/) {
+ UChar32 d = chars.next(iterOpts & ~RuleCharacterIterator::SKIP_WHITESPACE,
+ literal, ec);
+ result = (c == 0x5B /*'['*/) ? (d == 0x3A /*':'*/) :
+ (d == 0x4E /*'N'*/ || d == 0x70 /*'p'*/ || d == 0x50 /*'P'*/);
+ }
+ chars.setPos(pos); // restore the position saved before peeking
+ return result && U_SUCCESS(ec);
+}
+
+/**
+ * Parse the given property pattern ([:...:], \p{...}, \P{...} or \N{...}) at the given parse position; ppos is moved past the close delimiter only on success.
+ */
+UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
+ ParsePosition& ppos,
+ UErrorCode &ec) {
+ int32_t pos = ppos.getIndex();
+
+ UBool posix = FALSE; // true for [:pat:], false for \p{pat} \P{pat} \N{pat}
+ UBool isName = FALSE; // true for \N{pat}, o/w false
+ UBool invert = FALSE;
+
+ if (U_FAILURE(ec)) return *this;
+
+ // Minimum length is 5 characters, e.g. \p{L}
+ if ((pos+5) > pattern.length()) {
+ FAIL(ec);
+ }
+
+ // On entry, ppos should point to one of the following locations:
+ // Look for an opening [:, [:^, \p, \P, or \N
+ if (isPOSIXOpen(pattern, pos)) {
+ posix = TRUE;
+ pos += 2;
+ pos = ICU_Utility::skipWhitespace(pattern, pos);
+ if (pos < pattern.length() && pattern.charAt(pos) == COMPLEMENT) {
+ ++pos;
+ invert = TRUE;
+ }
+ } else if (isPerlOpen(pattern, pos) || isNameOpen(pattern, pos)) {
+ UChar c = pattern.charAt(pos+1);
+ invert = (c == UPPER_P); // the uppercase P form negates the set
+ isName = (c == UPPER_N);
+ pos += 2;
+ pos = ICU_Utility::skipWhitespace(pattern, pos);
+ if (pos == pattern.length() || pattern.charAt(pos++) != OPEN_BRACE) {
+ // Syntax error; "\p", "\P" or "\N" not followed by "{"
+ FAIL(ec);
+ }
+ } else {
+ // Open delimiter not seen
+ FAIL(ec);
+ }
+
+ // Look for the matching close delimiter, either :] or }
+ int32_t close;
+ if (posix) {
+ close = pattern.indexOf(POSIX_CLOSE, 2, pos);
+ } else {
+ close = pattern.indexOf(CLOSE_BRACE, pos);
+ }
+ if (close < 0) {
+ // Syntax error; close delimiter missing
+ FAIL(ec);
+ }
+
+ // Look for an '=' sign. If this is present, we will parse a
+ // medium \p{gc=Cf} or long \p{GeneralCategory=Format}
+ // pattern.
+ int32_t equals = pattern.indexOf(EQUALS, pos);
+ UnicodeString propName, valueName;
+ if (equals >= 0 && equals < close && !isName) { // for the name form an equals sign stays part of the name
+ // Equals seen; parse medium/long pattern
+ pattern.extractBetween(pos, equals, propName);
+ pattern.extractBetween(equals+1, close, valueName);
+ }
+
+ else {
+ // Handle case where no '=' is seen, and \N{}
+ pattern.extractBetween(pos, close, propName);
+
+ // Handle \N{name}
+ if (isName) {
+ // This is a little inefficient since it means we have to
+ // parse NAME_PROP back to UCHAR_NAME even though we already
+ // know it's UCHAR_NAME. If we refactor the API to
+ // support args of (UProperty, char*) then we can remove
+ // NAME_PROP and make this a little more efficient.
+ valueName = propName;
+ propName = UnicodeString(NAME_PROP, NAME_PROP_LENGTH, US_INV);
+ }
+ }
+
+ applyPropertyAlias(propName, valueName, ec);
+
+ if (U_SUCCESS(ec)) {
+ if (invert) {
+ complement();
+ }
+
+ // Move to the limit position after the close delimiter if the
+ // parse succeeded.
+ ppos.setIndex(close + (posix ? 2 : 1)); // the POSIX close delimiter is two characters, the brace is one
+ }
+
+ return *this;
+}
+
+/**
+ * Parse a property pattern by looking ahead in the iterator and
+ * delegating to the UnicodeString/ParsePosition overload.
+ * @param chars iterator over the pattern characters. It is only
+ * advanced (via jumpahead) when parsing succeeds.
+ * @param rebuiltPat the consumed part of the lookahead text is appended
+ * to this string.
+ * @param ec error code; set to U_MALFORMED_SET if nothing was consumed.
+ */
+void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars,
+ UnicodeString& rebuiltPat,
+ UErrorCode& ec) {
+ if (U_FAILURE(ec)) return;
+ UnicodeString pattern;
+ chars.lookahead(pattern);
+ ParsePosition pos(0); // index into the lookahead text, not into the original pattern
+ applyPropertyPattern(pattern, pos, ec);
+ if (U_FAILURE(ec)) return;
+ if (pos.getIndex() == 0) {
+ // syntaxError(chars, "Invalid property pattern");
+ ec = U_MALFORMED_SET;
+ return;
+ }
+ chars.jumpahead(pos.getIndex());
+ rebuiltPat.append(pattern, 0, pos.getIndex());
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/unisetspan.cpp b/thirdparty/icu4c/common/unisetspan.cpp
new file mode 100644
index 0000000000..68e44d91ee
--- /dev/null
+++ b/thirdparty/icu4c/common/unisetspan.cpp
@@ -0,0 +1,1509 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2007-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: unisetspan.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2007mar01
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uniset.h"
+#include "unicode/ustring.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "uvector.h"
+#include "unisetspan.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ * List of offsets from the current position from where to try matching
+ * a code point or a string.
+ * Store offsets rather than indexes to simplify the code and use the same list
+ * for both increments (in span()) and decrements (in spanBack()).
+ *
+ * Assumption: The maximum offset is limited, and the offsets that are stored
+ * at any one time are relatively dense, that is, there are normally no gaps of
+ * hundreds or thousands of offset values.
+ *
+ * The implementation uses a circular buffer of byte flags,
+ * each indicating whether the corresponding offset is in the list.
+ * This avoids inserting into a sorted list of offsets (or absolute indexes) and
+ * physically moving part of the list.
+ *
+ * Note: In principle, the caller should setMaxLength() to the maximum of the
+ * max string length and U16_LENGTH/U8_LENGTH to account for
+ * "long" single code points.
+ * However, this implementation uses at least a staticList with more than
+ * U8_LENGTH entries anyway.
+ *
+ * Note: If maxLength were guaranteed to be no more than 32 or 64,
+ * the list could be stored as bit flags in a single integer.
+ * Rather than handling a circular buffer with a start list index,
+ * the integer would simply be shifted when lower offsets are removed.
+ * UnicodeSet does not have a limit on the lengths of strings.
+ */
+class OffsetList {  // Only ever stack-allocated, does not need to inherit UMemory.
+public:
+    // Starts out pointing at the in-object staticList with a capacity of 0;
+    // setMaxLength() establishes the usable capacity.
+    OffsetList() : list(staticList), capacity(0), length(0), start(0) {}
+
+    // Frees the flag buffer only if setMaxLength() replaced staticList.
+    ~OffsetList() {
+        if(list!=staticList) {
+            uprv_free(list);
+        }
+    }
+
+    // Call exactly once if the list is to be used.
+    // Keeps staticList if maxLength fits into it; otherwise tries to allocate
+    // maxLength flags. If that allocation fails, list and capacity stay as they were.
+    void setMaxLength(int32_t maxLength) {
+        const int32_t staticCapacity=(int32_t)sizeof(staticList);
+        if(maxLength>staticCapacity) {
+            UBool *heapList=(UBool *)uprv_malloc(maxLength);
+            if(heapList!=NULL) {
+                list=heapList;
+                capacity=maxLength;
+            }
+        } else {
+            capacity=staticCapacity;
+        }
+        uprv_memset(list, 0, capacity);
+    }
+
+    // Remove all offsets and reset the circular buffer to its initial state.
+    void clear() {
+        uprv_memset(list, 0, capacity);
+        length=0;
+        start=0;
+    }
+
+    UBool isEmpty() const {
+        return (UBool)(length==0);
+    }
+
+    // Reduce all stored offsets by delta, used when the current position
+    // moves by delta.
+    // There must not be any offsets lower than delta.
+    // If there is an offset equal to delta, it is removed.
+    // delta=[1..maxLength]
+    void shift(int32_t delta) {
+        const int32_t newStart=wrap(start+delta);
+        if(list[newStart]) {
+            list[newStart]=FALSE;
+            --length;
+        }
+        start=newStart;
+    }
+
+    // Add an offset. The list must not contain it yet.
+    // offset=[1..maxLength]
+    void addOffset(int32_t offset) {
+        list[wrap(start+offset)]=TRUE;
+        ++length;
+    }
+
+    // offset=[1..maxLength]
+    UBool containsOffset(int32_t offset) const {
+        return list[wrap(start+offset)];
+    }
+
+    // Find the lowest stored offset from a non-empty list, remove it,
+    // and reduce all other offsets by this minimum.
+    // Returns [1..maxLength].
+    int32_t popMinimum() {
+        // First pass: look for the next offset in list[start+1..capacity-1].
+        for(int32_t i=start+1; i<capacity; ++i) {
+            if(list[i]) {
+                list[i]=FALSE;
+                --length;
+                const int32_t minimum=i-start;
+                start=i;
+                return minimum;
+            }
+        }
+
+        // Second pass: wrap around and look for the next offset in list[0..start].
+        // Since the list is not empty, there will be one.
+        const int32_t distanceToEnd=capacity-start;
+        int32_t i=0;
+        while(!list[i]) {
+            ++i;
+        }
+        list[i]=FALSE;
+        --length;
+        start=i;
+        return distanceToEnd+i;
+    }
+
+private:
+    // Map a logical index (start plus an offset) into the circular buffer.
+    // One subtraction suffices because offsets do not exceed maxLength.
+    int32_t wrap(int32_t index) const {
+        return index>=capacity ? index-capacity : index;
+    }
+
+    UBool *list;
+    int32_t capacity;
+    int32_t length;
+    int32_t start;
+
+    UBool staticList[16];
+};
+
+// Preflight the number of UTF-8 bytes needed for a UTF-16 (sub)string.
+// Returns 0 if the conversion fails with anything other than a buffer overflow.
+static int32_t
+getUTF8Length(const UChar *s, int32_t length) {
+    int32_t utf8Size=0;
+    UErrorCode status=U_ZERO_ERROR;
+    // NULL destination with capacity 0: only the required length is reported.
+    u_strToUTF8(NULL, 0, &utf8Size, s, length, &status);
+    if(!U_SUCCESS(status) && status!=U_BUFFER_OVERFLOW_ERROR) {
+        // The string contains an unpaired surrogate.
+        // Ignore this string.
+        return 0;
+    }
+    return utf8Size;
+}
+
+// Convert the UTF-16 string s to UTF-8 at t (at most capacity bytes)
+// and return the number of UTF-8 bytes appended, or 0 on any conversion error.
+static int32_t
+appendUTF8(const UChar *s, int32_t length, uint8_t *t, int32_t capacity) {
+    int32_t written=0;
+    UErrorCode status=U_ZERO_ERROR;
+    u_strToUTF8((char *)t, capacity, &written, s, length, &status);
+    if(!U_SUCCESS(status)) {
+        // The string contains an unpaired surrogate.
+        // Ignore this string.
+        return 0;
+    }
+    return written;
+}
+
+// Squeeze a span length into one byte: lengths of 0xfe and above are all
+// stored as 0xfe (==UnicodeSetStringSpan::LONG_SPAN).
+static inline uint8_t
+makeSpanLengthByte(int32_t spanLength) {
+    if(spanLength>=0xfe) {
+        return (uint8_t)0xfe;
+    }
+    return (uint8_t)spanLength;
+}
+
+// Construct for all variants of span(), or only for any one variant.
+// Initialize as little as possible, for single use.
+//
+// set: the code points of this set are copied into spanSet
+// (spanSet starts as 0..0x10ffff and is then intersected with set).
+// setStrings: the strings of the parent set; used to initialize the strings member.
+// which: bit set of flags tested below (UTF16, UTF8, CONTAINED, NOT_CONTAINED, FWD, BACK);
+// which==ALL selects the "all variants" mode.
+//
+// On return, if no string is relevant or the meta data allocation failed,
+// maxLength16 and maxLength8 are 0 and utf8Lengths/spanLengths/utf8 may still be NULL.
+UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
+ const UVector &setStrings,
+ uint32_t which)
+ : spanSet(0, 0x10ffff), pSpanNotSet(NULL), strings(setStrings),
+ utf8Lengths(NULL), spanLengths(NULL), utf8(NULL),
+ utf8Length(0),
+ maxLength16(0), maxLength8(0),
+ all((UBool)(which==ALL)) {
+ spanSet.retainAll(set);
+ if(which&NOT_CONTAINED) {
+ // Default to the same sets.
+ // addToSpanNotSet() will create a separate set if necessary.
+ pSpanNotSet=&spanSet;
+ }
+
+ // Determine if the strings even need to be taken into account at all for span() etc.
+ // If any string is relevant, then all strings need to be used for
+ // span(longest match) but only the relevant ones for span(while contained).
+ // TODO: Possible optimization: Distinguish CONTAINED vs. LONGEST_MATCH
+ // and do not store UTF-8 strings if !thisRelevant and CONTAINED.
+ // (Only store irrelevant UTF-8 strings for LONGEST_MATCH where they are relevant after all.)
+ // Also count the lengths of the UTF-8 versions of the strings for memory allocation.
+ int32_t stringsLength=strings.size();
+
+ int32_t i, spanLength;
+ UBool someRelevant=FALSE;
+ // First pass: classify each string and accumulate maxLength16, maxLength8 and utf8Length.
+ for(i=0; i<stringsLength; ++i) {
+ const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
+ const UChar *s16=string.getBuffer();
+ int32_t length16=string.length();
+ UBool thisRelevant;
+ // A string is relevant if the code point span does not already cover all of it.
+ spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
+ if(spanLength<length16) { // Relevant string.
+ someRelevant=thisRelevant=TRUE;
+ } else {
+ thisRelevant=FALSE;
+ }
+ if((which&UTF16) && length16>maxLength16) {
+ maxLength16=length16;
+ }
+ if((which&UTF8) && (thisRelevant || (which&CONTAINED))) {
+ int32_t length8=getUTF8Length(s16, length16);
+ utf8Length+=length8;
+ if(length8>maxLength8) {
+ maxLength8=length8;
+ }
+ }
+ }
+ if(!someRelevant) {
+ // No string is relevant: reset the maximum lengths and skip building any meta data.
+ maxLength16=maxLength8=0;
+ return;
+ }
+
+ // Freeze after checking for the need to use strings at all because freezing
+ // a set takes some time and memory which are wasted if there are no relevant strings.
+ if(all) {
+ spanSet.freeze();
+ }
+
+ uint8_t *spanBackLengths;
+ uint8_t *spanUTF8Lengths;
+ uint8_t *spanBackUTF8Lengths;
+
+ // Allocate a block of meta data.
+ // Sizes are in bytes: 4 per int32_t UTF-8 length, 1 per span length byte.
+ int32_t allocSize;
+ if(all) {
+ // UTF-8 lengths, 4 sets of span lengths, UTF-8 strings.
+ allocSize=stringsLength*(4+1+1+1+1)+utf8Length;
+ } else {
+ allocSize=stringsLength; // One set of span lengths.
+ if(which&UTF8) {
+ // UTF-8 lengths and UTF-8 strings.
+ allocSize+=stringsLength*4+utf8Length;
+ }
+ }
+ // Use the in-object staticLengths buffer when the block fits, otherwise the heap.
+ if(allocSize<=(int32_t)sizeof(staticLengths)) {
+ utf8Lengths=staticLengths;
+ } else {
+ utf8Lengths=(int32_t *)uprv_malloc(allocSize);
+ if(utf8Lengths==NULL) {
+ maxLength16=maxLength8=0; // Prevent usage by making needsStringSpanUTF16/8() return FALSE.
+ return; // Out of memory.
+ }
+ }
+
+ // Carve the block into its parts. The int32_t UTF-8 lengths come first,
+ // followed by the byte arrays and finally the UTF-8 string bytes.
+ if(all) {
+ // Store span lengths for all span() variants.
+ spanLengths=(uint8_t *)(utf8Lengths+stringsLength);
+ spanBackLengths=spanLengths+stringsLength;
+ spanUTF8Lengths=spanBackLengths+stringsLength;
+ spanBackUTF8Lengths=spanUTF8Lengths+stringsLength;
+ utf8=spanBackUTF8Lengths+stringsLength;
+ } else {
+ // Store span lengths for only one span() variant.
+ if(which&UTF8) {
+ spanLengths=(uint8_t *)(utf8Lengths+stringsLength);
+ utf8=spanLengths+stringsLength;
+ } else {
+ spanLengths=(uint8_t *)utf8Lengths;
+ }
+ // Single variant: all four names alias the one span lengths array.
+ spanBackLengths=spanUTF8Lengths=spanBackUTF8Lengths=spanLengths;
+ }
+
+ // Set the meta data and pSpanNotSet and write the UTF-8 strings.
+ int32_t utf8Count=0; // Count UTF-8 bytes written so far.
+
+ // Second pass: fill in per-string span lengths, UTF-8 lengths and UTF-8 bytes.
+ for(i=0; i<stringsLength; ++i) {
+ const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
+ const UChar *s16=string.getBuffer();
+ int32_t length16=string.length();
+ spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
+ if(spanLength<length16) { // Relevant string.
+ if(which&UTF16) {
+ if(which&CONTAINED) {
+ if(which&FWD) {
+ spanLengths[i]=makeSpanLengthByte(spanLength);
+ }
+ if(which&BACK) {
+ // spanBack() returns a start index; convert it to the length of the trailing span.
+ spanLength=length16-spanSet.spanBack(s16, length16, USET_SPAN_CONTAINED);
+ spanBackLengths[i]=makeSpanLengthByte(spanLength);
+ }
+ } else /* not CONTAINED, not all, but NOT_CONTAINED */ {
+ spanLengths[i]=spanBackLengths[i]=0; // Only store a relevant/irrelevant flag.
+ }
+ }
+ if(which&UTF8) {
+ uint8_t *s8=utf8+utf8Count;
+ int32_t length8=appendUTF8(s16, length16, s8, utf8Length-utf8Count);
+ utf8Count+=utf8Lengths[i]=length8;
+ if(length8==0) { // Irrelevant for UTF-8 because not representable in UTF-8.
+ spanUTF8Lengths[i]=spanBackUTF8Lengths[i]=(uint8_t)ALL_CP_CONTAINED;
+ } else { // Relevant for UTF-8.
+ if(which&CONTAINED) {
+ if(which&FWD) {
+ spanLength=spanSet.spanUTF8((const char *)s8, length8, USET_SPAN_CONTAINED);
+ spanUTF8Lengths[i]=makeSpanLengthByte(spanLength);
+ }
+ if(which&BACK) {
+ spanLength=length8-spanSet.spanBackUTF8((const char *)s8, length8, USET_SPAN_CONTAINED);
+ spanBackUTF8Lengths[i]=makeSpanLengthByte(spanLength);
+ }
+ } else /* not CONTAINED, not all, but NOT_CONTAINED */ {
+ spanUTF8Lengths[i]=spanBackUTF8Lengths[i]=0; // Only store a relevant/irrelevant flag.
+ }
+ }
+ }
+ if(which&NOT_CONTAINED) {
+ // Add string start and end code points to the spanNotSet so that
+ // a span(while not contained) stops before any string.
+ UChar32 c;
+ if(which&FWD) {
+ int32_t len=0;
+ U16_NEXT(s16, len, length16, c);
+ addToSpanNotSet(c);
+ }
+ if(which&BACK) {
+ int32_t len=length16;
+ U16_PREV(s16, 0, len, c);
+ addToSpanNotSet(c);
+ }
+ }
+ } else { // Irrelevant string.
+ if(which&UTF8) {
+ if(which&CONTAINED) { // Only necessary for LONGEST_MATCH.
+ uint8_t *s8=utf8+utf8Count;
+ int32_t length8=appendUTF8(s16, length16, s8, utf8Length-utf8Count);
+ utf8Count+=utf8Lengths[i]=length8;
+ } else {
+ utf8Lengths[i]=0;
+ }
+ }
+ if(all) {
+ spanLengths[i]=spanBackLengths[i]=
+ spanUTF8Lengths[i]=spanBackUTF8Lengths[i]=
+ (uint8_t)ALL_CP_CONTAINED;
+ } else {
+ // All spanXYZLengths pointers contain the same address.
+ spanLengths[i]=(uint8_t)ALL_CP_CONTAINED;
+ }
+ }
+ }
+
+ // Finish.
+ if(all) {
+ pSpanNotSet->freeze();
+ }
+}
+
+// Copy constructor. Assumes which==ALL for a frozen set.
+// Duplicates the other object's sets and its whole meta data block, but uses
+// newParentSetStrings for the strings member.
+UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSetStringSpan &otherStringSpan,
+                                           const UVector &newParentSetStrings)
+        : spanSet(otherStringSpan.spanSet), pSpanNotSet(NULL), strings(newParentSetStrings),
+          utf8Lengths(NULL), spanLengths(NULL), utf8(NULL),
+          utf8Length(otherStringSpan.utf8Length),
+          maxLength16(otherStringSpan.maxLength16), maxLength8(otherStringSpan.maxLength8),
+          all(TRUE) {
+    // Mirror the other object's sharing: either a separate clone of its
+    // span-not set, or our own spanSet if it used its spanSet for both.
+    if(otherStringSpan.pSpanNotSet!=&otherStringSpan.spanSet) {
+        pSpanNotSet=otherStringSpan.pSpanNotSet->clone();
+    } else {
+        pSpanNotSet=&spanSet;
+    }
+
+    // Allocate a block of meta data.
+    // UTF-8 lengths, 4 sets of span lengths, UTF-8 strings.
+    const int32_t stringCount=strings.size();
+    const int32_t blockSize=stringCount*(4+1+1+1+1)+utf8Length;
+    if(blockSize>(int32_t)sizeof(staticLengths)) {
+        utf8Lengths=(int32_t *)uprv_malloc(blockSize);
+        if(utf8Lengths==NULL) {
+            maxLength16=maxLength8=0;  // Prevent usage by making needsStringSpanUTF16/8() return FALSE.
+            return;  // Out of memory.
+        }
+    } else {
+        utf8Lengths=staticLengths;
+    }
+
+    // The span length bytes follow the int32_t lengths; the UTF-8 bytes follow
+    // the four span length arrays. Copy the whole block in one go.
+    spanLengths=(uint8_t *)(utf8Lengths+stringCount);
+    utf8=spanLengths+stringCount*4;
+    uprv_memcpy(utf8Lengths, otherStringSpan.utf8Lengths, blockSize);
+}
+
+// Release whatever this object allocated itself: a separate span-not set
+// and a heap-allocated meta data block.
+UnicodeSetStringSpan::~UnicodeSetStringSpan() {
+    // pSpanNotSet may alias the spanSet member, which must not be deleted.
+    // Deleting a NULL pointer is a no-op.
+    if(pSpanNotSet!=&spanSet) {
+        delete pSpanNotSet;
+    }
+    // utf8Lengths may be NULL or point at the in-object staticLengths buffer.
+    const UBool ownsHeapBlock=(UBool)(utf8Lengths!=NULL && utf8Lengths!=staticLengths);
+    if(ownsHeapBlock) {
+        uprv_free(utf8Lengths);
+    }
+}
+
+// Add c to the span-not set. While that set is still shared with spanSet
+// (or not set at all), a thawed clone of spanSet is created first,
+// but only if c is not already contained.
+void UnicodeSetStringSpan::addToSpanNotSet(UChar32 c) {
+    const UBool hasOwnSet=(UBool)(pSpanNotSet!=NULL && pSpanNotSet!=&spanSet);
+    if(!hasOwnSet) {
+        if(spanSet.contains(c)) {
+            return;  // Nothing to do.
+        }
+        UnicodeSet *thawedCopy=spanSet.cloneAsThawed();
+        if(thawedCopy==NULL) {
+            return;  // Out of memory.
+        }
+        pSpanNotSet=thawedCopy;
+    }
+    pSpanNotSet->add(c);
+}
+
+// Compare the first length UTF-16 code units of s and t.
+// No argument checks. Requires length>0: the first unit is always compared.
+static inline UBool
+matches16(const UChar *s, const UChar *t, int32_t length) {
+    int32_t i=0;
+    do {
+        if(s[i]!=t[i]) {
+            return FALSE;
+        }
+    } while(++i<length);
+    return TRUE;
+}
+
+// Compare the first length bytes of s and t.
+// No argument checks. Requires length>0: the first byte is always compared.
+static inline UBool
+matches8(const uint8_t *s, const uint8_t *t, int32_t length) {
+    int32_t i=0;
+    do {
+        if(s[i]!=t[i]) {
+            return FALSE;
+        }
+    } while(++i<length);
+    return TRUE;
+}
+
+// Compare 16-bit Unicode strings (which may be malformed UTF-16)
+// at code point boundaries.
+// That is, each edge of a match must not be in the middle of a surrogate pair.
+// s[start..limit[ is the text, t[0..length[ is the string to match at s+start.
+static inline UBool
+matches16CPB(const UChar *s, int32_t start, int32_t limit, const UChar *t, int32_t length) {
+    const UChar *text=s+start;
+    const int32_t remaining=limit-start;
+    if(!matches16(text, t, length)) {
+        return FALSE;
+    }
+    // Reject a match that starts between a lead and a trail surrogate.
+    if(0<start && U16_IS_LEAD(text[-1]) && U16_IS_TRAIL(text[0])) {
+        return FALSE;
+    }
+    // Reject a match that ends between a lead and a trail surrogate.
+    if(length<remaining && U16_IS_LEAD(text[length-1]) && U16_IS_TRAIL(text[length])) {
+        return FALSE;
+    }
+    return TRUE;
+}
+
+// Does the set contain the next code point?
+// If so, return its length; otherwise return its negative length.
+// A lead surrogate counts as a pair only if a trail surrogate follows within length.
+static inline int32_t
+spanOne(const UnicodeSet &set, const UChar *s, int32_t length) {
+    const UChar first=*s;
+    if(U16_IS_LEAD(first) && length>=2) {
+        const UChar second=s[1];
+        if(U16_IS_TRAIL(second)) {
+            return set.contains(U16_GET_SUPPLEMENTARY(first, second)) ? 2 : -2;
+        }
+    }
+    return set.contains(first) ? 1 : -1;
+}
+
+// Does the set contain the code point that ends at s+length?
+// If so, return its length; otherwise return its negative length.
+// A trail surrogate counts as a pair only if a lead surrogate precedes it within the text.
+static inline int32_t
+spanOneBack(const UnicodeSet &set, const UChar *s, int32_t length) {
+    const UChar last=s[length-1];
+    if(U16_IS_TRAIL(last) && length>=2) {
+        const UChar previous=s[length-2];
+        if(U16_IS_LEAD(previous)) {
+            return set.contains(U16_GET_SUPPLEMENTARY(previous, last)) ? 2 : -2;
+        }
+    }
+    return set.contains(last) ? 1 : -1;
+}
+
+// UTF-8 version of spanOne(): test the code point that starts at s.
+// Returns its byte length if the set contains it, otherwise its negative byte length.
+static inline int32_t
+spanOneUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {
+    UChar32 c=*s;
+    if(!U8_IS_SINGLE(c)) {
+        // Take advantage of non-ASCII fastpaths in U8_NEXT_OR_FFFD().
+        int32_t consumed=0;
+        U8_NEXT_OR_FFFD(s, consumed, length, c);
+        return set.contains(c) ? consumed : -consumed;
+    }
+    return set.contains(c) ? 1 : -1;
+}
+
+// UTF-8 version of spanOneBack(): test the code point that ends at s+length.
+// Returns its byte length if the set contains it, otherwise its negative byte length.
+static inline int32_t
+spanOneBackUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {
+    const int32_t lastIndex=length-1;
+    UChar32 c=s[lastIndex];
+    if(U8_IS_SINGLE(c)) {
+        return set.contains(c) ? 1 : -1;
+    }
+    // Multi-byte sequence: let the helper move cpStart back to where it begins.
+    int32_t cpStart=lastIndex;
+    c=utf8_prevCharSafeBody(s, 0, &cpStart, c, -3);
+    const int32_t cpLength=length-cpStart;
+    return set.contains(c) ? cpLength : -cpLength;
+}
+
+/*
+ * Note: In span() when spanLength==0 (after a string match, or at the beginning
+ * after an empty code point span) and in spanNot() and spanNotUTF8(),
+ * string matching could use a binary search
+ * because all string matches are done from the same start index.
+ *
+ * For UTF-8, this would require a comparison function that returns UTF-16 order.
+ *
+ * This optimization should not be necessary for normal UnicodeSets because
+ * most sets have no strings, and most sets with strings have
+ * very few very short strings.
+ * For cases with many strings, it might be better to use a different API
+ * and implementation with a DFA (state machine).
+ */
+
+/*
+ * Algorithm for span(USET_SPAN_CONTAINED)
+ *
+ * Theoretical algorithm:
+ * - Iterate through the string, and at each code point boundary:
+ * + If the code point there is in the set, then remember to continue after it.
+ * + If a set string matches at the current position, then remember to continue after it.
+ * + Either recursively span for each code point or string match,
+ * or recursively span for all but the shortest one and
+ * iteratively continue the span with the shortest local match.
+ * + Remember the longest recursive span (the farthest end point).
+ * + If there is no match at the current position, neither for the code point there
+ * nor for any set string, then stop and return the longest recursive span length.
+ *
+ * Optimized implementation:
+ *
+ * (We assume that most sets will have very few very short strings.
+ * A span using a string-less set is extremely fast.)
+ *
+ * Create and cache a spanSet which contains all of the single code points
+ * of the original set but none of its strings.
+ *
+ * - Start with spanLength=spanSet.span(USET_SPAN_CONTAINED).
+ * - Loop:
+ * + Try to match each set string at the end of the spanLength.
+ * ~ Set strings that start with set-contained code points must be matched
+ * with a partial overlap because the recursive algorithm would have tried
+ * to match them at every position.
+ * ~ Set strings that entirely consist of set-contained code points
+ * are irrelevant for span(USET_SPAN_CONTAINED) because the
+ * recursive algorithm would continue after them anyway
+ * and find the longest recursive match from their end.
+ * ~ Rather than recursing, note each end point of a set string match.
+ * + If no set string matched after spanSet.span(), then return
+ * with where the spanSet.span() ended.
+ * + If at least one set string matched after spanSet.span(), then
+ * pop the shortest string match end point and continue
+ * the loop, trying to match all set strings from there.
+ * + If at least one more set string matched after a previous string match,
+ * then test if the code point after the previous string match is also
+ * contained in the set.
+ * Continue the loop with the shortest end point of either this code point
+ * or a matching set string.
+ * + If no more set string matched after a previous string match,
+ * then try another spanLength=spanSet.span(USET_SPAN_CONTAINED).
+ * Stop if spanLength==0, otherwise continue the loop.
+ *
+ * By noting each end point of a set string match,
+ * the function visits each string position at most once and finishes
+ * in linear time.
+ *
+ * The recursive algorithm may visit the same string position many times
+ * if multiple paths lead to it and finishes in exponential time.
+ */
+
+/*
+ * Algorithm for span(USET_SPAN_SIMPLE)
+ *
+ * Theoretical algorithm:
+ * - Iterate through the string, and at each code point boundary:
+ * + If the code point there is in the set, then remember to continue after it.
+ * + If a set string matches at the current position, then remember to continue after it.
+ * + Continue from the farthest match position and ignore all others.
+ * + If there is no match at the current position,
+ * then stop and return the current position.
+ *
+ * Optimized implementation:
+ *
+ * (Same assumption and spanSet as above.)
+ *
+ * - Start with spanLength=spanSet.span(USET_SPAN_CONTAINED).
+ * - Loop:
+ * + Try to match each set string at the end of the spanLength.
+ * ~ Set strings that start with set-contained code points must be matched
+ * with a partial overlap because the standard algorithm would have tried
+ * to match them earlier.
+ * ~ Set strings that entirely consist of set-contained code points
+ * must be matched with a full overlap because the longest-match algorithm
+ * would hide set string matches that end earlier.
+ * Such set strings need not be matched earlier inside the code point span
+ * because the standard algorithm would then have continued after
+ * the set string match anyway.
+ * ~ Remember the longest set string match (farthest end point) from the earliest
+ * starting point.
+ * + If no set string matched after spanSet.span(), then return
+ * with where the spanSet.span() ended.
+ * + If at least one set string matched, then continue the loop after the
+ * longest match from the earliest position.
+ * + If no more set string matched after a previous string match,
+ * then try another spanLength=spanSet.span(USET_SPAN_CONTAINED).
+ * Stop if spanLength==0, otherwise continue the loop.
+ */
+
+// Span forward over UTF-16 text, taking the set's strings into account.
+// s, length: the text to span.
+// spanCondition: USET_SPAN_NOT_CONTAINED is delegated to spanNot();
+// USET_SPAN_CONTAINED tracks all string match end points in an OffsetList;
+// any other value is handled by the longest-match (USET_SPAN_SIMPLE) branch.
+// Returns the index in s where the span ends; length if the whole text is spanned.
+int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
+ if(spanCondition==USET_SPAN_NOT_CONTAINED) {
+ return spanNot(s, length);
+ }
+ // Start with a plain code point span; strings only matter if it stops early.
+ int32_t spanLength=spanSet.span(s, length, USET_SPAN_CONTAINED);
+ if(spanLength==length) {
+ return length;
+ }
+
+ // Consider strings; they may overlap with the span.
+ OffsetList offsets;
+ if(spanCondition==USET_SPAN_CONTAINED) {
+ // Use offset list to try all possibilities.
+ offsets.setMaxLength(maxLength16);
+ }
+ // pos is the current position in s; rest is the number of code units from pos to the end.
+ int32_t pos=spanLength, rest=length-pos;
+ int32_t i, stringsLength=strings.size();
+ for(;;) {
+ if(spanCondition==USET_SPAN_CONTAINED) {
+ for(i=0; i<stringsLength; ++i) {
+ int32_t overlap=spanLengths[i];
+ if(overlap==ALL_CP_CONTAINED) {
+ continue; // Irrelevant string.
+ }
+ const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
+ const UChar *s16=string.getBuffer();
+ int32_t length16=string.length();
+
+ // Try to match this string at pos-overlap..pos.
+ if(overlap>=LONG_SPAN) {
+ overlap=length16;
+ // While contained: No point matching fully inside the code point span.
+ U16_BACK_1(s16, 0, overlap); // Length of the string minus the last code point.
+ }
+ if(overlap>spanLength) {
+ overlap=spanLength;
+ }
+ int32_t inc=length16-overlap; // Keep overlap+inc==length16.
+ // Slide the match start from pos-overlap up to pos; inc is where the match would end relative to pos.
+ for(;;) {
+ if(inc>rest) {
+ break;
+ }
+ // Try to match if the increment is not listed already.
+ if(!offsets.containsOffset(inc) && matches16CPB(s, pos-overlap, length, s16, length16)) {
+ if(inc==rest) {
+ return length; // Reached the end of the string.
+ }
+ offsets.addOffset(inc);
+ }
+ if(overlap==0) {
+ break;
+ }
+ --overlap;
+ ++inc;
+ }
+ }
+ } else /* USET_SPAN_SIMPLE */ {
+ // Track the best match so far: largest overlap (earliest start), then largest increment.
+ int32_t maxInc=0, maxOverlap=0;
+ for(i=0; i<stringsLength; ++i) {
+ int32_t overlap=spanLengths[i];
+ // For longest match, we do need to try to match even an all-contained string
+ // to find the match from the earliest start.
+
+ const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
+ const UChar *s16=string.getBuffer();
+ int32_t length16=string.length();
+
+ // Try to match this string at pos-overlap..pos.
+ if(overlap>=LONG_SPAN) {
+ overlap=length16;
+ // Longest match: Need to match fully inside the code point span
+ // to find the match from the earliest start.
+ }
+ if(overlap>spanLength) {
+ overlap=spanLength;
+ }
+ int32_t inc=length16-overlap; // Keep overlap+inc==length16.
+ for(;;) {
+ if(inc>rest || overlap<maxOverlap) {
+ break;
+ }
+ // Try to match if the string is longer or starts earlier.
+ if( (overlap>maxOverlap || /* redundant overlap==maxOverlap && */ inc>maxInc) &&
+ matches16CPB(s, pos-overlap, length, s16, length16)
+ ) {
+ maxInc=inc; // Longest match from earliest start.
+ maxOverlap=overlap;
+ break;
+ }
+ --overlap;
+ ++inc;
+ }
+ }
+
+ if(maxInc!=0 || maxOverlap!=0) {
+ // Longest-match algorithm, and there was a string match.
+ // Simply continue after it.
+ pos+=maxInc;
+ rest-=maxInc;
+ if(rest==0) {
+ return length; // Reached the end of the string.
+ }
+ spanLength=0; // Match strings from after a string match.
+ continue;
+ }
+ }
+ // Finished trying to match all strings at pos.
+
+ if(spanLength!=0 || pos==0) {
+ // The position is after an unlimited code point span (spanLength!=0),
+ // not after a string match.
+ // The only position where spanLength==0 after a span is pos==0.
+ // Otherwise, an unlimited code point span is only tried again when no
+ // strings match, and if such a non-initial span fails we stop.
+ if(offsets.isEmpty()) {
+ return pos; // No strings matched after a span.
+ }
+ // Match strings from after the next string match.
+ } else {
+ // The position is after a string match (or a single code point).
+ if(offsets.isEmpty()) {
+ // No more strings matched after a previous string match.
+ // Try another code point span from after the last string match.
+ spanLength=spanSet.span(s+pos, rest, USET_SPAN_CONTAINED);
+ if( spanLength==rest || // Reached the end of the string, or
+ spanLength==0 // neither strings nor span progressed.
+ ) {
+ return pos+spanLength;
+ }
+ pos+=spanLength;
+ rest-=spanLength;
+ continue; // spanLength>0: Match strings from after a span.
+ } else {
+ // Try to match only one code point from after a string match if some
+ // string matched beyond it, so that we try all possible positions
+ // and don't overshoot.
+ spanLength=spanOne(spanSet, s+pos, rest);
+ if(spanLength>0) {
+ if(spanLength==rest) {
+ return length; // Reached the end of the string.
+ }
+ // Match strings after this code point.
+ // There cannot be any increments below it because UnicodeSet strings
+ // contain multiple code points.
+ pos+=spanLength;
+ rest-=spanLength;
+ offsets.shift(spanLength);
+ spanLength=0;
+ continue; // Match strings from after a single code point.
+ }
+ // Match strings from after the next string match.
+ }
+ }
+ // Continue from the nearest recorded string match end point.
+ int32_t minOffset=offsets.popMinimum();
+ pos+=minOffset;
+ rest-=minOffset;
+ spanLength=0; // Match strings from after a string match.
+ }
+}
+
+// Span backward over UTF-16 text, taking the set's strings into account.
+// s, length: the text to span, starting from its end.
+// spanCondition: USET_SPAN_NOT_CONTAINED is delegated to spanNotBack();
+// USET_SPAN_CONTAINED tracks all string match start points in an OffsetList;
+// any other value is handled by the longest-match (USET_SPAN_SIMPLE) branch.
+// Returns the index in s where the backward span stops; 0 if it reaches the start of the text.
+int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
+ if(spanCondition==USET_SPAN_NOT_CONTAINED) {
+ return spanNotBack(s, length);
+ }
+ // Start with a plain code point span; strings only matter if it stops before index 0.
+ int32_t pos=spanSet.spanBack(s, length, USET_SPAN_CONTAINED);
+ if(pos==0) {
+ return 0;
+ }
+ int32_t spanLength=length-pos;
+
+ // Consider strings; they may overlap with the span.
+ OffsetList offsets;
+ if(spanCondition==USET_SPAN_CONTAINED) {
+ // Use offset list to try all possibilities.
+ offsets.setMaxLength(maxLength16);
+ }
+ int32_t i, stringsLength=strings.size();
+ // With all variants stored, the backward UTF-16 span lengths are the second array;
+ // otherwise there is only the one array at spanLengths.
+ uint8_t *spanBackLengths=spanLengths;
+ if(all) {
+ spanBackLengths+=stringsLength;
+ }
+ for(;;) {
+ if(spanCondition==USET_SPAN_CONTAINED) {
+ for(i=0; i<stringsLength; ++i) {
+ int32_t overlap=spanBackLengths[i];
+ if(overlap==ALL_CP_CONTAINED) {
+ continue; // Irrelevant string.
+ }
+ const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
+ const UChar *s16=string.getBuffer();
+ int32_t length16=string.length();
+
+ // Try to match this string starting at pos-(length16-overlap), that is, ending at pos+overlap.
+ if(overlap>=LONG_SPAN) {
+ overlap=length16;
+ // While contained: No point matching fully inside the code point span.
+ int32_t len1=0;
+ U16_FWD_1(s16, len1, overlap);
+ overlap-=len1; // Length of the string minus the first code point.
+ }
+ if(overlap>spanLength) {
+ overlap=spanLength;
+ }
+ int32_t dec=length16-overlap; // Keep dec+overlap==length16.
+ // Slide the match start backward; dec is how far before pos the match would start.
+ for(;;) {
+ if(dec>pos) {
+ break;
+ }
+ // Try to match if the decrement is not listed already.
+ if(!offsets.containsOffset(dec) && matches16CPB(s, pos-dec, length, s16, length16)) {
+ if(dec==pos) {
+ return 0; // Reached the start of the string.
+ }
+ offsets.addOffset(dec);
+ }
+ if(overlap==0) {
+ break;
+ }
+ --overlap;
+ ++dec;
+ }
+ }
+ } else /* USET_SPAN_SIMPLE */ {
+ // Track the best match so far: largest overlap (latest end), then largest decrement.
+ int32_t maxDec=0, maxOverlap=0;
+ for(i=0; i<stringsLength; ++i) {
+ int32_t overlap=spanBackLengths[i];
+ // For longest match, we do need to try to match even an all-contained string
+ // to find the match from the latest end.
+
+ const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
+ const UChar *s16=string.getBuffer();
+ int32_t length16=string.length();
+
+ // Try to match this string starting at pos-(length16-overlap), that is, ending at pos+overlap.
+ if(overlap>=LONG_SPAN) {
+ overlap=length16;
+ // Longest match: Need to match fully inside the code point span
+ // to find the match from the latest end.
+ }
+ if(overlap>spanLength) {
+ overlap=spanLength;
+ }
+ int32_t dec=length16-overlap; // Keep dec+overlap==length16.
+ for(;;) {
+ if(dec>pos || overlap<maxOverlap) {
+ break;
+ }
+ // Try to match if the string is longer or ends later.
+ if( (overlap>maxOverlap || /* redundant overlap==maxOverlap && */ dec>maxDec) &&
+ matches16CPB(s, pos-dec, length, s16, length16)
+ ) {
+ maxDec=dec; // Longest match from latest end.
+ maxOverlap=overlap;
+ break;
+ }
+ --overlap;
+ ++dec;
+ }
+ }
+
+ if(maxDec!=0 || maxOverlap!=0) {
+ // Longest-match algorithm, and there was a string match.
+ // Simply continue before it.
+ pos-=maxDec;
+ if(pos==0) {
+ return 0; // Reached the start of the string.
+ }
+ spanLength=0; // Match strings from before a string match.
+ continue;
+ }
+ }
+ // Finished trying to match all strings at pos.
+
+ if(spanLength!=0 || pos==length) {
+ // The position is before an unlimited code point span (spanLength!=0),
+ // not before a string match.
+ // The only position where spanLength==0 before a span is pos==length.
+ // Otherwise, an unlimited code point span is only tried again when no
+ // strings match, and if such a non-initial span fails we stop.
+ if(offsets.isEmpty()) {
+ return pos; // No strings matched before a span.
+ }
+ // Match strings from before the next string match.
+ } else {
+ // The position is before a string match (or a single code point).
+ if(offsets.isEmpty()) {
+ // No more strings matched before a previous string match.
+ // Try another code point span from before the last string match.
+ int32_t oldPos=pos;
+ pos=spanSet.spanBack(s, oldPos, USET_SPAN_CONTAINED);
+ spanLength=oldPos-pos;
+ if( pos==0 || // Reached the start of the string, or
+ spanLength==0 // neither strings nor span progressed.
+ ) {
+ return pos;
+ }
+ continue; // spanLength>0: Match strings from before a span.
+ } else {
+ // Try to match only one code point from before a string match if some
+ // string matched beyond it, so that we try all possible positions
+ // and don't overshoot.
+ spanLength=spanOneBack(spanSet, s, pos);
+ if(spanLength>0) {
+ if(spanLength==pos) {
+ return 0; // Reached the start of the string.
+ }
+ // Match strings before this code point.
+ // There cannot be any decrements below it because UnicodeSet strings
+ // contain multiple code points.
+ pos-=spanLength;
+ offsets.shift(spanLength);
+ spanLength=0;
+ continue; // Match strings from before a single code point.
+ }
+ // Match strings from before the next string match.
+ }
+ }
+ // Continue from the nearest recorded string match start point.
+ pos-=offsets.popMinimum();
+ spanLength=0; // Match strings from before a string match.
+ }
+}
+ /*
+  * Forward span over UTF-8 text for a set that contains strings.
+  * s points to the text, length is its size in bytes, and spanCondition selects the algorithm:
+  * USET_SPAN_NOT_CONTAINED is delegated to spanNotUTF8(); USET_SPAN_CONTAINED records every
+  * candidate continuation offset in an OffsetList so that all possibilities are tried;
+  * otherwise (USET_SPAN_SIMPLE) the longest string match from the earliest start is taken.
+  * Returns the byte offset at which the span ends (length when the whole text was spanned).
+  */
+ int32_t UnicodeSetStringSpan::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
+ if(spanCondition==USET_SPAN_NOT_CONTAINED) {
+ return spanNotUTF8(s, length);
+ }
+ int32_t spanLength=spanSet.spanUTF8((const char *)s, length, USET_SPAN_CONTAINED);
+ if(spanLength==length) {
+ return length;
+ }
+
+ // Consider strings; they may overlap with the span.
+ OffsetList offsets;
+ if(spanCondition==USET_SPAN_CONTAINED) {
+ // Use offset list to try all possibilities.
+ offsets.setMaxLength(maxLength8);
+ }
+ int32_t pos=spanLength, rest=length-pos; // pos: current byte offset; rest: bytes remaining from pos.
+ int32_t i, stringsLength=strings.size();
+ uint8_t *spanUTF8Lengths=spanLengths;
+ if(all) {
+ spanUTF8Lengths+=2*stringsLength; // Built for all variants: skip two per-string arrays of span lengths.
+ }
+ for(;;) {
+ const uint8_t *s8=utf8; // s8 walks the concatenated UTF-8 forms of the set strings.
+ int32_t length8;
+ if(spanCondition==USET_SPAN_CONTAINED) {
+ for(i=0; i<stringsLength; ++i) {
+ length8=utf8Lengths[i];
+ if(length8==0) {
+ continue; // String not representable in UTF-8.
+ }
+ int32_t overlap=spanUTF8Lengths[i];
+ if(overlap==ALL_CP_CONTAINED) {
+ s8+=length8;
+ continue; // Irrelevant string.
+ }
+
+ // Try to match this string at pos-overlap..pos.
+ if(overlap>=LONG_SPAN) {
+ overlap=length8;
+ // While contained: No point matching fully inside the code point span.
+ U8_BACK_1(s8, 0, overlap); // Length of the string minus the last code point.
+ }
+ if(overlap>spanLength) {
+ overlap=spanLength;
+ }
+ int32_t inc=length8-overlap; // Keep overlap+inc==length8.
+ for(;;) {
+ if(inc>rest) {
+ break;
+ }
+ // Try to match if the increment is not listed already.
+ // Match at code point boundaries. (The UTF-8 strings were converted
+ // from UTF-16 and are guaranteed to be well-formed.)
+ if(!U8_IS_TRAIL(s[pos-overlap]) &&
+ !offsets.containsOffset(inc) &&
+ matches8(s+pos-overlap, s8, length8)) {
+ if(inc==rest) {
+ return length; // Reached the end of the string.
+ }
+ offsets.addOffset(inc);
+ }
+ if(overlap==0) {
+ break;
+ }
+ --overlap;
+ ++inc;
+ }
+ s8+=length8;
+ }
+ } else /* USET_SPAN_SIMPLE */ {
+ int32_t maxInc=0, maxOverlap=0;
+ for(i=0; i<stringsLength; ++i) {
+ length8=utf8Lengths[i];
+ if(length8==0) {
+ continue; // String not representable in UTF-8.
+ }
+ int32_t overlap=spanUTF8Lengths[i];
+ // For longest match, we do need to try to match even an all-contained string
+ // to find the match from the earliest start.
+
+ // Try to match this string at pos-overlap..pos.
+ if(overlap>=LONG_SPAN) {
+ overlap=length8;
+ // Longest match: Need to match fully inside the code point span
+ // to find the match from the earliest start.
+ }
+ if(overlap>spanLength) {
+ overlap=spanLength;
+ }
+ int32_t inc=length8-overlap; // Keep overlap+inc==length8.
+ for(;;) {
+ if(inc>rest || overlap<maxOverlap) {
+ break;
+ }
+ // Try to match if the string is longer or starts earlier.
+ // Match at code point boundaries. (The UTF-8 strings were converted
+ // from UTF-16 and are guaranteed to be well-formed.)
+ if(!U8_IS_TRAIL(s[pos-overlap]) &&
+ (overlap>maxOverlap ||
+ /* redundant overlap==maxOverlap && */ inc>maxInc) &&
+ matches8(s+pos-overlap, s8, length8)) {
+ maxInc=inc; // Longest match from earliest start.
+ maxOverlap=overlap;
+ break;
+ }
+ --overlap;
+ ++inc;
+ }
+ s8+=length8;
+ }
+
+ if(maxInc!=0 || maxOverlap!=0) {
+ // Longest-match algorithm, and there was a string match.
+ // Simply continue after it.
+ pos+=maxInc;
+ rest-=maxInc;
+ if(rest==0) {
+ return length; // Reached the end of the string.
+ }
+ spanLength=0; // Match strings from after a string match.
+ continue;
+ }
+ }
+ // Finished trying to match all strings at pos.
+
+ if(spanLength!=0 || pos==0) {
+ // The position is after an unlimited code point span (spanLength!=0),
+ // not after a string match.
+ // The only position where spanLength==0 after a span is pos==0.
+ // Otherwise, an unlimited code point span is only tried again when no
+ // strings match, and if such a non-initial span fails we stop.
+ if(offsets.isEmpty()) {
+ return pos; // No strings matched after a span.
+ }
+ // Match strings from after the next string match.
+ } else {
+ // The position is after a string match (or a single code point).
+ if(offsets.isEmpty()) {
+ // No more strings matched after a previous string match.
+ // Try another code point span from after the last string match.
+ spanLength=spanSet.spanUTF8((const char *)s+pos, rest, USET_SPAN_CONTAINED);
+ if( spanLength==rest || // Reached the end of the string, or
+ spanLength==0 // neither strings nor span progressed.
+ ) {
+ return pos+spanLength;
+ }
+ pos+=spanLength;
+ rest-=spanLength;
+ continue; // spanLength>0: Match strings from after a span.
+ } else {
+ // Try to match only one code point from after a string match if some
+ // string matched beyond it, so that we try all possible positions
+ // and don't overshoot.
+ spanLength=spanOneUTF8(spanSet, s+pos, rest);
+ if(spanLength>0) {
+ if(spanLength==rest) {
+ return length; // Reached the end of the string.
+ }
+ // Match strings after this code point.
+ // There cannot be any increments below it because UnicodeSet strings
+ // contain multiple code points.
+ pos+=spanLength;
+ rest-=spanLength;
+ offsets.shift(spanLength);
+ spanLength=0;
+ continue; // Match strings from after a single code point.
+ }
+ // Match strings from after the next string match.
+ }
+ }
+ int32_t minOffset=offsets.popMinimum(); // Advance to the nearest recorded string-match end.
+ pos+=minOffset;
+ rest-=minOffset;
+ spanLength=0; // Match strings from after a string match.
+ }
+ }
+ /*
+  * Backward span over UTF-8 text for a set that contains strings; mirrors spanUTF8().
+  * s points to the text, length is its size in bytes, and spanCondition selects the algorithm:
+  * USET_SPAN_NOT_CONTAINED is delegated to spanNotBackUTF8(); USET_SPAN_CONTAINED records every
+  * candidate decrement in an OffsetList so that all possibilities are tried;
+  * otherwise (USET_SPAN_SIMPLE) the longest string match from the latest end is taken.
+  * Returns the byte offset at which the backward span stops (0 when the whole text was spanned).
+  */
+ int32_t UnicodeSetStringSpan::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
+ if(spanCondition==USET_SPAN_NOT_CONTAINED) {
+ return spanNotBackUTF8(s, length);
+ }
+ int32_t pos=spanSet.spanBackUTF8((const char *)s, length, USET_SPAN_CONTAINED);
+ if(pos==0) {
+ return 0;
+ }
+ int32_t spanLength=length-pos; // Bytes covered by the initial backward code point span.
+
+ // Consider strings; they may overlap with the span.
+ OffsetList offsets;
+ if(spanCondition==USET_SPAN_CONTAINED) {
+ // Use offset list to try all possibilities.
+ offsets.setMaxLength(maxLength8);
+ }
+ int32_t i, stringsLength=strings.size();
+ uint8_t *spanBackUTF8Lengths=spanLengths;
+ if(all) {
+ spanBackUTF8Lengths+=3*stringsLength; // Built for all variants: skip three per-string arrays of span lengths.
+ }
+ for(;;) {
+ const uint8_t *s8=utf8; // s8 walks the concatenated UTF-8 forms of the set strings.
+ int32_t length8;
+ if(spanCondition==USET_SPAN_CONTAINED) {
+ for(i=0; i<stringsLength; ++i) {
+ length8=utf8Lengths[i];
+ if(length8==0) {
+ continue; // String not representable in UTF-8.
+ }
+ int32_t overlap=spanBackUTF8Lengths[i];
+ if(overlap==ALL_CP_CONTAINED) {
+ s8+=length8;
+ continue; // Irrelevant string.
+ }
+
+ // Try to match this string at pos-(length8-overlap)..pos-length8.
+ if(overlap>=LONG_SPAN) {
+ overlap=length8;
+ // While contained: No point matching fully inside the code point span.
+ int32_t len1=0;
+ U8_FWD_1(s8, len1, overlap);
+ overlap-=len1; // Length of the string minus the first code point.
+ }
+ if(overlap>spanLength) {
+ overlap=spanLength;
+ }
+ int32_t dec=length8-overlap; // Keep dec+overlap==length8.
+ for(;;) {
+ if(dec>pos) {
+ break;
+ }
+ // Try to match if the decrement is not listed already.
+ // Match at code point boundaries. (The UTF-8 strings were converted
+ // from UTF-16 and are guaranteed to be well-formed.)
+ if( !U8_IS_TRAIL(s[pos-dec]) &&
+ !offsets.containsOffset(dec) &&
+ matches8(s+pos-dec, s8, length8)
+ ) {
+ if(dec==pos) {
+ return 0; // Reached the start of the string.
+ }
+ offsets.addOffset(dec);
+ }
+ if(overlap==0) {
+ break;
+ }
+ --overlap;
+ ++dec;
+ }
+ s8+=length8;
+ }
+ } else /* USET_SPAN_SIMPLE */ {
+ int32_t maxDec=0, maxOverlap=0;
+ for(i=0; i<stringsLength; ++i) {
+ length8=utf8Lengths[i];
+ if(length8==0) {
+ continue; // String not representable in UTF-8.
+ }
+ int32_t overlap=spanBackUTF8Lengths[i];
+ // For longest match, we do need to try to match even an all-contained string
+ // to find the match from the latest end.
+
+ // Try to match this string at pos-(length8-overlap)..pos-length8.
+ if(overlap>=LONG_SPAN) {
+ overlap=length8;
+ // Longest match: Need to match fully inside the code point span
+ // to find the match from the latest end.
+ }
+ if(overlap>spanLength) {
+ overlap=spanLength;
+ }
+ int32_t dec=length8-overlap; // Keep dec+overlap==length8.
+ for(;;) {
+ if(dec>pos || overlap<maxOverlap) {
+ break;
+ }
+ // Try to match if the string is longer or ends later.
+ // Match at code point boundaries. (The UTF-8 strings were converted
+ // from UTF-16 and are guaranteed to be well-formed.)
+ if( !U8_IS_TRAIL(s[pos-dec]) &&
+ (overlap>maxOverlap || /* redundant overlap==maxOverlap && */ dec>maxDec) &&
+ matches8(s+pos-dec, s8, length8)
+ ) {
+ maxDec=dec; // Longest match from latest end.
+ maxOverlap=overlap;
+ break;
+ }
+ --overlap;
+ ++dec;
+ }
+ s8+=length8;
+ }
+
+ if(maxDec!=0 || maxOverlap!=0) {
+ // Longest-match algorithm, and there was a string match.
+ // Simply continue before it.
+ pos-=maxDec;
+ if(pos==0) {
+ return 0; // Reached the start of the string.
+ }
+ spanLength=0; // Match strings from before a string match.
+ continue;
+ }
+ }
+ // Finished trying to match all strings at pos.
+
+ if(spanLength!=0 || pos==length) {
+ // The position is before an unlimited code point span (spanLength!=0),
+ // not before a string match.
+ // The only position where spanLength==0 before a span is pos==length.
+ // Otherwise, an unlimited code point span is only tried again when no
+ // strings match, and if such a non-initial span fails we stop.
+ if(offsets.isEmpty()) {
+ return pos; // No strings matched before a span.
+ }
+ // Match strings from before the next string match.
+ } else {
+ // The position is before a string match (or a single code point).
+ if(offsets.isEmpty()) {
+ // No more strings matched before a previous string match.
+ // Try another code point span from before the last string match.
+ int32_t oldPos=pos;
+ pos=spanSet.spanBackUTF8((const char *)s, oldPos, USET_SPAN_CONTAINED);
+ spanLength=oldPos-pos;
+ if( pos==0 || // Reached the start of the string, or
+ spanLength==0 // neither strings nor span progressed.
+ ) {
+ return pos;
+ }
+ continue; // spanLength>0: Match strings from before a span.
+ } else {
+ // Try to match only one code point from before a string match if some
+ // string matched beyond it, so that we try all possible positions
+ // and don't overshoot.
+ spanLength=spanOneBackUTF8(spanSet, s, pos);
+ if(spanLength>0) {
+ if(spanLength==pos) {
+ return 0; // Reached the start of the string.
+ }
+ // Match strings before this code point.
+ // There cannot be any decrements below it because UnicodeSet strings
+ // contain multiple code points.
+ pos-=spanLength;
+ offsets.shift(spanLength);
+ spanLength=0;
+ continue; // Match strings from before a single code point.
+ }
+ // Match strings from before the next string match.
+ }
+ }
+ pos-=offsets.popMinimum(); // Step back to the nearest recorded string-match start.
+ spanLength=0; // Match strings from before a string match.
+ }
+ }
+
+/*
+ * Algorithm for spanNot()==span(USET_SPAN_NOT_CONTAINED)
+ *
+ * Theoretical algorithm:
+ * - Iterate through the string, and at each code point boundary:
+ * + If the code point there is in the set, then return with the current position.
+ * + If a set string matches at the current position, then return with the current position.
+ *
+ * Optimized implementation:
+ *
+ * (Same assumption as for span() above.)
+ *
+ * Create and cache a spanNotSet which contains all of the single code points
+ * of the original set but none of its strings.
+ * For each set string add its initial code point to the spanNotSet.
+ * (Also add its final code point for spanNotBack().)
+ *
+ * - Loop:
+ * + Do spanLength=spanNotSet.span(USET_SPAN_NOT_CONTAINED).
+ * + If the current code point is in the original set, then
+ * return the current position.
+ * + If any set string matches at the current position, then
+ * return the current position.
+ * + If there is no match at the current position, neither for the code point there
+ * nor for any set string, then skip this code point and continue the loop.
+ * This happens for set-string-initial code points that were added to spanNotSet
+ * when there is not actually a match for such a set string.
+ */
+
+ // Forward span(USET_SPAN_NOT_CONTAINED) over UTF-16 text.
+ // Returns the first index at which a set code point or a set string begins,
+ // or length when no set element occurs in s[0..length).
+ int32_t UnicodeSetStringSpan::spanNot(const UChar *s, int32_t length) const {
+     const int32_t stringCount=strings.size();
+     int32_t pos=0;
+     int32_t rest=length;
+     for(;;) {
+         // Skip everything that is neither in the set nor a first/last
+         // code point of some set string.
+         const int32_t skipped=pSpanNotSet->span(s+pos, rest, USET_SPAN_NOT_CONTAINED);
+         if(skipped==rest) {
+             return length;  // Nothing of interest up to the end of the string.
+         }
+         pos+=skipped;
+         rest-=skipped;
+
+         // Is the code point at pos a real member of the original set
+         // (as opposed to a mere string start/end)?
+         const int32_t cpLength=spanOne(spanSet, s+pos, rest);
+         if(cpLength>0) {
+             return pos;  // Set element found at pos.
+         }
+
+         // Otherwise see whether one of the relevant strings matches at pos.
+         for(int32_t index=0; index<stringCount; ++index) {
+             if(spanLengths[index]!=ALL_CP_CONTAINED) {
+                 const UnicodeString &candidate=*(const UnicodeString *)strings.elementAt(index);
+                 const UChar *s16=candidate.getBuffer();
+                 const int32_t length16=candidate.length();
+                 if(length16<=rest && matches16CPB(s, pos, length, s16, length16)) {
+                     return pos;  // Set element found at pos.
+                 }
+             }
+         }
+
+         // False alarm: the code point only starts/ends a string that does not
+         // match here. cpLength is negative, so this skips that code point.
+         pos-=cpLength;
+         rest+=cpLength;
+         if(rest==0) {
+             break;
+         }
+     }
+     return length;  // Reached the end of the string.
+ }
+
+ // Backward span(USET_SPAN_NOT_CONTAINED) over UTF-16 text.
+ // Returns the limit index of the last set code point or set string found when
+ // scanning from length toward 0, or 0 when no set element occurs.
+ int32_t UnicodeSetStringSpan::spanNotBack(const UChar *s, int32_t length) const {
+     const int32_t stringCount=strings.size();
+     int32_t pos=length;
+     for(;;) {
+         // Skip backward over everything that is neither in the set nor a
+         // first/last code point of some set string.
+         pos=pSpanNotSet->spanBack(s, pos, USET_SPAN_NOT_CONTAINED);
+         if(pos==0) {
+             return 0;  // Nothing of interest down to the start of the string.
+         }
+
+         // Is the code point before pos a real member of the original set?
+         const int32_t cpLength=spanOneBack(spanSet, s, pos);
+         if(cpLength>0) {
+             return pos;  // Set element ends at pos.
+         }
+
+         // Otherwise see whether one of the relevant strings ends at pos.
+         // spanLengths (not a spanBack-specific array) is sufficient here because
+         // only the "irrelevant string" marker is consulted, which is the same
+         // in either array.
+         for(int32_t index=0; index<stringCount; ++index) {
+             if(spanLengths[index]!=ALL_CP_CONTAINED) {
+                 const UnicodeString &candidate=*(const UnicodeString *)strings.elementAt(index);
+                 const UChar *s16=candidate.getBuffer();
+                 const int32_t length16=candidate.length();
+                 if(length16<=pos && matches16CPB(s, pos-length16, length, s16, length16)) {
+                     return pos;  // Set element ends at pos.
+                 }
+             }
+         }
+
+         // False alarm: cpLength is negative, so this steps back over the
+         // code point that merely starts/ends a non-matching string.
+         pos+=cpLength;
+         if(pos==0) {
+             break;
+         }
+     }
+     return 0;  // Reached the start of the string.
+ }
+
+ // Forward span(USET_SPAN_NOT_CONTAINED) over UTF-8 text.
+ // Returns the first byte offset at which a set code point or a set string begins,
+ // or length when no set element occurs in s[0..length).
+ int32_t UnicodeSetStringSpan::spanNotUTF8(const uint8_t *s, int32_t length) const {
+     const int32_t stringCount=strings.size();
+     // When built for all variants, the forward UTF-8 span lengths follow
+     // two other per-string arrays inside spanLengths.
+     uint8_t *spanUTF8Lengths= all ? spanLengths+2*stringCount : spanLengths;
+     int32_t pos=0;
+     int32_t rest=length;
+     for(;;) {
+         // Skip everything that is neither in the set nor a first/last
+         // code point of some set string.
+         const int32_t skipped=pSpanNotSet->spanUTF8((const char *)s+pos, rest, USET_SPAN_NOT_CONTAINED);
+         if(skipped==rest) {
+             return length;  // Nothing of interest up to the end of the string.
+         }
+         pos+=skipped;
+         rest-=skipped;
+
+         // Is the code point at pos a real member of the original set?
+         const int32_t cpLength=spanOneUTF8(spanSet, s+pos, rest);
+         if(cpLength>0) {
+             return pos;  // Set element found at pos.
+         }
+
+         // Otherwise see whether one of the relevant strings matches at pos.
+         // s8 advances through the concatenated UTF-8 forms of all strings.
+         const uint8_t *s8=utf8;
+         for(int32_t index=0; index<stringCount; ++index) {
+             const int32_t length8=utf8Lengths[index];
+             // Skip strings without a UTF-8 form and ALL_CP_CONTAINED (irrelevant) strings.
+             const UBool relevant=(UBool)(length8!=0 && spanUTF8Lengths[index]!=ALL_CP_CONTAINED);
+             if(relevant && length8<=rest && matches8(s+pos, s8, length8)) {
+                 return pos;  // Set element found at pos.
+             }
+             s8+=length8;
+         }
+
+         // False alarm: cpLength is negative, so this skips the code point
+         // that merely starts/ends a non-matching string.
+         pos-=cpLength;
+         rest+=cpLength;
+         if(rest==0) {
+             break;
+         }
+     }
+     return length;  // Reached the end of the string.
+ }
+
+ // Backward span(USET_SPAN_NOT_CONTAINED) over UTF-8 text.
+ // Returns the limit byte offset of the last set code point or set string found
+ // when scanning from length toward 0, or 0 when no set element occurs.
+ int32_t UnicodeSetStringSpan::spanNotBackUTF8(const uint8_t *s, int32_t length) const {
+     const int32_t stringCount=strings.size();
+     // When built for all variants, the backward UTF-8 span lengths follow
+     // three other per-string arrays inside spanLengths.
+     uint8_t *spanBackUTF8Lengths= all ? spanLengths+3*stringCount : spanLengths;
+     int32_t pos=length;
+     for(;;) {
+         // Skip backward over everything that is neither in the set nor a
+         // first/last code point of some set string.
+         pos=pSpanNotSet->spanBackUTF8((const char *)s, pos, USET_SPAN_NOT_CONTAINED);
+         if(pos==0) {
+             return 0;  // Nothing of interest down to the start of the string.
+         }
+
+         // Is the code point before pos a real member of the original set?
+         const int32_t cpLength=spanOneBackUTF8(spanSet, s, pos);
+         if(cpLength>0) {
+             return pos;  // Set element ends at pos.
+         }
+
+         // Otherwise see whether one of the relevant strings ends at pos.
+         // s8 advances through the concatenated UTF-8 forms of all strings.
+         const uint8_t *s8=utf8;
+         for(int32_t index=0; index<stringCount; ++index) {
+             const int32_t length8=utf8Lengths[index];
+             // Skip strings without a UTF-8 form and ALL_CP_CONTAINED (irrelevant) strings.
+             const UBool relevant=(UBool)(length8!=0 && spanBackUTF8Lengths[index]!=ALL_CP_CONTAINED);
+             if(relevant && length8<=pos && matches8(s+pos-length8, s8, length8)) {
+                 return pos;  // Set element ends at pos.
+             }
+             s8+=length8;
+         }
+
+         // False alarm: cpLength is negative, so this steps back over the
+         // code point that merely starts/ends a non-matching string.
+         pos+=cpLength;
+         if(pos==0) {
+             break;
+         }
+     }
+     return 0;  // Reached the start of the string.
+ }
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/unisetspan.h b/thirdparty/icu4c/common/unisetspan.h
new file mode 100644
index 0000000000..9a1307a907
--- /dev/null
+++ b/thirdparty/icu4c/common/unisetspan.h
@@ -0,0 +1,157 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: unisetspan.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2007mar01
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UNISETSPAN_H__
+#define __UNISETSPAN_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uniset.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Implement span() etc. for a set with strings.
+ * Avoid recursion because of its exponential complexity.
+ * Instead, try multiple paths at once and track them with an OffsetList.
+ */
+ class UnicodeSetStringSpan : public UMemory { // Implements span()/spanBack() for a set that contains strings.
+ public:
+ /*
+ * Which span() variant will be used?
+ * The object is either built for one variant and used once,
+ * or built for all and may be used many times.
+ * The named combinations below OR together one direction bit,
+ * one encoding bit and one containment bit.
+ */
+ enum {
+ FWD = 0x20, // direction: forward span
+ BACK = 0x10, // direction: backward span
+ UTF16 = 8, // text encoding: UTF-16
+ UTF8 = 4, // text encoding: UTF-8
+ CONTAINED = 2, // span while contained
+ NOT_CONTAINED = 1, // span while not contained
+
+ ALL = 0x3f, // all six bits: built for every variant
+
+ FWD_UTF16_CONTAINED = FWD | UTF16 | CONTAINED,
+ FWD_UTF16_NOT_CONTAINED = FWD | UTF16 | NOT_CONTAINED,
+ FWD_UTF8_CONTAINED = FWD | UTF8 | CONTAINED,
+ FWD_UTF8_NOT_CONTAINED = FWD | UTF8 | NOT_CONTAINED,
+ BACK_UTF16_CONTAINED = BACK | UTF16 | CONTAINED,
+ BACK_UTF16_NOT_CONTAINED= BACK | UTF16 | NOT_CONTAINED,
+ BACK_UTF8_CONTAINED = BACK | UTF8 | CONTAINED,
+ BACK_UTF8_NOT_CONTAINED = BACK | UTF8 | NOT_CONTAINED
+ };
+
+ UnicodeSetStringSpan(const UnicodeSet &set, const UVector &setStrings, uint32_t which); // which: presumably a combination of the enum bits above
+
+ // Copy constructor. Assumes which==ALL for a frozen set.
+ UnicodeSetStringSpan(const UnicodeSetStringSpan &otherStringSpan, const UVector &newParentSetStrings);
+
+ ~UnicodeSetStringSpan();
+
+ /*
+ * Do the strings need to be checked in span() etc.?
+ * @return true if strings need to be checked (call span() here),
+ * false if not (use a BMPSet for best performance).
+ */
+ inline UBool needsStringSpanUTF16();
+ inline UBool needsStringSpanUTF8();
+
+ // For fast UnicodeSet::contains(c).
+ inline UBool contains(UChar32 c) const;
+
+ // Span UTF-16 text forward from its start; returns the index where the span ends.
+ int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ // Span UTF-16 text backward from its end; returns the index where the span stops.
+ int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ // Span UTF-8 text forward; length and the result are byte counts/offsets.
+ int32_t spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ // Span UTF-8 text backward; length and the result are byte counts/offsets.
+ int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ private:
+ // Special spanLength byte values.
+ enum {
+ // The spanLength is >=0xfe.
+ LONG_SPAN=0xfe,
+ // All code points in the string are contained in the parent set.
+ ALL_CP_CONTAINED=0xff
+ };
+
+ // Add a starting or ending string character to the spanNotSet
+ // so that a character span ends before any string.
+ void addToSpanNotSet(UChar32 c);
+
+ // Implementations of the four span variants for USET_SPAN_NOT_CONTAINED.
+ int32_t spanNot(const UChar *s, int32_t length) const;
+ int32_t spanNotBack(const UChar *s, int32_t length) const;
+ int32_t spanNotUTF8(const uint8_t *s, int32_t length) const;
+ int32_t spanNotBackUTF8(const uint8_t *s, int32_t length) const;
+
+ // Set for span(). Same as parent but without strings.
+ UnicodeSet spanSet;
+
+ // Set for span(not contained).
+ // Same as spanSet, plus characters that start or end strings.
+ UnicodeSet *pSpanNotSet;
+
+ // The strings of the parent set.
+ const UVector &strings;
+
+ // Pointer to the UTF-8 string lengths.
+ // Also pointer to further allocated storage for meta data and
+ // UTF-8 string contents as necessary.
+ int32_t *utf8Lengths;
+
+ // Pointer to the part of the (utf8Lengths) memory block that stores
+ // the lengths of span(), spanBack() etc. for each string.
+ uint8_t *spanLengths;
+
+ // Pointer to the part of the (utf8Lengths) memory block that stores
+ // the UTF-8 versions of the parent set's strings.
+ uint8_t *utf8;
+
+ // Number of bytes for all UTF-8 versions of strings together.
+ int32_t utf8Length;
+
+ // Maximum lengths of relevant strings.
+ int32_t maxLength16; // in UTF-16 code units; 0 means no string needs checking
+ int32_t maxLength8; // in UTF-8 bytes; 0 means no string needs checking
+
+ // Set up for all variants of span()?
+ UBool all;
+
+ // Memory for small numbers and lengths of strings.
+ // For example, for 8 strings:
+ // 8 UTF-8 lengths, 8*4 bytes span lengths, 8*2 3-byte UTF-8 characters
+ // = 112 bytes = int32_t[28].
+ int32_t staticLengths[32];
+ };
+
+ UBool UnicodeSetStringSpan::needsStringSpanUTF16() { // True when UTF-16 spans must check the set strings.
+ return (UBool)(maxLength16!=0); // A nonzero maximum length means at least one relevant string was recorded.
+ }
+
+ UBool UnicodeSetStringSpan::needsStringSpanUTF8() { // True when UTF-8 spans must check the set strings.
+ return (UBool)(maxLength8!=0); // A nonzero maximum length means at least one relevant string was recorded.
+ }
+
+ UBool UnicodeSetStringSpan::contains(UChar32 c) const { // Code point membership test; strings are not considered.
+ return spanSet.contains(c); // spanSet is the parent set without its strings.
+ }
+
+U_NAMESPACE_END
+
+#endif
diff --git a/thirdparty/icu4c/common/unistr.cpp b/thirdparty/icu4c/common/unistr.cpp
new file mode 100644
index 0000000000..077b4d6ef2
--- /dev/null
+++ b/thirdparty/icu4c/common/unistr.cpp
@@ -0,0 +1,1982 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1999-2016, International Business Machines Corporation and
+* others. All Rights Reserved.
+******************************************************************************
+*
+* File unistr.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 09/25/98 stephen Creation.
+* 04/20/99 stephen Overhauled per 4/16 code review.
+* 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
+* 11/18/99 aliu Added handleReplaceBetween() to make inherit from
+* Replaceable.
+* 06/25/01 grhoten Removed the dependency on iostream
+******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/appendable.h"
+#include "unicode/putil.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "unicode/ustring.h"
+#include "unicode/unistr.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
+#include "uelement.h"
+#include "ustr_imp.h"
+#include "umutex.h"
+#include "uassert.h"
+
+#if 0
+
+#include <iostream>
+using namespace std;
+
+//DEBUGGING
+ // Debug helper: writes name, then the contents of s between '|' delimiters, to cout.
+ // Code units below 0x0020 or at/above 0x007E are written as "[0x...]" in hex;
+ // all others are written as plain chars.
+ void
+ print(const UnicodeString& s,
+       const char *name)
+ {
+     cout << name << ":|";
+     for(int index = 0; index < s.length(); ++index) {
+         const UChar unit = s[index];
+         const bool plain = unit >= 0x0020 && unit < 0x007E;
+         if(plain) {
+             cout << (char) unit;
+         } else {
+             cout << "[0x" << hex << unit << "]";
+         }
+     }
+     cout << '|' << endl;
+ }
+
+ // Debug helper: writes name, then the len code units at s between '|' delimiters, to cout.
+ // Code units below 0x0020 or at/above 0x007E are written as "[0x...]" in hex;
+ // all others are written as plain chars.
+ void
+ print(const UChar *s,
+       int32_t len,
+       const char *name)
+ {
+     cout << name << ":|";
+     for(int index = 0; index < len; ++index) {
+         const UChar unit = s[index];
+         const bool plain = unit >= 0x0020 && unit < 0x007E;
+         if(plain) {
+             cout << (char) unit;
+         } else {
+             cout << "[0x" << hex << unit << "]";
+         }
+     }
+     cout << '|' << endl;
+ }
+// END DEBUGGING
+#endif
+
+// Local function definitions for now
+
+ // Copies count UChars from src+srcStart to dst+dstStart.
+ // The two areas may overlap, hence uprv_memmove; a non-positive count copies nothing.
+ static
+ inline void
+ us_arrayCopy(const UChar *src, int32_t srcStart,
+              UChar *dst, int32_t dstStart, int32_t count)
+ {
+     if(count<=0) {
+         return;
+     }
+     const UChar *from = src+srcStart;
+     UChar *to = dst+dstStart;
+     uprv_memmove(to, from, (size_t)count*sizeof(*src));
+ }
+
+// u_unescapeAt() callback to get a UChar from a UnicodeString
+U_CDECL_BEGIN
+ static UChar U_CALLCONV
+ UnicodeString_charAt(int32_t offset, void *context) {
+     // context is the UnicodeString to read from, passed through as an opaque pointer.
+     icu::UnicodeString *string = (icu::UnicodeString*) context;
+     return string->charAt(offset);
+ }
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+ /* The Replaceable virtual destructor can't be defined in the header
+ due to how AIX works with multiple definitions of virtual functions,
+ so it is defined here, out of line, with an empty body.
+ */
+ Replaceable::~Replaceable() {}
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
+
+ // Concatenates s1 and s2 into a new string.
+ // The result is preallocated with room for both strings plus one extra UChar
+ // (space for a terminating NUL if one is needed later).
+ // If the combined length cannot be represented in an int32_t, a bogus string
+ // is returned instead of evaluating an overflowing signed addition.
+ UnicodeString U_EXPORT2
+ operator+ (const UnicodeString &s1, const UnicodeString &s2) {
+     const int32_t length1 = s1.length();
+     const int32_t length2 = s2.length();
+     // Lengths are non-negative, so INT32_MAX - length2 cannot overflow.
+     if(length1 > INT32_MAX - length2) {
+         UnicodeString bogus;
+         bogus.setToBogus();
+         return bogus;
+     }
+     int32_t capacity = length1 + length2;
+     if(capacity != INT32_MAX) {
+         ++capacity;  // room for the NUL
+     }
+     return
+         UnicodeString(capacity, (UChar32)0, 0).
+             append(s1).
+             append(s2);
+ }
+
+//========================================
+// Reference Counting functions, put at top of file so that optimizing compilers
+// have a chance to automatically inline.
+//========================================
+
+ // Atomically increments the reference counter stored in the int32_t
+ // immediately before the character array.
+ void
+ UnicodeString::addRef() {
+     u_atomic_int32_t *counter = (u_atomic_int32_t *)fUnion.fFields.fArray - 1;
+     umtx_atomic_inc(counter);
+ }
+
+ // Atomically decrements the reference counter stored in the int32_t
+ // immediately before the character array and returns umtx_atomic_dec()'s result.
+ int32_t
+ UnicodeString::removeRef() {
+     u_atomic_int32_t *counter = (u_atomic_int32_t *)fUnion.fFields.fArray - 1;
+     return umtx_atomic_dec(counter);
+ }
+
+ // Reads the reference counter stored in the int32_t immediately before
+ // the character array, using an acquire load.
+ int32_t
+ UnicodeString::refCount() const {
+     u_atomic_int32_t *counter = (u_atomic_int32_t *)fUnion.fFields.fArray - 1;
+     return umtx_loadAcquire(*counter);
+ }
+
+ // Drops this string's reference to a ref-counted array and frees the
+ // allocation (which starts at the counter, one int32_t before the array)
+ // once the counter reaches zero. Does nothing for arrays that are not ref-counted.
+ void
+ UnicodeString::releaseArray() {
+     if(!(fUnion.fFields.fLengthAndFlags & kRefCounted)) {
+         return;
+     }
+     if(removeRef() != 0) {
+         return;  // still referenced elsewhere
+     }
+     uprv_free((int32_t *)fUnion.fFields.fArray - 1);
+ }
+
+
+
+//========================================
+// Constructors
+//========================================
+
+// The default constructor is inline in unistr.h.
+
+ // Constructs a string with room for at least capacity UChars, filled with
+ // count copies of the code point c.
+ // Only the allocation is performed (no fill) when count is not positive, when c
+ // is above 0x10ffff, or when count supplementary code points would need more
+ // than INT32_MAX UChars.
+ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
+     fUnion.fFields.fLengthAndFlags = 0;
+     if(count <= 0 || (uint32_t)c > 0x10ffff) {
+         // just allocate and do not do anything else
+         allocate(capacity);
+         return;
+     }
+     const UBool isBMP = (UBool)(c <= 0xffff);
+     if(!isBMP && count > (INT32_MAX / 2)) {
+         // We would get more than 2G UChars.
+         allocate(capacity);
+         return;
+     }
+     // One code unit per BMP code point, a surrogate pair otherwise.
+     const int32_t length = isBMP ? count : count * 2;
+     if(capacity < length) {
+         capacity = length;
+     }
+     if(!allocate(capacity)) {
+         return;
+     }
+     UChar *array = getArrayStart();
+     if(isBMP) {
+         const UChar unit = (UChar)c;
+         for(int32_t index = 0; index < length; ++index) {
+             array[index] = unit;
+         }
+     } else {
+         const UChar lead = U16_LEAD(c);
+         const UChar trail = U16_TRAIL(c);
+         for(int32_t index = 0; index < length; index += 2) {
+             array[index] = lead;
+             array[index + 1] = trail;
+         }
+     }
+     setLength(length);
+ }
+
+ UnicodeString::UnicodeString(UChar ch) { // Constructs a string consisting of the single code unit ch.
+ fUnion.fFields.fLengthAndFlags = kLength1 | kShortString; // flags for a length-1 short string
+ fUnion.fStackFields.fBuffer[0] = ch; // the code unit goes into the internal stack buffer
+ }
+
+ // Constructs a string holding the UTF-16 form of the code point ch in the
+ // internal stack buffer. If ch cannot be appended the string stays empty.
+ UnicodeString::UnicodeString(UChar32 ch) {
+     fUnion.fFields.fLengthAndFlags = kShortString;
+     int32_t written = 0;
+     UBool failed = FALSE;
+     U16_APPEND(fUnion.fStackFields.fBuffer, written, US_STACKBUF_SIZE, ch, failed);
+     // Testing the error flag also keeps the compiler from complaining that it is unused.
+     // On failure written==0, so leaving the length untouched is what we want anyway.
+     if(failed) {
+         return;
+     }
+     setShortLength(written);
+ }
+
+ UnicodeString::UnicodeString(const UChar *text) { // Constructs a copy of text by appending it to an empty string.
+ fUnion.fFields.fLengthAndFlags = kShortString; // start as an empty short string
+ doAppend(text, 0, -1); // the length is passed as -1 here (presumably: NUL-terminated; confirm in doAppend)
+ }
+
+ UnicodeString::UnicodeString(const UChar *text, // Constructs a copy of textLength UChars from text.
+ int32_t textLength) {
+ fUnion.fFields.fLengthAndFlags = kShortString; // start as an empty short string
+ doAppend(text, 0, textLength); // append from offset 0
+ }
+
+ // Read-only aliasing constructor: the string refers to the caller's buffer.
+ // A NULL text yields an empty string without aliasing. The string becomes bogus
+ // when textLength is below -1, when textLength is -1 but the text is not declared
+ // terminated, or when it is declared terminated but text[textLength] is not 0.
+ UnicodeString::UnicodeString(UBool isTerminated,
+                              ConstChar16Ptr textPtr,
+                              int32_t textLength) {
+     fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
+     const UChar *text = textPtr;
+     if(text == NULL) {
+         // treat as an empty string, do not alias
+         setToEmpty();
+         return;
+     }
+     if(textLength < -1) {
+         setToBogus();
+         return;
+     }
+     if(textLength == -1) {
+         if(!isTerminated) {
+             setToBogus();
+             return;
+         }
+         // text is terminated, so its length can be measured
+         textLength = u_strlen(text);
+     } else if(isTerminated && text[textLength] != 0) {
+         setToBogus();
+         return;
+     }
+     // A terminated buffer also provides the NUL as usable capacity.
+     const int32_t aliasCapacity = isTerminated ? textLength + 1 : textLength;
+     setArray(const_cast<UChar *>(text), textLength, aliasCapacity);
+ }
+
+ // Writable aliasing constructor: the string uses the caller's buffer.
+ // A NULL buff yields an empty string without aliasing. The string becomes bogus
+ // when buffLength is below -1, buffCapacity is negative, or buffLength exceeds
+ // buffCapacity. A buffLength of -1 means: measure up to the first NUL, but never
+ // look beyond buffCapacity.
+ UnicodeString::UnicodeString(UChar *buff,
+                              int32_t buffLength,
+                              int32_t buffCapacity) {
+     fUnion.fFields.fLengthAndFlags = kWritableAlias;
+     if(buff == NULL) {
+         // treat as an empty string, do not alias
+         setToEmpty();
+         return;
+     }
+     if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
+         setToBogus();
+         return;
+     }
+     if(buffLength == -1) {
+         // like u_strlen(buff), but bounded by buffCapacity
+         int32_t scanned = 0;
+         while(scanned != buffCapacity && buff[scanned] != 0) {
+             ++scanned;
+         }
+         buffLength = scanned;
+     }
+     setArray(buff, buffLength, buffCapacity);
+ }
+
+ // Constructs a string from length chars at src, converted with u_charsToUChars().
+ // A NULL src yields an empty string; a negative length means src is measured with
+ // uprv_strlen(). If the array cannot be prepared the string becomes bogus.
+ UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
+     fUnion.fFields.fLengthAndFlags = kShortString;
+     if(src==NULL) {
+         // treat as an empty string
+         return;
+     }
+     if(length<0) {
+         length=(int32_t)uprv_strlen(src);
+     }
+     if(!cloneArrayIfNeeded(length, length, FALSE)) {
+         setToBogus();
+         return;
+     }
+     u_charsToUChars(src, getArrayStart(), length);
+     setLength(length);
+ }
+
+#if U_CHARSET_IS_UTF8
+
+ // Constructs a string from codepageData, interpreted as UTF-8
+ // (this variant is compiled only when U_CHARSET_IS_UTF8).
+ // A null pointer yields an empty string.
+ UnicodeString::UnicodeString(const char *codepageData) {
+     fUnion.fFields.fLengthAndFlags = kShortString;
+     if(codepageData == 0) {
+         return;
+     }
+     setToUTF8(codepageData);
+ }
+
+ // Constructs a string from dataLength bytes of codepageData, interpreted as UTF-8
+ // (this variant is compiled only when U_CHARSET_IS_UTF8).
+ // The string stays empty for a null pointer, a zero length, or a length below -1;
+ // a length of -1 means the data is measured with uprv_strlen().
+ UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
+     fUnion.fFields.fLengthAndFlags = kShortString;
+     const UBool nothingToConvert =
+         (UBool)(codepageData == 0 || dataLength == 0 || dataLength < -1);
+     if(nothingToConvert) {
+         return;
+     }
+     const int32_t byteCount =
+         (dataLength == -1) ? (int32_t)uprv_strlen(codepageData) : dataLength;
+     setToUTF8(StringPiece(codepageData, byteCount));
+ }
+
+// else see unistr_cnv.cpp
+#endif
+
+ UnicodeString::UnicodeString(const UnicodeString& that) { // Copy constructor.
+ fUnion.fFields.fLengthAndFlags = kShortString; // start as an empty short string so copyFrom() sees a valid object
+ copyFrom(that); // the actual copy is delegated to copyFrom()
+ }
+
+ UnicodeString::UnicodeString(UnicodeString &&src) U_NOEXCEPT { // Move constructor.
+ copyFieldsFrom(src, TRUE); // TRUE presumably marks src as moved-from (confirm in copyFieldsFrom)
+ }
+
+ UnicodeString::UnicodeString(const UnicodeString& that, // Constructs from the part of that starting at srcStart.
+ int32_t srcStart) {
+ fUnion.fFields.fLengthAndFlags = kShortString; // start as an empty short string
+ setTo(that, srcStart); // the copy is delegated to setTo()
+ }
+
+ UnicodeString::UnicodeString(const UnicodeString& that, // Constructs from srcLength UChars of that starting at srcStart.
+ int32_t srcStart,
+ int32_t srcLength) {
+ fUnion.fFields.fLengthAndFlags = kShortString; // start as an empty short string
+ setTo(that, srcStart, srcLength); // the copy is delegated to setTo()
+ }
+
+ // Replaceable base class clone() default implementation: does not clone and always returns NULL.
+ Replaceable *
+ Replaceable::clone() const {
+ return NULL;
+ }
+
+ // UnicodeString overrides clone() with a real implementation: a heap-allocated copy of this string.
+ UnicodeString *
+ UnicodeString::clone() const {
+ return new UnicodeString(*this); // copy-constructed with new; the caller receives the raw pointer
+ }
+
+//========================================
+// array allocation
+//========================================
+
+namespace {
+
+const int32_t kGrowSize = 128;
+
+// The number of bytes for one int32_t reference counter and capacity UChars
+// must fit into a 32-bit size_t (at least when on a 32-bit platform).
+// We also add one for the NUL terminator, to avoid reallocation in getTerminatedBuffer(),
+// and round up to a multiple of 16 bytes.
+// This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
+// (With more complicated checks we could go up to 0x7ffffffd without rounding up,
+// but that does not seem worth it.)
+const int32_t kMaxCapacity = 0x7ffffff5;
+
+ // Returns the capacity to use when growing to newLength: newLength plus a quarter
+ // of it plus kGrowSize, or kMaxCapacity when that would exceed kMaxCapacity.
+ int32_t getGrowCapacity(int32_t newLength) {
+     const int32_t growSize = (newLength >> 2) + kGrowSize;
+     const int32_t headroom = kMaxCapacity - newLength;
+     return (growSize <= headroom) ? newLength + growSize : kMaxCapacity;
+ }
+
+} // namespace
+
+ // Prepares storage for at least capacity UChars.
+ // Capacities up to US_STACKBUF_SIZE use the internal stack buffer. Larger ones
+ // (up to kMaxCapacity) get a heap block laid out as an int32_t reference counter
+ // followed by the UChars, with one extra UChar for a NUL and the total size
+ // rounded up to a multiple of 16 bytes.
+ // Returns TRUE on success; on failure the string is made bogus and FALSE is returned.
+ UBool
+ UnicodeString::allocate(int32_t capacity) {
+     if(capacity <= US_STACKBUF_SIZE) {
+         fUnion.fFields.fLengthAndFlags = kShortString;
+         return TRUE;
+     }
+     int32_t *block = NULL;
+     size_t byteCount = 0;
+     if(capacity <= kMaxCapacity) {
+         // size_t is unsigned, so that up to 4GB can be allocated.
+         // Reference counter + UChars (including one for the NUL).
+         byteCount = sizeof(int32_t) + ((size_t)capacity + 1) * U_SIZEOF_UCHAR;
+         // Round up to a multiple of 16.
+         byteCount = (byteCount + 15) & ~15;
+         block = (int32_t *) uprv_malloc(byteCount);
+     }
+     if(block == NULL) {
+         fUnion.fFields.fLengthAndFlags = kIsBogus;
+         fUnion.fFields.fArray = 0;
+         fUnion.fFields.fCapacity = 0;
+         return FALSE;
+     }
+     // set initial refCount; the character array starts right behind it
+     *block = 1;
+     fUnion.fFields.fArray = (UChar *)(block + 1);
+     fUnion.fFields.fCapacity = (int32_t)((byteCount - sizeof(int32_t)) / U_SIZEOF_UCHAR);
+     fUnion.fFields.fLengthAndFlags = kLongString;
+     return TRUE;
+ }
+
+//========================================
+// Destructor
+//========================================
+
+#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
+// Counters that the destructor increments when this macro is defined:
+// one slot per short length, plus one counter for all other strings.
+static u_atomic_int32_t finalLengthCounts[0x400]; // UnicodeString::kMaxShortLength+1
+static u_atomic_int32_t beyondCount(0);
+
+// Prints one line per length 0..59, then one line with the sum of all
+// remaining slots plus beyondCount.
+U_CAPI void unistr_printLengths() {
+  int32_t slot = 0;
+  while(slot <= 59) {
+    printf("%2d, %9d\n", slot, (int32_t)finalLengthCounts[slot]);
+    ++slot;
+  }
+  int32_t longerTotal = beyondCount;
+  while(slot < UPRV_LENGTHOF(finalLengthCounts)) {
+    longerTotal += finalLengthCounts[slot];
+    ++slot;
+  }
+  printf(">59, %9d\n", longerTotal);
+}
+#endif
+
+// Destructor: optionally records length statistics, then releases the array.
+UnicodeString::~UnicodeString()
+{
+#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
+  // Count lengths of strings at the end of their lifetime.
+  // Useful for discussion of a desirable stack buffer size.
+  // Only the contents length counts, not an optional NUL terminator
+  // nor further capacity.
+  // Open-buffer strings and strings which alias external storage are ignored.
+  const int32_t ignoredKinds = kOpenGetBuffer|kReadonlyAlias|kWritableAlias;
+  if((fUnion.fFields.fLengthAndFlags & ignoredKinds) == 0) {
+    if(!hasShortLength()) {
+      umtx_atomic_inc(&beyondCount);
+    } else {
+      umtx_atomic_inc(finalLengthCounts + getShortLength());
+    }
+  }
+#endif
+
+  releaseArray();
+}
+
+//========================================
+// Factory methods
+//========================================
+
+// Factory: returns a new string filled via setToUTF8(utf8).
+UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
+  UnicodeString converted;
+  converted.setToUTF8(utf8);
+  return converted;
+}
+
+// Factory: converts `length` UTF-32 code points to a new UTF-16 string,
+// using U+FFFD as the substitution character.
+// Returns a bogus string if the conversion fails or if no buffer could be
+// obtained.
+UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
+  UnicodeString result;
+  int32_t capacity;
+  // Most UTF-32 strings will be BMP-only and result in a same-length
+  // UTF-16 string. We overestimate the capacity just slightly,
+  // just in case there are a few supplementary characters.
+  if(length <= US_STACKBUF_SIZE) {
+    capacity = US_STACKBUF_SIZE;
+  } else {
+    capacity = length + (length >> 4) + 4;
+  }
+  do {
+    UChar *utf16 = result.getBuffer(capacity);
+    if(utf16 == NULL) {
+      // No buffer (e.g., out of memory). Without this check a failed
+      // allocation leaves a zero-capacity string, the conversion below only
+      // preflights and reports U_BUFFER_OVERFLOW_ERROR, and the loop would
+      // retry forever.
+      result.setToBogus();
+      break;
+    }
+    // Initialized because the converter does not write it when it rejects
+    // its arguments.
+    int32_t length16 = 0;
+    UErrorCode errorCode = U_ZERO_ERROR;
+    u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
+        utf32, length,
+        0xfffd,  // Substitution character.
+        NULL,    // Don't care about number of substitutions.
+        &errorCode);
+    result.releaseBuffer(length16);
+    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
+      capacity = length16 + 1;  // +1 for the terminating NUL.
+      continue;
+    } else if(U_FAILURE(errorCode)) {
+      result.setToBogus();
+    }
+    break;
+  } while(TRUE);
+  return result;
+}
+
+//========================================
+// Assignment
+//========================================
+
+// Copy assignment: forwards to copyFrom() with its default fastCopy argument.
+UnicodeString &UnicodeString::operator=(const UnicodeString &src) {
+  UnicodeString &self = copyFrom(src);
+  return self;
+}
+
+// Like operator=() but calls copyFrom() with fastCopy=TRUE, which keeps a
+// read-only alias as an alias instead of copying its contents.
+UnicodeString &UnicodeString::fastCopyFrom(const UnicodeString &src) {
+  UnicodeString &self = copyFrom(src, TRUE);
+  return self;
+}
+
+// Shared implementation of operator=() and fastCopyFrom().
+// Depending on the storage kind of src this copies the stack buffer, shares
+// the reference-counted buffer, keeps a read-only alias (only if fastCopy),
+// or allocates a new buffer and copies the contents.
+// The string becomes bogus if src is bogus or if the allocation fails.
+UnicodeString &
+UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
+  // self-assignment: nothing to do
+  if(&src == this) {
+    return *this;
+  }
+
+  // a bogus right side makes this string bogus, too
+  if(src.isBogus()) {
+    setToBogus();
+    return *this;
+  }
+
+  // let go of the current contents
+  releaseArray();
+
+  // an empty source just yields an empty string using the stack buffer
+  if(src.isEmpty()) {
+    setToEmpty();
+    return *this;
+  }
+
+  // From here on: fLength>0 and not an "open" src.getBuffer(minCapacity).
+  fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
+  switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) {
+  case kShortString:
+    // src lives in its stack buffer: copy the UChars into ours
+    uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
+                getShortLength() * U_SIZEOF_UCHAR);
+    break;
+  case kLongString:
+    // src owns a reference-counted buffer: share it and bump the count.
+    // src is const; the cast is needed only for the counter update.
+    ((UnicodeString &)src).addRef();
+    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
+    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
+    if(!hasShortLength()) {
+      fUnion.fFields.fLength = src.fUnion.fFields.fLength;
+    }
+    break;
+  case kReadonlyAlias:
+    if(fastCopy) {
+      // keep the read-only alias as such: point at the same external text
+      fUnion.fFields.fArray = src.fUnion.fFields.fArray;
+      fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
+      if(!hasShortLength()) {
+        fUnion.fFields.fLength = src.fUnion.fFields.fLength;
+      }
+      break;
+    }
+    // !fastCopy: handle like a writable alias,
+    // that is, allocate a new buffer and copy the contents
+    U_FALLTHROUGH;
+  case kWritableAlias: {
+    // do not alias someone else's writable buffer; make a real copy
+    int32_t copyLength = src.length();
+    if(allocate(copyLength)) {
+      u_memcpy(getArrayStart(), src.getArrayStart(), copyLength);
+      setLength(copyLength);
+      break;
+    }
+    // allocation failed: continue into the bogus case
+    U_FALLTHROUGH;
+  }
+  default:
+    // Become bogus.
+    // setToBogus() must not be called here because fArray and the flags
+    // are not consistent at this point.
+    fUnion.fFields.fLengthAndFlags = kIsBogus;
+    fUnion.fFields.fArray = 0;
+    fUnion.fFields.fCapacity = 0;
+    break;
+  }
+
+  return *this;
+}
+
+// Move assignment: releases the current array, then takes over the fields of
+// src via copyFieldsFrom(src, TRUE).
+UnicodeString &UnicodeString::operator=(UnicodeString &&src) U_NOEXCEPT {
+  // There is deliberately no check for self move assignment, consistent with
+  // the standard library. Self move assignment neither crashes nor leaks,
+  // but it might make the object bogus.
+  releaseArray();
+  copyFieldsFrom(src, TRUE);
+  return *this;
+}
+
+// Field-level transfer used by move assignment and swap(); it performs no
+// memory management of its own.
+// If src does not use its stack buffer and setSrcToBogus is true, src is left
+// bogus without releasing any memory.
+void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT {
+  const int16_t srcFlags = src.fUnion.fFields.fLengthAndFlags;
+  fUnion.fFields.fLengthAndFlags = srcFlags;
+
+  if((srcFlags & kUsingStackBuffer) == 0) {
+    // Not a stack-buffer string: take over all fields.
+    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
+    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
+    if(!hasShortLength()) {
+      fUnion.fFields.fLength = src.fUnion.fFields.fLength;
+    }
+    if(setSrcToBogus) {
+      // Make src bogus; its memory is not released here.
+      src.fUnion.fFields.fLengthAndFlags = kIsBogus;
+      src.fUnion.fFields.fArray = NULL;
+      src.fUnion.fFields.fCapacity = 0;
+    }
+    return;
+  }
+
+  // Short string in the stack buffer: copy the contents.
+  // Self assignment is skipped to prevent "overlap in memcpy" warnings,
+  // although copying a buffer exactly onto itself should be harmless.
+  if(this != &src) {
+    uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
+                getShortLength() * U_SIZEOF_UCHAR);
+  }
+}
+
+// Exchanges the contents of this string and `other` by rotating the fields
+// through a temporary, without touching reference counts.
+void UnicodeString::swap(UnicodeString &other) U_NOEXCEPT {
+  // An empty short string: known not to need releaseArray().
+  UnicodeString holder;
+  // Rotate the fields; none of the sources is reset in between.
+  holder.copyFieldsFrom(*this, FALSE);
+  this->copyFieldsFrom(other, FALSE);
+  other.copyFieldsFrom(holder, FALSE);
+  // Reset the temporary to an empty string so that its destructor does not
+  // release memory that now belongs to `other`.
+  holder.fUnion.fFields.fLengthAndFlags = kShortString;
+}
+
+//========================================
+// Miscellaneous operations
+//========================================
+
+// Returns a copy of this string in which each backslash escape has been
+// replaced by the code point that unescapeAt() returns for it.
+// Returns an empty string if an escape sequence is invalid, and a bogus
+// string if the result could not be constructed.
+UnicodeString UnicodeString::unescape() const {
+  UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
+  if (result.isBogus()) {
+    return result;
+  }
+  const UChar *chars = getBuffer();
+  const int32_t total = length();
+  int32_t pending = 0;  // start of the text not yet appended to result
+  int32_t pos = 0;
+  while (pos != total) {
+    if (chars[pos++] != 0x5C /*'\\'*/) {
+      continue;
+    }
+    // Flush the literal text before the backslash, then decode the escape.
+    result.append(chars, pending, (pos - 1) - pending);
+    UChar32 c = unescapeAt(pos); // advances pos
+    if (c < 0) {
+      // invalid escape sequence: return empty string
+      result.remove();
+      return result;
+    }
+    result.append(c);
+    pending = pos;
+  }
+  // Append whatever follows the last escape.
+  result.append(chars, pending, total - pending);
+  return result;
+}
+
+// Decodes one escape at `offset` by calling u_unescapeAt() with
+// UnicodeString_charAt as the character accessor; `offset` is passed by
+// pointer so that the callee can advance it.
+UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
+  void *context = (void*)this;
+  return u_unescapeAt(UnicodeString_charAt, &offset, length(), context);
+}
+
+//========================================
+// Read-only implementation
+//========================================
+// Compares the first `len` UChars of this string and `text` for equality.
+// Requires: this & text not bogus and have same lengths.
+UBool
+UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
+  // Comparing bytes is sufficient for equality, regardless of endianness.
+  const UChar *mine = getArrayStart();
+  const UChar *theirs = text.getArrayStart();
+  return uprv_memcmp(mine, theirs, len * U_SIZEOF_UCHAR) == 0;
+}
+
+// Compares [start, start+length) of this string with srcLength UChars of
+// srcChars beginning at srcStart, in code unit order.
+// A negative srcLength means that the source text is NUL-terminated.
+// Returns a negative value, 0, or a positive value. A bogus string always
+// yields -1; a NULL srcChars is treated as an empty string.
+int8_t
+UnicodeString::doCompare( int32_t start,
+                          int32_t length,
+                          const UChar *srcChars,
+                          int32_t srcStart,
+                          int32_t srcLength) const
+{
+  // compare illegal string values
+  if(isBogus()) {
+    return -1;
+  }
+
+  // pin indices to legal values
+  pinIndices(start, length);
+
+  if(srcChars == NULL) {
+    // treat const UChar *srcChars==NULL as an empty string
+    return length == 0 ? 0 : 1;
+  }
+
+  // get the correct pointer
+  const UChar *chars = getArrayStart();
+
+  // From here on both pointers address the first code unit to compare;
+  // srcStart must not be applied again.
+  chars += start;
+  srcChars += srcStart;
+
+  int32_t minLength;
+  int8_t lengthResult;
+
+  // get the srcLength if necessary
+  if(srcLength < 0) {
+    // srcChars already includes srcStart: adding it a second time would
+    // measure the wrong tail of the source (or read past its terminator).
+    srcLength = u_strlen(srcChars);
+  }
+
+  // are we comparing different lengths?
+  if(length != srcLength) {
+    if(length < srcLength) {
+      minLength = length;
+      lengthResult = -1;
+    } else {
+      minLength = srcLength;
+      lengthResult = 1;
+    }
+  } else {
+    minLength = length;
+    lengthResult = 0;
+  }
+
+  /*
+   * note that uprv_memcmp() returns an int but we return an int8_t;
+   * we need to take care not to truncate the result -
+   * one way to do this is to right-shift the value to
+   * move the sign bit into the lower 8 bits and making sure that this
+   * does not become 0 itself
+   */
+
+  if(minLength > 0 && chars != srcChars) {
+    int32_t result;
+
+#   if U_IS_BIG_ENDIAN
+      // big-endian: byte comparison works
+      result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
+      if(result != 0) {
+        return (int8_t)(result >> 15 | 1);
+      }
+#   else
+      // little-endian: compare UChar units
+      do {
+        result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
+        if(result != 0) {
+          return (int8_t)(result >> 15 | 1);
+        }
+      } while(--minLength > 0);
+#   endif
+  }
+  // the common prefix is equal: the shorter string sorts first
+  return lengthResult;
+}
+
+/*
+ * String compare in code point order - doCompare() compares in code unit order.
+ * A bogus string yields -1; srcChars==NULL is treated as an empty string.
+ * The int32_t result of uprv_strCompare() is reduced to -1, 0 or +1.
+ */
+int8_t
+UnicodeString::doCompareCodePointOrder(int32_t start,
+                                       int32_t length,
+                                       const UChar *srcChars,
+                                       int32_t srcStart,
+                                       int32_t srcLength) const
+{
+  // compare illegal string values
+  if(isBogus()) {
+    return -1;
+  }
+
+  // pin indices to legal values
+  pinIndices(start, length);
+
+  // Resolve the right-hand text; NULL means an empty string.
+  const UChar *other = NULL;
+  if(srcChars != NULL) {
+    other = srcChars + srcStart;
+  } else {
+    srcLength = 0;
+  }
+
+  const int32_t diff = uprv_strCompare(getArrayStart() + start, length, other, srcLength, FALSE, TRUE);
+  if(diff == 0) {
+    return 0;
+  }
+  /* translate the 32-bit result into an 8-bit one */
+  return (int8_t)(diff >> 15 | 1);
+}
+
+// Named accessor that returns length().
+int32_t UnicodeString::getLength() const {
+  const int32_t len = length();
+  return len;
+}
+
+// Named accessor that returns charAt(offset).
+UChar UnicodeString::getCharAt(int32_t offset) const {
+  const UChar unit = charAt(offset);
+  return unit;
+}
+
+// Named accessor that returns char32At(offset).
+UChar32 UnicodeString::getChar32At(int32_t offset) const {
+  const UChar32 codePoint = char32At(offset);
+  return codePoint;
+}
+
+// Returns the code point that U16_GET reads at `offset`,
+// or kInvalidUChar if offset is outside [0, length()).
+UChar32
+UnicodeString::char32At(int32_t offset) const
+{
+  int32_t len = length();
+  // One unsigned comparison rejects both negative and too-large offsets.
+  if((uint32_t)offset >= (uint32_t)len) {
+    return kInvalidUChar;
+  }
+  const UChar *array = getArrayStart();
+  UChar32 c;
+  U16_GET(array, 0, offset, len, c);
+  return c;
+}
+
+// Returns `offset` after U16_SET_CP_START has adjusted it,
+// or 0 if offset is outside [0, length()).
+int32_t
+UnicodeString::getChar32Start(int32_t offset) const {
+  // One unsigned comparison rejects both negative and too-large offsets.
+  if((uint32_t)offset >= (uint32_t)length()) {
+    return 0;
+  }
+  const UChar *array = getArrayStart();
+  U16_SET_CP_START(array, 0, offset);
+  return offset;
+}
+
+// Returns `offset` after U16_SET_CP_LIMIT has adjusted it,
+// or length() if offset is outside [0, length()).
+int32_t
+UnicodeString::getChar32Limit(int32_t offset) const {
+  int32_t len = length();
+  // One unsigned comparison rejects both negative and too-large offsets.
+  if((uint32_t)offset >= (uint32_t)len) {
+    return len;
+  }
+  const UChar *array = getArrayStart();
+  U16_SET_CP_LIMIT(array, 0, offset, len);
+  return offset;
+}
+
+// Returns u_countChar32() for the pinned range [start, start+length).
+int32_t
+UnicodeString::countChar32(int32_t start, int32_t length) const {
+  pinIndices(start, length);
+  // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
+  const UChar *rangeStart = getArrayStart() + start;
+  return u_countChar32(rangeStart, length);
+}
+
+// Returns u_strHasMoreChar32Than() for the pinned range
+// [start, start+length) and the given number.
+UBool
+UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
+  pinIndices(start, length);
+  // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
+  const UChar *rangeStart = getArrayStart() + start;
+  return u_strHasMoreChar32Than(rangeStart, length, number);
+}
+
+// Pins `index` to [0, length()], then moves it with U16_FWD_N by delta
+// (if delta>0) or with U16_BACK_N by -delta (otherwise),
+// and returns the resulting index.
+int32_t
+UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
+  int32_t len = length();
+  // pin index
+  index = index < 0 ? 0 : (index > len ? len : index);
+
+  const UChar *array = getArrayStart();
+  if(delta <= 0) {
+    U16_BACK_N(array, 0, index, -delta);
+  } else {
+    U16_FWD_N(array, index, len, delta);
+  }
+
+  return index;
+}
+
+// Copies the pinned range [start, start+length) of this string to
+// dst + dstStart, unless that is exactly where the range already is.
+void
+UnicodeString::doExtract(int32_t start,
+                         int32_t length,
+                         UChar *dst,
+                         int32_t dstStart) const
+{
+  // pin indices to legal values
+  pinIndices(start, length);
+
+  const UChar *array = getArrayStart();
+  if(array + start == dst + dstStart) {
+    // dst aliases our own text at the same position: nothing to copy
+    return;
+  }
+  us_arrayCopy(array, start, dst, dstStart, length);
+}
+
+// Copies the whole string to dest if it fits into destCapacity and lets
+// u_terminateUChars() terminate it and set the final error code.
+// Returns length() if errorCode already indicates a failure or if the
+// arguments are illegal (then errorCode is set to U_ILLEGAL_ARGUMENT_ERROR);
+// otherwise returns the result of u_terminateUChars().
+int32_t
+UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
+                       UErrorCode &errorCode) const {
+  int32_t len = length();
+  if(U_FAILURE(errorCode)) {
+    return len;
+  }
+  if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
+    errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+    return len;
+  }
+
+  const UChar *array = getArrayStart();
+  // copy only if there is something to copy, it fits, and dest is not our own array
+  if(len>0 && len<=destCapacity && array!=dest) {
+    u_memcpy(dest, array, len);
+  }
+  return u_terminateUChars(dest, destCapacity, len, &errorCode);
+}
+
+// Converts the pinned range [start, start+length) with u_UCharsToChars()
+// into target if it fits, and returns the result of u_terminateChars().
+// Returns 0 without doing anything if targetCapacity is negative, or if
+// target is NULL while targetCapacity is positive.
+int32_t
+UnicodeString::extract(int32_t start,
+                       int32_t length,
+                       char *target,
+                       int32_t targetCapacity,
+                       enum EInvariant) const
+{
+  // illegal arguments: do nothing
+  const UBool missingTarget = targetCapacity > 0 && target == NULL;
+  if(targetCapacity < 0 || missingTarget) {
+    return 0;
+  }
+
+  // pin the indices to legal values
+  pinIndices(start, length);
+
+  if(length <= targetCapacity) {
+    u_UCharsToChars(getArrayStart() + start, target, length);
+  }
+  // The status set by u_terminateChars() is not reported to the caller.
+  UErrorCode status = U_ZERO_ERROR;
+  return u_terminateChars(target, targetCapacity, length, &status);
+}
+
+// Returns a string constructed with UnicodeString(FALSE, array + start, len)
+// for the pinned range of this string. If getBuffer() returns NULL, the
+// result is constructed with len=-2 so that it becomes bogus.
+UnicodeString
+UnicodeString::tempSubString(int32_t start, int32_t len) const {
+  pinIndices(start, len);
+  // getBuffer() rather than getArrayStart(), to check kIsBogus & kOpenGetBuffer
+  const UChar *array = getBuffer();
+  if(array==NULL) {
+    // Any non-NULL pointer will do; NULL would make an empty string.
+    array=fUnion.fStackFields.fBuffer;
+    len=-2;  // bogus result string
+  }
+  const UChar *subStart = array + start;
+  return UnicodeString(FALSE, subStart, len);
+}
+
+// Converts the pinned range [start, start+len) to UTF-8 in `target`
+// (capacity bytes), using U+FFFD as the substitution character.
+// Returns the length that u_strToUTF8WithSub() reports; the error code is
+// not passed on to the caller. Returns 0 if the converter does not report
+// a length.
+int32_t
+UnicodeString::toUTF8(int32_t start, int32_t len,
+                      char *target, int32_t capacity) const {
+  pinIndices(start, len);
+  // Initialized so that a defined value is returned even when the converter
+  // rejects its arguments before writing the output length.
+  int32_t length8 = 0;
+  UErrorCode errorCode = U_ZERO_ERROR;
+  u_strToUTF8WithSub(target, capacity, &length8,
+                     getBuffer() + start, len,
+                     0xFFFD,  // Standard substitution character.
+                     NULL,    // Don't care about number of substitutions.
+                     &errorCode);
+  return length8;
+}
+
+#if U_CHARSET_IS_UTF8
+
+// Variant compiled when U_CHARSET_IS_UTF8 is set: converts with toUTF8(),
+// limiting dstSize to the int32_t range.
+// Returns 0 if target is NULL while dstSize is positive.
+int32_t
+UnicodeString::extract(int32_t start, int32_t len,
+                       char *target, uint32_t dstSize) const {
+  // illegal arguments: do nothing
+  // (dstSize is unsigned and therefore cannot be negative)
+  if(dstSize > 0 && target == 0) {
+    return 0;
+  }
+  const int32_t capacity = dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff;
+  return toUTF8(start, len, target, capacity);
+}
+
+// else see unistr_cnv.cpp
+#endif
+
+// Extracts [start, limit) into `target` via doExtract(), after pinning both
+// indices.
+void
+UnicodeString::extractBetween(int32_t start,
+                              int32_t limit,
+                              UnicodeString& target) const {
+  pinIndex(start);
+  pinIndex(limit);
+  const int32_t count = limit - start;
+  doExtract(start, count, target);
+}
+
+// Appends the UTF-8 form of this string to `sink` and flushes it.
+// Nothing is written for an empty string or if the conversion fails.
+//
+// When converting from UTF-16 to UTF-8, the result will have at most 3 times
+// as many bytes as the source has UChars.
+// The "worst cases" are writing systems like Indic, Thai and CJK with
+// 3:1 bytes:UChars.
+void
+UnicodeString::toUTF8(ByteSink &sink) const {
+  const int32_t length16 = length();
+  if(length16 == 0) {
+    return;
+  }
+
+  // Ask the sink for a buffer; stackBuffer is the scratch space offered to it.
+  char stackBuffer[1024];
+  int32_t capacity = (int32_t)sizeof(stackBuffer);
+  const int32_t minCapacity = length16 < capacity ? length16 : capacity;
+  char *utf8 = sink.GetAppendBuffer(minCapacity,
+                                    3*length16,
+                                    stackBuffer, capacity,
+                                    &capacity);
+
+  UBool utf8IsOwned = FALSE;
+  int32_t length8 = 0;
+  UErrorCode errorCode = U_ZERO_ERROR;
+  u_strToUTF8WithSub(utf8, capacity, &length8,
+                     getBuffer(), length16,
+                     0xFFFD,  // Standard substitution character.
+                     NULL,    // Don't care about number of substitutions.
+                     &errorCode);
+  if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
+    // The buffer was too small: retry with a heap buffer of the reported size.
+    utf8 = (char *)uprv_malloc(length8);
+    if(utf8 == NULL) {
+      errorCode = U_MEMORY_ALLOCATION_ERROR;
+    } else {
+      utf8IsOwned = TRUE;
+      errorCode = U_ZERO_ERROR;
+      u_strToUTF8WithSub(utf8, length8, &length8,
+                         getBuffer(), length16,
+                         0xFFFD,  // Standard substitution character.
+                         NULL,    // Don't care about number of substitutions.
+                         &errorCode);
+    }
+  }
+  if(U_SUCCESS(errorCode)) {
+    sink.Append(utf8, length8);
+    sink.Flush();
+  }
+  if(utf8IsOwned) {
+    uprv_free(utf8);
+  }
+}
+
+// Converts this string to UTF-32 in `utf32` (capacity code points), using
+// U+FFFD as the substitution character, and returns the length reported by
+// u_strToUTF32WithSub(). Returns 0 without converting if errorCode already
+// indicates a failure.
+int32_t
+UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
+  int32_t length32=0;
+  if(U_FAILURE(errorCode)) {
+    return length32;
+  }
+  // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
+  u_strToUTF32WithSub(utf32, capacity, &length32,
+      getBuffer(), length(),
+      0xfffd,  // Substitution character.
+      NULL,    // Don't care about number of substitutions.
+      &errorCode);
+  return length32;
+}
+
+// Returns the index of the first occurrence of the text at
+// srcChars + srcStart (srcLength UChars, or NUL-terminated if srcLength<0)
+// within the pinned range [start, start+length), or -1 if there is none.
+// Also returns -1 for a bogus string, NULL srcChars, negative srcStart,
+// or an empty search text.
+int32_t
+UnicodeString::indexOf(const UChar *srcChars,
+                       int32_t srcStart,
+                       int32_t srcLength,
+                       int32_t start,
+                       int32_t length) const
+{
+  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
+    return -1;
+  }
+
+  // UnicodeString does not find empty substrings
+  if(srcLength < 0 && srcChars[srcStart] == 0) {
+    return -1;
+  }
+
+  // get the indices within bounds
+  pinIndices(start, length);
+
+  // find the first occurrence of the substring
+  const UChar *array = getArrayStart();
+  const UChar *found = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
+  return found == NULL ? -1 : (int32_t)(found - array);
+}
+
+// Returns the index of the first occurrence of code unit `c` within the
+// pinned range [start, start+length), or -1 if u_memchr() finds none.
+int32_t
+UnicodeString::doIndexOf(UChar c,
+                         int32_t start,
+                         int32_t length) const
+{
+  // pin indices
+  pinIndices(start, length);
+
+  // find the first occurrence of c
+  const UChar *array = getArrayStart();
+  const UChar *found = u_memchr(array + start, c, length);
+  return found == NULL ? -1 : (int32_t)(found - array);
+}
+
+// Returns the index of the first occurrence of code point `c` within the
+// pinned range [start, start+length), or -1 if u_memchr32() finds none.
+int32_t
+UnicodeString::doIndexOf(UChar32 c,
+                         int32_t start,
+                         int32_t length) const {
+  // pin indices
+  pinIndices(start, length);
+
+  // find the first occurrence of c
+  const UChar *array = getArrayStart();
+  const UChar *found = u_memchr32(array + start, c, length);
+  return found == NULL ? -1 : (int32_t)(found - array);
+}
+
+// Returns the index of the last occurrence of the text at
+// srcChars + srcStart (srcLength UChars, or NUL-terminated if srcLength<0)
+// within the pinned range [start, start+length), or -1 if there is none.
+// Also returns -1 for a bogus string, NULL srcChars, negative srcStart,
+// or an empty search text.
+int32_t
+UnicodeString::lastIndexOf(const UChar *srcChars,
+                           int32_t srcStart,
+                           int32_t srcLength,
+                           int32_t start,
+                           int32_t length) const
+{
+  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
+    return -1;
+  }
+
+  // UnicodeString does not find empty substrings
+  if(srcLength < 0 && srcChars[srcStart] == 0) {
+    return -1;
+  }
+
+  // get the indices within bounds
+  pinIndices(start, length);
+
+  // find the last occurrence of the substring
+  const UChar *array = getArrayStart();
+  const UChar *found = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
+  return found == NULL ? -1 : (int32_t)(found - array);
+}
+
+// Returns the index of the last occurrence of code unit `c` within the
+// pinned range [start, start+length), or -1 if the string is bogus or
+// u_memrchr() finds none.
+int32_t
+UnicodeString::doLastIndexOf(UChar c,
+                             int32_t start,
+                             int32_t length) const
+{
+  if(isBogus()) {
+    return -1;
+  }
+
+  // pin indices
+  pinIndices(start, length);
+
+  // find the last occurrence of c
+  const UChar *array = getArrayStart();
+  const UChar *found = u_memrchr(array + start, c, length);
+  return found == NULL ? -1 : (int32_t)(found - array);
+}
+
+// Returns the index of the last occurrence of code point `c` within the
+// pinned range [start, start+length), or -1 if u_memrchr32() finds none.
+int32_t
+UnicodeString::doLastIndexOf(UChar32 c,
+                             int32_t start,
+                             int32_t length) const {
+  // pin indices
+  pinIndices(start, length);
+
+  // find the last occurrence of c
+  const UChar *array = getArrayStart();
+  const UChar *found = u_memrchr32(array + start, c, length);
+  return found == NULL ? -1 : (int32_t)(found - array);
+}
+
+//========================================
+// Write implementation
+//========================================
+
+// Replaces, within [start, start+length) of this string, each occurrence of
+// oldText[oldStart, oldStart+oldLength) with
+// newText[newStart, newStart+newLength). All ranges are pinned first.
+// Does nothing if any of the three strings is bogus or if the pinned
+// oldLength is 0. Returns *this.
+UnicodeString&
+UnicodeString::findAndReplace(int32_t start,
+                              int32_t length,
+                              const UnicodeString& oldText,
+                              int32_t oldStart,
+                              int32_t oldLength,
+                              const UnicodeString& newText,
+                              int32_t newStart,
+                              int32_t newLength)
+{
+  if(isBogus() || oldText.isBogus() || newText.isBogus()) {
+    return *this;
+  }
+
+  pinIndices(start, length);
+  oldText.pinIndices(oldStart, oldLength);
+  newText.pinIndices(newStart, newLength);
+
+  if(oldLength == 0) {
+    return *this;
+  }
+
+  // Keep searching while the remaining range can still hold oldText.
+  while(length > 0 && length >= oldLength) {
+    const int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
+    if(pos < 0) {
+      // no more oldText's here: done
+      break;
+    }
+    // found oldText: replace it by newText and continue behind the replacement
+    replace(pos, oldLength, newText, newStart, newLength);
+    length -= pos + oldLength - start;
+    start = pos + newLength;
+  }
+
+  return *this;
+}
+
+
+// Releases the array and puts the fields into the bogus state:
+// kIsBogus flags, null array pointer, zero capacity.
+void
+UnicodeString::setToBogus()
+{
+  releaseArray();
+
+  fUnion.fFields.fLengthAndFlags = kIsBogus;
+  fUnion.fFields.fArray = 0;
+  fUnion.fFields.fCapacity = 0;
+}
+
+// Turns a bogus string into an empty one; other strings are left alone.
+void
+UnicodeString::unBogus() {
+  const UBool bogus = (fUnion.fFields.fLengthAndFlags & kIsBogus) != 0;
+  if(bogus) {
+    setToEmpty();
+  }
+}
+
+// Returns a pointer to the contents followed by a NUL, or nullptr if the
+// string is not writable or the array could not be cloned with room for
+// the terminator.
+const char16_t *
+UnicodeString::getTerminatedBuffer() {
+  if(!isWritable()) {
+    return nullptr;
+  }
+  UChar *array = getArrayStart();
+  int32_t len = length();
+  if(len < getCapacity()) {
+    if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
+      // With len<capacity on a read-only alias, array[len] is either the
+      // original NUL (if constructed with (TRUE, s, length)) or one of the
+      // original string contents characters (if later truncated).
+      // Either way array[len] is initialized memory and may be inspected.
+      if(array[len] == 0) {
+        return array;
+      }
+    } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) {
+      // kRefCounted: the NUL must not be written if the buffer is shared.
+      // Writing would be mostly safe, except when the length of one copy was
+      // modified without copy-on-write, e.g., via truncate(newLength) or
+      // remove(void): the NUL would land in the middle of another copy's string.
+
+      // Otherwise the buffer is fully writable and the NUL can be written.
+      // There is no test for an existing NUL because array[len] might be
+      // uninitialized memory. (Reading it would be safe, but tools like
+      // valgrind & Purify would complain.)
+      array[len] = 0;
+      return array;
+    }
+  }
+  // No room, or the NUL cannot be written in place:
+  // get a buffer of our own with space for len+1 UChars.
+  if(len == INT32_MAX || !cloneArrayIfNeeded(len+1)) {
+    return nullptr;
+  }
+  array = getArrayStart();
+  array[len] = 0;
+  return array;
+}
+
+// setTo() analogous to the readonly-aliasing constructor with the same signature.
+// Makes this string a read-only alias of textPtr. A NULL pointer yields an
+// empty string; inconsistent arguments make the string bogus; a string with
+// an "open" getBuffer(minCapacity) is not modified.
+UnicodeString &
+UnicodeString::setTo(UBool isTerminated,
+                     ConstChar16Ptr textPtr,
+                     int32_t textLength)
+{
+  if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
+    // a string with an "open" getBuffer(minCapacity) must not be modified
+    return *this;
+  }
+
+  const UChar *text = textPtr;
+  if(text == NULL) {
+    // treat as an empty string, do not alias
+    releaseArray();
+    setToEmpty();
+    return *this;
+  }
+
+  // Illegal: a length below -1; length -1 without termination;
+  // or a claimed terminator that is not actually a NUL.
+  if( textLength < -1 ||
+      (textLength == -1 && !isTerminated) ||
+      (textLength >= 0 && isTerminated && text[textLength] != 0)
+  ) {
+    setToBogus();
+    return *this;
+  }
+
+  releaseArray();
+
+  if(textLength == -1) {
+    // text must be terminated here, or else the check above would have failed
+    textLength = u_strlen(text);
+  }
+  fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
+  // a terminated text gets one more unit of capacity for its NUL
+  const int32_t aliasCapacity = isTerminated ? textLength + 1 : textLength;
+  setArray((UChar *)text, textLength, aliasCapacity);
+  return *this;
+}
+
+// setTo() analogous to the writable-aliasing constructor with the same signature.
+// Makes this string a writable alias of `buffer`. A NULL buffer yields an
+// empty string; inconsistent arguments make the string bogus; a string with
+// an "open" getBuffer(minCapacity) is not modified.
+// buffLength==-1 means: count up to the first NUL, but not beyond buffCapacity.
+UnicodeString &
+UnicodeString::setTo(UChar *buffer,
+                     int32_t buffLength,
+                     int32_t buffCapacity) {
+  if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
+    // a string with an "open" getBuffer(minCapacity) must not be modified
+    return *this;
+  }
+
+  if(buffer == NULL) {
+    // treat as an empty string, do not alias
+    releaseArray();
+    setToEmpty();
+    return *this;
+  }
+
+  if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
+    setToBogus();
+    return *this;
+  }
+  if(buffLength == -1) {
+    // like u_strlen(buffer), but never looking beyond buffCapacity
+    int32_t counted = 0;
+    while(counted != buffCapacity && buffer[counted] != 0) {
+      ++counted;
+    }
+    buffLength = counted;
+  }
+
+  releaseArray();
+
+  fUnion.fFields.fLengthAndFlags = kWritableAlias;
+  setArray(buffer, buffLength, buffCapacity);
+  return *this;
+}
+
+// Replaces the contents with the UTF-16 form of `utf8`, using U+FFFD as the
+// substitution character. The string becomes bogus if the conversion fails.
+// Returns *this.
+UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
+  unBogus();
+  int32_t length = utf8.length();
+  int32_t capacity;
+  // The UTF-16 string will be at most as long as the UTF-8 string.
+  if(length <= US_STACKBUF_SIZE) {
+    capacity = US_STACKBUF_SIZE;
+  } else {
+    capacity = length + 1;  // +1 for the terminating NUL.
+  }
+  UChar *utf16 = getBuffer(capacity);
+  // Initialized because the converter does not write it when it rejects its
+  // arguments (for example when getBuffer() returned NULL); releaseBuffer()
+  // must not receive an indeterminate value.
+  int32_t length16 = 0;
+  UErrorCode errorCode = U_ZERO_ERROR;
+  u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
+      utf8.data(), length,
+      0xfffd,  // Substitution character.
+      NULL,    // Don't care about number of substitutions.
+      &errorCode);
+  releaseBuffer(length16);
+  if(U_FAILURE(errorCode)) {
+    setToBogus();
+  }
+  return *this;
+}
+
+// Stores `c` at `offset`, with the offset pinned to [0, length()-1].
+// Does nothing if cloneArrayIfNeeded() fails or the string is empty.
+// Returns *this.
+UnicodeString&
+UnicodeString::setCharAt(int32_t offset,
+                         UChar c)
+{
+  int32_t len = length();
+  // cloneArrayIfNeeded() is called first, even for an empty string.
+  if(cloneArrayIfNeeded() && len > 0) {
+    const int32_t last = len - 1;
+    const int32_t pinned = offset < 0 ? 0 : (offset >= len ? last : offset);
+    getArrayStart()[pinned] = c;
+  }
+  return *this;
+}
+
+// Replaces the range (start, _length) with the UTF-16 form of srcChar.
+// If U16_APPEND reports an error for srcChar, the range is removed instead
+// of being replaced. Returns the result of doReplace().
+UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               UChar32 srcChar) {
+  UChar units[U16_MAX_LENGTH];
+  int32_t unitCount = 0;
+  UBool isError = FALSE;
+  U16_APPEND(units, unitCount, U16_MAX_LENGTH, srcChar, isError);
+  // isError is tested so that the compiler does not complain that it is unused.
+  // If isError (srcChar is not a valid code point) then unitCount==0, which
+  // means that the source segment is removed rather than replaced.
+  const int32_t insertLength = isError ? 0 : unitCount;
+  return doReplace(start, _length, units, 0, insertLength);
+}
+
+// Appends the UTF-16 form of srcChar via doAppend().
+// Leaves the string unchanged if U16_APPEND reports an error for srcChar.
+UnicodeString&
+UnicodeString::append(UChar32 srcChar) {
+  UChar units[U16_MAX_LENGTH];
+  int32_t unitCount = 0;
+  UBool isError = FALSE;
+  U16_APPEND(units, unitCount, U16_MAX_LENGTH, srcChar, isError);
+  // isError is tested so that the compiler does not complain that it is unused.
+  // If isError then unitCount==0, which would make doAppend() a no-op anyway.
+  if(isError) {
+    return *this;
+  }
+  return doAppend(units, 0, unitCount);
+}
+
+// Replaces the range (start, length) of this string with the pinned range
+// (srcStart, srcLength) of src, by forwarding to the UChar* overload.
+UnicodeString&
+UnicodeString::doReplace( int32_t start,
+              int32_t length,
+              const UnicodeString& src,
+              int32_t srcStart,
+              int32_t srcLength)
+{
+  // pin the source indices to legal values
+  src.pinIndices(srcStart, srcLength);
+
+  // take the characters from src and replace our range with them
+  const UChar *srcArray = src.getArrayStart();
+  return doReplace(start, length, srcArray, srcStart, srcLength);
+}
+
+// Replaces the range (start, length) of this string with srcLength UChars
+// starting at srcChars + srcStart (NUL-terminated if srcLength<0).
+// Does nothing if the string is not writable or if the array cannot be
+// cloned/grown; makes the string bogus if the new length would overflow or
+// if a temporary copy of overlapping source text cannot be made.
+// Returns *this.
+UnicodeString&
+UnicodeString::doReplace(int32_t start,
+             int32_t length,
+             const UChar *srcChars,
+             int32_t srcStart,
+             int32_t srcLength)
+{
+  if(!isWritable()) {
+    return *this;
+  }
+
+  int32_t oldLength = this->length();
+
+  // optimize (read-only alias).remove(0, start) and .remove(start, end)
+  if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) {
+    if(start == 0) {
+      // remove a prefix by moving the array pointer forward
+      pinIndex(length);
+      fUnion.fFields.fArray += length;
+      fUnion.fFields.fCapacity -= length;
+      setLength(oldLength - length);
+      return *this;
+    } else {
+      pinIndex(start);
+      if(length >= (oldLength - start)) {
+        // remove a suffix by reducing the length (like truncate())
+        setLength(start);
+        fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
+        return *this;
+      }
+    }
+  }
+
+  // Replacing at the very end is an append.
+  if(start == oldLength) {
+    return doAppend(srcChars, srcStart, srcLength);
+  }
+
+  if(srcChars == 0) {
+    srcLength = 0;
+  } else {
+    // All remaining operations are relative to srcChars + srcStart.
+    // From this point forward, do not use srcStart.
+    srcChars += srcStart;
+    if (srcLength < 0) {
+      // get the srcLength if necessary
+      srcLength = u_strlen(srcChars);
+    }
+  }
+
+  // pin the indices to legal values
+  pinIndices(start, length);
+
+  // Compute the length of the string after the replacement,
+  // avoiding int32_t overflow.
+  int32_t newLength = oldLength - length;
+  if(srcLength > (INT32_MAX - newLength)) {
+    setToBogus();
+    return *this;
+  }
+  newLength += srcLength;
+
+  // Check for insertion into ourself
+  const UChar *oldArray = getArrayStart();
+  if (isBufferWritable() &&
+      oldArray < srcChars + srcLength &&
+      srcChars < oldArray + oldLength) {
+    // The source overlaps our own writable buffer:
+    // copy it into a new UnicodeString and start over.
+    UnicodeString copy(srcChars, srcLength);
+    if (copy.isBogus()) {
+      setToBogus();
+      return *this;
+    }
+    return doReplace(start, length, copy.getArrayStart(), 0, srcLength);
+  }
+
+  // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy
+  // the current contents; therefore the current fArray must be kept.
+  UChar oldStackBuffer[US_STACKBUF_SIZE];
+  if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
+    // Save the stack buffer contents because they will be overwritten
+    // with fUnion.fFields values.
+    u_memcpy(oldStackBuffer, oldArray, oldLength);
+    oldArray = oldStackBuffer;
+  }
+
+  // clone our array and allocate a bigger array if needed
+  int32_t *bufferToDelete = 0;
+  if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
+                         FALSE, &bufferToDelete)
+  ) {
+    return *this;
+  }
+
+  // now do the replace
+
+  UChar *newArray = getArrayStart();
+  const UBool arrayChanged = newArray != oldArray;
+  if(arrayChanged) {
+    // fArray changed: the text before the replaced range must be copied, too
+    us_arrayCopy(oldArray, 0, newArray, 0, start);
+  }
+  if(arrayChanged || length != srcLength) {
+    // Move the text behind the replaced range to its new position,
+    // leaving a hole of srcLength UChars at start.
+    us_arrayCopy(oldArray, start + length,
+                 newArray, start + srcLength,
+                 oldLength - (start + length));
+  }
+
+  // now fill in the hole with the new string
+  us_arrayCopy(srcChars, 0, newArray, start, srcLength);
+
+  setLength(newLength);
+
+  // delayed delete in case srcChars == fArray when we started, and
+  // to keep oldArray alive for the above operations
+  if (bufferToDelete) {
+    uprv_free(bufferToDelete);
+  }
+
+  return *this;
+}
+
+// Versions of doReplace() only for append() variants.
+// doReplace() and doAppend() optimize for different cases.
+
+// Appends the pinned range (srcStart, srcLength) of src to this string by
+// forwarding to the UChar* overload. A srcLength of 0 is a no-op.
+UnicodeString&
+UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
+  if(srcLength != 0) {
+    // pin the indices to legal values
+    src.pinIndices(srcStart, srcLength);
+    return doAppend(src.getArrayStart(), srcStart, srcLength);
+  }
+  return *this;
+}
+
+// Appends srcLength UChars starting at srcChars + srcStart
+// (NUL-terminated if srcLength<0) to this string.
+// Does nothing if the string is not writable, the source is NULL or empty,
+// or the array cannot be grown; makes the string bogus if the new length
+// would overflow or if a temporary copy of overlapping source text cannot
+// be made. Returns *this.
+UnicodeString&
+UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength) {
+  if(!isWritable() || srcLength == 0 || srcChars == NULL) {
+    return *this;
+  }
+
+  // All remaining operations are relative to srcChars + srcStart.
+  // From this point forward, do not use srcStart.
+  srcChars += srcStart;
+
+  if(srcLength < 0) {
+    // get the srcLength if necessary
+    srcLength = u_strlen(srcChars);
+    if(srcLength == 0) {
+      return *this;
+    }
+  }
+
+  int32_t oldLength = length();
+  int32_t newLength;
+  if (uprv_add32_overflow(oldLength, srcLength, &newLength)) {
+    setToBogus();
+    return *this;
+  }
+
+  // Check for append onto ourself
+  const UChar* oldArray = getArrayStart();
+  if (isBufferWritable() &&
+      oldArray < srcChars + srcLength &&
+      srcChars < oldArray + oldLength) {
+    // The source overlaps our own writable buffer:
+    // copy it into a new UnicodeString and start over.
+    UnicodeString copy(srcChars, srcLength);
+    if (copy.isBogus()) {
+      setToBogus();
+      return *this;
+    }
+    return doAppend(copy.getArrayStart(), 0, srcLength);
+  }
+
+  // optimize append() onto a large-enough, owned string:
+  // only clone/grow when the text does not fit into a writable buffer
+  if(!((newLength <= getCapacity() && isBufferWritable()) ||
+       cloneArrayIfNeeded(newLength, getGrowCapacity(newLength)))) {
+    return *this;
+  }
+
+  UChar *newArray = getArrayStart();
+  // Do not copy characters when
+  //   UChar *buffer=str.getAppendBuffer(...);
+  // is followed by
+  //   str.append(buffer, length);
+  // or
+  //   str.appendString(buffer, length)
+  // or similar.
+  if(srcChars != newArray + oldLength) {
+    us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength);
+  }
+  setLength(newLength);
+  return *this;
+}
+
+/**
+ * Replaceable API: forwards to replaceBetween(start, limit, text).
+ */
+void
+UnicodeString::handleReplaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& text) {
+ replaceBetween(start, limit, text);
+}
+
+/**
+ * Replaceable API: extracts the range between start and limit into a temporary heap buffer and inserts it at dest.
+ */
+void
+UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
+ if (limit <= start) {
+ return; // Nothing to do; avoid bogus malloc call
+ }
+ UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) ); // scratch buffer for limit - start code units
+ // Check to make sure text is not null; if the allocation failed, nothing is done.
+ if (text != NULL) {
+ extractBetween(start, limit, text, 0);
+ insert(dest, text, 0, limit - start);
+ uprv_free(text);
+ }
+}
+
+/**
+ * Replaceable API
+ *
+ * NOTE: This is for the Replaceable class. There is no rep.cpp,
+ * so we implement this function here.
+ */
+UBool Replaceable::hasMetaData() const {
+ return TRUE; // this implementation always reports TRUE
+}
+
+/**
+ * Replaceable API: UnicodeString always reports FALSE here.
+ */
+UBool UnicodeString::hasMetaData() const {
+ return FALSE;
+}
+
+UnicodeString& // returns *this
+UnicodeString::doReverse(int32_t start, int32_t length) {
+ if(length <= 1 || !cloneArrayIfNeeded()) { // nothing to reverse, or cloneArrayIfNeeded() refused/failed
+ return *this;
+ }
+
+ // pin the indices to legal values
+ pinIndices(start, length);
+ if(length <= 1) { // pinIndices() might have shrunk the length
+ return *this;
+ }
+
+ UChar *left = getArrayStart() + start;
+ UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)
+ UChar swap;
+ UBool hasSupplementary = FALSE; // becomes nonzero once a lead surrogate is seen in the range
+
+ // Before the loop we know left<right because length>=2.
+ do {
+ hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
+ hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
+ *right-- = swap; // completes the swap of the two outer code units
+ } while(left < right);
+ // Make sure to test the middle code unit of an odd-length string.
+ // Redundant if the length is even.
+ hasSupplementary |= (UBool)U16_IS_LEAD(*left);
+
+ /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
+ if(hasSupplementary) {
+ UChar swap2;
+
+ left = getArrayStart() + start;
+ right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
+ while(left < right) {
+ if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) { // trail followed by lead: put the pair back in lead-trail order
+ *left++ = swap2;
+ *left++ = swap;
+ } else {
+ ++left;
+ }
+ }
+ }
+
+ return *this;
+}
+
+UBool // TRUE if padding was added, FALSE if the string was already long enough or no buffer was available
+UnicodeString::padLeading(int32_t targetLength,
+ UChar padChar)
+{
+ int32_t oldLength = length();
+ if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
+ return FALSE;
+ } else {
+ // move contents up by padding width
+ UChar *array = getArrayStart();
+ int32_t start = targetLength - oldLength; // number of pad characters to insert at the front
+ us_arrayCopy(array, 0, array, start, oldLength);
+
+ // fill in padding character, from index start-1 down to 0
+ while(--start >= 0) {
+ array[start] = padChar;
+ }
+ setLength(targetLength);
+ return TRUE;
+ }
+}
+
+UBool // TRUE if padding was added, FALSE if the string was already long enough or no buffer was available
+UnicodeString::padTrailing(int32_t targetLength,
+ UChar padChar)
+{
+ int32_t oldLength = length();
+ if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
+ return FALSE;
+ } else {
+ // fill in padding character, from index targetLength-1 down to oldLength
+ UChar *array = getArrayStart();
+ int32_t length = targetLength;
+ while(--length >= oldLength) {
+ array[length] = padChar;
+ }
+ setLength(targetLength);
+ return TRUE;
+ }
+}
+
+//========================================
+// Hashing
+//========================================
+int32_t // hash of the current contents; never kInvalidHashCode
+UnicodeString::doHashCode() const
+{
+ /* Delegate hash computation to uhash. This makes UnicodeString
+ * hashing consistent with UChar* hashing. */
+ int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
+ if (hashCode == kInvalidHashCode) { // substitute kEmptyHashCode so the invalid marker is never returned
+ hashCode = kEmptyHashCode;
+ }
+ return hashCode;
+}
+
+//========================================
+// External Buffer
+//========================================
+
+char16_t * // start of the string's array, or nullptr if minCapacity < -1 or cloneArrayIfNeeded() fails
+UnicodeString::getBuffer(int32_t minCapacity) {
+ if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
+ fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer; // flag is cleared again in releaseBuffer()
+ setZeroLength(); // the length is reset to zero while the buffer is handed out
+ return getArrayStart();
+ } else {
+ return nullptr;
+ }
+}
+
+void
+UnicodeString::releaseBuffer(int32_t newLength) {
+ if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) { // only acts while kOpenGetBuffer is set
+ // set the new fLength
+ int32_t capacity=getCapacity();
+ if(newLength==-1) {
+ // the new length is the string length, capped by fCapacity
+ const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
+ while(p<limit && *p!=0) { // scan for the first NUL without running past the capacity
+ ++p;
+ }
+ newLength=(int32_t)(p-array);
+ } else if(newLength>capacity) { // clamp an oversized length to the capacity
+ newLength=capacity;
+ }
+ setLength(newLength);
+ fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
+ }
+}
+
+//========================================
+// Miscellaneous
+//========================================
+UBool // TRUE if the string now has a usable array; FALSE if not writable or allocation failed
+UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
+ int32_t growCapacity,
+ UBool doCopyArray,
+ int32_t **pBufferToDelete,
+ UBool forceClone) {
+ // default parameters need to be static, therefore
+ // the defaults are -1 to have convenience defaults
+ if(newCapacity == -1) { // -1 means: keep the current capacity
+ newCapacity = getCapacity();
+ }
+
+ // while a getBuffer(minCapacity) is "open",
+ // prevent any modifications of the string by returning FALSE here
+ // if the string is bogus, then only an assignment or similar can revive it
+ if(!isWritable()) {
+ return FALSE;
+ }
+
+ /*
+ * We need to make a copy of the array if
+ * the buffer is read-only, or
+ * the buffer is refCounted (shared), and refCount>1, or
+ * the buffer is too small.
+ * Return FALSE if memory could not be allocated.
+ */
+ if(forceClone ||
+ fUnion.fFields.fLengthAndFlags & kBufferIsReadonly ||
+ (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) ||
+ newCapacity > getCapacity()
+ ) {
+ // check growCapacity for default value and use of the stack buffer
+ if(growCapacity < 0) { // negative growCapacity: fall back to newCapacity
+ growCapacity = newCapacity;
+ } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
+ growCapacity = US_STACKBUF_SIZE;
+ }
+
+ // save old values
+ UChar oldStackBuffer[US_STACKBUF_SIZE];
+ UChar *oldArray;
+ int32_t oldLength = length();
+ int16_t flags = fUnion.fFields.fLengthAndFlags;
+
+ if(flags&kUsingStackBuffer) {
+ U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutually exclusive */
+ if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
+ // copy the stack buffer contents because it will be overwritten with
+ // fUnion.fFields values
+ us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength);
+ oldArray = oldStackBuffer;
+ } else {
+ oldArray = NULL; // no need to copy from the stack buffer to itself
+ }
+ } else {
+ oldArray = fUnion.fFields.fArray;
+ U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
+ }
+
+ // allocate a new array: try growCapacity first, then the smaller newCapacity
+ if(allocate(growCapacity) ||
+ (newCapacity < growCapacity && allocate(newCapacity))
+ ) {
+ if(doCopyArray) {
+ // copy the contents
+ // do not copy more than what fits - it may be smaller than before
+ int32_t minLength = oldLength;
+ newCapacity = getCapacity();
+ if(newCapacity < minLength) {
+ minLength = newCapacity;
+ }
+ if(oldArray != NULL) {
+ us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
+ }
+ setLength(minLength);
+ } else {
+ setZeroLength();
+ }
+
+ // release the old array
+ if(flags & kRefCounted) {
+ // the array is refCounted; decrement and release if 0
+ u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1); // the counter sits immediately before the array
+ if(umtx_atomic_dec(pRefCount) == 0) {
+ if(pBufferToDelete == 0) {
+ // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
+ // is defined as volatile. (Volatile has useful non-standard behavior
+ // with this compiler.)
+ uprv_free((void *)pRefCount);
+ } else {
+ // the caller asked to free the old buffer itself, so hand it back instead of freeing it
+ *pBufferToDelete = (int32_t *)pRefCount;
+ }
+ }
+ }
+ } else {
+ // not enough memory for growCapacity and not even for the smaller newCapacity
+ // reset the old values for setToBogus() to release the array
+ if(!(flags&kUsingStackBuffer)) {
+ fUnion.fFields.fArray = oldArray;
+ }
+ fUnion.fFields.fLengthAndFlags = flags;
+ setToBogus();
+ return FALSE;
+ }
+ }
+ return TRUE;
+}
+
+// UnicodeStringAppendable ------------------------------------------------- ***
+
+UnicodeStringAppendable::~UnicodeStringAppendable() {} // empty destructor body, defined out of line here
+
+UBool // result of str.isWritable() after appending the single code unit c
+UnicodeStringAppendable::appendCodeUnit(UChar c) {
+ return str.doAppend(&c, 0, 1).isWritable();
+}
+
+UBool // FALSE if U16_APPEND flags an error for c; otherwise str.isWritable() after the append
+UnicodeStringAppendable::appendCodePoint(UChar32 c) {
+ UChar buffer[U16_MAX_LENGTH];
+ int32_t cLength = 0; // number of code units written into buffer by U16_APPEND
+ UBool isError = FALSE;
+ U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
+ return !isError && str.doAppend(buffer, 0, cLength).isWritable();
+}
+
+UBool // result of str.isWritable() after appending s; a negative length makes doAppend() use u_strlen()
+UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
+ return str.doAppend(s, 0, length).isWritable();
+}
+
+UBool // result of cloneArrayIfNeeded() for a capacity of current length + appendCapacity
+UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
+ return str.cloneArrayIfNeeded(str.length() + appendCapacity); // NOTE(review): the sum is not checked for int32_t overflow here
+}
+
+UChar * // buffer to write into: inside str when possible, else scratch; NULL for invalid arguments
+UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
+ int32_t desiredCapacityHint,
+ UChar *scratch, int32_t scratchCapacity,
+ int32_t *resultCapacity) {
+ if(minCapacity < 1 || scratchCapacity < minCapacity) { // invalid arguments: report zero capacity
+ *resultCapacity = 0;
+ return NULL;
+ }
+ int32_t oldLength = str.length();
+ if(minCapacity <= (kMaxCapacity - oldLength) && // both sums below stay within kMaxCapacity
+ desiredCapacityHint <= (kMaxCapacity - oldLength) &&
+ str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
+ *resultCapacity = str.getCapacity() - oldLength; // space available after the current contents
+ return str.getArrayStart() + oldLength;
+ }
+ *resultCapacity = scratchCapacity; // fall back to the caller-provided scratch buffer
+ return scratch;
+}
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
+U_CAPI int32_t U_EXPORT2
+uhash_hashUnicodeString(const UElement key) {
+ const UnicodeString *str = (const UnicodeString*) key.pointer; // key.pointer is treated as a UnicodeString*
+ return (str == NULL) ? 0 : str->hashCode(); // a NULL key hashes to 0
+}
+
+// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
+// does not depend on hashtable code.
+U_CAPI UBool U_EXPORT2
+uhash_compareUnicodeString(const UElement key1, const UElement key2) {
+ const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
+ const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
+ if (str1 == str2) { // same pointer (including both NULL) compares equal
+ return TRUE;
+ }
+ if (str1 == NULL || str2 == NULL) { // exactly one side is NULL
+ return FALSE;
+ }
+ return *str1 == *str2;
+}
+
+#ifdef U_STATIC_IMPLEMENTATION
+/*
+This should never be called. It is defined here to make sure that the
+virtual vector deleting destructor is defined within unistr.cpp.
+The vector deleting destructor is already a part of UObject,
+but defining it here makes sure that it is included with this object file.
+This makes sure that static library dependencies are kept to a minimum.
+*/
+static void uprv_UnicodeStringDummy(void) {
+ delete [] (new UnicodeString[2]); // array new/delete of UnicodeString, present only so this file references them
+}
+#endif
diff --git a/thirdparty/icu4c/common/unistr_case.cpp b/thirdparty/icu4c/common/unistr_case.cpp
new file mode 100644
index 0000000000..2138d60c01
--- /dev/null
+++ b/thirdparty/icu4c/common/unistr_case.cpp
@@ -0,0 +1,250 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: unistr_case.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2004aug19
+* created by: Markus W. Scherer
+*
+* Case-mapping functions moved here from unistr.cpp
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/brkiter.h"
+#include "unicode/casemap.h"
+#include "unicode/edits.h"
+#include "unicode/putil.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "unicode/ustring.h"
+#include "unicode/unistr.h"
+#include "unicode/uchar.h"
+#include "uassert.h"
+#include "ucasemap_imp.h"
+#include "uelement.h"
+
+U_NAMESPACE_BEGIN
+
+//========================================
+// Read-only implementation
+//========================================
+
+int8_t // 0 if the ranges compare equal; -1 if this string is bogus; otherwise a negative or positive value
+UnicodeString::doCaseCompare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const
+{
+ // compare illegal string values
+ // treat const UChar *srcChars==NULL as an empty string
+ if(isBogus()) {
+ return -1;
+ }
+
+ // pin indices to legal values
+ pinIndices(start, length);
+
+ if(srcChars == NULL) {
+ srcStart = srcLength = 0;
+ }
+
+ // get the correct pointer
+ const UChar *chars = getArrayStart();
+
+ chars += start;
+ if(srcStart!=0) { // from here on srcChars already includes the srcStart offset
+ srcChars += srcStart;
+ }
+
+ if(chars != srcChars) {
+ UErrorCode errorCode=U_ZERO_ERROR;
+ int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
+ options|U_COMPARE_IGNORE_CASE, &errorCode);
+ if(result!=0) {
+ return (int8_t)(result >> 24 | 1); // keep the sign of result while forcing a nonzero int8_t
+ }
+ } else {
+ // same start pointer: only the lengths can differ; get the srcLength if necessary
+ if(srcLength < 0) {
+ srcLength = u_strlen(srcChars); // srcChars was advanced above; do not add srcStart a second time
+ }
+ if(length != srcLength) {
+ return (int8_t)((length - srcLength) >> 24 | 1);
+ }
+ }
+ return 0;
+}
+
+//========================================
+// Write implementation
+//========================================
+
+UnicodeString & // returns *this; the string is set to bogus when the case mapper reports a non-overflow error
+UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
+ UStringCaseMapper *stringCaseMapper) {
+ if(isEmpty() || !isWritable()) {
+ // nothing to do
+ return *this;
+ }
+
+ UChar oldBuffer[2 * US_STACKBUF_SIZE]; // temporary copy of the original text for short strings
+ UChar *oldArray; // points at the original text that the mapper reads from
+ int32_t oldLength = length();
+ int32_t newLength;
+ UBool writable = isBufferWritable();
+ UErrorCode errorCode = U_ZERO_ERROR;
+
+#if !UCONFIG_NO_BREAK_ITERATION
+ // Read-only alias to the original string contents for the titlecasing BreakIterator.
+ // We cannot set the iterator simply to *this because *this is being modified.
+ UnicodeString oldString;
+#endif
+
+ // Try to avoid heap-allocating a new character array for this string.
+ if (writable ? oldLength <= UPRV_LENGTHOF(oldBuffer) : oldLength < US_STACKBUF_SIZE) {
+ // Short string: Copy the contents into a temporary buffer and
+ // case-map back into the current array, or into the stack buffer.
+ UChar *buffer = getArrayStart();
+ int32_t capacity;
+ oldArray = oldBuffer;
+ u_memcpy(oldBuffer, buffer, oldLength);
+ if (writable) {
+ capacity = getCapacity();
+ } else {
+ // Switch from the read-only alias or shared heap buffer to the stack buffer.
+ if (!cloneArrayIfNeeded(US_STACKBUF_SIZE, US_STACKBUF_SIZE, /* doCopyArray= */ FALSE)) {
+ return *this;
+ }
+ U_ASSERT(fUnion.fFields.fLengthAndFlags & kUsingStackBuffer);
+ buffer = fUnion.fStackFields.fBuffer;
+ capacity = US_STACKBUF_SIZE;
+ }
+#if !UCONFIG_NO_BREAK_ITERATION
+ if (iter != nullptr) {
+ oldString.setTo(FALSE, oldArray, oldLength);
+ iter->setText(oldString);
+ }
+#endif
+ newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
+ buffer, capacity,
+ oldArray, oldLength, NULL, errorCode);
+ if (U_SUCCESS(errorCode)) {
+ setLength(newLength); // the mapped text was written directly into this string's buffer
+ return *this;
+ } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
+ // common overflow handling below
+ } else {
+ setToBogus();
+ return *this;
+ }
+ } else {
+ // Longer string or read-only buffer:
+ // Collect only changes and then apply them to this string.
+ // Case mapping often changes only small parts of a string,
+ // and often does not change its length.
+ oldArray = getArrayStart();
+ Edits edits;
+ UChar replacementChars[200]; // receives only the changed text (U_OMIT_UNCHANGED_TEXT is passed below)
+#if !UCONFIG_NO_BREAK_ITERATION
+ if (iter != nullptr) {
+ oldString.setTo(FALSE, oldArray, oldLength);
+ iter->setText(oldString);
+ }
+#endif
+ stringCaseMapper(caseLocale, options | U_OMIT_UNCHANGED_TEXT, UCASEMAP_BREAK_ITERATOR
+ replacementChars, UPRV_LENGTHOF(replacementChars),
+ oldArray, oldLength, &edits, errorCode);
+ if (U_SUCCESS(errorCode)) {
+ // Grow the buffer at most once, not for multiple doReplace() calls.
+ newLength = oldLength + edits.lengthDelta();
+ if (newLength > oldLength && !cloneArrayIfNeeded(newLength, newLength)) {
+ return *this;
+ }
+ for (Edits::Iterator ei = edits.getCoarseChangesIterator(); ei.next(errorCode);) { // apply each recorded change in turn
+ doReplace(ei.destinationIndex(), ei.oldLength(),
+ replacementChars, ei.replacementIndex(), ei.newLength());
+ }
+ if (U_FAILURE(errorCode)) {
+ setToBogus();
+ }
+ return *this;
+ } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
+ // common overflow handling below
+ newLength = oldLength + edits.lengthDelta();
+ } else {
+ setToBogus();
+ return *this;
+ }
+ }
+
+ // Handle buffer overflow, newLength is known.
+ // We need to allocate a new buffer for the internal string case mapping function.
+ // This is very similar to how doReplace() keeps the old array pointer
+ // and deletes the old array itself after it is done.
+ // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
+ int32_t *bufferToDelete = 0;
+ if (!cloneArrayIfNeeded(newLength, newLength, FALSE, &bufferToDelete, TRUE)) {
+ return *this;
+ }
+ errorCode = U_ZERO_ERROR;
+ // No need to iter->setText() again: The case mapper restarts via iter->first().
+ newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
+ getArrayStart(), getCapacity(),
+ oldArray, oldLength, NULL, errorCode);
+ if (bufferToDelete) { // the old array is freed only after the mapper has finished reading it
+ uprv_free(bufferToDelete);
+ }
+ if (U_SUCCESS(errorCode)) {
+ setLength(newLength);
+ } else {
+ setToBogus();
+ }
+ return *this;
+}
+
+UnicodeString & // returns *this after caseMap() with ustrcase_internalFold and UCASE_LOC_ROOT
+UnicodeString::foldCase(uint32_t options) {
+ return caseMap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold);
+}
+
+U_NAMESPACE_END
+
+// Defined here to reduce dependencies on break iterator
+U_CAPI int32_t U_EXPORT2
+uhash_hashCaselessUnicodeString(const UElement key) {
+ U_NAMESPACE_USE
+ const UnicodeString *str = (const UnicodeString*) key.pointer;
+ if (str == NULL) { // a NULL key hashes to 0
+ return 0;
+ }
+ // Inefficient; a better way would be to have a hash function in
+ // UnicodeString that does case folding on the fly.
+ UnicodeString copy(*str); // fold a copy so the key itself is not modified
+ return copy.foldCase().hashCode();
+}
+
+// Defined here to reduce dependencies on break iterator
+U_CAPI UBool U_EXPORT2
+uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) {
+ U_NAMESPACE_USE
+ const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
+ const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
+ if (str1 == str2) { // same pointer (including both NULL) compares equal
+ return TRUE;
+ }
+ if (str1 == NULL || str2 == NULL) { // exactly one side is NULL
+ return FALSE;
+ }
+ return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
+}
diff --git a/thirdparty/icu4c/common/unistr_case_locale.cpp b/thirdparty/icu4c/common/unistr_case_locale.cpp
new file mode 100644
index 0000000000..f0f3048d06
--- /dev/null
+++ b/thirdparty/icu4c/common/unistr_case_locale.cpp
@@ -0,0 +1,56 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: unistr_case_locale.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011may31
+* created by: Markus W. Scherer
+*
+* Locale-sensitive case mapping functions (ones that call uloc_getDefault())
+* were moved here to break dependency cycles among parts of the common library.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/locid.h"
+#include "unicode/ucasemap.h"
+#include "unicode/unistr.h"
+#include "ucasemap_imp.h"
+
+U_NAMESPACE_BEGIN
+
+//========================================
+// Write implementation
+//========================================
+
+UnicodeString & // returns *this
+UnicodeString::toLower() { // case locale comes from ustrcase_getCaseLocale(NULL), i.e. no explicit locale is passed
+ return caseMap(ustrcase_getCaseLocale(NULL), 0,
+ UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower);
+}
+
+UnicodeString & // returns *this
+UnicodeString::toLower(const Locale &locale) { // case locale is derived from locale.getBaseName()
+ return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0,
+ UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower);
+}
+
+UnicodeString & // returns *this
+UnicodeString::toUpper() { // case locale comes from ustrcase_getCaseLocale(NULL), i.e. no explicit locale is passed
+ return caseMap(ustrcase_getCaseLocale(NULL), 0,
+ UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper);
+}
+
+UnicodeString & // returns *this
+UnicodeString::toUpper(const Locale &locale) { // case locale is derived from locale.getBaseName()
+ return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0,
+ UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper);
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/unistr_cnv.cpp b/thirdparty/icu4c/common/unistr_cnv.cpp
new file mode 100644
index 0000000000..64d3c16801
--- /dev/null
+++ b/thirdparty/icu4c/common/unistr_cnv.cpp
@@ -0,0 +1,417 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: unistr_cnv.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2004aug19
+* created by: Markus W. Scherer
+*
+* Character conversion functions moved here from unistr.cpp
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/putil.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "unicode/ustring.h"
+#include "unicode/unistr.h"
+#include "unicode/ucnv.h"
+#include "ucnv_imp.h"
+#include "putilimp.h"
+#include "ustr_cnv.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_BEGIN
+
+//========================================
+// Constructors
+//========================================
+
+#if !U_CHARSET_IS_UTF8
+
+UnicodeString::UnicodeString(const char *codepageData) { // converts NUL-terminated codepageData; codepage argument 0 is passed on
+ fUnion.fFields.fLengthAndFlags = kShortString; // initial state before any conversion
+ if(codepageData != 0) { // a NULL input leaves the string in its initial state
+ doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), 0);
+ }
+}
+
+UnicodeString::UnicodeString(const char *codepageData, // bytes to convert; codepage argument 0 is passed on
+ int32_t dataLength) {
+ fUnion.fFields.fLengthAndFlags = kShortString; // initial state before any conversion
+ if(codepageData != 0) { // a NULL input leaves the string in its initial state
+ doCodepageCreate(codepageData, dataLength, 0);
+ }
+}
+
+// else see unistr.cpp
+#endif
+
+UnicodeString::UnicodeString(const char *codepageData, // NUL-terminated bytes to convert
+ const char *codepage) {
+ fUnion.fFields.fLengthAndFlags = kShortString; // initial state before any conversion
+ if(codepageData != 0) { // a NULL input leaves the string in its initial state
+ doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage);
+ }
+}
+
+UnicodeString::UnicodeString(const char *codepageData, // bytes to convert
+ int32_t dataLength,
+ const char *codepage) {
+ fUnion.fFields.fLengthAndFlags = kShortString; // initial state before any conversion
+ if(codepageData != 0) { // a NULL input leaves the string in its initial state
+ doCodepageCreate(codepageData, dataLength, codepage);
+ }
+}
+
+UnicodeString::UnicodeString(const char *src, int32_t srcLength, // srcLength==-1 means NUL-terminated; < -1 is an error
+ UConverter *cnv,
+ UErrorCode &errorCode) {
+ fUnion.fFields.fLengthAndFlags = kShortString; // initial state before any conversion
+ if(U_SUCCESS(errorCode)) { // nothing is converted if errorCode already indicates a failure
+ // check arguments
+ if(src==NULL) {
+ // treat as an empty string, do nothing more
+ } else if(srcLength<-1) {
+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ } else {
+ // get input length
+ if(srcLength==-1) {
+ srcLength=(int32_t)uprv_strlen(src);
+ }
+ if(srcLength>0) {
+ if(cnv!=0) {
+ // use the provided converter, reset before use
+ ucnv_resetToUnicode(cnv);
+ doCodepageCreate(src, srcLength, cnv, errorCode);
+ } else {
+ // use the default converter and hand it back afterwards
+ cnv=u_getDefaultConverter(&errorCode);
+ doCodepageCreate(src, srcLength, cnv, errorCode);
+ u_releaseDefaultConverter(cnv);
+ }
+ }
+ }
+
+ if(U_FAILURE(errorCode)) { // any failure above leaves the string bogus
+ setToBogus();
+ }
+ }
+}
+
+//========================================
+// Codeset conversion
+//========================================
+
+#if !U_CHARSET_IS_UTF8
+
+int32_t // forwards to the five-argument extract() with codepage 0
+UnicodeString::extract(int32_t start,
+ int32_t length,
+ char *target,
+ uint32_t dstSize) const {
+ return extract(start, length, target, dstSize, 0);
+}
+
+// else see unistr.cpp
+#endif
+
+int32_t // value returned by the conversion/termination helper used; 0 for illegal arguments
+UnicodeString::extract(int32_t start,
+ int32_t length,
+ char *target,
+ uint32_t dstSize,
+ const char *codepage) const
+{
+ // if the arguments are illegal, then do nothing
+ if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
+ return 0;
+ }
+
+ // pin the indices to legal values
+ pinIndices(start, length);
+
+ // We need to cast dstSize to int32_t for all subsequent code.
+ // I don't know why the API was defined with uint32_t but we are stuck with it.
+ // Also, dstSize==0xffffffff means "unlimited" but if we use target+dstSize
+ // as a limit in some functions, it may wrap around and yield a pointer
+ // that compares less-than target.
+ int32_t capacity;
+ if(dstSize < 0x7fffffff) {
+ // Assume that the capacity is real and a limit pointer won't wrap around.
+ capacity = (int32_t)dstSize;
+ } else {
+ // Pin the capacity so that a limit pointer does not wrap around.
+ char *targetLimit = (char *)U_MAX_PTR(target);
+ // U_MAX_PTR(target) returns a targetLimit that is at most 0x7fffffff
+ // greater than target and does not wrap around the top of the address space.
+ capacity = (int32_t)(targetLimit - target);
+ }
+
+ // create the converter
+ UConverter *converter;
+ UErrorCode status = U_ZERO_ERROR;
+
+ // just write the NUL if the string length is 0
+ if(length == 0) {
+ return u_terminateChars(target, capacity, 0, &status);
+ }
+
+ // if the codepage is the default, use our cache
+ // if it is an empty string, then use the "invariant character" conversion
+ if (codepage == 0) {
+ const char *defaultName = ucnv_getDefaultName();
+ if(UCNV_FAST_IS_UTF8(defaultName)) { // shortcut: go through toUTF8() without a converter
+ return toUTF8(start, length, target, capacity);
+ }
+ converter = u_getDefaultConverter(&status);
+ } else if (*codepage == 0) {
+ // use the "invariant characters" conversion
+ int32_t destLength;
+ if(length <= capacity) { // convert no more than fits into the target
+ destLength = length;
+ } else {
+ destLength = capacity;
+ }
+ u_UCharsToChars(getArrayStart() + start, target, destLength);
+ return u_terminateChars(target, capacity, length, &status);
+ } else {
+ converter = ucnv_open(codepage, &status);
+ }
+
+ length = doExtract(start, length, target, capacity, converter, status); // doExtract() returns 0 if status is already a failure
+
+ // close the converter: release the cached default one, close one opened here
+ if (codepage == 0) {
+ u_releaseDefaultConverter(converter);
+ } else {
+ ucnv_close(converter);
+ }
+
+ return length;
+}
+
+int32_t // length returned by doExtract()/u_terminateChars(); 0 on error
+UnicodeString::extract(char *dest, int32_t destCapacity,
+ UConverter *cnv,
+ UErrorCode &errorCode) const
+{
+ if(U_FAILURE(errorCode)) { // do nothing if the caller already has an error
+ return 0;
+ }
+
+ if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { // bogus source or inconsistent destination arguments
+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ // nothing to do?
+ if(isEmpty()) {
+ return u_terminateChars(dest, destCapacity, 0, &errorCode);
+ }
+
+ // get the converter
+ UBool isDefaultConverter; // remembers whether the converter must be released below
+ if(cnv==0) {
+ isDefaultConverter=TRUE;
+ cnv=u_getDefaultConverter(&errorCode);
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ } else {
+ isDefaultConverter=FALSE;
+ ucnv_resetFromUnicode(cnv);
+ }
+
+ // convert
+ int32_t len=doExtract(0, length(), dest, destCapacity, cnv, errorCode);
+
+ // release the converter (only the default one obtained here)
+ if(isDefaultConverter) {
+ u_releaseDefaultConverter(cnv);
+ }
+
+ return len;
+}
+
+int32_t // value returned by u_terminateChars() for the total converted length; 0 if errorCode was already a failure
+UnicodeString::doExtract(int32_t start, int32_t length,
+ char *dest, int32_t destCapacity,
+ UConverter *cnv,
+ UErrorCode &errorCode) const
+{
+ if(U_FAILURE(errorCode)) {
+ if(destCapacity!=0) { // still leave a terminated (empty) output when there is room
+ *dest=0;
+ }
+ return 0;
+ }
+
+ const UChar *src=getArrayStart()+start, *srcLimit=src+length;
+ char *originalDest=dest; // kept for the length computation and final termination
+ const char *destLimit;
+
+ if(destCapacity==0) {
+ destLimit=dest=0;
+ } else if(destCapacity==-1) {
+ // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.
+ destLimit=(char*)U_MAX_PTR(dest);
+ // for NUL-termination, translate into highest int32_t
+ destCapacity=0x7fffffff;
+ } else {
+ destLimit=dest+destCapacity;
+ }
+
+ // perform the conversion
+ ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
+ length=(int32_t)(dest-originalDest); // number of bytes written so far
+
+ // if an overflow occurs, then get the preflighting length
+ if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
+ char buffer[1024]; // scratch output, reused on every pass; only the byte counts are kept
+
+ destLimit=buffer+sizeof(buffer);
+ do {
+ dest=buffer;
+ errorCode=U_ZERO_ERROR;
+ ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
+ length+=(int32_t)(dest-buffer);
+ } while(errorCode==U_BUFFER_OVERFLOW_ERROR);
+ }
+
+ return u_terminateChars(originalDest, destCapacity, length, &errorCode);
+}
+
+void
+UnicodeString::doCodepageCreate(const char *codepageData,
+ int32_t dataLength,
+ const char *codepage)
+{
+ // if there's nothing to convert, do nothing (NULL data, zero length, or a length below -1)
+ if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
+ return;
+ }
+ if(dataLength == -1) { // -1 means NUL-terminated input
+ dataLength = (int32_t)uprv_strlen(codepageData);
+ }
+
+ UErrorCode status = U_ZERO_ERROR;
+
+ // create the converter
+ // if the codepage is the default, use our cache
+ // if it is an empty string, then use the "invariant character" conversion
+ UConverter *converter;
+ if (codepage == 0) {
+ const char *defaultName = ucnv_getDefaultName();
+ if(UCNV_FAST_IS_UTF8(defaultName)) { // shortcut: go through setToUTF8() without a converter
+ setToUTF8(StringPiece(codepageData, dataLength));
+ return;
+ }
+ converter = u_getDefaultConverter(&status);
+ } else if(*codepage == 0) {
+ // use the "invariant characters" conversion
+ if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) {
+ u_charsToUChars(codepageData, getArrayStart(), dataLength);
+ setLength(dataLength);
+ } else {
+ setToBogus();
+ }
+ return;
+ } else {
+ converter = ucnv_open(codepage, &status);
+ }
+
+ // if we failed, set the appropriate flags and return
+ if(U_FAILURE(status)) {
+ setToBogus();
+ return;
+ }
+
+ // perform the conversion
+ doCodepageCreate(codepageData, dataLength, converter, status);
+ if(U_FAILURE(status)) {
+ setToBogus();
+ }
+
+ // close the converter: release the cached default one, close one opened here
+ if(codepage == 0) {
+ u_releaseDefaultConverter(converter);
+ } else {
+ ucnv_close(converter);
+ }
+}
+
+void
+UnicodeString::doCodepageCreate(const char *codepageData,
+ int32_t dataLength,
+ UConverter *converter,
+ UErrorCode &status)
+{
+ if(U_FAILURE(status)) { // do nothing if the caller already has an error
+ return;
+ }
+
+ // set up the conversion parameters
+ const char *mySource = codepageData;
+ const char *mySourceEnd = mySource + dataLength;
+ UChar *array, *myTarget;
+
+ // estimate the size needed:
+ int32_t arraySize;
+ if(dataLength <= US_STACKBUF_SIZE) {
+ // try to use the stack buffer
+ arraySize = US_STACKBUF_SIZE;
+ } else {
+ // 1.25 UChar's per source byte should cover most cases
+ arraySize = dataLength + (dataLength >> 2);
+ }
+
+ // we do not care about the current contents
+ UBool doCopyArray = FALSE;
+ for(;;) { // one pass per buffer size; repeats only after a buffer overflow
+ if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) {
+ setToBogus();
+ break;
+ }
+
+ // perform the conversion, appending after what has been converted so far
+ array = getArrayStart();
+ myTarget = array + length();
+ ucnv_toUnicode(converter, &myTarget, array + getCapacity(),
+ &mySource, mySourceEnd, 0, TRUE, &status);
+
+ // update the conversion parameters
+ setLength((int32_t)(myTarget - array));
+
+ // allocate more space and copy data, if needed
+ if(status == U_BUFFER_OVERFLOW_ERROR) {
+ // reset the error code
+ status = U_ZERO_ERROR;
+
+ // keep the previous conversion results
+ doCopyArray = TRUE;
+
+ // estimate the new size needed, larger than before
+ // try 2 UChar's per remaining source byte
+ arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource));
+ } else {
+ break; // finished, or failed with an error other than overflow (left in status)
+ }
+ }
+}
+
+U_NAMESPACE_END
+
+#endif
diff --git a/thirdparty/icu4c/common/unistr_props.cpp b/thirdparty/icu4c/common/unistr_props.cpp
new file mode 100644
index 0000000000..4006475790
--- /dev/null
+++ b/thirdparty/icu4c/common/unistr_props.cpp
@@ -0,0 +1,77 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: unistr_props.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2004aug25
+* created by: Markus W. Scherer
+*
+* Character property dependent functions moved here from unistr.cpp
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#include "unicode/unistr.h"
+#include "unicode/utf16.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Removes white space from both ends of this string, in place.
+ * A code point counts as white space if it is U+0020 or if
+ * u_isWhitespace() says so. A bogus string is returned unchanged.
+ *
+ * @return *this
+ */
+UnicodeString&
+UnicodeString::trim()
+{
+  if(isBogus()) {
+    return *this;
+  }
+
+  UChar *text = getArrayStart();
+  const int32_t originalLength = this->length();
+  UChar32 cp;
+
+  // scan backwards: "end" stays just behind the last non-white-space code point
+  int32_t end = originalLength;
+  while(end > 0) {
+    int32_t before = end;
+    U16_PREV(text, 0, before, cp);
+    if(cp != 0x20 && !u_isWhitespace(cp)) {
+      break;
+    }
+    end = before;
+  }
+  if(end < originalLength) {
+    setLength(end);
+  }
+
+  // scan forwards: "begin" stops at the first non-white-space code point
+  int32_t begin = 0;
+  while(begin < end) {
+    int32_t after = begin;
+    U16_NEXT(text, after, end, cp);
+    if(cp != 0x20 && !u_isWhitespace(cp)) {
+      break;
+    }
+    begin = after;
+  }
+
+  // delete the leading white space, moving the remaining text forward
+  if(begin > 0) {
+    doReplace(0, begin, 0, 0, 0);
+  }
+
+  return *this;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/unistr_titlecase_brkiter.cpp b/thirdparty/icu4c/common/unistr_titlecase_brkiter.cpp
new file mode 100644
index 0000000000..4969884b0d
--- /dev/null
+++ b/thirdparty/icu4c/common/unistr_titlecase_brkiter.cpp
@@ -0,0 +1,57 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: unistr_titlecase_brkiter.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2011may30
+* created by: Markus W. Scherer
+*
+* Titlecasing functions that are based on BreakIterator
+* were moved here to break dependency cycles among parts of the common library.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/locid.h"
+#include "unicode/ucasemap.h"
+#include "unicode/unistr.h"
+#include "ucasemap_imp.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Titlecases this string with the default locale and no options.
+ * Forwards to toTitle(iter, locale, options).
+ */
+UnicodeString &
+UnicodeString::toTitle(BreakIterator *iter) {
+  const Locale &defaultLocale = Locale::getDefault();
+  return toTitle(iter, defaultLocale, 0);
+}
+
+/**
+ * Titlecases this string for the given locale with no options.
+ * Forwards to toTitle(iter, locale, options).
+ */
+UnicodeString &
+UnicodeString::toTitle(BreakIterator *iter, const Locale &locale) {
+  const uint32_t noOptions = 0;
+  return toTitle(iter, locale, noOptions);
+}
+
+/**
+ * Titlecases this string in place.
+ * The break iterator to use is chosen by ustrcase_getTitleBreakIterator()
+ * from the caller's iter, the locale and the options; if that yields no
+ * iterator, the string is set to bogus.
+ *
+ * @return *this
+ */
+UnicodeString &
+UnicodeString::toTitle(BreakIterator *iter, const Locale &locale, uint32_t options) {
+  UErrorCode errorCode = U_ZERO_ERROR;
+  // receives the iterator if ustrcase_getTitleBreakIterator() has to create one
+  LocalPointer<BreakIterator> ownedIter;
+  BreakIterator *titleIter =
+      ustrcase_getTitleBreakIterator(&locale, "", options, iter, ownedIter, errorCode);
+  if (titleIter != nullptr) {
+    caseMap(ustrcase_getCaseLocale(locale.getBaseName()), options, titleIter,
+            ustrcase_internalToTitle);
+  } else {
+    setToBogus();
+  }
+  return *this;
+}
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_BREAK_ITERATION
diff --git a/thirdparty/icu4c/common/unistrappender.h b/thirdparty/icu4c/common/unistrappender.h
new file mode 100644
index 0000000000..75fcb9e775
--- /dev/null
+++ b/thirdparty/icu4c/common/unistrappender.h
@@ -0,0 +1,90 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2015, International Business Machines Corporation and
+* others. All Rights Reserved.
+******************************************************************************
+*
+* File unistrappender.h
+******************************************************************************
+*/
+
+#ifndef __UNISTRAPPENDER_H__
+#define __UNISTRAPPENDER_H__
+
+#include "unicode/unistr.h"
+#include "unicode/uobject.h"
+#include "unicode/utf16.h"
+#include "unicode/utypes.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Collects characters in a small fixed-size buffer and hands them to a
+ * UnicodeString in batches, to avoid the cost of calling
+ * UnicodeString::append() once per character in a loop.
+ * The buffer is written to the string when it fills up, when flush() is
+ * called, and when the appender is destroyed.
+ *
+ * proper usage:
+ * {
+ *     UnicodeStringAppender appender(astring);
+ *     for (int32_t i = 0; i < 100; ++i) {
+ *         appender.append((UChar) i);
+ *     }
+ *     // appender flushed automatically when it goes out of scope.
+ * }
+ */
+class UnicodeStringAppender : public UMemory {
+public:
+
+    /**
+     * dest is the UnicodeString being appended to. It must stay alive
+     * for as long as this instance exists.
+     */
+    UnicodeStringAppender(UnicodeString &dest) : fDest(&dest), fIdx(0) { }
+
+    /**
+     * Buffers one code unit, writing out the buffer first if it is full.
+     */
+    inline void append(UChar x) {
+        if (fIdx == UPRV_LENGTHOF(fBuffer)) {
+            flush();
+        }
+        fBuffer[fIdx++] = x;
+    }
+
+    /**
+     * Buffers one code point, writing out the buffer first unless at least
+     * two code units of space remain.
+     */
+    inline void append(UChar32 x) {
+        if (fIdx >= UPRV_LENGTHOF(fBuffer) - 1) {
+            flush();
+        }
+        U16_APPEND_UNSAFE(fBuffer, fIdx, x);
+    }
+
+    /**
+     * Ensures that all appended characters have been written out to dest.
+     */
+    inline void flush() {
+        if (fIdx != 0) {
+            fDest->append(fBuffer, 0, fIdx);
+            fIdx = 0;
+        }
+    }
+
+    /**
+     * flush the buffer when we go out of scope.
+     */
+    ~UnicodeStringAppender() {
+        flush();
+    }
+private:
+    UnicodeString *fDest;   // destination string, not owned
+    int32_t fIdx;           // number of buffered code units
+    UChar fBuffer[32];      // pending code units
+    // declared but not defined: instances are not copyable
+    UnicodeStringAppender(const UnicodeStringAppender &other);
+    UnicodeStringAppender &operator=(const UnicodeStringAppender &other);
+};
+
+U_NAMESPACE_END
+
+#endif
diff --git a/thirdparty/icu4c/common/unorm.cpp b/thirdparty/icu4c/common/unorm.cpp
new file mode 100644
index 0000000000..2d9f46052f
--- /dev/null
+++ b/thirdparty/icu4c/common/unorm.cpp
@@ -0,0 +1,280 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (c) 1996-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* File unorm.cpp
+*
+* Created by: Vladimir Weinstein 12052000
+*
+* Modification history :
+*
+* Date Name Description
+* 02/01/01 synwee Added normalization quickcheck enum and method.
+* 02/12/01 synwee Commented out quickcheck util api has been approved
+* Added private method for doing FCD checks
+* 02/23/01 synwee Modified quickcheck and checkFCE to run through
+* string for codepoints < 0x300 for the normalization
+* mode NFC.
+* 05/25/01+ Markus Scherer total rewrite, implement all normalization here
+* instead of just wrappers around normlzr.cpp,
+* load unorm.dat, support Unicode 3.1 with
+* supplementary code points, etc.
+* 2009-nov..2010-jan Markus Scherer total rewrite, new Normalizer2 API & code
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/udata.h"
+#include "unicode/ustring.h"
+#include "unicode/uiter.h"
+#include "unicode/unorm.h"
+#include "unicode/unorm2.h"
+#include "normalizer2impl.h"
+#include "unormimp.h"
+#include "uprops.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_USE
+
+/* quick check functions ---------------------------------------------------- */
+
+/**
+ * Quick check for the given normalization mode.
+ * Forwards to unorm2_quickCheck() with the Normalizer2 instance that
+ * Normalizer2Factory::getInstance() returns for the mode.
+ */
+U_CAPI UNormalizationCheckResult U_EXPORT2
+unorm_quickCheck(const UChar *src,
+                 int32_t srcLength,
+                 UNormalizationMode mode,
+                 UErrorCode *pErrorCode) {
+    const Normalizer2 *normalizer=Normalizer2Factory::getInstance(mode, *pErrorCode);
+    const UNormalizer2 *handle=reinterpret_cast<const UNormalizer2 *>(normalizer);
+    return unorm2_quickCheck(handle, src, srcLength, pErrorCode);
+}
+
+/**
+ * Quick check for the given normalization mode and options.
+ * With UNORM_UNICODE_3_2 set in options, the mode's normalizer is wrapped in
+ * a FilteredNormalizer2 restricted to the Unicode 3.2 set.
+ *
+ * @return the result of unorm2_quickCheck(), or UNORM_NO if the normalizer or
+ *         the Unicode 3.2 set could not be obtained
+ */
+U_CAPI UNormalizationCheckResult U_EXPORT2
+unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,
+                            UNormalizationMode mode, int32_t options,
+                            UErrorCode *pErrorCode) {
+    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode);
+    if(options&UNORM_UNICODE_3_2) {
+        const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode);
+        /*
+         * Do not dereference n2 or uni32 after a failure: either may be NULL.
+         * Same guard as in unorm_iterate() and unorm_concatenate().
+         */
+        if(U_FAILURE(*pErrorCode)) {
+            return UNORM_NO;
+        }
+        FilteredNormalizer2 fn2(*n2, *uni32);
+        return unorm2_quickCheck(
+            reinterpret_cast<const UNormalizer2 *>(static_cast<Normalizer2 *>(&fn2)),
+            src, srcLength, pErrorCode);
+    } else {
+        return unorm2_quickCheck((const UNormalizer2 *)n2, src, srcLength, pErrorCode);
+    }
+}
+
+/**
+ * Tests whether src is normalized for the given mode.
+ * Forwards to unorm2_isNormalized() with the Normalizer2 instance that
+ * Normalizer2Factory::getInstance() returns for the mode.
+ */
+U_CAPI UBool U_EXPORT2
+unorm_isNormalized(const UChar *src, int32_t srcLength,
+                   UNormalizationMode mode,
+                   UErrorCode *pErrorCode) {
+    const Normalizer2 *normalizer=Normalizer2Factory::getInstance(mode, *pErrorCode);
+    const UNormalizer2 *handle=reinterpret_cast<const UNormalizer2 *>(normalizer);
+    return unorm2_isNormalized(handle, src, srcLength, pErrorCode);
+}
+
+/**
+ * Tests whether src is normalized for the given mode and options.
+ * With UNORM_UNICODE_3_2 set in options, the mode's normalizer is wrapped in
+ * a FilteredNormalizer2 restricted to the Unicode 3.2 set.
+ *
+ * @return the result of unorm2_isNormalized(), or FALSE if the normalizer or
+ *         the Unicode 3.2 set could not be obtained
+ */
+U_CAPI UBool U_EXPORT2
+unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
+                              UNormalizationMode mode, int32_t options,
+                              UErrorCode *pErrorCode) {
+    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode);
+    if(options&UNORM_UNICODE_3_2) {
+        const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode);
+        /*
+         * Do not dereference n2 or uni32 after a failure: either may be NULL.
+         * Same guard as in unorm_iterate() and unorm_concatenate().
+         */
+        if(U_FAILURE(*pErrorCode)) {
+            return FALSE;
+        }
+        FilteredNormalizer2 fn2(*n2, *uni32);
+        return unorm2_isNormalized(
+            reinterpret_cast<const UNormalizer2 *>(static_cast<Normalizer2 *>(&fn2)),
+            src, srcLength, pErrorCode);
+    } else {
+        return unorm2_isNormalized((const UNormalizer2 *)n2, src, srcLength, pErrorCode);
+    }
+}
+
+/* normalize() API ---------------------------------------------------------- */
+
+/**
+ * Public API for normalizing.
+ * With UNORM_UNICODE_3_2 set in options, the mode's normalizer is wrapped in
+ * a FilteredNormalizer2 restricted to the Unicode 3.2 set.
+ *
+ * @return the result of unorm2_normalize(), or 0 if the normalizer or the
+ *         Unicode 3.2 set could not be obtained
+ */
+U_CAPI int32_t U_EXPORT2
+unorm_normalize(const UChar *src, int32_t srcLength,
+                UNormalizationMode mode, int32_t options,
+                UChar *dest, int32_t destCapacity,
+                UErrorCode *pErrorCode) {
+    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode);
+    if(options&UNORM_UNICODE_3_2) {
+        const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode);
+        /*
+         * Do not dereference n2 or uni32 after a failure: either may be NULL.
+         * Same guard as in unorm_iterate() and unorm_concatenate().
+         */
+        if(U_FAILURE(*pErrorCode)) {
+            return 0;
+        }
+        FilteredNormalizer2 fn2(*n2, *uni32);
+        return unorm2_normalize(
+            reinterpret_cast<const UNormalizer2 *>(static_cast<Normalizer2 *>(&fn2)),
+            src, srcLength, dest, destCapacity, pErrorCode);
+    } else {
+        return unorm2_normalize((const UNormalizer2 *)n2,
+            src, srcLength, dest, destCapacity, pErrorCode);
+    }
+}
+
+
+/* iteration functions ------------------------------------------------------ */
+
+/*
+ * Reads one segment of text from src, forward or backward, up to the next
+ * position where n2->hasBoundaryBefore() is true, and writes it to dest,
+ * normalized with n2 if doNormalize is set.
+ *
+ * *pNeededToNormalize (if not NULL) is first set to FALSE and, after a
+ * successful normalization, to whether the output differs from the segment.
+ * Returns the output length, or 0 on failure or illegal arguments.
+ */
+static int32_t
+_iterate(UCharIterator *src, UBool forward,
+         UChar *dest, int32_t destCapacity,
+         const Normalizer2 *n2,
+         UBool doNormalize, UBool *pNeededToNormalize,
+         UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    const UBool badDest= destCapacity<0 || (dest==NULL && destCapacity>0);
+    if(badDest || src==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if(pNeededToNormalize!=NULL) {
+        *pNeededToNormalize=FALSE;
+    }
+
+    /* nothing left in the requested direction: return an empty result */
+    const UBool hasText= forward ? src->hasNext(src) : src->hasPrevious(src);
+    if(!hasText) {
+        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
+    }
+
+    UnicodeString segment;
+    if(forward) {
+        /* the first character is taken regardless of its properties */
+        segment.append(uiter_next32(src));
+        /* take the following characters until one has a boundary before it */
+        for(UChar32 c=uiter_next32(src); c>=0; c=uiter_next32(src)) {
+            if(n2->hasBoundaryBefore(c)) {
+                /* un-read this character so that the iterator stops at the boundary */
+                src->move(src, -U16_LENGTH(c), UITER_CURRENT);
+                break;
+            }
+            segment.append(c);
+        }
+    } else {
+        for(UChar32 c=uiter_previous32(src); c>=0; c=uiter_previous32(src)) {
+            /* going backward, each character goes to the front of the segment */
+            segment.insert(0, c);
+            /* the character just copied starts the segment if it has a boundary before it */
+            if(n2->hasBoundaryBefore(c)) {
+                break;
+            }
+        }
+    }
+
+    /* writable alias of the caller's buffer */
+    UnicodeString destString(dest, 0, destCapacity);
+    if(segment.length()<=0 || !doNormalize) {
+        /* just copy the source characters */
+        return segment.extract(dest, destCapacity, *pErrorCode);
+    }
+
+    n2->normalize(segment, destString, *pErrorCode).extract(dest, destCapacity, *pErrorCode);
+    if(pNeededToNormalize!=NULL && U_SUCCESS(*pErrorCode)) {
+        *pNeededToNormalize= destString!=segment;
+    }
+    return destString.length();
+}
+
+/*
+ * Shared implementation of unorm_next() and unorm_previous():
+ * picks the normalizer for mode, wraps it in a FilteredNormalizer2 for the
+ * Unicode 3.2 set if UNORM_UNICODE_3_2 is set in options, and calls _iterate().
+ */
+static int32_t
+unorm_iterate(UCharIterator *src, UBool forward,
+              UChar *dest, int32_t destCapacity,
+              UNormalizationMode mode, int32_t options,
+              UBool doNormalize, UBool *pNeededToNormalize,
+              UErrorCode *pErrorCode) {
+    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode);
+    if((options&UNORM_UNICODE_3_2)==0) {
+        return _iterate(src, forward, dest, destCapacity,
+                        n2, doNormalize, pNeededToNormalize, pErrorCode);
+    }
+
+    const UnicodeSet *uni32 = uniset_getUnicode32Instance(*pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        /* n2 and uni32 must not be dereferenced after a failure */
+        return 0;
+    }
+    FilteredNormalizer2 fn2(*n2, *uni32);
+    return _iterate(src, forward, dest, destCapacity,
+                    &fn2, doNormalize, pNeededToNormalize, pErrorCode);
+}
+
+/** Backward iteration: unorm_iterate() with forward=FALSE. */
+U_CAPI int32_t U_EXPORT2
+unorm_previous(UCharIterator *src,
+               UChar *dest, int32_t destCapacity,
+               UNormalizationMode mode, int32_t options,
+               UBool doNormalize, UBool *pNeededToNormalize,
+               UErrorCode *pErrorCode) {
+    const UBool forward=FALSE;
+    return unorm_iterate(src, forward, dest, destCapacity, mode, options,
+                         doNormalize, pNeededToNormalize, pErrorCode);
+}
+
+/** Forward iteration: unorm_iterate() with forward=TRUE. */
+U_CAPI int32_t U_EXPORT2
+unorm_next(UCharIterator *src,
+           UChar *dest, int32_t destCapacity,
+           UNormalizationMode mode, int32_t options,
+           UBool doNormalize, UBool *pNeededToNormalize,
+           UErrorCode *pErrorCode) {
+    const UBool forward=TRUE;
+    return unorm_iterate(src, forward, dest, destCapacity, mode, options,
+                         doNormalize, pNeededToNormalize, pErrorCode);
+}
+
+/* Concatenation of normalized strings -------------------------------------- */
+
+/*
+ * Appends right to left with n2->append() and extracts the result into dest.
+ * left may be the same buffer as dest; right must not overlap dest.
+ * A length of -1 is accepted for left and right.
+ * Returns the extracted length, or 0 on failure or illegal arguments.
+ */
+static int32_t
+_concatenate(const UChar *left, int32_t leftLength,
+             const UChar *right, int32_t rightLength,
+             UChar *dest, int32_t destCapacity,
+             const Normalizer2 *n2,
+             UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    const UBool badDest= destCapacity<0 || (dest==NULL && destCapacity>0);
+    const UBool badLeft= left==NULL || leftLength<-1;
+    const UBool badRight= right==NULL || rightLength<-1;
+    if(badDest || badLeft || badRight) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* check for overlapping right and destination */
+    if(dest!=NULL) {
+        const UBool rightStartsInDest= right>=dest && right<(dest+destCapacity);
+        const UBool destStartsInRight=
+            rightLength>0 && dest>=right && dest<(right+rightLength);
+        if(rightStartsInDest || destStartsInRight) {
+            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+    }
+
+    /* alias dest; if left is not already in dest, copy it there first */
+    UnicodeString destString;
+    if(left!=dest) {
+        destString.setTo(dest, 0, destCapacity);
+        destString.append(left, leftLength);
+    } else {
+        destString.setTo(dest, leftLength, destCapacity);
+    }
+
+    const UnicodeString rightString(rightLength<0, right, rightLength);
+    UnicodeString &joined=n2->append(destString, rightString, *pErrorCode);
+    return joined.extract(dest, destCapacity, *pErrorCode);
+}
+
+/**
+ * Concatenates left and right into dest using the normalizer for mode.
+ * With UNORM_UNICODE_3_2 set in options, the normalizer is wrapped in a
+ * FilteredNormalizer2 restricted to the Unicode 3.2 set.
+ * Returns 0 if that set cannot be obtained.
+ */
+U_CAPI int32_t U_EXPORT2
+unorm_concatenate(const UChar *left, int32_t leftLength,
+                  const UChar *right, int32_t rightLength,
+                  UChar *dest, int32_t destCapacity,
+                  UNormalizationMode mode, int32_t options,
+                  UErrorCode *pErrorCode) {
+    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode);
+    if((options&UNORM_UNICODE_3_2)==0) {
+        return _concatenate(left, leftLength, right, rightLength,
+                            dest, destCapacity, n2, pErrorCode);
+    }
+
+    const UnicodeSet *uni32 = uniset_getUnicode32Instance(*pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        /* n2 and uni32 must not be dereferenced after a failure */
+        return 0;
+    }
+    FilteredNormalizer2 fn2(*n2, *uni32);
+    return _concatenate(left, leftLength, right, rightLength,
+                        dest, destCapacity, &fn2, pErrorCode);
+}
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
diff --git a/thirdparty/icu4c/common/unormcmp.cpp b/thirdparty/icu4c/common/unormcmp.cpp
new file mode 100644
index 0000000000..689b0b53b2
--- /dev/null
+++ b/thirdparty/icu4c/common/unormcmp.cpp
@@ -0,0 +1,640 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2001-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: unormcmp.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004sep13
+* created by: Markus W. Scherer
+*
+* unorm_compare() function moved here from unorm.cpp for better modularization.
+* Depends on both normalization and case folding.
+* Allows unorm.cpp to not depend on any character properties code.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/unorm.h"
+#include "unicode/ustring.h"
+#include "cmemory.h"
+#include "normalizer2impl.h"
+#include "ucase.h"
+#include "uprops.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_USE
+
+/* compare canonically equivalent ------------------------------------------- */
+
+/*
+ * Compare two strings for canonical equivalence.
+ * Further options include case-insensitive comparison and
+ * code point order (as opposed to code unit order).
+ *
+ * In this function, canonical equivalence is optional as well.
+ * If canonical equivalence is tested, then both strings must fulfill
+ * the FCD check.
+ *
+ * Semantically, this is equivalent to
+ * strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
+ * where code point order, NFD and foldCase are all optional.
+ *
+ * String comparisons almost always yield results before processing both strings
+ * completely.
+ * They are generally more efficient working incrementally instead of
+ * performing the sub-processing (strlen, normalization, case-folding)
+ * on the entire strings first.
+ *
+ * It is also unnecessary to normalize identical characters.
+ *
+ * This function works in principle as follows:
+ *
+ * loop {
+ * get one code unit c1 from s1 (-1 if end of source)
+ * get one code unit c2 from s2 (-1 if end of source)
+ *
+ * if(either string finished) {
+ * return result;
+ * }
+ * if(c1==c2) {
+ * continue;
+ * }
+ *
+ * // c1!=c2
+ * try to decompose/case-fold c1/c2, and continue if one does;
+ *
+ * // still c1!=c2 and neither decomposes/case-folds, return result
+ * return c1-c2;
+ * }
+ *
+ * When a character decomposes, then the pointer for that source changes to
+ * the decomposition, pushing the previous pointer onto a stack.
+ * When the end of the decomposition is reached, then the code unit reader
+ * pops the previous source from the stack.
+ * (Same for case-folding.)
+ *
+ * This is complicated further by operating on variable-width UTF-16.
+ * The top part of the loop works on code units, while lookups for decomposition
+ * and case-folding need code points.
+ * Code points are assembled after the equality/end-of-source part.
+ * The source pointer is only advanced beyond all code units when the code point
+ * actually decomposes/case-folds.
+ *
+ * If we were on a trail surrogate unit when assembling a code point,
+ * and the code point decomposes/case-folds, then the decomposition/folding
+ * result must be compared with the part of the other string that corresponds to
+ * this string's lead surrogate.
+ * Since we only assemble a code point when hitting a trail unit when the
+ * preceding lead units were identical, we back up the other string by one unit
+ * in such a case.
+ *
+ * The optional code point order comparison at the end works with
+ * the same fix-up as the other code point order comparison functions.
+ * See ustring.c and the comment near the end of this function.
+ *
+ * Assumption: A decomposition or case-folding result string never contains
+ * a single surrogate. This is a safe assumption in the Unicode Standard.
+ * Therefore, we do not need to check for surrogate pairs across
+ * decomposition/case-folding boundaries.
+ *
+ * Further assumptions (see the verifications in tstnorm.cpp):
+ * The API function checks for FCD first, while the core function
+ * first case-folds and then decomposes. This requires that case-folding does not
+ * un-FCD any strings.
+ *
+ * The API function may also NFD the input and turn off decomposition.
+ * This requires that case-folding does not un-NFD strings either.
+ *
+ * TODO If any of the above two assumptions is violated,
+ * then this entire code must be re-thought.
+ * If this happens, then a simple solution is to case-fold both strings up front
+ * and to turn off UNORM_INPUT_IS_FCD.
+ * We already do this when not both strings are in FCD because makeFCD
+ * would be a partial NFD before the case folding, which does not work.
+ * Note that all of this is only a problem when case-folding _and_
+ * canonical equivalence come together.
+ * (Comments in unorm_compare() are more up to date than this TODO.)
+ */
+
+/*
+ * One saved source level for unorm_cmpEquivFold(): the pointers of the text
+ * that was being read before switching to a case folding or decomposition.
+ */
+struct CmpEquivLevel {
+    const UChar *start;   /* start of the saved text */
+    const UChar *s;       /* position at which to resume reading */
+    const UChar *limit;   /* end of the saved text */
+};
+typedef struct CmpEquivLevel CmpEquivLevel;
+
+/**
+ * Internal option for unorm_cmpEquivFold() for decomposing.
+ * If not set, just do strcasecmp().
+ * unorm_compare() sets this bit in options before it calls unorm_cmpEquivFold().
+ */
+#define _COMPARE_EQUIV 0x80000
+
+/*
+ * internal function
+ *
+ * Compares s1 with s2 code unit by code unit. Only where the two differ does
+ * it case-fold (if U_COMPARE_IGNORE_CASE is set) and/or decompose
+ * (if _COMPARE_EQUIV is set) the code points involved, and then continues
+ * the comparison inside the folding/decomposition result.
+ *
+ * A length of -1 means that the string is NUL-terminated. With _STRNCMP_STYLE
+ * set, a NUL also ends a string that has an explicit length.
+ *
+ * Returns 0 if *pErrorCode indicates a failure or if both strings end together,
+ * -1 if string 1 ends first, 1 if string 2 ends first, and otherwise c1-c2
+ * for the first differing code units (after the fix-up for
+ * U_COMPARE_CODE_POINT_ORDER near the end of the function).
+ */
+static int32_t
+unorm_cmpEquivFold(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode) {
+ const Normalizer2Impl *nfcImpl;
+
+ /* current-level start/limit - s1/s2 as current */
+ const UChar *start1, *start2, *limit1, *limit2;
+
+ /* decomposition and case folding variables */
+ const UChar *p;
+ int32_t length;
+
+ /*
+ * stacks of previous-level start/current/limit
+ * element [0] saves the position in the source string,
+ * element [1] saves the position in the case folding buffer, or has
+ * start==NULL if the case folding level was skipped
+ */
+ CmpEquivLevel stack1[2], stack2[2];
+
+ /* buffers for algorithmic decompositions */
+ UChar decomp1[4], decomp2[4];
+
+ /* case folding buffers, only use current-level start/limit */
+ UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
+
+ /* track which is the current level per string */
+ int32_t level1, level2;
+
+ /* current code units, and code points for lookups */
+ UChar32 c1, c2, cp1, cp2;
+
+ /* no argument error checking because this itself is not an API */
+
+ /*
+ * assume that at least one of the options _COMPARE_EQUIV and U_COMPARE_IGNORE_CASE is set
+ * otherwise this function must behave exactly as uprv_strCompare()
+ * not checking for that here makes testing this function easier
+ */
+
+ /* normalization/properties data loaded? */
+ if((options&_COMPARE_EQUIV)!=0) {
+ nfcImpl=Normalizer2Factory::getNFCImpl(*pErrorCode);
+ } else {
+ nfcImpl=NULL;
+ }
+ if(U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
+ /* initialize */
+ start1=s1;
+ if(length1==-1) {
+ limit1=NULL;
+ } else {
+ limit1=s1+length1;
+ }
+
+ start2=s2;
+ if(length2==-1) {
+ limit2=NULL;
+ } else {
+ limit2=s2+length2;
+ }
+
+ level1=level2=0;
+ c1=c2=-1;
+
+ /* comparison loop */
+ for(;;) {
+ /*
+ * here a code unit value of -1 means "get another code unit"
+ * below it will mean "this source is finished"
+ */
+
+ if(c1<0) {
+ /* get next code unit from string 1, post-increment */
+ for(;;) {
+ if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
+ if(level1==0) {
+ c1=-1;
+ break;
+ }
+ } else {
+ ++s1;
+ break;
+ }
+
+ /* reached end of level buffer, pop one level (skipping an empty intermediate level) */
+ do {
+ --level1;
+ start1=stack1[level1].start; /*Not uninitialized*/
+ } while(start1==NULL);
+ s1=stack1[level1].s; /*Not uninitialized*/
+ limit1=stack1[level1].limit; /*Not uninitialized*/
+ }
+ }
+
+ if(c2<0) {
+ /* get next code unit from string 2, post-increment */
+ for(;;) {
+ if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
+ if(level2==0) {
+ c2=-1;
+ break;
+ }
+ } else {
+ ++s2;
+ break;
+ }
+
+ /* reached end of level buffer, pop one level (skipping an empty intermediate level) */
+ do {
+ --level2;
+ start2=stack2[level2].start; /*Not uninitialized*/
+ } while(start2==NULL);
+ s2=stack2[level2].s; /*Not uninitialized*/
+ limit2=stack2[level2].limit; /*Not uninitialized*/
+ }
+ }
+
+ /*
+ * compare c1 and c2
+ * either variable c1, c2 is -1 only if the corresponding string is finished
+ */
+ if(c1==c2) {
+ if(c1<0) {
+ return 0; /* c1==c2==-1 indicating end of strings */
+ }
+ c1=c2=-1; /* make us fetch new code units */
+ continue;
+ } else if(c1<0) {
+ return -1; /* string 1 ends before string 2 */
+ } else if(c2<0) {
+ return 1; /* string 2 ends before string 1 */
+ }
+ /* c1!=c2 && c1>=0 && c2>=0 */
+
+ /* get complete code points for c1, c2 for lookups if either is a surrogate */
+ cp1=c1;
+ if(U_IS_SURROGATE(c1)) {
+ UChar c;
+
+ if(U_IS_SURROGATE_LEAD(c1)) {
+ if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
+ /* advance ++s1; only below if cp1 decomposes/case-folds */
+ cp1=U16_GET_SUPPLEMENTARY(c1, c);
+ }
+ } else /* isTrail(c1) */ {
+ if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
+ cp1=U16_GET_SUPPLEMENTARY(c, c1);
+ }
+ }
+ }
+
+ cp2=c2;
+ if(U_IS_SURROGATE(c2)) {
+ UChar c;
+
+ if(U_IS_SURROGATE_LEAD(c2)) {
+ if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
+ /* advance ++s2; only below if cp2 decomposes/case-folds */
+ cp2=U16_GET_SUPPLEMENTARY(c2, c);
+ }
+ } else /* isTrail(c2) */ {
+ if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
+ cp2=U16_GET_SUPPLEMENTARY(c, c2);
+ }
+ }
+ }
+
+ /*
+ * go down one level for each string
+ * continue with the main loop as soon as there is a real change
+ */
+
+ if( level1==0 && (options&U_COMPARE_IGNORE_CASE) &&
+ (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0
+ ) {
+ /* cp1 case-folds to the code point "length" or to p[length] */
+ if(U_IS_SURROGATE(c1)) {
+ if(U_IS_SURROGATE_LEAD(c1)) {
+ /* advance beyond source surrogate pair if it case-folds */
+ ++s1;
+ } else /* isTrail(c1) */ {
+ /*
+ * we got a supplementary code point when hitting its trail surrogate,
+ * therefore the lead surrogate must have been the same as in the other string;
+ * compare this case folding result with the lead surrogate in the other string
+ * remember that this simulates bulk text replacement:
+ * the case folding result would replace the entire code point
+ */
+ --s2;
+ c2=*(s2-1);
+ }
+ }
+
+ /* push current level pointers */
+ stack1[0].start=start1;
+ stack1[0].s=s1;
+ stack1[0].limit=limit1;
+ ++level1;
+
+ /* copy the folding result to fold1[] */
+ if(length<=UCASE_MAX_STRING_LENGTH) {
+ u_memcpy(fold1, p, length); /* the result is the string p[length] */
+ } else {
+ int32_t i=0;
+ U16_APPEND_UNSAFE(fold1, i, length); /* the result is the single code point "length" */
+ length=i;
+ }
+
+ /* set next level pointers to case folding */
+ start1=s1=fold1;
+ limit1=fold1+length;
+
+ /* get ready to read from the case folding result, continue with loop */
+ c1=-1;
+ continue;
+ }
+
+ if( level2==0 && (options&U_COMPARE_IGNORE_CASE) &&
+ (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0
+ ) {
+ /* cp2 case-folds to the code point "length" or to p[length] */
+ if(U_IS_SURROGATE(c2)) {
+ if(U_IS_SURROGATE_LEAD(c2)) {
+ /* advance beyond source surrogate pair if it case-folds */
+ ++s2;
+ } else /* isTrail(c2) */ {
+ /*
+ * we got a supplementary code point when hitting its trail surrogate,
+ * therefore the lead surrogate must have been the same as in the other string;
+ * compare this case folding result with the lead surrogate in the other string
+ * remember that this simulates bulk text replacement:
+ * the case folding result would replace the entire code point
+ */
+ --s1;
+ c1=*(s1-1);
+ }
+ }
+
+ /* push current level pointers */
+ stack2[0].start=start2;
+ stack2[0].s=s2;
+ stack2[0].limit=limit2;
+ ++level2;
+
+ /* copy the folding result to fold2[] */
+ if(length<=UCASE_MAX_STRING_LENGTH) {
+ u_memcpy(fold2, p, length); /* the result is the string p[length] */
+ } else {
+ int32_t i=0;
+ U16_APPEND_UNSAFE(fold2, i, length); /* the result is the single code point "length" */
+ length=i;
+ }
+
+ /* set next level pointers to case folding */
+ start2=s2=fold2;
+ limit2=fold2+length;
+
+ /* get ready to read from the case folding result, continue with loop */
+ c2=-1;
+ continue;
+ }
+
+ if( level1<2 && (options&_COMPARE_EQUIV) &&
+ 0!=(p=nfcImpl->getDecomposition((UChar32)cp1, decomp1, length))
+ ) {
+ /* cp1 decomposes into p[length] */
+ if(U_IS_SURROGATE(c1)) {
+ if(U_IS_SURROGATE_LEAD(c1)) {
+ /* advance beyond source surrogate pair if it decomposes */
+ ++s1;
+ } else /* isTrail(c1) */ {
+ /*
+ * we got a supplementary code point when hitting its trail surrogate,
+ * therefore the lead surrogate must have been the same as in the other string;
+ * compare this decomposition with the lead surrogate in the other string
+ * remember that this simulates bulk text replacement:
+ * the decomposition would replace the entire code point
+ */
+ --s2;
+ c2=*(s2-1);
+ }
+ }
+
+ /* push current level pointers */
+ stack1[level1].start=start1;
+ stack1[level1].s=s1;
+ stack1[level1].limit=limit1;
+ ++level1;
+
+ /* set empty intermediate level if skipped */
+ if(level1<2) {
+ stack1[level1++].start=NULL;
+ }
+
+ /* set next level pointers to decomposition */
+ start1=s1=p;
+ limit1=p+length;
+
+ /* get ready to read from decomposition, continue with loop */
+ c1=-1;
+ continue;
+ }
+
+ if( level2<2 && (options&_COMPARE_EQUIV) &&
+ 0!=(p=nfcImpl->getDecomposition((UChar32)cp2, decomp2, length))
+ ) {
+ /* cp2 decomposes into p[length] */
+ if(U_IS_SURROGATE(c2)) {
+ if(U_IS_SURROGATE_LEAD(c2)) {
+ /* advance beyond source surrogate pair if it decomposes */
+ ++s2;
+ } else /* isTrail(c2) */ {
+ /*
+ * we got a supplementary code point when hitting its trail surrogate,
+ * therefore the lead surrogate must have been the same as in the other string;
+ * compare this decomposition with the lead surrogate in the other string
+ * remember that this simulates bulk text replacement:
+ * the decomposition would replace the entire code point
+ */
+ --s1;
+ c1=*(s1-1);
+ }
+ }
+
+ /* push current level pointers */
+ stack2[level2].start=start2;
+ stack2[level2].s=s2;
+ stack2[level2].limit=limit2;
+ ++level2;
+
+ /* set empty intermediate level if skipped */
+ if(level2<2) {
+ stack2[level2++].start=NULL;
+ }
+
+ /* set next level pointers to decomposition */
+ start2=s2=p;
+ limit2=p+length;
+
+ /* get ready to read from decomposition, continue with loop */
+ c2=-1;
+ continue;
+ }
+
+ /*
+ * no decomposition/case folding, max level for both sides:
+ * return difference result
+ *
+ * code point order comparison must not just return cp1-cp2
+ * because when single surrogates are present then the surrogate pairs
+ * that formed cp1 and cp2 may be from different string indexes
+ *
+ * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
+ * c1=d800 cp1=10001 c2=dc00 cp2=10000
+ * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
+ *
+ * therefore, use same fix-up as in ustring.c/uprv_strCompare()
+ * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
+ * so we have slightly different pointer/start/limit comparisons here
+ */
+
+ if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
+ /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
+ if(
+ (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
+ (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
+ ) {
+ /* part of a surrogate pair, leave >=d800 */
+ } else {
+ /* BMP code point - may be surrogate code point - make <d800 */
+ c1-=0x2800;
+ }
+
+ if(
+ (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
+ (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
+ ) {
+ /* part of a surrogate pair, leave >=d800 */
+ } else {
+ /* BMP code point - may be surrogate code point - make <d800 */
+ c2-=0x2800;
+ }
+ }
+
+ return c1-c2;
+ }
+}
+
+/*
+ * Normalizes s with n2 into "normalized", but only if s needs it.
+ *
+ * Returns TRUE if "normalized" was filled with a successfully normalized copy.
+ * Returns FALSE if s already passes n2's quick check from start to end
+ * ("normalized" is then left untouched) or if an error occurred.
+ */
+static
+UBool _normalize(const Normalizer2 *n2, const UChar *s, int32_t length,
+                 UnicodeString &normalized, UErrorCode *pErrorCode) {
+    /* read-only alias of the input */
+    UnicodeString input(length<0, s, length);
+
+    /* length of the prefix that needs no normalization */
+    const int32_t yesLength=n2->spanQuickCheckYes(input, *pErrorCode);
+    if (U_FAILURE(*pErrorCode)) {
+        return FALSE;
+    }
+    if(yesLength>=input.length()) {
+        /* the whole input fulfills the conditions already */
+        return FALSE;
+    }
+
+    /*
+     * ICU 2.4 had a further optimization:
+     * If both strings were not in FCD, then they were both NFD'ed,
+     * and the _COMPARE_EQUIV option was turned off.
+     * It is not entirely clear that this is valid with the current
+     * definition of the canonical caseless match.
+     * Therefore, ICU 2.6 removes that optimization.
+     */
+
+    /* keep the good prefix as is, then normalize and append the rest */
+    UnicodeString remainder=input.tempSubString(yesLength);
+    normalized.setTo(FALSE, input.getBuffer(), yesLength);
+    n2->normalizeSecondAndAppend(normalized, remainder, *pErrorCode);
+    return U_SUCCESS(*pErrorCode) ? TRUE : FALSE;
+}
+
+/**
+ * Compares two strings for canonical equivalence, with the case-folding and
+ * code point order behavior selected by options.
+ *
+ * Unless the caller sets UNORM_INPUT_IS_FCD (and does not request the Turkic
+ * case-folding option), each string that needs it is first brought into FCD
+ * form, or into NFD form for the Turkic option. The comparison itself is
+ * done by unorm_cmpEquivFold() with _COMPARE_EQUIV set.
+ *
+ * @return the result of unorm_cmpEquivFold(); 0 if *pErrorCode indicates a
+ *         failure on entry or is set to one here. NULL strings or lengths
+ *         below -1 set U_ILLEGAL_ARGUMENT_ERROR.
+ */
+U_CAPI int32_t U_EXPORT2
+unorm_compare(const UChar *s1, int32_t length1,
+              const UChar *s2, int32_t length2,
+              uint32_t options,
+              UErrorCode *pErrorCode) {
+    /* argument checking */
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(s1==0 || length1<-1 || s2==0 || length2<-1) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    UnicodeString fcd1, fcd2;
+    int32_t normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT);
+    options|=_COMPARE_EQUIV;
+
+    /*
+     * UAX #21 Case Mappings, as fixed for Unicode version 4
+     * (see Jitterbug 2021), defines a canonical caseless match as
+     *
+     * A string X is a canonical caseless match
+     * for a string Y if and only if
+     * NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y)))
+     *
+     * For better performance, we check for FCD (or let the caller tell us that
+     * both strings are in FCD) for the inner normalization.
+     * BasicNormalizerTest::FindFoldFCDExceptions() makes sure that
+     * case-folding preserves the FCD-ness of a string.
+     * The outer normalization is then only performed by unorm_cmpEquivFold()
+     * when there is a difference.
+     *
+     * Exception: When using the Turkic case-folding option, we do perform
+     * full NFD first. This is because in the Turkic case precomposed characters
+     * with 0049 capital I or 0069 small i fold differently whether they
+     * are first decomposed or not, so an FCD check - a check only for
+     * canonical order - is not sufficient.
+     */
+    if(!(options&UNORM_INPUT_IS_FCD) || (options&U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
+        const Normalizer2 *n2;
+        if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) {
+            n2=Normalizer2::getNFDInstance(*pErrorCode);
+        } else {
+            n2=Normalizer2Factory::getFCDInstance(*pErrorCode);
+        }
+        if (U_FAILURE(*pErrorCode)) {
+            return 0;
+        }
+
+        if(normOptions&UNORM_UNICODE_3_2) {
+            const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode);
+            /*
+             * uni32 must not be dereferenced if the set could not be obtained.
+             * Same guard as in unorm_iterate() and unorm_concatenate().
+             */
+            if (U_FAILURE(*pErrorCode)) {
+                return 0;
+            }
+            FilteredNormalizer2 fn2(*n2, *uni32);
+            if(_normalize(&fn2, s1, length1, fcd1, pErrorCode)) {
+                s1=fcd1.getBuffer();
+                length1=fcd1.length();
+            }
+            if(_normalize(&fn2, s2, length2, fcd2, pErrorCode)) {
+                s2=fcd2.getBuffer();
+                length2=fcd2.length();
+            }
+        } else {
+            if(_normalize(n2, s1, length1, fcd1, pErrorCode)) {
+                s1=fcd1.getBuffer();
+                length1=fcd1.length();
+            }
+            if(_normalize(n2, s2, length2, fcd2, pErrorCode)) {
+                s2=fcd2.getBuffer();
+                length2=fcd2.length();
+            }
+        }
+    }
+
+    if(U_SUCCESS(*pErrorCode)) {
+        return unorm_cmpEquivFold(s1, length1, s2, length2, options, pErrorCode);
+    } else {
+        return 0;
+    }
+}
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
diff --git a/thirdparty/icu4c/common/unormimp.h b/thirdparty/icu4c/common/unormimp.h
new file mode 100644
index 0000000000..d2604adb4a
--- /dev/null
+++ b/thirdparty/icu4c/common/unormimp.h
@@ -0,0 +1,488 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2001-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: unormimp.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001may25
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UNORMIMP_H__
+#define __UNORMIMP_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "udataswp.h"
+
+/*
+ * The 2001-2010 implementation of the normalization code loads its data from
+ * unorm.icu, which is generated with the gennorm tool.
+ * The format of that file is described at the end of this file.
+ */
+
+/*
+ * norm32 value constants.
+ * Masks, flags and shift amounts for the fields of a 32-bit norm32 word:
+ * quick check flags in bits 5..0, combining flags in bits 7..6,
+ * combining class in bits 15..8 and the extra data index in bits 31..16.
+ */
+enum {
+ /* quick check flags in bits 0..3: when set they mean "no" for their forms */
+ _NORM_QC_NFC=0x11, /* no|maybe (bits 0 and 4) */
+ _NORM_QC_NFKC=0x22, /* no|maybe (bits 1 and 5) */
+ _NORM_QC_NFD=4, /* no (bit 2) */
+ _NORM_QC_NFKD=8, /* no (bit 3) */
+
+ _NORM_QC_ANY_NO=0xf, /* all four "no" flags, bits 3..0 */
+
+ /* quick check flags in bits 4..5 mean "maybe" for their forms; test flags>=_NORM_QC_MAYBE */
+ _NORM_QC_MAYBE=0x10,
+ _NORM_QC_ANY_MAYBE=0x30, /* both "maybe" flags, bits 5..4 */
+
+ _NORM_QC_MASK=0x3f, /* all quick check flags, bits 5..0 */
+
+ _NORM_COMBINES_FWD=0x40, /* bit 6 */
+ _NORM_COMBINES_BACK=0x80, /* bit 7 */
+ _NORM_COMBINES_ANY=0xc0, /* bits 7..6: either of the two flags above */
+
+ _NORM_CC_SHIFT=8, /* UnicodeData.txt combining class in bits 15..8 */
+ _NORM_CC_MASK=0xff00,
+
+ _NORM_EXTRA_SHIFT=16, /* 16 bits for the index to UChars and other extra data */
+ _NORM_EXTRA_INDEX_TOP=0xfc00, /* start of surrogate specials after shift */
+
+ _NORM_EXTRA_SURROGATE_MASK=0x3ff,
+ _NORM_EXTRA_SURROGATE_TOP=0x3f0, /* hangul etc. */
+
+ /* the following four take consecutive values starting at _NORM_EXTRA_SURROGATE_TOP */
+ _NORM_EXTRA_HANGUL=_NORM_EXTRA_SURROGATE_TOP,
+ _NORM_EXTRA_JAMO_L,
+ _NORM_EXTRA_JAMO_V,
+ _NORM_EXTRA_JAMO_T
+};
+
+/*
+ * norm32 value constants using >16 bits.
+ * Each value is an extra data index constant from the enum above
+ * shifted up by _NORM_EXTRA_SHIFT (16), so that a whole norm32 word
+ * can be compared against it.
+ */
+#define _NORM_MIN_SPECIAL 0xfc000000 /* _NORM_EXTRA_INDEX_TOP<<16 */
+#define _NORM_SURROGATES_TOP 0xfff00000 /* (_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_SURROGATE_TOP)<<16 */
+#define _NORM_MIN_HANGUL 0xfff00000 /* (_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_HANGUL)<<16 */
+#define _NORM_MIN_JAMO_V 0xfff20000 /* (_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_JAMO_V)<<16 */
+#define _NORM_JAMO_V_TOP 0xfff30000 /* (_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_JAMO_T)<<16 */
+
+/* value constants for auxTrie: bit positions of the flags in a 16-bit auxTrie data word */
+enum {
+ _NORM_AUX_COMP_EX_SHIFT=10, /* bit 10: composition exclusion */
+ _NORM_AUX_UNSAFE_SHIFT=11, /* bit 11: not a safe starter for canonical closure */
+ _NORM_AUX_NFC_SKIPPABLE_F_SHIFT=12 /* bit 12: not NFC_Skippable (f) (formatVersion>=2.2) */
+};
+
+/* 1<<10: exclusive upper limit of the value stored in bits 9..0 */
+#define _NORM_AUX_MAX_FNC ((int32_t)1<<_NORM_AUX_COMP_EX_SHIFT)
+
+/* mask for bits 9..0: index into extraData[] to the FC_NFKC_Closure string, or lead surrogate offset */
+#define _NORM_AUX_FNC_MASK (uint32_t)(_NORM_AUX_MAX_FNC-1)
+/* single-bit masks for the three flags whose bit positions are listed in the enum */
+#define _NORM_AUX_COMP_EX_MASK ((uint32_t)1<<_NORM_AUX_COMP_EX_SHIFT)
+#define _NORM_AUX_UNSAFE_MASK ((uint32_t)1<<_NORM_AUX_UNSAFE_SHIFT)
+#define _NORM_AUX_NFC_SKIP_F_MASK ((uint32_t)1<<_NORM_AUX_NFC_SKIPPABLE_F_SHIFT)
+
+/* canonStartSets[0..31] contains indexes for what is in the array */
+enum {
+ _NORM_SET_INDEX_CANON_SETS_LENGTH, /* number of uint16_t in canonical starter sets */
+ _NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH, /* number of uint16_t in the BMP search table (contains pairs) */
+ _NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH,/* number of uint16_t in the supplementary search table (contains triplets) */
+
+ /* from formatVersion 2.3: */
+ _NORM_SET_INDEX_NX_CJK_COMPAT_OFFSET, /* uint16_t offset from canonStartSets[0] to the
+ exclusion set for CJK compatibility characters */
+ _NORM_SET_INDEX_NX_UNICODE32_OFFSET, /* uint16_t offset from canonStartSets[0] to the
+ exclusion set for Unicode 3.2 characters */
+ _NORM_SET_INDEX_NX_RESERVED_OFFSET, /* uint16_t offset from canonStartSets[0] to the
+ end of the previous exclusion set */
+
+ _NORM_SET_INDEX_TOP=32 /* changing this requires a new formatVersion */
+};
+
+/* more constants for canonical starter sets */
+
+/* 14 bit indexes to canonical USerializedSets */
+#define _NORM_MAX_CANON_SETS 0x4000
+
+/* single-code point BMP sets are encoded directly in the search table except if result=0x4000..0x7fff */
+#define _NORM_CANON_SET_BMP_MASK 0xc000
+#define _NORM_CANON_SET_BMP_IS_INDEX 0x4000
+
+/* indexes[] value names */
+enum {
+ _NORM_INDEX_TRIE_SIZE, /* number of bytes in normalization trie */
+ _NORM_INDEX_UCHAR_COUNT, /* number of UChars in extra data */
+
+ _NORM_INDEX_COMBINE_DATA_COUNT, /* number of uint16_t words for combining data */
+ _NORM_INDEX_COMBINE_FWD_COUNT, /* number of code points that combine forward */
+ _NORM_INDEX_COMBINE_BOTH_COUNT, /* number of code points that combine forward and backward */
+ _NORM_INDEX_COMBINE_BACK_COUNT, /* number of code points that combine backward */
+
+ _NORM_INDEX_MIN_NFC_NO_MAYBE, /* first code point with quick check NFC NO/MAYBE */
+ _NORM_INDEX_MIN_NFKC_NO_MAYBE, /* first code point with quick check NFKC NO/MAYBE */
+ _NORM_INDEX_MIN_NFD_NO_MAYBE, /* first code point with quick check NFD NO/MAYBE */
+ _NORM_INDEX_MIN_NFKD_NO_MAYBE, /* first code point with quick check NFKD NO/MAYBE */
+
+ _NORM_INDEX_FCD_TRIE_SIZE, /* number of bytes in FCD trie */
+
+ _NORM_INDEX_AUX_TRIE_SIZE, /* number of bytes in the auxiliary trie */
+ _NORM_INDEX_CANON_SET_COUNT, /* number of uint16_t in the array of serialized USet */
+
+ _NORM_INDEX_TOP=32 /* changing this requires a new formatVersion */
+};
+
+enum {
+ /* FCD check: everything below this code point is known to have a 0 lead combining class */
+ _NORM_MIN_WITH_LEAD_CC=0x300
+};
+
+enum {
+ /**
+ * Bit 7 of the length byte for a decomposition string in extra data is
+ * a flag indicating whether the decomposition string is
+ * preceded by a 16-bit word with the leading and trailing cc
+ * of the decomposition (like for A-umlaut);
+ * if not, then both cc's are zero (like for compatibility ideographs).
+ */
+ _NORM_DECOMP_FLAG_LENGTH_HAS_CC=0x80,
+ /**
+ * Bits 6..0 of the length byte contain the actual length.
+ */
+ _NORM_DECOMP_LENGTH_MASK=0x7f
+};
+
+/** Constants for options flags for normalization. */
+enum {
+ /** Options bit 0, do not decompose Hangul syllables. */
+ UNORM_NX_HANGUL=1,
+ /** Options bit 1, do not decompose CJK compatibility characters. */
+ UNORM_NX_CJK_COMPAT=2
+};
+
+/**
+ * Description of the format of unorm.icu version 2.3.
+ *
+ * Main change from version 1 to version 2:
+ * Use of new, common UTrie instead of normalization-specific tries.
+ * Change to version 2.1: add third/auxiliary trie with associated data.
+ * Change to version 2.2: add skippable (f) flag data (_NORM_AUX_NFC_SKIP_F_MASK).
+ * Change to version 2.3: add serialized sets for normalization exclusions
+ * stored inside canonStartSets[]
+ *
+ * For more details of how to use the data structures see the code
+ * in unorm.cpp (runtime normalization code) and
+ * in gennorm.c and gennorm/store.c (build-time data generation).
+ *
+ * For the serialized format of UTrie see utrie.c/UTrieHeader.
+ *
+ * - Overall partition
+ *
+ * unorm.icu customarily begins with a UDataInfo structure, see udata.h and .c.
+ * After that there are the following structures:
+ *
+ * int32_t indexes[_NORM_INDEX_TOP]; -- _NORM_INDEX_TOP=32, see enum in this file
+ *
+ * UTrie normTrie; -- size in bytes=indexes[_NORM_INDEX_TRIE_SIZE]
+ *
+ * uint16_t extraData[extraDataTop]; -- extraDataTop=indexes[_NORM_INDEX_UCHAR_COUNT]
+ * extraData[0] contains the number of units for
+ * FC_NFKC_Closure (formatVersion>=2.1)
+ *
+ * uint16_t combiningTable[combiningTableTop]; -- combiningTableTop=indexes[_NORM_INDEX_COMBINE_DATA_COUNT]
+ * combiningTableTop may include one 16-bit padding unit
+ * to make sure that fcdTrie is 32-bit-aligned
+ *
+ * UTrie fcdTrie; -- size in bytes=indexes[_NORM_INDEX_FCD_TRIE_SIZE]
+ *
+ * UTrie auxTrie; -- size in bytes=indexes[_NORM_INDEX_AUX_TRIE_SIZE]
+ *
+ * uint16_t canonStartSets[canonStartSetsTop] -- canonStartSetsTop=indexes[_NORM_INDEX_CANON_SET_COUNT]
+ * serialized USets and binary search tables, see below
+ *
+ *
+ * The indexes array contains lengths and sizes of the following arrays and structures
+ * as well as the following values:
+ * indexes[_NORM_INDEX_COMBINE_FWD_COUNT]=combineFwdTop
+ * -- one more than the highest combining index computed for forward-only-combining characters
+ * indexes[_NORM_INDEX_COMBINE_BOTH_COUNT]=combineBothTop-combineFwdTop
+ * -- number of combining indexes computed for both-ways-combining characters
+ * indexes[_NORM_INDEX_COMBINE_BACK_COUNT]=combineBackTop-combineBothTop
+ * -- number of combining indexes computed for backward-only-combining characters
+ *
+ * indexes[_NORM_INDEX_MIN_NF*_NO_MAYBE] (where *={ C, D, KC, KD })
+ * -- first code point with a quick check NF* value of NO/MAYBE
+ *
+ *
+ * - Tries
+ *
+ * The main structures are two UTrie tables ("compact arrays"),
+ * each with one index array and one data array.
+ * See utrie.h and utrie.c.
+ *
+ *
+ * - Tries in unorm.icu
+ *
+ * The first trie (normTrie above)
+ * provides data for the NF* quick checks and normalization.
+ * The second trie (fcdTrie above) provides data just for FCD checks.
+ *
+ *
+ * - norm32 data words from the first trie
+ *
+ * The norm32Table contains one 32-bit word "norm32" per code point.
+ * It contains the following bit fields:
+ * 31..16 extra data index, _NORM_EXTRA_SHIFT is used to shift this field down
+ * if this index is <_NORM_EXTRA_INDEX_TOP then it is an index into
+ * extraData[] where variable-length normalization data for this
+ * code point is found
+ * if this index is <_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_SURROGATE_TOP
+ * then this is a norm32 for a leading surrogate, and the index
+ * value is used together with the following trailing surrogate
+ * code unit in the second trie access
+ * if this index is >=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_SURROGATE_TOP
+ * then this is a norm32 for a "special" character,
+ * i.e., the character is a Hangul syllable or a Jamo
+ * see _NORM_EXTRA_HANGUL etc.
+ * generally, instead of extracting this index from the norm32 and
+ * comparing it with the above constants,
+ * the normalization code compares the entire norm32 value
+ * with _NORM_MIN_SPECIAL, _NORM_SURROGATES_TOP, _NORM_MIN_HANGUL etc.
+ *
+ * 15..8 combining class (cc) according to UnicodeData.txt
+ *
+ * 7..6 _NORM_COMBINES_ANY flags, used in composition to see if a character
+ * combines with any following or preceding character(s)
+ * at all
+ * 7 _NORM_COMBINES_BACK
+ * 6 _NORM_COMBINES_FWD
+ *
+ * 5..0 quick check flags, set for "no" or "maybe", with separate flags for
+ * each normalization form
+ * the higher bits are "maybe" flags; for NF*D there are no such flags
+ * the lower bits are "no" flags for all forms, in the same order
+ * as the "maybe" flags,
+ * which is (MSB to LSB): NFKD NFD NFKC NFC
+ * 5..4 _NORM_QC_ANY_MAYBE
+ * 3..0 _NORM_QC_ANY_NO
+ * see further related constants
+ *
+ *
+ * - Extra data per code point
+ *
+ * "Extra data" is referenced by the index in norm32.
+ * It is variable-length data. It is present only when a given character
+ * needs it, and then only the parts that the character needs are stored.
+ * The norm32 extra data index is added to the beginning of extraData[]
+ * to get to a vector of 16-bit words with data at the following offsets:
+ *
+ * [-1] Combining index for composition.
+ * Stored only if norm32&_NORM_COMBINES_ANY .
+ * [0] Lengths of the canonical and compatibility decomposition strings.
+ * Stored only if there are decompositions, i.e.,
+ * if norm32&(_NORM_QC_NFD|_NORM_QC_NFKD)
+ * High byte: length of NFKD, or 0 if none
+ * Low byte: length of NFD, or 0 if none
+ * Each length byte also has another flag:
+ * Bit 7 of a length byte is set if there are non-zero
+ * combining classes (cc's) associated with the respective
+ * decomposition. If this flag is set, then the decomposition
+ * is preceded by a 16-bit word that contains the
+ * leading and trailing cc's.
+ * Bits 6..0 of a length byte are the length of the
+ * decomposition string, not counting the cc word.
+ * [1..n] NFD
+ * [n+1..] NFKD
+ *
+ * Each of the two decompositions consists of up to two parts:
+ * - A 16-bit word with the leading and trailing cc's.
+ * This is only stored if bit 7 of the corresponding length byte
+ * is set. In this case, at least one of the cc's is not zero.
+ * High byte: leading cc==cc of the first code point in the decomposition string
+ * Low byte: trailing cc==cc of the last code point in the decomposition string
+ * - The decomposition string in UTF-16, with length code units.
+ *
+ *
+ * - Combining indexes and combiningTable[]
+ *
+ * Combining indexes are stored at the [-1] offset of the extra data
+ * if the character combines forward or backward with any other characters.
+ * They are used for (re)composition in NF*C.
+ * Values of combining indexes are arranged according to whether a character
+ * combines forward, backward, or both ways:
+ * forward-only < both ways < backward-only
+ *
+ * The index values for forward-only and both-ways combining characters
+ * are indexes into the combiningTable[].
+ * The index values for backward-only combining characters are simply
+ * incremented from the preceding index values to be unique.
+ *
+ * In the combiningTable[], a variable-length list
+ * of variable-length (back-index, code point) pair entries is stored
+ * for each forward-combining character.
+ *
+ * These back-indexes are the combining indexes of both-ways or backward-only
+ * combining characters that the forward-combining character combines with.
+ *
+ * Each list is sorted in ascending order of back-indexes.
+ * Each list is terminated with the last back-index having bit 15 set.
+ *
+ * Each pair (back-index, code point) takes up either 2 or 3
+ * 16-bit words.
+ * The first word of a list entry is the back-index, with its bit 15 set if
+ * this is the last pair in the list.
+ *
+ * The second word contains flags in bits 15..13 that determine
+ * if there is a third word and how the combined character is encoded:
+ * 15 set if there is a third word in this list entry
+ * 14 set if the result is a supplementary character
+ * 13 set if the result itself combines forward
+ *
+ * According to these bits 15..14 of the second word,
+ * the result character is encoded as follows:
+ * 00 or 01 The result is <=0x1fff and stored in bits 12..0 of
+ * the second word.
+ * 10 The result is 0x2000..0xffff and stored in the third word.
+ * Bits 12..0 of the second word are not used.
+ * 11 The result is a supplementary character.
+ * Bits 9..0 of the leading surrogate are in bits 9..0 of
+ * the second word.
+ * Add 0xd800 to these bits to get the complete surrogate.
+ * Bits 12..10 of the second word are not used.
+ * The trailing surrogate is stored in the third word.
+ *
+ *
+ * - FCD trie
+ *
+ * The FCD trie is very simple.
+ * It is a folded trie with 16-bit data words.
+ * In each word, the high byte contains the leading cc of the character,
+ * and the low byte contains the trailing cc of the character.
+ * These cc's are the cc's of the first and last code points in the
+ * canonical decomposition of the character.
+ *
+ * Since all 16 bits are used for cc's, lead surrogates must be tested
+ * by checking the code unit instead of the trie data.
+ * This is done only if the 16-bit data word is not zero.
+ * If the code unit is a leading surrogate and the data word is not zero,
+ * then instead of cc's it contains the offset for the second trie lookup.
+ *
+ *
+ * - Auxiliary trie and data
+ *
+ * The auxiliary 16-bit trie contains data for additional properties.
+ * Bits
+ * 15..13 reserved
+ * 12 not NFC_Skippable (f) (formatVersion>=2.2)
+ * 11 flag: not a safe starter for canonical closure
+ * 10 composition exclusion
+ * 9.. 0 index into extraData[] to FC_NFKC_Closure string
+ * (not for lead surrogate),
+ * or lead surrogate offset (for lead surrogate, if 9..0 not zero)
+ *
+ * - FC_NFKC_Closure strings in extraData[]
+ *
+ * Strings are either stored as a single code unit or as the length
+ * followed by that many units.
+ * const UChar *s=extraData+(index from auxTrie data bits 9..0);
+ * int32_t length;
+ * if(*s<0xff00) {
+ * // s points to the single-unit string
+ * length=1;
+ * } else {
+ * length=*s&0xff;
+ * ++s;
+ * }
+ *
+ * Conditions for "NF* Skippable" from Mark Davis' com.ibm.text.UCD.NFSkippable:
+ * (used in NormalizerTransliterator)
+ *
+ * A skippable character is
+ * a) unassigned, or ALL of the following:
+ * b) of combining class 0.
+ * c) not decomposed by this normalization form.
+ * AND if NFC or NFKC,
+ * d) can never compose with a previous character.
+ * e) can never compose with a following character.
+ * f) can never change if another character is added.
+ * Example: a-breve might satisfy all but f, but if you
+ * add an ogonek it changes to a-ogonek + breve
+ *
+ * a)..e) must be tested from norm32.
+ * Since f) is more complicated, the (not-)NFC_Skippable flag (f) is built
+ * into the auxiliary trie.
+ * The same bit is used for NFC and NFKC; (c) differs for them.
+ * As usual, we build the "not skippable" flags so that unassigned
+ * code points get a 0 bit.
+ * This bit is only valid after (a)..(e) test false; test NFD_NO before (f) as well.
+ * Test Hangul LV syllables entirely in code.
+ *
+ *
+ * - structure inside canonStartSets[]
+ *
+ * This array maps from code points c to sets of code points (USerializedSet).
+ * The result sets are the code points whose canonical decompositions start
+ * with c.
+ *
+ * canonStartSets[] contains the following sub-arrays:
+ *
+ * indexes[_NORM_SET_INDEX_TOP]
+ * - contains lengths of sub-arrays etc.
+ *
+ * startSets[indexes[_NORM_SET_INDEX_CANON_SETS_LENGTH]-_NORM_SET_INDEX_TOP]
+ * - contains serialized sets (USerializedSet) of canonical starters for
+ * enumerating canonically equivalent strings
+ * indexes[_NORM_SET_INDEX_CANON_SETS_LENGTH] includes _NORM_SET_INDEX_TOP
+ * for details about the structure see uset.c
+ *
+ * bmpTable[indexes[_NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH]]
+ * - a sorted search table for BMP code points whose results are
+ * either indexes to USerializedSets or single code points for
+ * single-code point sets;
+ * each entry is a pair of { code point, result } with result=(binary) yy xxxxxx xxxxxxxx
+ * if yy==01 then there is a USerializedSet at canonStartSets+x
+ * else build a USerializedSet with result as the single code point
+ *
+ * suppTable[indexes[_NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH]]
+ * - a sorted search table for supplementary code points whose results are
+ * either indexes to USerializedSets or single code points for
+ * single-code point sets;
+ * each entry is a triplet of { high16(cp), low16(cp), result }
+ * each code point's high-word may contain extra data in bits 15..5:
+ * if the high word has bit 15 set, then build a set with a single code point
+ * which is ((high16(cp)&0x1f00)<<8)|result;
+ * else there is a USerializedSet at canonStartSets+result
+ *
+ * FormatVersion 2.3 adds 2 serialized sets for normalization exclusions.
+ * They are stored in the data file so that the runtime normalization code need
+ * not depend on other properties and their data and implementation files.
+ * The _NORM_SET_INDEX_NX_..._OFFSET offsets in the canonStartSets index table
+ * give the location for each set.
+ * There is no set stored for UNORM_NX_HANGUL because it's trivial to create
+ * without using properties.
+ *
+ * Set contents:
+ *
+ * _NORM_SET_INDEX_NX_CJK_COMPAT_OFFSET (for UNORM_NX_CJK_COMPAT)
+ * [[:Ideographic:]&[:NFD_QC=No:]]
+ * =[CJK Ideographs]&[has canonical decomposition]
+ *
+ * _NORM_SET_INDEX_NX_UNICODE32_OFFSET (for UNORM_UNICODE_3_2)
+ * [:^Age=3.2:]
+ * =set with all code points that were not designated by the specified Unicode version
+ *
+ * _NORM_SET_INDEX_NX_RESERVED_OFFSET
+ * This is an offset that points to where the next, future set would start.
+ * Currently it indicates where the previous set ends, and thus its length.
+ * The name for this enum constant may in the future be applied to different
+ * index slots. In order to get the limit of a set, use its index slot and
+ * the immediately following one regardless of that one's enum name.
+ */
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+
+#endif
diff --git a/thirdparty/icu4c/common/uobject.cpp b/thirdparty/icu4c/common/uobject.cpp
new file mode 100644
index 0000000000..e222b2ce9b
--- /dev/null
+++ b/thirdparty/icu4c/common/uobject.cpp
@@ -0,0 +1,105 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: uobject.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jun26
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/uobject.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+#if U_OVERRIDE_CXX_ALLOCATION
+
+/*
+ * Default implementation of UMemory::new/delete
+ * using uprv_malloc() and uprv_free().
+ *
+ * For testing, this is used together with a list of imported symbols to verify
+ * that ICU is not using the global ::new and ::delete operators.
+ *
+ * These operators can be implemented like this or any other appropriate way
+ * when customizing ICU for certain environments.
+ * Whenever ICU is customized in binary incompatible ways please be sure
+ * to use library name suffixes to distinguish such libraries from
+ * the standard build.
+ *
+ * Instead of just modifying these C++ new/delete operators, it is usually best
+ * to modify the uprv_malloc()/uprv_free()/uprv_realloc() functions in cmemory.c.
+ *
+ * Memory test on Windows/MSVC 6:
+ * The global operators new and delete look as follows:
+ * 04F 00000000 UNDEF notype () External | ??2@YAPAXI@Z (void * __cdecl operator new(unsigned int))
+ * 03F 00000000 UNDEF notype () External | ??3@YAXPAX@Z (void __cdecl operator delete(void *))
+ *
+ * These lines are from output generated by the MSVC 6 tool dumpbin with
+ * dumpbin /symbols *.obj
+ *
+ * ??2@YAPAXI@Z and ??3@YAXPAX@Z are the linker symbols in the .obj
+ * files and are imported from msvcrtd.dll (in a debug build).
+ *
+ * Make sure that with the UMemory operators new and delete defined these two symbols
+ * do not appear in the dumpbin /symbols output for the ICU libraries!
+ *
+ * If such a symbol appears in the output then look in the preceding lines in the output
+ * for which file and function calls the global new or delete operator,
+ * and replace with uprv_malloc/uprv_free.
+ */
+
+/*
+ * UMemory allocation operator: obtains size bytes from uprv_malloc()
+ * and hands the resulting pointer back unchanged.
+ */
+void * U_EXPORT2 UMemory::operator new(size_t size) U_NOEXCEPT {
+    void *const block = uprv_malloc(size);
+    return block;
+}
+
+/*
+ * UMemory deallocation operator: passes p to uprv_free().
+ * A null pointer is ignored and never reaches uprv_free().
+ */
+void U_EXPORT2 UMemory::operator delete(void *p) U_NOEXCEPT {
+    if(p==nullptr) {
+        return;
+    }
+    uprv_free(p);
+}
+
+/*
+ * Array form of the UMemory allocation operator: obtains size bytes
+ * from uprv_malloc() and hands the resulting pointer back unchanged.
+ */
+void * U_EXPORT2 UMemory::operator new[](size_t size) U_NOEXCEPT {
+    void *const block = uprv_malloc(size);
+    return block;
+}
+
+/*
+ * Array form of the UMemory deallocation operator: passes p to uprv_free().
+ * A null pointer is ignored and never reaches uprv_free().
+ */
+void U_EXPORT2 UMemory::operator delete[](void *p) U_NOEXCEPT {
+    if(p==nullptr) {
+        return;
+    }
+    uprv_free(p);
+}
+
+#if U_HAVE_DEBUG_LOCATION_NEW
+/*
+ * Debug-location overload of the allocation operator.
+ * The file and line arguments are unnamed and ignored; the request is
+ * forwarded to the single-argument UMemory::operator new.
+ */
+void * U_EXPORT2 UMemory::operator new(size_t size, const char* /*file*/, int /*line*/) U_NOEXCEPT {
+    void *const block = UMemory::operator new(size);
+    return block;
+}
+
+/*
+ * Debug-location overload of the deallocation operator.
+ * The file and line arguments are unnamed and ignored; the pointer is
+ * released through the single-argument UMemory::operator delete.
+ */
+void U_EXPORT2 UMemory::operator delete(void* p, const char* /*file*/, int /*line*/) U_NOEXCEPT {
+ UMemory::operator delete(p);
+}
+#endif /* U_HAVE_DEBUG_LOCATION_NEW */
+
+
+#endif
+
+/* Out-of-line UObject destructor; the body is intentionally empty. */
+UObject::~UObject() {}
+
+/* Base-class implementation: always reports a null class ID. */
+UClassID UObject::getDynamicClassID() const {
+    return nullptr;
+}
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
+/*
+ * C-linkage deleter: treats obj as a pointer to a UObject and destroys it
+ * with a delete expression.
+ */
+U_CAPI void U_EXPORT2
+uprv_deleteUObject(void *obj) {
+    UObject *object = static_cast<UObject *>(obj);
+    delete object;
+}
diff --git a/thirdparty/icu4c/common/uposixdefs.h b/thirdparty/icu4c/common/uposixdefs.h
new file mode 100644
index 0000000000..23c3f6d466
--- /dev/null
+++ b/thirdparty/icu4c/common/uposixdefs.h
@@ -0,0 +1,77 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2011-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: uposixdefs.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011jul25
+* created by: Markus W. Scherer
+*
+* Common definitions for implementation files working with POSIX functions.
+* *Important*: #include this file before any other header files!
+*/
+
+#ifndef __UPOSIXDEFS_H__
+#define __UPOSIXDEFS_H__
+
+/*
+ * Define _XOPEN_SOURCE for access to POSIX functions.
+ *
+ * We cannot use U_PLATFORM from platform.h/utypes.h because
+ * "The Open Group Base Specifications"
+ * chapter "2.2 The Compilation Environment" says:
+ * "In the compilation of an application that #defines a feature test macro
+ * specified by IEEE Std 1003.1-2001,
+ * no header defined by IEEE Std 1003.1-2001 shall be included prior to
+ * the definition of the feature test macro."
+ */
+#ifdef _XOPEN_SOURCE
+ /* Use the predefined value. */
+#else
+ /*
+ * Version 6.0:
+ * The Open Group Base Specifications Issue 6 (IEEE Std 1003.1, 2004 Edition)
+ * also known as
+ * SUSv3 = Open Group Single UNIX Specification, Version 3 (UNIX03)
+ *
+ * Note: This definition used to be in C source code (e.g., putil.c)
+ * and define _XOPEN_SOURCE to different values depending on __STDC_VERSION__.
+ * In C++ source code (e.g., putil.cpp), __STDC_VERSION__ is not defined at all.
+ */
+# define _XOPEN_SOURCE 600
+#endif
+
+/*
+ * Make sure things like readlink and such functions work.
+ * Poorly upgraded Solaris machines can't have this defined.
+ * Cleanly installed Solaris can use this #define.
+ *
+ * z/OS needs this definition for timeval and to get usleep.
+ */
+#if !defined(_XOPEN_SOURCE_EXTENDED) && defined(__TOS_MVS__)
+# define _XOPEN_SOURCE_EXTENDED 1
+#endif
+
+/**
+ * Solaris says:
+ * "...it is invalid to compile an XPG6 or a POSIX.1-2001 application with anything other
+ * than a c99 or later compiler."
+ * Apparently C++11 is not "or later". Work around this.
+ */
+#if defined(__cplusplus) && (defined(sun) || defined(__sun)) && !defined (_STDC_C99)
+# define _STDC_C99
+#endif
+
+/*
+ * Define _POSIX_C_SOURCE as 200809L, but only if it is not defined yet and
+ * only when building for Apple/Mach targets with a compiler other than clang.
+ */
+#if !defined _POSIX_C_SOURCE && \
+ defined(__APPLE__) && defined(__MACH__) && !defined(__clang__)
+// Needed to prevent EOWNERDEAD issues with GCC on Mac
+#define _POSIX_C_SOURCE 200809L
+#endif
+
+#endif /* __UPOSIXDEFS_H__ */
diff --git a/thirdparty/icu4c/common/uprops.cpp b/thirdparty/icu4c/common/uprops.cpp
new file mode 100644
index 0000000000..1604ad9a17
--- /dev/null
+++ b/thirdparty/icu4c/common/uprops.cpp
@@ -0,0 +1,797 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uprops.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002feb24
+* created by: Markus W. Scherer
+*
+* Implementations for mostly non-core Unicode character properties
+* stored in uprops.icu.
+*
+* With the APIs implemented here, almost all properties files and
+* their associated implementation files are used from this file,
+* including those for normalization and case mappings.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#include "unicode/ucptrie.h"
+#include "unicode/udata.h"
+#include "unicode/unorm2.h"
+#include "unicode/uscript.h"
+#include "unicode/ustring.h"
+#include "cstring.h"
+#include "mutex.h"
+#include "normalizer2impl.h"
+#include "umutex.h"
+#include "ubidi_props.h"
+#include "uprops.h"
+#include "ucase.h"
+#include "ucln_cmn.h"
+#include "ulayout_props.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_USE
+
+// Unicode text layout properties data -----------------------------------------
+
+namespace {
+
+// Init-once guard for the layout data; ulayout_ensureData() hands it to
+// umtx_initOnce() and uprops_cleanup() resets it.
+icu::UInitOnce gLayoutInitOnce = U_INITONCE_INITIALIZER;
+// Data handle obtained from udata_openChoice() in ulayout_load().
+UDataMemory *gLayoutMemory = nullptr;
+
+// Tries opened by ulayout_load(). A pointer may remain nullptr, for example
+// when ulayout_load() finds the corresponding section smaller than 16 bytes.
+UCPTrie *gInpcTrie = nullptr; // Indic_Positional_Category
+UCPTrie *gInscTrie = nullptr; // Indic_Syllabic_Category
+UCPTrie *gVoTrie = nullptr; // Vertical_Orientation
+
+// Maximum values unpacked from the ULAYOUT_IX_MAX_VALUES index word by
+// ulayout_load(); returned by layoutGetMaxValue().
+int32_t gMaxInpcValue = 0;
+int32_t gMaxInscValue = 0;
+int32_t gMaxVoValue = 0;
+
+/**
+ * Cleanup callback registered by ulayout_load().
+ * Closes the layout data handle and the three tries, clears the cached
+ * maximum values and resets the init-once guard so the data can be
+ * loaded again.
+ *
+ * @return TRUE, unconditionally.
+ */
+UBool U_CALLCONV uprops_cleanup() {
+    udata_close(gLayoutMemory);
+    gLayoutMemory = nullptr;
+
+    // Close the tries in declaration order, nulling each pointer right after.
+    UCPTrie **const trieSlots[] = { &gInpcTrie, &gInscTrie, &gVoTrie };
+    for (UCPTrie **slot : trieSlots) {
+        ucptrie_close(*slot);
+        *slot = nullptr;
+    }
+
+    gMaxInpcValue = gMaxInscValue = gMaxVoValue = 0;
+
+    gLayoutInitOnce.reset();
+    return TRUE;
+}
+
+/**
+ * Acceptance callback passed to udata_openChoice() by ulayout_load().
+ * Accepts a data item only if its UDataInfo is at least 20 bytes, matches
+ * this build's endianness and charset family, carries the ULAYOUT_FMT_0..3
+ * data format bytes and has format version 1.
+ *
+ * @param pInfo header information of the candidate data item
+ * @return nonzero if the item is acceptable
+ */
+UBool U_CALLCONV
+ulayout_isAcceptable(void * /*context*/,
+                     const char * /* type */, const char * /*name*/,
+                     const UDataInfo *pInfo) {
+    if (pInfo->size < 20) {
+        return FALSE;
+    }
+    if (pInfo->isBigEndian != U_IS_BIG_ENDIAN ||
+            pInfo->charsetFamily != U_CHARSET_FAMILY) {
+        return FALSE;
+    }
+    const bool formatMatches =
+        pInfo->dataFormat[0] == ULAYOUT_FMT_0 &&
+        pInfo->dataFormat[1] == ULAYOUT_FMT_1 &&
+        pInfo->dataFormat[2] == ULAYOUT_FMT_2 &&
+        pInfo->dataFormat[3] == ULAYOUT_FMT_3;
+    return formatMatches && pInfo->formatVersion[0] == 1;
+}
+
+// UInitOnce singleton initialization function
+/**
+ * Opens the layout data item, validates its index array, opens the three
+ * property tries and unpacks the cached maximum values into the globals.
+ * Called through umtx_initOnce() from ulayout_ensureData().
+ *
+ * @param errorCode in/out status. Set to U_INVALID_FORMAT_ERROR when the data
+ *                  declares fewer than 12 indexes; otherwise carries whatever
+ *                  udata_openChoice() / ucptrie_openFromBinary() report.
+ */
+void U_CALLCONV ulayout_load(UErrorCode &errorCode) {
+    gLayoutMemory = udata_openChoice(
+        nullptr, ULAYOUT_DATA_TYPE, ULAYOUT_DATA_NAME,
+        ulayout_isAcceptable, nullptr, &errorCode);
+    if (U_FAILURE(errorCode)) { return; }
+
+    // Register the cleanup hook as soon as there is something to release.
+    // Registering only at the end would skip it on the invalid-format return
+    // below, leaving gLayoutMemory open and gLayoutInitOnce never reset.
+    ucln_common_registerCleanup(UCLN_COMMON_UPROPS, uprops_cleanup);
+
+    const uint8_t *inBytes = (const uint8_t *)udata_getMemory(gLayoutMemory);
+    const int32_t *inIndexes = (const int32_t *)inBytes;
+    int32_t indexesLength = inIndexes[ULAYOUT_IX_INDEXES_LENGTH];
+    if (indexesLength < 12) {
+        errorCode = U_INVALID_FORMAT_ERROR;  // Not enough indexes.
+        return;
+    }
+
+    // The tries follow the index array back to back; each *_TRIE_TOP index
+    // is the byte offset where one trie ends and the next section begins.
+    // A section smaller than 16 bytes is skipped and its trie stays nullptr.
+    int32_t offset = indexesLength * 4;
+    int32_t top = inIndexes[ULAYOUT_IX_INPC_TRIE_TOP];
+    int32_t trieSize = top - offset;
+    if (trieSize >= 16) {
+        gInpcTrie = ucptrie_openFromBinary(
+            UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY,
+            inBytes + offset, trieSize, nullptr, &errorCode);
+    }
+    offset = top;
+    top = inIndexes[ULAYOUT_IX_INSC_TRIE_TOP];
+    trieSize = top - offset;
+    if (trieSize >= 16) {
+        gInscTrie = ucptrie_openFromBinary(
+            UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY,
+            inBytes + offset, trieSize, nullptr, &errorCode);
+    }
+    offset = top;
+    top = inIndexes[ULAYOUT_IX_VO_TRIE_TOP];
+    trieSize = top - offset;
+    if (trieSize >= 16) {
+        gVoTrie = ucptrie_openFromBinary(
+            UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY,
+            inBytes + offset, trieSize, nullptr, &errorCode);
+    }
+
+    // One packed word holds all three maxima: InPC takes everything above
+    // its shift, InSC and Vo are 8-bit fields.
+    uint32_t maxValues = inIndexes[ULAYOUT_IX_MAX_VALUES];
+    gMaxInpcValue = maxValues >> ULAYOUT_MAX_INPC_SHIFT;
+    gMaxInscValue = (maxValues >> ULAYOUT_MAX_INSC_SHIFT) & 0xff;
+    gMaxVoValue = (maxValues >> ULAYOUT_MAX_VO_SHIFT) & 0xff;
+}
+
+/**
+ * Runs ulayout_load() once (via gLayoutInitOnce) unless errorCode already
+ * indicates a failure.
+ *
+ * @param errorCode in/out status
+ * @return nonzero if errorCode indicates success afterwards
+ */
+UBool ulayout_ensureData(UErrorCode &errorCode) {
+    if (U_SUCCESS(errorCode)) {
+        umtx_initOnce(gLayoutInitOnce, &ulayout_load, errorCode);
+    }
+    return U_SUCCESS(errorCode);
+}
+
+/** Convenience overload for callers that have no error code to report into. */
+UBool ulayout_ensureData() {
+    UErrorCode localStatus = U_ZERO_ERROR;
+    return ulayout_ensureData(localStatus);
+}
+
+} // namespace
+
+/* general properties API functions ----------------------------------------- */
+
+struct BinaryProperty;
+
+/** Function type of the per-property membership test stored in BinaryProperty::contains. */
+using BinaryPropertyContains = UBool(const BinaryProperty &prop, UChar32 c, UProperty which);
+
+/** One row of the binProps[] table. */
+struct BinaryProperty {
+    int32_t column;  // SRC_PROPSVEC column, or "source" if mask==0
+    uint32_t mask;
+    BinaryPropertyContains *contains;  // handler invoked as contains(*this, c, which)
+};
+
+/**
+ * Handler for binary properties stored directly in the properties vectors:
+ * tests prop.mask against the vector word in column prop.column for c.
+ */
+static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) {
+    /* systematic, directly stored properties */
+    const uint32_t vectorWord = u_getUnicodeProperties(c, prop.column);
+    return (vectorWord & prop.mask) != 0;
+}
+
+// Handler for the table rows marked UPROPS_SRC_CASE: forwards c and the
+// property selector to ucase_hasBinaryProperty().
+static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
+ return static_cast<UBool>(ucase_hasBinaryProperty(c, which));
+}
+
+// The next three handlers ignore the table row and property selector and
+// simply forward c to the matching ubidi_* predicate.
+static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ return ubidi_isBidiControl(c);
+}
+
+static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ return ubidi_isMirrored(c);
+}
+
+static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ return ubidi_isJoinControl(c);
+}
+
+#if !UCONFIG_NO_NORMALIZATION
+/**
+ * Handler for Full_Composition_Exclusion.
+ * Returns FALSE if the NFC implementation cannot be obtained.
+ */
+static UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+    // By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
+    UErrorCode status=U_ZERO_ERROR;
+    const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(status);
+    if(U_FAILURE(status)) {
+        return FALSE;
+    }
+    return nfcImpl->isCompNo(nfcImpl->getNorm16(c))!=0;
+}
+#else
+/** Normalization is compiled out: the property is never set. */
+static UBool hasFullCompositionExclusion(const BinaryProperty &, UChar32, UProperty) {
+    return FALSE;
+}
+#endif
+
+// UCHAR_NF*_INERT properties
+#if !UCONFIG_NO_NORMALIZATION
+/**
+ * Shared handler for the four *_INERT properties. The normalization mode is
+ * derived from the property's offset from UCHAR_NFD_INERT.
+ * Returns FALSE if the normalizer instance cannot be obtained.
+ */
+static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
+    const UNormalizationMode mode=(UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD);
+    UErrorCode status=U_ZERO_ERROR;
+    const Normalizer2 *normalizer=Normalizer2Factory::getInstance(mode, status);
+    if(U_FAILURE(status)) {
+        return FALSE;
+    }
+    return normalizer->isInert(c)!=0;
+}
+#else
+/** Normalization is compiled out: always FALSE. */
+static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) {
+    return FALSE;
+}
+#endif
+
+#if UCONFIG_NO_NORMALIZATION
+// Stub used when normalization is compiled out: always FALSE.
+static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) {
+ return FALSE;
+}
+#else
+/**
+ * Handler for the row that pairs UPROPS_SRC_CASE_AND_NORM with this function.
+ * Looks at the decomposition of c (if the NFC instance reports one) and
+ * decides whether case folding changes it.
+ * Returns FALSE if the NFC instance cannot be obtained, or if c is negative
+ * and has no decomposition.
+ */
+static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ UnicodeString nfd;
+ UErrorCode errorCode=U_ZERO_ERROR;
+ const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode);
+ if(U_FAILURE(errorCode)) {
+ return FALSE;
+ }
+ // Reduce the decomposition to a single code point in c where possible;
+ // otherwise set c to U_SENTINEL so that the string branch below is taken.
+ if(nfcNorm2->getDecomposition(c, nfd)) {
+ /* c has a decomposition */
+ if(nfd.length()==1) {
+ c=nfd[0]; /* single BMP code point */
+ } else if(nfd.length()<=U16_MAX_LENGTH &&
+ nfd.length()==U16_LENGTH(c=nfd.char32At(0))
+ ) {
+ /* single supplementary code point */
+ // Note: c was reassigned inside the condition above.
+ } else {
+ c=U_SENTINEL;
+ }
+ } else if(c<0) {
+ return FALSE; /* protect against bad input */
+ }
+ if(c>=0) {
+ /* single code point */
+ // A non-negative result from ucase_toFullFolding() is reported as "changes".
+ const UChar *resultString;
+ return (UBool)(ucase_toFullFolding(c, &resultString, U_FOLD_CASE_DEFAULT)>=0);
+ } else {
+ /* guess some large but stack-friendly capacity */
+ // Fold the whole decomposition and compare it with the unfolded text.
+ UChar dest[2*UCASE_MAX_STRING_LENGTH];
+ int32_t destLength;
+ destLength=u_strFoldCase(dest, UPRV_LENGTHOF(dest),
+ nfd.getBuffer(), nfd.length(),
+ U_FOLD_CASE_DEFAULT, &errorCode);
+ return (UBool)(U_SUCCESS(errorCode) &&
+ 0!=u_strCompare(nfd.getBuffer(), nfd.length(),
+ dest, destLength, FALSE));
+ }
+}
+#endif
+
+#if UCONFIG_NO_NORMALIZATION
+// Stub used when normalization is compiled out: always FALSE.
+static UBool changesWhenNFKC_Casefolded(const BinaryProperty &, UChar32, UProperty) {
+ return FALSE;
+}
+#else
+/**
+ * Handler for the row that pairs UPROPS_SRC_NFKC_CF with this function.
+ * Composes the one-code-point string for c through the NFKC_CF
+ * implementation and reports whether the result differs from the input.
+ * Returns FALSE if the implementation cannot be obtained or composing fails.
+ */
+static UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ UErrorCode errorCode=U_ZERO_ERROR;
+ const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode);
+ if(U_FAILURE(errorCode)) {
+ return FALSE;
+ }
+ UnicodeString src(c);
+ UnicodeString dest;
+ {
+ // The ReorderingBuffer must be in a block because its destructor
+ // needs to release dest's buffer before we look at its contents.
+ ReorderingBuffer buffer(*kcf, dest);
+ // Small destCapacity for NFKC_CF(c).
+ if(buffer.init(5, errorCode)) {
+ const UChar *srcArray=src.getBuffer();
+ kcf->compose(srcArray, srcArray+src.length(), FALSE,
+ TRUE, buffer, errorCode);
+ }
+ }
+ // dest is only compared after the buffer above has gone out of scope.
+ return U_SUCCESS(errorCode) && dest!=src;
+}
+#endif
+
+#if !UCONFIG_NO_NORMALIZATION
+/**
+ * Handler for the row that pairs UPROPS_SRC_NFC_CANON_ITER with this function.
+ * Returns FALSE if the NFC implementation or its canonical-iterator data
+ * cannot be made available.
+ */
+static UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+    UErrorCode status=U_ZERO_ERROR;
+    const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(status);
+    if(U_FAILURE(status) || !nfcImpl->ensureCanonIterData(status)) {
+        return FALSE;
+    }
+    return nfcImpl->isCanonSegmentStarter(c)!=0;
+}
+#else
+/** Normalization is compiled out: always FALSE. */
+static UBool isCanonSegmentStarter(const BinaryProperty &, UChar32, UProperty) {
+    return FALSE;
+}
+#endif
+
+// The isPOSIX_* handlers ignore the table row and property selector and
+// forward c to the u_is*() predicate named in each body.
+static UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ return u_isalnumPOSIX(c);
+}
+
+static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ return u_isblank(c);
+}
+
+static UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ return u_isgraphPOSIX(c);
+}
+
+static UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ return u_isprintPOSIX(c);
+}
+
+static UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ return u_isxdigit(c);
+}
+
+/** Handler that is true exactly for code points in U+1F1E6..U+1F1FF. */
+static UBool isRegionalIndicator(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+    // Property starts are a subset of lb=RI etc.
+    const UChar32 firstIndicator=0x1F1E6;
+    const UChar32 lastIndicator=0x1F1FF;
+    return !(c<firstIndicator || lastIndicator<c);
+}
+
+static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
+ /*
+ * column and mask values for binary properties from u_getUnicodeProperties().
+ * Must be in order of corresponding UProperty,
+ * and there must be exactly one entry per binary UProperty.
+ *
+ * Properties with mask==0 are handled in code.
+ * For them, column is the UPropertySource value.
+ *
+ * The table is indexed directly with the UProperty value (binProps[which])
+ * by u_hasBinaryProperty(), u_getIntPropertyValue() and uprops_getSource(),
+ * so inserting or reordering rows changes which handler a property gets.
+ */
+ { 1, U_MASK(UPROPS_ALPHABETIC), defaultContains },
+ { 1, U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains },
+ { UPROPS_SRC_BIDI, 0, isBidiControl },
+ { UPROPS_SRC_BIDI, 0, isMirrored },
+ { 1, U_MASK(UPROPS_DASH), defaultContains },
+ { 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultContains },
+ { 1, U_MASK(UPROPS_DEPRECATED), defaultContains },
+ { 1, U_MASK(UPROPS_DIACRITIC), defaultContains },
+ { 1, U_MASK(UPROPS_EXTENDER), defaultContains },
+ { UPROPS_SRC_NFC, 0, hasFullCompositionExclusion },
+ { 1, U_MASK(UPROPS_GRAPHEME_BASE), defaultContains },
+ { 1, U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains },
+ { 1, U_MASK(UPROPS_GRAPHEME_LINK), defaultContains },
+ { 1, U_MASK(UPROPS_HEX_DIGIT), defaultContains },
+ { 1, U_MASK(UPROPS_HYPHEN), defaultContains },
+ { 1, U_MASK(UPROPS_ID_CONTINUE), defaultContains },
+ { 1, U_MASK(UPROPS_ID_START), defaultContains },
+ { 1, U_MASK(UPROPS_IDEOGRAPHIC), defaultContains },
+ { 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains },
+ { 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains },
+ { UPROPS_SRC_BIDI, 0, isJoinControl },
+ { 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains },
+ { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_LOWERCASE
+ { 1, U_MASK(UPROPS_MATH), defaultContains },
+ { 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains },
+ { 1, U_MASK(UPROPS_QUOTATION_MARK), defaultContains },
+ { 1, U_MASK(UPROPS_RADICAL), defaultContains },
+ { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_SOFT_DOTTED
+ { 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains },
+ { 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains },
+ { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_UPPERCASE
+ { 1, U_MASK(UPROPS_WHITE_SPACE), defaultContains },
+ { 1, U_MASK(UPROPS_XID_CONTINUE), defaultContains },
+ { 1, U_MASK(UPROPS_XID_START), defaultContains },
+ { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_SENSITIVE
+ { 1, U_MASK(UPROPS_S_TERM), defaultContains },
+ { 1, U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains },
+ { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFD_INERT
+ { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKD_INERT
+ { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFC_INERT
+ { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKC_INERT
+ { UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter },
+ { 1, U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains },
+ { 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains },
+ { UPROPS_SRC_CHAR_AND_PROPSVEC, 0, isPOSIX_alnum },
+ { UPROPS_SRC_CHAR, 0, isPOSIX_blank },
+ { UPROPS_SRC_CHAR, 0, isPOSIX_graph },
+ { UPROPS_SRC_CHAR, 0, isPOSIX_print },
+ { UPROPS_SRC_CHAR, 0, isPOSIX_xdigit },
+ { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASED
+ { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_IGNORABLE
+ { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_LOWERCASED
+ { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_UPPERCASED
+ { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_TITLECASED
+ { UPROPS_SRC_CASE_AND_NORM, 0, changesWhenCasefolded },
+ { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_CASEMAPPED
+ { UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded },
+ { 2, U_MASK(UPROPS_2_EMOJI), defaultContains },
+ { 2, U_MASK(UPROPS_2_EMOJI_PRESENTATION), defaultContains },
+ { 2, U_MASK(UPROPS_2_EMOJI_MODIFIER), defaultContains },
+ { 2, U_MASK(UPROPS_2_EMOJI_MODIFIER_BASE), defaultContains },
+ { 2, U_MASK(UPROPS_2_EMOJI_COMPONENT), defaultContains },
+ // NOTE(review): mask==0 here, so the 2 is read as a UPropertySource value
+ // by uprops_getSource(), not as a vector column -- confirm which source it names.
+ { 2, 0, isRegionalIndicator },
+ { 1, U_MASK(UPROPS_PREPENDED_CONCATENATION_MARK), defaultContains },
+ { 2, U_MASK(UPROPS_2_EXTENDED_PICTOGRAPHIC), defaultContains },
+};
+
+/**
+ * Tests whether code point c has the binary property `which`.
+ * Dispatches to the handler stored in binProps[which].
+ *
+ * @return FALSE for any `which` outside [UCHAR_BINARY_START, UCHAR_BINARY_LIMIT)
+ */
+U_CAPI UBool U_EXPORT2
+u_hasBinaryProperty(UChar32 c, UProperty which) {
+    /* c is range-checked in the functions that are called from here */
+    const bool isKnownBinary = UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT;
+    if(!isKnownBinary) {
+        /* not a known binary property */
+        return FALSE;
+    }
+    const BinaryProperty &entry=binProps[which];
+    return entry.contains(entry, c, which);
+}
+
+struct IntProperty;
+
+/** Function type of the value getter stored in IntProperty::getValue. */
+using IntPropertyGetValue = int32_t(const IntProperty &prop, UChar32 c, UProperty which);
+/** Function type of the maximum-value getter stored in IntProperty::getMaxValue. */
+using IntPropertyGetMaxValue = int32_t(const IntProperty &prop, UProperty which);
+
+/** One row of the intProps[] table. */
+struct IntProperty {
+    int32_t column;  // SRC_PROPSVEC column, or "source" if mask==0
+    uint32_t mask;
+    int32_t shift;   // =maxValue if getMaxValueFromShift() is used
+    IntPropertyGetValue *getValue;
+    IntPropertyGetMaxValue *getMaxValue;
+};
+
+/**
+ * Getter for int properties stored directly in the properties vectors:
+ * masks the vector word in column prop.column and shifts it down by prop.shift.
+ */
+static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) {
+    /* systematic, directly stored properties */
+    // The masked bits are converted to int32_t before the shift, as a signed value.
+    const int32_t maskedBits=(int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask);
+    return maskedBits>>prop.shift;
+}
+
+// Maximum for a directly stored property: the same mask and shift as
+// defaultGetValue(), applied to the per-column word from uprv_getMaxValues().
+static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) {
+ return (uprv_getMaxValues(prop.column)&prop.mask)>>prop.shift;
+}
+
+// For rows that use this getter, the shift field holds the maximum value itself.
+static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/) {
+ return prop.shift;
+}
+
+// Forwards to u_charDirection() and widens the result to int32_t.
+static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ return (int32_t)u_charDirection(c);
+}
+
+// Forwards to ubidi_getPairedBracketType() and widens the result to int32_t.
+static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ return (int32_t)ubidi_getPairedBracketType(c);
+}
+
+// Maximum-value getter shared by the rows that name it in intProps[];
+// the property selector is passed through to ubidi_getMaxValue().
+static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) {
+ return ubidi_getMaxValue(which);
+}
+
+#if !UCONFIG_NO_NORMALIZATION
+/** Value getter that forwards to u_getCombiningClass(). */
+static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+    const int32_t combiningClass=u_getCombiningClass(c);
+    return combiningClass;
+}
+#else
+/** Normalization is compiled out: every code point reports 0. */
+static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) {
+    return 0;
+}
+#endif
+
+// Forwards to u_charType() and widens the result to int32_t.
+static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ return (int32_t)u_charType(c);
+}
+
+// Forwards to ubidi_getJoiningGroup().
+static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ return ubidi_getJoiningGroup(c);
+}
+
+// Forwards to ubidi_getJoiningType().
+static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ return ubidi_getJoiningType(c);
+}
+
+// Extracts the numeric-type-and-value field from the main properties word
+// of c, then reduces it to the numeric type with UPROPS_NTV_GET_TYPE().
+static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c));
+ return UPROPS_NTV_GET_TYPE(ntv);
+}
+
+// Forwards to uscript_getScript(); the local error code is not inspected,
+// so whatever value that call returns is passed back unchanged.
+static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ UErrorCode errorCode=U_ZERO_ERROR;
+ return (int32_t)uscript_getScript(c, &errorCode);
+}
+
+// Maximum script value: takes the UPROPS_SCRIPT_X_MASK bits of the column-0
+// maximum word and converts them with uprops_mergeScriptCodeOrIndex().
+static int32_t scriptGetMaxValue(const IntProperty &/*prop*/, UProperty /*which*/) {
+ uint32_t scriptX=uprv_getMaxValues(0)&UPROPS_SCRIPT_X_MASK;
+ return uprops_mergeScriptCodeOrIndex(scriptX);
+}
+
+/*
+ * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
+ * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
+ *
+ * The array is indexed by the Grapheme_Cluster_Break value; the trailing
+ * comment on each entry names the GCB value that the position stands for.
+ */
+static const UHangulSyllableType gcbToHst[]={
+ U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */
+ U_HST_NOT_APPLICABLE, /* U_GCB_CONTROL */
+ U_HST_NOT_APPLICABLE, /* U_GCB_CR */
+ U_HST_NOT_APPLICABLE, /* U_GCB_EXTEND */
+ U_HST_LEADING_JAMO, /* U_GCB_L */
+ U_HST_NOT_APPLICABLE, /* U_GCB_LF */
+ U_HST_LV_SYLLABLE, /* U_GCB_LV */
+ U_HST_LVT_SYLLABLE, /* U_GCB_LVT */
+ U_HST_TRAILING_JAMO, /* U_GCB_T */
+ U_HST_VOWEL_JAMO /* U_GCB_V */
+ /*
+ * Omit GCB values beyond what we need for hst.
+ * The code below checks for the array length.
+ */
+};
+
+/**
+ * Derives Hangul_Syllable_Type from the Grapheme_Cluster_Break field in
+ * properties vector column 2, using the gcbToHst[] lookup table.
+ * GCB values past the end of the table map to U_HST_NOT_APPLICABLE.
+ */
+static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+    /* see comments on gcbToHst[] above */
+    const uint32_t vectorWord2=u_getUnicodeProperties(c, 2);
+    const int32_t gcb=(int32_t)(vectorWord2&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
+    if(gcb>=UPRV_LENGTHOF(gcbToHst)) {
+        return U_HST_NOT_APPLICABLE;
+    }
+    return gcbToHst[gcb];
+}
+
+#if !UCONFIG_NO_NORMALIZATION
+/**
+ * Shared getter for the four *_QUICK_CHECK properties. The normalization
+ * mode is derived from the property's offset from UCHAR_NFD_QUICK_CHECK.
+ */
+static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) {
+    const UNormalizationMode mode=(UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD);
+    return (int32_t)unorm_getQuickCheck(c, mode);
+}
+#else
+/** Normalization is compiled out: always 0. */
+static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) {
+    return 0;
+}
+#endif
+
+#if !UCONFIG_NO_NORMALIZATION
+/** Returns the bits above the low eight of the value from unorm_getFCD16(c). */
+static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+    const auto fcd16=unorm_getFCD16(c);
+    return fcd16>>8;
+}
+#else
+/** Normalization is compiled out: always 0. */
+static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) {
+    return 0;
+}
+#endif
+
+#if !UCONFIG_NO_NORMALIZATION
+/** Returns the low eight bits of the value from unorm_getFCD16(c). */
+static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+    const auto fcd16=unorm_getFCD16(c);
+    return fcd16&0xff;
+}
+#else
+/** Normalization is compiled out: always 0. */
+static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) {
+    return 0;
+}
+#endif
+
+/** Indic_Positional_Category value of c; 0 if the layout data or its trie is unavailable. */
+static int32_t getInPC(const IntProperty &, UChar32 c, UProperty) {
+    if (!ulayout_ensureData() || gInpcTrie == nullptr) {
+        return 0;
+    }
+    return ucptrie_get(gInpcTrie, c);
+}
+
+/** Indic_Syllabic_Category value of c; 0 if the layout data or its trie is unavailable. */
+static int32_t getInSC(const IntProperty &, UChar32 c, UProperty) {
+    if (!ulayout_ensureData() || gInscTrie == nullptr) {
+        return 0;
+    }
+    return ucptrie_get(gInscTrie, c);
+}
+
+/** Vertical_Orientation value of c; 0 if the layout data or its trie is unavailable. */
+static int32_t getVo(const IntProperty &, UChar32 c, UProperty) {
+    if (!ulayout_ensureData() || gVoTrie == nullptr) {
+        return 0;
+    }
+    return ucptrie_get(gVoTrie, c);
+}
+
+/**
+ * Maximum-value getter for the three layout properties.
+ * Returns the cached maximum for `which`, or 0 if the layout data cannot be
+ * loaded or `which` is not one of the three properties handled here.
+ */
+static int32_t layoutGetMaxValue(const IntProperty &/*prop*/, UProperty which) {
+    if (!ulayout_ensureData()) {
+        return 0;
+    }
+    if (which == UCHAR_INDIC_POSITIONAL_CATEGORY) {
+        return gMaxInpcValue;
+    }
+    if (which == UCHAR_INDIC_SYLLABIC_CATEGORY) {
+        return gMaxInscValue;
+    }
+    if (which == UCHAR_VERTICAL_ORIENTATION) {
+        return gMaxVoValue;
+    }
+    return 0;
+}
+
+static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={
+ /*
+ * column, mask and shift values for int-value properties from u_getUnicodeProperties().
+ * Must be in order of corresponding UProperty,
+ * and there must be exactly one entry per int UProperty.
+ *
+ * Properties with mask==0 are handled in code.
+ * For them, column is the UPropertySource value.
+ *
+ * The table is indexed with (which-UCHAR_INT_START) by
+ * u_getIntPropertyValue(), u_getIntPropertyMaxValue() and uprops_getSource().
+ * In rows that use getMaxValueFromShift, the third field is the maximum
+ * value rather than a bit shift.
+ */
+ { UPROPS_SRC_BIDI, 0, 0, getBiDiClass, biDiGetMaxValue },
+ { 0, UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue, defaultGetMaxValue },
+ { UPROPS_SRC_NFC, 0, 0xff, getCombiningClass, getMaxValueFromShift },
+ { 2, UPROPS_DT_MASK, 0, defaultGetValue, defaultGetMaxValue },
+ { 0, UPROPS_EA_MASK, UPROPS_EA_SHIFT, defaultGetValue, defaultGetMaxValue },
+ { UPROPS_SRC_CHAR, 0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory, getMaxValueFromShift },
+ { UPROPS_SRC_BIDI, 0, 0, getJoiningGroup, biDiGetMaxValue },
+ { UPROPS_SRC_BIDI, 0, 0, getJoiningType, biDiGetMaxValue },
+ { 2, UPROPS_LB_MASK, UPROPS_LB_SHIFT, defaultGetValue, defaultGetMaxValue },
+ { UPROPS_SRC_CHAR, 0, (int32_t)U_NT_COUNT-1, getNumericType, getMaxValueFromShift },
+ { UPROPS_SRC_PROPSVEC, 0, 0, getScript, scriptGetMaxValue },
+ { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1, getHangulSyllableType, getMaxValueFromShift },
+ // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
+ { UPROPS_SRC_NFC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift },
+ // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
+ { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift },
+ // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE
+ { UPROPS_SRC_NFC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift },
+ // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE
+ { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift },
+ { UPROPS_SRC_NFC, 0, 0xff, getLeadCombiningClass, getMaxValueFromShift },
+ { UPROPS_SRC_NFC, 0, 0xff, getTrailCombiningClass, getMaxValueFromShift },
+ { 2, UPROPS_GCB_MASK, UPROPS_GCB_SHIFT, defaultGetValue, defaultGetMaxValue },
+ { 2, UPROPS_SB_MASK, UPROPS_SB_SHIFT, defaultGetValue, defaultGetMaxValue },
+ { 2, UPROPS_WB_MASK, UPROPS_WB_SHIFT, defaultGetValue, defaultGetMaxValue },
+ { UPROPS_SRC_BIDI, 0, 0, getBiDiPairedBracketType, biDiGetMaxValue },
+ { UPROPS_SRC_INPC, 0, 0, getInPC, layoutGetMaxValue },
+ { UPROPS_SRC_INSC, 0, 0, getInSC, layoutGetMaxValue },
+ { UPROPS_SRC_VO, 0, 0, getVo, layoutGetMaxValue },
+};
+
+/**
+ * Returns the value of property `which` for code point c.
+ * Binary properties go through binProps[], int properties through
+ * intProps[]; UCHAR_GENERAL_CATEGORY_MASK is computed from u_charType().
+ *
+ * @return the property value, or 0 for a property this function does not handle
+ */
+U_CAPI int32_t U_EXPORT2
+u_getIntPropertyValue(UChar32 c, UProperty which) {
+    if(which<UCHAR_INT_START) {
+        if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) {
+            return 0;  // undefined
+        }
+        const BinaryProperty &binEntry=binProps[which];
+        return binEntry.contains(binEntry, c, which);
+    }
+    if(which<UCHAR_INT_LIMIT) {
+        const IntProperty &intEntry=intProps[which-UCHAR_INT_START];
+        return intEntry.getValue(intEntry, c, which);
+    }
+    if(which==UCHAR_GENERAL_CATEGORY_MASK) {
+        return U_MASK(u_charType(c));
+    }
+    return 0;  // undefined
+}
+
+// Returns 0 for every argument; the property selector is ignored.
+U_CAPI int32_t U_EXPORT2
+u_getIntPropertyMinValue(UProperty /*which*/) {
+ return 0; /* all binary/enum/int properties have a minimum value of 0 */
+}
+
+/**
+ * Returns the maximum value that property `which` can take.
+ *
+ * @return 1 for binary properties, the table-provided maximum for int
+ *         properties, and -1 for anything else
+ */
+U_CAPI int32_t U_EXPORT2
+u_getIntPropertyMaxValue(UProperty which) {
+    if(which<UCHAR_INT_START) {
+        const bool isBinary = UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT;
+        // maximum TRUE for all binary properties; otherwise undefined
+        return isBinary ? 1 : -1;
+    }
+    if(which>=UCHAR_INT_LIMIT) {
+        return -1;  // undefined
+    }
+    const IntProperty &entry=intProps[which-UCHAR_INT_START];
+    return entry.getMaxValue(entry, which);
+}
+
+/**
+ * Returns the UPropertySource that property `which` is taken from.
+ * For binary and int properties the answer comes from the binProps[] /
+ * intProps[] row: UPROPS_SRC_PROPSVEC when the row has a nonzero mask,
+ * otherwise the row's column field reinterpreted as a source.
+ *
+ * @return the source, or UPROPS_SRC_NONE for a property not handled here
+ */
+U_CFUNC UPropertySource U_EXPORT2
+uprops_getSource(UProperty which) {
+    if(which<UCHAR_BINARY_START) {
+        return UPROPS_SRC_NONE; /* undefined */
+    }
+    if(which<UCHAR_BINARY_LIMIT) {
+        const BinaryProperty &binEntry=binProps[which];
+        return binEntry.mask!=0 ? UPROPS_SRC_PROPSVEC : (UPropertySource)binEntry.column;
+    }
+    if(which<UCHAR_INT_START) {
+        return UPROPS_SRC_NONE; /* undefined */
+    }
+    if(which<UCHAR_INT_LIMIT) {
+        const IntProperty &intEntry=intProps[which-UCHAR_INT_START];
+        return intEntry.mask!=0 ? UPROPS_SRC_PROPSVEC : (UPropertySource)intEntry.column;
+    }
+    if(which<UCHAR_STRING_START) {
+        const bool fromChar = which==UCHAR_GENERAL_CATEGORY_MASK || which==UCHAR_NUMERIC_VALUE;
+        return fromChar ? UPROPS_SRC_CHAR : UPROPS_SRC_NONE;
+    }
+    if(which<UCHAR_STRING_LIMIT) {
+        switch(which) {
+        case UCHAR_AGE:
+            return UPROPS_SRC_PROPSVEC;
+
+        case UCHAR_BIDI_MIRRORING_GLYPH:
+            return UPROPS_SRC_BIDI;
+
+        case UCHAR_CASE_FOLDING:
+        case UCHAR_LOWERCASE_MAPPING:
+        case UCHAR_SIMPLE_CASE_FOLDING:
+        case UCHAR_SIMPLE_LOWERCASE_MAPPING:
+        case UCHAR_SIMPLE_TITLECASE_MAPPING:
+        case UCHAR_SIMPLE_UPPERCASE_MAPPING:
+        case UCHAR_TITLECASE_MAPPING:
+        case UCHAR_UPPERCASE_MAPPING:
+            return UPROPS_SRC_CASE;
+
+        case UCHAR_ISO_COMMENT:
+        case UCHAR_NAME:
+        case UCHAR_UNICODE_1_NAME:
+            return UPROPS_SRC_NAMES;
+
+        default:
+            return UPROPS_SRC_NONE;
+        }
+    }
+    // Everything at or beyond UCHAR_STRING_LIMIT.
+    if(which==UCHAR_SCRIPT_EXTENSIONS) {
+        return UPROPS_SRC_PROPSVEC;
+    }
+    return UPROPS_SRC_NONE; /* undefined */
+}
+
+/**
+ * Adds to `sa` the first code point of every same-value range in the layout
+ * trie selected by `src`.
+ *
+ * @param src        UPROPS_SRC_INPC, UPROPS_SRC_INSC or UPROPS_SRC_VO
+ * @param sa         set adder that receives the range starts
+ * @param pErrorCode in/out status; set to U_ILLEGAL_ARGUMENT_ERROR for any
+ *                   other src, U_MISSING_RESOURCE_ERROR if the trie was not loaded
+ */
+U_CFUNC void U_EXPORT2
+uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode) {
+    if (!ulayout_ensureData(*pErrorCode)) { return; }
+
+    const UCPTrie *selectedTrie = nullptr;
+    if (src == UPROPS_SRC_INPC) {
+        selectedTrie = gInpcTrie;
+    } else if (src == UPROPS_SRC_INSC) {
+        selectedTrie = gInscTrie;
+    } else if (src == UPROPS_SRC_VO) {
+        selectedTrie = gVoTrie;
+    } else {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    if (selectedTrie == nullptr) {
+        *pErrorCode = U_MISSING_RESOURCE_ERROR;
+        return;
+    }
+
+    // Add the start code point of each same-value range of the trie.
+    // ucptrie_getRange() signals the end of iteration with a negative result.
+    UChar32 rangeStart = 0;
+    for (;;) {
+        const UChar32 rangeEnd = ucptrie_getRange(selectedTrie, rangeStart, UCPMAP_RANGE_NORMAL, 0,
+                                                  nullptr, nullptr, nullptr);
+        if (rangeEnd < 0) {
+            break;
+        }
+        sa->add(sa->set, rangeStart);
+        rangeStart = rangeEnd + 1;
+    }
+}
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/**
+ * Computes the FC_NFKC_Closure string for c on the fly and writes it to dest.
+ *
+ * @param c            code point to look up
+ * @param dest         output buffer; may be NULL only if destCapacity is 0
+ * @param destCapacity capacity of dest in UChars; must not be negative
+ * @param pErrorCode   in/out status; set to U_ILLEGAL_ARGUMENT_ERROR for a
+ *                     negative capacity or a NULL dest with positive capacity
+ * @return 0 without touching dest if pErrorCode is NULL or already a failure,
+ *         or after an argument error; otherwise the result of
+ *         u_terminateUChars(..., 0, ...) when there is no mapping to report,
+ *         or of kc2.extract() when there is one
+ */
+U_CAPI int32_t U_EXPORT2
+u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) {
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ // Compute the FC_NFKC_Closure on the fly:
+ // We have the API for complete coverage of Unicode properties, although
+ // this value by itself is not useful via API.
+ // (What could be useful is a custom normalization table that combines
+ // case folding and NFKC.)
+ // For the derivation, see Unicode's DerivedNormalizationProps.txt.
+ const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+ // first: b = NFKC(Fold(a))
+ UnicodeString folded1String;
+ const UChar *folded1;
+ int32_t folded1Length=ucase_toFullFolding(c, &folded1, U_FOLD_CASE_DEFAULT);
+ if(folded1Length<0) {
+ // Negative result: treated as "case folding leaves c unchanged".
+ const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc);
+ if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) {
+ return u_terminateUChars(dest, destCapacity, 0, pErrorCode); // c does not change at all under CaseFolding+NFKC
+ }
+ folded1String.setTo(c);
+ } else {
+ // A result above UCASE_MAX_STRING_LENGTH is itself the folded code point;
+ // otherwise it is the length of the string that folded1 points to.
+ if(folded1Length>UCASE_MAX_STRING_LENGTH) {
+ folded1String.setTo(folded1Length);
+ } else {
+ folded1String.setTo(FALSE, folded1, folded1Length);
+ }
+ }
+ UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode);
+ // second: c = NFKC(Fold(b))
+ UnicodeString folded2String(kc1);
+ UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode);
+ // if (c != b) add the mapping from a to c
+ if(U_FAILURE(*pErrorCode) || kc1==kc2) {
+ return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
+ } else {
+ return kc2.extract(dest, destCapacity, *pErrorCode);
+ }
+}
+
+#endif
diff --git a/thirdparty/icu4c/common/uprops.h b/thirdparty/icu4c/common/uprops.h
new file mode 100644
index 0000000000..8bf929919f
--- /dev/null
+++ b/thirdparty/icu4c/common/uprops.h
@@ -0,0 +1,504 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uprops.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002feb24
+* created by: Markus W. Scherer
+*
+* Constants for mostly non-core Unicode character properties
+* stored in uprops.icu.
+*/
+
+#ifndef __UPROPS_H__
+#define __UPROPS_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uset.h"
+#include "uset_imp.h"
+#include "udataswp.h"
+
+/*
+ * indexes[] entries.
+ * Enumerators without an initializer are numbered consecutively from 0, so
+ * UPROPS_PROPS32_INDEX==0 and UPROPS_DATA_TOP_INDEX==9; the explicit
+ * UPROPS_MAX_VALUES_INDEX=10 continues that sequence without a gap.
+ */
+enum {
+ UPROPS_PROPS32_INDEX, /* 0 */
+ UPROPS_EXCEPTIONS_INDEX, /* 1 */
+ UPROPS_EXCEPTIONS_TOP_INDEX, /* 2 */
+
+ UPROPS_ADDITIONAL_TRIE_INDEX, /* 3 */
+ UPROPS_ADDITIONAL_VECTORS_INDEX, /* 4 */
+ UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX, /* 5 */
+
+ UPROPS_SCRIPT_EXTENSIONS_INDEX, /* 6 */
+
+ UPROPS_RESERVED_INDEX_7, /* 7 */
+ UPROPS_RESERVED_INDEX_8, /* 8 */
+
+ /* size of the data file (number of 32-bit units after the header) */
+ UPROPS_DATA_TOP_INDEX, /* 9 */
+
+ /* maximum values for code values in vector word 0 */
+ UPROPS_MAX_VALUES_INDEX=10,
+ /* maximum values for code values in vector word 2 */
+ UPROPS_MAX_VALUES_2_INDEX, /* 11 */
+
+ UPROPS_INDEX_COUNT=16 /* indexes 12..15 have no named constant here */
+};
+
+/*
+ * definitions for the main properties words
+ * (bit layout, lowest bit first: general category, one reserved bit,
+ * then the numeric type/value field)
+ */
+enum {
+ /* general category shift==0 0 (5 bits) */
+ /* reserved 5 (1 bit) */
+ UPROPS_NUMERIC_TYPE_VALUE_SHIFT=6 /* 6 (10 bits) */
+};
+
+#define GET_CATEGORY(props) ((props)&0x1f) /* keeps the low 5 bits: the general category */
+#define CAT_MASK(props) U_MASK(GET_CATEGORY(props)) /* U_MASK applied to the category value */
+
+#define GET_NUMERIC_TYPE_VALUE(props) ((props)>>UPROPS_NUMERIC_TYPE_VALUE_SHIFT) /* shifts out the low 6 bits */
+
+/*
+ * constants for the storage form of numeric types and values
+ * ("ntv" below is the stored numeric-type-value code; each *_START constant
+ * is the first ntv of its range, and a range ends just before the next one)
+ */
+enum {
+ /** No numeric value. */
+ UPROPS_NTV_NONE=0,
+ /** Decimal digits: nv=0..9 (ntv=1..10) */
+ UPROPS_NTV_DECIMAL_START=1,
+ /** Other digits: nv=0..9 (ntv=11..20) */
+ UPROPS_NTV_DIGIT_START=11,
+ /** Small integers: nv=0..154 (ntv=21..0xaf) */
+ UPROPS_NTV_NUMERIC_START=21,
+ /** Fractions: ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16 */
+ UPROPS_NTV_FRACTION_START=0xb0,
+ /**
+ * Large integers:
+ * ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33)
+ * (only one significant decimal digit)
+ */
+ UPROPS_NTV_LARGE_START=0x1e0,
+ /**
+ * Sexagesimal numbers:
+ * ((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4)
+ */
+ UPROPS_NTV_BASE60_START=0x300,
+ /**
+ * Fraction-20 values:
+ * frac20 = ntv-0x324 = 0..0x17 -> 1|3|5|7 / 20|40|80|160|320|640
+ * numerator: num = 2*(frac20&3)+1
+ * denominator: den = 20<<(frac20>>2)
+ */
+ UPROPS_NTV_FRACTION20_START=UPROPS_NTV_BASE60_START+36, // 0x300+9*4=0x324
+ /**
+ * Fraction-32 values:
+ * frac32 = ntv-0x34c = 0..15 -> 1|3|5|7 / 32|64|128|256
+ * numerator: num = 2*(frac32&3)+1
+ * denominator: den = 32<<(frac32>>2)
+ */
+ UPROPS_NTV_FRACTION32_START=UPROPS_NTV_FRACTION20_START+24, // 0x324+6*4=0x34c
+ /** No numeric value (yet). */
+ UPROPS_NTV_RESERVED_START=UPROPS_NTV_FRACTION32_START+16, // 0x34c+4*4=0x35c
+
+ UPROPS_NTV_MAX_SMALL_INT=UPROPS_NTV_FRACTION_START-UPROPS_NTV_NUMERIC_START-1 /* 0xb0-21-1 = 154, the largest "small integer" nv */
+};
+
+/*
+ * Maps a stored numeric-type-value code (ntv) to one of the U_NT_* constants:
+ *   ntv==UPROPS_NTV_NONE            -> U_NT_NONE
+ *   ntv< UPROPS_NTV_DIGIT_START     -> U_NT_DECIMAL
+ *   ntv< UPROPS_NTV_NUMERIC_START   -> U_NT_DIGIT
+ *   anything else                   -> U_NT_NUMERIC
+ * The argument is parenthesized at every use so that an expression such as
+ * a&b or x?y:z keeps its meaning. It is evaluated up to three times, so it
+ * must not have side effects.
+ */
+#define UPROPS_NTV_GET_TYPE(ntv) \
+    (((ntv)==UPROPS_NTV_NONE) ? U_NT_NONE : \
+    ((ntv)<UPROPS_NTV_DIGIT_START) ? U_NT_DECIMAL : \
+    ((ntv)<UPROPS_NTV_NUMERIC_START) ? U_NT_DIGIT : \
+    U_NT_NUMERIC)
+
+/* number of properties vector words */
+#define UPROPS_VECTOR_WORDS 3
+
+/*
+ * Properties in vector word 0
+ * Bits
+ * 31..24 DerivedAge version major/minor one nibble each
+ * 23..22 3..1: Bits 21..20 & 7..0 = Script_Extensions index
+ * 3: Script value from Script_Extensions
+ * 2: Script=Inherited
+ * 1: Script=Common
+ * 0: Script=bits 21..20 & 7..0
+ * 21..20 Bits 9..8 of the UScriptCode, or index to Script_Extensions
+ * 19..17 East Asian Width
+ * 16.. 8 UBlockCode
+ * 7.. 0 UScriptCode, or index to Script_Extensions
+ */
+
+/* derived age: one nibble each for major and minor version numbers */
+#define UPROPS_AGE_MASK 0xff000000 /* bits 31..24 */
+#define UPROPS_AGE_SHIFT 24
+
+/* Script_Extensions: mask includes Script */
+#define UPROPS_SCRIPT_X_MASK 0x00f000ff /* bits 23..20 and 7..0 */
+#define UPROPS_SCRIPT_X_SHIFT 22 /* position of the 2-bit selector in bits 23..22 */
+
+// The UScriptCode or Script_Extensions index is split across two bit fields.
+// (Starting with Unicode 13/ICU 66/2019 due to more varied Script_Extensions.)
+// Shift the high bits right by 12 to assemble the full value.
+#define UPROPS_SCRIPT_HIGH_MASK 0x00300000 /* bits 21..20 */
+#define UPROPS_SCRIPT_HIGH_SHIFT 12 /* moves bits 21..20 down to bits 9..8 */
+#define UPROPS_MAX_SCRIPT 0x3ff /* largest value of the assembled 10-bit field */
+
+#define UPROPS_EA_MASK 0x000e0000 /* bits 19..17 */
+#define UPROPS_EA_SHIFT 17
+
+#define UPROPS_BLOCK_MASK 0x0001ff00 /* bits 16..8 */
+#define UPROPS_BLOCK_SHIFT 8
+
+#define UPROPS_SCRIPT_LOW_MASK 0x000000ff /* bits 7..0 */
+
+/* UPROPS_SCRIPT_X_WITH_COMMON must be the lowest value that involves Script_Extensions. */
+#define UPROPS_SCRIPT_X_WITH_COMMON 0x400000 /* 1<<UPROPS_SCRIPT_X_SHIFT */
+#define UPROPS_SCRIPT_X_WITH_INHERITED 0x800000 /* 2<<UPROPS_SCRIPT_X_SHIFT */
+#define UPROPS_SCRIPT_X_WITH_OTHER 0xc00000 /* 3<<UPROPS_SCRIPT_X_SHIFT */
+
+#ifdef __cplusplus
+
+namespace {
+
+/**
+ * Reassembles the script code (or Script_Extensions index) that vector
+ * word 0 stores in two separate bit fields.
+ * @param scriptX Vector word 0, or at least its UPROPS_SCRIPT_X_MASK bits.
+ * @return The high field shifted down by UPROPS_SCRIPT_HIGH_SHIFT, OR-ed
+ *         with the low field.
+ */
+inline uint32_t uprops_mergeScriptCodeOrIndex(uint32_t scriptX) {
+    const uint32_t highPart = (scriptX & UPROPS_SCRIPT_HIGH_MASK) >> UPROPS_SCRIPT_HIGH_SHIFT;
+    const uint32_t lowPart = scriptX & UPROPS_SCRIPT_LOW_MASK;
+    return highPart | lowPart;
+}
+
+}  // namespace
+
+#endif // __cplusplus
+
+/*
+ * Properties in vector word 1
+ * Each bit encodes one binary property.
+ * The following constants represent the bit number, use 1<<UPROPS_XYZ.
+ * UPROPS_BINARY_1_TOP<=32!
+ *
+ * Keep this list of property enums in sync with
+ * propListNames[] in icu/source/tools/genprops/props2.c!
+ *
+ * ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
+ */
+enum {
+ UPROPS_WHITE_SPACE,
+ UPROPS_DASH,
+ UPROPS_HYPHEN,
+ UPROPS_QUOTATION_MARK,
+ UPROPS_TERMINAL_PUNCTUATION,
+ UPROPS_MATH,
+ UPROPS_HEX_DIGIT,
+ UPROPS_ASCII_HEX_DIGIT,
+ UPROPS_ALPHABETIC,
+ UPROPS_IDEOGRAPHIC,
+ UPROPS_DIACRITIC,
+ UPROPS_EXTENDER,
+ UPROPS_NONCHARACTER_CODE_POINT,
+ UPROPS_GRAPHEME_EXTEND,
+ UPROPS_GRAPHEME_LINK,
+ UPROPS_IDS_BINARY_OPERATOR,
+ UPROPS_IDS_TRINARY_OPERATOR,
+ UPROPS_RADICAL,
+ UPROPS_UNIFIED_IDEOGRAPH,
+ UPROPS_DEFAULT_IGNORABLE_CODE_POINT,
+ UPROPS_DEPRECATED,
+ UPROPS_LOGICAL_ORDER_EXCEPTION,
+ UPROPS_XID_START,
+ UPROPS_XID_CONTINUE,
+ UPROPS_ID_START, /* ICU 2.6, uprops format version 3.2 */
+ UPROPS_ID_CONTINUE,
+ UPROPS_GRAPHEME_BASE,
+ UPROPS_S_TERM, /* new in ICU 3.0 and Unicode 4.0.1 */
+ UPROPS_VARIATION_SELECTOR,
+ UPROPS_PATTERN_SYNTAX, /* new in ICU 3.4 and Unicode 4.1 */
+ UPROPS_PATTERN_WHITE_SPACE,
+ UPROPS_PREPENDED_CONCATENATION_MARK, // new in ICU 60 and Unicode 10
+ UPROPS_BINARY_1_TOP /* ==32 - full! */
+};
+
+/*
+ * Properties in vector word 2
+ * Bits
+ * 31..26 http://www.unicode.org/reports/tr51/#Emoji_Properties
+ * 25..20 Line Break
+ * 19..15 Sentence Break
+ * 14..10 Word Break
+ * 9.. 5 Grapheme Cluster Break
+ * 4.. 0 Decomposition Type
+ *
+ * The enum below names the bit numbers 26..31 of the emoji flags;
+ * the masks/shifts after it select the enumerated fields in bits 25..0.
+ */
+enum {
+ UPROPS_2_EXTENDED_PICTOGRAPHIC=26,
+ UPROPS_2_EMOJI_COMPONENT, /* 27 */
+ UPROPS_2_EMOJI, /* 28 */
+ UPROPS_2_EMOJI_PRESENTATION, /* 29 */
+ UPROPS_2_EMOJI_MODIFIER, /* 30 */
+ UPROPS_2_EMOJI_MODIFIER_BASE /* 31 */
+};
+
+#define UPROPS_LB_MASK 0x03f00000 /* bits 25..20 */
+#define UPROPS_LB_SHIFT 20
+
+#define UPROPS_SB_MASK 0x000f8000 /* bits 19..15 */
+#define UPROPS_SB_SHIFT 15
+
+#define UPROPS_WB_MASK 0x00007c00 /* bits 14..10 */
+#define UPROPS_WB_SHIFT 10
+
+#define UPROPS_GCB_MASK 0x000003e0 /* bits 9..5 */
+#define UPROPS_GCB_SHIFT 5
+
+#define UPROPS_DT_MASK 0x0000001f /* bits 4..0; no shift constant is needed */
+
+/**
+ * Gets the main properties value for a code point.
+ * Implemented in uchar.c for uprops.cpp.
+ */
+U_CFUNC uint32_t
+u_getMainProperties(UChar32 c);
+
+/**
+ * Get a properties vector word for a code point.
+ * Implemented in uchar.c for uprops.cpp.
+ * @return 0 if no data or illegal argument
+ */
+U_CFUNC uint32_t
+u_getUnicodeProperties(UChar32 c, int32_t column);
+
+/**
+ * Get the maximum values for some enum/int properties.
+ * Use the same column numbers as for u_getUnicodeProperties().
+ * The returned value will contain maximum values stored in the same bit fields
+ * as where the enum values are stored in the u_getUnicodeProperties()
+ * return values for the same columns.
+ *
+ * Valid columns are those for properties words that contain enumerated values.
+ * (ICU 2.6: columns 0 and 2)
+ * For other column numbers, this function will return 0.
+ *
+ * @internal
+ */
+U_CFUNC int32_t
+uprv_getMaxValues(int32_t column);
+
+/**
+ * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM.
+ * @internal
+ */
+U_CFUNC UBool
+u_isalnumPOSIX(UChar32 c);
+
+/**
+ * Checks if c is in
+ * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
+ * with space=\p{Whitespace} and Control=Cc.
+ * Implements UCHAR_POSIX_GRAPH.
+ * @internal
+ */
+U_CFUNC UBool
+u_isgraphPOSIX(UChar32 c);
+
+/**
+ * Checks if c is in \p{graph}\p{blank} - \p{cntrl}.
+ * Implements UCHAR_POSIX_PRINT.
+ * @internal
+ */
+U_CFUNC UBool
+u_isprintPOSIX(UChar32 c);
+
+/** Turn a bit index into a bit flag. @internal */
+#define FLAG(n) ((uint32_t)1<<(n))
+
+/** Flags for general categories in the order of UCharCategory. @internal */
+#define _Cn FLAG(U_GENERAL_OTHER_TYPES)
+#define _Lu FLAG(U_UPPERCASE_LETTER)
+#define _Ll FLAG(U_LOWERCASE_LETTER)
+#define _Lt FLAG(U_TITLECASE_LETTER)
+#define _Lm FLAG(U_MODIFIER_LETTER)
+/* #define _Lo FLAG(U_OTHER_LETTER) -- conflicts with MS Visual Studio 9.0 xiosbase */
+#define _Mn FLAG(U_NON_SPACING_MARK)
+#define _Me FLAG(U_ENCLOSING_MARK)
+#define _Mc FLAG(U_COMBINING_SPACING_MARK)
+#define _Nd FLAG(U_DECIMAL_DIGIT_NUMBER)
+#define _Nl FLAG(U_LETTER_NUMBER)
+#define _No FLAG(U_OTHER_NUMBER)
+#define _Zs FLAG(U_SPACE_SEPARATOR)
+#define _Zl FLAG(U_LINE_SEPARATOR)
+#define _Zp FLAG(U_PARAGRAPH_SEPARATOR)
+#define _Cc FLAG(U_CONTROL_CHAR)
+#define _Cf FLAG(U_FORMAT_CHAR)
+#define _Co FLAG(U_PRIVATE_USE_CHAR)
+#define _Cs FLAG(U_SURROGATE)
+#define _Pd FLAG(U_DASH_PUNCTUATION)
+#define _Ps FLAG(U_START_PUNCTUATION)
+/* #define _Pe FLAG(U_END_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 xlocnum */
+/* #define _Pc FLAG(U_CONNECTOR_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */
+#define _Po FLAG(U_OTHER_PUNCTUATION)
+#define _Sm FLAG(U_MATH_SYMBOL)
+#define _Sc FLAG(U_CURRENCY_SYMBOL)
+#define _Sk FLAG(U_MODIFIER_SYMBOL)
+#define _So FLAG(U_OTHER_SYMBOL)
+#define _Pi FLAG(U_INITIAL_PUNCTUATION)
+/* #define _Pf FLAG(U_FINAL_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */
+
+/** Some code points. @internal */
+enum {
+ TAB =0x0009,
+ LF =0x000a,
+ FF =0x000c,
+ CR =0x000d,
+ U_A =0x0041,
+ U_F =0x0046,
+ U_Z =0x005a,
+ U_a =0x0061,
+ U_f =0x0066,
+ U_z =0x007a,
+ DEL =0x007f,
+ NL =0x0085,
+ NBSP =0x00a0,
+ CGJ =0x034f,
+ FIGURESP=0x2007,
+ HAIRSP =0x200a,
+ ZWNJ =0x200c,
+ ZWJ =0x200d,
+ RLM =0x200f,
+ NNBSP =0x202f,
+ WJ =0x2060,
+ INHSWAP =0x206a,
+ NOMDIG =0x206f,
+ U_FW_A =0xff21,
+ U_FW_F =0xff26,
+ U_FW_Z =0xff3a,
+ U_FW_a =0xff41,
+ U_FW_f =0xff46,
+ U_FW_z =0xff5a,
+ ZWNBSP =0xfeff
+};
+
+/**
+ * Get the maximum length of a (regular/1.0/extended) character name.
+ * @return 0 if no character names available.
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_getMaxCharNameLength(void);
+
+/**
+ * Fills set with characters that are used in Unicode character names.
+ * Includes all characters that are used in regular/Unicode 1.0/extended names.
+ * Just empties the set if no character names are available.
+ * @param sa USetAdder to receive characters.
+ */
+U_CAPI void U_EXPORT2
+uprv_getCharNameCharacters(const USetAdder *sa);
+
+/**
+ * Constants for which data and implementation files provide which properties.
+ * Used by UnicodeSet for service-specific property enumeration.
+ * @internal
+ */
+enum UPropertySource {
+ /** No source, not a supported property. */
+ UPROPS_SRC_NONE,
+ /** From uchar.c/uprops.icu main trie */
+ UPROPS_SRC_CHAR,
+ /** From uchar.c/uprops.icu properties vectors trie */
+ UPROPS_SRC_PROPSVEC,
+ /** From unames.c/unames.icu */
+ UPROPS_SRC_NAMES,
+ /** From ucase.c/ucase.icu */
+ UPROPS_SRC_CASE,
+ /** From ubidi_props.c/ubidi.icu */
+ UPROPS_SRC_BIDI,
+ /** From uchar.c/uprops.icu main trie as well as properties vectors trie */
+ UPROPS_SRC_CHAR_AND_PROPSVEC,
+ /** From ucase.c/ucase.icu as well as unorm.cpp/unorm.icu */
+ UPROPS_SRC_CASE_AND_NORM,
+ /** From normalizer2impl.cpp/nfc.nrm */
+ UPROPS_SRC_NFC,
+ /** From normalizer2impl.cpp/nfkc.nrm */
+ UPROPS_SRC_NFKC,
+ /** From normalizer2impl.cpp/nfkc_cf.nrm */
+ UPROPS_SRC_NFKC_CF,
+ /** From normalizer2impl.cpp/nfc.nrm canonical iterator data */
+ UPROPS_SRC_NFC_CANON_ITER,
+ // Text layout properties.
+ UPROPS_SRC_INPC,
+ UPROPS_SRC_INSC,
+ UPROPS_SRC_VO,
+ /** One more than the highest UPropertySource (UPROPS_SRC_) constant. */
+ UPROPS_SRC_COUNT
+};
+typedef enum UPropertySource UPropertySource;
+
+/**
+ * @see UPropertySource
+ * @internal
+ */
+U_CFUNC UPropertySource U_EXPORT2
+uprops_getSource(UProperty which);
+
+/**
+ * Enumerate uprops.icu's main data trie and add the
+ * start of each range of same properties to the set.
+ * @internal
+ */
+U_CFUNC void U_EXPORT2
+uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
+
+/**
+ * Enumerate uprops.icu's properties vectors trie and add the
+ * start of each range of same properties to the set.
+ * @internal
+ */
+U_CFUNC void U_EXPORT2
+upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
+
+U_CFUNC void U_EXPORT2
+uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode);
+
+/**
+ * Return a set of characters for property enumeration.
+ * For each two consecutive characters (start, limit) in the set,
+ * all of the properties for start..limit-1 are all the same.
+ *
+ * @param sa USetAdder to receive result. Existing contents are lost.
+ * @internal
+ */
+/*U_CFUNC void U_EXPORT2
+uprv_getInclusions(const USetAdder *sa, UErrorCode *pErrorCode);
+*/
+
+/**
+ * Swap the ICU Unicode character names file. See uchar.c.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+uchar_swapNames(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+#ifdef __cplusplus
+
+U_NAMESPACE_BEGIN
+
+class UnicodeSet;
+
+class CharacterProperties {
+public:
+ CharacterProperties() = delete;
+ static const UnicodeSet *getInclusionsForProperty(UProperty prop, UErrorCode &errorCode);
+};
+
+// implemented in uniset_props.cpp
+U_CFUNC UnicodeSet *
+uniset_getUnicode32Instance(UErrorCode &errorCode);
+
+U_NAMESPACE_END
+
+#endif
+
+#endif
diff --git a/thirdparty/icu4c/common/ures_cnv.cpp b/thirdparty/icu4c/common/ures_cnv.cpp
new file mode 100644
index 0000000000..1aa58e753c
--- /dev/null
+++ b/thirdparty/icu4c/common/ures_cnv.cpp
@@ -0,0 +1,78 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1997-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ures_cnv.c
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004aug25
+* created by: Markus W. Scherer
+*
+* Character conversion functions moved here from uresbund.c
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/ustring.h"
+#include "unicode/ucnv.h"
+#include "unicode/ures.h"
+#include "uinvchar.h"
+#include "ustr_cnv.h"
+
+/**
+ * Opens a resource bundle whose package path is given as a UChar string.
+ *
+ * The path is converted into a fixed 1024-byte char buffer: with the
+ * invariant-character converter when every character allows it, otherwise
+ * with the default converter (unless conversion is compiled out).
+ *
+ * @param myPath   NUL-terminated path, or NULL to pass a NULL path through.
+ * @param localeID Locale ID forwarded unchanged to ures_open().
+ * @param status   In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR if the
+ *                 path does not fit the buffer, U_UNSUPPORTED_ERROR if a
+ *                 converter is needed but not available.
+ * @return The result of ures_open(), or NULL on failure.
+ */
+U_CAPI UResourceBundle * U_EXPORT2
+ures_openU(const UChar *myPath,
+           const char *localeID,
+           UErrorCode *status)
+{
+    if(status==NULL || U_FAILURE(*status)) {
+        return NULL;
+    }
+    if(myPath==NULL) {
+        return ures_open(NULL, localeID, status);
+    }
+
+    char pathBuffer[1024];
+    const int32_t capacity=(int32_t)sizeof(pathBuffer);
+    const int32_t uLength=u_strlen(myPath);
+    if(uLength>=capacity) {
+        *status=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    if(uprv_isInvariantUString(myPath, uLength)) {
+        /*
+         * the invariant converter is sufficient for package and tree names
+         * and is more efficient
+         */
+        u_UCharsToChars(myPath, pathBuffer, uLength+1); /* uLength+1 to include the NUL */
+        return ures_open(pathBuffer, localeID, status);
+    }
+
+#if !UCONFIG_NO_CONVERSION
+    /* use the default converter to support variant-character paths */
+    UConverter *cnv=u_getDefaultConverter(status);
+    const int32_t convertedLength=ucnv_fromUChars(cnv, pathBuffer, capacity, myPath, uLength, status);
+    u_releaseDefaultConverter(cnv);
+    if(U_FAILURE(*status)) {
+        return NULL;
+    }
+    if(convertedLength>=capacity) {
+        /* not NUL-terminated - path too long */
+        *status=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    return ures_open(pathBuffer, localeID, status);
+#else
+    /* the default converter is not available */
+    *status=U_UNSUPPORTED_ERROR;
+    return NULL;
+#endif
+}
diff --git a/thirdparty/icu4c/common/uresbund.cpp b/thirdparty/icu4c/common/uresbund.cpp
new file mode 100644
index 0000000000..2ece87897d
--- /dev/null
+++ b/thirdparty/icu4c/common/uresbund.cpp
@@ -0,0 +1,3090 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1997-2016, International Business Machines Corporation and
+* others. All Rights Reserved.
+******************************************************************************
+*
+* File uresbund.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 04/01/97 aliu Creation.
+* 06/14/99 stephen Removed functions taking a filename suffix.
+* 07/20/99 stephen Changed for UResourceBundle typedef'd to void*
+* 11/09/99 weiv Added ures_getLocale()
+* March 2000 weiv Total overhaul - using data in DLLs
+* 06/20/2000 helena OS/400 port changes; mostly typecast.
+* 06/24/02 weiv Added support for resource sharing
+******************************************************************************
+*/
+
+#include "unicode/ures.h"
+#include "unicode/ustring.h"
+#include "unicode/ucnv.h"
+#include "charstr.h"
+#include "uresimp.h"
+#include "ustr_imp.h"
+#include "cwchar.h"
+#include "ucln_cmn.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "mutex.h"
+#include "uhash.h"
+#include "unicode/uenum.h"
+#include "uenumimp.h"
+#include "ulocimp.h"
+#include "umutex.h"
+#include "putilimp.h"
+#include "uassert.h"
+#include "uresdata.h"
+
+using namespace icu;
+
+/*
+Static cache for already opened resource bundles - mostly for keeping fallback info
+TODO: This cache should probably be removed when the deprecated code is
+ completely removed.
+*/
+static UHashtable *cache = NULL;
+static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER;
+
+static UMutex resbMutex;
+
+/*
+ * INTERNAL: hash function for cache entries.
+ * Combines the string hashes of the entry's name and path, so two entries
+ * with equal (fName, fPath) hash alike.
+ */
+static int32_t U_CALLCONV hashEntry(const UHashTok parm) {
+    UResourceDataEntry *entry = (UResourceDataEntry *)parm.pointer;
+    UHashTok nameTok;
+    UHashTok pathTok;
+    nameTok.pointer = entry->fName;
+    pathTok.pointer = entry->fPath;
+    return uhash_hashChars(nameTok)+37u*uhash_hashChars(pathTok);
+}
+
+/*
+ * INTERNAL: equality function for cache entries.
+ * Two entries are equal when both their names and their paths compare equal
+ * under uhash_compareChars().
+ */
+static UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) {
+    UResourceDataEntry *lhs = (UResourceDataEntry *)p1.pointer;
+    UResourceDataEntry *rhs = (UResourceDataEntry *)p2.pointer;
+    UHashTok lhsTok;
+    UHashTok rhsTok;
+
+    lhsTok.pointer = lhs->fName;
+    rhsTok.pointer = rhs->fName;
+    if(!uhash_compareChars(lhsTok, rhsTok)) {
+        return FALSE;   /* names differ; paths need not be looked at */
+    }
+
+    lhsTok.pointer = lhs->fPath;
+    rhsTok.pointer = rhs->fPath;
+    return (UBool)(uhash_compareChars(lhsTok, rhsTok) != 0);
+}
+
+
+/**
+ * Internal function: truncates a locale name in place at its last '_',
+ * dropping the final subtag.
+ * @return TRUE if an underscore was found and the name was shortened,
+ *         FALSE if the name was left untouched.
+ */
+static UBool chopLocale(char *name) {
+    char *lastUnderscore = uprv_strrchr(name, '_');
+    if(lastUnderscore == NULL) {
+        return FALSE;
+    }
+    *lastUnderscore = '\0';
+    return TRUE;
+}
+
+/**
+ * Internal function: increments fCountExisting on the given entry and on
+ * every entry reachable through its fParent chain, under resbMutex.
+ */
+static void entryIncrease(UResourceDataEntry *entry) {
+    Mutex lock(&resbMutex);
+    UResourceDataEntry *current = entry;
+    do {
+        current->fCountExisting++;
+        current = current->fParent;
+    } while(current != NULL);
+}
+
+/**
+ * Internal function. Looks a top-level key up in the given resource bundle
+ * and, if the bundle allows fallback, in its chain of parent entries.
+ *
+ * @param resBundle Bundle whose fData entry starts the search.
+ * @param resTag    In/out key pointer handed to res_getTableItemByKey().
+ * @param realData  Receives the entry in which the item was found.
+ * @param res       Receives the found Resource, or RES_BOGUS.
+ * @param status    Set to U_MISSING_RESOURCE_ERROR if nothing is found; set to
+ *                  U_USING_DEFAULT_WARNING or U_USING_FALLBACK_WARNING if the
+ *                  item came from the second or a later real bundle examined.
+ * @return The ResourceData of the entry holding the item, or NULL.
+ */
+static const ResourceData *getFallbackData(const UResourceBundle* resBundle, const char* * resTag, UResourceDataEntry* *realData, Resource *res, UErrorCode *status) {
+    UResourceDataEntry *entry = resBundle->fData;
+    int32_t itemIndex = -1;
+    int32_t realBundlesTried = 0;   /* number of non-bogus entries searched */
+    *res = RES_BOGUS;
+
+    if(entry == NULL) {
+        *status = U_MISSING_RESOURCE_ERROR;
+        return NULL;
+    }
+
+    if(entry->fBogus == U_ZERO_ERROR) { /* this entry is real: try it first */
+        *res = res_getTableItemByKey(&(entry->fData), entry->fData.rootRes, &itemIndex, resTag);
+        realBundlesTried++;
+    }
+    if(resBundle->fHasFallback == TRUE) {
+        /* walk up the parent chain until the item turns up or the chain ends */
+        while(*res == RES_BOGUS && entry->fParent != NULL) {
+            entry = entry->fParent;
+            if(entry->fBogus != U_ZERO_ERROR) {
+                continue;   /* placeholder entry without data */
+            }
+            realBundlesTried++;
+            *res = res_getTableItemByKey(&(entry->fData), entry->fData.rootRes, &itemIndex, resTag);
+        }
+    }
+
+    if(*res == RES_BOGUS) { /* not found anywhere */
+        *status = U_MISSING_RESOURCE_ERROR;
+        return NULL;
+    }
+
+    if(realBundlesTried>1) { /* found in a parent: adjust the status accordingly */
+        if(uprv_strcmp(entry->fName, uloc_getDefault())==0 || uprv_strcmp(entry->fName, kRootLocaleName)==0) {
+            *status = U_USING_DEFAULT_WARNING;
+        } else {
+            *status = U_USING_FALLBACK_WARNING;
+        }
+    }
+    *realData = entry;
+    return &(entry->fData);
+}
+
+/*
+ * INTERNAL: releases everything an entry owns and then the entry itself.
+ * Unloads the resource data, frees a heap-allocated name and the path, and
+ * gives back the references this entry held on its pool bundle and on the
+ * entry at the end of its alias chain.
+ */
+static void
+free_entry(UResourceDataEntry *entry) {
+    res_unload(&(entry->fData));
+
+    /* fName may point at the embedded fNameBuffer, which must not be freed */
+    const UBool nameOnHeap = (UBool)(entry->fName != NULL && entry->fName != entry->fNameBuffer);
+    if(nameOnHeap) {
+        uprv_free(entry->fName);
+    }
+    if(entry->fPath != NULL) {
+        uprv_free(entry->fPath);
+    }
+    if(entry->fPool != NULL) {
+        --entry->fPool->fCountExisting;
+    }
+
+    UResourceDataEntry *aliasTarget = entry->fAlias;
+    if(aliasTarget != NULL) {
+        /* the reference is counted on the last entry of the alias chain */
+        while(aliasTarget->fAlias != NULL) {
+            aliasTarget = aliasTarget->fAlias;
+        }
+        --aliasTarget->fCountExisting;
+    }
+    uprv_free(entry);
+}
+
+/*
+ * Works just like ucnv_flushCache(): removes and frees every cached entry
+ * whose fCountExisting is 0, holding resbMutex throughout.
+ * @return the number of entries deleted; 0 if the cache was never created.
+ */
+static int32_t ures_flushCache()
+{
+    Mutex lock(&resbMutex);
+    if (cache == NULL) {
+        /* shared data hasn't even been lazily created yet */
+        return 0;
+    }
+
+    int32_t deletedCount = 0;
+    UBool deletedThisPass;
+    do {
+        deletedThisPass = FALSE;
+        /* iterate through every element in the table */
+        int32_t pos = UHASH_FIRST;
+        const UHashElement *element;
+        while ((element = uhash_nextElement(cache, &pos)) != NULL) {
+            UResourceDataEntry *entry = (UResourceDataEntry *) element->value.pointer;
+            /*
+             * Only entries with a zero reference count are deleted.
+             * Children and parents of this node are not followed here; they
+             * get deleted on their own once their count is zero.
+             * 04/05/2002 [weiv] fCountExisting should now be accurate. If it's
+             * not zero, some resource bundles are still open somewhere.
+             */
+            if (entry->fCountExisting != 0) {
+                continue;
+            }
+            deletedCount++;
+            deletedThisPass = TRUE;
+            uhash_removeElement(cache, element);
+            free_entry(entry);
+        }
+        /*
+         * Go around again to catch bundles (aliases, pool bundle) whose
+         * fCountExisting got decremented by free_entry().
+         */
+    } while(deletedThisPass);
+
+    return deletedCount;
+}
+
+#ifdef URES_DEBUG
+#include <stdio.h>
+
+U_CAPI UBool U_EXPORT2 ures_dumpCacheContents(void) { /*
+ * Debug helper (only compiled when URES_DEBUG is defined): prints one line
+ * per cached entry to stderr, followed by the total entry count.
+ * Returns TRUE if the cache holds at least one entry, FALSE if it is empty
+ * or was never created. */
+ UBool cacheNotEmpty = FALSE;
+ int32_t pos = UHASH_FIRST;
+ const UHashElement *e;
+ UResourceDataEntry *resB;
+
+ Mutex lock(&resbMutex); /* held for the whole dump */
+ if (cache == NULL) {
+ fprintf(stderr,"%s:%d: RB Cache is NULL.\n", __FILE__, __LINE__);
+ return FALSE;
+ }
+
+ while ((e = uhash_nextElement(cache, &pos)) != NULL) {
+ cacheNotEmpty=TRUE;
+ resB = (UResourceDataEntry *) e->value.pointer;
+ fprintf(stderr,"%s:%d: RB Cache: Entry @0x%p, refcount %d, name %s:%s. Pool 0x%p, alias 0x%p, parent 0x%p\n",
+ __FILE__, __LINE__,
+ (void*)resB, resB->fCountExisting,
+ resB->fName?resB->fName:"NULL", /* a missing name/path is printed as the text NULL */
+ resB->fPath?resB->fPath:"NULL",
+ (void*)resB->fPool,
+ (void*)resB->fAlias,
+ (void*)resB->fParent);
+ }
+
+ fprintf(stderr,"%s:%d: RB Cache still contains %d items.\n", __FILE__, __LINE__, uhash_count(cache));
+ return cacheNotEmpty;
+}
+
+#endif
+
+/*
+ * INTERNAL: library cleanup callback (registered by createCache()).
+ * Flushes unreferenced entries, closes the cache hashtable and resets the
+ * init-once flag so the cache can be created again later.
+ * @return always TRUE
+ */
+static UBool U_CALLCONV ures_cleanup(void)
+{
+    const UBool cacheExists = (UBool)(cache != NULL);
+    if (cacheExists) {
+        ures_flushCache();
+        uhash_close(cache);
+        cache = NULL;
+    }
+    gCacheInitOnce.reset();
+    return TRUE;
+}
+
+/**
+ * INTERNAL: Initializes the cache for resources.
+ * Opens the hashtable keyed by hashEntry()/compareEntries() and registers
+ * ures_cleanup() as the cleanup callback for UCLN_COMMON_URES.
+ * @param status Receives the result of uhash_open().
+ */
+static void U_CALLCONV createCache(UErrorCode &status) {
+    U_ASSERT(cache == NULL);
+    UHashtable *table = uhash_open(hashEntry, compareEntries, NULL, &status);
+    cache = table;
+    ucln_common_registerCleanup(UCLN_COMMON_URES, ures_cleanup);
+}
+
+/** INTERNAL: runs createCache() through gCacheInitOnce. */
+static void initCache(UErrorCode *status) {
+    UErrorCode &errorCode = *status;
+    umtx_initOnce(gCacheInitOnce, &createCache, errorCode);
+}
+
+/**
+ * INTERNAL: sets the name (locale) of the resource bundle to given name.
+ * A name that fits is stored in the entry's embedded fNameBuffer; a longer
+ * one is copied to the heap. A previously heap-allocated name is freed first.
+ * @param status Set to U_MEMORY_ALLOCATION_ERROR if the heap copy cannot be
+ *               allocated; fName is NULL in that case.
+ */
+static void setEntryName(UResourceDataEntry *res, const char *name, UErrorCode *status) {
+    const int32_t nameLength = (int32_t)uprv_strlen(name);
+
+    const UBool oldNameOnHeap = (UBool)(res->fName != NULL && res->fName != res->fNameBuffer);
+    if(oldNameOnHeap) {
+        uprv_free(res->fName);
+    }
+
+    if (nameLength >= (int32_t)sizeof(res->fNameBuffer)) {
+        res->fName = (char *)uprv_malloc(nameLength+1);
+    } else {
+        res->fName = res->fNameBuffer;
+    }
+
+    if(res->fName == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    uprv_strcpy(res->fName, name);
+}
+
+static UResourceDataEntry *
+getPoolEntry(const char *path, UErrorCode *status);
+
+/**
+ * INTERNAL: Inits and opens an entry from a data DLL.
+ * CAUTION: resbMutex must be locked when calling this function.
+ *
+ * The (name, path) pair is looked up in the cache first. On a miss a new
+ * entry is allocated, loaded with res_load() and inserted into the cache.
+ * An entry whose data cannot be loaded is cached as well, marked with
+ * fBogus==U_USING_FALLBACK_WARNING. A bundle that carries an "%%ALIAS"
+ * string is linked through fAlias to the entry for that locale; the entry at
+ * the end of the alias chain is the one returned.
+ *
+ * @param localeID NULL selects the default locale, "" the root locale;
+ *                 any other value is used as the bundle name as given.
+ * @param path     Package path handed to res_load(); may be NULL.
+ * @param status   In/out error code. Nothing is done if it already indicates
+ *                 a failure. Receives U_MEMORY_ALLOCATION_ERROR on allocation
+ *                 failure, U_INVALID_FORMAT_ERROR for a pool-bundle mismatch
+ *                 or an alias name too long to handle, and otherwise the
+ *                 fBogus value of the returned entry when that is not
+ *                 U_ZERO_ERROR.
+ * @return The entry, with fCountExisting incremented, or NULL if no entry
+ *         could be created.
+ */
+static UResourceDataEntry *init_entry(const char *localeID, const char *path, UErrorCode *status) {
+    UResourceDataEntry *r = NULL;
+    UResourceDataEntry find;
+    const char *name;
+    char aliasName[100] = { 0 };
+    int32_t aliasLen = 0;
+
+    if(U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    /* here we try to deduce the right locale name */
+    if(localeID == NULL) { /* if localeID is NULL, we're trying to open default locale */
+        name = uloc_getDefault();
+    } else if(*localeID == 0) { /* if localeID is "" then we try to open root locale */
+        name = kRootLocaleName;
+    } else { /* otherwise, we'll open what we're given */
+        name = localeID;
+    }
+
+    /* only fName and fPath are read by hashEntry()/compareEntries() */
+    find.fName = (char *)name;
+    find.fPath = (char *)path;
+
+    /* check to see if we already have this entry */
+    r = (UResourceDataEntry *)uhash_get(cache, &find);
+    if(r == NULL) {
+        /* if the entry is not yet in the hash table, we'll try to construct a new one */
+        r = (UResourceDataEntry *) uprv_malloc(sizeof(UResourceDataEntry));
+        if(r == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
+
+        uprv_memset(r, 0, sizeof(UResourceDataEntry));
+
+        setEntryName(r, name, status);
+        if (U_FAILURE(*status)) {
+            /* setEntryName() failed to allocate, so there is no name to free */
+            uprv_free(r);
+            return NULL;
+        }
+
+        if(path != NULL) {
+            r->fPath = (char *)uprv_strdup(path);
+            if(r->fPath == NULL) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                /* a long name lives on the heap and must not be leaked */
+                if(r->fName != r->fNameBuffer) {
+                    uprv_free(r->fName);
+                }
+                uprv_free(r);
+                return NULL;
+            }
+        }
+
+        /* this is the actual loading */
+        res_load(&(r->fData), r->fPath, r->fName, status);
+
+        if (U_FAILURE(*status)) {
+            /* if we failed to load due to an out-of-memory error, exit early. */
+            if (*status == U_MEMORY_ALLOCATION_ERROR) {
+                /* release the strings owned by the entry along with it */
+                if(r->fName != r->fNameBuffer) {
+                    uprv_free(r->fName);
+                }
+                if(r->fPath != NULL) {
+                    uprv_free(r->fPath);
+                }
+                uprv_free(r);
+                return NULL;
+            }
+            /* we have no such entry in dll, so it will always use fallback */
+            *status = U_USING_FALLBACK_WARNING;
+            r->fBogus = U_USING_FALLBACK_WARNING;
+        } else { /* if we have a regular entry */
+            Resource aliasres;
+            if (r->fData.usesPoolBundle) {
+                r->fPool = getPoolEntry(r->fPath, status);
+                if (U_SUCCESS(*status)) {
+                    const int32_t *poolIndexes = r->fPool->fData.pRoot + 1;
+                    /* the bundle and its pool must agree on the pool checksum */
+                    if(r->fData.pRoot[1 + URES_INDEX_POOL_CHECKSUM] == poolIndexes[URES_INDEX_POOL_CHECKSUM]) {
+                        r->fData.poolBundleKeys = (const char *)(poolIndexes + (poolIndexes[URES_INDEX_LENGTH] & 0xff));
+                        r->fData.poolBundleStrings = r->fPool->fData.p16BitUnits;
+                    } else {
+                        r->fBogus = *status = U_INVALID_FORMAT_ERROR;
+                    }
+                } else {
+                    r->fBogus = *status;
+                }
+            }
+            if (U_SUCCESS(*status)) {
+                /* handle the alias by trying to get out the %%Alias tag.*/
+                /* We'll try to get alias string from the bundle */
+                aliasres = res_getResource(&(r->fData), "%%ALIAS");
+                if (aliasres != RES_BOGUS) {
+                    // No tracing: called during initial data loading
+                    const UChar *alias = res_getStringNoTrace(&(r->fData), aliasres, &aliasLen);
+                    if(alias != NULL && aliasLen > 0) { /* if there is actual alias - unload and load new data */
+                        if(aliasLen < (int32_t)sizeof(aliasName)) {
+                            u_UCharsToChars(alias, aliasName, aliasLen+1);
+                            r->fAlias = init_entry(aliasName, path, status);
+                        } else {
+                            /*
+                             * The alias plus its NUL does not fit into aliasName.
+                             * Copying it would overrun the stack buffer, so the
+                             * bundle is treated as malformed instead.
+                             */
+                            r->fBogus = *status = U_INVALID_FORMAT_ERROR;
+                        }
+                    }
+                }
+            }
+        }
+
+        {
+            UResourceDataEntry *oldR = NULL;
+            if((oldR = (UResourceDataEntry *)uhash_get(cache, r)) == NULL) { /* if the data is not cached */
+                /* just insert it in the cache */
+                UErrorCode cacheStatus = U_ZERO_ERROR;
+                uhash_put(cache, (void *)r, r, &cacheStatus);
+                if (U_FAILURE(cacheStatus)) {
+                    *status = cacheStatus;
+                    free_entry(r);
+                    r = NULL;
+                }
+            } else {
+                /* somebody has already inserted it while we were working, discard newly opened data */
+                /* Also, we could get here IF we opened an alias */
+                free_entry(r);
+                r = oldR;
+            }
+        }
+
+    }
+    if(r != NULL) {
+        /* return the real bundle */
+        while(r->fAlias != NULL) {
+            r = r->fAlias;
+        }
+        r->fCountExisting++; /* we increase its reference count */
+        /* if the resource has a warning */
+        /* we don't want to overwrite a status with no error */
+        if(r->fBogus != U_ZERO_ERROR && U_SUCCESS(*status)) {
+            *status = r->fBogus; /* set the returning status */
+        }
+    }
+    return r;
+}
+
+/*
+ * INTERNAL: opens the pool bundle (kPoolBundleName) for the given path via
+ * init_entry() and checks that it is usable.
+ * @param status Set to U_INVALID_FORMAT_ERROR if init_entry() succeeded but
+ *               returned no entry, a bogus entry, or an entry that is not
+ *               flagged as a pool bundle.
+ * @return whatever init_entry() returned, also when the check fails.
+ */
+static UResourceDataEntry *
+getPoolEntry(const char *path, UErrorCode *status) {
+    UResourceDataEntry *poolBundle = init_entry(kPoolBundleName, path, status);
+    if(U_FAILURE(*status)) {
+        return poolBundle;
+    }
+    const UBool usable = (UBool)(poolBundle != NULL &&
+                                 poolBundle->fBogus == U_ZERO_ERROR &&
+                                 poolBundle->fData.isPoolBundle);
+    if(!usable) {
+        *status = U_INVALID_FORMAT_ERROR;
+    }
+    return poolBundle;
+}
+
+/* INTERNAL:
+ * Starting from name, opens entries with init_entry() and keeps chopping the
+ * last '_' subtag off name until an entry with real data (fBogus==U_ZERO_ERROR)
+ * is found or nothing is left to chop.
+ * name is modified in place: it is overwritten with the found entry's fName,
+ * chopped, and replaced by "und" if chopping leaves an empty string.
+ * On return *isRoot, *hasChopped and *isDefault describe the last iteration.
+ * Returns the entry found, or NULL if none had real data or init_entry() failed.
+ */
+/* CAUTION: resbMutex must be locked when calling this function! */
+static UResourceDataEntry *
+findFirstExisting(const char* path, char* name,
+ UBool *isRoot, UBool *hasChopped, UBool *isDefault, UErrorCode* status) {
+ UResourceDataEntry *r = NULL;
+ UBool hasRealData = FALSE;
+ const char *defaultLoc = uloc_getDefault();
+ *hasChopped = TRUE; /* we're starting with a fresh name */
+
+ while(*hasChopped && !hasRealData) {
+ r = init_entry(name, path, status);
+ /* Null pointer test */
+ if (U_FAILURE(*status)) {
+ return NULL;
+ }
+ *isDefault = (UBool)(uprv_strncmp(name, defaultLoc, uprv_strlen(name)) == 0); /* prefix match: only strlen(name) characters are compared */
+ hasRealData = (UBool)(r->fBogus == U_ZERO_ERROR);
+ if(!hasRealData) {
+ /* this entry is not real. We will discard it. */
+ /* However, the parent line for this entry is */
+ /* not to be used - as there might be parent */
+ /* lines in cache from previous openings that */
+ /* are not updated yet. */
+ r->fCountExisting--; /* undo the reference init_entry() just added */
+ /*entryCloseInt(r);*/
+ r = NULL;
+ *status = U_USING_FALLBACK_WARNING;
+ } else {
+ uprv_strcpy(name, r->fName); /* this is needed for supporting aliases */
+ }
+
+ *isRoot = (UBool)(uprv_strcmp(name, kRootLocaleName) == 0);
+
+ /*Fallback data stuff*/
+ *hasChopped = chopLocale(name);
+ if (*hasChopped && *name == '\0') { /* the name started with '_', so chopping left it empty */
+ uprv_strcpy(name, "und");
+ }
+ }
+ return r;
+}
+
+/*
+ * INTERNAL: records in the two magic fields whether the bundle is a stack
+ * object: both are zeroed for a stack object, and set to MAGIC1/MAGIC2
+ * otherwise.
+ */
+static void ures_setIsStackObject( UResourceBundle* resB, UBool state) {
+    if(!state) {
+        resB->fMagic1 = MAGIC1;
+        resB->fMagic2 = MAGIC2;
+        return;
+    }
+    resB->fMagic1 = 0;
+    resB->fMagic2 = 0;
+}
+
+/*
+ * INTERNAL: returns FALSE only if both magic fields carry MAGIC1/MAGIC2,
+ * TRUE (stack object) in every other case.
+ */
+static UBool ures_isStackObject(const UResourceBundle* resB) {
+    if(resB->fMagic1 == MAGIC1 && resB->fMagic2 == MAGIC2) {
+        return FALSE;
+    }
+    return TRUE;
+}
+
+
+/* Zero-fills the bundle and marks it as a stack object. */
+U_CFUNC void ures_initStackObject(UResourceBundle* resB) {
+    uprv_memset(resB, 0, sizeof(*resB));
+    ures_setIsStackObject(resB, TRUE);
+}
+
+U_NAMESPACE_BEGIN
+
+StackUResourceBundle::StackUResourceBundle() { /* sets up the embedded bundle as an empty stack object */
+ ures_initStackObject(&bundle); /* zero-fills it and clears the magic fields */
+}
+
+StackUResourceBundle::~StackUResourceBundle() { /* hands the embedded bundle to ures_close() */
+ ures_close(&bundle); /* NOTE(review): presumably frees only what the bundle references, since it is marked as a stack object - confirm in ures_close() */
+}
+
+U_NAMESPACE_END
+
+/*
+ * Links parent entries onto t1, stopping before the root bundle.
+ *
+ * On each iteration the parent name is either taken from an explicit
+ * "%%Parent" resource in t1's data or is whatever `name` currently holds
+ * (the caller's chopped name, then the result of chopLocale()). The loop ends
+ * when the name can no longer be chopped, when t1 already has a parent, when
+ * t1's data is flagged noFallback, or when t1 contains "%%ParentIsRoot".
+ *
+ * t1            in/out: entry to extend; on return refers to the last entry linked
+ * name          in/out: locale name buffer, modified in place
+ * nameCapacity  capacity of `name`, used to bound the "%%Parent" copy
+ * usingUSRData  if TRUE, a user-override entry from usrDataPath is inserted
+ *               between t1 and its regular parent when that override has data
+ * usrDataPath   package path for user-override data (only read if usingUSRData)
+ * status        set to the init_entry() failure code on error
+ *
+ * Returns U_SUCCESS(*status).
+ */
+static UBool  // returns U_SUCCESS(*status)
+loadParentsExceptRoot(UResourceDataEntry *&t1,
+                      char name[], int32_t nameCapacity,
+                      UBool usingUSRData, char usrDataPath[], UErrorCode *status) {
+    if (U_FAILURE(*status)) { return FALSE; }
+    UBool hasChopped = TRUE;
+    while (hasChopped && t1->fParent == NULL && !t1->fData.noFallback &&
+            res_getResource(&t1->fData,"%%ParentIsRoot") == RES_BOGUS) {
+        Resource parentRes = res_getResource(&t1->fData, "%%Parent");
+        if (parentRes != RES_BOGUS) {  // An explicit parent was found.
+            int32_t parentLocaleLen = 0;
+            // No tracing: called during initial data loading
+            const UChar *parentLocaleName = res_getStringNoTrace(&(t1->fData), parentRes, &parentLocaleLen);
+            if(parentLocaleName != NULL && 0 < parentLocaleLen && parentLocaleLen < nameCapacity) {
+                // +1 so that the terminating NUL is converted as well.
+                u_UCharsToChars(parentLocaleName, name, parentLocaleLen + 1);
+                if (uprv_strcmp(name, kRootLocaleName) == 0) {
+                    // Root is inserted separately by the caller.
+                    return TRUE;
+                }
+            }
+        }
+        // Insert regular parents.
+        UErrorCode parentStatus = U_ZERO_ERROR;
+        UResourceDataEntry *t2 = init_entry(name, t1->fPath, &parentStatus);
+        if (U_FAILURE(parentStatus)) {
+            *status = parentStatus;
+            return FALSE;
+        }
+        UResourceDataEntry *u2 = NULL;
+        UErrorCode usrStatus = U_ZERO_ERROR;
+        if (usingUSRData) {  // This code inserts user override data into the inheritance chain.
+            u2 = init_entry(name, usrDataPath, &usrStatus);
+            // If we failed due to out-of-memory, report that to the caller and exit early.
+            if (usrStatus == U_MEMORY_ALLOCATION_ERROR) {
+                *status = usrStatus;
+                return FALSE;
+            }
+        }
+
+        // u2 is NULL-checked before every dereference, matching the
+        // "u1 != NULL" guard that entryOpen() applies to the same call.
+        if (usingUSRData && u2 != NULL && U_SUCCESS(usrStatus) && u2->fBogus == U_ZERO_ERROR) {
+            t1->fParent = u2;
+            u2->fParent = t2;
+        } else {
+            t1->fParent = t2;
+            if (usingUSRData && u2 != NULL) {
+                // The USR override data wasn't found, set it to be deleted.
+                u2->fCountExisting = 0;
+            }
+        }
+        t1 = t2;
+        hasChopped = chopLocale(name);
+    }
+    return TRUE;
+}
+
+/*
+ * Obtains the root-locale entry for t1's package path via init_entry(),
+ * makes it t1's parent, and advances t1 to that root entry.
+ * On failure *status receives the init_entry() error and t1 is left unchanged.
+ */
+static UBool  // returns U_SUCCESS(*status)
+insertRootBundle(UResourceDataEntry *&t1, UErrorCode *status) {
+    if (U_FAILURE(*status)) { return FALSE; }
+
+    UErrorCode rootStatus = U_ZERO_ERROR;
+    UResourceDataEntry *const rootEntry = init_entry(kRootLocaleName, t1->fPath, &rootStatus);
+    if (U_SUCCESS(rootStatus)) {
+        t1->fParent = rootEntry;
+        t1 = rootEntry;
+        return TRUE;
+    }
+    *status = rootStatus;
+    return FALSE;
+}
+
+/**
+ * Selects the fallback strategy used when opening a resource bundle.
+ * entryOpen() asserts that it is never called with URES_OPEN_DIRECT.
+ */
+enum UResOpenType {
+ /**
+ * Open a resource bundle for the locale;
+ * if there is not even a base language bundle, then fall back to the default locale;
+ * if there is no bundle for that either, then load the root bundle.
+ *
+ * This is the default bundle loading behavior.
+ */
+ URES_OPEN_LOCALE_DEFAULT_ROOT,
+ // TODO: ICU ticket #11271 "consistent default locale across locale trees"
+ // Add an option to look at the main locale tree for whether to
+ // fall back to root directly (if the locale has main data) or
+ // fall back to the default locale first (if the locale does not even have main data).
+ /**
+ * Open a resource bundle for the locale;
+ * if there is not even a base language bundle, then load the root bundle;
+ * never fall back to the default locale.
+ *
+ * This is used for algorithms that have good pan-Unicode default behavior,
+ * such as case mappings, collation, and segmentation (BreakIterator).
+ */
+ URES_OPEN_LOCALE_ROOT,
+ /**
+ * Open a resource bundle for the exact bundle name as requested;
+ * no fallbacks, do not load parent bundles.
+ *
+ * This is used for supplemental (non-locale) data.
+ */
+ URES_OPEN_DIRECT
+};
+typedef enum UResOpenType UResOpenType;
+
+/*
+ * Opens the cache entry chain for `localeID` in package `path`, applying
+ * locale fallback according to `openType` (which must not be URES_OPEN_DIRECT).
+ *
+ * Steps, all performed while holding resbMutex:
+ *   1. findFirstExisting() on the requested locale; if found, optionally put a
+ *      user-override entry in front of it and load its non-root parents.
+ *   2. If nothing was found and openType is URES_OPEN_LOCALE_DEFAULT_ROOT,
+ *      repeat with the default locale (status becomes U_USING_DEFAULT_WARNING).
+ *   3. If still nothing was found, use the root bundle; otherwise append the
+ *      root bundle to the chain when it is not already there.
+ *   4. Increment fCountExisting on every parent in the resulting chain.
+ *
+ * Returns the head of the chain, or NULL with *status set to a failure code
+ * (U_MISSING_RESOURCE_ERROR when not even root exists). Warning codes from
+ * the lookup are copied into *status on success.
+ */
+static UResourceDataEntry *entryOpen(const char* path, const char* localeID,
+                                     UResOpenType openType, UErrorCode* status) {
+    U_ASSERT(openType != URES_OPEN_DIRECT);
+    UErrorCode intStatus = U_ZERO_ERROR;
+    UResourceDataEntry *r = NULL;
+    UResourceDataEntry *t1 = NULL;
+    UBool isDefault = FALSE;
+    UBool isRoot = FALSE;
+    UBool hasRealData = FALSE;
+    UBool hasChopped = TRUE;
+    UBool usingUSRData = U_USE_USRDATA && ( path == NULL || uprv_strncmp(path,U_ICUDATA_NAME,8) == 0);
+
+    char name[ULOC_FULLNAME_CAPACITY];
+    char usrDataPath[96];
+
+    initCache(status);
+
+    if(U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    uprv_strncpy(name, localeID, sizeof(name) - 1);
+    name[sizeof(name) - 1] = 0;
+
+    if ( usingUSRData ) {
+        if ( path == NULL ) {
+            uprv_strcpy(usrDataPath, U_USRDATA_NAME);
+        } else {
+            /* Derive the override package name by replacing the first three characters with "usr". */
+            uprv_strncpy(usrDataPath, path, sizeof(usrDataPath) - 1);
+            usrDataPath[0] = 'u';
+            usrDataPath[1] = 's';
+            usrDataPath[2] = 'r';
+            usrDataPath[sizeof(usrDataPath) - 1] = 0;
+        }
+    }
+
+    Mutex lock(&resbMutex);    // Lock resbMutex until the end of this function.
+
+    /* We're going to skip all the locales that do not have any data */
+    r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus);
+
+    // If we failed due to out-of-memory, report the failure and exit early.
+    if (intStatus == U_MEMORY_ALLOCATION_ERROR) {
+        *status = intStatus;
+        goto finish;
+    }
+
+    if(r != NULL) { /* if there is one real locale, we can look for parents. */
+        t1 = r;
+        hasRealData = TRUE;
+        if ( usingUSRData ) {  /* This code inserts user override data into the inheritance chain */
+            UErrorCode usrStatus = U_ZERO_ERROR;
+            UResourceDataEntry *u1 = init_entry(t1->fName, usrDataPath, &usrStatus);
+            // If we failed due to out-of-memory, report the failure and exit early.
+            // The code to test is usrStatus, which is what init_entry() just wrote;
+            // intStatus was already checked above and cannot have changed since.
+            if (usrStatus == U_MEMORY_ALLOCATION_ERROR) {
+                *status = usrStatus;
+                goto finish;
+            }
+            if ( u1 != NULL ) {
+                if(u1->fBogus == U_ZERO_ERROR) {
+                    u1->fParent = t1;
+                    r = u1;
+                } else {
+                    /* the USR override data wasn't found, set it to be deleted */
+                    u1->fCountExisting = 0;
+                }
+            }
+        }
+        if (hasChopped && !isRoot) {
+            if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) {
+                goto finish;
+            }
+        }
+    }
+
+    /* we could have reached this point without having any real data */
+    /* if that is the case, we need to chain in the default locale */
+    if(r==NULL && openType == URES_OPEN_LOCALE_DEFAULT_ROOT && !isDefault && !isRoot) {
+        /* insert default locale */
+        uprv_strcpy(name, uloc_getDefault());
+        r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus);
+        // If we failed due to out-of-memory, report the failure and exit early.
+        if (intStatus == U_MEMORY_ALLOCATION_ERROR) {
+            *status = intStatus;
+            goto finish;
+        }
+        intStatus = U_USING_DEFAULT_WARNING;
+        if(r != NULL) { /* the default locale exists */
+            t1 = r;
+            hasRealData = TRUE;
+            isDefault = TRUE;
+            // TODO: Why not if (usingUSRData) { ... } like in the non-default-locale code path?
+            if (hasChopped && !isRoot) {
+                if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) {
+                    goto finish;
+                }
+            }
+        }
+    }
+
+    /* we could still have r == NULL at this point - maybe even default locale is not */
+    /* present */
+    if(r == NULL) {
+        uprv_strcpy(name, kRootLocaleName);
+        r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus);
+        // If we failed due to out-of-memory, report the failure and exit early.
+        if (intStatus == U_MEMORY_ALLOCATION_ERROR) {
+            *status = intStatus;
+            goto finish;
+        }
+        if(r != NULL) {
+            t1 = r;
+            intStatus = U_USING_DEFAULT_WARNING;
+            hasRealData = TRUE;
+        } else { /* we don't even have the root locale */
+            *status = U_MISSING_RESOURCE_ERROR;
+            goto finish;
+        }
+    } else if(!isRoot && uprv_strcmp(t1->fName, kRootLocaleName) != 0 &&
+            t1->fParent == NULL && !r->fData.noFallback) {
+        if (!insertRootBundle(t1, status)) {
+            goto finish;
+        }
+        if(!hasRealData) {
+            r->fBogus = U_USING_DEFAULT_WARNING;
+        }
+    }
+
+    // TODO: Does this ever loop?
+    while(r != NULL && !isRoot && t1->fParent != NULL) {
+        t1->fParent->fCountExisting++;
+        t1 = t1->fParent;
+    }
+
+finish:
+    if(U_SUCCESS(*status)) {
+        if(intStatus != U_ZERO_ERROR) {
+            *status = intStatus;
+        }
+        return r;
+    } else {
+        return NULL;
+    }
+}
+
+/**
+ * Version of entryOpen() and findFirstExisting() for ures_openDirect(),
+ * with no fallbacks.
+ * Parent and root locale bundles are loaded if
+ * the requested bundle does not have the "nofallback" flag.
+ *
+ * Returns the entry for exactly `localeID`, or NULL if that entry is bogus
+ * or an error occurred. resbMutex is held for the whole lookup.
+ */
+static UResourceDataEntry *
+entryOpenDirect(const char* path, const char* localeID, UErrorCode* status) {
+ initCache(status);
+ if(U_FAILURE(*status)) {
+ return NULL;
+ }
+
+ Mutex lock(&resbMutex);
+ // findFirstExisting() without fallbacks.
+ UResourceDataEntry *r = init_entry(localeID, path, status);
+ if(U_SUCCESS(*status)) {
+ if(r->fBogus != U_ZERO_ERROR) {
+ // No real data for the requested name: drop our reference and report nothing found.
+ r->fCountExisting--;
+ r = NULL;
+ }
+ } else {
+ r = NULL;
+ }
+
+ // Some code depends on the ures_openDirect() bundle to have a parent bundle chain,
+ // unless it is marked with "nofallback".
+ UResourceDataEntry *t1 = r;
+ if(r != NULL && uprv_strcmp(localeID, kRootLocaleName) != 0 && // not root
+ r->fParent == NULL && !r->fData.noFallback &&
+ uprv_strlen(localeID) < ULOC_FULLNAME_CAPACITY) {
+ char name[ULOC_FULLNAME_CAPACITY];
+ uprv_strcpy(name, localeID);
+ // Short-circuit order matters: parents are loaded only when the name could be
+ // chopped to something other than root; the root bundle is appended only if
+ // that loading was skipped or succeeded.
+ if(!chopLocale(name) || uprv_strcmp(name, kRootLocaleName) == 0 ||
+ loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), FALSE, NULL, status)) {
+ if(uprv_strcmp(t1->fName, kRootLocaleName) != 0 && t1->fParent == NULL) {
+ insertRootBundle(t1, status);
+ }
+ }
+ if(U_FAILURE(*status)) {
+ r = NULL;
+ }
+ }
+
+ if(r != NULL) {
+ // TODO: Does this ever loop?
+ // Count a reference on every parent still reachable from t1.
+ while(t1->fParent != NULL) {
+ t1->fParent->fCountExisting++;
+ t1 = t1->fParent;
+ }
+ }
+ return r;
+}
+
+/**
+ * Functions to create and destroy resource bundles.
+ * CAUTION: resbMutex must be locked when calling this function.
+ */
+/* INTERNAL: */
+/*
+ * Decrements fCountExisting on resB and on every entry reachable through
+ * its fParent links.
+ */
+static void entryCloseInt(UResourceDataEntry *resB) {
+    for(UResourceDataEntry *entry = resB; entry != NULL; entry = entry->fParent) {
+        entry->fCountExisting--;
+        /*
+         * Entries are left in the cache. TODO: add ures_flushCache() to force
+         * a flush of the cache.
+         * If entries are ever removed and freed here once fCountExisting
+         * drops to zero or below, the fParent pointer must be read before
+         * the entry is freed.
+         */
+    }
+}
+
+/**
+ * API: closes a resource bundle and cleans up.
+ * Acquires resbMutex and delegates to entryCloseInt(), which requires
+ * that mutex to be held.
+ */
+
+static void entryClose(UResourceDataEntry *resB) {
+ Mutex lock(&resbMutex);
+ entryCloseInt(resB);
+}
+
+/*
+U_CFUNC void ures_setResPath(UResourceBundle *resB, const char* toAdd) {
+ if(resB->fResPath == NULL) {
+ resB->fResPath = resB->fResBuf;
+ *(resB->fResPath) = 0;
+ }
+ resB->fResPathLen = uprv_strlen(toAdd);
+ if(RES_BUFSIZE <= resB->fResPathLen+1) {
+ if(resB->fResPath == resB->fResBuf) {
+ resB->fResPath = (char *)uprv_malloc((resB->fResPathLen+1)*sizeof(char));
+ } else {
+ resB->fResPath = (char *)uprv_realloc(resB->fResPath, (resB->fResPathLen+1)*sizeof(char));
+ }
+ }
+ uprv_strcpy(resB->fResPath, toAdd);
+}
+*/
+/*
+ * Appends `toAdd` to the bundle's resource path.
+ *
+ * The path lives in the inline buffer fResBuf while it fits and is moved to
+ * (or grown on) the heap once the new length plus terminator reaches
+ * RES_BUFSIZE.
+ *
+ * resB      bundle whose fResPath/fResPathLen are updated
+ * toAdd     NUL-terminated text to append
+ * lenToAdd  length of toAdd, excluding the terminator
+ * status    set to U_MEMORY_ALLOCATION_ERROR if the heap buffer cannot be
+ *           obtained; in that case fResPath and fResPathLen are unchanged
+ *           (apart from a NULL path having been pointed at the empty inline
+ *           buffer).
+ */
+static void ures_appendResPath(UResourceBundle *resB, const char* toAdd, int32_t lenToAdd, UErrorCode *status) {
+    if(resB->fResPath == NULL) {
+        resB->fResPath = resB->fResBuf;
+        *(resB->fResPath) = 0;
+        resB->fResPathLen = 0;
+    }
+    /* Read the current length only after the reset above so that the append
+       offset always matches the buffer actually in use. */
+    const int32_t oldLen = resB->fResPathLen;
+    const int32_t newLen = oldLen + lenToAdd;
+
+    if(RES_BUFSIZE <= newLen+1) {
+        if(resB->fResPath == resB->fResBuf) {
+            /* First overflow of the inline buffer: move to the heap. */
+            char *heapPath = (char *)uprv_malloc((newLen+1)*sizeof(char));
+            /* Check that memory was allocated correctly. */
+            if (heapPath == NULL) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            uprv_strcpy(heapPath, resB->fResBuf);
+            resB->fResPath = heapPath;
+        } else {
+            char *temp = (char *)uprv_realloc(resB->fResPath, (newLen+1)*sizeof(char));
+            /* Check that memory was reallocated correctly. */
+            if (temp == NULL) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            resB->fResPath = temp;
+        }
+    }
+    uprv_strcpy(resB->fResPath + oldLen, toAdd);
+    /* Commit the new length only once the text is in place. */
+    resB->fResPathLen = newLen;
+}
+
+/*
+ * Releases the bundle's resource path if it was heap-allocated (i.e. it is
+ * not the inline fResBuf) and resets the path to "none".
+ */
+static void ures_freeResPath(UResourceBundle *resB) {
+    char *const currentPath = resB->fResPath;
+    if (currentPath != NULL && currentPath != resB->fResBuf) {
+        uprv_free(currentPath);
+    }
+    resB->fResPathLen = 0;
+    resB->fResPath = NULL;
+}
+
+/*
+ * Releases everything a bundle owns: its reference on the data entry chain,
+ * its version string and its resource path. The bundle struct itself is
+ * freed only if freeBundleObj is set and the bundle is not a stack object.
+ * A NULL bundle is ignored.
+ */
+static void
+ures_closeBundle(UResourceBundle* resB, UBool freeBundleObj)
+{
+    if(resB == NULL) {
+        return;
+    }
+
+    if(resB->fData != NULL) {
+        entryClose(resB->fData);
+    }
+    if(resB->fVersion != NULL) {
+        uprv_free(resB->fVersion);
+    }
+    ures_freeResPath(resB);
+
+    if(freeBundleObj && !ures_isStackObject(resB)) {
+        uprv_free(resB);
+    }
+#if 0 /*U_DEBUG*/
+    else {
+        /* poison the data */
+        uprv_memset(resB, -1, sizeof(UResourceBundle));
+    }
+#endif
+}
+
+/* Public close: releases the bundle's resources and, for non-stack bundles, the bundle object itself. */
+U_CAPI void U_EXPORT2
+ures_close(UResourceBundle* resB)
+{
+ ures_closeBundle(resB, TRUE);
+}
+
+/*
+ * Fills in (or allocates) a UResourceBundle describing resource `r`.
+ *
+ * rdata     resource data that `r` belongs to
+ * r         the resource to wrap; if it is an alias it is resolved first
+ * key       table key of `r`, or NULL
+ * idx       index of `r` in its container; appended to the path only when
+ *           key is NULL and idx >= 0
+ * realData  data entry that the resulting bundle will reference
+ * parent    bundle that `r` was found in; must not be NULL
+ * noAlias   number of aliases followed so far; resolution stops with
+ *           U_TOO_MANY_ALIASES_ERROR at URES_MAX_ALIAS_LEVEL
+ * resB      bundle to fill in, or NULL to allocate a new one
+ * status    in/out error code
+ *
+ * Returns the filled-in bundle. On entry failure resB is returned untouched;
+ * NULL is returned when parent is NULL or on allocation failure.
+ *
+ * Alias strings have the form "/PACKAGE/locale/key/path", "/LOCALE/key/path"
+ * (look up in the originally requested locale) or "locale/key/path".
+ */
+static UResourceBundle *init_resb_result(const ResourceData *rdata, Resource r,
+                                         const char *key, int32_t idx, UResourceDataEntry *realData,
+                                         const UResourceBundle *parent, int32_t noAlias,
+                                         UResourceBundle *resB, UErrorCode *status)
+{
+    if(status == NULL || U_FAILURE(*status)) {
+        return resB;
+    }
+    if (parent == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    if(RES_GET_TYPE(r) == URES_ALIAS) { /* This is an alias, need to exchange with real data */
+        if(noAlias < URES_MAX_ALIAS_LEVEL) {
+            int32_t len = 0;
+            const UChar *alias = res_getAlias(rdata, r, &len);
+            if(len > 0) {
+                /* we have an alias, now let's cut it up */
+                char stackAlias[200];
+                char *chAlias = NULL, *path = NULL, *locale = NULL, *keyPath = NULL;
+                int32_t capacity;
+
+                /*
+                 * Allocate enough space for both the char * version
+                 * of the alias and parent->fResPath.
+                 *
+                 * We do this so that res_findResource() can modify the path,
+                 * which allows us to remove redundant _res_findResource() variants
+                 * in uresdata.c.
+                 * res_findResource() now NUL-terminates each segment so that table keys
+                 * can always be compared with strcmp() instead of strncmp().
+                 * Saves code there and simplifies testing and code coverage.
+                 *
+                 * markus 2003oct17
+                 */
+                ++len; /* count the terminating NUL */
+                if(parent->fResPath != NULL) {
+                    capacity = (int32_t)uprv_strlen(parent->fResPath) + 1;
+                } else {
+                    capacity = 0;
+                }
+                if(capacity < len) {
+                    capacity = len;
+                }
+                if(capacity <= (int32_t)sizeof(stackAlias)) {
+                    capacity = (int32_t)sizeof(stackAlias);
+                    chAlias = stackAlias;
+                } else {
+                    chAlias = (char *)uprv_malloc(capacity);
+                    /* test for NULL */
+                    if(chAlias == NULL) {
+                        *status = U_MEMORY_ALLOCATION_ERROR;
+                        return NULL;
+                    }
+                }
+                u_UCharsToChars(alias, chAlias, len);
+
+                if(*chAlias == RES_PATH_SEPARATOR) {
+                    /* there is a path included */
+                    locale = uprv_strchr(chAlias+1, RES_PATH_SEPARATOR);
+                    if(locale == NULL) {
+                        locale = uprv_strchr(chAlias, 0); /* avoid locale == NULL to make code below work */
+                    } else {
+                        *locale = 0;
+                        locale++;
+                    }
+                    path = chAlias+1;
+                    if(uprv_strcmp(path, "LOCALE") == 0) {
+                        /* this is an XPath alias, starting with "/LOCALE/" */
+                        /* it contains the path to a resource which should be looked up */
+                        /* starting in the requested locale */
+                        keyPath = locale;
+                        locale = parent->fTopLevelData->fName; /* this is the requested locale's name */
+                        path = realData->fPath; /* we will be looking in the same package */
+                    } else {
+                        if(uprv_strcmp(path, "ICUDATA") == 0) { /* want ICU data */
+                            path = NULL;
+                        }
+                        keyPath = uprv_strchr(locale, RES_PATH_SEPARATOR);
+                        if(keyPath) {
+                            *keyPath = 0;
+                            keyPath++;
+                        }
+                    }
+                } else {
+                    /* no path, start with a locale */
+                    locale = chAlias;
+                    keyPath = uprv_strchr(locale, RES_PATH_SEPARATOR);
+                    if(keyPath) {
+                        *keyPath = 0;
+                        keyPath++;
+                    }
+                    path = realData->fPath;
+                }
+
+
+                {
+                    /* got almost everything, let's try to open */
+                    /* first, open the bundle with real data */
+                    UResourceBundle *result = resB;
+                    const char* temp = NULL;
+                    UErrorCode intStatus = U_ZERO_ERROR;
+                    UResourceBundle *mainRes = ures_openDirect(path, locale, &intStatus);
+                    if(U_SUCCESS(intStatus)) {
+                        if(keyPath == NULL) {
+                            /* no key path. This means that we are going to
+                             * to use the corresponding resource from
+                             * another bundle
+                             */
+                            /* first, we are going to get a corresponding parent
+                             * resource to the one we are searching.
+                             */
+                            char *aKey = parent->fResPath;
+                            if(aKey) {
+                                uprv_strcpy(chAlias, aKey); /* allocated large enough above */
+                                aKey = chAlias;
+                                r = res_findResource(&(mainRes->fResData), mainRes->fRes, &aKey, &temp);
+                            } else {
+                                r = mainRes->fRes;
+                            }
+                            if(key) {
+                                /* we need to make keyPath from parent's fResPath and
+                                 * current key, if there is a key associated
+                                 */
+                                len = (int32_t)(uprv_strlen(key) + 1);
+                                if(len > capacity) {
+                                    capacity = len;
+                                    if(chAlias == stackAlias) {
+                                        chAlias = (char *)uprv_malloc(capacity);
+                                    } else {
+                                        /* Keep the old pointer until realloc succeeds so
+                                           that it can be released on failure. */
+                                        char *grown = (char *)uprv_realloc(chAlias, capacity);
+                                        if(grown == NULL) {
+                                            uprv_free(chAlias);
+                                        }
+                                        chAlias = grown;
+                                    }
+                                    if(chAlias == NULL) {
+                                        ures_close(mainRes);
+                                        *status = U_MEMORY_ALLOCATION_ERROR;
+                                        return NULL;
+                                    }
+                                }
+                                uprv_memcpy(chAlias, key, len);
+                                aKey = chAlias;
+                                r = res_findResource(&(mainRes->fResData), r, &aKey, &temp);
+                            } else if(idx != -1) {
+                                /* if there is no key, but there is an index, try to get by the index */
+                                /* here we have either a table or an array, so get the element */
+                                int32_t type = RES_GET_TYPE(r);
+                                if(URES_IS_TABLE(type)) {
+                                    r = res_getTableItemByIndex(&(mainRes->fResData), r, idx, (const char **)&aKey);
+                                } else { /* array */
+                                    r = res_getArrayItem(&(mainRes->fResData), r, idx);
+                                }
+                            }
+                            if(r != RES_BOGUS) {
+                                result = init_resb_result(&(mainRes->fResData), r, temp, -1, mainRes->fData, mainRes, noAlias+1, resB, status);
+                            } else {
+                                *status = U_MISSING_RESOURCE_ERROR;
+                                result = resB;
+                            }
+                        } else {
+                            /* this one is a bit trickier.
+                             * we start finding keys, but after we resolve one alias, the path might continue.
+                             * Consider:
+                             *     aliastest:alias { "testtypes/anotheralias/Sequence" }
+                             *     anotheralias:alias { "/ICUDATA/sh/CollationElements" }
+                             * aliastest resource should finally have the sequence, not collation elements.
+                             */
+                            UResourceDataEntry *dataEntry = mainRes->fData;
+                            char stackPath[URES_MAX_BUFFER_SIZE];
+                            char *pathBuf = stackPath, *myPath = pathBuf;
+                            if(uprv_strlen(keyPath) >= UPRV_LENGTHOF(stackPath)) {
+                                pathBuf = (char *)uprv_malloc((uprv_strlen(keyPath)+1)*sizeof(char));
+                                if(pathBuf == NULL) {
+                                    *status = U_MEMORY_ALLOCATION_ERROR;
+                                    /* keyPath is not used past this point, so the alias
+                                       buffer can be released before bailing out. */
+                                    if(chAlias != stackAlias) {
+                                        uprv_free(chAlias);
+                                    }
+                                    ures_close(mainRes);
+                                    return NULL;
+                                }
+                                /* The search cursor must follow the buffer that actually
+                                   receives the key path; stackPath stays uninitialized. */
+                                myPath = pathBuf;
+                            }
+                            uprv_strcpy(pathBuf, keyPath);
+                            result = mainRes;
+                            /* now we have fallback following here */
+                            do {
+                                r = dataEntry->fData.rootRes;
+                                /* this loop handles 'found' resources over several levels */
+                                while(*myPath && U_SUCCESS(*status)) {
+                                    r = res_findResource(&(dataEntry->fData), r, &myPath, &temp);
+                                    if(r != RES_BOGUS) { /* found a resource, but it might be an indirection */
+                                        resB = init_resb_result(&(dataEntry->fData), r, temp, -1, dataEntry, result, noAlias+1, resB, status);
+                                        result = resB;
+                                        if(result) {
+                                            r = result->fRes; /* switch to a new resource, possibly a new tree */
+                                            dataEntry = result->fData;
+                                        }
+                                    } else { /* no resource found, we don't really want to look anymore on this level */
+                                        break;
+                                    }
+                                }
+                                dataEntry = dataEntry->fParent;
+                                /* res_findResource() modified the path; restore it for the parent. */
+                                uprv_strcpy(pathBuf, keyPath);
+                                myPath = pathBuf;
+                            } while(r == RES_BOGUS && dataEntry != NULL);
+                            if(r == RES_BOGUS) {
+                                *status = U_MISSING_RESOURCE_ERROR;
+                                result = resB;
+                            }
+                            if(pathBuf != stackPath) {
+                                uprv_free(pathBuf);
+                            }
+                        }
+                    } else { /* we failed to open the resource we're aliasing to */
+                        *status = intStatus;
+                    }
+                    if(chAlias != stackAlias) {
+                        uprv_free(chAlias);
+                    }
+                    if(mainRes != result) {
+                        ures_close(mainRes);
+                    }
+                    ResourceTracer(resB).maybeTrace("getalias");
+                    return result;
+                }
+            } else {
+                /* bad alias, should be an error */
+                *status = U_ILLEGAL_ARGUMENT_ERROR;
+                return resB;
+            }
+        } else {
+            *status = U_TOO_MANY_ALIASES_ERROR;
+            return resB;
+        }
+    }
+    if(resB == NULL) {
+        resB = (UResourceBundle *)uprv_malloc(sizeof(UResourceBundle));
+        /* test for NULL */
+        if (resB == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
+        ures_setIsStackObject(resB, FALSE);
+        resB->fResPath = NULL;
+        resB->fResPathLen = 0;
+    } else {
+        if(resB->fData != NULL) {
+            entryClose(resB->fData);
+        }
+        if(resB->fVersion != NULL) {
+            uprv_free(resB->fVersion);
+        }
+        /*
+           weiv: if stack object was passed in, it doesn't really need to be reinited,
+           since the purpose of initing is to remove stack junk. However, at this point
+           we would not do anything to an allocated object, so stack object should be
+           treated the same
+         */
+        /*
+           if(ures_isStackObject(resB) != FALSE) {
+           ures_initStackObject(resB);
+           }
+         */
+        if(parent != resB) {
+            ures_freeResPath(resB);
+        }
+    }
+    resB->fData = realData;
+    entryIncrease(resB->fData);
+    resB->fHasFallback = FALSE;
+    resB->fIsTopLevel = FALSE;
+    resB->fIndex = -1;
+    resB->fKey = key;
+    /*resB->fParentRes = parent;*/
+    resB->fTopLevelData = parent->fTopLevelData;
+    if(parent->fResPath && parent != resB) {
+        ures_appendResPath(resB, parent->fResPath, parent->fResPathLen, status);
+    }
+    /*
+     * Extend the path only while every previous append succeeded: after a
+     * failed append fResPath/fResPathLen may not describe a valid string,
+     * so the path must be neither extended nor indexed.
+     */
+    if(U_SUCCESS(*status)) {
+        if(key != NULL) {
+            ures_appendResPath(resB, key, (int32_t)uprv_strlen(key), status);
+            if(U_SUCCESS(*status) && resB->fResPath[resB->fResPathLen-1] != RES_PATH_SEPARATOR) {
+                ures_appendResPath(resB, RES_PATH_SEPARATOR_S, 1, status);
+            }
+        } else if(idx >= 0) {
+            char buf[256];
+            int32_t len = T_CString_integerToString(buf, idx, 10);
+            ures_appendResPath(resB, buf, len, status);
+            if(U_SUCCESS(*status) && resB->fResPath[resB->fResPathLen-1] != RES_PATH_SEPARATOR) {
+                ures_appendResPath(resB, RES_PATH_SEPARATOR_S, 1, status);
+            }
+        }
+    }
+    /* Make sure that Purify doesn't complain about uninitialized memory copies. */
+    {
+        int32_t usedLen = ((resB->fResBuf == resB->fResPath) ? resB->fResPathLen : 0);
+        uprv_memset(resB->fResBuf + usedLen, 0, sizeof(resB->fResBuf) - usedLen);
+    }
+
+    resB->fVersion = NULL;
+    resB->fRes = r;
+    /*resB->fParent = parent->fRes;*/
+    uprv_memmove(&resB->fResData, rdata, sizeof(ResourceData));
+    resB->fSize = res_countArrayItems(&(resB->fResData), resB->fRes);
+    ResourceTracer(resB).trace("get");
+    return resB;
+}
+
+/*
+ * Makes `r` a copy of `original`.
+ *
+ * r         destination bundle; if NULL a new heap bundle is allocated,
+ *           otherwise its current contents are closed first
+ * original  bundle to copy; if NULL, r is returned unchanged
+ * status    in/out error code; U_MEMORY_ALLOCATION_ERROR on allocation failure
+ *
+ * Returns r (or the new bundle). The copy gets its own resource path, keeps
+ * the stack/heap marking of the destination, and takes an additional
+ * reference on the shared data entry.
+ */
+UResourceBundle *ures_copyResb(UResourceBundle *r, const UResourceBundle *original, UErrorCode *status) {
+    UBool isStackObject;
+    if(U_FAILURE(*status) || r == original) {
+        return r;
+    }
+    if(original != NULL) {
+        if(r == NULL) {
+            isStackObject = FALSE;
+            r = (UResourceBundle *)uprv_malloc(sizeof(UResourceBundle));
+            /* test for NULL */
+            if (r == NULL) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                return NULL;
+            }
+        } else {
+            isStackObject = ures_isStackObject(r);
+            ures_closeBundle(r, FALSE);
+        }
+        uprv_memcpy(r, original, sizeof(UResourceBundle));
+        /* The bitwise copy duplicated pointers that each bundle must own
+           separately; detach them from the original. */
+        r->fResPath = NULL;
+        r->fResPathLen = 0;
+        /*
+         * fVersion is released with uprv_free() when a bundle is closed, so
+         * sharing the pointer would free it twice. The copy starts without a
+         * version string.
+         * NOTE(review): assumes the version string is recomputed on demand
+         * when fVersion is NULL - confirm against ures_getVersion().
+         */
+        r->fVersion = NULL;
+        if(original->fResPath) {
+            ures_appendResPath(r, original->fResPath, original->fResPathLen, status);
+        }
+        ures_setIsStackObject(r, isStackObject);
+        if(r->fData != NULL) {
+            entryIncrease(r->fData);
+        }
+    }
+    return r;
+}
+
+/**
+ * Functions to retrieve data from resource bundles.
+ */
+
+/*
+ * Returns the string value of the bundle and stores its length in *len.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR for a NULL bundle and
+ * U_RESOURCE_TYPE_MISMATCH when res_getString() yields no string.
+ */
+U_CAPI const UChar* U_EXPORT2 ures_getString(const UResourceBundle* resB, int32_t* len, UErrorCode* status) {
+    if (status==NULL || U_FAILURE(*status)) {
+        return NULL;
+    }
+    if(resB == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    const UChar *const text = res_getString({resB}, &(resB->fResData), resB->fRes, len);
+    if (text == NULL) {
+        *status = U_RESOURCE_TYPE_MISMATCH;
+    }
+    return text;
+}
+
+/*
+ * Converts a UTF-16 resource string to UTF-8.
+ *
+ * s16, length16  source string and its length
+ * dest           destination buffer (may be NULL when the capacity is 0)
+ * pLength        in: capacity of dest (0 if pLength is NULL); out: result length
+ * forceCopy      if TRUE the result always starts exactly at dest
+ * status         in/out error code
+ *
+ * Returns a pointer to the UTF-8 text, which is not necessarily dest when
+ * forceCopy is FALSE, or NULL on a prior failure or invalid arguments.
+ */
+static const char *
+ures_toUTF8String(const UChar *s16, int32_t length16,
+                  char *dest, int32_t *pLength,
+                  UBool forceCopy,
+                  UErrorCode *status) {
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    int32_t capacity = (pLength != NULL) ? *pLength : 0;
+    if (capacity < 0 || (capacity > 0 && dest == NULL)) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    if (length16 == 0) {
+        /* empty string, return as read-only pointer */
+        if (pLength != NULL) {
+            *pLength = 0;
+        }
+        if (!forceCopy) {
+            return "";
+        }
+        u_terminateChars(dest, capacity, 0, status);
+        return dest;
+    }
+
+    /* We need to transform the string to the destination buffer. */
+    if (capacity < length16) {
+        /* No chance for the string to fit. Pure preflighting. */
+        return u_strToUTF8(NULL, 0, pLength, s16, length16, status);
+    }
+    if (!forceCopy && (length16 <= 0x2aaaaaaa)) {
+        /*
+         * Each UChar turns into at most three UTF-8 bytes, so a buffer of
+         * 3 * length16 + 1 bytes (the +1 is for the NUL terminator) always
+         * suffices. When dest is larger than that, only its tail is used so
+         * that callers do not come to expect the result to start at dest -
+         * hopefully leading to more robust code for when resource bundles
+         * may store UTF-8 natively (in which case dest would not be used
+         * at all).
+         *
+         * This is skipped for forceCopy=TRUE because then the caller
+         * expects the string to start exactly at dest.
+         *
+         * The test above for <= 0x2aaaaaaa prevents overflows.
+         */
+        const int32_t maxLength = 3 * length16 + 1;
+        if (capacity > maxLength) {
+            dest += capacity - maxLength;
+            capacity = maxLength;
+        }
+    }
+    return u_strToUTF8(dest, capacity, pLength, s16, length16, status);
+}
+
+/*
+ * Returns the bundle's string value converted to UTF-8.
+ * dest/pLength/forceCopy are passed through to ures_toUTF8String();
+ * errors from ures_getString() make that call return NULL.
+ */
+U_CAPI const char * U_EXPORT2
+ures_getUTF8String(const UResourceBundle *resB,
+                   char *dest, int32_t *pLength,
+                   UBool forceCopy,
+                   UErrorCode *status) {
+    /* Initialized because ures_getString() leaves it untouched on its early
+       error returns, and it is passed by value below regardless. */
+    int32_t length16 = 0;
+    const UChar *s16 = ures_getString(resB, &length16, status);
+    return ures_toUTF8String(s16, length16, dest, pLength, forceCopy, status);
+}
+
+/*
+ * Returns the binary value of the bundle and stores its length in *len.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR for a NULL bundle and
+ * U_RESOURCE_TYPE_MISMATCH when res_getBinary() yields no data.
+ */
+U_CAPI const uint8_t* U_EXPORT2 ures_getBinary(const UResourceBundle* resB, int32_t* len,
+                                               UErrorCode* status) {
+    if (status==NULL || U_FAILURE(*status)) {
+        return NULL;
+    }
+    if(resB == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    const uint8_t *const bytes = res_getBinary({resB}, &(resB->fResData), resB->fRes, len);
+    if (bytes == NULL) {
+        *status = U_RESOURCE_TYPE_MISMATCH;
+    }
+    return bytes;
+}
+
+/*
+ * Returns the integer-vector value of the bundle and stores its length in *len.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR for a NULL bundle and
+ * U_RESOURCE_TYPE_MISMATCH when res_getIntVector() yields no data.
+ */
+U_CAPI const int32_t* U_EXPORT2 ures_getIntVector(const UResourceBundle* resB, int32_t* len,
+                                                   UErrorCode* status) {
+    if (status==NULL || U_FAILURE(*status)) {
+        return NULL;
+    }
+    if(resB == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    const int32_t *const values = res_getIntVector({resB}, &(resB->fResData), resB->fRes, len);
+    if (values == NULL) {
+        *status = U_RESOURCE_TYPE_MISMATCH;
+    }
+    return values;
+}
+
+/* this function returns a signed integer */
+/* it performs sign extension */
+/* On any error (bad status, NULL bundle, non-integer resource) 0xffffffff is returned. */
+U_CAPI int32_t U_EXPORT2 ures_getInt(const UResourceBundle* resB, UErrorCode *status) {
+    if (status==NULL || U_FAILURE(*status)) {
+        return 0xffffffff;
+    }
+    if(resB == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+    } else if(RES_GET_TYPE(resB->fRes) != URES_INT) {
+        *status = U_RESOURCE_TYPE_MISMATCH;
+    } else {
+        return res_getInt({resB}, resB->fRes);
+    }
+    return 0xffffffff;
+}
+
+/*
+ * Returns the bundle's integer value as an unsigned number.
+ * On any error (bad status, NULL bundle, non-integer resource) 0xffffffff is returned.
+ */
+U_CAPI uint32_t U_EXPORT2 ures_getUInt(const UResourceBundle* resB, UErrorCode *status) {
+    if (status==NULL || U_FAILURE(*status)) {
+        return 0xffffffff;
+    }
+    if(resB == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+    } else if(RES_GET_TYPE(resB->fRes) != URES_INT) {
+        *status = U_RESOURCE_TYPE_MISMATCH;
+    } else {
+        return res_getUInt({resB}, resB->fRes);
+    }
+    return 0xffffffff;
+}
+
+/* Returns the public type of the bundle's resource, or URES_NONE for a NULL bundle. */
+U_CAPI UResType U_EXPORT2 ures_getType(const UResourceBundle *resB) {
+    if(resB != NULL) {
+        return res_getPublicType(resB->fRes);
+    }
+    return URES_NONE;
+}
+
+/* Returns the key stored in the bundle (fKey), or NULL for a NULL bundle. */
+U_CAPI const char * U_EXPORT2 ures_getKey(const UResourceBundle *resB) {
+    //
+    // TODO: Trace ures_getKey? I guess not usually.
+    //
+    // We usually get the key string to decide whether we want the value, or to
+    // make a key-value pair. Tracing the value should suffice.
+    //
+    // However, I believe we have some data (e.g., in res_index) where the key
+    // strings are the data. Tracing the enclosing table should suffice.
+    //
+    return (resB == NULL) ? NULL : resB->fKey;
+}
+
+/* Returns the item count stored in the bundle (fSize), or 0 for a NULL bundle. */
+U_CAPI int32_t U_EXPORT2 ures_getSize(const UResourceBundle *resB) {
+    return (resB == NULL) ? 0 : resB->fSize;
+}
+
+/*
+ * Returns the string for child resource `r` (item sIndex of resB).
+ * If `r` is an alias, the item is first opened as a temporary bundle through
+ * ures_getByIndex() and the string is read from that bundle.
+ */
+static const UChar* ures_getStringWithAlias(const UResourceBundle *resB, Resource r, int32_t sIndex, int32_t *len, UErrorCode *status) {
+    if(RES_GET_TYPE(r) != URES_ALIAS) {
+        return res_getString({resB, sIndex}, &(resB->fResData), r, len);
+    }
+
+    UResourceBundle *aliasTarget = ures_getByIndex(resB, sIndex, NULL, status);
+    const UChar *text = ures_getString(aliasTarget, len, status);
+    ures_close(aliasTarget);
+    return text;
+}
+
+/* Rewinds the bundle's iteration index to -1; a NULL bundle is ignored. */
+U_CAPI void U_EXPORT2 ures_resetIterator(UResourceBundle *resB){
+    if(resB != NULL) {
+        resB->fIndex = -1;
+    }
+}
+
+/* Returns TRUE while the iteration index is before the last item; FALSE for a NULL bundle. */
+U_CAPI UBool U_EXPORT2 ures_hasNext(const UResourceBundle *resB) {
+    if(resB != NULL) {
+        return (UBool)(resB->fIndex < resB->fSize-1);
+    }
+    return FALSE;
+}
+
+/*
+ * Advances the bundle's iterator and returns the string at the new position.
+ *
+ * len     receives the string length
+ * key     for table bundles, receives the item's key
+ * status  U_ILLEGAL_ARGUMENT_ERROR for a NULL bundle,
+ *         U_INDEX_OUTOFBOUNDS_ERROR when the iterator is already on the last
+ *         item, U_RESOURCE_TYPE_MISMATCH for int/binary/int-vector bundles
+ *
+ * Returns the string, or NULL on error.
+ */
+U_CAPI const UChar* U_EXPORT2 ures_getNextString(UResourceBundle *resB, int32_t* len, const char ** key, UErrorCode *status) {
+    if (status==NULL || U_FAILURE(*status)) {
+        return NULL;
+    }
+    if(resB == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    if(resB->fIndex == resB->fSize-1) {
+        *status = U_INDEX_OUTOFBOUNDS_ERROR;
+        return NULL;
+    }
+
+    Resource item = RES_BOGUS;
+    resB->fIndex++;
+    switch(RES_GET_TYPE(resB->fRes)) {
+    case URES_STRING:
+    case URES_STRING_V2:
+        return res_getString({resB}, &(resB->fResData), resB->fRes, len);
+    case URES_TABLE:
+    case URES_TABLE16:
+    case URES_TABLE32:
+        item = res_getTableItemByIndex(&(resB->fResData), resB->fRes, resB->fIndex, key);
+        /* TODO: do the fallback when item == RES_BOGUS && resB->fHasFallback */
+        break;
+    case URES_ARRAY:
+    case URES_ARRAY16:
+        item = res_getArrayItem(&(resB->fResData), resB->fRes, resB->fIndex);
+        /* TODO: do the fallback when item == RES_BOGUS && resB->fHasFallback */
+        break;
+    case URES_ALIAS:
+        item = resB->fRes;
+        break;
+    case URES_INT:
+    case URES_BINARY:
+    case URES_INT_VECTOR:
+        *status = U_RESOURCE_TYPE_MISMATCH;
+        return NULL;
+    default:
+        return NULL;
+    }
+    return ures_getStringWithAlias(resB, item, resB->fIndex, len, status);
+}
+
+/*
+ * Advances the bundle's iterator and returns the resource at the new position.
+ *
+ * fillIn  bundle to reuse for the result, or NULL to allocate one
+ * status  U_ILLEGAL_ARGUMENT_ERROR for a NULL bundle,
+ *         U_INDEX_OUTOFBOUNDS_ERROR when the iterator is already on the last item
+ *
+ * For int/binary/string/int-vector bundles the result is a copy of resB itself.
+ * On error, and for resource types not handled here, fillIn is returned as passed.
+ */
+U_CAPI UResourceBundle* U_EXPORT2 ures_getNextResource(UResourceBundle *resB, UResourceBundle *fillIn, UErrorCode *status) {
+    if (status==NULL || U_FAILURE(*status)) {
+        /*return NULL;*/
+        return fillIn;
+    }
+    if(resB == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        /*return NULL;*/
+        return fillIn;
+    }
+    if(resB->fIndex == resB->fSize-1) {
+        *status = U_INDEX_OUTOFBOUNDS_ERROR;
+        /*return NULL;*/
+        return fillIn;
+    }
+
+    const char *itemKey = NULL;
+    Resource item = RES_BOGUS;
+    resB->fIndex++;
+    switch(RES_GET_TYPE(resB->fRes)) {
+    case URES_INT:
+    case URES_BINARY:
+    case URES_STRING:
+    case URES_STRING_V2:
+    case URES_INT_VECTOR:
+        return ures_copyResb(fillIn, resB, status);
+    case URES_TABLE:
+    case URES_TABLE16:
+    case URES_TABLE32:
+        item = res_getTableItemByIndex(&(resB->fResData), resB->fRes, resB->fIndex, &itemKey);
+        /* TODO: do the fallback when item == RES_BOGUS && resB->fHasFallback */
+        break;
+    case URES_ARRAY:
+    case URES_ARRAY16:
+        item = res_getArrayItem(&(resB->fResData), resB->fRes, resB->fIndex);
+        /* TODO: do the fallback when item == RES_BOGUS && resB->fHasFallback */
+        break;
+    default:
+        /*return NULL;*/
+        return fillIn;
+    }
+    return init_resb_result(&(resB->fResData), item, itemKey, resB->fIndex, resB->fData, resB, 0, fillIn, status);
+}
+
+/*
+ * Returns the resource at position indexR of the bundle.
+ *
+ * fillIn  bundle to reuse for the result, or NULL to allocate one
+ * status  U_ILLEGAL_ARGUMENT_ERROR for a NULL bundle,
+ *         U_MISSING_RESOURCE_ERROR when indexR is outside [0, fSize)
+ *
+ * For int/binary/string/int-vector bundles the result is a copy of resB itself.
+ * On error, and for resource types not handled here, fillIn is returned as passed.
+ */
+U_CAPI UResourceBundle* U_EXPORT2 ures_getByIndex(const UResourceBundle *resB, int32_t indexR, UResourceBundle *fillIn, UErrorCode *status) {
+    if (status==NULL || U_FAILURE(*status)) {
+        /*return NULL;*/
+        return fillIn;
+    }
+    if(resB == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        /*return NULL;*/
+        return fillIn;
+    }
+    if(indexR < 0 || indexR >= resB->fSize) {
+        *status = U_MISSING_RESOURCE_ERROR;
+        /*return NULL;*/
+        return fillIn;
+    }
+
+    const char *itemKey = NULL;
+    Resource item = RES_BOGUS;
+    switch(RES_GET_TYPE(resB->fRes)) {
+    case URES_INT:
+    case URES_BINARY:
+    case URES_STRING:
+    case URES_STRING_V2:
+    case URES_INT_VECTOR:
+        return ures_copyResb(fillIn, resB, status);
+    case URES_TABLE:
+    case URES_TABLE16:
+    case URES_TABLE32:
+        item = res_getTableItemByIndex(&(resB->fResData), resB->fRes, indexR, &itemKey);
+        /* TODO: do the fallback when item == RES_BOGUS && resB->fHasFallback */
+        break;
+    case URES_ARRAY:
+    case URES_ARRAY16:
+        item = res_getArrayItem(&(resB->fResData), resB->fRes, indexR);
+        /* TODO: do the fallback when item == RES_BOGUS && resB->fHasFallback */
+        break;
+    default:
+        /*return NULL;*/
+        return fillIn;
+    }
+    return init_resb_result(&(resB->fResData), item, itemKey, indexR, resB->fData, resB, 0, fillIn, status);
+}
+
+/**
+ * Returns the string at position indexS of resB, storing its length through len.
+ *
+ * - A string resource returns its own value.
+ * - Tables and arrays return the indexS-th child via ures_getStringWithAlias().
+ * - An alias resource is passed to ures_getStringWithAlias() directly.
+ * - int, binary and int-vector resources set U_RESOURCE_TYPE_MISMATCH.
+ * - Any other type sets U_INTERNAL_PROGRAM_ERROR.
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR when resB is NULL and U_MISSING_RESOURCE_ERROR
+ * when indexS is outside [0, resB->fSize). Returns NULL on every error path
+ * without writing through len.
+ */
+U_CAPI const UChar* U_EXPORT2 ures_getStringByIndex(const UResourceBundle *resB, int32_t indexS, int32_t* len, UErrorCode *status) {
+    if (status == NULL || U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (resB == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    if (indexS < 0 || indexS >= resB->fSize) {
+        *status = U_MISSING_RESOURCE_ERROR;
+        return NULL;
+    }
+
+    const char *itemKey = NULL;   /* written by the table lookup, otherwise unused */
+    Resource item = RES_BOGUS;
+
+    switch (RES_GET_TYPE(resB->fRes)) {
+    case URES_STRING:
+    case URES_STRING_V2:
+        return res_getString({resB}, &(resB->fResData), resB->fRes, len);
+    case URES_TABLE:
+    case URES_TABLE16:
+    case URES_TABLE32:
+        item = res_getTableItemByIndex(&(resB->fResData), resB->fRes, indexS, &itemKey);
+        /* TODO: do the fallback when item == RES_BOGUS && resB->fHasFallback */
+        break;
+    case URES_ARRAY:
+    case URES_ARRAY16:
+        item = res_getArrayItem(&(resB->fResData), resB->fRes, indexS);
+        /* TODO: do the fallback when item == RES_BOGUS && resB->fHasFallback */
+        break;
+    case URES_ALIAS:
+        item = resB->fRes;
+        break;
+    case URES_INT:
+    case URES_BINARY:
+    case URES_INT_VECTOR:
+        *status = U_RESOURCE_TYPE_MISMATCH;
+        return NULL;
+    default:
+        /* must not occur */
+        *status = U_INTERNAL_PROGRAM_ERROR;
+        return NULL;
+    }
+
+    return ures_getStringWithAlias(resB, item, indexS, len, status);
+}
+
+/**
+ * UTF-8 variant of ures_getStringByIndex(): fetches the UTF-16 string at
+ * position idx of resB and hands it to ures_toUTF8String() together with the
+ * caller's dest / pLength / forceCopy arguments.
+ *
+ * @return whatever ures_toUTF8String() returns for the fetched string.
+ */
+U_CAPI const char * U_EXPORT2
+ures_getUTF8StringByIndex(const UResourceBundle *resB,
+                          int32_t idx,
+                          char *dest, int32_t *pLength,
+                          UBool forceCopy,
+                          UErrorCode *status) {
+    /*
+     * Zero-initialized: ures_getStringByIndex() returns NULL on its error
+     * paths without writing the length, and the value is still passed on below.
+     */
+    int32_t length16 = 0;
+    const UChar *s16 = ures_getStringByIndex(resB, idx, &length16, status);
+    return ures_toUTF8String(s16, length16, dest, pLength, forceCopy, status);
+}
+
+/*U_CAPI const char *ures_getResPath(UResourceBundle *resB) {
+ return resB->fResPath;
+}*/
+
+/**
+ * Opens the resource named by a full path string and returns it in fillIn.
+ *
+ * Accepted shapes, with RES_PATH_SEPARATOR written as '/':
+ *   "locale", "locale/sub/path", "/package/locale", "/package/locale/sub/path".
+ * A leading separator means the first segment is the package name passed to
+ * ures_open(); otherwise the package name is NULL.
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR when path is NULL, or when it starts with a
+ * separator but contains no second one; U_MEMORY_ALLOCATION_ERROR when the
+ * working copy of the path cannot be allocated. Nothing is done if status is
+ * NULL or already a failure.
+ *
+ * @return the result of ures_findSubResource() / ures_copyResb() on success,
+ *         otherwise fillIn as passed in.
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_findResource(const char* path, UResourceBundle *fillIn, UErrorCode *status)
+{
+    UResourceBundle *first = NULL;
+    UResourceBundle *result = fillIn;
+    char *packageName = NULL;
+    char *pathToResource = NULL, *save = NULL;
+    char *locale = NULL, *localeEnd = NULL;
+    int32_t length;
+
+    if(status == NULL || U_FAILURE(*status)) {
+        return result;
+    }
+    /* uprv_strlen() below must not be handed a NULL pointer. */
+    if(path == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return result;
+    }
+
+    /* Work on a private copy: the separators are overwritten with NULs below. */
+    length = (int32_t)(uprv_strlen(path)+1);
+    save = pathToResource = (char *)uprv_malloc(length*sizeof(char));
+    /* test for NULL */
+    if(pathToResource == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return result;
+    }
+    uprv_memcpy(pathToResource, path, length);
+
+    locale = pathToResource;
+    if(*pathToResource == RES_PATH_SEPARATOR) { /* there is a path specification */
+        pathToResource++;
+        packageName = pathToResource;
+        pathToResource = uprv_strchr(pathToResource, RES_PATH_SEPARATOR);
+        if(pathToResource == NULL) {
+            /* No separator after the package name; ures_open() below sees the failure. */
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        } else {
+            *pathToResource = 0;
+            locale = pathToResource+1;
+        }
+    }
+
+    /* Cut the locale segment off from the remaining sub-resource path, if any. */
+    localeEnd = uprv_strchr(locale, RES_PATH_SEPARATOR);
+    if(localeEnd != NULL) {
+        *localeEnd = 0;
+    }
+
+    first = ures_open(packageName, locale, status);
+
+    if(U_SUCCESS(*status)) {
+        if(localeEnd) {
+            result = ures_findSubResource(first, localeEnd+1, fillIn, status);
+        } else {
+            result = ures_copyResb(fillIn, first, status);
+        }
+        ures_close(first);
+    }
+    uprv_free(save);
+    return result;
+}
+
+/**
+ * Follows path, one res_findResource() step at a time, starting from resB,
+ * and returns the bundle reached in fillIn.
+ *
+ * Aliases are resolved at this level rather than inside res_findResource(),
+ * which is why the lookup is a loop: each step's result becomes the starting
+ * bundle for the next step until path is used up.
+ *
+ * Sets U_MISSING_RESOURCE_ERROR when a step yields RES_BOGUS. Nothing is done
+ * if status is NULL or already a failure.
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_findSubResource(const UResourceBundle *resB, char* path, UResourceBundle *fillIn, UErrorCode *status)
+{
+    if(status == NULL || U_FAILURE(*status)) {
+        return fillIn;
+    }
+
+    UResourceBundle *found = fillIn;
+    const char *key;
+
+    for(;;) {
+        Resource step = res_findResource(&(resB->fResData), resB->fRes, &path, &key);
+        if(step == RES_BOGUS) {
+            *status = U_MISSING_RESOURCE_ERROR;
+            break;
+        }
+        found = init_resb_result(&(resB->fResData), step, key, -1, resB->fData, resB, 0, fillIn, status);
+        resB = found;
+        if(*path == 0) {
+            break; /* the whole path has been consumed */
+        }
+    }
+
+    return found;
+}
+/**
+ * Looks up inKey with ures_getByKeyWithFallback() and returns the string
+ * value of the resource found.
+ *
+ * A value consisting of exactly three EMPTY_SET code units is treated as
+ * "no value": the function then returns NULL, reports length 0 and sets
+ * U_MISSING_RESOURCE_ERROR.
+ *
+ * @param len  optional; receives the string length when the lookup succeeds.
+ * @return the string, or NULL on failure. Nothing is done (and NULL is
+ *         returned) if status is NULL or already a failure.
+ */
+U_CAPI const UChar* U_EXPORT2
+ures_getStringByKeyWithFallback(const UResourceBundle *resB,
+                                const char* inKey,
+                                int32_t* len,
+                                UErrorCode *status) {
+    /* Same entry guard as the sibling getters; *status is dereferenced below. */
+    if (status == NULL || U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    UResourceBundle stack;
+    const UChar* retVal = NULL;
+    ures_initStackObject(&stack);
+    ures_getByKeyWithFallback(resB, inKey, &stack, status);
+    int32_t length = 0;
+    retVal = ures_getString(&stack, &length, status);
+    ures_close(&stack);
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (length == 3 && retVal[0] == EMPTY_SET && retVal[1] == EMPTY_SET && retVal[2] == EMPTY_SET ) {
+        retVal = NULL;
+        length = 0;
+        *status = U_MISSING_RESOURCE_ERROR;
+    }
+    if (len != NULL) {
+        *len = length;
+    }
+    return retVal;
+}
+
+/*
+ Like res_getTableItemByKey but accepts full paths like "NumberElements/latn/patternsShort".
+
+ The key is first copied into a CharString so that every RES_PATH_SEPARATOR
+ can be overwritten with a NUL in place; each resulting segment is then looked
+ up in the resource found for the previous segment.
+
+ Returns RES_BOGUS if the copy fails or if the walk stops (lookup failure or a
+ non-container resource) before the whole path has been consumed.
+*/
+static Resource getTableItemByKeyPath(const ResourceData *pResData, Resource table, const char *key) {
+ Resource resource = table; /* The current resource */
+ icu::CharString path;
+ UErrorCode errorCode = U_ZERO_ERROR;
+ path.append(key, errorCode);
+ if (U_FAILURE(errorCode)) { return RES_BOGUS; }
+ char *pathPart = path.data(); /* Path from current resource to desired resource */
+ UResType type = (UResType)RES_GET_TYPE(resource); /* the current resource type */
+ while (*pathPart && resource != RES_BOGUS && URES_IS_CONTAINER(type)) {
+ char *nextPathPart = uprv_strchr(pathPart, RES_PATH_SEPARATOR);
+ if (nextPathPart != NULL) {
+ *nextPathPart = 0; /* Terminating null for this part of path. */
+ nextPathPart++;
+ } else {
+ nextPathPart = uprv_strchr(pathPart, 0); /* Last segment: point at the terminating NUL so the loop ends. */
+ }
+ int32_t t; /* out-parameter required by res_getTableItemByKey; its value is not used here */
+ const char *pathP = pathPart;
+ resource = res_getTableItemByKey(pResData, resource, &t, &pathP);
+ type = (UResType)RES_GET_TYPE(resource);
+ pathPart = nextPathPart;
+ }
+ if (*pathPart) { /* segments left over: the walk stopped early */
+ return RES_BOGUS;
+ }
+ return resource;
+}
+/**
+ * Looks up inKey (which may be a multi-segment path, see getTableItemByKeyPath)
+ * in the table resB and, if it is not found there, in the parent data entries
+ * of resB.
+ *
+ * Parent lookup: for each parent entry that is not marked bogus, the path
+ * resB->fResPath + inKey is resolved from that entry's root resource with
+ * res_findResource(). If an alias is hit before the path is used up, the alias
+ * resource is handed to init_resb_result() and the walk continues from the
+ * data, resource and resource path of the bundle it returns.
+ *
+ * Status on return:
+ *  - unchanged by this function when the item is found directly in resB
+ *    (init_resb_result() may still set it);
+ *  - U_USING_DEFAULT_WARNING when found in an entry whose name is the default
+ *    locale or kRootLocaleName, otherwise U_USING_FALLBACK_WARNING;
+ *  - U_MISSING_RESOURCE_ERROR when not found anywhere;
+ *  - U_RESOURCE_TYPE_MISMATCH when resB is not a table;
+ *  - U_ILLEGAL_ARGUMENT_ERROR when resB is NULL.
+ *
+ * @return fillIn, or the bundle returned by init_resb_result() for the item found.
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_getByKeyWithFallback(const UResourceBundle *resB,
+ const char* inKey,
+ UResourceBundle *fillIn,
+ UErrorCode *status) {
+ Resource res = RES_BOGUS, rootRes = RES_BOGUS;
+ /*UResourceDataEntry *realData = NULL;*/
+ UResourceBundle *helper = NULL; /* bundle used to continue the walk after an alias; closed before returning */
+
+ if (status==NULL || U_FAILURE(*status)) {
+ return fillIn;
+ }
+ if(resB == NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return fillIn;
+ }
+
+ int32_t type = RES_GET_TYPE(resB->fRes);
+ if(URES_IS_TABLE(type)) {
+ res = getTableItemByKeyPath(&(resB->fResData), resB->fRes, inKey);
+ const char* key = inKey;
+ if(res == RES_BOGUS) {
+ UResourceDataEntry *dataEntry = resB->fData;
+ CharString path;
+ char *myPath = NULL;
+ const char* resPath = resB->fResPath;
+ int32_t len = resB->fResPathLen;
+ while(res == RES_BOGUS && dataEntry->fParent != NULL) { /* Otherwise, we'll look in parents */
+ dataEntry = dataEntry->fParent;
+ rootRes = dataEntry->fData.rootRes;
+
+ if(dataEntry->fBogus == U_ZERO_ERROR) {
+ path.clear();
+ if (len > 0) {
+ path.append(resPath, len, *status);
+ }
+ path.append(inKey, *status);
+ if (U_FAILURE(*status)) {
+ ures_close(helper);
+ return fillIn;
+ }
+ myPath = path.data();
+ key = inKey;
+ do {
+ res = res_findResource(&(dataEntry->fData), rootRes, &myPath, &key);
+ if (RES_GET_TYPE(res) == URES_ALIAS && *myPath) {
+ /* We hit an alias, but we didn't finish following the path. */
+ helper = init_resb_result(&(dataEntry->fData), res, NULL, -1, dataEntry, resB, 0, helper, status);
+ /*helper = init_resb_result(&(dataEntry->fData), res, inKey, -1, dataEntry, resB, 0, helper, status);*/
+ if(helper) {
+ dataEntry = helper->fData;
+ rootRes = helper->fRes;
+ resPath = helper->fResPath;
+ len = helper->fResPathLen;
+
+ } else {
+ break;
+ }
+ }
+ } while(*myPath); /* Continue until the whole path is consumed */
+ }
+ }
+ /*const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status);*/
+ if(res != RES_BOGUS) {
+ /* check if resB->fResPath gives the right name here */
+ if(uprv_strcmp(dataEntry->fName, uloc_getDefault())==0 || uprv_strcmp(dataEntry->fName, kRootLocaleName)==0) {
+ *status = U_USING_DEFAULT_WARNING;
+ } else {
+ *status = U_USING_FALLBACK_WARNING;
+ }
+
+ fillIn = init_resb_result(&(dataEntry->fData), res, inKey, -1, dataEntry, resB, 0, fillIn, status);
+ } else {
+ *status = U_MISSING_RESOURCE_ERROR;
+ }
+ } else {
+ fillIn = init_resb_result(&(resB->fResData), res, key, -1, resB->fData, resB, 0, fillIn, status); /* found directly in resB */
+ }
+ }
+ else {
+ *status = U_RESOURCE_TYPE_MISMATCH;
+ }
+ ures_close(helper);
+ return fillIn;
+}
+
+namespace {
+/**
+ * Passes the resource of `bundle` to `sink`, then does the same for the
+ * resource at the same path (bundle->fResPath) in the parent data entry,
+ * recursing until an entry has no usable parent.
+ *
+ * `value` is a caller-provided scratch object that is re-pointed at each
+ * bundle before sink.put() is called. The third argument of sink.put() is
+ * TRUE only for the last bundle in the chain (the one without a usable parent).
+ * A parent that does not contain the path is skipped silently: that lookup
+ * uses its own local error code.
+ */
+void getAllItemsWithFallback(
+ const UResourceBundle *bundle, ResourceDataValue &value,
+ ResourceSink &sink,
+ UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode)) { return; }
+ // We recursively enumerate child-first,
+ // only storing parent items in the absence of child items.
+ // The sink needs to store a placeholder value for the no-fallback/no-inheritance marker
+ // to prevent a parent item from being stored.
+ //
+ // It would be possible to recursively enumerate parent-first,
+ // overriding parent items with child items.
+ // When the sink sees the no-fallback/no-inheritance marker,
+ // then it would remove the parent's item.
+ // We would deserialize parent values even though they are overridden in a child bundle.
+ value.setData(&bundle->fResData);
+ UResourceDataEntry *parentEntry = bundle->fData->fParent;
+ UBool hasParent = parentEntry != NULL && U_SUCCESS(parentEntry->fBogus);
+ value.setResource(bundle->fRes, ResourceTracer(bundle));
+ sink.put(bundle->fKey, value, !hasParent, errorCode);
+ if (hasParent) {
+ // We might try to query the sink whether
+ // any fallback from the parent bundle is still possible.
+
+ // Turn the parent UResourceDataEntry into a UResourceBundle,
+ // much like in ures_openWithType().
+ // TODO: See if we can refactor ures_getByKeyWithFallback()
+ // and pull out an inner function that takes and returns a UResourceDataEntry
+ // so that we need not create UResourceBundle objects.
+ UResourceBundle parentBundle;
+ ures_initStackObject(&parentBundle);
+ parentBundle.fTopLevelData = parentBundle.fData = parentEntry;
+ // TODO: What is the difference between bundle fData and fTopLevelData?
+ uprv_memcpy(&parentBundle.fResData, &parentEntry->fData, sizeof(ResourceData));
+ // TODO: Try to replace bundle.fResData with just using bundle.fData->fData.
+ parentBundle.fHasFallback = !parentBundle.fResData.noFallback;
+ parentBundle.fIsTopLevel = TRUE;
+ parentBundle.fRes = parentBundle.fResData.rootRes;
+ parentBundle.fSize = res_countArrayItems(&(parentBundle.fResData), parentBundle.fRes);
+ parentBundle.fIndex = -1;
+ entryIncrease(parentEntry); // NOTE(review): presumably balanced by ures_close(&parentBundle) below -- confirm
+
+ // Look up the container item in the parent bundle.
+ UResourceBundle containerBundle;
+ ures_initStackObject(&containerBundle);
+ const UResourceBundle *rb;
+ UErrorCode pathErrorCode = U_ZERO_ERROR; // Ignore if parents up to root do not have this path.
+ if (bundle->fResPath == NULL || *bundle->fResPath == 0) {
+ rb = &parentBundle;
+ } else {
+ rb = ures_getByKeyWithFallback(&parentBundle, bundle->fResPath,
+ &containerBundle, &pathErrorCode);
+ }
+ if (U_SUCCESS(pathErrorCode)) {
+ getAllItemsWithFallback(rb, value, sink, errorCode);
+ }
+ ures_close(&containerBundle);
+ ures_close(&parentBundle);
+ }
+}
+
+} // namespace
+
+// Points `value` at the resource found for `path` under `bundle`
+// (looked up with ures_getByKeyWithFallback), or at `bundle` itself
+// when `path` is the empty string.
+//
+// The caller supplies the ResourceDataValue so that no cast from a
+// ResourceValue is needed; the price is that the caller has to know which
+// subclass to create. A more polymorphic design, as in Java, would return a
+// ResourceValue pointer (possibly in a LocalPointer) owned by the caller.
+//
+// The caller also supplies `tempFillIn`, the bundle that receives the lookup
+// result, so that the value's ResourceTracer can refer to a bundle that is not
+// local to this function. Without tracing, a function-local bundle would do.
+//
+// Sets U_ILLEGAL_ARGUMENT_ERROR if `path` is null.
+U_CAPI void U_EXPORT2
+ures_getValueWithFallback(const UResourceBundle *bundle, const char *path,
+                          UResourceBundle *tempFillIn,
+                          ResourceDataValue &value, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    if (path == nullptr) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // An empty path designates the bundle itself.
+    const UResourceBundle *target = bundle;
+    if (*path != 0) {
+        target = ures_getByKeyWithFallback(bundle, path, tempFillIn, &errorCode);
+        if (U_FAILURE(errorCode)) {
+            return;
+        }
+    }
+
+    value.setData(&target->fResData);
+    value.setResource(target->fRes, ResourceTracer(target));
+}
+
+// Enumerates the resource at `path` under `bundle` (or `bundle` itself for an
+// empty path) into `sink`, including the same path in parent bundles; see the
+// file-local getAllItemsWithFallback() for the enumeration order.
+//
+// Sets U_ILLEGAL_ARGUMENT_ERROR if `path` is null.
+U_CAPI void U_EXPORT2
+ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
+                             icu::ResourceSink &sink, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    if (path == nullptr) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // Receives the lookup result when a non-empty path is given.
+    StackUResourceBundle stackBundle;
+    const UResourceBundle *container = bundle;
+    if (*path != 0) {
+        container = ures_getByKeyWithFallback(bundle, path, stackBundle.getAlias(), &errorCode);
+        if (U_FAILURE(errorCode)) {
+            return;
+        }
+    }
+
+    // Get all table items with fallback.
+    ResourceDataValue value;
+    getAllItemsWithFallback(container, value, sink, errorCode);
+}
+/**
+ * Returns the item stored under inKey in the table resB, written into fillIn
+ * by init_resb_result().
+ *
+ * If the key is not in resB itself and resB->fHasFallback is TRUE, the item is
+ * looked up with getFallbackData(). Any failure of that lookup is reported as
+ * U_MISSING_RESOURCE_ERROR, as is a missing key when fallback is disabled.
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR when resB is NULL and U_RESOURCE_TYPE_MISMATCH
+ * when resB is not a table. Nothing is done if status is NULL or already a
+ * failure.
+ *
+ * @return the bundle produced by init_resb_result(), or fillIn on error.
+ */
+U_CAPI UResourceBundle* U_EXPORT2 ures_getByKey(const UResourceBundle *resB, const char* inKey, UResourceBundle *fillIn, UErrorCode *status) {
+ Resource res = RES_BOGUS;
+ UResourceDataEntry *realData = NULL;
+ const char *key = inKey;
+
+ if (status==NULL || U_FAILURE(*status)) {
+ return fillIn;
+ }
+ if(resB == NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return fillIn;
+ }
+
+ int32_t type = RES_GET_TYPE(resB->fRes);
+ if(URES_IS_TABLE(type)) {
+ int32_t t;
+ res = res_getTableItemByKey(&(resB->fResData), resB->fRes, &t, &key);
+ if(res == RES_BOGUS) {
+ key = inKey; /* reset: res_getTableItemByKey takes key by address and may have changed it */
+ if(resB->fHasFallback == TRUE) {
+ const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status);
+ if(U_SUCCESS(*status)) {
+ /* check if resB->fResPath gives the right name here */
+ return init_resb_result(rd, res, key, -1, realData, resB, 0, fillIn, status);
+ } else {
+ *status = U_MISSING_RESOURCE_ERROR;
+ }
+ } else {
+ *status = U_MISSING_RESOURCE_ERROR;
+ }
+ } else {
+ return init_resb_result(&(resB->fResData), res, key, -1, resB->fData, resB, 0, fillIn, status);
+ }
+ }
+#if 0
+ /* this is a kind of TODO item. If we have an array with an index table, we could do this. */
+ /* not currently */
+ else if(RES_GET_TYPE(resB->fRes) == URES_ARRAY && resB->fHasFallback == TRUE) {
+ /* here should go a first attempt to locate the key using index table */
+ const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status);
+ if(U_SUCCESS(*status)) {
+ return init_resb_result(rd, res, key, realData, resB, fillIn, status);
+ } else {
+ *status = U_MISSING_RESOURCE_ERROR;
+ }
+ }
+#endif
+ else {
+ *status = U_RESOURCE_TYPE_MISMATCH;
+ }
+ return fillIn;
+}
+/**
+ * Returns the string stored under inKey in the table resB and stores its
+ * length through len.
+ *
+ * The key is looked up in resB first and, if it is missing there and
+ * resB->fHasFallback is TRUE, with getFallbackData(). In both cases:
+ *  - a string resource is returned directly via res_getString();
+ *  - an alias is resolved by opening a temporary bundle with ures_getByKey()
+ *    and reading its string with ures_getString();
+ *  - any other resource type sets U_RESOURCE_TYPE_MISMATCH.
+ *
+ * Sets U_MISSING_RESOURCE_ERROR when the key cannot be found,
+ * U_ILLEGAL_ARGUMENT_ERROR when resB is NULL and U_RESOURCE_TYPE_MISMATCH when
+ * resB is not a table. Returns NULL on every error path.
+ */
+U_CAPI const UChar* U_EXPORT2 ures_getStringByKey(const UResourceBundle *resB, const char* inKey, int32_t* len, UErrorCode *status) {
+ Resource res = RES_BOGUS;
+ UResourceDataEntry *realData = NULL;
+ const char* key = inKey;
+
+ if (status==NULL || U_FAILURE(*status)) {
+ return NULL;
+ }
+ if(resB == NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return NULL;
+ }
+
+ int32_t type = RES_GET_TYPE(resB->fRes);
+ if(URES_IS_TABLE(type)) {
+ int32_t t=0;
+
+ res = res_getTableItemByKey(&(resB->fResData), resB->fRes, &t, &key);
+
+ if(res == RES_BOGUS) {
+ key = inKey; /* reset: res_getTableItemByKey takes key by address and may have changed it */
+ if(resB->fHasFallback == TRUE) {
+ const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status);
+ if(U_SUCCESS(*status)) {
+ switch (RES_GET_TYPE(res)) {
+ case URES_STRING:
+ case URES_STRING_V2:
+ return res_getString({resB, key}, rd, res, len);
+ case URES_ALIAS:
+ {
+ const UChar* result = 0;
+ UResourceBundle *tempRes = ures_getByKey(resB, inKey, NULL, status);
+ result = ures_getString(tempRes, len, status);
+ ures_close(tempRes);
+ return result;
+ }
+ default:
+ *status = U_RESOURCE_TYPE_MISMATCH;
+ }
+ } else {
+ *status = U_MISSING_RESOURCE_ERROR;
+ }
+ } else {
+ *status = U_MISSING_RESOURCE_ERROR;
+ }
+ } else {
+ switch (RES_GET_TYPE(res)) {
+ case URES_STRING:
+ case URES_STRING_V2:
+ return res_getString({resB, key}, &(resB->fResData), res, len);
+ case URES_ALIAS:
+ {
+ const UChar* result = 0;
+ UResourceBundle *tempRes = ures_getByKey(resB, inKey, NULL, status);
+ result = ures_getString(tempRes, len, status);
+ ures_close(tempRes);
+ return result;
+ }
+ default:
+ *status = U_RESOURCE_TYPE_MISMATCH;
+ }
+ }
+ }
+#if 0
+ /* this is a kind of TODO item. If we have an array with an index table, we could do this. */
+ /* not currently */
+ else if(RES_GET_TYPE(resB->fRes) == URES_ARRAY && resB->fHasFallback == TRUE) {
+ /* here should go a first attempt to locate the key using index table */
+ const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status);
+ if(U_SUCCESS(*status)) {
+ // TODO: Tracing
+ return res_getString(rd, res, len);
+ } else {
+ *status = U_MISSING_RESOURCE_ERROR;
+ }
+ }
+#endif
+ else {
+ *status = U_RESOURCE_TYPE_MISMATCH;
+ }
+ return NULL;
+}
+
+/**
+ * UTF-8 variant of ures_getStringByKey(): fetches the UTF-16 string stored
+ * under key in resB and hands it to ures_toUTF8String() together with the
+ * caller's dest / pLength / forceCopy arguments.
+ *
+ * @return whatever ures_toUTF8String() returns for the fetched string.
+ */
+U_CAPI const char * U_EXPORT2
+ures_getUTF8StringByKey(const UResourceBundle *resB,
+                        const char *key,
+                        char *dest, int32_t *pLength,
+                        UBool forceCopy,
+                        UErrorCode *status) {
+    /*
+     * Zero-initialized: ures_getStringByKey() has error paths that return
+     * NULL without writing the length, and the value is still passed on below.
+     */
+    int32_t length16 = 0;
+    const UChar *s16 = ures_getStringByKey(resB, key, &length16, status);
+    return ures_toUTF8String(s16, length16, dest, pLength, forceCopy, status);
+}
+
+/* TODO: clean from here down */
+
+/**
+ * INTERNAL: Get the name of the first real locale (not placeholder)
+ * that has resource bundle data.
+ *
+ * Returns the fName of the bundle's data entry. Sets
+ * U_ILLEGAL_ARGUMENT_ERROR and returns NULL when resourceBundle is NULL;
+ * returns NULL without touching anything when status is NULL or already a
+ * failure.
+ */
+U_CAPI const char* U_EXPORT2
+ures_getLocaleInternal(const UResourceBundle* resourceBundle, UErrorCode* status)
+{
+    if (status == NULL || U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (resourceBundle == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    return resourceBundle->fData->fName;
+}
+/**
+ * Public entry point that forwards unchanged to ures_getLocaleInternal().
+ */
+U_CAPI const char* U_EXPORT2
+ures_getLocale(const UResourceBundle* resourceBundle,
+ UErrorCode* status)
+{
+ return ures_getLocaleInternal(resourceBundle, status);
+}
+
+
+/**
+ * Returns a locale name of the bundle, selected by type:
+ *  - ULOC_ACTUAL_LOCALE: the name of the bundle's data entry (fData);
+ *  - ULOC_VALID_LOCALE:  the name of the bundle's top-level data entry (fTopLevelData).
+ *
+ * ULOC_REQUESTED_LOCALE and every other value are rejected with
+ * U_ILLEGAL_ARGUMENT_ERROR, as is a NULL resourceBundle. Returns NULL on
+ * error, or when status is NULL or already a failure.
+ */
+U_CAPI const char* U_EXPORT2
+ures_getLocaleByType(const UResourceBundle* resourceBundle,
+                     ULocDataLocaleType type,
+                     UErrorCode* status) {
+    if (status == NULL || U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (resourceBundle == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    if (type == ULOC_ACTUAL_LOCALE) {
+        return resourceBundle->fData->fName;
+    }
+    if (type == ULOC_VALID_LOCALE) {
+        return resourceBundle->fTopLevelData->fName;
+    }
+    /* ULOC_REQUESTED_LOCALE or an unknown type */
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    return NULL;
+}
+
+/**
+ * Returns the name (fName) of the bundle's data entry, or NULL for a NULL bundle.
+ */
+U_CFUNC const char* ures_getName(const UResourceBundle* resB) {
+    return (resB == NULL) ? NULL : resB->fData->fName;
+}
+
+#ifdef URES_DEBUG
+/**
+ * Debug-only: returns the path (fPath) of the bundle's data entry,
+ * or NULL for a NULL bundle.
+ */
+U_CFUNC const char* ures_getPath(const UResourceBundle* resB) {
+    return (resB == NULL) ? NULL : resB->fData->fPath;
+}
+#endif
+/**
+ * Shared implementation behind the ures_open*() functions.
+ *
+ * Opens the data entry for localeID under path and initializes a top-level
+ * bundle for it, either in the caller's fill-in r or, when r is NULL, in a
+ * bundle allocated here.
+ *
+ * For every open type except URES_OPEN_DIRECT the locale ID is first reduced
+ * with uloc_getBaseName(); a base name that does not fit the local buffer
+ * (including the terminator) is rejected with U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * @return the initialized bundle, or NULL on any failure (also when a fill-in
+ *         was supplied). status must not be NULL: it is dereferenced unchecked.
+ */
+static UResourceBundle*
+ures_openWithType(UResourceBundle *r, const char* path, const char* localeID,
+ UResOpenType openType, UErrorCode* status) {
+ if(U_FAILURE(*status)) {
+ return NULL;
+ }
+
+ UResourceDataEntry *entry;
+ if(openType != URES_OPEN_DIRECT) {
+ /* first "canonicalize" the locale ID */
+ char canonLocaleID[ULOC_FULLNAME_CAPACITY];
+ uloc_getBaseName(localeID, canonLocaleID, UPRV_LENGTHOF(canonLocaleID), status);
+ if(U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return NULL;
+ }
+ entry = entryOpen(path, canonLocaleID, openType, status);
+ } else {
+ entry = entryOpenDirect(path, localeID, status);
+ }
+ if(U_FAILURE(*status)) {
+ return NULL;
+ }
+ if(entry == NULL) {
+ *status = U_MISSING_RESOURCE_ERROR;
+ return NULL;
+ }
+
+ UBool isStackObject;
+ if(r == NULL) {
+ r = (UResourceBundle *)uprv_malloc(sizeof(UResourceBundle));
+ if(r == NULL) {
+ entryClose(entry); /* give the entry back: no bundle will own it */
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ isStackObject = FALSE;
+ } else { // fill-in
+ isStackObject = ures_isStackObject(r); /* remembered because the memset below wipes it */
+ ures_closeBundle(r, FALSE);
+ }
+ uprv_memset(r, 0, sizeof(UResourceBundle));
+ ures_setIsStackObject(r, isStackObject);
+
+ r->fTopLevelData = r->fData = entry;
+ uprv_memcpy(&r->fResData, &entry->fData, sizeof(ResourceData));
+ r->fHasFallback = openType != URES_OPEN_DIRECT && !r->fResData.noFallback;
+ r->fIsTopLevel = TRUE;
+ r->fRes = r->fResData.rootRes;
+ r->fSize = res_countArrayItems(&(r->fResData), r->fRes);
+ r->fIndex = -1;
+
+ ResourceTracer(r).traceOpen();
+
+ return r;
+}
+/**
+ * Opens the bundle for localeID under path with the
+ * URES_OPEN_LOCALE_DEFAULT_ROOT open type. No fill-in is passed, so
+ * ures_openWithType() allocates the returned bundle; NULL is returned on failure.
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_open(const char* path, const char* localeID, UErrorCode* status) {
+ return ures_openWithType(NULL, path, localeID, URES_OPEN_LOCALE_DEFAULT_ROOT, status);
+}
+/**
+ * Same as ures_open() but uses the URES_OPEN_LOCALE_ROOT open type
+ * (NOTE(review): presumably this skips the default locale in the fallback
+ * chain, as the function name suggests -- confirm against entryOpen()).
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_openNoDefault(const char* path, const char* localeID, UErrorCode* status) {
+ return ures_openWithType(NULL, path, localeID, URES_OPEN_LOCALE_ROOT, status);
+}
+
+/**
+ * Opens a resource bundle without "canonicalizing" the locale name. No fallback will be performed
+ * or sought. However, alias substitution will happen!
+ *
+ * Uses the URES_OPEN_DIRECT open type; the returned bundle is allocated by
+ * ures_openWithType(), which returns NULL on failure.
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_openDirect(const char* path, const char* localeID, UErrorCode* status) {
+ return ures_openWithType(NULL, path, localeID, URES_OPEN_DIRECT, status);
+}
+
+/**
+ * Internal API: opens a resource bundle into a caller-provided object.
+ * Fallback chaining is set up during initialization, and the result is
+ * stored in the cache for later fallback searches.
+ *
+ * Same as ures_open(), but uses the fill-in parameter and does not allocate a new bundle.
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR when r is NULL (only if status is not already
+ * a failure). status itself must not be NULL: it is dereferenced unchecked.
+ */
+U_CAPI void U_EXPORT2
+ures_openFillIn(UResourceBundle *r, const char* path,
+ const char* localeID, UErrorCode* status) {
+ if(U_SUCCESS(*status) && r == NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ ures_openWithType(r, path, localeID, URES_OPEN_LOCALE_DEFAULT_ROOT, status);
+}
+
+/**
+ * Same as ures_openDirect(), but uses the fill-in parameter and does not allocate a new bundle.
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR when r is NULL (only if status is not already
+ * a failure). status itself must not be NULL: it is dereferenced unchecked.
+ */
+U_CAPI void U_EXPORT2
+ures_openDirectFillIn(UResourceBundle *r, const char* path, const char* localeID, UErrorCode* status) {
+ if(U_SUCCESS(*status) && r == NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ ures_openWithType(r, path, localeID, URES_OPEN_DIRECT, status);
+}
+
+/**
+ * API: Counts the members of the resource stored under resourceKey.
+ * For arrays and tables this is the number of contained resources;
+ * for strings it is 1.
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR when resourceBundle is NULL and
+ * U_MISSING_RESOURCE_ERROR when the looked-up bundle carries no data.
+ * Returns 0 in both cases, and when status is NULL or already a failure.
+ */
+U_CAPI int32_t U_EXPORT2
+ures_countArrayItems(const UResourceBundle* resourceBundle,
+                     const char* resourceKey,
+                     UErrorCode* status)
+{
+    UResourceBundle item;
+    ures_initStackObject(&item);
+    if (status == NULL || U_FAILURE(*status)) {
+        return 0;
+    }
+    if (resourceBundle == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    ures_getByKey(resourceBundle, resourceKey, &item, status);
+
+    int32_t count = 0;
+    if (item.fResData.data == NULL) {
+        *status = U_MISSING_RESOURCE_ERROR;
+    } else {
+        count = res_countArrayItems(&item.fResData, item.fRes);
+    }
+    ures_close(&item);
+    return count;
+}
+
+/**
+ * Internal function.
+ * Return the version number associated with this ResourceBundle as a string.
+ *
+ * The string is built on the first call and stored in the bundle's fVersion
+ * field (written through a cast that removes const); later calls return the
+ * stored pointer. Returns NULL for a NULL bundle or if the allocation fails.
+ *
+ * @param resourceBundle The resource bundle for which the version is checked.
+ * @return A version number string as specified in the resource bundle or its parent.
+ * The caller does not own this string.
+ * @see ures_getVersion
+ * @internal
+ */
+U_CAPI const char* U_EXPORT2
+ures_getVersionNumberInternal(const UResourceBundle *resourceBundle)
+{
+ if (!resourceBundle) return NULL;
+
+ if(resourceBundle->fVersion == NULL) {
+
+ /* If the version ID has not been built yet, then do so. Retrieve */
+ /* the minor version from the file. */
+ UErrorCode status = U_ZERO_ERROR; /* local only: a failed lookup just selects the default below */
+ int32_t minor_len = 0;
+ int32_t len;
+
+ const UChar* minor_version = ures_getStringByKey(resourceBundle, kVersionTag, &minor_len, &status);
+
+ /* Determine the length of the final version string: the length */
+ /* of the minor version read from the bundle, or 1 when there is */
+ /* none (NOTE(review): presumably sized for kDefaultMinorVersion, */
+ /* whose definition is outside this view -- confirm it is a */
+ /* single character). */
+
+ len = (minor_len > 0) ? minor_len : 1;
+
+ /* Allocate the string, and build it up. */
+ /* + 1 for zero byte */
+
+
+ ((UResourceBundle *)resourceBundle)->fVersion = (char *)uprv_malloc(1 + len);
+ /* Check for null pointer. */
+ if (((UResourceBundle *)resourceBundle)->fVersion == NULL) {
+ return NULL;
+ }
+
+ if(minor_len > 0) {
+ u_UCharsToChars(minor_version, resourceBundle->fVersion , minor_len);
+ resourceBundle->fVersion[len] = '\0';
+ }
+ else {
+ uprv_strcpy(resourceBundle->fVersion, kDefaultMinorVersion);
+ }
+ }
+
+ return resourceBundle->fVersion;
+}
+/**
+ * Public entry point that forwards unchanged to ures_getVersionNumberInternal().
+ */
+U_CAPI const char* U_EXPORT2
+ures_getVersionNumber(const UResourceBundle* resourceBundle)
+{
+ return ures_getVersionNumberInternal(resourceBundle);
+}
+/**
+ * Parses the bundle's version string (see ures_getVersionNumberInternal)
+ * into versionInfo with u_versionFromString(). Does nothing for a NULL bundle.
+ */
+U_CAPI void U_EXPORT2 ures_getVersion(const UResourceBundle* resB, UVersionInfo versionInfo) {
+ if (!resB) return;
+
+ u_versionFromString(versionInfo, ures_getVersionNumberInternal(resB));
+}
+
+/** Tree support functions *******************************
+ * INDEX_LOCALE_NAME is the bundle name opened by ures_openAvailableLocales(),
+ * INDEX_TAG is the key it reads from that bundle, and DEFAULT_TAG is the
+ * key / keyword value used for "default" lookups in
+ * ures_getFunctionalEquivalent().
+ */
+#define INDEX_LOCALE_NAME "res_index"
+#define INDEX_TAG "InstalledLocales"
+#define DEFAULT_TAG "default"
+
+#if defined(URES_TREE_DEBUG)
+#include <stdio.h>
+#endif
+/**
+ * Context object attached to the UEnumeration returned by
+ * ures_openAvailableLocales(); released in ures_loc_closeLocales().
+ */
+typedef struct ULocalesContext {
+ UResourceBundle installed; /* the INDEX_TAG resource whose items are enumerated */
+ UResourceBundle curr; /* fill-in for the item returned by ures_getNextResource() */
+} ULocalesContext;
+
+/**
+ * UEnumeration close callback: closes both bundles held in the
+ * ULocalesContext, then frees the context and the enumeration itself.
+ */
+static void U_CALLCONV
+ures_loc_closeLocales(UEnumeration *enumerator) {
+    ULocalesContext *localesContext = (ULocalesContext *)enumerator->context;
+    ures_close(&localesContext->curr);
+    ures_close(&localesContext->installed);
+    uprv_free(localesContext);
+    uprv_free(enumerator);
+}
+
+/**
+ * UEnumeration count callback: the size of the "installed" bundle held in
+ * the ULocalesContext. The status argument is not used.
+ */
+static int32_t U_CALLCONV
+ures_loc_countLocales(UEnumeration *en, UErrorCode * /*status*/) {
+    const ULocalesContext *localesContext = (ULocalesContext *)en->context;
+    return ures_getSize(&localesContext->installed);
+}
+
+U_CDECL_BEGIN
+
+
+/**
+ * UEnumeration next callback: advances the "installed" bundle and returns the
+ * key of the item reached, or NULL when the bundle has no further item or the
+ * item could not be fetched.
+ *
+ * @param resultLength optional; receives the length of the returned key,
+ *                     or 0 when NULL is returned.
+ */
+static const char * U_CALLCONV
+ures_loc_nextLocale(UEnumeration* en,
+                    int32_t* resultLength,
+                    UErrorCode* status) {
+    ULocalesContext *localesContext = (ULocalesContext *)en->context;
+    UResourceBundle *installed = &(localesContext->installed);
+    const char *name = NULL;
+    int32_t nameLength = 0;
+
+    if (ures_hasNext(installed)) {
+        UResourceBundle *item = ures_getNextResource(installed, &localesContext->curr, status);
+        if (item != 0) {
+            name = ures_getKey(item);
+            nameLength = (int32_t)uprv_strlen(name);
+        }
+    }
+    if (resultLength) {
+        *resultLength = nameLength;
+    }
+    return name;
+}
+
+/**
+ * UEnumeration reset callback: resets the iterator of the "installed" bundle
+ * held in the ULocalesContext. The status argument is not used.
+ */
+static void U_CALLCONV
+ures_loc_resetLocales(UEnumeration* en,
+                      UErrorCode* /*status*/) {
+    ULocalesContext *localesContext = (ULocalesContext *)en->context;
+    ures_resetIterator(&localesContext->installed);
+}
+
+U_CDECL_END
+/*
+ * Template for the enumerations created by ures_openAvailableLocales(), which
+ * copies it with uprv_memcpy() and then sets the copy's context pointer.
+ * NOTE(review): the meaning of each slot follows the field order of
+ * UEnumeration, which is declared outside this view -- confirm there.
+ */
+static const UEnumeration gLocalesEnum = {
+ NULL,
+ NULL,
+ ures_loc_closeLocales,
+ ures_loc_countLocales,
+ uenum_unextDefault,
+ ures_loc_nextLocale,
+ ures_loc_resetLocales
+};
+
+/**
+ * Creates an enumeration over the items of the INDEX_TAG resource in the
+ * INDEX_LOCALE_NAME bundle found under path.
+ *
+ * The enumeration is a copy of gLocalesEnum whose context is a freshly
+ * allocated ULocalesContext; both are released by ures_loc_closeLocales().
+ *
+ * @return the enumeration, or NULL on failure: U_MEMORY_ALLOCATION_ERROR if
+ *         either allocation fails, otherwise the error left in status by
+ *         opening the index bundle or reading INDEX_TAG. status must not be
+ *         NULL: it is dereferenced unchecked.
+ */
+U_CAPI UEnumeration* U_EXPORT2
+ures_openAvailableLocales(const char *path, UErrorCode *status)
+{
+ UResourceBundle *idx = NULL;
+ UEnumeration *en = NULL;
+ ULocalesContext *myContext = NULL;
+
+ if(U_FAILURE(*status)) {
+ return NULL;
+ }
+ myContext = static_cast<ULocalesContext *>(uprv_malloc(sizeof(ULocalesContext)));
+ en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
+ if(!en || !myContext) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ uprv_free(en);
+ uprv_free(myContext);
+ return NULL;
+ }
+ uprv_memcpy(en, &gLocalesEnum, sizeof(UEnumeration));
+
+ ures_initStackObject(&myContext->installed);
+ ures_initStackObject(&myContext->curr);
+ idx = ures_openDirect(path, INDEX_LOCALE_NAME, status);
+ ures_getByKey(idx, INDEX_TAG, &myContext->installed, status); /* returns early if opening idx already failed */
+ if(U_SUCCESS(*status)) {
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "Got %s::%s::[%s] : %s\n",
+ path, INDEX_LOCALE_NAME, INDEX_TAG, ures_getKey(&myContext->installed));
+#endif
+ en->context = myContext;
+ } else {
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s open failed - %s\n", path, u_errorName(*status));
+#endif
+ ures_close(&myContext->installed);
+ uprv_free(myContext);
+ uprv_free(en);
+ en = NULL;
+ }
+
+ ures_close(idx); /* the index bundle itself is no longer needed */
+
+ return en;
+}
+
+/**
+ * Returns TRUE if one of the remaining items of locEnum equals locToSearch.
+ * The enumeration is advanced up to and including the match, or to its end.
+ */
+static UBool isLocaleInList(UEnumeration *locEnum, const char *locToSearch, UErrorCode *status) {
+    for (const char *candidate = uenum_next(locEnum, NULL, status);
+         candidate != NULL;
+         candidate = uenum_next(locEnum, NULL, status)) {
+        if (uprv_strcmp(candidate, locToSearch) == 0) {
+            return TRUE;
+        }
+    }
+    return FALSE;
+}
+
+U_CAPI int32_t U_EXPORT2
+ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
+ const char *path, const char *resName, const char *keyword, const char *locid,
+ UBool *isAvailable, UBool omitDefault, UErrorCode *status)
+{
+ char kwVal[1024] = ""; /* value of keyword 'keyword' */
+ char defVal[1024] = ""; /* default value for given locale */
+ char defLoc[1024] = ""; /* default value for given locale */
+ char base[1024] = ""; /* base locale */
+ char found[1024] = "";
+ char parent[1024] = "";
+ char full[1024] = "";
+ UResourceBundle bund1, bund2;
+ UResourceBundle *res = NULL;
+ UErrorCode subStatus = U_ZERO_ERROR;
+ int32_t length = 0;
+ if(U_FAILURE(*status)) return 0;
+ uloc_getKeywordValue(locid, keyword, kwVal, 1024-1,&subStatus);
+ if(!uprv_strcmp(kwVal, DEFAULT_TAG)) {
+ kwVal[0]=0;
+ }
+ uloc_getBaseName(locid, base, 1024-1,&subStatus);
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "getFunctionalEquivalent: \"%s\" [%s=%s] in %s - %s\n",
+ locid, keyword, kwVal, base, u_errorName(subStatus));
+#endif
+ ures_initStackObject(&bund1);
+ ures_initStackObject(&bund2);
+
+
+ uprv_strcpy(parent, base);
+ uprv_strcpy(found, base);
+
+ if(isAvailable) {
+ UEnumeration *locEnum = ures_openAvailableLocales(path, &subStatus);
+ *isAvailable = TRUE;
+ if (U_SUCCESS(subStatus)) {
+ *isAvailable = isLocaleInList(locEnum, parent, &subStatus);
+ }
+ uenum_close(locEnum);
+ }
+
+ if(U_FAILURE(subStatus)) {
+ *status = subStatus;
+ return 0;
+ }
+
+ do {
+ subStatus = U_ZERO_ERROR;
+ res = ures_open(path, parent, &subStatus);
+ if(((subStatus == U_USING_FALLBACK_WARNING) ||
+ (subStatus == U_USING_DEFAULT_WARNING)) && isAvailable)
+ {
+ *isAvailable = FALSE;
+ }
+ isAvailable = NULL; /* only want to set this the first time around */
+
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s;%s -> %s [%s]\n", path?path:"ICUDATA", parent, u_errorName(subStatus), ures_getLocale(res, &subStatus));
+#endif
+ if(U_FAILURE(subStatus)) {
+ *status = subStatus;
+ } else if(subStatus == U_ZERO_ERROR) {
+ ures_getByKey(res,resName,&bund1, &subStatus);
+ if(subStatus == U_ZERO_ERROR) {
+ const UChar *defUstr;
+ int32_t defLen;
+ /* look for default item */
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s;%s : loaded default -> %s\n",
+ path?path:"ICUDATA", parent, u_errorName(subStatus));
+#endif
+ defUstr = ures_getStringByKey(&bund1, DEFAULT_TAG, &defLen, &subStatus);
+ if(U_SUCCESS(subStatus) && defLen) {
+ u_UCharsToChars(defUstr, defVal, u_strlen(defUstr));
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s;%s -> default %s=%s, %s\n",
+ path?path:"ICUDATA", parent, keyword, defVal, u_errorName(subStatus));
+#endif
+ uprv_strcpy(defLoc, parent);
+ if(kwVal[0]==0) {
+ uprv_strcpy(kwVal, defVal);
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s;%s -> kwVal = %s\n",
+ path?path:"ICUDATA", parent, keyword, kwVal);
+#endif
+ }
+ }
+ }
+ }
+
+ subStatus = U_ZERO_ERROR;
+
+ if (res != NULL) {
+ uprv_strcpy(found, ures_getLocaleByType(res, ULOC_VALID_LOCALE, &subStatus));
+ }
+
+ uloc_getParent(found,parent,sizeof(parent),&subStatus);
+ ures_close(res);
+ } while(!defVal[0] && *found && uprv_strcmp(found, "root") != 0 && U_SUCCESS(*status));
+
+ /* Now, see if we can find the kwVal collator.. start the search over.. */
+ uprv_strcpy(parent, base);
+ uprv_strcpy(found, base);
+
+ do {
+ subStatus = U_ZERO_ERROR;
+ res = ures_open(path, parent, &subStatus);
+ if((subStatus == U_USING_FALLBACK_WARNING) && isAvailable) {
+ *isAvailable = FALSE;
+ }
+ isAvailable = NULL; /* only want to set this the first time around */
+
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s;%s -> %s (looking for %s)\n",
+ path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal);
+#endif
+ if(U_FAILURE(subStatus)) {
+ *status = subStatus;
+ } else if(subStatus == U_ZERO_ERROR) {
+ ures_getByKey(res,resName,&bund1, &subStatus);
+#if defined(URES_TREE_DEBUG)
+/**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, resName, u_errorName(subStatus));
+#endif
+ if(subStatus == U_ZERO_ERROR) {
+ ures_getByKey(&bund1, kwVal, &bund2, &subStatus);
+#if defined(URES_TREE_DEBUG)
+/**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, kwVal, u_errorName(subStatus));
+#endif
+ if(subStatus == U_ZERO_ERROR) {
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s;%s -> full0 %s=%s, %s\n",
+ path?path:"ICUDATA", parent, keyword, kwVal, u_errorName(subStatus));
+#endif
+ uprv_strcpy(full, parent);
+ if(*full == 0) {
+ uprv_strcpy(full, "root");
+ }
+ /* now, recalculate default kw if need be */
+ if(uprv_strlen(defLoc) > uprv_strlen(full)) {
+ const UChar *defUstr;
+ int32_t defLen;
+ /* look for default item */
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s;%s -> recalculating Default0\n",
+ path?path:"ICUDATA", full);
+#endif
+ defUstr = ures_getStringByKey(&bund1, DEFAULT_TAG, &defLen, &subStatus);
+ if(U_SUCCESS(subStatus) && defLen) {
+ u_UCharsToChars(defUstr, defVal, u_strlen(defUstr));
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s;%s -> default0 %s=%s, %s\n",
+ path?path:"ICUDATA", full, keyword, defVal, u_errorName(subStatus));
+#endif
+ uprv_strcpy(defLoc, full);
+ }
+ } /* end of recalculate default KW */
+#if defined(URES_TREE_DEBUG)
+ else {
+ fprintf(stderr, "No trim0, %s <= %s\n", defLoc, full);
+ }
+#endif
+ } else {
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "err=%s in %s looking for %s\n",
+ u_errorName(subStatus), parent, kwVal);
+#endif
+ }
+ }
+ }
+
+ subStatus = U_ZERO_ERROR;
+
+ uprv_strcpy(found, parent);
+ uloc_getParent(found,parent,1023,&subStatus);
+ ures_close(res);
+ } while(!full[0] && *found && U_SUCCESS(*status));
+
+ if((full[0]==0) && uprv_strcmp(kwVal, defVal)) {
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "Failed to locate kw %s - try default %s\n", kwVal, defVal);
+#endif
+ uprv_strcpy(kwVal, defVal);
+ uprv_strcpy(parent, base);
+ uprv_strcpy(found, base);
+
+ do { /* search for 'default' named item */
+ subStatus = U_ZERO_ERROR;
+ res = ures_open(path, parent, &subStatus);
+ if((subStatus == U_USING_FALLBACK_WARNING) && isAvailable) {
+ *isAvailable = FALSE;
+ }
+ isAvailable = NULL; /* only want to set this the first time around */
+
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s;%s -> %s (looking for default %s)\n",
+ path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal);
+#endif
+ if(U_FAILURE(subStatus)) {
+ *status = subStatus;
+ } else if(subStatus == U_ZERO_ERROR) {
+ ures_getByKey(res,resName,&bund1, &subStatus);
+ if(subStatus == U_ZERO_ERROR) {
+ ures_getByKey(&bund1, kwVal, &bund2, &subStatus);
+ if(subStatus == U_ZERO_ERROR) {
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s;%s -> full1 %s=%s, %s\n", path?path:"ICUDATA",
+ parent, keyword, kwVal, u_errorName(subStatus));
+#endif
+ uprv_strcpy(full, parent);
+ if(*full == 0) {
+ uprv_strcpy(full, "root");
+ }
+
+ /* now, recalculate default kw if need be */
+ if(uprv_strlen(defLoc) > uprv_strlen(full)) {
+ const UChar *defUstr;
+ int32_t defLen;
+ /* look for default item */
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s;%s -> recalculating Default1\n",
+ path?path:"ICUDATA", full);
+#endif
+ defUstr = ures_getStringByKey(&bund1, DEFAULT_TAG, &defLen, &subStatus);
+ if(U_SUCCESS(subStatus) && defLen) {
+ u_UCharsToChars(defUstr, defVal, u_strlen(defUstr));
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s;%s -> default %s=%s, %s\n",
+ path?path:"ICUDATA", full, keyword, defVal, u_errorName(subStatus));
+#endif
+ uprv_strcpy(defLoc, full);
+ }
+ } /* end of recalculate default KW */
+#if defined(URES_TREE_DEBUG)
+ else {
+ fprintf(stderr, "No trim1, %s <= %s\n", defLoc, full);
+ }
+#endif
+ }
+ }
+ }
+ subStatus = U_ZERO_ERROR;
+
+ uprv_strcpy(found, parent);
+ uloc_getParent(found,parent,1023,&subStatus);
+ ures_close(res);
+ } while(!full[0] && *found && U_SUCCESS(*status));
+ }
+
+ if(U_SUCCESS(*status)) {
+ if(!full[0]) {
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "Still could not load keyword %s=%s\n", keyword, kwVal);
+#endif
+ *status = U_MISSING_RESOURCE_ERROR;
+ } else if(omitDefault) {
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr,"Trim? full=%s, defLoc=%s, found=%s\n", full, defLoc, found);
+#endif
+ if(uprv_strlen(defLoc) <= uprv_strlen(full)) {
+ /* found the keyword in a *child* of where the default tag was present. */
+ if(!uprv_strcmp(kwVal, defVal)) { /* if the requested kw is default, */
+ /* and the default is in or in an ancestor of the current locale */
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "Removing unneeded var %s=%s\n", keyword, kwVal);
+#endif
+ kwVal[0]=0;
+ }
+ }
+ }
+ uprv_strcpy(found, full);
+ if(kwVal[0]) {
+ uprv_strcat(found, "@");
+ uprv_strcat(found, keyword);
+ uprv_strcat(found, "=");
+ uprv_strcat(found, kwVal);
+ } else if(!omitDefault) {
+ uprv_strcat(found, "@");
+ uprv_strcat(found, keyword);
+ uprv_strcat(found, "=");
+ uprv_strcat(found, defVal);
+ }
+ }
+ /* we found the default locale - no need to repeat it.*/
+
+ ures_close(&bund1);
+ ures_close(&bund2);
+
+ length = (int32_t)uprv_strlen(found);
+
+ if(U_SUCCESS(*status)) {
+ int32_t copyLength = uprv_min(length, resultCapacity);
+ if(copyLength>0) {
+ uprv_strncpy(result, found, copyLength);
+ }
+ if(length == 0) {
+ *status = U_MISSING_RESOURCE_ERROR;
+ }
+ } else {
+ length = 0;
+ result[0]=0;
+ }
+ return u_terminateChars(result, resultCapacity, length, status);
+}
+
+/*
+ * Builds an enumeration of the distinct values available for `keyword` in the
+ * resource tree at `path`.
+ *
+ * For every locale reported by ures_openAvailableLocales(), the locale's bundle is
+ * opened with ures_openDirect(), the resource stored under `keyword` is fetched, and
+ * the key of each of its items is collected. Empty keys, the DEFAULT_TAG key and keys
+ * starting with "private-" are skipped, and a key is stored only once.
+ *
+ * The keys are packed back to back, each NUL-terminated, into a fixed-size stack
+ * buffer that is then passed to uloc_openKeywordList(). If the buffer
+ * (VALUES_BUF_SIZE bytes) or the de-duplication list (VALUES_LIST_SIZE entries) runs
+ * out of space, *status is set to U_ILLEGAL_ARGUMENT_ERROR and that key is dropped.
+ *
+ * Returns NULL if the list of available locales cannot be opened; otherwise returns
+ * the result of uloc_openKeywordList().
+ */
+U_CAPI UEnumeration* U_EXPORT2
+ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status)
+{
+#define VALUES_BUF_SIZE 2048
+#define VALUES_LIST_SIZE 512
+
+ char valuesBuf[VALUES_BUF_SIZE]; /* packed, NUL-separated keys */
+ int32_t valuesIndex = 0; /* next free byte in valuesBuf */
+ const char *valuesList[VALUES_LIST_SIZE]; /* pointers into valuesBuf, used for duplicate detection */
+ int32_t valuesCount = 0;
+
+ const char *locale;
+ int32_t locLen;
+
+ UEnumeration *locs = NULL;
+
+ UResourceBundle item;
+ UResourceBundle subItem;
+
+ ures_initStackObject(&item);
+ ures_initStackObject(&subItem);
+ locs = ures_openAvailableLocales(path, status);
+
+ if(U_FAILURE(*status)) {
+ ures_close(&item);
+ ures_close(&subItem);
+ return NULL;
+ }
+
+ valuesBuf[0]=0;
+ valuesBuf[1]=0;
+
+ while((locale = uenum_next(locs, &locLen, status)) != 0) {
+ UResourceBundle *bund = NULL;
+ UResourceBundle *subPtr = NULL;
+ UErrorCode subStatus = U_ZERO_ERROR; /* don't fail if a bundle is unopenable */
+ bund = ures_openDirect(path, locale, &subStatus);
+
+#if defined(URES_TREE_DEBUG)
+ if(!bund || U_FAILURE(subStatus)) {
+ fprintf(stderr, "%s-%s values: Can't open %s locale - skipping. (%s)\n",
+ path?path:"<ICUDATA>", keyword, locale, u_errorName(subStatus));
+ }
+#endif
+
+ /* this call is made even when the open above failed; the combined check below skips the locale */
+ ures_getByKey(bund, keyword, &item, &subStatus);
+
+ if(!bund || U_FAILURE(subStatus)) {
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s-%s values: Can't find in %s - skipping. (%s)\n",
+ path?path:"<ICUDATA>", keyword, locale, u_errorName(subStatus));
+#endif
+ ures_close(bund);
+ bund = NULL;
+ continue;
+ }
+
+ /* walk the items under `keyword`; each item's key is a candidate value */
+ while((subPtr = ures_getNextResource(&item,&subItem,&subStatus)) != 0
+ && U_SUCCESS(subStatus)) {
+ const char *k;
+ int32_t i;
+ k = ures_getKey(subPtr);
+
+#if defined(URES_TREE_DEBUG)
+ /* fprintf(stderr, "%s | %s | %s | %s\n", path?path:"<ICUDATA>", keyword, locale, k); */
+#endif
+ if(k == NULL || *k == 0 ||
+ uprv_strcmp(k, DEFAULT_TAG) == 0 || uprv_strncmp(k, "private-", 8) == 0) {
+ // empty or "default" or unlisted type
+ continue;
+ }
+ /* linear scan of the keys collected so far */
+ for(i=0; i<valuesCount; i++) {
+ if(!uprv_strcmp(valuesList[i],k)) {
+ k = NULL; /* found duplicate */
+ break;
+ }
+ }
+ if(k != NULL) {
+ int32_t kLen = (int32_t)uprv_strlen(k);
+ if((valuesCount >= (VALUES_LIST_SIZE-1)) || /* no more space in list .. */
+ ((valuesIndex+kLen+1+1) >= VALUES_BUF_SIZE)) { /* no more space in buffer (string + 2 nulls) */
+ *status = U_ILLEGAL_ARGUMENT_ERROR; /* out of space.. */
+ } else {
+ uprv_strcpy(valuesBuf+valuesIndex, k);
+ valuesList[valuesCount++] = valuesBuf+valuesIndex;
+ valuesIndex += kLen;
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s | %s | %s | [%s] (UNIQUE)\n",
+ path?path:"<ICUDATA>", keyword, locale, k);
+#endif
+ valuesBuf[valuesIndex++] = 0; /* terminate */
+ }
+ }
+ }
+ ures_close(bund);
+ }
+ /* a second NUL after the last key marks the end of the packed list */
+ valuesBuf[valuesIndex++] = 0; /* terminate */
+
+ ures_close(&item);
+ ures_close(&subItem);
+ uenum_close(locs);
+#if defined(URES_TREE_DEBUG)
+ fprintf(stderr, "%s: size %d, #%d\n", u_errorName(*status),
+ valuesIndex, valuesCount);
+#endif
+ return uloc_openKeywordList(valuesBuf, valuesIndex, status);
+}
+#if 0
+/*
+ * Disabled code: everything up to the matching #endif is excluded from compilation.
+ * This code isn't needed, and given the documentation warnings the implementation is suspect.
+ */
+/* Field-by-field comparison of two bundles: key, data name/path, parent name/path, resource path and resource word. */
+U_CAPI UBool U_EXPORT2
+ures_equal(const UResourceBundle* res1, const UResourceBundle* res2){
+ if(res1==NULL || res2==NULL){
+ return res1==res2; /* pointer comparison */
+ }
+ if(res1->fKey==NULL|| res2->fKey==NULL){
+ return (res1->fKey==res2->fKey);
+ }else{
+ if(uprv_strcmp(res1->fKey, res2->fKey)!=0){
+ return FALSE;
+ }
+ }
+ if(uprv_strcmp(res1->fData->fName, res2->fData->fName)!=0){
+ return FALSE;
+ }
+ if(res1->fData->fPath == NULL|| res2->fData->fPath==NULL){
+ return (res1->fData->fPath == res2->fData->fPath);
+ }else{
+ if(uprv_strcmp(res1->fData->fPath, res2->fData->fPath)!=0){
+ return FALSE;
+ }
+ }
+ /* NOTE(review): fParent and its fPath are dereferenced without a NULL check */
+ if(uprv_strcmp(res1->fData->fParent->fName, res2->fData->fParent->fName)!=0){
+ return FALSE;
+ }
+ if(uprv_strcmp(res1->fData->fParent->fPath, res2->fData->fParent->fPath)!=0){
+ return FALSE;
+ }
+ if(uprv_strncmp(res1->fResPath, res2->fResPath, res1->fResPathLen)!=0){
+ return FALSE;
+ }
+ if(res1->fRes != res2->fRes){
+ return FALSE;
+ }
+ return TRUE;
+}
+/* Re-opens the bundle that `res` belongs to and, if `res` has a resource path, navigates to the same sub-resource. */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_clone(const UResourceBundle* res, UErrorCode* status){
+ UResourceBundle* bundle = NULL;
+ UResourceBundle* ret = NULL;
+ if(U_FAILURE(*status) || res == NULL){
+ return NULL;
+ }
+ bundle = ures_open(res->fData->fPath, res->fData->fName, status);
+ if(res->fResPath!=NULL){
+ ret = ures_findSubResource(bundle, res->fResPath, NULL, status);
+ ures_close(bundle);
+ }else{
+ ret = bundle;
+ }
+ return ret;
+}
+/* Returns res->fParentRes, or NULL when `res` is NULL. */
+U_CAPI const UResourceBundle* U_EXPORT2
+ures_getParentBundle(const UResourceBundle* res){
+ if(res==NULL){
+ return NULL;
+ }
+ return res->fParentRes;
+}
+#endif
+
+/*
+ * Looks up the string stored under `key` in `res` and parses it into `ver`.
+ * `ver` is left untouched when the lookup reports a failure in *status.
+ */
+U_CAPI void U_EXPORT2
+ures_getVersionByKey(const UResourceBundle* res, const char *key, UVersionInfo ver, UErrorCode *status) {
+    int32_t versionLength;
+    const UChar *versionText = ures_getStringByKey(res, key, &versionLength, status);
+    if(U_FAILURE(*status)) {
+        return;
+    }
+    u_versionFromUString(ver, versionText);
+}
+
+/* eof */
diff --git a/thirdparty/icu4c/common/uresdata.cpp b/thirdparty/icu4c/common/uresdata.cpp
new file mode 100644
index 0000000000..ae731e4544
--- /dev/null
+++ b/thirdparty/icu4c/common/uresdata.cpp
@@ -0,0 +1,1518 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 1999-2016, International Business Machines Corporation
+* and others. All Rights Reserved.
+*******************************************************************************
+* file name: uresdata.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999dec08
+* created by: Markus W. Scherer
+* Modification History:
+*
+* Date Name Description
+* 06/20/2000 helena OS/400 port changes; mostly typecast.
+* 06/24/02 weiv Added support for resource sharing
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/udata.h"
+#include "unicode/ustring.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "resource.h"
+#include "uarrsort.h"
+#include "uassert.h"
+#include "ucol_swp.h"
+#include "udataswp.h"
+#include "uinvchar.h"
+#include "uresdata.h"
+#include "uresimp.h"
+#include "utracimp.h"
+
+/*
+ * Resource access helpers
+ */
+
+/*
+ * Get a const char* pointer to the key for a 16-bit keyOffset.
+ * Offsets below localKeyLimit are byte offsets from pRoot; larger offsets
+ * index into poolBundleKeys after subtracting localKeyLimit.
+ */
+#define RES_GET_KEY16(pResData, keyOffset) \
+ ((keyOffset)<(pResData)->localKeyLimit ? \
+ (const char *)(pResData)->pRoot+(keyOffset) : \
+ (pResData)->poolBundleKeys+(keyOffset)-(pResData)->localKeyLimit)
+
+/*
+ * Same for a 32-bit keyOffset: non-negative offsets are byte offsets from pRoot;
+ * negative offsets select poolBundleKeys, indexed by the low 31 bits.
+ */
+#define RES_GET_KEY32(pResData, keyOffset) \
+ ((keyOffset)>=0 ? \
+ (const char *)(pResData)->pRoot+(keyOffset) : \
+ (pResData)->poolBundleKeys+((keyOffset)&0x7fffffff))
+
+/* index value returned by the table search helpers when the key is absent */
+#define URESDATA_ITEM_NOT_FOUND -1
+
+/* empty resources, returned when the resource offset is 0 */
+/* initial target of p16BitUnits until res_init() finds a 16-bit unit area */
+static const uint16_t gEmpty16=0;
+
+/* zero-length stand-in used for binary and int-vector resources with offset 0 */
+static const struct {
+ int32_t length;
+ int32_t res;
+} gEmpty32={ 0, 0 };
+
+/* zero-length stand-in used for string and alias resources with offset 0 */
+static const struct {
+ int32_t length;
+ UChar nul;
+ UChar pad;
+} gEmptyString={ 0, 0, 0 };
+
+/*
+ * All the type-access functions assume that
+ * the resource is of the expected type.
+ */
+
+/*
+ * Binary search for `key` among `length` 16-bit key offsets.
+ * On a match, stores the table's own key pointer in *realKey and returns its index;
+ * otherwise returns URESDATA_ITEM_NOT_FOUND and leaves *realKey alone.
+ */
+static int32_t
+_res_findTableItem(const ResourceData *pResData, const uint16_t *keyOffsets, int32_t length,
+                   const char *key, const char **realKey) {
+    /* the candidates live in the half-open index range [lo, hi) */
+    int32_t lo=0;
+    int32_t hi=length;
+    while(lo<hi) {
+        int32_t probe=(lo+hi)/2;
+        const char *candidate=RES_GET_KEY16(pResData, keyOffsets[probe]);
+        int order;
+        if(pResData->useNativeStrcmp) {
+            order=uprv_strcmp(key, candidate);
+        } else {
+            order=uprv_compareInvCharsAsAscii(key, candidate);
+        }
+        if(order==0) {
+            /* We found it! */
+            *realKey=candidate;
+            return probe;
+        }
+        if(order<0) {
+            hi=probe;
+        } else {
+            lo=probe+1;
+        }
+    }
+    return URESDATA_ITEM_NOT_FOUND; /* not found or table is empty. */
+}
+
+/*
+ * Binary search for `key` among `length` 32-bit key offsets.
+ * On a match, stores the table's own key pointer in *realKey and returns its index;
+ * otherwise returns URESDATA_ITEM_NOT_FOUND and leaves *realKey alone.
+ */
+static int32_t
+_res_findTable32Item(const ResourceData *pResData, const int32_t *keyOffsets, int32_t length,
+                     const char *key, const char **realKey) {
+    /* the candidates live in the half-open index range [lo, hi) */
+    int32_t lo=0;
+    int32_t hi=length;
+    while(lo<hi) {
+        int32_t probe=(lo+hi)/2;
+        const char *candidate=RES_GET_KEY32(pResData, keyOffsets[probe]);
+        int order;
+        if(pResData->useNativeStrcmp) {
+            order=uprv_strcmp(key, candidate);
+        } else {
+            order=uprv_compareInvCharsAsAscii(key, candidate);
+        }
+        if(order==0) {
+            /* We found it! */
+            *realKey=candidate;
+            return probe;
+        }
+        if(order<0) {
+            hi=probe;
+        } else {
+            lo=probe+1;
+        }
+    }
+    return URESDATA_ITEM_NOT_FOUND; /* not found or table is empty. */
+}
+
+/* helper for res_load() ---------------------------------------------------- */
+
+/*
+ * Acceptance callback passed to udata_openChoice() and also called directly by res_read().
+ * Always copies the 4-byte formatVersion into `context`, then reports whether the
+ * data header describes a "ResB" file usable on this platform with major version 1..3.
+ */
+static UBool U_CALLCONV
+isAcceptable(void *context,
+             const char * /*type*/, const char * /*name*/,
+             const UDataInfo *pInfo) {
+    uprv_memcpy(context, pInfo->formatVersion, 4);
+
+    /* header size and platform properties must match */
+    if(pInfo->size<20 ||
+            pInfo->isBigEndian!=U_IS_BIG_ENDIAN ||
+            pInfo->charsetFamily!=U_CHARSET_FAMILY ||
+            pInfo->sizeofUChar!=U_SIZEOF_UCHAR) {
+        return FALSE;
+    }
+    /* dataFormat="ResB" */
+    if(pInfo->dataFormat[0]!=0x52 ||
+            pInfo->dataFormat[1]!=0x65 ||
+            pInfo->dataFormat[2]!=0x73 ||
+            pInfo->dataFormat[3]!=0x42) {
+        return FALSE;
+    }
+    return (UBool)(1<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=3);
+}
+
+/* semi-public functions ---------------------------------------------------- */
+
+/*
+ * Fills in *pResData from the raw bundle bytes at inBytes.
+ *
+ * formatVersion selects the layout rules. length is the number of bytes available
+ * at inBytes; a negative length disables the size checks (res_load() passes -1).
+ * On any validation failure *errorCode is set to U_INVALID_FORMAT_ERROR,
+ * res_unload() is called and the function returns early.
+ *
+ * Fields that are not assigned here keep whatever value the caller put in
+ * *pResData; both callers in this file zero the struct first.
+ */
+static void
+res_init(ResourceData *pResData,
+ UVersionInfo formatVersion, const void *inBytes, int32_t length,
+ UErrorCode *errorCode) {
+ UResType rootType;
+
+ /* get the root resource */
+ /* NOTE(review): the first word is read before the length check below */
+ pResData->pRoot=(const int32_t *)inBytes;
+ pResData->rootRes=(Resource)*pResData->pRoot;
+ pResData->p16BitUnits=&gEmpty16;
+
+ /* formatVersion 1.1 must have a root item and at least 5 indexes */
+ if(length>=0 && (length/4)<((formatVersion[0]==1 && formatVersion[1]==0) ? 1 : 1+5)) {
+ *errorCode=U_INVALID_FORMAT_ERROR;
+ res_unload(pResData);
+ return;
+ }
+
+ /* currently, we accept only resources that have a Table as their roots */
+ rootType=(UResType)RES_GET_TYPE(pResData->rootRes);
+ if(!URES_IS_TABLE(rootType)) {
+ *errorCode=U_INVALID_FORMAT_ERROR;
+ res_unload(pResData);
+ return;
+ }
+
+ if(formatVersion[0]==1 && formatVersion[1]==0) {
+ pResData->localKeyLimit=0x10000; /* greater than any 16-bit key string offset */
+ } else {
+ /* bundles with formatVersion 1.1 and later contain an indexes[] array */
+ const int32_t *indexes=pResData->pRoot+1;
+ int32_t indexLength=indexes[URES_INDEX_LENGTH]&0xff;
+ /* the indexes[] array must extend beyond URES_INDEX_MAX_TABLE_LENGTH */
+ if(indexLength<=URES_INDEX_MAX_TABLE_LENGTH) {
+ *errorCode=U_INVALID_FORMAT_ERROR;
+ res_unload(pResData);
+ return;
+ }
+ /* the data must hold the root word plus all indexes, and reach the bundle top (both counted in 32-bit words) */
+ if( length>=0 &&
+ (length<((1+indexLength)<<2) ||
+ length<(indexes[URES_INDEX_BUNDLE_TOP]<<2))
+ ) {
+ *errorCode=U_INVALID_FORMAT_ERROR;
+ res_unload(pResData);
+ return;
+ }
+ /* keys top beyond the end of the indexes: record it as a byte offset */
+ if(indexes[URES_INDEX_KEYS_TOP]>(1+indexLength)) {
+ pResData->localKeyLimit=indexes[URES_INDEX_KEYS_TOP]<<2;
+ }
+ if(formatVersion[0]>=3) {
+ // In formatVersion 1, the indexLength took up this whole int.
+ // In version 2, bits 31..8 were reserved and always 0.
+ // In version 3, they contain bits 23..0 of the poolStringIndexLimit.
+ // Bits 27..24 are in indexes[URES_INDEX_ATTRIBUTES] bits 15..12.
+ pResData->poolStringIndexLimit=(int32_t)((uint32_t)indexes[URES_INDEX_LENGTH]>>8);
+ }
+ if(indexLength>URES_INDEX_ATTRIBUTES) {
+ int32_t att=indexes[URES_INDEX_ATTRIBUTES];
+ pResData->noFallback=(UBool)(att&URES_ATT_NO_FALLBACK);
+ pResData->isPoolBundle=(UBool)((att&URES_ATT_IS_POOL_BUNDLE)!=0);
+ pResData->usesPoolBundle=(UBool)((att&URES_ATT_USES_POOL_BUNDLE)!=0);
+ pResData->poolStringIndexLimit|=(att&0xf000)<<12; // bits 15..12 -> 27..24
+ pResData->poolStringIndex16Limit=(int32_t)((uint32_t)att>>16);
+ }
+ /* a bundle that is or uses a pool bundle must have the pool checksum index */
+ if((pResData->isPoolBundle || pResData->usesPoolBundle) && indexLength<=URES_INDEX_POOL_CHECKSUM) {
+ *errorCode=U_INVALID_FORMAT_ERROR;
+ res_unload(pResData);
+ return;
+ }
+ /* a non-empty 16-bit unit area starts at the keys top */
+ if( indexLength>URES_INDEX_16BIT_TOP &&
+ indexes[URES_INDEX_16BIT_TOP]>indexes[URES_INDEX_KEYS_TOP]
+ ) {
+ pResData->p16BitUnits=(const uint16_t *)(pResData->pRoot+indexes[URES_INDEX_KEYS_TOP]);
+ }
+ }
+
+ if(formatVersion[0]==1 || U_CHARSET_FAMILY==U_ASCII_FAMILY) {
+ /*
+ * formatVersion 1: compare key strings in native-charset order
+ * formatVersion 2 and up: compare key strings in ASCII order
+ */
+ pResData->useNativeStrcmp=TRUE;
+ }
+}
+
+/*
+ * Initializes *pResData from bundle bytes that are already in memory.
+ * *pResData is zeroed first, even when *errorCode already indicates a failure.
+ * Sets U_INVALID_FORMAT_ERROR when the data header in pInfo is not acceptable.
+ */
+U_CAPI void U_EXPORT2
+res_read(ResourceData *pResData,
+         const UDataInfo *pInfo, const void *inBytes, int32_t length,
+         UErrorCode *errorCode) {
+    UVersionInfo formatVersion;
+
+    uprv_memset(pResData, 0, sizeof(ResourceData));
+    if(U_SUCCESS(*errorCode)) {
+        if(isAcceptable(formatVersion, NULL, NULL, pInfo)) {
+            res_init(pResData, formatVersion, inBytes, length, errorCode);
+        } else {
+            *errorCode=U_INVALID_FORMAT_ERROR;
+        }
+    }
+}
+
+/*
+ * Zeroes *pResData, opens the "res" data item `name` under `path` and,
+ * if that succeeds, initializes *pResData from the item's memory.
+ */
+U_CFUNC void
+res_load(ResourceData *pResData,
+         const char *path, const char *name, UErrorCode *errorCode) {
+    UVersionInfo formatVersion;
+
+    uprv_memset(pResData, 0, sizeof(ResourceData));
+
+    /* load the ResourceBundle file; isAcceptable() stores the format version */
+    pResData->data=udata_openChoice(path, "res", name, isAcceptable, formatVersion, errorCode);
+    if(U_SUCCESS(*errorCode)) {
+        /* length -1: no size checks for data opened through udata */
+        res_init(pResData, formatVersion, udata_getMemory(pResData->data), -1, errorCode);
+    }
+}
+
+/* Closes the data item held by *pResData, if any, and clears the pointer. */
+U_CFUNC void
+res_unload(ResourceData *pResData) {
+    if(pResData->data==NULL) {
+        return;  /* nothing is loaded */
+    }
+    udata_close(pResData->data);
+    pResData->data=NULL;
+}
+
+/*
+ * Lookup table indexed by the type value that RES_GET_TYPE() extracts from a resource word.
+ * Each entry is the type reported to callers: the alternative table, array and
+ * string encodings collapse to URES_TABLE, URES_ARRAY and URES_STRING,
+ * and the remaining slots map to URES_NONE.
+ */
+static const int8_t gPublicTypes[URES_LIMIT] = {
+ URES_STRING,
+ URES_BINARY,
+ URES_TABLE,
+ URES_ALIAS,
+
+ URES_TABLE, /* URES_TABLE32 */
+ URES_TABLE, /* URES_TABLE16 */
+ URES_STRING, /* URES_STRING_V2 */
+ URES_INT,
+
+ URES_ARRAY,
+ URES_ARRAY, /* URES_ARRAY16 */
+ URES_NONE,
+ URES_NONE,
+
+ URES_NONE,
+ URES_NONE,
+ URES_INT_VECTOR,
+ URES_NONE
+};
+
+/* Returns the gPublicTypes entry for the type of `res`. */
+U_CAPI UResType U_EXPORT2
+res_getPublicType(Resource res) {
+ return (UResType)gPublicTypes[RES_GET_TYPE(res)];
+}
+
+/*
+ * Returns a pointer to the UChars of the string resource `res` and, when pLength
+ * is not NULL, stores the string length in *pLength.
+ * Handles URES_STRING_V2 (16-bit units, in the pool bundle or the local 16-bit area)
+ * and URES_STRING (int32_t length followed by the UChars).
+ * For any other resource type returns NULL with length 0.
+ */
+U_CAPI const UChar * U_EXPORT2
+res_getStringNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength) {
+ const UChar *p;
+ uint32_t offset=RES_GET_OFFSET(res);
+ int32_t length;
+ if(RES_GET_TYPE(res)==URES_STRING_V2) {
+ int32_t first;
+ /* offsets below poolStringIndexLimit address the pool bundle, the rest are local */
+ if((int32_t)offset<pResData->poolStringIndexLimit) {
+ p=(const UChar *)pResData->poolBundleStrings+offset;
+ } else {
+ p=(const UChar *)pResData->p16BitUnits+(offset-pResData->poolStringIndexLimit);
+ }
+ first=*p;
+ /*
+ * A leading trail-surrogate code unit introduces an explicit length that occupies
+ * 1, 2 or 3 units before the text; any other first unit means the length is
+ * implicit and found with u_strlen().
+ */
+ if(!U16_IS_TRAIL(first)) {
+ length=u_strlen(p);
+ } else if(first<0xdfef) {
+ length=first&0x3ff;
+ ++p;
+ } else if(first<0xdfff) {
+ length=((first-0xdfef)<<16)|p[1];
+ p+=2;
+ } else {
+ length=((int32_t)p[1]<<16)|p[2];
+ p+=3;
+ }
+ } else if(res==offset) /* RES_GET_TYPE(res)==URES_STRING */ {
+ /* res==0 is the empty string; otherwise res itself is the 32-bit word index from pRoot */
+ const int32_t *p32= res==0 ? &gEmptyString.length : pResData->pRoot+res;
+ length=*p32++;
+ p=(const UChar *)p32;
+ } else {
+ p=NULL;
+ length=0;
+ }
+ if(pLength) {
+ *pLength=length;
+ }
+ return p;
+}
+
+namespace {
+
+/**
+ * CLDR string value (three empty-set symbols)=={2205, 2205, 2205}
+ * prevents fallback to the parent bundle.
+ * TODO: combine with other code that handles this marker, use EMPTY_SET constant.
+ * TODO: maybe move to uresbund.cpp?
+ *
+ * Returns TRUE only if `res` is a string resource whose text is exactly
+ * those three code units; FALSE for every other resource.
+ */
+UBool isNoInheritanceMarker(const ResourceData *pResData, Resource res) {
+ uint32_t offset=RES_GET_OFFSET(res);
+ if (offset == 0) {
+ // empty string
+ } else if (res == offset) {
+ // res == offset means the type bits are 0 (same test as in res_getStringNoTrace):
+ // an int32_t length followed by the UChars.
+ const int32_t *p32=pResData->pRoot+res;
+ int32_t length=*p32;
+ const UChar *p=(const UChar *)p32;
+ // p[0] and p[1] overlay the 32-bit length, so the text starts at p[2]
+ return length == 3 && p[2] == 0x2205 && p[3] == 0x2205 && p[4] == 0x2205;
+ } else if (RES_GET_TYPE(res) == URES_STRING_V2) {
+ const UChar *p;
+ if((int32_t)offset<pResData->poolStringIndexLimit) {
+ p=(const UChar *)pResData->poolBundleStrings+offset;
+ } else {
+ p=(const UChar *)pResData->p16BitUnits+(offset-pResData->poolStringIndexLimit);
+ }
+ int32_t first=*p;
+ if (first == 0x2205) { // implicit length
+ return p[1] == 0x2205 && p[2] == 0x2205 && p[3] == 0;
+ } else if (first == 0xdc03) { // explicit length 3 (should not occur)
+ return p[1] == 0x2205 && p[2] == 0x2205 && p[3] == 0x2205;
+ } else {
+ // Assume that the string has not been stored with more length units than necessary.
+ return FALSE;
+ }
+ }
+ return FALSE;
+}
+
+/**
+ * Sets dest[0..n-1] to the strings of `array`, where n is the array size.
+ * The strings are set with setTo(TRUE, s, sLength) on the pointers returned
+ * by res_getStringNoTrace().
+ *
+ * Returns the array size. Error cases:
+ * - dest==NULL with capacity!=0, or capacity<0: U_ILLEGAL_ARGUMENT_ERROR, returns 0
+ * - size > capacity: U_BUFFER_OVERFLOW_ERROR, returns the size, writes nothing
+ * - an element that is not a string: U_RESOURCE_TYPE_MISMATCH, returns 0
+ *   (elements before it have already been written)
+ */
+int32_t getStringArray(const ResourceData *pResData, const icu::ResourceArray &array,
+ icu::UnicodeString *dest, int32_t capacity,
+ UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ if(dest == NULL ? capacity != 0 : capacity < 0) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ int32_t length = array.getSize();
+ if(length == 0) {
+ return 0;
+ }
+ if(length > capacity) {
+ errorCode = U_BUFFER_OVERFLOW_ERROR;
+ return length;
+ }
+ for(int32_t i = 0; i < length; ++i) {
+ int32_t sLength;
+ // No tracing: handled by the caller
+ const UChar *s = res_getStringNoTrace(pResData, array.internalGetResource(pResData, i), &sLength);
+ if(s == NULL) {
+ errorCode = U_RESOURCE_TYPE_MISMATCH;
+ return 0;
+ }
+ dest[i].setTo(TRUE, s, sLength);
+ }
+ return length;
+}
+
+} // namespace
+
+/*
+ * Returns the UChars of the alias resource `res` and, when pLength is not NULL,
+ * stores their count in *pLength. For a non-alias resource returns NULL with length 0.
+ */
+U_CAPI const UChar * U_EXPORT2
+res_getAlias(const ResourceData *pResData, Resource res, int32_t *pLength) {
+    const UChar *text=NULL;
+    int32_t textLength=0;
+    if(RES_GET_TYPE(res)==URES_ALIAS) {
+        uint32_t offset=RES_GET_OFFSET(res);
+        /* offset 0 stands for the shared empty string; otherwise an int32_t length precedes the text */
+        const int32_t *header= offset==0 ? &gEmptyString.length : pResData->pRoot+offset;
+        textLength=header[0];
+        text=(const UChar *)(header+1);
+    }
+    if(pLength) {
+        *pLength=textLength;
+    }
+    return text;
+}
+
+/*
+ * Returns the bytes of the binary resource `res` and, when pLength is not NULL,
+ * stores the stored length in *pLength. For a non-binary resource returns NULL with length 0.
+ */
+U_CAPI const uint8_t * U_EXPORT2
+res_getBinaryNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength) {
+    const uint8_t *bytes=NULL;
+    int32_t byteLength=0;
+    if(RES_GET_TYPE(res)==URES_BINARY) {
+        uint32_t offset=RES_GET_OFFSET(res);
+        /* offset 0 stands for the shared empty resource; otherwise an int32_t length precedes the bytes */
+        const int32_t *header= offset==0 ? (const int32_t*)&gEmpty32 : pResData->pRoot+offset;
+        byteLength=header[0];
+        bytes=(const uint8_t *)(header+1);
+    }
+    if(pLength) {
+        *pLength=byteLength;
+    }
+    return bytes;
+}
+
+
+/*
+ * Returns the integers of the int-vector resource `res` and, when pLength is not NULL,
+ * stores their count in *pLength. For any other resource returns NULL with length 0.
+ */
+U_CAPI const int32_t * U_EXPORT2
+res_getIntVectorNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength) {
+    const int32_t *values=NULL;
+    int32_t count=0;
+    if(RES_GET_TYPE(res)==URES_INT_VECTOR) {
+        uint32_t offset=RES_GET_OFFSET(res);
+        /* offset 0 stands for the shared empty resource; otherwise the count precedes the values */
+        const int32_t *header= offset==0 ? (const int32_t *)&gEmpty32 : pResData->pRoot+offset;
+        count=header[0];
+        values=header+1;
+    }
+    if(pLength) {
+        *pLength=count;
+    }
+    return values;
+}
+
+/*
+ * Returns the number of items in `res`: the stored count for arrays and tables,
+ * 1 for the scalar types, and 0 for any other type.
+ */
+U_CAPI int32_t U_EXPORT2
+res_countArrayItems(const ResourceData *pResData, Resource res) {
+    uint32_t offset=RES_GET_OFFSET(res);
+    switch(RES_GET_TYPE(res)) {
+    /* containers in the 16-bit unit area: the count is the first 16-bit unit */
+    case URES_ARRAY16:
+    case URES_TABLE16:
+        return pResData->p16BitUnits[offset];
+    /* 32-bit containers with a 16-bit count; offset 0 means empty */
+    case URES_TABLE:
+        if(offset==0) {
+            return 0;
+        }
+        return *((const uint16_t *)(pResData->pRoot+offset));
+    /* 32-bit containers with a 32-bit count; offset 0 means empty */
+    case URES_ARRAY:
+    case URES_TABLE32:
+        if(offset==0) {
+            return 0;
+        }
+        return *(pResData->pRoot+offset);
+    /* scalar values count as a single item */
+    case URES_STRING:
+    case URES_STRING_V2:
+    case URES_BINARY:
+    case URES_ALIAS:
+    case URES_INT:
+    case URES_INT_VECTOR:
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+U_NAMESPACE_BEGIN
+
+// Out-of-line destructor with an empty body.
+ResourceDataValue::~ResourceDataValue() {}
+
+// Returns the public type of this value, as mapped by res_getPublicType().
+UResType ResourceDataValue::getType() const {
+ return res_getPublicType(res);
+}
+
+// Returns the string of this value and sets length.
+// Returns NULL if errorCode already indicates a failure; also returns NULL,
+// with U_RESOURCE_TYPE_MISMATCH, if res_getString() yields no string.
+const UChar *ResourceDataValue::getString(int32_t &length, UErrorCode &errorCode) const {
+    const UChar *text = NULL;
+    if(U_SUCCESS(errorCode)) {
+        text = res_getString(fTraceInfo, &getData(), res, &length);
+        if(text == NULL) {
+            errorCode = U_RESOURCE_TYPE_MISMATCH;
+        }
+    }
+    return text;
+}
+
+// Returns the alias string of this value and sets length.
+// Returns NULL if errorCode already indicates a failure; also returns NULL,
+// with U_RESOURCE_TYPE_MISMATCH, if res_getAlias() yields no string.
+const UChar *ResourceDataValue::getAliasString(int32_t &length, UErrorCode &errorCode) const {
+    const UChar *text = NULL;
+    if(U_SUCCESS(errorCode)) {
+        text = res_getAlias(&getData(), res, &length);
+        if(text == NULL) {
+            errorCode = U_RESOURCE_TYPE_MISMATCH;
+        }
+    }
+    return text;
+}
+
+// Returns this value as a signed integer; returns 0 if errorCode already indicates a failure.
+// A value of another type sets U_RESOURCE_TYPE_MISMATCH,
+// but the result of res_getInt() is returned regardless.
+int32_t ResourceDataValue::getInt(UErrorCode &errorCode) const {
+    if(U_SUCCESS(errorCode)) {
+        if(RES_GET_TYPE(res) != URES_INT) {
+            errorCode = U_RESOURCE_TYPE_MISMATCH;
+        }
+        return res_getInt(fTraceInfo, res);
+    }
+    return 0;
+}
+
+// Returns this value as an unsigned integer; returns 0 if errorCode already indicates a failure.
+// A value of another type sets U_RESOURCE_TYPE_MISMATCH,
+// but the result of res_getUInt() is returned regardless.
+uint32_t ResourceDataValue::getUInt(UErrorCode &errorCode) const {
+    if(U_SUCCESS(errorCode)) {
+        if(RES_GET_TYPE(res) != URES_INT) {
+            errorCode = U_RESOURCE_TYPE_MISMATCH;
+        }
+        return res_getUInt(fTraceInfo, res);
+    }
+    return 0;
+}
+
+// Returns the integer vector of this value and sets length.
+// Returns NULL if errorCode already indicates a failure; also returns NULL,
+// with U_RESOURCE_TYPE_MISMATCH, if res_getIntVector() yields no vector.
+const int32_t *ResourceDataValue::getIntVector(int32_t &length, UErrorCode &errorCode) const {
+    const int32_t *values = NULL;
+    if(U_SUCCESS(errorCode)) {
+        values = res_getIntVector(fTraceInfo, &getData(), res, &length);
+        if(values == NULL) {
+            errorCode = U_RESOURCE_TYPE_MISMATCH;
+        }
+    }
+    return values;
+}
+
+// Returns the binary data of this value and sets length.
+// Returns NULL if errorCode already indicates a failure; also returns NULL,
+// with U_RESOURCE_TYPE_MISMATCH, if res_getBinary() yields no data.
+const uint8_t *ResourceDataValue::getBinary(int32_t &length, UErrorCode &errorCode) const {
+    const uint8_t *bytes = NULL;
+    if(U_SUCCESS(errorCode)) {
+        bytes = res_getBinary(fTraceInfo, &getData(), res, &length);
+        if(bytes == NULL) {
+            errorCode = U_RESOURCE_TYPE_MISMATCH;
+        }
+    }
+    return bytes;
+}
+
+// Returns a ResourceArray view of this value.
+// Exactly one of items16/items32 is set for a non-empty array, depending on the encoding.
+// Returns a default-constructed ResourceArray if errorCode already indicates a failure,
+// or (with U_RESOURCE_TYPE_MISMATCH) if this value is not an array.
+ResourceArray ResourceDataValue::getArray(UErrorCode &errorCode) const {
+ if(U_FAILURE(errorCode)) {
+ return ResourceArray();
+ }
+ const uint16_t *items16 = NULL;
+ const Resource *items32 = NULL;
+ uint32_t offset=RES_GET_OFFSET(res);
+ int32_t length = 0;
+ switch(RES_GET_TYPE(res)) {
+ case URES_ARRAY:
+ if (offset!=0) { // empty if offset==0
+ // the first 32-bit word is the item count, the items follow it
+ items32 = (const Resource *)getData().pRoot+offset;
+ length = *items32++;
+ }
+ break;
+ case URES_ARRAY16:
+ // the first 16-bit unit is the item count, the items follow it
+ items16 = getData().p16BitUnits+offset;
+ length = *items16++;
+ break;
+ default:
+ errorCode = U_RESOURCE_TYPE_MISMATCH;
+ return ResourceArray();
+ }
+ return ResourceArray(items16, items32, length, fTraceInfo);
+}
+
+// Returns a ResourceTable view of this value.
+// Depending on the table encoding, keys are 16-bit or 32-bit offsets and
+// items are 16-bit or 32-bit resources; the unused pointers stay NULL.
+// Returns a default-constructed ResourceTable if errorCode already indicates a failure,
+// or (with U_RESOURCE_TYPE_MISMATCH) if this value is not a table.
+ResourceTable ResourceDataValue::getTable(UErrorCode &errorCode) const {
+ if(U_FAILURE(errorCode)) {
+ return ResourceTable();
+ }
+ const uint16_t *keys16 = NULL;
+ const int32_t *keys32 = NULL;
+ const uint16_t *items16 = NULL;
+ const Resource *items32 = NULL;
+ uint32_t offset = RES_GET_OFFSET(res);
+ int32_t length = 0;
+ switch(RES_GET_TYPE(res)) {
+ case URES_TABLE:
+ if (offset != 0) { // empty if offset==0
+ // 16-bit count, then 16-bit key offsets, then 32-bit items
+ keys16 = (const uint16_t *)(getData().pRoot+offset);
+ length = *keys16++;
+ // (~length&1) skips one extra 16-bit unit when length is even, so that
+ // count + keys + padding is an even number of 16-bit units
+ items32 = (const Resource *)(keys16+length+(~length&1));
+ }
+ break;
+ case URES_TABLE16:
+ // 16-bit count, then 16-bit key offsets, then 16-bit items
+ keys16 = getData().p16BitUnits+offset;
+ length = *keys16++;
+ items16 = keys16 + length;
+ break;
+ case URES_TABLE32:
+ if (offset != 0) { // empty if offset==0
+ // 32-bit count, then 32-bit key offsets, then 32-bit items
+ keys32 = getData().pRoot+offset;
+ length = *keys32++;
+ items32 = (const Resource *)keys32 + length;
+ }
+ break;
+ default:
+ errorCode = U_RESOURCE_TYPE_MISMATCH;
+ return ResourceTable();
+ }
+ return ResourceTable(keys16, keys32, items16, items32, length, fTraceInfo);
+}
+
+// Forwards to the file-local ::isNoInheritanceMarker() helper for this value's data and resource.
+UBool ResourceDataValue::isNoInheritanceMarker() const {
+ return ::isNoInheritanceMarker(&getData(), res);
+}
+
+// Forwards to the file-local ::getStringArray() helper with this value viewed as an array.
+// getArray() sets errorCode for a non-array value, and the helper returns 0 on a failed errorCode.
+int32_t ResourceDataValue::getStringArray(UnicodeString *dest, int32_t capacity,
+ UErrorCode &errorCode) const {
+ return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode);
+}
+
+// Copies this value into dest as strings: every element if this value is an array,
+// or the single string as a one-element array otherwise.
+//
+// Returns the number of strings. Error cases for the single-string path:
+// - errorCode already a failure: returns 0
+// - dest==NULL with capacity!=0, or capacity<0: U_ILLEGAL_ARGUMENT_ERROR, returns 0
+// - capacity<1: U_BUFFER_OVERFLOW_ERROR, returns 1 (the required capacity)
+// - value is not a string: U_RESOURCE_TYPE_MISMATCH, returns 0
+// The array path reports errors as the file-local ::getStringArray() helper does.
+int32_t ResourceDataValue::getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity,
+                                                         UErrorCode &errorCode) const {
+    // Test the resource *type*, not the whole resource word: the sibling predicate
+    // URES_IS_TABLE() is likewise applied to RES_GET_TYPE(...) in res_init().
+    // With the raw word, array resources were not recognized and fell through
+    // to the single-string path below.
+    if(URES_IS_ARRAY(RES_GET_TYPE(res))) {
+        return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode);
+    }
+    if(U_FAILURE(errorCode)) {
+        return 0;
+    }
+    if(dest == NULL ? capacity != 0 : capacity < 0) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    if(capacity < 1) {
+        errorCode = U_BUFFER_OVERFLOW_ERROR;
+        return 1;
+    }
+    int32_t sLength;
+    const UChar *s = res_getString(fTraceInfo, &getData(), res, &sLength);
+    if(s != NULL) {
+        dest[0].setTo(TRUE, s, sLength);
+        return 1;
+    }
+    errorCode = U_RESOURCE_TYPE_MISMATCH;
+    return 0;
+}
+
+// Returns this value as a string, or the first element if this value is an array.
+// Returns an empty string if errorCode already indicates a failure, if getArray() fails,
+// or (with U_RESOURCE_TYPE_MISMATCH) if the array is empty or its first element is not a string.
+UnicodeString ResourceDataValue::getStringOrFirstOfArray(UErrorCode &errorCode) const {
+    UnicodeString result;
+    if(U_FAILURE(errorCode)) {
+        return result;
+    }
+    int32_t textLength;
+    const UChar *text = res_getString(fTraceInfo, &getData(), res, &textLength);
+    if(text == NULL) {
+        // Not a plain string: try the first array element instead.
+        ResourceArray array = getArray(errorCode);
+        if(U_FAILURE(errorCode)) {
+            return result;
+        }
+        if(array.getSize() > 0) {
+            // Tracing is already performed above (unimportant for trace that this is an array)
+            text = res_getStringNoTrace(&getData(), array.internalGetResource(&getData(), 0), &textLength);
+        }
+    }
+    if(text != NULL) {
+        result.setTo(TRUE, text, textLength);
+    } else {
+        errorCode = U_RESOURCE_TYPE_MISMATCH;
+    }
+    return result;
+}
+
+U_NAMESPACE_END
+
+/*
+ * Turns a 16-bit string index into a full URES_STRING_V2 resource word.
+ * Indexes below poolStringIndex16Limit are used as they are; the others are rebased
+ * from poolStringIndex16Limit onto poolStringIndexLimit.
+ */
+static Resource
+makeResourceFrom16(const ResourceData *pResData, int32_t res16) {
+    if(res16>=pResData->poolStringIndex16Limit) {
+        // Local string, adjust the 16-bit offset to a regular one,
+        // with a larger pool string index limit.
+        res16=res16-pResData->poolStringIndex16Limit+pResData->poolStringIndexLimit;
+    }
+    // else: pool string, nothing to do.
+    return URES_MAKE_RESOURCE(URES_STRING_V2, res16);
+}
+
+/*
+ * Looks up *key in the table resource `table`.
+ *
+ * On success returns the item's resource, stores its index in *indexR and replaces
+ * *key with the pointer to the table's own copy of the key.
+ * Returns RES_BOGUS if key or *key is NULL, if `table` is not a table type,
+ * if the table is empty, or if the key is not found.
+ *
+ * *indexR is written only when a search is actually performed; it then receives
+ * the found index or URESDATA_ITEM_NOT_FOUND. On the other RES_BOGUS paths it is
+ * left untouched.
+ */
+U_CAPI Resource U_EXPORT2
+res_getTableItemByKey(const ResourceData *pResData, Resource table,
+ int32_t *indexR, const char **key) {
+ uint32_t offset=RES_GET_OFFSET(table);
+ int32_t length;
+ int32_t idx;
+ if(key == NULL || *key == NULL) {
+ return RES_BOGUS;
+ }
+ switch(RES_GET_TYPE(table)) {
+ case URES_TABLE: {
+ if (offset!=0) { /* empty if offset==0 */
+ const uint16_t *p= (const uint16_t *)(pResData->pRoot+offset);
+ length=*p++;
+ *indexR=idx=_res_findTableItem(pResData, p, length, *key, key);
+ if(idx>=0) {
+ /* the 32-bit items follow the keys, after one padding unit when length is even */
+ const Resource *p32=(const Resource *)(p+length+(~length&1));
+ return p32[idx];
+ }
+ }
+ break;
+ }
+ case URES_TABLE16: {
+ const uint16_t *p=pResData->p16BitUnits+offset;
+ length=*p++;
+ *indexR=idx=_res_findTableItem(pResData, p, length, *key, key);
+ if(idx>=0) {
+ /* the 16-bit items directly follow the keys */
+ return makeResourceFrom16(pResData, p[length+idx]);
+ }
+ break;
+ }
+ case URES_TABLE32: {
+ if (offset!=0) { /* empty if offset==0 */
+ const int32_t *p= pResData->pRoot+offset;
+ length=*p++;
+ *indexR=idx=_res_findTable32Item(pResData, p, length, *key, key);
+ if(idx>=0) {
+ /* the 32-bit items directly follow the keys */
+ return (Resource)p[length+idx];
+ }
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ return RES_BOGUS;
+}
+
+/**
+ * Returns the item at position indexR of a table resource.
+ *
+ * Handles the three table layouts (URES_TABLE, URES_TABLE16, URES_TABLE32).
+ * If key is not NULL and the item exists, *key receives the item's key string.
+ * Returns RES_BOGUS for a negative or out-of-range index, for an empty
+ * table, or if the resource is not a table.
+ */
+U_CAPI Resource U_EXPORT2
+res_getTableItemByIndex(const ResourceData *pResData, Resource table,
+                        int32_t indexR, const char **key) {
+    if(indexR < 0) {
+        return RES_BOGUS;
+    }
+
+    const uint32_t start=RES_GET_OFFSET(table);
+    const int32_t tableType=RES_GET_TYPE(table);
+
+    if(tableType==URES_TABLE) {
+        if(start==0) {
+            /* offset 0 marks an empty table */
+            return RES_BOGUS;
+        }
+        const uint16_t *keyOffsets=(const uint16_t *)(pResData->pRoot+start);
+        const int32_t count=*keyOffsets++;
+        if(indexR>=count) {
+            return RES_BOGUS;
+        }
+        /* the 32-bit items follow the 16-bit key offsets, after optional padding */
+        const Resource *items=(const Resource *)(keyOffsets+count+(~count&1));
+        if(key!=NULL) {
+            *key=RES_GET_KEY16(pResData, keyOffsets[indexR]);
+        }
+        return items[indexR];
+    }
+
+    if(tableType==URES_TABLE16) {
+        const uint16_t *units=pResData->p16BitUnits+start;
+        const int32_t count=*units++;
+        if(indexR>=count) {
+            return RES_BOGUS;
+        }
+        if(key!=NULL) {
+            *key=RES_GET_KEY16(pResData, units[indexR]);
+        }
+        return makeResourceFrom16(pResData, units[count+indexR]);
+    }
+
+    if(tableType==URES_TABLE32 && start!=0) {
+        const int32_t *words=pResData->pRoot+start;
+        const int32_t count=*words++;
+        if(indexR<count) {
+            if(key!=NULL) {
+                *key=RES_GET_KEY32(pResData, words[indexR]);
+            }
+            return (Resource)words[count+indexR];
+        }
+    }
+
+    return RES_BOGUS;
+}
+
+/**
+ * Looks up key in the bundle's root resource (pResData->rootRes).
+ * The index and the key pointer reported by the table lookup are discarded.
+ */
+U_CAPI Resource U_EXPORT2
+res_getResource(const ResourceData *pResData, const char *key) {
+    int32_t unusedIndex;
+    const char *lookupKey=key;
+    const Resource found=res_getTableItemByKey(pResData, pResData->rootRes, &unusedIndex, &lookupKey);
+    return found;
+}
+
+
+/**
+ * Fetches the i-th key/value pair of this table.
+ *
+ * On success, key receives the item's key string and value (which is cast to
+ * ResourceDataValue) is pointed at the item. Returns FALSE without touching
+ * key or value if i is outside [0, length).
+ */
+UBool icu::ResourceTable::getKeyAndValue(int32_t i,
+        const char *&key, icu::ResourceValue &value) const {
+    if(i < 0 || i >= length) {
+        return FALSE;
+    }
+    icu::ResourceDataValue &dataValue = static_cast<icu::ResourceDataValue &>(value);
+    const auto *bundle = &dataValue.getData();
+
+    if (keys16 == nullptr) {
+        key = RES_GET_KEY32(bundle, keys32[i]);
+    } else {
+        key = RES_GET_KEY16(bundle, keys16[i]);
+    }
+
+    const Resource item = (items16 == nullptr) ?
+        items32[i] : makeResourceFrom16(bundle, items16[i]);
+
+    // The ResourceTracer built here refers to a field of this ResourceTable.
+    // That is acceptable because the table is expected to stay alive for as
+    // long as fields (including nested ones) are being read from it.
+    dataValue.setResource(item, ResourceTracer(fTraceInfo, key));
+    return TRUE;
+}
+
+/**
+ * Searches this table for key.
+ *
+ * On success, value (which is cast to ResourceDataValue) is pointed at the
+ * matching item and TRUE is returned; otherwise value is left unchanged and
+ * FALSE is returned.
+ */
+UBool icu::ResourceTable::findValue(const char *key, ResourceValue &value) const {
+    icu::ResourceDataValue &dataValue = static_cast<icu::ResourceDataValue &>(value);
+    const auto *bundle = &dataValue.getData();
+    const char *realKey = nullptr;
+
+    const int32_t found = (keys16 != nullptr) ?
+        _res_findTableItem(bundle, keys16, length, key, &realKey) :
+        _res_findTable32Item(bundle, keys32, length, key, &realKey);
+    if (found < 0) {
+        return FALSE;
+    }
+
+    const Resource item = (items16 == nullptr) ?
+        items32[found] : makeResourceFrom16(bundle, items16[found]);
+
+    // The lifetime remark in getKeyAndValue() applies here as well.
+    dataValue.setResource(item, ResourceTracer(fTraceInfo, key));
+    return TRUE;
+}
+
+/**
+ * Returns the item at position indexR of an array resource
+ * (URES_ARRAY or URES_ARRAY16).
+ *
+ * Returns RES_BOGUS for a negative or out-of-range index, for an empty
+ * URES_ARRAY (offset 0), or if the resource is not an array.
+ */
+U_CAPI Resource U_EXPORT2
+res_getArrayItem(const ResourceData *pResData, Resource array, int32_t indexR) {
+    if(indexR < 0) {
+        return RES_BOGUS;
+    }
+
+    const uint32_t start=RES_GET_OFFSET(array);
+    const int32_t arrayType=RES_GET_TYPE(array);
+
+    if(arrayType==URES_ARRAY) {
+        if(start!=0) {
+            /* words[0] is the item count, the items follow */
+            const int32_t *words=pResData->pRoot+start;
+            if(indexR<words[0]) {
+                return (Resource)words[1+indexR];
+            }
+        }
+    } else if(arrayType==URES_ARRAY16) {
+        /* units[0] is the item count, the 16-bit items follow */
+        const uint16_t *units=pResData->p16BitUnits+start;
+        if(indexR<units[0]) {
+            return makeResourceFrom16(pResData, units[1+indexR]);
+        }
+    }
+
+    return RES_BOGUS;
+}
+
+/**
+ * Returns the Resource word of item i without any bounds checking.
+ * 16-bit items are expanded with makeResourceFrom16(); 32-bit items are
+ * returned as stored.
+ */
+uint32_t icu::ResourceArray::internalGetResource(const ResourceData *pResData, int32_t i) const {
+    if (items16 == NULL) {
+        return items32[i];
+    }
+    return makeResourceFrom16(pResData, items16[i]);
+}
+
+/**
+ * Points value (which is cast to ResourceDataValue) at the i-th array item.
+ * Returns FALSE and leaves value unchanged if i is outside [0, length).
+ */
+UBool icu::ResourceArray::getValue(int32_t i, icu::ResourceValue &value) const {
+    if(i < 0 || i >= length) {
+        return FALSE;
+    }
+    icu::ResourceDataValue &dataValue = static_cast<icu::ResourceDataValue &>(value);
+    const uint32_t item = internalGetResource(&dataValue.getData(), i);
+    // The ResourceTracer built here refers to a field of this ResourceArray.
+    // That is acceptable because the array is expected to stay alive for as
+    // long as fields (including nested ones) are being read from it.
+    dataValue.setResource(item, ResourceTracer(fTraceInfo, i));
+    return TRUE;
+}
+/*
+ * Resolves a resource path relative to resource r, one segment at a time.
+ * Segments are separated by RES_PATH_SEPARATOR.
+ *
+ * The path string is modified in place: every separator that is consumed is
+ * overwritten with NUL, and *path is advanced past each consumed segment
+ * (to the terminating NUL once the last segment has been consumed).
+ *
+ * In a table, a segment is first tried as a key and, if that fails, as a
+ * decimal index; in an array it must be a decimal index.
+ * *key is set to the current segment before a table key lookup and may be
+ * updated by the table lookup functions; it is set to NULL after an array step.
+ *
+ * Returns r itself for an empty path, RES_BOGUS if r is not a container or
+ * if a segment cannot be resolved, and otherwise the last resource reached
+ * (which may be a scalar if the walk stopped early on a non-container).
+ */
+U_CFUNC Resource
+res_findResource(const ResourceData *pResData, Resource r, char** path, const char** key) {
+ char *pathP = *path, *nextSepP = *path;
+ char *closeIndex = NULL;
+ Resource t1 = r;
+ Resource t2;
+ int32_t indexR = 0;
+ UResType type = (UResType)RES_GET_TYPE(t1);
+
+ /* if you come in with an empty path, you'll be getting back the same resource */
+ if(!uprv_strlen(pathP)) {
+ return r;
+ }
+
+ /* one needs to have an aggregate resource in order to search in it */
+ if(!URES_IS_CONTAINER(type)) {
+ return RES_BOGUS;
+ }
+
+ while(nextSepP && *pathP && t1 != RES_BOGUS && URES_IS_CONTAINER(type)) {
+ /* Iteration stops if: the path has been consumed, we found a non-existing
+ * resource (t1 == RES_BOGUS) or we found a scalar resource (including alias)
+ */
+ nextSepP = uprv_strchr(pathP, RES_PATH_SEPARATOR);
+ /* if there are more separators, terminate string
+ * and set path to the remaining part of the string
+ */
+ if(nextSepP != NULL) {
+ if(nextSepP == pathP) {
+ // Empty key string.
+ return RES_BOGUS;
+ }
+ *nextSepP = 0; /* overwrite the separator with a NUL to terminate the key */
+ *path = nextSepP+1;
+ } else {
+ *path = uprv_strchr(pathP, 0); /* last segment: leave *path at the terminating NUL */
+ }
+
+ /* if the resource is a table */
+ /* try the key based access */
+ if(URES_IS_TABLE(type)) {
+ *key = pathP;
+ t2 = res_getTableItemByKey(pResData, t1, &indexR, key);
+ if(t2 == RES_BOGUS) {
+ /* if we fail to get the resource by key, maybe we got an index */
+ indexR = uprv_strtol(pathP, &closeIndex, 10);
+ if(indexR >= 0 && *closeIndex == 0 && (*pathP != '0' || closeIndex - pathP == 1)) { /* whole segment parsed; a leading '0' is accepted only for the single-character segment "0" */
+ /* if we indeed have an index, try to get the item by index */
+ t2 = res_getTableItemByIndex(pResData, t1, indexR, key);
+ } // else t2 is already RES_BOGUS
+ }
+ } else if(URES_IS_ARRAY(type)) {
+ indexR = uprv_strtol(pathP, &closeIndex, 10);
+ if(indexR >= 0 && *closeIndex == 0) { /* the whole segment must parse as a non-negative number */
+ t2 = res_getArrayItem(pResData, t1, indexR);
+ } else {
+ t2 = RES_BOGUS; /* have an array, but don't have a valid index */
+ }
+ *key = NULL;
+ } else { /* can't do much here, except setting t2 to bogus */
+ t2 = RES_BOGUS;
+ }
+ t1 = t2;
+ type = (UResType)RES_GET_TYPE(t1);
+ /* position pathP to next resource key/index */
+ pathP = *path;
+ }
+
+ return t1;
+}
+
+/* resource bundle swapping ------------------------------------------------- */
+
+/*
+ * Need to always enumerate the entire item tree,
+ * track the lowest address of any item to use as the limit for char keys[],
+ * track the highest address of any item to return the size of the data.
+ *
+ * We should have thought of storing those in the data...
+ * It is possible to extend the data structure by putting additional values
+ * in places that are inaccessible by ordinary enumeration of the item tree.
+ * For example, additional integers could be stored at the beginning or
+ * end of the key strings; this could be indicated by a minor version number,
+ * and the data swapping would have to know about these values.
+ *
+ * The data structure does not forbid keys to be shared, so we must swap
+ * all keys once instead of each key when it is referenced.
+ *
+ * These swapping functions assume that a resource bundle always has a length
+ * that is a multiple of 4 bytes.
+ * Currently, this is trivially true because genrb writes bundle tree leaves
+ * physically first, before their branches, so that the root table with its
+ * array of resource items (uint32_t values) is always last.
+ */
+
+/* definitions for table sorting ------------------------ */
+
+/*
+ * Row of the temporary array used for sorting one table.
+ *
+ * Each row gets a platform-endian key string index and a sorting index;
+ * after sorting this array by keys, the actual key/value arrays are permuted
+ * according to the sorting indexes.
+ */
+typedef struct Row {
+ int32_t keyIndex, sortIndex; /* keyIndex: offset of the key string from TempTable.keyChars; sortIndex: the item's position before sorting */
+} Row;
+
+/**
+ * Comparison callback for uprv_sortArray(): orders two Row entries by the
+ * key strings found at context+keyIndex, compared with uprv_strcmp().
+ */
+static int32_t U_CALLCONV
+ures_compareRows(const void *context, const void *left, const void *right) {
+    const char *keyBase=(const char *)context;
+    const Row *lhs=(const Row *)left;
+    const Row *rhs=(const Row *)right;
+    return (int32_t)uprv_strcmp(keyBase+lhs->keyIndex, keyBase+rhs->keyIndex);
+}
+
+typedef struct TempTable { /* scratch state that ures_swap() sets up and passes to every ures_swapResource() call */
+ const char *keyChars; /* base address that Row.keyIndex values are added to when comparing key strings */
+ Row *rows; /* sort rows for one table at a time */
+ int32_t *resort; /* temporary destination for the permuted keys/items when swapping in place */
+ uint32_t *resFlags; /* bit set indexed by resource offset; a set bit means that item was already swapped */
+ int32_t localKeyLimit; /* 16-bit key offsets below this value refer to key strings inside this bundle */
+ uint8_t majorFormatVersion; /* formatVersion[0] of the bundle being swapped */
+} TempTable;
+
+enum {
+ STACK_ROW_CAPACITY=200 /* element count of the stack-allocated rows[], resort[] and stackResFlags[] arrays in ures_swap() */
+};
+
+/*
+ * Marker passed as the key when the table item key string is not locally available.
+ * ures_swapResource() recognizes it by pointer comparison, not by its contents.
+ */
+static const char *const gUnknownKey="";
+
+/*
+ * resource table key for collation binaries: "%%CollationBin"
+ * (NUL-terminated; callers pass UPRV_LENGTHOF()-1 to exclude the terminator)
+ */
+static const UChar gCollationBinKey[]={
+ 0x25, 0x25,
+ 0x43, 0x6f, 0x6c, 0x6c, 0x61, 0x74, 0x69, 0x6f, 0x6e,
+ 0x42, 0x69, 0x6e,
+ 0
+};
+
+/*
+ * Swap one resource item and, for tables and arrays, recursively all items
+ * reachable from it.
+ *
+ * ds          swapper providing the read/swap callbacks
+ * inBundle    input bundle, addressed in 32-bit Resource words
+ * outBundle   output bundle (may be the same memory as inBundle)
+ * res         the Resource word of the item, already in platform byte order;
+ *             the word itself is swapped by the caller
+ * key         key string of the item if it is a table member (gUnknownKey if
+ *             the string is not available), or NULL if it is not in a table
+ * pTempTable  shared scratch state (sort buffers, already-swapped bit set)
+ * pErrorCode  set to U_UNSUPPORTED_ERROR for an unknown resource type;
+ *             failures from nested calls are reported and propagated
+ *
+ * Items whose data lives in the 16-bit units area, integers, empty items
+ * (offset 0) and items already marked in pTempTable->resFlags are skipped.
+ */
+static void
+ures_swapResource(const UDataSwapper *ds,
+ const Resource *inBundle, Resource *outBundle,
+ Resource res, /* caller swaps res itself */
+ const char *key,
+ TempTable *pTempTable,
+ UErrorCode *pErrorCode) {
+ const Resource *p;
+ Resource *q;
+ int32_t offset, count;
+
+ switch(RES_GET_TYPE(res)) {
+ case URES_TABLE16:
+ case URES_STRING_V2:
+ case URES_INT:
+ case URES_ARRAY16:
+ /* integer, or points to 16-bit units, nothing to do here */
+ return;
+ default:
+ break;
+ }
+
+ /* all other types use an offset to point to their data */
+ offset=(int32_t)RES_GET_OFFSET(res);
+ if(offset==0) {
+ /* special offset indicating an empty item */
+ return;
+ }
+ if(pTempTable->resFlags[offset>>5]&((uint32_t)1<<(offset&0x1f))) { /* one bit per offset: word offset/32, bit offset%32 */
+ /* we already swapped this resource item */
+ return;
+ } else {
+ /* mark it as swapped now */
+ pTempTable->resFlags[offset>>5]|=((uint32_t)1<<(offset&0x1f));
+ }
+
+ p=inBundle+offset;
+ q=outBundle+offset;
+
+ switch(RES_GET_TYPE(res)) {
+ case URES_ALIAS:
+ /* physically same value layout as string, fall through */
+ U_FALLTHROUGH;
+ case URES_STRING:
+ count=udata_readInt32(ds, (int32_t)*p);
+ /* swap length */
+ ds->swapArray32(ds, p, 4, q, pErrorCode);
+ /* swap each UChar (the terminating NUL would not change) */
+ ds->swapArray16(ds, p+1, 2*count, q+1, pErrorCode);
+ break;
+ case URES_BINARY:
+ count=udata_readInt32(ds, (int32_t)*p);
+ /* swap length */
+ ds->swapArray32(ds, p, 4, q, pErrorCode);
+ /* no need to swap or copy bytes - ures_swap() copied them all */
+
+ /* swap known formats */
+#if !UCONFIG_NO_COLLATION
+ if( key!=NULL && /* the binary is in a table */
+ (key!=gUnknownKey ?
+ /* its table key string is "%%CollationBin" */
+ 0==ds->compareInvChars(ds, key, -1,
+ gCollationBinKey, UPRV_LENGTHOF(gCollationBinKey)-1) :
+ /* its table key string is unknown but it looks like a collation binary */
+ ucol_looksLikeCollationBinary(ds, p+1, count))
+ ) {
+ ucol_swap(ds, p+1, count, q+1, pErrorCode);
+ }
+#endif
+ break;
+ case URES_TABLE:
+ case URES_TABLE32:
+ {
+ const uint16_t *pKey16;
+ uint16_t *qKey16;
+
+ const int32_t *pKey32;
+ int32_t *qKey32;
+
+ Resource item;
+ int32_t i, oldIndex;
+
+ if(RES_GET_TYPE(res)==URES_TABLE) {
+ /* get table item count */
+ pKey16=(const uint16_t *)p;
+ qKey16=(uint16_t *)q;
+ count=ds->readUInt16(*pKey16);
+
+ pKey32=qKey32=NULL;
+
+ /* swap count */
+ ds->swapArray16(ds, pKey16++, 2, qKey16++, pErrorCode);
+
+ offset+=((1+count)+1)/2; /* 32-bit words taken by the count plus count 16-bit key offsets, rounded up */
+ } else {
+ /* get table item count */
+ pKey32=(const int32_t *)p;
+ qKey32=(int32_t *)q;
+ count=udata_readInt32(ds, *pKey32);
+
+ pKey16=qKey16=NULL;
+
+ /* swap count */
+ ds->swapArray32(ds, pKey32++, 4, qKey32++, pErrorCode);
+
+ offset+=1+count; /* the count word plus count 32-bit key offsets */
+ }
+
+ if(count==0) {
+ break;
+ }
+
+ p=inBundle+offset; /* pointer to table resources */
+ q=outBundle+offset;
+
+ /* recurse (items are still read from the input here; this table's own arrays are swapped only afterwards) */
+ for(i=0; i<count; ++i) {
+ const char *itemKey=gUnknownKey;
+ if(pKey16!=NULL) {
+ int32_t keyOffset=ds->readUInt16(pKey16[i]);
+ if(keyOffset<pTempTable->localKeyLimit) {
+ itemKey=(const char *)outBundle+keyOffset; /* local key string, taken from the output bundle */
+ }
+ } else {
+ int32_t keyOffset=udata_readInt32(ds, pKey32[i]);
+ if(keyOffset>=0) {
+ itemKey=(const char *)outBundle+keyOffset;
+ }
+ }
+ item=ds->readUInt32(p[i]);
+ ures_swapResource(ds, inBundle, outBundle, item, itemKey, pTempTable, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ udata_printError(ds, "ures_swapResource(table res=%08x)[%d].recurse(%08x) failed\n",
+ res, i, item);
+ return;
+ }
+ }
+
+ if(pTempTable->majorFormatVersion>1 || ds->inCharset==ds->outCharset) {
+ /* no need to sort, just swap the offset/value arrays */
+ if(pKey16!=NULL) {
+ ds->swapArray16(ds, pKey16, count*2, qKey16, pErrorCode);
+ ds->swapArray32(ds, p, count*4, q, pErrorCode);
+ } else {
+ /* swap key offsets and items as one array */
+ ds->swapArray32(ds, pKey32, count*2*4, qKey32, pErrorCode);
+ }
+ break;
+ }
+
+ /*
+ * We need to sort tables by outCharset key strings because they
+ * sort differently for different charset families.
+ * ures_swap() already set pTempTable->keyChars appropriately.
+ * First we set up a temporary table with the key indexes and
+ * sorting indexes and sort that.
+ * Then we permutate and copy/swap the actual values.
+ */
+ if(pKey16!=NULL) {
+ for(i=0; i<count; ++i) {
+ pTempTable->rows[i].keyIndex=ds->readUInt16(pKey16[i]);
+ pTempTable->rows[i].sortIndex=i;
+ }
+ } else {
+ for(i=0; i<count; ++i) {
+ pTempTable->rows[i].keyIndex=udata_readInt32(ds, pKey32[i]);
+ pTempTable->rows[i].sortIndex=i;
+ }
+ }
+ uprv_sortArray(pTempTable->rows, count, sizeof(Row),
+ ures_compareRows, pTempTable->keyChars,
+ FALSE, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ udata_printError(ds, "ures_swapResource(table res=%08x).uprv_sortArray(%d items) failed\n",
+ res, count);
+ return;
+ }
+
+ /*
+ * copy/swap/permutate items
+ *
+ * If we swap in-place, then the permutation must use another
+ * temporary array (pTempTable->resort)
+ * before the results are copied to the outBundle.
+ */
+ /* keys */
+ if(pKey16!=NULL) {
+ uint16_t *rKey16;
+
+ if(pKey16!=qKey16) {
+ rKey16=qKey16;
+ } else {
+ rKey16=(uint16_t *)pTempTable->resort;
+ }
+ for(i=0; i<count; ++i) {
+ oldIndex=pTempTable->rows[i].sortIndex;
+ ds->swapArray16(ds, pKey16+oldIndex, 2, rKey16+i, pErrorCode);
+ }
+ if(qKey16!=rKey16) {
+ uprv_memcpy(qKey16, rKey16, 2*count);
+ }
+ } else {
+ int32_t *rKey32;
+
+ if(pKey32!=qKey32) {
+ rKey32=qKey32;
+ } else {
+ rKey32=pTempTable->resort;
+ }
+ for(i=0; i<count; ++i) {
+ oldIndex=pTempTable->rows[i].sortIndex;
+ ds->swapArray32(ds, pKey32+oldIndex, 4, rKey32+i, pErrorCode);
+ }
+ if(qKey32!=rKey32) {
+ uprv_memcpy(qKey32, rKey32, 4*count);
+ }
+ }
+
+ /* resources */
+ {
+ Resource *r;
+
+
+ if(p!=q) {
+ r=q;
+ } else {
+ r=(Resource *)pTempTable->resort;
+ }
+ for(i=0; i<count; ++i) {
+ oldIndex=pTempTable->rows[i].sortIndex;
+ ds->swapArray32(ds, p+oldIndex, 4, r+i, pErrorCode);
+ }
+ if(q!=r) {
+ uprv_memcpy(q, r, 4*count);
+ }
+ }
+ }
+ break;
+ case URES_ARRAY:
+ {
+ Resource item;
+ int32_t i;
+
+ count=udata_readInt32(ds, (int32_t)*p);
+ /* swap length */
+ ds->swapArray32(ds, p++, 4, q++, pErrorCode);
+
+ /* recurse */
+ for(i=0; i<count; ++i) {
+ item=ds->readUInt32(p[i]);
+ ures_swapResource(ds, inBundle, outBundle, item, NULL, pTempTable, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ udata_printError(ds, "ures_swapResource(array res=%08x)[%d].recurse(%08x) failed\n",
+ res, i, item);
+ return;
+ }
+ }
+
+ /* swap items */
+ ds->swapArray32(ds, p, 4*count, q, pErrorCode);
+ }
+ break;
+ case URES_INT_VECTOR:
+ count=udata_readInt32(ds, (int32_t)*p);
+ /* swap length and each integer */
+ ds->swapArray32(ds, p, 4*(1+count), q, pErrorCode);
+ break;
+ default:
+ /* also catches RES_BOGUS */
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ break;
+ }
+}
+
+/**
+ * Swaps a binary resource bundle ("ResB", formatVersion 1.1+, 2.x or 3.x).
+ *
+ * @param ds         swapper providing the read/swap callbacks
+ * @param inData     start of the input data, including the data header
+ * @param length     input length in bytes; if negative, nothing is written
+ *                   and only the size is computed from the bundle's indexes
+ * @param outData    destination; may be the same pointer as inData
+ * @param pErrorCode in/out error code
+ * @return headerSize+4*top, i.e. the size in bytes of header plus bundle;
+ *         0 if the header, the format, the indexes, a memory allocation,
+ *         or the swapping of the key strings / 16-bit units failed
+ *
+ * All temporary heap memory is released on every exit path, including the
+ * early error returns after the tracking bit set has been allocated.
+ */
+U_CAPI int32_t U_EXPORT2
+ures_swap(const UDataSwapper *ds,
+          const void *inData, int32_t length, void *outData,
+          UErrorCode *pErrorCode) {
+    const UDataInfo *pInfo;
+    const Resource *inBundle;
+    Resource rootRes;
+    int32_t headerSize, maxTableLength;
+
+    Row rows[STACK_ROW_CAPACITY];
+    int32_t resort[STACK_ROW_CAPACITY];
+    TempTable tempTable;
+
+    const int32_t *inIndexes;
+
+    /* the following integers count Resource item offsets (4 bytes each), not bytes */
+    int32_t bundleLength, indexLength, keysBottom, keysTop, resBottom, top;
+
+    /* udata_swapDataHeader checks the arguments */
+    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* check data format and format version */
+    pInfo=(const UDataInfo *)((const char *)inData+4);
+    if(!(
+        pInfo->dataFormat[0]==0x52 &&   /* dataFormat="ResB" */
+        pInfo->dataFormat[1]==0x65 &&
+        pInfo->dataFormat[2]==0x73 &&
+        pInfo->dataFormat[3]==0x42 &&
+        /* formatVersion 1.1+ or 2.x or 3.x */
+        ((pInfo->formatVersion[0]==1 && pInfo->formatVersion[1]>=1) ||
+            pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3)
+    )) {
+        udata_printError(ds, "ures_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a resource bundle\n",
+                         pInfo->dataFormat[0], pInfo->dataFormat[1],
+                         pInfo->dataFormat[2], pInfo->dataFormat[3],
+                         pInfo->formatVersion[0], pInfo->formatVersion[1]);
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+    tempTable.majorFormatVersion=pInfo->formatVersion[0];
+
+    /* a resource bundle must contain at least one resource item */
+    if(length<0) {
+        bundleLength=-1;
+    } else {
+        bundleLength=(length-headerSize)/4;
+
+        /* formatVersion 1.1 must have a root item and at least 5 indexes */
+        if(bundleLength<(1+5)) {
+            udata_printError(ds, "ures_swap(): too few bytes (%d after header) for a resource bundle\n",
+                             length-headerSize);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+    }
+
+    inBundle=(const Resource *)((const char *)inData+headerSize);
+    rootRes=ds->readUInt32(*inBundle);
+
+    /* formatVersion 1.1 adds the indexes[] array */
+    inIndexes=(const int32_t *)(inBundle+1);
+
+    indexLength=udata_readInt32(ds, inIndexes[URES_INDEX_LENGTH])&0xff;
+    if(indexLength<=URES_INDEX_MAX_TABLE_LENGTH) {
+        udata_printError(ds, "ures_swap(): too few indexes for a 1.1+ resource bundle\n");
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+    keysBottom=1+indexLength;
+    keysTop=udata_readInt32(ds, inIndexes[URES_INDEX_KEYS_TOP]);
+    if(indexLength>URES_INDEX_16BIT_TOP) {
+        resBottom=udata_readInt32(ds, inIndexes[URES_INDEX_16BIT_TOP]);
+    } else {
+        resBottom=keysTop;
+    }
+    top=udata_readInt32(ds, inIndexes[URES_INDEX_BUNDLE_TOP]);
+    maxTableLength=udata_readInt32(ds, inIndexes[URES_INDEX_MAX_TABLE_LENGTH]);
+
+    if(0<=bundleLength && bundleLength<top) {
+        udata_printError(ds, "ures_swap(): resource top %d exceeds bundle length %d\n",
+                         top, bundleLength);
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+    if(keysTop>(1+indexLength)) {
+        tempTable.localKeyLimit=keysTop<<2;
+    } else {
+        tempTable.localKeyLimit=0;
+    }
+
+    if(length>=0) {
+        Resource *outBundle=(Resource *)((char *)outData+headerSize);
+
+        /* track which resources we have already swapped */
+        uint32_t stackResFlags[STACK_ROW_CAPACITY];
+        int32_t resFlagsLength;
+
+        /*
+         * We need one bit per 4 resource bundle bytes so that we can track
+         * every possible Resource for whether we have swapped it already.
+         * Multiple Resource words can refer to the same bundle offsets
+         * for sharing identical values.
+         * We could optimize this by allocating only for locations above
+         * where Resource values are stored (above keys & strings).
+         */
+        resFlagsLength=(length+31)>>5;          /* number of bytes needed */
+        resFlagsLength=(resFlagsLength+3)&~3;   /* multiple of 4 bytes for uint32_t */
+        if(resFlagsLength<=(int32_t)sizeof(stackResFlags)) {
+            tempTable.resFlags=stackResFlags;
+        } else {
+            tempTable.resFlags=(uint32_t *)uprv_malloc(resFlagsLength);
+            if(tempTable.resFlags==NULL) {
+                udata_printError(ds, "ures_swap(): unable to allocate memory for tracking resources\n");
+                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+                return 0;
+            }
+        }
+        uprv_memset(tempTable.resFlags, 0, resFlagsLength);
+
+        /* copy the bundle for binary and inaccessible data */
+        if(inData!=outData) {
+            uprv_memcpy(outBundle, inBundle, 4*top);
+        }
+
+        /* swap the key strings, but not the padding bytes (0xaa) after the last string and its NUL */
+        udata_swapInvStringBlock(ds, inBundle+keysBottom, 4*(keysTop-keysBottom),
+                                    outBundle+keysBottom, pErrorCode);
+        if(U_FAILURE(*pErrorCode)) {
+            udata_printError(ds, "ures_swap().udata_swapInvStringBlock(keys[%d]) failed\n", 4*(keysTop-keysBottom));
+            /* release the tracking bit set if it was heap-allocated */
+            if(tempTable.resFlags!=stackResFlags) {
+                uprv_free(tempTable.resFlags);
+            }
+            return 0;
+        }
+
+        /* swap the 16-bit units (strings, table16, array16) */
+        if(keysTop<resBottom) {
+            ds->swapArray16(ds, inBundle+keysTop, (resBottom-keysTop)*4, outBundle+keysTop, pErrorCode);
+            if(U_FAILURE(*pErrorCode)) {
+                udata_printError(ds, "ures_swap().swapArray16(16-bit units[%d]) failed\n", 2*(resBottom-keysTop));
+                /* release the tracking bit set if it was heap-allocated */
+                if(tempTable.resFlags!=stackResFlags) {
+                    uprv_free(tempTable.resFlags);
+                }
+                return 0;
+            }
+        }
+
+        /* allocate the temporary table for sorting resource tables */
+        tempTable.keyChars=(const char *)outBundle; /* sort by outCharset */
+        if(tempTable.majorFormatVersion>1 || maxTableLength<=STACK_ROW_CAPACITY) {
+            tempTable.rows=rows;
+            tempTable.resort=resort;
+        } else {
+            /* one allocation holds the rows followed by the resort array */
+            tempTable.rows=(Row *)uprv_malloc(maxTableLength*sizeof(Row)+maxTableLength*4);
+            if(tempTable.rows==NULL) {
+                udata_printError(ds, "ures_swap(): unable to allocate memory for sorting tables (max length: %d)\n",
+                                 maxTableLength);
+                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+                if(tempTable.resFlags!=stackResFlags) {
+                    uprv_free(tempTable.resFlags);
+                }
+                return 0;
+            }
+            tempTable.resort=(int32_t *)(tempTable.rows+maxTableLength);
+        }
+
+        /* swap the resources */
+        ures_swapResource(ds, inBundle, outBundle, rootRes, NULL, &tempTable, pErrorCode);
+        if(U_FAILURE(*pErrorCode)) {
+            udata_printError(ds, "ures_swapResource(root res=%08x) failed\n",
+                             rootRes);
+        }
+
+        if(tempTable.rows!=rows) {
+            uprv_free(tempTable.rows);
+        }
+        if(tempTable.resFlags!=stackResFlags) {
+            uprv_free(tempTable.resFlags);
+        }
+
+        /* swap the root resource and indexes */
+        ds->swapArray32(ds, inBundle, keysBottom*4, outBundle, pErrorCode);
+    }
+
+    return headerSize+4*top;
+}
diff --git a/thirdparty/icu4c/common/uresdata.h b/thirdparty/icu4c/common/uresdata.h
new file mode 100644
index 0000000000..7c2152e57b
--- /dev/null
+++ b/thirdparty/icu4c/common/uresdata.h
@@ -0,0 +1,565 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1999-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* file name: uresdata.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999dec08
+* created by: Markus W. Scherer
+* 06/24/02 weiv Added support for resource sharing
+*/
+
+#ifndef __RESDATA_H__
+#define __RESDATA_H__
+
+#include "unicode/utypes.h"
+#include "unicode/udata.h"
+#include "unicode/ures.h"
+#include "putilimp.h"
+#include "udataswp.h"
+
+/**
+ * Numeric constants for internal-only types of resource items.
+ * These must use different numeric values than UResType constants
+ * because they are used together.
+ * Internal types are never returned by ures_getType().
+ */
+typedef enum {
+ /** Include a negative value so that the compiler uses the same int type as for UResType. */
+ URES_INTERNAL_NONE=-1,
+
+ /** Resource type constant for tables with 32-bit count, key offsets and values. */
+ URES_TABLE32=4,
+
+ /**
+ * Resource type constant for tables with 16-bit count, key offsets and values.
+ * All values are URES_STRING_V2 strings.
+ */
+ URES_TABLE16=5,
+
+ /** Resource type constant for 16-bit Unicode strings in formatVersion 2. */
+ URES_STRING_V2=6,
+
+ /**
+ * Resource type constant for arrays with 16-bit count and values.
+ * All values are URES_STRING_V2 strings.
+ */
+ URES_ARRAY16=9
+
+ /* Resource type 15 is not defined but effectively used by RES_BOGUS=0xffffffff. */
+} UResInternalType;
+
+/*
+ * A Resource is a 32-bit value that has 2 bit fields:
+ * 31..28 4-bit type, see enum below
+ * 27..0 28-bit four-byte-offset or value according to the type
+ */
+typedef uint32_t Resource;
+
+#define RES_BOGUS 0xffffffff /* all bits set; returned by the lookup functions when no item is found */
+#define RES_MAX_OFFSET 0x0fffffff /* largest value that fits in the 28-bit offset field */
+
+#define RES_GET_TYPE(res) ((int32_t)((res)>>28UL)) /* bits 31..28: the 4-bit type */
+#define RES_GET_OFFSET(res) ((res)&0x0fffffff) /* bits 27..0: the offset or value */
+#define RES_GET_POINTER(pRoot, res) ((pRoot)+RES_GET_OFFSET(res)) /* pRoot advanced by the item's offset */
+
+/* get signed and unsigned integer values directly from the Resource handle
+ * NOTE: For proper logging, please use the res_getInt() constexpr
+ */
+#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
+# define RES_GET_INT_NO_TRACE(res) (((int32_t)((res)<<4L))>>4L)
+#else
+# define RES_GET_INT_NO_TRACE(res) (int32_t)(((res)&0x08000000) ? (res)|0xf0000000 : (res)&0x07ffffff)
+#endif
+
+#define RES_GET_UINT_NO_TRACE(res) ((res)&0x0fffffff)
+
+#define URES_IS_ARRAY(type) ((int32_t)(type)==URES_ARRAY || (int32_t)(type)==URES_ARRAY16) /* either array layout */
+#define URES_IS_TABLE(type) ((int32_t)(type)==URES_TABLE || (int32_t)(type)==URES_TABLE16 || (int32_t)(type)==URES_TABLE32) /* any of the three table layouts */
+#define URES_IS_CONTAINER(type) (URES_IS_TABLE(type) || URES_IS_ARRAY(type)) /* a table or an array */
+
+#define URES_MAKE_RESOURCE(type, offset) (((Resource)(type)<<28)|(Resource)(offset)) /* type into bits 31..28, OR-ed with offset (not masked here) */
+#define URES_MAKE_EMPTY_RESOURCE(type) ((Resource)(type)<<28) /* Resource of the given type with offset 0 */
+
+/* indexes[] value names; indexes are generally 32-bit (Resource) indexes */
+enum {
+ /**
+ * [0] contains the length of indexes[]
+ * which is at most URES_INDEX_TOP of the latest format version
+ *
+ * formatVersion==1: all bits contain the length of indexes[]
+ * but the length is much less than 0xff;
+ * formatVersion>1:
+ * only bits 7..0 contain the length of indexes[],
+ * bits 31..8 are reserved and set to 0
+ * formatVersion>=3:
+ * bits 31..8 poolStringIndexLimit bits 23..0
+ */
+ URES_INDEX_LENGTH,
+ /**
+ * [1] contains the top of the key strings,
+ * same as the bottom of resources or UTF-16 strings, rounded up
+ */
+ URES_INDEX_KEYS_TOP,
+ /** [2] contains the top of all resources */
+ URES_INDEX_RESOURCES_TOP,
+ /**
+ * [3] contains the top of the bundle,
+ * in case it were ever different from [2]
+ */
+ URES_INDEX_BUNDLE_TOP,
+ /** [4] max. length of any table */
+ URES_INDEX_MAX_TABLE_LENGTH,
+ /**
+ * [5] attributes bit set, see URES_ATT_* (new in formatVersion 1.2)
+ *
+ * formatVersion>=3:
+ * bits 31..16 poolStringIndex16Limit
+ * bits 15..12 poolStringIndexLimit bits 27..24
+ */
+ URES_INDEX_ATTRIBUTES,
+ /**
+ * [6] top of the 16-bit units (UTF-16 string v2 UChars, URES_TABLE16, URES_ARRAY16),
+ * rounded up (new in formatVersion 2.0, ICU 4.4)
+ */
+ URES_INDEX_16BIT_TOP,
+ /** [7] checksum of the pool bundle (new in formatVersion 2.0, ICU 4.4) */
+ URES_INDEX_POOL_CHECKSUM,
+ URES_INDEX_TOP /* number of index slots named above; not itself a slot */
+};
+
+/*
+ * Nofallback attribute, attribute bit 0 in indexes[URES_INDEX_ATTRIBUTES].
+ * New in formatVersion 1.2 (ICU 3.6).
+ *
+ * If set, then this resource bundle is a standalone bundle.
+ * If not set, then the bundle participates in locale fallback, eventually
+ * all the way to the root bundle.
+ * If indexes[] is missing or too short, then the attribute cannot be determined
+ * reliably. Dependency checking should ignore such bundles, and loading should
+ * use fallbacks.
+ */
+#define URES_ATT_NO_FALLBACK 1
+
+/*
+ * Attributes for bundles that are, or use, a pool bundle.
+ * A pool bundle provides key strings that are shared among several other bundles
+ * to reduce their total size.
+ * New in formatVersion 2 (ICU 4.4).
+ */
+#define URES_ATT_IS_POOL_BUNDLE 2
+#define URES_ATT_USES_POOL_BUNDLE 4
+
+/*
+ * File format for .res resource bundle files
+ *
+ * ICU 56: New in formatVersion 3 compared with 2: -------------
+ *
+ * Resource bundles can optionally use shared string-v2 values
+ * stored in the pool bundle.
+ * If so, then the indexes[] contain two new values
+ * in previously-unused bits of existing indexes[] slots:
+ * - poolStringIndexLimit:
+ * String-v2 offsets (in 32-bit Resource words) below this limit
+ * point to pool bundle string-v2 values.
+ * - poolStringIndex16Limit:
+ * Resource16 string-v2 offsets below this limit
+ * point to pool bundle string-v2 values.
+ * Guarantee: poolStringIndex16Limit <= poolStringIndexLimit
+ *
+ * The local bundle's poolStringIndexLimit is greater than
+ * any pool bundle string index used in the local bundle.
+ * The poolStringIndexLimit should not be greater than
+ * the maximum possible pool bundle string index.
+ *
+ * The maximum possible pool bundle string index is the index to the last non-NUL
+ * pool string character, due to suffix sharing.
+ *
+ * In the pool bundle, there is no structure that lists the strings.
+ * (The root resource is an empty Table.)
+ * If the strings need to be enumerated (as genrb --usePoolBundle does),
+ * then iterate through the pool bundle's 16-bit-units array from the beginning.
+ * Stop at the end of the array, or when an explicit or implicit string length
+ * would lead beyond the end of the array,
+ * or when an apparent string is not NUL-terminated.
+ * (Future genrb version might terminate the strings with
+ * what looks like a large explicit string length.)
+ *
+ * ICU 4.4: New in formatVersion 2 compared with 1.3: -------------
+ *
+ * Three new resource types -- String-v2, Table16 and Array16 -- have their
+ * values stored in a new array of 16-bit units between the table key strings
+ * and the start of the other resources.
+ *
+ * genrb eliminates duplicates among Unicode string-v2 values.
+ * Multiple Unicode strings may use the same offset and string data,
+ * or a short string may point to the suffix of a longer string. ("Suffix sharing")
+ * For example, one string "abc" may be reused for another string "bc" by pointing
+ * to the second character. (Short strings-v2 are NUL-terminated
+ * and not preceded by an explicit length value.)
+ *
+ * It is allowed for all resource types to share values.
+ * The swapper code (ures_swap()) has been modified so that it swaps each item
+ * exactly once.
+ *
+ * A resource bundle may use a special pool bundle. Some or all of the table key strings
+ * of the using-bundle are omitted, and the key string offsets for such key strings refer
+ * to offsets in the pool bundle.
+ * The using-bundle's and the pool-bundle's indexes[URES_INDEX_POOL_CHECKSUM] values
+ * must match.
+ * Two bits in indexes[URES_INDEX_ATTRIBUTES] indicate whether a resource bundle
+ * is or uses a pool bundle.
+ *
+ * Table key strings must be compared in ASCII order, even if they are not
+ * stored in ASCII.
+ *
+ * New in formatVersion 1.3 compared with 1.2: -------------
+ *
+ * genrb eliminates duplicates among key strings.
+ * Multiple table items may share one key string, or one item may point
+ * to the suffix of another's key string. ("Suffix sharing")
+ * For example, one key "abc" may be reused for another key "bc" by pointing
+ * to the second character. (Key strings are NUL-terminated.)
+ *
+ * -------------
+ *
+ * An ICU4C resource bundle file (.res) is a binary, memory-mappable file
+ * with nested, hierarchical data structures.
+ * It physically contains the following:
+ *
+ * Resource root; -- 32-bit Resource item, root item for this bundle's tree;
+ * currently, the root item must be a table or table32 resource item
+ * int32_t indexes[indexes[0]]; -- array of indexes for friendly
+ * reading and swapping; see URES_INDEX_* above
+ * new in formatVersion 1.1 (ICU 2.8)
+ * char keys[]; -- characters for key strings
+ * (formatVersion 1.0: up to 65k of characters; 1.1: <2G)
+ * (minus the space for root and indexes[]),
+ * which consist of invariant characters (ASCII/EBCDIC) and are NUL-terminated;
+ * padded to multiple of 4 bytes for 4-alignment of the following data
+ * uint16_t 16BitUnits[]; -- resources that are stored entirely as sequences of 16-bit units
+ * (new in formatVersion 2/ICU 4.4)
+ * data is indexed by the offset values in 16-bit resource types,
+ * with offset 0 pointing to the beginning of this array;
+ * there is a 0 at offset 0, for empty resources;
+ * padded to multiple of 4 bytes for 4-alignment of the following data
+ * data; -- data directly and indirectly indexed by the root item;
+ * the structure is determined by walking the tree
+ *
+ * Each resource bundle item has a 32-bit Resource handle (see typedef above)
+ * which contains the item type number in its upper 4 bits (31..28) and either
+ * an offset or a direct value in its lower 28 bits (27..0).
+ * The order of items is undefined and only determined by walking the tree.
+ * Leaves of the tree may be stored first or last or anywhere in between,
+ * and it is in theory possible to have unreferenced holes in the file.
+ *
+ * 16-bit-unit values:
+ * Starting with formatVersion 2/ICU 4.4, some resources are stored in a special
+ * array of 16-bit units. Each resource value is a sequence of 16-bit units,
+ * with no per-resource padding to a 4-byte boundary.
+ * 16-bit container types (Table16 and Array16) contain Resource16 values
+ * which are offsets to String-v2 resources in the same 16-bit-units array.
+ *
+ * Direct values:
+ * - Empty Unicode strings have an offset value of 0 in the Resource handle itself.
+ * - Starting with formatVersion 2/ICU 4.4, an offset value of 0 for
+ * _any_ resource type indicates an empty value.
+ * - Integer values are 28-bit values stored in the Resource handle itself;
+ * the interpretation of unsigned vs. signed integers is up to the application.
+ *
+ * All other types and values use 28-bit offsets to point to the item's data.
+ * The offset is an index to the first 32-bit word of the value, relative to the
+ * start of the resource data (i.e., the root item handle is at offset 0).
+ * To get byte offsets, the offset is multiplied by 4 (or shifted left by 2 bits).
+ * All resource item values are 4-aligned.
+ *
+ * New in formatVersion 2/ICU 4.4: Some types use offsets into the 16-bit-units array,
+ * indexing 16-bit units in that array.
+ *
+ * The structures (memory layouts) for the values for each item type are listed
+ * in the table below.
+ *
+ * Nested, hierarchical structures: -------------
+ *
+ * Table items contain key-value pairs where the keys are offsets to char * key strings.
+ * The values of these pairs are either Resource handles or
+ * offsets into the 16-bit-units array, depending on the table type.
+ *
+ * Array items are simple vectors of Resource handles,
+ * or of offsets into the 16-bit-units array, depending on the array type.
+ *
+ * Table key string offsets: -------
+ *
+ * Key string offsets are relative to the start of the resource data (of the root handle),
+ * i.e., the first string has an offset of 4+sizeof(indexes).
+ * (After the 4-byte root handle and after the indexes array.)
+ *
+ * If the resource bundle uses a pool bundle, then some key strings are stored
+ * in the pool bundle rather than in the local bundle itself.
+ * - In a Table or Table16, the 16-bit key string offset is local if it is
+ * less than indexes[URES_INDEX_KEYS_TOP]<<2.
+ * Otherwise, subtract indexes[URES_INDEX_KEYS_TOP]<<2 to get the offset into
+ * the pool bundle key strings.
+ * - In a Table32, the 32-bit key string offset is local if it is non-negative.
+ * Otherwise, reset bit 31 to get the pool key string offset.
+ *
+ * Unlike the local offset, the pool key offset is relative to
+ * the start of the key strings, not to the start of the bundle.
+ *
+ * An alias item is special (and new in ICU 2.4): --------------
+ *
+ * Its memory layout is just like for a UnicodeString, but at runtime it resolves to
+ * another resource bundle's item according to the path in the string.
+ * This is used to share items across bundles that are in different lookup/fallback
+ * chains (e.g., large collation data among zh_TW and zh_HK).
+ * This saves space (for large items) and maintenance effort (less duplication of data).
+ *
+ * --------------------------------------------------------------------------
+ *
+ * Resource types:
+ *
+ * Most resources have their values stored at four-byte offsets from the start
+ * of the resource data. These values are at least 4-aligned.
+ * Some resource values are stored directly in the offset field of the Resource itself.
+ * See UResType in unicode/ures.h for enumeration constants for Resource types.
+ *
+ * Some resources have their values stored as sequences of 16-bit units,
+ * at 2-byte offsets from the start of a contiguous 16-bit-unit array between
+ * the table key strings and the other resources. (new in formatVersion 2/ICU 4.4)
+ * At offset 0 of that array is a 16-bit zero value for empty 16-bit resources.
+ *
+ * Resource16 values in Table16 and Array16 are 16-bit offsets to String-v2
+ * resources, with the offsets relative to the start of the 16-bit-units array.
+ * Starting with formatVersion 3/ICU 56, if offset<poolStringIndex16Limit
+ * then use the pool bundle's 16-bit-units array,
+ * otherwise subtract that limit and use the local 16-bit-units array.
+ *
+ * Type Name Memory layout of values
+ * (in parentheses: scalar, non-offset values)
+ *
+ * 0 Unicode String: int32_t length, UChar[length], (UChar)0, (padding)
+ * or (empty string ("") if offset==0)
+ * 1 Binary: int32_t length, uint8_t[length], (padding)
+ * - the start of the bytes is 16-aligned -
+ * 2 Table: uint16_t count, uint16_t keyStringOffsets[count], (uint16_t padding), Resource[count]
+ * 3 Alias: (physically same value layout as string, new in ICU 2.4)
+ * 4 Table32: int32_t count, int32_t keyStringOffsets[count], Resource[count]
+ * (new in formatVersion 1.1/ICU 2.8)
+ * 5 Table16: uint16_t count, uint16_t keyStringOffsets[count], Resource16[count]
+ * (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4)
+ * 6 Unicode String-v2:UChar[length], (UChar)0; length determined by the first UChar:
+ * - if first is not a trail surrogate, then the length is implicit
+ * and u_strlen() needs to be called
+ * - if first<0xdfef then length=first&0x3ff (and skip first)
+ * - if first<0xdfff then length=((first-0xdfef)<<16) | second UChar
+ * - if first==0xdfff then length=((second UChar)<<16) | third UChar
+ * (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4)
+ *
+ * Starting with formatVersion 3/ICU 56, if offset<poolStringIndexLimit
+ * then use the pool bundle's 16-bit-units array,
+ * otherwise subtract that limit and use the local 16-bit-units array.
+ * (Note different limits for Resource16 vs. Resource.)
+ *
+ * 7 Integer: (28-bit offset is integer value)
+ * 8 Array: int32_t count, Resource[count]
+ * 9 Array16: uint16_t count, Resource16[count]
+ * (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4)
+ * 14 Integer Vector: int32_t length, int32_t[length]
+ * 15 Reserved: This value denotes special purpose resources and is for internal use.
+ *
+ * Note that there are 3 types with data vector values:
+ * - Vectors of 8-bit bytes stored as type Binary.
+ * - Vectors of 16-bit words stored as type Unicode String or Unicode String-v2
+ * (no value restrictions, all values 0..ffff allowed!).
+ * - Vectors of 32-bit words stored as type Integer Vector.
+ */
+
+/*
+ * Structure for a single, memory-mapped ResourceBundle.
+ */
+typedef struct ResourceData {
+ UDataMemory *data;
+ const int32_t *pRoot;
+ const uint16_t *p16BitUnits;
+ const char *poolBundleKeys;
+ Resource rootRes;
+ int32_t localKeyLimit;
+ const uint16_t *poolBundleStrings;
+ int32_t poolStringIndexLimit;
+ int32_t poolStringIndex16Limit;
+ UBool noFallback; /* see URES_ATT_NO_FALLBACK */
+ UBool isPoolBundle;
+ UBool usesPoolBundle;
+ UBool useNativeStrcmp;
+} ResourceData;
+
+/*
+ * Read a resource bundle from memory.
+ */
+U_CAPI void U_EXPORT2
+res_read(ResourceData *pResData,
+ const UDataInfo *pInfo, const void *inBytes, int32_t length,
+ UErrorCode *errorCode);
+
+/*
+ * Load a resource bundle file.
+ * The ResourceData structure must be allocated externally.
+ */
+U_CFUNC void
+res_load(ResourceData *pResData,
+ const char *path, const char *name, UErrorCode *errorCode);
+
+/*
+ * Release a resource bundle file.
+ * This does not release the ResourceData structure itself.
+ */
+U_CFUNC void
+res_unload(ResourceData *pResData);
+
+U_CAPI UResType U_EXPORT2
+res_getPublicType(Resource res);
+
+///////////////////////////////////////////////////////////////////////////
+// To enable tracing, use the inline versions of the res_get* functions. //
+///////////////////////////////////////////////////////////////////////////
+
+/*
+ * Return a pointer to a zero-terminated, const UChar* string
+ * and set its length in *pLength.
+ * Returns NULL if not found.
+ */
+U_CAPI const UChar * U_EXPORT2
+res_getStringNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength);
+
+U_CAPI const uint8_t * U_EXPORT2
+res_getBinaryNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength);
+
+U_CAPI const int32_t * U_EXPORT2
+res_getIntVectorNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength);
+
+U_CAPI const UChar * U_EXPORT2
+res_getAlias(const ResourceData *pResData, Resource res, int32_t *pLength);
+
+U_CAPI Resource U_EXPORT2
+res_getResource(const ResourceData *pResData, const char *key);
+
+U_CAPI int32_t U_EXPORT2
+res_countArrayItems(const ResourceData *pResData, Resource res);
+
+U_CAPI Resource U_EXPORT2
+res_getArrayItem(const ResourceData *pResData, Resource array, int32_t indexS);
+
+U_CAPI Resource U_EXPORT2
+res_getTableItemByIndex(const ResourceData *pResData, Resource table, int32_t indexS, const char ** key);
+
+U_CAPI Resource U_EXPORT2
+res_getTableItemByKey(const ResourceData *pResData, Resource table, int32_t *indexS, const char* * key);
+
+/**
+ * Iterates over the path and stops when a scalar resource is found.
+ * Follows aliases.
+ * Modifies the contents of *path (replacing separators with NULs),
+ * and also moves *path forward while it finds items.
+ *
+ * @param path input: "CollationElements/Sequence" or "zoneStrings/3/2" etc.;
+ * output: points to the part that has not yet been processed
+ */
+U_CFUNC Resource res_findResource(const ResourceData *pResData, Resource r,
+ char** path, const char** key);
+
+#ifdef __cplusplus
+
+#include "resource.h"
+#include "restrace.h"
+
+U_NAMESPACE_BEGIN
+
+inline const UChar* res_getString(const ResourceTracer& traceInfo,
+ const ResourceData *pResData, Resource res, int32_t *pLength) {
+ traceInfo.trace("string");
+ return res_getStringNoTrace(pResData, res, pLength);
+}
+
+inline const uint8_t* res_getBinary(const ResourceTracer& traceInfo,
+ const ResourceData *pResData, Resource res, int32_t *pLength) {
+ traceInfo.trace("binary");
+ return res_getBinaryNoTrace(pResData, res, pLength);
+}
+
+inline const int32_t* res_getIntVector(const ResourceTracer& traceInfo,
+ const ResourceData *pResData, Resource res, int32_t *pLength) {
+ traceInfo.trace("intvector");
+ return res_getIntVectorNoTrace(pResData, res, pLength);
+}
+
+inline int32_t res_getInt(const ResourceTracer& traceInfo, Resource res) {
+ traceInfo.trace("int");
+ return RES_GET_INT_NO_TRACE(res);
+}
+
+inline uint32_t res_getUInt(const ResourceTracer& traceInfo, Resource res) {
+ traceInfo.trace("uint");
+ return RES_GET_UINT_NO_TRACE(res);
+}
+
+class ResourceDataValue : public ResourceValue {
+public:
+ ResourceDataValue() :
+ res(static_cast<Resource>(URES_NONE)),
+ fTraceInfo() {}
+ virtual ~ResourceDataValue();
+
+ void setData(const ResourceData *data) {
+ resData = *data;
+ }
+
+ void setResource(Resource r, ResourceTracer&& traceInfo) {
+ res = r;
+ fTraceInfo = traceInfo;
+ }
+
+ const ResourceData &getData() const { return resData; }
+ virtual UResType getType() const;
+ virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const;
+ virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const;
+ virtual int32_t getInt(UErrorCode &errorCode) const;
+ virtual uint32_t getUInt(UErrorCode &errorCode) const;
+ virtual const int32_t *getIntVector(int32_t &length, UErrorCode &errorCode) const;
+ virtual const uint8_t *getBinary(int32_t &length, UErrorCode &errorCode) const;
+ virtual ResourceArray getArray(UErrorCode &errorCode) const;
+ virtual ResourceTable getTable(UErrorCode &errorCode) const;
+ virtual UBool isNoInheritanceMarker() const;
+ virtual int32_t getStringArray(UnicodeString *dest, int32_t capacity,
+ UErrorCode &errorCode) const;
+ virtual int32_t getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity,
+ UErrorCode &errorCode) const;
+ virtual UnicodeString getStringOrFirstOfArray(UErrorCode &errorCode) const;
+
+private:
+ // TODO(ICU-20769): If UResourceBundle.fResData becomes a pointer,
+ // then remove this value field again and just store a pResData pointer.
+ ResourceData resData;
+ Resource res;
+ ResourceTracer fTraceInfo;
+};
+
+U_NAMESPACE_END
+
+#endif /* __cplusplus */
+
+/**
+ * Swap an ICU resource bundle. See udataswp.h.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+ures_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+#endif
diff --git a/thirdparty/icu4c/common/uresimp.h b/thirdparty/icu4c/common/uresimp.h
new file mode 100644
index 0000000000..69d82566fe
--- /dev/null
+++ b/thirdparty/icu4c/common/uresimp.h
@@ -0,0 +1,364 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2000-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#ifndef URESIMP_H
+#define URESIMP_H
+
+#include "unicode/ures.h"
+#include "unicode/utypes.h"
+
+#include "uresdata.h"
+
+#define kRootLocaleName "root"
+#define kPoolBundleName "pool"
+
+/*
+ The default minor version and the version separator must be exactly one
+ character long.
+*/
+
+#define kDefaultMinorVersion "0"
+#define kVersionSeparator "."
+#define kVersionTag "Version"
+
+#define MAGIC1 19700503
+#define MAGIC2 19641227
+
+#define URES_MAX_ALIAS_LEVEL 256
+#define URES_MAX_BUFFER_SIZE 256
+
+#define EMPTY_SET 0x2205
+
+struct UResourceDataEntry;
+typedef struct UResourceDataEntry UResourceDataEntry;
+
+/*
+ * Note: If we wanted to make this structure smaller, then we could try
+ * to use one UResourceDataEntry pointer for fAlias and fPool, with a separate
+ * flag to distinguish whether this struct is for a real bundle with a pool,
+ * or for an alias entry for which we won't use the pool after loading.
+ */
+struct UResourceDataEntry {
+ char *fName; /* name of the locale for bundle - still to decide whether it is original or fallback */
+ char *fPath; /* path to bundle - used for distinguishing between resources with the same name */
+ UResourceDataEntry *fParent; /*next resource in fallback chain*/
+ UResourceDataEntry *fAlias;
+ UResourceDataEntry *fPool;
+ ResourceData fData; /* data for low level access */
+ char fNameBuffer[3]; /* A small buffer of free space for fName. The free space is due to struct padding. */
+ uint32_t fCountExisting; /* how much is this resource used */
+ UErrorCode fBogus;
+ /* int32_t fHashKey;*/ /* for faster access in the hashtable */
+};
+
+#define RES_BUFSIZE 64
+#define RES_PATH_SEPARATOR '/'
+#define RES_PATH_SEPARATOR_S "/"
+
+struct UResourceBundle {
+ const char *fKey; /*tag*/
+ UResourceDataEntry *fData; /*for low-level access*/
+ char *fVersion;
+ UResourceDataEntry *fTopLevelData; /* for getting the valid locale */
+ char *fResPath; /* full path to the resource: "zh_TW/CollationElements/Sequence" */
+ // TODO(ICU-20769): Try to change the by-value fResData into a pointer,
+ // with the struct in only one place for each bundle.
+ // Also replace class ResourceDataValue.resData with a pResData pointer again.
+ ResourceData fResData;
+ char fResBuf[RES_BUFSIZE];
+ int32_t fResPathLen;
+ Resource fRes;
+ UBool fHasFallback;
+ UBool fIsTopLevel;
+ uint32_t fMagic1; /* For determining if it's a stack object */
+ uint32_t fMagic2; /* For determining if it's a stack object */
+ int32_t fIndex;
+ int32_t fSize;
+
+ /*const UResourceBundle *fParentRes;*/ /* needed to get the actual locale for a child resource */
+};
+
+U_CAPI void U_EXPORT2 ures_initStackObject(UResourceBundle* resB);
+
+#ifdef __cplusplus
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class StackUResourceBundle
+ * "Smart pointer" like class, closes a UResourceBundle via ures_close().
+ *
+ * This code:
+ *
+ * StackUResourceBundle bundle;
+ * foo(bundle.getAlias());
+ *
+ * Is equivalent to this code:
+ *
+ * UResourceBundle bundle;
+ * ures_initStackObject(&bundle);
+ * foo(&bundle);
+ * ures_close(&bundle);
+ *
+ * @see LocalUResourceBundlePointer
+ * @internal
+ */
+class U_COMMON_API StackUResourceBundle {
+public:
+ // No heap allocation. Use only on the stack.
+ static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
+ static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
+#if U_HAVE_PLACEMENT_NEW
+ static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
+#endif
+
+ StackUResourceBundle();
+ ~StackUResourceBundle();
+
+ UResourceBundle* getAlias() { return &bundle; }
+
+ UResourceBundle& ref() { return bundle; }
+ const UResourceBundle& ref() const { return bundle; }
+
+ StackUResourceBundle(const StackUResourceBundle&) = delete;
+ StackUResourceBundle& operator=(const StackUResourceBundle&) = delete;
+
+ StackUResourceBundle(StackUResourceBundle&&) = delete;
+ StackUResourceBundle& operator=(StackUResourceBundle&&) = delete;
+
+private:
+ UResourceBundle bundle;
+};
+
+U_NAMESPACE_END
+
+#endif /* __cplusplus */
+
+/**
+ * Opens a resource bundle for the locale;
+ * if there is not even a base language bundle, then loads the root bundle;
+ * never falls back to the default locale.
+ *
+ * This is used for algorithms that have good pan-Unicode default behavior,
+ * such as case mappings, collation, and segmentation (BreakIterator).
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_openNoDefault(const char* path, const char* localeID, UErrorCode* status);
+
+/* Some getters used by the copy constructor */
+U_CFUNC const char* ures_getName(const UResourceBundle* resB);
+#ifdef URES_DEBUG
+U_CFUNC const char* ures_getPath(const UResourceBundle* resB);
+/**
+ * If anything was in the RB cache, dump it to the screen.
+ * @return true if there was anything in the cache
+ */
+U_CAPI UBool U_EXPORT2 ures_dumpCacheContents(void);
+#endif
+/*U_CFUNC void ures_appendResPath(UResourceBundle *resB, const char* toAdd, int32_t lenToAdd);*/
+/*U_CFUNC void ures_setResPath(UResourceBundle *resB, const char* toAdd);*/
+/*U_CFUNC void ures_freeResPath(UResourceBundle *resB);*/
+
+/* Candidates for export */
+U_CFUNC UResourceBundle *ures_copyResb(UResourceBundle *r, const UResourceBundle *original, UErrorCode *status);
+
+/**
+ * Returns a resource that can be located using the pathToResource argument. One needs optional package, locale
+ * and path inside the locale, for example: "/myData/en/zoneStrings/3". Keys and indexes are supported. Keys
+ * need to reference data in named structures, while indexes can reference both named and anonymous resources.
+ * Features a fill-in parameter.
+ *
+ * Note, this function does NOT have a syntax for specifying items within a tree. May want to consider a
+ * syntax that delineates between package/tree and resource.
+ *
+ * @param pathToResource a path that will lead to the requested resource
+ * @param fillIn if NULL a new UResourceBundle struct is allocated and must be deleted by the caller.
+ * Alternatively, you can supply a struct to be filled by this function.
+ * @param status fills in the outgoing error code.
+ * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_findResource(const char* pathToResource,
+ UResourceBundle *fillIn, UErrorCode *status);
+
+/**
+ * Returns a sub resource that can be located using the pathToResource argument. One needs a path inside
+ * the supplied resource, for example, if you have "en_US" resource bundle opened, you might ask for
+ * "zoneStrings/3". Keys and indexes are supported. Keys
+ * need to reference data in named structures, while indexes can reference both
+ * named and anonymous resources.
+ * Features a fill-in parameter.
+ *
+ * @param resB a resource
+ * @param pathToResource a path that will lead to the requested resource
+ * @param fillIn if NULL a new UResourceBundle struct is allocated and must be deleted by the caller.
+ * Alternatively, you can supply a struct to be filled by this function.
+ * @param status fills in the outgoing error code.
+ * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_findSubResource(const UResourceBundle *resB,
+ char* pathToResource,
+ UResourceBundle *fillIn, UErrorCode *status);
+
+/**
+ * Returns a functionally equivalent locale (considering keywords) for the specified keyword.
+ * @param result fillin for the equivalent locale
+ * @param resultCapacity capacity of the fillin buffer
+ * @param path path to the tree, or NULL for ICU data
+ * @param resName top level resource. Example: "collations"
+ * @param keyword locale keyword. Example: "collation"
+ * @param locid The requested locale
+ * @param isAvailable If non-null, pointer to fillin parameter that indicates whether the
+ * requested locale was available. The locale is defined as 'available' if it physically
+ * exists within the specified tree.
+ * @param omitDefault if true, omit keyword and value if default. 'de_DE\@collation=standard' -> 'de_DE'
+ * @param status error code
+ * @return the actual buffer size needed for the full locale. If it's greater
+ * than resultCapacity, the returned full name will be truncated and an error code will be returned.
+ */
+U_CAPI int32_t U_EXPORT2
+ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
+ const char *path, const char *resName, const char *keyword, const char *locid,
+ UBool *isAvailable, UBool omitDefault, UErrorCode *status);
+
+/**
+ * Given a tree path and keyword, return a string enumeration of all possible values for that keyword.
+ * @param path path to the tree, or NULL for ICU data
+ * @param keyword a particular keyword to consider, must match a top level resource name
+ * within the tree.
+ * @param status error code
+ */
+U_CAPI UEnumeration* U_EXPORT2
+ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status);
+
+
+/**
+ * Get a resource with multi-level fallback. Normally only the top level resources will
+ * fall back to their parent. This performs fallback on subresources. For example, when a table
+ * is defined in a resource bundle and a parent resource bundle, normally no fallback occurs
+ * on the sub-resources because the table is defined in the current resource bundle, but this
+ * function can perform fallback on the sub-resources of the table.
+ * @param resB a resource
+ * @param inKey a key associated with the requested resource
+ * @param fillIn if NULL a new UResourceBundle struct is allocated and must be deleted by the caller.
+ * Alternatively, you can supply a struct to be filled by this function.
+ * @param status: fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_getByKeyWithFallback(const UResourceBundle *resB,
+ const char* inKey,
+ UResourceBundle *fillIn,
+ UErrorCode *status);
+
+
+/**
+ * Get a String with multi-level fallback. Normally only the top level resources will
+ * fall back to their parent. This performs fallback on subresources. For example, when a table
+ * is defined in a resource bundle and a parent resource bundle, normally no fallback occurs
+ * on the sub-resources because the table is defined in the current resource bundle, but this
+ * function can perform fallback on the sub-resources of the table.
+ * @param resB a resource
+ * @param inKey a key associated with the requested resource
+ * @param status: fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to the string value (a const UChar*, not a UResourceBundle); see the len parameter for its length
+ */
+U_CAPI const UChar* U_EXPORT2
+ures_getStringByKeyWithFallback(const UResourceBundle *resB,
+ const char* inKey,
+ int32_t* len,
+ UErrorCode *status);
+
+#ifdef __cplusplus
+
+U_CAPI void U_EXPORT2
+ures_getValueWithFallback(const UResourceBundle *bundle, const char *path,
+ UResourceBundle *tempFillIn,
+ icu::ResourceDataValue &value, UErrorCode &errorCode);
+
+U_CAPI void U_EXPORT2
+ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
+ icu::ResourceSink &sink, UErrorCode &errorCode);
+
+#endif /* __cplusplus */
+
+/**
+ * Get a version number by key
+ * @param resB bundle containing version number
+ * @param key the key for the version number
+ * @param ver fillin for the version number
+ * @param status error code
+ */
+U_CAPI void U_EXPORT2
+ures_getVersionByKey(const UResourceBundle *resB,
+ const char *key,
+ UVersionInfo ver,
+ UErrorCode *status);
+
+
+/**
+ * Internal function.
+ * Return the version number associated with this ResourceBundle as a string.
+ *
+ * @param resourceBundle The resource bundle for which the version is checked.
+ * @return A version number string as specified in the resource bundle or its parent.
+ * The caller does not own this string.
+ * @see ures_getVersion
+ */
+U_CAPI const char* U_EXPORT2
+ures_getVersionNumberInternal(const UResourceBundle *resourceBundle);
+
+/**
+ * Return the name of the Locale associated with this ResourceBundle. This API allows
+ * you to query for the real locale of the resource. For example, if you requested
+ * "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned.
+ * For subresources, the locale where this resource comes from will be returned.
+ * If fallback has occurred, getLocale will reflect this.
+ *
+ * This internal version avoids deprecated-warnings in ICU code.
+ *
+ * @param resourceBundle resource bundle in question
+ * @param status just for catching illegal arguments
+ * @return A Locale name
+ */
+U_CAPI const char* U_EXPORT2
+ures_getLocaleInternal(const UResourceBundle* resourceBundle,
+ UErrorCode* status);
+
+/**
+ * Same as ures_openDirect() but uses the fill-in parameter instead of allocating a new bundle.
+ *
+ * @param r The existing UResourceBundle to fill in. If NULL then status will be
+ * set to U_ILLEGAL_ARGUMENT_ERROR.
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by <code> udata_open( packageName, "res", locale, err) </code>
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param locale specifies the locale for which we want to open the resource
+ * if NULL, the default locale will be used. If strlen(locale) == 0
+ * root locale will be used.
+ * @param status The error code.
+ * @see ures_openDirect
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+ures_openDirectFillIn(UResourceBundle *r,
+ const char *packageName,
+ const char *locale,
+ UErrorCode *status);
+
+#endif /*URESIMP_H*/
diff --git a/thirdparty/icu4c/common/ureslocs.h b/thirdparty/icu4c/common/ureslocs.h
new file mode 100644
index 0000000000..f7c3344ef2
--- /dev/null
+++ b/thirdparty/icu4c/common/ureslocs.h
@@ -0,0 +1,27 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2009-2014 International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#ifndef __URESLOCS_H__
+#define __URESLOCS_H__
+
+#include "unicode/utypes.h"
+#include "unicode/udata.h"
+
+U_CDECL_BEGIN
+
+
+#define U_ICUDATA_LANG U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "lang"
+#define U_ICUDATA_REGION U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "region"
+#define U_ICUDATA_CURR U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "curr"
+#define U_ICUDATA_ZONE U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "zone"
+#define U_ICUDATA_UNIT U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "unit"
+
+U_CDECL_END
+
+#endif
diff --git a/thirdparty/icu4c/common/usc_impl.cpp b/thirdparty/icu4c/common/usc_impl.cpp
new file mode 100644
index 0000000000..111029b974
--- /dev/null
+++ b/thirdparty/icu4c/common/usc_impl.cpp
@@ -0,0 +1,361 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File USC_IMPL.C
+*
+* Modification History:
+*
+* Date Name Description
+* 07/08/2002 Eric Mader Creation.
+******************************************************************************
+*/
+
+#include "unicode/uscript.h"
+#include "usc_impl.h"
+#include "cmemory.h"
+/*
+ * Helpers for the fixed-size paren stack kept inside UScriptRun.
+ * The stack is a ring of PAREN_STACK_DEPTH slots: MOD/INC/DEC move an index
+ * forwards or backwards modulo the depth (DEC adds PAREN_STACK_DEPTH before
+ * subtracting the count), while LIMIT_INC adds one to a counter but never
+ * lets it go past PAREN_STACK_DEPTH.
+ * STACK_IS_EMPTY tests pushCount, TOP names the slot at parenSP and
+ * SYNC_FIXUP zeroes fixupCount.
+ */
+#define PAREN_STACK_DEPTH 32
+
+#define MOD(sp) ((sp) % PAREN_STACK_DEPTH)
+#define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH)
+#define INC(sp,count) (MOD((sp) + (count)))
+#define INC1(sp) (INC(sp, 1))
+#define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count)))
+#define DEC1(sp) (DEC(sp, 1))
+#define STACK_IS_EMPTY(scriptRun) ((scriptRun)->pushCount <= 0)
+#define STACK_IS_NOT_EMPTY(scriptRun) (! STACK_IS_EMPTY(scriptRun))
+#define TOP(scriptRun) ((scriptRun)->parenStack[(scriptRun)->parenSP])
+#define SYNC_FIXUP(scriptRun) ((scriptRun)->fixupCount = 0)
+/*
+ * One slot of the paren stack: the pairedChars index recorded by push() for
+ * an opening character, and the script code stored alongside it.
+ */
+struct ParenStackEntry
+{
+ int32_t pairIndex;
+ UScriptCode scriptCode;
+};
+/*
+ * State of a script-run iteration: the text being scanned, the bounds and
+ * script of the current run, and the paren stack with its counters.
+ */
+struct UScriptRun
+{
+ int32_t textLength; /* length given to uscript_setRunText() */
+ const UChar *textArray; /* text pointer given to uscript_setRunText(); stored, not copied */
+
+ int32_t scriptStart; /* offset at which the current run starts */
+ int32_t scriptLimit; /* offset just past the current run; the next scan starts here */
+ UScriptCode scriptCode; /* script of the current run */
+
+ struct ParenStackEntry parenStack[PAREN_STACK_DEPTH];
+ int32_t parenSP; /* index of the top entry; -1 after a reset or once pop() empties the stack */
+ int32_t pushCount; /* number of entries on the stack, capped at PAREN_STACK_DEPTH */
+ int32_t fixupCount; /* number of most recent entries whose script code fixup() overwrites */
+};
+
+static int8_t highBit(int32_t value);
+/*
+ * Paired punctuation, listed in ascending code point order as
+ * (opening, closing) couples: an even index holds an opening character and
+ * the odd index after it holds the closing partner. getPairIndex() searches
+ * this table and uscript_nextRun() tests the low bit of the index it gets
+ * back to tell the two kinds apart.
+ */
+static const UChar32 pairedChars[] = {
+ 0x0028, 0x0029, /* ascii paired punctuation */
+ 0x003c, 0x003e,
+ 0x005b, 0x005d,
+ 0x007b, 0x007d,
+ 0x00ab, 0x00bb, /* guillemets */
+ 0x2018, 0x2019, /* general punctuation */
+ 0x201c, 0x201d,
+ 0x2039, 0x203a,
+ 0x3008, 0x3009, /* chinese paired punctuation */
+ 0x300a, 0x300b,
+ 0x300c, 0x300d,
+ 0x300e, 0x300f,
+ 0x3010, 0x3011,
+ 0x3014, 0x3015,
+ 0x3016, 0x3017,
+ 0x3018, 0x3019,
+ 0x301a, 0x301b
+};
+/*
+ * Record pairIndex and scriptCode in the next slot of the paren stack.
+ * Both counters go through LIMIT_INC, so they stop growing at
+ * PAREN_STACK_DEPTH, and the stack index advances modulo PAREN_STACK_DEPTH;
+ * a push onto a full ring therefore wraps around and overwrites an
+ * existing slot.
+ */
+static void push(UScriptRun *scriptRun, int32_t pairIndex, UScriptCode scriptCode)
+{
+ scriptRun->pushCount = LIMIT_INC(scriptRun->pushCount);
+ scriptRun->fixupCount = LIMIT_INC(scriptRun->fixupCount);
+
+ scriptRun->parenSP = INC1(scriptRun->parenSP);
+ scriptRun->parenStack[scriptRun->parenSP].pairIndex = pairIndex;
+ scriptRun->parenStack[scriptRun->parenSP].scriptCode = scriptCode;
+}
+/*
+ * Drop the top entry of the paren stack; does nothing when the stack is
+ * already empty. fixupCount is lowered as well unless it is not positive.
+ */
+static void pop(UScriptRun *scriptRun)
+{
+ if (STACK_IS_EMPTY(scriptRun)) {
+ return;
+ }
+
+ if (scriptRun->fixupCount > 0) {
+ scriptRun->fixupCount -= 1;
+ }
+
+ scriptRun->pushCount -= 1;
+ scriptRun->parenSP = DEC1(scriptRun->parenSP);
+
+ /* If the stack is now empty, reset the stack
+ pointers to their initial values.
+ */
+ if (STACK_IS_EMPTY(scriptRun)) {
+ scriptRun->parenSP = -1;
+ }
+}
+/*
+ * Overwrite the script code of the fixupCount most recent stack entries with
+ * scriptCode, walking from the oldest of them up to the top of the stack.
+ * Because the loop condition post-decrements, fixupCount is left at -1 when
+ * the function returns.
+ */
+static void fixup(UScriptRun *scriptRun, UScriptCode scriptCode)
+{
+ int32_t fixupSP = DEC(scriptRun->parenSP, scriptRun->fixupCount);
+
+ while (scriptRun->fixupCount-- > 0) {
+ fixupSP = INC1(fixupSP);
+ scriptRun->parenStack[fixupSP].scriptCode = scriptCode;
+ }
+}
+/*
+ * Return the zero-based position of the highest set bit of value,
+ * or -32 when value is zero or negative.
+ * The position is accumulated by halving the search width: 16, 8, 4, 2, 1.
+ */
+static int8_t
+highBit(int32_t value)
+{
+ int8_t bit = 0;
+
+ if (value <= 0) {
+ return -32;
+ }
+
+ if (value >= 1 << 16) {
+ value >>= 16;
+ bit += 16;
+ }
+
+ if (value >= 1 << 8) {
+ value >>= 8;
+ bit += 8;
+ }
+
+ if (value >= 1 << 4) {
+ value >>= 4;
+ bit += 4;
+ }
+
+ if (value >= 1 << 2) {
+ value >>= 2;
+ bit += 2;
+ }
+
+ if (value >= 1 << 1) {
+ //value >>= 1;
+ bit += 1;
+ }
+
+ return bit;
+}
+/*
+ * Look ch up in pairedChars with a binary search that probes in
+ * power-of-two steps. Returns the index of ch in the table, or -1 when ch
+ * is not listed there.
+ */
+static int32_t
+getPairIndex(UChar32 ch)
+{
+ int32_t pairedCharCount = UPRV_LENGTHOF(pairedChars);
+ int32_t pairedCharPower = 1 << highBit(pairedCharCount); /* largest power of two not above the count */
+ int32_t pairedCharExtra = pairedCharCount - pairedCharPower; /* entries left over beyond that power of two */
+
+ int32_t probe = pairedCharPower;
+ int32_t pairIndex = 0;
+
+ if (ch >= pairedChars[pairedCharExtra]) {
+ pairIndex = pairedCharExtra;
+ }
+
+ while (probe > (1 << 0)) {
+ probe >>= 1;
+
+ if (ch >= pairedChars[pairIndex + probe]) {
+ pairIndex += probe;
+ }
+ }
+
+ if (pairedChars[pairIndex] != ch) {
+ pairIndex = -1;
+ }
+
+ return pairIndex;
+}
+/*
+ * Two script codes count as the same script when they are equal or when
+ * either of them is <= USCRIPT_INHERITED.
+ */
+static UBool
+sameScript(UScriptCode scriptOne, UScriptCode scriptTwo)
+{
+ return scriptOne <= USCRIPT_INHERITED || scriptTwo <= USCRIPT_INHERITED || scriptOne == scriptTwo;
+}
+/*
+ * Allocate a UScriptRun with uprv_malloc() and initialise it through
+ * uscript_setRunText(src, length).
+ * Returns NULL when pErrorCode is NULL or already holds a failure, when the
+ * allocation fails (U_MEMORY_ALLOCATION_ERROR is set), or when
+ * uscript_setRunText() reports a failure (the object is freed again).
+ */
+U_CAPI UScriptRun * U_EXPORT2
+uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode)
+{
+ UScriptRun *result = NULL;
+
+ if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
+ return NULL;
+ }
+
+ result = (UScriptRun *)uprv_malloc(sizeof (UScriptRun));
+
+ if (result == NULL) {
+ *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+
+ uscript_setRunText(result, src, length, pErrorCode);
+
+ /* Release the UScriptRun if uscript_setRunText() returns an error */
+ if (U_FAILURE(*pErrorCode)) {
+ uprv_free(result);
+ result = NULL;
+ }
+
+ return result;
+}
+/*
+ * Release scriptRun with uprv_free(); a NULL pointer is ignored.
+ */
+U_CAPI void U_EXPORT2
+uscript_closeRun(UScriptRun *scriptRun)
+{
+ if (scriptRun != NULL) {
+ uprv_free(scriptRun);
+ }
+}
+/*
+ * Put the iteration state back to its starting values: run bounds at 0,
+ * script USCRIPT_INVALID_CODE, and an empty paren stack. The text pointer
+ * and length are left untouched. A NULL scriptRun is ignored.
+ */
+U_CAPI void U_EXPORT2
+uscript_resetRun(UScriptRun *scriptRun)
+{
+ if (scriptRun != NULL) {
+ scriptRun->scriptStart = 0;
+ scriptRun->scriptLimit = 0;
+ scriptRun->scriptCode = USCRIPT_INVALID_CODE;
+ scriptRun->parenSP = -1;
+ scriptRun->pushCount = 0;
+ scriptRun->fixupCount = 0;
+ }
+}
+/*
+ * Point scriptRun at a new text and reset the iteration state.
+ * Returns without doing anything when pErrorCode is NULL or already holds a
+ * failure. Sets U_ILLEGAL_ARGUMENT_ERROR when scriptRun is NULL, when length
+ * is negative, or when exactly one of "src is NULL" and "length is 0" holds.
+ */
+U_CAPI void U_EXPORT2
+uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode)
+{
+ if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
+ return;
+ }
+
+ if (scriptRun == NULL || length < 0 || ((src == NULL) != (length == 0))) {
+ *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ scriptRun->textArray = src;
+ scriptRun->textLength = length;
+
+ uscript_resetRun(scriptRun);
+}
+/*
+ * Scan forward from the end of the previous run and report the next run.
+ * Returns FALSE when scriptRun is NULL or the scan position has already
+ * reached textLength; otherwise stores the run's start, limit and script
+ * through whichever output pointers are not NULL and returns TRUE.
+ * Each run starts out as USCRIPT_COMMON and takes on the first script code
+ * greater than USCRIPT_INHERITED that it meets.
+ */
+U_CAPI UBool U_EXPORT2
+uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript)
+{
+ UErrorCode error = U_ZERO_ERROR; /* handed to uscript_getScript(); never inspected in this function */
+
+ /* if we've fallen off the end of the text, we're done */
+ if (scriptRun == NULL || scriptRun->scriptLimit >= scriptRun->textLength) {
+ return FALSE;
+ }
+
+ SYNC_FIXUP(scriptRun);
+ scriptRun->scriptCode = USCRIPT_COMMON;
+
+ for (scriptRun->scriptStart = scriptRun->scriptLimit; scriptRun->scriptLimit < scriptRun->textLength; scriptRun->scriptLimit += 1) {
+ UChar high = scriptRun->textArray[scriptRun->scriptLimit];
+ UChar32 ch = high;
+ UScriptCode sc;
+ int32_t pairIndex;
+
+ /*
+ * if the character is a high surrogate and it's not the last one
+ * in the text, see if it's followed by a low surrogate
+ */
+ if (high >= 0xD800 && high <= 0xDBFF && scriptRun->scriptLimit < scriptRun->textLength - 1) {
+ UChar low = scriptRun->textArray[scriptRun->scriptLimit + 1];
+
+ /*
+ * if it is followed by a low surrogate,
+ * consume it and form the full character
+ */
+ if (low >= 0xDC00 && low <= 0xDFFF) {
+ ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000;
+ scriptRun->scriptLimit += 1;
+ }
+ }
+
+ sc = uscript_getScript(ch, &error);
+ pairIndex = getPairIndex(ch);
+
+ /*
+ * Paired character handling:
+ *
+ * if it's an open character, push it onto the stack.
+ * if it's a close character, find the matching open on the
+ * stack, and use that script code. Any non-matching open
+ * characters above it on the stack will be popped.
+ */
+ if (pairIndex >= 0) {
+ if ((pairIndex & 1) == 0) {
+ push(scriptRun, pairIndex, scriptRun->scriptCode);
+ } else {
+ int32_t pi = pairIndex & ~1; /* index of the matching opening character */
+
+ while (STACK_IS_NOT_EMPTY(scriptRun) && TOP(scriptRun).pairIndex != pi) {
+ pop(scriptRun);
+ }
+
+ if (STACK_IS_NOT_EMPTY(scriptRun)) {
+ sc = TOP(scriptRun).scriptCode;
+ }
+ }
+ }
+
+ if (sameScript(scriptRun->scriptCode, sc)) {
+ if (scriptRun->scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
+ scriptRun->scriptCode = sc;
+
+ fixup(scriptRun, scriptRun->scriptCode);
+ }
+
+ /*
+ * if this character is a close paired character,
+ * pop the matching open character from the stack
+ */
+ if (pairIndex >= 0 && (pairIndex & 1) != 0) {
+ pop(scriptRun);
+ }
+ } else {
+ /*
+ * if the run broke on a surrogate pair,
+ * end it before the high surrogate
+ */
+ if (ch >= 0x10000) {
+ scriptRun->scriptLimit -= 1;
+ }
+
+ break;
+ }
+ }
+
+
+ if (pRunStart != NULL) {
+ *pRunStart = scriptRun->scriptStart;
+ }
+
+ if (pRunLimit != NULL) {
+ *pRunLimit = scriptRun->scriptLimit;
+ }
+
+ if (pRunScript != NULL) {
+ *pRunScript = scriptRun->scriptCode;
+ }
+
+ return TRUE;
+}
diff --git a/thirdparty/icu4c/common/usc_impl.h b/thirdparty/icu4c/common/usc_impl.h
new file mode 100644
index 0000000000..44899649d4
--- /dev/null
+++ b/thirdparty/icu4c/common/usc_impl.h
@@ -0,0 +1,139 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File USC_IMPL.H
+*
+* Modification History:
+*
+* Date Name Description
+* 07/08/2002 Eric Mader Creation.
+******************************************************************************
+*/
+
+#ifndef USC_IMPL_H
+#define USC_IMPL_H
+#include "unicode/utypes.h"
+#include "unicode/uscript.h"
+
+/**
+ * <code>UScriptRun</code> is used to find runs of characters in
+ * the same script. It implements a simple iterator over an array
+ * of characters. The iterator will resolve script-neutral characters
+ * like punctuation into the script of the surrounding characters.
+ *
+ * The iterator will try to match paired punctuation. If it sees an
+ * opening punctuation character, it will remember the script that
+ * was assigned to that character, and assign the same script to the
+ * matching closing punctuation.
+ *
+ * Scripts are chosen based on the <code>UScriptCode</code> enumeration.
+ * No attempt is made to combine related scripts into a single run. In
+ * particular, Hiragana, Katakana, and Han characters will appear in separate
+ * runs.
+
+ * Here is an example of how to iterate over script runs:
+ * <pre>
+ * \code
+ * void printScriptRuns(const UChar *text, int32_t length)
+ * {
+ * UErrorCode error = U_ZERO_ERROR;
+ * UScriptRun *scriptRun = uscript_openRun(text, length, &error);
+ * int32_t start = 0, limit = 0;
+ * UScriptCode code = USCRIPT_INVALID_CODE;
+ *
+ * while (uscript_nextRun(scriptRun, &start, &limit, &code)) {
+ * printf("Script '%s' from %d to %d.\n", uscript_getName(code), start, limit);
+ * }
+ *
+ * uscript_closeRun(scriptRun);
+ * }
+ * \endcode
+ * </pre>
+ */
+struct UScriptRun;
+
+typedef struct UScriptRun UScriptRun;
+
+/**
+ * Create a <code>UScriptRun</code> object for iterating over the given text. This object must
+ * be freed using <code>uscript_closeRun()</code>. Note that this object does not copy the source text,
+ * only the pointer to it. You must make sure that the pointer remains valid until you call
+ * <code>uscript_closeRun()</code> or <code>uscript_setRunText()</code>.
+ *
+ * @param src is the address of the array of characters over which to iterate.
+ * if <code>src == NULL</code> and <code>length == 0</code>,
+ * an empty <code>UScriptRun</code> object will be returned.
+ *
+ * @param length is the number of characters over which to iterate.
+ *
+ * @param pErrorCode is a pointer to a valid <code>UErrorCode</code> value. If this value
+ * indicates a failure on entry, the function will immediately return.
+ * On exit the value will indicate the success of the operation.
+ *
+ * @return the address of <code>UScriptRun</code> object which will iterate over the text,
+ * or <code>NULL</code> if the operation failed.
+ */
+U_CAPI UScriptRun * U_EXPORT2
+uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode);
+
+/**
+ * Frees the given <code>UScriptRun</code> object and any storage associated with it.
+ * On return, scriptRun no longer points to a valid <code>UScriptRun</code> object.
+ *
+ * @param scriptRun is the <code>UScriptRun</code> object which will be freed.
+ */
+U_CAPI void U_EXPORT2
+uscript_closeRun(UScriptRun *scriptRun);
+
+/**
+ * Reset the <code>UScriptRun</code> object so that it will start iterating from
+ * the beginning.
+ *
+ * @param scriptRun is the address of the <code>UScriptRun</code> object to be reset.
+ */
+U_CAPI void U_EXPORT2
+uscript_resetRun(UScriptRun *scriptRun);
+
+/**
+ * Change the text over which the given <code>UScriptRun</code> object iterates.
+ *
+ * @param scriptRun is the <code>UScriptRun</code> object which will be changed.
+ *
+ * @param src is the address of the new array of characters over which to iterate.
+ * If <code>src == NULL</code> and <code>length == 0</code>,
+ * the <code>UScriptRun</code> object will become empty.
+ *
+ * @param length is the new number of characters over which to iterate
+ *
+ * @param pErrorCode is a pointer to a valid <code>UErrorCode</code> value. If this value
+ * indicates a failure on entry, the function will immediately return.
+ * On exit the value will indicate the success of the operation.
+ */
+U_CAPI void U_EXPORT2
+uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode);
+
+/**
+ * Advance the <code>UScriptRun</code> object to the next script run, return the start and limit
+ * offsets, and the script of the run.
+ *
+ * @param scriptRun is the address of the <code>UScriptRun</code> object.
+ *
+ * @param pRunStart is a pointer to the variable to receive the starting offset of the next run.
+ * This pointer can be <code>NULL</code> if the value is not needed.
+ *
+ * @param pRunLimit is a pointer to the variable to receive the limit offset of the next run.
+ * This pointer can be <code>NULL</code> if the value is not needed.
+ *
+ * @param pRunScript is a pointer to the variable to receive the UScriptCode for the
+ * script of the current run. This pointer can be <code>NULL</code> if the value is not needed.
+ *
+ * @return true if there was another script run.
+ */
+U_CAPI UBool U_EXPORT2
+uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript);
+
+#endif
diff --git a/thirdparty/icu4c/common/uscript.cpp b/thirdparty/icu4c/common/uscript.cpp
new file mode 100644
index 0000000000..f8bd7e7fdd
--- /dev/null
+++ b/thirdparty/icu4c/common/uscript.cpp
@@ -0,0 +1,149 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1997-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File USCRIPT.C
+*
+* Modification History:
+*
+* Date Name Description
+* 07/06/2001 Ram Creation.
+******************************************************************************
+*/
+
+#include "unicode/uchar.h"
+#include "unicode/uscript.h"
+#include "unicode/uloc.h"
+#include "bytesinkutil.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "ulocimp.h"
+/* Fixed script lists that getCodesFromLocale() returns for "ja", for "ko", and for "zh" with script "Hant". */
+static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
+static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
+static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
+/*
+ * Copy length script codes from src to dest and return length.
+ * Returns 0 without copying when *err already holds a failure. When length
+ * exceeds capacity, nothing is copied, *err is set to
+ * U_BUFFER_OVERFLOW_ERROR and length is still returned.
+ */
+static int32_t
+setCodes(const UScriptCode *src, int32_t length,
+ UScriptCode *dest, int32_t capacity, UErrorCode *err) {
+ int32_t i;
+ if(U_FAILURE(*err)) { return 0; }
+ if(length > capacity) {
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ return length;
+ }
+ for(i = 0; i < length; ++i) {
+ dest[i] = src[i];
+ }
+ return length;
+}
+/*
+ * Store a single script code in scripts[0] and return 1.
+ * Returns 0 without storing when *err already holds a failure. When capacity
+ * is below 1, nothing is stored, *err is set to U_BUFFER_OVERFLOW_ERROR and
+ * 1 is still returned.
+ */
+static int32_t
+setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
+ if(U_FAILURE(*err)) { return 0; }
+ if(1 > capacity) {
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ return 1;
+ }
+ scripts[0] = script;
+ return 1;
+}
+/*
+ * Derive script codes from a locale ID.
+ * Language "ja", language "ko" and the combination "zh" + script "Hant" map
+ * to the JAPANESE, KOREAN and HAN_BOPO lists. Otherwise a non-empty script
+ * subtag is looked up with u_getPropertyValueEnum(), and the
+ * simplified/traditional Han codes are folded into USCRIPT_HAN.
+ * Returns what setCodes()/setOneCode() return, or 0 when *err already holds
+ * a failure, when nothing matches, or when uloc_getLanguage() or
+ * uloc_getScript() reports a failure or a not-terminated warning.
+ */
+static int32_t
+getCodesFromLocale(const char *locale,
+ UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
+ UErrorCode internalErrorCode = U_ZERO_ERROR;
+ char lang[8] = {0};
+ char script[8] = {0};
+ int32_t scriptLength;
+ if(U_FAILURE(*err)) { return 0; }
+ // Multi-script languages, equivalent to the LocaleScript data
+ // that we used to load from locale resource bundles.
+ /*length = */ uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &internalErrorCode);
+ if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
+ return 0;
+ }
+ if(0 == uprv_strcmp(lang, "ja")) {
+ return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err);
+ }
+ if(0 == uprv_strcmp(lang, "ko")) {
+ return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err);
+ }
+ scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &internalErrorCode);
+ if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
+ return 0;
+ }
+ if(0 == uprv_strcmp(lang, "zh") && 0 == uprv_strcmp(script, "Hant")) {
+ return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err);
+ }
+ // Explicit script code.
+ if(scriptLength != 0) {
+ UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
+ if(scriptCode != USCRIPT_INVALID_CODE) {
+ if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) {
+ scriptCode = USCRIPT_HAN;
+ }
+ return setOneCode(scriptCode, scripts, capacity, err);
+ }
+ }
+ return 0;
+}
+
+/* TODO: this is a bad API and should be deprecated, ticket #11141
+ *
+ * Resolve a script name, script abbreviation or locale ID to script codes.
+ * Order of attempts:
+ * 1. if the input contains neither '-' nor '_', look it up as a script
+ * name with u_getPropertyValueEnum();
+ * 2. getCodesFromLocale() on the input as given;
+ * 3. getCodesFromLocale() on the input after ulocimp_addLikelySubtags();
+ * 4. if step 1 was skipped, the script-name lookup as a last resort.
+ * Returns the count produced by the first attempt that succeeds, or 0 when
+ * none does or when *err holds a failure on entry. Sets
+ * U_ILLEGAL_ARGUMENT_ERROR when nameOrAbbrOrLocale is NULL, when fillIn is
+ * NULL with a non-zero capacity, or when capacity is negative.
+ */
+U_CAPI int32_t U_EXPORT2
+uscript_getCode(const char* nameOrAbbrOrLocale,
+ UScriptCode* fillIn,
+ int32_t capacity,
+ UErrorCode* err){
+ UBool triedCode;
+ UErrorCode internalErrorCode;
+ int32_t length;
+
+ if(U_FAILURE(*err)) {
+ return 0;
+ }
+ if(nameOrAbbrOrLocale==NULL ||
+ (fillIn == NULL ? capacity != 0 : capacity < 0)) {
+ *err = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ triedCode = FALSE;
+ if(uprv_strchr(nameOrAbbrOrLocale, '-')==NULL && uprv_strchr(nameOrAbbrOrLocale, '_')==NULL ){
+ /* try long and abbreviated script names first */
+ UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
+ if(code!=USCRIPT_INVALID_CODE) {
+ return setOneCode(code, fillIn, capacity, err);
+ }
+ triedCode = TRUE;
+ }
+ internalErrorCode = U_ZERO_ERROR;
+ length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err);
+ if(U_FAILURE(*err) || length != 0) {
+ return length;
+ }
+ icu::CharString likely;
+ {
+ icu::CharStringByteSink sink(&likely);
+ ulocimp_addLikelySubtags(nameOrAbbrOrLocale, sink, &internalErrorCode);
+ }
+ if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) {
+ length = getCodesFromLocale(likely.data(), fillIn, capacity, err);
+ if(U_FAILURE(*err) || length != 0) {
+ return length;
+ }
+ }
+ if(!triedCode) {
+ /* still not found .. try long and abbreviated script names again */
+ UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
+ if(code!=USCRIPT_INVALID_CODE) {
+ return setOneCode(code, fillIn, capacity, err);
+ }
+ }
+ return 0;
+}
diff --git a/thirdparty/icu4c/common/uscript_props.cpp b/thirdparty/icu4c/common/uscript_props.cpp
new file mode 100644
index 0000000000..25d287b57a
--- /dev/null
+++ b/thirdparty/icu4c/common/uscript_props.cpp
@@ -0,0 +1,302 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2013-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: uscript_props.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2013feb16
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/uscript.h"
+#include "unicode/utf16.h"
+#include "ustr_imp.h"
+#include "cmemory.h"
+
+namespace {
+
+// Script metadata (script properties).
+// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
+
+// 0 = NOT_ENCODED, no sample character, default false script properties.
+// Bits 20.. 0: sample character
+
+// Bits 23..21: usage
+const int32_t UNKNOWN = 1 << 21;
+const int32_t EXCLUSION = 2 << 21;
+const int32_t LIMITED_USE = 3 << 21;
+// const int32_t ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10
+const int32_t RECOMMENDED = 5 << 21;
+
+// Bits 31..24: Single-bit flags
+const int32_t RTL = 1 << 24;
+const int32_t LB_LETTERS = 1 << 25;
+const int32_t CASED = 1 << 26;
+
+const int32_t SCRIPT_PROPS[] = {
+ // Begin copy-paste output from
+ // tools/trunk/unicode/py/parsescriptmetadata.py
+ 0x0040 | RECOMMENDED, // Zyyy
+ 0x0308 | RECOMMENDED, // Zinh
+ 0x0628 | RECOMMENDED | RTL, // Arab
+ 0x0531 | RECOMMENDED | CASED, // Armn
+ 0x0995 | RECOMMENDED, // Beng
+ 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo
+ 0x13C4 | LIMITED_USE | CASED, // Cher
+ 0x03E2 | EXCLUSION | CASED, // Copt
+ 0x042F | RECOMMENDED | CASED, // Cyrl
+ 0x10414 | EXCLUSION | CASED, // Dsrt
+ 0x0905 | RECOMMENDED, // Deva
+ 0x12A0 | RECOMMENDED, // Ethi
+ 0x10D3 | RECOMMENDED, // Geor
+ 0x10330 | EXCLUSION, // Goth
+ 0x03A9 | RECOMMENDED | CASED, // Grek
+ 0x0A95 | RECOMMENDED, // Gujr
+ 0x0A15 | RECOMMENDED, // Guru
+ 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani
+ 0xAC00 | RECOMMENDED, // Hang
+ 0x05D0 | RECOMMENDED | RTL, // Hebr
+ 0x304B | RECOMMENDED | LB_LETTERS, // Hira
+ 0x0C95 | RECOMMENDED, // Knda
+ 0x30AB | RECOMMENDED | LB_LETTERS, // Kana
+ 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr
+ 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo
+ 0x004C | RECOMMENDED | CASED, // Latn
+ 0x0D15 | RECOMMENDED, // Mlym
+ 0x1826 | EXCLUSION, // Mong
+ 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr
+ 0x168F | EXCLUSION, // Ogam
+ 0x10300 | EXCLUSION, // Ital
+ 0x0B15 | RECOMMENDED, // Orya
+ 0x16A0 | EXCLUSION, // Runr
+ 0x0D85 | RECOMMENDED, // Sinh
+ 0x0710 | LIMITED_USE | RTL, // Syrc
+ 0x0B95 | RECOMMENDED, // Taml
+ 0x0C15 | RECOMMENDED, // Telu
+ 0x078C | RECOMMENDED | RTL, // Thaa
+ 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai
+ 0x0F40 | RECOMMENDED, // Tibt
+ 0x14C0 | LIMITED_USE, // Cans
+ 0xA288 | LIMITED_USE | LB_LETTERS, // Yiii
+ 0x1703 | EXCLUSION, // Tglg
+ 0x1723 | EXCLUSION, // Hano
+ 0x1743 | EXCLUSION, // Buhd
+ 0x1763 | EXCLUSION, // Tagb
+ 0x280E | UNKNOWN, // Brai
+ 0x10800 | EXCLUSION | RTL, // Cprt
+ 0x1900 | LIMITED_USE, // Limb
+ 0x10000 | EXCLUSION, // Linb
+ 0x10480 | EXCLUSION, // Osma
+ 0x10450 | EXCLUSION, // Shaw
+ 0x1950 | LIMITED_USE | LB_LETTERS, // Tale
+ 0x10380 | EXCLUSION, // Ugar
+ 0,
+ 0x1A00 | EXCLUSION, // Bugi
+ 0x2C00 | EXCLUSION | CASED, // Glag
+ 0x10A00 | EXCLUSION | RTL, // Khar
+ 0xA800 | LIMITED_USE, // Sylo
+ 0x1980 | LIMITED_USE | LB_LETTERS, // Talu
+ 0x2D30 | LIMITED_USE, // Tfng
+ 0x103A0 | EXCLUSION, // Xpeo
+ 0x1B05 | LIMITED_USE, // Bali
+ 0x1BC0 | LIMITED_USE, // Batk
+ 0,
+ 0x11005 | EXCLUSION, // Brah
+ 0xAA00 | LIMITED_USE, // Cham
+ 0,
+ 0,
+ 0,
+ 0,
+ 0x13153 | EXCLUSION, // Egyp
+ 0,
+ 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans
+ 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant
+ 0x16B1C | EXCLUSION, // Hmng
+ 0x10CA1 | EXCLUSION | RTL | CASED, // Hung
+ 0,
+ 0xA984 | LIMITED_USE, // Java
+ 0xA90A | LIMITED_USE, // Kali
+ 0,
+ 0,
+ 0x1C00 | LIMITED_USE, // Lepc
+ 0x10647 | EXCLUSION, // Lina
+ 0x0840 | LIMITED_USE | RTL, // Mand
+ 0,
+ 0x10980 | EXCLUSION | RTL, // Mero
+ 0x07CA | LIMITED_USE | RTL, // Nkoo
+ 0x10C00 | EXCLUSION | RTL, // Orkh
+ 0x1036B | EXCLUSION, // Perm
+ 0xA840 | EXCLUSION, // Phag
+ 0x10900 | EXCLUSION | RTL, // Phnx
+ 0x16F00 | LIMITED_USE, // Plrd
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0xA549 | LIMITED_USE, // Vaii
+ 0,
+ 0x12000 | EXCLUSION, // Xsux
+ 0,
+ 0xFDD0 | UNKNOWN, // Zzzz
+ 0x102A0 | EXCLUSION, // Cari
+ 0x304B | RECOMMENDED | LB_LETTERS, // Jpan
+ 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana
+ 0x10280 | EXCLUSION, // Lyci
+ 0x10920 | EXCLUSION | RTL, // Lydi
+ 0x1C5A | LIMITED_USE, // Olck
+ 0xA930 | EXCLUSION, // Rjng
+ 0xA882 | LIMITED_USE, // Saur
+ 0x1D850 | EXCLUSION, // Sgnw
+ 0x1B83 | LIMITED_USE, // Sund
+ 0,
+ 0xABC0 | LIMITED_USE, // Mtei
+ 0x10840 | EXCLUSION | RTL, // Armi
+ 0x10B00 | EXCLUSION | RTL, // Avst
+ 0x11103 | LIMITED_USE, // Cakm
+ 0xAC00 | RECOMMENDED, // Kore
+ 0x11083 | EXCLUSION, // Kthi
+ 0x10AD8 | EXCLUSION | RTL, // Mani
+ 0x10B60 | EXCLUSION | RTL, // Phli
+ 0x10B8F | EXCLUSION | RTL, // Phlp
+ 0,
+ 0x10B40 | EXCLUSION | RTL, // Prti
+ 0x0800 | EXCLUSION | RTL, // Samr
+ 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt
+ 0,
+ 0,
+ 0xA6A0 | LIMITED_USE, // Bamu
+ 0xA4D0 | LIMITED_USE, // Lisu
+ 0,
+ 0x10A60 | EXCLUSION | RTL, // Sarb
+ 0x16AE6 | EXCLUSION, // Bass
+ 0x1BC20 | EXCLUSION, // Dupl
+ 0x10500 | EXCLUSION, // Elba
+ 0x11315 | EXCLUSION, // Gran
+ 0,
+ 0,
+ 0x1E802 | EXCLUSION | RTL, // Mend
+ 0x109A0 | EXCLUSION | RTL, // Merc
+ 0x10A95 | EXCLUSION | RTL, // Narb
+ 0x10896 | EXCLUSION | RTL, // Nbat
+ 0x10873 | EXCLUSION | RTL, // Palm
+ 0x112BE | EXCLUSION, // Sind
+ 0x118B4 | EXCLUSION | CASED, // Wara
+ 0,
+ 0,
+ 0x16A4F | EXCLUSION, // Mroo
+ 0x1B1C4 | EXCLUSION | LB_LETTERS, // Nshu
+ 0x11183 | EXCLUSION, // Shrd
+ 0x110D0 | EXCLUSION, // Sora
+ 0x11680 | EXCLUSION, // Takr
+ 0x18229 | EXCLUSION | LB_LETTERS, // Tang
+ 0,
+ 0x14400 | EXCLUSION, // Hluw
+ 0x11208 | EXCLUSION, // Khoj
+ 0x11484 | EXCLUSION, // Tirh
+ 0x10537 | EXCLUSION, // Aghb
+ 0x11152 | EXCLUSION, // Mahj
+ 0x11717 | EXCLUSION | LB_LETTERS, // Ahom
+ 0x108F4 | EXCLUSION | RTL, // Hatr
+ 0x1160E | EXCLUSION, // Modi
+ 0x1128F | EXCLUSION, // Mult
+ 0x11AC0 | EXCLUSION, // Pauc
+ 0x1158E | EXCLUSION, // Sidd
+ 0x1E909 | LIMITED_USE | RTL | CASED, // Adlm
+ 0x11C0E | EXCLUSION, // Bhks
+ 0x11C72 | EXCLUSION, // Marc
+ 0x11412 | LIMITED_USE, // Newa
+ 0x104B5 | LIMITED_USE | CASED, // Osge
+ 0x5B57 | RECOMMENDED | LB_LETTERS, // Hanb
+ 0x1112 | RECOMMENDED, // Jamo
+ 0,
+ 0x11D10 | EXCLUSION, // Gonm
+ 0x11A5C | EXCLUSION, // Soyo
+ 0x11A0B | EXCLUSION, // Zanb
+ 0x1180B | EXCLUSION, // Dogr
+ 0x11D71 | LIMITED_USE, // Gong
+ 0x11EE5 | EXCLUSION, // Maka
+ 0x16E40 | EXCLUSION | CASED, // Medf
+ 0x10D12 | LIMITED_USE | RTL, // Rohg
+ 0x10F42 | EXCLUSION | RTL, // Sogd
+ 0x10F19 | EXCLUSION | RTL, // Sogo
+ 0x10FF1 | EXCLUSION | RTL, // Elym
+ 0x1E108 | LIMITED_USE, // Hmnp
+ 0x119CE | EXCLUSION, // Nand
+ 0x1E2E1 | LIMITED_USE, // Wcho
+ 0x10FBF | EXCLUSION | RTL, // Chrs
+ 0x1190C | EXCLUSION, // Diak
+ 0x18C65 | EXCLUSION | LB_LETTERS, // Kits
+ 0x10E88 | EXCLUSION | RTL, // Yezi
+ // End copy-paste from parsescriptmetadata.py
+};
+/*
+ * Return the SCRIPT_PROPS word for script, or 0 when script is negative or
+ * not below the number of entries in the table.
+ */
+int32_t getScriptProps(UScriptCode script) {
+ if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) {
+ return SCRIPT_PROPS[script];
+ } else {
+ return 0;
+ }
+}
+
+} // namespace
+/*
+ * Write the sample character of script (the low 21 bits of its properties
+ * word) to dest as UTF-16 and finish through u_terminateUChars(), whose
+ * result is returned.
+ * Returns 0 when *pErrorCode holds a failure on entry. Sets
+ * U_ILLEGAL_ARGUMENT_ERROR and returns 0 when capacity is negative or when
+ * dest is NULL with a positive capacity. A sample character of 0 gives
+ * length 0; when the UTF-16 length exceeds capacity nothing is written, but
+ * the length is still passed on to u_terminateUChars().
+ */
+U_CAPI int32_t U_EXPORT2
+uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
+ if(U_FAILURE(*pErrorCode)) { return 0; }
+ if(capacity < 0 || (capacity > 0 && dest == NULL)) {
+ *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ int32_t sampleChar = getScriptProps(script) & 0x1fffff;
+ int32_t length;
+ if(sampleChar == 0) {
+ length = 0;
+ } else {
+ length = U16_LENGTH(sampleChar);
+ if(length <= capacity) {
+ int32_t i = 0;
+ U16_APPEND_UNSAFE(dest, i, sampleChar);
+ }
+ }
+ return u_terminateUChars(dest, capacity, length, pErrorCode);
+}
+/*
+ * Return a UnicodeString holding the sample character of script (the low
+ * 21 bits of its properties word); the string is left as default-constructed
+ * when that value is 0.
+ */
+U_COMMON_API icu::UnicodeString U_EXPORT2
+uscript_getSampleUnicodeString(UScriptCode script) {
+ icu::UnicodeString sample;
+ int32_t sampleChar = getScriptProps(script) & 0x1fffff;
+ if(sampleChar != 0) {
+ sample.append(sampleChar);
+ }
+ return sample;
+}
+/*
+ * Return bits 23..21 of the script's properties word, cast to UScriptUsage.
+ */
+U_CAPI UScriptUsage U_EXPORT2
+uscript_getUsage(UScriptCode script) {
+ return (UScriptUsage)((getScriptProps(script) >> 21) & 7);
+}
+/*
+ * Report whether the RTL flag is set in the script's properties word.
+ */
+U_CAPI UBool U_EXPORT2
+uscript_isRightToLeft(UScriptCode script) {
+ return (getScriptProps(script) & RTL) != 0;
+}
+/*
+ * Report whether the LB_LETTERS flag is set in the script's properties word.
+ */
+U_CAPI UBool U_EXPORT2
+uscript_breaksBetweenLetters(UScriptCode script) {
+ return (getScriptProps(script) & LB_LETTERS) != 0;
+}
+/*
+ * Report whether the CASED flag is set in the script's properties word.
+ */
+U_CAPI UBool U_EXPORT2
+uscript_isCased(UScriptCode script) {
+ return (getScriptProps(script) & CASED) != 0;
+}
diff --git a/thirdparty/icu4c/common/uset.cpp b/thirdparty/icu4c/common/uset.cpp
new file mode 100644
index 0000000000..eae7981d52
--- /dev/null
+++ b/thirdparty/icu4c/common/uset.cpp
@@ -0,0 +1,641 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uset.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002mar07
+* created by: Markus W. Scherer
+*
+* There are functions to efficiently serialize a USet into an array of uint16_t
+* and functions to use such a serialized form efficiently without
+* instantiating a new USet.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/uset.h"
+#include "unicode/uniset.h"
+#include "cmemory.h"
+#include "unicode/ustring.h"
+#include "unicode/parsepos.h"
+
+U_NAMESPACE_USE
+
+/** Allocates a default-constructed UnicodeSet and returns it as an opaque USet handle. */
+U_CAPI USet* U_EXPORT2
+uset_openEmpty() {
+    UnicodeSet* created = new UnicodeSet();
+    return reinterpret_cast<USet*>(created);
+}
+
+/** Allocates a UnicodeSet constructed from (start, end) and returns it as an opaque USet handle. */
+U_CAPI USet* U_EXPORT2
+uset_open(UChar32 start, UChar32 end) {
+    UnicodeSet* created = new UnicodeSet(start, end);
+    return reinterpret_cast<USet*>(created);
+}
+
+/** Deletes the UnicodeSet behind the handle. */
+U_CAPI void U_EXPORT2
+uset_close(USet* set) {
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    delete impl;
+}
+
+/** Returns the result of UnicodeSet::clone() on the underlying set, as a USet handle. */
+U_CAPI USet * U_EXPORT2
+uset_clone(const USet *set) {
+    const UnicodeSet* source = reinterpret_cast<const UnicodeSet*>(set);
+    return reinterpret_cast<USet*>(source->UnicodeSet::clone());
+}
+
+/** Forwards to UnicodeSet::isFrozen() on the underlying set. */
+U_CAPI UBool U_EXPORT2
+uset_isFrozen(const USet *set) {
+    const UnicodeSet* impl = reinterpret_cast<const UnicodeSet*>(set);
+    return impl->UnicodeSet::isFrozen();
+}
+
+/** Forwards to UnicodeSet::freeze() on the underlying set; its return value is discarded. */
+U_CAPI void U_EXPORT2
+uset_freeze(USet *set) {
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->UnicodeSet::freeze();
+}
+
+/** Returns the result of UnicodeSet::cloneAsThawed() on the underlying set, as a USet handle. */
+U_CAPI USet * U_EXPORT2
+uset_cloneAsThawed(const USet *set) {
+    const UnicodeSet* source = reinterpret_cast<const UnicodeSet*>(set);
+    return reinterpret_cast<USet*>(source->UnicodeSet::cloneAsThawed());
+}
+
+/** Forwards to UnicodeSet::set(start, end) on the underlying set. */
+U_CAPI void U_EXPORT2
+uset_set(USet* set,
+         UChar32 start, UChar32 end) {
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->UnicodeSet::set(start, end);
+}
+
+/** Forwards to UnicodeSet::addAll() with additionalSet as the argument set. */
+U_CAPI void U_EXPORT2
+uset_addAll(USet* set, const USet *additionalSet) {
+    const UnicodeSet& other = *reinterpret_cast<const UnicodeSet*>(additionalSet);
+    reinterpret_cast<UnicodeSet*>(set)->UnicodeSet::addAll(other);
+}
+
+/** Forwards to UnicodeSet::add(c) on the underlying set. */
+U_CAPI void U_EXPORT2
+uset_add(USet* set, UChar32 c) {
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->UnicodeSet::add(c);
+}
+
+/** Forwards to the two-argument UnicodeSet::add(start, end) on the underlying set. */
+U_CAPI void U_EXPORT2
+uset_addRange(USet* set, UChar32 start, UChar32 end) {
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->UnicodeSet::add(start, end);
+}
+
+/**
+ * Adds the string str to the set via UnicodeSet::add(const UnicodeString&).
+ * A negative strLen is passed on as "terminated" to the UnicodeString
+ * constructor, which handles -1 for the length.
+ */
+U_CAPI void U_EXPORT2
+uset_addString(USet* set, const UChar* str, int32_t strLen) {
+    UnicodeString text(strLen<0, str, strLen);
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->UnicodeSet::add(text);
+}
+
+/**
+ * Builds a UnicodeString from (str, strLen) and passes it to
+ * UnicodeSet::addAll(const UnicodeString&); the constructor handles -1 for strLen.
+ */
+U_CAPI void U_EXPORT2
+uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen) {
+    UnicodeString text(str, strLen);
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->UnicodeSet::addAll(text);
+}
+
+/** Forwards to UnicodeSet::remove(c) on the underlying set. */
+U_CAPI void U_EXPORT2
+uset_remove(USet* set, UChar32 c) {
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->UnicodeSet::remove(c);
+}
+
+/** Forwards to the two-argument UnicodeSet::remove(start, end) on the underlying set. */
+U_CAPI void U_EXPORT2
+uset_removeRange(USet* set, UChar32 start, UChar32 end) {
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->UnicodeSet::remove(start, end);
+}
+
+/**
+ * Removes the string str via UnicodeSet::remove(const UnicodeString&).
+ * strLen == -1 is passed to the UnicodeString constructor as "terminated".
+ */
+U_CAPI void U_EXPORT2
+uset_removeString(USet* set, const UChar* str, int32_t strLen) {
+    UnicodeString text(strLen==-1, str, strLen);
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->UnicodeSet::remove(text);
+}
+
+/** Forwards to UnicodeSet::removeAll() with the set behind `remove` as the argument. */
+U_CAPI void U_EXPORT2
+uset_removeAll(USet* set, const USet* remove) {
+    const UnicodeSet& other = *reinterpret_cast<const UnicodeSet*>(remove);
+    reinterpret_cast<UnicodeSet*>(set)->UnicodeSet::removeAll(other);
+}
+
+/** Forwards to UnicodeSet::retain(start, end) on the underlying set. */
+U_CAPI void U_EXPORT2
+uset_retain(USet* set, UChar32 start, UChar32 end) {
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->UnicodeSet::retain(start, end);
+}
+
+/** Forwards to UnicodeSet::retainAll() with the set behind `retain` as the argument. */
+U_CAPI void U_EXPORT2
+uset_retainAll(USet* set, const USet* retain) {
+    const UnicodeSet& other = *reinterpret_cast<const UnicodeSet*>(retain);
+    reinterpret_cast<UnicodeSet*>(set)->UnicodeSet::retainAll(other);
+}
+
+/** Forwards to UnicodeSet::compact() on the underlying set. */
+U_CAPI void U_EXPORT2
+uset_compact(USet* set) {
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->UnicodeSet::compact();
+}
+
+/** Forwards to the no-argument UnicodeSet::complement() on the underlying set. */
+U_CAPI void U_EXPORT2
+uset_complement(USet* set) {
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->UnicodeSet::complement();
+}
+
+/** Forwards to UnicodeSet::complementAll() with the set behind `complement` as the argument. */
+U_CAPI void U_EXPORT2
+uset_complementAll(USet* set, const USet* complement) {
+    const UnicodeSet& other = *reinterpret_cast<const UnicodeSet*>(complement);
+    reinterpret_cast<UnicodeSet*>(set)->UnicodeSet::complementAll(other);
+}
+
+/** Forwards to UnicodeSet::clear() on the underlying set. */
+U_CAPI void U_EXPORT2
+uset_clear(USet* set) {
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->UnicodeSet::clear();
+}
+
+/** Forwards to UnicodeSet::removeAllStrings() on the underlying set. */
+U_CAPI void U_EXPORT2
+uset_removeAllStrings(USet* set) {
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->UnicodeSet::removeAllStrings();
+}
+
+/** Forwards to UnicodeSet::isEmpty() on the underlying set. */
+U_CAPI UBool U_EXPORT2
+uset_isEmpty(const USet* set) {
+    const UnicodeSet* impl = reinterpret_cast<const UnicodeSet*>(set);
+    return impl->UnicodeSet::isEmpty();
+}
+
+/** Forwards to UnicodeSet::contains(c) on the underlying set. */
+U_CAPI UBool U_EXPORT2
+uset_contains(const USet* set, UChar32 c) {
+    const UnicodeSet* impl = reinterpret_cast<const UnicodeSet*>(set);
+    return impl->UnicodeSet::contains(c);
+}
+
+/** Forwards to the two-argument UnicodeSet::contains(start, end) on the underlying set. */
+U_CAPI UBool U_EXPORT2
+uset_containsRange(const USet* set, UChar32 start, UChar32 end) {
+    const UnicodeSet* impl = reinterpret_cast<const UnicodeSet*>(set);
+    return impl->UnicodeSet::contains(start, end);
+}
+
+/**
+ * Tests the string str with UnicodeSet::contains(const UnicodeString&).
+ * strLen == -1 is passed to the UnicodeString constructor as "terminated".
+ */
+U_CAPI UBool U_EXPORT2
+uset_containsString(const USet* set, const UChar* str, int32_t strLen) {
+    UnicodeString text(strLen==-1, str, strLen);
+    const UnicodeSet* impl = reinterpret_cast<const UnicodeSet*>(set);
+    return impl->UnicodeSet::contains(text);
+}
+
+/** Forwards to UnicodeSet::containsAll() on set1 with set2 as the argument. */
+U_CAPI UBool U_EXPORT2
+uset_containsAll(const USet* set1, const USet* set2) {
+    const UnicodeSet& other = *reinterpret_cast<const UnicodeSet*>(set2);
+    return reinterpret_cast<const UnicodeSet*>(set1)->UnicodeSet::containsAll(other);
+}
+
+/**
+ * Passes the text (str, strLen) to UnicodeSet::containsAll(const UnicodeString&).
+ * The text is wrapped with the (isTerminated, text, length) UnicodeString
+ * constructor, since nothing is being added to the set.
+ */
+U_CAPI UBool U_EXPORT2
+uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen) {
+    UnicodeString text(strLen==-1, str, strLen);
+    const UnicodeSet* impl = reinterpret_cast<const UnicodeSet*>(set);
+    return impl->UnicodeSet::containsAll(text);
+}
+
+/** Forwards to UnicodeSet::containsNone() on set1 with set2 as the argument. */
+U_CAPI UBool U_EXPORT2
+uset_containsNone(const USet* set1, const USet* set2) {
+    const UnicodeSet& other = *reinterpret_cast<const UnicodeSet*>(set2);
+    return reinterpret_cast<const UnicodeSet*>(set1)->UnicodeSet::containsNone(other);
+}
+
+/** Forwards to UnicodeSet::containsSome() on set1 with set2 as the argument. */
+U_CAPI UBool U_EXPORT2
+uset_containsSome(const USet* set1, const USet* set2) {
+    const UnicodeSet& other = *reinterpret_cast<const UnicodeSet*>(set2);
+    return reinterpret_cast<const UnicodeSet*>(set1)->UnicodeSet::containsSome(other);
+}
+
+/** Forwards to UnicodeSet::span(s, length, spanCondition) on the underlying set. */
+U_CAPI int32_t U_EXPORT2
+uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {
+    const UnicodeSet* impl = reinterpret_cast<const UnicodeSet*>(set);
+    return impl->UnicodeSet::span(s, length, spanCondition);
+}
+
+/** Forwards to UnicodeSet::spanBack(s, length, spanCondition) on the underlying set. */
+U_CAPI int32_t U_EXPORT2
+uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {
+    const UnicodeSet* impl = reinterpret_cast<const UnicodeSet*>(set);
+    return impl->UnicodeSet::spanBack(s, length, spanCondition);
+}
+
+/** Forwards to UnicodeSet::spanUTF8(s, length, spanCondition) on the underlying set. */
+U_CAPI int32_t U_EXPORT2
+uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
+    const UnicodeSet* impl = reinterpret_cast<const UnicodeSet*>(set);
+    return impl->UnicodeSet::spanUTF8(s, length, spanCondition);
+}
+
+/** Forwards to UnicodeSet::spanBackUTF8(s, length, spanCondition) on the underlying set. */
+U_CAPI int32_t U_EXPORT2
+uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
+    const UnicodeSet* impl = reinterpret_cast<const UnicodeSet*>(set);
+    return impl->UnicodeSet::spanBackUTF8(s, length, spanCondition);
+}
+
+/** Compares the two underlying UnicodeSet objects with operator==. */
+U_CAPI UBool U_EXPORT2
+uset_equals(const USet* set1, const USet* set2) {
+    const UnicodeSet& lhs = *reinterpret_cast<const UnicodeSet*>(set1);
+    const UnicodeSet& rhs = *reinterpret_cast<const UnicodeSet*>(set2);
+    return lhs == rhs;
+}
+
+/** Forwards to UnicodeSet::indexOf(c) on the underlying set. */
+U_CAPI int32_t U_EXPORT2
+uset_indexOf(const USet* set, UChar32 c) {
+    const UnicodeSet* impl = reinterpret_cast<const UnicodeSet*>(set);
+    return impl->UnicodeSet::indexOf(c);
+}
+
+/** Forwards to UnicodeSet::charAt(index) on the underlying set. */
+U_CAPI UChar32 U_EXPORT2
+uset_charAt(const USet* set, int32_t index) {
+    const UnicodeSet* impl = reinterpret_cast<const UnicodeSet*>(set);
+    return impl->UnicodeSet::charAt(index);
+}
+
+/** Forwards to UnicodeSet::size() on the underlying set. */
+U_CAPI int32_t U_EXPORT2
+uset_size(const USet* set) {
+    const UnicodeSet* impl = reinterpret_cast<const UnicodeSet*>(set);
+    return impl->UnicodeSet::size();
+}
+
+U_NAMESPACE_BEGIN
+/**
+ * Accessor shim for the private USet support API of UnicodeSet.
+ * A friend class is used because that is more portable than trying to
+ * declare extern "C" functions as friends.
+ */
+class USetAccess /* not : public UObject because all methods are static */ {
+public:
+    /* Defined in-class so the compiler can inline them. */
+
+    /** Returns the i-th string of the set, as reported by UnicodeSet::getString(). */
+    static const UnicodeString* getString(const UnicodeSet& set,
+                                          int32_t i) {
+        return set.getString(i);
+    }
+
+    /** Returns the number of strings in the set, as reported by UnicodeSet::stringsSize(). */
+    static int32_t getStringCount(const UnicodeSet& set) {
+        return set.stringsSize();
+    }
+
+private:
+    /* declared but never defined: this class is not meant to be instantiated */
+    USetAccess();
+};
+U_NAMESPACE_END
+
+/** Returns the number of items in the set: its range count plus its string count. */
+U_CAPI int32_t U_EXPORT2
+uset_getItemCount(const USet* uset) {
+    const UnicodeSet& set = *reinterpret_cast<const UnicodeSet*>(uset);
+    const int32_t rangeCount = set.getRangeCount();
+    return rangeCount + USetAccess::getStringCount(set);
+}
+
+/**
+ * Fetches one item of the set; ranges are indexed first, then strings.
+ *
+ * - Range item: stores its bounds in *start / *end and returns 0.
+ * - String item: extracts it into (str, strCapacity) and returns the value
+ *   of UnicodeString::extract().
+ * - Negative index: sets U_ILLEGAL_ARGUMENT_ERROR and returns -1.
+ * - Index past the last string: sets U_INDEX_OUTOFBOUNDS_ERROR and returns -1.
+ *
+ * Returns 0 without doing anything when *ec already indicates a failure.
+ */
+U_CAPI int32_t U_EXPORT2
+uset_getItem(const USet* uset, int32_t itemIndex,
+             UChar32* start, UChar32* end,
+             UChar* str, int32_t strCapacity,
+             UErrorCode* ec) {
+    if (U_FAILURE(*ec)) {
+        return 0;
+    }
+    if (itemIndex < 0) {
+        *ec = U_ILLEGAL_ARGUMENT_ERROR;
+        return -1;
+    }
+
+    const UnicodeSet& set = *(const UnicodeSet*)uset;
+    const int32_t rangeCount = set.getRangeCount();
+    if (itemIndex < rangeCount) {
+        *start = set.getRangeStart(itemIndex);
+        *end = set.getRangeEnd(itemIndex);
+        return 0;
+    }
+
+    // Past the ranges: the remainder indexes into the strings.
+    const int32_t stringIndex = itemIndex - rangeCount;
+    if (stringIndex >= USetAccess::getStringCount(set)) {
+        *ec = U_INDEX_OUTOFBOUNDS_ERROR;
+        return -1;
+    }
+    const UnicodeString* item = USetAccess::getString(set, stringIndex);
+    return item->extract(str, strCapacity, *ec);
+}
+
+//U_CAPI int32_t U_EXPORT2
+//uset_getRangeCount(const USet* set) {
+// return ((const UnicodeSet*) set)->getRangeCount();
+//}
+//
+//U_CAPI UBool U_EXPORT2
+//uset_getRange(const USet* set, int32_t rangeIndex,
+// UChar32* pStart, UChar32* pEnd) {
+// if ((uint32_t) rangeIndex >= (uint32_t) uset_getRangeCount(set)) {
+// return FALSE;
+// }
+// const UnicodeSet* us = (const UnicodeSet*) set;
+// *pStart = us->getRangeStart(rangeIndex);
+// *pEnd = us->getRangeEnd(rangeIndex);
+// return TRUE;
+//}
+
+/*
+ * Serialize a USet into 16-bit units.
+ * Store BMP code points as themselves with one 16-bit unit each.
+ *
+ * Important: the code points in the array are in ascending order,
+ * therefore all BMP code points precede all supplementary code points.
+ *
+ * Store each supplementary code point in 2 16-bit units,
+ * simply with higher-then-lower 16-bit halves.
+ *
+ * Precede the entire list with the length.
+ * If there are supplementary code points, then set bit 15 in the length
+ * and add the bmpLength between it and the array.
+ *
+ * In other words:
+ * - all BMP: (length=bmpLength) BMP, .., BMP
+ * - some supplementary: (length|0x8000) (bmpLength<length) BMP, .., BMP, supp-high, supp-low, ..
+ */
+/**
+ * Serializes the set into dest by delegating to UnicodeSet::serialize().
+ * Returns 0 without touching dest when ec is NULL or already holds a failure.
+ */
+U_CAPI int32_t U_EXPORT2
+uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* ec) {
+    if (ec==NULL) {
+        return 0;
+    }
+    if (U_FAILURE(*ec)) {
+        return 0;
+    }
+
+    const UnicodeSet* impl = reinterpret_cast<const UnicodeSet*>(set);
+    return impl->UnicodeSet::serialize(dest, destCapacity, *ec);
+}
+
+/**
+ * Points fillSet at a serialized set stored in src (no copy is made).
+ *
+ * The first unit is the length; if its bit 15 is set, supplementary values
+ * are present and a second unit carries the BMP length. Returns FALSE when
+ * fillSet is NULL, when src is NULL/empty, or when srcLength is too short
+ * for the announced length; in the latter two cases the lengths in fillSet
+ * are zeroed.
+ */
+U_CAPI UBool U_EXPORT2
+uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength) {
+    if(fillSet==NULL) {
+        return FALSE;
+    }
+    if(src==NULL || srcLength<=0) {
+        fillSet->length=fillSet->bmpLength=0;
+        return FALSE;
+    }
+
+    const int32_t header=src[0];
+    const UBool hasSupplementary=(header&0x8000)!=0;
+    const int32_t length=header&0x7fff;
+    /* number of prefix units before the array: length, plus bmpLength if present */
+    const int32_t prefixUnits=hasSupplementary ? 2 : 1;
+
+    if(srcLength<(prefixUnits+length)) {
+        fillSet->length=fillSet->bmpLength=0;
+        return FALSE;
+    }
+
+    if(hasSupplementary) {
+        fillSet->bmpLength=src[1];
+    } else {
+        /* only BMP values */
+        fillSet->bmpLength=length;
+    }
+    fillSet->array=src+prefixUnits;
+    fillSet->length=length;
+    return TRUE;
+}
+
+/**
+ * Fills fillSet, using its own staticArray, with the serialized form of the
+ * single-code-point set {c}. Does nothing when fillSet is NULL or c is
+ * outside 0..0x10ffff.
+ */
+U_CAPI void U_EXPORT2
+uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c) {
+    if(fillSet==NULL || (uint32_t)c>0x10ffff) {
+        return;
+    }
+
+    uint16_t* units=fillSet->staticArray;
+    fillSet->array=units;
+    if(c<0xffff) {
+        /* start and limit are both BMP values */
+        units[0]=(uint16_t)c;
+        units[1]=(uint16_t)(c+1);
+        fillSet->length=2;
+        fillSet->bmpLength=2;
+    } else if(c==0xffff) {
+        /* BMP start, but the limit 0x10000 needs two units */
+        units[0]=0xffff;
+        units[1]=1;
+        units[2]=0;
+        fillSet->bmpLength=1;
+        fillSet->length=3;
+    } else if(c<0x10ffff) {
+        /* start and limit are both supplementary: high half, then low half */
+        const UChar32 limit=c+1;
+        units[0]=(uint16_t)(c>>16);
+        units[1]=(uint16_t)c;
+        units[2]=(uint16_t)(limit>>16);
+        units[3]=(uint16_t)limit;
+        fillSet->bmpLength=0;
+        fillSet->length=4;
+    } else /* c==0x10ffff */ {
+        /* only the start is stored for the last code point */
+        units[0]=0x10;
+        units[1]=0xffff;
+        fillSet->bmpLength=0;
+        fillSet->length=2;
+    }
+}
+
+/**
+ * Tests whether code point c is in a serialized set.
+ *
+ * The array holds ascending range boundaries (start, limit, start, ...):
+ * bmpLength single-unit BMP boundaries followed by supplementary boundaries
+ * stored as (high, low) unit pairs. c is contained when an odd number of
+ * boundaries is <= c. Returns FALSE for a NULL set or c outside 0..0x10ffff.
+ *
+ * The array is only read where the respective part is non-empty, so a set
+ * without BMP boundaries or without supplementary boundaries is never
+ * indexed out of bounds.
+ */
+U_CAPI UBool U_EXPORT2
+uset_serializedContains(const USerializedSet* set, UChar32 c) {
+    const uint16_t* array;
+
+    if(set==NULL || (uint32_t)c>0x10ffff) {
+        return FALSE;
+    }
+
+    array=set->array;
+    if(c<=0xffff) {
+        /* find c in the BMP part */
+        int32_t lo = 0;
+        int32_t hi = set->bmpLength-1;
+        if (hi < 0) {
+            /* no BMP boundaries: the first range (if any) starts above U+FFFF */
+            return FALSE;
+        }
+        if (c < array[0]) {
+            hi = 0;
+        } else if (c < array[hi]) {
+            /* binary search for the first boundary greater than c */
+            for(;;) {
+                int32_t i = (lo + hi) >> 1;
+                if (i == lo) {
+                    break;  // Done!
+                } else if (c < array[i]) {
+                    hi = i;
+                } else {
+                    lo = i;
+                }
+            }
+        } else {
+            hi += 1;
+        }
+        /* hi is the number of boundaries <= c; odd means inside a range */
+        return (UBool)(hi&1);
+    } else {
+        /* find c in the supplementary part */
+        uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c;
+        int32_t base = set->bmpLength;
+        int32_t lo = 0;
+        int32_t hi = set->length - 2 - base;
+        if (hi < 0) {
+            /* no supplementary boundaries: only the BMP boundaries count */
+            hi = 0;
+        } else if (high < array[base] || (high==array[base] && low<array[base+1])) {
+            hi = 0;
+        } else if (high < array[base+hi] || (high==array[base+hi] && low<array[base+hi+1])) {
+            /* binary search over unit pairs for the first boundary greater than c */
+            for (;;) {
+                int32_t i = ((lo + hi) >> 1) & ~1;  // Guarantee even result
+                int32_t iabs = i + base;
+                if (i == lo) {
+                    break;  // Done!
+                } else if (high < array[iabs] || (high==array[iabs] && low<array[iabs+1])) {
+                    hi = i;
+                } else {
+                    lo = i;
+                }
+            }
+        } else {
+            hi += 2;
+        }
+        /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
+        return (UBool)(((hi+(base<<1))&2)!=0);
+    }
+}
+
+/**
+ * Returns the number of ranges in a serialized set, or 0 for a NULL set.
+ * Each BMP boundary takes one unit and each supplementary boundary two;
+ * the boundary total is halved, rounding up, to get the range count.
+ */
+U_CAPI int32_t U_EXPORT2
+uset_getSerializedRangeCount(const USerializedSet* set) {
+    if(set==NULL) {
+        return 0;
+    }
+
+    const int32_t bmpBoundaries=set->bmpLength;
+    const int32_t suppBoundaries=(set->length-bmpBoundaries)/2;
+    return (bmpBoundaries+suppBoundaries+1)/2;
+}
+
+/**
+ * Retrieves the rangeIndex-th range of a serialized set into *pStart / *pEnd
+ * (both inclusive). A range whose limit is not stored ends at 0x10ffff.
+ * Returns FALSE for NULL arguments, a negative index, or an index beyond
+ * the last range.
+ */
+U_CAPI UBool U_EXPORT2
+uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
+                        UChar32* pStart, UChar32* pEnd) {
+    if(set==NULL || rangeIndex<0 || pStart==NULL || pEnd==NULL) {
+        return FALSE;
+    }
+
+    const uint16_t* units=set->array;
+    const int32_t length=set->length;
+    const int32_t bmpLength=set->bmpLength;
+
+    int32_t idx=rangeIndex*2; /* address start/limit pairs */
+    if(idx<bmpLength) {
+        /* the start is a BMP boundary; the limit may be BMP, supplementary, or absent */
+        *pStart=units[idx];
+        ++idx;
+        if(idx<bmpLength) {
+            *pEnd=units[idx]-1;
+        } else if(idx<length) {
+            *pEnd=((((int32_t)units[idx])<<16)|units[idx+1])-1;
+        } else {
+            *pEnd=0x10ffff;
+        }
+        return TRUE;
+    }
+
+    /* both boundaries are supplementary: address pairs of pairs of units */
+    const int32_t suppLength=length-bmpLength;
+    idx=(idx-bmpLength)*2;
+    if(idx>=suppLength) {
+        return FALSE;
+    }
+
+    const uint16_t* supp=units+bmpLength;
+    *pStart=(((int32_t)supp[idx])<<16)|supp[idx+1];
+    idx+=2;
+    if(idx<suppLength) {
+        *pEnd=((((int32_t)supp[idx])<<16)|supp[idx+1])-1;
+    } else {
+        *pEnd=0x10ffff;
+    }
+    return TRUE;
+}
+
+// TODO The old, internal uset.c had an efficient uset_containsOne function.
+// Returned the one and only code point, or else -1 or something.
+// Consider adding such a function to both C and C++ UnicodeSet/uset.
+// See tools/gennorm/store.c for usage, now usetContainsOne there.
+
+// TODO Investigate incorporating this code into UnicodeSet to improve
+// efficiency.
+// ---
+// #define USET_GROW_DELTA 20
+//
+// static int32_t
+// findChar(const UChar32* array, int32_t length, UChar32 c) {
+// int32_t i;
+//
+// /* check the last range limit first for more efficient appending */
+// if(length>0) {
+// if(c>=array[length-1]) {
+// return length;
+// }
+//
+// /* do not check the last range limit again in the loop below */
+// --length;
+// }
+//
+// for(i=0; i<length && c>=array[i]; ++i) {}
+// return i;
+// }
+//
+// static UBool
+// addRemove(USet* set, UChar32 c, int32_t doRemove) {
+// int32_t i, length, more;
+//
+// if(set==NULL || (uint32_t)c>0x10ffff) {
+// return FALSE;
+// }
+//
+// length=set->length;
+// i=findChar(set->array, length, c);
+// if((i&1)^doRemove) {
+// /* c is already in the set */
+// return TRUE;
+// }
+//
+// /* how many more array items do we need? */
+// if(i<length && (c+1)==set->array[i]) {
+// /* c is just before the following range, extend that in-place by one */
+// set->array[i]=c;
+// if(i>0) {
+// --i;
+// if(c==set->array[i]) {
+// /* the previous range collapsed, remove it */
+// set->length=length-=2;
+// if(i<length) {
+// uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
+// }
+// }
+// }
+// return TRUE;
+// } else if(i>0 && c==set->array[i-1]) {
+// /* c is just after the previous range, extend that in-place by one */
+// if(++c<=0x10ffff) {
+// set->array[i-1]=c;
+// if(i<length && c==set->array[i]) {
+// /* the following range collapsed, remove it */
+// --i;
+// set->length=length-=2;
+// if(i<length) {
+// uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
+// }
+// }
+// } else {
+// /* extend the previous range (had limit 0x10ffff) to the end of Unicode */
+// set->length=i-1;
+// }
+// return TRUE;
+// } else if(i==length && c==0x10ffff) {
+// /* insert one range limit c */
+// more=1;
+// } else {
+// /* insert two range limits c, c+1 */
+// more=2;
+// }
+//
+// /* insert <more> range limits */
+// if(length+more>set->capacity) {
+// /* reallocate */
+// int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA;
+// UChar32* newArray=(UChar32* )uprv_malloc(newCapacity*4);
+// if(newArray==NULL) {
+// return FALSE;
+// }
+// set->capacity=newCapacity;
+// uprv_memcpy(newArray, set->array, length*4);
+//
+// if(set->array!=set->staticBuffer) {
+// uprv_free(set->array);
+// }
+// set->array=newArray;
+// }
+//
+// if(i<length) {
+// uprv_memmove(set->array+i+more, set->array+i, (length-i)*4);
+// }
+// set->array[i]=c;
+// if(more==2) {
+// set->array[i+1]=c+1;
+// }
+// set->length+=more;
+//
+// return TRUE;
+// }
+//
+// U_CAPI UBool U_EXPORT2
+// uset_add(USet* set, UChar32 c) {
+// return addRemove(set, c, 0);
+// }
+//
+// U_CAPI void U_EXPORT2
+// uset_remove(USet* set, UChar32 c) {
+// addRemove(set, c, 1);
+// }
diff --git a/thirdparty/icu4c/common/uset_imp.h b/thirdparty/icu4c/common/uset_imp.h
new file mode 100644
index 0000000000..7233b9303c
--- /dev/null
+++ b/thirdparty/icu4c/common/uset_imp.h
@@ -0,0 +1,62 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2004-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uset_imp.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004sep07
+* created by: Markus W. Scherer
+*
+* Internal USet definitions.
+*/
+
+#ifndef __USET_IMP_H__
+#define __USET_IMP_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uset.h"
+
+U_CDECL_BEGIN
+
+typedef void U_CALLCONV
+USetAdd(USet *set, UChar32 c); /* function type of the USetAdder::add member */
+
+typedef void U_CALLCONV
+USetAddRange(USet *set, UChar32 start, UChar32 end); /* function type of the USetAdder::addRange member */
+
+typedef void U_CALLCONV
+USetAddString(USet *set, const UChar *str, int32_t length); /* function type of the USetAdder::addString member */
+
+typedef void U_CALLCONV
+USetRemove(USet *set, UChar32 c); /* function type of the USetAdder::remove member */
+
+typedef void U_CALLCONV
+USetRemoveRange(USet *set, UChar32 start, UChar32 end); /* function type of the USetAdder::removeRange member */
+
+/**
+ * Interface for adding items to a USet, to keep low-level code from
+ * statically depending on the USet implementation.
+ * Calls will look like sa->add(sa->set, c);
+ *
+ * The struct bundles the target set with one function pointer per
+ * operation; each callback receives the set as its first argument.
+ */
+struct USetAdder {
+ USet *set; /* the set that is passed to every callback */
+ USetAdd *add; /* callback taking (set, c) */
+ USetAddRange *addRange; /* callback taking (set, start, end) */
+ USetAddString *addString; /* callback taking (set, str, length) */
+ USetRemove *remove; /* callback taking (set, c) */
+ USetRemoveRange *removeRange; /* callback taking (set, start, end) */
+};
+typedef struct USetAdder USetAdder;
+
+U_CDECL_END
+
+#endif
+
diff --git a/thirdparty/icu4c/common/uset_props.cpp b/thirdparty/icu4c/common/uset_props.cpp
new file mode 100644
index 0000000000..f08e760b10
--- /dev/null
+++ b/thirdparty/icu4c/common/uset_props.cpp
@@ -0,0 +1,143 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uset_props.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004aug30
+* created by: Markus W. Scherer
+*
+* C wrappers around UnicodeSet functions that are implemented in
+* uniset_props.cpp, split off for modularization.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/uset.h"
+#include "unicode/uniset.h"
+#include "cmemory.h"
+#include "unicode/ustring.h"
+#include "unicode/parsepos.h"
+
+U_NAMESPACE_USE
+
+/**
+ * Creates a set from a pattern string.
+ * Returns NULL with U_MEMORY_ALLOCATION_ERROR if the allocation yields NULL,
+ * and NULL (after deleting the set) if *ec holds a failure after construction.
+ * patternLength == -1 is passed to the UnicodeString constructor as "terminated".
+ */
+U_CAPI USet* U_EXPORT2
+uset_openPattern(const UChar* pattern, int32_t patternLength,
+                 UErrorCode* ec)
+{
+    UnicodeString pat(patternLength==-1, pattern, patternLength);
+    UnicodeSet* created = new UnicodeSet(pat, *ec);
+    if(created == NULL) {
+        *ec = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    if (!U_FAILURE(*ec)) {
+        return reinterpret_cast<USet*>(created);
+    }
+    /* the pattern could not be applied: do not hand out a half-built set */
+    delete created;
+    return NULL;
+}
+
+/**
+ * Creates a set from a pattern string and option bits; NULL is passed for
+ * the constructor's third argument.
+ * Returns NULL with U_MEMORY_ALLOCATION_ERROR if the allocation yields NULL,
+ * and NULL (after deleting the set) if *ec holds a failure after construction.
+ * patternLength == -1 is passed to the UnicodeString constructor as "terminated".
+ */
+U_CAPI USet* U_EXPORT2
+uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
+                        uint32_t options,
+                        UErrorCode* ec)
+{
+    UnicodeString pat(patternLength==-1, pattern, patternLength);
+    UnicodeSet* created = new UnicodeSet(pat, options, NULL, *ec);
+    if(created == NULL) {
+        *ec = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    if (!U_FAILURE(*ec)) {
+        return reinterpret_cast<USet*>(created);
+    }
+    /* the pattern could not be applied: do not hand out a half-built set */
+    delete created;
+    return NULL;
+}
+
+
+/**
+ * Applies a pattern to an existing set and returns the index of the
+ * ParsePosition afterwards.
+ * Returns 0 when status is NULL or already failed, and 0 with
+ * U_ILLEGAL_ARGUMENT_ERROR when set is NULL.
+ */
+U_CAPI int32_t U_EXPORT2
+uset_applyPattern(USet *set,
+                  const UChar *pattern, int32_t patternLength,
+                  uint32_t options,
+                  UErrorCode *status){
+    // status is dereferenced below, so a missing or failed one is rejected first
+    if(status == NULL || U_FAILURE(*status)){
+        return 0;
+    }
+
+    // Only the set parameter is validated here; a NULL or NUL-terminated
+    // pattern is left to the UnicodeString constructor.
+    if(set == NULL){
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    UnicodeString pat(pattern, patternLength);
+    ParsePosition pos;
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->applyPattern(pat, pos, options, NULL, *status);
+
+    return pos.getIndex();
+}
+
+/** Forwards to UnicodeSet::applyIntPropertyValue(prop, value, *ec) on the underlying set. */
+U_CAPI void U_EXPORT2
+uset_applyIntPropertyValue(USet* set,
+                           UProperty prop, int32_t value, UErrorCode* ec) {
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->applyIntPropertyValue(prop, value, *ec);
+}
+
+/**
+ * Builds UnicodeString objects from the property and value names and
+ * forwards them to UnicodeSet::applyPropertyAlias() on the underlying set.
+ */
+U_CAPI void U_EXPORT2
+uset_applyPropertyAlias(USet* set,
+                        const UChar *prop, int32_t propLength,
+                        const UChar *value, int32_t valueLength,
+                        UErrorCode* ec) {
+    UnicodeString propName(prop, propLength);
+    UnicodeString valueName(value, valueLength);
+
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->applyPropertyAlias(propName, valueName, *ec);
+}
+
+/**
+ * Returns true when the text at pos is a '[' (code unit 91) followed by at
+ * least one more unit, or when UnicodeSet::resemblesPattern() accepts it.
+ */
+U_CAPI UBool U_EXPORT2
+uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
+                      int32_t pos) {
+
+    UnicodeString pat(pattern, patternLength);
+
+    /* '[' must not be the last character of the pattern */
+    if ((pos+1) < pat.length() && pat.charAt(pos) == (UChar)91/*[*/) {
+        return true;
+    }
+    return UnicodeSet::resemblesPattern(pat, pos) != 0;
+}
+
+/**
+ * Generates the pattern of the set with UnicodeSet::toPattern() and extracts
+ * it into (result, resultCapacity); returns the value of UnicodeString::extract().
+ */
+U_CAPI int32_t U_EXPORT2
+uset_toPattern(const USet* set,
+               UChar* result, int32_t resultCapacity,
+               UBool escapeUnprintable,
+               UErrorCode* ec) {
+    const UnicodeSet* impl = reinterpret_cast<const UnicodeSet*>(set);
+    UnicodeString pattern;
+    impl->toPattern(pattern, escapeUnprintable);
+    return pattern.extract(result, resultCapacity, *ec);
+}
+
+/** Forwards to UnicodeSet::closeOver(attributes) on the underlying set. */
+U_CAPI void U_EXPORT2
+uset_closeOver(USet* set, int32_t attributes) {
+    UnicodeSet* impl = reinterpret_cast<UnicodeSet*>(set);
+    impl->UnicodeSet::closeOver(attributes);
+}
diff --git a/thirdparty/icu4c/common/usetiter.cpp b/thirdparty/icu4c/common/usetiter.cpp
new file mode 100644
index 0000000000..7915169049
--- /dev/null
+++ b/thirdparty/icu4c/common/usetiter.cpp
@@ -0,0 +1,152 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2002-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+#include "unicode/usetiter.h"
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+#include "uvector.h"
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSetIterator)
+
+/**
+ * Creates an iterator positioned at the start of the given set.
+ * @param uSet set to iterate over
+ */
+UnicodeSetIterator::UnicodeSetIterator(const UnicodeSet& uSet)
+        : cpString(NULL) {
+    reset(uSet);
+}
+
+/**
+ * Creates an iterator without a set, for when the set is supplied later
+ * through reset(const UnicodeSet&). Until then the iteration is empty.
+ */
+UnicodeSetIterator::UnicodeSetIterator() {
+    cpString = NULL;
+    set = NULL;
+    reset();
+}
+
+/**
+ * Releases the scratch string that getString() allocates for code points.
+ */
+UnicodeSetIterator::~UnicodeSetIterator()
+{
+    delete cpString;
+}
+
+/**
+ * Advances to the next element of the set.
+ * @return true if there was another element in the set.
+ * For a code point element, codepoint and codepointEnd are both set to it
+ * and the string field is cleared. For a string element, codepoint is set
+ * to IS_STRING and the string field points at the string.
+ * <br>All code points of all ranges are returned before any strings.
+ */
+UBool UnicodeSetIterator::next() {
+    // Move on to the following range only once the current one is used up.
+    UBool haveCodePoint = nextElement <= endElement;
+    if (!haveCodePoint && range < endRange) {
+        loadRange(++range);
+        haveCodePoint = TRUE;
+    }
+    if (haveCodePoint) {
+        codepoint = codepointEnd = nextElement++;
+        string = NULL;
+        return TRUE;
+    }
+
+    // Ranges are exhausted; continue with the strings, if any are left.
+    if (nextString < stringCount) {
+        codepoint = (UChar32)IS_STRING; // signal that value is actually a string
+        string = (const UnicodeString*) set->strings->elementAt(nextString++);
+        return TRUE;
+    }
+    return FALSE;
+}
+
+/**
+ * Advances to the next range or string of the set.
+ * @return true if there was another element in the set.
+ * For a range element, <codepoint, codepointEnd> are set to the remaining
+ * part of the current range, or to the whole following range. For a string
+ * element, codepoint is set to IS_STRING and the string field points at the
+ * string; codepointEnd is not assigned in that case.
+ * <br>All ranges are returned before any strings.
+ */
+UBool UnicodeSetIterator::nextRange() {
+    string = NULL;
+
+    // Move on to the following range only once the current one is used up.
+    UBool haveRange = nextElement <= endElement;
+    if (!haveRange && range < endRange) {
+        loadRange(++range);
+        haveRange = TRUE;
+    }
+    if (haveRange) {
+        codepointEnd = endElement;
+        codepoint = nextElement;
+        // Mark the whole range as consumed.
+        nextElement = endElement+1;
+        return TRUE;
+    }
+
+    // Ranges are exhausted; continue with the strings, if any are left.
+    if (nextString < stringCount) {
+        codepoint = (UChar32)IS_STRING; // signal that value is actually a string
+        string = (const UnicodeString*) set->strings->elementAt(nextString++);
+        return TRUE;
+    }
+    return FALSE;
+}
+
+/**
+ * Points the iterator at another set and restarts the iteration,
+ * which allows reuse of the iterator.
+ * @param uSet the set to iterate over
+ */
+void UnicodeSetIterator::reset(const UnicodeSet& uSet) {
+    set = &uSet;
+    reset();
+}
+
+/**
+ * Rewinds to the start so that the iteration can run again.
+ * Without a set, the indices are set up for an empty iteration.
+ */
+void UnicodeSetIterator::reset() {
+    const UBool haveSet = set != NULL;
+    endRange = haveSet ? set->getRangeCount() - 1 : -1;
+    stringCount = haveSet ? set->stringsSize() : 0;
+
+    range = 0;
+    // Empty element window; replaced by the first range if there is one.
+    endElement = -1;
+    nextElement = 0;
+    if (endRange >= 0) {
+        loadRange(range);
+    }
+    nextString = 0;
+    string = NULL;
+}
+
+/**
+ * Makes range number iRange of the set the current element window:
+ * nextElement becomes its start and endElement its end.
+ */
+void UnicodeSetIterator::loadRange(int32_t iRange)
+{
+    nextElement = set->getRangeStart(iRange);
+    endElement = set->getRangeEnd(iRange);
+}
+
+
+/**
+ * Returns the current element as a string.
+ * For a string element this is the string itself. For a code point element
+ * the code point is written into a scratch string owned by the iterator,
+ * which is allocated on first use.
+ */
+const UnicodeString& UnicodeSetIterator::getString() {
+    const UBool needsConversion = string==NULL && codepoint!=(UChar32)IS_STRING;
+    if (needsConversion) {
+        if (cpString == NULL) {
+            cpString = new UnicodeString();
+        }
+        // The allocation may have failed; only fill the scratch string if it exists.
+        if (cpString != NULL) {
+            cpString->setTo((UChar32)codepoint);
+        }
+        string = cpString;
+    }
+    return *string;
+}
+
+U_NAMESPACE_END
+
+//eof
diff --git a/thirdparty/icu4c/common/ushape.cpp b/thirdparty/icu4c/common/ushape.cpp
new file mode 100644
index 0000000000..ae13b5c118
--- /dev/null
+++ b/thirdparty/icu4c/common/ushape.cpp
@@ -0,0 +1,1728 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ ******************************************************************************
+ *
+ * Copyright (C) 2000-2016, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ ******************************************************************************
+ * file name: ushape.cpp
+ * encoding: UTF-8
+ * tab size: 8 (not used)
+ * indentation:4
+ *
+ * created on: 2000jun29
+ * created by: Markus W. Scherer
+ *
+ * Arabic letter shaping implemented by Ayman Roshdy
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#include "unicode/ustring.h"
+#include "unicode/ushape.h"
+#include "cmemory.h"
+#include "putilimp.h"
+#include "ustr_imp.h"
+#include "ubidi_props.h"
+#include "uassert.h"
+
+/*
+ * This implementation is designed for 16-bit Unicode strings.
+ * The main assumption is that the Arabic characters and their
+ * presentation forms each fit into a single UChar.
+ * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII
+ * characters.
+ */
+
+/*
+ * ### TODO in general for letter shaping:
+ * - the letter shaping code is UTF-16-unaware; needs update
+ * + especially invertBuffer()?!
+ * - needs to handle the "Arabic Tail" that is used in some legacy codepages
+ * as a glyph fragment of wide-glyph letters
+ * + IBM Unicode conversion tables map it to U+200B (ZWSP)
+ * + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms
+ * + Unicode 3.2 added U+FE73 ARABIC TAIL FRAGMENT
+ */
+
+/* definitions for Arabic letter shaping ------------------------------------ */
+
+#define IRRELEVANT 4
+#define LAMTYPE 16
+#define ALEFTYPE 32
+#define LINKR 1
+#define LINKL 2
+#define APRESENT 8
+#define SHADDA 64
+#define CSHADDA 128
+#define COMBINE (SHADDA+CSHADDA)
+
+#define HAMZAFE_CHAR 0xfe80
+#define HAMZA06_CHAR 0x0621
+#define YEH_HAMZA_CHAR 0x0626
+#define YEH_HAMZAFE_CHAR 0xFE89
+#define LAMALEF_SPACE_SUB 0xFFFF
+#define TASHKEEL_SPACE_SUB 0xFFFE
+#define NEW_TAIL_CHAR 0xFE73
+#define OLD_TAIL_CHAR 0x200B
+#define LAM_CHAR 0x0644
+#define SPACE_CHAR 0x0020
+#define SHADDA_CHAR 0xFE7C
+#define TATWEEL_CHAR 0x0640
+#define SHADDA_TATWEEL_CHAR 0xFE7D
+#define SHADDA06_CHAR 0x0651
+
+#define SHAPE_MODE 0
+#define DESHAPE_MODE 1
+
+struct uShapeVariables { /* settings passed by value to the space-handling and expansion helpers below */
+ UChar tailChar; /* written into the space before a seen-family letter by expandCompositCharAtNear() */
+ uint32_t uShapeLamalefBegin; /* compared with (options & U_SHAPE_LAMALEF_MASK) to select the "begin" handling */
+ uint32_t uShapeLamalefEnd; /* compared with (options & U_SHAPE_LAMALEF_MASK) to select the "end" handling */
+ uint32_t uShapeTashkeelBegin; /* compared with (options & U_SHAPE_TASHKEEL_MASK) in handleGeneratedSpaces() */
+ uint32_t uShapeTashkeelEnd; /* compared with (options & U_SHAPE_TASHKEEL_MASK) in handleGeneratedSpaces() */
+ int spacesRelativeToTextBeginEnd; /* tested against 0 and 1 to pick end-first or begin-first for U_SHAPE_LAMALEF_AUTO */
+};
+
+static const uint8_t tailFamilyIsolatedFinal[] = { /* indexed by (ch - 0xFEB1); the value is what isSeenTailFamilyChar() returns for ch */
+ /* FEB1 */ 1,
+ /* FEB2 */ 1,
+ /* FEB3 */ 0,
+ /* FEB4 */ 0,
+ /* FEB5 */ 1,
+ /* FEB6 */ 1,
+ /* FEB7 */ 0,
+ /* FEB8 */ 0,
+ /* FEB9 */ 1,
+ /* FEBA */ 1,
+ /* FEBB */ 0,
+ /* FEBC */ 0,
+ /* FEBD */ 1,
+ /* FEBE */ 1
+};
+
+static const uint8_t tashkeelMedial[] = { /* indexed by (ch - 0xFE70); read by isTashkeelOnTatweelChar() and isIsolatedTashkeelChar() */
+ /* FE70 */ 0,
+ /* FE71 */ 1,
+ /* FE72 */ 0,
+ /* FE73 */ 0,
+ /* FE74 */ 0,
+ /* FE75 */ 0,
+ /* FE76 */ 0,
+ /* FE77 */ 1,
+ /* FE78 */ 0,
+ /* FE79 */ 1,
+ /* FE7A */ 0,
+ /* FE7B */ 1,
+ /* FE7C */ 0,
+ /* FE7D */ 1,
+ /* FE7E */ 0,
+ /* FE7F */ 1
+};
+
+static const UChar yehHamzaToYeh[] = /* indexed by (ch - YEH_HAMZAFE_CHAR) in expandCompositCharAtNear() */
+{
+/* isolated*/ 0xFEEF,
+/* final */ 0xFEF0
+};
+
+static const uint8_t IrrelevantPos[] = {
+ 0x0, 0x2, 0x4, 0x6,
+ 0x8, 0xA, 0xC, 0xE
+};
+
+
+static const UChar convertLamAlef[] = /* indexed by (ch - 0xFEF5); the character written next to LAM_CHAR when a LamAlef is expanded */
+{
+/*FEF5*/ 0x0622,
+/*FEF6*/ 0x0622,
+/*FEF7*/ 0x0623,
+/*FEF8*/ 0x0623,
+/*FEF9*/ 0x0625,
+/*FEFA*/ 0x0625,
+/*FEFB*/ 0x0627,
+/*FEFC*/ 0x0627
+};
+
+static const UChar araLink[178]=
+{
+ 1 + 32 + 256 * 0x11,/*0x0622*/
+ 1 + 32 + 256 * 0x13,/*0x0623*/
+ 1 + 256 * 0x15,/*0x0624*/
+ 1 + 32 + 256 * 0x17,/*0x0625*/
+ 1 + 2 + 256 * 0x19,/*0x0626*/
+ 1 + 32 + 256 * 0x1D,/*0x0627*/
+ 1 + 2 + 256 * 0x1F,/*0x0628*/
+ 1 + 256 * 0x23,/*0x0629*/
+ 1 + 2 + 256 * 0x25,/*0x062A*/
+ 1 + 2 + 256 * 0x29,/*0x062B*/
+ 1 + 2 + 256 * 0x2D,/*0x062C*/
+ 1 + 2 + 256 * 0x31,/*0x062D*/
+ 1 + 2 + 256 * 0x35,/*0x062E*/
+ 1 + 256 * 0x39,/*0x062F*/
+ 1 + 256 * 0x3B,/*0x0630*/
+ 1 + 256 * 0x3D,/*0x0631*/
+ 1 + 256 * 0x3F,/*0x0632*/
+ 1 + 2 + 256 * 0x41,/*0x0633*/
+ 1 + 2 + 256 * 0x45,/*0x0634*/
+ 1 + 2 + 256 * 0x49,/*0x0635*/
+ 1 + 2 + 256 * 0x4D,/*0x0636*/
+ 1 + 2 + 256 * 0x51,/*0x0637*/
+ 1 + 2 + 256 * 0x55,/*0x0638*/
+ 1 + 2 + 256 * 0x59,/*0x0639*/
+ 1 + 2 + 256 * 0x5D,/*0x063A*/
+ 0, 0, 0, 0, 0, /*0x063B-0x063F*/
+ 1 + 2, /*0x0640*/
+ 1 + 2 + 256 * 0x61,/*0x0641*/
+ 1 + 2 + 256 * 0x65,/*0x0642*/
+ 1 + 2 + 256 * 0x69,/*0x0643*/
+ 1 + 2 + 16 + 256 * 0x6D,/*0x0644*/
+ 1 + 2 + 256 * 0x71,/*0x0645*/
+ 1 + 2 + 256 * 0x75,/*0x0646*/
+ 1 + 2 + 256 * 0x79,/*0x0647*/
+ 1 + 256 * 0x7D,/*0x0648*/
+ 1 + 256 * 0x7F,/*0x0649*/
+ 1 + 2 + 256 * 0x81,/*0x064A*/
+ 4 + 256 * 1, /*0x064B*/
+ 4 + 128 + 256 * 1, /*0x064C*/
+ 4 + 128 + 256 * 1, /*0x064D*/
+ 4 + 128 + 256 * 1, /*0x064E*/
+ 4 + 128 + 256 * 1, /*0x064F*/
+ 4 + 128 + 256 * 1, /*0x0650*/
+ 4 + 64 + 256 * 3, /*0x0651*/
+ 4 + 256 * 1, /*0x0652*/
+ 4 + 256 * 7, /*0x0653*/
+ 4 + 256 * 8, /*0x0654*/
+ 4 + 256 * 8, /*0x0655*/
+ 4 + 256 * 1, /*0x0656*/
+ 0, 0, 0, 0, 0, /*0x0657-0x065B*/
+ 1 + 256 * 0x85,/*0x065C*/
+ 1 + 256 * 0x87,/*0x065D*/
+ 1 + 256 * 0x89,/*0x065E*/
+ 1 + 256 * 0x8B,/*0x065F*/
+ 0, 0, 0, 0, 0, /*0x0660-0x0664*/
+ 0, 0, 0, 0, 0, /*0x0665-0x0669*/
+ 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/
+ 4 + 256 * 6, /*0x0670*/
+ 1 + 8 + 256 * 0x00,/*0x0671*/
+ 1 + 32, /*0x0672*/
+ 1 + 32, /*0x0673*/
+ 0, /*0x0674*/
+ 1 + 32, /*0x0675*/
+ 1, 1, /*0x0676-0x0677*/
+ 1 + 2, /*0x0678*/
+ 1 + 2 + 8 + 256 * 0x16,/*0x0679*/
+ 1 + 2 + 8 + 256 * 0x0E,/*0x067A*/
+ 1 + 2 + 8 + 256 * 0x02,/*0x067B*/
+ 1+2, 1+2, /*0x67C-0x067D*/
+ 1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/
+ 1+2, 1+2, 1+2+8+256 * 0x2A, 1+2, /*0x0684-0x0687*/
+ 1 + 8 + 256 * 0x38,/*0x0688*/
+ 1, 1, 1, /*0x0689-0x068B*/
+ 1 + 8 + 256 * 0x34,/*0x068C*/
+ 1 + 8 + 256 * 0x32,/*0x068D*/
+ 1 + 8 + 256 * 0x36,/*0x068E*/
+ 1, 1, /*0x068F-0x0690*/
+ 1 + 8 + 256 * 0x3C,/*0x0691*/
+ 1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1, /*0x0692-0x0699*/
+ 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/
+ 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/
+ 1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06AD*/
+ 1+2, 1+2, 1+2, 1+2, /*0x06A4-0x06AD*/
+ 1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/
+ 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/
+ 1+2, 1+2, /*0x06B8-0x06B9*/
+ 1 + 8 + 256 * 0x4E,/*0x06BA*/
+ 1 + 2 + 8 + 256 * 0x50,/*0x06BB*/
+ 1+2, 1+2, /*0x06BC-0x06BD*/
+ 1 + 2 + 8 + 256 * 0x5A,/*0x06BE*/
+ 1+2, /*0x06BF*/
+ 1 + 8 + 256 * 0x54,/*0x06C0*/
+ 1 + 2 + 8 + 256 * 0x56,/*0x06C1*/
+ 1, 1, 1, /*0x06C2-0x06C4*/
+ 1 + 8 + 256 * 0x90,/*0x06C5*/
+ 1 + 8 + 256 * 0x89,/*0x06C6*/
+ 1 + 8 + 256 * 0x87,/*0x06C7*/
+ 1 + 8 + 256 * 0x8B,/*0x06C8*/
+ 1 + 8 + 256 * 0x92,/*0x06C9*/
+ 1, /*0x06CA*/
+ 1 + 8 + 256 * 0x8E,/*0x06CB*/
+ 1 + 2 + 8 + 256 * 0xAC,/*0x06CC*/
+ 1, /*0x06CD*/
+ 1+2, 1+2, /*0x06CE-0x06CF*/
+ 1 + 2 + 8 + 256 * 0x94,/*0x06D0*/
+ 1+2, /*0x06D1*/
+ 1 + 8 + 256 * 0x5E,/*0x06D2*/
+ 1 + 8 + 256 * 0x60 /*0x06D3*/
+};
+
+static const uint8_t presALink[] = {
+/***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/
+/*FB5*/ 0, 1, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0,
+/*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0,
+/*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
+/*FB9*/ 2,1 + 2, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2,
+/*FC0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*FC1*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*FC2*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*FC3*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*FC4*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*FC5*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4,
+/*FC6*/ 4, 4, 4
+};
+
+static const uint8_t presBLink[]=
+{
+/***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/
+/*FE7*/1 + 2,1 + 2,1 + 2, 0,1 + 2, 0,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,
+/*FE8*/ 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2,1 + 2, 0, 1, 0,
+/*FE9*/ 1, 2,1 + 2, 0, 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
+/*FEA*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0,
+/*FEB*/ 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
+/*FEC*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
+/*FED*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
+/*FEE*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0,
+/*FEF*/ 1, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0
+};
+
+static const UChar convertFBto06[] =
+{
+/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
+/*FB5*/ 0x671, 0x671, 0x67B, 0x67B, 0x67B, 0x67B, 0x67E, 0x67E, 0x67E, 0x67E, 0, 0, 0, 0, 0x67A, 0x67A,
+/*FB6*/ 0x67A, 0x67A, 0, 0, 0, 0, 0x679, 0x679, 0x679, 0x679, 0, 0, 0, 0, 0, 0,
+/*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x686, 0x686, 0x686, 0x686, 0, 0,
+/*FB8*/ 0, 0, 0x68D, 0x68D, 0x68C, 0x68C, 0x68E, 0x68E, 0x688, 0x688, 0x698, 0x698, 0x691, 0x691, 0x6A9, 0x6A9,
+/*FB9*/ 0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF, 0, 0, 0, 0, 0, 0, 0, 0, 0x6BA, 0x6BA,
+/*FBA*/ 0x6BB, 0x6BB, 0x6BB, 0x6BB, 0x6C0, 0x6C0, 0x6C1, 0x6C1, 0x6C1, 0x6C1, 0x6BE, 0x6BE, 0x6BE, 0x6BE, 0x6d2, 0x6D2,
+/*FBB*/ 0x6D3, 0x6D3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0x6C7, 0x6C7, 0x6C6, 0x6C6, 0x6C8, 0x6C8, 0, 0x6CB, 0x6CB,
+/*FBE*/ 0x6C5, 0x6C5, 0x6C9, 0x6C9, 0x6D0, 0x6D0, 0x6D0, 0x6D0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x6CC, 0x6CC, 0x6CC, 0x6CC
+};
+
+static const UChar convertFEto06[] =
+{
+/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
+/*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
+/*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628,
+/*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C,
+/*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632,
+/*FEB*/ 0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636,
+/*FEC*/ 0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A,
+/*FED*/ 0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644,
+/*FEE*/ 0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649,
+/*FEF*/ 0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F
+};
+
+static const uint8_t shapeTable[4][4][4]=
+{
+ { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} },
+ { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} },
+ { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} },
+ { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }
+};
+
+/*
+ *Name     : _shapeToArabicDigitsWithContext
+ *Function : Rewrites European digits (U+0030..U+0039) in place, but only
+ *           those whose most recent strong character has bidi class AL.
+ *           Such a digit becomes digitBase + (digit - 0x30).
+ *           Only BMP code points matter here, so the text is handled
+ *           one UChar at a time.
+ *           isLogical selects a front-to-back scan, otherwise the buffer
+ *           is scanned back-to-front; lastStrongWasAL seeds the context.
+ */
+static void
+_shapeToArabicDigitsWithContext(UChar *s, int32_t length,
+                                UChar digitBase,
+                                UBool isLogical, UBool lastStrongWasAL) {
+    int32_t visited;
+
+    /* fold the "- 0x30" into the base once, so that base + c is the target digit */
+    digitBase-=0x30;
+
+    for(visited=0; visited<length; ++visited) {
+        /* the iteration direction depends on the type of input */
+        const int32_t pos = isLogical ? visited : length-1-visited;
+        const UChar c = s[pos];
+
+        switch(ubidi_getClass(c)) {
+        case U_RIGHT_TO_LEFT_ARABIC: /* AL */
+            lastStrongWasAL=TRUE;
+            break;
+        case U_LEFT_TO_RIGHT: /* L */
+        case U_RIGHT_TO_LEFT: /* R */
+            lastStrongWasAL=FALSE;
+            break;
+        case U_EUROPEAN_NUMBER: /* EN */
+            if(lastStrongWasAL && (uint32_t)(c-0x30)<10) {
+                s[pos]=(UChar)(digitBase+c);
+            }
+            break;
+        default :
+            break;
+        }
+    }
+}
+
+/*
+ *Name     : invertBuffer
+ *Function : Reverses, in place, the part of the buffer that starts at
+ *           index lowlimit and ends at index size-highlimit-1.
+ *           The options argument is not used.
+ */
+static void
+invertBuffer(UChar *buffer, int32_t size, uint32_t /*options*/, int32_t lowlimit, int32_t highlimit) {
+    int32_t left = lowlimit;
+    int32_t right = size-highlimit-1;
+
+    while(left < right) {
+        const UChar swapped = buffer[left];
+        buffer[left++] = buffer[right];
+        buffer[right--] = swapped;
+    }
+}
+
+/*
+ *Name     : changeLamAlef
+ *Function : Maps each of the four Alef characters 0x0622, 0x0623, 0x0625
+ *           and 0x0627 to its intermediate LamAlef slot 0x065C..0x065F
+ *           in the 0x06xx range. Any other character yields 0.
+ */
+static inline UChar
+changeLamAlef(UChar ch) {
+    if(ch == 0x0622) {
+        return 0x065C;
+    }
+    if(ch == 0x0623) {
+        return 0x065D;
+    }
+    if(ch == 0x0625) {
+        return 0x065E;
+    }
+    if(ch == 0x0627) {
+        return 0x065F;
+    }
+    return 0;
+}
+
+/*
+ *Name     : getLink
+ *Function : Returns the link value of a character:
+ *           0x0622..0x06D3 -> araLink entry
+ *           0x200D         -> 3
+ *           0x206D..0x206F -> 4
+ *           0xFB50..0xFC62 -> presALink entry
+ *           0xFE70..0xFEFC -> presBLink entry
+ *           anything else  -> 0
+ */
+static UChar
+getLink(UChar ch) {
+    if(ch >= 0x0622 && ch <= 0x06D3) {
+        return araLink[ch-0x0622];
+    }
+    if(ch == 0x200D) {
+        return 3;
+    }
+    if(ch >= 0x206D && ch <= 0x206F) {
+        return 4;
+    }
+    if(ch >= 0xFB50 && ch <= 0xFC62) {
+        return presALink[ch-0xFB50];
+    }
+    if(ch >= 0xFE70 && ch <= 0xFEFC) {
+        return presBLink[ch-0xFE70];
+    }
+    return 0;
+}
+
+/*
+ *Name     : countSpaces
+ *Function : Counts the SPACE_CHAR characters at each end of the buffer.
+ *           *spacesCountl receives the number of leading spaces and
+ *           *spacesCountr the number of trailing spaces. When the whole
+ *           buffer consists of spaces (or size <= 0), everything is
+ *           counted as leading and *spacesCountr is 0.
+ *           The options argument is not used.
+ */
+static void
+countSpaces(UChar *dest, int32_t size, uint32_t /*options*/, int32_t *spacesCountl, int32_t *spacesCountr) {
+    int32_t countl = 0,countr = 0;
+
+    /* check the bound before reading so that dest[size] is never touched */
+    while((countl < size) && (dest[countl] == SPACE_CHAR)) {
+        countl++;
+    }
+    if (countl < size) { /* the entire buffer is not all space */
+        /* dest[countl] is a non-space, so this scan stops at or before it */
+        while(dest[size-1-countr] == SPACE_CHAR) {
+            countr++;
+        }
+    }
+    *spacesCountl = countl;
+    *spacesCountr = countr;
+}
+
+/*
+ *Name     : isTashkeelChar
+ *Function : Returns 1 for characters in 0x064B..0x0652 (Tashkeel in the
+ *           06 range), otherwise 0
+ */
+static inline int32_t
+isTashkeelChar(UChar ch) {
+    if(ch < 0x064B || ch > 0x0652) {
+        return 0;
+    }
+    return 1;
+}
+
+/*
+ *Name     : isTashkeelCharFE
+ *Function : Returns 1 for characters in 0xFE70..0xFE7F (Tashkeel in the
+ *           FE range), otherwise 0
+ */
+static inline int32_t
+isTashkeelCharFE(UChar ch) {
+    if(ch < 0xFE70 || ch > 0xFE7F) {
+        return 0;
+    }
+    return 1;
+}
+
+/*
+ *Name     : isAlefChar
+ *Function : Returns 1 for the Alef characters 0x0622, 0x0623, 0x0625 and
+ *           0x0627, otherwise 0
+ */
+static inline int32_t
+isAlefChar(UChar ch) {
+    switch(ch) {
+    case 0x0622:
+    case 0x0623:
+    case 0x0625:
+    case 0x0627:
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/*
+ *Name     : isLamAlefChar
+ *Function : Returns 1 for the LamAlef characters 0xFEF5..0xFEFC,
+ *           otherwise 0
+ */
+static inline int32_t
+isLamAlefChar(UChar ch) {
+    if(ch < 0xFEF5 || ch > 0xFEFC) {
+        return 0;
+    }
+    return 1;
+}
+
+/*BIDI
+ *Name     : isTailChar
+ *Function : Returns 1 if the character is one of the two tail characters
+ *           (OLD_TAIL_CHAR 0x200B or NEW_TAIL_CHAR 0xFE73), otherwise 0
+ */
+
+static inline int32_t
+isTailChar(UChar ch) {
+    return (ch == NEW_TAIL_CHAR || ch == OLD_TAIL_CHAR) ? 1 : 0;
+}
+
+/*BIDI
+ *Name     : isSeenTailFamilyChar
+ *Function : For characters in 0xFEB1..0xFEBE returns the matching
+ *           tailFamilyIsolatedFinal entry (1 for the isolated and final
+ *           forms listed there, 0 for the others); returns 0 for any
+ *           character outside that range
+ */
+
+static inline int32_t
+isSeenTailFamilyChar(UChar ch) {
+    if(ch < 0xfeb1 || ch >= 0xfebf){
+        return 0;
+    }
+    return tailFamilyIsolatedFinal [ch - 0xFEB1];
+}
+
+/*
+ *Name     : isSeenFamilyChar
+ *Function : Returns 1 if the character is one of the seen family
+ *           characters 0x0633..0x0636 in the Unicode 06 range,
+ *           otherwise 0
+ */
+
+static inline int32_t
+isSeenFamilyChar(UChar ch){
+    return (ch >= 0x633 && ch <= 0x636) ? 1 : 0;
+}
+
+/*Start of BIDI*/
+/*
+ *Name     : isAlefMaksouraChar
+ *Function : Returns 1 if the character is 0x0649, 0xFEEF or 0xFEF0
+ *           (Alef Maksoura and its isolated/final forms), otherwise 0
+ */
+static inline int32_t
+isAlefMaksouraChar(UChar ch) {
+    switch(ch) {
+    case 0x0649:
+    case 0xFEEF:
+    case 0xFEF0:
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/*
+ *Name     : isYehHamzaChar
+ *Function : Returns 1 if the character is 0xFE89 or 0xFE8A (YehHamza
+ *           isolated or final), otherwise 0
+ */
+static inline int32_t
+isYehHamzaChar(UChar ch) {
+    return (ch == 0xFE89 || ch == 0xFE8A) ? 1 : 0;
+}
+
+/*
+ *Name     : isTashkeelOnTatweelChar
+ *Function : Classifies a Tashkeel character that sits on a Tatweel.
+ *           Returns 2 for SHADDA_TATWEEL_CHAR and for 0xFCF2..0xFCF4
+ *           (Tashkeel with Shadda on Tatweel).
+ *           For the other characters in 0xFE70..0xFE7F, except
+ *           NEW_TAIL_CHAR and 0xFE75, returns the tashkeelMedial entry
+ *           (1 when the Tashkeel is on a Tatweel, else 0).
+ *           Returns 0 for everything else.
+ */
+static inline int32_t
+isTashkeelOnTatweelChar(UChar ch){
+    if((ch == SHADDA_TATWEEL_CHAR) || (ch >= 0xfcf2 && ch <= 0xfcf4)) {
+        return 2;
+    }
+    if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75) {
+        return tashkeelMedial [ch - 0xFE70];
+    }
+    return 0;
+}
+
+/*
+ *Name     : isIsolatedTashkeelChar
+ *Function : Checks whether a Tashkeel character is in isolated form.
+ *           For characters in 0xFE70..0xFE7F, except NEW_TAIL_CHAR and
+ *           0xFE75, returns 1 - tashkeelMedial entry (1 for isolated
+ *           forms, 0 for forms on a Tatweel).
+ *           Returns 1 for 0xFC5E..0xFC63 (FC range).
+ *           Returns 0 for everything else.
+ */
+static inline int32_t
+isIsolatedTashkeelChar(UChar ch){
+    if(ch >= 0xfe70 && ch <= 0xfe7f) {
+        if(ch != NEW_TAIL_CHAR && ch != 0xFE75) {
+            return (1 - tashkeelMedial [ch - 0xFE70]);
+        }
+        return 0;
+    }
+    return (ch >= 0xfc5e && ch <= 0xfc63) ? 1 : 0;
+}
+
+
+
+
+/*
+ *Name     : calculateSize
+ *Function : Computes the length the text will have after shaping or
+ *           unshaping with the given options. The incoming destSize is
+ *           ignored; the computation starts from sourceLength.
+ *           - Shaping with a LamAlef or Tashkeel RESIZE option: one is
+ *             subtracted for every Lam/Alef pair (the pair order depends
+ *             on the text direction) and for every FE-range Tashkeel.
+ *           - Unshaping with LamAlef RESIZE: one is added for every
+ *             LamAlef character.
+ */
+
+static int32_t
+calculateSize(const UChar *source, int32_t sourceLength,
+int32_t destSize,uint32_t options) {
+    const uint32_t letters = options&U_SHAPE_LETTERS_MASK;
+    const uint32_t lamAlefMode = options&U_SHAPE_LAMALEF_MASK;
+    const uint32_t tashkeelMode = options&U_SHAPE_TASHKEEL_MASK;
+    const uint32_t direction = options&U_SHAPE_TEXT_DIRECTION_MASK;
+    int32_t pos;
+
+    const int shrinkLamAlef =
+        (letters == U_SHAPE_LETTERS_SHAPE || letters == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) &&
+        (lamAlefMode == U_SHAPE_LAMALEF_RESIZE);
+    const int shrinkTashkeel =
+        (letters == U_SHAPE_LETTERS_SHAPE) && (tashkeelMode == U_SHAPE_TASHKEEL_RESIZE);
+
+    destSize = sourceLength;
+
+    if(shrinkLamAlef || shrinkTashkeel) {
+        const int visualLtr = (direction == U_SHAPE_TEXT_DIRECTION_VISUAL_LTR);
+        const int logical = (direction == U_SHAPE_TEXT_DIRECTION_LOGICAL);
+
+        if(visualLtr || logical) {
+            for(pos = 0; pos < sourceLength; pos++) {
+                int lamAlefPair = 0;
+
+                /* a pair needs a following character */
+                if(pos < (sourceLength-1)) {
+                    if(visualLtr) {
+                        /* visual LTR stores the Alef before the Lam */
+                        lamAlefPair = isAlefChar(source[pos]) && (source[pos+1] == LAM_CHAR);
+                    } else {
+                        lamAlefPair = (source[pos] == LAM_CHAR) && isAlefChar(source[pos+1]);
+                    }
+                }
+                if(lamAlefPair || isTashkeelCharFE(source[pos])) {
+                    destSize--;
+                }
+            }
+        }
+    }
+
+    if(letters == U_SHAPE_LETTERS_UNSHAPE && lamAlefMode == U_SHAPE_LAMALEF_RESIZE) {
+        for(pos = 0; pos < sourceLength; pos++) {
+            if(isLamAlefChar(source[pos])) {
+                destSize++;
+            }
+        }
+    }
+
+    return destSize;
+}
+
+/*
+ *Name     : handleTashkeelWithTatweel
+ *Function : Rewrites the Tashkeel characters of the buffer in place:
+ *           Case 1: a Tashkeel on Tatweel becomes TATWEEL_CHAR.
+ *           Case 2: a Tashkeel combined with Shadda on Tatweel becomes
+ *                   SHADDA_TATWEEL_CHAR.
+ *           Case 3: an isolated Tashkeel other than SHADDA_CHAR becomes
+ *                   SPACE_CHAR.
+ *           Returns sourceLength; the remaining arguments are not used.
+ */
+static int32_t
+handleTashkeelWithTatweel(UChar *dest, int32_t sourceLength,
+                          int32_t /*destSize*/, uint32_t /*options*/,
+                          UErrorCode * /*pErrorCode*/) {
+    int pos;
+    for(pos = 0; pos < sourceLength; pos++){
+        const UChar current = dest[pos];
+        const int32_t onTatweel = isTashkeelOnTatweelChar(current);
+
+        if(onTatweel == 1){
+            dest[pos] = TATWEEL_CHAR;
+        }else if(onTatweel == 2){
+            dest[pos] = SHADDA_TATWEEL_CHAR;
+        }else if(current != SHADDA_CHAR && isIsolatedTashkeelChar(current)){
+            dest[pos] = SPACE_CHAR;
+        }
+    }
+    return sourceLength;
+}
+
+
+
+/*
+ *Name     : handleGeneratedSpaces
+ *Function : The shapeUnicode function converts Lam + Alef into LamAlef + space,
+ *           and Tashkeel to space. The generated spaces are marked with
+ *           LAMALEF_SPACE_SUB and TASHKEEL_SPACE_SUB.
+ *           handleGeneratedSpaces function puts these generated spaces
+ *           according to the options the user specifies. LamAlef and Tashkeel
+ *           spaces can be replaced at begin, at end, at near or decrease the
+ *           buffer size.
+ *
+ *           There is also Auto option for LamAlef and tashkeel, which will put
+ *           the spaces at end of the buffer (or end of text if the user used
+ *           the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END).
+ *
+ *           If the text type was visual_LTR and the option
+ *           U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END
+ *           option will place the space at the beginning of the buffer and
+ *           BEGIN will place the space at the end of the buffer.
+ *
+ *           Only dest[0..sourceLength-1] is read. Returns the resulting text
+ *           length, or 0 with *pErrorCode set to U_MEMORY_ALLOCATION_ERROR
+ *           when the work buffer cannot be allocated.
+ */
+
+static int32_t
+handleGeneratedSpaces(UChar *dest, int32_t sourceLength,
+                    int32_t destSize,
+                    uint32_t options,
+                    UErrorCode *pErrorCode,struct uShapeVariables shapeVars ) {
+
+    int32_t i = 0, j = 0;
+    int32_t count = 0;
+    UChar *tempbuffer=NULL;
+
+    int lamAlefOption = 0;
+    int tashkeelOption = 0;
+    int shapingMode = SHAPE_MODE;
+
+    if (shapingMode == 0){
+        if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE ){
+            lamAlefOption = 1;
+        }
+        if ( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ){
+            tashkeelOption = 1;
+        }
+    }
+
+    tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
+    /* Test for NULL */
+    if(tempbuffer == NULL) {
+        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+
+    /* RESIZE: drop the markers, close the gaps, pad the tail with zeros */
+    if (lamAlefOption || tashkeelOption){
+        /* the memset provides the zero padding behind the kept characters */
+        uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
+
+        j = 0;
+        for(i = 0; i < sourceLength; i++) {
+            if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) ||
+               (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){
+                continue;
+            }
+            tempbuffer[j++] = dest[i];
+        }
+
+        u_memcpy(dest, tempbuffer, sourceLength);
+        /*
+         * The new length is the number of characters kept. It is taken from j
+         * rather than from u_strlen(dest): dest carries no terminator when
+         * nothing was removed, so u_strlen could read past the text.
+         */
+        destSize = j;
+    }
+
+    lamAlefOption = 0;
+
+    if (shapingMode == 0){
+        if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR ){
+            lamAlefOption = 1;
+        }
+    }
+
+    /* NEAR: the marker simply becomes a space where it stands */
+    if (lamAlefOption){
+        /* Lam+Alef is already shaped into LamAlef + FFFF */
+        for(i = 0; i < sourceLength; i++) {
+            if(dest[i] == LAMALEF_SPACE_SUB){
+                dest[i] = SPACE_CHAR;
+            }
+        }
+        destSize = sourceLength;
+    }
+    lamAlefOption = 0;
+    tashkeelOption = 0;
+
+    if (shapingMode == 0) {
+        if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin) ||
+             (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO )
+             && (shapeVars.spacesRelativeToTextBeginEnd==1)) ) {
+            lamAlefOption = 1;
+        }
+        if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelBegin ) {
+            tashkeelOption = 1;
+        }
+    }
+
+    /* BEGIN: push the kept characters to the end, spaces go in front */
+    if(lamAlefOption || tashkeelOption){
+        uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
+
+        /*
+         * Walk backwards from the last character of the text. Starting at
+         * sourceLength-1 (not sourceLength) keeps the scan inside the text.
+         */
+        j = sourceLength-1;
+        count = 0;
+        for(i = sourceLength-1; i >= 0; i--) {
+            if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) ||
+                 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){
+                count++;
+            }else {
+                tempbuffer[j--] = dest[i];
+            }
+        }
+
+        for(i=0 ;i < count; i++){
+            tempbuffer[i] = SPACE_CHAR;
+        }
+
+        u_memcpy(dest, tempbuffer, sourceLength);
+        destSize = sourceLength;
+    }
+
+    lamAlefOption = 0;
+    tashkeelOption = 0;
+
+    if (shapingMode == 0) {
+        if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd) ||
+             (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO )
+             && (shapeVars.spacesRelativeToTextBeginEnd==0)) ) {
+            lamAlefOption = 1;
+        }
+        if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelEnd ){
+            tashkeelOption = 1;
+        }
+    }
+
+    /* END: pull the kept characters to the front, spaces go behind them */
+    if(lamAlefOption || tashkeelOption){
+        uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
+
+        j = 0;
+        for(i = 0; i < sourceLength; i++) {
+            if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) ||
+                 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){
+                continue;
+            }
+            tempbuffer[j++] = dest[i];
+        }
+
+        for(i = j; i < sourceLength; i++) {
+            tempbuffer[i] = SPACE_CHAR;
+        }
+
+        u_memcpy(dest, tempbuffer, sourceLength);
+        destSize = sourceLength;
+    }
+
+    uprv_free(tempbuffer);
+
+    return destSize;
+}
+
+/*
+ *Name     :expandCompositCharAtBegin
+ *Function :Expands the LamAlef character to Lam and Alef consuming the required
+ *         space from beginning of the buffer. If the text type was visual_LTR
+ *         and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected
+ *         the spaces will be located at end of buffer.
+ *         If there are no spaces to expand the LamAlef, an error
+ *         will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
+ *
+ *         Returns sourceLength, or 0 with *pErrorCode set to
+ *         U_MEMORY_ALLOCATION_ERROR when the work buffer cannot be allocated.
+ */
+
+static int32_t
+expandCompositCharAtBegin(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) {
+    int32_t i = 0,j = 0;
+    int32_t countl = 0;
+    UChar *tempbuffer=NULL;
+
+    tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
+
+    /* Test for NULL */
+    if(tempbuffer == NULL) {
+        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+
+    uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
+
+    /* count the leading spaces; the bound keeps an all-space text from being overrun */
+    i = 0;
+    while(i < sourceLength && dest[i] == SPACE_CHAR) {
+        countl++;
+        i++;
+    }
+
+    /* copy back to front; every expanded LamAlef uses up one leading space */
+    i = j = sourceLength-1;
+
+    while(i >= 0 && j >= 0) {
+        if( countl>0 && isLamAlefChar(dest[i])) {
+            tempbuffer[j] = LAM_CHAR;
+            /* to ensure the array index is within the range */
+            U_ASSERT(dest[i] >= 0xFEF5u
+                && dest[i]-0xFEF5u < UPRV_LENGTHOF(convertLamAlef));
+            tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ];
+            j--;
+            countl--;
+        }else {
+            if( countl == 0 && isLamAlefChar(dest[i]) ) {
+                *pErrorCode=U_NO_SPACE_AVAILABLE;
+            }
+            tempbuffer[j] = dest[i];
+        }
+        i--;
+        j--;
+    }
+    u_memcpy(dest, tempbuffer, sourceLength);
+
+    uprv_free(tempbuffer);
+
+    destSize = sourceLength;
+    return destSize;
+}
+
+/*
+ *Name     : expandCompositCharAtEnd
+ *Function : Expands the LamAlef character to Lam and Alef consuming the
+ *           required space from end of the buffer. If the text type was
+ *           Visual LTR and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END
+ *           was used, the spaces will be consumed from begin of buffer. If
+ *           there are no spaces to expand the LamAlef, an error
+ *           will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
+ *
+ *           Returns sourceLength, or 0 with *pErrorCode set to
+ *           U_MEMORY_ALLOCATION_ERROR when the work buffer cannot be
+ *           allocated.
+ */
+
+static int32_t
+expandCompositCharAtEnd(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) {
+    int32_t i = 0,j = 0;
+
+    int32_t countr = 0;
+    int32_t inpsize = sourceLength;
+
+    UChar *tempbuffer=NULL;
+    tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
+
+    /* Test for NULL */
+    if(tempbuffer == NULL) {
+        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+
+    uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
+
+    /* count the trailing spaces; the bound stops an empty or all-space text from reading dest[-1] */
+    while(inpsize > 0 && dest[inpsize-1] == SPACE_CHAR) {
+        countr++;
+        inpsize--;
+    }
+
+    /* copy back to front; every expanded LamAlef uses up one trailing space */
+    i = sourceLength - countr - 1;
+    j = sourceLength - 1;
+
+    while(i >= 0 && j >= 0) {
+        if( countr>0 && isLamAlefChar(dest[i]) ) {
+            tempbuffer[j] = LAM_CHAR;
+            /* to ensure the array index is within the range */
+            U_ASSERT(dest[i] >= 0xFEF5u
+                && dest[i]-0xFEF5u < UPRV_LENGTHOF(convertLamAlef));
+            tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ];
+            j--;
+            countr--;
+        }else {
+            if ((countr == 0) && isLamAlefChar(dest[i]) ) {
+                *pErrorCode=U_NO_SPACE_AVAILABLE;
+            }
+            tempbuffer[j] = dest[i];
+        }
+        i--;
+        j--;
+    }
+
+    if(countr > 0) {
+        /*
+         * countr spaces were left unused, so the text occupies
+         * tempbuffer[countr..sourceLength-1]. Shift exactly that many
+         * characters to the front (moving sourceLength characters would read
+         * past the allocation) and put the unused spaces back at the end.
+         */
+        const int32_t kept = sourceLength - countr;
+        u_memmove(tempbuffer, tempbuffer+countr, kept);
+        for(i = kept; i < sourceLength; i++) {
+            tempbuffer[i] = SPACE_CHAR;
+        }
+    }
+    u_memcpy(dest, tempbuffer, sourceLength);
+
+    uprv_free(tempbuffer);
+
+    destSize = sourceLength;
+    return destSize;
+}
+
+/*
+ *Name     : expandCompositCharAtNear
+ *Function : Expands the LamAlef character into Lam + Alef, YehHamza character
+ *           into Yeh + Hamza, SeenFamily character into SeenFamily character
+ *           + Tail, while consuming the space next to the character.
+ *           If there are no spaces next to the character, an error
+ *           will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
+ *
+ *           Each of the three expansions is performed only when its option
+ *           argument is non-zero. In every case the space that is consumed is
+ *           the one at the index directly before the character.
+ *           Returns sourceLength.
+ */
+
+static int32_t
+expandCompositCharAtNear(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode,
+                         int yehHamzaOption, int seenTailOption, int lamAlefOption, struct uShapeVariables shapeVars) {
+    int32_t i = 0;
+
+
+    UChar lamalefChar, yehhamzaChar;
+
+    for(i = 0 ;i<=sourceLength-1;i++) {
+            if (seenTailOption && isSeenTailFamilyChar(dest[i])) {
+                if ((i>0) && (dest[i-1] == SPACE_CHAR) ) {
+                    dest[i-1] = shapeVars.tailChar;
+                }else {
+                    *pErrorCode=U_NO_SPACE_AVAILABLE;
+                }
+            }else if(yehHamzaOption && (isYehHamzaChar(dest[i])) ) {
+                if ((i>0) && (dest[i-1] == SPACE_CHAR) ) {
+                    yehhamzaChar = dest[i];
+                    dest[i] = yehHamzaToYeh[yehhamzaChar - YEH_HAMZAFE_CHAR];
+                    dest[i-1] = HAMZAFE_CHAR;
+                }else {
+
+                    *pErrorCode=U_NO_SPACE_AVAILABLE;
+                }
+            }else if(lamAlefOption && (i+1 < sourceLength) && isLamAlefChar(dest[i+1])) {
+                /* the (i+1 < sourceLength) test keeps the look-ahead inside the text */
+                if(dest[i] == SPACE_CHAR){
+                    lamalefChar = dest[i+1];
+                    dest[i+1] = LAM_CHAR;
+                    dest[i] = convertLamAlef[ lamalefChar - 0xFEF5 ];
+                }else {
+                    *pErrorCode=U_NO_SPACE_AVAILABLE;
+                }
+            }
+       }
+       destSize = sourceLength;
+       return destSize;
+}
+/*
+ *Name     : expandCompositChar
+ *Function : LamAlef needs special handling, since it expands from one
+ *           character into two characters while shaping or deshaping.
+ *           The extra cell is taken from near or far spaces according to
+ *           the options the user specifies, or the buffer is enlarged.
+ *
+ *           For SeenFamily characters and YehHamza only the near option is
+ *           supported (shapingMode 0), while for LamAlef (shapingMode 1)
+ *           spaces can be taken from begin, end, near, or the buffer size
+ *           can be increased.
+ *           The Auto option, for LamAlef only, tries end then begin (or
+ *           begin then end when shapeVars.spacesRelativeToTextBeginEnd is
+ *           not 0) and finally near.
+ *           If there are no spaces to expand these characters, an error
+ *           will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
+ *
+ *           Returns the resulting length, or 0 with *pErrorCode set to
+ *           U_MEMORY_ALLOCATION_ERROR when the resize buffer cannot be
+ *           allocated.
+ */
+
+static int32_t
+expandCompositChar(UChar *dest, int32_t sourceLength,
+              int32_t destSize,uint32_t options,
+              UErrorCode *pErrorCode, int shapingMode,struct uShapeVariables shapeVars) {
+
+    const uint32_t lamAlefMode = options&U_SHAPE_LAMALEF_MASK;
+    const int deshaping = (shapingMode == 1);
+
+    int yehHamzaOption = 0;
+    int seenTailOption = 0;
+    int lamAlefOption = 0;
+
+    if(deshaping && lamAlefMode == U_SHAPE_LAMALEF_AUTO) {
+        const int endFirst = (shapeVars.spacesRelativeToTextBeginEnd == 0);
+
+        /* first choice */
+        if(endFirst) {
+            destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
+        }else {
+            destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
+        }
+
+        /* second choice: the opposite side */
+        if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
+            *pErrorCode = U_ZERO_ERROR;
+            if(endFirst) {
+                destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
+            }else {
+                destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
+            }
+        }
+
+        /* last resort: spaces right next to the LamAlef */
+        if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
+            *pErrorCode = U_ZERO_ERROR;
+            destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption,
+                                                seenTailOption, 1,shapeVars);
+        }
+    }
+
+    if(deshaping && lamAlefMode == shapeVars.uShapeLamalefEnd) {
+        destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
+    }
+
+    if(deshaping && lamAlefMode == shapeVars.uShapeLamalefBegin) {
+        destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
+    }
+
+    if(shapingMode == 0) {
+        yehHamzaOption = ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR) ? 1 : 0;
+        seenTailOption = ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR) ? 1 : 0;
+    }
+    if(deshaping && lamAlefMode == U_SHAPE_LAMALEF_NEAR) {
+        lamAlefOption = 1;
+    }
+
+    if(yehHamzaOption || seenTailOption || lamAlefOption) {
+        destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption,
+                                            seenTailOption,lamAlefOption,shapeVars);
+    }
+
+    if(deshaping && lamAlefMode == U_SHAPE_LAMALEF_RESIZE) {
+        UChar *tempbuffer;
+        int32_t in = 0, out = 0;
+
+        destSize = calculateSize(dest,sourceLength,destSize,options);
+        tempbuffer = (UChar *)uprv_malloc((destSize+1)*U_SIZEOF_UCHAR);
+
+        /* Test for NULL */
+        if(tempbuffer == NULL) {
+            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+            return 0;
+        }
+
+        uprv_memset(tempbuffer, 0, (destSize+1)*U_SIZEOF_UCHAR);
+
+        /* every LamAlef becomes two characters in the output: the Alef, then the Lam */
+        while(in < destSize && out < destSize) {
+            if(isLamAlefChar(dest[in]) ) {
+                tempbuffer[out++] = convertLamAlef[ dest[in] - 0xFEF5 ];
+                tempbuffer[out++] = LAM_CHAR;
+            }else {
+                tempbuffer[out++] = dest[in];
+            }
+            in++;
+        }
+
+        u_memcpy(dest, tempbuffer, destSize);
+        uprv_free(tempbuffer);
+    }
+
+    return destSize;
+}
+
+/*
+ *Name : shapeUnicode
+ *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped
+ * arabic Unicode buffer in FExx Range
+ *
+ * dest is modified in place and is scanned from index sourceLength-1 down
+ * to 0; dest[sourceLength-1] is read unconditionally, so sourceLength must
+ * be greater than 0.
+ * tashkeelFlag, as tested in the body:
+ * 1 - a tashkeel between two linking letters may take shape 1
+ * (0x064C and 0x064D are excluded)
+ * 2 - every tashkeel except SHADDA06_CHAR is replaced by
+ * TASHKEEL_SPACE_SUB; the shadda takes shape 1
+ * other values - tashkeel characters always take shape 0
+ * Returns the new text length: sourceLength, or whatever
+ * handleGeneratedSpaces() / expandCompositChar() return when they are run.
+ */
+static int32_t
+shapeUnicode(UChar *dest, int32_t sourceLength,
+ int32_t destSize,uint32_t options,
+ UErrorCode *pErrorCode,
+ int tashkeelFlag, struct uShapeVariables shapeVars) {
+
+ int32_t i, iend;
+ int32_t step;
+ int32_t lastPos,Nx, Nw;
+ unsigned int Shape;
+ int32_t lamalef_found = 0;
+ int32_t seenfamFound = 0, yehhamzaFound =0, tashkeelFound = 0;
+ UChar prevLink = 0, lastLink = 0, currLink, nextLink = 0;
+ UChar wLamalef;
+
+ /*
+ * Converts the input buffer from FExx Range into 06xx Range
+ * to make sure that all characters are in the 06xx range
+ * even the lamalef is converted to the special region in
+ * the 06xx range
+ */
+ if ((options & U_SHAPE_PRESERVE_PRESENTATION_MASK) == U_SHAPE_PRESERVE_PRESENTATION_NOOP) {
+ for (i = 0; i < sourceLength; i++) {
+ UChar inputChar = dest[i];
+ if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) {
+ UChar c = convertFBto06 [ (inputChar - 0xFB50) ];
+ if (c != 0)
+ dest[i] = c;
+ } else if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) {
+ dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ;
+ } else {
+ dest[i] = inputChar ;
+ }
+ }
+ }
+
+
+ /* sets the index to the end of the buffer, together with the step point to -1 */
+ i = sourceLength - 1;
+ iend = -1;
+ step = -1;
+
+ /*
+ * This function resolves the link between the characters .
+ * Arabic characters have four forms :
+ * Isolated Form, Initial Form, Middle Form and Final Form
+ */
+ currLink = getLink(dest[i]);
+
+ lastPos = i;
+ Nx = -2, Nw = 0; /* Nx < 0 means the next relevant character has not been located yet */
+
+ while (i != iend) {
+ /* If high byte of currLink > 0 then more than one shape */
+ if ((currLink & 0xFF00) > 0 || (getLink(dest[i]) & IRRELEVANT) != 0) {
+ Nw = i + step;
+ while (Nx < 0) { /* we need to know about next char */
+ if(Nw == iend) {
+ nextLink = 0;
+ Nx = 3000; /* sentinel: ends the lookup when no next character exists */
+ } else {
+ nextLink = getLink(dest[Nw]);
+ if((nextLink & IRRELEVANT) == 0) {
+ Nx = Nw;
+ } else {
+ Nw = Nw + step;
+ }
+ }
+ }
+
+ if ( ((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0) ) {
+ lamalef_found = 1;
+ wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */
+ if ( wLamalef != 0) {
+ dest[i] = LAMALEF_SPACE_SUB; /* The default case is to drop the Alef and replace */
+ dest[lastPos] =wLamalef; /* it by LAMALEF_SPACE_SUB which is the last character in the */
+ i=lastPos; /* unicode private use area, this is done to make */
+ } /* sure that removeLamAlefSpaces() handles only the */
+ lastLink = prevLink; /* spaces generated during lamalef generation. */
+ currLink = getLink(wLamalef); /* LAMALEF_SPACE_SUB is added here and is replaced by spaces */
+ } /* in removeLamAlefSpaces() */
+
+ if ((i > 0) && (dest[i-1] == SPACE_CHAR)){ /* a space at i-1: note Seen-family / YehHamza for the expansion pass */
+ if ( isSeenFamilyChar(dest[i])) {
+ seenfamFound = 1;
+ } else if (dest[i] == YEH_HAMZA_CHAR) {
+ yehhamzaFound = 1;
+ }
+ }
+ else if(i==0){ /* the character at index 0 is noted as well */
+ if ( isSeenFamilyChar(dest[i])){
+ seenfamFound = 1;
+ } else if (dest[i] == YEH_HAMZA_CHAR) {
+ yehhamzaFound = 1;
+ }
+ }
+
+ /*
+ * get the proper shape according to link ability of neighbors
+ * and of character; depends on the order of the shapes
+ * (isolated, initial, middle, final) in the compatibility area
+ */
+ Shape = shapeTable[nextLink & (LINKR + LINKL)]
+ [lastLink & (LINKR + LINKL)]
+ [currLink & (LINKR + LINKL)];
+
+ if ((currLink & (LINKR+LINKL)) == 1) {
+ Shape &= 1;
+ } else if(isTashkeelChar(dest[i])) {
+ if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) &&
+ dest[i] != 0x064C && dest[i] != 0x064D )
+ {
+ Shape = 1;
+ if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE ) {
+ Shape = 0;
+ }
+ } else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR){
+ Shape = 1;
+ } else {
+ Shape = 0;
+ }
+ }
+ if ((dest[i] ^ 0x0600) < 0x100) { /* true only for code units 0x0600..0x06FF */
+ if ( isTashkeelChar(dest[i]) ){
+ if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR){
+ dest[i] = TASHKEEL_SPACE_SUB;
+ tashkeelFound = 1;
+ } else {
+ /* to ensure the array index is within the range */
+ U_ASSERT(dest[i] >= 0x064Bu
+ && dest[i]-0x064Bu < UPRV_LENGTHOF(IrrelevantPos));
+ dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + static_cast<UChar>(Shape);
+ }
+ }else if ((currLink & APRESENT) > 0) {
+ dest[i] = (UChar)(0xFB50 + (currLink >> 8) + Shape);
+ }else if ((currLink >> 8) > 0 && (currLink & IRRELEVANT) == 0) {
+ dest[i] = (UChar)(0xFE70 + (currLink >> 8) + Shape);
+ }
+ }
+ }
+
+ /* move one notch forward */
+ if ((currLink & IRRELEVANT) == 0) {
+ prevLink = lastLink;
+ lastLink = currLink;
+ lastPos = i;
+ }
+
+ i = i + step;
+ if (i == Nx) { /* reached the character found by the lookahead: reuse its link */
+ currLink = nextLink;
+ Nx = -2;
+ } else if(i != iend) {
+ currLink = getLink(dest[i]);
+ }
+ }
+ destSize = sourceLength;
+ if ( (lamalef_found != 0 ) || (tashkeelFound != 0) ){
+ destSize = handleGeneratedSpaces(dest,sourceLength,destSize,options,pErrorCode, shapeVars);
+ }
+
+ if ( (seenfamFound != 0) || (yehhamzaFound != 0) ) {
+ destSize = expandCompositChar(dest, sourceLength,destSize,options,pErrorCode, SHAPE_MODE,shapeVars);
+ }
+ return destSize;
+}
+
+/*
+ *Name : deShapeUnicode
+ *Function : Converts an Arabic Unicode buffer in FExx Range into unshaped
+ * arabic Unicode buffer in 06xx Range
+ *
+ * dest is converted in place, one code unit at a time. When the
+ * corresponding TWOCELL_NEAR options are set, a hamza followed by an
+ * Alef Maksoura is rewritten as SPACE_CHAR + YEH_HAMZA_CHAR, and a tail
+ * character followed by a Seen-tail-family character becomes SPACE_CHAR.
+ * Returns sourceLength, or the result of expandCompositChar() when a
+ * LamAlef character is present after the conversion.
+ */
+static int32_t
+deShapeUnicode(UChar *dest, int32_t sourceLength,
+ int32_t destSize,uint32_t options,
+ UErrorCode *pErrorCode, struct uShapeVariables shapeVars) {
+ int32_t i = 0;
+ int32_t lamalef_found = 0;
+ int32_t yehHamzaComposeEnabled = 0;
+ int32_t seenComposeEnabled = 0;
+
+ yehHamzaComposeEnabled = ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR) ? 1 : 0;
+ seenComposeEnabled = ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR)? 1 : 0;
+
+ /*
+ *This for loop changes the buffer from the Unicode FE range to
+ *the Unicode 06 range
+ */
+
+ for(i = 0; i < sourceLength; i++) {
+ UChar inputChar = dest[i];
+ if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { /* FBxx Arabic range */
+ UChar c = convertFBto06 [ (inputChar - 0xFB50) ];
+ if (c != 0)
+ dest[i] = c;
+ } else if( (yehHamzaComposeEnabled == 1) && ((inputChar == HAMZA06_CHAR) || (inputChar == HAMZAFE_CHAR))
+ && (i < (sourceLength - 1)) && isAlefMaksouraChar(dest[i+1] )) {
+ dest[i] = SPACE_CHAR;
+ dest[i+1] = YEH_HAMZA_CHAR;
+ } else if ( (seenComposeEnabled == 1) && (isTailChar(inputChar)) && (i< (sourceLength - 1))
+ && (isSeenTailFamilyChar(dest[i+1])) ) {
+ dest[i] = SPACE_CHAR;
+ } else if (( inputChar >= 0xFE70) && (inputChar <= 0xFEF4 )) { /* FExx Arabic range */
+ dest[i] = convertFEto06 [ (inputChar - 0xFE70) ];
+ } else {
+ dest[i] = inputChar ;
+ }
+
+ if( isLamAlefChar(dest[i]) ) /* code units above 0xFEF4 are not converted above; presumably this keeps LamAlef ligatures for the expansion below - confirm */
+ lamalef_found = 1;
+ }
+
+ destSize = sourceLength;
+ if (lamalef_found != 0){
+ destSize = expandCompositChar(dest,sourceLength,destSize,options,pErrorCode,DESHAPE_MODE, shapeVars);
+ }
+ return destSize;
+}
+
+/*
+ ****************************************
+ * u_shapeArabic
+ ****************************************
+ *
+ * Public entry point: shapes or unshapes the letters of source according to
+ * options, writes the result to dest, then applies the requested digit
+ * shaping to dest in place.
+ *
+ * - Returns 0 without touching dest when pErrorCode is NULL or already
+ * holds a failure, or when an argument / option combination is rejected
+ * (U_ILLEGAL_ARGUMENT_ERROR); source and dest must not overlap.
+ * - sourceLength == -1 means the length is taken with u_strlen(source).
+ * - When dest is too small, U_BUFFER_OVERFLOW_ERROR is set and the
+ * required length is returned.
+ * - Otherwise the return value is that of u_terminateUChars() for the
+ * produced length.
+ */
+
+U_CAPI int32_t U_EXPORT2
+u_shapeArabic(const UChar *source, int32_t sourceLength,
+ UChar *dest, int32_t destCapacity,
+ uint32_t options,
+ UErrorCode *pErrorCode) {
+
+ int32_t destLength;
+ struct uShapeVariables shapeVars = { OLD_TAIL_CHAR,U_SHAPE_LAMALEF_BEGIN,U_SHAPE_LAMALEF_END,U_SHAPE_TASHKEEL_BEGIN,U_SHAPE_TASHKEEL_END,0};
+
+ /* usual error checking */
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
+ /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */
+ if( source==NULL || sourceLength<-1 || (dest==NULL && destCapacity!=0) || destCapacity<0 ||
+ (((options&U_SHAPE_TASHKEEL_MASK) > 0) &&
+ ((options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) ) ||
+ (((options&U_SHAPE_TASHKEEL_MASK) > 0) &&
+ ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE)) ||
+ (options&U_SHAPE_DIGIT_TYPE_RESERVED)==U_SHAPE_DIGIT_TYPE_RESERVED ||
+ (options&U_SHAPE_DIGITS_MASK)==U_SHAPE_DIGITS_RESERVED ||
+ ((options&U_SHAPE_LAMALEF_MASK) != U_SHAPE_LAMALEF_RESIZE &&
+ (options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) != 0) ||
+ ((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) == U_SHAPE_AGGREGATE_TASHKEEL &&
+ (options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) != U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)
+ )
+ {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ /* Validate lamalef options */
+ if(((options&U_SHAPE_LAMALEF_MASK) > 0)&&
+ !(((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_BEGIN) ||
+ ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_END ) ||
+ ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE )||
+ ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_AUTO) ||
+ ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_NEAR)))
+ {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ /* Validate Tashkeel options */
+ if(((options&U_SHAPE_TASHKEEL_MASK) > 0)&&
+ !(((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_BEGIN) ||
+ ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_END )
+ ||((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE )||
+ ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)))
+ {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ /* determine the source length */
+ if(sourceLength==-1) {
+ sourceLength=u_strlen(source);
+ }
+ if(sourceLength<=0) {
+ return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
+ }
+
+ /* check that source and destination do not overlap */
+ if( dest!=NULL &&
+ ((source<=dest && dest<source+sourceLength) ||
+ (dest<=source && source<dest+destCapacity))) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* Does Options contain the new Seen Tail Unicode code point option */
+ if ( (options&U_SHAPE_TAIL_TYPE_MASK) == U_SHAPE_TAIL_NEW_UNICODE){
+ shapeVars.tailChar = NEW_TAIL_CHAR;
+ }else {
+ shapeVars.tailChar = OLD_TAIL_CHAR;
+ }
+
+ if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) {
+ UChar buffer[300]; /* stack scratch buffer; larger inputs use the heap below */
+ UChar *tempbuffer, *tempsource = NULL;
+ int32_t outputSize, spacesCountl=0, spacesCountr=0;
+
+ if((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK)>0) {
+ int32_t logical_order = (options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL;
+ int32_t aggregate_tashkeel =
+ (options&(U_SHAPE_AGGREGATE_TASHKEEL_MASK+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)) ==
+ (U_SHAPE_AGGREGATE_TASHKEEL+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED);
+ int step=logical_order?1:-1;
+ int j=logical_order?-1:2*sourceLength;
+ int i=logical_order?-1:sourceLength;
+ int end=logical_order?sourceLength:-1;
+ int aggregation_possible = 1;
+ UChar prev = 0;
+ UChar prevLink, currLink = 0;
+ int newSourceLength = 0;
+ tempsource = (UChar *)uprv_malloc(2*sourceLength*U_SIZEOF_UCHAR); /* the non-logical traversal fills this backwards from index 2*sourceLength */
+ if(tempsource == NULL) {
+ *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+
+ while ((i+=step) != end) {
+ prevLink = currLink;
+ currLink = getLink(source[i]);
+ if (aggregate_tashkeel && ((prevLink|currLink)&COMBINE) == COMBINE && aggregation_possible) {
+ aggregation_possible = 0; /* the slot just written is overwritten with the combined form; never aggregate twice in a row */
+ tempsource[j] = (prev<source[i]?prev:source[i])-0x064C+0xFC5E;
+ currLink = getLink(tempsource[j]);
+ } else {
+ aggregation_possible = 1;
+ tempsource[j+=step] = source[i];
+ prev = source[i];
+ newSourceLength++;
+ }
+ }
+ source = tempsource+(logical_order?0:j); /* from here on source refers to the aggregated copy */
+ sourceLength = newSourceLength;
+ }
+
+ /* calculate destination size */
+ /* TODO: do we ever need to do this pure preflighting? */
+ if(((options&U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE) ||
+ ((options&U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE)) {
+ outputSize=calculateSize(source,sourceLength,destCapacity,options);
+ } else {
+ outputSize=sourceLength;
+ }
+
+ if(outputSize>destCapacity) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ if (tempsource != NULL) uprv_free(tempsource);
+ return outputSize;
+ }
+
+ /*
+ * need a temporary buffer of size max(outputSize, sourceLength)
+ * because at first we copy source->temp
+ */
+ if(sourceLength>outputSize) {
+ outputSize=sourceLength;
+ }
+
+ /* Start of Arabic letter shaping part */
+ if(outputSize<=UPRV_LENGTHOF(buffer)) {
+ outputSize=UPRV_LENGTHOF(buffer);
+ tempbuffer=buffer;
+ } else {
+ tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR);
+
+ /*Test for NULL*/
+ if(tempbuffer == NULL) {
+ *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+ if (tempsource != NULL) uprv_free(tempsource);
+ return 0;
+ }
+ }
+ u_memcpy(tempbuffer, source, sourceLength);
+ if (tempsource != NULL){ /* the aggregated copy is no longer needed once it is in tempbuffer */
+ uprv_free(tempsource);
+ }
+
+ if(sourceLength<outputSize) {
+ uprv_memset(tempbuffer+sourceLength, 0, (outputSize-sourceLength)*U_SIZEOF_UCHAR);
+ }
+
+ if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) {
+ countSpaces(tempbuffer,sourceLength,options,&spacesCountl,&spacesCountr);
+ invertBuffer(tempbuffer,sourceLength,options,spacesCountl,spacesCountr);
+ }
+
+ if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) {
+ if((options&U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK) == U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END) {
+ shapeVars.spacesRelativeToTextBeginEnd = 1; /* swap the begin/end option values used by the helpers */
+ shapeVars.uShapeLamalefBegin = U_SHAPE_LAMALEF_END;
+ shapeVars.uShapeLamalefEnd = U_SHAPE_LAMALEF_BEGIN;
+ shapeVars.uShapeTashkeelBegin = U_SHAPE_TASHKEEL_END;
+ shapeVars.uShapeTashkeelEnd = U_SHAPE_TASHKEEL_BEGIN;
+ }
+ }
+
+ switch(options&U_SHAPE_LETTERS_MASK) {
+ case U_SHAPE_LETTERS_SHAPE :
+ if( (options&U_SHAPE_TASHKEEL_MASK)> 0
+ && ((options&U_SHAPE_TASHKEEL_MASK) !=U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)) {
+ /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */
+ destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,2,shapeVars);
+ }else {
+ /* default Call the shaping function with tashkeel flag == 1 */
+ destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,1,shapeVars);
+
+ /*After shaping text check if user wants to remove tashkeel and replace it with tatweel*/
+ if( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL){
+ destLength = handleTashkeelWithTatweel(tempbuffer,destLength,destCapacity,options,pErrorCode);
+ }
+ }
+ break;
+ case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED :
+ /* Call the shaping function with tashkeel flag == 0 */
+ destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,0,shapeVars);
+ break;
+
+ case U_SHAPE_LETTERS_UNSHAPE :
+ /* Call the deshaping function */
+ destLength = deShapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,shapeVars);
+ break;
+ default :
+ /* will never occur because of validity checks above */
+ destLength = 0;
+ break;
+ }
+
+ /*
+ * TODO: (markus 2002aug01)
+ * For as long as we always preflight the outputSize above
+ * we should U_ASSERT(outputSize==destLength)
+ * except for the adjustment above before the tempbuffer allocation
+ */
+
+ if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) {
+ countSpaces(tempbuffer,destLength,options,&spacesCountl,&spacesCountr);
+ invertBuffer(tempbuffer,destLength,options,spacesCountl,spacesCountr);
+ }
+ u_memcpy(dest, tempbuffer, uprv_min(destLength, destCapacity)); /* never copy more than dest can hold */
+
+ if(tempbuffer!=buffer) {
+ uprv_free(tempbuffer);
+ }
+
+ if(destLength>destCapacity) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ return destLength;
+ }
+
+ /* End of Arabic letter shaping part */
+ } else {
+ /*
+ * No letter shaping:
+ * just make sure the destination is large enough and copy the string.
+ */
+ if(destCapacity<sourceLength) {
+ /* this catches preflighting, too */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ return sourceLength;
+ }
+ u_memcpy(dest, source, sourceLength);
+ destLength=sourceLength;
+ }
+
+ /*
+ * Perform number shaping.
+ * With UTF-16 or UTF-32, the length of the string is constant.
+ * The easiest way to do this is to operate on the destination and
+ * "shape" the digits in-place.
+ */
+ if((options&U_SHAPE_DIGITS_MASK)!=U_SHAPE_DIGITS_NOOP) {
+ UChar digitBase;
+ int32_t i;
+
+ /* select the requested digit group */
+ switch(options&U_SHAPE_DIGIT_TYPE_MASK) {
+ case U_SHAPE_DIGIT_TYPE_AN:
+ digitBase=0x660; /* Unicode: "Arabic-Indic digits" */
+ break;
+ case U_SHAPE_DIGIT_TYPE_AN_EXTENDED:
+ digitBase=0x6f0; /* Unicode: "Eastern Arabic-Indic digits (Persian and Urdu)" */
+ break;
+ default:
+ /* will never occur because of validity checks above */
+ digitBase=0;
+ break;
+ }
+
+ /* perform the requested operation */
+ switch(options&U_SHAPE_DIGITS_MASK) {
+ case U_SHAPE_DIGITS_EN2AN:
+ /* add (digitBase-'0') to each European (ASCII) digit code point */
+ digitBase-=0x30;
+ for(i=0; i<destLength; ++i) {
+ if(((uint32_t)dest[i]-0x30)<10) { /* unsigned wrap-around makes this a single range test */
+ dest[i]+=digitBase;
+ }
+ }
+ break;
+ case U_SHAPE_DIGITS_AN2EN:
+ /* subtract (digitBase-'0') from each Arabic digit code point */
+ for(i=0; i<destLength; ++i) {
+ if(((uint32_t)dest[i]-(uint32_t)digitBase)<10) {
+ dest[i]-=digitBase-0x30;
+ }
+ }
+ break;
+ case U_SHAPE_DIGITS_ALEN2AN_INIT_LR:
+ _shapeToArabicDigitsWithContext(dest, destLength,
+ digitBase,
+ (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
+ FALSE);
+ break;
+ case U_SHAPE_DIGITS_ALEN2AN_INIT_AL:
+ _shapeToArabicDigitsWithContext(dest, destLength,
+ digitBase,
+ (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
+ TRUE);
+ break;
+ default:
+ /* will never occur because of validity checks above */
+ break;
+ }
+ }
+
+ return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
+}
diff --git a/thirdparty/icu4c/common/usprep.cpp b/thirdparty/icu4c/common/usprep.cpp
new file mode 100644
index 0000000000..8351a77370
--- /dev/null
+++ b/thirdparty/icu4c/common/usprep.cpp
@@ -0,0 +1,871 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *******************************************************************************
+ *
+ * Copyright (C) 2003-2016, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ *******************************************************************************
+ * file name: usprep.cpp
+ * encoding: UTF-8
+ * tab size: 8 (not used)
+ * indentation:4
+ *
+ * created on: 2003jul2
+ * created by: Ram Viswanadha
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_IDNA
+
+#include "unicode/usprep.h"
+
+#include "unicode/normalizer2.h"
+#include "unicode/ustring.h"
+#include "unicode/uchar.h"
+#include "unicode/uversion.h"
+#include "umutex.h"
+#include "cmemory.h"
+#include "sprpimpl.h"
+#include "ustr_imp.h"
+#include "uhash.h"
+#include "cstring.h"
+#include "udataswp.h"
+#include "ucln_cmn.h"
+#include "ubidi_props.h"
+#include "uprops.h"
+
+U_NAMESPACE_USE
+
+U_CDECL_BEGIN
+
+/*
+Static cache for already opened StringPrep profiles
+*/
+static UHashtable *SHARED_DATA_HASHTABLE = NULL;
+static icu::UInitOnce gSharedDataInitOnce = U_INITONCE_INITIALIZER;
+
+static UMutex usprepMutex;
+/* format version of spp file */
+//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
+
+/* the Unicode version of the sprep data */
+static UVersionInfo dataVersion={ 0, 0, 0, 0 };
+
+/*
+ * Profile names must be aligned to UStringPrepProfileType:
+ * usprep_openByType() uses the enum value directly as an index into this table.
+ */
+static const char * const PROFILE_NAMES[] = {
+ "rfc3491", /* USPREP_RFC3491_NAMEPREP */
+ "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */
+ "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */
+ "rfc3491", /* USPREP_RFC3530_NFS4_CIS_PREP */
+ "rfc3530mixp", /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
+ "rfc3491", /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
+ "rfc3722", /* USPREP_RFC3722_ISCSI */
+ "rfc3920node", /* USPREP_RFC3920_NODEPREP */
+ "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */
+ "rfc4011", /* USPREP_RFC4011_MIB */
+ "rfc4013", /* USPREP_RFC4013_SASLPREP */
+ "rfc4505", /* USPREP_RFC4505_TRACE */
+ "rfc4518", /* USPREP_RFC4518_LDAP */
+ "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */
+};
+
+/*
+ * Acceptance callback handed to udata_openChoice() by loadData().
+ * A data item is accepted when its header is at least 20 bytes, matches this
+ * platform's endianness and charset family, carries the "SPRP" data format,
+ * and has format version 3 with the expected trie shift values.
+ * Side effect: on acceptance the item's dataVersion is copied into the
+ * file-level dataVersion variable.
+ */
+static UBool U_CALLCONV
+isSPrepAcceptable(void * /* context */,
+                  const char * /* type */,
+                  const char * /* name */,
+                  const UDataInfo *pInfo) {
+    if(!(
+        pInfo->size>=20 &&
+        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
+        pInfo->charsetFamily==U_CHARSET_FAMILY &&
+        pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
+        pInfo->dataFormat[1]==0x50 &&
+        pInfo->dataFormat[2]==0x52 &&
+        pInfo->dataFormat[3]==0x50 &&
+        pInfo->formatVersion[0]==3 &&
+        pInfo->formatVersion[2]==UTRIE_SHIFT &&
+        pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
+    )) {
+        return FALSE;
+    }
+
+    //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
+    uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
+    return TRUE;
+}
+
+/*
+ * Folding-offset callback installed on the StringPrep trie by loadData():
+ * the trie data word is returned unchanged, reinterpreted as int32_t.
+ */
+static int32_t U_CALLCONV
+getSPrepFoldingOffset(uint32_t data) {
+    const int32_t foldingOffset = (int32_t)data;
+    return foldingOffset;
+}
+
+/*
+ * Hash callback for the profile cache.
+ * Combines the string hashes of the key's name and path as
+ * hash(name) + 37 * hash(path), computed in unsigned arithmetic.
+ */
+static int32_t U_CALLCONV
+hashEntry(const UHashTok parm) {
+    UStringPrepKey *entry = (UStringPrepKey *)parm.pointer;
+
+    UHashTok nameTok;
+    nameTok.pointer = entry->name;
+    const uint32_t nameHash = static_cast<uint32_t>(uhash_hashChars(nameTok));
+
+    UHashTok pathTok;
+    pathTok.pointer = entry->path;
+    const uint32_t pathHash = static_cast<uint32_t>(uhash_hashChars(pathTok));
+
+    return static_cast<int32_t>(nameHash + 37u * pathHash);
+}
+
+/*
+ * Equality callback for the profile cache: two keys are equal when both
+ * their name strings and their path strings compare equal.
+ */
+static UBool U_CALLCONV
+compareEntries(const UHashTok p1, const UHashTok p2) {
+    UStringPrepKey *lhs = (UStringPrepKey *)p1.pointer;
+    UStringPrepKey *rhs = (UStringPrepKey *)p2.pointer;
+
+    UHashTok lhsName, rhsName;
+    lhsName.pointer = lhs->name;
+    rhsName.pointer = rhs->name;
+
+    UHashTok lhsPath, rhsPath;
+    lhsPath.pointer = lhs->path;
+    rhsPath.pointer = rhs->path;
+
+    /* both comparisons are always evaluated, as with the bitwise AND */
+    return ((UBool)(uhash_compareChars(lhsName, rhsName) &
+                    uhash_compareChars(lhsPath, rhsPath)));
+}
+
+/*
+ * Closes the data memory referenced by data->sprepData.
+ * The profile struct itself is not freed here.
+ */
+static void
+usprep_unload(UStringPrepProfile* data){
+    UDataMemory *loaded = data->sprepData;
+    udata_close(loaded);
+}
+
+/*
+ * Removes profiles from the shared cache while holding usprepMutex.
+ *
+ * noRefCount == FALSE : only profiles whose refCount is 0 are removed.
+ * noRefCount == TRUE  : every profile is removed, whatever its refCount.
+ *
+ * Each removed entry has its data unloaded and its key strings, profile and
+ * key freed. Returns the number of removed entries (0 when the cache has
+ * not been created yet).
+ */
+static int32_t
+usprep_internal_flushCache(UBool noRefCount){
+    int32_t removedCount = 0;
+    int32_t iterPos = UHASH_FIRST;
+    const UHashElement *element;
+
+    umtx_lock(&usprepMutex);
+    if (SHARED_DATA_HASHTABLE != NULL) {
+        /* walk every element currently in the table */
+        while ((element = uhash_nextElement(SHARED_DATA_HASHTABLE, &iterPos)) != NULL) {
+            UStringPrepProfile *cached = (UStringPrepProfile *) element->value.pointer;
+            UStringPrepKey *cachedKey = (UStringPrepKey *) element->key.pointer;
+
+            if (!((noRefCount== FALSE && cached->refCount == 0) ||
+                  noRefCount== TRUE)) {
+                continue;   /* still referenced and not a forced flush */
+            }
+
+            deleted:
+            ++removedCount;
+            uhash_removeElement(SHARED_DATA_HASHTABLE, element);
+
+            /* unload the data */
+            usprep_unload(cached);
+
+            if(cachedKey->name != NULL) {
+                uprv_free(cachedKey->name);
+                cachedKey->name=NULL;
+            }
+            if(cachedKey->path != NULL) {
+                uprv_free(cachedKey->path);
+                cachedKey->path=NULL;
+            }
+            uprv_free(cached);
+            uprv_free(cachedKey);
+        }
+    }
+    umtx_unlock(&usprepMutex);
+
+    return removedCount;
+}
+
+/* Works just like ucnv_flushCache()
+static int32_t
+usprep_flushCache(){
+ return usprep_internal_flushCache(FALSE);
+}
+*/
+
+/*
+ * Library cleanup hook registered by createCache().
+ * Force-flushes the profile cache, closes the hash table once it is empty,
+ * and resets the init-once guard. Returns TRUE when no cache table remains.
+ */
+static UBool U_CALLCONV usprep_cleanup(void){
+    if (SHARED_DATA_HASHTABLE != NULL) {
+        /* evict every profile regardless of its reference count */
+        usprep_internal_flushCache(TRUE);
+    }
+    if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
+        uhash_close(SHARED_DATA_HASHTABLE);
+        SHARED_DATA_HASHTABLE = NULL;
+    }
+    gSharedDataInitOnce.reset();
+    return (SHARED_DATA_HASHTABLE == NULL);
+}
+U_CDECL_END
+
+
+/**
+ * Creates the shared profile hash table (keyed with hashEntry/compareEntries)
+ * and registers usprep_cleanup() as the cleanup hook for UCLN_COMMON_USPREP.
+ * On failure the table pointer is left NULL; the cleanup hook is registered
+ * either way.
+ */
+static void U_CALLCONV
+createCache(UErrorCode &status) {
+    UHashtable *table = uhash_open(hashEntry, compareEntries, NULL, &status);
+    SHARED_DATA_HASHTABLE = U_FAILURE(status) ? NULL : table;
+    ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
+}
+
+/*
+ * Runs createCache() through the gSharedDataInitOnce guard;
+ * the outcome is reported in *status.
+ */
+static void
+initCache(UErrorCode *status) {
+    UErrorCode &errorCode = *status;
+    umtx_initOnce(gSharedDataInitOnce, &createCache, errorCode);
+}
+
+/*
+ * Opens the StringPrep data item (path, name, type) and installs it into
+ * profile.
+ *
+ * On success the profile holds the opened data (sprepData), its indexes,
+ * trie and mappingData are set up, isDataLoaded is TRUE, and TRUE is returned.
+ * On failure *errorCode is set and FALSE is returned. Data that this call
+ * stored into the profile is closed again first, so a caller that simply
+ * discards the profile does not leak it.
+ *
+ * U_INVALID_FORMAT_ERROR is reported when normalization is enabled for the
+ * profile and the Unicode version of the normalization data is older than
+ * both the version of the SPREP data and the version recorded for the
+ * normalization corrections.
+ */
+static UBool U_CALLCONV
+loadData(UStringPrepProfile* profile,
+         const char* path,
+         const char* name,
+         const char* type,
+         UErrorCode* errorCode) {
+    /* load Unicode SPREP data from file */
+    UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
+    UDataMemory *dataMemory;
+    const int32_t *p=NULL;
+    const uint8_t *pb;
+    UVersionInfo normUnicodeVersion;
+    int32_t normUniVer, sprepUniVer, normCorrVer;
+    UBool installedHere = FALSE;  /* TRUE once this call has stored dataMemory in the profile */
+
+    if(errorCode==NULL || U_FAILURE(*errorCode)) {
+        return FALSE;
+    }
+
+    /* open the data outside the mutex block */
+    //TODO: change the path
+    dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
+    if(U_FAILURE(*errorCode)) {
+        return FALSE;
+    }
+
+    p=(const int32_t *)udata_getMemory(dataMemory);
+    pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
+    utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
+    _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
+
+
+    if(U_FAILURE(*errorCode)) {
+        udata_close(dataMemory);
+        return FALSE;
+    }
+
+    /* in the mutex block, set the data for this process */
+    umtx_lock(&usprepMutex);
+    if(profile->sprepData==NULL) {
+        profile->sprepData=dataMemory;
+        dataMemory=NULL;
+        installedHere = TRUE;
+        uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
+        uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
+    } else {
+        p=(const int32_t *)udata_getMemory(profile->sprepData);
+    }
+    umtx_unlock(&usprepMutex);
+    /* initialize some variables */
+    profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
+
+    u_getUnicodeVersion(normUnicodeVersion);
+    normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
+                 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
+    sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
+                  (dataVersion[2] << 8 ) + (dataVersion[3]);
+    normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
+
+    if( U_SUCCESS(*errorCode) &&
+        normUniVer < sprepUniVer && /* normalization data is older than the SPREP data */
+        normUniVer < normCorrVer && /* ... and older than the normalization corrections */
+        ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on */
+      ){
+        *errorCode = U_INVALID_FORMAT_ERROR;
+    }
+    if(U_FAILURE(*errorCode)){
+        if(installedHere) {
+            /*
+             * Ownership of the data already moved into the profile, so
+             * dataMemory is NULL here; release the installed data explicitly.
+             */
+            umtx_lock(&usprepMutex);
+            udata_close(profile->sprepData);
+            profile->sprepData = NULL;
+            umtx_unlock(&usprepMutex);
+            profile->mappingData = NULL;  /* pointed into the data just closed */
+        }
+        udata_close(dataMemory);
+        return FALSE;
+    }
+    profile->isDataLoaded = TRUE;
+
+    /* if a different thread set it first, then close the extra data */
+    if(dataMemory!=NULL) {
+        udata_close(dataMemory); /* NULL if it was set correctly */
+    }
+
+
+    return profile->isDataLoaded;
+}
+
+static UStringPrepProfile*
+usprep_getProfile(const char* path,
+ const char* name,
+ UErrorCode *status){ /*
+ * Returns the cached profile for (path, name), loading and caching it on
+ * first use. The returned profile has had its refCount incremented (or set
+ * to 1 when newly inserted). Returns NULL with *status set when the cache
+ * cannot be initialized, memory cannot be allocated, or loadData() fails.
+ * path may be NULL (the key's path is only filled in when it is not);
+ * name is passed to uprv_strlen() and so must not be NULL.
+ */
+
+ UStringPrepProfile* profile = NULL;
+
+ initCache(status);
+
+ if(U_FAILURE(*status)){
+ return NULL;
+ }
+
+ UStringPrepKey stackKey;
+ /*
+ * const is cast way to save malloc, strcpy and free calls
+ * we use the passed in pointers for fetching the data from the
+ * hash table which is safe
+ */
+ stackKey.name = (char*) name;
+ stackKey.path = (char*) path;
+
+ /* fetch the data from the cache */
+ umtx_lock(&usprepMutex);
+ profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
+ if(profile != NULL) {
+ profile->refCount++;
+ }
+ umtx_unlock(&usprepMutex);
+
+ if(profile == NULL) {
+ /* else load the data and put the data in the cache */
+ LocalMemory<UStringPrepProfile> newProfile;
+ if(newProfile.allocateInsteadAndReset() == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+
+ /* load the data */
+ if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
+ return NULL;
+ }
+
+ /* get the options */
+ newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
+ newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
+
+ LocalMemory<UStringPrepKey> key;
+ LocalMemory<char> keyName;
+ LocalMemory<char> keyPath;
+ if( key.allocateInsteadAndReset() == NULL ||
+ keyName.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(name)+1)) == NULL ||
+ (path != NULL &&
+ keyPath.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(path)+1)) == NULL)
+ ) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ usprep_unload(newProfile.getAlias()); /* close the data loaded above before bailing out */
+ return NULL;
+ }
+
+ umtx_lock(&usprepMutex);
+ // If another thread already inserted the same key/value, refcount and cleanup our thread data
+ profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
+ if(profile != NULL) {
+ profile->refCount++;
+ usprep_unload(newProfile.getAlias());
+ }
+ else {
+ /* initialize the key members */
+ key->name = keyName.orphan();
+ uprv_strcpy(key->name, name);
+ if(path != NULL){
+ key->path = keyPath.orphan();
+ uprv_strcpy(key->path, path);
+ }
+ profile = newProfile.orphan();
+
+ /* add the data object to the cache */
+ profile->refCount = 1;
+ uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status); /* NOTE(review): *status is not re-checked after this call - confirm intended */
+ }
+ umtx_unlock(&usprepMutex);
+ }
+
+ return profile;
+}
+
+/*
+ * Public entry point: returns the profile identified by (path, name) via
+ * usprep_getProfile(). Returns NULL when status is NULL or already holds a
+ * failure.
+ */
+U_CAPI UStringPrepProfile* U_EXPORT2
+usprep_open(const char* path,
+            const char* name,
+            UErrorCode* status){
+
+    if(status != NULL && U_SUCCESS(*status)){
+        /* fetch the profile from the cache, loading it when needed */
+        return usprep_getProfile(path,name,status);
+    }
+    return NULL;
+}
+
+/*
+ * Public entry point: opens the built-in profile selected by type.
+ * The enum value is used as an index into PROFILE_NAMES; a value outside
+ * the table sets U_ILLEGAL_ARGUMENT_ERROR and returns NULL. Returns NULL
+ * without changes when status is NULL or already holds a failure.
+ */
+U_CAPI UStringPrepProfile* U_EXPORT2
+usprep_openByType(UStringPrepProfileType type,
+                  UErrorCode* status) {
+    if(status == NULL || U_FAILURE(*status)){
+        return NULL;
+    }
+
+    const int32_t profileIndex = (int32_t)type;
+    if (profileIndex >= 0 && profileIndex < UPRV_LENGTHOF(PROFILE_NAMES)) {
+        return usprep_open(NULL, PROFILE_NAMES[profileIndex], status);
+    }
+
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    return NULL;
+}
+
+/*
+ * Public entry point: releases one reference to profile.
+ * Only the reference count is decremented (never below 0), under
+ * usprepMutex; nothing is freed here. A NULL profile is ignored.
+ */
+U_CAPI void U_EXPORT2
+usprep_close(UStringPrepProfile* profile){
+    if(profile != NULL){
+        umtx_lock(&usprepMutex);
+        /* decrement the ref count */
+        if(profile->refCount > 0){
+            --profile->refCount;
+        }
+        umtx_unlock(&usprepMutex);
+    }
+}
+
+/*
+ * Records a syntax error at rules[pos] in parseError.
+ *
+ * parseError->offset is set to pos and line to 0 (line numbers are not
+ * used). preContext receives up to U_PARSE_CONTEXT_LEN-1 code units that
+ * precede pos, postContext up to U_PARSE_CONTEXT_LEN-1 code units starting
+ * at pos; both are NUL-terminated. A NULL parseError is ignored.
+ *
+ * The position used for context extraction is clamped to the valid range,
+ * so a pos that is negative or beyond rulesLen (or a negative rulesLen)
+ * yields a shorter or empty context instead of a negative copy length or a
+ * write before the start of postContext.
+ */
+U_CFUNC void
+uprv_syntaxError(const UChar* rules,
+                 int32_t pos,
+                 int32_t rulesLen,
+                 UParseError* parseError){
+    if(parseError == NULL){
+        return;
+    }
+    parseError->offset = pos;
+    parseError->line = 0 ; // we are not using line numbers
+
+    // position used for the context windows, kept inside [0, rulesLen]
+    int32_t contextPos = (pos < 0) ? 0 : pos;
+    if (rulesLen >= 0 && contextPos > rulesLen) {
+        contextPos = rulesLen;
+    }
+
+    // for pre-context
+    int32_t start = (contextPos < U_PARSE_CONTEXT_LEN)? 0 : (contextPos - (U_PARSE_CONTEXT_LEN-1));
+    int32_t length = contextPos - start;
+
+    u_memcpy(parseError->preContext,rules+start,length);
+    //null terminate the buffer
+    parseError->preContext[length] = 0;
+
+    // for post-context; include error rules[pos]
+    length = rulesLen - contextPos;
+    if (length > (U_PARSE_CONTEXT_LEN-1)) {
+        length = U_PARSE_CONTEXT_LEN-1;
+    }
+    if (length < 0) {
+        length = 0;   // unknown or too-short rulesLen: no post-context
+    }
+    if (length > 0) {
+        u_memcpy(parseError->postContext,rules+contextPos,length);
+    }
+    //null terminate the buffer
+    parseError->postContext[length]= 0;
+}
+
+
+/**
+ * Decodes one 16-bit word read from the StringPrep trie.
+ *
+ * @param trieWord Raw trie value for a code point.
+ * @param value    Out: mapping index or delta for USPREP_MAP, otherwise 0.
+ * @param isIndex  Out: TRUE when value is an index rather than a delta;
+ *                 FALSE for every other outcome.
+ * @return USPREP_TYPE_LIMIT for the initial value 0 (the caller copies the
+ *         code point through), the stored type for words at or above
+ *         _SPREP_TYPE_THRESHOLD, USPREP_DELETE when the upper 14 bits equal
+ *         _SPREP_MAX_INDEX_VALUE, and USPREP_MAP otherwise.
+ */
+static inline UStringPrepType
+getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
+
+    /* results shared by every outcome that is not a real mapping */
+    isIndex = FALSE;
+    value = 0;
+
+    if(trieWord == 0){
+        /* initial value stored in the mapping table */
+        return USPREP_TYPE_LIMIT;
+    }
+    if(trieWord >= _SPREP_TYPE_THRESHOLD){
+        return (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
+    }
+    if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
+        /* reserved value: the code point is to be removed */
+        return USPREP_DELETE;
+    }
+
+    /* a mapping: bit 1 tells whether the payload is an index or a delta */
+    if(trieWord & 0x02){
+        isIndex = TRUE;
+        value = trieWord >> 2; //drop the lower 2 flag bits
+    }else{
+        /* reinterpret as signed first so that the shift keeps the sign */
+        value = (int16_t)trieWord;
+        value = (value >> 2);
+    }
+    return USPREP_MAP;
+}
+
+/* TODO: change to writing to UnicodeString not UChar *
+ *
+ * Mapping step of StringPrep: walks src one code point at a time, looks
+ * each one up in profile->sprepTrie and writes the mapped text to dest.
+ *
+ * The output length is always counted in full, even past destCapacity, so
+ * that the caller can pre-flight; the final u_terminateUChars() call
+ * returns that length.
+ *
+ * Returns 0 and sets *status to U_STRINGPREP_UNASSIGNED_ERROR (after
+ * filling parseError) when an unassigned code point is met and options
+ * does not contain USPREP_ALLOW_UNASSIGNED.
+ */
+static int32_t
+usprep_map( const UStringPrepProfile* profile,
+ const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status ){
+
+ uint16_t result;
+ int32_t destIndex=0;
+ int32_t srcIndex;
+ UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
+ UStringPrepType type;
+ int16_t value;
+ UBool isIndex;
+ const int32_t* indexes = profile->indexes;
+
+ // no error checking the caller check for error and arguments
+ // no string length check the caller finds out the string length
+
+ for(srcIndex=0;srcIndex<srcLength;){
+ UChar32 ch;
+
+ U16_NEXT(src,srcIndex,srcLength,ch); // srcIndex now points past ch
+
+ result=0;
+
+ UTRIE_GET16(&profile->sprepTrie,ch,result);
+
+ type = getValues(result, value, isIndex);
+
+ // check if the source codepoint is unassigned
+ if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
+
+ uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError); // report the start of ch
+ *status = U_STRINGPREP_UNASSIGNED_ERROR;
+ return 0;
+
+ }else if(type == USPREP_MAP){
+
+ int32_t index, length;
+
+ if(isIndex){
+ index = value;
+ // the index range that contains the value implies the mapping length
+ if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
+ index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
+ length = 1;
+ }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
+ index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
+ length = 2;
+ }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
+ index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
+ length = 3;
+ }else{
+ length = profile->mappingData[index++]; // otherwise the length is stored in front of the mapping
+
+ }
+
+ /* copy mapping to destination */
+ for(int32_t i=0; i< length; i++){
+ if(destIndex < destCapacity ){
+ dest[destIndex] = profile->mappingData[index+i];
+ }
+ destIndex++; /* for pre-flighting */
+ }
+ continue;
+ }else{
+ // subtract the delta to arrive at the code point
+ ch -= value;
+ }
+
+ }else if(type==USPREP_DELETE){
+ // just consume the codepoint and continue
+ continue;
+ }
+ //copy the code point into destination
+ if(ch <= 0xFFFF){
+ if(destIndex < destCapacity ){
+ dest[destIndex] = (UChar)ch;
+ }
+ destIndex++;
+ }else{
+ if(destIndex+1 < destCapacity ){ // a surrogate pair is written only when both units fit
+ dest[destIndex] = U16_LEAD(ch);
+ dest[destIndex+1] = U16_TRAIL(ch);
+ }
+ destIndex +=2;
+ }
+
+ }
+
+ return u_terminateUChars(dest, destCapacity, destIndex, status);
+}
+
+/*
+ 1) Map -- For each character in the input, check if it has a mapping
+ and, if so, replace it with its mapping.
+
+ 2) Normalize -- Possibly normalize the result of step 1 using Unicode
+ normalization.
+
+ 3) Prohibit -- Check for any characters that are not allowed in the
+ output. If any are found, return an error.
+
+ 4) Check bidi -- Possibly check for right-to-left characters, and if
+ any are found, make sure that the whole string satisfies the
+ requirements for bidirectional strings. If the string does not
+ satisfy the requirements for bidirectional strings, return an
+ error.
+ [Unicode3.2] defines several bidirectional categories; each character
+ has one bidirectional category assigned to it. For the purposes of
+ the requirements below, an "RandALCat character" is a character that
+ has Unicode bidirectional categories "R" or "AL"; an "LCat character"
+ is a character that has Unicode bidirectional category "L". Note
+
+
+ that there are many characters which fall in neither of the above
+ definitions; Latin digits (<U+0030> through <U+0039>) are examples of
+ this because they have bidirectional category "EN".
+
+ In any profile that specifies bidirectional character handling, all
+ three of the following requirements MUST be met:
+
+ 1) The characters in section 5.8 MUST be prohibited.
+
+ 2) If a string contains any RandALCat character, the string MUST NOT
+ contain any LCat character.
+
+ 3) If a string contains any RandALCat character, a RandALCat
+ character MUST be the first character of the string, and a
+ RandALCat character MUST be the last character of the string.
+*/
+/**
+ * Prepares a string according to a StringPrep profile: map, optionally
+ * normalize (NFKC restricted to the set from uniset_getUnicode32Instance()),
+ * reject prohibited code points and optionally enforce the bidirectional
+ * rules.
+ *
+ * @param profile      Profile to apply; must not be NULL.
+ * @param src          Input text; may be NULL only when srcLength is 0.
+ * @param srcLength    Length of src, or -1 when src is NUL-terminated.
+ * @param dest         Output buffer; may be NULL only when destCapacity is 0.
+ * @param destCapacity Capacity of dest in UChars.
+ * @param options      Bit set; USPREP_ALLOW_UNASSIGNED is honoured by the
+ *                     mapping step.
+ * @param parseError   Receives offset and context of a failure; may be NULL.
+ * @param status       In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR,
+ *                     U_MEMORY_ALLOCATION_ERROR, U_STRINGPREP_PROHIBITED_ERROR,
+ *                     U_STRINGPREP_CHECK_BIDI_ERROR or whatever the mapping and
+ *                     normalization steps report.
+ * @return Length of the prepared string as returned by
+ *         UnicodeString::extract(), or 0 on any failure.
+ */
+U_CAPI int32_t U_EXPORT2
+usprep_prepare(   const UStringPrepProfile* profile,
+                  const UChar* src, int32_t srcLength,
+                  UChar* dest, int32_t destCapacity,
+                  int32_t options,
+                  UParseError* parseError,
+                  UErrorCode* status ){
+
+    // check error status; a NULL status is rejected the same way usprep_open() does
+    if(status == NULL || U_FAILURE(*status)){
+        return 0;
+    }
+
+    //check arguments
+    if(profile==NULL ||
+            (src==NULL ? srcLength!=0 : srcLength<-1) ||
+            (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
+        *status=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    //get the string length
+    if(srcLength < 0){
+        srcLength = u_strlen(src);
+    }
+    // map
+    UnicodeString s1;
+    UChar *b1 = s1.getBuffer(srcLength);
+    if(b1==NULL){
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    int32_t b1Len = usprep_map(profile, src, srcLength,
+                               b1, s1.getCapacity(), options, parseError, status);
+    s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
+
+    if(*status == U_BUFFER_OVERFLOW_ERROR){
+        // redo processing of string
+        /* we do not have enough room so grow the buffer*/
+        b1 = s1.getBuffer(b1Len);
+        if(b1==NULL){
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return 0;
+        }
+
+        *status = U_ZERO_ERROR; // reset error
+        b1Len = usprep_map(profile, src, srcLength,
+                           b1, s1.getCapacity(), options, parseError, status);
+        s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
+    }
+    if(U_FAILURE(*status)){
+        return 0;
+    }
+
+    // normalize
+    UnicodeString s2;
+    if(profile->doNFKC){
+        const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
+        const UnicodeSet *unicode32 = uniset_getUnicode32Instance(*status);
+        // Check before dereferencing: both getters may hand back NULL on failure.
+        if(U_FAILURE(*status)){
+            return 0;
+        }
+        FilteredNormalizer2 fn2(*n2, *unicode32);
+        fn2.normalize(s1, s2, *status);
+    }else{
+        s2.fastCopyFrom(s1);
+    }
+    if(U_FAILURE(*status)){
+        return 0;
+    }
+
+    // Prohibit and checkBiDi in one pass
+    const UChar *b2 = s2.getBuffer();
+    int32_t b2Len = s2.length();
+    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
+    UBool leftToRight=FALSE, rightToLeft=FALSE;
+    int32_t rtlPos =-1, ltrPos =-1;
+
+    for(int32_t b2Index=0; b2Index<b2Len;){
+        UChar32 ch = 0;
+        U16_NEXT(b2, b2Index, b2Len, ch);
+        // offset of the first code unit of ch (b2Index already points past it)
+        const int32_t chStart = b2Index-U16_LENGTH(ch);
+
+        uint16_t result;
+        UTRIE_GET16(&profile->sprepTrie,ch,result);
+
+        int16_t value;
+        UBool isIndex;
+        UStringPrepType type = getValues(result, value, isIndex);
+
+        if( type == USPREP_PROHIBITED ||
+                ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
+           ){
+            *status = U_STRINGPREP_PROHIBITED_ERROR;
+            uprv_syntaxError(b2, chStart, b2Len, parseError);
+            return 0;
+        }
+
+        if(profile->checkBiDi) {
+            direction = ubidi_getClass(ch);
+            if(firstCharDir == U_CHAR_DIRECTION_COUNT){
+                firstCharDir = direction;
+            }
+            if(direction == U_LEFT_TO_RIGHT){
+                leftToRight = TRUE;
+                ltrPos = chStart;
+            }
+            if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
+                rightToLeft = TRUE;
+                rtlPos = chStart;
+            }
+        }
+    }
+    if(profile->checkBiDi == TRUE){
+        // satisfy 2: RandALCat and LCat characters must not be mixed
+        if( leftToRight == TRUE && rightToLeft == TRUE){
+            *status = U_STRINGPREP_CHECK_BIDI_ERROR;
+            uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
+            return 0;
+        }
+
+        //satisfy 3: after the loop, direction holds the class of the last character
+        if( rightToLeft == TRUE &&
+            !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
+              (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
+           ){
+            *status = U_STRINGPREP_CHECK_BIDI_ERROR;
+            uprv_syntaxError(b2, rtlPos, b2Len, parseError);
+            return 0;
+        }
+    }
+    return s2.extract(dest, destCapacity, *status);
+}
+
+
+/* data swapping ------------------------------------------------------------
+ *
+ * usprep_swap() converts StringPrep .spp data ("SPRP", format version 3)
+ * with the given UDataSwapper. After the data header the payload consists of
+ * 16 int32_t indexes, the UTrie and the uint16_t mapping table, in that
+ * order.
+ *
+ * When length is negative nothing is bounds-checked or written after the
+ * header step; only the total size is computed. Returns headerSize plus the
+ * payload size, or 0 on error with *pErrorCode set.
+ */
+
+U_CAPI int32_t U_EXPORT2
+usprep_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode) {
+ const UDataInfo *pInfo;
+ int32_t headerSize;
+
+ const uint8_t *inBytes;
+ uint8_t *outBytes;
+
+ const int32_t *inIndexes;
+ int32_t indexes[16];
+
+ int32_t i, offset, count, size;
+
+ /* udata_swapDataHeader checks the arguments */
+ headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
+ /* check data format and format version */
+ pInfo=(const UDataInfo *)((const char *)inData+4);
+ if(!(
+ pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
+ pInfo->dataFormat[1]==0x50 &&
+ pInfo->dataFormat[2]==0x52 &&
+ pInfo->dataFormat[3]==0x50 &&
+ pInfo->formatVersion[0]==3
+ )) {
+ udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
+ pInfo->dataFormat[0], pInfo->dataFormat[1],
+ pInfo->dataFormat[2], pInfo->dataFormat[3],
+ pInfo->formatVersion[0]);
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ inBytes=(const uint8_t *)inData+headerSize;
+ outBytes=(uint8_t *)outData+headerSize;
+
+ inIndexes=(const int32_t *)inBytes;
+
+ if(length>=0) {
+ length-=headerSize; /* from here on length counts payload bytes only */
+ if(length<16*4) {
+ udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
+ length);
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ }
+
+ /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
+ for(i=0; i<16; ++i) {
+ indexes[i]=udata_readInt32(ds, inIndexes[i]);
+ }
+
+ /* calculate the total length of the data */
+ size=
+ 16*4+ /* size of indexes[] */
+ indexes[_SPREP_INDEX_TRIE_SIZE]+
+ indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
+
+ if(length>=0) {
+ if(length<size) {
+ udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
+ length);
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ /* copy the data for inaccessible bytes */
+ if(inBytes!=outBytes) {
+ uprv_memcpy(outBytes, inBytes, size);
+ }
+
+ offset=0;
+
+ /* swap the int32_t indexes[] */
+ count=16*4;
+ ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
+ offset+=count;
+
+ /* swap the UTrie */
+ count=indexes[_SPREP_INDEX_TRIE_SIZE];
+ utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
+ offset+=count;
+
+ /* swap the uint16_t mappingTable[] */
+ count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
+ ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
+ //offset+=count;
+ }
+
+ return headerSize+size;
+}
+
+#endif /* #if !UCONFIG_NO_IDNA */
diff --git a/thirdparty/icu4c/common/ustack.cpp b/thirdparty/icu4c/common/ustack.cpp
new file mode 100644
index 0000000000..fb314b0ebe
--- /dev/null
+++ b/thirdparty/icu4c/common/ustack.cpp
@@ -0,0 +1,63 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2003-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#include "uvector.h"
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UStack)
+
+// Every constructor simply forwards its arguments to the UVector
+// constructor with the same parameter list; UStack adds no state of its own.
+UStack::UStack(UErrorCode &status)
+    : UVector(status) {}
+
+UStack::UStack(int32_t initialCapacity, UErrorCode &status)
+    : UVector(initialCapacity, status) {}
+
+UStack::UStack(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status)
+    : UVector(d, c, status) {}
+
+UStack::UStack(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status)
+    : UVector(d, c, initialCapacity, status) {}
+
+// Nothing to release beyond what the base class handles.
+UStack::~UStack() {}
+
+// Removes the last element and returns the pointer that was stored there;
+// returns a null pointer when the stack is empty.
+void* UStack::pop(void) {
+    const int32_t top = size() - 1;
+    if (top < 0) {
+        return nullptr;
+    }
+    void* popped = elementAt(top);
+    removeElementAt(top);
+    return popped;
+}
+
+// Integer flavour of pop(): removes the last element and returns its
+// integer value; returns 0 when the stack is empty.
+int32_t UStack::popi(void) {
+    const int32_t top = size() - 1;
+    if (top < 0) {
+        return 0;
+    }
+    const int32_t popped = elementAti(top);
+    removeElementAt(top);
+    return popped;
+}
+
+// Returns the distance of obj from the top of the stack (size() minus the
+// position reported by indexOf(), so the last element yields 1), or the
+// negative indexOf() result unchanged when obj is not found.
+int32_t UStack::search(void* obj) const {
+    const int32_t pos = indexOf(obj);
+    if (pos < 0) {
+        return pos;
+    }
+    return size() - pos;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/ustr_cnv.cpp b/thirdparty/icu4c/common/ustr_cnv.cpp
new file mode 100644
index 0000000000..9a25a9905a
--- /dev/null
+++ b/thirdparty/icu4c/common/ustr_cnv.cpp
@@ -0,0 +1,256 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1998-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ustr_cnv.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004aug24
+* created by: Markus W. Scherer
+*
+* Character conversion functions moved here from ustring.c
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ustring.h"
+#include "unicode/ucnv.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "umutex.h"
+#include "ustr_cnv.h"
+#include "ucnv_bld.h"
+
+/* mutexed access to a shared default converter ----------------------------- */
+
+static UConverter *gDefaultConverter = NULL;
+
+/*
+ * Hands out a default converter: the one parked in gDefaultConverter if
+ * there is one (the slot is emptied), otherwise a new one from
+ * ucnv_open(NULL, status). Returns NULL when opening fails.
+ */
+U_CAPI UConverter* U_EXPORT2
+u_getDefaultConverter(UErrorCode *status)
+{
+    UConverter *cached = NULL;
+
+    /* peek without the lock first, then look again while holding it */
+    if (gDefaultConverter != NULL) {
+        icu::umtx_lock(NULL);
+        if (gDefaultConverter != NULL) {
+            /* still there: take it out of the slot */
+            cached = gDefaultConverter;
+            gDefaultConverter = NULL;
+        }
+        icu::umtx_unlock(NULL);
+    }
+    if (cached != NULL) {
+        return cached;
+    }
+
+    /* nothing was cached, so open a new converter */
+    UConverter *fresh = ucnv_open(NULL, status);
+    if (U_FAILURE(*status)) {
+        ucnv_close(fresh);
+        fresh = NULL;
+    }
+    return fresh;
+}
+
+/*
+ * Gives a converter back. If the gDefaultConverter slot is empty the
+ * converter is reset and parked there; otherwise (or when the slot got
+ * filled in the meantime) a non-NULL converter is closed.
+ */
+U_CAPI void U_EXPORT2
+u_releaseDefaultConverter(UConverter *converter)
+{
+    UConverter *toClose = converter;
+
+    if (gDefaultConverter == NULL) {
+        if (converter != NULL) {
+            ucnv_reset(converter);
+        }
+        ucnv_enableCleanup();
+
+        icu::umtx_lock(NULL);
+        if (gDefaultConverter == NULL) {
+            /* slot is still free: park the converter instead of closing it */
+            gDefaultConverter = converter;
+            toClose = NULL;
+        }
+        icu::umtx_unlock(NULL);
+    }
+
+    if (toClose != NULL) {
+        ucnv_close(toClose);
+    }
+}
+
+/*
+ * Empties the gDefaultConverter slot and closes the converter that was
+ * parked there, if any.
+ */
+U_CAPI void U_EXPORT2
+u_flushDefaultConverter()
+{
+    if (gDefaultConverter == NULL) {
+        return;   /* nothing cached */
+    }
+
+    UConverter *parked = NULL;
+    icu::umtx_lock(NULL);
+    /* look again under the lock: the slot may have been emptied meanwhile */
+    if (gDefaultConverter != NULL) {
+        parked = gDefaultConverter;
+        gDefaultConverter = NULL;
+    }
+    icu::umtx_unlock(NULL);
+
+    if (parked != NULL) {
+        ucnv_close(parked);
+    }
+}
+
+
+/* conversions between char* and UChar* ------------------------------------- */
+
+/* maximum string length for u_uastrcpy() and u_austrcpy() implementations */
+#define MAX_STRLEN 0x0FFFFFFF
+
+/*
+ returns the minimum of (the length of the null-terminated string) and n.
+*/
+/* Counts the chars before the first NUL in s1, looking at no more than n of them; 0 for a NULL s1. */
+static int32_t u_astrnlen(const char *s1, int32_t n)
+{
+    if (s1 == NULL) {
+        return 0;
+    }
+
+    int32_t count = 0;
+    for (; n-- && *s1 != 0; ++s1) {
+        ++count;
+    }
+    return count;
+}
+
+/*
+ * Converts at most n chars of s2 with the default converter into ucs1,
+ * writing at most n UChars. The result is NUL-terminated only when fewer
+ * than n UChars were produced. On failure ucs1 becomes the empty string.
+ * Returns ucs1.
+ */
+U_CAPI UChar* U_EXPORT2
+u_uastrncpy(UChar *ucs1,
+            const char *s2,
+            int32_t n)
+{
+    UErrorCode err = U_ZERO_ERROR;
+    UConverter *cnv = u_getDefaultConverter(&err);
+    if(U_FAILURE(err) || cnv == NULL) {
+        *ucs1 = 0;
+        return ucs1;
+    }
+
+    UChar *out = ucs1;
+    UChar *const outLimit = ucs1+n;
+
+    ucnv_reset(cnv);
+    const char *const inLimit = s2+u_astrnlen(s2, n);
+    ucnv_toUnicode(cnv,
+                   &out, outLimit,
+                   &s2, inLimit,
+                   NULL,
+                   TRUE,
+                   &err);
+    ucnv_reset(cnv); /* be good citizens */
+    u_releaseDefaultConverter(cnv);
+
+    /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */
+    if(err != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(err)) {
+        *ucs1 = 0; /* failure */
+    }
+    if(out < outLimit) {
+        *out = 0;  /* terminate */
+    }
+    return ucs1;
+}
+
+/*
+ * Converts the whole NUL-terminated s2 with the default converter into
+ * ucs1 (capacity is taken to be MAX_STRLEN). If no converter is available
+ * or the conversion fails, ucs1 becomes the empty string. Returns ucs1.
+ */
+U_CAPI UChar* U_EXPORT2
+u_uastrcpy(UChar *ucs1,
+           const char *s2 )
+{
+    UErrorCode err = U_ZERO_ERROR;
+    UConverter *cnv = u_getDefaultConverter(&err);
+    if(U_FAILURE(err) || cnv == NULL) {
+        *ucs1 = 0;
+        return ucs1;
+    }
+
+    const int32_t srcLength = (int32_t)uprv_strlen(s2);
+    ucnv_toUChars(cnv, ucs1, MAX_STRLEN, s2, srcLength, &err);
+    u_releaseDefaultConverter(cnv);
+    if(U_FAILURE(err)) {
+        *ucs1 = 0;
+    }
+    return ucs1;
+}
+
+/*
+ returns the minimum of (the length of the null-terminated string) and n.
+*/
+/* Counts the UChars before the first NUL in ucs1, looking at no more than n of them; 0 for a NULL ucs1. */
+static int32_t u_ustrnlen(const UChar *ucs1, int32_t n)
+{
+    if (ucs1 == NULL) {
+        return 0;
+    }
+
+    int32_t count = 0;
+    for (; n-- && *ucs1 != 0; ++ucs1) {
+        ++count;
+    }
+    return count;
+}
+
+/*
+ * Converts at most n UChars of ucs2 with the default converter into s1,
+ * writing at most n chars. The result is NUL-terminated only when fewer
+ * than n chars were produced. On failure s1 becomes the empty string.
+ * Returns s1.
+ */
+U_CAPI char* U_EXPORT2
+u_austrncpy(char *s1,
+            const UChar *ucs2,
+            int32_t n)
+{
+    UErrorCode err = U_ZERO_ERROR;
+    UConverter *cnv = u_getDefaultConverter(&err);
+    if(U_FAILURE(err) || cnv == NULL) {
+        *s1 = 0;
+        return s1;
+    }
+
+    char *out = s1;
+    char *const outLimit = s1+n;
+
+    ucnv_reset(cnv);
+    const UChar *const inLimit = ucs2+u_ustrnlen(ucs2, n);
+    ucnv_fromUnicode(cnv,
+                     &out, outLimit,
+                     &ucs2, inLimit,
+                     NULL,
+                     TRUE,
+                     &err);
+    ucnv_reset(cnv); /* be good citizens */
+    u_releaseDefaultConverter(cnv);
+
+    /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */
+    if(err != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(err)) {
+        *s1 = 0; /* failure */
+    }
+    if(out < outLimit) {
+        *out = 0;  /* terminate */
+    }
+    return s1;
+}
+
+/*
+ * Converts the whole NUL-terminated ucs2 with the default converter into
+ * s1 (capacity is taken to be MAX_STRLEN) and writes a NUL at the length
+ * reported by ucnv_fromUChars(). Without a converter s1 becomes the empty
+ * string. Returns s1.
+ */
+U_CAPI char* U_EXPORT2
+u_austrcpy(char *s1,
+           const UChar *ucs2 )
+{
+    UErrorCode err = U_ZERO_ERROR;
+    UConverter *cnv = u_getDefaultConverter(&err);
+    if(U_FAILURE(err) || cnv == NULL) {
+        *s1 = 0;
+        return s1;
+    }
+
+    const int32_t written = ucnv_fromUChars(cnv, s1, MAX_STRLEN, ucs2, -1, &err);
+    u_releaseDefaultConverter(cnv);
+    s1[written] = 0;
+    return s1;
+}
+
+#endif
diff --git a/thirdparty/icu4c/common/ustr_cnv.h b/thirdparty/icu4c/common/ustr_cnv.h
new file mode 100644
index 0000000000..861e3ebff0
--- /dev/null
+++ b/thirdparty/icu4c/common/ustr_cnv.h
@@ -0,0 +1,51 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: ustr_cnv.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004Aug27
+* created by: George Rhoten
+*/
+
+#ifndef USTR_CNV_IMP_H
+#define USTR_CNV_IMP_H
+
+#include "unicode/utypes.h"
+#include "unicode/ucnv.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+/**
+ * Get the default converter. This is a commonly used converter
+ * that is used for the ustring and UnicodeString API.
+ * Remember to use the u_releaseDefaultConverter when you are done.
+ * @internal
+ */
+U_CAPI UConverter* U_EXPORT2
+u_getDefaultConverter(UErrorCode *status);
+
+
+/**
+ * Release the default converter to the converter cache.
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+u_releaseDefaultConverter(UConverter *converter);
+
+/**
+ * Flush the default converter, if cached.
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+u_flushDefaultConverter(void);
+
+#endif
+
+#endif
diff --git a/thirdparty/icu4c/common/ustr_imp.h b/thirdparty/icu4c/common/ustr_imp.h
new file mode 100644
index 0000000000..3c4b9cc2a5
--- /dev/null
+++ b/thirdparty/icu4c/common/ustr_imp.h
@@ -0,0 +1,155 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: ustr_imp.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001jan30
+* created by: Markus W. Scherer
+*/
+
+#ifndef __USTR_IMP_H__
+#define __USTR_IMP_H__
+
+#include "unicode/utypes.h"
+#include "unicode/utf8.h"
+
+/**
+ * Internal option for unorm_cmpEquivFold() for strncmp style.
+ * If set, checks for both string length and terminating NUL.
+ */
+#define _STRNCMP_STYLE 0x1000
+
+/**
+ * Compare two strings in code point order or code unit order.
+ * Works in strcmp style (both lengths -1),
+ * strncmp style (lengths equal and >=0, flag true),
+ * and memcmp/UnicodeString style (at least one length >=0).
+ */
+U_CFUNC int32_t U_EXPORT2
+uprv_strCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ UBool strncmpStyle, UBool codePointOrder);
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashUCharsN(const UChar *str, int32_t length);
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashCharsN(const char *str, int32_t length);
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashICharsN(const char *str, int32_t length);
+
+/**
+ * Convert an ASCII-range lowercase character to uppercase.
+ *
+ * @param c A UChar.
+ * @return If UChar is a lowercase ASCII character, returns the uppercase version.
+ * Otherwise, returns the input character.
+ */
+U_CAPI UChar U_EXPORT2
+u_asciiToUpper(UChar c);
+
+// TODO: Add u_asciiToLower if/when there is a need for it.
+
+/**
+ * NUL-terminate a UChar * string if possible.
+ * If length < destCapacity then NUL-terminate.
+ * If length == destCapacity then do not terminate but set U_STRING_NOT_TERMINATED_WARNING.
+ * If length > destCapacity then do not terminate but set U_BUFFER_OVERFLOW_ERROR.
+ *
+ * @param dest Destination buffer, can be NULL if destCapacity==0.
+ * @param destCapacity Number of UChars available at dest.
+ * @param length Number of UChars that were (to be) written to dest.
+ * @param pErrorCode ICU error code.
+ * @return length
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
+
+/**
+ * NUL-terminate a char * string if possible.
+ * Same as u_terminateUChars() but for a different string type.
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
+
+/**
+ * NUL-terminate a UChar32 * string if possible.
+ * Same as u_terminateUChars() but for a different string type.
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
+
+/**
+ * NUL-terminate a wchar_t * string if possible.
+ * Same as u_terminateUChars() but for a different string type.
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
+
+/**
+ * Counts the bytes of any whole valid sequence for a UTF-8 lead byte.
+ * Returns 1 for ASCII 0..0x7f.
+ * Returns 0 for 0x80..0xc1 as well as for 0xf5..0xff.
+ * leadByte might be evaluated multiple times.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @return 0..4
+ */
+#define U8_COUNT_BYTES(leadByte) \
+ (U8_IS_SINGLE(leadByte) ? 1 : U8_COUNT_BYTES_NON_ASCII(leadByte))
+
+/**
+ * Counts the bytes of any whole valid sequence for a UTF-8 lead byte.
+ * Returns 0 for 0x00..0xc1 as well as for 0xf5..0xff.
+ * leadByte might be evaluated multiple times.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @return 0 or 2..4
+ */
+#define U8_COUNT_BYTES_NON_ASCII(leadByte) \
+ (U8_IS_LEAD(leadByte) ? ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+2 : 0)
+
+#ifdef __cplusplus
+
+U_NAMESPACE_BEGIN
+
+class UTF8 {
+public:
+    UTF8() = delete;  // all static
+
+    /**
+     * Checks whether t is acceptable as the i-th byte after a UTF-8 lead byte.
+     *
+     * @param prev   The lead byte; only consulted when i==1 and length>=3.
+     * @param t      The byte under test.
+     * @param i      Position (1..3) of t after the lead byte. 0<i<length
+     * @param length Sequence length (2..4) implied by the lead byte.
+     * @return true if t is a valid trail byte in this position.
+     */
+    static inline UBool isValidTrail(int32_t prev, uint8_t t, int32_t i, int32_t length) {
+        // Only the first trail byte of a 3- or 4-byte sequence has to be
+        // validated in combination with its lead byte.
+        const bool pairedWithLead = (length > 2) && (i <= 1);
+        if (!pairedWithLead) {
+            return U8_IS_TRAIL(t);
+        }
+        if (length == 3) {
+            return U8_IS_VALID_LEAD3_AND_T1(prev, t);
+        }
+        // length == 4
+        return U8_IS_VALID_LEAD4_AND_T1(prev, t);
+    }
+};
+
+U_NAMESPACE_END
+
+#endif // __cplusplus
+
+#endif
diff --git a/thirdparty/icu4c/common/ustr_titlecase_brkiter.cpp b/thirdparty/icu4c/common/ustr_titlecase_brkiter.cpp
new file mode 100644
index 0000000000..457905eb60
--- /dev/null
+++ b/thirdparty/icu4c/common/ustr_titlecase_brkiter.cpp
@@ -0,0 +1,237 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ustr_titlecase_brkiter.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011may30
+* created by: Markus W. Scherer
+*
+* Titlecasing functions that are based on BreakIterator
+* were moved here to break dependency cycles among parts of the common library.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/casemap.h"
+#include "unicode/chariter.h"
+#include "unicode/localpointer.h"
+#include "unicode/ubrk.h"
+#include "unicode/ucasemap.h"
+#include "unicode/utext.h"
+#include "cmemory.h"
+#include "uassert.h"
+#include "ucase.h"
+#include "ucasemap_imp.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Whole-string BreakIterator.
+ * Titlecasing only calls setText(), first(), and next().
+ * We implement the rest only to satisfy the abstract interface.
+ *
+ * The only state is the length of the text passed to setText(); the text
+ * itself is not kept.
+ */
+class WholeStringBreakIterator : public BreakIterator {
+public:
+ WholeStringBreakIterator() : BreakIterator(), length(0) {}
+ ~WholeStringBreakIterator() U_OVERRIDE;
+ UBool operator==(const BreakIterator&) const U_OVERRIDE;
+ WholeStringBreakIterator *clone() const U_OVERRIDE;
+ static UClassID U_EXPORT2 getStaticClassID();
+ UClassID getDynamicClassID() const U_OVERRIDE;
+ CharacterIterator &getText() const U_OVERRIDE;
+ UText *getUText(UText *fillIn, UErrorCode &errorCode) const U_OVERRIDE;
+ void setText(const UnicodeString &text) U_OVERRIDE; // records text.length()
+ void setText(UText *text, UErrorCode &errorCode) U_OVERRIDE; // records the native length of text
+ void adoptText(CharacterIterator* it) U_OVERRIDE;
+ int32_t first() U_OVERRIDE;
+ int32_t last() U_OVERRIDE;
+ int32_t previous() U_OVERRIDE;
+ int32_t next() U_OVERRIDE;
+ int32_t current() const U_OVERRIDE;
+ int32_t following(int32_t offset) U_OVERRIDE;
+ int32_t preceding(int32_t offset) U_OVERRIDE;
+ UBool isBoundary(int32_t offset) U_OVERRIDE;
+ int32_t next(int32_t n) U_OVERRIDE;
+ WholeStringBreakIterator *createBufferClone(void *stackBuffer, int32_t &BufferSize,
+ UErrorCode &errorCode) U_OVERRIDE;
+ WholeStringBreakIterator &refreshInputText(UText *input, UErrorCode &errorCode) U_OVERRIDE;
+
+private:
+ int32_t length; // length of the most recently set text; 0 until setText() is called
+};
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(WholeStringBreakIterator)
+
+WholeStringBreakIterator::~WholeStringBreakIterator() {}
+
+// Comparison and cloning are not supported: never equal, no clone.
+UBool WholeStringBreakIterator::operator==(const BreakIterator&) const {
+    return FALSE;
+}
+WholeStringBreakIterator *WholeStringBreakIterator::clone() const {
+    return nullptr;
+}
+
+CharacterIterator &WholeStringBreakIterator::getText() const {
+    UPRV_UNREACHABLE;  // really should not be called
+}
+
+// No text is retained, so there is no UText to hand out.
+UText *WholeStringBreakIterator::getUText(UText * /*fillIn*/, UErrorCode &errorCode) const {
+    if (!U_FAILURE(errorCode)) {
+        errorCode = U_UNSUPPORTED_ERROR;
+    }
+    return nullptr;
+}
+
+// Both setText() overloads remember only how long the text is.
+void WholeStringBreakIterator::setText(const UnicodeString &text) {
+    length = text.length();
+}
+void WholeStringBreakIterator::setText(UText *text, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    const int64_t nativeLength = utext_nativeLength(text);
+    if (nativeLength > INT32_MAX) {
+        // does not fit the int32_t member; length stays as it was
+        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return;
+    }
+    length = (int32_t)nativeLength;
+}
+void WholeStringBreakIterator::adoptText(CharacterIterator*) {
+    UPRV_UNREACHABLE;  // should not be called
+}
+
+// Boundary queries: backward-looking ones answer 0, forward-looking ones
+// answer the text length, and isBoundary() always answers FALSE.
+int32_t WholeStringBreakIterator::first() {
+    return 0;
+}
+int32_t WholeStringBreakIterator::last() {
+    return length;
+}
+int32_t WholeStringBreakIterator::previous() {
+    return 0;
+}
+int32_t WholeStringBreakIterator::next() {
+    return length;
+}
+int32_t WholeStringBreakIterator::current() const {
+    return 0;
+}
+int32_t WholeStringBreakIterator::following(int32_t /*offset*/) {
+    return length;
+}
+int32_t WholeStringBreakIterator::preceding(int32_t /*offset*/) {
+    return 0;
+}
+UBool WholeStringBreakIterator::isBoundary(int32_t /*offset*/) {
+    return FALSE;
+}
+int32_t WholeStringBreakIterator::next(int32_t /*n*/) {
+    return length;
+}
+
+// Unsupported operations: flag U_UNSUPPORTED_ERROR unless an error is
+// already pending.
+WholeStringBreakIterator *WholeStringBreakIterator::createBufferClone(
+        void * /*stackBuffer*/, int32_t & /*BufferSize*/, UErrorCode &errorCode) {
+    if (!U_FAILURE(errorCode)) {
+        errorCode = U_UNSUPPORTED_ERROR;
+    }
+    return nullptr;
+}
+WholeStringBreakIterator &WholeStringBreakIterator::refreshInputText(
+        UText * /*input*/, UErrorCode &errorCode) {
+    if (!U_FAILURE(errorCode)) {
+        errorCode = U_UNSUPPORTED_ERROR;
+    }
+    return *this;
+}
+
+/**
+ * Picks the BreakIterator to use for titlecasing.
+ *
+ * A caller-supplied iter is returned as is, but only when no iterator
+ * option bit (U_TITLECASE_ITERATOR_MASK) is set; combining the two is
+ * U_ILLEGAL_ARGUMENT_ERROR. Without a supplied iterator one is created
+ * according to the option (word, whole string or sentence) and handed to
+ * ownedIter, which then owns it.
+ *
+ * @param locale    Locale for a created iterator; when nullptr,
+ *                  Locale(locID) is used instead.
+ * @param locID     Locale ID used when locale is nullptr.
+ * @param options   Titlecasing options; only the iterator bits are read.
+ * @param iter      Caller-supplied iterator, or nullptr.
+ * @param ownedIter Receives ownership of a newly created iterator.
+ * @param errorCode In/out error code.
+ * @return The iterator to use, or nullptr on failure.
+ */
+U_CFUNC
+BreakIterator *ustrcase_getTitleBreakIterator(
+        const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter,
+        LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+
+    const uint32_t iterOption = options & U_TITLECASE_ITERATOR_MASK;
+
+    if (iter != nullptr) {
+        if (iterOption != 0) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return nullptr;
+        }
+        return iter;
+    }
+
+    BreakIterator *created = nullptr;
+    if (iterOption == 0) {
+        created = BreakIterator::createWordInstance(
+            locale != nullptr ? *locale : Locale(locID), errorCode);
+    } else if (iterOption == U_TITLECASE_WHOLE_STRING) {
+        created = new WholeStringBreakIterator();
+        if (created == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+        }
+    } else if (iterOption == U_TITLECASE_SENTENCES) {
+        created = BreakIterator::createSentenceInstance(
+            locale != nullptr ? *locale : Locale(locID), errorCode);
+    } else {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+    ownedIter.adoptInstead(created);
+    return created;
+}
+
+// Titlecases src into dest. The break iterator comes from
+// ustrcase_getTitleBreakIterator() (the supplied iter, or a temporary one
+// owned for the duration of this call). Returns 0 when no iterator could
+// be obtained, otherwise the result of ustrcase_map().
+int32_t CaseMap::toTitle(
+        const char *locale, uint32_t options, BreakIterator *iter,
+        const UChar *src, int32_t srcLength,
+        UChar *dest, int32_t destCapacity, Edits *edits,
+        UErrorCode &errorCode) {
+    LocalPointer<BreakIterator> ownedIter;
+    BreakIterator *titleIter = ustrcase_getTitleBreakIterator(
+        nullptr, locale, options, iter, ownedIter, errorCode);
+    if (titleIter == nullptr) {
+        return 0;
+    }
+
+    UnicodeString text(srcLength<0, src, srcLength);
+    titleIter->setText(text);
+
+    const int32_t caseLocale = ustrcase_getCaseLocale(locale);
+    return ustrcase_map(
+        caseLocale, options, titleIter,
+        dest, destCapacity,
+        src, srcLength,
+        ustrcase_internalToTitle, edits, errorCode);
+}
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
+// C API titlecasing. Uses titleIter when given, otherwise a temporary
+// iterator created with options 0. Mapping goes through
+// ustrcase_mapWithOverlap(). Returns 0 when no iterator could be obtained.
+U_CAPI int32_t U_EXPORT2
+u_strToTitle(UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             UBreakIterator *titleIter,
+             const char *locale,
+             UErrorCode *pErrorCode) {
+    LocalPointer<BreakIterator> ownedIter;
+    BreakIterator *suppliedIter = reinterpret_cast<BreakIterator *>(titleIter);
+    BreakIterator *activeIter = ustrcase_getTitleBreakIterator(
+        nullptr, locale, 0, suppliedIter, ownedIter, *pErrorCode);
+    if (activeIter == nullptr) {
+        return 0;
+    }
+
+    UnicodeString text(srcLength<0, src, srcLength);
+    activeIter->setText(text);
+
+    const int32_t caseLocale = ustrcase_getCaseLocale(locale);
+    return ustrcase_mapWithOverlap(
+        caseLocale, 0, activeIter,
+        dest, destCapacity,
+        src, srcLength,
+        ustrcase_internalToTitle, *pErrorCode);
+}
+
+// Titlecasing through a UCaseMap. If csm has no break iterator yet, one is
+// created from csm->locale and csm->options and stored in csm->iter, so it
+// stays with the UCaseMap after this call. Returns 0 on an incoming error
+// or when no iterator could be created.
+U_CAPI int32_t U_EXPORT2
+ucasemap_toTitle(UCaseMap *csm,
+                 UChar *dest, int32_t destCapacity,
+                 const UChar *src, int32_t srcLength,
+                 UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    if (csm->iter == nullptr) {
+        LocalPointer<BreakIterator> ownedIter;
+        BreakIterator *created = ustrcase_getTitleBreakIterator(
+            nullptr, csm->locale, csm->options, nullptr, ownedIter, *pErrorCode);
+        if (created == nullptr) {
+            return 0;
+        }
+        // ownership moves from the local holder into the UCaseMap
+        csm->iter = ownedIter.orphan();
+    }
+
+    UnicodeString text(srcLength<0, src, srcLength);
+    csm->iter->setText(text);
+
+    return ustrcase_map(
+        csm->caseLocale, csm->options, csm->iter,
+        dest, destCapacity,
+        src, srcLength,
+        ustrcase_internalToTitle, nullptr, *pErrorCode);
+}
+
+#endif // !UCONFIG_NO_BREAK_ITERATION
diff --git a/thirdparty/icu4c/common/ustr_wcs.cpp b/thirdparty/icu4c/common/ustr_wcs.cpp
new file mode 100644
index 0000000000..e9f278e969
--- /dev/null
+++ b/thirdparty/icu4c/common/ustr_wcs.cpp
@@ -0,0 +1,535 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2001-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ustr_wcs.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004sep07
+* created by: Markus W. Scherer
+*
+* u_strToWCS() and u_strFromWCS() functions
+* moved here from ustrtrns.c for better modularization.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "cstring.h"
+#include "cwchar.h"
+#include "cmemory.h"
+#include "ustr_imp.h"
+#include "ustr_cnv.h"
+
+#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
+
+#define _STACK_BUFFER_CAPACITY 1000
+#define _BUFFER_CAPACITY_MULTIPLIER 2
+
+#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
+// TODO: We should use CharString for char buffers and UnicodeString for UChar buffers.
+// Then we could change this to work only with wchar_t buffers.
+/*
+ * Replaces *pBuffer with a newly allocated buffer of reqCapacity elements of
+ * `size` bytes each and copies the first `length` elements over.
+ *
+ * context   - address of the caller's static (stack) buffer; the old *pBuffer
+ *             is freed unless it is this address.
+ * pBuffer   - in: current buffer; out: new buffer, or NULL on failure.
+ * pCapacity - out: reqCapacity on success, 0 on failure.
+ *
+ * Note that the old buffer is released even when the allocation fails, so
+ * after a FALSE return the caller must not touch or free the old pointer.
+ *
+ * Returns TRUE if the new buffer was allocated.
+ */
+static inline UBool
+u_growAnyBufferFromStatic(void *context,
+                          void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
+                          int32_t length, int32_t size) {
+    // Use char* not void* to avoid the compiler's strict-aliasing assumptions
+    // and related warnings.
+    // The byte count is computed in size_t (like the copy below) so that
+    // reqCapacity*size cannot overflow 32-bit int arithmetic.
+    char *newBuffer=(char *)uprv_malloc((size_t)reqCapacity*size);
+    if(newBuffer!=NULL) {
+        if(length>0) {
+            uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size);
+        }
+        *pCapacity=reqCapacity;
+    } else {
+        *pCapacity=0;
+    }
+
+    /* release the old pBuffer if it was not statically allocated */
+    if(*pBuffer!=(char *)context) {
+        uprv_free(*pBuffer);
+    }
+
+    *pBuffer=newBuffer;
+    return (UBool)(newBuffer!=NULL);
+}
+
+/*
+ * Helper for u_strToWCS() when wchar_t is neither UTF-16 nor UTF-32.
+ *
+ * Works in two steps:
+ *  1. converts the UChar source to chars with the default converter
+ *     (ucnv_fromUnicode), growing a temporary char buffer as needed;
+ *  2. converts those chars to wchar_t with uprv_mbstowcs(), one
+ *     NUL-terminated piece at a time so that embedded NULs are handled,
+ *     into an intermediate heap buffer that is copied to dest if it fits.
+ *
+ * dest/destCapacity - output buffer and its capacity in wchar_t units.
+ * pDestLength       - if not NULL, receives the number of wchar_t units
+ *                     produced (only set once step 2 has started).
+ * src/srcLength     - UChar input; srcLength==-1 means NUL-terminated.
+ * pErrorCode        - in/out ICU error code; allocation failures are
+ *                     reported as U_MEMORY_ALLOCATION_ERROR.
+ *
+ * Returns dest, or NULL if the default converter could not be obtained.
+ */
+static wchar_t*
+_strToWCS(wchar_t *dest,
+          int32_t destCapacity,
+          int32_t *pDestLength,
+          const UChar *src,
+          int32_t srcLength,
+          UErrorCode *pErrorCode){
+
+    char stackBuffer [_STACK_BUFFER_CAPACITY];
+    char* tempBuf = stackBuffer;
+    int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
+    char* tempBufLimit = stackBuffer + tempBufCapacity;
+    UConverter* conv = NULL;
+    char* saveBuf = tempBuf;
+    wchar_t* intTarget=NULL;
+    int32_t intTargetCapacity=0;
+    int count=0,retVal=0;
+
+    const UChar *pSrcLimit =NULL;
+    const UChar *pSrc = src;
+
+    conv = u_getDefaultConverter(pErrorCode);
+
+    if(U_FAILURE(*pErrorCode)){
+        return NULL;
+    }
+
+    if(srcLength == -1){
+        srcLength = u_strlen(pSrc);
+    }
+
+    pSrcLimit = pSrc + srcLength;
+
+    for(;;) {
+        /* reset the error state */
+        *pErrorCode = U_ZERO_ERROR;
+
+        /* convert to chars using default converter */
+        ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
+        count =(tempBuf - saveBuf);
+
+        /* This should rarely occur */
+        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
+            tempBuf = saveBuf;
+
+            /* we dont have enough room on the stack grow the buffer */
+            int32_t newCapacity = 2 * srcLength;
+            if(newCapacity <= tempBufCapacity) {
+                newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
+            }
+            if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
+                    newCapacity, count, 1)) {
+                /* The helper has already released the old buffer: forget it so
+                 * that cleanup does not free it a second time, and report the
+                 * real cause instead of the stale overflow code.
+                 */
+                saveBuf = stackBuffer;
+                *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+                goto cleanup;
+            }
+
+            saveBuf = tempBuf;
+            tempBufLimit = tempBuf + tempBufCapacity;
+            tempBuf = tempBuf + count;
+
+        } else {
+            break;
+        }
+    }
+
+    if(U_FAILURE(*pErrorCode)){
+        goto cleanup;
+    }
+
+    /* done with conversion null terminate the char buffer */
+    if(count>=tempBufCapacity){
+        tempBuf = saveBuf;
+        /* we dont have enough room on the stack grow the buffer */
+        if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
+                count+1, count, 1)) {
+            /* same as above: old buffer is gone, do not free it again */
+            saveBuf = stackBuffer;
+            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+            goto cleanup;
+        }
+        saveBuf = tempBuf;
+    }
+
+    saveBuf[count]=0;
+
+
+    /* allocate more space than required
+     * here we assume that every char requires
+     * no more than 2 wchar_ts
+     */
+    intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
+    intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
+
+    if(intTarget){
+
+        int32_t nulLen = 0;
+        int32_t remaining = intTargetCapacity;
+        wchar_t* pIntTarget=intTarget;
+        tempBuf = saveBuf;
+
+        /* now convert the mbs to wcs */
+        for(;;){
+
+            /* we can call the system API since we are sure that
+             * there is atleast 1 null in the input
+             */
+            retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
+
+            if(retVal==-1){
+                *pErrorCode = U_INVALID_CHAR_FOUND;
+                break;
+            }else if(retVal== remaining){/* should never occur */
+                int numWritten = (pIntTarget-intTarget);
+                if(!u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
+                                              &intTargetCapacity,
+                                              intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
+                                              numWritten,
+                                              sizeof(wchar_t))) {
+                    /* intTarget is NULL now; stop instead of converting into it.
+                     * Resetting pIntTarget makes the length computed below 0.
+                     */
+                    *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+                    pIntTarget = intTarget;
+                    break;
+                }
+                pIntTarget = intTarget;
+                remaining=intTargetCapacity;
+
+                if(nulLen!=count){ /*there are embedded nulls*/
+                    pIntTarget+=numWritten;
+                    remaining-=numWritten;
+                }
+
+            }else{
+                int32_t nulVal;
+                /*scan for nulls */
+                /* we donot check for limit since tempBuf is null terminated */
+                while(tempBuf[nulLen++] != 0){
+                }
+                nulVal = (nulLen < srcLength) ? 1 : 0;
+                pIntTarget = pIntTarget + retVal+nulVal;
+                remaining -=(retVal+nulVal);
+
+                /* check if we have reached the source limit*/
+                if(nulLen>=(count)){
+                    break;
+                }
+            }
+        }
+        count = (int32_t)(pIntTarget-intTarget);
+
+        if(0 < count && count <= destCapacity){
+            uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t));
+        }
+
+        if(pDestLength){
+            *pDestLength = count;
+        }
+
+        /* free the allocated memory */
+        uprv_free(intTarget);
+
+    }else{
+        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+cleanup:
+    /* are we still using stack buffer */
+    if(stackBuffer != saveBuf){
+        uprv_free(saveBuf);
+    }
+    u_terminateWChars(dest,destCapacity,count,pErrorCode);
+
+    u_releaseDefaultConverter(conv);
+
+    return dest;
+}
+#endif
+
+/**
+ * Converts a UChar string to a wchar_t string.
+ *
+ * Depending on the build configuration this is a plain copy (wchar_t is
+ * UTF-16), a call to u_strToUTF32() (wchar_t is UTF-32), or a call to the
+ * _strToWCS() helper.
+ *
+ * @param dest         output buffer (may be NULL only if destCapacity is 0)
+ * @param destCapacity capacity of dest; must not be negative
+ * @param pDestLength  if not NULL, receives the output length (set directly
+ *                     in the UTF-16 case, otherwise passed to the delegate)
+ * @param src          input (may be NULL only if srcLength is 0)
+ * @param srcLength    input length, or -1 for a NUL-terminated string
+ * @param pErrorCode   in/out ICU error code; set to
+ *                     U_ILLEGAL_ARGUMENT_ERROR for bad arguments
+ * @return NULL if pErrorCode is NULL, already indicates failure, or the
+ *         arguments are rejected; dest in the UTF-16 case; otherwise the
+ *         delegate's return value
+ */
+U_CAPI wchar_t* U_EXPORT2
+u_strToWCS(wchar_t *dest,
+           int32_t destCapacity,
+           int32_t *pDestLength,
+           const UChar *src,
+           int32_t srcLength,
+           UErrorCode *pErrorCode){
+
+    /* nothing to do without a usable error code */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
+        return NULL;
+    }
+
+    /* reject inconsistent source or destination arguments */
+    const bool badSource = (src==NULL && srcLength!=0) || srcLength < -1;
+    const bool badDest = (destCapacity<0) || (dest == NULL && destCapacity > 0);
+    if(badSource || badDest) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+#if defined(U_WCHAR_IS_UTF16)
+    /* wchar_t is UTF-16: the code units can be copied as they are */
+    if(srcLength == -1){
+        srcLength = u_strlen(src);
+    }
+    if(0 < srcLength && srcLength <= destCapacity){
+        u_memcpy((UChar *)dest, src, srcLength);
+    }
+    if(pDestLength){
+        *pDestLength = srcLength;
+    }
+
+    u_terminateUChars((UChar *)dest,destCapacity,srcLength,pErrorCode);
+
+    return dest;
+
+#elif defined(U_WCHAR_IS_UTF32)
+
+    return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
+                                  src, srcLength, pErrorCode);
+
+#else
+
+    return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
+
+#endif
+
+}
+
+#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
+/*
+ * Helper for u_strFromWCS() when wchar_t is neither UTF-16 nor UTF-32.
+ *
+ * Works in two steps:
+ *  1. converts the wchar_t source to chars with uprv_wcstombs(); for a
+ *     length-counted source this is done piece by piece, splitting at
+ *     embedded NULs, and the final unterminated piece is first copied to a
+ *     temporary wchar_t buffer so that it can be NUL-terminated;
+ *  2. converts those chars to UChars with the default converter
+ *     (ucnv_toUnicode), writing into dest and, once dest overflows, into a
+ *     scratch buffer purely to count the required length.
+ *
+ * dest/destCapacity - output buffer and its capacity in UChars.
+ * pDestLength       - if not NULL, receives the number of UChars counted in
+ *                     step 2 (not set when step 1 fails).
+ * src/srcLength     - wchar_t input; srcLength==-1 means NUL-terminated.
+ * pErrorCode        - in/out ICU error code; U_ILLEGAL_CHAR_FOUND if
+ *                     wcstombs rejects the input, U_MEMORY_ALLOCATION_ERROR
+ *                     if a temporary buffer cannot be allocated.
+ *
+ * Returns dest.
+ */
+static UChar*
+_strFromWCS( UChar *dest,
+             int32_t destCapacity,
+             int32_t *pDestLength,
+             const wchar_t *src,
+             int32_t srcLength,
+             UErrorCode *pErrorCode)
+{
+    int32_t retVal =0, count =0 ;
+    UConverter* conv = NULL;
+    UChar* pTarget = NULL;
+    UChar* pTargetLimit = NULL;
+    UChar* target = NULL;
+
+    UChar uStack [_STACK_BUFFER_CAPACITY];
+
+    wchar_t wStack[_STACK_BUFFER_CAPACITY];
+    wchar_t* pWStack = wStack;
+
+
+    char cStack[_STACK_BUFFER_CAPACITY];
+    int32_t cStackCap = _STACK_BUFFER_CAPACITY;
+    char* pCSrc=cStack;
+    char* pCSave=pCSrc;
+    char* pCSrcLimit=NULL;
+
+    const wchar_t* pSrc = src;
+    const wchar_t* pSrcLimit = NULL;
+
+    if(srcLength ==-1){
+        /* if the wchar_t source is null terminated we can safely
+         * assume that there are no embedded nulls, this is a fast
+         * path for null terminated strings.
+         */
+        for(;;){
+            /* convert wchars to chars */
+            retVal = uprv_wcstombs(pCSrc,src, cStackCap);
+
+            if(retVal == -1){
+                *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+                goto cleanup;
+            }else if(retVal >= (cStackCap-1)){
+                /* Should rarely occur */
+                if(!u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
+                        cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char))) {
+                    /* the helper already released the old buffer; make sure
+                     * cleanup does not free it again and stop retrying
+                     */
+                    pCSave = cStack;
+                    *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+                    goto cleanup;
+                }
+                pCSave = pCSrc;
+            }else{
+                /* converted every thing */
+                pCSrc = pCSrc+retVal;
+                break;
+            }
+        }
+
+    }else{
+        /* here the source is not null terminated
+         * so it may have nulls embeded and we need to
+         * do some extra processing
+         */
+        int32_t remaining =cStackCap;
+
+        pSrcLimit = src + srcLength;
+
+        for(;;){
+            int32_t nulLen = 0;
+
+            /* find nulls in the string */
+            while(nulLen<srcLength && pSrc[nulLen++]!=0){
+            }
+
+            if((pSrc+nulLen) < pSrcLimit){
+                /* check if we have enough room in pCSrc */
+                if(remaining < (nulLen * MB_CUR_MAX)){
+                    /* should rarely occur */
+                    int32_t len = (pCSrc-pCSave);
+                    pCSrc = pCSave;
+                    /* we do not have enough room so grow the buffer*/
+                    if(!u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
+                            _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char))) {
+                        /* old buffer is gone; do not free it again in cleanup */
+                        pCSave = cStack;
+                        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+                        goto cleanup;
+                    }
+
+                    pCSave = pCSrc;
+                    pCSrc = pCSave+len;
+                    remaining = cStackCap-(pCSrc - pCSave);
+                }
+
+                /* we have found a null so convert the
+                 * chunk from begining of non-null char to null
+                 */
+                retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
+
+                if(retVal==-1){
+                    /* an error occurred bail out */
+                    *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+                    goto cleanup;
+                }
+
+                pCSrc += retVal+1 /* already null terminated */;
+
+                pSrc += nulLen; /* skip past the null */
+                srcLength-=nulLen; /* decrement the srcLength */
+                /* Recompute the free space from the capacity. Subtracting the
+                 * total used so far from the previous value would count earlier
+                 * chunks repeatedly and could drive `remaining` negative, which
+                 * then turns into a huge size_t bound for wcstombs.
+                 */
+                remaining = cStackCap-(pCSrc - pCSave);
+
+
+            }else{
+                /* the source is not null terminated and we are
+                 * end of source so we copy the source to a temp buffer
+                 * null terminate it and convert wchar_ts to chars
+                 */
+                if(nulLen >= _STACK_BUFFER_CAPACITY){
+                    /* Should rarely occcur */
+                    /* allocate new buffer buffer */
+                    pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
+                    if(pWStack==NULL){
+                        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+                        goto cleanup;
+                    }
+                }
+                if(nulLen>0){
+                    /* copy the contents to tempStack */
+                    uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t));
+                }
+
+                /* null terminate the tempBuffer */
+                pWStack[nulLen] =0 ;
+
+                if(remaining < (nulLen * MB_CUR_MAX)){
+                    /* Should rarely occur */
+                    int32_t len = (pCSrc-pCSave);
+                    pCSrc = pCSave;
+                    /* we do not have enough room so grow the buffer*/
+                    if(!u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
+                            cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char))) {
+                        /* old buffer is gone; do not free it again in cleanup */
+                        pCSave = cStack;
+                        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+                        goto cleanup;
+                    }
+
+                    pCSave = pCSrc;
+                    pCSrc = pCSave+len;
+                    remaining = cStackCap-(pCSrc - pCSave);
+                }
+                /* convert to chars */
+                retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
+
+                if(retVal==-1){
+                    /* same handling as for the earlier chunks; without this
+                     * check pCSrc would be moved backwards by one
+                     */
+                    *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+                    goto cleanup;
+                }
+
+                pCSrc += retVal;
+                pSrc += nulLen;
+                srcLength-=nulLen; /* decrement the srcLength */
+                break;
+            }
+        }
+    }
+
+    /* OK..now we have converted from wchar_ts to chars now
+     * convert chars to UChars
+     */
+    pCSrcLimit = pCSrc;
+    pCSrc = pCSave;
+    pTarget = target= dest;
+    pTargetLimit = dest + destCapacity;
+
+    conv= u_getDefaultConverter(pErrorCode);
+
+    if(U_FAILURE(*pErrorCode)|| conv==NULL){
+        goto cleanup;
+    }
+
+    for(;;) {
+
+        *pErrorCode = U_ZERO_ERROR;
+
+        /* convert to stack buffer*/
+        ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
+
+        /* increment count to number written to stack */
+        count+= pTarget - target;
+
+        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
+            /* dest is full: keep converting into the scratch buffer just to count */
+            target = uStack;
+            pTarget = uStack;
+            pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
+        } else {
+            break;
+        }
+
+    }
+
+    if(pDestLength){
+        *pDestLength =count;
+    }
+
+    u_terminateUChars(dest,destCapacity,count,pErrorCode);
+
+cleanup:
+
+    if(cStack != pCSave){
+        uprv_free(pCSave);
+    }
+
+    if(wStack != pWStack){
+        uprv_free(pWStack);
+    }
+
+    u_releaseDefaultConverter(conv);
+
+    return dest;
+}
+#endif
+
+/**
+ * Converts a wchar_t string to a UChar string.
+ *
+ * Depending on the build configuration this is a plain copy (wchar_t is
+ * UTF-16), a call to u_strFromUTF32() (wchar_t is UTF-32), or a call to the
+ * _strFromWCS() helper.
+ *
+ * @param dest         output buffer (may be NULL only if destCapacity is 0)
+ * @param destCapacity capacity of dest; must not be negative
+ * @param pDestLength  if not NULL, receives the output length (set directly
+ *                     in the UTF-16 case, otherwise passed to the delegate)
+ * @param src          input (may be NULL only if srcLength is 0)
+ * @param srcLength    input length, or -1 for a NUL-terminated string
+ * @param pErrorCode   in/out ICU error code; set to
+ *                     U_ILLEGAL_ARGUMENT_ERROR for bad arguments
+ * @return NULL if pErrorCode is NULL, already indicates failure, or the
+ *         arguments are rejected; dest in the UTF-16 case; otherwise the
+ *         delegate's return value
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromWCS(UChar *dest,
+             int32_t destCapacity,
+             int32_t *pDestLength,
+             const wchar_t *src,
+             int32_t srcLength,
+             UErrorCode *pErrorCode)
+{
+
+    /* nothing to do without a usable error code */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
+        return NULL;
+    }
+
+    /* reject inconsistent source or destination arguments */
+    const bool badSource = (src==NULL && srcLength!=0) || srcLength < -1;
+    const bool badDest = (destCapacity<0) || (dest == NULL && destCapacity > 0);
+    if(badSource || badDest) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+#if defined(U_WCHAR_IS_UTF16)
+    /* wchar_t is UTF-16: the code units can be copied as they are */
+    if(srcLength == -1){
+        srcLength = u_strlen((const UChar *)src);
+    }
+    if(0 < srcLength && srcLength <= destCapacity){
+        u_memcpy(dest, (const UChar *)src, srcLength);
+    }
+    if(pDestLength){
+        *pDestLength = srcLength;
+    }
+
+    u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
+
+    return dest;
+
+#elif defined(U_WCHAR_IS_UTF32)
+
+    return u_strFromUTF32(dest, destCapacity, pDestLength,
+                          (UChar32*)src, srcLength, pErrorCode);
+
+#else
+
+    return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
+
+#endif
+
+}
+
+#endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
diff --git a/thirdparty/icu4c/common/ustrcase.cpp b/thirdparty/icu4c/common/ustrcase.cpp
new file mode 100644
index 0000000000..618e847c65
--- /dev/null
+++ b/thirdparty/icu4c/common/ustrcase.cpp
@@ -0,0 +1,1818 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2001-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ustrcase.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002feb20
+* created by: Markus W. Scherer
+*
+* Implementation file for string casing C API functions.
+* Uses functions from uchar.c for basic functionality that requires access
+* to the Unicode Character Database (uprops.dat).
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/brkiter.h"
+#include "unicode/casemap.h"
+#include "unicode/edits.h"
+#include "unicode/stringoptions.h"
+#include "unicode/ustring.h"
+#include "unicode/ucasemap.h"
+#include "unicode/ubrk.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "ucase.h"
+#include "ucasemap_imp.h"
+#include "ustr_imp.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+/**
+ * Final status check for a case-mapping function.
+ *
+ * If errorCode does not already indicate failure: sets
+ * U_BUFFER_OVERFLOW_ERROR when destIndex exceeds destCapacity; otherwise,
+ * when an Edits object was supplied, lets it copy its error into errorCode.
+ *
+ * @return destIndex, unchanged in every case.
+ */
+int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
+                                   Edits *edits, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        // An earlier failure takes precedence; leave it untouched.
+        return destIndex;
+    }
+    if (destIndex > destCapacity) {
+        errorCode = U_BUFFER_OVERFLOW_ERROR;
+        return destIndex;
+    }
+    if (edits != NULL) {
+        edits->copyErrorTo(errorCode);
+    }
+    return destIndex;
+}
+
+/*
+ * Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH.
+ *
+ * `result` is decoded as follows:
+ *  - result<0: the code point ~result is written as-is and recorded in
+ *    `edits` as cpLength unchanged units; nothing is written if
+ *    U_OMIT_UNCHANGED_TEXT is set in `options`;
+ *  - 0<=result<=UCASE_MAX_STRING_LENGTH: `result` is the length of the
+ *    replacement string that `s` points to;
+ *  - otherwise: `result` is a single replacement code point.
+ * Replacements are recorded in `edits` (if not NULL) with addReplace().
+ *
+ * Returns the new destIndex. When the output does not fit into
+ * dest[0..destCapacity[, nothing is written for it but destIndex is still
+ * advanced by the output length. Returns -1 if destIndex would exceed
+ * INT32_MAX.
+ */
+inline int32_t
+appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
+ int32_t result, const UChar *s,
+ int32_t cpLength, uint32_t options, icu::Edits *edits) {
+ // c: code point to append, or negative (U_SENTINEL) when a string from s is appended instead.
+ UChar32 c;
+ // length: number of UChars that will be appended.
+ int32_t length;
+
+ /* decode the result */
+ if(result<0) {
+ /* (not) original code point */
+ if(edits!=NULL) {
+ edits->addUnchanged(cpLength);
+ }
+ if(options & U_OMIT_UNCHANGED_TEXT) {
+ return destIndex;
+ }
+ c=~result;
+ if(destIndex<destCapacity && c<=0xffff) { // BMP slightly-fastpath
+ dest[destIndex++]=(UChar)c;
+ return destIndex;
+ }
+ length=cpLength;
+ } else {
+ if(result<=UCASE_MAX_STRING_LENGTH) {
+ c=U_SENTINEL;
+ length=result;
+ } else if(destIndex<destCapacity && result<=0xffff) { // BMP slightly-fastpath
+ dest[destIndex++]=(UChar)result;
+ if(edits!=NULL) {
+ edits->addReplace(cpLength, 1);
+ }
+ return destIndex;
+ } else {
+ c=result;
+ length=U16_LENGTH(c);
+ }
+ if(edits!=NULL) {
+ edits->addReplace(cpLength, length);
+ }
+ }
+ // Guard the destIndex+length additions below against exceeding INT32_MAX.
+ if(length>(INT32_MAX-destIndex)) {
+ return -1; // integer overflow
+ }
+
+ if(destIndex<destCapacity) {
+ /* append the result */
+ if(c>=0) {
+ /* code point */
+ UBool isError=FALSE;
+ U16_APPEND(dest, destIndex, destCapacity, c, isError);
+ if(isError) {
+ /* overflow, nothing written */
+ destIndex+=length;
+ }
+ } else {
+ /* string */
+ if((destIndex+length)<=destCapacity) {
+ while(length>0) {
+ dest[destIndex++]=*s++;
+ --length;
+ }
+ } else {
+ /* overflow */
+ destIndex+=length;
+ }
+ }
+ } else {
+ /* preflight */
+ destIndex+=length;
+ }
+ return destIndex;
+}
+
+/**
+ * Appends one code unit.
+ *
+ * Stores c at dest[destIndex] when that position is inside the buffer.
+ *
+ * @return destIndex+1, or -1 if the position is outside the buffer and
+ *         destIndex is already INT32_MAX (the increment would overflow).
+ */
+inline int32_t
+appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
+    if(destIndex>=destCapacity) {
+        // No room: only count, unless counting would overflow.
+        if(destIndex==INT32_MAX) {
+            return -1; // integer overflow
+        }
+    } else {
+        dest[destIndex]=c;
+    }
+    return destIndex+1;
+}
+
+/**
+ * Appends `length` unchanged code units from s.
+ *
+ * The span is always recorded in `edits` (if not NULL) as unchanged. If
+ * U_OMIT_UNCHANGED_TEXT is set in `options`, nothing is written and
+ * destIndex is returned as-is. Otherwise the units are copied only when they
+ * fit completely into dest[0..destCapacity[.
+ *
+ * @return the new destIndex (advanced by length even if nothing was copied),
+ *         or -1 if it would exceed INT32_MAX.
+ */
+int32_t
+appendNonEmptyUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
+                        const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
+    if(edits!=NULL) {
+        edits->addUnchanged(length);
+    }
+    if((options & U_OMIT_UNCHANGED_TEXT)!=0) {
+        return destIndex;
+    }
+    const int32_t headroom=INT32_MAX-destIndex;
+    if(length>headroom) {
+        return -1; // integer overflow
+    }
+    const int32_t newIndex=destIndex+length;
+    if(newIndex<=destCapacity) {
+        u_memcpy(dest+destIndex, s, length);
+    }
+    return newIndex;
+}
+
+/**
+ * Like appendNonEmptyUnchanged(), but does nothing (and returns destIndex
+ * unchanged) when length is zero or negative.
+ */
+inline int32_t
+appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
+                const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
+    return (length > 0) ?
+        appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits) :
+        destIndex;
+}
+
+/**
+ * Context iterator callback over the UTF-16 text described by a UCaseContext.
+ *
+ * @param context pointer to the UCaseContext
+ * @param dir     <0: restart backward iteration from cpStart;
+ *                >0: restart forward iteration from cpLimit;
+ *                 0: continue in the direction stored in the context
+ * @return the next code point in the active direction, or U_SENTINEL once
+ *         the start (backward) or limit (forward) of the text is reached
+ */
+UChar32 U_CALLCONV
+utf16_caseContextIterator(void *context, int8_t dir) {
+    UCaseContext *ctx=(UCaseContext *)context;
+
+    if(dir==0) {
+        /* continue current iteration direction */
+        dir=ctx->dir;
+    } else {
+        /* reset for backward (dir<0) or forward (dir>0) iteration */
+        ctx->index= (dir<0) ? ctx->cpStart : ctx->cpLimit;
+        ctx->dir=dir;
+    }
+
+    const UChar *text=(const UChar *)ctx->p;
+    UChar32 cp;
+    if(dir<0) {
+        if(ctx->start>=ctx->index) {
+            return U_SENTINEL;
+        }
+        U16_PREV(text, ctx->start, ctx->index, cp);
+    } else {
+        if(ctx->index>=ctx->limit) {
+            return U_SENTINEL;
+        }
+        U16_NEXT(text, ctx->index, ctx->limit, cp);
+    }
+    return cp;
+}
+
+/**
+ * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
+ * caseLocale < 0: Case-folds [srcStart..srcLimit[.
+ *
+ * Output is appended to dest starting at index 0; runs of unmapped text are
+ * flushed in one piece via appendUnchanged(), and changes are recorded in
+ * edits when it is not NULL.
+ *
+ * Returns the output length (which may exceed destCapacity; this function
+ * does not set an overflow error itself). If an append helper reports
+ * integer overflow, errorCode is set to U_INDEX_OUTOFBOUNDS_ERROR and 0 is
+ * returned.
+ */
+int32_t toLower(int32_t caseLocale, uint32_t options,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
+ icu::Edits *edits, UErrorCode &errorCode) {
+ // Pick the Latin fast-path delta table: the TR_LT variant is used for
+ // Turkish/Lithuanian lowercasing and for case folding with non-default options.
+ const int8_t *latinToLower;
+ if (caseLocale == UCASE_LOC_ROOT ||
+ (caseLocale >= 0 ?
+ !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
+ (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
+ latinToLower = LatinCase::TO_LOWER_NORMAL;
+ } else {
+ latinToLower = LatinCase::TO_LOWER_TR_LT;
+ }
+ const UTrie2 *trie = ucase_getTrie();
+ int32_t destIndex = 0;
+ // prev: start of the pending run of source text that has not been appended yet.
+ int32_t prev = srcStart;
+ int32_t srcIndex = srcStart;
+ for (;;) {
+ // fast path for simple cases
+ UChar lead = 0;
+ while (srcIndex < srcLimit) {
+ lead = src[srcIndex];
+ int32_t delta;
+ if (lead < LatinCase::LONG_S) {
+ int8_t d = latinToLower[lead];
+ // EXC entries need the slow path below.
+ if (d == LatinCase::EXC) { break; }
+ ++srcIndex;
+ if (d == 0) { continue; }
+ delta = d;
+ } else if (lead >= 0xd800) {
+ break; // surrogate or higher
+ } else {
+ uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
+ if (UCASE_HAS_EXCEPTION(props)) { break; }
+ ++srcIndex;
+ if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
+ continue;
+ }
+ }
+ // Simple mapping: flush the unchanged run before this unit, then append the mapped unit.
+ lead += static_cast<UChar>(delta);
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, srcIndex - 1 - prev, options, edits);
+ if (destIndex >= 0) {
+ destIndex = appendUChar(dest, destIndex, destCapacity, lead);
+ if (edits != nullptr) {
+ edits->addReplace(1, 1);
+ }
+ }
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ prev = srcIndex;
+ }
+ if (srcIndex >= srcLimit) {
+ break;
+ }
+ // slow path
+ // Assemble the code point at srcIndex (joining a surrogate pair if present).
+ int32_t cpStart = srcIndex++;
+ UChar trail;
+ UChar32 c;
+ if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) {
+ c = U16_GET_SUPPLEMENTARY(lead, trail);
+ ++srcIndex;
+ } else {
+ c = lead;
+ }
+ const UChar *s;
+ if (caseLocale >= 0) {
+ csc->cpStart = cpStart;
+ csc->cpLimit = srcIndex;
+ c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale);
+ } else {
+ c = ucase_toFullFolding(c, &s, options);
+ }
+ // A negative result is left in the pending unchanged run; otherwise flush and append the mapping.
+ if (c >= 0) {
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, cpStart - prev, options, edits);
+ if (destIndex >= 0) {
+ destIndex = appendResult(dest, destIndex, destCapacity, c, s,
+ srcIndex - cpStart, options, edits);
+ }
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ prev = srcIndex;
+ }
+ }
+ // Flush whatever unchanged text is still pending at the end.
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, srcIndex - prev, options, edits);
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ return destIndex;
+}
+
+/**
+ * Uppercases src[0..srcLength[ into dest for the given caseLocale.
+ *
+ * Output is appended to dest starting at index 0; runs of unmapped text are
+ * flushed in one piece via appendUnchanged(), and changes are recorded in
+ * edits when it is not NULL. csc is updated with the current code point
+ * boundaries before each ucase_toFullUpper() call.
+ *
+ * Returns the output length (which may exceed destCapacity; this function
+ * does not set an overflow error itself). If an append helper reports
+ * integer overflow, errorCode is set to U_INDEX_OUTOFBOUNDS_ERROR and 0 is
+ * returned.
+ */
+int32_t toUpper(int32_t caseLocale, uint32_t options,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, UCaseContext *csc, int32_t srcLength,
+ icu::Edits *edits, UErrorCode &errorCode) {
+ // Pick the Latin fast-path delta table; Turkish has its own.
+ const int8_t *latinToUpper;
+ if (caseLocale == UCASE_LOC_TURKISH) {
+ latinToUpper = LatinCase::TO_UPPER_TR;
+ } else {
+ latinToUpper = LatinCase::TO_UPPER_NORMAL;
+ }
+ const UTrie2 *trie = ucase_getTrie();
+ int32_t destIndex = 0;
+ // prev: start of the pending run of source text that has not been appended yet.
+ int32_t prev = 0;
+ int32_t srcIndex = 0;
+ for (;;) {
+ // fast path for simple cases
+ UChar lead = 0;
+ while (srcIndex < srcLength) {
+ lead = src[srcIndex];
+ int32_t delta;
+ if (lead < LatinCase::LONG_S) {
+ int8_t d = latinToUpper[lead];
+ // EXC entries need the slow path below.
+ if (d == LatinCase::EXC) { break; }
+ ++srcIndex;
+ if (d == 0) { continue; }
+ delta = d;
+ } else if (lead >= 0xd800) {
+ break; // surrogate or higher
+ } else {
+ uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
+ if (UCASE_HAS_EXCEPTION(props)) { break; }
+ ++srcIndex;
+ if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
+ continue;
+ }
+ }
+ // Simple mapping: flush the unchanged run before this unit, then append the mapped unit.
+ lead += static_cast<UChar>(delta);
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, srcIndex - 1 - prev, options, edits);
+ if (destIndex >= 0) {
+ destIndex = appendUChar(dest, destIndex, destCapacity, lead);
+ if (edits != nullptr) {
+ edits->addReplace(1, 1);
+ }
+ }
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ prev = srcIndex;
+ }
+ if (srcIndex >= srcLength) {
+ break;
+ }
+ // slow path
+ // Assemble the code point at srcIndex (joining a surrogate pair if present).
+ int32_t cpStart;
+ csc->cpStart = cpStart = srcIndex++;
+ UChar trail;
+ UChar32 c;
+ if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) {
+ c = U16_GET_SUPPLEMENTARY(lead, trail);
+ ++srcIndex;
+ } else {
+ c = lead;
+ }
+ csc->cpLimit = srcIndex;
+ const UChar *s;
+ c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale);
+ // A negative result is left in the pending unchanged run; otherwise flush and append the mapping.
+ if (c >= 0) {
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, cpStart - prev, options, edits);
+ if (destIndex >= 0) {
+ destIndex = appendResult(dest, destIndex, destCapacity, c, s,
+ srcIndex - cpStart, options, edits);
+ }
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ prev = srcIndex;
+ }
+ }
+ // Flush whatever unchanged text is still pending at the end.
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, srcIndex - prev, options, edits);
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ return destIndex;
+}
+
+} // namespace
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecases src[0..srcLength[ into dest.
+ *
+ * The text is cut into segments at the boundaries reported by iter
+ * (first()/next()). In each segment the leading characters that are skipped
+ * by the adjustment options are copied unchanged, the first remaining code
+ * point is titlecased with ucase_toFullTitle(), and the rest of the segment
+ * is lowercased with toLower() -- or copied unchanged when
+ * U_TITLECASE_NO_LOWERCASE is set. For caseLocale==UCASE_LOC_DUTCH an
+ * initial "ij"/"Ij"/"iJ"/"IJ" gets a capital J as well.
+ *
+ * @param caseLocale  case locale constant (UCASE_LOC_...)
+ * @param options     titlecasing option bits; rejected combinations make
+ *                    ustrcase_checkTitleAdjustmentOptions() fail
+ * @param iter        break iterator; the caller must already have set its text
+ * @param dest        output buffer; may be NULL when only the length is wanted
+ * @param destCapacity capacity of dest
+ * @param edits       if not NULL, records the changes made
+ * @param errorCode   in/out ICU error code; U_INDEX_OUTOFBOUNDS_ERROR when
+ *                    the output length overflows int32_t,
+ *                    U_BUFFER_OVERFLOW_ERROR (via checkOverflowAndEditsError)
+ *                    when the output does not fit
+ * @return the output length; 0 on option or integer-overflow errors
+ */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter,
+                         UChar *dest, int32_t destCapacity,
+                         const UChar *src, int32_t srcLength,
+                         icu::Edits *edits,
+                         UErrorCode &errorCode) {
+    if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) {
+        return 0;
+    }
+
+    /* set up local variables */
+    UCaseContext csc=UCASECONTEXT_INITIALIZER;
+    csc.p=(void *)src;
+    csc.limit=srcLength;
+    int32_t destIndex=0;
+    int32_t prev=0;
+    UBool isFirstIndex=TRUE;
+
+    /* titlecasing loop */
+    while(prev<srcLength) {
+        /* find next index where to titlecase */
+        int32_t index;
+        if(isFirstIndex) {
+            isFirstIndex=FALSE;
+            index=iter->first();
+        } else {
+            index=iter->next();
+        }
+        if(index==UBRK_DONE || index>srcLength) {
+            index=srcLength;
+        }
+
+        /*
+         * Segment [prev..index[ into 3 parts:
+         * a) skipped characters (copy as-is) [prev..titleStart[
+         * b) first letter (titlecase)         [titleStart..titleLimit[
+         * c) subsequent characters (lowercase) [titleLimit..index[
+         */
+        if(prev<index) {
+            // Find and copy skipped characters [prev..titleStart[
+            int32_t titleStart=prev;
+            int32_t titleLimit=prev;
+            UChar32 c;
+            U16_NEXT(src, titleLimit, index, c);
+            if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
+                // Adjust the titlecasing index to the next cased character,
+                // or to the next letter/number/symbol/private use.
+                // Stop with titleStart<titleLimit<=index
+                // if there is a character to be titlecased,
+                // or else stop with titleStart==titleLimit==index.
+                UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
+                while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
+                    titleStart=titleLimit;
+                    if(titleLimit==index) {
+                        break;
+                    }
+                    U16_NEXT(src, titleLimit, index, c);
+                }
+                if (prev < titleStart) {
+                    destIndex=appendUnchanged(dest, destIndex, destCapacity,
+                                              src+prev, titleStart-prev, options, edits);
+                    if(destIndex<0) {
+                        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                        return 0;
+                    }
+                }
+            }
+
+            if(titleStart<titleLimit) {
+                /* titlecase c which is from [titleStart..titleLimit[ */
+                csc.cpStart=titleStart;
+                csc.cpLimit=titleLimit;
+                const UChar *s;
+                c=ucase_toFullTitle(c, utf16_caseContextIterator, &csc, &s, caseLocale);
+                destIndex=appendResult(dest, destIndex, destCapacity, c, s,
+                                       titleLimit-titleStart, options, edits);
+                if(destIndex<0) {
+                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                    return 0;
+                }
+
+                /* Special case Dutch IJ titlecasing */
+                if (titleStart+1 < index &&
+                        caseLocale == UCASE_LOC_DUTCH &&
+                        (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
+                    if (src[titleStart+1] == 0x006A) {
+                        // Lowercase j after I/i: write a capital J instead.
+                        destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
+                        if(destIndex<0) {
+                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                            return 0;
+                        }
+                        if(edits!=NULL) {
+                            edits->addReplace(1, 1);
+                        }
+                        titleLimit++;
+                    } else if (src[titleStart+1] == 0x004A) {
+                        // Keep the capital J from getting lowercased.
+                        destIndex=appendUnchanged(dest, destIndex, destCapacity,
+                                                  src+titleStart+1, 1, options, edits);
+                        if(destIndex<0) {
+                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                            return 0;
+                        }
+                        titleLimit++;
+                    }
+                }
+
+                /* lowercase [titleLimit..index[ */
+                if(titleLimit<index) {
+                    if((options&U_TITLECASE_NO_LOWERCASE)==0) {
+                        /* Normal operation: Lowercase the rest of the word. */
+                        // dest may be NULL when the caller only wants the
+                        // length; offsetting a null pointer is undefined, so
+                        // pass NULL through unchanged in that case.
+                        destIndex+=
+                            toLower(
+                                caseLocale, options,
+                                (dest==nullptr) ? nullptr : dest+destIndex,
+                                destCapacity-destIndex,
+                                src, &csc, titleLimit, index,
+                                edits, errorCode);
+                        // Overflow is checked once at the end, for the whole output.
+                        if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
+                            errorCode=U_ZERO_ERROR;
+                        }
+                        if(U_FAILURE(errorCode)) {
+                            return destIndex;
+                        }
+                    } else {
+                        /* Optionally just copy the rest of the word unchanged. */
+                        destIndex=appendUnchanged(dest, destIndex, destCapacity,
+                                                  src+titleLimit, index-titleLimit, options, edits);
+                        if(destIndex<0) {
+                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                            return 0;
+                        }
+                    }
+                }
+            }
+        }
+
+        prev=index;
+    }
+
+    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
+}
+
+#endif // !UCONFIG_NO_BREAK_ITERATION
+
+U_NAMESPACE_BEGIN
+namespace GreekUpper {
+
+// Data generated by prototype code, see
+// http://site.icu-project.org/design/case/greek-upper
+// TODO: Move this data into ucase.icu.
+// Table with one 16-bit entry per code point of U+0370..U+03FF (144 entries,
+// listed in code point order).
+// Each entry is either 0 or a value in the range 0x0370..0x03FF, optionally
+// OR-ed with HAS_* flag constants that are defined outside this table.
+// NOTE(review): presumably indexed by (c - 0x370), with the low bits holding
+// the base uppercase letter and 0 meaning "no data" -- confirm against the
+// code that reads this table.
+static const uint16_t data0370[] = {
+ // U+0370..03FF
+ 0x0370,
+ 0x0370,
+ 0x0372,
+ 0x0372,
+ 0,
+ 0,
+ 0x0376,
+ 0x0376,
+ 0,
+ 0,
+ 0x037A,
+ 0x03FD,
+ 0x03FE,
+ 0x03FF,
+ 0,
+ 0x037F,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0,
+ 0x0395 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0,
+ 0x039F | HAS_VOWEL | HAS_ACCENT,
+ 0,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
+ 0x0391 | HAS_VOWEL,
+ 0x0392,
+ 0x0393,
+ 0x0394,
+ 0x0395 | HAS_VOWEL,
+ 0x0396,
+ 0x0397 | HAS_VOWEL,
+ 0x0398,
+ 0x0399 | HAS_VOWEL,
+ 0x039A,
+ 0x039B,
+ 0x039C,
+ 0x039D,
+ 0x039E,
+ 0x039F | HAS_VOWEL,
+ 0x03A0,
+ 0x03A1,
+ 0,
+ 0x03A3,
+ 0x03A4,
+ 0x03A5 | HAS_VOWEL,
+ 0x03A6,
+ 0x03A7,
+ 0x03A8,
+ 0x03A9 | HAS_VOWEL,
+ 0x0399 | HAS_VOWEL | HAS_DIALYTIKA,
+ 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0395 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
+ 0x0391 | HAS_VOWEL,
+ 0x0392,
+ 0x0393,
+ 0x0394,
+ 0x0395 | HAS_VOWEL,
+ 0x0396,
+ 0x0397 | HAS_VOWEL,
+ 0x0398,
+ 0x0399 | HAS_VOWEL,
+ 0x039A,
+ 0x039B,
+ 0x039C,
+ 0x039D,
+ 0x039E,
+ 0x039F | HAS_VOWEL,
+ 0x03A0,
+ 0x03A1,
+ 0x03A3,
+ 0x03A3,
+ 0x03A4,
+ 0x03A5 | HAS_VOWEL,
+ 0x03A6,
+ 0x03A7,
+ 0x03A8,
+ 0x03A9 | HAS_VOWEL,
+ 0x0399 | HAS_VOWEL | HAS_DIALYTIKA,
+ 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,
+ 0x039F | HAS_VOWEL | HAS_ACCENT,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03CF,
+ 0x0392,
+ 0x0398,
+ 0x03D2,
+ 0x03D2 | HAS_ACCENT,
+ 0x03D2 | HAS_DIALYTIKA,
+ 0x03A6,
+ 0x03A0,
+ 0x03CF,
+ 0x03D8,
+ 0x03D8,
+ 0x03DA,
+ 0x03DA,
+ 0x03DC,
+ 0x03DC,
+ 0x03DE,
+ 0x03DE,
+ 0x03E0,
+ 0x03E0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0x039A,
+ 0x03A1,
+ 0x03F9,
+ 0x037F,
+ 0x03F4,
+ 0x0395 | HAS_VOWEL,
+ 0,
+ 0x03F7,
+ 0x03F7,
+ 0x03F9,
+ 0x03FA,
+ 0x03FA,
+ 0x03FC,
+ 0x03FD,
+ 0x03FE,
+ 0x03FF,
+};
+
+static const uint16_t data1F00[] = {
+ // U+1F00..1FFF
+ 0x0391 | HAS_VOWEL,
+ 0x0391 | HAS_VOWEL,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL,
+ 0x0391 | HAS_VOWEL,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0395 | HAS_VOWEL,
+ 0x0395 | HAS_VOWEL,
+ 0x0395 | HAS_VOWEL | HAS_ACCENT,
+ 0x0395 | HAS_VOWEL | HAS_ACCENT,
+ 0x0395 | HAS_VOWEL | HAS_ACCENT,
+ 0x0395 | HAS_VOWEL | HAS_ACCENT,
+ 0,
+ 0,
+ 0x0395 | HAS_VOWEL,
+ 0x0395 | HAS_VOWEL,
+ 0x0395 | HAS_VOWEL | HAS_ACCENT,
+ 0x0395 | HAS_VOWEL | HAS_ACCENT,
+ 0x0395 | HAS_VOWEL | HAS_ACCENT,
+ 0x0395 | HAS_VOWEL | HAS_ACCENT,
+ 0,
+ 0,
+ 0x0397 | HAS_VOWEL,
+ 0x0397 | HAS_VOWEL,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL,
+ 0x0397 | HAS_VOWEL,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL,
+ 0x0399 | HAS_VOWEL,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL,
+ 0x0399 | HAS_VOWEL,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x039F | HAS_VOWEL,
+ 0x039F | HAS_VOWEL,
+ 0x039F | HAS_VOWEL | HAS_ACCENT,
+ 0x039F | HAS_VOWEL | HAS_ACCENT,
+ 0x039F | HAS_VOWEL | HAS_ACCENT,
+ 0x039F | HAS_VOWEL | HAS_ACCENT,
+ 0,
+ 0,
+ 0x039F | HAS_VOWEL,
+ 0x039F | HAS_VOWEL,
+ 0x039F | HAS_VOWEL | HAS_ACCENT,
+ 0x039F | HAS_VOWEL | HAS_ACCENT,
+ 0x039F | HAS_VOWEL | HAS_ACCENT,
+ 0x039F | HAS_VOWEL | HAS_ACCENT,
+ 0,
+ 0,
+ 0x03A5 | HAS_VOWEL,
+ 0x03A5 | HAS_VOWEL,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0,
+ 0x03A5 | HAS_VOWEL,
+ 0,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL,
+ 0x03A9 | HAS_VOWEL,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL,
+ 0x03A9 | HAS_VOWEL,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0395 | HAS_VOWEL | HAS_ACCENT,
+ 0x0395 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x039F | HAS_VOWEL | HAS_ACCENT,
+ 0x039F | HAS_VOWEL | HAS_ACCENT,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0,
+ 0,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL,
+ 0x0391 | HAS_VOWEL,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL,
+ 0x0391 | HAS_VOWEL,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_ACCENT,
+ 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0,
+ 0x0399 | HAS_VOWEL,
+ 0,
+ 0,
+ 0,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x0395 | HAS_VOWEL | HAS_ACCENT,
+ 0x0395 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_ACCENT,
+ 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0,
+ 0,
+ 0,
+ 0x0399 | HAS_VOWEL,
+ 0x0399 | HAS_VOWEL,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
+ 0,
+ 0,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
+ 0x0399 | HAS_VOWEL,
+ 0x0399 | HAS_VOWEL,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0x0399 | HAS_VOWEL | HAS_ACCENT,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0x03A5 | HAS_VOWEL,
+ 0x03A5 | HAS_VOWEL,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
+ 0x03A1,
+ 0x03A1,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
+ 0x03A5 | HAS_VOWEL,
+ 0x03A5 | HAS_VOWEL,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A5 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
+ 0x039F | HAS_VOWEL | HAS_ACCENT,
+ 0x039F | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_ACCENT,
+ 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
+ 0,
+ 0,
+ 0,
+};
+
+// U+2126 Ohm sign
+static const uint16_t data2126 = 0x03A9 | HAS_VOWEL; // same packed form as the table entries above: base 0x03A9 plus the vowel flag
+
+uint32_t getLetterData(UChar32 c) { // Returns the data0370/data1F00/data2126 value for c, or 0 for code points outside those ranges.
+ if (c < 0x370 || 0x2126 < c || (0x3ff < c && c < 0x1f00)) { // quick reject: below 0x370, above 0x2126, or in the gap 0x400..0x1eff
+ return 0;
+ } else if (c <= 0x3ff) {
+ return data0370[c - 0x370]; // table index is the offset from 0x370
+ } else if (c <= 0x1fff) {
+ return data1F00[c - 0x1f00]; // table index is the offset from 0x1f00
+ } else if (c == 0x2126) {
+ return data2126;
+ } else {
+ return 0; // 0x2000..0x2125: no data
+ }
+}
+
+uint32_t getDiacriticData(UChar32 c) { // Maps a combining mark to the HAS_* flag(s) it contributes; returns 0 when the switch has no case for c.
+ switch (c) {
+ case 0x0300: // varia
+ case 0x0301: // tonos = oxia
+ case 0x0342: // perispomeni
+ case 0x0302: // circumflex can look like perispomeni
+ case 0x0303: // tilde can look like perispomeni
+ case 0x0311: // inverted breve can look like perispomeni
+ return HAS_ACCENT;
+ case 0x0308: // dialytika = diaeresis
+ return HAS_COMBINING_DIALYTIKA;
+ case 0x0344: // dialytika tonos
+ return HAS_COMBINING_DIALYTIKA | HAS_ACCENT; // this one mark sets both flags
+ case 0x0345: // ypogegrammeni = iota subscript
+ return HAS_YPOGEGRAMMENI;
+ case 0x0304: // macron
+ case 0x0306: // breve
+ case 0x0313: // comma above
+ case 0x0314: // reversed comma above
+ case 0x0343: // koronis
+ return HAS_OTHER_GREEK_DIACRITIC;
+ default:
+ return 0; // not one of the diacritics listed above
+ }
+}
+
+UBool isFollowedByCasedLetter(const UChar *s, int32_t i, int32_t length) { // Scans s from index i up to length; TRUE if the first non-case-ignorable code point is cased.
+ while (i < length) {
+ UChar32 c;
+ U16_NEXT(s, i, length, c); // reads the next code point into c and advances i
+ int32_t type = ucase_getTypeOrIgnorable(c);
+ if ((type & UCASE_IGNORABLE) != 0) {
+ // Case-ignorable, continue with the loop.
+ } else if (type != UCASE_NONE) {
+ return TRUE; // Followed by cased letter.
+ } else {
+ return FALSE; // Uncased and not case-ignorable.
+ }
+ }
+ return FALSE; // Not followed by cased letter.
+}
+
+/**
+ * Greek string uppercasing with a state machine.
+ * Probably simpler than a stateless function that has to figure out complex context-before
+ * for each character. Returns the destination index after the last appended unit, or 0 with errorCode set to U_INDEX_OUTOFBOUNDS_ERROR when an append helper returns a negative index.
+ * TODO: Try to re-consolidate one way or another with the non-Greek function.
+ */
+int32_t toUpper(uint32_t options,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ Edits *edits,
+ UErrorCode &errorCode) {
+ int32_t destIndex=0;
+ uint32_t state = 0; // AFTER_* flags describing what preceded the current character
+ for (int32_t i = 0; i < srcLength;) {
+ int32_t nextIndex = i;
+ UChar32 c;
+ U16_NEXT(src, nextIndex, srcLength, c); // c = code point at i; nextIndex now points past it
+ uint32_t nextState = 0;
+ int32_t type = ucase_getTypeOrIgnorable(c);
+ if ((type & UCASE_IGNORABLE) != 0) {
+ // c is case-ignorable
+ nextState |= (state & AFTER_CASED); // carry the after-cased flag across ignorable characters
+ } else if (type != UCASE_NONE) {
+ // c is cased
+ nextState |= AFTER_CASED;
+ }
+ uint32_t data = getLetterData(c);
+ if (data > 0) { // c has an entry in the Greek letter tables
+ uint32_t upper = data & UPPER_MASK; // base uppercase letter without the flag bits
+ // Add a dialytika to this iota or ypsilon vowel
+ // if we removed a tonos from the previous vowel,
+ // and that previous vowel did not also have (or gain) a dialytika.
+ // Adding one only to the final vowel in a longer sequence
+ // (which does not occur in normal writing) would require lookahead.
+ // Set the same flag as for preserving an existing dialytika.
+ if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
+ (upper == 0x399 || upper == 0x3A5)) {
+ data |= HAS_DIALYTIKA;
+ }
+ int32_t numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota.
+ if ((data & HAS_YPOGEGRAMMENI) != 0) {
+ numYpogegrammeni = 1; // the letter itself carries an iota subscript
+ }
+ // Skip combining diacritics after this Greek letter.
+ while (nextIndex < srcLength) {
+ uint32_t diacriticData = getDiacriticData(src[nextIndex]);
+ if (diacriticData != 0) {
+ data |= diacriticData; // fold the mark's flags into the letter's data
+ if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
+ ++numYpogegrammeni;
+ }
+ ++nextIndex;
+ } else {
+ break; // not a Greek diacritic
+ }
+ }
+ if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) { // accented vowel without dialytika
+ nextState |= AFTER_VOWEL_WITH_ACCENT;
+ }
+ // Map according to Greek rules.
+ UBool addTonos = FALSE;
+ if (upper == 0x397 &&
+ (data & HAS_ACCENT) != 0 &&
+ numYpogegrammeni == 0 &&
+ (state & AFTER_CASED) == 0 &&
+ !isFollowedByCasedLetter(src, nextIndex, srcLength)) {
+ // Keep disjunctive "or" with (only) a tonos.
+ // We use the same "word boundary" conditions as for the Final_Sigma test.
+ if (i == nextIndex) { // NOTE(review): nextIndex was already advanced past c by U16_NEXT above, so this branch looks unreachable — confirm against upstream
+ upper = 0x389; // Preserve the precomposed form.
+ } else {
+ addTonos = TRUE;
+ }
+ } else if ((data & HAS_DIALYTIKA) != 0) {
+ // Preserve a vowel with dialytika in precomposed form if it exists.
+ if (upper == 0x399) {
+ upper = 0x3AA;
+ data &= ~HAS_EITHER_DIALYTIKA; // the precomposed letter already carries it; do not append U+0308 below
+ } else if (upper == 0x3A5) {
+ upper = 0x3AB;
+ data &= ~HAS_EITHER_DIALYTIKA; // the precomposed letter already carries it; do not append U+0308 below
+ }
+ }
+
+ UBool change;
+ if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
+ change = TRUE; // common, simple usage
+ } else {
+ // Find out first whether we are changing the text.
+ change = src[i] != upper || numYpogegrammeni > 0;
+ int32_t i2 = i + 1; // walks the source units that the output sequence would have to match
+ if ((data & HAS_EITHER_DIALYTIKA) != 0) {
+ change |= i2 >= nextIndex || src[i2] != 0x308;
+ ++i2;
+ }
+ if (addTonos) {
+ change |= i2 >= nextIndex || src[i2] != 0x301;
+ ++i2;
+ }
+ int32_t oldLength = nextIndex - i; // source units consumed: letter plus skipped diacritics
+ int32_t newLength = (i2 - i) + numYpogegrammeni; // output units: letter, optional marks, one iota per ypogegrammeni
+ change |= oldLength != newLength;
+ if (change) {
+ if (edits != NULL) {
+ edits->addReplace(oldLength, newLength);
+ }
+ } else {
+ if (edits != NULL) {
+ edits->addUnchanged(oldLength);
+ }
+ // Write unchanged text?
+ change = (options & U_OMIT_UNCHANGED_TEXT) == 0;
+ }
+ }
+
+ if (change) { // from here on "change" means: write this letter's output to dest
+ destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);
+ if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
+ destIndex=appendUChar(dest, destIndex, destCapacity, 0x308); // restore or add a dialytika
+ }
+ if (destIndex >= 0 && addTonos) {
+ destIndex=appendUChar(dest, destIndex, destCapacity, 0x301); // keep the tonos on the disjunctive eta
+ }
+ while (destIndex >= 0 && numYpogegrammeni > 0) {
+ destIndex=appendUChar(dest, destIndex, destCapacity, 0x399); // one capital iota per ypogegrammeni
+ --numYpogegrammeni;
+ }
+ if(destIndex<0) { // a negative index from appendUChar is reported as an error
+ errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ }
+ } else { // no Greek letter data: use the general full uppercase mapping with the Greek locale id
+ const UChar *s;
+ c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);
+ destIndex = appendResult(dest, destIndex, destCapacity, c, s,
+ nextIndex - i, options, edits);
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ }
+ i = nextIndex; // continue after the letter and any diacritics consumed with it
+ state = nextState;
+ }
+
+ return destIndex;
+}
+
+} // namespace GreekUpper
+U_NAMESPACE_END
+
+/* functions available in the common library (for unistr_case.cpp) */
+
+U_CFUNC int32_t U_CALLCONV // Lowercases src[0..srcLength) into dest via toLower() and post-checks overflow/edits errors.
+ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ icu::Edits *edits,
+ UErrorCode &errorCode) {
+ UCaseContext csc=UCASECONTEXT_INITIALIZER;
+ csc.p=(void *)src; // context object points at the source text
+ csc.limit=srcLength;
+ int32_t destIndex = toLower(
+ caseLocale, options,
+ dest, destCapacity,
+ src, &csc, 0, srcLength,
+ edits, errorCode);
+ return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
+}
+
+U_CFUNC int32_t U_CALLCONV // Uppercases src into dest; dispatches to the Greek state machine or the generic toUpper().
+ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ icu::Edits *edits,
+ UErrorCode &errorCode) {
+ int32_t destIndex;
+ if (caseLocale == UCASE_LOC_GREEK) {
+ destIndex = GreekUpper::toUpper(options, dest, destCapacity, // Greek has its own uppercasing routine
+ src, srcLength, edits, errorCode);
+ } else {
+ UCaseContext csc=UCASECONTEXT_INITIALIZER;
+ csc.p=(void *)src; // context object points at the source text
+ csc.limit=srcLength;
+ destIndex = toUpper(
+ caseLocale, options,
+ dest, destCapacity,
+ src, &csc, srcLength,
+ edits, errorCode);
+ }
+ return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
+}
+
+U_CFUNC int32_t U_CALLCONV // Case-folds src into dest by calling toLower() with locale -1 and no context; the caseLocale argument is ignored.
+ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ icu::Edits *edits,
+ UErrorCode &errorCode) {
+ int32_t destIndex = toLower(
+ -1, options, // NOTE(review): -1 presumably selects case folding inside toLower — confirm there
+ dest, destCapacity,
+ src, nullptr, 0, srcLength,
+ edits, errorCode);
+ return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
+}
+
+U_CFUNC int32_t // Validates arguments, rejects overlapping buffers, runs stringCaseMapper, then terminates dest.
+ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UStringCaseMapper *stringCaseMapper,
+ icu::Edits *edits,
+ UErrorCode &errorCode) {
+ int32_t destLength;
+
+ /* check argument values */
+ if(U_FAILURE(errorCode)) {
+ return 0; // an incoming failure is left untouched
+ }
+ if( destCapacity<0 ||
+ (dest==NULL && destCapacity>0) ||
+ src==NULL ||
+ srcLength<-1
+ ) {
+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* get the string length */
+ if(srcLength==-1) { // -1 requests measuring src with u_strlen()
+ srcLength=u_strlen(src);
+ }
+
+ /* check for overlapping source and destination */
+ if( dest!=NULL &&
+ ((src>=dest && src<(dest+destCapacity)) ||
+ (dest>=src && dest<(src+srcLength)))
+ ) {
+ errorCode=U_ILLEGAL_ARGUMENT_ERROR; // unlike ustrcase_mapWithOverlap(), overlap is an error here
+ return 0;
+ }
+
+ if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
+ edits->reset(); // start from empty edits unless the caller opted out
+ }
+ destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
+ dest, destCapacity, src, srcLength, edits, errorCode);
+ return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
+}
+
+U_CFUNC int32_t // Like ustrcase_map() but without edits; overlapping src/dest is handled by mapping into a temporary buffer.
+ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UStringCaseMapper *stringCaseMapper,
+ UErrorCode &errorCode) {
+ UChar buffer[300]; // stack scratch space for the overlap case
+ UChar *temp; // where the mapper writes: dest, buffer, or a heap block
+
+ int32_t destLength;
+
+ /* check argument values */
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ if( destCapacity<0 ||
+ (dest==NULL && destCapacity>0) ||
+ src==NULL ||
+ srcLength<-1
+ ) {
+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* get the string length */
+ if(srcLength==-1) { // -1 requests measuring src with u_strlen()
+ srcLength=u_strlen(src);
+ }
+
+ /* check for overlapping source and destination */
+ if( dest!=NULL &&
+ ((src>=dest && src<(dest+destCapacity)) ||
+ (dest>=src && dest<(src+srcLength)))
+ ) {
+ /* overlap: provide a temporary destination buffer and later copy the result */
+ if(destCapacity<=UPRV_LENGTHOF(buffer)) {
+ /* the stack buffer is large enough */
+ temp=buffer;
+ } else {
+ /* allocate a buffer */
+ temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR); // NOTE(review): the product is computed before the call; confirm it cannot overflow for very large destCapacity
+ if(temp==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ }
+ } else {
+ temp=dest; // no overlap: map straight into dest
+ }
+
+ destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
+ temp, destCapacity, src, srcLength, NULL, errorCode); // edits are not supported on this path (NULL)
+ if(temp!=dest) {
+ /* copy the result string to the destination buffer */
+ if (U_SUCCESS(errorCode) && 0 < destLength && destLength <= destCapacity) { // copy only a successful result that fits
+ u_memmove(dest, temp, destLength);
+ }
+ if(temp!=buffer) {
+ uprv_free(temp); // release the heap block allocated above
+ }
+ }
+
+ return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
+}
+
+/* public API functions */
+
+U_CAPI int32_t U_EXPORT2 // Public C entry point: case-folds src into dest with the root locale; overlapping buffers are allowed.
+u_strFoldCase(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ uint32_t options,
+ UErrorCode *pErrorCode) {
+ return ustrcase_mapWithOverlap(
+ UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
+ dest, destCapacity,
+ src, srcLength,
+ ustrcase_internalFold, *pErrorCode); // NOTE(review): pErrorCode is dereferenced without a NULL check, unlike u_strCaseCompare — confirm callers always pass non-NULL
+}
+
+U_NAMESPACE_BEGIN
+
+int32_t CaseMap::fold( // Case-folds src into dest via ustrcase_map(), optionally recording edits; overlapping buffers are rejected there.
+ uint32_t options,
+ const UChar *src, int32_t srcLength,
+ UChar *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode) {
+ return ustrcase_map(
+ UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
+ dest, destCapacity,
+ src, srcLength,
+ ustrcase_internalFold, edits, errorCode);
+}
+
+U_NAMESPACE_END
+
+/* case-insensitive string comparisons -------------------------------------- */
+
+/*
+ * This function is a copy of unorm_cmpEquivFold() minus the parts for
+ * canonical equivalence.
+ * Keep the functions in sync, and see there for how this works.
+ * The duplication is for modularization:
+ * It makes caseless (but not canonical caseless) matches independent of
+ * the normalization code.
+ */
+
+/* stack element for previous-level source/decomposition pointers */
+struct CmpEquivLevel { // saved reading position of one string while a case-folding buffer is being read instead
+ const UChar *start, *s, *limit; // start of text, current position, end of text (limit may be NULL for NUL-terminated input)
+};
+typedef struct CmpEquivLevel CmpEquivLevel;
+
+/**
+ * Internal implementation code comparing string with case fold.
+ * This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch().
+ *
+ * @param s1 input string 1
+ * @param length1 length of string 1, or -1 (NULL terminated)
+ * @param s2 input string 2
+ * @param length2 length of string 2, or -1 (NULL terminated)
+ * @param options compare options
+ * @param matchLen1 (output) length of partial prefix match in s1; may be NULL, in which case neither match length is written
+ * @param matchLen2 (output) length of partial prefix match in s2; must be non-NULL whenever matchLen1 is non-NULL
+ * @param pErrorCode receives error status; if it already indicates failure the function returns 0 immediately
+ * @return The result of comparison: 0 when both strings end together, -1 or 1 when string 1 or string 2 ends first, otherwise c1-c2 at the first difference
+ */
+static int32_t _cmpFold(
+ const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ int32_t *matchLen1, int32_t *matchLen2,
+ UErrorCode *pErrorCode) {
+ int32_t cmpRes = 0;
+
+ /* current-level start/limit - s1/s2 as current */
+ const UChar *start1, *start2, *limit1, *limit2;
+
+ /* points to the original start address */
+ const UChar *org1, *org2;
+
+ /* points to the end of match + 1 */
+ const UChar *m1, *m2;
+
+ /* case folding variables */
+ const UChar *p;
+ int32_t length;
+
+ /* stacks of previous-level start/current/limit */
+ CmpEquivLevel stack1[2], stack2[2];
+
+ /* case folding buffers, only use current-level start/limit */
+ UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
+
+ /* track which is the current level per string */
+ int32_t level1, level2;
+
+ /* current code units, and code points for lookups */
+ UChar32 c1, c2, cp1, cp2;
+
+ /* no argument error checking because this itself is not an API */
+
+ /*
+ * assume that at least the option U_COMPARE_IGNORE_CASE is set
+ * otherwise this function would have to behave exactly as uprv_strCompare()
+ */
+ if(U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
+ /* initialize */
+ if(matchLen1) {
+ U_ASSERT(matchLen2 !=NULL);
+ *matchLen1=0;
+ *matchLen2=0;
+ }
+
+ start1=m1=org1=s1;
+ if(length1==-1) {
+ limit1=NULL; /* NUL-terminated: no explicit limit */
+ } else {
+ limit1=s1+length1;
+ }
+
+ start2=m2=org2=s2;
+ if(length2==-1) {
+ limit2=NULL; /* NUL-terminated: no explicit limit */
+ } else {
+ limit2=s2+length2;
+ }
+
+ level1=level2=0;
+ c1=c2=-1;
+
+ /* comparison loop */
+ for(;;) {
+ /*
+ * here a code unit value of -1 means "get another code unit"
+ * below it will mean "this source is finished"
+ */
+
+ if(c1<0) {
+ /* get next code unit from string 1, post-increment */
+ for(;;) {
+ if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
+ if(level1==0) {
+ c1=-1; /* end of the original string 1 */
+ break;
+ }
+ } else {
+ ++s1;
+ break;
+ }
+
+ /* reached end of level buffer, pop one level */
+ do {
+ --level1;
+ start1=stack1[level1].start; /*Not uninitialized*/
+ } while(start1==NULL);
+ s1=stack1[level1].s; /*Not uninitialized*/
+ limit1=stack1[level1].limit; /*Not uninitialized*/
+ }
+ }
+
+ if(c2<0) {
+ /* get next code unit from string 2, post-increment */
+ for(;;) {
+ if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
+ if(level2==0) {
+ c2=-1; /* end of the original string 2 */
+ break;
+ }
+ } else {
+ ++s2;
+ break;
+ }
+
+ /* reached end of level buffer, pop one level */
+ do {
+ --level2;
+ start2=stack2[level2].start; /*Not uninitialized*/
+ } while(start2==NULL);
+ s2=stack2[level2].s; /*Not uninitialized*/
+ limit2=stack2[level2].limit; /*Not uninitialized*/
+ }
+ }
+
+ /*
+ * compare c1 and c2
+ * either variable c1, c2 is -1 only if the corresponding string is finished
+ */
+ if(c1==c2) {
+ const UChar *next1, *next2;
+
+ if(c1<0) {
+ cmpRes=0; /* c1==c2==-1 indicating end of strings */
+ break;
+ }
+
+ /*
+ * Note: Move the match positions in both strings at the same time
+ * only when corresponding code point(s) in the original strings
+ * are fully consumed. For example, when comparing s1="Fust" and
+ * s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matches
+ * the first code point in the case-folded data. But the second "s"
+ * has no matching code point in s1, so this implementation returns
+ * 2 as the prefix match length ("Fu").
+ */
+ next1=next2=NULL;
+ if(level1==0) {
+ next1=s1;
+ } else if(s1==limit1) {
+ /* Note: This implementation only use a single level of stack.
+ * If this code needs to be changed to use multiple levels
+ * of stacks, the code above should check if the current
+ * code is at the end of all stacks.
+ */
+ U_ASSERT(level1==1);
+
+ /* is s1 at the end of the current stack? */
+ next1=stack1[0].s;
+ }
+
+ if (next1!=NULL) {
+ if(level2==0) {
+ next2=s2;
+ } else if(s2==limit2) {
+ U_ASSERT(level2==1);
+
+ /* is s2 at the end of the current stack? */
+ next2=stack2[0].s;
+ }
+ if(next2!=NULL) {
+ m1=next1; /* both sides are at an original-string boundary: record the match */
+ m2=next2;
+ }
+ }
+ c1=c2=-1; /* make us fetch new code units */
+ continue;
+ } else if(c1<0) {
+ cmpRes=-1; /* string 1 ends before string 2 */
+ break;
+ } else if(c2<0) {
+ cmpRes=1; /* string 2 ends before string 1 */
+ break;
+ }
+ /* c1!=c2 && c1>=0 && c2>=0 */
+
+ /* get complete code points for c1, c2 for lookups if either is a surrogate */
+ cp1=c1;
+ if(U_IS_SURROGATE(c1)) {
+ UChar c;
+
+ if(U_IS_SURROGATE_LEAD(c1)) {
+ if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
+ /* advance ++s1; only below if cp1 decomposes/case-folds */
+ cp1=U16_GET_SUPPLEMENTARY(c1, c);
+ }
+ } else /* isTrail(c1) */ {
+ if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
+ cp1=U16_GET_SUPPLEMENTARY(c, c1);
+ }
+ }
+ }
+
+ cp2=c2;
+ if(U_IS_SURROGATE(c2)) {
+ UChar c;
+
+ if(U_IS_SURROGATE_LEAD(c2)) {
+ if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
+ /* advance ++s2; only below if cp2 decomposes/case-folds */
+ cp2=U16_GET_SUPPLEMENTARY(c2, c);
+ }
+ } else /* isTrail(c2) */ {
+ if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
+ cp2=U16_GET_SUPPLEMENTARY(c, c2);
+ }
+ }
+ }
+
+ /*
+ * go down one level for each string
+ * continue with the main loop as soon as there is a real change
+ */
+
+ if( level1==0 &&
+ (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0
+ ) {
+ /* cp1 case-folds to the code point "length" or to p[length] */
+ if(U_IS_SURROGATE(c1)) {
+ if(U_IS_SURROGATE_LEAD(c1)) {
+ /* advance beyond source surrogate pair if it case-folds */
+ ++s1;
+ } else /* isTrail(c1) */ {
+ /*
+ * we got a supplementary code point when hitting its trail surrogate,
+ * therefore the lead surrogate must have been the same as in the other string;
+ * compare this decomposition with the lead surrogate in the other string
+ * remember that this simulates bulk text replacement:
+ * the decomposition would replace the entire code point
+ */
+ --s2;
+ --m2;
+ c2=*(s2-1);
+ }
+ }
+
+ /* push current level pointers */
+ stack1[0].start=start1;
+ stack1[0].s=s1;
+ stack1[0].limit=limit1;
+ ++level1;
+
+ /* copy the folding result to fold1[] */
+ if(length<=UCASE_MAX_STRING_LENGTH) {
+ u_memcpy(fold1, p, length); /* string result: p holds "length" code units */
+ } else {
+ int32_t i=0;
+ U16_APPEND_UNSAFE(fold1, i, length); /* single code point result: encode it as UTF-16 */
+ length=i;
+ }
+
+ /* set next level pointers to case folding */
+ start1=s1=fold1;
+ limit1=fold1+length;
+
+ /* get ready to read from decomposition, continue with loop */
+ c1=-1;
+ continue;
+ }
+
+ if( level2==0 &&
+ (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0
+ ) {
+ /* cp2 case-folds to the code point "length" or to p[length] */
+ if(U_IS_SURROGATE(c2)) {
+ if(U_IS_SURROGATE_LEAD(c2)) {
+ /* advance beyond source surrogate pair if it case-folds */
+ ++s2;
+ } else /* isTrail(c2) */ {
+ /*
+ * we got a supplementary code point when hitting its trail surrogate,
+ * therefore the lead surrogate must have been the same as in the other string;
+ * compare this decomposition with the lead surrogate in the other string
+ * remember that this simulates bulk text replacement:
+ * the decomposition would replace the entire code point
+ */
+ --s1;
+ --m2; /* NOTE(review): the mirrored branch for string 1 also decrements m2; here s1 is backed up, so confirm whether m1 was intended */
+ c1=*(s1-1);
+ }
+ }
+
+ /* push current level pointers */
+ stack2[0].start=start2;
+ stack2[0].s=s2;
+ stack2[0].limit=limit2;
+ ++level2;
+
+ /* copy the folding result to fold2[] */
+ if(length<=UCASE_MAX_STRING_LENGTH) {
+ u_memcpy(fold2, p, length); /* string result: p holds "length" code units */
+ } else {
+ int32_t i=0;
+ U16_APPEND_UNSAFE(fold2, i, length); /* single code point result: encode it as UTF-16 */
+ length=i;
+ }
+
+ /* set next level pointers to case folding */
+ start2=s2=fold2;
+ limit2=fold2+length;
+
+ /* get ready to read from decomposition, continue with loop */
+ c2=-1;
+ continue;
+ }
+
+ /*
+ * no decomposition/case folding, max level for both sides:
+ * return difference result
+ *
+ * code point order comparison must not just return cp1-cp2
+ * because when single surrogates are present then the surrogate pairs
+ * that formed cp1 and cp2 may be from different string indexes
+ *
+ * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
+ * c1=d800 cp1=10001 c2=dc00 cp2=10000
+ * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
+ *
+ * therefore, use same fix-up as in ustring.c/uprv_strCompare()
+ * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
+ * so we have slightly different pointer/start/limit comparisons here
+ */
+
+ if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
+ /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
+ if(
+ (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
+ (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
+ ) {
+ /* part of a surrogate pair, leave >=d800 */
+ } else {
+ /* BMP code point - may be surrogate code point - make <d800 */
+ c1-=0x2800;
+ }
+
+ if(
+ (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
+ (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
+ ) {
+ /* part of a surrogate pair, leave >=d800 */
+ } else {
+ /* BMP code point - may be surrogate code point - make <d800 */
+ c2-=0x2800;
+ }
+ }
+
+ cmpRes=c1-c2;
+ break;
+ }
+
+ if(matchLen1) { /* report the matched prefix lengths relative to the original starts */
+ *matchLen1=static_cast<int32_t>(m1-org1);
+ *matchLen2=static_cast<int32_t>(m2-org2);
+ }
+ return cmpRes;
+}
+
+/* internal function */
+U_CFUNC int32_t /* Case-folding comparison without prefix-match output: forwards to _cmpFold() with NULL match-length pointers. */
+u_strcmpFold(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode) {
+ return _cmpFold(s1, length1, s2, length2, options, NULL, NULL, pErrorCode);
+}
+
+/* public API functions */
+
+U_CAPI int32_t U_EXPORT2 /* Public API: validates arguments, then compares with U_COMPARE_IGNORE_CASE added to options. */
+u_strCaseCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode) {
+ /* argument checking */
+ if(pErrorCode==0 || U_FAILURE(*pErrorCode)) {
+ return 0; /* no usable error code, or an earlier failure: do nothing */
+ }
+ if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ return u_strcmpFold(s1, length1, s2, length2,
+ options|U_COMPARE_IGNORE_CASE,
+ pErrorCode);
+}
+
+U_CAPI int32_t U_EXPORT2 /* Case-insensitive compare of two NUL-terminated strings (both lengths passed as -1). */
+u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) {
+ UErrorCode errorCode=U_ZERO_ERROR; /* local status; not reported to the caller */
+ return u_strcmpFold(s1, -1, s2, -1,
+ options|U_COMPARE_IGNORE_CASE,
+ &errorCode);
+}
+
+U_CAPI int32_t U_EXPORT2 /* Case-insensitive compare of two buffers that both have the given length. */
+u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) {
+ UErrorCode errorCode=U_ZERO_ERROR; /* local status; not reported to the caller */
+ return u_strcmpFold(s1, length, s2, length,
+ options|U_COMPARE_IGNORE_CASE,
+ &errorCode);
+}
+
+U_CAPI int32_t U_EXPORT2 /* Case-insensitive compare of at most n units per string; _STRNCMP_STYLE also makes _cmpFold() stop at a NUL. */
+u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
+ UErrorCode errorCode=U_ZERO_ERROR; /* local status; not reported to the caller */
+ return u_strcmpFold(s1, n, s2, n,
+ options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE),
+ &errorCode);
+}
+
+/* internal API - detect length of shared prefix */
+U_CAPI void /* Runs _cmpFold() for its matchLen1/matchLen2 outputs; the comparison result itself is discarded. */
+u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ int32_t *matchLen1, int32_t *matchLen2,
+ UErrorCode *pErrorCode) {
+ _cmpFold(s1, length1, s2, length2, options, /* options are passed through unchanged; U_COMPARE_IGNORE_CASE is not added here */
+ matchLen1, matchLen2, pErrorCode);
+}
diff --git a/thirdparty/icu4c/common/ustrcase_locale.cpp b/thirdparty/icu4c/common/ustrcase_locale.cpp
new file mode 100644
index 0000000000..2ecd24f03e
--- /dev/null
+++ b/thirdparty/icu4c/common/ustrcase_locale.cpp
@@ -0,0 +1,94 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ustrcase_locale.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011may31
+* created by: Markus W. Scherer
+*
+* Locale-sensitive case mapping functions (ones that call uloc_getDefault())
+* were moved here to break dependency cycles among parts of the common library.
+*/
+
+#include "unicode/utypes.h"
+#include "uassert.h"
+#include "unicode/brkiter.h"
+#include "unicode/casemap.h"
+#include "unicode/ucasemap.h"
+#include "unicode/uloc.h"
+#include "unicode/ustring.h"
+#include "ucase.h"
+#include "ucasemap_imp.h"
+
+U_CFUNC int32_t // Maps a locale ID string to a case-locale constant; NULL means the default locale, "" means root.
+ustrcase_getCaseLocale(const char *locale) {
+ if (locale == NULL) {
+ locale = uloc_getDefault(); // fall back to the process default locale
+ }
+ if (*locale == 0) {
+ return UCASE_LOC_ROOT; // empty locale ID
+ } else {
+ return ucase_getCaseLocale(locale);
+ }
+}
+
+/* public API functions */
+
+U_CAPI int32_t U_EXPORT2 // Public C entry point: locale-sensitive lowercasing with options 0; overlapping buffers are allowed.
+u_strToLower(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode) {
+ return ustrcase_mapWithOverlap(
+ ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL
+ dest, destCapacity,
+ src, srcLength,
+ ustrcase_internalToLower, *pErrorCode); // NOTE(review): pErrorCode is dereferenced without a NULL check — confirm callers always pass non-NULL
+}
+
+U_CAPI int32_t U_EXPORT2 // Public C entry point: locale-sensitive uppercasing with options 0; overlapping buffers are allowed.
+u_strToUpper(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode) {
+ return ustrcase_mapWithOverlap(
+ ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL
+ dest, destCapacity,
+ src, srcLength,
+ ustrcase_internalToUpper, *pErrorCode); // NOTE(review): pErrorCode is dereferenced without a NULL check — confirm callers always pass non-NULL
+}
+
+U_NAMESPACE_BEGIN
+
+int32_t CaseMap::toLower( // Locale-sensitive lowercasing via ustrcase_map(), optionally recording edits.
+ const char *locale, uint32_t options,
+ const UChar *src, int32_t srcLength,
+ UChar *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode) {
+ return ustrcase_map(
+ ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
+ dest, destCapacity,
+ src, srcLength,
+ ustrcase_internalToLower, edits, errorCode);
+}
+
+int32_t CaseMap::toUpper( // Locale-sensitive uppercasing via ustrcase_map(), optionally recording edits.
+ const char *locale, uint32_t options,
+ const UChar *src, int32_t srcLength,
+ UChar *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode) {
+ return ustrcase_map(
+ ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
+ dest, destCapacity,
+ src, srcLength,
+ ustrcase_internalToUpper, edits, errorCode);
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/ustrenum.cpp b/thirdparty/icu4c/common/ustrenum.cpp
new file mode 100644
index 0000000000..ed23eaa232
--- /dev/null
+++ b/thirdparty/icu4c/common/ustrenum.cpp
@@ -0,0 +1,398 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2002-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: November 11 2002
+* Since: ICU 2.4
+**********************************************************************
+*/
+#include "utypeinfo.h" // for 'typeid' to work
+
+#include "unicode/ustring.h"
+#include "unicode/strenum.h"
+#include "unicode/putil.h"
+#include "uenumimp.h"
+#include "ustrenum.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+// StringEnumeration implementation ---------------------------------------- ***
+
+// Default constructor: points 'chars' at the charsBuffer member and records
+// sizeof(charsBuffer) as the current capacity of that conversion buffer.
+StringEnumeration::StringEnumeration()
+ : chars(charsBuffer), charsCapacity(sizeof(charsBuffer)) {
+}
+
+// Destructor: releases the conversion buffer, but only when it is a separate
+// allocation (ensureCharsCapacity() replaced charsBuffer with uprv_malloc memory).
+StringEnumeration::~StringEnumeration() {
+    if (chars == NULL || chars == charsBuffer) {
+        return;
+    }
+    uprv_free(chars);
+}
+
+// StringEnumeration base class clone() default implementation, does not clone.
+// It always returns NULL, so callers must be prepared for a NULL result.
+StringEnumeration *
+StringEnumeration::clone() const {
+ return NULL;
+}
+
+// Default next(): obtains the element through snext(), copies it into the
+// 'unistr' member and extracts it as invariant characters (US_INV) into the
+// 'chars' buffer, growing that buffer first if needed.
+// Returns NULL at the end of the enumeration or on failure; *resultLength
+// (if non-NULL) receives unistr.length().
+// Note that the default snext() in turn calls next(), so a subclass has to
+// override at least one of the two.
+const char *
+StringEnumeration::next(int32_t *resultLength, UErrorCode &status) {
+    const UnicodeString *nextString = snext(status);
+    if (U_FAILURE(status) || nextString == NULL) {
+        return NULL;
+    }
+
+    unistr = *nextString;
+    ensureCharsCapacity(unistr.length() + 1, status);  // +1 for the terminating NUL
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+
+    if (resultLength != NULL) {
+        *resultLength = unistr.length();
+    }
+    unistr.extract(0, INT32_MAX, chars, charsCapacity, US_INV);
+    return chars;
+}
+
+// Default unext(): obtains the element through snext(), copies it into the
+// 'unistr' member and returns that copy's NUL-terminated buffer.
+// Returns NULL at the end of the enumeration or on failure; *resultLength
+// (if non-NULL) receives the string length.
+const UChar *
+StringEnumeration::unext(int32_t *resultLength, UErrorCode &status) {
+    const UnicodeString *nextString = snext(status);
+    if (U_FAILURE(status) || nextString == NULL) {
+        return NULL;
+    }
+
+    unistr = *nextString;
+    if (resultLength != NULL) {
+        *resultLength = unistr.length();
+    }
+    return unistr.getTerminatedBuffer();
+}
+
+// Default snext(): obtains the element through next() and converts the
+// returned char* string into the 'unistr' member via setChars().
+// The length variable is only read by setChars() when next() returned a
+// non-NULL string with success status.
+const UnicodeString *
+StringEnumeration::snext(UErrorCode &status) {
+    int32_t charCount;
+    const char *text = next(&charCount, status);
+    return setChars(text, charCount, status);
+}
+
+// Make sure the 'chars' buffer can hold at least 'capacity' bytes.
+// Does nothing if status already indicates failure or the buffer is large
+// enough. The old contents are NOT preserved when the buffer is replaced.
+// On allocation failure the object falls back to charsBuffer and status is
+// set to U_MEMORY_ALLOCATION_ERROR.
+void
+StringEnumeration::ensureCharsCapacity(int32_t capacity, UErrorCode &status) {
+    if (U_FAILURE(status) || capacity <= charsCapacity) {
+        return;
+    }
+
+    // avoid allocation thrashing: grow by at least 50%
+    if (capacity < (charsCapacity + charsCapacity / 2)) {
+        capacity = charsCapacity + charsCapacity / 2;
+    }
+
+    // drop a previous heap buffer; charsBuffer itself is never freed
+    if (chars != charsBuffer) {
+        uprv_free(chars);
+    }
+
+    chars = (char *)uprv_malloc(capacity);
+    if (chars != NULL) {
+        charsCapacity = capacity;
+    } else {
+        chars = charsBuffer;
+        charsCapacity = sizeof(charsBuffer);
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+// Convert the char* string s into the 'unistr' member with u_charsToUChars().
+// length < 0 means s is NUL-terminated.
+// Returns &unistr on success; NULL if status already failed, s is NULL, or
+// the UnicodeString buffer could not be obtained (status is then set to
+// U_MEMORY_ALLOCATION_ERROR).
+UnicodeString *
+StringEnumeration::setChars(const char *s, int32_t length, UErrorCode &status) {
+    if (U_FAILURE(status) || s == NULL) {
+        return NULL;
+    }
+    if (length < 0) {
+        length = (int32_t)uprv_strlen(s);
+    }
+
+    UChar *dest = unistr.getBuffer(length + 1);  // room for the NUL written below
+    if (dest == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    u_charsToUChars(s, dest, length);
+    dest[length] = 0;
+    unistr.releaseBuffer(length);
+    return &unistr;
+}
+// Base-class equality: two enumerations compare equal when their dynamic
+// types (typeid) are identical; no enumeration state is compared here.
+UBool
+StringEnumeration::operator==(const StringEnumeration& that)const {
+ return typeid(*this) == typeid(that);
+}
+
+// Logical negation of operator==().
+UBool
+StringEnumeration::operator!=(const StringEnumeration& that)const {
+ return !operator==(that);
+}
+
+// UStringEnumeration implementation --------------------------------------- ***
+
+// Factory: wraps uenumToAdopt in a new UStringEnumeration.
+// The UEnumeration is adopted in every case: if status already indicates a
+// failure, or if the allocation fails (status becomes
+// U_MEMORY_ALLOCATION_ERROR), it is closed here and NULL is returned.
+UStringEnumeration * U_EXPORT2
+UStringEnumeration::fromUEnumeration(
+        UEnumeration *uenumToAdopt, UErrorCode &status) {
+    if (U_SUCCESS(status)) {
+        UStringEnumeration *wrapper = new UStringEnumeration(uenumToAdopt);
+        if (wrapper != NULL) {
+            return wrapper;
+        }
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    // failure path: we still own the UEnumeration, so dispose of it
+    uenum_close(uenumToAdopt);
+    return NULL;
+}
+
+// Stores the UEnumeration pointer; the destructor closes it with uenum_close().
+// A null argument is not rejected, it is only checked by U_ASSERT.
+UStringEnumeration::UStringEnumeration(UEnumeration* _uenum) :
+ uenum(_uenum) {
+ U_ASSERT(_uenum != 0);
+}
+
+// Closes the adopted UEnumeration.
+UStringEnumeration::~UStringEnumeration() {
+ uenum_close(uenum);
+}
+
+// Forwards to uenum_count() on the wrapped UEnumeration.
+int32_t UStringEnumeration::count(UErrorCode& status) const {
+ return uenum_count(uenum, &status);
+}
+
+// Forwards to uenum_next() on the wrapped UEnumeration; the returned pointer
+// is whatever that call returns (no copy is made here).
+const char *UStringEnumeration::next(int32_t *resultLength, UErrorCode &status) {
+ return uenum_next(uenum, resultLength, &status);
+}
+
+// Fetches the next element with uenum_unext() and stores it in the 'unistr'
+// member via setTo(). Returns a pointer to that member, or NULL when
+// uenum_unext() returned NULL or status indicates a failure.
+const UnicodeString* UStringEnumeration::snext(UErrorCode& status) {
+    int32_t unitCount;
+    const UChar *text = uenum_unext(uenum, &unitCount, &status);
+    if (text != 0 && U_SUCCESS(status)) {
+        return &unistr.setTo(text, unitCount);
+    }
+    return 0;
+}
+
+// Forwards to uenum_reset() on the wrapped UEnumeration.
+void UStringEnumeration::reset(UErrorCode& status) {
+ uenum_reset(uenum, &status);
+}
+
+// NOTE(review): presumably supplies the getStaticClassID()/getDynamicClassID()
+// definitions that ustrenum.h declares for this class -- confirm against the macro.
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UStringEnumeration)
+U_NAMESPACE_END
+
+// C wrapper --------------------------------------------------------------- ***
+
+// Recovers the wrapped icu::StringEnumeration from the 'context' field of a UEnumeration.
+#define THIS(en) ((icu::StringEnumeration*)(en->context))
+
+U_CDECL_BEGIN
+
+/**
+ * Wrapper API to make StringEnumeration look like UEnumeration.
+ * close(): deletes the wrapped StringEnumeration, then frees the
+ * UEnumeration struct itself with uprv_free().
+ */
+static void U_CALLCONV
+ustrenum_close(UEnumeration* en) {
+ delete THIS(en);
+ uprv_free(en);
+}
+
+/**
+ * Wrapper API to make StringEnumeration look like UEnumeration.
+ * count(): forwards to StringEnumeration::count(); ec must not be NULL
+ * because it is dereferenced.
+ */
+static int32_t U_CALLCONV
+ustrenum_count(UEnumeration* en,
+ UErrorCode* ec)
+{
+ return THIS(en)->count(*ec);
+}
+
+/**
+ * Wrapper API to make StringEnumeration look like UEnumeration.
+ * unext(): forwards to StringEnumeration::unext(); ec must not be NULL
+ * because it is dereferenced.
+ */
+static const UChar* U_CALLCONV
+ustrenum_unext(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* ec)
+{
+ return THIS(en)->unext(resultLength, *ec);
+}
+
+/**
+ * Wrapper API to make StringEnumeration look like UEnumeration.
+ * next(): forwards to StringEnumeration::next(); ec must not be NULL
+ * because it is dereferenced.
+ */
+static const char* U_CALLCONV
+ustrenum_next(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* ec)
+{
+ return THIS(en)->next(resultLength, *ec);
+}
+
+/**
+ * Wrapper API to make StringEnumeration look like UEnumeration.
+ * reset(): forwards to StringEnumeration::reset(); ec must not be NULL
+ * because it is dereferenced.
+ */
+static void U_CALLCONV
+ustrenum_reset(UEnumeration* en,
+ UErrorCode* ec)
+{
+ THIS(en)->reset(*ec);
+}
+
+/**
+ * Pseudo-vtable for UEnumeration wrapper around StringEnumeration.
+ * The StringEnumeration pointer will be stored in 'context'.
+ * This constant serves as a template: uenum_openFromStringEnumeration()
+ * copies it into a newly allocated UEnumeration and then sets 'context'.
+ */
+static const UEnumeration USTRENUM_VT = {
+ NULL, // NOTE(review): first field is not named anywhere in this file -- see the UEnumeration declaration
+ NULL, // store StringEnumeration pointer here
+ ustrenum_close,
+ ustrenum_count,
+ ustrenum_unext,
+ ustrenum_next,
+ ustrenum_reset
+};
+
+U_CDECL_END
+
+/**
+ * Given a StringEnumeration, wrap it in a UEnumeration.  The
+ * StringEnumeration is adopted; after this call, the caller must not
+ * delete it (regardless of error status).
+ *
+ * Returns NULL, after deleting 'adopted', when *ec already indicates a
+ * failure, when 'adopted' is NULL, or when the UEnumeration cannot be
+ * allocated (*ec is then set to U_MEMORY_ALLOCATION_ERROR).
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec) {
+    if (U_FAILURE(*ec) || adopted == NULL) {
+        delete adopted;
+        return NULL;
+    }
+
+    UEnumeration *wrapper = (UEnumeration*) uprv_malloc(sizeof(UEnumeration));
+    if (wrapper == NULL) {
+        *ec = U_MEMORY_ALLOCATION_ERROR;
+        delete adopted;
+        return NULL;
+    }
+
+    // start from the function-table template, then attach the C++ object
+    uprv_memcpy(wrapper, &USTRENUM_VT, sizeof(USTRENUM_VT));
+    wrapper->context = adopted;
+    return wrapper;
+}
+
+// C wrapper --------------------------------------------------------------- ***
+
+U_CDECL_BEGIN
+
+// State for a UEnumeration over a caller-supplied array of strings; the
+// array pointer itself is kept in uenum.context.
+// 'uenum' has to stay the first member: the functions below cast between
+// UEnumeration* and UCharStringEnumeration* (see the U_ASSERT in the open functions).
+typedef struct UCharStringEnumeration {
+ UEnumeration uenum;
+ int32_t index, count; // index: next element to return; count: number of elements
+} UCharStringEnumeration;
+
+// close(): frees only the enumeration struct; the string array stored in
+// 'context' is not freed here.
+static void U_CALLCONV
+ucharstrenum_close(UEnumeration* en) {
+ uprv_free(en);
+}
+
+// count(): returns the element count given at open time; the error code is unused.
+static int32_t U_CALLCONV
+ucharstrenum_count(UEnumeration* en,
+ UErrorCode* /*ec*/) {
+ return ((UCharStringEnumeration*)en)->count;
+}
+
+// unext() for an enumeration over a UChar* array: returns the element at the
+// current index and advances, or NULL once all 'count' elements were returned.
+// *resultLength (if non-NULL) receives u_strlen() of the element.
+static const UChar* U_CALLCONV
+ucharstrenum_unext(UEnumeration* en,
+                   int32_t* resultLength,
+                   UErrorCode* /*ec*/) {
+    UCharStringEnumeration *self = (UCharStringEnumeration*) en;
+    if (self->index < self->count) {
+        const UChar **table = (const UChar**)self->uenum.context;
+        const UChar *current = table[self->index++];
+        if (resultLength) {
+            *resultLength = (int32_t)u_strlen(current);
+        }
+        return current;
+    }
+    return NULL;
+}
+
+
+// next() for an enumeration over a char* array: returns the element at the
+// current index and advances, or NULL once all 'count' elements were returned.
+// *resultLength (if non-NULL) receives uprv_strlen() of the element.
+static const char* U_CALLCONV
+ucharstrenum_next(UEnumeration* en,
+                  int32_t* resultLength,
+                  UErrorCode* /*ec*/) {
+    UCharStringEnumeration *self = (UCharStringEnumeration*) en;
+    if (self->index < self->count) {
+        const char **table = (const char**)self->uenum.context;
+        const char *current = table[self->index++];
+        if (resultLength) {
+            *resultLength = (int32_t)uprv_strlen(current);
+        }
+        return current;
+    }
+    return NULL;
+}
+
+// reset(): rewinds to the first array element; the error code is unused.
+static void U_CALLCONV
+ucharstrenum_reset(UEnumeration* en,
+ UErrorCode* /*ec*/) {
+ ((UCharStringEnumeration*)en)->index = 0;
+}
+
+// Function-table template for enumerations over a const char* array;
+// copied into each new object by uenum_openCharStringsEnumeration().
+// The UChar* accessor slot uses uenum_unextDefault.
+static const UEnumeration UCHARSTRENUM_VT = {
+ NULL,
+ NULL, // 'context': set to the caller's string array by the open function
+ ucharstrenum_close,
+ ucharstrenum_count,
+ uenum_unextDefault,
+ ucharstrenum_next,
+ ucharstrenum_reset
+};
+
+// Function-table template for enumerations over a const UChar* array;
+// copied into each new object by uenum_openUCharStringsEnumeration().
+// The char* accessor slot uses uenum_nextDefault.
+static const UEnumeration UCHARSTRENUM_U_VT = {
+ NULL,
+ NULL, // 'context': set to the caller's string array by the open function
+ ucharstrenum_close,
+ ucharstrenum_count,
+ ucharstrenum_unext,
+ uenum_nextDefault,
+ ucharstrenum_reset
+};
+
+U_CDECL_END
+
+/**
+ * Create a UEnumeration over an array of 'count' char* strings.
+ * The array is not copied: only the pointer is stored, so it has to stay
+ * valid for as long as the enumeration is used.
+ *
+ * Returns NULL when *ec already indicates a failure, when count is negative,
+ * or when strings is NULL with a non-zero count (in these cases *ec is left
+ * untouched), and when allocation fails (*ec = U_MEMORY_ALLOCATION_ERROR).
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uenum_openCharStringsEnumeration(const char* const strings[], int32_t count,
+                                 UErrorCode* ec) {
+    if (U_FAILURE(*ec) || count < 0 || (count != 0 && strings == 0)) {
+        return NULL;
+    }
+
+    UCharStringEnumeration *e =
+        (UCharStringEnumeration*) uprv_malloc(sizeof(UCharStringEnumeration));
+    if (e == NULL) {
+        *ec = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    // the embedded UEnumeration must sit at offset 0 for the casts to work
+    U_ASSERT((char*)e==(char*)(&e->uenum));
+    uprv_memcpy(e, &UCHARSTRENUM_VT, sizeof(UCHARSTRENUM_VT));
+    e->uenum.context = (void*)strings;
+    e->index = 0;
+    e->count = count;
+    return (UEnumeration*) e;
+}
+
+/**
+ * Create a UEnumeration over an array of 'count' UChar* strings.
+ * The array is not copied: only the pointer is stored, so it has to stay
+ * valid for as long as the enumeration is used.
+ *
+ * Returns NULL when *ec already indicates a failure, when count is negative,
+ * or when strings is NULL with a non-zero count (in these cases *ec is left
+ * untouched), and when allocation fails (*ec = U_MEMORY_ALLOCATION_ERROR).
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count,
+                                  UErrorCode* ec) {
+    if (U_FAILURE(*ec) || count < 0 || (count != 0 && strings == 0)) {
+        return NULL;
+    }
+
+    UCharStringEnumeration *e =
+        (UCharStringEnumeration*) uprv_malloc(sizeof(UCharStringEnumeration));
+    if (e == NULL) {
+        *ec = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    // the embedded UEnumeration must sit at offset 0 for the casts to work
+    U_ASSERT((char*)e==(char*)(&e->uenum));
+    uprv_memcpy(e, &UCHARSTRENUM_U_VT, sizeof(UCHARSTRENUM_U_VT));
+    e->uenum.context = (void*)strings;
+    e->index = 0;
+    e->count = count;
+    return (UEnumeration*) e;
+}
+
+
+// end C Wrapper
diff --git a/thirdparty/icu4c/common/ustrenum.h b/thirdparty/icu4c/common/ustrenum.h
new file mode 100644
index 0000000000..a82162e2bd
--- /dev/null
+++ b/thirdparty/icu4c/common/ustrenum.h
@@ -0,0 +1,87 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2002-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: November 11 2002
+* Since: ICU 2.4
+**********************************************************************
+*/
+#ifndef _USTRENUM_H_
+#define _USTRENUM_H_
+
+#include "unicode/uenum.h"
+#include "unicode/strenum.h"
+
+//----------------------------------------------------------------------
+U_NAMESPACE_BEGIN
+
+/**
+ * A wrapper to make a UEnumeration into a StringEnumeration. The
+ * wrapper adopts the UEnumeration it wraps.
+ */
+class U_COMMON_API UStringEnumeration : public StringEnumeration {
+
+public:
+ /**
+ * Constructor. This constructor adopts its UEnumeration
+ * argument.
+ * @param uenum a UEnumeration object. This object takes
+ * ownership of 'uenum' and will close it in its destructor. The
+ * caller must not call uenum_close on 'uenum' after calling this
+ * constructor.
+ */
+ UStringEnumeration(UEnumeration* uenum);
+
+ /**
+ * Destructor. This closes the UEnumeration passed in to the
+ * constructor.
+ */
+ virtual ~UStringEnumeration();
+
+ /**
+ * Return the number of elements that the iterator traverses.
+ * @param status the error code.
+ * @return number of elements in the iterator.
+ */
+ virtual int32_t count(UErrorCode& status) const;
+
+ /**
+ * Returns the next element as a char* string, as delivered by
+ * uenum_next() on the wrapped UEnumeration.
+ * @param resultLength passed through to uenum_next().
+ * @param status the error code.
+ * @return a pointer to the string, or NULL.
+ */
+ virtual const char* next(int32_t *resultLength, UErrorCode& status);
+
+ /**
+ * Returns the next element as a UnicodeString*. If there are no
+ * more elements, returns NULL.
+ * @param status the error code.
+ * @return a pointer to the string, or NULL.
+ */
+ virtual const UnicodeString* snext(UErrorCode& status);
+
+ /**
+ * Resets the iterator.
+ * @param status the error code.
+ */
+ virtual void reset(UErrorCode& status);
+
+ /**
+ * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
+ */
+ virtual UClassID getDynamicClassID() const;
+
+ /**
+ * ICU4C "poor man's RTTI", returns a UClassID for this ICU class.
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * Factory that wraps a UEnumeration in a new UStringEnumeration.
+ * The UEnumeration is adopted even on failure: if status indicates a
+ * failure on entry or the object cannot be allocated, the
+ * UEnumeration is closed and NULL is returned.
+ * @param enumToAdopt the UEnumeration to wrap.
+ * @param status the error code.
+ * @return the new wrapper, or NULL on failure.
+ */
+ static UStringEnumeration * U_EXPORT2 fromUEnumeration(
+ UEnumeration *enumToAdopt, UErrorCode &status);
+private:
+ UEnumeration *uenum; // owned
+};
+
+U_NAMESPACE_END
+
+#endif
+
diff --git a/thirdparty/icu4c/common/ustrfmt.cpp b/thirdparty/icu4c/common/ustrfmt.cpp
new file mode 100644
index 0000000000..1a9b15a59f
--- /dev/null
+++ b/thirdparty/icu4c/common/ustrfmt.cpp
@@ -0,0 +1,59 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2001-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#include "cstring.h"
+#include "ustrfmt.h"
+
+
+/***
+ * Fills in a UChar* string with the radix-based representation of a
+ * uint32_t number padded with zeroes to minwidth.  The result
+ * will be null terminated if there is room.
+ *
+ * Digits above 9 are written as uppercase letters ('A' for 10, and so on).
+ * Digits are generated least significant first and reversed at the end, so
+ * if the buffer is too small only the least significant digits that fit
+ * are kept.
+ *
+ * @param buffer UChar buffer to receive result
+ * @param capacity capacity of buffer. If this is <= 0 nothing is
+ *                 written and 0 is returned.
+ * @param i the unsigned number to be formatted
+ * @param radix the radix from 2..36. A radix of 0 is rejected
+ *                 (returns 0) instead of dividing by zero.
+ * @param minwidth the minimum width.  If the result is narrower than
+ *                 this, '0's will be added on the left.  Must be <=
+ *                 capacity; padding never writes past capacity.
+ * @return the length of the result, not including any terminating
+ *        null
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_itou (UChar * buffer, int32_t capacity,
+           uint32_t i, uint32_t radix, int32_t minwidth)
+{
+    int32_t length = 0;
+    int digit;
+    int32_t j;
+    UChar temp;
+
+    /* The loop below always stores at least one digit and computes i % radix:
+       bail out when there is no room for that digit or the radix is 0. */
+    if(capacity <= 0 || radix == 0) {
+        return 0;
+    }
+
+    do{
+        digit = (int)(i % radix);
+        /* 0x30 is '0'; adding 7 more skips from '9'+1 to 'A' */
+        buffer[length++]=(UChar)(digit<=9?(0x0030+digit):(0x0030+digit+7));
+        i=i/radix;
+    } while(i && length<capacity);
+
+    /* zero padding, bounded by capacity so that a too-large minwidth
+       cannot overrun the buffer */
+    while (length < minwidth && length < capacity){
+        buffer[length++] = (UChar) 0x0030;
+    }
+    /* null terminate the buffer */
+    if(length<capacity){
+        buffer[length] = (UChar) 0x0000;
+    }
+
+    /* Reverses the string */
+    for (j = 0; j < (length / 2); j++){
+        temp = buffer[(length-1) - j];
+        buffer[(length-1) - j] = buffer[j];
+        buffer[j] = temp;
+    }
+    return length;
+}
diff --git a/thirdparty/icu4c/common/ustrfmt.h b/thirdparty/icu4c/common/ustrfmt.h
new file mode 100644
index 0000000000..53eb0557e4
--- /dev/null
+++ b/thirdparty/icu4c/common/ustrfmt.h
@@ -0,0 +1,19 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2001-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#ifndef USTRFMT_H
+#define USTRFMT_H
+
+#include "unicode/utypes.h"
+
+/**
+ * Formats the unsigned number i in the given radix into buffer, padded on
+ * the left with '0' up to minwidth UChars, and NUL-terminates if there is room.
+ * Returns the length of the result, not counting any terminating NUL.
+ * The full contract is documented at the definition in ustrfmt.cpp.
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_itou (UChar * buffer, int32_t capacity, uint32_t i, uint32_t radix, int32_t minwidth);
+
+
+#endif
diff --git a/thirdparty/icu4c/common/ustring.cpp b/thirdparty/icu4c/common/ustring.cpp
new file mode 100644
index 0000000000..bba2d45c4e
--- /dev/null
+++ b/thirdparty/icu4c/common/ustring.cpp
@@ -0,0 +1,1537 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1998-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File ustring.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 12/07/98 bertrand Creation.
+******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/uchar.h"
+#include "unicode/ustring.h"
+#include "unicode/utf16.h"
+#include "cstring.h"
+#include "cwchar.h"
+#include "cmemory.h"
+#include "ustr_imp.h"
+
+/* ANSI string.h - style functions ------------------------------------------ */
+
+/* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar */
+#define U_BMP_MAX 0xffff
+
+/* Forward binary string search functions ----------------------------------- */
+
+/*
+ * Decide whether the match [match, matchLimit) lies on code point boundaries
+ * of the text it was found in, i.e. neither edge cuts a surrogate pair apart.
+ * All pointers refer to the same buffer; 'start' is the beginning of the text.
+ * The limit pointer may be NULL (NUL-terminated text), all others must be
+ * real pointers.
+ */
+static inline UBool
+isMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) {
+    /* leading edge: match begins with a trail unit that has its lead unit right before it */
+    if(U16_IS_TRAIL(*match) && match!=start && U16_IS_LEAD(*(match-1))) {
+        return FALSE;
+    }
+    /* trailing edge: match ends with a lead unit whose trail unit follows the match */
+    const UBool tailSplitsPair=
+        U16_IS_LEAD(*(matchLimit-1)) && matchLimit!=limit && U16_IS_TRAIL(*matchLimit);
+    return tailSplitsPair ? FALSE : TRUE;
+}
+
+/*
+ * Find the first occurrence of sub[0..subLength) in s[0..length) that does
+ * not split a surrogate pair of s (see isMatchAtCPBoundary()).
+ * A length of -1 means that the respective string is NUL-terminated.
+ *
+ * Returns s itself when sub is NULL, subLength<-1 or sub is empty;
+ * returns NULL when s is NULL, length<-1 or there is no match.
+ *
+ * There are three search loops: both strings NUL-terminated, only s
+ * NUL-terminated, and s with an explicit length.
+ */
+U_CAPI UChar * U_EXPORT2
+u_strFindFirst(const UChar *s, int32_t length,
+ const UChar *sub, int32_t subLength) {
+ const UChar *start, *p, *q, *subLimit;
+ UChar c, cs, cq;
+
+ if(sub==NULL || subLength<-1) {
+ return (UChar *)s;
+ }
+ if(s==NULL || length<-1) {
+ return NULL;
+ }
+
+ start=s;
+
+ if(length<0 && subLength<0) {
+ /* both strings are NUL-terminated */
+ if((cs=*sub++)==0) {
+ return (UChar *)s;
+ }
+ if(*sub==0 && !U16_IS_SURROGATE(cs)) {
+ /* the substring consists of a single, non-surrogate BMP code point */
+ return u_strchr(s, cs);
+ }
+
+ while((c=*s++)!=0) {
+ if(c==cs) {
+ /* found first substring UChar, compare rest */
+ p=s;
+ q=sub;
+ for(;;) {
+ if((cq=*q)==0) {
+ if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
+ return (UChar *)(s-1); /* well-formed match */
+ } else {
+ break; /* no match because surrogate pair is split */
+ }
+ }
+ if((c=*p)==0) {
+ return NULL; /* no match, and none possible after s */
+ }
+ if(c!=cq) {
+ break; /* no match */
+ }
+ ++p;
+ ++q;
+ }
+ }
+ }
+
+ /* not found */
+ return NULL;
+ }
+
+ if(subLength<0) {
+ subLength=u_strlen(sub);
+ }
+ if(subLength==0) {
+ return (UChar *)s;
+ }
+
+ /* get sub[0] to search for it fast */
+ cs=*sub++;
+ --subLength; /* from here on, subLength counts the units after sub[0] */
+ subLimit=sub+subLength;
+
+ if(subLength==0 && !U16_IS_SURROGATE(cs)) {
+ /* the substring consists of a single, non-surrogate BMP code point */
+ return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
+ }
+
+ if(length<0) {
+ /* s is NUL-terminated */
+ while((c=*s++)!=0) {
+ if(c==cs) {
+ /* found first substring UChar, compare rest */
+ p=s;
+ q=sub;
+ for(;;) {
+ if(q==subLimit) {
+ if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
+ return (UChar *)(s-1); /* well-formed match */
+ } else {
+ break; /* no match because surrogate pair is split */
+ }
+ }
+ if((c=*p)==0) {
+ return NULL; /* no match, and none possible after s */
+ }
+ if(c!=*q) {
+ break; /* no match */
+ }
+ ++p;
+ ++q;
+ }
+ }
+ }
+ } else {
+ const UChar *limit, *preLimit;
+
+ /* subLength was decremented above */
+ if(length<=subLength) {
+ return NULL; /* s is shorter than sub */
+ }
+
+ limit=s+length;
+
+ /* the substring must start before preLimit */
+ preLimit=limit-subLength;
+
+ while(s!=preLimit) {
+ c=*s++;
+ if(c==cs) {
+ /* found first substring UChar, compare rest */
+ p=s;
+ q=sub;
+ for(;;) {
+ if(q==subLimit) {
+ if(isMatchAtCPBoundary(start, s-1, p, limit)) {
+ return (UChar *)(s-1); /* well-formed match */
+ } else {
+ break; /* no match because surrogate pair is split */
+ }
+ }
+ if(*p!=*q) {
+ break; /* no match */
+ }
+ ++p;
+ ++q;
+ }
+ }
+ }
+ }
+
+ /* not found */
+ return NULL;
+}
+
+/* First occurrence of a NUL-terminated substring in a NUL-terminated string; see u_strFindFirst(). */
+U_CAPI UChar * U_EXPORT2
+u_strstr(const UChar *s, const UChar *substring) {
+ return u_strFindFirst(s, -1, substring, -1);
+}
+
+/*
+ * Find the first occurrence of the code unit c in the NUL-terminated string s.
+ * A surrogate c is delegated to u_strFindFirst() so that half of a surrogate
+ * pair is never reported. Searching for c==0 returns the terminator position.
+ * Returns NULL if c does not occur.
+ */
+U_CAPI UChar * U_EXPORT2
+u_strchr(const UChar *s, UChar c) {
+    if(U16_IS_SURROGATE(c)) {
+        /* make sure to not find half of a surrogate pair */
+        return u_strFindFirst(s, -1, &c, 1);
+    }
+
+    /* trivial search for a BMP code point */
+    for(;; ++s) {
+        const UChar unit=*s;
+        if(unit==c) {
+            return (UChar *)s;
+        }
+        if(unit==0) {
+            return NULL;
+        }
+    }
+}
+
+/*
+ * Find the first occurrence of the code point c in the NUL-terminated string s.
+ * BMP values go through u_strchr(); supplementary code points are searched
+ * as a lead/trail surrogate pair; values above UCHAR_MAX_VALUE are never found.
+ */
+U_CAPI UChar * U_EXPORT2
+u_strchr32(const UChar *s, UChar32 c) {
+    if((uint32_t)c<=U_BMP_MAX) {
+        /* find BMP code point */
+        return u_strchr(s, (UChar)c);
+    }
+    if((uint32_t)c>UCHAR_MAX_VALUE) {
+        /* not a Unicode code point, not findable */
+        return NULL;
+    }
+
+    /* find supplementary code point as surrogate pair */
+    const UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
+    for(; *s!=0; ++s) {
+        /* s[1] is at worst the terminating NUL */
+        if(s[0]==lead && s[1]==trail) {
+            return (UChar *)s;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Find the first occurrence of the code unit c in s[0..count).
+ * Returns NULL for count<=0 or if c does not occur. A surrogate c is
+ * delegated to u_strFindFirst() so that half of a surrogate pair is never
+ * reported.
+ */
+U_CAPI UChar * U_EXPORT2
+u_memchr(const UChar *s, UChar c, int32_t count) {
+    if(count<=0) {
+        return NULL; /* no string */
+    }
+    if(U16_IS_SURROGATE(c)) {
+        /* make sure to not find half of a surrogate pair */
+        return u_strFindFirst(s, count, &c, 1);
+    }
+
+    /* trivial search for a BMP code point */
+    for(const UChar *end=s+count; s!=end; ++s) {
+        if(*s==c) {
+            return (UChar *)s;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Find the first occurrence of the code point c in s[0..count).
+ * BMP values go through u_memchr(); supplementary code points are searched
+ * as a lead/trail surrogate pair; values above UCHAR_MAX_VALUE are never found.
+ */
+U_CAPI UChar * U_EXPORT2
+u_memchr32(const UChar *s, UChar32 c, int32_t count) {
+    if((uint32_t)c<=U_BMP_MAX) {
+        /* find BMP code point */
+        return u_memchr(s, (UChar)c, count);
+    }
+    if(count<2) {
+        /* too short for a surrogate pair */
+        return NULL;
+    }
+    if((uint32_t)c>UCHAR_MAX_VALUE) {
+        /* not a Unicode code point, not findable */
+        return NULL;
+    }
+
+    /* find supplementary code point as surrogate pair */
+    const UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
+    /* stop one unit early so that s[1] is always inside the string */
+    for(const UChar *lastStart=s+count-1; s!=lastStart; ++s) {
+        if(s[0]==lead && s[1]==trail) {
+            return (UChar *)s;
+        }
+    }
+    return NULL;
+}
+
+/* Backward binary string search functions ---------------------------------- */
+
+/*
+ * Find the last occurrence of sub[0..subLength) in s[0..length) that does
+ * not split a surrogate pair of s (see isMatchAtCPBoundary()).
+ * A length of -1 means that the respective string is NUL-terminated.
+ *
+ * Returns s itself when sub is NULL, subLength<-1 or sub is empty;
+ * returns NULL when s is NULL, length<-1 or there is no match.
+ */
+U_CAPI UChar * U_EXPORT2
+u_strFindLast(const UChar *s, int32_t length,
+ const UChar *sub, int32_t subLength) {
+ const UChar *start, *limit, *p, *q, *subLimit;
+ UChar c, cs;
+
+ if(sub==NULL || subLength<-1) {
+ return (UChar *)s;
+ }
+ if(s==NULL || length<-1) {
+ return NULL;
+ }
+
+ /*
+ * This implementation is more lazy than the one for u_strFindFirst():
+ * There is no special search code for NUL-terminated strings.
+ * It does not seem to be worth it for searching substrings to
+ * search forward and find all matches like in u_strrchr() and similar.
+ * Therefore, we simply get both string lengths and search backward.
+ *
+ * markus 2002oct23
+ */
+
+ if(subLength<0) {
+ subLength=u_strlen(sub);
+ }
+ if(subLength==0) {
+ return (UChar *)s;
+ }
+
+ /* get sub[subLength-1] to search for it fast */
+ subLimit=sub+subLength;
+ cs=*(--subLimit); /* subLimit now points at the last unit of sub */
+ --subLength; /* from here on, subLength counts the units before that last one */
+
+ if(subLength==0 && !U16_IS_SURROGATE(cs)) {
+ /* the substring consists of a single, non-surrogate BMP code point */
+ return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length);
+ }
+
+ if(length<0) {
+ length=u_strlen(s);
+ }
+
+ /* subLength was decremented above */
+ if(length<=subLength) {
+ return NULL; /* s is shorter than sub */
+ }
+
+ start=s;
+ limit=s+length;
+
+ /* the substring must start no later than s+subLength */
+ s+=subLength;
+
+ while(s!=limit) {
+ c=*(--limit);
+ if(c==cs) {
+ /* found last substring UChar, compare rest */
+ p=limit;
+ q=subLimit;
+ for(;;) {
+ if(q==sub) {
+ if(isMatchAtCPBoundary(start, p, limit+1, start+length)) {
+ return (UChar *)p; /* well-formed match */
+ } else {
+ break; /* no match because surrogate pair is split */
+ }
+ }
+ if(*(--p)!=*(--q)) {
+ break; /* no match */
+ }
+ }
+ }
+ }
+
+ /* not found */
+ return NULL;
+}
+
+/* Last occurrence of a NUL-terminated substring in a NUL-terminated string; see u_strFindLast(). */
+U_CAPI UChar * U_EXPORT2
+u_strrstr(const UChar *s, const UChar *substring) {
+ return u_strFindLast(s, -1, substring, -1);
+}
+
+/*
+ * Find the last occurrence of the code unit c in the NUL-terminated string s.
+ * A surrogate c is delegated to u_strFindLast() so that half of a surrogate
+ * pair is never reported. Searching for c==0 returns the terminator position.
+ * Returns NULL if c does not occur.
+ */
+U_CAPI UChar * U_EXPORT2
+u_strrchr(const UChar *s, UChar c) {
+    if(U16_IS_SURROGATE(c)) {
+        /* make sure to not find half of a surrogate pair */
+        return u_strFindLast(s, -1, &c, 1);
+    }
+
+    /* single forward pass, remembering the most recent hit */
+    const UChar *lastHit=NULL;
+    for(;; ++s) {
+        const UChar unit=*s;
+        if(unit==c) {
+            lastHit=s;
+        }
+        if(unit==0) {
+            return (UChar *)lastHit;
+        }
+    }
+}
+
+/*
+ * Find the last occurrence of the code point c in the NUL-terminated string s.
+ * BMP values go through u_strrchr(); supplementary code points are searched
+ * as a lead/trail surrogate pair; values above UCHAR_MAX_VALUE are never found.
+ */
+U_CAPI UChar * U_EXPORT2
+u_strrchr32(const UChar *s, UChar32 c) {
+    if((uint32_t)c<=U_BMP_MAX) {
+        /* find BMP code point */
+        return u_strrchr(s, (UChar)c);
+    }
+    if((uint32_t)c>UCHAR_MAX_VALUE) {
+        /* not a Unicode code point, not findable */
+        return NULL;
+    }
+
+    /* find supplementary code point as surrogate pair, keeping the latest hit */
+    const UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
+    const UChar *lastHit=NULL;
+    for(; *s!=0; ++s) {
+        /* s[1] is at worst the terminating NUL */
+        if(s[0]==lead && s[1]==trail) {
+            lastHit=s;
+        }
+    }
+    return (UChar *)lastHit;
+}
+
+/*
+ * Find the last occurrence of the code unit c in s[0..count).
+ * Returns NULL for count<=0 or if c does not occur. A surrogate c is
+ * delegated to u_strFindLast() so that half of a surrogate pair is never
+ * reported.
+ */
+U_CAPI UChar * U_EXPORT2
+u_memrchr(const UChar *s, UChar c, int32_t count) {
+    if(count<=0) {
+        return NULL; /* no string */
+    }
+    if(U16_IS_SURROGATE(c)) {
+        /* make sure to not find half of a surrogate pair */
+        return u_strFindLast(s, count, &c, 1);
+    }
+
+    /* trivial backward search for a BMP code point */
+    for(const UChar *p=s+count; p!=s;) {
+        --p;
+        if(*p==c) {
+            return (UChar *)p;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Find the last occurrence of the code point c in s[0..count).
+ * BMP values go through u_memrchr(); supplementary code points are searched
+ * as a lead/trail surrogate pair; values above UCHAR_MAX_VALUE are never found.
+ */
+U_CAPI UChar * U_EXPORT2
+u_memrchr32(const UChar *s, UChar32 c, int32_t count) {
+    if((uint32_t)c<=U_BMP_MAX) {
+        /* find BMP code point */
+        return u_memrchr(s, (UChar)c, count);
+    }
+    if(count<2) {
+        /* too short for a surrogate pair */
+        return NULL;
+    }
+    if((uint32_t)c>UCHAR_MAX_VALUE) {
+        /* not a Unicode code point, not findable */
+        return NULL;
+    }
+
+    /* find supplementary code point as surrogate pair */
+    const UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
+    /* p walks over candidate trail positions, from the last unit down to s+1 */
+    for(const UChar *p=s+count-1; p!=s; --p) {
+        if(p[0]==trail && p[-1]==lead) {
+            return (UChar *)(p-1);
+        }
+    }
+    return NULL;
+}
+
+/* Tokenization functions --------------------------------------------------- */
+
+/*
+ * Match each code point in a string against each code point in the matchSet.
+ * Return the index of the first string code point that
+ * is (polarity==TRUE) or is not (FALSE) contained in the matchSet.
+ * Return -(string length)-1 if there is no such code point.
+ *
+ * Both string and matchSet are NUL-terminated.
+ * Non-surrogate code units of the string are compared unit by unit against
+ * the whole matchSet. Surrogates (paired or unpaired) are compared as code
+ * points, and only against the part of matchSet that starts at its first
+ * surrogate code unit (index matchBMPLen).
+ */
+static int32_t
+_matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) {
+ int32_t matchLen, matchBMPLen, strItr, matchItr;
+ UChar32 stringCh, matchCh;
+ UChar c, c2;
+
+ /* first part of matchSet contains only BMP code points */
+ matchBMPLen = 0;
+ while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) {
+ ++matchBMPLen;
+ }
+
+ /* second part of matchSet contains BMP and supplementary code points */
+ matchLen = matchBMPLen;
+ while(matchSet[matchLen] != 0) {
+ ++matchLen;
+ }
+
+ for(strItr = 0; (c = string[strItr]) != 0;) {
+ ++strItr;
+ if(U16_IS_SINGLE(c)) {
+ if(polarity) {
+ for(matchItr = 0; matchItr < matchLen; ++matchItr) {
+ if(c == matchSet[matchItr]) {
+ return strItr - 1; /* one matches */
+ }
+ }
+ } else {
+ for(matchItr = 0; matchItr < matchLen; ++matchItr) {
+ if(c == matchSet[matchItr]) {
+ goto endloop;
+ }
+ }
+ return strItr - 1; /* none matches */
+ }
+ } else {
+ /*
+ * No need to check for string length before U16_IS_TRAIL
+ * because c2 could at worst be the terminating NUL.
+ */
+ if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
+ ++strItr;
+ stringCh = U16_GET_SUPPLEMENTARY(c, c2);
+ } else {
+ stringCh = c; /* unpaired trail surrogate */
+ }
+
+ if(polarity) {
+ for(matchItr = matchBMPLen; matchItr < matchLen;) {
+ U16_NEXT(matchSet, matchItr, matchLen, matchCh);
+ if(stringCh == matchCh) {
+ return strItr - U16_LENGTH(stringCh); /* one matches */
+ }
+ }
+ } else {
+ for(matchItr = matchBMPLen; matchItr < matchLen;) {
+ U16_NEXT(matchSet, matchItr, matchLen, matchCh);
+ if(stringCh == matchCh) {
+ goto endloop;
+ }
+ }
+ return strItr - U16_LENGTH(stringCh); /* none matches */
+ }
+ }
+endloop:
+ /* wish C had continue with labels like Java... */;
+ }
+
+ /* Didn't find it. */
+ return -strItr-1;
+}
+
+/*
+ * Return a pointer to the first code point of string that is contained in
+ * matchSet, or NULL if there is none. Both arguments are NUL-terminated.
+ */
+U_CAPI UChar * U_EXPORT2
+u_strpbrk(const UChar *string, const UChar *matchSet)
+{
+    const int32_t pos = _matchFromSet(string, matchSet, TRUE);
+    return (pos >= 0) ? (UChar *)string + pos : NULL;
+}
+
+/*
+ * Return the number of code units at the start of string before the first
+ * code point that is contained in matchSet; the whole string length if no
+ * such code point exists.
+ */
+U_CAPI int32_t U_EXPORT2
+u_strcspn(const UChar *string, const UChar *matchSet)
+{
+    const int32_t pos = _matchFromSet(string, matchSet, TRUE);
+    /* a negative result encodes -(string length)-1 */
+    return (pos >= 0) ? pos : -pos - 1;
+}
+
+/*
+ * Return the number of code units at the start of string before the first
+ * code point that is NOT contained in matchSet; the whole string length if
+ * every code point is in matchSet.
+ */
+U_CAPI int32_t U_EXPORT2
+u_strspn(const UChar *string, const UChar *matchSet)
+{
+    const int32_t pos = _matchFromSet(string, matchSet, FALSE);
+    /* a negative result encodes -(string length)-1 */
+    return (pos >= 0) ? pos : -pos - 1;
+}
+
+/* ----- Text manipulation functions --- */
+
+/*
+ * Split a NUL-terminated string into tokens separated by code points from delim.
+ *
+ * src:       the string to tokenize on the first call; NULL on later calls
+ *            to continue from *saveState.
+ * delim:     NUL-terminated set of delimiter code points.
+ * saveState: must not be NULL (it is dereferenced unconditionally). Receives
+ *            the position after the returned token, or NULL once the last
+ *            token has been handed out.
+ *
+ * The source string is modified: the delimiter that ends a token is
+ * overwritten with NUL.
+ * Returns the next token, or NULL when there are no more tokens.
+ */
+U_CAPI UChar* U_EXPORT2
+u_strtok_r(UChar *src,
+ const UChar *delim,
+ UChar **saveState)
+{
+ UChar *tokSource;
+ UChar *nextToken;
+ uint32_t nonDelimIdx;
+
+ /* If saveState is NULL, the user messed up. */
+ if (src != NULL) {
+ tokSource = src;
+ *saveState = src; /* Set to "src" in case there are no delimiters */
+ }
+ else if (*saveState) {
+ tokSource = *saveState;
+ }
+ else {
+ /* src == NULL && *saveState == NULL */
+ /* This shouldn't happen. We already finished tokenizing. */
+ return NULL;
+ }
+
+ /* Skip initial delimiters */
+ nonDelimIdx = u_strspn(tokSource, delim);
+ tokSource = &tokSource[nonDelimIdx];
+
+ if (*tokSource) {
+ nextToken = u_strpbrk(tokSource, delim);
+ if (nextToken != NULL) {
+ /* Create a token */
+ *(nextToken++) = 0;
+ *saveState = nextToken;
+ return tokSource;
+ }
+ else if (*saveState) {
+ /* Return the last token */
+ *saveState = NULL;
+ return tokSource;
+ }
+ }
+ else {
+ /* No tokens were found. Only delimiters were left. */
+ *saveState = NULL;
+ }
+ return NULL;
+}
+
+/* Miscellaneous functions -------------------------------------------------- */
+
+/*
+ * Append the NUL-terminated string src (including its terminator) to the
+ * NUL-terminated string dst. No capacity is checked; the caller has to
+ * provide enough room. Returns dst.
+ */
+U_CAPI UChar* U_EXPORT2
+u_strcat(UChar *dst,
+         const UChar *src)
+{
+    UChar *tail = dst;
+
+    /* advance to the terminator of dst */
+    for(; *tail != 0; ++tail) {
+    }
+    /* copy src up to and including its terminating NUL */
+    do {
+        *tail = *src++;
+    } while(*tail++ != 0);
+
+    return dst;
+}
+
+/*
+ * Append at most n code units of the NUL-terminated string src to the
+ * NUL-terminated string dst and terminate the result with NUL.
+ * For n<=0 nothing is changed. No capacity is checked. Returns dst.
+ */
+U_CAPI UChar* U_EXPORT2
+u_strncat(UChar *dst,
+          const UChar *src,
+          int32_t n )
+{
+    if(n <= 0) {
+        return dst;
+    }
+
+    UChar *tail = dst;
+    while(*tail != 0) {      /* To end of first string */
+        ++tail;
+    }
+
+    for(;;) {
+        *tail = *src;
+        if(*tail == 0) {
+            break;           /* copied the terminator of src */
+        }
+        ++tail;
+        if(--n == 0) {
+            *tail = 0;       /* limit reached: terminate explicitly */
+            break;
+        }
+        ++src;
+    }
+
+    return dst;
+}
+
+/* ----- Text property functions --- */
+
+/*
+ * Compare two NUL-terminated strings code unit by code unit.
+ * Returns the difference of the first pair of differing code units
+ * (negative, zero or positive); zero means the strings are equal.
+ */
+U_CAPI int32_t U_EXPORT2
+u_strcmp(const UChar *s1,
+         const UChar *s2)
+{
+    for(;; ++s1, ++s2) {
+        const UChar unit1 = *s1;
+        const UChar unit2 = *s2;
+        /* stop at the first difference or at the common end */
+        if (unit1 != unit2 || unit1 == 0) {
+            return (int32_t)unit1 - (int32_t)unit2;
+        }
+    }
+}
+
+/*
+ * Compare two UTF-16 strings in code unit order or, when codePointOrder is
+ * set, in code point order.
+ *
+ * Three input modes are distinguished:
+ * - length1<0 and length2<0: both strings are NUL-terminated (strcmp style).
+ * - strncmpStyle: length1 is used as the limit for both strings and a NUL
+ *   in the text also ends the comparison (length2 is not consulted).
+ * - otherwise: both lengths count; a negative one is replaced by u_strlen().
+ *   If one string is a prefix of the other, -1/0/+1 is returned according
+ *   to the lengths.
+ *
+ * In all other cases the result is the difference of the first pair of
+ * differing code units, after the optional code point order fix-up.
+ */
+U_CFUNC int32_t U_EXPORT2
+uprv_strCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ UBool strncmpStyle, UBool codePointOrder) {
+ const UChar *start1, *start2, *limit1, *limit2;
+ UChar c1, c2;
+
+ /* setup for fix-up */
+ start1=s1;
+ start2=s2;
+
+ /* compare identical prefixes - they do not need to be fixed up */
+ if(length1<0 && length2<0) {
+ /* strcmp style, both NUL-terminated */
+ if(s1==s2) {
+ return 0;
+ }
+
+ for(;;) {
+ c1=*s1;
+ c2=*s2;
+ if(c1!=c2) {
+ break;
+ }
+ if(c1==0) {
+ return 0;
+ }
+ ++s1;
+ ++s2;
+ }
+
+ /* setup for fix-up */
+ /* NULL limits never compare equal to s+1 below; the NUL terminator stops the look-ahead instead */
+ limit1=limit2=NULL;
+ } else if(strncmpStyle) {
+ /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */
+ if(s1==s2) {
+ return 0;
+ }
+
+ limit1=start1+length1;
+
+ for(;;) {
+ /* both lengths are same, check only one limit */
+ if(s1==limit1) {
+ return 0;
+ }
+
+ c1=*s1;
+ c2=*s2;
+ if(c1!=c2) {
+ break;
+ }
+ if(c1==0) {
+ return 0;
+ }
+ ++s1;
+ ++s2;
+ }
+
+ /* setup for fix-up */
+ limit2=start2+length1; /* use length1 here, too, to enforce assumption */
+ } else {
+ /* memcmp/UnicodeString style, both length-specified */
+ int32_t lengthResult;
+
+ if(length1<0) {
+ length1=u_strlen(s1);
+ }
+ if(length2<0) {
+ length2=u_strlen(s2);
+ }
+
+ /* limit1=start1+min(length1, length2) */
+ if(length1<length2) {
+ lengthResult=-1;
+ limit1=start1+length1;
+ } else if(length1==length2) {
+ lengthResult=0;
+ limit1=start1+length1;
+ } else /* length1>length2 */ {
+ lengthResult=1;
+ limit1=start1+length2;
+ }
+
+ if(s1==s2) {
+ return lengthResult;
+ }
+
+ for(;;) {
+ /* check pseudo-limit */
+ if(s1==limit1) {
+ return lengthResult;
+ }
+
+ c1=*s1;
+ c2=*s2;
+ if(c1!=c2) {
+ break;
+ }
+ ++s1;
+ ++s2;
+ }
+
+ /* setup for fix-up */
+ /* from here on the limits are the real string ends, not the common-prefix limit */
+ limit1=start1+length1;
+ limit2=start2+length2;
+ }
+
+ /* if both values are in or above the surrogate range, fix them up */
+ if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
+ /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
+ if(
+ (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) ||
+ (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1)))
+ ) {
+ /* part of a surrogate pair, leave >=d800 */
+ } else {
+ /* BMP code point - may be surrogate code point - make <d800 */
+ c1-=0x2800;
+ }
+
+ if(
+ (c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) ||
+ (U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1)))
+ ) {
+ /* part of a surrogate pair, leave >=d800 */
+ } else {
+ /* BMP code point - may be surrogate code point - make <d800 */
+ c2-=0x2800;
+ }
+ }
+
+ /* now c1 and c2 are in the requested (code unit or code point) order */
+ return (int32_t)c1-(int32_t)c2;
+}
+
+/*
+ * Compare two strings as presented by UCharIterators.
+ * Use code unit or code point order.
+ * Both iterators are first moved to their start. Returns 0 if either
+ * iterator is NULL, if both arguments are the same iterator, or if the
+ * texts are equal; otherwise the difference of the first differing units
+ * (after the optional code point order fix-up).
+ * When the function returns, it is undefined where the iterators
+ * have stopped.
+ */
+U_CAPI int32_t U_EXPORT2
+u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
+ UChar32 c1, c2;
+
+ /* argument checking */
+ if(iter1==NULL || iter2==NULL) {
+ return 0; /* bad arguments */
+ }
+ if(iter1==iter2) {
+ return 0; /* identical iterators */
+ }
+
+ /* reset iterators to start? */
+ iter1->move(iter1, 0, UITER_START);
+ iter2->move(iter2, 0, UITER_START);
+
+ /* compare identical prefixes - they do not need to be fixed up */
+ for(;;) {
+ c1=iter1->next(iter1);
+ c2=iter2->next(iter2);
+ if(c1!=c2) {
+ break;
+ }
+ /* equal values of -1 end the comparison: both iterators returned the same end marker */
+ if(c1==-1) {
+ return 0;
+ }
+ }
+
+ /* if both values are in or above the surrogate range, fix them up */
+ if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
+ /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
+ /*
+ * A lead surrogate is checked against the unit the iterator now rests on;
+ * for a trail surrogate the iterator steps back twice: the first previous()
+ * result is discarded, the second is tested for a lead surrogate.
+ */
+ if(
+ (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) ||
+ (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
+ ) {
+ /* part of a surrogate pair, leave >=d800 */
+ } else {
+ /* BMP code point - may be surrogate code point - make <d800 */
+ c1-=0x2800;
+ }
+
+ if(
+ (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) ||
+ (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
+ ) {
+ /* part of a surrogate pair, leave >=d800 */
+ } else {
+ /* BMP code point - may be surrogate code point - make <d800 */
+ c2-=0x2800;
+ }
+ }
+
+ /* now c1 and c2 are in the requested (code unit or code point) order */
+ return (int32_t)c1-(int32_t)c2;
+}
+
+#if 0
+/*
+ * u_strCompareIter() does not leave the iterators _on_ the different units.
+ * This is possible but would cost a few extra indirect function calls to back
+ * up if the last unit (c1 or c2 respectively) was >=0.
+ *
+ * Consistently leaving them _behind_ the different units is not an option
+ * because the current "unit" is the end of the string if that is reached,
+ * and in such a case the iterator does not move.
+ * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
+ * of their strings. Calling previous() on each does not move them to where
+ * the comparison fails.
+ *
+ * So the simplest semantics is to not define where the iterators end up.
+ *
+ * The following fragment is part of what would need to be done for backing up.
+ */
+void fragment {
+ /* iff a surrogate is part of a surrogate pair, leave >=d800 */
+ if(c1<=0xdbff) {
+ if(!U16_IS_TRAIL(iter1->current(iter1))) {
+ /* lead surrogate code point - make <d800 */
+ c1-=0x2800;
+ }
+ } else if(c1<=0xdfff) {
+ int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
+ iter1->previous(iter1); /* ==c1 */
+ if(!U16_IS_LEAD(iter1->previous(iter1))) {
+ /* trail surrogate code point - make <d800 */
+ c1-=0x2800;
+ }
+ /* go back to behind where the difference is */
+ iter1->move(iter1, idx, UITER_ZERO);
+ } else /* 0xe000<=c1<=0xffff */ {
+ /* BMP code point - make <d800 */
+ c1-=0x2800;
+ }
+}
+#endif
+
+/*
+ * Public comparison of two strings with explicit lengths (-1 means
+ * NUL-terminated). Invalid arguments compare as equal (0).
+ */
+U_CAPI int32_t U_EXPORT2
+u_strCompare(const UChar *s1, int32_t length1,
+             const UChar *s2, int32_t length2,
+             UBool codePointOrder) {
+    /* both strings need a non-NULL pointer and a length of -1 or more */
+    if(!(s1!=NULL && length1>=-1 && s2!=NULL && length2>=-1)) {
+        return 0;
+    }
+
+    const UBool strncmpStyle = FALSE;
+    return uprv_strCompare(s1, length1, s2, length2, strncmpStyle, codePointOrder);
+}
+
+/*
+ * Compare two NUL-terminated strings in code point order.
+ * (u_strcmp() compares in code unit order instead.)
+ */
+U_CAPI int32_t U_EXPORT2
+u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
+    const int32_t nulTerminated = -1; /* negative length: string ends at its NUL */
+    const UBool strncmpStyle = FALSE;
+    const UBool codePointOrder = TRUE;
+
+    return uprv_strCompare(s1, nulTerminated, s2, nulTerminated, strncmpStyle, codePointOrder);
+}
+
+/*
+ * Compare at most n code units of two strings in code unit order; a NUL
+ * in s1 also ends the comparison. Returns 0 when n<=0.
+ */
+U_CAPI int32_t U_EXPORT2
+u_strncmp(const UChar *s1,
+          const UChar *s2,
+          int32_t n)
+{
+    int32_t diff = 0;
+
+    for(; n > 0; --n, ++s1, ++s2) {
+        diff = (int32_t)*s1 - (int32_t)*s2;
+        if(diff != 0 || *s1 == 0) {
+            break; /* first difference, or both strings ended here */
+        }
+    }
+
+    return diff;
+}
+
+/*
+ * strncmp-style comparison in code point order: the same limit n is
+ * passed for both strings.
+ */
+U_CAPI int32_t U_EXPORT2
+u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) {
+    const UBool strncmpStyle = TRUE;
+    const UBool codePointOrder = TRUE;
+
+    return uprv_strCompare(s1, n, s2, n, strncmpStyle, codePointOrder);
+}
+
+/*
+ * Copy the NUL-terminated string src, terminator included, to dst.
+ * Returns dst.
+ */
+U_CAPI UChar* U_EXPORT2
+u_strcpy(UChar *dst,
+         const UChar *src)
+{
+    UChar *out = dst;
+    UChar unit;
+
+    do {
+        unit = *src++;
+        *out++ = unit;
+    } while(unit != 0);
+
+    return dst;
+}
+
+/*
+ * Copy at most n units from src to dst, stopping after a NUL has been
+ * copied. If no NUL is found within n units the result is not terminated.
+ * Returns dst.
+ */
+U_CAPI UChar* U_EXPORT2
+u_strncpy(UChar *dst,
+          const UChar *src,
+          int32_t n)
+{
+    UChar *out = dst;
+
+    for(; n > 0; --n) {
+        UChar unit = *src++;
+        *out++ = unit;
+        if(unit == 0) {
+            break; /* terminator copied, done */
+        }
+    }
+
+    return dst;
+}
+
+/*
+ * Return the number of code units before the terminating NUL of s.
+ * When wchar_t has the same size as UChar the platform wcslen is used.
+ */
+U_CAPI int32_t U_EXPORT2
+u_strlen(const UChar *s)
+{
+#if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
+    return (int32_t)uprv_wcslen((const wchar_t *)s);
+#else
+    const UChar *t = s;
+    while(*t != 0) {
+      ++t;
+    }
+    /* pointer difference is ptrdiff_t; narrow explicitly, as the wcslen branch does */
+    return (int32_t)(t - s);
+#endif
+}
+
+/*
+ * Count the code points in s. A lead surrogate immediately followed by a
+ * trail surrogate counts once; every other code unit counts once.
+ * length==-1 means NUL-terminated. Returns 0 for s==NULL or length<-1.
+ */
+U_CAPI int32_t U_EXPORT2
+u_countChar32(const UChar *s, int32_t length) {
+    int32_t total = 0;
+
+    if(s==NULL || length<-1) {
+        return 0;
+    }
+
+    if(length<0) {
+        /* NUL-terminated: looking one unit ahead can at worst see the NUL */
+        UChar unit;
+        while((unit=*s++)!=0) {
+            ++total;
+            if(U16_IS_LEAD(unit) && U16_IS_TRAIL(*s)) {
+                ++s; /* swallow the trail surrogate of the pair */
+            }
+        }
+        return total;
+    }
+
+    /* explicit length: never look past the limit */
+    const UChar *limit = s+length;
+    while(s<limit) {
+        UBool isPair = U16_IS_LEAD(s[0]) && (limit-s)>=2 && U16_IS_TRAIL(s[1]);
+        s += isPair ? 2 : 1;
+        ++total;
+    }
+    return total;
+}
+
+/*
+ * Check whether s contains more than `number` code points, without
+ * necessarily scanning the whole string.
+ * length==-1 means NUL-terminated. Returns TRUE for number<0, and FALSE
+ * for s==NULL or length<-1.
+ */
+U_CAPI UBool U_EXPORT2
+u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
+
+ if(number<0) {
+ return TRUE;
+ }
+ if(s==NULL || length<-1) {
+ return FALSE;
+ }
+
+ if(length==-1) {
+ /* s is NUL-terminated */
+ UChar c;
+
+ /* count code points until they exceed */
+ for(;;) {
+ if((c=*s++)==0) {
+ return FALSE;
+ }
+ if(number==0) {
+ return TRUE;
+ }
+ /* a surrogate pair counts as one code point: skip its trail unit */
+ if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
+ ++s;
+ }
+ --number;
+ }
+ } else {
+ /* length>=0 known */
+ const UChar *limit;
+ int32_t maxSupplementary;
+
+ /* s contains at least (length+1)/2 code points: <=2 UChars per cp */
+ if(((length+1)/2)>number) {
+ return TRUE;
+ }
+
+ /* check if s does not even contain enough UChars */
+ maxSupplementary=length-number;
+ if(maxSupplementary<=0) {
+ return FALSE;
+ }
+ /* there are maxSupplementary=length-number more UChars than asked-for code points */
+
+ /*
+ * count code points until they exceed and also check that there are
+ * no more than maxSupplementary supplementary code points (UChar pairs)
+ */
+ limit=s+length;
+ for(;;) {
+ if(s==limit) {
+ return FALSE;
+ }
+ if(number==0) {
+ return TRUE;
+ }
+ /* *s++ advances s before the s!=limit test, so *s is only read inside the buffer */
+ if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) {
+ ++s;
+ if(--maxSupplementary<=0) {
+ /* too many pairs - too few code points */
+ return FALSE;
+ }
+ }
+ --number;
+ }
+ }
+}
+
+/* Copy count UChars from src to dest; a count<=0 copies nothing. Returns dest. */
+U_CAPI UChar * U_EXPORT2
+u_memcpy(UChar *dest, const UChar *src, int32_t count) {
+    if(count <= 0) {
+        return dest;
+    }
+    uprv_memcpy(dest, src, (size_t)count*U_SIZEOF_UCHAR);
+    return dest;
+}
+
+/* Move count UChars from src to dest via uprv_memmove; a count<=0 moves nothing. Returns dest. */
+U_CAPI UChar * U_EXPORT2
+u_memmove(UChar *dest, const UChar *src, int32_t count) {
+    if(count <= 0) {
+        return dest;
+    }
+    uprv_memmove(dest, src, (size_t)count*U_SIZEOF_UCHAR);
+    return dest;
+}
+
+/* Store c into the first count UChars of dest; a count<=0 writes nothing. Returns dest. */
+U_CAPI UChar * U_EXPORT2
+u_memset(UChar *dest, UChar c, int32_t count) {
+    for(int32_t i = 0; i < count; ++i) {
+        dest[i] = c;
+    }
+    return dest;
+}
+
+/*
+ * Compare the first count UChars of two buffers in code unit order.
+ * Returns the difference of the first differing pair, or 0 (also for count<=0).
+ */
+U_CAPI int32_t U_EXPORT2
+u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) {
+    for(int32_t i = 0; i < count; ++i) {
+        int32_t diff = (int32_t)(uint16_t)buf1[i] - (int32_t)(uint16_t)buf2[i];
+        if(diff != 0) {
+            return diff;
+        }
+    }
+    return 0;
+}
+
+/* Compare count UChars of each buffer in code point order (both lengths given as count). */
+U_CAPI int32_t U_EXPORT2
+u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
+    const UBool strncmpStyle = FALSE;
+    const UBool codePointOrder = TRUE;
+
+    return uprv_strCompare(s1, count, s2, count, strncmpStyle, codePointOrder);
+}
+
+/* u_unescape & support fns ------------------------------------------------- */
+
+/*
+ * Table of single-letter C-style escapes, stored as flat pairs:
+ * (escape letter, replacement code unit).
+ * This map must be in ASCENDING ORDER OF THE ESCAPE CODE
+ * because the lookup in u_unescapeAt() stops at the first entry that is
+ * greater than the character searched for.
+ * The commented-out pairs map a character to itself, so they need no entry.
+ */
+static const UChar UNESCAPE_MAP[] = {
+ /*" 0x22, 0x22 */
+ /*' 0x27, 0x27 */
+ /*? 0x3F, 0x3F */
+ /*\ 0x5C, 0x5C */
+ /*a*/ 0x61, 0x07,
+ /*b*/ 0x62, 0x08,
+ /*e*/ 0x65, 0x1b,
+ /*f*/ 0x66, 0x0c,
+ /*n*/ 0x6E, 0x0a,
+ /*r*/ 0x72, 0x0d,
+ /*t*/ 0x74, 0x09,
+ /*v*/ 0x76, 0x0b
+};
+/* number of UChars (not pairs) in UNESCAPE_MAP */
+enum { UNESCAPE_MAP_LENGTH = UPRV_LENGTHOF(UNESCAPE_MAP) };
+
+/* Map an octal digit U+0030..U+0037 to its value 0..7; anything else yields -1. */
+static int8_t _digit8(UChar c) {
+    const UBool isOctal = (0x0030 <= c && c <= 0x0037);
+    return isOctal ? (int8_t)(c - 0x0030) : (int8_t)-1;
+}
+
+/* Map a hex digit (0-9, A-F, a-f) to its value 0..15; anything else yields -1. */
+static int8_t _digit16(UChar c) {
+    int8_t value = -1;
+
+    if (0x0030 <= c && c <= 0x0039) {          /* 0..9 */
+        value = (int8_t)(c - 0x0030);
+    } else if (0x0041 <= c && c <= 0x0046) {   /* A..F */
+        value = (int8_t)(c - 0x0041 + 10);
+    } else if (0x0061 <= c && c <= 0x0066) {   /* a..f */
+        value = (int8_t)(c - 0x0061 + 10);
+    }
+    return value;
+}
+
+/*
+ * Parse a single escape sequence. Although this method deals in
+ * UChars, it does not use C++ or UnicodeString. This allows it to
+ * be used from C contexts.
+ *
+ * charAt   callback that returns the code unit at a given offset
+ * offset   in: index of the first unit after the backslash;
+ *          out: index just past the escape, or the input value on failure
+ * length   number of code units reachable through charAt
+ * context  opaque pointer passed through to charAt
+ *
+ * Returns the unescaped code point, or (UChar32)0xFFFFFFFF if the escape
+ * is invalid.
+ *
+ * Forms handled: u + 4 hex digits, U + 8 hex digits, x + 1-2 hex digits,
+ * x{...} with 1-8 hex digits, 1-3 octal digits, the letters listed in
+ * UNESCAPE_MAP, and cX (control-X). Any other character stands for itself.
+ */
+U_CAPI UChar32 U_EXPORT2
+u_unescapeAt(UNESCAPE_CHAR_AT charAt,
+             int32_t *offset,
+             int32_t length,
+             void *context) {
+
+    int32_t start = *offset;
+    UChar c;
+    UChar32 result = 0;
+    int8_t n = 0;
+    int8_t minDig = 0;
+    int8_t maxDig = 0;
+    int8_t bitsPerDigit = 4;
+    int8_t dig;
+    int32_t i;
+    UBool braces = FALSE;
+
+    /* Check that offset is in range */
+    if (*offset < 0 || *offset >= length) {
+        goto err;
+    }
+
+    /* Fetch first UChar after '\\' */
+    c = charAt((*offset)++, context);
+
+    /* Convert hexadecimal and octal escapes */
+    switch (c) {
+    case 0x0075 /*'u'*/:
+        minDig = maxDig = 4;
+        break;
+    case 0x0055 /*'U'*/:
+        minDig = maxDig = 8;
+        break;
+    case 0x0078 /*'x'*/:
+        minDig = 1;
+        if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) {
+            ++(*offset);
+            braces = TRUE;
+            maxDig = 8;
+        } else {
+            maxDig = 2;
+        }
+        break;
+    default:
+        dig = _digit8(c);
+        if (dig >= 0) {
+            minDig = 1;
+            maxDig = 3;
+            n = 1; /* Already have first octal digit */
+            bitsPerDigit = 3;
+            result = dig;
+        }
+        break;
+    }
+    if (minDig != 0) {
+        /* accumulate up to maxDig digits; stop at the first non-digit */
+        while (*offset < length && n < maxDig) {
+            c = charAt(*offset, context);
+            dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
+            if (dig < 0) {
+                break;
+            }
+            result = (result << bitsPerDigit) | dig;
+            ++(*offset);
+            ++n;
+        }
+        if (n < minDig) {
+            goto err;
+        }
+        if (braces) {
+            /*
+             * The closing brace must be the very next unit. Read it from
+             * the input instead of testing c: when the loop above stops
+             * because maxDig digits were consumed, c still holds the last
+             * digit, which made every 8-digit x{hhhhhhhh} escape fail.
+             */
+            if (*offset >= length || charAt(*offset, context) != 0x7D /*}*/) {
+                goto err;
+            }
+            ++(*offset);
+        }
+        /* a negative value means the 8-digit form overflowed into the sign bit */
+        if (result < 0 || result >= 0x110000) {
+            goto err;
+        }
+        /* If an escape sequence specifies a lead surrogate, see if
+         * there is a trail surrogate after it, either as an escape or
+         * as a literal.  If so, join them up into a supplementary.
+         */
+        if (*offset < length && U16_IS_LEAD(result)) {
+            int32_t ahead = *offset + 1;
+            c = charAt(*offset, context);
+            if (c == 0x5C /*'\\'*/ && ahead < length) {
+                // Calling u_unescapeAt recursively may cause a stack overflow if
+                // we have repeated surrogate lead after that. Limit the
+                // length to 5 ('u' and 4 hex) after ahead.
+                int32_t tailLimit = ahead + 5;
+                if (tailLimit > length) {
+                    tailLimit = length;
+                }
+                c = (UChar) u_unescapeAt(charAt, &ahead, tailLimit,
+                                         context);
+            }
+            if (U16_IS_TRAIL(c)) {
+                *offset = ahead;
+                result = U16_GET_SUPPLEMENTARY(result, c);
+            }
+        }
+        return result;
+    }
+
+    /* Convert C-style escapes in table */
+    for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
+        if (c == UNESCAPE_MAP[i]) {
+            return UNESCAPE_MAP[i+1];
+        } else if (c < UNESCAPE_MAP[i]) {
+            /* table is sorted ascending: no later entry can match */
+            break;
+        }
+    }
+
+    /* Map \cX to control-X: X & 0x1F */
+    if (c == 0x0063 /*'c'*/ && *offset < length) {
+        c = charAt((*offset)++, context);
+        if (U16_IS_LEAD(c) && *offset < length) {
+            UChar c2 = charAt(*offset, context);
+            if (U16_IS_TRAIL(c2)) {
+                ++(*offset);
+                c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); /* [sic] */
+            }
+        }
+        return 0x1F & c;
+    }
+
+    /* If no special forms are recognized, then consider
+     * the backslash to generically escape the next character.
+     * Deal with surrogate pairs. */
+    if (U16_IS_LEAD(c) && *offset < length) {
+        UChar c2 = charAt(*offset, context);
+        if (U16_IS_TRAIL(c2)) {
+            ++(*offset);
+            return U16_GET_SUPPLEMENTARY(c, c2);
+        }
+    }
+    return c;
+
+ err:
+    /* Invalid escape sequence */
+    *offset = start; /* Reset to initial value */
+    return (UChar32)0xFFFFFFFF;
+}
+
+/*
+ * u_unescapeAt() callback: context is a char* string; return the char at
+ * offset converted to a UChar with u_charsToUChars().
+ */
+static UChar U_CALLCONV
+_charPtr_charAt(int32_t offset, void *context) {
+    /* Going through the conversion API one char at a time is not the
+     * fastest route, but there is no API for reading the invariant
+     * tables directly. */
+    const char *chars = (char*) context;
+    UChar unit;
+
+    u_charsToUChars(chars + offset, &unit, 1);
+    return unit;
+}
+
+/*
+ * Convert an escape-free segment of the text into dest; used by u_unescape().
+ * At most destCapacity units are written (none if destCapacity is negative).
+ */
+static void _appendUChars(UChar *dest, int32_t destCapacity,
+                          const char *src, int32_t srcLen) {
+    const int32_t room = (destCapacity < 0) ? 0 : destCapacity;
+    const int32_t count = (srcLen > room) ? room : srcLen;
+
+    u_charsToUChars(src, dest, count);
+}
+
+/*
+ * Do an invariant conversion of char* -> UChar*, with escape parsing.
+ *
+ * src           NUL-terminated char string that may contain backslash escapes
+ * dest          output buffer; may be NULL to only compute the length
+ * destCapacity  capacity of dest in UChars
+ *
+ * Returns the number of UChars the full result needs, which may exceed
+ * destCapacity. The output is NUL-terminated only if there is room.
+ * On an invalid escape, returns 0 and stores an empty string in dest
+ * (if dest has room).
+ */
+U_CAPI int32_t U_EXPORT2
+u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
+    const char *segment = src;
+    /* Find the end of the input once, rather than calling strlen() on the
+     * remaining text for every escape (quadratic for escape-heavy input). */
+    const char *srcLimit = src + uprv_strlen(src);
+    int32_t i = 0;
+    char c;
+
+    while ((c=*src) != 0) {
+        /* '\\' intentionally written as compiler-specific
+         * character constant to correspond to compiler-specific
+         * char* constants. */
+        if (c == '\\') {
+            int32_t lenParsed = 0;
+            UChar32 c32;
+            /* flush the literal text collected before this backslash */
+            if (src != segment) {
+                if (dest != NULL) {
+                    _appendUChars(dest + i, destCapacity - i,
+                                  segment, (int32_t)(src - segment));
+                }
+                i += (int32_t)(src - segment);
+            }
+            ++src; /* advance past '\\' */
+            c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)(srcLimit - src), (void*)src);
+            /* u_unescapeAt() leaves the offset untouched on failure */
+            if (lenParsed == 0) {
+                goto err;
+            }
+            src += lenParsed; /* advance past escape seq. */
+            if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) {
+                U16_APPEND_UNSAFE(dest, i, c32);
+            } else {
+                /* no room (or preflighting): only count the units */
+                i += U16_LENGTH(c32);
+            }
+            segment = src;
+        } else {
+            ++src;
+        }
+    }
+    /* flush the trailing literal text */
+    if (src != segment) {
+        if (dest != NULL) {
+            _appendUChars(dest + i, destCapacity - i,
+                          segment, (int32_t)(src - segment));
+        }
+        i += (int32_t)(src - segment);
+    }
+    if (dest != NULL && i < destCapacity) {
+        dest[i] = 0;
+    }
+    return i;
+
+ err:
+    if (dest != NULL && destCapacity > 0) {
+        *dest = 0;
+    }
+    return 0;
+}
+
+/* NUL-termination of strings ----------------------------------------------- */
+
+/**
+ * NUL-terminate a string no matter what its type.
+ * Set warning and error codes accordingly.
+ *
+ * Does nothing if pErrorCode is NULL or already holds a failure, or if
+ * length is negative. Otherwise:
+ * - length<destCapacity:  writes dest[length]=0 and clears a pending
+ *                         U_STRING_NOT_TERMINATED_WARNING;
+ * - length==destCapacity: sets U_STRING_NOT_TERMINATED_WARNING;
+ * - length>destCapacity:  sets U_BUFFER_OVERFLOW_ERROR.
+ *
+ * The arguments are expanded textually and more than once, so they must
+ * be plain variables without side effects.
+ */
+#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) UPRV_BLOCK_MACRO_BEGIN { \
+ if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) { \
+ /* not a public function, so no complete argument checking */ \
+ \
+ if(length<0) { \
+ /* assume that the caller handles this */ \
+ } else if(length<destCapacity) { \
+ /* NUL-terminate the string, the NUL fits */ \
+ dest[length]=0; \
+ /* unset the not-terminated warning but leave all others */ \
+ if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { \
+ *pErrorCode=U_ZERO_ERROR; \
+ } \
+ } else if(length==destCapacity) { \
+ /* unable to NUL-terminate, but the string itself fit - set a warning code */ \
+ *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; \
+ } else /* length>destCapacity */ { \
+ /* even the string itself did not fit - set an error code */ \
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/* Uppercase an ASCII letter a-z; every other code unit is returned unchanged. */
+U_CAPI UChar U_EXPORT2
+u_asciiToUpper(UChar c) {
+    const UBool isLowerAscii = (u'a' <= c && c <= u'z');
+    return isLowerAscii ? (UChar)(c + u'A' - u'a') : c;
+}
+
+/*
+ * NUL-terminate a UChar buffer at index length if it fits into
+ * destCapacity, and set *pErrorCode to a warning or overflow error if not.
+ * Returns length unchanged.
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
+ __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+ return length;
+}
+
+/*
+ * NUL-terminate a char buffer at index length if it fits into
+ * destCapacity, and set *pErrorCode to a warning or overflow error if not.
+ * Returns length unchanged.
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
+ __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+ return length;
+}
+
+/*
+ * NUL-terminate a UChar32 buffer at index length if it fits into
+ * destCapacity, and set *pErrorCode to a warning or overflow error if not.
+ * Returns length unchanged.
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
+ __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+ return length;
+}
+
+/*
+ * NUL-terminate a wchar_t buffer at index length if it fits into
+ * destCapacity, and set *pErrorCode to a warning or overflow error if not.
+ * Returns length unchanged.
+ */
+U_CAPI int32_t U_EXPORT2
+u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
+ __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+ return length;
+}
+
+// Compute the hash code for a string -------------------------------------- ***
+
+// Moved here from uhash.c so that UnicodeString::hashCode() does not depend
+// on UHashtable code.
+
+/*
+ Compute the hash by iterating sparsely over about 32 (up to 63)
+ characters spaced evenly through the string. For each character,
+ multiply the previous hash value by a prime number and add the new
+ character in, like a linear congruential random number generator,
+ producing a pseudorandom deterministic value well distributed over
+ the output range. [LIU]
+
+ Macro parameters:
+ TYPE element type the string is read as
+ STR pointer to the string; NULL hashes to 0
+ STRLEN number of elements
+ DEREF expression producing the value of the current element; it may
+ use the macro-local pointer p
+
+ The expansion ends with a return statement, so the macro has to be the
+ last statement of a function returning int32_t.
+*/
+
+#define STRING_HASH(TYPE, STR, STRLEN, DEREF) UPRV_BLOCK_MACRO_BEGIN { \
+ uint32_t hash = 0; \
+ const TYPE *p = (const TYPE*) STR; \
+ if (p != NULL) { \
+ int32_t len = (int32_t)(STRLEN); \
+ int32_t inc = ((len - 32) / 32) + 1; \
+ const TYPE *limit = p + len; \
+ while (p<limit) { \
+ hash = (hash * 37) + DEREF; \
+ p += inc; \
+ } \
+ } \
+ return static_cast<int32_t>(hash); \
+} UPRV_BLOCK_MACRO_END
+
+/*
+ * Used by UnicodeString to compute its hashcode - Not public API.
+ * Hashes length UChars of str with the sparse STRING_HASH scheme;
+ * a NULL str hashes to 0.
+ */
+U_CAPI int32_t U_EXPORT2
+ustr_hashUCharsN(const UChar *str, int32_t length) {
+ STRING_HASH(UChar, str, length, *p);
+}
+
+/*
+ * Hash length chars of str with the sparse STRING_HASH scheme, reading
+ * each char as an unsigned byte; a NULL str hashes to 0.
+ */
+U_CAPI int32_t U_EXPORT2
+ustr_hashCharsN(const char *str, int32_t length) {
+ STRING_HASH(uint8_t, str, length, *p);
+}
+
+/*
+ * Case-insensitive variant of ustr_hashCharsN(): each sampled char is
+ * passed through uprv_tolower() before it is mixed into the hash.
+ */
+U_CAPI int32_t U_EXPORT2
+ustr_hashICharsN(const char *str, int32_t length) {
+ STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));
+}
diff --git a/thirdparty/icu4c/common/ustrtrns.cpp b/thirdparty/icu4c/common/ustrtrns.cpp
new file mode 100644
index 0000000000..5dc032c02f
--- /dev/null
+++ b/thirdparty/icu4c/common/ustrtrns.cpp
@@ -0,0 +1,1451 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2001-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File ustrtrns.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 9/10/2001 Ram Creation.
+******************************************************************************
+*/
+
+/*******************************************************************************
+ *
+ * u_strTo* and u_strFrom* APIs
+ * WCS functions moved to ustr_wcs.c for better modularization
+ *
+ *******************************************************************************
+ */
+
+
+#include "unicode/putil.h"
+#include "unicode/ustring.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "ustr_imp.h"
+#include "uassert.h"
+
+/*
+ * Convert a UTF-32 string to UTF-16.
+ *
+ * dest/destCapacity output buffer and its capacity in UChars; dest may be
+ * NULL only with destCapacity==0 (preflighting)
+ * pDestLength if not NULL, receives the length the full output needs
+ * src/srcLength input; srcLength==-1 means NUL-terminated
+ * subchar replacement for surrogate or out-of-range input values;
+ * a negative value disables substitution
+ * pNumSubstitutions if not NULL, receives the number of substitutions made
+ * pErrorCode in/out error code; must not be NULL (it is dereferenced
+ * without a check)
+ *
+ * Returns dest, or NULL if *pErrorCode already held a failure, an argument
+ * is illegal (U_ILLEGAL_ARGUMENT_ERROR), or an invalid input value is found
+ * while substitution is disabled (U_INVALID_CHAR_FOUND).
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromUTF32WithSub(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar32 *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode) {
+ const UChar32 *srcLimit;
+ UChar32 ch;
+ UChar *destLimit;
+ UChar *pDest;
+ int32_t reqLength;
+ int32_t numSubstitutions;
+
+ /* args check */
+ if(U_FAILURE(*pErrorCode)){
+ return NULL;
+ }
+ if( (src==NULL && srcLength!=0) || srcLength < -1 ||
+ (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
+ subchar > 0x10ffff || U_IS_SURROGATE(subchar)
+ ) {
+ *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return NULL;
+ }
+
+ if(pNumSubstitutions != NULL) {
+ *pNumSubstitutions = 0;
+ }
+
+ pDest = dest;
+ destLimit = (dest!=NULL)?(dest + destCapacity):NULL;
+ /* reqLength counts only the units that did not fit; the written ones are added at the end */
+ reqLength = 0;
+ numSubstitutions = 0;
+
+ if(srcLength < 0) {
+ /* simple loop for conversion of a NUL-terminated BMP string */
+ while((ch=*src) != 0 &&
+ ((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff))) {
+ ++src;
+ if(pDest < destLimit) {
+ *pDest++ = (UChar)ch;
+ } else {
+ ++reqLength;
+ }
+ }
+ srcLimit = src;
+ if(ch != 0) {
+ /* "complicated" case, find the end of the remaining string */
+ while(*++srcLimit != 0) {}
+ }
+ } else {
+ srcLimit = (src!=NULL)?(src + srcLength):NULL;
+ }
+
+ /* convert with length */
+ while(src < srcLimit) {
+ ch = *src++;
+ do {
+ /* usually "loops" once; twice only for writing subchar */
+ if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
+ if(pDest < destLimit) {
+ *pDest++ = (UChar)ch;
+ } else {
+ ++reqLength;
+ }
+ break;
+ } else if(0x10000 <= ch && ch <= 0x10ffff) {
+ /* supplementary code point: needs room for both surrogates or is only counted */
+ if(pDest!=NULL && ((pDest + 2) <= destLimit)) {
+ *pDest++ = U16_LEAD(ch);
+ *pDest++ = U16_TRAIL(ch);
+ } else {
+ reqLength += 2;
+ }
+ break;
+ } else if((ch = subchar) < 0) {
+ /* surrogate code point, or not a Unicode code point at all */
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ } else {
+ ++numSubstitutions;
+ }
+ } while(TRUE);
+ }
+
+ reqLength += (int32_t)(pDest - dest);
+ if(pDestLength) {
+ *pDestLength = reqLength;
+ }
+ if(pNumSubstitutions != NULL) {
+ *pNumSubstitutions = numSubstitutions;
+ }
+
+ /* Terminate the buffer */
+ u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
+
+ return dest;
+}
+
+/*
+ * Convert UTF-32 to UTF-16 without substitution: a negative subchar makes
+ * u_strFromUTF32WithSub() fail on invalid input instead of replacing it.
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromUTF32(UChar *dest,
+               int32_t destCapacity,
+               int32_t *pDestLength,
+               const UChar32 *src,
+               int32_t srcLength,
+               UErrorCode *pErrorCode) {
+    const UChar32 noSubstitution = U_SENTINEL;
+    int32_t *ignoredSubstitutionCount = NULL;
+
+    return u_strFromUTF32WithSub(dest, destCapacity, pDestLength,
+                                 src, srcLength,
+                                 noSubstitution, ignoredSubstitutionCount,
+                                 pErrorCode);
+}
+
+/*
+ * Convert a UTF-16 string to UTF-32.
+ *
+ * dest/destCapacity output buffer and its capacity in UChar32s; dest may
+ * be NULL only with destCapacity==0 (preflighting)
+ * pDestLength if not NULL, receives the length the full output needs
+ * src/srcLength input; srcLength==-1 means NUL-terminated
+ * subchar replacement for unpaired surrogates; a negative value
+ * disables substitution
+ * pNumSubstitutions if not NULL, receives the number of substitutions made
+ * pErrorCode in/out error code; must not be NULL (it is dereferenced
+ * without a check)
+ *
+ * Returns dest, or NULL if *pErrorCode already held a failure, an argument
+ * is illegal (U_ILLEGAL_ARGUMENT_ERROR), or an unpaired surrogate is found
+ * while substitution is disabled (U_INVALID_CHAR_FOUND).
+ */
+U_CAPI UChar32* U_EXPORT2
+u_strToUTF32WithSub(UChar32 *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode) {
+ const UChar *srcLimit;
+ UChar32 ch;
+ UChar ch2;
+ UChar32 *destLimit;
+ UChar32 *pDest;
+ int32_t reqLength;
+ int32_t numSubstitutions;
+
+ /* args check */
+ if(U_FAILURE(*pErrorCode)){
+ return NULL;
+ }
+ if( (src==NULL && srcLength!=0) || srcLength < -1 ||
+ (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
+ subchar > 0x10ffff || U_IS_SURROGATE(subchar)
+ ) {
+ *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return NULL;
+ }
+
+ if(pNumSubstitutions != NULL) {
+ *pNumSubstitutions = 0;
+ }
+
+ pDest = dest;
+ destLimit = (dest!=NULL)?(dest + destCapacity):NULL;
+ /* reqLength counts only the code points that did not fit; the written ones are added at the end */
+ reqLength = 0;
+ numSubstitutions = 0;
+
+ if(srcLength < 0) {
+ /* simple loop for conversion of a NUL-terminated BMP string */
+ while((ch=*src) != 0 && !U16_IS_SURROGATE(ch)) {
+ ++src;
+ if(pDest < destLimit) {
+ *pDest++ = ch;
+ } else {
+ ++reqLength;
+ }
+ }
+ srcLimit = src;
+ if(ch != 0) {
+ /* "complicated" case, find the end of the remaining string */
+ while(*++srcLimit != 0) {}
+ }
+ } else {
+ srcLimit = (src!=NULL)?(src + srcLength):NULL;
+ }
+
+ /* convert with length */
+ while(src < srcLimit) {
+ ch = *src++;
+ if(!U16_IS_SURROGATE(ch)) {
+ /* write or count ch below */
+ } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) {
+ /* well-formed pair: combine both units into one code point */
+ ++src;
+ ch = U16_GET_SUPPLEMENTARY(ch, ch2);
+ } else if((ch = subchar) < 0) {
+ /* unpaired surrogate */
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ } else {
+ ++numSubstitutions;
+ }
+ if(pDest < destLimit) {
+ *pDest++ = ch;
+ } else {
+ ++reqLength;
+ }
+ }
+
+ reqLength += (int32_t)(pDest - dest);
+ if(pDestLength) {
+ *pDestLength = reqLength;
+ }
+ if(pNumSubstitutions != NULL) {
+ *pNumSubstitutions = numSubstitutions;
+ }
+
+ /* Terminate the buffer */
+ u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode);
+
+ return dest;
+}
+
+/*
+ * Convert UTF-16 to UTF-32 without substitution: a negative subchar makes
+ * u_strToUTF32WithSub() fail on an unpaired surrogate instead of replacing it.
+ */
+U_CAPI UChar32* U_EXPORT2
+u_strToUTF32(UChar32 *dest,
+             int32_t destCapacity,
+             int32_t *pDestLength,
+             const UChar *src,
+             int32_t srcLength,
+             UErrorCode *pErrorCode) {
+    const UChar32 noSubstitution = U_SENTINEL;
+    int32_t *ignoredSubstitutionCount = NULL;
+
+    return u_strToUTF32WithSub(dest, destCapacity, pDestLength,
+                               src, srcLength,
+                               noSubstitution, ignoredSubstitutionCount,
+                               pErrorCode);
+}
+
+/*
+ * Convert a UTF-8 string to UTF-16.
+ *
+ * dest/destCapacity output buffer and its capacity in UChars; dest may be
+ * NULL only with destCapacity==0 (preflighting)
+ * pDestLength if not NULL, receives the length the full output needs
+ * src/srcLength input bytes; srcLength==-1 means NUL-terminated
+ * subchar replacement for ill-formed byte sequences; a negative
+ * value disables substitution
+ * pNumSubstitutions if not NULL, receives the number of substitutions made
+ * pErrorCode in/out error code; must not be NULL (it is dereferenced
+ * without a check)
+ *
+ * Returns dest, or NULL if *pErrorCode already held a failure, an argument
+ * is illegal (U_ILLEGAL_ARGUMENT_ERROR), or ill-formed input is found while
+ * substitution is disabled (U_INVALID_CHAR_FOUND).
+ *
+ * Each input mode first converts while there is room in dest and then
+ * "pre-flights" the rest, i.e. only counts the UChars still needed.
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromUTF8WithSub(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char* src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode){
+ /* args check */
+ if(U_FAILURE(*pErrorCode)) {
+ return NULL;
+ }
+ if( (src==NULL && srcLength!=0) || srcLength < -1 ||
+ (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
+ subchar > 0x10ffff || U_IS_SURROGATE(subchar)
+ ) {
+ *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return NULL;
+ }
+
+ if(pNumSubstitutions!=NULL) {
+ *pNumSubstitutions=0;
+ }
+ UChar *pDest = dest;
+ UChar *pDestLimit = dest+destCapacity;
+ /* reqLength counts only the units that did not fit; the written ones are added at the end */
+ int32_t reqLength = 0;
+ int32_t numSubstitutions=0;
+
+ /*
+ * Inline processing of UTF-8 byte sequences:
+ *
+ * Byte sequences for the most common characters are handled inline in
+ * the conversion loops. In order to reduce the path lengths for those
+ * characters, the tests are arranged in a kind of binary search.
+ * ASCII (<=0x7f) is checked first, followed by the dividing point
+ * between 2- and 3-byte sequences (0xe0).
+ * The 3-byte branch is tested first to speed up CJK text.
+ * The compiler should combine the subtractions for the two tests for 0xe0.
+ * Each branch then tests for the other end of its range.
+ */
+
+ if(srcLength < 0){
+ /*
+ * Transform a NUL-terminated string.
+ * The code explicitly checks for NULs only in the lead byte position.
+ * A NUL byte in the trail byte position fails the trail byte range check anyway.
+ */
+ int32_t i;
+ UChar32 c;
+ for(i = 0; (c = (uint8_t)src[i]) != 0 && (pDest < pDestLimit);) {
+ // modified copy of U8_NEXT()
+ ++i;
+ if(U8_IS_SINGLE(c)) {
+ *pDest++=(UChar)c;
+ } else {
+ uint8_t __t1, __t2;
+ if( /* handle U+0800..U+FFFF inline */
+ (0xe0<=(c) && (c)<0xf0) &&
+ U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
+ (__t2=src[(i)+1]-0x80)<=0x3f) {
+ *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2;
+ i+=2;
+ } else if( /* handle U+0080..U+07FF inline */
+ ((c)<0xe0 && (c)>=0xc2) &&
+ (__t1=src[i]-0x80)<=0x3f) {
+ *pDest++ = (((c)&0x1f)<<6)|__t1;
+ ++(i);
+ } else {
+ /* function call for "complicated" and error cases */
+ (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), -1, c, -1);
+ if(c<0 && (++numSubstitutions, c = subchar) < 0) {
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ } else if(c<=0xFFFF) {
+ *(pDest++)=(UChar)c;
+ } else {
+ *(pDest++)=U16_LEAD(c);
+ if(pDest<pDestLimit) {
+ *(pDest++)=U16_TRAIL(c);
+ } else {
+ /* only the lead surrogate fit: count the trail and switch to pre-flighting */
+ reqLength++;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /* Pre-flight the rest of the string. */
+ while((c = (uint8_t)src[i]) != 0) {
+ // modified copy of U8_NEXT()
+ ++i;
+ if(U8_IS_SINGLE(c)) {
+ ++reqLength;
+ } else {
+ uint8_t __t1, __t2;
+ if( /* handle U+0800..U+FFFF inline */
+ (0xe0<=(c) && (c)<0xf0) &&
+ U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
+ (__t2=src[(i)+1]-0x80)<=0x3f) {
+ ++reqLength;
+ i+=2;
+ } else if( /* handle U+0080..U+07FF inline */
+ ((c)<0xe0 && (c)>=0xc2) &&
+ (__t1=src[i]-0x80)<=0x3f) {
+ ++reqLength;
+ ++(i);
+ } else {
+ /* function call for "complicated" and error cases */
+ (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), -1, c, -1);
+ if(c<0 && (++numSubstitutions, c = subchar) < 0) {
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ }
+ reqLength += U16_LENGTH(c);
+ }
+ }
+ }
+ } else /* srcLength >= 0 */ {
+ /* Faster loop without ongoing checking for srcLength and pDestLimit. */
+ int32_t i = 0;
+ UChar32 c;
+ for(;;) {
+ /*
+ * Each iteration of the inner loop progresses by at most 3 UTF-8
+ * bytes and one UChar, for most characters.
+ * For supplementary code points (4 & 2), which are rare,
+ * there is an additional adjustment.
+ */
+ int32_t count = (int32_t)(pDestLimit - pDest);
+ int32_t count2 = (srcLength - i) / 3;
+ if(count > count2) {
+ count = count2; /* min(remaining dest, remaining src/3) */
+ }
+ if(count < 3) {
+ /*
+ * Too much overhead if we get near the end of the string,
+ * continue with the next loop.
+ */
+ break;
+ }
+
+ do {
+ // modified copy of U8_NEXT()
+ c = (uint8_t)src[i++];
+ if(U8_IS_SINGLE(c)) {
+ *pDest++=(UChar)c;
+ } else {
+ uint8_t __t1, __t2;
+ if( /* handle U+0800..U+FFFF inline */
+ (0xe0<=(c) && (c)<0xf0) &&
+ ((i)+1)<srcLength &&
+ U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
+ (__t2=src[(i)+1]-0x80)<=0x3f) {
+ *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2;
+ i+=2;
+ } else if( /* handle U+0080..U+07FF inline */
+ ((c)<0xe0 && (c)>=0xc2) &&
+ ((i)!=srcLength) &&
+ (__t1=src[i]-0x80)<=0x3f) {
+ *pDest++ = (((c)&0x1f)<<6)|__t1;
+ ++(i);
+ } else {
+ if(c >= 0xf0 || subchar > 0xffff) {
+ // We may read up to four bytes and write up to two UChars,
+ // which we didn't account for with computing count,
+ // so we adjust it here.
+ if(--count == 0) {
+ --i; // back out byte c
+ break;
+ }
+ }
+
+ /* function call for "complicated" and error cases */
+ (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1);
+ if(c<0 && (++numSubstitutions, c = subchar) < 0) {
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ } else if(c<=0xFFFF) {
+ *(pDest++)=(UChar)c;
+ } else {
+ *(pDest++)=U16_LEAD(c);
+ *(pDest++)=U16_TRAIL(c);
+ }
+ }
+ }
+ } while(--count > 0);
+ }
+
+ /* careful loop for the remainder: checks both the source and the destination limit per character */
+ while(i < srcLength && (pDest < pDestLimit)) {
+ // modified copy of U8_NEXT()
+ c = (uint8_t)src[i++];
+ if(U8_IS_SINGLE(c)) {
+ *pDest++=(UChar)c;
+ } else {
+ uint8_t __t1, __t2;
+ if( /* handle U+0800..U+FFFF inline */
+ (0xe0<=(c) && (c)<0xf0) &&
+ ((i)+1)<srcLength &&
+ U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
+ (__t2=src[(i)+1]-0x80)<=0x3f) {
+ *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2;
+ i+=2;
+ } else if( /* handle U+0080..U+07FF inline */
+ ((c)<0xe0 && (c)>=0xc2) &&
+ ((i)!=srcLength) &&
+ (__t1=src[i]-0x80)<=0x3f) {
+ *pDest++ = (((c)&0x1f)<<6)|__t1;
+ ++(i);
+ } else {
+ /* function call for "complicated" and error cases */
+ (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1);
+ if(c<0 && (++numSubstitutions, c = subchar) < 0) {
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ } else if(c<=0xFFFF) {
+ *(pDest++)=(UChar)c;
+ } else {
+ *(pDest++)=U16_LEAD(c);
+ if(pDest<pDestLimit) {
+ *(pDest++)=U16_TRAIL(c);
+ } else {
+ /* only the lead surrogate fit: count the trail and switch to pre-flighting */
+ reqLength++;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /* Pre-flight the rest of the string. */
+ while(i < srcLength) {
+ // modified copy of U8_NEXT()
+ c = (uint8_t)src[i++];
+ if(U8_IS_SINGLE(c)) {
+ ++reqLength;
+ } else {
+ uint8_t __t1, __t2;
+ if( /* handle U+0800..U+FFFF inline */
+ (0xe0<=(c) && (c)<0xf0) &&
+ ((i)+1)<srcLength &&
+ U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
+ (__t2=src[(i)+1]-0x80)<=0x3f) {
+ ++reqLength;
+ i+=2;
+ } else if( /* handle U+0080..U+07FF inline */
+ ((c)<0xe0 && (c)>=0xc2) &&
+ ((i)!=srcLength) &&
+ (__t1=src[i]-0x80)<=0x3f) {
+ ++reqLength;
+ ++(i);
+ } else {
+ /* function call for "complicated" and error cases */
+ (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1);
+ if(c<0 && (++numSubstitutions, c = subchar) < 0) {
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ }
+ reqLength += U16_LENGTH(c);
+ }
+ }
+ }
+ }
+
+ reqLength+=(int32_t)(pDest - dest);
+
+ if(pNumSubstitutions!=NULL) {
+ *pNumSubstitutions=numSubstitutions;
+ }
+
+ if(pDestLength){
+ *pDestLength = reqLength;
+ }
+
+ /* Terminate the buffer */
+ u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
+
+ return dest;
+}
+
+/**
+ * Converts a UTF-8 string to UTF-16 without substituting undecodable input.
+ *
+ * Thin wrapper: forwards every argument to u_strFromUTF8WithSub(), passing
+ * U_SENTINEL as the substitution character and NULL as the substitution
+ * counter, and returns that function's result unchanged.
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromUTF8(UChar *dest,
+              int32_t destCapacity,
+              int32_t *pDestLength,
+              const char* src,
+              int32_t srcLength,
+              UErrorCode *pErrorCode){
+    UChar *converted = u_strFromUTF8WithSub(dest, destCapacity, pDestLength,
+                                            src, srcLength,
+                                            U_SENTINEL, NULL,
+                                            pErrorCode);
+    return converted;
+}
+
+/**
+ * Converts UTF-8 to UTF-16 leniently, i.e. without validating the input.
+ *
+ * The length of each sequence is taken from the value of its first byte alone
+ * (< 0xc0: one byte, < 0xe0: two, < 0xf0: three, otherwise four) and the
+ * following bytes are combined arithmetically without checking that they are
+ * trail bytes. A sequence that is cut off by the end of the string is
+ * replaced with U+FFFD and ends the conversion.
+ *
+ * @param dest          Output buffer; may be NULL only if destCapacity is 0.
+ * @param destCapacity  Capacity of dest in UChars; must not be negative.
+ *                      When srcLength>=0 it must be at least srcLength,
+ *                      otherwise U_BUFFER_OVERFLOW_ERROR is set, *pDestLength
+ *                      (if given) receives srcLength and NULL is returned.
+ * @param pDestLength   If not NULL, receives the output length in UChars.
+ * @param src           Input bytes; may be NULL only if srcLength is 0.
+ * @param srcLength     Number of input bytes, or -1 if src is NUL-terminated.
+ * @param pErrorCode    In/out error code. If it already indicates a failure
+ *                      the function returns NULL immediately; invalid
+ *                      arguments set U_ILLEGAL_ARGUMENT_ERROR.
+ * @return dest, or NULL if one of the errors above was reported.
+ */
+U_CAPI UChar * U_EXPORT2
+u_strFromUTF8Lenient(UChar *dest,
+                     int32_t destCapacity,
+                     int32_t *pDestLength,
+                     const char *src,
+                     int32_t srcLength,
+                     UErrorCode *pErrorCode) {
+    UChar *pDest = dest;
+    UChar32 ch;
+    int32_t reqLength = 0;
+    /* The source is only ever read, so keep it const. */
+    const uint8_t *pSrc = (const uint8_t *)src;
+
+    /* args check */
+    if(U_FAILURE(*pErrorCode)){
+        return NULL;
+    }
+
+    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
+        (destCapacity<0) || (dest == NULL && destCapacity > 0)
+    ) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    if(srcLength < 0) {
+        /* Transform a NUL-terminated string. */
+        UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL;
+        uint8_t t1, t2, t3; /* trail bytes */
+
+        while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) {
+            if(ch < 0xc0) {
+                /*
+                 * ASCII, or a trail byte in lead position which is treated like
+                 * a single-byte sequence for better character boundary
+                 * resynchronization after illegal sequences.
+                 */
+                *pDest++=(UChar)ch;
+                ++pSrc;
+                continue;
+            } else if(ch < 0xe0) { /* U+0080..U+07FF */
+                if((t1 = pSrc[1]) != 0) {
+                    /* 0x3080 = (0xc0 << 6) + 0x80 */
+                    *pDest++ = (UChar)((ch << 6) + t1 - 0x3080);
+                    pSrc += 2;
+                    continue;
+                }
+            } else if(ch < 0xf0) { /* U+0800..U+FFFF */
+                if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0) {
+                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
+                    /* 0x2080 = (0x80 << 6) + 0x80 */
+                    *pDest++ = (UChar)((ch << 12) + (t1 << 6) + t2 - 0x2080);
+                    pSrc += 3;
+                    continue;
+                }
+            } else /* f0..f4 */ { /* U+10000..U+10FFFF */
+                if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0 && (t3 = pSrc[3]) != 0) {
+                    pSrc += 4;
+                    /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
+                    ch = (ch << 18) + (t1 << 12) + (t2 << 6) + t3 - 0x3c82080;
+                    *(pDest++) = U16_LEAD(ch);
+                    if(pDest < pDestLimit) {
+                        *(pDest++) = U16_TRAIL(ch);
+                    } else {
+                        /* No room for the trail surrogate: count it and pre-flight the rest. */
+                        reqLength = 1;
+                        break;
+                    }
+                    continue;
+                }
+            }
+
+            /* truncated character at the end */
+            *pDest++ = 0xfffd;
+            while(*++pSrc != 0) {}
+            break;
+        }
+
+        /* Pre-flight the rest of the string. */
+        while((ch = *pSrc) != 0) {
+            if(ch < 0xc0) {
+                /*
+                 * ASCII, or a trail byte in lead position which is treated like
+                 * a single-byte sequence for better character boundary
+                 * resynchronization after illegal sequences.
+                 */
+                ++reqLength;
+                ++pSrc;
+                continue;
+            } else if(ch < 0xe0) { /* U+0080..U+07FF */
+                if(pSrc[1] != 0) {
+                    ++reqLength;
+                    pSrc += 2;
+                    continue;
+                }
+            } else if(ch < 0xf0) { /* U+0800..U+FFFF */
+                if(pSrc[1] != 0 && pSrc[2] != 0) {
+                    ++reqLength;
+                    pSrc += 3;
+                    continue;
+                }
+            } else /* f0..f4 */ { /* U+10000..U+10FFFF */
+                if(pSrc[1] != 0 && pSrc[2] != 0 && pSrc[3] != 0) {
+                    reqLength += 2;
+                    pSrc += 4;
+                    continue;
+                }
+            }
+
+            /* truncated character at the end */
+            ++reqLength;
+            break;
+        }
+    } else /* srcLength >= 0 */ {
+        const uint8_t *pSrcLimit = (pSrc!=NULL)?(pSrc + srcLength):NULL;
+
+        /*
+         * This function requires that if srcLength is given, then it must be
+         * destCapacity >= srcLength so that we need not check for
+         * destination buffer overflow in the loop.
+         */
+        if(destCapacity < srcLength) {
+            if(pDestLength != NULL) {
+                *pDestLength = srcLength; /* this likely overestimates the true destLength! */
+            }
+            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
+            return NULL;
+        }
+
+        if((pSrcLimit - pSrc) >= 4) {
+            pSrcLimit -= 3; /* temporarily reduce pSrcLimit */
+
+            /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */
+            do {
+                ch = *pSrc++;
+                if(ch < 0xc0) {
+                    /*
+                     * ASCII, or a trail byte in lead position which is treated like
+                     * a single-byte sequence for better character boundary
+                     * resynchronization after illegal sequences.
+                     */
+                    *pDest++=(UChar)ch;
+                } else if(ch < 0xe0) { /* U+0080..U+07FF */
+                    /* 0x3080 = (0xc0 << 6) + 0x80 */
+                    *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
+                } else if(ch < 0xf0) { /* U+0800..U+FFFF */
+                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
+                    /* 0x2080 = (0x80 << 6) + 0x80 */
+                    ch = (ch << 12) + (*pSrc++ << 6);
+                    *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
+                } else /* f0..f4 */ { /* U+10000..U+10FFFF */
+                    /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
+                    ch = (ch << 18) + (*pSrc++ << 12);
+                    ch += *pSrc++ << 6;
+                    ch += *pSrc++ - 0x3c82080;
+                    *(pDest++) = U16_LEAD(ch);
+                    *(pDest++) = U16_TRAIL(ch);
+                }
+            } while(pSrc < pSrcLimit);
+
+            pSrcLimit += 3; /* restore original pSrcLimit */
+        }
+
+        /*
+         * Bounds-checked loop for the last few bytes.
+         * The trail bytes are consumed by the *pSrc++ reads themselves, so
+         * pSrc must not be advanced again after a sequence: doing so would
+         * move it beyond one-past-the-end of the source buffer.
+         */
+        while(pSrc < pSrcLimit) {
+            ch = *pSrc++;
+            if(ch < 0xc0) {
+                /*
+                 * ASCII, or a trail byte in lead position which is treated like
+                 * a single-byte sequence for better character boundary
+                 * resynchronization after illegal sequences.
+                 */
+                *pDest++=(UChar)ch;
+                continue;
+            } else if(ch < 0xe0) { /* U+0080..U+07FF */
+                if(pSrc < pSrcLimit) {
+                    /* 0x3080 = (0xc0 << 6) + 0x80 */
+                    *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
+                    continue;
+                }
+            } else if(ch < 0xf0) { /* U+0800..U+FFFF */
+                if((pSrcLimit - pSrc) >= 2) {
+                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
+                    /* 0x2080 = (0x80 << 6) + 0x80 */
+                    ch = (ch << 12) + (*pSrc++ << 6);
+                    *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
+                    continue;
+                }
+            } else /* f0..f4 */ { /* U+10000..U+10FFFF */
+                if((pSrcLimit - pSrc) >= 3) {
+                    /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
+                    ch = (ch << 18) + (*pSrc++ << 12);
+                    ch += *pSrc++ << 6;
+                    ch += *pSrc++ - 0x3c82080;
+                    *(pDest++) = U16_LEAD(ch);
+                    *(pDest++) = U16_TRAIL(ch);
+                    continue;
+                }
+            }
+
+            /* truncated character at the end */
+            *pDest++ = 0xfffd;
+            break;
+        }
+    }
+
+    /* Total length = UChars counted while pre-flighting + UChars written. */
+    reqLength+=(int32_t)(pDest - dest);
+
+    if(pDestLength){
+        *pDestLength = reqLength;
+    }
+
+    /* Terminate the buffer */
+    u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
+
+    return dest;
+}
+
+/**
+ * Writes the UTF-8 encoding of c at pDest and returns the position just past
+ * the last byte written (one to four bytes, chosen by the magnitude of c).
+ *
+ * Nothing is validated and no capacity is checked here: callers are expected
+ * to pass 0<=c<=0x10ffff, not a surrogate, and to have made room beforehand.
+ */
+static inline uint8_t *
+_appendUTF8(uint8_t *pDest, UChar32 c) {
+    if(c<=0x7f) {
+        /* single byte */
+        pDest[0]=(uint8_t)c;
+        return pDest+1;
+    }
+    if(c<=0x7ff) {
+        /* lead byte 110xxxxx + one trail byte */
+        pDest[0]=(uint8_t)((c>>6)|0xc0);
+        pDest[1]=(uint8_t)((c&0x3f)|0x80);
+        return pDest+2;
+    }
+    if(c<=0xffff) {
+        /* lead byte 1110xxxx + two trail bytes */
+        pDest[0]=(uint8_t)((c>>12)|0xe0);
+        pDest[1]=(uint8_t)(((c>>6)&0x3f)|0x80);
+        pDest[2]=(uint8_t)((c&0x3f)|0x80);
+        return pDest+3;
+    }
+    /* everything else: lead byte 11110xxx + three trail bytes */
+    pDest[0]=(uint8_t)((c>>18)|0xf0);
+    pDest[1]=(uint8_t)(((c>>12)&0x3f)|0x80);
+    pDest[2]=(uint8_t)(((c>>6)&0x3f)|0x80);
+    pDest[3]=(uint8_t)((c&0x3f)|0x80);
+    return pDest+4;
+}
+
+/**
+ * Converts a UTF-16 string to UTF-8, optionally replacing unpaired surrogates.
+ *
+ * Well-formed surrogate pairs become four-byte sequences. An unpaired
+ * surrogate is replaced by subchar when subchar>=0; otherwise the function
+ * sets U_INVALID_CHAR_FOUND and returns NULL.
+ * Once the destination is full the function stops writing and only counts
+ * the bytes that the remaining input would need (pre-flighting).
+ *
+ * @param dest               Output buffer; may be NULL only if destCapacity is 0.
+ * @param destCapacity       Capacity of dest in bytes; must not be negative.
+ * @param pDestLength        If not NULL, receives the number of bytes written
+ *                           plus the number of bytes pre-flighted.
+ * @param pSrc               UTF-16 input; may be NULL only if srcLength is 0.
+ * @param srcLength          Number of UChars, or -1 if pSrc is NUL-terminated.
+ * @param subchar            Replacement code point for unpaired surrogates, or a
+ *                           negative value to report an error instead. Values
+ *                           above 0x10ffff and surrogates are rejected.
+ * @param pNumSubstitutions  If not NULL, set to 0 on entry and to the number of
+ *                           substitutions on successful return.
+ * @param pErrorCode         In/out error code. If it already indicates a
+ *                           failure the function returns NULL immediately;
+ *                           invalid arguments set U_ILLEGAL_ARGUMENT_ERROR.
+ * @return dest, or NULL if an error was reported by this function.
+ */
+U_CAPI char* U_EXPORT2
+u_strToUTF8WithSub(char *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *pSrc,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode){
+ int32_t reqLength=0; /* bytes counted but not written; the written bytes are added at the end */
+ uint32_t ch=0,ch2=0;
+ uint8_t *pDest = (uint8_t *)dest;
+ uint8_t *pDestLimit = (pDest!=NULL)?(pDest + destCapacity):NULL; /* NULL for a NULL dest, so every capacity check below fails */
+ int32_t numSubstitutions;
+
+ /* args check */
+ if(U_FAILURE(*pErrorCode)){
+ return NULL;
+ }
+
+ if( (pSrc==NULL && srcLength!=0) || srcLength < -1 ||
+ (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
+ subchar > 0x10ffff || U_IS_SURROGATE(subchar)
+ ) {
+ *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return NULL;
+ }
+
+ if(pNumSubstitutions!=NULL) {
+ *pNumSubstitutions=0;
+ }
+ numSubstitutions=0;
+
+ if(srcLength==-1) { /* NUL-terminated source: every character is checked against pDestLimit */
+ while((ch=*pSrc)!=0) {
+ ++pSrc;
+ if(ch <= 0x7f) {
+ if(pDest<pDestLimit) {
+ *pDest++ = (uint8_t)ch;
+ } else {
+ reqLength = 1; /* this character did not fit: count it and switch to pre-flighting */
+ break;
+ }
+ } else if(ch <= 0x7ff) {
+ if((pDestLimit - pDest) >= 2) {
+ *pDest++=(uint8_t)((ch>>6)|0xc0);
+ *pDest++=(uint8_t)((ch&0x3f)|0x80);
+ } else {
+ reqLength = 2;
+ break;
+ }
+ } else if(ch <= 0xd7ff || ch >= 0xe000) {
+ if((pDestLimit - pDest) >= 3) {
+ *pDest++=(uint8_t)((ch>>12)|0xe0);
+ *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
+ *pDest++=(uint8_t)((ch&0x3f)|0x80);
+ } else {
+ reqLength = 3;
+ break;
+ }
+ } else /* ch is a surrogate */ {
+ int32_t length;
+
+ /*need not check for NUL because NUL fails U16_IS_TRAIL() anyway*/
+ if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
+ ++pSrc;
+ ch=U16_GET_SUPPLEMENTARY(ch, ch2);
+ } else if(subchar>=0) {
+ ch=subchar;
+ ++numSubstitutions;
+ } else {
+ /* Unicode 3.2 forbids surrogate code points in UTF-8 */
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ }
+
+ length = U8_LENGTH(ch);
+ if((pDestLimit - pDest) >= length) {
+ /* convert and append*/
+ pDest=_appendUTF8(pDest, ch);
+ } else {
+ reqLength = length;
+ break;
+ }
+ }
+ }
+ while((ch=*pSrc++)!=0) { /* pre-flight: only count the bytes needed for the rest of the string */
+ if(ch<=0x7f) {
+ ++reqLength;
+ } else if(ch<=0x7ff) {
+ reqLength+=2;
+ } else if(!U16_IS_SURROGATE(ch)) {
+ reqLength+=3;
+ } else if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
+ ++pSrc;
+ reqLength+=4;
+ } else if(subchar>=0) {
+ reqLength+=U8_LENGTH(subchar);
+ ++numSubstitutions;
+ } else {
+ /* Unicode 3.2 forbids surrogate code points in UTF-8 */
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ }
+ }
+ } else { /* srcLength>=0: explicit source length */
+ const UChar *pSrcLimit = (pSrc!=NULL)?(pSrc+srcLength):NULL;
+ int32_t count;
+
+ /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
+ for(;;) {
+ /*
+ * Each iteration of the inner loop progresses by at most 3 UTF-8
+ * bytes and one UChar, for most characters.
+ * For supplementary code points (4 & 2), which are rare,
+ * there is an additional adjustment.
+ */
+ count = (int32_t)((pDestLimit - pDest) / 3);
+ srcLength = (int32_t)(pSrcLimit - pSrc); /* srcLength is reused as "UChars remaining" */
+ if(count > srcLength) {
+ count = srcLength; /* min(remaining dest/3, remaining src) */
+ }
+ if(count < 3) {
+ /*
+ * Too much overhead if we get near the end of the string,
+ * continue with the next loop.
+ */
+ break;
+ }
+ do {
+ ch=*pSrc++;
+ if(ch <= 0x7f) {
+ *pDest++ = (uint8_t)ch;
+ } else if(ch <= 0x7ff) {
+ *pDest++=(uint8_t)((ch>>6)|0xc0);
+ *pDest++=(uint8_t)((ch&0x3f)|0x80);
+ } else if(ch <= 0xd7ff || ch >= 0xe000) {
+ *pDest++=(uint8_t)((ch>>12)|0xe0);
+ *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
+ *pDest++=(uint8_t)((ch&0x3f)|0x80);
+ } else /* ch is a surrogate */ {
+ /*
+ * We will read two UChars and probably output four bytes,
+ * which we didn't account for with computing count,
+ * so we adjust it here.
+ */
+ if(--count == 0) {
+ --pSrc; /* undo ch=*pSrc++ for the lead surrogate */
+ break; /* recompute count */
+ }
+
+ if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
+ ++pSrc;
+ ch=U16_GET_SUPPLEMENTARY(ch, ch2);
+
+ /* writing 4 bytes per 2 UChars is ok */
+ *pDest++=(uint8_t)((ch>>18)|0xf0);
+ *pDest++=(uint8_t)(((ch>>12)&0x3f)|0x80);
+ *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
+ *pDest++=(uint8_t)((ch&0x3f)|0x80);
+ } else {
+ /* Unicode 3.2 forbids surrogate code points in UTF-8 */
+ if(subchar>=0) {
+ ch=subchar;
+ ++numSubstitutions;
+ } else {
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ }
+
+ /* convert and append*/
+ pDest=_appendUTF8(pDest, ch);
+ }
+ }
+ } while(--count > 0);
+ }
+
+ while(pSrc<pSrcLimit) { /* careful loop: checks the source and destination limits for every character */
+ ch=*pSrc++;
+ if(ch <= 0x7f) {
+ if(pDest<pDestLimit) {
+ *pDest++ = (uint8_t)ch;
+ } else {
+ reqLength = 1;
+ break;
+ }
+ } else if(ch <= 0x7ff) {
+ if((pDestLimit - pDest) >= 2) {
+ *pDest++=(uint8_t)((ch>>6)|0xc0);
+ *pDest++=(uint8_t)((ch&0x3f)|0x80);
+ } else {
+ reqLength = 2;
+ break;
+ }
+ } else if(ch <= 0xd7ff || ch >= 0xe000) {
+ if((pDestLimit - pDest) >= 3) {
+ *pDest++=(uint8_t)((ch>>12)|0xe0);
+ *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
+ *pDest++=(uint8_t)((ch&0x3f)|0x80);
+ } else {
+ reqLength = 3;
+ break;
+ }
+ } else /* ch is a surrogate */ {
+ int32_t length;
+
+ if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) {
+ ++pSrc;
+ ch=U16_GET_SUPPLEMENTARY(ch, ch2);
+ } else if(subchar>=0) {
+ ch=subchar;
+ ++numSubstitutions;
+ } else {
+ /* Unicode 3.2 forbids surrogate code points in UTF-8 */
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ }
+
+ length = U8_LENGTH(ch);
+ if((pDestLimit - pDest) >= length) {
+ /* convert and append*/
+ pDest=_appendUTF8(pDest, ch);
+ } else {
+ reqLength = length;
+ break;
+ }
+ }
+ }
+ while(pSrc<pSrcLimit) { /* pre-flight the remainder without writing */
+ ch=*pSrc++;
+ if(ch<=0x7f) {
+ ++reqLength;
+ } else if(ch<=0x7ff) {
+ reqLength+=2;
+ } else if(!U16_IS_SURROGATE(ch)) {
+ reqLength+=3;
+ } else if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) {
+ ++pSrc;
+ reqLength+=4;
+ } else if(subchar>=0) {
+ reqLength+=U8_LENGTH(subchar);
+ ++numSubstitutions;
+ } else {
+ /* Unicode 3.2 forbids surrogate code points in UTF-8 */
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ }
+ }
+ }
+
+ reqLength+=(int32_t)(pDest - (uint8_t *)dest); /* total = pre-flighted bytes + bytes actually written */
+
+ if(pNumSubstitutions!=NULL) {
+ *pNumSubstitutions=numSubstitutions;
+ }
+
+ if(pDestLength){
+ *pDestLength = reqLength;
+ }
+
+ /* Terminate the buffer */
+ u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
+ return dest;
+}
+
+/**
+ * Converts a UTF-16 string to UTF-8 without substituting unpaired surrogates.
+ *
+ * Thin wrapper: forwards every argument to u_strToUTF8WithSub(), passing
+ * U_SENTINEL as the substitution character and NULL as the substitution
+ * counter, and returns that function's result unchanged.
+ */
+U_CAPI char* U_EXPORT2
+u_strToUTF8(char *dest,
+            int32_t destCapacity,
+            int32_t *pDestLength,
+            const UChar *pSrc,
+            int32_t srcLength,
+            UErrorCode *pErrorCode){
+    char *converted = u_strToUTF8WithSub(dest, destCapacity, pDestLength,
+                                         pSrc, srcLength,
+                                         U_SENTINEL, NULL,
+                                         pErrorCode);
+    return converted;
+}
+
+/**
+ * Converts Java "modified UTF-8" to UTF-16.
+ *
+ * Only one-, two- and three-byte sequences are decoded. A two-byte sequence
+ * is accepted for any lead byte from 0xc0 up (which includes the 0xc0 0x80
+ * form of U+0000) and every three-byte sequence with lead byte 0xe0..0xef
+ * yields exactly one UChar, so surrogates encoded as three bytes each pass
+ * through as individual code units. Any other byte sequence is replaced by
+ * subchar, or reported as U_INVALID_CHAR_FOUND when subchar is negative.
+ * Once the destination is full, the remaining input is only measured.
+ *
+ * @param dest               Output buffer; may be NULL only if destCapacity is 0.
+ * @param destCapacity       Capacity of dest in UChars; must not be negative.
+ * @param pDestLength        If not NULL, receives the number of UChars written
+ *                           plus the number of UChars pre-flighted.
+ * @param src                Input bytes; may be NULL only if srcLength is 0.
+ * @param srcLength          Number of bytes, or -1 if src is NUL-terminated.
+ * @param subchar            Replacement code point for undecodable input, or a
+ *                           negative value to report an error instead. Values
+ *                           above 0x10ffff and surrogates are rejected.
+ * @param pNumSubstitutions  If not NULL, set to 0 on entry and to the number of
+ *                           substitutions on successful return.
+ * @param pErrorCode         In/out error code. If it already indicates a
+ *                           failure the function returns NULL immediately;
+ *                           invalid arguments set U_ILLEGAL_ARGUMENT_ERROR.
+ * @return dest, or NULL if an error was reported by this function.
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromJavaModifiedUTF8WithSub(
+        UChar *dest,
+        int32_t destCapacity,
+        int32_t *pDestLength,
+        const char *src,
+        int32_t srcLength,
+        UChar32 subchar, int32_t *pNumSubstitutions,
+        UErrorCode *pErrorCode) {
+    /* args check */
+    if(U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
+        (dest==NULL && destCapacity!=0) || destCapacity<0 ||
+        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
+    ) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    if(pNumSubstitutions!=NULL) {
+        *pNumSubstitutions=0;
+    }
+    UChar *pDest = dest;
+    UChar *pDestLimit = dest+destCapacity;
+    int32_t reqLength = 0;
+    int32_t numSubstitutions=0;
+
+    if(srcLength < 0) {
+        /*
+         * Transform a NUL-terminated ASCII string.
+         * Handle non-ASCII strings with slower code.
+         */
+        UChar32 c;
+        while(((c = (uint8_t)*src) != 0) && c <= 0x7f && (pDest < pDestLimit)) {
+            *pDest++=(UChar)c;
+            ++src;
+        }
+        if(c == 0) {
+            /* The whole string was ASCII and fit: done. */
+            reqLength=(int32_t)(pDest - dest);
+            if(pDestLength) {
+                *pDestLength = reqLength;
+            }
+
+            /* Terminate the buffer */
+            u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
+            return dest;
+        }
+        /* Continue below with the length of the part not yet converted. */
+        srcLength = static_cast<int32_t>(uprv_strlen(src));
+    }
+
+    /* Faster loop without ongoing checking for srcLength and pDestLimit. */
+    UChar32 ch;
+    uint8_t t1, t2;
+    int32_t i = 0;
+    for(;;) {
+        int32_t count = (int32_t)(pDestLimit - pDest);
+        int32_t count2 = srcLength - i;
+        /*
+         * NOTE(review): the gate looks at the first byte of src rather than
+         * src[i]; it only decides whether to try the fast path, the inner
+         * loop re-checks every byte it copies.
+         */
+        if(count >= count2 && srcLength > 0 && U8_IS_SINGLE(*src)) {
+            /* fast ASCII loop */
+            int32_t start = i;
+            uint8_t b;
+            while(i < srcLength && U8_IS_SINGLE(b = src[i])) {
+                *pDest++=b;
+                ++i;
+            }
+            int32_t delta = i - start;
+            count -= delta;
+            count2 -= delta;
+        }
+        /*
+         * Each iteration of the inner loop progresses by at most 3 UTF-8
+         * bytes and one UChar.
+         */
+        if(subchar > 0xFFFF) {
+            /* A substitution would need two UChars; use the checked loop instead. */
+            break;
+        }
+        count2 /= 3;
+        if(count > count2) {
+            count = count2; /* min(remaining dest, remaining src/3) */
+        }
+        if(count < 3) {
+            /*
+             * Too much overhead if we get near the end of the string,
+             * continue with the next loop.
+             */
+            break;
+        }
+        do {
+            ch = (uint8_t)src[i++];
+            if(U8_IS_SINGLE(ch)) {
+                *pDest++=(UChar)ch;
+            } else {
+                if(ch >= 0xe0) {
+                    if( /* handle U+0000..U+FFFF inline */
+                        ch <= 0xef &&
+                        (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f &&
+                        (t2 = (uint8_t)(src[i+1] - 0x80)) <= 0x3f
+                    ) {
+                        /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
+                        *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
+                        i += 2;
+                        continue;
+                    }
+                } else {
+                    if( /* handle U+0000..U+07FF inline */
+                        ch >= 0xc0 &&
+                        (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f
+                    ) {
+                        *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
+                        ++i;
+                        continue;
+                    }
+                }
+
+                if(subchar < 0) {
+                    *pErrorCode = U_INVALID_CHAR_FOUND;
+                    return NULL;
+                } else if(subchar > 0xffff && --count == 0) {
+                    /*
+                     * We need to write two UChars, adjusted count for that,
+                     * and ran out of space.
+                     */
+                    --i;  // back out byte ch
+                    break;
+                } else {
+                    /* function call for error cases */
+                    utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1);
+                    ++numSubstitutions;
+                    *(pDest++)=(UChar)subchar;
+                }
+            }
+        } while(--count > 0);
+    }
+
+    /* Careful loop: checks the source and destination limits for every character. */
+    while(i < srcLength && (pDest < pDestLimit)) {
+        ch = (uint8_t)src[i++];
+        if(U8_IS_SINGLE(ch)){
+            *pDest++=(UChar)ch;
+        } else {
+            if(ch >= 0xe0) {
+                if( /* handle U+0000..U+FFFF inline */
+                    ch <= 0xef &&
+                    (i+1) < srcLength &&
+                    (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f &&
+                    (t2 = (uint8_t)(src[i+1] - 0x80)) <= 0x3f
+                ) {
+                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
+                    *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
+                    i += 2;
+                    continue;
+                }
+            } else {
+                if( /* handle U+0000..U+07FF inline */
+                    ch >= 0xc0 &&
+                    i < srcLength &&
+                    (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f
+                ) {
+                    *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
+                    ++i;
+                    continue;
+                }
+            }
+
+            if(subchar < 0) {
+                *pErrorCode = U_INVALID_CHAR_FOUND;
+                return NULL;
+            } else {
+                /* function call for error cases */
+                utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1);
+                ++numSubstitutions;
+                if(subchar<=0xFFFF) {
+                    *(pDest++)=(UChar)subchar;
+                } else {
+                    *(pDest++)=U16_LEAD(subchar);
+                    if(pDest<pDestLimit) {
+                        *(pDest++)=U16_TRAIL(subchar);
+                    } else {
+                        /* No room for the trail surrogate: count it and pre-flight the rest. */
+                        reqLength++;
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    /* Pre-flight the rest of the string. */
+    while(i < srcLength) {
+        ch = (uint8_t)src[i++];
+        if(U8_IS_SINGLE(ch)) {
+            reqLength++;
+        } else {
+            if(ch >= 0xe0) {
+                if( /* handle U+0000..U+FFFF inline */
+                    ch <= 0xef &&
+                    (i+1) < srcLength &&
+                    (uint8_t)(src[i] - 0x80) <= 0x3f &&
+                    (uint8_t)(src[i+1] - 0x80) <= 0x3f
+                ) {
+                    reqLength++;
+                    i += 2;
+                    continue;
+                }
+            } else {
+                if( /* handle U+0000..U+07FF inline */
+                    ch >= 0xc0 &&
+                    i < srcLength &&
+                    (uint8_t)(src[i] - 0x80) <= 0x3f
+                ) {
+                    reqLength++;
+                    ++i;
+                    continue;
+                }
+            }
+
+            if(subchar < 0) {
+                *pErrorCode = U_INVALID_CHAR_FOUND;
+                return NULL;
+            } else {
+                /* function call for error cases */
+                utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1);
+                ++numSubstitutions;
+                /*
+                 * Count what the writing loop above would have emitted: the
+                 * substitution character, i.e. two UChars when it is
+                 * supplementary. ch still holds the offending lead byte and
+                 * must not be used for the length.
+                 */
+                reqLength+=U16_LENGTH(subchar);
+            }
+        }
+    }
+
+    if(pNumSubstitutions!=NULL) {
+        *pNumSubstitutions=numSubstitutions;
+    }
+
+    /* Total length = UChars counted while pre-flighting + UChars written. */
+    reqLength+=(int32_t)(pDest - dest);
+    if(pDestLength) {
+        *pDestLength = reqLength;
+    }
+
+    /* Terminate the buffer */
+    u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
+    return dest;
+}
+/**
+ * Converts a UTF-16 string to Java "modified UTF-8".
+ *
+ * Differences from standard UTF-8 that are visible in the loops below:
+ * U+0000 is written as a two-byte sequence (it fails the "ch != 0" test of
+ * the one-byte branch), and every UChar above 0x7ff, surrogates included,
+ * is written as its own three-byte sequence; surrogate pairs are not
+ * combined. Once the destination is full the function stops writing and
+ * only counts the bytes the remaining input would need.
+ *
+ * @param dest          Output buffer; may be NULL only if destCapacity is 0.
+ * @param destCapacity  Capacity of dest in bytes; must not be negative.
+ * @param pDestLength   If not NULL, receives the number of bytes written plus
+ *                      the number of bytes pre-flighted.
+ * @param src           UTF-16 input; may be NULL only if srcLength is 0.
+ * @param srcLength     Number of UChars, or -1 if src is NUL-terminated.
+ * @param pErrorCode    In/out error code. If it already indicates a failure
+ *                      the function returns NULL immediately; invalid
+ *                      arguments set U_ILLEGAL_ARGUMENT_ERROR.
+ * @return dest, or NULL if an error was reported by this function.
+ */
+U_CAPI char* U_EXPORT2
+u_strToJavaModifiedUTF8(
+ char *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode) {
+ int32_t reqLength=0; /* bytes counted but not written; the written bytes are added at the end */
+ uint32_t ch=0;
+ uint8_t *pDest = (uint8_t *)dest;
+ uint8_t *pDestLimit = pDest + destCapacity; /* NOTE(review): computed before the arguments are validated below */
+ const UChar *pSrcLimit;
+ int32_t count;
+
+ /* args check */
+ if(U_FAILURE(*pErrorCode)){
+ return NULL;
+ }
+ if( (src==NULL && srcLength!=0) || srcLength < -1 ||
+ (dest==NULL && destCapacity!=0) || destCapacity<0
+ ) {
+ *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return NULL;
+ }
+
+ if(srcLength==-1) {
+ /* Convert NUL-terminated ASCII, then find the string length. */
+ while((ch=*src)<=0x7f && ch != 0 && pDest<pDestLimit) {
+ *pDest++ = (uint8_t)ch;
+ ++src;
+ }
+ if(ch == 0) { /* the whole string was ASCII and fit: finish here */
+ reqLength=(int32_t)(pDest - (uint8_t *)dest);
+ if(pDestLength) {
+ *pDestLength = reqLength;
+ }
+
+ /* Terminate the buffer */
+ u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
+ return dest;
+ }
+ srcLength = u_strlen(src); /* length of the part that has not been converted yet */
+ }
+
+ /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
+ pSrcLimit = (src!=NULL)?(src+srcLength):NULL;
+ for(;;) {
+ count = (int32_t)(pDestLimit - pDest);
+ srcLength = (int32_t)(pSrcLimit - src); /* srcLength is reused as "UChars remaining" */
+ if(count >= srcLength && srcLength > 0 && *src <= 0x7f) {
+ /* fast ASCII loop */
+ const UChar *prevSrc = src;
+ int32_t delta;
+ while(src < pSrcLimit && (ch = *src) <= 0x7f && ch != 0) {
+ *pDest++=(uint8_t)ch;
+ ++src;
+ }
+ delta = (int32_t)(src - prevSrc);
+ count -= delta;
+ srcLength -= delta;
+ }
+ /*
+ * Each iteration of the inner loop progresses by at most 3 UTF-8
+ * bytes and one UChar.
+ */
+ count /= 3;
+ if(count > srcLength) {
+ count = srcLength; /* min(remaining dest/3, remaining src) */
+ }
+ if(count < 3) {
+ /*
+ * Too much overhead if we get near the end of the string,
+ * continue with the next loop.
+ */
+ break;
+ }
+ do {
+ ch=*src++;
+ if(ch <= 0x7f && ch != 0) {
+ *pDest++ = (uint8_t)ch;
+ } else if(ch <= 0x7ff) { /* also reached for ch==0, which is written as two bytes */
+ *pDest++=(uint8_t)((ch>>6)|0xc0);
+ *pDest++=(uint8_t)((ch&0x3f)|0x80);
+ } else {
+ *pDest++=(uint8_t)((ch>>12)|0xe0);
+ *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
+ *pDest++=(uint8_t)((ch&0x3f)|0x80);
+ }
+ } while(--count > 0);
+ }
+
+ while(src<pSrcLimit) { /* careful loop: checks the destination limit for every character */
+ ch=*src++;
+ if(ch <= 0x7f && ch != 0) {
+ if(pDest<pDestLimit) {
+ *pDest++ = (uint8_t)ch;
+ } else {
+ reqLength = 1; /* this character did not fit: count it and switch to pre-flighting */
+ break;
+ }
+ } else if(ch <= 0x7ff) {
+ if((pDestLimit - pDest) >= 2) {
+ *pDest++=(uint8_t)((ch>>6)|0xc0);
+ *pDest++=(uint8_t)((ch&0x3f)|0x80);
+ } else {
+ reqLength = 2;
+ break;
+ }
+ } else {
+ if((pDestLimit - pDest) >= 3) {
+ *pDest++=(uint8_t)((ch>>12)|0xe0);
+ *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
+ *pDest++=(uint8_t)((ch&0x3f)|0x80);
+ } else {
+ reqLength = 3;
+ break;
+ }
+ }
+ }
+ while(src<pSrcLimit) { /* pre-flight the remainder without writing */
+ ch=*src++;
+ if(ch <= 0x7f && ch != 0) {
+ ++reqLength;
+ } else if(ch<=0x7ff) {
+ reqLength+=2;
+ } else {
+ reqLength+=3;
+ }
+ }
+
+ reqLength+=(int32_t)(pDest - (uint8_t *)dest); /* total = pre-flighted bytes + bytes actually written */
+ if(pDestLength){
+ *pDestLength = reqLength;
+ }
+
+ /* Terminate the buffer */
+ u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
+ return dest;
+}
diff --git a/thirdparty/icu4c/common/utext.cpp b/thirdparty/icu4c/common/utext.cpp
new file mode 100644
index 0000000000..763b6684fb
--- /dev/null
+++ b/thirdparty/icu4c/common/utext.cpp
@@ -0,0 +1,2877 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2005-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utext.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2005apr12
+* created by: Markus W. Scherer
+*/
+
+#include <cstddef>
+
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "unicode/unistr.h"
+#include "unicode/chariter.h"
+#include "unicode/utext.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include "ustr_imp.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uassert.h"
+#include "putilimp.h"
+
+U_NAMESPACE_USE
+
+#define I32_FLAG(bitIndex) ((int32_t)1<<(bitIndex)) /* int32_t value with only bit number bitIndex set */
+
+
+/**
+ * File-local shorthand for calling the access() function stored in the
+ * UText's function table; returns that function's result unchanged.
+ */
+static UBool
+utext_access(UText *ut, int64_t index, UBool forward) {
+    UBool accessed = ut->pFuncs->access(ut, index, forward);
+    return accessed;
+}
+
+
+
+/**
+ * Moves the iteration position by delta code points: forward for a positive
+ * delta, backward for a negative one, not at all for zero.
+ *
+ * Non-surrogate code units are stepped over directly; surrogates are handed
+ * to utext_next32() / utext_previous32().
+ *
+ * @return FALSE if a needed chunk could not be accessed or the 32-bit step
+ *         returned U_SENTINEL; TRUE otherwise.
+ */
+U_CAPI UBool U_EXPORT2
+utext_moveIndex32(UText *ut, int32_t delta) {
+    /* Forward steps; skipped entirely unless delta is positive. */
+    for(; delta>0; --delta) {
+        if(ut->chunkOffset>=ut->chunkLength &&
+                !utext_access(ut, ut->chunkNativeLimit, TRUE)) {
+            return FALSE;
+        }
+        UChar32 unit = ut->chunkContents[ut->chunkOffset];
+        if (!U16_IS_SURROGATE(unit)) {
+            ut->chunkOffset++;
+        } else if (utext_next32(ut) == U_SENTINEL) {
+            return FALSE;
+        }
+    }
+
+    /* Backward steps; skipped entirely unless delta is negative. */
+    for(; delta<0; ++delta) {
+        if(ut->chunkOffset<=0 &&
+                !utext_access(ut, ut->chunkNativeStart, FALSE)) {
+            return FALSE;
+        }
+        UChar32 unit = ut->chunkContents[ut->chunkOffset-1];
+        if (!U16_IS_SURROGATE(unit)) {
+            ut->chunkOffset--;
+        } else if (utext_previous32(ut) == U_SENTINEL) {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+
+/** Returns whatever the text provider's nativeLength() function reports. */
+U_CAPI int64_t U_EXPORT2
+utext_nativeLength(UText *ut) {
+    int64_t length = ut->pFuncs->nativeLength(ut);
+    return length;
+}
+
+
+/**
+ * Tells whether the UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE bit is set in the
+ * provider properties of this UText.
+ */
+U_CAPI UBool U_EXPORT2
+utext_isLengthExpensive(const UText *ut) {
+    return (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)) != 0;
+}
+
+
+/**
+ * Returns the native index of the current iteration position.
+ *
+ * Offsets up to nativeIndexingLimit are added to the chunk's native start
+ * directly; larger offsets are translated by the provider's
+ * mapOffsetToNative() function.
+ */
+U_CAPI int64_t U_EXPORT2
+utext_getNativeIndex(const UText *ut) {
+    if(ut->chunkOffset > ut->nativeIndexingLimit) {
+        return ut->pFuncs->mapOffsetToNative(ut);
+    }
+    return ut->chunkNativeStart+ut->chunkOffset;
+}
+
+
+/**
+ * Sets the iteration position to the given native index, then pulls the
+ * position back onto the lead surrogate if it ended up on the trail half of
+ * a surrogate pair.
+ */
+U_CAPI void U_EXPORT2
+utext_setNativeIndex(UText *ut, int64_t index) {
+    if(index>=ut->chunkNativeStart && index<ut->chunkNativeLimit) {
+        // The position lies inside the current chunk.
+        if((int32_t)(index - ut->chunkNativeStart) <= ut->nativeIndexingLimit) {
+            // Native indexes map 1:1 onto UTF-16 offsets here.
+            ut->chunkOffset=(int32_t)(index-ut->chunkNativeStart);
+        } else {
+            ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index);
+        }
+    } else {
+        // The position lies outside the current chunk: load the right one.
+        // Forward iteration from here is assumed, which is also optimal for
+        // a single random access; reverse iteration may suffer slightly.
+        ut->pFuncs->access(ut, index, TRUE);
+    }
+
+    // By convention the index is always on a code point boundary.
+    if (ut->chunkOffset>=ut->chunkLength) {
+        return;
+    }
+    UChar unit = ut->chunkContents[ut->chunkOffset];
+    if (!U16_IS_TRAIL(unit)) {
+        return;
+    }
+
+    // On a trail surrogate: look at the preceding unit, loading the
+    // preceding chunk first if we are at the very start of this one.
+    if (ut->chunkOffset==0) {
+        ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE);
+    }
+    if (ut->chunkOffset>0) {
+        UChar before = ut->chunkContents[ut->chunkOffset-1];
+        if (U16_IS_LEAD(before)) {
+            ut->chunkOffset--;
+        }
+    }
+}
+
+
+
+/**
+ * Returns the native index of the code point that precedes the current
+ * iteration position; the iteration position itself is left as it was on
+ * the fast path and restored by a previous32/next32 round trip otherwise.
+ */
+U_CAPI int64_t U_EXPORT2
+utext_getPreviousNativeIndex(UText *ut) {
+    // Fast path: not at the start of the chunk and the preceding unit is not
+    // a trail surrogate, so the preceding character is a single code unit.
+    const int32_t prevOffset = ut->chunkOffset - 1;
+    if (prevOffset >= 0) {
+        UChar prevUnit = ut->chunkContents[prevOffset];
+        if (!U16_IS_TRAIL(prevUnit)) {
+            if (prevOffset <= ut->nativeIndexingLimit) {
+                return ut->chunkNativeStart + prevOffset;
+            }
+            // Let the provider map the offset; it reads ut->chunkOffset, so
+            // step back temporarily and step forward again afterwards.
+            ut->chunkOffset = prevOffset;
+            int64_t mapped = ut->pFuncs->mapOffsetToNative(ut);
+            ut->chunkOffset++;
+            return mapped;
+        }
+    }
+
+    // At the very start of the text there is nothing before us.
+    if (ut->chunkOffset==0 && ut->chunkNativeStart==0) {
+        return 0;
+    }
+
+    // Chunk boundary or surrogate: step back, read the index, step forward.
+    utext_previous32(ut);
+    int64_t nativeIndex = UTEXT_GETNATIVEINDEX(ut);
+    utext_next32(ut);
+    return nativeIndex;
+}
+
+
+//
+// utext_current32. Get the UChar32 at the current position.
+// UText iteration position is always on a code point boundary,
+// never on the trail half of a surrogate pair.
+//
+U_CAPI UChar32 U_EXPORT2
+utext_current32(UText *ut) {
+    // Just past the end of the chunk: load the following chunk, or give up
+    // if there is none.
+    if (ut->chunkOffset==ut->chunkLength &&
+            ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) {
+        return U_SENTINEL;
+    }
+
+    UChar32 lead = ut->chunkContents[ut->chunkOffset];
+    if (!U16_IS_LEAD(lead)) {
+        // Ordinary case: not the start of a supplementary code point.
+        return lead;
+    }
+
+    // A lead surrogate: find the code unit that follows it.
+    UChar32 trail = 0;
+    if ((ut->chunkOffset+1) < ut->chunkLength) {
+        trail = ut->chunkContents[ut->chunkOffset+1];
+    } else {
+        // The following unit is in the next chunk. Switch forward to read it,
+        // then switch back so that the iteration position is unchanged. The
+        // text may end with an unpaired lead, in which case the forward
+        // access fails but the original position must still be restored.
+        const int64_t chunkEnd = ut->chunkNativeLimit;
+        const int32_t savedOffset = ut->chunkOffset;
+        if (ut->pFuncs->access(ut, chunkEnd, TRUE)) {
+            trail = ut->chunkContents[ut->chunkOffset];
+        }
+        // The reverse-iteration flag loads the preceding chunk again.
+        UBool restored = ut->pFuncs->access(ut, chunkEnd, FALSE);
+        U_ASSERT(restored==TRUE);
+        ut->chunkOffset = savedOffset;
+        if(!restored) {
+            return U_SENTINEL;
+        }
+    }
+
+    // Combine only a real pair; an unpaired lead is returned as it is.
+    return U16_IS_TRAIL(trail) ? U16_GET_SUPPLEMENTARY(lead, trail) : lead;
+}
+
+
+/**
+ * Returns the code point at the given native index and moves the iteration
+ * position there. If no code unit can be read after positioning, the value
+ * held so far is returned: U_SENTINEL, or the surrogate read on the fast path.
+ */
+U_CAPI UChar32 U_EXPORT2
+utext_char32At(UText *ut, int64_t nativeIndex) {
+    UChar32 result = U_SENTINEL;
+
+    // Fast path: the index is inside the directly indexable part of the
+    // current chunk and the unit there is not a surrogate.
+    if (nativeIndex>=ut->chunkNativeStart && nativeIndex < ut->chunkNativeStart + ut->nativeIndexingLimit) {
+        ut->chunkOffset = (int32_t)(nativeIndex - ut->chunkNativeStart);
+        result = ut->chunkContents[ut->chunkOffset];
+        if (!U16_IS_SURROGATE(result)) {
+            return result;
+        }
+    }
+
+    // General path: position properly, then read.
+    utext_setNativeIndex(ut, nativeIndex);
+    if (nativeIndex<ut->chunkNativeStart || ut->chunkOffset>=ut->chunkLength) {
+        return result;
+    }
+    result = ut->chunkContents[ut->chunkOffset];
+    // Surrogates may belong to a pair that spans chunks; utext_current32()
+    // handles those complications.
+    return U16_IS_SURROGATE(result) ? utext_current32(ut) : result;
+}
+
+
+/**
+ * Returns the code point at the iteration position and moves the position
+ * past it; returns U_SENTINEL if there is no further text.
+ * Unpaired surrogates are returned as they are.
+ */
+U_CAPI UChar32 U_EXPORT2
+utext_next32(UText *ut) {
+    if (ut->chunkOffset >= ut->chunkLength &&
+            ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) {
+        return U_SENTINEL;
+    }
+
+    const UChar32 lead = ut->chunkContents[ut->chunkOffset++];
+    if (!U16_IS_LEAD(lead)) {
+        // Not supplementary. A trail surrogate seen here cannot be part of
+        // a pair and is returned as a plain surrogate value.
+        return lead;
+    }
+
+    if (ut->chunkOffset >= ut->chunkLength &&
+            ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) {
+        // Unpaired lead surrogate at the very end of the text.
+        return lead;
+    }
+
+    const UChar32 trail = ut->chunkContents[ut->chunkOffset];
+    if (!U16_IS_TRAIL(trail)) {
+        // Unpaired lead surrogate inside the text. The position is already
+        // on the following character, possibly in the next chunk.
+        return lead;
+    }
+
+    ut->chunkOffset++;   // step over the trail surrogate as well
+    return U16_GET_SUPPLEMENTARY(lead, trail);
+}
+
+
+//
+// utext_previous32  Move the iteration position back by one code point and
+//                   return that code point. Returns U_SENTINEL at the start
+//                   of the text. Unpaired surrogates are returned unchanged.
+//
+U_CAPI UChar32 U_EXPORT2
+utext_previous32(UText *ut) {
+    // Load the preceding chunk if we are at the front of the current one.
+    if (ut->chunkOffset <= 0 &&
+            ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) {
+        return U_SENTINEL;
+    }
+
+    ut->chunkOffset--;
+    const UChar32 unit = ut->chunkContents[ut->chunkOffset];
+    if (!U16_IS_TRAIL(unit)) {
+        // Ordinary BMP code unit. A lead surrogate arriving here cannot be
+        // part of a pair, so it is handed back as-is.
+        return unit;
+    }
+
+    // Trail surrogate: its partner may live in the previous chunk.
+    if (ut->chunkOffset <= 0 &&
+            ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) {
+        // Unpaired trail surrogate at the very start of the text.
+        return unit;
+    }
+
+    const UChar32 preceding = ut->chunkContents[ut->chunkOffset - 1];
+    if (!U16_IS_LEAD(preceding)) {
+        // Unpaired trail surrogate, not at the start of the text.
+        // The iteration position is left on the trail surrogate itself.
+        return unit;
+    }
+
+    // Well-formed pair: step back over the lead surrogate and combine.
+    ut->chunkOffset--;
+    return U16_GET_SUPPLEMENTARY(preceding, unit);
+}
+
+
+
+//
+// utext_next32From  Position the iterator at a native index, then behave like
+//                   utext_next32(). Returns U_SENTINEL when the provider has
+//                   no chunk for the requested index.
+//
+U_CAPI UChar32 U_EXPORT2
+utext_next32From(UText *ut, int64_t index) {
+    const UBool inCurrentChunk =
+        (index >= ut->chunkNativeStart) && (index < ut->chunkNativeLimit);
+
+    if (!inCurrentChunk) {
+        // Target lies outside the current chunk; ask the provider for it.
+        if (!ut->pFuncs->access(ut, index, TRUE)) {
+            return U_SENTINEL;
+        }
+    } else {
+        const int64_t delta = index - ut->chunkNativeStart;
+        if (delta <= (int64_t)ut->nativeIndexingLimit) {
+            // Native index maps directly onto a UTF-16 offset.
+            ut->chunkOffset = (int32_t)delta;
+        } else {
+            // Non-UTF-16 indexing: let the provider translate.
+            ut->chunkOffset = ut->pFuncs->mapNativeIndexToUTF16(ut, index);
+        }
+    }
+
+    UChar32 result = ut->chunkContents[ut->chunkOffset++];
+    if (U16_IS_SURROGATE(result)) {
+        // Surrogates have many edge cases; restart from the index and let
+        // utext_next32() deal with them.
+        utext_setNativeIndex(ut, index);
+        result = utext_next32(ut);
+    }
+    return result;
+}
+
+
+//
+// utext_previous32From  Return the code point preceding a native index and
+//                       leave the iteration position at the start of that
+//                       code point. Returns U_SENTINEL when no chunk is
+//                       available before the index.
+//
+U_CAPI UChar32 U_EXPORT2
+utext_previous32From(UText *ut, int64_t index) {
+    // Address the chunk containing the position preceding the incoming index.
+    // Tricky edge case: comparing the requested index against chunkNativeStart
+    // is not always enough to know whether the preceding character is in the
+    // current chunk. With UTF-8 (or any multibyte encoding) the index may sit
+    // on something other than the first byte of the chunk's first character;
+    // the chunkOffset==0 re-check below covers that.
+    const UBool outsideChunk =
+        (index <= ut->chunkNativeStart) || (index > ut->chunkNativeLimit);
+
+    if (outsideChunk) {
+        if (!ut->pFuncs->access(ut, index, FALSE)) {
+            // no chunk available here
+            return U_SENTINEL;
+        }
+    } else {
+        const int64_t delta = index - ut->chunkNativeStart;
+        if (delta <= (int64_t)ut->nativeIndexingLimit) {
+            // Direct UTF-16 indexing.
+            ut->chunkOffset = (int32_t)delta;
+        } else {
+            ut->chunkOffset = ut->pFuncs->mapNativeIndexToUTF16(ut, index);
+            if (ut->chunkOffset == 0 && !ut->pFuncs->access(ut, index, FALSE)) {
+                // no chunk available here
+                return U_SENTINEL;
+            }
+        }
+    }
+
+    // Simple case: the preceding code unit is not a surrogate.
+    ut->chunkOffset--;
+    UChar32 prior = ut->chunkContents[ut->chunkOffset];
+
+    if (U16_IS_SURROGATE(prior)) {
+        // Possible supplementary, with many edge cases.
+        // Restart from the index and let utext_previous32() handle it.
+        utext_setNativeIndex(ut, index);
+        prior = utext_previous32(ut);
+    }
+    return prior;
+}
+
+
+//
+// utext_extract  Forward an extract request to the text provider's
+//                extract() function and return whatever it returns.
+//
+U_CAPI int32_t U_EXPORT2
+utext_extract(UText *ut, int64_t start, int64_t limit,
+              UChar *dest, int32_t destCapacity, UErrorCode *status) {
+    const int32_t providerResult =
+        ut->pFuncs->extract(ut, start, limit, dest, destCapacity, status);
+    return providerResult;
+}
+
+
+
+//
+// utext_equals  Two UTexts are equal when both are valid, use the same
+//               provider function table, refer to the same context, and
+//               are positioned at the same native index.
+//
+U_CAPI UBool U_EXPORT2
+utext_equals(const UText *a, const UText *b) {
+    // Null or invalid arguments don't compare equal to anything.
+    const UBool bothValid = (a != NULL) && (b != NULL) &&
+                            (a->magic == UTEXT_MAGIC) &&
+                            (b->magic == UTEXT_MAGIC);
+    if (!bothValid) {
+        return FALSE;
+    }
+
+    // Same provider type, same source, same current position.
+    // Evaluation short-circuits in that order.
+    if (a->pFuncs == b->pFuncs &&
+            a->context == b->context &&
+            utext_getNativeIndex(a) == utext_getNativeIndex(b)) {
+        return TRUE;
+    }
+    return FALSE;
+}
+
+// Report whether the provider's WRITABLE property bit is set on this UText.
+U_CAPI UBool U_EXPORT2
+utext_isWritable(const UText *ut)
+{
+    const int32_t writableBit = ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE);
+    return writableBit != 0;
+}
+
+
+// Make the UText read-only by clearing its WRITABLE property bit.
+U_CAPI void U_EXPORT2
+utext_freeze(UText *ut) {
+    const int32_t writableMask = I32_FLAG(UTEXT_PROVIDER_WRITABLE);
+    ut->providerProperties = ut->providerProperties & ~writableMask;
+}
+
+
+// Report whether the provider's HAS_META_DATA property bit is set on this UText.
+U_CAPI UBool U_EXPORT2
+utext_hasMetaData(const UText *ut)
+{
+    const int32_t metaDataBit = ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA);
+    return metaDataBit != 0;
+}
+
+
+
+//
+// utext_replace  Replace a native range with new text via the provider.
+//                Returns 0 without touching the text if *status already
+//                holds a failure, or if the UText is not writable (in which
+//                case *status is set to U_NO_WRITE_PERMISSION). Otherwise
+//                returns the provider's replace() result.
+//
+U_CAPI int32_t U_EXPORT2
+utext_replace(UText *ut,
+              int64_t nativeStart, int64_t nativeLimit,
+              const UChar *replacementText, int32_t replacementLength,
+              UErrorCode *status)
+{
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    const UBool writable =
+        (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) != 0;
+    if (!writable) {
+        *status = U_NO_WRITE_PERMISSION;
+        return 0;
+    }
+    return ut->pFuncs->replace(ut, nativeStart, nativeLimit,
+                               replacementText, replacementLength, status);
+}
+
+//
+// utext_copy  Copy or move a native range within the text via the provider.
+//             Does nothing if *status already holds a failure. Sets *status
+//             to U_NO_WRITE_PERMISSION if the UText is not writable.
+//
+U_CAPI void U_EXPORT2
+utext_copy(UText *ut,
+           int64_t nativeStart, int64_t nativeLimit,
+           int64_t destIndex,
+           UBool move,
+           UErrorCode *status)
+{
+    if (U_FAILURE(*status)) {
+        return;
+    }
+    const UBool writable =
+        (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) != 0;
+    if (writable) {
+        ut->pFuncs->copy(ut, nativeStart, nativeLimit, destIndex, move, status);
+    } else {
+        *status = U_NO_WRITE_PERMISSION;
+    }
+}
+
+
+
+//
+// utext_clone  Clone src through its provider's clone() function.
+//              Returns dest unchanged if *status already holds a failure.
+//              A NULL result from a provider that reported success is turned
+//              into U_MEMORY_ALLOCATION_ERROR. When readOnly is set, the
+//              successful clone is frozen before it is returned.
+//
+U_CAPI UText * U_EXPORT2
+utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status) {
+    if (U_FAILURE(*status)) {
+        return dest;
+    }
+
+    UText *cloned = src->pFuncs->clone(dest, src, deep, status);
+    if (U_SUCCESS(*status)) {
+        if (cloned == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+        } else if (readOnly) {
+            utext_freeze(cloned);
+        }
+    }
+    return cloned;
+}
+
+
+
+//------------------------------------------------------------------------------
+//
+// UText common functions implementation
+//
+//------------------------------------------------------------------------------
+
+//
+// UText.flags bit definitions
+//
+enum {
+    // Set when ICU allocated this UText struct on the heap;
+    // clear when the caller provided the storage.
+    UTEXT_HEAP_ALLOCATED = 1 << 0,
+
+    // Set when ICU allocated the extra storage as a separate heap block.
+    // Clear when there is no separate allocation: either no extra storage
+    // was requested, or it is appended to the end of the main UText storage.
+    UTEXT_EXTRA_HEAP_ALLOCATED = 1 << 1,
+
+    // Set while this UText is open; clear when it is not.
+    UTEXT_OPEN = 1 << 2
+};
+
+
+//
+// Extended form of a UText. The purpose is to aid in computing the total size required
+// when a provider asks for a UText to be allocated with extra storage.
+
+// Layout helper only: utext_setup() sizes a heap allocation from this struct and
+// points pExtra at the `extension` member of the (cast) UText.
+struct ExtendedUText {
+ UText ut; // The UText itself; utext_setup() casts a UText* to ExtendedUText*, so this is the first member.
+ std::max_align_t extension; // Placeholder marking the aligned offset where provider extra storage begins.
+};
+
+static const UText emptyText = UTEXT_INITIALIZER;
+
+//
+// utext_setup  Common setup used by text providers when opening a UText.
+//
+//   ut          An existing UText to (re)use, or NULL to heap-allocate one.
+//   extraSpace  Number of bytes of provider-private extra storage wanted.
+//   status      In/out error code. Nothing is done if it already holds a
+//               failure. Set to U_MEMORY_ALLOCATION_ERROR if an allocation
+//               fails, or U_ILLEGAL_ARGUMENT_ERROR if a supplied ut does not
+//               carry the UText magic value.
+//
+//   Returns the UText that was set up (NULL if allocating a new one failed).
+//   On success the UText is flagged open, its provider-visible fields are
+//   reset, and any extra storage is zero-filled.
+//
+U_CAPI UText * U_EXPORT2
+utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status) {
+    if (U_FAILURE(*status)) {
+        return ut;
+    }
+
+    if (ut == NULL) {
+        // We need to heap-allocate storage for the new UText
+        int32_t spaceRequired = sizeof(UText);
+        if (extraSpace > 0) {
+            spaceRequired = sizeof(ExtendedUText) + extraSpace - sizeof(std::max_align_t);
+        }
+        ut = (UText *)uprv_malloc(spaceRequired);
+        if (ut == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        } else {
+            *ut = emptyText;
+            ut->flags |= UTEXT_HEAP_ALLOCATED;
+            // Only publish extra storage when some was actually requested and
+            // allocated. (Testing spaceRequired here would always be true and
+            // would leave pExtra pointing outside the allocation when
+            // extraSpace is zero.)
+            if (extraSpace > 0) {
+                ut->extraSize = extraSpace;
+                ut->pExtra = &((ExtendedUText *)ut)->extension;
+            }
+        }
+    } else {
+        // We have been supplied with an already existing UText.
+        // Verify that it really appears to be a UText.
+        if (ut->magic != UTEXT_MAGIC) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+            return ut;
+        }
+        // If the ut is already open and there's a provider supplied close
+        // function, call it.
+        if ((ut->flags & UTEXT_OPEN) && ut->pFuncs->close != NULL) {
+            ut->pFuncs->close(ut);
+        }
+        ut->flags &= ~UTEXT_OPEN;
+
+        // If extra space was requested by our caller, check whether
+        // sufficient already exists, and allocate new if needed.
+        if (extraSpace > ut->extraSize) {
+            // Need more space. If there is existing separately allocated space,
+            // delete it first, then allocate new space.
+            if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) {
+                uprv_free(ut->pExtra);
+                ut->extraSize = 0;
+            }
+            ut->pExtra = uprv_malloc(extraSpace);
+            if (ut->pExtra == NULL) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                // Keep the bookkeeping consistent with the NULL pointer, so a
+                // later reuse of this UText cannot conclude that sufficient
+                // extra storage already exists.
+                ut->extraSize = 0;
+                ut->flags &= ~UTEXT_EXTRA_HEAP_ALLOCATED;
+            } else {
+                ut->extraSize = extraSpace;
+                ut->flags |= UTEXT_EXTRA_HEAP_ALLOCATED;
+            }
+        }
+    }
+    if (U_SUCCESS(*status)) {
+        ut->flags |= UTEXT_OPEN;
+
+        // Initialize all remaining fields of the UText.
+        //
+        ut->context = NULL;
+        ut->chunkContents = NULL;
+        ut->p = NULL;
+        ut->q = NULL;
+        ut->r = NULL;
+        ut->a = 0;
+        ut->b = 0;
+        ut->c = 0;
+        ut->chunkOffset = 0;
+        ut->chunkLength = 0;
+        ut->chunkNativeStart = 0;
+        ut->chunkNativeLimit = 0;
+        ut->nativeIndexingLimit = 0;
+        ut->providerProperties = 0;
+        ut->privA = 0;
+        ut->privB = 0;
+        ut->privC = 0;
+        ut->privP = NULL;
+        if (ut->pExtra != NULL && ut->extraSize > 0) {
+            uprv_memset(ut->pExtra, 0, ut->extraSize);
+        }
+    }
+    return ut;
+}
+
+
+//
+// utext_close  Close an open UText, releasing whatever the framework allocated.
+//              Returns NULL if the UText struct itself was heap-allocated by
+//              utext_setup (and is now freed); otherwise returns ut.
+//              A NULL, invalid, or not-open ut is returned untouched.
+//
+U_CAPI UText * U_EXPORT2
+utext_close(UText *ut) {
+    const UBool isOpenUText = (ut != NULL) &&
+                              (ut->magic == UTEXT_MAGIC) &&
+                              ((ut->flags & UTEXT_OPEN) != 0);
+    if (!isOpenUText) {
+        // Not an open UText: do nothing.
+        return ut;
+    }
+
+    // Give the provider a chance to clean up anything it allocated itself.
+    if (ut->pFuncs->close != NULL) {
+        ut->pFuncs->close(ut);
+    }
+    ut->flags &= ~UTEXT_OPEN;
+
+    // Release separately allocated extra storage owned by the framework.
+    if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) {
+        uprv_free(ut->pExtra);
+        ut->pExtra = NULL;
+        ut->flags &= ~UTEXT_EXTRA_HEAP_ALLOCATED;
+        ut->extraSize = 0;
+    }
+
+    // Zero out the function table of the closed UText. This is a defensive
+    // move, intended to cause applications that inadvertently use a closed
+    // UText to crash with null pointer errors.
+    ut->pFuncs = NULL;
+
+    if ((ut->flags & UTEXT_HEAP_ALLOCATED) == 0) {
+        // Caller owns the struct; hand it back for possible reuse.
+        return ut;
+    }
+
+    // The struct was allocated by utext_setup, so free it here.
+    // Clear magic first, so we can detect a user who immediately tries to
+    // reopen another UText using the deleted storage.
+    ut->magic = 0;
+    uprv_free(ut);
+    return NULL;
+}
+
+
+
+
+//
+// invalidateChunk Reset a chunk to have no contents, so that the next call
+// to access will cause new data to load.
+// This is needed when copy/move/replace operate directly on the
+// backing text, potentially putting it out of sync with the
+// contents in the chunk.
+//
+static void
+invalidateChunk(UText *ut) {
+    // Empty native range ...
+    ut->chunkNativeStart = 0;
+    ut->chunkNativeLimit = 0;
+    ut->nativeIndexingLimit = 0;
+    // ... and empty UTF-16 contents, so the next access() has to reload.
+    ut->chunkOffset = 0;
+    ut->chunkLength = 0;
+}
+
+//
+// pinIndex Do range pinning on a native index parameter.
+// 64 bit pinning is done in place.
+// 32 bit truncated result is returned as a convenience for
+// use in providers that don't need 64 bits.
+static int32_t
+pinIndex(int64_t &index, int64_t limit) {
+    // A negative index always pins to zero; otherwise cap at limit.
+    index = (index < 0) ? 0
+                        : ((index > limit) ? limit : index);
+    // Convenience for providers that only need 32 bits.
+    return (int32_t)index;
+}
+
+
+U_CDECL_BEGIN
+
+//
+// Pointer relocation function,
+// a utility used by shallow clone.
+// Adjust a pointer that refers to something within one UText (the source)
+// to refer to the same relative offset within a another UText (the target)
+//
+static void adjustPointer(UText *dest, const void **destPtr, const UText *src) {
+    // Work in (char *) throughout so that byte address arithmetic is valid.
+    char *target = (char *)*destPtr;
+    char *srcExtra = (char *)src->pExtra;
+    char *srcStruct = (char *)src;
+
+    if (target >= srcExtra && target < srcExtra + src->extraSize) {
+        // Pointer referred to the source's pExtra storage:
+        // rebase it onto the destination's pExtra region.
+        *destPtr = ((char *)dest->pExtra) + (target - srcExtra);
+        return;
+    }
+    if (target >= srcStruct && target < srcStruct + src->sizeOfStruct) {
+        // Pointer referred to the source UText struct itself:
+        // rebase it to the same offset inside the destination struct.
+        *destPtr = ((char *)dest) + (target - srcStruct);
+    }
+    // Anything else points outside the source UText and is left alone.
+}
+
+
+//
+// Clone. This is a generic copy-the-utext-by-value clone function that can be
+// used as-is with some utext types, and as a helper by other clones.
+//
+static UText * U_CALLCONV
+shallowTextClone(UText * dest, const UText * src, UErrorCode * status) {
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+    const int32_t extraBytes = src->extraSize;
+
+    // Let the generic utext_setup allocate storage if that is required.
+    dest = utext_setup(dest, extraBytes, status);
+    if (U_FAILURE(*status)) {
+        return dest;
+    }
+
+    // The allocation flags and the extra-storage pointer chosen by
+    // utext_setup belong to the clone and must survive the struct copy below,
+    // so remember them first.
+    void *savedExtra = dest->pExtra;
+    const int32_t savedFlags = dest->flags;
+
+    // Copy the UText struct by value, limited to the smaller of the two
+    // struct sizes, then restore the clone's own allocation bookkeeping.
+    const int bytesToCopy = (src->sizeOfStruct > dest->sizeOfStruct)
+                                ? dest->sizeOfStruct
+                                : src->sizeOfStruct;
+    uprv_memcpy(dest, src, bytesToCopy);
+    dest->pExtra = savedExtra;
+    dest->flags = savedFlags;
+
+    // Any "extra" storage is copied as well.
+    if (extraBytes > 0) {
+        uprv_memcpy(dest->pExtra, src->pExtra, extraBytes);
+    }
+
+    // Pointers that referred into the source UText (or its extra storage)
+    // are relocated to refer into the clone instead.
+    adjustPointer(dest, &dest->context, src);
+    adjustPointer(dest, &dest->p, src);
+    adjustPointer(dest, &dest->q, src);
+    adjustPointer(dest, &dest->r, src);
+    adjustPointer(dest, (const void **)&dest->chunkContents, src);
+
+    // A shallow clone never owns the underlying text storage, whether or not
+    // the source did.
+    dest->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
+
+    return dest;
+}
+
+
+U_CDECL_END
+
+
+
+//------------------------------------------------------------------------------
+//
+// UText implementation for UTF-8 char * strings (read-only)
+// Limitation: string length must be <= 0x7fffffff in length.
+// (length must fit in an int32_t variable)
+//
+// Use of UText data members:
+// context pointer to UTF-8 string
+// utext.b is the input string length (bytes).
+// utext.c Length scanned so far in string
+// (for optimizing finding length of zero terminated strings.)
+// utext.p pointer to the current buffer
+// utext.q pointer to the other buffer.
+//
+//------------------------------------------------------------------------------
+
+// Chunk size.
+// Must be less than 85 (256/3), because of byte mapping from UChar indexes to native indexes.
+// Worst case is three native bytes to one UChar. (Supplementaries are 4 native bytes
+// to two UChars.)
+// The longest illegal byte sequence treated as a single error (and converted to U+FFFD)
+// is a three-byte sequence (truncated four-byte sequence).
+//
+enum { UTF8_TEXT_CHUNK_SIZE=32 };
+
+//
+// UTF8Buf Two of these structs will be set up in the UText's extra allocated space.
+// Each contains the UChar chunk buffer, the to and from native maps, and
+// header info.
+//
+// because backwards iteration fills the buffers starting at the end and
+// working towards the front, the filled part of the buffers may not begin
+// at the start of the available storage for the buffers.
+//
+// Buffer size is one bigger than the specified UTF8_TEXT_CHUNK_SIZE to allow for
+// the last character added being a supplementary, and thus requiring a surrogate
+// pair. Doing this is simpler than checking for the edge case.
+//
+
+// One UTF-16 chunk buffer for the UTF-8 provider, together with the maps that
+// translate between UChar offsets in buf and native (byte) offsets.
+struct UTF8Buf {
+ int32_t bufNativeStart; // Native index of first char in UChar buf
+ int32_t bufNativeLimit; // Native index following last char in buf.
+ int32_t bufStartIdx; // First filled position in buf.
+ int32_t bufLimitIdx; // Limit of filled range in buf.
+ int32_t bufNILimit; // Limit of native indexing part of buf
+ int32_t toUCharsMapStart; // Native index corresponding to
+ // mapToUChars[0].
+ // Set to bufNativeStart when filling forwards.
+ // Set to computed value when filling backwards.
+
+ UChar buf[UTF8_TEXT_CHUNK_SIZE+4]; // The UChar buffer. Requires extra positions beyond
+ // the chunk size, to allow for a surrogate at the end.
+ // Length must be identical to mapToNative array, below,
+ // because of the way indexing works when the array is
+ // filled backwards during a reverse iteration. Thus,
+ // the additional extra size.
+ uint8_t mapToNative[UTF8_TEXT_CHUNK_SIZE+4]; // map UChar index in buf to
+ // native offset from bufNativeStart.
+ // Requires two extra slots,
+ // one for a supplementary starting in the last normal position,
+ // and one for an entry for the buffer limit position.
+ uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to
+ // corresponding offset in filled part of buf.
+ int32_t align; // Not referenced in the code visible here; presumably padding/alignment — confirm before removing.
+};
+
+U_CDECL_BEGIN
+
+//
+// utf8TextLength
+//
+// Get the length of the string. If we don't already know it,
+// we'll need to scan for the trailing nul.
+//
+static int64_t U_CALLCONV
+utf8TextLength(UText *ut) {
+    if (ut->b >= 0) {
+        // Length is already known.
+        return ut->b;
+    }
+
+    // NUL-terminated string whose end has not been found yet.
+    // Resume scanning from the furthest point examined so far (ut->c).
+    const char *base = (const char *)ut->context;
+    const char *cursor = base + ut->c;
+    while (*cursor != 0) {
+        cursor++;
+    }
+
+    if ((cursor - base) < 0x7fffffff) {
+        ut->b = (int32_t)(cursor - base);
+    } else {
+        // The string is longer than this provider can handle;
+        // clip the reported length to the int32_t maximum.
+        ut->b = 0x7fffffff;
+    }
+    // The length is now cached, so it is no longer expensive to obtain.
+    ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
+    return ut->b;
+}
+
+
+
+
+
+
+static UBool U_CALLCONV
+utf8TextAccess(UText *ut, int64_t index, UBool forward) {
+ //
+ // Apologies to those who are allergic to goto statements.
+ // Consider each goto to a labelled block to be the equivalent of
+ // call the named block as if it were a function();
+ // return;
+ //
+ const uint8_t *s8=(const uint8_t *)ut->context;
+ UTF8Buf *u8b = NULL;
+ int32_t length = ut->b; // Length of original utf-8
+ int32_t ix= (int32_t)index; // Requested index, trimmed to 32 bits.
+ int32_t mapIndex = 0;
+ if (index<0) {
+ ix=0;
+ } else if (index > 0x7fffffff) {
+ // Strings with 64 bit lengths not supported by this UTF-8 provider.
+ ix = 0x7fffffff;
+ }
+
+ // Pin requested index to the string length.
+ if (ix>length) {
+ if (length>=0) {
+ ix=length;
+ } else if (ix>=ut->c) {
+ // Zero terminated string, and requested index is beyond
+ // the region that has already been scanned.
+ // Scan up to either the end of the string or to the
+ // requested position, whichever comes first.
+ while (ut->c<ix && s8[ut->c]!=0) {
+ ut->c++;
+ }
+ // TODO: support for null terminated string length > 32 bits.
+ if (s8[ut->c] == 0) {
+ // We just found the actual length of the string.
+ // Trim the requested index back to that.
+ ix = ut->c;
+ ut->b = ut->c;
+ length = ut->c;
+ ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
+ }
+ }
+ }
+
+ //
+ // Dispatch to the appropriate action for a forward iteration request.
+ //
+ if (forward) {
+ if (ix==ut->chunkNativeLimit) {
+ // Check for normal sequential iteration cases first.
+ if (ix==length) {
+ // Just reached end of string
+ // Don't swap buffers, but do set the
+ // current buffer position.
+ ut->chunkOffset = ut->chunkLength;
+ return FALSE;
+ } else {
+ // End of current buffer.
+ // check whether other buffer already has what we need.
+ UTF8Buf *altB = (UTF8Buf *)ut->q;
+ if (ix>=altB->bufNativeStart && ix<altB->bufNativeLimit) {
+ goto swapBuffers;
+ }
+ }
+ }
+
+ // A random access. Desired index could be in either or neither buf.
+ // For optimizing the order of testing, first check for the index
+ // being in the other buffer. This will be the case for uses that
+ // move back and forth over a fairly limited range
+ {
+ u8b = (UTF8Buf *)ut->q; // the alternate buffer
+ if (ix>=u8b->bufNativeStart && ix<u8b->bufNativeLimit) {
+ // Requested index is in the other buffer.
+ goto swapBuffers;
+ }
+ if (ix == length) {
+ // Requested index is end-of-string.
+ // (this is the case of randomly seeking to the end.
+ // The case of iterating off the end is handled earlier.)
+ if (ix == ut->chunkNativeLimit) {
+ // Current buffer extends up to the end of the string.
+ // Leave it as the current buffer.
+ ut->chunkOffset = ut->chunkLength;
+ return FALSE;
+ }
+ if (ix == u8b->bufNativeLimit) {
+ // Alternate buffer extends to the end of string.
+ // Swap it in as the current buffer.
+ goto swapBuffersAndFail;
+ }
+
+ // Neither existing buffer extends to the end of the string.
+ goto makeStubBuffer;
+ }
+
+ if (ix<ut->chunkNativeStart || ix>=ut->chunkNativeLimit) {
+ // Requested index is in neither buffer.
+ goto fillForward;
+ }
+
+ // Requested index is in this buffer.
+ u8b = (UTF8Buf *)ut->p; // the current buffer
+ mapIndex = ix - u8b->toUCharsMapStart;
+ U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
+ ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
+ return TRUE;
+
+ }
+ }
+
+
+ //
+ // Dispatch to the appropriate action for a
+ // Backwards Direction iteration request.
+ //
+ if (ix==ut->chunkNativeStart) {
+ // Check for normal sequential iteration cases first.
+ if (ix==0) {
+ // Just reached the start of string
+ // Don't swap buffers, but do set the
+ // current buffer position.
+ ut->chunkOffset = 0;
+ return FALSE;
+ } else {
+ // Start of current buffer.
+ // check whether other buffer already has what we need.
+ UTF8Buf *altB = (UTF8Buf *)ut->q;
+ if (ix>altB->bufNativeStart && ix<=altB->bufNativeLimit) {
+ goto swapBuffers;
+ }
+ }
+ }
+
+ // A random access. Desired index could be in either or neither buf.
+ // For optimizing the order of testing,
+ // Most likely case: in the other buffer.
+ // Second most likely: in neither buffer.
+ // Unlikely, but must work: in the current buffer.
+ u8b = (UTF8Buf *)ut->q; // the alternate buffer
+ if (ix>u8b->bufNativeStart && ix<=u8b->bufNativeLimit) {
+ // Requested index is in the other buffer.
+ goto swapBuffers;
+ }
+ // Requested index is start-of-string.
+ // (this is the case of randomly seeking to the start.
+ // The case of iterating off the start is handled earlier.)
+ if (ix==0) {
+ if (u8b->bufNativeStart==0) {
+ // Alternate buffer contains the data for the start string.
+ // Make it be the current buffer.
+ goto swapBuffersAndFail;
+ } else {
+ // Request for data before the start of string,
+ // neither buffer is usable.
+ // set up a zero-length buffer.
+ goto makeStubBuffer;
+ }
+ }
+
+ if (ix<=ut->chunkNativeStart || ix>ut->chunkNativeLimit) {
+ // Requested index is in neither buffer.
+ goto fillReverse;
+ }
+
+ // Requested index is in this buffer.
+ // Set the utf16 buffer index.
+ u8b = (UTF8Buf *)ut->p;
+ mapIndex = ix - u8b->toUCharsMapStart;
+ ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
+ if (ut->chunkOffset==0) {
+ // This occurs when the first character in the text is
+ // a multi-byte UTF-8 char, and the requested index is to
+ // one of the trailing bytes. Because there is no preceding
+ // character, this access fails. We can't pick up on the
+ // situation sooner because the requested index is not zero.
+ return FALSE;
+ } else {
+ return TRUE;
+ }
+
+
+
+swapBuffers:
+ // The alternate buffer (ut->q) has the string data that was requested.
+ // Swap the primary and alternate buffers, and set the
+ // chunk index into the new primary buffer.
+ {
+ u8b = (UTF8Buf *)ut->q;
+ ut->q = ut->p;
+ ut->p = u8b;
+ ut->chunkContents = &u8b->buf[u8b->bufStartIdx];
+ ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx;
+ ut->chunkNativeStart = u8b->bufNativeStart;
+ ut->chunkNativeLimit = u8b->bufNativeLimit;
+ ut->nativeIndexingLimit = u8b->bufNILimit;
+
+ // Index into the (now current) chunk
+ // Use the map to set the chunk index. It's more trouble than it's worth
+ // to check whether native indexing can be used.
+ U_ASSERT(ix>=u8b->bufNativeStart);
+ U_ASSERT(ix<=u8b->bufNativeLimit);
+ mapIndex = ix - u8b->toUCharsMapStart;
+ U_ASSERT(mapIndex>=0);
+ U_ASSERT(mapIndex<(int32_t)sizeof(u8b->mapToUChars));
+ ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
+
+ return TRUE;
+ }
+
+
+ swapBuffersAndFail:
+ // We got a request for either the start or end of the string,
+ // with iteration continuing in the out-of-bounds direction.
+ // The alternate buffer already contains the data up to the
+ // start/end.
+ // Swap the buffers, then return failure, indicating that we couldn't
+ // make things correct for continuing the iteration in the requested
+ // direction. The position & buffer are correct should the
+ // user decide to iterate in the opposite direction.
+ u8b = (UTF8Buf *)ut->q;
+ ut->q = ut->p;
+ ut->p = u8b;
+ ut->chunkContents = &u8b->buf[u8b->bufStartIdx];
+ ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx;
+ ut->chunkNativeStart = u8b->bufNativeStart;
+ ut->chunkNativeLimit = u8b->bufNativeLimit;
+ ut->nativeIndexingLimit = u8b->bufNILimit;
+
+ // Index into the (now current) chunk
+ // For this function (swapBuffersAndFail), the requested index
+ // will always be at either the start or end of the chunk.
+ if (ix==u8b->bufNativeLimit) {
+ ut->chunkOffset = ut->chunkLength;
+ } else {
+ ut->chunkOffset = 0;
+ U_ASSERT(ix == u8b->bufNativeStart);
+ }
+ return FALSE;
+
+makeStubBuffer:
+ // The user has done a seek/access past the start or end
+ // of the string. Rather than loading data that is likely
+ // to never be used, just set up a zero-length buffer at
+ // the position.
+ u8b = (UTF8Buf *)ut->q;
+ u8b->bufNativeStart = ix;
+ u8b->bufNativeLimit = ix;
+ u8b->bufStartIdx = 0;
+ u8b->bufLimitIdx = 0;
+ u8b->bufNILimit = 0;
+ u8b->toUCharsMapStart = ix;
+ u8b->mapToNative[0] = 0;
+ u8b->mapToUChars[0] = 0;
+ goto swapBuffersAndFail;
+
+
+
+fillForward:
+ {
+ // Move the incoming index to a code point boundary.
+ U8_SET_CP_START(s8, 0, ix);
+
+ // Swap the UText buffers.
+ // We want to fill what was previously the alternate buffer,
+ // and make what was the current buffer be the new alternate.
+ UTF8Buf *u8b_swap = (UTF8Buf *)ut->q;
+ ut->q = ut->p;
+ ut->p = u8b_swap;
+
+ int32_t strLen = ut->b;
+ UBool nulTerminated = FALSE;
+ if (strLen < 0) {
+ strLen = 0x7fffffff;
+ nulTerminated = TRUE;
+ }
+
+ UChar *buf = u8b_swap->buf;
+ uint8_t *mapToNative = u8b_swap->mapToNative;
+ uint8_t *mapToUChars = u8b_swap->mapToUChars;
+ int32_t destIx = 0;
+ int32_t srcIx = ix;
+ UBool seenNonAscii = FALSE;
+ UChar32 c = 0;
+
+ // Fill the chunk buffer and mapping arrays.
+ while (destIx<UTF8_TEXT_CHUNK_SIZE) {
+ c = s8[srcIx];
+ if (c>0 && c<0x80) {
+ // Special case ASCII range for speed.
+ // zero is excluded to simplify bounds checking.
+ buf[destIx] = (UChar)c;
+ mapToNative[destIx] = (uint8_t)(srcIx - ix);
+ mapToUChars[srcIx-ix] = (uint8_t)destIx;
+ srcIx++;
+ destIx++;
+ } else {
+ // General case, handle everything.
+ if (seenNonAscii == FALSE) {
+ seenNonAscii = TRUE;
+ u8b_swap->bufNILimit = destIx;
+ }
+
+ int32_t cIx = srcIx;
+ int32_t dIx = destIx;
+ int32_t dIxSaved = destIx;
+ U8_NEXT_OR_FFFD(s8, srcIx, strLen, c);
+ if (c==0 && nulTerminated) {
+ srcIx--;
+ break;
+ }
+
+ U16_APPEND_UNSAFE(buf, destIx, c);
+ do {
+ mapToNative[dIx++] = (uint8_t)(cIx - ix);
+ } while (dIx < destIx);
+
+ do {
+ mapToUChars[cIx++ - ix] = (uint8_t)dIxSaved;
+ } while (cIx < srcIx);
+ }
+ if (srcIx>=strLen) {
+ break;
+ }
+
+ }
+
+ // store Native <--> Chunk Map entries for the end of the buffer.
+ // There is no actual character here, but the index position is valid.
+ mapToNative[destIx] = (uint8_t)(srcIx - ix);
+ mapToUChars[srcIx - ix] = (uint8_t)destIx;
+
+ // fill in Buffer descriptor
+ u8b_swap->bufNativeStart = ix;
+ u8b_swap->bufNativeLimit = srcIx;
+ u8b_swap->bufStartIdx = 0;
+ u8b_swap->bufLimitIdx = destIx;
+ if (seenNonAscii == FALSE) {
+ u8b_swap->bufNILimit = destIx;
+ }
+ u8b_swap->toUCharsMapStart = u8b_swap->bufNativeStart;
+
+ // Set UText chunk to refer to this buffer.
+ ut->chunkContents = buf;
+ ut->chunkOffset = 0;
+ ut->chunkLength = u8b_swap->bufLimitIdx;
+ ut->chunkNativeStart = u8b_swap->bufNativeStart;
+ ut->chunkNativeLimit = u8b_swap->bufNativeLimit;
+ ut->nativeIndexingLimit = u8b_swap->bufNILimit;
+
+ // For zero terminated strings, keep track of the maximum point
+ // scanned so far.
+ if (nulTerminated && srcIx>ut->c) {
+ ut->c = srcIx;
+ if (c==0) {
+ // We scanned to the end.
+ // Remember the actual length.
+ ut->b = srcIx;
+ ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
+ }
+ }
+ return TRUE;
+ }
+
+
+fillReverse:
+ {
+ // Move the incoming index to a code point boundary.
+ // Can only do this if the incoming index is somewhere in the interior of the string.
+ // If index is at the end, there is no character there to look at.
+ if (ix != ut->b) {
+ // Note: this function will only move the index back if it is on a trail byte
+ // and there is a preceding lead byte and the sequence from the lead
+ // through this trail could be part of a valid UTF-8 sequence
+ // Otherwise the index remains unchanged.
+ U8_SET_CP_START(s8, 0, ix);
+ }
+
+ // Swap the UText buffers.
+ // We want to fill what was previously the alternate buffer,
+ // and make what was the current buffer be the new alternate.
+ UTF8Buf *u8b_swap = (UTF8Buf *)ut->q;
+ ut->q = ut->p;
+ ut->p = u8b_swap;
+
+ UChar *buf = u8b_swap->buf;
+ uint8_t *mapToNative = u8b_swap->mapToNative;
+ uint8_t *mapToUChars = u8b_swap->mapToUChars;
+ int32_t toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1;
+ // Note that toUCharsMapStart can be negative. Happens when the remaining
+ // text from current position to the beginning is less than the buffer size.
+ // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
+ int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region
+ // at end of buffer to leave room
+ // for a surrogate pair at the
+ // buffer start.
+ int32_t srcIx = ix;
+ int32_t bufNILimit = destIx;
+ UChar32 c;
+
+ // Map to/from Native Indexes, fill in for the position at the end of
+ // the buffer.
+ //
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
+ mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
+
+ // Fill the chunk buffer
+ // Work backwards, filling from the end of the buffer towards the front.
+ //
+ while (destIx>2 && (srcIx - toUCharsMapStart > 5) && (srcIx > 0)) {
+ srcIx--;
+ destIx--;
+
+ // Get last byte of the UTF-8 character
+ c = s8[srcIx];
+ if (c<0x80) {
+ // Special case ASCII range for speed.
+ buf[destIx] = (UChar)c;
+ U_ASSERT(toUCharsMapStart <= srcIx);
+ mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
+ } else {
+ // General case, handle everything non-ASCII.
+
+ int32_t sIx = srcIx; // ix of last byte of multi-byte u8 char
+
+ // Get the full character from the UTF8 string.
+ // use code derived from the macros in utf8.h
+ // Leaves srcIx pointing at the first byte of the UTF-8 char.
+ //
+ c=utf8_prevCharSafeBody(s8, 0, &srcIx, c, -3);
+ // leaves srcIx at first byte of the multi-byte char.
+
+ // Store the character in UTF-16 buffer.
+ if (c<0x10000) {
+ buf[destIx] = (UChar)c;
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
+ } else {
+ buf[destIx] = U16_TRAIL(c);
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
+ buf[--destIx] = U16_LEAD(c);
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
+ }
+
+ // Fill in the map from native indexes to UChars buf index.
+ do {
+ mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
+ } while (sIx >= srcIx);
+ U_ASSERT(toUCharsMapStart <= (srcIx+1));
+
+ // Set native indexing limit to be the current position.
+ // We are processing a non-ascii, non-native-indexing char now;
+ // the limit will be here if the rest of the chars to be
+ // added to this buffer are ascii.
+ bufNILimit = destIx;
+ }
+ }
+ u8b_swap->bufNativeStart = srcIx;
+ u8b_swap->bufNativeLimit = ix;
+ u8b_swap->bufStartIdx = destIx;
+ u8b_swap->bufLimitIdx = UTF8_TEXT_CHUNK_SIZE+2;
+ u8b_swap->bufNILimit = bufNILimit - u8b_swap->bufStartIdx;
+ u8b_swap->toUCharsMapStart = toUCharsMapStart;
+
+ ut->chunkContents = &buf[u8b_swap->bufStartIdx];
+ ut->chunkLength = u8b_swap->bufLimitIdx - u8b_swap->bufStartIdx;
+ ut->chunkOffset = ut->chunkLength;
+ ut->chunkNativeStart = u8b_swap->bufNativeStart;
+ ut->chunkNativeLimit = u8b_swap->bufNativeLimit;
+ ut->nativeIndexingLimit = u8b_swap->bufNILimit;
+ return TRUE;
+ }
+
+}
+
+
+
+//
+// utext_strFromUTF8
+//
+// A slightly modified copy of u_strFromUTF8, with unneeded features removed.
+// Converts exactly srcLength bytes of UTF-8 into UTF-16. Invalid UTF-8 does
+// not cause a failure; a Replacement Char is inserted instead.
+//
+// Writes at most destCapacity UChars to dest, reports the number of UChars
+// the full conversion needs through *pDestLength (when non-NULL), lets
+// u_terminateUChars() terminate the buffer and set the status, and returns dest.
+//
+static UChar*
+utext_strFromUTF8(UChar *dest,
+                  int32_t destCapacity,
+                  int32_t *pDestLength,
+                  const char* src,
+                  int32_t srcLength, // required. NUL terminated not supported.
+                  UErrorCode *pErrorCode
+                  )
+{
+    uint8_t *bytes = (uint8_t *) src;
+    UChar *out = dest;
+    UChar *outLimit = (dest!=NULL) ? (dest+destCapacity) : NULL;
+    int32_t srcIdx = 0;
+    int32_t unitsNeeded = 0;
+    UChar32 cp = 0;
+
+    // Phase 1: convert and store for as long as there is room in dest.
+    while (srcIdx < srcLength && out < outLimit) {
+        cp = bytes[srcIdx++];
+        if (cp > 0x7f) {
+            cp = utf8_nextCharSafeBody(bytes, &srcIdx, srcLength, cp, -3);
+            if (!U_IS_BMP(cp)) {
+                // Supplementary code point: needs a surrogate pair.
+                *(out++) = U16_LEAD(cp);
+                if (out >= outLimit) {
+                    // No room for the trail surrogate; count it and stop storing.
+                    unitsNeeded++;
+                    break;
+                }
+                *(out++) = U16_TRAIL(cp);
+                continue;
+            }
+        }
+        *(out++) = (UChar)cp;
+    }
+
+    // Phase 2: destination is full (or absent); only count the remaining UChars.
+    while (srcIdx < srcLength) {
+        cp = bytes[srcIdx++];
+        if (cp > 0x7f) {
+            cp = utf8_nextCharSafeBody(bytes, &srcIdx, srcLength, cp, -3);
+            unitsNeeded += U16_LENGTH(cp);
+        } else {
+            unitsNeeded++;
+        }
+    }
+
+    // Add what was actually stored to what was only counted.
+    unitsNeeded += (int32_t)(out - dest);
+
+    if (pDestLength) {
+        *pDestLength = unitsNeeded;
+    }
+
+    /* Terminate the buffer */
+    u_terminateUChars(dest, destCapacity, unitsNeeded, pErrorCode);
+
+    return dest;
+}
+
+
+
+// Extract function for UTF-8 text: converts the native (byte) range
+// [start, limit) to UTF-16 in dest and returns the number of UChars the
+// range requires. Leaves the iteration position at the (adjusted) limit.
+//
+// NOTE(review): the text length is read from ut->b, which utext_openUTF8()
+// sets to -1 for NUL-terminated input; confirm the length has been resolved
+// before extract can be reached.
+static int32_t U_CALLCONV
+utf8TextExtract(UText *ut,
+                int64_t start, int64_t limit,
+                UChar *dest, int32_t destCapacity,
+                UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if (destCapacity<0 || (dest==NULL && destCapacity>0)) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    int32_t textLength = ut->b;
+    int32_t from = pinIndex(start, textLength);
+    int32_t to   = pinIndex(limit, textLength);
+    if (from > to) {
+        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    // Move each boundary back onto a code point boundary if it is not on one.
+    // At most three steps, the largest number of trail bytes in a well formed
+    // UTF-8 character.
+    const uint8_t *bytes = (const uint8_t *)ut->context;
+    if (from < ut->chunkNativeLimit) {
+        int steps = 0;
+        while (steps < 3 &&
+               !(U8_IS_SINGLE(bytes[from]) || U8_IS_LEAD(bytes[from]) || from==0)) {
+            from--;
+            steps++;
+        }
+    }
+    if (to < ut->chunkNativeLimit) {
+        int steps = 0;
+        while (steps < 3 &&
+               !(U8_IS_SINGLE(bytes[to]) || U8_IS_LEAD(bytes[to]) || to==0)) {
+            to--;
+            steps++;
+        }
+    }
+
+    // Convert the selected bytes, then park the iterator at the end of the range.
+    int32_t unitsNeeded = 0;
+    utext_strFromUTF8(dest, destCapacity, &unitsNeeded,
+                      (const char *)ut->context + from, to - from,
+                      pErrorCode);
+    utf8TextAccess(ut, to, TRUE);
+    return unitsNeeded;
+}
+
+//
+// utf8TextMapOffsetToNative
+//
+// Translate the UText's current chunk (UTF-16) offset into a native index,
+// using the mapToNative table of the buffer that ut->p points to.
+static int64_t U_CALLCONV
+utf8TextMapOffsetToNative(const UText *ut) {
+    UTF8Buf *activeBuf = (UTF8Buf *)ut->p;
+    U_ASSERT(ut->chunkOffset>ut->nativeIndexingLimit && ut->chunkOffset<=ut->chunkLength);
+
+    // The map is indexed by position in the UChar buffer, not by chunk offset.
+    const int32_t bufIx = ut->chunkOffset + activeBuf->bufStartIdx;
+    // Map entries are stored relative to toUCharsMapStart.
+    const int32_t nativeIx = activeBuf->mapToNative[bufIx] + activeBuf->toUCharsMapStart;
+
+    U_ASSERT(nativeIx >= ut->chunkNativeStart && nativeIx <= ut->chunkNativeLimit);
+    return nativeIx;
+}
+
+//
+// Map a native index to the corresponding chunk (UTF-16) offset,
+// using the mapToUChars table of the buffer that ut->p points to.
+//
+static int32_t U_CALLCONV
+utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) {
+    U_ASSERT(index64 <= 0x7fffffff);
+    UTF8Buf *activeBuf = (UTF8Buf *)ut->p;
+    const int32_t nativeIx = (int32_t)index64;
+    U_ASSERT(nativeIx>=ut->chunkNativeStart+ut->nativeIndexingLimit);
+    U_ASSERT(nativeIx<=ut->chunkNativeLimit);
+
+    // Map slots are addressed relative to toUCharsMapStart.
+    const int32_t slot = nativeIx - activeBuf->toUCharsMapStart;
+    U_ASSERT(slot < (int32_t)sizeof(UTF8Buf::mapToUChars));
+
+    // The stored value is a UChar buffer index; rebase it onto the chunk start.
+    const int32_t chunkOffset = activeBuf->mapToUChars[slot] - activeBuf->bufStartIdx;
+    U_ASSERT(chunkOffset>=0 && chunkOffset<=ut->chunkLength);
+    return chunkOffset;
+}
+
+// Clone function for UTF-8 text.
+//   dest    UText to clone into (passed through to shallowTextClone()).
+//   src     UText to be cloned.
+//   deep    if TRUE, the clone gets its own copy of the UTF-8 string.
+//   status  set to U_MEMORY_ALLOCATION_ERROR if the string copy cannot be allocated.
+// Returns the value returned by shallowTextClone().
+static UText * U_CALLCONV
+utf8TextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
+{
+    // First do a generic shallow clone. Does everything needed for the UText struct itself.
+    dest = shallowTextClone(dest, src, status);
+
+    // For deep clones, make a copy of the string.
+    // The copied storage is owned by the newly created clone.
+    //
+    // TODO: There is an issue with using utext_nativeLength().
+    //       That function is non-const in cases where the input was NUL terminated
+    //       and the length has not yet been determined.
+    //       This function (clone()) is const.
+    //       There is potentially a thread safety issue lurking here.
+    //
+    if (deep && U_SUCCESS(*status)) {
+        int32_t len = (int32_t)utext_nativeLength((UText *)src);
+        char *copyStr = (char *)uprv_malloc(len+1);
+        if (copyStr == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+        } else {
+            // Copy exactly len bytes and terminate explicitly. A source opened with
+            // an explicit length need not have a readable byte at src[len], so
+            // copying len+1 bytes could read past the end of the caller's buffer.
+            // The copy IS NUL terminated, whether or not the original was.
+            uprv_memcpy(copyStr, src->context, len);
+            copyStr[len] = 0;
+            dest->context = copyStr;
+            dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
+        }
+    }
+    return dest;
+}
+
+
+// Close function for UTF-8 text. The generic UText close does most of the
+// work; the only provider-specific step is releasing the UTF-8 string when
+// this UText owns it (UTEXT_PROVIDER_OWNS_TEXT, set by a deep clone).
+static void U_CALLCONV
+utf8TextClose(UText *ut) {
+    if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) == 0) {
+        // Text belongs to someone else; nothing to release.
+        return;
+    }
+    uprv_free((char *)ut->context);
+    ut->context = NULL;
+}
+
+U_CDECL_END
+
+
+static const struct UTextFuncs utf8Funcs = // Function table installed by utext_openUTF8().
+{
+ sizeof(UTextFuncs), // size of this table
+ 0, 0, 0, // Reserved alignment padding
+ utf8TextClone, // clone
+ utf8TextLength, // native length
+ utf8TextAccess, // access
+ utf8TextExtract, // extract
+ NULL, /* replace: not provided for UTF-8 text */
+ NULL, /* copy: not provided for UTF-8 text */
+ utf8TextMapOffsetToNative, // chunk (UTF-16) offset -> native index
+ utf8TextMapIndexToUTF16, // native index -> chunk (UTF-16) offset
+ utf8TextClose, // close
+ NULL, // spare 1
+ NULL, // spare 2
+ NULL // spare 3
+};
+
+
+// Stand-in text used when the caller passes (NULL, 0).
+static const char gEmptyString[] = {0};
+
+// Open a UText over a UTF-8 string.
+//   ut      UText to (re)use, passed through to utext_setup().
+//   s       the UTF-8 text; NULL is accepted only together with length 0.
+//   length  byte length, or -1 for NUL-terminated text. Must fit in int32_t.
+//   status  set to U_ILLEGAL_ARGUMENT_ERROR for a NULL string with non-zero
+//           length, a length below -1, or a length above INT32_MAX.
+// Returns NULL on entry failure or bad arguments, otherwise the result of utext_setup().
+U_CAPI UText * U_EXPORT2
+utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status) {
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (s==NULL && length==0) {
+        s = gEmptyString;
+    }
+    if (s==NULL || length<-1 || length>INT32_MAX) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    // Extra storage holds two UTF8Buf structures, referenced by ut->p and ut->q.
+    ut = utext_setup(ut, sizeof(UTF8Buf) * 2, status);
+    if (U_FAILURE(*status)) {
+        return ut;
+    }
+
+    const int32_t length32 = (int32_t)length;
+    ut->pFuncs  = &utf8Funcs;
+    ut->context = s;
+    ut->b       = length32;
+    if (length32 < 0) {
+        // NUL-terminated input: length not known yet.
+        ut->c = 0;
+        ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
+    } else {
+        ut->c = length32;
+    }
+    ut->p = ut->pExtra;
+    ut->q = (char *)ut->pExtra + sizeof(UTF8Buf);
+    return ut;
+}
+
+
+
+
+
+
+
+
+//------------------------------------------------------------------------------
+//
+// UText implementation wrapper for Replaceable (read/write)
+//
+// Use of UText data members:
+// context pointer to Replaceable.
+// providerProperties UTEXT_PROVIDER_OWNS_TEXT is set if the Replaceable (context) is owned by the UText and must be deleted on close().
+//
+//------------------------------------------------------------------------------
+
+
+
+// Number of UChars requested from the Replaceable per chunk. The minimum workable value
+// for this implementation is 3, to allow for possible trimming for code point boundaries.
+enum { REP_TEXT_CHUNK_SIZE=10 };
+ // Extra per-UText storage for Replaceable text; repTextAccess() uses it as the chunk buffer.
+struct ReplExtra {
+ /*
+ * Chunk UChars.
+ * +1 to simplify filling with surrogate pair at the end.
+ */
+ UChar s[REP_TEXT_CHUNK_SIZE+1];
+};
+
+
+U_CDECL_BEGIN
+
+// Clone function for Replaceable text.
+//   dest    UText to clone into (passed through to shallowTextClone()).
+//   src     UText to be cloned.
+//   deep    if TRUE, the clone gets its own copy of the Replaceable.
+//   status  set to U_MEMORY_ALLOCATION_ERROR if the Replaceable could not be cloned.
+// Returns the value returned by shallowTextClone().
+static UText * U_CALLCONV
+repTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
+    // First do a generic shallow clone. Does everything needed for the UText struct itself.
+    dest = shallowTextClone(dest, src, status);
+
+    // For deep clones, make a copy of the Replaceable.
+    // The copied Replaceable is owned by the newly created UText clone; the
+    // UTEXT_PROVIDER_OWNS_TEXT flag is the signal to repTextClose() to delete it.
+    //
+    if (deep && U_SUCCESS(*status)) {
+        const Replaceable *replSrc = (const Replaceable *)src->context;
+        Replaceable *replCopy = replSrc->clone();
+        if (replCopy == NULL) {
+            // clone() produced nothing (allocation failure, or a Replaceable
+            // that cannot be cloned). Report it rather than installing a NULL
+            // context that every later access would dereference.
+            *status = U_MEMORY_ALLOCATION_ERROR;
+        } else {
+            dest->context = replCopy;
+            dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
+
+            // with deep clone, the copy is writable, even when the source is not.
+            dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
+        }
+    }
+    return dest;
+}
+
+
+// Close function for Replaceable text. The generic UText close does most of
+// the work; the only provider-specific step is deleting the Replaceable when
+// this UText owns it (UTEXT_PROVIDER_OWNS_TEXT, set by a deep clone).
+static void U_CALLCONV
+repTextClose(UText *ut) {
+    if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) == 0) {
+        // Replaceable belongs to someone else; leave it alone.
+        return;
+    }
+    Replaceable *owned = (Replaceable *)ut->context;
+    delete owned;
+    ut->context = NULL;
+}
+
+
+// Native length function for Replaceable text: the length reported by the Replaceable itself.
+static int64_t U_CALLCONV
+repTextLength(UText *ut) {
+    return ((const Replaceable *)ut->context)->length();
+}
+
+
+static UBool U_CALLCONV
+repTextAccess(UText *ut, int64_t index, UBool forward) { // Access function for Replaceable text: position the chunk on 'index'.
+ const Replaceable *rep=(const Replaceable *)ut->context;
+ int32_t length=rep->length(); // Full length of the input text (may exceed one chunk)
+ // Returns FALSE only for end-of-text (forward) or start-of-text (reverse) when the chunk already reaches it.
+ // clip the requested index to the limits of the text.
+ int32_t index32 = pinIndex(index, length);
+ U_ASSERT(index<=INT32_MAX);
+
+
+ /*
+ * Compute start/limit boundaries around index, for a segment of text
+ * to be extracted.
+ * To allow for the possibility that our user gave an index to the trailing
+ * half of a surrogate pair, we must request one extra preceding UChar when
+ * going in the forward direction. This will ensure that the buffer has the
+ * entire code point at the specified index.
+ */
+ if(forward) {
+
+ if (index32>=ut->chunkNativeStart && index32<ut->chunkNativeLimit) {
+ // Buffer already contains the requested position.
+ ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);
+ return TRUE;
+ }
+ if (index32>=length && ut->chunkNativeLimit==length) {
+ // Request for end of string, and buffer already extends up to it.
+ // Can't get the data, but don't change the buffer.
+ ut->chunkOffset = length - (int32_t)ut->chunkNativeStart;
+ return FALSE;
+ }
+
+ ut->chunkNativeLimit = index + REP_TEXT_CHUNK_SIZE - 1;
+ // Going forward, so we want to have the buffer with stuff at and beyond
+ // the requested index. The -1 gets us one code point before the
+ // requested index also, to handle the case of the index being on
+ // a trail surrogate of a surrogate pair.
+ if(ut->chunkNativeLimit > length) {
+ ut->chunkNativeLimit = length;
+ }
+ // unless buffer ran off end, start is index-1.
+ ut->chunkNativeStart = ut->chunkNativeLimit - REP_TEXT_CHUNK_SIZE;
+ if(ut->chunkNativeStart < 0) {
+ ut->chunkNativeStart = 0;
+ }
+ } else {
+ // Reverse iteration. Fill buffer with data preceding the requested index.
+ if (index32>ut->chunkNativeStart && index32<=ut->chunkNativeLimit) {
+ // Requested position already in buffer.
+ ut->chunkOffset = index32 - (int32_t)ut->chunkNativeStart;
+ return TRUE;
+ }
+ if (index32==0 && ut->chunkNativeStart==0) {
+ // Request for start, buffer already begins at start.
+ // No data, but keep the buffer as is.
+ ut->chunkOffset = 0;
+ return FALSE;
+ }
+
+ // Figure out the bounds of the chunk to extract for reverse iteration.
+ // Need to worry about chunk not splitting surrogate pairs, and while still
+ // containing the data we need.
+ // Fix by requesting a chunk that includes an extra UChar at the end.
+ // If this turns out to be a lead surrogate, we can lop it off and still have
+ // the data we wanted.
+ ut->chunkNativeStart = index32 + 1 - REP_TEXT_CHUNK_SIZE;
+ if (ut->chunkNativeStart < 0) {
+ ut->chunkNativeStart = 0;
+ }
+
+ ut->chunkNativeLimit = index32 + 1;
+ if (ut->chunkNativeLimit > length) {
+ ut->chunkNativeLimit = length;
+ }
+ }
+
+ // Extract the new chunk of text from the Replaceable source.
+ ReplExtra *ex = (ReplExtra *)ut->pExtra;
+ // UnicodeString with its buffer a writable alias to the chunk buffer
+ UnicodeString buffer(ex->s, 0 /*buffer length*/, REP_TEXT_CHUNK_SIZE /*buffer capacity*/);
+ rep->extractBetween((int32_t)ut->chunkNativeStart, (int32_t)ut->chunkNativeLimit, buffer);
+ // Describe the freshly loaded chunk; the trimming steps below may shrink it.
+ ut->chunkContents = ex->s;
+ ut->chunkLength = (int32_t)(ut->chunkNativeLimit - ut->chunkNativeStart);
+ ut->chunkOffset = (int32_t)(index32 - ut->chunkNativeStart);
+
+ // Surrogate pairs from the input text must not span chunk boundaries.
+ // If end of chunk could be the start of a surrogate, trim it off.
+ if (ut->chunkNativeLimit < length &&
+ U16_IS_LEAD(ex->s[ut->chunkLength-1])) {
+ ut->chunkLength--;
+ ut->chunkNativeLimit--;
+ if (ut->chunkOffset > ut->chunkLength) {
+ ut->chunkOffset = ut->chunkLength;
+ }
+ }
+
+ // if the first UChar in the chunk could be the trailing half of a surrogate pair,
+ // trim it off.
+ if(ut->chunkNativeStart>0 && U16_IS_TRAIL(ex->s[0])) {
+ ++(ut->chunkContents);
+ ++(ut->chunkNativeStart);
+ --(ut->chunkLength);
+ --(ut->chunkOffset);
+ }
+
+ // adjust the index/chunkOffset to a code point boundary
+ U16_SET_CP_START(ut->chunkContents, 0, ut->chunkOffset);
+
+ // Use fast indexing for get/setNativeIndex()
+ ut->nativeIndexingLimit = ut->chunkLength;
+
+ return TRUE;
+}
+
+
+
+// Extract function for Replaceable text.
+//   ut            the UText; its iteration position is left at the end of the extracted range.
+//   start, limit  native range to extract; pinned to the text and moved off trail surrogates.
+//   dest          destination buffer; may be NULL only when destCapacity is 0.
+//   destCapacity  capacity of dest in UChars; must not be negative.
+//   status        U_ILLEGAL_ARGUMENT_ERROR for a bad dest/destCapacity,
+//                 U_INDEX_OUTOFBOUNDS_ERROR when start > limit; otherwise as
+//                 set by u_terminateUChars().
+// Returns 0 on any of the errors above, otherwise the value returned by
+// u_terminateUChars() for the length of the requested range.
+static int32_t U_CALLCONV
+repTextExtract(UText *ut,
+               int64_t start, int64_t limit,
+               UChar *dest, int32_t destCapacity,
+               UErrorCode *status) {
+    const Replaceable *rep=(const Replaceable *)ut->context;
+    int32_t length=rep->length();
+
+    if(U_FAILURE(*status)) {
+        return 0;
+    }
+    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
+        // Stop here: continuing would alias a NULL or negatively sized buffer
+        // below and return a length alongside the error.
+        *status=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    if(start>limit) {
+        *status=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    int32_t start32 = pinIndex(start, length);
+    int32_t limit32 = pinIndex(limit, length);
+
+    // adjust start, limit if they point to trail half of surrogates
+    if (start32<length && U16_IS_TRAIL(rep->charAt(start32)) &&
+        U_IS_SUPPLEMENTARY(rep->char32At(start32))){
+        start32--;
+    }
+    if (limit32<length && U16_IS_TRAIL(rep->charAt(limit32)) &&
+        U_IS_SUPPLEMENTARY(rep->char32At(limit32))){
+        limit32--;
+    }
+
+    // 'length' is reused from here on as the number of UChars requested.
+    length=limit32-start32;
+    if(length>destCapacity) {
+        // Only fetch what fits; the full length is still reported to the caller.
+        limit32 = start32 + destCapacity;
+    }
+    UnicodeString buffer(dest, 0, destCapacity); // writable alias
+    rep->extractBetween(start32, limit32, buffer);
+    repTextAccess(ut, limit32, TRUE);
+
+    return u_terminateUChars(dest, destCapacity, length, status);
+}
+
+// Replace function for Replaceable text.
+//   ut            the UText; its iteration position is left at the end of the inserted text.
+//   start, limit  native range to replace; pinned to the text and snapped
+//                 outward to code point boundaries.
+//   src, length   replacement text; src may be NULL only when length is 0.
+//                 A negative length marks src as NUL-terminated.
+//   status        U_ILLEGAL_ARGUMENT_ERROR for NULL src with non-zero length,
+//                 U_INDEX_OUTOFBOUNDS_ERROR when start > limit.
+// Returns the change in text length (new length - old length), or 0 on error.
+static int32_t U_CALLCONV
+repTextReplace(UText *ut,
+               int64_t start, int64_t limit,
+               const UChar *src, int32_t length,
+               UErrorCode *status) {
+    Replaceable *rep=(Replaceable *)ut->context;
+    int32_t oldLength;
+
+    if(U_FAILURE(*status)) {
+        return 0;
+    }
+    if(src==NULL && length!=0) {
+        *status=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    oldLength=rep->length(); // will subtract from new length
+    if(start>limit ) {
+        *status=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    int32_t start32 = pinIndex(start, oldLength);
+    int32_t limit32 = pinIndex(limit, oldLength);
+
+    // Snap start & limit to code point boundaries.
+    if (start32<oldLength && U16_IS_TRAIL(rep->charAt(start32)) &&
+        start32>0 && U16_IS_LEAD(rep->charAt(start32-1)))
+    {
+        start32--;
+    }
+    // limit32>0 keeps charAt() from being asked for index -1 when the limit
+    // is at the very start of the text (mirrors the start32>0 test above).
+    if (limit32>0 && limit32<oldLength && U16_IS_LEAD(rep->charAt(limit32-1)) &&
+        U16_IS_TRAIL(rep->charAt(limit32)))
+    {
+        limit32++;
+    }
+
+    // Do the actual replace operation using methods of the Replaceable class
+    UnicodeString replStr((UBool)(length<0), src, length); // read-only alias
+    rep->handleReplaceBetween(start32, limit32, replStr);
+    int32_t newLength = rep->length();
+    int32_t lengthDelta = newLength - oldLength;
+
+    // Is the UText chunk buffer OK?
+    if (ut->chunkNativeLimit > start32) {
+        // this replace operation may have impacted the current chunk.
+        // invalidate it, which will force a reload on the next access.
+        invalidateChunk(ut);
+    }
+
+    // set the iteration position to the end of the newly inserted replacement text.
+    int32_t newIndexPos = limit32 + lengthDelta;
+    repTextAccess(ut, newIndexPos, TRUE);
+
+    return lengthDelta;
+}
+
+
+// Copy/move function for Replaceable text.
+// Copies (or, when 'move' is set, moves) the native range [start, limit) to
+// destIndex. Sets U_INDEX_OUTOFBOUNDS_ERROR when start > limit or when
+// destIndex lies strictly inside the source range. Afterwards the iteration
+// position is placed at the inserted (moved) block.
+static void U_CALLCONV
+repTextCopy(UText *ut,
+            int64_t start, int64_t limit,
+            int64_t destIndex,
+            UBool move,
+            UErrorCode *status)
+{
+    Replaceable *rep = (Replaceable *)ut->context;
+    int32_t textLength = rep->length();
+
+    if (U_FAILURE(*status)) {
+        return;
+    }
+    const UBool destInsideSource = (start<destIndex && destIndex<limit);
+    if (start>limit || destInsideSource) {
+        *status = U_INDEX_OUTOFBOUNDS_ERROR;
+        return;
+    }
+
+    int32_t srcStart = pinIndex(start, textLength);
+    int32_t srcLimit = pinIndex(limit, textLength);
+    const int32_t dest32 = pinIndex(destIndex, textLength);
+    const int32_t segLength = srcLimit - srcStart;
+
+    // TODO: snap input parameters to code point boundaries.
+
+    // Both operations begin by copying the segment to the destination.
+    rep->copy(srcStart, srcLimit, dest32);
+    if (move) {
+        // A move then replaces the original segment with nothing. If the copy
+        // landed in front of the source, the source has shifted by segLength.
+        if (dest32 < srcStart) {
+            srcStart += segLength;
+            srcLimit += segLength;
+        }
+        rep->handleReplaceBetween(srcStart, srcLimit, UnicodeString());
+    }
+
+    // If the change to the text touched the region in the chunk buffer,
+    // invalidate the buffer.
+    const int32_t firstAffectedIndex = (move && srcStart < dest32) ? srcStart : dest32;
+    if (firstAffectedIndex < ut->chunkNativeLimit) {
+        invalidateChunk(ut);
+    }
+
+    // Put iteration position at the newly inserted (moved) block. When a block
+    // was moved towards the end of the string, that position is dest32 itself.
+    const int32_t nativeIterIndex =
+        (move && dest32 > srcStart) ? dest32 : dest32 + segLength;
+
+    // Set position, reload chunk if needed.
+    repTextAccess(ut, nativeIterIndex, TRUE);
+}
+
+static const struct UTextFuncs repFuncs = // Function table installed by utext_openReplaceable().
+{
+ sizeof(UTextFuncs), // size of this table
+ 0, 0, 0, // Reserved alignment padding
+ repTextClone, // clone
+ repTextLength, // native length
+ repTextAccess, // access
+ repTextExtract, // extract
+ repTextReplace, // replace
+ repTextCopy, // copy
+ NULL, // MapOffsetToNative: not provided; repTextAccess() sets nativeIndexingLimit to the whole chunk
+ NULL, // MapIndexToUTF16: not provided, same reason
+ repTextClose, // close
+ NULL, // spare 1
+ NULL, // spare 2
+ NULL // spare 3
+};
+
+
+// Open a writable UText over a Replaceable.
+//   ut      UText to (re)use, passed through to utext_setup().
+//   rep     the Replaceable to wrap; must not be NULL.
+//   status  set to U_ILLEGAL_ARGUMENT_ERROR when rep is NULL.
+// Returns NULL on entry failure or NULL rep, otherwise the result of utext_setup().
+U_CAPI UText * U_EXPORT2
+utext_openReplaceable(UText *ut, Replaceable *rep, UErrorCode *status)
+{
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (rep == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    // Extra storage holds the ReplExtra chunk buffer.
+    ut = utext_setup(ut, sizeof(ReplExtra), status);
+    if (U_FAILURE(*status)) {
+        return ut;
+    }
+
+    // Always writable; additionally flag meta data when the Replaceable has it.
+    int32_t props = I32_FLAG(UTEXT_PROVIDER_WRITABLE);
+    if (rep->hasMetaData()) {
+        props |= I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA);
+    }
+    ut->providerProperties = props;
+
+    ut->pFuncs  = &repFuncs;
+    ut->context = rep;
+    return ut;
+}
+
+U_CDECL_END
+
+
+
+
+
+
+
+
+//------------------------------------------------------------------------------
+//
+// UText implementation for UnicodeString (read/write) and
+// for const UnicodeString (read only)
+// (same implementation, only the flags are different)
+//
+// Use of UText data members:
+// context pointer to UnicodeString
+// providerProperties UTEXT_PROVIDER_OWNS_TEXT is set IF this UText owns the string (context)
+// and it must be deleted on close(). Not set otherwise.
+//
+//------------------------------------------------------------------------------
+
+U_CDECL_BEGIN
+
+
+// Clone function for UnicodeString text.
+//   dest    UText to clone into (passed through to shallowTextClone()).
+//   src     UText to be cloned.
+//   deep    if TRUE, the clone gets its own copy of the UnicodeString.
+//   status  set to U_MEMORY_ALLOCATION_ERROR if the copy cannot be allocated.
+// Returns the value returned by shallowTextClone().
+static UText * U_CALLCONV
+unistrTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
+    // First do a generic shallow clone. Does everything needed for the UText struct itself.
+    dest = shallowTextClone(dest, src, status);
+
+    // For deep clones, make a copy of the UnicodeString.
+    // The copied UnicodeString is owned by the newly created UText clone; the
+    // UTEXT_PROVIDER_OWNS_TEXT flag is the signal to unistrTextClose() to delete it.
+    //
+    if (deep && U_SUCCESS(*status)) {
+        const UnicodeString *srcString = (const UnicodeString *)src->context;
+        UnicodeString *copyString = new UnicodeString(*srcString);
+        if (copyString == NULL) {
+            // Guard against a failed allocation, as the other clone functions
+            // in this file do, instead of installing a NULL context.
+            *status = U_MEMORY_ALLOCATION_ERROR;
+        } else {
+            dest->context = copyString;
+            dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
+
+            // with deep clone, the copy is writable, even when the source is not.
+            dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
+        }
+    }
+    return dest;
+}
+
+// Close function for UnicodeString text. The generic UText close does most of
+// the work; the only provider-specific step is deleting the UnicodeString when
+// this UText owns it (UTEXT_PROVIDER_OWNS_TEXT, set by a deep clone).
+static void U_CALLCONV
+unistrTextClose(UText *ut) {
+    if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) == 0) {
+        // String belongs to someone else; leave it alone.
+        return;
+    }
+    UnicodeString *owned = (UnicodeString *)ut->context;
+    delete owned;
+    ut->context = NULL;
+}
+
+
+// Native length function for UnicodeString text: the length of the wrapped string.
+static int64_t U_CALLCONV
+unistrTextLength(UText *t) {
+    const UnicodeString *us = (const UnicodeString *)t->context;
+    return us->length();
+}
+
+
+// Access function for UnicodeString text. The single chunk covers the whole
+// string, so only the chunk offset has to be updated.
+// Returns TRUE when there is text in the requested direction from 'index'.
+static UBool U_CALLCONV
+unistrTextAccess(UText *ut, int64_t index, UBool forward) {
+    int32_t textLength = ut->chunkLength;
+    ut->chunkOffset = pinIndex(index, textLength);
+
+    // 'index' is deliberately re-read after pinIndex(), exactly as before.
+    // Forward access fails at the end of the text, reverse access at its start.
+    if (forward) {
+        return index < textLength;
+    }
+    return index > 0;
+}
+
+
+
+// Extract function for UnicodeString text.
+//   t             the UText; chunkOffset is left after the extracted text
+//                 (or at the range start when nothing is stored).
+//   start, limit  native range to extract; each is moved to the start of its code point.
+//   dest          destination buffer; may be NULL only when destCapacity is 0.
+//   destCapacity  capacity of dest in UChars; must not be negative.
+//   pErrorCode    U_ILLEGAL_ARGUMENT_ERROR for a bad dest/destCapacity,
+//                 U_INDEX_OUTOFBOUNDS_ERROR when start < 0 or start > limit;
+//                 otherwise as set by u_terminateUChars().
+// Returns 0 on any of the errors above, otherwise the full length of the
+// requested range, even when only part of it fit in dest.
+static int32_t U_CALLCONV
+unistrTextExtract(UText *t,
+                  int64_t start, int64_t limit,
+                  UChar *dest, int32_t destCapacity,
+                  UErrorCode *pErrorCode) {
+    const UnicodeString *us=(const UnicodeString *)t->context;
+    int32_t length=us->length();
+
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
+        // Stop here so that an error is never accompanied by a non-zero
+        // length and a moved iteration position.
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    if(start<0 || start>limit) {
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    int32_t start32 = start<length ? us->getChar32Start((int32_t)start) : length;
+    int32_t limit32 = limit<length ? us->getChar32Start((int32_t)limit) : length;
+
+    // 'length' is reused from here on as the number of UChars requested.
+    length=limit32-start32;
+    if (destCapacity>0 && dest!=NULL) {
+        // Store only what fits.
+        int32_t trimmedLength = length;
+        if(trimmedLength>destCapacity) {
+            trimmedLength=destCapacity;
+        }
+        us->extract(start32, trimmedLength, dest);
+        t->chunkOffset = start32+trimmedLength;
+    } else {
+        t->chunkOffset = start32;
+    }
+    u_terminateUChars(dest, destCapacity, length, pErrorCode);
+    return length;
+}
+
+// Replace function for UnicodeString text.
+//   ut            the UText; chunkOffset is left just after the inserted text.
+//   start, limit  native range to replace; pinned to the string and moved to
+//                 the start of their code points.
+//   src, length   replacement text; src may be NULL only when length is 0.
+//   pErrorCode    U_ILLEGAL_ARGUMENT_ERROR for NULL src with non-zero length,
+//                 U_INDEX_OUTOFBOUNDS_ERROR when start > limit.
+// Returns the change in string length (new length - old length), or 0 on error.
+static int32_t U_CALLCONV
+unistrTextReplace(UText *ut,
+                  int64_t start, int64_t limit,
+                  const UChar *src, int32_t length,
+                  UErrorCode *pErrorCode) {
+    UnicodeString *us=(UnicodeString *)ut->context;
+    int32_t oldLength;
+
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(src==NULL && length!=0) {
+        // Stop here, as repTextReplace() does: the string must not be
+        // modified by a call that reports an argument error.
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    if(start>limit) {
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+    oldLength=us->length();
+    int32_t start32 = pinIndex(start, oldLength);
+    int32_t limit32 = pinIndex(limit, oldLength);
+    if (start32 < oldLength) {
+        start32 = us->getChar32Start(start32);
+    }
+    if (limit32 < oldLength) {
+        limit32 = us->getChar32Start(limit32);
+    }
+
+    // replace
+    us->replace(start32, limit32-start32, src, length);
+    int32_t newLength = us->length();
+
+    // Update the chunk description; the string's buffer may have changed.
+    ut->chunkContents = us->getBuffer();
+    ut->chunkLength = newLength;
+    ut->chunkNativeLimit = newLength;
+    ut->nativeIndexingLimit = newLength;
+
+    // Set iteration position to the point just following the newly inserted text.
+    int32_t lengthDelta = newLength - oldLength;
+    ut->chunkOffset = limit32 + lengthDelta;
+
+    return lengthDelta;
+}
+
+// Copy/move function for UnicodeString text.
+// Copies (or, when 'move' is set, moves) the native range [start, limit) to
+// destIndex, then refreshes the chunk description and the iteration position.
+// Sets U_INDEX_OUTOFBOUNDS_ERROR when the pinned start is beyond the pinned
+// limit or the pinned destination lies strictly inside the source range.
+static void U_CALLCONV
+unistrTextCopy(UText *ut,
+               int64_t start, int64_t limit,
+               int64_t destIndex,
+               UBool move,
+               UErrorCode *pErrorCode) {
+    UnicodeString *us = (UnicodeString *)ut->context;
+    int32_t textLength = us->length();
+
+    if (U_FAILURE(*pErrorCode)) {
+        return;
+    }
+    int32_t srcStart = pinIndex(start, textLength);
+    const int32_t srcLimit = pinIndex(limit, textLength);
+    const int32_t dest32 = pinIndex(destIndex, textLength);
+
+    const UBool destInsideSource = (srcStart<dest32 && dest32<srcLimit);
+    if (srcStart>srcLimit || destInsideSource) {
+        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return;
+    }
+
+    // Both operations begin by copying the segment to the destination.
+    const int32_t segLength = srcLimit - srcStart;
+    us->copy(srcStart, srcLimit, dest32);
+    if (move) {
+        // A move then removes the original. If the copy landed in front of
+        // the source, the source has shifted by segLength.
+        if (dest32 < srcStart) {
+            srcStart += segLength;
+        }
+        us->remove(srcStart, segLength);
+    }
+
+    // update chunk description, set iteration position.
+    ut->chunkContents = us->getBuffer();
+    if (!move) {
+        // copy operation, string length grows
+        ut->chunkLength += srcLimit - srcStart;
+        ut->chunkNativeLimit = ut->chunkLength;
+        ut->nativeIndexingLimit = ut->chunkLength;
+    }
+
+    // Iteration position to end of the newly inserted text.
+    // NOTE(review): after a move towards the start, srcStart has been shifted
+    // but srcLimit has not, so the second expression evaluates to dest32;
+    // confirm that is the intended position.
+    if (move && dest32 > srcStart) {
+        ut->chunkOffset = dest32;
+    } else {
+        ut->chunkOffset = dest32 + srcLimit - srcStart;
+    }
+}
+
+static const struct UTextFuncs unistrFuncs = // Function table installed by utext_openConstUnicodeString().
+{
+ sizeof(UTextFuncs), // size of this table
+ 0, 0, 0, // Reserved alignment padding
+ unistrTextClone, // clone
+ unistrTextLength, // native length
+ unistrTextAccess, // access
+ unistrTextExtract, // extract
+ unistrTextReplace, // replace
+ unistrTextCopy, // copy
+ NULL, // MapOffsetToNative: not provided; nativeIndexingLimit is set to the whole chunk on open
+ NULL, // MapIndexToUTF16: not provided, same reason
+ unistrTextClose, // close
+ NULL, // spare 1
+ NULL, // spare 2
+ NULL // spare 3
+};
+
+
+
+U_CDECL_END
+
+
+// Open a writable UText over a UnicodeString: opens it exactly as the
+// read-only variant does, then adds the UTEXT_PROVIDER_WRITABLE flag on success.
+U_CAPI UText * U_EXPORT2
+utext_openUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) {
+    ut = utext_openConstUnicodeString(ut, s, status);
+    if (U_FAILURE(*status)) {
+        return ut;
+    }
+    ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
+    return ut;
+}
+
+
+
+// Open a read-only UText over a UnicodeString.
+// A bogus string is rejected with U_ILLEGAL_ARGUMENT_ERROR after the UText
+// has been re-opened on empty text. Otherwise the whole string is described
+// as a single chunk.
+U_CAPI UText * U_EXPORT2
+utext_openConstUnicodeString(UText *ut, const UnicodeString *s, UErrorCode *status) {
+    if (U_SUCCESS(*status) && s->isBogus()) {
+        // The UnicodeString is bogus, but we still need to detach the UText
+        // from whatever it was hooked to before, if anything.
+        utext_openUChars(ut, NULL, 0, status);
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return ut;
+    }
+
+    ut = utext_setup(ut, 0, status);
+    if (U_FAILURE(*status)) {
+        return ut;
+    }
+
+    // note: use the standard (writable) function table for UnicodeString.
+    //       The flag settings disable writing, so having the functions in
+    //       the table is harmless.
+    ut->pFuncs              = &unistrFuncs;
+    ut->context             = s;
+    ut->providerProperties  = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);
+
+    // One chunk, covering the entire string.
+    ut->chunkContents       = s->getBuffer();
+    ut->chunkLength         = s->length();
+    ut->chunkNativeStart    = 0;
+    ut->chunkNativeLimit    = ut->chunkLength;
+    ut->nativeIndexingLimit = ut->chunkLength;
+    return ut;
+}
+
+//------------------------------------------------------------------------------
+//
+// UText implementation for const UChar * strings
+//
+// Use of UText data members:
+// context pointer to the (const UChar *) string
+// a length. -1 if not yet known.
+//
+// TODO: support 64 bit lengths.
+//
+//------------------------------------------------------------------------------
+
+U_CDECL_BEGIN
+
+
+// Clone function for const UChar * text.
+// A deep clone copies the string into storage owned by the clone; ownership
+// is recorded with the UTEXT_PROVIDER_OWNS_TEXT flag. Sets
+// U_MEMORY_ALLOCATION_ERROR when the copy cannot be allocated.
+static UText * U_CALLCONV
+ucstrTextClone(UText *dest, const UText * src, UBool deep, UErrorCode * status) {
+    // First do a generic shallow clone.
+    dest = shallowTextClone(dest, src, status);
+    if (!deep || U_FAILURE(*status)) {
+        return dest;
+    }
+
+    U_ASSERT(utext_nativeLength(dest) < INT32_MAX);
+    int32_t len = (int32_t)utext_nativeLength(dest);
+
+    // The cloned string IS going to be NUL terminated, whether or not the original was.
+    const UChar *srcStr = (const UChar *)src->context;
+    UChar *copyStr = (UChar *)uprv_malloc((len+1) * sizeof(UChar));
+    if (copyStr == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return dest;
+    }
+
+    int64_t k = 0;
+    while (k < len) {
+        copyStr[k] = srcStr[k];
+        k++;
+    }
+    copyStr[len] = 0;
+
+    dest->context = copyStr;
+    dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
+    return dest;
+}
+
+
+// Close function for const UChar * text. The generic UText close does most of
+// the work; the only provider-specific step is freeing the string when this
+// UText owns it (UTEXT_PROVIDER_OWNS_TEXT, set by a deep clone).
+static void U_CALLCONV
+ucstrTextClose(UText *ut) {
+    if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) == 0) {
+        // String belongs to someone else; nothing to free.
+        return;
+    }
+    uprv_free((UChar *)ut->context);
+    ut->context = NULL;
+}
+
+
+
+// Native length function for const UChar * text.
+// ut->a holds the length, or a negative value while the length of a
+// NUL-terminated string is still unknown. In that case the rest of the string
+// is scanned here and the chunk description is extended to cover all of it.
+static int64_t U_CALLCONV
+ucstrTextLength(UText *ut) {
+    if (ut->a >= 0) {
+        // Length already known.
+        return ut->a;
+    }
+
+    // NUL terminated, length not yet known: scan for the terminator, resuming
+    // from the chunk limit. Access is not convenient for doing this because
+    // the current iteration position can't be changed.
+    const UChar *str = (const UChar *)ut->context;
+    while (str[ut->chunkNativeLimit] != 0) {
+        ut->chunkNativeLimit++;
+    }
+
+    ut->a = ut->chunkNativeLimit;
+    ut->chunkLength = (int32_t)ut->chunkNativeLimit;
+    ut->nativeIndexingLimit = ut->chunkLength;
+    ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
+    return ut->a;
+}
+
+
+static UBool U_CALLCONV
+ucstrTextAccess(UText *ut, int64_t index, UBool forward) { // Access function for const UChar * text: set the iteration position to 'index'.
+ const UChar *str = (const UChar *)ut->context;
+ // For NUL-terminated text of unknown length (ut->a < 0) this may scan ahead and grow the chunk.
+ // pin the requested index to the bounds of the string,
+ // and set current iteration position.
+ if (index<0) {
+ index = 0;
+ } else if (index < ut->chunkNativeLimit) {
+ // The request data is within the chunk as it is known so far.
+ // Put index on a code point boundary.
+ U16_SET_CP_START(str, 0, index);
+ } else if (ut->a >= 0) {
+ // We know the length of this string, and the user is requesting something
+ // at or beyond the length. Pin the requested index to the length.
+ index = ut->a;
+ } else {
+ // Null terminated string, length not yet known, and the requested index
+ // is beyond where we have scanned so far.
+ // Scan to 32 UChars beyond the requested index. The strategy here is
+ // to avoid fully scanning a long string when the caller only wants to
+ // see a few characters at its beginning.
+ int32_t scanLimit = (int32_t)index + 32;
+ if ((index + 32)>INT32_MAX || (index + 32)<0 ) { // note: int64 expression
+ scanLimit = INT32_MAX;
+ }
+ // Resume scanning where the previous scan stopped.
+ int32_t chunkLimit = (int32_t)ut->chunkNativeLimit;
+ for (; chunkLimit<scanLimit; chunkLimit++) {
+ if (str[chunkLimit] == 0) {
+ // We found the end of the string. Remember it, pin the requested index to it,
+ // and bail out of here.
+ ut->a = chunkLimit;
+ ut->chunkLength = chunkLimit;
+ ut->nativeIndexingLimit = chunkLimit;
+ if (index >= chunkLimit) {
+ index = chunkLimit;
+ } else {
+ U16_SET_CP_START(str, 0, index);
+ }
+ // The length is now known, so it is no longer expensive to obtain.
+ ut->chunkNativeLimit = chunkLimit;
+ ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
+ goto breakout;
+ }
+ }
+ // We scanned through the next batch of UChars without finding the end.
+ U16_SET_CP_START(str, 0, index);
+ if (chunkLimit == INT32_MAX) {
+ // Scanned to the limit of a 32 bit length.
+ // Forcibly trim the overlength string back so length fits in int32
+ // TODO: add support for 64 bit strings.
+ ut->a = chunkLimit;
+ ut->chunkLength = chunkLimit;
+ ut->nativeIndexingLimit = chunkLimit;
+ if (index > chunkLimit) {
+ index = chunkLimit;
+ }
+ ut->chunkNativeLimit = chunkLimit;
+ ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
+ } else {
+ // The endpoint of a chunk must not be left in the middle of a surrogate pair.
+ // If the current end is on a lead surrogate, back the end up by one.
+ // It doesn't matter if the end char happens to be an unpaired surrogate,
+ // and it's simpler not to worry about it.
+ if (U16_IS_LEAD(str[chunkLimit-1])) {
+ --chunkLimit;
+ }
+ // Null-terminated chunk with end still unknown.
+ // Update the chunk length to reflect what has been scanned thus far.
+ // That the full length is still unknown is (still) flagged by
+ // ut->a being < 0.
+ ut->chunkNativeLimit = chunkLimit;
+ ut->nativeIndexingLimit = chunkLimit;
+ ut->chunkLength = chunkLimit;
+ }
+
+ }
+breakout:
+ U_ASSERT(index<=INT32_MAX);
+ ut->chunkOffset = (int32_t)index;
+
+ // Check whether request is at the start or end
+ UBool retVal = (forward && index<ut->chunkNativeLimit) || (!forward && index>0);
+ return retVal;
+}
+
+
+
+static int32_t U_CALLCONV  // Extract function for UChar-string UTexts: copies native range [start, limit) into dest and returns the counted length di, which may exceed destCapacity.
+ucstrTextExtract(UText *ut,
+ int64_t start, int64_t limit,
+ UChar *dest, int32_t destCapacity,
+ UErrorCode *pErrorCode)
+{
+ if(U_FAILURE(*pErrorCode)) {  // an earlier error is already recorded: do nothing
+ return 0;
+ }
+ if(destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) {  // negative capacity, missing buffer, or inverted range
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ //const UChar *s=(const UChar *)ut->context;
+ int32_t si, di;  // si: source index into s, di: destination index / running count
+
+ int32_t start32;
+ int32_t limit32;
+
+ // Access the start. Does two things we need:
+ // Pins 'start' to the length of the string, if it came in out-of-bounds.
+ // Snaps 'start' to the beginning of a code point.
+ ucstrTextAccess(ut, start, TRUE);
+ const UChar *s=ut->chunkContents;
+ start32 = ut->chunkOffset;
+
+ int32_t strLength=(int32_t)ut->a;  // negative while the length of a NUL-terminated string is still unknown
+ if (strLength >= 0) {
+ limit32 = pinIndex(limit, strLength);
+ } else {
+ limit32 = pinIndex(limit, INT32_MAX);  // length unknown: the loop below stops at the terminating NUL instead
+ }
+ di = 0;
+ for (si=start32; si<limit32; si++) {
+ if (strLength<0 && s[si]==0) {
+ // Just hit the end of a null-terminated string.
+ ut->a = si; // set string length for this UText
+ ut->chunkNativeLimit = si;
+ ut->chunkLength = si;
+ ut->nativeIndexingLimit = si;
+ strLength = si;
+ limit32 = si;
+ break;
+ }
+ U_ASSERT(di>=0); /* to ensure di never exceeds INT32_MAX, which must not happen logically */
+ if (di<destCapacity) {
+ // only store if there is space.
+ dest[di] = s[si];
+ } else {
+ if (strLength>=0) {
+ // We have filled the destination buffer, and the string length is known.
+ // Cut the loop short. There is no need to scan string termination.
+ di = limit32 - start32;
+ si = limit32;
+ break;
+ }
+ }
+ di++;  // counts every source UChar, including those that did not fit in dest
+ }
+
+ // If the limit index points to a lead surrogate of a pair,
+ // add the corresponding trail surrogate to the destination.
+ if (si>0 && U16_IS_LEAD(s[si-1]) &&
+ ((si<strLength || strLength<0) && U16_IS_TRAIL(s[si])))
+ {
+ if (di<destCapacity) {
+ // store only if there is space in the output buffer.
+ dest[di++] = s[si];
+ }
+ si++;
+ }
+
+ // Put iteration position at the point just following the extracted text
+ if (si <= ut->chunkNativeLimit) {
+ ut->chunkOffset = si;
+ } else {
+ ucstrTextAccess(ut, si, TRUE);  // si is beyond the scanned chunk: let Access extend the chunk and position there
+ }
+
+ // Add a terminating NUL if space in the buffer permits,
+ // and set the error status as required.
+ u_terminateUChars(dest, destCapacity, di, pErrorCode);
+ return di;
+}
+
+static const struct UTextFuncs ucstrFuncs =  // function table installed into ut->pFuncs by utext_openUChars()
+{
+ sizeof(UTextFuncs),
+ 0, 0, 0, // Reserved alignment padding
+ ucstrTextClone,
+ ucstrTextLength,
+ ucstrTextAccess,
+ ucstrTextExtract,
+ NULL, // Replace
+ NULL, // Copy
+ NULL, // MapOffsetToNative,
+ NULL, // MapIndexToUTF16,
+ ucstrTextClose,
+ NULL, // spare 1
+ NULL, // spare 2
+ NULL, // spare 3
+};
+
+U_CDECL_END
+
+// Shared empty string substituted when the caller passes (NULL, 0).
+static const UChar gEmptyUString[] = {0};
+
+// Open (or re-initialize) a UText over a UTF-16 string.
+//   ut      existing UText to reuse, as accepted by utext_setup()
+//   s       the string; NULL is accepted only together with length==0
+//   length  number of UChars, or -1 if the string is NUL-terminated
+//   status  in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for a NULL
+//           string with nonzero length or a length outside [-1, INT32_MAX]
+// Returns the UText from utext_setup(), or NULL when the arguments are rejected.
+U_CAPI UText * U_EXPORT2
+utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status) {
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (s==NULL && length==0) {
+        s = gEmptyUString;
+    }
+    const bool lengthOutOfRange = (length < -1) || (length > INT32_MAX);
+    if (s==NULL || lengthOutOfRange) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    ut = utext_setup(ut, 0, status);
+    if (U_FAILURE(*status)) {
+        return ut;
+    }
+
+    // length is now known to be -1 or a valid non-negative int32 value.
+    const bool nulTerminated = (length == -1);
+
+    ut->pFuncs  = &ucstrFuncs;
+    ut->context = s;
+    ut->a       = length;
+    ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);
+    if (nulTerminated) {
+        // The real length is only discovered by scanning for the NUL.
+        ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
+    }
+
+    // The whole string is one chunk; for NUL-terminated input it starts out
+    // empty and grows as the access function scans forward.
+    ut->chunkContents       = s;
+    ut->chunkOffset         = 0;
+    ut->chunkNativeStart    = 0;
+    ut->chunkNativeLimit    = nulTerminated ? 0 : length;
+    ut->chunkLength         = (int32_t)ut->chunkNativeLimit;
+    ut->nativeIndexingLimit = ut->chunkLength;
+    return ut;
+}
+
+
+//------------------------------------------------------------------------------
+//
+// UText implementation for text from ICU CharacterIterators
+//
+// Use of UText data members:
+// context pointer to the CharacterIterator
+// a length of the full text.
+// p pointer to buffer 1
+// b start index of local buffer 1 contents
+// q pointer to buffer 2
+// c start index of local buffer 2 contents
+// r pointer to the character iterator if the UText owns it.
+// Null otherwise.
+//
+//------------------------------------------------------------------------------
+#define CIBufSize 16  // capacity, in UChars, of each of the two local buffers (p and q)
+
+U_CDECL_BEGIN
+// Close function for CharacterIterator-backed UTexts.
+// The generic UText close handles everything else; the only provider-specific
+// work is releasing the iterator stored in ut->r, which is non-NULL only when
+// this UText owns it (that is, when it was produced by cloning).
+static void U_CALLCONV
+charIterTextClose(UText *ut) {
+    delete (CharacterIterator *)ut->r;
+    ut->r = NULL;
+}
+
+// Length function: the text length is cached in ut->a when the UText is opened.
+static int64_t U_CALLCONV
+charIterTextLength(UText *ut) {
+    const int32_t textLength = (int32_t)ut->a;
+    return textLength;
+}
+
+// Access function for CharacterIterator-backed UTexts.
+//
+// Makes the CIBufSize-aligned block of text that contains the requested
+// position the current chunk, loading it from the CharacterIterator into one
+// of the two local buffers when necessary, and sets ut->chunkOffset.
+//
+//   ut       the UText; ut->context is the CharacterIterator, ut->a the length
+//   index    requested native index; out-of-range values are pinned to [0, length]
+//   forward  TRUE if the caller will read the character at index,
+//            FALSE if it will read the character before index
+// Returns TRUE if a character is available in the requested direction.
+static UBool U_CALLCONV
+charIterTextAccess(UText *ut, int64_t index, UBool forward) {
+    CharacterIterator *ci = (CharacterIterator *)ut->context;
+
+    // Pin the index while it is still 64 bits wide. Casting to int32_t first
+    // would discard the high bits, so an index beyond INT32_MAX (or below
+    // INT32_MIN) could alias to an arbitrary in-range position instead of
+    // being pinned to the end (or the start) of the text.
+    int64_t pinnedIndex = index;
+    if (pinnedIndex<0) {
+        pinnedIndex = 0;
+    } else if (pinnedIndex>ut->a) {
+        pinnedIndex = ut->a;
+    }
+    int32_t clippedIndex = (int32_t)pinnedIndex;
+
+    int32_t neededIndex = clippedIndex;
+    if (!forward && neededIndex>0) {
+        // reverse iteration, want the position just before what was asked for.
+        neededIndex--;
+    } else if (forward && neededIndex==ut->a && neededIndex>0) {
+        // Forward iteration, don't ask for something past the end of the text.
+        neededIndex--;
+    }
+
+    // Find the native index of the start of the buffer containing what we want.
+    neededIndex -= neededIndex % CIBufSize;
+
+    UChar *buf = NULL;
+    UBool needChunkSetup = TRUE;
+    int i;
+    if (ut->chunkNativeStart == neededIndex) {
+        // The buffer we want is already the current chunk.
+        needChunkSetup = FALSE;
+    } else if (ut->b == neededIndex) {
+        // The first buffer (buffer p) has what we need.
+        buf = (UChar *)ut->p;
+    } else if (ut->c == neededIndex) {
+        // The second buffer (buffer q) has what we need.
+        buf = (UChar *)ut->q;
+    } else {
+        // Neither buffer already has what we need.
+        // Load new data from the character iterator.
+        // Use the buf that is not the current buffer.
+        buf = (UChar *)ut->p;
+        if (ut->p == ut->chunkContents) {
+            buf = (UChar *)ut->q;
+        }
+        ci->setIndex(neededIndex);
+        for (i=0; i<CIBufSize; i++) {
+            buf[i] = ci->nextPostInc();
+            // Stop once the end of the text has been passed.
+            if (i+neededIndex > ut->a) {
+                break;
+            }
+        }
+    }
+
+    // We have a buffer with the data we need.
+    // Set it up as the current chunk, if it wasn't already.
+    if (needChunkSetup) {
+        ut->chunkContents = buf;
+        ut->chunkLength = CIBufSize;
+        ut->chunkNativeStart = neededIndex;
+        ut->chunkNativeLimit = neededIndex + CIBufSize;
+        if (ut->chunkNativeLimit > ut->a) {
+            // Last, partially filled block: trim the chunk to the text length.
+            ut->chunkNativeLimit = ut->a;
+            ut->chunkLength = (int32_t)(ut->chunkNativeLimit)-(int32_t)(ut->chunkNativeStart);
+        }
+        ut->nativeIndexingLimit = ut->chunkLength;
+        U_ASSERT(ut->chunkOffset>=0 && ut->chunkOffset<=CIBufSize);
+    }
+    ut->chunkOffset = clippedIndex - (int32_t)ut->chunkNativeStart;
+    UBool success = (forward? ut->chunkOffset<ut->chunkLength : ut->chunkOffset>0);
+    return success;
+}
+
+// Clone function for CharacterIterator-backed UTexts.
+//
+// Only shallow clones are supported: the CharacterIterator is cloned and the
+// new UText takes ownership of that clone (recorded in dest->r so that
+// charIterTextClose() deletes it).
+//
+//   dest    UText to reuse, as accepted by utext_openCharacterIterator()
+//   src     the UText to clone
+//   deep    must be FALSE; a deep clone fails with U_UNSUPPORTED_ERROR
+//   status  in/out error code
+// Returns the clone, or NULL / the result of the failed open on error.
+static UText * U_CALLCONV
+charIterTextClone(UText *dest, const UText *src, UBool deep, UErrorCode * status) {
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    if (deep) {
+        // There is no CharacterIterator API for cloning the underlying text storage.
+        *status = U_UNSUPPORTED_ERROR;
+        return NULL;
+    }
+
+    const CharacterIterator *srcCI = (const CharacterIterator *)src->context;
+    CharacterIterator *clonedCI = srcCI->clone();
+    if (clonedCI == NULL) {
+        // Do not hand a NULL iterator to utext_openCharacterIterator(),
+        // which dereferences it unconditionally.
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    dest = utext_openCharacterIterator(dest, clonedCI, status);
+    if (U_FAILURE(*status)) {
+        // The open failed before anything took ownership of the clone
+        // (dest->r is only set below), so release it here to avoid a leak.
+        delete clonedCI;
+        return dest;
+    }
+
+    // cast off const on getNativeIndex.
+    // For CharacterIterator based UTexts, this is safe, the operation is const.
+    int64_t ix = utext_getNativeIndex((UText *)src);
+    utext_setNativeIndex(dest, ix);
+    dest->r = clonedCI;    // flags that this UText owns the CharacterIterator
+    return dest;
+}
+
+// Extract function for CharacterIterator-backed UTexts.
+//
+// Copies the text in native range [start, limit) into dest, one code point at
+// a time. Code points that do not fit are still counted, and status is set to
+// U_BUFFER_OVERFLOW_ERROR. Afterwards the UText is positioned just past the
+// last code point that was actually copied.
+//
+// Returns the number of UChars counted for the range.
+static int32_t U_CALLCONV
+charIterTextExtract(UText *ut,
+                    int64_t start, int64_t limit,
+                    UChar *dest, int32_t destCapacity,
+                    UErrorCode *status)
+{
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    if (destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    const int32_t textLength = (int32_t)ut->a;
+    const int32_t from = pinIndex(start, textLength);
+    const int32_t to   = pinIndex(limit, textLength);
+
+    CharacterIterator *iter = (CharacterIterator *)ut->context;
+    iter->setIndex32(from);   // Moves ix to lead of surrogate pair, if needed.
+
+    int32_t srcPos     = iter->getIndex();
+    int32_t lastCopied = srcPos;   // source index just past the last stored code point
+    int32_t written    = 0;        // UChars stored, or required once dest has overflowed
+
+    while (srcPos < to) {
+        const UChar32 cp    = iter->next32PostInc();
+        const int32_t units = U16_LENGTH(cp);
+        U_ASSERT(written+units>0); /* to ensure desti+len never exceeds MAX_INT32, which must not happen logically */
+        if (written+units > destCapacity) {
+            // No room: keep counting so the caller learns the required size.
+            written += units;
+            *status = U_BUFFER_OVERFLOW_ERROR;
+        } else {
+            U16_APPEND_UNSAFE(dest, written, cp);
+            lastCopied = srcPos+units;
+        }
+        srcPos += units;
+    }
+
+    charIterTextAccess(ut, lastCopied, TRUE);
+
+    u_terminateUChars(dest, destCapacity, written, status);
+    return written;
+}
+
+static const struct UTextFuncs charIterFuncs =  // function table installed into ut->pFuncs by utext_openCharacterIterator()
+{
+ sizeof(UTextFuncs),
+ 0, 0, 0, // Reserved alignment padding
+ charIterTextClone,
+ charIterTextLength,
+ charIterTextAccess,
+ charIterTextExtract,
+ NULL, // Replace
+ NULL, // Copy
+ NULL, // MapOffsetToNative,
+ NULL, // MapIndexToUTF16,
+ charIterTextClose,
+ NULL, // spare 1
+ NULL, // spare 2
+ NULL // spare 3
+};
+U_CDECL_END
+
+
+// Open (or re-initialize) a UText over an ICU CharacterIterator.
+//   ut      existing UText to reuse, as accepted by utext_setup()
+//   ci      the iterator; it must start indexing at zero, otherwise
+//           U_UNSUPPORTED_ERROR is set and NULL is returned
+//   status  in/out error code
+// Returns the UText produced by utext_setup().
+U_CAPI UText * U_EXPORT2
+utext_openCharacterIterator(UText *ut, CharacterIterator *ci, UErrorCode *status) {
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    if (ci->startIndex() > 0) {
+        // No support for CharacterIterators that do not start indexing from zero.
+        *status = U_UNSUPPORTED_ERROR;
+        return NULL;
+    }
+
+    // Extra space in UText for 2 buffers of CIBufSize UChars each.
+    const int32_t bufferBytes = 2 * CIBufSize * sizeof(UChar);
+    ut = utext_setup(ut, bufferBytes, status);
+    if (U_FAILURE(*status)) {
+        return ut;
+    }
+
+    UChar *firstBuffer = (UChar *)ut->pExtra;
+
+    ut->pFuncs             = &charIterFuncs;
+    ut->context            = ci;
+    ut->providerProperties = 0;
+    ut->a = ci->endIndex();              // Length of text
+    ut->p = firstBuffer;                 // First buffer
+    ut->b = -1;                          // Native index of first buffer contents
+    ut->q = firstBuffer + CIBufSize;     // Second buffer
+    ut->c = -1;                          // Native index of second buffer contents
+
+    // Start with an empty current chunk; the first access faults data in.
+    // chunkNativeStart and chunkOffset must sum to zero so that
+    // getNativeIndex() yields zero before any call to Access(), but they
+    // cannot both be zero or Access() would take the chunk to be valid.
+    ut->chunkContents       = firstBuffer;
+    ut->chunkNativeStart    = -1;
+    ut->chunkOffset         = 1;
+    ut->chunkNativeLimit    = 0;
+    ut->chunkLength         = 0;
+    ut->nativeIndexingLimit = ut->chunkOffset;  // enables native indexing
+    return ut;
+}
diff --git a/thirdparty/icu4c/common/utf_impl.cpp b/thirdparty/icu4c/common/utf_impl.cpp
new file mode 100644
index 0000000000..9dd241a12b
--- /dev/null
+++ b/thirdparty/icu4c/common/utf_impl.cpp
@@ -0,0 +1,329 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: utf_impl.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep13
+* created by: Markus W. Scherer
+*
+* This file provides implementation functions for macros in the utfXX.h
+* that would otherwise be too long as macros.
+*/
+
+/* set import/export definitions */
+#ifndef U_UTF8_IMPL
+# define U_UTF8_IMPL
+#endif
+
+#include "unicode/utypes.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "uassert.h"
+
+/*
+ * Table of the number of utf8 trail bytes, indexed by the lead byte.
+ * Used by the deprecated macro UTF8_COUNT_TRAIL_BYTES, defined in utf_old.h
+ *
+ * The current macro, U8_COUNT_TRAIL_BYTES, does _not_ use this table.
+ *
+ * Note that this table cannot be removed, even if UTF8_COUNT_TRAIL_BYTES were
+ * changed to no longer use it. References to the table from expansions of UTF8_COUNT_TRAIL_BYTES
+ * may exist in old client code that must continue to run with newer icu library versions.
+ *
+ * This table could be replaced on many machines by
+ * a few lines of assembler code using an
+ * "index of first 0-bit from msb" instruction and
+ * one or two more integer instructions.
+ *
+ * For example, on an i386, do something like
+ * - MOV AL, leadByte
+ * - NOT AL (8-bit, leave b15..b8==0..0, reverse only b7..b0)
+ * - MOV AH, 0
+ * - BSR BX, AX (16-bit)
+ * - MOV AX, 6 (result)
+ * - JZ finish (ZF==1 if leadByte==0xff)
+ * - SUB AX, BX (result)
+ * -finish:
+ * (BSR: Bit Scan Reverse, scans for a 1-bit, starting from the MSB)
+ */
+extern "C" U_EXPORT const uint8_t
+utf8_countTrailBytes[256]={  // 16 entries per row, indexed by lead byte value
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 00..0F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 10..1F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 20..2F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 30..3F
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 40..4F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 50..5F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 60..6F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 70..7F
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 80..8F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 90..9F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // A0..AF
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // B0..BF
+
+ // illegal C0 & C1
+ // 2-byte lead bytes C2..DF
+ 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ // 3-byte lead bytes E0..EF
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ // 4-byte lead bytes F0..F4
+ // illegal F5..FF
+ 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+static const UChar32
+utf8_errorValue[6]={  // only four initializers are listed, so elements [4] and [5] are zero-initialized
+ // Same values as UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_2, UTF_ERROR_VALUE,
+ // but without relying on the obsolete unicode/utf_old.h.
+ 0x15, 0x9f, 0xffff,
+ 0x10ffff
+};
+
+// Select the value reported for an illegal sequence.
+//   count   index into utf8_errorValue, used only for the obsolete modes
+//   strict  error mode: -3 yields U+FFFD, any other negative value yields
+//           U_SENTINEL, and values >= 0 yield utf8_errorValue[count]
+static UChar32
+errorValue(int32_t count, int8_t strict) {
+    if(strict==-3) {
+        return 0xfffd;
+    }
+    if(strict<0) {
+        return U_SENTINEL;
+    }
+    return utf8_errorValue[count];
+}
+
+/*
+ * Handle the non-inline part of the U8_NEXT() and U8_NEXT_FFFD() macros
+ * and their obsolete sibling UTF8_NEXT_CHAR_SAFE().
+ *
+ * U8_NEXT() supports NUL-terminated strings indicated via length<0.
+ *
+ * The "strict" parameter controls the error behavior:
+ * <0 "Safe" behavior of U8_NEXT():
+ * -1: All illegal byte sequences yield U_SENTINEL=-1.
+ * -2: Same as -1, except for lenient treatment of surrogate code points as legal.
+ * Some implementations use this for roundtripping of
+ * Unicode 16-bit strings that are not well-formed UTF-16, that is, they
+ * contain unpaired surrogates.
+ * -3: All illegal byte sequences yield U+FFFD.
+ * 0 Obsolete "safe" behavior of UTF8_NEXT_CHAR_SAFE(..., FALSE):
+ * All illegal byte sequences yield a positive code point such that this
+ * result code point would be encoded with the same number of bytes as
+ * the illegal sequence.
+ * >0 Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., TRUE):
+ * Same as the obsolete "safe" behavior, but non-characters are also treated
+ * like illegal sequences.
+ *
+ * Note that a UBool is the same as an int8_t.
+ */
+U_CAPI UChar32 U_EXPORT2
+utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) {
+ // *pi is one after byte c.
+ int32_t i=*pi;
+ // length can be negative for NUL-terminated strings: Read and validate one byte at a time.
+ if(i==length || c>0xf4) {
+ // end of string, or not a lead byte
+ } else if(c>=0xf0) {
+ // Test for 4-byte sequences first because
+ // U8_NEXT() handles shorter valid sequences inline.
+ uint8_t t1=s[i], t2, t3;
+ c&=7;  // keep the low 3 bits of the 4-byte lead
+ if(U8_IS_VALID_LEAD4_AND_T1(c, t1) &&
+ ++i!=length && (t2=s[i]-0x80)<=0x3f &&  // uint8_t arithmetic: only bytes 0x80..0xBF give a value <=0x3f
+ ++i!=length && (t3=s[i]-0x80)<=0x3f) {
+ ++i;
+ c=(c<<18)|((t1&0x3f)<<12)|(t2<<6)|t3;
+ // strict: forbid non-characters like U+fffe
+ if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
+ *pi=i;
+ return c;
+ }
+ }
+ } else if(c>=0xe0) {
+ c&=0xf;  // keep the low 4 bits of the 3-byte lead
+ if(strict!=-2) {
+ uint8_t t1=s[i], t2;
+ if(U8_IS_VALID_LEAD3_AND_T1(c, t1) &&
+ ++i!=length && (t2=s[i]-0x80)<=0x3f) {
+ ++i;
+ c=(c<<12)|((t1&0x3f)<<6)|t2;
+ // strict: forbid non-characters like U+fffe
+ if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
+ *pi=i;
+ return c;
+ }
+ }
+ } else {
+ // strict=-2 -> lenient: allow surrogates
+ uint8_t t1=s[i]-0x80, t2;
+ if(t1<=0x3f && (c>0 || t1>=0x20) &&  // c==0 (lead E0) with a first trail byte of 80..9F is rejected
+ ++i!=length && (t2=s[i]-0x80)<=0x3f) {
+ *pi=i+1;
+ return (c<<12)|(t1<<6)|t2;
+ }
+ }
+ } else if(c>=0xc2) {
+ uint8_t t1=s[i]-0x80;
+ if(t1<=0x3f) {
+ *pi=i+1;
+ return ((c-0xc0)<<6)|t1;
+ }
+ } // else 0x80<=c<0xc2 is not a lead byte
+
+ /* error handling */
+ c=errorValue(i-*pi, strict);  // i-*pi is how far i advanced past the lead byte before the failure
+ *pi=i;
+ return c;
+}
+
+U_CAPI int32_t U_EXPORT2  // Writes c into s at index i as a 2-, 3- or 4-byte sequence when it fits below length; returns the updated index.
+utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError) {
+ if((uint32_t)(c)<=0x7ff) {  // NOTE(review): values <=0x7f also take this 2-byte branch; presumably callers handle ASCII before calling - confirm
+ if((i)+1<(length)) {
+ (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0);
+ (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80);
+ return i;
+ }
+ } else if((uint32_t)(c)<=0xffff) {
+ /* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8. */
+ if((i)+2<(length) && !U_IS_SURROGATE(c)) {
+ (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0);
+ (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80);
+ (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80);
+ return i;
+ }
+ } else if((uint32_t)(c)<=0x10ffff) {
+ if((i)+3<(length)) {
+ (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0);
+ (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80);
+ (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80);
+ (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80);
+ return i;
+ }
+ }
+ /* c>0x10ffff or not enough space, write an error value */
+ if(pIsError!=NULL) {
+ *pIsError=TRUE;  // the caller asked for an error flag: report it and write nothing
+ } else {
+ length-=i;  // from here on, length is the remaining capacity
+ if(length>0) {
+ int32_t offset;
+ if(length>3) {
+ length=3;
+ }
+ s+=i;
+ offset=0;
+ c=utf8_errorValue[length-1];  // 0x15, 0x9f or 0xffff, selected by the remaining capacity (capped at 3)
+ U8_APPEND_UNSAFE(s, offset, c);
+ i=i+offset;
+ }
+ }
+ return i;
+}
+
+U_CAPI UChar32 U_EXPORT2  // Decodes backward from trail byte c at index *pi, reading no byte before index start; on a match *pi moves to the lead byte.
+utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict) {
+ // *pi is the index of byte c.
+ int32_t i=*pi;
+ if(U8_IS_TRAIL(c) && i>start) {
+ uint8_t b1=s[--i];  // byte immediately before c
+ if(U8_IS_LEAD(b1)) {
+ if(b1<0xe0) {
+ *pi=i;
+ return ((b1-0xc0)<<6)|(c&0x3f);
+ } else if(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c)) {
+ // Truncated 3- or 4-byte sequence.
+ *pi=i;
+ return errorValue(1, strict);
+ }
+ } else if(U8_IS_TRAIL(b1) && i>start) {
+ // Extract the value bits from the last trail byte.
+ c&=0x3f;
+ uint8_t b2=s[--i];  // two bytes before c
+ if(0xe0<=b2 && b2<=0xf4) {
+ if(b2<0xf0) {
+ b2&=0xf;
+ if(strict!=-2) {
+ if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+ *pi=i;
+ c=(b2<<12)|((b1&0x3f)<<6)|c;
+ if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
+ return c;
+ } else {
+ // strict: forbid non-characters like U+fffe
+ return errorValue(2, strict);
+ }
+ }
+ } else {
+ // strict=-2 -> lenient: allow surrogates
+ b1-=0x80;
+ if((b2>0 || b1>=0x20)) {  // b2==0 (lead E0) with first trail byte 80..9F is rejected
+ *pi=i;
+ return (b2<<12)|(b1<<6)|c;
+ }
+ }
+ } else if(U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
+ // Truncated 4-byte sequence.
+ *pi=i;
+ return errorValue(2, strict);
+ }
+ } else if(U8_IS_TRAIL(b2) && i>start) {
+ uint8_t b3=s[--i];  // three bytes before c
+ if(0xf0<=b3 && b3<=0xf4) {
+ b3&=7;
+ if(U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
+ *pi=i;
+ c=(b3<<18)|((b2&0x3f)<<12)|((b1&0x3f)<<6)|c;
+ if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
+ return c;
+ } else {
+ // strict: forbid non-characters like U+fffe
+ return errorValue(3, strict);
+ }
+ }
+ }
+ }
+ }
+ }
+ return errorValue(0, strict);  // nothing matched: *pi is left unchanged
+}
+
+U_CAPI int32_t U_EXPORT2  // Returns the index of the lead byte of the sequence containing s[i], or i itself when no matching lead is found at or after start.
+utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i) {
+ // Same as utf8_prevCharSafeBody(..., strict=-1) minus assembling code points.
+ int32_t orig_i=i;
+ uint8_t c=s[i];
+ if(U8_IS_TRAIL(c) && i>start) {
+ uint8_t b1=s[--i];  // byte immediately before c
+ if(U8_IS_LEAD(b1)) {
+ if(b1<0xe0 ||
+ (b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
+ return i;
+ }
+ } else if(U8_IS_TRAIL(b1) && i>start) {
+ uint8_t b2=s[--i];  // two bytes before c
+ if(0xe0<=b2 && b2<=0xf4) {
+ if(b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b2, b1) : U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
+ return i;
+ }
+ } else if(U8_IS_TRAIL(b2) && i>start) {
+ uint8_t b3=s[--i];  // three bytes before c
+ if(0xf0<=b3 && b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
+ return i;
+ }
+ }
+ }
+ }
+ return orig_i;  // no matching lead byte within reach: do not move
+}
diff --git a/thirdparty/icu4c/common/util.cpp b/thirdparty/icu4c/common/util.cpp
new file mode 100644
index 0000000000..86e5c791ba
--- /dev/null
+++ b/thirdparty/icu4c/common/util.cpp
@@ -0,0 +1,421 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2001-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 11/19/2001 aliu Creation.
+**********************************************************************
+*/
+
+#include "unicode/unimatch.h"
+#include "unicode/utf16.h"
+#include "patternprops.h"
+#include "util.h"
+
+// Define UChar constants using hex for EBCDIC compatibility
+
+static const UChar BACKSLASH = 0x005C; /*\*/
+static const UChar UPPER_U = 0x0055; /*U*/
+static const UChar LOWER_U = 0x0075; /*u*/
+static const UChar APOSTROPHE = 0x0027; // '\''
+static const UChar SPACE = 0x0020; // ' '
+
+// "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+static const UChar DIGITS[] = {  // indexed by digit value, 0..35
+ 48,49,50,51,52,53,54,55,56,57,  // '0'..'9'
+ 65,66,67,68,69,70,71,72,73,74,  // 'A'..'J'
+ 75,76,77,78,79,80,81,82,83,84,  // 'K'..'T'
+ 85,86,87,88,89,90  // 'U'..'Z'
+};
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Append the digits of n, written in the given radix, to result.
+ * A '-' is appended first when n is negative. An unsupported radix
+ * (outside 2..36) appends a single '?' instead.
+ * @param result    the string to append to
+ * @param n         the number to format; every int32_t value is supported,
+ *                  including INT32_MIN
+ * @param radix     the radix, 2..36
+ * @param minDigits minimum number of digits to emit, zero-padded on the left
+ * @return a reference to result
+ */
+UnicodeString& ICU_Utility::appendNumber(UnicodeString& result, int32_t n,
+                                         int32_t radix, int32_t minDigits) {
+    if (radix < 2 || radix > 36) {
+        // Bogus radix
+        return result.append((UChar)63/*?*/);
+    }
+    // Work on the magnitude in unsigned arithmetic. Negating INT32_MIN as a
+    // signed value overflows (undefined behavior) and previously produced a
+    // negative index into DIGITS; unsigned negation is well defined and
+    // yields 2^31.
+    uint32_t magnitude = (uint32_t)n;
+    if (n < 0) {
+        magnitude = 0u - magnitude;
+        result.append((UChar)45/*-*/);
+    }
+    const uint32_t base = (uint32_t)radix;
+    // First determine the place value of the most significant digit.
+    // place never exceeds magnitude, so it cannot overflow.
+    uint32_t place = 1;
+    for (uint32_t rest = magnitude; rest >= base; rest /= base) {
+        place *= base;
+        --minDigits;
+    }
+    // Left-pad with zeros up to the requested minimum width.
+    while (--minDigits > 0) {
+        result.append(DIGITS[0]);
+    }
+    // Now generate the digits, most significant first.
+    while (place > 0) {
+        const uint32_t digit = magnitude / place;
+        result.append(DIGITS[digit]);
+        magnitude -= digit * place;
+        place /= base;
+    }
+    return result;
+}
+
+/**
+ * Test whether a code point lies outside the printable ASCII range
+ * U+0020..U+007E.
+ * @return true for anything that is not printable ASCII
+ */
+UBool ICU_Utility::isUnprintable(UChar32 c) {
+    return c < 0x20 || c > 0x7E;
+}
+
+/**
+ * Append an escape for c if it is not printable ASCII: \uxxxx when c
+ * fits in 16 bits, \Uxxxxxxxx otherwise, using upper-case hex digits.
+ * Printable ASCII is left alone and nothing is appended.
+ * @return TRUE if an escape was appended, FALSE if c is printable ASCII
+ */
+UBool ICU_Utility::escapeUnprintable(UnicodeString& result, UChar32 c) {
+    if (!isUnprintable(c)) {
+        return FALSE;
+    }
+    result.append(BACKSLASH);
+    if ((c & ~0xFFFF) != 0) {
+        // Bits above the low 16 are set: long form, upper four nibbles first.
+        result.append(UPPER_U);
+        for (int32_t shift = 28; shift >= 16; shift -= 4) {
+            result.append(DIGITS[0xF&(c>>shift)]);
+        }
+    } else {
+        result.append(LOWER_U);
+    }
+    // The low four nibbles are common to both forms.
+    for (int32_t shift = 12; shift >= 0; shift -= 4) {
+        result.append(DIGITS[0xF&(c>>shift)]);
+    }
+    return TRUE;
+}
+
+/**
+ * Returns the index of a character, ignoring quoted text.
+ * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
+ * found by a search for 'h'.
+ */
+// FOR FUTURE USE. DISABLE FOR NOW for coverage reasons.
+/*
+int32_t ICU_Utility::quotedIndexOf(const UnicodeString& text,
+ int32_t start, int32_t limit,
+ UChar charToFind) {
+ for (int32_t i=start; i<limit; ++i) {
+ UChar c = text.charAt(i);
+ if (c == BACKSLASH) {
+ ++i;
+ } else if (c == APOSTROPHE) {
+ while (++i < limit
+ && text.charAt(i) != APOSTROPHE) {}
+ } else if (c == charToFind) {
+ return i;
+ }
+ }
+ return -1;
+}
+*/
+
+/**
+ * Locate the first non-white-space character at or after pos.
+ * @param str     the string to scan
+ * @param pos     index at which scanning starts; updated to the result
+ *                only when advance is true
+ * @param advance if true, store the result back into pos
+ * @return the index of the first non-white-space character at or after
+ * pos, or str.length() if there is none
+ */
+int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos,
+                                    UBool advance) {
+    const UChar* base = str.getBuffer();
+    const UChar* firstNonSpace =
+        PatternProps::skipWhiteSpace(base + pos, str.length() - pos);
+    const int32_t result = (int32_t)(firstNonSpace - base);
+    if (advance) {
+        pos = result;
+    }
+    return result;
+}
+
+/**
+ * Skip over Pattern_White_Space in a Replaceable.
+ * Skipping may be done in the forward or
+ * reverse direction. In either case, the leftmost index will be
+ * inclusive, and the rightmost index will be exclusive. That is,
+ * given a range defined as [start, limit), the call
+ * skipWhitespace(text, start, limit) will advance start past leading
+ * whitespace, whereas the call skipWhitespace(text, limit, start),
+ * will back up limit past trailing whitespace.
+ * @param text the text to be analyzed
+ * @param pos either the start or limit of a range of 'text', to skip
+ * leading or trailing whitespace, respectively
+ * @param stop either the limit or start of a range of 'text', to skip
+ * leading or trailing whitespace, respectively
+ * @return the new start or limit, depending on what was passed in to
+ * 'pos'
+ */
+//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons.
+//?int32_t ICU_Utility::skipWhitespace(const Replaceable& text,
+//? int32_t pos, int32_t stop) {
+//? UChar32 c;
+//? UBool isForward = (stop >= pos);
+//?
+//? if (!isForward) {
+//? --pos; // pos is a limit, so back up by one
+//? }
+//?
+//? while (pos != stop &&
+//? PatternProps::isWhiteSpace(c = text.char32At(pos))) {
+//? if (isForward) {
+//? pos += U16_LENGTH(c);
+//? } else {
+//? pos -= U16_LENGTH(c);
+//? }
+//? }
+//?
+//? if (!isForward) {
+//? ++pos; // make pos back into a limit
+//? }
+//?
+//? return pos;
+//?}
+
+/**
+ * Match the single character 'ch', allowing white space in front of it.
+ * @param id the string to be parsed
+ * @param pos INPUT-OUTPUT parameter. On input, the offset at which
+ * parsing starts. On success, the index just past 'ch'. On failure
+ * it is restored to its input value.
+ * @param ch the non-whitespace character to be parsed.
+ * @return true if 'ch' was found after zero or more whitespace
+ * characters.
+ */
+UBool ICU_Utility::parseChar(const UnicodeString& id, int32_t& pos, UChar ch) {
+    const int32_t saved = pos;
+    skipWhitespace(id, pos, TRUE);
+    if (pos != id.length() && id.charAt(pos) == ch) {
+        ++pos;
+        return TRUE;
+    }
+    pos = saved;
+    return FALSE;
+}
+
+/**
+ * Parse a pattern string within the given Replaceable and a parsing
+ * pattern. Characters are matched literally and case-sensitively
+ * except for the following special characters:
+ *
+ * ~ zero or more Pattern_White_Space chars
+ *
+ * If end of pattern is reached with all matches along the way,
+ * pos is advanced to the first unparsed index and returned.
+ * Otherwise -1 is returned.
+ * @param pat pattern that controls parsing
+ * @param text text to be parsed, starting at index
+ * @param index offset to first character to parse
+ * @param limit offset after last character to parse
+ * @return index after last parsed character, or -1 on parse failure.
+ */
+int32_t ICU_Utility::parsePattern(const UnicodeString& pat,
+ const Replaceable& text,
+ int32_t index,
+ int32_t limit) {
+ int32_t ipat = 0;  // current position in pat
+
+ // empty pattern matches immediately
+ if (ipat == pat.length()) {
+ return index;
+ }
+
+ UChar32 cpat = pat.char32At(ipat);  // pattern code point currently being matched
+
+ while (index < limit) {
+ UChar32 c = text.char32At(index);
+
+ // parse \s*
+ if (cpat == 126 /*~*/) {
+ if (PatternProps::isWhiteSpace(c)) {
+ index += U16_LENGTH(c);
+ continue;  // stay on '~' and keep consuming white space
+ } else {
+ if (++ipat == pat.length()) {
+ return index; // success; c unparsed
+ }
+ // fall thru; process c again with next cpat
+ }
+ }
+
+ // parse literal
+ else if (c == cpat) {
+ index += U16_LENGTH(c);
+ ipat += U16_LENGTH(cpat);
+ if (ipat == pat.length()) {
+ return index; // success; c parsed
+ }
+ // fall thru; get next cpat
+ }
+
+ // match failure of literal
+ else {
+ return -1;
+ }
+
+ cpat = pat.char32At(ipat);  // reached from both fall-through branches above; index is unchanged in the '~' case
+ }
+
+ return -1; // text ended before end of pat
+}
+
+/**
+ * Parse a run of ASCII digits '0'..'9' starting at pos.
+ * @param str the string to parse
+ * @param pos INPUT-OUTPUT parameter. On input, the index of the first
+ * character to examine; on output, the index just past the last digit
+ * consumed (unchanged if there is no digit at pos).
+ * @return the parsed value, 0 if no digit was found. Values that do not
+ * fit in an int32_t are reported as INT32_MAX; all digits of the run are
+ * still consumed.
+ */
+int32_t ICU_Utility::parseAsciiInteger(const UnicodeString& str, int32_t& pos) {
+    // Accumulate in 64 bits and stop accumulating once the value has left the
+    // int32 range: the previous int32_t accumulation overflowed (undefined
+    // behavior for signed integers) on long digit runs.
+    int64_t value = 0;
+    UChar c;
+    while (pos < str.length() && (c = str.charAt(pos)) >= u'0' && c <= u'9') {
+        if (value <= INT32_MAX) {
+            value = value * 10 + (c - u'0');
+        }
+        pos++;
+    }
+    return value > INT32_MAX ? INT32_MAX : (int32_t)value;
+}
+
+/**
+ * Append a character to a rule that is being built up. To flush
+ * the quoteBuf to rule, make one final call with isLiteral == TRUE.
+ * If there is no final character, pass in (UChar32)-1 as c.
+ * @param rule the string to append the character to
+ * @param c the character to append, or (UChar32)-1 if none.
+ * @param isLiteral if true, then the given character should not be
+ * quoted or escaped. Usually this means it is a syntactic element
+ * such as > or $
+ * @param escapeUnprintable if true, then unprintable characters
+ * should be escaped using \uxxxx or \Uxxxxxxxx. These escapes will
+ * appear outside of quotes.
+ * @param quoteBuf a buffer which is used to build up quoted
+ * substrings. The caller should initially supply an empty buffer,
+ * and thereafter should not modify the buffer. The buffer should be
+ * cleared out by, at the end, calling this method with a literal
+ * character.
+ */
+void ICU_Utility::appendToRule(UnicodeString& rule,
+ UChar32 c,
+ UBool isLiteral,
+ UBool escapeUnprintable,
+ UnicodeString& quoteBuf) {
+ // If we are escaping unprintables, then escape them outside
+ // quotes. \u and \U are not recognized within quotes. The same
+ // logic applies to literals, but literals are never escaped.
+ if (isLiteral ||
+ (escapeUnprintable && ICU_Utility::isUnprintable(c))) {
+ if (quoteBuf.length() > 0) {  // flush the pending quoted substring before emitting c
+ // We prefer backslash APOSTROPHE to double APOSTROPHE
+ // (more readable, less similar to ") so if there are
+ // double APOSTROPHEs at the ends, we pull them outside
+ // of the quote.
+
+ // If the first thing in the quoteBuf is APOSTROPHE
+ // (doubled) then pull it out.
+ while (quoteBuf.length() >= 2 &&
+ quoteBuf.charAt(0) == APOSTROPHE &&
+ quoteBuf.charAt(1) == APOSTROPHE) {
+ rule.append(BACKSLASH).append(APOSTROPHE);
+ quoteBuf.remove(0, 2);
+ }
+ // If the last thing in the quoteBuf is APOSTROPHE
+ // (doubled) then remove and count it and add it after.
+ int32_t trailingCount = 0;
+ while (quoteBuf.length() >= 2 &&
+ quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE &&
+ quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) {
+ quoteBuf.truncate(quoteBuf.length()-2);
+ ++trailingCount;
+ }
+ if (quoteBuf.length() > 0) {  // whatever remains is emitted between apostrophes
+ rule.append(APOSTROPHE);
+ rule.append(quoteBuf);
+ rule.append(APOSTROPHE);
+ quoteBuf.truncate(0);
+ }
+ while (trailingCount-- > 0) {
+ rule.append(BACKSLASH).append(APOSTROPHE);
+ }
+ }
+ if (c != (UChar32)-1) {  // -1 means "flush only, no character to append"
+ /* Since spaces are ignored during parsing, they are
+ * emitted only for readability. We emit one here
+ * only if there isn't already one at the end of the
+ * rule.
+ */
+ if (c == SPACE) {
+ int32_t len = rule.length();
+ if (len > 0 && rule.charAt(len-1) != c) {  // also skips the space when rule is still empty
+ rule.append(c);
+ }
+ } else if (!escapeUnprintable || !ICU_Utility::escapeUnprintable(rule, c)) {  // the parameter shadows the member name, hence the qualified call
+ rule.append(c);
+ }
+ }
+ }
+
+ // Escape ' and '\' and don't begin a quote just for them
+ else if (quoteBuf.length() == 0 &&
+ (c == APOSTROPHE || c == BACKSLASH)) {
+ rule.append(BACKSLASH);
+ rule.append(c);
+ }
+
+ // Specials (printable ascii that isn't [0-9a-zA-Z]) and
+ // whitespace need quoting. Also append stuff to quotes if we are
+ // building up a quoted substring already.
+ else if (quoteBuf.length() > 0 ||
+ (c >= 0x0021 && c <= 0x007E &&
+ !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
+ (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
+ (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
+ PatternProps::isWhiteSpace(c)) {
+ quoteBuf.append(c);
+ // Double ' within a quote
+ if (c == APOSTROPHE) {
+ quoteBuf.append(c);
+ }
+ }
+
+ // Otherwise just append
+ else {
+ rule.append(c);
+ }
+}
+
+/**
+ * Append every UTF-16 code unit of text to the rule, one at a time, through
+ * the single-character appendToRule() with the same flags and quote buffer.
+ */
+void ICU_Utility::appendToRule(UnicodeString& rule,
+                               const UnicodeString& text,
+                               UBool isLiteral,
+                               UBool escapeUnprintable,
+                               UnicodeString& quoteBuf) {
+    int32_t unit = 0;
+    // The length is re-read on every pass, exactly as a plain for loop would.
+    while (unit < text.length()) {
+        appendToRule(rule, text[unit], isLiteral, escapeUnprintable, quoteBuf);
+        ++unit;
+    }
+}
+
+/**
+ * Append the pattern of a matcher to the given rule as a literal.
+ * A null matcher is allowed and appends nothing.
+ */
+void ICU_Utility::appendToRule(UnicodeString& rule,
+                               const UnicodeMatcher* matcher,
+                               UBool escapeUnprintable,
+                               UnicodeString& quoteBuf) {
+    if (matcher == NULL) {
+        return;
+    }
+    UnicodeString scratch;
+    const UnicodeString& pattern = matcher->toPattern(scratch, escapeUnprintable);
+    appendToRule(rule, pattern, TRUE, escapeUnprintable, quoteBuf);
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/util.h b/thirdparty/icu4c/common/util.h
new file mode 100644
index 0000000000..9c3b76d9ed
--- /dev/null
+++ b/thirdparty/icu4c/common/util.h
@@ -0,0 +1,257 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ **********************************************************************
+ * Copyright (c) 2001-2011, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ * Date Name Description
+ * 11/19/2001 aliu Creation.
+ **********************************************************************
+ */
+
+#ifndef ICU_UTIL_H
+#define ICU_UTIL_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+//--------------------------------------------------------------------
+// class ICU_Utility
+// i18n utility functions, scoped into the class ICU_Utility.
+//--------------------------------------------------------------------
+
+U_NAMESPACE_BEGIN
+
+class UnicodeMatcher;
+
+class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
+ public:
+
+ /**
+ * Append a number to the given UnicodeString in the given radix.
+ * Standard digits '0'-'9' are used and letters 'A'-'Z' for
+ * radices 11 through 36.
+ * @param result the digits of the number are appended here
+ * @param n the number to be converted to digits; may be negative.
+ * If negative, a '-' is prepended to the digits.
+ * @param radix a radix from 2 to 36 inclusive.
+ * @param minDigits the minimum number of digits, not including
+ * any '-', to produce. Values less than 2 have no effect. One
+ * digit is always emitted regardless of this parameter.
+ * @return a reference to result
+ */
+ static UnicodeString& appendNumber(UnicodeString& result, int32_t n,
+ int32_t radix = 10,
+ int32_t minDigits = 1);
+
+ /** Returns a bogus UnicodeString by value. */
+ static inline UnicodeString makeBogusString() {
+ UnicodeString result;
+ result.setToBogus();
+ return result;
+ }
+
+ /**
+ * Return true if the character is NOT printable ASCII.
+ *
+ * This method should really be in UnicodeString (or similar). For
+ * now, we implement it here and share it with friend classes.
+ */
+ static UBool isUnprintable(UChar32 c);
+
+ /**
+ * Escape unprintable characters using \uxxxx notation for U+0000 to
+ * U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is
+ * printable ASCII, then do nothing and return false. Otherwise,
+ * append the escaped notation and return true.
+ */
+ static UBool escapeUnprintable(UnicodeString& result, UChar32 c);
+
+ /**
+ * Returns the index of a character, ignoring quoted text.
+ * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
+ * found by a search for 'h'.
+ * @param text text to be searched
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param c character to search for
+ * @return Offset of the first instance of c, or -1 if not found.
+ */
+//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons.
+// static int32_t quotedIndexOf(const UnicodeString& text,
+// int32_t start, int32_t limit,
+// UChar c);
+
+ /**
+ * Skip over a sequence of zero or more white space characters at pos.
+ * @param str the string to scan
+ * @param pos INPUT-OUTPUT parameter: the index at which to start
+ * skipping; it is only modified when advance is true.
+ * @param advance if true, advance pos to the first non-white-space
+ * character at or after pos, or str.length(), if there is none.
+ * Otherwise leave pos unchanged.
+ * @return the index of the first non-white-space character at or
+ * after pos, or str.length(), if there is none.
+ */
+ static int32_t skipWhitespace(const UnicodeString& str, int32_t& pos,
+ UBool advance = false);
+
+ /**
+ * Skip over Pattern_White_Space in a Replaceable.
+ * Skipping may be done in the forward or
+ * reverse direction. In either case, the leftmost index will be
+ * inclusive, and the rightmost index will be exclusive. That is,
+ * given a range defined as [start, limit), the call
+ * skipWhitespace(text, start, limit) will advance start past leading
+ * whitespace, whereas the call skipWhitespace(text, limit, start),
+ * will back up limit past trailing whitespace.
+ * @param text the text to be analyzed
+ * @param pos either the start or limit of a range of 'text', to skip
+ * leading or trailing whitespace, respectively
+ * @param stop either the limit or start of a range of 'text', to skip
+ * leading or trailing whitespace, respectively
+ * @return the new start or limit, depending on what was passed in to
+ * 'pos'
+ */
+//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons.
+//? static int32_t skipWhitespace(const Replaceable& text,
+//? int32_t pos, int32_t stop);
+
+ /**
+ * Parse a single non-whitespace character 'ch', optionally
+ * preceded by whitespace.
+ * @param id the string to be parsed
+ * @param pos INPUT-OUTPUT parameter. On input, pos is the
+ * offset of the first character to be parsed. On output, pos
+ * is the index after the last parsed character. If the parse
+ * fails, pos will be unchanged.
+ * @param ch the non-whitespace character to be parsed.
+ * @return true if 'ch' is seen preceded by zero or more
+ * whitespace characters.
+ */
+ static UBool parseChar(const UnicodeString& id, int32_t& pos, UChar ch);
+
+ /**
+ * Parse a pattern string starting at offset pos. Keywords are
+ * matched case-insensitively. Spaces may be skipped and may be
+ * optional or required. Integer values may be parsed, and if
+ * they are, they will be returned in the given array. If
+ * successful, the offset of the next non-space character is
+ * returned. On failure, -1 is returned.
+ * @param rule the text to be parsed
+ * @param pos the offset in rule at which parsing starts
+ * @param limit the offset in rule at which parsing must stop
+ * (exclusive)
+ * @param pattern must only contain lowercase characters, which
+ * will match their uppercase equivalents as well. A space
+ * character matches one or more required spaces. A '~' character
+ * matches zero or more optional spaces. A '#' character matches
+ * an integer and stores it in parsedInts, which the caller must
+ * ensure has enough capacity.
+ * @param parsedInts array to receive parsed integers. Caller
+ * must ensure that its capacity is >= the number of '#'
+ * signs in 'pattern'.
+ * @return the position after the last character parsed, or -1 if
+ * the parse failed
+ */
+ static int32_t parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit,
+ const UnicodeString& pattern, int32_t* parsedInts);
+
+ /**
+ * Parse a pattern string within the given Replaceable and a parsing
+ * pattern. Characters are matched literally and case-sensitively
+ * except for the following special characters:
+ *
+ * ~ zero or more Pattern_White_Space chars
+ *
+ * If end of pattern is reached with all matches along the way,
+ * pos is advanced to the first unparsed index and returned.
+ * Otherwise -1 is returned.
+ * @param pat pattern that controls parsing
+ * @param text text to be parsed, starting at index
+ * @param index offset to first character to parse
+ * @param limit offset after last character to parse
+ * @return index after last parsed character, or -1 on parse failure.
+ */
+ static int32_t parsePattern(const UnicodeString& pat,
+ const Replaceable& text,
+ int32_t index,
+ int32_t limit);
+
+ /**
+ * Parse an integer at pos, either of the form \d+ or of the form
+ * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
+ * or octal format.
+ * @param rule the text to be parsed
+ * @param pos INPUT-OUTPUT parameter. On input, the index of the first
+ * character to parse. On output, the index of the character after the
+ * last parsed character.
+ * @param limit the index at which parsing must stop (exclusive)
+ * @return the parsed value
+ */
+ static int32_t parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit);
+
+ /**
+ * Parse an integer at pos using only ASCII digits.
+ * Base 10 only.
+ * @param str the text to be parsed
+ * @param pos INPUT-OUTPUT parameter. On input, the index of the first
+ * character to parse. On output, the index of the character after the
+ * last parsed character.
+ * @return the parsed value
+ */
+ static int32_t parseAsciiInteger(const UnicodeString& str, int32_t& pos);
+
+ /**
+ * Parse a Unicode identifier from the given string at the given
+ * position. Return the identifier, or an empty string if there
+ * is no identifier.
+ * @param str the string to parse
+ * @param pos INPUT-OUTPUT parameter. On INPUT, pos is the
+ * first character to examine. It must be less than str.length(),
+ * and it must not point to a whitespace character. That is, must
+ * have pos < str.length() and
+ * !UCharacter::isWhitespace(str.char32At(pos)). On
+ * OUTPUT, the position after the last parsed character.
+ * @return the Unicode identifier, or an empty string if there is
+ * no valid identifier at pos.
+ */
+ static UnicodeString parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos);
+
+ /**
+ * Parse an unsigned 31-bit integer at the given offset. Use
+ * u_digit() to parse individual characters into digits.
+ * @param text the text to be parsed
+ * @param pos INPUT-OUTPUT parameter. On entry, pos is the
+ * offset within text at which to start parsing; it should point
+ * to a valid digit. On exit, pos is the offset after the last
+ * parsed character. If the parse failed, it will be unchanged on
+ * exit. Must be >= 0 on entry.
+ * @param radix the radix in which to parse; must be >= 2 and <=
+ * 36.
+ * @return a non-negative parsed number, or -1 upon parse failure.
+ * Parse fails if there are no digits, that is, if pos does not
+ * point to a valid digit on entry, or if the number to be parsed
+ * does not fit into a 31-bit unsigned integer.
+ */
+ static int32_t parseNumber(const UnicodeString& text,
+ int32_t& pos, int8_t radix);
+
+ /**
+ * Append one character to a rule string that is being built up,
+ * quoting or escaping it as required.
+ * @param rule the rule text being built; output is appended here
+ * @param c the character to append
+ * @param isLiteral NOTE(review): presumably marks c as literal text
+ * rather than rule syntax; confirm against the implementation in
+ * util.cpp.
+ * @param escapeUnprintable if true, unprintable characters are
+ * written using escapeUnprintable()
+ * @param quoteBuf holds a quoted run that is still being collected;
+ * the caller passes the same buffer on successive calls
+ */
+ static void appendToRule(UnicodeString& rule,
+ UChar32 c,
+ UBool isLiteral,
+ UBool escapeUnprintable,
+ UnicodeString& quoteBuf);
+
+ /**
+ * Append a string to a rule by passing each of its UTF-16 code
+ * units, in order, to the single-character appendToRule().
+ * The remaining parameters are forwarded unchanged.
+ */
+ static void appendToRule(UnicodeString& rule,
+ const UnicodeString& text,
+ UBool isLiteral,
+ UBool escapeUnprintable,
+ UnicodeString& quoteBuf);
+
+ /**
+ * Given a matcher reference, which may be null, append its
+ * pattern as a literal to the given rule. A null matcher appends
+ * nothing.
+ */
+ static void appendToRule(UnicodeString& rule,
+ const UnicodeMatcher* matcher,
+ UBool escapeUnprintable,
+ UnicodeString& quoteBuf);
+
+private:
+ // do not instantiate
+ ICU_Utility();
+};
+
+U_NAMESPACE_END
+
+#endif
+//eof
diff --git a/thirdparty/icu4c/common/util_props.cpp b/thirdparty/icu4c/common/util_props.cpp
new file mode 100644
index 0000000000..95a112bc91
--- /dev/null
+++ b/thirdparty/icu4c/common/util_props.cpp
@@ -0,0 +1,217 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2001-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 11/19/2001 aliu Creation.
+**********************************************************************
+*/
+
+#include "unicode/uchar.h"
+#include "unicode/utf16.h"
+#include "patternprops.h"
+#include "util.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Parse an integer at pos, either of the form \d+ or of the form
+ * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
+ * or octal format.
+ * @param rule the text to parse
+ * @param pos INPUT-OUTPUT parameter. On input, the first
+ * character to parse. On output, the character after the last
+ * parsed character. Left unchanged if no digit was parsed or if
+ * the value does not fit in an int32_t.
+ * @param limit index at which parsing stops (exclusive)
+ * @return the parsed value, or 0 if nothing was parsed or the value
+ * is too large for an int32_t.
+ */
+int32_t ICU_Utility::parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit) {
+    int32_t count = 0;
+    int32_t value = 0;
+    int32_t p = pos;
+    int8_t radix = 10;
+
+    if (p < limit && rule.charAt(p) == 48 /*0*/) {
+        if (p+1 < limit && (rule.charAt(p+1) == 0x78 /*x*/ || rule.charAt(p+1) == 0x58 /*X*/)) {
+            p += 2;
+            radix = 16;
+        }
+        else {
+            // A lone leading '0' already counts as one parsed (octal) digit.
+            p++;
+            count = 1;
+            radix = 8;
+        }
+    }
+
+    while (p < limit) {
+        int32_t d = u_digit(rule.charAt(p), radix);
+        if (d < 0) {
+            break;
+        }
+        ++p;
+        ++count;
+        // Accumulate in 64 bits so that the range check neither relies on
+        // signed overflow (undefined behaviour) nor misses wrap-arounds that
+        // land on a larger positive value. Comparing against the maximum
+        // also lets zero digits through, so "0x0" and "00" parse as 0
+        // instead of being reported as overflow.
+        int64_t v = ((int64_t)value * radix) + d;
+        if (v > INT32_MAX) {
+            // Too many input digits: report failure by leaving pos untouched.
+            return 0;
+        }
+        value = (int32_t)v;
+    }
+    if (count > 0) {
+        pos = p;
+    }
+    return value;
+}
+
+/**
+ * Match 'rule', starting at pos and never reading at or beyond limit
+ * for literal characters, against a small parsing pattern.
+ *
+ * Pattern syntax:
+ *   ' '  one required white space character, then any further white space
+ *   '~'  zero or more white space characters
+ *   '#'  an integer (see parseInteger()), stored in the next free slot
+ *        of parsedInts
+ *   any other character is compared with the lowercased rule character,
+ *   so the pattern itself must be written in lowercase.
+ *
+ * @param rule the text to parse
+ * @param pos offset in rule of the first character to parse
+ * @param limit offset in rule at which parsing stops (exclusive)
+ * @param pattern the parsing pattern described above
+ * @param parsedInts receives one value per '#'; the caller must provide
+ * enough capacity
+ * @return the offset after the last character parsed, or -1 if the
+ * parse failed
+ */
+int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit,
+                                  const UnicodeString& pattern, int32_t* parsedInts) {
+    // TODO Update this to handle surrogates
+    int32_t intsStored = 0;
+    const int32_t patternLength = pattern.length();
+    for (int32_t patIx = 0; patIx < patternLength; ++patIx) {
+        const UChar patChar = pattern.charAt(patIx);
+
+        if (patChar == 32 /*' '*/ || patChar == 126 /*'~'*/) {
+            if (patChar == 32 /*' '*/) {
+                // At least one white space character is mandatory.
+                if (pos >= limit) {
+                    return -1;
+                }
+                const UChar ruleChar = rule.charAt(pos++);
+                if (!PatternProps::isWhiteSpace(ruleChar)) {
+                    return -1;
+                }
+            }
+            // Both ' ' and '~' then swallow any remaining white space.
+            pos = skipWhitespace(rule, pos);
+        } else if (patChar == 35 /*'#'*/) {
+            int32_t afterInt = pos;
+            parsedInts[intsStored++] = parseInteger(rule, afterInt, limit);
+            if (afterInt == pos) {
+                // Syntax error; failed to parse integer
+                return -1;
+            }
+            pos = afterInt;
+        } else {
+            if (pos >= limit) {
+                return -1;
+            }
+            const UChar ruleChar = (UChar) u_tolower(rule.charAt(pos++));
+            if (ruleChar != patChar) {
+                return -1;
+            }
+        }
+    }
+    return pos;
+}
+
+/**
+ * Read a Unicode identifier from str beginning at pos.
+ *
+ * The first code point must satisfy u_isIDStart() and every following
+ * one u_isIDPart(); reading stops at the first code point that does not
+ * qualify or at the end of the string.
+ * @param str the string to parse
+ * @param pos INPUT-OUTPUT parameter. On INPUT, the index of the first
+ * code point to examine; it must be less than str.length(). On
+ * OUTPUT, the index after the last code point of the identifier. It is
+ * left unchanged if there is no identifier at pos.
+ * @return the identifier, or an empty string if there is no valid
+ * identifier at pos.
+ */
+UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) {
+    // assert(pos < str.length());
+    UnicodeString ident;
+    int cursor = pos;
+    while (cursor < str.length()) {
+        const UChar32 cp = str.char32At(cursor);
+        const UBool atStart = (ident.length() == 0);
+        const UBool accepted = atStart ? u_isIDStart(cp) : u_isIDPart(cp);
+        if (!accepted) {
+            if (atStart) {
+                // Not an identifier at all: hand back "" and keep pos as is.
+                ident.truncate(0);
+                return ident;
+            }
+            break;
+        }
+        ident.append(cp);
+        cursor += U16_LENGTH(cp);
+    }
+    pos = cursor;
+    return ident;
+}
+
+/**
+ * Parse an unsigned 31-bit integer at the given offset. Use
+ * u_digit() to parse individual characters into digits.
+ * @param text the text to be parsed
+ * @param pos INPUT-OUTPUT parameter. On entry, pos is the
+ * offset within text at which to start parsing; it should point
+ * to a valid digit. On exit, pos is the offset after the last
+ * parsed character. If the parse failed, it will be unchanged on
+ * exit. Must be >= 0 on entry.
+ * @param radix the radix in which to parse; must be >= 2 and <=
+ * 36.
+ * @return a non-negative parsed number, or -1 upon parse failure.
+ * Parse fails if there are no digits, that is, if pos does not
+ * point to a valid digit on entry, or if the number to be parsed
+ * does not fit into a 31-bit unsigned integer.
+ */
+int32_t ICU_Utility::parseNumber(const UnicodeString& text,
+                                 int32_t& pos, int8_t radix) {
+    // assert(pos >= 0);
+    // assert(radix >= 2);
+    // assert(radix <= 36);
+    // The running value is kept in 64 bits: detecting overflow by waiting
+    // for a 32-bit value to turn negative is undefined behaviour and also
+    // misses wrap-arounds that stay positive (e.g. "5000000000" in radix 10).
+    int64_t n = 0;
+    int32_t p = pos;
+    while (p < text.length()) {
+        UChar32 ch = text.char32At(p);
+        int32_t d = u_digit(ch, radix);
+        if (d < 0) {
+            break;
+        }
+        n = radix*n + d;
+        if (n > INT32_MAX) {
+            return -1;
+        }
+        // NOTE(review): the index advances by one code unit although the
+        // character is read with char32At(); parseUnicodeIdentifier() in this
+        // file advances by U16_LENGTH(ch). Confirm whether digits outside
+        // the BMP need to be supported here.
+        ++p;
+    }
+    if (p == pos) {
+        return -1;
+    }
+    pos = p;
+    return (int32_t)n;
+}
+
+U_NAMESPACE_END
+
diff --git a/thirdparty/icu4c/common/utrace.cpp b/thirdparty/icu4c/common/utrace.cpp
new file mode 100644
index 0000000000..c981546594
--- /dev/null
+++ b/thirdparty/icu4c/common/utrace.cpp
@@ -0,0 +1,504 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2003-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: utrace.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*/
+
+#include "unicode/utrace.h"
+#include "utracimp.h"
+#include "cstring.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+
+
+/*
+ * Trace hooks and the context pointer that is passed back to them.
+ * All four are installed together by utrace_setFunctions() and reset to
+ * NULL by utrace_cleanup(); a NULL hook means that kind of trace call is
+ * silently dropped.
+ */
+static UTraceEntry *pTraceEntryFunc = NULL;
+static UTraceExit *pTraceExitFunc = NULL;
+static UTraceData *pTraceDataFunc = NULL;
+static const void *gTraceContext = NULL;
+
+/**
+ * \var utrace_level
+ * Trace level variable. Negative for "off".
+ * Written by utrace_setLevel() and utrace_cleanup(), read by
+ * utrace_getLevel().
+ */
+static int32_t
+utrace_level = UTRACE_ERROR;
+
+/* Report entry into traced function fnNumber to the entry hook, if one is installed. */
+U_CAPI void U_EXPORT2
+utrace_entry(int32_t fnNumber) {
+    if (pTraceEntryFunc == NULL) {
+        return;
+    }
+    (*pTraceEntryFunc)(gTraceContext, fnNumber);
+}
+
+
+/*
+ * Format strings handed to the exit hook, one per supported combination
+ * of return value and status. The conversion specifiers must line up with
+ * the variadic arguments of utrace_exit(): the return value comes first and
+ * the status, when present, follows it (see UTraceExitVal in utracimp.h).
+ * utrace_vformat() reads %d as an int and %p as a pointer, so the
+ * pointer-plus-status format is "%p" followed by "%d".
+ */
+static const char gExitFmt[] = "Returns.";
+static const char gExitFmtValue[] = "Returns %d.";
+static const char gExitFmtStatus[] = "Returns. Status = %d.";
+static const char gExitFmtValueStatus[] = "Returns %d. Status = %d.";
+static const char gExitFmtPtrStatus[] = "Returns %p. Status = %d.";
+
+/*
+ * Report the exit from traced function fnNumber to the exit hook, if one
+ * is installed.
+ * returnType selects the format string describing the variadic arguments;
+ * a combination that is not listed below is treated as unreachable.
+ * The hook receives the format together with the untouched va_list.
+ */
+U_CAPI void U_EXPORT2
+utrace_exit(int32_t fnNumber, int32_t returnType, ...) {
+    if (pTraceExitFunc != NULL) {
+        va_list args;
+        const char *fmt;
+
+        switch (returnType) {
+        case 0:
+            fmt = gExitFmt;
+            break;
+        case UTRACE_EXITV_I32:
+            fmt = gExitFmtValue;
+            break;
+        case UTRACE_EXITV_STATUS:
+            fmt = gExitFmtStatus;
+            break;
+        case UTRACE_EXITV_I32 | UTRACE_EXITV_STATUS:
+            fmt = gExitFmtValueStatus;
+            break;
+        case UTRACE_EXITV_PTR | UTRACE_EXITV_STATUS:
+            fmt = gExitFmtPtrStatus;
+            break;
+        default:
+            UPRV_UNREACHABLE;
+        }
+
+        va_start(args, returnType);
+        (*pTraceExitFunc)(gTraceContext, fnNumber, fmt, args);
+        va_end(args);
+    }
+}
+
+
+
+/*
+ * Forward a data trace record (level, format and its variadic arguments)
+ * for function fnNumber to the data hook, if one is installed.
+ */
+U_CAPI void U_EXPORT2
+utrace_data(int32_t fnNumber, int32_t level, const char *fmt, ...) {
+    if (pTraceDataFunc == NULL) {
+        return;
+    }
+    va_list varArgs;
+    va_start(varArgs, fmt);
+    (*pTraceDataFunc)(gTraceContext, fnNumber, level, fmt, varArgs);
+    va_end(varArgs);
+}
+
+
+/*
+ * Append one char to the output buffer, inserting 'indent' spaces first
+ * when the char starts a new line.
+ * outIx is the running output length. It keeps growing even once it
+ * reaches capacity (nothing is stored then), so the caller can learn how
+ * much space would have been needed. A NUL is stored but not counted.
+ */
+static void outputChar(char c, char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) {
+ int32_t i;
+ /* Check whether a start of line indenting is needed. Three cases:
+ * 1. At the start of the first line (output index == 0).
+ * 2. At the start of subsequent lines (preceding char in buffer == '\n')
+ * 3. When preflighting buffer len (buffer capacity is exceeded), when
+ * a \n is output. Ideally we wouldn't do the indent until the following char
+ * is received, but that won't work because there's no place to remember that
+ * the preceding char was \n. Meaning that we may overestimate the
+ * buffer size needed. No harm done.
+ */
+ /* Case 1 is tested first, so case 2 never reads outBuf[-1]. */
+ if (*outIx==0 || /* case 1. */
+ (c!='\n' && c!=0 && *outIx < capacity && outBuf[(*outIx)-1]=='\n') || /* case 2. */
+ (c=='\n' && *outIx>=capacity)) /* case 3 */
+ {
+ /* At the start of a line. Indent. */
+ for(i=0; i<indent; i++) {
+ if (*outIx < capacity) {
+ outBuf[*outIx] = ' ';
+ }
+ (*outIx)++;
+ }
+ }
+
+ if (*outIx < capacity) {
+ outBuf[*outIx] = c;
+ }
+ if (c != 0) {
+ /* Nulls only appear as end-of-string terminators. Move them to the output
+ * buffer, but do not update the length of the buffer, so that any
+ * following output will overwrite the null. */
+ (*outIx)++;
+ }
+}
+
+/*
+ * Write the low charsToOutput hex digits of val, most significant digit
+ * first, using lowercase letters. No indentation is applied.
+ */
+static void outputHexBytes(int64_t val, int32_t charsToOutput,
+                           char *outBuf, int32_t *outIx, int32_t capacity) {
+    static const char gHexChars[] = "0123456789abcdef";
+    int32_t nibble = charsToOutput - 1;
+    while (nibble >= 0) {
+        const int32_t shift = nibble * 4;
+        const char hexDigit = gHexChars[(val >> shift) & 0xf];
+        outputChar(hexDigit, outBuf, outIx, capacity, 0);
+        --nibble;
+    }
+}
+
+/*
+ * Write a pointer value as hex, two digits per byte, starting with the
+ * most significant byte. The bytes are taken from the pointer as it sits
+ * in memory, so any pointer size works.
+ */
+static void outputPtrBytes(void *val, char *outBuf, int32_t *outIx, int32_t capacity) {
+    const char *rawBytes = (const char *)&val;
+    uint32_t n;
+
+    for (n=0; n<sizeof(void *); n++) {
+#if U_IS_BIG_ENDIAN
+        /* Big endian: the most significant byte is stored first. */
+        const uint32_t byteIx = n;
+#else
+        /* Little endian: the most significant byte is stored last. */
+        const uint32_t byteIx = (uint32_t)(sizeof(void *) - 1) - n;
+#endif
+        outputHexBytes(rawBytes[byteIx], 2, outBuf, outIx, capacity);
+    }
+}
+
+/*
+ * Write a NUL-terminated char string, including its terminating NUL
+ * (which outputChar() stores without counting). A NULL pointer is
+ * written as the text "*NULL*".
+ */
+static void outputString(const char *s, char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) {
+    const char *cursor = (s != NULL) ? s : "*NULL*";
+    for (;;) {
+        const char ch = *cursor++;
+        outputChar(ch, outBuf, outIx, capacity, indent);
+        if (ch == 0) {
+            break;
+        }
+    }
+}
+
+
+
+/*
+ * Write a UChar string as a sequence of four-digit hex values, each
+ * followed by a space. len == -1 means the string is NUL-terminated; the
+ * terminating 0000 is written as well. A NULL pointer is written the same
+ * way outputString() writes NULL.
+ */
+static void outputUString(const UChar *s, int32_t len,
+                          char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) {
+    if (s==NULL) {
+        outputString(NULL, outBuf, outIx, capacity, indent);
+        return;
+    }
+
+    const UBool nulTerminated = (len == -1);
+    int32_t unitIx = 0;
+    while (nulTerminated || unitIx < len) {
+        const UChar unit = s[unitIx++];
+        outputHexBytes(unit, 4, outBuf, outIx, capacity);
+        outputChar(' ', outBuf, outIx, capacity, indent);
+        if (nulTerminated && unit==0) {
+            break;
+        }
+    }
+}
+
+/*
+ * Format trace output into outBuf according to fmt, taking the values
+ * from args. Every new line of output is indented by 'indent' spaces.
+ *
+ * Format specifiers handled below (all integers are written in hex):
+ *   %c   one char
+ *   %s   NUL-terminated char string
+ *   %S   UChar string; takes a pointer and a length (-1: NUL-terminated)
+ *   %b   int written as 2 hex digits
+ *   %h   int written as 4 hex digits
+ *   %d   int written as 8 hex digits
+ *   %l   int64_t written as 16 hex digits
+ *   %p   pointer
+ *   %vX  vector of X (b, h, d, l, p, c, s or S); takes a pointer and a
+ *        length (-1: the vector ends with a zero/NULL element). The
+ *        length is appended in square brackets.
+ *   any other %x outputs x itself, so "%%" gives a single '%'.
+ *
+ * Output that does not fit into 'capacity' is not stored but is still
+ * counted.
+ * @return the number of chars counted, including the terminating NUL.
+ */
+U_CAPI int32_t U_EXPORT2
+utrace_vformat(char *outBuf, int32_t capacity, int32_t indent, const char *fmt, va_list args) {
+ int32_t outIx = 0;
+ int32_t fmtIx = 0;
+ char fmtC;
+ char c;
+ int32_t intArg;
+ int64_t longArg = 0;
+ char *ptrArg;
+
+ /* Loop runs once for each character in the format string.
+ */
+ for (;;) {
+ fmtC = fmt[fmtIx++];
+ if (fmtC != '%') {
+ /* Literal character, not part of a %sequence. Just copy it to the output. */
+ outputChar(fmtC, outBuf, &outIx, capacity, indent);
+ if (fmtC == 0) {
+ /* We hit the null that terminates the format string.
+ * This is the normal (and only) exit from the loop that
+ * interprets the format
+ */
+ break;
+ }
+ continue;
+ }
+
+ /* We encountered a '%'. Pick up the following format char */
+ fmtC = fmt[fmtIx++];
+
+ switch (fmtC) {
+ case 'c':
+ /* single 8 bit char */
+ c = (char)va_arg(args, int32_t);
+ outputChar(c, outBuf, &outIx, capacity, indent);
+ break;
+
+ case 's':
+ /* char * string, null terminated. */
+ ptrArg = va_arg(args, char *);
+ outputString((const char *)ptrArg, outBuf, &outIx, capacity, indent);
+ break;
+
+ case 'S':
+ /* UChar * string, with length, len==-1 for null terminated. */
+ ptrArg = va_arg(args, char *); /* Ptr */
+ intArg =(int32_t)va_arg(args, int32_t); /* Length */
+ outputUString((const UChar *)ptrArg, intArg, outBuf, &outIx, capacity, indent);
+ break;
+
+ case 'b':
+ /* 8 bit int */
+ intArg = va_arg(args, int);
+ outputHexBytes(intArg, 2, outBuf, &outIx, capacity);
+ break;
+
+ case 'h':
+ /* 16 bit int */
+ intArg = va_arg(args, int);
+ outputHexBytes(intArg, 4, outBuf, &outIx, capacity);
+ break;
+
+ case 'd':
+ /* 32 bit int */
+ intArg = va_arg(args, int);
+ outputHexBytes(intArg, 8, outBuf, &outIx, capacity);
+ break;
+
+ case 'l':
+ /* 64 bit long */
+ longArg = va_arg(args, int64_t);
+ outputHexBytes(longArg, 16, outBuf, &outIx, capacity);
+ break;
+
+ case 'p':
+ /* Pointers. */
+ ptrArg = va_arg(args, char *);
+ outputPtrBytes(ptrArg, outBuf, &outIx, capacity);
+ break;
+
+ case 0:
+ /* Single '%' at end of fmt string. Output as literal '%'.
+ * Back up index into format string so that the terminating null will be
+ * re-fetched in the outer loop, causing it to terminate.
+ */
+ outputChar('%', outBuf, &outIx, capacity, indent);
+ fmtIx--;
+ break;
+
+ case 'v':
+ {
+ /* Vector of values, e.g. %vh */
+ char vectorType;
+ int32_t vectorLen;
+ const char *i8Ptr;
+ int16_t *i16Ptr;
+ int32_t *i32Ptr;
+ int64_t *i64Ptr;
+ void **ptrPtr;
+ int32_t charsToOutput = 0;
+ int32_t i;
+
+ vectorType = fmt[fmtIx]; /* b, h, d, l, p, etc. */
+ if (vectorType != 0) {
+ fmtIx++;
+ }
+ /* The same vector address is viewed through one pointer per element type. */
+ i8Ptr = (const char *)va_arg(args, void*);
+ i16Ptr = (int16_t *)i8Ptr;
+ i32Ptr = (int32_t *)i8Ptr;
+ i64Ptr = (int64_t *)i8Ptr;
+ ptrPtr = (void **)i8Ptr;
+ vectorLen =(int32_t)va_arg(args, int32_t);
+ if (ptrPtr == NULL) {
+ outputString("*NULL* ", outBuf, &outIx, capacity, indent);
+ } else {
+ for (i=0; i<vectorLen || vectorLen==-1; i++) {
+ /* longArg doubles as the "was this element zero/NULL" flag
+ * that ends a vector of length -1 (tested after the switch). */
+ switch (vectorType) {
+ case 'b':
+ charsToOutput = 2;
+ longArg = *i8Ptr++;
+ break;
+ case 'h':
+ charsToOutput = 4;
+ longArg = *i16Ptr++;
+ break;
+ case 'd':
+ charsToOutput = 8;
+ longArg = *i32Ptr++;
+ break;
+ case 'l':
+ charsToOutput = 16;
+ longArg = *i64Ptr++;
+ break;
+ case 'p':
+ charsToOutput = 0;
+ outputPtrBytes(*ptrPtr, outBuf, &outIx, capacity);
+ longArg = *ptrPtr==NULL? 0: 1; /* test for null terminated array. */
+ ptrPtr++;
+ break;
+ case 'c':
+ charsToOutput = 0;
+ outputChar(*i8Ptr, outBuf, &outIx, capacity, indent);
+ longArg = *i8Ptr; /* for test for null terminated array. */
+ i8Ptr++;
+ break;
+ case 's':
+ charsToOutput = 0;
+ outputString((const char *)*ptrPtr, outBuf, &outIx, capacity, indent);
+ outputChar('\n', outBuf, &outIx, capacity, indent);
+ longArg = *ptrPtr==NULL? 0: 1; /* for test for null term. array. */
+ ptrPtr++;
+ break;
+
+ case 'S':
+ charsToOutput = 0;
+ outputUString((const UChar *)*ptrPtr, -1, outBuf, &outIx, capacity, indent);
+ outputChar('\n', outBuf, &outIx, capacity, indent);
+ longArg = *ptrPtr==NULL? 0: 1; /* for test for null term. array. */
+ ptrPtr++;
+ break;
+
+
+ }
+ /* Integer element types are written here; the others were
+ * already written inside the switch. */
+ if (charsToOutput > 0) {
+ outputHexBytes(longArg, charsToOutput, outBuf, &outIx, capacity);
+ outputChar(' ', outBuf, &outIx, capacity, indent);
+ }
+ if (vectorLen == -1 && longArg == 0) {
+ break;
+ }
+ }
+ }
+ outputChar('[', outBuf, &outIx, capacity, indent);
+ outputHexBytes(vectorLen, 8, outBuf, &outIx, capacity);
+ outputChar(']', outBuf, &outIx, capacity, indent);
+ }
+ break;
+
+
+ default:
+ /* %. in format string, where . is some character not in the set
+ * of recognized format chars. Just output it as if % wasn't there.
+ * (Covers "%%" outputting a single '%')
+ */
+ outputChar(fmtC, outBuf, &outIx, capacity, indent);
+ }
+ }
+ outputChar(0, outBuf, &outIx, capacity, indent); /* Make sure that output is null terminated */
+ return outIx + 1; /* outIx + 1 because outIx does not increment when outputting final null. */
+}
+
+
+
+
+/*
+ * Variadic front end for utrace_vformat(): packs the arguments into a
+ * va_list and returns whatever utrace_vformat() returns.
+ */
+U_CAPI int32_t U_EXPORT2
+utrace_format(char *outBuf, int32_t capacity,
+              int32_t indent, const char *fmt, ...) {
+    va_list varArgs;
+    va_start(varArgs, fmt);
+    const int32_t charsNeeded = utrace_vformat(outBuf, capacity, indent, fmt, varArgs);
+    va_end(varArgs);
+    return charsNeeded;
+}
+
+
+/*
+ * Install the trace hooks and the context pointer that will be passed
+ * back to them. Any hook may be NULL to disable that kind of trace call.
+ */
+U_CAPI void U_EXPORT2
+utrace_setFunctions(const void *context,
+                    UTraceEntry *e, UTraceExit *x, UTraceData *d) {
+    gTraceContext   = context;
+    pTraceEntryFunc = e;
+    pTraceExitFunc  = x;
+    pTraceDataFunc  = d;
+}
+
+
+/*
+ * Copy the currently installed trace hooks and context pointer into the
+ * caller's variables. All four output pointers are written.
+ */
+U_CAPI void U_EXPORT2
+utrace_getFunctions(const void **context,
+                    UTraceEntry **e, UTraceExit **x, UTraceData **d) {
+    *context = gTraceContext;
+    *e = pTraceEntryFunc;
+    *x = pTraceExitFunc;
+    *d = pTraceDataFunc;
+}
+
+/*
+ * Set the trace level. Values below UTRACE_OFF are raised to UTRACE_OFF
+ * and values above UTRACE_VERBOSE are lowered to UTRACE_VERBOSE.
+ */
+U_CAPI void U_EXPORT2
+utrace_setLevel(int32_t level) {
+    /* Apply the lower bound first, then the upper bound. */
+    const int32_t atLeastOff = (level < UTRACE_OFF) ? (int32_t)UTRACE_OFF : level;
+    utrace_level = (atLeastOff > UTRACE_VERBOSE) ? (int32_t)UTRACE_VERBOSE : atLeastOff;
+}
+
+/*
+ * Return the current trace level: the initial UTRACE_ERROR, or the value
+ * last stored by utrace_setLevel() or utrace_cleanup().
+ */
+U_CAPI int32_t U_EXPORT2
+utrace_getLevel() {
+ return utrace_level;
+}
+
+
+/*
+ * Remove all trace hooks, drop the context pointer and switch tracing
+ * off. Always returns TRUE.
+ */
+U_CFUNC UBool
+utrace_cleanup() {
+    utrace_level    = UTRACE_OFF;
+    gTraceContext   = NULL;
+    pTraceEntryFunc = NULL;
+    pTraceExitFunc  = NULL;
+    pTraceDataFunc  = NULL;
+    return TRUE;
+}
+
+
+/*
+ * Name tables used by utrace_functionName(). Each table covers one range
+ * of function numbers and ends with a NULL entry.
+ */
+
+/* Names for function numbers from UTRACE_FUNCTION_START up to UTRACE_FUNCTION_LIMIT. */
+static const char * const
+trFnName[] = {
+ "u_init",
+ "u_cleanup",
+ NULL
+};
+
+
+/* Names for function numbers from UTRACE_CONVERSION_START up to UTRACE_CONVERSION_LIMIT. */
+static const char * const
+trConvNames[] = {
+ "ucnv_open",
+ "ucnv_openPackage",
+ "ucnv_openAlgorithmic",
+ "ucnv_clone",
+ "ucnv_close",
+ "ucnv_flushCache",
+ "ucnv_load",
+ "ucnv_unload",
+ NULL
+};
+
+
+/* Names for function numbers from UTRACE_COLLATION_START up to UTRACE_COLLATION_LIMIT. */
+static const char * const
+trCollNames[] = {
+ "ucol_open",
+ "ucol_close",
+ "ucol_strcoll",
+ "ucol_getSortKey",
+ "ucol_getLocale",
+ "ucol_nextSortKeyPart",
+ "ucol_strcollIter",
+ "ucol_openFromShortString",
+ "ucol_strcollUTF8",
+ NULL
+};
+
+
+/* Names for function numbers from UTRACE_UDATA_START up to UTRACE_RES_DATA_LIMIT. */
+static const char* const
+trResDataNames[] = {
+ "resc",
+ "bundle-open",
+ "file-open",
+ "res-open",
+ NULL
+};
+
+
+/*
+ * Map a trace function number to its name by finding the range it falls
+ * into and indexing that range's name table. Numbers outside every range
+ * yield a fixed "bogus" string.
+ */
+U_CAPI const char * U_EXPORT2
+utrace_functionName(int32_t fnNumber) {
+    if(UTRACE_FUNCTION_START <= fnNumber && fnNumber < UTRACE_FUNCTION_LIMIT) {
+        return trFnName[fnNumber];
+    }
+    if(UTRACE_CONVERSION_START <= fnNumber && fnNumber < UTRACE_CONVERSION_LIMIT) {
+        return trConvNames[fnNumber - UTRACE_CONVERSION_START];
+    }
+    if(UTRACE_COLLATION_START <= fnNumber && fnNumber < UTRACE_COLLATION_LIMIT) {
+        return trCollNames[fnNumber - UTRACE_COLLATION_START];
+    }
+    if(UTRACE_UDATA_START <= fnNumber && fnNumber < UTRACE_RES_DATA_LIMIT) {
+        return trResDataNames[fnNumber - UTRACE_UDATA_START];
+    }
+    return "[BOGUS Trace Function Number]";
+}
+
diff --git a/thirdparty/icu4c/common/utracimp.h b/thirdparty/icu4c/common/utracimp.h
new file mode 100644
index 0000000000..f32fe1db39
--- /dev/null
+++ b/thirdparty/icu4c/common/utracimp.h
@@ -0,0 +1,391 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2009, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utracimp.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003aug06
+* created by: Markus W. Scherer
+*
+* Internal header for ICU tracing/logging.
+*
+*
+* Various notes:
+* - using a trace level variable to only call trace functions
+* when the level is sufficient
+* - using the same variable for tracing on/off to never make a function
+* call when off
+* - the function number is put into a local variable by the entry macro
+* and used implicitly to avoid copy&paste/typing mistakes by the developer
+* - the application must call utrace_setFunctions() and pass in
+* implementations for the trace functions
+* - ICU trace macros call ICU functions that route through the function
+* pointers if they have been set;
+* this avoids an indirection at the call site
+* (which would cost more code for another check and for the indirection)
+*
+* ### TODO Issues:
+* - Verify that va_list is portable among compilers for the same platform.
+* va_list should be portable because printf() would fail otherwise!
+* - Should enum values like UTraceLevel be passed into int32_t-type arguments,
+* or should enum types be used?
+*/
+
+#ifndef __UTRACIMP_H__
+#define __UTRACIMP_H__
+
+#include "unicode/utrace.h"
+#include <stdarg.h>
+
+U_CDECL_BEGIN
+
+/**
+ * Traced Function Exit return types.
+ * Flags indicating the number and types of varargs included in a call
+ * to a UTraceExit function.
+ * Bits 0-3: The function return type; it describes the first variable
+ * parameter and is extracted with UTRACE_EXITV_MASK.
+ * Bit 4: Flag for presence of a UErrorCode status parameter
+ * (UTRACE_EXITV_STATUS).
+ * @internal
+ */
+typedef enum UTraceExitVal {
+ /** The traced function returns no value @internal */
+ UTRACE_EXITV_NONE = 0,
+ /** The traced function returns an int32_t, or compatible, type. @internal */
+ UTRACE_EXITV_I32 = 1,
+ /** The traced function returns a pointer @internal */
+ UTRACE_EXITV_PTR = 2,
+ /** The traced function returns a UBool @internal */
+ UTRACE_EXITV_BOOL = 3,
+ /** Mask to extract the return type values from a UTraceExitVal @internal */
+ UTRACE_EXITV_MASK = 0xf,
+ /** Bit indicating that the traced function includes a UErrorCode parameter @internal */
+ UTRACE_EXITV_STATUS = 0x10
+} UTraceExitVal;
+
+/**
+ * Trace function for the entry point of a function.
+ * Do not use directly, use UTRACE_ENTRY instead.
+ * The UTRACE_ENTRY and UTRACE_ENTRY_OC macros call this only when the
+ * current trace level is high enough for the respective macro.
+ * @param fnNumber The UTraceFunctionNumber for the current function.
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+utrace_entry(int32_t fnNumber);
+
+/**
+ * Trace function for each exit point of a function.
+ * Do not use directly, use UTRACE_EXIT* instead.
+ * @param fnNumber The UTraceFunctionNumber for the current function.
+ * @param returnType A combination of UTraceExitVal flags that describes
+ * the variable arguments that follow.
+ * @param ... The function's return value if returnType names a return
+ * type other than UTRACE_EXITV_NONE, followed by the
+ * UErrorCode value at function exit if UTRACE_EXITV_STATUS
+ * is set. See the UTRACE_EXIT* macros.
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+utrace_exit(int32_t fnNumber, int32_t returnType, ...);
+
+
+/**
+ * Trace function used inside functions that have a UTRACE_ENTRY() statement.
+ * Do not use directly, use UTRACE_DATAX() macros instead.
+ *
+ * @param utraceFnNumber The number of the current function, from the local
+ * variable of the same name.
+ * @param level The trace level for this message.
+ * @param fmt The trace format string.
+ * @param ... The values for the inserts in fmt; the UTRACE_DATAX()
+ * macros pass their data arguments through unchanged.
+ *
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+utrace_data(int32_t utraceFnNumber, int32_t level, const char *fmt, ...);
+
+U_CDECL_END
+
+#if U_ENABLE_TRACING
+
+/**
+ * Boolean expression to see if ICU tracing is turned on
+ * to at least the specified level.
+ * Compares the result of utrace_getLevel() with the argument on every use.
+ * @param level The minimum trace level to test for.
+ * @internal
+ */
+#define UTRACE_LEVEL(level) (utrace_getLevel()>=(level))
+
+/**
+ * Flag bit in utraceFnNumber, the local variable added to each function
+ * with tracing code to contain the function number.
+ *
+ * Set the flag if the function's entry is traced, which will cause the
+ * function's exit to also be traced. utraceFnNumber is unconditionally
+ * set at entry, whether or not the entry is traced, so that it will
+ * always be available for error trace output.
+ * The trace macros mask this bit out again before passing the function
+ * number to utrace_exit() or utrace_data().
+ * @internal
+ */
+#define UTRACE_TRACED_ENTRY 0x80000000
+
+/**
+ * Trace statement for the entry point of a function.
+ * Stores the function number in a local variable.
+ * In C code, must be placed immediately after the last variable declaration.
+ * Must be matched with UTRACE_EXIT() at all function exit points.
+ *
+ * Tracing should start with UTRACE_ENTRY after checking for
+ * U_FAILURE at function entry, so that if a function returns immediately
+ * because of a pre-existing error condition, it does not show up in the trace,
+ * consistent with ICU's error handling model.
+ *
+ * The entry is traced only if the trace level is at least UTRACE_INFO;
+ * in that case UTRACE_TRACED_ENTRY is also set in utraceFnNumber.
+ * The macro argument is expanded twice, so it should have no side effects.
+ *
+ * @param fnNumber The UTraceFunctionNumber for the current function.
+ * @internal
+ */
+#define UTRACE_ENTRY(fnNumber) \
+ int32_t utraceFnNumber=(fnNumber); \
+UPRV_BLOCK_MACRO_BEGIN { \
+ if(utrace_getLevel()>=UTRACE_INFO) { \
+ utrace_entry(fnNumber); \
+ utraceFnNumber |= UTRACE_TRACED_ENTRY; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+
+/**
+ * Trace statement for the entry point of open and close functions.
+ * Produces trace output at a less verbose setting than plain UTRACE_ENTRY:
+ * the entry is traced if the trace level is at least UTRACE_OPEN_CLOSE.
+ * Stores the function number in a local variable.
+ * In C code, must be placed immediately after the last variable declaration.
+ * Must be matched with UTRACE_EXIT() at all function exit points.
+ * The macro argument is expanded twice, so it should have no side effects.
+ *
+ * @param fnNumber The UTraceFunctionNumber for the current function.
+ * @internal
+ */
+#define UTRACE_ENTRY_OC(fnNumber) \
+ int32_t utraceFnNumber=(fnNumber); \
+UPRV_BLOCK_MACRO_BEGIN { \
+ if(utrace_getLevel()>=UTRACE_OPEN_CLOSE) { \
+ utrace_entry(fnNumber); \
+ utraceFnNumber |= UTRACE_TRACED_ENTRY; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Trace statement for each exit point of a function that has a UTRACE_ENTRY()
+ * statement.
+ *
+ * This variant takes no arguments and reports neither a return value nor a
+ * UErrorCode (it passes UTRACE_EXITV_NONE). The exit is traced only if the
+ * entry was traced.
+ *
+ * For the variants that do report the function's ICU UErrorCode value at
+ * function exit:
+ * 0==U_ZERO_ERROR indicates success,
+ * positive values an error (see u_errorName()),
+ * negative values an informational status.
+ *
+ * @internal
+ */
+#define UTRACE_EXIT() UPRV_BLOCK_MACRO_BEGIN { \
+ if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
+ utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_NONE); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Trace statement for each exit point of a function that has a UTRACE_ENTRY()
+ * statement, and that returns a value.
+ * The exit is traced only if the entry was traced.
+ *
+ * @param val The function's return value, int32_t or compatible type.
+ *
+ * @internal
+ */
+#define UTRACE_EXIT_VALUE(val) UPRV_BLOCK_MACRO_BEGIN { \
+ if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
+ utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_I32, val); \
+ } \
+} UPRV_BLOCK_MACRO_END
+/**
+ * Trace statement for each exit point of a function that has a UTRACE_ENTRY()
+ * statement, and that reports only a UErrorCode.
+ * The exit is traced only if the entry was traced.
+ *
+ * @param status The function's UErrorCode value at function exit.
+ *
+ * @internal
+ */
+#define UTRACE_EXIT_STATUS(status) UPRV_BLOCK_MACRO_BEGIN { \
+ if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
+ utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_STATUS, status); \
+ } \
+} UPRV_BLOCK_MACRO_END
+/**
+ * Trace statement for each exit point of a function that has a UTRACE_ENTRY()
+ * statement, and that reports both a return value and a UErrorCode.
+ * The exit is traced only if the entry was traced.
+ *
+ * @param val The function's return value; passed as UTRACE_EXITV_I32.
+ * @param status The function's UErrorCode value at function exit.
+ *
+ * @internal
+ */
+#define UTRACE_EXIT_VALUE_STATUS(val, status) UPRV_BLOCK_MACRO_BEGIN { \
+ if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
+ utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (UTRACE_EXITV_I32 | UTRACE_EXITV_STATUS), val, status); \
+ } \
+} UPRV_BLOCK_MACRO_END
+/**
+ * Trace statement for each exit point of a function that has a UTRACE_ENTRY()
+ * statement, and that reports both a returned pointer and a UErrorCode.
+ * The exit is traced only if the entry was traced.
+ *
+ * @param ptr The function's return value; passed as UTRACE_EXITV_PTR.
+ * @param status The function's UErrorCode value at function exit.
+ *
+ * @internal
+ */
+#define UTRACE_EXIT_PTR_STATUS(ptr, status) UPRV_BLOCK_MACRO_BEGIN { \
+ if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \
+ utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (UTRACE_EXITV_PTR | UTRACE_EXITV_STATUS), ptr, status); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes no data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @param level The trace level for this message; it is emitted only if
+ * UTRACE_LEVEL(level) is true.
+ * @param fmt The trace format string.
+ * @internal
+ */
+#define UTRACE_DATA0(level, fmt) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes one data argument.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @param level The trace level for this message; it is emitted only if
+ * UTRACE_LEVEL(level) is true.
+ * @param fmt The trace format string.
+ * @param a The data argument for the inserts in fmt.
+ * @internal
+ */
+#define UTRACE_DATA1(level, fmt, a) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY , (level), (fmt), (a)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes two data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @param level The trace level for this message; it is emitted only if
+ * UTRACE_LEVEL(level) is true.
+ * @param fmt The trace format string.
+ * @param a,b The data arguments for the inserts in fmt, in order.
+ * @internal
+ */
+#define UTRACE_DATA2(level, fmt, a, b) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY , (level), (fmt), (a), (b)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes three data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @param level The trace level for this message; it is emitted only if
+ * UTRACE_LEVEL(level) is true.
+ * @param fmt The trace format string.
+ * @param a,b,c The data arguments for the inserts in fmt, in order.
+ * @internal
+ */
+#define UTRACE_DATA3(level, fmt, a, b, c) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes four data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @param level The trace level for this message; it is emitted only if
+ * UTRACE_LEVEL(level) is true.
+ * @param fmt The trace format string.
+ * @param a,b,c,d The data arguments for the inserts in fmt, in order.
+ * @internal
+ */
+#define UTRACE_DATA4(level, fmt, a, b, c, d) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes five data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @param level The trace level for this message; it is emitted only if
+ * UTRACE_LEVEL(level) is true.
+ * @param fmt The trace format string.
+ * @param a,b,c,d,e The data arguments for the inserts in fmt, in order.
+ * @internal
+ */
+#define UTRACE_DATA5(level, fmt, a, b, c, d, e) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes six data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @param level The trace level for this message; it is emitted only if
+ * UTRACE_LEVEL(level) is true.
+ * @param fmt The trace format string.
+ * @param a,b,c,d,e,f The data arguments for the inserts in fmt, in order.
+ * @internal
+ */
+#define UTRACE_DATA6(level, fmt, a, b, c, d, e, f) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes seven data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @param level The trace level for this message; it is emitted only if
+ * UTRACE_LEVEL(level) is true.
+ * @param fmt The trace format string.
+ * @param a,b,c,d,e,f,g The data arguments for the inserts in fmt, in order.
+ * @internal
+ */
+#define UTRACE_DATA7(level, fmt, a, b, c, d, e, f, g) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes eight data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @param level The trace level for this message; it is emitted only if
+ * UTRACE_LEVEL(level) is true.
+ * @param fmt The trace format string.
+ * @param a,b,c,d,e,f,g,h The data arguments for the inserts in fmt, in order.
+ * @internal
+ */
+#define UTRACE_DATA8(level, fmt, a, b, c, d, e, f, g, h) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g), (h)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Trace statement used inside functions that have a UTRACE_ENTRY() statement.
+ * Takes nine data arguments.
+ * The number of arguments for this macro must match the number of inserts
+ * in the format string. Vector inserts count as two arguments.
+ * Calls utrace_data() if the level is high enough.
+ * @param level The trace level for this message; it is emitted only if
+ * UTRACE_LEVEL(level) is true.
+ * @param fmt The trace format string.
+ * @param a,b,c,d,e,f,g,h,i The data arguments for the inserts in fmt, in order.
+ * @internal
+ */
+#define UTRACE_DATA9(level, fmt, a, b, c, d, e, f, g, h, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTRACE_LEVEL(level)) { \
+ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g), (h), (i)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+#else
+
+/*
+ * When tracing is disabled (U_ENABLE_TRACING is 0), UTRACE_LEVEL() becomes
+ * the constant 0 and all of the other trace macros expand to nothing.
+ * In particular, no utraceFnNumber local variable is declared, and macro
+ * arguments are not evaluated.
+ */
+
+#define UTRACE_LEVEL(level) 0
+#define UTRACE_ENTRY(fnNumber)
+#define UTRACE_ENTRY_OC(fnNumber)
+#define UTRACE_EXIT()
+#define UTRACE_EXIT_VALUE(val)
+#define UTRACE_EXIT_STATUS(status)
+#define UTRACE_EXIT_VALUE_STATUS(val, status)
+#define UTRACE_EXIT_PTR_STATUS(ptr, status)
+#define UTRACE_DATA0(level, fmt)
+#define UTRACE_DATA1(level, fmt, a)
+#define UTRACE_DATA2(level, fmt, a, b)
+#define UTRACE_DATA3(level, fmt, a, b, c)
+#define UTRACE_DATA4(level, fmt, a, b, c, d)
+#define UTRACE_DATA5(level, fmt, a, b, c, d, e)
+#define UTRACE_DATA6(level, fmt, a, b, c, d, e, f)
+#define UTRACE_DATA7(level, fmt, a, b, c, d, e, f, g)
+#define UTRACE_DATA8(level, fmt, a, b, c, d, e, f, g, h)
+#define UTRACE_DATA9(level, fmt, a, b, c, d, e, f, g, h, i)
+
+#endif
+
+#endif
diff --git a/thirdparty/icu4c/common/utrie.cpp b/thirdparty/icu4c/common/utrie.cpp
new file mode 100644
index 0000000000..ecf9b1cba7
--- /dev/null
+++ b/thirdparty/icu4c/common/utrie.cpp
@@ -0,0 +1,1234 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2001-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: utrie.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001oct20
+* created by: Markus W. Scherer
+*
+* This is a common implementation of a "folded" trie.
+* It is a kind of compressed, serializable table of 16- or 32-bit values associated with
+* Unicode code points (0..0x10ffff).
+*/
+
+#ifdef UTRIE_DEBUG
+# include <stdio.h>
+#endif
+
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "utrie.h"
+
+/* miscellaneous ------------------------------------------------------------ */
+/* Absolute value; the argument is evaluated twice. Used below on index values, which may be negative. */
+#undef ABS
+#define ABS(x) ((x)>=0 ? (x) : -(x))
+
+/*
+ * Compare the first length 32-bit values of s and t.
+ * Returns true if all of them are equal (also for length==0),
+ * false at the first difference or if length is negative.
+ */
+static inline UBool
+equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
+    int32_t i;
+
+    for(i=0; i<length; ++i) {
+        if(s[i]!=t[i]) {
+            return FALSE;
+        }
+    }
+    /* nothing was compared: only an empty range counts as equal */
+    return (UBool)(length>=0);
+}
+
+/* Building a trie ----------------------------------------------------------*/
+
+/**
+ * Initialize a build-time trie.
+ *
+ * @param fillIn        Caller-owned UNewTrie to initialize, or NULL to have one
+ *                      allocated with uprv_malloc(). It is zeroed before use.
+ * @param aliasData     Caller-owned data array to use, or NULL to have an array
+ *                      of maxDataLength uint32_t values allocated.
+ *                      NOTE(review): an aliased array is assumed to hold at
+ *                      least maxDataLength values - the caller must ensure it.
+ * @param maxDataLength Capacity of the data array in uint32_t units; must be at
+ *                      least UTRIE_DATA_BLOCK_LENGTH, and at least 1024 if
+ *                      latin1Linear is set.
+ * @param initialValue  Value written into every preallocated data entry.
+ * @param leadUnitValue Stored in the trie as its leadUnitValue.
+ * @param latin1Linear  If set, consecutive data blocks for U+0000..U+00ff are
+ *                      preallocated right after block 0.
+ * @return the initialized trie, or NULL if maxDataLength is too small or an
+ *         allocation failed. On failure, memory passed in by the caller is
+ *         never freed here.
+ */
+U_CAPI UNewTrie * U_EXPORT2
+utrie_open(UNewTrie *fillIn,
+           uint32_t *aliasData, int32_t maxDataLength,
+           uint32_t initialValue, uint32_t leadUnitValue,
+           UBool latin1Linear) {
+    UNewTrie *trie;
+    int32_t i, j;
+
+    if( maxDataLength<UTRIE_DATA_BLOCK_LENGTH ||
+        (latin1Linear && maxDataLength<1024)
+    ) {
+        return NULL;
+    }
+
+    if(fillIn!=NULL) {
+        trie=fillIn;
+    } else {
+        trie=(UNewTrie *)uprv_malloc(sizeof(UNewTrie));
+        if(trie==NULL) {
+            return NULL;
+        }
+    }
+    uprv_memset(trie, 0, sizeof(UNewTrie));
+    trie->isAllocated= (UBool)(fillIn==NULL);
+
+    if(aliasData!=NULL) {
+        trie->data=aliasData;
+        trie->isDataAllocated=FALSE;
+    } else {
+        /* compute the byte count in size_t so that a large maxDataLength cannot overflow int32_t */
+        trie->data=(uint32_t *)uprv_malloc((size_t)maxDataLength*4);
+        if(trie->data==NULL) {
+            /* release the struct only if it was allocated above; a fillIn belongs to the caller */
+            if(trie->isAllocated) {
+                uprv_free(trie);
+            }
+            return NULL;
+        }
+        trie->isDataAllocated=TRUE;
+    }
+
+    /* preallocate and reset the first data block (block index 0) */
+    j=UTRIE_DATA_BLOCK_LENGTH;
+
+    if(latin1Linear) {
+        /* preallocate and reset the first block (number 0) and Latin-1 (U+0000..U+00ff) after that */
+        /* made sure above that maxDataLength>=1024 */
+
+        /* set indexes to point to consecutive data blocks */
+        i=0;
+        do {
+            /* do this at least for trie->index[0] even if that block is only partly used for Latin-1 */
+            trie->index[i++]=j;
+            j+=UTRIE_DATA_BLOCK_LENGTH;
+        } while(i<(256>>UTRIE_SHIFT));
+    }
+
+    /* reset the initially allocated blocks to the initial value */
+    trie->dataLength=j;
+    while(j>0) {
+        trie->data[--j]=initialValue;
+    }
+
+    trie->leadUnitValue=leadUnitValue;
+    trie->indexLength=UTRIE_MAX_INDEX_LENGTH;
+    trie->dataCapacity=maxDataLength;
+    trie->isLatin1Linear=latin1Linear;
+    trie->isCompacted=FALSE;
+    return trie;
+}
+
+/**
+ * Clone an uncompacted build-time trie.
+ *
+ * @param fillIn            Caller-owned UNewTrie to fill in, or NULL to allocate one
+ *                          (passed through to utrie_open()).
+ * @param other             The trie to copy; must be non-NULL, have data, and
+ *                          not be compacted.
+ * @param aliasData         Caller-owned data array for the clone, or NULL.
+ *                          It is used only if aliasDataCapacity is at least
+ *                          other->dataCapacity; otherwise a new array of
+ *                          other->dataCapacity values is allocated.
+ * @param aliasDataCapacity Capacity of aliasData in uint32_t units.
+ * @return the clone, or NULL if other cannot be cloned or an allocation failed.
+ *         On failure, a caller-owned aliasData is left untouched.
+ */
+U_CAPI UNewTrie * U_EXPORT2
+utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_t aliasDataCapacity) {
+    UNewTrie *trie;
+    UBool isDataAllocated;
+
+    /* do not clone if other is not valid or already compacted */
+    if(other==NULL || other->data==NULL || other->isCompacted) {
+        return NULL;
+    }
+
+    /* clone data */
+    if(aliasData!=NULL && aliasDataCapacity>=other->dataCapacity) {
+        isDataAllocated=FALSE;
+    } else {
+        aliasDataCapacity=other->dataCapacity;
+        /* compute the byte count in size_t, like the copy below */
+        aliasData=(uint32_t *)uprv_malloc((size_t)other->dataCapacity*4);
+        if(aliasData==NULL) {
+            return NULL;
+        }
+        isDataAllocated=TRUE;
+    }
+
+    /* utrie_open() sees the array as aliased; ownership is recorded below */
+    trie=utrie_open(fillIn, aliasData, aliasDataCapacity,
+                    other->data[0], other->leadUnitValue,
+                    other->isLatin1Linear);
+    if(trie==NULL) {
+        /* free only what was allocated here, never the caller's array */
+        if(isDataAllocated) {
+            uprv_free(aliasData);
+        }
+    } else {
+        uprv_memcpy(trie->index, other->index, sizeof(trie->index));
+        uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4);
+        trie->dataLength=other->dataLength;
+        trie->isDataAllocated=isDataAllocated;
+    }
+
+    return trie;
+}
+
+/*
+ * Release what the trie owns: the data array if isDataAllocated is set,
+ * and the struct itself if isAllocated is set. NULL is ignored.
+ */
+U_CAPI void U_EXPORT2
+utrie_close(UNewTrie *trie) {
+    if(trie==NULL) {
+        return;
+    }
+
+    if(trie->isDataAllocated) {
+        uprv_free(trie->data);
+        trie->data=NULL;
+    }
+    if(trie->isAllocated) {
+        uprv_free(trie);
+    }
+}
+
+/*
+ * Return the trie's data array and store its current length in *pLength.
+ * Returns NULL, without touching *pLength, if either argument is NULL.
+ */
+U_CAPI uint32_t * U_EXPORT2
+utrie_getData(UNewTrie *trie, int32_t *pLength) {
+    if(trie!=NULL && pLength!=NULL) {
+        *pLength=trie->dataLength;
+        return trie->data;
+    }
+    return NULL;
+}
+
+/*
+ * Reserve the next UTRIE_DATA_BLOCK_LENGTH entries at the end of the data array.
+ * Returns the start of the new block, or -1 if it would exceed dataCapacity.
+ */
+static int32_t
+utrie_allocDataBlock(UNewTrie *trie) {
+    const int32_t blockStart=trie->dataLength;
+    const int32_t blockLimit=blockStart+UTRIE_DATA_BLOCK_LENGTH;
+
+    if(blockLimit>trie->dataCapacity) {
+        /* out of memory in the data array */
+        return -1;
+    }
+
+    trie->dataLength=blockLimit;
+    return blockStart;
+}
+
+/**
+ * Get the data block for the index entry of code point c,
+ * allocating a new block for it if the entry does not have a positive value yet.
+ * Arguments are not checked.
+ *
+ * @return the start of the data block, or
+ *         -1 if no new data block is available (out of memory in data array)
+ * @internal
+ */
+static int32_t
+utrie_getDataBlock(UNewTrie *trie, UChar32 c) {
+    const int32_t slot=c>>UTRIE_SHIFT;
+    const int32_t current=trie->index[slot];
+    int32_t fresh;
+
+    if(current>0) {
+        /* this index entry already has its own block */
+        return current;
+    }
+
+    fresh=utrie_allocDataBlock(trie);
+    if(fresh<0) {
+        /* out of memory in the data array */
+        return -1;
+    }
+    trie->index[slot]=fresh;
+
+    /* copy-on-write: start from the contents at -current (block 0, or a block from a setRange()) */
+    uprv_memcpy(trie->data+fresh, trie->data-current, 4*UTRIE_DATA_BLOCK_LENGTH);
+    return fresh;
+}
+
+/**
+ * Set the value for one code point.
+ *
+ * @return TRUE if the value was successfully set;
+ *         FALSE if the trie is NULL or compacted, c is not in 0..0x10ffff,
+ *         or no data block is available
+ */
+U_CAPI UBool U_EXPORT2
+utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value) {
+    /* valid, uncompacted trie and valid c? */
+    if(trie!=NULL && !trie->isCompacted && (uint32_t)c<=0x10ffff) {
+        const int32_t blockStart=utrie_getDataBlock(trie, c);
+        if(blockStart>=0) {
+            trie->data[blockStart+(c&UTRIE_MASK)]=value;
+            return TRUE;
+        }
+    }
+    return FALSE;
+}
+
+/*
+ * Get the value for one code point from an uncompacted trie.
+ * If pInBlockZero is not NULL, it receives TRUE when the index entry for c
+ * is 0, or when the arguments are invalid; in the latter case 0 is returned.
+ */
+U_CAPI uint32_t U_EXPORT2
+utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero) {
+    /* valid, uncompacted trie and valid c? */
+    if(trie!=NULL && !trie->isCompacted && (uint32_t)c<=0x10ffff) {
+        const int32_t indexValue=trie->index[c>>UTRIE_SHIFT];
+
+        if(pInBlockZero!=NULL) {
+            *pInBlockZero= (UBool)(indexValue==0);
+        }
+        /* a negative index value refers to the block at its absolute value */
+        return trie->data[ABS(indexValue)+(c&UTRIE_MASK)];
+    }
+
+    if(pInBlockZero!=NULL) {
+        *pInBlockZero=TRUE;
+    }
+    return 0;
+}
+
+/**
+ * Write value into block[start..limit[.
+ * With overwrite, every entry is set; otherwise only entries that
+ * currently hold initialValue are replaced.
+ * @internal
+ */
+static void
+utrie_fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
+                uint32_t value, uint32_t initialValue, UBool overwrite) {
+    UChar32 pos;
+
+    if(overwrite) {
+        for(pos=start; pos<limit; ++pos) {
+            block[pos]=value;
+        }
+    } else {
+        for(pos=start; pos<limit; ++pos) {
+            if(block[pos]==initialValue) {
+                block[pos]=value;
+            }
+        }
+    }
+}
+/**
+ * Set a value for all code points in [start..limit[.
+ *
+ * @param trie      The build-time trie; must not be NULL or compacted.
+ * @param start     First code point of the range, 0..0x10ffff.
+ * @param limit     Exclusive end of the range, start..0x110000.
+ * @param value     The value to set.
+ * @param overwrite If TRUE, every code point in the range gets the value;
+ *                  if FALSE, only entries that still hold the trie's initial
+ *                  value (trie->data[0]) are changed.
+ * @return FALSE if an argument is invalid or no data block is available,
+ *         otherwise TRUE (also for an empty range)
+ */
+U_CAPI UBool U_EXPORT2
+utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite) {
+ /*
+ * repeat value in [start..limit[
+ * mark index values for repeat-data blocks by storing the block number negated
+ * fill around existing values if any, if(!overwrite)
+ */
+ uint32_t initialValue;
+ int32_t block, rest, repeatBlock;
+
+ /* valid, uncompacted trie and valid indexes? */
+ if( trie==NULL || trie->isCompacted ||
+ (uint32_t)start>0x10ffff || (uint32_t)limit>0x110000 || start>limit
+ ) {
+ return FALSE;
+ }
+ if(start==limit) {
+ return TRUE; /* nothing to do */
+ }
+
+ initialValue=trie->data[0];
+ if(start&UTRIE_MASK) {
+ UChar32 nextStart;
+
+ /* set partial block at [start..following block boundary[ */
+ block=utrie_getDataBlock(trie, start);
+ if(block<0) {
+ return FALSE;
+ }
+
+ nextStart=(start+UTRIE_DATA_BLOCK_LENGTH)&~UTRIE_MASK;
+ if(nextStart<=limit) {
+ utrie_fillBlock(trie->data+block, start&UTRIE_MASK, UTRIE_DATA_BLOCK_LENGTH,
+ value, initialValue, overwrite);
+ start=nextStart;
+ } else {
+ /* the whole range lies inside this one block */
+ utrie_fillBlock(trie->data+block, start&UTRIE_MASK, limit&UTRIE_MASK,
+ value, initialValue, overwrite);
+ return TRUE;
+ }
+ }
+
+ /* number of positions in the last, partial block */
+ rest=limit&UTRIE_MASK;
+
+ /* round down limit to a block boundary */
+ limit&=~UTRIE_MASK;
+
+ /* iterate over all-value blocks */
+ if(value==initialValue) {
+ repeatBlock=0;
+ } else {
+ /* no repeat block yet; one is created on demand in the loop below */
+ repeatBlock=-1;
+ }
+ while(start<limit) {
+ /* get index value */
+ block=trie->index[start>>UTRIE_SHIFT];
+ if(block>0) {
+ /* already allocated, fill in value */
+ utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, overwrite);
+ } else if(trie->data[-block]!=value && (block==0 || overwrite)) {
+ /* set the repeatBlock instead of the current block 0 or range block */
+ if(repeatBlock>=0) {
+ trie->index[start>>UTRIE_SHIFT]=-repeatBlock;
+ } else {
+ /* create and set and fill the repeatBlock */
+ repeatBlock=utrie_getDataBlock(trie, start);
+ if(repeatBlock<0) {
+ return FALSE;
+ }
+
+ /* set the negative block number to indicate that it is a repeat block */
+ trie->index[start>>UTRIE_SHIFT]=-repeatBlock;
+ utrie_fillBlock(trie->data+repeatBlock, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, TRUE);
+ }
+ }
+
+ start+=UTRIE_DATA_BLOCK_LENGTH;
+ }
+
+ if(rest>0) {
+ /* set partial block at [last block boundary..limit[ */
+ block=utrie_getDataBlock(trie, start);
+ if(block<0) {
+ return FALSE;
+ }
+
+ utrie_fillBlock(trie->data+block, 0, rest, value, initialValue, overwrite);
+ }
+
+ return TRUE;
+}
+
+/*
+ * Search the index blocks in [UTRIE_BMP_INDEX_LENGTH..indexLength[, in steps of
+ * UTRIE_SURROGATE_BLOCK_COUNT, for one whose entries all equal those of the
+ * index block starting at otherBlock.
+ * Returns the start of the first match, or indexLength if there is none.
+ */
+static int32_t
+_findSameIndexBlock(const int32_t *idx, int32_t indexLength,
+                    int32_t otherBlock) {
+    int32_t candidate;
+
+    for(candidate=UTRIE_BMP_INDEX_LENGTH; candidate<indexLength; candidate+=UTRIE_SURROGATE_BLOCK_COUNT) {
+        int32_t matched=0;
+
+        while(matched<UTRIE_SURROGATE_BLOCK_COUNT && idx[candidate+matched]==idx[otherBlock+matched]) {
+            ++matched;
+        }
+        if(matched==UTRIE_SURROGATE_BLOCK_COUNT) {
+            return candidate;
+        }
+    }
+    return indexLength;
+}
+
+/*
+ * Fold the normalization data for supplementary code points into
+ * a compact area on top of the BMP-part of the trie index,
+ * with the lead surrogates indexing this compact area.
+ *
+ * Duplicate the index values for lead surrogates:
+ * From inside the BMP area, where some may be overridden with folded values,
+ * to just after the BMP area, where they can be retrieved for
+ * code point lookups.
+ *
+ * trie: the build-time trie to fold; trie->indexLength is updated on success.
+ * getFoldedValue: callback that supplies the value to store for a lead
+ *     surrogate code unit; called with the trie, the first code point of the
+ *     1024-code point range, and the prospective folding offset.
+ * pErrorCode: set to U_MEMORY_ALLOCATION_ERROR if the data array is full, or to
+ *     U_INDEX_OUTOFBOUNDS_ERROR if the folded index is too long.
+ *     It is not checked on entry.
+ */
+static void
+utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *pErrorCode) {
+    int32_t leadIndexes[UTRIE_SURROGATE_BLOCK_COUNT];
+    int32_t *idx;
+    uint32_t value;
+    UChar32 c;
+    int32_t indexLength, block;
+#ifdef UTRIE_DEBUG
+    int countLeadCUWithData=0;
+#endif
+
+    idx=trie->index;
+
+    /* copy the lead surrogate indexes into a temporary array */
+    uprv_memcpy(leadIndexes, idx+(0xd800>>UTRIE_SHIFT), 4*UTRIE_SURROGATE_BLOCK_COUNT);
+
+    /*
+     * set all values for lead surrogate code *units* to leadUnitValue
+     * so that, by default, runtime lookups will find no data for associated
+     * supplementary code points, unless there is data for such code points
+     * which will result in a non-zero folding value below that is set for
+     * the respective lead units
+     *
+     * the above saved the indexes for surrogate code *points*
+     * fill the indexes with simplified code from utrie_setRange32()
+     */
+    if(trie->leadUnitValue==trie->data[0]) {
+        block=0; /* leadUnitValue==initialValue, use all-initial-value block */
+    } else {
+        /* create and fill the repeatBlock */
+        block=utrie_allocDataBlock(trie);
+        if(block<0) {
+            /* data table overflow */
+            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, trie->leadUnitValue, trie->data[0], TRUE);
+        block=-block; /* negative block number to indicate that it is a repeat block */
+    }
+    for(c=(0xd800>>UTRIE_SHIFT); c<(0xdc00>>UTRIE_SHIFT); ++c) {
+        trie->index[c]=block;
+    }
+
+    /*
+     * Fold significant index values into the area just after the BMP indexes.
+     * In case the first lead surrogate has significant data,
+     * its index block must be used first (in which case the folding is a no-op).
+     * Later all folded index blocks are moved up one to insert the copied
+     * lead surrogate indexes.
+     */
+    indexLength=UTRIE_BMP_INDEX_LENGTH;
+
+    /* search for any index (stage 1) entries for supplementary code points */
+    for(c=0x10000; c<0x110000;) {
+        if(idx[c>>UTRIE_SHIFT]!=0) {
+            /* there is data, treat the full block for a lead surrogate */
+            c&=~0x3ff;
+
+#ifdef UTRIE_DEBUG
+            ++countLeadCUWithData;
+            /* printf("supplementary data for lead surrogate U+%04lx\n", (long)(0xd7c0+(c>>10))); */
+#endif
+
+            /* is there an identical index block? */
+            block=_findSameIndexBlock(idx, indexLength, c>>UTRIE_SHIFT);
+
+            /*
+             * get a folded value for [c..c+0x400[ and,
+             * if different from the value for the lead surrogate code point,
+             * set it for the lead surrogate code unit
+             */
+            value=getFoldedValue(trie, c, block+UTRIE_SURROGATE_BLOCK_COUNT);
+            if(value!=utrie_get32(trie, U16_LEAD(c), NULL)) {
+                if(!utrie_set32(trie, U16_LEAD(c), value)) {
+                    /* data table overflow */
+                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+                    return;
+                }
+
+                /* if we did not find an identical index block... */
+                if(block==indexLength) {
+                    /* move the actual index (stage 1) entries from the supplementary position to the new one */
+                    uprv_memmove(idx+indexLength,
+                                 idx+(c>>UTRIE_SHIFT),
+                                 4*UTRIE_SURROGATE_BLOCK_COUNT);
+                    indexLength+=UTRIE_SURROGATE_BLOCK_COUNT;
+                }
+            }
+            c+=0x400;
+        } else {
+            c+=UTRIE_DATA_BLOCK_LENGTH;
+        }
+    }
+#ifdef UTRIE_DEBUG
+    if(countLeadCUWithData>0) {
+        printf("supplementary data for %d lead surrogates\n", countLeadCUWithData);
+    }
+#endif
+
+    /*
+     * index array overflow?
+     * This is to guarantee that a folding offset is of the form
+     * UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
+     * If the index is too large, then n>=1024 and more than 10 bits are necessary.
+     *
+     * In fact, it can only ever become n==1024 with completely unfoldable data and
+     * the additional block of duplicated values for lead surrogates.
+     */
+    if(indexLength>=UTRIE_MAX_INDEX_LENGTH) {
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return;
+    }
+
+    /*
+     * make space for the lead surrogate index block and
+     * insert it between the BMP indexes and the folded ones
+     */
+    uprv_memmove(idx+UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT,
+                 idx+UTRIE_BMP_INDEX_LENGTH,
+                 4*(indexLength-UTRIE_BMP_INDEX_LENGTH));
+    uprv_memcpy(idx+UTRIE_BMP_INDEX_LENGTH,
+                leadIndexes,
+                4*UTRIE_SURROGATE_BLOCK_COUNT);
+    indexLength+=UTRIE_SURROGATE_BLOCK_COUNT;
+
+#ifdef UTRIE_DEBUG
+    /* every argument is cast to long so that it matches its %ld conversion */
+    printf("trie index count: BMP %ld all Unicode %ld folded %ld\n",
+           (long)UTRIE_BMP_INDEX_LENGTH, (long)UTRIE_MAX_INDEX_LENGTH, (long)indexLength);
+#endif
+
+    trie->indexLength=indexLength;
+}
+
+/*
+ * Prepare trie->map for utrie_compact(): flag every data block as either
+ * referenced from the index or not, so that unreferenced blocks can be dropped.
+ * After this call a map entry is
+ * - 0 if the block is used
+ * - -1 if it is not used
+ */
+static void
+_findUnusedBlocks(UNewTrie *trie) {
+    int32_t entry;
+
+    /* start out with "not used" everywhere (all bits set, i.e. -1 per entry) */
+    uprv_memset(trie->map, 0xff, (UTRIE_MAX_BUILD_TIME_DATA_LENGTH>>UTRIE_SHIFT)*4);
+
+    /* then clear the flag for each block that the index refers to */
+    for(entry=0; entry<trie->indexLength; ++entry) {
+        const int32_t indexValue=trie->index[entry];
+        trie->map[ABS(indexValue)>>UTRIE_SHIFT]=0;
+    }
+
+    /* never move the all-initial-value block 0 */
+    trie->map[0]=0;
+}
+
+/*
+ * Search data[0..dataLength[ in increments of step for a full block that is
+ * identical to the block starting at otherBlock.
+ * Returns the start of the first such block, or -1 if there is none.
+ */
+static int32_t
+_findSameDataBlock(const uint32_t *data, int32_t dataLength,
+                   int32_t otherBlock, int32_t step) {
+    /* last start position at which a whole block still fits below dataLength */
+    const int32_t lastStart=dataLength-UTRIE_DATA_BLOCK_LENGTH;
+    int32_t candidate=0;
+
+    while(candidate<=lastStart) {
+        if(equal_uint32(data+candidate, data+otherBlock, UTRIE_DATA_BLOCK_LENGTH)) {
+            return candidate;
+        }
+        candidate+=step;
+    }
+    return -1;
+}
+
+/*
+ * Compact a folded build-time trie.
+ *
+ * The compaction
+ * - removes blocks that are identical with earlier ones
+ * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
+ * - moves blocks in steps of the data granularity
+ * - moves and overlaps blocks that overlap with multiple values in the overlap region
+ *
+ * It does not
+ * - try to move and overlap blocks that are not already adjacent
+ *
+ * Does nothing if pErrorCode is NULL or already indicates a failure, or if the
+ * trie is already compacted; sets U_ILLEGAL_ARGUMENT_ERROR for a NULL trie.
+ * On return, the index entries hold the new block positions and
+ * trie->dataLength is the compacted length. This function does not itself
+ * set trie->isCompacted.
+ */
+static void
+utrie_compact(UNewTrie *trie, UBool overlap, UErrorCode *pErrorCode) {
+    int32_t i, start, newStart, overlapStart;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return;
+    }
+
+    /* valid, uncompacted trie? */
+    if(trie==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    if(trie->isCompacted) {
+        return; /* nothing left to do */
+    }
+
+    /* compaction */
+
+    /* initialize the index map with "block is used/unused" flags */
+    _findUnusedBlocks(trie);
+
+    /* if Latin-1 is preallocated and linear, then do not compact Latin-1 data */
+    if(trie->isLatin1Linear && UTRIE_SHIFT<=8) {
+        overlapStart=UTRIE_DATA_BLOCK_LENGTH+256;
+    } else {
+        overlapStart=UTRIE_DATA_BLOCK_LENGTH;
+    }
+
+    newStart=UTRIE_DATA_BLOCK_LENGTH;
+    for(start=newStart; start<trie->dataLength;) {
+        /*
+         * start: index of first entry of current block
+         * newStart: index where the current block is to be moved
+         *           (right after current end of already-compacted data)
+         */
+
+        /* skip blocks that are not used */
+        if(trie->map[start>>UTRIE_SHIFT]<0) {
+            /* advance start to the next block */
+            start+=UTRIE_DATA_BLOCK_LENGTH;
+
+            /* leave newStart with the previous block! */
+            continue;
+        }
+
+        /* search for an identical block */
+        if( start>=overlapStart &&
+            (i=_findSameDataBlock(trie->data, newStart, start,
+                            overlap ? UTRIE_DATA_GRANULARITY : UTRIE_DATA_BLOCK_LENGTH))
+             >=0
+        ) {
+            /* found an identical block, set the other block's index value for the current block */
+            trie->map[start>>UTRIE_SHIFT]=i;
+
+            /* advance start to the next block */
+            start+=UTRIE_DATA_BLOCK_LENGTH;
+
+            /* leave newStart with the previous block! */
+            continue;
+        }
+
+        /* see if the beginning of this block can be overlapped with the end of the previous block */
+        if(overlap && start>=overlapStart) {
+            /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
+            for(i=UTRIE_DATA_BLOCK_LENGTH-UTRIE_DATA_GRANULARITY;
+                i>0 && !equal_uint32(trie->data+(newStart-i), trie->data+start, i);
+                i-=UTRIE_DATA_GRANULARITY) {}
+        } else {
+            i=0;
+        }
+
+        if(i>0) {
+            /* some overlap */
+            trie->map[start>>UTRIE_SHIFT]=newStart-i;
+
+            /* move the non-overlapping indexes to their new positions */
+            start+=i;
+            for(i=UTRIE_DATA_BLOCK_LENGTH-i; i>0; --i) {
+                trie->data[newStart++]=trie->data[start++];
+            }
+        } else if(newStart<start) {
+            /* no overlap, just move the indexes to their new positions */
+            trie->map[start>>UTRIE_SHIFT]=newStart;
+            for(i=UTRIE_DATA_BLOCK_LENGTH; i>0; --i) {
+                trie->data[newStart++]=trie->data[start++];
+            }
+        } else /* no overlap && newStart==start */ {
+            trie->map[start>>UTRIE_SHIFT]=start;
+            newStart+=UTRIE_DATA_BLOCK_LENGTH;
+            start=newStart;
+        }
+    }
+
+    /* now adjust the index (stage 1) table */
+    for(i=0; i<trie->indexLength; ++i) {
+        trie->index[i]=trie->map[ABS(trie->index[i])>>UTRIE_SHIFT];
+    }
+
+#ifdef UTRIE_DEBUG
+    /* we saved some space; the arguments are cast to long, so print them with %ld */
+    printf("compacting trie: count of 32-bit words %ld->%ld\n",
+            (long)trie->dataLength, (long)newStart);
+#endif
+
+    trie->dataLength=newStart;
+}
+
+/* serialization ------------------------------------------------------------ */
+
+/*
+ * Default function for the folding value:
+ * Just store the offset (16 bits) if there is any non-initial-value entry.
+ *
+ * The offset parameter is never 0.
+ * Returning the offset itself is safe for UTRIE_SHIFT>=5 because
+ * for UTRIE_SHIFT==5 the maximum index length is UTRIE_MAX_INDEX_LENGTH==0x8800
+ * which fits into 16-bit trie values;
+ * for higher UTRIE_SHIFT, UTRIE_MAX_INDEX_LENGTH decreases.
+ *
+ * Theoretically, it would be safer for all possible UTRIE_SHIFT including
+ * those of 4 and lower to return offset>>UTRIE_SURROGATE_BLOCK_BITS
+ * which would always result in a value of 0x40..0x43f
+ * (start/end 1k blocks of supplementary Unicode code points).
+ * However, this would be uglier, and would not work for some existing
+ * binary data file formats.
+ *
+ * Also, we do not plan to change UTRIE_SHIFT because it would change binary
+ * data file formats, and we would probably not make it smaller because of
+ * the then even larger BMP index length even for empty tries.
+ */
+static uint32_t U_CALLCONV
+defaultGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
+    /* the first data entry holds the trie's initial value */
+    const uint32_t initialValue=trie->data[0];
+    /* scan the 0x400 code points that start at "start" */
+    const UChar32 limit=start+0x400;
+
+    for(UChar32 c=start; c<limit;) {
+        UBool inBlockZero;
+        const uint32_t value=utrie_get32(trie, c, &inBlockZero);
+
+        if(inBlockZero) {
+            /* utrie_get32() reported block zero: skip ahead by a whole block */
+            c+=UTRIE_DATA_BLOCK_LENGTH;
+            continue;
+        }
+        if(value!=initialValue) {
+            /* a non-initial value exists: the folded value is the offset itself */
+            return (uint32_t)offset;
+        }
+        ++c;
+    }
+
+    /* nothing but initial values in this range */
+    return 0;
+}
+
+/*
+ * Fold and compact the build-time trie if that has not happened yet, then
+ * write it to dt as a UTrieHeader followed by the 16-bit index array and
+ * the 16-bit or 32-bit data array.
+ *
+ * trie:           build-time trie; it is modified by folding/compaction
+ * dt, capacity:   output buffer and its size in bytes; dt may be NULL only if capacity==0
+ * getFoldedValue: callback for lead surrogate values; NULL selects defaultGetFoldedValue
+ * reduceTo16Bits: if true, data values are truncated to 16 bits and the index
+ *                 values are offset by indexLength
+ * pErrorCode:     in/out ICU error code
+ *
+ * Returns the serialized length in bytes. If that length exceeds capacity,
+ * nothing is written and the required length is returned (preflighting)
+ * without setting an error. Returns 0 on any failure.
+ */
+U_CAPI int32_t U_EXPORT2
+utrie_serialize(UNewTrie *trie, void *dt, int32_t capacity,
+                UNewTrieGetFoldedValue *getFoldedValue,
+                UBool reduceTo16Bits,
+                UErrorCode *pErrorCode) {
+    UTrieHeader *header;
+    uint32_t *p;
+    uint16_t *dest16;
+    int32_t i, length;
+    uint8_t* data = NULL;
+
+    /* argument check */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    if(trie==NULL || capacity<0 || (capacity>0 && dt==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    if(getFoldedValue==NULL) {
+        getFoldedValue=defaultGetFoldedValue;
+    }
+
+    data = (uint8_t*)dt;
+    /* fold and compact if necessary, also checks that indexLength is within limits */
+    if(!trie->isCompacted) {
+        /* compact once without overlap to improve folding */
+        utrie_compact(trie, FALSE, pErrorCode);
+
+        /* fold the supplementary part of the index array */
+        utrie_fold(trie, getFoldedValue, pErrorCode);
+
+        /* compact again with overlap for minimum data array length */
+        utrie_compact(trie, TRUE, pErrorCode);
+
+        trie->isCompacted=TRUE;
+        if(U_FAILURE(*pErrorCode)) {
+            return 0;
+        }
+    }
+
+    /*
+     * Is dataLength within limits?
+     * Index values are stored in 16 bits after shifting right by UTRIE_INDEX_SHIFT;
+     * a longer data array cannot be addressed, so stop instead of writing
+     * truncated index values.
+     */
+    if( (reduceTo16Bits ? (trie->dataLength+trie->indexLength) : trie->dataLength) >= UTRIE_MAX_DATA_LENGTH) {
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    /* header + 2 bytes per index entry + 2 or 4 bytes per data value */
+    length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength;
+    if(reduceTo16Bits) {
+        length+=2*trie->dataLength;
+    } else {
+        length+=4*trie->dataLength;
+    }
+
+    if(length>capacity) {
+        return length; /* preflighting */
+    }
+
+#ifdef UTRIE_DEBUG
+    printf("**UTrieLengths(serialize)** index:%6ld  data:%6ld  serialized:%6ld\n",
+           (long)trie->indexLength, (long)trie->dataLength, (long)length);
+#endif
+
+    /* set the header fields */
+    header=(UTrieHeader *)data;
+    data+=sizeof(UTrieHeader);
+
+    header->signature=0x54726965; /* "Trie" */
+    header->options=UTRIE_SHIFT | (UTRIE_INDEX_SHIFT<<UTRIE_OPTIONS_INDEX_SHIFT);
+
+    if(!reduceTo16Bits) {
+        header->options|=UTRIE_OPTIONS_DATA_IS_32_BIT;
+    }
+    if(trie->isLatin1Linear) {
+        header->options|=UTRIE_OPTIONS_LATIN1_IS_LINEAR;
+    }
+
+    header->indexLength=trie->indexLength;
+    header->dataLength=trie->dataLength;
+
+    /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */
+    if(reduceTo16Bits) {
+        /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */
+        p=(uint32_t *)trie->index;
+        dest16=(uint16_t *)data;
+        for(i=trie->indexLength; i>0; --i) {
+            *dest16++=(uint16_t)((*p++ + trie->indexLength)>>UTRIE_INDEX_SHIFT);
+        }
+
+        /* write 16-bit data values */
+        p=trie->data;
+        for(i=trie->dataLength; i>0; --i) {
+            *dest16++=(uint16_t)*p++;
+        }
+    } else {
+        /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */
+        p=(uint32_t *)trie->index;
+        dest16=(uint16_t *)data;
+        for(i=trie->indexLength; i>0; --i) {
+            *dest16++=(uint16_t)(*p++ >> UTRIE_INDEX_SHIFT);
+        }
+
+        /* write 32-bit data values right after the index */
+        uprv_memcpy(dest16, trie->data, 4*(size_t)trie->dataLength);
+    }
+
+    return length;
+}
+
+/*
+ * Default folding-offset callback, the inverse of defaultGetFoldedValue():
+ * the lead surrogate's trie value is returned unchanged as the offset.
+ */
+U_CAPI int32_t U_EXPORT2
+utrie_defaultGetFoldingOffset(uint32_t data) {
+    const int32_t foldingOffset=static_cast<int32_t>(data);
+    return foldingOffset;
+}
+
+/*
+ * Set up a runtime UTrie whose index/data pointers refer into a serialized
+ * trie (UTrieHeader + 16-bit index array + 16-bit or 32-bit data array).
+ * No data is copied; the caller must keep the buffer alive while the trie is used.
+ *
+ * trie:       output structure to fill in
+ * data:       start of the serialized trie
+ * length:     number of bytes available at data
+ * pErrorCode: in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR for NULL
+ *             pointers and to U_INVALID_FORMAT_ERROR if the data is too short,
+ *             has the wrong signature/shift values, or has invalid lengths
+ *
+ * Returns the number of bytes occupied by the serialized trie, or -1 on failure.
+ * The trie fields may be partially modified when -1 is returned.
+ */
+U_CAPI int32_t U_EXPORT2
+utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode) {
+    const UTrieHeader *header;
+    const uint16_t *p16;
+    uint32_t options;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return -1;
+    }
+
+    /* both pointers are dereferenced below */
+    if(trie==NULL || data==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return -1;
+    }
+
+    /* enough data for a trie header? */
+    if(length<(int32_t)sizeof(UTrieHeader)) {
+        *pErrorCode=U_INVALID_FORMAT_ERROR;
+        return -1;
+    }
+
+    /* check the signature */
+    header=(const UTrieHeader *)data;
+    if(header->signature!=0x54726965) {
+        *pErrorCode=U_INVALID_FORMAT_ERROR;
+        return -1;
+    }
+
+    /* get the options and check the shift values */
+    options=header->options;
+    if( (options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
+        ((options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT
+    ) {
+        *pErrorCode=U_INVALID_FORMAT_ERROR;
+        return -1;
+    }
+    trie->isLatin1Linear= (UBool)((options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0);
+
+    /* get the length values */
+    trie->indexLength=header->indexLength;
+    trie->dataLength=header->dataLength;
+
+    /*
+     * Negative lengths would pass the size comparisons below, and the first
+     * data value is always read for initialValue, so at least one must exist.
+     */
+    if(trie->indexLength<0 || trie->dataLength<=0) {
+        *pErrorCode=U_INVALID_FORMAT_ERROR;
+        return -1;
+    }
+
+    length-=(int32_t)sizeof(UTrieHeader);
+
+    /* enough data for the index? (divide instead of multiply: no int32_t overflow) */
+    if(trie->indexLength>length/2) {
+        *pErrorCode=U_INVALID_FORMAT_ERROR;
+        return -1;
+    }
+    p16=(const uint16_t *)(header+1);
+    trie->index=p16;
+    p16+=trie->indexLength;
+    length-=2*trie->indexLength;
+
+    /* get the data */
+    if(options&UTRIE_OPTIONS_DATA_IS_32_BIT) {
+        if(trie->dataLength>length/4) {
+            *pErrorCode=U_INVALID_FORMAT_ERROR;
+            return -1;
+        }
+        trie->data32=(const uint32_t *)p16;
+        trie->initialValue=trie->data32[0];
+        length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+4*trie->dataLength;
+    } else {
+        if(trie->dataLength>length/2) {
+            *pErrorCode=U_INVALID_FORMAT_ERROR;
+            return -1;
+        }
+
+        /* the "data16" data is used via the index pointer */
+        trie->data32=NULL;
+        /* the first 16-bit data value directly follows the last index entry */
+        trie->initialValue=trie->index[trie->indexLength];
+        length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+2*trie->dataLength;
+    }
+
+    trie->getFoldingOffset=utrie_defaultGetFoldingOffset;
+
+    return length;
+}
+
+/*
+ * Build an "empty" runtime trie in the caller's buffer: every code point maps
+ * to initialValue, and lead surrogate code units map to leadUnitValue.
+ *
+ * Returns the number of bytes used, or -1 if an error was already pending.
+ * If length is too small, sets U_BUFFER_OVERFLOW_ERROR and returns the
+ * required number of bytes.
+ */
+U_CAPI int32_t U_EXPORT2
+utrie_unserializeDummy(UTrie *trie,
+                       void *data, int32_t length,
+                       uint32_t initialValue, uint32_t leadUnitValue,
+                       UBool make16BitTrie,
+                       UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return -1;
+    }
+
+    /* max(Latin-1, block 0); was: UTRIE_SHIFT<=8 ? 256 : UTRIE_DATA_BLOCK_LENGTH */
+    const int32_t latin1Length=256;
+
+    /* a separate data block for lead surrogates is needed only if their value differs */
+    const UBool hasLeadBlock=(UBool)(leadUnitValue!=initialValue);
+
+    /* index entry range that covers the lead surrogate code units */
+    const int32_t leadStart=0xd800>>UTRIE_SHIFT;
+    const int32_t leadLimit=0xdc00>>UTRIE_SHIFT;
+
+    /* calculate the actual size of the dummy trie data */
+    trie->indexLength=UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT;
+    trie->dataLength=latin1Length;
+    if(hasLeadBlock) {
+        trie->dataLength+=UTRIE_DATA_BLOCK_LENGTH;
+    }
+
+    const int32_t bytesPerValue= make16BitTrie ? 2 : 4;
+    const int32_t actualLength=trie->indexLength*2+trie->dataLength*bytesPerValue;
+
+    /* enough space for the dummy trie? */
+    if(length<actualLength) {
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+        return actualLength;
+    }
+
+    trie->isLatin1Linear=TRUE;
+    trie->initialValue=initialValue;
+
+    /* the index array sits at the start of the buffer */
+    uint16_t *index16=(uint16_t *)data;
+    trie->index=index16;
+
+    if(make16BitTrie) {
+        /* 16-bit tries address data via the index pointer, so index values include indexLength */
+        const uint16_t nullBlock=(uint16_t)(trie->indexLength>>UTRIE_INDEX_SHIFT);
+        const int32_t indexCount=trie->indexLength;
+        for(int32_t k=0; k<indexCount; ++k) {
+            index16[k]=nullBlock;
+        }
+
+        if(hasLeadBlock) {
+            /* indexes for lead surrogate code units to the block after Latin-1 */
+            const uint16_t leadBlock=
+                (uint16_t)(nullBlock+(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT));
+            for(int32_t k=leadStart; k<leadLimit; ++k) {
+                index16[k]=leadBlock;
+            }
+        }
+
+        trie->data32=NULL;
+
+        /* Latin-1 data, followed by the data for lead surrogate code units */
+        uint16_t *values16=index16+trie->indexLength;
+        int32_t k=0;
+        while(k<latin1Length) {
+            values16[k++]=(uint16_t)initialValue;
+        }
+        if(hasLeadBlock) {
+            const int32_t dataLimit=latin1Length+UTRIE_DATA_BLOCK_LENGTH;
+            while(k<dataLimit) {
+                values16[k++]=(uint16_t)leadUnitValue;
+            }
+        }
+    } else {
+        /* indexes to block 0 */
+        uprv_memset(index16, 0, trie->indexLength*2);
+
+        if(hasLeadBlock) {
+            /* indexes for lead surrogate code units to the block after Latin-1 */
+            const uint16_t leadBlock=(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT);
+            for(int32_t k=leadStart; k<leadLimit; ++k) {
+                index16[k]=leadBlock;
+            }
+        }
+
+        /* the 32-bit data array directly follows the index array */
+        uint32_t *values32=(uint32_t *)(index16+trie->indexLength);
+        trie->data32=values32;
+
+        /* Latin-1 data, followed by the data for lead surrogate code units */
+        int32_t k=0;
+        while(k<latin1Length) {
+            values32[k++]=initialValue;
+        }
+        if(hasLeadBlock) {
+            const int32_t dataLimit=latin1Length+UTRIE_DATA_BLOCK_LENGTH;
+            while(k<dataLimit) {
+                values32[k++]=leadUnitValue;
+            }
+        }
+    }
+
+    trie->getFoldingOffset=utrie_defaultGetFoldingOffset;
+
+    return actualLength;
+}
+
+/* enumeration -------------------------------------------------------------- */
+
+/*
+ * Default UTrieEnumValue() callback, used by utrie_enum() when the caller
+ * passes enumValue==NULL: returns the input value itself and ignores context.
+ */
+static uint32_t U_CALLCONV
+enumSameValue(const void * /*context*/, uint32_t value) {
+ return value;
+}
+
+/**
+ * Enumerate all ranges of code points with the same relevant values.
+ * The values are transformed from the raw trie entries by the enumValue function.
+ *
+ * Returns without calling anything if trie, trie->index or enumRange is NULL.
+ * If enumValue is NULL, the raw trie values are used unchanged.
+ * enumRange is called with [start, limit[ ranges; when it returns false the
+ * enumeration stops immediately. The return value of the final call is ignored.
+ */
+U_CAPI void U_EXPORT2
+utrie_enum(const UTrie *trie,
+ UTrieEnumValue *enumValue, UTrieEnumRange *enumRange, const void *context) {
+ const uint32_t *data32;
+ const uint16_t *idx;
+
+ uint32_t value, prevValue, initialValue;
+ UChar32 c, prev;
+ int32_t l, i, j, block, prevBlock, nullBlock, offset;
+
+ /* check arguments */
+ if(trie==NULL || trie->index==NULL || enumRange==NULL) {
+ return;
+ }
+ if(enumValue==NULL) {
+ enumValue=enumSameValue;
+ }
+
+ idx=trie->index;
+ data32=trie->data32;
+
+ /* get the enumeration value that corresponds to an initial-value trie data entry */
+ initialValue=enumValue(context, trie->initialValue);
+
+ /*
+ * nullBlock is the data offset of the all-initial-value block:
+ * with 16-bit data (data32==NULL) the data is addressed via idx and so
+ * starts at indexLength; with 32-bit data it starts at 0.
+ */
+ if(data32==NULL) {
+ nullBlock=trie->indexLength;
+ } else {
+ nullBlock=0;
+ }
+
+ /*
+ * set variables for previous range:
+ * prev/prevValue describe the pending range [prev, c[ that has not been delivered yet;
+ * prevBlock is the last block seen if it was filled with one value, otherwise -1
+ */
+ prevBlock=nullBlock;
+ prev=0;
+ prevValue=initialValue;
+
+ /* enumerate BMP - the main loop enumerates data blocks */
+ for(i=0, c=0; c<=0xffff; ++i) {
+ if(c==0xd800) {
+ /* skip lead surrogate code _units_, go to lead surr. code _points_ */
+ i=UTRIE_BMP_INDEX_LENGTH;
+ } else if(c==0xdc00) {
+ /* go back to regular BMP code points */
+ i=c>>UTRIE_SHIFT;
+ }
+
+ block=idx[i]<<UTRIE_INDEX_SHIFT;
+ if(block==prevBlock) {
+ /* the block is the same as the previous one, and filled with value */
+ c+=UTRIE_DATA_BLOCK_LENGTH;
+ } else if(block==nullBlock) {
+ /* this is the all-initial-value block */
+ if(prevValue!=initialValue) {
+ /* the pending range ends here; deliver it if it is not empty */
+ if(prev<c) {
+ if(!enumRange(context, prev, c, prevValue)) {
+ return;
+ }
+ }
+ prevBlock=nullBlock;
+ prev=c;
+ prevValue=initialValue;
+ }
+ c+=UTRIE_DATA_BLOCK_LENGTH;
+ } else {
+ /* walk the block value by value; assume it is uniform until a change is seen at j>0 */
+ prevBlock=block;
+ for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) {
+ value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
+ if(value!=prevValue) {
+ if(prev<c) {
+ if(!enumRange(context, prev, c, prevValue)) {
+ return;
+ }
+ }
+ if(j>0) {
+ /* the block is not filled with all the same value */
+ prevBlock=-1;
+ }
+ prev=c;
+ prevValue=value;
+ }
+ ++c;
+ }
+ }
+ }
+
+ /* enumerate supplementary code points; c continues from the end of the BMP loop */
+ for(l=0xd800; l<0xdc00;) {
+ /* lead surrogate access */
+ offset=idx[l>>UTRIE_SHIFT]<<UTRIE_INDEX_SHIFT;
+ if(offset==nullBlock) {
+ /* no entries for a whole block of lead surrogates */
+ if(prevValue!=initialValue) {
+ if(prev<c) {
+ if(!enumRange(context, prev, c, prevValue)) {
+ return;
+ }
+ }
+ prevBlock=nullBlock;
+ prev=c;
+ prevValue=initialValue;
+ }
+
+ /* skip a block of lead surrogates and the 0x400 code points of each of them */
+ l+=UTRIE_DATA_BLOCK_LENGTH;
+ c+=UTRIE_DATA_BLOCK_LENGTH<<10;
+ continue;
+ }
+
+ /* raw (not enumValue-transformed) trie value of the lead surrogate code unit */
+ value= data32!=NULL ? data32[offset+(l&UTRIE_MASK)] : idx[offset+(l&UTRIE_MASK)];
+
+ /* enumerate trail surrogates for this lead surrogate */
+ offset=trie->getFoldingOffset(value);
+ if(offset<=0) {
+ /* no data for this lead surrogate */
+ if(prevValue!=initialValue) {
+ if(prev<c) {
+ if(!enumRange(context, prev, c, prevValue)) {
+ return;
+ }
+ }
+ prevBlock=nullBlock;
+ prev=c;
+ prevValue=initialValue;
+ }
+
+ /* nothing else to do for the supplementary code points for this lead surrogate */
+ c+=0x400;
+ } else {
+ /* enumerate code points for this lead surrogate */
+ /* the folding offset is the first of UTRIE_SURROGATE_BLOCK_COUNT index entries */
+ i=offset;
+ offset+=UTRIE_SURROGATE_BLOCK_COUNT;
+ do {
+ /* copy of most of the body of the BMP loop */
+ block=idx[i]<<UTRIE_INDEX_SHIFT;
+ if(block==prevBlock) {
+ /* the block is the same as the previous one, and filled with value */
+ c+=UTRIE_DATA_BLOCK_LENGTH;
+ } else if(block==nullBlock) {
+ /* this is the all-initial-value block */
+ if(prevValue!=initialValue) {
+ if(prev<c) {
+ if(!enumRange(context, prev, c, prevValue)) {
+ return;
+ }
+ }
+ prevBlock=nullBlock;
+ prev=c;
+ prevValue=initialValue;
+ }
+ c+=UTRIE_DATA_BLOCK_LENGTH;
+ } else {
+ prevBlock=block;
+ for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) {
+ value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
+ if(value!=prevValue) {
+ if(prev<c) {
+ if(!enumRange(context, prev, c, prevValue)) {
+ return;
+ }
+ }
+ if(j>0) {
+ /* the block is not filled with all the same value */
+ prevBlock=-1;
+ }
+ prev=c;
+ prevValue=value;
+ }
+ ++c;
+ }
+ }
+ } while(++i<offset);
+ }
+
+ ++l;
+ }
+
+ /* deliver last range */
+ enumRange(context, prev, c, prevValue);
+}
diff --git a/thirdparty/icu4c/common/utrie.h b/thirdparty/icu4c/common/utrie.h
new file mode 100644
index 0000000000..2fd2c461ff
--- /dev/null
+++ b/thirdparty/icu4c/common/utrie.h
@@ -0,0 +1,793 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2001-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: utrie.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001nov08
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UTRIE_H__
+#define __UTRIE_H__
+
+#include "unicode/utypes.h"
+#include "unicode/utf16.h"
+
+U_CDECL_BEGIN
+
+/**
+ * \file
+ *
+ * This is a common implementation of a "folded" trie.
+ * It is a kind of compressed, serializable table of 16- or 32-bit values associated with
+ * Unicode code points (0..0x10ffff).
+ *
+ * This implementation is optimized for getting values while walking forward
+ * through a UTF-16 string.
+ * Therefore, the simplest and fastest access macros are the
+ * _FROM_LEAD() and _FROM_OFFSET_TRAIL() macros.
+ *
+ * The _FROM_BMP() macros are a little more complicated; they get values
+ * even for lead surrogate code _points_, while the _FROM_LEAD() macros
+ * get special "folded" values for lead surrogate code _units_ if
+ * there is relevant data associated with them.
+ * From such a folded value, an offset needs to be extracted to supply
+ * to the _FROM_OFFSET_TRAIL() macros.
+ *
+ * Most of the more complex (and more convenient) functions/macros call a callback function
+ * to get that offset from the folded value for a lead surrogate unit.
+ */
+
+/**
+ * Trie constants, defining shift widths, index array lengths, etc.
+ * All other constants are derived from UTRIE_SHIFT and UTRIE_INDEX_SHIFT.
+ */
+enum {
+ /** Shift size for shifting right the input index. 1..9 */
+ UTRIE_SHIFT=5,
+
+ /** Number of data values in a stage 2 (data array) block. 2, 4, 8, .., 0x200 (32 for UTRIE_SHIFT==5) */
+ UTRIE_DATA_BLOCK_LENGTH=1<<UTRIE_SHIFT,
+
+ /** Mask for getting the lower bits from the input index (0x1f for UTRIE_SHIFT==5). */
+ UTRIE_MASK=UTRIE_DATA_BLOCK_LENGTH-1,
+
+ /**
+ * Lead surrogate code points' index displacement in the index array.
+ * 0x10000-0xd800=0x2800 (displacement 0x140 for UTRIE_SHIFT==5)
+ */
+ UTRIE_LEAD_INDEX_DISP=0x2800>>UTRIE_SHIFT,
+
+ /**
+ * Shift size for shifting left the index array values.
+ * Increases possible data size with 16-bit index values at the cost
+ * of compactability.
+ * This requires blocks of stage 2 data to be aligned by UTRIE_DATA_GRANULARITY.
+ * 0..UTRIE_SHIFT
+ */
+ UTRIE_INDEX_SHIFT=2,
+
+ /** The alignment size of a stage 2 data block. Also the granularity for compaction (4 for UTRIE_INDEX_SHIFT==2). */
+ UTRIE_DATA_GRANULARITY=1<<UTRIE_INDEX_SHIFT,
+
+ /** Number of bits of a trail surrogate that are used in index table lookups (5 for UTRIE_SHIFT==5). */
+ UTRIE_SURROGATE_BLOCK_BITS=10-UTRIE_SHIFT,
+
+ /**
+ * Number of index (stage 1) entries per lead surrogate.
+ * Same as number of index entries for 1024 trail surrogates,
+ * ==0x400>>UTRIE_SHIFT (32 for UTRIE_SHIFT==5)
+ */
+ UTRIE_SURROGATE_BLOCK_COUNT=(1<<UTRIE_SURROGATE_BLOCK_BITS),
+
+ /** Length of the BMP portion of the index (stage 1) array (0x800 for UTRIE_SHIFT==5). */
+ UTRIE_BMP_INDEX_LENGTH=0x10000>>UTRIE_SHIFT
+};
+
+/**
+ * Length of the index (stage 1) array before folding.
+ * Maximum number of Unicode code points (0x110000) shifted right by UTRIE_SHIFT
+ * (0x8800 for UTRIE_SHIFT==5).
+ */
+#define UTRIE_MAX_INDEX_LENGTH (0x110000>>UTRIE_SHIFT)
+
+/**
+ * Maximum length of the runtime data (stage 2) array.
+ * Limited by 16-bit index values that are left-shifted by UTRIE_INDEX_SHIFT
+ * (0x40000 for UTRIE_INDEX_SHIFT==2).
+ */
+#define UTRIE_MAX_DATA_LENGTH (0x10000<<UTRIE_INDEX_SHIFT)
+
+/**
+ * Maximum length of the build-time data (stage 2) array.
+ * The maximum length is 0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400.
+ * (Number of Unicode code points + one all-initial-value block +
+ * possible duplicate entries for 1024 lead surrogates.)
+ */
+#define UTRIE_MAX_BUILD_TIME_DATA_LENGTH (0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400)
+
+/**
+ * Number of bytes for a dummy trie.
+ * A dummy trie is an empty runtime trie, used when a real data trie cannot
+ * be loaded.
+ * The number of bytes works for Latin-1-linear tries with 32-bit data
+ * (worst case); with UTRIE_SHIFT==5 this is (0x800+32)*2+256*4+32*4==5312 bytes.
+ *
+ * Calculation:
+ * BMP index + 1 index block for lead surrogate code points +
+ * Latin-1-linear array + 1 data block for lead surrogate code points
+ * (index entries are 2 bytes each, 32-bit data values are 4 bytes each)
+ * Latin-1: if(UTRIE_SHIFT<=8) { 256 } else { included in first data block }
+ *
+ * @see utrie_unserializeDummy
+ */
+#define UTRIE_DUMMY_SIZE ((UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT)*2+(UTRIE_SHIFT<=8?256:UTRIE_DATA_BLOCK_LENGTH)*4+UTRIE_DATA_BLOCK_LENGTH*4)
+
+/**
+ * Runtime UTrie callback function.
+ * Extract from a lead surrogate's data the
+ * index array offset of the indexes for that lead surrogate.
+ * The lookup macros and utrie_enum() treat any result <=0 as "no data".
+ * @param data data value for a surrogate from the trie, including the folding offset
+ * @return offset>=UTRIE_BMP_INDEX_LENGTH, or 0 if there is no data for the lead surrogate
+ */
+typedef int32_t U_CALLCONV
+UTrieGetFoldingOffset(uint32_t data);
+
+/**
+ * Run-time Trie structure.
+ *
+ * Either the data table is 16 bits wide and accessed via the index
+ * pointer, with each index item increased by indexLength;
+ * in this case, data32==NULL.
+ *
+ * Or the data table is 32 bits wide and accessed via the data32 pointer.
+ */
+struct UTrie {
+ const uint16_t *index; /* stage 1 index array; 16-bit data, if used, follows it at index+indexLength */
+ const uint32_t *data32; /* NULL if 16b data is used via index */
+
+ /**
+ * This function is not used in _FROM_LEAD, _FROM_BMP, and _FROM_OFFSET_TRAIL macros.
+ * If convenience macros like _GET16 or _NEXT32 are used, this function must be set.
+ *
+ * utrie_unserialize() sets a default function which simply returns
+ * the lead surrogate's value itself - which is the inverse of the default
+ * folding function used by utrie_serialize().
+ *
+ * @see UTrieGetFoldingOffset
+ */
+ UTrieGetFoldingOffset *getFoldingOffset;
+
+ int32_t indexLength, dataLength; /* numbers of index entries and data values (not bytes) */
+ uint32_t initialValue; /* returned by the lookup macros when there is no data for a code point */
+ UBool isLatin1Linear; /* true if the data for U+0000..U+00ff is stored contiguously */
+};
+
+#ifndef __UTRIE2_H__
+typedef struct UTrie UTrie;
+#endif
+
+/** Internal trie getter from an index offset (0 if c16 is a BMP code unit or a lead surrogate unit) and a 16-bit unit; evaluates c16 twice */
+#define _UTRIE_GET_RAW(trie, data, offset, c16) \
+ (trie)->data[ \
+ ((int32_t)((trie)->index[(offset)+((c16)>>UTRIE_SHIFT)])<<UTRIE_INDEX_SHIFT)+ \
+ ((c16)&UTRIE_MASK) \
+ ]
+
+/** Internal trie getter from a pair of surrogates; result temporarily holds the lead surrogate's value, and a folding offset <=0 yields initialValue */
+#define _UTRIE_GET_FROM_PAIR(trie, data, c, c2, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __offset; \
+\
+ /* get data for lead surrogate */ \
+ (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
+ __offset=(trie)->getFoldingOffset(result); \
+\
+ /* get the real data from the folded lead/trail units */ \
+ if(__offset>0) { \
+ (result)=_UTRIE_GET_RAW((trie), data, __offset, (c2)&0x3ff); \
+ } else { \
+ (result)=(resultType)((trie)->initialValue); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** Internal trie getter from a BMP code point, treating a lead surrogate (0xd800..0xdbff) as a normal code point via UTRIE_LEAD_INDEX_DISP; evaluates c16 several times */
+#define _UTRIE_GET_FROM_BMP(trie, data, c16) \
+ _UTRIE_GET_RAW(trie, data, 0xd800<=(c16) && (c16)<=0xdbff ? UTRIE_LEAD_INDEX_DISP : 0, c16)
+
+/**
+ * Internal trie getter from a code point; values above 0x10ffff (and negative ones) yield initialValue.
+ * Could be faster(?) but longer with
+ * if((c32)<=0xd7ff) { (result)=_UTRIE_GET_RAW(trie, data, 0, c32); }
+ */
+#define _UTRIE_GET(trie, data, c32, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \
+ if((uint32_t)(c32)<=0xffff) { \
+ /* BMP code points */ \
+ (result)=_UTRIE_GET_FROM_BMP(trie, data, c32); \
+ } else if((uint32_t)(c32)<=0x10ffff) { \
+ /* supplementary code point */ \
+ UChar __lead16=U16_LEAD(c32); \
+ _UTRIE_GET_FROM_PAIR(trie, data, __lead16, c32, result, resultType); \
+ } else { \
+ /* out of range */ \
+ (result)=(resultType)((trie)->initialValue); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** Internal next-post-increment: get the next code point (c, c2) and its data; a trail unit is only read if src!=limit after the lead unit */
+#define _UTRIE_NEXT(trie, data, src, limit, c, c2, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=*(src)++; \
+ if(!U16_IS_LEAD(c)) { \
+ (c2)=0; \
+ (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
+ } else if((src)!=(limit) && U16_IS_TRAIL((c2)=*(src))) { \
+ ++(src); \
+ _UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
+ } else { \
+ /* unpaired lead surrogate code point */ \
+ (c2)=0; \
+ (result)=_UTRIE_GET_RAW((trie), data, UTRIE_LEAD_INDEX_DISP, (c)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** Internal previous: get the previous code point (c, c2) and its data; a preceding lead unit is only read if start!=src after the trail unit */
+#define _UTRIE_PREVIOUS(trie, data, start, src, c, c2, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=*--(src); \
+ if(!U16_IS_SURROGATE(c)) { \
+ (c2)=0; \
+ (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
+ } else if(!U16_IS_SURROGATE_LEAD(c)) { \
+ /* trail surrogate */ \
+ if((start)!=(src) && U16_IS_LEAD((c2)=*((src)-1))) { \
+ --(src); \
+ (result)=(c); (c)=(c2); (c2)=(UChar)(result); /* swap c, c2 */ \
+ _UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
+ } else { \
+ /* unpaired trail surrogate code point */ \
+ (c2)=0; \
+ (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
+ } \
+ } else { \
+ /* unpaired lead surrogate code point */ \
+ (c2)=0; \
+ (result)=_UTRIE_GET_RAW((trie), data, UTRIE_LEAD_INDEX_DISP, (c)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/* Public UTrie API ---------------------------------------------------------*/
+
+/**
+ * Get a pointer to the contiguous part of the data array
+ * for the Latin-1 range (U+0000..U+00ff).
+ * Must be used only if the Latin-1 range is in fact linear
+ * (trie->isLatin1Linear) and the trie has 16-bit data stored after its index:
+ * the pointer is index+indexLength plus one data block.
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @return (const uint16_t *) pointer to values for Latin-1 code points
+ */
+#define UTRIE_GET16_LATIN1(trie) ((trie)->index+(trie)->indexLength+UTRIE_DATA_BLOCK_LENGTH)
+
+/**
+ * Get a pointer to the contiguous part of the data array
+ * for the Latin-1 range (U+0000..U+00ff).
+ * Must be used only if the Latin-1 range is in fact linear
+ * (trie->isLatin1Linear) and the trie has 32-bit data:
+ * the pointer is data32 plus one data block.
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @return (const uint32_t *) pointer to values for Latin-1 code points
+ */
+#define UTRIE_GET32_LATIN1(trie) ((trie)->data32+UTRIE_DATA_BLOCK_LENGTH)
+
+/**
+ * Get a 16-bit trie value from a BMP code point (UChar, <=U+ffff).
+ * c16 may be a lead surrogate, which may have a value including a folding offset.
+ * The macro evaluates c16 more than once.
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @param c16 (UChar, in) the input BMP code point
+ * @return (uint16_t) trie lookup result
+ */
+#define UTRIE_GET16_FROM_LEAD(trie, c16) _UTRIE_GET_RAW(trie, index, 0, c16)
+
+/**
+ * Get a 32-bit trie value from a BMP code point (UChar, <=U+ffff).
+ * c16 may be a lead surrogate, which may have a value including a folding offset.
+ * The macro evaluates c16 more than once.
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @param c16 (UChar, in) the input BMP code point
+ * @return (uint32_t) trie lookup result
+ */
+#define UTRIE_GET32_FROM_LEAD(trie, c16) _UTRIE_GET_RAW(trie, data32, 0, c16)
+
+/**
+ * Get a 16-bit trie value from a BMP code point (UChar, <=U+ffff).
+ * Even lead surrogate code points are treated as normal code points,
+ * with unfolded values that may differ from _FROM_LEAD() macro results for them.
+ * The macro evaluates c16 more than once.
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @param c16 (UChar, in) the input BMP code point
+ * @return (uint16_t) trie lookup result
+ */
+#define UTRIE_GET16_FROM_BMP(trie, c16) _UTRIE_GET_FROM_BMP(trie, index, c16)
+
+/**
+ * Get a 32-bit trie value from a BMP code point (UChar, <=U+ffff).
+ * Even lead surrogate code points are treated as normal code points,
+ * with unfolded values that may differ from _FROM_LEAD() macro results for them.
+ * The macro evaluates c16 more than once.
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @param c16 (UChar, in) the input BMP code point
+ * @return (uint32_t) trie lookup result
+ */
+#define UTRIE_GET32_FROM_BMP(trie, c16) _UTRIE_GET_FROM_BMP(trie, data32, c16)
+
+/**
+ * Get a 16-bit trie value from a code point.
+ * Even lead surrogate code points are treated as normal code points,
+ * with unfolded values that may differ from _FROM_LEAD() macro results for them.
+ * Supplementary code points call trie->getFoldingOffset; out-of-range input yields the initial value.
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @param c32 (UChar32, in) the input code point
+ * @param result (uint16_t, out) uint16_t variable for the trie lookup result
+ */
+#define UTRIE_GET16(trie, c32, result) _UTRIE_GET(trie, index, c32, result, uint16_t)
+
+/**
+ * Get a 32-bit trie value from a code point.
+ * Even lead surrogate code points are treated as normal code points,
+ * with unfolded values that may differ from _FROM_LEAD() macro results for them.
+ * Supplementary code points call trie->getFoldingOffset; out-of-range input yields the initial value.
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @param c32 (UChar32, in) the input code point
+ * @param result (uint32_t, out) uint32_t variable for the trie lookup result
+ */
+#define UTRIE_GET32(trie, c32, result) _UTRIE_GET(trie, data32, c32, result, uint32_t)
+
+/**
+ * Get the next code point (c, c2), post-increment src,
+ * and get a 16-bit value from the trie.
+ * An unpaired lead surrogate is looked up as a code point and sets c2 to 0.
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @param src (const UChar *, in/out) the source text pointer
+ * @param limit (const UChar *, in) the limit pointer for the text, or NULL
+ * @param c (UChar, out) variable for the BMP or lead code unit
+ * @param c2 (UChar, out) variable for 0 or the trail code unit
+ * @param result (uint16_t, out) uint16_t variable for the trie lookup result
+ */
+#define UTRIE_NEXT16(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, index, src, limit, c, c2, result, uint16_t)
+
+/**
+ * Get the next code point (c, c2), post-increment src,
+ * and get a 32-bit value from the trie.
+ * An unpaired lead surrogate is looked up as a code point and sets c2 to 0.
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @param src (const UChar *, in/out) the source text pointer
+ * @param limit (const UChar *, in) the limit pointer for the text, or NULL
+ * @param c (UChar, out) variable for the BMP or lead code unit
+ * @param c2 (UChar, out) variable for 0 or the trail code unit
+ * @param result (uint32_t, out) uint32_t variable for the trie lookup result
+ */
+#define UTRIE_NEXT32(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, data32, src, limit, c, c2, result, uint32_t)
+
+/**
+ * Get the previous code point (c, c2), pre-decrement src,
+ * and get a 16-bit value from the trie.
+ * Unpaired surrogates are looked up on their own and set c2 to 0.
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @param start (const UChar *, in) the start pointer for the text, or NULL
+ * @param src (const UChar *, in/out) the source text pointer
+ * @param c (UChar, out) variable for the BMP or lead code unit
+ * @param c2 (UChar, out) variable for 0 or the trail code unit
+ * @param result (uint16_t, out) uint16_t variable for the trie lookup result
+ */
+#define UTRIE_PREVIOUS16(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, index, start, src, c, c2, result, uint16_t)
+
+/**
+ * Get the previous code point (c, c2), pre-decrement src,
+ * and get a 32-bit value from the trie.
+ * Unpaired surrogates are looked up on their own and set c2 to 0.
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @param start (const UChar *, in) the start pointer for the text, or NULL
+ * @param src (const UChar *, in/out) the source text pointer
+ * @param c (UChar, out) variable for the BMP or lead code unit
+ * @param c2 (UChar, out) variable for 0 or the trail code unit
+ * @param result (uint32_t, out) uint32_t variable for the trie lookup result
+ */
+#define UTRIE_PREVIOUS32(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, data32, start, src, c, c2, result, uint32_t)
+
+/**
+ * Get a 16-bit trie value from a pair of surrogates.
+ * Calls trie->getFoldingOffset; if it returns <=0 the result is the trie's initial value.
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @param c (UChar, in) a lead surrogate
+ * @param c2 (UChar, in) a trail surrogate
+ * @param result (uint16_t, out) uint16_t variable for the trie lookup result
+ */
+#define UTRIE_GET16_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, index, c, c2, result, uint16_t)
+
+/**
+ * Get a 32-bit trie value from a pair of surrogates.
+ * Calls trie->getFoldingOffset; if it returns <=0 the result is the trie's initial value.
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @param c (UChar, in) a lead surrogate
+ * @param c2 (UChar, in) a trail surrogate
+ * @param result (uint32_t, out) uint32_t variable for the trie lookup result
+ */
+#define UTRIE_GET32_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, data32, c, c2, result, uint32_t)
+
+/**
+ * Get a 16-bit trie value from a folding offset (from the value of a lead surrogate)
+ * and a trail surrogate.
+ *
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @param offset (int32_t, in) the folding offset from the value of a lead surrogate
+ * @param c2 (UChar, in) a trail surrogate (only the 10 low bits are significant)
+ * @return (uint16_t) trie lookup result
+ */
+#define UTRIE_GET16_FROM_OFFSET_TRAIL(trie, offset, c2) _UTRIE_GET_RAW(trie, index, offset, (c2)&0x3ff)
+
+/**
+ * Get a 32-bit trie value from a folding offset (from the value of a lead surrogate)
+ * and a trail surrogate.
+ *
+ * @param trie (const UTrie *, in) a pointer to the runtime trie structure
+ * @param offset (int32_t, in) the folding offset from the value of a lead surrogate
+ * @param c2 (UChar, in) a trail surrogate (only the 10 low bits are significant)
+ * @return (uint32_t) trie lookup result
+ */
+#define UTRIE_GET32_FROM_OFFSET_TRAIL(trie, offset, c2) _UTRIE_GET_RAW(trie, data32, offset, (c2)&0x3ff)
+
+/* enumeration callback types */
+
+/**
+ * Callback from utrie_enum(), extracts a uint32_t value from a
+ * trie value. This value will be passed on to the UTrieEnumRange function.
+ *
+ * @param context an opaque pointer, as passed into utrie_enum()
+ * @param value a value from the trie
+ * @return the value that is to be passed on to the UTrieEnumRange function
+ */
+typedef uint32_t U_CALLCONV
+UTrieEnumValue(const void *context, uint32_t value);
+
+/**
+ * Callback from utrie_enum(), is called for each contiguous range
+ * of code points with the same value as retrieved from the trie and
+ * transformed by the UTrieEnumValue function.
+ *
+ * The callback function can stop the enumeration by returning false.
+ *
+ * @param context an opaque pointer, as passed into utrie_enum()
+ * @param start the first code point in a contiguous range with value
+ * @param limit one past the last code point in a contiguous range with value
+ * @param value the value that is set for all code points in [start..limit[
+ * @return false to stop the enumeration
+ */
+typedef UBool U_CALLCONV
+UTrieEnumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value);
+
+/**
+ * Enumerate efficiently all values in a trie.
+ * For each entry in the trie, the value to be delivered is passed through
+ * the UTrieEnumValue function.
+ * The value is unchanged if that function pointer is NULL.
+ *
+ * For each contiguous range of code points with a given value,
+ * the UTrieEnumRange function is called.
+ *
+ * @param trie a pointer to the runtime trie structure
+ * @param enumValue a pointer to a function that may transform the trie entry value,
+ * or NULL if the values from the trie are to be used directly
+ * @param enumRange a pointer to a function that is called for each contiguous range
+ * of code points with the same value
+ * @param context an opaque pointer that is passed on to the callback functions
+ */
+U_CAPI void U_EXPORT2
+utrie_enum(const UTrie *trie,
+ UTrieEnumValue *enumValue, UTrieEnumRange *enumRange, const void *context);
+
+/**
+ * Unserialize a trie from 32-bit-aligned memory.
+ * Inverse of utrie_serialize().
+ * Fills the UTrie runtime trie structure with the settings for the trie data.
+ *
+ * @param trie a pointer to the runtime trie structure
+ * @param data a pointer to 32-bit-aligned memory containing trie data
+ * @param length the number of bytes available at data
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the number of bytes at data taken up by the trie data
+ */
+U_CAPI int32_t U_EXPORT2
+utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode);
+
+/**
+ * "Unserialize" a dummy trie.
+ * A dummy trie is an empty runtime trie, used when a real data trie cannot
+ * be loaded.
+ *
+ * The input memory is filled so that the trie always returns the initialValue,
+ * or the leadUnitValue for lead surrogate code points.
+ * The Latin-1 part is always set up to be linear.
+ *
+ * @param trie a pointer to the runtime trie structure
+ * @param data a pointer to 32-bit-aligned memory to be filled with the dummy trie data
+ * @param length the number of bytes available at data (recommended to use UTRIE_DUMMY_SIZE)
+ * @param initialValue the initial value that is set for all code points
+ * @param leadUnitValue the value for lead surrogate code _units_ that do not
+ * have associated supplementary data
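+ * @param make16BitTrie NOTE(review): not documented here; presumably selects a 16-bit (true) vs. 32-bit (false) dummy trie - confirm against the implementation
+ * @param pErrorCode an in/out ICU UErrorCode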
+ *
+ * @see UTRIE_DUMMY_SIZE
+ * @see utrie_open
+ */
+U_CAPI int32_t U_EXPORT2
+utrie_unserializeDummy(UTrie *trie,
+ void *data, int32_t length,
+ uint32_t initialValue, uint32_t leadUnitValue,
+ UBool make16BitTrie,
+ UErrorCode *pErrorCode);
+
+/**
+ * Default implementation for UTrie.getFoldingOffset, set automatically by
+ * utrie_unserialize().
+ * Simply returns the lead surrogate's value itself - which is the inverse
+ * of the default folding function used by utrie_serialize().
+ * Exported for static const UTrie structures.
+ *
+ * @see UTrieGetFoldingOffset
+ */
+U_CAPI int32_t U_EXPORT2
+utrie_defaultGetFoldingOffset(uint32_t data);
+
+/* Building a trie ----------------------------------------------------------*/
+
+/**
+ * Build-time trie structure.
+ * Opaque definition, here only to make fillIn parameters possible
+ * for utrie_open() and utrie_clone().
+ * Callers are expected to go through the utrie_*() functions rather than
+ * touching the fields directly.
+ */
+struct UNewTrie {
+ /**
+ * Index values at build-time are 32 bits wide for easier processing.
+ * Bit 31 is set if the data block is used by multiple index values (from utrie_setRange32()).
+ */
+ int32_t index[UTRIE_MAX_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT];
+ uint32_t *data; /* build-time data array; caller-provided (aliasData) or allocated, see utrie_open() */
+
+ uint32_t leadUnitValue; /* NOTE(review): presumably the leadUnitValue passed to utrie_open() - confirm */
+ int32_t indexLength, dataCapacity, dataLength;
+ UBool isAllocated, isDataAllocated; /* NOTE(review): presumably record whether the struct / the data array were allocated (and so must be released by utrie_close()) - confirm */
+ UBool isLatin1Linear, isCompacted;
+
+ /**
+ * Map of adjusted indexes, used in utrie_compact().
+ * Maps from original indexes to new ones.
+ */
+ int32_t map[UTRIE_MAX_BUILD_TIME_DATA_LENGTH>>UTRIE_SHIFT];
+};
+
+typedef struct UNewTrie UNewTrie;
+
+/**
+ * Build-time trie callback function, used with utrie_serialize().
+ * This function calculates a lead surrogate's value including a folding offset
+ * from the 1024 supplementary code points [start..start+1024[ .
+ * It is U+10000 <= start <= U+10fc00 and (start&0x3ff)==0.
+ *
+ * The folding offset is provided by the caller.
+ * It is offset=UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
+ * Instead of the offset itself, n can be stored in 10 bits -
+ * or fewer if it can be assumed that few lead surrogates have associated data.
+ *
+ * The returned value must be
+ * - not zero if and only if there is relevant data
+ * for the corresponding 1024 supplementary code points
+ * - such that UTrie.getFoldingOffset(UNewTrieGetFoldedValue(..., offset))==offset
+ *
+ * @return a folded value, or 0 if there is no relevant data for the lead surrogate.
+ */
+typedef uint32_t U_CALLCONV
+UNewTrieGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset);
+
+/**
+ * Open a build-time trie structure.
+ * The size of the build-time data array is specified to avoid allocating a large
+ * array in all cases. The array itself can also be passed in.
+ *
+ * Although the trie is never fully expanded to a linear array, especially when
+ * utrie_setRange32() is used, the data array could be large during build time.
+ * The maximum length is
+ * UTRIE_MAX_BUILD_TIME_DATA_LENGTH=0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400.
+ * (Number of Unicode code points + one all-initial-value block +
+ * possible duplicate entries for 1024 lead surrogates.)
+ * (UTRIE_DATA_BLOCK_LENGTH<=0x200 in all cases.)
+ *
+ * @param fillIn a pointer to a UNewTrie structure to be initialized (will not be released), or
+ * NULL if one is to be allocated
+ * @param aliasData a pointer to a data array to be used (will not be released), or
+ * NULL if one is to be allocated
+ * @param maxDataLength the capacity of aliasData (if not NULL) or
+ * the length of the data array to be allocated
+ * @param initialValue the initial value that is set for all code points
+ * @param leadUnitValue the value for lead surrogate code _units_ that do not
+ * have associated supplementary data
+ * @param latin1Linear a flag indicating whether the Latin-1 range is to be allocated and
+ * kept in a linear, contiguous part of the data array
+ * @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie
+ */
+U_CAPI UNewTrie * U_EXPORT2
+utrie_open(UNewTrie *fillIn,
+ uint32_t *aliasData, int32_t maxDataLength,
+ uint32_t initialValue, uint32_t leadUnitValue,
+ UBool latin1Linear);
+
+/**
+ * Clone a build-time trie structure with all entries.
+ *
+ * @param fillIn like in utrie_open()
+ * @param other the build-time trie structure to clone
+ * @param aliasData like in utrie_open(),
+ * used if aliasDataLength>=(capacity of other's data array)
+ * @param aliasDataLength the length of aliasData
+ * @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie
+ */
+U_CAPI UNewTrie * U_EXPORT2
+utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_t aliasDataLength);
+
+/**
+ * Close a build-time trie structure, and release memory
+ * that was allocated by utrie_open() or utrie_clone().
+ *
+ * @param trie the build-time trie
+ */
+U_CAPI void U_EXPORT2
+utrie_close(UNewTrie *trie);
+
+/**
+ * Get the data array of a build-time trie.
+ * The data may be modified, but entries that are equal before
+ * must still be equal after modification.
+ *
+ * @param trie the build-time trie
+ * @param pLength (out) a pointer to a variable that receives the number
+ * of entries in the data array
+ * @return the data array
+ */
+U_CAPI uint32_t * U_EXPORT2
+utrie_getData(UNewTrie *trie, int32_t *pLength);
+
+/**
+ * Set a value for a code point.
+ *
+ * @param trie the build-time trie
+ * @param c the code point
+ * @param value the value
+ * @return false if a failure occurred (illegal argument or data array overrun)
+ */
+U_CAPI UBool U_EXPORT2
+utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value);
+
+/**
+ * Get a value from a code point as stored in the build-time trie.
+ *
+ * @param trie the build-time trie
+ * @param c the code point
+ * @param pInBlockZero if not NULL, then *pInBlockZero is set to true
+ * iff the value is retrieved from block 0;
+ * block 0 is the all-initial-value initial block
+ * @return the value
+ */
+U_CAPI uint32_t U_EXPORT2
+utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero);
+
+/**
+ * Set a value in a range of code points [start..limit[.
+ * All code points c with start<=c<limit will get the value if
+ * overwrite is true or if the old value is 0.
+ *
+ * @param trie the build-time trie
+ * @param start the first code point to get the value
+ * @param limit one past the last code point to get the value
+ * @param value the value
+ * @param overwrite flag for whether old non-initial values are to be overwritten
+ * @return false if a failure occurred (illegal argument or data array overrun)
+ */
+U_CAPI UBool U_EXPORT2
+utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite);
+
+/**
+ * Compact the build-time trie after all values are set, and then
+ * serialize it into 32-bit aligned memory.
+ *
+ * After this, the trie can only be serialized again and/or closed;
+ * no further values can be added.
+ *
+ * @see utrie_unserialize()
+ *
+ * @param trie the build-time trie
+ * @param data a pointer to 32-bit-aligned memory for the trie data
+ * @param capacity the number of bytes available at data
+ * @param getFoldedValue a callback function that calculates the value for
+ * a lead surrogate from all of its supplementary code points
+ * and the folding offset;
+ * if NULL, then a default function is used which returns just
+ * the input offset when there are any non-initial-value entries
+ * @param reduceTo16Bits flag for whether the values are to be reduced to a
+ * width of 16 bits for serialization and runtime
+ * @param pErrorCode a UErrorCode argument; among other possible error codes:
+ * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
+ * - U_MEMORY_ALLOCATION_ERROR if the trie data array is too small
+ * - U_INDEX_OUTOFBOUNDS_ERROR if the index or data arrays are too long after compaction for serialization
+ *
+ * @return the number of bytes written for the trie
+ */
+U_CAPI int32_t U_EXPORT2
+utrie_serialize(UNewTrie *trie, void *data, int32_t capacity,
+ UNewTrieGetFoldedValue *getFoldedValue,
+ UBool reduceTo16Bits,
+ UErrorCode *pErrorCode);
+
+/* serialization ------------------------------------------------------------ */
+
+// UTrie signature values, in platform endianness and opposite endianness.
+// The UTrie signature ASCII byte values spell "Trie".
+#define UTRIE_SIG 0x54726965
+#define UTRIE_OE_SIG 0x65697254
+
+/**
+ * Trie data structure in serialized form:
+ *
+ * UTrieHeader header;
+ * uint16_t index[header.indexLength];
+ * uint16_t or uint32_t data[header.dataLength]; (width selected by options bit 8)
+ * @internal
+ */
+typedef struct UTrieHeader {
+ /** "Trie" in big-endian US-ASCII (0x54726965) */
+ uint32_t signature;
+
+ /**
+ * options bit field:
+ * 9 1=Latin-1 data is stored linearly at data+UTRIE_DATA_BLOCK_LENGTH
+ * 8 0=16-bit data, 1=32-bit data
+ * 7..4 UTRIE_INDEX_SHIFT // 0..UTRIE_SHIFT
+ * 3..0 UTRIE_SHIFT // 1..9
+ */
+ uint32_t options;
+
+ /** indexLength is a multiple of UTRIE_SURROGATE_BLOCK_COUNT */
+ int32_t indexLength;
+
+ /** dataLength>=UTRIE_DATA_BLOCK_LENGTH */
+ int32_t dataLength;
+} UTrieHeader;
+
+/**
+ * Constants for use with UTrieHeader.options.
+ * @internal
+ */
+enum {
+ /** Mask to get the UTRIE_SHIFT value from options. */
+ UTRIE_OPTIONS_SHIFT_MASK=0xf,
+
+ /** Shift options right this much to get the UTRIE_INDEX_SHIFT value. */
+ UTRIE_OPTIONS_INDEX_SHIFT=4,
+
+ /** If set, then the data (stage 2) array is 32 bits wide. */
+ UTRIE_OPTIONS_DATA_IS_32_BIT=0x100,
+
+ /**
+ * If set, then Latin-1 data (for U+0000..U+00ff) is stored in the data (stage 2) array
+ * as a simple, linear array at data+UTRIE_DATA_BLOCK_LENGTH.
+ */
+ UTRIE_OPTIONS_LATIN1_IS_LINEAR=0x200
+};
+
+U_CDECL_END
+
+#endif
diff --git a/thirdparty/icu4c/common/utrie2.cpp b/thirdparty/icu4c/common/utrie2.cpp
new file mode 100644
index 0000000000..24ef5782c9
--- /dev/null
+++ b/thirdparty/icu4c/common/utrie2.cpp
@@ -0,0 +1,663 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2001-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: utrie2.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2008aug16 (starting from a copy of utrie.c)
+* created by: Markus W. Scherer
+*
+* This is a common implementation of a Unicode trie.
+* It is a kind of compressed, serializable table of 16- or 32-bit values associated with
+* Unicode code points (0..0x10ffff).
+* This is the second common version of a Unicode trie (hence the name UTrie2).
+* See utrie2.h for a comparison.
+*
+* This file contains only the runtime and enumeration code, for read-only access.
+* See utrie2_builder.c for the builder code.
+*/
+#include "unicode/utypes.h"
+#ifdef UCPTRIE_DEBUG
+#include "unicode/umutablecptrie.h"
+#endif
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "utrie2.h"
+#include "utrie2_impl.h"
+#include "uassert.h"
+
+/* Public UTrie2 API implementation ----------------------------------------- */
+
+/*
+ * Read one value from an unfrozen (build-time) trie.
+ *
+ * fromLSCP controls how a lead surrogate is handled: when set, c is looked up
+ * through the special lead-surrogate-code-point index-2 block; when clear, c
+ * goes through the regular index-1/index-2 tables like any other input.
+ * Everything at or above highStart shares the single value stored in the
+ * last granule of the data array (except a lead surrogate with fromLSCP clear).
+ */
+static uint32_t
+get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) {
+    if(c>=trie->highStart && (fromLSCP || !U_IS_LEAD(c))) {
+        return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY];
+    }
+
+    /* position of the index-2 entry that names the data block for c */
+    const int32_t index2Pos=
+        (fromLSCP && U_IS_LEAD(c)) ?
+            (UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
+                (c>>UTRIE2_SHIFT_2) :
+            trie->index1[c>>UTRIE2_SHIFT_1]+
+                ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
+
+    const int32_t dataBlock=trie->index2[index2Pos];
+    return trie->data[dataBlock+(c&UTRIE2_DATA_MASK)];
+}
+
+/*
+ * Get the value for code point c from either a frozen or an unfrozen trie.
+ * Frozen tries are read through the 16- or 32-bit lookup macros; for an
+ * unfrozen trie an out-of-range c yields errorValue, anything else is read
+ * from the build-time structure.
+ */
+U_CAPI uint32_t U_EXPORT2
+utrie2_get32(const UTrie2 *trie, UChar32 c) {
+    if(trie->data16!=NULL) {
+        return UTRIE2_GET16(trie, c);
+    }
+    if(trie->data32!=NULL) {
+        return UTRIE2_GET32(trie, c);
+    }
+    /* neither data array is set: fall back to the build-time trie */
+    if((uint32_t)c>0x10ffff) {
+        return trie->errorValue;
+    }
+    return get32(trie->newTrie, c, TRUE);
+}
+
+/*
+ * Get the value stored for a lead surrogate code unit (as opposed to the
+ * lead surrogate code point). Returns errorValue when c is not a lead surrogate.
+ */
+U_CAPI uint32_t U_EXPORT2
+utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) {
+    if(U_IS_LEAD(c)) {
+        if(trie->data16!=NULL) {
+            return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c);
+        }
+        if(trie->data32!=NULL) {
+            return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c);
+        }
+        /* unfrozen trie: regular table lookup, not the code-point block */
+        return get32(trie->newTrie, c, FALSE);
+    }
+    return trie->errorValue;
+}
+
+/*
+ * Combine the data index for code point c with a byte count i:
+ * the index occupies the bits above bit 2, i the low three bits.
+ * For a trie without 32-bit data, indexLength is passed to the lookup macro
+ * as the offset argument; otherwise 0.
+ */
+static inline int32_t
+u8Index(const UTrie2 *trie, UChar32 c, int32_t i) {
+    const int32_t dataOffset= trie->data32==NULL ? trie->indexLength : 0;
+    const int32_t dataIndex=_UTRIE2_INDEX_FROM_CP(trie, dataOffset, c);
+    return (dataIndex<<3)|i;
+}
+
+/*
+ * Decode the rest of a UTF-8 sequence forward from src (at most 7 bytes, and
+ * never past limit) and return the packed data index / byte count from u8Index().
+ */
+U_CAPI int32_t U_EXPORT2
+utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c,
+                           const uint8_t *src, const uint8_t *limit) {
+    /* support 64-bit pointers by avoiding cast of arbitrary difference */
+    const int32_t available= (limit-src)<=7 ? (int32_t)(limit-src) : 7;
+    int32_t consumed=0;
+    c=utf8_nextCharSafeBody(src, &consumed, available, c, -1);
+    return u8Index(trie, c, consumed);
+}
+
+/*
+ * Decode a UTF-8 sequence backward from src (looking at no more than 7 bytes,
+ * and never before start) and return the packed data index / byte count
+ * from u8Index().
+ */
+U_CAPI int32_t U_EXPORT2
+utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
+                           const uint8_t *start, const uint8_t *src) {
+    int32_t window;
+    /* support 64-bit pointers by avoiding cast of arbitrary difference */
+    if((src-start)>7) {
+        window=7;
+        start=src-7;
+    } else {
+        window=(int32_t)(src-start);
+    }
+    int32_t pos=window;
+    c=utf8_prevCharSafeBody(start, 0, &pos, c, -1);
+    /* window-pos is the number of bytes read backward from src */
+    return u8Index(trie, c, window-pos);
+}
+
+/*
+ * Open a frozen trie directly on top of serialized trie data.
+ *
+ * The data is not copied: the returned trie points into the caller's buffer
+ * (isMemoryOwned is FALSE), so the buffer must stay valid while the trie is used.
+ *
+ * @param valueBits     expected value width; must match the width recorded in the header
+ * @param data          serialized trie; rejected if its address is not a multiple of 4
+ * @param length        number of bytes available at data; must be positive
+ * @param pActualLength if not NULL, receives the number of bytes the trie occupies
+ * @param pErrorCode    in/out ICU error code; set to
+ *                      U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
+ *                      U_INVALID_FORMAT_ERROR for a bad or truncated trie,
+ *                      U_MEMORY_ALLOCATION_ERROR if the UTrie2 cannot be allocated
+ * @return the new trie, or 0 on failure
+ */
+U_CAPI UTrie2 * U_EXPORT2
+utrie2_openFromSerialized(UTrie2ValueBits valueBits,
+                          const void *data, int32_t length, int32_t *pActualLength,
+                          UErrorCode *pErrorCode) {
+    const UTrie2Header *header;
+    const uint16_t *p16;
+    int32_t actualLength;
+
+    UTrie2 tempTrie;
+    UTrie2 *trie;
+
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) ||
+        valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* enough data for a trie header? */
+    if(length<(int32_t)sizeof(UTrie2Header)) {
+        *pErrorCode=U_INVALID_FORMAT_ERROR;
+        return 0;
+    }
+
+    /* check the signature */
+    header=(const UTrie2Header *)data;
+    if(header->signature!=UTRIE2_SIG) {
+        *pErrorCode=U_INVALID_FORMAT_ERROR;
+        return 0;
+    }
+
+    /* get the options */
+    if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) {
+        *pErrorCode=U_INVALID_FORMAT_ERROR;
+        return 0;
+    }
+
+    /* get the length values and offsets */
+    uprv_memset(&tempTrie, 0, sizeof(tempTrie));
+    tempTrie.indexLength=header->indexLength;
+    tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT;
+    tempTrie.index2NullOffset=header->index2NullOffset;
+    tempTrie.dataNullOffset=header->dataNullOffset;
+
+    tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1;
+    tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY;
+    if(valueBits==UTRIE2_16_VALUE_BITS) {
+        /* 16-bit data follows the index array and is addressed from the index start */
+        tempTrie.highValueIndex+=tempTrie.indexLength;
+    }
+
+    /* calculate the actual length */
+    actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2;
+    if(valueBits==UTRIE2_16_VALUE_BITS) {
+        actualLength+=tempTrie.dataLength*2;
+    } else {
+        actualLength+=tempTrie.dataLength*4;
+    }
+    if(length<actualLength) {
+        *pErrorCode=U_INVALID_FORMAT_ERROR;  /* not enough bytes */
+        return 0;
+    }
+
+    /* allocate the trie */
+    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
+    if(trie==NULL) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
+    trie->memory=(uint32_t *)data;
+    trie->length=actualLength;
+    trie->isMemoryOwned=FALSE;
+#ifdef UTRIE2_DEBUG
+    trie->name="fromSerialized";
+#endif
+
+    /* set the pointers to its index and data arrays */
+    p16=(const uint16_t *)(header+1);
+    trie->index=p16;
+    p16+=trie->indexLength;
+
+    /* get the data */
+    switch(valueBits) {
+    case UTRIE2_16_VALUE_BITS:
+        trie->data16=p16;
+        trie->data32=NULL;
+        trie->initialValue=trie->index[trie->dataNullOffset];
+        trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET];
+        break;
+    case UTRIE2_32_VALUE_BITS:
+        trie->data16=NULL;
+        trie->data32=(const uint32_t *)p16;
+        trie->initialValue=trie->data32[trie->dataNullOffset];
+        trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET];
+        break;
+    default:
+        /*
+         * Should not be reachable after the valueBits range check above,
+         * but if it ever is, release the struct instead of leaking it.
+         * The data buffer belongs to the caller and is not freed.
+         */
+        uprv_free(trie);
+        *pErrorCode=U_INVALID_FORMAT_ERROR;
+        return 0;
+    }
+
+    if(pActualLength!=NULL) {
+        *pActualLength=actualLength;
+    }
+    return trie;
+}
+
+/*
+ * Create a frozen "dummy" trie whose data array holds only initialValue and
+ * errorValue entries. The trie owns its memory (isMemoryOwned is TRUE) and
+ * must be released with utrie2_close().
+ *
+ * @param valueBits    selects a 16-bit or 32-bit data array
+ * @param initialValue value written to the first 0x80 data entries and to the
+ *                     highValue/reserved granule
+ * @param errorValue   value written to data entries 0x80..0xbf
+ * @param pErrorCode   in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR for a
+ *                     bad valueBits, U_MEMORY_ALLOCATION_ERROR if allocation fails
+ * @return the new trie, or 0 on failure
+ */
+U_CAPI UTrie2 * U_EXPORT2
+utrie2_openDummy(UTrie2ValueBits valueBits,
+                 uint32_t initialValue, uint32_t errorValue,
+                 UErrorCode *pErrorCode) {
+    UTrie2 *trie;
+    UTrie2Header *header;
+    uint32_t *p;
+    uint16_t *dest16;
+    int32_t indexLength, dataLength, length, i;
+    int32_t dataMove;  /* >0 if the data is moved to the end of the index array */
+
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* calculate the total length of the dummy trie data */
+    indexLength=UTRIE2_INDEX_1_OFFSET;
+    dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY;
+    length=(int32_t)sizeof(UTrie2Header)+indexLength*2;
+    if(valueBits==UTRIE2_16_VALUE_BITS) {
+        length+=dataLength*2;
+    } else {
+        length+=dataLength*4;
+    }
+
+    /* allocate the trie */
+    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
+    if(trie==NULL) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    uprv_memset(trie, 0, sizeof(UTrie2));
+    trie->memory=uprv_malloc(length);
+    if(trie->memory==NULL) {
+        uprv_free(trie);
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    trie->length=length;
+    trie->isMemoryOwned=TRUE;
+
+    /* set the UTrie2 fields */
+    if(valueBits==UTRIE2_16_VALUE_BITS) {
+        dataMove=indexLength;
+    } else {
+        dataMove=0;
+    }
+
+    trie->indexLength=indexLength;
+    trie->dataLength=dataLength;
+    trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET;
+    trie->dataNullOffset=(uint16_t)dataMove;
+    trie->initialValue=initialValue;
+    trie->errorValue=errorValue;
+    trie->highStart=0;
+    trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET;
+#ifdef UTRIE2_DEBUG
+    trie->name="dummy";
+#endif
+
+    /* set the header fields */
+    header=(UTrie2Header *)trie->memory;
+
+    header->signature=UTRIE2_SIG; /* "Tri2" */
+    header->options=(uint16_t)valueBits;
+
+    header->indexLength=(uint16_t)indexLength;
+    header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT);
+    header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET;
+    header->dataNullOffset=(uint16_t)dataMove;
+    header->shiftedHighStart=0;
+
+    /* fill the index and data arrays */
+    dest16=(uint16_t *)(header+1);
+    trie->index=dest16;
+
+    /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */
+    for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
+        *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT);  /* null data block */
+    }
+
+    /* write UTF-8 2-byte index-2 values, not right-shifted */
+    for(i=0; i<(0xc2-0xc0); ++i) {                                  /* C0..C1 */
+        *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
+    }
+    for(; i<(0xe0-0xc0); ++i) {                                     /* C2..DF */
+        *dest16++=(uint16_t)dataMove;
+    }
+
+    /* write the 16/32-bit data array */
+    switch(valueBits) {
+    case UTRIE2_16_VALUE_BITS:
+        /* write 16-bit data values */
+        trie->data16=dest16;
+        trie->data32=NULL;
+        for(i=0; i<0x80; ++i) {
+            *dest16++=(uint16_t)initialValue;
+        }
+        for(; i<0xc0; ++i) {
+            *dest16++=(uint16_t)errorValue;
+        }
+        /* highValue and reserved values */
+        for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
+            *dest16++=(uint16_t)initialValue;
+        }
+        break;
+    case UTRIE2_32_VALUE_BITS:
+        /* write 32-bit data values */
+        p=(uint32_t *)dest16;
+        trie->data16=NULL;
+        trie->data32=p;
+        for(i=0; i<0x80; ++i) {
+            *p++=initialValue;
+        }
+        for(; i<0xc0; ++i) {
+            *p++=errorValue;
+        }
+        /* highValue and reserved values */
+        for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
+            *p++=initialValue;
+        }
+        break;
+    default:
+        /*
+         * Should not be reachable after the valueBits range check above,
+         * but if it ever is, release both allocations instead of leaking them.
+         */
+        uprv_free(trie->memory);
+        uprv_free(trie);
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    return trie;
+}
+
+/*
+ * Release a trie. NULL is accepted and ignored.
+ * The serialized memory is freed only when the trie owns it; a build-time
+ * structure, if present, is freed together with its data array.
+ */
+U_CAPI void U_EXPORT2
+utrie2_close(UTrie2 *trie) {
+    if(trie==NULL) {
+        return;
+    }
+
+    if(trie->isMemoryOwned) {
+        uprv_free(trie->memory);
+    }
+
+    if(trie->newTrie!=NULL) {
+        uprv_free(trie->newTrie->data);
+#ifdef UCPTRIE_DEBUG
+        umutablecptrie_close(trie->newTrie->t3);
+#endif
+        uprv_free(trie->newTrie);
+    }
+
+    uprv_free(trie);
+}
+
+/* A trie counts as frozen when it has no build-time structure attached. */
+U_CAPI UBool U_EXPORT2
+utrie2_isFrozen(const UTrie2 *trie) {
+    const UBool hasNoBuilder=(UBool)(trie->newTrie==NULL);
+    return hasNoBuilder;
+}
+
+/*
+ * Copy the serialized form of a frozen trie into data.
+ *
+ * Returns the number of bytes the trie needs. When capacity is too small,
+ * nothing is copied and *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR, but the
+ * required length is still returned. A NULL or unfrozen trie, a negative
+ * capacity, or a non-empty destination that is NULL or not 4-byte aligned
+ * yields U_ILLEGAL_ARGUMENT_ERROR and a return value of 0.
+ */
+U_CAPI int32_t U_EXPORT2
+utrie2_serialize(const UTrie2 *trie,
+                 void *data, int32_t capacity,
+                 UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* the trie must exist, have serialized memory, and be frozen */
+    if(trie==NULL || trie->memory==NULL || trie->newTrie!=NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    /* the destination must be usable whenever it is non-empty */
+    if(capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)))) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if(capacity<trie->length) {
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+    } else {
+        uprv_memcpy(data, trie->memory, trie->length);
+    }
+    return trie->length;
+}
+
+/* enumeration -------------------------------------------------------------- */
+
+#define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b))
+
+/* default UTrie2EnumValue() returns the input value itself;
+ * enumEitherTrie() substitutes it when the caller passes a NULL enumValue,
+ * the context argument is ignored */
+static uint32_t U_CALLCONV
+enumSameValue(const void * /*context*/, uint32_t value) {
+ return value;
+}
+
+/**
+ * Enumerate all ranges of code points with the same relevant values.
+ * The values are transformed from the raw trie entries by the enumValue function.
+ *
+ * Currently requires start<limit and both start and limit must be multiples
+ * of UTRIE2_DATA_BLOCK_LENGTH.
+ *
+ * Optimizations:
+ * - Skip a whole block if we know that it is filled with a single value,
+ * and it is the same as we visited just before.
+ * - Handle the null block specially because we know a priori that it is filled
+ * with a single value.
+ *
+ * Each range is handed to enumRange as (context, first, last, value) where
+ * last is inclusive (the code passes c-1). Enumeration stops as soon as
+ * enumRange returns false; the return value of the final call is ignored.
+ * If enumRange is NULL the function does nothing; if enumValue is NULL the
+ * raw trie values are used unchanged.
+ *
+ * Works on frozen tries (reading trie->index and trie->data32) as well as
+ * on unfrozen tries (reading trie->newTrie).
+ */
+static void
+enumEitherTrie(const UTrie2 *trie,
+ UChar32 start, UChar32 limit,
+ UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
+ const uint32_t *data32;
+ const uint16_t *idx;
+
+ uint32_t value, prevValue, initialValue;
+ UChar32 c, prev, highStart;
+ int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
+
+ if(enumRange==NULL) {
+ return;
+ }
+ if(enumValue==NULL) {
+ enumValue=enumSameValue;
+ }
+
+ if(trie->newTrie==NULL) {
+ /* frozen trie */
+ idx=trie->index;
+ U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */
+ data32=trie->data32;
+
+ index2NullOffset=trie->index2NullOffset;
+ nullBlock=trie->dataNullOffset;
+ } else {
+ /* unfrozen, mutable trie */
+ idx=NULL;
+ data32=trie->newTrie->data;
+ U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */
+
+ index2NullOffset=trie->newTrie->index2NullOffset;
+ nullBlock=trie->newTrie->dataNullOffset;
+ }
+
+ highStart=trie->highStart;
+
+ /* get the enumeration value that corresponds to an initial-value trie data entry */
+ initialValue=enumValue(context, trie->initialValue);
+
+ /* set variables for previous range */
+ prevI2Block=-1;
+ prevBlock=-1;
+ prev=start; /* first code point of the range that is currently being accumulated */
+ prevValue=0;
+
+ /* enumerate index-2 blocks */
+ for(c=start; c<limit && c<highStart;) {
+ /* Code point limit for iterating inside this i2Block. */
+ UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY;
+ if(limit<tempLimit) {
+ tempLimit=limit;
+ }
+ if(c<=0xffff) {
+ if(!U_IS_SURROGATE(c)) {
+ i2Block=c>>UTRIE2_SHIFT_2;
+ } else if(U_IS_SURROGATE_LEAD(c)) {
+ /*
+ * Enumerate values for lead surrogate code points, not code units:
+ * This special block has half the normal length.
+ */
+ i2Block=UTRIE2_LSCP_INDEX_2_OFFSET;
+ tempLimit=MIN_VALUE(0xdc00, limit);
+ } else {
+ /*
+ * Switch back to the normal part of the index-2 table.
+ * Enumerate the second half of the surrogates block.
+ */
+ i2Block=0xd800>>UTRIE2_SHIFT_2;
+ tempLimit=MIN_VALUE(0xe000, limit);
+ }
+ } else {
+ /* supplementary code points */
+ if(idx!=NULL) {
+ i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+
+ (c>>UTRIE2_SHIFT_1)];
+ } else {
+ i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1];
+ }
+ if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) {
+ /*
+ * The index-2 block is the same as the previous one, and filled with prevValue.
+ * Only possible for supplementary code points because the linear-BMP index-2
+ * table creates unique i2Block values.
+ */
+ c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
+ continue;
+ }
+ }
+ prevI2Block=i2Block;
+ if(i2Block==index2NullOffset) {
+ /* this is the null index-2 block */
+ if(prevValue!=initialValue) {
+ if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
+ return;
+ }
+ prevBlock=nullBlock;
+ prev=c;
+ prevValue=initialValue;
+ }
+ c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
+ } else {
+ /* enumerate data blocks for one index-2 block */
+ int32_t i2, i2Limit;
+ i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
+ if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) {
+ i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
+ } else {
+ i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH;
+ }
+ for(; i2<i2Limit; ++i2) {
+ if(idx!=NULL) {
+ block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT;
+ } else {
+ block=trie->newTrie->index2[i2Block+i2];
+ }
+ if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) {
+ /* the block is the same as the previous one, and filled with prevValue */
+ c+=UTRIE2_DATA_BLOCK_LENGTH;
+ continue;
+ }
+ prevBlock=block;
+ if(block==nullBlock) {
+ /* this is the null data block */
+ if(prevValue!=initialValue) {
+ if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
+ return;
+ }
+ prev=c;
+ prevValue=initialValue;
+ }
+ c+=UTRIE2_DATA_BLOCK_LENGTH;
+ } else {
+ for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) {
+ value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
+ if(value!=prevValue) {
+ if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
+ return;
+ }
+ prev=c;
+ prevValue=value;
+ }
+ ++c;
+ }
+ }
+ }
+ }
+ }
+
+ if(c>limit) {
+ c=limit; /* could be higher if in the index2NullOffset */
+ } else if(c<limit) {
+ /* c==highStart<limit */
+ uint32_t highValue;
+ if(idx!=NULL) {
+ highValue=
+ data32!=NULL ?
+ data32[trie->highValueIndex] :
+ idx[trie->highValueIndex];
+ } else {
+ highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY];
+ }
+ value=enumValue(context, highValue);
+ if(value!=prevValue) {
+ if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
+ return;
+ }
+ prev=c;
+ prevValue=value;
+ }
+ c=limit;
+ }
+
+ /* deliver last range */
+ enumRange(context, prev, c-1, prevValue);
+}
+
+/* Enumerate the value ranges of the whole code point space, 0 up to (not including) 0x110000. */
+U_CAPI void U_EXPORT2
+utrie2_enum(const UTrie2 *trie,
+            UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
+    const UChar32 first=0;
+    const UChar32 end=0x110000;
+    enumEitherTrie(trie, first, end, enumValue, enumRange, context);
+}
+
+/*
+ * Enumerate the value ranges of the 0x400 supplementary code points that
+ * share the given lead surrogate. Does nothing if lead is not a lead surrogate.
+ */
+U_CAPI void U_EXPORT2
+utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
+                            UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange,
+                            const void *context) {
+    if(U16_IS_LEAD(lead)) {
+        /* first code point encoded with this lead surrogate */
+        const UChar32 first=(lead-0xd7c0)<<10;
+        enumEitherTrie(trie, first, first+0x400, enumValue, enumRange, context);
+    }
+}
+
+/* C++ convenience wrappers ------------------------------------------------- */
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Step back over one code point and return its 16-bit trie value.
+ * At the start of the text, codePoint becomes U_SENTINEL and the trie's
+ * errorValue is returned.
+ */
+uint16_t BackwardUTrie2StringIterator::previous16() {
+    codePointLimit=codePointStart;
+    if(start<codePointStart) {
+        uint16_t value;
+        UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, value);
+        return value;
+    }
+    /* nothing left before the current position */
+    codePoint=U_SENTINEL;
+    return static_cast<uint16_t>(trie->errorValue);
+}
+
+/*
+ * Advance over one code point and return its 16-bit trie value.
+ * At the limit of the text, codePoint becomes U_SENTINEL and the trie's
+ * errorValue is returned.
+ */
+uint16_t ForwardUTrie2StringIterator::next16() {
+    codePointStart=codePointLimit;
+    if(codePointLimit!=limit) {
+        uint16_t value;
+        UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, value);
+        return value;
+    }
+    /* nothing left after the current position */
+    codePoint=U_SENTINEL;
+    return static_cast<uint16_t>(trie->errorValue);
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/utrie2.h b/thirdparty/icu4c/common/utrie2.h
new file mode 100644
index 0000000000..d1e1e15a6e
--- /dev/null
+++ b/thirdparty/icu4c/common/utrie2.h
@@ -0,0 +1,955 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2001-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: utrie2.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2008aug16 (starting from a copy of utrie.h)
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UTRIE2_H__
+#define __UTRIE2_H__
+
+#include "unicode/utypes.h"
+#include "unicode/utf8.h"
+#include "putilimp.h"
+
+U_CDECL_BEGIN
+
+struct UTrie; /* forward declaration */
+#ifndef __UTRIE_H__
+typedef struct UTrie UTrie;
+#endif
+
+/**
+ * \file
+ *
+ * This is a common implementation of a Unicode trie.
+ * It is a kind of compressed, serializable table of 16- or 32-bit values associated with
+ * Unicode code points (0..0x10ffff). (A map from code points to integers.)
+ *
+ * This is the second common version of a Unicode trie (hence the name UTrie2).
+ * Compared with UTrie version 1:
+ * - Still splitting BMP code points 11:5 bits for index and data table lookups.
+ * - Still separate data for lead surrogate code _units_ vs. code _points_,
+ * but the lead surrogate code unit values are not required any more
+ * for data lookup for supplementary code points.
+ * - The "folding" mechanism is removed. In UTrie version 1, this somewhat
+ * hard-to-explain mechanism was meant to be used for optimized UTF-16
+ * processing, with application-specific encoding of indexing bits
+ * in the lead surrogate data for the associated supplementary code points.
+ * - For the last single-value code point range (ending with U+10ffff),
+ * the starting code point ("highStart") and the value are stored.
+ * - For supplementary code points U+10000..highStart-1 a three-table lookup
+ * (two index tables and one data table) is used. The first index
+ * is truncated, omitting both the BMP portion and the high range.
+ * - There is a special small index for 2-byte UTF-8, and the initial data
+ * entries are designed for fast 1/2-byte UTF-8 lookup.
+ * Starting with ICU 60, C0 and C1 are not recognized as UTF-8 lead bytes any more at all,
+ * and the associated 2-byte indexes are unused.
+ */
+
+/**
+ * Trie structure.
+ * Use only with public API macros and functions.
+ */
+struct UTrie2;
+typedef struct UTrie2 UTrie2;
+
+/* Public UTrie2 API functions: read-only access ---------------------------- */
+
+/**
+ * Selectors for the width of a UTrie2 data value.
+ */
+enum UTrie2ValueBits {
+ /** 16 bits per UTrie2 data value. */
+ UTRIE2_16_VALUE_BITS,
+ /** 32 bits per UTrie2 data value. */
+ UTRIE2_32_VALUE_BITS,
+ /** Number of selectors for the width of UTrie2 data values. */
+ UTRIE2_COUNT_VALUE_BITS
+};
+typedef enum UTrie2ValueBits UTrie2ValueBits;
+
+/**
+ * Open a frozen trie from its serialized form, stored in 32-bit-aligned memory.
+ * Inverse of utrie2_serialize().
+ * The memory must remain valid and unchanged as long as the trie is used.
+ * You must utrie2_close() the trie once you are done using it.
+ *
+ * @param valueBits selects the data entry size; results in an
+ * U_INVALID_FORMAT_ERROR if it does not match the serialized form
+ * @param data a pointer to 32-bit-aligned memory containing the serialized form of a UTrie2
+ * @param length the number of bytes available at data;
+ * can be more than necessary
+ * @param pActualLength receives the actual number of bytes at data taken up by the trie data;
+ * can be NULL
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the unserialized trie
+ *
+ * @see utrie2_open
+ * @see utrie2_serialize
+ */
+U_CAPI UTrie2 * U_EXPORT2
+utrie2_openFromSerialized(UTrie2ValueBits valueBits,
+ const void *data, int32_t length, int32_t *pActualLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Open a frozen, empty "dummy" trie.
+ * A dummy trie is an empty trie, used when a real data trie cannot
+ * be loaded. Equivalent to calling utrie2_open() and utrie2_freeze(),
+ * but without internally creating and compacting/serializing the
+ * builder data structure.
+ *
+ * The trie always returns the initialValue,
+ * or the errorValue for out-of-range code points and illegal UTF-8.
+ *
+ * You must utrie2_close() the trie once you are done using it.
+ *
+ * @param valueBits selects the data entry size
+ * @param initialValue the initial value that is set for all code points
+ * @param errorValue the value for out-of-range code points and illegal UTF-8
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the dummy trie
+ *
+ * @see utrie2_openFromSerialized
+ * @see utrie2_open
+ */
+U_CAPI UTrie2 * U_EXPORT2
+utrie2_openDummy(UTrie2ValueBits valueBits,
+ uint32_t initialValue, uint32_t errorValue,
+ UErrorCode *pErrorCode);
+
+/**
+ * Get a value from a code point as stored in the trie.
+ * Easier to use than UTRIE2_GET16() and UTRIE2_GET32() but slower.
+ * Easier to use because, unlike the macros, this function works on all UTrie2
+ * objects, frozen or not, holding 16-bit or 32-bit data values.
+ *
+ * @param trie the trie
+ * @param c the code point
+ * @return the value
+ */
+U_CAPI uint32_t U_EXPORT2
+utrie2_get32(const UTrie2 *trie, UChar32 c);
+
+/* enumeration callback types */
+
+/**
+ * Callback from utrie2_enum(), extracts a uint32_t value from a
+ * trie value. This value will be passed on to the UTrie2EnumRange function.
+ *
+ * @param context an opaque pointer, as passed into utrie2_enum()
+ * @param value a value from the trie
+ * @return the value that is to be passed on to the UTrie2EnumRange function
+ */
+typedef uint32_t U_CALLCONV
+UTrie2EnumValue(const void *context, uint32_t value);
+
+/**
+ * Callback from utrie2_enum(), is called for each contiguous range
+ * of code points with the same value as retrieved from the trie and
+ * transformed by the UTrie2EnumValue function.
+ *
+ * The callback function can stop the enumeration by returning false.
+ *
+ * @param context an opaque pointer, as passed into utrie2_enum()
+ * @param start the first code point in a contiguous range with value
+ * @param end the last code point in a contiguous range with value (inclusive)
+ * @param value the value that is set for all code points in [start..end]
+ * @return false to stop the enumeration
+ */
+typedef UBool U_CALLCONV
+UTrie2EnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value);
+
+/**
+ * Enumerate efficiently all values in a trie.
+ * Do not modify the trie during the enumeration.
+ *
+ * For each entry in the trie, the value to be delivered is passed through
+ * the UTrie2EnumValue function.
+ * The value is unchanged if that function pointer is NULL.
+ *
+ * For each contiguous range of code points with a given (transformed) value,
+ * the UTrie2EnumRange function is called.
+ *
+ * @param trie a pointer to the trie
+ * @param enumValue a pointer to a function that may transform the trie entry value,
+ * or NULL if the values from the trie are to be used directly
+ * @param enumRange a pointer to a function that is called for each contiguous range
+ * of code points with the same (transformed) value
+ * @param context an opaque pointer that is passed on to the callback functions
+ */
+U_CAPI void U_EXPORT2
+utrie2_enum(const UTrie2 *trie,
+ UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context);
+
+/* Building a trie ---------------------------------------------------------- */
+
+/**
+ * Open an empty, writable trie. At build time, 32-bit data values are used.
+ * utrie2_freeze() takes a valueBits parameter
+ * which determines the data value width in the serialized and frozen forms.
+ * You must utrie2_close() the trie once you are done using it.
+ *
+ * @param initialValue the initial value that is set for all code points
+ * @param errorValue the value for out-of-range code points and illegal UTF-8
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return a pointer to the allocated and initialized new trie
+ */
+U_CAPI UTrie2 * U_EXPORT2
+utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode);
+
+/**
+ * Clone a trie.
+ * You must utrie2_close() the clone once you are done using it.
+ *
+ * @param other the trie to clone
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return a pointer to the new trie clone
+ */
+U_CAPI UTrie2 * U_EXPORT2
+utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode);
+
+/**
+ * Clone a trie. The clone will be mutable/writable even if the other trie
+ * is frozen. (See utrie2_freeze().)
+ * You must utrie2_close() the clone once you are done using it.
+ *
+ * @param other the trie to clone
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return a pointer to the new trie clone
+ */
+U_CAPI UTrie2 * U_EXPORT2
+utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode);
+
+/**
+ * Close a trie and release associated memory.
+ *
+ * @param trie the trie
+ */
+U_CAPI void U_EXPORT2
+utrie2_close(UTrie2 *trie);
+
+/**
+ * Set a value for a code point.
+ *
+ * @param trie the unfrozen trie
+ * @param c the code point
+ * @param value the value
+ * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
+ * - U_NO_WRITE_PERMISSION if the trie is frozen
+ */
+U_CAPI void U_EXPORT2
+utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode);
+
+/**
+ * Set a value in a range of code points [start..end].
+ * All code points c with start<=c<=end will get the value if
+ * overwrite is true or if the old value is the initial value.
+ *
+ * @param trie the unfrozen trie
+ * @param start the first code point to get the value
+ * @param end the last code point to get the value (inclusive)
+ * @param value the value
+ * @param overwrite flag for whether old non-initial values are to be overwritten
+ * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
+ * - U_NO_WRITE_PERMISSION if the trie is frozen
+ */
+U_CAPI void U_EXPORT2
+utrie2_setRange32(UTrie2 *trie,
+ UChar32 start, UChar32 end,
+ uint32_t value, UBool overwrite,
+ UErrorCode *pErrorCode);
+
+/**
+ * Freeze a trie. Make it immutable (read-only) and compact it,
+ * ready for serialization and for use with fast macros.
+ * Functions to set values will fail after freezing.
+ *
+ * A trie can be frozen only once. If this function is called again with different
+ * valueBits then it will set a U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * @param trie the trie
+ * @param valueBits selects the data entry size; if smaller than 32 bits, then
+ * the values stored in the trie will be truncated
+ * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
+ * - U_INDEX_OUTOFBOUNDS_ERROR if the compacted index or data arrays are too long
+ * for serialization
+ * (the trie will be immutable and usable,
+ * but not frozen and not usable with the fast macros)
+ *
+ * @see utrie2_cloneAsThawed
+ */
+U_CAPI void U_EXPORT2
+utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode);
+
+/**
+ * Test if the trie is frozen. (See utrie2_freeze().)
+ *
+ * @param trie the trie
+ * @return true if the trie is frozen, that is, immutable, ready for serialization
+ * and for use with fast macros
+ */
+U_CAPI UBool U_EXPORT2
+utrie2_isFrozen(const UTrie2 *trie);
+
+/**
+ * Serialize a frozen trie into 32-bit aligned memory.
+ * If the trie is not frozen, then the function returns with a U_ILLEGAL_ARGUMENT_ERROR.
+ * A trie can be serialized multiple times.
+ *
+ * @param trie the frozen trie
+ * @param data a pointer to 32-bit-aligned memory to be filled with the trie data,
+ * can be NULL if capacity==0
+ * @param capacity the number of bytes available at data,
+ * or 0 for preflighting
+ * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
+ * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
+ * - U_ILLEGAL_ARGUMENT_ERROR if the trie is not frozen or the data and capacity
+ * parameters are bad
+ * @return the number of bytes written or needed for the trie
+ *
+ * @see utrie2_openFromSerialized()
+ */
+U_CAPI int32_t U_EXPORT2
+utrie2_serialize(const UTrie2 *trie,
+ void *data, int32_t capacity,
+ UErrorCode *pErrorCode);
+
+/* Public UTrie2 API: miscellaneous functions ------------------------------- */
+
+/**
+ * Build a UTrie2 (version 2) from a UTrie (version 1).
+ * Enumerates all values in the UTrie and builds a UTrie2 with the same values.
+ * The resulting UTrie2 will be frozen.
+ *
+ * @param trie1 the runtime UTrie structure to be enumerated
+ * @param errorValue the value for out-of-range code points and illegal UTF-8
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return The frozen UTrie2 with the same values as the UTrie.
+ */
+U_CAPI UTrie2 * U_EXPORT2
+utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode);
+
+/* Public UTrie2 API macros ------------------------------------------------- */
+
+/*
+ * These macros provide fast data lookup from a frozen trie.
+ * They will crash when used on an unfrozen trie.
+ */
+
+/**
+ * Return a 16-bit trie value from a code point, with range checking.
+ * Returns trie->errorValue if c is not in the range 0..U+10ffff.
+ *
+ * @param trie (const UTrie2 *, in) a frozen trie
+ * @param c (UChar32, in) the input code point
+ * @return (uint16_t) The code point's trie value.
+ */
+#define UTRIE2_GET16(trie, c) _UTRIE2_GET((trie), index, (trie)->indexLength, (c))
+
+/**
+ * Return a 32-bit trie value from a code point, with range checking.
+ * Returns trie->errorValue if c is not in the range 0..U+10ffff.
+ *
+ * @param trie (const UTrie2 *, in) a frozen trie
+ * @param c (UChar32, in) the input code point
+ * @return (uint32_t) The code point's trie value.
+ */
+#define UTRIE2_GET32(trie, c) _UTRIE2_GET((trie), data32, 0, (c))
+
+/**
+ * UTF-16: Get the next code point (UChar32 c, out), post-increment src,
+ * and get a 16-bit value from the trie.
+ *
+ * @param trie (const UTrie2 *, in) a frozen trie
+ * @param src (const UChar *, in/out) the source text pointer
+ * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated
+ * @param c (UChar32, out) variable for the code point
+ * @param result (uint16_t, out) uint16_t variable for the trie lookup result
+ */
+#define UTRIE2_U16_NEXT16(trie, src, limit, c, result) _UTRIE2_U16_NEXT(trie, index, src, limit, c, result)
+
+/**
+ * UTF-16: Get the next code point (UChar32 c, out), post-increment src,
+ * and get a 32-bit value from the trie.
+ *
+ * @param trie (const UTrie2 *, in) a frozen trie
+ * @param src (const UChar *, in/out) the source text pointer
+ * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated
+ * @param c (UChar32, out) variable for the code point
+ * @param result (uint32_t, out) uint32_t variable for the trie lookup result
+ */
+#define UTRIE2_U16_NEXT32(trie, src, limit, c, result) _UTRIE2_U16_NEXT(trie, data32, src, limit, c, result)
+
+/**
+ * UTF-16: Get the previous code point (UChar32 c, out), pre-decrement src,
+ * and get a 16-bit value from the trie.
+ *
+ * @param trie (const UTrie2 *, in) a frozen trie
+ * @param start (const UChar *, in) the start pointer for the text
+ * @param src (const UChar *, in/out) the source text pointer
+ * @param c (UChar32, out) variable for the code point
+ * @param result (uint16_t, out) uint16_t variable for the trie lookup result
+ */
+#define UTRIE2_U16_PREV16(trie, start, src, c, result) _UTRIE2_U16_PREV(trie, index, start, src, c, result)
+
+/**
+ * UTF-16: Get the previous code point (UChar32 c, out), pre-decrement src,
+ * and get a 32-bit value from the trie.
+ *
+ * @param trie (const UTrie2 *, in) a frozen trie
+ * @param start (const UChar *, in) the start pointer for the text
+ * @param src (const UChar *, in/out) the source text pointer
+ * @param c (UChar32, out) variable for the code point
+ * @param result (uint32_t, out) uint32_t variable for the trie lookup result
+ */
+#define UTRIE2_U16_PREV32(trie, start, src, c, result) _UTRIE2_U16_PREV(trie, data32, start, src, c, result)
+
+/**
+ * UTF-8: Post-increment src and get a 16-bit value from the trie.
+ *
+ * @param trie (const UTrie2 *, in) a frozen trie
+ * @param src (const char *, in/out) the source text pointer
+ * @param limit (const char *, in) the limit pointer for the text (must not be NULL)
+ * @param result (uint16_t, out) uint16_t variable for the trie lookup result
+ */
+#define UTRIE2_U8_NEXT16(trie, src, limit, result)\
+ _UTRIE2_U8_NEXT(trie, data16, index, src, limit, result)
+
+/**
+ * UTF-8: Post-increment src and get a 32-bit value from the trie.
+ *
+ * @param trie (const UTrie2 *, in) a frozen trie
+ * @param src (const char *, in/out) the source text pointer
+ * @param limit (const char *, in) the limit pointer for the text (must not be NULL)
+ * @param result (uint32_t, out) uint32_t variable for the trie lookup result
+ */
+#define UTRIE2_U8_NEXT32(trie, src, limit, result) \
+ _UTRIE2_U8_NEXT(trie, data32, data32, src, limit, result)
+
+/**
+ * UTF-8: Pre-decrement src and get a 16-bit value from the trie.
+ *
+ * @param trie (const UTrie2 *, in) a frozen trie
+ * @param start (const char *, in) the start pointer for the text
+ * @param src (const char *, in/out) the source text pointer
+ * @param result (uint16_t, out) uint16_t variable for the trie lookup result
+ */
+#define UTRIE2_U8_PREV16(trie, start, src, result) \
+ _UTRIE2_U8_PREV(trie, data16, index, start, src, result)
+
+/**
+ * UTF-8: Pre-decrement src and get a 32-bit value from the trie.
+ *
+ * @param trie (const UTrie2 *, in) a frozen trie
+ * @param start (const char *, in) the start pointer for the text
+ * @param src (const char *, in/out) the source text pointer
+ * @param result (uint32_t, out) uint32_t variable for the trie lookup result
+ */
+#define UTRIE2_U8_PREV32(trie, start, src, result) \
+ _UTRIE2_U8_PREV(trie, data32, data32, start, src, result)
+
+/* Public UTrie2 API: optimized UTF-16 access ------------------------------- */
+
+/*
+ * The following functions and macros are used for highly optimized UTF-16
+ * text processing. The UTRIE2_U16_NEXTxy() macros do not depend on these.
+ *
+ * A UTrie2 stores separate values for lead surrogate code _units_ vs. code _points_.
+ * UTF-16 text processing can be optimized by detecting surrogate pairs and
+ * assembling supplementary code points only when there is non-trivial data
+ * available.
+ *
+ * At build-time, use utrie2_enumForLeadSurrogate() to see if there
+ * is non-trivial (non-initialValue) data for any of the supplementary
+ * code points associated with a lead surrogate.
+ * If so, then set a special (application-specific) value for the
+ * lead surrogate code _unit_, with utrie2_set32ForLeadSurrogateCodeUnit().
+ *
+ * At runtime, use UTRIE2_GET16_FROM_U16_SINGLE_LEAD() or
+ * UTRIE2_GET32_FROM_U16_SINGLE_LEAD() per code unit. If there is non-trivial
+ * data and the code unit is a lead surrogate, then check if a trail surrogate
+ * follows. If so, assemble the supplementary code point with
+ * U16_GET_SUPPLEMENTARY() and look up its value with UTRIE2_GET16_FROM_SUPP()
+ * or UTRIE2_GET32_FROM_SUPP(); otherwise reset the lead
+ * surrogate's value or do a code point lookup for it.
+ *
+ * If there is only trivial data for lead and trail surrogates, then processing
+ * can often skip them. For example, in normalization or case mapping
+ * all characters that do not have any mappings are simply copied as is.
+ */
+
+/**
+ * Get a value from a lead surrogate code unit as stored in the trie.
+ *
+ * @param trie the trie
+ * @param c the code unit (U+D800..U+DBFF)
+ * @return the value
+ */
+U_CAPI uint32_t U_EXPORT2
+utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c);
+
+/**
+ * Enumerate the trie values for the 1024=0x400 code points
+ * corresponding to a given lead surrogate.
+ * For example, for the lead surrogate U+D87E it will enumerate the values
+ * for [U+2F800..U+2FC00[.
+ * Used by data builder code that sets special lead surrogate code unit values
+ * for optimized UTF-16 string processing.
+ *
+ * Do not modify the trie during the enumeration.
+ *
+ * Except for the limited code point range, this functions just like utrie2_enum():
+ * For each entry in the trie, the value to be delivered is passed through
+ * the UTrie2EnumValue function.
+ * The value is unchanged if that function pointer is NULL.
+ *
+ * For each contiguous range of code points with a given (transformed) value,
+ * the UTrie2EnumRange function is called.
+ *
+ * @param trie a pointer to the trie
+ * @param enumValue a pointer to a function that may transform the trie entry value,
+ * or NULL if the values from the trie are to be used directly
+ * @param enumRange a pointer to a function that is called for each contiguous range
+ * of code points with the same (transformed) value
+ * @param context an opaque pointer that is passed on to the callback functions
+ */
+U_CAPI void U_EXPORT2
+utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
+ UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange,
+ const void *context);
+
+/**
+ * Set a value for a lead surrogate code unit.
+ *
+ * @param trie the unfrozen trie
+ * @param lead the lead surrogate code unit (U+D800..U+DBFF)
+ * @param value the value
+ * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
+ * - U_NO_WRITE_PERMISSION if the trie is frozen
+ */
+U_CAPI void U_EXPORT2
+utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
+ UChar32 lead, uint32_t value,
+ UErrorCode *pErrorCode);
+
+/**
+ * Return a 16-bit trie value from a UTF-16 single/lead code unit (<=U+ffff).
+ * Same as UTRIE2_GET16() if c is a BMP code point except for lead surrogates,
+ * but smaller and faster.
+ *
+ * @param trie (const UTrie2 *, in) a frozen trie
+ * @param c (UChar32, in) the input code unit, must be 0<=c<=U+ffff
+ * @return (uint16_t) The code unit's trie value.
+ */
+#define UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c) _UTRIE2_GET_FROM_U16_SINGLE_LEAD((trie), index, c)
+
+/**
+ * Return a 32-bit trie value from a UTF-16 single/lead code unit (<=U+ffff).
+ * Same as UTRIE2_GET32() if c is a BMP code point except for lead surrogates,
+ * but smaller and faster.
+ *
+ * @param trie (const UTrie2 *, in) a frozen trie
+ * @param c (UChar32, in) the input code unit, must be 0<=c<=U+ffff
+ * @return (uint32_t) The code unit's trie value.
+ */
+#define UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c) _UTRIE2_GET_FROM_U16_SINGLE_LEAD((trie), data32, c)
+
+/**
+ * Return a 16-bit trie value from a supplementary code point (U+10000..U+10ffff).
+ *
+ * @param trie (const UTrie2 *, in) a frozen trie
+ * @param c (UChar32, in) the input code point, must be U+10000<=c<=U+10ffff
+ * @return (uint16_t) The code point's trie value.
+ */
+#define UTRIE2_GET16_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), index, c)
+
+/**
+ * Return a 32-bit trie value from a supplementary code point (U+10000..U+10ffff).
+ *
+ * @param trie (const UTrie2 *, in) a frozen trie
+ * @param c (UChar32, in) the input code point, must be U+10000<=c<=U+10ffff
+ * @return (uint32_t) The code point's trie value.
+ */
+#define UTRIE2_GET32_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), data32, c)
+
+U_CDECL_END
+
+/* C++ convenience wrappers ------------------------------------------------- */
+
+#ifdef __cplusplus
+
+#include "unicode/utf.h"
+#include "mutex.h"
+
+U_NAMESPACE_BEGIN
+
+// Use the Forward/Backward subclasses below.
+class UTrie2StringIterator : public UMemory {
+public:
+ UTrie2StringIterator(const UTrie2 *t, const UChar *p) :
+ trie(t), codePointStart(p), codePointLimit(p), codePoint(U_SENTINEL) {} // starts with an empty [p, p[ window and no code point
+
+ const UTrie2 *trie; // trie used for the value lookups; not modified through this pointer
+ const UChar *codePointStart, *codePointLimit; // [codePointStart, codePointLimit[ brackets the code point most recently read by next16()/previous16()
+ UChar32 codePoint; // most recently read code point; U_SENTINEL before the first read and once the text boundary is hit
+};
+
+class BackwardUTrie2StringIterator : public UTrie2StringIterator {
+public:
+ BackwardUTrie2StringIterator(const UTrie2 *t, const UChar *s, const UChar *p) :
+ UTrie2StringIterator(t, p), start(s) {} // s is the start of the text, p the position to iterate backward from
+
+ uint16_t previous16(); // reads the code point before codePointStart and returns its 16-bit trie value; at start, sets codePoint=U_SENTINEL and returns errorValue
+
+ const UChar *start; // start of the text; previous16() does not read before it
+};
+
+class ForwardUTrie2StringIterator : public UTrie2StringIterator {
+public:
+ // Iteration limit l can be NULL.
+ // In that case, the caller must detect c==0 and stop.
+ ForwardUTrie2StringIterator(const UTrie2 *t, const UChar *p, const UChar *l) :
+ UTrie2StringIterator(t, p), limit(l) {} // p is the position to iterate forward from
+
+ uint16_t next16(); // reads the code point at codePointLimit and returns its 16-bit trie value; at limit, sets codePoint=U_SENTINEL and returns errorValue
+
+ const UChar *limit; // end of the text (exclusive), or NULL as described above
+};
+
+U_NAMESPACE_END
+
+#endif
+
+/* Internal definitions ----------------------------------------------------- */
+
+U_CDECL_BEGIN
+
+/** Build-time trie structure. */
+struct UNewTrie2;
+typedef struct UNewTrie2 UNewTrie2;
+
+/*
+ * Trie structure definition.
+ *
+ * Either the data table is 16 bits wide and accessed via the index
+ * pointer, with each index item increased by indexLength;
+ * in this case, data32==NULL, and data16 is used for direct ASCII access.
+ *
+ * Or the data table is 32 bits wide and accessed via the data32 pointer.
+ */
+struct UTrie2 {
+ /* protected: used by macros and functions for reading values */
+ const uint16_t *index; /* index array; the 16-bit getter macros also read data values through this pointer */
+ const uint16_t *data16; /* for fast UTF-8 ASCII access, if 16b data */
+ const uint32_t *data32; /* NULL if 16b data is used via index */
+
+ int32_t indexLength, dataLength; /* UTRIE2_GET16() passes indexLength as the offset added to UTRIE2_BAD_UTF8_DATA_OFFSET; UTRIE2_GET32() passes 0 */
+ uint16_t index2NullOffset; /* 0xffff if there is no dedicated index-2 null block */
+ uint16_t dataNullOffset; /* NOTE(review): presumably the data-array counterpart of index2NullOffset -- confirm in utrie2.cpp */
+ uint32_t initialValue; /* the value that is set for all code points when the trie is opened */
+ /** Value returned for out-of-range code points and illegal UTF-8. */
+ uint32_t errorValue;
+
+ /* Start of the last range which ends at U+10ffff, and its value. */
+ UChar32 highStart; /* lookups for supplementary c>=highStart skip the index tables and use highValueIndex */
+ int32_t highValueIndex; /* position of the high range's value in the array being read (index for 16-bit data, data32 otherwise) */
+
+ /* private: used by builder and unserialization functions */
+ void *memory; /* serialized bytes; NULL if not frozen yet */
+ int32_t length; /* number of serialized bytes at memory; 0 if not frozen yet */
+ UBool isMemoryOwned; /* true if the trie owns the memory */
+ UBool padding1; /* NOTE(review): not read by any visible code; presumably alignment filler */
+ int16_t padding2; /* NOTE(review): not read by any visible code; presumably alignment filler */
+ UNewTrie2 *newTrie; /* builder object; NULL when frozen */
+
+#ifdef UTRIE2_DEBUG
+ const char *name; /* only present in UTRIE2_DEBUG builds */
+#endif
+};
+
+/**
+ * Trie constants, defining shift widths, index array lengths, etc.
+ *
+ * These are needed for the runtime macros but users can treat these as
+ * implementation details; the actual public API is declared further above.
+ */
+enum {
+ /** Shift size for getting the index-1 table offset. */
+ UTRIE2_SHIFT_1=6+5,
+
+ /** Shift size for getting the index-2 table offset. */
+ UTRIE2_SHIFT_2=5,
+
+ /**
+ * Difference between the two shift sizes,
+ * for getting an index-1 offset from an index-2 offset. 6=11-5
+ */
+ UTRIE2_SHIFT_1_2=UTRIE2_SHIFT_1-UTRIE2_SHIFT_2,
+
+ /**
+ * Number of index-1 entries for the BMP. 32=0x20
+ * This part of the index-1 table is omitted from the serialized form.
+ */
+ UTRIE2_OMITTED_BMP_INDEX_1_LENGTH=0x10000>>UTRIE2_SHIFT_1,
+
+ /** Number of code points per index-1 table entry. 2048=0x800 */
+ UTRIE2_CP_PER_INDEX_1_ENTRY=1<<UTRIE2_SHIFT_1,
+
+ /** Number of entries in an index-2 block. 64=0x40 */
+ UTRIE2_INDEX_2_BLOCK_LENGTH=1<<UTRIE2_SHIFT_1_2,
+
+ /** Mask for getting the lower bits for the in-index-2-block offset. */
+ UTRIE2_INDEX_2_MASK=UTRIE2_INDEX_2_BLOCK_LENGTH-1,
+
+ /** Number of entries in a data block. 32=0x20 */
+ UTRIE2_DATA_BLOCK_LENGTH=1<<UTRIE2_SHIFT_2,
+
+ /** Mask for getting the lower bits for the in-data-block offset. */
+ UTRIE2_DATA_MASK=UTRIE2_DATA_BLOCK_LENGTH-1,
+
+ /**
+ * Shift size for shifting left the index array values.
+ * Increases possible data size with 16-bit index values at the cost
+ * of compactability.
+ * This requires data blocks to be aligned by UTRIE2_DATA_GRANULARITY.
+ */
+ UTRIE2_INDEX_SHIFT=2,
+
+ /** The alignment size of a data block. Also the granularity for compaction. */
+ UTRIE2_DATA_GRANULARITY=1<<UTRIE2_INDEX_SHIFT,
+
+ /* Fixed layout of the first part of the index array. ------------------- */
+
+ /**
+ * The BMP part of the index-2 table is fixed and linear and starts at offset 0.
+ * Length=2048=0x800=0x10000>>UTRIE2_SHIFT_2.
+ */
+ UTRIE2_INDEX_2_OFFSET=0,
+
+ /**
+ * The part of the index-2 table for U+D800..U+DBFF stores values for
+ * lead surrogate code _units_ not code _points_.
+ * Values for lead surrogate code _points_ are indexed with this portion of the table.
+ * Length=32=0x20=0x400>>UTRIE2_SHIFT_2. (There are 1024=0x400 lead surrogates.)
+ */
+ UTRIE2_LSCP_INDEX_2_OFFSET=0x10000>>UTRIE2_SHIFT_2,
+ UTRIE2_LSCP_INDEX_2_LENGTH=0x400>>UTRIE2_SHIFT_2,
+
+ /** Count the lengths of both BMP pieces. 2080=0x820 */
+ UTRIE2_INDEX_2_BMP_LENGTH=UTRIE2_LSCP_INDEX_2_OFFSET+UTRIE2_LSCP_INDEX_2_LENGTH,
+
+ /**
+ * The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820.
+ * Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2.
+ */
+ UTRIE2_UTF8_2B_INDEX_2_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH,
+ UTRIE2_UTF8_2B_INDEX_2_LENGTH=0x800>>6, /* U+0800 is the first code point after 2-byte UTF-8 */
+
+ /**
+ * The index-1 table, only used for supplementary code points, at offset 2112=0x840.
+ * Variable length, for code points up to highStart, where the last single-value range starts.
+ * Maximum length 512=0x200=0x100000>>UTRIE2_SHIFT_1.
+ * (For 0x100000 supplementary code points U+10000..U+10ffff.)
+ *
+ * The part of the index-2 table for supplementary code points starts
+ * after this index-1 table.
+ *
+ * Both the index-1 table and the following part of the index-2 table
+ * are omitted completely if there is only BMP data.
+ */
+ UTRIE2_INDEX_1_OFFSET=UTRIE2_UTF8_2B_INDEX_2_OFFSET+UTRIE2_UTF8_2B_INDEX_2_LENGTH,
+ UTRIE2_MAX_INDEX_1_LENGTH=0x100000>>UTRIE2_SHIFT_1,
+
+ /*
+ * Fixed layout of the first part of the data array. -----------------------
+ * Starts with 4 blocks (128=0x80 entries) for ASCII.
+ */
+
+ /**
+ * The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80.
+ * Used with linear access for single bytes 0..0xbf for simple error handling.
+ * Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH.
+ */
+ UTRIE2_BAD_UTF8_DATA_OFFSET=0x80,
+
+ /** The start of non-linear-ASCII data blocks, at offset 192=0xc0. */
+ UTRIE2_DATA_START_OFFSET=0xc0
+};
+
+/* Internal functions and macros -------------------------------------------- */
+
+/**
+ * Internal function for part of the UTRIE2_U8_NEXTxx() macro implementations.
+ * Do not call directly.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c,
+ const uint8_t *src, const uint8_t *limit);
+
+/**
+ * Internal function for part of the UTRIE2_U8_PREVxx() macro implementations.
+ * Do not call directly.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
+ const uint8_t *start, const uint8_t *src);
+
+
+/** Internal low-level trie getter. Returns a data index. */
+#define _UTRIE2_INDEX_RAW(offset, trieIndex, c) \
+ (((int32_t)((trieIndex)[(offset)+((c)>>UTRIE2_SHIFT_2)]) \
+ <<UTRIE2_INDEX_SHIFT)+ \
+ ((c)&UTRIE2_DATA_MASK))
+
+/** Internal trie getter from a UTF-16 single/lead code unit. Returns the data index. */
+#define _UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(trieIndex, c) _UTRIE2_INDEX_RAW(0, trieIndex, c)
+
+/** Internal trie getter from a lead surrogate code point (D800..DBFF). Returns the data index. */
+#define _UTRIE2_INDEX_FROM_LSCP(trieIndex, c) \
+ _UTRIE2_INDEX_RAW(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2), trieIndex, c)
+
+/** Internal trie getter from a BMP code point. Returns the data index. */
+#define _UTRIE2_INDEX_FROM_BMP(trieIndex, c) \
+ _UTRIE2_INDEX_RAW(U_IS_LEAD(c) ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \
+ trieIndex, c)
+
+/**
+ * Internal trie getter from a supplementary code point below highStart. Returns the data index.
+ *
+ * Two-stage lookup: the index-1 entry at
+ * (UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+(c>>UTRIE2_SHIFT_1)
+ * is added to ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK) to select the index-2 entry,
+ * which is shifted left by UTRIE2_INDEX_SHIFT and combined with c&UTRIE2_DATA_MASK.
+ * This macro does not compare c with highStart itself; the caller has to do that.
+ */
+#define _UTRIE2_INDEX_FROM_SUPP(trieIndex, c) \
+ (((int32_t)((trieIndex)[ \
+ (trieIndex)[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ \
+ ((c)>>UTRIE2_SHIFT_1)]+ \
+ (((c)>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK)]) \
+ <<UTRIE2_INDEX_SHIFT)+ \
+ ((c)&UTRIE2_DATA_MASK))
+
+/**
+ * Internal trie getter from a code point, with checking that c is in 0..10FFFF.
+ * Returns the data index.
+ *
+ * Cases, tested in this order:
+ * - c<0xd800: _UTRIE2_INDEX_RAW() with offset 0
+ * - c<=0xffff: _UTRIE2_INDEX_RAW(); lead surrogates (c<=0xdbff) use the
+ *   LSCP index-2 offset, everything else offset 0
+ * - c>0x10ffff (this includes negative c because of the uint32_t cast):
+ *   (asciiOffset)+UTRIE2_BAD_UTF8_DATA_OFFSET
+ * - c>=(trie)->highStart: (trie)->highValueIndex
+ * - otherwise: _UTRIE2_INDEX_FROM_SUPP()
+ *
+ * c is evaluated several times.
+ */
+#define _UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c) \
+ ((uint32_t)(c)<0xd800 ? \
+ _UTRIE2_INDEX_RAW(0, (trie)->index, c) : \
+ (uint32_t)(c)<=0xffff ? \
+ _UTRIE2_INDEX_RAW( \
+ (c)<=0xdbff ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \
+ (trie)->index, c) : \
+ (uint32_t)(c)>0x10ffff ? \
+ (asciiOffset)+UTRIE2_BAD_UTF8_DATA_OFFSET : \
+ (c)>=(trie)->highStart ? \
+ (trie)->highValueIndex : \
+ _UTRIE2_INDEX_FROM_SUPP((trie)->index, c))
+
+/**
+ * Internal trie getter from a UTF-16 single/lead code unit. Returns the data.
+ *
+ * The data argument is substituted as the member name after (trie)->,
+ * so it selects which data array of the trie is read.
+ */
+#define _UTRIE2_GET_FROM_U16_SINGLE_LEAD(trie, data, c) \
+ (trie)->data[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD((trie)->index, c)]
+
+/**
+ * Internal trie getter from a supplementary code point. Returns the data.
+ *
+ * Code points at or above (trie)->highStart read the entry at (trie)->highValueIndex;
+ * all others go through _UTRIE2_INDEX_FROM_SUPP().
+ * The data argument is substituted as the member name after (trie)->.
+ */
+#define _UTRIE2_GET_FROM_SUPP(trie, data, c) \
+ (trie)->data[(c)>=(trie)->highStart ? (trie)->highValueIndex : \
+ _UTRIE2_INDEX_FROM_SUPP((trie)->index, c)]
+
+/**
+ * Internal trie getter from a code point, with checking that c is in 0..10FFFF.
+ * Returns the data.
+ *
+ * The data index comes from _UTRIE2_INDEX_FROM_CP(); asciiOffset is passed through
+ * to it and is only used for out-of-range code points.
+ * The data argument is substituted as the member name after (trie)->.
+ */
+#define _UTRIE2_GET(trie, data, asciiOffset, c) \
+ (trie)->data[_UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c)]
+
+/**
+ * Internal next-post-increment: get the next code point (c) and its data.
+ *
+ * Reads one code unit from src and advances src.
+ * - Not a lead surrogate: the unit is the code point; looked up as a single/lead unit.
+ * - Lead surrogate at limit, or not followed by a trail surrogate: only the
+ *   one unit is consumed; looked up via _UTRIE2_INDEX_FROM_LSCP().
+ * - Lead followed by trail: the trail unit is consumed as well, c becomes the
+ *   supplementary code point, and the value comes from _UTRIE2_GET_FROM_SUPP().
+ * The value is stored in result.
+ */
+#define _UTRIE2_U16_NEXT(trie, data, src, limit, c, result) UPRV_BLOCK_MACRO_BEGIN { \
+ { \
+ uint16_t __c2; \
+ (c)=*(src)++; \
+ if(!U16_IS_LEAD(c)) { \
+ (result)=_UTRIE2_GET_FROM_U16_SINGLE_LEAD(trie, data, c); \
+ } else if((src)==(limit) || !U16_IS_TRAIL(__c2=*(src))) { \
+ (result)=(trie)->data[_UTRIE2_INDEX_FROM_LSCP((trie)->index, c)]; \
+ } else { \
+ ++(src); \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Internal pre-decrement-previous: get the previous code point (c) and its data
+ *
+ * Decrements src and reads one code unit.
+ * - Not a trail surrogate, or at start, or not preceded by a lead surrogate:
+ *   the unit is looked up via _UTRIE2_INDEX_FROM_BMP().
+ * - Trail preceded by lead: src is decremented once more, c becomes the
+ *   supplementary code point, and the value comes from _UTRIE2_GET_FROM_SUPP().
+ * The value is stored in result.
+ */
+#define _UTRIE2_U16_PREV(trie, data, start, src, c, result) UPRV_BLOCK_MACRO_BEGIN { \
+ { \
+ uint16_t __c2; \
+ (c)=*--(src); \
+ if(!U16_IS_TRAIL(c) || (src)==(start) || !U16_IS_LEAD(__c2=*((src)-1))) { \
+ (result)=(trie)->data[_UTRIE2_INDEX_FROM_BMP((trie)->index, c)]; \
+ } else { \
+ --(src); \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Internal UTF-8 next-post-increment: get the next code point's data.
+ *
+ * Reads the lead byte from src and advances src.
+ * - U8_IS_SINGLE(lead): the value is read from (trie)->ascii[lead].
+ * - Lead byte 0xe0..0xef with two more bytes before limit that pass
+ *   U8_IS_VALID_LEAD3_AND_T1() and the second-trail-byte range check:
+ *   handled inline, src advances by 2 more bytes.
+ * - Lead byte 0xc2..0xdf with a trail byte in range before limit:
+ *   handled inline, src advances by 1 more byte.
+ * - Everything else: utrie2_internalU8NextIndex() is called; the low 3 bits
+ *   of its result are the number of further bytes to skip and the
+ *   remaining bits (result>>3) are the data index.
+ * Only the value is delivered (in result), not the code point.
+ */
+#define _UTRIE2_U8_NEXT(trie, ascii, data, src, limit, result) UPRV_BLOCK_MACRO_BEGIN { \
+ uint8_t __lead=(uint8_t)*(src)++; \
+ if(U8_IS_SINGLE(__lead)) { \
+ (result)=(trie)->ascii[__lead]; \
+ } else { \
+ uint8_t __t1, __t2; \
+ if( /* handle U+0800..U+FFFF inline */ \
+ 0xe0<=__lead && __lead<0xf0 && ((src)+1)<(limit) && \
+ U8_IS_VALID_LEAD3_AND_T1(__lead, __t1=(uint8_t)*(src)) && \
+ (__t2=(uint8_t)(*((src)+1)-0x80))<= 0x3f \
+ ) { \
+ (src)+=2; \
+ (result)=(trie)->data[ \
+ ((int32_t)((trie)->index[((__lead-0xe0)<<(12-UTRIE2_SHIFT_2))+ \
+ ((__t1&0x3f)<<(6-UTRIE2_SHIFT_2))+(__t2>>UTRIE2_SHIFT_2)]) \
+ <<UTRIE2_INDEX_SHIFT)+ \
+ (__t2&UTRIE2_DATA_MASK)]; \
+ } else if( /* handle U+0080..U+07FF inline */ \
+ __lead<0xe0 && __lead>=0xc2 && (src)<(limit) && \
+ (__t1=(uint8_t)(*(src)-0x80))<=0x3f \
+ ) { \
+ ++(src); \
+ (result)=(trie)->data[ \
+ (trie)->index[(UTRIE2_UTF8_2B_INDEX_2_OFFSET-0xc0)+__lead]+ \
+ __t1]; \
+ } else { \
+ int32_t __index=utrie2_internalU8NextIndex((trie), __lead, (const uint8_t *)(src), \
+ (const uint8_t *)(limit)); \
+ (src)+=__index&7; \
+ (result)=(trie)->data[__index>>3]; \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Internal UTF-8 pre-decrement-previous: get the previous code point's data.
+ *
+ * Decrements src and reads one byte.
+ * - U8_IS_SINGLE(byte): the value is read from (trie)->ascii[byte].
+ * - Otherwise utrie2_internalU8PrevIndex() is called; the low 3 bits of its
+ *   result are the number of further bytes to move src back by and the
+ *   remaining bits (result>>3) are the data index.
+ * Only the value is delivered (in result), not the code point.
+ */
+#define _UTRIE2_U8_PREV(trie, ascii, data, start, src, result) UPRV_BLOCK_MACRO_BEGIN { \
+ uint8_t __b=(uint8_t)*--(src); \
+ if(U8_IS_SINGLE(__b)) { \
+ (result)=(trie)->ascii[__b]; \
+ } else { \
+ int32_t __index=utrie2_internalU8PrevIndex((trie), __b, (const uint8_t *)(start), \
+ (const uint8_t *)(src)); \
+ (src)-=__index&7; \
+ (result)=(trie)->data[__index>>3]; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+U_CDECL_END
+
+#endif
diff --git a/thirdparty/icu4c/common/utrie2_builder.cpp b/thirdparty/icu4c/common/utrie2_builder.cpp
new file mode 100644
index 0000000000..8de824cc3d
--- /dev/null
+++ b/thirdparty/icu4c/common/utrie2_builder.cpp
@@ -0,0 +1,1483 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2001-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: utrie2_builder.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2008sep26 (split off from utrie2.c)
+* created by: Markus W. Scherer
+*
+* This is a common implementation of a Unicode trie.
+* It is a kind of compressed, serializable table of 16- or 32-bit values associated with
+* Unicode code points (0..0x10ffff).
+* This is the second common version of a Unicode trie (hence the name UTrie2).
+* See utrie2.h for a comparison.
+*
+* This file contains only the builder code.
+* See utrie2.c for the runtime and enumeration code.
+*/
+// #define UTRIE2_DEBUG
+#ifdef UTRIE2_DEBUG
+# include <stdio.h>
+#endif
+// #define UCPTRIE_DEBUG
+
+#include "unicode/utypes.h"
+#ifdef UCPTRIE_DEBUG
+#include "unicode/ucptrie.h"
+#include "unicode/umutablecptrie.h"
+#include "ucptrie_impl.h"
+#endif
+#include "cmemory.h"
+#include "utrie2.h"
+#include "utrie2_impl.h"
+
+#include "utrie.h" // for utrie2_fromUTrie()
+
+/* Implementation notes ----------------------------------------------------- */
+
+/*
+ * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values
+ * have been chosen to minimize trie sizes overall.
+ * Most of the code is flexible enough to work with a range of values,
+ * within certain limits.
+ *
+ * Exception: Support for separate values for lead surrogate code _units_
+ * vs. code _points_ was added after the constants were fixed,
+ * and has not been tested nor particularly designed for different constant values.
+ * (Especially the utrie2_enum() code that jumps to the special LSCP index-2
+ * part and back.)
+ *
+ * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data
+ * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH
+ * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction
+ * remapping) stops working.
+ *
+ * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate()
+ * assumes that a single index-2 block is used for 0x400 code points
+ * corresponding to one lead surrogate.
+ *
+ * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains
+ * more than one Unicode plane, and the split of the index-2 table into a BMP
+ * part and a supplementary part, with a gap in between, would not work.
+ *
+ * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because
+ * there is data with more than 64k distinct values,
+ * for example for Unihan collation with a separate collation weight per
+ * Han character.
+ */
+
+/* Building a trie ----------------------------------------------------------*/
+
+enum {
+ /**
+ * The null index-2 block, following the gap in the index-2 table.
+ * utrie2_open() fills all of its entries with UNEWTRIE2_DATA_NULL_OFFSET.
+ */
+ UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH,
+
+ /**
+ * The start of allocated index-2 blocks:
+ * right after the null index-2 block (UTRIE2_INDEX_2_BLOCK_LENGTH entries).
+ */
+ UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH,
+
+ /**
+ * The null data block.
+ * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
+ * to work with 6-bit trail bytes from 2-byte UTF-8.
+ */
+ UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET,
+
+ /**
+ * The start of allocated data blocks:
+ * right after the 0x40 entries of the null data block.
+ */
+ UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40,
+
+ /**
+ * The start of data blocks for U+0800 and above.
+ * Below, compaction uses a block length of 64 for 2-byte UTF-8.
+ * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
+ * Data values for 0x780 code points beyond ASCII.
+ */
+ UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780
+};
+
+/*
+ * Start with allocation of 16k data entries.
+ * Entries are uint32_t; utrie2_open() allocates this many times 4 bytes.
+ */
+#define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14)
+
+/*
+ * First growth step in allocDataBlock(): 8x the initial length.
+ * Once the capacity has reached this value, the next growth step
+ * goes straight to UNEWTRIE2_MAX_DATA_LENGTH.
+ */
+#define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17)
+
+static int32_t
+allocIndex2Block(UNewTrie2 *trie); /* forward declaration; defined further down in this file */
+
+/**
+ * Opens an empty, writable trie in which every code point maps to initialValue.
+ *
+ * Preallocates the ASCII data, the bad-UTF-8-data block (filled with errorValue),
+ * the null data block and the data for U+0080..U+07ff, and initializes the
+ * index-1/index-2 tables and the data block reference counts.
+ *
+ * @param initialValue the value stored for every code point until it is overwritten
+ * @param errorValue   the value stored in the bad-UTF-8-data block
+ * @param pErrorCode   in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR
+ *                     if an allocation fails
+ * @return the new trie, or NULL if *pErrorCode indicates a failure on entry or on exit.
+ *         A partially initialized trie is never returned: if any step fails
+ *         after the allocations, the trie is closed here so that callers which
+ *         only test the error code cannot leak it.
+ */
+U_CAPI UTrie2 * U_EXPORT2
+utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
+    UTrie2 *trie;
+    UNewTrie2 *newTrie;
+    uint32_t *data;
+    int32_t i, j;
+
+    if(U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+
+    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
+    newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
+    data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4);
+    if(trie==NULL || newTrie==NULL || data==NULL) {
+        /* release whichever of the three allocations succeeded */
+        uprv_free(trie);
+        uprv_free(newTrie);
+        uprv_free(data);
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    uprv_memset(trie, 0, sizeof(UTrie2));
+    trie->initialValue=initialValue;
+    trie->errorValue=errorValue;
+    trie->highStart=0x110000;
+    trie->newTrie=newTrie;
+#ifdef UTRIE2_DEBUG
+    trie->name="open";
+#endif
+
+    newTrie->data=data;
+#ifdef UCPTRIE_DEBUG
+    newTrie->t3=umutablecptrie_open(initialValue, errorValue, pErrorCode);
+#endif
+    newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH;
+    newTrie->initialValue=initialValue;
+    newTrie->errorValue=errorValue;
+    newTrie->highStart=0x110000;
+    newTrie->firstFreeBlock=0;  /* no free block in the list */
+    newTrie->isCompacted=FALSE;
+
+    /*
+     * preallocate and reset
+     * - ASCII
+     * - the bad-UTF-8-data block
+     * - the null data block
+     */
+    for(i=0; i<0x80; ++i) {
+        newTrie->data[i]=initialValue;
+    }
+    for(; i<0xc0; ++i) {
+        newTrie->data[i]=errorValue;
+    }
+    for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) {
+        newTrie->data[i]=initialValue;
+    }
+    newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET;
+    newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET;
+
+    /* set the index-2 indexes for the 0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
+    for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
+        newTrie->index2[i]=j;
+        newTrie->map[i]=1;
+    }
+    /* reference counts for the bad-UTF-8-data block */
+    for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
+        newTrie->map[i]=0;
+    }
+    /*
+     * Reference counts for the null data block: all blocks except for the ASCII blocks.
+     * Plus 1 so that we don't drop this block during compaction.
+     * Plus as many as needed for lead surrogate code points.
+     */
+    /* i==newTrie->dataNullOffset */
+    newTrie->map[i++]=
+        (0x110000>>UTRIE2_SHIFT_2)-
+        (0x80>>UTRIE2_SHIFT_2)+
+        1+
+        UTRIE2_LSCP_INDEX_2_LENGTH;
+    j+=UTRIE2_DATA_BLOCK_LENGTH;
+    for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
+        newTrie->map[i]=0;
+    }
+
+    /*
+     * set the remaining indexes in the BMP index-2 block
+     * to the null data block
+     */
+    for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
+        newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET;
+    }
+
+    /*
+     * Fill the index gap with impossible values so that compaction
+     * does not overlap other index-2 blocks with the gap.
+     */
+    for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
+        newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1;
+    }
+
+    /* set the indexes in the null index-2 block */
+    for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
+        newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET;
+    }
+    newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET;
+    newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET;
+
+    /* set the index-1 indexes for the linear index-2 block */
+    for(i=0, j=0;
+        i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
+        ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH
+    ) {
+        newTrie->index1[i]=j;
+    }
+
+    /* set the remaining index-1 indexes to the null index-2 block */
+    for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
+        newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET;
+    }
+
+    /*
+     * Preallocate and reset data for U+0080..U+07ff,
+     * for 2-byte UTF-8 which will be compacted in 64-blocks
+     * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
+     */
+    for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) {
+        utrie2_set32(trie, i, initialValue, pErrorCode);
+    }
+
+    /*
+     * Callers in this file return NULL on failure without closing the trie,
+     * so a failure reported by the steps above must not hand out a live object.
+     */
+    if(U_FAILURE(*pErrorCode)) {
+        utrie2_close(trie);
+        return NULL;
+    }
+
+    return trie;
+}
+
+/**
+ * Makes a deep copy of a build-time trie.
+ *
+ * Copies the whole index-1 table, the used parts of the index-2 table and of
+ * the data array, and the scalar fields. The reference counters (map) and the
+ * free-block list head are copied only if the source has not been compacted;
+ * for a compacted source the copy's free-block list is set to empty.
+ *
+ * @param other the builder to copy
+ * @return the copy, or NULL if one of the two allocations fails
+ */
+static UNewTrie2 *
+cloneBuilder(const UNewTrie2 *other) {
+    UNewTrie2 *copy=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
+    if(copy==NULL) {
+        return NULL;
+    }
+
+    /* same capacity as the source, even though only dataLength entries are copied */
+    copy->data=(uint32_t *)uprv_malloc(other->dataCapacity*4);
+    if(copy->data==NULL) {
+        uprv_free(copy);
+        return NULL;
+    }
+#ifdef UCPTRIE_DEBUG
+    if(other->t3!=nullptr) {
+        UErrorCode errorCode=U_ZERO_ERROR;
+        copy->t3=umutablecptrie_clone(other->t3, &errorCode);
+    } else {
+        copy->t3=nullptr;
+    }
+#endif
+    copy->dataCapacity=other->dataCapacity;
+
+    /* index tables */
+    uprv_memcpy(copy->index1, other->index1, sizeof(copy->index1));
+    uprv_memcpy(copy->index2, other->index2, (size_t)other->index2Length*4);
+    copy->index2NullOffset=other->index2NullOffset;
+    copy->index2Length=other->index2Length;
+
+    /* data values */
+    uprv_memcpy(copy->data, other->data, (size_t)other->dataLength*4);
+    copy->dataNullOffset=other->dataNullOffset;
+    copy->dataLength=other->dataLength;
+
+    /* reference counters and free-block chain */
+    if(!other->isCompacted) {
+        uprv_memcpy(copy->map, other->map, ((size_t)other->dataLength>>UTRIE2_SHIFT_2)*4);
+        copy->firstFreeBlock=other->firstFreeBlock;
+    } else {
+        copy->firstFreeBlock=0;
+    }
+
+    /* remaining scalar state */
+    copy->initialValue=other->initialValue;
+    copy->errorValue=other->errorValue;
+    copy->highStart=other->highStart;
+    copy->isCompacted=other->isCompacted;
+
+    return copy;
+}
+
+/**
+ * Clones a trie, keeping its state: a trie with serialized memory gets its own
+ * copy of that memory, a trie that only has a builder gets a copy of the builder.
+ *
+ * @param other      the trie to clone; must have serialized memory or a builder
+ * @param pErrorCode in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR for
+ *                   a NULL or empty trie and to U_MEMORY_ALLOCATION_ERROR
+ *                   if an allocation fails
+ * @return the clone, or NULL on failure
+ */
+U_CAPI UTrie2 * U_EXPORT2
+utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    UTrie2 *clone=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
+    if(clone==NULL) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    /* start from a shallow copy, then replace the owned pointers */
+    uprv_memcpy(clone, other, sizeof(UTrie2));
+
+    if(other->memory==NULL) {
+        /* other->newTrie!=NULL */
+        clone->newTrie=cloneBuilder(other->newTrie);
+    } else {
+        clone->memory=uprv_malloc(other->length);
+        if(clone->memory!=NULL) {
+            clone->isMemoryOwned=TRUE;
+            uprv_memcpy(clone->memory, other->memory, other->length);
+
+            /* rebase the pointers into the clone's own memory block */
+            clone->index=(uint16_t *)clone->memory+(other->index-(uint16_t *)other->memory);
+            if(other->data16!=NULL) {
+                clone->data16=(uint16_t *)clone->memory+(other->data16-(uint16_t *)other->memory);
+            }
+            if(other->data32!=NULL) {
+                clone->data32=(uint32_t *)clone->memory+(other->data32-(uint32_t *)other->memory);
+            }
+        }
+    }
+
+    if(clone->memory!=NULL || clone->newTrie!=NULL) {
+        return clone;
+    }
+
+    /* neither the memory nor the builder could be copied */
+    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+    uprv_free(clone);
+    return NULL;
+}
+
+typedef struct NewTrieAndStatus {
+ UTrie2 *trie; /* the trie that copyEnumRange() writes into */
+ UErrorCode errorCode; /* error code that copyEnumRange() hands to the setters */
+ UBool exclusiveLimit; /* TRUE if the enumerated "end" is an exclusive limit rather than inclusive range end */
+} NewTrieAndStatus;
+
+/**
+ * Enumeration callback: copies one range of equal values into the trie held
+ * by the NewTrieAndStatus context.
+ *
+ * Ranges that carry the target trie's initial value are skipped.
+ * If the context says that "end" is an exclusive limit, it is first turned
+ * into an inclusive end.
+ *
+ * @return TRUE to continue the enumeration; FALSE once a setter has failed
+ */
+static UBool U_CALLCONV
+copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
+    NewTrieAndStatus *state=(NewTrieAndStatus *)context;
+
+    if(value==state->trie->initialValue) {
+        return TRUE;  /* nothing to copy */
+    }
+
+    if(state->exclusiveLimit) {
+        --end;
+    }
+    if(start!=end) {
+        utrie2_setRange32(state->trie, start, end, value, TRUE, &state->errorCode);
+    } else {
+        utrie2_set32(state->trie, start, value, &state->errorCode);
+    }
+    return U_SUCCESS(state->errorCode);
+}
+
+#ifdef UTRIE2_DEBUG
+/** Debug helper: counts the data entries that are equal to the trie's initial value. */
+static long countInitial(const UTrie2 *trie) {
+    const uint32_t wanted=trie->initialValue;
+    const int32_t entries=trie->dataLength;
+    long matches=0;
+
+    if(trie->data16==nullptr) {
+        /* 32-bit data */
+        for(int32_t k=0; k<entries; ++k) {
+            if(trie->data32[k]==wanted) {
+                ++matches;
+            }
+        }
+    } else {
+        /* 16-bit data */
+        for(int32_t k=0; k<entries; ++k) {
+            if(trie->data16[k]==wanted) {
+                ++matches;
+            }
+        }
+    }
+    return matches;
+}
+
+/** Debug helper: prints the index, data and serialized lengths of a UTrie. */
+static void
+utrie_printLengths(const UTrie *trie) {
+    const long indexLength=trie->indexLength;
+    const long dataLength=(long)trie->dataLength;
+    /* 2 bytes per index entry; 4 or 2 bytes per data entry */
+    const long bytesPerValue=(trie->data32!=NULL ? 4 : 2);
+    const long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*bytesPerValue;
+
+    printf("**UTrieLengths** index:%6ld data:%6ld serialized:%6ld\n",
+           indexLength, dataLength, totalLength);
+}
+
+/**
+ * Debug helper: prints the index, data and serialized lengths of a UTrie2,
+ * plus the number of data entries that hold the initial value.
+ */
+static void
+utrie2_printLengths(const UTrie2 *trie, const char *which) {
+    const long indexLength=trie->indexLength;
+    const long dataLength=(long)trie->dataLength;
+    /* 2 bytes per index entry; 4 or 2 bytes per data entry */
+    const long bytesPerValue=(trie->data32!=NULL ? 4 : 2);
+    const long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*bytesPerValue;
+
+    printf("**UTrie2Lengths(%s %s)** index:%6ld data:%6ld countInitial:%6ld serialized:%6ld\n",
+           which, trie->name, indexLength, dataLength, countInitial(trie), totalLength);
+}
+#endif
+
+/**
+ * Returns a writable (unfrozen) copy of a trie.
+ *
+ * An unfrozen trie is simply cloned. A frozen trie is rebuilt: a new trie is
+ * opened with the same initial and error values, all ranges are copied by
+ * enumeration, and the values for lead surrogate code units are copied separately.
+ *
+ * @param other      the trie to copy; must have serialized memory or a builder
+ * @param pErrorCode in/out ICU error code
+ * @return the writable copy, or NULL on failure
+ */
+U_CAPI UTrie2 * U_EXPORT2
+utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) {
+    NewTrieAndStatus context;
+    UChar lead;
+
+    if(U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    if(other->newTrie!=NULL && !other->newTrie->isCompacted) {
+        return utrie2_clone(other, pErrorCode);  /* clone an unfrozen trie */
+    }
+
+    /* Clone the frozen trie by enumerating it and building a new one. */
+    context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        /*
+         * utrie2_open() can report a failure after it has allocated the trie;
+         * release it rather than dropping the pointer.
+         * NOTE(review): relies on utrie2_close() accepting NULL - confirm in utrie2.h.
+         */
+        utrie2_close(context.trie);
+        return NULL;
+    }
+    context.exclusiveLimit=FALSE;
+    context.errorCode=*pErrorCode;
+    utrie2_enum(other, NULL, copyEnumRange, &context);
+    *pErrorCode=context.errorCode;
+    /* lead surrogate code units have their own values, separate from the code points */
+    for(lead=0xd800; lead<0xdc00; ++lead) {
+        uint32_t value;
+        if(other->data32==NULL) {
+            value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead);
+        } else {
+            value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead);
+        }
+        if(value!=other->initialValue) {
+            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
+        }
+    }
+    if(U_FAILURE(*pErrorCode)) {
+        utrie2_close(context.trie);
+        context.trie=NULL;
+    }
+    return context.trie;
+}
+
+/**
+ * Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone.
+ *
+ * The UTrie enumeration delivers exclusive range limits, which copyEnumRange()
+ * converts because exclusiveLimit is set. The frozen trie uses 32-bit values
+ * if the source has 32-bit data, otherwise 16-bit values.
+ *
+ * @param trie1      the UTrie to copy
+ * @param errorValue the error value for the new trie
+ * @param pErrorCode in/out ICU error code; U_ILLEGAL_ARGUMENT_ERROR if trie1 is NULL
+ * @return the frozen UTrie2, or NULL on failure
+ */
+U_CAPI UTrie2 * U_EXPORT2
+utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) {
+    NewTrieAndStatus context;
+    UChar lead;
+
+    if(U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    if(trie1==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        /*
+         * utrie2_open() can report a failure after it has allocated the trie;
+         * release it rather than dropping the pointer.
+         * NOTE(review): relies on utrie2_close() accepting NULL - confirm in utrie2.h.
+         */
+        utrie2_close(context.trie);
+        return NULL;
+    }
+    context.exclusiveLimit=TRUE;
+    context.errorCode=*pErrorCode;
+    utrie_enum(trie1, NULL, copyEnumRange, &context);
+    *pErrorCode=context.errorCode;
+    /* lead surrogate code units have their own values, separate from the code points */
+    for(lead=0xd800; lead<0xdc00; ++lead) {
+        uint32_t value;
+        if(trie1->data32==NULL) {
+            value=UTRIE_GET16_FROM_LEAD(trie1, lead);
+        } else {
+            value=UTRIE_GET32_FROM_LEAD(trie1, lead);
+        }
+        if(value!=trie1->initialValue) {
+            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
+        }
+    }
+    if(U_SUCCESS(*pErrorCode)) {
+        utrie2_freeze(context.trie,
+                      trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,
+                      pErrorCode);
+    }
+#ifdef UTRIE2_DEBUG
+    if(U_SUCCESS(*pErrorCode)) {
+        utrie_printLengths(trie1);
+        utrie2_printLengths(context.trie, "fromUTrie");
+    }
+#endif
+    if(U_FAILURE(*pErrorCode)) {
+        utrie2_close(context.trie);
+        context.trie=NULL;
+    }
+    return context.trie;
+}
+
+/**
+ * Tests whether the index-2 entry for c currently refers to the null data block.
+ * With forLSCP, lead surrogates are looked up in the separate LSCP index-2 part.
+ */
+static inline UBool
+isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
+    int32_t index2Pos;
+
+    if(forLSCP && U_IS_LEAD(c)) {
+        index2Pos=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
+                  (c>>UTRIE2_SHIFT_2);
+    } else {
+        index2Pos=trie->index1[c>>UTRIE2_SHIFT_1]+
+                  ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
+    }
+    return (UBool)(trie->index2[index2Pos]==trie->dataNullOffset);
+}
+
+/**
+ * Appends one index-2 block at the end of the index-2 table and initializes it
+ * as a copy of the null index-2 block.
+ *
+ * @return the start of the new block, or -1 if the index-2 array is full
+ */
+static int32_t
+allocIndex2Block(UNewTrie2 *trie) {
+    const int32_t blockStart=trie->index2Length;
+    const int32_t blockLimit=blockStart+UTRIE2_INDEX_2_BLOCK_LENGTH;
+
+    if(blockLimit>UPRV_LENGTHOF(trie->index2)) {
+        /*
+         * Should never occur.
+         * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
+         * or the code writes more values than should be possible.
+         */
+        return -1;
+    }
+
+    trie->index2Length=blockLimit;
+    uprv_memcpy(trie->index2+blockStart, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4);
+    return blockStart;
+}
+
+/**
+ * Returns the start of the index-2 block for c, allocating a new block
+ * if the index-1 entry still refers to the null index-2 block.
+ * With forLSCP, lead surrogates use the fixed LSCP index-2 part.
+ *
+ * @return the index-2 block start, or -1 if no block could be allocated
+ */
+static int32_t
+getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
+    if(forLSCP && U_IS_LEAD(c)) {
+        return UTRIE2_LSCP_INDEX_2_OFFSET;
+    }
+
+    const int32_t i1=c>>UTRIE2_SHIFT_1;
+    int32_t i2Block=trie->index1[i1];
+    if(i2Block!=trie->index2NullOffset) {
+        return i2Block;  /* already has its own block */
+    }
+
+    i2Block=allocIndex2Block(trie);
+    if(i2Block<0) {
+        return -1;  /* program error */
+    }
+    trie->index1[i1]=i2Block;
+    return i2Block;
+}
+
+/**
+ * Provides a data block initialized as a copy of copyBlock, with a
+ * reference count of 0.
+ *
+ * A block from the free-block chain is reused if there is one; otherwise a
+ * block is appended, growing the data array first to
+ * UNEWTRIE2_MEDIUM_DATA_LENGTH and then to UNEWTRIE2_MAX_DATA_LENGTH as needed.
+ *
+ * @return the start of the block, or -1 if the data array cannot grow
+ */
+static int32_t
+allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) {
+    int32_t block;
+
+    if(trie->firstFreeBlock==0) {
+        /* no free block: take a new one from the high end */
+        block=trie->dataLength;
+        const int32_t newLength=block+UTRIE2_DATA_BLOCK_LENGTH;
+        if(newLength>trie->dataCapacity) {
+            /* out of memory in the data array */
+            int32_t newCapacity;
+            if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) {
+                newCapacity=UNEWTRIE2_MEDIUM_DATA_LENGTH;
+            } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) {
+                newCapacity=UNEWTRIE2_MAX_DATA_LENGTH;
+            } else {
+                /*
+                 * Should never occur.
+                 * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
+                 * or the code writes more values than should be possible.
+                 */
+                return -1;
+            }
+            uint32_t *grown=(uint32_t *)uprv_malloc(newCapacity*4);
+            if(grown==NULL) {
+                return -1;
+            }
+            uprv_memcpy(grown, trie->data, (size_t)trie->dataLength*4);
+            uprv_free(trie->data);
+            trie->data=grown;
+            trie->dataCapacity=newCapacity;
+        }
+        trie->dataLength=newLength;
+    } else {
+        /* pop the head of the free-block chain; the link is stored negated in map[] */
+        block=trie->firstFreeBlock;
+        trie->firstFreeBlock=-trie->map[block>>UTRIE2_SHIFT_2];
+    }
+
+    uprv_memcpy(trie->data+block, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4);
+    trie->map[block>>UTRIE2_SHIFT_2]=0;
+    return block;
+}
+
+/*
+ * Call when the block's reference counter reaches 0.
+ * Pushes the block onto the front of the free-block chain; the previous
+ * chain head is stored negated in the block's map[] slot.
+ */
+static void
+releaseDataBlock(UNewTrie2 *trie, int32_t block) {
+    const int32_t previousHead=trie->firstFreeBlock;
+    trie->firstFreeBlock=block;
+    trie->map[block>>UTRIE2_SHIFT_2]=-previousHead;
+}
+
+/**
+ * A data block may be written in place only if it is not the null data block
+ * and exactly one index-2 entry refers to it.
+ */
+static inline UBool
+isWritableBlock(UNewTrie2 *trie, int32_t block) {
+    if(block==trie->dataNullOffset) {
+        return (UBool)0;
+    }
+    return (UBool)(trie->map[block>>UTRIE2_SHIFT_2]==1);
+}
+
+/**
+ * Points index-2 entry i2 at block and maintains the reference counts:
+ * the new block gains a reference, the previously referenced block loses one
+ * and is released when its count drops to 0.
+ */
+static inline void
+setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) {
+    const int32_t previous=trie->index2[i2];
+
+    /* increment before decrementing, in case block==previous! */
+    ++trie->map[block>>UTRIE2_SHIFT_2];
+    --trie->map[previous>>UTRIE2_SHIFT_2];
+    if(trie->map[previous>>UTRIE2_SHIFT_2]==0) {
+        releaseDataBlock(trie, previous);
+    }
+    trie->index2[i2]=block;
+}
+
+/**
+ * Returns a data block for c that may be written in place.
+ * If the current block is not writable, a copy of it is allocated and
+ * the index-2 entry is redirected to the copy.
+ *
+ * No error checking for illegal arguments.
+ *
+ * @return the start of the writable block, or
+ *         -1 if no index-2 or data block is available
+ * @internal
+ */
+static int32_t
+getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
+    const int32_t i2Block=getIndex2Block(trie, c, forLSCP);
+    if(i2Block<0) {
+        return -1;  /* program error */
+    }
+
+    const int32_t i2=i2Block+((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
+    const int32_t current=trie->index2[i2];
+    if(isWritableBlock(trie, current)) {
+        return current;
+    }
+
+    /* copy-on-write: allocate a new data block with the current contents */
+    const int32_t fresh=allocDataBlock(trie, current);
+    if(fresh<0) {
+        /* out of memory in the data array */
+        return -1;
+    }
+    setIndex2Entry(trie, i2, fresh);
+    return fresh;
+}
+
+/**
+ * Stores value for c in a build-time trie.
+ *
+ * Nothing is returned; failures are reported through pErrorCode only:
+ * U_NO_WRITE_PERMISSION if there is no builder or it has been compacted,
+ * U_MEMORY_ALLOCATION_ERROR if no writable data block could be obtained.
+ *
+ * @param forLSCP TRUE to write the value for a lead surrogate code point
+ *                into the separate LSCP index-2 part
+ */
+static void
+set32(UNewTrie2 *trie,
+      UChar32 c, UBool forLSCP, uint32_t value,
+      UErrorCode *pErrorCode) {
+    if(trie!=NULL && !trie->isCompacted) {
+#ifdef UCPTRIE_DEBUG
+        umutablecptrie_set(trie->t3, c, value, pErrorCode);
+#endif
+
+        const int32_t block=getDataBlock(trie, c, forLSCP);
+        if(block>=0) {
+            trie->data[block+(c&UTRIE2_DATA_MASK)]=value;
+        } else {
+            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+        }
+    } else {
+        *pErrorCode=U_NO_WRITE_PERMISSION;
+    }
+}
+
+/**
+ * Sets the value for one code point in an unfrozen trie.
+ *
+ * @param trie       the trie to modify
+ * @param c          the code point, 0..0x10ffff
+ * @param value      the value to store
+ * @param pErrorCode in/out ICU error code:
+ *                   U_ILLEGAL_ARGUMENT_ERROR if trie is NULL or c is out of range;
+ *                   otherwise whatever set32() reports
+ */
+U_CAPI void U_EXPORT2
+utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return;
+    }
+    /* reject a NULL trie here rather than dereferencing it below */
+    if(trie==NULL || (uint32_t)c>0x10ffff) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    set32(trie->newTrie, c, TRUE, value, pErrorCode);
+}
+
+/**
+ * Sets the value for a lead surrogate code unit in an unfrozen trie.
+ * The value is written through the normal index path (forLSCP=FALSE),
+ * not into the separate index-2 part used for lead surrogate code points.
+ *
+ * @param trie       the trie to modify
+ * @param c          the lead surrogate, must satisfy U_IS_LEAD(c)
+ * @param value      the value to store
+ * @param pErrorCode in/out ICU error code:
+ *                   U_ILLEGAL_ARGUMENT_ERROR if trie is NULL or c is not a lead surrogate;
+ *                   otherwise whatever set32() reports
+ */
+U_CAPI void U_EXPORT2
+utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
+                                     UChar32 c, uint32_t value,
+                                     UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return;
+    }
+    /* reject a NULL trie here rather than dereferencing it below */
+    if(trie==NULL || !U_IS_LEAD(c)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    set32(trie->newTrie, c, FALSE, value, pErrorCode);
+}
+
+/** Sets all UTRIE2_DATA_BLOCK_LENGTH entries of one data block to value. */
+static void
+writeBlock(uint32_t *block, uint32_t value) {
+    for(int32_t k=0; k<UTRIE2_DATA_BLOCK_LENGTH; ++k) {
+        block[k]=value;
+    }
+}
+
+/**
+ * Writes value into block[start..limit[.
+ * With overwrite, every entry in the range is set; without it, only entries
+ * that still hold initialValue are set.
+ *
+ * initialValue is ignored if overwrite=TRUE
+ * @internal
+ */
+static void
+fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
+          uint32_t value, uint32_t initialValue, UBool overwrite) {
+    for(UChar32 k=start; k<limit; ++k) {
+        /* the entry is only read when overwrite is FALSE */
+        if(overwrite || block[k]==initialValue) {
+            block[k]=value;
+        }
+    }
+}
+
+U_CAPI void U_EXPORT2
+utrie2_setRange32(UTrie2 *trie,
+ UChar32 start, UChar32 end,
+ uint32_t value, UBool overwrite,
+ UErrorCode *pErrorCode) {
+ /*
+ * repeat value in [start..end] (end is inclusive)
+ * share one repeat-data block among the fully covered data blocks;
+ * setIndex2Entry() keeps the data block reference counts up to date
+ * fill around existing values if any, if(!overwrite)
+ *
+ * Errors reported through pErrorCode:
+ * - U_ILLEGAL_ARGUMENT_ERROR: start or end outside 0..0x10ffff, or start>end
+ * - U_NO_WRITE_PERMISSION: no builder, or the builder has been compacted
+ * - U_MEMORY_ALLOCATION_ERROR: getDataBlock() failed
+ * - U_INTERNAL_PROGRAM_ERROR: getIndex2Block() failed
+ * On an error in the middle, the part of the range processed so far stays set.
+ */
+ UNewTrie2 *newTrie;
+ int32_t block, rest, repeatBlock;
+ UChar32 limit;
+
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+ if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ newTrie=trie->newTrie;
+ if(newTrie==NULL || newTrie->isCompacted) {
+ *pErrorCode=U_NO_WRITE_PERMISSION;
+ return;
+ }
+#ifdef UCPTRIE_DEBUG
+ umutablecptrie_setRange(newTrie->t3, start, end, value, pErrorCode);
+#endif
+ if(!overwrite && value==newTrie->initialValue) {
+ return; /* nothing to do */
+ }
+
+ limit=end+1; /* exclusive limit of the range */
+ if(start&UTRIE2_DATA_MASK) {
+ UChar32 nextStart;
+
+ /* set partial block at [start..following block boundary[ */
+ block=getDataBlock(newTrie, start, TRUE);
+ if(block<0) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+
+ nextStart=(start+UTRIE2_DATA_MASK)&~UTRIE2_DATA_MASK;
+ if(nextStart<=limit) {
+ fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH,
+ value, newTrie->initialValue, overwrite);
+ start=nextStart;
+ } else {
+ fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK,
+ value, newTrie->initialValue, overwrite);
+ return; /* the whole range lies inside this one block */
+ }
+ }
+
+ /* number of positions in the last, partial block */
+ rest=limit&UTRIE2_DATA_MASK;
+
+ /* round down limit to a block boundary */
+ limit&=~UTRIE2_DATA_MASK;
+
+ /*
+ * iterate over all-value blocks
+ * repeatBlock<0 means that no repeat block has been created yet in this call
+ */
+ if(value==newTrie->initialValue) {
+ repeatBlock=newTrie->dataNullOffset;
+ } else {
+ repeatBlock=-1;
+ }
+
+ while(start<limit) {
+ int32_t i2;
+ UBool setRepeatBlock=FALSE;
+
+ if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) {
+ start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */
+ continue;
+ }
+
+ /* get index value */
+ i2=getIndex2Block(newTrie, start, TRUE);
+ if(i2<0) {
+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
+ return;
+ }
+ i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
+ block=newTrie->index2[i2];
+ if(isWritableBlock(newTrie, block)) {
+ /* already allocated */
+ if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) {
+ /*
+ * We overwrite all values, and it's not a
+ * protected (ASCII-linear or 2-byte UTF-8) block:
+ * replace with the repeatBlock.
+ */
+ setRepeatBlock=TRUE;
+ } else {
+ /* !overwrite, or protected block: just write the values into this block */
+ fillBlock(newTrie->data+block,
+ 0, UTRIE2_DATA_BLOCK_LENGTH,
+ value, newTrie->initialValue, overwrite);
+ }
+ } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) {
+ /*
+ * Set the repeatBlock instead of the null block or previous repeat block:
+ *
+ * If !isWritableBlock() then all entries in the block have the same value
+ * because it's the null block or a range block (the repeatBlock from a previous
+ * call to utrie2_setRange32()).
+ * No other blocks are used multiple times before compacting.
+ *
+ * The null block is the only non-writable block with the initialValue because
+ * of the repeatBlock initialization above. (If value==initialValue, then
+ * the repeatBlock will be the null data block.)
+ *
+ * We set our repeatBlock if the desired value differs from the block's value,
+ * and if we overwrite any data or if the data is all initial values
+ * (which is the same as the block being the null block, see above).
+ */
+ setRepeatBlock=TRUE;
+ }
+ if(setRepeatBlock) {
+ if(repeatBlock>=0) {
+ setIndex2Entry(newTrie, i2, repeatBlock);
+ } else {
+ /* create and set and fill the repeatBlock */
+ repeatBlock=getDataBlock(newTrie, start, TRUE);
+ if(repeatBlock<0) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ writeBlock(newTrie->data+repeatBlock, value);
+ }
+ }
+
+ start+=UTRIE2_DATA_BLOCK_LENGTH;
+ }
+
+ if(rest>0) {
+ /* set partial block at [last block boundary..limit[ */
+ block=getDataBlock(newTrie, start, TRUE);
+ if(block<0) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+
+ fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite);
+ }
+
+ return;
+}
+
+/* compaction --------------------------------------------------------------- */
+
+/**
+ * Compares length int32_t values at s and t.
+ * Returns true only if all of them match; a negative length yields false.
+ */
+static inline UBool
+equal_int32(const int32_t *s, const int32_t *t, int32_t length) {
+    int32_t remaining=length;
+    for(; remaining>0; --remaining, ++s, ++t) {
+        if(*s!=*t) {
+            break;
+        }
+    }
+    return (UBool)(remaining==0);
+}
+
+/**
+ * Compares length uint32_t values at s and t.
+ * Returns true only if all of them match; a negative length yields false.
+ */
+static inline UBool
+equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
+    int32_t remaining=length;
+    for(; remaining>0; --remaining, ++s, ++t) {
+        if(*s!=*t) {
+            break;
+        }
+    }
+    return (UBool)(remaining==0);
+}
+
+/**
+ * Searches idx[0..index2Length[ for the first position whose
+ * UTRIE2_INDEX_2_BLOCK_LENGTH entries equal the block at otherBlock.
+ * Every position is a candidate, so matches may overlap existing blocks.
+ *
+ * @return the position of the first match, or -1 if there is none
+ */
+static int32_t
+findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) {
+    /* last start position at which a whole block still fits below index2Length */
+    const int32_t lastStart=index2Length-UTRIE2_INDEX_2_BLOCK_LENGTH;
+
+    for(int32_t candidate=0; candidate<=lastStart; ++candidate) {
+        if(equal_int32(idx+candidate, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) {
+            return candidate;
+        }
+    }
+    return -1;
+}
+
+/**
+ * Searches data[0..dataLength[ for the first position, in steps of
+ * UTRIE2_DATA_GRANULARITY, whose blockLength entries equal the block at otherBlock.
+ *
+ * @return the position of the first match, or -1 if there is none
+ */
+static int32_t
+findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) {
+    /* last start position at which a whole block still fits below dataLength */
+    const int32_t lastStart=dataLength-blockLength;
+
+    for(int32_t candidate=0; candidate<=lastStart; candidate+=UTRIE2_DATA_GRANULARITY) {
+        if(equal_uint32(data+candidate, data+otherBlock, blockLength)) {
+            return candidate;
+        }
+    }
+    return -1;
+}
+
+/*
+ * Find the start of the last range in the trie by enumerating backward.
+ * Indexes for supplementary code points higher than this will be omitted.
+ *
+ * Walks the index-1 table from its end toward the start, then each index-2
+ * block and each data block from their ends, for as long as everything
+ * seen holds highValue.
+ *
+ * trie: the build-time trie to inspect (not modified here)
+ * highValue: the value that the trailing range is expected to hold
+ * Returns the first code point of the trailing range in which every code point
+ * has highValue; 0 if the whole trie holds highValue.
+ *
+ * Shortcuts: an index-2 block or data block that is the same as the one just
+ * visited is skipped without looking at its contents, and the null blocks are
+ * decided by comparing highValue with the initial value.
+ */
+static UChar32
+findHighStart(UNewTrie2 *trie, uint32_t highValue) {
+ const uint32_t *data32;
+
+ uint32_t value, initialValue;
+ UChar32 c, prev;
+ int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
+
+ data32=trie->data;
+ initialValue=trie->initialValue;
+
+ index2NullOffset=trie->index2NullOffset;
+ nullBlock=trie->dataNullOffset;
+
+ /* set variables for previous range */
+ if(highValue==initialValue) {
+ prevI2Block=index2NullOffset;
+ prevBlock=nullBlock;
+ } else {
+ prevI2Block=-1;
+ prevBlock=-1;
+ }
+ prev=0x110000;
+
+ /* enumerate index-2 blocks */
+ i1=UNEWTRIE2_INDEX_1_LENGTH;
+ c=prev; /* c is the exclusive upper limit of what is still to be checked */
+ while(c>0) {
+ i2Block=trie->index1[--i1];
+ if(i2Block==prevI2Block) {
+ /* the index-2 block is the same as the previous one, and filled with highValue */
+ c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
+ continue;
+ }
+ prevI2Block=i2Block;
+ if(i2Block==index2NullOffset) {
+ /* this is the null index-2 block */
+ if(highValue!=initialValue) {
+ return c;
+ }
+ c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
+ } else {
+ /* enumerate data blocks for one index-2 block */
+ for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) {
+ block=trie->index2[i2Block+ --i2];
+ if(block==prevBlock) {
+ /* the block is the same as the previous one, and filled with highValue */
+ c-=UTRIE2_DATA_BLOCK_LENGTH;
+ continue;
+ }
+ prevBlock=block;
+ if(block==nullBlock) {
+ /* this is the null data block */
+ if(highValue!=initialValue) {
+ return c;
+ }
+ c-=UTRIE2_DATA_BLOCK_LENGTH;
+ } else {
+ for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) {
+ value=data32[block+ --j];
+ if(value!=highValue) {
+ return c; /* the entry for code point c-1 differs, so the range starts at c */
+ }
+ --c;
+ }
+ }
+ }
+ }
+ }
+
+ /* deliver last range */
+ return 0;
+}
+
+/*
+ * Compact a build-time trie.
+ *
+ * The compaction
+ * - removes blocks that are identical with earlier ones
+ * - overlaps adjacent blocks as much as possible
+ * - moves blocks in steps of the data granularity
+ * - moves and overlaps blocks that overlap with multiple values in the overlap region
+ *
+ * It does not
+ * - try to move and overlap blocks that are not already adjacent
+ *
+ * On return, trie->map[] holds the new start offset of each original data block,
+ * trie->index2[] and trie->dataNullOffset have been remapped through it,
+ * and trie->dataLength is the compacted, granularity-aligned length.
+ */
+static void
+compactData(UNewTrie2 *trie) {
+#ifdef UTRIE2_DEBUG
+    int32_t countSame=0, sumOverlaps=0;
+#endif
+
+    int32_t start, newStart, movedStart;
+    int32_t blockLength, overlap;
+    int32_t i, mapIndex, blockCount;
+
+    /* do not compact linear-ASCII data */
+    newStart=UTRIE2_DATA_START_OFFSET;
+    for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) {
+        trie->map[i]=start;
+    }
+
+    /*
+     * Start with a block length of 64 for 2-byte UTF-8,
+     * then switch to UTRIE2_DATA_BLOCK_LENGTH.
+     */
+    blockLength=64;
+    blockCount=blockLength>>UTRIE2_SHIFT_2;
+    for(start=newStart; start<trie->dataLength;) {
+        /*
+         * start: index of first entry of current block
+         * newStart: index where the current block is to be moved
+         *           (right after current end of already-compacted data)
+         */
+        if(start==UNEWTRIE2_DATA_0800_OFFSET) {
+            blockLength=UTRIE2_DATA_BLOCK_LENGTH;
+            blockCount=1;
+        }
+
+        /* skip blocks that are not used */
+        if(trie->map[start>>UTRIE2_SHIFT_2]<=0) {
+            /* advance start to the next block */
+            start+=blockLength;
+
+            /* leave newStart with the previous block! */
+            continue;
+        }
+
+        /* search for an identical block */
+        if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength))
+             >=0
+        ) {
+#ifdef UTRIE2_DEBUG
+            ++countSame;
+#endif
+            /* found an identical block, set the other block's index value for the current block */
+            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
+                trie->map[mapIndex++]=movedStart;
+                movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
+            }
+
+            /* advance start to the next block */
+            start+=blockLength;
+
+            /* leave newStart with the previous block! */
+            continue;
+        }
+
+        /* see if the beginning of this block can be overlapped with the end of the previous block */
+        /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
+        for(overlap=blockLength-UTRIE2_DATA_GRANULARITY;
+            overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap);
+            overlap-=UTRIE2_DATA_GRANULARITY) {}
+
+#ifdef UTRIE2_DEBUG
+        sumOverlaps+=overlap;
+#endif
+        if(overlap>0 || newStart<start) {
+            /* some overlap, or just move the whole block */
+            movedStart=newStart-overlap;
+            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
+                trie->map[mapIndex++]=movedStart;
+                movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
+            }
+
+            /* move the non-overlapping indexes to their new positions */
+            start+=overlap;
+            for(i=blockLength-overlap; i>0; --i) {
+                trie->data[newStart++]=trie->data[start++];
+            }
+        } else /* no overlap && newStart==start */ {
+            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
+                trie->map[mapIndex++]=start;
+                start+=UTRIE2_DATA_BLOCK_LENGTH;
+            }
+            newStart=start;
+        }
+    }
+
+    /* now adjust the index-2 table */
+    for(i=0; i<trie->index2Length; ++i) {
+        if(i==UNEWTRIE2_INDEX_GAP_OFFSET) {
+            /* Gap indexes are invalid (-1). Skip over the gap. */
+            i+=UNEWTRIE2_INDEX_GAP_LENGTH;
+        }
+        trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2];
+    }
+    trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2];
+
+    /* ensure dataLength alignment */
+    while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) {
+        trie->data[newStart++]=trie->initialValue;
+    }
+
+#ifdef UTRIE2_DEBUG
+    /* we saved some space */
+    /* %lu takes unsigned long, so the two lengths are cast accordingly */
+    printf("compacting UTrie2: count of 32-bit data words %lu->%lu  countSame=%ld  sumOverlaps=%ld\n",
+            (unsigned long)trie->dataLength, (unsigned long)newStart, (long)countSame, (long)sumOverlaps);
+#endif
+
+    trie->dataLength=newStart;
+}
+
+/*
+ * Compact the index-2 table of a build-time trie.
+ *
+ * The linear-BMP index-2 blocks are left in place. After them, room is kept
+ * for the 2-byte UTF-8 index-2 values and the supplementary index-1 entries
+ * needed for trie->highStart. The remaining index-2 blocks are then
+ * deduplicated and overlapped with the previous block where possible.
+ *
+ * On return, trie->map[] holds the new offset of each original index-2 block,
+ * trie->index1[] and trie->index2NullOffset have been remapped through it,
+ * and trie->index2Length is the compacted, aligned length.
+ */
+static void
+compactIndex2(UNewTrie2 *trie) {
+    int32_t i, start, newStart, movedStart, overlap;
+
+    /* do not compact linear-BMP index-2 blocks */
+    newStart=UTRIE2_INDEX_2_BMP_LENGTH;
+    for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
+        trie->map[i]=start;
+    }
+
+    /* Reduce the index table gap to what will be needed at runtime. */
+    newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1);
+
+    for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) {
+        /*
+         * start: index of first entry of current block
+         * newStart: index where the current block is to be moved
+         *           (right after current end of already-compacted data)
+         */
+
+        /* search for an identical block */
+        if( (movedStart=findSameIndex2Block(trie->index2, newStart, start))
+             >=0
+        ) {
+            /* found an identical block, set the other block's index value for the current block */
+            trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart;
+
+            /* advance start to the next block */
+            start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
+
+            /* leave newStart with the previous block! */
+            continue;
+        }
+
+        /* see if the beginning of this block can be overlapped with the end of the previous block */
+        /* look for maximum overlap with the previous, adjacent block */
+        for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1;
+            overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap);
+            --overlap) {}
+
+        if(overlap>0 || newStart<start) {
+            /* some overlap, or just move the whole block */
+            trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap;
+
+            /* move the non-overlapping indexes to their new positions */
+            start+=overlap;
+            for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) {
+                trie->index2[newStart++]=trie->index2[start++];
+            }
+        } else /* no overlap && newStart==start */ {
+            trie->map[start>>UTRIE2_SHIFT_1_2]=start;
+            start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
+            newStart=start;
+        }
+    }
+
+    /* now adjust the index-1 table */
+    for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
+        trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2];
+    }
+    trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2];
+
+    /*
+     * Ensure data table alignment:
+     * Needs to be granularity-aligned for 16-bit trie
+     * (so that dataMove will be down-shiftable),
+     * and 2-aligned for uint32_t data.
+     */
+    while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) {
+        /* Arbitrary value: 0x3fffc not possible for real data. */
+        trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT;
+    }
+
+#ifdef UTRIE2_DEBUG
+    /* we saved some space */
+    /* %lu takes unsigned long, so the lengths are cast accordingly */
+    printf("compacting UTrie2: count of 16-bit index words %lu->%lu\n",
+            (unsigned long)trie->index2Length, (unsigned long)newStart);
+#endif
+
+    trie->index2Length=newStart;
+}
+
+/*
+ * Compact the build-time data of a UTrie2 before it is serialized.
+ *
+ * Determines highStart/highValue from the value at U+10FFFF, blanks out the
+ * supplementary range from highStart up so that its data blocks are released,
+ * compacts the data array and (only if highStart>0x10000) the index-2 table,
+ * then appends highValue to the data array and marks the trie as compacted.
+ *
+ * trie:       trie with a non-NULL newTrie
+ * pErrorCode: receives any error from utrie2_setRange32(); on such an error
+ *             the function returns before compacting
+ */
+static void
+compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) {
+    UNewTrie2 *newTrie;
+    UChar32 highStart, suppHighStart;
+    uint32_t highValue;
+
+    newTrie=trie->newTrie;
+
+    /* find highStart and round it up */
+    highValue=utrie2_get32(trie, 0x10ffff);
+    highStart=findHighStart(newTrie, highValue);
+    highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1);
+    if(highStart==0x110000) {
+        highValue=trie->errorValue;
+    }
+
+    /*
+     * Set trie->highStart only after utrie2_get32(trie, highStart).
+     * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
+     */
+    trie->highStart=newTrie->highStart=highStart;
+
+#ifdef UTRIE2_DEBUG
+    /* %lx takes unsigned long, so the arguments are cast accordingly */
+    printf("UTrie2: highStart U+%06lx  highValue 0x%lx  initialValue 0x%lx\n",
+            (unsigned long)highStart, (unsigned long)highValue, (unsigned long)trie->initialValue);
+#endif
+
+    if(highStart<0x110000) {
+        /* Blank out [highStart..10ffff] to release associated data blocks. */
+        suppHighStart= highStart<=0x10000 ? 0x10000 : highStart;
+        utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode);
+        if(U_FAILURE(*pErrorCode)) {
+            return;
+        }
+    }
+
+    compactData(newTrie);
+    if(highStart>0x10000) {
+        compactIndex2(newTrie);
+#ifdef UTRIE2_DEBUG
+    } else {
+        /* %lx and %lu take unsigned long, so the arguments are cast accordingly */
+        printf("UTrie2: highStart U+%04lx  count of 16-bit index words %lu->%lu\n",
+                (unsigned long)highStart, (unsigned long)trie->newTrie->index2Length,
+                (unsigned long)UTRIE2_INDEX_1_OFFSET);
+#endif
+    }
+
+    /*
+     * Store the highValue in the data array and round up the dataLength.
+     * Must be done after compactData() because that assumes that dataLength
+     * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
+     */
+    newTrie->data[newTrie->dataLength++]=highValue;
+    while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) {
+        newTrie->data[newTrie->dataLength++]=trie->initialValue;
+    }
+
+    newTrie->isCompacted=TRUE;
+}
+
+/* serialization ------------------------------------------------------------ */
+
+/**
+ * Maximum length of the runtime index array.
+ * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
+ * (The actual maximum length is lower,
+ * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
+ */
+#define UTRIE2_MAX_INDEX_LENGTH 0xffff
+
+/**
+ * Maximum length of the runtime data array.
+ * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
+ * and by uint16_t UTrie2Header.shiftedDataLength.
+ */
+#define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT)
+
+/* Compact and internally serialize the trie.
+ *
+ * trie:       trie to freeze; if it is already frozen (newTrie==NULL), only the
+ *             requested valueBits is checked against the frozen data width
+ * valueBits:  selects 16-bit or 32-bit data values in the serialized form
+ * pErrorCode: in/out error code; nothing is done if it already indicates a failure
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR for a NULL trie, an out-of-range valueBits,
+ * or a valueBits that differs from that of an already frozen trie;
+ * U_INDEX_OUTOFBOUNDS_ERROR if the compacted index or data lengths exceed the
+ * limits checked below; U_MEMORY_ALLOCATION_ERROR if uprv_malloc() fails.
+ * Errors from compactTrie() are passed through.
+ * On success the UNewTrie2 is freed and trie->newTrie is set to NULL.
+ */
+U_CAPI void U_EXPORT2
+utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) {
+ UNewTrie2 *newTrie;
+ UTrie2Header *header;
+ uint32_t *p;
+ uint16_t *dest16;
+ int32_t i, length;
+ int32_t allIndexesLength;
+ int32_t dataMove; /* >0 if the data is moved to the end of the index array */
+ UChar32 highStart;
+
+ /* argument check */
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+ if( trie==NULL ||
+ valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
+ ) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ newTrie=trie->newTrie;
+ if(newTrie==NULL) {
+ /* already frozen */
+ UTrie2ValueBits frozenValueBits=
+ trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS;
+ if(valueBits!=frozenValueBits) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ }
+ return;
+ }
+
+ /* compact if necessary */
+ if(!newTrie->isCompacted) {
+ compactTrie(trie, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+ }
+ highStart=trie->highStart;
+
+ if(highStart<=0x10000) {
+ allIndexesLength=UTRIE2_INDEX_1_OFFSET;
+ } else {
+ allIndexesLength=newTrie->index2Length;
+ }
+ if(valueBits==UTRIE2_16_VALUE_BITS) {
+ dataMove=allIndexesLength;
+ } else {
+ dataMove=0;
+ }
+
+ /* are indexLength and dataLength within limits? */
+ if( /* for unshifted indexLength */
+ allIndexesLength>UTRIE2_MAX_INDEX_LENGTH ||
+ /* for unshifted dataNullOffset */
+ (dataMove+newTrie->dataNullOffset)>0xffff ||
+ /* for unshifted 2-byte UTF-8 index-2 values */
+ (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff ||
+ /* for shiftedDataLength */
+ (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH
+ ) {
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return;
+ }
+
+ /* calculate the total serialized length:
+ * header, then 2 bytes per index entry, then 2 or 4 bytes per data value */
+ length=sizeof(UTrie2Header)+allIndexesLength*2;
+ if(valueBits==UTRIE2_16_VALUE_BITS) {
+ length+=newTrie->dataLength*2;
+ } else {
+ length+=newTrie->dataLength*4;
+ }
+
+ trie->memory=uprv_malloc(length);
+ if(trie->memory==NULL) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ trie->length=length;
+ trie->isMemoryOwned=TRUE;
+
+ trie->indexLength=allIndexesLength;
+ trie->dataLength=newTrie->dataLength;
+ if(highStart<=0x10000) {
+ trie->index2NullOffset=0xffff;
+ } else {
+ trie->index2NullOffset=static_cast<uint16_t>(UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset);
+ }
+ trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset);
+ trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY;
+
+ /* set the header fields */
+ header=(UTrie2Header *)trie->memory;
+
+ header->signature=UTRIE2_SIG; /* "Tri2" */
+ header->options=(uint16_t)valueBits;
+
+ header->indexLength=(uint16_t)trie->indexLength;
+ header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT);
+ header->index2NullOffset=trie->index2NullOffset;
+ header->dataNullOffset=trie->dataNullOffset;
+ header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1);
+
+ /* fill the index and data arrays; the index array starts right after the header */
+ dest16=(uint16_t *)(header+1);
+ trie->index=dest16;
+
+ /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
+ p=(uint32_t *)newTrie->index2;
+ for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) {
+ *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
+ }
+
+ /* write UTF-8 2-byte index-2 values, not right-shifted */
+ for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */
+ *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
+ }
+ for(; i<(0xe0-0xc0); ++i) { /* C2..DF */
+ *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]);
+ }
+
+ if(highStart>0x10000) {
+ int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1;
+ int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length;
+
+ /* write 16-bit index-1 values for supplementary code points */
+ p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
+ for(i=index1Length; i>0; --i) {
+ *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++);
+ }
+
+ /*
+ * write the index-2 array values for supplementary code points,
+ * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
+ */
+ p=(uint32_t *)newTrie->index2+index2Offset;
+ for(i=newTrie->index2Length-index2Offset; i>0; --i) {
+ *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
+ }
+ }
+
+ /* write the 16/32-bit data array */
+ switch(valueBits) {
+ case UTRIE2_16_VALUE_BITS:
+ /* write 16-bit data values */
+ trie->data16=dest16;
+ trie->data32=NULL;
+ p=newTrie->data;
+ for(i=newTrie->dataLength; i>0; --i) {
+ *dest16++=(uint16_t)*p++;
+ }
+ break;
+ case UTRIE2_32_VALUE_BITS:
+ /* write 32-bit data values */
+ trie->data16=NULL;
+ trie->data32=(uint32_t *)dest16;
+ uprv_memcpy(dest16, newTrie->data, (size_t)newTrie->dataLength*4);
+ break;
+ default:
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+#ifdef UTRIE2_DEBUG
+ utrie2_printLengths(trie, "");
+#endif
+
+#ifdef UCPTRIE_DEBUG
+ umutablecptrie_setName(newTrie->t3, trie->name);
+ ucptrie_close(
+ umutablecptrie_buildImmutable(
+ newTrie->t3, UCPTRIE_TYPE_FAST, (UCPTrieValueWidth)valueBits, pErrorCode));
+#endif
+ /* Delete the UNewTrie2. With newTrie==NULL, later calls take the "already frozen" path above. */
+ uprv_free(newTrie->data);
+ uprv_free(newTrie);
+ trie->newTrie=NULL;
+}
diff --git a/thirdparty/icu4c/common/utrie2_impl.h b/thirdparty/icu4c/common/utrie2_impl.h
new file mode 100644
index 0000000000..2a14db3a6b
--- /dev/null
+++ b/thirdparty/icu4c/common/utrie2_impl.h
@@ -0,0 +1,175 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2001-2008, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: utrie2_impl.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2008sep26 (split off from utrie2.c)
+* created by: Markus W. Scherer
+*
+* Definitions needed for both runtime and builder code for UTrie2,
+* used by utrie2.cpp and utrie2_builder.cpp.
+*/
+
+#ifndef __UTRIE2_IMPL_H__
+#define __UTRIE2_IMPL_H__
+
+#ifdef UCPTRIE_DEBUG
+#include "unicode/umutablecptrie.h"
+#endif
+#include "utrie2.h"
+
+/* Public UTrie2 API implementation ----------------------------------------- */
+
+/*
+ * These definitions are mostly needed by utrie2.cpp,
+ * but also by utrie2_serialize() and utrie2_swap().
+ */
+
+// UTrie2 signature values, in platform endianness and opposite endianness.
+// The UTrie2 signature ASCII byte values spell "Tri2".
+#define UTRIE2_SIG 0x54726932
+#define UTRIE2_OE_SIG 0x32697254
+
+/**
+ * Trie data structure in serialized form:
+ *
+ * UTrie2Header header;
+ * uint16_t index[header.indexLength];
+ * uint16_t data[header.shiftedDataLength<<2]; -- or uint32_t data[...]
+ *
+ * The header consists of one 32-bit field followed by six 16-bit fields.
+ * @internal
+ */
+typedef struct UTrie2Header {
+ /** "Tri2" in big-endian US-ASCII (0x54726932) */
+ uint32_t signature;
+
+ /**
+ * options bit field:
+ * 15.. 4 reserved (0)
+ * 3.. 0 UTrie2ValueBits valueBits
+ */
+ uint16_t options;
+
+ /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH */
+ uint16_t indexLength;
+
+ /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT */
+ uint16_t shiftedDataLength;
+
+ /** Null index and data blocks, not shifted. */
+ uint16_t index2NullOffset, dataNullOffset;
+
+ /**
+ * First code point of the single-value range ending with U+10ffff,
+ * rounded up and then shifted right by UTRIE2_SHIFT_1.
+ */
+ uint16_t shiftedHighStart;
+} UTrie2Header;
+
+/**
+ * Constants for use with UTrie2Header.options.
+ * @internal
+ */
+enum {
+ /** Mask to get the UTrie2ValueBits valueBits from options (bits 3..0; the other bits are reserved). */
+ UTRIE2_OPTIONS_VALUE_BITS_MASK=0xf
+};
+
+/* Building a trie ---------------------------------------------------------- */
+
+/*
+ * These definitions are mostly needed by utrie2_builder.cpp, but also by
+ * utrie2_get32() and utrie2_enum().
+ */
+
+enum {
+ /**
+ * At build time, leave a gap in the index-2 table,
+ * at least as long as the maximum lengths of the 2-byte UTF-8 index-2 table
+ * and the supplementary index-1 table.
+ * Round up to UTRIE2_INDEX_2_BLOCK_LENGTH for proper compacting.
+ * The gap starts right after the BMP part of the index-2 table.
+ */
+ UNEWTRIE2_INDEX_GAP_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH,
+ UNEWTRIE2_INDEX_GAP_LENGTH=
+ ((UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH)+UTRIE2_INDEX_2_MASK)&
+ ~UTRIE2_INDEX_2_MASK,
+
+ /**
+ * Maximum length of the build-time index-2 array.
+ * Maximum number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2,
+ * plus the part of the index-2 table for lead surrogate code points,
+ * plus the build-time index gap,
+ * plus the null index-2 block.
+ */
+ UNEWTRIE2_MAX_INDEX_2_LENGTH=
+ (0x110000>>UTRIE2_SHIFT_2)+
+ UTRIE2_LSCP_INDEX_2_LENGTH+
+ UNEWTRIE2_INDEX_GAP_LENGTH+
+ UTRIE2_INDEX_2_BLOCK_LENGTH,
+
+ UNEWTRIE2_INDEX_1_LENGTH=0x110000>>UTRIE2_SHIFT_1 /* length of the build-time index-1 array: 0x110000 code points shifted right by UTRIE2_SHIFT_1 */
+};
+
+/**
+ * Maximum length of the build-time data array.
+ * One entry per 0x110000 code points, plus the illegal-UTF-8 block and the null block,
+ * plus values for the 0x400 surrogate code units.
+ */
+#define UNEWTRIE2_MAX_DATA_LENGTH (0x110000+0x40+0x40+0x400)
+
+/*
+ * Build-time trie structure.
+ *
+ * Just using a boolean flag for "repeat use" could lead to data array overflow
+ * because we would not be able to detect when a data block becomes unused.
+ * It also leads to orphan data blocks that are kept through serialization.
+ *
+ * Need to use reference counting for data blocks,
+ * and allocDataBlock() needs to look for a free block before increasing dataLength.
+ *
+ * This scheme seems like overkill for index-2 blocks since the whole index array is
+ * preallocated anyway (unlike the growable data array).
+ * Just allocating multiple index-2 blocks as needed.
+ *
+ * The index1 and index2 arrays are embedded in the struct with their maximum
+ * build-time lengths; data is a separate array reached through a pointer.
+ */
+struct UNewTrie2 {
+ int32_t index1[UNEWTRIE2_INDEX_1_LENGTH]; /* index-1 table */
+ int32_t index2[UNEWTRIE2_MAX_INDEX_2_LENGTH]; /* index-2 table */
+ uint32_t *data; /* data values */
+#ifdef UCPTRIE_DEBUG
+ UMutableCPTrie *t3;
+#endif
+
+ uint32_t initialValue, errorValue;
+ int32_t index2Length, dataCapacity, dataLength; /* dataCapacity: presumably the allocated size of data[] -- confirm in the builder */
+ int32_t firstFreeBlock;
+ int32_t index2NullOffset, dataNullOffset; /* offsets of the null index-2 block and the null data block */
+ UChar32 highStart;
+ UBool isCompacted;
+
+ /**
+ * Multi-purpose per-data-block table.
+ *
+ * Before compacting:
+ *
+ * Per-data-block reference counters/free-block list.
+ * 0: unused
+ * >0: reference counter (number of index-2 entries pointing here)
+ * <0: next free data block in free-block list
+ *
+ * While compacting:
+ *
+ * Map of adjusted indexes, used in compactData() and compactIndex2().
+ * Maps from original indexes to new ones.
+ */
+ int32_t map[UNEWTRIE2_MAX_DATA_LENGTH>>UTRIE2_SHIFT_2];
+};
+
+#endif
diff --git a/thirdparty/icu4c/common/utrie_swap.cpp b/thirdparty/icu4c/common/utrie_swap.cpp
new file mode 100644
index 0000000000..6e8b138394
--- /dev/null
+++ b/thirdparty/icu4c/common/utrie_swap.cpp
@@ -0,0 +1,348 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// utrie_swap.cpp
+// created: 2018aug08 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "ucptrie_impl.h"
+#include "udataswp.h"
+#include "utrie.h"
+#include "utrie2_impl.h"
+
+// These functions for swapping different generations of ICU code point tries are here
+// so that their implementation files need not depend on swapper code,
+// need not depend on each other, and so that other swapper code
+// need not depend on other trie code.
+
+namespace {
+
+constexpr int32_t ASCII_LIMIT = 0x80;
+
+} // namespace
+
+/**
+ * Swaps the byte order of a serialized UTrie (version 1), or only computes its size.
+ *
+ * @param ds swapper that reads the input byte order and provides the swap functions
+ * @param inData serialized trie, starting with a UTrieHeader
+ * @param length number of bytes at inData; if negative, the header is only
+ *               validated and the size returned, and nothing is written
+ * @param outData destination for the swapped trie; must not be NULL when length>=0
+ * @param pErrorCode in/out ICU error code; the function does nothing if it is NULL
+ *                   or already indicates a failure
+ * @return the size in bytes of the serialized trie, or 0 on error
+ */
+U_CAPI int32_t U_EXPORT2
+utrie_swap(const UDataSwapper *ds,
+           const void *inData, int32_t length, void *outData,
+           UErrorCode *pErrorCode) {
+    const UTrieHeader *inTrie;
+    UTrieHeader trie;
+    int32_t size;
+    UBool dataIs32;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* setup and swapping */
+    if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    /* read the header fields in the input byte order before anything is swapped */
+    inTrie=(const UTrieHeader *)inData;
+    trie.signature=ds->readUInt32(inTrie->signature);
+    trie.options=ds->readUInt32(inTrie->options);
+    trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
+    trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
+
+    /* UTRIE_SIG is the same named signature constant that getVersion() tests for */
+    if( trie.signature!=UTRIE_SIG ||
+        (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
+        ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
+        trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
+        (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
+        trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
+        (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
+        ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
+    ) {
+        *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
+        return 0;
+    }
+
+    dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
+    /* NOTE(review): indexLength and dataLength come from the input; this sum is not checked for int32_t overflow. */
+    size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
+
+    if(length>=0) {
+        UTrieHeader *outTrie;
+
+        if(length<size) {
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        outTrie=(UTrieHeader *)outData;
+
+        /* swap the header */
+        ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
+
+        /* swap the index and the data */
+        if(dataIs32) {
+            ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
+            ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
+                                     (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
+        } else {
+            ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
+        }
+    }
+
+    return size;
+}
+
+/**
+ * Swaps the byte order of a serialized UTrie2, or only computes its size.
+ *
+ * @param ds swapper that reads the input byte order and provides the swap functions
+ * @param inData serialized trie, starting with a UTrie2Header
+ * @param length number of bytes at inData; if negative, the header is only
+ *               validated and the size returned, and nothing is written
+ * @param outData destination for the swapped trie; must not be NULL when length>=0
+ * @param pErrorCode in/out ICU error code; the function does nothing if it is NULL
+ *                   or already indicates a failure
+ * @return the size in bytes of the serialized trie, or 0 on error
+ */
+U_CAPI int32_t U_EXPORT2
+utrie2_swap(const UDataSwapper *ds,
+            const void *inData, int32_t length, void *outData,
+            UErrorCode *pErrorCode) {
+    const UTrie2Header *inTrie;
+    UTrie2Header trie;
+    int32_t dataLength, size;
+    UTrie2ValueBits valueBits;
+
+    /* same NULL guard as utrie_swap(), so a NULL pErrorCode is not dereferenced */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* setup and swapping */
+    if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    /* read the header fields in the input byte order before anything is swapped */
+    inTrie=(const UTrie2Header *)inData;
+    trie.signature=ds->readUInt32(inTrie->signature);
+    trie.options=ds->readUInt16(inTrie->options);
+    trie.indexLength=ds->readUInt16(inTrie->indexLength);
+    trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
+
+    valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
+    dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
+
+    if( trie.signature!=UTRIE2_SIG ||
+        valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
+        trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
+        dataLength<UTRIE2_DATA_START_OFFSET
+    ) {
+        *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie2 */
+        return 0;
+    }
+
+    size=sizeof(UTrie2Header)+trie.indexLength*2;
+    switch(valueBits) {
+    case UTRIE2_16_VALUE_BITS:
+        size+=dataLength*2;
+        break;
+    case UTRIE2_32_VALUE_BITS:
+        size+=dataLength*4;
+        break;
+    default:
+        *pErrorCode=U_INVALID_FORMAT_ERROR;
+        return 0;
+    }
+
+    if(length>=0) {
+        UTrie2Header *outTrie;
+
+        if(length<size) {
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        outTrie=(UTrie2Header *)outData;
+
+        /* swap the header: one 32-bit signature, then 12 bytes of 16-bit fields */
+        ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
+        ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
+
+        /* swap the index and the data */
+        switch(valueBits) {
+        case UTRIE2_16_VALUE_BITS:
+            ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
+            break;
+        case UTRIE2_32_VALUE_BITS:
+            ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
+            ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
+                                     (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
+            break;
+        default:
+            *pErrorCode=U_INVALID_FORMAT_ERROR;
+            return 0;
+        }
+    }
+
+    return size;
+}
+
+/**
+ * Swaps the byte order of a serialized UCPTrie, or only computes its size.
+ *
+ * @param ds swapper that reads the input byte order and provides the swap functions
+ * @param inData serialized trie, starting with a UCPTrieHeader
+ * @param length number of bytes at inData; if negative, the header is only
+ *               validated and the size returned, and nothing is written
+ * @param outData destination for the swapped trie; must not be nullptr when length>=0
+ * @param pErrorCode in/out ICU error code; the function does nothing if it is nullptr
+ *                   or already indicates a failure
+ * @return the size in bytes of the serialized trie, or 0 on error
+ */
+U_CAPI int32_t U_EXPORT2
+ucptrie_swap(const UDataSwapper *ds,
+             const void *inData, int32_t length, void *outData,
+             UErrorCode *pErrorCode) {
+    const UCPTrieHeader *inTrie;
+    UCPTrieHeader trie;
+    int32_t dataLength, size;
+    UCPTrieValueWidth valueWidth;
+
+    /* same null guard as utrie_swap(), so a null pErrorCode is not dereferenced */
+    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* setup and swapping */
+    if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) {
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    /* read the header fields in the input byte order before anything is swapped */
+    inTrie=(const UCPTrieHeader *)inData;
+    trie.signature=ds->readUInt32(inTrie->signature);
+    trie.options=ds->readUInt16(inTrie->options);
+    trie.indexLength=ds->readUInt16(inTrie->indexLength);
+    trie.dataLength = ds->readUInt16(inTrie->dataLength);
+
+    UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3);
+    valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK);
+    /* the data length is split: high bits in options, low 16 bits in the dataLength field */
+    dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength;
+
+    int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ?
+        UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
+    if( trie.signature!=UCPTRIE_SIG ||
+        type > UCPTRIE_TYPE_SMALL ||
+        (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 ||
+        valueWidth > UCPTRIE_VALUE_BITS_8 ||
+        trie.indexLength < minIndexLength ||
+        dataLength < ASCII_LIMIT
+    ) {
+        *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */
+        return 0;
+    }
+
+    size=sizeof(UCPTrieHeader)+trie.indexLength*2;
+    switch(valueWidth) {
+    case UCPTRIE_VALUE_BITS_16:
+        size+=dataLength*2;
+        break;
+    case UCPTRIE_VALUE_BITS_32:
+        size+=dataLength*4;
+        break;
+    case UCPTRIE_VALUE_BITS_8:
+        size+=dataLength;
+        break;
+    default:
+        *pErrorCode=U_INVALID_FORMAT_ERROR;
+        return 0;
+    }
+
+    if(length>=0) {
+        UCPTrieHeader *outTrie;
+
+        if(length<size) {
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        outTrie=(UCPTrieHeader *)outData;
+
+        /* swap the header: one 32-bit signature, then 12 bytes of 16-bit fields */
+        ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
+        ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
+
+        /* swap the index */
+        const uint16_t *inIndex=reinterpret_cast<const uint16_t *>(inTrie+1);
+        uint16_t *outIndex=reinterpret_cast<uint16_t *>(outTrie+1);
+        ds->swapArray16(ds, inIndex, trie.indexLength*2, outIndex, pErrorCode);
+
+        /* swap the data; distinct local names so the inData/outData parameters are not shadowed */
+        const uint16_t *inValues=inIndex+trie.indexLength;
+        uint16_t *outValues=outIndex+trie.indexLength;
+        switch(valueWidth) {
+        case UCPTRIE_VALUE_BITS_16:
+            ds->swapArray16(ds, inValues, dataLength*2, outValues, pErrorCode);
+            break;
+        case UCPTRIE_VALUE_BITS_32:
+            ds->swapArray32(ds, inValues, dataLength*4, outValues, pErrorCode);
+            break;
+        case UCPTRIE_VALUE_BITS_8:
+            /* single bytes need no swapping; copy them unless input and output are the same buffer */
+            if(inTrie!=outTrie) {
+                uprv_memmove(outValues, inValues, dataLength);
+            }
+            break;
+        default:
+            *pErrorCode=U_INVALID_FORMAT_ERROR;
+            return 0;
+        }
+    }
+
+    return size;
+}
+
+namespace {
+
+/**
+ * Determines which trie generation a block of serialized data belongs to by
+ * looking at its leading 32-bit signature: UTrie (version 1), UTrie2 (version 2)
+ * or UCPTrie (version 3).
+ *
+ * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie
+ * @param length the number of bytes available at data;
+ *               can be more than necessary (see return value)
+ * @param anyEndianOk If FALSE, only platform-endian signatures are accepted.
+ *                    If TRUE, opposite-endian signatures are accepted as well.
+ * @return the trie version, or 0 if data is null, misaligned, shorter than
+ *         16 bytes, or does not start with a recognized signature
+ */
+int32_t
+getVersion(const void *data, int32_t length, UBool anyEndianOk) {
+    if(data==nullptr || length<16 || (U_POINTER_MASK_LSB(data, 3)!=0)) {
+        return 0;
+    }
+
+    const uint32_t sig=*(const uint32_t *)data;
+
+    /* newest format first; each version accepts its opposite-endian form only on request */
+    if(sig==UCPTRIE_SIG || (anyEndianOk && sig==UCPTRIE_OE_SIG)) {
+        return 3;
+    }
+    if(sig==UTRIE2_SIG || (anyEndianOk && sig==UTRIE2_OE_SIG)) {
+        return 2;
+    }
+    if(sig==UTRIE_SIG || (anyEndianOk && sig==UTRIE_OE_SIG)) {
+        return 1;
+    }
+    return 0;
+}
+
+} // namespace
+
+/**
+ * Swaps a serialized trie of any supported generation by detecting its version
+ * from the signature and passing the call on to utrie_swap(), utrie2_swap()
+ * or ucptrie_swap().
+ *
+ * @param ds, inData, length, outData passed through unchanged to the version-specific swapper
+ * @param pErrorCode in/out ICU error code; the function does nothing if it is nullptr
+ *                   or already indicates a failure; set to U_INVALID_FORMAT_ERROR
+ *                   if the data is not recognized as a serialized trie
+ * @return the value returned by the version-specific swapper, or 0 on error
+ */
+U_CAPI int32_t U_EXPORT2
+utrie_swapAnyVersion(const UDataSwapper *ds,
+                     const void *inData, int32_t length, void *outData,
+                     UErrorCode *pErrorCode) {
+    /* same null guard as utrie_swap(), so a null pErrorCode is not dereferenced */
+    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { return 0; }
+    switch(getVersion(inData, length, TRUE)) {
+    case 1:
+        return utrie_swap(ds, inData, length, outData, pErrorCode);
+    case 2:
+        return utrie2_swap(ds, inData, length, outData, pErrorCode);
+    case 3:
+        return ucptrie_swap(ds, inData, length, outData, pErrorCode);
+    default:
+        *pErrorCode=U_INVALID_FORMAT_ERROR;
+        return 0;
+    }
+}
diff --git a/thirdparty/icu4c/common/uts46.cpp b/thirdparty/icu4c/common/uts46.cpp
new file mode 100644
index 0000000000..f25b4e12f1
--- /dev/null
+++ b/thirdparty/icu4c/common/uts46.cpp
@@ -0,0 +1,1494 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: uts46.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010mar09
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_IDNA
+
+#include "unicode/idna.h"
+#include "unicode/normalizer2.h"
+#include "unicode/uscript.h"
+#include "unicode/ustring.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "punycode.h"
+#include "ubidi_props.h"
+#include "ustr_imp.h"
+
+// Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG:
+//
+// The domain name length limit is 255 octets in an internal DNS representation
+// where the last ("root") label is the empty label
+// represented by length byte 0 alone.
+// In a conventional string, this translates to 253 characters, or 254
+// if there is a trailing dot for the root label.
+
+U_NAMESPACE_BEGIN
+
+// Severe errors which usually result in a U+FFFD replacement character in the result string.
+const uint32_t severeErrors=
+ UIDNA_ERROR_LEADING_COMBINING_MARK|
+ UIDNA_ERROR_DISALLOWED|
+ UIDNA_ERROR_PUNYCODE|
+ UIDNA_ERROR_LABEL_HAS_DOT|
+ UIDNA_ERROR_INVALID_ACE_LABEL;
+
+// Returns TRUE if every UTF-16 code unit of dest is at most 0x7f
+// (an empty string is ASCII), FALSE otherwise.
+static inline UBool
+isASCIIString(const UnicodeString &dest) {
+    const UChar *units=dest.getBuffer();
+    const int32_t count=dest.length();
+    for(int32_t k=0; k<count; ++k) {
+        if(units[k]>0x7f) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+// Forward declarations of the two isASCIIOkBiDi() overloads (UTF-16 and
+// char input). They are called from UTS46::process() and UTS46::processUTF8()
+// on the ASCII labels that precede labelStart; a FALSE result there leads to
+// UIDNA_ERROR_BIDI.
+static UBool
+isASCIIOkBiDi(const UChar *s, int32_t length);
+
+static UBool
+isASCIIOkBiDi(const char *s, int32_t length);
+
+// IDNA class default implementations -------------------------------------- ***
+
+// Out-of-line destructor definition for the IDNA base class; the body is empty.
+IDNA::~IDNA() {}
+
+// Default UTF-8 implementation: converts label to UTF-16, calls
+// labelToASCII(), and writes the result to dest as UTF-8.
+// Does nothing if errorCode already indicates a failure.
+void
+IDNA::labelToASCII_UTF8(StringPiece label, ByteSink &dest,
+                        IDNAInfo &info, UErrorCode &errorCode) const {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    UnicodeString result16;
+    const UnicodeString label16=UnicodeString::fromUTF8(label);
+    labelToASCII(label16, result16, info, errorCode).toUTF8(dest);
+}
+
+// Default UTF-8 implementation: converts label to UTF-16, calls
+// labelToUnicode(), and writes the result to dest as UTF-8.
+// Does nothing if errorCode already indicates a failure.
+void
+IDNA::labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
+                         IDNAInfo &info, UErrorCode &errorCode) const {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    UnicodeString result16;
+    const UnicodeString label16=UnicodeString::fromUTF8(label);
+    labelToUnicode(label16, result16, info, errorCode).toUTF8(dest);
+}
+
+// Default UTF-8 implementation: converts name to UTF-16, calls
+// nameToASCII(), and writes the result to dest as UTF-8.
+// Does nothing if errorCode already indicates a failure.
+void
+IDNA::nameToASCII_UTF8(StringPiece name, ByteSink &dest,
+                       IDNAInfo &info, UErrorCode &errorCode) const {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    UnicodeString result16;
+    const UnicodeString name16=UnicodeString::fromUTF8(name);
+    nameToASCII(name16, result16, info, errorCode).toUTF8(dest);
+}
+
+// Default UTF-8 implementation: converts name to UTF-16, calls
+// nameToUnicode(), and writes the result to dest as UTF-8.
+// Does nothing if errorCode already indicates a failure.
+void
+IDNA::nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
+                        IDNAInfo &info, UErrorCode &errorCode) const {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    UnicodeString result16;
+    const UnicodeString name16=UnicodeString::fromUTF8(name);
+    nameToUnicode(name16, result16, info, errorCode).toUTF8(dest);
+}
+
+// UTS46 class declaration ------------------------------------------------- ***
+
+// Concrete IDNA subclass for UTS #46 processing.
+// Instances are created by IDNA::createUTS46Instance().
+// The public UTF-16 methods forward to process(), the public UTF-8 methods
+// forward to processUTF8(); the flags passed select label-vs-name and
+// toASCII-vs-toUnicode behavior.
+class UTS46 : public IDNA {
+public:
+ // options: UIDNA_* option bits; errorCode receives any failure from
+ // obtaining the normalizer instance.
+ UTS46(uint32_t options, UErrorCode &errorCode);
+ virtual ~UTS46();
+
+ // UTF-16 API: each returns dest.
+ virtual UnicodeString &
+ labelToASCII(const UnicodeString &label, UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ virtual UnicodeString &
+ labelToUnicode(const UnicodeString &label, UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ virtual UnicodeString &
+ nameToASCII(const UnicodeString &name, UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ virtual UnicodeString &
+ nameToUnicode(const UnicodeString &name, UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ // UTF-8 API: output is written to the dest ByteSink.
+ virtual void
+ labelToASCII_UTF8(StringPiece label, ByteSink &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ virtual void
+ labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ virtual void
+ nameToASCII_UTF8(StringPiece name, ByteSink &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ virtual void
+ nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+private:
+ // UTF-16 worker: ASCII fastpath over src, then processUnicode() for the rest.
+ // Returns dest.
+ UnicodeString &
+ process(const UnicodeString &src,
+ UBool isLabel, UBool toASCII,
+ UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ // UTF-8 worker: ASCII fastpath for inputs of at most 256 bytes,
+ // otherwise conversion to UTF-16 and processUnicode().
+ void
+ processUTF8(StringPiece src,
+ UBool isLabel, UBool toASCII,
+ ByteSink &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ // Normalizes src (from mappingStart on) into dest with uts46Norm2 and
+ // then processes the labels of dest starting at labelStart. Returns dest.
+ UnicodeString &
+ processUnicode(const UnicodeString &src,
+ int32_t labelStart, int32_t mappingStart,
+ UBool isLabel, UBool toASCII,
+ UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ // Maps deviation characters in dest from mappingStart on.
+ // returns the new dest.length()
+ int32_t
+ mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart,
+ UErrorCode &errorCode) const;
+
+ // Validates and converts the label dest[labelStart, labelStart+labelLength).
+ // returns the new label length
+ int32_t
+ processLabel(UnicodeString &dest,
+ int32_t labelStart, int32_t labelLength,
+ UBool toASCII,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+ // Makes sure a bad "xn--" label does not look valid.
+ // Returns the new label length.
+ int32_t
+ markBadACELabel(UnicodeString &dest,
+ int32_t labelStart, int32_t labelLength,
+ UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const;
+
+ // Applies the IDNA2008 BiDi rule checks to one label; violations clear info.isOkBiDi.
+ void
+ checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const;
+
+ // Called by processLabel() when UIDNA_CHECK_CONTEXTJ is set;
+ // a FALSE result is reported there as UIDNA_ERROR_CONTEXTJ.
+ UBool
+ isLabelOkContextJ(const UChar *label, int32_t labelLength) const;
+
+ // Called by processLabel() when UIDNA_CHECK_CONTEXTO is set.
+ void
+ checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const;
+
+ const Normalizer2 &uts46Norm2; // uts46.nrm
+ uint32_t options; // UIDNA_* option bits passed to the constructor
+};
+
+// Factory for the UTS #46 implementation.
+// Returns NULL if errorCode indicates a failure on entry, if allocation
+// fails (errorCode is set to U_MEMORY_ALLOCATION_ERROR), or if the UTS46
+// constructor reports a failure (the new object is deleted again).
+IDNA *
+IDNA::createUTS46Instance(uint32_t options, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    IDNA *instance=new UTS46(options, errorCode);
+    if(instance==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    if(U_FAILURE(errorCode)) {
+        // Construction failed; do not hand out a half-initialized object.
+        delete instance;
+        return NULL;
+    }
+    return instance;
+}
+
+// UTS46 implementation ---------------------------------------------------- ***
+
+// Stores the option bits and binds uts46Norm2 to the Normalizer2 instance
+// requested for the "uts46" data in UNORM2_COMPOSE mode; errorCode is passed
+// through to Normalizer2::getInstance().
+// NOTE(review): the pointer returned by getInstance() is dereferenced without
+// a null check; createUTS46Instance() deletes the object when errorCode
+// reports a failure -- confirm no other code constructs UTS46 directly.
+UTS46::UTS46(uint32_t opt, UErrorCode &errorCode)
+ : uts46Norm2(*Normalizer2::getInstance(NULL, "uts46", UNORM2_COMPOSE, errorCode)),
+ options(opt) {}
+
+// Empty destructor body; the class holds only a reference and an integer.
+UTS46::~UTS46() {}
+
+// Converts a single label to its ASCII form via process(). Returns dest.
+UnicodeString &
+UTS46::labelToASCII(const UnicodeString &label, UnicodeString &dest,
+                    IDNAInfo &info, UErrorCode &errorCode) const {
+    const UBool isLabel=TRUE;
+    const UBool toASCII=TRUE;
+    return process(label, isLabel, toASCII, dest, info, errorCode);
+}
+
+// Converts a single label to its Unicode form via process(). Returns dest.
+UnicodeString &
+UTS46::labelToUnicode(const UnicodeString &label, UnicodeString &dest,
+                      IDNAInfo &info, UErrorCode &errorCode) const {
+    const UBool isLabel=TRUE;
+    const UBool toASCII=FALSE;
+    return process(label, isLabel, toASCII, dest, info, errorCode);
+}
+
+// Converts a whole domain name to its ASCII form via process(), then
+// re-checks the overall length of an all-ASCII result: 254 code units are
+// only acceptable when the last one (index 253) is a dot; anything longer
+// sets UIDNA_ERROR_DOMAIN_NAME_TOO_LONG. Returns dest.
+UnicodeString &
+UTS46::nameToASCII(const UnicodeString &name, UnicodeString &dest,
+                   IDNAInfo &info, UErrorCode &errorCode) const {
+    process(name, FALSE, TRUE, dest, info, errorCode);
+    const int32_t resultLength=dest.length();
+    // Nothing to do for short results or when the error is already recorded.
+    if(resultLength<254 || (info.errors&UIDNA_ERROR_DOMAIN_NAME_TOO_LONG)!=0) {
+        return dest;
+    }
+    if(!isASCIIString(dest)) {
+        return dest;
+    }
+    if(resultLength>254 || dest[253]!=0x2e) {
+        info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
+    }
+    return dest;
+}
+
+// Converts a whole domain name to its Unicode form via process(). Returns dest.
+UnicodeString &
+UTS46::nameToUnicode(const UnicodeString &name, UnicodeString &dest,
+                     IDNAInfo &info, UErrorCode &errorCode) const {
+    const UBool isLabel=FALSE;
+    const UBool toASCII=FALSE;
+    return process(name, isLabel, toASCII, dest, info, errorCode);
+}
+
+// UTF-8 variant: converts a single label to its ASCII form via processUTF8().
+void
+UTS46::labelToASCII_UTF8(StringPiece label, ByteSink &dest,
+                         IDNAInfo &info, UErrorCode &errorCode) const {
+    const UBool isLabel=TRUE;
+    const UBool toASCII=TRUE;
+    processUTF8(label, isLabel, toASCII, dest, info, errorCode);
+}
+
+// UTF-8 variant: converts a single label to its Unicode form via processUTF8().
+void
+UTS46::labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
+                          IDNAInfo &info, UErrorCode &errorCode) const {
+    const UBool isLabel=TRUE;
+    const UBool toASCII=FALSE;
+    processUTF8(label, isLabel, toASCII, dest, info, errorCode);
+}
+
+// UTF-8 variant: converts a whole domain name to its ASCII form via processUTF8().
+void
+UTS46::nameToASCII_UTF8(StringPiece name, ByteSink &dest,
+                        IDNAInfo &info, UErrorCode &errorCode) const {
+    const UBool isLabel=FALSE;
+    const UBool toASCII=TRUE;
+    processUTF8(name, isLabel, toASCII, dest, info, errorCode);
+}
+
+// UTF-8 variant: converts a whole domain name to its Unicode form via processUTF8().
+void
+UTS46::nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
+                         IDNAInfo &info, UErrorCode &errorCode) const {
+    const UBool isLabel=FALSE;
+    const UBool toASCII=FALSE;
+    processUTF8(name, isLabel, toASCII, dest, info, errorCode);
+}
+
+// UTS #46 data for ASCII characters.
+// The normalizer (using uts46.nrm) maps uppercase ASCII letters to lowercase
+// and passes through all other ASCII characters.
+// If UIDNA_USE_STD3_RULES is set, then non-LDH characters are disallowed
+// using this data.
+// The ASCII fastpath also uses this data.
+// Values: -1=disallowed 0==valid 1==mapped (lowercase)
+// Layout: indexed by code point 0x00..0x7F, 16 entries per row.
+static const int8_t asciiData[128]={
+ // 0000..001F: all disallowed
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ // 002D..002E; valid # HYPHEN-MINUS..FULL STOP
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, -1,
+ // 0030..0039; valid # DIGIT ZERO..DIGIT NINE
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1,
+ // 0041..005A; mapped # LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
+ -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
+ // 0061..007A; valid # LATIN SMALL LETTER A..LATIN SMALL LETTER Z
+ -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1
+};
+
+// UTF-16 worker behind labelToASCII/labelToUnicode/nameToASCII/nameToUnicode.
+// Copies src into dest through an ASCII fastpath (lowercasing A-Z and
+// recording simple per-label errors); at the first character that needs full
+// processing it hands the remainder to processUnicode().
+//
+// src      input label or domain name; must not be the same object as dest
+// isLabel  TRUE: src is a single label (a dot leaves the fastpath)
+// toASCII  TRUE: also apply the label and domain-name length limits
+// dest     output string; set to bogus on argument or entry errors
+// info     reset, then receives error bits in info.errors
+// errorCode  U_ILLEGAL_ARGUMENT_ERROR if &dest==&src or src has no buffer;
+//            U_MEMORY_ALLOCATION_ERROR if dest cannot provide a buffer
+// Returns dest.
+UnicodeString &
+UTS46::process(const UnicodeString &src,
+ UBool isLabel, UBool toASCII,
+ UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const {
+ // uts46Norm2.normalize() would do all of this error checking and setup,
+ // but with the ASCII fastpath we do not always call it, and do not
+ // call it first.
+ if(U_FAILURE(errorCode)) {
+ dest.setToBogus();
+ return dest;
+ }
+ const UChar *srcArray=src.getBuffer();
+ if(&dest==&src || srcArray==NULL) {
+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ dest.setToBogus();
+ return dest;
+ }
+ // Arguments are fine, reset output values.
+ dest.remove();
+ info.reset();
+ int32_t srcLength=src.length();
+ if(srcLength==0) {
+ info.errors|=UIDNA_ERROR_EMPTY_LABEL;
+ return dest;
+ }
+ UChar *destArray=dest.getBuffer(srcLength);
+ if(destArray==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return dest;
+ }
+ // ASCII fastpath
+ UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
+ int32_t labelStart=0;
+ int32_t i;
+ for(i=0;; ++i) {
+ if(i==srcLength) {
+ // Reached the end on the fastpath: the whole input was simple ASCII.
+ if(toASCII) {
+ if((i-labelStart)>63) {
+ info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
+ }
+ // There is a trailing dot if labelStart==i.
+ if(!isLabel && i>=254 && (i>254 || labelStart<i)) {
+ info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
+ }
+ }
+ info.errors|=info.labelErrors;
+ dest.releaseBuffer(i);
+ return dest;
+ }
+ UChar c=srcArray[i];
+ if(c>0x7f) {
+ // Non-ASCII: leave the fastpath.
+ break;
+ }
+ int cData=asciiData[c];
+ if(cData>0) {
+ destArray[i]=c+0x20; // Lowercase an uppercase ASCII letter.
+ } else if(cData<0 && disallowNonLDHDot) {
+ break; // Replacing with U+FFFD can be complicated for toASCII.
+ } else {
+ destArray[i]=c;
+ if(c==0x2d) { // hyphen
+ if(i==(labelStart+3) && srcArray[i-1]==0x2d) {
+ // "??--..." is Punycode or forbidden.
+ ++i; // '-' was copied to dest already
+ break;
+ }
+ if(i==labelStart) {
+ // label starts with "-"
+ info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN;
+ }
+ if((i+1)==srcLength || srcArray[i+1]==0x2e) {
+ // label ends with "-"
+ info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN;
+ }
+ } else if(c==0x2e) { // dot
+ if(isLabel) {
+ // Replacing with U+FFFD can be complicated for toASCII.
+ ++i; // '.' was copied to dest already
+ break;
+ }
+ if(i==labelStart) {
+ info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
+ }
+ if(toASCII && (i-labelStart)>63) {
+ info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
+ }
+ // Label finished: fold its errors into the overall result
+ // and start the next label after the dot.
+ info.errors|=info.labelErrors;
+ info.labelErrors=0;
+ labelStart=i+1;
+ }
+ }
+ }
+ // Left the fastpath: dest holds the first i code units; the current label
+ // (from labelStart) and everything after it go through processUnicode().
+ info.errors|=info.labelErrors;
+ dest.releaseBuffer(i);
+ processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode);
+ // The labels before labelStart were handled only by the fastpath above,
+ // so they get their BiDi check here once the name is known to be BiDi.
+ if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 &&
+ (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(dest.getBuffer(), labelStart)))
+ ) {
+ info.errors|=UIDNA_ERROR_BIDI;
+ }
+ return dest;
+}
+
+// UTF-8 worker behind the *_UTF8 API methods.
+// Inputs of at most 256 bytes go through an ASCII fastpath that writes into
+// an append buffer obtained from dest (with stackArray as the scratch buffer
+// passed to GetAppendBuffer). At the first byte that needs full processing,
+// the completed ASCII labels are appended to dest and the current label plus
+// the rest of src are converted to UTF-16 and handled by processUnicode().
+// Longer inputs are converted to UTF-16 as a whole.
+//
+// src      input label or domain name in UTF-8
+// isLabel  TRUE: src is a single label (a dot leaves the fastpath)
+// toASCII  TRUE: also apply the label and domain-name length limits
+// dest     sink that receives the UTF-8 result
+// info     reset, then receives error bits in info.errors
+// errorCode  U_ILLEGAL_ARGUMENT_ERROR if src.data() is NULL with nonzero length
+void
+UTS46::processUTF8(StringPiece src,
+ UBool isLabel, UBool toASCII,
+ ByteSink &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const {
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+ const char *srcArray=src.data();
+ int32_t srcLength=src.length();
+ if(srcArray==NULL && srcLength!=0) {
+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ // Arguments are fine, reset output values.
+ info.reset();
+ if(srcLength==0) {
+ info.errors|=UIDNA_ERROR_EMPTY_LABEL;
+ dest.Flush();
+ return;
+ }
+ UnicodeString destString;
+ int32_t labelStart=0;
+ if(srcLength<=256) { // length of stackArray[]
+ // ASCII fastpath
+ char stackArray[256];
+ int32_t destCapacity;
+ char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20,
+ stackArray, UPRV_LENGTHOF(stackArray), &destCapacity);
+ UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
+ int32_t i;
+ for(i=0;; ++i) {
+ if(i==srcLength) {
+ // Reached the end on the fastpath: the whole input was simple ASCII.
+ if(toASCII) {
+ if((i-labelStart)>63) {
+ info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
+ }
+ // There is a trailing dot if labelStart==i.
+ if(!isLabel && i>=254 && (i>254 || labelStart<i)) {
+ info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
+ }
+ }
+ info.errors|=info.labelErrors;
+ dest.Append(destArray, i);
+ dest.Flush();
+ return;
+ }
+ char c=srcArray[i];
+ if((int8_t)c<0) { // (uint8_t)c>0x7f
+ break;
+ }
+ int cData=asciiData[(int)c]; // Cast: gcc warns about indexing with a char.
+ if(cData>0) {
+ destArray[i]=c+0x20; // Lowercase an uppercase ASCII letter.
+ } else if(cData<0 && disallowNonLDHDot) {
+ break; // Replacing with U+FFFD can be complicated for toASCII.
+ } else {
+ destArray[i]=c;
+ if(c==0x2d) { // hyphen
+ if(i==(labelStart+3) && srcArray[i-1]==0x2d) {
+ // "??--..." is Punycode or forbidden.
+ // i is not advanced here: the prefix converted below ends
+ // before i, and processUnicode() reads src from that offset.
+ break;
+ }
+ if(i==labelStart) {
+ // label starts with "-"
+ info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN;
+ }
+ if((i+1)==srcLength || srcArray[i+1]==0x2e) {
+ // label ends with "-"
+ info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN;
+ }
+ } else if(c==0x2e) { // dot
+ if(isLabel) {
+ break; // Replacing with U+FFFD can be complicated for toASCII.
+ }
+ if(i==labelStart) {
+ info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
+ }
+ if(toASCII && (i-labelStart)>63) {
+ info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
+ }
+ // Label finished: fold its errors into the overall result
+ // and start the next label after the dot.
+ info.errors|=info.labelErrors;
+ info.labelErrors=0;
+ labelStart=i+1;
+ }
+ }
+ }
+ info.errors|=info.labelErrors;
+ // Convert the processed ASCII prefix of the current label to UTF-16.
+ int32_t mappingStart=i-labelStart;
+ destString=UnicodeString::fromUTF8(StringPiece(destArray+labelStart, mappingStart));
+ // Output the previous ASCII labels and process the rest of src in UTF-16.
+ dest.Append(destArray, labelStart);
+ processUnicode(UnicodeString::fromUTF8(StringPiece(src, labelStart)), 0, mappingStart,
+ isLabel, toASCII,
+ destString, info, errorCode);
+ } else {
+ // src is too long for the ASCII fastpath implementation.
+ processUnicode(UnicodeString::fromUTF8(src), 0, 0,
+ isLabel, toASCII,
+ destString, info, errorCode);
+ }
+ destString.toUTF8(dest); // calls dest.Flush()
+ if(toASCII && !isLabel) {
+ // length==labelStart==254 means that there is a trailing dot (ok) and
+ // destString is empty (do not index at 253-labelStart).
+ int32_t length=labelStart+destString.length();
+ if( length>=254 && isASCIIString(destString) &&
+ (length>254 ||
+ (labelStart<254 && destString[253-labelStart]!=0x2e))
+ ) {
+ info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
+ }
+ }
+ // The labels before labelStart were handled only by the fastpath above,
+ // so they get their BiDi check here (on the source bytes) once the name
+ // is known to be BiDi.
+ if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 &&
+ (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(srcArray, labelStart)))
+ ) {
+ info.errors|=UIDNA_ERROR_BIDI;
+ }
+}
+
+// Normalizes src with uts46Norm2 and then processes dest label by label.
+// If mappingStart==0 the whole of src is normalized into dest; otherwise
+// src from mappingStart on is normalized and appended to what dest already
+// holds. Scanning then starts at labelStart in dest:
+// - a dot (unless isLabel) ends a label, which is passed to processLabel();
+// - deviation characters (U+00DF, U+03C2, U+200C, U+200D) set
+//   info.isTransDiff and, unless the matching UIDNA_NONTRANSITIONAL_* option
+//   is set, are mapped once via mapDevChars();
+// - unpaired surrogates are replaced with U+FFFD and flagged as disallowed.
+// Returns dest; returns early when errorCode indicates a failure.
+UnicodeString &
+UTS46::processUnicode(const UnicodeString &src,
+ int32_t labelStart, int32_t mappingStart,
+ UBool isLabel, UBool toASCII,
+ UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const {
+ if(mappingStart==0) {
+ uts46Norm2.normalize(src, dest, errorCode);
+ } else {
+ uts46Norm2.normalizeSecondAndAppend(dest, src.tempSubString(mappingStart), errorCode);
+ }
+ if(U_FAILURE(errorCode)) {
+ return dest;
+ }
+ UBool doMapDevChars=
+ toASCII ? (options&UIDNA_NONTRANSITIONAL_TO_ASCII)==0 :
+ (options&UIDNA_NONTRANSITIONAL_TO_UNICODE)==0;
+ const UChar *destArray=dest.getBuffer();
+ int32_t destLength=dest.length();
+ int32_t labelLimit=labelStart;
+ while(labelLimit<destLength) {
+ UChar c=destArray[labelLimit];
+ if(c==0x2e && !isLabel) {
+ int32_t labelLength=labelLimit-labelStart;
+ int32_t newLength=processLabel(dest, labelStart, labelLength,
+ toASCII, info, errorCode);
+ info.errors|=info.labelErrors;
+ info.labelErrors=0;
+ if(U_FAILURE(errorCode)) {
+ return dest;
+ }
+ // processLabel() may have changed dest: refresh the buffer pointer,
+ // adjust the total length, and continue after the dot.
+ destArray=dest.getBuffer();
+ destLength+=newLength-labelLength;
+ labelLimit=labelStart+=newLength+1;
+ continue;
+ } else if(c<0xdf) {
+ // pass
+ } else if(c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) {
+ // Deviation character: U+00DF, U+03C2, U+200C or U+200D.
+ info.isTransDiff=TRUE;
+ if(doMapDevChars) {
+ destLength=mapDevChars(dest, labelStart, labelLimit, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return dest;
+ }
+ destArray=dest.getBuffer();
+ // All deviation characters have been mapped, no need to check for them again.
+ doMapDevChars=FALSE;
+ // Do not increment labelLimit in case c was removed.
+ continue;
+ }
+ } else if(U16_IS_SURROGATE(c)) {
+ if(U16_IS_SURROGATE_LEAD(c) ?
+ (labelLimit+1)==destLength || !U16_IS_TRAIL(destArray[labelLimit+1]) :
+ labelLimit==labelStart || !U16_IS_LEAD(destArray[labelLimit-1])) {
+ // Map an unpaired surrogate to U+FFFD before normalization so that when
+ // that removes characters we do not turn two unpaired ones into a pair.
+ info.labelErrors|=UIDNA_ERROR_DISALLOWED;
+ dest.setCharAt(labelLimit, 0xfffd);
+ destArray=dest.getBuffer();
+ }
+ }
+ ++labelLimit;
+ }
+ // Permit an empty label at the end (0<labelStart==labelLimit==destLength is ok)
+ // but not an empty label elsewhere nor a completely empty domain name.
+ // processLabel() sets UIDNA_ERROR_EMPTY_LABEL when labelLength==0.
+ if(0==labelStart || labelStart<labelLimit) {
+ processLabel(dest, labelStart, labelLimit-labelStart,
+ toASCII, info, errorCode);
+ info.errors|=info.labelErrors;
+ }
+ return dest;
+}
+
+// Maps the deviation characters in dest, in place, from mappingStart to the end:
+//   U+00DF (sharp s)     -> "ss"
+//   U+03C2 (final sigma) -> U+03C3
+//   U+200C / U+200D      -> removed
+// If anything was mapped, dest from labelStart on is normalized again with
+// uts46Norm2 and replaced with the result.
+// Returns the new dest.length(); returns 0 if errorCode already indicates a
+// failure on entry. Sets U_MEMORY_ALLOCATION_ERROR if a buffer cannot be
+// obtained or dest becomes bogus.
+int32_t
+UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart,
+ UErrorCode &errorCode) const {
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ int32_t length=dest.length();
+ // Ask for one extra unit up front when the first character to map is a sharp s.
+ UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length);
+ if(s==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return length;
+ }
+ int32_t capacity=dest.getCapacity();
+ UBool didMapDevChars=FALSE;
+ // readIndex runs ahead of writeIndex once characters have been removed.
+ int32_t readIndex=mappingStart, writeIndex=mappingStart;
+ do {
+ UChar c=s[readIndex++];
+ switch(c) {
+ case 0xdf:
+ // Map sharp s to ss.
+ didMapDevChars=TRUE;
+ s[writeIndex++]=0x73; // Replace sharp s with first s.
+ // Insert second s and account for possible buffer reallocation.
+ if(writeIndex==readIndex) {
+ // No gap available from earlier removals: make room by shifting the tail.
+ if(length==capacity) {
+ dest.releaseBuffer(length);
+ s=dest.getBuffer(length+1);
+ if(s==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return length;
+ }
+ capacity=dest.getCapacity();
+ }
+ u_memmove(s+writeIndex+1, s+writeIndex, length-writeIndex);
+ ++readIndex;
+ }
+ s[writeIndex++]=0x73;
+ ++length;
+ break;
+ case 0x3c2: // Map final sigma to nonfinal sigma.
+ didMapDevChars=TRUE;
+ s[writeIndex++]=0x3c3;
+ break;
+ case 0x200c: // Ignore/remove ZWNJ.
+ case 0x200d: // Ignore/remove ZWJ.
+ didMapDevChars=TRUE;
+ --length;
+ break;
+ default:
+ // Only really necessary if writeIndex was different from readIndex.
+ s[writeIndex++]=c;
+ break;
+ }
+ } while(writeIndex<length);
+ dest.releaseBuffer(length);
+ if(didMapDevChars) {
+ // Mapping deviation characters might have resulted in an un-NFC string.
+ // We could use either the NFC or the UTS #46 normalizer.
+ // By using the UTS #46 normalizer again, we avoid having to load a second .nrm data file.
+ UnicodeString normalized;
+ uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode);
+ if(U_SUCCESS(errorCode)) {
+ dest.replace(labelStart, 0x7fffffff, normalized);
+ if(dest.isBogus()) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ }
+ return dest.length();
+ }
+ }
+ return length;
+}
+
+// A few non-ASCII characters are equivalent to sequences that contain
+// non-LDH ASCII characters. To list them:
+// grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt)
+// Returns TRUE for exactly U+2260, U+226E and U+226F.
+static inline UBool
+isNonASCIIDisallowedSTD3Valid(UChar32 c) {
+    switch(c) {
+    case 0x2260:
+    case 0x226E:
+    case 0x226F:
+        return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
+// Writes the (possibly modified) label back into dest.
+// When label is dest itself, the label was already edited in place and
+// labelLength is its new length (which may differ from label.length());
+// nothing needs to be copied. Otherwise labelLength==label.length() and
+// dest[destLabelStart, destLabelStart+destLabelLength) is replaced by label.
+// Returns labelLength (the new label length), or 0 if errorCode indicates a
+// failure on entry or dest becomes bogus (U_MEMORY_ALLOCATION_ERROR).
+static int32_t
+replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength,
+             const UnicodeString &label, int32_t labelLength, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return 0;
+    }
+    if(&label==&dest) {
+        // Modified in place; dest is already up to date.
+        return labelLength;
+    }
+    dest.replace(destLabelStart, destLabelLength, label);
+    if(dest.isBogus()) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    return labelLength;
+}
+
+// Validates and converts the single label dest[labelStart, labelStart+labelLength).
+// - A label starting with "xn--" is decoded from Punycode into a temporary
+//   string and must be normalized according to uts46Norm2; otherwise it is
+//   handed to markBadACELabel().
+// - The (decoded) label is checked for hyphen positions, dots, STD3-disallowed
+//   characters (if UIDNA_USE_STD3_RULES), U+FFFD and a leading combining mark.
+// - Without severe errors, the optional BiDi/CONTEXTJ/CONTEXTO checks run,
+//   and for toASCII a label with non-ASCII characters is encoded to Punycode
+//   with an "xn--" prefix.
+// Problems are recorded in info.labelErrors.
+// Returns the new label length in dest; returns 0 if errorCode indicates a
+// failure on entry.
+int32_t
+UTS46::processLabel(UnicodeString &dest,
+ int32_t labelStart, int32_t labelLength,
+ UBool toASCII,
+ IDNAInfo &info, UErrorCode &errorCode) const {
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ UnicodeString fromPunycode;
+ UnicodeString *labelString;
+ const UChar *label=dest.getBuffer()+labelStart;
+ // Remember where the label sits in dest; labelStart/labelLength are
+ // redirected to the decoded string if the label is Punycode.
+ int32_t destLabelStart=labelStart;
+ int32_t destLabelLength=labelLength;
+ UBool wasPunycode;
+ if(labelLength>=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && label[3]==0x2d) {
+ // Label starts with "xn--", try to un-Punycode it.
+ // In IDNA2008, labels like "xn--" (decodes to an empty string) and
+ // "xn--ASCII-" (decodes to just "ASCII") fail the round-trip validation from
+ // comparing the ToUnicode input with the back-to-ToASCII output.
+ // They are alternate encodings of the respective ASCII labels.
+ // Ignore "xn---" here: It will fail Punycode.decode() which logically comes before
+ // the round-trip verification.
+ if(labelLength==4 || (labelLength>5 && label[labelLength-1]==u'-')) {
+ info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
+ return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode);
+ }
+ wasPunycode=TRUE;
+ UChar *unicodeBuffer=fromPunycode.getBuffer(-1); // capacity==-1: most labels should fit
+ if(unicodeBuffer==NULL) {
+ // Should never occur if we used capacity==-1 which uses the internal buffer.
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return labelLength;
+ }
+ // Use a separate error code so that a Punycode failure is reported
+ // through info.labelErrors rather than through errorCode.
+ UErrorCode punycodeErrorCode=U_ZERO_ERROR;
+ int32_t unicodeLength=u_strFromPunycode(label+4, labelLength-4,
+ unicodeBuffer, fromPunycode.getCapacity(),
+ NULL, &punycodeErrorCode);
+ if(punycodeErrorCode==U_BUFFER_OVERFLOW_ERROR) {
+ // Retry once with a buffer of the length reported by the first attempt.
+ fromPunycode.releaseBuffer(0);
+ unicodeBuffer=fromPunycode.getBuffer(unicodeLength);
+ if(unicodeBuffer==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return labelLength;
+ }
+ punycodeErrorCode=U_ZERO_ERROR;
+ unicodeLength=u_strFromPunycode(label+4, labelLength-4,
+ unicodeBuffer, fromPunycode.getCapacity(),
+ NULL, &punycodeErrorCode);
+ }
+ fromPunycode.releaseBuffer(unicodeLength);
+ if(U_FAILURE(punycodeErrorCode)) {
+ info.labelErrors|=UIDNA_ERROR_PUNYCODE;
+ return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode);
+ }
+ // Check for NFC, and for characters that are not
+ // valid or deviation characters according to the normalizer.
+ // If there is something wrong, then the string will change.
+ // Note that the normalizer passes through non-LDH ASCII and deviation characters.
+ // Deviation characters are ok in Punycode even in transitional processing.
+ // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES
+ // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too.
+ UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return labelLength;
+ }
+ if(!isValid) {
+ info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
+ return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode);
+ }
+ // From here on, validate the decoded label.
+ labelString=&fromPunycode;
+ label=fromPunycode.getBuffer();
+ labelStart=0;
+ labelLength=fromPunycode.length();
+ } else {
+ wasPunycode=FALSE;
+ labelString=&dest;
+ }
+ // Validity check
+ if(labelLength==0) {
+ info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
+ return replaceLabel(dest, destLabelStart, destLabelLength,
+ *labelString, labelLength, errorCode);
+ }
+ // labelLength>0
+ if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) {
+ // label starts with "??--"
+ info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4;
+ }
+ if(label[0]==0x2d) {
+ // label starts with "-"
+ info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN;
+ }
+ if(label[labelLength-1]==0x2d) {
+ // label ends with "-"
+ info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN;
+ }
+ // If the label was not a Punycode label, then it was the result of
+ // mapping, normalization and label segmentation.
+ // If the label was in Punycode, then we mapped it again above
+ // and checked its validity.
+ // Now we handle the STD3 restriction to LDH characters (if set)
+ // and we look for U+FFFD which indicates disallowed characters
+ // in a non-Punycode label or U+FFFD itself in a Punycode label.
+ // We also check for dots which can come from the input to a single-label function.
+ // Ok to cast away const because we own the UnicodeString.
+ UChar *s=(UChar *)label;
+ const UChar *limit=label+labelLength;
+ // Bitwise OR of all non-ASCII code units; used below as a cheap filter.
+ UChar oredChars=0;
+ // If we enforce STD3 rules, then ASCII characters other than LDH and dot are disallowed.
+ UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
+ do {
+ UChar c=*s;
+ if(c<=0x7f) {
+ if(c==0x2e) {
+ info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT;
+ *s=0xfffd;
+ } else if(disallowNonLDHDot && asciiData[c]<0) {
+ info.labelErrors|=UIDNA_ERROR_DISALLOWED;
+ *s=0xfffd;
+ }
+ } else {
+ oredChars|=c;
+ if(disallowNonLDHDot && isNonASCIIDisallowedSTD3Valid(c)) {
+ info.labelErrors|=UIDNA_ERROR_DISALLOWED;
+ *s=0xfffd;
+ } else if(c==0xfffd) {
+ info.labelErrors|=UIDNA_ERROR_DISALLOWED;
+ }
+ }
+ ++s;
+ } while(s<limit);
+ // Check for a leading combining mark after other validity checks
+ // so that we don't report UIDNA_ERROR_DISALLOWED for the U+FFFD from here.
+ UChar32 c;
+ int32_t cpLength=0;
+ // "Unsafe" is ok because unpaired surrogates were mapped to U+FFFD.
+ U16_NEXT_UNSAFE(label, cpLength, c);
+ if((U_GET_GC_MASK(c)&U_GC_M_MASK)!=0) {
+ info.labelErrors|=UIDNA_ERROR_LEADING_COMBINING_MARK;
+ // Replace the first code point (1 or 2 code units) with U+FFFD and
+ // refresh the pointer/length, since replace() may move the buffer.
+ labelString->replace(labelStart, cpLength, (UChar)0xfffd);
+ label=labelString->getBuffer()+labelStart;
+ labelLength+=1-cpLength;
+ if(labelString==&dest) {
+ destLabelLength=labelLength;
+ }
+ }
+ if((info.labelErrors&severeErrors)==0) {
+ // Do contextual checks only if we do not have U+FFFD from a severe error
+ // because U+FFFD can make these checks fail.
+ if((options&UIDNA_CHECK_BIDI)!=0 && (!info.isBiDi || info.isOkBiDi)) {
+ checkLabelBiDi(label, labelLength, info);
+ }
+ // (oredChars&0x200c)==0x200c is a cheap pre-filter: U+200C and U+200D
+ // both have all bits of 0x200c set, so without them the label
+ // cannot contain either character.
+ if( (options&UIDNA_CHECK_CONTEXTJ)!=0 && (oredChars&0x200c)==0x200c &&
+ !isLabelOkContextJ(label, labelLength)
+ ) {
+ info.labelErrors|=UIDNA_ERROR_CONTEXTJ;
+ }
+ // oredChars<0xb7 implies that no non-ASCII code unit is 0xb7 or above.
+ if((options&UIDNA_CHECK_CONTEXTO)!=0 && oredChars>=0xb7) {
+ checkLabelContextO(label, labelLength, info);
+ }
+ if(toASCII) {
+ if(wasPunycode) {
+ // Leave a Punycode label unchanged if it has no severe errors.
+ if(destLabelLength>63) {
+ info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
+ }
+ return destLabelLength;
+ } else if(oredChars>=0x80) {
+ // Contains non-ASCII characters.
+ UnicodeString punycode;
+ UChar *buffer=punycode.getBuffer(63); // 63==maximum DNS label length
+ if(buffer==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return destLabelLength;
+ }
+ buffer[0]=0x78; // Write "xn--".
+ buffer[1]=0x6e;
+ buffer[2]=0x2d;
+ buffer[3]=0x2d;
+ int32_t punycodeLength=u_strToPunycode(label, labelLength,
+ buffer+4, punycode.getCapacity()-4,
+ NULL, &errorCode);
+ if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
+ // Retry once with a buffer sized for the reported length;
+ // keep the 4-unit "xn--" prefix already written.
+ errorCode=U_ZERO_ERROR;
+ punycode.releaseBuffer(4);
+ buffer=punycode.getBuffer(4+punycodeLength);
+ if(buffer==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return destLabelLength;
+ }
+ punycodeLength=u_strToPunycode(label, labelLength,
+ buffer+4, punycode.getCapacity()-4,
+ NULL, &errorCode);
+ }
+ punycodeLength+=4;
+ punycode.releaseBuffer(punycodeLength);
+ if(U_FAILURE(errorCode)) {
+ return destLabelLength;
+ }
+ if(punycodeLength>63) {
+ info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
+ }
+ return replaceLabel(dest, destLabelStart, destLabelLength,
+ punycode, punycodeLength, errorCode);
+ } else {
+ // all-ASCII label
+ if(labelLength>63) {
+ info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
+ }
+ }
+ }
+ } else {
+ // If a Punycode label has severe errors,
+ // then leave it but make sure it does not look valid.
+ if(wasPunycode) {
+ info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
+ return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info, errorCode);
+ }
+ }
+ return replaceLabel(dest, destLabelStart, destLabelLength,
+ *labelString, labelLength, errorCode);
+}
+
+// Make sure an ACE label does not look valid.
+// Append U+FFFD if the label has only LDH characters.
+// If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD.
+// A dot inside the label is always replaced with U+FFFD and reported as
+// UIDNA_ERROR_LABEL_HAS_DOT.
+// The label is dest[labelStart, labelStart+labelLength) and is modified in place.
+// Returns the new label length (one more than labelLength if U+FFFD was
+// appended); returns 0 if errorCode indicates a failure on entry or dest
+// becomes bogus (U_MEMORY_ALLOCATION_ERROR).
+int32_t
+UTS46::markBadACELabel(UnicodeString &dest,
+ int32_t labelStart, int32_t labelLength,
+ UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const {
+ if(U_FAILURE(errorCode)) {
+ return 0;
+ }
+ UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
+ // isASCII: the label still consists only of ASCII after any replacements.
+ // onlyLDH: every character after "xn--" is valid or mapped per asciiData and not a dot.
+ UBool isASCII=TRUE;
+ UBool onlyLDH=TRUE;
+ const UChar *label=dest.getBuffer()+labelStart;
+ const UChar *limit=label+labelLength;
+ // Start after the initial "xn--".
+ // Ok to cast away const because we own the UnicodeString.
+ for(UChar *s=const_cast<UChar *>(label+4); s<limit; ++s) {
+ UChar c=*s;
+ if(c<=0x7f) {
+ if(c==0x2e) {
+ info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT;
+ *s=0xfffd;
+ isASCII=onlyLDH=FALSE;
+ } else if(asciiData[c]<0) {
+ onlyLDH=FALSE;
+ if(disallowNonLDHDot) {
+ *s=0xfffd;
+ isASCII=FALSE;
+ }
+ }
+ } else {
+ isASCII=onlyLDH=FALSE;
+ }
+ }
+ if(onlyLDH) {
+ // Nothing in the label marks it as bad yet: append U+FFFD.
+ dest.insert(labelStart+labelLength, (UChar)0xfffd);
+ if(dest.isBogus()) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ ++labelLength;
+ } else {
+ if(toASCII && isASCII && labelLength>63) {
+ info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
+ }
+ }
+ return labelLength;
+}
+
+// Bit masks over BiDi character directions (built with U_MASK from the
+// U_* direction constants). checkLabelBiDi() compares them against
+// U_MASK(u_charDirection(c)) values. Each name lists the directions it covers.
+const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT);
+const uint32_t R_AL_MASK=U_MASK(U_RIGHT_TO_LEFT)|U_MASK(U_RIGHT_TO_LEFT_ARABIC);
+const uint32_t L_R_AL_MASK=L_MASK|R_AL_MASK;
+
+const uint32_t R_AL_AN_MASK=R_AL_MASK|U_MASK(U_ARABIC_NUMBER);
+
+const uint32_t EN_AN_MASK=U_MASK(U_EUROPEAN_NUMBER)|U_MASK(U_ARABIC_NUMBER);
+const uint32_t R_AL_EN_AN_MASK=R_AL_MASK|EN_AN_MASK;
+const uint32_t L_EN_MASK=L_MASK|U_MASK(U_EUROPEAN_NUMBER);
+
+// Directions that are allowed in both LTR and RTL labels besides the
+// letters and digits: ES, CS, ET, ON, BN and NSM.
+const uint32_t ES_CS_ET_ON_BN_NSM_MASK=
+ U_MASK(U_EUROPEAN_NUMBER_SEPARATOR)|
+ U_MASK(U_COMMON_NUMBER_SEPARATOR)|
+ U_MASK(U_EUROPEAN_NUMBER_TERMINATOR)|
+ U_MASK(U_OTHER_NEUTRAL)|
+ U_MASK(U_BOUNDARY_NEUTRAL)|
+ U_MASK(U_DIR_NON_SPACING_MARK);
+// Everything allowed in an LTR label, and everything allowed in an RTL label.
+const uint32_t L_EN_ES_CS_ET_ON_BN_NSM_MASK=L_EN_MASK|ES_CS_ET_ON_BN_NSM_MASK;
+const uint32_t R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK=R_AL_MASK|EN_AN_MASK|ES_CS_ET_ON_BN_NSM_MASK;
+
+// We scan the whole label and check both for whether it contains RTL characters
+// and whether it passes the BiDi Rule.
+// In a BiDi domain name, all labels must pass the BiDi Rule, but we might find
+// that a domain name is a BiDi domain name (has an RTL label) only after
+// processing several earlier labels.
+void
+UTS46::checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const { // clears info.isOkBiDi on a rule violation; sets info.isBiDi if R, AL or AN occurs
+ // IDNA2008 BiDi rule
+ // Get the directionality of the first character.
+ UChar32 c;
+ int32_t i=0;
+ U16_NEXT_UNSAFE(label, i, c); // NOTE(review): reads label[0] unconditionally - presumably callers never pass an empty label; confirm
+ uint32_t firstMask=U_MASK(u_charDirection(c));
+ // 1. The first character must be a character with BIDI property L, R
+ // or AL. If it has the R or AL property, it is an RTL label; if it
+ // has the L property, it is an LTR label.
+ if((firstMask&~L_R_AL_MASK)!=0) {
+ info.isOkBiDi=FALSE;
+ }
+ // Get the directionality of the last non-NSM character.
+ uint32_t lastMask;
+ for(;;) {
+ if(i>=labelLength) { // walked back to the first character: it doubles as the last one
+ lastMask=firstMask;
+ break;
+ }
+ U16_PREV_UNSAFE(label, labelLength, c); // steps labelLength backward over the character just read
+ UCharDirection dir=u_charDirection(c);
+ if(dir!=U_DIR_NON_SPACING_MARK) {
+ lastMask=U_MASK(dir);
+ break;
+ }
+ }
+ // 3. In an RTL label, the end of the label must be a character with
+ // BIDI property R, AL, EN or AN, followed by zero or more
+ // characters with BIDI property NSM.
+ // 6. In an LTR label, the end of the label must be a character with
+ // BIDI property L or EN, followed by zero or more characters with
+ // BIDI property NSM.
+ if( (firstMask&L_MASK)!=0 ?
+ (lastMask&~L_EN_MASK)!=0 :
+ (lastMask&~R_AL_EN_AN_MASK)!=0
+ ) {
+ info.isOkBiDi=FALSE;
+ }
+ // Add the directionalities of the intervening characters.
+ uint32_t mask=firstMask|lastMask;
+ while(i<labelLength) { // labelLength was reduced above, so this visits only the characters between first and last
+ U16_NEXT_UNSAFE(label, i, c);
+ mask|=U_MASK(u_charDirection(c));
+ }
+ if(firstMask&L_MASK) {
+ // 5. In an LTR label, only characters with the BIDI properties L, EN,
+ // ES, CS, ET, ON, BN and NSM are allowed.
+ if((mask&~L_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) {
+ info.isOkBiDi=FALSE;
+ }
+ } else {
+ // 2. In an RTL label, only characters with the BIDI properties R, AL,
+ // AN, EN, ES, CS, ET, ON, BN and NSM are allowed.
+ if((mask&~R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) {
+ info.isOkBiDi=FALSE;
+ }
+ // 4. In an RTL label, if an EN is present, no AN may be present, and
+ // vice versa.
+ if((mask&EN_AN_MASK)==EN_AN_MASK) {
+ info.isOkBiDi=FALSE;
+ }
+ }
+ // An RTL label is a label that contains at least one character of type
+ // R, AL or AN. [...]
+ // A "BIDI domain name" is a domain name that contains at least one RTL
+ // label. [...]
+ // The following rule, consisting of six conditions, applies to labels
+ // in BIDI domain names.
+ if((mask&R_AL_AN_MASK)!=0) {
+ info.isBiDi=TRUE;
+ }
+}
+
+// Special code for the ASCII prefix of a BiDi domain name.
+// The ASCII prefix is all-LTR.
+
+// IDNA2008 BiDi rule, parts relevant to ASCII labels:
+// 1. The first character must be a character with BIDI property L [...]
+// 5. In an LTR label, only characters with the BIDI properties L, EN,
+// ES, CS, ET, ON, BN and NSM are allowed.
+// 6. In an LTR label, the end of the label must be a character with
+// BIDI property L or EN [...]
+
+// UTF-16 version, called for mapped ASCII prefix.
+// Cannot contain uppercase A-Z.
+// s[length-1] must be the trailing dot.
+static UBool
+isASCIIOkBiDi(const UChar *s, int32_t length) { // returns FALSE as soon as one label breaks the LTR conditions listed above
+ int32_t labelStart=0; // index of the first character of the label being scanned
+ for(int32_t i=0; i<length; ++i) {
+ UChar c=s[i];
+ if(c==0x2e) { // dot
+ if(i>labelStart) { // non-empty label: inspect its last character
+ c=s[i-1];
+ if(!(0x61<=c && c<=0x7a) && !(0x30<=c && c<=0x39)) { // neither a-z nor 0-9
+ // Last character in the label is not an L or EN.
+ return FALSE;
+ }
+ }
+ labelStart=i+1; // the next label starts right after the dot
+ } else if(i==labelStart) {
+ if(!(0x61<=c && c<=0x7a)) { // not a-z
+ // First character in the label is not an L.
+ return FALSE;
+ }
+ } else {
+ if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { // U+0009..U+000D or U+001C..U+0020
+ // Intermediate character in the label is a B, S or WS.
+ return FALSE;
+ }
+ }
+ }
+ return TRUE;
+}
+
+// UTF-8 version, called for source ASCII prefix.
+// Can contain uppercase A-Z.
+// s[length-1] must be the trailing dot.
+static UBool
+isASCIIOkBiDi(const char *s, int32_t length) { // same checks as the UTF-16 overload, but also accepting A-Z
+ int32_t labelStart=0; // index of the first character of the label being scanned
+ for(int32_t i=0; i<length; ++i) {
+ char c=s[i];
+ if(c==0x2e) { // dot
+ if(i>labelStart) { // non-empty label: inspect its last character
+ c=s[i-1];
+ if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a) && !(0x30<=c && c<=0x39)) { // not a-z, A-Z or 0-9
+ // Last character in the label is not an L or EN.
+ return FALSE;
+ }
+ }
+ labelStart=i+1; // the next label starts right after the dot
+ } else if(i==labelStart) {
+ if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a)) { // not a-z or A-Z
+ // First character in the label is not an L.
+ return FALSE;
+ }
+ } else {
+ if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { // 0x09..0x0D or 0x1C..0x20
+ // Intermediate character in the label is a B, S or WS.
+ return FALSE;
+ }
+ }
+ }
+ return TRUE;
+}
+
+UBool
+UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { // FALSE if any U+200C/U+200D in the label fails its rule, else TRUE
+ // [IDNA2008-Tables]
+ // 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
+ for(int32_t i=0; i<labelLength; ++i) {
+ if(label[i]==0x200c) {
+ // Appendix A.1. ZERO WIDTH NON-JOINER
+ // Rule Set:
+ // False;
+ // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
+ // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C
+ // (Joining_Type:T)*(Joining_Type:{R,D})) Then True;
+ if(i==0) { // nothing precedes the ZWNJ, so neither condition can hold
+ return FALSE;
+ }
+ UChar32 c;
+ int32_t j=i;
+ U16_PREV_UNSAFE(label, j, c);
+ if(uts46Norm2.getCombiningClass(c)==9) { // combining class 9 is the Virama test from the rule above
+ continue;
+ }
+ // check precontext (Joining_Type:{L,D})(Joining_Type:T)*
+ for(;;) {
+ UJoiningType type=ubidi_getJoiningType(c);
+ if(type==U_JT_TRANSPARENT) {
+ if(j==0) { // reached the label start without finding an L or D character
+ return FALSE;
+ }
+ U16_PREV_UNSAFE(label, j, c);
+ } else if(type==U_JT_LEFT_JOINING || type==U_JT_DUAL_JOINING) {
+ break; // precontext fulfilled
+ } else {
+ return FALSE;
+ }
+ }
+ // check postcontext (Joining_Type:T)*(Joining_Type:{R,D})
+ for(j=i+1;;) {
+ if(j==labelLength) { // reached the label end without finding an R or D character
+ return FALSE;
+ }
+ U16_NEXT_UNSAFE(label, j, c);
+ UJoiningType type=ubidi_getJoiningType(c);
+ if(type==U_JT_TRANSPARENT) {
+ // just skip this character
+ } else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) {
+ break; // postcontext fulfilled
+ } else {
+ return FALSE;
+ }
+ }
+ } else if(label[i]==0x200d) {
+ // Appendix A.2. ZERO WIDTH JOINER (U+200D)
+ // Rule Set:
+ // False;
+ // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
+ if(i==0) { // nothing precedes the ZWJ
+ return FALSE;
+ }
+ UChar32 c;
+ int32_t j=i;
+ U16_PREV_UNSAFE(label, j, c);
+ if(uts46Norm2.getCombiningClass(c)!=9) { // preceding character is not in combining class 9 (the Virama test)
+ return FALSE;
+ }
+ }
+ }
+ return TRUE;
+}
+
+void
+UTS46::checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const { // ORs UIDNA_ERROR_CONTEXTO_* bits into info.labelErrors for each violated rule
+ int32_t labelEnd=labelLength-1; // inclusive
+ int32_t arabicDigits=0; // -1 for 066x, +1 for 06Fx
+ for(int32_t i=0; i<=labelEnd; ++i) {
+ UChar32 c=label[i];
+ if(c<0xb7) {
+ // ASCII fastpath
+ } else if(c<=0x6f9) { // U+00B7..U+06F9: all rules below except KATAKANA MIDDLE DOT
+ if(c==0xb7) {
+ // Appendix A.3. MIDDLE DOT (U+00B7)
+ // Rule Set:
+ // False;
+ // If Before(cp) .eq. U+006C And
+ // After(cp) .eq. U+006C Then True;
+ if(!(0<i && label[i-1]==0x6c &&
+ i<labelEnd && label[i+1]==0x6c)) {
+ info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION;
+ }
+ } else if(c==0x375) {
+ // Appendix A.4. GREEK LOWER NUMERAL SIGN (KERAIA) (U+0375)
+ // Rule Set:
+ // False;
+ // If Script(After(cp)) .eq. Greek Then True;
+ UScriptCode script=USCRIPT_INVALID_CODE; // stays invalid when no character follows
+ if(i<labelEnd) {
+ UErrorCode errorCode=U_ZERO_ERROR;
+ int32_t j=i+1;
+ U16_NEXT(label, j, labelLength, c);
+ script=uscript_getScript(c, &errorCode);
+ }
+ if(script!=USCRIPT_GREEK) {
+ info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION;
+ }
+ } else if(c==0x5f3 || c==0x5f4) {
+ // Appendix A.5. HEBREW PUNCTUATION GERESH (U+05F3)
+ // Rule Set:
+ // False;
+ // If Script(Before(cp)) .eq. Hebrew Then True;
+ //
+ // Appendix A.6. HEBREW PUNCTUATION GERSHAYIM (U+05F4)
+ // Rule Set:
+ // False;
+ // If Script(Before(cp)) .eq. Hebrew Then True;
+ UScriptCode script=USCRIPT_INVALID_CODE; // stays invalid when no character precedes
+ if(0<i) {
+ UErrorCode errorCode=U_ZERO_ERROR;
+ int32_t j=i;
+ U16_PREV(label, 0, j, c);
+ script=uscript_getScript(c, &errorCode);
+ }
+ if(script!=USCRIPT_HEBREW) {
+ info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION;
+ }
+ } else if(0x660<=c /* && c<=0x6f9 */) {
+ // Appendix A.8. ARABIC-INDIC DIGITS (0660..0669)
+ // Rule Set:
+ // True;
+ // For All Characters:
+ // If cp .in. 06F0..06F9 Then False;
+ // End For;
+ //
+ // Appendix A.9. EXTENDED ARABIC-INDIC DIGITS (06F0..06F9)
+ // Rule Set:
+ // True;
+ // For All Characters:
+ // If cp .in. 0660..0669 Then False;
+ // End For;
+ if(c<=0x669) { // U+0660..U+0669
+ if(arabicDigits>0) { // an 06Fx digit was seen earlier in this label
+ info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS;
+ }
+ arabicDigits=-1;
+ } else if(0x6f0<=c) { // U+06F0..U+06F9
+ if(arabicDigits<0) { // an 066x digit was seen earlier in this label
+ info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS;
+ }
+ arabicDigits=1;
+ }
+ }
+ } else if(c==0x30fb) {
+ // Appendix A.7. KATAKANA MIDDLE DOT (U+30FB)
+ // Rule Set:
+ // False;
+ // For All Characters:
+ // If Script(cp) .in. {Hiragana, Katakana, Han} Then True;
+ // End For;
+ UErrorCode errorCode=U_ZERO_ERROR;
+ for(int j=0;;) { // scan the whole label for one Hiragana, Katakana or Han character
+ if(j>labelEnd) { // none found
+ info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION;
+ break;
+ }
+ U16_NEXT(label, j, labelLength, c);
+ UScriptCode script=uscript_getScript(c, &errorCode);
+ if(script==USCRIPT_HIRAGANA || script==USCRIPT_KATAKANA || script==USCRIPT_HAN) {
+ break;
+ }
+ }
+ }
+ }
+}
+
+U_NAMESPACE_END
+
+// C API ------------------------------------------------------------------- ***
+
+U_NAMESPACE_USE
+
+U_CAPI UIDNA * U_EXPORT2
+uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode) { // C entry point: the returned UIDNA handle is the C++ IDNA object, cast
+ return reinterpret_cast<UIDNA *>(IDNA::createUTS46Instance(options, *pErrorCode));
+}
+
+U_CAPI void U_EXPORT2
+uidna_close(UIDNA *idna) { // destroys the object behind a handle obtained from uidna_openUTS46
+ delete reinterpret_cast<IDNA *>(idna); // the handle is an IDNA pointer (see the cast in uidna_openUTS46)
+}
+
+static UBool
+checkArgs(const void *label, int32_t length, // shared argument validation for all uidna_* conversion functions below
+ void *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
+ if(U_FAILURE(*pErrorCode)) { // an incoming failure code means: do nothing
+ return FALSE;
+ }
+ // sizeof(UIDNAInfo)=16 in the first API version.
+ if(pInfo==NULL || pInfo->size<16) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return FALSE;
+ }
+ if( (label==NULL ? length!=0 : length<-1) || // NULL input requires length 0; otherwise length must be >=-1
+ (dest==NULL ? capacity!=0 : capacity<0) || // NULL output requires capacity 0; otherwise capacity must be >=0
+ (dest==label && label!=NULL) // input and output may not be the same buffer
+ ) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return FALSE;
+ }
+ // Set all *pInfo bytes to 0 except for the size field itself.
+ uprv_memset(&pInfo->size+1, 0, pInfo->size-sizeof(pInfo->size));
+ return TRUE;
+}
+
+static void
+idnaInfoToStruct(IDNAInfo &info, UIDNAInfo *pInfo) { // copies the C++ result object into the caller's C struct
+ pInfo->isTransitionalDifferent=info.isTransitionalDifferent();
+ pInfo->errors=info.getErrors();
+}
+
+U_CAPI int32_t U_EXPORT2
+uidna_labelToASCII(const UIDNA *idna, // C wrapper around IDNA::labelToASCII for UTF-16 buffers
+ const UChar *label, int32_t length,
+ UChar *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
+ if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { // bad arguments or an incoming failure: produce nothing
+ return 0;
+ }
+ UnicodeString src((UBool)(length<0), label, length); // first argument is TRUE when length<0 (presumably "NUL-terminated" - confirm against UnicodeString)
+ UnicodeString destString(dest, 0, capacity); // presumably starts empty on top of the caller's buffer - confirm
+ IDNAInfo info;
+ reinterpret_cast<const IDNA *>(idna)->labelToASCII(src, destString, info, *pErrorCode);
+ idnaInfoToStruct(info, pInfo); // hand errors and the transitional-difference flag back to the caller
+ return destString.extract(dest, capacity, *pErrorCode);
+}
+
+U_CAPI int32_t U_EXPORT2
+uidna_labelToUnicode(const UIDNA *idna, // C wrapper around IDNA::labelToUnicode for UTF-16 buffers
+ const UChar *label, int32_t length,
+ UChar *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
+ if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { // bad arguments or an incoming failure: produce nothing
+ return 0;
+ }
+ UnicodeString src((UBool)(length<0), label, length); // first argument is TRUE when length<0 (presumably "NUL-terminated" - confirm against UnicodeString)
+ UnicodeString destString(dest, 0, capacity); // presumably starts empty on top of the caller's buffer - confirm
+ IDNAInfo info;
+ reinterpret_cast<const IDNA *>(idna)->labelToUnicode(src, destString, info, *pErrorCode);
+ idnaInfoToStruct(info, pInfo); // hand errors and the transitional-difference flag back to the caller
+ return destString.extract(dest, capacity, *pErrorCode);
+}
+
+U_CAPI int32_t U_EXPORT2
+uidna_nameToASCII(const UIDNA *idna, // C wrapper around IDNA::nameToASCII for UTF-16 buffers
+ const UChar *name, int32_t length,
+ UChar *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
+ if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { // bad arguments or an incoming failure: produce nothing
+ return 0;
+ }
+ UnicodeString src((UBool)(length<0), name, length); // first argument is TRUE when length<0 (presumably "NUL-terminated" - confirm against UnicodeString)
+ UnicodeString destString(dest, 0, capacity); // presumably starts empty on top of the caller's buffer - confirm
+ IDNAInfo info;
+ reinterpret_cast<const IDNA *>(idna)->nameToASCII(src, destString, info, *pErrorCode);
+ idnaInfoToStruct(info, pInfo); // hand errors and the transitional-difference flag back to the caller
+ return destString.extract(dest, capacity, *pErrorCode);
+}
+
+U_CAPI int32_t U_EXPORT2
+uidna_nameToUnicode(const UIDNA *idna, // C wrapper around IDNA::nameToUnicode for UTF-16 buffers
+ const UChar *name, int32_t length,
+ UChar *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
+ if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { // bad arguments or an incoming failure: produce nothing
+ return 0;
+ }
+ UnicodeString src((UBool)(length<0), name, length); // first argument is TRUE when length<0 (presumably "NUL-terminated" - confirm against UnicodeString)
+ UnicodeString destString(dest, 0, capacity); // presumably starts empty on top of the caller's buffer - confirm
+ IDNAInfo info;
+ reinterpret_cast<const IDNA *>(idna)->nameToUnicode(src, destString, info, *pErrorCode);
+ idnaInfoToStruct(info, pInfo); // hand errors and the transitional-difference flag back to the caller
+ return destString.extract(dest, capacity, *pErrorCode);
+}
+
+U_CAPI int32_t U_EXPORT2
+uidna_labelToASCII_UTF8(const UIDNA *idna, // C wrapper around IDNA::labelToASCII_UTF8 for char buffers
+ const char *label, int32_t length,
+ char *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
+ if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { // bad arguments or an incoming failure: produce nothing
+ return 0;
+ }
+ StringPiece src(label, length<0 ? static_cast<int32_t>(uprv_strlen(label)) : length); // negative length: measure the NUL-terminated input
+ CheckedArrayByteSink sink(dest, capacity); // output goes to the caller's buffer through the sink
+ IDNAInfo info;
+ reinterpret_cast<const IDNA *>(idna)->labelToASCII_UTF8(src, sink, info, *pErrorCode);
+ idnaInfoToStruct(info, pInfo); // hand errors and the transitional-difference flag back to the caller
+ return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
+}
+
+U_CAPI int32_t U_EXPORT2
+uidna_labelToUnicodeUTF8(const UIDNA *idna, // C wrapper around IDNA::labelToUnicodeUTF8 for char buffers
+ const char *label, int32_t length,
+ char *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
+ if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { // bad arguments or an incoming failure: produce nothing
+ return 0;
+ }
+ StringPiece src(label, length<0 ? static_cast<int32_t>(uprv_strlen(label)) : length); // negative length: measure the NUL-terminated input
+ CheckedArrayByteSink sink(dest, capacity); // output goes to the caller's buffer through the sink
+ IDNAInfo info;
+ reinterpret_cast<const IDNA *>(idna)->labelToUnicodeUTF8(src, sink, info, *pErrorCode);
+ idnaInfoToStruct(info, pInfo); // hand errors and the transitional-difference flag back to the caller
+ return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
+}
+
+U_CAPI int32_t U_EXPORT2
+uidna_nameToASCII_UTF8(const UIDNA *idna, // C wrapper around IDNA::nameToASCII_UTF8 for char buffers
+ const char *name, int32_t length,
+ char *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
+ if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { // bad arguments or an incoming failure: produce nothing
+ return 0;
+ }
+ StringPiece src(name, length<0 ? static_cast<int32_t>(uprv_strlen(name)) : length); // negative length: measure the NUL-terminated input
+ CheckedArrayByteSink sink(dest, capacity); // output goes to the caller's buffer through the sink
+ IDNAInfo info;
+ reinterpret_cast<const IDNA *>(idna)->nameToASCII_UTF8(src, sink, info, *pErrorCode);
+ idnaInfoToStruct(info, pInfo); // hand errors and the transitional-difference flag back to the caller
+ return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
+}
+
+U_CAPI int32_t U_EXPORT2
+uidna_nameToUnicodeUTF8(const UIDNA *idna, // C wrapper around IDNA::nameToUnicodeUTF8 for char buffers
+ const char *name, int32_t length,
+ char *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
+ if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { // bad arguments or an incoming failure: produce nothing
+ return 0;
+ }
+ StringPiece src(name, length<0 ? static_cast<int32_t>(uprv_strlen(name)) : length); // negative length: measure the NUL-terminated input
+ CheckedArrayByteSink sink(dest, capacity); // output goes to the caller's buffer through the sink
+ IDNAInfo info;
+ reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode);
+ idnaInfoToStruct(info, pInfo); // hand errors and the transitional-difference flag back to the caller
+ return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
+}
+
+#endif // UCONFIG_NO_IDNA
diff --git a/thirdparty/icu4c/common/utypeinfo.h b/thirdparty/icu4c/common/utypeinfo.h
new file mode 100644
index 0000000000..c6663734fc
--- /dev/null
+++ b/thirdparty/icu4c/common/utypeinfo.h
@@ -0,0 +1,32 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2012-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*/
+
+#ifndef __UTYPEINFO_H__
+#define __UTYPEINFO_H__
+
+// The Windows header <typeinfo> does not define 'exception' in the 'std' namespace.
+// Therefore, a project using ICU cannot be compiled with _HAS_EXCEPTIONS
+// set to 0 on Windows with Visual Studio. To work around that, we have to
+// include <exception> explicitly and add the using-declaration below.
+// Whenever 'typeid' is used, this header has to be included
+// instead of <typeinfo>.
+// Visual Studio 10 emits warning 4275 with this change. If you compile
+// with exceptions disabled, you have to suppress warning 4275.
+#if defined(_MSC_VER) && _HAS_EXCEPTIONS == 0
+#include <exception>
+using std::exception;
+#endif
+#if defined(__GLIBCXX__)
+namespace std { class type_info; } // WORKAROUND: http://llvm.org/bugs/show_bug.cgi?id=13364
+#endif
+#include <typeinfo> // for 'typeid' to work
+
+#endif
diff --git a/thirdparty/icu4c/common/utypes.cpp b/thirdparty/icu4c/common/utypes.cpp
new file mode 100644
index 0000000000..63e05b1249
--- /dev/null
+++ b/thirdparty/icu4c/common/utypes.cpp
@@ -0,0 +1,227 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : utypes.c (previously putil.c)
+*
+* Date Name Description
+* 10/07/2004 grhoten split from putil.c
+******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+
+/* u_errorName() ------------------------------------------------------------ */
+
+static const char * const
+_uErrorInfoName[U_ERROR_WARNING_LIMIT-U_ERROR_WARNING_START]={ // u_errorName() indexes this by code-U_ERROR_WARNING_START
+ "U_USING_FALLBACK_WARNING",
+ "U_USING_DEFAULT_WARNING",
+ "U_SAFECLONE_ALLOCATED_WARNING",
+ "U_STATE_OLD_WARNING",
+ "U_STRING_NOT_TERMINATED_WARNING",
+ "U_SORT_KEY_TOO_SHORT_WARNING",
+ "U_AMBIGUOUS_ALIAS_WARNING",
+ "U_DIFFERENT_UCA_VERSION",
+ "U_PLUGIN_CHANGED_LEVEL_WARNING",
+};
+
+static const char * const
+_uTransErrorName[U_PARSE_ERROR_LIMIT - U_PARSE_ERROR_START]={ // u_errorName() indexes this by code-U_PARSE_ERROR_START
+ "U_BAD_VARIABLE_DEFINITION",
+ "U_MALFORMED_RULE",
+ "U_MALFORMED_SET",
+ "U_MALFORMED_SYMBOL_REFERENCE",
+ "U_MALFORMED_UNICODE_ESCAPE",
+ "U_MALFORMED_VARIABLE_DEFINITION",
+ "U_MALFORMED_VARIABLE_REFERENCE",
+ "U_MISMATCHED_SEGMENT_DELIMITERS",
+ "U_MISPLACED_ANCHOR_START",
+ "U_MISPLACED_CURSOR_OFFSET",
+ "U_MISPLACED_QUANTIFIER",
+ "U_MISSING_OPERATOR",
+ "U_MISSING_SEGMENT_CLOSE",
+ "U_MULTIPLE_ANTE_CONTEXTS",
+ "U_MULTIPLE_CURSORS",
+ "U_MULTIPLE_POST_CONTEXTS",
+ "U_TRAILING_BACKSLASH",
+ "U_UNDEFINED_SEGMENT_REFERENCE",
+ "U_UNDEFINED_VARIABLE",
+ "U_UNQUOTED_SPECIAL",
+ "U_UNTERMINATED_QUOTE",
+ "U_RULE_MASK_ERROR",
+ "U_MISPLACED_COMPOUND_FILTER",
+ "U_MULTIPLE_COMPOUND_FILTERS",
+ "U_INVALID_RBT_SYNTAX",
+ "U_INVALID_PROPERTY_PATTERN",
+ "U_MALFORMED_PRAGMA",
+ "U_UNCLOSED_SEGMENT",
+ "U_ILLEGAL_CHAR_IN_SEGMENT",
+ "U_VARIABLE_RANGE_EXHAUSTED",
+ "U_VARIABLE_RANGE_OVERLAP",
+ "U_ILLEGAL_CHARACTER",
+ "U_INTERNAL_TRANSLITERATOR_ERROR",
+ "U_INVALID_ID",
+ "U_INVALID_FUNCTION"
+};
+
+static const char * const
+_uErrorName[U_STANDARD_ERROR_LIMIT]={ // u_errorName() indexes this directly by code, starting at U_ZERO_ERROR
+ "U_ZERO_ERROR",
+
+ "U_ILLEGAL_ARGUMENT_ERROR",
+ "U_MISSING_RESOURCE_ERROR",
+ "U_INVALID_FORMAT_ERROR",
+ "U_FILE_ACCESS_ERROR",
+ "U_INTERNAL_PROGRAM_ERROR",
+ "U_MESSAGE_PARSE_ERROR",
+ "U_MEMORY_ALLOCATION_ERROR",
+ "U_INDEX_OUTOFBOUNDS_ERROR",
+ "U_PARSE_ERROR",
+ "U_INVALID_CHAR_FOUND",
+ "U_TRUNCATED_CHAR_FOUND",
+ "U_ILLEGAL_CHAR_FOUND",
+ "U_INVALID_TABLE_FORMAT",
+ "U_INVALID_TABLE_FILE",
+ "U_BUFFER_OVERFLOW_ERROR",
+ "U_UNSUPPORTED_ERROR",
+ "U_RESOURCE_TYPE_MISMATCH",
+ "U_ILLEGAL_ESCAPE_SEQUENCE",
+ "U_UNSUPPORTED_ESCAPE_SEQUENCE",
+ "U_NO_SPACE_AVAILABLE",
+ "U_CE_NOT_FOUND_ERROR",
+ "U_PRIMARY_TOO_LONG_ERROR",
+ "U_STATE_TOO_OLD_ERROR",
+ "U_TOO_MANY_ALIASES_ERROR",
+ "U_ENUM_OUT_OF_SYNC_ERROR",
+ "U_INVARIANT_CONVERSION_ERROR",
+ "U_INVALID_STATE_ERROR",
+ "U_COLLATOR_VERSION_MISMATCH",
+ "U_USELESS_COLLATOR_ERROR",
+ "U_NO_WRITE_PERMISSION",
+ "U_INPUT_TOO_LONG_ERROR"
+};
+static const char * const
+_uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = { // u_errorName() indexes this by code-U_FMT_PARSE_ERROR_START
+ "U_UNEXPECTED_TOKEN",
+ "U_MULTIPLE_DECIMAL_SEPARATORS",
+ "U_MULTIPLE_EXPONENTIAL_SYMBOLS",
+ "U_MALFORMED_EXPONENTIAL_PATTERN",
+ "U_MULTIPLE_PERCENT_SYMBOLS",
+ "U_MULTIPLE_PERMILL_SYMBOLS",
+ "U_MULTIPLE_PAD_SPECIFIERS",
+ "U_PATTERN_SYNTAX_ERROR",
+ "U_ILLEGAL_PAD_POSITION",
+ "U_UNMATCHED_BRACES",
+ "U_UNSUPPORTED_PROPERTY",
+ "U_UNSUPPORTED_ATTRIBUTE",
+ "U_ARGUMENT_TYPE_MISMATCH",
+ "U_DUPLICATE_KEYWORD",
+ "U_UNDEFINED_KEYWORD",
+ "U_DEFAULT_KEYWORD_MISSING",
+ "U_DECIMAL_NUMBER_SYNTAX_ERROR",
+ "U_FORMAT_INEXACT_ERROR",
+ "U_NUMBER_ARG_OUTOFBOUNDS_ERROR",
+ "U_NUMBER_SKELETON_SYNTAX_ERROR",
+};
+
+static const char * const
+_uBrkErrorName[U_BRK_ERROR_LIMIT - U_BRK_ERROR_START] = { // u_errorName() indexes this by code-U_BRK_ERROR_START
+ "U_BRK_INTERNAL_ERROR",
+ "U_BRK_HEX_DIGITS_EXPECTED",
+ "U_BRK_SEMICOLON_EXPECTED",
+ "U_BRK_RULE_SYNTAX",
+ "U_BRK_UNCLOSED_SET",
+ "U_BRK_ASSIGN_ERROR",
+ "U_BRK_VARIABLE_REDFINITION",
+ "U_BRK_MISMATCHED_PAREN",
+ "U_BRK_NEW_LINE_IN_QUOTED_STRING",
+ "U_BRK_UNDEFINED_VARIABLE",
+ "U_BRK_INIT_ERROR",
+ "U_BRK_RULE_EMPTY_SET",
+ "U_BRK_UNRECOGNIZED_OPTION",
+ "U_BRK_MALFORMED_RULE_TAG"
+};
+
+static const char * const
+_uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = { // u_errorName() indexes this by code-U_REGEX_ERROR_START
+ "U_REGEX_INTERNAL_ERROR",
+ "U_REGEX_RULE_SYNTAX",
+ "U_REGEX_INVALID_STATE",
+ "U_REGEX_BAD_ESCAPE_SEQUENCE",
+ "U_REGEX_PROPERTY_SYNTAX",
+ "U_REGEX_UNIMPLEMENTED",
+ "U_REGEX_MISMATCHED_PAREN",
+ "U_REGEX_NUMBER_TOO_BIG",
+ "U_REGEX_BAD_INTERVAL",
+ "U_REGEX_MAX_LT_MIN",
+ "U_REGEX_INVALID_BACK_REF",
+ "U_REGEX_INVALID_FLAG",
+ "U_REGEX_LOOK_BEHIND_LIMIT",
+ "U_REGEX_SET_CONTAINS_STRING",
+ "U_REGEX_OCTAL_TOO_BIG",
+ "U_REGEX_MISSING_CLOSE_BRACKET",
+ "U_REGEX_INVALID_RANGE",
+ "U_REGEX_STACK_OVERFLOW",
+ "U_REGEX_TIME_OUT",
+ "U_REGEX_STOPPED_BY_CALLER",
+ "U_REGEX_PATTERN_TOO_BIG",
+ "U_REGEX_INVALID_CAPTURE_GROUP_NAME"
+};
+
+static const char * const
+_uIDNAErrorName[U_IDNA_ERROR_LIMIT - U_IDNA_ERROR_START] = { // u_errorName() indexes this by code-U_IDNA_ERROR_START
+ "U_STRINGPREP_PROHIBITED_ERROR",
+ "U_STRINGPREP_UNASSIGNED_ERROR",
+ "U_STRINGPREP_CHECK_BIDI_ERROR",
+ "U_IDNA_STD3_ASCII_RULES_ERROR",
+ "U_IDNA_ACE_PREFIX_ERROR",
+ "U_IDNA_VERIFICATION_ERROR",
+ "U_IDNA_LABEL_TOO_LONG_ERROR",
+ "U_IDNA_ZERO_LENGTH_LABEL_ERROR",
+ "U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR"
+};
+
+static const char * const
+_uPluginErrorName[U_PLUGIN_ERROR_LIMIT - U_PLUGIN_ERROR_START] = { // u_errorName() indexes this by code-U_PLUGIN_ERROR_START
+ "U_PLUGIN_TOO_HIGH",
+ "U_PLUGIN_DIDNT_SET_LEVEL",
+};
+
+U_CAPI const char * U_EXPORT2
+u_errorName(UErrorCode code) { // returns the enum-constant name for code, chosen from the table matching the code's range
+ if(U_ZERO_ERROR <= code && code < U_STANDARD_ERROR_LIMIT) {
+ return _uErrorName[code]; // indexed directly by code
+ } else if(U_ERROR_WARNING_START <= code && code < U_ERROR_WARNING_LIMIT) {
+ return _uErrorInfoName[code - U_ERROR_WARNING_START]; // every other table is indexed relative to its range start
+ } else if(U_PARSE_ERROR_START <= code && code < U_PARSE_ERROR_LIMIT){
+ return _uTransErrorName[code - U_PARSE_ERROR_START];
+ } else if(U_FMT_PARSE_ERROR_START <= code && code < U_FMT_PARSE_ERROR_LIMIT){
+ return _uFmtErrorName[code - U_FMT_PARSE_ERROR_START];
+ } else if (U_BRK_ERROR_START <= code && code < U_BRK_ERROR_LIMIT){
+ return _uBrkErrorName[code - U_BRK_ERROR_START];
+ } else if (U_REGEX_ERROR_START <= code && code < U_REGEX_ERROR_LIMIT) {
+ return _uRegexErrorName[code - U_REGEX_ERROR_START];
+ } else if(U_IDNA_ERROR_START <= code && code < U_IDNA_ERROR_LIMIT) {
+ return _uIDNAErrorName[code - U_IDNA_ERROR_START];
+ } else if(U_PLUGIN_ERROR_START <= code && code < U_PLUGIN_ERROR_LIMIT) {
+ return _uPluginErrorName[code - U_PLUGIN_ERROR_START];
+ } else {
+ return "[BOGUS UErrorCode]"; // code lies outside every range checked above
+ }
+}
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
diff --git a/thirdparty/icu4c/common/uvector.cpp b/thirdparty/icu4c/common/uvector.cpp
new file mode 100644
index 0000000000..cf19edf646
--- /dev/null
+++ b/thirdparty/icu4c/common/uvector.cpp
@@ -0,0 +1,567 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1999-2013, International Business Machines Corporation and
+* others. All Rights Reserved.
+******************************************************************************
+* Date Name Description
+* 10/22/99 alan Creation.
+**********************************************************************
+*/
+
+#include "uvector.h"
+#include "cmemory.h"
+#include "uarrsort.h"
+#include "uelement.h"
+
+U_NAMESPACE_BEGIN
+
+#define DEFAULT_CAPACITY 8
+
+/*
+ * Constants for hinting whether a key is an integer
+ * or a pointer. If a hint bit is zero, then the associated
+ * token is assumed to be an integer. This is needed for iSeries
+ */
+#define HINT_KEY_POINTER (1)
+#define HINT_KEY_INTEGER (0)
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UVector)
+
+UVector::UVector(UErrorCode &status) : // no deleter, no comparer, default capacity
+ count(0),
+ capacity(0),
+ elements(0),
+ deleter(0),
+ comparer(0)
+{
+ _init(DEFAULT_CAPACITY, status); // allocates the element array; sets status on failure
+}
+
+UVector::UVector(int32_t initialCapacity, UErrorCode &status) : // no deleter, no comparer, caller-chosen capacity
+ count(0),
+ capacity(0),
+ elements(0),
+ deleter(0),
+ comparer(0)
+{
+ _init(initialCapacity, status); // out-of-range capacities are replaced by DEFAULT_CAPACITY inside _init
+}
+
+UVector::UVector(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status) : // caller-supplied deleter and comparer, default capacity
+ count(0),
+ capacity(0),
+ elements(0),
+ deleter(d),
+ comparer(c)
+{
+ _init(DEFAULT_CAPACITY, status); // allocates the element array; sets status on failure
+}
+
+UVector::UVector(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status) : // caller-supplied deleter, comparer and capacity
+ count(0),
+ capacity(0),
+ elements(0),
+ deleter(d),
+ comparer(c)
+{
+ _init(initialCapacity, status); // out-of-range capacities are replaced by DEFAULT_CAPACITY inside _init
+}
+
+void UVector::_init(int32_t initialCapacity, UErrorCode &status) { // shared constructor helper: allocates the element array
+ if (U_FAILURE(status)) { // incoming failure: leave elements NULL and capacity 0
+ return;
+ }
+ // Fix bogus initialCapacity values; avoid malloc(0) and integer overflow
+ if ((initialCapacity < 1) || (initialCapacity > (int32_t)(INT32_MAX / sizeof(UElement)))) {
+ initialCapacity = DEFAULT_CAPACITY;
+ }
+ elements = (UElement *)uprv_malloc(sizeof(UElement)*initialCapacity);
+ if (elements == 0) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ } else {
+ capacity = initialCapacity; // capacity stays 0 when the allocation failed
+ }
+}
+
+UVector::~UVector() {
+ removeAllElements(); // runs the deleter, if one is set, on every non-NULL element
+ uprv_free(elements);
+ elements = 0;
+}
+
+/**
+ * Assign this object to another (make this a copy of 'other').
+ * Use the 'assign' function to assign each element.
+ */
+void UVector::assign(const UVector& other, UElementAssigner *assign, UErrorCode &ec) {
+ if (ensureCapacity(other.count, ec)) {
+ setSize(other.count, ec); // make this vector exactly as long as other
+ if (U_SUCCESS(ec)) {
+ for (int32_t i=0; i<other.count; ++i) {
+ if (elements[i].pointer != 0 && deleter != 0) { // release what the slot held before it is overwritten
+ (*deleter)(elements[i].pointer);
+ }
+ (*assign)(&elements[i], &other.elements[i]); // caller-supplied per-element copy
+ }
+ }
+ }
+}
+
+// This only does something sensible if this object has a non-null comparer
+UBool UVector::operator==(const UVector& other) {
+ int32_t i;
+ if (count != other.count) return FALSE; // different sizes are never equal
+ if (comparer != NULL) {
+ // Compare using this object's comparer
+ for (i=0; i<count; ++i) {
+ if (!(*comparer)(elements[i], other.elements[i])) {
+ return FALSE;
+ }
+ }
+ }
+ return TRUE; // also reached without comparing anything when comparer is NULL
+}
+
+void UVector::addElement(void* obj, UErrorCode &status) { // appends a pointer element
+ if (ensureCapacity(count + 1, status)) { // on failure status is set and nothing is added
+ elements[count++].pointer = obj;
+ }
+}
+
+void UVector::addElement(int32_t elem, UErrorCode &status) { // appends an integer element
+ if (ensureCapacity(count + 1, status)) { // on failure status is set and nothing is added
+ elements[count].pointer = NULL; // Pointers may be bigger than ints.
+ elements[count].integer = elem;
+ count++;
+ }
+}
+
+void UVector::setElementAt(void* obj, int32_t index) { // replaces the element at index with a pointer
+ if (0 <= index && index < count) {
+ if (elements[index].pointer != 0 && deleter != 0) { // release the element being replaced
+ (*deleter)(elements[index].pointer);
+ }
+ elements[index].pointer = obj;
+ }
+ /* else index out of range */
+}
+
+void UVector::setElementAt(int32_t elem, int32_t index) { // replaces the element at index with an integer
+ if (0 <= index && index < count) {
+ if (elements[index].pointer != 0 && deleter != 0) {
+ // TODO: this should be an error. mixing up ints and pointers.
+ (*deleter)(elements[index].pointer);
+ }
+ elements[index].pointer = NULL; // clear the slot before storing the integer, as addElement(int32_t) does
+ elements[index].integer = elem;
+ }
+ /* else index out of range */
+}
+
+void UVector::insertElementAt(void* obj, int32_t index, UErrorCode &status) { // inserts a pointer element before index
+ // must have 0 <= index <= count
+ if (0 <= index && index <= count && ensureCapacity(count + 1, status)) {
+ for (int32_t i=count; i>index; --i) { // shift the tail up by one slot, last element first
+ elements[i] = elements[i-1];
+ }
+ elements[index].pointer = obj;
+ ++count;
+ }
+ /* else index out of range */
+}
+
+void UVector::insertElementAt(int32_t elem, int32_t index, UErrorCode &status) { // inserts an integer element before index
+ // must have 0 <= index <= count
+ if (0 <= index && index <= count && ensureCapacity(count + 1, status)) {
+ for (int32_t i=count; i>index; --i) { // shift the tail up by one slot, last element first
+ elements[i] = elements[i-1];
+ }
+ elements[index].pointer = NULL; // clear the slot before storing the integer, as addElement(int32_t) does
+ elements[index].integer = elem;
+ ++count;
+ }
+ /* else index out of range */
+}
+
+void* UVector::elementAt(int32_t index) const { // pointer stored at index, or 0 when index is out of range
+ return (0 <= index && index < count) ? elements[index].pointer : 0;
+}
+
+int32_t UVector::elementAti(int32_t index) const { // integer stored at index, or 0 when index is out of range
+ return (0 <= index && index < count) ? elements[index].integer : 0;
+}
+
+UBool UVector::containsAll(const UVector& other) const { // TRUE if every element of other is found in this vector
+ for (int32_t i=0; i<other.size(); ++i) {
+ if (indexOf(other.elements[i]) < 0) { // one missing element is enough to fail
+ return FALSE;
+ }
+ }
+ return TRUE;
+}
+
+UBool UVector::containsNone(const UVector& other) const { // TRUE if no element of other is found in this vector
+ for (int32_t i=0; i<other.size(); ++i) {
+ if (indexOf(other.elements[i]) >= 0) { // one shared element is enough to fail
+ return FALSE;
+ }
+ }
+ return TRUE;
+}
+
+UBool UVector::removeAll(const UVector& other) { // returns TRUE if at least one element was removed
+ UBool changed = FALSE;
+ for (int32_t i=0; i<other.size(); ++i) {
+ int32_t j = indexOf(other.elements[i]); // first match only; later duplicates in this vector stay
+ if (j >= 0) {
+ removeElementAt(j);
+ changed = TRUE;
+ }
+ }
+ return changed;
+}
+
+UBool UVector::retainAll(const UVector& other) { // removes every element not found in other; TRUE if anything was removed
+ UBool changed = FALSE;
+ for (int32_t j=size()-1; j>=0; --j) { // walk backward so removals do not disturb indices still to be visited
+ int32_t i = other.indexOf(elements[j]);
+ if (i < 0) {
+ removeElementAt(j);
+ changed = TRUE;
+ }
+ }
+ return changed;
+}
+
+void UVector::removeElementAt(int32_t index) { // removes the element at index and destroys it if a deleter is set
+ void* e = orphanElementAt(index); // takes the element out of the vector without deleting it
+ if (e != 0 && deleter != 0) {
+ (*deleter)(e);
+ }
+}
+
+UBool UVector::removeElement(void* obj) { // removes the first occurrence of obj; FALSE if it is not present
+ int32_t i = indexOf(obj);
+ if (i >= 0) {
+ removeElementAt(i);
+ return TRUE;
+ }
+ return FALSE;
+}
+
+void UVector::removeAllElements(void) { // empties the vector; the allocated array and capacity are kept
+ if (deleter != 0) {
+ for (int32_t i=0; i<count; ++i) {
+ if (elements[i].pointer != 0) { // NULL slots are never passed to the deleter
+ (*deleter)(elements[i].pointer);
+ }
+ }
+ }
+ count = 0;
+}
+
+UBool UVector::equals(const UVector &other) const { // element-wise equality; TRUE only if sizes match and every pair matches
+ int i;
+
+ if (this->count != other.count) { // different sizes are never equal
+ return FALSE;
+ }
+ if (comparer == 0) {
+ for (i=0; i<count; i++) { // no comparer: fall back to pointer identity
+ if (elements[i].pointer != other.elements[i].pointer) {
+ return FALSE;
+ }
+ }
+ } else {
+ // Give the comparer the stored elements themselves, as operator== and indexOf do,
+ // not the address of the other vector's UElement slot wrapped in a key.
+ for (i=0; i<count; i++) {
+ if (!(*comparer)(other.elements[i], elements[i])) {
+ return FALSE;
+ }
+ }
+ }
+ return TRUE;
+}
+
+
+
+int32_t UVector::indexOf(void* obj, int32_t startIndex) const { // index of the first match for a pointer key, or -1
+ UElement key;
+ key.pointer = obj;
+ return indexOf(key, startIndex, HINT_KEY_POINTER); // hint: compare the pointer member when no comparer is set
+}
+
+int32_t UVector::indexOf(int32_t obj, int32_t startIndex) const { // index of the first match for an integer key, or -1
+ UElement key;
+ key.integer = obj;
+ return indexOf(key, startIndex, HINT_KEY_INTEGER); // hint: compare the integer member when no comparer is set
+}
+
+// This only works if this object has a non-null comparer
+int32_t UVector::indexOf(UElement key, int32_t startIndex, int8_t hint) const { // first index >= startIndex whose element matches key, or -1
+ int32_t i;
+ if (comparer != 0) {
+ for (i=startIndex; i<count; ++i) { // a comparer is set: it decides equality and hint is ignored
+ if ((*comparer)(key, elements[i])) {
+ return i;
+ }
+ }
+ } else {
+ for (i=startIndex; i<count; ++i) {
+ /* Pointers are not always the same size as ints so to perform
+ * a valid comparision we need to know whether we are being
+ * provided an int or a pointer. */
+ if (hint & HINT_KEY_POINTER) {
+ if (key.pointer == elements[i].pointer) {
+ return i;
+ }
+ } else {
+ if (key.integer == elements[i].integer) {
+ return i;
+ }
+ }
+ }
+ }
+ return -1;
+}
+
+UBool UVector::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) { // grows the array if needed; FALSE with status set on failure
+ if (minimumCapacity < 0) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return FALSE;
+ }
+ if (capacity < minimumCapacity) {
+ if (capacity > (INT32_MAX - 1) / 2) { // integer overflow check
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return FALSE;
+ }
+ int32_t newCap = capacity * 2; // double, but never less than what was asked for
+ if (newCap < minimumCapacity) {
+ newCap = minimumCapacity;
+ }
+ if (newCap > (int32_t)(INT32_MAX / sizeof(UElement))) { // integer overflow check
+ // We keep the original memory contents on bad minimumCapacity.
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return FALSE;
+ }
+ UElement* newElems = (UElement *)uprv_realloc(elements, sizeof(UElement)*newCap);
+ if (newElems == NULL) {
+ // We keep the original contents on the memory failure on realloc or bad minimumCapacity.
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return FALSE;
+ }
+ elements = newElems;
+ capacity = newCap;
+ }
+ return TRUE;
+}
+
+/**
+ * Resize this vector to newSize elements.
+ *
+ * Shrinking removes the trailing elements one at a time through
+ * removeElementAt(), starting from the end. Growing reserves room and
+ * fills each new slot with an element whose pointer is NULL and whose
+ * integer is 0. A negative newSize, or a failure to reserve room, leaves
+ * the vector unchanged.
+ */
+void UVector::setSize(int32_t newSize, UErrorCode &status) {
+    if (newSize < 0) {
+        return;
+    }
+    if (newSize <= count) {
+        /* Most efficient to count down */
+        int32_t idx = count - 1;
+        while (idx >= newSize) {
+            removeElementAt(idx);
+            --idx;
+        }
+    } else {
+        if (!ensureCapacity(newSize, status)) {
+            return;
+        }
+        UElement blank;
+        blank.pointer = NULL;
+        blank.integer = 0;
+        for (int32_t idx = count; idx < newSize; ++idx) {
+            elements[idx] = blank;
+        }
+    }
+    count = newSize;
+}
+
+/**
+ * Copy the pointer value of every element into the caller's array.
+ * The array must have room for at least size() entries.
+ *
+ * @return the same 'result' pointer that was passed in
+ */
+void** UVector::toArray(void** result) const {
+    for (int32_t idx = 0; idx < count; ++idx) {
+        result[idx] = elements[idx].pointer;
+    }
+    return result;
+}
+
+/**
+ * Install a new deleter function.
+ * @return the deleter that was installed before this call
+ */
+UObjectDeleter *UVector::setDeleter(UObjectDeleter *d) {
+    UObjectDeleter * const previous = deleter;
+    deleter = d;
+    return previous;
+}
+
+/**
+ * Install a new element-equality function.
+ * @return the comparer that was installed before this call
+ */
+UElementsAreEqual *UVector::setComparer(UElementsAreEqual *d) {
+    UElementsAreEqual * const previous = comparer;
+    comparer = d;
+    return previous;
+}
+
+/**
+ * Detach the element at 'index' and hand it to the caller.
+ *
+ * The deleter is not invoked: the caller becomes the owner of the
+ * returned pointer. All later entries move down by one slot and the
+ * vector shrinks by one. For an out-of-range index nothing changes and
+ * 0 is returned.
+ */
+void* UVector::orphanElementAt(int32_t index) {
+    if (index < 0 || index >= count) {
+        /* index out of range */
+        return 0;
+    }
+    void* orphan = elements[index].pointer;
+    const int32_t lastIndex = count - 1;
+    for (int32_t dst = index; dst < lastIndex; ++dst) {
+        elements[dst] = elements[dst + 1];
+    }
+    count = lastIndex;
+    return orphan;
+}
+
+/**
+ * Insert a pointer value at the position that keeps the vector ordered
+ * according to 'compare'. The existing contents must already be sorted.
+ */
+void UVector::sortedInsert(void* obj, UElementComparator *compare, UErrorCode& ec) {
+    UElement wrapped;
+    wrapped.pointer = obj;
+    sortedInsert(wrapped, compare, ec);
+}
+
+/**
+ * Insert an integer value at the position that keeps the vector ordered
+ * according to 'compare'. The existing contents must already be sorted.
+ */
+void UVector::sortedInsert(int32_t obj, UElementComparator *compare, UErrorCode& ec) {
+    UElement wrapped;
+    wrapped.integer = obj;
+    sortedInsert(wrapped, compare, ec);
+}
+
+/**
+ * Worker for the public sortedInsert() overloads.
+ * ASSUMES elements[] is currently sorted with respect to 'compare'.
+ *
+ * A binary search finds the slot such that every element before it
+ * compares <= e and the element at it (if any) compares > e, so equal
+ * elements are inserted after the existing ones. If room cannot be
+ * reserved the vector is left unchanged and 'ec' carries the error.
+ */
+void UVector::sortedInsert(UElement e, UElementComparator *compare, UErrorCode& ec) {
+    int32_t lo = 0;
+    int32_t hi = count;
+    while (lo != hi) {
+        const int32_t mid = (lo + hi) / 2;
+        const int8_t order = (*compare)(elements[mid], e);
+        if (order > 0) {
+            // elements[mid] sorts after e: the slot is at or before mid.
+            hi = mid;
+        } else {
+            // elements[mid] sorts at or before e: the slot is after mid.
+            lo = mid + 1;
+        }
+    }
+    if (!ensureCapacity(count + 1, ec)) {
+        return;
+    }
+    // Open a gap at 'lo' by shifting the tail up one slot.
+    int32_t dst = count;
+    while (dst > lo) {
+        elements[dst] = elements[dst - 1];
+        --dst;
+    }
+    elements[lo] = e;
+    ++count;
+}
+
+/**
+ * Adapter used by UVector::sort().
+ *
+ * Has the signature that uprv_sortArray() expects and forwards to a
+ * UVector-style element comparator. 'context' points at a variable that
+ * holds the user's comparator function pointer (one extra level of
+ * indirection); 'left' and 'right' point at the UElements to compare.
+ */
+static int32_t U_CALLCONV
+sortComparator(const void *context, const void *left, const void *right) {
+    UElementComparator * const *userCompare =
+            static_cast<UElementComparator * const *>(context);
+    const UElement lhs = *static_cast<const UElement *>(left);
+    const UElement rhs = *static_cast<const UElement *>(right);
+    return (*(*userCompare))(lhs, rhs);
+}
+
+
+/**
+ * Adapter used by UVector::sorti().
+ * Orders two UElements by their int32_t member; the context is unused.
+ *
+ * @return -1, 0 or 1 for less than, equal to, or greater than
+ */
+static int32_t U_CALLCONV
+sortiComparator(const void * /*context */, const void *left, const void *right) {
+    const int32_t lhs = static_cast<const UElement *>(left)->integer;
+    const int32_t rhs = static_cast<const UElement *>(right)->integer;
+    if (lhs < rhs) {
+        return -1;
+    }
+    return (lhs == rhs) ? 0 : 1;
+}
+
+/**
+ * Sort the vector, assuming it contains ints.
+ * (A more general sort would take a comparison function, but it's
+ * not clear whether UVector's UElementComparator or
+ * UComparator from uprv_sortArray would be more appropriate.)
+ * Does nothing if 'ec' already indicates a failure.
+ */
+void UVector::sorti(UErrorCode &ec) {
+    if (!U_SUCCESS(ec)) {
+        return;
+    }
+    uprv_sortArray(elements, count, sizeof(UElement),
+                   sortiComparator, NULL, FALSE, &ec);
+}
+
+
+/**
+ * Sort with a user supplied comparator.
+ *
+ * The comparator type used by UVector (the one sortedInsert() takes) has
+ * a different signature from the one uprv_sortArray() requires. To bridge
+ * the two, the user's function pointer travels through the context
+ * argument to sortComparator(), which calls back to the user function.
+ *
+ * It is not safe to pass a pointer-to-function as a (void *) data
+ * pointer, so the context is a (data) pointer to the local variable that
+ * holds the pointer-to-function.
+ *
+ * Does nothing if 'ec' already indicates a failure.
+ */
+void UVector::sort(UElementComparator *compare, UErrorCode &ec) {
+    if (!U_SUCCESS(ec)) {
+        return;
+    }
+    uprv_sortArray(elements, count, sizeof(UElement),
+                   sortComparator, &compare, FALSE, &ec);
+}
+
+
+/**
+ * Stable sort with a user supplied comparator of type UComparator.
+ * 'context' is handed through to the comparator unchanged.
+ * Does nothing if 'ec' already indicates a failure.
+ */
+void UVector::sortWithUComparator(UComparator *compare, const void *context, UErrorCode &ec) {
+    if (!U_SUCCESS(ec)) {
+        return;
+    }
+    uprv_sortArray(elements, count, sizeof(UElement),
+                   compare, context, TRUE, &ec);
+}
+
+U_NAMESPACE_END
+
diff --git a/thirdparty/icu4c/common/uvector.h b/thirdparty/icu4c/common/uvector.h
new file mode 100644
index 0000000000..a2bef923af
--- /dev/null
+++ b/thirdparty/icu4c/common/uvector.h
@@ -0,0 +1,415 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 10/22/99 alan Creation. This is an internal header.
+* It should not be exported.
+**********************************************************************
+*/
+
+#ifndef UVECTOR_H
+#define UVECTOR_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "cmemory.h"
+#include "uarrsort.h"
+#include "uelement.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * <p>Ultralightweight C++ implementation of a <tt>void*</tt> vector
+ * that is (mostly) compatible with java.util.Vector.
+ *
+ * <p>This is a very simple implementation, written to satisfy an
+ * immediate porting need. As such, it is not completely fleshed out,
+ * and it aims for simplicity and conformity. Nonetheless, it serves
+ * its purpose (porting code from java that uses java.util.Vector)
+ * well, and it could be easily made into a more robust vector class.
+ *
+ * <p><b>Design notes</b>
+ *
+ * <p>There is index bounds checking, but little is done about it. If
+ * indices are out of bounds, either nothing happens, or zero is
+ * returned. We <em>do</em> avoid indexing off into the weeds.
+ *
+ * <p>There is detection of out of memory, but the handling is very
+ * coarse-grained -- similar to UnicodeString's protocol, but even
+ * coarser. The class contains <em>one static flag</em> that is set
+ * when any call to <tt>new</tt> returns zero. This allows the caller
+ * to use several vectors and make just one check at the end to see if
+ * a memory failure occurred. This is more efficient than making a
+ * check after each call on each vector when doing many operations on
+ * multiple vectors. The single static flag works best when memory
+ * failures are infrequent, and when recovery options are limited or
+ * nonexistent.
+ *
+ * <p>NOTE(review): no static flag is declared in the class below. The
+ * operations that can allocate (for example ensureCapacity() and
+ * setSize()) take a UErrorCode reference instead. The paragraph above
+ * appears to describe an earlier design -- confirm before relying on it.
+ *
+ * <p>Since we don't have garbage collection, UVector was given the
+ * option to <em>own</em> its contents. To employ this, set a deleter
+ * function. The deleter is called on a void* pointer when that
+ * pointer is released by the vector, either when the vector itself is
+ * destructed, or when a call to setElementAt() overwrites an element,
+ * or when a call to remove() or one of its variants explicitly
+ * removes an element. If no deleter is set, or the deleter is set to
+ * zero, then it is assumed that the caller will delete elements as
+ * needed.
+ *
+ * <p>In order to implement methods such as contains() and indexOf(),
+ * UVector needs a way to compare objects for equality. To do so, it
+ * uses a comparison function, or "comparer." If the comparer is not
+ * set, or is set to zero, then indexOf() and equals() as implemented
+ * in uvector.cpp compare the stored pointer (or integer) values
+ * directly instead of calling a comparer.
+ *
+ * <p><b>To do</b>
+ *
+ * <p>Improve the handling of index out of bounds errors.
+ *
+ * @author Alan Liu
+ */
+class U_COMMON_API UVector : public UObject {
+ // NOTE: UVector uses UElement (described here as a union of void*
+ // and int32_t) as its basic storage type. It uses UElementsAreEqual
+ // as its comparison function. It uses UObjectDeleter as its deleter
+ // function. Some of these are named for hashtables, but used here as-is
+ // rather than duplicating the type. This allows sharing of
+ // support functions.
+
+private:
+ int32_t count;  // number of elements currently stored; returned by size()
+
+ int32_t capacity;  // number of UElement slots allocated in 'elements'
+
+ UElement* elements;  // heap storage; reallocated by ensureCapacity()
+
+ UObjectDeleter *deleter;  // replaced via setDeleter(); see class comment
+
+ UElementsAreEqual *comparer;  // replaced via setComparer(); used by equals() and indexOf()
+
+public:
+ UVector(UErrorCode &status);
+
+ UVector(int32_t initialCapacity, UErrorCode &status);
+
+ UVector(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status);
+
+ UVector(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status);
+
+ virtual ~UVector();
+
+ /**
+ * Assign this object to another (make this a copy of 'other').
+ * Use the 'assign' function to assign each element.
+ */
+ void assign(const UVector& other, UElementAssigner *assign, UErrorCode &ec);
+
+ /**
+ * Compare this vector with another. They will be considered
+ * equal if they are of the same size and all elements are equal,
+ * as compared using this object's comparer.
+ */
+ UBool operator==(const UVector& other);
+
+ /**
+ * Equivalent to !operator==()
+ */
+ inline UBool operator!=(const UVector& other);
+
+ //------------------------------------------------------------
+ // java.util.Vector API
+ //------------------------------------------------------------
+
+ void addElement(void* obj, UErrorCode &status);
+
+ void addElement(int32_t elem, UErrorCode &status);
+
+ void setElementAt(void* obj, int32_t index);
+
+ void setElementAt(int32_t elem, int32_t index);
+
+ void insertElementAt(void* obj, int32_t index, UErrorCode &status);
+
+ void insertElementAt(int32_t elem, int32_t index, UErrorCode &status);
+
+ void* elementAt(int32_t index) const;
+
+ int32_t elementAti(int32_t index) const;
+
+ UBool equals(const UVector &other) const;  // same count and element-wise match; pointer identity when no comparer is set
+
+ inline void* firstElement(void) const;  // elementAt(0)
+
+ inline void* lastElement(void) const;  // elementAt(count-1)
+
+ inline int32_t lastElementi(void) const;  // elementAti(count-1)
+
+ int32_t indexOf(void* obj, int32_t startIndex = 0) const;  // index of first match at or after startIndex, or -1
+
+ int32_t indexOf(int32_t obj, int32_t startIndex = 0) const;  // index of first match at or after startIndex, or -1
+
+ inline UBool contains(void* obj) const;  // indexOf(obj) >= 0
+
+ inline UBool contains(int32_t obj) const;  // indexOf(obj) >= 0
+
+ UBool containsAll(const UVector& other) const;
+
+ UBool removeAll(const UVector& other);
+
+ UBool retainAll(const UVector& other);
+
+ void removeElementAt(int32_t index);
+
+ UBool removeElement(void* obj);
+
+ void removeAllElements();
+
+ inline int32_t size(void) const;  // returns count
+
+ inline UBool isEmpty(void) const;  // count == 0
+
+ UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status);  // grows storage if needed; returns FALSE and sets status on failure
+
+ /**
+ * Change the size of this vector as follows: If newSize is
+ * smaller, then truncate the array, possibly deleting held
+ * elements for i >= newSize. If newSize is larger, grow the
+ * array, filling in new slots with NULL.
+ */
+ void setSize(int32_t newSize, UErrorCode &status);
+
+ /**
+ * Fill in the given array with all elements of this vector.
+ */
+ void** toArray(void** result) const;
+
+ //------------------------------------------------------------
+ // New API
+ //------------------------------------------------------------
+
+ UObjectDeleter *setDeleter(UObjectDeleter *d);  // installs d and returns the previous deleter
+
+ UElementsAreEqual *setComparer(UElementsAreEqual *c);  // installs c and returns the previous comparer
+
+ inline void* operator[](int32_t index) const;  // same as elementAt(index)
+
+ /**
+ * Removes the element at the given index from this vector and
+ * transfer ownership of it to the caller. After this call, the
+ * caller owns the result and must delete it and the vector entry
+ * at 'index' is removed, shifting all subsequent entries back by
+ * one index and shortening the size of the vector by one. If the
+ * index is out of range or if there is no item at the given index
+ * then 0 is returned and the vector is unchanged.
+ */
+ void* orphanElementAt(int32_t index);
+
+ /**
+ * Returns true if this vector contains none of the elements
+ * of the given vector.
+ * @param other vector to be checked for containment
+ * @return true if the test condition is met
+ */
+ UBool containsNone(const UVector& other) const;
+
+ /**
+ * Insert the given object into this vector at its sorted position
+ * as defined by 'compare'. The current elements are assumed to
+ * be sorted already.
+ */
+ void sortedInsert(void* obj, UElementComparator *compare, UErrorCode& ec);
+
+ /**
+ * Insert the given integer into this vector at its sorted position
+ * as defined by 'compare'. The current elements are assumed to
+ * be sorted already.
+ */
+ void sortedInsert(int32_t obj, UElementComparator *compare, UErrorCode& ec);
+
+ /**
+ * Sort the contents of the vector, assuming that the contents of the
+ * vector are of type int32_t.
+ */
+ void sorti(UErrorCode &ec);
+
+ /**
+ * Sort the contents of this vector, using a caller-supplied function
+ * to do the comparisons. (It's confusing that
+ * UVector's UElementComparator function is different from the
+ * UComparator function type defined in uarrsort.h)
+ */
+ void sort(UElementComparator *compare, UErrorCode &ec);
+
+ /**
+ * Stable sort the contents of this vector using a caller-supplied function
+ * of type UComparator to do the comparison. Provides more flexibility
+ * than UVector::sort() because an additional user parameter can be passed to
+ * the comparison function.
+ */
+ void sortWithUComparator(UComparator *compare, const void *context, UErrorCode &ec);
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ */
+ virtual UClassID getDynamicClassID() const;
+
+private:
+ void _init(int32_t initialCapacity, UErrorCode &status);
+
+ int32_t indexOf(UElement key, int32_t startIndex = 0, int8_t hint = 0) const;  // shared worker; 'hint' says which member of 'key' to compare when no comparer is set
+
+ void sortedInsert(UElement e, UElementComparator *compare, UErrorCode& ec);  // binary-search insert shared by both public overloads
+
+ // Disallow copy construction (declared private so outside code cannot copy)
+ UVector(const UVector&);
+
+ // Disallow copy assignment (declared private so outside code cannot assign)
+ UVector& operator=(const UVector&);
+
+};
+
+
+/**
+ * <p>Ultralightweight C++ implementation of a <tt>void*</tt> stack
+ * that is (mostly) compatible with java.util.Stack. As in java, this
+ * is merely a paper thin layer around UVector. See the UVector
+ * documentation for further information.
+ *
+ * <p><b>Design notes</b>
+ *
+ * <p>The element at index <tt>n-1</tt> is (of course) the top of the
+ * stack.
+ *
+ * <p>The poorly named <tt>empty()</tt> method doesn't empty the
+ * stack; it determines if the stack is empty.
+ *
+ * @author Alan Liu
+ */
+class U_COMMON_API UStack : public UVector {
+public:
+ UStack(UErrorCode &status);
+
+ UStack(int32_t initialCapacity, UErrorCode &status);
+
+ UStack(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status);
+
+ UStack(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status);
+
+ virtual ~UStack();
+
+ // It's okay not to have a virtual destructor (in UVector)
+ // because UStack has no special cleanup to do.
+ // NOTE(review): UVector does declare a virtual destructor, so the
+ // remark above looks stale -- confirm and drop it if so.
+
+ inline UBool empty(void) const;  // TRUE when the stack holds no elements (forwards to isEmpty())
+
+ inline void* peek(void) const;  // top element without removing it (forwards to lastElement())
+
+ inline int32_t peeki(void) const;  // top element as an integer (forwards to lastElementi())
+
+ void* pop(void);
+
+ int32_t popi(void);
+
+ inline void* push(void* obj, UErrorCode &status);  // calls addElement(obj, status) and returns obj
+
+ inline int32_t push(int32_t i, UErrorCode &status);  // calls addElement(i, status) and returns i
+
+ /*
+ If the object o occurs as an item in this stack,
+ this method returns the 1-based distance from the top of the stack.
+ */
+ int32_t search(void* obj) const;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ */
+ virtual UClassID getDynamicClassID() const;
+
+private:
+ // Disallow copy construction (declared private so outside code cannot copy)
+ UStack(const UStack&);
+
+ // Disallow copy assignment (declared private so outside code cannot assign)
+ UStack& operator=(const UStack&);
+};
+
+
+// UVector inlines
+
+/** Number of elements currently held by the vector. */
+inline int32_t UVector::size(void) const {
+    return this->count;
+}
+
+/** TRUE when the vector holds no elements. */
+inline UBool UVector::isEmpty(void) const {
+    return 0 == count;
+}
+
+/** TRUE when indexOf() finds the given pointer somewhere in the vector. */
+inline UBool UVector::contains(void* obj) const {
+    const int32_t position = indexOf(obj);
+    return position >= 0;
+}
+
+/** TRUE when indexOf() finds the given integer somewhere in the vector. */
+inline UBool UVector::contains(int32_t obj) const {
+    const int32_t position = indexOf(obj);
+    return position >= 0;
+}
+
+/** The element at index 0, as returned by elementAt(). */
+inline void* UVector::firstElement(void) const {
+    void* const first = elementAt(0);
+    return first;
+}
+
+/** The element at index count-1, as returned by elementAt(). */
+inline void* UVector::lastElement(void) const {
+    const int32_t lastIndex = count - 1;
+    return elementAt(lastIndex);
+}
+
+/** The integer at index count-1, as returned by elementAti(). */
+inline int32_t UVector::lastElementi(void) const {
+    const int32_t lastIndex = count - 1;
+    return elementAti(lastIndex);
+}
+
+/** Subscript access; identical to elementAt(index). */
+inline void* UVector::operator[](int32_t index) const {
+    void* const item = elementAt(index);
+    return item;
+}
+
+/** Logical negation of operator==(). */
+inline UBool UVector::operator!=(const UVector& other) {
+    const UBool same = operator==(other);
+    return !same;
+}
+
+// UStack inlines
+
+/** TRUE when the stack holds no elements; does not modify the stack. */
+inline UBool UStack::empty(void) const {
+    const UBool nothingStored = isEmpty();
+    return nothingStored;
+}
+
+/** The top of the stack (the last element), without removing it. */
+inline void* UStack::peek(void) const {
+    void* const top = lastElement();
+    return top;
+}
+
+/** The top of the stack as an integer, without removing it. */
+inline int32_t UStack::peeki(void) const {
+    const int32_t top = lastElementi();
+    return top;
+}
+
+/**
+ * Push a pointer by appending it with addElement().
+ * @return obj, regardless of what 'status' reports afterwards
+ */
+inline void* UStack::push(void* obj, UErrorCode &status) {
+    this->addElement(obj, status);
+    return obj;
+}
+
+/**
+ * Push an integer by appending it with addElement().
+ * @return i, regardless of what 'status' reports afterwards
+ */
+inline int32_t UStack::push(int32_t i, UErrorCode &status) {
+    this->addElement(i, status);
+    return i;
+}
+
+U_NAMESPACE_END
+
+#endif
diff --git a/thirdparty/icu4c/common/uvectr32.cpp b/thirdparty/icu4c/common/uvectr32.cpp
new file mode 100644
index 0000000000..d1ae659958
--- /dev/null
+++ b/thirdparty/icu4c/common/uvectr32.cpp
@@ -0,0 +1,335 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1999-2015, International Business Machines Corporation and
+* others. All Rights Reserved.
+******************************************************************************
+* Date Name Description
+* 10/22/99 alan Creation.
+**********************************************************************
+*/
+
+#include "uvectr32.h"
+#include "cmemory.h"
+#include "putilimp.h"
+
+U_NAMESPACE_BEGIN
+
+#define DEFAULT_CAPACITY 8
+
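+/*
+ * Constants for hinting whether a key is an integer
+ * or a pointer. If a hint bit is zero, then the associated
+ * token is assumed to be an integer. This is needed for iSeries.
+ *
+ * NOTE: no such constants are defined in this file. UVector32 stores
+ * plain int32_t values and its indexOf() takes no hint argument.
+ */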
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UVector32)
+
+/**
+ * Construct an empty vector with the default initial capacity.
+ * 'status' receives U_MEMORY_ALLOCATION_ERROR if storage cannot be
+ * allocated.
+ */
+UVector32::UVector32(UErrorCode &status)
+    : count(0), capacity(0), maxCapacity(0), elements(NULL)
+{
+    _init(DEFAULT_CAPACITY, status);
+}
+
+/**
+ * Construct an empty vector with room for initialCapacity elements.
+ * 'status' receives U_MEMORY_ALLOCATION_ERROR if storage cannot be
+ * allocated.
+ */
+UVector32::UVector32(int32_t initialCapacity, UErrorCode &status)
+    : count(0), capacity(0), maxCapacity(0), elements(0)
+{
+    _init(initialCapacity, status);
+}
+
+
+
+/**
+ * Allocate the initial backing store.
+ *
+ * The requested capacity is sanitized first: values below 1 become
+ * DEFAULT_CAPACITY, values above a non-zero maxCapacity are clamped to
+ * it, and values whose byte size would overflow fall back to a small
+ * default. On allocation failure 'status' is set to
+ * U_MEMORY_ALLOCATION_ERROR and capacity is left unchanged.
+ */
+void UVector32::_init(int32_t initialCapacity, UErrorCode &status) {
+    // Fix bogus initialCapacity values; avoid malloc(0)
+    if (initialCapacity < 1) {
+        initialCapacity = DEFAULT_CAPACITY;
+    }
+    if (maxCapacity>0 && maxCapacity<initialCapacity) {
+        initialCapacity = maxCapacity;
+    }
+    if (initialCapacity > (int32_t)(INT32_MAX / sizeof(int32_t))) {
+        // maxCapacity == 0 means "no limit". Taking the minimum with it
+        // would give 0 and request the zero-byte allocation that this
+        // function is meant to avoid, so only clamp to a real limit.
+        initialCapacity = (maxCapacity > 0)
+                ? uprv_min(DEFAULT_CAPACITY, maxCapacity)
+                : DEFAULT_CAPACITY;
+    }
+    elements = (int32_t *)uprv_malloc(sizeof(int32_t)*initialCapacity);
+    if (elements == 0) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    } else {
+        capacity = initialCapacity;
+    }
+}
+
+/** Release the backing store and clear the pointer to it. */
+UVector32::~UVector32() {
+    uprv_free(this->elements);
+    this->elements = 0;
+}
+
+/**
+ * Make this vector a copy of 'other'.
+ * If room for other's elements cannot be reserved, this vector is left
+ * unchanged and 'ec' carries the error.
+ */
+void UVector32::assign(const UVector32& other, UErrorCode &ec) {
+    if (!ensureCapacity(other.count, ec)) {
+        return;
+    }
+    setSize(other.count);
+    int32_t pos = 0;
+    while (pos < other.count) {
+        elements[pos] = other.elements[pos];
+        ++pos;
+    }
+}
+
+
+/**
+ * Two vectors are equal when they have the same number of elements and
+ * the same value at every index.
+ */
+UBool UVector32::operator==(const UVector32& other) {
+    if (count != other.count) {
+        return FALSE;
+    }
+    // Walk forward until the first mismatch or the end of the data.
+    int32_t pos = 0;
+    while (pos < count && elements[pos] == other.elements[pos]) {
+        ++pos;
+    }
+    return pos >= count;
+}
+
+
+/**
+ * Overwrite the value at 'index'. An index outside [0, count) is
+ * silently ignored.
+ */
+void UVector32::setElementAt(int32_t elem, int32_t index) {
+    if (index < 0 || index >= count) {
+        /* index out of range */
+        return;
+    }
+    elements[index] = elem;
+}
+
+/**
+ * Insert 'elem' before the element currently at 'index', shifting the
+ * tail up by one. 'index' may equal count (append). An out-of-range
+ * index is silently ignored; a failure to reserve room is reported
+ * through 'status' and leaves the vector unchanged.
+ */
+void UVector32::insertElementAt(int32_t elem, int32_t index, UErrorCode &status) {
+    // must have 0 <= index <= count
+    if (index < 0 || index > count) {
+        /* index out of range */
+        return;
+    }
+    if (!ensureCapacity(count + 1, status)) {
+        return;
+    }
+    int32_t dst = count;
+    while (dst > index) {
+        elements[dst] = elements[dst - 1];
+        --dst;
+    }
+    elements[index] = elem;
+    ++count;
+}
+
+/** TRUE when every value held by 'other' also occurs in this vector. */
+UBool UVector32::containsAll(const UVector32& other) const {
+    int32_t pos = 0;
+    while (pos < other.size()) {
+        const int32_t wanted = other.elements[pos];
+        if (indexOf(wanted) < 0) {
+            return FALSE;
+        }
+        ++pos;
+    }
+    return TRUE;
+}
+
+/** TRUE when no value held by 'other' occurs in this vector. */
+UBool UVector32::containsNone(const UVector32& other) const {
+    int32_t pos = 0;
+    while (pos < other.size()) {
+        const int32_t unwanted = other.elements[pos];
+        if (indexOf(unwanted) >= 0) {
+            return FALSE;
+        }
+        ++pos;
+    }
+    return TRUE;
+}
+
+/**
+ * For each value in 'other', remove its first occurrence (if any) from
+ * this vector.
+ * @return TRUE if at least one element was removed
+ */
+UBool UVector32::removeAll(const UVector32& other) {
+    UBool removedAny = FALSE;
+    int32_t pos = 0;
+    // other.size() is re-read on every pass, as 'other' may alias this.
+    while (pos < other.size()) {
+        const int32_t hit = indexOf(other.elements[pos]);
+        ++pos;
+        if (hit >= 0) {
+            removeElementAt(hit);
+            removedAny = TRUE;
+        }
+    }
+    return removedAny;
+}
+
+/**
+ * Remove every element of this vector whose value does not occur in
+ * 'other'. Elements are visited from the end towards the front.
+ * @return TRUE if at least one element was removed
+ */
+UBool UVector32::retainAll(const UVector32& other) {
+    UBool removedAny = FALSE;
+    int32_t idx = size();
+    while (--idx >= 0) {
+        if (other.indexOf(elements[idx]) < 0) {
+            removeElementAt(idx);
+            removedAny = TRUE;
+        }
+    }
+    return removedAny;
+}
+
+/**
+ * Remove the element at 'index', shifting all later elements down by
+ * one slot and reducing the size by one.
+ *
+ * An index outside [0, count) is ignored. The upper bound matters:
+ * without it, an index at or beyond count would move nothing yet still
+ * decrement count, which could even drive count negative on an empty
+ * vector.
+ */
+void UVector32::removeElementAt(int32_t index) {
+    if (0 <= index && index < count) {
+        for (int32_t i=index; i<count-1; ++i) {
+            elements[i] = elements[i+1];
+        }
+        --count;
+    }
+    /* else index out of range */
+}
+
+/** Drop all elements by resetting the count; the capacity is kept. */
+void UVector32::removeAllElements(void) {
+    this->count = 0;
+}
+
+/**
+ * Const-qualified equality test: same element count and the same value
+ * at every index.
+ */
+UBool UVector32::equals(const UVector32 &other) const {
+    if (other.count != this->count) {
+        return FALSE;
+    }
+    const int32_t *mine = elements;
+    const int32_t *theirs = other.elements;
+    for (int32_t remaining = count; remaining > 0; --remaining) {
+        if (*mine++ != *theirs++) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+
+
+
+/**
+ * Linear search for 'key', beginning at startIndex.
+ * @return index of the first match, or -1 if there is none
+ */
+int32_t UVector32::indexOf(int32_t key, int32_t startIndex) const {
+    int32_t pos = startIndex;
+    while (pos < count) {
+        if (elements[pos] == key) {
+            return pos;
+        }
+        ++pos;
+    }
+    return -1;
+}
+
+
+/**
+ * Grow the backing store so that it can hold at least minimumCapacity
+ * elements.
+ *
+ * Returns FALSE immediately if 'status' already holds a failure. The new
+ * capacity is double the old one, raised to minimumCapacity if needed,
+ * then clamped to a non-zero maxCapacity. The existing contents are kept
+ * on every failure path.
+ *
+ * @param status U_ILLEGAL_ARGUMENT_ERROR for a negative or overflowing
+ *               request, U_BUFFER_OVERFLOW_ERROR if the request exceeds
+ *               maxCapacity, U_MEMORY_ALLOCATION_ERROR if realloc fails
+ * @return TRUE if the capacity is now sufficient
+ */
+UBool UVector32::expandCapacity(int32_t minimumCapacity, UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return FALSE;
+    }
+    if (minimumCapacity < 0) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return FALSE;
+    }
+    if (minimumCapacity <= capacity) {
+        return TRUE;
+    }
+    const UBool limited = maxCapacity > 0;
+    if (limited && minimumCapacity > maxCapacity) {
+        status = U_BUFFER_OVERFLOW_ERROR;
+        return FALSE;
+    }
+    if (capacity > (INT32_MAX - 1) / 2) {
+        // Doubling would overflow int32_t.
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return FALSE;
+    }
+    int32_t grownCapacity = capacity * 2;
+    if (grownCapacity < minimumCapacity) {
+        grownCapacity = minimumCapacity;
+    }
+    if (limited && grownCapacity > maxCapacity) {
+        grownCapacity = maxCapacity;
+    }
+    if (grownCapacity > (int32_t)(INT32_MAX / sizeof(int32_t))) {
+        // The byte count would overflow; keep the original memory contents.
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return FALSE;
+    }
+    int32_t* grown = (int32_t *)uprv_realloc(elements, sizeof(int32_t)*grownCapacity);
+    if (grown == NULL) {
+        // realloc failed; the old block is still valid and still owned by us.
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return FALSE;
+    }
+    elements = grown;
+    capacity = grownCapacity;
+    return TRUE;
+}
+
+/**
+ * Set an upper bound on the capacity; 0 means no limit.
+ *
+ * A negative limit is treated as 0. A limit whose byte size would
+ * overflow is rejected and changes nothing. If the current capacity
+ * exceeds the new non-zero limit, the storage is shrunk to the limit and
+ * the element count is truncated to fit; if that shrinking realloc
+ * fails, the existing storage is kept and no error is reported.
+ */
+void UVector32::setMaxCapacity(int32_t limit) {
+    U_ASSERT(limit >= 0);
+    const int32_t clamped = (limit < 0) ? 0 : limit;
+    if (clamped > (int32_t)(INT32_MAX / sizeof(int32_t))) {
+        // Something is very wrong; leave capacity and maxCapacity unchanged.
+        return;
+    }
+    maxCapacity = clamped;
+
+    const UBool mustShrink = (maxCapacity != 0) && (capacity > maxCapacity);
+    if (!mustShrink) {
+        // Current capacity is within the new limit.
+        return;
+    }
+
+    // The new maximum is smaller than the current capacity: shrink to it.
+    int32_t* shrunk = (int32_t *)uprv_realloc(elements, sizeof(int32_t)*maxCapacity);
+    if (shrunk == NULL) {
+        // Shrinking failed. Just keep what we had; not treated as a failure.
+        return;
+    }
+    elements = shrunk;
+    capacity = maxCapacity;
+    if (count > capacity) {
+        count = capacity;
+    }
+}
+
+/**
+ * Change the size of this vector as follows: If newSize is smaller,
+ * truncate by lowering the count. If newSize is larger, grow the array
+ * and fill the new slots with 0. A negative newSize, or a failure to
+ * reserve room, leaves the vector unchanged; no error is reported since
+ * this method has no status parameter.
+ */
+void UVector32::setSize(int32_t newSize) {
+    if (newSize < 0) {
+        return;
+    }
+    if (newSize > count) {
+        UErrorCode localStatus = U_ZERO_ERROR;
+        if (!ensureCapacity(newSize, localStatus)) {
+            return;
+        }
+        int32_t pos = count;
+        while (pos < newSize) {
+            elements[pos] = 0;
+            ++pos;
+        }
+    }
+    count = newSize;
+}
+
+
+
+
+/**
+ * Insert the given integer into this vector at its sorted position,
+ * using ascending numeric order. The current elements are assumed to
+ * be sorted already.
+ *
+ * A binary search finds the slot such that every element before it is
+ * <= tok and the element at it (if any) is > tok, so equal values are
+ * inserted after the existing ones. If room cannot be reserved the
+ * vector is left unchanged and 'ec' carries the error.
+ */
+void UVector32::sortedInsert(int32_t tok, UErrorCode& ec) {
+    int32_t lo = 0;
+    int32_t hi = count;
+    while (lo != hi) {
+        const int32_t mid = (lo + hi) / 2;
+        if (elements[mid] > tok) {
+            // elements[mid] sorts after tok: the slot is at or before mid.
+            hi = mid;
+        } else {
+            // elements[mid] <= tok: the slot is after mid.
+            lo = mid + 1;
+        }
+    }
+    if (!ensureCapacity(count + 1, ec)) {
+        return;
+    }
+    // Open a gap at 'lo' by shifting the tail up one slot.
+    int32_t dst = count;
+    while (dst > lo) {
+        elements[dst] = elements[dst - 1];
+        --dst;
+    }
+    elements[lo] = tok;
+    ++count;
+}
+
+
+
+
+
+U_NAMESPACE_END
+
diff --git a/thirdparty/icu4c/common/uvectr32.h b/thirdparty/icu4c/common/uvectr32.h
new file mode 100644
index 0000000000..0d81dfb5c1
--- /dev/null
+++ b/thirdparty/icu4c/common/uvectr32.h
@@ -0,0 +1,306 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+//
+// UVector32 is a class implementing a vector of 32 bit integers.
+// It is similar to UVector, but holds int32_t values rather than pointers.
+// Most of the code is unchanged from UVector.
+//
+
+#ifndef UVECTOR32_H
+#define UVECTOR32_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "uhash.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+
+
+/**
+ * <p>Ultralightweight C++ implementation of an <tt>int32_t</tt> vector
+ * that is (mostly) compatible with java.util.Vector.
+ *
+ * <p>This is a very simple implementation, written to satisfy an
+ * immediate porting need. As such, it is not completely fleshed out,
+ * and it aims for simplicity and conformity. Nonetheless, it serves
+ * its purpose (porting code from java that uses java.util.Vector)
+ * well, and it could be easily made into a more robust vector class.
+ *
+ * <p><b>Design notes</b>
+ *
+ * <p>There is index bounds checking, but little is done about it. If
+ * indices are out of bounds, either nothing happens, or zero is
+ * returned. We <em>do</em> avoid indexing off into the weeds.
+ *
+ * <p>There is detection of out of memory, but the handling is very
+ * coarse-grained -- similar to UnicodeString's protocol, but even
+ * coarser. The class contains <em>one static flag</em> that is set
+ * when any call to <tt>new</tt> returns zero. This allows the caller
+ * to use several vectors and make just one check at the end to see if
+ * a memory failure occurred. This is more efficient than making a
+ * check after each call on each vector when doing many operations on
+ * multiple vectors. The single static flag works best when memory
+ * failures are infrequent, and when recovery options are limited or
+ * nonexistent.
+ *
+ * <p><b>To do</b>
+ *
+ * <p>Improve the handling of index out of bounds errors.
+ *
+ * @author Alan Liu
+ */
+class U_COMMON_API UVector32 : public UObject {
+private:
+ int32_t count;
+
+ int32_t capacity;
+
+ int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow.
+
+ int32_t* elements;
+
+public:
+ UVector32(UErrorCode &status);
+
+ UVector32(int32_t initialCapacity, UErrorCode &status);
+
+ virtual ~UVector32();
+
+ /**
+ * Assign this object to another (make this a copy of 'other').
+ * Use the 'assign' function to assign each element.
+ */
+ void assign(const UVector32& other, UErrorCode &ec);
+
+ /**
+ * Compare this vector with another. They will be considered
+ * equal if they are of the same size and all elements are equal,
+ * as compared using this object's comparer.
+ */
+ UBool operator==(const UVector32& other);
+
+ /**
+ * Equivalent to !operator==()
+ */
+ inline UBool operator!=(const UVector32& other);
+
+ //------------------------------------------------------------
+ // java.util.Vector API
+ //------------------------------------------------------------
+
+ inline void addElement(int32_t elem, UErrorCode &status);
+
+ void setElementAt(int32_t elem, int32_t index);
+
+ void insertElementAt(int32_t elem, int32_t index, UErrorCode &status);
+
+ inline int32_t elementAti(int32_t index) const;
+
+ UBool equals(const UVector32 &other) const;
+
+ inline int32_t lastElementi(void) const;
+
+ int32_t indexOf(int32_t elem, int32_t startIndex = 0) const;
+
+ inline UBool contains(int32_t elem) const;
+
+ UBool containsAll(const UVector32& other) const;
+
+ UBool removeAll(const UVector32& other);
+
+ UBool retainAll(const UVector32& other);
+
+ void removeElementAt(int32_t index);
+
+ void removeAllElements();
+
+ inline int32_t size(void) const;
+
+ inline UBool isEmpty(void) const;
+
+ // Inline. Use this one for speedy size check.
+ inline UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status);
+
+ // Out-of-line, handles actual growth. Called by ensureCapacity() when necessary.
+ UBool expandCapacity(int32_t minimumCapacity, UErrorCode &status);
+
+ /**
+ * Change the size of this vector as follows: If newSize is
+ * smaller, then truncate the array, possibly deleting held
+ * elements for i >= newSize. If newSize is larger, grow the
+ * array, filling in new slots with zero.
+ */
+ void setSize(int32_t newSize);
+
+ //------------------------------------------------------------
+ // New API
+ //------------------------------------------------------------
+
+ /**
+ * Returns true if this vector contains none of the elements
+ * of the given vector.
+ * @param other vector to be checked for containment
+ * @return true if the test condition is met
+ */
+ UBool containsNone(const UVector32& other) const;
+
+
+ /**
+ * Insert the given integer into this vector at its sorted position.
+ * The current elements are assumed to be sorted already.
+ */
+ void sortedInsert(int32_t elem, UErrorCode& ec);
+
+ /**
+ * Returns a pointer to the internal array holding the vector.
+ */
+ inline int32_t *getBuffer() const;
+
+ /**
+ * Set the maximum allowed buffer capacity for this vector/stack.
+ * Default with no limit set is unlimited, go until malloc() fails.
+ * A Limit of zero means unlimited capacity.
+ * Units are vector elements (32 bits each), not bytes.
+ */
+ void setMaxCapacity(int32_t limit);
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ */
+ virtual UClassID getDynamicClassID() const;
+
+private:
+ void _init(int32_t initialCapacity, UErrorCode &status);
+
+ // Disallow
+ UVector32(const UVector32&);
+
+ // Disallow
+ UVector32& operator=(const UVector32&);
+
+
+ // API Functions for Stack operations.
+ // In the original UVector, these were in a separate derived class, UStack.
+ // Here in UVector32, they are all together.
+public:
+ inline UBool empty(void) const; // TODO: redundant, same as isEmpty(). Remove it?
+
+ inline int32_t peeki(void) const;
+
+ inline int32_t popi(void);
+
+ inline int32_t push(int32_t i, UErrorCode &status);
+
+ inline int32_t *reserveBlock(int32_t size, UErrorCode &status);
+ inline int32_t *popFrame(int32_t size);
+};
+
+
+// UVector32 inlines
+
+ // Make sure at least minimumCapacity elements fit. The common "already big
+ // enough" case is answered inline; negative requests and real growth are
+ // delegated to expandCapacity().
+ inline UBool UVector32::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
+     if (minimumCapacity < 0 || capacity < minimumCapacity) {
+         return expandCapacity(minimumCapacity, status);
+     }
+     return true;
+ }
+
+ // Return the element at index, or 0 when index is outside [0, count).
+ inline int32_t UVector32::elementAti(int32_t index) const {
+     if (index < 0 || count <= 0 || count - index <= 0) {
+         return 0;
+     }
+     return elements[index];
+ }
+
+
+ // Append elem to the end of the vector. Nothing is stored if the capacity
+ // for one more element cannot be obtained.
+ inline void UVector32::addElement(int32_t elem, UErrorCode &status) {
+     if (!ensureCapacity(count + 1, status)) {
+         return;
+     }
+     elements[count] = elem;
+     ++count;
+ }
+
+ // Extend the vector by size elements and return a pointer to the first of
+ // them, or NULL if the capacity could not be obtained. The new elements are
+ // not initialized here.
+ inline int32_t *UVector32::reserveBlock(int32_t size, UErrorCode &status) {
+     if (!ensureCapacity(count + size, status)) {
+         return NULL;
+     }
+     // Take the address only after any reallocation has happened.
+     int32_t *blockStart = elements + count;
+     count += size;
+     return blockStart;
+ }
+
+ // Remove the top `size` elements. The count is lowered by `size` (clamped at
+ // zero) and the function returns elements + (new count) - size, i.e. a
+ // pointer `size` elements below the new top.
+ // NOTE(review): presumably that is the start of the preceding, equally sized
+ // frame. If fewer than `size` elements remain after the pop, the returned
+ // pointer lies before the start of the buffer -- confirm that callers
+ // guarantee this cannot happen.
+ inline int32_t *UVector32::popFrame(int32_t size) {
+ U_ASSERT(count >= size);
+ count -= size;
+ if (count < 0) {
+ // Reached only if count < size on entry, i.e. the asserted precondition was violated.
+ count = 0;
+ }
+ return elements+count-size;
+ }
+
+
+
+ // Number of elements currently held.
+ inline int32_t UVector32::size(void) const {
+ return count;
+ }
+
+ // True when the vector holds no elements.
+ inline UBool UVector32::isEmpty(void) const {
+ return count == 0;
+ }
+
+ // True when indexOf() reports a non-negative position for obj.
+ inline UBool UVector32::contains(int32_t obj) const {
+ return indexOf(obj) >= 0;
+ }
+
+ // Last element; elementAti() yields 0 when the vector is empty.
+ inline int32_t UVector32::lastElementi(void) const {
+ return elementAti(count-1);
+ }
+
+ // Negation of operator==().
+ inline UBool UVector32::operator!=(const UVector32& other) {
+ return !operator==(other);
+ }
+
+ // Direct pointer to the internal element array (not a copy).
+ inline int32_t *UVector32::getBuffer() const {
+ return elements;
+ }
+
+
+// UStack inlines
+
+ // Stack-style alias for isEmpty().
+ inline UBool UVector32::empty(void) const {
+ return isEmpty();
+ }
+
+ // Stack-style alias for lastElementi(): the top element without removing it.
+ inline int32_t UVector32::peeki(void) const {
+ return lastElementi();
+ }
+
+ // Push i via addElement() and return i. Note that i is returned even when
+ // addElement() could not store it; callers must inspect status.
+ inline int32_t UVector32::push(int32_t i, UErrorCode &status) {
+ addElement(i, status);
+ return i;
+ }
+
+ // Remove and return the top element, or return 0 if the vector is empty.
+ inline int32_t UVector32::popi(void) {
+     if (count <= 0) {
+         return 0;
+     }
+     --count;
+     return elements[count];
+ }
+
+U_NAMESPACE_END
+
+#endif
diff --git a/thirdparty/icu4c/common/uvectr64.cpp b/thirdparty/icu4c/common/uvectr64.cpp
new file mode 100644
index 0000000000..081565959c
--- /dev/null
+++ b/thirdparty/icu4c/common/uvectr64.cpp
@@ -0,0 +1,214 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1999-2015, International Business Machines Corporation and
+* others. All Rights Reserved.
+******************************************************************************
+*/
+
+#include "uvectr64.h"
+#include "cmemory.h"
+#include "putilimp.h"
+
+U_NAMESPACE_BEGIN
+
+#define DEFAULT_CAPACITY 8
+
+/*
+ * Constants for hinting whether a key is an integer
+ * or a pointer. If a hint bit is zero, then the associated
+ * token is assumed to be an integer. This is needed for iSeries
+ */
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UVector64)
+
+ // Construct an empty vector with the default initial capacity.
+ // Allocation failure is reported through status by _init().
+ UVector64::UVector64(UErrorCode &status)
+     : count(0), capacity(0), maxCapacity(0), elements(NULL)
+ {
+     _init(DEFAULT_CAPACITY, status);
+ }
+
+ // Construct an empty vector with room for initialCapacity elements.
+ // Out-of-range requests are adjusted by _init().
+ UVector64::UVector64(int32_t initialCapacity, UErrorCode &status)
+     : count(0), capacity(0), maxCapacity(0), elements(NULL)
+ {
+     _init(initialCapacity, status);
+ }
+
+
+
+ /**
+  * Allocate the initial element storage.
+  *
+  * @param initialCapacity requested number of elements; values below 1,
+  *        above maxCapacity (when a limit is set), or too large to express
+  *        as a byte count in int32_t are replaced by a usable value.
+  * @param status set to U_MEMORY_ALLOCATION_ERROR if the allocation fails;
+  *        capacity then stays 0.
+  */
+ void UVector64::_init(int32_t initialCapacity, UErrorCode &status) {
+     // Fix bogus initialCapacity values; avoid malloc(0)
+     if (initialCapacity < 1) {
+         initialCapacity = DEFAULT_CAPACITY;
+     }
+     if (maxCapacity>0 && maxCapacity<initialCapacity) {
+         initialCapacity = maxCapacity;
+     }
+     if (initialCapacity > (int32_t)(INT32_MAX / sizeof(int64_t))) {
+         // The byte count would overflow; fall back to the default capacity.
+         // maxCapacity == 0 means "no limit", so it must not take part in the
+         // minimum, otherwise the result would be a zero-sized allocation.
+         initialCapacity = (maxCapacity > 0) ? uprv_min(DEFAULT_CAPACITY, maxCapacity)
+                                             : DEFAULT_CAPACITY;
+     }
+     elements = (int64_t *)uprv_malloc(sizeof(int64_t)*initialCapacity);
+     if (elements == 0) {
+         status = U_MEMORY_ALLOCATION_ERROR;
+     } else {
+         capacity = initialCapacity;
+     }
+ }
+
+ // Release the element storage.
+ UVector64::~UVector64() {
+     uprv_free(elements);
+     elements = NULL;
+ }
+
+/**
+ * Assign this object to another (make this a copy of 'other').
+ */
+void UVector64::assign(const UVector64& other, UErrorCode &ec) {
+ if (ensureCapacity(other.count, ec)) {
+ setSize(other.count);
+ for (int32_t i=0; i<other.count; ++i) {
+ elements[i] = other.elements[i];
+ }
+ }
+}
+
+
+ // Two vectors are equal when they have the same size and the same value at
+ // every index.
+ UBool UVector64::operator==(const UVector64& other) {
+     if (count != other.count) {
+         return FALSE;
+     }
+     int32_t idx = 0;
+     while (idx < count) {
+         if (elements[idx] != other.elements[idx]) {
+             return FALSE;
+         }
+         ++idx;
+     }
+     return TRUE;
+ }
+
+
+ // Overwrite the element at index. An index outside [0, count) is ignored.
+ void UVector64::setElementAt(int64_t elem, int32_t index) {
+     if (index < 0 || index >= count) {
+         return;  // index out of range
+     }
+     elements[index] = elem;
+ }
+
+ // Insert elem before position index, shifting later elements up by one.
+ // Requires 0 <= index <= count; anything else is ignored. Capacity is only
+ // requested once the index is known to be valid.
+ void UVector64::insertElementAt(int64_t elem, int32_t index, UErrorCode &status) {
+     if (index < 0 || index > count) {
+         return;  // index out of range
+     }
+     if (!ensureCapacity(count + 1, status)) {
+         return;
+     }
+     for (int32_t dst = count; dst > index; --dst) {
+         elements[dst] = elements[dst - 1];
+     }
+     elements[index] = elem;
+     ++count;
+ }
+
+ // Empty the vector by resetting the element count; the storage and its
+ // capacity are left as they are.
+ void UVector64::removeAllElements(void) {
+ count = 0;
+ }
+
+ // Grow the storage so that at least minimumCapacity elements fit.
+ // Returns TRUE when the capacity is sufficient on return, FALSE otherwise.
+ // On FALSE the existing storage and contents are untouched and, unless
+ // status was already a failure on entry, status is set to:
+ //   U_ILLEGAL_ARGUMENT_ERROR   negative request, or a size computation would overflow
+ //   U_BUFFER_OVERFLOW_ERROR    request exceeds a non-zero maxCapacity
+ //   U_MEMORY_ALLOCATION_ERROR  realloc failed
+ UBool UVector64::expandCapacity(int32_t minimumCapacity, UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return FALSE;
+ }
+ if (minimumCapacity < 0) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return FALSE;
+ }
+ if (capacity >= minimumCapacity) {
+ return TRUE;
+ }
+ // maxCapacity == 0 means no limit is configured.
+ if (maxCapacity>0 && minimumCapacity>maxCapacity) {
+ status = U_BUFFER_OVERFLOW_ERROR;
+ return FALSE;
+ }
+ if (capacity > (INT32_MAX - 1) / 2) { // integer overflow check
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return FALSE;
+ }
+ // Growth policy: double, but never less than requested and never more than the limit.
+ int32_t newCap = capacity * 2;
+ if (newCap < minimumCapacity) {
+ newCap = minimumCapacity;
+ }
+ if (maxCapacity > 0 && newCap > maxCapacity) {
+ newCap = maxCapacity;
+ }
+ if (newCap > (int32_t)(INT32_MAX / sizeof(int64_t))) { // integer overflow check
+ // We keep the original memory contents on bad minimumCapacity/maxCapacity.
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return FALSE;
+ }
+ int64_t* newElems = (int64_t *)uprv_realloc(elements, sizeof(int64_t)*newCap);
+ if (newElems == NULL) {
+ // We keep the original contents on the memory failure on realloc.
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return FALSE;
+ }
+ elements = newElems;
+ capacity = newCap;
+ return TRUE;
+ }
+
+ // Set the upper bound (in elements) for this vector's capacity; 0 removes
+ // the bound. A negative limit is treated as 0. A limit whose byte size would
+ // not fit in int32_t is rejected and nothing changes. If the current
+ // capacity exceeds the new limit the storage is shrunk to the limit and
+ // count is truncated to fit.
+ void UVector64::setMaxCapacity(int32_t limit) {
+ U_ASSERT(limit >= 0);
+ if (limit < 0) {
+ limit = 0;
+ }
+ if (limit > (int32_t)(INT32_MAX / sizeof(int64_t))) { // integer overflow check for realloc
+ // Something is very wrong, don't realloc, leave capacity and maxCapacity unchanged
+ return;
+ }
+ maxCapacity = limit;
+ if (capacity <= maxCapacity || maxCapacity == 0) {
+ // Current capacity is within the new limit.
+ return;
+ }
+
+ // New maximum capacity is smaller than the current size.
+ // Realloc the storage to the new, smaller size.
+ int64_t* newElems = (int64_t *)uprv_realloc(elements, sizeof(int64_t)*maxCapacity);
+ if (newElems == NULL) {
+ // Realloc to smaller failed.
+ // Just keep what we had. No need to call it a failure.
+ // Note: maxCapacity has already been updated, so capacity stays above the limit here.
+ return;
+ }
+ elements = newElems;
+ capacity = maxCapacity;
+ if (count > capacity) {
+ count = capacity;
+ }
+ }
+
+/**
+ * Change the size of this vector as follows: If newSize is smaller,
+ * then truncate the array, possibly deleting held elements for i >=
+ * newSize. If newSize is larger, grow the array, filling in new
+ * slots with NULL.
+ */
+void UVector64::setSize(int32_t newSize) {
+ int32_t i;
+ if (newSize < 0) {
+ return;
+ }
+ if (newSize > count) {
+ UErrorCode ec = U_ZERO_ERROR;
+ if (!ensureCapacity(newSize, ec)) {
+ return;
+ }
+ for (i=count; i<newSize; ++i) {
+ elements[i] = 0;
+ }
+ }
+ count = newSize;
+}
+
+U_NAMESPACE_END
+
diff --git a/thirdparty/icu4c/common/uvectr64.h b/thirdparty/icu4c/common/uvectr64.h
new file mode 100644
index 0000000000..15c9b3f830
--- /dev/null
+++ b/thirdparty/icu4c/common/uvectr64.h
@@ -0,0 +1,279 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+//
+// UVector64 is a class implementing a vector of 64 bit integers.
+// It is similar to UVector32, but holds int64_t values rather than int32_t.
+// Most of the code is unchanged from UVector.
+//
+
+#ifndef UVECTOR64_H
+#define UVECTOR64_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "uhash.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+
+
+/**
+ * <p>Ultralightweight C++ implementation of an <tt>int64_t</tt> vector
+ * that has a subset of methods from UVector32
+ *
+ * <p>This is a very simple implementation, written to satisfy an
+ * immediate porting need. As such, it is not completely fleshed out,
+ * and it aims for simplicity and conformity. Nonetheless, it serves
+ * its purpose (porting code from java that uses java.util.Vector)
+ * well, and it could be easily made into a more robust vector class.
+ *
+ * <p><b>Design notes</b>
+ *
+ * <p>There is index bounds checking, but little is done about it. If
+ * indices are out of bounds, either nothing happens, or zero is
+ * returned. We <em>do</em> avoid indexing off into the weeds.
+ *
+ * <p>There is detection of out of memory, but the handling is very
+ * coarse-grained -- similar to UnicodeString's protocol, but even
+ * coarser. The class contains <em>one static flag</em> that is set
+ * when any call to <tt>new</tt> returns zero. This allows the caller
+ * to use several vectors and make just one check at the end to see if
+ * a memory failure occurred. This is more efficient than making a
+ * check after each call on each vector when doing many operations on
+ * multiple vectors. The single static flag works best when memory
+ * failures are infrequent, and when recovery options are limited or
+ * nonexistent.
+ *
+ * <p><b>To do</b>
+ *
+ * <p>Improve the handling of index out of bounds errors.
+ *
+ */
+class U_COMMON_API UVector64 : public UObject {
+private:
+ int32_t count;
+
+ int32_t capacity;
+
+ int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow.
+
+ int64_t* elements;
+
+public:
+ UVector64(UErrorCode &status);
+
+ UVector64(int32_t initialCapacity, UErrorCode &status);
+
+ virtual ~UVector64();
+
+ /**
+ * Assign this object to another (make this a copy of 'other').
+ * Use the 'assign' function to assign each element.
+ */
+ void assign(const UVector64& other, UErrorCode &ec);
+
+ /**
+ * Compare this vector with another. They will be considered
+ * equal if they are of the same size and all elements are equal,
+ * as compared using this object's comparer.
+ */
+ UBool operator==(const UVector64& other);
+
+ /**
+ * Equivalent to !operator==()
+ */
+ inline UBool operator!=(const UVector64& other);
+
+ //------------------------------------------------------------
+ // subset of java.util.Vector API
+ //------------------------------------------------------------
+
+ inline void addElement(int64_t elem, UErrorCode &status);
+
+ void setElementAt(int64_t elem, int32_t index);
+
+ void insertElementAt(int64_t elem, int32_t index, UErrorCode &status);
+
+ inline int64_t elementAti(int32_t index) const;
+
+ //UBool equals(const UVector64 &other) const;
+
+ inline int64_t lastElementi(void) const;
+
+ //int32_t indexOf(int64_t elem, int32_t startIndex = 0) const;
+
+ //UBool contains(int64_t elem) const;
+
+ //UBool containsAll(const UVector64& other) const;
+
+ //UBool removeAll(const UVector64& other);
+
+ //UBool retainAll(const UVector64& other);
+
+ //void removeElementAt(int32_t index);
+
+ void removeAllElements();
+
+ inline int32_t size(void) const;
+
+ inline UBool isEmpty(void) const { return count == 0; }
+
+ // Inline. Use this one for speedy size check.
+ inline UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status);
+
+ // Out-of-line, handles actual growth. Called by ensureCapacity() when necessary.
+ UBool expandCapacity(int32_t minimumCapacity, UErrorCode &status);
+
+ /**
+ * Change the size of this vector as follows: If newSize is
+ * smaller, then truncate the array, possibly deleting held
+ * elements for i >= newSize. If newSize is larger, grow the
+ * array, filling in new slots with zero.
+ */
+ void setSize(int32_t newSize);
+
+ //------------------------------------------------------------
+ // New API
+ //------------------------------------------------------------
+
+ //UBool containsNone(const UVector64& other) const;
+
+
+ //void sortedInsert(int64_t elem, UErrorCode& ec);
+
+ /**
+ * Returns a pointer to the internal array holding the vector.
+ */
+ inline int64_t *getBuffer() const;
+
+ /**
+ * Set the maximum allowed buffer capacity for this vector/stack.
+ * Default with no limit set is unlimited, go until malloc() fails.
+ * A Limit of zero means unlimited capacity.
+ * Units are vector elements (64 bits each), not bytes.
+ */
+ void setMaxCapacity(int32_t limit);
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ */
+ virtual UClassID getDynamicClassID() const;
+
+private:
+ void _init(int32_t initialCapacity, UErrorCode &status);
+
+ // Disallow
+ UVector64(const UVector64&);
+
+ // Disallow
+ UVector64& operator=(const UVector64&);
+
+
+ // API Functions for Stack operations.
+ // In the original UVector, these were in a separate derived class, UStack.
+ // Here in UVector64, they are all together.
+public:
+ //UBool empty(void) const; // TODO: redundant, same as empty(). Remove it?
+
+ //int64_t peeki(void) const;
+
+ inline int64_t popi(void);
+
+ inline int64_t push(int64_t i, UErrorCode &status);
+
+ inline int64_t *reserveBlock(int32_t size, UErrorCode &status);
+ inline int64_t *popFrame(int32_t size);
+};
+
+
+// UVector64 inlines
+
+ // Make sure at least minimumCapacity elements fit. The common "already big
+ // enough" case is answered inline; negative requests and real growth are
+ // delegated to expandCapacity().
+ inline UBool UVector64::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
+     if (minimumCapacity < 0 || capacity < minimumCapacity) {
+         return expandCapacity(minimumCapacity, status);
+     }
+     return true;
+ }
+
+ // Return the element at index, or 0 when index is outside [0, count).
+ inline int64_t UVector64::elementAti(int32_t index) const {
+     if (index < 0 || index >= count) {
+         return 0;
+     }
+     return elements[index];
+ }
+
+
+ // Append elem to the end of the vector. Nothing is stored if the capacity
+ // for one more element cannot be obtained.
+ inline void UVector64::addElement(int64_t elem, UErrorCode &status) {
+     if (!ensureCapacity(count + 1, status)) {
+         return;
+     }
+     elements[count] = elem;
+     ++count;
+ }
+
+ // Extend the vector by size elements and return a pointer to the first of
+ // them, or NULL if the capacity could not be obtained. The new elements are
+ // not initialized here.
+ inline int64_t *UVector64::reserveBlock(int32_t size, UErrorCode &status) {
+     if (!ensureCapacity(count + size, status)) {
+         return NULL;
+     }
+     // Take the address only after any reallocation has happened.
+     int64_t *blockStart = elements + count;
+     count += size;
+     return blockStart;
+ }
+
+ // Remove the top `size` elements. The count is lowered by `size` (clamped at
+ // zero) and the function returns elements + (new count) - size, i.e. a
+ // pointer `size` elements below the new top.
+ // NOTE(review): presumably that is the start of the preceding, equally sized
+ // frame. If fewer than `size` elements remain after the pop, the returned
+ // pointer lies before the start of the buffer -- confirm that callers
+ // guarantee this cannot happen.
+ inline int64_t *UVector64::popFrame(int32_t size) {
+ U_ASSERT(count >= size);
+ count -= size;
+ if (count < 0) {
+ // Reached only if count < size on entry, i.e. the asserted precondition was violated.
+ count = 0;
+ }
+ return elements+count-size;
+ }
+
+
+
+ // Number of elements currently held.
+ inline int32_t UVector64::size(void) const {
+ return count;
+ }
+
+ // Last element; elementAti() yields 0 when the vector is empty.
+ inline int64_t UVector64::lastElementi(void) const {
+ return elementAti(count-1);
+ }
+
+ // Negation of operator==().
+ inline UBool UVector64::operator!=(const UVector64& other) {
+ return !operator==(other);
+ }
+
+ // Direct pointer to the internal element array (not a copy).
+ inline int64_t *UVector64::getBuffer() const {
+ return elements;
+ }
+
+
+// UStack inlines
+
+ // Push i via addElement() and return i. Note that i is returned even when
+ // addElement() could not store it; callers must inspect status.
+ inline int64_t UVector64::push(int64_t i, UErrorCode &status) {
+ addElement(i, status);
+ return i;
+ }
+
+ // Remove and return the top element, or return 0 if the vector is empty.
+ inline int64_t UVector64::popi(void) {
+     if (count <= 0) {
+         return 0;
+     }
+     --count;
+     return elements[count];
+ }
+
+U_NAMESPACE_END
+
+#endif
diff --git a/thirdparty/icu4c/common/wintz.cpp b/thirdparty/icu4c/common/wintz.cpp
new file mode 100644
index 0000000000..3730232286
--- /dev/null
+++ b/thirdparty/icu4c/common/wintz.cpp
@@ -0,0 +1,147 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+********************************************************************************
+* Copyright (C) 2005-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+********************************************************************************
+*
+* File WINTZ.CPP
+*
+********************************************************************************
+*/
+
+#include "unicode/utypes.h"
+
+#if U_PLATFORM_USES_ONLY_WIN32_API
+
+#include "wintz.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "cstring.h"
+
+#include "unicode/ures.h"
+#include "unicode/unistr.h"
+#include "uresimp.h"
+
+#ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+#endif
+# define VC_EXTRALEAN
+# define NOUSER
+# define NOSERVICE
+# define NOIME
+# define NOMCX
+#include <windows.h>
+
+U_NAMESPACE_BEGIN
+
+/**
+* Main Windows time zone detection function.
+* Returns the Windows time zone converted to an ICU time zone as a heap-allocated buffer, or nullptr upon failure.
+*
+* Note: We use the Win32 API GetDynamicTimeZoneInformation (available since Vista+) to get the current time zone info.
+* This API returns a non-localized time zone name, which is mapped to an ICU time zone ID (~ Olsen ID).
+*/
+U_CAPI const char* U_EXPORT2
+uprv_detectWindowsTimeZone()
+{
+ // Obtain the DYNAMIC_TIME_ZONE_INFORMATION info to get the non-localized time zone name.
+ DYNAMIC_TIME_ZONE_INFORMATION dynamicTZI;
+ uprv_memset(&dynamicTZI, 0, sizeof(dynamicTZI));
+ SYSTEMTIME systemTimeAllZero;
+ uprv_memset(&systemTimeAllZero, 0, sizeof(systemTimeAllZero));
+
+ if (GetDynamicTimeZoneInformation(&dynamicTZI) == TIME_ZONE_ID_INVALID) {
+ return nullptr;
+ }
+
+ // If the DST setting has been turned off in the Control Panel, then return "Etc/GMT<offset>".
+ //
+ // Note: This logic is based on how the Control Panel itself determines if DST is 'off' on Windows.
+ // The code is somewhat convoluted; in a sort of pseudo-code it looks like this:
+ //
+ // IF (GetDynamicTimeZoneInformation != TIME_ZONE_ID_INVALID) && (DynamicDaylightTimeDisabled != 0) &&
+ // (StandardDate == DaylightDate) &&
+ // (
+ // (TimeZoneKeyName != Empty && StandardDate == 0) ||
+ // (TimeZoneKeyName == Empty && StandardDate != 0)
+ // )
+ // THEN
+ // DST setting is "Disabled".
+ //
+ if (dynamicTZI.DynamicDaylightTimeDisabled != 0 &&
+ uprv_memcmp(&dynamicTZI.StandardDate, &dynamicTZI.DaylightDate, sizeof(dynamicTZI.StandardDate)) == 0 &&
+ ((dynamicTZI.TimeZoneKeyName[0] != L'\0' && uprv_memcmp(&dynamicTZI.StandardDate, &systemTimeAllZero, sizeof(systemTimeAllZero)) == 0) ||
+ (dynamicTZI.TimeZoneKeyName[0] == L'\0' && uprv_memcmp(&dynamicTZI.StandardDate, &systemTimeAllZero, sizeof(systemTimeAllZero)) != 0)))
+ {
+ LONG utcOffsetMins = dynamicTZI.Bias;
+ if (utcOffsetMins == 0) {
+ return uprv_strdup("Etc/UTC");
+ }
+
+ // No way to support when DST is turned off and the offset in minutes is not a multiple of 60.
+ if (utcOffsetMins % 60 == 0) {
+ char gmtOffsetTz[11] = {}; // "Etc/GMT+dd" is 11-char long with a terminal null.
+ // Note '-' before 'utcOffsetMin'. The timezone ID's sign convention
+ // is that a timezone ahead of UTC is Etc/GMT-<offset> and a timezone
+ // behind UTC is Etc/GMT+<offset>.
+ int ret = snprintf(gmtOffsetTz, UPRV_LENGTHOF(gmtOffsetTz), "Etc/GMT%+d", -utcOffsetMins / 60);
+ if (ret > 0 && ret < UPRV_LENGTHOF(gmtOffsetTz)) {
+ return uprv_strdup(gmtOffsetTz);
+ }
+ }
+ }
+
+ // If DST is NOT disabled, but we have an empty TimeZoneKeyName, then it is unclear
+ // what we should do as this should not happen.
+ if (dynamicTZI.TimeZoneKeyName[0] == 0) {
+ return nullptr;
+ }
+
+ CharString winTZ;
+ UErrorCode status = U_ZERO_ERROR;
+ winTZ.appendInvariantChars(UnicodeString(TRUE, dynamicTZI.TimeZoneKeyName, -1), status);
+
+ // Map Windows Timezone name (non-localized) to ICU timezone ID (~ Olson timezone id).
+ StackUResourceBundle winTZBundle;
+ ures_openDirectFillIn(winTZBundle.getAlias(), nullptr, "windowsZones", &status);
+ ures_getByKey(winTZBundle.getAlias(), "mapTimezones", winTZBundle.getAlias(), &status);
+ ures_getByKey(winTZBundle.getAlias(), winTZ.data(), winTZBundle.getAlias(), &status);
+
+ if (U_FAILURE(status)) {
+ return nullptr;
+ }
+
+ // Note: Since the ISO 3166 country/region codes are all invariant ASCII chars, we can
+ // directly downcast from wchar_t to do the conversion.
+ // We could call the A version of the GetGeoInfo API, but that would be slightly slower than calling the W API,
+ // as the A version of the API will end up calling MultiByteToWideChar anyways internally.
+ wchar_t regionCodeW[3] = {};
+ char regionCode[3] = {}; // 2 letter ISO 3166 country/region code made entirely of invariant chars.
+ int geoId = GetUserGeoID(GEOCLASS_NATION);
+ int regionCodeLen = GetGeoInfoW(geoId, GEO_ISO2, regionCodeW, UPRV_LENGTHOF(regionCodeW), 0);
+
+ const UChar *icuTZ16 = nullptr;
+ int32_t tzLen;
+
+ if (regionCodeLen != 0) {
+ for (int i = 0; i < UPRV_LENGTHOF(regionCodeW); i++) {
+ regionCode[i] = static_cast<char>(regionCodeW[i]);
+ }
+ icuTZ16 = ures_getStringByKey(winTZBundle.getAlias(), regionCode, &tzLen, &status);
+ }
+ if (regionCodeLen == 0 || U_FAILURE(status)) {
+ // fallback to default "001" (world)
+ status = U_ZERO_ERROR;
+ icuTZ16 = ures_getStringByKey(winTZBundle.getAlias(), "001", &tzLen, &status);
+ }
+
+ // Note: cloneData returns nullptr if the status is a failure, so this
+ // will return nullptr if the above look-up fails.
+ CharString icuTZStr;
+ return icuTZStr.appendInvariantChars(icuTZ16, tzLen, status).cloneData(status);
+}
+
+U_NAMESPACE_END
+#endif /* U_PLATFORM_USES_ONLY_WIN32_API */
diff --git a/thirdparty/icu4c/common/wintz.h b/thirdparty/icu4c/common/wintz.h
new file mode 100644
index 0000000000..ce9c1e9019
--- /dev/null
+++ b/thirdparty/icu4c/common/wintz.h
@@ -0,0 +1,36 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+********************************************************************************
+* Copyright (C) 2005-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+********************************************************************************
+*
+* File WINTZ.H
+*
+********************************************************************************
+*/
+
+#ifndef __WINTZ
+#define __WINTZ
+
+#include "unicode/utypes.h"
+
+#if U_PLATFORM_USES_ONLY_WIN32_API
+
+/**
+ * \file
+ * \brief C API: Utilities for dealing w/ Windows time zones.
+ */
+
+U_CDECL_BEGIN
+/* Forward declarations for Windows types... */
+typedef struct _TIME_ZONE_INFORMATION TIME_ZONE_INFORMATION;
+U_CDECL_END
+
+U_CAPI const char* U_EXPORT2
+uprv_detectWindowsTimeZone();
+
+#endif /* U_PLATFORM_USES_ONLY_WIN32_API */
+
+#endif /* __WINTZ */
diff --git a/thirdparty/icu4c/godot_data.json b/thirdparty/icu4c/godot_data.json
new file mode 100644
index 0000000000..16cfcd2651
--- /dev/null
+++ b/thirdparty/icu4c/godot_data.json
@@ -0,0 +1,9 @@
+{
+ strategy: "additive"
+ featureFilters: {
+ brkitr_rules: include
+ brkitr_dictionaries: include
+ brkitr_tree: include
+ misc: include
+ }
+} \ No newline at end of file
diff --git a/thirdparty/icu4c/icudt68l.dat b/thirdparty/icu4c/icudt68l.dat
new file mode 100644
index 0000000000..548c1a5a72
--- /dev/null
+++ b/thirdparty/icu4c/icudt68l.dat
Binary files differ
diff --git a/thirdparty/meshoptimizer/LICENSE.md b/thirdparty/meshoptimizer/LICENSE.md
new file mode 100644
index 0000000000..4fcd766d22
--- /dev/null
+++ b/thirdparty/meshoptimizer/LICENSE.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2016-2020 Arseny Kapoulkine
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/thirdparty/meshoptimizer/allocator.cpp b/thirdparty/meshoptimizer/allocator.cpp
new file mode 100644
index 0000000000..da7cc540b2
--- /dev/null
+++ b/thirdparty/meshoptimizer/allocator.cpp
@@ -0,0 +1,8 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+ // Install the allocation callbacks used by meshopt_Allocator by storing them
+ // in meshopt_Allocator::Storage. Both pointers are stored as given; no
+ // validation is performed here.
+ void meshopt_setAllocator(void* (*allocate)(size_t), void (*deallocate)(void*))
+ {
+ meshopt_Allocator::Storage::allocate = allocate;
+ meshopt_Allocator::Storage::deallocate = deallocate;
+ }
diff --git a/thirdparty/meshoptimizer/clusterizer.cpp b/thirdparty/meshoptimizer/clusterizer.cpp
new file mode 100644
index 0000000000..f7d88c5136
--- /dev/null
+++ b/thirdparty/meshoptimizer/clusterizer.cpp
@@ -0,0 +1,351 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+// This work is based on:
+// Graham Wihlidal. Optimizing the Graphics Pipeline with Compute. 2016
+// Matthaeus Chajdas. GeometryFX 1.2 - Cluster Culling. 2016
+// Jack Ritter. An Efficient Bounding Sphere. 1990
+namespace meshopt
+{
+
+static void computeBoundingSphere(float result[4], const float points[][3], size_t count)
+{ // Writes an approximate bounding sphere of points[0..count) into result as {center x, center y, center z, radius}.
+ assert(count > 0);
+
+ // find extremum points along all 3 axes; for each axis we get a pair of points with min/max coordinates
+ size_t pmin[3] = {0, 0, 0};
+ size_t pmax[3] = {0, 0, 0};
+
+ for (size_t i = 0; i < count; ++i)
+ {
+ const float* p = points[i];
+
+ for (int axis = 0; axis < 3; ++axis)
+ {
+ pmin[axis] = (p[axis] < points[pmin[axis]][axis]) ? i : pmin[axis]; // strict compare keeps the earliest index on ties
+ pmax[axis] = (p[axis] > points[pmax[axis]][axis]) ? i : pmax[axis];
+ }
+ }
+
+ // find the pair of points with largest distance
+ float paxisd2 = 0; // squared length of the longest min/max segment found so far
+ int paxis = 0; // axis whose min/max pair produced paxisd2
+
+ for (int axis = 0; axis < 3; ++axis)
+ {
+ const float* p1 = points[pmin[axis]];
+ const float* p2 = points[pmax[axis]];
+
+ float d2 = (p2[0] - p1[0]) * (p2[0] - p1[0]) + (p2[1] - p1[1]) * (p2[1] - p1[1]) + (p2[2] - p1[2]) * (p2[2] - p1[2]);
+
+ if (d2 > paxisd2)
+ {
+ paxisd2 = d2;
+ paxis = axis;
+ }
+ }
+
+ // use the longest segment as the initial sphere diameter
+ const float* p1 = points[pmin[paxis]];
+ const float* p2 = points[pmax[paxis]];
+
+ float center[3] = {(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2, (p1[2] + p2[2]) / 2};
+ float radius = sqrtf(paxisd2) / 2;
+
+ // iteratively adjust the sphere up until all points fit
+ for (size_t i = 0; i < count; ++i)
+ {
+ const float* p = points[i];
+ float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]);
+
+ if (d2 > radius * radius) // point lies outside the current sphere
+ {
+ float d = sqrtf(d2);
+ assert(d > 0); // d2 exceeds radius^2 >= 0 on this path
+
+ float k = 0.5f + (radius / d) / 2; // weight of the old center; with the radius update below the new sphere touches p and still contains the old sphere
+
+ center[0] = center[0] * k + p[0] * (1 - k);
+ center[1] = center[1] * k + p[1] * (1 - k);
+ center[2] = center[2] * k + p[2] * (1 - k);
+ radius = (radius + d) / 2;
+ }
+ }
+
+ result[0] = center[0];
+ result[1] = center[1];
+ result[2] = center[2];
+ result[3] = radius;
+}
+
+} // namespace meshopt
+
+size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles)
+{ // Returns the larger of the vertex-limited and triangle-limited meshlet counts for index_count indices.
+ assert(index_count % 3 == 0);
+ assert(max_vertices >= 3);
+ assert(max_triangles >= 1);
+
+ // meshlet construction is limited by max vertices and max triangles per meshlet
+ // the worst case is that the input is an unindexed stream since this equally stresses both limits
+ // note that we assume that in the worst case, we leave 2 vertices unpacked in each meshlet - if we have space for 3 we can pack any triangle
+ size_t max_vertices_conservative = max_vertices - 2; // >= 1 because max_vertices >= 3 is asserted above
+ size_t meshlet_limit_vertices = (index_count + max_vertices_conservative - 1) / max_vertices_conservative; // ceiling division
+ size_t meshlet_limit_triangles = (index_count / 3 + max_triangles - 1) / max_triangles; // ceiling division
+
+ return meshlet_limit_vertices > meshlet_limit_triangles ? meshlet_limit_vertices : meshlet_limit_triangles;
+}
+
+size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
+{ // Splits the triangle list greedily, in input order, into meshlets written to destination; returns how many were written.
+ assert(index_count % 3 == 0);
+ assert(max_vertices >= 3);
+ assert(max_triangles >= 1);
+
+ meshopt_Allocator allocator;
+
+ meshopt_Meshlet meshlet; // meshlet currently being filled
+ memset(&meshlet, 0, sizeof(meshlet));
+
+ assert(max_vertices <= sizeof(meshlet.vertices) / sizeof(meshlet.vertices[0]));
+ assert(max_triangles <= sizeof(meshlet.indices) / 3);
+
+ // index of the vertex in the meshlet, 0xff if the vertex isn't used
+ unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
+ memset(used, -1, vertex_count); // -1 fills every byte with 0xff, i.e. "not used"
+
+ size_t offset = 0; // number of meshlets written to destination so far
+
+ for (size_t i = 0; i < index_count; i += 3)
+ {
+ unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
+ assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+ unsigned char& av = used[a]; // references, so they observe the 0xff reset performed on a flush below
+ unsigned char& bv = used[b];
+ unsigned char& cv = used[c];
+
+ unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff); // vertices this triangle would add to the meshlet
+
+ if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles)
+ {
+ destination[offset++] = meshlet; // current meshlet is full: emit it
+
+ for (size_t j = 0; j < meshlet.vertex_count; ++j)
+ used[meshlet.vertices[j]] = 0xff; // release its vertices so the next meshlet re-adds them
+
+ memset(&meshlet, 0, sizeof(meshlet));
+ }
+
+ if (av == 0xff)
+ {
+ av = meshlet.vertex_count; // local index of a inside the meshlet
+ meshlet.vertices[meshlet.vertex_count++] = a;
+ }
+
+ if (bv == 0xff)
+ {
+ bv = meshlet.vertex_count;
+ meshlet.vertices[meshlet.vertex_count++] = b;
+ }
+
+ if (cv == 0xff)
+ {
+ cv = meshlet.vertex_count;
+ meshlet.vertices[meshlet.vertex_count++] = c;
+ }
+
+ meshlet.indices[meshlet.triangle_count][0] = av;
+ meshlet.indices[meshlet.triangle_count][1] = bv;
+ meshlet.indices[meshlet.triangle_count][2] = cv;
+ meshlet.triangle_count++;
+ }
+
+ if (meshlet.triangle_count) // emit the last, partially filled meshlet
+ destination[offset++] = meshlet;
+
+ assert(offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
+
+ return offset;
+}
+
+meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{ // Computes a bounding sphere and a normal cone (apex, axis, cutoff, plus 8-bit quantized axis/cutoff) for a cluster of at most 256 triangles.
+ using namespace meshopt;
+
+ assert(index_count % 3 == 0);
+ assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+ assert(vertex_positions_stride % sizeof(float) == 0);
+
+ assert(index_count / 3 <= 256); // the fixed-size normals/corners arrays below hold 256 triangles
+
+ (void)vertex_count; // otherwise only referenced inside an assert
+
+ size_t vertex_stride_float = vertex_positions_stride / sizeof(float); // stride converted from bytes to floats
+
+ // compute triangle normals and gather triangle corners
+ float normals[256][3];
+ float corners[256][3][3];
+ size_t triangles = 0; // number of non-degenerate triangles recorded
+
+ for (size_t i = 0; i < index_count; i += 3)
+ {
+ unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
+ assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+ const float* p0 = vertex_positions + vertex_stride_float * a;
+ const float* p1 = vertex_positions + vertex_stride_float * b;
+ const float* p2 = vertex_positions + vertex_stride_float * c;
+
+ float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
+ float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};
+
+ float normalx = p10[1] * p20[2] - p10[2] * p20[1]; // cross product p10 x p20
+ float normaly = p10[2] * p20[0] - p10[0] * p20[2];
+ float normalz = p10[0] * p20[1] - p10[1] * p20[0];
+
+ float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz); // length of the cross product, used only to normalize
+
+ // no need to include degenerate triangles - they will be invisible anyway
+ if (area == 0.f)
+ continue;
+
+ // record triangle normals & corners for future use; normal and corner 0 define a plane equation
+ normals[triangles][0] = normalx / area;
+ normals[triangles][1] = normaly / area;
+ normals[triangles][2] = normalz / area;
+ memcpy(corners[triangles][0], p0, 3 * sizeof(float));
+ memcpy(corners[triangles][1], p1, 3 * sizeof(float));
+ memcpy(corners[triangles][2], p2, 3 * sizeof(float));
+ triangles++;
+ }
+
+ meshopt_Bounds bounds = {};
+
+ // degenerate cluster, no valid triangles => trivial reject (cone data is 0)
+ if (triangles == 0)
+ return bounds;
+
+ // compute cluster bounding sphere; we'll use the center to determine normal cone apex as well
+ float psphere[4] = {};
+ computeBoundingSphere(psphere, corners[0], triangles * 3); // corners[0] views the corner array as a flat list of triangles * 3 points
+
+ float center[3] = {psphere[0], psphere[1], psphere[2]};
+
+ // treating triangle normals as points, find the bounding sphere - the sphere center determines the optimal cone axis
+ float nsphere[4] = {};
+ computeBoundingSphere(nsphere, normals, triangles);
+
+ float axis[3] = {nsphere[0], nsphere[1], nsphere[2]};
+ float axislength = sqrtf(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]);
+ float invaxislength = axislength == 0.f ? 0.f : 1.f / axislength; // a zero-length axis stays zero instead of dividing by zero
+
+ axis[0] *= invaxislength;
+ axis[1] *= invaxislength;
+ axis[2] *= invaxislength;
+
+ // compute a tight cone around all normals, mindp = cos(angle/2)
+ float mindp = 1.f;
+
+ for (size_t i = 0; i < triangles; ++i)
+ {
+ float dp = normals[i][0] * axis[0] + normals[i][1] * axis[1] + normals[i][2] * axis[2];
+
+ mindp = (dp < mindp) ? dp : mindp;
+ }
+
+ // fill bounding sphere info; note that below we can return bounds without cone information for degenerate cones
+ bounds.center[0] = center[0];
+ bounds.center[1] = center[1];
+ bounds.center[2] = center[2];
+ bounds.radius = psphere[3];
+
+ // degenerate cluster, normal cone is larger than a hemisphere => trivial accept
+ // note that if mindp is positive but close to 0, the triangle intersection code below gets less stable
+ // we arbitrarily decide that if a normal cone is ~168 degrees wide or more, the cone isn't useful
+ if (mindp <= 0.1f)
+ {
+ bounds.cone_cutoff = 1;
+ bounds.cone_cutoff_s8 = 127;
+ return bounds; // cone_apex and cone_axis keep their zero-initialized values on this path
+ }
+
+ float maxt = 0;
+
+ // we need to find the point on center-t*axis ray that lies in negative half-space of all triangles
+ for (size_t i = 0; i < triangles; ++i)
+ {
+ // dot(center-t*axis-corner, trinormal) = 0
+ // dot(center-corner, trinormal) - t * dot(axis, trinormal) = 0
+ float cx = center[0] - corners[i][0][0];
+ float cy = center[1] - corners[i][0][1];
+ float cz = center[2] - corners[i][0][2];
+
+ float dc = cx * normals[i][0] + cy * normals[i][1] + cz * normals[i][2];
+ float dn = axis[0] * normals[i][0] + axis[1] * normals[i][1] + axis[2] * normals[i][2];
+
+ // dn should be larger than mindp cutoff above
+ assert(dn > 0.f);
+ float t = dc / dn;
+
+ maxt = (t > maxt) ? t : maxt;
+ }
+
+ // cone apex should be in the negative half-space of all cluster triangles by construction
+ bounds.cone_apex[0] = center[0] - axis[0] * maxt;
+ bounds.cone_apex[1] = center[1] - axis[1] * maxt;
+ bounds.cone_apex[2] = center[2] - axis[2] * maxt;
+
+ // note: this axis is the axis of the normal cone, but our test for perspective camera effectively negates the axis
+ bounds.cone_axis[0] = axis[0];
+ bounds.cone_axis[1] = axis[1];
+ bounds.cone_axis[2] = axis[2];
+
+ // cos(a) for normal cone is mindp; we need to add 90 degrees on both sides and invert the cone
+ // which gives us -cos(a+90) = -(-sin(a)) = sin(a) = sqrt(1 - cos^2(a))
+ bounds.cone_cutoff = sqrtf(1 - mindp * mindp);
+
+ // quantize axis & cutoff to 8-bit SNORM format
+ bounds.cone_axis_s8[0] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[0], 8));
+ bounds.cone_axis_s8[1] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[1], 8));
+ bounds.cone_axis_s8[2] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[2], 8));
+
+ // for the 8-bit test to be conservative, we need to adjust the cutoff by measuring the max. error
+ float cone_axis_s8_e0 = fabsf(bounds.cone_axis_s8[0] / 127.f - bounds.cone_axis[0]);
+ float cone_axis_s8_e1 = fabsf(bounds.cone_axis_s8[1] / 127.f - bounds.cone_axis[1]);
+ float cone_axis_s8_e2 = fabsf(bounds.cone_axis_s8[2] / 127.f - bounds.cone_axis[2]);
+
+ // note that we need to round this up instead of rounding to nearest, hence +1
+ int cone_cutoff_s8 = int(127 * (bounds.cone_cutoff + cone_axis_s8_e0 + cone_axis_s8_e1 + cone_axis_s8_e2) + 1);
+
+ bounds.cone_cutoff_s8 = (cone_cutoff_s8 > 127) ? 127 : (signed char)(cone_cutoff_s8); // clamp to the signed 8-bit maximum
+
+ return bounds;
+}
+
+meshopt_Bounds meshopt_computeMeshletBounds(const meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{ // Expands the meshlet's local indices to mesh vertex indices and forwards them to meshopt_computeClusterBounds.
+ assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+ assert(vertex_positions_stride % sizeof(float) == 0);
+
+ unsigned int indices[sizeof(meshlet->indices) / sizeof(meshlet->indices[0][0])]; // one slot per element of meshlet->indices
+
+ for (size_t i = 0; i < meshlet->triangle_count; ++i)
+ {
+ unsigned int a = meshlet->vertices[meshlet->indices[i][0]]; // local meshlet index -> mesh vertex index
+ unsigned int b = meshlet->vertices[meshlet->indices[i][1]];
+ unsigned int c = meshlet->vertices[meshlet->indices[i][2]];
+
+ assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+ indices[i * 3 + 0] = a;
+ indices[i * 3 + 1] = b;
+ indices[i * 3 + 2] = c;
+ }
+
+ return meshopt_computeClusterBounds(indices, meshlet->triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride);
+}
diff --git a/thirdparty/meshoptimizer/indexcodec.cpp b/thirdparty/meshoptimizer/indexcodec.cpp
new file mode 100644
index 0000000000..eeb541e5be
--- /dev/null
+++ b/thirdparty/meshoptimizer/indexcodec.cpp
@@ -0,0 +1,752 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+#include <assert.h>
+#include <string.h>
+
+#ifndef TRACE
+#define TRACE 0
+#endif
+
+#if TRACE
+#include <stdio.h>
+#endif
+
+// This work is based on:
+// Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013
+// Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 2014
+namespace meshopt
+{
+
+const unsigned char kIndexHeader = 0xe0; // high nibble of the first byte of an encoded index buffer; the low nibble carries the version
+const unsigned char kSequenceHeader = 0xd0; // high nibble of the first byte of an encoded index sequence; the low nibble carries the version
+
+static int gEncodeIndexVersion = 0; // version written by the encoders; changed through meshopt_encodeIndexVersion
+
+typedef unsigned int VertexFifo[16]; // 16-entry ring buffer of vertex indices
+typedef unsigned int EdgeFifo[16][2]; // 16-entry ring buffer of edges stored as vertex index pairs
+
+static const unsigned int kTriangleIndexOrder[3][3] = { // row r lists the source corner positions of a triangle rotated left by r
+ {0, 1, 2},
+ {1, 2, 0},
+ {2, 0, 1},
+};
+
+static const unsigned char kCodeAuxEncodingTable[16] = { // codeaux bytes ((feb << 4) | fec) that can be referenced by a 4-bit table index
+ 0x00, 0x76, 0x87, 0x56, 0x67, 0x78, 0xa9, 0x86, 0x65, 0x89, 0x68, 0x98, 0x01, 0x69,
+ 0, 0, // last two entries aren't used for encoding
+};
+
+static int rotateTriangle(unsigned int a, unsigned int b, unsigned int c, unsigned int next)
+{ // Returns the kTriangleIndexOrder row that places the corner equal to next first: 1 if b matches, 2 if c matches, otherwise 0.
+ (void)a; // a needs no rotation whether or not it equals next
+
+ return (b == next) ? 1 : (c == next) ? 2 : 0;
+}
+
+static int getEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t offset)
+{ // Searches the edge fifo, newest entry first, for edge ab, bc or ca; returns (distance << 2) | edge (0 = ab, 1 = bc, 2 = ca) or -1 if none match.
+ for (int i = 0; i < 16; ++i)
+ {
+ size_t index = (offset - 1 - i) & 15; // i == 0 is the most recently pushed entry; & 15 wraps around the ring
+
+ unsigned int e0 = fifo[index][0];
+ unsigned int e1 = fifo[index][1];
+
+ if (e0 == a && e1 == b)
+ return (i << 2) | 0;
+ if (e0 == b && e1 == c)
+ return (i << 2) | 1;
+ if (e0 == c && e1 == a)
+ return (i << 2) | 2;
+ }
+
+ return -1;
+}
+
+static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, size_t& offset)
+{ // Stores edge (a, b) at the write position and advances offset, wrapping at 16 entries.
+ fifo[offset][0] = a;
+ fifo[offset][1] = b;
+ offset = (offset + 1) & 15;
+}
+
+static int getVertexFifo(VertexFifo fifo, unsigned int v, size_t offset)
+{ // Returns how many entries back from the newest one v is stored in the vertex fifo (0 = newest), or -1 if absent.
+ for (int i = 0; i < 16; ++i)
+ {
+ size_t index = (offset - 1 - i) & 15; // walk backwards from the most recently pushed entry
+
+ if (fifo[index] == v)
+ return i;
+ }
+
+ return -1;
+}
+
+static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset, int cond = 1)
+{ // Writes v at the write position and advances offset by cond, wrapping at 16 entries.
+ fifo[offset] = v; // written unconditionally; with cond == 0 the slot is overwritten by the next push
+ offset = (offset + cond) & 15;
+}
+
+static void encodeVByte(unsigned char*& data, unsigned int v)
+{ // Appends v at data as little-endian 7-bit groups and advances data past the bytes written.
+ // encode 32-bit value in up to 5 7-bit groups
+ do
+ {
+ *data++ = (v & 127) | (v > 127 ? 128 : 0); // bit 7 set means more groups follow
+ v >>= 7;
+ } while (v);
+}
+
+static unsigned int decodeVByte(const unsigned char*& data)
+{ // Reads a value stored as 7-bit groups (bit 7 = continuation) and advances data; consumes at most 5 bytes.
+ unsigned char lead = *data++;
+
+ // fast path: single byte
+ if (lead < 128)
+ return lead;
+
+ // slow path: up to 4 extra bytes
+ // note that this loop always terminates, which is important for malformed data
+ unsigned int result = lead & 127;
+ unsigned int shift = 7;
+
+ for (int i = 0; i < 4; ++i)
+ {
+ unsigned char group = *data++;
+ result |= (group & 127) << shift; // NOTE(review): (group & 127) is promoted to int; at shift == 28 a large group may not fit - confirm this is acceptable
+ shift += 7;
+
+ if (group < 128) // continuation bit clear: this was the last group
+ break;
+ }
+
+ return result;
+}
+
+static void encodeIndex(unsigned char*& data, unsigned int index, unsigned int last)
+{ // Appends index as a variable-length encoded signed delta from last.
+ unsigned int d = index - last; // wraps modulo 2^32 when index < last
+ unsigned int v = (d << 1) ^ (int(d) >> 31); // folds the sign into bit 0 so small negative deltas stay small; presumably relies on arithmetic right shift - confirm
+
+ encodeVByte(data, v);
+}
+
+static unsigned int decodeIndex(const unsigned char*& data, unsigned int last)
+{ // Reads a variable-length encoded delta (sign in bit 0) and returns last plus that delta.
+ unsigned int v = decodeVByte(data);
+ unsigned int d = (v >> 1) ^ -int(v & 1); // -int(v & 1) is 0 or all ones, which restores the sign of the delta
+
+ return last + d;
+}
+
+static int getCodeAuxIndex(unsigned char v, const unsigned char* table)
+{ // Returns the first position of v within the 16-entry table, or -1 when v is not present.
+ for (int i = 0; i < 16; ++i)
+ if (table[i] == v)
+ return i;
+
+ return -1;
+}
+
+static void writeTriangle(void* destination, size_t offset, size_t index_size, unsigned int a, unsigned int b, unsigned int c)
+{ // Stores indices a, b, c at element positions offset..offset + 2 of destination, as 16-bit or 32-bit values.
+ if (index_size == 2)
+ {
+ static_cast<unsigned short*>(destination)[offset + 0] = (unsigned short)(a); // values above 0xffff are truncated by the cast
+ static_cast<unsigned short*>(destination)[offset + 1] = (unsigned short)(b);
+ static_cast<unsigned short*>(destination)[offset + 2] = (unsigned short)(c);
+ }
+ else // any index_size other than 2 is written as unsigned int
+ {
+ static_cast<unsigned int*>(destination)[offset + 0] = a;
+ static_cast<unsigned int*>(destination)[offset + 1] = b;
+ static_cast<unsigned int*>(destination)[offset + 2] = c;
+ }
+}
+
+#if TRACE
+static size_t sortTop16(unsigned char dest[16], size_t stats[256])
+{ // TRACE-only helper: fills dest with up to 16 byte values ordered by descending stats[] count; returns how many were stored.
+ size_t destsize = 0;
+
+ for (size_t i = 0; i < 256; ++i)
+ {
+ size_t j = 0;
+ for (; j < destsize; ++j)
+ {
+ if (stats[i] >= stats[dest[j]]) // >= places a later value ahead of earlier values with the same count
+ {
+ if (destsize < 16)
+ destsize++;
+
+ memmove(&dest[j + 1], &dest[j], destsize - 1 - j); // shift the tail down by one; when already full the last entry falls off
+ dest[j] = (unsigned char)i;
+ break;
+ }
+ }
+
+ if (j == destsize && destsize < 16) // nothing ranked lower and there is room: append at the end
+ {
+ dest[destsize] = (unsigned char)i;
+ destsize++;
+ }
+ }
+
+ return destsize;
+}
+#endif
+
+} // namespace meshopt
+
+size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
+{ // Encodes a triangle index list into buffer; returns the number of bytes written, or 0 when buffer_size is too small.
+ using namespace meshopt;
+
+ assert(index_count % 3 == 0);
+
+#if TRACE
+ size_t codestats[256] = {};
+ size_t codeauxstats[256] = {};
+#endif
+
+ // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
+ if (buffer_size < 1 + index_count / 3 + 16)
+ return 0;
+
+ int version = gEncodeIndexVersion;
+
+ buffer[0] = (unsigned char)(kIndexHeader | version);
+
+ EdgeFifo edgefifo;
+ memset(edgefifo, -1, sizeof(edgefifo)); // every entry starts as 0xffffffff
+
+ VertexFifo vertexfifo;
+ memset(vertexfifo, -1, sizeof(vertexfifo));
+
+ size_t edgefifooffset = 0;
+ size_t vertexfifooffset = 0;
+
+ unsigned int next = 0; // index expected for the next vertex that has not been seen yet
+ unsigned int last = 0; // baseline for delta-encoded free indices
+
+ unsigned char* code = buffer + 1; // one code byte per triangle, stored right after the header
+ unsigned char* data = code + index_count / 3; // variable-length data stream, stored after all code bytes
+ unsigned char* data_safe_end = buffer + buffer_size - 16;
+
+ int fecmax = version >= 1 ? 13 : 15; // version 1 reserves fec values 13 and 14 for last-1 / last+1
+
+ // use static encoding table; it's possible to pack the result and then build an optimal table and repack
+ // for now we keep it simple and use the table that has been generated based on symbol frequency on a training mesh set
+ const unsigned char* codeaux_table = kCodeAuxEncodingTable;
+
+ for (size_t i = 0; i < index_count; i += 3)
+ {
+ // make sure we have enough space to write a triangle
+ // each triangle writes at most 16 bytes: 1b for codeaux and 5b for each free index
+ // after this we can be sure we can write without extra bounds checks
+ if (data > data_safe_end)
+ return 0;
+
+ int fer = getEdgeFifo(edgefifo, indices[i + 0], indices[i + 1], indices[i + 2], edgefifooffset);
+
+ if (fer >= 0 && (fer >> 2) < 15) // fifo distance 15 is excluded because a high nibble of 15 marks the non-edge encoding below
+ {
+ const unsigned int* order = kTriangleIndexOrder[fer & 3]; // rotate so that the matched edge is (a, b)
+
+ unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];
+
+ // encode edge index and vertex fifo index, next or free index
+ int fe = fer >> 2;
+ int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);
+
+ int fec = (fc >= 1 && fc < fecmax) ? fc : (c == next) ? (next++, 0) : 15; // 0 = next vertex, 1..fecmax-1 = fifo distance, 15 = free index
+
+ if (fec == 15 && version >= 1)
+ {
+ // encode last-1 and last+1 to optimize strip-like sequences
+ if (c + 1 == last)
+ fec = 13, last = c;
+ if (c == last + 1)
+ fec = 14, last = c;
+ }
+
+ *code++ = (unsigned char)((fe << 4) | fec);
+
+#if TRACE
+ codestats[code[-1]]++;
+#endif
+
+ // note that we need to update the last index since free indices are delta-encoded
+ if (fec == 15)
+ encodeIndex(data, c, last), last = c;
+
+ // we only need to push third vertex since first two are likely already in the vertex fifo
+ if (fec == 0 || fec >= fecmax)
+ pushVertexFifo(vertexfifo, c, vertexfifooffset);
+
+ // we only need to push two new edges to edge fifo since the third one is already there
+ pushEdgeFifo(edgefifo, c, b, edgefifooffset);
+ pushEdgeFifo(edgefifo, a, c, edgefifooffset);
+ }
+ else
+ {
+ int rotation = rotateTriangle(indices[i + 0], indices[i + 1], indices[i + 2], next);
+ const unsigned int* order = kTriangleIndexOrder[rotation];
+
+ unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];
+
+ // if a/b/c are 0/1/2, we emit a reset code
+ bool reset = false;
+
+ if (a == 0 && b == 1 && c == 2 && next > 0 && version >= 1)
+ {
+ reset = true;
+ next = 0;
+
+ // reset vertex fifo to make sure we don't accidentally reference vertices from that in the future
+ // this makes sure next continues to get incremented instead of being stuck
+ memset(vertexfifo, -1, sizeof(vertexfifo));
+ }
+
+ int fb = getVertexFifo(vertexfifo, b, vertexfifooffset);
+ int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);
+
+ // after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a
+ int fea = (a == next) ? (next++, 0) : 15;
+ int feb = (fb >= 0 && fb < 14) ? (fb + 1) : (b == next) ? (next++, 0) : 15; // fifo distance is stored +1 because 0 means "next"
+ int fec = (fc >= 0 && fc < 14) ? (fc + 1) : (c == next) ? (next++, 0) : 15;
+
+ // we encode feb & fec in 4 bits using a table if possible, and as a full byte otherwise
+ unsigned char codeaux = (unsigned char)((feb << 4) | fec);
+ int codeauxindex = getCodeAuxIndex(codeaux, codeaux_table);
+
+ // <14 encodes an index into codeaux table, 14 encodes fea=0, 15 encodes fea=15
+ if (fea == 0 && codeauxindex >= 0 && codeauxindex < 14 && !reset)
+ {
+ *code++ = (unsigned char)((15 << 4) | codeauxindex);
+ }
+ else
+ {
+ *code++ = (unsigned char)((15 << 4) | 14 | fea); // fea is 0 or 15, so this writes 0xfe or 0xff
+ *data++ = codeaux;
+ }
+
+#if TRACE
+ codestats[code[-1]]++;
+ codeauxstats[codeaux]++;
+#endif
+
+ // note that we need to update the last index since free indices are delta-encoded
+ if (fea == 15)
+ encodeIndex(data, a, last), last = a;
+
+ if (feb == 15)
+ encodeIndex(data, b, last), last = b;
+
+ if (fec == 15)
+ encodeIndex(data, c, last), last = c;
+
+ // only push vertices that weren't already in fifo
+ if (fea == 0 || fea == 15)
+ pushVertexFifo(vertexfifo, a, vertexfifooffset);
+
+ if (feb == 0 || feb == 15)
+ pushVertexFifo(vertexfifo, b, vertexfifooffset);
+
+ if (fec == 0 || fec == 15)
+ pushVertexFifo(vertexfifo, c, vertexfifooffset);
+
+ // all three edges aren't in the fifo; pushing all of them is important so that we can match them for later triangles
+ pushEdgeFifo(edgefifo, b, a, edgefifooffset);
+ pushEdgeFifo(edgefifo, c, b, edgefifooffset);
+ pushEdgeFifo(edgefifo, a, c, edgefifooffset);
+ }
+ }
+
+ // make sure we have enough space to write codeaux table
+ if (data > data_safe_end)
+ return 0;
+
+ // add codeaux encoding table to the end of the stream; this is used for decoding codeaux *and* as padding
+ // we need padding for decoding to be able to assume that each triangle is encoded as <= 16 bytes of extra data
+ // this is enough space for aux byte + 5 bytes per varint index which is the absolute worst case for any input
+ for (size_t i = 0; i < 16; ++i)
+ {
+ // decoder assumes that table entries never refer to separately encoded indices
+ assert((codeaux_table[i] & 0xf) != 0xf && (codeaux_table[i] >> 4) != 0xf);
+
+ *data++ = codeaux_table[i];
+ }
+
+ // since we encode restarts as codeaux without a table reference, we need to make sure 00 is encoded as a table reference
+ assert(codeaux_table[0] == 0);
+
+ assert(data >= buffer + index_count / 3 + 16);
+ assert(data <= buffer + buffer_size);
+
+#if TRACE
+ unsigned char codetop[16], codeauxtop[16];
+ size_t codetopsize = sortTop16(codetop, codestats);
+ size_t codeauxtopsize = sortTop16(codeauxtop, codeauxstats);
+
+ size_t sumcode = 0, sumcodeaux = 0;
+ for (size_t i = 0; i < 256; ++i)
+ sumcode += codestats[i], sumcodeaux += codeauxstats[i];
+
+ size_t acccode = 0, acccodeaux = 0;
+
+ printf("code\t\t\t\t\tcodeaux\n");
+
+ for (size_t i = 0; i < codetopsize && i < codeauxtopsize; ++i)
+ {
+ acccode += codestats[codetop[i]];
+ acccodeaux += codeauxstats[codeauxtop[i]];
+
+ printf("%2d: %02x = %d (%.1f%% ..%.1f%%)\t\t%2d: %02x = %d (%.1f%% ..%.1f%%)\n",
+ int(i), codetop[i], int(codestats[codetop[i]]), double(codestats[codetop[i]]) / double(sumcode) * 100, double(acccode) / double(sumcode) * 100,
+ int(i), codeauxtop[i], int(codeauxstats[codeauxtop[i]]), double(codeauxstats[codeauxtop[i]]) / double(sumcodeaux) * 100, double(acccodeaux) / double(sumcodeaux) * 100);
+ }
+#endif
+
+ return data - buffer; // total encoded size: header, code bytes, data bytes and the 16-byte table
+}
+
+size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count)
+{ // Returns a worst-case encoded size in bytes for index_count indices that reference vertex_count vertices.
+ assert(index_count % 3 == 0);
+
+ // compute number of bits required for each index
+ unsigned int vertex_bits = 1;
+
+ while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits)
+ vertex_bits++;
+
+ // worst-case encoding is 2 header bytes + 3 varint-7 encoded index deltas
+ unsigned int vertex_groups = (vertex_bits + 1 + 6) / 7; // +1 for the sign bit of the delta, +6 rounds up to whole 7-bit groups
+
+ return 1 + (index_count / 3) * (2 + 3 * vertex_groups) + 16; // header byte + per-triangle worst case + 16-byte trailing table
+}
+
+void meshopt_encodeIndexVersion(int version)
+{ // Selects the format version (0 or 1) that later index encoder calls write into their header byte.
+ assert(unsigned(version) <= 1); // the unsigned cast makes negative values fail the check as well
+
+ meshopt::gEncodeIndexVersion = version;
+}
+
+int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size)
+{ // Decodes an encoded index buffer into destination; returns 0 on success, -1 for a bad header or version, -2 for too little data, -3 for leftover or overrun data.
+ using namespace meshopt;
+
+ assert(index_count % 3 == 0);
+ assert(index_size == 2 || index_size == 4);
+
+ // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
+ if (buffer_size < 1 + index_count / 3 + 16)
+ return -2;
+
+ if ((buffer[0] & 0xf0) != kIndexHeader)
+ return -1;
+
+ int version = buffer[0] & 0x0f;
+ if (version > 1)
+ return -1;
+
+ EdgeFifo edgefifo;
+ memset(edgefifo, -1, sizeof(edgefifo));
+
+ VertexFifo vertexfifo;
+ memset(vertexfifo, -1, sizeof(vertexfifo));
+
+ size_t edgefifooffset = 0;
+ size_t vertexfifooffset = 0;
+
+ unsigned int next = 0; // index assigned to the next not-yet-seen vertex
+ unsigned int last = 0; // baseline for delta-encoded free indices
+
+ int fecmax = version >= 1 ? 13 : 15; // in version 1, fec 13 and 14 mean last-1 / last+1
+
+ // since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end
+ const unsigned char* code = buffer + 1;
+ const unsigned char* data = code + index_count / 3;
+ const unsigned char* data_safe_end = buffer + buffer_size - 16;
+
+ const unsigned char* codeaux_table = data_safe_end; // the table occupies the final 16 bytes of the buffer
+
+ for (size_t i = 0; i < index_count; i += 3)
+ {
+ // make sure we have enough data to read for a triangle
+ // each triangle reads at most 16 bytes of data: 1b for codeaux and 5b for each free index
+ // after this we can be sure we can read without extra bounds checks
+ if (data > data_safe_end)
+ return -2;
+
+ unsigned char codetri = *code++;
+
+ if (codetri < 0xf0) // high nibble below 15: triangle reuses an edge from the edge fifo
+ {
+ int fe = codetri >> 4;
+
+ // fifo reads are wrapped around 16 entry buffer
+ unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0];
+ unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1];
+
+ int fec = codetri & 15;
+
+ // note: this is the most common path in the entire decoder
+ // inside this if we try to stay branchless (by using cmov/etc.) since these aren't predictable
+ if (fec < fecmax)
+ {
+ // fifo reads are wrapped around 16 entry buffer
+ unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15];
+ unsigned int c = (fec == 0) ? next : cf;
+
+ int fec0 = fec == 0;
+ next += fec0;
+
+ // output triangle
+ writeTriangle(destination, i, index_size, a, b, c);
+
+ // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
+ pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0); // the write position only advances when c was a new vertex
+
+ pushEdgeFifo(edgefifo, c, b, edgefifooffset);
+ pushEdgeFifo(edgefifo, a, c, edgefifooffset);
+ }
+ else
+ {
+ unsigned int c = 0;
+
+ // fec - (fec ^ 3) decodes 13, 14 into -1, 1
+ // note that we need to update the last index since free indices are delta-encoded
+ last = c = (fec != 15) ? last + (fec - (fec ^ 3)) : decodeIndex(data, last);
+
+ // output triangle
+ writeTriangle(destination, i, index_size, a, b, c);
+
+ // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
+ pushVertexFifo(vertexfifo, c, vertexfifooffset);
+
+ pushEdgeFifo(edgefifo, c, b, edgefifooffset);
+ pushEdgeFifo(edgefifo, a, c, edgefifooffset);
+ }
+ }
+ else
+ {
+ // fast path: read codeaux from the table
+ if (codetri < 0xfe)
+ {
+ unsigned char codeaux = codeaux_table[codetri & 15];
+
+ // note: table can't contain feb/fec=15
+ int feb = codeaux >> 4;
+ int fec = codeaux & 15;
+
+ // fifo reads are wrapped around 16 entry buffer
+ // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
+ unsigned int a = next++;
+
+ unsigned int bf = vertexfifo[(vertexfifooffset - feb) & 15]; // feb holds fifo distance + 1, so there is no extra -1 here
+ unsigned int b = (feb == 0) ? next : bf;
+
+ int feb0 = feb == 0;
+ next += feb0;
+
+ unsigned int cf = vertexfifo[(vertexfifooffset - fec) & 15];
+ unsigned int c = (fec == 0) ? next : cf;
+
+ int fec0 = fec == 0;
+ next += fec0;
+
+ // output triangle
+ writeTriangle(destination, i, index_size, a, b, c);
+
+ // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
+ pushVertexFifo(vertexfifo, a, vertexfifooffset);
+ pushVertexFifo(vertexfifo, b, vertexfifooffset, feb0);
+ pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);
+
+ pushEdgeFifo(edgefifo, b, a, edgefifooffset);
+ pushEdgeFifo(edgefifo, c, b, edgefifooffset);
+ pushEdgeFifo(edgefifo, a, c, edgefifooffset);
+ }
+ else
+ {
+ // slow path: read a full byte for codeaux instead of using a table lookup
+ unsigned char codeaux = *data++;
+
+ int fea = codetri == 0xfe ? 0 : 15; // 0xfe: a is the next vertex; 0xff: a is a free (delta-encoded) index
+ int feb = codeaux >> 4;
+ int fec = codeaux & 15;
+
+ // reset: codeaux is 0 but encoded as not-a-table
+ if (codeaux == 0)
+ next = 0;
+
+ // fifo reads are wrapped around 16 entry buffer
+ // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
+ unsigned int a = (fea == 0) ? next++ : 0;
+ unsigned int b = (feb == 0) ? next++ : vertexfifo[(vertexfifooffset - feb) & 15];
+ unsigned int c = (fec == 0) ? next++ : vertexfifo[(vertexfifooffset - fec) & 15];
+
+ // note that we need to update the last index since free indices are delta-encoded
+ if (fea == 15)
+ last = a = decodeIndex(data, last);
+
+ if (feb == 15)
+ last = b = decodeIndex(data, last);
+
+ if (fec == 15)
+ last = c = decodeIndex(data, last);
+
+ // output triangle
+ writeTriangle(destination, i, index_size, a, b, c);
+
+ // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
+ pushVertexFifo(vertexfifo, a, vertexfifooffset);
+ pushVertexFifo(vertexfifo, b, vertexfifooffset, (feb == 0) | (feb == 15));
+ pushVertexFifo(vertexfifo, c, vertexfifooffset, (fec == 0) | (fec == 15));
+
+ pushEdgeFifo(edgefifo, b, a, edgefifooffset);
+ pushEdgeFifo(edgefifo, c, b, edgefifooffset);
+ pushEdgeFifo(edgefifo, a, c, edgefifooffset);
+ }
+ }
+ }
+
+ // we should've read all data bytes and stopped at the boundary between data and codeaux table
+ if (data != data_safe_end)
+ return -3;
+
+ return 0;
+}
+
+size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
+{ // Encodes an arbitrary index sequence as variable-length deltas against two baselines; returns bytes written, or 0 when buffer_size is too small.
+ using namespace meshopt;
+
+ // the minimum valid encoding is header, 1 byte per index and a 4-byte tail
+ if (buffer_size < 1 + index_count + 4)
+ return 0;
+
+ int version = gEncodeIndexVersion;
+
+ buffer[0] = (unsigned char)(kSequenceHeader | version);
+
+ unsigned int last[2] = {}; // the two baselines deltas are measured against
+ unsigned int current = 0; // which baseline is active (0 or 1)
+
+ unsigned char* data = buffer + 1;
+ unsigned char* data_safe_end = buffer + buffer_size - 4;
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ // make sure we have enough data to write
+ // each index writes at most 5 bytes of data; there's a 4 byte tail after data_safe_end
+ // after this we can be sure we can write without extra bounds checks
+ if (data >= data_safe_end)
+ return 0;
+
+ unsigned int index = indices[i];
+
+ // this is a heuristic that switches between baselines when the delta grows too large
+ // we want the encoded delta to fit into one byte (7 bits), but 2 bits are used for sign and baseline index
+ // for now we immediately switch the baseline when delta grows too large - this can be adjusted arbitrarily
+ int cd = int(index - last[current]);
+ current ^= ((cd < 0 ? -cd : cd) >= 30); // toggle between baseline 0 and 1 when |delta| >= 30
+
+ // encode delta from the last index
+ unsigned int d = index - last[current];
+ unsigned int v = (d << 1) ^ (int(d) >> 31); // sign folded into bit 0, same scheme as encodeIndex
+
+ // note: low bit encodes the index of the last baseline which will be used for reconstruction
+ encodeVByte(data, (v << 1) | current);
+
+ // update last for the next iteration that uses it
+ last[current] = index;
+ }
+
+ // make sure we have enough space to write tail
+ if (data > data_safe_end)
+ return 0;
+
+ for (int k = 0; k < 4; ++k)
+ *data++ = 0; // 4-byte zero tail
+
+ return data - buffer;
+}
+
+size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count)
+{ // Returns a worst-case encoded size in bytes for a sequence of index_count indices that reference vertex_count vertices.
+ // compute number of bits required for each index
+ unsigned int vertex_bits = 1;
+
+ while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits)
+ vertex_bits++;
+
+ // worst-case encoding is 1 varint-7 encoded index delta for a K bit value and an extra bit
+ unsigned int vertex_groups = (vertex_bits + 1 + 1 + 6) / 7; // +1 sign bit, +1 baseline bit, +6 rounds up to whole 7-bit groups
+
+ return 1 + index_count * vertex_groups + 4; // header byte + per-index worst case + 4-byte tail
+}
+
+int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size)
+{ // Decodes an encoded index sequence into destination; returns 0 on success, -1 for a bad header or version, -2 for too little data, -3 for leftover or overrun data.
+ using namespace meshopt;
+
+ // the minimum valid encoding is header, 1 byte per index and a 4-byte tail
+ if (buffer_size < 1 + index_count + 4)
+ return -2;
+
+ if ((buffer[0] & 0xf0) != kSequenceHeader)
+ return -1;
+
+ int version = buffer[0] & 0x0f;
+ if (version > 1)
+ return -1;
+
+ const unsigned char* data = buffer + 1;
+ const unsigned char* data_safe_end = buffer + buffer_size - 4;
+
+ unsigned int last[2] = {}; // the two baselines, mirroring the encoder
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ // make sure we have enough data to read
+ // each index reads at most 5 bytes of data; there's a 4 byte tail after data_safe_end
+ // after this we can be sure we can read without extra bounds checks
+ if (data >= data_safe_end)
+ return -2;
+
+ unsigned int v = decodeVByte(data);
+
+ // decode the index of the last baseline
+ unsigned int current = v & 1;
+ v >>= 1;
+
+ // reconstruct index as a delta
+ unsigned int d = (v >> 1) ^ -int(v & 1); // undo the sign folding: bit 0 selects 0 or all ones for the XOR
+ unsigned int index = last[current] + d;
+
+ // update last for the next iteration that uses it
+ last[current] = index;
+
+ if (index_size == 2)
+ {
+ static_cast<unsigned short*>(destination)[i] = (unsigned short)(index);
+ }
+ else // NOTE(review): index_size is not asserted in this function; any value other than 2 is written as unsigned int
+ {
+ static_cast<unsigned int*>(destination)[i] = index;
+ }
+ }
+
+ // we should've read all data bytes and stopped at the boundary between data and tail
+ if (data != data_safe_end)
+ return -3;
+
+ return 0;
+}
diff --git a/thirdparty/meshoptimizer/indexgenerator.cpp b/thirdparty/meshoptimizer/indexgenerator.cpp
new file mode 100644
index 0000000000..aa4a30efa4
--- /dev/null
+++ b/thirdparty/meshoptimizer/indexgenerator.cpp
@@ -0,0 +1,347 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+#include <assert.h>
+#include <string.h>
+
+namespace meshopt
+{
+
+static unsigned int hashUpdate4(unsigned int h, const unsigned char* key, size_t len)
+{ // folds len bytes of key into the running hash h and returns the updated hash
+ // MurmurHash2 mixing step applied to 4 bytes at a time; the trailing len % 4 bytes are not hashed
+ const unsigned int m = 0x5bd1e995;
+ const int r = 24;
+
+ while (len >= 4)
+ {
+ unsigned int k = *reinterpret_cast<const unsigned int*>(key); // NOTE(review): loads 4 bytes through an unsigned int pointer; assumes key is suitably aligned for that on the target - confirm
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h *= m;
+ h ^= k;
+
+ key += 4;
+ len -= 4;
+ }
+
+ return h;
+}
+
+struct VertexHasher // hash/equality functor over vertex indices for a single interleaved vertex buffer (used with hashLookup)
+{
+ const unsigned char* vertices; // base of the vertex data, addressed in bytes
+ size_t vertex_size; // number of bytes of each vertex that are hashed and compared
+ size_t vertex_stride; // byte distance between consecutive vertices
+
+ size_t hash(unsigned int index) const
+ { // hashes the first vertex_size bytes of vertex #index, starting from seed 0
+ return hashUpdate4(0, vertices + index * vertex_stride, vertex_size);
+ }
+
+ bool equal(unsigned int lhs, unsigned int rhs) const
+ { // two vertices are equal when their first vertex_size bytes match exactly
+ return memcmp(vertices + lhs * vertex_stride, vertices + rhs * vertex_stride, vertex_size) == 0;
+ }
+};
+
+struct VertexStreamHasher // hash/equality functor over vertex indices whose attributes are spread across several streams (used with hashLookup)
+{
+ const meshopt_Stream* streams; // array of stream descriptors
+ size_t stream_count; // number of entries in streams
+
+ size_t hash(unsigned int index) const
+ { // chains hashUpdate4 over element #index of every stream, in stream order
+ unsigned int h = 0;
+
+ for (size_t i = 0; i < stream_count; ++i)
+ {
+ const meshopt_Stream& s = streams[i];
+ const unsigned char* data = static_cast<const unsigned char*>(s.data);
+
+ h = hashUpdate4(h, data + index * s.stride, s.size); // previous stream's hash is the seed for the next one
+ }
+
+ return h;
+ }
+
+ bool equal(unsigned int lhs, unsigned int rhs) const
+ { // vertices are equal only if element lhs and element rhs match byte-for-byte in every stream
+ for (size_t i = 0; i < stream_count; ++i)
+ {
+ const meshopt_Stream& s = streams[i];
+ const unsigned char* data = static_cast<const unsigned char*>(s.data);
+
+ if (memcmp(data + lhs * s.stride, data + rhs * s.stride, s.size) != 0)
+ return false; // first differing stream decides the result
+ }
+
+ return true;
+ }
+};
+
+static size_t hashBuckets(size_t count)
+{ // returns the smallest power of two that is >= count (1 when count is 0)
+ size_t buckets = 1;
+ while (buckets < count) // NOTE(review): the result can equal count exactly, so a table of this size may become completely full
+ buckets *= 2;
+
+ return buckets;
+}
+
+template <typename T, typename Hash>
+static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty)
+{ // open-addressing lookup: returns the slot that already holds an item equal to key, or the first empty slot on key's probe path
+ assert(buckets > 0);
+ assert((buckets & (buckets - 1)) == 0); // buckets must be a power of two so that masking with buckets - 1 works
+
+ size_t hashmod = buckets - 1;
+ size_t bucket = hash.hash(key) & hashmod; // initial slot
+
+ for (size_t probe = 0; probe <= hashmod; ++probe) // at most buckets probes
+ {
+ T& item = table[bucket];
+
+ if (item == empty)
+ return &item; // key is not in the table; the caller may store it here
+
+ if (hash.equal(item, key))
+ return &item; // an equivalent item is already stored
+
+ // hash collision, quadratic probing (the step grows by one each time: +1, +2, +3, ...)
+ bucket = (bucket + probe + 1) & hashmod;
+ }
+
+ assert(false && "Hash table is full"); // unreachable as long as the table keeps at least one empty slot or contains an item equal to key
+ return 0;
+}
+
+} // namespace meshopt
+
+size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
+{ // fills destination[old vertex] = new vertex so that byte-identical vertices share one new index; returns the number of unique vertices
+ using namespace meshopt;
+
+ assert(indices || index_count == vertex_count); // without an index buffer the input is treated as 0, 1, 2, ...
+ assert(index_count % 3 == 0);
+ assert(vertex_size > 0 && vertex_size <= 256);
+
+ meshopt_Allocator allocator;
+
+ memset(destination, -1, vertex_count * sizeof(unsigned int)); // every entry becomes ~0u, meaning "not assigned yet"
+
+ VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_size}; // stride == size: vertices are tightly packed
+
+ size_t table_size = hashBuckets(vertex_count);
+ unsigned int* table = allocator.allocate<unsigned int>(table_size);
+ memset(table, -1, table_size * sizeof(unsigned int)); // ~0u marks an empty hash slot
+
+ unsigned int next_vertex = 0; // next unused new index; new indices are handed out in order of first reference
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ unsigned int index = indices ? indices[i] : unsigned(i);
+ assert(index < vertex_count);
+
+ if (destination[index] == ~0u) // first time this old vertex is referenced
+ {
+ unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);
+
+ if (*entry == ~0u) // no byte-identical vertex seen so far
+ {
+ *entry = index; // the table stores old vertex indices
+
+ destination[index] = next_vertex++;
+ }
+ else // duplicate of an earlier vertex: reuse its new index
+ {
+ assert(destination[*entry] != ~0u);
+
+ destination[index] = destination[*entry];
+ }
+ }
+ }
+
+ assert(next_vertex <= vertex_count);
+
+ return next_vertex; // vertices never referenced by the index buffer keep ~0u in destination
+}
+
+size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
+{ // multi-stream variant of meshopt_generateVertexRemap: vertices are merged only when they match in every stream; returns the number of unique vertices
+ using namespace meshopt;
+
+ assert(indices || index_count == vertex_count); // without an index buffer the input is treated as 0, 1, 2, ...
+ assert(index_count % 3 == 0);
+ assert(stream_count > 0 && stream_count <= 16);
+
+ for (size_t i = 0; i < stream_count; ++i)
+ {
+ assert(streams[i].size > 0 && streams[i].size <= 256);
+ assert(streams[i].size <= streams[i].stride); // an element must fit inside its stride
+ }
+
+ meshopt_Allocator allocator;
+
+ memset(destination, -1, vertex_count * sizeof(unsigned int)); // every entry becomes ~0u, meaning "not assigned yet"
+
+ VertexStreamHasher hasher = {streams, stream_count};
+
+ size_t table_size = hashBuckets(vertex_count);
+ unsigned int* table = allocator.allocate<unsigned int>(table_size);
+ memset(table, -1, table_size * sizeof(unsigned int)); // ~0u marks an empty hash slot
+
+ unsigned int next_vertex = 0; // next unused new index; new indices are handed out in order of first reference
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ unsigned int index = indices ? indices[i] : unsigned(i);
+ assert(index < vertex_count);
+
+ if (destination[index] == ~0u) // first time this old vertex is referenced
+ {
+ unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);
+
+ if (*entry == ~0u) // no matching vertex seen so far
+ {
+ *entry = index; // the table stores old vertex indices
+
+ destination[index] = next_vertex++;
+ }
+ else // duplicate of an earlier vertex: reuse its new index
+ {
+ assert(destination[*entry] != ~0u);
+
+ destination[index] = destination[*entry];
+ }
+ }
+ }
+
+ assert(next_vertex <= vertex_count);
+
+ return next_vertex; // vertices never referenced by the index buffer keep ~0u in destination
+}
+
+void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap)
+{ // copies vertex i of the source buffer to slot remap[i] of destination, skipping entries whose remap value is ~0u
+ assert(vertex_size > 0 && vertex_size <= 256);
+
+ meshopt_Allocator allocator;
+
+ // support in-place remap: read from a temporary copy so that writes cannot clobber vertices that are still to be read
+ if (destination == vertices)
+ {
+ unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size);
+ memcpy(vertices_copy, vertices, vertex_count * vertex_size);
+ vertices = vertices_copy;
+ }
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ if (remap[i] != ~0u) // ~0u means vertex i has no destination slot
+ {
+ assert(remap[i] < vertex_count);
+
+ memcpy(static_cast<unsigned char*>(destination) + remap[i] * vertex_size, static_cast<const unsigned char*>(vertices) + i * vertex_size, vertex_size);
+ }
+ }
+}
+
+void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap)
+{ // writes destination[i] = remap[indices[i]] for every index; a null indices pointer stands for the sequence 0, 1, 2, ...
+ assert(index_count % 3 == 0);
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ unsigned int index = indices ? indices[i] : unsigned(i);
+ assert(remap[index] != ~0u); // every referenced vertex must have been assigned a new index
+
+ destination[i] = remap[index];
+ }
+}
+
+void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride)
+{ // rewrites each index to the first-referenced vertex whose leading vertex_size bytes are identical; the vertex buffer itself is not modified
+ using namespace meshopt;
+
+ assert(indices);
+ assert(index_count % 3 == 0);
+ assert(vertex_size > 0 && vertex_size <= 256);
+ assert(vertex_size <= vertex_stride); // only the first vertex_size bytes of each vertex_stride-sized vertex are compared
+
+ meshopt_Allocator allocator;
+
+ unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+ memset(remap, -1, vertex_count * sizeof(unsigned int)); // ~0u means "not resolved yet"
+
+ VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_stride};
+
+ size_t table_size = hashBuckets(vertex_count);
+ unsigned int* table = allocator.allocate<unsigned int>(table_size);
+ memset(table, -1, table_size * sizeof(unsigned int)); // ~0u marks an empty hash slot
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ unsigned int index = indices[i];
+ assert(index < vertex_count);
+
+ if (remap[index] == ~0u) // first time this vertex is referenced
+ {
+ unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);
+
+ if (*entry == ~0u)
+ *entry = index; // this vertex becomes the representative for its byte pattern
+
+ remap[index] = *entry; // maps to an original vertex index, not to a compacted one
+ }
+
+ destination[i] = remap[index];
+ }
+}
+
+void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
+{ // multi-stream variant of meshopt_generateShadowIndexBuffer: indices are redirected to the first-referenced vertex that matches in every given stream
+ using namespace meshopt;
+
+ assert(indices);
+ assert(index_count % 3 == 0);
+ assert(stream_count > 0 && stream_count <= 16);
+
+ for (size_t i = 0; i < stream_count; ++i)
+ {
+ assert(streams[i].size > 0 && streams[i].size <= 256);
+ assert(streams[i].size <= streams[i].stride); // an element must fit inside its stride
+ }
+
+ meshopt_Allocator allocator;
+
+ unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+ memset(remap, -1, vertex_count * sizeof(unsigned int)); // ~0u means "not resolved yet"
+
+ VertexStreamHasher hasher = {streams, stream_count};
+
+ size_t table_size = hashBuckets(vertex_count);
+ unsigned int* table = allocator.allocate<unsigned int>(table_size);
+ memset(table, -1, table_size * sizeof(unsigned int)); // ~0u marks an empty hash slot
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ unsigned int index = indices[i];
+ assert(index < vertex_count);
+
+ if (remap[index] == ~0u) // first time this vertex is referenced
+ {
+ unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);
+
+ if (*entry == ~0u)
+ *entry = index; // this vertex becomes the representative for its contents
+
+ remap[index] = *entry; // maps to an original vertex index, not to a compacted one
+ }
+
+ destination[i] = remap[index];
+ }
+}
diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h
new file mode 100644
index 0000000000..fde00f9c82
--- /dev/null
+++ b/thirdparty/meshoptimizer/meshoptimizer.h
@@ -0,0 +1,951 @@
+/**
+ * meshoptimizer - version 0.15
+ *
+ * Copyright (C) 2016-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Report bugs and download new versions at https://github.com/zeux/meshoptimizer
+ *
+ * This library is distributed under the MIT License. See notice at the end of this file.
+ */
+#pragma once
+
+#include <assert.h>
+#include <stddef.h>
+
+/* Version macro; major * 1000 + minor * 10 + patch */
+#define MESHOPTIMIZER_VERSION 150 /* 0.15 */
+
+/* If no API is defined, assume default */
+#ifndef MESHOPTIMIZER_API
+#define MESHOPTIMIZER_API
+#endif
+
+/* Experimental APIs have unstable interface and might have implementation that's not fully tested or optimized */
+#define MESHOPTIMIZER_EXPERIMENTAL MESHOPTIMIZER_API
+
+/* C interface */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Vertex attribute stream, similar to glVertexPointer
+ * Each element takes size bytes, with stride controlling the spacing between successive elements.
+ */
+struct meshopt_Stream
+{
+ const void* data; /* start of the stream; element i begins at byte offset i * stride */
+ size_t size; /* number of bytes of each element that are hashed and compared */
+ size_t stride; /* byte distance between successive elements; the implementation asserts size <= stride */
+};
+
+/**
+ * Generates a vertex remap table from the vertex buffer and an optional index buffer and returns number of unique vertices
+ * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence.
+ * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer.
+ * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized.
+ *
+ * destination must contain enough space for the resulting remap table (vertex_count elements)
+ * indices can be NULL if the input is unindexed
+ */
+MESHOPTIMIZER_API size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size);
+
+/**
+ * Generates a vertex remap table from multiple vertex streams and an optional index buffer and returns number of unique vertices
+ * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence.
+ * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer.
+ * To remap vertex buffers, you will need to call meshopt_remapVertexBuffer for each vertex stream.
+ * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized.
+ *
+ * destination must contain enough space for the resulting remap table (vertex_count elements)
+ * indices can be NULL if the input is unindexed
+ */
+MESHOPTIMIZER_API size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count);
+
+/**
+ * Generates vertex buffer from the source vertex buffer and remap table generated by meshopt_generateVertexRemap
+ *
+ * destination must contain enough space for the resulting vertex buffer (unique_vertex_count elements, returned by meshopt_generateVertexRemap)
+ * vertex_count should be the initial vertex count and not the value returned by meshopt_generateVertexRemap
+ */
+MESHOPTIMIZER_API void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap);
+
+/**
+ * Generate index buffer from the source index buffer and remap table generated by meshopt_generateVertexRemap
+ *
+ * destination must contain enough space for the resulting index buffer (index_count elements)
+ * indices can be NULL if the input is unindexed
+ */
+MESHOPTIMIZER_API void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap);
+
+/**
+ * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary
+ * All vertices that are binary equivalent (wrt first vertex_size bytes) map to the first vertex in the original vertex buffer.
+ * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering.
+ * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized.
+ *
+ * destination must contain enough space for the resulting index buffer (index_count elements)
+ */
+MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride);
+
+/**
+ * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary
+ * All vertices that are binary equivalent (wrt specified streams) map to the first vertex in the original vertex buffer.
+ * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering.
+ * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized.
+ *
+ * destination must contain enough space for the resulting index buffer (index_count elements)
+ */
+MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count);
+
+/**
+ * Vertex transform cache optimizer
+ * Reorders indices to reduce the number of GPU vertex shader invocations
+ * If index buffer contains multiple ranges for multiple draw calls, this function needs to be called on each range individually.
+ *
+ * destination must contain enough space for the resulting index buffer (index_count elements)
+ */
+MESHOPTIMIZER_API void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count);
+
+/**
+ * Vertex transform cache optimizer for strip-like caches
+ * Produces inferior results to meshopt_optimizeVertexCache from the GPU vertex cache perspective
+ * However, the resulting index order is more optimal if the goal is to reduce the triangle strip length or improve compression efficiency
+ *
+ * destination must contain enough space for the resulting index buffer (index_count elements)
+ */
+MESHOPTIMIZER_API void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count);
+
+/**
+ * Vertex transform cache optimizer for FIFO caches
+ * Reorders indices to reduce the number of GPU vertex shader invocations
+ * Generally takes ~3x less time to optimize meshes but produces inferior results compared to meshopt_optimizeVertexCache
+ * If index buffer contains multiple ranges for multiple draw calls, this function needs to be called on each range individually.
+ *
+ * destination must contain enough space for the resulting index buffer (index_count elements)
+ * cache_size should be less than the actual GPU cache size to avoid cache thrashing
+ */
+MESHOPTIMIZER_API void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size);
+
+/**
+ * Overdraw optimizer
+ * Reorders indices to reduce the number of GPU vertex shader invocations and the pixel overdraw
+ * If index buffer contains multiple ranges for multiple draw calls, this function needs to be called on each range individually.
+ *
+ * destination must contain enough space for the resulting index buffer (index_count elements)
+ * indices must contain index data that is the result of meshopt_optimizeVertexCache (*not* the original mesh indices!)
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ * threshold indicates how much the overdraw optimizer can degrade vertex cache efficiency (1.05 = up to 5%) to reduce overdraw more efficiently
+ */
+MESHOPTIMIZER_API void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold);
+
+/**
+ * Vertex fetch cache optimizer
+ * Reorders vertices and changes indices to reduce the amount of GPU memory fetches during vertex processing
+ * Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused
+ * This function works for a single vertex stream; for multiple vertex streams, use meshopt_optimizeVertexFetchRemap + meshopt_remapVertexBuffer for each stream.
+ *
+ * destination must contain enough space for the resulting vertex buffer (vertex_count elements)
+ * indices is used both as an input and as an output index buffer
+ */
+MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size);
+
+/**
+ * Vertex fetch cache optimizer
+ * Generates vertex remap to reduce the amount of GPU memory fetches during vertex processing
+ * Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused
+ * The resulting remap table should be used to reorder vertex/index buffers using meshopt_remapVertexBuffer/meshopt_remapIndexBuffer
+ *
+ * destination must contain enough space for the resulting remap table (vertex_count elements)
+ */
+MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count);
+
+/**
+ * Index buffer encoder
+ * Encodes index data into an array of bytes that is generally much smaller (<1.5 bytes/triangle) and compresses better (<1 bytes/triangle) compared to original.
+ * Input index buffer must represent a triangle list.
+ * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space
+ * For maximum efficiency the index buffer being encoded has to be optimized for vertex cache and vertex fetch first.
+ *
+ * buffer must contain enough space for the encoded index buffer (use meshopt_encodeIndexBufferBound to compute worst case size)
+ */
+MESHOPTIMIZER_API size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count);
+MESHOPTIMIZER_API size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count);
+
+/**
+ * Experimental: Set index encoder format version
+ * version must specify the data format version to encode; valid values are 0 (decodable by all library versions) and 1 (decodable by 0.14+)
+ */
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeIndexVersion(int version);
+
+/**
+ * Index buffer decoder
+ * Decodes index data from an array of bytes generated by meshopt_encodeIndexBuffer
+ * Returns 0 if decoding was successful, and an error code otherwise
+ * The decoder is safe to use for untrusted input, but it may produce garbage data (e.g. out of range indices).
+ *
+ * destination must contain enough space for the resulting index buffer (index_count elements)
+ */
+MESHOPTIMIZER_API int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size);
+
+/**
+ * Experimental: Index sequence encoder
+ * Encodes index sequence into an array of bytes that is generally smaller and compresses better compared to original.
+ * Input index sequence can represent arbitrary topology; for triangle lists meshopt_encodeIndexBuffer is likely to be better.
+ * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space
+ *
+ * buffer must contain enough space for the encoded index sequence (use meshopt_encodeIndexSequenceBound to compute worst case size)
+ */
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count);
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count);
+
+/**
+ * Index sequence decoder
+ * Decodes index data from an array of bytes generated by meshopt_encodeIndexSequence
+ * Returns 0 if decoding was successful, and an error code otherwise
+ * The decoder is safe to use for untrusted input, but it may produce garbage data (e.g. out of range indices).
+ *
+ * destination must contain enough space for the resulting index sequence (index_count elements)
+ */
+MESHOPTIMIZER_EXPERIMENTAL int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size);
+
+/**
+ * Vertex buffer encoder
+ * Encodes vertex data into an array of bytes that is generally smaller and compresses better compared to original.
+ * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space
+ * This function works for a single vertex stream; for multiple vertex streams, call meshopt_encodeVertexBuffer for each stream.
+ * Note that all vertex_size bytes of each vertex are encoded verbatim, including padding which should be zero-initialized.
+ *
+ * buffer must contain enough space for the encoded vertex buffer (use meshopt_encodeVertexBufferBound to compute worst case size)
+ */
+MESHOPTIMIZER_API size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size);
+MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size);
+
+/**
+ * Experimental: Set vertex encoder format version
+ * version must specify the data format version to encode; valid values are 0 (decodable by all library versions)
+ */
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeVertexVersion(int version);
+
+/**
+ * Vertex buffer decoder
+ * Decodes vertex data from an array of bytes generated by meshopt_encodeVertexBuffer
+ * Returns 0 if decoding was successful, and an error code otherwise
+ * The decoder is safe to use for untrusted input, but it may produce garbage data.
+ *
+ * destination must contain enough space for the resulting vertex buffer (vertex_count * vertex_size bytes)
+ */
+MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size);
+
+/**
+ * Vertex buffer filters
+ * These functions can be used to filter output of meshopt_decodeVertexBuffer in-place.
+ * count must be aligned by 4 and stride is fixed for each function to facilitate SIMD implementation.
+ *
+ * meshopt_decodeFilterOct decodes octahedral encoding of a unit vector with K-bit (K <= 16) signed X/Y as an input; Z must store 1.0f.
+ * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is.
+ *
+ * meshopt_decodeFilterQuat decodes 3-component quaternion encoding with K-bit (4 <= K <= 16) component encoding and a 2-bit component index indicating which component to reconstruct.
+ * Each component is stored as a 16-bit integer; stride must be equal to 8.
+ *
+ * meshopt_decodeFilterExp decodes exponential encoding of floating-point data with 8-bit exponent and 24-bit integer mantissa as 2^E*M.
+ * Each 32-bit component is decoded in isolation; stride must be divisible by 4.
+ */
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size);
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size);
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size);
+
+/**
+ * Experimental: Mesh simplifier
+ * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible
+ * The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error.
+ * If not all attributes from the input mesh are required, it's recommended to reindex the mesh using meshopt_generateShadowIndexBuffer prior to simplification.
+ * Returns the number of indices after simplification, with destination containing new index data
+ * The resulting index buffer references vertices from the original vertex buffer.
+ * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
+ *
+ * destination must contain enough space for the *source* index buffer (since optimization is iterative, this means index_count elements - *not* target_index_count!)
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ */
+// -- GODOT start --
+//MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error);
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *r_resulting_error);
+// -- GODOT end --
+
+/**
+ * Experimental: Mesh simplifier (sloppy)
+ * Reduces the number of triangles in the mesh, sacrificing mesh appearance for simplification performance
+ * The algorithm doesn't preserve mesh topology but is always able to reach target triangle count.
+ * Returns the number of indices after simplification, with destination containing new index data
+ * The resulting index buffer references vertices from the original vertex buffer.
+ * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
+ *
+ * destination must contain enough space for the target index buffer
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ */
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count);
+
+/**
+ * Experimental: Point cloud simplifier
+ * Reduces the number of points in the cloud to reach the given target
+ * Returns the number of points after simplification, with destination containing new index data
+ * The resulting index buffer references vertices from the original vertex buffer.
+ * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
+ *
+ * destination must contain enough space for the target index buffer
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ */
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count);
+
+/**
+ * Mesh stripifier
+ * Converts a previously vertex cache optimized triangle list to triangle strip, stitching strips using restart index or degenerate triangles
+ * Returns the number of indices in the resulting strip, with destination containing new index data
+ * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first.
+ * Using restart indices can result in ~10% smaller index buffers, but on some GPUs restart indices may result in decreased performance.
+ *
+ * destination must contain enough space for the target index buffer, worst case can be computed with meshopt_stripifyBound
+ * restart_index should be 0xffff or 0xffffffff depending on index size, or 0 to use degenerate triangles
+ */
+MESHOPTIMIZER_API size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index);
+MESHOPTIMIZER_API size_t meshopt_stripifyBound(size_t index_count);
+
+/**
+ * Mesh unstripifier
+ * Converts a triangle strip to a triangle list
+ * Returns the number of indices in the resulting list, with destination containing new index data
+ *
+ * destination must contain enough space for the target index buffer, worst case can be computed with meshopt_unstripifyBound
+ */
+MESHOPTIMIZER_API size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index);
+MESHOPTIMIZER_API size_t meshopt_unstripifyBound(size_t index_count);
+
+struct meshopt_VertexCacheStatistics
+{
+ unsigned int vertices_transformed; /* numerator of both acmr and atvr */
+ unsigned int warps_executed; /* NOTE(review): presumably the number of warp_size-wide batches in the analyzer's model - confirm against meshopt_analyzeVertexCache */
+ float acmr; /* transformed vertices / triangle count; best case 0.5, worst case 3.0, optimum depends on topology */
+ float atvr; /* transformed vertices / vertex count; best case 1.0, worst case 6.0, optimum is 1.0 (each vertex is transformed once) */
+};
+
+/**
+ * Vertex transform cache analyzer
+ * Returns cache hit statistics using a simplified FIFO model
+ * Results may not match actual GPU performance
+ */
+MESHOPTIMIZER_API struct meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size);
+
+struct meshopt_OverdrawStatistics
+{
+ unsigned int pixels_covered; /* denominator of overdraw */
+ unsigned int pixels_shaded; /* numerator of overdraw */
+ float overdraw; /* shaded pixels / covered pixels; best case 1.0 */
+};
+
+/**
+ * Overdraw analyzer
+ * Returns overdraw statistics using a software rasterizer
+ * Results may not match actual GPU performance
+ *
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ */
+MESHOPTIMIZER_API struct meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+
+struct meshopt_VertexFetchStatistics
+{
+ unsigned int bytes_fetched; /* numerator of overfetch */
+ float overfetch; /* fetched bytes / vertex buffer size; best case 1.0 (each byte is fetched once) */
+};
+
+/**
+ * Vertex fetch cache analyzer
+ * Returns cache hit statistics using a simplified direct mapped model
+ * Results may not match actual GPU performance
+ */
+MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size);
+
+struct meshopt_Meshlet
+{
+ unsigned int vertices[64]; /* meshlet vertices; each entry refers to a vertex in the original vertex buffer */
+ unsigned char indices[126][3]; /* micro index buffer: three indices per triangle, each indexing into vertices[] */
+ unsigned char triangle_count; /* number of used rows in indices (at most 126) */
+ unsigned char vertex_count; /* number of used entries in vertices (at most 64) */
+};
+
+/**
+ * Experimental: Meshlet builder
+ * Splits the mesh into a set of meshlets where each meshlet has a micro index buffer indexing into meshlet vertices that refer to the original vertex buffer
+ * The resulting data can be used to render meshes using NVidia programmable mesh shading pipeline, or in other cluster-based renderers.
+ * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first.
+ *
+ * destination must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound
+ * max_vertices and max_triangles can't exceed limits statically declared in meshopt_Meshlet (max_vertices <= 64, max_triangles <= 126)
+ */
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshlets(struct meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles);
+
+struct meshopt_Bounds
+{
+ /* bounding sphere, useful for frustum and occlusion culling */
+ float center[3];
+ float radius;
+
+ /* normal cone, useful for backface culling */
+ float cone_apex[3];
+ float cone_axis[3];
+ float cone_cutoff; /* = cos(angle/2) */
+
+ /* normal cone axis and cutoff, stored in 8-bit SNORM format; decode using x/127.0 */
+ signed char cone_axis_s8[3];
+ signed char cone_cutoff_s8;
+};
+
+/**
+ * Experimental: Cluster bounds generator
+ * Creates bounding volumes that can be used for frustum, backface and occlusion culling.
+ *
+ * For backface culling with orthographic projection, use the following formula to reject backfacing clusters:
+ * dot(view, cone_axis) >= cone_cutoff
+ *
+ * For perspective projection, you can use the formula that needs cone apex in addition to axis & cutoff:
+ * dot(normalize(cone_apex - camera_position), cone_axis) >= cone_cutoff
+ *
+ * Alternatively, you can use the formula that doesn't need cone apex and uses bounding sphere instead:
+ * dot(normalize(center - camera_position), cone_axis) >= cone_cutoff + radius / length(center - camera_position)
+ * or an equivalent formula that doesn't have a singularity at center = camera_position:
+ * dot(center - camera_position, cone_axis) >= cone_cutoff * length(center - camera_position) + radius
+ *
+ * The formula that uses the apex is slightly more accurate but needs the apex; if you are already using bounding sphere
+ * to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable.
+ *
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ * index_count should be less than or equal to 256*3 (the function assumes clusters of limited size)
+ */
+MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const struct meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+
+/**
+ * Experimental: Spatial sorter
+ * Generates a remap table that can be used to reorder points for spatial locality.
+ * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer.
+ *
+ * destination must contain enough space for the resulting remap table (vertex_count elements)
+ */
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+
+/**
+ * Experimental: Spatial sorter
+ * Reorders triangles for spatial locality, and generates a new index buffer. The resulting index buffer can be used with other functions like optimizeVertexCache.
+ *
+ * destination must contain enough space for the resulting index buffer (index_count elements)
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ */
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+
+/**
+ * Set allocation callbacks
+ * These callbacks will be used instead of the default operator new/operator delete for all temporary allocations in the library.
+ * Note that all algorithms only allocate memory for temporary use.
+ * allocate/deallocate are always called in a stack-like order - last pointer to be allocated is deallocated first.
+ */
+MESHOPTIMIZER_API void meshopt_setAllocator(void* (*allocate)(size_t), void (*deallocate)(void*));
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+/* Quantization into commonly supported data formats */
+#ifdef __cplusplus
+/**
+ * Quantize a float in [0..1] range into an N-bit fixed point unorm value
+ * Assumes reconstruction function (q / (2^N-1)), which is the case for fixed-function normalized fixed point conversion
+ * Maximum reconstruction error: 1/2^(N+1)
+ */
+inline int meshopt_quantizeUnorm(float v, int N);
+
+/**
+ * Quantize a float in [-1..1] range into an N-bit fixed point snorm value
+ * Assumes reconstruction function (q / (2^(N-1)-1)), which is the case for fixed-function normalized fixed point conversion (except early OpenGL versions)
+ * Maximum reconstruction error: 1/2^N
+ */
+inline int meshopt_quantizeSnorm(float v, int N);
+
+/**
+ * Quantize a float into half-precision floating point value
+ * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest
+ * Representable magnitude range: [6e-5; 65504]
+ * Maximum relative reconstruction error: 5e-4
+ */
+inline unsigned short meshopt_quantizeHalf(float v);
+
+/**
+ * Quantize a float into a floating point value with a limited number of significant mantissa bits
+ * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest
+ * Assumes N is in a valid mantissa precision range, which is 1..23
+ */
+inline float meshopt_quantizeFloat(float v, int N);
+#endif
+
+/**
+ * C++ template interface
+ *
+ * These functions mirror the C interface the library provides, providing template-based overloads so that
+ * the caller can use an arbitrary type for the index data, both for input and output.
+ * When the supplied type is the same size as that of unsigned int, the wrappers are zero-cost; when it's not,
+ * the wrappers end up allocating memory and copying index data to convert from one type to another.
+ */
+#if defined(__cplusplus) && !defined(MESHOPTIMIZER_NO_WRAPPERS)
+template <typename T>
+inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size);
+template <typename T>
+inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count);
+template <typename T>
+inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap);
+template <typename T>
+inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride);
+template <typename T>
+inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count);
+template <typename T>
+inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count);
+template <typename T>
+inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count);
+template <typename T>
+inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size);
+template <typename T>
+inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold);
+template <typename T>
+inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count);
+template <typename T>
+inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size);
+template <typename T>
+inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count);
+template <typename T>
+inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size);
+template <typename T>
+inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count);
+template <typename T>
+inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size);
+template <typename T>
+inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error);
+template <typename T>
+inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count);
+template <typename T>
+inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index);
+template <typename T>
+inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index);
+template <typename T>
+inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size);
+template <typename T>
+inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+template <typename T>
+inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size);
+template <typename T>
+inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
+template <typename T>
+inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+template <typename T>
+inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+#endif
+
+/* Inline implementation */
+#ifdef __cplusplus
+inline int meshopt_quantizeUnorm(float v, int N)
+{
+ const float scale = float((1 << N) - 1);
+
+ v = (v >= 0) ? v : 0;
+ v = (v <= 1) ? v : 1;
+
+ return int(v * scale + 0.5f);
+}
+
+inline int meshopt_quantizeSnorm(float v, int N)
+{
+ const float scale = float((1 << (N - 1)) - 1);
+
+ float round = (v >= 0 ? 0.5f : -0.5f);
+
+ v = (v >= -1) ? v : -1;
+ v = (v <= +1) ? v : +1;
+
+ return int(v * scale + round);
+}
+
+inline unsigned short meshopt_quantizeHalf(float v)
+{
+ union { float f; unsigned int ui; } u = {v};
+ unsigned int ui = u.ui;
+
+ int s = (ui >> 16) & 0x8000;
+ int em = ui & 0x7fffffff;
+
+ /* bias exponent and round to nearest; 112 is relative exponent bias (127-15) */
+ int h = (em - (112 << 23) + (1 << 12)) >> 13;
+
+ /* underflow: flush to zero; 113 encodes exponent -14 */
+ h = (em < (113 << 23)) ? 0 : h;
+
+ /* overflow: infinity; 143 encodes exponent 16 */
+ h = (em >= (143 << 23)) ? 0x7c00 : h;
+
+ /* NaN; note that we convert all types of NaN to qNaN */
+ h = (em > (255 << 23)) ? 0x7e00 : h;
+
+ return (unsigned short)(s | h);
+}
+
+inline float meshopt_quantizeFloat(float v, int N)
+{
+ union { float f; unsigned int ui; } u = {v};
+ unsigned int ui = u.ui;
+
+ const int mask = (1 << (23 - N)) - 1;
+ const int round = (1 << (23 - N)) >> 1;
+
+ int e = ui & 0x7f800000;
+ unsigned int rui = (ui + round) & ~mask;
+
+ /* round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0 */
+ ui = e == 0x7f800000 ? ui : rui;
+
+ /* flush denormals to zero */
+ ui = e == 0 ? 0 : ui;
+
+ u.ui = ui;
+ return u.f;
+}
+#endif
+
+/* Internal implementation helpers */
+#ifdef __cplusplus
+class meshopt_Allocator
+{
+public:
+ template <typename T>
+ struct StorageT
+ {
+ static void* (*allocate)(size_t);
+ static void (*deallocate)(void*);
+ };
+
+ typedef StorageT<void> Storage;
+
+ meshopt_Allocator()
+ : blocks()
+ , count(0)
+ {
+ }
+
+ ~meshopt_Allocator()
+ {
+ for (size_t i = count; i > 0; --i)
+ Storage::deallocate(blocks[i - 1]);
+ }
+
+ template <typename T> T* allocate(size_t size)
+ {
+ assert(count < sizeof(blocks) / sizeof(blocks[0]));
+ T* result = static_cast<T*>(Storage::allocate(size > size_t(-1) / sizeof(T) ? size_t(-1) : size * sizeof(T)));
+ blocks[count++] = result;
+ return result;
+ }
+
+private:
+ void* blocks[24];
+ size_t count;
+};
+
+// This makes sure that allocate/deallocate are lazily generated in translation units that need them and are deduplicated by the linker
+template <typename T> void* (*meshopt_Allocator::StorageT<T>::allocate)(size_t) = operator new;
+template <typename T> void (*meshopt_Allocator::StorageT<T>::deallocate)(void*) = operator delete;
+#endif
+
+/* Inline implementation for C++ templated wrappers */
+#if defined(__cplusplus) && !defined(MESHOPTIMIZER_NO_WRAPPERS)
+template <typename T, bool ZeroCopy = sizeof(T) == sizeof(unsigned int)>
+struct meshopt_IndexAdapter;
+
+template <typename T>
+struct meshopt_IndexAdapter<T, false>
+{
+ T* result;
+ unsigned int* data;
+ size_t count;
+
+ meshopt_IndexAdapter(T* result_, const T* input, size_t count_)
+ : result(result_)
+ , data(0)
+ , count(count_)
+ {
+ size_t size = count > size_t(-1) / sizeof(unsigned int) ? size_t(-1) : count * sizeof(unsigned int);
+
+ data = static_cast<unsigned int*>(meshopt_Allocator::Storage::allocate(size));
+
+ if (input)
+ {
+ for (size_t i = 0; i < count; ++i)
+ data[i] = input[i];
+ }
+ }
+
+ ~meshopt_IndexAdapter()
+ {
+ if (result)
+ {
+ for (size_t i = 0; i < count; ++i)
+ result[i] = T(data[i]);
+ }
+
+ meshopt_Allocator::Storage::deallocate(data);
+ }
+};
+
+template <typename T>
+struct meshopt_IndexAdapter<T, true>
+{
+ unsigned int* data;
+
+ meshopt_IndexAdapter(T* result, const T* input, size_t)
+ : data(reinterpret_cast<unsigned int*>(result ? result : const_cast<T*>(input)))
+ {
+ }
+};
+
+template <typename T>
+inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
+{
+ meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0);
+
+ return meshopt_generateVertexRemap(destination, indices ? in.data : 0, index_count, vertices, vertex_count, vertex_size);
+}
+
+template <typename T>
+inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count)
+{
+ meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0);
+
+ return meshopt_generateVertexRemapMulti(destination, indices ? in.data : 0, index_count, vertex_count, streams, stream_count);
+}
+
+template <typename T>
+inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap)
+{
+ meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0);
+ meshopt_IndexAdapter<T> out(destination, 0, index_count);
+
+ meshopt_remapIndexBuffer(out.data, indices ? in.data : 0, index_count, remap);
+}
+
+template <typename T>
+inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+ meshopt_IndexAdapter<T> out(destination, 0, index_count);
+
+ meshopt_generateShadowIndexBuffer(out.data, in.data, index_count, vertices, vertex_count, vertex_size, vertex_stride);
+}
+
+template <typename T>
+inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+ meshopt_IndexAdapter<T> out(destination, 0, index_count);
+
+ meshopt_generateShadowIndexBufferMulti(out.data, in.data, index_count, vertex_count, streams, stream_count);
+}
+
+template <typename T>
+inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+ meshopt_IndexAdapter<T> out(destination, 0, index_count);
+
+ meshopt_optimizeVertexCache(out.data, in.data, index_count, vertex_count);
+}
+
+template <typename T>
+inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+ meshopt_IndexAdapter<T> out(destination, 0, index_count);
+
+ meshopt_optimizeVertexCacheStrip(out.data, in.data, index_count, vertex_count);
+}
+
+template <typename T>
+inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+ meshopt_IndexAdapter<T> out(destination, 0, index_count);
+
+ meshopt_optimizeVertexCacheFifo(out.data, in.data, index_count, vertex_count, cache_size);
+}
+
+template <typename T>
+inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+ meshopt_IndexAdapter<T> out(destination, 0, index_count);
+
+ meshopt_optimizeOverdraw(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, threshold);
+}
+
+template <typename T>
+inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+
+ return meshopt_optimizeVertexFetchRemap(destination, in.data, index_count, vertex_count);
+}
+
+template <typename T>
+inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
+{
+ meshopt_IndexAdapter<T> inout(indices, indices, index_count);
+
+ return meshopt_optimizeVertexFetch(destination, inout.data, index_count, vertices, vertex_count, vertex_size);
+}
+
+template <typename T>
+inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+
+ return meshopt_encodeIndexBuffer(buffer, buffer_size, in.data, index_count);
+}
+
+template <typename T>
+inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size)
+{
+ char index_size_valid[sizeof(T) == 2 || sizeof(T) == 4 ? 1 : -1];
+ (void)index_size_valid;
+
+ return meshopt_decodeIndexBuffer(destination, index_count, sizeof(T), buffer, buffer_size);
+}
+
+template <typename T>
+inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+
+ return meshopt_encodeIndexSequence(buffer, buffer_size, in.data, index_count);
+}
+
+template <typename T>
+inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size)
+{
+ char index_size_valid[sizeof(T) == 2 || sizeof(T) == 4 ? 1 : -1];
+ (void)index_size_valid;
+
+ return meshopt_decodeIndexSequence(destination, index_count, sizeof(T), buffer, buffer_size);
+}
+
+template <typename T>
+inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+ meshopt_IndexAdapter<T> out(destination, 0, index_count);
+
+ return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error);
+}
+
+template <typename T>
+inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+ meshopt_IndexAdapter<T> out(destination, 0, target_index_count);
+
+ return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count);
+}
+
+template <typename T>
+inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+ meshopt_IndexAdapter<T> out(destination, 0, (index_count / 3) * 5);
+
+ return meshopt_stripify(out.data, in.data, index_count, vertex_count, unsigned(restart_index));
+}
+
+template <typename T>
+inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+ // a strip with fewer than 3 indices has no triangles; clamp so that (index_count - 2) can't wrap around in size_t
+ meshopt_IndexAdapter<T> out(destination, 0, index_count < 2 ? 0 : (index_count - 2) * 3);
+ return meshopt_unstripify(out.data, in.data, index_count, unsigned(restart_index));
+}
+
+template <typename T>
+inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+
+ return meshopt_analyzeVertexCache(in.data, index_count, vertex_count, cache_size, warp_size, buffer_size);
+}
+
+template <typename T>
+inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+
+ return meshopt_analyzeOverdraw(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+}
+
+template <typename T>
+inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+
+ return meshopt_analyzeVertexFetch(in.data, index_count, vertex_count, vertex_size);
+}
+
+template <typename T>
+inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+
+ return meshopt_buildMeshlets(destination, in.data, index_count, vertex_count, max_vertices, max_triangles);
+}
+
+template <typename T>
+inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+
+ return meshopt_computeClusterBounds(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+}
+
+template <typename T>
+inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+ meshopt_IndexAdapter<T> out(destination, 0, index_count);
+
+ meshopt_spatialSortTriangles(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+}
+#endif
+
+/**
+ * Copyright (c) 2016-2020 Arseny Kapoulkine
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
diff --git a/thirdparty/meshoptimizer/overdrawanalyzer.cpp b/thirdparty/meshoptimizer/overdrawanalyzer.cpp
new file mode 100644
index 0000000000..8d5859ba39
--- /dev/null
+++ b/thirdparty/meshoptimizer/overdrawanalyzer.cpp
@@ -0,0 +1,230 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+#include <assert.h>
+#include <float.h>
+#include <string.h>
+
+// This work is based on:
+// Nicolas Capens. Advanced Rasterization. 2004
+namespace meshopt
+{
+
+const int kViewport = 256;
+
+struct OverdrawBuffer
+{
+ float z[kViewport][kViewport][2];
+ unsigned int overdraw[kViewport][kViewport][2];
+};
+
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+#ifndef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+static float computeDepthGradients(float& dzdx, float& dzdy, float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3)
+{
+ // Computes the depth gradients (dzdx, dzdy) of the plane through the three vertices, from
+ // z2 = z1 + dzdx * (x2 - x1) + dzdy * (y2 - y1)
+ // z3 = z1 + dzdx * (x3 - x1) + dzdy * (y3 - y1)
+ // solved with Cramer's rule: each gradient is a full 2x2 numerator determinant divided by det.
+ // Returns det (the caller uses its sign to classify the triangle); when det == 0 both gradients are 0.
+ float det = (x2 - x1) * (y3 - y1) - (y2 - y1) * (x3 - x1);
+ float invdet = (det == 0) ? 0 : 1 / det;
+
+ dzdx = ((z2 - z1) * (y3 - y1) - (y2 - y1) * (z3 - z1)) * invdet;
+ dzdy = ((x2 - x1) * (z3 - z1) - (z2 - z1) * (x3 - x1)) * invdet;
+
+ return det;
+}
+
+// half-space fixed point triangle rasterizer
+static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, float v2x, float v2y, float v2z, float v3x, float v3y, float v3z)
+{
+ // compute depth gradients
+ float DZx, DZy;
+ float det = computeDepthGradients(DZx, DZy, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
+ int sign = det > 0;
+
+ // flip backfacing triangles to simplify rasterization logic
+ if (sign)
+ {
+ // flipping v2 & v3 preserves depth gradients since they're based on v1
+ float t;
+ t = v2x, v2x = v3x, v3x = t;
+ t = v2y, v2y = v3y, v3y = t;
+ t = v2z, v2z = v3z, v3z = t;
+
+ // flip depth since we rasterize backfacing triangles to second buffer with reverse Z; only v1z is used below
+ v1z = kViewport - v1z;
+ DZx = -DZx;
+ DZy = -DZy;
+ }
+
+ // coordinates, 28.4 fixed point
+ int X1 = int(16.0f * v1x + 0.5f);
+ int X2 = int(16.0f * v2x + 0.5f);
+ int X3 = int(16.0f * v3x + 0.5f);
+
+ int Y1 = int(16.0f * v1y + 0.5f);
+ int Y2 = int(16.0f * v2y + 0.5f);
+ int Y3 = int(16.0f * v3y + 0.5f);
+
+ // bounding rectangle, clipped against viewport
+ // since we rasterize pixels with covered centers, min >0.5 should round up
+ // as for max, due to top-left filling convention we will never rasterize right/bottom edges
+ // so max >= 0.5 should round down
+ int minx = max((min(X1, min(X2, X3)) + 7) >> 4, 0);
+ int maxx = min((max(X1, max(X2, X3)) + 7) >> 4, kViewport);
+ int miny = max((min(Y1, min(Y2, Y3)) + 7) >> 4, 0);
+ int maxy = min((max(Y1, max(Y2, Y3)) + 7) >> 4, kViewport);
+
+ // deltas, 28.4 fixed point
+ int DX12 = X1 - X2;
+ int DX23 = X2 - X3;
+ int DX31 = X3 - X1;
+
+ int DY12 = Y1 - Y2;
+ int DY23 = Y2 - Y3;
+ int DY31 = Y3 - Y1;
+
+ // fill convention correction
+ int TL1 = DY12 < 0 || (DY12 == 0 && DX12 > 0);
+ int TL2 = DY23 < 0 || (DY23 == 0 && DX23 > 0);
+ int TL3 = DY31 < 0 || (DY31 == 0 && DX31 > 0);
+
+ // half edge equations, 24.8 fixed point
+ // note that we offset minx/miny by half pixel since we want to rasterize pixels with covered centers
+ int FX = (minx << 4) + 8;
+ int FY = (miny << 4) + 8;
+ int CY1 = DX12 * (FY - Y1) - DY12 * (FX - X1) + TL1 - 1;
+ int CY2 = DX23 * (FY - Y2) - DY23 * (FX - X2) + TL2 - 1;
+ int CY3 = DX31 * (FY - Y3) - DY31 * (FX - X3) + TL3 - 1;
+ float ZY = v1z + (DZx * float(FX - X1) + DZy * float(FY - Y1)) * (1 / 16.f);
+
+ for (int y = miny; y < maxy; y++)
+ {
+ int CX1 = CY1;
+ int CX2 = CY2;
+ int CX3 = CY3;
+ float ZX = ZY;
+
+ for (int x = minx; x < maxx; x++)
+ {
+ // check if all CXn are non-negative
+ if ((CX1 | CX2 | CX3) >= 0)
+ {
+ if (ZX >= buffer->z[y][x][sign])
+ {
+ buffer->z[y][x][sign] = ZX;
+ buffer->overdraw[y][x][sign]++;
+ }
+ }
+
+ // signed left shift is UB for negative numbers so use unsigned-signed casts
+ CX1 -= int(unsigned(DY12) << 4);
+ CX2 -= int(unsigned(DY23) << 4);
+ CX3 -= int(unsigned(DY31) << 4);
+ ZX += DZx;
+ }
+
+ // signed left shift is UB for negative numbers so use unsigned-signed casts
+ CY1 += int(unsigned(DX12) << 4);
+ CY2 += int(unsigned(DX23) << 4);
+ CY3 += int(unsigned(DX31) << 4);
+ ZY += DZy;
+ }
+}
+
+} // namespace meshopt
+
+meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+ using namespace meshopt;
+
+ assert(index_count % 3 == 0);
+ assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+ assert(vertex_positions_stride % sizeof(float) == 0);
+
+ meshopt_Allocator allocator;
+
+ size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+
+ meshopt_OverdrawStatistics result = {};
+
+ float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
+ float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ const float* v = vertex_positions + i * vertex_stride_float;
+
+ for (int j = 0; j < 3; ++j)
+ {
+ minv[j] = min(minv[j], v[j]);
+ maxv[j] = max(maxv[j], v[j]);
+ }
+ }
+
+ float extent = max(maxv[0] - minv[0], max(maxv[1] - minv[1], maxv[2] - minv[2]));
+ float scale = kViewport / extent;
+
+ float* triangles = allocator.allocate<float>(index_count * 3);
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ unsigned int index = indices[i];
+ assert(index < vertex_count);
+
+ const float* v = vertex_positions + index * vertex_stride_float;
+
+ triangles[i * 3 + 0] = (v[0] - minv[0]) * scale;
+ triangles[i * 3 + 1] = (v[1] - minv[1]) * scale;
+ triangles[i * 3 + 2] = (v[2] - minv[2]) * scale;
+ }
+
+ OverdrawBuffer* buffer = allocator.allocate<OverdrawBuffer>(1);
+
+ for (int axis = 0; axis < 3; ++axis)
+ {
+ memset(buffer, 0, sizeof(OverdrawBuffer));
+
+ for (size_t i = 0; i < index_count; i += 3)
+ {
+ const float* vn0 = &triangles[3 * (i + 0)];
+ const float* vn1 = &triangles[3 * (i + 1)];
+ const float* vn2 = &triangles[3 * (i + 2)];
+
+ switch (axis)
+ {
+ case 0:
+ rasterize(buffer, vn0[2], vn0[1], vn0[0], vn1[2], vn1[1], vn1[0], vn2[2], vn2[1], vn2[0]);
+ break;
+ case 1:
+ rasterize(buffer, vn0[0], vn0[2], vn0[1], vn1[0], vn1[2], vn1[1], vn2[0], vn2[2], vn2[1]);
+ break;
+ case 2:
+ rasterize(buffer, vn0[1], vn0[0], vn0[2], vn1[1], vn1[0], vn1[2], vn2[1], vn2[0], vn2[2]);
+ break;
+ }
+ }
+
+ for (int y = 0; y < kViewport; ++y)
+ for (int x = 0; x < kViewport; ++x)
+ for (int s = 0; s < 2; ++s)
+ {
+ unsigned int overdraw = buffer->overdraw[y][x][s];
+
+ result.pixels_covered += overdraw > 0;
+ result.pixels_shaded += overdraw;
+ }
+ }
+
+ result.overdraw = result.pixels_covered ? float(result.pixels_shaded) / float(result.pixels_covered) : 0.f;
+
+ return result;
+}
diff --git a/thirdparty/meshoptimizer/overdrawoptimizer.cpp b/thirdparty/meshoptimizer/overdrawoptimizer.cpp
new file mode 100644
index 0000000000..143656ed76
--- /dev/null
+++ b/thirdparty/meshoptimizer/overdrawoptimizer.cpp
@@ -0,0 +1,333 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+// This work is based on:
+// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
+namespace meshopt
+{
+
+static void calculateSortData(float* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count)
+{ // fills sort_data[cluster] with dot(cluster centroid - mesh centroid, unit cluster normal) for every cluster
+ size_t vertex_stride_float = vertex_positions_stride / sizeof(float); // stride converted from bytes to float elements
+
+ float mesh_centroid[3] = {}; // plain average over all index references, so a shared vertex counts once per use
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ const float* p = vertex_positions + vertex_stride_float * indices[i];
+
+ mesh_centroid[0] += p[0];
+ mesh_centroid[1] += p[1];
+ mesh_centroid[2] += p[2];
+ }
+
+ mesh_centroid[0] /= index_count;
+ mesh_centroid[1] /= index_count;
+ mesh_centroid[2] /= index_count;
+
+ for (size_t cluster = 0; cluster < cluster_count; ++cluster)
+ {
+ size_t cluster_begin = clusters[cluster] * 3; // clusters[] holds triangle numbers; * 3 turns them into offsets into indices
+ size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count; // the last cluster runs to the end of the index buffer
+ assert(cluster_begin < cluster_end);
+
+ float cluster_area = 0;
+ float cluster_centroid[3] = {};
+ float cluster_normal[3] = {};
+
+ for (size_t i = cluster_begin; i < cluster_end; i += 3)
+ {
+ const float* p0 = vertex_positions + vertex_stride_float * indices[i + 0];
+ const float* p1 = vertex_positions + vertex_stride_float * indices[i + 1];
+ const float* p2 = vertex_positions + vertex_stride_float * indices[i + 2];
+
+ float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
+ float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};
+
+ float normalx = p10[1] * p20[2] - p10[2] * p20[1]; // (normalx, normaly, normalz) = cross(p1 - p0, p2 - p0)
+ float normaly = p10[2] * p20[0] - p10[0] * p20[2];
+ float normalz = p10[0] * p20[1] - p10[1] * p20[0];
+
+ float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz); // cross product length (twice the triangle area); the factor cancels in the normalization below
+
+ cluster_centroid[0] += (p0[0] + p1[0] + p2[0]) * (area / 3); // triangle centroid weighted by its area
+ cluster_centroid[1] += (p0[1] + p1[1] + p2[1]) * (area / 3);
+ cluster_centroid[2] += (p0[2] + p1[2] + p2[2]) * (area / 3);
+ cluster_normal[0] += normalx; // unnormalized normals are summed, so larger triangles contribute more
+ cluster_normal[1] += normaly;
+ cluster_normal[2] += normalz;
+ cluster_area += area;
+ }
+
+ float inv_cluster_area = cluster_area == 0 ? 0 : 1 / cluster_area; // zero-area cluster: centroid collapses to 0 instead of dividing by zero
+
+ cluster_centroid[0] *= inv_cluster_area;
+ cluster_centroid[1] *= inv_cluster_area;
+ cluster_centroid[2] *= inv_cluster_area;
+
+ float cluster_normal_length = sqrtf(cluster_normal[0] * cluster_normal[0] + cluster_normal[1] * cluster_normal[1] + cluster_normal[2] * cluster_normal[2]);
+ float inv_cluster_normal_length = cluster_normal_length == 0 ? 0 : 1 / cluster_normal_length; // zero-length normal stays zero, which makes the sort value 0
+
+ cluster_normal[0] *= inv_cluster_normal_length;
+ cluster_normal[1] *= inv_cluster_normal_length;
+ cluster_normal[2] *= inv_cluster_normal_length;
+
+ float centroid_vector[3] = {cluster_centroid[0] - mesh_centroid[0], cluster_centroid[1] - mesh_centroid[1], cluster_centroid[2] - mesh_centroid[2]};
+
+ sort_data[cluster] = centroid_vector[0] * cluster_normal[0] + centroid_vector[1] * cluster_normal[1] + centroid_vector[2] * cluster_normal[2]; // positive when the cluster normal points away from the mesh centroid
+ }
+}
+
+static void calculateSortOrderRadix(unsigned int* sort_order, const float* sort_data, unsigned short* sort_keys, size_t cluster_count)
+{ // writes into sort_order the cluster indices ordered by descending sort_data, using a single counting-sort pass over quantized keys
+ // compute sort data bounds and renormalize, using fixed point snorm
+ float sort_data_max = 1e-3f; // lower bound keeps the division below away from zero when all values are tiny
+
+ for (size_t i = 0; i < cluster_count; ++i)
+ {
+ float dpa = fabsf(sort_data[i]);
+
+ sort_data_max = (sort_data_max < dpa) ? dpa : sort_data_max;
+ }
+
+ const int sort_bits = 11; // 2^11 = 2048 histogram buckets
+
+ for (size_t i = 0; i < cluster_count; ++i)
+ {
+ // note that we flip distribution since high dot product should come first
+ float sort_key = 0.5f - 0.5f * (sort_data[i] / sort_data_max); // maps [-max, max] onto [1, 0]
+
+ sort_keys[i] = meshopt_quantizeUnorm(sort_key, sort_bits) & ((1 << sort_bits) - 1); // mask guarantees the key is a valid histogram index
+ }
+
+ // fill histogram for counting sort
+ unsigned int histogram[1 << sort_bits];
+ memset(histogram, 0, sizeof(histogram));
+
+ for (size_t i = 0; i < cluster_count; ++i)
+ {
+ histogram[sort_keys[i]]++;
+ }
+
+ // compute offsets based on histogram data
+ size_t histogram_sum = 0;
+
+ for (size_t i = 0; i < 1 << sort_bits; ++i)
+ {
+ size_t count = histogram[i];
+ histogram[i] = unsigned(histogram_sum); // exclusive prefix sum: first output slot for this key
+ histogram_sum += count;
+ }
+
+ assert(histogram_sum == cluster_count);
+
+ // compute sort order based on offsets
+ for (size_t i = 0; i < cluster_count; ++i)
+ {
+ sort_order[histogram[sort_keys[i]]++] = unsigned(i); // clusters are scattered in increasing i, so equal keys keep their original order
+ }
+}
+
+static unsigned int updateCache(unsigned int a, unsigned int b, unsigned int c, unsigned int cache_size, unsigned int* cache_timestamps, unsigned int& timestamp)
+{ // simulates pushing triangle (a, b, c) through a timestamp-based vertex cache model; returns the number of misses (0..3) and advances timestamp once per miss
+ unsigned int cache_misses = 0;
+
+ // if vertex is not in cache, put it in cache
+ if (timestamp - cache_timestamps[a] > cache_size) // a vertex counts as cached while its stamp is at most cache_size behind the current timestamp
+ {
+ cache_timestamps[a] = timestamp++;
+ cache_misses++;
+ }
+
+ if (timestamp - cache_timestamps[b] > cache_size) // timestamp may already have been bumped by a, so the checks are order dependent
+ {
+ cache_timestamps[b] = timestamp++;
+ cache_misses++;
+ }
+
+ if (timestamp - cache_timestamps[c] > cache_size)
+ {
+ cache_timestamps[c] = timestamp++;
+ cache_misses++;
+ }
+
+ return cache_misses;
+}
+
+static size_t generateHardBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int* cache_timestamps)
+{ // writes to destination the triangle numbers that start a new cluster (all three vertices miss the simulated cache) and returns how many were written
+ memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
+
+ unsigned int timestamp = cache_size + 1; // with all stamps zeroed, this start value makes every vertex read as "not in cache"
+
+ size_t face_count = index_count / 3;
+
+ size_t result = 0;
+
+ for (size_t i = 0; i < face_count; ++i)
+ {
+ unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);
+
+ // when all three vertices are not in the cache it's usually relatively safe to assume that this is a new patch in the mesh
+ // that is disjoint from previous vertices; sometimes it might come back to reference existing vertices but that frequently
+ // suggests an inefficiency in the vertex cache optimization algorithm
+ // usually the first triangle has 3 misses unless it's degenerate - thus we make sure the first cluster always starts with 0
+ if (i == 0 || m == 3)
+ {
+ destination[result++] = unsigned(i);
+ }
+ }
+
+ assert(result <= index_count / 3); // at most one boundary per triangle, so destination needs index_count / 3 entries
+
+ return result;
+}
+
+static size_t generateSoftBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* clusters, size_t cluster_count, unsigned int cache_size, float threshold, unsigned int* cache_timestamps)
+{ // subdivides each input cluster at points where the running miss rate drops to threshold * (cluster miss rate); writes the boundaries to destination and returns their count
+ memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
+
+ unsigned int timestamp = 0;
+
+ size_t result = 0;
+
+ for (size_t it = 0; it < cluster_count; ++it)
+ {
+ size_t start = clusters[it]; // cluster boundaries are expressed in triangles, not indices
+ size_t end = (it + 1 < cluster_count) ? clusters[it + 1] : index_count / 3;
+ assert(start < end);
+
+ // reset cache
+ timestamp += cache_size + 1; // jumping ahead makes every stored stamp older than cache_size, i.e. empties the simulated cache
+
+ // measure cluster ACMR
+ unsigned int cluster_misses = 0;
+
+ for (size_t i = start; i < end; ++i)
+ {
+ unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);
+
+ cluster_misses += m;
+ }
+
+ float cluster_threshold = threshold * (float(cluster_misses) / float(end - start)); // target misses-per-triangle for sub-clusters
+
+ // first cluster always starts from the hard cluster boundary
+ destination[result++] = unsigned(start);
+
+ // reset cache
+ timestamp += cache_size + 1;
+
+ unsigned int running_misses = 0;
+ unsigned int running_faces = 0;
+
+ for (size_t i = start; i < end; ++i)
+ {
+ unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);
+
+ running_misses += m;
+ running_faces += 1;
+
+ if (float(running_misses) / float(running_faces) <= cluster_threshold)
+ {
+ // we have reached the target ACMR with the current triangle so we need to start a new cluster on the next one
+ // note that this may mean that we add 'end' to destination for the last triangle, which will imply that the last
+ // cluster is empty; however, the 'result--' after the loop will clean it up
+ destination[result++] = unsigned(i + 1);
+
+ // reset cache
+ timestamp += cache_size + 1;
+
+ running_misses = 0;
+ running_faces = 0;
+ }
+ }
+
+ // each time we reach the target ACMR we flush the cluster
+ // this means that the last cluster is by definition not very good - there are frequent cases where we are left with a few triangles
+ // in the last cluster, producing a very bad ACMR and significantly penalizing the overall results
+ // thus we remove the last cluster boundary, merging the last complete cluster with the last incomplete one
+ // there are sometimes cases when the last cluster is actually good enough - in which case the code above would have added 'end'
+ // to the cluster boundary array which we need to remove anyway - this code will do that automatically
+ if (destination[result - 1] != start) // never drop the boundary that opens this input cluster
+ {
+ result--;
+ }
+ }
+
+ assert(result >= cluster_count);
+ assert(result <= index_count / 3); // final count; destination can transiently hold one more entry before the result-- above
+
+ return result;
+}
+
+} // namespace meshopt
+
+void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold)
+{ // reorders whole clusters of triangles from indices into destination (same length), sorted by the per-cluster value from calculateSortData; destination may alias indices
+ using namespace meshopt;
+
+ assert(index_count % 3 == 0);
+ assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+ assert(vertex_positions_stride % sizeof(float) == 0);
+
+ meshopt_Allocator allocator; // NOTE(review): presumably releases all allocations when it goes out of scope - confirm in meshoptimizer.h
+
+ // guard for empty meshes
+ if (index_count == 0 || vertex_count == 0)
+ return;
+
+ // support in-place optimization
+ if (destination == indices)
+ {
+ unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
+ memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
+ indices = indices_copy; // read from the copy so writes to destination cannot clobber unread input
+ }
+
+ unsigned int cache_size = 16; // size of the simulated vertex cache used to detect cluster boundaries
+
+ unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
+
+ // generate hard boundaries from full-triangle cache misses
+ unsigned int* hard_clusters = allocator.allocate<unsigned int>(index_count / 3);
+ size_t hard_cluster_count = generateHardBoundaries(hard_clusters, indices, index_count, vertex_count, cache_size, cache_timestamps);
+
+ // generate soft boundaries
+ unsigned int* soft_clusters = allocator.allocate<unsigned int>(index_count / 3 + 1); // + 1: generateSoftBoundaries can briefly append an 'end' boundary before removing it
+ size_t soft_cluster_count = generateSoftBoundaries(soft_clusters, indices, index_count, vertex_count, hard_clusters, hard_cluster_count, cache_size, threshold, cache_timestamps);
+
+ const unsigned int* clusters = soft_clusters;
+ size_t cluster_count = soft_cluster_count;
+
+ // fill sort data
+ float* sort_data = allocator.allocate<float>(cluster_count);
+ calculateSortData(sort_data, indices, index_count, vertex_positions, vertex_positions_stride, clusters, cluster_count);
+
+ // sort clusters using sort data
+ unsigned short* sort_keys = allocator.allocate<unsigned short>(cluster_count);
+ unsigned int* sort_order = allocator.allocate<unsigned int>(cluster_count);
+ calculateSortOrderRadix(sort_order, sort_data, sort_keys, cluster_count);
+
+ // fill output buffer
+ size_t offset = 0;
+
+ for (size_t it = 0; it < cluster_count; ++it)
+ {
+ unsigned int cluster = sort_order[it];
+ assert(cluster < cluster_count);
+
+ size_t cluster_begin = clusters[cluster] * 3; // triangle number -> index offset
+ size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count;
+ assert(cluster_begin < cluster_end);
+
+ memcpy(destination + offset, indices + cluster_begin, (cluster_end - cluster_begin) * sizeof(unsigned int)); // triangle order inside a cluster is preserved
+ offset += cluster_end - cluster_begin;
+ }
+
+ assert(offset == index_count); // every triangle was emitted exactly once
+}
diff --git a/thirdparty/meshoptimizer/patches/simplifier_get_resulting_error.patch b/thirdparty/meshoptimizer/patches/simplifier_get_resulting_error.patch
new file mode 100644
index 0000000000..1be38e45d2
--- /dev/null
+++ b/thirdparty/meshoptimizer/patches/simplifier_get_resulting_error.patch
@@ -0,0 +1,96 @@
+diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h
+index a442d103c8..fde00f9c82 100644
+--- a/thirdparty/meshoptimizer/meshoptimizer.h
++++ b/thirdparty/meshoptimizer/meshoptimizer.h
+@@ -266,7 +266,10 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t ver
+ * destination must contain enough space for the *source* index buffer (since optimization is iterative, this means index_count elements - *not* target_index_count!)
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ */
+-MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error);
++// -- GODOT start --
++//MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error);
++MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *r_resulting_error);
++// -- GODOT end --
+
+ /**
+ * Experimental: Mesh simplifier (sloppy)
+diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp
+index bd523275ce..51cf634186 100644
+--- a/thirdparty/meshoptimizer/simplifier.cpp
++++ b/thirdparty/meshoptimizer/simplifier.cpp
+@@ -1143,7 +1143,10 @@ unsigned int* meshopt_simplifyDebugLoop = 0;
+ unsigned int* meshopt_simplifyDebugLoopBack = 0;
+ #endif
+
+-size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error)
++// -- GODOT start --
++//size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error)
++size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *r_resulting_error)
++// -- GODOT end --
+ {
+ using namespace meshopt;
+
+@@ -1198,10 +1201,13 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
+ if (result != indices)
+ memcpy(result, indices, index_count * sizeof(unsigned int));
+
++// -- GODOT start --
+ #if TRACE
+ size_t pass_count = 0;
+- float worst_error = 0;
++ //float worst_error = 0;
+ #endif
++ float worst_error = 0;
++// -- GODOT end --
+
+ Collapse* edge_collapses = allocator.allocate<Collapse>(index_count);
+ unsigned int* collapse_order = allocator.allocate<unsigned int>(index_count);
+@@ -1213,6 +1219,12 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
+ // target_error input is linear; we need to adjust it to match quadricError units
+ float error_limit = target_error * target_error;
+
++// -- GODOT start --
++ if (r_resulting_error) {
++ *r_resulting_error = 1.0;
++ }
++// -- GODOT end --
++
+ while (result_count > target_index_count)
+ {
+ size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, result, result_count, remap, vertex_kind, loop);
+@@ -1257,7 +1269,8 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
+ size_t new_count = remapIndexBuffer(result, result_count, collapse_remap);
+ assert(new_count < result_count);
+
+-#if TRACE
++// -- GODOT start --
++//#if TRACE
+ float pass_error = 0.f;
+ for (size_t i = 0; i < edge_collapse_count; ++i)
+ {
+@@ -1267,15 +1280,24 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
+ pass_error = c.error;
+ }
+
+- pass_count++;
++ //pass_count++;
+ worst_error = (worst_error < pass_error) ? pass_error : worst_error;
+
++#if TRACE
++ pass_count++;
+ printf("pass %d: triangles: %d -> %d, collapses: %d/%d (goal: %d), error: %e (limit %e goal %e)\n", int(pass_count), int(result_count / 3), int(new_count / 3), int(collapses), int(edge_collapse_count), int(edge_collapse_goal), pass_error, error_limit, error_goal);
+ #endif
++// -- GODOT end --
+
+ result_count = new_count;
+ }
+
++// -- GODOT start --
++ if (r_resulting_error) {
++ *r_resulting_error = sqrt(worst_error);
++ }
++// -- GODOT end --
++
+ #if TRACE
+ printf("passes: %d, worst error: %e\n", int(pass_count), worst_error);
+ #endif
diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp
new file mode 100644
index 0000000000..b195a8cb5d
--- /dev/null
+++ b/thirdparty/meshoptimizer/simplifier.cpp
@@ -0,0 +1,1562 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+#include <assert.h>
+#include <float.h>
+#include <math.h>
+#include <string.h>
+
+
+#ifndef TRACE
+#define TRACE 0
+#endif
+
+#if TRACE
+#include <stdio.h>
+#endif
+
+// This work is based on:
+// Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997
+// Michael Garland. Quadric-based polygonal surface simplification. 1999
+// Peter Lindstrom. Out-of-Core Simplification of Large Polygonal Models. 2000
+// Matthias Teschner, Bruno Heidelberger, Matthias Mueller, Danat Pomeranets, Markus Gross. Optimized Spatial Hashing for Collision Detection of Deformable Objects. 2003
+// Peter Van Sandt, Yannis Chronis, Jignesh M. Patel. Efficiently Searching In-Memory Sorted Arrays: Revenge of the Interpolation Search? 2019
+namespace meshopt
+{
+
+struct EdgeAdjacency
+{ // per-vertex lists of outgoing directed edges, stored as one flat array (filled by buildEdgeAdjacency)
+ unsigned int* counts; // counts[v]: number of outgoing edges of vertex v
+ unsigned int* offsets; // offsets[v]: position in data where the edges of vertex v start
+ unsigned int* data; // target vertex of each edge, grouped by source vertex
+};
+
+static void buildEdgeAdjacency(EdgeAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
+{ // allocates and fills adjacency so that each vertex lists the targets of the directed triangle edges leaving it (a->b, b->c, c->a)
+ size_t face_count = index_count / 3;
+
+ // allocate arrays
+ adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
+ adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
+ adjacency.data = allocator.allocate<unsigned int>(index_count); // every index is the source of exactly one edge
+
+ // fill edge counts
+ memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ assert(indices[i] < vertex_count);
+
+ adjacency.counts[indices[i]]++;
+ }
+
+ // fill offset table
+ unsigned int offset = 0;
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ adjacency.offsets[i] = offset; // exclusive prefix sum of counts
+ offset += adjacency.counts[i];
+ }
+
+ assert(offset == index_count);
+
+ // fill edge data
+ for (size_t i = 0; i < face_count; ++i)
+ {
+ unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+
+ adjacency.data[adjacency.offsets[a]++] = b; // offsets double as write cursors during this pass
+ adjacency.data[adjacency.offsets[b]++] = c;
+ adjacency.data[adjacency.offsets[c]++] = a;
+ }
+
+ // fix offsets that have been disturbed by the previous pass
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ assert(adjacency.offsets[i] >= adjacency.counts[i]);
+
+ adjacency.offsets[i] -= adjacency.counts[i]; // each cursor advanced by counts[i]; rewind it to the list start
+ }
+}
+
+struct PositionHasher
+{ // hash/equality functor over vertex indices that compares the raw bits of the first three floats of each vertex
+ const float* vertex_positions;
+ size_t vertex_stride_float; // distance between consecutive vertices, in floats
+
+ size_t hash(unsigned int index) const
+ {
+ const unsigned int* key = reinterpret_cast<const unsigned int*>(vertex_positions + index * vertex_stride_float); // position floats reinterpreted as 32-bit integers
+
+ // Optimized Spatial Hashing for Collision Detection of Deformable Objects
+ return (key[0] * 73856093) ^ (key[1] * 19349663) ^ (key[2] * 83492791);
+ }
+
+ bool equal(unsigned int lhs, unsigned int rhs) const
+ {
+ return memcmp(vertex_positions + lhs * vertex_stride_float, vertex_positions + rhs * vertex_stride_float, sizeof(float) * 3) == 0; // bitwise comparison, so +0.0f and -0.0f are treated as different
+ }
+};
+
+static size_t hashBuckets2(size_t count)
+{ // returns the smallest power of two that is >= count (1 when count is 0 or 1)
+ size_t buckets = 1;
+ while (buckets < count)
+ buckets *= 2;
+
+ return buckets;
+}
+
+template <typename T, typename Hash>
+static T* hashLookup2(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty)
+{ // open-addressing lookup: returns the slot holding an item equal to key, or the first empty slot on its probe sequence; buckets must be a power of two
+ assert(buckets > 0);
+ assert((buckets & (buckets - 1)) == 0);
+
+ size_t hashmod = buckets - 1; // power-of-two size lets "& hashmod" replace the modulo
+ size_t bucket = hash.hash(key) & hashmod;
+
+ for (size_t probe = 0; probe <= hashmod; ++probe)
+ {
+ T& item = table[bucket];
+
+ if (item == empty)
+ return &item; // key is absent; the caller may store it here
+
+ if (hash.equal(item, key))
+ return &item;
+
+ // hash collision, quadratic probing
+ bucket = (bucket + probe + 1) & hashmod; // step grows by one each iteration (offsets 1, 3, 6, ...)
+ }
+
+ assert(false && "Hash table is full"); // unreachable
+ return 0;
+}
+
+static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator)
+{ // fills remap[i] with the first vertex sharing the position of i, and wedge[] with a cyclic list linking all vertices at the same position
+ PositionHasher hasher = {vertex_positions_data, vertex_positions_stride / sizeof(float)};
+
+ size_t table_size = hashBuckets2(vertex_count);
+ unsigned int* table = allocator.allocate<unsigned int>(table_size);
+ memset(table, -1, table_size * sizeof(unsigned int)); // all bytes 0xff, i.e. every slot holds the ~0u "empty" marker
+
+ // build forward remap: for each vertex, which other (canonical) vertex does it map to?
+ // we use position equivalence for this, and remap vertices to other existing vertices
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ unsigned int index = unsigned(i);
+ unsigned int* entry = hashLookup2(table, table_size, hasher, index, ~0u);
+
+ if (*entry == ~0u)
+ *entry = index; // first vertex seen at this position becomes the canonical one
+
+ remap[index] = *entry;
+ }
+
+ // build wedge table: for each vertex, which other vertex is the next wedge that also maps to the same vertex?
+ // entries in table form a (cyclic) wedge loop per vertex; for manifold vertices, wedge[i] == remap[i] == i
+ for (size_t i = 0; i < vertex_count; ++i)
+ wedge[i] = unsigned(i);
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ if (remap[i] != i)
+ {
+ unsigned int r = remap[i];
+
+ wedge[i] = wedge[r]; // splice i into the cycle right after the canonical vertex r
+ wedge[r] = unsigned(i);
+ }
+}
+
+enum VertexKind
+{ // topological classification assigned to each vertex by classifyVertices
+ Kind_Manifold, // not on an attribute seam, not on any boundary
+ Kind_Border, // not on an attribute seam, has exactly two open edges
+ Kind_Seam, // on an attribute seam with exactly two attribute seam edges
+ Kind_Complex, // none of the above; these vertices can move as long as all wedges move to the target vertex
+ Kind_Locked, // none of the above; these vertices can't move
+
+ Kind_Count // number of kinds; used as the dimension of the kCanCollapse / kHasOpposite tables
+};
+
+// manifold vertices can collapse onto anything
+// border/seam vertices can only be collapsed onto border/seam respectively
+// complex vertices can collapse onto complex/locked
+// a rule of thumb is that collapsing kind A into kind B preserves the kind B in the target vertex
+// for example, while we could collapse Complex into Manifold, this would mean the target vertex isn't Manifold anymore
+const unsigned char kCanCollapse[Kind_Count][Kind_Count] = { // [kind of collapsed vertex][kind of target vertex]
+ {1, 1, 1, 1, 1}, // Manifold -> any kind
+ {0, 1, 0, 0, 0}, // Border -> Border only
+ {0, 0, 1, 0, 0}, // Seam -> Seam only
+ {0, 0, 0, 1, 1}, // Complex -> Complex or Locked
+ {0, 0, 0, 0, 0}, // Locked never collapses
+};
+
+// if a vertex is manifold or seam, adjoining edges are guaranteed to have an opposite edge
+// note that for seam edges, the opposite edge isn't present in the attribute-based topology
+// but is present if you consider a position-only mesh variant
+const unsigned char kHasOpposite[Kind_Count][Kind_Count] = { // indexed by the kinds of the two edge endpoints
+ {1, 1, 1, 0, 1}, // first endpoint Manifold
+ {1, 0, 1, 0, 0}, // first endpoint Border
+ {1, 1, 1, 0, 1}, // first endpoint Seam
+ {0, 0, 0, 0, 0}, // first endpoint Complex
+ {1, 0, 1, 0, 0}, // first endpoint Locked
+};
+
+static bool hasEdge(const EdgeAdjacency& adjacency, unsigned int a, unsigned int b)
+{ // returns true when the directed edge a->b is present; linear scan over the outgoing edges of a
+ unsigned int count = adjacency.counts[a];
+ const unsigned int* data = adjacency.data + adjacency.offsets[a];
+
+ for (size_t i = 0; i < count; ++i)
+ if (data[i] == b)
+ return true;
+
+ return false;
+}
+
+static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned int* loopback, size_t vertex_count, const EdgeAdjacency& adjacency, const unsigned int* remap, const unsigned int* wedge)
+{ // assigns a VertexKind to every vertex in result[] and records open-edge neighbours in loop[] (outgoing) and loopback[] (incoming)
+ memset(loop, -1, vertex_count * sizeof(unsigned int)); // ~0u everywhere = "no open edge seen yet"
+ memset(loopback, -1, vertex_count * sizeof(unsigned int));
+
+ // incoming & outgoing open edges: ~0u if no open edges, i if there are more than 1
+ // note that this is the same data as required in loop[] arrays; loop[] data is only valid for border/seam
+ // but here it's okay to fill the data out for other types of vertices as well
+ unsigned int* openinc = loopback;
+ unsigned int* openout = loop;
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ unsigned int vertex = unsigned(i);
+
+ unsigned int count = adjacency.counts[vertex];
+ const unsigned int* data = adjacency.data + adjacency.offsets[vertex];
+
+ for (size_t j = 0; j < count; ++j)
+ {
+ unsigned int target = data[j];
+
+ if (!hasEdge(adjacency, target, vertex)) // edge vertex->target is open when the reverse edge does not exist
+ {
+ openinc[target] = (openinc[target] == ~0u) ? vertex : target; // a second open edge overwrites the entry with the vertex's own index
+ openout[vertex] = (openout[vertex] == ~0u) ? target : vertex;
+ }
+ }
+ }
+
+#if TRACE
+ size_t lockedstats[4] = {};
+#define TRACELOCKED(i) lockedstats[i]++;
+#else
+#define TRACELOCKED(i) (void)0
+#endif
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ if (remap[i] == i) // canonical vertex for its position: classify it here
+ {
+ if (wedge[i] == i) // wedge cycle of length 1: no other vertex shares this position
+ {
+ // no attribute seam, need to check if it's manifold
+ unsigned int openi = openinc[i], openo = openout[i];
+
+ // note: we classify any vertices with no open edges as manifold
+ // this is technically incorrect - if 4 triangles share an edge, we'll classify vertices as manifold
+ // it's unclear if this is a problem in practice
+ if (openi == ~0u && openo == ~0u)
+ {
+ result[i] = Kind_Manifold;
+ }
+ else if (openi != i && openo != i) // neither direction saw more than one open edge
+ {
+ result[i] = Kind_Border;
+ }
+ else
+ {
+ result[i] = Kind_Locked;
+ TRACELOCKED(0);
+ }
+ }
+ else if (wedge[wedge[i]] == i) // wedge cycle of length 2: exactly two vertices share this position
+ {
+ // attribute seam; need to distinguish between Seam and Locked
+ unsigned int w = wedge[i];
+ unsigned int openiv = openinc[i], openov = openout[i];
+ unsigned int openiw = openinc[w], openow = openout[w];
+
+ // seam should have one open half-edge for each vertex, and the edges need to "connect" - point to the same vertex post-remap
+ if (openiv != ~0u && openiv != i && openov != ~0u && openov != i &&
+ openiw != ~0u && openiw != w && openow != ~0u && openow != w)
+ {
+ if (remap[openiv] == remap[openow] && remap[openov] == remap[openiw])
+ {
+ result[i] = Kind_Seam;
+ }
+ else
+ {
+ result[i] = Kind_Locked;
+ TRACELOCKED(1);
+ }
+ }
+ else
+ {
+ result[i] = Kind_Locked;
+ TRACELOCKED(2);
+ }
+ }
+ else
+ {
+ // more than one vertex maps to this one; we don't have classification available
+ result[i] = Kind_Locked;
+ TRACELOCKED(3);
+ }
+ }
+ else
+ {
+ assert(remap[i] < i); // the canonical vertex has a smaller index, so its kind is already computed
+
+ result[i] = result[remap[i]];
+ }
+ }
+
+#if TRACE
+ printf("locked: many open edges %d, disconnected seam %d, many seam edges %d, many wedges %d\n",
+ int(lockedstats[0]), int(lockedstats[1]), int(lockedstats[2]), int(lockedstats[3]));
+#endif
+}
+
+struct Vector3
+{ // plain 3-component float vector used for positions and directions in the simplifier
+ float x, y, z;
+};
+// -- GODOT start --
+//static void rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride)
+static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride)
+// -- GODOT end --
+
+{ // copies positions into result, uniformly rescaled so the bounding box starts at 0 and its longest side is 1; returns that longest side (Godot change)
+ size_t vertex_stride_float = vertex_positions_stride / sizeof(float); // stride converted from bytes to float elements
+
+ float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
+ float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ const float* v = vertex_positions_data + i * vertex_stride_float;
+
+ result[i].x = v[0]; // store the raw position first; it is rescaled in the second pass
+ result[i].y = v[1];
+ result[i].z = v[2];
+
+ for (int j = 0; j < 3; ++j)
+ {
+ float vj = v[j];
+
+ minv[j] = minv[j] > vj ? vj : minv[j];
+ maxv[j] = maxv[j] < vj ? vj : maxv[j];
+ }
+ }
+
+ float extent = 0.f; // largest bounding-box side; stays 0 when vertex_count is 0 or all points coincide
+
+ extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]);
+ extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]);
+ extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]);
+
+ float scale = extent == 0 ? 0.f : 1.f / extent; // degenerate input maps every position to the origin
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ result[i].x = (result[i].x - minv[0]) * scale;
+ result[i].y = (result[i].y - minv[1]) * scale;
+ result[i].z = (result[i].z - minv[2]) * scale;
+ }
+// -- GODOT start --
+ return extent;
+// -- GODOT end --
+
+}
+
+struct Quadric
+{ // weighted quadric v^T A v + 2 b.v + c, evaluated by quadricError
+ float a00, a11, a22; // diagonal of the symmetric matrix A
+ float a10, a20, a21; // off-diagonal entries of A (xy, xz, yz)
+ float b0, b1, b2, c; // linear term b and constant term c
+ float w; // accumulated weight; quadricError divides by it
+};
+
+struct Collapse
+{ // candidate edge collapse between vertices v0 and v1
+ unsigned int v0;
+ unsigned int v1;
+
+ union
+ { // the three members share storage; NOTE(review): presumably bidi is a flag that is later replaced by the computed error - confirm at the use sites
+ unsigned int bidi;
+ float error;
+ unsigned int errorui; // presumably the bit pattern of error, for integer-based sorting - confirm at the use sites
+ };
+};
+
+static float normalize(Vector3& v)
+{ // scales v to unit length in place and returns its original length; a zero vector is left unchanged and 0 is returned
+ float length = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z);
+
+ if (length > 0)
+ {
+ v.x /= length;
+ v.y /= length;
+ v.z /= length;
+ }
+
+ return length;
+}
+
+static void quadricAdd(Quadric& Q, const Quadric& R)
+{ // accumulates R into Q component-wise, including the weight
+ Q.a00 += R.a00;
+ Q.a11 += R.a11;
+ Q.a22 += R.a22;
+ Q.a10 += R.a10;
+ Q.a20 += R.a20;
+ Q.a21 += R.a21;
+ Q.b0 += R.b0;
+ Q.b1 += R.b1;
+ Q.b2 += R.b2;
+ Q.c += R.c;
+ Q.w += R.w;
+}
+
+static float quadricError(const Quadric& Q, const Vector3& v)
+{ // returns |v^T A v + 2 b.v + c| / w for the quadric Q at point v (0 when w is 0)
+ float rx = Q.b0;
+ float ry = Q.b1;
+ float rz = Q.b2;
+
+ rx += Q.a10 * v.y; // each off-diagonal term is folded into exactly one component so that it gets doubled once below
+ ry += Q.a21 * v.z;
+ rz += Q.a20 * v.x;
+
+ rx *= 2; // doubles both the linear (b) terms and the off-diagonal terms
+ ry *= 2;
+ rz *= 2;
+
+ rx += Q.a00 * v.x; // diagonal terms are added after the doubling
+ ry += Q.a11 * v.y;
+ rz += Q.a22 * v.z;
+
+ float r = Q.c;
+ r += rx * v.x;
+ r += ry * v.y;
+ r += rz * v.z;
+
+ float s = Q.w == 0.f ? 0.f : 1.f / Q.w; // avoid dividing by a zero weight
+
+ return fabsf(r) * s;
+}
+
+static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, float w)
+{ // builds the quadric of w * (a*x + b*y + c*z + d)^2, i.e. A = w*n*n^T, b = w*d*n, c = w*d^2 with n = (a, b, c)
+ float aw = a * w;
+ float bw = b * w;
+ float cw = c * w;
+ float dw = d * w;
+
+ Q.a00 = a * aw;
+ Q.a11 = b * bw;
+ Q.a22 = c * cw;
+ Q.a10 = a * bw;
+ Q.a20 = a * cw;
+ Q.a21 = b * cw;
+ Q.b0 = a * dw; // stored without the factor 2; quadricError applies it
+ Q.b1 = b * dw;
+ Q.b2 = c * dw;
+ Q.c = d * dw;
+ Q.w = w;
+}
+
+static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w)
+{ // builds a quadric for which quadricError(Q, v) is the squared distance from v to the point (x, y, z); w is the weight
+ // we need to encode (x - X) ^ 2 + (y - Y)^2 + (z - Z)^2 into the quadric
+ Q.a00 = w;
+ Q.a11 = w;
+ Q.a22 = w;
+ Q.a10 = 0.f;
+ Q.a20 = 0.f;
+ Q.a21 = 0.f;
+ Q.b0 = -x * w; // quadricError doubles the b terms itself, so b holds -p (not -2p), matching b = d * n in quadricFromPlane
+ Q.b1 = -y * w;
+ Q.b2 = -z * w;
+ Q.c = (x * x + y * y + z * z) * w;
+ Q.w = w;
+}
+
+static void quadricFromTriangle(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight)
+{ // builds the plane quadric of triangle (p0, p1, p2), weighted by sqrt of the cross product length times weight
+ Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z};
+ Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z};
+
+ // normal = cross(p1 - p0, p2 - p0)
+ Vector3 normal = {p10.y * p20.z - p10.z * p20.y, p10.z * p20.x - p10.x * p20.z, p10.x * p20.y - p10.y * p20.x};
+ float area = normalize(normal); // length of the cross product, i.e. twice the geometric triangle area
+
+ float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z; // plane equation is dot(normal, v) - distance = 0
+
+ // we use sqrtf(area) so that the error is scaled linearly; this tends to improve silhouettes
+ quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, sqrtf(area) * weight);
+}
+
+static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight)
+{ // builds the quadric of the plane that contains edge p0-p1 and is perpendicular to the triangle, weighted by edge length times weight
+ Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z};
+ float length = normalize(p10); // p10 becomes the unit edge direction
+
+ // p20p = length of projection of p2-p0 onto normalize(p1 - p0)
+ Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z};
+ float p20p = p20.x * p10.x + p20.y * p10.y + p20.z * p10.z;
+
+ // normal = altitude of triangle from point p2 onto edge p1-p0
+ Vector3 normal = {p20.x - p10.x * p20p, p20.y - p10.y * p20p, p20.z - p10.z * p20p};
+ normalize(normal); // stays zero if p2 lies on the edge line, which makes the plane part of the quadric zero
+
+ float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z;
+
+ // note: the weight is scaled linearly with edge length; this has to match the triangle weight
+ quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight);
+}
+
+static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap)
+{ // adds each triangle's plane quadric to the accumulated quadrics of its three corners; vertex_quadrics is only added to, never reset here
+ for (size_t i = 0; i < index_count; i += 3)
+ {
+ unsigned int i0 = indices[i + 0];
+ unsigned int i1 = indices[i + 1];
+ unsigned int i2 = indices[i + 2];
+
+ Quadric Q;
+ quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f);
+
+ quadricAdd(vertex_quadrics[remap[i0]], Q); // accumulate on the remapped vertex so all entries mapping to it share one quadric
+ quadricAdd(vertex_quadrics[remap[i1]], Q);
+ quadricAdd(vertex_quadrics[remap[i2]], Q);
+ }
+}
+
+// Adds edge quadrics for border and seam edges to the quadrics of both edge endpoints (looked up through remap).
+static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback)
+{
+    // we try hard to maintain border edge geometry; seam edges can move more freely
+    // due to topological restrictions on collapses, seam quadrics slightly improve collapse structure but aren't critical
+    const float kEdgeWeightSeam = 1.f;
+    const float kEdgeWeightBorder = 10.f;
+
+    for (size_t tri = 0; tri < index_count; tri += 3)
+    {
+        for (int e = 0; e < 3; ++e)
+        {
+            unsigned int i0 = indices[tri + e];
+            unsigned int i1 = indices[tri + (e + 1) % 3];
+
+            unsigned char k0 = vertex_kind[i0];
+            unsigned char k1 = vertex_kind[i1];
+
+            bool open0 = (k0 == Kind_Border || k0 == Kind_Seam);
+            bool open1 = (k1 == Kind_Border || k1 == Kind_Seam);
+
+            // at least one endpoint has to be border/seam
+            // note that we need to add the error even for edges that connect e.g. border & locked
+            // if we don't do that, the adjacent border->border edge won't have correct errors for corners
+            if (!open0 && !open1)
+                continue;
+
+            // every border/seam endpoint must have this edge on its edge loop
+            if (open0 && loop[i0] != i1)
+                continue;
+
+            if (open1 && loopback[i1] != i0)
+                continue;
+
+            // seam edges should occur twice (i0->i1 and i1->i0) - skip redundant edges
+            if (kHasOpposite[k0][k1] && remap[i1] > remap[i0])
+                continue;
+
+            // the remaining corner of the triangle defines the direction of the edge plane
+            unsigned int i2 = indices[tri + (e + 2) % 3];
+
+            bool touches_border = (k0 == Kind_Border || k1 == Kind_Border);
+            float edgeWeight = touches_border ? kEdgeWeightBorder : kEdgeWeightSeam;
+
+            Quadric Q;
+            quadricFromTriangleEdge(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], edgeWeight);
+
+            quadricAdd(vertex_quadrics[remap[i0]], Q);
+            quadricAdd(vertex_quadrics[remap[i1]], Q);
+        }
+    }
+}
+
+// Collects candidate edge collapses from the index buffer into collapses[] and returns how many were written.
+// Each candidate is tagged as bidirectional (direction chosen later during ranking) or stored in its only legal direction.
+static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices, size_t index_count, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop)
+{
+    size_t count = 0;
+
+    for (size_t tri = 0; tri < index_count; tri += 3)
+    {
+        for (int e = 0; e < 3; ++e)
+        {
+            unsigned int i0 = indices[tri + e];
+            unsigned int i1 = indices[tri + (e + 1) % 3];
+
+            // both endpoints share a position: this can happen either when input has a zero-length edge, or when
+            // we perform collapses for complex topology w/seams and collapse a manifold vertex that connects to
+            // both wedges onto one of them; such edges are left alone since they may be important for mesh integrity
+            if (remap[i0] == remap[i1])
+                continue;
+
+            unsigned char k0 = vertex_kind[i0];
+            unsigned char k1 = vertex_kind[i1];
+
+            int forward = kCanCollapse[k0][k1];
+            int backward = kCanCollapse[k1][k0];
+
+            // the edge has to be collapsible in at least one direction
+            if (!(forward | backward))
+                continue;
+
+            // manifold and seam edges should occur twice (i0->i1 and i1->i0) - skip redundant edges
+            if (kHasOpposite[k0][k1] && remap[i1] > remap[i0])
+                continue;
+
+            // two vertices are on a border or a seam, but there's no direct edge between them
+            // this indicates that they belong to two different edge loops and we should not collapse this edge
+            // loop[] tracks half edges so we only need to check i0->i1
+            bool same_open_kind = (k0 == k1) && (k0 == Kind_Border || k0 == Kind_Seam);
+
+            if (same_open_kind && loop[i0] != i1)
+                continue;
+
+            if (forward & backward)
+            {
+                // both directions are legal; the direction with the minimum error is picked during collapse ranking
+                Collapse c = {i0, i1, {/* bidi= */ 1}};
+                collapses[count++] = c;
+            }
+            else if (forward)
+            {
+                // only i0 -> i1 is legal
+                Collapse c = {i0, i1, {/* bidi= */ 0}};
+                collapses[count++] = c;
+            }
+            else
+            {
+                // only i1 -> i0 is legal
+                Collapse c = {i1, i0, {/* bidi= */ 0}};
+                collapses[count++] = c;
+            }
+        }
+    }
+
+    return count;
+}
+
+// Computes the quadric error of every candidate collapse; bidirectional candidates are evaluated in both
+// directions and rewritten to the direction with the smaller error.
+static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const Quadric* vertex_quadrics, const unsigned int* remap)
+{
+    for (size_t i = 0; i < collapse_count; ++i)
+    {
+        Collapse& c = collapses[i];
+
+        // forward direction: v0 -> v1
+        unsigned int f0 = c.v0;
+        unsigned int f1 = c.v1;
+
+        // alternative direction: the reversed edge for bidirectional collapses; unidirectional collapses
+        // simply evaluate the same edge twice, which keeps the code below uniform
+        unsigned int r0 = f0;
+        unsigned int r1 = f1;
+
+        if (c.bidi)
+        {
+            r0 = f1;
+            r1 = f0;
+        }
+
+        float forward_error = quadricError(vertex_quadrics[remap[f0]], vertex_positions[f1]);
+        float reverse_error = quadricError(vertex_quadrics[remap[r0]], vertex_positions[r1]);
+
+        // keep the direction with the minimal error; note that writing c.error replaces the bidi tag read above
+        if (forward_error <= reverse_error)
+        {
+            c.v0 = f0;
+            c.v1 = f1;
+            c.error = forward_error;
+        }
+        else
+        {
+            c.v0 = r0;
+            c.v1 = r1;
+            c.error = reverse_error;
+        }
+    }
+}
+
+#if TRACE > 1
+// Trace helper: prints, for every (source kind, target kind) pair that occurs, the number of candidate collapses
+// and the smallest error among them.
+static void dumpEdgeCollapses(const Collapse* collapses, size_t collapse_count, const unsigned char* vertex_kind)
+{
+    size_t ckinds[Kind_Count][Kind_Count] = {};
+    float cerrors[Kind_Count][Kind_Count];
+
+    // start every minimum at the largest representable error
+    for (int k0 = 0; k0 < Kind_Count; ++k0)
+        for (int k1 = 0; k1 < Kind_Count; ++k1)
+            cerrors[k0][k1] = FLT_MAX;
+
+    for (size_t i = 0; i < collapse_count; ++i)
+    {
+        const Collapse& c = collapses[i];
+
+        unsigned char k0 = vertex_kind[c.v0];
+        unsigned char k1 = vertex_kind[c.v1];
+
+        ckinds[k0][k1]++;
+
+        if (c.error < cerrors[k0][k1])
+            cerrors[k0][k1] = c.error;
+    }
+
+    for (int k0 = 0; k0 < Kind_Count; ++k0)
+    {
+        for (int k1 = 0; k1 < Kind_Count; ++k1)
+        {
+            if (ckinds[k0][k1] == 0)
+                continue;
+
+            printf("collapses %d -> %d: %d, min error %e\n", k0, k1, int(ckinds[k0][k1]), cerrors[k0][k1]);
+        }
+    }
+}
+
+// Trace helper: counts and prints, per (kind, kind) pair, the triangle edges that cannot be collapsed in either direction.
+static void dumpLockedCollapses(const unsigned int* indices, size_t index_count, const unsigned char* vertex_kind)
+{
+    size_t locked_collapses[Kind_Count][Kind_Count] = {};
+
+    for (size_t tri = 0; tri < index_count; tri += 3)
+    {
+        for (int e = 0; e < 3; ++e)
+        {
+            unsigned char k0 = vertex_kind[indices[tri + e]];
+            unsigned char k1 = vertex_kind[indices[tri + (e + 1) % 3]];
+
+            // an edge is locked when neither direction is allowed by the collapse rules
+            if (!kCanCollapse[k0][k1] && !kCanCollapse[k1][k0])
+                locked_collapses[k0][k1]++;
+        }
+    }
+
+    for (int k0 = 0; k0 < Kind_Count; ++k0)
+    {
+        for (int k1 = 0; k1 < Kind_Count; ++k1)
+        {
+            if (locked_collapses[k0][k1] == 0)
+                continue;
+
+            printf("locked collapses %d -> %d: %d\n", k0, k1, int(locked_collapses[k0][k1]));
+        }
+    }
+}
+#endif
+
+// Fills sort_order with collapse indices ordered by a counting sort on the top bits of the error's bit pattern;
+// collapses that share a key keep their original relative order.
+static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapses, size_t collapse_count)
+{
+    const int sort_bits = 11;
+    const size_t bucket_count = size_t(1) << sort_bits;
+
+    unsigned int histogram[1 << sort_bits];
+    memset(histogram, 0, sizeof(histogram));
+
+    // pass 1: count the number of collapses per key
+    for (size_t i = 0; i < collapse_count; ++i)
+    {
+        // error is non-negative, so the sign bit is shifted out before taking the top sort_bits bits
+        unsigned int bits = collapses[i].errorui << 1;
+        unsigned int key = bits >> (32 - sort_bits);
+
+        histogram[key]++;
+    }
+
+    // pass 2: turn the counts into starting offsets (exclusive prefix sum)
+    size_t histogram_sum = 0;
+
+    for (size_t bucket = 0; bucket < bucket_count; ++bucket)
+    {
+        size_t count = histogram[bucket];
+
+        histogram[bucket] = unsigned(histogram_sum);
+        histogram_sum += count;
+    }
+
+    assert(histogram_sum == collapse_count);
+
+    // pass 3: scatter collapse indices to their final slots
+    for (size_t i = 0; i < collapse_count; ++i)
+    {
+        unsigned int bits = collapses[i].errorui << 1;
+        unsigned int key = bits >> (32 - sort_bits);
+
+        unsigned int slot = histogram[key]++;
+        sort_order[slot] = unsigned(i);
+    }
+}
+
+// Applies candidate collapses in the order given by collapse_order until an error bound or the triangle budget is hit.
+// For every accepted collapse v0 -> v1 this function:
+// - adds the quadric of v0's position to the quadric of v1's position,
+// - records the move in collapse_remap (for all wedges of a complex vertex, or for both halves of a seam pair),
+// - marks both positions in collapse_locked so that neither takes part in another collapse during this pass.
+// Returns the number of edge collapses performed.
+static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, size_t triangle_collapse_goal, float error_goal, float error_limit)
+{
+ size_t edge_collapses = 0;
+ size_t triangle_collapses = 0;
+
+ for (size_t i = 0; i < collapse_count; ++i)
+ {
+ const Collapse& c = collapses[collapse_order[i]];
+
+ // hard bound: stop the pass at the first collapse whose error exceeds error_limit
+ if (c.error > error_limit)
+ break;
+
+ // soft bound: error_goal only stops the pass once more than a tenth of the triangle goal has been collapsed
+ if (c.error > error_goal && triangle_collapses > triangle_collapse_goal / 10)
+ break;
+
+ // budget: stop once the estimated number of removed triangles reaches the goal
+ if (triangle_collapses >= triangle_collapse_goal)
+ break;
+
+ unsigned int i0 = c.v0;
+ unsigned int i1 = c.v1;
+
+ unsigned int r0 = remap[i0];
+ unsigned int r1 = remap[i1];
+
+ // we don't collapse vertices that had source or target vertex involved in a collapse
+ // it's important to not move the vertices twice since it complicates the tracking/remapping logic
+ // it's important to not move other vertices towards a moved vertex to preserve error since we don't re-rank collapses mid-pass
+ if (collapse_locked[r0] | collapse_locked[r1])
+ continue;
+
+ assert(collapse_remap[r0] == r0);
+ assert(collapse_remap[r1] == r1);
+
+ // the target position accumulates the error of the source position
+ quadricAdd(vertex_quadrics[r1], vertex_quadrics[r0]);
+
+ if (vertex_kind[i0] == Kind_Complex)
+ {
+ // walk the wedge ring of i0 and redirect every wedge to the target position r1
+ unsigned int v = i0;
+
+ do
+ {
+ collapse_remap[v] = r1;
+ v = wedge[v];
+ } while (v != i0);
+ }
+ else if (vertex_kind[i0] == Kind_Seam)
+ {
+ // remap v0 to v1 and seam pair of v0 to seam pair of v1
+ unsigned int s0 = wedge[i0];
+ unsigned int s1 = wedge[i1];
+
+ assert(s0 != i0 && s1 != i1);
+ assert(wedge[s0] == i0 && wedge[s1] == i1);
+
+ collapse_remap[i0] = i1;
+ collapse_remap[s0] = s1;
+ }
+ else
+ {
+ // remaining kinds are expected to have a single wedge, so only i0 itself moves
+ assert(wedge[i0] == i0);
+
+ collapse_remap[i0] = i1;
+ }
+
+ collapse_locked[r0] = 1;
+ collapse_locked[r1] = 1;
+
+ // border edges collapse 1 triangle, other edges collapse 2 or more
+ triangle_collapses += (vertex_kind[i0] == Kind_Border) ? 1 : 2;
+ edge_collapses++;
+ }
+
+ return edge_collapses;
+}
+
+// Rewrites the index buffer in place through collapse_remap, dropping triangles that became degenerate.
+// Returns the number of indices kept (a multiple of 3).
+static size_t remapIndexBuffer(unsigned int* indices, size_t index_count, const unsigned int* collapse_remap)
+{
+    size_t kept = 0;
+
+    for (size_t read = 0; read < index_count; read += 3)
+    {
+        unsigned int tri[3];
+
+        for (int k = 0; k < 3; ++k)
+            tri[k] = collapse_remap[indices[read + k]];
+
+        // we never move the vertex twice during a single pass, so remapped vertices must map to themselves
+        assert(collapse_remap[tri[0]] == tri[0]);
+        assert(collapse_remap[tri[1]] == tri[1]);
+        assert(collapse_remap[tri[2]] == tri[2]);
+
+        // a triangle with two identical corners has collapsed and is not written back
+        bool degenerate = (tri[0] == tri[1]) || (tri[0] == tri[2]) || (tri[1] == tri[2]);
+
+        if (degenerate)
+            continue;
+
+        indices[kept + 0] = tri[0];
+        indices[kept + 1] = tri[1];
+        indices[kept + 2] = tri[2];
+        kept += 3;
+    }
+
+    return kept;
+}
+
+// Redirects every valid entry of loop[] (entries equal to ~0u are skipped) through collapse_remap, in place.
+static void remapEdgeLoops(unsigned int* loop, size_t vertex_count, const unsigned int* collapse_remap)
+{
+    for (size_t i = 0; i < vertex_count; ++i)
+    {
+        unsigned int next = loop[i];
+
+        if (next == ~0u)
+            continue;
+
+        unsigned int target = collapse_remap[next];
+
+        if (i == target)
+        {
+            // special case: the seam edge was collapsed in a direction opposite to where the loop goes,
+            // so the neighbour folded onto i itself; continue with the neighbour's own loop entry instead
+            loop[i] = loop[next];
+        }
+        else
+        {
+            loop[i] = target;
+        }
+    }
+}
+
+// Hash policy keyed by vertex index: two vertices compare equal when their quantized cell ids match.
+struct CellHasher
+{
+    const unsigned int* vertex_ids;
+
+    size_t hash(unsigned int i) const
+    {
+        // MurmurHash2 finalizer applied to the cell id of vertex i
+        unsigned int h = vertex_ids[i];
+
+        h = (h ^ (h >> 13)) * 0x5bd1e995;
+        return h ^ (h >> 15);
+    }
+
+    bool equal(unsigned int lhs, unsigned int rhs) const
+    {
+        const unsigned int lhs_id = vertex_ids[lhs];
+        const unsigned int rhs_id = vertex_ids[rhs];
+
+        return lhs_id == rhs_id;
+    }
+};
+
+// Hash policy keyed directly by a 32-bit id.
+struct IdHasher
+{
+    size_t hash(unsigned int id) const
+    {
+        // MurmurHash2 finalizer
+        unsigned int h = (id ^ (id >> 13)) * 0x5bd1e995;
+
+        return h ^ (h >> 15);
+    }
+
+    bool equal(unsigned int lhs, unsigned int rhs) const
+    {
+        return !(lhs != rhs);
+    }
+};
+
+// Hash policy keyed by triangle number: triangle t occupies indices[3 * t .. 3 * t + 2], and two triangles
+// compare equal when all three indices match position by position.
+struct TriangleHasher
+{
+    unsigned int* indices;
+
+    size_t hash(unsigned int i) const
+    {
+        const unsigned int* tri = &indices[i * 3];
+
+        // Optimized Spatial Hashing for Collision Detection of Deformable Objects
+        unsigned int h0 = tri[0] * 73856093;
+        unsigned int h1 = tri[1] * 19349663;
+        unsigned int h2 = tri[2] * 83492791;
+
+        return h0 ^ h1 ^ h2;
+    }
+
+    bool equal(unsigned int lhs, unsigned int rhs) const
+    {
+        const unsigned int* lt = &indices[lhs * 3];
+        const unsigned int* rt = &indices[rhs * 3];
+
+        for (int k = 0; k < 3; ++k)
+            if (lt[k] != rt[k])
+                return false;
+
+        return true;
+    }
+};
+
+// Quantizes every position to a grid with grid_size steps per axis and packs the three cell coordinates
+// into one id using 10 bits per axis (x in the top bits, z in the low bits).
+static void computeVertexIds(unsigned int* vertex_ids, const Vector3* vertex_positions, size_t vertex_count, int grid_size)
+{
+    assert(grid_size >= 1 && grid_size <= 1024);
+
+    const float cell_scale = float(grid_size - 1);
+
+    for (size_t i = 0; i < vertex_count; ++i)
+    {
+        const Vector3& p = vertex_positions[i];
+
+        // round to the nearest grid coordinate on each axis
+        int cx = int(p.x * cell_scale + 0.5f);
+        int cy = int(p.y * cell_scale + 0.5f);
+        int cz = int(p.z * cell_scale + 0.5f);
+
+        vertex_ids[i] = (cx << 20) | (cy << 10) | cz;
+    }
+}
+
+// Counts the triangles whose three corners have pairwise different ids, i.e. triangles that would not
+// degenerate when vertices with the same id are merged.
+static size_t countTriangles(const unsigned int* vertex_ids, const unsigned int* indices, size_t index_count)
+{
+    size_t surviving = 0;
+
+    for (size_t tri = 0; tri < index_count; tri += 3)
+    {
+        unsigned int a = vertex_ids[indices[tri + 0]];
+        unsigned int b = vertex_ids[indices[tri + 1]];
+        unsigned int c = vertex_ids[indices[tri + 2]];
+
+        if (a != b && a != c && b != c)
+            ++surviving;
+    }
+
+    return surviving;
+}
+
+// Assigns a dense cell number to every vertex so that vertices with the same id share a cell.
+// table is scratch storage for the hash lookup; returns the number of distinct cells.
+static size_t fillVertexCells(unsigned int* table, size_t table_size, unsigned int* vertex_cells, const unsigned int* vertex_ids, size_t vertex_count)
+{
+    // ~0u (all bits set) marks an empty table slot
+    memset(table, -1, table_size * sizeof(unsigned int));
+
+    CellHasher hasher = {vertex_ids};
+    size_t cell_count = 0;
+
+    for (size_t i = 0; i < vertex_count; ++i)
+    {
+        unsigned int* slot = hashLookup2(table, table_size, hasher, unsigned(i), ~0u);
+
+        if (*slot != ~0u)
+        {
+            // an earlier vertex with the same id already owns this slot; reuse its cell
+            vertex_cells[i] = vertex_cells[*slot];
+            continue;
+        }
+
+        // first vertex with this id: it claims the slot and opens a new cell
+        *slot = unsigned(i);
+        vertex_cells[i] = unsigned(cell_count);
+        cell_count++;
+    }
+
+    return cell_count;
+}
+
+// Counts the number of distinct values in vertex_ids; table is scratch storage for the hash lookup.
+static size_t countVertexCells(unsigned int* table, size_t table_size, const unsigned int* vertex_ids, size_t vertex_count)
+{
+    // ~0u (all bits set) marks an empty table slot
+    memset(table, -1, table_size * sizeof(unsigned int));
+
+    IdHasher hasher;
+    size_t unique_ids = 0;
+
+    for (size_t i = 0; i < vertex_count; ++i)
+    {
+        unsigned int id = vertex_ids[i];
+        unsigned int* slot = hashLookup2(table, table_size, hasher, id, ~0u);
+
+        // an empty slot means this id has not been seen before
+        if (*slot == ~0u)
+            ++unique_ids;
+
+        *slot = id;
+    }
+
+    return unique_ids;
+}
+
+// Accumulates triangle plane quadrics into the cells of the triangle corners.
+// A triangle that lies entirely in one cell is added once with triple weight; any other triangle is added
+// with unit weight to the cell of each corner.
+static void fillCellQuadrics(Quadric* cell_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* vertex_cells)
+{
+    for (size_t tri = 0; tri < index_count; tri += 3)
+    {
+        unsigned int i0 = indices[tri + 0];
+        unsigned int i1 = indices[tri + 1];
+        unsigned int i2 = indices[tri + 2];
+
+        unsigned int c0 = vertex_cells[i0];
+        unsigned int c1 = vertex_cells[i1];
+        unsigned int c2 = vertex_cells[i2];
+
+        bool single_cell = (c0 == c1) & (c0 == c2);
+        float weight = single_cell ? 3.f : 1.f;
+
+        Quadric Q;
+        quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], weight);
+
+        // the cell of the first corner always receives the quadric
+        quadricAdd(cell_quadrics[c0], Q);
+
+        // the other two corners only contribute separately when the triangle is not contained in one cell
+        if (!single_cell)
+        {
+            quadricAdd(cell_quadrics[c1], Q);
+            quadricAdd(cell_quadrics[c2], Q);
+        }
+    }
+}
+
+// Accumulates a unit-weight point quadric for every vertex into the quadric of the cell that contains it.
+static void fillCellQuadrics(Quadric* cell_quadrics, const Vector3* vertex_positions, size_t vertex_count, const unsigned int* vertex_cells)
+{
+    for (size_t i = 0; i < vertex_count; ++i)
+    {
+        const Vector3& p = vertex_positions[i];
+
+        Quadric point_quadric;
+        quadricFromPoint(point_quadric, p.x, p.y, p.z, 1.f);
+
+        quadricAdd(cell_quadrics[vertex_cells[i]], point_quadric);
+    }
+}
+
+// Picks a representative vertex for every cell: the vertex with the smallest quadric error against the
+// cell's quadric. cell_remap receives the vertex index and cell_errors the matching error.
+static void fillCellRemap(unsigned int* cell_remap, float* cell_errors, size_t cell_count, const unsigned int* vertex_cells, const Quadric* cell_quadrics, const Vector3* vertex_positions, size_t vertex_count)
+{
+    // ~0u (all bits set) marks a cell that has no representative yet
+    memset(cell_remap, -1, cell_count * sizeof(unsigned int));
+
+    for (size_t i = 0; i < vertex_count; ++i)
+    {
+        unsigned int cell = vertex_cells[i];
+        float error = quadricError(cell_quadrics[cell], vertex_positions[i]);
+
+        // cell_errors[cell] is only read once the cell has a representative, so it needs no initialization
+        bool unassigned = (cell_remap[cell] == ~0u);
+
+        if (unassigned || error < cell_errors[cell])
+        {
+            cell_remap[cell] = unsigned(i);
+            cell_errors[cell] = error;
+        }
+    }
+}
+
+// Writes the simplified triangles to destination: a triangle is kept when its corners fall in three distinct
+// cells, its corners are replaced by the cell representatives, and exact duplicates are dropped.
+// tritable is scratch storage for the hash lookup; returns the number of indices written.
+static size_t filterTriangles(unsigned int* destination, unsigned int* tritable, size_t tritable_size, const unsigned int* indices, size_t index_count, const unsigned int* vertex_cells, const unsigned int* cell_remap)
+{
+    // ~0u (all bits set) marks an empty table slot
+    memset(tritable, -1, tritable_size * sizeof(unsigned int));
+
+    // triangles are hashed straight out of the output buffer
+    TriangleHasher hasher = {destination};
+
+    size_t unique = 0;
+
+    for (size_t tri = 0; tri < index_count; tri += 3)
+    {
+        unsigned int c0 = vertex_cells[indices[tri + 0]];
+        unsigned int c1 = vertex_cells[indices[tri + 1]];
+        unsigned int c2 = vertex_cells[indices[tri + 2]];
+
+        // triangles with two corners in the same cell degenerate and are dropped
+        if (c0 == c1 || c0 == c2 || c1 == c2)
+            continue;
+
+        unsigned int a = cell_remap[c0];
+        unsigned int b = cell_remap[c1];
+        unsigned int c = cell_remap[c2];
+
+        // tentatively write the triangle at the end of the output, rotated so that the smallest index comes
+        // first; rotation preserves winding and lets rotated copies of a triangle compare equal
+        unsigned int* out = destination + unique * 3;
+
+        if (b < a && b < c)
+        {
+            out[0] = b;
+            out[1] = c;
+            out[2] = a;
+        }
+        else if (c < a && c < b)
+        {
+            out[0] = c;
+            out[1] = a;
+            out[2] = b;
+        }
+        else
+        {
+            out[0] = a;
+            out[1] = b;
+            out[2] = c;
+        }
+
+        unsigned int* slot = hashLookup2(tritable, tritable_size, hasher, unsigned(unique), ~0u);
+
+        // only commit the tentative triangle when no identical triangle was emitted before
+        if (*slot == ~0u)
+        {
+            *slot = unsigned(unique);
+            unique++;
+        }
+    }
+
+    return unique * 3;
+}
+
+// Estimates the x at which a curve through (x0, y0), (x1, y1), (x2, y2) reaches the value y.
+// Returns x1 when the estimate is undefined because the denominator is zero.
+static float interpolate(float y, float x0, float y0, float x1, float y1, float x2, float y2)
+{
+    // three point interpolation from "revenge of interpolation search" paper
+    float num = (y1 - y) * (x1 - x2) * (x1 - x0) * (y2 - y0);
+    float den = (y2 - y) * (x1 - x2) * (y0 - y1) + (y0 - y) * (x1 - x0) * (y1 - y2);
+
+    // a zero denominator (e.g. all three samples have the same y) would yield inf or NaN; callers convert the
+    // result to int, which is undefined for non-finite values, so fall back to the middle sample instead
+    if (den == 0.f)
+        return x1;
+
+    return x1 + num / den;
+}
+
+} // namespace meshopt
+
+// Optional debug outputs, only available when NDEBUG is not defined.
+// When one of these pointers is non-null, meshopt_simplify copies the corresponding per-vertex array
+// (vertex kinds, loop, loopback; vertex_count entries each) into it before returning.
+#ifndef NDEBUG
+unsigned char* meshopt_simplifyDebugKind = 0;
+unsigned int* meshopt_simplifyDebugLoop = 0;
+unsigned int* meshopt_simplifyDebugLoopBack = 0;
+#endif
+
+// Simplifies an indexed triangle mesh by repeated passes of edge collapses.
+// - destination receives the simplified index buffer; it may be the same buffer as indices (the copy is skipped then).
+// - target_index_count is the index count to aim for; passes stop early when no edge can be collapsed any more,
+//   either due to topology restrictions or due to the error limit.
+// - target_error is linear and is squared internally to match the units of quadricError.
+// - r_resulting_error (Godot addition, may be null) receives sqrt(worst pass error) scaled by the value
+//   returned from rescalePositions.
+// Returns the number of indices written to destination.
+// -- GODOT start --
+//size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error)
+size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *r_resulting_error)
+// -- GODOT end --
+{
+ using namespace meshopt;
+
+ assert(index_count % 3 == 0);
+ assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+ assert(vertex_positions_stride % sizeof(float) == 0);
+ assert(target_index_count <= index_count);
+
+ meshopt_Allocator allocator;
+
+ unsigned int* result = destination;
+
+ // build adjacency information
+ EdgeAdjacency adjacency = {};
+ buildEdgeAdjacency(adjacency, indices, index_count, vertex_count, allocator);
+
+ // build position remap that maps each vertex to the one with identical position
+ unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+ unsigned int* wedge = allocator.allocate<unsigned int>(vertex_count);
+ buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, allocator);
+
+ // classify vertices; vertex kind determines collapse rules, see kCanCollapse
+ unsigned char* vertex_kind = allocator.allocate<unsigned char>(vertex_count);
+ unsigned int* loop = allocator.allocate<unsigned int>(vertex_count);
+ unsigned int* loopback = allocator.allocate<unsigned int>(vertex_count);
+ classifyVertices(vertex_kind, loop, loopback, vertex_count, adjacency, remap, wedge);
+
+#if TRACE
+ size_t unique_positions = 0;
+ for (size_t i = 0; i < vertex_count; ++i)
+ unique_positions += remap[i] == i;
+
+ printf("position remap: %d vertices => %d positions\n", int(vertex_count), int(unique_positions));
+
+ size_t kinds[Kind_Count] = {};
+ for (size_t i = 0; i < vertex_count; ++i)
+ kinds[vertex_kind[i]] += remap[i] == i;
+
+ printf("kinds: manifold %d, border %d, seam %d, complex %d, locked %d\n",
+ int(kinds[Kind_Manifold]), int(kinds[Kind_Border]), int(kinds[Kind_Seam]), int(kinds[Kind_Complex]), int(kinds[Kind_Locked]));
+#endif
+
+ Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
+// -- GODOT start --
+ //rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride);
+ float extent = rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride);
+// -- GODOT end --
+
+ // quadrics are accumulated per vertex; face and edge quadrics are added through remap
+ Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count);
+ memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric));
+
+ fillFaceQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap);
+ fillEdgeQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap, vertex_kind, loop, loopback);
+
+ // all passes below operate on result in place
+ if (result != indices)
+ memcpy(result, indices, index_count * sizeof(unsigned int));
+
+// -- GODOT start --
+#if TRACE
+ size_t pass_count = 0;
+ //float worst_error = 0;
+#endif
+ float worst_error = 0;
+// -- GODOT end --
+
+ Collapse* edge_collapses = allocator.allocate<Collapse>(index_count);
+ unsigned int* collapse_order = allocator.allocate<unsigned int>(index_count);
+ unsigned int* collapse_remap = allocator.allocate<unsigned int>(vertex_count);
+ unsigned char* collapse_locked = allocator.allocate<unsigned char>(vertex_count);
+
+ size_t result_count = index_count;
+
+ // target_error input is linear; we need to adjust it to match quadricError units
+ float error_limit = target_error * target_error;
+
+// -- GODOT start --
+ // placeholder value; it is overwritten with the measured error after the loop
+ if (r_resulting_error) {
+ *r_resulting_error = 1.0;
+ }
+// -- GODOT end --
+
+ while (result_count > target_index_count)
+ {
+ size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, result, result_count, remap, vertex_kind, loop);
+
+ // no edges can be collapsed any more due to topology restrictions
+ if (edge_collapse_count == 0)
+ break;
+
+ rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_quadrics, remap);
+
+#if TRACE > 1
+ dumpEdgeCollapses(edge_collapses, edge_collapse_count, vertex_kind);
+#endif
+
+ sortEdgeCollapses(collapse_order, edge_collapses, edge_collapse_count);
+
+ // most collapses remove 2 triangles; use this to establish a bound on the pass in terms of error limit
+ // note that edge_collapse_goal is an estimate; triangle_collapse_goal will be used to actually limit collapses
+ size_t triangle_collapse_goal = (result_count - target_index_count) / 3;
+ size_t edge_collapse_goal = triangle_collapse_goal / 2;
+
+ // we limit the error in each pass based on the error of optimal last collapse; since many collapses will be locked
+ // as they will share vertices with other successful collapses, we need to increase the acceptable error by this factor
+ const float kPassErrorBound = 1.5f;
+
+ float error_goal = edge_collapse_goal < edge_collapse_count ? edge_collapses[collapse_order[edge_collapse_goal]].error * kPassErrorBound : FLT_MAX;
+
+ // reset per-pass state: identity remap and no locked vertices
+ for (size_t i = 0; i < vertex_count; ++i)
+ collapse_remap[i] = unsigned(i);
+
+ memset(collapse_locked, 0, vertex_count);
+
+ size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, triangle_collapse_goal, error_goal, error_limit);
+
+ // no edges can be collapsed any more due to hitting the error limit or triangle collapse limit
+ if (collapses == 0)
+ break;
+
+ remapEdgeLoops(loop, vertex_count, collapse_remap);
+ remapEdgeLoops(loopback, vertex_count, collapse_remap);
+
+ size_t new_count = remapIndexBuffer(result, result_count, collapse_remap);
+ assert(new_count < result_count);
+
+// -- GODOT start --
+//#if TRACE
+ // pass error = error of the last collapse in sort order that was actually performed
+ float pass_error = 0.f;
+ for (size_t i = 0; i < edge_collapse_count; ++i)
+ {
+ Collapse& c = edge_collapses[collapse_order[i]];
+
+ if (collapse_remap[c.v0] == c.v1)
+ pass_error = c.error;
+ }
+
+ //pass_count++;
+ worst_error = (worst_error < pass_error) ? pass_error : worst_error;
+
+#if TRACE
+ pass_count++;
+ printf("pass %d: triangles: %d -> %d, collapses: %d/%d (goal: %d), error: %e (limit %e goal %e)\n", int(pass_count), int(result_count / 3), int(new_count / 3), int(collapses), int(edge_collapse_count), int(edge_collapse_goal), pass_error, error_limit, error_goal);
+#endif
+// -- GODOT end --
+
+ result_count = new_count;
+ }
+
+// -- GODOT start --
+ // worst_error is in quadricError units; sqrt converts it back to linear units before scaling by extent
+ if (r_resulting_error) {
+ *r_resulting_error = sqrt(worst_error) * extent;
+ }
+// -- GODOT end --
+
+#if TRACE
+ printf("passes: %d, worst error: %e\n", int(pass_count), worst_error);
+#endif
+
+#if TRACE > 1
+ dumpLockedCollapses(result, result_count, vertex_kind);
+#endif
+
+#ifndef NDEBUG
+ if (meshopt_simplifyDebugKind)
+ memcpy(meshopt_simplifyDebugKind, vertex_kind, vertex_count);
+
+ if (meshopt_simplifyDebugLoop)
+ memcpy(meshopt_simplifyDebugLoop, loop, vertex_count * sizeof(unsigned int));
+
+ if (meshopt_simplifyDebugLoopBack)
+ memcpy(meshopt_simplifyDebugLoopBack, loopback, vertex_count * sizeof(unsigned int));
+#endif
+
+ return result_count;
+}
+
+// Simplifies an indexed triangle mesh by merging all vertices that fall into the same cell of a uniform grid.
+// The grid size is searched (interpolation search first, then bisection) so that the number of surviving
+// triangles does not exceed target_index_count / 3. Every cell is represented by its vertex with the minimal
+// quadric error, and duplicate output triangles are filtered out.
+// Returns the number of indices written to destination; returns 0 when target_index_count is below 6 or when
+// no tested grid size leaves any triangle within the target.
+size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count)
+{
+ using namespace meshopt;
+
+ assert(index_count % 3 == 0);
+ assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+ assert(vertex_positions_stride % sizeof(float) == 0);
+ assert(target_index_count <= index_count);
+
+ // we expect to get ~2 triangles/vertex in the output
+ size_t target_cell_count = target_index_count / 6;
+
+ if (target_cell_count == 0)
+ return 0;
+
+ meshopt_Allocator allocator;
+
+ Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
+ rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride);
+
+ // find the optimal grid size using guided binary search
+#if TRACE
+ printf("source: %d vertices, %d triangles\n", int(vertex_count), int(index_count / 3));
+ printf("target: %d cells, %d triangles\n", int(target_cell_count), int(target_index_count / 3));
+#endif
+
+ unsigned int* vertex_ids = allocator.allocate<unsigned int>(vertex_count);
+
+ const int kInterpolationPasses = 5;
+
+ // invariant: # of triangles in min_grid <= target_count
+ // min_grid/max_grid are exclusive search bounds; valid grid sizes are 1..1024
+ int min_grid = 0;
+ int max_grid = 1025;
+ size_t min_triangles = 0;
+ size_t max_triangles = index_count / 3;
+
+ // instead of starting in the middle, let's guess as to what the answer might be! triangle count usually grows as a square of grid size...
+ int next_grid_size = int(sqrtf(float(target_cell_count)) + 0.5f);
+
+ for (int pass = 0; pass < 10 + kInterpolationPasses; ++pass)
+ {
+ assert(min_triangles < target_index_count / 3);
+ assert(max_grid - min_grid > 1);
+
+ // we clamp the prediction of the grid size to make sure that the search converges
+ int grid_size = next_grid_size;
+ grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid) ? max_grid - 1 : grid_size;
+
+ computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size);
+ size_t triangles = countTriangles(vertex_ids, indices, index_count);
+
+#if TRACE
+ printf("pass %d (%s): grid size %d, triangles %d, %s\n",
+ pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary",
+ grid_size, int(triangles),
+ (triangles <= target_index_count / 3) ? "under" : "over");
+#endif
+
+ // the prediction uses the bounds from before this pass together with the new sample
+ float tip = interpolate(float(target_index_count / 3), float(min_grid), float(min_triangles), float(grid_size), float(triangles), float(max_grid), float(max_triangles));
+
+ // narrow the bracket: the sample becomes the new lower or upper bound
+ if (triangles <= target_index_count / 3)
+ {
+ min_grid = grid_size;
+ min_triangles = triangles;
+ }
+ else
+ {
+ max_grid = grid_size;
+ max_triangles = triangles;
+ }
+
+ if (triangles == target_index_count / 3 || max_grid - min_grid <= 1)
+ break;
+
+ // we start by using interpolation search - it usually converges faster
+ // however, interpolation search has a worst case of O(N) so we switch to binary search after a few iterations which converges in O(logN)
+ next_grid_size = (pass < kInterpolationPasses) ? int(tip + 0.5f) : (min_grid + max_grid) / 2;
+ }
+
+ // no tested grid size produced a non-empty result within the target
+ if (min_triangles == 0)
+ return 0;
+
+ // build vertex->cell association by mapping all vertices with the same quantized position to the same cell
+ size_t table_size = hashBuckets2(vertex_count);
+ unsigned int* table = allocator.allocate<unsigned int>(table_size);
+
+ unsigned int* vertex_cells = allocator.allocate<unsigned int>(vertex_count);
+
+ computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid);
+ size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count);
+
+ // build a quadric for each target cell
+ Quadric* cell_quadrics = allocator.allocate<Quadric>(cell_count);
+ memset(cell_quadrics, 0, cell_count * sizeof(Quadric));
+
+ fillCellQuadrics(cell_quadrics, indices, index_count, vertex_positions, vertex_cells);
+
+ // for each target cell, find the vertex with the minimal error
+ unsigned int* cell_remap = allocator.allocate<unsigned int>(cell_count);
+ float* cell_errors = allocator.allocate<float>(cell_count);
+
+ fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_quadrics, vertex_positions, vertex_count);
+
+ // collapse triangles!
+ // note that we need to filter out triangles that we've already output because we very frequently generate redundant triangles between cells :(
+ size_t tritable_size = hashBuckets2(min_triangles);
+ unsigned int* tritable = allocator.allocate<unsigned int>(tritable_size);
+
+ size_t write = filterTriangles(destination, tritable, tritable_size, indices, index_count, vertex_cells, cell_remap);
+ assert(write <= target_index_count);
+
+#if TRACE
+ printf("result: %d cells, %d triangles (%d unfiltered)\n", int(cell_count), int(write / 3), int(min_triangles));
+#endif
+
+ return write;
+}
+
+// Reduces a point cloud by merging all points that fall into the same cell of a uniform grid.
+// The grid size is searched (interpolation search first, then bisection) so that the number of occupied cells
+// does not exceed target_vertex_count. Every cell is represented by its point with the minimal quadric error.
+// destination receives one vertex index per cell; returns the number of indices written, or 0 when
+// target_vertex_count is 0 or no tested grid size yields a usable result.
+size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count)
+{
+ using namespace meshopt;
+
+ assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+ assert(vertex_positions_stride % sizeof(float) == 0);
+ assert(target_vertex_count <= vertex_count);
+
+ size_t target_cell_count = target_vertex_count;
+
+ if (target_cell_count == 0)
+ return 0;
+
+ meshopt_Allocator allocator;
+
+ Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
+ rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride);
+
+ // find the optimal grid size using guided binary search
+#if TRACE
+ printf("source: %d vertices\n", int(vertex_count));
+ printf("target: %d cells\n", int(target_cell_count));
+#endif
+
+ unsigned int* vertex_ids = allocator.allocate<unsigned int>(vertex_count);
+
+ // the same table is used for counting cells during the search and for the final cell assignment
+ size_t table_size = hashBuckets2(vertex_count);
+ unsigned int* table = allocator.allocate<unsigned int>(table_size);
+
+ const int kInterpolationPasses = 5;
+
+ // invariant: # of vertices in min_grid <= target_count
+ // min_grid/max_grid are exclusive search bounds; valid grid sizes are 1..1024
+ int min_grid = 0;
+ int max_grid = 1025;
+ size_t min_vertices = 0;
+ size_t max_vertices = vertex_count;
+
+ // instead of starting in the middle, let's guess as to what the answer might be! triangle count usually grows as a square of grid size...
+ int next_grid_size = int(sqrtf(float(target_cell_count)) + 0.5f);
+
+ for (int pass = 0; pass < 10 + kInterpolationPasses; ++pass)
+ {
+ assert(min_vertices < target_vertex_count);
+ assert(max_grid - min_grid > 1);
+
+ // we clamp the prediction of the grid size to make sure that the search converges
+ int grid_size = next_grid_size;
+ grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid) ? max_grid - 1 : grid_size;
+
+ computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size);
+ size_t vertices = countVertexCells(table, table_size, vertex_ids, vertex_count);
+
+#if TRACE
+ printf("pass %d (%s): grid size %d, vertices %d, %s\n",
+ pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary",
+ grid_size, int(vertices),
+ (vertices <= target_vertex_count) ? "under" : "over");
+#endif
+
+ // the prediction uses the bounds from before this pass together with the new sample
+ float tip = interpolate(float(target_vertex_count), float(min_grid), float(min_vertices), float(grid_size), float(vertices), float(max_grid), float(max_vertices));
+
+ // narrow the bracket: the sample becomes the new lower or upper bound
+ if (vertices <= target_vertex_count)
+ {
+ min_grid = grid_size;
+ min_vertices = vertices;
+ }
+ else
+ {
+ max_grid = grid_size;
+ max_vertices = vertices;
+ }
+
+ if (vertices == target_vertex_count || max_grid - min_grid <= 1)
+ break;
+
+ // we start by using interpolation search - it usually converges faster
+ // however, interpolation search has a worst case of O(N) so we switch to binary search after a few iterations which converges in O(logN)
+ next_grid_size = (pass < kInterpolationPasses) ? int(tip + 0.5f) : (min_grid + max_grid) / 2;
+ }
+
+ // no tested grid size produced a non-empty result within the target
+ if (min_vertices == 0)
+ return 0;
+
+ // build vertex->cell association by mapping all vertices with the same quantized position to the same cell
+ unsigned int* vertex_cells = allocator.allocate<unsigned int>(vertex_count);
+
+ computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid);
+ size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count);
+
+ // build a quadric for each target cell
+ Quadric* cell_quadrics = allocator.allocate<Quadric>(cell_count);
+ memset(cell_quadrics, 0, cell_count * sizeof(Quadric));
+
+ fillCellQuadrics(cell_quadrics, vertex_positions, vertex_count, vertex_cells);
+
+ // for each target cell, find the vertex with the minimal error
+ unsigned int* cell_remap = allocator.allocate<unsigned int>(cell_count);
+ float* cell_errors = allocator.allocate<float>(cell_count);
+
+ fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_quadrics, vertex_positions, vertex_count);
+
+ // copy results to the output
+ assert(cell_count <= target_vertex_count);
+ memcpy(destination, cell_remap, sizeof(unsigned int) * cell_count);
+
+#if TRACE
+ printf("result: %d cells\n", int(cell_count));
+#endif
+
+ return cell_count;
+}
diff --git a/thirdparty/meshoptimizer/spatialorder.cpp b/thirdparty/meshoptimizer/spatialorder.cpp
new file mode 100644
index 0000000000..b09f80ac6f
--- /dev/null
+++ b/thirdparty/meshoptimizer/spatialorder.cpp
@@ -0,0 +1,194 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+#include <assert.h>
+#include <float.h>
+#include <string.h>
+
+// This work is based on:
+// Fabian Giesen. Decoding Morton codes. 2009
+namespace meshopt
+{
+
+// "Insert" two 0 bits after each of the 10 low bits of x
+inline unsigned int part1By2(unsigned int x)
+{
+ x &= 0x000003ff; // x = ---- ---- ---- ---- ---- --98 7654 3210
+ x = (x ^ (x << 16)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210
+ x = (x ^ (x << 8)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210
+ x = (x ^ (x << 4)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10
+ x = (x ^ (x << 2)) & 0x09249249; // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
+ return x;
+}
+
+static void computeOrder(unsigned int* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride)
+{
+ size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+
+ float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
+ float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ const float* v = vertex_positions_data + i * vertex_stride_float;
+
+ for (int j = 0; j < 3; ++j)
+ {
+ float vj = v[j];
+
+ minv[j] = minv[j] > vj ? vj : minv[j];
+ maxv[j] = maxv[j] < vj ? vj : maxv[j];
+ }
+ }
+
+ float extent = 0.f;
+
+ extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]);
+ extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]);
+ extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]);
+
+ float scale = extent == 0 ? 0.f : 1.f / extent;
+
+ // generate Morton order based on the position inside a unit cube
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ const float* v = vertex_positions_data + i * vertex_stride_float;
+
+ int x = int((v[0] - minv[0]) * scale * 1023.f + 0.5f);
+ int y = int((v[1] - minv[1]) * scale * 1023.f + 0.5f);
+ int z = int((v[2] - minv[2]) * scale * 1023.f + 0.5f);
+
+ result[i] = part1By2(x) | (part1By2(y) << 1) | (part1By2(z) << 2);
+ }
+}
+
+static void computeHistogram(unsigned int (&hist)[1024][3], const unsigned int* data, size_t count)
+{
+ memset(hist, 0, sizeof(hist));
+
+ // compute 3 10-bit histograms in parallel
+ for (size_t i = 0; i < count; ++i)
+ {
+ unsigned int id = data[i];
+
+ hist[(id >> 0) & 1023][0]++;
+ hist[(id >> 10) & 1023][1]++;
+ hist[(id >> 20) & 1023][2]++;
+ }
+
+ unsigned int sumx = 0, sumy = 0, sumz = 0;
+
+ // replace histogram data with prefix histogram sums in-place
+ for (int i = 0; i < 1024; ++i)
+ {
+ unsigned int hx = hist[i][0], hy = hist[i][1], hz = hist[i][2];
+
+ hist[i][0] = sumx;
+ hist[i][1] = sumy;
+ hist[i][2] = sumz;
+
+ sumx += hx;
+ sumy += hy;
+ sumz += hz;
+ }
+
+ assert(sumx == count && sumy == count && sumz == count);
+}
+
+static void radixPass(unsigned int* destination, const unsigned int* source, const unsigned int* keys, size_t count, unsigned int (&hist)[1024][3], int pass)
+{
+ int bitoff = pass * 10;
+
+ for (size_t i = 0; i < count; ++i)
+ {
+ unsigned int id = (keys[source[i]] >> bitoff) & 1023;
+
+ destination[hist[id][pass]++] = source[i];
+ }
+}
+
+} // namespace meshopt
+
+void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+ using namespace meshopt;
+
+ assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+ assert(vertex_positions_stride % sizeof(float) == 0);
+
+ meshopt_Allocator allocator;
+
+ unsigned int* keys = allocator.allocate<unsigned int>(vertex_count);
+ computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride);
+
+ unsigned int hist[1024][3];
+ computeHistogram(hist, keys, vertex_count);
+
+ unsigned int* scratch = allocator.allocate<unsigned int>(vertex_count);
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ destination[i] = unsigned(i);
+
+ // 3-pass radix sort computes the resulting order into scratch
+ radixPass(scratch, destination, keys, vertex_count, hist, 0);
+ radixPass(destination, scratch, keys, vertex_count, hist, 1);
+ radixPass(scratch, destination, keys, vertex_count, hist, 2);
+
+ // since our remap table is mapping old=>new, we need to reverse it
+ for (size_t i = 0; i < vertex_count; ++i)
+ destination[scratch[i]] = unsigned(i);
+}
+
+void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+ using namespace meshopt;
+
+ assert(index_count % 3 == 0);
+ assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+ assert(vertex_positions_stride % sizeof(float) == 0);
+
+ (void)vertex_count;
+
+ size_t face_count = index_count / 3;
+ size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+
+ meshopt_Allocator allocator;
+
+ float* centroids = allocator.allocate<float>(face_count * 3);
+
+ for (size_t i = 0; i < face_count; ++i)
+ {
+ unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+ assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+ const float* va = vertex_positions + a * vertex_stride_float;
+ const float* vb = vertex_positions + b * vertex_stride_float;
+ const float* vc = vertex_positions + c * vertex_stride_float;
+
+ centroids[i * 3 + 0] = (va[0] + vb[0] + vc[0]) / 3.f;
+ centroids[i * 3 + 1] = (va[1] + vb[1] + vc[1]) / 3.f;
+ centroids[i * 3 + 2] = (va[2] + vb[2] + vc[2]) / 3.f;
+ }
+
+ unsigned int* remap = allocator.allocate<unsigned int>(face_count);
+
+ meshopt_spatialSortRemap(remap, centroids, face_count, sizeof(float) * 3);
+
+ // support in-order remap
+ if (destination == indices)
+ {
+ unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
+ memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
+ indices = indices_copy;
+ }
+
+ for (size_t i = 0; i < face_count; ++i)
+ {
+ unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+ unsigned int r = remap[i];
+
+ destination[r * 3 + 0] = a;
+ destination[r * 3 + 1] = b;
+ destination[r * 3 + 2] = c;
+ }
+}
diff --git a/thirdparty/meshoptimizer/stripifier.cpp b/thirdparty/meshoptimizer/stripifier.cpp
new file mode 100644
index 0000000000..8ce17ef3dc
--- /dev/null
+++ b/thirdparty/meshoptimizer/stripifier.cpp
@@ -0,0 +1,295 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+
+// This work is based on:
+// Francine Evans, Steven Skiena and Amitabh Varshney. Optimizing Triangle Strips for Fast Rendering. 1996
+namespace meshopt
+{
+
+static unsigned int findStripFirst(const unsigned int buffer[][3], unsigned int buffer_size, const unsigned int* valence)
+{
+ unsigned int index = 0;
+ unsigned int iv = ~0u;
+
+ for (size_t i = 0; i < buffer_size; ++i)
+ {
+ unsigned int va = valence[buffer[i][0]], vb = valence[buffer[i][1]], vc = valence[buffer[i][2]];
+ unsigned int v = (va < vb && va < vc) ? va : (vb < vc) ? vb : vc;
+
+ if (v < iv)
+ {
+ index = unsigned(i);
+ iv = v;
+ }
+ }
+
+ return index;
+}
+
+static int findStripNext(const unsigned int buffer[][3], unsigned int buffer_size, unsigned int e0, unsigned int e1)
+{
+ for (size_t i = 0; i < buffer_size; ++i)
+ {
+ unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];
+
+ if (e0 == a && e1 == b)
+ return (int(i) << 2) | 2;
+ else if (e0 == b && e1 == c)
+ return (int(i) << 2) | 0;
+ else if (e0 == c && e1 == a)
+ return (int(i) << 2) | 1;
+ }
+
+ return -1;
+}
+
+} // namespace meshopt
+
+size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index)
+{
+ assert(destination != indices);
+ assert(index_count % 3 == 0);
+
+ using namespace meshopt;
+
+ meshopt_Allocator allocator;
+
+ const size_t buffer_capacity = 8;
+
+ unsigned int buffer[buffer_capacity][3] = {};
+ unsigned int buffer_size = 0;
+
+ size_t index_offset = 0;
+
+ unsigned int strip[2] = {};
+ unsigned int parity = 0;
+
+ size_t strip_size = 0;
+
+ // compute vertex valence; this is used to prioritize starting triangle for strips
+ unsigned int* valence = allocator.allocate<unsigned int>(vertex_count);
+ memset(valence, 0, vertex_count * sizeof(unsigned int));
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ unsigned int index = indices[i];
+ assert(index < vertex_count);
+
+ valence[index]++;
+ }
+
+ int next = -1;
+
+ while (buffer_size > 0 || index_offset < index_count)
+ {
+ assert(next < 0 || (size_t(next >> 2) < buffer_size && (next & 3) < 3));
+
+ // fill triangle buffer
+ while (buffer_size < buffer_capacity && index_offset < index_count)
+ {
+ buffer[buffer_size][0] = indices[index_offset + 0];
+ buffer[buffer_size][1] = indices[index_offset + 1];
+ buffer[buffer_size][2] = indices[index_offset + 2];
+
+ buffer_size++;
+ index_offset += 3;
+ }
+
+ assert(buffer_size > 0);
+
+ if (next >= 0)
+ {
+ unsigned int i = next >> 2;
+ unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];
+ unsigned int v = buffer[i][next & 3];
+
+ // ordered removal from the buffer
+ memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0]));
+ buffer_size--;
+
+ // update vertex valences for strip start heuristic
+ valence[a]--;
+ valence[b]--;
+ valence[c]--;
+
+ // find next triangle (note that edge order flips on every iteration)
+ // in some cases we need to perform a swap to pick a different outgoing triangle edge
+ // for [a b c], the default strip edge is [b c], but we might want to use [a c]
+ int cont = findStripNext(buffer, buffer_size, parity ? strip[1] : v, parity ? v : strip[1]);
+ int swap = cont < 0 ? findStripNext(buffer, buffer_size, parity ? v : strip[0], parity ? strip[0] : v) : -1;
+
+ if (cont < 0 && swap >= 0)
+ {
+ // [a b c] => [a b a c]
+ destination[strip_size++] = strip[0];
+ destination[strip_size++] = v;
+
+ // next strip has same winding
+ // ? a b => b a v
+ strip[1] = v;
+
+ next = swap;
+ }
+ else
+ {
+ // emit the next vertex in the strip
+ destination[strip_size++] = v;
+
+ // next strip has flipped winding
+ strip[0] = strip[1];
+ strip[1] = v;
+ parity ^= 1;
+
+ next = cont;
+ }
+ }
+ else
+ {
+ // if we didn't find anything, we need to find the next new triangle
+ // we use a heuristic to maximize the strip length
+ unsigned int i = findStripFirst(buffer, buffer_size, &valence[0]);
+ unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];
+
+ // ordered removal from the buffer
+ memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0]));
+ buffer_size--;
+
+ // update vertex valences for strip start heuristic
+ valence[a]--;
+ valence[b]--;
+ valence[c]--;
+
+ // we need to pre-rotate the triangle so that we will find a match in the existing buffer on the next iteration
+ int ea = findStripNext(buffer, buffer_size, c, b);
+ int eb = findStripNext(buffer, buffer_size, a, c);
+ int ec = findStripNext(buffer, buffer_size, b, a);
+
+ // in some cases we can have several matching edges; since we can pick any edge, we pick the one with the smallest
+ // triangle index in the buffer. this reduces the effect of stripification on ACMR and additionally - for unclear
+ // reasons - slightly improves the stripification efficiency
+ int mine = INT_MAX;
+ mine = (ea >= 0 && mine > ea) ? ea : mine;
+ mine = (eb >= 0 && mine > eb) ? eb : mine;
+ mine = (ec >= 0 && mine > ec) ? ec : mine;
+
+ if (ea == mine)
+ {
+ // keep abc
+ next = ea;
+ }
+ else if (eb == mine)
+ {
+ // abc -> bca
+ unsigned int t = a;
+ a = b, b = c, c = t;
+
+ next = eb;
+ }
+ else if (ec == mine)
+ {
+ // abc -> cab
+ unsigned int t = c;
+ c = b, b = a, a = t;
+
+ next = ec;
+ }
+
+ if (restart_index)
+ {
+ if (strip_size)
+ destination[strip_size++] = restart_index;
+
+ destination[strip_size++] = a;
+ destination[strip_size++] = b;
+ destination[strip_size++] = c;
+
+ // new strip always starts with the same edge winding
+ strip[0] = b;
+ strip[1] = c;
+ parity = 1;
+ }
+ else
+ {
+ if (strip_size)
+ {
+ // connect last strip using degenerate triangles
+ destination[strip_size++] = strip[1];
+ destination[strip_size++] = a;
+ }
+
+ // note that we may need to flip the emitted triangle based on parity
+ // we always end up with outgoing edge "cb" in the end
+ unsigned int e0 = parity ? c : b;
+ unsigned int e1 = parity ? b : c;
+
+ destination[strip_size++] = a;
+ destination[strip_size++] = e0;
+ destination[strip_size++] = e1;
+
+ strip[0] = e0;
+ strip[1] = e1;
+ parity ^= 1;
+ }
+ }
+ }
+
+ return strip_size;
+}
+
+size_t meshopt_stripifyBound(size_t index_count)
+{
+ assert(index_count % 3 == 0);
+
+ // worst case without restarts is 2 degenerate indices and 3 indices per triangle
+ // worst case with restarts is 1 restart index and 3 indices per triangle
+ return (index_count / 3) * 5;
+}
+
+size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index)
+{
+ assert(destination != indices);
+
+ size_t offset = 0;
+ size_t start = 0;
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ if (restart_index && indices[i] == restart_index)
+ {
+ start = i + 1;
+ }
+ else if (i - start >= 2)
+ {
+ unsigned int a = indices[i - 2], b = indices[i - 1], c = indices[i];
+
+ // flip winding for odd triangles
+ if ((i - start) & 1)
+ {
+ unsigned int t = a;
+ a = b, b = t;
+ }
+
+ // although we use restart indices, strip swaps still produce degenerate triangles, so skip them
+ if (a != b && a != c && b != c)
+ {
+ destination[offset + 0] = a;
+ destination[offset + 1] = b;
+ destination[offset + 2] = c;
+ offset += 3;
+ }
+ }
+ }
+
+ return offset;
+}
+
+size_t meshopt_unstripifyBound(size_t index_count)
+{
+ assert(index_count == 0 || index_count >= 3);
+
+ return (index_count == 0) ? 0 : (index_count - 2) * 3;
+}
diff --git a/thirdparty/meshoptimizer/vcacheanalyzer.cpp b/thirdparty/meshoptimizer/vcacheanalyzer.cpp
new file mode 100644
index 0000000000..3682743820
--- /dev/null
+++ b/thirdparty/meshoptimizer/vcacheanalyzer.cpp
@@ -0,0 +1,73 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+#include <assert.h>
+#include <string.h>
+
+meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size)
+{
+ assert(index_count % 3 == 0);
+ assert(cache_size >= 3);
+ assert(warp_size == 0 || warp_size >= 3);
+
+ meshopt_Allocator allocator;
+
+ meshopt_VertexCacheStatistics result = {};
+
+ unsigned int warp_offset = 0;
+ unsigned int primgroup_offset = 0;
+
+ unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
+ memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
+
+ unsigned int timestamp = cache_size + 1;
+
+ for (size_t i = 0; i < index_count; i += 3)
+ {
+ unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
+ assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+ bool ac = (timestamp - cache_timestamps[a]) > cache_size;
+ bool bc = (timestamp - cache_timestamps[b]) > cache_size;
+ bool cc = (timestamp - cache_timestamps[c]) > cache_size;
+
+ // flush cache if triangle doesn't fit into warp or into the primitive buffer
+ if ((primgroup_size && primgroup_offset == primgroup_size) || (warp_size && warp_offset + ac + bc + cc > warp_size))
+ {
+ result.warps_executed += warp_offset > 0;
+
+ warp_offset = 0;
+ primgroup_offset = 0;
+
+ // reset cache
+ timestamp += cache_size + 1;
+ }
+
+ // update cache and add vertices to warp
+ for (int j = 0; j < 3; ++j)
+ {
+ unsigned int index = indices[i + j];
+
+ if (timestamp - cache_timestamps[index] > cache_size)
+ {
+ cache_timestamps[index] = timestamp++;
+ result.vertices_transformed++;
+ warp_offset++;
+ }
+ }
+
+ primgroup_offset++;
+ }
+
+ size_t unique_vertex_count = 0;
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ unique_vertex_count += cache_timestamps[i] > 0;
+
+ result.warps_executed += warp_offset > 0;
+
+ result.acmr = index_count == 0 ? 0 : float(result.vertices_transformed) / float(index_count / 3);
+ result.atvr = unique_vertex_count == 0 ? 0 : float(result.vertices_transformed) / float(unique_vertex_count);
+
+ return result;
+}
diff --git a/thirdparty/meshoptimizer/vcacheoptimizer.cpp b/thirdparty/meshoptimizer/vcacheoptimizer.cpp
new file mode 100644
index 0000000000..fb8ade4b77
--- /dev/null
+++ b/thirdparty/meshoptimizer/vcacheoptimizer.cpp
@@ -0,0 +1,473 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+#include <assert.h>
+#include <string.h>
+
+// This work is based on:
+// Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006
+// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
+namespace meshopt
+{
+
+const size_t kCacheSizeMax = 16;
+const size_t kValenceMax = 8;
+
+struct VertexScoreTable
+{
+ float cache[1 + kCacheSizeMax];
+ float live[1 + kValenceMax];
+};
+
+// Tuned to minimize the ACMR of a GPU that has a cache profile similar to NVidia and AMD
+static const VertexScoreTable kVertexScoreTable = {
+ {0.f, 0.779f, 0.791f, 0.789f, 0.981f, 0.843f, 0.726f, 0.847f, 0.882f, 0.867f, 0.799f, 0.642f, 0.613f, 0.600f, 0.568f, 0.372f, 0.234f},
+ {0.f, 0.995f, 0.713f, 0.450f, 0.404f, 0.059f, 0.005f, 0.147f, 0.006f},
+};
+
+// Tuned to minimize the encoded index buffer size
+static const VertexScoreTable kVertexScoreTableStrip = {
+ {0.f, 1.000f, 1.000f, 1.000f, 0.453f, 0.561f, 0.490f, 0.459f, 0.179f, 0.526f, 0.000f, 0.227f, 0.184f, 0.490f, 0.112f, 0.050f, 0.131f},
+ {0.f, 0.956f, 0.786f, 0.577f, 0.558f, 0.618f, 0.549f, 0.499f, 0.489f},
+};
+
+struct TriangleAdjacency
+{
+ unsigned int* counts;
+ unsigned int* offsets;
+ unsigned int* data;
+};
+
+static void buildTriangleAdjacency(TriangleAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
+{
+ size_t face_count = index_count / 3;
+
+ // allocate arrays
+ adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
+ adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
+ adjacency.data = allocator.allocate<unsigned int>(index_count);
+
+ // fill triangle counts
+ memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ assert(indices[i] < vertex_count);
+
+ adjacency.counts[indices[i]]++;
+ }
+
+ // fill offset table
+ unsigned int offset = 0;
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ adjacency.offsets[i] = offset;
+ offset += adjacency.counts[i];
+ }
+
+ assert(offset == index_count);
+
+ // fill triangle data
+ for (size_t i = 0; i < face_count; ++i)
+ {
+ unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+
+ adjacency.data[adjacency.offsets[a]++] = unsigned(i);
+ adjacency.data[adjacency.offsets[b]++] = unsigned(i);
+ adjacency.data[adjacency.offsets[c]++] = unsigned(i);
+ }
+
+ // fix offsets that have been disturbed by the previous pass
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ assert(adjacency.offsets[i] >= adjacency.counts[i]);
+
+ adjacency.offsets[i] -= adjacency.counts[i];
+ }
+}
+
+static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count)
+{
+ // check dead-end stack
+ while (dead_end_top)
+ {
+ unsigned int vertex = dead_end[--dead_end_top];
+
+ if (live_triangles[vertex] > 0)
+ return vertex;
+ }
+
+ // input order
+ while (input_cursor < vertex_count)
+ {
+ if (live_triangles[input_cursor] > 0)
+ return input_cursor;
+
+ ++input_cursor;
+ }
+
+ return ~0u;
+}
+
+static unsigned int getNextVertexNeighbour(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size)
+{
+ unsigned int best_candidate = ~0u;
+ int best_priority = -1;
+
+ for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate)
+ {
+ unsigned int vertex = *next_candidate;
+
+ // otherwise we don't need to process it
+ if (live_triangles[vertex] > 0)
+ {
+ int priority = 0;
+
+ // will it be in cache after fanning?
+ if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size)
+ {
+ priority = timestamp - cache_timestamps[vertex]; // position in cache
+ }
+
+ if (priority > best_priority)
+ {
+ best_candidate = vertex;
+ best_priority = priority;
+ }
+ }
+ }
+
+ return best_candidate;
+}
+
+static float vertexScore(const VertexScoreTable* table, int cache_position, unsigned int live_triangles)
+{
+ assert(cache_position >= -1 && cache_position < int(kCacheSizeMax));
+
+ unsigned int live_triangles_clamped = live_triangles < kValenceMax ? live_triangles : kValenceMax;
+
+ return table->cache[1 + cache_position] + table->live[live_triangles_clamped];
+}
+
+static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const unsigned char* emitted_flags, size_t face_count)
+{
+ // input order
+ while (input_cursor < face_count)
+ {
+ if (!emitted_flags[input_cursor])
+ return input_cursor;
+
+ ++input_cursor;
+ }
+
+ return ~0u;
+}
+
+} // namespace meshopt
+
+void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const meshopt::VertexScoreTable* table)
+{
+ using namespace meshopt;
+
+ assert(index_count % 3 == 0);
+
+ meshopt_Allocator allocator;
+
+ // guard for empty meshes
+ if (index_count == 0 || vertex_count == 0)
+ return;
+
+ // support in-place optimization
+ if (destination == indices)
+ {
+ unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
+ memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
+ indices = indices_copy;
+ }
+
+ unsigned int cache_size = 16;
+ assert(cache_size <= kCacheSizeMax);
+
+ size_t face_count = index_count / 3;
+
+ // build adjacency information
+ TriangleAdjacency adjacency = {};
+ buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
+
+ // live triangle counts
+ unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
+ memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
+
+ // emitted flags
+ unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
+ memset(emitted_flags, 0, face_count);
+
+ // compute initial vertex scores
+ float* vertex_scores = allocator.allocate<float>(vertex_count);
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ vertex_scores[i] = vertexScore(table, -1, live_triangles[i]);
+
+ // compute triangle scores
+ float* triangle_scores = allocator.allocate<float>(face_count);
+
+ for (size_t i = 0; i < face_count; ++i)
+ {
+ unsigned int a = indices[i * 3 + 0];
+ unsigned int b = indices[i * 3 + 1];
+ unsigned int c = indices[i * 3 + 2];
+
+ triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c];
+ }
+
+ unsigned int cache_holder[2 * (kCacheSizeMax + 3)];
+ unsigned int* cache = cache_holder;
+ unsigned int* cache_new = cache_holder + kCacheSizeMax + 3;
+ size_t cache_count = 0;
+
+ unsigned int current_triangle = 0;
+ unsigned int input_cursor = 1;
+
+ unsigned int output_triangle = 0;
+
+ while (current_triangle != ~0u)
+ {
+ assert(output_triangle < face_count);
+
+ unsigned int a = indices[current_triangle * 3 + 0];
+ unsigned int b = indices[current_triangle * 3 + 1];
+ unsigned int c = indices[current_triangle * 3 + 2];
+
+ // output indices
+ destination[output_triangle * 3 + 0] = a;
+ destination[output_triangle * 3 + 1] = b;
+ destination[output_triangle * 3 + 2] = c;
+ output_triangle++;
+
+ // update emitted flags
+ emitted_flags[current_triangle] = true;
+ triangle_scores[current_triangle] = 0;
+
+ // new triangle
+ size_t cache_write = 0;
+ cache_new[cache_write++] = a;
+ cache_new[cache_write++] = b;
+ cache_new[cache_write++] = c;
+
+ // old triangles
+ for (size_t i = 0; i < cache_count; ++i)
+ {
+ unsigned int index = cache[i];
+
+ if (index != a && index != b && index != c)
+ {
+ cache_new[cache_write++] = index;
+ }
+ }
+
+ unsigned int* cache_temp = cache;
+ cache = cache_new, cache_new = cache_temp;
+ cache_count = cache_write > cache_size ? cache_size : cache_write;
+
+ // update live triangle counts
+ live_triangles[a]--;
+ live_triangles[b]--;
+ live_triangles[c]--;
+
+ // remove emitted triangle from adjacency data
+ // this makes sure that we spend less time traversing these lists on subsequent iterations
+ for (size_t k = 0; k < 3; ++k)
+ {
+ unsigned int index = indices[current_triangle * 3 + k];
+
+ unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
+ size_t neighbours_size = adjacency.counts[index];
+
+ for (size_t i = 0; i < neighbours_size; ++i)
+ {
+ unsigned int tri = neighbours[i];
+
+ if (tri == current_triangle)
+ {
+ neighbours[i] = neighbours[neighbours_size - 1];
+ adjacency.counts[index]--;
+ break;
+ }
+ }
+ }
+
+ unsigned int best_triangle = ~0u;
+ float best_score = 0;
+
+ // update cache positions, vertex scores and triangle scores, and find next best triangle
+ for (size_t i = 0; i < cache_write; ++i)
+ {
+ unsigned int index = cache[i];
+
+ int cache_position = i >= cache_size ? -1 : int(i);
+
+ // update vertex score
+ float score = vertexScore(table, cache_position, live_triangles[index]);
+ float score_diff = score - vertex_scores[index];
+
+ vertex_scores[index] = score;
+
+ // update scores of vertex triangles
+ const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[index];
+ const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[index];
+
+ for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
+ {
+ unsigned int tri = *it;
+ assert(!emitted_flags[tri]);
+
+ float tri_score = triangle_scores[tri] + score_diff;
+ assert(tri_score > 0);
+
+ if (best_score < tri_score)
+ {
+ best_triangle = tri;
+ best_score = tri_score;
+ }
+
+ triangle_scores[tri] = tri_score;
+ }
+ }
+
+ // step through input triangles in order if we hit a dead-end
+ current_triangle = best_triangle;
+
+ if (current_triangle == ~0u)
+ {
+ current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count);
+ }
+ }
+
+ assert(input_cursor == face_count);
+ assert(output_triangle == face_count);
+}
+
+void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
+{
+ meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTable);
+}
+
+void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
+{
+ meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTableStrip);
+}
+
+void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
+{
+ using namespace meshopt;
+
+ assert(index_count % 3 == 0);
+ assert(cache_size >= 3);
+
+ meshopt_Allocator allocator;
+
+ // guard for empty meshes
+ if (index_count == 0 || vertex_count == 0)
+ return;
+
+ // support in-place optimization
+ if (destination == indices)
+ {
+ unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
+ memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
+ indices = indices_copy;
+ }
+
+ size_t face_count = index_count / 3;
+
+ // build adjacency information
+ TriangleAdjacency adjacency = {};
+ buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
+
+ // live triangle counts
+ unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
+ memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
+
+ // cache time stamps
+ unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
+ memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
+
+ // dead-end stack
+ unsigned int* dead_end = allocator.allocate<unsigned int>(index_count);
+ unsigned int dead_end_top = 0;
+
+ // emitted flags
+ unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
+ memset(emitted_flags, 0, face_count);
+
+ unsigned int current_vertex = 0;
+
+ unsigned int timestamp = cache_size + 1;
+ unsigned int input_cursor = 1; // vertex to restart from in case of dead-end
+
+ unsigned int output_triangle = 0;
+
+ while (current_vertex != ~0u)
+ {
+ const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top;
+
+ // emit all vertex neighbours
+ const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[current_vertex];
+ const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[current_vertex];
+
+ for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
+ {
+ unsigned int triangle = *it;
+
+ if (!emitted_flags[triangle])
+ {
+ unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
+
+ // output indices
+ destination[output_triangle * 3 + 0] = a;
+ destination[output_triangle * 3 + 1] = b;
+ destination[output_triangle * 3 + 2] = c;
+ output_triangle++;
+
+ // update dead-end stack
+ dead_end[dead_end_top + 0] = a;
+ dead_end[dead_end_top + 1] = b;
+ dead_end[dead_end_top + 2] = c;
+ dead_end_top += 3;
+
+ // update live triangle counts
+ live_triangles[a]--;
+ live_triangles[b]--;
+ live_triangles[c]--;
+
+ // update cache info
+ // if vertex is not in cache, put it in cache
+ if (timestamp - cache_timestamps[a] > cache_size)
+ cache_timestamps[a] = timestamp++;
+
+ if (timestamp - cache_timestamps[b] > cache_size)
+ cache_timestamps[b] = timestamp++;
+
+ if (timestamp - cache_timestamps[c] > cache_size)
+ cache_timestamps[c] = timestamp++;
+
+ // update emitted flags
+ emitted_flags[triangle] = true;
+ }
+ }
+
+ // next candidates are the ones we pushed to dead-end stack just now
+ const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top;
+
+ // get next vertex
+ current_vertex = getNextVertexNeighbour(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size);
+
+ if (current_vertex == ~0u)
+ {
+ current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count);
+ }
+ }
+
+ assert(output_triangle == face_count);
+}
diff --git a/thirdparty/meshoptimizer/vertexcodec.cpp b/thirdparty/meshoptimizer/vertexcodec.cpp
new file mode 100644
index 0000000000..784c9a13db
--- /dev/null
+++ b/thirdparty/meshoptimizer/vertexcodec.cpp
@@ -0,0 +1,1265 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+#include <assert.h>
+#include <string.h>
+
+// The block below auto-detects SIMD ISA that can be used on the target platform
+#ifndef MESHOPTIMIZER_NO_SIMD
+
+// The SIMD implementation requires SSSE3, which can be enabled unconditionally through compiler settings
+#if defined(__AVX__) || defined(__SSSE3__)
+#define SIMD_SSE
+#endif
+
+// An experimental implementation using AVX512 instructions; it's only enabled when AVX512 is enabled through compiler settings
+#if defined(__AVX512VBMI2__) && defined(__AVX512VBMI__) && defined(__AVX512VL__) && defined(__POPCNT__)
+#undef SIMD_SSE
+#define SIMD_AVX
+#endif
+
+// MSVC supports compiling SSSE3 code regardless of compile options; we use a cpuid-based scalar fallback
+#if !defined(SIMD_SSE) && !defined(SIMD_AVX) && defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
+#define SIMD_SSE
+#define SIMD_FALLBACK
+#endif
+
+// GCC 4.9+ and clang 3.8+ support targeting SIMD ISA from individual functions; we use a cpuid-based scalar fallback
+#if !defined(SIMD_SSE) && !defined(SIMD_AVX) && ((defined(__clang__) && __clang_major__ * 100 + __clang_minor__ >= 308) || (defined(__GNUC__) && __GNUC__ * 100 + __GNUC_MINOR__ >= 409)) && (defined(__i386__) || defined(__x86_64__))
+#define SIMD_SSE
+#define SIMD_FALLBACK
+#define SIMD_TARGET __attribute__((target("ssse3")))
+#endif
+
+// GCC/clang define these when NEON support is available
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+#define SIMD_NEON
+#endif
+
+// On MSVC, we assume that ARM builds always target NEON-capable devices
+#if !defined(SIMD_NEON) && defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+#define SIMD_NEON
+#endif
+
+// When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD
+#if defined(__wasm_simd128__)
+#define SIMD_WASM
+#endif
+
+#ifndef SIMD_TARGET
+#define SIMD_TARGET
+#endif
+
+#endif // !MESHOPTIMIZER_NO_SIMD
+
+#ifdef SIMD_SSE
+#include <tmmintrin.h>
+#endif
+
+#if defined(SIMD_SSE) && defined(SIMD_FALLBACK)
+#ifdef _MSC_VER
+#include <intrin.h> // __cpuid
+#else
+#include <cpuid.h> // __cpuid
+#endif
+#endif
+
+#ifdef SIMD_AVX
+#include <immintrin.h>
+#endif
+
+#ifdef SIMD_NEON
+#if defined(_MSC_VER) && defined(_M_ARM64)
+#include <arm64_neon.h>
+#else
+#include <arm_neon.h>
+#endif
+#endif
+
+#ifdef SIMD_WASM
+#include <wasm_simd128.h>
+#endif
+
+#ifndef TRACE
+#define TRACE 0
+#endif
+
+#if TRACE
+#include <stdio.h>
+#endif
+
+#ifdef SIMD_WASM
+#define wasmx_splat_v32x4(v, i) wasm_v32x4_shuffle(v, v, i, i, i, i)
+#define wasmx_unpacklo_v8x16(a, b) wasm_v8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23)
+#define wasmx_unpackhi_v8x16(a, b) wasm_v8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31)
+#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11)
+#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15)
+#define wasmx_unpacklo_v64x2(a, b) wasm_v64x2_shuffle(a, b, 0, 2)
+#define wasmx_unpackhi_v64x2(a, b) wasm_v64x2_shuffle(a, b, 1, 3)
+#endif
+
+namespace meshopt
+{
+
+// Tag that is OR'ed with the encoder version to form the first byte of an encoded vertex stream
+const unsigned char kVertexHeader = 0xa0;
+
+// Version written into the stream header; changed through meshopt_encodeVertexVersion
+static int gEncodeVertexVersion = 0;
+
+// Size in bytes of the per-block scratch buffer; bounds how many vertices fit into one block
+const size_t kVertexBlockSizeBytes = 8192;
+// Upper bound on the number of vertices in a single block
+const size_t kVertexBlockMaxSize = 256;
+// Number of bytes that are encoded/decoded together as one group
+const size_t kByteGroupSize = 16;
+// Number of bytes that must still be available in the stream before a group is encoded or decoded
+const size_t kByteGroupDecodeLimit = 24;
+// Minimum size in bytes of the tail that stores the first vertex at the end of the stream
+const size_t kTailMaxSize = 32;
+
+// Returns the number of vertices stored in one block for the given vertex size (in bytes).
+static size_t getVertexBlockSize(size_t vertex_size)
+{
+	// number of whole vertices that fit into the scratch buffer, rounded down to a multiple of the
+	// byte group size: every byte of a vertex is encoded as byte groups, so a misaligned block would waste bytes
+	size_t block_size = (kVertexBlockSizeBytes / vertex_size) & ~(kByteGroupSize - 1);
+
+	// never exceed the hard limit on vertices per block
+	if (block_size >= kVertexBlockMaxSize)
+		return kVertexBlockMaxSize;
+
+	return block_size;
+}
+
+// Maps a byte, interpreted as a signed 8-bit delta, to an unsigned code in which small magnitudes
+// of either sign become small values (0, -1, 1, -2, ... -> 0, 1, 2, 3, ...).
+inline unsigned char zigzag8(unsigned char v)
+{
+	// all ones when the signed interpretation of v is negative, zero otherwise
+	int sign_mask = (signed char)(v) >> 7;
+	int doubled = v << 1;
+
+	return (unsigned char)(sign_mask ^ doubled);
+}
+
+// Inverse of the zigzag mapping: the lowest bit carries the sign, the remaining bits the magnitude.
+inline unsigned char unzigzag8(unsigned char v)
+{
+	// all ones for odd codes (negative deltas), zero for even codes
+	int sign_mask = -(v & 1);
+	int magnitude = v >> 1;
+
+	return (unsigned char)(sign_mask ^ magnitude);
+}
+
+#if TRACE
+// Encoder statistics collected only in TRACE builds; one entry is kept per byte position within a vertex
+struct Stats
+{
+ // total number of encoded bytes produced for this byte position
+ size_t size;
+ // number of bytes spent on group headers (updated when TRACE > 1)
+ size_t header;
+ // number of groups encoded with each of the four bit-width selectors (updated when TRACE > 1)
+ size_t bitg[4];
+ // number of encoded bytes produced by groups of each bit-width selector (updated when TRACE > 1)
+ size_t bitb[4];
+};
+
+// Entry that the byte encoder currently accumulates into; set around each encodeBytes call
+Stats* bytestats;
+// Per-byte-position statistics, indexed by the byte offset inside a vertex
+Stats vertexstats[256];
+#endif
+
+// Returns true when every byte of the group is zero.
+static bool encodeBytesGroupZero(const unsigned char* buffer)
+{
+	size_t i = 0;
+
+	// stop at the first non-zero byte
+	while (i < kByteGroupSize && buffer[i] == 0)
+		++i;
+
+	return i == kByteGroupSize;
+}
+
+// Computes how many bytes the group would occupy when encoded with the given bit width.
+// Returns size_t(-1) when the group cannot be represented at that width (bits == 1 with non-zero data).
+static size_t encodeBytesGroupMeasure(const unsigned char* buffer, int bits)
+{
+	assert(bits >= 1 && bits <= 8);
+
+	switch (bits)
+	{
+	case 1:
+		// this width has no payload and can only represent an all-zero group
+		return encodeBytesGroupZero(buffer) ? 0 : size_t(-1);
+
+	case 8:
+		// the group is stored verbatim
+		return kByteGroupSize;
+
+	default:
+		break;
+	}
+
+	const unsigned char sentinel = (1 << bits) - 1;
+
+	// every value that doesn't fit below the sentinel costs one extra byte
+	size_t overflow = 0;
+
+	for (size_t i = 0; i < kByteGroupSize; ++i)
+		if (buffer[i] >= sentinel)
+			overflow++;
+
+	return kByteGroupSize * bits / 8 + overflow;
+}
+
+// Writes one group of kByteGroupSize bytes to data using the given bit width and returns the new write position.
+// bits == 1 emits nothing, bits == 8 copies the group verbatim; other widths emit packed values followed by
+// one full byte for every value that did not fit.
+static unsigned char* encodeBytesGroup(unsigned char* data, const unsigned char* buffer, int bits)
+{
+	assert(bits >= 1 && bits <= 8);
+
+	if (bits == 1)
+		return data;
+
+	if (bits == 8)
+	{
+		memcpy(data, buffer, kByteGroupSize);
+		return data + kByteGroupSize;
+	}
+
+	const size_t values_per_byte = 8 / bits;
+	assert(kByteGroupSize % values_per_byte == 0);
+
+	// the all-ones pattern marks a value that is stored out of line
+	const unsigned char sentinel = (1 << bits) - 1;
+
+	// fixed portion: pack values into bytes, first value in the most significant bits
+	const unsigned char* source = buffer;
+
+	for (size_t i = 0; i < kByteGroupSize / values_per_byte; ++i)
+	{
+		unsigned char packed = 0;
+
+		for (size_t k = 0; k < values_per_byte; ++k, ++source)
+		{
+			unsigned char code = (*source < sentinel) ? *source : sentinel;
+
+			packed = (unsigned char)((packed << bits) | code);
+		}
+
+		*data++ = packed;
+	}
+
+	// variable portion: a full byte for each out-of-range value, in group order
+	for (size_t i = 0; i < kByteGroupSize; ++i)
+	{
+		if (buffer[i] < sentinel)
+			continue;
+
+		*data++ = buffer[i];
+	}
+
+	return data;
+}
+
+// Encodes buffer_size bytes (a multiple of kByteGroupSize) as a header of 2-bit selectors followed by the
+// encoded groups. Returns the new write position, or 0 when the output range [data, data_end) is too small.
+static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end, const unsigned char* buffer, size_t buffer_size)
+{
+	assert(buffer_size % kByteGroupSize == 0);
+
+	const size_t group_count = buffer_size / kByteGroupSize;
+
+	// each header byte stores the selectors of four groups, so round the group count up to 4
+	const size_t header_size = (group_count + 3) / 4;
+
+	if (size_t(data_end - data) < header_size)
+		return 0;
+
+	unsigned char* header = data;
+	memset(header, 0, header_size);
+	data += header_size;
+
+	for (size_t group = 0; group < group_count; ++group)
+	{
+		const unsigned char* source = buffer + group * kByteGroupSize;
+
+		if (size_t(data_end - data) < kByteGroupDecodeLimit)
+			return 0;
+
+		// start from the verbatim encoding and switch to a narrower width only if it is strictly smaller
+		int best_bitslog2 = 3;
+		size_t best_size = encodeBytesGroupMeasure(source, 8);
+
+		for (int bitslog2 = 0; bitslog2 < 3; ++bitslog2)
+		{
+			size_t size = encodeBytesGroupMeasure(source, 1 << bitslog2);
+
+			if (size < best_size)
+			{
+				best_bitslog2 = bitslog2;
+				best_size = size;
+			}
+		}
+
+		// record the selector in the 2-bit slot that belongs to this group
+		header[group / 4] |= best_bitslog2 << ((group % 4) * 2);
+
+		unsigned char* next = encodeBytesGroup(data, source, 1 << best_bitslog2);
+
+		assert(data + best_size == next);
+		data = next;
+
+#if TRACE > 1
+		bytestats->bitg[best_bitslog2]++;
+		bytestats->bitb[best_bitslog2] += best_size;
+#endif
+	}
+
+#if TRACE > 1
+	bytestats->header += header_size;
+#endif
+
+	return data;
+}
+
+// Encodes one block of vertices: for every byte position within a vertex, the sequence of that byte across
+// the block is delta-encoded against the previous vertex (starting from last_vertex), zigzag-mapped and
+// passed to encodeBytes. On success last_vertex is updated to the last vertex of the block.
+// Returns the new write position, or 0 when the output range is too small.
+static unsigned char* encodeVertexBlock(unsigned char* data, unsigned char* data_end, const unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256])
+{
+	assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize);
+
+	// zero-filled because the elements past vertex_count are encoded as well when the count is rounded up to a whole group
+	unsigned char buffer[kVertexBlockMaxSize] = {};
+	assert(sizeof(buffer) % kByteGroupSize == 0);
+
+	const size_t padded_count = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1);
+
+	for (size_t k = 0; k < vertex_size; ++k)
+	{
+		unsigned char prev = last_vertex[k];
+
+		for (size_t i = 0; i < vertex_count; ++i)
+		{
+			unsigned char cur = vertex_data[i * vertex_size + k];
+
+			buffer[i] = zigzag8(cur - prev);
+			prev = cur;
+		}
+
+#if TRACE
+		const unsigned char* olddata = data;
+		bytestats = &vertexstats[k];
+#endif
+
+		data = encodeBytes(data, data_end, buffer, padded_count);
+		if (!data)
+			return 0;
+
+#if TRACE
+		bytestats = 0;
+		vertexstats[k].size += data - olddata;
+#endif
+	}
+
+	memcpy(last_vertex, vertex_data + vertex_size * (vertex_count - 1), vertex_size);
+
+	return data;
+}
+
+#if defined(SIMD_FALLBACK) || (!defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_AVX))
+// Scalar decoder for a single group of kByteGroupSize bytes.
+// bitslog2 selects the encoding: 0 = all zeros (no payload), 1 = 2 bits per value, 2 = 4 bits per value, 3 = raw copy.
+// Returns the input position right after the consumed group.
+static const unsigned char* decodeBytesGroup(const unsigned char* data, unsigned char* buffer, int bitslog2)
+{
+// READ fetches the next packed byte from the fixed portion
+#define READ() byte = *data++
+// NEXT extracts the top `bits` bits of the packed byte; when they equal the all-ones sentinel the value is taken
+// from the variable portion (data_var), which is then advanced. Note that *data_var is read unconditionally.
+#define NEXT(bits) enc = byte >> (8 - bits), byte <<= bits, encv = *data_var, *buffer++ = (enc == (1 << bits) - 1) ? encv : enc, data_var += (enc == (1 << bits) - 1)
+
+ unsigned char byte, enc, encv;
+ const unsigned char* data_var;
+
+ switch (bitslog2)
+ {
+ case 0:
+ // no payload: the whole group is zero
+ memset(buffer, 0, kByteGroupSize);
+ return data;
+ case 1:
+ // the variable portion starts after the 4 packed bytes
+ data_var = data + 4;
+
+ // 4 groups with 4 2-bit values in each byte
+ READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2);
+ READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2);
+ READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2);
+ READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2);
+
+ return data_var;
+ case 2:
+ // the variable portion starts after the 8 packed bytes
+ data_var = data + 8;
+
+ // 8 groups with 2 4-bit values in each byte
+ READ(), NEXT(4), NEXT(4);
+ READ(), NEXT(4), NEXT(4);
+ READ(), NEXT(4), NEXT(4);
+ READ(), NEXT(4), NEXT(4);
+ READ(), NEXT(4), NEXT(4);
+ READ(), NEXT(4), NEXT(4);
+ READ(), NEXT(4), NEXT(4);
+ READ(), NEXT(4), NEXT(4);
+
+ return data_var;
+ case 3:
+ // the group is stored verbatim
+ memcpy(buffer, data, kByteGroupSize);
+ return data + kByteGroupSize;
+ default:
+ assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value
+ return data;
+ }
+
+#undef READ
+#undef NEXT
+}
+
+// Decodes buffer_size bytes (a multiple of kByteGroupSize) from a header of 2-bit selectors followed by the
+// encoded groups. Returns the new read position, or 0 when the input range [data, data_end) is too short.
+static const unsigned char* decodeBytes(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size)
+{
+	assert(buffer_size % kByteGroupSize == 0);
+
+	const size_t group_count = buffer_size / kByteGroupSize;
+
+	// each header byte stores the selectors of four groups, so round the group count up to 4
+	const size_t header_size = (group_count + 3) / 4;
+
+	if (size_t(data_end - data) < header_size)
+		return 0;
+
+	const unsigned char* header = data;
+	data += header_size;
+
+	for (size_t group = 0; group < group_count; ++group)
+	{
+		// a single check per group covers every read the group decoder may perform
+		if (size_t(data_end - data) < kByteGroupDecodeLimit)
+			return 0;
+
+		int bitslog2 = (header[group / 4] >> ((group % 4) * 2)) & 3;
+
+		data = decodeBytesGroup(data, buffer + group * kByteGroupSize, bitslog2);
+	}
+
+	return data;
+}
+
+// Scalar decoder for one block of vertices: for every byte position within a vertex, decodes the byte
+// sequence, undoes the zigzag mapping and accumulates deltas starting from last_vertex.
+// Writes vertex_count vertices to vertex_data and updates last_vertex to the last decoded vertex.
+// Returns the new read position, or 0 when the input is too short.
+static const unsigned char* decodeVertexBlock(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256])
+{
+	assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize);
+
+	unsigned char buffer[kVertexBlockMaxSize];
+	unsigned char transposed[kVertexBlockSizeBytes];
+
+	// the encoder pads every byte sequence to a whole number of groups
+	const size_t padded_count = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1);
+
+	for (size_t k = 0; k < vertex_size; ++k)
+	{
+		data = decodeBytes(data, data_end, buffer, padded_count);
+		if (!data)
+			return 0;
+
+		unsigned char prev = last_vertex[k];
+
+		for (size_t i = 0; i < vertex_count; ++i)
+		{
+			prev = (unsigned char)(unzigzag8(buffer[i]) + prev);
+
+			transposed[i * vertex_size + k] = prev;
+		}
+	}
+
+	memcpy(vertex_data, transposed, vertex_count * vertex_size);
+
+	memcpy(last_vertex, transposed + vertex_size * (vertex_count - 1), vertex_size);
+
+	return data;
+}
+#endif
+
+#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
+// For every 8-bit lane mask: shuffle row whose entry i is the rank of bit i among the set bits of the mask
+// (0x80 when bit i is clear), and the number of set bits in the mask.
+static unsigned char kDecodeBytesGroupShuffle[256][8];
+static unsigned char kDecodeBytesGroupCount[256];
+
+#ifdef __wasm__
+__attribute__((cold)) // this saves 500 bytes in the output binary - we don't need to vectorize this loop!
+#endif
+static bool
+decodeBytesGroupBuildTables()
+{
+	for (int mask = 0; mask < 256; ++mask)
+	{
+		unsigned char* row = kDecodeBytesGroupShuffle[mask];
+		unsigned char set_bits = 0;
+
+		for (int bit = 0; bit < 8; ++bit)
+		{
+			if (mask & (1 << bit))
+				row[bit] = set_bits++;
+			else
+				row[bit] = 0x80;
+		}
+
+		kDecodeBytesGroupCount[mask] = set_bits;
+	}
+
+	return true;
+}
+
+// the tables are filled during static initialization
+static bool gDecodeBytesGroupInitialized = decodeBytesGroupBuildTables();
+#endif
+
+#ifdef SIMD_SSE
+// Builds a 16-lane shuffle control from the lane masks of the low (mask0) and high (mask1) 8 lanes.
+SIMD_TARGET
+static __m128i decodeShuffleMask(unsigned char mask0, unsigned char mask1)
+{
+	// low 8 lanes: indices come straight from the table
+	__m128i lo = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(&kDecodeBytesGroupShuffle[mask0]));
+
+	// high 8 lanes: indices are offset by the number of bytes the low lanes consume
+	__m128i hi = _mm_add_epi8(
+	    _mm_loadl_epi64(reinterpret_cast<const __m128i*>(&kDecodeBytesGroupShuffle[mask1])),
+	    _mm_set1_epi8(kDecodeBytesGroupCount[mask0]));
+
+	return _mm_unpacklo_epi64(lo, hi);
+}
+
+// SSE decoder for a single group of 16 bytes.
+// bitslog2 selects the encoding: 0 = all zeros, 1 = 2 bits per value, 2 = 4 bits per value, 3 = raw copy.
+// Returns the input position right after the consumed group. Cases 1 and 2 always load 16 bytes following
+// the packed selectors, regardless of how many of them belong to this group.
+SIMD_TARGET
+static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2)
+{
+ switch (bitslog2)
+ {
+ case 0:
+ {
+ // no payload: the whole group is zero
+ __m128i result = _mm_setzero_si128();
+
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+
+ return data;
+ }
+
+ case 1:
+ {
+ // on GCC-compatible compilers the int type is declared with alignment 1 for the 4-byte selector read below
+#ifdef __GNUC__
+ typedef int __attribute__((aligned(1))) unaligned_int;
+#else
+ typedef int unaligned_int;
+#endif
+
+ // sel2: 4 bytes of packed 2-bit selectors; rest: the 16 bytes that follow them
+ __m128i sel2 = _mm_cvtsi32_si128(*reinterpret_cast<const unaligned_int*>(data));
+ __m128i rest = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + 4));
+
+ // spread the selectors so that each of the 16 lanes holds one 2-bit value in its low bits
+ __m128i sel22 = _mm_unpacklo_epi8(_mm_srli_epi16(sel2, 4), sel2);
+ __m128i sel2222 = _mm_unpacklo_epi8(_mm_srli_epi16(sel22, 2), sel22);
+ __m128i sel = _mm_and_si128(sel2222, _mm_set1_epi8(3));
+
+ // lanes equal to the sentinel (3) take their value from rest
+ __m128i mask = _mm_cmpeq_epi8(sel, _mm_set1_epi8(3));
+ int mask16 = _mm_movemask_epi8(mask);
+ unsigned char mask0 = (unsigned char)(mask16 & 255);
+ unsigned char mask1 = (unsigned char)(mask16 >> 8);
+
+ __m128i shuf = decodeShuffleMask(mask0, mask1);
+
+ // combine bytes gathered from rest (sentinel lanes) with the inline values (all other lanes)
+ __m128i result = _mm_or_si128(_mm_shuffle_epi8(rest, shuf), _mm_andnot_si128(mask, sel));
+
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+
+ // consumed: 4 selector bytes plus one byte per sentinel lane
+ return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1];
+ }
+
+ case 2:
+ {
+ // sel4: 8 bytes of packed 4-bit selectors; rest: the 16 bytes that follow them
+ __m128i sel4 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(data));
+ __m128i rest = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + 8));
+
+ // spread the selectors so that each of the 16 lanes holds one 4-bit value in its low bits
+ __m128i sel44 = _mm_unpacklo_epi8(_mm_srli_epi16(sel4, 4), sel4);
+ __m128i sel = _mm_and_si128(sel44, _mm_set1_epi8(15));
+
+ // lanes equal to the sentinel (15) take their value from rest
+ __m128i mask = _mm_cmpeq_epi8(sel, _mm_set1_epi8(15));
+ int mask16 = _mm_movemask_epi8(mask);
+ unsigned char mask0 = (unsigned char)(mask16 & 255);
+ unsigned char mask1 = (unsigned char)(mask16 >> 8);
+
+ __m128i shuf = decodeShuffleMask(mask0, mask1);
+
+ // combine bytes gathered from rest (sentinel lanes) with the inline values (all other lanes)
+ __m128i result = _mm_or_si128(_mm_shuffle_epi8(rest, shuf), _mm_andnot_si128(mask, sel));
+
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+
+ // consumed: 8 selector bytes plus one byte per sentinel lane
+ return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1];
+ }
+
+ case 3:
+ {
+ // the group is stored verbatim
+ __m128i result = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data));
+
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+
+ return data + 16;
+ }
+
+ default:
+ assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value
+ return data;
+ }
+}
+#endif
+
+#ifdef SIMD_AVX
+// Per-width constants for the AVX-512 group decoder, indexed relative to bitslog2:
+// entries [bitslog2 - 1] are the sentinel values (3 for 2-bit, 15 for 4-bit values),
+// entries [bitslog2 + 1] are the control vectors passed to _mm_multishift_epi64_epi8.
+static const __m128i decodeBytesGroupConfig[] = {
+ _mm_set1_epi8(3),
+ _mm_set1_epi8(15),
+ _mm_setr_epi8(6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24),
+ _mm_setr_epi8(4, 0, 12, 8, 20, 16, 28, 24, 36, 32, 44, 40, 52, 48, 60, 56),
+};
+
+// AVX-512 decoder for a single group of 16 bytes.
+// bitslog2 selects the encoding: 0 = all zeros, 1 = 2 bits per value, 2 = 4 bits per value, 3 = raw copy.
+// Returns the input position right after the consumed group.
+static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2)
+{
+ switch (bitslog2)
+ {
+ case 0:
+ {
+ // no payload: the whole group is zero
+ __m128i result = _mm_setzero_si128();
+
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+
+ return data;
+ }
+
+ case 1:
+ case 2:
+ {
+ // the packed selectors take 4 bytes (bitslog2 == 1) or 8 bytes (bitslog2 == 2); out-of-line bytes follow
+ const unsigned char* skip = data + (bitslog2 << 2);
+
+ // selb always loads 8 bytes; rest loads the 16 bytes that follow the selectors
+ __m128i selb = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(data));
+ __m128i rest = _mm_loadu_si128(reinterpret_cast<const __m128i*>(skip));
+
+ // sentinel value and multishift control for this bit width
+ __m128i sent = decodeBytesGroupConfig[bitslog2 - 1];
+ __m128i ctrl = decodeBytesGroupConfig[bitslog2 + 1];
+
+ // duplicate the selector qword into both halves, then extract one value per lane and mask it to the bit width
+ __m128i selw = _mm_shuffle_epi32(selb, 0x44);
+ __m128i sel = _mm_and_si128(sent, _mm_multishift_epi64_epi8(ctrl, selw));
+ // lanes equal to the sentinel take their value from rest
+ __mmask16 mask16 = _mm_cmp_epi8_mask(sel, sent, _MM_CMPINT_EQ);
+
+ // sentinel lanes receive consecutive bytes of rest; all other lanes keep the inline value
+ __m128i result = _mm_mask_expand_epi8(sel, mask16, rest);
+
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+
+ // consumed: the selector bytes plus one byte per sentinel lane
+ return skip + _mm_popcnt_u32(mask16);
+ }
+
+ case 3:
+ {
+ // the group is stored verbatim
+ __m128i result = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data));
+
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+
+ return data + 16;
+ }
+
+ default:
+ assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value
+ return data;
+ }
+}
+#endif
+
+#ifdef SIMD_NEON
+// Gathers out-of-line bytes for the low (mask0, from rest0) and high (mask1, from rest1) 8 lanes
+// using the precomputed shuffle rows, and joins both halves into one 16-lane vector.
+static uint8x16_t shuffleBytes(unsigned char mask0, unsigned char mask1, uint8x8_t rest0, uint8x8_t rest1)
+{
+	uint8x8_t lo = vtbl1_u8(rest0, vld1_u8(kDecodeBytesGroupShuffle[mask0]));
+	uint8x8_t hi = vtbl1_u8(rest1, vld1_u8(kDecodeBytesGroupShuffle[mask1]));
+
+	return vcombine_u8(lo, hi);
+}
+
+// Converts a 16-lane byte mask into two 8-bit masks: mask0 for the low 8 lanes, mask1 for the high 8 lanes.
+// Bit i of each output is taken from lane i of the corresponding half.
+static void neonMoveMask(uint8x16_t mask, unsigned char& mask0, unsigned char& mask1)
+{
+ // each lane contributes a distinct bit within its half
+ static const unsigned char byte_mask_data[16] = {1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128};
+
+ uint8x16_t byte_mask = vld1q_u8(byte_mask_data);
+ uint8x16_t masked = vandq_u8(mask, byte_mask);
+
+#ifdef __aarch64__
+ // aarch64 has horizontal sums; MSVC doesn't expose this via arm64_neon.h so this path is exclusive to clang/gcc
+ mask0 = vaddv_u8(vget_low_u8(masked));
+ mask1 = vaddv_u8(vget_high_u8(masked));
+#else
+ // we need horizontal sums of each half of masked, which can be done in 3 steps (yielding sums of sizes 2, 4, 8)
+ uint8x8_t sum1 = vpadd_u8(vget_low_u8(masked), vget_high_u8(masked));
+ uint8x8_t sum2 = vpadd_u8(sum1, sum1);
+ uint8x8_t sum3 = vpadd_u8(sum2, sum2);
+
+ // lane 0 holds the sum of the low half, lane 1 the sum of the high half
+ mask0 = vget_lane_u8(sum3, 0);
+ mask1 = vget_lane_u8(sum3, 1);
+#endif
+}
+
+// NEON decoder for a single group of 16 bytes.
+// bitslog2 selects the encoding: 0 = all zeros, 1 = 2 bits per value, 2 = 4 bits per value, 3 = raw copy.
+// Returns the input position right after the consumed group.
+static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2)
+{
+ switch (bitslog2)
+ {
+ case 0:
+ {
+ // no payload: the whole group is zero
+ uint8x16_t result = vdupq_n_u8(0);
+
+ vst1q_u8(buffer, result);
+
+ return data;
+ }
+
+ case 1:
+ {
+ // loads 8 bytes, of which the first 4 are the packed 2-bit selectors used by the zips below
+ uint8x8_t sel2 = vld1_u8(data);
+ // spread the selectors so that each of the 16 lanes holds one 2-bit value in its low bits
+ uint8x8_t sel22 = vzip_u8(vshr_n_u8(sel2, 4), sel2).val[0];
+ uint8x8x2_t sel2222 = vzip_u8(vshr_n_u8(sel22, 2), sel22);
+ uint8x16_t sel = vandq_u8(vcombine_u8(sel2222.val[0], sel2222.val[1]), vdupq_n_u8(3));
+
+ // lanes equal to the sentinel (3) take their value from the out-of-line bytes
+ uint8x16_t mask = vceqq_u8(sel, vdupq_n_u8(3));
+ unsigned char mask0, mask1;
+ neonMoveMask(mask, mask0, mask1);
+
+ // out-of-line bytes for the low lanes start after the selectors; those for the high lanes follow them
+ uint8x8_t rest0 = vld1_u8(data + 4);
+ uint8x8_t rest1 = vld1_u8(data + 4 + kDecodeBytesGroupCount[mask0]);
+
+ uint8x16_t result = vbslq_u8(mask, shuffleBytes(mask0, mask1, rest0, rest1), sel);
+
+ vst1q_u8(buffer, result);
+
+ // consumed: 4 selector bytes plus one byte per sentinel lane
+ return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1];
+ }
+
+ case 2:
+ {
+ // 8 bytes of packed 4-bit selectors; the high nibble of each byte comes first
+ uint8x8_t sel4 = vld1_u8(data);
+ uint8x8x2_t sel44 = vzip_u8(vshr_n_u8(sel4, 4), vand_u8(sel4, vdup_n_u8(15)));
+ uint8x16_t sel = vcombine_u8(sel44.val[0], sel44.val[1]);
+
+ // lanes equal to the sentinel (15) take their value from the out-of-line bytes
+ uint8x16_t mask = vceqq_u8(sel, vdupq_n_u8(15));
+ unsigned char mask0, mask1;
+ neonMoveMask(mask, mask0, mask1);
+
+ // out-of-line bytes for the low lanes start after the selectors; those for the high lanes follow them
+ uint8x8_t rest0 = vld1_u8(data + 8);
+ uint8x8_t rest1 = vld1_u8(data + 8 + kDecodeBytesGroupCount[mask0]);
+
+ uint8x16_t result = vbslq_u8(mask, shuffleBytes(mask0, mask1, rest0, rest1), sel);
+
+ vst1q_u8(buffer, result);
+
+ // consumed: 8 selector bytes plus one byte per sentinel lane
+ return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1];
+ }
+
+ case 3:
+ {
+ // the group is stored verbatim
+ uint8x16_t result = vld1q_u8(data);
+
+ vst1q_u8(buffer, result);
+
+ return data + 16;
+ }
+
+ default:
+ assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value
+ return data;
+ }
+}
+#endif
+
+#ifdef SIMD_WASM
+// Builds a 16-lane swizzle control from the lane masks of the low (mask0) and high (mask1) 8 lanes.
+// NOTE(review): wasm_v128_load reads 16 bytes while a shuffle row is 8 bytes and a count is a single byte;
+// only the low lanes of each load are used below, but the loads for the last table entries extend past the
+// end of the arrays - confirm this over-read is acceptable.
+SIMD_TARGET
+static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1)
+{
+ v128_t sm0 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask0]);
+ v128_t sm1 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask1]);
+
+ // broadcast the number of bytes consumed by the low lanes to every lane
+ v128_t sm1off = wasm_v128_load(&kDecodeBytesGroupCount[mask0]);
+ sm1off = wasm_v8x16_shuffle(sm1off, sm1off, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+
+ // indices for the high lanes are offset by that count
+ v128_t sm1r = wasm_i8x16_add(sm1, sm1off);
+
+ // low 8 bytes of sm0 followed by low 8 bytes of sm1r
+ return wasmx_unpacklo_v64x2(sm0, sm1r);
+}
+
+// Converts a 16-lane byte mask into two 8-bit masks: mask0 and mask1 are read from the low byte of each
+// 32-bit half of the folded 64-bit value computed below.
+SIMD_TARGET
+static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1)
+{
+ // reorder the 32-bit lanes to 0, 2, 1, 3 before splitting the vector into two 64-bit halves
+ v128_t mask_0 = wasm_v32x4_shuffle(mask, mask, 0, 2, 1, 3);
+
+ // keep a single, distinct bit of every byte
+ uint64_t mask_1a = wasm_i64x2_extract_lane(mask_0, 0) & 0x0804020108040201ull;
+ uint64_t mask_1b = wasm_i64x2_extract_lane(mask_0, 1) & 0x8040201080402010ull;
+
+ // TODO: This can use v8x16_bitmask in the future
+ // fold the selected bits together with shifts and ORs
+ uint64_t mask_2 = mask_1a | mask_1b;
+ uint64_t mask_4 = mask_2 | (mask_2 >> 16);
+ uint64_t mask_8 = mask_4 | (mask_4 >> 8);
+
+ mask0 = uint8_t(mask_8);
+ mask1 = uint8_t(mask_8 >> 32);
+}
+
+// Wasm SIMD decoder for a single group of 16 bytes.
+// bitslog2 selects the encoding: 0 = all zeros, 1 = 2 bits per value, 2 = 4 bits per value, 3 = raw copy.
+// Returns the input position right after the consumed group. Cases 1 and 2 load 16 bytes for the packed
+// selectors and 16 bytes for the out-of-line values regardless of how many of them belong to this group.
+SIMD_TARGET
+static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2)
+{
+	switch (bitslog2)
+	{
+	case 0:
+	{
+		// no payload: the whole group is zero
+		v128_t result = wasm_i8x16_splat(0);
+
+		wasm_v128_store(buffer, result);
+
+		return data;
+	}
+
+	case 1:
+	{
+		// the first 4 bytes hold the packed 2-bit selectors; out-of-line bytes follow them
+		v128_t sel2 = wasm_v128_load(data);
+		v128_t rest = wasm_v128_load(data + 4);
+
+		// spread the selectors so that each of the 16 lanes holds one 2-bit value in its low bits
+		v128_t sel22 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel2, 4), sel2);
+		v128_t sel2222 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel22, 2), sel22);
+		v128_t sel = wasm_v128_and(sel2222, wasm_i8x16_splat(3));
+
+		// lanes equal to the sentinel (3) take their value from rest
+		v128_t mask = wasm_i8x16_eq(sel, wasm_i8x16_splat(3));
+
+		unsigned char mask0, mask1;
+		wasmMoveMask(mask, mask0, mask1);
+
+		v128_t shuf = decodeShuffleMask(mask0, mask1);
+
+		v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask);
+
+		wasm_v128_store(buffer, result);
+
+		// consumed: 4 selector bytes plus one byte per sentinel lane
+		return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1];
+	}
+
+	case 2:
+	{
+		// the first 8 bytes hold the packed 4-bit selectors; out-of-line bytes follow them
+		v128_t sel4 = wasm_v128_load(data);
+		v128_t rest = wasm_v128_load(data + 8);
+
+		// spread the selectors so that each of the 16 lanes holds one 4-bit value in its low bits
+		v128_t sel44 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel4, 4), sel4);
+		v128_t sel = wasm_v128_and(sel44, wasm_i8x16_splat(15));
+
+		// lanes equal to the sentinel (15) take their value from rest
+		v128_t mask = wasm_i8x16_eq(sel, wasm_i8x16_splat(15));
+
+		unsigned char mask0, mask1;
+		wasmMoveMask(mask, mask0, mask1);
+
+		v128_t shuf = decodeShuffleMask(mask0, mask1);
+
+		v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask);
+
+		wasm_v128_store(buffer, result);
+
+		// consumed: 8 selector bytes plus one byte per sentinel lane
+		return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1];
+	}
+
+	case 3:
+	{
+		// the group is stored verbatim
+		v128_t result = wasm_v128_load(data);
+
+		wasm_v128_store(buffer, result);
+
+		return data + 16;
+	}
+
+	default:
+		assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value
+		return data;
+	}
+}
+#endif
+
+#if defined(SIMD_SSE) || defined(SIMD_AVX)
+// Interleaves the bytes of four 16-byte rows in place: after the call, consecutive 4-byte groups of
+// x0, x1, x2, x3 hold byte i of the four input rows (x0, x1, x2, x3) for i = 0..15.
+SIMD_TARGET
+static void transpose8(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3)
+{
+	// step 1: interleave bytes of row pairs (0,1) and (2,3)
+	__m128i lo01 = _mm_unpacklo_epi8(x0, x1);
+	__m128i hi01 = _mm_unpackhi_epi8(x0, x1);
+	__m128i lo23 = _mm_unpacklo_epi8(x2, x3);
+	__m128i hi23 = _mm_unpackhi_epi8(x2, x3);
+
+	// step 2: interleave the resulting 16-bit pairs
+	x0 = _mm_unpacklo_epi16(lo01, lo23);
+	x1 = _mm_unpackhi_epi16(lo01, lo23);
+	x2 = _mm_unpacklo_epi16(hi01, hi23);
+	x3 = _mm_unpackhi_epi16(hi01, hi23);
+}
+
+// Applies the inverse zigzag mapping to each of the 16 byte lanes.
+SIMD_TARGET
+static __m128i unzigzag8(__m128i v)
+{
+	// all ones in lanes whose lowest bit is set, zero elsewhere
+	__m128i sign = _mm_sub_epi8(_mm_setzero_si128(), _mm_and_si128(v, _mm_set1_epi8(1)));
+
+	// per-byte logical shift right by one: shift as 16-bit lanes, then clear the bit that crossed the byte boundary
+	__m128i half = _mm_and_si128(_mm_srli_epi16(v, 1), _mm_set1_epi8(127));
+
+	return _mm_xor_si128(sign, half);
+}
+#endif
+
+#ifdef SIMD_NEON
+// Interleaves the bytes of four 16-byte rows in place: after the call, consecutive 4-byte groups of
+// x0, x1, x2, x3 hold byte i of the four input rows (x0, x1, x2, x3) for i = 0..15.
+static void transpose8(uint8x16_t& x0, uint8x16_t& x1, uint8x16_t& x2, uint8x16_t& x3)
+{
+	// step 1: interleave bytes of row pairs (0,1) and (2,3)
+	uint8x16x2_t pair01 = vzipq_u8(x0, x1);
+	uint8x16x2_t pair23 = vzipq_u8(x2, x3);
+
+	// step 2: interleave the resulting 16-bit pairs
+	uint16x8x2_t lo = vzipq_u16(vreinterpretq_u16_u8(pair01.val[0]), vreinterpretq_u16_u8(pair23.val[0]));
+	uint16x8x2_t hi = vzipq_u16(vreinterpretq_u16_u8(pair01.val[1]), vreinterpretq_u16_u8(pair23.val[1]));
+
+	x0 = vreinterpretq_u8_u16(lo.val[0]);
+	x1 = vreinterpretq_u8_u16(lo.val[1]);
+	x2 = vreinterpretq_u8_u16(hi.val[0]);
+	x3 = vreinterpretq_u8_u16(hi.val[1]);
+}
+
+// Applies the inverse zigzag mapping to each of the 16 byte lanes.
+static uint8x16_t unzigzag8(uint8x16_t v)
+{
+	// lowest bit of every lane, negated: all ones in lanes whose lowest bit is set, zero elsewhere
+	uint8x16_t low_bit = vandq_u8(v, vdupq_n_u8(1));
+	uint8x16_t sign = vreinterpretq_u8_s8(vnegq_s8(vreinterpretq_s8_u8(low_bit)));
+
+	uint8x16_t half = vshrq_n_u8(v, 1);
+
+	return veorq_u8(sign, half);
+}
+#endif
+
+#ifdef SIMD_WASM
+// Interleaves the bytes of four 16-byte rows in place: after the call, consecutive 4-byte groups of
+// x0, x1, x2, x3 hold byte i of the four input rows (x0, x1, x2, x3) for i = 0..15.
+SIMD_TARGET
+static void transpose8(v128_t& x0, v128_t& x1, v128_t& x2, v128_t& x3)
+{
+	// step 1: interleave bytes of row pairs (0,1) and (2,3)
+	v128_t lo01 = wasmx_unpacklo_v8x16(x0, x1);
+	v128_t hi01 = wasmx_unpackhi_v8x16(x0, x1);
+	v128_t lo23 = wasmx_unpacklo_v8x16(x2, x3);
+	v128_t hi23 = wasmx_unpackhi_v8x16(x2, x3);
+
+	// step 2: interleave the resulting 16-bit pairs
+	x0 = wasmx_unpacklo_v16x8(lo01, lo23);
+	x1 = wasmx_unpackhi_v16x8(lo01, lo23);
+	x2 = wasmx_unpacklo_v16x8(hi01, hi23);
+	x3 = wasmx_unpackhi_v16x8(hi01, hi23);
+}
+
+// Applies the inverse zigzag mapping to each of the 16 byte lanes.
+SIMD_TARGET
+static v128_t unzigzag8(v128_t v)
+{
+	// all ones in lanes whose lowest bit is set, zero elsewhere
+	v128_t low_bit = wasm_v128_and(v, wasm_i8x16_splat(1));
+	v128_t sign = wasm_i8x16_neg(low_bit);
+
+	v128_t half = wasm_u8x16_shr(v, 1);
+
+	return wasm_v128_xor(sign, half);
+}
+#endif
+
+#if defined(SIMD_SSE) || defined(SIMD_AVX) || defined(SIMD_NEON) || defined(SIMD_WASM)
+// SIMD counterpart of the byte stream decoder: reads a header of 2-bit selectors followed by encoded groups
+// and writes buffer_size bytes (a multiple of the group size). Returns the new read position, or 0 when the
+// input range [data, data_end) is too short.
+SIMD_TARGET
+static const unsigned char* decodeBytesSimd(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size)
+{
+	assert(buffer_size % kByteGroupSize == 0);
+	assert(kByteGroupSize == 16);
+
+	// each header byte stores the selectors of four groups, so round the group count up to 4
+	const size_t header_size = (buffer_size / kByteGroupSize + 3) / 4;
+
+	if (size_t(data_end - data) < header_size)
+		return 0;
+
+	const unsigned char* header = data;
+	data += header_size;
+
+	const size_t quad_size = kByteGroupSize * 4;
+	size_t offset = 0;
+
+	// fast-path: four groups (one header byte) per iteration behind a single bounds check - each group reads <=24b
+	while (offset + quad_size <= buffer_size && size_t(data_end - data) >= kByteGroupDecodeLimit * 4)
+	{
+		const unsigned char selectors = header[offset / quad_size];
+		unsigned char* out = buffer + offset;
+
+		data = decodeBytesGroupSimd(data, out + kByteGroupSize * 0, (selectors >> 0) & 3);
+		data = decodeBytesGroupSimd(data, out + kByteGroupSize * 1, (selectors >> 2) & 3);
+		data = decodeBytesGroupSimd(data, out + kByteGroupSize * 2, (selectors >> 4) & 3);
+		data = decodeBytesGroupSimd(data, out + kByteGroupSize * 3, (selectors >> 6) & 3);
+
+		offset += quad_size;
+	}
+
+	// slow-path: remaining groups are checked and decoded one at a time
+	while (offset < buffer_size)
+	{
+		if (size_t(data_end - data) < kByteGroupDecodeLimit)
+			return 0;
+
+		const size_t group = offset / kByteGroupSize;
+		const int bitslog2 = (header[group / 4] >> ((group % 4) * 2)) & 3;
+
+		data = decodeBytesGroupSimd(data, buffer + offset, bitslog2);
+
+		offset += kByteGroupSize;
+	}
+
+	return data;
+}
+
+// SIMD decoder for one block of vertices. Byte positions within a vertex are processed four at a time:
+// the four byte sequences are decoded into buffer, then 16 vertices per iteration are un-zigzagged,
+// transposed, delta-accumulated starting from last_vertex and written to the transposed scratch buffer.
+// Writes vertex_count vertices to vertex_data and updates last_vertex to the last decoded vertex.
+// Returns the new read position, or 0 when the input is too short.
+SIMD_TARGET
+static const unsigned char* decodeVertexBlockSimd(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256])
+{
+ assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize);
+
+ // buffer holds four decoded byte sequences back to back, each vertex_count_aligned bytes long
+ unsigned char buffer[kVertexBlockMaxSize * 4];
+ unsigned char transposed[kVertexBlockSizeBytes];
+
+ size_t vertex_count_aligned = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1);
+
+ for (size_t k = 0; k < vertex_size; k += 4)
+ {
+ // decode the byte sequences for byte positions k..k+3
+ for (size_t j = 0; j < 4; ++j)
+ {
+ data = decodeBytesSimd(data, data_end, buffer + j * vertex_count_aligned, vertex_count_aligned);
+ if (!data)
+ return 0;
+ }
+
+// ISA-specific building blocks used by the loop below:
+// TEMP - register type holding the 4 bytes of one vertex
+// PREP - loads the 4 previous-vertex bytes for positions k..k+3 into pi
+// LOAD - loads 16 bytes of decoded sequence i at offset j
+// GRP4 - splits transposed register i into t0..t3, one vertex (4 bytes) each
+// FIXD - adds the running previous value to t##i and makes the result the new previous value
+// SAVE - stores the 4 bytes of t##i at savep and advances savep by one vertex
+#if defined(SIMD_SSE) || defined(SIMD_AVX)
+#define TEMP __m128i
+#define PREP() __m128i pi = _mm_cvtsi32_si128(*reinterpret_cast<const int*>(last_vertex + k))
+#define LOAD(i) __m128i r##i = _mm_loadu_si128(reinterpret_cast<const __m128i*>(buffer + j + i * vertex_count_aligned))
+#define GRP4(i) t0 = _mm_shuffle_epi32(r##i, 0), t1 = _mm_shuffle_epi32(r##i, 1), t2 = _mm_shuffle_epi32(r##i, 2), t3 = _mm_shuffle_epi32(r##i, 3)
+#define FIXD(i) t##i = pi = _mm_add_epi8(pi, t##i)
+#define SAVE(i) *reinterpret_cast<int*>(savep) = _mm_cvtsi128_si32(t##i), savep += vertex_size
+#endif
+
+#ifdef SIMD_NEON
+#define TEMP uint8x8_t
+#define PREP() uint8x8_t pi = vreinterpret_u8_u32(vld1_lane_u32(reinterpret_cast<uint32_t*>(last_vertex + k), vdup_n_u32(0), 0))
+#define LOAD(i) uint8x16_t r##i = vld1q_u8(buffer + j + i * vertex_count_aligned)
+#define GRP4(i) t0 = vget_low_u8(r##i), t1 = vreinterpret_u8_u32(vdup_lane_u32(vreinterpret_u32_u8(t0), 1)), t2 = vget_high_u8(r##i), t3 = vreinterpret_u8_u32(vdup_lane_u32(vreinterpret_u32_u8(t2), 1))
+#define FIXD(i) t##i = pi = vadd_u8(pi, t##i)
+#define SAVE(i) vst1_lane_u32(reinterpret_cast<uint32_t*>(savep), vreinterpret_u32_u8(t##i), 0), savep += vertex_size
+#endif
+
+#ifdef SIMD_WASM
+#define TEMP v128_t
+#define PREP() v128_t pi = wasm_v128_load(last_vertex + k)
+#define LOAD(i) v128_t r##i = wasm_v128_load(buffer + j + i * vertex_count_aligned)
+#define GRP4(i) t0 = wasmx_splat_v32x4(r##i, 0), t1 = wasmx_splat_v32x4(r##i, 1), t2 = wasmx_splat_v32x4(r##i, 2), t3 = wasmx_splat_v32x4(r##i, 3)
+#define FIXD(i) t##i = pi = wasm_i8x16_add(pi, t##i)
+#define SAVE(i) *reinterpret_cast<int*>(savep) = wasm_i32x4_extract_lane(t##i, 0), savep += vertex_size
+#endif
+
+ PREP();
+
+ // output cursor: byte position k of the first vertex; SAVE advances it by vertex_size per vertex
+ unsigned char* savep = transposed + k;
+
+ // 16 vertices per iteration; the padded tail of the last group is processed as well
+ for (size_t j = 0; j < vertex_count_aligned; j += 16)
+ {
+ LOAD(0);
+ LOAD(1);
+ LOAD(2);
+ LOAD(3);
+
+ r0 = unzigzag8(r0);
+ r1 = unzigzag8(r1);
+ r2 = unzigzag8(r2);
+ r3 = unzigzag8(r3);
+
+ // after the transpose, each 32-bit lane holds the 4 delta bytes of one vertex
+ transpose8(r0, r1, r2, r3);
+
+ TEMP t0, t1, t2, t3;
+
+ // each register covers 4 consecutive vertices: split, accumulate deltas in vertex order, store
+ GRP4(0);
+ FIXD(0), FIXD(1), FIXD(2), FIXD(3);
+ SAVE(0), SAVE(1), SAVE(2), SAVE(3);
+
+ GRP4(1);
+ FIXD(0), FIXD(1), FIXD(2), FIXD(3);
+ SAVE(0), SAVE(1), SAVE(2), SAVE(3);
+
+ GRP4(2);
+ FIXD(0), FIXD(1), FIXD(2), FIXD(3);
+ SAVE(0), SAVE(1), SAVE(2), SAVE(3);
+
+ GRP4(3);
+ FIXD(0), FIXD(1), FIXD(2), FIXD(3);
+ SAVE(0), SAVE(1), SAVE(2), SAVE(3);
+
+#undef TEMP
+#undef PREP
+#undef LOAD
+#undef GRP4
+#undef FIXD
+#undef SAVE
+ }
+ }
+
+ memcpy(vertex_data, transposed, vertex_count * vertex_size);
+
+ memcpy(last_vertex, &transposed[vertex_size * (vertex_count - 1)], vertex_size);
+
+ return data;
+}
+#endif
+
+#if defined(SIMD_SSE) && defined(SIMD_FALLBACK)
+// Queries cpuid leaf 1 and returns the third of the four result registers.
+static unsigned int getCpuFeatures()
+{
+	int regs[4] = {};
+
+#ifdef _MSC_VER
+	__cpuid(regs, 1);
+#else
+	__cpuid(1, regs[0], regs[1], regs[2], regs[3]);
+#endif
+
+	return regs[2];
+}
+
+// feature bits are queried once during static initialization
+unsigned int cpuid = getCpuFeatures();
+#endif
+
+} // namespace meshopt
+
+// Encodes vertex_count vertices of vertex_size bytes each into buffer.
+// The stream consists of a header byte, the encoded vertex blocks and a tail that stores the first vertex.
+// Returns the number of bytes written, or 0 when buffer_size is too small.
+size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size)
+{
+	using namespace meshopt;
+
+	assert(vertex_size > 0 && vertex_size <= 256);
+	assert(vertex_size % 4 == 0);
+
+#if TRACE
+	memset(vertexstats, 0, sizeof(vertexstats));
+#endif
+
+	const unsigned char* vertex_data = static_cast<const unsigned char*>(vertices);
+
+	unsigned char* const data_end = buffer + buffer_size;
+	unsigned char* data = buffer;
+
+	// the output must at least have room for the header byte and one vertex
+	if (size_t(data_end - data) < 1 + vertex_size)
+		return 0;
+
+	*data++ = (unsigned char)(kVertexHeader | gEncodeVertexVersion);
+
+	// the first vertex seeds the delta encoding and is stored in the tail; it stays zero for empty input
+	unsigned char first_vertex[256] = {};
+	if (vertex_count > 0)
+		memcpy(first_vertex, vertex_data, vertex_size);
+
+	unsigned char last_vertex[256] = {};
+	memcpy(last_vertex, first_vertex, vertex_size);
+
+	const size_t vertex_block_size = getVertexBlockSize(vertex_size);
+
+	for (size_t vertex_offset = 0; vertex_offset < vertex_count;)
+	{
+		// full blocks, except for the last one which takes whatever is left
+		size_t block_size = vertex_count - vertex_offset;
+		if (vertex_offset + vertex_block_size < vertex_count)
+			block_size = vertex_block_size;
+
+		data = encodeVertexBlock(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, last_vertex);
+		if (!data)
+			return 0;
+
+		vertex_offset += block_size;
+	}
+
+	const size_t tail_size = vertex_size < kTailMaxSize ? kTailMaxSize : vertex_size;
+
+	if (size_t(data_end - data) < tail_size)
+		return 0;
+
+	// write first vertex to the end of the stream and pad it to 32 bytes; this is important to simplify bounds checks in decoder
+	const size_t padding = tail_size - vertex_size;
+
+	if (padding > 0)
+	{
+		memset(data, 0, padding);
+		data += padding;
+	}
+
+	memcpy(data, first_vertex, vertex_size);
+	data += vertex_size;
+
+	assert(data >= buffer + tail_size);
+	assert(data <= buffer + buffer_size);
+
+#if TRACE
+	size_t total_size = data - buffer;
+
+	for (size_t k = 0; k < vertex_size; ++k)
+	{
+		const Stats& stats = vertexstats[k];
+
+		printf("%2d: %d bytes\t%.1f%%\t%.1f bpv", int(k), int(stats.size), double(stats.size) / double(total_size) * 100, double(stats.size) / double(vertex_count) * 8);
+
+#if TRACE > 1
+		printf("\t\thdr %d bytes\tbit0 %d (%d bytes)\tbit1 %d (%d bytes)\tbit2 %d (%d bytes)\tbit3 %d (%d bytes)",
+		    int(stats.header),
+		    int(stats.bitg[0]), int(stats.bitb[0]),
+		    int(stats.bitg[1]), int(stats.bitb[1]),
+		    int(stats.bitg[2]), int(stats.bitb[2]),
+		    int(stats.bitg[3]), int(stats.bitb[3]));
+#endif
+
+		printf("\n");
+	}
+#endif
+
+	return data - buffer;
+}
+
+size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size)
+{
+ using namespace meshopt;
+
+ assert(vertex_size > 0 && vertex_size <= 256);
+ assert(vertex_size % 4 == 0);
+
+ size_t vertex_block_size = getVertexBlockSize(vertex_size);
+ size_t vertex_block_count = (vertex_count + vertex_block_size - 1) / vertex_block_size;
+
+ size_t vertex_block_header_size = (vertex_block_size / kByteGroupSize + 3) / 4;
+ size_t vertex_block_data_size = vertex_block_size;
+
+ size_t tail_size = vertex_size < kTailMaxSize ? kTailMaxSize : vertex_size;
+
+ return 1 + vertex_block_count * vertex_size * (vertex_block_header_size + vertex_block_data_size) + tail_size;
+}
+
+void meshopt_encodeVertexVersion(int version)
+{
+ assert(unsigned(version) <= 0);
+
+ meshopt::gEncodeVertexVersion = version;
+}
+
+int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size)
+{
+ using namespace meshopt;
+
+ assert(vertex_size > 0 && vertex_size <= 256);
+ assert(vertex_size % 4 == 0);
+
+ const unsigned char* (*decode)(const unsigned char*, const unsigned char*, unsigned char*, size_t, size_t, unsigned char[256]) = 0;
+
+#if defined(SIMD_SSE) && defined(SIMD_FALLBACK)
+ decode = (cpuid & (1 << 9)) ? decodeVertexBlockSimd : decodeVertexBlock;
+#elif defined(SIMD_SSE) || defined(SIMD_AVX) || defined(SIMD_NEON) || defined(SIMD_WASM)
+ decode = decodeVertexBlockSimd;
+#else
+ decode = decodeVertexBlock;
+#endif
+
+#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
+ assert(gDecodeBytesGroupInitialized);
+ (void)gDecodeBytesGroupInitialized;
+#endif
+
+ unsigned char* vertex_data = static_cast<unsigned char*>(destination);
+
+ const unsigned char* data = buffer;
+ const unsigned char* data_end = buffer + buffer_size;
+
+ if (size_t(data_end - data) < 1 + vertex_size)
+ return -2;
+
+ unsigned char data_header = *data++;
+
+ if ((data_header & 0xf0) != kVertexHeader)
+ return -1;
+
+ int version = data_header & 0x0f;
+ if (version > 0)
+ return -1;
+
+ unsigned char last_vertex[256];
+ memcpy(last_vertex, data_end - vertex_size, vertex_size);
+
+ size_t vertex_block_size = getVertexBlockSize(vertex_size);
+
+ size_t vertex_offset = 0;
+
+ while (vertex_offset < vertex_count)
+ {
+ size_t block_size = (vertex_offset + vertex_block_size < vertex_count) ? vertex_block_size : vertex_count - vertex_offset;
+
+ data = decode(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, last_vertex);
+ if (!data)
+ return -2;
+
+ vertex_offset += block_size;
+ }
+
+ size_t tail_size = vertex_size < kTailMaxSize ? kTailMaxSize : vertex_size;
+
+ if (size_t(data_end - data) != tail_size)
+ return -3;
+
+ return 0;
+}
+
+#undef SIMD_NEON
+#undef SIMD_SSE
+#undef SIMD_AVX
+#undef SIMD_WASM
+#undef SIMD_FALLBACK
+#undef SIMD_TARGET
diff --git a/thirdparty/meshoptimizer/vertexfilter.cpp b/thirdparty/meshoptimizer/vertexfilter.cpp
new file mode 100644
index 0000000000..e7ad2c9d39
--- /dev/null
+++ b/thirdparty/meshoptimizer/vertexfilter.cpp
@@ -0,0 +1,825 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+#include <math.h>
+
+// The block below auto-detects SIMD ISA that can be used on the target platform
+#ifndef MESHOPTIMIZER_NO_SIMD
+
+// The SIMD implementation requires SSE2, which can be enabled unconditionally through compiler settings
+#if defined(__SSE2__)
+#define SIMD_SSE
+#endif
+
+// MSVC supports compiling SSE2 code regardless of compile options; we assume all 32-bit CPUs support SSE2
+#if !defined(SIMD_SSE) && defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
+#define SIMD_SSE
+#endif
+
+// GCC/clang define these when NEON support is available
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+#define SIMD_NEON
+#endif
+
+// On MSVC, we assume that ARM builds always target NEON-capable devices
+#if !defined(SIMD_NEON) && defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+#define SIMD_NEON
+#endif
+
+// When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD
+#if defined(__wasm_simd128__)
+#define SIMD_WASM
+#endif
+
+#endif // !MESHOPTIMIZER_NO_SIMD
+
+#ifdef SIMD_SSE
+#include <emmintrin.h>
+#include <stdint.h>
+#endif
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+#ifdef SIMD_NEON
+#if defined(_MSC_VER) && defined(_M_ARM64)
+#include <arm64_neon.h>
+#else
+#include <arm_neon.h>
+#endif
+#endif
+
+#ifdef SIMD_WASM
+#include <wasm_simd128.h>
+#endif
+
+#ifdef SIMD_WASM
+#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11)
+#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15)
+#define wasmx_unziplo_v32x4(a, b) wasm_v32x4_shuffle(a, b, 0, 2, 4, 6)
+#define wasmx_unziphi_v32x4(a, b) wasm_v32x4_shuffle(a, b, 1, 3, 5, 7)
+#endif
+
+namespace meshopt
+{
+
+#if !defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_WASM)
+template <typename T>
+static void decodeFilterOct(T* data, size_t count)
+{
+ const float max = float((1 << (sizeof(T) * 8 - 1)) - 1);
+
+ for (size_t i = 0; i < count; ++i)
+ {
+ // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
+ float x = float(data[i * 4 + 0]);
+ float y = float(data[i * 4 + 1]);
+ float z = float(data[i * 4 + 2]) - fabsf(x) - fabsf(y);
+
+ // fixup octahedral coordinates for z<0
+ float t = (z >= 0.f) ? 0.f : z;
+
+ x += (x >= 0.f) ? t : -t;
+ y += (y >= 0.f) ? t : -t;
+
+ // compute normal length & scale
+ float l = sqrtf(x * x + y * y + z * z);
+ float s = max / l;
+
+ // rounded signed float->int
+ int xf = int(x * s + (x >= 0.f ? 0.5f : -0.5f));
+ int yf = int(y * s + (y >= 0.f ? 0.5f : -0.5f));
+ int zf = int(z * s + (z >= 0.f ? 0.5f : -0.5f));
+
+ data[i * 4 + 0] = T(xf);
+ data[i * 4 + 1] = T(yf);
+ data[i * 4 + 2] = T(zf);
+ }
+}
+
+static void decodeFilterQuat(short* data, size_t count)
+{
+ const float scale = 1.f / sqrtf(2.f);
+
+ for (size_t i = 0; i < count; ++i)
+ {
+ // recover scale from the last component with its bottom 2 bits (which hold the component index) set to 1
+ int sf = data[i * 4 + 3] | 3;
+ float ss = scale / float(sf);
+
+ // convert x/y/z to [-1..1] (scaled...)
+ float x = float(data[i * 4 + 0]) * ss;
+ float y = float(data[i * 4 + 1]) * ss;
+ float z = float(data[i * 4 + 2]) * ss;
+
+ // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors
+ float ww = 1.f - x * x - y * y - z * z;
+ float w = sqrtf(ww >= 0.f ? ww : 0.f);
+
+ // rounded signed float->int
+ int xf = int(x * 32767.f + (x >= 0.f ? 0.5f : -0.5f));
+ int yf = int(y * 32767.f + (y >= 0.f ? 0.5f : -0.5f));
+ int zf = int(z * 32767.f + (z >= 0.f ? 0.5f : -0.5f));
+ int wf = int(w * 32767.f + 0.5f);
+
+ int qc = data[i * 4 + 3] & 3;
+
+ // output order is dictated by input index
+ data[i * 4 + ((qc + 1) & 3)] = short(xf);
+ data[i * 4 + ((qc + 2) & 3)] = short(yf);
+ data[i * 4 + ((qc + 3) & 3)] = short(zf);
+ data[i * 4 + ((qc + 0) & 3)] = short(wf);
+ }
+}
+
+static void decodeFilterExp(unsigned int* data, size_t count)
+{
+ for (size_t i = 0; i < count; ++i)
+ {
+ unsigned int v = data[i];
+
+ // decode mantissa and exponent
+ int m = int(v << 8) >> 8;
+ int e = int(v) >> 24;
+
+ union
+ {
+ float f;
+ unsigned int ui;
+ } u;
+
+ // optimized version of ldexp(float(m), e)
+ u.ui = unsigned(e + 127) << 23;
+ u.f = u.f * float(m);
+
+ data[i] = u.ui;
+ }
+}
+#endif
+
+#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
+inline uint64_t rotateleft64(uint64_t v, int x)
+{
+#if defined(_MSC_VER) && !defined(__clang__)
+ return _rotl64(v, x);
+// Apple's Clang 8 is actually vanilla Clang 3.9; for Apple builds we need to look for
+// version 11 instead: https://en.wikipedia.org/wiki/Xcode#Toolchain_versions
+#elif defined(__clang__) && ((!defined(__apple_build_version__) && __clang_major__ >= 8) || __clang_major__ >= 11)
+ return __builtin_rotateleft64(v, x);
+#else
+ return (v << (x & 63)) | (v >> ((64 - x) & 63));
+#endif
+}
+#endif
+
+#ifdef SIMD_SSE
+static void decodeFilterOctSimd(signed char* data, size_t count)
+{
+ const __m128 sign = _mm_set1_ps(-0.f);
+
+ for (size_t i = 0; i < count; i += 4)
+ {
+ __m128i n4 = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[i * 4]));
+
+ // sign-extends each of x,y in [x y ? ?] with arithmetic shifts
+ __m128i xf = _mm_srai_epi32(_mm_slli_epi32(n4, 24), 24);
+ __m128i yf = _mm_srai_epi32(_mm_slli_epi32(n4, 16), 24);
+
+ // unpack z; note that z is unsigned so we technically don't need to sign extend it
+ __m128i zf = _mm_srai_epi32(_mm_slli_epi32(n4, 8), 24);
+
+ // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
+ __m128 x = _mm_cvtepi32_ps(xf);
+ __m128 y = _mm_cvtepi32_ps(yf);
+ __m128 z = _mm_sub_ps(_mm_cvtepi32_ps(zf), _mm_add_ps(_mm_andnot_ps(sign, x), _mm_andnot_ps(sign, y)));
+
+ // fixup octahedral coordinates for z<0
+ __m128 t = _mm_min_ps(z, _mm_setzero_ps());
+
+ x = _mm_add_ps(x, _mm_xor_ps(t, _mm_and_ps(x, sign)));
+ y = _mm_add_ps(y, _mm_xor_ps(t, _mm_and_ps(y, sign)));
+
+ // compute normal length & scale
+ __m128 ll = _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z)));
+ __m128 s = _mm_mul_ps(_mm_set1_ps(127.f), _mm_rsqrt_ps(ll));
+
+ // rounded signed float->int
+ __m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s));
+ __m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s));
+ __m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s));
+
+ // combine xr/yr/zr into final value
+ __m128i res = _mm_and_si128(n4, _mm_set1_epi32(0xff000000));
+ res = _mm_or_si128(res, _mm_and_si128(xr, _mm_set1_epi32(0xff)));
+ res = _mm_or_si128(res, _mm_slli_epi32(_mm_and_si128(yr, _mm_set1_epi32(0xff)), 8));
+ res = _mm_or_si128(res, _mm_slli_epi32(_mm_and_si128(zr, _mm_set1_epi32(0xff)), 16));
+
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(&data[i * 4]), res);
+ }
+}
+
+static void decodeFilterOctSimd(short* data, size_t count)
+{
+ const __m128 sign = _mm_set1_ps(-0.f);
+
+ for (size_t i = 0; i < count; i += 4)
+ {
+ __m128 n4_0 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 0) * 4]));
+ __m128 n4_1 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 2) * 4]));
+
+ // gather both x/y 16-bit pairs in each 32-bit lane
+ __m128i n4 = _mm_castps_si128(_mm_shuffle_ps(n4_0, n4_1, _MM_SHUFFLE(2, 0, 2, 0)));
+
+ // sign-extends each of x,y in [x y] with arithmetic shifts
+ __m128i xf = _mm_srai_epi32(_mm_slli_epi32(n4, 16), 16);
+ __m128i yf = _mm_srai_epi32(n4, 16);
+
+ // unpack z; note that z is unsigned so we don't need to sign extend it
+ __m128i z4 = _mm_castps_si128(_mm_shuffle_ps(n4_0, n4_1, _MM_SHUFFLE(3, 1, 3, 1)));
+ __m128i zf = _mm_and_si128(z4, _mm_set1_epi32(0x7fff));
+
+ // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
+ __m128 x = _mm_cvtepi32_ps(xf);
+ __m128 y = _mm_cvtepi32_ps(yf);
+ __m128 z = _mm_sub_ps(_mm_cvtepi32_ps(zf), _mm_add_ps(_mm_andnot_ps(sign, x), _mm_andnot_ps(sign, y)));
+
+ // fixup octahedral coordinates for z<0
+ __m128 t = _mm_min_ps(z, _mm_setzero_ps());
+
+ x = _mm_add_ps(x, _mm_xor_ps(t, _mm_and_ps(x, sign)));
+ y = _mm_add_ps(y, _mm_xor_ps(t, _mm_and_ps(y, sign)));
+
+ // compute normal length & scale
+ __m128 ll = _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z)));
+ __m128 s = _mm_div_ps(_mm_set1_ps(32767.f), _mm_sqrt_ps(ll));
+
+ // rounded signed float->int
+ __m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s));
+ __m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s));
+ __m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s));
+
+ // mix x/z and y/0 to make 16-bit unpack easier
+ __m128i xzr = _mm_or_si128(_mm_and_si128(xr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(zr, 16));
+ __m128i y0r = _mm_and_si128(yr, _mm_set1_epi32(0xffff));
+
+ // pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w
+ __m128i res_0 = _mm_unpacklo_epi16(xzr, y0r);
+ __m128i res_1 = _mm_unpackhi_epi16(xzr, y0r);
+
+ // patch in .w
+ res_0 = _mm_or_si128(res_0, _mm_and_si128(_mm_castps_si128(n4_0), _mm_set1_epi64x(0xffff000000000000)));
+ res_1 = _mm_or_si128(res_1, _mm_and_si128(_mm_castps_si128(n4_1), _mm_set1_epi64x(0xffff000000000000)));
+
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 0) * 4]), res_0);
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 2) * 4]), res_1);
+ }
+}
+
+static void decodeFilterQuatSimd(short* data, size_t count)
+{
+ const float scale = 1.f / sqrtf(2.f);
+
+ for (size_t i = 0; i < count; i += 4)
+ {
+ __m128 q4_0 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 0) * 4]));
+ __m128 q4_1 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 2) * 4]));
+
+ // gather both x/y 16-bit pairs in each 32-bit lane
+ __m128i q4_xy = _mm_castps_si128(_mm_shuffle_ps(q4_0, q4_1, _MM_SHUFFLE(2, 0, 2, 0)));
+ __m128i q4_zc = _mm_castps_si128(_mm_shuffle_ps(q4_0, q4_1, _MM_SHUFFLE(3, 1, 3, 1)));
+
+ // sign-extends each of x,y in [x y] and z,c in [z c] with arithmetic shifts
+ __m128i xf = _mm_srai_epi32(_mm_slli_epi32(q4_xy, 16), 16);
+ __m128i yf = _mm_srai_epi32(q4_xy, 16);
+ __m128i zf = _mm_srai_epi32(_mm_slli_epi32(q4_zc, 16), 16);
+ __m128i cf = _mm_srai_epi32(q4_zc, 16);
+
+ // get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f)
+ __m128i sf = _mm_or_si128(cf, _mm_set1_epi32(3));
+ __m128 ss = _mm_div_ps(_mm_set1_ps(scale), _mm_cvtepi32_ps(sf));
+
+ // convert x/y/z to [-1..1] (scaled...)
+ __m128 x = _mm_mul_ps(_mm_cvtepi32_ps(xf), ss);
+ __m128 y = _mm_mul_ps(_mm_cvtepi32_ps(yf), ss);
+ __m128 z = _mm_mul_ps(_mm_cvtepi32_ps(zf), ss);
+
+ // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors
+ __m128 ww = _mm_sub_ps(_mm_set1_ps(1.f), _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z))));
+ __m128 w = _mm_sqrt_ps(_mm_max_ps(ww, _mm_setzero_ps()));
+
+ __m128 s = _mm_set1_ps(32767.f);
+
+ // rounded signed float->int
+ __m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s));
+ __m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s));
+ __m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s));
+ __m128i wr = _mm_cvtps_epi32(_mm_mul_ps(w, s));
+
+ // mix x/z and w/y to make 16-bit unpack easier
+ __m128i xzr = _mm_or_si128(_mm_and_si128(xr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(zr, 16));
+ __m128i wyr = _mm_or_si128(_mm_and_si128(wr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(yr, 16));
+
+ // pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0)
+ __m128i res_0 = _mm_unpacklo_epi16(wyr, xzr);
+ __m128i res_1 = _mm_unpackhi_epi16(wyr, xzr);
+
+ // store results to stack so that we can rotate using scalar instructions
+ uint64_t res[4];
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(&res[0]), res_0);
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(&res[2]), res_1);
+
+ // rotate and store
+ uint64_t* out = reinterpret_cast<uint64_t*>(&data[i * 4]);
+
+ out[0] = rotateleft64(res[0], data[(i + 0) * 4 + 3] << 4);
+ out[1] = rotateleft64(res[1], data[(i + 1) * 4 + 3] << 4);
+ out[2] = rotateleft64(res[2], data[(i + 2) * 4 + 3] << 4);
+ out[3] = rotateleft64(res[3], data[(i + 3) * 4 + 3] << 4);
+ }
+}
+
+static void decodeFilterExpSimd(unsigned int* data, size_t count)
+{
+ for (size_t i = 0; i < count; i += 4)
+ {
+ __m128i v = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[i]));
+
+ // decode exponent into 2^x directly
+ __m128i ef = _mm_srai_epi32(v, 24);
+ __m128i es = _mm_slli_epi32(_mm_add_epi32(ef, _mm_set1_epi32(127)), 23);
+
+ // decode 24-bit mantissa into floating-point value
+ __m128i mf = _mm_srai_epi32(_mm_slli_epi32(v, 8), 8);
+ __m128 m = _mm_cvtepi32_ps(mf);
+
+ __m128 r = _mm_mul_ps(_mm_castsi128_ps(es), m);
+
+ _mm_storeu_ps(reinterpret_cast<float*>(&data[i]), r);
+ }
+}
+#endif
+
+#if defined(SIMD_NEON) && !defined(__aarch64__) && !defined(_M_ARM64)
+inline float32x4_t vsqrtq_f32(float32x4_t x)
+{
+ float32x4_t r = vrsqrteq_f32(x);
+ r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(r, x), r)); // refine rsqrt estimate
+ return vmulq_f32(r, x);
+}
+
+inline float32x4_t vdivq_f32(float32x4_t x, float32x4_t y)
+{
+ float32x4_t r = vrecpeq_f32(y);
+ r = vmulq_f32(r, vrecpsq_f32(y, r)); // refine rcp estimate
+ return vmulq_f32(x, r);
+}
+#endif
+
+#ifdef SIMD_NEON
+static void decodeFilterOctSimd(signed char* data, size_t count)
+{
+ const int32x4_t sign = vdupq_n_s32(0x80000000);
+
+ for (size_t i = 0; i < count; i += 4)
+ {
+ int32x4_t n4 = vld1q_s32(reinterpret_cast<int32_t*>(&data[i * 4]));
+
+ // sign-extends each of x,y in [x y ? ?] with arithmetic shifts
+ int32x4_t xf = vshrq_n_s32(vshlq_n_s32(n4, 24), 24);
+ int32x4_t yf = vshrq_n_s32(vshlq_n_s32(n4, 16), 24);
+
+ // unpack z; note that z is unsigned so we technically don't need to sign extend it
+ int32x4_t zf = vshrq_n_s32(vshlq_n_s32(n4, 8), 24);
+
+ // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
+ float32x4_t x = vcvtq_f32_s32(xf);
+ float32x4_t y = vcvtq_f32_s32(yf);
+ float32x4_t z = vsubq_f32(vcvtq_f32_s32(zf), vaddq_f32(vabsq_f32(x), vabsq_f32(y)));
+
+ // fixup octahedral coordinates for z<0
+ float32x4_t t = vminq_f32(z, vdupq_n_f32(0.f));
+
+ x = vaddq_f32(x, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(x), sign))));
+ y = vaddq_f32(y, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(y), sign))));
+
+ // compute normal length & scale
+ float32x4_t ll = vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z)));
+ float32x4_t rl = vrsqrteq_f32(ll);
+ float32x4_t s = vmulq_f32(vdupq_n_f32(127.f), rl);
+
+ // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
+ // note: the result is offset by 0x4B40_0000, but we only need the low 8 bits so we can omit the subtraction
+ const float32x4_t fsnap = vdupq_n_f32(3 << 22);
+
+ int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap));
+ int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap));
+ int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap));
+
+ // combine xr/yr/zr into final value
+ int32x4_t res = vandq_s32(n4, vdupq_n_s32(0xff000000));
+ res = vorrq_s32(res, vandq_s32(xr, vdupq_n_s32(0xff)));
+ res = vorrq_s32(res, vshlq_n_s32(vandq_s32(yr, vdupq_n_s32(0xff)), 8));
+ res = vorrq_s32(res, vshlq_n_s32(vandq_s32(zr, vdupq_n_s32(0xff)), 16));
+
+ vst1q_s32(reinterpret_cast<int32_t*>(&data[i * 4]), res);
+ }
+}
+
+static void decodeFilterOctSimd(short* data, size_t count)
+{
+ const int32x4_t sign = vdupq_n_s32(0x80000000);
+
+ for (size_t i = 0; i < count; i += 4)
+ {
+ int32x4_t n4_0 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]));
+ int32x4_t n4_1 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]));
+
+ // gather both x/y 16-bit pairs in each 32-bit lane
+ int32x4_t n4 = vuzpq_s32(n4_0, n4_1).val[0];
+
+ // sign-extends each of x,y in [x y] with arithmetic shifts
+ int32x4_t xf = vshrq_n_s32(vshlq_n_s32(n4, 16), 16);
+ int32x4_t yf = vshrq_n_s32(n4, 16);
+
+ // unpack z; note that z is unsigned so we don't need to sign extend it
+ int32x4_t z4 = vuzpq_s32(n4_0, n4_1).val[1];
+ int32x4_t zf = vandq_s32(z4, vdupq_n_s32(0x7fff));
+
+ // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
+ float32x4_t x = vcvtq_f32_s32(xf);
+ float32x4_t y = vcvtq_f32_s32(yf);
+ float32x4_t z = vsubq_f32(vcvtq_f32_s32(zf), vaddq_f32(vabsq_f32(x), vabsq_f32(y)));
+
+ // fixup octahedral coordinates for z<0
+ float32x4_t t = vminq_f32(z, vdupq_n_f32(0.f));
+
+ x = vaddq_f32(x, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(x), sign))));
+ y = vaddq_f32(y, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(y), sign))));
+
+ // compute normal length & scale
+ float32x4_t ll = vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z)));
+ float32x4_t rl = vrsqrteq_f32(ll);
+ rl = vmulq_f32(rl, vrsqrtsq_f32(vmulq_f32(rl, ll), rl)); // refine rsqrt estimate
+ float32x4_t s = vmulq_f32(vdupq_n_f32(32767.f), rl);
+
+ // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
+ // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
+ const float32x4_t fsnap = vdupq_n_f32(3 << 22);
+
+ int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap));
+ int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap));
+ int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap));
+
+ // mix x/z and y/0 to make 16-bit unpack easier
+ int32x4_t xzr = vorrq_s32(vandq_s32(xr, vdupq_n_s32(0xffff)), vshlq_n_s32(zr, 16));
+ int32x4_t y0r = vandq_s32(yr, vdupq_n_s32(0xffff));
+
+ // pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w
+ int32x4_t res_0 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(xzr), vreinterpretq_s16_s32(y0r)).val[0]);
+ int32x4_t res_1 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(xzr), vreinterpretq_s16_s32(y0r)).val[1]);
+
+ // patch in .w
+ res_0 = vbslq_s32(vreinterpretq_u32_u64(vdupq_n_u64(0xffff000000000000)), n4_0, res_0);
+ res_1 = vbslq_s32(vreinterpretq_u32_u64(vdupq_n_u64(0xffff000000000000)), n4_1, res_1);
+
+ vst1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]), res_0);
+ vst1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]), res_1);
+ }
+}
+
+static void decodeFilterQuatSimd(short* data, size_t count)
+{
+ const float scale = 1.f / sqrtf(2.f);
+
+ for (size_t i = 0; i < count; i += 4)
+ {
+ int32x4_t q4_0 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]));
+ int32x4_t q4_1 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]));
+
+ // gather both x/y 16-bit pairs in each 32-bit lane
+ int32x4_t q4_xy = vuzpq_s32(q4_0, q4_1).val[0];
+ int32x4_t q4_zc = vuzpq_s32(q4_0, q4_1).val[1];
+
+ // sign-extends each of x,y in [x y] and z,c in [z c] with arithmetic shifts
+ int32x4_t xf = vshrq_n_s32(vshlq_n_s32(q4_xy, 16), 16);
+ int32x4_t yf = vshrq_n_s32(q4_xy, 16);
+ int32x4_t zf = vshrq_n_s32(vshlq_n_s32(q4_zc, 16), 16);
+ int32x4_t cf = vshrq_n_s32(q4_zc, 16);
+
+ // get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f)
+ int32x4_t sf = vorrq_s32(cf, vdupq_n_s32(3));
+ float32x4_t ss = vdivq_f32(vdupq_n_f32(scale), vcvtq_f32_s32(sf));
+
+ // convert x/y/z to [-1..1] (scaled...)
+ float32x4_t x = vmulq_f32(vcvtq_f32_s32(xf), ss);
+ float32x4_t y = vmulq_f32(vcvtq_f32_s32(yf), ss);
+ float32x4_t z = vmulq_f32(vcvtq_f32_s32(zf), ss);
+
+ // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors
+ float32x4_t ww = vsubq_f32(vdupq_n_f32(1.f), vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z))));
+ float32x4_t w = vsqrtq_f32(vmaxq_f32(ww, vdupq_n_f32(0.f)));
+
+ float32x4_t s = vdupq_n_f32(32767.f);
+
+ // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
+ // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
+ const float32x4_t fsnap = vdupq_n_f32(3 << 22);
+
+ int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap));
+ int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap));
+ int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap));
+ int32x4_t wr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(w, s), fsnap));
+
+ // mix x/z and w/y to make 16-bit unpack easier
+ int32x4_t xzr = vorrq_s32(vandq_s32(xr, vdupq_n_s32(0xffff)), vshlq_n_s32(zr, 16));
+ int32x4_t wyr = vorrq_s32(vandq_s32(wr, vdupq_n_s32(0xffff)), vshlq_n_s32(yr, 16));
+
+ // pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0)
+ int32x4_t res_0 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[0]);
+ int32x4_t res_1 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[1]);
+
+ // rotate and store
+ uint64_t* out = (uint64_t*)&data[i * 4];
+
+ out[0] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_0), 0), vgetq_lane_s32(cf, 0) << 4);
+ out[1] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_0), 1), vgetq_lane_s32(cf, 1) << 4);
+ out[2] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_1), 0), vgetq_lane_s32(cf, 2) << 4);
+ out[3] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_1), 1), vgetq_lane_s32(cf, 3) << 4);
+ }
+}
+
+static void decodeFilterExpSimd(unsigned int* data, size_t count)
+{
+ for (size_t i = 0; i < count; i += 4)
+ {
+ int32x4_t v = vld1q_s32(reinterpret_cast<int32_t*>(&data[i]));
+
+ // decode exponent into 2^x directly
+ int32x4_t ef = vshrq_n_s32(v, 24);
+ int32x4_t es = vshlq_n_s32(vaddq_s32(ef, vdupq_n_s32(127)), 23);
+
+ // decode 24-bit mantissa into floating-point value
+ int32x4_t mf = vshrq_n_s32(vshlq_n_s32(v, 8), 8);
+ float32x4_t m = vcvtq_f32_s32(mf);
+
+ float32x4_t r = vmulq_f32(vreinterpretq_f32_s32(es), m);
+
+ vst1q_f32(reinterpret_cast<float*>(&data[i]), r);
+ }
+}
+#endif
+
+#ifdef SIMD_WASM
+static void decodeFilterOctSimd(signed char* data, size_t count)
+{
+ const v128_t sign = wasm_f32x4_splat(-0.f);
+
+ for (size_t i = 0; i < count; i += 4)
+ {
+ v128_t n4 = wasm_v128_load(&data[i * 4]);
+
+ // sign-extends each of x,y in [x y ? ?] with arithmetic shifts
+ v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 24), 24);
+ v128_t yf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 16), 24);
+
+ // unpack z; note that z is unsigned so we technically don't need to sign extend it
+ v128_t zf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 8), 24);
+
+ // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
+ v128_t x = wasm_f32x4_convert_i32x4(xf);
+ v128_t y = wasm_f32x4_convert_i32x4(yf);
+ v128_t z = wasm_f32x4_sub(wasm_f32x4_convert_i32x4(zf), wasm_f32x4_add(wasm_f32x4_abs(x), wasm_f32x4_abs(y)));
+
+ // fixup octahedral coordinates for z<0
+ // note: i32x4_min with 0 is equivalent to f32x4_min
+ v128_t t = wasm_i32x4_min(z, wasm_i32x4_splat(0));
+
+ x = wasm_f32x4_add(x, wasm_v128_xor(t, wasm_v128_and(x, sign)));
+ y = wasm_f32x4_add(y, wasm_v128_xor(t, wasm_v128_and(y, sign)));
+
+ // compute normal length & scale
+ v128_t ll = wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z)));
+ v128_t s = wasm_f32x4_div(wasm_f32x4_splat(127.f), wasm_f32x4_sqrt(ll));
+
+ // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
+ // note: the result is offset by 0x4B40_0000, but we only need the low 8 bits so we can omit the subtraction
+ const v128_t fsnap = wasm_f32x4_splat(3 << 22);
+
+ v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap);
+ v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap);
+ v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap);
+
+ // combine xr/yr/zr into final value
+ v128_t res = wasm_v128_and(n4, wasm_i32x4_splat(0xff000000));
+ res = wasm_v128_or(res, wasm_v128_and(xr, wasm_i32x4_splat(0xff)));
+ res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(yr, wasm_i32x4_splat(0xff)), 8));
+ res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(zr, wasm_i32x4_splat(0xff)), 16));
+
+ wasm_v128_store(&data[i * 4], res);
+ }
+}
+
+static void decodeFilterOctSimd(short* data, size_t count)
+{
+ const v128_t sign = wasm_f32x4_splat(-0.f);
+ const v128_t zmask = wasm_i32x4_splat(0x7fff);
+
+ for (size_t i = 0; i < count; i += 4)
+ {
+ v128_t n4_0 = wasm_v128_load(&data[(i + 0) * 4]);
+ v128_t n4_1 = wasm_v128_load(&data[(i + 2) * 4]);
+
+ // gather both x/y 16-bit pairs in each 32-bit lane
+ v128_t n4 = wasmx_unziplo_v32x4(n4_0, n4_1);
+
+ // sign-extends each of x,y in [x y] with arithmetic shifts
+ v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 16), 16);
+ v128_t yf = wasm_i32x4_shr(n4, 16);
+
+ // unpack z; note that z is unsigned so we don't need to sign extend it
+ v128_t z4 = wasmx_unziphi_v32x4(n4_0, n4_1);
+ v128_t zf = wasm_v128_and(z4, zmask);
+
+ // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
+ v128_t x = wasm_f32x4_convert_i32x4(xf);
+ v128_t y = wasm_f32x4_convert_i32x4(yf);
+ v128_t z = wasm_f32x4_sub(wasm_f32x4_convert_i32x4(zf), wasm_f32x4_add(wasm_f32x4_abs(x), wasm_f32x4_abs(y)));
+
+ // fixup octahedral coordinates for z<0
+ // note: i32x4_min with 0 is equivalent to f32x4_min
+ v128_t t = wasm_i32x4_min(z, wasm_i32x4_splat(0));
+
+ x = wasm_f32x4_add(x, wasm_v128_xor(t, wasm_v128_and(x, sign)));
+ y = wasm_f32x4_add(y, wasm_v128_xor(t, wasm_v128_and(y, sign)));
+
+ // compute normal length & scale
+ v128_t ll = wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z)));
+ v128_t s = wasm_f32x4_div(wasm_f32x4_splat(32767.f), wasm_f32x4_sqrt(ll));
+
+ // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
+ // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
+ const v128_t fsnap = wasm_f32x4_splat(3 << 22);
+
+ v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap);
+ v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap);
+ v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap);
+
+ // mix x/z and y/0 to make 16-bit unpack easier
+ v128_t xzr = wasm_v128_or(wasm_v128_and(xr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(zr, 16));
+ v128_t y0r = wasm_v128_and(yr, wasm_i32x4_splat(0xffff));
+
+ // pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w
+ v128_t res_0 = wasmx_unpacklo_v16x8(xzr, y0r);
+ v128_t res_1 = wasmx_unpackhi_v16x8(xzr, y0r);
+
+ // patch in .w
+ res_0 = wasm_v128_or(res_0, wasm_v128_and(n4_0, wasm_i64x2_splat(0xffff000000000000)));
+ res_1 = wasm_v128_or(res_1, wasm_v128_and(n4_1, wasm_i64x2_splat(0xffff000000000000)));
+
+ wasm_v128_store(&data[(i + 0) * 4], res_0);
+ wasm_v128_store(&data[(i + 2) * 4], res_1);
+ }
+}
+
+static void decodeFilterQuatSimd(short* data, size_t count)
+{
+ const float scale = 1.f / sqrtf(2.f);
+
+ for (size_t i = 0; i < count; i += 4)
+ {
+ v128_t q4_0 = wasm_v128_load(&data[(i + 0) * 4]);
+ v128_t q4_1 = wasm_v128_load(&data[(i + 2) * 4]);
+
+ // gather both x/y 16-bit pairs in each 32-bit lane
+ v128_t q4_xy = wasmx_unziplo_v32x4(q4_0, q4_1);
+ v128_t q4_zc = wasmx_unziphi_v32x4(q4_0, q4_1);
+
+ // sign-extends each of x,y in [x y] and z,c in [z c] with arithmetic shifts
+ v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(q4_xy, 16), 16);
+ v128_t yf = wasm_i32x4_shr(q4_xy, 16);
+ v128_t zf = wasm_i32x4_shr(wasm_i32x4_shl(q4_zc, 16), 16);
+ v128_t cf = wasm_i32x4_shr(q4_zc, 16);
+
+ // get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f)
+ v128_t sf = wasm_v128_or(cf, wasm_i32x4_splat(3));
+ v128_t ss = wasm_f32x4_div(wasm_f32x4_splat(scale), wasm_f32x4_convert_i32x4(sf));
+
+ // convert x/y/z to [-1..1] (scaled...)
+ v128_t x = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(xf), ss);
+ v128_t y = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(yf), ss);
+ v128_t z = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(zf), ss);
+
+ // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors
+ // note: i32x4_max with 0 is equivalent to f32x4_max
+ v128_t ww = wasm_f32x4_sub(wasm_f32x4_splat(1.f), wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z))));
+ v128_t w = wasm_f32x4_sqrt(wasm_i32x4_max(ww, wasm_i32x4_splat(0)));
+
+ v128_t s = wasm_f32x4_splat(32767.f);
+
+ // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
+ // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
+ const v128_t fsnap = wasm_f32x4_splat(3 << 22);
+
+ v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap);
+ v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap);
+ v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap);
+ v128_t wr = wasm_f32x4_add(wasm_f32x4_mul(w, s), fsnap);
+
+ // mix x/z and w/y to make 16-bit unpack easier
+ v128_t xzr = wasm_v128_or(wasm_v128_and(xr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(zr, 16));
+ v128_t wyr = wasm_v128_or(wasm_v128_and(wr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(yr, 16));
+
+ // pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0)
+ v128_t res_0 = wasmx_unpacklo_v16x8(wyr, xzr);
+ v128_t res_1 = wasmx_unpackhi_v16x8(wyr, xzr);
+
+ // compute component index shifted left by 4 (and moved into i32x4 slot)
+ // TODO: volatile here works around LLVM mis-optimizing code; https://github.com/emscripten-core/emscripten/issues/11449
+ volatile v128_t cm = wasm_i32x4_shl(cf, 4);
+
+ // rotate and store
+ uint64_t* out = reinterpret_cast<uint64_t*>(&data[i * 4]);
+
+ out[0] = rotateleft64(wasm_i64x2_extract_lane(res_0, 0), wasm_i32x4_extract_lane(cm, 0));
+ out[1] = rotateleft64(wasm_i64x2_extract_lane(res_0, 1), wasm_i32x4_extract_lane(cm, 1));
+ out[2] = rotateleft64(wasm_i64x2_extract_lane(res_1, 0), wasm_i32x4_extract_lane(cm, 2));
+ out[3] = rotateleft64(wasm_i64x2_extract_lane(res_1, 1), wasm_i32x4_extract_lane(cm, 3));
+ }
+}
+
+static void decodeFilterExpSimd(unsigned int* data, size_t count) // WASM SIMD path: decodes `count` 32-bit values of `data` in place, four per iteration
+{
+ for (size_t i = 0; i < count; i += 4)
+ {
+ v128_t v = wasm_v128_load(&data[i]);
+
+ // decode exponent into 2^x directly
+ v128_t ef = wasm_i32x4_shr(v, 24); // top 8 bits of each lane, sign-extended
+ v128_t es = wasm_i32x4_shl(wasm_i32x4_add(ef, wasm_i32x4_splat(127)), 23); // add the float exponent bias (127) and shift into the exponent field
+
+ // decode 24-bit mantissa into floating-point value
+ v128_t mf = wasm_i32x4_shr(wasm_i32x4_shl(v, 8), 8); // low 24 bits of each lane, sign-extended
+ v128_t m = wasm_f32x4_convert_i32x4(mf);
+
+ v128_t r = wasm_f32x4_mul(es, m); // es holds the bit pattern of 2^ef, so r = 2^ef * m
+
+ wasm_v128_store(&data[i], r);
+ }
+}
+#endif
+
+} // namespace meshopt
+
+void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size) // public entry point: forwards `buffer` to the SIMD or scalar decodeFilterOct* implementation
+{
+ using namespace meshopt;
+
+ assert(vertex_count % 4 == 0);
+ assert(vertex_size == 4 || vertex_size == 8);
+
+#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
+ if (vertex_size == 4) // 4-byte vertices are handled as signed char data, 8-byte vertices as short data
+ decodeFilterOctSimd(static_cast<signed char*>(buffer), vertex_count);
+ else
+ decodeFilterOctSimd(static_cast<short*>(buffer), vertex_count);
+#else
+ if (vertex_size == 4) // scalar fallback, same dispatch on vertex_size as the SIMD branch
+ decodeFilterOct(static_cast<signed char*>(buffer), vertex_count);
+ else
+ decodeFilterOct(static_cast<short*>(buffer), vertex_count);
+#endif
+}
+
+void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size) // public entry point: forwards `buffer` (as short data) to the SIMD or scalar decodeFilterQuat* implementation
+{
+ using namespace meshopt;
+
+ assert(vertex_count % 4 == 0);
+ assert(vertex_size == 8);
+ (void)vertex_size; // vertex_size is otherwise only read by the assert above
+
+#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
+ decodeFilterQuatSimd(static_cast<short*>(buffer), vertex_count);
+#else
+ decodeFilterQuat(static_cast<short*>(buffer), vertex_count);
+#endif
+}
+
+void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size) // public entry point: forwards `buffer` (as 32-bit values) to the SIMD or scalar decodeFilterExp* implementation
+{
+ using namespace meshopt;
+
+ assert(vertex_count % 4 == 0);
+ assert(vertex_size % 4 == 0);
+
+#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
+ decodeFilterExpSimd(static_cast<unsigned int*>(buffer), vertex_count * (vertex_size / 4)); // second argument: total number of 32-bit values in the buffer
+#else
+ decodeFilterExp(static_cast<unsigned int*>(buffer), vertex_count * (vertex_size / 4)); // second argument: total number of 32-bit values in the buffer
+#endif
+}
+
+#undef SIMD_SSE
+#undef SIMD_NEON
+#undef SIMD_WASM
diff --git a/thirdparty/meshoptimizer/vfetchanalyzer.cpp b/thirdparty/meshoptimizer/vfetchanalyzer.cpp
new file mode 100644
index 0000000000..51dca873f8
--- /dev/null
+++ b/thirdparty/meshoptimizer/vfetchanalyzer.cpp
@@ -0,0 +1,58 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+#include <assert.h>
+#include <string.h>
+
+meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size) // replays the index stream through a small simulated cache and returns bytes_fetched plus the overfetch ratio
+{
+ assert(index_count % 3 == 0);
+ assert(vertex_size > 0 && vertex_size <= 256);
+
+ meshopt_Allocator allocator;
+
+ meshopt_VertexFetchStatistics result = {};
+
+ unsigned char* vertex_visited = allocator.allocate<unsigned char>(vertex_count); // one flag per vertex: set to 1 once any index references it
+ memset(vertex_visited, 0, vertex_count);
+
+ const size_t kCacheLine = 64;
+ const size_t kCacheSize = 128 * 1024;
+
+ // simple direct mapped cache; on typical mesh data this is close to 4-way cache, and this model is a gross approximation anyway
+ size_t cache[kCacheSize / kCacheLine] = {}; // 2048 slots; each remembers (tag + 1) of the line it currently holds, 0 meaning empty
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ unsigned int index = indices[i];
+ assert(index < vertex_count);
+
+ vertex_visited[index] = 1;
+
+ size_t start_address = index * vertex_size; // the vertex occupies bytes [start_address, end_address)
+ size_t end_address = start_address + vertex_size;
+
+ size_t start_tag = start_address / kCacheLine;
+ size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine; // rounded up, so [start_tag, end_tag) covers every line the vertex touches
+
+ assert(start_tag < end_tag);
+
+ for (size_t tag = start_tag; tag < end_tag; ++tag)
+ {
+ size_t line = tag % (sizeof(cache) / sizeof(cache[0])); // slot is chosen by tag modulo the slot count
+
+ // we store +1 since cache is filled with 0 by default
+ result.bytes_fetched += (cache[line] != tag + 1) * kCacheLine; // a miss adds one full cache line, a hit adds nothing
+ cache[line] = tag + 1;
+ }
+ }
+
+ size_t unique_vertex_count = 0;
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ unique_vertex_count += vertex_visited[i];
+
+ result.overfetch = unique_vertex_count == 0 ? 0 : float(result.bytes_fetched) / float(unique_vertex_count * vertex_size); // bytes fetched divided by the total size of referenced vertices; 0 when nothing is referenced
+
+ return result;
+}
diff --git a/thirdparty/meshoptimizer/vfetchoptimizer.cpp b/thirdparty/meshoptimizer/vfetchoptimizer.cpp
new file mode 100644
index 0000000000..465d6df5ca
--- /dev/null
+++ b/thirdparty/meshoptimizer/vfetchoptimizer.cpp
@@ -0,0 +1,74 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+#include <assert.h>
+#include <string.h>
+
+size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count) // fills destination[old index] with a new index assigned in order of first use; returns the number of vertices that were referenced
+{
+ assert(index_count % 3 == 0);
+
+ memset(destination, -1, vertex_count * sizeof(unsigned int)); // every entry starts as ~0u, which marks "not assigned"
+
+ unsigned int next_vertex = 0;
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ unsigned int index = indices[i];
+ assert(index < vertex_count);
+
+ if (destination[index] == ~0u) // first time this vertex is seen
+ {
+ destination[index] = next_vertex++;
+ }
+ }
+
+ assert(next_vertex <= vertex_count);
+
+ return next_vertex; // vertices never referenced by `indices` keep the ~0u marker
+}
+
+size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) // copies vertices into destination in order of first use, rewrites indices to match, and returns the number of vertices written
+{
+ assert(index_count % 3 == 0);
+ assert(vertex_size > 0 && vertex_size <= 256);
+
+ meshopt_Allocator allocator;
+
+ // support in-place optimization
+ if (destination == vertices)
+ {
+ unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size); // read from a private copy so writes to destination cannot clobber unread input
+ memcpy(vertices_copy, vertices, vertex_count * vertex_size);
+ vertices = vertices_copy;
+ }
+
+ // build vertex remap table
+ unsigned int* vertex_remap = allocator.allocate<unsigned int>(vertex_count);
+ memset(vertex_remap, -1, vertex_count * sizeof(unsigned int)); // ~0u marks a vertex that has not been emitted yet
+
+ unsigned int next_vertex = 0;
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ unsigned int index = indices[i];
+ assert(index < vertex_count);
+
+ unsigned int& remap = vertex_remap[index];
+
+ if (remap == ~0u) // vertex was not added to destination VB
+ {
+ // add vertex
+ memcpy(static_cast<unsigned char*>(destination) + next_vertex * vertex_size, static_cast<const unsigned char*>(vertices) + index * vertex_size, vertex_size); // append at slot next_vertex
+
+ remap = next_vertex++;
+ }
+
+ // modify indices in place
+ indices[i] = remap;
+ }
+
+ assert(next_vertex <= vertex_count);
+
+ return next_vertex;
+}
diff --git a/thirdparty/minimp3/LICENSE b/thirdparty/minimp3/LICENSE
new file mode 100644
index 0000000000..2c4afabdb6
--- /dev/null
+++ b/thirdparty/minimp3/LICENSE
@@ -0,0 +1,117 @@
+CC0 1.0 Universal
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator and
+subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the
+purpose of contributing to a commons of creative, cultural and scientific
+works ("Commons") that the public can reliably and without fear of later
+claims of infringement build upon, modify, incorporate in other works, reuse
+and redistribute as freely as possible in any form whatsoever and for any
+purposes, including without limitation commercial purposes. These owners may
+contribute to the Commons to promote the ideal of a free culture and the
+further production of creative, cultural and scientific works, or to gain
+reputation or greater distribution for their Work in part through the use and
+efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation
+of additional consideration or compensation, the person associating CC0 with a
+Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
+and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
+and publicly distribute the Work under its terms, with knowledge of his or her
+Copyright and Related Rights in the Work and the meaning and intended legal
+effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not limited
+to, the following:
+
+ i. the right to reproduce, adapt, distribute, perform, display, communicate,
+ and translate a Work;
+
+ ii. moral rights retained by the original author(s) and/or performer(s);
+
+ iii. publicity and privacy rights pertaining to a person's image or likeness
+ depicted in a Work;
+
+ iv. rights protecting against unfair competition in regards to a Work,
+ subject to the limitations in paragraph 4(a), below;
+
+ v. rights protecting the extraction, dissemination, use and reuse of data in
+ a Work;
+
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+ European Parliament and of the Council of 11 March 1996 on the legal
+ protection of databases, and under any national implementation thereof,
+ including any amended or successor version of such directive); and
+
+ vii. other similar, equivalent or corresponding rights throughout the world
+ based on applicable law or treaty, and any national implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention of,
+applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
+unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
+and Related Rights and associated claims and causes of action, whether now
+known or unknown (including existing as well as future claims and causes of
+action), in the Work (i) in all territories worldwide, (ii) for the maximum
+duration provided by applicable law or treaty (including future time
+extensions), (iii) in any current or future medium and for any number of
+copies, and (iv) for any purpose whatsoever, including without limitation
+commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
+the Waiver for the benefit of each member of the public at large and to the
+detriment of Affirmer's heirs and successors, fully intending that such Waiver
+shall not be subject to revocation, rescission, cancellation, termination, or
+any other legal or equitable action to disrupt the quiet enjoyment of the Work
+by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason be
+judged legally invalid or ineffective under applicable law, then the Waiver
+shall be preserved to the maximum extent permitted taking into account
+Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
+is so judged Affirmer hereby grants to each affected person a royalty-free,
+non transferable, non sublicensable, non exclusive, irrevocable and
+unconditional license to exercise Affirmer's Copyright and Related Rights in
+the Work (i) in all territories worldwide, (ii) for the maximum duration
+provided by applicable law or treaty (including future time extensions), (iii)
+in any current or future medium and for any number of copies, and (iv) for any
+purpose whatsoever, including without limitation commercial, advertising or
+promotional purposes (the "License"). The License shall be deemed effective as
+of the date CC0 was applied by Affirmer to the Work. Should any part of the
+License for any reason be judged legally invalid or ineffective under
+applicable law, such partial invalidity or ineffectiveness shall not
+invalidate the remainder of the License, and in such case Affirmer hereby
+affirms that he or she will not (i) exercise any of his or her remaining
+Copyright and Related Rights in the Work or (ii) assert any associated claims
+and causes of action with respect to the Work, in either case contrary to
+Affirmer's express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+ surrendered, licensed or otherwise affected by this document.
+
+ b. Affirmer offers the Work as-is and makes no representations or warranties
+ of any kind concerning the Work, express, implied, statutory or otherwise,
+ including without limitation warranties of title, merchantability, fitness
+ for a particular purpose, non infringement, or the absence of latent or
+ other defects, accuracy, or the present or absence of errors, whether or not
+ discoverable, all to the greatest extent permissible under applicable law.
+
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+ that may apply to the Work or any use thereof, including without limitation
+ any person's Copyright and Related Rights in the Work. Further, Affirmer
+ disclaims responsibility for obtaining any necessary consents, permissions
+ or other rights required for any use of the Work.
+
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+ party to this document and has no duty or obligation with respect to this
+ CC0 or use of the Work.
+
+For more information, please see
+<http://creativecommons.org/publicdomain/zero/1.0/>
+
diff --git a/thirdparty/minimp3/minimp3.h b/thirdparty/minimp3/minimp3.h
new file mode 100644
index 0000000000..796cbc1f8e
--- /dev/null
+++ b/thirdparty/minimp3/minimp3.h
@@ -0,0 +1,1855 @@
+#ifndef MINIMP3_H
+#define MINIMP3_H
+/*
+ https://github.com/lieff/minimp3
+ To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide.
+ This software is distributed without any warranty.
+ See <http://creativecommons.org/publicdomain/zero/1.0/>.
+*/
+#include <stdint.h>
+
+#define MINIMP3_MAX_SAMPLES_PER_FRAME (1152*2)
+
+typedef struct
+{
+ int frame_bytes, frame_offset, channels, hz, layer, bitrate_kbps;
+} mp3dec_frame_info_t; /* per-frame information; passed as the `info` argument of mp3dec_decode_frame() */
+
+typedef struct
+{
+ float mdct_overlap[2][9*32], qmf_state[15*2*32];
+ int reserv, free_format_bytes;
+ unsigned char header[4], reserv_buf[511]; /* 511 is the same value as MAX_BITRESERVOIR_BYTES in the implementation section */
+} mp3dec_t; /* decoder state handed to mp3dec_init() and mp3dec_decode_frame() */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+void mp3dec_init(mp3dec_t *dec);
+#ifndef MINIMP3_FLOAT_OUTPUT
+typedef int16_t mp3d_sample_t;
+#else /* MINIMP3_FLOAT_OUTPUT */
+typedef float mp3d_sample_t;
+void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples);
+#endif /* MINIMP3_FLOAT_OUTPUT */
+int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* MINIMP3_H */
+#if defined(MINIMP3_IMPLEMENTATION) && !defined(_MINIMP3_IMPLEMENTATION_GUARD)
+#define _MINIMP3_IMPLEMENTATION_GUARD
+
+#include <stdlib.h>
+#include <string.h>
+
+#define MAX_FREE_FORMAT_FRAME_SIZE 2304 /* more than ISO spec's */
+#ifndef MAX_FRAME_SYNC_MATCHES
+#define MAX_FRAME_SYNC_MATCHES 10 /* default; used only when the includer has not defined it already */
+#endif /* MAX_FRAME_SYNC_MATCHES */
+
+#define MAX_L3_FRAME_PAYLOAD_BYTES MAX_FREE_FORMAT_FRAME_SIZE /* MUST be >= 320000/8/32000*1152 = 1440 */
+
+#define MAX_BITRESERVOIR_BYTES 511
+#define SHORT_BLOCK_TYPE 2
+#define STOP_BLOCK_TYPE 3
+#define MODE_MONO 3 /* compared against HDR_GET_STEREO_MODE() */
+#define MODE_JOINT_STEREO 1 /* compared against HDR_GET_STEREO_MODE() */
+#define HDR_SIZE 4 /* the HDR_* macros below index bytes h[1]..h[3] of the header */
+#define HDR_IS_MONO(h) (((h[3]) & 0xC0) == 0xC0)
+#define HDR_IS_MS_STEREO(h) (((h[3]) & 0xE0) == 0x60)
+#define HDR_IS_FREE_FORMAT(h) (((h[2]) & 0xF0) == 0) /* true when HDR_GET_BITRATE(h) is 0 */
+#define HDR_IS_CRC(h) (!((h[1]) & 1))
+#define HDR_TEST_PADDING(h) ((h[2]) & 0x2)
+#define HDR_TEST_MPEG1(h) ((h[1]) & 0x8)
+#define HDR_TEST_NOT_MPEG25(h) ((h[1]) & 0x10)
+#define HDR_TEST_I_STEREO(h) ((h[3]) & 0x10)
+#define HDR_TEST_MS_STEREO(h) ((h[3]) & 0x20)
+#define HDR_GET_STEREO_MODE(h) (((h[3]) >> 6) & 3)
+#define HDR_GET_STEREO_MODE_EXT(h) (((h[3]) >> 4) & 3)
+#define HDR_GET_LAYER(h) (((h[1]) >> 1) & 3)
+#define HDR_GET_BITRATE(h) ((h[2]) >> 4)
+#define HDR_GET_SAMPLE_RATE(h) (((h[2]) >> 2) & 3)
+#define HDR_GET_MY_SAMPLE_RATE(h) (HDR_GET_SAMPLE_RATE(h) + (((h[1] >> 3) & 1) + ((h[1] >> 4) & 1))*3) /* sample rate index plus 0, 3 or 6 depending on the two version bits of h[1] */
+#define HDR_IS_FRAME_576(h) ((h[1] & 14) == 2)
+#define HDR_IS_LAYER_1(h) ((h[1] & 6) == 6) /* i.e. HDR_GET_LAYER(h) == 3 */
+
+#define BITS_DEQUANTIZER_OUT -1
+#define MAX_SCF (255 + BITS_DEQUANTIZER_OUT*4 - 210) /* = 41 with BITS_DEQUANTIZER_OUT == -1 */
+#define MAX_SCFI ((MAX_SCF + 3) & ~3) /* MAX_SCF rounded up to a multiple of 4 (= 44) */
+
+#define MINIMP3_MIN(a, b) ((a) > (b) ? (b) : (a)) /* note: one of the arguments is evaluated twice */
+#define MINIMP3_MAX(a, b) ((a) < (b) ? (b) : (a)) /* note: one of the arguments is evaluated twice */
+
+#if !defined(MINIMP3_NO_SIMD)
+
+#if !defined(MINIMP3_ONLY_SIMD) && (defined(_M_X64) || defined(__x86_64__) || defined(__aarch64__) || defined(_M_ARM64))
+/* x64 always has SSE2 and arm64 always has NEON, so no generic code is needed */
+#define MINIMP3_ONLY_SIMD
+#endif /* SIMD checks... */
+
+#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__))
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif /* defined(_MSC_VER) */
+#include <immintrin.h>
+#define HAVE_SSE 1
+#define HAVE_SIMD 1
+#define VSTORE _mm_storeu_ps
+#define VLD _mm_loadu_ps
+#define VSET _mm_set1_ps
+#define VADD _mm_add_ps
+#define VSUB _mm_sub_ps
+#define VMUL _mm_mul_ps
+#define VMAC(a, x, y) _mm_add_ps(a, _mm_mul_ps(x, y))
+#define VMSB(a, x, y) _mm_sub_ps(a, _mm_mul_ps(x, y))
+#define VMUL_S(x, s) _mm_mul_ps(x, _mm_set1_ps(s))
+#define VREV(x) _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3))
+typedef __m128 f4;
+#if defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD)
+#define minimp3_cpuid __cpuid
+#else /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */
+static __inline__ __attribute__((always_inline)) void minimp3_cpuid(int CPUInfo[], const int InfoType) /* runs CPUID with eax = InfoType and stores eax, ebx, ecx, edx into CPUInfo[0..3] */
+{
+#if defined(__PIC__)
+ __asm__ __volatile__( /* PIC build: ebx is swapped/restored by hand and its value returned through a generic register (presumably because ebx is reserved in PIC code - confirm) */
+#if defined(__x86_64__)
+ "push %%rbx\n"
+ "cpuid\n"
+ "xchgl %%ebx, %1\n"
+ "pop %%rbx\n"
+#else /* defined(__x86_64__) */
+ "xchgl %%ebx, %1\n"
+ "cpuid\n"
+ "xchgl %%ebx, %1\n"
+#endif /* defined(__x86_64__) */
+ : "=a" (CPUInfo[0]), "=r" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
+ : "a" (InfoType));
+#else /* defined(__PIC__) */
+ __asm__ __volatile__( /* non-PIC build: ebx can be named directly as an output */
+ "cpuid"
+ : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
+ : "a" (InfoType));
+#endif /* defined(__PIC__)*/
+}
+#endif /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */
+static int have_simd(void) /* x86 variant: returns nonzero when the SSE2 code paths may be used */
+{
+#ifdef MINIMP3_ONLY_SIMD
+ return 1;
+#else /* MINIMP3_ONLY_SIMD */
+ static int g_have_simd; /* cached answer plus 1; 0 means "not probed yet" */
+ int CPUInfo[4];
+#ifdef MINIMP3_TEST
+ static int g_counter;
+ if (g_counter++ > 100) /* test builds only: report "no SIMD" after the first 101 calls */
+ return 0;
+#endif /* MINIMP3_TEST */
+ if (g_have_simd)
+ goto end;
+ minimp3_cpuid(CPUInfo, 0); /* leaf 0: CPUInfo[0] is checked below before leaf 1 is queried */
+ g_have_simd = 1;
+ if (CPUInfo[0] > 0)
+ {
+ minimp3_cpuid(CPUInfo, 1);
+ g_have_simd = (CPUInfo[3] & (1 << 26)) + 1; /* SSE2 */
+ }
+end:
+ return g_have_simd - 1; /* 0 when the SSE2 bit is clear, otherwise the nonzero bit-26 mask */
+#endif /* MINIMP3_ONLY_SIMD */
+}
+#elif defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)
+#include <arm_neon.h>
+#define HAVE_SSE 0
+#define HAVE_SIMD 1
+#define VSTORE vst1q_f32
+#define VLD vld1q_f32
+#define VSET vmovq_n_f32
+#define VADD vaddq_f32
+#define VSUB vsubq_f32
+#define VMUL vmulq_f32
+#define VMAC(a, x, y) vmlaq_f32(a, x, y)
+#define VMSB(a, x, y) vmlsq_f32(a, x, y)
+#define VMUL_S(x, s) vmulq_f32(x, vmovq_n_f32(s))
+#define VREV(x) vcombine_f32(vget_high_f32(vrev64q_f32(x)), vget_low_f32(vrev64q_f32(x)))
+typedef float32x4_t f4;
+static int have_simd() /* NEON variant: unconditionally reports SIMD as available */
+{ /* TODO: detect neon for !MINIMP3_ONLY_SIMD */
+ return 1;
+}
+#else /* SIMD checks... */
+#define HAVE_SSE 0
+#define HAVE_SIMD 0
+#ifdef MINIMP3_ONLY_SIMD
+#error MINIMP3_ONLY_SIMD used, but SSE/NEON not enabled
+#endif /* MINIMP3_ONLY_SIMD */
+#endif /* SIMD checks... */
+#else /* !defined(MINIMP3_NO_SIMD) */
+#define HAVE_SIMD 0
+#endif /* !defined(MINIMP3_NO_SIMD) */
+
+#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__) && !defined(_M_ARM64)
+#define HAVE_ARMV6 1
+static __inline__ __attribute__((always_inline)) int32_t minimp3_clip_int16_arm(int32_t a) /* saturates `a` to the signed 16-bit range using one SSAT instruction */
+{
+ int32_t x = 0;
+ __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
+ return x;
+}
+#else
+#define HAVE_ARMV6 0
+#endif
+
+typedef struct
+{
+ const uint8_t *buf;
+ int pos, limit; /* both counted in bits: bs_init() sets limit = bytes*8, get_bits() advances pos by the bits requested */
+} bs_t;
+
+typedef struct
+{
+ float scf[3*64]; /* L12_read_scalefactors() writes three values per bitalloc entry */
+ uint8_t total_bands, stereo_bands, bitalloc[64], scfcod[64]; /* bitalloc is written at 2*band and 2*band + 1 by L12_read_scale_info() */
+} L12_scale_info;
+
+typedef struct
+{
+ uint8_t tab_offset, code_tab_width, band_count; /* offset into g_bitalloc_code_tab, bits per allocation code, number of consecutive bands using this entry */
+} L12_subband_alloc_t;
+
+typedef struct
+{
+ const uint8_t *sfbtab; /* set by L3_read_side_info() to a row of g_scf_long, g_scf_short or g_scf_mixed */
+ uint16_t part_23_length, big_values, scalefac_compress;
+ uint8_t global_gain, block_type, mixed_block_flag, n_long_sfb, n_short_sfb;
+ uint8_t table_select[3], region_count[3], subblock_gain[3];
+ uint8_t preflag, scalefac_scale, count1_table, scfsi;
+} L3_gr_info_t; /* filled field by field by L3_read_side_info() */
+
+typedef struct
+{
+ bs_t bs;
+ uint8_t maindata[MAX_BITRESERVOIR_BYTES + MAX_L3_FRAME_PAYLOAD_BYTES]; /* 511 + 2304 bytes */
+ L3_gr_info_t gr_info[4];
+ float grbuf[2][576], scf[40], syn[18 + 15][2*32];
+ uint8_t ist_pos[2][39];
+} mp3dec_scratch_t;
+
+static void bs_init(bs_t *bs, const uint8_t *data, int bytes) /* points the bit reader at `data` and rewinds it to bit 0 */
+{
+ bs->buf = data;
+ bs->pos = 0;
+ bs->limit = bytes*8; /* limit is kept in bits */
+}
+
+static uint32_t get_bits(bs_t *bs, int n) /* reads the next n bits, most significant bit first; returns 0 when the read would pass bs->limit */
+{
+ uint32_t next, cache = 0, s = bs->pos & 7; /* s = bit offset inside the first byte */
+ int shl = n + s;
+ const uint8_t *p = bs->buf + (bs->pos >> 3);
+ if ((bs->pos += n) > bs->limit) /* pos is advanced even when the read is rejected */
+ return 0;
+ next = *p++ & (255 >> s); /* drop the s bits of this byte that were consumed earlier */
+ while ((shl -= 8) > 0)
+ {
+ cache |= next << shl;
+ next = *p++;
+ }
+ return cache | (next >> -shl); /* shl <= 0 here: discard the unused low bits of the last byte */
+}
+
+static int hdr_valid(const uint8_t *h) /* nonzero when the header bytes at h pass all of the checks below */
+{
+ return h[0] == 0xff &&
+ ((h[1] & 0xF0) == 0xf0 || (h[1] & 0xFE) == 0xe2) &&
+ (HDR_GET_LAYER(h) != 0) && /* layer field 0 is rejected */
+ (HDR_GET_BITRATE(h) != 15) && /* bitrate index 15 is rejected */
+ (HDR_GET_SAMPLE_RATE(h) != 3); /* sample rate index 3 is rejected (g_hz has only 3 entries) */
+}
+
+static int hdr_compare(const uint8_t *h1, const uint8_t *h2) /* nonzero when h2 is a valid header that agrees with h1 on the bits compared below */
+{
+ return hdr_valid(h2) &&
+ ((h1[1] ^ h2[1]) & 0xFE) == 0 && /* byte 1 equal except bit 0 (the bit HDR_IS_CRC tests) */
+ ((h1[2] ^ h2[2]) & 0x0C) == 0 && /* same sample rate index (the bits HDR_GET_SAMPLE_RATE reads) */
+ !(HDR_IS_FREE_FORMAT(h1) ^ HDR_IS_FREE_FORMAT(h2)); /* both free-format or both not */
+}
+
+static unsigned hdr_bitrate_kbps(const uint8_t *h) /* looks up the bitrate in kbps; yields 0 for bitrate index 0 */
+{
+ static const uint8_t halfrate[2][3][15] = { /* stores kbps / 2; indexed [MPEG1 bit set][layer field - 1][bitrate index] */
+ { { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,16,24,28,32,40,48,56,64,72,80,88,96,112,128 } },
+ { { 0,16,20,24,28,32,40,48,56,64,80,96,112,128,160 }, { 0,16,24,28,32,40,48,56,64,80,96,112,128,160,192 }, { 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224 } },
+ };
+ return 2*halfrate[!!HDR_TEST_MPEG1(h)][HDR_GET_LAYER(h) - 1][HDR_GET_BITRATE(h)]; /* assumes layer field != 0 and bitrate index != 15, which hdr_valid() checks */
+}
+
+static unsigned hdr_sample_rate_hz(const uint8_t *h) /* sample rate in Hz derived from the sample rate index and the two version bits */
+{
+ static const unsigned g_hz[3] = { 44100, 48000, 32000 };
+ return g_hz[HDR_GET_SAMPLE_RATE(h)] >> (int)!HDR_TEST_MPEG1(h) >> (int)!HDR_TEST_NOT_MPEG25(h); /* halved when the MPEG1 bit is clear, halved again when the NOT_MPEG25 bit is clear */
+}
+
+static unsigned hdr_frame_samples(const uint8_t *h) /* samples per frame: 384 for layer 1, otherwise 1152, or 576 when HDR_IS_FRAME_576 holds */
+{
+ return HDR_IS_LAYER_1(h) ? 384 : (1152 >> (int)HDR_IS_FRAME_576(h));
+}
+
+static int hdr_frame_bytes(const uint8_t *h, int free_format_size) /* frame size in bytes computed from the header; falls back to free_format_size when that computes to 0 */
+{
+ int frame_bytes = hdr_frame_samples(h)*hdr_bitrate_kbps(h)*125/hdr_sample_rate_hz(h); /* kbps * 125 = bytes per second */
+ if (HDR_IS_LAYER_1(h))
+ {
+ frame_bytes &= ~3; /* slot align */
+ }
+ return frame_bytes ? frame_bytes : free_format_size;
+}
+
+static int hdr_padding(const uint8_t *h) /* padding in bytes: 0 when the padding bit is clear, else 4 for layer 1 and 1 otherwise */
+{
+ return HDR_TEST_PADDING(h) ? (HDR_IS_LAYER_1(h) ? 4 : 1) : 0;
+}
+
+#ifndef MINIMP3_ONLY_MP3
+static const L12_subband_alloc_t *L12_subband_alloc_table(const uint8_t *hdr, L12_scale_info *sci) /* picks the allocation table for this header, stores total_bands/stereo_bands in sci, and returns the table */
+{
+ const L12_subband_alloc_t *alloc;
+ int mode = HDR_GET_STEREO_MODE(hdr);
+ int nbands, stereo_bands = (mode == MODE_MONO) ? 0 : (mode == MODE_JOINT_STEREO) ? (HDR_GET_STEREO_MODE_EXT(hdr) << 2) + 4 : 32; /* mono: 0; joint stereo: 4, 8, 12 or 16 from the mode extension; otherwise 32 */
+
+ if (HDR_IS_LAYER_1(hdr))
+ {
+ static const L12_subband_alloc_t g_alloc_L1[] = { { 76, 4, 32 } };
+ alloc = g_alloc_L1;
+ nbands = 32;
+ } else if (!HDR_TEST_MPEG1(hdr))
+ {
+ static const L12_subband_alloc_t g_alloc_L2M2[] = { { 60, 4, 4 }, { 44, 3, 7 }, { 44, 2, 19 } };
+ alloc = g_alloc_L2M2;
+ nbands = 30;
+ } else
+ {
+ static const L12_subband_alloc_t g_alloc_L2M1[] = { { 0, 4, 3 }, { 16, 4, 8 }, { 32, 3, 12 }, { 40, 2, 7 } };
+ int sample_rate_idx = HDR_GET_SAMPLE_RATE(hdr);
+ unsigned kbps = hdr_bitrate_kbps(hdr) >> (int)(mode != MODE_MONO); /* halved for every mode other than mono */
+ if (!kbps) /* free-format */
+ {
+ kbps = 192;
+ }
+
+ alloc = g_alloc_L2M1;
+ nbands = 27;
+ if (kbps < 56)
+ {
+ static const L12_subband_alloc_t g_alloc_L2M1_lowrate[] = { { 44, 4, 2 }, { 44, 3, 10 } };
+ alloc = g_alloc_L2M1_lowrate;
+ nbands = sample_rate_idx == 2 ? 12 : 8;
+ } else if (kbps >= 96 && sample_rate_idx != 1)
+ {
+ nbands = 30;
+ }
+ }
+
+ sci->total_bands = (uint8_t)nbands;
+ sci->stereo_bands = (uint8_t)MINIMP3_MIN(stereo_bands, nbands); /* never more stereo bands than total bands */
+
+ return alloc;
+}
+
+static void L12_read_scalefactors(bs_t *bs, uint8_t *pba, uint8_t *scfcod, int bands, float *scf) /* writes three scalefactors to scf for each of the `bands` entries of pba/scfcod */
+{
+ static const float g_deq_L12[18*3] = {
+#define DQ(x) 9.53674316e-07f/x, 7.56931807e-07f/x, 6.00777173e-07f/x
+ DQ(3),DQ(7),DQ(15),DQ(31),DQ(63),DQ(127),DQ(255),DQ(511),DQ(1023),DQ(2047),DQ(4095),DQ(8191),DQ(16383),DQ(32767),DQ(65535),DQ(3),DQ(5),DQ(9)
+ };
+ int i, m;
+ for (i = 0; i < bands; i++)
+ {
+ float s = 0;
+ int ba = *pba++;
+ int mask = ba ? 4 + ((19 >> scfcod[i]) & 3) : 0; /* bit m set = read a fresh 6-bit value for that slot; ba == 0 reads nothing */
+ for (m = 4; m; m >>= 1) /* three slots: m = 4, 2, 1 */
+ {
+ if (mask & m)
+ {
+ int b = get_bits(bs, 6);
+ s = g_deq_L12[ba*3 - 6 + b % 3]*(1 << 21 >> b/3);
+ }
+ *scf++ = s; /* a slot without a fresh value repeats the previous s */
+ }
+ }
+}
+
+static void L12_read_scale_info(const uint8_t *hdr, bs_t *bs, L12_scale_info *sci) /* reads bit allocations, scfcod values and scalefactors from bs into sci */
+{
+ static const uint8_t g_bitalloc_code_tab[] = {
+ 0,17, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16,
+ 0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,16,
+ 0,17,18, 3,19,4,5,16,
+ 0,17,18,16,
+ 0,17,18,19, 4,5,6, 7,8, 9,10,11,12,13,14,15,
+ 0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,14,
+ 0, 2, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16
+ };
+ const L12_subband_alloc_t *subband_alloc = L12_subband_alloc_table(hdr, sci); /* also fills sci->total_bands and sci->stereo_bands */
+
+ int i, k = 0, ba_bits = 0;
+ const uint8_t *ba_code_tab = g_bitalloc_code_tab;
+
+ for (i = 0; i < sci->total_bands; i++)
+ {
+ uint8_t ba;
+ if (i == k) /* band i starts the next run of bands: switch to the next table entry */
+ {
+ k += subband_alloc->band_count;
+ ba_bits = subband_alloc->code_tab_width;
+ ba_code_tab = g_bitalloc_code_tab + subband_alloc->tab_offset;
+ subband_alloc++;
+ }
+ ba = ba_code_tab[get_bits(bs, ba_bits)];
+ sci->bitalloc[2*i] = ba;
+ if (i < sci->stereo_bands) /* a second code is read only for bands below stereo_bands */
+ {
+ ba = ba_code_tab[get_bits(bs, ba_bits)];
+ }
+ sci->bitalloc[2*i + 1] = sci->stereo_bands ? ba : 0; /* bands at or above stereo_bands reuse the first code here; 0 when stereo_bands is 0 */
+ }
+
+ for (i = 0; i < 2*sci->total_bands; i++)
+ {
+ sci->scfcod[i] = sci->bitalloc[i] ? HDR_IS_LAYER_1(hdr) ? 2 : get_bits(bs, 2) : 6; /* 6 when nothing is allocated; fixed 2 for layer 1; otherwise 2 bits from the stream */
+ }
+
+ L12_read_scalefactors(bs, sci->bitalloc, sci->scfcod, sci->total_bands*2, sci->scf);
+
+ for (i = sci->stereo_bands; i < sci->total_bands; i++)
+ {
+ sci->bitalloc[2*i + 1] = 0; /* cleared only after the scalefactors were read with the reused value */
+ }
+}
+
+static int L12_dequantize_granule(float *grbuf, bs_t *bs, L12_scale_info *sci, int group_size) /* reads 4 groups of group_size samples per bitalloc entry into grbuf; returns group_size*4 */
+{
+ int i, j, k, choff = 576;
+ for (j = 0; j < 4; j++)
+ {
+ float *dst = grbuf + group_size*j;
+ for (i = 0; i < 2*sci->total_bands; i++)
+ {
+ int ba = sci->bitalloc[i];
+ if (ba != 0)
+ {
+ if (ba < 17)
+ {
+ int half = (1 << (ba - 1)) - 1;
+ for (k = 0; k < group_size; k++)
+ {
+ dst[k] = (float)((int)get_bits(bs, ba) - half); /* one ba-bit code per sample, re-centred by subtracting half */
+ }
+ } else
+ {
+ unsigned mod = (2 << (ba - 17)) + 1; /* 3, 5, 9 */
+ unsigned code = get_bits(bs, mod + 2 - (mod >> 3)); /* 5, 7, 10 */
+ for (k = 0; k < group_size; k++, code /= mod) /* one code packs the whole group as base-mod digits */
+ {
+ dst[k] = (float)((int)(code % mod - mod/2));
+ }
+ }
+ }
+ dst += choff; /* alternates +576 and -558: each pair of entries lands 576 floats apart, then moves 18 floats on */
+ choff = 18 - choff;
+ }
+ }
+ return group_size*4;
+}
+
+static void L12_apply_scf_384(L12_scale_info *sci, const float *scf, float *dst) /* scales 12 samples per band in dst and in dst + 576 by the given scalefactors */
+{
+ int i, k;
+ memcpy(dst + 576 + sci->stereo_bands*18, dst + sci->stereo_bands*18, (sci->total_bands - sci->stereo_bands)*18*sizeof(float)); /* bands at or above stereo_bands: copy the first half's samples into the second half (offset 576) */
+ for (i = 0; i < sci->total_bands; i++, dst += 18, scf += 6) /* 18 floats and 6 scalefactors per band */
+ {
+ for (k = 0; k < 12; k++)
+ {
+ dst[k + 0] *= scf[0];
+ dst[k + 576] *= scf[3];
+ }
+ }
+}
+#endif /* MINIMP3_ONLY_MP3 */
+
+static int L3_read_side_info(bs_t *bs, L3_gr_info_t *gr, const uint8_t *hdr)
+{
+ static const uint8_t g_scf_long[8][23] = {
+ { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
+ { 12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2,0 },
+ { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
+ { 6,6,6,6,6,6,8,10,12,14,16,18,22,26,32,38,46,54,62,70,76,36,0 },
+ { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
+ { 4,4,4,4,4,4,6,6,8,8,10,12,16,20,24,28,34,42,50,54,76,158,0 },
+ { 4,4,4,4,4,4,6,6,6,8,10,12,16,18,22,28,34,40,46,54,54,192,0 },
+ { 4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102,26,0 }
+ };
+ static const uint8_t g_scf_short[8][40] = {
+ { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
+ { 8,8,8,8,8,8,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
+ { 4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
+ { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
+ { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
+ { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
+ { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
+ { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
+ };
+ static const uint8_t g_scf_mixed[8][40] = {
+ { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
+ { 12,12,12,4,4,4,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
+ { 6,6,6,6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
+ { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
+ { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
+ { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
+ { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
+ { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
+ };
+
+ unsigned tables, scfsi = 0;
+ int main_data_begin, part_23_sum = 0;
+ int sr_idx = HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0);
+ int gr_count = HDR_IS_MONO(hdr) ? 1 : 2;
+
+ if (HDR_TEST_MPEG1(hdr))
+ {
+ gr_count *= 2;
+ main_data_begin = get_bits(bs, 9);
+ scfsi = get_bits(bs, 7 + gr_count);
+ } else
+ {
+ main_data_begin = get_bits(bs, 8 + gr_count) >> gr_count;
+ }
+
+ do
+ {
+ if (HDR_IS_MONO(hdr))
+ {
+ scfsi <<= 4;
+ }
+ gr->part_23_length = (uint16_t)get_bits(bs, 12);
+ part_23_sum += gr->part_23_length;
+ gr->big_values = (uint16_t)get_bits(bs, 9);
+ if (gr->big_values > 288)
+ {
+ return -1;
+ }
+ gr->global_gain = (uint8_t)get_bits(bs, 8);
+ gr->scalefac_compress = (uint16_t)get_bits(bs, HDR_TEST_MPEG1(hdr) ? 4 : 9);
+ gr->sfbtab = g_scf_long[sr_idx];
+ gr->n_long_sfb = 22;
+ gr->n_short_sfb = 0;
+ if (get_bits(bs, 1))
+ {
+ gr->block_type = (uint8_t)get_bits(bs, 2);
+ if (!gr->block_type)
+ {
+ return -1;
+ }
+ gr->mixed_block_flag = (uint8_t)get_bits(bs, 1);
+ gr->region_count[0] = 7;
+ gr->region_count[1] = 255;
+ if (gr->block_type == SHORT_BLOCK_TYPE)
+ {
+ scfsi &= 0x0F0F;
+ if (!gr->mixed_block_flag)
+ {
+ gr->region_count[0] = 8;
+ gr->sfbtab = g_scf_short[sr_idx];
+ gr->n_long_sfb = 0;
+ gr->n_short_sfb = 39;
+ } else
+ {
+ gr->sfbtab = g_scf_mixed[sr_idx];
+ gr->n_long_sfb = HDR_TEST_MPEG1(hdr) ? 8 : 6;
+ gr->n_short_sfb = 30;
+ }
+ }
+ tables = get_bits(bs, 10);
+ tables <<= 5;
+ gr->subblock_gain[0] = (uint8_t)get_bits(bs, 3);
+ gr->subblock_gain[1] = (uint8_t)get_bits(bs, 3);
+ gr->subblock_gain[2] = (uint8_t)get_bits(bs, 3);
+ } else
+ {
+ gr->block_type = 0;
+ gr->mixed_block_flag = 0;
+ tables = get_bits(bs, 15);
+ gr->region_count[0] = (uint8_t)get_bits(bs, 4);
+ gr->region_count[1] = (uint8_t)get_bits(bs, 3);
+ gr->region_count[2] = 255;
+ }
+ gr->table_select[0] = (uint8_t)(tables >> 10);
+ gr->table_select[1] = (uint8_t)((tables >> 5) & 31);
+ gr->table_select[2] = (uint8_t)((tables) & 31);
+ gr->preflag = HDR_TEST_MPEG1(hdr) ? get_bits(bs, 1) : (gr->scalefac_compress >= 500);
+ gr->scalefac_scale = (uint8_t)get_bits(bs, 1);
+ gr->count1_table = (uint8_t)get_bits(bs, 1);
+ gr->scfsi = (uint8_t)((scfsi >> 12) & 15);
+ scfsi <<= 4;
+ gr++;
+ } while(--gr_count);
+
+ if (part_23_sum + bs->pos > bs->limit + main_data_begin*8)
+ {
+ return -1;
+ }
+
+ return main_data_begin;
+}
+
+ /*
+  * Reads up to four scalefactor partitions from bitbuf.
+  *
+  * scf_count[p] is the number of values in partition p (a zero count ends the
+  * walk) and scf_size[p] is the bit width of each value. Bit 3 of scfsi,
+  * shifted left once per partition, selects "copy the values already stored in
+  * ist_pos" instead of reading new bits. When scfsi is negative, a value equal
+  * to the all-ones code of its width is stored into ist_pos as -1 (0xFF).
+  * The three entries following the last partition in scf are cleared.
+  */
+ static void L3_read_scalefactors(uint8_t *scf, uint8_t *ist_pos, const uint8_t *scf_size, const uint8_t *scf_count, bs_t *bitbuf, int scfsi)
+ {
+     int part;
+     for (part = 0; part < 4; part++, scfsi *= 2)
+     {
+         int n = scf_count[part];
+         int width;
+         if (!n)
+         {
+             break;
+         }
+         if (scfsi & 8)
+         {
+             /* reuse the values kept in ist_pos */
+             memcpy(scf, ist_pos, n);
+         } else if (0 == (width = scf_size[part]))
+         {
+             /* zero-width partition: every value is 0 */
+             memset(scf, 0, n);
+             memset(ist_pos, 0, n);
+         } else
+         {
+             int sentinel = (scfsi < 0) ? (1 << width) - 1 : -1;
+             int idx;
+             for (idx = 0; idx < n; idx++)
+             {
+                 int val = get_bits(bitbuf, width);
+                 ist_pos[idx] = (val == sentinel ? -1 : val);
+                 scf[idx] = val;
+             }
+         }
+         ist_pos += n;
+         scf += n;
+     }
+     scf[0] = scf[1] = scf[2] = 0;
+ }
+
+ /*
+  * Returns y scaled by 2^(-exp_q2/4), i.e. exp_q2 is a negative exponent in
+  * quarter-octave units. The exponent is consumed in steps of at most 30*4
+  * so that the integer shift "1 << 30 >> (step >> 2)" stays in range.
+  */
+ static float L3_ldexp_q2(float y, int exp_q2)
+ {
+     /* 2^-30 * 2^(-k/4) for k = 0..3 */
+     static const float g_expfrac[4] = { 9.31322575e-10f,7.83145814e-10f,6.58544508e-10f,5.53767716e-10f };
+     for (;;)
+     {
+         int step = MINIMP3_MIN(30*4, exp_q2);
+         y *= g_expfrac[step & 3]*(1 << 30 >> (step >> 2));
+         exp_q2 -= step;
+         if (exp_q2 <= 0)
+         {
+             break;
+         }
+     }
+     return y;
+ }
+
+ /* Decodes the scalefactors of one granule/channel and converts them into
+  * per-band float gains written to scf[].
+  *
+  * Steps, in order:
+  *  1. pick a partition-count row from g_scf_partitions based on
+  *     gr->n_short_sfb / gr->n_long_sfb;
+  *  2. derive the four per-partition bit widths scf_size[]: from g_scfc_decode
+  *     when HDR_TEST_MPEG1(hdr) is true, otherwise by a mixed-radix split of
+  *     gr->scalefac_compress using the radices in g_mod;
+  *  3. read the raw values into iscf[] (and ist_pos[]) via L3_read_scalefactors;
+  *  4. add gr->subblock_gain (short bands) or the g_preamp table (preflag);
+  *  5. scf[i] = L3_ldexp_q2(gain, iscf[i] << scf_shift) for every band.
+  *
+  * hdr      frame header bytes, only examined through the HDR_* macros
+  * ist_pos  receives the values L3_read_scalefactors stores for intensity stereo
+  * bs       bitstream the scalefactors are read from
+  * gr       side info of this granule/channel
+  * scf      output: n_long_sfb + n_short_sfb gains
+  * ch       channel index; only used (non-zero test) in the non-MPEG1 branch
+  */ static void L3_decode_scalefactors(const uint8_t *hdr, uint8_t *ist_pos, bs_t *bs, const L3_gr_info_t *gr, float *scf, int ch)
+ {
+ static const uint8_t g_scf_partitions[3][28] = {
+ { 6,5,5, 5,6,5,5,5,6,5, 7,3,11,10,0,0, 7, 7, 7,0, 6, 6,6,3, 8, 8,5,0 },
+ { 8,9,6,12,6,9,9,9,6,9,12,6,15,18,0,0, 6,15,12,0, 6,12,9,6, 6,18,9,0 },
+ { 9,9,6,12,9,9,9,9,9,9,12,6,18,18,0,0,12,12,12,0,12, 9,9,6,15,12,9,0 }
+ };
+ const uint8_t *scf_partition = g_scf_partitions[!!gr->n_short_sfb + !gr->n_long_sfb]; /* row 0: no short bands, 1: long+short, 2: short only */
+ uint8_t scf_size[4], iscf[40];
+ int i, scf_shift = gr->scalefac_scale + 1, gain_exp, scfsi = gr->scfsi;
+ float gain;
+
+ if (HDR_TEST_MPEG1(hdr))
+ {
+ static const uint8_t g_scfc_decode[16] = { 0,1,2,3, 12,5,6,7, 9,10,11,13, 14,15,18,19 };
+ int part = g_scfc_decode[gr->scalefac_compress];
+ scf_size[1] = scf_size[0] = (uint8_t)(part >> 2); /* width of partitions 0 and 1 */
+ scf_size[3] = scf_size[2] = (uint8_t)(part & 3); /* width of partitions 2 and 3 */
+ } else
+ {
+ static const uint8_t g_mod[6*4] = { 5,5,4,4,5,5,4,1,4,3,1,1,5,6,6,1,4,4,4,1,4,3,1,1 };
+ int k, modprod, sfc, ist = HDR_TEST_I_STEREO(hdr) && ch;
+ sfc = gr->scalefac_compress >> ist;
+ for (k = ist*3*4; sfc >= 0; sfc -= modprod, k += 4) /* try successive g_mod groups until sfc falls inside one */
+ {
+ for (modprod = 1, i = 3; i >= 0; i--)
+ {
+ scf_size[i] = (uint8_t)(sfc / modprod % g_mod[k + i]); /* mixed-radix digit i */
+ modprod *= g_mod[k + i];
+ }
+ }
+ scf_partition += k; /* k is one group (4 entries) past the group that matched */
+ scfsi = -16; /* negative and bit 3 clear: nothing is copied, all-ones codes are stored as -1 in ist_pos */
+ }
+ L3_read_scalefactors(iscf, ist_pos, scf_size, scf_partition, bs, scfsi);
+
+ if (gr->n_short_sfb)
+ {
+ int sh = 3 - scf_shift;
+ for (i = 0; i < gr->n_short_sfb; i += 3) /* short bands come in groups of three, one entry per subblock_gain */
+ {
+ iscf[gr->n_long_sfb + i + 0] += gr->subblock_gain[0] << sh;
+ iscf[gr->n_long_sfb + i + 1] += gr->subblock_gain[1] << sh;
+ iscf[gr->n_long_sfb + i + 2] += gr->subblock_gain[2] << sh;
+ }
+ } else if (gr->preflag)
+ {
+ static const uint8_t g_preamp[10] = { 1,1,1,1,2,2,3,3,3,2 };
+ for (i = 0; i < 10; i++)
+ {
+ iscf[11 + i] += g_preamp[i]; /* boost applied to bands 11..20 */
+ }
+ }
+
+ gain_exp = gr->global_gain + BITS_DEQUANTIZER_OUT*4 - 210 - (HDR_IS_MS_STEREO(hdr) ? 2 : 0);
+ gain = L3_ldexp_q2(1 << (MAX_SCFI/4), MAX_SCFI - gain_exp);
+ for (i = 0; i < (int)(gr->n_long_sfb + gr->n_short_sfb); i++)
+ {
+ scf[i] = L3_ldexp_q2(gain, iscf[i] << scf_shift);
+ }
+ }
+
+ /* Signed table of n^(4/3): the first 16 entries are -(n^(4/3)) for n = 0..15,
+  * followed by 129 entries n^(4/3) for n = 0..128. Readers index it as
+  * g_pow43[16 + n] for the positive value and subtract 16 from the index to
+  * obtain the negated value of a small n.
+  */ static const float g_pow43[129 + 16] = {
+ 0,-1,-2.519842f,-4.326749f,-6.349604f,-8.549880f,-10.902724f,-13.390518f,-16.000000f,-18.720754f,-21.544347f,-24.463781f,-27.473142f,-30.567351f,-33.741992f,-36.993181f,
+ 0,1,2.519842f,4.326749f,6.349604f,8.549880f,10.902724f,13.390518f,16.000000f,18.720754f,21.544347f,24.463781f,27.473142f,30.567351f,33.741992f,36.993181f,40.317474f,43.711787f,47.173345f,50.699631f,54.288352f,57.937408f,61.644865f,65.408941f,69.227979f,73.100443f,77.024898f,81.000000f,85.024491f,89.097188f,93.216975f,97.382800f,101.593667f,105.848633f,110.146801f,114.487321f,118.869381f,123.292209f,127.755065f,132.257246f,136.798076f,141.376907f,145.993119f,150.646117f,155.335327f,160.060199f,164.820202f,169.614826f,174.443577f,179.305980f,184.201575f,189.129918f,194.090580f,199.083145f,204.107210f,209.162385f,214.248292f,219.364564f,224.510845f,229.686789f,234.892058f,240.126328f,245.389280f,250.680604f,256.000000f,261.347174f,266.721841f,272.123723f,277.552547f,283.008049f,288.489971f,293.998060f,299.532071f,305.091761f,310.676898f,316.287249f,321.922592f,327.582707f,333.267377f,338.976394f,344.709550f,350.466646f,356.247482f,362.051866f,367.879608f,373.730522f,379.604427f,385.501143f,391.420496f,397.362314f,403.326427f,409.312672f,415.320884f,421.350905f,427.402579f,433.475750f,439.570269f,445.685987f,451.822757f,457.980436f,464.158883f,470.357960f,476.577530f,482.817459f,489.077615f,495.357868f,501.658090f,507.978156f,514.317941f,520.677324f,527.056184f,533.454404f,539.871867f,546.308458f,552.764065f,559.238575f,565.731879f,572.243870f,578.774440f,585.323483f,591.890898f,598.476581f,605.080431f,611.702349f,618.342238f,625.000000f,631.675540f,638.368763f,645.079578f
+ };
+
+ /* Returns (an approximation of) x^(4/3) for a non-negative integer x.
+  *
+  * x < 129 is a direct lookup in g_pow43. Larger x is reduced to a table
+  * entry near x/64 (or near x/8 when x < 1024, by pre-multiplying x by 8),
+  * corrected with the polynomial 1 + (4/3)*frac + (2/9)*frac^2, and scaled
+  * back by mult, which is 256 = 64^(4/3) or 16 = 8^(4/3).
+  * NOTE(review): no range check on x here; callers are assumed to keep
+  * (x + sign) >> 6 within the 129-entry positive part of the table.
+  */ static float L3_pow_43(int x)
+ {
+ float frac;
+ int sign, mult = 256;
+
+ if (x < 129)
+ {
+ return g_pow43[16 + x];
+ }
+
+ if (x < 1024)
+ {
+ mult = 16;
+ x <<= 3;
+ }
+
+ sign = 2*x & 64; /* 64 when bit 5 of x is set: round to the nearest multiple of 64 */
+ frac = (float)((x & 63) - sign) / ((x & ~63) + sign); /* relative offset from that multiple */
+ return g_pow43[16 + ((x + sign) >> 6)]*(1.f + frac*((4.f/3) + frac*(2.f/9)))*mult;
+ }
+
+ /* Decodes the Huffman-coded spectrum of one granule/channel into dst[].
+  *
+  * Phase 1 (big values): for each region, selects a codebook through
+  * gr_info->table_select / tabindex and decodes value pairs band by band
+  * (band widths come from gr_info->sfbtab, one scf[] gain per band) until
+  * gr_info->big_values pairs have been produced.
+  * Phase 2 (count1): decodes groups of four +/-one values with tab32 or tab33
+  * until the bit position passes layer3gr_limit or the band table ends.
+  * On return bs->pos is set to layer3gr_limit regardless of how many bits
+  * were actually consumed.
+  *
+  * The bitstream is read through a local 32-bit cache (bs_cache, bs_sh,
+  * bs_next_ptr) driven by the PEEK_BITS / FLUSH_BITS / CHECK_BITS macros
+  * defined below; those macros stay defined after this function.
+  *
+  * Codebook entry layout as used by the decode loops: a negative entry is a
+  * link (low 3 bits = width of the next peek, entry >> 3 = negated offset of
+  * the sub-table); a non-negative entry is a leaf (entry >> 8 = bits to
+  * flush, the two low nibbles = the two decoded magnitudes).
+  */ static void L3_huffman(float *dst, bs_t *bs, const L3_gr_info_t *gr_info, const float *scf, int layer3gr_limit)
+ {
+ static const int16_t tabs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,
+ -255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288,
+ -255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288,
+ -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258,
+ -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259,
+ -252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258,
+ -252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258,
+ -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259,
+ -251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258,
+ -251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290,
+ -252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259,
+ -250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258,
+ -250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259,
+ -251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258,
+ -253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 };
+ static const uint8_t tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205 }; /* count1 codebook used when gr_info->count1_table == 0 */
+ static const uint8_t tab33[] = { 252,236,220,204,188,172,156,140,124,108,92,76,60,44,28,12 }; /* count1 codebook used when gr_info->count1_table != 0 */
+ static const int16_t tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 }; /* offset into tabs[] for each table_select value */
+ static const uint8_t g_linbits[] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 }; /* escape ("linbits") width for each table_select value */
+
+ #define PEEK_BITS(n) (bs_cache >> (32 - n)) /* top n bits of the cache, not consumed */
+ #define FLUSH_BITS(n) { bs_cache <<= (n); bs_sh += (n); } /* consume n bits */
+ #define CHECK_BITS while (bs_sh >= 0) { bs_cache |= (uint32_t)*bs_next_ptr++ << bs_sh; bs_sh -= 8; } /* refill the cache bytewise */
+ #define BSPOS ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh) /* current bit position relative to bs->buf */
+
+ float one = 0.0f;
+ int ireg = 0, big_val_cnt = gr_info->big_values;
+ const uint8_t *sfb = gr_info->sfbtab;
+ const uint8_t *bs_next_ptr = bs->buf + bs->pos/8;
+ uint32_t bs_cache = (((bs_next_ptr[0]*256u + bs_next_ptr[1])*256u + bs_next_ptr[2])*256u + bs_next_ptr[3]) << (bs->pos & 7); /* preload 4 bytes, aligned to the current bit */
+ int pairs_to_decode, np, bs_sh = (bs->pos & 7) - 8;
+ bs_next_ptr += 4;
+
+ while (big_val_cnt > 0) /* phase 1: one iteration per region */
+ {
+ int tab_num = gr_info->table_select[ireg];
+ int sfb_cnt = gr_info->region_count[ireg++];
+ const int16_t *codebook = tabs + tabindex[tab_num];
+ int linbits = g_linbits[tab_num];
+ if (linbits) /* codebooks with escape codes */
+ {
+ do
+ {
+ np = *sfb++ / 2; /* pairs in this band */
+ pairs_to_decode = MINIMP3_MIN(big_val_cnt, np);
+ one = *scf++; /* gain of this band */
+ do
+ {
+ int j, w = 5;
+ int leaf = codebook[PEEK_BITS(w)];
+ while (leaf < 0) /* follow links until a leaf is reached */
+ {
+ FLUSH_BITS(w);
+ w = leaf & 7;
+ leaf = codebook[PEEK_BITS(w) - (leaf >> 3)];
+ }
+ FLUSH_BITS(leaf >> 8);
+
+ for (j = 0; j < 2; j++, dst++, leaf >>= 4)
+ {
+ int lsb = leaf & 0x0F;
+ if (lsb == 15) /* escape: magnitude is 15 plus linbits extra bits */
+ {
+ lsb += PEEK_BITS(linbits);
+ FLUSH_BITS(linbits);
+ CHECK_BITS;
+ *dst = one*L3_pow_43(lsb)*((int32_t)bs_cache < 0 ? -1: 1); /* top cache bit is the sign */
+ } else
+ {
+ *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one; /* sign bit selects the negated half of the table */
+ }
+ FLUSH_BITS(lsb ? 1 : 0); /* a sign bit is only present for non-zero values */
+ }
+ CHECK_BITS;
+ } while (--pairs_to_decode);
+ } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);
+ } else /* codebooks without escape codes */
+ {
+ do
+ {
+ np = *sfb++ / 2;
+ pairs_to_decode = MINIMP3_MIN(big_val_cnt, np);
+ one = *scf++;
+ do
+ {
+ int j, w = 5;
+ int leaf = codebook[PEEK_BITS(w)];
+ while (leaf < 0)
+ {
+ FLUSH_BITS(w);
+ w = leaf & 7;
+ leaf = codebook[PEEK_BITS(w) - (leaf >> 3)];
+ }
+ FLUSH_BITS(leaf >> 8);
+
+ for (j = 0; j < 2; j++, dst++, leaf >>= 4)
+ {
+ int lsb = leaf & 0x0F;
+ *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;
+ FLUSH_BITS(lsb ? 1 : 0);
+ }
+ CHECK_BITS;
+ } while (--pairs_to_decode);
+ } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);
+ }
+ }
+
+ for (np = 1 - big_val_cnt;; dst += 4) /* phase 2: count1 quadruples; np = pairs left in the current band, plus one */
+ {
+ const uint8_t *codebook_count1 = (gr_info->count1_table) ? tab33 : tab32;
+ int leaf = codebook_count1[PEEK_BITS(4)];
+ if (!(leaf & 8)) /* entry is a link: second lookup with (leaf & 3) more bits */
+ {
+ leaf = codebook_count1[(leaf >> 3) + (bs_cache << 4 >> (32 - (leaf & 3)))];
+ }
+ FLUSH_BITS(leaf & 7);
+ if (BSPOS > layer3gr_limit) /* ran past the end of this granule's data */
+ {
+ break;
+ }
+ #define RELOAD_SCALEFACTOR if (!--np) { np = *sfb++/2; if (!np) break; one = *scf++; }
+ #define DEQ_COUNT1(s) if (leaf & (128 >> s)) { dst[s] = ((int32_t)bs_cache < 0) ? -one : one; FLUSH_BITS(1) }
+ RELOAD_SCALEFACTOR;
+ DEQ_COUNT1(0);
+ DEQ_COUNT1(1);
+ RELOAD_SCALEFACTOR;
+ DEQ_COUNT1(2);
+ DEQ_COUNT1(3);
+ CHECK_BITS;
+ }
+
+ bs->pos = layer3gr_limit;
+ }
+
+ /* In-place sum/difference of two channels over n samples:
+  * left[i] becomes left[i] + right[i] and right[i] becomes left[i] - right[i],
+  * where the right channel starts 576 floats after left.
+  * When HAVE_SIMD is enabled and have_simd() is true, groups of four samples
+  * are processed with vector ops; the scalar loop handles the remainder.
+  */ static void L3_midside_stereo(float *left, int n)
+ {
+ int i = 0;
+ float *right = left + 576;
+ #if HAVE_SIMD
+ if (have_simd()) for (; i < n - 3; i += 4)
+ {
+ f4 vl = VLD(left + i);
+ f4 vr = VLD(right + i);
+ VSTORE(left + i, VADD(vl, vr));
+ VSTORE(right + i, VSUB(vl, vr));
+ }
+ #endif /* HAVE_SIMD */
+ for (; i < n; i++) /* scalar path, also finishes what the SIMD loop left over */
+ {
+ float a = left[i];
+ float b = right[i];
+ left[i] = a + b;
+ right[i] = a - b;
+ }
+ }
+
+ /*
+  * Splits one band of n samples into two channels: the sample at left[k] is
+  * written, scaled by kr, to left[k + 576] and then replaced by itself scaled
+  * by kl.
+  */
+ static void L3_intensity_stereo_band(float *left, int n, float kl, float kr)
+ {
+     float *l = left;
+     float *r = left + 576;
+     float *const end = left + (n > 0 ? n : 0);
+     while (l < end)
+     {
+         const float v = *l;
+         *r++ = v*kr;
+         *l++ = v*kl;
+     }
+ }
+
+ /*
+  * Scans the first nbands bands of `right` (band widths taken from sfb[]) and
+  * records, for each residue class band % 3, the highest band index that
+  * contains at least one non-zero sample. Classes with no such band keep -1.
+  */
+ static void L3_stereo_top_band(const float *right, const uint8_t *sfb, int nbands, int max_band[3])
+ {
+     int band;
+
+     max_band[2] = -1;
+     max_band[1] = -1;
+     max_band[0] = -1;
+
+     for (band = 0; band < nbands; right += sfb[band], band++)
+     {
+         int width = sfb[band];
+         int pos = 0;
+         /* skip pairs that are entirely zero */
+         while (pos < width && right[pos] == 0 && right[pos + 1] == 0)
+         {
+             pos += 2;
+         }
+         if (pos < width)
+         {
+             max_band[band % 3] = band;
+         }
+     }
+ }
+
+ /* Applies per-band stereo processing to the granule starting at left (the
+  * second channel is addressed as left + 576 by the helpers called here).
+  *
+  * For every band i, until a zero width is met in sfb[]:
+  *  - if i lies above max_band[i % 3] and ist_pos[i] is below max_pos
+  *    (7 when HDR_TEST_MPEG1(hdr), otherwise 64), the band is rebuilt with
+  *    L3_intensity_stereo_band using gains kl/kr;
+  *  - otherwise, if HDR_TEST_MS_STEREO(hdr), L3_midside_stereo is applied;
+  *  - otherwise the band is left untouched.
+  * mpeg2_sh is only used in the non-MPEG1 branch, as a shift applied to the
+  * exponent passed to L3_ldexp_q2.
+  */ static void L3_stereo_process(float *left, const uint8_t *ist_pos, const uint8_t *sfb, const uint8_t *hdr, int max_band[3], int mpeg2_sh)
+ {
+ static const float g_pan[7*2] = { 0,1,0.21132487f,0.78867513f,0.36602540f,0.63397460f,0.5f,0.5f,0.63397460f,0.36602540f,0.78867513f,0.21132487f,1,0 }; /* (kl, kr) pairs for ipos 0..6 */
+ unsigned i, max_pos = HDR_TEST_MPEG1(hdr) ? 7 : 64;
+
+ for (i = 0; sfb[i]; i++)
+ {
+ unsigned ipos = ist_pos[i];
+ if ((int)i > max_band[i % 3] && ipos < max_pos)
+ {
+ float kl, kr, s = HDR_TEST_MS_STEREO(hdr) ? 1.41421356f : 1; /* extra sqrt(2) factor when the MS flag is also set */
+ if (HDR_TEST_MPEG1(hdr))
+ {
+ kl = g_pan[2*ipos];
+ kr = g_pan[2*ipos + 1];
+ } else
+ {
+ kl = 1;
+ kr = L3_ldexp_q2(1, (ipos + 1) >> 1 << mpeg2_sh);
+ if (ipos & 1) /* odd positions attenuate the other channel instead */
+ {
+ kl = kr;
+ kr = 1;
+ }
+ }
+ L3_intensity_stereo_band(left, sfb[i], kl*s, kr*s);
+ } else if (HDR_TEST_MS_STEREO(hdr))
+ {
+ L3_midside_stereo(left, sfb[i]);
+ }
+ left += sfb[i];
+ }
+ }
+
+ /* Entry point for intensity-stereo processing of one granule.
+  *
+  * Finds the highest non-zero band(s) of the second channel (left + 576) with
+  * L3_stereo_top_band, fixes up the last ist_pos entry of each block, and
+  * hands over to L3_stereo_process. gr[1] (the side info that follows gr) is
+  * read for bit 0 of its scalefac_compress, which is passed as mpeg2_sh.
+  */ static void L3_intensity_stereo(float *left, uint8_t *ist_pos, const L3_gr_info_t *gr, const uint8_t *hdr)
+ {
+ int max_band[3], n_sfb = gr->n_long_sfb + gr->n_short_sfb;
+ int i, max_blocks = gr->n_short_sfb ? 3 : 1;
+
+ L3_stereo_top_band(left + 576, gr->sfbtab, n_sfb, max_band);
+ if (gr->n_long_sfb)
+ {
+ max_band[0] = max_band[1] = max_band[2] = MINIMP3_MAX(MINIMP3_MAX(max_band[0], max_band[1]), max_band[2]); /* with long bands present, use a single common limit */
+ }
+ for (i = 0; i < max_blocks; i++)
+ {
+ int default_pos = HDR_TEST_MPEG1(hdr) ? 3 : 0;
+ int itop = n_sfb - max_blocks + i; /* last band of block i */
+ int prev = itop - max_blocks; /* the band before it in the same block */
+ ist_pos[itop] = max_band[i] >= prev ? default_pos : ist_pos[prev];
+ }
+ L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress & 1);
+ }
+
+ /*
+  * Interleaves groups of three equally sized runs in grbuf.
+  *
+  * sfb is read with a stride of 3 until a zero width is found; for each width
+  * `len`, the three consecutive runs of `len` samples are merged so that the
+  * output holds run0[i], run1[i], run2[i] for i = 0..len-1. The result is
+  * assembled in scratch and then copied back over grbuf.
+  */
+ static void L3_reorder(float *grbuf, float *scratch, const uint8_t *sfb)
+ {
+     const float *in = grbuf;
+     float *out = scratch;
+     int len = *sfb;
+
+     while (len != 0)
+     {
+         int i;
+         for (i = 0; i < len; i++)
+         {
+             out[0] = in[i];
+             out[1] = in[i + len];
+             out[2] = in[i + 2*len];
+             out += 3;
+         }
+         in += 3*len;
+         sfb += 3;
+         len = *sfb;
+     }
+     memcpy(grbuf, scratch, (out - scratch)*sizeof(float));
+ }
+
+ /* Applies 8 butterfly rotations across each of nbands boundaries between
+  * consecutive 18-sample blocks of grbuf: sample 18 + i of the upper block is
+  * paired with sample 17 - i of the lower block and the pair is rotated with
+  * the coefficients g_aa[0][i] / g_aa[1][i].
+  * With HAVE_SIMD and have_simd() the 8 pairs are done as two 4-wide steps;
+  * the scalar loop is compiled out when MINIMP3_ONLY_SIMD is defined.
+  */ static void L3_antialias(float *grbuf, int nbands)
+ {
+ static const float g_aa[2][8] = {
+ {0.85749293f,0.88174200f,0.94962865f,0.98331459f,0.99551782f,0.99916056f,0.99989920f,0.99999316f},
+ {0.51449576f,0.47173197f,0.31337745f,0.18191320f,0.09457419f,0.04096558f,0.01419856f,0.00369997f}
+ };
+
+ for (; nbands > 0; nbands--, grbuf += 18)
+ {
+ int i = 0;
+ #if HAVE_SIMD
+ if (have_simd()) for (; i < 8; i += 4)
+ {
+ f4 vu = VLD(grbuf + 18 + i);
+ f4 vd = VLD(grbuf + 14 - i); /* samples 14-i .. 17-i, reversed below to line up with vu */
+ f4 vc0 = VLD(g_aa[0] + i);
+ f4 vc1 = VLD(g_aa[1] + i);
+ vd = VREV(vd);
+ VSTORE(grbuf + 18 + i, VSUB(VMUL(vu, vc0), VMUL(vd, vc1)));
+ vd = VADD(VMUL(vu, vc1), VMUL(vd, vc0));
+ VSTORE(grbuf + 14 - i, VREV(vd));
+ }
+ #endif /* HAVE_SIMD */
+ #ifndef MINIMP3_ONLY_SIMD
+ for(; i < 8; i++)
+ {
+ float u = grbuf[18 + i];
+ float d = grbuf[17 - i];
+ grbuf[18 + i] = u*g_aa[0][i] - d*g_aa[1][i];
+ grbuf[17 - i] = u*g_aa[1][i] + d*g_aa[0][i];
+ }
+ #endif /* MINIMP3_ONLY_SIMD */
+ }
+ }
+
+ /* In-place 9-point transform of y[0..8], used twice per band by L3_imdct36.
+  * NOTE(review): presumably a 9-point DCT-III as the name says; the code
+  * processes the even-indexed inputs first, then the odd-indexed ones, and
+  * finally combines both halves into the nine outputs.
+  */ static void L3_dct3_9(float *y)
+ {
+ float s0, s1, s2, s3, s4, s5, s6, s7, s8, t0, t2, t4;
+
+ s0 = y[0]; s2 = y[2]; s4 = y[4]; s6 = y[6]; s8 = y[8]; /* even-indexed inputs */
+ t0 = s0 + s6*0.5f;
+ s0 -= s6;
+ t4 = (s4 + s2)*0.93969262f;
+ t2 = (s8 + s2)*0.76604444f;
+ s6 = (s4 - s8)*0.17364818f;
+ s4 += s8 - s2;
+
+ s2 = s0 - s4*0.5f;
+ y[4] = s4 + s0; /* middle output depends on the even half only */
+ s8 = t0 - t2 + s6;
+ s0 = t0 - t4 + t2;
+ s4 = t0 + t4 - s6;
+
+ s1 = y[1]; s3 = y[3]; s5 = y[5]; s7 = y[7]; /* odd-indexed inputs */
+
+ s3 *= 0.86602540f;
+ t0 = (s5 + s1)*0.98480775f;
+ t4 = (s5 - s7)*0.34202014f;
+ t2 = (s1 + s7)*0.64278761f;
+ s1 = (s1 - s5 - s7)*0.86602540f;
+
+ s5 = t0 - s3 - t2;
+ s7 = t4 - s3 - t0;
+ s3 = t4 + s3 - t2;
+
+ y[0] = s4 - s7; /* outputs: even-half term +/- odd-half term */
+ y[1] = s2 + s1;
+ y[2] = s0 - s3;
+ y[3] = s8 + s5;
+ y[5] = s8 - s5;
+ y[6] = s0 + s3;
+ y[7] = s2 - s1;
+ y[8] = s4 + s7;
+ }
+
+ /* Processes nbands consecutive blocks of 18 samples in grbuf in place.
+  *
+  * Per block: builds two 9-element vectors co[]/si[] from sums and
+  * differences of the inputs, runs L3_dct3_9 on each, negates the odd si[]
+  * entries, then combines them with the g_twid9 twiddles. One combination is
+  * stored in overlap[] (9 floats per block) for the next call; the other is
+  * mixed with the previous overlap[] through window[0..8] / window[9..17] to
+  * produce the 18 outputs.
+  * The SIMD path covers i = 0..7; the scalar loop always handles i = 8 and
+  * everything else when SIMD is unavailable.
+  */ static void L3_imdct36(float *grbuf, float *overlap, const float *window, int nbands)
+ {
+ int i, j;
+ static const float g_twid9[18] = {
+ 0.73727734f,0.79335334f,0.84339145f,0.88701083f,0.92387953f,0.95371695f,0.97629601f,0.99144486f,0.99904822f,0.67559021f,0.60876143f,0.53729961f,0.46174861f,0.38268343f,0.30070580f,0.21643961f,0.13052619f,0.04361938f
+ };
+
+ for (j = 0; j < nbands; j++, grbuf += 18, overlap += 9)
+ {
+ float co[9], si[9];
+ co[0] = -grbuf[0];
+ si[0] = grbuf[17];
+ for (i = 0; i < 4; i++) /* fold the remaining 16 inputs into co[1..8] and si[1..8] */
+ {
+ si[8 - 2*i] = grbuf[4*i + 1] - grbuf[4*i + 2];
+ co[1 + 2*i] = grbuf[4*i + 1] + grbuf[4*i + 2];
+ si[7 - 2*i] = grbuf[4*i + 4] - grbuf[4*i + 3];
+ co[2 + 2*i] = -(grbuf[4*i + 3] + grbuf[4*i + 4]);
+ }
+ L3_dct3_9(co);
+ L3_dct3_9(si);
+
+ si[1] = -si[1];
+ si[3] = -si[3];
+ si[5] = -si[5];
+ si[7] = -si[7];
+
+ i = 0;
+
+ #if HAVE_SIMD
+ if (have_simd()) for (; i < 8; i += 4)
+ {
+ f4 vovl = VLD(overlap + i);
+ f4 vc = VLD(co + i);
+ f4 vs = VLD(si + i);
+ f4 vr0 = VLD(g_twid9 + i);
+ f4 vr1 = VLD(g_twid9 + 9 + i);
+ f4 vw0 = VLD(window + i);
+ f4 vw1 = VLD(window + 9 + i);
+ f4 vsum = VADD(VMUL(vc, vr1), VMUL(vs, vr0));
+ VSTORE(overlap + i, VSUB(VMUL(vc, vr0), VMUL(vs, vr1)));
+ VSTORE(grbuf + i, VSUB(VMUL(vovl, vw0), VMUL(vsum, vw1)));
+ vsum = VADD(VMUL(vovl, vw1), VMUL(vsum, vw0));
+ VSTORE(grbuf + 14 - i, VREV(vsum)); /* mirrored half: outputs 17-i .. 14-i */
+ }
+ #endif /* HAVE_SIMD */
+ for (; i < 9; i++)
+ {
+ float ovl = overlap[i]; /* saved by the previous call for this block */
+ float sum = co[i]*g_twid9[9 + i] + si[i]*g_twid9[0 + i];
+ overlap[i] = co[i]*g_twid9[0 + i] - si[i]*g_twid9[9 + i];
+ grbuf[i] = ovl*window[0 + i] - sum*window[9 + i];
+ grbuf[17 - i] = ovl*window[9 + i] + sum*window[0 + i];
+ }
+ }
+ }
+
+ /*
+  * 3-point transform of (x0, x1, x2) into dst[0..2]:
+  *   dst[0] = (x0 - x2/2) + x1*0.8660254
+  *   dst[1] =  x0 + x2
+  *   dst[2] = (x0 - x2/2) - x1*0.8660254
+  */
+ static void L3_idct3(float x0, float x1, float x2, float *dst)
+ {
+     float base = x0 - x2*0.5f;
+     float odd = x1*0.86602540f;
+     dst[0] = base + odd;
+     dst[1] = x0 + x2;
+     dst[2] = base - odd;
+ }
+
+ /* Short-window counterpart of the per-block step in L3_imdct36.
+  *
+  * Reads six inputs from x at a stride of 3 (x[0], x[3], ..., x[15]), runs two
+  * L3_idct3 transforms to obtain co[]/si[], and combines them with g_twid3.
+  * Six output samples go to dst[0..5]; overlap[0..2] is both consumed (as the
+  * previous tail) and rewritten (as the tail for the next call).
+  */ static void L3_imdct12(float *x, float *dst, float *overlap)
+ {
+ static const float g_twid3[6] = { 0.79335334f,0.92387953f,0.99144486f, 0.60876143f,0.38268343f,0.13052619f };
+ float co[3], si[3];
+ int i;
+
+ L3_idct3(-x[0], x[6] + x[3], x[12] + x[9], co);
+ L3_idct3(x[15], x[12] - x[9], x[6] - x[3], si);
+ si[1] = -si[1];
+
+ for (i = 0; i < 3; i++)
+ {
+ float ovl = overlap[i];
+ float sum = co[i]*g_twid3[3 + i] + si[i]*g_twid3[0 + i];
+ overlap[i] = co[i]*g_twid3[0 + i] - si[i]*g_twid3[3 + i];
+ dst[i] = ovl*g_twid3[2 - i] - sum*g_twid3[5 - i]; /* g_twid3 is reused, index-reversed, as the window */
+ dst[5 - i] = ovl*g_twid3[5 - i] + sum*g_twid3[2 - i];
+ }
+ }
+
+ /*
+  * Short-block synthesis for nbands consecutive 18-sample blocks of grbuf.
+  *
+  * For each block the 18 inputs are saved, the first six outputs are taken
+  * from the stored overlap, and three L3_imdct12 passes (input offsets 0, 1
+  * and 2) fill outputs 6..11, outputs 12..17 and the first six overlap
+  * entries. All three passes share overlap[6..8] as their running tail.
+  */
+ static void L3_imdct_short(float *grbuf, float *overlap, int nbands)
+ {
+     int band;
+     for (band = 0; band < nbands; band++)
+     {
+         float *out = grbuf + 18*band;
+         float *ovl = overlap + 9*band;
+         float in[18];
+
+         memcpy(in, out, sizeof(in));
+         memcpy(out, ovl, 6*sizeof(float));
+         L3_imdct12(in, out + 6, ovl + 6);
+         L3_imdct12(in + 1, out + 12, ovl + 6);
+         L3_imdct12(in + 2, ovl, ovl + 6);
+     }
+ }
+
+ /*
+  * Negates every odd-indexed sample (1, 3, ..., 17) of every odd-numbered
+  * 18-sample block (1, 3, ..., 31) in the 576-sample buffer grbuf.
+  */
+ static void L3_change_sign(float *grbuf)
+ {
+     int band;
+     for (band = 1; band < 32; band += 2)
+     {
+         float *blk = grbuf + 18*band;
+         int k = 1;
+         while (k < 18)
+         {
+             blk[k] = -blk[k];
+             k += 2;
+         }
+     }
+ }
+
+ /*
+  * Runs the inverse transform over all 32 blocks of one granule/channel.
+  *
+  * The first n_long_bands blocks always go through L3_imdct36 with window
+  * row 0. The remaining 32 - n_long_bands blocks use L3_imdct_short when
+  * block_type is SHORT_BLOCK_TYPE, otherwise L3_imdct36 with window row 1 for
+  * STOP_BLOCK_TYPE and row 0 for any other block type.
+  */
+ static void L3_imdct_gr(float *grbuf, float *overlap, unsigned block_type, unsigned n_long_bands)
+ {
+     static const float g_mdct_window[2][18] = {
+         { 0.99904822f,0.99144486f,0.97629601f,0.95371695f,0.92387953f,0.88701083f,0.84339145f,0.79335334f,0.73727734f,0.04361938f,0.13052619f,0.21643961f,0.30070580f,0.38268343f,0.46174861f,0.53729961f,0.60876143f,0.67559021f },
+         { 1,1,1,1,1,1,0.99144486f,0.92387953f,0.79335334f,0,0,0,0,0,0,0.13052619f,0.38268343f,0.60876143f }
+     };
+     float *rest_buf = grbuf;
+     float *rest_ovl = overlap;
+
+     if (n_long_bands != 0)
+     {
+         L3_imdct36(grbuf, overlap, g_mdct_window[0], n_long_bands);
+         rest_buf += 18*n_long_bands;
+         rest_ovl += 9*n_long_bands;
+     }
+
+     if (block_type != SHORT_BLOCK_TYPE)
+     {
+         L3_imdct36(rest_buf, rest_ovl, g_mdct_window[block_type == STOP_BLOCK_TYPE], 32 - n_long_bands);
+     } else
+     {
+         L3_imdct_short(rest_buf, rest_ovl, 32 - n_long_bands);
+     }
+ }
+
+ /*
+  * Stores the unread tail of s->maindata into h->reserv_buf.
+  *
+  * The tail starts at the current bit position rounded up to a whole byte and
+  * ends at s->bs.limit / 8. If it is longer than MAX_BITRESERVOIR_BYTES only
+  * the last MAX_BITRESERVOIR_BYTES bytes are kept. h->reserv receives the
+  * resulting byte count, which may be zero or negative when nothing is left.
+  */
+ static void L3_save_reservoir(mp3dec_t *h, mp3dec_scratch_t *s)
+ {
+     int first = (s->bs.pos + 7)/8u;
+     int count = s->bs.limit/8u - first;
+
+     if (count > MAX_BITRESERVOIR_BYTES)
+     {
+         /* drop the oldest bytes so that exactly the maximum remains */
+         first = first + count - MAX_BITRESERVOIR_BYTES;
+         count = MAX_BITRESERVOIR_BYTES;
+     }
+     h->reserv = count;
+     if (count > 0)
+     {
+         memmove(h->reserv_buf, s->maindata + first, count);
+     }
+ }
+
+ /*
+  * Assembles the main-data buffer for the current frame in s->maindata:
+  * the last min(h->reserv, main_data_begin) bytes of the saved reservoir,
+  * followed by the whole bytes left in bs from its current position, and
+  * re-initialises s->bs over the result.
+  * Returns non-zero when the reservoir held at least main_data_begin bytes.
+  */
+ static int L3_restore_reservoir(mp3dec_t *h, bs_t *bs, mp3dec_scratch_t *s, int main_data_begin)
+ {
+     int from_frame = (bs->limit - bs->pos)/8;
+     int from_reserv = MINIMP3_MIN(h->reserv, main_data_begin);
+     int reserv_skip = MINIMP3_MAX(0, h->reserv - main_data_begin);
+
+     memcpy(s->maindata, h->reserv_buf + reserv_skip, from_reserv);
+     memcpy(s->maindata + from_reserv, bs->buf + bs->pos/8, from_frame);
+     bs_init(&s->bs, s->maindata, from_reserv + from_frame);
+     return h->reserv >= main_data_begin;
+ }
+
+ /* Decodes one granule for nch channels into s->grbuf[].
+  *
+  * Stage 1, per channel: scalefactors (into the shared s->scf) followed
+  * immediately by Huffman decoding that consumes them; part_23_length fixes
+  * where the channel's data ends in s->bs.
+  * Stage 2: joint-stereo processing, intensity stereo taking precedence over
+  * plain mid/side.
+  * Stage 3, per channel: short-block reordering (using s->syn[0] as scratch),
+  * anti-aliasing, inverse transform with h->mdct_overlap[ch], sign change.
+  * gr_info points at nch consecutive side-info records, one per channel.
+  */ static void L3_decode(mp3dec_t *h, mp3dec_scratch_t *s, L3_gr_info_t *gr_info, int nch)
+ {
+ int ch;
+
+ for (ch = 0; ch < nch; ch++)
+ {
+ int layer3gr_limit = s->bs.pos + gr_info[ch].part_23_length; /* bit position where this channel's data ends */
+ L3_decode_scalefactors(h->header, s->ist_pos[ch], &s->bs, gr_info + ch, s->scf, ch);
+ L3_huffman(s->grbuf[ch], &s->bs, gr_info + ch, s->scf, layer3gr_limit);
+ }
+
+ if (HDR_TEST_I_STEREO(h->header))
+ {
+ L3_intensity_stereo(s->grbuf[0], s->ist_pos[1], gr_info, h->header);
+ } else if (HDR_IS_MS_STEREO(h->header))
+ {
+ L3_midside_stereo(s->grbuf[0], 576);
+ }
+
+ for (ch = 0; ch < nch; ch++, gr_info++)
+ {
+ int aa_bands = 31;
+ int n_long_bands = (gr_info->mixed_block_flag ? 2 : 0) << (int)(HDR_GET_MY_SAMPLE_RATE(h->header) == 2); /* 0, 2, or 4 when the sample-rate index is 2 */
+
+ if (gr_info->n_short_sfb)
+ {
+ aa_bands = n_long_bands - 1; /* anti-alias only between the long blocks */
+ L3_reorder(s->grbuf[ch] + n_long_bands*18, s->syn[0], gr_info->sfbtab + gr_info->n_long_sfb);
+ }
+
+ L3_antialias(s->grbuf[ch], aa_bands);
+ L3_imdct_gr(s->grbuf[ch], h->mdct_overlap[ch], gr_info->block_type, n_long_bands);
+ L3_change_sign(s->grbuf[ch]);
+ }
+ }
+
+/*
+ * In-place 32-point transform applied to the first n columns of grbuf.
+ * For column k the 32 inputs are grbuf[k + 18*j], j = 0..31 (rows are 18
+ * floats apart), and the 32 results are written back to the same positions.
+ *
+ * When SIMD support is compiled in and have_simd() reports it at run time,
+ * four adjacent columns are processed per iteration; otherwise (unless
+ * MINIMP3_ONLY_SIMD is defined) a scalar loop handles one column at a time.
+ */
+static void mp3d_DCT_II(float *grbuf, int n)
+{
+ /* Multipliers for the first butterfly stage, three per value of i. */
+ static const float g_sec[24] = {
+ 10.19000816f,0.50060302f,0.50241929f,3.40760851f,0.50547093f,0.52249861f,2.05778098f,0.51544732f,0.56694406f,1.48416460f,0.53104258f,0.64682180f,1.16943991f,0.55310392f,0.78815460f,0.97256821f,0.58293498f,1.06067765f,0.83934963f,0.62250412f,1.72244716f,0.74453628f,0.67480832f,5.10114861f
+ };
+ int i, k = 0;
+#if HAVE_SIMD
+ /* SIMD path: each vector lane holds one of four adjacent columns. */
+ if (have_simd()) for (; k < n; k += 4)
+ {
+ f4 t[4][8], *x;
+ float *y = grbuf + k;
+
+ /* Stage 1: fold the 32 rows into four groups of eight values. */
+ for (x = t[0], i = 0; i < 8; i++, x++)
+ {
+ f4 x0 = VLD(&y[i*18]);
+ f4 x1 = VLD(&y[(15 - i)*18]);
+ f4 x2 = VLD(&y[(16 + i)*18]);
+ f4 x3 = VLD(&y[(31 - i)*18]);
+ f4 t0 = VADD(x0, x3);
+ f4 t1 = VADD(x1, x2);
+ f4 t2 = VMUL_S(VSUB(x1, x2), g_sec[3*i + 0]);
+ f4 t3 = VMUL_S(VSUB(x0, x3), g_sec[3*i + 1]);
+ x[0] = VADD(t0, t1);
+ x[8] = VMUL_S(VSUB(t0, t1), g_sec[3*i + 2]);
+ x[16] = VADD(t3, t2);
+ x[24] = VMUL_S(VSUB(t3, t2), g_sec[3*i + 2]);
+ }
+ /* Stage 2: eight-point butterfly network on each of the four groups. */
+ for (x = t[0], i = 0; i < 4; i++, x += 8)
+ {
+ f4 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
+ xt = VSUB(x0, x7); x0 = VADD(x0, x7);
+ x7 = VSUB(x1, x6); x1 = VADD(x1, x6);
+ x6 = VSUB(x2, x5); x2 = VADD(x2, x5);
+ x5 = VSUB(x3, x4); x3 = VADD(x3, x4);
+ x4 = VSUB(x0, x3); x0 = VADD(x0, x3);
+ x3 = VSUB(x1, x2); x1 = VADD(x1, x2);
+ x[0] = VADD(x0, x1);
+ x[4] = VMUL_S(VSUB(x0, x1), 0.70710677f);
+ x5 = VADD(x5, x6);
+ x6 = VMUL_S(VADD(x6, x7), 0.70710677f);
+ x7 = VADD(x7, xt);
+ x3 = VMUL_S(VADD(x3, x4), 0.70710677f);
+ x5 = VSUB(x5, VMUL_S(x7, 0.198912367f)); /* rotate by PI/8 */
+ x7 = VADD(x7, VMUL_S(x5, 0.382683432f));
+ x5 = VSUB(x5, VMUL_S(x7, 0.198912367f));
+ x0 = VSUB(xt, x6); xt = VADD(xt, x6);
+ x[1] = VMUL_S(VADD(xt, x7), 0.50979561f);
+ x[2] = VMUL_S(VADD(x4, x3), 0.54119611f);
+ x[3] = VMUL_S(VSUB(x0, x5), 0.60134488f);
+ x[5] = VMUL_S(VADD(x0, x5), 0.89997619f);
+ x[6] = VMUL_S(VSUB(x4, x3), 1.30656302f);
+ x[7] = VMUL_S(VSUB(xt, x7), 2.56291556f);
+ }
+
+ /* Fewer than three columns remain from k: store only the low two lanes of each result. */
+ if (k > n - 3)
+ {
+#if HAVE_SSE
+#define VSAVE2(i, v) _mm_storel_pi((__m64 *)(void*)&y[i*18], v)
+#else /* HAVE_SSE */
+#define VSAVE2(i, v) vst1_f32((float32_t *)&y[i*18], vget_low_f32(v))
+#endif /* HAVE_SSE */
+ for (i = 0; i < 7; i++, y += 4*18)
+ {
+ f4 s = VADD(t[3][i], t[3][i + 1]);
+ VSAVE2(0, t[0][i]);
+ VSAVE2(1, VADD(t[2][i], s));
+ VSAVE2(2, VADD(t[1][i], t[1][i + 1]));
+ VSAVE2(3, VADD(t[2][1 + i], s));
+ }
+ VSAVE2(0, t[0][7]);
+ VSAVE2(1, VADD(t[2][7], t[3][7]));
+ VSAVE2(2, t[1][7]);
+ VSAVE2(3, t[3][7]);
+ } else
+ {
+ /* Otherwise store all four lanes. */
+#define VSAVE4(i, v) VSTORE(&y[i*18], v)
+ for (i = 0; i < 7; i++, y += 4*18)
+ {
+ f4 s = VADD(t[3][i], t[3][i + 1]);
+ VSAVE4(0, t[0][i]);
+ VSAVE4(1, VADD(t[2][i], s));
+ VSAVE4(2, VADD(t[1][i], t[1][i + 1]));
+ VSAVE4(3, VADD(t[2][1 + i], s));
+ }
+ VSAVE4(0, t[0][7]);
+ VSAVE4(1, VADD(t[2][7], t[3][7]));
+ VSAVE4(2, t[1][7]);
+ VSAVE4(3, t[3][7]);
+ }
+ } else
+#endif /* HAVE_SIMD */
+#ifdef MINIMP3_ONLY_SIMD
+ {}
+#else /* MINIMP3_ONLY_SIMD */
+ /* Scalar path: the same two stages and write-back, one column at a time. */
+ for (; k < n; k++)
+ {
+ float t[4][8], *x, *y = grbuf + k;
+
+ for (x = t[0], i = 0; i < 8; i++, x++)
+ {
+ float x0 = y[i*18];
+ float x1 = y[(15 - i)*18];
+ float x2 = y[(16 + i)*18];
+ float x3 = y[(31 - i)*18];
+ float t0 = x0 + x3;
+ float t1 = x1 + x2;
+ float t2 = (x1 - x2)*g_sec[3*i + 0];
+ float t3 = (x0 - x3)*g_sec[3*i + 1];
+ x[0] = t0 + t1;
+ x[8] = (t0 - t1)*g_sec[3*i + 2];
+ x[16] = t3 + t2;
+ x[24] = (t3 - t2)*g_sec[3*i + 2];
+ }
+ for (x = t[0], i = 0; i < 4; i++, x += 8)
+ {
+ float x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
+ xt = x0 - x7; x0 += x7;
+ x7 = x1 - x6; x1 += x6;
+ x6 = x2 - x5; x2 += x5;
+ x5 = x3 - x4; x3 += x4;
+ x4 = x0 - x3; x0 += x3;
+ x3 = x1 - x2; x1 += x2;
+ x[0] = x0 + x1;
+ x[4] = (x0 - x1)*0.70710677f;
+ x5 = x5 + x6;
+ x6 = (x6 + x7)*0.70710677f;
+ x7 = x7 + xt;
+ x3 = (x3 + x4)*0.70710677f;
+ x5 -= x7*0.198912367f; /* rotate by PI/8 */
+ x7 += x5*0.382683432f;
+ x5 -= x7*0.198912367f;
+ x0 = xt - x6; xt += x6;
+ x[1] = (xt + x7)*0.50979561f;
+ x[2] = (x4 + x3)*0.54119611f;
+ x[3] = (x0 - x5)*0.60134488f;
+ x[5] = (x0 + x5)*0.89997619f;
+ x[6] = (x4 - x3)*1.30656302f;
+ x[7] = (xt - x7)*2.56291556f;
+
+ }
+ /* Write back four rows per step, combining neighbouring group outputs. */
+ for (i = 0; i < 7; i++, y += 4*18)
+ {
+ y[0*18] = t[0][i];
+ y[1*18] = t[2][i] + t[3][i] + t[3][i + 1];
+ y[2*18] = t[1][i] + t[1][i + 1];
+ y[3*18] = t[2][i + 1] + t[3][i] + t[3][i + 1];
+ }
+ y[0*18] = t[0][7];
+ y[1*18] = t[2][7] + t[3][7];
+ y[2*18] = t[1][7];
+ y[3*18] = t[3][7];
+ }
+#endif /* MINIMP3_ONLY_SIMD */
+}
+
+#ifndef MINIMP3_FLOAT_OUTPUT
+/*
+ * Convert one synthesized sample to saturated signed 16-bit PCM.
+ *
+ * Rounding: 0.5 is added and the result truncated; when the biased value is
+ * negative, 1 is subtracted so that negative samples round the same way the
+ * NEON path of mp3d_synth does (it tests the biased float, not the truncated
+ * integer). Testing the truncated integer instead would map every sample in
+ * (-1.5, -0.5) to 0 and never produce -1.
+ */
+static int16_t mp3d_scale_pcm(float sample)
+{
+    float biased = sample + .5f;
+#if HAVE_ARMV6
+    int32_t s32 = (int32_t)biased;
+    s32 -= (biased < 0);
+    int16_t s = (int16_t)minimp3_clip_int16_arm(s32);
+#else
+    if (sample >=  32766.5) return (int16_t) 32767;
+    if (sample <= -32767.5) return (int16_t)-32768;
+    int16_t s = (int16_t)biased;
+    s -= (biased < 0); /* away from zero, to be compliant */
+#endif
+    return s;
+}
+#else /* MINIMP3_FLOAT_OUTPUT */
+/* Float output: scale the synthesized sample by 1/32768. */
+static float mp3d_scale_pcm(float sample)
+{
+    return sample*(1.f/32768.f);
+}
+#endif /* MINIMP3_FLOAT_OUTPUT */
+
+/*
+ * Produce two output samples from fixed integer window weights.
+ * pcm[0] is a weighted sum of the fifteen values z[0], z[1*64] .. z[14*64];
+ * pcm[16*nch] is a weighted sum of the eight even-indexed values
+ * z[2 + 0*64], z[2 + 2*64] .. z[2 + 14*64]. Both sums go through
+ * mp3d_scale_pcm before being stored.
+ */
+static void mp3d_synth_pair(mp3d_sample_t *pcm, int nch, const float *z)
+{
+ float a;
+ a = (z[14*64] - z[ 0]) * 29;
+ a += (z[ 1*64] + z[13*64]) * 213;
+ a += (z[12*64] - z[ 2*64]) * 459;
+ a += (z[ 3*64] + z[11*64]) * 2037;
+ a += (z[10*64] - z[ 4*64]) * 5153;
+ a += (z[ 5*64] + z[ 9*64]) * 6574;
+ a += (z[ 8*64] - z[ 6*64]) * 37489;
+ a += z[ 7*64] * 75038;
+ pcm[0] = mp3d_scale_pcm(a);
+
+ /* second sample: same layout, two floats further on */
+ z += 2;
+ a = z[14*64] * 104;
+ a += z[12*64] * 1567;
+ a += z[10*64] * 9727;
+ a += z[ 8*64] * 64019;
+ a += z[ 6*64] * -9975;
+ a += z[ 4*64] * -45;
+ a += z[ 2*64] * 146;
+ a += z[ 0*64] * -5;
+ pcm[16*nch] = mp3d_scale_pcm(a);
+}
+
+/*
+ * Windowed synthesis for two consecutive rows of transformed subband data.
+ * xl points at the first of the two rows for the first channel; values are
+ * read at xl[18*j] and xl[1 + 18*j]. The second channel lives 576 floats
+ * further on (xr); for nch == 1, xr and dstr alias xl and dstl.
+ * Each call writes 64 samples per channel into dstl/dstr, interleaved with
+ * stride nch: four via mp3d_synth_pair (offsets 0, 16, 32, 48) and the rest
+ * in the main loop (offsets 15-i, 17+i, 47-i, 49+i for i = 14..0).
+ * lins is the working line buffer; new input is placed starting at
+ * lins + 15*64 and earlier content is read back through negative offsets.
+ */
+static void mp3d_synth(float *xl, mp3d_sample_t *dstl, int nch, float *lins)
+{
+ int i;
+ float *xr = xl + 576*(nch - 1);
+ mp3d_sample_t *dstr = dstl + (nch - 1);
+
+ /* Window coefficients: 15 rows of 16, consumed in order through w (two per V/S macro). */
+ static const float g_win[] = {
+ -1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992,
+ -1,24,-35,202,222,347,-581,2080,1952,4425,-5879,7640,5288,33791,-41176,74856,
+ -1,21,-38,196,225,294,-645,2087,1893,4063,-6237,8092,4561,31947,-43006,74630,
+ -1,19,-41,190,227,244,-711,2085,1822,3705,-6589,8492,3776,30112,-44821,74313,
+ -1,17,-45,183,228,197,-779,2075,1739,3351,-6935,8840,2935,28289,-46617,73908,
+ -1,16,-49,176,228,153,-848,2057,1644,3004,-7271,9139,2037,26482,-48390,73415,
+ -2,14,-53,169,227,111,-919,2032,1535,2663,-7597,9389,1082,24694,-50137,72835,
+ -2,13,-58,161,224,72,-991,2001,1414,2330,-7910,9592,70,22929,-51853,72169,
+ -2,11,-63,154,221,36,-1064,1962,1280,2006,-8209,9750,-998,21189,-53534,71420,
+ -2,10,-68,147,215,2,-1137,1919,1131,1692,-8491,9863,-2122,19478,-55178,70590,
+ -3,9,-73,139,208,-29,-1210,1870,970,1388,-8755,9935,-3300,17799,-56778,69679,
+ -3,8,-79,132,200,-57,-1283,1817,794,1095,-8998,9966,-4533,16155,-58333,68692,
+ -4,7,-85,125,189,-83,-1356,1759,605,814,-9219,9959,-5818,14548,-59838,67629,
+ -4,7,-91,117,177,-106,-1428,1698,402,545,-9416,9916,-7154,12980,-61289,66494,
+ -5,6,-97,111,163,-127,-1498,1634,185,288,-9585,9838,-8540,11455,-62684,65290
+ };
+ float *zlin = lins + 15*64;
+ const float *w = g_win;
+
+ /* Seed the two slots the pair-synthesis calls below depend on (left/right values interleaved). */
+ zlin[4*15] = xl[18*16];
+ zlin[4*15 + 1] = xr[18*16];
+ zlin[4*15 + 2] = xl[0];
+ zlin[4*15 + 3] = xr[0];
+
+ zlin[4*31] = xl[1 + 18*16];
+ zlin[4*31 + 1] = xr[1 + 18*16];
+ zlin[4*31 + 2] = xl[1];
+ zlin[4*31 + 3] = xr[1];
+
+ /* Four samples per channel come from the dedicated pair routine. */
+ mp3d_synth_pair(dstr, nch, lins + 4*15 + 1);
+ mp3d_synth_pair(dstr + 32*nch, nch, lins + 4*15 + 64 + 1);
+ mp3d_synth_pair(dstl, nch, lins + 4*15);
+ mp3d_synth_pair(dstl + 32*nch, nch, lins + 4*15 + 64);
+
+#if HAVE_SIMD
+ /* SIMD path: a and b each hold four accumulators (left/right for both rows). */
+ if (have_simd()) for (i = 14; i >= 0; i--)
+ {
+#define VLOAD(k) f4 w0 = VSET(*w++); f4 w1 = VSET(*w++); f4 vz = VLD(&zlin[4*i - 64*k]); f4 vy = VLD(&zlin[4*i - 64*(15 - k)]);
+#define V0(k) { VLOAD(k) b = VADD(VMUL(vz, w1), VMUL(vy, w0)) ; a = VSUB(VMUL(vz, w0), VMUL(vy, w1)); }
+#define V1(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vz, w0), VMUL(vy, w1))); }
+#define V2(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vy, w1), VMUL(vz, w0))); }
+ f4 a, b;
+ zlin[4*i] = xl[18*(31 - i)];
+ zlin[4*i + 1] = xr[18*(31 - i)];
+ zlin[4*i + 2] = xl[1 + 18*(31 - i)];
+ zlin[4*i + 3] = xr[1 + 18*(31 - i)];
+ zlin[4*i + 64] = xl[1 + 18*(1 + i)];
+ zlin[4*i + 64 + 1] = xr[1 + 18*(1 + i)];
+ zlin[4*i - 64 + 2] = xl[18*(1 + i)];
+ zlin[4*i - 64 + 3] = xr[18*(1 + i)];
+
+ V0(0) V2(1) V1(2) V2(3) V1(4) V2(5) V1(6) V2(7)
+
+ {
+#ifndef MINIMP3_FLOAT_OUTPUT
+#if HAVE_SSE
+ /* SSE: clamp to the int16 range, convert, pack, then scatter the eight results. */
+ static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
+ static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
+ __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
+ _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
+ dstr[(15 - i)*nch] = _mm_extract_epi16(pcm8, 1);
+ dstr[(17 + i)*nch] = _mm_extract_epi16(pcm8, 5);
+ dstl[(15 - i)*nch] = _mm_extract_epi16(pcm8, 0);
+ dstl[(17 + i)*nch] = _mm_extract_epi16(pcm8, 4);
+ dstr[(47 - i)*nch] = _mm_extract_epi16(pcm8, 3);
+ dstr[(49 + i)*nch] = _mm_extract_epi16(pcm8, 7);
+ dstl[(47 - i)*nch] = _mm_extract_epi16(pcm8, 2);
+ dstl[(49 + i)*nch] = _mm_extract_epi16(pcm8, 6);
+#else /* HAVE_SSE */
+ /* NEON: add 0.5, truncate, subtract 1 where the biased value is negative, then saturate to int16. */
+ int16x4_t pcma, pcmb;
+ a = VADD(a, VSET(0.5f));
+ b = VADD(b, VSET(0.5f));
+ pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
+ pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
+ vst1_lane_s16(dstr + (15 - i)*nch, pcma, 1);
+ vst1_lane_s16(dstr + (17 + i)*nch, pcmb, 1);
+ vst1_lane_s16(dstl + (15 - i)*nch, pcma, 0);
+ vst1_lane_s16(dstl + (17 + i)*nch, pcmb, 0);
+ vst1_lane_s16(dstr + (47 - i)*nch, pcma, 3);
+ vst1_lane_s16(dstr + (49 + i)*nch, pcmb, 3);
+ vst1_lane_s16(dstl + (47 - i)*nch, pcma, 2);
+ vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2);
+#endif /* HAVE_SSE */
+
+#else /* MINIMP3_FLOAT_OUTPUT */
+
+ /* Float output: scale by 1/32768 and store each lane separately. */
+ static const f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f };
+ a = VMUL(a, g_scale);
+ b = VMUL(b, g_scale);
+#if HAVE_SSE
+ _mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)));
+ _mm_store_ss(dstr + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1)));
+ _mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)));
+ _mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0)));
+ _mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)));
+ _mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3)));
+ _mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)));
+ _mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2)));
+#else /* HAVE_SSE */
+ vst1q_lane_f32(dstr + (15 - i)*nch, a, 1);
+ vst1q_lane_f32(dstr + (17 + i)*nch, b, 1);
+ vst1q_lane_f32(dstl + (15 - i)*nch, a, 0);
+ vst1q_lane_f32(dstl + (17 + i)*nch, b, 0);
+ vst1q_lane_f32(dstr + (47 - i)*nch, a, 3);
+ vst1q_lane_f32(dstr + (49 + i)*nch, b, 3);
+ vst1q_lane_f32(dstl + (47 - i)*nch, a, 2);
+ vst1q_lane_f32(dstl + (49 + i)*nch, b, 2);
+#endif /* HAVE_SSE */
+#endif /* MINIMP3_FLOAT_OUTPUT */
+ }
+ } else
+#endif /* HAVE_SIMD */
+#ifdef MINIMP3_ONLY_SIMD
+ {}
+#else /* MINIMP3_ONLY_SIMD */
+ /* Scalar path: same accumulation with four-element arrays in place of vectors. */
+ for (i = 14; i >= 0; i--)
+ {
+#define LOAD(k) float w0 = *w++; float w1 = *w++; float *vz = &zlin[4*i - k*64]; float *vy = &zlin[4*i - (15 - k)*64];
+#define S0(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] = vz[j]*w1 + vy[j]*w0, a[j] = vz[j]*w0 - vy[j]*w1; }
+#define S1(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vz[j]*w0 - vy[j]*w1; }
+#define S2(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vy[j]*w1 - vz[j]*w0; }
+ float a[4], b[4];
+
+ zlin[4*i] = xl[18*(31 - i)];
+ zlin[4*i + 1] = xr[18*(31 - i)];
+ zlin[4*i + 2] = xl[1 + 18*(31 - i)];
+ zlin[4*i + 3] = xr[1 + 18*(31 - i)];
+ zlin[4*(i + 16)] = xl[1 + 18*(1 + i)];
+ zlin[4*(i + 16) + 1] = xr[1 + 18*(1 + i)];
+ zlin[4*(i - 16) + 2] = xl[18*(1 + i)];
+ zlin[4*(i - 16) + 3] = xr[18*(1 + i)];
+
+ S0(0) S2(1) S1(2) S2(3) S1(4) S2(5) S1(6) S2(7)
+
+ dstr[(15 - i)*nch] = mp3d_scale_pcm(a[1]);
+ dstr[(17 + i)*nch] = mp3d_scale_pcm(b[1]);
+ dstl[(15 - i)*nch] = mp3d_scale_pcm(a[0]);
+ dstl[(17 + i)*nch] = mp3d_scale_pcm(b[0]);
+ dstr[(47 - i)*nch] = mp3d_scale_pcm(a[3]);
+ dstr[(49 + i)*nch] = mp3d_scale_pcm(b[3]);
+ dstl[(47 - i)*nch] = mp3d_scale_pcm(a[2]);
+ dstl[(49 + i)*nch] = mp3d_scale_pcm(b[2]);
+ }
+#endif /* MINIMP3_ONLY_SIMD */
+}
+
+/*
+ * Synthesize PCM for one granule: transform every channel of grbuf, run the
+ * windowed synthesis two rows at a time, and carry the last 15*64 floats of
+ * the line buffer over to the next call through qmf_state.
+ */
+static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, mp3d_sample_t *pcm, float *lins)
+{
+    int ch, band;
+    const size_t state_bytes = sizeof(float)*15*64;
+    float *tail;
+
+    for (ch = 0; ch < nch; ch++)
+        mp3d_DCT_II(grbuf + 576*ch, nbands);
+
+    /* the line buffer starts with the state saved by the previous granule */
+    memcpy(lins, qmf_state, state_bytes);
+
+    for (band = 0; band < nbands; band += 2)
+        mp3d_synth(grbuf + band, pcm + 32*nch*band, nch, lins + band*64);
+
+    tail = lins + nbands*64;
+#ifndef MINIMP3_NONSTANDARD_BUT_LOGICAL
+    if (nch == 1)
+    {
+        /* mono: only every second float of the state is carried over */
+        int k;
+        for (k = 0; k < 15*64; k += 2)
+            qmf_state[k] = tail[k];
+    } else
+#endif /* MINIMP3_NONSTANDARD_BUT_LOGICAL */
+    {
+        memcpy(qmf_state, tail, state_bytes);
+    }
+}
+
+/*
+ * Check that the header at hdr is followed by a chain of frames with matching
+ * headers. Walks up to MAX_FRAME_SYNC_MATCHES frames. Returns 1 when all of
+ * them match, 0 on the first mismatch; if the data runs out first, returns 1
+ * only when at least one following header was confirmed.
+ */
+static int mp3d_match_frame(const uint8_t *hdr, int mp3_bytes, int frame_bytes)
+{
+    int pos = 0, matched = 0;
+
+    while (matched < MAX_FRAME_SYNC_MATCHES)
+    {
+        const uint8_t *cur = hdr + pos;
+        pos += hdr_frame_bytes(cur, frame_bytes) + hdr_padding(cur);
+        if (pos + HDR_SIZE > mp3_bytes)
+            return matched > 0;
+        if (!hdr_compare(hdr, hdr + pos))
+            return 0;
+        matched++;
+    }
+    return 1;
+}
+
+/*
+ * Scan mp3[0 .. mp3_bytes) for the first position that holds a valid header
+ * confirmed by following frames. On success returns that offset and stores the
+ * frame length (padding included) in *ptr_frame_bytes. On failure returns
+ * mp3_bytes and stores 0.
+ *
+ * *free_format_bytes is read as the known free-format frame size (0 if
+ * unknown) and is updated when a free-format size is detected; it is reset to
+ * 0 whenever a candidate header is rejected.
+ */
+static int mp3d_find_frame(const uint8_t *mp3, int mp3_bytes, int *free_format_bytes, int *ptr_frame_bytes)
+{
+    int pos, k;
+
+    for (pos = 0; pos < mp3_bytes - HDR_SIZE; pos++)
+    {
+        const uint8_t *cand = mp3 + pos;
+        int frame_bytes, frame_and_padding, synced;
+
+        if (!hdr_valid(cand))
+            continue;
+
+        frame_bytes = hdr_frame_bytes(cand, *free_format_bytes);
+        frame_and_padding = frame_bytes + hdr_padding(cand);
+
+        /* frame size unknown (free format): probe for the distance to the next matching header */
+        for (k = HDR_SIZE; !frame_bytes && k < MAX_FREE_FORMAT_FRAME_SIZE && pos + 2*k < mp3_bytes - HDR_SIZE; k++)
+        {
+            int fb, nextfb;
+
+            if (!hdr_compare(cand, cand + k))
+                continue;
+            fb = k - hdr_padding(cand);
+            nextfb = fb + hdr_padding(cand + k);
+            if (pos + k + nextfb + HDR_SIZE > mp3_bytes || !hdr_compare(cand, cand + k + nextfb))
+                continue;
+            frame_and_padding = k;
+            frame_bytes = fb;
+            *free_format_bytes = fb;
+        }
+
+        synced = frame_bytes && pos + frame_and_padding <= mp3_bytes &&
+                 mp3d_match_frame(cand, mp3_bytes - pos, frame_bytes);
+        /* also accept a single frame that fills the input exactly */
+        if (synced || (!pos && frame_and_padding == mp3_bytes))
+        {
+            *ptr_frame_bytes = frame_and_padding;
+            return pos;
+        }
+        *free_format_bytes = 0;
+    }
+    *ptr_frame_bytes = 0;
+    return mp3_bytes;
+}
+
+/*
+ * Reset the decoder by clearing the first byte of the stored header.
+ * mp3dec_decode_frame only reuses the stored header when that byte is 0xff,
+ * so the next call searches for a frame from scratch.
+ */
+void mp3dec_init(mp3dec_t *dec)
+{
+    *dec->header = 0;
+}
+
+/*
+ * Decode the first frame found in mp3[0 .. mp3_bytes).
+ *
+ * dec        decoder state (reset with mp3dec_init before the first call)
+ * mp3        input bytes
+ * mp3_bytes  number of input bytes available
+ * pcm        output samples; if NULL the frame is located and described in
+ *            info but not decoded
+ * info       receives frame_bytes (offset of the frame plus its size), the
+ *            frame offset, channel count, sample rate, layer and bitrate
+ *
+ * Returns hdr_frame_samples() for the frame on success (presumably samples
+ * per channel - confirm), or 0 when no complete frame was found, when the
+ * frame data is inconsistent, or when the Layer III bit reservoir does not
+ * yet hold enough data. When no frame is found only info->frame_bytes is
+ * set, to the offset returned by mp3d_find_frame.
+ */
+int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info)
+{
+ int i = 0, igr, frame_size = 0, success = 1;
+ const uint8_t *hdr;
+ bs_t bs_frame[1];
+ mp3dec_scratch_t scratch;
+
+ /* Fast path: the input starts with a header matching the previous frame's. */
+ if (mp3_bytes > 4 && dec->header[0] == 0xff && hdr_compare(dec->header, mp3))
+ {
+ frame_size = hdr_frame_bytes(mp3, dec->free_format_bytes) + hdr_padding(mp3);
+ /* Accept it only if the frame fills the input exactly or is followed by another matching header. */
+ if (frame_size != mp3_bytes && (frame_size + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + frame_size)))
+ {
+ frame_size = 0;
+ }
+ }
+ /* Slow path: clear the whole decoder state and search for a frame. */
+ if (!frame_size)
+ {
+ memset(dec, 0, sizeof(mp3dec_t));
+ i = mp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size);
+ if (!frame_size || i + frame_size > mp3_bytes)
+ {
+ info->frame_bytes = i;
+ return 0;
+ }
+ }
+
+ hdr = mp3 + i;
+ memcpy(dec->header, hdr, HDR_SIZE);
+ info->frame_bytes = i + frame_size;
+ info->frame_offset = i;
+ info->channels = HDR_IS_MONO(hdr) ? 1 : 2;
+ info->hz = hdr_sample_rate_hz(hdr);
+ info->layer = 4 - HDR_GET_LAYER(hdr);
+ info->bitrate_kbps = hdr_bitrate_kbps(hdr);
+
+ /* No output buffer: report the frame without decoding it. */
+ if (!pcm)
+ {
+ return hdr_frame_samples(hdr);
+ }
+
+ bs_init(bs_frame, hdr + HDR_SIZE, frame_size - HDR_SIZE);
+ /* Skip the 16 bits that follow the header when HDR_IS_CRC is set. */
+ if (HDR_IS_CRC(hdr))
+ {
+ get_bits(bs_frame, 16);
+ }
+
+ if (info->layer == 3)
+ {
+ int main_data_begin = L3_read_side_info(bs_frame, scratch.gr_info, hdr);
+ if (main_data_begin < 0 || bs_frame->pos > bs_frame->limit)
+ {
+ mp3dec_init(dec);
+ return 0;
+ }
+ /* success is 0 when the reservoir holds less than main_data_begin bytes. */
+ success = L3_restore_reservoir(dec, bs_frame, &scratch, main_data_begin);
+ if (success)
+ {
+ /* Two granules when HDR_TEST_MPEG1 is set, otherwise one; each yields 576 samples per channel. */
+ for (igr = 0; igr < (HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm += 576*info->channels)
+ {
+ memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
+ L3_decode(dec, &scratch, scratch.gr_info + igr*info->channels, info->channels);
+ mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, pcm, scratch.syn[0]);
+ }
+ }
+ /* The reservoir is saved even when this frame could not be decoded. */
+ L3_save_reservoir(dec, &scratch);
+ } else
+ {
+#ifdef MINIMP3_ONLY_MP3
+ return 0;
+#else /* MINIMP3_ONLY_MP3 */
+ L12_scale_info sci[1];
+ L12_read_scale_info(hdr, bs_frame, sci);
+
+ memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
+ /* i is reused here as the running total returned by L12_dequantize_granule. */
+ for (i = 0, igr = 0; igr < 3; igr++)
+ {
+ /* Once the total reaches 12, synthesize a block of 384 samples per channel and start over. */
+ if (12 == (i += L12_dequantize_granule(scratch.grbuf[0] + i, bs_frame, sci, info->layer | 1)))
+ {
+ i = 0;
+ L12_apply_scf_384(sci, sci->scf + igr, scratch.grbuf[0]);
+ mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, pcm, scratch.syn[0]);
+ memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
+ pcm += 384*info->channels;
+ }
+ /* Reading past the end of the frame means the data is corrupt: reset and give up. */
+ if (bs_frame->pos > bs_frame->limit)
+ {
+ mp3dec_init(dec);
+ return 0;
+ }
+ }
+#endif /* MINIMP3_ONLY_MP3 */
+ }
+ return success*hdr_frame_samples(dec->header);
+}
+
+#ifdef MINIMP3_FLOAT_OUTPUT
+/*
+ * Convert num_samples floats (nominal range [-1, 1)) to saturated signed
+ * 16-bit PCM by scaling with 32768.
+ *
+ * With SIMD compiled in, groups of eight samples are converted with vector
+ * code; the remaining samples (or all of them without SIMD) use the scalar
+ * loop. The scalar loop adds 0.5, truncates, and subtracts 1 when the biased
+ * value is negative, which is the same rule the NEON branch applies. Testing
+ * the truncated integer instead would turn every scaled sample in
+ * (-1.5, -0.5) into 0.
+ */
+void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples)
+{
+    int i = 0;
+#if HAVE_SIMD
+    int aligned_count = num_samples & ~7;
+    for(; i < aligned_count; i += 8)
+    {
+        static const f4 g_scale = { 32768.0f, 32768.0f, 32768.0f, 32768.0f };
+        f4 a = VMUL(VLD(&in[i  ]), g_scale);
+        f4 b = VMUL(VLD(&in[i+4]), g_scale);
+#if HAVE_SSE
+        static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
+        static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
+        __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
+                                       _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
+        out[i  ] = _mm_extract_epi16(pcm8, 0);
+        out[i+1] = _mm_extract_epi16(pcm8, 1);
+        out[i+2] = _mm_extract_epi16(pcm8, 2);
+        out[i+3] = _mm_extract_epi16(pcm8, 3);
+        out[i+4] = _mm_extract_epi16(pcm8, 4);
+        out[i+5] = _mm_extract_epi16(pcm8, 5);
+        out[i+6] = _mm_extract_epi16(pcm8, 6);
+        out[i+7] = _mm_extract_epi16(pcm8, 7);
+#else /* HAVE_SSE */
+        int16x4_t pcma, pcmb;
+        a = VADD(a, VSET(0.5f));
+        b = VADD(b, VSET(0.5f));
+        pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
+        pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
+        vst1_lane_s16(out+i  , pcma, 0);
+        vst1_lane_s16(out+i+1, pcma, 1);
+        vst1_lane_s16(out+i+2, pcma, 2);
+        vst1_lane_s16(out+i+3, pcma, 3);
+        vst1_lane_s16(out+i+4, pcmb, 0);
+        vst1_lane_s16(out+i+5, pcmb, 1);
+        vst1_lane_s16(out+i+6, pcmb, 2);
+        vst1_lane_s16(out+i+7, pcmb, 3);
+#endif /* HAVE_SSE */
+    }
+#endif /* HAVE_SIMD */
+    for(; i < num_samples; i++)
+    {
+        float sample = in[i] * 32768.0f;
+        if (sample >=  32766.5)
+            out[i] = (int16_t) 32767;
+        else if (sample <= -32767.5)
+            out[i] = (int16_t)-32768;
+        else
+        {
+            float biased = sample + .5f;
+            int16_t s = (int16_t)biased;
+            s -= (biased < 0); /* away from zero, to be compliant */
+            out[i] = s;
+        }
+    }
+}
+#endif /* MINIMP3_FLOAT_OUTPUT */
+#endif /* MINIMP3_IMPLEMENTATION && !_MINIMP3_IMPLEMENTATION_GUARD */
diff --git a/thirdparty/minimp3/minimp3_ex.h b/thirdparty/minimp3/minimp3_ex.h
new file mode 100644
index 0000000000..e29dd15b2e
--- /dev/null
+++ b/thirdparty/minimp3/minimp3_ex.h
@@ -0,0 +1,1394 @@
+#ifndef MINIMP3_EXT_H
+#define MINIMP3_EXT_H
+/*
+ https://github.com/lieff/minimp3
+ To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide.
+ This software is distributed without any warranty.
+ See <http://creativecommons.org/publicdomain/zero/1.0/>.
+*/
+#include "minimp3.h"
+
+/* flags for mp3dec_ex_open_* functions */
+#define MP3D_SEEK_TO_BYTE 0 /* mp3dec_ex_seek seeks to byte in stream */
+#define MP3D_SEEK_TO_SAMPLE 1 /* mp3dec_ex_seek precisely seeks to sample using index (created during duration calculation scan or when mp3dec_ex_seek called) */
+#define MP3D_DO_NOT_SCAN 2 /* do not scan whole stream for duration if vbrtag not found, mp3dec_ex_t::samples will be filled only if mp3dec_ex_t::vbr_tag_found == 1 */
+#ifdef MINIMP3_ALLOW_MONO_STEREO_TRANSITION
+#define MP3D_ALLOW_MONO_STEREO_TRANSITION 4
+#define MP3D_FLAGS_MASK 7
+#else
+#define MP3D_FLAGS_MASK 3
+#endif
+
+/* compile-time config */
+#define MINIMP3_PREDECODE_FRAMES 2 /* frames to pre-decode and skip after seek (to fill internal structures) */
+/*#define MINIMP3_SEEK_IDX_LINEAR_SEARCH*/ /* define to use linear index search instead of binary search on seek */
+#define MINIMP3_IO_SIZE (128*1024) /* io buffer size for streaming functions, must be greater than MINIMP3_BUF_SIZE */
+#define MINIMP3_BUF_SIZE (16*1024) /* buffer which can hold minimum 10 consecutive mp3 frames (~16KB) worst case */
+/*#define MINIMP3_SCAN_LIMIT (256*1024)*/ /* how many bytes will be scanned to search first valid mp3 frame, to prevent stall on large non-mp3 files */
+#define MINIMP3_ENABLE_RING 0 /* WIP enable hardware magic ring buffer if available, to make less input buffer memmove(s) in callback IO mode */
+
+/* return error codes */
+#define MP3D_E_PARAM -1
+#define MP3D_E_MEMORY -2
+#define MP3D_E_IOERROR -3
+#define MP3D_E_USER -4 /* can be used to stop processing from callbacks without indicating specific error */
+#define MP3D_E_DECODE -5 /* decode error which can't be safely skipped, such as sample rate, layer and channels change */
+
+/*
+ * Result of the whole-stream loaders (mp3dec_load_buf / mp3dec_load_cb):
+ * the decoded PCM plus the stream parameters taken from the first frame.
+ */
+typedef struct
+{
+ /* decoded samples; allocated with malloc/realloc by mp3dec_load_cb (presumably released by the caller with free - confirm) */
+ mp3d_sample_t *buffer;
+ size_t samples; /* channels included, byte size = samples*sizeof(mp3d_sample_t) */
+ int channels, hz, layer, avg_bitrate_kbps;
+} mp3dec_file_info_t;
+
+/* A read-only byte range (pointer plus length); used as mp3dec_ex_t::file. */
+typedef struct
+{
+ const uint8_t *buffer;
+ size_t size;
+} mp3dec_map_info_t;
+
+/*
+ * One entry of the seek index (see mp3dec_index_t): a sample position paired
+ * with a stream offset - presumably the byte offset of the frame that
+ * contains the sample; confirm against the index-building code.
+ */
+typedef struct
+{
+ uint64_t sample;
+ uint64_t offset;
+} mp3dec_frame_t;
+
+/*
+ * Array of mp3dec_frame_t entries with its element count and capacity;
+ * stored in mp3dec_ex_t::index (presumably a growable array - confirm).
+ */
+typedef struct
+{
+ mp3dec_frame_t *frames;
+ size_t num_frames, capacity;
+} mp3dec_index_t;
+
+typedef size_t (*MP3D_READ_CB)(void *buf, size_t size, void *user_data);
+typedef int (*MP3D_SEEK_CB)(uint64_t position, void *user_data);
+
+/*
+ * Caller-supplied I/O callbacks for the *_cb functions.
+ * read(buf, size, read_data) returns the number of bytes delivered; the
+ * callers in this file treat a value larger than size as an I/O error and a
+ * short read as end of stream.
+ * seek(position, seek_data) returns 0 on success; any other value is reported
+ * as MP3D_E_IOERROR.
+ */
+typedef struct
+{
+ MP3D_READ_CB read;
+ void *read_data;
+ MP3D_SEEK_CB seek;
+ void *seek_data;
+} mp3dec_io_t;
+
+/*
+ * State of the streaming decoder used by the mp3dec_ex_* functions.
+ * NOTE(review): the code that maintains these fields is outside this part of
+ * the file; the per-field notes below are inferred from names and types and
+ * should be confirmed against the implementation.
+ */
+typedef struct
+{
+ /* low-level frame decoder state */
+ mp3dec_t mp3d;
+ /* input byte range - presumably the whole stream in buffer mode, an I/O window in callback mode */
+ mp3dec_map_info_t file;
+ /* I/O callbacks - presumably NULL in buffer mode */
+ mp3dec_io_t *io;
+ /* seek index (see MP3D_SEEK_TO_SAMPLE) */
+ mp3dec_index_t index;
+ uint64_t offset, samples, detected_samples, cur_sample, start_offset, end_offset;
+ mp3dec_frame_info_t info;
+ /* room for the samples of one decoded frame */
+ mp3d_sample_t buffer[MINIMP3_MAX_SAMPLES_PER_FRAME];
+ size_t input_consumed, input_filled;
+ int is_file, flags, vbr_tag_found, indexes_built;
+ int free_format_bytes;
+ int buffer_samples, buffer_consumed, to_skip, start_delay;
+ int last_error;
+} mp3dec_ex_t;
+
+typedef int (*MP3D_ITERATE_CB)(void *user_data, const uint8_t *frame, int frame_size, int free_format_bytes, size_t buf_size, uint64_t offset, mp3dec_frame_info_t *info);
+typedef int (*MP3D_PROGRESS_CB)(void *user_data, size_t file_size, uint64_t offset, mp3dec_frame_info_t *info);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* detect mp3/mpa format */
+int mp3dec_detect_buf(const uint8_t *buf, size_t buf_size);
+int mp3dec_detect_cb(mp3dec_io_t *io, uint8_t *buf, size_t buf_size);
+/* decode whole buffer block */
+int mp3dec_load_buf(mp3dec_t *dec, const uint8_t *buf, size_t buf_size, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data);
+int mp3dec_load_cb(mp3dec_t *dec, mp3dec_io_t *io, uint8_t *buf, size_t buf_size, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data);
+/* iterate through frames */
+int mp3dec_iterate_buf(const uint8_t *buf, size_t buf_size, MP3D_ITERATE_CB callback, void *user_data);
+int mp3dec_iterate_cb(mp3dec_io_t *io, uint8_t *buf, size_t buf_size, MP3D_ITERATE_CB callback, void *user_data);
+/* streaming decoder with seeking capability */
+int mp3dec_ex_open_buf(mp3dec_ex_t *dec, const uint8_t *buf, size_t buf_size, int flags);
+int mp3dec_ex_open_cb(mp3dec_ex_t *dec, mp3dec_io_t *io, int flags);
+void mp3dec_ex_close(mp3dec_ex_t *dec);
+int mp3dec_ex_seek(mp3dec_ex_t *dec, uint64_t position);
+size_t mp3dec_ex_read_frame(mp3dec_ex_t *dec, mp3d_sample_t **buf, mp3dec_frame_info_t *frame_info, size_t max_samples);
+size_t mp3dec_ex_read(mp3dec_ex_t *dec, mp3d_sample_t *buf, size_t samples);
+#ifndef MINIMP3_NO_STDIO
+/* stdio versions of file detect, load, iterate and stream */
+int mp3dec_detect(const char *file_name);
+int mp3dec_load(mp3dec_t *dec, const char *file_name, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data);
+int mp3dec_iterate(const char *file_name, MP3D_ITERATE_CB callback, void *user_data);
+int mp3dec_ex_open(mp3dec_ex_t *dec, const char *file_name, int flags);
+#ifdef _WIN32
+int mp3dec_detect_w(const wchar_t *file_name);
+int mp3dec_load_w(mp3dec_t *dec, const wchar_t *file_name, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data);
+int mp3dec_iterate_w(const wchar_t *file_name, MP3D_ITERATE_CB callback, void *user_data);
+int mp3dec_ex_open_w(mp3dec_ex_t *dec, const wchar_t *file_name, int flags);
+#endif
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif /*MINIMP3_EXT_H*/
+
+#ifdef MINIMP3_IMPLEMENTATION
+#include <limits.h>
+
+/*
+ * Trim trailing tags from a buffer by shrinking *pbuf_size:
+ *  - an ID3v1 tag (128 bytes starting with "TAG"), and an extended tag
+ *    (227 bytes starting with "TAG+") directly before it;
+ *  - an APEv2 tag, recognized by the 32-byte "APETAGEX" footer at the end.
+ *    The footer is removed, then the little-endian size stored at footer
+ *    offset 12 if that many bytes are available.
+ * Either check can be compiled out with MINIMP3_NOSKIP_ID3V1 /
+ * MINIMP3_NOSKIP_APEV2. buf itself is not modified.
+ *
+ * NOTE(review): the APEv2 size field is documented as already including the
+ * footer; subtracting it after the footer is only exact when the tag also
+ * has a 32-byte header - confirm against the APEv2 specification.
+ */
+static void mp3dec_skip_id3v1(const uint8_t *buf, size_t *pbuf_size)
+{
+    size_t buf_size = *pbuf_size;
+#ifndef MINIMP3_NOSKIP_ID3V1
+    if (buf_size >= 128 && !memcmp(buf + buf_size - 128, "TAG", 3))
+    {
+        buf_size -= 128;
+        if (buf_size >= 227 && !memcmp(buf + buf_size - 227, "TAG+", 4))
+            buf_size -= 227;
+    }
+#endif
+#ifndef MINIMP3_NOSKIP_APEV2
+    if (buf_size > 32 && !memcmp(buf + buf_size - 32, "APETAGEX", 8))
+    {
+        buf_size -= 32;
+        const uint8_t *tag = buf + buf_size + 8 + 4;
+        /* widen each byte before shifting: shifting a promoted int by 24 is undefined for bytes >= 0x80 */
+        uint32_t tag_size = ((uint32_t)tag[3] << 24) | ((uint32_t)tag[2] << 16) | ((uint32_t)tag[1] << 8) | (uint32_t)tag[0];
+        if (buf_size >= tag_size)
+            buf_size -= tag_size;
+    }
+#endif
+    *pbuf_size = buf_size;
+}
+
+/*
+ * Return the total size in bytes of an ID3v2 tag at the start of buf
+ * (10-byte header + payload, plus 10 more when the footer flag is set),
+ * or 0 when buf does not start with a well-formed ID3v2 header.
+ * Only the first MINIMP3_ID3_DETECT_SIZE bytes are inspected.
+ */
+static size_t mp3dec_skip_id3v2(const uint8_t *buf, size_t buf_size)
+{
+#define MINIMP3_ID3_DETECT_SIZE 10
+#ifndef MINIMP3_NOSKIP_ID3V2
+    if (buf_size < MINIMP3_ID3_DETECT_SIZE || memcmp(buf, "ID3", 3))
+        return 0;
+    /* the low flag bits must be clear and every size byte must have its top bit clear */
+    if ((buf[5] & 15) || ((buf[6] | buf[7] | buf[8] | buf[9]) & 0x80))
+        return 0;
+
+    size_t total = (((buf[6] & 0x7f) << 21) | ((buf[7] & 0x7f) << 14) | ((buf[8] & 0x7f) << 7) | (buf[9] & 0x7f)) + 10;
+    if ((buf[5] & 16))
+        total += 10; /* footer */
+    return total;
+#else
+    return 0;
+#endif
+}
+
+/*
+ * Narrow the range [*pbuf, *pbuf + *pbuf_size) so that it excludes a leading
+ * ID3v2 tag and the trailing tags handled by mp3dec_skip_id3v1. A leading tag
+ * that claims more bytes than are available consumes the whole range.
+ */
+static void mp3dec_skip_id3(const uint8_t **pbuf, size_t *pbuf_size)
+{
+    uint8_t *data = (uint8_t *)(*pbuf);
+    size_t remaining = *pbuf_size;
+    size_t head = mp3dec_skip_id3v2(data, remaining);
+
+    if (head > remaining)
+        head = remaining;
+    data += head;
+    remaining -= head;
+
+    mp3dec_skip_id3v1(data, &remaining);
+    *pbuf = (const uint8_t *)data;
+    *pbuf_size = remaining;
+}
+
+/*
+ * Look for a "Xing"/"Info" tag right after the side info of a Layer III frame.
+ *
+ * frame, frame_size  the frame, starting at its header
+ * frames             receives the frame count stored in the tag
+ * delay, padding     receive the values from the encoder extension, each
+ *                    adjusted by 528 + 1 samples; both are 0 when the tag has
+ *                    no extension
+ *
+ * Returns 1 when a tag with a frame count was read, -1 when a tag is present
+ * but carries no frame count, and 0 when there is no tag or the frame is too
+ * short or corrupt. The outputs are only meaningful for a return value of 1.
+ *
+ * Every read is checked against frame_size, so a short or truncated frame
+ * cannot cause reads past its end.
+ */
+static int mp3dec_check_vbrtag(const uint8_t *frame, int frame_size, uint32_t *frames, int *delay, int *padding)
+{
+    static const char g_xing_tag[4] = { 'X', 'i', 'n', 'g' };
+    static const char g_info_tag[4] = { 'I', 'n', 'f', 'o' };
+#define FRAMES_FLAG     1
+#define BYTES_FLAG      2
+#define TOC_FLAG        4
+#define VBR_SCALE_FLAG  8
+    /* Side info offsets after header:
+    /                Mono  Stereo
+    /  MPEG1          17     32
+    /  MPEG2 & 2.5     9     17*/
+    bs_t bs[1];
+    L3_gr_info_t gr_info[4];
+    bs_init(bs, frame + HDR_SIZE, frame_size - HDR_SIZE);
+    if (HDR_IS_CRC(frame))
+        get_bits(bs, 16);
+    if (L3_read_side_info(bs, gr_info, frame) < 0)
+        return 0; /* side info corrupted */
+
+    int pos = HDR_SIZE + bs->pos/8; /* offset of the tag inside the frame */
+    if (pos + 8 > frame_size)
+        return 0; /* no room for the 4-byte id and the 4-byte flags word */
+    if (memcmp(g_xing_tag, frame + pos, 4) && memcmp(g_info_tag, frame + pos, 4))
+        return 0;
+    int flags = frame[pos + 7];
+    if (!((flags & FRAMES_FLAG)))
+        return -1;
+    if (pos + 12 > frame_size)
+        return 0; /* frame count is truncated */
+    /* big-endian; widen each byte before shifting to avoid a signed-shift overflow */
+    *frames = ((uint32_t)frame[pos + 8] << 24) | ((uint32_t)frame[pos + 9] << 16) |
+              ((uint32_t)frame[pos + 10] << 8) | (uint32_t)frame[pos + 11];
+    pos += 12;
+    if (flags & BYTES_FLAG)
+        pos += 4;
+    if (flags & TOC_FLAG)
+        pos += 100;
+    if (flags & VBR_SCALE_FLAG)
+        pos += 4;
+    *delay = *padding = 0;
+    if (pos < frame_size && frame[pos])
+    {   /* extension, LAME, Lavc, etc. Should be the same structure. */
+        pos += 21;
+        if (pos + 14 >= frame_size)
+            return 0;
+        *delay   = ((frame[pos] << 4) | (frame[pos + 1] >> 4)) + (528 + 1);
+        *padding = (((frame[pos + 1] & 0xF) << 8) | frame[pos + 2]) - (528 + 1);
+    }
+    return 1;
+}
+
+/*
+ * Detect MP3/MPA data in a memory buffer. Forwards to mp3dec_detect_cb with
+ * no I/O callbacks; see that function for the return values.
+ */
+int mp3dec_detect_buf(const uint8_t *buf, size_t buf_size)
+{
+    mp3dec_io_t *no_io = 0;
+    return mp3dec_detect_cb(no_io, (uint8_t *)buf, buf_size);
+}
+
+/*
+ * Decide whether a stream looks like MP3/MPA data.
+ *
+ * io        optional callbacks; when given, the stream is rewound and read
+ *           into buf, which must then hold at least MINIMP3_BUF_SIZE bytes
+ * buf       the data itself (io == NULL) or scratch space for reading
+ * buf_size  size of buf in bytes
+ *
+ * Returns 0 when an ID3v2 tag or a confirmed run of frames is found,
+ * MP3D_E_USER when the data does not look like MP3/MPA, MP3D_E_PARAM for
+ * invalid arguments and MP3D_E_IOERROR when a callback misbehaves.
+ */
+int mp3dec_detect_cb(mp3dec_io_t *io, uint8_t *buf, size_t buf_size)
+{
+    if (!buf || (size_t)-1 == buf_size || (io && buf_size < MINIMP3_BUF_SIZE))
+        return MP3D_E_PARAM;
+    size_t filled = buf_size;
+    if (io)
+    {
+        if (io->seek(0, io->seek_data))
+            return MP3D_E_IOERROR;
+        filled = io->read(buf, MINIMP3_ID3_DETECT_SIZE, io->read_data);
+        if (filled > MINIMP3_ID3_DETECT_SIZE)
+            return MP3D_E_IOERROR;
+    }
+    if (filled < MINIMP3_ID3_DETECT_SIZE)
+        return MP3D_E_USER; /* too small, can't be mp3/mpa */
+    if (mp3dec_skip_id3v2(buf, filled))
+        return 0; /* id3v2 tag is enough evidence */
+    if (io)
+    {
+        size_t readed = io->read(buf + MINIMP3_ID3_DETECT_SIZE, buf_size - MINIMP3_ID3_DETECT_SIZE, io->read_data);
+        if (readed > (buf_size - MINIMP3_ID3_DETECT_SIZE))
+            return MP3D_E_IOERROR;
+        filled += readed;
+        /* a short read means the whole stream is in buf, so trailing tags can be trimmed */
+        if (filled < MINIMP3_BUF_SIZE)
+            mp3dec_skip_id3v1(buf, &filled);
+    } else
+    {
+        mp3dec_skip_id3v1(buf, &filled);
+        if (filled > MINIMP3_BUF_SIZE)
+            filled = MINIMP3_BUF_SIZE;
+    }
+    /* mp3d_find_frame reads *free_format_bytes before writing it, so it must start at 0 */
+    int free_format_bytes = 0, frame_size = 0;
+    mp3d_find_frame(buf, filled, &free_format_bytes, &frame_size);
+    if (frame_size)
+        return 0; /* MAX_FRAME_SYNC_MATCHES consecutive frames found */
+    return MP3D_E_USER;
+}
+
+/*
+ * Decode a whole in-memory stream. Forwards to mp3dec_load_cb with no I/O
+ * callbacks; see that function for the meaning of the arguments and the
+ * return values.
+ */
+int mp3dec_load_buf(mp3dec_t *dec, const uint8_t *buf, size_t buf_size, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data)
+{
+    mp3dec_io_t *no_io = 0;
+    return mp3dec_load_cb(dec, no_io, (uint8_t *)buf, buf_size, info, progress_cb, user_data);
+}
+
+int mp3dec_load_cb(mp3dec_t *dec, mp3dec_io_t *io, uint8_t *buf, size_t buf_size, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data)
+{
+ if (!dec || !buf || !info || (size_t)-1 == buf_size || (io && buf_size < MINIMP3_BUF_SIZE))
+ return MP3D_E_PARAM;
+ uint64_t detected_samples = 0;
+ size_t orig_buf_size = buf_size;
+ int to_skip = 0;
+ mp3dec_frame_info_t frame_info;
+ memset(info, 0, sizeof(*info));
+ memset(&frame_info, 0, sizeof(frame_info));
+
+ /* skip id3 */
+ size_t filled = 0, consumed = 0;
+ int eof = 0, ret = 0;
+ if (io)
+ {
+ if (io->seek(0, io->seek_data))
+ return MP3D_E_IOERROR;
+ filled = io->read(buf, MINIMP3_ID3_DETECT_SIZE, io->read_data);
+ if (filled > MINIMP3_ID3_DETECT_SIZE)
+ return MP3D_E_IOERROR;
+ if (MINIMP3_ID3_DETECT_SIZE != filled)
+ return 0;
+ size_t id3v2size = mp3dec_skip_id3v2(buf, filled);
+ if (id3v2size)
+ {
+ if (io->seek(id3v2size, io->seek_data))
+ return MP3D_E_IOERROR;
+ filled = io->read(buf, buf_size, io->read_data);
+ if (filled > buf_size)
+ return MP3D_E_IOERROR;
+ } else
+ {
+ size_t readed = io->read(buf + MINIMP3_ID3_DETECT_SIZE, buf_size - MINIMP3_ID3_DETECT_SIZE, io->read_data);
+ if (readed > (buf_size - MINIMP3_ID3_DETECT_SIZE))
+ return MP3D_E_IOERROR;
+ filled += readed;
+ }
+ if (filled < MINIMP3_BUF_SIZE)
+ mp3dec_skip_id3v1(buf, &filled);
+ } else
+ {
+ mp3dec_skip_id3((const uint8_t **)&buf, &buf_size);
+ if (!buf_size)
+ return 0;
+ }
+ /* try to make allocation size assumption by first frame or vbr tag */
+ mp3dec_init(dec);
+ int samples;
+ do
+ {
+ uint32_t frames;
+ int i, delay, padding, free_format_bytes = 0, frame_size = 0;
+ const uint8_t *hdr;
+ if (io)
+ {
+ if (!eof && filled - consumed < MINIMP3_BUF_SIZE)
+ { /* keep minimum 10 consecutive mp3 frames (~16KB) worst case */
+ memmove(buf, buf + consumed, filled - consumed);
+ filled -= consumed;
+ consumed = 0;
+ size_t readed = io->read(buf + filled, buf_size - filled, io->read_data);
+ if (readed > (buf_size - filled))
+ return MP3D_E_IOERROR;
+ if (readed != (buf_size - filled))
+ eof = 1;
+ filled += readed;
+ if (eof)
+ mp3dec_skip_id3v1(buf, &filled);
+ }
+ i = mp3d_find_frame(buf + consumed, filled - consumed, &free_format_bytes, &frame_size);
+ consumed += i;
+ hdr = buf + consumed;
+ } else
+ {
+ i = mp3d_find_frame(buf, buf_size, &free_format_bytes, &frame_size);
+ buf += i;
+ buf_size -= i;
+ hdr = buf;
+ }
+ if (i && !frame_size)
+ continue;
+ if (!frame_size)
+ return 0;
+ frame_info.channels = HDR_IS_MONO(hdr) ? 1 : 2;
+ frame_info.hz = hdr_sample_rate_hz(hdr);
+ frame_info.layer = 4 - HDR_GET_LAYER(hdr);
+ frame_info.bitrate_kbps = hdr_bitrate_kbps(hdr);
+ frame_info.frame_bytes = frame_size;
+ samples = hdr_frame_samples(hdr)*frame_info.channels;
+ if (3 != frame_info.layer)
+ break;
+ int ret = mp3dec_check_vbrtag(hdr, frame_size, &frames, &delay, &padding);
+ if (ret > 0)
+ {
+ padding *= frame_info.channels;
+ to_skip = delay*frame_info.channels;
+ detected_samples = samples*(uint64_t)frames;
+ if (detected_samples >= (uint64_t)to_skip)
+ detected_samples -= to_skip;
+ if (padding > 0 && detected_samples >= (uint64_t)padding)
+ detected_samples -= padding;
+ if (!detected_samples)
+ return 0;
+ }
+ if (ret)
+ {
+ if (io)
+ {
+ consumed += frame_size;
+ } else
+ {
+ buf += frame_size;
+ buf_size -= frame_size;
+ }
+ }
+ break;
+ } while(1);
+ size_t allocated = MINIMP3_MAX_SAMPLES_PER_FRAME*sizeof(mp3d_sample_t);
+ if (detected_samples)
+ allocated += detected_samples*sizeof(mp3d_sample_t);
+ else
+ allocated += (buf_size/frame_info.frame_bytes)*samples*sizeof(mp3d_sample_t);
+ info->buffer = (mp3d_sample_t*)malloc(allocated);
+ if (!info->buffer)
+ return MP3D_E_MEMORY;
+ /* save info */
+ info->channels = frame_info.channels;
+ info->hz = frame_info.hz;
+ info->layer = frame_info.layer;
+ /* decode all frames */
+ size_t avg_bitrate_kbps = 0, frames = 0;
+ do
+ {
+ if ((allocated - info->samples*sizeof(mp3d_sample_t)) < MINIMP3_MAX_SAMPLES_PER_FRAME*sizeof(mp3d_sample_t))
+ {
+ allocated *= 2;
+ mp3d_sample_t *alloc_buf = (mp3d_sample_t*)realloc(info->buffer, allocated);
+ if (!alloc_buf)
+ return MP3D_E_MEMORY;
+ info->buffer = alloc_buf;
+ }
+ if (io)
+ {
+ if (!eof && filled - consumed < MINIMP3_BUF_SIZE)
+ { /* keep minimum 10 consecutive mp3 frames (~16KB) worst case */
+ memmove(buf, buf + consumed, filled - consumed);
+ filled -= consumed;
+ consumed = 0;
+ size_t readed = io->read(buf + filled, buf_size - filled, io->read_data);
+ if (readed != (buf_size - filled))
+ eof = 1;
+ filled += readed;
+ if (eof)
+ mp3dec_skip_id3v1(buf, &filled);
+ }
+ samples = mp3dec_decode_frame(dec, buf + consumed, filled - consumed, info->buffer + info->samples, &frame_info);
+ consumed += frame_info.frame_bytes;
+ } else
+ {
+ samples = mp3dec_decode_frame(dec, buf, MINIMP3_MIN(buf_size, (size_t)INT_MAX), info->buffer + info->samples, &frame_info);
+ buf += frame_info.frame_bytes;
+ buf_size -= frame_info.frame_bytes;
+ }
+ if (samples)
+ {
+ if (info->hz != frame_info.hz || info->layer != frame_info.layer)
+ {
+ ret = MP3D_E_DECODE;
+ break;
+ }
+ if (info->channels && info->channels != frame_info.channels)
+ {
+#ifdef MINIMP3_ALLOW_MONO_STEREO_TRANSITION
+ info->channels = 0; /* mark file with mono-stereo transition */
+#else
+ ret = MP3D_E_DECODE;
+ break;
+#endif
+ }
+ samples *= frame_info.channels;
+ if (to_skip)
+ {
+ size_t skip = MINIMP3_MIN(samples, to_skip);
+ to_skip -= skip;
+ samples -= skip;
+ memmove(info->buffer, info->buffer + skip, samples*sizeof(mp3d_sample_t));
+ }
+ info->samples += samples;
+ avg_bitrate_kbps += frame_info.bitrate_kbps;
+ frames++;
+ if (progress_cb)
+ {
+ ret = progress_cb(user_data, orig_buf_size, orig_buf_size - buf_size, &frame_info);
+ if (ret)
+ break;
+ }
+ }
+ } while (frame_info.frame_bytes);
+ if (detected_samples && info->samples > detected_samples)
+ info->samples = detected_samples; /* cut padding */
+ /* reallocate to normal buffer size */
+ if (allocated != info->samples*sizeof(mp3d_sample_t))
+ {
+ mp3d_sample_t *alloc_buf = (mp3d_sample_t*)realloc(info->buffer, info->samples*sizeof(mp3d_sample_t));
+ if (!alloc_buf && info->samples)
+ return MP3D_E_MEMORY;
+ info->buffer = alloc_buf;
+ }
+ if (frames)
+ info->avg_bitrate_kbps = avg_bitrate_kbps/frames;
+ return ret;
+}
+
+/*
+ * Walk the MPEG audio frames of an in-memory stream and report each one to callback.
+ * ID3 data is skipped first through mp3dec_skip_id3(). For every frame the callback gets the
+ * frame pointer, its size, the bytes remaining from the frame start and the frame's offset
+ * from the original buf pointer.
+ * Returns MP3D_E_PARAM on a NULL buf/callback or buf_size == (size_t)-1, the callback's value
+ * as soon as it is non-zero, and 0 once no further frame is found.
+ */
+int mp3dec_iterate_buf(const uint8_t *buf, size_t buf_size, MP3D_ITERATE_CB callback, void *user_data)
+{
+    const uint8_t *const stream_begin = buf;
+    mp3dec_frame_info_t frame_info;
+    if (!buf || (size_t)-1 == buf_size || !callback)
+        return MP3D_E_PARAM;
+    /* skip id3 */
+    mp3dec_skip_id3(&buf, &buf_size);
+    if (!buf_size)
+        return 0;
+    memset(&frame_info, 0, sizeof(frame_info));
+    for (;;)
+    {
+        int free_format_bytes = 0, frame_size = 0, status;
+        int skipped = mp3d_find_frame(buf, buf_size, &free_format_bytes, &frame_size);
+        buf += skipped;
+        buf_size -= skipped;
+        if (!frame_size)
+        {
+            if (skipped)
+                continue; /* bytes were skipped without finding a frame: search once more */
+            break;
+        }
+        /* describe the frame from its header only, nothing is decoded here */
+        frame_info.channels = HDR_IS_MONO(buf) ? 1 : 2;
+        frame_info.hz = hdr_sample_rate_hz(buf);
+        frame_info.layer = 4 - HDR_GET_LAYER(buf);
+        frame_info.bitrate_kbps = hdr_bitrate_kbps(buf);
+        frame_info.frame_bytes = frame_size;
+
+        status = callback(user_data, buf, frame_size, free_format_bytes, buf_size, buf - stream_begin, &frame_info);
+        if (status)
+            return status;
+        buf += frame_size;
+        buf_size -= frame_size;
+    }
+    return 0;
+}
+
+/*
+ * Walk the MPEG audio frames of a stream read through io callbacks and report each one to callback.
+ * buf/buf_size is the caller-provided working buffer (at least MINIMP3_BUF_SIZE bytes). The first
+ * MINIMP3_ID3_DETECT_SIZE bytes are probed for an ID3v2 tag; if one is found the stream is re-read
+ * from just past it. The buffer is refilled whenever less than MINIMP3_BUF_SIZE bytes remain.
+ * The callback's buf_size argument is the number of buffered bytes available starting at the frame
+ * header, and its offset argument is the frame position counted from where reading started.
+ * Returns MP3D_E_PARAM on bad arguments, MP3D_E_IOERROR when io reports more bytes than requested
+ * or a seek fails, the callback's value as soon as it is non-zero, otherwise 0.
+ */
+int mp3dec_iterate_cb(mp3dec_io_t *io, uint8_t *buf, size_t buf_size, MP3D_ITERATE_CB callback, void *user_data)
+{
+    if (!io || !buf || (size_t)-1 == buf_size || buf_size < MINIMP3_BUF_SIZE || !callback)
+        return MP3D_E_PARAM;
+    size_t filled = io->read(buf, MINIMP3_ID3_DETECT_SIZE, io->read_data), consumed = 0;
+    uint64_t stream_pos = 0; /* offset of the next unreported byte, relative to the read start */
+    mp3dec_frame_info_t frame_info;
+    int eof = 0;
+    memset(&frame_info, 0, sizeof(frame_info));
+    if (filled > MINIMP3_ID3_DETECT_SIZE)
+        return MP3D_E_IOERROR;
+    if (MINIMP3_ID3_DETECT_SIZE != filled)
+        return 0;
+    size_t id3v2size = mp3dec_skip_id3v2(buf, filled);
+    if (id3v2size)
+    {
+        if (io->seek(id3v2size, io->seek_data))
+            return MP3D_E_IOERROR;
+        filled = io->read(buf, buf_size, io->read_data);
+        if (filled > buf_size)
+            return MP3D_E_IOERROR;
+        stream_pos += id3v2size;
+    } else
+    {
+        size_t got = io->read(buf + MINIMP3_ID3_DETECT_SIZE, buf_size - MINIMP3_ID3_DETECT_SIZE, io->read_data);
+        if (got > (buf_size - MINIMP3_ID3_DETECT_SIZE))
+            return MP3D_E_IOERROR;
+        filled += got;
+    }
+    if (filled < MINIMP3_BUF_SIZE)
+        mp3dec_skip_id3v1(buf, &filled);
+    do
+    {
+        int free_format_bytes = 0, frame_size = 0, ret;
+        int i = mp3d_find_frame(buf + consumed, filled - consumed, &free_format_bytes, &frame_size);
+        if (i && !frame_size)
+        {
+            consumed += i;
+            continue;
+        }
+        if (!frame_size)
+            break;
+        const uint8_t *hdr = buf + consumed + i;
+        frame_info.channels = HDR_IS_MONO(hdr) ? 1 : 2;
+        frame_info.hz = hdr_sample_rate_hz(hdr);
+        frame_info.layer = 4 - HDR_GET_LAYER(hdr);
+        frame_info.bitrate_kbps = hdr_bitrate_kbps(hdr);
+        frame_info.frame_bytes = frame_size;
+
+        stream_pos += i;
+        /* the i bytes in front of the header are not part of the data at hdr, so they must not be
+           counted in the size handed to the callback (hdr + size has to stay inside the filled area) */
+        if ((ret = callback(user_data, hdr, frame_size, free_format_bytes, filled - consumed - i, stream_pos, &frame_info)))
+            return ret;
+        stream_pos += frame_size;
+        consumed += i + frame_size;
+        if (!eof && filled - consumed < MINIMP3_BUF_SIZE)
+        {   /* keep minimum 10 consecutive mp3 frames (~16KB) worst case */
+            memmove(buf, buf + consumed, filled - consumed);
+            filled -= consumed;
+            consumed = 0;
+            size_t got = io->read(buf + filled, buf_size - filled, io->read_data);
+            if (got > (buf_size - filled))
+                return MP3D_E_IOERROR;
+            if (got != (buf_size - filled))
+                eof = 1; /* short read: treated as end of stream */
+            filled += got;
+            if (eof)
+                mp3dec_skip_id3v1(buf, &filled);
+        }
+    } while (1);
+    return 0;
+}
+
+/*
+ * Frame-iteration callback that initializes a mp3dec_ex_t (passed as user_data) and builds its
+ * seek index.
+ * On the first frame it records stream info and start/end offsets and, for layer 3, checks for a
+ * VBR tag: when the tag yields a frame count the total sample count is derived from it and
+ * MP3D_E_USER is returned to stop the scan early.
+ * Otherwise (unless MP3D_DO_NOT_SCAN is set, which also returns MP3D_E_USER) one index entry is
+ * appended per frame holding the frame offset and the running sample position.
+ * Returns 0 to continue iterating, MP3D_E_USER to stop, MP3D_E_MEMORY if the index cannot grow.
+ */
+static int mp3dec_load_index(void *user_data, const uint8_t *frame, int frame_size, int free_format_bytes, size_t buf_size, uint64_t offset, mp3dec_frame_info_t *info)
+{
+    mp3dec_frame_t *idx_frame;
+    mp3dec_ex_t *dec = (mp3dec_ex_t *)user_data;
+    if (!dec->index.frames && !dec->start_offset)
+    {   /* detect VBR tag and try to avoid full scan */
+        uint32_t frames;
+        int delay, padding;
+        dec->info = *info;
+        dec->start_offset = dec->offset = offset;
+        dec->end_offset = offset + buf_size;
+        dec->free_format_bytes = free_format_bytes; /* should not change */
+        if (3 == dec->info.layer)
+        {
+            int ret = mp3dec_check_vbrtag(frame, frame_size, &frames, &delay, &padding);
+            if (ret)
+                dec->start_offset = dec->offset = offset + frame_size; /* tag frame carries no audio: start after it */
+            if (ret > 0)
+            {
+                padding *= info->channels;
+                dec->start_delay = dec->to_skip = delay*info->channels;
+                dec->samples = hdr_frame_samples(frame)*info->channels*(uint64_t)frames;
+                if (dec->samples >= (uint64_t)dec->start_delay)
+                    dec->samples -= dec->start_delay;
+                if (padding > 0 && dec->samples >= (uint64_t)padding)
+                    dec->samples -= padding;
+                dec->detected_samples = dec->samples;
+                dec->vbr_tag_found = 1;
+                return MP3D_E_USER;
+            } else if (ret < 0)
+                return 0;
+        }
+    }
+    if (dec->flags & MP3D_DO_NOT_SCAN)
+        return MP3D_E_USER;
+    if (dec->index.num_frames + 1 > dec->index.capacity)
+    {
+        /* commit the new capacity only after realloc succeeded, so that on failure
+           index.capacity still describes the allocation index.frames points to */
+        size_t new_capacity = dec->index.capacity ? dec->index.capacity*2 : 4096;
+        mp3dec_frame_t *alloc_buf = (mp3dec_frame_t *)realloc((void*)dec->index.frames, sizeof(mp3dec_frame_t)*new_capacity);
+        if (!alloc_buf)
+            return MP3D_E_MEMORY;
+        dec->index.frames = alloc_buf;
+        dec->index.capacity = new_capacity;
+    }
+    idx_frame = &dec->index.frames[dec->index.num_frames++];
+    idx_frame->offset = offset;
+    idx_frame->sample = dec->samples;
+    if (!dec->buffer_samples && dec->index.num_frames < 256)
+    {   /* for some cutted mp3 frames, bit-reservoir not filled and decoding can't be started from first frames */
+        /* try to decode up to 255 first frames till samples starts to decode */
+        dec->buffer_samples = mp3dec_decode_frame(&dec->mp3d, frame, MINIMP3_MIN(buf_size, (size_t)INT_MAX), dec->buffer, info);
+        dec->samples += dec->buffer_samples*info->channels;
+    } else
+        dec->samples += hdr_frame_samples(frame)*info->channels;
+    return 0;
+}
+
+/*
+ * Initialize dec for decoding from a caller-owned memory buffer.
+ * The buffer is scanned with mp3dec_iterate_buf()/mp3dec_load_index(); afterwards the frame decoder
+ * is reset. indexes_built is set only when the scan was not cut short by a VBR tag or by
+ * MP3D_DO_NOT_SCAN, and MP3D_DO_NOT_SCAN is then cleared from dec->flags.
+ * Returns MP3D_E_PARAM on invalid arguments or flags, a scan error code, or 0 on success.
+ */
+int mp3dec_ex_open_buf(mp3dec_ex_t *dec, const uint8_t *buf, size_t buf_size, int flags)
+{
+    int scan_status;
+    if (!dec || !buf || (size_t)-1 == buf_size || (flags & (~MP3D_FLAGS_MASK)))
+        return MP3D_E_PARAM;
+    memset(dec, 0, sizeof(*dec));
+    dec->flags = flags;
+    dec->file.size = buf_size;
+    dec->file.buffer = buf;
+    mp3dec_init(&dec->mp3d);
+    scan_status = mp3dec_iterate_buf(dec->file.buffer, dec->file.size, mp3dec_load_index, dec);
+    /* MP3D_E_USER only means the scan stopped early on purpose */
+    if (scan_status != 0 && scan_status != MP3D_E_USER)
+        return scan_status;
+    mp3dec_init(&dec->mp3d);
+    dec->buffer_samples = 0;
+    if (dec->vbr_tag_found || (flags & MP3D_DO_NOT_SCAN))
+        dec->indexes_built = 0;
+    else
+        dec->indexes_built = 1;
+    dec->flags &= (~MP3D_DO_NOT_SCAN);
+    return 0;
+}
+
+#ifndef MINIMP3_SEEK_IDX_LINEAR_SEARCH
+/*
+ * Find the index entry to start from for a sample position.
+ * Expects idx->frames to be ordered by ascending sample. Returns the position of an entry whose
+ * sample equals `position` when the search hits one, otherwise the last entry whose sample is
+ * below `position`; returns 0 when no entry is below it or the index is empty.
+ * A half-open [lo, hi) range is used so that no frames[] element outside 0..num_frames-1 is read
+ * and no unsigned value is decremented below zero.
+ */
+static size_t mp3dec_idx_binary_search(mp3dec_index_t *idx, uint64_t position)
+{
+    size_t lo = 0, hi = idx->num_frames, index = 0;
+    while (lo < hi)
+    {
+        size_t mid = lo + (hi - lo)/2;
+        uint64_t sample = idx->frames[mid].sample;
+        if (sample == position)
+            return mid;
+        if (sample > position)
+        {   /* move left side. */
+            hi = mid;
+        } else
+        {   /* move to right side */
+            index = mid;
+            lo = mid + 1;
+        }
+    }
+    return index;
+}
+#endif
+
+/*
+ * Reposition the decoder.
+ * Without MP3D_SEEK_TO_SAMPLE in dec->flags, position is used as a byte offset (clamped to
+ * file.size in buffer mode) and cur_sample is reset to 0.
+ * With MP3D_SEEK_TO_SAMPLE, position is a sample index: the frame index is built on demand,
+ * a starting frame is looked up, decoding is moved some frames back so the decoder state can
+ * warm up, and dec->to_skip is set to the number of samples to drop before the target.
+ * In all cases the input/output buffers and last_error are reset and the frame decoder is
+ * re-initialized.
+ * Returns 0 on success, MP3D_E_PARAM if dec is NULL, MP3D_E_IOERROR on a failed io seek/read,
+ * or the error reported while building the index.
+ */
+int mp3dec_ex_seek(mp3dec_ex_t *dec, uint64_t position)
+{
+ size_t i;
+ if (!dec)
+ return MP3D_E_PARAM;
+ if (!(dec->flags & MP3D_SEEK_TO_SAMPLE))
+ {
+ /* byte-offset seek */
+ if (dec->io)
+ {
+ dec->offset = position;
+ } else
+ {
+ dec->offset = MINIMP3_MIN(position, dec->file.size);
+ }
+ dec->cur_sample = 0;
+ goto do_exit;
+ }
+ dec->cur_sample = position;
+ /* index sample positions include the start delay, so shift the target accordingly */
+ position += dec->start_delay;
+ if (0 == position)
+ { /* optimize seek to zero, no index needed */
+seek_zero:
+ dec->offset = dec->start_offset;
+ dec->to_skip = 0;
+ goto do_exit;
+ }
+ if (!dec->indexes_built)
+ { /* no index created yet (vbr tag used to calculate track length or MP3D_DO_NOT_SCAN open flag used) */
+ dec->indexes_built = 1;
+ dec->samples = 0;
+ dec->buffer_samples = 0;
+ if (dec->io)
+ {
+ if (dec->io->seek(dec->start_offset, dec->io->seek_data))
+ return MP3D_E_IOERROR;
+ int ret = mp3dec_iterate_cb(dec->io, (uint8_t *)dec->file.buffer, dec->file.size, mp3dec_load_index, dec);
+ if (ret && MP3D_E_USER != ret)
+ return ret;
+ } else
+ {
+ int ret = mp3dec_iterate_buf(dec->file.buffer + dec->start_offset, dec->file.size - dec->start_offset, mp3dec_load_index, dec);
+ if (ret && MP3D_E_USER != ret)
+ return ret;
+ }
+ /* the scan above started at start_offset, so the recorded offsets are rebased onto the stream start */
+ for (i = 0; i < dec->index.num_frames; i++)
+ dec->index.frames[i].offset += dec->start_offset;
+ /* the scan used dec->samples as a running counter; restore the detected total */
+ dec->samples = dec->detected_samples;
+ }
+ if (!dec->index.frames)
+ goto seek_zero; /* no frames in file - seek to zero */
+#ifdef MINIMP3_SEEK_IDX_LINEAR_SEARCH
+ for (i = 0; i < dec->index.num_frames; i++)
+ {
+ if (dec->index.frames[i].sample >= position)
+ break;
+ }
+#else
+ i = mp3dec_idx_binary_search(&dec->index, position);
+#endif
+ if (i)
+ {
+ /* bytes of earlier frame data still to be covered before the start frame (layer 3 only) */
+ int to_fill_bytes = 511;
+ int skip_frames = MINIMP3_PREDECODE_FRAMES
+#ifdef MINIMP3_SEEK_IDX_LINEAR_SEARCH
+ + ((dec->index.frames[i].sample == position) ? 0 : 1)
+#endif
+ ;
+ /* step back by skip_frames, but never before the first index entry */
+ i -= MINIMP3_MIN(i, (size_t)skip_frames);
+ if (3 == dec->info.layer)
+ {
+ while (i && to_fill_bytes)
+ { /* make sure bit-reservoir is filled when we start decoding */
+ bs_t bs[1];
+ L3_gr_info_t gr_info[4];
+ int frame_bytes, frame_size;
+ const uint8_t *hdr;
+ if (dec->io)
+ {
+ /* io mode: load the previous frame (header first, then the rest) into the io buffer */
+ hdr = dec->file.buffer;
+ if (dec->io->seek(dec->index.frames[i - 1].offset, dec->io->seek_data))
+ return MP3D_E_IOERROR;
+ size_t readed = dec->io->read((uint8_t *)hdr, HDR_SIZE, dec->io->read_data);
+ if (readed != HDR_SIZE)
+ return MP3D_E_IOERROR;
+ frame_size = hdr_frame_bytes(hdr, dec->free_format_bytes) + hdr_padding(hdr);
+ readed = dec->io->read((uint8_t *)hdr + HDR_SIZE, frame_size - HDR_SIZE, dec->io->read_data);
+ if (readed != (size_t)(frame_size - HDR_SIZE))
+ return MP3D_E_IOERROR;
+ bs_init(bs, hdr + HDR_SIZE, frame_size - HDR_SIZE);
+ } else
+ {
+ hdr = dec->file.buffer + dec->index.frames[i - 1].offset;
+ frame_size = hdr_frame_bytes(hdr, dec->free_format_bytes) + hdr_padding(hdr);
+ bs_init(bs, hdr + HDR_SIZE, frame_size - HDR_SIZE);
+ }
+ if (HDR_IS_CRC(hdr))
+ get_bits(bs, 16);
+ i--;
+ if (L3_read_side_info(bs, gr_info, hdr) < 0)
+ break; /* frame not decodable, we can start from here */
+ /* bytes left in the frame after the side info count towards to_fill_bytes */
+ frame_bytes = (bs->limit - bs->pos)/8;
+ to_fill_bytes -= MINIMP3_MIN(to_fill_bytes, frame_bytes);
+ }
+ }
+ }
+ dec->offset = dec->index.frames[i].offset;
+ /* samples between the chosen start frame and the requested position are dropped while reading */
+ dec->to_skip = position - dec->index.frames[i].sample;
+ while ((i + 1) < dec->index.num_frames && !dec->index.frames[i].sample && !dec->index.frames[i + 1].sample)
+ { /* skip not decodable first frames */
+ const uint8_t *hdr;
+ if (dec->io)
+ {
+ hdr = dec->file.buffer;
+ if (dec->io->seek(dec->index.frames[i].offset, dec->io->seek_data))
+ return MP3D_E_IOERROR;
+ size_t readed = dec->io->read((uint8_t *)hdr, HDR_SIZE, dec->io->read_data);
+ if (readed != HDR_SIZE)
+ return MP3D_E_IOERROR;
+ } else
+ hdr = dec->file.buffer + dec->index.frames[i].offset;
+ dec->to_skip += hdr_frame_samples(hdr)*dec->info.channels;
+ i++;
+ }
+do_exit:
+ /* common tail: move the io stream to the chosen offset and drop all buffered state */
+ if (dec->io)
+ {
+ if (dec->io->seek(dec->offset, dec->io->seek_data))
+ return MP3D_E_IOERROR;
+ }
+ dec->buffer_samples = 0;
+ dec->buffer_consumed = 0;
+ dec->input_consumed = 0;
+ dec->input_filled = 0;
+ dec->last_error = 0;
+ mp3dec_init(&dec->mp3d);
+ return 0;
+}
+
+/*
+ * Return a pointer to the next run of decoded samples, decoding further frames when the internal
+ * sample buffer is exhausted.
+ * *buf is set to a position inside dec->buffer (no copy is made); at most max_samples samples are
+ * handed out per call. Samples pending in dec->to_skip are dropped first, and when a total sample
+ * count was detected the output is cut so cur_sample never passes it.
+ * Returns the number of samples available at *buf, or 0 at end of stream or on error; errors are
+ * stored in dec->last_error (MP3D_E_PARAM, MP3D_E_IOERROR, MP3D_E_DECODE) and keep later calls
+ * returning 0.
+ */
+size_t mp3dec_ex_read_frame(mp3dec_ex_t *dec, mp3d_sample_t **buf, mp3dec_frame_info_t *frame_info, size_t max_samples)
+{
+    if (!dec || !buf || !frame_info)
+    {
+        if (dec)
+            dec->last_error = MP3D_E_PARAM;
+        return 0;
+    }
+    if (dec->detected_samples && dec->cur_sample >= dec->detected_samples)
+        return 0; /* at end of stream */
+    if (dec->last_error)
+        return 0; /* error eof state, seek can reset it */
+    *buf = NULL;
+    uint64_t end_offset = dec->end_offset ? dec->end_offset : dec->file.size;
+    int eof = 0;
+    while (dec->buffer_consumed == dec->buffer_samples)
+    {
+        const uint8_t *dec_buf;
+        if (dec->io)
+        {
+            if (!eof && (dec->input_filled - dec->input_consumed) < MINIMP3_BUF_SIZE)
+            {   /* keep minimum 10 consecutive mp3 frames (~16KB) worst case */
+                memmove((uint8_t*)dec->file.buffer, (uint8_t*)dec->file.buffer + dec->input_consumed, dec->input_filled - dec->input_consumed);
+                dec->input_filled -= dec->input_consumed;
+                dec->input_consumed = 0;
+                size_t readed = dec->io->read((uint8_t*)dec->file.buffer + dec->input_filled, dec->file.size - dec->input_filled, dec->io->read_data);
+                if (readed > (dec->file.size - dec->input_filled))
+                {
+                    dec->last_error = MP3D_E_IOERROR;
+                    readed = 0;
+                }
+                if (readed != (dec->file.size - dec->input_filled))
+                    eof = 1;
+                dec->input_filled += readed;
+                if (eof)
+                    mp3dec_skip_id3v1((uint8_t*)dec->file.buffer, &dec->input_filled);
+            }
+            dec_buf = dec->file.buffer + dec->input_consumed;
+            if (!(dec->input_filled - dec->input_consumed))
+                return 0;
+            dec->buffer_samples = mp3dec_decode_frame(&dec->mp3d, dec_buf, dec->input_filled - dec->input_consumed, dec->buffer, frame_info);
+            dec->input_consumed += frame_info->frame_bytes;
+        } else
+        {
+            /* a byte-offset seek clamps only to file.size, which can lie beyond end_offset;
+               compare first so the unsigned subtraction below cannot wrap around */
+            if (dec->offset >= end_offset)
+                return 0;
+            dec_buf = dec->file.buffer + dec->offset;
+            uint64_t buf_size = end_offset - dec->offset;
+            dec->buffer_samples = mp3dec_decode_frame(&dec->mp3d, dec_buf, MINIMP3_MIN(buf_size, (uint64_t)INT_MAX), dec->buffer, frame_info);
+        }
+        dec->buffer_consumed = 0;
+        if (dec->info.hz != frame_info->hz || dec->info.layer != frame_info->layer)
+        {
+return_e_decode:
+            dec->last_error = MP3D_E_DECODE;
+            return 0;
+        }
+        if (dec->buffer_samples)
+        {
+            dec->buffer_samples *= frame_info->channels;
+            if (dec->to_skip)
+            {
+                size_t skip = MINIMP3_MIN(dec->buffer_samples, dec->to_skip);
+                dec->buffer_consumed += skip;
+                dec->to_skip -= skip;
+            }
+            if (
+#ifdef MINIMP3_ALLOW_MONO_STEREO_TRANSITION
+                !(dec->flags & MP3D_ALLOW_MONO_STEREO_TRANSITION) &&
+#endif
+                dec->buffer_consumed != dec->buffer_samples && dec->info.channels != frame_info->channels)
+            {
+                goto return_e_decode;
+            }
+        } else if (dec->to_skip)
+        {   /* In mp3 decoding not always can start decode from any frame because of bit reservoir,
+               count skip samples for such frames */
+            int frame_samples = hdr_frame_samples(dec_buf)*frame_info->channels;
+            dec->to_skip -= MINIMP3_MIN(frame_samples, dec->to_skip);
+        }
+        dec->offset += frame_info->frame_bytes;
+    }
+    size_t out_samples = MINIMP3_MIN((size_t)(dec->buffer_samples - dec->buffer_consumed), max_samples);
+    if (dec->detected_samples)
+    {   /* count decoded samples to properly cut padding */
+        if (dec->cur_sample + out_samples >= dec->detected_samples)
+            out_samples = dec->detected_samples - dec->cur_sample;
+    }
+    dec->cur_sample += out_samples;
+    *buf = dec->buffer + dec->buffer_consumed;
+    dec->buffer_consumed += out_samples;
+    return out_samples;
+}
+
+/*
+ * Copy up to `samples` decoded samples into buf by repeatedly calling mp3dec_ex_read_frame().
+ * Stops early when mp3dec_ex_read_frame() yields nothing. A NULL buf is recorded as MP3D_E_PARAM
+ * in dec->last_error.
+ * Returns the number of samples actually copied.
+ */
+size_t mp3dec_ex_read(mp3dec_ex_t *dec, mp3d_sample_t *buf, size_t samples)
+{
+    mp3dec_frame_info_t frame_info;
+    size_t remaining = samples;
+    if (!dec)
+        return 0;
+    if (!buf)
+    {
+        dec->last_error = MP3D_E_PARAM;
+        return 0;
+    }
+    memset(&frame_info, 0, sizeof(frame_info));
+    while (remaining)
+    {
+        mp3d_sample_t *chunk = NULL;
+        size_t got = mp3dec_ex_read_frame(dec, &chunk, &frame_info, remaining);
+        if (!got)
+            break;
+        memcpy(buf, chunk, got * sizeof(mp3d_sample_t));
+        remaining -= got;
+        buf += got;
+    }
+    return samples - remaining;
+}
+
+/*
+ * Initialize dec for decoding from a stream accessed through io callbacks.
+ * An internal input buffer of MINIMP3_IO_SIZE bytes is set up (ring mapping when
+ * MINIMP3_HAVE_RING is defined, malloc otherwise), the stream is scanned from offset 0 with
+ * mp3dec_iterate_cb()/mp3dec_load_index(), and the stream is left positioned at start_offset.
+ * Returns MP3D_E_PARAM on invalid arguments or flags, MP3D_E_MEMORY if the buffer cannot be
+ * obtained, MP3D_E_IOERROR on a failed seek, a scan error code, or 0 on success.
+ * On any failure after the buffer was obtained the decoder is released with mp3dec_ex_close(),
+ * matching what the file-based open does, so nothing is leaked.
+ */
+int mp3dec_ex_open_cb(mp3dec_ex_t *dec, mp3dec_io_t *io, int flags)
+{
+    int ret; /* declared once for both the ring and the malloc configuration */
+    if (!dec || !io || (flags & (~MP3D_FLAGS_MASK)))
+        return MP3D_E_PARAM;
+    memset(dec, 0, sizeof(*dec));
+#ifdef MINIMP3_HAVE_RING
+    if ((ret = mp3dec_open_ring(&dec->file, MINIMP3_IO_SIZE)))
+        return ret;
+#else
+    dec->file.size = MINIMP3_IO_SIZE;
+    dec->file.buffer = (const uint8_t*)malloc(dec->file.size);
+    if (!dec->file.buffer)
+        return MP3D_E_MEMORY;
+#endif
+    dec->flags = flags;
+    dec->io = io;
+    mp3dec_init(&dec->mp3d);
+    if (io->seek(0, io->seek_data))
+    {
+        ret = MP3D_E_IOERROR;
+        goto fail;
+    }
+    ret = mp3dec_iterate_cb(io, (uint8_t *)dec->file.buffer, dec->file.size, mp3dec_load_index, dec);
+    if (ret && MP3D_E_USER != ret) /* MP3D_E_USER only means the scan stopped early on purpose */
+        goto fail;
+    if (dec->io->seek(dec->start_offset, dec->io->seek_data))
+    {
+        ret = MP3D_E_IOERROR;
+        goto fail;
+    }
+    mp3dec_init(&dec->mp3d);
+    dec->buffer_samples = 0;
+    dec->indexes_built = !(dec->vbr_tag_found || (flags & MP3D_DO_NOT_SCAN));
+    dec->flags &= (~MP3D_DO_NOT_SCAN);
+    return 0;
+fail:
+    /* frees the input buffer and any partially built index, then zeroes dec */
+    mp3dec_ex_close(dec);
+    return ret;
+}
+
+
+#ifndef MINIMP3_NO_STDIO
+
+#if defined(__linux__) || defined(__FreeBSD__)
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#if !defined(_GNU_SOURCE)
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#endif
+#if !defined(MAP_POPULATE) && defined(__linux__)
+#define MAP_POPULATE 0x08000
+#elif !defined(MAP_POPULATE)
+#define MAP_POPULATE 0
+#endif
+
+/* Unmap a file mapped by mp3dec_open_file() (if any) and reset map_info to empty. */
+static void mp3dec_close_file(mp3dec_map_info_t *map_info)
+{
+    const int is_mapped = map_info->buffer && MAP_FAILED != map_info->buffer;
+    if (is_mapped)
+        munmap((void *)map_info->buffer, map_info->size);
+    map_info->size = 0;
+    map_info->buffer = 0;
+}
+
+/*
+ * Map a whole file read-only into memory.
+ * On success map_info->buffer/size describe the mapping and 0 is returned; the descriptor is
+ * closed in every case since the mapping does not need it.
+ * Returns MP3D_E_PARAM for a NULL file name and MP3D_E_IOERROR when open, fstat or mmap fails;
+ * on failure map_info is left empty (NULL buffer, size 0).
+ * open and mmap are retried while they fail with EAGAIN or EINTR.
+ */
+static int mp3dec_open_file(const char *file_name, mp3dec_map_info_t *map_info)
+{
+    if (!file_name)
+        return MP3D_E_PARAM;
+    int file;
+    struct stat st;
+    memset(map_info, 0, sizeof(*map_info));
+    do
+    {
+        file = open(file_name, O_RDONLY);
+    } while (file < 0 && (errno == EAGAIN || errno == EINTR));
+    if (file < 0)
+        return MP3D_E_IOERROR; /* nothing was opened, so there is no descriptor to close */
+    if (fstat(file, &st) < 0)
+    {
+        close(file);
+        return MP3D_E_IOERROR;
+    }
+
+    map_info->size = st.st_size;
+    do
+    {
+        map_info->buffer = (const uint8_t *)mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE | MAP_POPULATE, file, 0);
+    } while (MAP_FAILED == map_info->buffer && (errno == EAGAIN || errno == EINTR));
+    close(file);
+    if (MAP_FAILED == map_info->buffer)
+    {
+        /* do not leave MAP_FAILED behind where callers expect a buffer pointer */
+        map_info->buffer = 0;
+        map_info->size = 0;
+        return MP3D_E_IOERROR;
+    }
+    return 0;
+}
+
+#if MINIMP3_ENABLE_RING && defined(__linux__) && defined(_GNU_SOURCE)
+#define MINIMP3_HAVE_RING
+/*
+ * Release a ring buffer set up by mp3dec_open_ring() and reset map_info to empty.
+ * The mmap variant unmaps the whole double-size region; the shm variant detaches both halves.
+ */
+static void mp3dec_close_ring(mp3dec_map_info_t *map_info)
+{
+#if defined(__linux__) && defined(_GNU_SOURCE)
+    const int is_mapped = map_info->buffer && MAP_FAILED != map_info->buffer;
+    if (is_mapped)
+        munmap((void *)map_info->buffer, map_info->size*2);
+#else
+    if (map_info->buffer)
+    {
+        shmdt(map_info->buffer);
+        shmdt(map_info->buffer + map_info->size);
+    }
+#endif
+    map_info->size = 0;
+    map_info->buffer = 0;
+}
+
+/*
+ * Create a ring buffer: the same memory object is mapped twice back to back, so the address
+ * range [buffer, buffer + 2*size) shows the same `size` bytes two times.
+ * size is rounded up to a multiple of the page size and stored in map_info->size.
+ * Returns 0 on success and MP3D_E_MEMORY on any failure (map_info is then reset through
+ * mp3dec_close_ring() on the paths that reach the error label).
+ */
+static int mp3dec_open_ring(mp3dec_map_info_t *map_info, size_t size)
+{
+    int memfd, page_size;
+#if defined(__linux__) && defined(_GNU_SOURCE)
+    void *buffer;
+    int res;
+#endif
+    memset(map_info, 0, sizeof(*map_info));
+
+#ifdef _SC_PAGESIZE
+    page_size = sysconf(_SC_PAGESIZE);
+#else
+    page_size = getpagesize();
+#endif
+    map_info->size = (size + page_size - 1)/page_size*page_size;
+
+#if defined(__linux__) && defined(_GNU_SOURCE)
+    memfd = memfd_create("mp3_ring", 0);
+    if (memfd < 0)
+        return MP3D_E_MEMORY;
+
+retry_ftruncate:
+    res = ftruncate(memfd, map_info->size);
+    if (res && (errno == EAGAIN || errno == EINTR))
+        goto retry_ftruncate;
+    if (res)
+        goto error;
+
+    /* reserve a contiguous address range twice the ring size */
+retry_mmap:
+    map_info->buffer = (const uint8_t *)mmap(NULL, map_info->size*2, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+    if (MAP_FAILED == map_info->buffer && (errno == EAGAIN || errno == EINTR))
+        goto retry_mmap;
+    if (MAP_FAILED == map_info->buffer || !map_info->buffer)
+        goto error;
+    /* map the memfd over the first half; the result of THIS call is what has to be checked */
+retry_mmap2:
+    buffer = mmap((void *)map_info->buffer, map_info->size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, memfd, 0);
+    if (MAP_FAILED == buffer && (errno == EAGAIN || errno == EINTR))
+        goto retry_mmap2;
+    if (MAP_FAILED == buffer || buffer != (void *)map_info->buffer)
+        goto error;
+    /* map the same memfd over the second half */
+retry_mmap3:
+    buffer = mmap((void *)(map_info->buffer + map_info->size), map_info->size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, memfd, 0);
+    if (MAP_FAILED == buffer && (errno == EAGAIN || errno == EINTR))
+        goto retry_mmap3;
+    if (MAP_FAILED == buffer || buffer != (void *)(map_info->buffer + map_info->size))
+        goto error;
+
+    close(memfd);
+    return 0;
+error:
+    close(memfd);
+    mp3dec_close_ring(map_info);
+    return MP3D_E_MEMORY;
+#else
+    memfd = shmget(IPC_PRIVATE, map_info->size, IPC_CREAT | 0700);
+    if (memfd < 0)
+        return MP3D_E_MEMORY;
+retry_mmap:
+    map_info->buffer = (const uint8_t *)mmap(NULL, map_info->size*2, PROT_NONE, MAP_PRIVATE, -1, 0);
+    if (MAP_FAILED == map_info->buffer && (errno == EAGAIN || errno == EINTR))
+        goto retry_mmap;
+    if (MAP_FAILED == map_info->buffer)
+        goto error;
+    if (map_info->buffer != shmat(memfd, map_info->buffer, 0))
+        goto error;
+    if ((map_info->buffer + map_info->size) != shmat(memfd, map_info->buffer + map_info->size, 0))
+        goto error;
+    if (shmctl(memfd, IPC_RMID, NULL) < 0)
+        return MP3D_E_MEMORY;
+    return 0;
+error:
+    shmctl(memfd, IPC_RMID, NULL);
+    mp3dec_close_ring(map_info);
+    return MP3D_E_MEMORY;
+#endif
+}
+#endif /*MINIMP3_ENABLE_RING*/
+#elif defined(_WIN32)
+#include <windows.h>
+
+/* Unmap the file view held in map_info (if any) and reset map_info to empty. */
+static void mp3dec_close_file(mp3dec_map_info_t *map_info)
+{
+    if (map_info->buffer != 0)
+    {
+        UnmapViewOfFile(map_info->buffer);
+    }
+    map_info->size = 0;
+    map_info->buffer = 0;
+}
+
+/*
+ * Map the whole file behind an open handle read-only into memory.
+ * The handle is closed before returning, on success and on failure alike.
+ * Returns 0 with map_info->buffer/size filled in, or MP3D_E_IOERROR with map_info reset.
+ */
+static int mp3dec_open_file_h(HANDLE file, mp3dec_map_info_t *map_info)
+{
+    int status = MP3D_E_IOERROR;
+    HANDLE mapping = NULL;
+    LARGE_INTEGER file_size;
+    memset(map_info, 0, sizeof(*map_info));
+
+    file_size.LowPart = GetFileSize(file, (DWORD*)&file_size.HighPart);
+    /* INVALID_FILE_SIZE is also a legal low word, so it only signals failure together with an error code */
+    if (!(file_size.LowPart == INVALID_FILE_SIZE && GetLastError() != NO_ERROR))
+    {
+        map_info->size = file_size.QuadPart;
+        mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
+        if (mapping)
+        {
+            map_info->buffer = (const uint8_t*)MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, file_size.QuadPart);
+            CloseHandle(mapping);
+            if (map_info->buffer)
+                status = 0;
+        }
+    }
+    if (status)
+        mp3dec_close_file(map_info);
+    CloseHandle(file);
+    return status;
+}
+
+/*
+ * Open file_name (ANSI path) for shared reading and map it through mp3dec_open_file_h().
+ * Returns MP3D_E_PARAM for a NULL name, MP3D_E_IOERROR if the file cannot be opened,
+ * otherwise the result of mp3dec_open_file_h().
+ */
+static int mp3dec_open_file(const char *file_name, mp3dec_map_info_t *map_info)
+{
+    HANDLE handle;
+    if (!file_name)
+        return MP3D_E_PARAM;
+    handle = CreateFileA(file_name, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, 0, 0);
+    return (INVALID_HANDLE_VALUE == handle) ? MP3D_E_IOERROR : mp3dec_open_file_h(handle, map_info);
+}
+
+/*
+ * Open file_name (wide-character path) for shared reading and map it through mp3dec_open_file_h().
+ * Returns MP3D_E_PARAM for a NULL name, MP3D_E_IOERROR if the file cannot be opened,
+ * otherwise the result of mp3dec_open_file_h().
+ */
+static int mp3dec_open_file_w(const wchar_t *file_name, mp3dec_map_info_t *map_info)
+{
+    HANDLE handle;
+    if (!file_name)
+        return MP3D_E_PARAM;
+    handle = CreateFileW(file_name, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, 0, 0);
+    return (INVALID_HANDLE_VALUE == handle) ? MP3D_E_IOERROR : mp3dec_open_file_h(handle, map_info);
+}
+#else
+#include <stdio.h>
+
+/* Free the heap copy of the file held in map_info and reset map_info to empty. */
+static void mp3dec_close_file(mp3dec_map_info_t *map_info)
+{
+    /* free() accepts NULL, so an empty map_info needs no special case */
+    free((void *)map_info->buffer);
+    map_info->size = 0;
+    map_info->buffer = 0;
+}
+
+/*
+ * Portable fallback: read the whole file into a malloc'ed buffer.
+ * Returns 0 with map_info->buffer/size filled in, MP3D_E_PARAM for a NULL name,
+ * MP3D_E_MEMORY if the buffer cannot be allocated and MP3D_E_IOERROR for any stdio failure.
+ * map_info is reset through mp3dec_close_file() whenever reading fails after the file was opened.
+ */
+static int mp3dec_open_file(const char *file_name, mp3dec_map_info_t *map_info)
+{
+    FILE *fp;
+    long length = -1;
+    int status = MP3D_E_IOERROR;
+    if (!file_name)
+        return MP3D_E_PARAM;
+    memset(map_info, 0, sizeof(*map_info));
+    fp = fopen(file_name, "rb");
+    if (!fp)
+        return MP3D_E_IOERROR;
+    /* the file length is taken from the position reported after seeking to the end */
+    if (!fseek(fp, 0, SEEK_END) && (length = ftell(fp)) >= 0)
+    {
+        map_info->size = (size_t)length;
+        if (!fseek(fp, 0, SEEK_SET))
+        {
+            map_info->buffer = (uint8_t *)malloc(map_info->size);
+            if (!map_info->buffer)
+                status = MP3D_E_MEMORY;
+            else if (fread((void *)map_info->buffer, 1, map_info->size, fp) == map_info->size)
+                status = 0;
+        }
+    }
+    if (status)
+        mp3dec_close_file(map_info);
+    fclose(fp);
+    return status;
+}
+
+/* Run mp3dec_detect_buf() on an opened file, close the file and pass the result through. */
+static int mp3dec_detect_mapinfo(mp3dec_map_info_t *map_info)
+{
+    const int status = mp3dec_detect_buf(map_info->buffer, map_info->size);
+    mp3dec_close_file(map_info);
+    return status;
+}
+
+/* Run mp3dec_load_buf() on an opened file, close the file and pass the result through. */
+static int mp3dec_load_mapinfo(mp3dec_t *dec, mp3dec_map_info_t *map_info, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data)
+{
+    const int status = mp3dec_load_buf(dec, map_info->buffer, map_info->size, info, progress_cb, user_data);
+    mp3dec_close_file(map_info);
+    return status;
+}
+
+/* Run mp3dec_iterate_buf() on an opened file, close the file and pass the result through. */
+static int mp3dec_iterate_mapinfo(mp3dec_map_info_t *map_info, MP3D_ITERATE_CB callback, void *user_data)
+{
+    const int status = mp3dec_iterate_buf(map_info->buffer, map_info->size, callback, user_data);
+    mp3dec_close_file(map_info);
+    return status;
+}
+
+/*
+ * Finish opening a decoder whose dec->file already holds an opened file.
+ * The decoder is marked as owning the file (is_file) so that mp3dec_ex_close() releases it;
+ * if mp3dec_ex_open_buf() fails the decoder is closed right away.
+ * Returns the result of mp3dec_ex_open_buf().
+ */
+static int mp3dec_ex_open_mapinfo(mp3dec_ex_t *dec, int flags)
+{
+    const int status = mp3dec_ex_open_buf(dec, dec->file.buffer, dec->file.size, flags);
+    dec->is_file = 1;
+    if (status != 0)
+    {
+        mp3dec_ex_close(dec);
+    }
+    return status;
+}
+
+/* Open file_name and run detection on its contents; returns the open error or the detection result. */
+int mp3dec_detect(const char *file_name)
+{
+    mp3dec_map_info_t map_info;
+    const int open_status = mp3dec_open_file(file_name, &map_info);
+    if (open_status)
+        return open_status;
+    return mp3dec_detect_mapinfo(&map_info);
+}
+
+/* Open file_name and decode it fully into info; returns the open error or the load result. */
+int mp3dec_load(mp3dec_t *dec, const char *file_name, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data)
+{
+    mp3dec_map_info_t map_info;
+    const int open_status = mp3dec_open_file(file_name, &map_info);
+    if (open_status)
+        return open_status;
+    return mp3dec_load_mapinfo(dec, &map_info, info, progress_cb, user_data);
+}
+
+/* Open file_name and iterate over its frames; returns the open error or the iteration result. */
+int mp3dec_iterate(const char *file_name, MP3D_ITERATE_CB callback, void *user_data)
+{
+    mp3dec_map_info_t map_info;
+    const int open_status = mp3dec_open_file(file_name, &map_info);
+    if (open_status)
+        return open_status;
+    return mp3dec_iterate_mapinfo(&map_info, callback, user_data);
+}
+
+/*
+ * Open file_name into dec->file and initialize dec for decoding from it.
+ * Returns MP3D_E_PARAM for a NULL dec, the error from opening the file, or the result of
+ * mp3dec_ex_open_mapinfo().
+ */
+int mp3dec_ex_open(mp3dec_ex_t *dec, const char *file_name, int flags)
+{
+    int open_status;
+    if (!dec)
+        return MP3D_E_PARAM;
+    open_status = mp3dec_open_file(file_name, &dec->file);
+    return open_status ? open_status : mp3dec_ex_open_mapinfo(dec, flags);
+}
+
+/*
+ * Release everything a mp3dec_ex_t owns and zero the structure.
+ * In io mode the internal input buffer is released (ring mapping or malloc'ed block), in file
+ * mode the opened file is closed, and the frame index is freed. Because dec is zeroed at the
+ * end, calling this again on the same object does nothing harmful. A NULL dec is ignored, in
+ * line with the NULL checks of the other mp3dec_ex_* entry points.
+ */
+void mp3dec_ex_close(mp3dec_ex_t *dec)
+{
+    if (!dec)
+        return;
+#ifdef MINIMP3_HAVE_RING
+    if (dec->io)
+        mp3dec_close_ring(&dec->file);
+#else
+    if (dec->io && dec->file.buffer)
+        free((void*)dec->file.buffer);
+#endif
+    if (dec->is_file)
+        mp3dec_close_file(&dec->file);
+    if (dec->index.frames)
+        free(dec->index.frames);
+    memset(dec, 0, sizeof(*dec));
+}
+
+#ifdef _WIN32
+/* Wide-character path variant of mp3dec_detect(): open the file, then run detection on it. */
+int mp3dec_detect_w(const wchar_t *file_name)
+{
+    mp3dec_map_info_t map_info;
+    const int open_status = mp3dec_open_file_w(file_name, &map_info);
+    if (open_status)
+        return open_status;
+    return mp3dec_detect_mapinfo(&map_info);
+}
+
+/* Wide-character path variant of mp3dec_load(): open the file, then decode it fully into info. */
+int mp3dec_load_w(mp3dec_t *dec, const wchar_t *file_name, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data)
+{
+    mp3dec_map_info_t map_info;
+    const int open_status = mp3dec_open_file_w(file_name, &map_info);
+    if (open_status)
+        return open_status;
+    return mp3dec_load_mapinfo(dec, &map_info, info, progress_cb, user_data);
+}
+
+/* Wide-character path variant of mp3dec_iterate(): open the file, then iterate over its frames. */
+int mp3dec_iterate_w(const wchar_t *file_name, MP3D_ITERATE_CB callback, void *user_data)
+{
+    mp3dec_map_info_t map_info;
+    const int open_status = mp3dec_open_file_w(file_name, &map_info);
+    if (open_status)
+        return open_status;
+    return mp3dec_iterate_mapinfo(&map_info, callback, user_data);
+}
+
+/*
+ * Wide-character path variant of mp3dec_ex_open(): open file_name into dec->file and initialize
+ * dec for decoding from it.
+ * Returns MP3D_E_PARAM for a NULL dec (checked before &dec->file is formed, as mp3dec_ex_open()
+ * does), the error from opening the file, or the result of mp3dec_ex_open_mapinfo().
+ */
+int mp3dec_ex_open_w(mp3dec_ex_t *dec, const wchar_t *file_name, int flags)
+{
+    int ret;
+    if (!dec)
+        return MP3D_E_PARAM;
+    if ((ret = mp3dec_open_file_w(file_name, &dec->file)))
+        return ret;
+    return mp3dec_ex_open_mapinfo(dec, flags);
+}
+#endif
+#else /* MINIMP3_NO_STDIO */
+/*
+ * Release everything a mp3dec_ex_t owns and zero the structure (build without stdio support).
+ * In io mode the internal input buffer is released (ring mapping or malloc'ed block) and the
+ * frame index is freed. Because dec is zeroed at the end, calling this again on the same object
+ * does nothing harmful. A NULL dec is ignored, in line with the NULL checks of the other
+ * mp3dec_ex_* entry points.
+ */
+void mp3dec_ex_close(mp3dec_ex_t *dec)
+{
+    if (!dec)
+        return;
+#ifdef MINIMP3_HAVE_RING
+    if (dec->io)
+        mp3dec_close_ring(&dec->file);
+#else
+    if (dec->io && dec->file.buffer)
+        free((void*)dec->file.buffer);
+#endif
+    if (dec->index.frames)
+        free(dec->index.frames);
+    memset(dec, 0, sizeof(*dec));
+}
+#endif
+
+#endif /*MINIMP3_IMPLEMENTATION*/
diff --git a/thirdparty/misc/easing_equations.cpp b/thirdparty/misc/easing_equations.cpp
index bc84564b19..af48aaf079 100644
--- a/thirdparty/misc/easing_equations.cpp
+++ b/thirdparty/misc/easing_equations.cpp
@@ -188,7 +188,8 @@ static real_t out_in(real_t t, real_t b, real_t c, real_t d) {
///////////////////////////////////////////////////////////////////////////
namespace cubic {
static real_t in(real_t t, real_t b, real_t c, real_t d) {
- return c * (t /= d) * t * t + b;
+ t /= d;
+ return c * t * t * t + b;
}
static real_t out(real_t t, real_t b, real_t c, real_t d) {
@@ -197,8 +198,10 @@ static real_t out(real_t t, real_t b, real_t c, real_t d) {
}
static real_t in_out(real_t t, real_t b, real_t c, real_t d) {
- if ((t /= d / 2) < 1) return c / 2 * t * t * t + b;
- return c / 2 * ((t -= 2) * t * t + 2) + b;
+ t /= d / 2;
+ if (t < 1) return c / 2 * t * t * t + b;
+ t -= 2;
+ return c / 2 * (t * t * t + 2) + b;
}
static real_t out_in(real_t t, real_t b, real_t c, real_t d) {
@@ -210,16 +213,22 @@ static real_t out_in(real_t t, real_t b, real_t c, real_t d) {
///////////////////////////////////////////////////////////////////////////
namespace circ {
static real_t in(real_t t, real_t b, real_t c, real_t d) {
- return -c * (sqrt(1 - (t /= d) * t) - 1) + b; // TODO: ehrich: operation with t is undefined
+ t /= d;
+ return -c * (sqrt(1 - t * t) - 1) + b;
}
static real_t out(real_t t, real_t b, real_t c, real_t d) {
- return c * sqrt(1 - (t = t / d - 1) * t) + b; // TODO: ehrich: operation with t is undefined
+ t = t / d - 1;
+ return c * sqrt(1 - t * t) + b;
}
static real_t in_out(real_t t, real_t b, real_t c, real_t d) {
- if ((t /= d / 2) < 1) return -c / 2 * (sqrt(1 - t * t) - 1) + b;
- return c / 2 * (sqrt(1 - t * (t -= 2)) + 1) + b; // TODO: ehrich: operation with t is undefined
+ t /= d / 2;
+ if (t < 1) {
+ return -c / 2 * (sqrt(1 - t * t) - 1) + b;
+ }
+ t -= 2;
+ return c / 2 * (sqrt(1 - t * t) + 1) + b;
}
static real_t out_in(real_t t, real_t b, real_t c, real_t d) {
@@ -271,14 +280,16 @@ static real_t in(real_t t, real_t b, real_t c, real_t d) {
static real_t out(real_t t, real_t b, real_t c, real_t d) {
float s = 1.70158f;
- return c * ((t = t / d - 1) * t * ((s + 1) * t + s) + 1) + b; // TODO: ehrich: operation with t is undefined
+ t = t / d - 1;
+ return c * (t * t * ((s + 1) * t + s) + 1) + b;
}
static real_t in_out(real_t t, real_t b, real_t c, real_t d) {
- float s = 1.70158f;
- if ((t /= d / 2) < 1) return c / 2 * (t * t * (((s *= (1.525f)) + 1) * t - s)) + b; // TODO: ehrich: operation with s is undefined
- float postFix = t -= 2;
- return c / 2 * ((postFix)*t * (((s *= (1.525f)) + 1) * t + s) + 2) + b; // TODO: ehrich: operation with s is undefined
+ float s = 1.70158f * 1.525f;
+ t /= d / 2;
+ if (t < 1) return c / 2 * (t * t * ((s + 1) * t - s)) + b;
+ t -= 2;
+ return c / 2 * (t * t * ((s + 1) * t + s) + 2) + b;
}
static real_t out_in(real_t t, real_t b, real_t c, real_t d) {
diff --git a/thirdparty/misc/open-simplex-noise.c b/thirdparty/misc/open-simplex-noise.c
index 88fbd3e51d..44a072cad1 100644
--- a/thirdparty/misc/open-simplex-noise.c
+++ b/thirdparty/misc/open-simplex-noise.c
@@ -100,27 +100,27 @@ static const signed char gradients4D[] = {
-3, -1, -1, -1, -1, -3, -1, -1, -1, -1, -3, -1, -1, -1, -1, -3,
};
-static double extrapolate2(struct osn_context *ctx, int xsb, int ysb, double dx, double dy)
+static double extrapolate2(const struct osn_context *ctx, int xsb, int ysb, double dx, double dy)
{
- int16_t *perm = ctx->perm;
+ const int16_t *perm = ctx->perm;
int index = perm[(perm[xsb & 0xFF] + ysb) & 0xFF] & 0x0E;
return gradients2D[index] * dx
+ gradients2D[index + 1] * dy;
}
-static double extrapolate3(struct osn_context *ctx, int xsb, int ysb, int zsb, double dx, double dy, double dz)
+static double extrapolate3(const struct osn_context *ctx, int xsb, int ysb, int zsb, double dx, double dy, double dz)
{
- int16_t *perm = ctx->perm;
- int16_t *permGradIndex3D = ctx->permGradIndex3D;
+ const int16_t *perm = ctx->perm;
+ const int16_t *permGradIndex3D = ctx->permGradIndex3D;
int index = permGradIndex3D[(perm[(perm[xsb & 0xFF] + ysb) & 0xFF] + zsb) & 0xFF];
return gradients3D[index] * dx
+ gradients3D[index + 1] * dy
+ gradients3D[index + 2] * dz;
}
-static double extrapolate4(struct osn_context *ctx, int xsb, int ysb, int zsb, int wsb, double dx, double dy, double dz, double dw)
+static double extrapolate4(const struct osn_context *ctx, int xsb, int ysb, int zsb, int wsb, double dx, double dy, double dz, double dw)
{
- int16_t *perm = ctx->perm;
+ const int16_t *perm = ctx->perm;
int index = perm[(perm[(perm[(perm[xsb & 0xFF] + ysb) & 0xFF] + zsb) & 0xFF] + wsb) & 0xFF] & 0xFC;
return gradients4D[index] * dx
+ gradients4D[index + 1] * dy
@@ -227,7 +227,7 @@ void open_simplex_noise_free(struct osn_context *ctx)
// -- GODOT end --
/* 2D OpenSimplex (Simplectic) Noise. */
-double open_simplex_noise2(struct osn_context *ctx, double x, double y)
+double open_simplex_noise2(const struct osn_context *ctx, double x, double y)
{
/* Place input coordinates onto grid. */
@@ -355,7 +355,7 @@ double open_simplex_noise2(struct osn_context *ctx, double x, double y)
/*
* 3D OpenSimplex (Simplectic) Noise
*/
-double open_simplex_noise3(struct osn_context *ctx, double x, double y, double z)
+double open_simplex_noise3(const struct osn_context *ctx, double x, double y, double z)
{
/* Place input coordinates on simplectic honeycomb. */
@@ -928,7 +928,7 @@ double open_simplex_noise3(struct osn_context *ctx, double x, double y, double z
/*
* 4D OpenSimplex (Simplectic) Noise.
*/
-double open_simplex_noise4(struct osn_context *ctx, double x, double y, double z, double w)
+double open_simplex_noise4(const struct osn_context *ctx, double x, double y, double z, double w)
{
double uins;
double dx1, dy1, dz1, dw1;
diff --git a/thirdparty/misc/open-simplex-noise.h b/thirdparty/misc/open-simplex-noise.h
index 89e0df8218..fd9248c3a1 100644
--- a/thirdparty/misc/open-simplex-noise.h
+++ b/thirdparty/misc/open-simplex-noise.h
@@ -47,9 +47,9 @@ int open_simplex_noise(int64_t seed, struct osn_context *ctx);
//int open_simplex_noise_init_perm(struct osn_context *ctx, int16_t p[], int nelements);
// -- GODOT end --
void open_simplex_noise_free(struct osn_context *ctx);
-double open_simplex_noise2(struct osn_context *ctx, double x, double y);
-double open_simplex_noise3(struct osn_context *ctx, double x, double y, double z);
-double open_simplex_noise4(struct osn_context *ctx, double x, double y, double z, double w);
+double open_simplex_noise2(const struct osn_context *ctx, double x, double y);
+double open_simplex_noise3(const struct osn_context *ctx, double x, double y, double z);
+double open_simplex_noise4(const struct osn_context *ctx, double x, double y, double z, double w);
#ifdef __cplusplus
}
diff --git a/thirdparty/rvo2/src/API.h b/thirdparty/rvo2/API.h
index c64efb452c..c64efb452c 100644
--- a/thirdparty/rvo2/src/API.h
+++ b/thirdparty/rvo2/API.h
diff --git a/thirdparty/rvo2/src/Agent.cpp b/thirdparty/rvo2/Agent.cpp
index 851d780758..851d780758 100644
--- a/thirdparty/rvo2/src/Agent.cpp
+++ b/thirdparty/rvo2/Agent.cpp
diff --git a/thirdparty/rvo2/src/Agent.h b/thirdparty/rvo2/Agent.h
index 16f75a08f6..16f75a08f6 100644
--- a/thirdparty/rvo2/src/Agent.h
+++ b/thirdparty/rvo2/Agent.h
diff --git a/thirdparty/rvo2/src/Definitions.h b/thirdparty/rvo2/Definitions.h
index a73aca9908..a73aca9908 100644
--- a/thirdparty/rvo2/src/Definitions.h
+++ b/thirdparty/rvo2/Definitions.h
diff --git a/thirdparty/rvo2/src/KdTree.cpp b/thirdparty/rvo2/KdTree.cpp
index bc224614f0..bc224614f0 100644
--- a/thirdparty/rvo2/src/KdTree.cpp
+++ b/thirdparty/rvo2/KdTree.cpp
diff --git a/thirdparty/rvo2/src/KdTree.h b/thirdparty/rvo2/KdTree.h
index 1dbad00ea4..1dbad00ea4 100644
--- a/thirdparty/rvo2/src/KdTree.h
+++ b/thirdparty/rvo2/KdTree.h
diff --git a/thirdparty/rvo2/src/Vector3.h b/thirdparty/rvo2/Vector3.h
index 8c8835c865..8c8835c865 100644
--- a/thirdparty/rvo2/src/Vector3.h
+++ b/thirdparty/rvo2/Vector3.h
diff --git a/thirdparty/xatlas/xatlas.cpp b/thirdparty/xatlas/xatlas.cpp
index 43aec33a9f..9f66ae0067 100644
--- a/thirdparty/xatlas/xatlas.cpp
+++ b/thirdparty/xatlas/xatlas.cpp
@@ -33,19 +33,25 @@ https://github.com/brandonpelfrey/Fast-BVH
MIT License
Copyright (c) 2012 Brandon Pelfrey
*/
-#include <atomic>
-#include <condition_variable>
-#include <mutex>
-#include <thread>
+#include "xatlas.h"
+#ifndef XATLAS_C_API
+#define XATLAS_C_API 0
+#endif
+#if XATLAS_C_API
+#include "xatlas_c.h"
+#endif
#include <assert.h>
#include <float.h> // FLT_MAX
#include <limits.h>
#include <math.h>
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <thread>
#define __STDC_LIMIT_MACROS
#include <stdint.h>
#include <stdio.h>
#include <string.h>
-#include "xatlas.h"
#ifndef XA_DEBUG
#ifdef NDEBUG
@@ -59,7 +65,7 @@ Copyright (c) 2012 Brandon Pelfrey
#define XA_PROFILE 0
#endif
#if XA_PROFILE
-#include <time.h>
+#include <chrono>
#endif
#ifndef XA_MULTITHREADED
@@ -70,7 +76,10 @@ Copyright (c) 2012 Brandon Pelfrey
#define XA_XSTR(x) XA_STR(x)
#ifndef XA_ASSERT
-#define XA_ASSERT(exp) if (!(exp)) { XA_PRINT_WARNING("\rASSERT: %s %s %d\n", XA_XSTR(exp), __FILE__, __LINE__); }
+#define XA_ASSERT(exp) \
+ if (!(exp)) { \
+ XA_PRINT_WARNING("\rASSERT: %s %s %d\n", XA_XSTR(exp), __FILE__, __LINE__); \
+ }
#endif
#ifndef XA_DEBUG_ASSERT
@@ -78,13 +87,13 @@ Copyright (c) 2012 Brandon Pelfrey
#endif
#ifndef XA_PRINT
-#define XA_PRINT(...) \
+#define XA_PRINT(...) \
if (xatlas::internal::s_print && xatlas::internal::s_printVerbose) \
xatlas::internal::s_print(__VA_ARGS__);
#endif
#ifndef XA_PRINT_WARNING
-#define XA_PRINT_WARNING(...) \
+#define XA_PRINT_WARNING(...) \
if (xatlas::internal::s_print) \
xatlas::internal::s_print(__VA_ARGS__);
#endif
@@ -116,9 +125,9 @@ Copyright (c) 2012 Brandon Pelfrey
#define XA_MERGE_CHARTS 1
#define XA_MERGE_CHARTS_MIN_NORMAL_DEVIATION 0.5f
#define XA_RECOMPUTE_CHARTS 1
-#define XA_CLOSE_HOLES_CHECK_EDGE_INTERSECTION 0
-#define XA_FIX_INTERNAL_BOUNDARY_LOOPS 1
-#define XA_PRINT_CHART_WARNINGS 0
+#define XA_CHECK_PARAM_WINDING 0
+#define XA_CHECK_PIECEWISE_CHART_QUALITY 0
+#define XA_CHECK_T_JUNCTIONS 0
#define XA_DEBUG_HEAP 0
#define XA_DEBUG_SINGLE_CHART 0
@@ -131,25 +140,19 @@ Copyright (c) 2012 Brandon Pelfrey
#define XA_DEBUG_EXPORT_OBJ_CHART_GROUPS 0
#define XA_DEBUG_EXPORT_OBJ_PLANAR_REGIONS 0
#define XA_DEBUG_EXPORT_OBJ_CHARTS 0
-#define XA_DEBUG_EXPORT_OBJ_BEFORE_FIX_TJUNCTION 0
-#define XA_DEBUG_EXPORT_OBJ_CLOSE_HOLES_ERROR 0
+#define XA_DEBUG_EXPORT_OBJ_TJUNCTION 0 // XA_CHECK_T_JUNCTIONS must also be set
#define XA_DEBUG_EXPORT_OBJ_CHARTS_AFTER_PARAMETERIZATION 0
#define XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION 0
#define XA_DEBUG_EXPORT_OBJ_RECOMPUTED_CHARTS 0
-#define XA_DEBUG_EXPORT_OBJ (0 \
- || XA_DEBUG_EXPORT_OBJ_FACE_GROUPS \
- || XA_DEBUG_EXPORT_OBJ_CHART_GROUPS \
- || XA_DEBUG_EXPORT_OBJ_PLANAR_REGIONS \
- || XA_DEBUG_EXPORT_OBJ_CHARTS \
- || XA_DEBUG_EXPORT_OBJ_BEFORE_FIX_TJUNCTION \
- || XA_DEBUG_EXPORT_OBJ_CLOSE_HOLES_ERROR \
- || XA_DEBUG_EXPORT_OBJ_CHARTS_AFTER_PARAMETERIZATION \
- || XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION \
- || XA_DEBUG_EXPORT_OBJ_RECOMPUTED_CHARTS)
+#define XA_DEBUG_EXPORT_OBJ (0 || XA_DEBUG_EXPORT_OBJ_FACE_GROUPS || XA_DEBUG_EXPORT_OBJ_CHART_GROUPS || XA_DEBUG_EXPORT_OBJ_PLANAR_REGIONS || XA_DEBUG_EXPORT_OBJ_CHARTS || XA_DEBUG_EXPORT_OBJ_TJUNCTION || XA_DEBUG_EXPORT_OBJ_CHARTS_AFTER_PARAMETERIZATION || XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION || XA_DEBUG_EXPORT_OBJ_RECOMPUTED_CHARTS)
#ifdef _MSC_VER
-#define XA_FOPEN(_file, _filename, _mode) { if (fopen_s(&_file, _filename, _mode) != 0) _file = NULL; }
+#define XA_FOPEN(_file, _filename, _mode) \
+ { \
+ if (fopen_s(&_file, _filename, _mode) != 0) \
+ _file = NULL; \
+ }
#define XA_SPRINTF(_buffer, _size, _format, ...) sprintf_s(_buffer, _size, _format, __VA_ARGS__)
#else
#define XA_FOPEN(_file, _filename, _mode) _file = fopen(_filename, _mode)
@@ -165,74 +168,76 @@ static PrintFunc s_print = printf;
static bool s_printVerbose = false;
#if XA_PROFILE
-#define XA_PROFILE_START(var) const clock_t var##Start = clock();
-#define XA_PROFILE_END(var) internal::s_profile.var += clock() - var##Start;
-#define XA_PROFILE_PRINT_AND_RESET(label, var) XA_PRINT("%s%.2f seconds (%g ms)\n", label, internal::clockToSeconds(internal::s_profile.var), internal::clockToMs(internal::s_profile.var)); internal::s_profile.var = 0;
+typedef uint64_t Duration;
+
+#define XA_PROFILE_START(var) const std::chrono::time_point<std::chrono::high_resolution_clock> var##Start = std::chrono::high_resolution_clock::now();
+#define XA_PROFILE_END(var) internal::s_profile.var += uint64_t(std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - var##Start).count());
+#define XA_PROFILE_PRINT_AND_RESET(label, var) \
+ XA_PRINT("%s%.2f seconds (%g ms)\n", label, internal::durationToSeconds(internal::s_profile.var), internal::durationToMs(internal::s_profile.var)); \
+ internal::s_profile.var = 0u;
#define XA_PROFILE_ALLOC 0
-struct ProfileData
-{
+struct ProfileData {
#if XA_PROFILE_ALLOC
- std::atomic<clock_t> alloc;
+ std::atomic<Duration> alloc;
#endif
- clock_t addMeshReal;
- clock_t addMeshCopyData;
- std::atomic<clock_t> addMeshThread;
- std::atomic<clock_t> addMeshCreateColocals;
- clock_t computeChartsReal;
- std::atomic<clock_t> computeChartsThread;
- std::atomic<clock_t> createFaceGroups;
- std::atomic<clock_t> extractInvalidMeshGeometry;
- std::atomic<clock_t> chartGroupComputeChartsReal;
- std::atomic<clock_t> chartGroupComputeChartsThread;
- std::atomic<clock_t> createChartGroupMesh;
- std::atomic<clock_t> createChartGroupMeshColocals;
- std::atomic<clock_t> createChartGroupMeshBoundaries;
- std::atomic<clock_t> buildAtlas;
- std::atomic<clock_t> buildAtlasInit;
- std::atomic<clock_t> planarCharts;
- std::atomic<clock_t> clusteredCharts;
- std::atomic<clock_t> clusteredChartsPlaceSeeds;
- std::atomic<clock_t> clusteredChartsPlaceSeedsBoundaryIntersection;
- std::atomic<clock_t> clusteredChartsRelocateSeeds;
- std::atomic<clock_t> clusteredChartsReset;
- std::atomic<clock_t> clusteredChartsGrow;
- std::atomic<clock_t> clusteredChartsGrowBoundaryIntersection;
- std::atomic<clock_t> clusteredChartsMerge;
- std::atomic<clock_t> clusteredChartsFillHoles;
- std::atomic<clock_t> copyChartFaces;
- clock_t parameterizeChartsReal;
- std::atomic<clock_t> parameterizeChartsThread;
- std::atomic<clock_t> createChartMesh;
- std::atomic<clock_t> fixChartMeshTJunctions;
- std::atomic<clock_t> closeChartMeshHoles;
- std::atomic<clock_t> parameterizeChartsOrthogonal;
- std::atomic<clock_t> parameterizeChartsLSCM;
- std::atomic<clock_t> parameterizeChartsRecompute;
- std::atomic<clock_t> parameterizeChartsPiecewise;
- std::atomic<clock_t> parameterizeChartsPiecewiseBoundaryIntersection;
- std::atomic<clock_t> parameterizeChartsEvaluateQuality;
- clock_t packCharts;
- clock_t packChartsAddCharts;
- std::atomic<clock_t> packChartsAddChartsThread;
- std::atomic<clock_t> packChartsAddChartsRestoreTexcoords;
- clock_t packChartsRasterize;
- clock_t packChartsDilate;
- clock_t packChartsFindLocation;
- clock_t packChartsBlit;
- clock_t buildOutputMeshes;
+ std::chrono::time_point<std::chrono::high_resolution_clock> addMeshRealStart;
+ Duration addMeshReal;
+ Duration addMeshCopyData;
+ std::atomic<Duration> addMeshThread;
+ std::atomic<Duration> addMeshCreateColocals;
+ Duration computeChartsReal;
+ std::atomic<Duration> computeChartsThread;
+ std::atomic<Duration> createFaceGroups;
+ std::atomic<Duration> extractInvalidMeshGeometry;
+ std::atomic<Duration> chartGroupComputeChartsReal;
+ std::atomic<Duration> chartGroupComputeChartsThread;
+ std::atomic<Duration> createChartGroupMesh;
+ std::atomic<Duration> createChartGroupMeshColocals;
+ std::atomic<Duration> createChartGroupMeshBoundaries;
+ std::atomic<Duration> buildAtlas;
+ std::atomic<Duration> buildAtlasInit;
+ std::atomic<Duration> planarCharts;
+ std::atomic<Duration> originalUvCharts;
+ std::atomic<Duration> clusteredCharts;
+ std::atomic<Duration> clusteredChartsPlaceSeeds;
+ std::atomic<Duration> clusteredChartsPlaceSeedsBoundaryIntersection;
+ std::atomic<Duration> clusteredChartsRelocateSeeds;
+ std::atomic<Duration> clusteredChartsReset;
+ std::atomic<Duration> clusteredChartsGrow;
+ std::atomic<Duration> clusteredChartsGrowBoundaryIntersection;
+ std::atomic<Duration> clusteredChartsMerge;
+ std::atomic<Duration> clusteredChartsFillHoles;
+ std::atomic<Duration> copyChartFaces;
+ std::atomic<Duration> createChartMeshAndParameterizeReal;
+ std::atomic<Duration> createChartMeshAndParameterizeThread;
+ std::atomic<Duration> createChartMesh;
+ std::atomic<Duration> parameterizeCharts;
+ std::atomic<Duration> parameterizeChartsOrthogonal;
+ std::atomic<Duration> parameterizeChartsLSCM;
+ std::atomic<Duration> parameterizeChartsRecompute;
+ std::atomic<Duration> parameterizeChartsPiecewise;
+ std::atomic<Duration> parameterizeChartsPiecewiseBoundaryIntersection;
+ std::atomic<Duration> parameterizeChartsEvaluateQuality;
+ Duration packCharts;
+ Duration packChartsAddCharts;
+ std::atomic<Duration> packChartsAddChartsThread;
+ std::atomic<Duration> packChartsAddChartsRestoreTexcoords;
+ Duration packChartsRasterize;
+ Duration packChartsDilate;
+ Duration packChartsFindLocation;
+ Duration packChartsBlit;
+ Duration buildOutputMeshes;
};
static ProfileData s_profile;
-static double clockToMs(clock_t c)
-{
- return c * 1000.0 / CLOCKS_PER_SEC;
+static double durationToMs(Duration c) {
+ return (double)c * 0.001;
}
-static double clockToSeconds(clock_t c)
-{
- return c / (double)CLOCKS_PER_SEC;
+static double durationToSeconds(Duration c) {
+ return (double)c * 0.000001;
}
#else
#define XA_PROFILE_START(var)
@@ -241,10 +246,8 @@ static double clockToSeconds(clock_t c)
#define XA_PROFILE_ALLOC 0
#endif
-struct MemTag
-{
- enum
- {
+struct MemTag {
+ enum {
Default,
BitImage,
BVH,
@@ -267,8 +270,7 @@ struct MemTag
};
#if XA_DEBUG_HEAP
-struct AllocHeader
-{
+struct AllocHeader {
size_t size;
const char *file;
int line;
@@ -281,11 +283,10 @@ struct AllocHeader
static std::mutex s_allocMutex;
static AllocHeader *s_allocRoot = nullptr;
static size_t s_allocTotalCount = 0, s_allocTotalSize = 0, s_allocPeakSize = 0, s_allocCount[MemTag::Count] = { 0 }, s_allocTotalTagSize[MemTag::Count] = { 0 }, s_allocPeakTagSize[MemTag::Count] = { 0 };
-static uint32_t s_allocId =0 ;
+static uint32_t s_allocId = 0;
static constexpr uint32_t kAllocRedzone = 0x12345678;
-static void *Realloc(void *ptr, size_t size, int tag, const char *file, int line)
-{
+static void *Realloc(void *ptr, size_t size, int tag, const char *file, int line) {
std::unique_lock<std::mutex> lock(s_allocMutex);
if (!size && !ptr)
return nullptr;
@@ -346,8 +347,7 @@ static void *Realloc(void *ptr, size_t size, int tag, const char *file, int line
return newPtr + sizeof(AllocHeader);
}
-static void ReportLeaks()
-{
+static void ReportLeaks() {
printf("Checking for memory leaks...\n");
bool anyLeaks = false;
AllocHeader *header = s_allocRoot;
@@ -375,8 +375,7 @@ static void ReportLeaks()
s_allocTotalTagSize[i] = s_allocPeakTagSize[i] = 0;
}
-static void PrintMemoryUsage()
-{
+static void PrintMemoryUsage() {
XA_PRINT("Total allocations: %zu\n", s_allocTotalCount);
XA_PRINT("Memory usage: %0.2fMB current, %0.2fMB peak\n", internal::s_allocTotalSize / 1024.0f / 1024.0f, internal::s_allocPeakSize / 1024.0f / 1024.0f);
static const char *labels[] = { // Sync with MemTag
@@ -405,8 +404,7 @@ static void PrintMemoryUsage()
#define XA_PRINT_MEM_USAGE internal::PrintMemoryUsage();
#else
-static void *Realloc(void *ptr, size_t size, int /*tag*/, const char * /*file*/, int /*line*/)
-{
+static void *Realloc(void *ptr, size_t size, int /*tag*/, const char * /*file*/, int /*line*/) {
if (size == 0 && !ptr)
return nullptr;
if (size == 0 && s_free) {
@@ -432,89 +430,75 @@ static constexpr float kEpsilon = 0.0001f;
static constexpr float kAreaEpsilon = FLT_EPSILON;
static constexpr float kNormalEpsilon = 0.001f;
-static int align(int x, int a)
-{
+static int align(int x, int a) {
return (x + a - 1) & ~(a - 1);
}
template <typename T>
-static T max(const T &a, const T &b)
-{
+static T max(const T &a, const T &b) {
return a > b ? a : b;
}
template <typename T>
-static T min(const T &a, const T &b)
-{
+static T min(const T &a, const T &b) {
return a < b ? a : b;
}
template <typename T>
-static T max3(const T &a, const T &b, const T &c)
-{
+static T max3(const T &a, const T &b, const T &c) {
return max(a, max(b, c));
}
/// Return the maximum of the three arguments.
template <typename T>
-static T min3(const T &a, const T &b, const T &c)
-{
+static T min3(const T &a, const T &b, const T &c) {
return min(a, min(b, c));
}
/// Clamp between two values.
template <typename T>
-static T clamp(const T &x, const T &a, const T &b)
-{
+static T clamp(const T &x, const T &a, const T &b) {
return min(max(x, a), b);
}
template <typename T>
-static void swap(T &a, T &b)
-{
+static void swap(T &a, T &b) {
T temp = a;
a = b;
b = temp;
}
-union FloatUint32
-{
+union FloatUint32 {
float f;
uint32_t u;
};
-static bool isFinite(float f)
-{
+static bool isFinite(float f) {
FloatUint32 fu;
fu.f = f;
return fu.u != 0x7F800000u && fu.u != 0x7F800001u;
}
-static bool isNan(float f)
-{
+static bool isNan(float f) {
return f != f;
}
// Robust floating point comparisons:
// http://realtimecollisiondetection.net/blog/?p=89
-static bool equal(const float f0, const float f1, const float epsilon)
-{
+static bool equal(const float f0, const float f1, const float epsilon) {
//return fabs(f0-f1) <= epsilon;
return fabs(f0 - f1) <= epsilon * max3(1.0f, fabsf(f0), fabsf(f1));
}
-static int ftoi_ceil(float val)
-{
+static int ftoi_ceil(float val) {
return (int)ceilf(val);
}
-static bool isZero(const float f, const float epsilon)
-{
+static bool isZero(const float f, const float epsilon) {
return fabs(f) <= epsilon;
}
-static float square(float f)
-{
+static float square(float f) {
return f * f;
}
@@ -524,9 +508,8 @@ static float square(float f)
* @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x
* @note nextPowerOfTwo(x) = 2 << log2(x-1)
*/
-static uint32_t nextPowerOfTwo(uint32_t x)
-{
- XA_DEBUG_ASSERT( x != 0 );
+static uint32_t nextPowerOfTwo(uint32_t x) {
+ XA_DEBUG_ASSERT(x != 0);
// On modern CPUs this is supposed to be as fast as using the bsr instruction.
x--;
x |= x >> 1;
@@ -537,38 +520,34 @@ static uint32_t nextPowerOfTwo(uint32_t x)
return x + 1;
}
-class Vector2
-{
+class Vector2 {
public:
Vector2() {}
- explicit Vector2(float f) : x(f), y(f) {}
- Vector2(float x, float y): x(x), y(y) {}
+ explicit Vector2(float f) :
+ x(f), y(f) {}
+ Vector2(float _x, float _y) :
+ x(_x), y(_y) {}
- Vector2 operator-() const
- {
+ Vector2 operator-() const {
return Vector2(-x, -y);
}
- void operator+=(const Vector2 &v)
- {
+ void operator+=(const Vector2 &v) {
x += v.x;
y += v.y;
}
- void operator-=(const Vector2 &v)
- {
+ void operator-=(const Vector2 &v) {
x -= v.x;
y -= v.y;
}
- void operator*=(float s)
- {
+ void operator*=(float s) {
x *= s;
y *= s;
}
- void operator*=(const Vector2 &v)
- {
+ void operator*=(const Vector2 &v) {
x *= v.x;
y *= v.y;
}
@@ -576,13 +555,11 @@ public:
float x, y;
};
-static bool operator==(const Vector2 &a, const Vector2 &b)
-{
+static bool operator==(const Vector2 &a, const Vector2 &b) {
return a.x == b.x && a.y == b.y;
}
-static bool operator!=(const Vector2 &a, const Vector2 &b)
-{
+static bool operator!=(const Vector2 &a, const Vector2 &b) {
return a.x != b.x || a.y != b.y;
}
@@ -591,78 +568,64 @@ static bool operator!=(const Vector2 &a, const Vector2 &b)
return Vector2(a.x + b.x, a.y + b.y);
}*/
-static Vector2 operator-(const Vector2 &a, const Vector2 &b)
-{
+static Vector2 operator-(const Vector2 &a, const Vector2 &b) {
return Vector2(a.x - b.x, a.y - b.y);
}
-static Vector2 operator*(const Vector2 &v, float s)
-{
+static Vector2 operator*(const Vector2 &v, float s) {
return Vector2(v.x * s, v.y * s);
}
-static float dot(const Vector2 &a, const Vector2 &b)
-{
+static float dot(const Vector2 &a, const Vector2 &b) {
return a.x * b.x + a.y * b.y;
}
-static float lengthSquared(const Vector2 &v)
-{
+static float lengthSquared(const Vector2 &v) {
return v.x * v.x + v.y * v.y;
}
-static float length(const Vector2 &v)
-{
+static float length(const Vector2 &v) {
return sqrtf(lengthSquared(v));
}
#if XA_DEBUG
-static bool isNormalized(const Vector2 &v, float epsilon = kNormalEpsilon)
-{
+static bool isNormalized(const Vector2 &v, float epsilon = kNormalEpsilon) {
return equal(length(v), 1, epsilon);
}
#endif
-static Vector2 normalize(const Vector2 &v, float epsilon)
-{
- float l = length(v);
- XA_DEBUG_ASSERT(!isZero(l, epsilon));
- XA_UNUSED(epsilon);
- Vector2 n = v * (1.0f / l);
+static Vector2 normalize(const Vector2 &v) {
+ const float l = length(v);
+ XA_DEBUG_ASSERT(l > 0.0f); // Never negative.
+ const Vector2 n = v * (1.0f / l);
XA_DEBUG_ASSERT(isNormalized(n));
return n;
}
-static Vector2 normalizeSafe(const Vector2 &v, const Vector2 &fallback, float epsilon)
-{
- float l = length(v);
- if (isZero(l, epsilon))
- return fallback;
- return v * (1.0f / l);
+static Vector2 normalizeSafe(const Vector2 &v, const Vector2 &fallback) {
+ const float l = length(v);
+ if (l > 0.0f) // Never negative.
+ return v * (1.0f / l);
+ return fallback;
}
-static bool equal(const Vector2 &v1, const Vector2 &v2, float epsilon)
-{
+static bool equal(const Vector2 &v1, const Vector2 &v2, float epsilon) {
return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon);
}
-static Vector2 min(const Vector2 &a, const Vector2 &b)
-{
+static Vector2 min(const Vector2 &a, const Vector2 &b) {
return Vector2(min(a.x, b.x), min(a.y, b.y));
}
-static Vector2 max(const Vector2 &a, const Vector2 &b)
-{
+static Vector2 max(const Vector2 &a, const Vector2 &b) {
return Vector2(max(a.x, b.x), max(a.y, b.y));
}
-static bool isFinite(const Vector2 &v)
-{
+static bool isFinite(const Vector2 &v) {
return isFinite(v.x) && isFinite(v.y);
}
-static float triangleArea(const Vector2 &a, const Vector2 &b, const Vector2 &c)
-{
+static float triangleArea(const Vector2 &a, const Vector2 &b, const Vector2 &c) {
// IC: While it may be appealing to use the following expression:
//return (c.x * a.y + a.x * b.y + b.x * c.y - b.x * a.y - c.x * b.y - a.x * c.y) * 0.5f;
// That's actually a terrible idea. Small triangles far from the origin can end up producing fairly large floating point
@@ -676,8 +639,7 @@ static float triangleArea(const Vector2 &a, const Vector2 &b, const Vector2 &c)
return (v0.x * v1.y - v0.y * v1.x) * 0.5f;
}
-static bool linesIntersect(const Vector2 &a1, const Vector2 &a2, const Vector2 &b1, const Vector2 &b2, float epsilon)
-{
+static bool linesIntersect(const Vector2 &a1, const Vector2 &a2, const Vector2 &b1, const Vector2 &b2, float epsilon) {
const Vector2 v0 = a2 - a1;
const Vector2 v1 = b2 - b1;
const float denom = -v1.x * v0.y + v0.x * v1.y;
@@ -685,76 +647,70 @@ static bool linesIntersect(const Vector2 &a1, const Vector2 &a2, const Vector2 &
return false;
const float s = (-v0.y * (a1.x - b1.x) + v0.x * (a1.y - b1.y)) / denom;
if (s > epsilon && s < 1.0f - epsilon) {
- const float t = ( v1.x * (a1.y - b1.y) - v1.y * (a1.x - b1.x)) / denom;
+ const float t = (v1.x * (a1.y - b1.y) - v1.y * (a1.x - b1.x)) / denom;
return t > epsilon && t < 1.0f - epsilon;
}
return false;
}
-struct Vector2i
-{
+struct Vector2i {
Vector2i() {}
- Vector2i(int32_t x, int32_t y) : x(x), y(y) {}
+ Vector2i(int32_t _x, int32_t _y) :
+ x(_x), y(_y) {}
int32_t x, y;
};
-class Vector3
-{
+class Vector3 {
public:
Vector3() {}
- explicit Vector3(float f) : x(f), y(f), z(f) {}
- Vector3(float x, float y, float z) : x(x), y(y), z(z) {}
- Vector3(const Vector2 &v, float z) : x(v.x), y(v.y), z(z) {}
-
- Vector2 xy() const
- {
+ explicit Vector3(float f) :
+ x(f), y(f), z(f) {}
+ Vector3(float _x, float _y, float _z) :
+ x(_x), y(_y), z(_z) {}
+ Vector3(const Vector2 &v, float _z) :
+ x(v.x), y(v.y), z(_z) {}
+
+ Vector2 xy() const {
return Vector2(x, y);
}
- Vector3 operator-() const
- {
+ Vector3 operator-() const {
return Vector3(-x, -y, -z);
}
- void operator+=(const Vector3 &v)
- {
+ void operator+=(const Vector3 &v) {
x += v.x;
y += v.y;
z += v.z;
}
- void operator-=(const Vector3 &v)
- {
+ void operator-=(const Vector3 &v) {
x -= v.x;
y -= v.y;
z -= v.z;
}
- void operator*=(float s)
- {
+ void operator*=(float s) {
x *= s;
y *= s;
z *= s;
}
- void operator/=(float s)
- {
+ void operator/=(float s) {
float is = 1.0f / s;
x *= is;
y *= is;
z *= is;
}
- void operator*=(const Vector3 &v)
- {
+ void operator*=(const Vector3 &v) {
x *= v.x;
y *= v.y;
z *= v.z;
}
- void operator/=(const Vector3 &v)
- {
+ void operator/=(const Vector3 &v) {
x /= v.x;
y /= v.y;
z /= v.z;
@@ -763,260 +719,151 @@ public:
float x, y, z;
};
-static Vector3 operator+(const Vector3 &a, const Vector3 &b)
-{
+static Vector3 operator+(const Vector3 &a, const Vector3 &b) {
return Vector3(a.x + b.x, a.y + b.y, a.z + b.z);
}
-static Vector3 operator-(const Vector3 &a, const Vector3 &b)
-{
+static Vector3 operator-(const Vector3 &a, const Vector3 &b) {
return Vector3(a.x - b.x, a.y - b.y, a.z - b.z);
}
-static Vector3 cross(const Vector3 &a, const Vector3 &b)
-{
+static bool operator==(const Vector3 &a, const Vector3 &b) {
+ return a.x == b.x && a.y == b.y && a.z == b.z;
+}
+
+static Vector3 cross(const Vector3 &a, const Vector3 &b) {
return Vector3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
}
-static Vector3 operator*(const Vector3 &v, float s)
-{
+static Vector3 operator*(const Vector3 &v, float s) {
return Vector3(v.x * s, v.y * s, v.z * s);
}
-static Vector3 operator/(const Vector3 &v, float s)
-{
+static Vector3 operator/(const Vector3 &v, float s) {
return v * (1.0f / s);
}
-static float dot(const Vector3 &a, const Vector3 &b)
-{
+static float dot(const Vector3 &a, const Vector3 &b) {
return a.x * b.x + a.y * b.y + a.z * b.z;
}
-static float lengthSquared(const Vector3 &v)
-{
+static float lengthSquared(const Vector3 &v) {
return v.x * v.x + v.y * v.y + v.z * v.z;
}
-static float length(const Vector3 &v)
-{
+static float length(const Vector3 &v) {
return sqrtf(lengthSquared(v));
}
-static bool isNormalized(const Vector3 &v, float epsilon = kNormalEpsilon)
-{
- return equal(length(v), 1, epsilon);
+static bool isNormalized(const Vector3 &v, float epsilon = kNormalEpsilon) {
+ return equal(length(v), 1.0f, epsilon);
}
-static Vector3 normalize(const Vector3 &v, float epsilon)
-{
- float l = length(v);
- XA_DEBUG_ASSERT(!isZero(l, epsilon));
- XA_UNUSED(epsilon);
- Vector3 n = v * (1.0f / l);
+static Vector3 normalize(const Vector3 &v) {
+ const float l = length(v);
+ XA_DEBUG_ASSERT(l > 0.0f); // Never negative.
+ const Vector3 n = v * (1.0f / l);
XA_DEBUG_ASSERT(isNormalized(n));
return n;
}
-static Vector3 normalizeSafe(const Vector3 &v, const Vector3 &fallback, float epsilon)
-{
- float l = length(v);
- if (isZero(l, epsilon)) {
- return fallback;
- }
- return v * (1.0f / l);
+static Vector3 normalizeSafe(const Vector3 &v, const Vector3 &fallback) {
+ const float l = length(v);
+ if (l > 0.0f) // Never negative.
+ return v * (1.0f / l);
+ return fallback;
}
-static bool equal(const Vector3 &v0, const Vector3 &v1, float epsilon)
-{
+static bool equal(const Vector3 &v0, const Vector3 &v1, float epsilon) {
return fabs(v0.x - v1.x) <= epsilon && fabs(v0.y - v1.y) <= epsilon && fabs(v0.z - v1.z) <= epsilon;
}
-static Vector3 min(const Vector3 &a, const Vector3 &b)
-{
+static Vector3 min(const Vector3 &a, const Vector3 &b) {
return Vector3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
}
-static Vector3 max(const Vector3 &a, const Vector3 &b)
-{
+static Vector3 max(const Vector3 &a, const Vector3 &b) {
return Vector3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
}
#if XA_DEBUG
-bool isFinite(const Vector3 &v)
-{
+bool isFinite(const Vector3 &v) {
return isFinite(v.x) && isFinite(v.y) && isFinite(v.z);
}
#endif
-struct Extents2
-{
+struct Extents2 {
Vector2 min, max;
Extents2() {}
-
- Extents2(Vector2 p1, Vector2 p2)
- {
+
+ Extents2(Vector2 p1, Vector2 p2) {
min = xatlas::internal::min(p1, p2);
max = xatlas::internal::max(p1, p2);
}
- void reset()
- {
+ void reset() {
min.x = min.y = FLT_MAX;
max.x = max.y = -FLT_MAX;
}
- void add(Vector2 p)
- {
+ void add(Vector2 p) {
min = xatlas::internal::min(min, p);
max = xatlas::internal::max(max, p);
}
- Vector2 midpoint() const
- {
+ Vector2 midpoint() const {
return Vector2(min.x + (max.x - min.x) * 0.5f, min.y + (max.y - min.y) * 0.5f);
}
- static bool intersect(const Extents2 &e1, const Extents2 &e2)
- {
+ static bool intersect(const Extents2 &e1, const Extents2 &e2) {
return e1.min.x <= e2.max.x && e1.max.x >= e2.min.x && e1.min.y <= e2.max.y && e1.max.y >= e2.min.y;
}
};
-struct Plane
-{
- Plane() = default;
-
- Plane(const Vector3 &p1, const Vector3 &p2, const Vector3 &p3)
- {
- normal = cross(p2 - p1, p3 - p1);
- dist = dot(normal, p1);
- }
-
- float distance(const Vector3 &p) const
- {
- return dot(normal, p) - dist;
- }
-
- void normalize()
- {
- const float len = length(normal);
- if (len > 0.0f) {
- const float il = 1.0f / len;
- normal *= il;
- dist *= il;
- }
- }
-
- Vector3 normal;
- float dist;
-};
-
-static bool lineIntersectsPoint(const Vector3 &point, const Vector3 &lineStart, const Vector3 &lineEnd, float *t, float epsilon)
-{
- float tt;
- if (!t)
- t = &tt;
- *t = 0.0f;
- if (equal(lineStart, point, epsilon) || equal(lineEnd, point, epsilon))
- return false; // Vertex lies on either line vertices.
- const Vector3 v01 = point - lineStart;
- const Vector3 v21 = lineEnd - lineStart;
- const float l = length(v21);
- const float d = length(cross(v01, v21)) / l;
- if (!isZero(d, epsilon))
- return false;
- *t = dot(v01, v21) / (l * l);
- return *t > kEpsilon && *t < 1.0f - kEpsilon;
-}
-
-static bool sameSide(const Vector3 &p1, const Vector3 &p2, const Vector3 &a, const Vector3 &b)
-{
- const Vector3 &ab = b - a;
- return dot(cross(ab, p1 - a), cross(ab, p2 - a)) >= 0.0f;
-}
-
-// http://blackpawn.com/texts/pointinpoly/default.html
-static bool pointInTriangle(const Vector3 &p, const Vector3 &a, const Vector3 &b, const Vector3 &c)
-{
- return sameSide(p, a, b, c) && sameSide(p, b, a, c) && sameSide(p, c, a, b);
-}
-
-#if XA_CLOSE_HOLES_CHECK_EDGE_INTERSECTION
-// https://en.wikipedia.org/wiki/M%C3%B6ller%E2%80%93Trumbore_intersection_algorithm
-static bool rayIntersectsTriangle(const Vector3 &rayOrigin, const Vector3 &rayDir, const Vector3 *tri, float *t)
-{
- *t = 0.0f;
- const Vector3 &edge1 = tri[1] - tri[0];
- const Vector3 &edge2 = tri[2] - tri[0];
- const Vector3 h = cross(rayDir, edge2);
- const float a = dot(edge1, h);
- if (a > -kEpsilon && a < kEpsilon)
- return false; // This ray is parallel to this triangle.
- const float f = 1.0f / a;
- const Vector3 s = rayOrigin - tri[0];
- const float u = f * dot(s, h);
- if (u < 0.0f || u > 1.0f)
- return false;
- const Vector3 q = cross(s, edge1);
- const float v = f * dot(rayDir, q);
- if (v < 0.0f || u + v > 1.0f)
- return false;
- // At this stage we can compute t to find out where the intersection point is on the line.
- *t = f * dot(edge2, q);
- if (*t > kEpsilon && *t < 1.0f - kEpsilon)
- return true;
- // This means that there is a line intersection but not a ray intersection.
- return false;
-}
-#endif
-
// From Fast-BVH
-struct AABB
-{
- AABB() : min(FLT_MAX, FLT_MAX, FLT_MAX), max(-FLT_MAX, -FLT_MAX, -FLT_MAX) {}
- AABB(const Vector3 &min, const Vector3 &max) : min(min), max(max) { }
- AABB(const Vector3 &p, float radius = 0.0f) : min(p), max(p) { if (radius > 0.0f) expand(radius); }
-
- bool intersect(const AABB &other) const
- {
+struct AABB {
+ AABB() :
+ min(FLT_MAX, FLT_MAX, FLT_MAX), max(-FLT_MAX, -FLT_MAX, -FLT_MAX) {}
+ AABB(const Vector3 &_min, const Vector3 &_max) :
+ min(_min), max(_max) {}
+ AABB(const Vector3 &p, float radius = 0.0f) :
+ min(p), max(p) {
+ if (radius > 0.0f)
+ expand(radius);
+ }
+
+ bool intersect(const AABB &other) const {
return min.x <= other.max.x && max.x >= other.min.x && min.y <= other.max.y && max.y >= other.min.y && min.z <= other.max.z && max.z >= other.min.z;
}
- void expandToInclude(const Vector3 &p)
- {
+ void expandToInclude(const Vector3 &p) {
min = internal::min(min, p);
max = internal::max(max, p);
}
- void expandToInclude(const AABB &aabb)
- {
+ void expandToInclude(const AABB &aabb) {
min = internal::min(min, aabb.min);
max = internal::max(max, aabb.max);
}
- void expand(float amount)
- {
+ void expand(float amount) {
min -= Vector3(amount);
max += Vector3(amount);
}
- Vector3 centroid() const
- {
+ Vector3 centroid() const {
return min + (max - min) * 0.5f;
}
- uint32_t maxDimension() const
- {
+ uint32_t maxDimension() const {
const Vector3 extent = max - min;
uint32_t result = 0;
if (extent.y > extent.x) {
result = 1;
if (extent.z > extent.y)
result = 2;
- }
- else if(extent.z > extent.x)
+ } else if (extent.z > extent.x)
result = 2;
return result;
}
@@ -1024,10 +871,9 @@ struct AABB
Vector3 min, max;
};
-struct ArrayBase
-{
- ArrayBase(uint32_t elementSize, int memTag = MemTag::Default) : buffer(nullptr), elementSize(elementSize), size(0), capacity(0)
- {
+struct ArrayBase {
+ ArrayBase(uint32_t _elementSize, int memTag = MemTag::Default) :
+ buffer(nullptr), elementSize(_elementSize), size(0), capacity(0) {
#if XA_DEBUG_HEAP
this->memTag = memTag;
#else
@@ -1035,31 +881,31 @@ struct ArrayBase
#endif
}
- ~ArrayBase()
- {
+ ~ArrayBase() {
XA_FREE(buffer);
}
- XA_INLINE void clear()
- {
+ XA_INLINE void clear() {
size = 0;
}
- void copyFrom(const uint8_t *data, uint32_t length)
- {
+ void copyFrom(const uint8_t *data, uint32_t length) {
+ XA_DEBUG_ASSERT(data);
+ XA_DEBUG_ASSERT(length > 0);
resize(length, true);
- memcpy(buffer, data, length * elementSize);
+ if (buffer && data && length > 0)
+ memcpy(buffer, data, length * elementSize);
}
- void copyTo(ArrayBase &other) const
- {
+ void copyTo(ArrayBase &other) const {
XA_DEBUG_ASSERT(elementSize == other.elementSize);
+ XA_DEBUG_ASSERT(size > 0);
other.resize(size, true);
- memcpy(other.buffer, buffer, size * elementSize);
+ if (other.buffer && buffer && size > 0)
+ memcpy(other.buffer, buffer, size * elementSize);
}
- void destroy()
- {
+ void destroy() {
size = 0;
XA_FREE(buffer);
buffer = nullptr;
@@ -1068,17 +914,18 @@ struct ArrayBase
}
// Insert the given element at the given index shifting all the elements up.
- void insertAt(uint32_t index, const uint8_t *value)
- {
+ void insertAt(uint32_t index, const uint8_t *value) {
XA_DEBUG_ASSERT(index >= 0 && index <= size);
+ XA_DEBUG_ASSERT(value);
resize(size + 1, false);
- if (index < size - 1)
+ XA_DEBUG_ASSERT(buffer);
+ if (buffer && index < size - 1)
memmove(buffer + elementSize * (index + 1), buffer + elementSize * index, elementSize * (size - 1 - index));
- memcpy(&buffer[index * elementSize], value, elementSize);
+ if (buffer && value)
+ memcpy(&buffer[index * elementSize], value, elementSize);
}
- void moveTo(ArrayBase &other)
- {
+ void moveTo(ArrayBase &other) {
XA_DEBUG_ASSERT(elementSize == other.elementSize);
other.destroy();
other.buffer = buffer;
@@ -1092,55 +939,61 @@ struct ArrayBase
elementSize = size = capacity = 0;
}
- void pop_back()
- {
+ void pop_back() {
XA_DEBUG_ASSERT(size > 0);
resize(size - 1, false);
}
- void push_back(const uint8_t *value)
- {
+ void push_back(const uint8_t *value) {
XA_DEBUG_ASSERT(value < buffer || value >= buffer + size);
+ XA_DEBUG_ASSERT(value);
resize(size + 1, false);
- memcpy(&buffer[(size - 1) * elementSize], value, elementSize);
+ XA_DEBUG_ASSERT(buffer);
+ if (buffer && value)
+ memcpy(&buffer[(size - 1) * elementSize], value, elementSize);
}
- void push_back(const ArrayBase &other)
- {
+ void push_back(const ArrayBase &other) {
XA_DEBUG_ASSERT(elementSize == other.elementSize);
- if (other.size == 0)
- return;
- const uint32_t oldSize = size;
- resize(size + other.size, false);
- memcpy(buffer + oldSize * elementSize, other.buffer, other.size * other.elementSize);
+ if (other.size > 0) {
+ const uint32_t oldSize = size;
+ resize(size + other.size, false);
+ XA_DEBUG_ASSERT(buffer);
+ if (buffer)
+ memcpy(buffer + oldSize * elementSize, other.buffer, other.size * other.elementSize);
+ }
}
// Remove the element at the given index. This is an expensive operation!
- void removeAt(uint32_t index)
- {
+ void removeAt(uint32_t index) {
XA_DEBUG_ASSERT(index >= 0 && index < size);
- if (size != 1)
- memmove(buffer + elementSize * index, buffer + elementSize * (index + 1), elementSize * (size - 1 - index));
- size--;
+ XA_DEBUG_ASSERT(buffer);
+ if (buffer) {
+ if (size > 1)
+ memmove(buffer + elementSize * index, buffer + elementSize * (index + 1), elementSize * (size - 1 - index));
+ if (size > 0)
+ size--;
+ }
}
// Element at index is swapped with the last element, then the array length is decremented.
- void removeAtFast(uint32_t index)
- {
+ void removeAtFast(uint32_t index) {
XA_DEBUG_ASSERT(index >= 0 && index < size);
- if (size != 1 && index != size - 1)
- memcpy(buffer + elementSize * index, buffer + elementSize * (size - 1), elementSize);
- size--;
+ XA_DEBUG_ASSERT(buffer);
+ if (buffer) {
+ if (size > 1 && index != size - 1)
+ memcpy(buffer + elementSize * index, buffer + elementSize * (size - 1), elementSize);
+ if (size > 0)
+ size--;
+ }
}
- void reserve(uint32_t desiredSize)
- {
+ void reserve(uint32_t desiredSize) {
if (desiredSize > capacity)
setArrayCapacity(desiredSize);
}
- void resize(uint32_t newSize, bool exact)
- {
+ void resize(uint32_t newSize, bool exact) {
size = newSize;
if (size > capacity) {
// First allocation is always exact. Otherwise, following allocations grow array to 150% of desired size.
@@ -1153,8 +1006,7 @@ struct ArrayBase
}
}
- void setArrayCapacity(uint32_t newCapacity)
- {
+ void setArrayCapacity(uint32_t newCapacity) {
XA_DEBUG_ASSERT(newCapacity >= size);
if (newCapacity == 0) {
// free the buffer.
@@ -1174,8 +1026,7 @@ struct ArrayBase
}
#if XA_DEBUG_HEAP
- void setMemTag(int _memTag)
- {
+ void setMemTag(int _memTag) {
this->memTag = _memTag;
}
#endif
@@ -1189,28 +1040,27 @@ struct ArrayBase
#endif
};
-template<typename T>
-class Array
-{
+template <typename T>
+class Array {
public:
- Array(int memTag = MemTag::Default) : m_base(sizeof(T), memTag) {}
- Array(const Array&) = delete;
+ Array(int memTag = MemTag::Default) :
+ m_base(sizeof(T), memTag) {}
+ Array(const Array &) = delete;
Array &operator=(const Array &) = delete;
- XA_INLINE const T &operator[](uint32_t index) const
- {
+ XA_INLINE const T &operator[](uint32_t index) const {
XA_DEBUG_ASSERT(index < m_base.size);
+ XA_DEBUG_ASSERT(m_base.buffer);
return ((const T *)m_base.buffer)[index];
}
- XA_INLINE T &operator[](uint32_t index)
- {
+ XA_INLINE T &operator[](uint32_t index) {
XA_DEBUG_ASSERT(index < m_base.size);
+ XA_DEBUG_ASSERT(m_base.buffer);
return ((T *)m_base.buffer)[index];
}
- XA_INLINE const T &back() const
- {
+ XA_INLINE const T &back() const {
XA_DEBUG_ASSERT(!isEmpty());
return ((const T *)m_base.buffer)[m_base.size - 1];
}
@@ -1218,8 +1068,7 @@ public:
XA_INLINE T *begin() { return (T *)m_base.buffer; }
XA_INLINE void clear() { m_base.clear(); }
- bool contains(const T &value) const
- {
+ bool contains(const T &value) const {
for (uint32_t i = 0; i < m_base.size; i++) {
if (((const T *)m_base.buffer)[i] == value)
return true;
@@ -1244,28 +1093,25 @@ public:
void reserve(uint32_t desiredSize) { m_base.reserve(desiredSize); }
void resize(uint32_t newSize) { m_base.resize(newSize, true); }
- void runCtors()
- {
+ void runCtors() {
for (uint32_t i = 0; i < m_base.size; i++)
new (&((T *)m_base.buffer)[i]) T;
}
- void runDtors()
- {
+ void runDtors() {
for (uint32_t i = 0; i < m_base.size; i++)
((T *)m_base.buffer)[i].~T();
}
- void fill(const T &value)
- {
+ void fill(const T &value) {
auto buffer = (T *)m_base.buffer;
for (uint32_t i = 0; i < m_base.size; i++)
buffer[i] = value;
}
- void fillBytes(uint8_t value)
- {
- memset(m_base.buffer, (int)value, m_base.size * m_base.elementSize);
+ void fillBytes(uint8_t value) {
+ if (m_base.buffer && m_base.size > 0)
+ memset(m_base.buffer, (int)value, m_base.size * m_base.elementSize);
}
#if XA_DEBUG_HEAP
@@ -1273,41 +1119,67 @@ public:
#endif
XA_INLINE uint32_t size() const { return m_base.size; }
- XA_INLINE void zeroOutMemory() { memset(m_base.buffer, 0, m_base.elementSize * m_base.size); }
+
+ XA_INLINE void zeroOutMemory() {
+ if (m_base.buffer && m_base.size > 0)
+ memset(m_base.buffer, 0, m_base.elementSize * m_base.size);
+ }
private:
ArrayBase m_base;
};
-template<typename T>
-struct ArrayView
-{
- ArrayView() : data(nullptr), length(0) {}
- ArrayView(Array<T> &a) : data(a.data()), length(a.size()) {}
- ArrayView(T *data, uint32_t length) : data(data), length(length) {}
- ArrayView &operator=(Array<T> &a) { data = a.data(); length = a.size(); return *this; }
- XA_INLINE const T &operator[](uint32_t index) const { XA_DEBUG_ASSERT(index < length); return data[index]; }
+template <typename T>
+struct ArrayView {
+ ArrayView() :
+ data(nullptr), length(0) {}
+ ArrayView(Array<T> &a) :
+ data(a.data()), length(a.size()) {}
+ ArrayView(T *_data, uint32_t _length) :
+ data(_data), length(_length) {}
+ ArrayView &operator=(Array<T> &a) {
+ data = a.data();
+ length = a.size();
+ return *this;
+ }
+ XA_INLINE const T &operator[](uint32_t index) const {
+ XA_DEBUG_ASSERT(index < length);
+ return data[index];
+ }
+ XA_INLINE T &operator[](uint32_t index) {
+ XA_DEBUG_ASSERT(index < length);
+ return data[index];
+ }
T *data;
uint32_t length;
};
-template<typename T>
-struct ConstArrayView
-{
- ConstArrayView() : data(nullptr), length(0) {}
- ConstArrayView(const Array<T> &a) : data(a.data()), length(a.size()) {}
- ConstArrayView(const T *data, uint32_t length) : data(data), length(length) {}
- ConstArrayView &operator=(const Array<T> &a) { data = a.data(); length = a.size(); return *this; }
- XA_INLINE const T &operator[](uint32_t index) const { XA_DEBUG_ASSERT(index < length); return data[index]; }
+template <typename T>
+struct ConstArrayView {
+ ConstArrayView() :
+ data(nullptr), length(0) {}
+ ConstArrayView(const Array<T> &a) :
+ data(a.data()), length(a.size()) {}
+ ConstArrayView(ArrayView<T> av) :
+ data(av.data), length(av.length) {}
+ ConstArrayView(const T *_data, uint32_t _length) :
+ data(_data), length(_length) {}
+ ConstArrayView &operator=(const Array<T> &a) {
+ data = a.data();
+ length = a.size();
+ return *this;
+ }
+ XA_INLINE const T &operator[](uint32_t index) const {
+ XA_DEBUG_ASSERT(index < length);
+ return data[index];
+ }
const T *data;
uint32_t length;
};
/// Basis class to compute tangent space basis, ortogonalizations and to transform vectors from one space to another.
-struct Basis
-{
- XA_NODISCARD static Vector3 computeTangent(const Vector3 &normal)
- {
+struct Basis {
+ XA_NODISCARD static Vector3 computeTangent(const Vector3 &normal) {
XA_ASSERT(isNormalized(normal));
// Choose minimum axis.
Vector3 tangent;
@@ -1319,12 +1191,11 @@ struct Basis
tangent = Vector3(0, 0, 1);
// Ortogonalize
tangent -= normal * dot(normal, tangent);
- tangent = normalize(tangent, kEpsilon);
+ tangent = normalize(tangent);
return tangent;
}
- XA_NODISCARD static Vector3 computeBitangent(const Vector3 &normal, const Vector3 &tangent)
- {
+ XA_NODISCARD static Vector3 computeBitangent(const Vector3 &normal, const Vector3 &tangent) {
return cross(normal, tangent);
}
@@ -1334,42 +1205,36 @@ struct Basis
};
// Simple bit array.
-class BitArray
-{
+class BitArray {
public:
- BitArray() : m_size(0) {}
+ BitArray() :
+ m_size(0) {}
- BitArray(uint32_t sz)
- {
+ BitArray(uint32_t sz) {
resize(sz);
}
- void resize(uint32_t new_size)
- {
+ void resize(uint32_t new_size) {
m_size = new_size;
m_wordArray.resize((m_size + 31) >> 5);
}
- bool get(uint32_t index) const
- {
+ bool get(uint32_t index) const {
XA_DEBUG_ASSERT(index < m_size);
return (m_wordArray[index >> 5] & (1 << (index & 31))) != 0;
}
- void set(uint32_t index)
- {
+ void set(uint32_t index) {
XA_DEBUG_ASSERT(index < m_size);
m_wordArray[index >> 5] |= (1 << (index & 31));
}
- void unset(uint32_t index)
- {
+ void unset(uint32_t index) {
XA_DEBUG_ASSERT(index < m_size);
m_wordArray[index >> 5] &= ~(1 << (index & 31));
}
- void zeroOutMemory()
- {
+ void zeroOutMemory() {
m_wordArray.zeroOutMemory();
}
@@ -1378,13 +1243,13 @@ private:
Array<uint32_t> m_wordArray;
};
-class BitImage
-{
+class BitImage {
public:
- BitImage() : m_width(0), m_height(0), m_rowStride(0), m_data(MemTag::BitImage) {}
+ BitImage() :
+ m_width(0), m_height(0), m_rowStride(0), m_data(MemTag::BitImage) {}
- BitImage(uint32_t w, uint32_t h) : m_width(w), m_height(h), m_data(MemTag::BitImage)
- {
+ BitImage(uint32_t w, uint32_t h) :
+ m_width(w), m_height(h), m_data(MemTag::BitImage) {
m_rowStride = (m_width + 63) >> 6;
m_data.resize(m_rowStride * m_height);
m_data.zeroOutMemory();
@@ -1395,16 +1260,14 @@ public:
uint32_t width() const { return m_width; }
uint32_t height() const { return m_height; }
- void copyTo(BitImage &other)
- {
+ void copyTo(BitImage &other) {
other.m_width = m_width;
other.m_height = m_height;
other.m_rowStride = m_rowStride;
m_data.copyTo(other.m_data);
}
- void resize(uint32_t w, uint32_t h, bool discard)
- {
+ void resize(uint32_t w, uint32_t h, bool discard) {
const uint32_t rowStride = (w + 63) >> 6;
if (discard) {
m_data.resize(rowStride * h);
@@ -1428,28 +1291,24 @@ public:
m_rowStride = rowStride;
}
- bool get(uint32_t x, uint32_t y) const
- {
+ bool get(uint32_t x, uint32_t y) const {
XA_DEBUG_ASSERT(x < m_width && y < m_height);
const uint32_t index = (x >> 6) + y * m_rowStride;
return (m_data[index] & (UINT64_C(1) << (uint64_t(x) & UINT64_C(63)))) != 0;
}
- void set(uint32_t x, uint32_t y)
- {
+ void set(uint32_t x, uint32_t y) {
XA_DEBUG_ASSERT(x < m_width && y < m_height);
const uint32_t index = (x >> 6) + y * m_rowStride;
m_data[index] |= UINT64_C(1) << (uint64_t(x) & UINT64_C(63));
XA_DEBUG_ASSERT(get(x, y));
}
- void zeroOutMemory()
- {
+ void zeroOutMemory() {
m_data.zeroOutMemory();
}
- bool canBlit(const BitImage &image, uint32_t offsetX, uint32_t offsetY) const
- {
+ bool canBlit(const BitImage &image, uint32_t offsetX, uint32_t offsetY) const {
for (uint32_t y = 0; y < image.m_height; y++) {
const uint32_t thisY = y + offsetY;
if (thisY >= m_height)
@@ -1473,8 +1332,7 @@ public:
return true;
}
- void dilate(uint32_t padding)
- {
+ void dilate(uint32_t padding) {
BitImage tmp(m_width, m_height);
for (uint32_t p = 0; p < padding; p++) {
tmp.zeroOutMemory();
@@ -1484,15 +1342,21 @@ public:
if (!b) {
if (x > 0) {
b |= get(x - 1, y);
- if (y > 0) b |= get(x - 1, y - 1);
- if (y < m_height - 1) b |= get(x - 1, y + 1);
+ if (y > 0)
+ b |= get(x - 1, y - 1);
+ if (y < m_height - 1)
+ b |= get(x - 1, y + 1);
}
- if (y > 0) b |= get(x, y - 1);
- if (y < m_height - 1) b |= get(x, y + 1);
+ if (y > 0)
+ b |= get(x, y - 1);
+ if (y < m_height - 1)
+ b |= get(x, y + 1);
if (x < m_width - 1) {
b |= get(x + 1, y);
- if (y > 0) b |= get(x + 1, y - 1);
- if (y < m_height - 1) b |= get(x + 1, y + 1);
+ if (y > 0)
+ b |= get(x + 1, y - 1);
+ if (y < m_height - 1)
+ b |= get(x + 1, y + 1);
}
}
if (b)
@@ -1511,11 +1375,10 @@ private:
};
// From Fast-BVH
-class BVH
-{
+class BVH {
public:
- BVH(const Array<AABB> &objectAabbs, uint32_t leafSize = 4) : m_objectIds(MemTag::BVH), m_nodes(MemTag::BVH)
- {
+ BVH(const Array<AABB> &objectAabbs, uint32_t leafSize = 4) :
+ m_objectIds(MemTag::BVH), m_nodes(MemTag::BVH) {
m_objectAabbs = &objectAabbs;
if (m_objectAabbs->isEmpty())
return;
@@ -1535,7 +1398,7 @@ public:
Node node;
m_nodes.reserve(objectAabbs.size() * 2);
uint32_t nNodes = 0;
- while(stackptr > 0) {
+ while (stackptr > 0) {
// Pop the next item off of the stack
const BuildEntry &bnode = todo[--stackptr];
const uint32_t start = bnode.start;
@@ -1548,7 +1411,7 @@ public:
// Calculate the bounding box for this node
AABB bb(objectAabbs[m_objectIds[start]]);
AABB bc(objectAabbs[m_objectIds[start]].centroid());
- for(uint32_t p = start + 1; p < end; ++p) {
+ for (uint32_t p = start + 1; p < end; ++p) {
bb.expandToInclude(objectAabbs[m_objectIds[p]]);
bc.expandToInclude(objectAabbs[m_objectIds[p]].centroid());
}
@@ -1564,7 +1427,7 @@ public:
m_nodes[bnode.parent].rightOffset--;
// When this is the second touch, this is the right child.
// The right child sets up the offset for the flat tree.
- if (m_nodes[bnode.parent].rightOffset == kTouchedTwice )
+ if (m_nodes[bnode.parent].rightOffset == kTouchedTwice)
m_nodes[bnode.parent].rightOffset = nNodes - 1 - bnode.parent;
}
// If this is a leaf, no need to subdivide.
@@ -1599,21 +1462,20 @@ public:
}
}
- void query(const AABB &queryAabb, Array<uint32_t> &result) const
- {
+ void query(const AABB &queryAabb, Array<uint32_t> &result) const {
result.clear();
// Working set
uint32_t todo[64];
int32_t stackptr = 0;
// "Push" on the root node to the working set
todo[stackptr] = 0;
- while(stackptr >= 0) {
+ while (stackptr >= 0) {
// Pop off the next node to work on.
const int ni = todo[stackptr--];
const Node &node = m_nodes[ni];
// Is leaf -> Intersect
if (node.rightOffset == 0) {
- for(uint32_t o = 0; o < node.nPrims; ++o) {
+ for (uint32_t o = 0; o < node.nPrims; ++o) {
const uint32_t obj = node.start + o;
if (queryAabb.intersect((*m_objectAabbs)[m_objectIds[obj]]))
result.push_back(m_objectIds[obj]);
@@ -1630,14 +1492,12 @@ public:
}
private:
- struct BuildEntry
- {
+ struct BuildEntry {
uint32_t parent; // If non-zero then this is the index of the parent. (used in offsets)
uint32_t start, end; // The range of objects in the object list covered by this node.
};
- struct Node
- {
+ struct Node {
AABB aabb;
uint32_t start, nPrims, rightOffset;
};
@@ -1647,16 +1507,14 @@ private:
Array<Node> m_nodes;
};
-struct Fit
-{
- static bool computeBasis(const Vector3 *points, uint32_t pointsCount, Basis *basis)
- {
- if (computeLeastSquaresNormal(points, pointsCount, &basis->normal)) {
+struct Fit {
+ static bool computeBasis(ConstArrayView<Vector3> points, Basis *basis) {
+ if (computeLeastSquaresNormal(points, &basis->normal)) {
basis->tangent = Basis::computeTangent(basis->normal);
basis->bitangent = Basis::computeBitangent(basis->normal, basis->tangent);
return true;
}
- return computeEigen(points, pointsCount, basis);
+ return computeEigen(points, basis);
}
private:
@@ -1664,21 +1522,20 @@ private:
// Fast, and accurate to within a few degrees.
// Returns None if the points do not span a plane.
// https://www.ilikebigbits.com/2015_03_04_plane_from_points.html
- static bool computeLeastSquaresNormal(const Vector3 *points, uint32_t pointsCount, Vector3 *normal)
- {
- XA_DEBUG_ASSERT(pointsCount >= 3);
- if (pointsCount == 3) {
- *normal = normalize(cross(points[2] - points[0], points[1] - points[0]), kEpsilon);
+ static bool computeLeastSquaresNormal(ConstArrayView<Vector3> points, Vector3 *normal) {
+ XA_DEBUG_ASSERT(points.length >= 3);
+ if (points.length == 3) {
+ *normal = normalize(cross(points[2] - points[0], points[1] - points[0]));
return true;
}
- const float invN = 1.0f / float(pointsCount);
+ const float invN = 1.0f / float(points.length);
Vector3 centroid(0.0f);
- for (uint32_t i = 0; i < pointsCount; i++)
+ for (uint32_t i = 0; i < points.length; i++)
centroid += points[i];
centroid *= invN;
// Calculate full 3x3 covariance matrix, excluding symmetries:
float xx = 0.0f, xy = 0.0f, xz = 0.0f, yy = 0.0f, yz = 0.0f, zz = 0.0f;
- for (uint32_t i = 0; i < pointsCount; i++) {
+ for (uint32_t i = 0; i < points.length; i++) {
Vector3 r = points[i] - centroid;
xx += r.x * r.x;
xy += r.x * r.y;
@@ -1730,7 +1587,7 @@ private:
// Pick path with best conditioning:
Vector3 dir(0.0f);
if (det_max == det_x)
- dir = Vector3(det_x,xz * yz - xy * zz,xy * yz - xz * yy);
+ dir = Vector3(det_x, xz * yz - xy * zz, xy * yz - xz * yy);
else if (det_max == det_y)
dir = Vector3(xz * yz - xy * zz, det_y, xy * xz - yz * xx);
else if (det_max == det_z)
@@ -1743,41 +1600,37 @@ private:
return isNormalized(*normal);
}
- static bool computeEigen(const Vector3 *points, uint32_t pointsCount, Basis *basis)
- {
+ static bool computeEigen(ConstArrayView<Vector3> points, Basis *basis) {
float matrix[6];
- computeCovariance(pointsCount, points, matrix);
+ computeCovariance(points, matrix);
if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
return false;
float eigenValues[3];
Vector3 eigenVectors[3];
if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors))
return false;
- basis->normal = normalize(eigenVectors[2], kEpsilon);
- basis->tangent = normalize(eigenVectors[0], kEpsilon);
- basis->bitangent = normalize(eigenVectors[1], kEpsilon);
+ basis->normal = normalize(eigenVectors[2]);
+ basis->tangent = normalize(eigenVectors[0]);
+ basis->bitangent = normalize(eigenVectors[1]);
return true;
}
- static Vector3 computeCentroid(int n, const Vector3 * points)
- {
+ static Vector3 computeCentroid(ConstArrayView<Vector3> points) {
Vector3 centroid(0.0f);
- for (int i = 0; i < n; i++) {
+ for (uint32_t i = 0; i < points.length; i++)
centroid += points[i];
- }
- centroid /= float(n);
+ centroid /= float(points.length);
return centroid;
}
- static Vector3 computeCovariance(int n, const Vector3 * points, float * covariance)
- {
+ static Vector3 computeCovariance(ConstArrayView<Vector3> points, float *covariance) {
// compute the centroid
- Vector3 centroid = computeCentroid(n, points);
+ Vector3 centroid = computeCentroid(points);
// compute covariance matrix
for (int i = 0; i < 6; i++) {
covariance[i] = 0.0f;
}
- for (int i = 0; i < n; i++) {
+ for (uint32_t i = 0; i < points.length; i++) {
Vector3 v = points[i] - centroid;
covariance[0] += v.x * v.x;
covariance[1] += v.x * v.y;
@@ -1792,8 +1645,7 @@ private:
// Tridiagonal solver from Charles Bloom.
// Householder transforms followed by QL decomposition.
// Seems to be based on the code from Numerical Recipes in C.
- static bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3])
- {
+ static bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]) {
XA_DEBUG_ASSERT(matrix != nullptr && eigenValues != nullptr && eigenVectors != nullptr);
float subd[3];
float diag[3];
@@ -1818,7 +1670,7 @@ private:
// eigenvectors are the columns; make them the rows :
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
- (&eigenVectors[j].x)[i] = (float) work[i][j];
+ (&eigenVectors[j].x)[i] = (float)work[i][j];
}
}
// shuffle to sort by singular value :
@@ -1840,8 +1692,7 @@ private:
}
private:
- static void EigenSolver3_Tridiagonal(float mat[3][3], float *diag, float *subd)
- {
+ static void EigenSolver3_Tridiagonal(float mat[3][3], float *diag, float *subd) {
// Householder reduction T = Q^t M Q
// Input:
// mat, symmetric 3x3 matrix M
@@ -1893,8 +1744,7 @@ private:
}
}
- static bool EigenSolver3_QLAlgorithm(float mat[3][3], float *diag, float *subd)
- {
+ static bool EigenSolver3_QLAlgorithm(float mat[3][3], float *diag, float *subd) {
// QL iteration with implicit shifting to reduce matrix from tridiagonal
// to diagonal
const int maxiter = 32;
@@ -1904,21 +1754,21 @@ private:
int m;
for (m = ell; m <= 1; m++) {
float dd = fabsf(diag[m]) + fabsf(diag[m + 1]);
- if ( fabsf(subd[m]) + dd == dd )
+ if (fabsf(subd[m]) + dd == dd)
break;
}
- if ( m == ell )
+ if (m == ell)
break;
float g = (diag[ell + 1] - diag[ell]) / (2 * subd[ell]);
float r = sqrtf(g * g + 1);
- if ( g < 0 )
+ if (g < 0)
g = diag[m] - diag[ell] + subd[ell] / (g - r);
else
g = diag[m] - diag[ell] + subd[ell] / (g + r);
float s = 1, c = 1, p = 0;
for (int i = m - 1; i >= ell; i--) {
float f = s * subd[i], b = c * subd[i];
- if ( fabsf(f) >= fabsf(g) ) {
+ if (fabsf(f) >= fabsf(g)) {
c = g / f;
r = sqrtf(c * c + 1);
subd[i + 1] = f * r;
@@ -1944,7 +1794,7 @@ private:
subd[ell] = g;
subd[m] = 0;
}
- if ( iter == maxiter )
+ if (iter == maxiter)
// should not get here under normal circumstances
return false;
}
@@ -1952,56 +1802,48 @@ private:
}
};
-static uint32_t sdbmHash(const void *data_in, uint32_t size, uint32_t h = 5381)
-{
- const uint8_t *data = (const uint8_t *) data_in;
+static uint32_t sdbmHash(const void *data_in, uint32_t size, uint32_t h = 5381) {
+ const uint8_t *data = (const uint8_t *)data_in;
uint32_t i = 0;
while (i < size) {
- h = (h << 16) + (h << 6) - h + (uint32_t ) data[i++];
+ h = (h << 16) + (h << 6) - h + (uint32_t)data[i++];
}
return h;
}
template <typename T>
-static uint32_t hash(const T &t, uint32_t h = 5381)
-{
+static uint32_t hash(const T &t, uint32_t h = 5381) {
return sdbmHash(&t, sizeof(T), h);
}
template <typename Key>
-struct Hash
-{
+struct Hash {
uint32_t operator()(const Key &k) const { return hash(k); }
};
template <typename Key>
-struct PassthroughHash
-{
+struct PassthroughHash {
uint32_t operator()(const Key &k) const { return (uint32_t)k; }
};
template <typename Key>
-struct Equal
-{
+struct Equal {
bool operator()(const Key &k0, const Key &k1) const { return k0 == k1; }
};
-template<typename Key, typename H = Hash<Key>, typename E = Equal<Key> >
-class HashMap
-{
+template <typename Key, typename H = Hash<Key>, typename E = Equal<Key>>
+class HashMap {
public:
- HashMap(int memTag, uint32_t size) : m_memTag(memTag), m_size(size), m_numSlots(0), m_slots(nullptr), m_keys(memTag), m_next(memTag)
- {
+ HashMap(int memTag, uint32_t size) :
+ m_memTag(memTag), m_size(size), m_numSlots(0), m_slots(nullptr), m_keys(memTag), m_next(memTag) {
}
- ~HashMap()
- {
+ ~HashMap() {
if (m_slots)
XA_FREE(m_slots);
}
- void destroy()
- {
+ void destroy() {
if (m_slots) {
XA_FREE(m_slots);
m_slots = nullptr;
@@ -2010,8 +1852,7 @@ public:
m_next.destroy();
}
- uint32_t add(const Key &key)
- {
+ uint32_t add(const Key &key) {
if (!m_slots)
alloc();
const uint32_t hash = computeHash(key);
@@ -2021,36 +1862,18 @@ public:
return m_keys.size() - 1;
}
- uint32_t get(const Key &key) const
- {
+ uint32_t get(const Key &key) const {
if (!m_slots)
return UINT32_MAX;
- const uint32_t hash = computeHash(key);
- uint32_t i = m_slots[hash];
- E equal;
- while (i != UINT32_MAX) {
- if (equal(m_keys[i], key))
- return i;
- i = m_next[i];
- }
- return UINT32_MAX;
+ return find(key, m_slots[computeHash(key)]);
}
- uint32_t getNext(uint32_t current) const
- {
- uint32_t i = m_next[current];
- E equal;
- while (i != UINT32_MAX) {
- if (equal(m_keys[i], m_keys[current]))
- return i;
- i = m_next[i];
- }
- return UINT32_MAX;
+ uint32_t getNext(const Key &key, uint32_t current) const {
+ return find(key, m_next[current]);
}
private:
- void alloc()
- {
+ void alloc() {
XA_DEBUG_ASSERT(m_size > 0);
m_numSlots = nextPowerOfTwo(m_size);
auto minNumSlots = uint32_t(m_size * 1.3);
@@ -2063,12 +1886,21 @@ private:
m_next.reserve(m_size);
}
- uint32_t computeHash(const Key &key) const
- {
+ uint32_t computeHash(const Key &key) const {
H hash;
return hash(key) & (m_numSlots - 1);
}
+ uint32_t find(const Key &key, uint32_t current) const {
+ E equal;
+ while (current != UINT32_MAX) {
+ if (equal(m_keys[current], key))
+ return current;
+ current = m_next[current];
+ }
+ return current;
+ }
+
int m_memTag;
uint32_t m_size;
uint32_t m_numSlots;
@@ -2077,9 +1909,8 @@ private:
Array<uint32_t> m_next;
};
-template<typename T>
-static void insertionSort(T *data, uint32_t length)
-{
+template <typename T>
+static void insertionSort(T *data, uint32_t length) {
for (int32_t i = 1; i < (int32_t)length; i++) {
T x = data[i];
int32_t j = i - 1;
@@ -2091,21 +1922,18 @@ static void insertionSort(T *data, uint32_t length)
}
}
-class KISSRng
-{
+class KISSRng {
public:
KISSRng() { reset(); }
- void reset()
- {
+ void reset() {
x = 123456789;
y = 362436000;
z = 521288629;
c = 7654321;
}
- uint32_t getRange(uint32_t range)
- {
+ uint32_t getRange(uint32_t range) {
if (range == 0)
return 0;
x = 69069 * x + 12345;
@@ -2124,12 +1952,10 @@ private:
// Based on Pierre Terdiman's and Michael Herf's source code.
// http://www.codercorner.com/RadixSortRevisited.htm
// http://www.stereopsis.com/radix.html
-class RadixSort
-{
+class RadixSort {
public:
- void sort(const float *input, uint32_t count)
- {
- if (input == nullptr || count == 0) {
+ void sort(ConstArrayView<float> input) {
+ if (input.length == 0) {
m_buffer1.clear();
m_buffer2.clear();
m_ranks = m_buffer1.data();
@@ -2137,33 +1963,27 @@ public:
return;
}
// Resize lists if needed
- m_buffer1.resize(count);
- m_buffer2.resize(count);
+ m_buffer1.resize(input.length);
+ m_buffer2.resize(input.length);
m_ranks = m_buffer1.data();
m_ranks2 = m_buffer2.data();
m_validRanks = false;
- if (count < 32)
- insertionSort(input, count);
+ if (input.length < 32)
+ insertionSort(input);
else {
// @@ Avoid touching the input multiple times.
- for (uint32_t i = 0; i < count; i++) {
+ for (uint32_t i = 0; i < input.length; i++) {
floatFlip((uint32_t &)input[i]);
}
- radixSort<uint32_t>((const uint32_t *)input, count);
- for (uint32_t i = 0; i < count; i++) {
+ radixSort(ConstArrayView<uint32_t>((const uint32_t *)input.data, input.length));
+ for (uint32_t i = 0; i < input.length; i++) {
ifloatFlip((uint32_t &)input[i]);
}
}
}
- void sort(const Array<float> &input)
- {
- sort(input.data(), input.size());
- }
-
// Access to results. m_ranks is a list of indices in sorted order, i.e. in the order you may further process your data
- const uint32_t *ranks() const
- {
+ const uint32_t *ranks() const {
XA_DEBUG_ASSERT(m_validRanks);
return m_ranks;
}
@@ -2171,54 +1991,40 @@ public:
private:
uint32_t *m_ranks, *m_ranks2;
Array<uint32_t> m_buffer1, m_buffer2;
- bool m_validRanks;
+ bool m_validRanks = false;
- void floatFlip(uint32_t &f)
- {
+ void floatFlip(uint32_t &f) {
int32_t mask = (int32_t(f) >> 31) | 0x80000000; // Warren Hunt, Manchor Ko.
f ^= mask;
}
- void ifloatFlip(uint32_t &f)
- {
+ void ifloatFlip(uint32_t &f) {
uint32_t mask = ((f >> 31) - 1) | 0x80000000; // Michael Herf.
f ^= mask;
}
- template<typename T>
- void createHistograms(const T *buffer, uint32_t count, uint32_t *histogram)
- {
- const uint32_t bucketCount = sizeof(T); // (8 * sizeof(T)) / log2(radix)
+ void createHistograms(ConstArrayView<uint32_t> input, uint32_t *histogram) {
+ const uint32_t bucketCount = sizeof(uint32_t);
// Init bucket pointers.
uint32_t *h[bucketCount];
for (uint32_t i = 0; i < bucketCount; i++) {
h[i] = histogram + 256 * i;
}
// Clear histograms.
- memset(histogram, 0, 256 * bucketCount * sizeof(uint32_t ));
+ memset(histogram, 0, 256 * bucketCount * sizeof(uint32_t));
// @@ Add support for signed integers.
// Build histograms.
- const uint8_t *p = (const uint8_t *)buffer; // @@ Does this break aliasing rules?
- const uint8_t *pe = p + count * sizeof(T);
+ const uint8_t *p = (const uint8_t *)input.data; // @@ Does this break aliasing rules?
+ const uint8_t *pe = p + input.length * sizeof(uint32_t);
while (p != pe) {
h[0][*p++]++, h[1][*p++]++, h[2][*p++]++, h[3][*p++]++;
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4127)
-#endif
- if (bucketCount == 8) h[4][*p++]++, h[5][*p++]++, h[6][*p++]++, h[7][*p++]++;
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
}
}
- template <typename T>
- void insertionSort(const T *input, uint32_t count)
- {
+ void insertionSort(ConstArrayView<float> input) {
if (!m_validRanks) {
m_ranks[0] = 0;
- for (uint32_t i = 1; i != count; ++i) {
+ for (uint32_t i = 1; i != input.length; ++i) {
int rank = m_ranks[i] = i;
uint32_t j = i;
while (j != 0 && input[rank] < input[m_ranks[j - 1]]) {
@@ -2231,7 +2037,7 @@ private:
}
m_validRanks = true;
} else {
- for (uint32_t i = 1; i != count; ++i) {
+ for (uint32_t i = 1; i != input.length; ++i) {
int rank = m_ranks[i];
uint32_t j = i;
while (j != 0 && input[rank] < input[m_ranks[j - 1]]) {
@@ -2245,35 +2051,34 @@ private:
}
}
- template <typename T>
- void radixSort(const T *input, uint32_t count)
- {
- const uint32_t P = sizeof(T); // pass count
+ void radixSort(ConstArrayView<uint32_t> input) {
+ const uint32_t P = sizeof(uint32_t); // pass count
// Allocate histograms & offsets on the stack
uint32_t histogram[256 * P];
uint32_t *link[256];
- createHistograms(input, count, histogram);
+ createHistograms(input, histogram);
// Radix sort, j is the pass number (0=LSB, P=MSB)
for (uint32_t j = 0; j < P; j++) {
// Pointer to this bucket.
const uint32_t *h = &histogram[j * 256];
- const uint8_t *inputBytes = (const uint8_t *)input; // @@ Is this aliasing legal?
+ auto inputBytes = (const uint8_t *)input.data; // @@ Is this aliasing legal?
inputBytes += j;
- if (h[inputBytes[0]] == count) {
+ if (h[inputBytes[0]] == input.length) {
// Skip this pass, all values are the same.
continue;
}
// Create offsets
link[0] = m_ranks2;
- for (uint32_t i = 1; i < 256; i++) link[i] = link[i - 1] + h[i - 1];
+ for (uint32_t i = 1; i < 256; i++)
+ link[i] = link[i - 1] + h[i - 1];
// Perform Radix Sort
if (!m_validRanks) {
- for (uint32_t i = 0; i < count; i++) {
+ for (uint32_t i = 0; i < input.length; i++) {
*link[inputBytes[i * P]]++ = i;
}
m_validRanks = true;
} else {
- for (uint32_t i = 0; i < count; i++) {
+ for (uint32_t i = 0; i < input.length; i++) {
const uint32_t idx = m_ranks[i];
*link[inputBytes[idx * P]]++ = idx;
}
@@ -2283,7 +2088,7 @@ private:
}
// All values were equal, generate linear ranks.
if (!m_validRanks) {
- for (uint32_t i = 0; i < count; i++)
+ for (uint32_t i = 0; i < input.length; i++)
m_ranks[i] = i;
m_validRanks = true;
}
@@ -2291,30 +2096,25 @@ private:
};
// Wrapping this in a class allows temporary arrays to be re-used.
-class BoundingBox2D
-{
+class BoundingBox2D {
public:
Vector2 majorAxis, minorAxis, minCorner, maxCorner;
- void clear()
- {
+ void clear() {
m_boundaryVertices.clear();
}
- void appendBoundaryVertex(Vector2 v)
- {
+ void appendBoundaryVertex(Vector2 v) {
m_boundaryVertices.push_back(v);
}
// This should compute convex hull and use rotating calipers to find the best box. Currently it uses a brute force method.
- // If vertices is null or vertexCount is 0, the boundary vertices are used.
- void compute(const Vector2 *vertices = nullptr, uint32_t vertexCount = 0)
- {
- if (!vertices || vertexCount == 0) {
- vertices = m_boundaryVertices.data();
- vertexCount = m_boundaryVertices.size();
- }
- convexHull(m_boundaryVertices.data(), m_boundaryVertices.size(), m_hull, 0.00001f);
+ // If vertices are empty, the boundary vertices are used.
+ void compute(ConstArrayView<Vector2> vertices = ConstArrayView<Vector2>()) {
+ XA_DEBUG_ASSERT(!m_boundaryVertices.isEmpty());
+ if (vertices.length == 0)
+ vertices = m_boundaryVertices;
+ convexHull(m_boundaryVertices, m_hull, 0.00001f);
// @@ Ideally I should use rotating calipers to find the best box. Using brute force for now.
float best_area = FLT_MAX;
Vector2 best_min(0);
@@ -2324,13 +2124,13 @@ public:
for (uint32_t i = 0, j = hullCount - 1; i < hullCount; j = i, i++) {
if (equal(m_hull[i], m_hull[j], kEpsilon))
continue;
- Vector2 axis = normalize(m_hull[i] - m_hull[j], 0.0f);
+ Vector2 axis = normalize(m_hull[i] - m_hull[j]);
XA_DEBUG_ASSERT(isFinite(axis));
// Compute bounding box.
Vector2 box_min(FLT_MAX, FLT_MAX);
Vector2 box_max(-FLT_MAX, -FLT_MAX);
// Consider all points, not only boundary points, in case the input chart is malformed.
- for (uint32_t v = 0; v < vertexCount; v++) {
+ for (uint32_t v = 0; v < vertices.length; v++) {
const Vector2 &point = vertices[v];
const float x = dot(axis, point);
const float y = dot(Vector2(-axis.y, axis.x), point);
@@ -2357,28 +2157,27 @@ public:
private:
// Compute the convex hull using Graham Scan.
- void convexHull(const Vector2 *input, uint32_t inputCount, Array<Vector2> &output, float epsilon)
- {
- m_coords.resize(inputCount);
- for (uint32_t i = 0; i < inputCount; i++)
+ void convexHull(ConstArrayView<Vector2> input, Array<Vector2> &output, float epsilon) {
+ m_coords.resize(input.length);
+ for (uint32_t i = 0; i < input.length; i++)
m_coords[i] = input[i].x;
m_radix.sort(m_coords);
const uint32_t *ranks = m_radix.ranks();
m_top.clear();
m_bottom.clear();
- m_top.reserve(inputCount);
- m_bottom.reserve(inputCount);
+ m_top.reserve(input.length);
+ m_bottom.reserve(input.length);
Vector2 P = input[ranks[0]];
- Vector2 Q = input[ranks[inputCount - 1]];
+ Vector2 Q = input[ranks[input.length - 1]];
float topy = max(P.y, Q.y);
float boty = min(P.y, Q.y);
- for (uint32_t i = 0; i < inputCount; i++) {
+ for (uint32_t i = 0; i < input.length; i++) {
Vector2 p = input[ranks[i]];
if (p.y >= boty)
m_top.push_back(p);
}
- for (uint32_t i = 0; i < inputCount; i++) {
- Vector2 p = input[ranks[inputCount - 1 - i]];
+ for (uint32_t i = 0; i < input.length; i++) {
+ Vector2 p = input[ranks[input.length - 1 - i]];
if (p.y <= topy)
m_bottom.push_back(p);
}
@@ -2387,7 +2186,7 @@ private:
XA_DEBUG_ASSERT(m_top.size() >= 2);
output.push_back(m_top[0]);
output.push_back(m_top[1]);
- for (uint32_t i = 2; i < m_top.size(); ) {
+ for (uint32_t i = 2; i < m_top.size();) {
Vector2 a = output[output.size() - 2];
Vector2 b = output[output.size() - 1];
Vector2 c = m_top[i];
@@ -2403,7 +2202,7 @@ private:
XA_DEBUG_ASSERT(m_bottom.size() >= 2);
output.push_back(m_bottom[1]);
// Filter bottom list.
- for (uint32_t i = 2; i < m_bottom.size(); ) {
+ for (uint32_t i = 2; i < m_bottom.size();) {
Vector2 a = output[output.size() - 2];
Vector2 b = output[output.size() - 1];
Vector2 c = m_bottom[i];
@@ -2426,32 +2225,45 @@ private:
RadixSort m_radix;
};
-static uint32_t meshEdgeFace(uint32_t edge) { return edge / 3; }
-static uint32_t meshEdgeIndex0(uint32_t edge) { return edge; }
+struct EdgeKey {
+ EdgeKey(const EdgeKey &k) :
+ v0(k.v0), v1(k.v1) {}
+ EdgeKey(uint32_t _v0, uint32_t _v1) :
+ v0(_v0), v1(_v1) {}
+ bool operator==(const EdgeKey &k) const { return v0 == k.v0 && v1 == k.v1; }
-static uint32_t meshEdgeIndex1(uint32_t edge)
-{
+ uint32_t v0;
+ uint32_t v1;
+};
+
+struct EdgeHash {
+ uint32_t operator()(const EdgeKey &k) const { return k.v0 * 32768u + k.v1; }
+};
+
+static uint32_t meshEdgeFace(uint32_t edge) {
+ return edge / 3;
+}
+static uint32_t meshEdgeIndex0(uint32_t edge) {
+ return edge;
+}
+
+static uint32_t meshEdgeIndex1(uint32_t edge) {
const uint32_t faceFirstEdge = edge / 3 * 3;
return faceFirstEdge + (edge - faceFirstEdge + 1) % 3;
}
-struct MeshFlags
-{
- enum
- {
- HasIgnoredFaces = 1<<0,
- HasNormals = 1<<1
+struct MeshFlags {
+ enum {
+ HasIgnoredFaces = 1 << 0,
+ HasNormals = 1 << 1,
+ HasMaterials = 1 << 2
};
};
-class Mesh;
-static void meshGetBoundaryLoops(const Mesh &mesh, Array<uint32_t> &boundaryLoops);
-
-class Mesh
-{
+class Mesh {
public:
- Mesh(float epsilon, uint32_t approxVertexCount, uint32_t approxFaceCount, uint32_t flags = 0, uint32_t id = UINT32_MAX) : m_epsilon(epsilon), m_flags(flags), m_id(id), m_faceIgnore(MemTag::Mesh), m_indices(MemTag::MeshIndices), m_positions(MemTag::MeshPositions), m_normals(MemTag::MeshNormals), m_texcoords(MemTag::MeshTexcoords), m_nextColocalVertex(MemTag::MeshColocals), m_boundaryEdges(MemTag::MeshBoundaries), m_oppositeEdges(MemTag::MeshBoundaries), m_nextBoundaryEdges(MemTag::MeshBoundaries), m_edgeMap(MemTag::MeshEdgeMap, approxFaceCount * 3)
- {
+ Mesh(float epsilon, uint32_t approxVertexCount, uint32_t approxFaceCount, uint32_t flags = 0, uint32_t id = UINT32_MAX) :
+ m_epsilon(epsilon), m_flags(flags), m_id(id), m_faceIgnore(MemTag::Mesh), m_faceMaterials(MemTag::Mesh), m_indices(MemTag::MeshIndices), m_positions(MemTag::MeshPositions), m_normals(MemTag::MeshNormals), m_texcoords(MemTag::MeshTexcoords), m_nextColocalVertex(MemTag::MeshColocals), m_firstColocalVertex(MemTag::MeshColocals), m_boundaryEdges(MemTag::MeshBoundaries), m_oppositeEdges(MemTag::MeshBoundaries), m_edgeMap(MemTag::MeshEdgeMap, approxFaceCount * 3) {
m_indices.reserve(approxFaceCount * 3);
m_positions.reserve(approxVertexCount);
m_texcoords.reserve(approxVertexCount);
@@ -2459,13 +2271,14 @@ public:
m_faceIgnore.reserve(approxFaceCount);
if (m_flags & MeshFlags::HasNormals)
m_normals.reserve(approxVertexCount);
+ if (m_flags & MeshFlags::HasMaterials)
+ m_faceMaterials.reserve(approxFaceCount);
}
uint32_t flags() const { return m_flags; }
uint32_t id() const { return m_id; }
- void addVertex(const Vector3 &pos, const Vector3 &normal = Vector3(0.0f), const Vector2 &texcoord = Vector2(0.0f))
- {
+ void addVertex(const Vector3 &pos, const Vector3 &normal = Vector3(0.0f), const Vector2 &texcoord = Vector2(0.0f)) {
XA_DEBUG_ASSERT(isFinite(pos));
m_positions.push_back(pos);
if (m_flags & MeshFlags::HasNormals)
@@ -2473,45 +2286,22 @@ public:
m_texcoords.push_back(texcoord);
}
- struct AddFaceResult
- {
- enum Enum
- {
- OK,
- DuplicateEdge = 1
- };
- };
-
- AddFaceResult::Enum addFace(uint32_t v0, uint32_t v1, uint32_t v2, bool ignore = false)
- {
- uint32_t indexArray[3];
- indexArray[0] = v0;
- indexArray[1] = v1;
- indexArray[2] = v2;
- return addFace(indexArray, ignore);
- }
-
- AddFaceResult::Enum addFace(const uint32_t *indices, bool ignore = false)
- {
- AddFaceResult::Enum result = AddFaceResult::OK;
+ void addFace(const uint32_t *indices, bool ignore = false, uint32_t material = UINT32_MAX) {
if (m_flags & MeshFlags::HasIgnoredFaces)
m_faceIgnore.push_back(ignore);
+ if (m_flags & MeshFlags::HasMaterials)
+ m_faceMaterials.push_back(material);
const uint32_t firstIndex = m_indices.size();
for (uint32_t i = 0; i < 3; i++)
m_indices.push_back(indices[i]);
for (uint32_t i = 0; i < 3; i++) {
const uint32_t vertex0 = m_indices[firstIndex + i];
const uint32_t vertex1 = m_indices[firstIndex + (i + 1) % 3];
- const EdgeKey key(vertex0, vertex1);
- if (m_edgeMap.get(key) != UINT32_MAX)
- result = AddFaceResult::DuplicateEdge;
- m_edgeMap.add(key);
+ m_edgeMap.add(EdgeKey(vertex0, vertex1));
}
- return result;
}
- void createColocals()
- {
+ void createColocalsBVH() {
const uint32_t vertexCount = m_positions.size();
Array<AABB> aabbs(MemTag::BVH);
aabbs.resize(vertexCount);
@@ -2522,6 +2312,8 @@ public:
Array<uint32_t> potential(MemTag::MeshColocals);
m_nextColocalVertex.resize(vertexCount);
m_nextColocalVertex.fillBytes(0xff);
+ m_firstColocalVertex.resize(vertexCount);
+ m_firstColocalVertex.fillBytes(0xff);
for (uint32_t i = 0; i < vertexCount; i++) {
if (m_nextColocalVertex[i] != UINT32_MAX)
continue; // Already linked.
@@ -2537,18 +2329,65 @@ public:
if (colocals.size() == 1) {
// No colocals for this vertex.
m_nextColocalVertex[i] = i;
- continue;
+ m_firstColocalVertex[i] = i;
+ continue;
}
// Link in ascending order.
insertionSort(colocals.data(), colocals.size());
- for (uint32_t j = 0; j < colocals.size(); j++)
+ for (uint32_t j = 0; j < colocals.size(); j++) {
m_nextColocalVertex[colocals[j]] = colocals[(j + 1) % colocals.size()];
+ m_firstColocalVertex[colocals[j]] = colocals[0];
+ }
XA_DEBUG_ASSERT(m_nextColocalVertex[i] != UINT32_MAX);
}
}
- void createBoundaries()
- {
+ void createColocalsHash() {
+ const uint32_t vertexCount = m_positions.size();
+ HashMap<Vector3> positionToVertexMap(MemTag::Default, vertexCount);
+ for (uint32_t i = 0; i < vertexCount; i++)
+ positionToVertexMap.add(m_positions[i]);
+ Array<uint32_t> colocals(MemTag::MeshColocals);
+ m_nextColocalVertex.resize(vertexCount);
+ m_nextColocalVertex.fillBytes(0xff);
+ m_firstColocalVertex.resize(vertexCount);
+ m_firstColocalVertex.fillBytes(0xff);
+ for (uint32_t i = 0; i < vertexCount; i++) {
+ if (m_nextColocalVertex[i] != UINT32_MAX)
+ continue; // Already linked.
+ // Find other vertices colocal to this one.
+ colocals.clear();
+ colocals.push_back(i); // Always add this vertex.
+ uint32_t otherVertex = positionToVertexMap.get(m_positions[i]);
+ while (otherVertex != UINT32_MAX) {
+ if (otherVertex != i && equal(m_positions[i], m_positions[otherVertex], m_epsilon) && m_nextColocalVertex[otherVertex] == UINT32_MAX)
+ colocals.push_back(otherVertex);
+ otherVertex = positionToVertexMap.getNext(m_positions[i], otherVertex);
+ }
+ if (colocals.size() == 1) {
+ // No colocals for this vertex.
+ m_nextColocalVertex[i] = i;
+ m_firstColocalVertex[i] = i;
+ continue;
+ }
+ // Link in ascending order.
+ insertionSort(colocals.data(), colocals.size());
+ for (uint32_t j = 0; j < colocals.size(); j++) {
+ m_nextColocalVertex[colocals[j]] = colocals[(j + 1) % colocals.size()];
+ m_firstColocalVertex[colocals[j]] = colocals[0];
+ }
+ XA_DEBUG_ASSERT(m_nextColocalVertex[i] != UINT32_MAX);
+ }
+ }
+
+ void createColocals() {
+ if (m_epsilon <= FLT_EPSILON)
+ createColocalsHash();
+ else
+ createColocalsBVH();
+ }
+
+ void createBoundaries() {
const uint32_t edgeCount = m_indices.size();
const uint32_t vertexCount = m_positions.size();
m_oppositeEdges.resize(edgeCount);
@@ -2578,151 +2417,54 @@ public:
}
}
- void linkBoundaries()
- {
- const uint32_t edgeCount = m_indices.size();
- HashMap<uint32_t> vertexToEdgeMap(MemTag::Mesh, edgeCount); // Edge is index / 2
- for (uint32_t i = 0; i < edgeCount; i++) {
- vertexToEdgeMap.add(m_indices[meshEdgeIndex0(i)]);
- vertexToEdgeMap.add(m_indices[meshEdgeIndex1(i)]);
- }
- m_nextBoundaryEdges.resize(edgeCount);
- for (uint32_t i = 0; i < edgeCount; i++)
- m_nextBoundaryEdges[i] = UINT32_MAX;
- uint32_t numBoundaryLoops = 0, numUnclosedBoundaries = 0;
- BitArray linkedEdges(edgeCount);
- linkedEdges.zeroOutMemory();
- for (;;) {
- // Find the first boundary edge that hasn't been linked yet.
- uint32_t firstEdge = UINT32_MAX;
- for (uint32_t i = 0; i < edgeCount; i++) {
- if (m_oppositeEdges[i] == UINT32_MAX && !linkedEdges.get(i)) {
- firstEdge = i;
- break;
- }
- }
- if (firstEdge == UINT32_MAX)
- break;
- uint32_t currentEdge = firstEdge;
- for (;;) {
- // Find the next boundary edge. The first vertex will be the same as (or colocal to) the current edge second vertex.
- const uint32_t startVertex = m_indices[meshEdgeIndex1(currentEdge)];
- uint32_t bestNextEdge = UINT32_MAX;
- for (ColocalVertexIterator it(this, startVertex); !it.isDone(); it.advance()) {
- uint32_t mapIndex = vertexToEdgeMap.get(it.vertex());
- while (mapIndex != UINT32_MAX) {
- const uint32_t otherEdge = mapIndex / 2; // Two vertices added per edge.
- if (m_oppositeEdges[otherEdge] != UINT32_MAX)
- goto next; // Not a boundary edge.
- if (linkedEdges.get(otherEdge))
- goto next; // Already linked.
- if (m_indices[meshEdgeIndex0(otherEdge)] != it.vertex())
- goto next; // Edge contains the vertex, but it's the wrong one.
- // First edge (closing the boundary loop) has the highest priority.
- // Non-colocal vertex has the next highest.
- if (bestNextEdge != firstEdge && (bestNextEdge == UINT32_MAX || it.vertex() == startVertex))
- bestNextEdge = otherEdge;
- next:
- mapIndex = vertexToEdgeMap.getNext(mapIndex);
- }
- }
- if (bestNextEdge == UINT32_MAX) {
- numUnclosedBoundaries++;
- if (currentEdge == firstEdge)
- linkedEdges.set(firstEdge); // Only 1 edge in this boundary "loop".
- break; // Can't find a next edge.
- }
- m_nextBoundaryEdges[currentEdge] = bestNextEdge;
- linkedEdges.set(bestNextEdge);
- currentEdge = bestNextEdge;
- if (currentEdge == firstEdge) {
- numBoundaryLoops++;
- break; // Closed the boundary loop.
- }
- }
- }
-#if XA_FIX_INTERNAL_BOUNDARY_LOOPS
- // Find internal boundary loops and separate them.
- // Detect by finding two edges in a boundary loop that have a colocal end vertex.
- // Fix by swapping their next boundary edge.
- // Need to start over after every fix since known boundary loops have changed.
- Array<uint32_t> boundaryLoops;
- fixInternalBoundary:
- meshGetBoundaryLoops(*this, boundaryLoops);
- for (uint32_t loop = 0; loop < boundaryLoops.size(); loop++) {
- linkedEdges.zeroOutMemory();
- for (Mesh::BoundaryLoopEdgeIterator it1(this, boundaryLoops[loop]); !it1.isDone(); it1.advance()) {
- const uint32_t e1 = it1.edge();
- if (linkedEdges.get(e1))
- continue;
- for (Mesh::BoundaryLoopEdgeIterator it2(this, boundaryLoops[loop]); !it2.isDone(); it2.advance()) {
- const uint32_t e2 = it2.edge();
- if (e1 == e2 || !isBoundaryEdge(e2) || linkedEdges.get(e2))
- continue;
- if (!areColocal(m_indices[meshEdgeIndex1(e1)], m_indices[meshEdgeIndex1(e2)]))
- continue;
- swap(m_nextBoundaryEdges[e1], m_nextBoundaryEdges[e2]);
- linkedEdges.set(e1);
- linkedEdges.set(e2);
- goto fixInternalBoundary; // start over
- }
- }
- }
-#endif
- }
-
/// Find edge, test all colocals.
- uint32_t findEdge(uint32_t vertex0, uint32_t vertex1) const
- {
- uint32_t result = UINT32_MAX;
- if (m_nextColocalVertex.isEmpty()) {
+ uint32_t findEdge(uint32_t vertex0, uint32_t vertex1) const {
+ // Try to find exact vertex match first.
+ {
EdgeKey key(vertex0, vertex1);
uint32_t edge = m_edgeMap.get(key);
while (edge != UINT32_MAX) {
// Don't find edges of ignored faces.
- if (!isFaceIgnored(meshEdgeFace(edge))) {
- //XA_DEBUG_ASSERT(m_id != UINT32_MAX || (m_id == UINT32_MAX && result == UINT32_MAX)); // duplicate edge - ignore on initial meshes
- result = edge;
-#if !XA_DEBUG
- return result;
-#endif
- }
- edge = m_edgeMap.getNext(edge);
+ if (!isFaceIgnored(meshEdgeFace(edge)))
+ return edge;
+ edge = m_edgeMap.getNext(key, edge);
}
- } else {
- for (ColocalVertexIterator it0(this, vertex0); !it0.isDone(); it0.advance()) {
- for (ColocalVertexIterator it1(this, vertex1); !it1.isDone(); it1.advance()) {
- EdgeKey key(it0.vertex(), it1.vertex());
+ }
+ // If colocals were created, try every permutation.
+ if (!m_nextColocalVertex.isEmpty()) {
+ uint32_t colocalVertex0 = vertex0;
+ for (;;) {
+ uint32_t colocalVertex1 = vertex1;
+ for (;;) {
+ EdgeKey key(colocalVertex0, colocalVertex1);
uint32_t edge = m_edgeMap.get(key);
while (edge != UINT32_MAX) {
// Don't find edges of ignored faces.
- if (!isFaceIgnored(meshEdgeFace(edge))) {
- XA_DEBUG_ASSERT(m_id != UINT32_MAX || (m_id == UINT32_MAX && result == UINT32_MAX)); // duplicate edge - ignore on initial meshes
- result = edge;
-#if !XA_DEBUG
- return result;
-#endif
- }
- edge = m_edgeMap.getNext(edge);
+ if (!isFaceIgnored(meshEdgeFace(edge)))
+ return edge;
+ edge = m_edgeMap.getNext(key, edge);
}
+ colocalVertex1 = m_nextColocalVertex[colocalVertex1];
+ if (colocalVertex1 == vertex1)
+ break; // Back to start.
}
+ colocalVertex0 = m_nextColocalVertex[colocalVertex0];
+ if (colocalVertex0 == vertex0)
+ break; // Back to start.
}
}
- return result;
+ return UINT32_MAX;
}
// Edge map can be destroyed when no longer used to reduce memory usage. It's used by:
// * Mesh::createBoundaries()
- // * Mesh::ColocalEdgeIterator (used by MeshFaceGroups)
- // * meshCloseHole()
- void destroyEdgeMap()
- {
+ // * Mesh::edgeMap() (used by MeshFaceGroups)
+ void destroyEdgeMap() {
m_edgeMap.destroy();
}
#if XA_DEBUG_EXPORT_OBJ
- void writeObjVertices(FILE *file) const
- {
+ void writeObjVertices(FILE *file) const {
for (uint32_t i = 0; i < m_positions.size(); i++)
fprintf(file, "v %g %g %g\n", m_positions[i].x, m_positions[i].y, m_positions[i].z);
if (m_flags & MeshFlags::HasNormals) {
@@ -2733,8 +2475,7 @@ public:
fprintf(file, "vt %g %g\n", m_texcoords[i].x, m_texcoords[i].y);
}
- void writeObjFace(FILE *file, uint32_t face, uint32_t offset = 0) const
- {
+ void writeObjFace(FILE *file, uint32_t face, uint32_t offset = 0) const {
fprintf(file, "f ");
for (uint32_t j = 0; j < 3; j++) {
const uint32_t index = m_indices[face * 3 + j] + 1 + offset; // 1-indexed
@@ -2742,8 +2483,7 @@ public:
}
}
- void writeObjBoundaryEges(FILE *file) const
- {
+ void writeObjBoundaryEges(FILE *file) const {
if (m_oppositeEdges.isEmpty())
return; // Boundaries haven't been created.
fprintf(file, "o boundary_edges\n");
@@ -2754,31 +2494,7 @@ public:
}
}
- void writeObjLinkedBoundaries(FILE *file) const
- {
- if (m_oppositeEdges.isEmpty() || m_nextBoundaryEdges.isEmpty())
- return; // Boundaries haven't been created and/or linked.
- Array<uint32_t> boundaryLoops;
- meshGetBoundaryLoops(*this, boundaryLoops);
- for (uint32_t i = 0; i < boundaryLoops.size(); i++) {
- uint32_t edge = boundaryLoops[i];
- fprintf(file, "o boundary_%04d\n", i);
- fprintf(file, "l");
- for (;;) {
- const uint32_t vertex0 = m_indices[meshEdgeIndex0(edge)];
- const uint32_t vertex1 = m_indices[meshEdgeIndex1(edge)];
- fprintf(file, " %d", vertex0 + 1); // 1-indexed
- edge = m_nextBoundaryEdges[edge];
- if (edge == boundaryLoops[i] || edge == UINT32_MAX) {
- fprintf(file, " %d\n", vertex1 + 1); // 1-indexed
- break;
- }
- }
- }
- }
-
- void writeObjFile(const char *filename) const
- {
+ void writeObjFile(const char *filename) const {
FILE *file;
XA_FOPEN(file, filename, "w");
if (!file)
@@ -2789,13 +2505,11 @@ public:
for (uint32_t i = 0; i < faceCount(); i++)
writeObjFace(file, i);
writeObjBoundaryEges(file);
- writeObjLinkedBoundaries(file);
fclose(file);
}
#endif
- float computeSurfaceArea() const
- {
+ float computeSurfaceArea() const {
float area = 0;
for (uint32_t f = 0; f < faceCount(); f++)
area += computeFaceArea(f);
@@ -2804,24 +2518,21 @@ public:
}
// Returned value is always positive, even if some triangles are flipped.
- float computeParametricArea() const
- {
+ float computeParametricArea() const {
float area = 0;
for (uint32_t f = 0; f < faceCount(); f++)
area += fabsf(computeFaceParametricArea(f)); // May be negative, depends on texcoord winding.
- return area;
+ return area;
}
- float computeFaceArea(uint32_t face) const
- {
+ float computeFaceArea(uint32_t face) const {
const Vector3 &p0 = m_positions[m_indices[face * 3 + 0]];
const Vector3 &p1 = m_positions[m_indices[face * 3 + 1]];
const Vector3 &p2 = m_positions[m_indices[face * 3 + 2]];
return length(cross(p1 - p0, p2 - p0)) * 0.5f;
}
- Vector3 computeFaceCentroid(uint32_t face) const
- {
+ Vector3 computeFaceCentroid(uint32_t face) const {
Vector3 sum(0.0f);
for (uint32_t i = 0; i < 3; i++)
sum += m_positions[m_indices[face * 3 + i]];
@@ -2830,8 +2541,7 @@ public:
// Average of the edge midpoints weighted by the edge length.
// I want a point inside the triangle, but closer to the cirumcenter.
- Vector3 computeFaceCenter(uint32_t face) const
- {
+ Vector3 computeFaceCenter(uint32_t face) const {
const Vector3 &p0 = m_positions[m_indices[face * 3 + 0]];
const Vector3 &p1 = m_positions[m_indices[face * 3 + 1]];
const Vector3 &p2 = m_positions[m_indices[face * 3 + 2]];
@@ -2844,28 +2554,25 @@ public:
return m0 + m1 + m2;
}
- Vector3 computeFaceNormal(uint32_t face) const
- {
+ Vector3 computeFaceNormal(uint32_t face) const {
const Vector3 &p0 = m_positions[m_indices[face * 3 + 0]];
const Vector3 &p1 = m_positions[m_indices[face * 3 + 1]];
const Vector3 &p2 = m_positions[m_indices[face * 3 + 2]];
const Vector3 e0 = p2 - p0;
const Vector3 e1 = p1 - p0;
const Vector3 normalAreaScaled = cross(e0, e1);
- return normalizeSafe(normalAreaScaled, Vector3(0, 0, 1), 0.0f);
+ return normalizeSafe(normalAreaScaled, Vector3(0, 0, 1));
}
- float computeFaceParametricArea(uint32_t face) const
- {
+ float computeFaceParametricArea(uint32_t face) const {
const Vector2 &t0 = m_texcoords[m_indices[face * 3 + 0]];
const Vector2 &t1 = m_texcoords[m_indices[face * 3 + 1]];
const Vector2 &t2 = m_texcoords[m_indices[face * 3 + 2]];
return triangleArea(t0, t1, t2);
}
-
+
// @@ This is not exactly accurate, we should compare the texture coordinates...
- bool isSeam(uint32_t edge) const
- {
+ bool isSeam(uint32_t edge) const {
const uint32_t oppositeEdge = m_oppositeEdges[edge];
if (oppositeEdge == UINT32_MAX)
return false; // boundary edge
@@ -2876,8 +2583,7 @@ public:
return m_indices[e0] != m_indices[oe1] || m_indices[e1] != m_indices[oe0];
}
- bool isTextureSeam(uint32_t edge) const
- {
+ bool isTextureSeam(uint32_t edge) const {
const uint32_t oppositeEdge = m_oppositeEdges[edge];
if (oppositeEdge == UINT32_MAX)
return false; // boundary edge
@@ -2888,26 +2594,9 @@ public:
return m_texcoords[m_indices[e0]] != m_texcoords[m_indices[oe1]] || m_texcoords[m_indices[e1]] != m_texcoords[m_indices[oe0]];
}
- uint32_t firstColocal(uint32_t vertex) const
- {
- for (ColocalVertexIterator it(this, vertex); !it.isDone(); it.advance()) {
- if (it.vertex() < vertex)
- vertex = it.vertex();
- }
- return vertex;
- }
-
- bool areColocal(uint32_t vertex0, uint32_t vertex1) const
- {
- if (vertex0 == vertex1)
- return true;
- if (m_nextColocalVertex.isEmpty())
- return false;
- for (ColocalVertexIterator it(this, vertex0); !it.isDone(); it.advance()) {
- if (it.vertex() == vertex1)
- return true;
- }
- return false;
+ uint32_t firstColocalVertex(uint32_t vertex) const {
+ XA_DEBUG_ASSERT(m_firstColocalVertex.size() == m_positions.size());
+ return m_firstColocalVertex[vertex];
}
XA_INLINE float epsilon() const { return m_epsilon; }
@@ -2919,23 +2608,28 @@ public:
XA_INLINE uint32_t vertexCount() const { return m_positions.size(); }
XA_INLINE uint32_t vertexAt(uint32_t i) const { return m_indices[i]; }
XA_INLINE const Vector3 &position(uint32_t vertex) const { return m_positions[vertex]; }
- XA_INLINE const Vector3 *positions() const { return m_positions.data(); }
- XA_INLINE const Vector3 &normal(uint32_t vertex) const { XA_DEBUG_ASSERT(m_flags & MeshFlags::HasNormals); return m_normals[vertex]; }
+ XA_INLINE ConstArrayView<Vector3> positions() const { return m_positions; }
+ XA_INLINE const Vector3 &normal(uint32_t vertex) const {
+ XA_DEBUG_ASSERT(m_flags & MeshFlags::HasNormals);
+ return m_normals[vertex];
+ }
XA_INLINE const Vector2 &texcoord(uint32_t vertex) const { return m_texcoords[vertex]; }
XA_INLINE Vector2 &texcoord(uint32_t vertex) { return m_texcoords[vertex]; }
- XA_INLINE const Vector2 *texcoords() const { return m_texcoords.data(); }
- XA_INLINE Vector2 *texcoords() { return m_texcoords.data(); }
+ XA_INLINE const ConstArrayView<Vector2> texcoords() const { return m_texcoords; }
+ XA_INLINE ArrayView<Vector2> texcoords() { return m_texcoords; }
XA_INLINE uint32_t faceCount() const { return m_indices.size() / 3; }
- XA_INLINE const uint32_t *indices() const { return m_indices.data(); }
+ XA_INLINE ConstArrayView<uint32_t> indices() const { return m_indices; }
XA_INLINE uint32_t indexCount() const { return m_indices.size(); }
XA_INLINE bool isFaceIgnored(uint32_t face) const { return (m_flags & MeshFlags::HasIgnoredFaces) && m_faceIgnore[face]; }
+ XA_INLINE uint32_t faceMaterial(uint32_t face) const { return (m_flags & MeshFlags::HasMaterials) ? m_faceMaterials[face] : UINT32_MAX; }
+ XA_INLINE const HashMap<EdgeKey, EdgeHash> &edgeMap() const { return m_edgeMap; }
private:
-
float m_epsilon;
uint32_t m_flags;
uint32_t m_id;
Array<bool> m_faceIgnore;
+ Array<uint32_t> m_faceMaterials;
Array<uint32_t> m_indices;
Array<Vector3> m_positions;
Array<Vector3> m_normals;
@@ -2943,205 +2637,31 @@ private:
// Populated by createColocals
Array<uint32_t> m_nextColocalVertex; // In: vertex index. Out: the vertex index of the next colocal position.
+ Array<uint32_t> m_firstColocalVertex;
// Populated by createBoundaries
BitArray m_isBoundaryVertex;
Array<uint32_t> m_boundaryEdges;
Array<uint32_t> m_oppositeEdges; // In: edge index. Out: the index of the opposite edge (i.e. wound the opposite direction). UINT32_MAX if the input edge is a boundary edge.
- // Populated by linkBoundaries
- Array<uint32_t> m_nextBoundaryEdges; // The index of the next boundary edge. UINT32_MAX if the edge is not a boundary edge.
-
- struct EdgeKey
- {
- EdgeKey(const EdgeKey &k) : v0(k.v0), v1(k.v1) {}
- EdgeKey(uint32_t v0, uint32_t v1) : v0(v0), v1(v1) {}
- bool operator==(const EdgeKey &k) const { return v0 == k.v0 && v1 == k.v1; }
-
- uint32_t v0;
- uint32_t v1;
- };
-
- struct EdgeHash
- {
- uint32_t operator()(const EdgeKey &k) const { return k.v0 * 32768u + k.v1; }
- };
-
HashMap<EdgeKey, EdgeHash> m_edgeMap;
public:
- class BoundaryLoopEdgeIterator
- {
+ class FaceEdgeIterator {
public:
- BoundaryLoopEdgeIterator(const Mesh *mesh, uint32_t edge) : m_mesh(mesh), m_first(UINT32_MAX), m_current(edge) {}
-
- void advance()
- {
- if (m_first == UINT32_MAX)
- m_first = m_current;
- m_current = m_mesh->m_nextBoundaryEdges[m_current];
- }
-
- bool isDone() const
- {
- return m_first == m_current || m_current == UINT32_MAX;
- }
-
- uint32_t edge() const
- {
- return m_current;
- }
-
- uint32_t nextEdge() const
- {
- return m_mesh->m_nextBoundaryEdges[m_current];
- }
-
- private:
- const Mesh *m_mesh;
- uint32_t m_first;
- uint32_t m_current;
- };
-
- class ColocalVertexIterator
- {
- public:
- ColocalVertexIterator(const Mesh *mesh, uint32_t v) : m_mesh(mesh), m_first(UINT32_MAX), m_current(v) {}
-
- void advance()
- {
- if (m_first == UINT32_MAX)
- m_first = m_current;
- if (!m_mesh->m_nextColocalVertex.isEmpty())
- m_current = m_mesh->m_nextColocalVertex[m_current];
- }
-
- bool isDone() const
- {
- return m_first == m_current;
- }
-
- uint32_t vertex() const
- {
- return m_current;
- }
-
- const Vector3 *pos() const
- {
- return &m_mesh->m_positions[m_current];
- }
-
- private:
- const Mesh *m_mesh;
- uint32_t m_first;
- uint32_t m_current;
- };
-
- class ColocalEdgeIterator
- {
- public:
- ColocalEdgeIterator(const Mesh *mesh, uint32_t vertex0, uint32_t vertex1) : m_mesh(mesh), m_vertex0It(mesh, vertex0), m_vertex1It(mesh, vertex1), m_vertex1(vertex1)
- {
- do {
- if (!resetElement()) {
- advanceVertex1();
- }
- else {
- break;
- }
- } while (!isDone());
- }
-
- void advance()
- {
- advanceElement();
- }
-
- bool isDone() const
- {
- return m_vertex0It.isDone() && m_vertex1It.isDone() && m_edge == UINT32_MAX;
- }
-
- uint32_t edge() const
- {
- return m_edge;
- }
-
- private:
- bool resetElement()
- {
- m_edge = m_mesh->m_edgeMap.get(Mesh::EdgeKey(m_vertex0It.vertex(), m_vertex1It.vertex()));
- while (m_edge != UINT32_MAX) {
- if (!isIgnoredFace())
- break;
- m_edge = m_mesh->m_edgeMap.getNext(m_edge);
- }
- if (m_edge == UINT32_MAX) {
- return false;
- }
- return true;
- }
-
- void advanceElement()
- {
- for (;;) {
- m_edge = m_mesh->m_edgeMap.getNext(m_edge);
- if (m_edge == UINT32_MAX)
- break;
- if (!isIgnoredFace())
- break;
- }
- if (m_edge == UINT32_MAX)
- advanceVertex1();
- }
-
- void advanceVertex1()
- {
- auto successful = false;
- while (!successful) {
- m_vertex1It.advance();
- if (m_vertex1It.isDone()) {
- if (!m_vertex0It.isDone()) {
- m_vertex0It.advance();
- m_vertex1It = ColocalVertexIterator(m_mesh, m_vertex1);
- }
- else {
- return;
- }
- }
- successful = resetElement();
- }
- }
-
- bool isIgnoredFace() const
- {
- return m_mesh->m_faceIgnore[meshEdgeFace(m_edge)];
- }
-
- const Mesh *m_mesh;
- ColocalVertexIterator m_vertex0It, m_vertex1It;
- const uint32_t m_vertex1;
- uint32_t m_edge;
- };
-
- class FaceEdgeIterator
- {
- public:
- FaceEdgeIterator (const Mesh *mesh, uint32_t face) : m_mesh(mesh), m_face(face), m_relativeEdge(0)
- {
+ FaceEdgeIterator(const Mesh *mesh, uint32_t face) :
+ m_mesh(mesh), m_face(face), m_relativeEdge(0) {
m_edge = m_face * 3;
}
- void advance()
- {
+ void advance() {
if (m_relativeEdge < 3) {
m_edge++;
m_relativeEdge++;
}
}
- bool isDone() const
- {
+ bool isDone() const {
return m_relativeEdge == 3;
}
@@ -3152,9 +2672,8 @@ public:
uint32_t relativeEdge() const { return m_relativeEdge; }
uint32_t face() const { return m_face; }
uint32_t oppositeEdge() const { return m_mesh->m_oppositeEdges[m_edge]; }
-
- uint32_t oppositeFace() const
- {
+
+ uint32_t oppositeFace() const {
const uint32_t oedge = m_mesh->m_oppositeEdges[m_edge];
if (oedge == UINT32_MAX)
return UINT32_MAX;
@@ -3178,19 +2697,18 @@ public:
};
};
-struct MeshFaceGroups
-{
+struct MeshFaceGroups {
typedef uint32_t Handle;
static constexpr Handle kInvalid = UINT32_MAX;
- MeshFaceGroups(const Mesh *mesh) : m_mesh(mesh), m_groups(MemTag::Mesh), m_firstFace(MemTag::Mesh), m_nextFace(MemTag::Mesh), m_faceCount(MemTag::Mesh) {}
+ MeshFaceGroups(const Mesh *mesh) :
+ m_mesh(mesh), m_groups(MemTag::Mesh), m_firstFace(MemTag::Mesh), m_nextFace(MemTag::Mesh), m_faceCount(MemTag::Mesh) {}
XA_INLINE Handle groupAt(uint32_t face) const { return m_groups[face]; }
XA_INLINE uint32_t groupCount() const { return m_faceCount.size(); }
XA_INLINE uint32_t nextFace(uint32_t face) const { return m_nextFace[face]; }
XA_INLINE uint32_t faceCount(uint32_t group) const { return m_faceCount[group]; }
- void compute()
- {
+ void compute() {
m_groups.resize(m_mesh->faceCount());
m_groups.fillBytes(0xff); // Set all faces to kInvalid
uint32_t firstUnassignedFace = 0;
@@ -3222,57 +2740,25 @@ struct MeshFaceGroups
break;
const uint32_t f = growFaces.back();
growFaces.pop_back();
+ const uint32_t material = m_mesh->faceMaterial(f);
for (Mesh::FaceEdgeIterator edgeIt(m_mesh, f); !edgeIt.isDone(); edgeIt.advance()) {
- // Iterate opposite edges. There may be more than one - non-manifold geometry can have duplicate edges.
- // Prioritize the one with exact vertex match, not just colocal.
- // If *any* of the opposite edges are already assigned to this group, don't do anything.
- bool alreadyAssignedToThisGroup = false;
- uint32_t bestConnectedFace = UINT32_MAX;
- for (Mesh::ColocalEdgeIterator oppositeEdgeIt(m_mesh, edgeIt.vertex1(), edgeIt.vertex0()); !oppositeEdgeIt.isDone(); oppositeEdgeIt.advance()) {
- const uint32_t oppositeEdge = oppositeEdgeIt.edge();
- const uint32_t oppositeFace = meshEdgeFace(oppositeEdge);
-#if 0
- // Reject opposite face if dihedral angle >= 90 degrees.
- {
- Vector3 a = m_mesh->computeFaceNormal(f);
- Vector3 b = m_mesh->computeFaceNormal(oppositeFace);
- if (dot(a, b) <= 0.0f)
- continue;
- }
-#endif
- if (m_mesh->isFaceIgnored(oppositeFace))
- continue; // Don't add ignored faces to group.
- if (m_groups[oppositeFace] == group) {
- alreadyAssignedToThisGroup = true;
- break;
- }
- if (m_groups[oppositeFace] != kInvalid)
- continue; // Connected face is already assigned to another group.
- if (faceDuplicatesGroupEdge(group, oppositeFace))
- continue; // Don't want duplicate edges in a group.
- const uint32_t oppositeVertex0 = m_mesh->vertexAt(meshEdgeIndex0(oppositeEdge));
- const uint32_t oppositeVertex1 = m_mesh->vertexAt(meshEdgeIndex1(oppositeEdge));
- if (bestConnectedFace == UINT32_MAX || (oppositeVertex0 == edgeIt.vertex1() && oppositeVertex1 == edgeIt.vertex0()))
- bestConnectedFace = oppositeFace;
-#if 0
- else {
- // Choose the opposite face with the smallest dihedral angle.
- const float d1 = 1.0f - dot(computeFaceNormal(f), computeFaceNormal(bestConnectedFace));
- const float d2 = 1.0f - dot(computeFaceNormal(f), computeFaceNormal(oppositeFace));
- if (d2 < d1)
- bestConnectedFace = oppositeFace;
- }
-#endif
- }
- if (!alreadyAssignedToThisGroup && bestConnectedFace != UINT32_MAX) {
- m_groups[bestConnectedFace] = group;
- m_nextFace[bestConnectedFace] = UINT32_MAX;
- if (prevFace != UINT32_MAX)
- m_nextFace[prevFace] = bestConnectedFace;
- prevFace = bestConnectedFace;
- groupFaceCount++;
- growFaces.push_back(bestConnectedFace);
- }
+ const uint32_t oppositeEdge = m_mesh->findEdge(edgeIt.vertex1(), edgeIt.vertex0());
+ if (oppositeEdge == UINT32_MAX)
+ continue; // Boundary edge.
+ const uint32_t oppositeFace = meshEdgeFace(oppositeEdge);
+ if (m_mesh->isFaceIgnored(oppositeFace))
+ continue; // Don't add ignored faces to group.
+ if (m_mesh->faceMaterial(oppositeFace) != material)
+ continue; // Different material.
+ if (m_groups[oppositeFace] != kInvalid)
+ continue; // Connected face is already assigned to another group.
+ m_groups[oppositeFace] = group;
+ m_nextFace[oppositeFace] = UINT32_MAX;
+ if (prevFace != UINT32_MAX)
+ m_nextFace[prevFace] = oppositeFace;
+ prevFace = oppositeFace;
+ groupFaceCount++;
+ growFaces.push_back(oppositeFace);
}
}
m_faceCount.push_back(groupFaceCount);
@@ -3281,27 +2767,23 @@ struct MeshFaceGroups
}
}
- class Iterator
- {
+ class Iterator {
public:
- Iterator(const MeshFaceGroups *meshFaceGroups, Handle group) : m_meshFaceGroups(meshFaceGroups)
- {
+ Iterator(const MeshFaceGroups *meshFaceGroups, Handle group) :
+ m_meshFaceGroups(meshFaceGroups) {
XA_DEBUG_ASSERT(group != kInvalid);
m_current = m_meshFaceGroups->m_firstFace[group];
}
- void advance()
- {
+ void advance() {
m_current = m_meshFaceGroups->m_nextFace[m_current];
}
- bool isDone() const
- {
+ bool isDone() const {
return m_current == UINT32_MAX;
}
- uint32_t face() const
- {
+ uint32_t face() const {
return m_current;
}
@@ -3311,18 +2793,6 @@ struct MeshFaceGroups
};
private:
- // Check if the face duplicates any edges of any face already in the group.
- bool faceDuplicatesGroupEdge(Handle group, uint32_t face) const
- {
- for (Mesh::FaceEdgeIterator edgeIt(m_mesh, face); !edgeIt.isDone(); edgeIt.advance()) {
- for (Mesh::ColocalEdgeIterator colocalEdgeIt(m_mesh, edgeIt.vertex0(), edgeIt.vertex1()); !colocalEdgeIt.isDone(); colocalEdgeIt.advance()) {
- if (m_groups[meshEdgeFace(colocalEdgeIt.edge())] == group)
- return true;
- }
- }
- return false;
- }
-
const Mesh *m_mesh;
Array<Handle> m_groups;
Array<uint32_t> m_firstFace;
@@ -3332,243 +2802,27 @@ private:
constexpr MeshFaceGroups::Handle MeshFaceGroups::kInvalid;
-static bool meshCloseHole(Mesh *mesh, const Array<uint32_t> &holeVertices, const Vector3 &normal)
-{
-#if XA_CLOSE_HOLES_CHECK_EDGE_INTERSECTION
- const uint32_t faceCount = mesh->faceCount();
-#endif
- const bool compareNormal = equal(normal, Vector3(0.0f), FLT_EPSILON);
- uint32_t frontCount = holeVertices.size();
- Array<uint32_t> frontVertices;
- Array<Vector3> frontPoints;
- Array<float> frontAngles;
- frontVertices.resize(frontCount);
- frontPoints.resize(frontCount);
- for (uint32_t i = 0; i < frontCount; i++) {
- frontVertices[i] = holeVertices[i];
- frontPoints[i] = mesh->position(frontVertices[i]);
- }
- while (frontCount >= 3) {
- frontAngles.resize(frontCount);
- float smallestAngle = kPi2, smallestAngleIgnoringNormal = kPi2;
- uint32_t smallestAngleIndex = UINT32_MAX, smallestAngleIndexIgnoringNormal = UINT32_MAX;
- for (uint32_t i = 0; i < frontCount; i++) {
- const uint32_t i1 = i == 0 ? frontCount - 1 : i - 1;
- const uint32_t i2 = i;
- const uint32_t i3 = (i + 1) % frontCount;
- const Vector3 edge1 = frontPoints[i1] - frontPoints[i2];
- const Vector3 edge2 = frontPoints[i3] - frontPoints[i2];
- frontAngles[i] = atan2f(length(cross(edge1, edge2)), dot(edge1, edge2));
- if (frontAngles[i] >= smallestAngle || isNan(frontAngles[i]))
- continue;
- // Don't duplicate edges.
- if (mesh->findEdge(frontVertices[i1], frontVertices[i2]) != UINT32_MAX)
- continue;
- if (mesh->findEdge(frontVertices[i2], frontVertices[i3]) != UINT32_MAX)
- continue;
- if (mesh->findEdge(frontVertices[i3], frontVertices[i1]) != UINT32_MAX)
- continue;
- /*
- Make sure he new edge that would be formed by (i3, i1) doesn't intersect any vertices. This often happens when fixing t-junctions.
-
- i2
- *
- / \
- / \
- i1 *--*--* i3
- \ | /
- \|/
- *
- */
- bool intersection = false;
- for (uint32_t j = 0; j < frontCount; j++) {
- if (j == i1 || j == i2 || j == i3)
- continue;
- if (lineIntersectsPoint(frontPoints[j], frontPoints[i3], frontPoints[i1], nullptr, mesh->epsilon())) {
- intersection = true;
- break;
- }
- }
- if (intersection)
- continue;
- // Don't add the triangle if a boundary point lies on the same plane as the triangle, and is inside it.
- intersection = false;
- const Plane plane(frontPoints[i1], frontPoints[i2], frontPoints[i3]);
- for (uint32_t j = 0; j < frontCount; j++) {
- if (j == i1 || j == i2 || j == i3)
- continue;
- if (!isZero(plane.distance(frontPoints[j]), mesh->epsilon()))
- continue;
- if (pointInTriangle(frontPoints[j], frontPoints[i1], frontPoints[i2], frontPoints[i3])) {
- intersection = true;
- break;
- }
- }
- if (intersection)
- continue;
-#if XA_CLOSE_HOLES_CHECK_EDGE_INTERSECTION
- // Don't add the triangle if the new edge (i3, i1), intersects any other triangle that isn't part of the filled hole.
- intersection = false;
- const Vector3 newEdgeVector = frontPoints[i1] - frontPoints[i3];
- for (uint32_t f = 0; f < faceCount; f++) {
- Vector3 tri[3];
- for (uint32_t j = 0; j < 3; j++)
- tri[j] = mesh->position(mesh->vertexAt(f * 3 + j));
- float t;
- if (rayIntersectsTriangle(frontPoints[i3], newEdgeVector, tri, &t)) {
- intersection = true;
- break;
- }
- }
- if (intersection)
- continue;
-#endif
- // Skip backwards facing triangles.
- if (compareNormal) {
- if (frontAngles[i] < smallestAngleIgnoringNormal) {
- smallestAngleIgnoringNormal = frontAngles[i];
- smallestAngleIndexIgnoringNormal = i;
- }
- const Vector3 e0 = frontPoints[i3] - frontPoints[i1];
- const Vector3 e1 = frontPoints[i2] - frontPoints[i1];
- const Vector3 triNormal = normalizeSafe(cross(e0, e1), Vector3(0.0f), mesh->epsilon());
- if (dot(normal, triNormal) <= 0.0f)
- continue;
- }
- smallestAngle = smallestAngleIgnoringNormal = frontAngles[i];
- smallestAngleIndex = smallestAngleIndexIgnoringNormal = i;
- }
- // Closing holes failed if we don't have a smallest angle.
- // Fallback to ignoring the backwards facing normal test if possible.
- if (smallestAngleIndex == UINT32_MAX || smallestAngle <= 0.0f || smallestAngle >= kPi) {
- if (smallestAngleIgnoringNormal == UINT32_MAX || smallestAngleIgnoringNormal <= 0.0f || smallestAngleIgnoringNormal >= kPi)
- return false;
- else
- smallestAngleIndex = smallestAngleIndexIgnoringNormal;
- }
- const uint32_t i1 = smallestAngleIndex == 0 ? frontCount - 1 : smallestAngleIndex - 1;
- const uint32_t i2 = smallestAngleIndex;
- const uint32_t i3 = (smallestAngleIndex + 1) % frontCount;
- const Mesh::AddFaceResult::Enum result = mesh->addFace(frontVertices[i1], frontVertices[i2], frontVertices[i3]);
- XA_DEBUG_ASSERT(result == Mesh::AddFaceResult::OK); // Shouldn't happen due to the findEdge calls above.
- XA_UNUSED(result);
- frontVertices.removeAt(i2);
- frontPoints.removeAt(i2);
- frontCount = frontVertices.size();
- }
- return true;
-}
-
-static bool meshCloseHoles(Mesh *mesh, const Array<uint32_t> &boundaryLoops, const Vector3 &normal, uint32_t *holeCount, Array<uint32_t> *holeFaceCounts)
-{
- if (holeFaceCounts)
- holeFaceCounts->clear();
- // Compute lengths.
- const uint32_t boundaryCount = boundaryLoops.size();
- Array<float> boundaryLengths;
- Array<uint32_t> boundaryEdgeCounts;
- boundaryEdgeCounts.resize(boundaryCount);
- for (uint32_t i = 0; i < boundaryCount; i++) {
- float boundaryLength = 0.0f;
- boundaryEdgeCounts[i] = 0;
- for (Mesh::BoundaryLoopEdgeIterator it(mesh, boundaryLoops[i]); !it.isDone(); it.advance()) {
- const Vector3 &t0 = mesh->position(mesh->vertexAt(meshEdgeIndex0(it.edge())));
- const Vector3 &t1 = mesh->position(mesh->vertexAt(meshEdgeIndex1(it.edge())));
- boundaryLength += length(t1 - t0);
- boundaryEdgeCounts[i]++;
- }
- boundaryLengths.push_back(boundaryLength);
- }
- // Find disk boundary.
- uint32_t diskBoundary = 0;
- float maxLength = boundaryLengths[0];
- for (uint32_t i = 1; i < boundaryCount; i++) {
- if (boundaryLengths[i] > maxLength) {
- maxLength = boundaryLengths[i];
- diskBoundary = i;
- }
- }
- // Close holes.
- Array<uint32_t> holeVertices;
- Array<Vector3> holePoints;
- bool result = true;
- for (uint32_t i = 0; i < boundaryCount; i++) {
- if (diskBoundary == i)
- continue; // Skip disk boundary.
- holeVertices.resize(boundaryEdgeCounts[i]);
- holePoints.resize(boundaryEdgeCounts[i]);
- // Winding is backwards for internal boundaries.
- uint32_t e = 0;
- for (Mesh::BoundaryLoopEdgeIterator it(mesh, boundaryLoops[i]); !it.isDone(); it.advance()) {
- const uint32_t vertex = mesh->vertexAt(meshEdgeIndex0(it.edge()));
- holeVertices[boundaryEdgeCounts[i] - 1 - e] = vertex;
- holePoints[boundaryEdgeCounts[i] - 1 - e] = mesh->position(vertex);
- e++;
- }
- const uint32_t oldFaceCount = mesh->faceCount();
- if (!meshCloseHole(mesh, holeVertices, normal))
- result = false; // Return false if any hole failed to close, but keep trying to close other holes.
- if (holeCount)
- (*holeCount)++;
- if (holeFaceCounts)
- holeFaceCounts->push_back(mesh->faceCount() - oldFaceCount);
- }
- return result;
-}
-
-static bool meshIsPlanar(const Mesh &mesh)
-{
- const Vector3 p1 = mesh.position(mesh.vertexAt(0));
- const Vector3 p2 = mesh.position(mesh.vertexAt(1));
- const Vector3 p3 = mesh.position(mesh.vertexAt(2));
- const Plane plane(p1, p2, p3);
- const uint32_t vertexCount = mesh.vertexCount();
- for (uint32_t v = 0; v < vertexCount; v++) {
- const float d = plane.distance(mesh.position(v));
- if (!isZero(d, mesh.epsilon()))
- return false;
- }
- return true;
-}
-
-/*
-Fixing T-junctions.
-
-- Find T-junctions. Find vertices that are on an edge.
-- This test is approximate.
-- Insert edges on a spatial index to speedup queries.
-- Consider only open edges, that is edges that have no pairs.
-- Consider only vertices on boundaries.
-- Close T-junction.
-- Split edge.
-
-*/
-struct SplitEdge
-{
- uint32_t edge;
- float t;
- uint32_t vertex;
-
- bool operator<(const SplitEdge &other) const
- {
- if (edge < other.edge)
- return true;
- else if (edge == other.edge) {
- if (t < other.t)
- return true;
- }
+#if XA_CHECK_T_JUNCTIONS
+static bool lineIntersectsPoint(const Vector3 &point, const Vector3 &lineStart, const Vector3 &lineEnd, float *t, float epsilon) {
+ float tt;
+ if (!t)
+ t = &tt;
+ *t = 0.0f;
+ if (equal(lineStart, point, epsilon) || equal(lineEnd, point, epsilon))
+ return false; // Vertex lies on either line vertices.
+ const Vector3 v01 = point - lineStart;
+ const Vector3 v21 = lineEnd - lineStart;
+ const float l = length(v21);
+ const float d = length(cross(v01, v21)) / l;
+ if (!isZero(d, epsilon))
return false;
- }
-};
+ *t = dot(v01, v21) / (l * l);
+ return *t > kEpsilon && *t < 1.0f - kEpsilon;
+}
-// Returns nullptr if there were no t-junctions to fix.
-static Mesh *meshFixTJunctions(const Mesh &inputMesh, bool *duplicatedEdge, bool *failed, uint32_t *fixedTJunctionsCount)
-{
- if (duplicatedEdge)
- *duplicatedEdge = false;
- if (failed)
- *failed = false;
- Array<SplitEdge> splitEdges;
+// Returns the number of T-junctions found.
+static int meshCheckTJunctions(const Mesh &inputMesh) {
+ int count = 0;
const uint32_t vertexCount = inputMesh.vertexCount();
const uint32_t edgeCount = inputMesh.edgeCount();
for (uint32_t v = 0; v < vertexCount; v++) {
@@ -3582,155 +2836,130 @@ static Mesh *meshFixTJunctions(const Mesh &inputMesh, bool *duplicatedEdge, bool
const Vector3 &edgePos1 = inputMesh.position(inputMesh.vertexAt(meshEdgeIndex0(e)));
const Vector3 &edgePos2 = inputMesh.position(inputMesh.vertexAt(meshEdgeIndex1(e)));
float t;
- if (!lineIntersectsPoint(pos, edgePos1, edgePos2, &t, inputMesh.epsilon()))
- continue;
- SplitEdge splitEdge;
- splitEdge.edge = e;
- splitEdge.t = t;
- splitEdge.vertex = v;
- splitEdges.push_back(splitEdge);
+ if (lineIntersectsPoint(pos, edgePos1, edgePos2, &t, inputMesh.epsilon()))
+ count++;
}
}
- if (splitEdges.isEmpty())
- return nullptr;
- const uint32_t faceCount = inputMesh.faceCount();
- Mesh *mesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, inputMesh.epsilon(), vertexCount + splitEdges.size(), faceCount);
- for (uint32_t v = 0; v < vertexCount; v++)
- mesh->addVertex(inputMesh.position(v));
- Array<uint32_t> indexArray;
- indexArray.reserve(4);
- Array<SplitEdge> faceSplitEdges;
- faceSplitEdges.reserve(4);
- for (uint32_t f = 0; f < faceCount; f++) {
- // Find t-junctions in this face.
- faceSplitEdges.clear();
- for (uint32_t i = 0; i < splitEdges.size(); i++) {
- if (meshEdgeFace(splitEdges[i].edge) == f)
- faceSplitEdges.push_back(splitEdges[i]);
- }
- if (!faceSplitEdges.isEmpty()) {
- // Need to split edges in winding order when a single edge has multiple t-junctions.
- insertionSort(faceSplitEdges.data(), faceSplitEdges.size());
- indexArray.clear();
- for (Mesh::FaceEdgeIterator it(&inputMesh, f); !it.isDone(); it.advance()) {
- indexArray.push_back(it.vertex0());
- for (uint32_t se = 0; se < faceSplitEdges.size(); se++) {
- const SplitEdge &splitEdge = faceSplitEdges[se];
- if (splitEdge.edge == it.edge())
- indexArray.push_back(splitEdge.vertex);
+ return count;
+}
+#endif
+
+// References invalid faces and vertices in a mesh.
+struct InvalidMeshGeometry {
+ // If meshFaceGroups is not null, invalid faces have the face group MeshFaceGroups::kInvalid.
+ // If meshFaceGroups is null, invalid faces are Mesh::isFaceIgnored.
+ void extract(const Mesh *mesh, const MeshFaceGroups *meshFaceGroups) {
+ // Copy invalid faces.
+ m_faces.clear();
+ const uint32_t meshFaceCount = mesh->faceCount();
+ for (uint32_t f = 0; f < meshFaceCount; f++) {
+ if ((meshFaceGroups && meshFaceGroups->groupAt(f) == MeshFaceGroups::kInvalid) || (!meshFaceGroups && mesh->isFaceIgnored(f)))
+ m_faces.push_back(f);
+ }
+ // Create *unique* list of vertices of invalid faces.
+ const uint32_t faceCount = m_faces.size();
+ m_indices.resize(faceCount * 3);
+ const uint32_t approxVertexCount = min(faceCount * 3, mesh->vertexCount());
+ m_vertexToSourceVertexMap.clear();
+ m_vertexToSourceVertexMap.reserve(approxVertexCount);
+ HashMap<uint32_t, PassthroughHash<uint32_t>> sourceVertexToVertexMap(MemTag::Mesh, approxVertexCount);
+ for (uint32_t f = 0; f < faceCount; f++) {
+ const uint32_t face = m_faces[f];
+ for (uint32_t i = 0; i < 3; i++) {
+ const uint32_t vertex = mesh->vertexAt(face * 3 + i);
+ uint32_t newVertex = sourceVertexToVertexMap.get(vertex);
+ if (newVertex == UINT32_MAX) {
+ newVertex = sourceVertexToVertexMap.add(vertex);
+ m_vertexToSourceVertexMap.push_back(vertex);
}
- }
- if (!meshCloseHole(mesh, indexArray, Vector3(0.0f))) {
- if (failed)
- *failed = true;
- }
- } else {
- // No t-junctions in this face. Copy from input mesh.
- if (mesh->addFace(&inputMesh.indices()[f * 3]) == Mesh::AddFaceResult::DuplicateEdge) {
- if (duplicatedEdge)
- *duplicatedEdge = true;
+ m_indices[f * 3 + i] = newVertex;
}
}
}
- if (fixedTJunctionsCount)
- *fixedTJunctionsCount = splitEdges.size();
- return mesh;
-}
-// boundaryLoops are the first edges for each boundary loop.
-static void meshGetBoundaryLoops(const Mesh &mesh, Array<uint32_t> &boundaryLoops)
-{
- const uint32_t edgeCount = mesh.edgeCount();
- BitArray bitFlags(edgeCount);
- bitFlags.zeroOutMemory();
- boundaryLoops.clear();
- // Search for boundary edges. Mark all the edges that belong to the same boundary.
- for (uint32_t e = 0; e < edgeCount; e++) {
- if (bitFlags.get(e) || !mesh.isBoundaryEdge(e))
- continue;
- for (Mesh::BoundaryLoopEdgeIterator it(&mesh, e); !it.isDone(); it.advance())
- bitFlags.set(it.edge());
- boundaryLoops.push_back(e);
- }
-}
+ ConstArrayView<uint32_t> faces() const { return m_faces; } // Source-mesh face indices collected by extract().
+ ConstArrayView<uint32_t> indices() const { return m_indices; } // 3 entries per face in faces(); presumably each one indexes into vertices() - confirm against HashMap::add's return value.
+ ConstArrayView<uint32_t> vertices() const { return m_vertexToSourceVertexMap; } // For each unique vertex of the invalid faces, the corresponding vertex of the source mesh.
-struct Progress
-{
- Progress(ProgressCategory::Enum category, ProgressFunc func, void *userData, uint32_t maxValue) : value(0), cancel(false), m_category(category), m_func(func), m_userData(userData), m_maxValue(maxValue), m_progress(0)
- {
+private:
+ Array<uint32_t> m_faces, m_indices;
+ Array<uint32_t> m_vertexToSourceVertexMap; // Map face vertices to vertices of the source mesh.
+};
+
+struct Progress {
+ Progress(ProgressCategory category, ProgressFunc func, void *userData, uint32_t maxValue) :
+ cancel(false), m_category(category), m_func(func), m_userData(userData), m_value(0), m_maxValue(maxValue), m_percent(0) {
if (m_func) {
if (!m_func(category, 0, userData))
cancel = true;
}
}
- ~Progress()
- {
+ ~Progress() {
if (m_func) {
if (!m_func(m_category, 100, m_userData))
cancel = true;
}
}
- void update()
- {
- if (!m_func)
- return;
- m_mutex.lock();
- const uint32_t newProgress = uint32_t(ceilf(value.load() / (float)m_maxValue * 100.0f));
- if (newProgress != m_progress && newProgress < 100) {
- m_progress = newProgress;
- if (!m_func(m_category, m_progress, m_userData))
- cancel = true;
- }
- m_mutex.unlock();
+ void increment(uint32_t value) {
+ m_value += value;
+ update();
}
- void setMaxValue(uint32_t maxValue)
- {
- m_mutex.lock();
+ void setMaxValue(uint32_t maxValue) {
m_maxValue = maxValue;
- m_mutex.unlock();
+ update();
}
- std::atomic<uint32_t> value;
std::atomic<bool> cancel;
private:
- ProgressCategory::Enum m_category;
+ void update() { // Recomputes the completion percentage and reports it through m_func.
+ if (!m_func)
+ return; // No callback registered, nothing to report.
+ const uint32_t newPercent = uint32_t(ceilf(m_value.load() / (float)m_maxValue.load() * 100.0f)); // m_value as a percentage of m_maxValue, rounded up.
+ if (newPercent != m_percent) {
+ // Atomic max: raise m_percent to newPercent unless it is already at least that large.
+ uint32_t oldPercent = m_percent;
+ while (oldPercent < newPercent && !m_percent.compare_exchange_weak(oldPercent, newPercent)) { // A failed exchange reloads oldPercent, so the loop re-checks against the latest stored value.
+ }
+ if (!m_func(m_category, m_percent, m_userData)) // Also reached when newPercent is smaller than m_percent; the stored (larger) value is what gets reported.
+ cancel = true; // The callback returned false: set the cancel flag.
+ }
+ }
+
+ ProgressCategory m_category;
ProgressFunc m_func;
void *m_userData;
- uint32_t m_maxValue;
- uint32_t m_progress;
- std::mutex m_mutex;
+ std::atomic<uint32_t> m_value, m_maxValue, m_percent;
};
-struct Spinlock
-{
- void lock() { while(m_lock.test_and_set(std::memory_order_acquire)) {} }
+struct Spinlock {
+ void lock() {
+ while (m_lock.test_and_set(std::memory_order_acquire)) {
+ }
+ }
void unlock() { m_lock.clear(std::memory_order_release); }
private:
std::atomic_flag m_lock = ATOMIC_FLAG_INIT;
};
-struct TaskGroupHandle
-{
+struct TaskGroupHandle {
uint32_t value = UINT32_MAX;
};
-struct Task
-{
- void (*func)(void *userData);
- void *userData;
+struct Task {
+ void (*func)(void *groupUserData, void *taskUserData);
+ void *userData; // Passed to func as taskUserData.
};
#if XA_MULTITHREADED
-class TaskScheduler
-{
+class TaskScheduler {
public:
- TaskScheduler() : m_shutdown(false)
- {
+ TaskScheduler() :
+ m_shutdown(false) {
m_threadIndex = 0;
// Max with current task scheduler usage is 1 per thread + 1 deep nesting, but allow for some slop.
m_maxGroups = std::thread::hardware_concurrency() * 4;
@@ -3739,6 +2968,7 @@ public:
new (&m_groups[i]) TaskGroup();
m_groups[i].free = true;
m_groups[i].ref = 0;
+ m_groups[i].userData = nullptr;
}
m_workers.resize(std::thread::hardware_concurrency() <= 1 ? 1 : std::thread::hardware_concurrency() - 1);
for (uint32_t i = 0; i < m_workers.size(); i++) {
@@ -3748,8 +2978,7 @@ public:
}
}
- ~TaskScheduler()
- {
+ ~TaskScheduler() {
m_shutdown = true;
for (uint32_t i = 0; i < m_workers.size(); i++) {
Worker &worker = m_workers[i];
@@ -3767,13 +2996,12 @@ public:
XA_FREE(m_groups);
}
- uint32_t threadCount() const
- {
+ uint32_t threadCount() const {
return max(1u, std::thread::hardware_concurrency()); // Including the main thread.
}
- TaskGroupHandle createTaskGroup(uint32_t reserveSize = 0)
- {
+ // userData is passed to Task::func as groupUserData.
+ TaskGroupHandle createTaskGroup(void *userData = nullptr, uint32_t reserveSize = 0) {
// Claim the first free group.
for (uint32_t i = 0; i < m_maxGroups; i++) {
TaskGroup &group = m_groups[i];
@@ -3785,6 +3013,8 @@ public:
group.queue.clear();
group.queue.reserve(reserveSize);
group.queueLock.unlock();
+ group.userData = userData;
+ group.ref = 0;
TaskGroupHandle handle;
handle.value = i;
return handle;
@@ -3795,8 +3025,7 @@ public:
return handle;
}
- void run(TaskGroupHandle handle, const Task &task)
- {
+ void run(TaskGroupHandle handle, const Task &task) {
XA_DEBUG_ASSERT(handle.value != UINT32_MAX);
TaskGroup &group = m_groups[handle.value];
group.queueLock.lock();
@@ -3810,8 +3039,7 @@ public:
}
}
- void wait(TaskGroupHandle *handle)
- {
+ void wait(TaskGroupHandle *handle) {
if (handle->value == UINT32_MAX) {
XA_DEBUG_ASSERT(false);
return;
@@ -3826,7 +3054,7 @@ public:
group.queueLock.unlock();
if (!task)
break;
- task->func(task->userData);
+ task->func(group.userData, task->userData);
group.ref--;
}
// Even though the task queue is empty, workers can still be running tasks.
@@ -3839,17 +3067,16 @@ public:
static uint32_t currentThreadIndex() { return m_threadIndex; }
private:
- struct TaskGroup
- {
+ struct TaskGroup {
std::atomic<bool> free;
Array<Task> queue; // Items are never removed. queueHead is incremented to pop items.
uint32_t queueHead = 0;
Spinlock queueLock;
std::atomic<uint32_t> ref; // Increment when a task is enqueued, decrement when a task finishes.
+ void *userData;
};
- struct Worker
- {
+ struct Worker {
std::thread *thread = nullptr;
std::mutex mutex;
std::condition_variable cv;
@@ -3862,12 +3089,11 @@ private:
uint32_t m_maxGroups;
static thread_local uint32_t m_threadIndex;
- static void workerThread(TaskScheduler *scheduler, Worker *worker, uint32_t threadIndex)
- {
+ static void workerThread(TaskScheduler *scheduler, Worker *worker, uint32_t threadIndex) {
m_threadIndex = threadIndex;
std::unique_lock<std::mutex> lock(worker->mutex);
for (;;) {
- worker->cv.wait(lock, [=]{ return worker->wakeup.load(); });
+ worker->cv.wait(lock, [=] { return worker->wakeup.load(); });
worker->wakeup = false;
for (;;) {
if (scheduler->m_shutdown)
@@ -3889,7 +3115,7 @@ private:
}
if (!task)
break;
- task->func(task->userData);
+ task->func(group->userData, task->userData);
group->ref--;
}
}
@@ -3898,44 +3124,39 @@ private:
thread_local uint32_t TaskScheduler::m_threadIndex;
#else
-class TaskScheduler
-{
+class TaskScheduler {
public:
- ~TaskScheduler()
- {
+ ~TaskScheduler() {
for (uint32_t i = 0; i < m_groups.size(); i++)
destroyGroup({ i });
}
- uint32_t threadCount() const
- {
+ uint32_t threadCount() const {
return 1;
}
- TaskGroupHandle createTaskGroup(uint32_t reserveSize = 0)
- {
+ TaskGroupHandle createTaskGroup(void *userData = nullptr, uint32_t reserveSize = 0) {
TaskGroup *group = XA_NEW(MemTag::Default, TaskGroup);
group->queue.reserve(reserveSize);
+ group->userData = userData;
m_groups.push_back(group);
TaskGroupHandle handle;
handle.value = m_groups.size() - 1;
return handle;
}
- void run(TaskGroupHandle handle, Task task)
- {
+ void run(TaskGroupHandle handle, Task task) {
m_groups[handle.value]->queue.push_back(task);
}
- void wait(TaskGroupHandle *handle)
- {
+ void wait(TaskGroupHandle *handle) {
if (handle->value == UINT32_MAX) {
XA_DEBUG_ASSERT(false);
return;
}
TaskGroup *group = m_groups[handle->value];
for (uint32_t i = 0; i < group->queue.size(); i++)
- group->queue[i].func(group->queue[i].userData);
+ group->queue[i].func(group->userData, group->queue[i].userData);
group->queue.clear();
destroyGroup(*handle);
handle->value = UINT32_MAX;
@@ -3944,8 +3165,7 @@ public:
static uint32_t currentThreadIndex() { return 0; }
private:
- void destroyGroup(TaskGroupHandle handle)
- {
+ void destroyGroup(TaskGroupHandle handle) {
TaskGroup *group = m_groups[handle.value];
if (group) {
group->~TaskGroup();
@@ -3954,9 +3174,9 @@ private:
}
}
- struct TaskGroup
- {
+ struct TaskGroup {
Array<Task> queue;
+ void *userData;
};
Array<TaskGroup *> m_groups;
@@ -3968,8 +3188,7 @@ const uint8_t TGA_TYPE_RGB = 2;
const uint8_t TGA_ORIGIN_UPPER = 0x20;
#pragma pack(push, 1)
-struct TgaHeader
-{
+struct TgaHeader {
uint8_t id_length;
uint8_t colormap_type;
uint8_t image_type;
@@ -3986,8 +3205,7 @@ struct TgaHeader
};
#pragma pack(pop)
-static void WriteTga(const char *filename, const uint8_t *data, uint32_t width, uint32_t height)
-{
+static void WriteTga(const char *filename, const uint8_t *data, uint32_t width, uint32_t height) {
XA_DEBUG_ASSERT(sizeof(TgaHeader) == TgaHeader::Size);
FILE *f;
XA_FOPEN(f, filename, "wb");
@@ -4012,12 +3230,10 @@ static void WriteTga(const char *filename, const uint8_t *data, uint32_t width,
}
#endif
-template<typename T>
-class ThreadLocal
-{
+template <typename T>
+class ThreadLocal {
public:
- ThreadLocal()
- {
+ ThreadLocal() {
#if XA_MULTITHREADED
const uint32_t n = std::thread::hardware_concurrency();
#else
@@ -4028,8 +3244,7 @@ public:
new (&m_array[i]) T;
}
- ~ThreadLocal()
- {
+ ~ThreadLocal() {
#if XA_MULTITHREADED
const uint32_t n = std::thread::hardware_concurrency();
#else
@@ -4040,8 +3255,7 @@ public:
XA_FREE(m_array);
}
- T &get() const
- {
+ T &get() const {
return m_array[TaskScheduler::currentThreadIndex()];
}
@@ -4049,11 +3263,104 @@ private:
T *m_array;
};
-class UniformGrid2
-{
+// Implemented as a struct so the temporary arrays can be reused.
+struct Triangulator {
+ // This is doing a simple ear-clipping algorithm that skips invalid triangles. Ideally, we should
+ // also sort the ears by angle, start with the ones that have the smallest angle and proceed in order.
+ void triangulatePolygon(ConstArrayView<Vector3> vertices, ConstArrayView<uint32_t> inputIndices, Array<uint32_t> &outputIndices) {
+ m_polygonVertices.clear();
+ m_polygonVertices.reserve(inputIndices.length);
+ outputIndices.clear();
+ if (inputIndices.length == 3) {
+ // Simple case for triangles.
+ outputIndices.push_back(inputIndices[0]);
+ outputIndices.push_back(inputIndices[1]);
+ outputIndices.push_back(inputIndices[2]);
+ } else {
+ // Build 2D polygon projecting vertices onto normal plane.
+ // Faces are not necesarily planar, this is for example the case, when the face comes from filling a hole. In such cases
+ // it's much better to use the best fit plane.
+ Basis basis;
+ basis.normal = normalize(cross(vertices[inputIndices[1]] - vertices[inputIndices[0]], vertices[inputIndices[2]] - vertices[inputIndices[1]]));
+ basis.tangent = basis.computeTangent(basis.normal);
+ basis.bitangent = basis.computeBitangent(basis.normal, basis.tangent);
+ const uint32_t edgeCount = inputIndices.length;
+ m_polygonPoints.clear();
+ m_polygonPoints.reserve(edgeCount);
+ m_polygonAngles.clear();
+ m_polygonAngles.reserve(edgeCount);
+ for (uint32_t i = 0; i < inputIndices.length; i++) {
+ m_polygonVertices.push_back(inputIndices[i]);
+ const Vector3 &pos = vertices[inputIndices[i]];
+ m_polygonPoints.push_back(Vector2(dot(basis.tangent, pos), dot(basis.bitangent, pos)));
+ }
+ m_polygonAngles.resize(edgeCount);
+ while (m_polygonVertices.size() > 2) {
+ const uint32_t size = m_polygonVertices.size();
+ // Update polygon angles. @@ Update only those that have changed.
+ float minAngle = kPi2;
+ uint32_t bestEar = 0; // Use first one if none of them is valid.
+ bool bestIsValid = false;
+ for (uint32_t i = 0; i < size; i++) {
+ uint32_t i0 = i;
+ uint32_t i1 = (i + 1) % size; // Use Sean's polygon interation trick.
+ uint32_t i2 = (i + 2) % size;
+ Vector2 p0 = m_polygonPoints[i0];
+ Vector2 p1 = m_polygonPoints[i1];
+ Vector2 p2 = m_polygonPoints[i2];
+ float d = clamp(dot(p0 - p1, p2 - p1) / (length(p0 - p1) * length(p2 - p1)), -1.0f, 1.0f);
+ float angle = acosf(d);
+ float area = triangleArea(p0, p1, p2);
+ if (area < 0.0f)
+ angle = kPi2 - angle;
+ m_polygonAngles[i1] = angle;
+ if (angle < minAngle || !bestIsValid) {
+ // Make sure this is a valid ear, if not, skip this point.
+ bool valid = true;
+ for (uint32_t j = 0; j < size; j++) {
+ if (j == i0 || j == i1 || j == i2)
+ continue;
+ Vector2 p = m_polygonPoints[j];
+ if (pointInTriangle(p, p0, p1, p2)) {
+ valid = false;
+ break;
+ }
+ }
+ if (valid || !bestIsValid) {
+ minAngle = angle;
+ bestEar = i1;
+ bestIsValid = valid;
+ }
+ }
+ }
+ // Clip best ear:
+ const uint32_t i0 = (bestEar + size - 1) % size;
+ const uint32_t i1 = (bestEar + 0) % size;
+ const uint32_t i2 = (bestEar + 1) % size;
+ outputIndices.push_back(m_polygonVertices[i0]);
+ outputIndices.push_back(m_polygonVertices[i1]);
+ outputIndices.push_back(m_polygonVertices[i2]);
+ m_polygonVertices.removeAt(i1);
+ m_polygonPoints.removeAt(i1);
+ m_polygonAngles.removeAt(i1);
+ }
+ }
+ }
+
+private:
+ static bool pointInTriangle(const Vector2 &p, const Vector2 &a, const Vector2 &b, const Vector2 &c) {
+ return triangleArea(a, b, p) >= kAreaEpsilon && triangleArea(b, c, p) >= kAreaEpsilon && triangleArea(c, a, p) >= kAreaEpsilon;
+ }
+
+ Array<int> m_polygonVertices;
+ Array<float> m_polygonAngles;
+ Array<Vector2> m_polygonPoints;
+};
+
+class UniformGrid2 {
public:
- void reset(const Vector2 *positions, const uint32_t *indices = nullptr, uint32_t reserveEdgeCount = 0)
- {
+ // indices are optional.
+ void reset(ConstArrayView<Vector2> positions, ConstArrayView<uint32_t> indices = ConstArrayView<uint32_t>(), uint32_t reserveEdgeCount = 0) {
m_edges.clear();
if (reserveEdgeCount > 0)
m_edges.reserve(reserveEdgeCount);
@@ -4062,14 +3369,12 @@ public:
m_cellDataOffsets.clear();
}
- void append(uint32_t edge)
- {
+ void append(uint32_t edge) {
XA_DEBUG_ASSERT(m_cellDataOffsets.isEmpty());
m_edges.push_back(edge);
}
- bool intersect(Vector2 v1, Vector2 v2, float epsilon)
- {
+ bool intersect(Vector2 v1, Vector2 v2, float epsilon) {
const uint32_t edgeCount = m_edges.size();
bool bruteForce = edgeCount <= 20;
if (!bruteForce && m_cellDataOffsets.isEmpty())
@@ -4096,8 +3401,7 @@ public:
}
// If edges is empty, checks for intersection with all edges in the grid.
- bool intersect(float epsilon, ConstArrayView<uint32_t> edges = ConstArrayView<uint32_t>(), ConstArrayView<uint32_t> ignoreEdges = ConstArrayView<uint32_t>())
- {
+ bool intersect(float epsilon, ConstArrayView<uint32_t> edges = ConstArrayView<uint32_t>(), ConstArrayView<uint32_t> ignoreEdges = ConstArrayView<uint32_t>()) {
bool bruteForce = m_edges.size() <= 20;
if (!bruteForce && m_cellDataOffsets.isEmpty())
bruteForce = !createGrid();
@@ -4167,8 +3471,7 @@ public:
}
#if XA_DEBUG_EXPORT_BOUNDARY_GRID
- void debugExport(const char *filename)
- {
+ void debugExport(const char *filename) {
Array<uint8_t> image;
image.resize(m_gridWidth * m_gridHeight * 3);
for (uint32_t y = 0; y < m_gridHeight; y++) {
@@ -4190,8 +3493,7 @@ public:
#endif
private:
- bool createGrid()
- {
+ bool createGrid() {
// Compute edge extents. Min will be the grid origin.
const uint32_t edgeCount = m_edges.size();
Extents2 edgeExtents;
@@ -4202,14 +3504,14 @@ private:
edgeExtents.add(edgePosition1(edge));
}
m_gridOrigin = edgeExtents.min;
- // Size grid to approximately one edge per cell.
+ // Size grid to approximately one edge per cell in the largest dimension.
const Vector2 extentsSize(edgeExtents.max - edgeExtents.min);
- m_cellSize = min(extentsSize.x, extentsSize.y) / sqrtf((float)edgeCount);
+ m_cellSize = max(extentsSize.x, extentsSize.y) / (float)clamp(edgeCount, 32u, 512u);
if (m_cellSize <= 0.0f)
return false;
m_gridWidth = uint32_t(ceilf(extentsSize.x / m_cellSize));
m_gridHeight = uint32_t(ceilf(extentsSize.y / m_cellSize));
- if (m_gridWidth == 0 || m_gridHeight == 0)
+ if (m_gridWidth <= 1 || m_gridHeight <= 1)
return false;
// Insert edges into cells.
m_cellDataOffsets.resize(m_gridWidth * m_gridHeight);
@@ -4243,8 +3545,7 @@ private:
return true;
}
- void computePotentialEdges(Vector2 p1, Vector2 p2)
- {
+ void computePotentialEdges(Vector2 p1, Vector2 p2) {
m_potentialEdges.clear();
traverse(p1, p2);
for (uint32_t j = 0; j < m_traversedCellOffsets.size(); j++) {
@@ -4262,10 +3563,9 @@ private:
}
// "A Fast Voxel Traversal Algorithm for Ray Tracing"
- void traverse(Vector2 p1, Vector2 p2)
- {
+ void traverse(Vector2 p1, Vector2 p2) {
const Vector2 dir = p2 - p1;
- const Vector2 normal = normalizeSafe(dir, Vector2(0.0f), kEpsilon);
+ const Vector2 normal = normalizeSafe(dir, Vector2(0.0f));
const int stepX = dir.x >= 0 ? 1 : -1;
const int stepY = dir.y >= 0 ? 1 : -1;
const uint32_t firstCell[2] = { cellX(p1.x), cellY(p1.y) };
@@ -4284,14 +3584,12 @@ private:
if (normal.x > kEpsilon || normal.x < -kEpsilon) {
tMaxX = (distToNextCellX * stepX) / normal.x;
tDeltaX = (m_cellSize * stepX) / normal.x;
- }
- else
+ } else
tMaxX = tDeltaX = FLT_MAX;
if (normal.y > kEpsilon || normal.y < -kEpsilon) {
tMaxY = (distToNextCellY * stepY) / normal.y;
tDeltaY = (m_cellSize * stepY) / normal.y;
- }
- else
+ } else
tMaxY = tDeltaY = FLT_MAX;
m_traversedCellOffsets.clear();
m_traversedCellOffsets.push_back(firstCell[0] + firstCell[1] * m_gridWidth);
@@ -4318,34 +3616,29 @@ private:
}
}
- uint32_t cellX(float x) const
- {
+ uint32_t cellX(float x) const {
return min((uint32_t)max(0.0f, (x - m_gridOrigin.x) / m_cellSize), m_gridWidth - 1u);
}
- uint32_t cellY(float y) const
- {
+ uint32_t cellY(float y) const {
return min((uint32_t)max(0.0f, (y - m_gridOrigin.y) / m_cellSize), m_gridHeight - 1u);
}
- Vector2 edgePosition0(uint32_t edge) const
- {
+ Vector2 edgePosition0(uint32_t edge) const {
return m_positions[vertexAt(meshEdgeIndex0(edge))];
}
- Vector2 edgePosition1(uint32_t edge) const
- {
+ Vector2 edgePosition1(uint32_t edge) const {
return m_positions[vertexAt(meshEdgeIndex1(edge))];
}
- uint32_t vertexAt(uint32_t index) const
- {
- return m_indices ? m_indices[index] : index;
+ uint32_t vertexAt(uint32_t index) const {
+ return m_indices.length > 0 ? m_indices[index] : index;
}
Array<uint32_t> m_edges;
- const Vector2 *m_positions;
- const uint32_t *m_indices; // Optional
+ ConstArrayView<Vector2> m_positions;
+ ConstArrayView<uint32_t> m_indices; // Optional. Empty if unused.
float m_cellSize;
Vector2 m_gridOrigin;
uint32_t m_gridWidth, m_gridHeight; // in cells
@@ -4355,26 +3648,25 @@ private:
Array<uint32_t> m_traversedCellOffsets;
};
-struct UvMeshChart
-{
+struct UvMeshChart {
Array<uint32_t> faces;
Array<uint32_t> indices;
uint32_t material;
};
-struct UvMesh
-{
+struct UvMesh {
UvMeshDecl decl;
+ BitArray faceIgnore;
+ Array<uint32_t> faceMaterials;
Array<uint32_t> indices;
+ Array<Vector2> texcoords; // Copied from input and never modified, UvMeshInstance::texcoords are. Used to restore UvMeshInstance::texcoords so packing can be run multiple times.
Array<UvMeshChart *> charts;
Array<uint32_t> vertexToChartMap;
};
-struct UvMeshInstance
-{
+struct UvMeshInstance {
UvMesh *mesh;
Array<Vector2> texcoords;
- bool rotateCharts;
};
/*
@@ -4420,27 +3712,30 @@ struct UvMeshInstance
* FRANCE
*/
namespace opennl {
-#define NL_NEW(T) XA_ALLOC(MemTag::OpenNL, T)
-#define NL_NEW_ARRAY(T,NB) XA_ALLOC_ARRAY(MemTag::OpenNL, T, NB)
-#define NL_RENEW_ARRAY(T,x,NB) XA_REALLOC(MemTag::OpenNL, x, T, NB)
-#define NL_DELETE(x) XA_FREE(x); x = nullptr
-#define NL_DELETE_ARRAY(x) XA_FREE(x); x = nullptr
-#define NL_CLEAR(x, T) memset(x, 0, sizeof(T));
-#define NL_CLEAR_ARRAY(T,x,NB) memset(x, 0, (size_t)(NB)*sizeof(T))
-#define NL_NEW_VECTOR(dim) XA_ALLOC_ARRAY(MemTag::OpenNL, double, dim)
-#define NL_DELETE_VECTOR(ptr) XA_FREE(ptr)
+#define NL_NEW(T) XA_ALLOC(MemTag::OpenNL, T)
+#define NL_NEW_ARRAY(T, NB) XA_ALLOC_ARRAY(MemTag::OpenNL, T, NB)
+#define NL_RENEW_ARRAY(T, x, NB) XA_REALLOC(MemTag::OpenNL, x, T, NB)
+#define NL_DELETE(x) \
+ XA_FREE(x); \
+ x = nullptr
+#define NL_DELETE_ARRAY(x) \
+ XA_FREE(x); \
+ x = nullptr
+#define NL_CLEAR(x, T) memset(x, 0, sizeof(T));
+#define NL_CLEAR_ARRAY(T, x, NB) memset(x, 0, (size_t)(NB) * sizeof(T))
+#define NL_NEW_VECTOR(dim) XA_ALLOC_ARRAY(MemTag::OpenNL, double, dim)
+#define NL_DELETE_VECTOR(ptr) XA_FREE(ptr)
struct NLMatrixStruct;
-typedef NLMatrixStruct * NLMatrix;
+typedef NLMatrixStruct *NLMatrix;
typedef void (*NLDestroyMatrixFunc)(NLMatrix M);
-typedef void (*NLMultMatrixVectorFunc)(NLMatrix M, const double* x, double* y);
+typedef void (*NLMultMatrixVectorFunc)(NLMatrix M, const double *x, double *y);
#define NL_MATRIX_SPARSE_DYNAMIC 0x1001
-#define NL_MATRIX_CRS 0x1002
-#define NL_MATRIX_OTHER 0x1006
+#define NL_MATRIX_CRS 0x1002
+#define NL_MATRIX_OTHER 0x1006
-struct NLMatrixStruct
-{
+struct NLMatrixStruct {
uint32_t m;
uint32_t n;
uint32_t type;
@@ -4450,39 +3745,35 @@ struct NLMatrixStruct
/* Dynamic arrays for sparse row/columns */
-struct NLCoeff
-{
+struct NLCoeff {
uint32_t index;
double value;
};
-struct NLRowColumn
-{
+struct NLRowColumn {
uint32_t size;
uint32_t capacity;
- NLCoeff* coeff;
+ NLCoeff *coeff;
};
/* Compressed Row Storage */
-struct NLCRSMatrix
-{
+struct NLCRSMatrix {
uint32_t m;
uint32_t n;
uint32_t type;
NLDestroyMatrixFunc destroy_func;
NLMultMatrixVectorFunc mult_func;
- double* val;
- uint32_t* rowptr;
- uint32_t* colind;
+ double *val;
+ uint32_t *rowptr;
+ uint32_t *colind;
uint32_t nslices;
- uint32_t* sliceptr;
+ uint32_t *sliceptr;
};
/* SparseMatrix data structure */
-struct NLSparseMatrix
-{
+struct NLSparseMatrix {
uint32_t m;
uint32_t n;
uint32_t type;
@@ -4490,25 +3781,23 @@ struct NLSparseMatrix
NLMultMatrixVectorFunc mult_func;
uint32_t diag_size;
uint32_t diag_capacity;
- NLRowColumn* row;
- NLRowColumn* column;
- double* diag;
+ NLRowColumn *row;
+ NLRowColumn *column;
+ double *diag;
uint32_t row_capacity;
uint32_t column_capacity;
};
/* NLContext data structure */
-struct NLBufferBinding
-{
- void* base_address;
+struct NLBufferBinding {
+ void *base_address;
uint32_t stride;
};
-#define NL_BUFFER_ITEM(B,i) *(double*)((void*)((char*)((B).base_address)+((i)*(B).stride)))
+#define NL_BUFFER_ITEM(B, i) *(double *)((void *)((char *)((B).base_address) + ((i) * (B).stride)))
-struct NLContext
-{
+struct NLContext {
NLBufferBinding *variable_buffer;
double *variable_value;
bool *variable_is_locked;
@@ -4532,35 +3821,30 @@ struct NLContext
double error;
};
-static void nlDeleteMatrix(NLMatrix M)
-{
+static void nlDeleteMatrix(NLMatrix M) {
if (!M)
return;
M->destroy_func(M);
NL_DELETE(M);
}
-static void nlMultMatrixVector(NLMatrix M, const double* x, double* y)
-{
+static void nlMultMatrixVector(NLMatrix M, const double *x, double *y) {
M->mult_func(M, x, y);
}
-static void nlRowColumnConstruct(NLRowColumn* c)
-{
+static void nlRowColumnConstruct(NLRowColumn *c) {
c->size = 0;
c->capacity = 0;
c->coeff = nullptr;
}
-static void nlRowColumnDestroy(NLRowColumn* c)
-{
+static void nlRowColumnDestroy(NLRowColumn *c) {
NL_DELETE_ARRAY(c->coeff);
c->size = 0;
c->capacity = 0;
}
-static void nlRowColumnGrow(NLRowColumn* c)
-{
+static void nlRowColumnGrow(NLRowColumn *c) {
if (c->capacity != 0) {
c->capacity = 2 * c->capacity;
c->coeff = NL_RENEW_ARRAY(NLCoeff, c->coeff, c->capacity);
@@ -4571,8 +3855,7 @@ static void nlRowColumnGrow(NLRowColumn* c)
}
}
-static void nlRowColumnAdd(NLRowColumn* c, uint32_t index, double value)
-{
+static void nlRowColumnAdd(NLRowColumn *c, uint32_t index, double value) {
for (uint32_t i = 0; i < c->size; i++) {
if (c->coeff[i].index == index) {
c->coeff[i].value += value;
@@ -4587,8 +3870,7 @@ static void nlRowColumnAdd(NLRowColumn* c, uint32_t index, double value)
}
/* Does not check whether the index already exists */
-static void nlRowColumnAppend(NLRowColumn* c, uint32_t index, double value)
-{
+static void nlRowColumnAppend(NLRowColumn *c, uint32_t index, double value) {
if (c->size == c->capacity)
nlRowColumnGrow(c);
c->coeff[c->size].index = index;
@@ -4596,32 +3878,27 @@ static void nlRowColumnAppend(NLRowColumn* c, uint32_t index, double value)
c->size++;
}
-static void nlRowColumnZero(NLRowColumn* c)
-{
+static void nlRowColumnZero(NLRowColumn *c) {
c->size = 0;
}
-static void nlRowColumnClear(NLRowColumn* c)
-{
+static void nlRowColumnClear(NLRowColumn *c) {
c->size = 0;
c->capacity = 0;
NL_DELETE_ARRAY(c->coeff);
}
-static int nlCoeffCompare(const void* p1, const void* p2)
-{
- return (((NLCoeff*)(p2))->index < ((NLCoeff*)(p1))->index);
+static int nlCoeffCompare(const void *p1, const void *p2) {
+ return (((NLCoeff *)(p2))->index < ((NLCoeff *)(p1))->index);
}
-static void nlRowColumnSort(NLRowColumn* c)
-{
+static void nlRowColumnSort(NLRowColumn *c) {
qsort(c->coeff, c->size, sizeof(NLCoeff), nlCoeffCompare);
}
/* CRSMatrix data structure */
-static void nlCRSMatrixDestroy(NLCRSMatrix* M)
-{
+static void nlCRSMatrixDestroy(NLCRSMatrix *M) {
NL_DELETE_ARRAY(M->val);
NL_DELETE_ARRAY(M->rowptr);
NL_DELETE_ARRAY(M->colind);
@@ -4631,8 +3908,7 @@ static void nlCRSMatrixDestroy(NLCRSMatrix* M)
M->nslices = 0;
}
-static void nlCRSMatrixMultSlice(NLCRSMatrix* M, const double* x, double* y, uint32_t Ibegin, uint32_t Iend)
-{
+static void nlCRSMatrixMultSlice(NLCRSMatrix *M, const double *x, double *y, uint32_t Ibegin, uint32_t Iend) {
for (uint32_t i = Ibegin; i < Iend; ++i) {
double sum = 0.0;
for (uint32_t j = M->rowptr[i]; j < M->rowptr[i + 1]; ++j)
@@ -4641,15 +3917,13 @@ static void nlCRSMatrixMultSlice(NLCRSMatrix* M, const double* x, double* y, uin
}
}
-static void nlCRSMatrixMult(NLCRSMatrix* M, const double* x, double* y)
-{
+static void nlCRSMatrixMult(NLCRSMatrix *M, const double *x, double *y) {
int nslices = (int)(M->nslices);
for (int slice = 0; slice < nslices; ++slice)
nlCRSMatrixMultSlice(M, x, y, M->sliceptr[slice], M->sliceptr[slice + 1]);
}
-static void nlCRSMatrixConstruct(NLCRSMatrix* M, uint32_t m, uint32_t n, uint32_t nnz, uint32_t nslices)
-{
+static void nlCRSMatrixConstruct(NLCRSMatrix *M, uint32_t m, uint32_t n, uint32_t nnz, uint32_t nslices) {
M->m = m;
M->n = n;
M->type = NL_MATRIX_CRS;
@@ -4668,22 +3942,19 @@ static void nlCRSMatrixConstruct(NLCRSMatrix* M, uint32_t m, uint32_t n, uint32_
/* SparseMatrix data structure */
-static void nlSparseMatrixDestroyRowColumns(NLSparseMatrix* M)
-{
+static void nlSparseMatrixDestroyRowColumns(NLSparseMatrix *M) {
for (uint32_t i = 0; i < M->m; i++)
nlRowColumnDestroy(&(M->row[i]));
NL_DELETE_ARRAY(M->row);
}
-static void nlSparseMatrixDestroy(NLSparseMatrix* M)
-{
+static void nlSparseMatrixDestroy(NLSparseMatrix *M) {
XA_DEBUG_ASSERT(M->type == NL_MATRIX_SPARSE_DYNAMIC);
nlSparseMatrixDestroyRowColumns(M);
NL_DELETE_ARRAY(M->diag);
}
-static void nlSparseMatrixAdd(NLSparseMatrix* M, uint32_t i, uint32_t j, double value)
-{
+static void nlSparseMatrixAdd(NLSparseMatrix *M, uint32_t i, uint32_t j, double value) {
XA_DEBUG_ASSERT(i >= 0 && i <= M->m - 1);
XA_DEBUG_ASSERT(j >= 0 && j <= M->n - 1);
if (i == j)
@@ -4692,24 +3963,21 @@ static void nlSparseMatrixAdd(NLSparseMatrix* M, uint32_t i, uint32_t j, double
}
/* Returns the number of non-zero coefficients */
-static uint32_t nlSparseMatrixNNZ(NLSparseMatrix* M)
-{
+static uint32_t nlSparseMatrixNNZ(NLSparseMatrix *M) {
uint32_t nnz = 0;
for (uint32_t i = 0; i < M->m; i++)
nnz += M->row[i].size;
return nnz;
}
-static void nlSparseMatrixSort(NLSparseMatrix* M)
-{
+static void nlSparseMatrixSort(NLSparseMatrix *M) {
for (uint32_t i = 0; i < M->m; i++)
nlRowColumnSort(&(M->row[i]));
}
/* SparseMatrix x Vector routines, internal helper routines */
-static void nlSparseMatrix_mult_rows(NLSparseMatrix* A, const double* x, double* y)
-{
+static void nlSparseMatrix_mult_rows(NLSparseMatrix *A, const double *x, double *y) {
/*
* Note: OpenMP does not like unsigned ints
* (causes some floating point exceptions),
@@ -4717,8 +3985,8 @@ static void nlSparseMatrix_mult_rows(NLSparseMatrix* A, const double* x, double*
* indices.
*/
int m = (int)(A->m);
- NLCoeff* c = nullptr;
- NLRowColumn* Ri = nullptr;
+ NLCoeff *c = nullptr;
+ NLRowColumn *Ri = nullptr;
for (int i = 0; i < m; i++) {
Ri = &(A->row[i]);
y[i] = 0;
@@ -4729,14 +3997,12 @@ static void nlSparseMatrix_mult_rows(NLSparseMatrix* A, const double* x, double*
}
}
-static void nlSparseMatrixMult(NLSparseMatrix* A, const double* x, double* y)
-{
+static void nlSparseMatrixMult(NLSparseMatrix *A, const double *x, double *y) {
XA_DEBUG_ASSERT(A->type == NL_MATRIX_SPARSE_DYNAMIC);
nlSparseMatrix_mult_rows(A, x, y);
}
-static void nlSparseMatrixConstruct(NLSparseMatrix* M, uint32_t m, uint32_t n)
-{
+static void nlSparseMatrixConstruct(NLSparseMatrix *M, uint32_t m, uint32_t n) {
M->m = m;
M->n = n;
M->type = NL_MATRIX_SPARSE_DYNAMIC;
@@ -4756,24 +4022,23 @@ static void nlSparseMatrixConstruct(NLSparseMatrix* M, uint32_t m, uint32_t n)
NL_CLEAR_ARRAY(double, M->diag, M->diag_size);
}
-static NLMatrix nlCRSMatrixNewFromSparseMatrix(NLSparseMatrix* M)
-{
+static NLMatrix nlCRSMatrixNewFromSparseMatrix(NLSparseMatrix *M) {
uint32_t nnz = nlSparseMatrixNNZ(M);
uint32_t nslices = 8; /* TODO: get number of cores */
uint32_t slice, cur_bound, cur_NNZ, cur_row;
uint32_t k;
uint32_t slice_size = nnz / nslices;
- NLCRSMatrix* CRS = NL_NEW(NLCRSMatrix);
+ NLCRSMatrix *CRS = NL_NEW(NLCRSMatrix);
NL_CLEAR(CRS, NLCRSMatrix);
nlCRSMatrixConstruct(CRS, M->m, M->n, nnz, nslices);
nlSparseMatrixSort(M);
/* Convert matrix to CRS format */
k = 0;
for (uint32_t i = 0; i < M->m; ++i) {
- NLRowColumn* Ri = &(M->row[i]);
+ NLRowColumn *Ri = &(M->row[i]);
CRS->rowptr[i] = k;
for (uint32_t ij = 0; ij < Ri->size; ij++) {
- NLCoeff* c = &(Ri->coeff[ij]);
+ NLCoeff *c = &(Ri->coeff[ij]);
CRS->val[k] = c->value;
CRS->colind[k] = c->index;
++k;
@@ -4799,19 +4064,17 @@ static NLMatrix nlCRSMatrixNewFromSparseMatrix(NLSparseMatrix* M)
return (NLMatrix)CRS;
}
-static void nlMatrixCompress(NLMatrix* M)
-{
+static void nlMatrixCompress(NLMatrix *M) {
NLMatrix CRS = nullptr;
if ((*M)->type != NL_MATRIX_SPARSE_DYNAMIC)
return;
- CRS = nlCRSMatrixNewFromSparseMatrix((NLSparseMatrix*)*M);
+ CRS = nlCRSMatrixNewFromSparseMatrix((NLSparseMatrix *)*M);
nlDeleteMatrix(*M);
*M = CRS;
}
-static NLContext *nlNewContext()
-{
- NLContext* result = NL_NEW(NLContext);
+static NLContext *nlNewContext() {
+ NLContext *result = NL_NEW(NLContext);
NL_CLEAR(result, NLContext);
result->max_iterations = 100;
result->threshold = 1e-6;
@@ -4820,8 +4083,7 @@ static NLContext *nlNewContext()
return result;
}
-static void nlDeleteContext(NLContext *context)
-{
+static void nlDeleteContext(NLContext *context) {
nlDeleteMatrix(context->M);
context->M = nullptr;
nlDeleteMatrix(context->P);
@@ -4839,22 +4101,19 @@ static void nlDeleteContext(NLContext *context)
NL_DELETE(context);
}
-static double ddot(int n, const double *x, const double *y)
-{
+static double ddot(int n, const double *x, const double *y) {
double sum = 0.0;
for (int i = 0; i < n; i++)
sum += x[i] * y[i];
return sum;
}
-static void daxpy(int n, double a, const double *x, double *y)
-{
+static void daxpy(int n, double a, const double *x, double *y) {
for (int i = 0; i < n; i++)
y[i] = a * x[i] + y[i];
}
-static void dscal(int n, double a, double *x)
-{
+static void dscal(int n, double a, double *x) {
for (int i = 0; i < n; i++)
x[i] *= a;
}
@@ -4877,17 +4136,16 @@ static void dscal(int n, double a, double *x)
* versions of matrix x vector product (CPU/GPU, sparse/dense ...)
*/
-static uint32_t nlSolveSystem_PRE_CG(NLMatrix M, NLMatrix P, double* b, double* x, double eps, uint32_t max_iter, double *sq_bnorm, double *sq_rnorm)
-{
- int N = (int)M->n;
- double* r = NL_NEW_VECTOR(N);
- double* d = NL_NEW_VECTOR(N);
- double* h = NL_NEW_VECTOR(N);
+static uint32_t nlSolveSystem_PRE_CG(NLMatrix M, NLMatrix P, double *b, double *x, double eps, uint32_t max_iter, double *sq_bnorm, double *sq_rnorm) {
+ int N = (int)M->n;
+ double *r = NL_NEW_VECTOR(N);
+ double *d = NL_NEW_VECTOR(N);
+ double *h = NL_NEW_VECTOR(N);
double *Ad = h;
uint32_t its = 0;
double rh, alpha, beta;
double b_square = ddot(N, b, b);
- double err = eps * eps*b_square;
+ double err = eps * eps * b_square;
double curr_err;
nlMultMatrixVector(M, x, r);
daxpy(N, -1., b, r);
@@ -4917,13 +4175,12 @@ static uint32_t nlSolveSystem_PRE_CG(NLMatrix M, NLMatrix P, double* b, double*
return its;
}
-static uint32_t nlSolveSystemIterative(NLContext *context, NLMatrix M, NLMatrix P, double* b_in, double* x_in, double eps, uint32_t max_iter)
-{
+static uint32_t nlSolveSystemIterative(NLContext *context, NLMatrix M, NLMatrix P, double *b_in, double *x_in, double eps, uint32_t max_iter) {
uint32_t result = 0;
double rnorm = 0.0;
double bnorm = 0.0;
- double* b = b_in;
- double* x = x_in;
+ double *b = b_in;
+ double *x = x_in;
XA_DEBUG_ASSERT(M->m == M->n);
double sq_bnorm, sq_rnorm;
result = nlSolveSystem_PRE_CG(M, P, b, x, eps, max_iter, &sq_bnorm, &sq_rnorm);
@@ -4938,10 +4195,9 @@ static uint32_t nlSolveSystemIterative(NLContext *context, NLMatrix M, NLMatrix
return result;
}
-static bool nlSolveIterative(NLContext *context)
-{
- double* b = context->b;
- double* x = context->x;
+static bool nlSolveIterative(NLContext *context) {
+ double *b = context->b;
+ double *x = context->x;
uint32_t n = context->n;
NLMatrix M = context->M;
NLMatrix P = context->P;
@@ -4953,34 +4209,30 @@ static bool nlSolveIterative(NLContext *context)
return true;
}
-struct NLJacobiPreconditioner
-{
+struct NLJacobiPreconditioner {
uint32_t m;
uint32_t n;
uint32_t type;
NLDestroyMatrixFunc destroy_func;
NLMultMatrixVectorFunc mult_func;
- double* diag_inv;
+ double *diag_inv;
};
-static void nlJacobiPreconditionerDestroy(NLJacobiPreconditioner* M)
-{
+static void nlJacobiPreconditionerDestroy(NLJacobiPreconditioner *M) {
NL_DELETE_ARRAY(M->diag_inv);
}
-static void nlJacobiPreconditionerMult(NLJacobiPreconditioner* M, const double* x, double* y)
-{
+static void nlJacobiPreconditionerMult(NLJacobiPreconditioner *M, const double *x, double *y) {
for (uint32_t i = 0; i < M->n; ++i)
y[i] = x[i] * M->diag_inv[i];
}
-static NLMatrix nlNewJacobiPreconditioner(NLMatrix M_in)
-{
- NLSparseMatrix* M = nullptr;
- NLJacobiPreconditioner* result = nullptr;
+static NLMatrix nlNewJacobiPreconditioner(NLMatrix M_in) {
+ NLSparseMatrix *M = nullptr;
+ NLJacobiPreconditioner *result = nullptr;
XA_DEBUG_ASSERT(M_in->type == NL_MATRIX_SPARSE_DYNAMIC);
XA_DEBUG_ASSERT(M_in->m == M_in->n);
- M = (NLSparseMatrix*)M_in;
+ M = (NLSparseMatrix *)M_in;
result = NL_NEW(NLJacobiPreconditioner);
NL_CLEAR(result, NLJacobiPreconditioner);
result->m = M->m;
@@ -4998,8 +4250,7 @@ static NLMatrix nlNewJacobiPreconditioner(NLMatrix M_in)
#define NL_NB_VARIABLES 0x101
#define NL_MAX_ITERATIONS 0x103
-static void nlSolverParameteri(NLContext *context, uint32_t pname, int param)
-{
+static void nlSolverParameteri(NLContext *context, uint32_t pname, int param) {
if (pname == NL_NB_VARIABLES) {
XA_DEBUG_ASSERT(param > 0);
context->nb_variables = (uint32_t)param;
@@ -5010,26 +4261,22 @@ static void nlSolverParameteri(NLContext *context, uint32_t pname, int param)
}
}
-static void nlSetVariable(NLContext *context, uint32_t index, double value)
-{
+static void nlSetVariable(NLContext *context, uint32_t index, double value) {
XA_DEBUG_ASSERT(index >= 0 && index <= context->nb_variables - 1);
NL_BUFFER_ITEM(context->variable_buffer[0], index) = value;
}
-static double nlGetVariable(NLContext *context, uint32_t index)
-{
+static double nlGetVariable(NLContext *context, uint32_t index) {
XA_DEBUG_ASSERT(index >= 0 && index <= context->nb_variables - 1);
return NL_BUFFER_ITEM(context->variable_buffer[0], index);
}
-static void nlLockVariable(NLContext *context, uint32_t index)
-{
+static void nlLockVariable(NLContext *context, uint32_t index) {
XA_DEBUG_ASSERT(index >= 0 && index <= context->nb_variables - 1);
context->variable_is_locked[index] = true;
}
-static void nlVariablesToVector(NLContext *context)
-{
+static void nlVariablesToVector(NLContext *context) {
uint32_t n = context->n;
XA_DEBUG_ASSERT(context->x);
for (uint32_t k = 0; k < context->nb_systems; ++k) {
@@ -5044,8 +4291,7 @@ static void nlVariablesToVector(NLContext *context)
}
}
-static void nlVectorToVariables(NLContext *context)
-{
+static void nlVectorToVariables(NLContext *context) {
uint32_t n = context->n;
XA_DEBUG_ASSERT(context->x);
for (uint32_t k = 0; k < context->nb_systems; ++k) {
@@ -5060,8 +4306,7 @@ static void nlVectorToVariables(NLContext *context)
}
}
-static void nlCoefficient(NLContext *context, uint32_t index, double value)
-{
+static void nlCoefficient(NLContext *context, uint32_t index, double value) {
XA_DEBUG_ASSERT(index >= 0 && index <= context->nb_variables - 1);
if (context->variable_is_locked[index]) {
/*
@@ -5078,12 +4323,11 @@ static void nlCoefficient(NLContext *context, uint32_t index, double value)
}
}
-#define NL_SYSTEM 0x0
-#define NL_MATRIX 0x1
-#define NL_ROW 0x2
+#define NL_SYSTEM 0x0
+#define NL_MATRIX 0x1
+#define NL_ROW 0x2
-static void nlBegin(NLContext *context, uint32_t prim)
-{
+static void nlBegin(NLContext *context, uint32_t prim) {
if (prim == NL_SYSTEM) {
XA_DEBUG_ASSERT(context->nb_variables > 0);
context->variable_buffer = NL_NEW_ARRAY(NLBufferBinding, context->nb_systems);
@@ -5092,8 +4336,8 @@ static void nlBegin(NLContext *context, uint32_t prim)
NL_CLEAR_ARRAY(double, context->variable_value, context->nb_variables * context->nb_systems);
for (uint32_t k = 0; k < context->nb_systems; ++k) {
context->variable_buffer[k].base_address =
- context->variable_value +
- k * context->nb_variables;
+ context->variable_value +
+ k * context->nb_variables;
context->variable_buffer[k].stride = sizeof(double);
}
context->variable_is_locked = NL_NEW_ARRAY(bool, context->nb_variables);
@@ -5116,11 +4360,11 @@ static void nlBegin(NLContext *context, uint32_t prim)
context->max_iterations = n * 5;
context->M = (NLMatrix)(NL_NEW(NLSparseMatrix));
NL_CLEAR(context->M, NLSparseMatrix);
- nlSparseMatrixConstruct((NLSparseMatrix*)(context->M), n, n);
- context->x = NL_NEW_ARRAY(double, n*context->nb_systems);
- NL_CLEAR_ARRAY(double, context->x, n*context->nb_systems);
- context->b = NL_NEW_ARRAY(double, n*context->nb_systems);
- NL_CLEAR_ARRAY(double, context->b, n*context->nb_systems);
+ nlSparseMatrixConstruct((NLSparseMatrix *)(context->M), n, n);
+ context->x = NL_NEW_ARRAY(double, n * context->nb_systems);
+ NL_CLEAR_ARRAY(double, context->x, n * context->nb_systems);
+ context->b = NL_NEW_ARRAY(double, n * context->nb_systems);
+ NL_CLEAR_ARRAY(double, context->b, n * context->nb_systems);
nlVariablesToVector(context);
nlRowColumnConstruct(&context->af);
nlRowColumnConstruct(&context->al);
@@ -5131,16 +4375,15 @@ static void nlBegin(NLContext *context, uint32_t prim)
}
}
-static void nlEnd(NLContext *context, uint32_t prim)
-{
+static void nlEnd(NLContext *context, uint32_t prim) {
if (prim == NL_MATRIX) {
nlRowColumnClear(&context->af);
nlRowColumnClear(&context->al);
} else if (prim == NL_ROW) {
- NLRowColumn* af = &context->af;
- NLRowColumn* al = &context->al;
- NLSparseMatrix* M = (NLSparseMatrix*)context->M;
- double* b = context->b;
+ NLRowColumn *af = &context->af;
+ NLRowColumn *al = &context->al;
+ NLSparseMatrix *M = (NLSparseMatrix *)context->M;
+ double *b = context->b;
uint32_t nf = af->size;
uint32_t nl = al->size;
uint32_t n = context->n;
@@ -5161,14 +4404,13 @@ static void nlEnd(NLContext *context, uint32_t prim)
S += al->coeff[jj].value * NL_BUFFER_ITEM(context->variable_buffer[k], j);
}
for (uint32_t jj = 0; jj < nf; jj++)
- b[k*n + af->coeff[jj].index] -= af->coeff[jj].value * S;
+ b[k * n + af->coeff[jj].index] -= af->coeff[jj].value * S;
}
context->current_row++;
}
}
-static bool nlSolve(NLContext *context)
-{
+static bool nlSolve(NLContext *context) {
nlDeleteMatrix(context->P);
context->P = nlNewJacobiPreconditioner(context->M);
nlMatrixCompress(&context->M);
@@ -5179,11 +4421,9 @@ static bool nlSolve(NLContext *context)
} // namespace opennl
namespace raster {
-class ClippedTriangle
-{
+class ClippedTriangle {
public:
- ClippedTriangle(const Vector2 &a, const Vector2 &b, const Vector2 &c)
- {
+ ClippedTriangle(const Vector2 &a, const Vector2 &b, const Vector2 &c) {
m_numVertices = 3;
m_activeVertexBuffer = 0;
m_verticesA[0] = a;
@@ -5194,20 +4434,20 @@ public:
m_area = 0;
}
- void clipHorizontalPlane(float offset, float clipdirection)
- {
- Vector2 *v = m_vertexBuffers[m_activeVertexBuffer];
+ void clipHorizontalPlane(float offset, float clipdirection) {
+ Vector2 *v = m_vertexBuffers[m_activeVertexBuffer];
m_activeVertexBuffer ^= 1;
Vector2 *v2 = m_vertexBuffers[m_activeVertexBuffer];
v[m_numVertices] = v[0];
- float dy2, dy1 = offset - v[0].y;
- int dy2in, dy1in = clipdirection * dy1 >= 0;
- uint32_t p = 0;
+ float dy2, dy1 = offset - v[0].y;
+ int dy2in, dy1in = clipdirection * dy1 >= 0;
+ uint32_t p = 0;
for (uint32_t k = 0; k < m_numVertices; k++) {
- dy2 = offset - v[k + 1].y;
+ dy2 = offset - v[k + 1].y;
dy2in = clipdirection * dy2 >= 0;
- if (dy1in) v2[p++] = v[k];
- if ( dy1in + dy2in == 1 ) { // not both in/out
+ if (dy1in)
+ v2[p++] = v[k];
+ if (dy1in + dy2in == 1) { // not both in/out
float dx = v[k + 1].x - v[k].x;
float dy = v[k + 1].y - v[k].y;
v2[p++] = Vector2(v[k].x + dy1 * (dx / dy), offset);
@@ -5218,20 +4458,20 @@ public:
m_numVertices = p;
}
- void clipVerticalPlane(float offset, float clipdirection)
- {
- Vector2 *v = m_vertexBuffers[m_activeVertexBuffer];
+ void clipVerticalPlane(float offset, float clipdirection) {
+ Vector2 *v = m_vertexBuffers[m_activeVertexBuffer];
m_activeVertexBuffer ^= 1;
Vector2 *v2 = m_vertexBuffers[m_activeVertexBuffer];
v[m_numVertices] = v[0];
- float dx2, dx1 = offset - v[0].x;
- int dx2in, dx1in = clipdirection * dx1 >= 0;
- uint32_t p = 0;
+ float dx2, dx1 = offset - v[0].x;
+ int dx2in, dx1in = clipdirection * dx1 >= 0;
+ uint32_t p = 0;
for (uint32_t k = 0; k < m_numVertices; k++) {
dx2 = offset - v[k + 1].x;
dx2in = clipdirection * dx2 >= 0;
- if (dx1in) v2[p++] = v[k];
- if ( dx1in + dx2in == 1 ) { // not both in/out
+ if (dx1in)
+ v2[p++] = v[k];
+ if (dx1in + dx2in == 1) { // not both in/out
float dx = v[k + 1].x - v[k].x;
float dy = v[k + 1].y - v[k].y;
v2[p++] = Vector2(offset, v[k].y + dx1 * (dy / dx));
@@ -5242,9 +4482,8 @@ public:
m_numVertices = p;
}
- void computeArea()
- {
- Vector2 *v = m_vertexBuffers[m_activeVertexBuffer];
+ void computeArea() {
+ Vector2 *v = m_vertexBuffers[m_activeVertexBuffer];
v[m_numVertices] = v[0];
m_area = 0;
float centroidx = 0, centroidy = 0;
@@ -5258,8 +4497,7 @@ public:
m_area = 0.5f * fabsf(m_area);
}
- void clipAABox(float x0, float y0, float x1, float y1)
- {
+ void clipAABox(float x0, float y0, float x1, float y1) {
clipVerticalPlane(x0, -1);
clipHorizontalPlane(y0, -1);
clipVerticalPlane(x1, 1);
@@ -5267,8 +4505,7 @@ public:
computeArea();
}
- float area() const
- {
+ float area() const {
return m_area;
}
@@ -5285,10 +4522,9 @@ private:
typedef bool (*SamplingCallback)(void *param, int x, int y);
/// A triangle for rasterization.
-struct Triangle
-{
- Triangle(const Vector2 &_v0, const Vector2 &_v1, const Vector2 &_v2) : v1(_v0), v2(_v2), v3(_v1)
- {
+struct Triangle {
+ Triangle(const Vector2 &_v0, const Vector2 &_v1, const Vector2 &_v2) :
+ v1(_v0), v2(_v2), v3(_v1), n1(0.0f), n2(0.0f), n3(0.0f) {
// make sure every triangle is front facing.
flipBackface();
// Compute deltas.
@@ -5296,8 +4532,7 @@ struct Triangle
computeUnitInwardNormals();
}
- bool isValid()
- {
+ bool isValid() {
const Vector2 e0 = v3 - v1;
const Vector2 e1 = v2 - v1;
const float area = e0.y * e1.x - e1.y * e0.x;
@@ -5305,18 +4540,17 @@ struct Triangle
}
// extents has to be multiple of BK_SIZE!!
- bool drawAA(const Vector2 &extents, SamplingCallback cb, void *param)
- {
- const float PX_INSIDE = 1.0f/sqrtf(2.0f);
- const float PX_OUTSIDE = -1.0f/sqrtf(2.0f);
+ bool drawAA(const Vector2 &extents, SamplingCallback cb, void *param) {
+ const float PX_INSIDE = 1.0f / sqrtf(2.0f);
+ const float PX_OUTSIDE = -1.0f / sqrtf(2.0f);
const float BK_SIZE = 8;
- const float BK_INSIDE = sqrtf(BK_SIZE*BK_SIZE/2.0f);
- const float BK_OUTSIDE = -sqrtf(BK_SIZE*BK_SIZE/2.0f);
+ const float BK_INSIDE = sqrtf(BK_SIZE * BK_SIZE / 2.0f);
+ const float BK_OUTSIDE = -sqrtf(BK_SIZE * BK_SIZE / 2.0f);
// Bounding rectangle
float minx = floorf(max(min3(v1.x, v2.x, v3.x), 0.0f));
float miny = floorf(max(min3(v1.y, v2.y, v3.y), 0.0f));
- float maxx = ceilf( min(max3(v1.x, v2.x, v3.x), extents.x - 1.0f));
- float maxy = ceilf( min(max3(v1.y, v2.y, v3.y), extents.y - 1.0f));
+ float maxx = ceilf(min(max3(v1.x, v2.x, v3.x), extents.x - 1.0f));
+ float maxy = ceilf(min(max3(v1.y, v2.y, v3.y), extents.y - 1.0f));
// There's no reason to align the blocks to the viewport, instead we align them to the origin of the triangle bounds.
minx = floorf(minx);
miny = floorf(miny);
@@ -5341,9 +4575,10 @@ struct Triangle
float bC = C2 + n2.x * xc + n2.y * yc;
float cC = C3 + n3.x * xc + n3.y * yc;
// Skip block when outside an edge
- if ( (aC <= BK_OUTSIDE) || (bC <= BK_OUTSIDE) || (cC <= BK_OUTSIDE) ) continue;
+ if ((aC <= BK_OUTSIDE) || (bC <= BK_OUTSIDE) || (cC <= BK_OUTSIDE))
+ continue;
// Accept whole block when totally covered
- if ( (aC >= BK_INSIDE) && (bC >= BK_INSIDE) && (cC >= BK_INSIDE) ) {
+ if ((aC >= BK_INSIDE) && (bC >= BK_INSIDE) && (cC >= BK_INSIDE)) {
for (float y = y0; y < y0 + BK_SIZE; y++) {
for (float x = x0; x < x0 + BK_SIZE; x++) {
if (!cb(param, (int)x, (int)y))
@@ -5386,10 +4621,9 @@ struct Triangle
}
private:
- void flipBackface()
- {
+ void flipBackface() {
// check if triangle is backfacing, if so, swap two vertices
- if ( ((v3.x - v1.x) * (v2.y - v1.y) - (v3.y - v1.y) * (v2.x - v1.x)) < 0 ) {
+ if (((v3.x - v1.x) * (v2.y - v1.y) - (v3.y - v1.y) * (v2.x - v1.x)) < 0) {
Vector2 hv = v1;
v1 = v2;
v2 = hv; // swap pos
@@ -5397,8 +4631,7 @@ private:
}
// compute unit inward normals for each edge.
- void computeUnitInwardNormals()
- {
+ void computeUnitInwardNormals() {
n1 = v1 - v2;
n1 = Vector2(-n1.y, n1.x);
n1 = n1 * (1.0f / sqrtf(dot(n1, n1)));
@@ -5416,8 +4649,7 @@ private:
};
// Process the given triangle. Returns false if rasterization was interrupted by the callback.
-static bool drawTriangle(const Vector2 &extents, const Vector2 v[3], SamplingCallback cb, void *param)
-{
+static bool drawTriangle(const Vector2 &extents, const Vector2 v[3], SamplingCallback cb, void *param) {
Triangle tri(v[0], v[1], v[2]);
// @@ It would be nice to have a conservative drawing mode that enlarges the triangle extents by one texel and is able to handle degenerate triangles.
// @@ Maybe the simplest thing to do would be raster triangle edges.
@@ -5432,22 +4664,19 @@ namespace segment {
// - Insertion is o(n)
// - Smallest element goes at the end, so that popping it is o(1).
-struct CostQueue
-{
- CostQueue(uint32_t size = UINT32_MAX) : m_maxSize(size), m_pairs(MemTag::SegmentAtlasChartCandidates) {}
+struct CostQueue {
+ CostQueue(uint32_t size = UINT32_MAX) :
+ m_maxSize(size), m_pairs(MemTag::SegmentAtlasChartCandidates) {}
- float peekCost() const
- {
+ float peekCost() const {
return m_pairs.back().cost;
}
- uint32_t peekFace() const
- {
+ uint32_t peekFace() const {
return m_pairs.back().face;
}
- void push(float cost, uint32_t face)
- {
+ void push(float cost, uint32_t face) {
const Pair p = { cost, face };
if (m_pairs.isEmpty() || cost < peekCost())
m_pairs.push_back(p);
@@ -5464,29 +4693,25 @@ struct CostQueue
}
}
- uint32_t pop()
- {
+ uint32_t pop() {
XA_DEBUG_ASSERT(!m_pairs.isEmpty());
uint32_t f = m_pairs.back().face;
m_pairs.pop_back();
return f;
}
- XA_INLINE void clear()
- {
+ XA_INLINE void clear() {
m_pairs.clear();
}
- XA_INLINE uint32_t count() const
- {
+ XA_INLINE uint32_t count() const {
return m_pairs.size();
}
private:
const uint32_t m_maxSize;
- struct Pair
- {
+ struct Pair {
float cost;
uint32_t face;
};
@@ -5494,25 +4719,27 @@ private:
Array<Pair> m_pairs;
};
-struct AtlasData
-{
+struct AtlasData {
ChartOptions options;
const Mesh *mesh = nullptr;
Array<float> edgeDihedralAngles;
Array<float> edgeLengths;
Array<float> faceAreas;
+ Array<float> faceUvAreas; // Can be negative.
Array<Vector3> faceNormals;
BitArray isFaceInChart;
- AtlasData() : edgeDihedralAngles(MemTag::SegmentAtlasMeshData), edgeLengths(MemTag::SegmentAtlasMeshData), faceAreas(MemTag::SegmentAtlasMeshData), faceNormals(MemTag::SegmentAtlasMeshData) {}
+ AtlasData() :
+ edgeDihedralAngles(MemTag::SegmentAtlasMeshData), edgeLengths(MemTag::SegmentAtlasMeshData), faceAreas(MemTag::SegmentAtlasMeshData), faceNormals(MemTag::SegmentAtlasMeshData) {}
- void compute()
- {
+ void compute() {
const uint32_t faceCount = mesh->faceCount();
const uint32_t edgeCount = mesh->edgeCount();
edgeDihedralAngles.resize(edgeCount);
edgeLengths.resize(edgeCount);
faceAreas.resize(faceCount);
+ if (options.useInputMeshUvs)
+ faceUvAreas.resize(faceCount);
faceNormals.resize(faceCount);
isFaceInChart.resize(faceCount);
isFaceInChart.zeroOutMemory();
@@ -5526,6 +4753,8 @@ struct AtlasData
}
faceAreas[f] = mesh->computeFaceArea(f);
XA_DEBUG_ASSERT(faceAreas[f] > 0.0f);
+ if (options.useInputMeshUvs)
+ faceUvAreas[f] = mesh->computeFaceParametricArea(f);
faceNormals[f] = mesh->computeFaceNormal(f);
}
for (uint32_t face = 0; face < faceCount; face++) {
@@ -5543,19 +4772,109 @@ struct AtlasData
}
};
+// If MeshDecl::vertexUvData is set on input meshes, find charts by floodfilling faces in world/model space without crossing UV seams. Faces whose UV area is near zero are never seeded or added, and every face placed in a chart is flagged in AtlasData::isFaceInChart.
+struct OriginalUvCharts {
+ OriginalUvCharts(AtlasData &data) :
+ m_data(data) {} // Keeps a reference to the shared atlas data; compute() reads faceUvAreas and sets isFaceInChart through it.
+ uint32_t chartCount() const { return m_charts.size(); } // Number of charts produced by the last compute().
+ const Basis &chartBasis(uint32_t chartIndex) const { return m_chartBasis[chartIndex]; } // Basis fitted to the chart's face vertices by compute().
+
+ ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const { // View over the chart's contiguous run of face indices inside m_chartFaces.
+ const Chart &chart = m_charts[chartIndex];
+ return ConstArrayView<uint32_t>(&m_chartFaces[chart.firstFace], chart.faceCount);
+ }
+
+ void compute() { // Rebuilds all charts. Reads m_data.faceUvAreas, which AtlasData::compute only fills when options.useInputMeshUvs is set.
+ m_charts.clear();
+ m_chartFaces.clear();
+ const Mesh *mesh = m_data.mesh;
+ const uint32_t faceCount = mesh->faceCount();
+ for (uint32_t f = 0; f < faceCount; f++) {
+ if (m_data.isFaceInChart.get(f))
+ continue; // Already assigned to a chart.
+ if (isZero(m_data.faceUvAreas[f], kAreaEpsilon))
+ continue; // Face must have valid UVs.
+ // Found an unassigned face with usable UVs: it becomes the seed of a new chart.
+ Chart chart;
+ chart.firstFace = m_chartFaces.size(); // The chart's faces are appended contiguously starting at this offset.
+ chart.faceCount = 1;
+ m_chartFaces.push_back(f);
+ m_data.isFaceInChart.set(f);
+ floodfillFaces(chart); // Grow the chart across edges whose UVs agree on both sides.
+ m_charts.push_back(chart);
+ }
+ // Compute a basis for each chart from the positions of all of its face vertices.
+ m_chartBasis.resize(m_charts.size());
+ for (uint32_t c = 0; c < m_charts.size(); c++) {
+ const Chart &chart = m_charts[c];
+ m_tempPoints.resize(chart.faceCount * 3); // Three vertex positions per face.
+ for (uint32_t f = 0; f < chart.faceCount; f++) {
+ const uint32_t face = m_chartFaces[chart.firstFace + f];
+ for (uint32_t i = 0; i < 3; i++)
+ m_tempPoints[f * 3 + i] = m_data.mesh->position(m_data.mesh->vertexAt(face * 3 + i));
+ }
+ Fit::computeBasis(m_tempPoints, &m_chartBasis[c]); // NOTE(review): the result is ignored here, while ClusteredCharts::computeChartBasis returns it as a bool - confirm a failed fit is acceptable.
+ }
+ }
+
+private:
+ struct Chart {
+ uint32_t firstFace, faceCount; // Offset into m_chartFaces and number of consecutive entries belonging to the chart.
+ };
+
+ void floodfillFaces(Chart &chart) { // Expands an already seeded chart by adding neighbouring faces until a full pass adds none.
+ const bool isFaceAreaNegative = m_data.faceUvAreas[m_chartFaces[chart.firstFace]] < 0.0f; // Sign of the seed face's UV area; only faces with the same sign are added.
+ for (;;) {
+ bool newFaceAdded = false;
+ const uint32_t faceCount = chart.faceCount; // Snapshot: faces appended during this pass are visited on the next pass.
+ for (uint32_t f = 0; f < faceCount; f++) { // NOTE(review): each pass restarts at the chart's first face, so faces examined in earlier passes are examined again.
+ const uint32_t sourceFace = m_chartFaces[chart.firstFace + f];
+ for (Mesh::FaceEdgeIterator edgeIt(m_data.mesh, sourceFace); !edgeIt.isDone(); edgeIt.advance()) {
+ const uint32_t face = edgeIt.oppositeFace();
+ if (face == UINT32_MAX)
+ continue; // Boundary edge.
+ if (m_data.isFaceInChart.get(face))
+ continue; // Already assigned to a chart.
+ if (isZero(m_data.faceUvAreas[face], kAreaEpsilon))
+ continue; // Face must have valid UVs.
+ if ((m_data.faceUvAreas[face] < 0.0f) != isFaceAreaNegative)
+ continue; // Face winding is opposite of the first chart face.
+ const Vector2 &uv0 = m_data.mesh->texcoord(edgeIt.vertex0());
+ const Vector2 &uv1 = m_data.mesh->texcoord(edgeIt.vertex1());
+ const Vector2 &ouv0 = m_data.mesh->texcoord(m_data.mesh->vertexAt(meshEdgeIndex0(edgeIt.oppositeEdge())));
+ const Vector2 &ouv1 = m_data.mesh->texcoord(m_data.mesh->vertexAt(meshEdgeIndex1(edgeIt.oppositeEdge())));
+ if (!equal(uv0, ouv1, m_data.mesh->epsilon()) || !equal(uv1, ouv0, m_data.mesh->epsilon())) // Endpoints are compared crosswise (0 with 1) - presumably the opposite edge runs in the reverse direction.
+ continue; // UVs must match on both sides of the edge (within the mesh epsilon), otherwise it is a UV seam.
+ m_chartFaces.push_back(face);
+ chart.faceCount++;
+ m_data.isFaceInChart.set(face);
+ newFaceAdded = true;
+ }
+ }
+ if (!newFaceAdded)
+ break;
+ }
+ }
+
+ AtlasData &m_data;
+ Array<Chart> m_charts;
+ Array<Basis> m_chartBasis; // One entry per chart, same indexing as m_charts.
+ Array<uint32_t> m_chartFaces; // Face indices of all charts, stored back to back.
+ Array<Vector3> m_tempPoints; // Scratch buffer reused for each chart's basis fit.
+};
+
#if XA_DEBUG_EXPORT_OBJ_PLANAR_REGIONS
static uint32_t s_planarRegionsCurrentRegion;
static uint32_t s_planarRegionsCurrentVertex;
#endif
-struct PlanarCharts
-{
- PlanarCharts(AtlasData &data) : m_data(data), m_nextRegionFace(MemTag::SegmentAtlasPlanarRegions), m_faceToRegionId(MemTag::SegmentAtlasPlanarRegions) {}
+struct PlanarCharts {
+ PlanarCharts(AtlasData &data) :
+ m_data(data), m_nextRegionFace(MemTag::SegmentAtlasPlanarRegions), m_faceToRegionId(MemTag::SegmentAtlasPlanarRegions) {}
const Basis &chartBasis(uint32_t chartIndex) const { return m_chartBasis[chartIndex]; }
uint32_t chartCount() const { return m_charts.size(); }
-
- ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const
- {
+
+ ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const {
const Chart &chart = m_charts[chartIndex];
return ConstArrayView<uint32_t>(&m_chartFaces[chart.firstFace], chart.faceCount);
}
@@ -5564,8 +4883,7 @@ struct PlanarCharts
uint32_t nextRegionFace(uint32_t face) const { return m_nextRegionFace[face]; }
float regionArea(uint32_t region) const { return m_regionAreas[region]; }
- void compute()
- {
+ void compute() {
const uint32_t faceCount = m_data.mesh->faceCount();
// Precompute regions of coplanar incident faces.
m_regionFirstFace.clear();
@@ -5581,6 +4899,8 @@ struct PlanarCharts
for (uint32_t f = 0; f < faceCount; f++) {
if (m_nextRegionFace[f] != f)
continue; // Already assigned.
+ if (m_data.isFaceInChart.get(f))
+ continue; // Already in a chart.
faceStack.clear();
faceStack.push_back(f);
for (;;) {
@@ -5595,6 +4915,8 @@ struct PlanarCharts
continue;
if (m_nextRegionFace[oface] != oface)
continue; // Already assigned.
+ if (m_data.isFaceInChart.get(oface))
+ continue; // Already in a chart.
if (!equal(dot(m_data.faceNormals[face], m_data.faceNormals[oface]), 1.0f, kEpsilon))
continue; // Not coplanar.
const uint32_t next = m_nextRegionFace[face];
@@ -5632,8 +4954,11 @@ struct PlanarCharts
// Precompute planar region areas.
m_regionAreas.resize(regionCount);
m_regionAreas.zeroOutMemory();
- for (uint32_t f = 0; f < faceCount; f++)
+ for (uint32_t f = 0; f < faceCount; f++) {
+ if (m_faceToRegionId[f] == UINT32_MAX)
+ continue;
m_regionAreas[m_faceToRegionId[f]] += m_data.faceAreas[f];
+ }
// Create charts from suitable planar regions.
// The dihedral angle of all boundary edges must be >= 90 degrees.
m_charts.clear();
@@ -5658,8 +4983,7 @@ struct PlanarCharts
if (!createChart)
break;
face = m_nextRegionFace[face];
- }
- while (face != firstRegionFace);
+ } while (face != firstRegionFace);
// Create a chart.
if (createChart) {
Chart chart;
@@ -5671,15 +4995,13 @@ struct PlanarCharts
m_chartFaces.push_back(face);
chart.faceCount++;
face = m_nextRegionFace[face];
- }
- while (face != firstRegionFace);
+ } while (face != firstRegionFace);
m_charts.push_back(chart);
}
}
// Compute basis for each chart using the first face normal (all faces have the same normal).
m_chartBasis.resize(m_charts.size());
- for (uint32_t c = 0; c < m_charts.size(); c++)
- {
+ for (uint32_t c = 0; c < m_charts.size(); c++) {
const uint32_t face = m_chartFaces[m_charts[c].firstFace];
Basis &basis = m_chartBasis[c];
basis.normal = m_data.faceNormals[face];
@@ -5689,8 +5011,7 @@ struct PlanarCharts
}
private:
- struct Chart
- {
+ struct Chart {
uint32_t firstFace, faceCount;
};
@@ -5704,12 +5025,11 @@ private:
Array<Basis> m_chartBasis;
};
-struct ClusteredCharts
-{
- ClusteredCharts(AtlasData &data, const PlanarCharts &planarCharts) : m_data(data), m_planarCharts(planarCharts), m_texcoords(MemTag::SegmentAtlasMeshData), m_bestTriangles(10), m_placingSeeds(false) {}
+struct ClusteredCharts {
+ ClusteredCharts(AtlasData &data, const PlanarCharts &planarCharts) :
+ m_data(data), m_planarCharts(planarCharts), m_texcoords(MemTag::SegmentAtlasMeshData), m_bestTriangles(10), m_placingSeeds(false) {}
- ~ClusteredCharts()
- {
+ ~ClusteredCharts() {
const uint32_t chartCount = m_charts.size();
for (uint32_t i = 0; i < chartCount; i++) {
m_charts[i]->~Chart();
@@ -5721,8 +5041,7 @@ struct ClusteredCharts
ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const { return m_charts[chartIndex]->faces; }
const Basis &chartBasis(uint32_t chartIndex) const { return m_charts[chartIndex]->basis; }
- void compute()
- {
+ void compute() {
const uint32_t faceCount = m_data.mesh->faceCount();
m_facesLeft = 0;
for (uint32_t i = 0; i < faceCount; i++) {
@@ -5768,9 +5087,9 @@ struct ClusteredCharts
}
private:
- struct Chart
- {
- Chart() : faces(MemTag::SegmentAtlasChartFaces) {}
+ struct Chart {
+ Chart() :
+ faces(MemTag::SegmentAtlasChartFaces) {}
int id = -1;
Basis basis; // Best fit normal.
@@ -5784,8 +5103,7 @@ private:
uint32_t seed;
};
- void placeSeeds(float threshold)
- {
+ void placeSeeds(float threshold) {
XA_PROFILE_START(clusteredChartsPlaceSeeds)
m_placingSeeds = true;
// Instead of using a predefiened number of seeds:
@@ -5801,8 +5119,7 @@ private:
}
// Returns true if any of the charts can grow more.
- void growCharts(float threshold)
- {
+ void growCharts(float threshold) {
XA_PROFILE_START(clusteredChartsGrow)
for (;;) {
if (m_facesLeft == 0)
@@ -5848,8 +5165,7 @@ private:
XA_PROFILE_END(clusteredChartsGrow)
}
- void resetCharts()
- {
+ void resetCharts() {
XA_PROFILE_START(clusteredChartsReset)
const uint32_t faceCount = m_data.mesh->faceCount();
for (uint32_t i = 0; i < faceCount; i++) {
@@ -5880,8 +5196,7 @@ private:
XA_PROFILE_END(clusteredChartsReset)
}
- bool relocateSeeds()
- {
+ bool relocateSeeds() {
XA_PROFILE_START(clusteredChartsRelocateSeeds)
bool anySeedChanged = false;
const uint32_t chartCount = m_charts.size();
@@ -5894,8 +5209,7 @@ private:
return anySeedChanged;
}
- void fillHoles(float threshold)
- {
+ void fillHoles(float threshold) {
XA_PROFILE_START(clusteredChartsFillHoles)
while (m_facesLeft > 0)
createChart(threshold);
@@ -5903,8 +5217,7 @@ private:
}
#if XA_MERGE_CHARTS
- void mergeCharts()
- {
+ void mergeCharts() {
XA_PROFILE_START(clusteredChartsMerge)
const uint32_t chartCount = m_charts.size();
// Merge charts progressively until there's none left to merge.
@@ -5964,7 +5277,7 @@ private:
// Merge if chart2 has a single face.
// chart1 must have more than 1 face.
// chart2 area must be <= 10% of chart1 area.
- if (m_sharedBoundaryLengthsNoSeams[cc] > 0.0f && chart->faces.size() > 1 && chart2->faces.size() == 1 && chart2->area <= chart->area * 0.1f)
+ if (m_sharedBoundaryLengthsNoSeams[cc] > 0.0f && chart->faces.size() > 1 && chart2->faces.size() == 1 && chart2->area <= chart->area * 0.1f)
goto merge;
// Merge if chart2 has two faces (probably a quad), and chart1 bounds at least 2 of its edges.
if (chart2->faces.size() == 2 && m_sharedBoundaryEdgeCountNoSeams[cc] >= 2)
@@ -5972,8 +5285,8 @@ private:
// Merge if chart2 is wholely inside chart1, ignoring seams.
if (m_sharedBoundaryLengthsNoSeams[cc] > 0.0f && equal(m_sharedBoundaryLengthsNoSeams[cc], chart2->boundaryLength, kEpsilon))
goto merge;
- if (m_sharedBoundaryLengths[cc] > 0.2f * max(0.0f, chart->boundaryLength - externalBoundaryLength) ||
- m_sharedBoundaryLengths[cc] > 0.75f * chart2->boundaryLength)
+ if (m_sharedBoundaryLengths[cc] > 0.2f * max(0.0f, chart->boundaryLength - externalBoundaryLength) ||
+ m_sharedBoundaryLengths[cc] > 0.75f * chart2->boundaryLength)
goto merge;
continue;
merge:
@@ -6011,8 +5324,7 @@ private:
#endif
private:
- void createChart(float threshold)
- {
+ void createChart(float threshold) {
Chart *chart = XA_NEW(MemTag::Default, Chart);
chart->id = (int)m_charts.size();
m_charts.push_back(chart);
@@ -6043,15 +5355,13 @@ private:
}
}
- bool isChartBoundaryEdge(const Chart *chart, uint32_t edge) const
- {
+ bool isChartBoundaryEdge(const Chart *chart, uint32_t edge) const {
const uint32_t oppositeEdge = m_data.mesh->oppositeEdge(edge);
const uint32_t oppositeFace = meshEdgeFace(oppositeEdge);
return oppositeEdge == UINT32_MAX || m_faceCharts[oppositeFace] != chart->id;
}
- bool computeChartBasis(Chart *chart, Basis *basis)
- {
+ bool computeChartBasis(Chart *chart, Basis *basis) {
const uint32_t faceCount = chart->faces.size();
m_tempPoints.resize(chart->faces.size() * 3);
for (uint32_t i = 0; i < faceCount; i++) {
@@ -6059,11 +5369,10 @@ private:
for (uint32_t j = 0; j < 3; j++)
m_tempPoints[i * 3 + j] = m_data.mesh->position(m_data.mesh->vertexAt(f * 3 + j));
}
- return Fit::computeBasis(m_tempPoints.data(), m_tempPoints.size(), basis);
+ return Fit::computeBasis(m_tempPoints, basis);
}
- bool isFaceFlipped(uint32_t face) const
- {
+ bool isFaceFlipped(uint32_t face) const {
const Vector2 &v1 = m_texcoords[face * 3 + 0];
const Vector2 &v2 = m_texcoords[face * 3 + 1];
const Vector2 &v3 = m_texcoords[face * 3 + 2];
@@ -6071,8 +5380,7 @@ private:
return parametricArea < 0.0f;
}
- void parameterizeChart(const Chart *chart)
- {
+ void parameterizeChart(const Chart *chart) {
const uint32_t faceCount = chart->faces.size();
for (uint32_t i = 0; i < faceCount; i++) {
const uint32_t face = chart->faces[i];
@@ -6085,8 +5393,7 @@ private:
}
// m_faceCharts for the chart faces must be set to the chart ID. Needed to compute boundary edges.
- bool isChartParameterizationValid(const Chart *chart)
- {
+ bool isChartParameterizationValid(const Chart *chart) {
const uint32_t faceCount = chart->faces.size();
// Check for flipped faces in the parameterization. OK if all are flipped.
uint32_t flippedFaceCount = 0;
@@ -6099,7 +5406,7 @@ private:
// Check for boundary intersection in the parameterization.
XA_PROFILE_START(clusteredChartsPlaceSeedsBoundaryIntersection)
XA_PROFILE_START(clusteredChartsGrowBoundaryIntersection)
- m_boundaryGrid.reset(m_texcoords.data());
+ m_boundaryGrid.reset(m_texcoords);
for (uint32_t i = 0; i < faceCount; i++) {
const uint32_t f = chart->faces[i];
for (uint32_t j = 0; j < 3; j++) {
@@ -6120,15 +5427,14 @@ private:
return true;
}
- bool addFaceToChart(Chart *chart, uint32_t face)
- {
+ bool addFaceToChart(Chart *chart, uint32_t face) {
XA_DEBUG_ASSERT(!m_data.isFaceInChart.get(face));
const uint32_t oldFaceCount = chart->faces.size();
const bool firstFace = oldFaceCount == 0;
// Append the face and any coplanar connected faces to the chart faces array.
chart->faces.push_back(face);
uint32_t coplanarFace = m_planarCharts.nextRegionFace(face);
- while (coplanarFace != face) {
+ while (coplanarFace != face) {
XA_DEBUG_ASSERT(!m_data.isFaceInChart.get(coplanarFace));
chart->faces.push_back(coplanarFace);
coplanarFace = m_planarCharts.nextRegionFace(coplanarFace);
@@ -6140,7 +5446,7 @@ private:
// Use the first face normal.
// Use any edge as the tangent vector.
basis.normal = m_data.faceNormals[face];
- basis.tangent = normalize(m_data.mesh->position(m_data.mesh->vertexAt(face * 3 + 0)) - m_data.mesh->position(m_data.mesh->vertexAt(face * 3 + 1)), kEpsilon);
+ basis.tangent = normalize(m_data.mesh->position(m_data.mesh->vertexAt(face * 3 + 0)) - m_data.mesh->position(m_data.mesh->vertexAt(face * 3 + 1)));
basis.bitangent = cross(basis.normal, basis.tangent);
} else {
// Use best fit normal.
@@ -6199,8 +5505,7 @@ private:
}
// Returns true if the seed has changed.
- bool relocateSeed(Chart *chart)
- {
+ bool relocateSeed(Chart *chart) {
// Find the first N triangles that fit the proxy best.
const uint32_t faceCount = chart->faces.size();
m_bestTriangles.clear();
@@ -6230,8 +5535,7 @@ private:
}
// Cost is combined metrics * weights.
- float computeCost(Chart *chart, uint32_t face) const
- {
+ float computeCost(Chart *chart, uint32_t face) const {
// Estimate boundary length and area:
const float newChartArea = computeArea(chart, face);
const float newBoundaryLength = computeBoundaryLength(chart, face);
@@ -6267,28 +5571,25 @@ private:
// Returns a value in [0-1].
// 0 if face normal is coplanar to the chart's best fit normal.
// 1 if face normal is perpendicular.
- float computeNormalDeviationMetric(Chart *chart, uint32_t face) const
- {
+ float computeNormalDeviationMetric(Chart *chart, uint32_t face) const {
// All faces in coplanar regions have the same normal, can use any face.
const Vector3 faceNormal = m_data.faceNormals[face];
// Use plane fitting metric for now:
return min(1.0f - dot(faceNormal, chart->basis.normal), 1.0f); // @@ normal deviations should be weighted by face area
}
- float computeRoundnessMetric(Chart *chart, float newBoundaryLength, float newChartArea) const
- {
+ float computeRoundnessMetric(Chart *chart, float newBoundaryLength, float newChartArea) const {
const float oldRoundness = square(chart->boundaryLength) / chart->area;
const float newRoundness = square(newBoundaryLength) / newChartArea;
return 1.0f - oldRoundness / newRoundness;
}
- float computeStraightnessMetric(Chart *chart, uint32_t firstFace) const
- {
+ float computeStraightnessMetric(Chart *chart, uint32_t firstFace) const {
float l_out = 0.0f; // Length of firstFace planar region boundary that doesn't border the chart.
float l_in = 0.0f; // Length that does border the chart.
const uint32_t planarRegionId = m_planarCharts.regionIdFromFace(firstFace);
uint32_t face = firstFace;
- for (;;) {
+ for (;;) {
for (Mesh::FaceEdgeIterator it(m_data.mesh, face); !it.isDone(); it.advance()) {
const float l = m_data.edgeLengths[it.edge()];
if (it.isBoundary()) {
@@ -6305,7 +5606,6 @@ private:
break;
}
#if 1
- XA_DEBUG_ASSERT(l_in != 0.0f); // Candidate face must be adjacent to chart. @@ This is not true if the input mesh has zero-length edges.
float ratio = (l_out - l_in) / (l_out + l_in);
return min(ratio, 0.0f); // Only use the straightness metric to close gaps.
#else
@@ -6313,8 +5613,7 @@ private:
#endif
}
- bool isNormalSeam(uint32_t edge) const
- {
+ bool isNormalSeam(uint32_t edge) const {
const uint32_t oppositeEdge = m_data.mesh->oppositeEdge(edge);
if (oppositeEdge == UINT32_MAX)
return false; // boundary edge
@@ -6334,11 +5633,10 @@ private:
return !equal(m_data.faceNormals[f0], m_data.faceNormals[f1], kNormalEpsilon);
}
- float computeNormalSeamMetric(Chart *chart, uint32_t firstFace) const
- {
+ float computeNormalSeamMetric(Chart *chart, uint32_t firstFace) const {
float seamFactor = 0.0f, totalLength = 0.0f;
uint32_t face = firstFace;
- for (;;) {
+ for (;;) {
for (Mesh::FaceEdgeIterator it(m_data.mesh, face); !it.isDone(); it.advance()) {
if (it.isBoundary())
continue;
@@ -6375,11 +5673,10 @@ private:
return seamFactor / totalLength;
}
- float computeTextureSeamMetric(Chart *chart, uint32_t firstFace) const
- {
+ float computeTextureSeamMetric(Chart *chart, uint32_t firstFace) const {
float seamLength = 0.0f, totalLength = 0.0f;
uint32_t face = firstFace;
- for (;;) {
+ for (;;) {
for (Mesh::FaceEdgeIterator it(m_data.mesh, face); !it.isDone(); it.advance()) {
if (it.isBoundary())
continue;
@@ -6402,11 +5699,10 @@ private:
return seamLength / totalLength;
}
- float computeArea(Chart *chart, uint32_t firstFace) const
- {
+ float computeArea(Chart *chart, uint32_t firstFace) const {
float area = chart->area;
uint32_t face = firstFace;
- for (;;) {
+ for (;;) {
area += m_data.faceAreas[face];
face = m_planarCharts.nextRegionFace(face);
if (face == firstFace)
@@ -6415,13 +5711,12 @@ private:
return area;
}
- float computeBoundaryLength(Chart *chart, uint32_t firstFace) const
- {
+ float computeBoundaryLength(Chart *chart, uint32_t firstFace) const {
float boundaryLength = chart->boundaryLength;
// Add new edges, subtract edges shared with the chart.
const uint32_t planarRegionId = m_planarCharts.regionIdFromFace(firstFace);
uint32_t face = firstFace;
- for (;;) {
+ for (;;) {
for (Mesh::FaceEdgeIterator it(m_data.mesh, face); !it.isDone(); it.advance()) {
const float edgeLength = m_data.edgeLengths[it.edge()];
if (it.isBoundary()) {
@@ -6437,11 +5732,10 @@ private:
if (face == firstFace)
break;
}
- return max(0.0f, boundaryLength); // @@ Hack!
+ return max(0.0f, boundaryLength); // @@ Hack!
}
- bool mergeChart(Chart *owner, Chart *chart, float sharedBoundaryLength)
- {
+ bool mergeChart(Chart *owner, Chart *chart, float sharedBoundaryLength) {
const uint32_t oldOwnerFaceCount = owner->faces.size();
const uint32_t chartFaceCount = chart->faces.size();
owner->faces.push_back(chart->faces);
@@ -6499,33 +5793,53 @@ private:
bool m_placingSeeds;
};
-struct Atlas
-{
- Atlas() : m_planarCharts(m_data), m_clusteredCharts(m_data, m_planarCharts) {}
+struct ChartGeneratorType { // Identifies which chart generator produced a chart; the struct only scopes the enum.
+ enum Enum {
+ OriginalUv, // Chart index falls in the OriginalUvCharts range of the atlas.
+ Planar, // Chart index falls in the PlanarCharts range of the atlas.
+ Clustered, // Chart index falls in the ClusteredCharts range of the atlas.
+ Piecewise // Not returned by Atlas::chartGeneratorType(); its producer is not visible in this part of the file.
+ };
+};
- uint32_t chartCount() const
- {
- return m_planarCharts.chartCount() + m_clusteredCharts.chartCount();
+struct Atlas {
+ Atlas() :
+ m_originalUvCharts(m_data), m_planarCharts(m_data), m_clusteredCharts(m_data, m_planarCharts) {}
+
+ uint32_t chartCount() const {
+ return m_originalUvCharts.chartCount() + m_planarCharts.chartCount() + m_clusteredCharts.chartCount();
}
- ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const
- {
+ ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const {
+ if (chartIndex < m_originalUvCharts.chartCount())
+ return m_originalUvCharts.chartFaces(chartIndex);
+ chartIndex -= m_originalUvCharts.chartCount();
if (chartIndex < m_planarCharts.chartCount())
return m_planarCharts.chartFaces(chartIndex);
chartIndex -= m_planarCharts.chartCount();
return m_clusteredCharts.chartFaces(chartIndex);
}
- const Basis &chartBasis(uint32_t chartIndex) const
- {
+ const Basis &chartBasis(uint32_t chartIndex) const {
+ if (chartIndex < m_originalUvCharts.chartCount())
+ return m_originalUvCharts.chartBasis(chartIndex);
+ chartIndex -= m_originalUvCharts.chartCount();
if (chartIndex < m_planarCharts.chartCount())
return m_planarCharts.chartBasis(chartIndex);
chartIndex -= m_planarCharts.chartCount();
return m_clusteredCharts.chartBasis(chartIndex);
}
- void reset(const Mesh *mesh, const ChartOptions &options)
- {
+ ChartGeneratorType::Enum chartGeneratorType(uint32_t chartIndex) const { // Maps a combined chart index to its generator, using the same original-UV, planar, clustered ordering as chartFaces() and chartBasis().
+ if (chartIndex < m_originalUvCharts.chartCount())
+ return ChartGeneratorType::OriginalUv;
+ chartIndex -= m_originalUvCharts.chartCount(); // Rebase the index so it is relative to the first planar chart.
+ if (chartIndex < m_planarCharts.chartCount())
+ return ChartGeneratorType::Planar;
+ return ChartGeneratorType::Clustered; // Every remaining index is reported as clustered; chartIndex is not checked against chartCount().
+ }
+
+ void reset(const Mesh *mesh, const ChartOptions &options) {
XA_PROFILE_START(buildAtlasInit)
m_data.options = options;
m_data.mesh = mesh;
@@ -6533,8 +5847,12 @@ struct Atlas
XA_PROFILE_END(buildAtlasInit)
}
- void compute()
- {
+ void compute() {
+ if (m_data.options.useInputMeshUvs) {
+ XA_PROFILE_START(originalUvCharts)
+ m_originalUvCharts.compute();
+ XA_PROFILE_END(originalUvCharts)
+ }
XA_PROFILE_START(planarCharts)
m_planarCharts.compute();
XA_PROFILE_END(planarCharts)
@@ -6545,17 +5863,143 @@ struct Atlas
private:
AtlasData m_data;
+ OriginalUvCharts m_originalUvCharts;
PlanarCharts m_planarCharts;
ClusteredCharts m_clusteredCharts;
};
+struct ComputeUvMeshChartsTaskArgs {
+ UvMesh *mesh;
+ Progress *progress;
+};
+
+// Charts are found by floodfilling faces without crossing UV seams.
+struct ComputeUvMeshChartsTask {
+ ComputeUvMeshChartsTask(ComputeUvMeshChartsTaskArgs *args) :
+ m_mesh(args->mesh), m_progress(args->progress), m_uvToEdgeMap(MemTag::Default, m_mesh->indices.size()), m_faceAssigned(m_mesh->indices.size() / 3) {}
+
+ void run() {
+ const uint32_t vertexCount = m_mesh->texcoords.size();
+ const uint32_t indexCount = m_mesh->indices.size();
+ const uint32_t faceCount = indexCount / 3;
+ // A vertex can only be assigned to one chart.
+ m_mesh->vertexToChartMap.resize(vertexCount);
+ m_mesh->vertexToChartMap.fill(UINT32_MAX);
+ // Map vertex UV to edge. Face is then edge / 3.
+ for (uint32_t i = 0; i < indexCount; i++)
+ m_uvToEdgeMap.add(m_mesh->texcoords[m_mesh->indices[i]]);
+ // Find charts.
+ m_faceAssigned.zeroOutMemory();
+ for (uint32_t f = 0; f < faceCount; f++) {
+ if (m_progress->cancel)
+ return;
+ m_progress->increment(1);
+ // Found an unassigned face, see if it can be added.
+ const uint32_t chartIndex = m_mesh->charts.size();
+ if (!canAddFaceToChart(chartIndex, f))
+ continue;
+ // Face is OK, create a new chart with the face.
+ UvMeshChart *chart = XA_NEW(MemTag::Default, UvMeshChart);
+ m_mesh->charts.push_back(chart);
+ chart->material = m_mesh->faceMaterials.isEmpty() ? 0 : m_mesh->faceMaterials[f];
+ addFaceToChart(chartIndex, f);
+ // Walk incident faces and assign them to the chart.
+ uint32_t f2 = 0;
+ for (;;) {
+ bool newFaceAssigned = false;
+ const uint32_t faceCount2 = chart->faces.size();
+ for (; f2 < faceCount2; f2++) {
+ const uint32_t face = chart->faces[f2];
+ for (uint32_t i = 0; i < 3; i++) {
+ // Add any valid faces with colocal UVs to the chart.
+ const Vector2 &uv = m_mesh->texcoords[m_mesh->indices[face * 3 + i]];
+ uint32_t edge = m_uvToEdgeMap.get(uv);
+ while (edge != UINT32_MAX) {
+ const uint32_t newFace = edge / 3;
+ if (canAddFaceToChart(chartIndex, newFace)) {
+ addFaceToChart(chartIndex, newFace);
+ newFaceAssigned = true;
+ }
+ edge = m_uvToEdgeMap.getNext(uv, edge);
+ }
+ }
+ }
+ if (!newFaceAssigned)
+ break;
+ }
+ }
+ }
+
+private:
+ // The chart at chartIndex doesn't have to exist yet.
+ bool canAddFaceToChart(uint32_t chartIndex, uint32_t face) const {
+ if (m_faceAssigned.get(face))
+ return false; // Already assigned to a chart.
+ if (m_mesh->faceIgnore.get(face))
+ return false; // Face is ignored (zero area or nan UVs).
+ if (!m_mesh->faceMaterials.isEmpty() && chartIndex < m_mesh->charts.size()) {
+ if (m_mesh->faceMaterials[face] != m_mesh->charts[chartIndex]->material)
+ return false; // Materials don't match.
+ }
+ for (uint32_t i = 0; i < 3; i++) {
+ const uint32_t vertex = m_mesh->indices[face * 3 + i];
+ if (m_mesh->vertexToChartMap[vertex] != UINT32_MAX && m_mesh->vertexToChartMap[vertex] != chartIndex)
+ return false; // Vertex already assigned to another chart.
+ }
+ return true;
+ }
+
+ void addFaceToChart(uint32_t chartIndex, uint32_t face) {
+ UvMeshChart *chart = m_mesh->charts[chartIndex];
+ m_faceAssigned.set(face);
+ chart->faces.push_back(face);
+ for (uint32_t i = 0; i < 3; i++) {
+ const uint32_t vertex = m_mesh->indices[face * 3 + i];
+ m_mesh->vertexToChartMap[vertex] = chartIndex;
+ chart->indices.push_back(vertex);
+ }
+ }
+
+ UvMesh *const m_mesh;
+ Progress *const m_progress;
+ HashMap<Vector2> m_uvToEdgeMap; // Face is edge / 3.
+ BitArray m_faceAssigned;
+};
+
+static void runComputeUvMeshChartsTask(void * /*groupUserData*/, void *taskUserData) {
+ XA_PROFILE_START(computeChartsThread)
+ ComputeUvMeshChartsTask task((ComputeUvMeshChartsTaskArgs *)taskUserData);
+ task.run();
+ XA_PROFILE_END(computeChartsThread)
+}
+
+static bool computeUvMeshCharts(TaskScheduler *taskScheduler, ArrayView<UvMesh *> meshes, ProgressFunc progressFunc, void *progressUserData) {
+ uint32_t totalFaceCount = 0;
+ for (uint32_t i = 0; i < meshes.length; i++)
+ totalFaceCount += meshes[i]->indices.size() / 3;
+ Progress progress(ProgressCategory::ComputeCharts, progressFunc, progressUserData, totalFaceCount);
+ TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(nullptr, meshes.length);
+ Array<ComputeUvMeshChartsTaskArgs> taskArgs;
+ taskArgs.resize(meshes.length);
+ for (uint32_t i = 0; i < meshes.length; i++) {
+ ComputeUvMeshChartsTaskArgs &args = taskArgs[i];
+ args.mesh = meshes[i];
+ args.progress = &progress;
+ Task task;
+ task.userData = &args;
+ task.func = runComputeUvMeshChartsTask;
+ taskScheduler->run(taskGroup, task);
+ }
+ taskScheduler->wait(&taskGroup);
+ return !progress.cancel;
+}
+
} // namespace segment
namespace param {
// Fast sweep in 3 directions
-static bool findApproximateDiameterVertices(Mesh *mesh, uint32_t *a, uint32_t *b)
-{
+static bool findApproximateDiameterVertices(Mesh *mesh, uint32_t *a, uint32_t *b) {
XA_DEBUG_ASSERT(a != nullptr);
XA_DEBUG_ASSERT(b != nullptr);
const uint32_t vertexCount = mesh->vertexCount();
@@ -6612,10 +6056,9 @@ static bool findApproximateDiameterVertices(Mesh *mesh, uint32_t *a, uint32_t *b
// From OpenNL LSCM example.
// Computes the coordinates of the vertices of a triangle in a local 2D orthonormal basis of the triangle's plane.
-static void projectTriangle(Vector3 p0, Vector3 p1, Vector3 p2, Vector2 *z0, Vector2 *z1, Vector2 *z2, float epsilon)
-{
- Vector3 X = normalize(p1 - p0, epsilon);
- Vector3 Z = normalize(cross(X, p2 - p0), epsilon);
+static void projectTriangle(Vector3 p0, Vector3 p1, Vector3 p2, Vector2 *z0, Vector2 *z1, Vector2 *z2) {
+ Vector3 X = normalize(p1 - p0);
+ Vector3 Z = normalize(cross(X, p2 - p0));
Vector3 Y = cross(Z, X);
Vector3 &O = p0;
*z0 = Vector2(0, 0);
@@ -6623,8 +6066,83 @@ static void projectTriangle(Vector3 p0, Vector3 p1, Vector3 p2, Vector2 *z0, Vec
*z2 = Vector2(dot(p2 - O, X), dot(p2 - O, Y));
}
-static bool computeLeastSquaresConformalMap(Mesh *mesh)
-{
+// Conformal relations from Brecht Van Lommel (based on ABF):
+
+static float vec_angle_cos(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3) {
+ Vector3 d1 = v1 - v2;
+ Vector3 d2 = v3 - v2;
+ return clamp(dot(d1, d2) / (length(d1) * length(d2)), -1.0f, 1.0f);
+}
+
+static float vec_angle(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3) {
+ float dot = vec_angle_cos(v1, v2, v3);
+ return acosf(dot);
+}
+
+static void triangle_angles(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3, float *a1, float *a2, float *a3) {
+ *a1 = vec_angle(v3, v1, v2);
+ *a2 = vec_angle(v1, v2, v3);
+ *a3 = kPi - *a2 - *a1;
+}
+
+static bool setup_abf_relations(opennl::NLContext *context, int id0, int id1, int id2, const Vector3 &p0, const Vector3 &p1, const Vector3 &p2) {
+ // @@ IC: Wouldn't it be more accurate to return cos and compute 1-cos^2?
+ // It does indeed seem to be a little bit more robust.
+ // @@ Need to revisit this more carefully!
+ float a0, a1, a2;
+ triangle_angles(p0, p1, p2, &a0, &a1, &a2);
+ if (a0 == 0.0f || a1 == 0.0f || a2 == 0.0f)
+ return false;
+ float s0 = sinf(a0);
+ float s1 = sinf(a1);
+ float s2 = sinf(a2);
+ if (s1 > s0 && s1 > s2) {
+ swap(s1, s2);
+ swap(s0, s1);
+ swap(a1, a2);
+ swap(a0, a1);
+ swap(id1, id2);
+ swap(id0, id1);
+ } else if (s0 > s1 && s0 > s2) {
+ swap(s0, s2);
+ swap(s0, s1);
+ swap(a0, a2);
+ swap(a0, a1);
+ swap(id0, id2);
+ swap(id0, id1);
+ }
+ float c0 = cosf(a0);
+ float ratio = (s2 == 0.0f) ? 1.0f : s1 / s2;
+ float cosine = c0 * ratio;
+ float sine = s0 * ratio;
+ // Note : 2*id + 0 --> u
+ // 2*id + 1 --> v
+ int u0_id = 2 * id0 + 0;
+ int v0_id = 2 * id0 + 1;
+ int u1_id = 2 * id1 + 0;
+ int v1_id = 2 * id1 + 1;
+ int u2_id = 2 * id2 + 0;
+ int v2_id = 2 * id2 + 1;
+ // Real part
+ opennl::nlBegin(context, NL_ROW);
+ opennl::nlCoefficient(context, u0_id, cosine - 1.0f);
+ opennl::nlCoefficient(context, v0_id, -sine);
+ opennl::nlCoefficient(context, u1_id, -cosine);
+ opennl::nlCoefficient(context, v1_id, sine);
+ opennl::nlCoefficient(context, u2_id, 1);
+ opennl::nlEnd(context, NL_ROW);
+ // Imaginary part
+ opennl::nlBegin(context, NL_ROW);
+ opennl::nlCoefficient(context, u0_id, sine);
+ opennl::nlCoefficient(context, v0_id, cosine - 1.0f);
+ opennl::nlCoefficient(context, u1_id, -sine);
+ opennl::nlCoefficient(context, v1_id, -cosine);
+ opennl::nlCoefficient(context, v2_id, 1);
+ opennl::nlEnd(context, NL_ROW);
+ return true;
+}
+
+static bool computeLeastSquaresConformalMap(Mesh *mesh) {
uint32_t lockedVertex0, lockedVertex1;
if (!findApproximateDiameterVertices(mesh, &lockedVertex0, &lockedVertex1)) {
// Mesh has no boundaries.
@@ -6635,55 +6153,57 @@ static bool computeLeastSquaresConformalMap(Mesh *mesh)
opennl::nlSolverParameteri(context, NL_NB_VARIABLES, int(2 * vertexCount));
opennl::nlSolverParameteri(context, NL_MAX_ITERATIONS, int(5 * vertexCount));
opennl::nlBegin(context, NL_SYSTEM);
- const Vector2 *texcoords = mesh->texcoords();
+ ArrayView<Vector2> texcoords = mesh->texcoords();
for (uint32_t i = 0; i < vertexCount; i++) {
opennl::nlSetVariable(context, 2 * i, texcoords[i].x);
opennl::nlSetVariable(context, 2 * i + 1, texcoords[i].y);
if (i == lockedVertex0 || i == lockedVertex1) {
opennl::nlLockVariable(context, 2 * i);
opennl::nlLockVariable(context, 2 * i + 1);
- }
+ }
}
opennl::nlBegin(context, NL_MATRIX);
const uint32_t faceCount = mesh->faceCount();
- const Vector3 *positions = mesh->positions();
- const uint32_t *indices = mesh->indices();
+ ConstArrayView<Vector3> positions = mesh->positions();
+ ConstArrayView<uint32_t> indices = mesh->indices();
for (uint32_t f = 0; f < faceCount; f++) {
const uint32_t v0 = indices[f * 3 + 0];
const uint32_t v1 = indices[f * 3 + 1];
const uint32_t v2 = indices[f * 3 + 2];
- Vector2 z0, z1, z2;
- projectTriangle(positions[v0], positions[v1], positions[v2], &z0, &z1, &z2, mesh->epsilon());
- double a = z1.x - z0.x;
- double b = z1.y - z0.y;
- double c = z2.x - z0.x;
- double d = z2.y - z0.y;
- XA_DEBUG_ASSERT(b == 0.0);
- // Note : 2*id + 0 --> u
- // 2*id + 1 --> v
- uint32_t u0_id = 2 * v0;
- uint32_t v0_id = 2 * v0 + 1;
- uint32_t u1_id = 2 * v1;
- uint32_t v1_id = 2 * v1 + 1;
- uint32_t u2_id = 2 * v2;
- uint32_t v2_id = 2 * v2 + 1;
- // Note : b = 0
- // Real part
- opennl::nlBegin(context, NL_ROW);
- opennl::nlCoefficient(context, u0_id, -a+c) ;
- opennl::nlCoefficient(context, v0_id, b-d) ;
- opennl::nlCoefficient(context, u1_id, -c) ;
- opennl::nlCoefficient(context, v1_id, d) ;
- opennl::nlCoefficient(context, u2_id, a);
- opennl::nlEnd(context, NL_ROW);
- // Imaginary part
- opennl::nlBegin(context, NL_ROW);
- opennl::nlCoefficient(context, u0_id, -b+d);
- opennl::nlCoefficient(context, v0_id, -a+c);
- opennl::nlCoefficient(context, u1_id, -d);
- opennl::nlCoefficient(context, v1_id, -c);
- opennl::nlCoefficient(context, v2_id, a);
- opennl::nlEnd(context, NL_ROW);
+ if (!setup_abf_relations(context, v0, v1, v2, positions[v0], positions[v1], positions[v2])) {
+ Vector2 z0, z1, z2;
+ projectTriangle(positions[v0], positions[v1], positions[v2], &z0, &z1, &z2);
+ double a = z1.x - z0.x;
+ double b = z1.y - z0.y;
+ double c = z2.x - z0.x;
+ double d = z2.y - z0.y;
+ XA_DEBUG_ASSERT(b == 0.0);
+ // Note : 2*id + 0 --> u
+ // 2*id + 1 --> v
+ uint32_t u0_id = 2 * v0;
+ uint32_t v0_id = 2 * v0 + 1;
+ uint32_t u1_id = 2 * v1;
+ uint32_t v1_id = 2 * v1 + 1;
+ uint32_t u2_id = 2 * v2;
+ uint32_t v2_id = 2 * v2 + 1;
+ // Note : b = 0
+ // Real part
+ opennl::nlBegin(context, NL_ROW);
+ opennl::nlCoefficient(context, u0_id, -a + c);
+ opennl::nlCoefficient(context, v0_id, b - d);
+ opennl::nlCoefficient(context, u1_id, -c);
+ opennl::nlCoefficient(context, v1_id, d);
+ opennl::nlCoefficient(context, u2_id, a);
+ opennl::nlEnd(context, NL_ROW);
+ // Imaginary part
+ opennl::nlBegin(context, NL_ROW);
+ opennl::nlCoefficient(context, u0_id, -b + d);
+ opennl::nlCoefficient(context, v0_id, -a + c);
+ opennl::nlCoefficient(context, u1_id, -d);
+ opennl::nlCoefficient(context, v1_id, -c);
+ opennl::nlCoefficient(context, v2_id, a);
+ opennl::nlEnd(context, NL_ROW);
+ }
}
opennl::nlEnd(context, NL_MATRIX);
opennl::nlEnd(context, NL_SYSTEM);
@@ -6694,7 +6214,7 @@ static bool computeLeastSquaresConformalMap(Mesh *mesh)
for (uint32_t i = 0; i < vertexCount; i++) {
const double u = opennl::nlGetVariable(context, 2 * i);
const double v = opennl::nlGetVariable(context, 2 * i + 1);
- mesh->texcoord(i) = Vector2((float)u, (float)v);
+ texcoords[i] = Vector2((float)u, (float)v);
XA_DEBUG_ASSERT(!isNan(mesh->texcoord(i).x));
XA_DEBUG_ASSERT(!isNan(mesh->texcoord(i).y));
}
@@ -6702,30 +6222,26 @@ static bool computeLeastSquaresConformalMap(Mesh *mesh)
return true;
}
-#if XA_RECOMPUTE_CHARTS
-struct PiecewiseParam
-{
- void reset(const Mesh *mesh, uint32_t faceCount)
- {
+struct PiecewiseParam {
+ void reset(const Mesh *mesh) {
m_mesh = mesh;
- m_faceCount = faceCount;
+ const uint32_t faceCount = m_mesh->faceCount();
const uint32_t vertexCount = m_mesh->vertexCount();
m_texcoords.resize(vertexCount);
- m_patch.reserve(m_faceCount);
- m_candidates.reserve(m_faceCount);
- m_faceInAnyPatch.resize(m_faceCount);
+ m_patch.reserve(faceCount);
+ m_candidates.reserve(faceCount);
+ m_faceInAnyPatch.resize(faceCount);
m_faceInAnyPatch.zeroOutMemory();
- m_faceInvalid.resize(m_faceCount);
- m_faceInPatch.resize(m_faceCount);
+ m_faceInvalid.resize(faceCount);
+ m_faceInPatch.resize(faceCount);
m_vertexInPatch.resize(vertexCount);
- m_faceToCandidate.resize(m_faceCount);
+ m_faceToCandidate.resize(faceCount);
}
ConstArrayView<uint32_t> chartFaces() const { return m_patch; }
- const Vector2 *texcoords() const { return m_texcoords.data(); }
+ ConstArrayView<Vector2> texcoords() const { return m_texcoords; }
- bool computeChart()
- {
+ bool computeChart() {
// Clear per-patch state.
m_patch.clear();
m_candidates.clear();
@@ -6734,8 +6250,9 @@ struct PiecewiseParam
m_faceInPatch.zeroOutMemory();
m_vertexInPatch.zeroOutMemory();
// Add the seed face (first unassigned face) to the patch.
+ const uint32_t faceCount = m_mesh->faceCount();
uint32_t seed = UINT32_MAX;
- for (uint32_t f = 0; f < m_faceCount; f++) {
+ for (uint32_t f = 0; f < faceCount; f++) {
if (m_faceInAnyPatch.get(f))
continue;
seed = f;
@@ -6749,7 +6266,7 @@ struct PiecewiseParam
}
addFaceToPatch(seed);
// Initialize the boundary grid.
- m_boundaryGrid.reset(m_texcoords.data(), m_mesh->indices());
+ m_boundaryGrid.reset(m_texcoords, m_mesh->indices());
for (Mesh::FaceEdgeIterator it(m_mesh, seed); !it.isDone(); it.advance())
m_boundaryGrid.append(it.edge());
break;
@@ -6793,22 +6310,34 @@ struct PiecewiseParam
break;
}
}
+ // Check for zero area and flipped faces (using area).
+ for (CandidateIterator it(bestCandidate); !it.isDone(); it.advance()) {
+ const Vector2 a = m_texcoords[m_mesh->vertexAt(it.current()->face * 3 + 0)];
+ const Vector2 b = m_texcoords[m_mesh->vertexAt(it.current()->face * 3 + 1)];
+ const Vector2 c = m_texcoords[m_mesh->vertexAt(it.current()->face * 3 + 2)];
+ const float area = triangleArea(a, b, c);
+ if (area <= 0.0f) {
+ invalid = true;
+ break;
+ }
+ }
// Check for boundary intersection.
if (!invalid) {
XA_PROFILE_START(parameterizeChartsPiecewiseBoundaryIntersection)
// Test candidate edges that would form part of the new patch boundary.
// Ignore boundary edges that would become internal if the candidate faces were added to the patch.
- Array<uint32_t> newBoundaryEdges, ignoreEdges;
+ m_newBoundaryEdges.clear();
+ m_ignoreBoundaryEdges.clear();
for (CandidateIterator candidateIt(bestCandidate); !candidateIt.isDone(); candidateIt.advance()) {
for (Mesh::FaceEdgeIterator it(m_mesh, candidateIt.current()->face); !it.isDone(); it.advance()) {
const uint32_t oface = it.oppositeFace();
- if (oface == UINT32_MAX || oface >= m_faceCount || !m_faceInPatch.get(oface))
- newBoundaryEdges.push_back(it.edge());
- if (oface != UINT32_MAX && oface < m_faceCount && m_faceInPatch.get(oface))
- ignoreEdges.push_back(it.oppositeEdge());
+ if (oface == UINT32_MAX || !m_faceInPatch.get(oface))
+ m_newBoundaryEdges.push_back(it.edge());
+ if (oface != UINT32_MAX && m_faceInPatch.get(oface))
+ m_ignoreBoundaryEdges.push_back(it.oppositeEdge());
}
}
- invalid = m_boundaryGrid.intersect(m_mesh->epsilon(), newBoundaryEdges, ignoreEdges);
+ invalid = m_boundaryGrid.intersect(m_mesh->epsilon(), m_newBoundaryEdges, m_ignoreBoundaryEdges);
XA_PROFILE_END(parameterizeChartsPiecewiseBoundaryIntersection)
}
if (invalid) {
@@ -6826,11 +6355,11 @@ struct PiecewiseParam
removeLinkedCandidates(bestCandidate);
// Reset the grid with all edges on the patch boundary.
XA_PROFILE_START(parameterizeChartsPiecewiseBoundaryIntersection)
- m_boundaryGrid.reset(m_texcoords.data(), m_mesh->indices());
+ m_boundaryGrid.reset(m_texcoords, m_mesh->indices());
for (uint32_t i = 0; i < m_patch.size(); i++) {
for (Mesh::FaceEdgeIterator it(m_mesh, m_patch[i]); !it.isDone(); it.advance()) {
const uint32_t oface = it.oppositeFace();
- if (oface == UINT32_MAX || oface >= m_faceCount || !m_faceInPatch.get(oface))
+ if (oface == UINT32_MAX || !m_faceInPatch.get(oface))
m_boundaryGrid.append(it.edge());
}
}
@@ -6841,8 +6370,7 @@ struct PiecewiseParam
}
private:
- struct Candidate
- {
+ struct Candidate {
uint32_t face, vertex;
Candidate *prev, *next; // The previous/next candidate with the same vertex.
Vector2 position;
@@ -6852,10 +6380,14 @@ private:
float patchVertexOrient;
};
- struct CandidateIterator
- {
- CandidateIterator(Candidate *head) : m_current(head) { XA_DEBUG_ASSERT(!head->prev); }
- void advance() { if (m_current != nullptr) { m_current = m_current->next; } }
+ struct CandidateIterator {
+ CandidateIterator(Candidate *head) :
+ m_current(head) { XA_DEBUG_ASSERT(!head->prev); }
+ void advance() {
+ if (m_current != nullptr) {
+ m_current = m_current->next;
+ }
+ }
bool isDone() const { return !m_current; }
Candidate *current() { return m_current; }
@@ -6864,7 +6396,6 @@ private:
};
const Mesh *m_mesh;
- uint32_t m_faceCount;
Array<Vector2> m_texcoords;
BitArray m_faceInAnyPatch; // Face is in a previous chart patch or the current patch.
Array<Candidate *> m_candidates; // Incident faces to the patch.
@@ -6873,9 +6404,9 @@ private:
BitArray m_faceInPatch, m_vertexInPatch; // Face/vertex is in the current patch.
BitArray m_faceInvalid; // Face cannot be added to the patch - flipped, cost too high or causes boundary intersection.
UniformGrid2 m_boundaryGrid;
+ Array<uint32_t> m_newBoundaryEdges, m_ignoreBoundaryEdges; // Temp arrays used when testing for boundary intersection.
- void addFaceToPatch(uint32_t face)
- {
+ void addFaceToPatch(uint32_t face) {
XA_DEBUG_ASSERT(!m_faceInPatch.get(face));
XA_DEBUG_ASSERT(!m_faceInAnyPatch.get(face));
m_patch.push_back(face);
@@ -6884,7 +6415,7 @@ private:
// Find new candidate faces on the patch incident to the newly added face.
for (Mesh::FaceEdgeIterator it(m_mesh, face); !it.isDone(); it.advance()) {
const uint32_t oface = it.oppositeFace();
- if (oface == UINT32_MAX || oface >= m_faceCount || m_faceInAnyPatch.get(oface) || m_faceToCandidate[oface])
+ if (oface == UINT32_MAX || m_faceInAnyPatch.get(oface) || m_faceToCandidate[oface])
continue;
// Found an active edge on the patch front.
// Find the free vertex (the vertex that isn't on the active edge).
@@ -6900,12 +6431,14 @@ private:
}
}
XA_DEBUG_ASSERT(freeVertex != UINT32_MAX);
- // If the free vertex is already in the patch, the face is enclosed by the patch. Add the face to the patch - don't need to assign texcoords.
- /*if (m_vertexInPatch.get(freeVertex)) {
+ if (m_vertexInPatch.get(freeVertex)) {
+#if 0
+ // If the free vertex is already in the patch, the face is enclosed by the patch. Add the face to the patch - don't need to assign texcoords.
freeVertex = UINT32_MAX;
- addFaceToPatch(oface, false);
+ addFaceToPatch(oface);
+#endif
continue;
- }*/
+ }
// Check this here rather than above so faces enclosed by the patch are always added.
if (m_faceInvalid.get(oface))
continue;
@@ -6913,8 +6446,7 @@ private:
}
}
- void addCandidateFace(uint32_t patchEdge, float patchVertexOrient, uint32_t face, uint32_t edge, uint32_t freeVertex)
- {
+ void addCandidateFace(uint32_t patchEdge, float patchVertexOrient, uint32_t face, uint32_t edge, uint32_t freeVertex) {
XA_DEBUG_ASSERT(!m_faceToCandidate[face]);
Vector2 texcoords[3];
orthoProjectFace(face, texcoords);
@@ -6960,8 +6492,10 @@ private:
uv.x = x + texcoords[localVertex0].x;
uv.y = y + texcoords[localVertex0].y;
}
- if (isNan(texcoords[localFreeVertex].x) || isNan(texcoords[localFreeVertex].y))
+ if (isNan(texcoords[localFreeVertex].x) || isNan(texcoords[localFreeVertex].y)) {
+ m_faceInvalid.set(face);
return;
+ }
// Check for local overlap (flipped triangle).
// The patch face vertex that isn't on the active edge and the free vertex should be oriented on opposite sides to the active edge.
const float freeVertexOrient = orientToEdge(m_texcoords[vertex0], m_texcoords[vertex1], texcoords[localFreeVertex]);
@@ -6975,12 +6509,10 @@ private:
return;
}
const float cost = fabsf(stretch - 1.0f);
-#if 0
- if (cost > 0.25f) {
+ if (cost > 0.5f) {
m_faceInvalid.set(face);
return;
}
-#endif
// Add the candidate.
Candidate *candidate = XA_ALLOC(MemTag::Default, Candidate);
candidate->face = face;
@@ -7017,8 +6549,7 @@ private:
it.current()->maxCost = maxCost;
}
- Candidate *linkedCandidateHead(Candidate *candidate)
- {
+ Candidate *linkedCandidateHead(Candidate *candidate) {
Candidate *current = candidate;
for (;;) {
if (!current->prev)
@@ -7028,8 +6559,7 @@ private:
return current;
}
- void removeLinkedCandidates(Candidate *head)
- {
+ void removeLinkedCandidates(Candidate *head) {
XA_DEBUG_ASSERT(!head->prev);
Candidate *current = head;
while (current) {
@@ -7046,10 +6576,9 @@ private:
}
}
- void orthoProjectFace(uint32_t face, Vector2 *texcoords) const
- {
- const Vector3 normal = m_mesh->computeFaceNormal(face);
- const Vector3 tangent = normalize(m_mesh->position(m_mesh->vertexAt(face * 3 + 1)) - m_mesh->position(m_mesh->vertexAt(face * 3 + 0)), kEpsilon);
+ void orthoProjectFace(uint32_t face, Vector2 *texcoords) const {
+ const Vector3 normal = -m_mesh->computeFaceNormal(face);
+ const Vector3 tangent = normalize(m_mesh->position(m_mesh->vertexAt(face * 3 + 1)) - m_mesh->position(m_mesh->vertexAt(face * 3 + 0)));
const Vector3 bitangent = cross(normal, tangent);
for (uint32_t i = 0; i < 3; i++) {
const Vector3 &pos = m_mesh->position(m_mesh->vertexAt(face * 3 + i));
@@ -7057,16 +6586,14 @@ private:
}
}
- float parametricArea(const Vector2 *texcoords) const
- {
+ float parametricArea(const Vector2 *texcoords) const {
const Vector2 &v1 = texcoords[0];
const Vector2 &v2 = texcoords[1];
const Vector2 &v3 = texcoords[2];
return ((v2.x - v1.x) * (v3.y - v1.y) - (v3.x - v1.x) * (v2.y - v1.y)) * 0.5f;
}
- float computeStretch(Vector3 p1, Vector3 p2, Vector3 p3, Vector2 t1, Vector2 t2, Vector2 t3) const
- {
+ float computeStretch(Vector3 p1, Vector3 p2, Vector3 p3, Vector2 t1, Vector2 t2, Vector2 t3) const {
float parametricArea = ((t2.y - t1.y) * (t3.x - t1.x) - (t3.y - t1.y) * (t2.x - t1.x)) * 0.5f;
if (isZero(parametricArea, kAreaEpsilon))
return FLT_MAX;
@@ -7080,16 +6607,13 @@ private:
}
// Return value is positive if the point is one side of the edge, negative if on the other side.
- float orientToEdge(Vector2 edgeVertex0, Vector2 edgeVertex1, Vector2 point) const
- {
+ float orientToEdge(Vector2 edgeVertex0, Vector2 edgeVertex1, Vector2 point) const {
return (edgeVertex0.x - point.x) * (edgeVertex1.y - point.y) - (edgeVertex0.y - point.y) * (edgeVertex1.x - point.x);
}
};
-#endif
// Estimate quality of existing parameterization.
-struct Quality
-{
+struct Quality {
// computeBoundaryIntersection
bool boundaryIntersection = false;
@@ -7106,8 +6630,7 @@ struct Quality
float conformalMetric = 0.0f;
float authalicMetric = 0.0f;
- void computeBoundaryIntersection(const Mesh *mesh, UniformGrid2 &boundaryGrid)
- {
+ void computeBoundaryIntersection(const Mesh *mesh, UniformGrid2 &boundaryGrid) {
const Array<uint32_t> &boundaryEdges = mesh->boundaryEdges();
const uint32_t boundaryEdgeCount = boundaryEdges.size();
boundaryGrid.reset(mesh->texcoords(), mesh->indices(), boundaryEdgeCount);
@@ -7123,11 +6646,11 @@ struct Quality
#endif
}
- void computeFlippedFaces(const Mesh *mesh, uint32_t faceCount, Array<uint32_t> *flippedFaces)
- {
+ void computeFlippedFaces(const Mesh *mesh, Array<uint32_t> *flippedFaces) {
totalTriangleCount = flippedTriangleCount = zeroAreaTriangleCount = 0;
if (flippedFaces)
flippedFaces->clear();
+ const uint32_t faceCount = mesh->faceCount();
for (uint32_t f = 0; f < faceCount; f++) {
Vector2 texcoord[3];
for (int i = 0; i < 3; i++) {
@@ -7159,8 +6682,7 @@ struct Quality
flippedFaces->clear();
flippedTriangleCount = 0;
}
- if (flippedTriangleCount > totalTriangleCount / 2)
- {
+ if (flippedTriangleCount > totalTriangleCount / 2) {
// If more than half the triangles are flipped, reverse the flipped / not flipped classification.
flippedTriangleCount = totalTriangleCount - flippedTriangleCount;
if (flippedFaces) {
@@ -7182,10 +6704,10 @@ struct Quality
}
}
- void computeMetrics(const Mesh *mesh, uint32_t faceCount)
- {
+ void computeMetrics(const Mesh *mesh) {
totalGeometricArea = totalParametricArea = 0.0f;
stretchMetric = maxStretchMetric = conformalMetric = authalicMetric = 0.0f;
+ const uint32_t faceCount = mesh->faceCount();
for (uint32_t f = 0; f < faceCount; f++) {
Vector3 pos[3];
Vector2 texcoord[3];
@@ -7214,7 +6736,7 @@ struct Quality
const float a = dot(Ss, Ss); // E
const float b = dot(Ss, St); // F
const float c = dot(St, St); // G
- // Compute eigen-values of the first fundamental form:
+ // Compute eigen-values of the first fundamental form:
const float sigma1 = sqrtf(0.5f * max(0.0f, a + c - sqrtf(square(a - c) + 4 * square(b)))); // gamma uppercase, min eigenvalue.
const float sigma2 = sqrtf(0.5f * max(0.0f, a + c + sqrtf(square(a - c) + 4 * square(b)))); // gamma lowercase, max eigenvalue.
XA_ASSERT(sigma2 > sigma1 || equal(sigma1, sigma2, kEpsilon));
@@ -7245,347 +6767,261 @@ struct Quality
if (totalGeometricArea > 0.0f) {
const float normFactor = sqrtf(totalParametricArea / totalGeometricArea);
stretchMetric = sqrtf(stretchMetric / totalGeometricArea) * normFactor;
- maxStretchMetric *= normFactor;
+ maxStretchMetric *= normFactor;
conformalMetric = sqrtf(conformalMetric / totalGeometricArea);
authalicMetric = sqrtf(authalicMetric / totalGeometricArea);
}
}
};
-struct ChartWarningFlags
-{
- enum Enum
- {
- CloseHolesFailed = 1<<1,
- FixTJunctionsDuplicatedEdge = 1<<2,
- FixTJunctionsFailed = 1<<3,
- TriangulateDuplicatedEdge = 1<<4,
- };
-};
-
-struct ChartCtorBuffers
-{
+struct ChartCtorBuffers {
Array<uint32_t> chartMeshIndices;
Array<uint32_t> unifiedMeshIndices;
- Array<uint32_t> boundaryLoops;
};
-class Chart
-{
+class Chart {
public:
- Chart(ChartCtorBuffers &buffers, const ParameterizeOptions &options, const Basis &basis, ConstArrayView<uint32_t> faces, const Mesh *sourceMesh, uint32_t chartGroupId, uint32_t chartId) : m_basis(basis), m_mesh(nullptr), m_unifiedMesh(nullptr), m_unmodifiedUnifiedMesh(nullptr), m_type(ChartType::LSCM), m_warningFlags(0), m_closedHolesCount(0), m_fixedTJunctionsCount(0), m_isInvalid(false)
- {
+ Chart(const Basis &basis, segment::ChartGeneratorType::Enum generatorType, ConstArrayView<uint32_t> faces, const Mesh *sourceMesh, uint32_t chartGroupId, uint32_t chartId) :
+ m_basis(basis), m_unifiedMesh(nullptr), m_type(ChartType::LSCM), m_generatorType(generatorType), m_tjunctionCount(0), m_originalVertexCount(0), m_isInvalid(false) {
XA_UNUSED(chartGroupId);
XA_UNUSED(chartId);
m_faceToSourceFaceMap.copyFrom(faces.data, faces.length);
const uint32_t approxVertexCount = min(faces.length * 3, sourceMesh->vertexCount());
- m_mesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, sourceMesh->epsilon(), approxVertexCount, faces.length);
m_unifiedMesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, sourceMesh->epsilon(), approxVertexCount, faces.length);
HashMap<uint32_t, PassthroughHash<uint32_t>> sourceVertexToUnifiedVertexMap(MemTag::Mesh, approxVertexCount), sourceVertexToChartVertexMap(MemTag::Mesh, approxVertexCount);
- // Add vertices.
- const uint32_t faceCount = m_initialFaceCount = faces.length;
+ m_originalIndices.resize(faces.length * 3);
+ // Add geometry.
+ const uint32_t faceCount = faces.length;
for (uint32_t f = 0; f < faceCount; f++) {
+ uint32_t unifiedIndices[3];
for (uint32_t i = 0; i < 3; i++) {
const uint32_t sourceVertex = sourceMesh->vertexAt(m_faceToSourceFaceMap[f] * 3 + i);
- const uint32_t sourceUnifiedVertex = sourceMesh->firstColocal(sourceVertex);
+ uint32_t sourceUnifiedVertex = sourceMesh->firstColocalVertex(sourceVertex);
+ if (m_generatorType == segment::ChartGeneratorType::OriginalUv && sourceVertex != sourceUnifiedVertex) {
+ // Original UVs: don't unify vertices with different UVs; we want to preserve UVs.
+ if (!equal(sourceMesh->texcoord(sourceVertex), sourceMesh->texcoord(sourceUnifiedVertex), sourceMesh->epsilon()))
+ sourceUnifiedVertex = sourceVertex;
+ }
uint32_t unifiedVertex = sourceVertexToUnifiedVertexMap.get(sourceUnifiedVertex);
if (unifiedVertex == UINT32_MAX) {
unifiedVertex = sourceVertexToUnifiedVertexMap.add(sourceUnifiedVertex);
- m_unifiedMesh->addVertex(sourceMesh->position(sourceVertex));
+ m_unifiedMesh->addVertex(sourceMesh->position(sourceVertex), Vector3(0.0f), sourceMesh->texcoord(sourceVertex));
}
if (sourceVertexToChartVertexMap.get(sourceVertex) == UINT32_MAX) {
sourceVertexToChartVertexMap.add(sourceVertex);
m_vertexToSourceVertexMap.push_back(sourceVertex);
m_chartVertexToUnifiedVertexMap.push_back(unifiedVertex);
- m_mesh->addVertex(sourceMesh->position(sourceVertex), Vector3(0.0f), sourceMesh->texcoord(sourceVertex));
+ m_originalVertexCount++;
}
- }
- }
- // Add faces.
- for (uint32_t f = 0; f < faceCount; f++) {
- uint32_t indices[3], unifiedIndices[3];
- for (uint32_t i = 0; i < 3; i++) {
- const uint32_t sourceVertex = sourceMesh->vertexAt(m_faceToSourceFaceMap[f] * 3 + i);
- const uint32_t sourceUnifiedVertex = sourceMesh->firstColocal(sourceVertex);
- indices[i] = sourceVertexToChartVertexMap.get(sourceVertex);
- XA_DEBUG_ASSERT(indices[i] != UINT32_MAX);
+ m_originalIndices[f * 3 + i] = sourceVertexToChartVertexMap.get(sourceVertex);
+ ;
+ XA_DEBUG_ASSERT(m_originalIndices[f * 3 + i] != UINT32_MAX);
unifiedIndices[i] = sourceVertexToUnifiedVertexMap.get(sourceUnifiedVertex);
XA_DEBUG_ASSERT(unifiedIndices[i] != UINT32_MAX);
}
- Mesh::AddFaceResult::Enum result = m_mesh->addFace(indices);
- XA_UNUSED(result);
- XA_DEBUG_ASSERT(result == Mesh::AddFaceResult::OK);
-#if XA_DEBUG
- // Unifying colocals may create degenerate edges. e.g. if two triangle vertices are colocal.
- for (int i = 0; i < 3; i++) {
- const uint32_t index1 = unifiedIndices[i];
- const uint32_t index2 = unifiedIndices[(i + 1) % 3];
- XA_DEBUG_ASSERT(index1 != index2);
- }
-#endif
- result = m_unifiedMesh->addFace(unifiedIndices);
- XA_UNUSED(result);
- XA_DEBUG_ASSERT(result == Mesh::AddFaceResult::OK);
+ m_unifiedMesh->addFace(unifiedIndices);
}
- m_mesh->createBoundaries(); // For AtlasPacker::computeBoundingBox
- m_mesh->destroyEdgeMap(); // Only needed it for createBoundaries.
m_unifiedMesh->createBoundaries();
- if (meshIsPlanar(*m_unifiedMesh)) {
+ if (m_generatorType == segment::ChartGeneratorType::Planar) {
m_type = ChartType::Planar;
return;
}
- m_unifiedMesh->linkBoundaries();
-#if XA_DEBUG_EXPORT_OBJ_BEFORE_FIX_TJUNCTION
- m_unifiedMesh->writeObjFile("debug_before_fix_tjunction.obj");
-#endif
- bool duplicatedEdge = false, failed = false;
- if (options.fixTJunctions) {
- XA_PROFILE_START(fixChartMeshTJunctions)
- Mesh *fixedUnifiedMesh = meshFixTJunctions(*m_unifiedMesh, &duplicatedEdge, &failed, &m_fixedTJunctionsCount);
- XA_PROFILE_END(fixChartMeshTJunctions)
- if (fixedUnifiedMesh) {
- if (duplicatedEdge)
- m_warningFlags |= ChartWarningFlags::FixTJunctionsDuplicatedEdge;
- if (failed)
- m_warningFlags |= ChartWarningFlags::FixTJunctionsFailed;
- m_unmodifiedUnifiedMesh = m_unifiedMesh;
- m_unifiedMesh = fixedUnifiedMesh;
- m_unifiedMesh->createBoundaries();
- m_unifiedMesh->linkBoundaries();
- m_initialFaceCount = m_unifiedMesh->faceCount(); // Fixing t-junctions rewrites faces.
- }
- }
- if (options.closeHoles) {
- // See if there are any holes that need closing.
- Array<uint32_t> &boundaryLoops = buffers.boundaryLoops;
- meshGetBoundaryLoops(*m_unifiedMesh, boundaryLoops);
- if (boundaryLoops.size() > 1) {
-#if XA_DEBUG_EXPORT_OBJ_CLOSE_HOLES_ERROR
- const uint32_t faceCountBeforeHolesClosed = m_unifiedMesh->faceCount();
+#if XA_CHECK_T_JUNCTIONS
+ m_tjunctionCount = meshCheckTJunctions(*m_unifiedMesh);
+#if XA_DEBUG_EXPORT_OBJ_TJUNCTION
+ if (m_tjunctionCount > 0) {
+ char filename[256];
+ XA_SPRINTF(filename, sizeof(filename), "debug_mesh_%03u_chartgroup_%03u_chart_%03u_tjunction.obj", sourceMesh->id(), chartGroupId, chartId);
+ m_unifiedMesh->writeObjFile(filename);
+ }
#endif
- // Closing the holes is not always the best solution and does not fix all the problems.
- // We need to do some analysis of the holes and the genus to:
- // - Find cuts that reduce genus.
- // - Find cuts to connect holes.
- // - Use minimal spanning trees or seamster.
- XA_PROFILE_START(closeChartMeshHoles)
- uint32_t holeCount = 0;
-#if XA_DEBUG_EXPORT_OBJ_CLOSE_HOLES_ERROR
- Array<uint32_t> holeFaceCounts;
- failed = !meshCloseHoles(m_unifiedMesh, boundaryLoops, m_basis.normal, &holeFaceCounts);
-#else
- failed = !meshCloseHoles(m_unifiedMesh, boundaryLoops, m_basis.normal, &holeCount, nullptr);
#endif
- XA_PROFILE_END(closeChartMeshHoles)
- m_unifiedMesh->createBoundaries();
- m_unifiedMesh->linkBoundaries();
- meshGetBoundaryLoops(*m_unifiedMesh, boundaryLoops);
- if (failed || boundaryLoops.size() > 1)
- m_warningFlags |= ChartWarningFlags::CloseHolesFailed;
- m_closedHolesCount = holeCount;
-#if XA_DEBUG_EXPORT_OBJ_CLOSE_HOLES_ERROR
- if (m_warningFlags & ChartWarningFlags::CloseHolesFailed) {
- char filename[256];
- XA_SPRINTF(filename, sizeof(filename), "debug_mesh_%03u_chartgroup_%03u_chart_%03u_close_holes_error.obj", sourceMesh->id(), chartGroupId, chartId);
- FILE *file;
- XA_FOPEN(file, filename, "w");
- if (file) {
- m_unifiedMesh->writeObjVertices(file);
- fprintf(file, "s off\n");
- fprintf(file, "o object\n");
- for (uint32_t i = 0; i < faceCountBeforeHolesClosed; i++)
- m_unifiedMesh->writeObjFace(file, i);
- uint32_t face = faceCountBeforeHolesClosed;
- for (uint32_t i = 0; i < holeFaceCounts.size(); i++) {
- fprintf(file, "s off\n");
- fprintf(file, "o hole%u\n", i);
- for (uint32_t j = 0; j < holeFaceCounts[i]; j++) {
- m_unifiedMesh->writeObjFace(file, face);
- face++;
- }
- }
- m_unifiedMesh->writeObjBoundaryEges(file);
- m_unifiedMesh->writeObjLinkedBoundaries(file);
- fclose(file);
- }
- }
-#endif
- }
- }
}
-#if XA_RECOMPUTE_CHARTS
- Chart(ChartCtorBuffers &buffers, const Chart *parent, const Mesh *parentMesh, ConstArrayView<uint32_t> faces, const Vector2 *texcoords, const Mesh *sourceMesh) : m_mesh(nullptr), m_unifiedMesh(nullptr), m_unmodifiedUnifiedMesh(nullptr), m_type(ChartType::Piecewise), m_warningFlags(0), m_closedHolesCount(0), m_fixedTJunctionsCount(0), m_isInvalid(false)
- {
- const uint32_t faceCount = m_initialFaceCount = faces.length;
+ Chart(ChartCtorBuffers &buffers, const Chart *parent, const Mesh *parentMesh, ConstArrayView<uint32_t> faces, ConstArrayView<Vector2> texcoords, const Mesh *sourceMesh) :
+ m_unifiedMesh(nullptr), m_type(ChartType::Piecewise), m_generatorType(segment::ChartGeneratorType::Piecewise), m_tjunctionCount(0), m_originalVertexCount(0), m_isInvalid(false) {
+ const uint32_t faceCount = faces.length;
m_faceToSourceFaceMap.resize(faceCount);
for (uint32_t i = 0; i < faceCount; i++)
m_faceToSourceFaceMap[i] = parent->m_faceToSourceFaceMap[faces[i]]; // Map faces to parent chart source mesh.
// Copy face indices.
- m_mesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, sourceMesh->epsilon(), m_faceToSourceFaceMap.size() * 3, m_faceToSourceFaceMap.size());
Array<uint32_t> &chartMeshIndices = buffers.chartMeshIndices;
chartMeshIndices.resize(sourceMesh->vertexCount());
chartMeshIndices.fillBytes(0xff);
+ m_unifiedMesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, sourceMesh->epsilon(), m_faceToSourceFaceMap.size() * 3, m_faceToSourceFaceMap.size());
+ HashMap<uint32_t, PassthroughHash<uint32_t>> sourceVertexToUnifiedVertexMap(MemTag::Mesh, m_faceToSourceFaceMap.size() * 3);
// Add vertices.
for (uint32_t f = 0; f < faceCount; f++) {
for (uint32_t i = 0; i < 3; i++) {
const uint32_t vertex = sourceMesh->vertexAt(m_faceToSourceFaceMap[f] * 3 + i);
+ const uint32_t sourceUnifiedVertex = sourceMesh->firstColocalVertex(vertex);
const uint32_t parentVertex = parentMesh->vertexAt(faces[f] * 3 + i);
- if (chartMeshIndices[vertex] == (uint32_t)~0) {
- chartMeshIndices[vertex] = m_mesh->vertexCount();
+ uint32_t unifiedVertex = sourceVertexToUnifiedVertexMap.get(sourceUnifiedVertex);
+ if (unifiedVertex == UINT32_MAX) {
+ unifiedVertex = sourceVertexToUnifiedVertexMap.add(sourceUnifiedVertex);
+ m_unifiedMesh->addVertex(sourceMesh->position(vertex), Vector3(0.0f), texcoords[parentVertex]);
+ }
+ if (chartMeshIndices[vertex] == UINT32_MAX) {
+ chartMeshIndices[vertex] = m_originalVertexCount;
+ m_originalVertexCount++;
m_vertexToSourceVertexMap.push_back(vertex);
- m_mesh->addVertex(sourceMesh->position(vertex), Vector3(0.0f), texcoords[parentVertex]);
+ m_chartVertexToUnifiedVertexMap.push_back(unifiedVertex);
}
}
}
// Add faces.
+ m_originalIndices.resize(faceCount * 3);
for (uint32_t f = 0; f < faceCount; f++) {
- uint32_t indices[3];
+ uint32_t unifiedIndices[3];
for (uint32_t i = 0; i < 3; i++) {
const uint32_t vertex = sourceMesh->vertexAt(m_faceToSourceFaceMap[f] * 3 + i);
- indices[i] = chartMeshIndices[vertex];
+ m_originalIndices[f * 3 + i] = chartMeshIndices[vertex];
+ const uint32_t unifiedVertex = sourceMesh->firstColocalVertex(vertex);
+ unifiedIndices[i] = sourceVertexToUnifiedVertexMap.get(unifiedVertex);
}
- Mesh::AddFaceResult::Enum result = m_mesh->addFace(indices);
- XA_UNUSED(result);
- XA_DEBUG_ASSERT(result == Mesh::AddFaceResult::OK);
+ m_unifiedMesh->addFace(unifiedIndices);
}
- m_mesh->createBoundaries(); // For AtlasPacker::computeBoundingBox
- m_mesh->destroyEdgeMap(); // Only needed it for createBoundaries.
+ m_unifiedMesh->createBoundaries();
// Need to store texcoords for backup/restore so packing can be run multiple times.
backupTexcoords();
}
-#endif
- ~Chart()
- {
- if (m_mesh) {
- m_mesh->~Mesh();
- XA_FREE(m_mesh);
+ ~Chart() {
+ if (m_unifiedMesh) {
+ m_unifiedMesh->~Mesh();
+ XA_FREE(m_unifiedMesh);
+ m_unifiedMesh = nullptr;
}
- destroyUnifiedMesh();
}
bool isInvalid() const { return m_isInvalid; }
- ChartType::Enum type() const { return m_type; }
- uint32_t warningFlags() const { return m_warningFlags; }
- uint32_t closedHolesCount() const { return m_closedHolesCount; }
- uint32_t fixedTJunctionsCount() const { return m_fixedTJunctionsCount; }
+ ChartType type() const { return m_type; }
+ segment::ChartGeneratorType::Enum generatorType() const { return m_generatorType; }
+ uint32_t tjunctionCount() const { return m_tjunctionCount; }
const Quality &quality() const { return m_quality; }
- uint32_t initialFaceCount() const { return m_initialFaceCount; }
#if XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION
const Array<uint32_t> &paramFlippedFaces() const { return m_paramFlippedFaces; }
#endif
uint32_t mapFaceToSourceFace(uint32_t i) const { return m_faceToSourceFaceMap[i]; }
uint32_t mapChartVertexToSourceVertex(uint32_t i) const { return m_vertexToSourceVertexMap[i]; }
- const Mesh *mesh() const { return m_mesh; }
- Mesh *mesh() { return m_mesh; }
const Mesh *unifiedMesh() const { return m_unifiedMesh; }
- const Mesh *unmodifiedUnifiedMesh() const { return m_unmodifiedUnifiedMesh; }
+ Mesh *unifiedMesh() { return m_unifiedMesh; }
- void parameterize(const ParameterizeOptions &options, UniformGrid2 &boundaryGrid)
- {
- XA_PROFILE_START(parameterizeChartsOrthogonal)
- {
+ // Vertex count of the chart mesh before unifying vertices.
+ uint32_t originalVertexCount() const { return m_originalVertexCount; }
+
+ uint32_t originalVertexToUnifiedVertex(uint32_t v) const { return m_chartVertexToUnifiedVertexMap[v]; }
+
+ ConstArrayView<uint32_t> originalVertices() const { return m_originalIndices; }
+
+ void parameterize(const ChartOptions &options, UniformGrid2 &boundaryGrid) {
+ const uint32_t unifiedVertexCount = m_unifiedMesh->vertexCount();
+ if (m_generatorType == segment::ChartGeneratorType::OriginalUv) {
+ } else {
// Project vertices to plane.
- const uint32_t vertexCount = m_unifiedMesh->vertexCount();
- for (uint32_t i = 0; i < vertexCount; i++)
+ XA_PROFILE_START(parameterizeChartsOrthogonal)
+ for (uint32_t i = 0; i < unifiedVertexCount; i++)
m_unifiedMesh->texcoord(i) = Vector2(dot(m_basis.tangent, m_unifiedMesh->position(i)), dot(m_basis.bitangent, m_unifiedMesh->position(i)));
- }
- XA_PROFILE_END(parameterizeChartsOrthogonal)
- // Computing charts checks for flipped triangles and boundary intersection. Don't need to do that again here if chart is planar.
- if (m_type != ChartType::Planar) {
- XA_PROFILE_START(parameterizeChartsEvaluateQuality)
- m_quality.computeBoundaryIntersection(m_unifiedMesh, boundaryGrid);
- m_quality.computeFlippedFaces(m_unifiedMesh, m_initialFaceCount, nullptr);
- m_quality.computeMetrics(m_unifiedMesh, m_initialFaceCount);
- XA_PROFILE_END(parameterizeChartsEvaluateQuality)
- // Use orthogonal parameterization if quality is acceptable.
- if (!m_quality.boundaryIntersection && m_quality.flippedTriangleCount == 0 && m_quality.totalGeometricArea > 0.0f && m_quality.stretchMetric <= 1.1f && m_quality.maxStretchMetric <= 1.25f)
- m_type = ChartType::Ortho;
- }
- if (m_type == ChartType::LSCM) {
- XA_PROFILE_START(parameterizeChartsLSCM)
- if (options.func) {
- options.func(&m_unifiedMesh->position(0).x, &m_unifiedMesh->texcoord(0).x, m_unifiedMesh->vertexCount(), m_unifiedMesh->indices(), m_unifiedMesh->indexCount());
- }
- else
- computeLeastSquaresConformalMap(m_unifiedMesh);
- XA_PROFILE_END(parameterizeChartsLSCM)
- XA_PROFILE_START(parameterizeChartsEvaluateQuality)
- m_quality.computeBoundaryIntersection(m_unifiedMesh, boundaryGrid);
+ XA_PROFILE_END(parameterizeChartsOrthogonal)
+ // Computing charts checks for flipped triangles and boundary intersection. Don't need to do that again here if chart is planar.
+ if (m_type != ChartType::Planar && m_generatorType != segment::ChartGeneratorType::OriginalUv) {
+ XA_PROFILE_START(parameterizeChartsEvaluateQuality)
+ m_quality.computeBoundaryIntersection(m_unifiedMesh, boundaryGrid);
+ m_quality.computeFlippedFaces(m_unifiedMesh, nullptr);
+ m_quality.computeMetrics(m_unifiedMesh);
+ XA_PROFILE_END(parameterizeChartsEvaluateQuality)
+ // Use orthogonal parameterization if quality is acceptable.
+ if (!m_quality.boundaryIntersection && m_quality.flippedTriangleCount == 0 && m_quality.zeroAreaTriangleCount == 0 && m_quality.totalGeometricArea > 0.0f && m_quality.stretchMetric <= 1.1f && m_quality.maxStretchMetric <= 1.25f)
+ m_type = ChartType::Ortho;
+ }
+ if (m_type == ChartType::LSCM) {
+ XA_PROFILE_START(parameterizeChartsLSCM)
+ if (options.paramFunc) {
+ options.paramFunc(&m_unifiedMesh->position(0).x, &m_unifiedMesh->texcoord(0).x, m_unifiedMesh->vertexCount(), m_unifiedMesh->indices().data, m_unifiedMesh->indexCount());
+ } else
+ computeLeastSquaresConformalMap(m_unifiedMesh);
+ XA_PROFILE_END(parameterizeChartsLSCM)
+ XA_PROFILE_START(parameterizeChartsEvaluateQuality)
+ m_quality.computeBoundaryIntersection(m_unifiedMesh, boundaryGrid);
#if XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION
- m_quality.computeFlippedFaces(m_unifiedMesh, m_initialFaceCount, &m_paramFlippedFaces);
+ m_quality.computeFlippedFaces(m_unifiedMesh, &m_paramFlippedFaces);
#else
- m_quality.computeFlippedFaces(m_unifiedMesh, m_initialFaceCount, nullptr);
+ m_quality.computeFlippedFaces(m_unifiedMesh, nullptr);
#endif
- // Don't need to call computeMetrics here, that's only used in evaluateOrthoQuality to determine if quality is acceptable enough to use ortho projection.
- if (m_quality.boundaryIntersection || m_quality.flippedTriangleCount > 0)
- m_isInvalid = true;
- XA_PROFILE_END(parameterizeChartsEvaluateQuality)
+ // Don't need to call computeMetrics here, that's only used in evaluateOrthoQuality to determine if quality is acceptable enough to use ortho projection.
+ if (m_quality.boundaryIntersection || m_quality.flippedTriangleCount > 0 || m_quality.zeroAreaTriangleCount > 0)
+ m_isInvalid = true;
+ XA_PROFILE_END(parameterizeChartsEvaluateQuality)
+ }
}
+ if (options.fixWinding && m_unifiedMesh->computeFaceParametricArea(0) < 0.0f) {
+ for (uint32_t i = 0; i < unifiedVertexCount; i++)
+ m_unifiedMesh->texcoord(i).x *= -1.0f;
+ }
+#if XA_CHECK_PARAM_WINDING
+ const uint32_t faceCount = m_unifiedMesh->faceCount();
+ uint32_t flippedCount = 0;
+ for (uint32_t i = 0; i < faceCount; i++) {
+ const float area = m_unifiedMesh->computeFaceParametricArea(i);
+ if (area < 0.0f)
+ flippedCount++;
+ }
+ if (flippedCount == faceCount) {
+ XA_PRINT_WARNING("param: all faces flipped\n");
+ } else if (flippedCount > 0) {
+ XA_PRINT_WARNING("param: %u / %u faces flipped\n", flippedCount, faceCount);
+ }
+#endif
+
#if XA_DEBUG_ALL_CHARTS_INVALID
m_isInvalid = true;
#endif
- // Transfer parameterization from unified mesh to chart mesh.
- const uint32_t vertexCount = m_mesh->vertexCount();
- for (uint32_t v = 0; v < vertexCount; v++)
- m_mesh->texcoord(v) = m_unifiedMesh->texcoord(m_chartVertexToUnifiedVertexMap[v]);
- // Can destroy unified mesh now.
- // But not if the parameterization is invalid, the unified mesh will be needed for PiecewiseParameterization.
- if (!m_isInvalid)
- destroyUnifiedMesh();
// Need to store texcoords for backup/restore so packing can be run multiple times.
backupTexcoords();
}
- Vector2 computeParametricBounds() const
- {
+ Vector2 computeParametricBounds() const {
Vector2 minCorner(FLT_MAX, FLT_MAX);
Vector2 maxCorner(-FLT_MAX, -FLT_MAX);
- const uint32_t vertexCount = m_mesh->vertexCount();
+ const uint32_t vertexCount = m_unifiedMesh->vertexCount();
for (uint32_t v = 0; v < vertexCount; v++) {
- minCorner = min(minCorner, m_mesh->texcoord(v));
- maxCorner = max(maxCorner, m_mesh->texcoord(v));
+ minCorner = min(minCorner, m_unifiedMesh->texcoord(v));
+ maxCorner = max(maxCorner, m_unifiedMesh->texcoord(v));
}
return (maxCorner - minCorner) * 0.5f;
}
- void restoreTexcoords()
- {
- memcpy(m_mesh->texcoords(), m_backupTexcoords.data(), m_mesh->vertexCount() * sizeof(Vector2));
+#if XA_CHECK_PIECEWISE_CHART_QUALITY
+ void evaluateQuality(UniformGrid2 &boundaryGrid) {
+ m_quality.computeBoundaryIntersection(m_unifiedMesh, boundaryGrid);
+#if XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION
+ m_quality.computeFlippedFaces(m_unifiedMesh, &m_paramFlippedFaces);
+#else
+ m_quality.computeFlippedFaces(m_unifiedMesh, nullptr);
+#endif
+ if (m_quality.boundaryIntersection || m_quality.flippedTriangleCount > 0 || m_quality.zeroAreaTriangleCount > 0)
+ m_isInvalid = true;
}
+#endif
-private:
- void backupTexcoords()
- {
- m_backupTexcoords.resize(m_mesh->vertexCount());
- memcpy(m_backupTexcoords.data(), m_mesh->texcoords(), m_mesh->vertexCount() * sizeof(Vector2));
+ void restoreTexcoords() {
+ memcpy(m_unifiedMesh->texcoords().data, m_backupTexcoords.data(), m_unifiedMesh->vertexCount() * sizeof(Vector2));
}
- void destroyUnifiedMesh()
- {
- if (m_unifiedMesh) {
- m_unifiedMesh->~Mesh();
- XA_FREE(m_unifiedMesh);
- m_unifiedMesh = nullptr;
- }
- if (m_unmodifiedUnifiedMesh) {
- m_unmodifiedUnifiedMesh->~Mesh();
- XA_FREE(m_unmodifiedUnifiedMesh);
- m_unmodifiedUnifiedMesh = nullptr;
- }
- // Don't need this when unified meshes are destroyed.
- m_chartVertexToUnifiedVertexMap.destroy();
+private:
+ void backupTexcoords() {
+ m_backupTexcoords.resize(m_unifiedMesh->vertexCount());
+ memcpy(m_backupTexcoords.data(), m_unifiedMesh->texcoords().data, m_unifiedMesh->vertexCount() * sizeof(Vector2));
}
Basis m_basis;
- Mesh *m_mesh;
Mesh *m_unifiedMesh;
- Mesh *m_unmodifiedUnifiedMesh; // Unified mesh before fixing t-junctions. Null if no t-junctions were fixed
- ChartType::Enum m_type;
- uint32_t m_warningFlags;
- uint32_t m_initialFaceCount; // Before fixing T-junctions and/or closing holes.
- uint32_t m_closedHolesCount, m_fixedTJunctionsCount;
+ ChartType m_type;
+ segment::ChartGeneratorType::Enum m_generatorType;
+ uint32_t m_tjunctionCount;
+
+ uint32_t m_originalVertexCount;
+ Array<uint32_t> m_originalIndices;
// List of faces of the source mesh that belong to this chart.
Array<uint32_t> m_faceToSourceFaceMap;
@@ -7604,47 +7040,46 @@ private:
bool m_isInvalid;
};
-struct CreateAndParameterizeChartTaskArgs
-{
- const Basis *basis;
+struct CreateAndParameterizeChartTaskGroupArgs {
+ Progress *progress;
ThreadLocal<UniformGrid2> *boundaryGrid;
+ ThreadLocal<ChartCtorBuffers> *chartBuffers;
+ const ChartOptions *options;
+ ThreadLocal<PiecewiseParam> *pp;
+};
+
+struct CreateAndParameterizeChartTaskArgs {
+ const Basis *basis;
Chart *chart; // output
Array<Chart *> charts; // output (if more than one chart)
- ThreadLocal<ChartCtorBuffers> *chartBuffers;
+ segment::ChartGeneratorType::Enum chartGeneratorType;
const Mesh *mesh;
- const ParameterizeOptions *options;
-#if XA_RECOMPUTE_CHARTS
- ThreadLocal<PiecewiseParam> *pp;
-#endif
ConstArrayView<uint32_t> faces;
uint32_t chartGroupId;
uint32_t chartId;
};
-static void runCreateAndParameterizeChartTask(void *userData)
-{
- auto args = (CreateAndParameterizeChartTaskArgs *)userData;
+static void runCreateAndParameterizeChartTask(void *groupUserData, void *taskUserData) {
+ XA_PROFILE_START(createChartMeshAndParameterizeThread)
+ auto groupArgs = (CreateAndParameterizeChartTaskGroupArgs *)groupUserData;
+ auto args = (CreateAndParameterizeChartTaskArgs *)taskUserData;
XA_PROFILE_START(createChartMesh)
- args->chart = XA_NEW_ARGS(MemTag::Default, Chart, args->chartBuffers->get(), *args->options, *args->basis, args->faces, args->mesh, args->chartGroupId, args->chartId);
+ args->chart = XA_NEW_ARGS(MemTag::Default, Chart, *args->basis, args->chartGeneratorType, args->faces, args->mesh, args->chartGroupId, args->chartId);
XA_PROFILE_END(createChartMesh)
- args->chart->parameterize(*args->options, args->boundaryGrid->get());
+ XA_PROFILE_START(parameterizeCharts)
+ args->chart->parameterize(*groupArgs->options, groupArgs->boundaryGrid->get());
+ XA_PROFILE_END(parameterizeCharts)
#if XA_RECOMPUTE_CHARTS
- if (!args->chart->isInvalid())
+ if (!args->chart->isInvalid()) {
+ XA_PROFILE_END(createChartMeshAndParameterizeThread)
return;
+ }
// Recompute charts with invalid parameterizations.
XA_PROFILE_START(parameterizeChartsRecompute)
Chart *invalidChart = args->chart;
- // Fixing t-junctions rewrites unified mesh faces, and we need to map faces back to input mesh. So use the unmodified unified mesh.
- const Mesh *invalidMesh = invalidChart->unmodifiedUnifiedMesh();
- uint32_t faceCount = 0;
- if (invalidMesh) {
- faceCount = invalidMesh->faceCount();
- } else {
- invalidMesh = invalidChart->unifiedMesh();
- faceCount = invalidChart->initialFaceCount(); // Not invalidMesh->faceCount(). Don't want faces added by hole closing.
- }
- PiecewiseParam &pp = args->pp->get();
- pp.reset(invalidMesh, faceCount);
+ const Mesh *invalidMesh = invalidChart->unifiedMesh();
+ PiecewiseParam &pp = groupArgs->pp->get();
+ pp.reset(invalidMesh);
#if XA_DEBUG_EXPORT_OBJ_RECOMPUTED_CHARTS
char filename[256];
XA_SPRINTF(filename, sizeof(filename), "debug_mesh_%03u_chartgroup_%03u_chart_%03u_recomputed.obj", args->mesh->id(), args->chartGroupId, args->chartId);
@@ -7658,7 +7093,10 @@ static void runCreateAndParameterizeChartTask(void *userData)
XA_PROFILE_END(parameterizeChartsPiecewise)
if (!facesRemaining)
break;
- Chart *chart = XA_NEW_ARGS(MemTag::Default, Chart, args->chartBuffers->get(), invalidChart, invalidMesh, pp.chartFaces(), pp.texcoords(), args->mesh);
+ Chart *chart = XA_NEW_ARGS(MemTag::Default, Chart, groupArgs->chartBuffers->get(), invalidChart, invalidMesh, pp.chartFaces(), pp.texcoords(), args->mesh);
+#if XA_CHECK_PIECEWISE_CHART_QUALITY
+ chart->evaluateQuality(args->boundaryGrid->get());
+#endif
args->charts.push_back(chart);
#if XA_DEBUG_EXPORT_OBJ_RECOMPUTED_CHARTS
if (file) {
@@ -7686,50 +7124,63 @@ static void runCreateAndParameterizeChartTask(void *userData)
#endif
XA_PROFILE_END(parameterizeChartsRecompute)
#endif // XA_RECOMPUTE_CHARTS
+ XA_PROFILE_END(createChartMeshAndParameterizeThread)
+ // Update progress.
+ groupArgs->progress->increment(args->faces.length);
}
// Set of charts corresponding to mesh faces in the same face group.
-class ChartGroup
-{
+class ChartGroup {
public:
- ChartGroup(uint32_t id, const Mesh *sourceMesh, const MeshFaceGroups *sourceMeshFaceGroups, MeshFaceGroups::Handle faceGroup) : m_id(id), m_sourceMesh(sourceMesh), m_sourceMeshFaceGroups(sourceMeshFaceGroups), m_faceGroup(faceGroup), m_faceCount(0), m_paramAddedChartsCount(0), m_paramDeletedChartsCount(0)
- {
+ ChartGroup(uint32_t id, const Mesh *sourceMesh, const MeshFaceGroups *sourceMeshFaceGroups, MeshFaceGroups::Handle faceGroup) :
+ m_id(id), m_sourceMesh(sourceMesh), m_sourceMeshFaceGroups(sourceMeshFaceGroups), m_faceGroup(faceGroup) {
}
- ~ChartGroup()
- {
+ ~ChartGroup() {
for (uint32_t i = 0; i < m_charts.size(); i++) {
m_charts[i]->~Chart();
XA_FREE(m_charts[i]);
}
}
- uint32_t segmentChartCount() const { return m_chartBasis.size(); }
uint32_t chartCount() const { return m_charts.size(); }
Chart *chartAt(uint32_t i) const { return m_charts[i]; }
- uint32_t faceCount() const { return m_faceCount; }
- uint32_t paramAddedChartsCount() const { return m_paramAddedChartsCount; }
- uint32_t paramDeletedChartsCount() const { return m_paramDeletedChartsCount; }
+ uint32_t faceCount() const { return m_sourceMeshFaceGroups->faceCount(m_faceGroup); }
- void computeChartFaces(const ChartOptions &options, segment::Atlas &atlas)
- {
+ void computeCharts(TaskScheduler *taskScheduler, const ChartOptions &options, Progress *progress, segment::Atlas &atlas, ThreadLocal<UniformGrid2> *boundaryGrid, ThreadLocal<ChartCtorBuffers> *chartBuffers, ThreadLocal<PiecewiseParam> *piecewiseParam) {
+ // This function may be called multiple times, so destroy existing charts.
+ for (uint32_t i = 0; i < m_charts.size(); i++) {
+ m_charts[i]->~Chart();
+ XA_FREE(m_charts[i]);
+ }
// Create mesh from source mesh, using only the faces in this face group.
XA_PROFILE_START(createChartGroupMesh)
Mesh *mesh = createMesh();
XA_PROFILE_END(createChartGroupMesh)
// Segment mesh into charts (arrays of faces).
#if XA_DEBUG_SINGLE_CHART
- m_chartBasis.resize(1);
- Fit::computeBasis(&mesh->position(0), mesh->vertexCount(), &m_chartBasis[0]);
- m_chartFaces.resize(1 + mesh->faceCount());
- m_chartFaces[0] = mesh->faceCount();
- for (uint32_t i = 0; i < m_chartFaces.size(); i++)
- m_chartFaces[i + 1] = i;
+ XA_UNUSED(options);
+ XA_UNUSED(atlas);
+ const uint32_t chartCount = 1;
+ uint32_t offset;
+ Basis chartBasis;
+ Fit::computeBasis(&mesh->position(0), mesh->vertexCount(), &chartBasis);
+ Array<uint32_t> chartFaces;
+ chartFaces.resize(1 + mesh->faceCount());
+ chartFaces[0] = mesh->faceCount();
+ for (uint32_t i = 0; i < chartFaces.size() - 1; i++)
+ chartFaces[i + 1] = m_faceToSourceFaceMap[i];
+ // Destroy mesh.
+ const uint32_t faceCount = mesh->faceCount();
+ mesh->~Mesh();
+ XA_FREE(mesh);
#else
XA_PROFILE_START(buildAtlas)
atlas.reset(mesh, options);
atlas.compute();
XA_PROFILE_END(buildAtlas)
+ // Update progress.
+ progress->increment(faceCount());
#if XA_DEBUG_EXPORT_OBJ_CHARTS
char filename[256];
XA_SPRINTF(filename, sizeof(filename), "debug_mesh_%03u_chartgroup_%03u_charts.obj", m_sourceMesh->id(), m_id);
@@ -7745,7 +7196,6 @@ public:
mesh->writeObjFace(file, faces[f]);
}
mesh->writeObjBoundaryEges(file);
- mesh->writeObjLinkedBoundaries(file);
fclose(file);
}
#endif
@@ -7754,65 +7204,57 @@ public:
mesh->~Mesh();
XA_FREE(mesh);
XA_PROFILE_START(copyChartFaces)
- // Copy basis.
- const uint32_t chartCount = atlas.chartCount();
- m_chartBasis.resize(chartCount);
- for (uint32_t i = 0; i < chartCount; i++)
- m_chartBasis[i] = atlas.chartBasis(i);
+ if (progress->cancel)
+ return;
// Copy faces from segment::Atlas to m_chartFaces array with <chart 0 face count> <face 0> <face n> <chart 1 face count> etc. encoding.
// segment::Atlas faces refer to the chart group mesh. Map them to the input mesh instead.
- m_chartFaces.resize(chartCount + faceCount);
+ const uint32_t chartCount = atlas.chartCount();
+ Array<uint32_t> chartFaces;
+ chartFaces.resize(chartCount + faceCount);
uint32_t offset = 0;
for (uint32_t i = 0; i < chartCount; i++) {
ConstArrayView<uint32_t> faces = atlas.chartFaces(i);
- m_chartFaces[offset++] = faces.length;
+ chartFaces[offset++] = faces.length;
for (uint32_t j = 0; j < faces.length; j++)
- m_chartFaces[offset++] = m_faceToSourceFaceMap[faces[j]];
+ chartFaces[offset++] = m_faceToSourceFaceMap[faces[j]];
}
XA_PROFILE_END(copyChartFaces)
#endif
- }
-
-#if XA_RECOMPUTE_CHARTS
- void parameterizeCharts(TaskScheduler *taskScheduler, const ParameterizeOptions &options, ThreadLocal<UniformGrid2> *boundaryGrid, ThreadLocal<ChartCtorBuffers> *chartBuffers, ThreadLocal<PiecewiseParam> *piecewiseParam)
-#else
- void parameterizeCharts(TaskScheduler* taskScheduler, const ParameterizeOptions &options, ThreadLocal<UniformGrid2>* boundaryGrid, ThreadLocal<ChartCtorBuffers>* chartBuffers)
-#endif
- {
- // This function may be called multiple times, so destroy existing charts.
- for (uint32_t i = 0; i < m_charts.size(); i++) {
- m_charts[i]->~Chart();
- XA_FREE(m_charts[i]);
- }
- m_paramAddedChartsCount = 0;
- const uint32_t chartCount = m_chartBasis.size();
+ XA_PROFILE_START(createChartMeshAndParameterizeReal)
+ CreateAndParameterizeChartTaskGroupArgs groupArgs;
+ groupArgs.progress = progress;
+ groupArgs.boundaryGrid = boundaryGrid;
+ groupArgs.chartBuffers = chartBuffers;
+ groupArgs.options = &options;
+ groupArgs.pp = piecewiseParam;
+ TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(&groupArgs, chartCount);
Array<CreateAndParameterizeChartTaskArgs> taskArgs;
taskArgs.resize(chartCount);
taskArgs.runCtors(); // Has Array member.
- TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(chartCount);
- uint32_t offset = 0;
+ offset = 0;
for (uint32_t i = 0; i < chartCount; i++) {
CreateAndParameterizeChartTaskArgs &args = taskArgs[i];
- args.basis = &m_chartBasis[i];
- args.boundaryGrid = boundaryGrid;
+#if XA_DEBUG_SINGLE_CHART
+ args.basis = &chartBasis;
+ args.isPlanar = false;
+#else
+ args.basis = &atlas.chartBasis(i);
+ args.chartGeneratorType = atlas.chartGeneratorType(i);
+#endif
args.chart = nullptr;
args.chartGroupId = m_id;
args.chartId = i;
- args.chartBuffers = chartBuffers;
- const uint32_t faceCount = m_chartFaces[offset++];
- args.faces = ConstArrayView<uint32_t>(&m_chartFaces[offset], faceCount);
- offset += faceCount;
+ const uint32_t chartFaceCount = chartFaces[offset++];
+ args.faces = ConstArrayView<uint32_t>(&chartFaces[offset], chartFaceCount);
+ offset += chartFaceCount;
args.mesh = m_sourceMesh;
- args.options = &options;
-#if XA_RECOMPUTE_CHARTS
- args.pp = piecewiseParam;
-#endif
Task task;
task.userData = &args;
task.func = runCreateAndParameterizeChartTask;
taskScheduler->run(taskGroup, task);
}
taskScheduler->wait(&taskGroup);
+ XA_PROFILE_END(createChartMeshAndParameterizeReal)
#if XA_RECOMPUTE_CHARTS
// Count charts. Skip invalid ones and include new ones added by recomputing.
uint32_t newChartCount = 0;
@@ -7830,7 +7272,6 @@ public:
if (chart->isInvalid()) {
chart->~Chart();
XA_FREE(chart);
- m_paramDeletedChartsCount++;
continue;
}
m_charts[current++] = chart;
@@ -7838,10 +7279,8 @@ public:
// Now add new charts.
for (uint32_t i = 0; i < chartCount; i++) {
CreateAndParameterizeChartTaskArgs &args = taskArgs[i];
- for (uint32_t j = 0; j < args.charts.size(); j++) {
+ for (uint32_t j = 0; j < args.charts.size(); j++)
m_charts[current++] = args.charts[j];
- m_paramAddedChartsCount++;
- }
}
#else // XA_RECOMPUTE_CHARTS
m_charts.resize(chartCount);
@@ -7852,15 +7291,14 @@ public:
}
private:
- Mesh *createMesh()
- {
+ Mesh *createMesh() {
XA_DEBUG_ASSERT(m_faceGroup != MeshFaceGroups::kInvalid);
// Create new mesh from the source mesh, using faces that belong to this group.
m_faceToSourceFaceMap.reserve(m_sourceMeshFaceGroups->faceCount(m_faceGroup));
for (MeshFaceGroups::Iterator it(m_sourceMeshFaceGroups, m_faceGroup); !it.isDone(); it.advance())
m_faceToSourceFaceMap.push_back(it.face());
// Only initial meshes has ignored faces. The only flag we care about is HasNormals.
- const uint32_t faceCount = m_faceCount = m_faceToSourceFaceMap.size();
+ const uint32_t faceCount = m_faceToSourceFaceMap.size();
XA_DEBUG_ASSERT(faceCount > 0);
const uint32_t approxVertexCount = min(faceCount * 3, m_sourceMesh->vertexCount());
Mesh *mesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, m_sourceMesh->epsilon(), approxVertexCount, faceCount, m_sourceMesh->flags() & MeshFlags::HasNormals);
@@ -7889,9 +7327,7 @@ private:
XA_DEBUG_ASSERT(indices[i] != UINT32_MAX);
}
// Don't copy flags - ignored faces aren't used by chart groups, they are handled by InvalidMeshGeometry.
- Mesh::AddFaceResult::Enum result = mesh->addFace(indices);
- XA_UNUSED(result);
- XA_DEBUG_ASSERT(result == Mesh::AddFaceResult::OK);
+ mesh->addFace(indices);
}
XA_PROFILE_START(createChartGroupMeshColocals)
mesh->createColocals();
@@ -7909,98 +7345,57 @@ private:
}
const uint32_t m_id;
- const Mesh * const m_sourceMesh;
- const MeshFaceGroups * const m_sourceMeshFaceGroups;
+ const Mesh *const m_sourceMesh;
+ const MeshFaceGroups *const m_sourceMeshFaceGroups;
const MeshFaceGroups::Handle m_faceGroup;
Array<uint32_t> m_faceToSourceFaceMap; // List of faces of the source mesh that belong to this chart group.
- Array<Basis> m_chartBasis; // Copied from segment::Atlas.
- Array<uint32_t> m_chartFaces; // Copied from segment::Atlas. Encoding: <chart 0 face count> <face 0> <face n> <chart 1 face count> etc.
Array<Chart *> m_charts;
- uint32_t m_faceCount; // Set by createMesh(). Used for sorting.
- uint32_t m_paramAddedChartsCount; // Number of new charts added by recomputing charts with invalid parameterizations.
- uint32_t m_paramDeletedChartsCount; // Number of charts with invalid parameterizations that were deleted, after charts were recomputed.
-};
-
-// References invalid faces and vertices in a mesh.
-struct InvalidMeshGeometry
-{
- // Invalid faces have the face groups MeshFaceGroups::kInvalid.
- void extract(const Mesh *mesh, const MeshFaceGroups *meshFaceGroups)
- {
- // Copy invalid faces.
- m_faces.clear();
- const uint32_t meshFaceCount = mesh->faceCount();
- for (uint32_t f = 0; f < meshFaceCount; f++) {
- if (meshFaceGroups->groupAt(f) == MeshFaceGroups::kInvalid)
- m_faces.push_back(f);
- }
- // Create *unique* list of vertices of invalid faces.
- const uint32_t faceCount = m_faces.size();
- m_indices.resize(faceCount * 3);
- const uint32_t approxVertexCount = min(faceCount * 3, mesh->vertexCount());
- m_vertexToSourceVertexMap.clear();
- m_vertexToSourceVertexMap.reserve(approxVertexCount);
- HashMap<uint32_t, PassthroughHash<uint32_t>> sourceVertexToVertexMap(MemTag::Mesh, approxVertexCount);
- for (uint32_t f = 0; f < faceCount; f++) {
- const uint32_t face = m_faces[f];
- for (uint32_t i = 0; i < 3; i++) {
- const uint32_t vertex = mesh->vertexAt(face * 3 + i);
- uint32_t newVertex = sourceVertexToVertexMap.get(vertex);
- if (newVertex == UINT32_MAX) {
- newVertex = sourceVertexToVertexMap.add(vertex);
- m_vertexToSourceVertexMap.push_back(vertex);
- }
- m_indices[f * 3 + i] = newVertex;
- }
- }
- }
-
- ConstArrayView<uint32_t> faces() const { return m_faces; }
- ConstArrayView<uint32_t> indices() const { return m_indices; }
- ConstArrayView<uint32_t> vertices() const { return m_vertexToSourceVertexMap; }
-
-private:
- Array<uint32_t> m_faces, m_indices;
- Array<uint32_t> m_vertexToSourceVertexMap; // Map face vertices to vertices of the source mesh.
};
-struct ChartGroupComputeChartFacesTaskArgs
-{
+struct ChartGroupComputeChartsTaskGroupArgs {
ThreadLocal<segment::Atlas> *atlas;
- ChartGroup *chartGroup;
const ChartOptions *options;
Progress *progress;
+ TaskScheduler *taskScheduler;
+ ThreadLocal<UniformGrid2> *boundaryGrid;
+ ThreadLocal<ChartCtorBuffers> *chartBuffers;
+ ThreadLocal<PiecewiseParam> *piecewiseParam;
};
-static void runChartGroupComputeChartFacesJob(void *userData)
-{
- auto args = (ChartGroupComputeChartFacesTaskArgs *)userData;
+static void runChartGroupComputeChartsTask(void *groupUserData, void *taskUserData) {
+ auto args = (ChartGroupComputeChartsTaskGroupArgs *)groupUserData;
+ auto chartGroup = (ChartGroup *)taskUserData;
if (args->progress->cancel)
return;
XA_PROFILE_START(chartGroupComputeChartsThread)
- args->chartGroup->computeChartFaces(*args->options, args->atlas->get());
+ chartGroup->computeCharts(args->taskScheduler, *args->options, args->progress, args->atlas->get(), args->boundaryGrid, args->chartBuffers, args->piecewiseParam);
XA_PROFILE_END(chartGroupComputeChartsThread)
}
-struct MeshComputeChartFacesTaskArgs
-{
- Array<ChartGroup *> *chartGroups; // output
- InvalidMeshGeometry *invalidMeshGeometry; // output
+struct MeshComputeChartsTaskGroupArgs {
ThreadLocal<segment::Atlas> *atlas;
const ChartOptions *options;
Progress *progress;
- const Mesh *sourceMesh;
TaskScheduler *taskScheduler;
+ ThreadLocal<UniformGrid2> *boundaryGrid;
+ ThreadLocal<ChartCtorBuffers> *chartBuffers;
+ ThreadLocal<PiecewiseParam> *piecewiseParam;
+};
+
+struct MeshComputeChartsTaskArgs {
+ const Mesh *sourceMesh;
+ Array<ChartGroup *> *chartGroups; // output
+ InvalidMeshGeometry *invalidMeshGeometry; // output
};
#if XA_DEBUG_EXPORT_OBJ_FACE_GROUPS
static uint32_t s_faceGroupsCurrentVertex = 0;
#endif
-static void runMeshComputeChartFacesJob(void *userData)
-{
- auto args = (MeshComputeChartFacesTaskArgs *)userData;
- if (args->progress->cancel)
+static void runMeshComputeChartsTask(void *groupUserData, void *taskUserData) {
+ auto groupArgs = (MeshComputeChartsTaskGroupArgs *)groupUserData;
+ auto args = (MeshComputeChartsTaskArgs *)taskUserData;
+ if (groupArgs->progress->cancel)
return;
XA_PROFILE_START(computeChartsThread)
// Create face groups.
@@ -8009,7 +7404,7 @@ static void runMeshComputeChartFacesJob(void *userData)
meshFaceGroups->compute();
const uint32_t chartGroupCount = meshFaceGroups->groupCount();
XA_PROFILE_END(createFaceGroups)
- if (args->progress->cancel)
+ if (groupArgs->progress->cancel)
goto cleanup;
#if XA_DEBUG_EXPORT_OBJ_FACE_GROUPS
{
@@ -8053,33 +7448,41 @@ static void runMeshComputeChartFacesJob(void *userData)
for (uint32_t i = 0; i < chartGroupCount; i++)
(*args->chartGroups)[i] = XA_NEW_ARGS(MemTag::Default, ChartGroup, i, args->sourceMesh, meshFaceGroups, MeshFaceGroups::Handle(i));
// Extract invalid geometry via the invalid face group (MeshFaceGroups::kInvalid).
- XA_PROFILE_START(extractInvalidMeshGeometry)
- args->invalidMeshGeometry->extract(args->sourceMesh, meshFaceGroups);
- XA_PROFILE_END(extractInvalidMeshGeometry)
- // One task for each chart group - compute chart faces.
+ {
+ XA_PROFILE_START(extractInvalidMeshGeometry)
+ args->invalidMeshGeometry->extract(args->sourceMesh, meshFaceGroups);
+ XA_PROFILE_END(extractInvalidMeshGeometry)
+ }
+ // One task for each chart group - compute charts.
{
XA_PROFILE_START(chartGroupComputeChartsReal)
- Array<ChartGroupComputeChartFacesTaskArgs> taskArgs;
- taskArgs.resize(chartGroupCount);
- for (uint32_t i = 0; i < chartGroupCount; i++) {
- taskArgs[i].atlas = args->atlas;
- taskArgs[i].chartGroup = (*args->chartGroups)[i];
- taskArgs[i].options = args->options;
- taskArgs[i].progress = args->progress;
- }
- TaskGroupHandle taskGroup = args->taskScheduler->createTaskGroup(chartGroupCount);
+ // Sort chart groups by face count.
+ Array<float> chartGroupSortData;
+ chartGroupSortData.resize(chartGroupCount);
+ for (uint32_t i = 0; i < chartGroupCount; i++)
+ chartGroupSortData[i] = (float)(*args->chartGroups)[i]->faceCount();
+ RadixSort chartGroupSort;
+ chartGroupSort.sort(chartGroupSortData);
+ // Larger chart groups are added first to reduce the chance of thread starvation.
+ ChartGroupComputeChartsTaskGroupArgs taskGroupArgs;
+ taskGroupArgs.atlas = groupArgs->atlas;
+ taskGroupArgs.options = groupArgs->options;
+ taskGroupArgs.progress = groupArgs->progress;
+ taskGroupArgs.taskScheduler = groupArgs->taskScheduler;
+ taskGroupArgs.boundaryGrid = groupArgs->boundaryGrid;
+ taskGroupArgs.chartBuffers = groupArgs->chartBuffers;
+ taskGroupArgs.piecewiseParam = groupArgs->piecewiseParam;
+ TaskGroupHandle taskGroup = groupArgs->taskScheduler->createTaskGroup(&taskGroupArgs, chartGroupCount);
for (uint32_t i = 0; i < chartGroupCount; i++) {
Task task;
- task.userData = &taskArgs[i];
- task.func = runChartGroupComputeChartFacesJob;
- args->taskScheduler->run(taskGroup, task);
+ task.userData = (*args->chartGroups)[chartGroupCount - i - 1];
+ task.func = runChartGroupComputeChartsTask;
+ groupArgs->taskScheduler->run(taskGroup, task);
}
- args->taskScheduler->wait(&taskGroup);
+ groupArgs->taskScheduler->wait(&taskGroup);
XA_PROFILE_END(chartGroupComputeChartsReal)
}
XA_PROFILE_END(computeChartsThread)
- args->progress->value++;
- args->progress->update();
cleanup:
if (meshFaceGroups) {
meshFaceGroups->~MeshFaceGroups();
@@ -8087,43 +7490,13 @@ cleanup:
}
}
-struct ParameterizeChartsTaskArgs
-{
- TaskScheduler *taskScheduler;
- ChartGroup *chartGroup;
- const ParameterizeOptions *options;
- ThreadLocal<UniformGrid2> *boundaryGrid;
- ThreadLocal<ChartCtorBuffers> *chartBuffers;
-#if XA_RECOMPUTE_CHARTS
- ThreadLocal<PiecewiseParam> *piecewiseParam;
-#endif
- Progress *progress;
-};
-
-static void runParameterizeChartsJob(void *userData)
-{
- auto args = (ParameterizeChartsTaskArgs *)userData;
- if (args->progress->cancel)
- return;
- XA_PROFILE_START(parameterizeChartsThread)
-#if XA_RECOMPUTE_CHARTS
- args->chartGroup->parameterizeCharts(args->taskScheduler, *args->options, args->boundaryGrid, args->chartBuffers, args->piecewiseParam);
-#else
- args->chartGroup->parameterizeCharts(args->taskScheduler, *args->options, args->boundaryGrid, args->chartBuffers);
-#endif
- XA_PROFILE_END(parameterizeChartsThread)
- args->progress->value++;
- args->progress->update();
-}
-
/// An atlas is a set of chart groups.
-class Atlas
-{
+class Atlas {
public:
- Atlas() : m_chartsComputed(false), m_chartsParameterized(false) {}
+ Atlas() :
+ m_chartsComputed(false) {}
- ~Atlas()
- {
+ ~Atlas() {
for (uint32_t i = 0; i < m_meshChartGroups.size(); i++) {
for (uint32_t j = 0; j < m_meshChartGroups[i].size(); j++) {
m_meshChartGroups[i][j]->~ChartGroup();
@@ -8137,22 +7510,25 @@ public:
uint32_t meshCount() const { return m_meshes.size(); }
const InvalidMeshGeometry &invalidMeshGeometry(uint32_t meshIndex) const { return m_invalidMeshGeometry[meshIndex]; }
bool chartsComputed() const { return m_chartsComputed; }
- bool chartsParameterized() const { return m_chartsParameterized; }
uint32_t chartGroupCount(uint32_t mesh) const { return m_meshChartGroups[mesh].size(); }
const ChartGroup *chartGroupAt(uint32_t mesh, uint32_t group) const { return m_meshChartGroups[mesh][group]; }
- void addMesh(const Mesh *mesh)
- {
+ void addMesh(const Mesh *mesh) {
m_meshes.push_back(mesh);
}
- bool computeCharts(TaskScheduler *taskScheduler, const ChartOptions &options, ProgressFunc progressFunc, void *progressUserData)
- {
+ bool computeCharts(TaskScheduler *taskScheduler, const ChartOptions &options, ProgressFunc progressFunc, void *progressUserData) {
+ XA_PROFILE_START(computeChartsReal)
#if XA_DEBUG_EXPORT_OBJ_PLANAR_REGIONS
segment::s_planarRegionsCurrentRegion = segment::s_planarRegionsCurrentVertex = 0;
#endif
+ // Progress is per-face x 2 (1 for chart faces, 1 for parameterized chart faces).
+ const uint32_t meshCount = m_meshes.size();
+ uint32_t totalFaceCount = 0;
+ for (uint32_t i = 0; i < meshCount; i++)
+ totalFaceCount += m_meshes[i]->faceCount();
+ Progress progress(ProgressCategory::ComputeCharts, progressFunc, progressUserData, totalFaceCount * 2);
m_chartsComputed = false;
- m_chartsParameterized = false;
// Clear chart groups, since this function may be called multiple times.
if (!m_meshChartGroups.isEmpty()) {
for (uint32_t i = 0; i < m_meshChartGroups.size(); i++) {
@@ -8162,27 +7538,20 @@ public:
}
m_meshChartGroups[i].clear();
}
- XA_ASSERT(m_meshChartGroups.size() == m_meshes.size()); // The number of meshes shouldn't have changed.
+ XA_ASSERT(m_meshChartGroups.size() == meshCount); // The number of meshes shouldn't have changed.
}
- m_meshChartGroups.resize(m_meshes.size());
+ m_meshChartGroups.resize(meshCount);
m_meshChartGroups.runCtors();
- m_invalidMeshGeometry.resize(m_meshes.size());
+ m_invalidMeshGeometry.resize(meshCount);
m_invalidMeshGeometry.runCtors();
// One task per mesh.
- const uint32_t meshCount = m_meshes.size();
- Progress progress(ProgressCategory::ComputeCharts, progressFunc, progressUserData, meshCount);
- ThreadLocal<segment::Atlas> atlas;
- Array<MeshComputeChartFacesTaskArgs> taskArgs;
+ Array<MeshComputeChartsTaskArgs> taskArgs;
taskArgs.resize(meshCount);
for (uint32_t i = 0; i < meshCount; i++) {
- MeshComputeChartFacesTaskArgs &args = taskArgs[i];
- args.atlas = &atlas;
+ MeshComputeChartsTaskArgs &args = taskArgs[i];
+ args.sourceMesh = m_meshes[i];
args.chartGroups = &m_meshChartGroups[i];
args.invalidMeshGeometry = &m_invalidMeshGeometry[i];
- args.options = &options;
- args.progress = &progress;
- args.sourceMesh = m_meshes[i];
- args.taskScheduler = taskScheduler;
}
// Sort meshes by indexCount.
Array<float> meshSortData;
@@ -8192,105 +7561,53 @@ public:
RadixSort meshSort;
meshSort.sort(meshSortData);
// Larger meshes are added first to reduce the chance of thread starvation.
- TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(meshCount);
- for (uint32_t i = 0; i < meshCount; i++) {
- Task task;
- task.userData = &taskArgs[meshSort.ranks()[meshCount - i - 1]];
- task.func = runMeshComputeChartFacesJob;
- taskScheduler->run(taskGroup, task);
- }
- taskScheduler->wait(&taskGroup);
- if (progress.cancel)
- return false;
- m_chartsComputed = true;
- return true;
- }
-
- bool parameterizeCharts(TaskScheduler *taskScheduler, const ParameterizeOptions &options, ProgressFunc progressFunc, void *progressUserData)
- {
- m_chartsParameterized = false;
- uint32_t chartGroupCount = 0;
- for (uint32_t i = 0; i < m_meshChartGroups.size(); i++)
- chartGroupCount += m_meshChartGroups[i].size();
- Progress progress(ProgressCategory::ParameterizeCharts, progressFunc, progressUserData, chartGroupCount);
+ ThreadLocal<segment::Atlas> atlas;
ThreadLocal<UniformGrid2> boundaryGrid; // For Quality boundary intersection.
ThreadLocal<ChartCtorBuffers> chartBuffers;
-#if XA_RECOMPUTE_CHARTS
ThreadLocal<PiecewiseParam> piecewiseParam;
-#endif
- Array<ParameterizeChartsTaskArgs> taskArgs;
- taskArgs.resize(chartGroupCount);
- {
- uint32_t k = 0;
- for (uint32_t i = 0; i < m_meshChartGroups.size(); i++) {
- const uint32_t count = m_meshChartGroups[i].size();
- for (uint32_t j = 0; j < count; j++) {
- ParameterizeChartsTaskArgs &args = taskArgs[k];
- args.taskScheduler = taskScheduler;
- args.chartGroup = m_meshChartGroups[i][j];
- args.options = &options;
- args.boundaryGrid = &boundaryGrid;
- args.chartBuffers = &chartBuffers;
-#if XA_RECOMPUTE_CHARTS
- args.piecewiseParam = &piecewiseParam;
-#endif
- args.progress = &progress;
- k++;
- }
- }
- }
- // Sort chart groups by face count.
- Array<float> chartGroupSortData;
- chartGroupSortData.resize(chartGroupCount);
- {
- uint32_t k = 0;
- for (uint32_t i = 0; i < m_meshChartGroups.size(); i++) {
- const uint32_t count = m_meshChartGroups[i].size();
- for (uint32_t j = 0; j < count; j++) {
- chartGroupSortData[k++] = (float)m_meshChartGroups[i][j]->faceCount();
- }
- }
- }
- RadixSort chartGroupSort;
- chartGroupSort.sort(chartGroupSortData);
- // Larger chart groups are added first to reduce the chance of thread starvation.
- TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(chartGroupCount);
- for (uint32_t i = 0; i < chartGroupCount; i++) {
+ MeshComputeChartsTaskGroupArgs taskGroupArgs;
+ taskGroupArgs.atlas = &atlas;
+ taskGroupArgs.options = &options;
+ taskGroupArgs.progress = &progress;
+ taskGroupArgs.taskScheduler = taskScheduler;
+ taskGroupArgs.boundaryGrid = &boundaryGrid;
+ taskGroupArgs.chartBuffers = &chartBuffers;
+ taskGroupArgs.piecewiseParam = &piecewiseParam;
+ TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(&taskGroupArgs, meshCount);
+ for (uint32_t i = 0; i < meshCount; i++) {
Task task;
- task.userData = &taskArgs[chartGroupSort.ranks()[chartGroupCount - i - 1]];
- task.func = runParameterizeChartsJob;
+ task.userData = &taskArgs[meshSort.ranks()[meshCount - i - 1]];
+ task.func = runMeshComputeChartsTask;
taskScheduler->run(taskGroup, task);
}
taskScheduler->wait(&taskGroup);
+ XA_PROFILE_END(computeChartsReal)
if (progress.cancel)
return false;
- m_chartsParameterized = true;
+ m_chartsComputed = true;
return true;
}
private:
Array<const Mesh *> m_meshes;
Array<InvalidMeshGeometry> m_invalidMeshGeometry; // 1 per mesh.
- Array<Array<ChartGroup *> > m_meshChartGroups;
+ Array<Array<ChartGroup *>> m_meshChartGroups;
bool m_chartsComputed;
- bool m_chartsParameterized;
};
} // namespace param
namespace pack {
-class AtlasImage
-{
+class AtlasImage {
public:
- AtlasImage(uint32_t width, uint32_t height) : m_width(width), m_height(height)
- {
+ AtlasImage(uint32_t width, uint32_t height) :
+ m_width(width), m_height(height) {
m_data.resize(m_width * m_height);
memset(m_data.data(), 0, sizeof(uint32_t) * m_data.size());
}
- void resize(uint32_t width, uint32_t height)
- {
+ void resize(uint32_t width, uint32_t height) {
Array<uint32_t> data;
data.resize(width * height);
memset(data.data(), 0, sizeof(uint32_t) * data.size());
@@ -8301,8 +7618,7 @@ public:
data.moveTo(m_data);
}
- void addChart(uint32_t chartIndex, const BitImage *image, const BitImage *imageBilinear, const BitImage *imagePadding, int atlas_w, int atlas_h, int offset_x, int offset_y)
- {
+ void addChart(uint32_t chartIndex, const BitImage *image, const BitImage *imageBilinear, const BitImage *imagePadding, int atlas_w, int atlas_h, int offset_x, int offset_y) {
const int w = image->width();
const int h = image->height();
for (int y = 0; y < h; y++) {
@@ -8328,15 +7644,13 @@ public:
}
}
- void copyTo(uint32_t *dest, uint32_t destWidth, uint32_t destHeight, int padding) const
- {
+ void copyTo(uint32_t *dest, uint32_t destWidth, uint32_t destHeight, int padding) const {
for (uint32_t y = 0; y < destHeight; y++)
memcpy(&dest[y * destWidth], &m_data[padding + (y + padding) * m_width], destWidth * sizeof(uint32_t));
}
#if XA_DEBUG_EXPORT_ATLAS_IMAGES
- void writeTga(const char *filename, uint32_t width, uint32_t height) const
- {
+ void writeTga(const char *filename, uint32_t width, uint32_t height) const {
Array<uint8_t> image;
image.resize(width * height * 3);
for (uint32_t y = 0; y < height; y++) {
@@ -8378,18 +7692,14 @@ private:
Array<uint32_t> m_data;
};
-struct Chart
-{
+struct Chart {
int32_t atlasIndex;
uint32_t material;
- uint32_t indexCount;
- const uint32_t *indices;
+ ConstArrayView<uint32_t> indices;
float parametricArea;
float surfaceArea;
- Vector2 *vertices;
- uint32_t vertexCount;
+ ArrayView<Vector2> vertices;
Array<uint32_t> uniqueVertices;
- bool allowRotate;
// bounding box
Vector2 majorAxis, minorAxis, minCorner, maxCorner;
// Mesh only
@@ -8398,29 +7708,26 @@ struct Chart
Array<uint32_t> faces;
Vector2 &uniqueVertexAt(uint32_t v) { return uniqueVertices.isEmpty() ? vertices[v] : vertices[uniqueVertices[v]]; }
- uint32_t uniqueVertexCount() const { return uniqueVertices.isEmpty() ? vertexCount : uniqueVertices.size(); }
+ uint32_t uniqueVertexCount() const { return uniqueVertices.isEmpty() ? vertices.length : uniqueVertices.size(); }
};
-struct AddChartTaskArgs
-{
- ThreadLocal<BoundingBox2D> *boundingBox;
+struct AddChartTaskArgs {
param::Chart *paramChart;
Chart *chart; // out
};
-static void runAddChartTask(void *userData)
-{
+static void runAddChartTask(void *groupUserData, void *taskUserData) {
XA_PROFILE_START(packChartsAddChartsThread)
- auto args = (AddChartTaskArgs *)userData;
+ auto boundingBox = (ThreadLocal<BoundingBox2D> *)groupUserData;
+ auto args = (AddChartTaskArgs *)taskUserData;
param::Chart *paramChart = args->paramChart;
XA_PROFILE_START(packChartsAddChartsRestoreTexcoords)
paramChart->restoreTexcoords();
XA_PROFILE_END(packChartsAddChartsRestoreTexcoords)
- Mesh *mesh = paramChart->mesh();
+ Mesh *mesh = paramChart->unifiedMesh();
Chart *chart = args->chart = XA_NEW(MemTag::Default, Chart);
chart->atlasIndex = -1;
chart->material = 0;
- chart->indexCount = mesh->indexCount();
chart->indices = mesh->indices();
chart->parametricArea = mesh->computeParametricArea();
if (chart->parametricArea < kAreaEpsilon) {
@@ -8430,17 +7737,15 @@ static void runAddChartTask(void *userData)
}
chart->surfaceArea = mesh->computeSurfaceArea();
chart->vertices = mesh->texcoords();
- chart->vertexCount = mesh->vertexCount();
- chart->allowRotate = true;
chart->boundaryEdges = &mesh->boundaryEdges();
// Compute bounding box of chart.
- BoundingBox2D &bb = args->boundingBox->get();
+ BoundingBox2D &bb = boundingBox->get();
bb.clear();
- for (uint32_t v = 0; v < chart->vertexCount; v++) {
+ for (uint32_t v = 0; v < chart->vertices.length; v++) {
if (mesh->isBoundaryVertex(v))
bb.appendBoundaryVertex(mesh->texcoord(v));
}
- bb.compute(mesh->texcoords(), mesh->vertexCount());
+ bb.compute(mesh->texcoords());
chart->majorAxis = bb.majorAxis;
chart->minorAxis = bb.minorAxis;
chart->minCorner = bb.minCorner;
@@ -8448,10 +7753,8 @@ static void runAddChartTask(void *userData)
XA_PROFILE_END(packChartsAddChartsThread)
}
-struct Atlas
-{
- ~Atlas()
- {
+struct Atlas {
+ ~Atlas() {
for (uint32_t i = 0; i < m_atlasImages.size(); i++) {
m_atlasImages[i]->~AtlasImage();
XA_FREE(m_atlasImages[i]);
@@ -8475,8 +7778,7 @@ struct Atlas
const Array<AtlasImage *> &getImages() const { return m_atlasImages; }
float getUtilization(uint32_t atlas) const { return m_utilization[atlas]; }
- void addCharts(TaskScheduler *taskScheduler, param::Atlas *paramAtlas)
- {
+ void addCharts(TaskScheduler *taskScheduler, param::Atlas *paramAtlas) {
// Count charts.
uint32_t chartCount = 0;
for (uint32_t i = 0; i < paramAtlas->meshCount(); i++) {
@@ -8489,11 +7791,11 @@ struct Atlas
if (chartCount == 0)
return;
// Run one task per chart.
+ ThreadLocal<BoundingBox2D> boundingBox;
+ TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(&boundingBox, chartCount);
Array<AddChartTaskArgs> taskArgs;
taskArgs.resize(chartCount);
- TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(chartCount);
uint32_t chartIndex = 0;
- ThreadLocal<BoundingBox2D> boundingBox;
for (uint32_t i = 0; i < paramAtlas->meshCount(); i++) {
const uint32_t chartGroupsCount = paramAtlas->chartGroupCount(i);
for (uint32_t j = 0; j < chartGroupsCount; j++) {
@@ -8501,7 +7803,6 @@ struct Atlas
const uint32_t count = chartGroup->chartCount();
for (uint32_t k = 0; k < count; k++) {
AddChartTaskArgs &args = taskArgs[chartIndex];
- args.boundingBox = &boundingBox;
args.paramChart = chartGroup->chartAt(k);
Task task;
task.userData = &taskArgs[chartIndex];
@@ -8518,8 +7819,10 @@ struct Atlas
m_charts[i] = taskArgs[i].chart;
}
- void addUvMeshCharts(UvMeshInstance *mesh)
- {
+ void addUvMeshCharts(UvMeshInstance *mesh) {
+ // Copy texcoords from mesh.
+ mesh->texcoords.resize(mesh->mesh->texcoords.size());
+ memcpy(mesh->texcoords.data(), mesh->mesh->texcoords.data(), mesh->texcoords.size() * sizeof(Vector2));
BitArray vertexUsed(mesh->texcoords.size());
BoundingBox2D boundingBox;
for (uint32_t c = 0; c < mesh->mesh->charts.size(); c++) {
@@ -8527,17 +7830,14 @@ struct Atlas
Chart *chart = XA_NEW(MemTag::Default, Chart);
chart->atlasIndex = -1;
chart->material = uvChart->material;
- chart->indexCount = uvChart->indices.size();
- chart->indices = uvChart->indices.data();
- chart->vertices = mesh->texcoords.data();
- chart->vertexCount = mesh->texcoords.size();
- chart->allowRotate = mesh->rotateCharts;
+ chart->indices = uvChart->indices;
+ chart->vertices = mesh->texcoords;
chart->boundaryEdges = nullptr;
chart->faces.resize(uvChart->faces.size());
memcpy(chart->faces.data(), uvChart->faces.data(), sizeof(uint32_t) * uvChart->faces.size());
// Find unique vertices.
vertexUsed.zeroOutMemory();
- for (uint32_t i = 0; i < chart->indexCount; i++) {
+ for (uint32_t i = 0; i < chart->indices.length; i++) {
const uint32_t vertex = chart->indices[i];
if (!vertexUsed.get(vertex)) {
vertexUsed.set(vertex);
@@ -8546,14 +7846,13 @@ struct Atlas
}
// Compute parametric and surface areas.
chart->parametricArea = 0.0f;
- for (uint32_t f = 0; f < chart->indexCount / 3; f++) {
+ for (uint32_t f = 0; f < chart->indices.length / 3; f++) {
const Vector2 &v1 = chart->vertices[chart->indices[f * 3 + 0]];
const Vector2 &v2 = chart->vertices[chart->indices[f * 3 + 1]];
const Vector2 &v3 = chart->vertices[chart->indices[f * 3 + 2]];
chart->parametricArea += fabsf(triangleArea(v1, v2, v3));
}
chart->parametricArea *= 0.5f;
- chart->surfaceArea = chart->parametricArea; // Identical for UV meshes.
if (chart->parametricArea < kAreaEpsilon) {
// When the parametric area is too small we use a rough approximation to prevent divisions by very small numbers.
Vector2 minCorner(FLT_MAX, FLT_MAX);
@@ -8565,6 +7864,9 @@ struct Atlas
const Vector2 bounds = (maxCorner - minCorner) * 0.5f;
chart->parametricArea = bounds.x * bounds.y;
}
+ XA_DEBUG_ASSERT(isFinite(chart->parametricArea));
+ XA_DEBUG_ASSERT(!isNan(chart->parametricArea));
+ chart->surfaceArea = chart->parametricArea; // Identical for UV meshes.
// Compute bounding box of chart.
// Using all unique vertices for simplicity, can compute real boundaries if this is too slow.
boundingBox.clear();
@@ -8580,8 +7882,7 @@ struct Atlas
}
// Pack charts in the smallest possible rectangle.
- bool packCharts(const PackOptions &options, ProgressFunc progressFunc, void *progressUserData)
- {
+ bool packCharts(const PackOptions &options, ProgressFunc progressFunc, void *progressUserData) {
if (progressFunc) {
if (!progressFunc(ProgressCategory::PackCharts, 0, progressUserData))
return false;
@@ -8627,19 +7928,19 @@ struct Atlas
// Compute chart scale
float scale = 1.0f;
if (chart->parametricArea != 0.0f) {
- scale = (chart->surfaceArea / chart->parametricArea) * m_texelsPerUnit;
+ scale = sqrtf(chart->surfaceArea / chart->parametricArea) * m_texelsPerUnit;
XA_ASSERT(isFinite(scale));
}
// Translate, rotate and scale vertices. Compute extents.
Vector2 minCorner(FLT_MAX, FLT_MAX);
- if (!chart->allowRotate) {
+ if (!options.rotateChartsToAxis) {
for (uint32_t i = 0; i < chart->uniqueVertexCount(); i++)
minCorner = min(minCorner, chart->uniqueVertexAt(i));
}
Vector2 extents(0.0f);
for (uint32_t i = 0; i < chart->uniqueVertexCount(); i++) {
Vector2 &texcoord = chart->uniqueVertexAt(i);
- if (chart->allowRotate) {
+ if (options.rotateChartsToAxis) {
const float x = dot(texcoord, chart->majorAxis);
const float y = dot(texcoord, chart->minorAxis);
texcoord.x = x;
@@ -8750,27 +8051,27 @@ struct Atlas
// Resize and clear (discard = true) chart images.
// Leave room for padding at extents.
chartImage.resize(ftoi_ceil(chartExtents[c].x) + options.padding, ftoi_ceil(chartExtents[c].y) + options.padding, true);
- if (chart->allowRotate)
+ if (options.rotateCharts)
chartImageRotated.resize(chartImage.height(), chartImage.width(), true);
if (options.bilinear) {
chartImageBilinear.resize(chartImage.width(), chartImage.height(), true);
- if (chart->allowRotate)
+ if (options.rotateCharts)
chartImageBilinearRotated.resize(chartImage.height(), chartImage.width(), true);
}
// Rasterize chart faces.
- const uint32_t faceCount = chart->indexCount / 3;
+ const uint32_t faceCount = chart->indices.length / 3;
for (uint32_t f = 0; f < faceCount; f++) {
Vector2 vertices[3];
for (uint32_t v = 0; v < 3; v++)
vertices[v] = chart->vertices[chart->indices[f * 3 + v]];
DrawTriangleCallbackArgs args;
args.chartBitImage = &chartImage;
- args.chartBitImageRotated = chart->allowRotate ? &chartImageRotated : nullptr;
+ args.chartBitImageRotated = options.rotateCharts ? &chartImageRotated : nullptr;
raster::drawTriangle(Vector2((float)chartImage.width(), (float)chartImage.height()), vertices, drawTriangleCallback, &args);
}
// Expand chart by pixels sampled by bilinear interpolation.
if (options.bilinear)
- bilinearExpand(chart, &chartImage, &chartImageBilinear, chart->allowRotate ? &chartImageBilinearRotated : nullptr, boundaryEdgeGrid);
+ bilinearExpand(chart, &chartImage, &chartImageBilinear, options.rotateCharts ? &chartImageBilinearRotated : nullptr, boundaryEdgeGrid);
// Expand chart by padding pixels (dilation).
if (options.padding > 0) {
// Copy into the same BitImage instances for every chart to avoid reallocating BitImage buffers (largest chart is packed first).
@@ -8780,7 +8081,7 @@ struct Atlas
else
chartImage.copyTo(chartImagePadding);
chartImagePadding.dilate(options.padding);
- if (chart->allowRotate) {
+ if (options.rotateCharts) {
if (options.bilinear)
chartImageBilinearRotated.copyTo(chartImagePaddingRotated);
else
@@ -8815,23 +8116,25 @@ struct Atlas
int best_x = 0, best_y = 0;
int best_cw = 0, best_ch = 0;
int best_r = 0;
- for (;;)
- {
+ for (;;) {
+#if XA_DEBUG
bool firstChartInBitImage = false;
- XA_UNUSED(firstChartInBitImage);
+#endif
if (currentAtlas + 1 > m_bitImages.size()) {
// Chart doesn't fit in the current bitImage, create a new one.
BitImage *bi = XA_NEW_ARGS(MemTag::Default, BitImage, resolution, resolution);
m_bitImages.push_back(bi);
atlasSizes.push_back(Vector2i(0, 0));
+#if XA_DEBUG
firstChartInBitImage = true;
+#endif
if (createImage)
m_atlasImages.push_back(XA_NEW_ARGS(MemTag::Default, AtlasImage, resolution, resolution));
// Start positions are per-atlas, so create a new one of those too.
chartStartPositions.push_back(Vector2i(0, 0));
}
XA_PROFILE_START(packChartsFindLocation)
- const bool foundLocation = findChartLocation(chartStartPositions[currentAtlas], options.bruteForce, m_bitImages[currentAtlas], chartImageToPack, chartImageToPackRotated, atlasSizes[currentAtlas].x, atlasSizes[currentAtlas].y, &best_x, &best_y, &best_cw, &best_ch, &best_r, options.blockAlign, maxResolution, chart->allowRotate);
+ const bool foundLocation = findChartLocation(options, chartStartPositions[currentAtlas], m_bitImages[currentAtlas], chartImageToPack, chartImageToPackRotated, atlasSizes[currentAtlas].x, atlasSizes[currentAtlas].y, &best_x, &best_y, &best_cw, &best_ch, &best_r, maxResolution);
XA_PROFILE_END(packChartsFindLocation)
XA_DEBUG_ASSERT(!(firstChartInBitImage && !foundLocation)); // Chart doesn't fit in an empty, newly allocated bitImage. Shouldn't happen, since charts are resized if they are too big to fit in the atlas.
if (maxResolution == 0) {
@@ -8849,8 +8152,7 @@ struct Atlas
if (best_x + best_cw > atlasSizes[currentAtlas].x || best_y + best_ch > atlasSizes[currentAtlas].y) {
for (uint32_t j = 0; j < chartStartPositions.size(); j++)
chartStartPositions[j] = Vector2i(0, 0);
- }
- else {
+ } else {
chartStartPositions[currentAtlas] = Vector2i(best_x, best_y);
}
}
@@ -8897,7 +8199,7 @@ struct Atlas
Vector2 &texcoord = chart->uniqueVertexAt(v);
Vector2 t = texcoord;
if (best_r) {
- XA_DEBUG_ASSERT(chart->allowRotate);
+ XA_DEBUG_ASSERT(options.rotateCharts);
swap(t.x, t.y);
}
texcoord.x = best_x + t.x;
@@ -8938,8 +8240,7 @@ struct Atlas
}
if (m_utilization.size() > 1) {
XA_PRINT(" %u: %f%% utilization\n", i, m_utilization[i] * 100.0f);
- }
- else {
+ } else {
XA_PRINT(" %f%% utilization\n", m_utilization[i] * 100.0f);
}
}
@@ -8958,28 +8259,22 @@ struct Atlas
}
private:
- // IC: Brute force is slow, and random may take too much time to converge. We start inserting large charts in a small atlas. Using brute force is lame, because most of the space
- // is occupied at this point. At the end we have many small charts and a large atlas with sparse holes. Finding those holes randomly is slow. A better approach would be to
- // start stacking large charts as if they were tetris pieces. Once charts get small try to place them randomly. It may be interesting to try a intermediate strategy, first try
- // along one axis and then try exhaustively along that axis.
- bool findChartLocation(const Vector2i &startPosition, bool bruteForce, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, bool blockAligned, uint32_t maxResolution, bool allowRotate)
- {
+ bool findChartLocation(const PackOptions &options, const Vector2i &startPosition, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, uint32_t maxResolution) {
const int attempts = 4096;
- if (bruteForce || attempts >= w * h)
- return findChartLocation_bruteForce(startPosition, atlasBitImage, chartBitImage, chartBitImageRotated, w, h, best_x, best_y, best_w, best_h, best_r, blockAligned, maxResolution, allowRotate);
- return findChartLocation_random(atlasBitImage, chartBitImage, chartBitImageRotated, w, h, best_x, best_y, best_w, best_h, best_r, attempts, blockAligned, maxResolution, allowRotate);
+ if (options.bruteForce || attempts >= w * h)
+ return findChartLocation_bruteForce(options, startPosition, atlasBitImage, chartBitImage, chartBitImageRotated, w, h, best_x, best_y, best_w, best_h, best_r, maxResolution);
+ return findChartLocation_random(options, atlasBitImage, chartBitImage, chartBitImageRotated, w, h, best_x, best_y, best_w, best_h, best_r, attempts, maxResolution);
}
- bool findChartLocation_bruteForce(const Vector2i &startPosition, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, bool blockAligned, uint32_t maxResolution, bool allowRotate)
- {
- const int stepSize = blockAligned ? 4 : 1;
+ bool findChartLocation_bruteForce(const PackOptions &options, const Vector2i &startPosition, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, uint32_t maxResolution) {
+ const int stepSize = options.blockAlign ? 4 : 1;
int best_metric = INT_MAX;
// Try two different orientations.
for (int r = 0; r < 2; r++) {
int cw = chartBitImage->width();
int ch = chartBitImage->height();
if (r == 1) {
- if (allowRotate)
+ if (options.rotateCharts)
swap(cw, ch);
else
break;
@@ -9016,15 +8311,14 @@ private:
return best_metric != INT_MAX;
}
- bool findChartLocation_random(const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, int minTrialCount, bool blockAligned, uint32_t maxResolution, bool allowRotate)
- {
+ bool findChartLocation_random(const PackOptions &options, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, int attempts, uint32_t maxResolution) {
bool result = false;
const int BLOCK_SIZE = 4;
int best_metric = INT_MAX;
- for (int i = 0; i < minTrialCount; i++) {
+ for (int i = 0; i < attempts; i++) {
int cw = chartBitImage->width();
int ch = chartBitImage->height();
- int r = allowRotate ? m_rand.getRange(1) : 0;
+ int r = options.rotateCharts ? m_rand.getRange(1) : 0;
if (r == 1)
swap(cw, ch);
// + 1 to extend atlas in case atlas full. We may want to use a higher number to increase probability of extending atlas.
@@ -9037,7 +8331,7 @@ private:
}
int x = m_rand.getRange(xRange);
int y = m_rand.getRange(yRange);
- if (blockAligned) {
+ if (options.blockAlign) {
x = align(x, BLOCK_SIZE);
y = align(y, BLOCK_SIZE);
if (maxResolution > 0 && (x > (int)maxResolution - cw || y > (int)maxResolution - ch))
@@ -9062,7 +8356,7 @@ private:
*best_y = y;
*best_w = cw;
*best_h = ch;
- *best_r = allowRotate ? r : 0;
+ *best_r = options.rotateCharts ? r : 0;
if (area == w * h) {
// Chart is completely inside, do not look at any other location.
break;
@@ -9072,8 +8366,7 @@ private:
return result;
}
- void addChart(BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int atlas_w, int atlas_h, int offset_x, int offset_y, int r)
- {
+ void addChart(BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int atlas_w, int atlas_h, int offset_x, int offset_y, int r) {
XA_DEBUG_ASSERT(r == 0 || r == 1);
const BitImage *image = r == 0 ? chartBitImage : chartBitImageRotated;
const int w = image->width();
@@ -9096,15 +8389,14 @@ private:
}
}
- void bilinearExpand(const Chart *chart, BitImage *source, BitImage *dest, BitImage *destRotated, UniformGrid2 &boundaryEdgeGrid) const
- {
+ void bilinearExpand(const Chart *chart, BitImage *source, BitImage *dest, BitImage *destRotated, UniformGrid2 &boundaryEdgeGrid) const {
boundaryEdgeGrid.reset(chart->vertices, chart->indices);
if (chart->boundaryEdges) {
const uint32_t edgeCount = chart->boundaryEdges->size();
for (uint32_t i = 0; i < edgeCount; i++)
boundaryEdgeGrid.append((*chart->boundaryEdges)[i]);
} else {
- for (uint32_t i = 0; i < chart->indexCount; i++)
+ for (uint32_t i = 0; i < chart->indices.length; i++)
boundaryEdgeGrid.append(i);
}
const int xOffsets[] = { -1, 0, 1, -1, 1, -1, 0, 1 };
@@ -9152,13 +8444,11 @@ private:
}
}
- struct DrawTriangleCallbackArgs
- {
+ struct DrawTriangleCallbackArgs {
BitImage *chartBitImage, *chartBitImageRotated;
};
- static bool drawTriangleCallback(void *param, int x, int y)
- {
+ static bool drawTriangleCallback(void *param, int x, int y) {
auto args = (DrawTriangleCallbackArgs *)param;
args->chartBitImage->set(x, y);
if (args->chartBitImageRotated)
@@ -9180,8 +8470,14 @@ private:
} // namespace pack
} // namespace internal
-struct Context
-{
+// Used to map triangulated polygons back to polygons.
+struct MeshPolygonMapping {
+ internal::Array<uint8_t> faceVertexCount; // Copied from MeshDecl::faceVertexCount.
+ internal::Array<uint32_t> triangleToPolygonMap; // Triangle index (mesh face index) to polygon index.
+ internal::Array<uint32_t> triangleToPolygonIndicesMap; // Triangle indices to polygon indices.
+};
+
+struct Context {
Atlas atlas;
internal::Progress *addMeshProgress = nullptr;
internal::TaskGroupHandle addMeshTaskGroup;
@@ -9190,20 +8486,20 @@ struct Context
void *progressUserData = nullptr;
internal::TaskScheduler *taskScheduler;
internal::Array<internal::Mesh *> meshes;
+ internal::Array<MeshPolygonMapping *> meshPolygonMappings;
internal::Array<internal::UvMesh *> uvMeshes;
internal::Array<internal::UvMeshInstance *> uvMeshInstances;
+ bool uvMeshChartsComputed = false;
};
-Atlas *Create()
-{
+Atlas *Create() {
Context *ctx = XA_NEW(internal::MemTag::Default, Context);
memset(&ctx->atlas, 0, sizeof(Atlas));
ctx->taskScheduler = XA_NEW(internal::MemTag::Default, internal::TaskScheduler);
return &ctx->atlas;
}
-static void DestroyOutputMeshes(Context *ctx)
-{
+static void DestroyOutputMeshes(Context *ctx) {
if (!ctx->atlas.meshes)
return;
for (int i = 0; i < (int)ctx->atlas.meshCount; i++) {
@@ -9224,8 +8520,7 @@ static void DestroyOutputMeshes(Context *ctx)
ctx->atlas.meshes = nullptr;
}
-void Destroy(Atlas *atlas)
-{
+void Destroy(Atlas *atlas) {
XA_DEBUG_ASSERT(atlas);
Context *ctx = (Context *)atlas;
if (atlas->utilization)
@@ -9244,6 +8539,13 @@ void Destroy(Atlas *atlas)
mesh->~Mesh();
XA_FREE(mesh);
}
+ for (uint32_t i = 0; i < ctx->meshPolygonMappings.size(); i++) {
+ MeshPolygonMapping *mapping = ctx->meshPolygonMappings[i];
+ if (mapping) {
+ mapping->~MeshPolygonMapping();
+ XA_FREE(mapping);
+ }
+ }
for (uint32_t i = 0; i < ctx->uvMeshes.size(); i++) {
internal::UvMesh *mesh = ctx->uvMeshes[i];
for (uint32_t j = 0; j < mesh->charts.size(); j++) {
@@ -9265,66 +8567,52 @@ void Destroy(Atlas *atlas)
#endif
}
-struct AddMeshTaskArgs
-{
- Context *ctx;
- internal::Mesh *mesh;
-};
-
-static void runAddMeshTask(void *userData)
-{
+static void runAddMeshTask(void *groupUserData, void *taskUserData) {
XA_PROFILE_START(addMeshThread)
- auto args = (AddMeshTaskArgs *)userData; // Responsible for freeing this.
- internal::Mesh *mesh = args->mesh;
- internal::Progress *progress = args->ctx->addMeshProgress;
- if (progress->cancel)
- goto cleanup;
- {
- XA_PROFILE_START(addMeshCreateColocals)
- mesh->createColocals();
- XA_PROFILE_END(addMeshCreateColocals)
+ auto ctx = (Context *)groupUserData;
+ auto mesh = (internal::Mesh *)taskUserData;
+ internal::Progress *progress = ctx->addMeshProgress;
+ if (progress->cancel) {
+ XA_PROFILE_END(addMeshThread)
+ return;
}
- if (progress->cancel)
- goto cleanup;
- progress->value++;
- progress->update();
-cleanup:
- args->~AddMeshTaskArgs();
- XA_FREE(args);
+ XA_PROFILE_START(addMeshCreateColocals)
+ mesh->createColocals();
+ XA_PROFILE_END(addMeshCreateColocals)
+ if (progress->cancel) {
+ XA_PROFILE_END(addMeshThread)
+ return;
+ }
+ progress->increment(1);
XA_PROFILE_END(addMeshThread)
}
-static internal::Vector3 DecodePosition(const MeshDecl &meshDecl, uint32_t index)
-{
+static internal::Vector3 DecodePosition(const MeshDecl &meshDecl, uint32_t index) {
XA_DEBUG_ASSERT(meshDecl.vertexPositionData);
XA_DEBUG_ASSERT(meshDecl.vertexPositionStride > 0);
return *((const internal::Vector3 *)&((const uint8_t *)meshDecl.vertexPositionData)[meshDecl.vertexPositionStride * index]);
}
-static internal::Vector3 DecodeNormal(const MeshDecl &meshDecl, uint32_t index)
-{
+static internal::Vector3 DecodeNormal(const MeshDecl &meshDecl, uint32_t index) {
XA_DEBUG_ASSERT(meshDecl.vertexNormalData);
XA_DEBUG_ASSERT(meshDecl.vertexNormalStride > 0);
return *((const internal::Vector3 *)&((const uint8_t *)meshDecl.vertexNormalData)[meshDecl.vertexNormalStride * index]);
}
-static internal::Vector2 DecodeUv(const MeshDecl &meshDecl, uint32_t index)
-{
+static internal::Vector2 DecodeUv(const MeshDecl &meshDecl, uint32_t index) {
XA_DEBUG_ASSERT(meshDecl.vertexUvData);
XA_DEBUG_ASSERT(meshDecl.vertexUvStride > 0);
return *((const internal::Vector2 *)&((const uint8_t *)meshDecl.vertexUvData)[meshDecl.vertexUvStride * index]);
}
-static uint32_t DecodeIndex(IndexFormat::Enum format, const void *indexData, int32_t offset, uint32_t i)
-{
+static uint32_t DecodeIndex(IndexFormat format, const void *indexData, int32_t offset, uint32_t i) {
XA_DEBUG_ASSERT(indexData);
if (format == IndexFormat::UInt16)
return uint16_t((int32_t)((const uint16_t *)indexData)[i] + offset);
return uint32_t((int32_t)((const uint32_t *)indexData)[i] + offset);
}
-AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t meshCountHint)
-{
+AddMeshError AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t meshCountHint) {
XA_DEBUG_ASSERT(atlas);
if (!atlas) {
XA_PRINT_WARNING("AddMesh: atlas is null.\n");
@@ -9337,33 +8625,36 @@ AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t mesh
}
#if XA_PROFILE
if (ctx->meshes.isEmpty())
- internal::s_profile.addMeshReal = clock();
+ internal::s_profile.addMeshRealStart = std::chrono::high_resolution_clock::now();
#endif
// Don't know how many times AddMesh will be called, so progress needs to adjusted each time.
if (!ctx->addMeshProgress) {
ctx->addMeshProgress = XA_NEW_ARGS(internal::MemTag::Default, internal::Progress, ProgressCategory::AddMesh, ctx->progressFunc, ctx->progressUserData, 1);
- }
- else {
+ } else {
ctx->addMeshProgress->setMaxValue(internal::max(ctx->meshes.size() + 1, meshCountHint));
}
XA_PROFILE_START(addMeshCopyData)
const bool hasIndices = meshDecl.indexCount > 0;
const uint32_t indexCount = hasIndices ? meshDecl.indexCount : meshDecl.vertexCount;
- XA_PRINT("Adding mesh %d: %u vertices, %u triangles\n", ctx->meshes.size(), meshDecl.vertexCount, indexCount / 3);
- // Expecting triangle faces.
- if ((indexCount % 3) != 0)
- return AddMeshError::InvalidIndexCount;
- if (hasIndices) {
- // Check if any index is out of range.
- for (uint32_t i = 0; i < indexCount; i++) {
- const uint32_t index = DecodeIndex(meshDecl.indexFormat, meshDecl.indexData, meshDecl.indexOffset, i);
- if (index >= meshDecl.vertexCount)
- return AddMeshError::IndexOutOfRange;
+ uint32_t faceCount = indexCount / 3;
+ if (meshDecl.faceVertexCount) {
+ faceCount = meshDecl.faceCount;
+ XA_PRINT("Adding mesh %d: %u vertices, %u polygons\n", ctx->meshes.size(), meshDecl.vertexCount, faceCount);
+ for (uint32_t f = 0; f < faceCount; f++) {
+ if (meshDecl.faceVertexCount[f] < 3)
+ return AddMeshError::InvalidFaceVertexCount;
}
+ } else {
+ XA_PRINT("Adding mesh %d: %u vertices, %u triangles\n", ctx->meshes.size(), meshDecl.vertexCount, faceCount);
+ // Expecting triangle faces unless otherwise specified.
+ if ((indexCount % 3) != 0)
+ return AddMeshError::InvalidIndexCount;
}
uint32_t meshFlags = internal::MeshFlags::HasIgnoredFaces;
if (meshDecl.vertexNormalData)
meshFlags |= internal::MeshFlags::HasNormals;
+ if (meshDecl.faceMaterialData)
+ meshFlags |= internal::MeshFlags::HasMaterials;
internal::Mesh *mesh = XA_NEW_ARGS(internal::MemTag::Mesh, internal::Mesh, meshDecl.epsilon, meshDecl.vertexCount, indexCount / 3, meshFlags, ctx->meshes.size());
for (uint32_t i = 0; i < meshDecl.vertexCount; i++) {
internal::Vector3 normal(0.0f);
@@ -9374,17 +8665,42 @@ AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t mesh
texcoord = DecodeUv(meshDecl, i);
mesh->addVertex(DecodePosition(meshDecl, i), normal, texcoord);
}
+ MeshPolygonMapping *meshPolygonMapping = nullptr;
+ if (meshDecl.faceVertexCount) {
+ meshPolygonMapping = XA_NEW(internal::MemTag::Default, MeshPolygonMapping);
+ // Copy MeshDecl::faceVertexCount so it can be used later when building output meshes.
+ meshPolygonMapping->faceVertexCount.copyFrom(meshDecl.faceVertexCount, meshDecl.faceCount);
+ // There should be at least as many triangles as polygons.
+ meshPolygonMapping->triangleToPolygonMap.reserve(meshDecl.faceCount);
+ meshPolygonMapping->triangleToPolygonIndicesMap.reserve(meshDecl.indexCount);
+ }
const uint32_t kMaxWarnings = 50;
uint32_t warningCount = 0;
- for (uint32_t i = 0; i < indexCount / 3; i++) {
- uint32_t tri[3];
- for (int j = 0; j < 3; j++)
- tri[j] = hasIndices ? DecodeIndex(meshDecl.indexFormat, meshDecl.indexData, meshDecl.indexOffset, i * 3 + j) : i * 3 + j;
+ internal::Array<uint32_t> triIndices;
+ uint32_t firstFaceIndex = 0;
+ internal::Triangulator triangulator;
+ for (uint32_t face = 0; face < faceCount; face++) {
+ // Decode face indices.
+ const uint32_t faceVertexCount = meshDecl.faceVertexCount ? (uint32_t)meshDecl.faceVertexCount[face] : 3;
+ uint32_t polygon[UINT8_MAX];
+ for (uint32_t i = 0; i < faceVertexCount; i++) {
+ if (hasIndices) {
+ polygon[i] = DecodeIndex(meshDecl.indexFormat, meshDecl.indexData, meshDecl.indexOffset, face * faceVertexCount + i);
+ // Check if any index is out of range.
+ if (polygon[i] >= meshDecl.vertexCount) {
+ mesh->~Mesh();
+ XA_FREE(mesh);
+ return AddMeshError::IndexOutOfRange;
+ }
+ } else {
+ polygon[i] = face * faceVertexCount + i;
+ }
+ }
+ // Ignore faces with degenerate or zero length edges.
bool ignore = false;
- // Check for degenerate or zero length edges.
- for (int j = 0; j < 3; j++) {
- const uint32_t index1 = tri[j];
- const uint32_t index2 = tri[(j + 1) % 3];
+ for (uint32_t i = 0; i < faceVertexCount; i++) {
+ const uint32_t index1 = polygon[i];
+ const uint32_t index2 = polygon[(i + 1) % 3];
if (index1 == index2) {
ignore = true;
if (++warningCount <= kMaxWarnings)
@@ -9402,119 +8718,136 @@ AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t mesh
}
// Ignore faces with any nan vertex attributes.
if (!ignore) {
- for (int j = 0; j < 3; j++) {
- const internal::Vector3 &pos = mesh->position(tri[j]);
+ for (uint32_t i = 0; i < faceVertexCount; i++) {
+ const internal::Vector3 &pos = mesh->position(polygon[i]);
if (internal::isNan(pos.x) || internal::isNan(pos.y) || internal::isNan(pos.z)) {
if (++warningCount <= kMaxWarnings)
- XA_PRINT(" NAN position in face: %d\n", i);
+ XA_PRINT(" NAN position in face: %d\n", face);
ignore = true;
break;
}
if (meshDecl.vertexNormalData) {
- const internal::Vector3 &normal = mesh->normal(tri[j]);
+ const internal::Vector3 &normal = mesh->normal(polygon[i]);
if (internal::isNan(normal.x) || internal::isNan(normal.y) || internal::isNan(normal.z)) {
if (++warningCount <= kMaxWarnings)
- XA_PRINT(" NAN normal in face: %d\n", i);
+ XA_PRINT(" NAN normal in face: %d\n", face);
ignore = true;
break;
}
}
if (meshDecl.vertexUvData) {
- const internal::Vector2 &uv = mesh->texcoord(tri[j]);
+ const internal::Vector2 &uv = mesh->texcoord(polygon[i]);
if (internal::isNan(uv.x) || internal::isNan(uv.y)) {
if (++warningCount <= kMaxWarnings)
- XA_PRINT(" NAN texture coordinate in face: %d\n", i);
+ XA_PRINT(" NAN texture coordinate in face: %d\n", face);
ignore = true;
break;
}
}
}
}
- const internal::Vector3 &a = mesh->position(tri[0]);
- const internal::Vector3 &b = mesh->position(tri[1]);
- const internal::Vector3 &c = mesh->position(tri[2]);
- // Check for zero area faces.
- float area = 0.0f;
- if (!ignore) {
- area = internal::length(internal::cross(b - a, c - a)) * 0.5f;
- if (area <= internal::kAreaEpsilon) {
- ignore = true;
- if (++warningCount <= kMaxWarnings)
- XA_PRINT(" Zero area face: %d, indices (%d %d %d), area is %f\n", i, tri[0], tri[1], tri[2], area);
- }
+ // Triangulate if necessary.
+ triIndices.clear();
+ if (faceVertexCount == 3) {
+ triIndices.push_back(polygon[0]);
+ triIndices.push_back(polygon[1]);
+ triIndices.push_back(polygon[2]);
+ } else {
+ triangulator.triangulatePolygon(mesh->positions(), internal::ConstArrayView<uint32_t>(polygon, faceVertexCount), triIndices);
}
+ // Check for zero area faces.
if (!ignore) {
- if (internal::equal(a, b, meshDecl.epsilon) || internal::equal(a, c, meshDecl.epsilon) || internal::equal(b, c, meshDecl.epsilon)) {
- ignore = true;
- if (++warningCount <= kMaxWarnings)
- XA_PRINT(" Degenerate face: %d, area is %f\n", i, area);
+ for (uint32_t i = 0; i < triIndices.size(); i += 3) {
+ const internal::Vector3 &a = mesh->position(triIndices[i + 0]);
+ const internal::Vector3 &b = mesh->position(triIndices[i + 1]);
+ const internal::Vector3 &c = mesh->position(triIndices[i + 2]);
+ const float area = internal::length(internal::cross(b - a, c - a)) * 0.5f;
+ if (area <= internal::kAreaEpsilon) {
+ ignore = true;
+ if (++warningCount <= kMaxWarnings)
+ XA_PRINT(" Zero area face: %d, area is %f\n", face, area);
+ break;
+ }
}
}
- if (meshDecl.faceIgnoreData && meshDecl.faceIgnoreData[i])
+ // User face ignore.
+ if (meshDecl.faceIgnoreData && meshDecl.faceIgnoreData[face])
ignore = true;
- mesh->addFace(tri[0], tri[1], tri[2], ignore);
+ // User material.
+ uint32_t material = UINT32_MAX;
+ if (meshDecl.faceMaterialData)
+ material = meshDecl.faceMaterialData[face];
+ // Add the face(s).
+ for (uint32_t i = 0; i < triIndices.size(); i += 3) {
+ mesh->addFace(&triIndices[i], ignore, material);
+ if (meshPolygonMapping)
+ meshPolygonMapping->triangleToPolygonMap.push_back(face);
+ }
+ if (meshPolygonMapping) {
+ for (uint32_t i = 0; i < triIndices.size(); i++)
+ meshPolygonMapping->triangleToPolygonIndicesMap.push_back(triIndices[i]);
+ }
+ firstFaceIndex += faceVertexCount;
}
if (warningCount > kMaxWarnings)
XA_PRINT(" %u additional warnings truncated\n", warningCount - kMaxWarnings);
XA_PROFILE_END(addMeshCopyData)
ctx->meshes.push_back(mesh);
+ ctx->meshPolygonMappings.push_back(meshPolygonMapping);
ctx->paramAtlas.addMesh(mesh);
if (ctx->addMeshTaskGroup.value == UINT32_MAX)
- ctx->addMeshTaskGroup = ctx->taskScheduler->createTaskGroup();
- AddMeshTaskArgs *taskArgs = XA_NEW(internal::MemTag::Default, AddMeshTaskArgs); // The task frees this.
- taskArgs->ctx = ctx;
- taskArgs->mesh = mesh;
+ ctx->addMeshTaskGroup = ctx->taskScheduler->createTaskGroup(ctx);
internal::Task task;
- task.userData = taskArgs;
+ task.userData = mesh;
task.func = runAddMeshTask;
ctx->taskScheduler->run(ctx->addMeshTaskGroup, task);
return AddMeshError::Success;
}
-void AddMeshJoin(Atlas *atlas)
-{
+void AddMeshJoin(Atlas *atlas) {
XA_DEBUG_ASSERT(atlas);
if (!atlas) {
XA_PRINT_WARNING("AddMeshJoin: atlas is null.\n");
return;
}
Context *ctx = (Context *)atlas;
- if (!ctx->addMeshProgress)
- return;
- ctx->taskScheduler->wait(&ctx->addMeshTaskGroup);
- ctx->addMeshProgress->~Progress();
- XA_FREE(ctx->addMeshProgress);
- ctx->addMeshProgress = nullptr;
+ if (!ctx->uvMeshes.isEmpty()) {
#if XA_PROFILE
- XA_PRINT("Added %u meshes\n", ctx->meshes.size());
- internal::s_profile.addMeshReal = clock() - internal::s_profile.addMeshReal;
+ XA_PRINT("Added %u UV meshes\n", ctx->uvMeshes.size());
+ internal::s_profile.addMeshReal = uint64_t(std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - internal::s_profile.addMeshRealStart).count());
#endif
- XA_PROFILE_PRINT_AND_RESET(" Total (real): ", addMeshReal)
- XA_PROFILE_PRINT_AND_RESET(" Copy data: ", addMeshCopyData)
- XA_PROFILE_PRINT_AND_RESET(" Total (thread): ", addMeshThread)
- XA_PROFILE_PRINT_AND_RESET(" Create colocals: ", addMeshCreateColocals)
+ XA_PROFILE_PRINT_AND_RESET(" Total: ", addMeshReal)
+ XA_PROFILE_PRINT_AND_RESET(" Copy data: ", addMeshCopyData)
#if XA_PROFILE_ALLOC
- XA_PROFILE_PRINT_AND_RESET(" Alloc: ", alloc)
+ XA_PROFILE_PRINT_AND_RESET(" Alloc: ", alloc)
#endif
- XA_PRINT_MEM_USAGE
+ XA_PRINT_MEM_USAGE
+ } else {
+ if (!ctx->addMeshProgress)
+ return;
+ ctx->taskScheduler->wait(&ctx->addMeshTaskGroup);
+ ctx->addMeshProgress->~Progress();
+ XA_FREE(ctx->addMeshProgress);
+ ctx->addMeshProgress = nullptr;
+#if XA_PROFILE
+ XA_PRINT("Added %u meshes\n", ctx->meshes.size());
+ internal::s_profile.addMeshReal = uint64_t(std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - internal::s_profile.addMeshRealStart).count());
+#endif
+ XA_PROFILE_PRINT_AND_RESET(" Total (real): ", addMeshReal)
+ XA_PROFILE_PRINT_AND_RESET(" Copy data: ", addMeshCopyData)
+ XA_PROFILE_PRINT_AND_RESET(" Total (thread): ", addMeshThread)
+ XA_PROFILE_PRINT_AND_RESET(" Create colocals: ", addMeshCreateColocals)
+#if XA_PROFILE_ALLOC
+ XA_PROFILE_PRINT_AND_RESET(" Alloc: ", alloc)
+#endif
+ XA_PRINT_MEM_USAGE
#if XA_DEBUG_EXPORT_OBJ_FACE_GROUPS
- internal::param::s_faceGroupsCurrentVertex = 0;
+ internal::param::s_faceGroupsCurrentVertex = 0;
#endif
+ }
}
-struct EdgeKey
-{
- EdgeKey() {}
- EdgeKey(const EdgeKey &k) : v0(k.v0), v1(k.v1) {}
- EdgeKey(uint32_t v0, uint32_t v1) : v0(v0), v1(v1) {}
- bool operator==(const EdgeKey &k) const { return v0 == k.v0 && v1 == k.v1; }
-
- uint32_t v0;
- uint32_t v1;
-};
-
-AddMeshError::Enum AddUvMesh(Atlas *atlas, const UvMeshDecl &decl)
-{
+AddMeshError AddUvMesh(Atlas *atlas, const UvMeshDecl &decl) {
XA_DEBUG_ASSERT(atlas);
if (!atlas) {
XA_PRINT_WARNING("AddUvMesh: atlas is null.\n");
@@ -9525,13 +8858,18 @@ AddMeshError::Enum AddUvMesh(Atlas *atlas, const UvMeshDecl &decl)
XA_PRINT_WARNING("AddUvMesh: Meshes and UV meshes cannot be added to the same atlas.\n");
return AddMeshError::Error;
}
- const bool decoded = (decl.indexCount <= 0);
- const uint32_t indexCount = decoded ? decl.vertexCount : decl.indexCount;
+#if XA_PROFILE
+ if (ctx->uvMeshInstances.isEmpty())
+ internal::s_profile.addMeshRealStart = std::chrono::high_resolution_clock::now();
+#endif
+ XA_PROFILE_START(addMeshCopyData)
+ const bool hasIndices = decl.indexCount > 0;
+ const uint32_t indexCount = hasIndices ? decl.indexCount : decl.vertexCount;
XA_PRINT("Adding UV mesh %d: %u vertices, %u triangles\n", ctx->uvMeshes.size(), decl.vertexCount, indexCount / 3);
// Expecting triangle faces.
if ((indexCount % 3) != 0)
return AddMeshError::InvalidIndexCount;
- if (!decoded) {
+ if (hasIndices) {
// Check if any index is out of range.
for (uint32_t i = 0; i < indexCount; i++) {
const uint32_t index = DecodeIndex(decl.indexFormat, decl.indexData, decl.indexOffset, i);
@@ -9539,319 +8877,266 @@ AddMeshError::Enum AddUvMesh(Atlas *atlas, const UvMeshDecl &decl)
return AddMeshError::IndexOutOfRange;
}
}
+ // Create a mesh instance.
internal::UvMeshInstance *meshInstance = XA_NEW(internal::MemTag::Default, internal::UvMeshInstance);
- meshInstance->texcoords.resize(decl.vertexCount);
- for (uint32_t i = 0; i < decl.vertexCount; i++) {
- internal::Vector2 texcoord = *((const internal::Vector2 *)&((const uint8_t *)decl.vertexUvData)[decl.vertexStride * i]);
- // Set nan values to 0.
- if (internal::isNan(texcoord.x) || internal::isNan(texcoord.y))
- texcoord.x = texcoord.y = 0.0f;
- meshInstance->texcoords[i] = texcoord;
- }
- meshInstance->rotateCharts = decl.rotateCharts;
+ meshInstance->mesh = nullptr;
+ ctx->uvMeshInstances.push_back(meshInstance);
// See if this is an instance of an already existing mesh.
internal::UvMesh *mesh = nullptr;
for (uint32_t m = 0; m < ctx->uvMeshes.size(); m++) {
if (memcmp(&ctx->uvMeshes[m]->decl, &decl, sizeof(UvMeshDecl)) == 0) {
- meshInstance->mesh = mesh = ctx->uvMeshes[m];
+ mesh = ctx->uvMeshes[m];
+ XA_PRINT(" instance of a previous UV mesh\n");
break;
}
}
if (!mesh) {
// Copy geometry to mesh.
- meshInstance->mesh = mesh = XA_NEW(internal::MemTag::Default, internal::UvMesh);
+ mesh = XA_NEW(internal::MemTag::Default, internal::UvMesh);
+ ctx->uvMeshes.push_back(mesh);
mesh->decl = decl;
+ if (decl.faceMaterialData) {
+ mesh->faceMaterials.resize(decl.indexCount / 3);
+ memcpy(mesh->faceMaterials.data(), decl.faceMaterialData, mesh->faceMaterials.size() * sizeof(uint32_t));
+ }
mesh->indices.resize(decl.indexCount);
for (uint32_t i = 0; i < indexCount; i++)
- mesh->indices[i] = decoded ? i : DecodeIndex(decl.indexFormat, decl.indexData, decl.indexOffset, i);
- mesh->vertexToChartMap.resize(decl.vertexCount);
- for (uint32_t i = 0; i < mesh->vertexToChartMap.size(); i++)
- mesh->vertexToChartMap[i] = UINT32_MAX;
- // Calculate charts (incident faces).
- internal::HashMap<internal::Vector2> vertexToFaceMap(internal::MemTag::Default, indexCount); // Face is index / 3
- const uint32_t faceCount = indexCount / 3;
- for (uint32_t i = 0; i < indexCount; i++)
- vertexToFaceMap.add(meshInstance->texcoords[mesh->indices[i]]);
- internal::BitArray faceAssigned(faceCount);
- faceAssigned.zeroOutMemory();
- for (uint32_t f = 0; f < faceCount; f++) {
- if (faceAssigned.get(f))
- continue;
- // Found an unassigned face, create a new chart.
- internal::UvMeshChart *chart = XA_NEW(internal::MemTag::Default, internal::UvMeshChart);
- chart->material = decl.faceMaterialData ? decl.faceMaterialData[f] : 0;
- // Walk incident faces and assign them to the chart.
- faceAssigned.set(f);
- chart->faces.push_back(f);
- for (;;) {
- bool newFaceAssigned = false;
- const uint32_t faceCount2 = chart->faces.size();
- for (uint32_t f2 = 0; f2 < faceCount2; f2++) {
- const uint32_t face = chart->faces[f2];
- for (uint32_t i = 0; i < 3; i++) {
- const internal::Vector2 &texcoord = meshInstance->texcoords[meshInstance->mesh->indices[face * 3 + i]];
- uint32_t mapIndex = vertexToFaceMap.get(texcoord);
- while (mapIndex != UINT32_MAX) {
- const uint32_t face2 = mapIndex / 3; // 3 vertices added per face.
- // Materials must match.
- if (!faceAssigned.get(face2) && (!decl.faceMaterialData || decl.faceMaterialData[face] == decl.faceMaterialData[face2])) {
- faceAssigned.set(face2);
- chart->faces.push_back(face2);
- newFaceAssigned = true;
- }
- mapIndex = vertexToFaceMap.getNext(mapIndex);
- }
- }
- }
- if (!newFaceAssigned)
+ mesh->indices[i] = hasIndices ? DecodeIndex(decl.indexFormat, decl.indexData, decl.indexOffset, i) : i;
+ mesh->texcoords.resize(decl.vertexCount);
+ for (uint32_t i = 0; i < decl.vertexCount; i++)
+ mesh->texcoords[i] = *((const internal::Vector2 *)&((const uint8_t *)decl.vertexUvData)[decl.vertexStride * i]);
+ // Validate.
+ mesh->faceIgnore.resize(decl.indexCount / 3);
+ mesh->faceIgnore.zeroOutMemory();
+ const uint32_t kMaxWarnings = 50;
+ uint32_t warningCount = 0;
+ for (uint32_t f = 0; f < indexCount / 3; f++) {
+ bool ignore = false;
+ uint32_t tri[3];
+ for (uint32_t i = 0; i < 3; i++)
+ tri[i] = mesh->indices[f * 3 + i];
+ // Check for nan UVs.
+ for (uint32_t i = 0; i < 3; i++) {
+ const uint32_t vertex = tri[i];
+ if (internal::isNan(mesh->texcoords[vertex].x) || internal::isNan(mesh->texcoords[vertex].y)) {
+ ignore = true;
+ if (++warningCount <= kMaxWarnings)
+ XA_PRINT(" NAN texture coordinate in vertex %u\n", vertex);
break;
+ }
}
- for (uint32_t i = 0; i < chart->faces.size(); i++) {
- for (uint32_t j = 0; j < 3; j++) {
- const uint32_t vertex = meshInstance->mesh->indices[chart->faces[i] * 3 + j];
- chart->indices.push_back(vertex);
- mesh->vertexToChartMap[vertex] = mesh->charts.size();
+ // Check for zero area faces.
+ if (!ignore) {
+ const internal::Vector2 &v1 = mesh->texcoords[tri[0]];
+ const internal::Vector2 &v2 = mesh->texcoords[tri[1]];
+ const internal::Vector2 &v3 = mesh->texcoords[tri[2]];
+ const float area = fabsf(((v2.x - v1.x) * (v3.y - v1.y) - (v3.x - v1.x) * (v2.y - v1.y)) * 0.5f);
+ if (area <= internal::kAreaEpsilon) {
+ ignore = true;
+ if (++warningCount <= kMaxWarnings)
+ XA_PRINT(" Zero area face: %d, indices (%d %d %d), area is %f\n", f, tri[0], tri[1], tri[2], area);
}
}
- mesh->charts.push_back(chart);
+ if (ignore)
+ mesh->faceIgnore.set(f);
}
- ctx->uvMeshes.push_back(mesh);
- } else {
- XA_PRINT(" instance of a previous UV mesh\n");
+ if (warningCount > kMaxWarnings)
+ XA_PRINT(" %u additional warnings truncated\n", warningCount - kMaxWarnings);
}
- XA_PRINT(" %u charts\n", meshInstance->mesh->charts.size());
- ctx->uvMeshInstances.push_back(meshInstance);
+ meshInstance->mesh = mesh;
+ XA_PROFILE_END(addMeshCopyData)
return AddMeshError::Success;
}
-void ComputeCharts(Atlas *atlas, ChartOptions options)
-{
+void ComputeCharts(Atlas *atlas, ChartOptions options) {
if (!atlas) {
XA_PRINT_WARNING("ComputeCharts: atlas is null.\n");
return;
}
Context *ctx = (Context *)atlas;
- if (!ctx->uvMeshInstances.isEmpty()) {
- XA_PRINT_WARNING("ComputeCharts: This function should not be called with UV meshes.\n");
- return;
- }
AddMeshJoin(atlas);
- if (ctx->meshes.isEmpty()) {
- XA_PRINT_WARNING("ComputeCharts: No meshes. Call AddMesh first.\n");
- return;
- }
- XA_PRINT("Computing charts\n");
- XA_PROFILE_START(computeChartsReal)
- if (!ctx->paramAtlas.computeCharts(ctx->taskScheduler, options, ctx->progressFunc, ctx->progressUserData)) {
- XA_PRINT(" Cancelled by user\n");
- return;
- }
- XA_PROFILE_END(computeChartsReal)
- // Count charts.
- uint32_t chartCount = 0;
- const uint32_t meshCount = ctx->meshes.size();
- for (uint32_t i = 0; i < meshCount; i++) {
- for (uint32_t j = 0; j < ctx->paramAtlas.chartGroupCount(i); j++) {
- const internal::param::ChartGroup *chartGroup = ctx->paramAtlas.chartGroupAt(i, j);
- chartCount += chartGroup->segmentChartCount();
- }
- }
- XA_PRINT(" %u charts\n", chartCount);
-#if XA_PROFILE
- XA_PRINT(" Chart groups\n");
- uint32_t chartGroupCount = 0;
- for (uint32_t i = 0; i < meshCount; i++) {
- XA_PRINT(" Mesh %u: %u chart groups\n", i, ctx->paramAtlas.chartGroupCount(i));
- chartGroupCount += ctx->paramAtlas.chartGroupCount(i);
- }
- XA_PRINT(" %u total\n", chartGroupCount);
-#endif
- XA_PROFILE_PRINT_AND_RESET(" Total (real): ", computeChartsReal)
- XA_PROFILE_PRINT_AND_RESET(" Total (thread): ", computeChartsThread)
- XA_PROFILE_PRINT_AND_RESET(" Create face groups: ", createFaceGroups)
- XA_PROFILE_PRINT_AND_RESET(" Extract invalid mesh geometry: ", extractInvalidMeshGeometry)
- XA_PROFILE_PRINT_AND_RESET(" Chart group compute charts (real): ", chartGroupComputeChartsReal)
- XA_PROFILE_PRINT_AND_RESET(" Chart group compute charts (thread): ", chartGroupComputeChartsThread)
- XA_PROFILE_PRINT_AND_RESET(" Create chart group mesh: ", createChartGroupMesh)
- XA_PROFILE_PRINT_AND_RESET(" Create colocals: ", createChartGroupMeshColocals)
- XA_PROFILE_PRINT_AND_RESET(" Create boundaries: ", createChartGroupMeshBoundaries)
- XA_PROFILE_PRINT_AND_RESET(" Build atlas: ", buildAtlas)
- XA_PROFILE_PRINT_AND_RESET(" Init: ", buildAtlasInit)
- XA_PROFILE_PRINT_AND_RESET(" Planar charts: ", planarCharts)
- XA_PROFILE_PRINT_AND_RESET(" Clustered charts: ", clusteredCharts)
- XA_PROFILE_PRINT_AND_RESET(" Place seeds: ", clusteredChartsPlaceSeeds)
- XA_PROFILE_PRINT_AND_RESET(" Boundary intersection: ", clusteredChartsPlaceSeedsBoundaryIntersection)
- XA_PROFILE_PRINT_AND_RESET(" Relocate seeds: ", clusteredChartsRelocateSeeds)
- XA_PROFILE_PRINT_AND_RESET(" Reset: ", clusteredChartsReset)
- XA_PROFILE_PRINT_AND_RESET(" Grow: ", clusteredChartsGrow)
- XA_PROFILE_PRINT_AND_RESET(" Boundary intersection: ", clusteredChartsGrowBoundaryIntersection)
- XA_PROFILE_PRINT_AND_RESET(" Merge: ", clusteredChartsMerge)
- XA_PROFILE_PRINT_AND_RESET(" Fill holes: ", clusteredChartsFillHoles)
- XA_PROFILE_PRINT_AND_RESET(" Copy chart faces: ", copyChartFaces)
-#if XA_PROFILE_ALLOC
- XA_PROFILE_PRINT_AND_RESET(" Alloc: ", alloc)
-#endif
- XA_PRINT_MEM_USAGE
-}
-
-void ParameterizeCharts(Atlas *atlas, ParameterizeOptions options)
-{
- if (!atlas) {
- XA_PRINT_WARNING("ParameterizeCharts: atlas is null.\n");
- return;
- }
- Context *ctx = (Context *)atlas;
- if (!ctx->uvMeshInstances.isEmpty()) {
- XA_PRINT_WARNING("ParameterizeCharts: This function should not be called with UV meshes.\n");
- return;
- }
- if (!ctx->paramAtlas.chartsComputed()) {
- XA_PRINT_WARNING("ParameterizeCharts: ComputeCharts must be called first.\n");
+ if (ctx->meshes.isEmpty() && ctx->uvMeshInstances.isEmpty()) {
+ XA_PRINT_WARNING("ComputeCharts: No meshes. Call AddMesh or AddUvMesh first.\n");
return;
}
- atlas->atlasCount = 0;
- atlas->height = 0;
- atlas->texelsPerUnit = 0;
- atlas->width = 0;
- if (atlas->utilization) {
+ // Reset atlas state. This function may be called multiple times, or again after PackCharts.
+ if (atlas->utilization)
XA_FREE(atlas->utilization);
- atlas->utilization = nullptr;
- }
- if (atlas->image) {
+ if (atlas->image)
XA_FREE(atlas->image);
- atlas->image = nullptr;
- }
DestroyOutputMeshes(ctx);
- XA_PRINT("Parameterizing charts\n");
- XA_PROFILE_START(parameterizeChartsReal)
- if (!ctx->paramAtlas.parameterizeCharts(ctx->taskScheduler, options, ctx->progressFunc, ctx->progressUserData)) {
- XA_PRINT(" Cancelled by user\n");
+ memset(&ctx->atlas, 0, sizeof(Atlas));
+ XA_PRINT("Computing charts\n");
+ if (!ctx->meshes.isEmpty()) {
+ if (!ctx->paramAtlas.computeCharts(ctx->taskScheduler, options, ctx->progressFunc, ctx->progressUserData)) {
+ XA_PRINT(" Cancelled by user\n");
return;
- }
- XA_PROFILE_END(parameterizeChartsReal)
- const uint32_t meshCount = ctx->meshes.size();
- uint32_t chartCount = 0, chartsWithHolesCount = 0, holesCount = 0, chartsWithTJunctionsCount = 0, tJunctionsCount = 0, orthoChartsCount = 0, planarChartsCount = 0, lscmChartsCount = 0, piecewiseChartsCount = 0, chartsAddedCount = 0, chartsDeletedCount = 0;
- for (uint32_t i = 0; i < meshCount; i++) {
- for (uint32_t j = 0; j < ctx->paramAtlas.chartGroupCount(i); j++) {
- const internal::param::ChartGroup *chartGroup = ctx->paramAtlas.chartGroupAt(i, j);
- for (uint32_t k = 0; k < chartGroup->chartCount(); k++) {
- const internal::param::Chart *chart = chartGroup->chartAt(k);
-#if XA_PRINT_CHART_WARNINGS
- if (chart->warningFlags() & internal::param::ChartWarningFlags::CloseHolesFailed)
- XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u): failed to close holes\n", chartCount, i, j, k);
- if (chart->warningFlags() & internal::param::ChartWarningFlags::FixTJunctionsDuplicatedEdge)
- XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u): fixing t-junctions created non-manifold geometry\n", chartCount, i, j, k);
- if (chart->warningFlags() & internal::param::ChartWarningFlags::FixTJunctionsFailed)
- XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u): fixing t-junctions failed\n", chartCount, i, j, k);
- if (chart->warningFlags() & internal::param::ChartWarningFlags::TriangulateDuplicatedEdge)
- XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u): triangulation created non-manifold geometry\n", chartCount, i, j, k);
-#endif
- holesCount += chart->closedHolesCount();
- if (chart->closedHolesCount() > 0)
- chartsWithHolesCount++;
- tJunctionsCount += chart->fixedTJunctionsCount();
- if (chart->fixedTJunctionsCount() > 0)
- chartsWithTJunctionsCount++;
- if (chart->type() == ChartType::Planar)
- planarChartsCount++;
- else if (chart->type() == ChartType::Ortho)
- orthoChartsCount++;
- else if (chart->type() == ChartType::LSCM)
- lscmChartsCount++;
- else if (chart->type() == ChartType::Piecewise)
- piecewiseChartsCount++;
- }
- chartCount += chartGroup->chartCount();
- chartsAddedCount += chartGroup->paramAddedChartsCount();
- chartsDeletedCount += chartGroup->paramDeletedChartsCount();
- }
- }
- if (holesCount > 0)
- XA_PRINT(" %u holes closed in %u charts\n", holesCount, chartsWithHolesCount);
- if (tJunctionsCount > 0)
- XA_PRINT(" %u t-junctions fixed in %u charts\n", tJunctionsCount, chartsWithTJunctionsCount);
- XA_PRINT(" %u planar charts, %u ortho charts, %u LSCM charts, %u piecewise charts\n", planarChartsCount, orthoChartsCount, lscmChartsCount, piecewiseChartsCount);
- if (chartsDeletedCount > 0) {
- XA_PRINT(" %u charts with invalid parameterizations replaced with %u new charts\n", chartsDeletedCount, chartsAddedCount);
+ }
+ uint32_t chartsWithTJunctionsCount = 0, tJunctionCount = 0, orthoChartsCount = 0, planarChartsCount = 0, lscmChartsCount = 0, piecewiseChartsCount = 0, originalUvChartsCount = 0;
+ uint32_t chartCount = 0;
+ const uint32_t meshCount = ctx->meshes.size();
+ for (uint32_t i = 0; i < meshCount; i++) {
+ for (uint32_t j = 0; j < ctx->paramAtlas.chartGroupCount(i); j++) {
+ const internal::param::ChartGroup *chartGroup = ctx->paramAtlas.chartGroupAt(i, j);
+ for (uint32_t k = 0; k < chartGroup->chartCount(); k++) {
+ const internal::param::Chart *chart = chartGroup->chartAt(k);
+ tJunctionCount += chart->tjunctionCount();
+ if (chart->tjunctionCount() > 0)
+ chartsWithTJunctionsCount++;
+ if (chart->type() == ChartType::Planar)
+ planarChartsCount++;
+ else if (chart->type() == ChartType::Ortho)
+ orthoChartsCount++;
+ else if (chart->type() == ChartType::LSCM)
+ lscmChartsCount++;
+ else if (chart->type() == ChartType::Piecewise)
+ piecewiseChartsCount++;
+ if (chart->generatorType() == internal::segment::ChartGeneratorType::OriginalUv)
+ originalUvChartsCount++;
+ }
+ chartCount += chartGroup->chartCount();
+ }
+ }
+ if (tJunctionCount > 0)
+ XA_PRINT(" %u t-junctions found in %u charts\n", tJunctionCount, chartsWithTJunctionsCount);
XA_PRINT(" %u charts\n", chartCount);
- }
- uint32_t chartIndex = 0, invalidParamCount = 0;
- for (uint32_t i = 0; i < meshCount; i++) {
- for (uint32_t j = 0; j < ctx->paramAtlas.chartGroupCount(i); j++) {
- const internal::param::ChartGroup *chartGroup = ctx->paramAtlas.chartGroupAt(i, j);
- for (uint32_t k = 0; k < chartGroup->chartCount(); k++) {
- internal::param::Chart *chart = chartGroup->chartAt(k);
- const internal::param::Quality &quality = chart->quality();
+ XA_PRINT(" %u planar, %u ortho, %u LSCM, %u piecewise\n", planarChartsCount, orthoChartsCount, lscmChartsCount, piecewiseChartsCount);
+ if (originalUvChartsCount > 0)
+ XA_PRINT(" %u with original UVs\n", originalUvChartsCount);
+ uint32_t chartIndex = 0, invalidParamCount = 0;
+ for (uint32_t i = 0; i < meshCount; i++) {
+ for (uint32_t j = 0; j < ctx->paramAtlas.chartGroupCount(i); j++) {
+ const internal::param::ChartGroup *chartGroup = ctx->paramAtlas.chartGroupAt(i, j);
+ for (uint32_t k = 0; k < chartGroup->chartCount(); k++) {
+ internal::param::Chart *chart = chartGroup->chartAt(k);
+ const internal::param::Quality &quality = chart->quality();
#if XA_DEBUG_EXPORT_OBJ_CHARTS_AFTER_PARAMETERIZATION
- {
- char filename[256];
- XA_SPRINTF(filename, sizeof(filename), "debug_chart_%03u_after_parameterization.obj", chartIndex);
- chart->unifiedMesh()->writeObjFile(filename);
- }
-#endif
- const char *type = "LSCM";
- if (chart->type() == ChartType::Planar)
- type = "planar";
- else if (chart->type() == ChartType::Ortho)
- type = "ortho";
- else if (chart->type() == ChartType::Piecewise)
- type = "piecewise";
- if (chart->isInvalid()) {
- if (quality.boundaryIntersection) {
- XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u) (%s): invalid parameterization, self-intersecting boundary.\n", chartIndex, i, j, k, type);
- }
- if (quality.flippedTriangleCount > 0) {
- XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u) (%s): invalid parameterization, %u / %u flipped triangles.\n", chartIndex, i, j, k, type, quality.flippedTriangleCount, quality.totalTriangleCount);
+ {
+ char filename[256];
+ XA_SPRINTF(filename, sizeof(filename), "debug_chart_%03u_after_parameterization.obj", chartIndex);
+ chart->unifiedMesh()->writeObjFile(filename);
}
- invalidParamCount++;
+#endif
+ const char *type = "LSCM";
+ if (chart->type() == ChartType::Planar)
+ type = "planar";
+ else if (chart->type() == ChartType::Ortho)
+ type = "ortho";
+ else if (chart->type() == ChartType::Piecewise)
+ type = "piecewise";
+ if (chart->isInvalid()) {
+ if (quality.boundaryIntersection) {
+ XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u) (%s): invalid parameterization, self-intersecting boundary.\n", chartIndex, i, j, k, type);
+ }
+ if (quality.flippedTriangleCount > 0) {
+ XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u) (%s): invalid parameterization, %u / %u flipped triangles.\n", chartIndex, i, j, k, type, quality.flippedTriangleCount, quality.totalTriangleCount);
+ }
+ if (quality.zeroAreaTriangleCount > 0) {
+ XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u) (%s): invalid parameterization, %u / %u zero area triangles.\n", chartIndex, i, j, k, type, quality.zeroAreaTriangleCount, quality.totalTriangleCount);
+ }
+ invalidParamCount++;
#if XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION
- char filename[256];
- XA_SPRINTF(filename, sizeof(filename), "debug_chart_%03u_invalid_parameterization.obj", chartIndex);
- const internal::Mesh *mesh = chart->unifiedMesh();
- FILE *file;
- XA_FOPEN(file, filename, "w");
- if (file) {
- mesh->writeObjVertices(file);
- fprintf(file, "s off\n");
- fprintf(file, "o object\n");
- for (uint32_t f = 0; f < mesh->faceCount(); f++)
- mesh->writeObjFace(file, f);
- if (!chart->paramFlippedFaces().isEmpty()) {
- fprintf(file, "o flipped_faces\n");
- for (uint32_t f = 0; f < chart->paramFlippedFaces().size(); f++)
- mesh->writeObjFace(file, chart->paramFlippedFaces()[f]);
+ char filename[256];
+ XA_SPRINTF(filename, sizeof(filename), "debug_chart_%03u_invalid_parameterization.obj", chartIndex);
+ const internal::Mesh *mesh = chart->unifiedMesh();
+ FILE *file;
+ XA_FOPEN(file, filename, "w");
+ if (file) {
+ mesh->writeObjVertices(file);
+ fprintf(file, "s off\n");
+ fprintf(file, "o object\n");
+ for (uint32_t f = 0; f < mesh->faceCount(); f++)
+ mesh->writeObjFace(file, f);
+ if (!chart->paramFlippedFaces().isEmpty()) {
+ fprintf(file, "o flipped_faces\n");
+ for (uint32_t f = 0; f < chart->paramFlippedFaces().size(); f++)
+ mesh->writeObjFace(file, chart->paramFlippedFaces()[f]);
+ }
+ mesh->writeObjBoundaryEges(file);
+ fclose(file);
}
- mesh->writeObjBoundaryEges(file);
- mesh->writeObjLinkedBoundaries(file);
- fclose(file);
- }
#endif
+ }
+ chartIndex++;
}
- chartIndex++;
}
}
+ if (invalidParamCount > 0)
+ XA_PRINT_WARNING(" %u charts with invalid parameterizations\n", invalidParamCount);
+#if XA_PROFILE
+ XA_PRINT(" Chart groups\n");
+ uint32_t chartGroupCount = 0;
+ for (uint32_t i = 0; i < meshCount; i++) {
+#if 0
+ XA_PRINT(" Mesh %u: %u chart groups\n", i, ctx->paramAtlas.chartGroupCount(i));
+#endif
+ chartGroupCount += ctx->paramAtlas.chartGroupCount(i);
+ }
+ XA_PRINT(" %u total\n", chartGroupCount);
+#endif
+ XA_PROFILE_PRINT_AND_RESET(" Compute charts total (real): ", computeChartsReal)
+ XA_PROFILE_PRINT_AND_RESET(" Compute charts total (thread): ", computeChartsThread)
+ XA_PROFILE_PRINT_AND_RESET(" Create face groups: ", createFaceGroups)
+ XA_PROFILE_PRINT_AND_RESET(" Extract invalid mesh geometry: ", extractInvalidMeshGeometry)
+ XA_PROFILE_PRINT_AND_RESET(" Chart group compute charts (real): ", chartGroupComputeChartsReal)
+ XA_PROFILE_PRINT_AND_RESET(" Chart group compute charts (thread): ", chartGroupComputeChartsThread)
+ XA_PROFILE_PRINT_AND_RESET(" Create chart group mesh: ", createChartGroupMesh)
+ XA_PROFILE_PRINT_AND_RESET(" Create colocals: ", createChartGroupMeshColocals)
+ XA_PROFILE_PRINT_AND_RESET(" Create boundaries: ", createChartGroupMeshBoundaries)
+ XA_PROFILE_PRINT_AND_RESET(" Build atlas: ", buildAtlas)
+ XA_PROFILE_PRINT_AND_RESET(" Init: ", buildAtlasInit)
+ XA_PROFILE_PRINT_AND_RESET(" Planar charts: ", planarCharts)
+ if (options.useInputMeshUvs) {
+ XA_PROFILE_PRINT_AND_RESET(" Original UV charts: ", originalUvCharts)
+ }
+ XA_PROFILE_PRINT_AND_RESET(" Clustered charts: ", clusteredCharts)
+ XA_PROFILE_PRINT_AND_RESET(" Place seeds: ", clusteredChartsPlaceSeeds)
+ XA_PROFILE_PRINT_AND_RESET(" Boundary intersection: ", clusteredChartsPlaceSeedsBoundaryIntersection)
+ XA_PROFILE_PRINT_AND_RESET(" Relocate seeds: ", clusteredChartsRelocateSeeds)
+ XA_PROFILE_PRINT_AND_RESET(" Reset: ", clusteredChartsReset)
+ XA_PROFILE_PRINT_AND_RESET(" Grow: ", clusteredChartsGrow)
+ XA_PROFILE_PRINT_AND_RESET(" Boundary intersection: ", clusteredChartsGrowBoundaryIntersection)
+ XA_PROFILE_PRINT_AND_RESET(" Merge: ", clusteredChartsMerge)
+ XA_PROFILE_PRINT_AND_RESET(" Fill holes: ", clusteredChartsFillHoles)
+ XA_PROFILE_PRINT_AND_RESET(" Copy chart faces: ", copyChartFaces)
+ XA_PROFILE_PRINT_AND_RESET(" Create chart mesh and parameterize (real): ", createChartMeshAndParameterizeReal)
+ XA_PROFILE_PRINT_AND_RESET(" Create chart mesh and parameterize (thread): ", createChartMeshAndParameterizeThread)
+ XA_PROFILE_PRINT_AND_RESET(" Create chart mesh: ", createChartMesh)
+ XA_PROFILE_PRINT_AND_RESET(" Parameterize charts: ", parameterizeCharts)
+ XA_PROFILE_PRINT_AND_RESET(" Orthogonal: ", parameterizeChartsOrthogonal)
+ XA_PROFILE_PRINT_AND_RESET(" LSCM: ", parameterizeChartsLSCM)
+ XA_PROFILE_PRINT_AND_RESET(" Recompute: ", parameterizeChartsRecompute)
+ XA_PROFILE_PRINT_AND_RESET(" Piecewise: ", parameterizeChartsPiecewise)
+ XA_PROFILE_PRINT_AND_RESET(" Boundary intersection: ", parameterizeChartsPiecewiseBoundaryIntersection)
+ XA_PROFILE_PRINT_AND_RESET(" Evaluate quality: ", parameterizeChartsEvaluateQuality)
+#if XA_PROFILE_ALLOC
+ XA_PROFILE_PRINT_AND_RESET(" Alloc: ", alloc)
+#endif
+ XA_PRINT_MEM_USAGE
+ } else {
+ XA_PROFILE_START(computeChartsReal)
+ if (!internal::segment::computeUvMeshCharts(ctx->taskScheduler, ctx->uvMeshes, ctx->progressFunc, ctx->progressUserData)) {
+ XA_PRINT(" Cancelled by user\n");
+ return;
+ }
+ XA_PROFILE_END(computeChartsReal)
+ ctx->uvMeshChartsComputed = true;
+ // Count charts.
+ uint32_t chartCount = 0;
+ const uint32_t meshCount = ctx->uvMeshes.size();
+ for (uint32_t i = 0; i < meshCount; i++)
+ chartCount += ctx->uvMeshes[i]->charts.size();
+ XA_PRINT(" %u charts\n", chartCount);
+ XA_PROFILE_PRINT_AND_RESET(" Total (real): ", computeChartsReal)
+ XA_PROFILE_PRINT_AND_RESET(" Total (thread): ", computeChartsThread)
}
- if (invalidParamCount > 0)
- XA_PRINT_WARNING(" %u charts with invalid parameterizations\n", invalidParamCount);
- XA_PROFILE_PRINT_AND_RESET(" Total (real): ", parameterizeChartsReal)
- XA_PROFILE_PRINT_AND_RESET(" Total (thread): ", parameterizeChartsThread)
- XA_PROFILE_PRINT_AND_RESET(" Create chart mesh: ", createChartMesh)
- XA_PROFILE_PRINT_AND_RESET(" Fix t-junctions: ", fixChartMeshTJunctions)
- XA_PROFILE_PRINT_AND_RESET(" Close holes: ", closeChartMeshHoles)
- XA_PROFILE_PRINT_AND_RESET(" Orthogonal: ", parameterizeChartsOrthogonal)
- XA_PROFILE_PRINT_AND_RESET(" LSCM: ", parameterizeChartsLSCM)
- XA_PROFILE_PRINT_AND_RESET(" Recompute: ", parameterizeChartsRecompute)
- XA_PROFILE_PRINT_AND_RESET(" Piecewise: ", parameterizeChartsPiecewise)
- XA_PROFILE_PRINT_AND_RESET(" Boundary intersection: ", parameterizeChartsPiecewiseBoundaryIntersection)
- XA_PROFILE_PRINT_AND_RESET(" Evaluate quality: ", parameterizeChartsEvaluateQuality)
#if XA_PROFILE_ALLOC
XA_PROFILE_PRINT_AND_RESET(" Alloc: ", alloc)
#endif
XA_PRINT_MEM_USAGE
}
-void PackCharts(Atlas *atlas, PackOptions packOptions)
-{
+void PackCharts(Atlas *atlas, PackOptions packOptions) {
// Validate arguments and context state.
if (!atlas) {
XA_PRINT_WARNING("PackCharts: atlas is null.\n");
@@ -9867,10 +9152,9 @@ void PackCharts(Atlas *atlas, PackOptions packOptions)
XA_PRINT_WARNING("PackCharts: ComputeCharts must be called first.\n");
return;
}
- if (!ctx->paramAtlas.chartsParameterized()) {
- XA_PRINT_WARNING("PackCharts: ParameterizeCharts must be called first.\n");
- return;
- }
+ } else if (!ctx->uvMeshChartsComputed) {
+ XA_PRINT_WARNING("PackCharts: ComputeCharts must be called first.\n");
+ return;
}
if (packOptions.texelsPerUnit < 0.0f) {
XA_PRINT_WARNING("PackCharts: PackOptions::texelsPerUnit is negative.\n");
@@ -9893,8 +9177,7 @@ void PackCharts(Atlas *atlas, PackOptions packOptions)
if (!ctx->uvMeshInstances.isEmpty()) {
for (uint32_t i = 0; i < ctx->uvMeshInstances.size(); i++)
packAtlas.addUvMeshCharts(ctx->uvMeshInstances[i]);
- }
- else
+ } else
packAtlas.addCharts(ctx->taskScheduler, &ctx->paramAtlas);
XA_PROFILE_END(packChartsAddCharts)
XA_PROFILE_START(packCharts)
@@ -9946,16 +9229,35 @@ void PackCharts(Atlas *atlas, PackOptions packOptions)
uint32_t chartIndex = 0;
for (uint32_t i = 0; i < atlas->meshCount; i++) {
Mesh &outputMesh = atlas->meshes[i];
+ MeshPolygonMapping *meshPolygonMapping = ctx->meshPolygonMappings[i];
+ // One polygon can have many triangles. Don't want to process the same polygon more than once when counting indices, building chart faces etc.
+ internal::BitArray polygonTouched;
+ if (meshPolygonMapping) {
+ polygonTouched.resize(meshPolygonMapping->faceVertexCount.size());
+ polygonTouched.zeroOutMemory();
+ }
// Count and alloc arrays.
- const internal::param::InvalidMeshGeometry &invalid = ctx->paramAtlas.invalidMeshGeometry(i);
+ const internal::InvalidMeshGeometry &invalid = ctx->paramAtlas.invalidMeshGeometry(i);
outputMesh.vertexCount += invalid.vertices().length;
outputMesh.indexCount += invalid.faces().length * 3;
for (uint32_t cg = 0; cg < ctx->paramAtlas.chartGroupCount(i); cg++) {
const internal::param::ChartGroup *chartGroup = ctx->paramAtlas.chartGroupAt(i, cg);
for (uint32_t c = 0; c < chartGroup->chartCount(); c++) {
const internal::param::Chart *chart = chartGroup->chartAt(c);
- outputMesh.vertexCount += chart->mesh()->vertexCount();
- outputMesh.indexCount += chart->mesh()->faceCount() * 3;
+ outputMesh.vertexCount += chart->originalVertexCount();
+ const uint32_t faceCount = chart->unifiedMesh()->faceCount();
+ if (meshPolygonMapping) {
+ // Map triangles back to polygons and count the polygon vertices.
+ for (uint32_t f = 0; f < faceCount; f++) {
+ const uint32_t polygon = meshPolygonMapping->triangleToPolygonMap[chart->mapFaceToSourceFace(f)];
+ if (!polygonTouched.get(polygon)) {
+ polygonTouched.set(polygon);
+ outputMesh.indexCount += meshPolygonMapping->faceVertexCount[polygon];
+ }
+ }
+ } else {
+ outputMesh.indexCount += faceCount * 3;
+ }
outputMesh.chartCount++;
}
}
@@ -9966,7 +9268,7 @@ void PackCharts(Atlas *atlas, PackOptions packOptions)
// Copy mesh data.
uint32_t firstVertex = 0;
{
- const internal::param::InvalidMeshGeometry &mesh = ctx->paramAtlas.invalidMeshGeometry(i);
+ const internal::InvalidMeshGeometry &mesh = ctx->paramAtlas.invalidMeshGeometry(i);
internal::ConstArrayView<uint32_t> faces = mesh.faces();
internal::ConstArrayView<uint32_t> indices = mesh.indices();
internal::ConstArrayView<uint32_t> vertices = mesh.vertices();
@@ -9991,23 +9293,50 @@ void PackCharts(Atlas *atlas, PackOptions packOptions)
const internal::param::ChartGroup *chartGroup = ctx->paramAtlas.chartGroupAt(i, cg);
for (uint32_t c = 0; c < chartGroup->chartCount(); c++) {
const internal::param::Chart *chart = chartGroup->chartAt(c);
- const internal::Mesh *mesh = chart->mesh();
+ const internal::Mesh *unifiedMesh = chart->unifiedMesh();
+ const uint32_t faceCount = unifiedMesh->faceCount();
+#if XA_CHECK_PARAM_WINDING // Debug check: count the chart faces whose parametric (UV) area is negative.
+ uint32_t flippedCount = 0;
+ for (uint32_t f = 0; f < faceCount; f++) {
+ const float area = unifiedMesh->computeFaceParametricArea(f); // The old 'mesh' local no longer exists in this scope.
+ if (area < 0.0f)
+ flippedCount++;
+ }
+ const char *type = "LSCM";
+ if (chart->type() == ChartType::Planar)
+ type = "planar";
+ else if (chart->type() == ChartType::Ortho)
+ type = "ortho";
+ else if (chart->type() == ChartType::Piecewise)
+ type = "piecewise";
+ if (flippedCount > 0) {
+ if (flippedCount == faceCount) {
+ XA_PRINT_WARNING("chart %u (%s): all faces flipped\n", chartIndex, type);
+ } else {
+ XA_PRINT_WARNING("chart %u (%s): %u / %u faces flipped\n", chartIndex, type, flippedCount, faceCount);
+ }
+ }
+#endif
// Vertices.
- for (uint32_t v = 0; v < mesh->vertexCount(); v++) {
+ for (uint32_t v = 0; v < chart->originalVertexCount(); v++) {
Vertex &vertex = outputMesh.vertexArray[firstVertex + v];
vertex.atlasIndex = packAtlas.getChart(chartIndex)->atlasIndex;
XA_DEBUG_ASSERT(vertex.atlasIndex >= 0);
vertex.chartIndex = (int32_t)chartIndex;
- const internal::Vector2 &uv = mesh->texcoord(v);
+ const internal::Vector2 &uv = unifiedMesh->texcoord(chart->originalVertexToUnifiedVertex(v));
vertex.uv[0] = internal::max(0.0f, uv.x);
vertex.uv[1] = internal::max(0.0f, uv.y);
vertex.xref = chart->mapChartVertexToSourceVertex(v);
}
// Indices.
- for (uint32_t f = 0; f < mesh->faceCount(); f++) {
+ for (uint32_t f = 0; f < faceCount; f++) {
const uint32_t indexOffset = chart->mapFaceToSourceFace(f) * 3;
- for (uint32_t j = 0; j < 3; j++)
- outputMesh.indexArray[indexOffset + j] = firstVertex + mesh->vertexAt(f * 3 + j);
+ for (uint32_t j = 0; j < 3; j++) {
+ uint32_t outIndex = indexOffset + j;
+ if (meshPolygonMapping)
+ outIndex = meshPolygonMapping->triangleToPolygonIndicesMap[outIndex];
+ outputMesh.indexArray[outIndex] = firstVertex + chart->originalVertices()[f * 3 + j];
+ }
}
// Charts.
Chart *outputChart = &outputMesh.chartArray[meshChartIndex];
@@ -10015,14 +9344,38 @@ void PackCharts(Atlas *atlas, PackOptions packOptions)
XA_DEBUG_ASSERT(atlasIndex >= 0);
outputChart->atlasIndex = (uint32_t)atlasIndex;
outputChart->type = chart->isInvalid() ? ChartType::Invalid : chart->type();
- outputChart->faceCount = mesh->faceCount();
- outputChart->faceArray = XA_ALLOC_ARRAY(internal::MemTag::Default, uint32_t, outputChart->faceCount);
- for (uint32_t f = 0; f < outputChart->faceCount; f++)
- outputChart->faceArray[f] = chart->mapFaceToSourceFace(f);
+ if (meshPolygonMapping) {
+ // Count polygons.
+ polygonTouched.zeroOutMemory();
+ outputChart->faceCount = 0;
+ for (uint32_t f = 0; f < faceCount; f++) {
+ const uint32_t polygon = meshPolygonMapping->triangleToPolygonMap[chart->mapFaceToSourceFace(f)];
+ if (!polygonTouched.get(polygon)) {
+ polygonTouched.set(polygon);
+ outputChart->faceCount++;
+ }
+ }
+ // Write polygons.
+ outputChart->faceArray = XA_ALLOC_ARRAY(internal::MemTag::Default, uint32_t, outputChart->faceCount);
+ polygonTouched.zeroOutMemory();
+ uint32_t of = 0;
+ for (uint32_t f = 0; f < faceCount; f++) {
+ const uint32_t polygon = meshPolygonMapping->triangleToPolygonMap[chart->mapFaceToSourceFace(f)];
+ if (!polygonTouched.get(polygon)) {
+ polygonTouched.set(polygon);
+ outputChart->faceArray[of++] = polygon;
+ }
+ }
+ } else {
+ outputChart->faceCount = faceCount;
+ outputChart->faceArray = XA_ALLOC_ARRAY(internal::MemTag::Default, uint32_t, outputChart->faceCount);
+ for (uint32_t f = 0; f < outputChart->faceCount; f++)
+ outputChart->faceArray[f] = chart->mapFaceToSourceFace(f);
+ }
outputChart->material = 0;
meshChartIndex++;
chartIndex++;
- firstVertex += mesh->vertexCount();
+ firstVertex += chart->originalVertexCount();
}
}
XA_DEBUG_ASSERT(outputMesh.vertexCount == firstVertex);
@@ -10102,28 +9455,21 @@ void PackCharts(Atlas *atlas, PackOptions packOptions)
XA_PRINT_MEM_USAGE
}
-void Generate(Atlas *atlas, ChartOptions chartOptions, ParameterizeOptions parameterizeOptions, PackOptions packOptions)
-{
+void Generate(Atlas *atlas, ChartOptions chartOptions, PackOptions packOptions) {
if (!atlas) {
XA_PRINT_WARNING("Generate: atlas is null.\n");
return;
}
Context *ctx = (Context *)atlas;
- if (!ctx->uvMeshInstances.isEmpty()) {
- XA_PRINT_WARNING("Generate: This function should not be called with UV meshes.\n");
- return;
- }
- if (ctx->meshes.isEmpty()) {
- XA_PRINT_WARNING("Generate: No meshes. Call AddMesh first.\n");
+ if (ctx->meshes.isEmpty() && ctx->uvMeshInstances.isEmpty()) {
+ XA_PRINT_WARNING("Generate: No meshes. Call AddMesh or AddUvMesh first.\n");
return;
}
ComputeCharts(atlas, chartOptions);
- ParameterizeCharts(atlas, parameterizeOptions);
PackCharts(atlas, packOptions);
}
-void SetProgressCallback(Atlas *atlas, ProgressFunc progressFunc, void *progressUserData)
-{
+void SetProgressCallback(Atlas *atlas, ProgressFunc progressFunc, void *progressUserData) {
if (!atlas) {
XA_PRINT_WARNING("SetProgressCallback: atlas is null.\n");
return;
@@ -10133,37 +9479,33 @@ void SetProgressCallback(Atlas *atlas, ProgressFunc progressFunc, void *progress
ctx->progressUserData = progressUserData;
}
-void SetAlloc(ReallocFunc reallocFunc, FreeFunc freeFunc)
-{
+void SetAlloc(ReallocFunc reallocFunc, FreeFunc freeFunc) {
internal::s_realloc = reallocFunc;
internal::s_free = freeFunc;
}
-void SetPrint(PrintFunc print, bool verbose)
-{
+void SetPrint(PrintFunc print, bool verbose) {
internal::s_print = print;
internal::s_printVerbose = verbose;
}
-const char *StringForEnum(AddMeshError::Enum error)
-{
+const char *StringForEnum(AddMeshError error) {
if (error == AddMeshError::Error)
return "Unspecified error";
if (error == AddMeshError::IndexOutOfRange)
return "Index out of range";
+ if (error == AddMeshError::InvalidFaceVertexCount)
+ return "Invalid face vertex count";
if (error == AddMeshError::InvalidIndexCount)
return "Invalid index count";
return "Success";
}
-const char *StringForEnum(ProgressCategory::Enum category)
-{
+const char *StringForEnum(ProgressCategory category) {
if (category == ProgressCategory::AddMesh)
return "Adding mesh(es)";
if (category == ProgressCategory::ComputeCharts)
return "Computing charts";
- if (category == ProgressCategory::ParameterizeCharts)
- return "Parameterizing charts";
if (category == ProgressCategory::PackCharts)
return "Packing charts";
if (category == ProgressCategory::BuildOutputMeshes)
@@ -10172,3 +9514,96 @@ const char *StringForEnum(ProgressCategory::Enum category)
}
} // namespace xatlas
+
+#if XATLAS_C_API
+static_assert(sizeof(xatlas::Chart) == sizeof(xatlasChart), "xatlasChart size mismatch");
+static_assert(sizeof(xatlas::Vertex) == sizeof(xatlasVertex), "xatlasVertex size mismatch");
+static_assert(sizeof(xatlas::Mesh) == sizeof(xatlasMesh), "xatlasMesh size mismatch");
+static_assert(sizeof(xatlas::Atlas) == sizeof(xatlasAtlas), "xatlasAtlas size mismatch");
+static_assert(sizeof(xatlas::MeshDecl) == sizeof(xatlasMeshDecl), "xatlasMeshDecl size mismatch");
+static_assert(sizeof(xatlas::UvMeshDecl) == sizeof(xatlasUvMeshDecl), "xatlasUvMeshDecl size mismatch");
+static_assert(sizeof(xatlas::ChartOptions) == sizeof(xatlasChartOptions), "xatlasChartOptions size mismatch");
+static_assert(sizeof(xatlas::PackOptions) == sizeof(xatlasPackOptions), "xatlasPackOptions size mismatch");
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+xatlasAtlas *xatlasCreate() {
+ return (xatlasAtlas *)xatlas::Create();
+}
+
+void xatlasDestroy(xatlasAtlas *atlas) {
+ xatlas::Destroy((xatlas::Atlas *)atlas);
+}
+
+xatlasAddMeshError xatlasAddMesh(xatlasAtlas *atlas, const xatlasMeshDecl *meshDecl, uint32_t meshCountHint) {
+ return (xatlasAddMeshError)xatlas::AddMesh((xatlas::Atlas *)atlas, *(const xatlas::MeshDecl *)meshDecl, meshCountHint);
+}
+
+void xatlasAddMeshJoin(xatlasAtlas *atlas) {
+ xatlas::AddMeshJoin((xatlas::Atlas *)atlas);
+}
+
+xatlasAddMeshError xatlasAddUvMesh(xatlasAtlas *atlas, const xatlasUvMeshDecl *decl) {
+ return (xatlasAddMeshError)xatlas::AddUvMesh((xatlas::Atlas *)atlas, *(const xatlas::UvMeshDecl *)decl);
+}
+
+void xatlasComputeCharts(xatlasAtlas *atlas, const xatlasChartOptions *chartOptions) { // C wrapper; null chartOptions selects the defaults.
+ xatlas::ComputeCharts((xatlas::Atlas *)atlas, chartOptions ? *(const xatlas::ChartOptions *)chartOptions : xatlas::ChartOptions());
+}
+
+void xatlasPackCharts(xatlasAtlas *atlas, const xatlasPackOptions *packOptions) { // C wrapper; null packOptions selects the defaults.
+ xatlas::PackCharts((xatlas::Atlas *)atlas, packOptions ? *(const xatlas::PackOptions *)packOptions : xatlas::PackOptions());
+}
+
+void xatlasGenerate(xatlasAtlas *atlas, const xatlasChartOptions *chartOptions, const xatlasPackOptions *packOptions) { // C wrapper; a null options pointer selects that struct's defaults.
+ xatlas::Generate((xatlas::Atlas *)atlas, chartOptions ? *(const xatlas::ChartOptions *)chartOptions : xatlas::ChartOptions(), packOptions ? *(const xatlas::PackOptions *)packOptions : xatlas::PackOptions());
+}
+
+void xatlasSetProgressCallback(xatlasAtlas *atlas, xatlasProgressFunc progressFunc, void *progressUserData) {
+ xatlas::ProgressFunc pf;
+ *(void **)&pf = (void *)progressFunc;
+ xatlas::SetProgressCallback((xatlas::Atlas *)atlas, pf, progressUserData);
+}
+
+void xatlasSetAlloc(xatlasReallocFunc reallocFunc, xatlasFreeFunc freeFunc) {
+ xatlas::SetAlloc((xatlas::ReallocFunc)reallocFunc, (xatlas::FreeFunc)freeFunc);
+}
+
+void xatlasSetPrint(xatlasPrintFunc print, bool verbose) {
+ xatlas::SetPrint((xatlas::PrintFunc)print, verbose);
+}
+
+const char *xatlasAddMeshErrorString(xatlasAddMeshError error) {
+ return xatlas::StringForEnum((xatlas::AddMeshError)error);
+}
+
+const char *xatlasProgressCategoryString(xatlasProgressCategory category) {
+ return xatlas::StringForEnum((xatlas::ProgressCategory)category);
+}
+
+void xatlasMeshDeclInit(xatlasMeshDecl *meshDecl) {
+ xatlas::MeshDecl init;
+ memcpy(meshDecl, &init, sizeof(init));
+}
+
+void xatlasUvMeshDeclInit(xatlasUvMeshDecl *uvMeshDecl) {
+ xatlas::UvMeshDecl init;
+ memcpy(uvMeshDecl, &init, sizeof(init));
+}
+
+void xatlasChartOptionsInit(xatlasChartOptions *chartOptions) {
+ xatlas::ChartOptions init;
+ memcpy(chartOptions, &init, sizeof(init));
+}
+
+void xatlasPackOptionsInit(xatlasPackOptions *packOptions) {
+ xatlas::PackOptions init;
+ memcpy(packOptions, &init, sizeof(init));
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+#endif // XATLAS_C_API
diff --git a/thirdparty/xatlas/xatlas.h b/thirdparty/xatlas/xatlas.h
index cc47f4837e..fc40d9d49c 100644
--- a/thirdparty/xatlas/xatlas.h
+++ b/thirdparty/xatlas/xatlas.h
@@ -31,35 +31,30 @@ Copyright NVIDIA Corporation 2006 -- Ignacio Castano <icastano@nvidia.com>
#pragma once
#ifndef XATLAS_H
#define XATLAS_H
+#include <stddef.h>
#include <stdint.h>
namespace xatlas {
-struct ChartType
-{
- enum Enum
- {
- Planar,
- Ortho,
- LSCM,
- Piecewise,
- Invalid
- };
+enum class ChartType {
+ Planar,
+ Ortho,
+ LSCM,
+ Piecewise,
+ Invalid
};
// A group of connected faces, belonging to a single atlas.
-struct Chart
-{
+struct Chart {
uint32_t *faceArray;
uint32_t atlasIndex; // Sub-atlas index.
uint32_t faceCount;
- ChartType::Enum type;
+ ChartType type;
uint32_t material;
};
// Output vertex.
-struct Vertex
-{
+struct Vertex {
int32_t atlasIndex; // Sub-atlas index. -1 if the vertex doesn't exist in any atlas.
int32_t chartIndex; // -1 if the vertex doesn't exist in any chart.
float uv[2]; // Not normalized - values are in Atlas width and height range.
@@ -67,8 +62,7 @@ struct Vertex
};
// Output mesh.
-struct Mesh
-{
+struct Mesh {
Chart *chartArray;
uint32_t *indexArray;
Vertex *vertexArray;
@@ -83,16 +77,15 @@ static const uint32_t kImageIsBilinearBit = 0x40000000;
static const uint32_t kImageIsPaddingBit = 0x20000000;
// Empty on creation. Populated after charts are packed.
-struct Atlas
-{
+struct Atlas {
uint32_t *image;
Mesh *meshes; // The output meshes, corresponding to each AddMesh call.
+ float *utilization; // Normalized atlas texel utilization array. E.g. a value of 0.8 means 20% empty space. atlasCount in length.
uint32_t width; // Atlas width in texels.
uint32_t height; // Atlas height in texels.
uint32_t atlasCount; // Number of sub-atlases. Equal to 0 unless PackOptions resolution is changed from default (0).
uint32_t chartCount; // Total number of charts in all meshes.
uint32_t meshCount; // Number of output meshes. Equal to the number of times AddMesh was called.
- float *utilization; // Normalized atlas texel utilization array. E.g. a value of 0.8 means 20% empty space. atlasCount in length.
float texelsPerUnit; // Equal to PackOptions texelsPerUnit if texelsPerUnit > 0, otherwise an estimated value to match PackOptions resolution.
};
@@ -101,73 +94,76 @@ Atlas *Create();
void Destroy(Atlas *atlas);
-struct IndexFormat
-{
- enum Enum
- {
- UInt16,
- UInt32
- };
+enum class IndexFormat {
+ UInt16,
+ UInt32
};
// Input mesh declaration.
-struct MeshDecl
-{
+struct MeshDecl {
const void *vertexPositionData = nullptr;
const void *vertexNormalData = nullptr; // optional
const void *vertexUvData = nullptr; // optional. The input UVs are provided as a hint to the chart generator.
const void *indexData = nullptr; // optional
-
- // Optional. indexCount / 3 (triangle count) in length.
+
+ // Optional. Must be faceCount in length.
// Don't atlas faces set to true. Ignored faces still exist in the output meshes, Vertex uv is set to (0, 0) and Vertex atlasIndex to -1.
const bool *faceIgnoreData = nullptr;
+ // Optional. Must be faceCount in length.
+ // Only faces with the same material will be assigned to the same chart.
+ const uint32_t *faceMaterialData = nullptr;
+
+ // Optional. Must be faceCount in length.
+ // Polygon / n-gon support. Faces are assumed to be triangles if this is null.
+ const uint8_t *faceVertexCount = nullptr;
+
uint32_t vertexCount = 0;
uint32_t vertexPositionStride = 0;
uint32_t vertexNormalStride = 0; // optional
uint32_t vertexUvStride = 0; // optional
uint32_t indexCount = 0;
int32_t indexOffset = 0; // optional. Add this offset to all indices.
- IndexFormat::Enum indexFormat = IndexFormat::UInt16;
+ uint32_t faceCount = 0; // Optional if faceVertexCount is null, in which case it is assumed to be indexCount / 3.
+ IndexFormat indexFormat = IndexFormat::UInt16;
// Vertex positions within epsilon distance of each other are considered colocal.
float epsilon = 1.192092896e-07F;
};
-struct AddMeshError
-{
- enum Enum
- {
- Success, // No error.
- Error, // Unspecified error.
- IndexOutOfRange, // An index is >= MeshDecl vertexCount.
- InvalidIndexCount // Not evenly divisible by 3 - expecting triangles.
- };
+enum class AddMeshError {
+ Success, // No error.
+ Error, // Unspecified error.
+ IndexOutOfRange, // An index is >= MeshDecl vertexCount.
+ InvalidFaceVertexCount, // Must be >= 3.
+ InvalidIndexCount // Not evenly divisible by 3 - expecting triangles.
};
// Add a mesh to the atlas. MeshDecl data is copied, so it can be freed after AddMesh returns.
-AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t meshCountHint = 0);
+AddMeshError AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t meshCountHint = 0);
// Wait for AddMesh async processing to finish. ComputeCharts / Generate call this internally.
void AddMeshJoin(Atlas *atlas);
-struct UvMeshDecl
-{
+struct UvMeshDecl {
const void *vertexUvData = nullptr;
const void *indexData = nullptr; // optional
- const uint32_t *faceMaterialData = nullptr; // Optional. Faces with different materials won't be assigned to the same chart. Must be indexCount / 3 in length.
+ const uint32_t *faceMaterialData = nullptr; // Optional. Overlapping UVs should be assigned a different material. Must be indexCount / 3 in length.
uint32_t vertexCount = 0;
uint32_t vertexStride = 0;
uint32_t indexCount = 0;
int32_t indexOffset = 0; // optional. Add this offset to all indices.
- IndexFormat::Enum indexFormat = IndexFormat::UInt16;
- bool rotateCharts = true;
+ IndexFormat indexFormat = IndexFormat::UInt16;
};
-AddMeshError::Enum AddUvMesh(Atlas *atlas, const UvMeshDecl &decl);
+AddMeshError AddUvMesh(Atlas *atlas, const UvMeshDecl &decl);
+
+// Custom parameterization function. texcoords initial values are an orthogonal parameterization.
+typedef void (*ParameterizeFunc)(const float *positions, float *texcoords, uint32_t vertexCount, const uint32_t *indices, uint32_t indexCount);
+
+struct ChartOptions {
+ ParameterizeFunc paramFunc = nullptr;
-struct ChartOptions
-{
float maxChartArea = 0.0f; // Don't grow charts to be larger than this. 0 means no limit.
float maxBoundaryLength = 0.0f; // Don't grow charts to have a longer boundary than this. 0 means no limit.
@@ -180,26 +176,31 @@ struct ChartOptions
float maxCost = 2.0f; // If total of all metrics * weights > maxCost, don't grow chart. Lower values result in more charts.
uint32_t maxIterations = 1; // Number of iterations of the chart growing and seeding phases. Higher values result in better charts.
+
+ bool useInputMeshUvs = false; // Use MeshDecl::vertexUvData for charts.
+ bool fixWinding = false; // Enforce consistent texture coordinate winding.
};
// Call after all AddMesh calls. Can be called multiple times to recompute charts with different options.
void ComputeCharts(Atlas *atlas, ChartOptions options = ChartOptions());
-// Custom parameterization function. texcoords initial values are an orthogonal parameterization.
-typedef void (*ParameterizeFunc)(const float *positions, float *texcoords, uint32_t vertexCount, const uint32_t *indices, uint32_t indexCount);
+struct PackOptions {
+ // Charts larger than this will be scaled down. 0 means no limit.
+ uint32_t maxChartSize = 0;
-struct ParameterizeOptions
-{
- ParameterizeFunc func = nullptr;
- bool closeHoles = true; // If the custom parameterization function works with multiple boundaries, this can be set to false to improve performance.
- bool fixTJunctions = true; // If meshes don't have T-junctions, this can be set to false to improve performance.
-};
+ // Number of pixels to pad charts with.
+ uint32_t padding = 0;
+
+ // Unit to texel scale. e.g. a 1x1 quad with texelsPerUnit of 32 will take up approximately 32x32 texels in the atlas.
+ // If 0, an estimated value will be calculated to approximately match the given resolution.
+ // If resolution is also 0, the estimated value will approximately match a 1024x1024 atlas.
+ float texelsPerUnit = 0.0f;
-// Call after ComputeCharts. Can be called multiple times to re-parameterize charts with a different ParameterizeFunc.
-void ParameterizeCharts(Atlas *atlas, ParameterizeOptions options = ParameterizeOptions());
+ // If 0, generate a single atlas with texelsPerUnit determining the final resolution.
+ // If not 0, and texelsPerUnit is not 0, generate one or more atlases with that exact resolution.
+ // If not 0, and texelsPerUnit is 0, texelsPerUnit is estimated to approximately match the resolution.
+ uint32_t resolution = 0;
-struct PackOptions
-{
// Leave space around charts for texels that would be sampled by bilinear filtering.
bool bilinear = true;
@@ -212,44 +213,29 @@ struct PackOptions
// Create Atlas::image
bool createImage = false;
- // Charts larger than this will be scaled down. 0 means no limit.
- uint32_t maxChartSize = 0;
-
- // Number of pixels to pad charts with.
- uint32_t padding = 0;
+ // Rotate charts to the axis of their convex hull.
+ bool rotateChartsToAxis = true;
- // Unit to texel scale. e.g. a 1x1 quad with texelsPerUnit of 32 will take up approximately 32x32 texels in the atlas.
- // If 0, an estimated value will be calculated to approximately match the given resolution.
- // If resolution is also 0, the estimated value will approximately match a 1024x1024 atlas.
- float texelsPerUnit = 0.0f;
-
- // If 0, generate a single atlas with texelsPerUnit determining the final resolution.
- // If not 0, and texelsPerUnit is not 0, generate one or more atlases with that exact resolution.
- // If not 0, and texelsPerUnit is 0, texelsPerUnit is estimated to approximately match the resolution.
- uint32_t resolution = 0;
+ // Rotate charts to improve packing.
+ bool rotateCharts = true;
};
-// Call after ParameterizeCharts. Can be called multiple times to re-pack charts with different options.
+// Call after ComputeCharts. Can be called multiple times to re-pack charts with different options.
void PackCharts(Atlas *atlas, PackOptions packOptions = PackOptions());
-// Equivalent to calling ComputeCharts, ParameterizeCharts and PackCharts in sequence. Can be called multiple times to regenerate with different options.
-void Generate(Atlas *atlas, ChartOptions chartOptions = ChartOptions(), ParameterizeOptions parameterizeOptions = ParameterizeOptions(), PackOptions packOptions = PackOptions());
+// Equivalent to calling ComputeCharts and PackCharts in sequence. Can be called multiple times to regenerate with different options.
+void Generate(Atlas *atlas, ChartOptions chartOptions = ChartOptions(), PackOptions packOptions = PackOptions());
// Progress tracking.
-struct ProgressCategory
-{
- enum Enum
- {
- AddMesh,
- ComputeCharts,
- ParameterizeCharts,
- PackCharts,
- BuildOutputMeshes
- };
+enum class ProgressCategory {
+ AddMesh,
+ ComputeCharts,
+ PackCharts,
+ BuildOutputMeshes
};
// May be called from any thread. Return false to cancel.
-typedef bool (*ProgressFunc)(ProgressCategory::Enum category, int progress, void *userData);
+typedef bool (*ProgressFunc)(ProgressCategory category, int progress, void *userData);
void SetProgressCallback(Atlas *atlas, ProgressFunc progressFunc = nullptr, void *progressUserData = nullptr);
@@ -263,8 +249,8 @@ typedef int (*PrintFunc)(const char *, ...);
void SetPrint(PrintFunc print, bool verbose);
// Helper functions for error messages.
-const char *StringForEnum(AddMeshError::Enum error);
-const char *StringForEnum(ProgressCategory::Enum category);
+const char *StringForEnum(AddMeshError error);
+const char *StringForEnum(ProgressCategory category);
} // namespace xatlas